From 93d7a3b4be30c705c39377b9e75b24a184f6c4fe Mon Sep 17 00:00:00 2001
From: Mika Kahola <mika.kahola@intel.com>
Date: Mon, 17 Jun 2019 11:24:13 +0300
Subject: drm/i915/icl: Add missing device ID

We are missing PCI device ID for SKU ICLLP U GT 1.5F (0x8A54) as per BSPec.

BSpec: 19092

Signed-off-by: Mika Kahola <mika.kahola@intel.com>
Reviewed-by: Clint Taylor <Clinton.A.Taylor@intel.com>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190617082413.22549-1-mika.kahola@intel.com
---
 include/drm/i915_pciids.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 6d60ea68c171..6c342ac470c8 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -568,7 +568,8 @@
 	INTEL_VGA_DEVICE(0x8A56, info), \
 	INTEL_VGA_DEVICE(0x8A71, info), \
 	INTEL_VGA_DEVICE(0x8A70, info), \
-	INTEL_VGA_DEVICE(0x8A53, info)
+	INTEL_VGA_DEVICE(0x8A53, info), \
+	INTEL_VGA_DEVICE(0x8A54, info)
 
 #define INTEL_ICL_11_IDS(info) \
 	INTEL_ICL_PORT_F_IDS(info), \
-- 
cgit v1.2.3


From bf73fc0fa9cf78e37d6ee99e8d12bfa2083594d6 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 3 Jul 2019 15:37:02 +0100
Subject: drm/i915: Show support for accurate sw PMU busyness tracking

Expose whether or not we support the PMU software tracking in our
scheduler capabilities, so userspace can query at runtime.

v2: Use I915_SCHEDULER_CAP_ENGINE_BUSY_STATS for a less ambiguous
capability name.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190703143702.11339-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++++---
 drivers/gpu/drm/i915/i915_pmu.c           | 4 +---
 include/uapi/drm/i915_drm.h               | 1 +
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index c1fb5fa3952e..7d6d6e62e9cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -688,9 +688,10 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
 		u8 engine;
 		u8 sched;
 	} map[] = {
-#define MAP(x, y) { ilog2(I915_ENGINE_HAS_##x), ilog2(I915_SCHEDULER_CAP_##y) }
-		MAP(PREEMPTION, PREEMPTION),
-		MAP(SEMAPHORES, SEMAPHORES),
+#define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) }
+		MAP(HAS_PREEMPTION, PREEMPTION),
+		MAP(HAS_SEMAPHORES, SEMAPHORES),
+		MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS),
 #undef MAP
 	};
 	struct intel_engine_cs *engine;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8fe46ee920a0..eff86483bec0 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -102,10 +102,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
 	/*
 	 * Also there is software busyness tracking available we do not
 	 * need the timer for I915_SAMPLE_BUSY counter.
-	 *
-	 * Use RCS as proxy for all engines.
 	 */
-	else if (intel_engine_supports_stats(i915->engine[RCS0]))
+	else if (i915->caps.scheduler & I915_SCHEDULER_CAP_ENGINE_BUSY_STATS)
 		enable &= ~BIT(I915_SAMPLE_BUSY);
 
 	/*
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 328d05e77d9f..469dc512cca3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -521,6 +521,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
 #define   I915_SCHEDULER_CAP_SEMAPHORES	(1ul << 3)
+#define   I915_SCHEDULER_CAP_ENGINE_BUSY_STATS	(1ul << 4)
 
 #define I915_PARAM_HUC_STATUS		 42
 
-- 
cgit v1.2.3


From 163fa23435cc9c705a71001d4aa15f3f945554a1 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 3 Jul 2019 16:25:52 +0800
Subject: percpu: Make pcpu_setup_first_chunk() void function

pcpu_setup_first_chunk() will panic or BUG_ON if the are some
error and doesn't return any error, hence it can be defined to
return void.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
[Dennis: fixed kbuild warning for pcpu_page_first_chunk()]
---
 arch/ia64/mm/contig.c    |  5 +----
 arch/ia64/mm/discontig.c |  5 +----
 include/linux/percpu.h   |  2 +-
 mm/percpu.c              | 19 +++++++------------
 4 files changed, 10 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index d29fb6b9fa33..db09a693f094 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -134,10 +134,7 @@ setup_per_cpu_areas(void)
 	ai->atom_size		= PAGE_SIZE;
 	ai->alloc_size		= PERCPU_PAGE_SIZE;
 
-	rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
-	if (rc)
-		panic("failed to setup percpu area (err=%d)", rc);
-
+	pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
 	pcpu_free_alloc_info(ai);
 }
 #else
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 05490dd073e6..004dee231874 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -245,10 +245,7 @@ void __init setup_per_cpu_areas(void)
 		gi->cpu_map		= &cpu_map[unit];
 	}
 
-	rc = pcpu_setup_first_chunk(ai, base);
-	if (rc)
-		panic("failed to setup percpu area (err=%d)", rc);
-
+	pcpu_setup_first_chunk(ai, base);
 	pcpu_free_alloc_info(ai);
 }
 #endif
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9909dc0e273a..5e76af742c80 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -105,7 +105,7 @@ extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 							     int nr_units);
 extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
 
-extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
+extern void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 					 void *base_addr);
 
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
diff --git a/mm/percpu.c b/mm/percpu.c
index 9821241fdede..5a918a4b1da0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2267,12 +2267,9 @@ static void pcpu_dump_alloc_info(const char *lvl,
  * share the same vm, but use offset regions in the area allocation map.
  * The chunk serving the dynamic region is circulated in the chunk slots
  * and available for dynamic allocation like any other chunk.
- *
- * RETURNS:
- * 0 on success, -errno on failure.
  */
-int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
-				  void *base_addr)
+void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
+				   void *base_addr)
 {
 	size_t size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	size_t static_size, dyn_size;
@@ -2457,7 +2454,6 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
 	/* we're done */
 	pcpu_base_addr = base_addr;
-	return 0;
 }
 
 #ifdef CONFIG_SMP
@@ -2710,7 +2706,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 	struct pcpu_alloc_info *ai;
 	size_t size_sum, areas_size;
 	unsigned long max_distance;
-	int group, i, highest_group, rc;
+	int group, i, highest_group, rc = 0;
 
 	ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
 				   cpu_distance_fn);
@@ -2795,7 +2791,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 		PFN_DOWN(size_sum), ai->static_size, ai->reserved_size,
 		ai->dyn_size, ai->unit_size);
 
-	rc = pcpu_setup_first_chunk(ai, base);
+	pcpu_setup_first_chunk(ai, base);
 	goto out_free;
 
 out_free_areas:
@@ -2839,7 +2835,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	int unit_pages;
 	size_t pages_size;
 	struct page **pages;
-	int unit, i, j, rc;
+	int unit, i, j, rc = 0;
 	int upa;
 	int nr_g0_units;
 
@@ -2920,7 +2916,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 		unit_pages, psize_str, ai->static_size,
 		ai->reserved_size, ai->dyn_size);
 
-	rc = pcpu_setup_first_chunk(ai, vm.addr);
+	pcpu_setup_first_chunk(ai, vm.addr);
 	goto out_free_ar;
 
 enomem:
@@ -3014,8 +3010,7 @@ void __init setup_per_cpu_areas(void)
 	ai->groups[0].nr_units = 1;
 	ai->groups[0].cpu_map[0] = 0;
 
-	if (pcpu_setup_first_chunk(ai, fc) < 0)
-		panic("Failed to initialize percpu areas.");
+	pcpu_setup_first_chunk(ai, fc);
 	pcpu_free_alloc_info(ai);
 }
 
-- 
cgit v1.2.3


From 52db6685932e326ed607644ab7ebdae8c194adda Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 10 Jul 2019 16:59:55 +0900
Subject: ASoC: simple_card_utils.h: care NULL dai at asoc_simple_debug_dai()

props->xxx_dai might be NULL when DPCM.
This patch cares it for debug.

Fixes: commit 0580dde59438 ("ASoC: simple-card-utils: add asoc_simple_debug_info()")
Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87o922gw4u.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/simple_card_utils.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index 954563ee2277..985a5f583de4 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -141,6 +141,10 @@ inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
 {
 	struct device *dev = simple_priv_to_dev(priv);
 
+	/* dai might be NULL */
+	if (!dai)
+		return;
+
 	if (dai->name)
 		dev_dbg(dev, "%s dai name = %s\n",
 			name, dai->name);
-- 
cgit v1.2.3


From 22be8233b34f4f468934c5fefcbe6151766fb8f2 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Thu, 11 Jul 2019 04:53:25 -0400
Subject: media: videodev2.h: change V4L2_PIX_FMT_BGRA444 define: fourcc was
 already in use

The V4L2_PIX_FMT_BGRA444 define clashed with the pre-existing V4L2_PIX_FMT_SGRBG12
which strangely enough used the same fourcc, even though that fourcc made no sense
for a Bayer format. In any case, you can't have duplicates, so change the fourcc of
V4L2_PIX_FMT_BGRA444.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: <stable@vger.kernel.org>      # for v5.2 and up
Fixes: 6c84f9b1d2900 ("media: v4l: Add definitions for missing 16-bit RGB4444 formats")
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/uapi/linux/videodev2.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9d9705ceda76..2427bc4d8eba 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -518,7 +518,13 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_RGBX444 v4l2_fourcc('R', 'X', '1', '2') /* 16  rrrrgggg bbbbxxxx */
 #define V4L2_PIX_FMT_ABGR444 v4l2_fourcc('A', 'B', '1', '2') /* 16  aaaabbbb ggggrrrr */
 #define V4L2_PIX_FMT_XBGR444 v4l2_fourcc('X', 'B', '1', '2') /* 16  xxxxbbbb ggggrrrr */
-#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('B', 'A', '1', '2') /* 16  bbbbgggg rrrraaaa */
+
+/*
+ * Originally this had 'BA12' as fourcc, but this clashed with the older
+ * V4L2_PIX_FMT_SGRBG12 which inexplicably used that same fourcc.
+ * So use 'GA12' instead for V4L2_PIX_FMT_BGRA444.
+ */
+#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('G', 'A', '1', '2') /* 16  bbbbgggg rrrraaaa */
 #define V4L2_PIX_FMT_BGRX444 v4l2_fourcc('B', 'X', '1', '2') /* 16  bbbbgggg rrrrxxxx */
 #define V4L2_PIX_FMT_RGB555  v4l2_fourcc('R', 'G', 'B', 'O') /* 16  RGB-5-5-5     */
 #define V4L2_PIX_FMT_ARGB555 v4l2_fourcc('A', 'R', '1', '5') /* 16  ARGB-1-5-5-5  */
-- 
cgit v1.2.3


From 9747f0c2fb9e1a6d7c907d34a373924c84093afa Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Thu, 11 Jul 2019 10:30:59 -0700
Subject: drm/i915/tgl: Add TGL PCI IDs

Current list of PCI IDs for Tiger Lake.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Mika Kahola <mika.kahola@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190711173115.28296-6-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/i915_pci.c |  1 +
 include/drm/i915_pciids.h       | 10 ++++++++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index da926485845d..e83c94cf2744 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -865,6 +865,7 @@ static const struct pci_device_id pciidlist[] = {
 	INTEL_CNL_IDS(&intel_cannonlake_info),
 	INTEL_ICL_11_IDS(&intel_icelake_11_info),
 	INTEL_EHL_IDS(&intel_elkhartlake_info),
+	INTEL_TGL_12_IDS(&intel_tigerlake_12_info),
 	{0, 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, pciidlist);
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 6c342ac470c8..a70c982ddff9 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -583,4 +583,14 @@
 	INTEL_VGA_DEVICE(0x4551, info), \
 	INTEL_VGA_DEVICE(0x4541, info)
 
+/* TGL */
+#define INTEL_TGL_12_IDS(info) \
+	INTEL_VGA_DEVICE(0x9A49, info), \
+	INTEL_VGA_DEVICE(0x9A40, info), \
+	INTEL_VGA_DEVICE(0x9A59, info), \
+	INTEL_VGA_DEVICE(0x9A60, info), \
+	INTEL_VGA_DEVICE(0x9A68, info), \
+	INTEL_VGA_DEVICE(0x9A70, info), \
+	INTEL_VGA_DEVICE(0x9A78, info)
+
 #endif /* _I915_PCIIDS_H */
-- 
cgit v1.2.3


From 6c8337dafaa9a328216c62a79c9a03176af3ce70 Mon Sep 17 00:00:00 2001
From: Vandita Kulkarni <vandita.kulkarni@intel.com>
Date: Thu, 11 Jul 2019 10:31:06 -0700
Subject: drm/i915/tgl: Add additional ports for Tiger Lake
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are 2 new additional typeC ports in Tiger Lake and PORT-C is now a
combophy port. This results in 6 typeC ports and 3 combophy ports.
These 6 TC ports can be DP alternate mode, DP over thunderbolt, native
DP on legacy DP connector or native HDMI on legacy connector.

v2: Rebase on new modular FIA code (Lucas)
v3: Also add new port in port_identifier(), even though it can't
    possibly be used there (requested by José)
v4: Add conversion port->tc_port in helper function after introction of
    phy namespace (Lucas)

Cc: Anusha Srivatsa <anusha.srivatsa@intel.com>
Signed-off-by: Vandita Kulkarni <vandita.kulkarni@intel.com>
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190711173115.28296-13-lucas.demarchi@intel.com
---
 drivers/gpu/drm/i915/display/intel_ddi.c     | 12 ++++++++++++
 drivers/gpu/drm/i915/display/intel_display.c |  3 +++
 drivers/gpu/drm/i915/display/intel_display.h |  8 ++++++++
 include/drm/i915_component.h                 |  2 +-
 include/drm/i915_drm.h                       |  3 +++
 5 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index 1662e5c2be1c..8445244aa593 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -4286,6 +4286,18 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 		intel_dig_port->ddi_io_power_domain =
 			POWER_DOMAIN_PORT_DDI_F_IO;
 		break;
+	case PORT_G:
+		intel_dig_port->ddi_io_power_domain =
+			POWER_DOMAIN_PORT_DDI_G_IO;
+		break;
+	case PORT_H:
+		intel_dig_port->ddi_io_power_domain =
+			POWER_DOMAIN_PORT_DDI_H_IO;
+		break;
+	case PORT_I:
+		intel_dig_port->ddi_io_power_domain =
+			POWER_DOMAIN_PORT_DDI_I_IO;
+		break;
 	default:
 		MISSING_CASE(port);
 	}
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index c13784b81fb1..13ff4177beba 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6706,6 +6706,9 @@ enum tc_port intel_port_to_tc(struct drm_i915_private *dev_priv, enum port port)
 	if (!intel_phy_is_tc(dev_priv, intel_port_to_phy(dev_priv, port)))
 		return PORT_TC_NONE;
 
+	if (INTEL_GEN(dev_priv) >= 12)
+		return port - PORT_D;
+
 	return port - PORT_C;
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index 1f75b0a627fd..72ce27079a56 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -177,6 +177,12 @@ static inline const char *port_identifier(enum port port)
 		return "Port E";
 	case PORT_F:
 		return "Port F";
+	case PORT_G:
+		return "Port G";
+	case PORT_H:
+		return "Port H";
+	case PORT_I:
+		return "Port I";
 	default:
 		return "<invalid>";
 	}
@@ -189,6 +195,8 @@ enum tc_port {
 	PORT_TC2,
 	PORT_TC3,
 	PORT_TC4,
+	PORT_TC5,
+	PORT_TC6,
 
 	I915_MAX_TC_PORTS
 };
diff --git a/include/drm/i915_component.h b/include/drm/i915_component.h
index dcb95bd9dee6..55c3b123581b 100644
--- a/include/drm/i915_component.h
+++ b/include/drm/i915_component.h
@@ -34,7 +34,7 @@ enum i915_component_type {
 /* MAX_PORT is the number of port
  * It must be sync with I915_MAX_PORTS defined i915_drv.h
  */
-#define MAX_PORTS 6
+#define MAX_PORTS 9
 
 /**
  * struct i915_audio_component - Used for direct communication between i915 and hda drivers
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 7523e9a7b6e2..eb30062359d1 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -109,6 +109,9 @@ enum port {
 	PORT_D,
 	PORT_E,
 	PORT_F,
+	PORT_G,
+	PORT_H,
+	PORT_I,
 
 	I915_MAX_PORTS
 };
-- 
cgit v1.2.3


From b5893ffc274be966f95aa35f35916fa8725af154 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Date: Fri, 12 Jul 2019 13:24:25 +0200
Subject: drm/i915: Drop extern qualifiers from header function prototypes

Follow dim checkpatch recommendation so it doesn't complain on that now
and again on header file modifications.

v2: drop testing leftover (Chris)

Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190712112429.740-2-janusz.krzysztofik@linux.intel.com
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  2 +-
 drivers/gpu/drm/i915/gvt/gtt.h             | 13 +++++----
 drivers/gpu/drm/i915/i915_drv.h            | 47 ++++++++++++++----------------
 drivers/gpu/drm/i915/i915_irq.h            |  4 +--
 drivers/gpu/drm/i915/oa/i915_oa_bdw.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_bxt.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h   |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h   |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_chv.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_cnl.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_glk.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_hsw.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_icl.h      |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h   |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h   |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h   |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h   |  2 +-
 drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h   |  2 +-
 include/drm/i915_drm.h                     | 10 +++----
 19 files changed, 51 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 20754c15412a..67aea07ea019 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -81,7 +81,7 @@ i915_gem_object_lookup(struct drm_file *file, u32 handle)
 }
 
 __deprecated
-extern struct drm_gem_object *
+struct drm_gem_object *
 drm_gem_object_lookup(struct drm_file *file, u32 handle);
 
 __attribute__((nonnull))
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
index 42d0394f0de2..88789316807d 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.h
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -205,17 +205,18 @@ struct intel_vgpu_gtt {
 	struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
 };
 
-extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
-extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
+int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
+void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
 void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
 
-extern int intel_gvt_init_gtt(struct intel_gvt *gvt);
+int intel_gvt_init_gtt(struct intel_gvt *gvt);
 void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
-extern void intel_gvt_clean_gtt(struct intel_gvt *gvt);
+void intel_gvt_clean_gtt(struct intel_gvt *gvt);
 
-extern struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
-		int page_table_level, void *root_entry);
+struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
+					      int page_table_level,
+					      void *root_entry);
 
 struct intel_vgpu_oos_page {
 	struct intel_vgpu_ppgtt_spt *spt;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 56527a7a1666..1a6b4e14a405 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2390,19 +2390,17 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level,
 	__i915_printk(dev_priv, KERN_ERR, fmt, ##__VA_ARGS__)
 
 #ifdef CONFIG_COMPAT
-extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
-			      unsigned long arg);
+long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
 #else
 #define i915_compat_ioctl NULL
 #endif
 extern const struct dev_pm_ops i915_pm_ops;
 
-extern int i915_driver_load(struct pci_dev *pdev,
-			    const struct pci_device_id *ent);
-extern void i915_driver_unload(struct drm_device *dev);
+int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent);
+void i915_driver_unload(struct drm_device *dev);
 
-extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
-extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
+void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
+void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv);
@@ -2672,14 +2670,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 			    bool is_master);
 
 /* i915_perf.c */
-extern void i915_perf_init(struct drm_i915_private *dev_priv);
-extern void i915_perf_fini(struct drm_i915_private *dev_priv);
-extern void i915_perf_register(struct drm_i915_private *dev_priv);
-extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
+void i915_perf_init(struct drm_i915_private *dev_priv);
+void i915_perf_fini(struct drm_i915_private *dev_priv);
+void i915_perf_register(struct drm_i915_private *dev_priv);
+void i915_perf_unregister(struct drm_i915_private *dev_priv);
 
 /* i915_suspend.c */
-extern int i915_save_state(struct drm_i915_private *dev_priv);
-extern int i915_restore_state(struct drm_i915_private *dev_priv);
+int i915_save_state(struct drm_i915_private *dev_priv);
+int i915_restore_state(struct drm_i915_private *dev_priv);
 
 /* i915_sysfs.c */
 void i915_setup_sysfs(struct drm_i915_private *dev_priv);
@@ -2693,23 +2691,22 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
 }
 
 /* modesetting */
-extern void intel_modeset_init_hw(struct drm_device *dev);
-extern int intel_modeset_init(struct drm_device *dev);
-extern void intel_modeset_cleanup(struct drm_device *dev);
-extern int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv,
-				       bool state);
-extern void intel_display_resume(struct drm_device *dev);
-extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
-extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
-extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
+void intel_modeset_init_hw(struct drm_device *dev);
+int intel_modeset_init(struct drm_device *dev);
+void intel_modeset_cleanup(struct drm_device *dev);
+int intel_modeset_vga_set_state(struct drm_i915_private *dev_priv, bool state);
+void intel_display_resume(struct drm_device *dev);
+void i915_redisable_vga(struct drm_i915_private *dev_priv);
+void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
+void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
 
 int i915_reg_read_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file);
 
-extern struct intel_display_error_state *
+struct intel_display_error_state *
 intel_display_capture_error_state(struct drm_i915_private *dev_priv);
-extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
-					    struct intel_display_error_state *error);
+void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
+				     struct intel_display_error_state *error);
 
 #define __I915_REG_OP(op__, dev_priv__, ...) \
 	intel_uncore_##op__(&(dev_priv__)->uncore, __VA_ARGS__)
diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h
index d93fa4e75442..4f803f910177 100644
--- a/drivers/gpu/drm/i915/i915_irq.h
+++ b/drivers/gpu/drm/i915/i915_irq.h
@@ -13,8 +13,8 @@
 struct drm_i915_private;
 struct intel_crtc;
 
-extern void intel_irq_init(struct drm_i915_private *dev_priv);
-extern void intel_irq_fini(struct drm_i915_private *dev_priv);
+void intel_irq_init(struct drm_i915_private *dev_priv);
+void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
 void intel_irq_uninstall(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bdw.h b/drivers/gpu/drm/i915/oa/i915_oa_bdw.h
index 0e667f1a8aa1..b5ed68882588 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_bdw.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_bdw.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_BDW_H__
 #define __I915_OA_BDW_H__
 
-extern void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_bdw(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_bxt.h b/drivers/gpu/drm/i915/oa/i915_oa_bxt.h
index 679e92cf4f1d..43c3e4ab030a 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_bxt.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_bxt.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_BXT_H__
 #define __I915_OA_BXT_H__
 
-extern void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_bxt(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h
index 4d6025559bbe..1b4b563bc585 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_cflgt2.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_CFLGT2_H__
 #define __I915_OA_CFLGT2_H__
 
-extern void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_cflgt2(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h
index 0697f4077402..500565e055cd 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_cflgt3.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_CFLGT3_H__
 #define __I915_OA_CFLGT3_H__
 
-extern void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_cflgt3(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_chv.h b/drivers/gpu/drm/i915/oa/i915_oa_chv.h
index 0986eae3135f..ad85d6a6a573 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_chv.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_chv.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_CHV_H__
 #define __I915_OA_CHV_H__
 
-extern void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_chv(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_cnl.h b/drivers/gpu/drm/i915/oa/i915_oa_cnl.h
index e830a406aff2..9faaca38b587 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_cnl.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_cnl.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_CNL_H__
 #define __I915_OA_CNL_H__
 
-extern void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_cnl(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_glk.h b/drivers/gpu/drm/i915/oa/i915_oa_glk.h
index 06dedf991edb..cc13a1e9fd3e 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_glk.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_glk.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_GLK_H__
 #define __I915_OA_GLK_H__
 
-extern void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_glk(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_hsw.h b/drivers/gpu/drm/i915/oa/i915_oa_hsw.h
index 3d0c870cd0bd..f0ddcc79c761 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_hsw.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_hsw.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_HSW_H__
 #define __I915_OA_HSW_H__
 
-extern void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_hsw(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_icl.h b/drivers/gpu/drm/i915/oa/i915_oa_icl.h
index 24eaa97d61ba..e501651d385b 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_icl.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_icl.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_ICL_H__
 #define __I915_OA_ICL_H__
 
-extern void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h
index a55398a904de..dc460e6e0fae 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_kblgt2.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_KBLGT2_H__
 #define __I915_OA_KBLGT2_H__
 
-extern void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_kblgt2(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h
index 3ddd3483b7cc..5926992b735a 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_kblgt3.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_KBLGT3_H__
 #define __I915_OA_KBLGT3_H__
 
-extern void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_kblgt3(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h
index be6256037239..353db35b36c1 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt2.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_SKLGT2_H__
 #define __I915_OA_SKLGT2_H__
 
-extern void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_sklgt2(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h
index 650beb068e56..52f94c674b62 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt3.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_SKLGT3_H__
 #define __I915_OA_SKLGT3_H__
 
-extern void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_sklgt3(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h
index 8dcf849d131e..8e364820cc63 100644
--- a/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h
+++ b/drivers/gpu/drm/i915/oa/i915_oa_sklgt4.h
@@ -10,6 +10,6 @@
 #ifndef __I915_OA_SKLGT4_H__
 #define __I915_OA_SKLGT4_H__
 
-extern void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv);
+void i915_perf_load_test_config_sklgt4(struct drm_i915_private *dev_priv);
 
 #endif
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index eb30062359d1..23274cf92712 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -30,11 +30,11 @@
 #include <uapi/drm/i915_drm.h>
 
 /* For use by IPS driver */
-extern unsigned long i915_read_mch_val(void);
-extern bool i915_gpu_raise(void);
-extern bool i915_gpu_lower(void);
-extern bool i915_gpu_busy(void);
-extern bool i915_gpu_turbo_disable(void);
+unsigned long i915_read_mch_val(void);
+bool i915_gpu_raise(void);
+bool i915_gpu_lower(void);
+bool i915_gpu_busy(void);
+bool i915_gpu_turbo_disable(void);
 
 /* Exported from arch/x86/kernel/early-quirks.c */
 extern struct resource intel_graphics_stolen_res;
-- 
cgit v1.2.3


From 3c00fb0bf0e0f061715c04ad609de93ddc046aa1 Mon Sep 17 00:00:00 2001
From: xiao ruizhu <katrina.xiaorz@gmail.com>
Date: Thu, 4 Jul 2019 11:31:13 +0800
Subject: netfilter: nf_conntrack_sip: fix expectation clash

When conntracks change during a dialog, SDP messages may be sent from
different conntracks to establish expects with identical tuples. In this
case expects conflict may be detected for the 2nd SDP message and end up
with a process failure.

The fixing here is to reuse an existing expect who has the same tuple for a
different conntrack if any.

Here are two scenarios for the case.

1)
         SERVER                   CPE

           |      INVITE SDP       |
      5060 |<----------------------|5060
           |      100 Trying       |
      5060 |---------------------->|5060
           |      183 SDP          |
      5060 |---------------------->|5060    ===> Conntrack 1
           |       PRACK           |
     50601 |<----------------------|5060
           |    200 OK (PRACK)     |
     50601 |---------------------->|5060
           |    200 OK (INVITE)    |
      5060 |---------------------->|5060
           |        ACK            |
     50601 |<----------------------|5060
           |                       |
           |<--- RTP stream ------>|
           |                       |
           |    INVITE SDP (t38)   |
     50601 |---------------------->|5060    ===> Conntrack 2

With a certain configuration in the CPE, SIP messages "183 with SDP" and
"re-INVITE with SDP t38" will go through the sip helper to create
expects for RTP and RTCP.

It is okay to create RTP and RTCP expects for "183", whose master
connection source port is 5060, and destination port is 5060.

In the "183" message, port in Contact header changes to 50601 (from the
original 5060). So the following requests e.g. PRACK and ACK are sent to
port 50601. It is a different conntrack (let call Conntrack 2) from the
original INVITE (let call Conntrack 1) due to the port difference.

In this example, after the call is established, there is RTP stream but no
RTCP stream for Conntrack 1, so the RTP expect created upon "183" is
cleared, and RTCP expect created for Conntrack 1 retains.

When "re-INVITE with SDP t38" arrives to create RTP&RTCP expects, current
ALG implementation will call nf_ct_expect_related() for RTP and RTCP. The
expects tuples are identical to those for Conntrack 1. RTP expect for
Conntrack 2 succeeds in creation as the one for Conntrack 1 has been
removed. RTCP expect for Conntrack 2 fails in creation because it has
idential tuples and 'conflict' with the one retained for Conntrack 1. And
then result in a failure in processing of the re-INVITE.

2)

    SERVER A                 CPE

       |      REGISTER     |
  5060 |<------------------| 5060  ==> CT1
       |       200         |
  5060 |------------------>| 5060
       |                   |
       |   INVITE SDP(1)   |
  5060 |<------------------| 5060
       | 300(multi choice) |
  5060 |------------------>| 5060                    SERVER B
       |       ACK         |
  5060 |<------------------| 5060
                                  |    INVITE SDP(2)    |
                             5060 |-------------------->| 5060  ==> CT2
                                  |       100           |
                             5060 |<--------------------| 5060
                                  | 200(contact changes)|
                             5060 |<--------------------| 5060
                                  |       ACK           |
                             5060 |-------------------->| 50601 ==> CT3
                                  |                     |
                                  |<--- RTP stream ---->|
                                  |                     |
                                  |       BYE           |
                             5060 |<--------------------| 50601
                                  |       200           |
                             5060 |-------------------->| 50601
       |   INVITE SDP(3)   |
  5060 |<------------------| 5060  ==> CT1

CPE sends an INVITE request(1) to Server A, and creates a RTP&RTCP expect
pair for this Conntrack 1 (CT1). Server A responds 300 to redirect to
Server B. The RTP&RTCP expect pairs created on CT1 are removed upon 300
response.

CPE sends the INVITE request(2) to Server B, and creates an expect pair
for the new conntrack (due to destination address difference), let call
CT2. Server B changes the port to 50601 in 200 OK response, and the
following requests ACK and BYE from CPE are sent to 50601. The call is
established. There is RTP stream and no RTCP stream. So RTP expect is
removed and RTCP expect for CT2 retains.

As BYE request is sent from port 50601, it is another conntrack, let call
CT3, different from CT2 due to the port difference. So the BYE request will
not remove the RTCP expect for CT2.

Then another outgoing call is made, with the same RTP port being used (not
definitely but possibly). CPE firstly sends the INVITE request(3) to Server
A, and tries to create a RTP&RTCP expect pairs for this CT1. In current ALG
implementation, the RTCP expect for CT1 fails in creation because it
'conflicts' with the residual one for CT2. As a result the INVITE request
fails to send.

Signed-off-by: xiao ruizhu <katrina.xiaorz@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_expect.h | 12 +++++++++---
 net/ipv4/netfilter/nf_nat_h323.c            | 12 ++++++------
 net/netfilter/ipvs/ip_vs_nfct.c             |  2 +-
 net/netfilter/nf_conntrack_amanda.c         |  2 +-
 net/netfilter/nf_conntrack_broadcast.c      |  2 +-
 net/netfilter/nf_conntrack_expect.c         | 26 +++++++++++++++++++-------
 net/netfilter/nf_conntrack_ftp.c            |  2 +-
 net/netfilter/nf_conntrack_h323_main.c      | 18 +++++++++---------
 net/netfilter/nf_conntrack_irc.c            |  2 +-
 net/netfilter/nf_conntrack_netlink.c        |  4 ++--
 net/netfilter/nf_conntrack_pptp.c           |  4 ++--
 net/netfilter/nf_conntrack_sane.c           |  2 +-
 net/netfilter/nf_conntrack_sip.c            | 10 +++++++---
 net/netfilter/nf_conntrack_tftp.c           |  2 +-
 net/netfilter/nf_nat_amanda.c               |  2 +-
 net/netfilter/nf_nat_ftp.c                  |  2 +-
 net/netfilter/nf_nat_irc.c                  |  2 +-
 net/netfilter/nf_nat_sip.c                  |  8 +++++---
 net/netfilter/nf_nat_tftp.c                 |  2 +-
 net/netfilter/nft_ct.c                      |  2 +-
 20 files changed, 71 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 93ce6b0daaba..573429be4d59 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -76,6 +76,11 @@ struct nf_conntrack_expect_policy {
 #define NF_CT_EXPECT_CLASS_DEFAULT	0
 #define NF_CT_EXPECT_MAX_CNT		255
 
+/* Allow to reuse expectations with the same tuples from different master
+ * conntracks.
+ */
+#define NF_CT_EXP_F_SKIP_MASTER	0x1
+
 int nf_conntrack_expect_pernet_init(struct net *net);
 void nf_conntrack_expect_pernet_fini(struct net *net);
 
@@ -122,10 +127,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t,
 		       u_int8_t, const __be16 *, const __be16 *);
 void nf_ct_expect_put(struct nf_conntrack_expect *exp);
 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
-				u32 portid, int report);
-static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+				u32 portid, int report, unsigned int flags);
+static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect,
+				       unsigned int flags)
 {
-	return nf_ct_expect_related_report(expect, 0, 0);
+	return nf_ct_expect_related_report(expect, 0, 0, flags);
 }
 
 #endif /*_NF_CONNTRACK_EXPECT_H*/
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 87b711fd5a44..3e2685c120c7 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -221,11 +221,11 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		ret = nf_ct_expect_related(rtp_exp);
+		ret = nf_ct_expect_related(rtp_exp, 0);
 		if (ret == 0) {
 			rtcp_exp->tuple.dst.u.udp.port =
 			    htons(nated_port + 1);
-			ret = nf_ct_expect_related(rtcp_exp);
+			ret = nf_ct_expect_related(rtcp_exp, 0);
 			if (ret == 0)
 				break;
 			else if (ret == -EBUSY) {
@@ -296,7 +296,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -352,7 +352,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -444,7 +444,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -537,7 +537,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 403541996952..08adcb222986 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -231,7 +231,7 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
 
 	IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
 		      __func__, ct, ARG_TUPLE(&exp->tuple));
-	nf_ct_expect_related(exp);
+	nf_ct_expect_related(exp, 0);
 	nf_ct_expect_put(exp);
 }
 EXPORT_SYMBOL(ip_vs_nfct_expect_related);
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 42ee659d0d1e..d011d2eb0848 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -159,7 +159,7 @@ static int amanda_help(struct sk_buff *skb,
 		if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_amanda(skb, ctinfo, protoff,
 					    off - dataoff, len, exp);
-		else if (nf_ct_expect_related(exp) != 0) {
+		else if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 921a7b95be68..1ba6becc3079 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -68,7 +68,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 	exp->class		  = NF_CT_EXPECT_CLASS_DEFAULT;
 	exp->helper               = NULL;
 
-	nf_ct_expect_related(exp);
+	nf_ct_expect_related(exp, 0);
 	nf_ct_expect_put(exp);
 
 	nf_ct_refresh(ct, skb, timeout * HZ);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index ffd1f4906c4f..65364de915d1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -249,13 +249,22 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
 static inline int expect_matches(const struct nf_conntrack_expect *a,
 				 const struct nf_conntrack_expect *b)
 {
-	return a->master == b->master &&
-	       nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+	return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
 	       nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
 	       net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
 	       nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
 }
 
+static bool master_matches(const struct nf_conntrack_expect *a,
+			   const struct nf_conntrack_expect *b,
+			   unsigned int flags)
+{
+	if (flags & NF_CT_EXP_F_SKIP_MASTER)
+		return true;
+
+	return a->master == b->master;
+}
+
 /* Generally a bad idea to call this: could have matched already. */
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 {
@@ -399,7 +408,8 @@ static void evict_oldest_expect(struct nf_conn *master,
 		nf_ct_remove_expect(last);
 }
 
-static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+				       unsigned int flags)
 {
 	const struct nf_conntrack_expect_policy *p;
 	struct nf_conntrack_expect *i;
@@ -417,8 +427,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	}
 	h = nf_ct_expect_dst_hash(net, &expect->tuple);
 	hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
-		if (expect_matches(i, expect)) {
-			if (i->class != expect->class)
+		if (master_matches(i, expect, flags) &&
+		    expect_matches(i, expect)) {
+			if (i->class != expect->class ||
+			    i->master != expect->master)
 				return -EALREADY;
 
 			if (nf_ct_remove_expect(i))
@@ -453,12 +465,12 @@ out:
 }
 
 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
-				u32 portid, int report)
+				u32 portid, int report, unsigned int flags)
 {
 	int ret;
 
 	spin_lock_bh(&nf_conntrack_expect_lock);
-	ret = __nf_ct_expect_check(expect);
+	ret = __nf_ct_expect_check(expect, flags);
 	if (ret < 0)
 		goto out;
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 8c6c11bab5b6..0ecb3e289ef2 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -525,7 +525,7 @@ skip_nl_seq:
 				 protoff, matchoff, matchlen, exp);
 	else {
 		/* Can't expect this?  Best to drop packet now. */
-		if (nf_ct_expect_related(exp) != 0) {
+		if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		} else
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 6497e5fc0871..8ba037b76ad3 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -305,8 +305,8 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
 				   taddr, port, rtp_port, rtp_exp, rtcp_exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(rtp_exp) == 0) {
-			if (nf_ct_expect_related(rtcp_exp) == 0) {
+		if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+			if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
 				pr_debug("nf_ct_h323: expect RTP ");
 				nf_ct_dump_tuple(&rtp_exp->tuple);
 				pr_debug("nf_ct_h323: expect RTCP ");
@@ -364,7 +364,7 @@ static int expect_t120(struct sk_buff *skb,
 		ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_h323: expect T.120 ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -701,7 +701,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
 			       port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_q931: expect H.245 ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -825,7 +825,7 @@ static int expect_callforwarding(struct sk_buff *skb,
 					 protoff, data, dataoff,
 					 taddr, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_q931: expect Call Forwarding ");
 			nf_ct_dump_tuple(&exp->tuple);
 		} else
@@ -1284,7 +1284,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
 		ret = nat_q931(skb, ct, ctinfo, protoff, data,
 			       taddr, i, port, exp);
 	} else {		/* Conntrack only */
-		if (nf_ct_expect_related(exp) == 0) {
+		if (nf_ct_expect_related(exp, 0) == 0) {
 			pr_debug("nf_ct_ras: expect Q.931 ");
 			nf_ct_dump_tuple(&exp->tuple);
 
@@ -1349,7 +1349,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct,
 			  IPPROTO_UDP, NULL, &port);
 	exp->helper = nf_conntrack_helper_ras;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect RAS ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
@@ -1561,7 +1561,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct,
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect Q.931 ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
@@ -1615,7 +1615,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct,
 	exp->flags = NF_CT_EXPECT_PERMANENT;
 	exp->helper = nf_conntrack_helper_q931;
 
-	if (nf_ct_expect_related(exp) == 0) {
+	if (nf_ct_expect_related(exp, 0) == 0) {
 		pr_debug("nf_ct_ras: expect Q.931 ");
 		nf_ct_dump_tuple(&exp->tuple);
 	} else
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 7ac156f1f3bc..e40988a2f22f 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -213,7 +213,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
 						 addr_beg_p - ib_ptr,
 						 addr_end_p - addr_beg_p,
 						 exp);
-			else if (nf_ct_expect_related(exp) != 0) {
+			else if (nf_ct_expect_related(exp, 0) != 0) {
 				nf_ct_helper_log(skb, ct,
 						 "cannot add expectation");
 				ret = NF_DROP;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1b77444d5b52..6aa01eb6fe99 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2616,7 +2616,7 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
 	if (IS_ERR(exp))
 		return PTR_ERR(exp);
 
-	err = nf_ct_expect_related_report(exp, portid, report);
+	err = nf_ct_expect_related_report(exp, portid, report, 0);
 	nf_ct_expect_put(exp);
 	return err;
 }
@@ -3367,7 +3367,7 @@ ctnetlink_create_expect(struct net *net,
 		goto err_rcu;
 	}
 
-	err = nf_ct_expect_related_report(exp, portid, report);
+	err = nf_ct_expect_related_report(exp, portid, report, 0);
 	nf_ct_expect_put(exp);
 err_rcu:
 	rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index b22042ad0fca..a971183f11af 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -234,9 +234,9 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid)
 	nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
 	if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
 		nf_nat_pptp_exp_gre(exp_orig, exp_reply);
-	if (nf_ct_expect_related(exp_orig) != 0)
+	if (nf_ct_expect_related(exp_orig, 0) != 0)
 		goto out_put_both;
-	if (nf_ct_expect_related(exp_reply) != 0)
+	if (nf_ct_expect_related(exp_reply, 0) != 0)
 		goto out_unexpect_orig;
 
 	/* Add GRE keymap entries */
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 81448c3db661..1aebd6569d4e 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -153,7 +153,7 @@ static int help(struct sk_buff *skb,
 	nf_ct_dump_tuple(&exp->tuple);
 
 	/* Can't expect this?  Best to drop packet now. */
-	if (nf_ct_expect_related(exp) != 0) {
+	if (nf_ct_expect_related(exp, 0) != 0) {
 		nf_ct_helper_log(skb, ct, "cannot add expectation");
 		ret = NF_DROP;
 	}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 107251731809..b83dc9bf0a5d 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -977,11 +977,15 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
 		/* -EALREADY handling works around end-points that send
 		 * SDP messages with identical port but different media type,
 		 * we pretend expectation was set up.
+		 * It also works in the case that SDP messages are sent with
+		 * identical expect tuples but for different master conntracks.
 		 */
-		int errp = nf_ct_expect_related(rtp_exp);
+		int errp = nf_ct_expect_related(rtp_exp,
+						NF_CT_EXP_F_SKIP_MASTER);
 
 		if (errp == 0 || errp == -EALREADY) {
-			int errcp = nf_ct_expect_related(rtcp_exp);
+			int errcp = nf_ct_expect_related(rtcp_exp,
+						NF_CT_EXP_F_SKIP_MASTER);
 
 			if (errcp == 0 || errcp == -EALREADY)
 				ret = NF_ACCEPT;
@@ -1296,7 +1300,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
 		ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
 				    exp, matchoff, matchlen);
 	else {
-		if (nf_ct_expect_related(exp) != 0) {
+		if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		} else
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index df6d6d61bd58..80ee53f29f68 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -78,7 +78,7 @@ static int tftp_help(struct sk_buff *skb,
 		nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
 		if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
 			ret = nf_nat_tftp(skb, ctinfo, exp);
-		else if (nf_ct_expect_related(exp) != 0) {
+		else if (nf_ct_expect_related(exp, 0) != 0) {
 			nf_ct_helper_log(skb, ct, "cannot add expectation");
 			ret = NF_DROP;
 		}
diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c
index a352604d6186..3bc7e0854efe 100644
--- a/net/netfilter/nf_nat_amanda.c
+++ b/net/netfilter/nf_nat_amanda.c
@@ -48,7 +48,7 @@ static unsigned int help(struct sk_buff *skb,
 		int res;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		res = nf_ct_expect_related(exp);
+		res = nf_ct_expect_related(exp, 0);
 		if (res == 0)
 			break;
 		else if (res != -EBUSY) {
diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c
index d48484a9d52d..aace6768a64e 100644
--- a/net/netfilter/nf_nat_ftp.c
+++ b/net/netfilter/nf_nat_ftp.c
@@ -91,7 +91,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index dfb7ef8845bd..c691ab8d234c 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff *skb,
 		int ret;
 
 		exp->tuple.dst.u.tcp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, 0);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index e338d91980d8..f0a735e86851 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -414,7 +414,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
 		int ret;
 
 		exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(exp);
+		ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == 0)
 			break;
 		else if (ret != -EBUSY) {
@@ -607,7 +607,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
 		int ret;
 
 		rtp_exp->tuple.dst.u.udp.port = htons(port);
-		ret = nf_ct_expect_related(rtp_exp);
+		ret = nf_ct_expect_related(rtp_exp,
+					   NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == -EBUSY)
 			continue;
 		else if (ret < 0) {
@@ -615,7 +616,8 @@ static unsigned int nf_nat_sdp_media(struct sk_buff *skb, unsigned int protoff,
 			break;
 		}
 		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
-		ret = nf_ct_expect_related(rtcp_exp);
+		ret = nf_ct_expect_related(rtcp_exp,
+					   NF_CT_EXP_F_SKIP_MASTER);
 		if (ret == 0)
 			break;
 		else if (ret == -EBUSY) {
diff --git a/net/netfilter/nf_nat_tftp.c b/net/netfilter/nf_nat_tftp.c
index 833a11f68031..1a591132d6eb 100644
--- a/net/netfilter/nf_nat_tftp.c
+++ b/net/netfilter/nf_nat_tftp.c
@@ -30,7 +30,7 @@ static unsigned int help(struct sk_buff *skb,
 		= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
 	exp->dir = IP_CT_DIR_REPLY;
 	exp->expectfn = nf_nat_follow_master;
-	if (nf_ct_expect_related(exp) != 0) {
+	if (nf_ct_expect_related(exp, 0) != 0) {
 		nf_ct_helper_log(skb, exp->master, "cannot add expectation");
 		return NF_DROP;
 	}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 827ab6196df9..46ca8bcca1bd 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1252,7 +1252,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
 		          priv->l4proto, NULL, &priv->dport);
 	exp->timeout.expires = jiffies + priv->timeout * HZ;
 
-	if (nf_ct_expect_related(exp) != 0)
+	if (nf_ct_expect_related(exp, 0) != 0)
 		regs->verdict.code = NF_DROP;
 }
 
-- 
cgit v1.2.3


From 05ba4c895363db795f3d54f2da0de56d6520e52d Mon Sep 17 00:00:00 2001
From: Yonatan Goldschmidt <yon.goldschmidt@gmail.com>
Date: Mon, 8 Jul 2019 15:57:09 -0700
Subject: netfilter: Update obsolete comments referring to ip_conntrack

In 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") the new
generic nf_conntrack was introduced, and it came to supersede the old
ip_conntrack.

This change updates (some) of the obsolete comments referring to old
file/function names of the ip_conntrack mechanism, as well as removes a
few self-referencing comments that we shouldn't maintain anymore.

I did not update any comments referring to historical actions (e.g,
comments like "this file was derived from ..." were left untouched, even
if the referenced file is no longer here).

Signed-off-by: Yonatan Goldschmidt <yon.goldschmidt@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_h323_asn1.h | 3 +--
 net/ipv4/netfilter/ipt_CLUSTERIP.c               | 4 ++--
 net/netfilter/Kconfig                            | 6 ++----
 net/netfilter/nf_conntrack_core.c                | 4 +---
 net/netfilter/nf_conntrack_h323_asn1.c           | 5 ++---
 net/netfilter/nf_conntrack_proto_gre.c           | 2 --
 net/netfilter/nf_conntrack_proto_icmp.c          | 2 +-
 net/netfilter/nf_nat_core.c                      | 2 +-
 8 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_h323_asn1.h b/include/linux/netfilter/nf_conntrack_h323_asn1.h
index 91d6275292a5..19df78341fb3 100644
--- a/include/linux/netfilter/nf_conntrack_h323_asn1.h
+++ b/include/linux/netfilter/nf_conntrack_h323_asn1.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /****************************************************************************
- * ip_conntrack_h323_asn1.h - BER and PER decoding library for H.323
- * 			      conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
  *
  * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4d6bf7ac0792..6bdb1ab8af61 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -416,8 +416,8 @@ clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	     ctinfo == IP_CT_RELATED_REPLY))
 		return XT_CONTINUE;
 
-	/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
-	 * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+	/* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
+	 * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
 	 * on, which all have an ID field [relevant for hashing]. */
 
 	hash = clusterip_hashfn(skb, cipinfo->config);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 32a45c03786e..0d65f4d39494 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -223,8 +223,6 @@ config NF_CONNTRACK_FTP
 	  of Network Address Translation on them.
 
 	  This is FTP support on Layer 3 independent connection tracking.
-	  Layer 3 independent connection tracking is experimental scheme
-	  which generalize ip_conntrack to support other layer 3 protocols.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
@@ -338,7 +336,7 @@ config NF_CONNTRACK_SIP
 	help
 	  SIP is an application-layer control protocol that can establish,
 	  modify, and terminate multimedia sessions (conferences) such as
-	  Internet telephony calls. With the ip_conntrack_sip and
+	  Internet telephony calls. With the nf_conntrack_sip and
 	  the nf_nat_sip modules you can support the protocol on a connection
 	  tracking/NATing firewall.
 
@@ -1313,7 +1311,7 @@ config NETFILTER_XT_MATCH_HELPER
 	depends on NETFILTER_ADVANCED
 	help
 	  Helper matching allows you to match packets in dynamic connections
-	  tracked by a conntrack-helper, ie. ip_conntrack_ftp
+	  tracked by a conntrack-helper, ie. nf_conntrack_ftp
 
 	  To compile it as a module, choose M here.  If unsure, say Y.
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index bdfeacee0817..a542761e90d1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1817,9 +1817,7 @@ EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
 #include <linux/netfilter/nfnetlink_conntrack.h>
 #include <linux/mutex.h>
 
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
+/* Generic function for tcp/udp/sctp/dccp and alike. */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple)
 {
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index 8f6ba8162f0b..573cb4481481 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -1,11 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
- * 			      	     conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
  *
  * Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  *
- * See ip_conntrack_helper_h323_asn1.h for details.
+ * See nf_conntrack_helper_h323_asn1.h for details.
  */
 
 #ifdef __KERNEL__
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index c2eb365f1723..5b05487a60d2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
  * Connection tracking protocol helper module for GRE.
  *
  * GRE is a generic encapsulation protocol, which is generally not very
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index dd53e2b20f6b..097deba7441a 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -215,7 +215,7 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
 		return -NF_ACCEPT;
 	}
 
-	/* See ip_conntrack_proto_tcp.c */
+	/* See nf_conntrack_proto_tcp.c */
 	if (state->net->ct.sysctl_checksum &&
 	    state->hook == NF_INET_PRE_ROUTING &&
 	    nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 9ab410455992..3f6023ed4966 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -519,7 +519,7 @@ another_round:
  * and NF_INET_LOCAL_OUT, we change the destination to map into the
  * range. It might not be possible to get a unique tuple, but we try.
  * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
+ * __nf_conntrack_confirm and drop the packet. */
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
-- 
cgit v1.2.3


From b83329fb473f29d34d85d642e3a3313bb2871fa9 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Wed, 10 Jul 2019 12:05:57 +0200
Subject: netfilter: synproxy: fix erroneous tcp mss option

Now synproxy sends the mss value set by the user on client syn-ack packet
instead of the mss value that client announced.

Fixes: 48b1de4c110a ("netfilter: add SYNPROXY core/target")
Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_synproxy.h | 1 +
 net/ipv4/netfilter/ipt_SYNPROXY.c             | 2 ++
 net/ipv6/netfilter/ip6t_SYNPROXY.c            | 2 ++
 net/netfilter/nf_synproxy_core.c              | 4 ++--
 net/netfilter/nft_synproxy.c                  | 2 ++
 5 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 8f00125b06f4..44513b93bd55 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -68,6 +68,7 @@ struct synproxy_options {
 	u8				options;
 	u8				wscale;
 	u16				mss;
+	u16				mss_encode;
 	u32				tsval;
 	u32				tsecr;
 };
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 8e7f84ec783d..0e70f3f65f6f 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
+		opts.mss_encode = opts.mss;
+		opts.mss = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index e77ea1ed5edd..5cdb4a69d277 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,6 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
+		opts.mss_encode = opts.mss;
+		opts.mss = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index b101f187eda8..09718e5a9e41 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -470,7 +470,7 @@ synproxy_send_client_synack(struct net *net,
 	struct iphdr *iph, *niph;
 	struct tcphdr *nth;
 	unsigned int tcp_hdr_size;
-	u16 mss = opts->mss;
+	u16 mss = opts->mss_encode;
 
 	iph = ip_hdr(skb);
 
@@ -884,7 +884,7 @@ synproxy_send_client_synack_ipv6(struct net *net,
 	struct ipv6hdr *iph, *niph;
 	struct tcphdr *nth;
 	unsigned int tcp_hdr_size;
-	u16 mss = opts->mss;
+	u16 mss = opts->mss_encode;
 
 	iph = ipv6_hdr(skb);
 
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 80060ade8a5b..928e661d1517 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -31,6 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
 		opts->options |= NF_SYNPROXY_OPT_ECN;
 
 	opts->options &= priv->info.options;
+	opts->mss_encode = opts->mss;
+	opts->mss = info->mss;
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
 		synproxy_init_timestamp_cookie(info, opts);
 	else
-- 
cgit v1.2.3


From 07b0fdecb2477396bcb69609019aade2b22124a1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 16 Jul 2019 07:58:31 -0700
Subject: blkcg: allow blkcg_policy->pd_stat() to print non-debug info too

Currently, ->pd_stat() is called only when moduleparam
blkcg_debug_stats is set which prevents it from printing non-debug
policy-specific statistics.  Let's move debug testing down so that
->pd_stat() can print non-debug stat too.  This patch doesn't cause
any visible behavior change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 9 +++------
 block/blk-iolatency.c      | 3 +++
 include/linux/blk-cgroup.h | 1 +
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 24ed26957367..55a7dc227dfb 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -54,7 +54,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
-static bool blkcg_debug_stats = false;
+bool blkcg_debug_stats = false;
 static struct workqueue_struct *blkcg_punt_bio_wq;
 
 static bool blkcg_policy_enabled(struct request_queue *q,
@@ -944,10 +944,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 					 dbytes, dios);
 		}
 
-		if (!blkcg_debug_stats)
-			goto next;
-
-		if (atomic_read(&blkg->use_delay)) {
+		if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
 			has_stats = true;
 			off += scnprintf(buf+off, size-off,
 					 " use_delay=%d delay_nsec=%llu",
@@ -967,7 +964,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
 				has_stats = true;
 			off += written;
 		}
-next:
+
 		if (has_stats) {
 			if (off < size - 1) {
 				off += scnprintf(buf+off, size-off, "\n");
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index d973c38ee4fd..0fff7b56df0e 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -917,6 +917,9 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
 	unsigned long long avg_lat;
 	unsigned long long cur_win;
 
+	if (!blkcg_debug_stats)
+		return 0;
+
 	if (iolat->ssd)
 		return iolatency_ssd_stat(iolat, buf, size);
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 689a58231288..12811091fd50 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -181,6 +181,7 @@ struct blkcg_policy {
 
 extern struct blkcg blkcg_root;
 extern struct cgroup_subsys_state * const blkcg_root_css;
+extern bool blkcg_debug_stats;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 				      struct request_queue *q, bool update_hint);
-- 
cgit v1.2.3


From e0aaa332e6a97dae57ad59cdb19e21f83c3d081c Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 15 Jul 2019 12:00:21 +0200
Subject: xfrm interface: ifname may be wrong in logs

The ifname is copied when the interface is created, but is never updated
later. In fact, this property is used only in one error message, where the
netdevice pointer is available, thus let's use it.

Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h        |  1 -
 net/xfrm/xfrm_interface.c | 10 +---------
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index b22db30c3d88..ad761ef84797 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -983,7 +983,6 @@ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev);
 
 struct xfrm_if_parms {
-	char name[IFNAMSIZ];	/* name of XFRM device */
 	int link;		/* ifindex of underlying L2 interface */
 	u32 if_id;		/* interface identifyer */
 };
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 2310dc9e35c2..68336ee00d72 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -145,8 +145,6 @@ static int xfrmi_create(struct net_device *dev)
 	if (err < 0)
 		goto out;
 
-	strcpy(xi->p.name, dev->name);
-
 	dev_hold(dev);
 	xfrmi_link(xfrmn, xi);
 
@@ -294,7 +292,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	if (tdev == dev) {
 		stats->collisions++;
 		net_warn_ratelimited("%s: Local routing loop detected!\n",
-				     xi->p.name);
+				     dev->name);
 		goto tx_err_dst_release;
 	}
 
@@ -638,12 +636,6 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
 	int err;
 
 	xfrmi_netlink_parms(data, &p);
-
-	if (!tb[IFLA_IFNAME])
-		return -EINVAL;
-
-	nla_strlcpy(p.name, tb[IFLA_IFNAME], IFNAMSIZ);
-
 	xi = xfrmi_locate(net, &p);
 	if (xi)
 		return -EEXIST;
-- 
cgit v1.2.3


From 22d6552f827ef76ade3edf6bbb3f05048a0a7d8b Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Mon, 15 Jul 2019 12:00:23 +0200
Subject: xfrm interface: fix management of phydev

With the current implementation, phydev cannot be removed:

$ ip link add dummy type dummy
$ ip link add xfrm1 type xfrm dev dummy if_id 1
$ ip l d dummy
 kernel:[77938.465445] unregister_netdevice: waiting for dummy to become free. Usage count = 1

Manage it like in ip tunnels, ie just keep the ifindex. Not that the side
effect, is that the phydev is now optional.

Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Tested-by: Julien Floret <julien.floret@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h        |  1 -
 net/xfrm/xfrm_interface.c | 32 +++++++++++++++++---------------
 2 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index ad761ef84797..aa08a7a5f6ac 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -990,7 +990,6 @@ struct xfrm_if_parms {
 struct xfrm_if {
 	struct xfrm_if __rcu *next;	/* next interface in list */
 	struct net_device *dev;		/* virtual device associated with interface */
-	struct net_device *phydev;	/* physical device */
 	struct net *net;		/* netns for packet i/o */
 	struct xfrm_if_parms p;		/* interface parms */
 
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 53e5e47b2c55..2ab4859df55a 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -175,7 +175,6 @@ static void xfrmi_dev_uninit(struct net_device *dev)
 	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
 
 	xfrmi_unlink(xfrmn, xi);
-	dev_put(xi->phydev);
 	dev_put(dev);
 }
 
@@ -362,7 +361,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto tx_err;
 	}
 
-	fl.flowi_oif = xi->phydev->ifindex;
+	fl.flowi_oif = xi->p.link;
 
 	ret = xfrmi_xmit2(skb, dev, &fl);
 	if (ret < 0)
@@ -548,7 +547,7 @@ static int xfrmi_get_iflink(const struct net_device *dev)
 {
 	struct xfrm_if *xi = netdev_priv(dev);
 
-	return xi->phydev->ifindex;
+	return xi->p.link;
 }
 
 
@@ -574,12 +573,14 @@ static void xfrmi_dev_setup(struct net_device *dev)
 	dev->needs_free_netdev	= true;
 	dev->priv_destructor	= xfrmi_dev_free;
 	netif_keep_dst(dev);
+
+	eth_broadcast_addr(dev->broadcast);
 }
 
 static int xfrmi_dev_init(struct net_device *dev)
 {
 	struct xfrm_if *xi = netdev_priv(dev);
-	struct net_device *phydev = xi->phydev;
+	struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link);
 	int err;
 
 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
@@ -594,13 +595,19 @@ static int xfrmi_dev_init(struct net_device *dev)
 
 	dev->features |= NETIF_F_LLTX;
 
-	dev->needed_headroom = phydev->needed_headroom;
-	dev->needed_tailroom = phydev->needed_tailroom;
+	if (phydev) {
+		dev->needed_headroom = phydev->needed_headroom;
+		dev->needed_tailroom = phydev->needed_tailroom;
 
-	if (is_zero_ether_addr(dev->dev_addr))
-		eth_hw_addr_inherit(dev, phydev);
-	if (is_zero_ether_addr(dev->broadcast))
-		memcpy(dev->broadcast, phydev->broadcast, dev->addr_len);
+		if (is_zero_ether_addr(dev->dev_addr))
+			eth_hw_addr_inherit(dev, phydev);
+		if (is_zero_ether_addr(dev->broadcast))
+			memcpy(dev->broadcast, phydev->broadcast,
+			       dev->addr_len);
+	} else {
+		eth_hw_addr_random(dev);
+		eth_broadcast_addr(dev->broadcast);
+	}
 
 	return 0;
 }
@@ -644,13 +651,8 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
 	xi->p = p;
 	xi->net = net;
 	xi->dev = dev;
-	xi->phydev = dev_get_by_index(net, p.link);
-	if (!xi->phydev)
-		return -ENODEV;
 
 	err = xfrmi_create(dev);
-	if (err < 0)
-		dev_put(xi->phydev);
 	return err;
 }
 
-- 
cgit v1.2.3


From 4d2e26a38fbcde2ba14882cbdb845caa1c17e19b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 10 Apr 2019 08:32:42 -0300
Subject: docs: powerpc: convert docs to ReST and rename to *.rst

Convert docs to ReST and add them to the arch-specific
book.

The conversion here was trivial, as almost every file there
was already using an elegant format close to ReST standard.

The changes were mostly to mark literal blocks and add a few
missing section title identifiers.

One note with regards to "--": on Sphinx, this can't be used
to identify a list, as it will format it badly. This can be
used, however, to identify a long hyphen - and "---" is an
even longer one.

At its new index.rst, let's add a :orphan: while this is not linked to
the main index.rst file, in order to avoid build warnings.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> # cxl
---
 Documentation/PCI/pci-error-recovery.rst           |   5 +-
 Documentation/index.rst                            |   1 +
 Documentation/powerpc/DAWR-POWER9.txt              |  90 ----
 Documentation/powerpc/bootwrapper.rst              | 155 ++++++
 Documentation/powerpc/bootwrapper.txt              | 141 -----
 Documentation/powerpc/cpu_families.rst             | 222 ++++++++
 Documentation/powerpc/cpu_families.txt             | 221 --------
 Documentation/powerpc/cpu_features.rst             |  60 +++
 Documentation/powerpc/cpu_features.txt             |  56 --
 Documentation/powerpc/cxl.rst                      | 467 +++++++++++++++++
 Documentation/powerpc/cxl.txt                      | 449 ----------------
 Documentation/powerpc/cxlflash.rst                 | 433 +++++++++++++++
 Documentation/powerpc/cxlflash.txt                 | 429 ---------------
 Documentation/powerpc/dawr-power9.rst              |  93 ++++
 Documentation/powerpc/dscr.rst                     |  87 +++
 Documentation/powerpc/dscr.txt                     |  83 ---
 Documentation/powerpc/eeh-pci-error-recovery.rst   | 336 ++++++++++++
 Documentation/powerpc/eeh-pci-error-recovery.txt   | 334 ------------
 Documentation/powerpc/firmware-assisted-dump.rst   | 301 +++++++++++
 Documentation/powerpc/firmware-assisted-dump.txt   | 292 -----------
 Documentation/powerpc/hvcs.rst                     | 581 +++++++++++++++++++++
 Documentation/powerpc/hvcs.txt                     | 567 --------------------
 Documentation/powerpc/index.rst                    |  34 ++
 Documentation/powerpc/isa-versions.rst             |  15 +-
 Documentation/powerpc/mpc52xx.rst                  |  43 ++
 Documentation/powerpc/mpc52xx.txt                  |  39 --
 .../powerpc/pci_iov_resource_on_powernv.rst        | 312 +++++++++++
 .../powerpc/pci_iov_resource_on_powernv.txt        | 301 -----------
 Documentation/powerpc/pmu-ebb.rst                  | 138 +++++
 Documentation/powerpc/pmu-ebb.txt                  | 137 -----
 Documentation/powerpc/ptrace.rst                   | 156 ++++++
 Documentation/powerpc/ptrace.txt                   | 151 ------
 Documentation/powerpc/qe_firmware.rst              | 296 +++++++++++
 Documentation/powerpc/qe_firmware.txt              | 295 -----------
 Documentation/powerpc/syscall64-abi.rst            | 110 ++++
 Documentation/powerpc/syscall64-abi.txt            | 105 ----
 Documentation/powerpc/transactional_memory.rst     | 247 +++++++++
 Documentation/powerpc/transactional_memory.txt     | 244 ---------
 MAINTAINERS                                        |   6 +-
 arch/powerpc/kernel/exceptions-64s.S               |   2 +-
 drivers/soc/fsl/qe/qe.c                            |   2 +-
 drivers/tty/hvc/hvcs.c                             |   2 +-
 include/soc/fsl/qe/qe.h                            |   2 +-
 43 files changed, 4090 insertions(+), 3950 deletions(-)
 delete mode 100644 Documentation/powerpc/DAWR-POWER9.txt
 create mode 100644 Documentation/powerpc/bootwrapper.rst
 delete mode 100644 Documentation/powerpc/bootwrapper.txt
 create mode 100644 Documentation/powerpc/cpu_families.rst
 delete mode 100644 Documentation/powerpc/cpu_families.txt
 create mode 100644 Documentation/powerpc/cpu_features.rst
 delete mode 100644 Documentation/powerpc/cpu_features.txt
 create mode 100644 Documentation/powerpc/cxl.rst
 delete mode 100644 Documentation/powerpc/cxl.txt
 create mode 100644 Documentation/powerpc/cxlflash.rst
 delete mode 100644 Documentation/powerpc/cxlflash.txt
 create mode 100644 Documentation/powerpc/dawr-power9.rst
 create mode 100644 Documentation/powerpc/dscr.rst
 delete mode 100644 Documentation/powerpc/dscr.txt
 create mode 100644 Documentation/powerpc/eeh-pci-error-recovery.rst
 delete mode 100644 Documentation/powerpc/eeh-pci-error-recovery.txt
 create mode 100644 Documentation/powerpc/firmware-assisted-dump.rst
 delete mode 100644 Documentation/powerpc/firmware-assisted-dump.txt
 create mode 100644 Documentation/powerpc/hvcs.rst
 delete mode 100644 Documentation/powerpc/hvcs.txt
 create mode 100644 Documentation/powerpc/index.rst
 create mode 100644 Documentation/powerpc/mpc52xx.rst
 delete mode 100644 Documentation/powerpc/mpc52xx.txt
 create mode 100644 Documentation/powerpc/pci_iov_resource_on_powernv.rst
 delete mode 100644 Documentation/powerpc/pci_iov_resource_on_powernv.txt
 create mode 100644 Documentation/powerpc/pmu-ebb.rst
 delete mode 100644 Documentation/powerpc/pmu-ebb.txt
 create mode 100644 Documentation/powerpc/ptrace.rst
 delete mode 100644 Documentation/powerpc/ptrace.txt
 create mode 100644 Documentation/powerpc/qe_firmware.rst
 delete mode 100644 Documentation/powerpc/qe_firmware.txt
 create mode 100644 Documentation/powerpc/syscall64-abi.rst
 delete mode 100644 Documentation/powerpc/syscall64-abi.txt
 create mode 100644 Documentation/powerpc/transactional_memory.rst
 delete mode 100644 Documentation/powerpc/transactional_memory.txt

(limited to 'include')

diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
index 83db42092935..e5d450df06b4 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -403,7 +403,7 @@ That is, the recovery API only requires that:
 .. note::
 
    Implementation details for the powerpc platform are discussed in
-   the file Documentation/powerpc/eeh-pci-error-recovery.txt
+   the file Documentation/powerpc/eeh-pci-error-recovery.rst
 
    As of this writing, there is a growing list of device drivers with
    patches implementing error recovery. Not all of these patches are in
@@ -422,3 +422,6 @@ That is, the recovery API only requires that:
    - drivers/net/cxgb3
    - drivers/net/s2io.c
    - drivers/net/qlge
+
+The End
+-------
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 70ae148ec980..3fe6170aa41d 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -143,6 +143,7 @@ implementation.
    arm64/index
    ia64/index
    m68k/index
+   powerpc/index
    riscv/index
    s390/index
    sh/index
diff --git a/Documentation/powerpc/DAWR-POWER9.txt b/Documentation/powerpc/DAWR-POWER9.txt
deleted file mode 100644
index ecdbb076438c..000000000000
--- a/Documentation/powerpc/DAWR-POWER9.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-DAWR issues on POWER9
-============================
-
-On POWER9 the Data Address Watchpoint Register (DAWR) can cause a checkstop
-if it points to cache inhibited (CI) memory. Currently Linux has no way to
-disinguish CI memory when configuring the DAWR, so (for now) the DAWR is
-disabled by this commit:
-
-    commit 9654153158d3e0684a1bdb76dbababdb7111d5a0
-    Author: Michael Neuling <mikey@neuling.org>
-    Date:   Tue Mar 27 15:37:24 2018 +1100
-    powerpc: Disable DAWR in the base POWER9 CPU features
-
-Technical Details:
-============================
-
-DAWR has 6 different ways of being set.
-1) ptrace
-2) h_set_mode(DAWR)
-3) h_set_dabr()
-4) kvmppc_set_one_reg()
-5) xmon
-
-For ptrace, we now advertise zero breakpoints on POWER9 via the
-PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to
-software emulation of the watchpoint (which is slow).
-
-h_set_mode(DAWR) and h_set_dabr() will now return an error to the
-guest on a POWER9 host. Current Linux guests ignore this error, so
-they will silently not get the DAWR.
-
-kvmppc_set_one_reg() will store the value in the vcpu but won't
-actually set it on POWER9 hardware. This is done so we don't break
-migration from POWER8 to POWER9, at the cost of silently losing the
-DAWR on the migration.
-
-For xmon, the 'bd' command will return an error on P9.
-
-Consequences for users
-============================
-
-For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB
-will accept the command. Unfortunately since there is no hardware
-support for the watchpoint, GDB will software emulate the watchpoint
-making it run very slowly.
-
-The same will also be true for any guests started on a POWER9
-host. The watchpoint will fail and GDB will fall back to software
-emulation.
-
-If a guest is started on a POWER8 host, GDB will accept the watchpoint
-and configure the hardware to use the DAWR. This will run at full
-speed since it can use the hardware emulation. Unfortunately if this
-guest is migrated to a POWER9 host, the watchpoint will be lost on the
-POWER9. Loads and stores to the watchpoint locations will not be
-trapped in GDB. The watchpoint is remembered, so if the guest is
-migrated back to the POWER8 host, it will start working again.
-
-Force enabling the DAWR
-=============================
-Kernels (since ~v5.2) have an option to force enable the DAWR via:
-
-  echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
-
-This enables the DAWR even on POWER9.
-
-This is a dangerous setting, USE AT YOUR OWN RISK.
-
-Some users may not care about a bad user crashing their box
-(ie. single user/desktop systems) and really want the DAWR.  This
-allows them to force enable DAWR.
-
-This flag can also be used to disable DAWR access. Once this is
-cleared, all DAWR access should be cleared immediately and your
-machine once again safe from crashing.
-
-Userspace may get confused by toggling this. If DAWR is force
-enabled/disabled between getting the number of breakpoints (via
-PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
-inconsistent view of what's available. Similarly for guests.
-
-For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
-enabled in the host AND the guest. For this reason, this won't work on
-POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
-dawr_enable_dangerous file will fail if the hypervisor doesn't support
-writing the DAWR.
-
-To double check the DAWR is working, run this kernel selftest:
-  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
-Any errors/failures/skips mean something is wrong.
diff --git a/Documentation/powerpc/bootwrapper.rst b/Documentation/powerpc/bootwrapper.rst
new file mode 100644
index 000000000000..a6292afba573
--- /dev/null
+++ b/Documentation/powerpc/bootwrapper.rst
@@ -0,0 +1,155 @@
+========================
+The PowerPC boot wrapper
+========================
+
+Copyright (C) Secret Lab Technologies Ltd.
+
+PowerPC image targets compresses and wraps the kernel image (vmlinux) with
+a boot wrapper to make it usable by the system firmware.  There is no
+standard PowerPC firmware interface, so the boot wrapper is designed to
+be adaptable for each kind of image that needs to be built.
+
+The boot wrapper can be found in the arch/powerpc/boot/ directory.  The
+Makefile in that directory has targets for all the available image types.
+The different image types are used to support all of the various firmware
+interfaces found on PowerPC platforms.  OpenFirmware is the most commonly
+used firmware type on general purpose PowerPC systems from Apple, IBM and
+others.  U-Boot is typically found on embedded PowerPC hardware, but there
+are a handful of other firmware implementations which are also popular.  Each
+firmware interface requires a different image format.
+
+The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and
+it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
+image.  The details of the build system is discussed in the next section.
+Currently, the following image format targets exist:
+
+   ==================== ========================================================
+   cuImage.%:		Backwards compatible uImage for older version of
+			U-Boot (for versions that don't understand the device
+			tree).  This image embeds a device tree blob inside
+			the image.  The boot wrapper, kernel and device tree
+			are all embedded inside the U-Boot uImage file format
+			with boot wrapper code that extracts data from the old
+			bd_info structure and loads the data into the device
+			tree before jumping into the kernel.
+
+			Because of the series of #ifdefs found in the
+			bd_info structure used in the old U-Boot interfaces,
+			cuImages are platform specific.  Each specific
+			U-Boot platform has a different platform init file
+			which populates the embedded device tree with data
+			from the platform specific bd_info file.  The platform
+			specific cuImage platform init code can be found in
+			`arch/powerpc/boot/cuboot.*.c`. Selection of the correct
+			cuImage init code for a specific board can be found in
+			the wrapper structure.
+
+   dtbImage.%:		Similar to zImage, except device tree blob is embedded
+			inside the image instead of provided by firmware.  The
+			output image file can be either an elf file or a flat
+			binary depending on the platform.
+
+			dtbImages are used on systems which do not have an
+			interface for passing a device tree directly.
+			dtbImages are similar to simpleImages except that
+			dtbImages have platform specific code for extracting
+			data from the board firmware, but simpleImages do not
+			talk to the firmware at all.
+
+			PlayStation 3 support uses dtbImage.  So do Embedded
+			Planet boards using the PlanetCore firmware.  Board
+			specific initialization code is typically found in a
+			file named arch/powerpc/boot/<platform>.c; but this
+			can be overridden by the wrapper script.
+
+   simpleImage.%:	Firmware independent compressed image that does not
+			depend on any particular firmware interface and embeds
+			a device tree blob.  This image is a flat binary that
+			can be loaded to any location in RAM and jumped to.
+			Firmware cannot pass any configuration data to the
+			kernel with this image type and it depends entirely on
+			the embedded device tree for all information.
+
+			The simpleImage is useful for booting systems with
+			an unknown firmware interface or for booting from
+			a debugger when no firmware is present (such as on
+			the Xilinx Virtex platform).  The only assumption that
+			simpleImage makes is that RAM is correctly initialized
+			and that the MMU is either off or has RAM mapped to
+			base address 0.
+
+			simpleImage also supports inserting special platform
+			specific initialization code to the start of the bootup
+			sequence.  The virtex405 platform uses this feature to
+			ensure that the cache is invalidated before caching
+			is enabled.  Platform specific initialization code is
+			added as part of the wrapper script and is keyed on
+			the image target name.  For example, all
+			simpleImage.virtex405-* targets will add the
+			virtex405-head.S initialization code (This also means
+			that the dts file for virtex405 targets should be
+			named (virtex405-<board>.dts).  Search the wrapper
+			script for 'virtex405' and see the file
+			arch/powerpc/boot/virtex405-head.S for details.
+
+   treeImage.%;		Image format for used with OpenBIOS firmware found
+			on some ppc4xx hardware.  This image embeds a device
+			tree blob inside the image.
+
+   uImage:		Native image format used by U-Boot.  The uImage target
+			does not add any boot code.  It just wraps a compressed
+			vmlinux in the uImage data structure.  This image
+			requires a version of U-Boot that is able to pass
+			a device tree to the kernel at boot.  If using an older
+			version of U-Boot, then you need to use a cuImage
+			instead.
+
+   zImage.%:		Image format which does not embed a device tree.
+			Used by OpenFirmware and other firmware interfaces
+			which are able to supply a device tree.  This image
+			expects firmware to provide the device tree at boot.
+			Typically, if you have general purpose PowerPC
+			hardware then you want this image format.
+   ==================== ========================================================
+
+Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
+and cuImage) all generate the device tree blob from a file in the
+arch/powerpc/boot/dts/ directory.  The Makefile selects the correct device
+tree source based on the name of the target.  Therefore, if the kernel is
+built with 'make treeImage.walnut simpleImage.virtex405-ml403', then the
+build system will use arch/powerpc/boot/dts/walnut.dts to build
+treeImage.walnut and arch/powerpc/boot/dts/virtex405-ml403.dts to build
+the simpleImage.virtex405-ml403.
+
+Two special targets called 'zImage' and 'zImage.initrd' also exist.  These
+targets build all the default images as selected by the kernel configuration.
+Default images are selected by the boot wrapper Makefile
+(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable.  Look
+at the Makefile to see which default image targets are available.
+
+How it is built
+---------------
+arch/powerpc is designed to support multiplatform kernels, which means
+that a single vmlinux image can be booted on many different target boards.
+It also means that the boot wrapper must be able to wrap for many kinds of
+images on a single build.  The design decision was made to not use any
+conditional compilation code (#ifdef, etc) in the boot wrapper source code.
+All of the boot wrapper pieces are buildable at any time regardless of the
+kernel configuration.  Building all the wrapper bits on every kernel build
+also ensures that obscure parts of the wrapper are at the very least compile
+tested in a large variety of environments.
+
+The wrapper is adapted for different image types at link time by linking in
+just the wrapper bits that are appropriate for the image type.  The 'wrapper
+script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and
+is responsible for selecting the correct wrapper bits for the image type.
+The arguments are well documented in the script's comment block, so they
+are not repeated here.  However, it is worth mentioning that the script
+uses the -p (platform) argument as the main method of deciding which wrapper
+bits to compile in.  Look for the large 'case "$platform" in' block in the
+middle of the script.  This is also the place where platform specific fixups
+can be selected by changing the link order.
+
+In particular, care should be taken when working with cuImages.  cuImage
+wrapper bits are very board specific and care should be taken to make sure
+the target you are trying to build is supported by the wrapper bits.
diff --git a/Documentation/powerpc/bootwrapper.txt b/Documentation/powerpc/bootwrapper.txt
deleted file mode 100644
index d60fced5e1cc..000000000000
--- a/Documentation/powerpc/bootwrapper.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-The PowerPC boot wrapper
-------------------------
-Copyright (C) Secret Lab Technologies Ltd.
-
-PowerPC image targets compresses and wraps the kernel image (vmlinux) with
-a boot wrapper to make it usable by the system firmware.  There is no
-standard PowerPC firmware interface, so the boot wrapper is designed to
-be adaptable for each kind of image that needs to be built.
-
-The boot wrapper can be found in the arch/powerpc/boot/ directory.  The
-Makefile in that directory has targets for all the available image types.
-The different image types are used to support all of the various firmware
-interfaces found on PowerPC platforms.  OpenFirmware is the most commonly
-used firmware type on general purpose PowerPC systems from Apple, IBM and
-others.  U-Boot is typically found on embedded PowerPC hardware, but there
-are a handful of other firmware implementations which are also popular.  Each
-firmware interface requires a different image format.
-
-The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and
-it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
-image.  The details of the build system is discussed in the next section.
-Currently, the following image format targets exist:
-
-   cuImage.%:		Backwards compatible uImage for older version of
-			U-Boot (for versions that don't understand the device
-			tree).  This image embeds a device tree blob inside
-			the image.  The boot wrapper, kernel and device tree
-			are all embedded inside the U-Boot uImage file format
-			with boot wrapper code that extracts data from the old
-			bd_info structure and loads the data into the device
-			tree before jumping into the kernel.
-			  Because of the series of #ifdefs found in the
-			bd_info structure used in the old U-Boot interfaces,
-			cuImages are platform specific.  Each specific
-			U-Boot platform has a different platform init file
-			which populates the embedded device tree with data
-			from the platform specific bd_info file.  The platform
-			specific cuImage platform init code can be found in
-			arch/powerpc/boot/cuboot.*.c.  Selection of the correct
-			cuImage init code for a specific board can be found in
-			the wrapper structure.
-   dtbImage.%:		Similar to zImage, except device tree blob is embedded
-			inside the image instead of provided by firmware.  The
-			output image file can be either an elf file or a flat
-			binary depending on the platform.
-			  dtbImages are used on systems which do not have an
-			interface for passing a device tree directly.
-			dtbImages are similar to simpleImages except that
-			dtbImages have platform specific code for extracting
-			data from the board firmware, but simpleImages do not
-			talk to the firmware at all.
-			  PlayStation 3 support uses dtbImage.  So do Embedded
-			Planet boards using the PlanetCore firmware.  Board
-			specific initialization code is typically found in a
-			file named arch/powerpc/boot/<platform>.c; but this
-			can be overridden by the wrapper script.
-   simpleImage.%:	Firmware independent compressed image that does not
-			depend on any particular firmware interface and embeds
-			a device tree blob.  This image is a flat binary that
-			can be loaded to any location in RAM and jumped to.
-			Firmware cannot pass any configuration data to the
-			kernel with this image type and it depends entirely on
-			the embedded device tree for all information.
-			  The simpleImage is useful for booting systems with
-			an unknown firmware interface or for booting from
-			a debugger when no firmware is present (such as on
-			the Xilinx Virtex platform).  The only assumption that
-			simpleImage makes is that RAM is correctly initialized
-			and that the MMU is either off or has RAM mapped to
-			base address 0.
-			  simpleImage also supports inserting special platform
-			specific initialization code to the start of the bootup
-			sequence.  The virtex405 platform uses this feature to
-			ensure that the cache is invalidated before caching
-			is enabled.  Platform specific initialization code is
-			added as part of the wrapper script and is keyed on
-			the image target name.  For example, all
-			simpleImage.virtex405-* targets will add the
-			virtex405-head.S initialization code (This also means
-			that the dts file for virtex405 targets should be
-			named (virtex405-<board>.dts).  Search the wrapper
-			script for 'virtex405' and see the file
-			arch/powerpc/boot/virtex405-head.S for details.
-   treeImage.%;		Image format for used with OpenBIOS firmware found
-			on some ppc4xx hardware.  This image embeds a device
-			tree blob inside the image.
-   uImage:		Native image format used by U-Boot.  The uImage target
-			does not add any boot code.  It just wraps a compressed
-			vmlinux in the uImage data structure.  This image
-			requires a version of U-Boot that is able to pass
-			a device tree to the kernel at boot.  If using an older
-			version of U-Boot, then you need to use a cuImage
-			instead.
-   zImage.%:		Image format which does not embed a device tree.
-			Used by OpenFirmware and other firmware interfaces
-			which are able to supply a device tree.  This image
-			expects firmware to provide the device tree at boot.
-			Typically, if you have general purpose PowerPC
-			hardware then you want this image format.
-
-Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
-and cuImage) all generate the device tree blob from a file in the
-arch/powerpc/boot/dts/ directory.  The Makefile selects the correct device
-tree source based on the name of the target.  Therefore, if the kernel is
-built with 'make treeImage.walnut simpleImage.virtex405-ml403', then the
-build system will use arch/powerpc/boot/dts/walnut.dts to build
-treeImage.walnut and arch/powerpc/boot/dts/virtex405-ml403.dts to build
-the simpleImage.virtex405-ml403.
-
-Two special targets called 'zImage' and 'zImage.initrd' also exist.  These
-targets build all the default images as selected by the kernel configuration.
-Default images are selected by the boot wrapper Makefile
-(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable.  Look
-at the Makefile to see which default image targets are available.
-
-How it is built
----------------
-arch/powerpc is designed to support multiplatform kernels, which means
-that a single vmlinux image can be booted on many different target boards.
-It also means that the boot wrapper must be able to wrap for many kinds of
-images on a single build.  The design decision was made to not use any
-conditional compilation code (#ifdef, etc) in the boot wrapper source code.
-All of the boot wrapper pieces are buildable at any time regardless of the
-kernel configuration.  Building all the wrapper bits on every kernel build
-also ensures that obscure parts of the wrapper are at the very least compile
-tested in a large variety of environments.
-
-The wrapper is adapted for different image types at link time by linking in
-just the wrapper bits that are appropriate for the image type.  The 'wrapper
-script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and
-is responsible for selecting the correct wrapper bits for the image type.
-The arguments are well documented in the script's comment block, so they
-are not repeated here.  However, it is worth mentioning that the script
-uses the -p (platform) argument as the main method of deciding which wrapper
-bits to compile in.  Look for the large 'case "$platform" in' block in the
-middle of the script.  This is also the place where platform specific fixups
-can be selected by changing the link order.
-
-In particular, care should be taken when working with cuImages.  cuImage
-wrapper bits are very board specific and care should be taken to make sure
-the target you are trying to build is supported by the wrapper bits.
diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst
new file mode 100644
index 000000000000..1e063c5440c3
--- /dev/null
+++ b/Documentation/powerpc/cpu_families.rst
@@ -0,0 +1,222 @@
+============
+CPU Families
+============
+
+This document tries to summarise some of the different cpu families that exist
+and are supported by arch/powerpc.
+
+
+Book3S (aka sPAPR)
+------------------
+
+- Hash MMU
+- Mix of 32 & 64 bit::
+
+   +--------------+                 +----------------+
+   |  Old POWER   | --------------> | RS64 (threads) |
+   +--------------+                 +----------------+
+          |
+          |
+          v
+   +--------------+                 +----------------+      +------+
+   |     601      | --------------> |      603       | ---> | e300 |
+   +--------------+                 +----------------+      +------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+                 +----------------+      +-------+
+   |     604      |                 |    750 (G3)    | ---> | 750CX |
+   +--------------+                 +----------------+      +-------+
+          |                                 |                   |
+          |                                 |                   |
+          v                                 v                   v
+   +--------------+                 +----------------+      +-------+
+   | 620 (64 bit) |                 |      7400      |      | 750CL |
+   +--------------+                 +----------------+      +-------+
+          |                                 |                   |
+          |                                 |                   |
+          v                                 v                   v
+   +--------------+                 +----------------+      +-------+
+   |  POWER3/630  |                 |      7410      |      | 750FX |
+   +--------------+                 +----------------+      +-------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+                 +----------------+
+   |   POWER3+    |                 |      7450      |
+   +--------------+                 +----------------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+                 +----------------+
+   |    POWER4    |                 |      7455      |
+   +--------------+                 +----------------+
+          |                                 |
+          |                                 |
+          v                                 v
+   +--------------+     +-------+   +----------------+
+   |   POWER4+    | --> |  970  |   |      7447      |
+   +--------------+     +-------+   +----------------+
+          |                 |               |
+          |                 |               |
+          v                 v               v
+   +--------------+     +-------+   +----------------+
+   |    POWER5    |     | 970FX |   |      7448      |
+   +--------------+     +-------+   +----------------+
+          |                 |               |
+          |                 |               |
+          v                 v               v
+   +--------------+     +-------+   +----------------+
+   |   POWER5+    |     | 970MP |   |      e600      |
+   +--------------+     +-------+   +----------------+
+          |
+          |
+          v
+   +--------------+
+   |   POWER5++   |
+   +--------------+
+          |
+          |
+          v
+   +--------------+       +-------+
+   |    POWER6    | <-?-> | Cell  |
+   +--------------+       +-------+
+          |
+          |
+          v
+   +--------------+
+   |    POWER7    |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |   POWER7+    |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |    POWER8    |
+   +--------------+
+
+
+   +---------------+
+   | PA6T (64 bit) |
+   +---------------+
+
+
+IBM BookE
+---------
+
+- Software loaded TLB.
+- All 32 bit::
+
+   +--------------+
+   |     401      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     403      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     405      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     440      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+     +----------------+
+   |     450      | --> |      BG/P      |
+   +--------------+     +----------------+
+          |
+          |
+          v
+   +--------------+
+   |     460      |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |     476      |
+   +--------------+
+
+
+Motorola/Freescale 8xx
+----------------------
+
+- Software loaded with hardware assist.
+- All 32 bit::
+
+   +-------------+
+   | MPC8xx Core |
+   +-------------+
+
+
+Freescale BookE
+---------------
+
+- Software loaded TLB.
+- e6500 adds HW loaded indirect TLB entries.
+- Mix of 32 & 64 bit::
+
+   +--------------+
+   |     e200     |
+   +--------------+
+
+
+   +--------------------------------+
+   |              e500              |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   |             e500v2             |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   |        e500mc (Book3e)         |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   |          e5500 (64 bit)        |
+   +--------------------------------+
+                   |
+                   |
+                   v
+   +--------------------------------+
+   | e6500 (HW TLB) (Multithreaded) |
+   +--------------------------------+
+
+
+IBM A2 core
+-----------
+
+- Book3E, software loaded TLB + HW loaded indirect TLB entries.
+- 64 bit::
+
+   +--------------+     +----------------+
+   |   A2 core    | --> |      WSP       |
+   +--------------+     +----------------+
+           |
+           |
+           v
+   +--------------+
+   |     BG/Q     |
+   +--------------+
diff --git a/Documentation/powerpc/cpu_families.txt b/Documentation/powerpc/cpu_families.txt
deleted file mode 100644
index fc08e22feb1a..000000000000
--- a/Documentation/powerpc/cpu_families.txt
+++ /dev/null
@@ -1,221 +0,0 @@
-CPU Families
-============
-
-This document tries to summarise some of the different cpu families that exist
-and are supported by arch/powerpc.
-
-
-Book3S (aka sPAPR)
-------------------
-
- - Hash MMU
- - Mix of 32 & 64 bit
-
-   +--------------+                 +----------------+
-   |  Old POWER   | --------------> | RS64 (threads) |
-   +--------------+                 +----------------+
-          |
-          |
-          v
-   +--------------+                 +----------------+      +------+
-   |     601      | --------------> |      603       | ---> | e300 |
-   +--------------+                 +----------------+      +------+
-          |                                 |
-          |                                 |
-          v                                 v
-   +--------------+                 +----------------+      +-------+
-   |     604      |                 |    750 (G3)    | ---> | 750CX |
-   +--------------+                 +----------------+      +-------+
-          |                                 |                   |
-          |                                 |                   |
-          v                                 v                   v
-   +--------------+                 +----------------+      +-------+
-   | 620 (64 bit) |                 |      7400      |      | 750CL |
-   +--------------+                 +----------------+      +-------+
-          |                                 |                   |
-          |                                 |                   |
-          v                                 v                   v
-   +--------------+                 +----------------+      +-------+
-   |  POWER3/630  |                 |      7410      |      | 750FX |
-   +--------------+                 +----------------+      +-------+
-          |                                 |
-          |                                 |
-          v                                 v
-   +--------------+                 +----------------+
-   |   POWER3+    |                 |      7450      |
-   +--------------+                 +----------------+
-          |                                 |
-          |                                 |
-          v                                 v
-   +--------------+                 +----------------+
-   |    POWER4    |                 |      7455      |
-   +--------------+                 +----------------+
-          |                                 |
-          |                                 |
-          v                                 v
-   +--------------+     +-------+   +----------------+
-   |   POWER4+    | --> |  970  |   |      7447      |
-   +--------------+     +-------+   +----------------+
-          |                 |               |
-          |                 |               |
-          v                 v               v
-   +--------------+     +-------+   +----------------+
-   |    POWER5    |     | 970FX |   |      7448      |
-   +--------------+     +-------+   +----------------+
-          |                 |               |
-          |                 |               |
-          v                 v               v
-   +--------------+     +-------+   +----------------+
-   |   POWER5+    |     | 970MP |   |      e600      |
-   +--------------+     +-------+   +----------------+
-          |
-          |
-          v
-   +--------------+
-   |   POWER5++   |
-   +--------------+
-          |
-          |
-          v
-   +--------------+       +-------+
-   |    POWER6    | <-?-> | Cell  |
-   +--------------+       +-------+
-          |
-          |
-          v
-   +--------------+
-   |    POWER7    |
-   +--------------+
-          |
-          |
-          v
-   +--------------+
-   |   POWER7+    |
-   +--------------+
-          |
-          |
-          v
-   +--------------+
-   |    POWER8    |
-   +--------------+
-
-
-   +---------------+
-   | PA6T (64 bit) |
-   +---------------+
-
-
-IBM BookE
----------
-
- - Software loaded TLB.
- - All 32 bit
-
-   +--------------+
-   |     401      |
-   +--------------+
-          |
-          |
-          v
-   +--------------+
-   |     403      |
-   +--------------+
-          |
-          |
-          v
-   +--------------+
-   |     405      |
-   +--------------+
-          |
-          |
-          v
-   +--------------+
-   |     440      |
-   +--------------+
-          |
-          |
-          v
-   +--------------+     +----------------+
-   |     450      | --> |      BG/P      |
-   +--------------+     +----------------+
-          |
-          |
-          v
-   +--------------+
-   |     460      |
-   +--------------+
-          |
-          |
-          v
-   +--------------+
-   |     476      |
-   +--------------+
-
-
-Motorola/Freescale 8xx
-----------------------
-
- - Software loaded with hardware assist.
- - All 32 bit
-
-   +-------------+
-   | MPC8xx Core |
-   +-------------+
-
-
-Freescale BookE
----------------
-
- - Software loaded TLB.
- - e6500 adds HW loaded indirect TLB entries.
- - Mix of 32 & 64 bit
-
-   +--------------+
-   |     e200     |
-   +--------------+
-
-
-   +--------------------------------+
-   |              e500              |
-   +--------------------------------+
-                   |
-                   |
-                   v
-   +--------------------------------+
-   |             e500v2             |
-   +--------------------------------+
-                   |
-                   |
-                   v
-   +--------------------------------+
-   |        e500mc (Book3e)         |
-   +--------------------------------+
-                   |
-                   |
-                   v
-   +--------------------------------+
-   |          e5500 (64 bit)        |
-   +--------------------------------+
-                   |
-                   |
-                   v
-   +--------------------------------+
-   | e6500 (HW TLB) (Multithreaded) |
-   +--------------------------------+
-
-
-IBM A2 core
------------
-
- - Book3E, software loaded TLB + HW loaded indirect TLB entries.
- - 64 bit
-
-   +--------------+     +----------------+
-   |   A2 core    | --> |      WSP       |
-   +--------------+     +----------------+
-           |
-           |
-           v
-   +--------------+
-   |     BG/Q     |
-   +--------------+
diff --git a/Documentation/powerpc/cpu_features.rst b/Documentation/powerpc/cpu_features.rst
new file mode 100644
index 000000000000..b7bcdd2f41bb
--- /dev/null
+++ b/Documentation/powerpc/cpu_features.rst
@@ -0,0 +1,60 @@
+============
+CPU Features
+============
+
+Hollis Blanchard <hollis@austin.ibm.com>
+5 Jun 2002
+
+This document describes the system (including self-modifying code) used in the
+PPC Linux kernel to support a variety of PowerPC CPUs without requiring
+compile-time selection.
+
+Early in the boot process the ppc32 kernel detects the current CPU type and
+chooses a set of features accordingly. Some examples include Altivec support,
+split instruction and data caches, and if the CPU supports the DOZE and NAP
+sleep modes.
+
+Detection of the feature set is simple. A list of processors can be found in
+arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with
+each value in the list. If a match is found, the cpu_features of cur_cpu_spec
+is assigned to the feature bitmask for this processor and a __setup_cpu
+function is called.
+
+C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a
+particular feature bit. This is done in quite a few places, for example
+in ppc_setup_l2cr().
+
+Implementing cpufeatures in assembly is a little more involved. There are
+several paths that are performance-critical and would suffer if an array
+index, structure dereference, and conditional branch were added. To avoid the
+performance penalty but still allow for runtime (rather than compile-time) CPU
+selection, unused code is replaced by 'nop' instructions. This nop'ing is
+based on CPU 0's capabilities, so a multi-processor system with non-identical
+processors will not work (but such a system would likely have other problems
+anyways).
+
+After detecting the processor type, the kernel patches out sections of code
+that shouldn't be used by writing nop's over it. Using cpufeatures requires
+just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
+transfer_to_handler::
+
+	#ifdef CONFIG_ALTIVEC
+	BEGIN_FTR_SECTION
+		mfspr	r22,SPRN_VRSAVE		/* if G4, save vrsave register value */
+		stw	r22,THREAD_VRSAVE(r23)
+	END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+	#endif /* CONFIG_ALTIVEC */
+
+If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both
+instructions are replaced with nop's.
+
+The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET
+and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in
+cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros
+should be used in the majority of cases.
+
+The END_FTR_SECTION macros are implemented by storing information about this
+code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups
+(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in
+__ftr_fixup, and if the required feature is not present it will loop writing
+nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION.
diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt
deleted file mode 100644
index ae09df8722c8..000000000000
--- a/Documentation/powerpc/cpu_features.txt
+++ /dev/null
@@ -1,56 +0,0 @@
-Hollis Blanchard <hollis@austin.ibm.com>
-5 Jun 2002
-
-This document describes the system (including self-modifying code) used in the
-PPC Linux kernel to support a variety of PowerPC CPUs without requiring
-compile-time selection.
-
-Early in the boot process the ppc32 kernel detects the current CPU type and
-chooses a set of features accordingly. Some examples include Altivec support,
-split instruction and data caches, and if the CPU supports the DOZE and NAP
-sleep modes.
-
-Detection of the feature set is simple. A list of processors can be found in
-arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with
-each value in the list. If a match is found, the cpu_features of cur_cpu_spec
-is assigned to the feature bitmask for this processor and a __setup_cpu
-function is called.
-
-C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a
-particular feature bit. This is done in quite a few places, for example
-in ppc_setup_l2cr().
-
-Implementing cpufeatures in assembly is a little more involved. There are
-several paths that are performance-critical and would suffer if an array
-index, structure dereference, and conditional branch were added. To avoid the
-performance penalty but still allow for runtime (rather than compile-time) CPU
-selection, unused code is replaced by 'nop' instructions. This nop'ing is
-based on CPU 0's capabilities, so a multi-processor system with non-identical
-processors will not work (but such a system would likely have other problems
-anyways).
-
-After detecting the processor type, the kernel patches out sections of code
-that shouldn't be used by writing nop's over it. Using cpufeatures requires
-just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
-transfer_to_handler:
-
-	#ifdef CONFIG_ALTIVEC
-	BEGIN_FTR_SECTION
-		mfspr	r22,SPRN_VRSAVE		/* if G4, save vrsave register value */
-		stw	r22,THREAD_VRSAVE(r23)
-	END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-	#endif /* CONFIG_ALTIVEC */
-
-If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both
-instructions are replaced with nop's.
-
-The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET
-and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in
-cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros
-should be used in the majority of cases.
-
-The END_FTR_SECTION macros are implemented by storing information about this
-code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups
-(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in
-__ftr_fixup, and if the required feature is not present it will loop writing
-nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION.
diff --git a/Documentation/powerpc/cxl.rst b/Documentation/powerpc/cxl.rst
new file mode 100644
index 000000000000..920546d81326
--- /dev/null
+++ b/Documentation/powerpc/cxl.rst
@@ -0,0 +1,467 @@
+====================================
+Coherent Accelerator Interface (CXL)
+====================================
+
+Introduction
+============
+
+    The coherent accelerator interface is designed to allow the
+    coherent connection of accelerators (FPGAs and other devices) to a
+    POWER system. These devices need to adhere to the Coherent
+    Accelerator Interface Architecture (CAIA).
+
+    IBM refers to this as the Coherent Accelerator Processor Interface
+    or CAPI. In the kernel it's referred to by the name CXL to avoid
+    confusion with the ISDN CAPI subsystem.
+
+    Coherent in this context means that the accelerator and CPUs can
+    both access system memory directly and with the same effective
+    addresses.
+
+
+Hardware overview
+=================
+
+    ::
+
+         POWER8/9             FPGA
+       +----------+        +---------+
+       |          |        |         |
+       |   CPU    |        |   AFU   |
+       |          |        |         |
+       |          |        |         |
+       |          |        |         |
+       +----------+        +---------+
+       |   PHB    |        |         |
+       |   +------+        |   PSL   |
+       |   | CAPP |<------>|         |
+       +---+------+  PCIE  +---------+
+
+    The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP)
+    unit which is part of the PCIe Host Bridge (PHB). This is managed
+    by Linux by calls into OPAL. Linux doesn't directly program the
+    CAPP.
+
+    The FPGA (or coherently attached device) consists of two parts.
+    The POWER Service Layer (PSL) and the Accelerator Function Unit
+    (AFU). The AFU is used to implement specific functionality behind
+    the PSL. The PSL, among other things, provides memory address
+    translation services to allow each AFU direct access to userspace
+    memory.
+
+    The AFU is the core part of the accelerator (eg. the compression,
+    crypto etc function). The kernel has no knowledge of the function
+    of the AFU. Only userspace interacts directly with the AFU.
+
+    The PSL provides the translation and interrupt services that the
+    AFU needs. This is what the kernel interacts with. For example, if
+    the AFU needs to read a particular effective address, it sends
+    that address to the PSL, the PSL then translates it, fetches the
+    data from memory and returns it to the AFU. If the PSL has a
+    translation miss, it interrupts the kernel and the kernel services
+    the fault. The context to which this fault is serviced is based on
+    who owns that acceleration function.
+
+    - POWER8 and PSL Version 8 are compliant to the CAIA Version 1.0.
+    - POWER9 and PSL Version 9 are compliant to the CAIA Version 2.0.
+
+    This PSL Version 9 provides new features such as:
+
+    * Interaction with the nest MMU on the P9 chip.
+    * Native DMA support.
+    * Supports sending ASB_Notify messages for host thread wakeup.
+    * Supports Atomic operations.
+    * etc.
+
+    Cards with a PSL9 won't work on a POWER8 system and cards with a
+    PSL8 won't work on a POWER9 system.
+
+AFU Modes
+=========
+
+    There are two programming modes supported by the AFU. Dedicated
+    and AFU directed. AFU may support one or both modes.
+
+    When using dedicated mode only one MMU context is supported. In
+    this mode, only one userspace process can use the accelerator at
+    time.
+
+    When using AFU directed mode, up to 16K simultaneous contexts can
+    be supported. This means up to 16K simultaneous userspace
+    applications may use the accelerator (although specific AFUs may
+    support fewer). In this mode, the AFU sends a 16 bit context ID
+    with each of its requests. This tells the PSL which context is
+    associated with each operation. If the PSL can't translate an
+    operation, the ID can also be accessed by the kernel so it can
+    determine the userspace context associated with an operation.
+
+
+MMIO space
+==========
+
+    A portion of the accelerator MMIO space can be directly mapped
+    from the AFU to userspace. Either the whole space can be mapped or
+    just a per context portion. The hardware is self describing, hence
+    the kernel can determine the offset and size of the per context
+    portion.
+
+
+Interrupts
+==========
+
+    AFUs may generate interrupts that are destined for userspace. These
+    are received by the kernel as hardware interrupts and passed onto
+    userspace by a read syscall documented below.
+
+    Data storage faults and error interrupts are handled by the kernel
+    driver.
+
+
+Work Element Descriptor (WED)
+=============================
+
+    The WED is a 64-bit parameter passed to the AFU when a context is
+    started. Its format is up to the AFU hence the kernel has no
+    knowledge of what it represents. Typically it will be the
+    effective address of a work queue or status block where the AFU
+    and userspace can share control and status information.
+
+
+
+
+User API
+========
+
+1. AFU character devices
+
+    For AFUs operating in AFU directed mode, two character device
+    files will be created. /dev/cxl/afu0.0m will correspond to a
+    master context and /dev/cxl/afu0.0s will correspond to a slave
+    context. Master contexts have access to the full MMIO space an
+    AFU provides. Slave contexts have access to only the per process
+    MMIO space an AFU provides.
+
+    For AFUs operating in dedicated process mode, the driver will
+    only create a single character device per AFU called
+    /dev/cxl/afu0.0d. This will have access to the entire MMIO space
+    that the AFU provides (like master contexts in AFU directed).
+
+    The types described below are defined in include/uapi/misc/cxl.h
+
+    The following file operations are supported on both slave and
+    master devices.
+
+    A userspace library libcxl is available here:
+
+	https://github.com/ibm-capi/libcxl
+
+    This provides a C interface to this kernel API.
+
+open
+----
+
+    Opens the device and allocates a file descriptor to be used with
+    the rest of the API.
+
+    A dedicated mode AFU only has one context and only allows the
+    device to be opened once.
+
+    An AFU directed mode AFU can have many contexts, the device can be
+    opened once for each context that is available.
+
+    When all available contexts are allocated the open call will fail
+    and return -ENOSPC.
+
+    Note:
+	  IRQs need to be allocated for each context, which may limit
+          the number of contexts that can be created, and therefore
+          how many times the device can be opened. The POWER8 CAPP
+          supports 2040 IRQs and 3 are used by the kernel, so 2037 are
+          left. If 1 IRQ is needed per context, then only 2037
+          contexts can be allocated. If 4 IRQs are needed per context,
+          then only 2037/4 = 509 contexts can be allocated.
+
+
+ioctl
+-----
+
+    CXL_IOCTL_START_WORK:
+        Starts the AFU context and associates it with the current
+        process. Once this ioctl is successfully executed, all memory
+        mapped into this process is accessible to this AFU context
+        using the same effective addresses. No additional calls are
+        required to map/unmap memory. The AFU memory context will be
+        updated as userspace allocates and frees memory. This ioctl
+        returns once the AFU context is started.
+
+        Takes a pointer to a struct cxl_ioctl_start_work
+
+            ::
+
+                struct cxl_ioctl_start_work {
+                        __u64 flags;
+                        __u64 work_element_descriptor;
+                        __u64 amr;
+                        __s16 num_interrupts;
+                        __s16 reserved1;
+                        __s32 reserved2;
+                        __u64 reserved3;
+                        __u64 reserved4;
+                        __u64 reserved5;
+                        __u64 reserved6;
+                };
+
+            flags:
+                Indicates which optional fields in the structure are
+                valid.
+
+            work_element_descriptor:
+                The Work Element Descriptor (WED) is a 64-bit argument
+                defined by the AFU. Typically this is an effective
+                address pointing to an AFU specific structure
+                describing what work to perform.
+
+            amr:
+                Authority Mask Register (AMR), same as the powerpc
+                AMR. This field is only used by the kernel when the
+                corresponding CXL_START_WORK_AMR value is specified in
+                flags. If not specified the kernel will use a default
+                value of 0.
+
+            num_interrupts:
+                Number of userspace interrupts to request. This field
+                is only used by the kernel when the corresponding
+                CXL_START_WORK_NUM_IRQS value is specified in flags.
+                If not specified the minimum number required by the
+                AFU will be allocated. The min and max number can be
+                obtained from sysfs.
+
+            reserved fields:
+                For ABI padding and future extensions
+
+    CXL_IOCTL_GET_PROCESS_ELEMENT:
+        Get the current context id, also known as the process element.
+        The value is returned from the kernel as a __u32.
+
+
+mmap
+----
+
+    An AFU may have an MMIO space to facilitate communication with the
+    AFU. If it does, the MMIO space can be accessed via mmap. The size
+    and contents of this area are specific to the particular AFU. The
+    size can be discovered via sysfs.
+
+    In AFU directed mode, master contexts are allowed to map all of
+    the MMIO space and slave contexts are allowed to only map the per
+    process MMIO space associated with the context. In dedicated
+    process mode the entire MMIO space can always be mapped.
+
+    This mmap call must be done after the START_WORK ioctl.
+
+    Care should be taken when accessing MMIO space. Only 32 and 64-bit
+    accesses are supported by POWER8. Also, the AFU will be designed
+    with a specific endianness, so all MMIO accesses should consider
+    endianness (recommend endian(3) variants like: le64toh(),
+    be64toh() etc). These endian issues equally apply to shared memory
+    queues the WED may describe.
+
+
+read
+----
+
+    Reads events from the AFU. Blocks if no events are pending
+    (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
+    unrecoverable error or if the card is removed.
+
+    read() will always return an integral number of events.
+
+    The buffer passed to read() must be at least 4K bytes.
+
+    The result of the read will be a buffer of one or more events,
+    each event is of type struct cxl_event, of varying size::
+
+            struct cxl_event {
+                    struct cxl_event_header header;
+                    union {
+                            struct cxl_event_afu_interrupt irq;
+                            struct cxl_event_data_storage fault;
+                            struct cxl_event_afu_error afu_error;
+                    };
+            };
+
+    The struct cxl_event_header is defined as
+
+        ::
+
+            struct cxl_event_header {
+                    __u16 type;
+                    __u16 size;
+                    __u16 process_element;
+                    __u16 reserved1;
+            };
+
+        type:
+            This defines the type of event. The type determines how
+            the rest of the event is structured. These types are
+            described below and defined by enum cxl_event_type.
+
+        size:
+            This is the size of the event in bytes including the
+            struct cxl_event_header. The start of the next event can
+            be found at this offset from the start of the current
+            event.
+
+        process_element:
+            Context ID of the event.
+
+        reserved field:
+            For future extensions and padding.
+
+    If the event type is CXL_EVENT_AFU_INTERRUPT then the event
+    structure is defined as
+
+        ::
+
+            struct cxl_event_afu_interrupt {
+                    __u16 flags;
+                    __u16 irq; /* Raised AFU interrupt number */
+                    __u32 reserved1;
+            };
+
+        flags:
+            These flags indicate which optional fields are present
+            in this struct. Currently all fields are mandatory.
+
+        irq:
+            The IRQ number sent by the AFU.
+
+        reserved field:
+            For future extensions and padding.
+
+    If the event type is CXL_EVENT_DATA_STORAGE then the event
+    structure is defined as
+
+        ::
+
+            struct cxl_event_data_storage {
+                    __u16 flags;
+                    __u16 reserved1;
+                    __u32 reserved2;
+                    __u64 addr;
+                    __u64 dsisr;
+                    __u64 reserved3;
+            };
+
+        flags:
+            These flags indicate which optional fields are present in
+            this struct. Currently all fields are mandatory.
+
+        address:
+            The address that the AFU unsuccessfully attempted to
+            access. Valid accesses will be handled transparently by the
+            kernel but invalid accesses will generate this event.
+
+        dsisr:
+            This field gives information on the type of fault. It is a
+            copy of the DSISR from the PSL hardware when the address
+            fault occurred. The form of the DSISR is as defined in the
+            CAIA.
+
+        reserved fields:
+            For future extensions
+
+    If the event type is CXL_EVENT_AFU_ERROR then the event structure
+    is defined as
+
+        ::
+
+            struct cxl_event_afu_error {
+                    __u16 flags;
+                    __u16 reserved1;
+                    __u32 reserved2;
+                    __u64 error;
+            };
+
+        flags:
+            These flags indicate which optional fields are present in
+            this struct. Currently all fields are Mandatory.
+
+        error:
+            Error status from the AFU. Defined by the AFU.
+
+        reserved fields:
+            For future extensions and padding
+
+
+2. Card character device (powerVM guest only)
+
+    In a powerVM guest, an extra character device is created for the
+    card. The device is only used to write (flash) a new image on the
+    FPGA accelerator. Once the image is written and verified, the
+    device tree is updated and the card is reset to reload the updated
+    image.
+
+open
+----
+
+    Opens the device and allocates a file descriptor to be used with
+    the rest of the API. The device can only be opened once.
+
+ioctl
+-----
+
+CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE:
+    Starts and controls flashing a new FPGA image. Partial
+    reconfiguration is not supported (yet), so the image must contain
+    a copy of the PSL and AFU(s). Since an image can be quite large,
+    the caller may have to iterate, splitting the image in smaller
+    chunks.
+
+    Takes a pointer to a struct cxl_adapter_image::
+
+        struct cxl_adapter_image {
+            __u64 flags;
+            __u64 data;
+            __u64 len_data;
+            __u64 len_image;
+            __u64 reserved1;
+            __u64 reserved2;
+            __u64 reserved3;
+            __u64 reserved4;
+        };
+
+    flags:
+        These flags indicate which optional fields are present in
+        this struct. Currently all fields are mandatory.
+
+    data:
+        Pointer to a buffer with part of the image to write to the
+        card.
+
+    len_data:
+        Size of the buffer pointed to by data.
+
+    len_image:
+        Full size of the image.
+
+
+Sysfs Class
+===========
+
+    A cxl sysfs class is added under /sys/class/cxl to facilitate
+    enumeration and tuning of the accelerators. Its layout is
+    described in Documentation/ABI/testing/sysfs-class-cxl
+
+
+Udev rules
+==========
+
+    The following udev rules could be used to create a symlink to the
+    most logical chardev to use in any programming mode (afuX.Yd for
+    dedicated, afuX.Ys for afu directed), since the API is virtually
+    identical for each::
+
+	SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
+	SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
+	                  KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt
deleted file mode 100644
index c5e8d5098ed3..000000000000
--- a/Documentation/powerpc/cxl.txt
+++ /dev/null
@@ -1,449 +0,0 @@
-Coherent Accelerator Interface (CXL)
-====================================
-
-Introduction
-============
-
-    The coherent accelerator interface is designed to allow the
-    coherent connection of accelerators (FPGAs and other devices) to a
-    POWER system. These devices need to adhere to the Coherent
-    Accelerator Interface Architecture (CAIA).
-
-    IBM refers to this as the Coherent Accelerator Processor Interface
-    or CAPI. In the kernel it's referred to by the name CXL to avoid
-    confusion with the ISDN CAPI subsystem.
-
-    Coherent in this context means that the accelerator and CPUs can
-    both access system memory directly and with the same effective
-    addresses.
-
-
-Hardware overview
-=================
-
-         POWER8/9             FPGA
-       +----------+        +---------+
-       |          |        |         |
-       |   CPU    |        |   AFU   |
-       |          |        |         |
-       |          |        |         |
-       |          |        |         |
-       +----------+        +---------+
-       |   PHB    |        |         |
-       |   +------+        |   PSL   |
-       |   | CAPP |<------>|         |
-       +---+------+  PCIE  +---------+
-
-    The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP)
-    unit which is part of the PCIe Host Bridge (PHB). This is managed
-    by Linux by calls into OPAL. Linux doesn't directly program the
-    CAPP.
-
-    The FPGA (or coherently attached device) consists of two parts.
-    The POWER Service Layer (PSL) and the Accelerator Function Unit
-    (AFU). The AFU is used to implement specific functionality behind
-    the PSL. The PSL, among other things, provides memory address
-    translation services to allow each AFU direct access to userspace
-    memory.
-
-    The AFU is the core part of the accelerator (eg. the compression,
-    crypto etc function). The kernel has no knowledge of the function
-    of the AFU. Only userspace interacts directly with the AFU.
-
-    The PSL provides the translation and interrupt services that the
-    AFU needs. This is what the kernel interacts with. For example, if
-    the AFU needs to read a particular effective address, it sends
-    that address to the PSL, the PSL then translates it, fetches the
-    data from memory and returns it to the AFU. If the PSL has a
-    translation miss, it interrupts the kernel and the kernel services
-    the fault. The context to which this fault is serviced is based on
-    who owns that acceleration function.
-
-    POWER8 <-----> PSL Version 8 is compliant to the CAIA Version 1.0.
-    POWER9 <-----> PSL Version 9 is compliant to the CAIA Version 2.0.
-    This PSL Version 9 provides new features such as:
-    * Interaction with the nest MMU on the P9 chip.
-    * Native DMA support.
-    * Supports sending ASB_Notify messages for host thread wakeup.
-    * Supports Atomic operations.
-    * ....
-
-    Cards with a PSL9 won't work on a POWER8 system and cards with a
-    PSL8 won't work on a POWER9 system.
-
-AFU Modes
-=========
-
-    There are two programming modes supported by the AFU. Dedicated
-    and AFU directed. AFU may support one or both modes.
-
-    When using dedicated mode only one MMU context is supported. In
-    this mode, only one userspace process can use the accelerator at
-    time.
-
-    When using AFU directed mode, up to 16K simultaneous contexts can
-    be supported. This means up to 16K simultaneous userspace
-    applications may use the accelerator (although specific AFUs may
-    support fewer). In this mode, the AFU sends a 16 bit context ID
-    with each of its requests. This tells the PSL which context is
-    associated with each operation. If the PSL can't translate an
-    operation, the ID can also be accessed by the kernel so it can
-    determine the userspace context associated with an operation.
-
-
-MMIO space
-==========
-
-    A portion of the accelerator MMIO space can be directly mapped
-    from the AFU to userspace. Either the whole space can be mapped or
-    just a per context portion. The hardware is self describing, hence
-    the kernel can determine the offset and size of the per context
-    portion.
-
-
-Interrupts
-==========
-
-    AFUs may generate interrupts that are destined for userspace. These
-    are received by the kernel as hardware interrupts and passed onto
-    userspace by a read syscall documented below.
-
-    Data storage faults and error interrupts are handled by the kernel
-    driver.
-
-
-Work Element Descriptor (WED)
-=============================
-
-    The WED is a 64-bit parameter passed to the AFU when a context is
-    started. Its format is up to the AFU hence the kernel has no
-    knowledge of what it represents. Typically it will be the
-    effective address of a work queue or status block where the AFU
-    and userspace can share control and status information.
-
-
-
-
-User API
-========
-
-1. AFU character devices
-
-    For AFUs operating in AFU directed mode, two character device
-    files will be created. /dev/cxl/afu0.0m will correspond to a
-    master context and /dev/cxl/afu0.0s will correspond to a slave
-    context. Master contexts have access to the full MMIO space an
-    AFU provides. Slave contexts have access to only the per process
-    MMIO space an AFU provides.
-
-    For AFUs operating in dedicated process mode, the driver will
-    only create a single character device per AFU called
-    /dev/cxl/afu0.0d. This will have access to the entire MMIO space
-    that the AFU provides (like master contexts in AFU directed).
-
-    The types described below are defined in include/uapi/misc/cxl.h
-
-    The following file operations are supported on both slave and
-    master devices.
-
-    A userspace library libcxl is available here:
-	https://github.com/ibm-capi/libcxl
-    This provides a C interface to this kernel API.
-
-open
-----
-
-    Opens the device and allocates a file descriptor to be used with
-    the rest of the API.
-
-    A dedicated mode AFU only has one context and only allows the
-    device to be opened once.
-
-    An AFU directed mode AFU can have many contexts, the device can be
-    opened once for each context that is available.
-
-    When all available contexts are allocated the open call will fail
-    and return -ENOSPC.
-
-    Note: IRQs need to be allocated for each context, which may limit
-          the number of contexts that can be created, and therefore
-          how many times the device can be opened. The POWER8 CAPP
-          supports 2040 IRQs and 3 are used by the kernel, so 2037 are
-          left. If 1 IRQ is needed per context, then only 2037
-          contexts can be allocated. If 4 IRQs are needed per context,
-          then only 2037/4 = 509 contexts can be allocated.
-
-
-ioctl
------
-
-    CXL_IOCTL_START_WORK:
-        Starts the AFU context and associates it with the current
-        process. Once this ioctl is successfully executed, all memory
-        mapped into this process is accessible to this AFU context
-        using the same effective addresses. No additional calls are
-        required to map/unmap memory. The AFU memory context will be
-        updated as userspace allocates and frees memory. This ioctl
-        returns once the AFU context is started.
-
-        Takes a pointer to a struct cxl_ioctl_start_work:
-
-                struct cxl_ioctl_start_work {
-                        __u64 flags;
-                        __u64 work_element_descriptor;
-                        __u64 amr;
-                        __s16 num_interrupts;
-                        __s16 reserved1;
-                        __s32 reserved2;
-                        __u64 reserved3;
-                        __u64 reserved4;
-                        __u64 reserved5;
-                        __u64 reserved6;
-                };
-
-            flags:
-                Indicates which optional fields in the structure are
-                valid.
-
-            work_element_descriptor:
-                The Work Element Descriptor (WED) is a 64-bit argument
-                defined by the AFU. Typically this is an effective
-                address pointing to an AFU specific structure
-                describing what work to perform.
-
-            amr:
-                Authority Mask Register (AMR), same as the powerpc
-                AMR. This field is only used by the kernel when the
-                corresponding CXL_START_WORK_AMR value is specified in
-                flags. If not specified the kernel will use a default
-                value of 0.
-
-            num_interrupts:
-                Number of userspace interrupts to request. This field
-                is only used by the kernel when the corresponding
-                CXL_START_WORK_NUM_IRQS value is specified in flags.
-                If not specified the minimum number required by the
-                AFU will be allocated. The min and max number can be
-                obtained from sysfs.
-
-            reserved fields:
-                For ABI padding and future extensions
-
-    CXL_IOCTL_GET_PROCESS_ELEMENT:
-        Get the current context id, also known as the process element.
-        The value is returned from the kernel as a __u32.
-
-
-mmap
-----
-
-    An AFU may have an MMIO space to facilitate communication with the
-    AFU. If it does, the MMIO space can be accessed via mmap. The size
-    and contents of this area are specific to the particular AFU. The
-    size can be discovered via sysfs.
-
-    In AFU directed mode, master contexts are allowed to map all of
-    the MMIO space and slave contexts are allowed to only map the per
-    process MMIO space associated with the context. In dedicated
-    process mode the entire MMIO space can always be mapped.
-
-    This mmap call must be done after the START_WORK ioctl.
-
-    Care should be taken when accessing MMIO space. Only 32 and 64-bit
-    accesses are supported by POWER8. Also, the AFU will be designed
-    with a specific endianness, so all MMIO accesses should consider
-    endianness (recommend endian(3) variants like: le64toh(),
-    be64toh() etc). These endian issues equally apply to shared memory
-    queues the WED may describe.
-
-
-read
-----
-
-    Reads events from the AFU. Blocks if no events are pending
-    (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
-    unrecoverable error or if the card is removed.
-
-    read() will always return an integral number of events.
-
-    The buffer passed to read() must be at least 4K bytes.
-
-    The result of the read will be a buffer of one or more events,
-    each event is of type struct cxl_event, of varying size.
-
-            struct cxl_event {
-                    struct cxl_event_header header;
-                    union {
-                            struct cxl_event_afu_interrupt irq;
-                            struct cxl_event_data_storage fault;
-                            struct cxl_event_afu_error afu_error;
-                    };
-            };
-
-    The struct cxl_event_header is defined as:
-
-            struct cxl_event_header {
-                    __u16 type;
-                    __u16 size;
-                    __u16 process_element;
-                    __u16 reserved1;
-            };
-
-        type:
-            This defines the type of event. The type determines how
-            the rest of the event is structured. These types are
-            described below and defined by enum cxl_event_type.
-
-        size:
-            This is the size of the event in bytes including the
-            struct cxl_event_header. The start of the next event can
-            be found at this offset from the start of the current
-            event.
-
-        process_element:
-            Context ID of the event.
-
-        reserved field:
-            For future extensions and padding.
-
-    If the event type is CXL_EVENT_AFU_INTERRUPT then the event
-    structure is defined as:
-
-            struct cxl_event_afu_interrupt {
-                    __u16 flags;
-                    __u16 irq; /* Raised AFU interrupt number */
-                    __u32 reserved1;
-            };
-
-        flags:
-            These flags indicate which optional fields are present
-            in this struct. Currently all fields are mandatory.
-
-        irq:
-            The IRQ number sent by the AFU.
-
-        reserved field:
-            For future extensions and padding.
-
-    If the event type is CXL_EVENT_DATA_STORAGE then the event
-    structure is defined as:
-
-            struct cxl_event_data_storage {
-                    __u16 flags;
-                    __u16 reserved1;
-                    __u32 reserved2;
-                    __u64 addr;
-                    __u64 dsisr;
-                    __u64 reserved3;
-            };
-
-        flags:
-            These flags indicate which optional fields are present in
-            this struct. Currently all fields are mandatory.
-
-        address:
-            The address that the AFU unsuccessfully attempted to
-            access. Valid accesses will be handled transparently by the
-            kernel but invalid accesses will generate this event.
-
-        dsisr:
-            This field gives information on the type of fault. It is a
-            copy of the DSISR from the PSL hardware when the address
-            fault occurred. The form of the DSISR is as defined in the
-            CAIA.
-
-        reserved fields:
-            For future extensions
-
-    If the event type is CXL_EVENT_AFU_ERROR then the event structure
-    is defined as:
-
-            struct cxl_event_afu_error {
-                    __u16 flags;
-                    __u16 reserved1;
-                    __u32 reserved2;
-                    __u64 error;
-            };
-
-        flags:
-            These flags indicate which optional fields are present in
-            this struct. Currently all fields are Mandatory.
-
-        error:
-            Error status from the AFU. Defined by the AFU.
-
-        reserved fields:
-            For future extensions and padding
-
-
-2. Card character device (powerVM guest only)
-
-    In a powerVM guest, an extra character device is created for the
-    card. The device is only used to write (flash) a new image on the
-    FPGA accelerator. Once the image is written and verified, the
-    device tree is updated and the card is reset to reload the updated
-    image.
-
-open
-----
-
-    Opens the device and allocates a file descriptor to be used with
-    the rest of the API. The device can only be opened once.
-
-ioctl
------
-
-CXL_IOCTL_DOWNLOAD_IMAGE:
-CXL_IOCTL_VALIDATE_IMAGE:
-    Starts and controls flashing a new FPGA image. Partial
-    reconfiguration is not supported (yet), so the image must contain
-    a copy of the PSL and AFU(s). Since an image can be quite large,
-    the caller may have to iterate, splitting the image in smaller
-    chunks.
-
-    Takes a pointer to a struct cxl_adapter_image:
-        struct cxl_adapter_image {
-            __u64 flags;
-            __u64 data;
-            __u64 len_data;
-            __u64 len_image;
-            __u64 reserved1;
-            __u64 reserved2;
-            __u64 reserved3;
-            __u64 reserved4;
-        };
-
-    flags:
-        These flags indicate which optional fields are present in
-        this struct. Currently all fields are mandatory.
-
-    data:
-        Pointer to a buffer with part of the image to write to the
-        card.
-
-    len_data:
-        Size of the buffer pointed to by data.
-
-    len_image:
-        Full size of the image.
-
-
-Sysfs Class
-===========
-
-    A cxl sysfs class is added under /sys/class/cxl to facilitate
-    enumeration and tuning of the accelerators. Its layout is
-    described in Documentation/ABI/testing/sysfs-class-cxl
-
-
-Udev rules
-==========
-
-    The following udev rules could be used to create a symlink to the
-    most logical chardev to use in any programming mode (afuX.Yd for
-    dedicated, afuX.Ys for afu directed), since the API is virtually
-    identical for each:
-
-	SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
-	SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
-	                  KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
diff --git a/Documentation/powerpc/cxlflash.rst b/Documentation/powerpc/cxlflash.rst
new file mode 100644
index 000000000000..cea67931b3b9
--- /dev/null
+++ b/Documentation/powerpc/cxlflash.rst
@@ -0,0 +1,433 @@
+================================
+Coherent Accelerator (CXL) Flash
+================================
+
+Introduction
+============
+
+    The IBM Power architecture provides support for CAPI (Coherent
+    Accelerator Power Interface), which is available to certain PCIe slots
+    on Power 8 systems. CAPI can be thought of as a special tunneling
+    protocol through PCIe that allow PCIe adapters to look like special
+    purpose co-processors which can read or write an application's
+    memory and generate page faults. As a result, the host interface to
+    an adapter running in CAPI mode does not require the data buffers to
+    be mapped to the device's memory (IOMMU bypass) nor does it require
+    memory to be pinned.
+
+    On Linux, Coherent Accelerator (CXL) kernel services present CAPI
+    devices as a PCI device by implementing a virtual PCI host bridge.
+    This abstraction simplifies the infrastructure and programming
+    model, allowing for drivers to look similar to other native PCI
+    device drivers.
+
+    CXL provides a mechanism by which user space applications can
+    directly talk to a device (network or storage) bypassing the typical
+    kernel/device driver stack. The CXL Flash Adapter Driver enables a
+    user space application direct access to Flash storage.
+
+    The CXL Flash Adapter Driver is a kernel module that sits in the
+    SCSI stack as a low level device driver (below the SCSI disk and
+    protocol drivers) for the IBM CXL Flash Adapter. This driver is
+    responsible for the initialization of the adapter, setting up the
+    special path for user space access, and performing error recovery. It
+    communicates directly the Flash Accelerator Functional Unit (AFU)
+    as described in Documentation/powerpc/cxl.rst.
+
+    The cxlflash driver supports two, mutually exclusive, modes of
+    operation at the device (LUN) level:
+
+        - Any flash device (LUN) can be configured to be accessed as a
+          regular disk device (i.e.: /dev/sdc). This is the default mode.
+
+        - Any flash device (LUN) can be configured to be accessed from
+          user space with a special block library. This mode further
+          specifies the means of accessing the device and provides for
+          either raw access to the entire LUN (referred to as direct
+          or physical LUN access) or access to a kernel/AFU-mediated
+          partition of the LUN (referred to as virtual LUN access). The
+          segmentation of a disk device into virtual LUNs is assisted
+          by special translation services provided by the Flash AFU.
+
+Overview
+========
+
+    The Coherent Accelerator Interface Architecture (CAIA) introduces a
+    concept of a master context. A master typically has special privileges
+    granted to it by the kernel or hypervisor allowing it to perform AFU
+    wide management and control. The master may or may not be involved
+    directly in each user I/O, but at the minimum is involved in the
+    initial setup before the user application is allowed to send requests
+    directly to the AFU.
+
+    The CXL Flash Adapter Driver establishes a master context with the
+    AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
+    Adapter Problem Space Memory Map looks like this::
+
+                     +-------------------------------+
+                     |    512 * 64 KB User MMIO      |
+                     |        (per context)          |
+                     |       User Accessible         |
+                     +-------------------------------+
+                     |    512 * 128 B per context    |
+                     |    Provisioning and Control   |
+                     |   Trusted Process accessible  |
+                     +-------------------------------+
+                     |         64 KB Global          |
+                     |   Trusted Process accessible  |
+                     +-------------------------------+
+
+    This driver configures itself into the SCSI software stack as an
+    adapter driver. The driver is the only entity that is considered a
+    Trusted Process to program the Provisioning and Control and Global
+    areas in the MMIO Space shown above.  The master context driver
+    discovers all LUNs attached to the CXL Flash adapter and instantiates
+    scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN
+    seen from each path.
+
+    Once these scsi block devices are instantiated, an application
+    written to a specification provided by the block library may get
+    access to the Flash from user space (without requiring a system call).
+
+    This master context driver also provides a series of ioctls for this
+    block library to enable this user space access.  The driver supports
+    two modes for accessing the block device.
+
+    The first mode is called a virtual mode. In this mode a single scsi
+    block device (/dev/sdb) may be carved up into any number of distinct
+    virtual LUNs. The virtual LUNs may be resized as long as the sum of
+    the sizes of all the virtual LUNs, along with the meta-data associated
+    with it does not exceed the physical capacity.
+
+    The second mode is called the physical mode. In this mode a single
+    block device (/dev/sdb) may be opened directly by the block library
+    and the entire space for the LUN is available to the application.
+
+    Only the physical mode provides persistence of the data.  i.e. The
+    data written to the block device will survive application exit and
+    restart and also reboot. The virtual LUNs do not persist (i.e. do
+    not survive after the application terminates or the system reboots).
+
+
+Block library API
+=================
+
+    Applications intending to get access to the CXL Flash from user
+    space should use the block library, as it abstracts the details of
+    interfacing directly with the cxlflash driver that are necessary for
+    performing administrative actions (i.e.: setup, tear down, resize).
+    The block library can be thought of as a 'user' of services,
+    implemented as IOCTLs, that are provided by the cxlflash driver
+    specifically for devices (LUNs) operating in user space access
+    mode. While it is not a requirement that applications understand
+    the interface between the block library and the cxlflash driver,
+    a high-level overview of each supported service (IOCTL) is provided
+    below.
+
+    The block library can be found on GitHub:
+    http://github.com/open-power/capiflash
+
+
+CXL Flash Driver LUN IOCTLs
+===========================
+
+    Users, such as the block library, that wish to interface with a flash
+    device (LUN) via user space access need to use the services provided
+    by the cxlflash driver. As these services are implemented as ioctls,
+    a file descriptor handle must first be obtained in order to establish
+    the communication channel between a user and the kernel.  This file
+    descriptor is obtained by opening the device special file associated
+    with the scsi disk device (/dev/sdb) that was created during LUN
+    discovery. As per the location of the cxlflash driver within the
+    SCSI protocol stack, this open is actually not seen by the cxlflash
+    driver. Upon successful open, the user receives a file descriptor
+    (herein referred to as fd1) that should be used for issuing the
+    subsequent ioctls listed below.
+
+    The structure definitions for these IOCTLs are available in:
+    uapi/scsi/cxlflash_ioctl.h
+
+DK_CXLFLASH_ATTACH
+------------------
+
+    This ioctl obtains, initializes, and starts a context using the CXL
+    kernel services. These services specify a context id (u16) by which
+    to uniquely identify the context and its allocated resources. The
+    services additionally provide a second file descriptor (herein
+    referred to as fd2) that is used by the block library to initiate
+    memory mapped I/O (via mmap()) to the CXL flash device and poll for
+    completion events. This file descriptor is intentionally installed by
+    this driver and not the CXL kernel services to allow for intermediary
+    notification and access in the event of a non-user-initiated close(),
+    such as a killed process. This design point is described in further
+    detail in the description for the DK_CXLFLASH_DETACH ioctl.
+
+    There are a few important aspects regarding the "tokens" (context id
+    and fd2) that are provided back to the user:
+
+        - These tokens are only valid for the process under which they
+          were created. The child of a forked process cannot continue
+          to use the context id or file descriptor created by its parent
+          (see DK_CXLFLASH_VLUN_CLONE for further details).
+
+        - These tokens are only valid for the lifetime of the context and
+          the process under which they were created. Once either is
+          destroyed, the tokens are to be considered stale and subsequent
+          usage will result in errors.
+
+	- A valid adapter file descriptor (fd2 >= 0) is only returned on
+	  the initial attach for a context. Subsequent attaches to an
+	  existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present)
+	  do not provide the adapter file descriptor as it was previously
+	  made known to the application.
+
+        - When a context is no longer needed, the user shall detach from
+          the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl
+	  returns with a valid adapter file descriptor and the return flag
+	  DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
+	  close the adapter file descriptor following a successful detach.
+
+	- When this ioctl returns with a valid fd2 and the return flag
+	  DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
+	  close fd2 in the following circumstances:
+
+		+ Following a successful detach of the last user of the context
+		+ Following a successful recovery on the context's original fd2
+		+ In the child process of a fork(), following a clone ioctl,
+		  on the fd2 associated with the source context
+
+        - At any time, a close on fd2 will invalidate the tokens. Applications
+	  should exercise caution to only close fd2 when appropriate (outlined
+	  in the previous bullet) to avoid premature loss of I/O.
+
+DK_CXLFLASH_USER_DIRECT
+-----------------------
+    This ioctl is responsible for transitioning the LUN to direct
+    (physical) mode access and configuring the AFU for direct access from
+    user space on a per-context basis. Additionally, the block size and
+    last logical block address (LBA) are returned to the user.
+
+    As mentioned previously, when operating in user space access mode,
+    LUNs may be accessed in whole or in part. Only one mode is allowed
+    at a time and if one mode is active (outstanding references exist),
+    requests to use the LUN in a different mode are denied.
+
+    The AFU is configured for direct access from user space by adding an
+    entry to the AFU's resource handle table. The index of the entry is
+    treated as a resource handle that is returned to the user. The user
+    is then able to use the handle to reference the LUN during I/O.
+
+DK_CXLFLASH_USER_VIRTUAL
+------------------------
+    This ioctl is responsible for transitioning the LUN to virtual mode
+    of access and configuring the AFU for virtual access from user space
+    on a per-context basis. Additionally, the block size and last logical
+    block address (LBA) are returned to the user.
+
+    As mentioned previously, when operating in user space access mode,
+    LUNs may be accessed in whole or in part. Only one mode is allowed
+    at a time and if one mode is active (outstanding references exist),
+    requests to use the LUN in a different mode are denied.
+
+    The AFU is configured for virtual access from user space by adding
+    an entry to the AFU's resource handle table. The index of the entry
+    is treated as a resource handle that is returned to the user. The
+    user is then able to use the handle to reference the LUN during I/O.
+
+    By default, the virtual LUN is created with a size of 0. The user
+    would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to adjust the grow
+    the virtual LUN to a desired size. To avoid having to perform this
+    resize for the initial creation of the virtual LUN, the user has the
+    option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL
+    ioctl, such that when success is returned to the user, the
+    resource handle that is provided is already referencing provisioned
+    storage. This is reflected by the last LBA being a non-zero value.
+
+    When a LUN is accessible from more than one port, this ioctl will
+    return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This
+    provides the user with a hint that I/O can be retried in the event
+    of an I/O error as the LUN can be reached over multiple paths.
+
+DK_CXLFLASH_VLUN_RESIZE
+-----------------------
+    This ioctl is responsible for resizing a previously created virtual
+    LUN and will fail if invoked upon a LUN that is not in virtual
+    mode. Upon success, an updated last LBA is returned to the user
+    indicating the new size of the virtual LUN associated with the
+    resource handle.
+
+    The partitioning of virtual LUNs is jointly mediated by the cxlflash
+    driver and the AFU. An allocation table is kept for each LUN that is
+    operating in the virtual mode and used to program a LUN translation
+    table that the AFU references when provided with a resource handle.
+
+    This ioctl can return -EAGAIN if an AFU sync operation takes too long.
+    In addition to returning a failure to user, cxlflash will also schedule
+    an asynchronous AFU reset. Should the user choose to retry the operation,
+    it is expected to succeed. If this ioctl fails with -EAGAIN, the user
+    can either retry the operation or treat it as a failure.
+
+DK_CXLFLASH_RELEASE
+-------------------
+    This ioctl is responsible for releasing a previously obtained
+    reference to either a physical or virtual LUN. This can be
+    thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or
+    DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle
+    is no longer valid and the entry in the resource handle table is
+    made available to be used again.
+
+    As part of the release process for virtual LUNs, the virtual LUN
+    is first resized to 0 to clear out and free the translation tables
+    associated with the virtual LUN reference.
+
+DK_CXLFLASH_DETACH
+------------------
+    This ioctl is responsible for unregistering a context with the
+    cxlflash driver and release outstanding resources that were
+    not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon
+    success, all "tokens" which had been provided to the user from the
+    DK_CXLFLASH_ATTACH onward are no longer valid.
+
+    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+    attach, the application _must_ close the fd2 associated with the context
+    following the detach of the final user of the context.
+
+DK_CXLFLASH_VLUN_CLONE
+----------------------
+    This ioctl is responsible for cloning a previously created
+    context to a more recently created context. It exists solely to
+    support maintaining user space access to storage after a process
+    forks. Upon success, the child process (which invoked the ioctl)
+    will have access to the same LUNs via the same resource handle(s)
+    as the parent, but under a different context.
+
+    Context sharing across processes is not supported with CXL and
+    therefore each fork must be met with establishing a new context
+    for the child process. This ioctl simplifies the state management
+    and playback required by a user in such a scenario. When a process
+    forks, child process can clone the parents context by first creating
+    a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to
+    perform the clone from the parent to the child.
+
+    The clone itself is fairly simple. The resource handle and lun
+    translation tables are copied from the parent context to the child's
+    and then synced with the AFU.
+
+    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+    attach, the application _must_ close the fd2 associated with the source
+    context (still resident/accessible in the parent process) following the
+    clone. This is to avoid a stale entry in the file descriptor table of the
+    child process.
+
+    This ioctl can return -EAGAIN if an AFU sync operation takes too long.
+    In addition to returning a failure to user, cxlflash will also schedule
+    an asynchronous AFU reset. Should the user choose to retry the operation,
+    it is expected to succeed. If this ioctl fails with -EAGAIN, the user
+    can either retry the operation or treat it as a failure.
+
+DK_CXLFLASH_VERIFY
+------------------
+    This ioctl is used to detect various changes such as the capacity of
+    the disk changing, the number of LUNs visible changing, etc. In cases
+    where the changes affect the application (such as a LUN resize), the
+    cxlflash driver will report the changed state to the application.
+
+    The user calls in when they want to validate that a LUN hasn't been
+    changed in response to a check condition. As the user is operating out
+    of band from the kernel, they will see these types of events without
+    the kernel's knowledge. When encountered, the user's architected
+    behavior is to call in to this ioctl, indicating what they want to
+    verify and passing along any appropriate information. For now, only
+    verifying a LUN change (ie: size different) with sense data is
+    supported.
+
+DK_CXLFLASH_RECOVER_AFU
+-----------------------
+    This ioctl is used to drive recovery (if such an action is warranted)
+    of a specified user context. Any state associated with the user context
+    is re-established upon successful recovery.
+
+    User contexts are put into an error condition when the device needs to
+    be reset or is terminating. Users are notified of this error condition
+    by seeing all 0xF's on an MMIO read. Upon encountering this, the
+    architected behavior for a user is to call into this ioctl to recover
+    their context. A user may also call into this ioctl at any time to
+    check if the device is operating normally. If a failure is returned
+    from this ioctl, the user is expected to gracefully clean up their
+    context via release/detach ioctls. Until they do, the context they
+    hold is not relinquished. The user may also optionally exit the process
+    at which time the context/resources they held will be freed as part of
+    the release fop.
+
+    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+    attach, the application _must_ unmap and close the fd2 associated with the
+    original context following this ioctl returning success and indicating that
+    the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET).
+
+DK_CXLFLASH_MANAGE_LUN
+----------------------
+    This ioctl is used to switch a LUN from a mode where it is available
+    for file-system access (legacy), to a mode where it is set aside for
+    exclusive user space access (superpipe). In case a LUN is visible
+    across multiple ports and adapters, this ioctl is used to uniquely
+    identify each LUN by its World Wide Node Name (WWNN).
+
+
+CXL Flash Driver Host IOCTLs
+============================
+
+    Each host adapter instance that is supported by the cxlflash driver
+    has a special character device associated with it to enable a set of
+    host management function. These character devices are hosted in a
+    class dedicated for cxlflash and can be accessed via `/dev/cxlflash/*`.
+
+    Applications can be written to perform various functions using the
+    host ioctl APIs below.
+
+    The structure definitions for these IOCTLs are available in:
+    uapi/scsi/cxlflash_ioctl.h
+
+HT_CXLFLASH_LUN_PROVISION
+-------------------------
+    This ioctl is used to create and delete persistent LUNs on cxlflash
+    devices that lack an external LUN management interface. It is only
+    valid when used with AFUs that support the LUN provision capability.
+
+    When sufficient space is available, LUNs can be created by specifying
+    the target port to host the LUN and a desired size in 4K blocks. Upon
+    success, the LUN ID and WWID of the created LUN will be returned and
+    the SCSI bus can be scanned to detect the change in LUN topology. Note
+    that partial allocations are not supported. Should a creation fail due
+    to a space issue, the target port can be queried for its current LUN
+    geometry.
+
+    To remove a LUN, the device must first be disassociated from the Linux
+    SCSI subsystem. The LUN deletion can then be initiated by specifying a
+    target port and LUN ID. Upon success, the LUN geometry associated with
+    the port will be updated to reflect new number of provisioned LUNs and
+    available capacity.
+
+    To query the LUN geometry of a port, the target port is specified and
+    upon success, the following information is presented:
+
+        - Maximum number of provisioned LUNs allowed for the port
+        - Current number of provisioned LUNs for the port
+        - Maximum total capacity of provisioned LUNs for the port (4K blocks)
+        - Current total capacity of provisioned LUNs for the port (4K blocks)
+
+    With this information, the number of available LUNs and capacity can be
+    can be calculated.
+
+HT_CXLFLASH_AFU_DEBUG
+---------------------
+    This ioctl is used to debug AFUs by supporting a command pass-through
+    interface. It is only valid when used with AFUs that support the AFU
+    debug capability.
+
+    With exception of buffer management, AFU debug commands are opaque to
+    cxlflash and treated as pass-through. For debug commands that do require
+    data transfer, the user supplies an adequately sized data buffer and must
+    specify the data transfer direction with respect to the host. There is a
+    maximum transfer size of 256K imposed. Note that partial read completions
+    are not supported - when errors are experienced with a host read data
+    transfer, the data buffer is not copied back to the user.
diff --git a/Documentation/powerpc/cxlflash.txt b/Documentation/powerpc/cxlflash.txt
deleted file mode 100644
index a64bdaa0a1cf..000000000000
--- a/Documentation/powerpc/cxlflash.txt
+++ /dev/null
@@ -1,429 +0,0 @@
-Introduction
-============
-
-    The IBM Power architecture provides support for CAPI (Coherent
-    Accelerator Power Interface), which is available to certain PCIe slots
-    on Power 8 systems. CAPI can be thought of as a special tunneling
-    protocol through PCIe that allow PCIe adapters to look like special
-    purpose co-processors which can read or write an application's
-    memory and generate page faults. As a result, the host interface to
-    an adapter running in CAPI mode does not require the data buffers to
-    be mapped to the device's memory (IOMMU bypass) nor does it require
-    memory to be pinned.
-
-    On Linux, Coherent Accelerator (CXL) kernel services present CAPI
-    devices as a PCI device by implementing a virtual PCI host bridge.
-    This abstraction simplifies the infrastructure and programming
-    model, allowing for drivers to look similar to other native PCI
-    device drivers.
-
-    CXL provides a mechanism by which user space applications can
-    directly talk to a device (network or storage) bypassing the typical
-    kernel/device driver stack. The CXL Flash Adapter Driver enables a
-    user space application direct access to Flash storage.
-
-    The CXL Flash Adapter Driver is a kernel module that sits in the
-    SCSI stack as a low level device driver (below the SCSI disk and
-    protocol drivers) for the IBM CXL Flash Adapter. This driver is
-    responsible for the initialization of the adapter, setting up the
-    special path for user space access, and performing error recovery. It
-    communicates directly the Flash Accelerator Functional Unit (AFU)
-    as described in Documentation/powerpc/cxl.txt.
-
-    The cxlflash driver supports two, mutually exclusive, modes of
-    operation at the device (LUN) level:
-
-        - Any flash device (LUN) can be configured to be accessed as a
-          regular disk device (i.e.: /dev/sdc). This is the default mode.
-
-        - Any flash device (LUN) can be configured to be accessed from
-          user space with a special block library. This mode further
-          specifies the means of accessing the device and provides for
-          either raw access to the entire LUN (referred to as direct
-          or physical LUN access) or access to a kernel/AFU-mediated
-          partition of the LUN (referred to as virtual LUN access). The
-          segmentation of a disk device into virtual LUNs is assisted
-          by special translation services provided by the Flash AFU.
-
-Overview
-========
-
-    The Coherent Accelerator Interface Architecture (CAIA) introduces a
-    concept of a master context. A master typically has special privileges
-    granted to it by the kernel or hypervisor allowing it to perform AFU
-    wide management and control. The master may or may not be involved
-    directly in each user I/O, but at the minimum is involved in the
-    initial setup before the user application is allowed to send requests
-    directly to the AFU.
-
-    The CXL Flash Adapter Driver establishes a master context with the
-    AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
-    Adapter Problem Space Memory Map looks like this:
-
-                     +-------------------------------+
-                     |    512 * 64 KB User MMIO      |
-                     |        (per context)          |
-                     |       User Accessible         |
-                     +-------------------------------+
-                     |    512 * 128 B per context    |
-                     |    Provisioning and Control   |
-                     |   Trusted Process accessible  |
-                     +-------------------------------+
-                     |         64 KB Global          |
-                     |   Trusted Process accessible  |
-                     +-------------------------------+
-
-    This driver configures itself into the SCSI software stack as an
-    adapter driver. The driver is the only entity that is considered a
-    Trusted Process to program the Provisioning and Control and Global
-    areas in the MMIO Space shown above.  The master context driver
-    discovers all LUNs attached to the CXL Flash adapter and instantiates
-    scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN
-    seen from each path.
-
-    Once these scsi block devices are instantiated, an application
-    written to a specification provided by the block library may get
-    access to the Flash from user space (without requiring a system call).
-
-    This master context driver also provides a series of ioctls for this
-    block library to enable this user space access.  The driver supports
-    two modes for accessing the block device.
-
-    The first mode is called a virtual mode. In this mode a single scsi
-    block device (/dev/sdb) may be carved up into any number of distinct
-    virtual LUNs. The virtual LUNs may be resized as long as the sum of
-    the sizes of all the virtual LUNs, along with the meta-data associated
-    with it does not exceed the physical capacity.
-
-    The second mode is called the physical mode. In this mode a single
-    block device (/dev/sdb) may be opened directly by the block library
-    and the entire space for the LUN is available to the application.
-
-    Only the physical mode provides persistence of the data.  i.e. The
-    data written to the block device will survive application exit and
-    restart and also reboot. The virtual LUNs do not persist (i.e. do
-    not survive after the application terminates or the system reboots).
-
-
-Block library API
-=================
-
-    Applications intending to get access to the CXL Flash from user
-    space should use the block library, as it abstracts the details of
-    interfacing directly with the cxlflash driver that are necessary for
-    performing administrative actions (i.e.: setup, tear down, resize).
-    The block library can be thought of as a 'user' of services,
-    implemented as IOCTLs, that are provided by the cxlflash driver
-    specifically for devices (LUNs) operating in user space access
-    mode. While it is not a requirement that applications understand
-    the interface between the block library and the cxlflash driver,
-    a high-level overview of each supported service (IOCTL) is provided
-    below.
-
-    The block library can be found on GitHub:
-    http://github.com/open-power/capiflash
-
-
-CXL Flash Driver LUN IOCTLs
-===========================
-
-    Users, such as the block library, that wish to interface with a flash
-    device (LUN) via user space access need to use the services provided
-    by the cxlflash driver. As these services are implemented as ioctls,
-    a file descriptor handle must first be obtained in order to establish
-    the communication channel between a user and the kernel.  This file
-    descriptor is obtained by opening the device special file associated
-    with the scsi disk device (/dev/sdb) that was created during LUN
-    discovery. As per the location of the cxlflash driver within the
-    SCSI protocol stack, this open is actually not seen by the cxlflash
-    driver. Upon successful open, the user receives a file descriptor
-    (herein referred to as fd1) that should be used for issuing the
-    subsequent ioctls listed below.
-
-    The structure definitions for these IOCTLs are available in:
-    uapi/scsi/cxlflash_ioctl.h
-
-DK_CXLFLASH_ATTACH
-------------------
-
-    This ioctl obtains, initializes, and starts a context using the CXL
-    kernel services. These services specify a context id (u16) by which
-    to uniquely identify the context and its allocated resources. The
-    services additionally provide a second file descriptor (herein
-    referred to as fd2) that is used by the block library to initiate
-    memory mapped I/O (via mmap()) to the CXL flash device and poll for
-    completion events. This file descriptor is intentionally installed by
-    this driver and not the CXL kernel services to allow for intermediary
-    notification and access in the event of a non-user-initiated close(),
-    such as a killed process. This design point is described in further
-    detail in the description for the DK_CXLFLASH_DETACH ioctl.
-
-    There are a few important aspects regarding the "tokens" (context id
-    and fd2) that are provided back to the user:
-
-        - These tokens are only valid for the process under which they
-          were created. The child of a forked process cannot continue
-          to use the context id or file descriptor created by its parent
-          (see DK_CXLFLASH_VLUN_CLONE for further details).
-
-        - These tokens are only valid for the lifetime of the context and
-          the process under which they were created. Once either is
-          destroyed, the tokens are to be considered stale and subsequent
-          usage will result in errors.
-
-	- A valid adapter file descriptor (fd2 >= 0) is only returned on
-	  the initial attach for a context. Subsequent attaches to an
-	  existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present)
-	  do not provide the adapter file descriptor as it was previously
-	  made known to the application.
-
-        - When a context is no longer needed, the user shall detach from
-          the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl
-	  returns with a valid adapter file descriptor and the return flag
-	  DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
-	  close the adapter file descriptor following a successful detach.
-
-	- When this ioctl returns with a valid fd2 and the return flag
-	  DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
-	  close fd2 in the following circumstances:
-
-		+ Following a successful detach of the last user of the context
-		+ Following a successful recovery on the context's original fd2
-		+ In the child process of a fork(), following a clone ioctl,
-		  on the fd2 associated with the source context
-
-        - At any time, a close on fd2 will invalidate the tokens. Applications
-	  should exercise caution to only close fd2 when appropriate (outlined
-	  in the previous bullet) to avoid premature loss of I/O.
-
-DK_CXLFLASH_USER_DIRECT
------------------------
-    This ioctl is responsible for transitioning the LUN to direct
-    (physical) mode access and configuring the AFU for direct access from
-    user space on a per-context basis. Additionally, the block size and
-    last logical block address (LBA) are returned to the user.
-
-    As mentioned previously, when operating in user space access mode,
-    LUNs may be accessed in whole or in part. Only one mode is allowed
-    at a time and if one mode is active (outstanding references exist),
-    requests to use the LUN in a different mode are denied.
-
-    The AFU is configured for direct access from user space by adding an
-    entry to the AFU's resource handle table. The index of the entry is
-    treated as a resource handle that is returned to the user. The user
-    is then able to use the handle to reference the LUN during I/O.
-
-DK_CXLFLASH_USER_VIRTUAL
-------------------------
-    This ioctl is responsible for transitioning the LUN to virtual mode
-    of access and configuring the AFU for virtual access from user space
-    on a per-context basis. Additionally, the block size and last logical
-    block address (LBA) are returned to the user.
-
-    As mentioned previously, when operating in user space access mode,
-    LUNs may be accessed in whole or in part. Only one mode is allowed
-    at a time and if one mode is active (outstanding references exist),
-    requests to use the LUN in a different mode are denied.
-
-    The AFU is configured for virtual access from user space by adding
-    an entry to the AFU's resource handle table. The index of the entry
-    is treated as a resource handle that is returned to the user. The
-    user is then able to use the handle to reference the LUN during I/O.
-
-    By default, the virtual LUN is created with a size of 0. The user
-    would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to adjust the grow
-    the virtual LUN to a desired size. To avoid having to perform this
-    resize for the initial creation of the virtual LUN, the user has the
-    option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL
-    ioctl, such that when success is returned to the user, the
-    resource handle that is provided is already referencing provisioned
-    storage. This is reflected by the last LBA being a non-zero value.
-
-    When a LUN is accessible from more than one port, this ioctl will
-    return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This
-    provides the user with a hint that I/O can be retried in the event
-    of an I/O error as the LUN can be reached over multiple paths.
-
-DK_CXLFLASH_VLUN_RESIZE
------------------------
-    This ioctl is responsible for resizing a previously created virtual
-    LUN and will fail if invoked upon a LUN that is not in virtual
-    mode. Upon success, an updated last LBA is returned to the user
-    indicating the new size of the virtual LUN associated with the
-    resource handle.
-
-    The partitioning of virtual LUNs is jointly mediated by the cxlflash
-    driver and the AFU. An allocation table is kept for each LUN that is
-    operating in the virtual mode and used to program a LUN translation
-    table that the AFU references when provided with a resource handle.
-
-    This ioctl can return -EAGAIN if an AFU sync operation takes too long.
-    In addition to returning a failure to user, cxlflash will also schedule
-    an asynchronous AFU reset. Should the user choose to retry the operation,
-    it is expected to succeed. If this ioctl fails with -EAGAIN, the user
-    can either retry the operation or treat it as a failure.
-
-DK_CXLFLASH_RELEASE
--------------------
-    This ioctl is responsible for releasing a previously obtained
-    reference to either a physical or virtual LUN. This can be
-    thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or
-    DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle
-    is no longer valid and the entry in the resource handle table is
-    made available to be used again.
-
-    As part of the release process for virtual LUNs, the virtual LUN
-    is first resized to 0 to clear out and free the translation tables
-    associated with the virtual LUN reference.
-
-DK_CXLFLASH_DETACH
-------------------
-    This ioctl is responsible for unregistering a context with the
-    cxlflash driver and release outstanding resources that were
-    not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon
-    success, all "tokens" which had been provided to the user from the
-    DK_CXLFLASH_ATTACH onward are no longer valid.
-
-    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
-    attach, the application _must_ close the fd2 associated with the context
-    following the detach of the final user of the context.
-
-DK_CXLFLASH_VLUN_CLONE
-----------------------
-    This ioctl is responsible for cloning a previously created
-    context to a more recently created context. It exists solely to
-    support maintaining user space access to storage after a process
-    forks. Upon success, the child process (which invoked the ioctl)
-    will have access to the same LUNs via the same resource handle(s)
-    as the parent, but under a different context.
-
-    Context sharing across processes is not supported with CXL and
-    therefore each fork must be met with establishing a new context
-    for the child process. This ioctl simplifies the state management
-    and playback required by a user in such a scenario. When a process
-    forks, child process can clone the parents context by first creating
-    a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to
-    perform the clone from the parent to the child.
-
-    The clone itself is fairly simple. The resource handle and lun
-    translation tables are copied from the parent context to the child's
-    and then synced with the AFU.
-
-    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
-    attach, the application _must_ close the fd2 associated with the source
-    context (still resident/accessible in the parent process) following the
-    clone. This is to avoid a stale entry in the file descriptor table of the
-    child process.
-
-    This ioctl can return -EAGAIN if an AFU sync operation takes too long.
-    In addition to returning a failure to user, cxlflash will also schedule
-    an asynchronous AFU reset. Should the user choose to retry the operation,
-    it is expected to succeed. If this ioctl fails with -EAGAIN, the user
-    can either retry the operation or treat it as a failure.
-
-DK_CXLFLASH_VERIFY
-------------------
-    This ioctl is used to detect various changes such as the capacity of
-    the disk changing, the number of LUNs visible changing, etc. In cases
-    where the changes affect the application (such as a LUN resize), the
-    cxlflash driver will report the changed state to the application.
-
-    The user calls in when they want to validate that a LUN hasn't been
-    changed in response to a check condition. As the user is operating out
-    of band from the kernel, they will see these types of events without
-    the kernel's knowledge. When encountered, the user's architected
-    behavior is to call in to this ioctl, indicating what they want to
-    verify and passing along any appropriate information. For now, only
-    verifying a LUN change (ie: size different) with sense data is
-    supported.
-
-DK_CXLFLASH_RECOVER_AFU
------------------------
-    This ioctl is used to drive recovery (if such an action is warranted)
-    of a specified user context. Any state associated with the user context
-    is re-established upon successful recovery.
-
-    User contexts are put into an error condition when the device needs to
-    be reset or is terminating. Users are notified of this error condition
-    by seeing all 0xF's on an MMIO read. Upon encountering this, the
-    architected behavior for a user is to call into this ioctl to recover
-    their context. A user may also call into this ioctl at any time to
-    check if the device is operating normally. If a failure is returned
-    from this ioctl, the user is expected to gracefully clean up their
-    context via release/detach ioctls. Until they do, the context they
-    hold is not relinquished. The user may also optionally exit the process
-    at which time the context/resources they held will be freed as part of
-    the release fop.
-
-    When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
-    attach, the application _must_ unmap and close the fd2 associated with the
-    original context following this ioctl returning success and indicating that
-    the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET).
-
-DK_CXLFLASH_MANAGE_LUN
-----------------------
-    This ioctl is used to switch a LUN from a mode where it is available
-    for file-system access (legacy), to a mode where it is set aside for
-    exclusive user space access (superpipe). In case a LUN is visible
-    across multiple ports and adapters, this ioctl is used to uniquely
-    identify each LUN by its World Wide Node Name (WWNN).
-
-
-CXL Flash Driver Host IOCTLs
-============================
-
-    Each host adapter instance that is supported by the cxlflash driver
-    has a special character device associated with it to enable a set of
-    host management function. These character devices are hosted in a
-    class dedicated for cxlflash and can be accessed via /dev/cxlflash/*.
-
-    Applications can be written to perform various functions using the
-    host ioctl APIs below.
-
-    The structure definitions for these IOCTLs are available in:
-    uapi/scsi/cxlflash_ioctl.h
-
-HT_CXLFLASH_LUN_PROVISION
--------------------------
-    This ioctl is used to create and delete persistent LUNs on cxlflash
-    devices that lack an external LUN management interface. It is only
-    valid when used with AFUs that support the LUN provision capability.
-
-    When sufficient space is available, LUNs can be created by specifying
-    the target port to host the LUN and a desired size in 4K blocks. Upon
-    success, the LUN ID and WWID of the created LUN will be returned and
-    the SCSI bus can be scanned to detect the change in LUN topology. Note
-    that partial allocations are not supported. Should a creation fail due
-    to a space issue, the target port can be queried for its current LUN
-    geometry.
-
-    To remove a LUN, the device must first be disassociated from the Linux
-    SCSI subsystem. The LUN deletion can then be initiated by specifying a
-    target port and LUN ID. Upon success, the LUN geometry associated with
-    the port will be updated to reflect new number of provisioned LUNs and
-    available capacity.
-
-    To query the LUN geometry of a port, the target port is specified and
-    upon success, the following information is presented:
-
-        - Maximum number of provisioned LUNs allowed for the port
-        - Current number of provisioned LUNs for the port
-        - Maximum total capacity of provisioned LUNs for the port (4K blocks)
-        - Current total capacity of provisioned LUNs for the port (4K blocks)
-
-    With this information, the number of available LUNs and capacity can be
-    can be calculated.
-
-HT_CXLFLASH_AFU_DEBUG
----------------------
-    This ioctl is used to debug AFUs by supporting a command pass-through
-    interface. It is only valid when used with AFUs that support the AFU
-    debug capability.
-
-    With exception of buffer management, AFU debug commands are opaque to
-    cxlflash and treated as pass-through. For debug commands that do require
-    data transfer, the user supplies an adequately sized data buffer and must
-    specify the data transfer direction with respect to the host. There is a
-    maximum transfer size of 256K imposed. Note that partial read completions
-    are not supported - when errors are experienced with a host read data
-    transfer, the data buffer is not copied back to the user.
diff --git a/Documentation/powerpc/dawr-power9.rst b/Documentation/powerpc/dawr-power9.rst
new file mode 100644
index 000000000000..c96ab6befd9c
--- /dev/null
+++ b/Documentation/powerpc/dawr-power9.rst
@@ -0,0 +1,93 @@
+=====================
+DAWR issues on POWER9
+=====================
+
+On POWER9 the Data Address Watchpoint Register (DAWR) can cause a checkstop
+if it points to cache inhibited (CI) memory. Currently Linux has no way to
+disinguish CI memory when configuring the DAWR, so (for now) the DAWR is
+disabled by this commit::
+
+    commit 9654153158d3e0684a1bdb76dbababdb7111d5a0
+    Author: Michael Neuling <mikey@neuling.org>
+    Date:   Tue Mar 27 15:37:24 2018 +1100
+    powerpc: Disable DAWR in the base POWER9 CPU features
+
+Technical Details:
+==================
+
+DAWR has 6 different ways of being set.
+1) ptrace
+2) h_set_mode(DAWR)
+3) h_set_dabr()
+4) kvmppc_set_one_reg()
+5) xmon
+
+For ptrace, we now advertise zero breakpoints on POWER9 via the
+PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to
+software emulation of the watchpoint (which is slow).
+
+h_set_mode(DAWR) and h_set_dabr() will now return an error to the
+guest on a POWER9 host. Current Linux guests ignore this error, so
+they will silently not get the DAWR.
+
+kvmppc_set_one_reg() will store the value in the vcpu but won't
+actually set it on POWER9 hardware. This is done so we don't break
+migration from POWER8 to POWER9, at the cost of silently losing the
+DAWR on the migration.
+
+For xmon, the 'bd' command will return an error on P9.
+
+Consequences for users
+======================
+
+For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB
+will accept the command. Unfortunately since there is no hardware
+support for the watchpoint, GDB will software emulate the watchpoint
+making it run very slowly.
+
+The same will also be true for any guests started on a POWER9
+host. The watchpoint will fail and GDB will fall back to software
+emulation.
+
+If a guest is started on a POWER8 host, GDB will accept the watchpoint
+and configure the hardware to use the DAWR. This will run at full
+speed since it can use the hardware emulation. Unfortunately if this
+guest is migrated to a POWER9 host, the watchpoint will be lost on the
+POWER9. Loads and stores to the watchpoint locations will not be
+trapped in GDB. The watchpoint is remembered, so if the guest is
+migrated back to the POWER8 host, it will start working again.
+
+Force enabling the DAWR
+=======================
+Kernels (since ~v5.2) have an option to force enable the DAWR via::
+
+  echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR.  This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+
+  tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+
+Any errors/failures/skips mean something is wrong.
diff --git a/Documentation/powerpc/dscr.rst b/Documentation/powerpc/dscr.rst
new file mode 100644
index 000000000000..2ab99006014c
--- /dev/null
+++ b/Documentation/powerpc/dscr.rst
@@ -0,0 +1,87 @@
+===================================
+DSCR (Data Stream Control Register)
+===================================
+
+DSCR register in powerpc allows user to have some control of prefetch of data
+stream in the processor. Please refer to the ISA documents or related manual
+for more detailed information regarding how to use this DSCR to attain this
+control of the prefetches . This document here provides an overview of kernel
+support for DSCR, related kernel objects, it's functionalities and exported
+user interface.
+
+(A) Data Structures:
+
+	(1) thread_struct::
+
+		dscr		/* Thread DSCR value */
+		dscr_inherit	/* Thread has changed default DSCR */
+
+	(2) PACA::
+
+		dscr_default	/* per-CPU DSCR default value */
+
+	(3) sysfs.c::
+
+		dscr_default	/* System DSCR default value */
+
+(B) Scheduler Changes:
+
+	Scheduler will write the per-CPU DSCR default which is stored in the
+	CPU's PACA value into the register if the thread has dscr_inherit value
+	cleared which means that it has not changed the default DSCR till now.
+	If the dscr_inherit value is set which means that it has changed the
+	default DSCR value, scheduler will write the changed value which will
+	now be contained in thread struct's dscr into the register instead of
+	the per-CPU default PACA based DSCR value.
+
+	NOTE: Please note here that the system wide global DSCR value never
+	gets used directly in the scheduler process context switch at all.
+
+(C) SYSFS Interface:
+
+	- Global DSCR default:		/sys/devices/system/cpu/dscr_default
+	- CPU specific DSCR default:	/sys/devices/system/cpu/cpuN/dscr
+
+	Changing the global DSCR default in the sysfs will change all the CPU
+	specific DSCR defaults immediately in their PACA structures. Again if
+	the current process has the dscr_inherit clear, it also writes the new
+	value into every CPU's DSCR register right away and updates the current
+	thread's DSCR value as well.
+
+	Changing the CPU specific DSCR default value in the sysfs does exactly
+	the same thing as above but unlike the global one above, it just changes
+	stuff for that particular CPU instead for all the CPUs on the system.
+
+(D) User Space Instructions:
+
+	The DSCR register can be accessed in the user space using any of these
+	two SPR numbers available for that purpose.
+
+	(1) Problem state SPR:		0x03	(Un-privileged, POWER8 only)
+	(2) Privileged state SPR:	0x11	(Privileged)
+
+	Accessing DSCR through privileged SPR number (0x11) from user space
+	works, as it is emulated following an illegal instruction exception
+	inside the kernel. Both mfspr and mtspr instructions are emulated.
+
+	Accessing DSCR through user level SPR (0x03) from user space will first
+	create a facility unavailable exception. Inside this exception handler
+	all mfspr instruction based read attempts will get emulated and returned
+	where as the first mtspr instruction based write attempts will enable
+	the DSCR facility for the next time around (both for read and write) by
+	setting DSCR facility in the FSCR register.
+
+(E) Specifics about 'dscr_inherit':
+
+	The thread struct element 'dscr_inherit' represents whether the thread
+	in question has attempted and changed the DSCR itself using any of the
+	following methods. This element signifies whether the thread wants to
+	use the CPU default DSCR value or its own changed DSCR value in the
+	kernel.
+
+		(1) mtspr instruction	(SPR number 0x03)
+		(2) mtspr instruction	(SPR number 0x11)
+		(3) ptrace interface	(Explicitly set user DSCR value)
+
+	Any child of the process created after this event in the process inherits
+	this same behaviour as well.
diff --git a/Documentation/powerpc/dscr.txt b/Documentation/powerpc/dscr.txt
deleted file mode 100644
index ece300c64f76..000000000000
--- a/Documentation/powerpc/dscr.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-			DSCR (Data Stream Control Register)
-		================================================
-
-DSCR register in powerpc allows user to have some control of prefetch of data
-stream in the processor. Please refer to the ISA documents or related manual
-for more detailed information regarding how to use this DSCR to attain this
-control of the prefetches . This document here provides an overview of kernel
-support for DSCR, related kernel objects, it's functionalities and exported
-user interface.
-
-(A) Data Structures:
-
-	(1) thread_struct:
-		dscr		/* Thread DSCR value */
-		dscr_inherit	/* Thread has changed default DSCR */
-
-	(2) PACA:
-		dscr_default	/* per-CPU DSCR default value */
-
-	(3) sysfs.c:
-		dscr_default	/* System DSCR default value */
-
-(B) Scheduler Changes:
-
-	Scheduler will write the per-CPU DSCR default which is stored in the
-	CPU's PACA value into the register if the thread has dscr_inherit value
-	cleared which means that it has not changed the default DSCR till now.
-	If the dscr_inherit value is set which means that it has changed the
-	default DSCR value, scheduler will write the changed value which will
-	now be contained in thread struct's dscr into the register instead of
-	the per-CPU default PACA based DSCR value.
-
-	NOTE: Please note here that the system wide global DSCR value never
-	gets used directly in the scheduler process context switch at all.
-
-(C) SYSFS Interface:
-
-	Global DSCR default:		/sys/devices/system/cpu/dscr_default
-	CPU specific DSCR default:	/sys/devices/system/cpu/cpuN/dscr
-
-	Changing the global DSCR default in the sysfs will change all the CPU
-	specific DSCR defaults immediately in their PACA structures. Again if
-	the current process has the dscr_inherit clear, it also writes the new
-	value into every CPU's DSCR register right away and updates the current
-	thread's DSCR value as well.
-
-	Changing the CPU specific DSCR default value in the sysfs does exactly
-	the same thing as above but unlike the global one above, it just changes
-	stuff for that particular CPU instead for all the CPUs on the system.
-
-(D) User Space Instructions:
-
-	The DSCR register can be accessed in the user space using any of these
-	two SPR numbers available for that purpose.
-
-	(1) Problem state SPR:		0x03	(Un-privileged, POWER8 only)
-	(2) Privileged state SPR:	0x11	(Privileged)
-
-	Accessing DSCR through privileged SPR number (0x11) from user space
-	works, as it is emulated following an illegal instruction exception
-	inside the kernel. Both mfspr and mtspr instructions are emulated.
-
-	Accessing DSCR through user level SPR (0x03) from user space will first
-	create a facility unavailable exception. Inside this exception handler
-	all mfspr instruction based read attempts will get emulated and returned
-	where as the first mtspr instruction based write attempts will enable
-	the DSCR facility for the next time around (both for read and write) by
-	setting DSCR facility in the FSCR register.
-
-(E) Specifics about 'dscr_inherit':
-
-	The thread struct element 'dscr_inherit' represents whether the thread
-	in question has attempted and changed the DSCR itself using any of the
-	following methods. This element signifies whether the thread wants to
-	use the CPU default DSCR value or its own changed DSCR value in the
-	kernel.
-
-		(1) mtspr instruction	(SPR number 0x03)
-		(2) mtspr instruction	(SPR number 0x11)
-		(3) ptrace interface	(Explicitly set user DSCR value)
-
-	Any child of the process created after this event in the process inherits
-	this same behaviour as well.
diff --git a/Documentation/powerpc/eeh-pci-error-recovery.rst b/Documentation/powerpc/eeh-pci-error-recovery.rst
new file mode 100644
index 000000000000..438a87ebc095
--- /dev/null
+++ b/Documentation/powerpc/eeh-pci-error-recovery.rst
@@ -0,0 +1,336 @@
+==========================
+PCI Bus EEH Error Recovery
+==========================
+
+Linas Vepstas <linas@austin.ibm.com>
+
+12 January 2005
+
+
+Overview:
+---------
+The IBM POWER-based pSeries and iSeries computers include PCI bus
+controller chips that have extended capabilities for detecting and
+reporting a large variety of PCI bus error conditions.  These features
+go under the name of "EEH", for "Enhanced Error Handling".  The EEH
+hardware features allow PCI bus errors to be cleared and a PCI
+card to be "rebooted", without also having to reboot the operating
+system.
+
+This is in contrast to traditional PCI error handling, where the
+PCI chip is wired directly to the CPU, and an error would cause
+a CPU machine-check/check-stop condition, halting the CPU entirely.
+Another "traditional" technique is to ignore such errors, which
+can lead to data corruption, both of user data or of kernel data,
+hung/unresponsive adapters, or system crashes/lockups.  Thus,
+the idea behind EEH is that the operating system can become more
+reliable and robust by protecting it from PCI errors, and giving
+the OS the ability to "reboot"/recover individual PCI devices.
+
+Future systems from other vendors, based on the PCI-E specification,
+may contain similar features.
+
+
+Causes of EEH Errors
+--------------------
+EEH was originally designed to guard against hardware failure, such
+as PCI cards dying from heat, humidity, dust, vibration and bad
+electrical connections. The vast majority of EEH errors seen in
+"real life" are due to either poorly seated PCI cards, or,
+unfortunately quite commonly, due to device driver bugs, device firmware
+bugs, and sometimes PCI card hardware bugs.
+
+The most common software bug, is one that causes the device to
+attempt to DMA to a location in system memory that has not been
+reserved for DMA access for that card.  This is a powerful feature,
+as it prevents what; otherwise, would have been silent memory
+corruption caused by the bad DMA.  A number of device driver
+bugs have been found and fixed in this way over the past few
+years.  Other possible causes of EEH errors include data or
+address line parity errors (for example, due to poor electrical
+connectivity due to a poorly seated card), and PCI-X split-completion
+errors (due to software, device firmware, or device PCI hardware bugs).
+The vast majority of "true hardware failures" can be cured by
+physically removing and re-seating the PCI card.
+
+
+Detection and Recovery
+----------------------
+In the following discussion, a generic overview of how to detect
+and recover from EEH errors will be presented. This is followed
+by an overview of how the current implementation in the Linux
+kernel does it.  The actual implementation is subject to change,
+and some of the finer points are still being debated.  These
+may in turn be swayed if or when other architectures implement
+similar functionality.
+
+When a PCI Host Bridge (PHB, the bus controller connecting the
+PCI bus to the system CPU electronics complex) detects a PCI error
+condition, it will "isolate" the affected PCI card.  Isolation
+will block all writes (either to the card from the system, or
+from the card to the system), and it will cause all reads to
+return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads).
+This value was chosen because it is the same value you would
+get if the device was physically unplugged from the slot.
+This includes access to PCI memory, I/O space, and PCI config
+space.  Interrupts; however, will continued to be delivered.
+
+Detection and recovery are performed with the aid of ppc64
+firmware.  The programming interfaces in the Linux kernel
+into the firmware are referred to as RTAS (Run-Time Abstraction
+Services).  The Linux kernel does not (should not) access
+the EEH function in the PCI chipsets directly, primarily because
+there are a number of different chipsets out there, each with
+different interfaces and quirks. The firmware provides a
+uniform abstraction layer that will work with all pSeries
+and iSeries hardware (and be forwards-compatible).
+
+If the OS or device driver suspects that a PCI slot has been
+EEH-isolated, there is a firmware call it can make to determine if
+this is the case. If so, then the device driver should put itself
+into a consistent state (given that it won't be able to complete any
+pending work) and start recovery of the card.  Recovery normally
+would consist of resetting the PCI device (holding the PCI #RST
+line high for two seconds), followed by setting up the device
+config space (the base address registers (BAR's), latency timer,
+cache line size, interrupt line, and so on).  This is followed by a
+reinitialization of the device driver.  In a worst-case scenario,
+the power to the card can be toggled, at least on hot-plug-capable
+slots.  In principle, layers far above the device driver probably
+do not need to know that the PCI card has been "rebooted" in this
+way; ideally, there should be at most a pause in Ethernet/disk/USB
+I/O while the card is being reset.
+
+If the card cannot be recovered after three or four resets, the
+kernel/device driver should assume the worst-case scenario, that the
+card has died completely, and report this error to the sysadmin.
+In addition, error messages are reported through RTAS and also through
+syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
+The correct way to deal with failed adapters is to use the standard
+PCI hotplug tools to remove and replace the dead card.
+
+
+Current PPC64 Linux EEH Implementation
+--------------------------------------
+At this time, a generic EEH recovery mechanism has been implemented,
+so that individual device drivers do not need to be modified to support
+EEH recovery.  This generic mechanism piggy-backs on the PCI hotplug
+infrastructure,  and percolates events up through the userspace/udev
+infrastructure.  Following is a detailed description of how this is
+accomplished.
+
+EEH must be enabled in the PHB's very early during the boot process,
+and if a PCI slot is hot-plugged. The former is performed by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
+drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
+EEH must be enabled before a PCI scan of the device can proceed.
+Current Power5 hardware will not work unless EEH is enabled;
+although older Power4 can run with it disabled.  Effectively,
+EEH can no longer be turned off.  PCI devices *must* be
+registered with the EEH code; the EEH code needs to know about
+the I/O address ranges of the PCI device in order to detect an
+error.  Given an arbitrary address, the routine
+pci_get_device_by_addr() will find the pci device associated
+with that address (if any).
+
+The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(),
+etc. include a check to see if the i/o read returned all-0xff's.
+If so, these make a call to eeh_dn_check_failure(), which in turn
+asks the firmware if the all-ff's value is the sign of a true EEH
+error.  If it is not, processing continues as normal.  The grand
+total number of these false alarms or "false positives" can be
+seen in /proc/ppc64/eeh (subject to change).  Normally, almost
+all of these occur during boot, when the PCI bus is scanned, where
+a large number of 0xff reads are part of the bus scan procedure.
+
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages).  This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
+beforehand.
+
+Next, it uses the Linux kernel notifier chain/work queue mechanism to
+allow any interested parties to find out about the failure.  Device
+drivers, or other parts of the kernel, can use
+`eeh_register_notifier(struct notifier_block *)` to find out about EEH
+events.  The event will include a pointer to the pci device, the
+device node and some state info.  Receivers of the event can "do as
+they wish"; the default handler will be described further in this
+section.
+
+To assist in the recovery of the device, eeh.c exports the
+following functions:
+
+rtas_set_slot_reset()
+   assert the  PCI #RST line for 1/8th of a second
+rtas_configure_bridge()
+   ask firmware to configure any PCI bridges
+   located topologically under the pci slot.
+eeh_save_bars() and eeh_restore_bars():
+   save and restore the PCI
+   config-space info for a device and any devices under it.
+
+
+A handler for the EEH notifier_block events is implemented in
+drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
+It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
+This last call causes the device driver for the card to be stopped,
+which causes uevents to go out to user space. This triggers
+user-space scripts that might issue commands such as "ifdown eth0"
+for ethernet cards, and so on.  This handler then sleeps for 5 seconds,
+hoping to give the user-space scripts enough time to complete.
+It then resets the PCI card, reconfigures the device BAR's, and
+any bridges underneath. It then calls rpaphp_enable_pci_slot(),
+which restarts the device driver and triggers more user-space
+events (for example, calling "ifup eth0" for ethernet cards).
+
+
+Device Shutdown and User-Space Events
+-------------------------------------
+This section documents what happens when a pci slot is unconfigured,
+focusing on how the device driver gets shut down, and on how the
+events get delivered to user-space scripts.
+
+Following is an example sequence of events that cause a device driver
+close function to be called during the first phase of an EEH reset.
+The following sequence is an example of the pcnet32 device driver::
+
+    rpa_php_unconfig_pci_adapter (struct slot *)  // in rpaphp_pci.c
+    {
+      calls
+      pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c
+      {
+        calls
+        pci_destroy_dev (struct pci_dev *)
+        {
+          calls
+          device_unregister (&dev->dev) // in /drivers/base/core.c
+          {
+            calls
+            device_del (struct device *)
+            {
+              calls
+              bus_remove_device() // in /drivers/base/bus.c
+              {
+                calls
+                device_release_driver()
+                {
+                  calls
+                  struct device_driver->remove() which is just
+                  pci_device_remove()  // in /drivers/pci/pci_driver.c
+                  {
+                    calls
+                    struct pci_driver->remove() which is just
+                    pcnet32_remove_one() // in /drivers/net/pcnet32.c
+                    {
+                      calls
+                      unregister_netdev() // in /net/core/dev.c
+                      {
+                        calls
+                        dev_close()  // in /net/core/dev.c
+                        {
+                           calls dev->stop();
+                           which is just pcnet32_close() // in pcnet32.c
+                           {
+                             which does what you wanted
+                             to stop the device
+                           }
+                        }
+                     }
+                   which
+                   frees pcnet32 device driver memory
+                }
+     }}}}}}
+
+
+in drivers/pci/pci_driver.c,
+struct device_driver->remove() is just pci_device_remove()
+which calls struct pci_driver->remove() which is pcnet32_remove_one()
+which calls unregister_netdev()  (in net/core/dev.c)
+which calls dev_close()  (in net/core/dev.c)
+which calls dev->stop() which is pcnet32_close()
+which then does the appropriate shutdown.
+
+---
+
+Following is the analogous stack trace for events sent to user-space
+when the pci device is unconfigured::
+
+  rpa_php_unconfig_pci_adapter() {             // in rpaphp_pci.c
+    calls
+    pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
+      calls
+      pci_destroy_dev (struct pci_dev *) {
+        calls
+        device_unregister (&dev->dev) {        // in /drivers/base/core.c
+          calls
+          device_del(struct device * dev) {    // in /drivers/base/core.c
+            calls
+            kobject_del() {                    //in /libs/kobject.c
+              calls
+              kobject_uevent() {               // in /libs/kobject.c
+                calls
+                kset_uevent() {                // in /lib/kobject.c
+                  calls
+                  kset->uevent_ops->uevent()   // which is really just
+                  a call to
+                  dev_uevent() {               // in /drivers/base/core.c
+                    calls
+                    dev->bus->uevent() which is really just a call to
+                    pci_uevent () {            // in drivers/pci/hotplug.c
+                      which prints device name, etc....
+                   }
+                 }
+                 then kobject_uevent() sends a netlink uevent to userspace
+                 --> userspace uevent
+                 (during early boot, nobody listens to netlink events and
+                 kobject_uevent() executes uevent_helper[], which runs the
+                 event process /sbin/hotplug)
+             }
+           }
+           kobject_del() then calls sysfs_remove_dir(), which would
+           trigger any user-space daemon that was watching /sysfs,
+           and notice the delete event.
+
+
+Pro's and Con's of the Current Design
+-------------------------------------
+There are several issues with the current EEH software recovery design,
+which may be addressed in future revisions.  But first, note that the
+big plus of the current design is that no changes need to be made to
+individual device drivers, so that the current design throws a wide net.
+The biggest negative of the design is that it potentially disturbs
+network daemons and file systems that didn't need to be disturbed.
+
+-  A minor complaint is that resetting the network card causes
+   user-space back-to-back ifdown/ifup burps that potentially disturb
+   network daemons, that didn't need to even know that the pci
+   card was being rebooted.
+
+-  A more serious concern is that the same reset, for SCSI devices,
+   causes havoc to mounted file systems.  Scripts cannot post-facto
+   unmount a file system without flushing pending buffers, but this
+   is impossible, because I/O has already been stopped.  Thus,
+   ideally, the reset should happen at or below the block layer,
+   so that the file systems are not disturbed.
+
+   Reiserfs does not tolerate errors returned from the block device.
+   Ext3fs seems to be tolerant, retrying reads/writes until it does
+   succeed. Both have been only lightly tested in this scenario.
+
+   The SCSI-generic subsystem already has built-in code for performing
+   SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter
+   (HBA) resets.  These are cascaded into a chain of attempted
+   resets if a SCSI command fails. These are completely hidden
+   from the block layer.  It would be very natural to add an EEH
+   reset into this chain of events.
+
+-  If a SCSI error occurs for the root device, all is lost unless
+   the sysadmin had the foresight to run /bin, /sbin, /etc, /var
+   and so on, out of ramdisk/tmpfs.
+
+
+Conclusions
+-----------
+There's forward progress ...
diff --git a/Documentation/powerpc/eeh-pci-error-recovery.txt b/Documentation/powerpc/eeh-pci-error-recovery.txt
deleted file mode 100644
index 678189280bb4..000000000000
--- a/Documentation/powerpc/eeh-pci-error-recovery.txt
+++ /dev/null
@@ -1,334 +0,0 @@
-
-
-                      PCI Bus EEH Error Recovery
-                      --------------------------
-                           Linas Vepstas
-                       <linas@austin.ibm.com>
-                          12 January 2005
-
-
-Overview:
----------
-The IBM POWER-based pSeries and iSeries computers include PCI bus
-controller chips that have extended capabilities for detecting and
-reporting a large variety of PCI bus error conditions.  These features
-go under the name of "EEH", for "Enhanced Error Handling".  The EEH
-hardware features allow PCI bus errors to be cleared and a PCI
-card to be "rebooted", without also having to reboot the operating
-system.
-
-This is in contrast to traditional PCI error handling, where the
-PCI chip is wired directly to the CPU, and an error would cause
-a CPU machine-check/check-stop condition, halting the CPU entirely.
-Another "traditional" technique is to ignore such errors, which
-can lead to data corruption, both of user data or of kernel data,
-hung/unresponsive adapters, or system crashes/lockups.  Thus,
-the idea behind EEH is that the operating system can become more
-reliable and robust by protecting it from PCI errors, and giving
-the OS the ability to "reboot"/recover individual PCI devices.
-
-Future systems from other vendors, based on the PCI-E specification,
-may contain similar features.
-
-
-Causes of EEH Errors
---------------------
-EEH was originally designed to guard against hardware failure, such
-as PCI cards dying from heat, humidity, dust, vibration and bad
-electrical connections. The vast majority of EEH errors seen in
-"real life" are due to either poorly seated PCI cards, or,
-unfortunately quite commonly, due to device driver bugs, device firmware
-bugs, and sometimes PCI card hardware bugs.
-
-The most common software bug, is one that causes the device to
-attempt to DMA to a location in system memory that has not been
-reserved for DMA access for that card.  This is a powerful feature,
-as it prevents what; otherwise, would have been silent memory
-corruption caused by the bad DMA.  A number of device driver
-bugs have been found and fixed in this way over the past few
-years.  Other possible causes of EEH errors include data or
-address line parity errors (for example, due to poor electrical
-connectivity due to a poorly seated card), and PCI-X split-completion
-errors (due to software, device firmware, or device PCI hardware bugs).
-The vast majority of "true hardware failures" can be cured by
-physically removing and re-seating the PCI card.
-
-
-Detection and Recovery
-----------------------
-In the following discussion, a generic overview of how to detect
-and recover from EEH errors will be presented. This is followed
-by an overview of how the current implementation in the Linux
-kernel does it.  The actual implementation is subject to change,
-and some of the finer points are still being debated.  These
-may in turn be swayed if or when other architectures implement
-similar functionality.
-
-When a PCI Host Bridge (PHB, the bus controller connecting the
-PCI bus to the system CPU electronics complex) detects a PCI error
-condition, it will "isolate" the affected PCI card.  Isolation
-will block all writes (either to the card from the system, or
-from the card to the system), and it will cause all reads to
-return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads).
-This value was chosen because it is the same value you would
-get if the device was physically unplugged from the slot.
-This includes access to PCI memory, I/O space, and PCI config
-space.  Interrupts; however, will continued to be delivered.
-
-Detection and recovery are performed with the aid of ppc64
-firmware.  The programming interfaces in the Linux kernel
-into the firmware are referred to as RTAS (Run-Time Abstraction
-Services).  The Linux kernel does not (should not) access
-the EEH function in the PCI chipsets directly, primarily because
-there are a number of different chipsets out there, each with
-different interfaces and quirks. The firmware provides a
-uniform abstraction layer that will work with all pSeries
-and iSeries hardware (and be forwards-compatible).
-
-If the OS or device driver suspects that a PCI slot has been
-EEH-isolated, there is a firmware call it can make to determine if
-this is the case. If so, then the device driver should put itself
-into a consistent state (given that it won't be able to complete any
-pending work) and start recovery of the card.  Recovery normally
-would consist of resetting the PCI device (holding the PCI #RST
-line high for two seconds), followed by setting up the device
-config space (the base address registers (BAR's), latency timer,
-cache line size, interrupt line, and so on).  This is followed by a
-reinitialization of the device driver.  In a worst-case scenario,
-the power to the card can be toggled, at least on hot-plug-capable
-slots.  In principle, layers far above the device driver probably
-do not need to know that the PCI card has been "rebooted" in this
-way; ideally, there should be at most a pause in Ethernet/disk/USB
-I/O while the card is being reset.
-
-If the card cannot be recovered after three or four resets, the
-kernel/device driver should assume the worst-case scenario, that the
-card has died completely, and report this error to the sysadmin.
-In addition, error messages are reported through RTAS and also through
-syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
-The correct way to deal with failed adapters is to use the standard
-PCI hotplug tools to remove and replace the dead card.
-
-
-Current PPC64 Linux EEH Implementation
---------------------------------------
-At this time, a generic EEH recovery mechanism has been implemented,
-so that individual device drivers do not need to be modified to support
-EEH recovery.  This generic mechanism piggy-backs on the PCI hotplug
-infrastructure,  and percolates events up through the userspace/udev
-infrastructure.  Following is a detailed description of how this is
-accomplished.
-
-EEH must be enabled in the PHB's very early during the boot process,
-and if a PCI slot is hot-plugged. The former is performed by
-eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
-drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
-EEH must be enabled before a PCI scan of the device can proceed.
-Current Power5 hardware will not work unless EEH is enabled;
-although older Power4 can run with it disabled.  Effectively,
-EEH can no longer be turned off.  PCI devices *must* be
-registered with the EEH code; the EEH code needs to know about
-the I/O address ranges of the PCI device in order to detect an
-error.  Given an arbitrary address, the routine
-pci_get_device_by_addr() will find the pci device associated
-with that address (if any).
-
-The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(),
-etc. include a check to see if the i/o read returned all-0xff's.
-If so, these make a call to eeh_dn_check_failure(), which in turn
-asks the firmware if the all-ff's value is the sign of a true EEH
-error.  If it is not, processing continues as normal.  The grand
-total number of these false alarms or "false positives" can be
-seen in /proc/ppc64/eeh (subject to change).  Normally, almost
-all of these occur during boot, when the PCI bus is scanned, where
-a large number of 0xff reads are part of the bus scan procedure.
-
-If a frozen slot is detected, code in 
-arch/powerpc/platforms/pseries/eeh.c will print a stack trace to 
-syslog (/var/log/messages).  This stack trace has proven to be very 
-useful to device-driver authors for finding out at what point the EEH 
-error was detected, as the error itself usually occurs slightly 
-beforehand.
-
-Next, it uses the Linux kernel notifier chain/work queue mechanism to
-allow any interested parties to find out about the failure.  Device
-drivers, or other parts of the kernel, can use
-eeh_register_notifier(struct notifier_block *) to find out about EEH
-events.  The event will include a pointer to the pci device, the
-device node and some state info.  Receivers of the event can "do as
-they wish"; the default handler will be described further in this
-section.
-
-To assist in the recovery of the device, eeh.c exports the
-following functions:
-
-rtas_set_slot_reset() -- assert the  PCI #RST line for 1/8th of a second
-rtas_configure_bridge() -- ask firmware to configure any PCI bridges
-   located topologically under the pci slot.
-eeh_save_bars() and eeh_restore_bars(): save and restore the PCI
-   config-space info for a device and any devices under it.
-
-
-A handler for the EEH notifier_block events is implemented in
-drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
-It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
-This last call causes the device driver for the card to be stopped,
-which causes uevents to go out to user space. This triggers
-user-space scripts that might issue commands such as "ifdown eth0"
-for ethernet cards, and so on.  This handler then sleeps for 5 seconds,
-hoping to give the user-space scripts enough time to complete.
-It then resets the PCI card, reconfigures the device BAR's, and
-any bridges underneath. It then calls rpaphp_enable_pci_slot(),
-which restarts the device driver and triggers more user-space
-events (for example, calling "ifup eth0" for ethernet cards).
-
-
-Device Shutdown and User-Space Events
--------------------------------------
-This section documents what happens when a pci slot is unconfigured,
-focusing on how the device driver gets shut down, and on how the
-events get delivered to user-space scripts.
-
-Following is an example sequence of events that cause a device driver
-close function to be called during the first phase of an EEH reset.
-The following sequence is an example of the pcnet32 device driver.
-
-    rpa_php_unconfig_pci_adapter (struct slot *)  // in rpaphp_pci.c
-    {
-      calls
-      pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c
-      {
-        calls
-        pci_destroy_dev (struct pci_dev *)
-        {
-          calls
-          device_unregister (&dev->dev) // in /drivers/base/core.c
-          {
-            calls
-            device_del (struct device *)
-            {
-              calls
-              bus_remove_device() // in /drivers/base/bus.c
-              {
-                calls
-                device_release_driver()
-                {
-                  calls
-                  struct device_driver->remove() which is just
-                  pci_device_remove()  // in /drivers/pci/pci_driver.c
-                  {
-                    calls
-                    struct pci_driver->remove() which is just
-                    pcnet32_remove_one() // in /drivers/net/pcnet32.c
-                    {
-                      calls
-                      unregister_netdev() // in /net/core/dev.c
-                      {
-                        calls
-                        dev_close()  // in /net/core/dev.c
-                        {
-                           calls dev->stop();
-                           which is just pcnet32_close() // in pcnet32.c
-                           {
-                             which does what you wanted
-                             to stop the device
-                           }
-                        }
-                     }
-                   which
-                   frees pcnet32 device driver memory
-                }
-     }}}}}}
-
-
-    in drivers/pci/pci_driver.c,
-    struct device_driver->remove() is just pci_device_remove()
-    which calls struct pci_driver->remove() which is pcnet32_remove_one()
-    which calls unregister_netdev()  (in net/core/dev.c)
-    which calls dev_close()  (in net/core/dev.c)
-    which calls dev->stop() which is pcnet32_close()
-    which then does the appropriate shutdown.
-
----
-Following is the analogous stack trace for events sent to user-space
-when the pci device is unconfigured.
-
-rpa_php_unconfig_pci_adapter() {             // in rpaphp_pci.c
-  calls
-  pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
-    calls
-    pci_destroy_dev (struct pci_dev *) {
-      calls
-      device_unregister (&dev->dev) {        // in /drivers/base/core.c
-        calls
-        device_del(struct device * dev) {    // in /drivers/base/core.c
-          calls
-          kobject_del() {                    //in /libs/kobject.c
-            calls
-            kobject_uevent() {               // in /libs/kobject.c
-              calls
-              kset_uevent() {                // in /lib/kobject.c
-                calls
-                kset->uevent_ops->uevent()   // which is really just
-                a call to
-                dev_uevent() {               // in /drivers/base/core.c
-                  calls
-                  dev->bus->uevent() which is really just a call to
-                  pci_uevent () {            // in drivers/pci/hotplug.c
-                    which prints device name, etc....
-                 }
-               }
-               then kobject_uevent() sends a netlink uevent to userspace
-               --> userspace uevent
-               (during early boot, nobody listens to netlink events and
-               kobject_uevent() executes uevent_helper[], which runs the
-               event process /sbin/hotplug)
-           }
-         }
-         kobject_del() then calls sysfs_remove_dir(), which would
-         trigger any user-space daemon that was watching /sysfs,
-         and notice the delete event.
-
-
-Pro's and Con's of the Current Design
--------------------------------------
-There are several issues with the current EEH software recovery design,
-which may be addressed in future revisions.  But first, note that the
-big plus of the current design is that no changes need to be made to
-individual device drivers, so that the current design throws a wide net.
-The biggest negative of the design is that it potentially disturbs
-network daemons and file systems that didn't need to be disturbed.
-
--- A minor complaint is that resetting the network card causes
-   user-space back-to-back ifdown/ifup burps that potentially disturb
-   network daemons, that didn't need to even know that the pci
-   card was being rebooted.
-
--- A more serious concern is that the same reset, for SCSI devices,
-   causes havoc to mounted file systems.  Scripts cannot post-facto
-   unmount a file system without flushing pending buffers, but this
-   is impossible, because I/O has already been stopped.  Thus,
-   ideally, the reset should happen at or below the block layer,
-   so that the file systems are not disturbed.
-
-   Reiserfs does not tolerate errors returned from the block device.
-   Ext3fs seems to be tolerant, retrying reads/writes until it does
-   succeed. Both have been only lightly tested in this scenario.
-
-   The SCSI-generic subsystem already has built-in code for performing
-   SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter
-   (HBA) resets.  These are cascaded into a chain of attempted
-   resets if a SCSI command fails. These are completely hidden
-   from the block layer.  It would be very natural to add an EEH
-   reset into this chain of events.
-
--- If a SCSI error occurs for the root device, all is lost unless
-   the sysadmin had the foresight to run /bin, /sbin, /etc, /var
-   and so on, out of ramdisk/tmpfs.
-
-
-Conclusions
------------
-There's forward progress ...
-
-
diff --git a/Documentation/powerpc/firmware-assisted-dump.rst b/Documentation/powerpc/firmware-assisted-dump.rst
new file mode 100644
index 000000000000..9ca12830a48e
--- /dev/null
+++ b/Documentation/powerpc/firmware-assisted-dump.rst
@@ -0,0 +1,301 @@
+======================
+Firmware-Assisted Dump
+======================
+
+July 2011
+
+The goal of firmware-assisted dump is to enable the dump of
+a crashed system, and to do so from a fully-reset system, and
+to minimize the total elapsed time until the system is back
+in production use.
+
+- Firmware assisted dump (fadump) infrastructure is intended to replace
+  the existing phyp assisted dump.
+- Fadump uses the same firmware interfaces and memory reservation model
+  as phyp assisted dump.
+- Unlike phyp dump, fadump exports the memory dump through /proc/vmcore
+  in the ELF format in the same way as kdump. This helps us reuse the
+  kdump infrastructure for dump capture and filtering.
+- Unlike phyp dump, userspace tool does not need to refer any sysfs
+  interface while reading /proc/vmcore.
+- Unlike phyp dump, fadump allows user to release all the memory reserved
+  for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
+- Once enabled through kernel boot parameter, fadump can be
+  started/stopped through /sys/kernel/fadump_registered interface (see
+  sysfs files section below) and can be easily integrated with kdump
+  service start/stop init scripts.
+
+Comparing with kdump or other strategies, firmware-assisted
+dump offers several strong, practical advantages:
+
+-  Unlike kdump, the system has been reset, and loaded
+   with a fresh copy of the kernel.  In particular,
+   PCI and I/O devices have been reinitialized and are
+   in a clean, consistent state.
+-  Once the dump is copied out, the memory that held the dump
+   is immediately available to the running kernel. And therefore,
+   unlike kdump, fadump doesn't need a 2nd reboot to get back
+   the system to the production configuration.
+
+The above can only be accomplished by coordination with,
+and assistance from the Power firmware. The procedure is
+as follows:
+
+-  The first kernel registers the sections of memory with the
+   Power firmware for dump preservation during OS initialization.
+   These registered sections of memory are reserved by the first
+   kernel during early boot.
+
+-  When a system crashes, the Power firmware will save
+   the low memory (boot memory of size larger of 5% of system RAM
+   or 256MB) of RAM to the previous registered region. It will
+   also save system registers, and hardware PTE's.
+
+   NOTE:
+         The term 'boot memory' means size of the low memory chunk
+         that is required for a kernel to boot successfully when
+         booted with restricted memory. By default, the boot memory
+         size will be the larger of 5% of system RAM or 256MB.
+         Alternatively, user can also specify boot memory size
+         through boot parameter 'crashkernel=' which will override
+         the default calculated size. Use this option if default
+         boot memory size is not sufficient for second kernel to
+         boot successfully. For syntax of crashkernel= parameter,
+         refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
+         provided in crashkernel= parameter, it will be ignored
+         as fadump uses a predefined offset to reserve memory
+         for boot memory dump preservation in case of a crash.
+
+-  After the low memory (boot memory) area has been saved, the
+   firmware will reset PCI and other hardware state.  It will
+   *not* clear the RAM. It will then launch the bootloader, as
+   normal.
+
+-  The freshly booted kernel will notice that there is a new
+   node (ibm,dump-kernel) in the device tree, indicating that
+   there is crash data available from a previous boot. During
+   the early boot OS will reserve rest of the memory above
+   boot memory size effectively booting with restricted memory
+   size. This will make sure that the second kernel will not
+   touch any of the dump memory area.
+
+-  User-space tools will read /proc/vmcore to obtain the contents
+   of memory, which holds the previous crashed kernel dump in ELF
+   format. The userspace tools may copy this info to disk, or
+   network, nas, san, iscsi, etc. as desired.
+
+-  Once the userspace tool is done saving dump, it will echo
+   '1' to /sys/kernel/fadump_release_mem to release the reserved
+   memory back to general use, except the memory required for
+   next firmware-assisted dump registration.
+
+   e.g.::
+
+     # echo 1 > /sys/kernel/fadump_release_mem
+
+Please note that the firmware-assisted dump feature
+is only available on Power6 and above systems with recent
+firmware versions.
+
+Implementation details:
+-----------------------
+
+During boot, a check is made to see if firmware supports
+this feature on that particular machine. If it does, then
+we check to see if an active dump is waiting for us. If yes
+then everything but boot memory size of RAM is reserved during
+early boot (See Fig. 2). This area is released once we finish
+collecting the dump from user land scripts (e.g. kdump scripts)
+that are run. If there is dump data, then the
+/sys/kernel/fadump_release_mem file is created, and the reserved
+memory is held.
+
+If there is no waiting dump data, then only the memory required
+to hold CPU state, HPTE region, boot memory dump and elfcore
+header, is usually reserved at an offset greater than boot memory
+size (see Fig. 1). This area is *not* released: this region will
+be kept permanently reserved, so that it can act as a receptacle
+for a copy of the boot memory content in addition to CPU state
+and HPTE region, in the case a crash does occur. Since this reserved
+memory area is used only after the system crash, there is no point in
+blocking this significant chunk of memory from production kernel.
+Hence, the implementation uses the Linux kernel's Contiguous Memory
+Allocator (CMA) for memory reservation if CMA is configured for kernel.
+With CMA reservation this memory will be available for applications to
+use it, while kernel is prevented from using it. With this fadump will
+still be able to capture all of the kernel memory and most of the user
+space memory except the user pages that were present in CMA region::
+
+  o Memory Reservation during first kernel
+
+  Low memory                                         Top of memory
+  0      boot memory size                                       |
+  |           |                |<--Reserved dump area -->|      |
+  V           V                |   Permanent Reservation |      V
+  +-----------+----------/ /---+---+----+-----------+----+------+
+  |           |                |CPU|HPTE|  DUMP     |ELF |      |
+  +-----------+----------/ /---+---+----+-----------+----+------+
+        |                                           ^
+        |                                           |
+        \                                           /
+         -------------------------------------------
+          Boot memory content gets transferred to
+          reserved area by firmware at the time of
+          crash
+                   Fig. 1
+
+  o Memory Reservation during second kernel after crash
+
+  Low memory                                        Top of memory
+  0      boot memory size                                       |
+  |           |<------------- Reserved dump area ----------- -->|
+  V           V                                                 V
+  +-----------+----------/ /---+---+----+-----------+----+------+
+  |           |                |CPU|HPTE|  DUMP     |ELF |      |
+  +-----------+----------/ /---+---+----+-----------+----+------+
+        |                                              |
+        V                                              V
+   Used by second                                /proc/vmcore
+   kernel to boot
+                   Fig. 2
+
+Currently the dump will be copied from /proc/vmcore to a
+a new file upon user intervention. The dump data available through
+/proc/vmcore will be in ELF format. Hence the existing kdump
+infrastructure (kdump scripts) to save the dump works fine with
+minor modifications.
+
+The tools to examine the dump will be same as the ones
+used for kdump.
+
+How to enable firmware-assisted dump (fadump):
+----------------------------------------------
+
+1. Set config option CONFIG_FA_DUMP=y and build kernel.
+2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
+   By default, fadump reserved memory will be initialized as CMA area.
+   Alternatively, user can boot linux kernel with 'fadump=nocma' to
+   prevent fadump to use CMA.
+3. Optionally, user can also set 'crashkernel=' kernel cmdline
+   to specify size of the memory to reserve for boot memory dump
+   preservation.
+
+NOTE:
+     1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+        use 'crashkernel=' to specify size of the memory to reserve
+        for boot memory dump preservation.
+     2. If firmware-assisted dump fails to reserve memory then it
+        will fallback to existing kdump mechanism if 'crashkernel='
+        option is set at kernel cmdline.
+     3. if user wants to capture all of user space memory and ok with
+        reserved memory not available to production system, then
+        'fadump=nocma' kernel parameter can be used to fallback to
+        old behaviour.
+
+Sysfs/debugfs files:
+--------------------
+
+Firmware-assisted dump feature uses sysfs file system to hold
+the control files and debugfs file to display memory reserved region.
+
+Here is the list of files under kernel sysfs:
+
+ /sys/kernel/fadump_enabled
+    This is used to display the fadump status.
+
+    - 0 = fadump is disabled
+    - 1 = fadump is enabled
+
+    This interface can be used by kdump init scripts to identify if
+    fadump is enabled in the kernel and act accordingly.
+
+ /sys/kernel/fadump_registered
+    This is used to display the fadump registration status as well
+    as to control (start/stop) the fadump registration.
+
+    - 0 = fadump is not registered.
+    - 1 = fadump is registered and ready to handle system crash.
+
+    To register fadump echo 1 > /sys/kernel/fadump_registered and
+    echo 0 > /sys/kernel/fadump_registered for un-register and stop the
+    fadump. Once the fadump is un-registered, the system crash will not
+    be handled and vmcore will not be captured. This interface can be
+    easily integrated with kdump service start/stop.
+
+ /sys/kernel/fadump_release_mem
+    This file is available only when fadump is active during
+    second kernel. This is used to release the reserved memory
+    region that are held for saving crash dump. To release the
+    reserved memory echo 1 to it::
+
+	echo 1  > /sys/kernel/fadump_release_mem
+
+    After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
+    file will change to reflect the new memory reservations.
+
+    The existing userspace tools (kdump infrastructure) can be easily
+    enhanced to use this interface to release the memory reserved for
+    dump and continue without 2nd reboot.
+
+Here is the list of files under powerpc debugfs:
+(Assuming debugfs is mounted on /sys/kernel/debug directory.)
+
+ /sys/kernel/debug/powerpc/fadump_region
+    This file shows the reserved memory regions if fadump is
+    enabled otherwise this file is empty. The output format
+    is::
+
+      <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
+
+    e.g.
+    Contents when fadump is registered during first kernel::
+
+      # cat /sys/kernel/debug/powerpc/fadump_region
+      CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
+      HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
+      DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
+
+    Contents when fadump is active during second kernel::
+
+      # cat /sys/kernel/debug/powerpc/fadump_region
+      CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
+      HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
+      DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
+          : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
+
+NOTE:
+      Please refer to Documentation/filesystems/debugfs.txt on
+      how to mount the debugfs filesystem.
+
+
+TODO:
+-----
+ - Need to come up with the better approach to find out more
+   accurate boot memory size that is required for a kernel to
+   boot successfully when booted with restricted memory.
+ - The fadump implementation introduces a fadump crash info structure
+   in the scratch area before the ELF core header. The idea of introducing
+   this structure is to pass some important crash info data to the second
+   kernel which will help second kernel to populate ELF core header with
+   correct data before it gets exported through /proc/vmcore. The current
+   design implementation does not address a possibility of introducing
+   additional fields (in future) to this structure without affecting
+   compatibility. Need to come up with the better approach to address this.
+
+   The possible approaches are:
+
+	1. Introduce version field for version tracking, bump up the version
+	whenever a new field is added to the structure in future. The version
+	field can be used to find out what fields are valid for the current
+	version of the structure.
+	2. Reserve the area of predefined size (say PAGE_SIZE) for this
+	structure and have unused area as reserved (initialized to zero)
+	for future field additions.
+
+   The advantage of approach 1 over 2 is we don't need to reserve extra space.
+
+Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+
+This document is based on the original documentation written for phyp
+
+assisted dump by Linas Vepstas and Manish Ahuja.
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
deleted file mode 100644
index 10e7f4d16c14..000000000000
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ /dev/null
@@ -1,292 +0,0 @@
-
-                   Firmware-Assisted Dump
-                   ------------------------
-                       July 2011
-
-The goal of firmware-assisted dump is to enable the dump of
-a crashed system, and to do so from a fully-reset system, and
-to minimize the total elapsed time until the system is back
-in production use.
-
-- Firmware assisted dump (fadump) infrastructure is intended to replace
-  the existing phyp assisted dump.
-- Fadump uses the same firmware interfaces and memory reservation model
-  as phyp assisted dump.
-- Unlike phyp dump, fadump exports the memory dump through /proc/vmcore
-  in the ELF format in the same way as kdump. This helps us reuse the
-  kdump infrastructure for dump capture and filtering.
-- Unlike phyp dump, userspace tool does not need to refer any sysfs
-  interface while reading /proc/vmcore.
-- Unlike phyp dump, fadump allows user to release all the memory reserved
-  for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
-- Once enabled through kernel boot parameter, fadump can be
-  started/stopped through /sys/kernel/fadump_registered interface (see
-  sysfs files section below) and can be easily integrated with kdump
-  service start/stop init scripts.
-
-Comparing with kdump or other strategies, firmware-assisted
-dump offers several strong, practical advantages:
-
--- Unlike kdump, the system has been reset, and loaded
-   with a fresh copy of the kernel.  In particular,
-   PCI and I/O devices have been reinitialized and are
-   in a clean, consistent state.
--- Once the dump is copied out, the memory that held the dump
-   is immediately available to the running kernel. And therefore,
-   unlike kdump, fadump doesn't need a 2nd reboot to get back
-   the system to the production configuration.
-
-The above can only be accomplished by coordination with,
-and assistance from the Power firmware. The procedure is
-as follows:
-
--- The first kernel registers the sections of memory with the
-   Power firmware for dump preservation during OS initialization.
-   These registered sections of memory are reserved by the first
-   kernel during early boot.
-
--- When a system crashes, the Power firmware will save
-   the low memory (boot memory of size larger of 5% of system RAM
-   or 256MB) of RAM to the previous registered region. It will
-   also save system registers, and hardware PTE's.
-
-   NOTE: The term 'boot memory' means size of the low memory chunk
-         that is required for a kernel to boot successfully when
-         booted with restricted memory. By default, the boot memory
-         size will be the larger of 5% of system RAM or 256MB.
-         Alternatively, user can also specify boot memory size
-         through boot parameter 'crashkernel=' which will override
-         the default calculated size. Use this option if default
-         boot memory size is not sufficient for second kernel to
-         boot successfully. For syntax of crashkernel= parameter,
-         refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
-         provided in crashkernel= parameter, it will be ignored
-         as fadump uses a predefined offset to reserve memory
-         for boot memory dump preservation in case of a crash.
-
--- After the low memory (boot memory) area has been saved, the
-   firmware will reset PCI and other hardware state.  It will
-   *not* clear the RAM. It will then launch the bootloader, as
-   normal.
-
--- The freshly booted kernel will notice that there is a new
-   node (ibm,dump-kernel) in the device tree, indicating that
-   there is crash data available from a previous boot. During
-   the early boot OS will reserve rest of the memory above
-   boot memory size effectively booting with restricted memory
-   size. This will make sure that the second kernel will not
-   touch any of the dump memory area.
-
--- User-space tools will read /proc/vmcore to obtain the contents
-   of memory, which holds the previous crashed kernel dump in ELF
-   format. The userspace tools may copy this info to disk, or
-   network, nas, san, iscsi, etc. as desired.
-
--- Once the userspace tool is done saving dump, it will echo
-   '1' to /sys/kernel/fadump_release_mem to release the reserved
-   memory back to general use, except the memory required for
-   next firmware-assisted dump registration.
-
-   e.g.
-     # echo 1 > /sys/kernel/fadump_release_mem
-
-Please note that the firmware-assisted dump feature
-is only available on Power6 and above systems with recent
-firmware versions.
-
-Implementation details:
-----------------------
-
-During boot, a check is made to see if firmware supports
-this feature on that particular machine. If it does, then
-we check to see if an active dump is waiting for us. If yes
-then everything but boot memory size of RAM is reserved during
-early boot (See Fig. 2). This area is released once we finish
-collecting the dump from user land scripts (e.g. kdump scripts)
-that are run. If there is dump data, then the
-/sys/kernel/fadump_release_mem file is created, and the reserved
-memory is held.
-
-If there is no waiting dump data, then only the memory required
-to hold CPU state, HPTE region, boot memory dump and elfcore
-header, is usually reserved at an offset greater than boot memory
-size (see Fig. 1). This area is *not* released: this region will
-be kept permanently reserved, so that it can act as a receptacle
-for a copy of the boot memory content in addition to CPU state
-and HPTE region, in the case a crash does occur. Since this reserved
-memory area is used only after the system crash, there is no point in
-blocking this significant chunk of memory from production kernel.
-Hence, the implementation uses the Linux kernel's Contiguous Memory
-Allocator (CMA) for memory reservation if CMA is configured for kernel.
-With CMA reservation this memory will be available for applications to
-use it, while kernel is prevented from using it. With this fadump will
-still be able to capture all of the kernel memory and most of the user
-space memory except the user pages that were present in CMA region.
-
-  o Memory Reservation during first kernel
-
-  Low memory                                         Top of memory
-  0      boot memory size                                       |
-  |           |                |<--Reserved dump area -->|      |
-  V           V                |   Permanent Reservation |      V
-  +-----------+----------/ /---+---+----+-----------+----+------+
-  |           |                |CPU|HPTE|  DUMP     |ELF |      |
-  +-----------+----------/ /---+---+----+-----------+----+------+
-        |                                           ^
-        |                                           |
-        \                                           /
-         -------------------------------------------
-          Boot memory content gets transferred to
-          reserved area by firmware at the time of
-          crash
-                   Fig. 1
-
-  o Memory Reservation during second kernel after crash
-
-  Low memory                                        Top of memory
-  0      boot memory size                                       |
-  |           |<------------- Reserved dump area ----------- -->|
-  V           V                                                 V
-  +-----------+----------/ /---+---+----+-----------+----+------+
-  |           |                |CPU|HPTE|  DUMP     |ELF |      |
-  +-----------+----------/ /---+---+----+-----------+----+------+
-        |                                              |
-        V                                              V
-   Used by second                                /proc/vmcore
-   kernel to boot
-                   Fig. 2
-
-Currently the dump will be copied from /proc/vmcore to a
-a new file upon user intervention. The dump data available through
-/proc/vmcore will be in ELF format. Hence the existing kdump
-infrastructure (kdump scripts) to save the dump works fine with
-minor modifications.
-
-The tools to examine the dump will be same as the ones
-used for kdump.
-
-How to enable firmware-assisted dump (fadump):
--------------------------------------
-
-1. Set config option CONFIG_FA_DUMP=y and build kernel.
-2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
-   By default, fadump reserved memory will be initialized as CMA area.
-   Alternatively, user can boot linux kernel with 'fadump=nocma' to
-   prevent fadump to use CMA.
-3. Optionally, user can also set 'crashkernel=' kernel cmdline
-   to specify size of the memory to reserve for boot memory dump
-   preservation.
-
-NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
-         use 'crashkernel=' to specify size of the memory to reserve
-         for boot memory dump preservation.
-      2. If firmware-assisted dump fails to reserve memory then it
-         will fallback to existing kdump mechanism if 'crashkernel='
-         option is set at kernel cmdline.
-      3. if user wants to capture all of user space memory and ok with
-         reserved memory not available to production system, then
-         'fadump=nocma' kernel parameter can be used to fallback to
-         old behaviour.
-
-Sysfs/debugfs files:
-------------
-
-Firmware-assisted dump feature uses sysfs file system to hold
-the control files and debugfs file to display memory reserved region.
-
-Here is the list of files under kernel sysfs:
-
- /sys/kernel/fadump_enabled
-
-    This is used to display the fadump status.
-    0 = fadump is disabled
-    1 = fadump is enabled
-
-    This interface can be used by kdump init scripts to identify if
-    fadump is enabled in the kernel and act accordingly.
-
- /sys/kernel/fadump_registered
-
-    This is used to display the fadump registration status as well
-    as to control (start/stop) the fadump registration.
-    0 = fadump is not registered.
-    1 = fadump is registered and ready to handle system crash.
-
-    To register fadump echo 1 > /sys/kernel/fadump_registered and
-    echo 0 > /sys/kernel/fadump_registered for un-register and stop the
-    fadump. Once the fadump is un-registered, the system crash will not
-    be handled and vmcore will not be captured. This interface can be
-    easily integrated with kdump service start/stop.
-
- /sys/kernel/fadump_release_mem
-
-    This file is available only when fadump is active during
-    second kernel. This is used to release the reserved memory
-    region that are held for saving crash dump. To release the
-    reserved memory echo 1 to it:
-
-    echo 1  > /sys/kernel/fadump_release_mem
-
-    After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
-    file will change to reflect the new memory reservations.
-
-    The existing userspace tools (kdump infrastructure) can be easily
-    enhanced to use this interface to release the memory reserved for
-    dump and continue without 2nd reboot.
-
-Here is the list of files under powerpc debugfs:
-(Assuming debugfs is mounted on /sys/kernel/debug directory.)
-
- /sys/kernel/debug/powerpc/fadump_region
-
-    This file shows the reserved memory regions if fadump is
-    enabled otherwise this file is empty. The output format
-    is:
-    <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
-
-    e.g.
-    Contents when fadump is registered during first kernel
-
-    # cat /sys/kernel/debug/powerpc/fadump_region
-    CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
-    HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
-    DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
-
-    Contents when fadump is active during second kernel
-
-    # cat /sys/kernel/debug/powerpc/fadump_region
-    CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
-    HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
-    DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
-        : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
-
-NOTE: Please refer to Documentation/filesystems/debugfs.txt on
-      how to mount the debugfs filesystem.
-
-
-TODO:
------
- o Need to come up with the better approach to find out more
-   accurate boot memory size that is required for a kernel to
-   boot successfully when booted with restricted memory.
- o The fadump implementation introduces a fadump crash info structure
-   in the scratch area before the ELF core header. The idea of introducing
-   this structure is to pass some important crash info data to the second
-   kernel which will help second kernel to populate ELF core header with
-   correct data before it gets exported through /proc/vmcore. The current
-   design implementation does not address a possibility of introducing
-   additional fields (in future) to this structure without affecting
-   compatibility. Need to come up with the better approach to address this.
-   The possible approaches are:
-	1. Introduce version field for version tracking, bump up the version
-	whenever a new field is added to the structure in future. The version
-	field can be used to find out what fields are valid for the current
-	version of the structure.
-	2. Reserve the area of predefined size (say PAGE_SIZE) for this
-	structure and have unused area as reserved (initialized to zero)
-	for future field additions.
-   The advantage of approach 1 over 2 is we don't need to reserve extra space.
----
-Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
-This document is based on the original documentation written for phyp
-assisted dump by Linas Vepstas and Manish Ahuja.
diff --git a/Documentation/powerpc/hvcs.rst b/Documentation/powerpc/hvcs.rst
new file mode 100644
index 000000000000..6808acde672f
--- /dev/null
+++ b/Documentation/powerpc/hvcs.rst
@@ -0,0 +1,581 @@
+===============================================================
+HVCS IBM "Hypervisor Virtual Console Server" Installation Guide
+===============================================================
+
+for Linux Kernel 2.6.4+
+
+Copyright (C) 2004 IBM Corporation
+
+.. ===========================================================================
+.. NOTE:Eight space tabs are the optimum editor setting for reading this file.
+.. ===========================================================================
+
+
+Author(s): Ryan S. Arnold <rsa@us.ibm.com>
+
+Date Created: March, 02, 2004
+Last Changed: August, 24, 2004
+
+.. Table of contents:
+
+	1.  Driver Introduction:
+	2.  System Requirements
+	3.  Build Options:
+		3.1  Built-in:
+		3.2  Module:
+	4.  Installation:
+	5.  Connection:
+	6.  Disconnection:
+	7.  Configuration:
+	8.  Questions & Answers:
+	9.  Reporting Bugs:
+
+1. Driver Introduction:
+=======================
+
+This is the device driver for the IBM Hypervisor Virtual Console Server,
+"hvcs".  The IBM hvcs provides a tty driver interface to allow Linux user
+space applications access to the system consoles of logically partitioned
+operating systems (Linux and AIX) running on the same partitioned Power5
+ppc64 system.  Physical hardware consoles per partition are not practical
+on this hardware so system consoles are accessed by this driver using
+firmware interfaces to virtual terminal devices.
+
+2. System Requirements:
+=======================
+
+This device driver was written using 2.6.4 Linux kernel APIs and will only
+build and run on kernels of this version or later.
+
+This driver was written to operate solely on IBM Power5 ppc64 hardware
+though some care was taken to abstract the architecture dependent firmware
+calls from the driver code.
+
+Sysfs must be mounted on the system so that the user can determine which
+major and minor numbers are associated with each vty-server.  Directions
+for sysfs mounting are outside the scope of this document.
+
+3. Build Options:
+=================
+
+The hvcs driver registers itself as a tty driver.  The tty layer
+dynamically allocates a block of major and minor numbers in a quantity
+requested by the registering driver.  The hvcs driver asks the tty layer
+for 64 of these major/minor numbers by default to use for hvcs device node
+entries.
+
+If the default number of device entries is adequate then this driver can be
+built into the kernel.  If not, the default can be over-ridden by inserting
+the driver as a module with insmod parameters.
+
+3.1 Built-in:
+-------------
+
+The following menuconfig example demonstrates selecting to build this
+driver into the kernel::
+
+	Device Drivers  --->
+		Character devices  --->
+			<*> IBM Hypervisor Virtual Console Server Support
+
+Begin the kernel make process.
+
+3.2 Module:
+-----------
+
+The following menuconfig example demonstrates selecting to build this
+driver as a kernel module::
+
+	Device Drivers  --->
+		Character devices  --->
+			<M> IBM Hypervisor Virtual Console Server Support
+
+The make process will build the following kernel modules:
+
+	- hvcs.ko
+	- hvcserver.ko
+
+To insert the module with the default allocation execute the following
+commands in the order they appear::
+
+	insmod hvcserver.ko
+	insmod hvcs.ko
+
+The hvcserver module contains architecture specific firmware calls and must
+be inserted first, otherwise the hvcs module will not find some of the
+symbols it expects.
+
+To override the default use an insmod parameter as follows (requesting 4
+tty devices as an example)::
+
+	insmod hvcs.ko hvcs_parm_num_devs=4
+
+There is a maximum number of dev entries that can be specified on insmod.
+We think that 1024 is currently a decent maximum number of server adapters
+to allow.  This can always be changed by modifying the constant in the
+source file before building.
+
+NOTE: The length of time it takes to insmod the driver seems to be related
+to the number of tty interfaces the registering driver requests.
+
+In order to remove the driver module execute the following command::
+
+	rmmod hvcs.ko
+
+The recommended method for installing hvcs as a module is to use depmod to
+build a current modules.dep file in /lib/modules/`uname -r` and then
+execute::
+
+	modprobe hvcs hvcs_parm_num_devs=4
+
+The modules.dep file indicates that hvcserver.ko needs to be inserted
+before hvcs.ko and modprobe uses this file to smartly insert the modules in
+the proper order.
+
+The following modprobe command is used to remove hvcs and hvcserver in the
+proper order::
+
+	modprobe -r hvcs
+
+4. Installation:
+================
+
+The tty layer creates sysfs entries which contain the major and minor
+numbers allocated for the hvcs driver.  The following snippet of "tree"
+output of the sysfs directory shows where these numbers are presented::
+
+	sys/
+	|-- *other sysfs base dirs*
+	|
+	|-- class
+	|   |-- *other classes of devices*
+	|   |
+	|   `-- tty
+	|       |-- *other tty devices*
+	|       |
+	|       |-- hvcs0
+	|       |   `-- dev
+	|       |-- hvcs1
+	|       |   `-- dev
+	|       |-- hvcs2
+	|       |   `-- dev
+	|       |-- hvcs3
+	|       |   `-- dev
+	|       |
+	|       |-- *other tty devices*
+	|
+	|-- *other sysfs base dirs*
+
+For the above examples the following output is a result of cat'ing the
+"dev" entry in the hvcs directory::
+
+	Pow5:/sys/class/tty/hvcs0/ # cat dev
+	254:0
+
+	Pow5:/sys/class/tty/hvcs1/ # cat dev
+	254:1
+
+	Pow5:/sys/class/tty/hvcs2/ # cat dev
+	254:2
+
+	Pow5:/sys/class/tty/hvcs3/ # cat dev
+	254:3
+
+The output from reading the "dev" attribute is the char device major and
+minor numbers that the tty layer has allocated for this driver's use.  Most
+systems running hvcs will already have the device entries created or udev
+will do it automatically.
+
+Given the example output above, to manually create a /dev/hvcs* node entry
+mknod can be used as follows::
+
+	mknod /dev/hvcs0 c 254 0
+	mknod /dev/hvcs1 c 254 1
+	mknod /dev/hvcs2 c 254 2
+	mknod /dev/hvcs3 c 254 3
+
+Using mknod to manually create the device entries makes these device nodes
+persistent.  Once created they will exist prior to the driver insmod.
+
+Attempting to connect an application to /dev/hvcs* prior to insertion of
+the hvcs module will result in an error message similar to the following::
+
+	"/dev/hvcs*: No such device".
+
+NOTE: Just because there is a device node present doesn't mean that there
+is a vty-server device configured for that node.
+
+5. Connection
+=============
+
+Since this driver controls devices that provide a tty interface a user can
+interact with the device node entries using any standard tty-interactive
+method (e.g. "cat", "dd", "echo").  The intent of this driver however, is
+to provide real time console interaction with a Linux partition's console,
+which requires the use of applications that provide bi-directional,
+interactive I/O with a tty device.
+
+Applications (e.g. "minicom" and "screen") that act as terminal emulators
+or perform terminal type control sequence conversion on the data being
+passed through them are NOT acceptable for providing interactive console
+I/O.  These programs often emulate antiquated terminal types (vt100 and
+ANSI) and expect inbound data to take the form of one of these supported
+terminal types but they either do not convert, or do not _adequately_
+convert, outbound data into the terminal type of the terminal which invoked
+them (though screen makes an attempt and can apparently be configured with
+much termcap wrestling.)
+
+For this reason kermit and cu are two of the recommended applications for
+interacting with a Linux console via an hvcs device.  These programs simply
+act as a conduit for data transfer to and from the tty device.  They do not
+require inbound data to take the form of a particular terminal type, nor do
+they cook outbound data to a particular terminal type.
+
+In order to ensure proper functioning of console applications one must make
+sure that once connected to a /dev/hvcs console that the console's $TERM
+env variable is set to the exact terminal type of the terminal emulator
+used to launch the interactive I/O application.  If one is using xterm and
+kermit to connect to /dev/hvcs0 when the console prompt becomes available
+one should "export TERM=xterm" on the console.  This tells ncurses
+applications that are invoked from the console that they should output
+control sequences that xterm can understand.
+
+As a precautionary measure an hvcs user should always "exit" from their
+session before disconnecting an application such as kermit from the device
+node.  If this is not done, the next user to connect to the console will
+continue using the previous user's logged in session which includes
+using the $TERM variable that the previous user supplied.
+
+Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node
+is used to connect to each vty-server adapter.  In order to determine which
+vty-server adapter is associated with which /dev/hvcs* node a special sysfs
+attribute has been added to each vty-server sysfs entry.  This entry is
+called "index" and showing it reveals an integer that refers to the
+/dev/hvcs* entry to use to connect to that device.  For instance cating the
+index attribute of vty-server adapter 30000004 shows the following::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
+	2
+
+This index of '2' means that in order to connect to vty-server adapter
+30000004 the user should interact with /dev/hvcs2.
+
+It should be noted that due to the system hotplug I/O capabilities of a
+system the /dev/hvcs* entry that interacts with a particular vty-server
+adapter is not guaranteed to remain the same across system reboots.  Look
+in the Q & A section for more on this issue.
+
+6. Disconnection
+================
+
+As a security feature to prevent the delivery of stale data to an
+unintended target the Power5 system firmware disables the fetching of data
+and discards that data when a connection between a vty-server and a vty has
+been severed.  As an example, when a vty-server is immediately disconnected
+from a vty following output of data to the vty the vty adapter may not have
+enough time between when it received the data interrupt and when the
+connection was severed to fetch the data from firmware before the fetch is
+disabled by firmware.
+
+When hvcs is being used to serve consoles this behavior is not a huge issue
+because the adapter stays connected for large amounts of time following
+almost all data writes.  When hvcs is being used as a tty conduit to tunnel
+data between two partitions [see Q & A below] this is a huge problem
+because the standard Linux behavior when cat'ing or dd'ing data to a device
+is to open the tty, send the data, and then close the tty.  If this driver
+manually terminated vty-server connections on tty close this would close
+the vty-server and vty connection before the target vty has had a chance to
+fetch the data.
+
+Additionally, disconnecting a vty-server and vty only on module removal or
+adapter removal is impractical because other vty-servers in other
+partitions may require the usage of the target vty at any time.
+
+Due to this behavioral restriction disconnection of vty-servers from the
+connected vty is a manual procedure using a write to a sysfs attribute
+outlined below, on the other hand the initial vty-server connection to a
+vty is established automatically by this driver.  Manual vty-server
+connection is never required.
+
+In order to terminate the connection between a vty-server and vty the
+"vterm_state" sysfs attribute within each vty-server's sysfs entry is used.
+Reading this attribute reveals the current connection state of the
+vty-server adapter.  A zero means that the vty-server is not connected to a
+vty.  A one indicates that a connection is active.
+
+Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM
+connection between the vty-server and target vty ONLY if the vterm_state
+previously read '1'.  The write directive is ignored if the vterm_state
+read '0' or if any value other than '0' was written to the vterm_state
+attribute.  The following example will show the method used for verifying
+the vty-server connection status and disconnecting a vty-server connection::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+	1
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+	0
+
+All vty-server connections are automatically terminated when the device is
+hotplug removed and when the module is removed.
+
+7. Configuration
+================
+
+Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
+is symlinked in several other sysfs tree directories, notably under the
+hvcs driver entry, which looks like the following example::
+
+	Pow5:/sys/bus/vio/drivers/hvcs # ls
+	.  ..  30000003  30000004  rescan
+
+By design, firmware notifies the hvcs driver of vty-server lifetimes and
+partner vty removals but not the addition of partner vtys.  Since an HMC
+Super Admin can add partner info dynamically we have provided the hvcs
+driver sysfs directory with the "rescan" update attribute which will query
+firmware and update the partner info for all the vty-servers that this
+driver manages.  Writing a '1' to the attribute triggers the update.  An
+explicit example follows:
+
+	Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan
+
+Reading the attribute will indicate a state of '1' or '0'.  A one indicates
+that an update is in process.  A zero indicates that an update has
+completed or was never executed.
+
+Vty-server entries in this directory are a 32 bit partition unique unit
+address that is created by firmware.  An example vty-server sysfs entry
+looks like the following::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
+	.   current_vty   devspec       name          partner_vtys
+	..  index         partner_clcs  vterm_state
+
+Each entry is provided, by default with a "name" attribute.  Reading the
+"name" attribute will reveal the device type as shown in the following
+example::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
+	vty-server
+
+Each entry is also provided, by default, with a "devspec" attribute which
+reveals the full device specification when read, as shown in the following
+example::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
+	/vdevice/vty-server@30000004
+
+Each vty-server sysfs dir is provided with two read-only attributes that
+provide lists of easily parsed partner vty data: "partner_vtys" and
+"partner_clcs"::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
+	30000000
+	30000001
+	30000002
+	30000000
+	30000000
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs
+	U5112.428.103048A-V3-C0
+	U5112.428.103048A-V3-C2
+	U5112.428.103048A-V3-C3
+	U5112.428.103048A-V4-C0
+	U5112.428.103048A-V5-C0
+
+Reading partner_vtys returns a list of partner vtys.  Vty unit address
+numbering is only per-partition-unique so entries will frequently repeat.
+
+Reading partner_clcs returns a list of "converged location codes" which are
+composed of a system serial number followed by "-V*", where the '*' is the
+target partition number, and "-C*", where the '*' is the slot of the
+adapter.  The first vty partner corresponds to the first clc item, the
+second vty partner to the second clc item, etc.
+
+A vty-server can only be connected to a single vty at a time.  The entry,
+"current_vty" prints the clc of the currently selected partner vty when
+read.
+
+The current_vty can be changed by writing a valid partner clc to the entry
+as in the following example::
+
+	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
+	8A-V4-C0 > current_vty
+
+Changing the current_vty when a vty-server is already connected to a vty
+does not affect the current connection.  The change takes effect when the
+currently open connection is freed.
+
+Information on the "vterm_state" attribute was covered earlier on the
+chapter entitled "disconnection".
+
+8. Questions & Answers:
+=======================
+
+Q: What are the security concerns involving hvcs?
+
+A: There are three main security concerns:
+
+	1. The creator of the /dev/hvcs* nodes has the ability to restrict
+	the access of the device entries to certain users or groups.  It
+	may be best to create a special hvcs group privilege for providing
+	access to system consoles.
+
+	2. To provide network security when grabbing the console it is
+	suggested that the user connect to the console hosting partition
+	using a secure method, such as SSH or sit at a hardware console.
+
+	3. Make sure to exit the user session when done with a console or
+	the next vty-server connection (which may be from another
+	partition) will experience the previously logged in session.
+
+---------------------------------------------------------------------------
+
+Q: How do I multiplex a console that I grab through hvcs so that other
+people can see it:
+
+A: You can use "screen" to directly connect to the /dev/hvcs* device and
+setup a session on your machine with the console group privileges.  As
+pointed out earlier by default screen doesn't provide the termcap settings
+for most terminal emulators to provide adequate character conversion from
+term type "screen" to others.  This means that curses based programs may
+not display properly in screen sessions.
+
+---------------------------------------------------------------------------
+
+Q: Why are the colors all messed up?
+Q: Why are the control characters acting strange or not working?
+Q: Why is the console output all strange and unintelligible?
+
+A: Please see the preceding section on "Connection" for a discussion of how
+applications can affect the display of character control sequences.
+Additionally, just because you logged into the console using and xterm
+doesn't mean someone else didn't log into the console with the HMC console
+(vt320) before you and leave the session logged in.  The best thing to do
+is to export TERM to the terminal type of your terminal emulator when you
+get the console.  Additionally make sure to "exit" the console before you
+disconnect from the console.  This will ensure that the next user gets
+their own TERM type set when they login.
+
+---------------------------------------------------------------------------
+
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, can't open connection: /dev/hvcs*"What is happening?
+
+A: Some other Power5 console mechanism has a connection to the vty and
+isn't giving it up.  You can try to force disconnect the consoles from the
+HMC by right clicking on the partition and then selecting "close terminal".
+Otherwise you have to hunt down the people who have console authority.  It
+is possible that you already have the console open using another kermit
+session and just forgot about it.  Please review the console options for
+Power5 systems to determine the many ways a system console can be held.
+
+OR
+
+A: Another user may not have a connectivity method currently attached to a
+/dev/hvcs device but the vterm_state may reveal that they still have the
+vty-server connection established.  They need to free this using the method
+outlined in the section on "Disconnection" in order for others to connect
+to the target vty.
+
+OR
+
+A: The user profile you are using to execute kermit probably doesn't have
+permissions to use the /dev/hvcs* device.
+
+OR
+
+A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs*
+entry still exists (on systems without udev).
+
+OR
+
+A: There is not a corresponding vty-server device that maps to an existing
+/dev/hvcs* entry.
+
+---------------------------------------------------------------------------
+
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, write access to UUCP lockfile directory denied."
+
+A: The /dev/hvcs* entry you have specified doesn't exist where you said it
+does?  Maybe you haven't inserted the module (on systems with udev).
+
+---------------------------------------------------------------------------
+
+Q: If I already have one Linux partition installed can I use hvcs on said
+partition to provide the console for the install of a second Linux
+partition?
+
+A: Yes granted that your are connected to the /dev/hvcs* device using
+kermit or cu or some other program that doesn't provide terminal emulation.
+
+---------------------------------------------------------------------------
+
+Q: Can I connect to more than one partition's console at a time using this
+driver?
+
+A: Yes.  Of course this means that there must be more than one vty-server
+configured for this partition and each must point to a disconnected vty.
+
+---------------------------------------------------------------------------
+
+Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
+
+A: Yes, if you have dlpar and hotplug enabled for your system and it has
+been built into the kernel the hvcs drivers is configured to dynamically
+handle additions of new devices and removals of unused devices.
+
+---------------------------------------------------------------------------
+
+Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
+after a reboot.  What happened?
+
+A: Assignment of vty-server adapters to /dev/hvcs* entries is always done
+in the order that the adapters are exposed.  Due to hotplug capabilities of
+this driver assignment of hotplug added vty-servers may be in a different
+order than how they would be exposed on module load.  Rebooting or
+reloading the module after dynamic addition may result in the /dev/hvcs*
+and vty-server coupling changing if a vty-server adapter was added in a
+slot between two other vty-server adapters.  Refer to the section above
+on how to determine which vty-server goes with which /dev/hvcs* node.
+Hint; look at the sysfs "index" attribute for the vty-server.
+
+---------------------------------------------------------------------------
+
+Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
+device on that partition as the other end of the pipe?
+
+A: Yes, on Power5 platforms the hvc_console driver provides a tty interface
+for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console).
+In order to get a tty conduit working between the two partitions the HMC
+Super Admin must create an additional "serial server" for the target
+partition with the HMC gui which will show up as /dev/hvc* when the target
+partition is rebooted.
+
+The HMC Super Admin then creates an additional "serial client" for the
+current partition and points this at the target partition's newly created
+"serial server" adapter (remember the slot).  This shows up as an
+additional /dev/hvcs* device.
+
+Now a program on the target system can be configured to read or write to
+/dev/hvc* and another program on the current partition can be configured to
+read or write to /dev/hvcs*.  Now you have a tty conduit between two
+partitions.
+
+---------------------------------------------------------------------------
+
+9. Reporting Bugs:
+==================
+
+The proper channel for reporting bugs is either through the Linux OS
+distribution company that provided your OS or by posting issues to the
+PowerPC development mailing list at:
+
+linuxppc-dev@lists.ozlabs.org
+
+This request is to provide a documented and searchable public exchange
+of the problems and solutions surrounding this driver for the benefit of
+all users.
diff --git a/Documentation/powerpc/hvcs.txt b/Documentation/powerpc/hvcs.txt
deleted file mode 100644
index a730ca5a07f8..000000000000
--- a/Documentation/powerpc/hvcs.txt
+++ /dev/null
@@ -1,567 +0,0 @@
-===========================================================================
-				   HVCS
-	IBM "Hypervisor Virtual Console Server" Installation Guide
-			  for Linux Kernel 2.6.4+
-		    Copyright (C) 2004 IBM Corporation
-
-===========================================================================
-NOTE:Eight space tabs are the optimum editor setting for reading this file.
-===========================================================================
-
-	       Author(s) :  Ryan S. Arnold <rsa@us.ibm.com>
-		       Date Created: March, 02, 2004
-		       Last Changed: August, 24, 2004
-
----------------------------------------------------------------------------
-Table of contents:
-
-	1.  Driver Introduction:
-	2.  System Requirements
-	3.  Build Options:
-		3.1  Built-in:
-		3.2  Module:
-	4.  Installation:
-	5.  Connection:
-	6.  Disconnection:
-	7.  Configuration:
-	8.  Questions & Answers:
-	9.  Reporting Bugs:
-
----------------------------------------------------------------------------
-1. Driver Introduction:
-
-This is the device driver for the IBM Hypervisor Virtual Console Server,
-"hvcs".  The IBM hvcs provides a tty driver interface to allow Linux user
-space applications access to the system consoles of logically partitioned
-operating systems (Linux and AIX) running on the same partitioned Power5
-ppc64 system.  Physical hardware consoles per partition are not practical
-on this hardware so system consoles are accessed by this driver using
-firmware interfaces to virtual terminal devices.
-
----------------------------------------------------------------------------
-2. System Requirements:
-
-This device driver was written using 2.6.4 Linux kernel APIs and will only
-build and run on kernels of this version or later.
-
-This driver was written to operate solely on IBM Power5 ppc64 hardware
-though some care was taken to abstract the architecture dependent firmware
-calls from the driver code.
-
-Sysfs must be mounted on the system so that the user can determine which
-major and minor numbers are associated with each vty-server.  Directions
-for sysfs mounting are outside the scope of this document.
-
----------------------------------------------------------------------------
-3. Build Options:
-
-The hvcs driver registers itself as a tty driver.  The tty layer
-dynamically allocates a block of major and minor numbers in a quantity
-requested by the registering driver.  The hvcs driver asks the tty layer
-for 64 of these major/minor numbers by default to use for hvcs device node
-entries.
-
-If the default number of device entries is adequate then this driver can be
-built into the kernel.  If not, the default can be over-ridden by inserting
-the driver as a module with insmod parameters.
-
----------------------------------------------------------------------------
-3.1 Built-in:
-
-The following menuconfig example demonstrates selecting to build this
-driver into the kernel.
-
-	Device Drivers  --->
-		Character devices  --->
-			<*> IBM Hypervisor Virtual Console Server Support
-
-Begin the kernel make process.
-
----------------------------------------------------------------------------
-3.2 Module:
-
-The following menuconfig example demonstrates selecting to build this
-driver as a kernel module.
-
-	Device Drivers  --->
-		Character devices  --->
-			<M> IBM Hypervisor Virtual Console Server Support
-
-The make process will build the following kernel modules:
-
-	hvcs.ko
-	hvcserver.ko
-
-To insert the module with the default allocation execute the following
-commands in the order they appear:
-
-	insmod hvcserver.ko
-	insmod hvcs.ko
-
-The hvcserver module contains architecture specific firmware calls and must
-be inserted first, otherwise the hvcs module will not find some of the
-symbols it expects.
-
-To override the default use an insmod parameter as follows (requesting 4
-tty devices as an example):
-
-	insmod hvcs.ko hvcs_parm_num_devs=4
-
-There is a maximum number of dev entries that can be specified on insmod.
-We think that 1024 is currently a decent maximum number of server adapters
-to allow.  This can always be changed by modifying the constant in the
-source file before building.
-
-NOTE: The length of time it takes to insmod the driver seems to be related
-to the number of tty interfaces the registering driver requests.
-
-In order to remove the driver module execute the following command:
-
-	rmmod hvcs.ko
-
-The recommended method for installing hvcs as a module is to use depmod to
-build a current modules.dep file in /lib/modules/`uname -r` and then
-execute:
-
-modprobe hvcs hvcs_parm_num_devs=4
-
-The modules.dep file indicates that hvcserver.ko needs to be inserted
-before hvcs.ko and modprobe uses this file to smartly insert the modules in
-the proper order.
-
-The following modprobe command is used to remove hvcs and hvcserver in the
-proper order:
-
-modprobe -r hvcs
-
----------------------------------------------------------------------------
-4. Installation:
-
-The tty layer creates sysfs entries which contain the major and minor
-numbers allocated for the hvcs driver.  The following snippet of "tree"
-output of the sysfs directory shows where these numbers are presented:
-
-	sys/
-	|-- *other sysfs base dirs*
-	|
-	|-- class
-	|   |-- *other classes of devices*
-	|   |
-	|   `-- tty
-	|       |-- *other tty devices*
-	|       |
-	|       |-- hvcs0
-	|       |   `-- dev
-	|       |-- hvcs1
-	|       |   `-- dev
-	|       |-- hvcs2
-	|       |   `-- dev
-	|       |-- hvcs3
-	|       |   `-- dev
-	|       |
-	|       |-- *other tty devices*
-	|
-	|-- *other sysfs base dirs*
-
-For the above examples the following output is a result of cat'ing the
-"dev" entry in the hvcs directory:
-
-	Pow5:/sys/class/tty/hvcs0/ # cat dev
-	254:0
-
-	Pow5:/sys/class/tty/hvcs1/ # cat dev
-	254:1
-
-	Pow5:/sys/class/tty/hvcs2/ # cat dev
-	254:2
-
-	Pow5:/sys/class/tty/hvcs3/ # cat dev
-	254:3
-
-The output from reading the "dev" attribute is the char device major and
-minor numbers that the tty layer has allocated for this driver's use.  Most
-systems running hvcs will already have the device entries created or udev
-will do it automatically.
-
-Given the example output above, to manually create a /dev/hvcs* node entry
-mknod can be used as follows:
-
-	mknod /dev/hvcs0 c 254 0
-	mknod /dev/hvcs1 c 254 1
-	mknod /dev/hvcs2 c 254 2
-	mknod /dev/hvcs3 c 254 3
-
-Using mknod to manually create the device entries makes these device nodes
-persistent.  Once created they will exist prior to the driver insmod.
-
-Attempting to connect an application to /dev/hvcs* prior to insertion of
-the hvcs module will result in an error message similar to the following:
-
-	"/dev/hvcs*: No such device".
-
-NOTE: Just because there is a device node present doesn't mean that there
-is a vty-server device configured for that node.
-
----------------------------------------------------------------------------
-5. Connection
-
-Since this driver controls devices that provide a tty interface a user can
-interact with the device node entries using any standard tty-interactive
-method (e.g. "cat", "dd", "echo").  The intent of this driver however, is
-to provide real time console interaction with a Linux partition's console,
-which requires the use of applications that provide bi-directional,
-interactive I/O with a tty device.
-
-Applications (e.g. "minicom" and "screen") that act as terminal emulators
-or perform terminal type control sequence conversion on the data being
-passed through them are NOT acceptable for providing interactive console
-I/O.  These programs often emulate antiquated terminal types (vt100 and
-ANSI) and expect inbound data to take the form of one of these supported
-terminal types but they either do not convert, or do not _adequately_
-convert, outbound data into the terminal type of the terminal which invoked
-them (though screen makes an attempt and can apparently be configured with
-much termcap wrestling.)
-
-For this reason kermit and cu are two of the recommended applications for
-interacting with a Linux console via an hvcs device.  These programs simply
-act as a conduit for data transfer to and from the tty device.  They do not
-require inbound data to take the form of a particular terminal type, nor do
-they cook outbound data to a particular terminal type.
-
-In order to ensure proper functioning of console applications one must make
-sure that once connected to a /dev/hvcs console that the console's $TERM
-env variable is set to the exact terminal type of the terminal emulator
-used to launch the interactive I/O application.  If one is using xterm and
-kermit to connect to /dev/hvcs0 when the console prompt becomes available
-one should "export TERM=xterm" on the console.  This tells ncurses
-applications that are invoked from the console that they should output
-control sequences that xterm can understand.
-
-As a precautionary measure an hvcs user should always "exit" from their
-session before disconnecting an application such as kermit from the device
-node.  If this is not done, the next user to connect to the console will
-continue using the previous user's logged in session which includes
-using the $TERM variable that the previous user supplied.
-
-Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node
-is used to connect to each vty-server adapter.  In order to determine which
-vty-server adapter is associated with which /dev/hvcs* node a special sysfs
-attribute has been added to each vty-server sysfs entry.  This entry is
-called "index" and showing it reveals an integer that refers to the
-/dev/hvcs* entry to use to connect to that device.  For instance cating the
-index attribute of vty-server adapter 30000004 shows the following.
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
-	2
-
-This index of '2' means that in order to connect to vty-server adapter
-30000004 the user should interact with /dev/hvcs2.
-
-It should be noted that due to the system hotplug I/O capabilities of a
-system the /dev/hvcs* entry that interacts with a particular vty-server
-adapter is not guaranteed to remain the same across system reboots.  Look
-in the Q & A section for more on this issue.
-
----------------------------------------------------------------------------
-6. Disconnection
-
-As a security feature to prevent the delivery of stale data to an
-unintended target the Power5 system firmware disables the fetching of data
-and discards that data when a connection between a vty-server and a vty has
-been severed.  As an example, when a vty-server is immediately disconnected
-from a vty following output of data to the vty the vty adapter may not have
-enough time between when it received the data interrupt and when the
-connection was severed to fetch the data from firmware before the fetch is
-disabled by firmware.
-
-When hvcs is being used to serve consoles this behavior is not a huge issue
-because the adapter stays connected for large amounts of time following
-almost all data writes.  When hvcs is being used as a tty conduit to tunnel
-data between two partitions [see Q & A below] this is a huge problem
-because the standard Linux behavior when cat'ing or dd'ing data to a device
-is to open the tty, send the data, and then close the tty.  If this driver
-manually terminated vty-server connections on tty close this would close
-the vty-server and vty connection before the target vty has had a chance to
-fetch the data.
-
-Additionally, disconnecting a vty-server and vty only on module removal or
-adapter removal is impractical because other vty-servers in other
-partitions may require the usage of the target vty at any time.
-
-Due to this behavioral restriction disconnection of vty-servers from the
-connected vty is a manual procedure using a write to a sysfs attribute
-outlined below, on the other hand the initial vty-server connection to a
-vty is established automatically by this driver.  Manual vty-server
-connection is never required.
-
-In order to terminate the connection between a vty-server and vty the
-"vterm_state" sysfs attribute within each vty-server's sysfs entry is used.
-Reading this attribute reveals the current connection state of the
-vty-server adapter.  A zero means that the vty-server is not connected to a
-vty.  A one indicates that a connection is active.
-
-Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM
-connection between the vty-server and target vty ONLY if the vterm_state
-previously read '1'.  The write directive is ignored if the vterm_state
-read '0' or if any value other than '0' was written to the vterm_state
-attribute.  The following example will show the method used for verifying
-the vty-server connection status and disconnecting a vty-server connection.
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
-	1
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
-	0
-
-All vty-server connections are automatically terminated when the device is
-hotplug removed and when the module is removed.
-
----------------------------------------------------------------------------
-7. Configuration
-
-Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
-is symlinked in several other sysfs tree directories, notably under the
-hvcs driver entry, which looks like the following example:
-
-	Pow5:/sys/bus/vio/drivers/hvcs # ls
-	.  ..  30000003  30000004  rescan
-
-By design, firmware notifies the hvcs driver of vty-server lifetimes and
-partner vty removals but not the addition of partner vtys.  Since an HMC
-Super Admin can add partner info dynamically we have provided the hvcs
-driver sysfs directory with the "rescan" update attribute which will query
-firmware and update the partner info for all the vty-servers that this
-driver manages.  Writing a '1' to the attribute triggers the update.  An
-explicit example follows:
-
-	Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan
-
-Reading the attribute will indicate a state of '1' or '0'.  A one indicates
-that an update is in process.  A zero indicates that an update has
-completed or was never executed.
-
-Vty-server entries in this directory are a 32 bit partition unique unit
-address that is created by firmware.  An example vty-server sysfs entry
-looks like the following:
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
-	.   current_vty   devspec       name          partner_vtys
-	..  index         partner_clcs  vterm_state
-
-Each entry is provided, by default with a "name" attribute.  Reading the
-"name" attribute will reveal the device type as shown in the following
-example:
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
-	vty-server
-
-Each entry is also provided, by default, with a "devspec" attribute which
-reveals the full device specification when read, as shown in the following
-example:
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
-	/vdevice/vty-server@30000004
-
-Each vty-server sysfs dir is provided with two read-only attributes that
-provide lists of easily parsed partner vty data: "partner_vtys" and
-"partner_clcs".
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
-	30000000
-	30000001
-	30000002
-	30000000
-	30000000
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs
-	U5112.428.103048A-V3-C0
-	U5112.428.103048A-V3-C2
-	U5112.428.103048A-V3-C3
-	U5112.428.103048A-V4-C0
-	U5112.428.103048A-V5-C0
-
-Reading partner_vtys returns a list of partner vtys.  Vty unit address
-numbering is only per-partition-unique so entries will frequently repeat.
-
-Reading partner_clcs returns a list of "converged location codes" which are
-composed of a system serial number followed by "-V*", where the '*' is the
-target partition number, and "-C*", where the '*' is the slot of the
-adapter.  The first vty partner corresponds to the first clc item, the
-second vty partner to the second clc item, etc.
-
-A vty-server can only be connected to a single vty at a time.  The entry,
-"current_vty" prints the clc of the currently selected partner vty when
-read.
-
-The current_vty can be changed by writing a valid partner clc to the entry
-as in the following example:
-
-	Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
-	8A-V4-C0 > current_vty
-
-Changing the current_vty when a vty-server is already connected to a vty
-does not affect the current connection.  The change takes effect when the
-currently open connection is freed.
-
-Information on the "vterm_state" attribute was covered earlier on the
-chapter entitled "disconnection".
-
----------------------------------------------------------------------------
-8. Questions & Answers:
-===========================================================================
-Q: What are the security concerns involving hvcs?
-
-A: There are three main security concerns:
-
-	1. The creator of the /dev/hvcs* nodes has the ability to restrict
-	the access of the device entries to certain users or groups.  It
-	may be best to create a special hvcs group privilege for providing
-	access to system consoles.
-
-	2. To provide network security when grabbing the console it is
-	suggested that the user connect to the console hosting partition
-	using a secure method, such as SSH or sit at a hardware console.
-
-	3. Make sure to exit the user session when done with a console or
-	the next vty-server connection (which may be from another
-	partition) will experience the previously logged in session.
-
----------------------------------------------------------------------------
-Q: How do I multiplex a console that I grab through hvcs so that other
-people can see it:
-
-A: You can use "screen" to directly connect to the /dev/hvcs* device and
-setup a session on your machine with the console group privileges.  As
-pointed out earlier by default screen doesn't provide the termcap settings
-for most terminal emulators to provide adequate character conversion from
-term type "screen" to others.  This means that curses based programs may
-not display properly in screen sessions.
-
----------------------------------------------------------------------------
-Q: Why are the colors all messed up?
-Q: Why are the control characters acting strange or not working?
-Q: Why is the console output all strange and unintelligible?
-
-A: Please see the preceding section on "Connection" for a discussion of how
-applications can affect the display of character control sequences.
-Additionally, just because you logged into the console using and xterm
-doesn't mean someone else didn't log into the console with the HMC console
-(vt320) before you and leave the session logged in.  The best thing to do
-is to export TERM to the terminal type of your terminal emulator when you
-get the console.  Additionally make sure to "exit" the console before you
-disconnect from the console.  This will ensure that the next user gets
-their own TERM type set when they login.
-
----------------------------------------------------------------------------
-Q: When I try to CONNECT kermit to an hvcs device I get:
-"Sorry, can't open connection: /dev/hvcs*"What is happening?
-
-A: Some other Power5 console mechanism has a connection to the vty and
-isn't giving it up.  You can try to force disconnect the consoles from the
-HMC by right clicking on the partition and then selecting "close terminal".
-Otherwise you have to hunt down the people who have console authority.  It
-is possible that you already have the console open using another kermit
-session and just forgot about it.  Please review the console options for
-Power5 systems to determine the many ways a system console can be held.
-
-OR
-
-A: Another user may not have a connectivity method currently attached to a
-/dev/hvcs device but the vterm_state may reveal that they still have the
-vty-server connection established.  They need to free this using the method
-outlined in the section on "Disconnection" in order for others to connect
-to the target vty.
-
-OR
-
-A: The user profile you are using to execute kermit probably doesn't have
-permissions to use the /dev/hvcs* device.
-
-OR
-
-A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs*
-entry still exists (on systems without udev).
-
-OR
-
-A: There is not a corresponding vty-server device that maps to an existing
-/dev/hvcs* entry.
-
----------------------------------------------------------------------------
-Q: When I try to CONNECT kermit to an hvcs device I get:
-"Sorry, write access to UUCP lockfile directory denied."
-
-A: The /dev/hvcs* entry you have specified doesn't exist where you said it
-does?  Maybe you haven't inserted the module (on systems with udev).
-
----------------------------------------------------------------------------
-Q: If I already have one Linux partition installed can I use hvcs on said
-partition to provide the console for the install of a second Linux
-partition?
-
-A: Yes granted that your are connected to the /dev/hvcs* device using
-kermit or cu or some other program that doesn't provide terminal emulation.
-
----------------------------------------------------------------------------
-Q: Can I connect to more than one partition's console at a time using this
-driver?
-
-A: Yes.  Of course this means that there must be more than one vty-server
-configured for this partition and each must point to a disconnected vty.
-
----------------------------------------------------------------------------
-Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
-
-A: Yes, if you have dlpar and hotplug enabled for your system and it has
-been built into the kernel the hvcs drivers is configured to dynamically
-handle additions of new devices and removals of unused devices.
-
----------------------------------------------------------------------------
-Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
-after a reboot.  What happened?
-
-A: Assignment of vty-server adapters to /dev/hvcs* entries is always done
-in the order that the adapters are exposed.  Due to hotplug capabilities of
-this driver assignment of hotplug added vty-servers may be in a different
-order than how they would be exposed on module load.  Rebooting or
-reloading the module after dynamic addition may result in the /dev/hvcs*
-and vty-server coupling changing if a vty-server adapter was added in a
-slot between two other vty-server adapters.  Refer to the section above
-on how to determine which vty-server goes with which /dev/hvcs* node.
-Hint; look at the sysfs "index" attribute for the vty-server.
-
----------------------------------------------------------------------------
-Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
-device on that partition as the other end of the pipe?
-
-A: Yes, on Power5 platforms the hvc_console driver provides a tty interface
-for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console).
-In order to get a tty conduit working between the two partitions the HMC
-Super Admin must create an additional "serial server" for the target
-partition with the HMC gui which will show up as /dev/hvc* when the target
-partition is rebooted.
-
-The HMC Super Admin then creates an additional "serial client" for the
-current partition and points this at the target partition's newly created
-"serial server" adapter (remember the slot).  This shows up as an
-additional /dev/hvcs* device.
-
-Now a program on the target system can be configured to read or write to
-/dev/hvc* and another program on the current partition can be configured to
-read or write to /dev/hvcs*.  Now you have a tty conduit between two
-partitions.
-
----------------------------------------------------------------------------
-9. Reporting Bugs:
-
-The proper channel for reporting bugs is either through the Linux OS
-distribution company that provided your OS or by posting issues to the
-PowerPC development mailing list at:
-
-linuxppc-dev@lists.ozlabs.org
-
-This request is to provide a documented and searchable public exchange
-of the problems and solutions surrounding this driver for the benefit of
-all users.
diff --git a/Documentation/powerpc/index.rst b/Documentation/powerpc/index.rst
new file mode 100644
index 000000000000..549b1cdd77ae
--- /dev/null
+++ b/Documentation/powerpc/index.rst
@@ -0,0 +1,34 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+powerpc
+=======
+
+.. toctree::
+    :maxdepth: 1
+
+    bootwrapper
+    cpu_families
+    cpu_features
+    cxl
+    cxlflash
+    dawr-power9
+    dscr
+    eeh-pci-error-recovery
+    firmware-assisted-dump
+    hvcs
+    isa-versions
+    mpc52xx
+    pci_iov_resource_on_powernv
+    pmu-ebb
+    ptrace
+    qe_firmware
+    syscall64-abi
+    transactional_memory
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/powerpc/isa-versions.rst b/Documentation/powerpc/isa-versions.rst
index 66c24140ebf1..a363d8c1603c 100644
--- a/Documentation/powerpc/isa-versions.rst
+++ b/Documentation/powerpc/isa-versions.rst
@@ -1,13 +1,12 @@
-:orphan:
-
+==========================
 CPU to ISA Version Mapping
 ==========================
 
 Mapping of some CPU versions to relevant ISA versions.
 
-========= ====================
+========= ====================================================================
 CPU       Architecture version
-========= ====================
+========= ====================================================================
 Power9    Power ISA v3.0B
 Power8    Power ISA v2.07
 Power7    Power ISA v2.06
@@ -24,7 +23,7 @@ PPC970    - PowerPC User Instruction Set Architecture Book I v2.01
           - PowerPC Virtual Environment Architecture Book II v2.01
           - PowerPC Operating Environment Architecture Book III v2.01
           - Plus Altivec/VMX ~= 2.03
-========= ====================
+========= ====================================================================
 
 
 Key Features
@@ -60,9 +59,9 @@ Power5     No
 PPC970     No
 ========== ====
 
-========== ====================
+========== ====================================
 CPU        Transactional Memory
-========== ====================
+========== ====================================
 Power9     Yes (* see transactional_memory.txt)
 Power8     Yes
 Power7     No
@@ -73,4 +72,4 @@ Power5++   No
 Power5+    No
 Power5     No
 PPC970     No
-========== ====================
+========== ====================================
diff --git a/Documentation/powerpc/mpc52xx.rst b/Documentation/powerpc/mpc52xx.rst
new file mode 100644
index 000000000000..8676ac63e077
--- /dev/null
+++ b/Documentation/powerpc/mpc52xx.rst
@@ -0,0 +1,43 @@
+=============================
+Linux 2.6.x on MPC52xx family
+=============================
+
+For the latest info, go to http://www.246tNt.com/mpc52xx/
+
+To compile/use :
+
+  - U-Boot::
+
+     # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+        if you wish to ).
+     # make lite5200_defconfig
+     # make uImage
+
+     then, on U-boot:
+     => tftpboot 200000 uImage
+     => tftpboot 400000 pRamdisk
+     => bootm 200000 400000
+
+  - DBug::
+
+     # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+        if you wish to ).
+     # make lite5200_defconfig
+     # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz
+     # make zImage.initrd
+     # make
+
+     then in DBug:
+     DBug> dn -i zImage.initrd.lite5200
+
+
+Some remarks:
+
+ - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
+   is not supported, and I'm not sure anyone is interesting in working on it
+   so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
+   nothing to do with the MPC5200. I also included the 'MPC' for the same
+   reason.
+ - Of course, I inspired myself from the 2.4 port. If you think I forgot to
+   mention you/your company in the copyright of some code, I'll correct it
+   ASAP.
diff --git a/Documentation/powerpc/mpc52xx.txt b/Documentation/powerpc/mpc52xx.txt
deleted file mode 100644
index 0d540a31ea1a..000000000000
--- a/Documentation/powerpc/mpc52xx.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Linux 2.6.x on MPC52xx family
------------------------------
-
-For the latest info, go to http://www.246tNt.com/mpc52xx/
-
-To compile/use :
-
-  - U-Boot:
-     # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
-        if you wish to ).
-     # make lite5200_defconfig
-     # make uImage
-
-     then, on U-boot:
-     => tftpboot 200000 uImage
-     => tftpboot 400000 pRamdisk
-     => bootm 200000 400000
-
-  - DBug:
-     # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
-        if you wish to ).
-     # make lite5200_defconfig
-     # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz
-     # make zImage.initrd
-     # make
-
-     then in DBug:
-     DBug> dn -i zImage.initrd.lite5200
-
-
-Some remarks :
- - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
-   is not supported, and I'm not sure anyone is interesting in working on it
-   so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
-   nothing to do with the MPC5200. I also included the 'MPC' for the same
-   reason.
- - Of course, I inspired myself from the 2.4 port. If you think I forgot to
-   mention you/your company in the copyright of some code, I'll correct it
-   ASAP.
diff --git a/Documentation/powerpc/pci_iov_resource_on_powernv.rst b/Documentation/powerpc/pci_iov_resource_on_powernv.rst
new file mode 100644
index 000000000000..f5a5793e1613
--- /dev/null
+++ b/Documentation/powerpc/pci_iov_resource_on_powernv.rst
@@ -0,0 +1,312 @@
+===================================================
+PCI Express I/O Virtualization Resource on Powerenv
+===================================================
+
+Wei Yang <weiyang@linux.vnet.ibm.com>
+
+Benjamin Herrenschmidt <benh@au1.ibm.com>
+
+Bjorn Helgaas <bhelgaas@google.com>
+
+26 Aug 2014
+
+This document describes the requirement from hardware for PCI MMIO resource
+sizing and assignment on PowerKVM and how generic PCI code handles this
+requirement. The first two sections describe the concepts of Partitionable
+Endpoints and the implementation on P8 (IODA2). The next two sections talks
+about considerations on enabling SRIOV on IODA2.
+
+1. Introduction to Partitionable Endpoints
+==========================================
+
+A Partitionable Endpoint (PE) is a way to group the various resources
+associated with a device or a set of devices to provide isolation between
+partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
+to freeze a device that is causing errors in order to limit the possibility
+of propagation of bad data.
+
+There is thus, in HW, a table of PE states that contains a pair of "frozen"
+state bits (one for MMIO and one for DMA, they get set together but can be
+cleared independently) for each PE.
+
+When a PE is frozen, all stores in any direction are dropped and all loads
+return all 1's value. MSIs are also blocked. There's a bit more state that
+captures things like the details of the error that caused the freeze etc., but
+that's not critical.
+
+The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
+are matched to their corresponding PEs.
+
+The following section provides a rough description of what we have on P8
+(IODA2).  Keep in mind that this is all per PHB (PCI host bridge).  Each PHB
+is a completely separate HW entity that replicates the entire logic, so has
+its own set of PEs, etc.
+
+2. Implementation of Partitionable Endpoints on P8 (IODA2)
+==========================================================
+
+P8 supports up to 256 Partitionable Endpoints per PHB.
+
+  * Inbound
+
+    For DMA, MSIs and inbound PCIe error messages, we have a table (in
+    memory but accessed in HW by the chip) that provides a direct
+    correspondence between a PCIe RID (bus/dev/fn) with a PE number.
+    We call this the RTT.
+
+    - For DMA we then provide an entire address space for each PE that can
+      contain two "windows", depending on the value of PCI address bit 59.
+      Each window can be configured to be remapped via a "TCE table" (IOMMU
+      translation table), which has various configurable characteristics
+      not described here.
+
+    - For MSIs, we have two windows in the address space (one at the top of
+      the 32-bit space and one much higher) which, via a combination of the
+      address and MSI value, will result in one of the 2048 interrupts per
+      bridge being triggered.  There's a PE# in the interrupt controller
+      descriptor table as well which is compared with the PE# obtained from
+      the RTT to "authorize" the device to emit that specific interrupt.
+
+    - Error messages just use the RTT.
+
+  * Outbound.  That's where the tricky part is.
+
+    Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
+    from the CPU address space to the PCI address space.  There is one M32
+    window and sixteen M64 windows.  They have different characteristics.
+    First what they have in common: they forward a configurable portion of
+    the CPU address space to the PCIe bus and must be naturally aligned
+    power of two in size.  The rest is different:
+
+    - The M32 window:
+
+      * Is limited to 4GB in size.
+
+      * Drops the top bits of the address (above the size) and replaces
+	them with a configurable value.  This is typically used to generate
+	32-bit PCIe accesses.  We configure that window at boot from FW and
+	don't touch it from Linux; it's usually set to forward a 2GB
+	portion of address space from the CPU to PCIe
+	0x8000_0000..0xffff_ffff.  (Note: The top 64KB are actually
+	reserved for MSIs but this is not a problem at this point; we just
+	need to ensure Linux doesn't assign anything there, the M32 logic
+	ignores that however and will forward in that space if we try).
+
+      * It is divided into 256 segments of equal size.  A table in the chip
+	maps each segment to a PE#.  That allows portions of the MMIO space
+	to be assigned to PEs on a segment granularity.  For a 2GB window,
+	the segment granularity is 2GB/256 = 8MB.
+
+    Now, this is the "main" window we use in Linux today (excluding
+    SR-IOV).  We basically use the trick of forcing the bridge MMIO windows
+    onto a segment alignment/granularity so that the space behind a bridge
+    can be assigned to a PE.
+
+    Ideally we would like to be able to have individual functions in PEs
+    but that would mean using a completely different address allocation
+    scheme where individual function BARs can be "grouped" to fit in one or
+    more segments.
+
+    - The M64 windows:
+
+      * Must be at least 256MB in size.
+
+      * Do not translate addresses (the address on PCIe is the same as the
+	address on the PowerBus).  There is a way to also set the top 14
+	bits which are not conveyed by PowerBus but we don't use this.
+
+      * Can be configured to be segmented.  When not segmented, we can
+	specify the PE# for the entire window.  When segmented, a window
+	has 256 segments; however, there is no table for mapping a segment
+	to a PE#.  The segment number *is* the PE#.
+
+      * Support overlaps.  If an address is covered by multiple windows,
+	there's a defined ordering for which window applies.
+
+    We have code (fairly new compared to the M32 stuff) that exploits that
+    for large BARs in 64-bit space:
+
+    We configure an M64 window to cover the entire region of address space
+    that has been assigned by FW for the PHB (about 64GB, ignore the space
+    for the M32, it comes out of a different "reserve").  We configure it
+    as segmented.
+
+    Then we do the same thing as with M32, using the bridge alignment
+    trick, to match to those giant segments.
+
+    Since we cannot remap, we have two additional constraints:
+
+    - We do the PE# allocation *after* the 64-bit space has been assigned
+      because the addresses we use directly determine the PE#.  We then
+      update the M32 PE# for the devices that use both 32-bit and 64-bit
+      spaces or assign the remaining PE# to 32-bit only devices.
+
+    - We cannot "group" segments in HW, so if a device ends up using more
+      than one segment, we end up with more than one PE#.  There is a HW
+      mechanism to make the freeze state cascade to "companion" PEs but
+      that only works for PCIe error messages (typically used so that if
+      you freeze a switch, it freezes all its children).  So we do it in
+      SW.  We lose a bit of effectiveness of EEH in that case, but that's
+      the best we found.  So when any of the PEs freezes, we freeze the
+      other ones for that "domain".  We thus introduce the concept of
+      "master PE" which is the one used for DMA, MSIs, etc., and "secondary
+      PEs" that are used for the remaining M64 segments.
+
+    We would like to investigate using additional M64 windows in "single
+    PE" mode to overlay over specific BARs to work around some of that, for
+    example for devices with very large BARs, e.g., GPUs.  It would make
+    sense, but we haven't done it yet.
+
+3. Considerations for SR-IOV on PowerKVM
+========================================
+
+  * SR-IOV Background
+
+    The PCIe SR-IOV feature allows a single Physical Function (PF) to
+    support several Virtual Functions (VFs).  Registers in the PF's SR-IOV
+    Capability control the number of VFs and whether they are enabled.
+
+    When VFs are enabled, they appear in Configuration Space like normal
+    PCI devices, but the BARs in VF config space headers are unusual.  For
+    a non-VF device, software uses BARs in the config space header to
+    discover the BAR sizes and assign addresses for them.  For VF devices,
+    software uses VF BAR registers in the *PF* SR-IOV Capability to
+    discover sizes and assign addresses.  The BARs in the VF's config space
+    header are read-only zeros.
+
+    When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
+    base address for all the corresponding VF(n) BARs.  For example, if the
+    PF SR-IOV Capability is programmed to enable eight VFs, and it has a
+    1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
+    This region is divided into eight contiguous 1MB regions, each of which
+    is a BAR0 for one of the VFs.  Note that even though the VF BAR
+    describes an 8MB region, the alignment requirement is for a single VF,
+    i.e., 1MB in this example.
+
+  There are several strategies for isolating VFs in PEs:
+
+  - M32 window: There's one M32 window, and it is split into 256
+    equally-sized segments.  The finest granularity possible is a 256MB
+    window with 1MB segments.  VF BARs that are 1MB or larger could be
+    mapped to separate PEs in this window.  Each segment can be
+    individually mapped to a PE via the lookup table, so this is quite
+    flexible, but it works best when all the VF BARs are the same size.  If
+    they are different sizes, the entire window has to be small enough that
+    the segment size matches the smallest VF BAR, which means larger VF
+    BARs span several segments.
+
+  - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
+    to a single PE, so it could only isolate one VF.
+
+  - Single segmented M64 windows: A segmented M64 window could be used just
+    like the M32 window, but the segments can't be individually mapped to
+    PEs (the segment number is the PE#), so there isn't as much
+    flexibility.  A VF with multiple BARs would have to be in a "domain" of
+    multiple PEs, which is not as well isolated as a single PE.
+
+  - Multiple segmented M64 windows: As usual, each window is split into 256
+    equally-sized segments, and the segment number is the PE#.  But if we
+    use several M64 windows, they can be set to different base addresses
+    and different segment sizes.  If we have VFs that each have a 1MB BAR
+    and a 32MB BAR, we could use one M64 window to assign 1MB segments and
+    another M64 window to assign 32MB segments.
+
+  Finally, the plan to use M64 windows for SR-IOV, which will be described
+  more in the next two sections.  For a given VF BAR, we need to
+  effectively reserve the entire 256 segments (256 * VF BAR size) and
+  position the VF BAR to start at the beginning of a free range of
+  segments/PEs inside that M64 window.
+
+  The goal is of course to be able to give a separate PE for each VF.
+
+  The IODA2 platform has 16 M64 windows, which are used to map MMIO
+  range to PE#.  Each M64 window defines one MMIO range and this range is
+  divided into 256 segments, with each segment corresponding to one PE.
+
+  We decide to leverage this M64 window to map VFs to individual PEs, since
+  SR-IOV VF BARs are all the same size.
+
+  But doing so introduces another problem: total_VFs is usually smaller
+  than the number of M64 window segments, so if we map one VF BAR directly
+  to one M64 window, some part of the M64 window will map to another
+  device's MMIO range.
+
+  IODA supports 256 PEs, so segmented windows contain 256 segments, so if
+  total_VFs is less than 256, we have the situation in Figure 1.0, where
+  segments [total_VFs, 255] of the M64 window may map to some MMIO range on
+  other devices::
+
+     0      1                     total_VFs - 1
+     +------+------+-     -+------+------+
+     |      |      |  ...  |      |      |
+     +------+------+-     -+------+------+
+
+                           VF(n) BAR space
+
+     0      1                     total_VFs - 1                255
+     +------+------+-     -+------+------+-      -+------+------+
+     |      |      |  ...  |      |      |   ...  |      |      |
+     +------+------+-     -+------+------+-      -+------+------+
+
+                           M64 window
+
+		Figure 1.0 Direct map VF(n) BAR space
+
+  Our current solution is to allocate 256 segments even if the VF(n) BAR
+  space doesn't need that much, as shown in Figure 1.1::
+
+     0      1                     total_VFs - 1                255
+     +------+------+-     -+------+------+-      -+------+------+
+     |      |      |  ...  |      |      |   ...  |      |      |
+     +------+------+-     -+------+------+-      -+------+------+
+
+                           VF(n) BAR space + extra
+
+     0      1                     total_VFs - 1                255
+     +------+------+-     -+------+------+-      -+------+------+
+     |      |      |  ...  |      |      |   ...  |      |      |
+     +------+------+-     -+------+------+-      -+------+------+
+
+			   M64 window
+
+		Figure 1.1 Map VF(n) BAR space + extra
+
+  Allocating the extra space ensures that the entire M64 window will be
+  assigned to this one SR-IOV device and none of the space will be
+  available for other devices.  Note that this only expands the space
+  reserved in software; there are still only total_VFs VFs, and they only
+  respond to segments [0, total_VFs - 1].  There's nothing in hardware that
+  responds to segments [total_VFs, 255].
+
+4. Implications for the Generic PCI Code
+========================================
+
+The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
+aligned to the size of an individual VF BAR.
+
+In IODA2, the MMIO address determines the PE#.  If the address is in an M32
+window, we can set the PE# by updating the table that translates segments
+to PE#s.  Similarly, if the address is in an unsegmented M64 window, we can
+set the PE# for the window.  But if it's in a segmented M64 window, the
+segment number is the PE#.
+
+Therefore, the only way to control the PE# for a VF is to change the base
+of the VF(n) BAR space in the VF BAR.  If the PCI core allocates the exact
+amount of space required for the VF(n) BAR space, the VF BAR value is fixed
+and cannot be changed.
+
+On the other hand, if the PCI core allocates additional space, the VF BAR
+value can be changed as long as the entire VF(n) BAR space remains inside
+the space allocated by the core.
+
+Ideally the segment size will be the same as an individual VF BAR size.
+Then each VF will be in its own PE.  The VF BARs (and therefore the PE#s)
+are contiguous.  If VF0 is in PE(x), then VF(n) is in PE(x+n).  If we
+allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
+
+If the segment size is smaller than the VF BAR size, it will take several
+segments to cover a VF BAR, and a VF will be in several PEs.  This is
+possible, but the isolation isn't as good, and it reduces the number of PE#
+choices because instead of consuming only numVFs segments, the VF(n) BAR
+space will consume (numVFs * n) segments.  That means there aren't as many
+available segments for adjusting base of the VF(n) BAR space.
diff --git a/Documentation/powerpc/pci_iov_resource_on_powernv.txt b/Documentation/powerpc/pci_iov_resource_on_powernv.txt
deleted file mode 100644
index b55c5cd83f8d..000000000000
--- a/Documentation/powerpc/pci_iov_resource_on_powernv.txt
+++ /dev/null
@@ -1,301 +0,0 @@
-Wei Yang <weiyang@linux.vnet.ibm.com>
-Benjamin Herrenschmidt <benh@au1.ibm.com>
-Bjorn Helgaas <bhelgaas@google.com>
-26 Aug 2014
-
-This document describes the requirement from hardware for PCI MMIO resource
-sizing and assignment on PowerKVM and how generic PCI code handles this
-requirement. The first two sections describe the concepts of Partitionable
-Endpoints and the implementation on P8 (IODA2). The next two sections talks
-about considerations on enabling SRIOV on IODA2.
-
-1. Introduction to Partitionable Endpoints
-
-A Partitionable Endpoint (PE) is a way to group the various resources
-associated with a device or a set of devices to provide isolation between
-partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
-to freeze a device that is causing errors in order to limit the possibility
-of propagation of bad data.
-
-There is thus, in HW, a table of PE states that contains a pair of "frozen"
-state bits (one for MMIO and one for DMA, they get set together but can be
-cleared independently) for each PE.
-
-When a PE is frozen, all stores in any direction are dropped and all loads
-return all 1's value. MSIs are also blocked. There's a bit more state that
-captures things like the details of the error that caused the freeze etc., but
-that's not critical.
-
-The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
-are matched to their corresponding PEs.
-
-The following section provides a rough description of what we have on P8
-(IODA2).  Keep in mind that this is all per PHB (PCI host bridge).  Each PHB
-is a completely separate HW entity that replicates the entire logic, so has
-its own set of PEs, etc.
-
-2. Implementation of Partitionable Endpoints on P8 (IODA2)
-
-P8 supports up to 256 Partitionable Endpoints per PHB.
-
-  * Inbound
-
-    For DMA, MSIs and inbound PCIe error messages, we have a table (in
-    memory but accessed in HW by the chip) that provides a direct
-    correspondence between a PCIe RID (bus/dev/fn) with a PE number.
-    We call this the RTT.
-
-    - For DMA we then provide an entire address space for each PE that can
-      contain two "windows", depending on the value of PCI address bit 59.
-      Each window can be configured to be remapped via a "TCE table" (IOMMU
-      translation table), which has various configurable characteristics
-      not described here.
-
-    - For MSIs, we have two windows in the address space (one at the top of
-      the 32-bit space and one much higher) which, via a combination of the
-      address and MSI value, will result in one of the 2048 interrupts per
-      bridge being triggered.  There's a PE# in the interrupt controller
-      descriptor table as well which is compared with the PE# obtained from
-      the RTT to "authorize" the device to emit that specific interrupt.
-
-    - Error messages just use the RTT.
-
-  * Outbound.  That's where the tricky part is.
-
-    Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
-    from the CPU address space to the PCI address space.  There is one M32
-    window and sixteen M64 windows.  They have different characteristics.
-    First what they have in common: they forward a configurable portion of
-    the CPU address space to the PCIe bus and must be naturally aligned
-    power of two in size.  The rest is different:
-
-    - The M32 window:
-
-      * Is limited to 4GB in size.
-
-      * Drops the top bits of the address (above the size) and replaces
-	them with a configurable value.  This is typically used to generate
-	32-bit PCIe accesses.  We configure that window at boot from FW and
-	don't touch it from Linux; it's usually set to forward a 2GB
-	portion of address space from the CPU to PCIe
-	0x8000_0000..0xffff_ffff.  (Note: The top 64KB are actually
-	reserved for MSIs but this is not a problem at this point; we just
-	need to ensure Linux doesn't assign anything there, the M32 logic
-	ignores that however and will forward in that space if we try).
-
-      * It is divided into 256 segments of equal size.  A table in the chip
-	maps each segment to a PE#.  That allows portions of the MMIO space
-	to be assigned to PEs on a segment granularity.  For a 2GB window,
-	the segment granularity is 2GB/256 = 8MB.
-
-    Now, this is the "main" window we use in Linux today (excluding
-    SR-IOV).  We basically use the trick of forcing the bridge MMIO windows
-    onto a segment alignment/granularity so that the space behind a bridge
-    can be assigned to a PE.
-
-    Ideally we would like to be able to have individual functions in PEs
-    but that would mean using a completely different address allocation
-    scheme where individual function BARs can be "grouped" to fit in one or
-    more segments.
-
-    - The M64 windows:
-
-      * Must be at least 256MB in size.
-
-      * Do not translate addresses (the address on PCIe is the same as the
-	address on the PowerBus).  There is a way to also set the top 14
-	bits which are not conveyed by PowerBus but we don't use this.
-
-      * Can be configured to be segmented.  When not segmented, we can
-	specify the PE# for the entire window.  When segmented, a window
-	has 256 segments; however, there is no table for mapping a segment
-	to a PE#.  The segment number *is* the PE#.
-
-      * Support overlaps.  If an address is covered by multiple windows,
-	there's a defined ordering for which window applies.
-
-    We have code (fairly new compared to the M32 stuff) that exploits that
-    for large BARs in 64-bit space:
-
-    We configure an M64 window to cover the entire region of address space
-    that has been assigned by FW for the PHB (about 64GB, ignore the space
-    for the M32, it comes out of a different "reserve").  We configure it
-    as segmented.
-
-    Then we do the same thing as with M32, using the bridge alignment
-    trick, to match to those giant segments.
-
-    Since we cannot remap, we have two additional constraints:
-
-    - We do the PE# allocation *after* the 64-bit space has been assigned
-      because the addresses we use directly determine the PE#.  We then
-      update the M32 PE# for the devices that use both 32-bit and 64-bit
-      spaces or assign the remaining PE# to 32-bit only devices.
-
-    - We cannot "group" segments in HW, so if a device ends up using more
-      than one segment, we end up with more than one PE#.  There is a HW
-      mechanism to make the freeze state cascade to "companion" PEs but
-      that only works for PCIe error messages (typically used so that if
-      you freeze a switch, it freezes all its children).  So we do it in
-      SW.  We lose a bit of effectiveness of EEH in that case, but that's
-      the best we found.  So when any of the PEs freezes, we freeze the
-      other ones for that "domain".  We thus introduce the concept of
-      "master PE" which is the one used for DMA, MSIs, etc., and "secondary
-      PEs" that are used for the remaining M64 segments.
-
-    We would like to investigate using additional M64 windows in "single
-    PE" mode to overlay over specific BARs to work around some of that, for
-    example for devices with very large BARs, e.g., GPUs.  It would make
-    sense, but we haven't done it yet.
-
-3. Considerations for SR-IOV on PowerKVM
-
-  * SR-IOV Background
-
-    The PCIe SR-IOV feature allows a single Physical Function (PF) to
-    support several Virtual Functions (VFs).  Registers in the PF's SR-IOV
-    Capability control the number of VFs and whether they are enabled.
-
-    When VFs are enabled, they appear in Configuration Space like normal
-    PCI devices, but the BARs in VF config space headers are unusual.  For
-    a non-VF device, software uses BARs in the config space header to
-    discover the BAR sizes and assign addresses for them.  For VF devices,
-    software uses VF BAR registers in the *PF* SR-IOV Capability to
-    discover sizes and assign addresses.  The BARs in the VF's config space
-    header are read-only zeros.
-
-    When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
-    base address for all the corresponding VF(n) BARs.  For example, if the
-    PF SR-IOV Capability is programmed to enable eight VFs, and it has a
-    1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
-    This region is divided into eight contiguous 1MB regions, each of which
-    is a BAR0 for one of the VFs.  Note that even though the VF BAR
-    describes an 8MB region, the alignment requirement is for a single VF,
-    i.e., 1MB in this example.
-
-  There are several strategies for isolating VFs in PEs:
-
-  - M32 window: There's one M32 window, and it is split into 256
-    equally-sized segments.  The finest granularity possible is a 256MB
-    window with 1MB segments.  VF BARs that are 1MB or larger could be
-    mapped to separate PEs in this window.  Each segment can be
-    individually mapped to a PE via the lookup table, so this is quite
-    flexible, but it works best when all the VF BARs are the same size.  If
-    they are different sizes, the entire window has to be small enough that
-    the segment size matches the smallest VF BAR, which means larger VF
-    BARs span several segments.
-
-  - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
-    to a single PE, so it could only isolate one VF.
-
-  - Single segmented M64 windows: A segmented M64 window could be used just
-    like the M32 window, but the segments can't be individually mapped to
-    PEs (the segment number is the PE#), so there isn't as much
-    flexibility.  A VF with multiple BARs would have to be in a "domain" of
-    multiple PEs, which is not as well isolated as a single PE.
-
-  - Multiple segmented M64 windows: As usual, each window is split into 256
-    equally-sized segments, and the segment number is the PE#.  But if we
-    use several M64 windows, they can be set to different base addresses
-    and different segment sizes.  If we have VFs that each have a 1MB BAR
-    and a 32MB BAR, we could use one M64 window to assign 1MB segments and
-    another M64 window to assign 32MB segments.
-
-  Finally, the plan to use M64 windows for SR-IOV, which will be described
-  more in the next two sections.  For a given VF BAR, we need to
-  effectively reserve the entire 256 segments (256 * VF BAR size) and
-  position the VF BAR to start at the beginning of a free range of
-  segments/PEs inside that M64 window.
-
-  The goal is of course to be able to give a separate PE for each VF.
-
-  The IODA2 platform has 16 M64 windows, which are used to map MMIO
-  range to PE#.  Each M64 window defines one MMIO range and this range is
-  divided into 256 segments, with each segment corresponding to one PE.
-
-  We decide to leverage this M64 window to map VFs to individual PEs, since
-  SR-IOV VF BARs are all the same size.
-
-  But doing so introduces another problem: total_VFs is usually smaller
-  than the number of M64 window segments, so if we map one VF BAR directly
-  to one M64 window, some part of the M64 window will map to another
-  device's MMIO range.
-
-  IODA supports 256 PEs, so segmented windows contain 256 segments, so if
-  total_VFs is less than 256, we have the situation in Figure 1.0, where
-  segments [total_VFs, 255] of the M64 window may map to some MMIO range on
-  other devices:
-
-     0      1                     total_VFs - 1
-     +------+------+-     -+------+------+
-     |      |      |  ...  |      |      |
-     +------+------+-     -+------+------+
-
-                           VF(n) BAR space
-
-     0      1                     total_VFs - 1                255
-     +------+------+-     -+------+------+-      -+------+------+
-     |      |      |  ...  |      |      |   ...  |      |      |
-     +------+------+-     -+------+------+-      -+------+------+
-
-                           M64 window
-
-		Figure 1.0 Direct map VF(n) BAR space
-
-  Our current solution is to allocate 256 segments even if the VF(n) BAR
-  space doesn't need that much, as shown in Figure 1.1:
-
-     0      1                     total_VFs - 1                255
-     +------+------+-     -+------+------+-      -+------+------+
-     |      |      |  ...  |      |      |   ...  |      |      |
-     +------+------+-     -+------+------+-      -+------+------+
-
-                           VF(n) BAR space + extra
-
-     0      1                     total_VFs - 1                255
-     +------+------+-     -+------+------+-      -+------+------+
-     |      |      |  ...  |      |      |   ...  |      |      |
-     +------+------+-     -+------+------+-      -+------+------+
-
-			   M64 window
-
-		Figure 1.1 Map VF(n) BAR space + extra
-
-  Allocating the extra space ensures that the entire M64 window will be
-  assigned to this one SR-IOV device and none of the space will be
-  available for other devices.  Note that this only expands the space
-  reserved in software; there are still only total_VFs VFs, and they only
-  respond to segments [0, total_VFs - 1].  There's nothing in hardware that
-  responds to segments [total_VFs, 255].
-
-4. Implications for the Generic PCI Code
-
-The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
-aligned to the size of an individual VF BAR.
-
-In IODA2, the MMIO address determines the PE#.  If the address is in an M32
-window, we can set the PE# by updating the table that translates segments
-to PE#s.  Similarly, if the address is in an unsegmented M64 window, we can
-set the PE# for the window.  But if it's in a segmented M64 window, the
-segment number is the PE#.
-
-Therefore, the only way to control the PE# for a VF is to change the base
-of the VF(n) BAR space in the VF BAR.  If the PCI core allocates the exact
-amount of space required for the VF(n) BAR space, the VF BAR value is fixed
-and cannot be changed.
-
-On the other hand, if the PCI core allocates additional space, the VF BAR
-value can be changed as long as the entire VF(n) BAR space remains inside
-the space allocated by the core.
-
-Ideally the segment size will be the same as an individual VF BAR size.
-Then each VF will be in its own PE.  The VF BARs (and therefore the PE#s)
-are contiguous.  If VF0 is in PE(x), then VF(n) is in PE(x+n).  If we
-allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
-
-If the segment size is smaller than the VF BAR size, it will take several
-segments to cover a VF BAR, and a VF will be in several PEs.  This is
-possible, but the isolation isn't as good, and it reduces the number of PE#
-choices because instead of consuming only numVFs segments, the VF(n) BAR
-space will consume (numVFs * n) segments.  That means there aren't as many
-available segments for adjusting base of the VF(n) BAR space.
diff --git a/Documentation/powerpc/pmu-ebb.rst b/Documentation/powerpc/pmu-ebb.rst
new file mode 100644
index 000000000000..4f474758eb55
--- /dev/null
+++ b/Documentation/powerpc/pmu-ebb.rst
@@ -0,0 +1,138 @@
+========================
+PMU Event Based Branches
+========================
+
+Event Based Branches (EBBs) are a feature which allows the hardware to
+branch directly to a specified user space address when certain events occur.
+
+The full specification is available in Power ISA v2.07:
+
+  https://www.power.org/documentation/power-isa-version-2-07/
+
+One type of event for which EBBs can be configured is PMU exceptions. This
+document describes the API for configuring the Power PMU to generate EBBs,
+using the Linux perf_events API.
+
+
+Terminology
+-----------
+
+Throughout this document we will refer to an "EBB event" or "EBB events". This
+just refers to a struct perf_event which has set the "EBB" flag in its
+attr.config. All events which can be configured on the hardware PMU are
+possible "EBB events".
+
+
+Background
+----------
+
+When a PMU EBB occurs it is delivered to the currently running process. As such
+EBBs can only sensibly be used by programs for self-monitoring.
+
+It is a feature of the perf_events API that events can be created on other
+processes, subject to standard permission checks. This is also true of EBB
+events, however unless the target process enables EBBs (via mtspr(BESCR)) no
+EBBs will ever be delivered.
+
+This makes it possible for a process to enable EBBs for itself, but not
+actually configure any events. At a later time another process can come along
+and attach an EBB event to the process, which will then cause EBBs to be
+delivered to the first process. It's not clear if this is actually useful.
+
+
+When the PMU is configured for EBBs, all PMU interrupts are delivered to the
+user process. This means once an EBB event is scheduled on the PMU, no non-EBB
+events can be configured. This means that EBB events can not be run
+concurrently with regular 'perf' commands, or any other perf events.
+
+It is however safe to run 'perf' commands on a process which is using EBBs. The
+kernel will in general schedule the EBB event, and perf will be notified that
+its events could not run.
+
+The exclusion between EBB events and regular events is implemented using the
+existing "pinned" and "exclusive" attributes of perf_events. This means EBB
+events will be given priority over other events, unless they are also pinned.
+If an EBB event and a regular event are both pinned, then whichever is enabled
+first will be scheduled and the other will be put in error state. See the
+section below titled "Enabling an EBB event" for more information.
+
+
+Creating an EBB event
+---------------------
+
+To request that an event is counted using EBB, the event code should have bit
+63 set.
+
+EBB events must be created with a particular, and restrictive, set of
+attributes - this is so that they interoperate correctly with the rest of the
+perf_events subsystem.
+
+An EBB event must be created with the "pinned" and "exclusive" attributes set.
+Note that if you are creating a group of EBB events, only the leader can have
+these attributes set.
+
+An EBB event must NOT set any of the "inherit", "sample_period", "freq" or
+"enable_on_exec" attributes.
+
+An EBB event must be attached to a task. This is specified to perf_event_open()
+by passing a pid value, typically 0 indicating the current task.
+
+All events in a group must agree on whether they want EBB. That is all events
+must request EBB, or none may request EBB.
+
+EBB events must specify the PMC they are to be counted on. This ensures
+userspace is able to reliably determine which PMC the event is scheduled on.
+
+
+Enabling an EBB event
+---------------------
+
+Once an EBB event has been successfully opened, it must be enabled with the
+perf_events API. This can be achieved either via the ioctl() interface, or the
+prctl() interface.
+
+However, due to the design of the perf_events API, enabling an event does not
+guarantee that it has been scheduled on the PMU. To ensure that the EBB event
+has been scheduled on the PMU, you must perform a read() on the event. If the
+read() returns EOF, then the event has not been scheduled and EBBs are not
+enabled.
+
+This behaviour occurs because the EBB event is pinned and exclusive. When the
+EBB event is enabled it will force all other non-pinned events off the PMU. In
+this case the enable will be successful. However if there is already an event
+pinned on the PMU then the enable will not be successful.
+
+
+Reading an EBB event
+--------------------
+
+It is possible to read() from an EBB event. However the results are
+meaningless. Because interrupts are being delivered to the user process the
+kernel is not able to count the event, and so will return a junk value.
+
+
+Closing an EBB event
+--------------------
+
+When an EBB event is finished with, you can close it using close() as for any
+regular event. If this is the last EBB event the PMU will be deconfigured and
+no further PMU EBBs will be delivered.
+
+
+EBB Handler
+-----------
+
+The EBB handler is just regular userspace code, however it must be written in
+the style of an interrupt handler. When the handler is entered all registers
+are live (possibly) and so must be saved somehow before the handler can invoke
+other code.
+
+It's up to the program how to handle this. For C programs a relatively simple
+option is to create an interrupt frame on the stack and save registers there.
+
+Fork
+----
+
+EBB events are not inherited across fork. If the child process wishes to use
+EBBs it should open a new event for itself. Similarly the EBB state in
+BESCR/EBBHR/EBBRR is cleared across fork().
diff --git a/Documentation/powerpc/pmu-ebb.txt b/Documentation/powerpc/pmu-ebb.txt
deleted file mode 100644
index 73cd163dbfb8..000000000000
--- a/Documentation/powerpc/pmu-ebb.txt
+++ /dev/null
@@ -1,137 +0,0 @@
-PMU Event Based Branches
-========================
-
-Event Based Branches (EBBs) are a feature which allows the hardware to
-branch directly to a specified user space address when certain events occur.
-
-The full specification is available in Power ISA v2.07:
-
-  https://www.power.org/documentation/power-isa-version-2-07/
-
-One type of event for which EBBs can be configured is PMU exceptions. This
-document describes the API for configuring the Power PMU to generate EBBs,
-using the Linux perf_events API.
-
-
-Terminology
------------
-
-Throughout this document we will refer to an "EBB event" or "EBB events". This
-just refers to a struct perf_event which has set the "EBB" flag in its
-attr.config. All events which can be configured on the hardware PMU are
-possible "EBB events".
-
-
-Background
-----------
-
-When a PMU EBB occurs it is delivered to the currently running process. As such
-EBBs can only sensibly be used by programs for self-monitoring.
-
-It is a feature of the perf_events API that events can be created on other
-processes, subject to standard permission checks. This is also true of EBB
-events, however unless the target process enables EBBs (via mtspr(BESCR)) no
-EBBs will ever be delivered.
-
-This makes it possible for a process to enable EBBs for itself, but not
-actually configure any events. At a later time another process can come along
-and attach an EBB event to the process, which will then cause EBBs to be
-delivered to the first process. It's not clear if this is actually useful.
-
-
-When the PMU is configured for EBBs, all PMU interrupts are delivered to the
-user process. This means once an EBB event is scheduled on the PMU, no non-EBB
-events can be configured. This means that EBB events can not be run
-concurrently with regular 'perf' commands, or any other perf events.
-
-It is however safe to run 'perf' commands on a process which is using EBBs. The
-kernel will in general schedule the EBB event, and perf will be notified that
-its events could not run.
-
-The exclusion between EBB events and regular events is implemented using the
-existing "pinned" and "exclusive" attributes of perf_events. This means EBB
-events will be given priority over other events, unless they are also pinned.
-If an EBB event and a regular event are both pinned, then whichever is enabled
-first will be scheduled and the other will be put in error state. See the
-section below titled "Enabling an EBB event" for more information.
-
-
-Creating an EBB event
----------------------
-
-To request that an event is counted using EBB, the event code should have bit
-63 set.
-
-EBB events must be created with a particular, and restrictive, set of
-attributes - this is so that they interoperate correctly with the rest of the
-perf_events subsystem.
-
-An EBB event must be created with the "pinned" and "exclusive" attributes set.
-Note that if you are creating a group of EBB events, only the leader can have
-these attributes set.
-
-An EBB event must NOT set any of the "inherit", "sample_period", "freq" or
-"enable_on_exec" attributes.
-
-An EBB event must be attached to a task. This is specified to perf_event_open()
-by passing a pid value, typically 0 indicating the current task.
-
-All events in a group must agree on whether they want EBB. That is all events
-must request EBB, or none may request EBB.
-
-EBB events must specify the PMC they are to be counted on. This ensures
-userspace is able to reliably determine which PMC the event is scheduled on.
-
-
-Enabling an EBB event
----------------------
-
-Once an EBB event has been successfully opened, it must be enabled with the
-perf_events API. This can be achieved either via the ioctl() interface, or the
-prctl() interface.
-
-However, due to the design of the perf_events API, enabling an event does not
-guarantee that it has been scheduled on the PMU. To ensure that the EBB event
-has been scheduled on the PMU, you must perform a read() on the event. If the
-read() returns EOF, then the event has not been scheduled and EBBs are not
-enabled.
-
-This behaviour occurs because the EBB event is pinned and exclusive. When the
-EBB event is enabled it will force all other non-pinned events off the PMU. In
-this case the enable will be successful. However if there is already an event
-pinned on the PMU then the enable will not be successful.
-
-
-Reading an EBB event
---------------------
-
-It is possible to read() from an EBB event. However the results are
-meaningless. Because interrupts are being delivered to the user process the
-kernel is not able to count the event, and so will return a junk value.
-
-
-Closing an EBB event
---------------------
-
-When an EBB event is finished with, you can close it using close() as for any
-regular event. If this is the last EBB event the PMU will be deconfigured and
-no further PMU EBBs will be delivered.
-
-
-EBB Handler
------------
-
-The EBB handler is just regular userspace code, however it must be written in
-the style of an interrupt handler. When the handler is entered all registers
-are live (possibly) and so must be saved somehow before the handler can invoke
-other code.
-
-It's up to the program how to handle this. For C programs a relatively simple
-option is to create an interrupt frame on the stack and save registers there.
-
-Fork
-----
-
-EBB events are not inherited across fork. If the child process wishes to use
-EBBs it should open a new event for itself. Similarly the EBB state in
-BESCR/EBBHR/EBBRR is cleared across fork().
diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst
new file mode 100644
index 000000000000..864d4b6dddd1
--- /dev/null
+++ b/Documentation/powerpc/ptrace.rst
@@ -0,0 +1,156 @@
+======
+Ptrace
+======
+
+GDB intends to support the following hardware debug features of BookE
+processors:
+
+4 hardware breakpoints (IAC)
+2 hardware watchpoints (read, write and read-write) (DAC)
+2 value conditions for the hardware watchpoints (DVC)
+
+For that, we need to extend ptrace so that GDB can query and set these
+resources. Since we're extending, we're trying to create an interface
+that's extendable and that covers both BookE and server processors, so
+that GDB doesn't need to special-case each of them. We added the
+following 3 new ptrace requests.
+
+1. PTRACE_PPC_GETHWDEBUGINFO
+============================
+
+Query for GDB to discover the hardware debug features. The main info to
+be returned here is the minimum alignment for the hardware watchpoints.
+BookE processors don't have restrictions here, but server processors have
+an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
+adding special cases to GDB based on what it sees in AUXV.
+
+Since we're at it, we added other useful info that the kernel can return to
+GDB: this query will return the number of hardware breakpoints, hardware
+watchpoints and whether it supports a range of addresses and a condition.
+The query will fill the following structure provided by the requesting process::
+
+  struct ppc_debug_info {
+       unit32_t version;
+       unit32_t num_instruction_bps;
+       unit32_t num_data_bps;
+       unit32_t num_condition_regs;
+       unit32_t data_bp_alignment;
+       unit32_t sizeof_condition; /* size of the DVC register */
+       uint64_t features; /* bitmask of the individual flags */
+  };
+
+features will have bits indicating whether there is support for::
+
+  #define PPC_DEBUG_FEATURE_INSN_BP_RANGE		0x1
+  #define PPC_DEBUG_FEATURE_INSN_BP_MASK		0x2
+  #define PPC_DEBUG_FEATURE_DATA_BP_RANGE		0x4
+  #define PPC_DEBUG_FEATURE_DATA_BP_MASK		0x8
+  #define PPC_DEBUG_FEATURE_DATA_BP_DAWR		0x10
+
+2. PTRACE_SETHWDEBUG
+
+Sets a hardware breakpoint or watchpoint, according to the provided structure::
+
+  struct ppc_hw_breakpoint {
+        uint32_t version;
+  #define PPC_BREAKPOINT_TRIGGER_EXECUTE  0x1
+  #define PPC_BREAKPOINT_TRIGGER_READ     0x2
+ #define PPC_BREAKPOINT_TRIGGER_WRITE    0x4
+        uint32_t trigger_type;       /* only some combinations allowed */
+  #define PPC_BREAKPOINT_MODE_EXACT               0x0
+  #define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE     0x1
+  #define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE     0x2
+  #define PPC_BREAKPOINT_MODE_MASK                0x3
+        uint32_t addr_mode;          /* address match mode */
+
+  #define PPC_BREAKPOINT_CONDITION_MODE   0x3
+  #define PPC_BREAKPOINT_CONDITION_NONE   0x0
+  #define PPC_BREAKPOINT_CONDITION_AND    0x1
+  #define PPC_BREAKPOINT_CONDITION_EXACT  0x1	/* different name for the same thing as above */
+  #define PPC_BREAKPOINT_CONDITION_OR     0x2
+  #define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
+  #define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000	/* byte enable bits */
+  #define PPC_BREAKPOINT_CONDITION_BE(n)  (1<<((n)+16))
+        uint32_t condition_mode;     /* break/watchpoint condition flags */
+
+        uint64_t addr;
+        uint64_t addr2;
+        uint64_t condition_value;
+  };
+
+A request specifies one event, not necessarily just one register to be set.
+For instance, if the request is for a watchpoint with a condition, both the
+DAC and DVC registers will be set in the same request.
+
+With this GDB can ask for all kinds of hardware breakpoints and watchpoints
+that the BookE supports. COMEFROM breakpoints available in server processors
+are not contemplated, but that is out of the scope of this work.
+
+ptrace will return an integer (handle) uniquely identifying the breakpoint or
+watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
+request to ask for its removal. Return -ENOSPC if the requested breakpoint
+can't be allocated on the registers.
+
+Some examples of using the structure to:
+
+- set a breakpoint in the first breakpoint register::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) address;
+    p.addr2           = 0;
+    p.condition_value = 0;
+
+- set a watchpoint which triggers on reads in the second watchpoint register::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) address;
+    p.addr2           = 0;
+    p.condition_value = 0;
+
+- set a watchpoint which triggers only with a specific value::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
+    p.addr            = (uint64_t) address;
+    p.addr2           = 0;
+    p.condition_value = (uint64_t) condition;
+
+- set a ranged hardware breakpoint::
+
+    p.version         = PPC_DEBUG_CURRENT_VERSION;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) begin_range;
+    p.addr2           = (uint64_t) end_range;
+    p.condition_value = 0;
+
+- set a watchpoint in server processors (BookS)::
+
+    p.version         = 1;
+    p.trigger_type    = PPC_BREAKPOINT_TRIGGER_RW;
+    p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+    or
+    p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
+
+    p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
+    p.addr            = (uint64_t) begin_range;
+    /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
+     * addr2 - addr <= 8 Bytes.
+     */
+    p.addr2           = (uint64_t) end_range;
+    p.condition_value = 0;
+
+3. PTRACE_DELHWDEBUG
+
+Takes an integer which identifies an existing breakpoint or watchpoint
+(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
+corresponding breakpoint or watchpoint..
diff --git a/Documentation/powerpc/ptrace.txt b/Documentation/powerpc/ptrace.txt
deleted file mode 100644
index 99c5ce88d0fe..000000000000
--- a/Documentation/powerpc/ptrace.txt
+++ /dev/null
@@ -1,151 +0,0 @@
-GDB intends to support the following hardware debug features of BookE
-processors:
-
-4 hardware breakpoints (IAC)
-2 hardware watchpoints (read, write and read-write) (DAC)
-2 value conditions for the hardware watchpoints (DVC)
-
-For that, we need to extend ptrace so that GDB can query and set these
-resources. Since we're extending, we're trying to create an interface
-that's extendable and that covers both BookE and server processors, so
-that GDB doesn't need to special-case each of them. We added the
-following 3 new ptrace requests.
-
-1. PTRACE_PPC_GETHWDEBUGINFO
-
-Query for GDB to discover the hardware debug features. The main info to
-be returned here is the minimum alignment for the hardware watchpoints.
-BookE processors don't have restrictions here, but server processors have
-an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
-adding special cases to GDB based on what it sees in AUXV.
-
-Since we're at it, we added other useful info that the kernel can return to
-GDB: this query will return the number of hardware breakpoints, hardware
-watchpoints and whether it supports a range of addresses and a condition.
-The query will fill the following structure provided by the requesting process:
-
-struct ppc_debug_info {
-       unit32_t version;
-       unit32_t num_instruction_bps;
-       unit32_t num_data_bps;
-       unit32_t num_condition_regs;
-       unit32_t data_bp_alignment;
-       unit32_t sizeof_condition; /* size of the DVC register */
-       uint64_t features; /* bitmask of the individual flags */
-};
-
-features will have bits indicating whether there is support for:
-
-#define PPC_DEBUG_FEATURE_INSN_BP_RANGE		0x1
-#define PPC_DEBUG_FEATURE_INSN_BP_MASK		0x2
-#define PPC_DEBUG_FEATURE_DATA_BP_RANGE		0x4
-#define PPC_DEBUG_FEATURE_DATA_BP_MASK		0x8
-#define PPC_DEBUG_FEATURE_DATA_BP_DAWR		0x10
-
-2. PTRACE_SETHWDEBUG
-
-Sets a hardware breakpoint or watchpoint, according to the provided structure:
-
-struct ppc_hw_breakpoint {
-        uint32_t version;
-#define PPC_BREAKPOINT_TRIGGER_EXECUTE  0x1
-#define PPC_BREAKPOINT_TRIGGER_READ     0x2
-#define PPC_BREAKPOINT_TRIGGER_WRITE    0x4
-        uint32_t trigger_type;       /* only some combinations allowed */
-#define PPC_BREAKPOINT_MODE_EXACT               0x0
-#define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE     0x1
-#define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE     0x2
-#define PPC_BREAKPOINT_MODE_MASK                0x3
-        uint32_t addr_mode;          /* address match mode */
-
-#define PPC_BREAKPOINT_CONDITION_MODE   0x3
-#define PPC_BREAKPOINT_CONDITION_NONE   0x0
-#define PPC_BREAKPOINT_CONDITION_AND    0x1
-#define PPC_BREAKPOINT_CONDITION_EXACT  0x1	/* different name for the same thing as above */
-#define PPC_BREAKPOINT_CONDITION_OR     0x2
-#define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
-#define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000	/* byte enable bits */
-#define PPC_BREAKPOINT_CONDITION_BE(n)  (1<<((n)+16))
-        uint32_t condition_mode;     /* break/watchpoint condition flags */
-
-        uint64_t addr;
-        uint64_t addr2;
-        uint64_t condition_value;
-};
-
-A request specifies one event, not necessarily just one register to be set.
-For instance, if the request is for a watchpoint with a condition, both the
-DAC and DVC registers will be set in the same request.
-
-With this GDB can ask for all kinds of hardware breakpoints and watchpoints
-that the BookE supports. COMEFROM breakpoints available in server processors
-are not contemplated, but that is out of the scope of this work.
-
-ptrace will return an integer (handle) uniquely identifying the breakpoint or
-watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
-request to ask for its removal. Return -ENOSPC if the requested breakpoint
-can't be allocated on the registers.
-
-Some examples of using the structure to:
-
-- set a breakpoint in the first breakpoint register
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) address;
-  p.addr2           = 0;
-  p.condition_value = 0;
-
-- set a watchpoint which triggers on reads in the second watchpoint register
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) address;
-  p.addr2           = 0;
-  p.condition_value = 0;
-
-- set a watchpoint which triggers only with a specific value
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_READ;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
-  p.addr            = (uint64_t) address;
-  p.addr2           = 0;
-  p.condition_value = (uint64_t) condition;
-
-- set a ranged hardware breakpoint
-
-  p.version         = PPC_DEBUG_CURRENT_VERSION;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_EXECUTE;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) begin_range;
-  p.addr2           = (uint64_t) end_range;
-  p.condition_value = 0;
-
-- set a watchpoint in server processors (BookS)
-
-  p.version         = 1;
-  p.trigger_type    = PPC_BREAKPOINT_TRIGGER_RW;
-  p.addr_mode       = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-  or
-  p.addr_mode       = PPC_BREAKPOINT_MODE_EXACT;
-
-  p.condition_mode  = PPC_BREAKPOINT_CONDITION_NONE;
-  p.addr            = (uint64_t) begin_range;
-  /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
-   * addr2 - addr <= 8 Bytes.
-   */
-  p.addr2           = (uint64_t) end_range;
-  p.condition_value = 0;
-
-3. PTRACE_DELHWDEBUG
-
-Takes an integer which identifies an existing breakpoint or watchpoint
-(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
-corresponding breakpoint or watchpoint..
diff --git a/Documentation/powerpc/qe_firmware.rst b/Documentation/powerpc/qe_firmware.rst
new file mode 100644
index 000000000000..42f5103140c9
--- /dev/null
+++ b/Documentation/powerpc/qe_firmware.rst
@@ -0,0 +1,296 @@
+=========================================
+Freescale QUICC Engine Firmware Uploading
+=========================================
+
+(c) 2007 Timur Tabi <timur at freescale.com>,
+    Freescale Semiconductor
+
+.. Table of Contents
+
+   I - Software License for Firmware
+
+   II - Microcode Availability
+
+   III - Description and Terminology
+
+   IV - Microcode Programming Details
+
+   V - Firmware Structure Layout
+
+   VI - Sample Code for Creating Firmware Files
+
+Revision Information
+====================
+
+November 30, 2007: Rev 1.0 - Initial version
+
+I - Software License for Firmware
+=================================
+
+Each firmware file comes with its own software license.  For information on
+the particular license, please see the license text that is distributed with
+the firmware.
+
+II - Microcode Availability
+===========================
+
+Firmware files are distributed through various channels.  Some are available on
+http://opensource.freescale.com.  For other firmware files, please contact
+your Freescale representative or your operating system vendor.
+
+III - Description and Terminology
+=================================
+
+In this document, the term 'microcode' refers to the sequence of 32-bit
+integers that compose the actual QE microcode.
+
+The term 'firmware' refers to a binary blob that contains the microcode as
+well as other data that
+
+	1) describes the microcode's purpose
+	2) describes how and where to upload the microcode
+	3) specifies the values of various registers
+	4) includes additional data for use by specific device drivers
+
+Firmware files are binary files that contain only a firmware.
+
+IV - Microcode Programming Details
+===================================
+
+The QE architecture allows for only one microcode present in I-RAM for each
+RISC processor.  To replace any current microcode, a full QE reset (which
+disables the microcode) must be performed first.
+
+QE microcode is uploaded using the following procedure:
+
+1) The microcode is placed into I-RAM at a specific location, using the
+   IRAM.IADD and IRAM.IDATA registers.
+
+2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
+   needs split I-RAM.  Split I-RAM is only meaningful for SOCs that have
+   QEs with multiple RISC processors, such as the 8360.  Splitting the I-RAM
+   allows each processor to run a different microcode, effectively creating an
+   asymmetric multiprocessing (AMP) system.
+
+3) The TIBCR trap registers are loaded with the addresses of the trap handlers
+   in the microcode.
+
+4) The RSP.ECCR register is programmed with the value provided.
+
+5) If necessary, device drivers that need the virtual traps and extended mode
+   data will use them.
+
+Virtual Microcode Traps
+
+These virtual traps are conditional branches in the microcode.  These are
+"soft" provisional introduced in the ROMcode in order to enable higher
+flexibility and save h/w traps If new features are activated or an issue is
+being fixed in the RAM package utilizing they should be activated.  This data
+structure signals the microcode which of these virtual traps is active.
+
+This structure contains 6 words that the application should copy to some
+specific been defined.  This table describes the structure::
+
+	---------------------------------------------------------------
+	| Offset in |                  | Destination Offset | Size of |
+	|   array   |     Protocol     |   within PRAM      | Operand |
+	--------------------------------------------------------------|
+	|     0     | Ethernet         |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     4     | ATM              |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     8     | PPP              |      0xF8          | 4 bytes |
+	|           | interworking     |                    |         |
+	---------------------------------------------------------------
+	|     12    | Ethernet RX      |      0x22          | 1 byte  |
+	|           | Distributor Page |                    |         |
+	---------------------------------------------------------------
+	|     16    | ATM Globtal      |      0x28          | 1 byte  |
+	|           | Params Table     |                    |         |
+	---------------------------------------------------------------
+	|     20    | Insert Frame     |      0xF8          | 4 bytes |
+	---------------------------------------------------------------
+
+
+Extended Modes
+
+This is a double word bit array (64 bits) that defines special functionality
+which has an impact on the software drivers.  Each bit has its own impact
+and has special instructions for the s/w associated with it.  This structure is
+described in this table::
+
+	-----------------------------------------------------------------------
+	| Bit #  |     Name     |   Description                               |
+	-----------------------------------------------------------------------
+	|   0    | General      | Indicates that prior to each host command   |
+	|        | push command | given by the application, the software must |
+	|        |              | assert a special host command (push command)|
+	|        |              | CECDR = 0x00800000.                         |
+	|        |              | CECR = 0x01c1000f.                          |
+	-----------------------------------------------------------------------
+	|   1    | UCC ATM      | Indicates that after issuing ATM RX INIT    |
+	|        | RX INIT      | command, the host must issue another special|
+	|        | push command | command (push command) and immediately      |
+	|        |              | following that re-issue the ATM RX INIT     |
+	|        |              | command. (This makes the sequence of        |
+	|        |              | initializing the ATM receiver a sequence of |
+	|        |              | three host commands)                        |
+	|        |              | CECDR = 0x00800000.                         |
+	|        |              | CECR = 0x01c1000f.                          |
+	-----------------------------------------------------------------------
+	|   2    | Add/remove   | Indicates that following the specific host  |
+	|        | command      | command: "Add/Remove entry in Hash Lookup   |
+	|        | validation   | Table" used in Interworking setup, the user |
+	|        |              | must issue another command.                 |
+	|        |              | CECDR = 0xce000003.                         |
+	|        |              | CECR = 0x01c10f58.                          |
+	-----------------------------------------------------------------------
+	|   3    | General push | Indicates that the s/w has to initialize    |
+	|        | command      | some pointers in the Ethernet thread pages  |
+	|        |              | which are used when Header Compression is   |
+	|        |              | activated.  The full details of these       |
+	|        |              | pointers is located in the software drivers.|
+	-----------------------------------------------------------------------
+	|   4    | General push | Indicates that after issuing Ethernet TX    |
+	|        | command      | INIT command, user must issue this command  |
+	|        |              | for each SNUM of Ethernet TX thread.        |
+	|        |              | CECDR = 0x00800003.                         |
+	|        |              | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM},  |
+	|        |              |        1'b{1}, 12'b{0}, 4'b{1}              |
+	-----------------------------------------------------------------------
+	| 5 - 31 |     N/A      | Reserved, set to zero.                      |
+	-----------------------------------------------------------------------
+
+V - Firmware Structure Layout
+==============================
+
+QE microcode from Freescale is typically provided as a header file.  This
+header file contains macros that define the microcode binary itself as well as
+some other data used in uploading that microcode.  The format of these files
+do not lend themselves to simple inclusion into other code.  Hence,
+the need for a more portable format.  This section defines that format.
+
+Instead of distributing a header file, the microcode and related data are
+embedded into a binary blob.  This blob is passed to the qe_upload_firmware()
+function, which parses the blob and performs everything necessary to upload
+the microcode.
+
+All integers are big-endian.  See the comments for function
+qe_upload_firmware() for up-to-date implementation information.
+
+This structure supports versioning, where the version of the structure is
+embedded into the structure itself.  To ensure forward and backwards
+compatibility, all versions of the structure must use the same 'qe_header'
+structure at the beginning.
+
+'header' (type: struct qe_header):
+	The 'length' field is the size, in bytes, of the entire structure,
+	including all the microcode embedded in it, as well as the CRC (if
+	present).
+
+	The 'magic' field is an array of three bytes that contains the letters
+	'Q', 'E', and 'F'.  This is an identifier that indicates that this
+	structure is a QE Firmware structure.
+
+	The 'version' field is a single byte that indicates the version of this
+	structure.  If the layout of the structure should ever need to be
+	changed to add support for additional types of microcode, then the
+	version number should also be changed.
+
+The 'id' field is a null-terminated string(suitable for printing) that
+identifies the firmware.
+
+The 'count' field indicates the number of 'microcode' structures.  There
+must be one and only one 'microcode' structure for each RISC processor.
+Therefore, this field also represents the number of RISC processors for this
+SOC.
+
+The 'soc' structure contains the SOC numbers and revisions used to match
+the microcode to the SOC itself.  Normally, the microcode loader should
+check the data in this structure with the SOC number and revisions, and
+only upload the microcode if there's a match.  However, this check is not
+made on all platforms.
+
+Although it is not recommended, you can specify '0' in the soc.model
+field to skip matching SOCs altogether.
+
+The 'model' field is a 16-bit number that matches the actual SOC. The
+'major' and 'minor' fields are the major and minor revision numbers,
+respectively, of the SOC.
+
+For example, to match the 8323, revision 1.0::
+
+     soc.model = 8323
+     soc.major = 1
+     soc.minor = 0
+
+'padding' is necessary for structure alignment.  This field ensures that the
+'extended_modes' field is aligned on a 64-bit boundary.
+
+'extended_modes' is a bitfield that defines special functionality which has an
+impact on the device drivers.  Each bit has its own impact and has special
+instructions for the driver associated with it.  This field is stored in
+the QE library and available to any driver that calles qe_get_firmware_info().
+
+'vtraps' is an array of 8 words that contain virtual trap values for each
+virtual traps.  As with 'extended_modes', this field is stored in the QE
+library and available to any driver that calles qe_get_firmware_info().
+
+'microcode' (type: struct qe_microcode):
+	For each RISC processor there is one 'microcode' structure.  The first
+	'microcode' structure is for the first RISC, and so on.
+
+	The 'id' field is a null-terminated string suitable for printing that
+	identifies this particular microcode.
+
+	'traps' is an array of 16 words that contain hardware trap values
+	for each of the 16 traps.  If trap[i] is 0, then this particular
+	trap is to be ignored (i.e. not written to TIBCR[i]).  The entire value
+	is written as-is to the TIBCR[i] register, so be sure to set the EN
+	and T_IBP bits if necessary.
+
+	'eccr' is the value to program into the ECCR register.
+
+	'iram_offset' is the offset into IRAM to start writing the
+	microcode.
+
+	'count' is the number of 32-bit words in the microcode.
+
+	'code_offset' is the offset, in bytes, from the beginning of this
+	structure where the microcode itself can be found.  The first
+	microcode binary should be located immediately after the 'microcode'
+	array.
+
+	'major', 'minor', and 'revision' are the major, minor, and revision
+	version numbers, respectively, of the microcode.  If all values are 0,
+	then these fields are ignored.
+
+	'reserved' is necessary for structure alignment.  Since 'microcode'
+	is an array, the 64-bit 'extended_modes' field needs to be aligned
+	on a 64-bit boundary, and this can only happen if the size of
+	'microcode' is a multiple of 8 bytes.  To ensure that, we add
+	'reserved'.
+
+After the last microcode is a 32-bit CRC.  It can be calculated using
+this algorithm::
+
+  u32 crc32(const u8 *p, unsigned int len)
+  {
+	unsigned int i;
+	u32 crc = 0;
+
+	while (len--) {
+	   crc ^= *p++;
+	   for (i = 0; i < 8; i++)
+		   crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+	}
+	return crc;
+  }
+
+VI - Sample Code for Creating Firmware Files
+============================================
+
+A Python program that creates firmware binaries from the header files normally
+distributed by Freescale can be found on http://opensource.freescale.com.
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
deleted file mode 100644
index e7ac24aec4ff..000000000000
--- a/Documentation/powerpc/qe_firmware.txt
+++ /dev/null
@@ -1,295 +0,0 @@
-	   Freescale QUICC Engine Firmware Uploading
-	   -----------------------------------------
-
-(c) 2007 Timur Tabi <timur at freescale.com>,
-    Freescale Semiconductor
-
-Table of Contents
-=================
-
-  I - Software License for Firmware
-
-  II - Microcode Availability
-
-  III - Description and Terminology
-
-  IV - Microcode Programming Details
-
-  V - Firmware Structure Layout
-
-  VI - Sample Code for Creating Firmware Files
-
-Revision Information
-====================
-
-November 30, 2007: Rev 1.0 - Initial version
-
-I - Software License for Firmware
-=================================
-
-Each firmware file comes with its own software license.  For information on
-the particular license, please see the license text that is distributed with
-the firmware.
-
-II - Microcode Availability
-===========================
-
-Firmware files are distributed through various channels.  Some are available on
-http://opensource.freescale.com.  For other firmware files, please contact
-your Freescale representative or your operating system vendor.
-
-III - Description and Terminology
-================================
-
-In this document, the term 'microcode' refers to the sequence of 32-bit
-integers that compose the actual QE microcode.
-
-The term 'firmware' refers to a binary blob that contains the microcode as
-well as other data that
-
-	1) describes the microcode's purpose
-	2) describes how and where to upload the microcode
-	3) specifies the values of various registers
-	4) includes additional data for use by specific device drivers
-
-Firmware files are binary files that contain only a firmware.
-
-IV - Microcode Programming Details
-===================================
-
-The QE architecture allows for only one microcode present in I-RAM for each
-RISC processor.  To replace any current microcode, a full QE reset (which
-disables the microcode) must be performed first.
-
-QE microcode is uploaded using the following procedure:
-
-1) The microcode is placed into I-RAM at a specific location, using the
-   IRAM.IADD and IRAM.IDATA registers.
-
-2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
-   needs split I-RAM.  Split I-RAM is only meaningful for SOCs that have
-   QEs with multiple RISC processors, such as the 8360.  Splitting the I-RAM
-   allows each processor to run a different microcode, effectively creating an
-   asymmetric multiprocessing (AMP) system.
-
-3) The TIBCR trap registers are loaded with the addresses of the trap handlers
-   in the microcode.
-
-4) The RSP.ECCR register is programmed with the value provided.
-
-5) If necessary, device drivers that need the virtual traps and extended mode
-   data will use them.
-
-Virtual Microcode Traps
-
-These virtual traps are conditional branches in the microcode.  These are
-"soft" provisional introduced in the ROMcode in order to enable higher
-flexibility and save h/w traps If new features are activated or an issue is
-being fixed in the RAM package utilizing they should be activated.  This data
-structure signals the microcode which of these virtual traps is active.
-
-This structure contains 6 words that the application should copy to some
-specific been defined.  This table describes the structure.
-
-	---------------------------------------------------------------
-	| Offset in |                  | Destination Offset | Size of |
-	|   array   |     Protocol     |   within PRAM      | Operand |
-	--------------------------------------------------------------|
-	|     0     | Ethernet         |      0xF8          | 4 bytes |
-	|           | interworking     |                    |         |
-	---------------------------------------------------------------
-	|     4     | ATM              |      0xF8          | 4 bytes |
-	|           | interworking     |                    |         |
-	---------------------------------------------------------------
-	|     8     | PPP              |      0xF8          | 4 bytes |
-	|           | interworking     |                    |         |
-	---------------------------------------------------------------
-	|     12    | Ethernet RX      |      0x22          | 1 byte  |
-	|           | Distributor Page |                    |         |
-	---------------------------------------------------------------
-	|     16    | ATM Globtal      |      0x28          | 1 byte  |
-	|           | Params Table     |                    |         |
-	---------------------------------------------------------------
-	|     20    | Insert Frame     |      0xF8          | 4 bytes |
-	---------------------------------------------------------------
-
-
-Extended Modes
-
-This is a double word bit array (64 bits) that defines special functionality
-which has an impact on the software drivers.  Each bit has its own impact
-and has special instructions for the s/w associated with it.  This structure is
-described in this table:
-
-	-----------------------------------------------------------------------
-	| Bit #  |     Name     |   Description                               |
-	-----------------------------------------------------------------------
-	|   0    | General      | Indicates that prior to each host command   |
-	|        | push command | given by the application, the software must |
-	|        |              | assert a special host command (push command)|
-	|        |              | CECDR = 0x00800000.                         |
-	|        |              | CECR = 0x01c1000f.                          |
-	-----------------------------------------------------------------------
-	|   1    | UCC ATM      | Indicates that after issuing ATM RX INIT    |
-	|        | RX INIT      | command, the host must issue another special|
-	|        | push command | command (push command) and immediately      |
-	|        |              | following that re-issue the ATM RX INIT     |
-	|        |              | command. (This makes the sequence of        |
-	|        |              | initializing the ATM receiver a sequence of |
-	|        |              | three host commands)                        |
-	|        |              | CECDR = 0x00800000.                         |
-	|        |              | CECR = 0x01c1000f.                          |
-	-----------------------------------------------------------------------
-	|   2    | Add/remove   | Indicates that following the specific host  |
-	|        | command      | command: "Add/Remove entry in Hash Lookup   |
-	|        | validation   | Table" used in Interworking setup, the user |
-	|        |              | must issue another command.                 |
-	|        |              | CECDR = 0xce000003.                         |
-	|        |              | CECR = 0x01c10f58.                          |
-	-----------------------------------------------------------------------
-	|   3    | General push | Indicates that the s/w has to initialize    |
-	|        | command      | some pointers in the Ethernet thread pages  |
-	|        |              | which are used when Header Compression is   |
-	|        |              | activated.  The full details of these       |
-	|        |              | pointers is located in the software drivers.|
-	-----------------------------------------------------------------------
-	|   4    | General push | Indicates that after issuing Ethernet TX    |
-	|        | command      | INIT command, user must issue this command  |
-	|        |              | for each SNUM of Ethernet TX thread.        |
-	|        |              | CECDR = 0x00800003.                         |
-	|        |              | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM},  |
-	|        |              |        1'b{1}, 12'b{0}, 4'b{1}              |
-	-----------------------------------------------------------------------
-	| 5 - 31 |     N/A      | Reserved, set to zero.                      |
-	-----------------------------------------------------------------------
-
-V - Firmware Structure Layout
-==============================
-
-QE microcode from Freescale is typically provided as a header file.  This
-header file contains macros that define the microcode binary itself as well as
-some other data used in uploading that microcode.  The format of these files
-do not lend themselves to simple inclusion into other code.  Hence,
-the need for a more portable format.  This section defines that format.
-
-Instead of distributing a header file, the microcode and related data are
-embedded into a binary blob.  This blob is passed to the qe_upload_firmware()
-function, which parses the blob and performs everything necessary to upload
-the microcode.
-
-All integers are big-endian.  See the comments for function
-qe_upload_firmware() for up-to-date implementation information.
-
-This structure supports versioning, where the version of the structure is
-embedded into the structure itself.  To ensure forward and backwards
-compatibility, all versions of the structure must use the same 'qe_header'
-structure at the beginning.
-
-'header' (type: struct qe_header):
-	The 'length' field is the size, in bytes, of the entire structure,
-	including all the microcode embedded in it, as well as the CRC (if
-	present).
-
-	The 'magic' field is an array of three bytes that contains the letters
-	'Q', 'E', and 'F'.  This is an identifier that indicates that this
-	structure is a QE Firmware structure.
-
-	The 'version' field is a single byte that indicates the version of this
-	structure.  If the layout of the structure should ever need to be
-	changed to add support for additional types of microcode, then the
-	version number should also be changed.
-
-The 'id' field is a null-terminated string(suitable for printing) that
-identifies the firmware.
-
-The 'count' field indicates the number of 'microcode' structures.  There
-must be one and only one 'microcode' structure for each RISC processor.
-Therefore, this field also represents the number of RISC processors for this
-SOC.
-
-The 'soc' structure contains the SOC numbers and revisions used to match
-the microcode to the SOC itself.  Normally, the microcode loader should
-check the data in this structure with the SOC number and revisions, and
-only upload the microcode if there's a match.  However, this check is not
-made on all platforms.
-
-Although it is not recommended, you can specify '0' in the soc.model
-field to skip matching SOCs altogether.
-
-The 'model' field is a 16-bit number that matches the actual SOC. The
-'major' and 'minor' fields are the major and minor revision numbers,
-respectively, of the SOC.
-
-For example, to match the 8323, revision 1.0:
-     soc.model = 8323
-     soc.major = 1
-     soc.minor = 0
-
-'padding' is necessary for structure alignment.  This field ensures that the
-'extended_modes' field is aligned on a 64-bit boundary.
-
-'extended_modes' is a bitfield that defines special functionality which has an
-impact on the device drivers.  Each bit has its own impact and has special
-instructions for the driver associated with it.  This field is stored in
-the QE library and available to any driver that calles qe_get_firmware_info().
-
-'vtraps' is an array of 8 words that contain virtual trap values for each
-virtual traps.  As with 'extended_modes', this field is stored in the QE
-library and available to any driver that calles qe_get_firmware_info().
-
-'microcode' (type: struct qe_microcode):
-	For each RISC processor there is one 'microcode' structure.  The first
-	'microcode' structure is for the first RISC, and so on.
-
-	The 'id' field is a null-terminated string suitable for printing that
-	identifies this particular microcode.
-
-	'traps' is an array of 16 words that contain hardware trap values
-	for each of the 16 traps.  If trap[i] is 0, then this particular
-	trap is to be ignored (i.e. not written to TIBCR[i]).  The entire value
-	is written as-is to the TIBCR[i] register, so be sure to set the EN
-	and T_IBP bits if necessary.
-
-	'eccr' is the value to program into the ECCR register.
-
-	'iram_offset' is the offset into IRAM to start writing the
-	microcode.
-
-	'count' is the number of 32-bit words in the microcode.
-
-	'code_offset' is the offset, in bytes, from the beginning of this
-	structure where the microcode itself can be found.  The first
-	microcode binary should be located immediately after the 'microcode'
-	array.
-
-	'major', 'minor', and 'revision' are the major, minor, and revision
-	version numbers, respectively, of the microcode.  If all values are 0,
-	then these fields are ignored.
-
-	'reserved' is necessary for structure alignment.  Since 'microcode'
-	is an array, the 64-bit 'extended_modes' field needs to be aligned
-	on a 64-bit boundary, and this can only happen if the size of
-	'microcode' is a multiple of 8 bytes.  To ensure that, we add
-	'reserved'.
-
-After the last microcode is a 32-bit CRC.  It can be calculated using
-this algorithm:
-
-u32 crc32(const u8 *p, unsigned int len)
-{
-	unsigned int i;
-	u32 crc = 0;
-
-	while (len--) {
-	   crc ^= *p++;
-	   for (i = 0; i < 8; i++)
-		   crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
-	}
-	return crc;
-}
-
-VI - Sample Code for Creating Firmware Files
-============================================
-
-A Python program that creates firmware binaries from the header files normally
-distributed by Freescale can be found on http://opensource.freescale.com.
diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
new file mode 100644
index 000000000000..e49f69f941b9
--- /dev/null
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -0,0 +1,110 @@
+===============================================
+Power Architecture 64-bit Linux system call ABI
+===============================================
+
+syscall
+=======
+
+syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI
+specification C function calling sequence, including register preservation
+rules, with the following differences.
+
+.. [1] Some syscalls (typically low-level management functions) may have
+       different calling sequences (e.g., rt_sigreturn).
+
+Parameters and return value
+---------------------------
+The system call number is specified in r0.
+
+There is a maximum of 6 integer parameters to a syscall, passed in r3-r8.
+
+Both a return value and a return error code are returned. cr0.SO is the return
+error code, and r3 is the return value or error code. When cr0.SO is clear,
+the syscall succeeded and r3 is the return value. When cr0.SO is set, the
+syscall failed and r3 is the error code that generally corresponds to errno.
+
+Stack
+-----
+System calls do not modify the caller's stack frame. For example, the caller's
+stack frame LR and CR save fields are not used.
+
+Register preservation rules
+---------------------------
+Register preservation rules match the ELF ABI calling sequence with the
+following differences:
+
+=========== ============= ========================================
+r0          Volatile      (System call number.)
+r3          Volatile      (Parameter 1, and return value.)
+r4-r8       Volatile      (Parameters 2-6.)
+cr0         Volatile      (cr0.SO is the return error condition)
+cr1, cr5-7  Nonvolatile
+lr          Nonvolatile
+=========== ============= ========================================
+
+All floating point and vector data registers as well as control and status
+registers are nonvolatile.
+
+Invocation
+----------
+The syscall is performed with the sc instruction, and returns with execution
+continuing at the instruction following the sc instruction.
+
+Transactional Memory
+--------------------
+Syscall behavior can change if the processor is in transactional or suspended
+transaction state, and the syscall can affect the behavior of the transaction.
+
+If the processor is in suspended state when a syscall is made, the syscall
+will be performed as normal, and will return as normal. The syscall will be
+performed in suspended state, so its side effects will be persistent according
+to the usual transactional memory semantics. A syscall may or may not result
+in the transaction being doomed by hardware.
+
+If the processor is in transactional state when a syscall is made, then the
+behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF
+auxiliary vector.
+
+- If present, which is the case for newer kernels, then the syscall will not
+  be performed and the transaction will be doomed by the kernel with the
+  failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR.
+
+- If not present (older kernels), then the kernel will suspend the
+  transactional state and the syscall will proceed as in the case of a
+  suspended state syscall, and will resume the transactional state before
+  returning to the caller. This case is not well defined or supported, so this
+  behavior should not be relied upon.
+
+
+vsyscall
+========
+
+vsyscall calling sequence matches the syscall calling sequence, with the
+following differences. Some vsyscalls may have different calling sequences.
+
+Parameters and return value
+---------------------------
+r0 is not used as an input. The vsyscall is selected by its address.
+
+Stack
+-----
+The vsyscall may or may not use the caller's stack frame save areas.
+
+Register preservation rules
+---------------------------
+
+=========== ========
+r0          Volatile
+cr1, cr5-7  Volatile
+lr          Volatile
+=========== ========
+
+Invocation
+----------
+The vsyscall is performed with a branch-with-link instruction to the vsyscall
+function address.
+
+Transactional Memory
+--------------------
+vsyscalls will run in the same transactional state as the caller. A vsyscall
+may or may not result in the transaction being doomed by hardware.
diff --git a/Documentation/powerpc/syscall64-abi.txt b/Documentation/powerpc/syscall64-abi.txt
deleted file mode 100644
index fa716a0d88bd..000000000000
--- a/Documentation/powerpc/syscall64-abi.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-===============================================
-Power Architecture 64-bit Linux system call ABI
-===============================================
-
-syscall
-=======
-
-syscall calling sequence[*] matches the Power Architecture 64-bit ELF ABI
-specification C function calling sequence, including register preservation
-rules, with the following differences.
-
-[*] Some syscalls (typically low-level management functions) may have
-    different calling sequences (e.g., rt_sigreturn).
-
-Parameters and return value
----------------------------
-The system call number is specified in r0.
-
-There is a maximum of 6 integer parameters to a syscall, passed in r3-r8.
-
-Both a return value and a return error code are returned. cr0.SO is the return
-error code, and r3 is the return value or error code. When cr0.SO is clear,
-the syscall succeeded and r3 is the return value. When cr0.SO is set, the
-syscall failed and r3 is the error code that generally corresponds to errno.
-
-Stack
------
-System calls do not modify the caller's stack frame. For example, the caller's
-stack frame LR and CR save fields are not used.
-
-Register preservation rules
----------------------------
-Register preservation rules match the ELF ABI calling sequence with the
-following differences:
-
-r0:         Volatile.   (System call number.)
-r3:         Volatile.   (Parameter 1, and return value.)
-r4-r8:      Volatile.   (Parameters 2-6.)
-cr0:        Volatile    (cr0.SO is the return error condition)
-cr1, cr5-7: Nonvolatile.
-lr:         Nonvolatile.
-
-All floating point and vector data registers as well as control and status
-registers are nonvolatile.
-
-Invocation
-----------
-The syscall is performed with the sc instruction, and returns with execution
-continuing at the instruction following the sc instruction.
-
-Transactional Memory
---------------------
-Syscall behavior can change if the processor is in transactional or suspended
-transaction state, and the syscall can affect the behavior of the transaction.
-
-If the processor is in suspended state when a syscall is made, the syscall
-will be performed as normal, and will return as normal. The syscall will be
-performed in suspended state, so its side effects will be persistent according
-to the usual transactional memory semantics. A syscall may or may not result
-in the transaction being doomed by hardware.
-
-If the processor is in transactional state when a syscall is made, then the
-behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF
-auxiliary vector.
-
-- If present, which is the case for newer kernels, then the syscall will not
-  be performed and the transaction will be doomed by the kernel with the
-  failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR.
-
-- If not present (older kernels), then the kernel will suspend the
-  transactional state and the syscall will proceed as in the case of a
-  suspended state syscall, and will resume the transactional state before
-  returning to the caller. This case is not well defined or supported, so this
-  behavior should not be relied upon.
-
-
-vsyscall
-========
-
-vsyscall calling sequence matches the syscall calling sequence, with the
-following differences. Some vsyscalls may have different calling sequences.
-
-Parameters and return value
----------------------------
-r0 is not used as an input. The vsyscall is selected by its address.
-
-Stack
------
-The vsyscall may or may not use the caller's stack frame save areas.
-
-Register preservation rules
----------------------------
-r0: Volatile.
-cr1, cr5-7: Volatile.
-lr: Volatile.
-
-Invocation
-----------
-The vsyscall is performed with a branch-with-link instruction to the vsyscall
-function address.
-
-Transactional Memory
---------------------
-vsyscalls will run in the same transactional state as the caller. A vsyscall
-may or may not result in the transaction being doomed by hardware.
diff --git a/Documentation/powerpc/transactional_memory.rst b/Documentation/powerpc/transactional_memory.rst
new file mode 100644
index 000000000000..09955103acb4
--- /dev/null
+++ b/Documentation/powerpc/transactional_memory.rst
@@ -0,0 +1,247 @@
+============================
+Transactional Memory support
+============================
+
+POWER kernel support for this feature is currently limited to supporting
+its use by user programs.  It is not currently used by the kernel itself.
+
+This file aims to sum up how it is supported by Linux and what behaviour you
+can expect from your user programs.
+
+
+Basic overview
+==============
+
+Hardware Transactional Memory is supported on POWER8 processors, and is a
+feature that enables a different form of atomic memory access.  Several new
+instructions are presented to delimit transactions; transactions are
+guaranteed to either complete atomically or roll back and undo any partial
+changes.
+
+A simple transaction looks like this::
+
+  begin_move_money:
+    tbegin
+    beq   abort_handler
+
+    ld    r4, SAVINGS_ACCT(r3)
+    ld    r5, CURRENT_ACCT(r3)
+    subi  r5, r5, 1
+    addi  r4, r4, 1
+    std   r4, SAVINGS_ACCT(r3)
+    std   r5, CURRENT_ACCT(r3)
+
+    tend
+
+    b     continue
+
+  abort_handler:
+    ... test for odd failures ...
+
+    /* Retry the transaction if it failed because it conflicted with
+     * someone else: */
+    b     begin_move_money
+
+
+The 'tbegin' instruction denotes the start point, and 'tend' the end point.
+Between these points the processor is in 'Transactional' state; any memory
+references will complete in one go if there are no conflicts with other
+transactional or non-transactional accesses within the system.  In this
+example, the transaction completes as though it were normal straight-line code
+IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
+atomic move of money from the current account to the savings account has been
+performed.  Even though the normal ld/std instructions are used (note no
+lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
+updated, or neither will be updated.
+
+If, in the meantime, there is a conflict with the locations accessed by the
+transaction, the transaction will be aborted by the CPU.  Register and memory
+state will roll back to that at the 'tbegin', and control will continue from
+'tbegin+4'.  The branch to abort_handler will be taken this second time; the
+abort handler can check the cause of the failure, and retry.
+
+Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
+and a few other status/flag regs; see the ISA for details.
+
+Causes of transaction aborts
+============================
+
+- Conflicts with cache lines used by other processors
+- Signals
+- Context switches
+- See the ISA for full documentation of everything that will abort transactions.
+
+
+Syscalls
+========
+
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
+
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel.  However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware.  The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure.  No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
+
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library.  Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
+
+
+Signals
+=======
+
+Delivery of signals (both sync and async) during transactions provides a second
+thread state (ucontext/mcontext) to represent the second transactional register
+state.  Signal delivery 'treclaim's to capture both register states, so signals
+abort transactions.  The usual ucontext_t passed to the signal handler
+represents the checkpointed/original register state; the signal appears to have
+arisen at 'tbegin+4'.
+
+If the sighandler ucontext has uc_link set, a second ucontext has been
+delivered.  For future compatibility the MSR.TS field should be checked to
+determine the transactional state -- if so, the second ucontext in uc->uc_link
+represents the active transactional registers at the point of the signal.
+
+For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
+field shows the transactional mode.
+
+For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
+bits are stored in the MSR of the second ucontext, i.e. in
+uc->uc_link->uc_mcontext.regs->msr.  The top word contains the transactional
+state TS.
+
+However, basic signal handlers don't need to be aware of transactions
+and simply returning from the handler will deal with things correctly:
+
+Transaction-aware signal handlers can read the transactional register state
+from the second ucontext.  This will be necessary for crash handlers to
+determine, for example, the address of the instruction causing the SIGSEGV.
+
+Example signal handler::
+
+    void crash_handler(int sig, siginfo_t *si, void *uc)
+    {
+      ucontext_t *ucp = uc;
+      ucontext_t *transactional_ucp = ucp->uc_link;
+
+      if (ucp_link) {
+        u64 msr = ucp->uc_mcontext.regs->msr;
+        /* May have transactional ucontext! */
+  #ifndef __powerpc64__
+        msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
+  #endif
+        if (MSR_TM_ACTIVE(msr)) {
+           /* Yes, we crashed during a transaction.  Oops. */
+   fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
+                           "crashy instruction was at 0x%llx\n",
+                           ucp->uc_mcontext.regs->nip,
+                           transactional_ucp->uc_mcontext.regs->nip);
+        }
+      }
+
+      fix_the_problem(ucp->dar);
+    }
+
+When in an active transaction that takes a signal, we need to be careful with
+the stack.  It's possible that the stack has moved back up after the tbegin.
+The obvious case here is when the tbegin is called inside a function that
+returns before a tend.  In this case, the stack is part of the checkpointed
+transactional memory state.  If we write over this non transactionally or in
+suspend, we are in trouble because if we get a tm abort, the program counter and
+stack pointer will be back at the tbegin but our in memory stack won't be valid
+anymore.
+
+To avoid this, when taking a signal in an active transaction, we need to use
+the stack pointer from the checkpointed state, rather than the speculated
+state.  This ensures that the signal context (written tm suspended) will be
+written below the stack required for the rollback.  The transaction is aborted
+because of the treclaim, so any memory written between the tbegin and the
+signal will be rolled back anyway.
+
+For signals taken in non-TM or suspended mode, we use the
+normal/non-checkpointed stack pointer.
+
+Any transaction initiated inside a sighandler and suspended on return
+from the sighandler to the kernel will get reclaimed and discarded.
+
+Failure cause codes used by kernel
+==================================
+
+These are defined in <asm/reg.h>, and distinguish different reasons why the
+kernel aborted a transaction:
+
+ ====================== ================================
+ TM_CAUSE_RESCHED       Thread was rescheduled.
+ TM_CAUSE_TLBI          Software TLB invalid.
+ TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
+ TM_CAUSE_SYSCALL       Syscall from active transaction.
+ TM_CAUSE_SIGNAL        Signal delivered.
+ TM_CAUSE_MISC          Currently unused.
+ TM_CAUSE_ALIGNMENT     Alignment fault.
+ TM_CAUSE_EMULATE       Emulation that touched memory.
+ ====================== ================================
+
+These can be checked by the user program's abort handler as TEXASR[0:7].  If
+bit 7 is set, it indicates that the error is consider persistent.  For example
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
+
+GDB
+===
+
+GDB and ptrace are not currently TM-aware.  If one stops during a transaction,
+it looks like the transaction has just started (the checkpointed state is
+presented).  The transaction cannot then be continued and will take the failure
+handler route.  Furthermore, the transactional 2nd register state will be
+inaccessible.  GDB can currently be used on programs using TM, but not sensibly
+in parts within transactions.
+
+POWER9
+======
+
+TM on POWER9 has issues with storing the complete register state. This
+is described in this commit::
+
+    commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7
+    Author: Paul Mackerras <paulus@ozlabs.org>
+    Date:   Wed Mar 21 21:32:01 2018 +1100
+    KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
+
+To account for this different POWER9 chips have TM enabled in
+different ways.
+
+On POWER9N DD2.01 and below, TM is disabled. ie
+HWCAP2[PPC_FEATURE2_HTM] is not set.
+
+On POWER9N DD2.1 TM is configured by firmware to always abort a
+transaction when tm suspend occurs. So tsuspend will cause a
+transaction to be aborted and rolled back. Kernel exceptions will also
+cause the transaction to be aborted and rolled back and the exception
+will not occur. If userspace constructs a sigcontext that enables TM
+suspend, the sigcontext will be rejected by the kernel. This mode is
+advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set.
+HWCAP2[PPC_FEATURE2_HTM] is not set in this mode.
+
+On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as
+described in commit 4bb3c7a0208f), hence TM is enabled for guests
+ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that
+makes heavy use of TM suspend (tsuspend or kernel suspend) will result
+in traps into the hypervisor and hence will suffer a performance
+degradation. Host userspace has TM disabled
+ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we make enable it
+at some point in the future if we bring the emulation into host
+userspace context switching).
+
+POWER9C DD1.2 and above are only available with POWERVM and hence
+Linux only runs as a guest. On these systems TM is emulated like on
+POWER9N DD2.2.
+
+Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and
+POWER9C DD1.2. Since earlier POWER9 processors don't support TM
+emulation, migration from POWER8 to POWER9 is not supported there.
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt
deleted file mode 100644
index 52c023e14f26..000000000000
--- a/Documentation/powerpc/transactional_memory.txt
+++ /dev/null
@@ -1,244 +0,0 @@
-Transactional Memory support
-============================
-
-POWER kernel support for this feature is currently limited to supporting
-its use by user programs.  It is not currently used by the kernel itself.
-
-This file aims to sum up how it is supported by Linux and what behaviour you
-can expect from your user programs.
-
-
-Basic overview
-==============
-
-Hardware Transactional Memory is supported on POWER8 processors, and is a
-feature that enables a different form of atomic memory access.  Several new
-instructions are presented to delimit transactions; transactions are
-guaranteed to either complete atomically or roll back and undo any partial
-changes.
-
-A simple transaction looks like this:
-
-begin_move_money:
-  tbegin
-  beq   abort_handler
-
-  ld    r4, SAVINGS_ACCT(r3)
-  ld    r5, CURRENT_ACCT(r3)
-  subi  r5, r5, 1
-  addi  r4, r4, 1
-  std   r4, SAVINGS_ACCT(r3)
-  std   r5, CURRENT_ACCT(r3)
-
-  tend
-
-  b     continue
-
-abort_handler:
-  ... test for odd failures ...
-
-  /* Retry the transaction if it failed because it conflicted with
-   * someone else: */
-  b     begin_move_money
-
-
-The 'tbegin' instruction denotes the start point, and 'tend' the end point.
-Between these points the processor is in 'Transactional' state; any memory
-references will complete in one go if there are no conflicts with other
-transactional or non-transactional accesses within the system.  In this
-example, the transaction completes as though it were normal straight-line code
-IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
-atomic move of money from the current account to the savings account has been
-performed.  Even though the normal ld/std instructions are used (note no
-lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
-updated, or neither will be updated.
-
-If, in the meantime, there is a conflict with the locations accessed by the
-transaction, the transaction will be aborted by the CPU.  Register and memory
-state will roll back to that at the 'tbegin', and control will continue from
-'tbegin+4'.  The branch to abort_handler will be taken this second time; the
-abort handler can check the cause of the failure, and retry.
-
-Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
-and a few other status/flag regs; see the ISA for details.
-
-Causes of transaction aborts
-============================
-
-- Conflicts with cache lines used by other processors
-- Signals
-- Context switches
-- See the ISA for full documentation of everything that will abort transactions.
-
-
-Syscalls
-========
-
-Syscalls made from within an active transaction will not be performed and the
-transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
-| TM_CAUSE_PERSISTENT.
-
-Syscalls made from within a suspended transaction are performed as normal and
-the transaction is not explicitly doomed by the kernel.  However, what the
-kernel does to perform the syscall may result in the transaction being doomed
-by the hardware.  The syscall is performed in suspended mode so any side
-effects will be persistent, independent of transaction success or failure.  No
-guarantees are provided by the kernel about which syscalls will affect
-transaction success.
-
-Care must be taken when relying on syscalls to abort during active transactions
-if the calls are made via a library.  Libraries may cache values (which may
-give the appearance of success) or perform operations that cause transaction
-failure before entering the kernel (which may produce different failure codes).
-Examples are glibc's getpid() and lazy symbol resolution.
-
-
-Signals
-=======
-
-Delivery of signals (both sync and async) during transactions provides a second
-thread state (ucontext/mcontext) to represent the second transactional register
-state.  Signal delivery 'treclaim's to capture both register states, so signals
-abort transactions.  The usual ucontext_t passed to the signal handler
-represents the checkpointed/original register state; the signal appears to have
-arisen at 'tbegin+4'.
-
-If the sighandler ucontext has uc_link set, a second ucontext has been
-delivered.  For future compatibility the MSR.TS field should be checked to
-determine the transactional state -- if so, the second ucontext in uc->uc_link
-represents the active transactional registers at the point of the signal.
-
-For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
-field shows the transactional mode.
-
-For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
-bits are stored in the MSR of the second ucontext, i.e. in
-uc->uc_link->uc_mcontext.regs->msr.  The top word contains the transactional
-state TS.
-
-However, basic signal handlers don't need to be aware of transactions
-and simply returning from the handler will deal with things correctly:
-
-Transaction-aware signal handlers can read the transactional register state
-from the second ucontext.  This will be necessary for crash handlers to
-determine, for example, the address of the instruction causing the SIGSEGV.
-
-Example signal handler:
-
-    void crash_handler(int sig, siginfo_t *si, void *uc)
-    {
-      ucontext_t *ucp = uc;
-      ucontext_t *transactional_ucp = ucp->uc_link;
-
-      if (ucp_link) {
-        u64 msr = ucp->uc_mcontext.regs->msr;
-        /* May have transactional ucontext! */
-#ifndef __powerpc64__
-        msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
-#endif
-        if (MSR_TM_ACTIVE(msr)) {
-           /* Yes, we crashed during a transaction.  Oops. */
-   fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
-                           "crashy instruction was at 0x%llx\n",
-                           ucp->uc_mcontext.regs->nip,
-                           transactional_ucp->uc_mcontext.regs->nip);
-        }
-      }
-
-      fix_the_problem(ucp->dar);
-    }
-
-When in an active transaction that takes a signal, we need to be careful with
-the stack.  It's possible that the stack has moved back up after the tbegin.
-The obvious case here is when the tbegin is called inside a function that
-returns before a tend.  In this case, the stack is part of the checkpointed
-transactional memory state.  If we write over this non transactionally or in
-suspend, we are in trouble because if we get a tm abort, the program counter and
-stack pointer will be back at the tbegin but our in memory stack won't be valid
-anymore.
-
-To avoid this, when taking a signal in an active transaction, we need to use
-the stack pointer from the checkpointed state, rather than the speculated
-state.  This ensures that the signal context (written tm suspended) will be
-written below the stack required for the rollback.  The transaction is aborted
-because of the treclaim, so any memory written between the tbegin and the
-signal will be rolled back anyway.
-
-For signals taken in non-TM or suspended mode, we use the
-normal/non-checkpointed stack pointer.
-
-Any transaction initiated inside a sighandler and suspended on return
-from the sighandler to the kernel will get reclaimed and discarded.
-
-Failure cause codes used by kernel
-==================================
-
-These are defined in <asm/reg.h>, and distinguish different reasons why the
-kernel aborted a transaction:
-
- TM_CAUSE_RESCHED       Thread was rescheduled.
- TM_CAUSE_TLBI          Software TLB invalid.
- TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL       Syscall from active transaction.
- TM_CAUSE_SIGNAL        Signal delivered.
- TM_CAUSE_MISC          Currently unused.
- TM_CAUSE_ALIGNMENT     Alignment fault.
- TM_CAUSE_EMULATE       Emulation that touched memory.
-
-These can be checked by the user program's abort handler as TEXASR[0:7].  If
-bit 7 is set, it indicates that the error is consider persistent.  For example
-a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
-
-GDB
-===
-
-GDB and ptrace are not currently TM-aware.  If one stops during a transaction,
-it looks like the transaction has just started (the checkpointed state is
-presented).  The transaction cannot then be continued and will take the failure
-handler route.  Furthermore, the transactional 2nd register state will be
-inaccessible.  GDB can currently be used on programs using TM, but not sensibly
-in parts within transactions.
-
-POWER9
-======
-
-TM on POWER9 has issues with storing the complete register state. This
-is described in this commit:
-
-    commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7
-    Author: Paul Mackerras <paulus@ozlabs.org>
-    Date:   Wed Mar 21 21:32:01 2018 +1100
-    KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
-
-To account for this different POWER9 chips have TM enabled in
-different ways.
-
-On POWER9N DD2.01 and below, TM is disabled. ie
-HWCAP2[PPC_FEATURE2_HTM] is not set.
-
-On POWER9N DD2.1 TM is configured by firmware to always abort a
-transaction when tm suspend occurs. So tsuspend will cause a
-transaction to be aborted and rolled back. Kernel exceptions will also
-cause the transaction to be aborted and rolled back and the exception
-will not occur. If userspace constructs a sigcontext that enables TM
-suspend, the sigcontext will be rejected by the kernel. This mode is
-advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set.
-HWCAP2[PPC_FEATURE2_HTM] is not set in this mode.
-
-On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as
-described in commit 4bb3c7a0208f), hence TM is enabled for guests
-ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that
-makes heavy use of TM suspend (tsuspend or kernel suspend) will result
-in traps into the hypervisor and hence will suffer a performance
-degradation. Host userspace has TM disabled
-ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we make enable it
-at some point in the future if we bring the emulation into host
-userspace context switching).
-
-POWER9C DD1.2 and above are only available with POWERVM and hence
-Linux only runs as a guest. On these systems TM is emulated like on
-POWER9N DD2.2.
-
-Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and
-POWER9C DD1.2. Since earlier POWER9 processors don't support TM
-emulation, migration from POWER8 to POWER9 is not supported there.
diff --git a/MAINTAINERS b/MAINTAINERS
index c144bd6a432e..8671909ee75c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4468,7 +4468,7 @@ F:	arch/powerpc/platforms/powernv/pci-cxl.c
 F:	drivers/misc/cxl/
 F:	include/misc/cxl*
 F:	include/uapi/misc/cxl.h
-F:	Documentation/powerpc/cxl.txt
+F:	Documentation/powerpc/cxl.rst
 F:	Documentation/ABI/testing/sysfs-class-cxl
 
 CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER
@@ -4479,7 +4479,7 @@ L:	linux-scsi@vger.kernel.org
 S:	Supported
 F:	drivers/scsi/cxlflash/
 F:	include/uapi/scsi/cxlflash_ioctl.h
-F:	Documentation/powerpc/cxlflash.txt
+F:	Documentation/powerpc/cxlflash.rst
 
 CYBERPRO FB DRIVER
 M:	Russell King <linux@armlinux.org.uk>
@@ -12353,7 +12353,7 @@ F:	Documentation/PCI/pci-error-recovery.rst
 F:	drivers/pci/pcie/aer.c
 F:	drivers/pci/pcie/dpc.c
 F:	drivers/pci/pcie/err.c
-F:	Documentation/powerpc/eeh-pci-error-recovery.txt
+F:	Documentation/powerpc/eeh-pci-error-recovery.rst
 F:	arch/powerpc/kernel/eeh*.c
 F:	arch/powerpc/platforms/*/eeh*.c
 F:	arch/powerpc/include/*/eeh*.h
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index eee5bef736c8..6ba3cc2ef8ab 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1531,7 +1531,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  *
  * Call convention:
  *
- * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
  *
  * For hypercalls, the register convention is as follows:
  * r0 volatile
diff --git a/drivers/soc/fsl/qe/qe.c b/drivers/soc/fsl/qe/qe.c
index 62c6ba17991a..c9519e62308c 100644
--- a/drivers/soc/fsl/qe/qe.c
+++ b/drivers/soc/fsl/qe/qe.c
@@ -419,7 +419,7 @@ static void qe_upload_microcode(const void *base,
 /*
  * Upload a microcode to the I-RAM at a specific address.
  *
- * See Documentation/powerpc/qe_firmware.txt for information on QE microcode
+ * See Documentation/powerpc/qe_firmware.rst for information on QE microcode
  * uploading.
  *
  * Currently, only version 1 is supported, so the 'version' field must be
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index cb4db1b3ca3c..5fb214e67d73 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -47,7 +47,7 @@
  * using the 2.6 Linux kernel kref construct.
  *
  * For direction on installation and usage of this driver please reference
- * Documentation/powerpc/hvcs.txt.
+ * Documentation/powerpc/hvcs.rst.
  */
 
 #include <linux/device.h>
diff --git a/include/soc/fsl/qe/qe.h b/include/soc/fsl/qe/qe.h
index 3f9d6b6a5691..c1036d16ed03 100644
--- a/include/soc/fsl/qe/qe.h
+++ b/include/soc/fsl/qe/qe.h
@@ -259,7 +259,7 @@ static inline int qe_alive_during_sleep(void)
 
 /* Structure that defines QE firmware binary files.
  *
- * See Documentation/powerpc/qe_firmware.txt for a description of these
+ * See Documentation/powerpc/qe_firmware.rst for a description of these
  * fields.
  */
 struct qe_firmware {
-- 
cgit v1.2.3


From a6d81d30d3cd87f85bfd922358eb18b8146c4925 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Tue, 16 Jul 2019 16:19:25 -0400
Subject: wait: add wq_has_single_sleeper helper

rq-qos sits in the io path so we want to take locks as sparingly as
possible.  To accomplish this we try not to take the waitqueue head lock
unless we are sure we need to go to sleep, and we have an optimization
to make sure that we don't starve out existing waiters.  Since we check
if there are existing waiters locklessly we need to be able to update
our view of the waitqueue list after we've added ourselves to the
waitqueue.  Accomplish this by adding this helper to see if there is
more than just ourselves on the list.

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/wait.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..30c515520fb2 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -126,6 +126,19 @@ static inline int waitqueue_active(struct wait_queue_head *wq_head)
 	return !list_empty(&wq_head->head);
 }
 
+/**
+ * wq_has_single_sleeper - check if there is only one sleeper
+ * @wq_head: wait queue head
+ *
+ * Returns true of wq_head has only one sleeper on the list.
+ *
+ * Please refer to the comment for waitqueue_active.
+ */
+static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head)
+{
+	return list_is_singular(&wq_head->head);
+}
+
 /**
  * wq_has_sleeper - check if there are any waiting processes
  * @wq_head: wait queue head
-- 
cgit v1.2.3


From 87dbad02d2254b741c71ce859c451fb1ae6f5340 Mon Sep 17 00:00:00 2001
From: Xiaojie Yuan <xiaojie.yuan@amd.com>
Date: Mon, 17 Dec 2018 18:00:26 +0800
Subject: drm/amdgpu: add navi14 asic type

Add CHIP_NAVI14 to the list of asic types.

Signed-off-by: Xiaojie Yuan <xiaojie.yuan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Jack Xiao <Jack.Xiao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 include/drm/amd_asic_type.h                | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a7f893cf724..ba87964c6515 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -99,6 +99,7 @@ static const char *amdgpu_asic_name[] = {
 	"VEGA20",
 	"RAVEN",
 	"NAVI10",
+	"NAVI14",
 	"LAST",
 };
 
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index bcc2bcf32886..0c4766af04af 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -50,6 +50,7 @@ enum amd_asic_type {
 	CHIP_VEGA20,
 	CHIP_RAVEN,
 	CHIP_NAVI10,
+	CHIP_NAVI14,
 	CHIP_LAST,
 };
 
-- 
cgit v1.2.3


From d6c3b24ea28ddae06c9ff9fe9ae58664144d7ae8 Mon Sep 17 00:00:00 2001
From: Le Ma <le.ma@amd.com>
Date: Tue, 9 Jul 2019 09:16:01 -0500
Subject: drm/amdgpu: add Arcturus asic type

Add asic type for Arcturus.

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 include/drm/amd_asic_type.h                | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 6aa3c3e5bd50..c1edf105c660 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -99,6 +99,7 @@ static const char *amdgpu_asic_name[] = {
 	"VEGA12",
 	"VEGA20",
 	"RAVEN",
+	"ARCTURUS",
 	"NAVI10",
 	"NAVI14",
 	"LAST",
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 0c4766af04af..0f5a12a99948 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -49,6 +49,7 @@ enum amd_asic_type {
 	CHIP_VEGA12,
 	CHIP_VEGA20,
 	CHIP_RAVEN,
+	CHIP_ARCTURUS,
 	CHIP_NAVI10,
 	CHIP_NAVI14,
 	CHIP_LAST,
-- 
cgit v1.2.3


From 00289cd87676e14913d2d8492d1ce05c4baafdae Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 17 Jul 2019 18:07:53 -0700
Subject: drivers/base: Introduce kill_device()

The libnvdimm subsystem arranges for devices to be destroyed as a result
of a sysfs operation. Since device_unregister() cannot be called from
an actively running sysfs attribute of the same device libnvdimm
arranges for device_unregister() to be performed in an out-of-line async
context.

The driver core maintains a 'dead' state for coordinating its own racing
async registration / de-registration requests. Rather than add local
'dead' state tracking infrastructure to libnvdimm device objects, export
the existing state tracking via a new kill_device() helper.

The kill_device() helper simply marks the device as dead, i.e. that it
is on its way to device_del(), or returns that the device was already
dead. This can be used in advance of calling device_unregister() for
subsystems like libnvdimm that might need to handle multiple user
threads racing to delete a device.

This refactoring does not change any behavior, but it is a pre-requisite
for follow-on fixes and therefore marked for -stable.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Fixes: 4d88a97aa9e8 ("libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver...")
Cc: <stable@vger.kernel.org>
Tested-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/156341207332.292348.14959761496009347574.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/base/core.c    | 27 +++++++++++++++++++--------
 include/linux/device.h |  1 +
 2 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index fd7511e04e62..eaf3aa0cb803 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2211,6 +2211,24 @@ void put_device(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(put_device);
 
+bool kill_device(struct device *dev)
+{
+	/*
+	 * Require the device lock and set the "dead" flag to guarantee that
+	 * the update behavior is consistent with the other bitfields near
+	 * it and that we cannot have an asynchronous probe routine trying
+	 * to run while we are tearing out the bus/class/sysfs from
+	 * underneath the device.
+	 */
+	lockdep_assert_held(&dev->mutex);
+
+	if (dev->p->dead)
+		return false;
+	dev->p->dead = true;
+	return true;
+}
+EXPORT_SYMBOL_GPL(kill_device);
+
 /**
  * device_del - delete device from system.
  * @dev: device.
@@ -2230,15 +2248,8 @@ void device_del(struct device *dev)
 	struct kobject *glue_dir = NULL;
 	struct class_interface *class_intf;
 
-	/*
-	 * Hold the device lock and set the "dead" flag to guarantee that
-	 * the update behavior is consistent with the other bitfields near
-	 * it and that we cannot have an asynchronous probe routine trying
-	 * to run while we are tearing out the bus/class/sysfs from
-	 * underneath the device.
-	 */
 	device_lock(dev);
-	dev->p->dead = true;
+	kill_device(dev);
 	device_unlock(dev);
 
 	/* Notify clients of device removal.  This call must come
diff --git a/include/linux/device.h b/include/linux/device.h
index e85264fb6616..0da5c67f6be1 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1373,6 +1373,7 @@ extern int (*platform_notify_remove)(struct device *dev);
  */
 extern struct device *get_device(struct device *dev);
 extern void put_device(struct device *dev);
+extern bool kill_device(struct device *dev);
 
 #ifdef CONFIG_DEVTMPFS
 extern int devtmpfs_create_node(struct device *dev);
-- 
cgit v1.2.3


From 87a30e1f05d73a34e6d1895065541369131aaf1c Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 17 Jul 2019 18:08:26 -0700
Subject: driver-core, libnvdimm: Let device subsystems add local lockdep
 coverage

For good reason, the standard device_lock() is marked
lockdep_set_novalidate_class() because there is simply no sane way to
describe the myriad ways the device_lock() ordered with other locks.
However, that leaves subsystems that know their own local device_lock()
ordering rules to find lock ordering mistakes manually. Instead,
introduce an optional / additional lockdep-enabled lock that a subsystem
can acquire in all the same paths that the device_lock() is acquired.

A conversion of the NFIT driver and NVDIMM subsystem to a
lockdep-validate device_lock() scheme is included. The
debug_nvdimm_lock() implementation implements the correct lock-class and
stacking order for the libnvdimm device topology hierarchy.

Yes, this is a hack, but hopefully it is a useful hack for other
subsystems device_lock() debug sessions. Quoting Greg:

    "Yeah, it feels a bit hacky but it's really up to a subsystem to mess up
     using it as much as anything else, so user beware :)

     I don't object to it if it makes things easier for you to debug."

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Link: https://lore.kernel.org/r/156341210661.292348.7014034644265455704.stgit@dwillia2-desk3.amr.corp.intel.com
---
 drivers/acpi/nfit/core.c        | 28 ++++++++---------
 drivers/acpi/nfit/nfit.h        | 24 +++++++++++++++
 drivers/base/core.c             |  3 ++
 drivers/nvdimm/btt_devs.c       | 16 +++++-----
 drivers/nvdimm/bus.c            | 28 ++++++++++-------
 drivers/nvdimm/core.c           | 10 +++---
 drivers/nvdimm/dimm_devs.c      |  4 +--
 drivers/nvdimm/namespace_devs.c | 36 +++++++++++-----------
 drivers/nvdimm/nd-core.h        | 68 +++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/pfn_devs.c       | 24 +++++++--------
 drivers/nvdimm/pmem.c           |  4 +--
 drivers/nvdimm/region.c         |  2 +-
 drivers/nvdimm/region_devs.c    | 16 +++++-----
 include/linux/device.h          |  5 +++
 14 files changed, 187 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 23022cf20d26..f22139458ce1 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1282,7 +1282,7 @@ static ssize_t hw_error_scrub_store(struct device *dev,
 	if (rc)
 		return rc;
 
-	device_lock(dev);
+	nfit_device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
@@ -1299,7 +1299,7 @@ static ssize_t hw_error_scrub_store(struct device *dev,
 			break;
 		}
 	}
-	device_unlock(dev);
+	nfit_device_unlock(dev);
 	if (rc)
 		return rc;
 	return size;
@@ -1319,7 +1319,7 @@ static ssize_t scrub_show(struct device *dev,
 	ssize_t rc = -ENXIO;
 	bool busy;
 
-	device_lock(dev);
+	nfit_device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (!nd_desc) {
 		device_unlock(dev);
@@ -1339,7 +1339,7 @@ static ssize_t scrub_show(struct device *dev,
 	}
 
 	mutex_unlock(&acpi_desc->init_mutex);
-	device_unlock(dev);
+	nfit_device_unlock(dev);
 	return rc;
 }
 
@@ -1356,14 +1356,14 @@ static ssize_t scrub_store(struct device *dev,
 	if (val != 1)
 		return -EINVAL;
 
-	device_lock(dev);
+	nfit_device_lock(dev);
 	nd_desc = dev_get_drvdata(dev);
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
 		rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
 	}
-	device_unlock(dev);
+	nfit_device_unlock(dev);
 	if (rc)
 		return rc;
 	return size;
@@ -1749,9 +1749,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
 	struct acpi_device *adev = data;
 	struct device *dev = &adev->dev;
 
-	device_lock(dev->parent);
+	nfit_device_lock(dev->parent);
 	__acpi_nvdimm_notify(dev, event);
-	device_unlock(dev->parent);
+	nfit_device_unlock(dev->parent);
 }
 
 static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
@@ -3457,8 +3457,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 	struct device *dev = acpi_desc->dev;
 
 	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
-	device_lock(dev);
-	device_unlock(dev);
+	nfit_device_lock(dev);
+	nfit_device_unlock(dev);
 
 	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
@@ -3602,8 +3602,8 @@ void acpi_nfit_shutdown(void *data)
 	 * acpi_nfit_ars_rescan() submissions have had a chance to
 	 * either submit or see ->cancel set.
 	 */
-	device_lock(bus_dev);
-	device_unlock(bus_dev);
+	nfit_device_lock(bus_dev);
+	nfit_device_unlock(bus_dev);
 
 	flush_workqueue(nfit_wq);
 }
@@ -3746,9 +3746,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
 
 static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
 {
-	device_lock(&adev->dev);
+	nfit_device_lock(&adev->dev);
 	__acpi_nfit_notify(&adev->dev, adev->handle, event);
-	device_unlock(&adev->dev);
+	nfit_device_unlock(&adev->dev);
 }
 
 static const struct acpi_device_id acpi_nfit_ids[] = {
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 6ee2b02af73e..24241941181c 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -312,6 +312,30 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
 }
 
+#ifdef CONFIG_PROVE_LOCKING
+static inline void nfit_device_lock(struct device *dev)
+{
+	device_lock(dev);
+	mutex_lock(&dev->lockdep_mutex);
+}
+
+static inline void nfit_device_unlock(struct device *dev)
+{
+	mutex_unlock(&dev->lockdep_mutex);
+	device_unlock(dev);
+}
+#else
+static inline void nfit_device_lock(struct device *dev)
+{
+	device_lock(dev);
+}
+
+static inline void nfit_device_unlock(struct device *dev)
+{
+	device_unlock(dev);
+}
+#endif
+
 const guid_t *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
 void acpi_nfit_shutdown(void *data);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index eaf3aa0cb803..4825949d6547 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1663,6 +1663,9 @@ void device_initialize(struct device *dev)
 	kobject_init(&dev->kobj, &device_ktype);
 	INIT_LIST_HEAD(&dev->dma_pools);
 	mutex_init(&dev->mutex);
+#ifdef CONFIG_PROVE_LOCKING
+	mutex_init(&dev->lockdep_mutex);
+#endif
 	lockdep_set_novalidate_class(&dev->mutex);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 62d00fffa4af..3508a79110c7 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -62,14 +62,14 @@ static ssize_t sector_size_store(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
 			btt_lbasize_supported);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -91,11 +91,11 @@ static ssize_t uuid_store(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -120,13 +120,13 @@ static ssize_t namespace_store(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -138,14 +138,14 @@ static ssize_t size_show(struct device *dev,
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver)
 		rc = sprintf(buf, "%llu\n", nd_btt->size);
 	else {
 		/* no size to convey if the btt instance is disabled */
 		rc = -ENXIO;
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index df41f3571dc9..798c5c4aea9c 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -26,7 +26,7 @@
 
 int nvdimm_major;
 static int nvdimm_bus_major;
-static struct class *nd_class;
+struct class *nd_class;
 static DEFINE_IDA(nd_ida);
 
 static int to_nd_device_type(struct device *dev)
@@ -91,7 +91,10 @@ static int nvdimm_bus_probe(struct device *dev)
 			dev->driver->name, dev_name(dev));
 
 	nvdimm_bus_probe_start(nvdimm_bus);
+	debug_nvdimm_lock(dev);
 	rc = nd_drv->probe(dev);
+	debug_nvdimm_unlock(dev);
+
 	if (rc == 0)
 		nd_region_probe_success(nvdimm_bus, dev);
 	else
@@ -113,8 +116,11 @@ static int nvdimm_bus_remove(struct device *dev)
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 	int rc = 0;
 
-	if (nd_drv->remove)
+	if (nd_drv->remove) {
+		debug_nvdimm_lock(dev);
 		rc = nd_drv->remove(dev);
+		debug_nvdimm_unlock(dev);
+	}
 	nd_region_disable(nvdimm_bus, dev);
 
 	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
@@ -140,7 +146,7 @@ static void nvdimm_bus_shutdown(struct device *dev)
 
 void nd_device_notify(struct device *dev, enum nvdimm_event event)
 {
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver) {
 		struct nd_device_driver *nd_drv;
 
@@ -148,7 +154,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event)
 		if (nd_drv->notify)
 			nd_drv->notify(dev, event);
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 }
 EXPORT_SYMBOL(nd_device_notify);
 
@@ -296,7 +302,7 @@ static void nvdimm_bus_release(struct device *dev)
 	kfree(nvdimm_bus);
 }
 
-static bool is_nvdimm_bus(struct device *dev)
+bool is_nvdimm_bus(struct device *dev)
 {
 	return dev->release == nvdimm_bus_release;
 }
@@ -575,9 +581,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
 		 * or otherwise let the async path handle it if the
 		 * unregistration was already queued.
 		 */
-		device_lock(dev);
+		nd_device_lock(dev);
 		killed = kill_device(dev);
-		device_unlock(dev);
+		nd_device_unlock(dev);
 
 		if (!killed)
 			return;
@@ -888,10 +894,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
 		if (nvdimm_bus->probe_active == 0)
 			break;
 		nvdimm_bus_unlock(dev);
-		device_unlock(dev);
+		nd_device_unlock(dev);
 		wait_event(nvdimm_bus->wait,
 				nvdimm_bus->probe_active == 0);
-		device_lock(dev);
+		nd_device_lock(dev);
 		nvdimm_bus_lock(dev);
 	} while (true);
 }
@@ -1107,7 +1113,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		goto out;
 	}
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
 	if (rc)
@@ -1129,7 +1135,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 out_unlock:
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 out:
 	kfree(in_env);
 	kfree(out_env);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 5e1f060547bf..9204f1e9fd14 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -246,7 +246,7 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
  *
  * Enforce that uuids can only be changed while the device is disabled
  * (driver detached)
- * LOCKING: expects device_lock() is held on entry
+ * LOCKING: expects nd_device_lock() is held on entry
  */
 int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
 		size_t len)
@@ -347,15 +347,15 @@ static DEVICE_ATTR_RO(provider);
 
 static int flush_namespaces(struct device *dev, void *data)
 {
-	device_lock(dev);
-	device_unlock(dev);
+	nd_device_lock(dev);
+	nd_device_unlock(dev);
 	return 0;
 }
 
 static int flush_regions_dimms(struct device *dev, void *data)
 {
-	device_lock(dev);
-	device_unlock(dev);
+	nd_device_lock(dev);
+	nd_device_unlock(dev);
 	device_for_each_child(dev, NULL, flush_namespaces);
 	return 0;
 }
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index dfecd6e17043..29a065e769ea 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -484,12 +484,12 @@ static ssize_t security_store(struct device *dev,
 	 * done while probing is idle and the DIMM is not in active use
 	 * in any region.
 	 */
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __security_store(dev, buf, len);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index a434a5964cb9..92cd809d7e43 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -410,7 +410,7 @@ static ssize_t alt_name_store(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __alt_name_store(dev, buf, len);
@@ -418,7 +418,7 @@ static ssize_t alt_name_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1077,7 +1077,7 @@ static ssize_t size_store(struct device *dev,
 	if (rc)
 		return rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __size_store(dev, val);
@@ -1103,7 +1103,7 @@ static ssize_t size_store(struct device *dev,
 	dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1286,7 +1286,7 @@ static ssize_t uuid_store(struct device *dev,
 	} else
 		return -ENXIO;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	if (to_ndns(dev)->claim)
@@ -1302,7 +1302,7 @@ static ssize_t uuid_store(struct device *dev,
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1376,7 +1376,7 @@ static ssize_t sector_size_store(struct device *dev,
 	} else
 		return -ENXIO;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	if (to_ndns(dev)->claim)
 		rc = -EBUSY;
@@ -1387,7 +1387,7 @@ static ssize_t sector_size_store(struct device *dev,
 	dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
 			buf, buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -1502,9 +1502,9 @@ static ssize_t holder_show(struct device *dev,
 	struct nd_namespace_common *ndns = to_ndns(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -1541,7 +1541,7 @@ static ssize_t holder_class_store(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	rc = __holder_class_store(dev, buf);
@@ -1549,7 +1549,7 @@ static ssize_t holder_class_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc < 0 ? rc : len;
 }
@@ -1560,7 +1560,7 @@ static ssize_t holder_class_show(struct device *dev,
 	struct nd_namespace_common *ndns = to_ndns(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (ndns->claim_class == NVDIMM_CCLASS_NONE)
 		rc = sprintf(buf, "\n");
 	else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
@@ -1572,7 +1572,7 @@ static ssize_t holder_class_show(struct device *dev,
 		rc = sprintf(buf, "dax\n");
 	else
 		rc = sprintf(buf, "<unknown>\n");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -1586,7 +1586,7 @@ static ssize_t mode_show(struct device *dev,
 	char *mode;
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	claim = ndns->claim;
 	if (claim && is_nd_btt(claim))
 		mode = "safe";
@@ -1599,7 +1599,7 @@ static ssize_t mode_show(struct device *dev,
 	else
 		mode = "raw";
 	rc = sprintf(buf, "%s\n", mode);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -1703,8 +1703,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		 * Flush any in-progess probes / removals in the driver
 		 * for the raw personality of this namespace.
 		 */
-		device_lock(&ndns->dev);
-		device_unlock(&ndns->dev);
+		nd_device_lock(&ndns->dev);
+		nd_device_unlock(&ndns->dev);
 		if (ndns->dev.driver) {
 			dev_dbg(&ndns->dev, "is active, can't bind %s\n",
 					dev_name(dev));
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 6cd470547106..0ac52b6eb00e 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -9,6 +9,7 @@
 #include <linux/sizes.h>
 #include <linux/mutex.h>
 #include <linux/nd.h>
+#include "nd.h"
 
 extern struct list_head nvdimm_bus_list;
 extern struct mutex nvdimm_bus_list_mutex;
@@ -182,4 +183,71 @@ ssize_t nd_namespace_store(struct device *dev,
 		struct nd_namespace_common **_ndns, const char *buf,
 		size_t len);
 struct nd_pfn *to_nd_pfn_safe(struct device *dev);
+bool is_nvdimm_bus(struct device *dev);
+
+#ifdef CONFIG_PROVE_LOCKING
+extern struct class *nd_class;
+
+enum {
+	LOCK_BUS,
+	LOCK_NDCTL,
+	LOCK_REGION,
+	LOCK_DIMM = LOCK_REGION,
+	LOCK_NAMESPACE,
+	LOCK_CLAIM,
+};
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+	if (is_nd_region(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION);
+	else if (is_nvdimm(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM);
+	else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM);
+	else if (dev->parent && (is_nd_region(dev->parent)))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE);
+	else if (is_nvdimm_bus(dev))
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS);
+	else if (dev->class && dev->class == nd_class)
+		mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL);
+	else
+		dev_WARN(dev, "unknown lock level\n");
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+	mutex_unlock(&dev->lockdep_mutex);
+}
+
+static inline void nd_device_lock(struct device *dev)
+{
+	device_lock(dev);
+	debug_nvdimm_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+	debug_nvdimm_unlock(dev);
+	device_unlock(dev);
+}
+#else
+static inline void nd_device_lock(struct device *dev)
+{
+	device_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+	device_unlock(dev);
+}
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+}
+#endif
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 0f81fc56bbfd..9b09fe18e666 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -67,7 +67,7 @@ static ssize_t mode_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc = 0;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	if (dev->driver)
 		rc = -EBUSY;
@@ -89,7 +89,7 @@ static ssize_t mode_store(struct device *dev,
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -132,14 +132,14 @@ static ssize_t align_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
 			nd_pfn_supported_alignments());
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -161,11 +161,11 @@ static ssize_t uuid_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc ? rc : len;
 }
@@ -190,13 +190,13 @@ static ssize_t namespace_store(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -208,7 +208,7 @@ static ssize_t resource_show(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver) {
 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -222,7 +222,7 @@ static ssize_t resource_show(struct device *dev,
 		/* no address to convey if the pfn instance is disabled */
 		rc = -ENXIO;
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
@@ -234,7 +234,7 @@ static ssize_t size_show(struct device *dev,
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver) {
 		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -250,7 +250,7 @@ static ssize_t size_show(struct device *dev,
 		/* no size to convey if the pfn instance is disabled */
 		rc = -ENXIO;
 	}
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 28cb44c61d4a..53797e7be18a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -520,8 +520,8 @@ static int nd_pmem_remove(struct device *dev)
 		nvdimm_namespace_detach_btt(to_nd_btt(dev));
 	else {
 		/*
-		 * Note, this assumes device_lock() context to not race
-		 * nd_pmem_notify()
+		 * Note, this assumes nd_device_lock() context to not
+		 * race nd_pmem_notify()
 		 */
 		sysfs_put(pmem->bb_state);
 		pmem->bb_state = NULL;
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 488c47ac4c4a..37bf8719a2a4 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -102,7 +102,7 @@ static int nd_region_remove(struct device *dev)
 	nvdimm_bus_unlock(dev);
 
 	/*
-	 * Note, this assumes device_lock() context to not race
+	 * Note, this assumes nd_device_lock() context to not race
 	 * nd_region_notify()
 	 */
 	sysfs_put(nd_region->bb_state);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index a15276cdec7d..91b5a7ade0d5 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -329,7 +329,7 @@ static ssize_t set_cookie_show(struct device *dev,
 	 * the v1.1 namespace label cookie definition. To read all this
 	 * data we need to wait for probing to settle.
 	 */
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	if (nd_region->ndr_mappings) {
@@ -346,7 +346,7 @@ static ssize_t set_cookie_show(struct device *dev,
 		}
 	}
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	if (rc)
 		return rc;
@@ -422,12 +422,12 @@ static ssize_t available_size_show(struct device *dev,
 	 * memory nvdimm_bus_lock() is dropped, but that's userspace's
 	 * problem to not race itself.
 	 */
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	available = nd_region_available_dpa(nd_region);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return sprintf(buf, "%llu\n", available);
 }
@@ -439,12 +439,12 @@ static ssize_t max_available_extent_show(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev);
 	unsigned long long available = 0;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	wait_nvdimm_bus_probe_idle(dev);
 	available = nd_region_allocatable_dpa(nd_region);
 	nvdimm_bus_unlock(dev);
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return sprintf(buf, "%llu\n", available);
 }
@@ -563,12 +563,12 @@ static ssize_t region_badblocks_show(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev);
 	ssize_t rc;
 
-	device_lock(dev);
+	nd_device_lock(dev);
 	if (dev->driver)
 		rc = badblocks_show(&nd_region->bb, buf, 0);
 	else
 		rc = -ENXIO;
-	device_unlock(dev);
+	nd_device_unlock(dev);
 
 	return rc;
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 0da5c67f6be1..9237b857b598 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -909,6 +909,8 @@ struct dev_links_info {
  * 		This identifies the device type and carries type-specific
  * 		information.
  * @mutex:	Mutex to synchronize calls to its driver.
+ * @lockdep_mutex: An optional debug lock that a subsystem can use as a
+ * 		peer lock to gain localized lockdep coverage of the device_lock.
  * @bus:	Type of bus device is on.
  * @driver:	Which driver has allocated this
  * @platform_data: Platform data specific to the device.
@@ -991,6 +993,9 @@ struct device {
 					   core doesn't touch it */
 	void		*driver_data;	/* Driver data, set and get with
 					   dev_set_drvdata/dev_get_drvdata */
+#ifdef CONFIG_PROVE_LOCKING
+	struct mutex		lockdep_mutex;
+#endif
 	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
 					 */
-- 
cgit v1.2.3


From 0c7294ddae73ad8d7532f95a86259e311e991a55 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 19 Jul 2019 18:20:14 +0200
Subject: net: flow_offload: remove netns parameter from flow_block_cb_alloc()

No need to annotate the netns on the flow block callback object,
flow_block_cb_is_busy() already checks for used blocks.

Fixes: d63db30c8537 ("net: flow_offload: add flow_block_cb_alloc() and flow_block_cb_free()")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c    | 3 +--
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      | 5 ++---
 drivers/net/ethernet/mscc/ocelot_flower.c           | 3 +--
 drivers/net/ethernet/mscc/ocelot_tc.c               | 2 +-
 drivers/net/ethernet/netronome/nfp/flower/offload.c | 6 ++----
 include/net/flow_offload.h                          | 3 +--
 net/core/flow_offload.c                             | 9 +++------
 net/dsa/slave.c                                     | 2 +-
 8 files changed, 12 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 7245d287633d..2162412073c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -735,8 +735,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
 		list_add(&indr_priv->list,
 			 &rpriv->uplink_priv.tc_indr_block_priv_list);
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       mlx5e_rep_indr_setup_block_cb,
+		block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb,
 					       indr_priv, indr_priv,
 					       mlx5e_rep_indr_tc_block_unbind);
 		if (IS_ERR(block_cb)) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4d34d42b3b0e..a469035400cf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1610,8 +1610,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 		acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
 		if (!acl_block)
 			return -ENOMEM;
-		block_cb = flow_block_cb_alloc(f->net,
-					       mlxsw_sp_setup_tc_block_cb_flower,
+		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
 					       mlxsw_sp, acl_block,
 					       mlxsw_sp_tc_block_flower_release);
 		if (IS_ERR(block_cb)) {
@@ -1702,7 +1701,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 					  &mlxsw_sp_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, mlxsw_sp_port,
+		block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port,
 					       mlxsw_sp_port, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 7aaddc09c185..6a11aea8b186 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -323,8 +323,7 @@ int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port,
 		if (!port_block)
 			return -ENOMEM;
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       ocelot_setup_tc_block_cb_flower,
+		block_cb = flow_block_cb_alloc(ocelot_setup_tc_block_cb_flower,
 					       port, port_block,
 					       ocelot_tc_block_unbind);
 		if (IS_ERR(block_cb)) {
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
index 9e6464ffae5d..abbcb66bf5ac 100644
--- a/drivers/net/ethernet/mscc/ocelot_tc.c
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -156,7 +156,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port,
 		if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, port, port, NULL);
+		block_cb = flow_block_cb_alloc(cb, port, port, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index faa8ba012a37..93ab0db6c504 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1318,8 +1318,7 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 					  &nfp_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       nfp_flower_setup_tc_block_cb,
+		block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb,
 					       repr, repr, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
@@ -1424,8 +1423,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
 		cb_priv->app = app;
 		list_add(&cb_priv->list, &priv->indr_block_cb_priv);
 
-		block_cb = flow_block_cb_alloc(f->net,
-					       nfp_flower_setup_indr_block_cb,
+		block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb,
 					       cb_priv, cb_priv,
 					       nfp_flower_setup_indr_tc_release);
 		if (IS_ERR(block_cb)) {
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index db337299e81e..aa9b5287b231 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -264,7 +264,6 @@ struct flow_block_offload {
 struct flow_block_cb {
 	struct list_head	driver_list;
 	struct list_head	list;
-	struct net		*net;
 	tc_setup_cb_t		*cb;
 	void			*cb_ident;
 	void			*cb_priv;
@@ -272,7 +271,7 @@ struct flow_block_cb {
 	unsigned int		refcnt;
 };
 
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(tc_setup_cb_t *cb,
 					  void *cb_ident, void *cb_priv,
 					  void (*release)(void *cb_priv));
 void flow_block_cb_free(struct flow_block_cb *block_cb);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 76f8db3841d7..507de4b48815 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule,
 }
 EXPORT_SYMBOL(flow_rule_match_enc_opts);
 
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(tc_setup_cb_t *cb,
 					  void *cb_ident, void *cb_priv,
 					  void (*release)(void *cb_priv))
 {
@@ -175,7 +175,6 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
 	if (!block_cb)
 		return ERR_PTR(-ENOMEM);
 
-	block_cb->net = net;
 	block_cb->cb = cb;
 	block_cb->cb_ident = cb_ident;
 	block_cb->cb_priv = cb_priv;
@@ -200,8 +199,7 @@ struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
 	struct flow_block_cb *block_cb;
 
 	list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
-		if (block_cb->net == f->net &&
-		    block_cb->cb == cb &&
+		if (block_cb->cb == cb &&
 		    block_cb->cb_ident == cb_ident)
 			return block_cb;
 	}
@@ -261,8 +259,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 		if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, cb_ident,
-					       cb_priv, NULL);
+		block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 614c38ece104..6ca9ec58f881 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -967,7 +967,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 		if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
 			return -EBUSY;
 
-		block_cb = flow_block_cb_alloc(f->net, cb, dev, dev, NULL);
+		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
 		if (IS_ERR(block_cb))
 			return PTR_ERR(block_cb);
 
-- 
cgit v1.2.3


From a7323311515d488b7714bb7504a1d50fabb0bfcf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 19 Jul 2019 18:20:15 +0200
Subject: net: flow_offload: rename tc_setup_cb_t to flow_setup_cb_t

Rename this type definition and adapt users.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c |  2 +-
 drivers/net/ethernet/mscc/ocelot_tc.c          |  2 +-
 include/net/flow_offload.h                     | 17 +++++++++++------
 include/net/pkt_cls.h                          |  5 ++---
 include/net/sch_generic.h                      |  6 ++----
 net/core/flow_offload.c                        |  9 +++++----
 net/dsa/slave.c                                |  2 +-
 net/sched/cls_api.c                            |  2 +-
 net/sched/cls_bpf.c                            |  2 +-
 net/sched/cls_flower.c                         |  2 +-
 net/sched/cls_matchall.c                       |  2 +-
 net/sched/cls_u32.c                            |  6 +++---
 12 files changed, 30 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index a469035400cf..51cd0b6f1f3e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1679,7 +1679,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 				   struct flow_block_offload *f)
 {
 	struct flow_block_cb *block_cb;
-	tc_setup_cb_t *cb;
+	flow_setup_cb_t *cb;
 	bool ingress;
 	int err;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
index abbcb66bf5ac..fba9512e9ca6 100644
--- a/drivers/net/ethernet/mscc/ocelot_tc.c
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -134,7 +134,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port,
 				 struct flow_block_offload *f)
 {
 	struct flow_block_cb *block_cb;
-	tc_setup_cb_t *cb;
+	flow_setup_cb_t *cb;
 	int err;
 
 	netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n",
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index aa9b5287b231..23b299235baf 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -2,8 +2,8 @@
 #define _NET_FLOW_OFFLOAD_H
 
 #include <linux/kernel.h>
+#include <linux/list.h>
 #include <net/flow_dissector.h>
-#include <net/sch_generic.h>
 
 struct flow_match {
 	struct flow_dissector	*dissector;
@@ -261,23 +261,27 @@ struct flow_block_offload {
 	struct netlink_ext_ack *extack;
 };
 
+enum tc_setup_type;
+typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data,
+			    void *cb_priv);
+
 struct flow_block_cb {
 	struct list_head	driver_list;
 	struct list_head	list;
-	tc_setup_cb_t		*cb;
+	flow_setup_cb_t		*cb;
 	void			*cb_ident;
 	void			*cb_priv;
 	void			(*release)(void *cb_priv);
 	unsigned int		refcnt;
 };
 
-struct flow_block_cb *flow_block_cb_alloc(tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
 					  void *cb_ident, void *cb_priv,
 					  void (*release)(void *cb_priv));
 void flow_block_cb_free(struct flow_block_cb *block_cb);
 
 struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *offload,
-					   tc_setup_cb_t *cb, void *cb_ident);
+					   flow_setup_cb_t *cb, void *cb_ident);
 
 void *flow_block_cb_priv(struct flow_block_cb *block_cb);
 void flow_block_cb_incref(struct flow_block_cb *block_cb);
@@ -295,11 +299,12 @@ static inline void flow_block_cb_remove(struct flow_block_cb *block_cb,
 	list_move(&block_cb->list, &offload->cb_list);
 }
 
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
 			   struct list_head *driver_block_list);
 
 int flow_block_cb_setup_simple(struct flow_block_offload *f,
-			       struct list_head *driver_list, tc_setup_cb_t *cb,
+			       struct list_head *driver_list,
+			       flow_setup_cb_t *cb,
 			       void *cb_ident, void *cb_priv, bool ingress_only);
 
 enum flow_cls_command {
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 841faadceb6e..e429809ca90d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -6,7 +6,6 @@
 #include <linux/workqueue.h>
 #include <net/sch_generic.h>
 #include <net/act_api.h>
-#include <net/flow_offload.h>
 #include <net/net_namespace.h>
 
 /* TC action not accessible from user space */
@@ -126,14 +125,14 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 }
 
 static inline
-int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb,
+int tc_setup_cb_block_register(struct tcf_block *block, flow_setup_cb_t *cb,
 			       void *cb_priv)
 {
 	return 0;
 }
 
 static inline
-void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb,
+void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
 				  void *cb_priv)
 {
 }
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 855167bbc372..9482e060483b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -15,6 +15,7 @@
 #include <linux/mutex.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
+#include <net/flow_offload.h>
 
 struct Qdisc_ops;
 struct qdisc_walker;
@@ -22,9 +23,6 @@ struct tcf_walker;
 struct module;
 struct bpf_flow_keys;
 
-typedef int tc_setup_cb_t(enum tc_setup_type type,
-			  void *type_data, void *cb_priv);
-
 typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
 				    enum tc_setup_type type, void *type_data);
 
@@ -313,7 +311,7 @@ struct tcf_proto_ops {
 	void			(*walk)(struct tcf_proto *tp,
 					struct tcf_walker *arg, bool rtnl_held);
 	int			(*reoffload)(struct tcf_proto *tp, bool add,
-					     tc_setup_cb_t *cb, void *cb_priv,
+					     flow_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
 	void			(*bind_class)(void *, u32, unsigned long);
 	void *			(*tmplt_create)(struct net *net,
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 507de4b48815..a800fa78d96c 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule,
 }
 EXPORT_SYMBOL(flow_rule_match_enc_opts);
 
-struct flow_block_cb *flow_block_cb_alloc(tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
 					  void *cb_ident, void *cb_priv,
 					  void (*release)(void *cb_priv))
 {
@@ -194,7 +194,7 @@ void flow_block_cb_free(struct flow_block_cb *block_cb)
 EXPORT_SYMBOL(flow_block_cb_free);
 
 struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
-					   tc_setup_cb_t *cb, void *cb_ident)
+					   flow_setup_cb_t *cb, void *cb_ident)
 {
 	struct flow_block_cb *block_cb;
 
@@ -226,7 +226,7 @@ unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb)
 }
 EXPORT_SYMBOL(flow_block_cb_decref);
 
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
 			   struct list_head *driver_block_list)
 {
 	struct flow_block_cb *block_cb;
@@ -243,7 +243,8 @@ EXPORT_SYMBOL(flow_block_cb_is_busy);
 
 int flow_block_cb_setup_simple(struct flow_block_offload *f,
 			       struct list_head *driver_block_list,
-			       tc_setup_cb_t *cb, void *cb_ident, void *cb_priv,
+			       flow_setup_cb_t *cb,
+			       void *cb_ident, void *cb_priv,
 			       bool ingress_only)
 {
 	struct flow_block_cb *block_cb;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6ca9ec58f881..d697a64fb564 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -951,7 +951,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 				    struct flow_block_offload *f)
 {
 	struct flow_block_cb *block_cb;
-	tc_setup_cb_t *cb;
+	flow_setup_cb_t *cb;
 
 	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		cb = dsa_slave_setup_tc_block_cb_ig;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d144233423c5..78f0f2815b8c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1514,7 +1514,7 @@ void tcf_block_put(struct tcf_block *block)
 EXPORT_SYMBOL(tcf_block_put);
 
 static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 			    void *cb_priv, bool add, bool offload_in_use,
 			    struct netlink_ext_ack *extack)
 {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 691f71830134..3f7a9c02b70c 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -651,7 +651,7 @@ skip:
 	}
 }
 
-static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			     void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct cls_bpf_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 38d6e85693fc..054123742e32 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1800,7 +1800,7 @@ fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
 	return NULL;
 }
 
-static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tcf_block *block = tp->chain->block;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index a30d2f8feb32..455ea2793f9b 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -282,7 +282,7 @@ skip:
 	arg->count++;
 }
 
-static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			  void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct cls_mall_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index be9e46c77e8b..8614088edd1b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -1152,7 +1152,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
 }
 
 static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
-			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       bool add, flow_setup_cb_t *cb, void *cb_priv,
 			       struct netlink_ext_ack *extack)
 {
 	struct tc_cls_u32_offload cls_u32 = {};
@@ -1172,7 +1172,7 @@ static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
 }
 
 static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
-			       bool add, tc_setup_cb_t *cb, void *cb_priv,
+			       bool add, flow_setup_cb_t *cb, void *cb_priv,
 			       struct netlink_ext_ack *extack)
 {
 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -1213,7 +1213,7 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	return 0;
 }
 
-static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 			 void *cb_priv, struct netlink_ext_ack *extack)
 {
 	struct tc_u_common *tp_c = tp->data;
-- 
cgit v1.2.3


From 14bfb13f0ed525ed117b5d1f3e77e7c0a6be15de Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 19 Jul 2019 18:20:16 +0200
Subject: net: flow_offload: add flow_block structure and use it

This object stores the flow block callbacks that are attached to this
block. Update flow_block_cb_lookup() to take this new object.

This patch restores the block sharing feature.

Fixes: da3eeb904ff4 ("net: flow_offload: add list handling functions")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c    |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      |  8 +++++---
 drivers/net/ethernet/mscc/ocelot_flower.c           |  8 ++++----
 drivers/net/ethernet/mscc/ocelot_tc.c               |  2 +-
 drivers/net/ethernet/netronome/nfp/flower/offload.c |  5 +++--
 include/net/flow_offload.h                          | 12 +++++++++++-
 include/net/netfilter/nf_tables.h                   |  5 +++--
 include/net/sch_generic.h                           |  2 +-
 net/core/flow_offload.c                             |  6 +++---
 net/dsa/slave.c                                     |  2 +-
 net/netfilter/nf_tables_api.c                       |  2 +-
 net/netfilter/nf_tables_offload.c                   |  5 +++--
 net/sched/cls_api.c                                 | 10 +++++++---
 13 files changed, 44 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 2162412073c5..7f747cb1a4f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -752,7 +752,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
 		if (!indr_priv)
 			return -ENOENT;
 
-		block_cb = flow_block_cb_lookup(f,
+		block_cb = flow_block_cb_lookup(f->block,
 						mlx5e_rep_indr_setup_block_cb,
 						indr_priv);
 		if (!block_cb)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 51cd0b6f1f3e..650638152bbc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1604,7 +1604,8 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 	bool register_block = false;
 	int err;
 
-	block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
+	block_cb = flow_block_cb_lookup(f->block,
+					mlxsw_sp_setup_tc_block_cb_flower,
 					mlxsw_sp);
 	if (!block_cb) {
 		acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
@@ -1656,7 +1657,8 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
 	struct flow_block_cb *block_cb;
 	int err;
 
-	block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
+	block_cb = flow_block_cb_lookup(f->block,
+					mlxsw_sp_setup_tc_block_cb_flower,
 					mlxsw_sp);
 	if (!block_cb)
 		return;
@@ -1717,7 +1719,7 @@ static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 	case FLOW_BLOCK_UNBIND:
 		mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
 						      f, ingress);
-		block_cb = flow_block_cb_lookup(f, cb, mlxsw_sp_port);
+		block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 6a11aea8b186..59487d446a09 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -316,8 +316,8 @@ int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port,
 	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		return -EOPNOTSUPP;
 
-	block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower,
-					port);
+	block_cb = flow_block_cb_lookup(f->block,
+					ocelot_setup_tc_block_cb_flower, port);
 	if (!block_cb) {
 		port_block = ocelot_port_block_create(port);
 		if (!port_block)
@@ -350,8 +350,8 @@ void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port,
 {
 	struct flow_block_cb *block_cb;
 
-	block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower,
-					port);
+	block_cb = flow_block_cb_lookup(f->block,
+					ocelot_setup_tc_block_cb_flower, port);
 	if (!block_cb)
 		return;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
index fba9512e9ca6..16a6db71ca5e 100644
--- a/drivers/net/ethernet/mscc/ocelot_tc.c
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -169,7 +169,7 @@ static int ocelot_setup_tc_block(struct ocelot_port *port,
 		list_add_tail(&block_cb->driver_list, f->driver_block_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, cb, port);
+		block_cb = flow_block_cb_lookup(f->block, cb, port);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 93ab0db6c504..e209f150c5f2 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1327,7 +1327,8 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
 		list_add_tail(&block_cb->driver_list, &nfp_block_cb_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, nfp_flower_setup_tc_block_cb,
+		block_cb = flow_block_cb_lookup(f->block,
+						nfp_flower_setup_tc_block_cb,
 						repr);
 		if (!block_cb)
 			return -ENOENT;
@@ -1440,7 +1441,7 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
 		if (!cb_priv)
 			return -ENOENT;
 
-		block_cb = flow_block_cb_lookup(f,
+		block_cb = flow_block_cb_lookup(f->block,
 						nfp_flower_setup_indr_block_cb,
 						cb_priv);
 		if (!block_cb)
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 23b299235baf..b16d21636d69 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -249,6 +249,10 @@ enum flow_block_binder_type {
 	FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
 };
 
+struct flow_block {
+	struct list_head cb_list;
+};
+
 struct netlink_ext_ack;
 
 struct flow_block_offload {
@@ -256,6 +260,7 @@ struct flow_block_offload {
 	enum flow_block_binder_type binder_type;
 	bool block_shared;
 	struct net *net;
+	struct flow_block *block;
 	struct list_head cb_list;
 	struct list_head *driver_block_list;
 	struct netlink_ext_ack *extack;
@@ -280,7 +285,7 @@ struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
 					  void (*release)(void *cb_priv));
 void flow_block_cb_free(struct flow_block_cb *block_cb);
 
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *offload,
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
 					   flow_setup_cb_t *cb, void *cb_ident);
 
 void *flow_block_cb_priv(struct flow_block_cb *block_cb);
@@ -337,4 +342,9 @@ flow_cls_offload_flow_rule(struct flow_cls_offload *flow_cmd)
 	return flow_cmd->rule;
 }
 
+static inline void flow_block_init(struct flow_block *flow_block)
+{
+	INIT_LIST_HEAD(&flow_block->cb_list);
+}
+
 #endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 35dfdd9f69b3..9b624566b82d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -11,6 +11,7 @@
 #include <linux/rhashtable.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netlink.h>
+#include <net/flow_offload.h>
 
 struct module;
 
@@ -951,7 +952,7 @@ struct nft_stats {
  *	@stats: per-cpu chain stats
  *	@chain: the chain
  *	@dev_name: device name that this base chain is attached to (if any)
- *	@cb_list: list of flow block callbacks (for hardware offload)
+ *	@flow_block: flow block (for hardware offload)
  */
 struct nft_base_chain {
 	struct nf_hook_ops		ops;
@@ -961,7 +962,7 @@ struct nft_base_chain {
 	struct nft_stats __percpu	*stats;
 	struct nft_chain		chain;
 	char 				dev_name[IFNAMSIZ];
-	struct list_head		cb_list;
+	struct flow_block		flow_block;
 };
 
 static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 9482e060483b..6b6b01234dd9 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -399,7 +399,7 @@ struct tcf_block {
 	refcount_t refcnt;
 	struct net *net;
 	struct Qdisc *q;
-	struct list_head cb_list;
+	struct flow_block flow_block;
 	struct list_head owner_list;
 	bool keep_dst;
 	unsigned int offloadcnt; /* Number of oddloaded filters */
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index a800fa78d96c..d63b970784dc 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -193,12 +193,12 @@ void flow_block_cb_free(struct flow_block_cb *block_cb)
 }
 EXPORT_SYMBOL(flow_block_cb_free);
 
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
 					   flow_setup_cb_t *cb, void *cb_ident)
 {
 	struct flow_block_cb *block_cb;
 
-	list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
+	list_for_each_entry(block_cb, &block->cb_list, list) {
 		if (block_cb->cb == cb &&
 		    block_cb->cb_ident == cb_ident)
 			return block_cb;
@@ -268,7 +268,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 		list_add_tail(&block_cb->driver_list, driver_block_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, cb, cb_ident);
+		block_cb = flow_block_cb_lookup(f->block, cb, cb_ident);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index d697a64fb564..33f41178afcc 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -975,7 +975,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 		list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
 		return 0;
 	case FLOW_BLOCK_UNBIND:
-		block_cb = flow_block_cb_lookup(f, cb, dev);
+		block_cb = flow_block_cb_lookup(f->block, cb, dev);
 		if (!block_cb)
 			return -ENOENT;
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 014e06b0b5cf..605a7cfe7ca7 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1662,7 +1662,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 		chain->flags |= NFT_BASE_CHAIN | flags;
 		basechain->policy = NF_ACCEPT;
-		INIT_LIST_HEAD(&basechain->cb_list);
+		flow_block_init(&basechain->flow_block);
 	} else {
 		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 		if (chain == NULL)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 2c3302845f67..64f5fd5f240e 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -116,7 +116,7 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
 	struct flow_block_cb *block_cb;
 	int err;
 
-	list_for_each_entry(block_cb, &basechain->cb_list, list) {
+	list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
 		err = block_cb->cb(type, type_data, block_cb->cb_priv);
 		if (err < 0)
 			return err;
@@ -154,7 +154,7 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
 static int nft_flow_offload_bind(struct flow_block_offload *bo,
 				 struct nft_base_chain *basechain)
 {
-	list_splice(&bo->cb_list, &basechain->cb_list);
+	list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
 	return 0;
 }
 
@@ -198,6 +198,7 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
 		return -EOPNOTSUPP;
 
 	bo.command = cmd;
+	bo.block = &basechain->flow_block;
 	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
 	bo.extack = &extack;
 	INIT_LIST_HEAD(&bo.cb_list);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 78f0f2815b8c..15796fd47fda 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -691,6 +691,8 @@ static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
 	if (!indr_dev->block)
 		return;
 
+	bo.block = &indr_dev->block->flow_block;
+
 	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 			  &bo);
 	tcf_block_setup(indr_dev->block, &bo);
@@ -775,6 +777,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 		.command	= command,
 		.binder_type	= ei->binder_type,
 		.net		= dev_net(dev),
+		.block		= &block->flow_block,
 		.block_shared	= tcf_block_shared(block),
 		.extack		= extack,
 	};
@@ -810,6 +813,7 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
 	bo.net = dev_net(dev);
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
+	bo.block = &block->flow_block;
 	bo.block_shared = tcf_block_shared(block);
 	bo.extack = extack;
 	INIT_LIST_HEAD(&bo.cb_list);
@@ -987,8 +991,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 		return ERR_PTR(-ENOMEM);
 	}
 	mutex_init(&block->lock);
+	flow_block_init(&block->flow_block);
 	INIT_LIST_HEAD(&block->chain_list);
-	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
 	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
 
@@ -1570,7 +1574,7 @@ static int tcf_block_bind(struct tcf_block *block,
 
 		i++;
 	}
-	list_splice(&bo->cb_list, &block->cb_list);
+	list_splice(&bo->cb_list, &block->flow_block.cb_list);
 
 	return 0;
 
@@ -3156,7 +3160,7 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 	if (block->nooffloaddevcnt && err_stop)
 		return -EOPNOTSUPP;
 
-	list_for_each_entry(block_cb, &block->cb_list, list) {
+	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
 		err = block_cb->cb(type, type_data, block_cb->cb_priv);
 		if (err) {
 			if (err_stop)
-- 
cgit v1.2.3


From 91046d6364afde646734c7ead1f649d253c386e9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 25 Jun 2019 10:04:51 +0200
Subject: nl80211: fix VENDOR_CMD_RAW_DATA

Since ERR_PTR() is an inline, not a macro, just open-code it
here so it's usable as an initializer, fixing the build in
brcmfmac.

Reported-by: Arend Van Spriel <arend.vanspriel@broadcom.com>
Fixes: 901bb9891855 ("nl80211: require and validate vendor command policy")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 88c27153a4bc..45850a8391d9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4170,7 +4170,7 @@ struct sta_opmode_info {
 	u8 rx_nss;
 };
 
-#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)ERR_PTR(-ENODATA))
+#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)(long)(-ENODATA))
 
 /**
  * struct wiphy_vendor_command - vendor command definition
-- 
cgit v1.2.3


From 5edaac063bbf1267260ad2a5b9bb803399343e58 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Thu, 27 Jun 2019 11:58:32 +0200
Subject: nl80211: fix NL80211_HE_MAX_CAPABILITY_LEN

NL80211_HE_MAX_CAPABILITY_LEN has changed between D2.0 and D4.0. It is now
MAC (6) + PHY (11) + MCS (12) + PPE (25) = 54.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190627095832.19445-1-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 75758ec26c8b..beb9a9d0c00a 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2863,7 +2863,7 @@ enum nl80211_attrs {
 #define NL80211_HT_CAPABILITY_LEN		26
 #define NL80211_VHT_CAPABILITY_LEN		12
 #define NL80211_HE_MIN_CAPABILITY_LEN           16
-#define NL80211_HE_MAX_CAPABILITY_LEN           51
+#define NL80211_HE_MAX_CAPABILITY_LEN           54
 #define NL80211_MAX_NR_CIPHER_SUITES		5
 #define NL80211_MAX_NR_AKM_SUITES		2
 
-- 
cgit v1.2.3


From 408d2bbbfd4687c435ee5d4967dbe95bc9be82ed Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Sun, 21 Jul 2019 12:31:05 +0100
Subject: kbuild: add net/netfilter/nf_tables_offload.h to header-test
 blacklist.

net/netfilter/nf_tables_offload.h includes net/netfilter/nf_tables.h
which is itself on the blacklist.

Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/Kbuild b/include/Kbuild
index 7e9f1acb9dd5..8de846a83d8f 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -909,6 +909,7 @@ header-test-			+= net/netfilter/nf_tables.h
 header-test-			+= net/netfilter/nf_tables_core.h
 header-test-			+= net/netfilter/nf_tables_ipv4.h
 header-test-			+= net/netfilter/nf_tables_ipv6.h
+header-test-			+= net/netfilter/nf_tables_offload.h
 header-test-			+= net/netfilter/nft_fib.h
 header-test-			+= net/netfilter/nft_meta.h
 header-test-			+= net/netfilter/nft_reject.h
-- 
cgit v1.2.3


From 903e9d1bffb557220af276eda97b9d6b103ec9e0 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Thu, 18 Jul 2019 07:26:46 +0300
Subject: connector: remove redundant input callback from cn_dev

A small cleanup: this callback is never used.
Originally fixed by Stanislav Kinsburskiy <skinsbursky@virtuozzo.com>
for OpenVZ7 bug OVZ-6877

cc: stanislav.kinsburskiy@gmail.com
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c | 6 +-----
 include/linux/connector.h     | 1 -
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 23553ed6b548..2d22d6bf52f2 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -248,16 +248,12 @@ static int __maybe_unused cn_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-static struct cn_dev cdev = {
-	.input   = cn_rx_skb,
-};
-
 static int cn_init(void)
 {
 	struct cn_dev *dev = &cdev;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= CN_NETLINK_USERS + 0xf,
-		.input	= dev->input,
+		.input	= cn_rx_skb,
 	};
 
 	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg);
diff --git a/include/linux/connector.h b/include/linux/connector.h
index 6b6c7396a584..cb732643471b 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -50,7 +50,6 @@ struct cn_dev {
 
 	u32 seq, groups;
 	struct sock *nls;
-	void (*input) (struct sk_buff *skb);
 
 	struct cn_queue_dev *cbdev;
 };
-- 
cgit v1.2.3


From c9b07eab0c8760bdd4cf8624c482ee145a322a3b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Jul 2019 13:22:27 -0400
Subject: audit_inode(): switch to passing AUDIT_INODE_...

don't bother with remapping LOOKUP_... values - all callers pass
constants and we can just as well pass the right ones from the
very beginning.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c            |  6 +++---
 include/linux/audit.h | 20 +++++++-------------
 2 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 5b8c72dc0217..3fca26398bc2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2391,7 +2391,7 @@ static struct filename *filename_parentat(int dfd, struct filename *name,
 	if (likely(!retval)) {
 		*last = nd.last;
 		*type = nd.last_type;
-		audit_inode(name, parent->dentry, LOOKUP_PARENT);
+		audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
 	} else {
 		putname(name);
 		name = ERR_PTR(retval);
@@ -2718,7 +2718,7 @@ filename_mountpoint(int dfd, struct filename *name, struct path *path,
 	if (unlikely(error == -ESTALE))
 		error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
 	if (likely(!error))
-		audit_inode(name, path->dentry, LOOKUP_NO_EVAL);
+		audit_inode(name, path->dentry, AUDIT_INODE_NOEVAL);
 	restore_nameidata();
 	putname(name);
 	return error;
@@ -3299,7 +3299,7 @@ static int do_last(struct nameidata *nd,
 		if (error)
 			return error;
 
-		audit_inode(nd->name, dir, LOOKUP_PARENT);
+		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
 		/* trailing slashes? */
 		if (unlikely(nd->last.name[nd->last.len]))
 			return -EISDIR;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 97d0925454df..543763ab0354 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -252,6 +252,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
 #define audit_is_compat(arch)  false
 #endif
 
+#define AUDIT_INODE_PARENT	1	/* dentry represents the parent */
+#define AUDIT_INODE_HIDDEN	2	/* audit record should be hidden */
+#define AUDIT_INODE_NOEVAL	4	/* audit record incomplete */
+
 #ifdef CONFIG_AUDITSYSCALL
 #include <asm/syscall.h> /* for syscall_get_arch() */
 
@@ -265,9 +269,6 @@ extern void __audit_syscall_exit(int ret_success, long ret_value);
 extern struct filename *__audit_reusename(const __user char *uptr);
 extern void __audit_getname(struct filename *name);
 
-#define AUDIT_INODE_PARENT	1	/* dentry represents the parent */
-#define AUDIT_INODE_HIDDEN	2	/* audit record should be hidden */
-#define AUDIT_INODE_NOEVAL	4	/* audit record incomplete */
 extern void __audit_inode(struct filename *name, const struct dentry *dentry,
 				unsigned int flags);
 extern void __audit_file(const struct file *);
@@ -328,16 +329,9 @@ static inline void audit_getname(struct filename *name)
 }
 static inline void audit_inode(struct filename *name,
 				const struct dentry *dentry,
-				unsigned int flags) {
-	if (unlikely(!audit_dummy_context())) {
-		unsigned int aflags = 0;
-
-		if (flags & LOOKUP_PARENT)
-			aflags |= AUDIT_INODE_PARENT;
-		if (flags & LOOKUP_NO_EVAL)
-			aflags |= AUDIT_INODE_NOEVAL;
+				unsigned int aflags) {
+	if (unlikely(!audit_dummy_context()))
 		__audit_inode(name, dentry, aflags);
-	}
 }
 static inline void audit_file(struct file *file)
 {
@@ -561,7 +555,7 @@ static inline void __audit_inode_child(struct inode *parent,
 { }
 static inline void audit_inode(struct filename *name,
 				const struct dentry *dentry,
-				unsigned int parent)
+				unsigned int aflags)
 { }
 static inline void audit_file(struct file *file)
 {
-- 
cgit v1.2.3


From b617158dc096709d8600c53b6052144d12b89fab Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 19 Jul 2019 11:52:33 -0700
Subject: tcp: be more careful in tcp_fragment()

Some applications set tiny SO_SNDBUF values and expect
TCP to just work. Recent patches to address CVE-2019-11478
broke them in case of losses, since retransmits might
be prevented.

We should allow these flows to make progress.

This patch allows the first and last skb in retransmit queue
to be split even if memory limits are hit.

It also adds the some room due to the fact that tcp_sendmsg()
and tcp_sendpage() might overshoot sk_wmem_queued by about one full
TSO skb (64KB size). Note this allowance was already present
in stable backports for kernels < 4.15

Note for < 4.15 backports :
 tcp_rtx_queue_tail() will probably look like :

static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
{
	struct sk_buff *skb = tcp_send_head(sk);

	return skb ? tcp_write_queue_prev(sk, skb) : tcp_write_queue_tail(sk);
}

Fixes: f070ef2ac667 ("tcp: tcp_fragment() should apply sane memory limits")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrew Prout <aprout@ll.mit.edu>
Tested-by: Andrew Prout <aprout@ll.mit.edu>
Tested-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Tested-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Christoph Paasch <cpaasch@apple.com>
Cc: Jonathan Looney <jtl@netflix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  5 +++++
 net/ipv4/tcp_output.c | 13 +++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f42d300f0cfa..e5cf514ba118 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1709,6 +1709,11 @@ static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
 	return skb_rb_first(&sk->tcp_rtx_queue);
 }
 
+static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
+{
+	return skb_rb_last(&sk->tcp_rtx_queue);
+}
+
 static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
 {
 	return skb_peek(&sk->sk_write_queue);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4af1f5dae9d3..6e4afc48d7bb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1288,6 +1288,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
 	int nsize, old_factor;
+	long limit;
 	int nlen;
 	u8 flags;
 
@@ -1298,8 +1299,16 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
-		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+	/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+	 * We need some allowance to not penalize applications setting small
+	 * SO_SNDBUF values.
+	 * Also allow first and last skb in retransmit queue to be split.
+	 */
+	limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+	if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+		     tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+		     skb != tcp_rtx_queue_head(sk) &&
+		     skb != tcp_rtx_queue_tail(sk))) {
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3


From 893a1c97205a3ece0cbb3f571a3b972080f3b4c7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 16 Jul 2019 13:55:23 -0600
Subject: blk-mq: allow REQ_NOWAIT to return an error inline

By default, if a caller sets REQ_NOWAIT and we need to block, we'll
return -EAGAIN through the bio->bi_end_io() callback. For some use
cases, this makes it hard to use.

Allow a caller to ask for inline return of errors related to
blocking by also setting REQ_NOWAIT_INLINE.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c            | 8 ++++++--
 include/linux/blk_types.h | 5 ++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..2bc2c0705660 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1960,9 +1960,13 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	rq = blk_mq_get_request(q, bio, &data);
 	if (unlikely(!rq)) {
 		rq_qos_cleanup(q, bio);
-		if (bio->bi_opf & REQ_NOWAIT)
+
+		cookie = BLK_QC_T_NONE;
+		if (bio->bi_opf & REQ_NOWAIT_INLINE)
+			cookie = BLK_QC_T_EAGAIN;
+		else if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
-		return BLK_QC_T_NONE;
+		return cookie;
 	}
 
 	trace_block_getrq(q, bio, bio->bi_opf);
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index feff3fe4467e..1b1fa1557e68 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,7 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,           /* Don't wait if request will block */
+	__REQ_NOWAIT_INLINE,	/* Return would-block error inline */
 	/*
 	 * When a shared kthread needs to issue a bio for a cgroup, doing
 	 * so synchronously can lead to priority inversions as the kthread
@@ -345,6 +346,7 @@ enum req_flag_bits {
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
+#define REQ_NOWAIT_INLINE	(1ULL << __REQ_NOWAIT_INLINE)
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
@@ -418,12 +420,13 @@ static inline int op_stat_group(unsigned int op)
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
+#define BLK_QC_T_EAGAIN		-2U
 #define BLK_QC_T_SHIFT		16
 #define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
-	return cookie != BLK_QC_T_NONE;
+	return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
-- 
cgit v1.2.3


From 5d4b45a1dd7b00feab57624035dcdbc1bab2e0f8 Mon Sep 17 00:00:00 2001
From: Markus Koch <markus@notsyncing.net>
Date: Sun, 21 Jul 2019 20:20:28 +0300
Subject: Input: add support for the FlySky FS-iA6B RC receiver

This patch adds support for the FlySky FS-iA6B RC receiver (serial IBUS).

It allows the usage of the FlySky FS-i6 and other AFHDS compliant remote
controls as a joystick input device.

To use it, a patch to inputattach which adds the FS-iA6B as a 115200 baud
serial device is required. I will upstream it after this patch is merged.

More information about the hardware can be found here:

https://notsyncing.net/?p=blog&b=2018.linux-fsia6b

Signed-off-by: Markus Koch <markus@notsyncing.net>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 MAINTAINERS                     |   6 ++
 drivers/input/joystick/Kconfig  |  10 ++
 drivers/input/joystick/Makefile |   5 +-
 drivers/input/joystick/fsia6b.c | 231 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serio.h      |   1 +
 5 files changed, 251 insertions(+), 2 deletions(-)
 create mode 100644 drivers/input/joystick/fsia6b.c

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 677ef41cb012..cdd25b0a1218 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12394,6 +12394,12 @@ S:	Maintained
 F:	Documentation/input/devices/pxrc.rst
 F:	drivers/input/joystick/pxrc.c
 
+FLYSKY FSIA6B RC RECEIVER
+M:	Markus Koch <markus@notsyncing.net>
+L:	linux-input@vger.kernel.org
+S:	Maintained
+F:	drivers/input/joystick/fsia6b.c
+
 PHONET PROTOCOL
 M:	Remi Denis-Courmont <courmisch@gmail.com>
 S:	Supported
diff --git a/drivers/input/joystick/Kconfig b/drivers/input/joystick/Kconfig
index 72b932901d00..312b854b5506 100644
--- a/drivers/input/joystick/Kconfig
+++ b/drivers/input/joystick/Kconfig
@@ -362,4 +362,14 @@ config JOYSTICK_PXRC
 	  To compile this driver as a module, choose M here: the
 	  module will be called pxrc.
 
+config JOYSTICK_FSIA6B
+	tristate "FlySky FS-iA6B RC Receiver"
+	select SERIO
+	help
+	  Say Y here if you use a FlySky FS-i6 RC remote control along with the
+	  FS-iA6B RC receiver as a joystick input device.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called fsia6b.
+
 endif
diff --git a/drivers/input/joystick/Makefile b/drivers/input/joystick/Makefile
index dd0492ebbed7..8656023f6ef5 100644
--- a/drivers/input/joystick/Makefile
+++ b/drivers/input/joystick/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_JOYSTICK_AS5011)		+= as5011.o
 obj-$(CONFIG_JOYSTICK_ANALOG)		+= analog.o
 obj-$(CONFIG_JOYSTICK_COBRA)		+= cobra.o
 obj-$(CONFIG_JOYSTICK_DB9)		+= db9.o
+obj-$(CONFIG_JOYSTICK_FSIA6B)		+= fsia6b.o
 obj-$(CONFIG_JOYSTICK_GAMECON)		+= gamecon.o
 obj-$(CONFIG_JOYSTICK_GF2K)		+= gf2k.o
 obj-$(CONFIG_JOYSTICK_GRIP)		+= grip.o
@@ -23,7 +24,7 @@ obj-$(CONFIG_JOYSTICK_JOYDUMP)		+= joydump.o
 obj-$(CONFIG_JOYSTICK_MAGELLAN)		+= magellan.o
 obj-$(CONFIG_JOYSTICK_MAPLE)		+= maplecontrol.o
 obj-$(CONFIG_JOYSTICK_PSXPAD_SPI)	+= psxpad-spi.o
-obj-$(CONFIG_JOYSTICK_PXRC)			+= pxrc.o
+obj-$(CONFIG_JOYSTICK_PXRC)		+= pxrc.o
 obj-$(CONFIG_JOYSTICK_SIDEWINDER)	+= sidewinder.o
 obj-$(CONFIG_JOYSTICK_SPACEBALL)	+= spaceball.o
 obj-$(CONFIG_JOYSTICK_SPACEORB)		+= spaceorb.o
@@ -32,7 +33,7 @@ obj-$(CONFIG_JOYSTICK_TMDC)		+= tmdc.o
 obj-$(CONFIG_JOYSTICK_TURBOGRAFX)	+= turbografx.o
 obj-$(CONFIG_JOYSTICK_TWIDJOY)		+= twidjoy.o
 obj-$(CONFIG_JOYSTICK_WARRIOR)		+= warrior.o
+obj-$(CONFIG_JOYSTICK_WALKERA0701)	+= walkera0701.o
 obj-$(CONFIG_JOYSTICK_XPAD)		+= xpad.o
 obj-$(CONFIG_JOYSTICK_ZHENHUA)		+= zhenhua.o
-obj-$(CONFIG_JOYSTICK_WALKERA0701)	+= walkera0701.o
 
diff --git a/drivers/input/joystick/fsia6b.c b/drivers/input/joystick/fsia6b.c
new file mode 100644
index 000000000000..e78c4c768990
--- /dev/null
+++ b/drivers/input/joystick/fsia6b.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FS-iA6B iBus RC receiver driver
+ *
+ * This driver provides all 14 channels of the FlySky FS-ia6B RC receiver
+ * as analog values.
+ *
+ * Additionally, the channels can be converted to discrete switch values.
+ * By default, it is configured for the offical FS-i6 remote control.
+ * If you use a different hardware configuration, you can configure it
+ * using the `switch_config` parameter.
+ */
+
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/serio.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define DRIVER_DESC		"FS-iA6B iBus RC receiver"
+
+MODULE_AUTHOR("Markus Koch <markus@notsyncing.net>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+#define IBUS_SERVO_COUNT	14
+
+static char *switch_config = "00000022320000";
+module_param(switch_config, charp, 0444);
+MODULE_PARM_DESC(switch_config,
+		 "Amount of switch positions per channel (14 characters, 0-3)");
+
+static int fsia6b_axes[IBUS_SERVO_COUNT] = {
+	ABS_X, ABS_Y,
+	ABS_Z, ABS_RX,
+	ABS_RY, ABS_RZ,
+	ABS_HAT0X, ABS_HAT0Y,
+	ABS_HAT1X, ABS_HAT1Y,
+	ABS_HAT2X, ABS_HAT2Y,
+	ABS_HAT3X, ABS_HAT3Y
+};
+
+enum ibus_state { SYNC, COLLECT, PROCESS };
+
+struct ibus_packet {
+	enum ibus_state state;
+
+	int offset;
+	u16 ibuf;
+	u16 channel[IBUS_SERVO_COUNT];
+};
+
+struct fsia6b {
+	struct input_dev *dev;
+	struct ibus_packet packet;
+
+	char phys[32];
+};
+
+static irqreturn_t fsia6b_serio_irq(struct serio *serio,
+				    unsigned char data, unsigned int flags)
+{
+	struct fsia6b *fsia6b = serio_get_drvdata(serio);
+	int i;
+	int sw_state;
+	int sw_id = BTN_0;
+
+	fsia6b->packet.ibuf = (data << 8) | ((fsia6b->packet.ibuf >> 8) & 0xFF);
+
+	switch (fsia6b->packet.state) {
+	case SYNC:
+		if (fsia6b->packet.ibuf == 0x4020)
+			fsia6b->packet.state = COLLECT;
+		break;
+
+	case COLLECT:
+		fsia6b->packet.state = PROCESS;
+		break;
+
+	case PROCESS:
+		fsia6b->packet.channel[fsia6b->packet.offset] =
+				fsia6b->packet.ibuf;
+		fsia6b->packet.offset++;
+
+		if (fsia6b->packet.offset == IBUS_SERVO_COUNT) {
+			fsia6b->packet.offset = 0;
+			fsia6b->packet.state = SYNC;
+			for (i = 0; i < IBUS_SERVO_COUNT; ++i) {
+				input_report_abs(fsia6b->dev, fsia6b_axes[i],
+						 fsia6b->packet.channel[i]);
+
+				sw_state = 0;
+				if (fsia6b->packet.channel[i] > 1900)
+					sw_state = 1;
+				else if (fsia6b->packet.channel[i] < 1100)
+					sw_state = 2;
+
+				switch (switch_config[i]) {
+				case '3':
+					input_report_key(fsia6b->dev,
+							 sw_id++,
+							 sw_state == 0);
+					/* fall-through */
+				case '2':
+					input_report_key(fsia6b->dev,
+							 sw_id++,
+							 sw_state == 1);
+					/* fall-through */
+				case '1':
+					input_report_key(fsia6b->dev,
+							 sw_id++,
+							 sw_state == 2);
+				}
+			}
+			input_sync(fsia6b->dev);
+		} else {
+			fsia6b->packet.state = COLLECT;
+		}
+		break;
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int fsia6b_serio_connect(struct serio *serio, struct serio_driver *drv)
+{
+	struct fsia6b *fsia6b;
+	struct input_dev *input_dev;
+	int err;
+	int i, j;
+	int sw_id = 0;
+
+	fsia6b = kzalloc(sizeof(*fsia6b), GFP_KERNEL);
+	if (!fsia6b)
+		return -ENOMEM;
+
+	fsia6b->packet.ibuf = 0;
+	fsia6b->packet.offset = 0;
+	fsia6b->packet.state = SYNC;
+
+	serio_set_drvdata(serio, fsia6b);
+
+	input_dev = input_allocate_device();
+	if (!input_dev) {
+		err = -ENOMEM;
+		goto fail1;
+	}
+	fsia6b->dev = input_dev;
+
+	snprintf(fsia6b->phys, sizeof(fsia6b->phys), "%s/input0", serio->phys);
+
+	input_dev->name = DRIVER_DESC;
+	input_dev->phys = fsia6b->phys;
+	input_dev->id.bustype = BUS_RS232;
+	input_dev->id.vendor = SERIO_FSIA6B;
+	input_dev->id.product = serio->id.id;
+	input_dev->id.version = 0x0100;
+	input_dev->dev.parent = &serio->dev;
+
+	for (i = 0; i < IBUS_SERVO_COUNT; i++)
+		input_set_abs_params(input_dev, fsia6b_axes[i],
+				     1000, 2000, 2, 2);
+
+	/* Register switch configuration */
+	for (i = 0; i < IBUS_SERVO_COUNT; i++) {
+		if (switch_config[i] < '0' || switch_config[i] > '3') {
+			dev_err(&fsia6b->dev->dev,
+				"Invalid switch configuration supplied for fsia6b.\n");
+			err = -EINVAL;
+			goto fail2;
+		}
+
+		for (j = '1'; j <= switch_config[i]; j++) {
+			input_set_capability(input_dev, EV_KEY, BTN_0 + sw_id);
+			sw_id++;
+		}
+	}
+
+	err = serio_open(serio, drv);
+	if (err)
+		goto fail2;
+
+	err = input_register_device(fsia6b->dev);
+	if (err)
+		goto fail3;
+
+	return 0;
+
+fail3:	serio_close(serio);
+fail2:	input_free_device(input_dev);
+fail1:	serio_set_drvdata(serio, NULL);
+	kfree(fsia6b);
+	return err;
+}
+
+static void fsia6b_serio_disconnect(struct serio *serio)
+{
+	struct fsia6b *fsia6b = serio_get_drvdata(serio);
+
+	serio_close(serio);
+	serio_set_drvdata(serio, NULL);
+	input_unregister_device(fsia6b->dev);
+	kfree(fsia6b);
+}
+
+static const struct serio_device_id fsia6b_serio_ids[] = {
+	{
+		.type	= SERIO_RS232,
+		.proto	= SERIO_FSIA6B,
+		.id	= SERIO_ANY,
+		.extra	= SERIO_ANY,
+	},
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(serio, fsia6b_serio_ids);
+
+static struct serio_driver fsia6b_serio_drv = {
+	.driver		= {
+		.name	= "fsia6b"
+	},
+	.description	= DRIVER_DESC,
+	.id_table	= fsia6b_serio_ids,
+	.interrupt	= fsia6b_serio_irq,
+	.connect	= fsia6b_serio_connect,
+	.disconnect	= fsia6b_serio_disconnect
+};
+
+module_serio_driver(fsia6b_serio_drv)
diff --git a/include/uapi/linux/serio.h b/include/uapi/linux/serio.h
index a0cac1d8670d..50e991952c97 100644
--- a/include/uapi/linux/serio.h
+++ b/include/uapi/linux/serio.h
@@ -82,5 +82,6 @@
 #define SERIO_EGALAX	0x3f
 #define SERIO_PULSE8_CEC	0x40
 #define SERIO_RAINSHADOW_CEC	0x41
+#define SERIO_FSIA6B	0x42
 
 #endif /* _UAPI_SERIO_H */
-- 
cgit v1.2.3


From f9adc23ee91e6f561bb70c6147d8d45bd164d62f Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 16 Jul 2019 09:22:03 +0300
Subject: futex: Cleanup generic SMP variant of arch_futex_atomic_op_inuser()

The generic SMP variant of arch_futex_atomic_op_inuser() returns always
-ENOSYS so the switch case and surrounding code are pointless. Remove it
and just return -ENOSYS.

Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lkml.kernel.org/r/12bdaca8-99eb-e576-f842-5970ab1d6a92@virtuozzo.com
---
 include/asm-generic/futex.h | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 8666fe7f35d7..02970b11f71f 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -118,26 +118,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 static inline int
 arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
 {
-	int oldval = 0, ret;
-
-	pagefault_disable();
-
-	switch (op) {
-	case FUTEX_OP_SET:
-	case FUTEX_OP_ADD:
-	case FUTEX_OP_OR:
-	case FUTEX_OP_ANDN:
-	case FUTEX_OP_XOR:
-	default:
-		ret = -ENOSYS;
-	}
-
-	pagefault_enable();
-
-	if (!ret)
-		*oval = oldval;
-
-	return ret;
+	return -ENOSYS;
 }
 
 static inline int
-- 
cgit v1.2.3


From f240652b6032b48ad7fa35c5e701cc4c8d697c0b Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 5 Jul 2019 10:53:21 -0700
Subject: x86/mpx: Remove MPX APIs

MPX is being removed from the kernel due to a lack of support in the
toolchain going forward (gcc).

The first step is to remove the userspace-visible ABIs so that applications
will stop using it.  The most visible one are the enable/disable prctl()s.
Remove them first.

This is the most minimal and least invasive change needed to ensure that
apps stop using MPX with new kernels.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190705175321.DB42F0AD@viggo.jf.intel.com
---
 include/uapi/linux/prctl.h |  2 +-
 kernel/sys.c               | 16 ++--------------
 2 files changed, 3 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 094bb03b9cc2..961e0a4a0f73 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -181,7 +181,7 @@ struct prctl_mm_map {
 #define PR_GET_THP_DISABLE	42
 
 /*
- * Tell the kernel to start/stop helping userspace manage bounds tables.
+ * No longer implemented, but left here to ensure the numbers stay reserved:
  */
 #define PR_MPX_ENABLE_MANAGEMENT  43
 #define PR_MPX_DISABLE_MANAGEMENT 44
diff --git a/kernel/sys.c b/kernel/sys.c
index 2969304c29fe..384b000b7865 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -103,12 +103,6 @@
 #ifndef SET_TSC_CTL
 # define SET_TSC_CTL(a)		(-EINVAL)
 #endif
-#ifndef MPX_ENABLE_MANAGEMENT
-# define MPX_ENABLE_MANAGEMENT()	(-EINVAL)
-#endif
-#ifndef MPX_DISABLE_MANAGEMENT
-# define MPX_DISABLE_MANAGEMENT()	(-EINVAL)
-#endif
 #ifndef GET_FP_MODE
 # define GET_FP_MODE(a)		(-EINVAL)
 #endif
@@ -2456,15 +2450,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		up_write(&me->mm->mmap_sem);
 		break;
 	case PR_MPX_ENABLE_MANAGEMENT:
-		if (arg2 || arg3 || arg4 || arg5)
-			return -EINVAL;
-		error = MPX_ENABLE_MANAGEMENT();
-		break;
 	case PR_MPX_DISABLE_MANAGEMENT:
-		if (arg2 || arg3 || arg4 || arg5)
-			return -EINVAL;
-		error = MPX_DISABLE_MANAGEMENT();
-		break;
+		/* No longer implemented: */
+		return -EINVAL;
 	case PR_SET_FP_MODE:
 		error = SET_FP_MODE(me, arg2);
 		break;
-- 
cgit v1.2.3


From 6b0e42771795334bd24d089402d04d93bac048d3 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Sun, 21 Jul 2019 19:01:34 -0400
Subject: locks: revise generic_add_lease tracepoint

Now that check_conflicting_open uses inode->i_readcount instead of
the dentry->d_count to detect opens for read, revise the tracepoint
to display that value instead.

Also, fl is never NULL, so no need to check for that in the fast
assign section.

Cc: Amir Goldstein <amir73il@gmail.com>
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 include/trace/events/filelock.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h
index 4b735923f2ff..c705e4944a50 100644
--- a/include/trace/events/filelock.h
+++ b/include/trace/events/filelock.h
@@ -176,7 +176,7 @@ TRACE_EVENT(generic_add_lease,
 	TP_STRUCT__entry(
 		__field(unsigned long, i_ino)
 		__field(int, wcount)
-		__field(int, dcount)
+		__field(int, rcount)
 		__field(int, icount)
 		__field(dev_t, s_dev)
 		__field(fl_owner_t, fl_owner)
@@ -188,16 +188,16 @@ TRACE_EVENT(generic_add_lease,
 		__entry->s_dev = inode->i_sb->s_dev;
 		__entry->i_ino = inode->i_ino;
 		__entry->wcount = atomic_read(&inode->i_writecount);
-		__entry->dcount = d_count(fl->fl_file->f_path.dentry);
+		__entry->rcount = atomic_read(&inode->i_readcount);
 		__entry->icount = atomic_read(&inode->i_count);
-		__entry->fl_owner = fl ? fl->fl_owner : NULL;
-		__entry->fl_flags = fl ? fl->fl_flags : 0;
-		__entry->fl_type = fl ? fl->fl_type : 0;
+		__entry->fl_owner = fl->fl_owner;
+		__entry->fl_flags = fl->fl_flags;
+		__entry->fl_type = fl->fl_type;
 	),
 
-	TP_printk("dev=0x%x:0x%x ino=0x%lx wcount=%d dcount=%d icount=%d fl_owner=0x%p fl_flags=%s fl_type=%s",
+	TP_printk("dev=0x%x:0x%x ino=0x%lx wcount=%d rcount=%d icount=%d fl_owner=0x%p fl_flags=%s fl_type=%s",
 		MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
-		__entry->i_ino, __entry->wcount, __entry->dcount,
+		__entry->i_ino, __entry->wcount, __entry->rcount,
 		__entry->icount, __entry->fl_owner,
 		show_fl_flags(__entry->fl_flags),
 		show_fl_type(__entry->fl_type))
-- 
cgit v1.2.3


From 62ec3d13601bd626ca7a0edef6d45dbb753d94e8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 21 Jul 2019 23:23:08 +0900
Subject: ASoC: SOF: use __u32 instead of uint32_t in uapi headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_UAPI_HEADER_TEST=y, exported headers are compile-tested to
make sure they can be included from user-space.

Currently, header.h and fw.h are excluded from the test coverage.
To make them join the compile-test, we need to fix the build errors
attached below.

For a case like this, we decided to use __u{8,16,32,64} variable types
in this discussion:

  https://lkml.org/lkml/2019/6/5/18

Build log:

  CC      usr/include/sound/sof/header.h.s
  CC      usr/include/sound/sof/fw.h.s
In file included from <command-line>:32:0:
./usr/include/sound/sof/header.h:19:2: error: unknown type name ‘uint32_t’
  uint32_t magic;  /**< 'S', 'O', 'F', '\0' */
  ^~~~~~~~
./usr/include/sound/sof/header.h:20:2: error: unknown type name ‘uint32_t’
  uint32_t type;  /**< component specific type */
  ^~~~~~~~
./usr/include/sound/sof/header.h:21:2: error: unknown type name ‘uint32_t’
  uint32_t size;  /**< size in bytes of data excl. this struct */
  ^~~~~~~~
./usr/include/sound/sof/header.h:22:2: error: unknown type name ‘uint32_t’
  uint32_t abi;  /**< SOF ABI version */
  ^~~~~~~~
./usr/include/sound/sof/header.h:23:2: error: unknown type name ‘uint32_t’
  uint32_t reserved[4]; /**< reserved for future use */
  ^~~~~~~~
./usr/include/sound/sof/header.h:24:2: error: unknown type name ‘uint32_t’
  uint32_t data[0]; /**< Component data - opaque to core */
  ^~~~~~~~
In file included from <command-line>:32:0:
./usr/include/sound/sof/fw.h:49:2: error: unknown type name ‘uint32_t’
  uint32_t size;  /* bytes minus this header */
  ^~~~~~~~
./usr/include/sound/sof/fw.h:50:2: error: unknown type name ‘uint32_t’
  uint32_t offset; /* offset from base */
  ^~~~~~~~
./usr/include/sound/sof/fw.h:64:2: error: unknown type name ‘uint32_t’
  uint32_t size;  /* bytes minus this header */
  ^~~~~~~~
./usr/include/sound/sof/fw.h:65:2: error: unknown type name ‘uint32_t’
  uint32_t num_blocks; /* number of blocks */
  ^~~~~~~~
./usr/include/sound/sof/fw.h:73:2: error: unknown type name ‘uint32_t’
  uint32_t file_size; /* size of file minus this header */
  ^~~~~~~~
./usr/include/sound/sof/fw.h:74:2: error: unknown type name ‘uint32_t’
  uint32_t num_modules; /* number of modules */
  ^~~~~~~~
./usr/include/sound/sof/fw.h:75:2: error: unknown type name ‘uint32_t’
  uint32_t abi;  /* version of header format */
  ^~~~~~~~

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Link: https://lore.kernel.org/r/20190721142308.30306-1-yamada.masahiro@socionext.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/uapi/sound/sof/fw.h     | 16 +++++++++-------
 include/uapi/sound/sof/header.h | 14 ++++++++------
 2 files changed, 17 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/uapi/sound/sof/fw.h b/include/uapi/sound/sof/fw.h
index 1afca973eb09..e9f697467a86 100644
--- a/include/uapi/sound/sof/fw.h
+++ b/include/uapi/sound/sof/fw.h
@@ -13,6 +13,8 @@
 #ifndef __INCLUDE_UAPI_SOF_FW_H__
 #define __INCLUDE_UAPI_SOF_FW_H__
 
+#include <linux/types.h>
+
 #define SND_SOF_FW_SIG_SIZE	4
 #define SND_SOF_FW_ABI		1
 #define SND_SOF_FW_SIG		"Reef"
@@ -46,8 +48,8 @@ enum snd_sof_fw_blk_type {
 
 struct snd_sof_blk_hdr {
 	enum snd_sof_fw_blk_type type;
-	uint32_t size;		/* bytes minus this header */
-	uint32_t offset;	/* offset from base */
+	__u32 size;		/* bytes minus this header */
+	__u32 offset;		/* offset from base */
 } __packed;
 
 /*
@@ -61,8 +63,8 @@ enum snd_sof_fw_mod_type {
 
 struct snd_sof_mod_hdr {
 	enum snd_sof_fw_mod_type type;
-	uint32_t size;		/* bytes minus this header */
-	uint32_t num_blocks;	/* number of blocks */
+	__u32 size;		/* bytes minus this header */
+	__u32 num_blocks;	/* number of blocks */
 } __packed;
 
 /*
@@ -70,9 +72,9 @@ struct snd_sof_mod_hdr {
  */
 struct snd_sof_fw_header {
 	unsigned char sig[SND_SOF_FW_SIG_SIZE]; /* "Reef" */
-	uint32_t file_size;	/* size of file minus this header */
-	uint32_t num_modules;	/* number of modules */
-	uint32_t abi;		/* version of header format */
+	__u32 file_size;	/* size of file minus this header */
+	__u32 num_modules;	/* number of modules */
+	__u32 abi;		/* version of header format */
 } __packed;
 
 #endif
diff --git a/include/uapi/sound/sof/header.h b/include/uapi/sound/sof/header.h
index 7868990b0d6f..5f4518e7a972 100644
--- a/include/uapi/sound/sof/header.h
+++ b/include/uapi/sound/sof/header.h
@@ -9,6 +9,8 @@
 #ifndef __INCLUDE_UAPI_SOUND_SOF_USER_HEADER_H__
 #define __INCLUDE_UAPI_SOUND_SOF_USER_HEADER_H__
 
+#include <linux/types.h>
+
 /*
  * Header for all non IPC ABI data.
  *
@@ -16,12 +18,12 @@
  * Used by any bespoke component data structures or binary blobs.
  */
 struct sof_abi_hdr {
-	uint32_t magic;		/**< 'S', 'O', 'F', '\0' */
-	uint32_t type;		/**< component specific type */
-	uint32_t size;		/**< size in bytes of data excl. this struct */
-	uint32_t abi;		/**< SOF ABI version */
-	uint32_t reserved[4];	/**< reserved for future use */
-	uint32_t data[0];	/**< Component data - opaque to core */
+	__u32 magic;		/**< 'S', 'O', 'F', '\0' */
+	__u32 type;		/**< component specific type */
+	__u32 size;		/**< size in bytes of data excl. this struct */
+	__u32 abi;		/**< SOF ABI version */
+	__u32 reserved[4];	/**< reserved for future use */
+	__u32 data[0];		/**< Component data - opaque to core */
 }  __packed;
 
 #endif
-- 
cgit v1.2.3


From cfc8f568aada98f9608a0a62511ca18d647613e2 Mon Sep 17 00:00:00 2001
From: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
Date: Fri, 19 Jul 2019 10:05:30 +0000
Subject: ASoC: Define a set of DAPM pre/post-up events

Prepare to use SND_SOC_DAPM_PRE_POST_PMU definition to
reduce coming code size and make it more readable.

Cc: stable@vger.kernel.org
Signed-off-by: Oleksandr Suvorov <oleksandr.suvorov@toradex.com>
Reviewed-by: Marcel Ziswiler <marcel.ziswiler@toradex.com>
Reviewed-by: Igor Opaniuk <igor.opaniuk@toradex.com>
Reviewed-by: Fabio Estevam <festevam@gmail.com>
Link: https://lore.kernel.org/r/20190719100524.23300-2-oleksandr.suvorov@toradex.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index c00a0b8ade08..6c6694160130 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -353,6 +353,8 @@ struct device;
 #define SND_SOC_DAPM_WILL_PMD   0x80    /* called at start of sequence */
 #define SND_SOC_DAPM_PRE_POST_PMD \
 				(SND_SOC_DAPM_PRE_PMD | SND_SOC_DAPM_POST_PMD)
+#define SND_SOC_DAPM_PRE_POST_PMU \
+				(SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU)
 
 /* convenience event type detection */
 #define SND_SOC_DAPM_EVENT_ON(e)	\
-- 
cgit v1.2.3


From 318892ac068397f40ff81d9155898da01493b1d2 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 19 Jul 2019 10:29:14 -0700
Subject: net/tls: don't arm strparser immediately in tls_set_sw_offload()

In tls_set_device_offload_rx() we prepare the software context
for RX fallback and proceed to add the connection to the device.
Unfortunately, software context prep includes arming strparser
so in case of a later error we have to release the socket lock
to call strp_done().

In preparation for not releasing the socket lock half way through
callbacks move arming strparser into a separate function.
Following patches will make use of that.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h    |  1 +
 net/tls/tls_device.c |  1 +
 net/tls/tls_main.c   |  8 +++++---
 net/tls/tls_sw.c     | 19 ++++++++++++-------
 4 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 584609174fe0..43f551cd508b 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -355,6 +355,7 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 		  unsigned int optlen);
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
+void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 7c0b2b778703..4d67d72f007c 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -1045,6 +1045,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
 	rc = tls_set_sw_offload(sk, ctx, 0);
 	if (rc)
 		goto release_ctx;
+	tls_sw_strparser_arm(sk, ctx);
 
 	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
 					     &ctx->crypto_recv.info,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 4674e57e66b0..85a9d7d57b32 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -526,6 +526,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 		{
 #endif
 			rc = tls_set_sw_offload(sk, ctx, 1);
+			if (rc)
+				goto err_crypto_info;
 			conf = TLS_SW;
 		}
 	} else {
@@ -537,13 +539,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 		{
 #endif
 			rc = tls_set_sw_offload(sk, ctx, 0);
+			if (rc)
+				goto err_crypto_info;
+			tls_sw_strparser_arm(sk, ctx);
 			conf = TLS_SW;
 		}
 	}
 
-	if (rc)
-		goto err_crypto_info;
-
 	if (tx)
 		ctx->tx_conf = conf;
 	else
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 53b4ad94e74a..f58a8ffc2a9c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2160,6 +2160,18 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
 	}
 }
 
+void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_rx *rx_ctx = tls_sw_ctx_rx(tls_ctx);
+
+	write_lock_bh(&sk->sk_callback_lock);
+	rx_ctx->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = tls_data_ready;
+	write_unlock_bh(&sk->sk_callback_lock);
+
+	strp_check_rcv(&rx_ctx->strp);
+}
+
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2357,13 +2369,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
 		cb.parse_msg = tls_read_size;
 
 		strp_init(&sw_ctx_rx->strp, sk, &cb);
-
-		write_lock_bh(&sk->sk_callback_lock);
-		sw_ctx_rx->saved_data_ready = sk->sk_data_ready;
-		sk->sk_data_ready = tls_data_ready;
-		write_unlock_bh(&sk->sk_callback_lock);
-
-		strp_check_rcv(&sw_ctx_rx->strp);
 	}
 
 	goto out;
-- 
cgit v1.2.3


From f87e62d45e51b12d48d2cb46b5cde8f83b866bc4 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:16 -0700
Subject: net/tls: remove close callback sock unlock/lock around TX work flush

The tls close() callback currently drops the sock lock, makes a
cancel_delayed_work_sync() call, and then relocks the sock.

By restructuring the code we can avoid droping lock and then
reclaiming it. To simplify this we do the following,

 tls_sk_proto_close
 set_bit(CLOSING)
 set_bit(SCHEDULE)
 cancel_delay_work_sync() <- cancel workqueue
 lock_sock(sk)
 ...
 release_sock(sk)
 strp_done()

Setting the CLOSING bit prevents the SCHEDULE bit from being
cleared by any workqueue items e.g. if one happens to be
scheduled and run between when we set SCHEDULE bit and cancel
work. Then because SCHEDULE bit is set now no new work will
be scheduled.

Tested with net selftests and bpf selftests.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h  |  2 ++
 net/tls/tls_main.c |  3 +++
 net/tls/tls_sw.c   | 24 +++++++++++++++++-------
 3 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 43f551cd508b..d4276cb6de53 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -162,6 +162,7 @@ struct tls_sw_context_tx {
 	int async_capable;
 
 #define BIT_TX_SCHEDULED	0
+#define BIT_TX_CLOSING		1
 	unsigned long tx_bitmask;
 };
 
@@ -360,6 +361,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 7ab682ed99fa..5c29b410cf7d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -268,6 +268,9 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 	bool free_ctx = false;
 
+	if (ctx->tx_conf == TLS_SW)
+		tls_sw_cancel_work_tx(ctx);
+
 	lock_sock(sk);
 	sk_proto_close = ctx->sk_proto_close;
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index f58a8ffc2a9c..38c0e53c727d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2054,6 +2054,15 @@ static void tls_data_ready(struct sock *sk)
 	}
 }
 
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+
+	set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask);
+	set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask);
+	cancel_delayed_work_sync(&ctx->tx_work.work);
+}
+
 void tls_sw_free_resources_tx(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2065,11 +2074,6 @@ void tls_sw_free_resources_tx(struct sock *sk)
 	if (atomic_read(&ctx->encrypt_pending))
 		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
 
-	release_sock(sk);
-	cancel_delayed_work_sync(&ctx->tx_work.work);
-	lock_sock(sk);
-
-	/* Tx whatever records we can transmit and abandon the rest */
 	tls_tx_records(sk, -1);
 
 	/* Free up un-sent records in tx_list. First, free
@@ -2137,11 +2141,17 @@ static void tx_work_handler(struct work_struct *work)
 					       struct tx_work, work);
 	struct sock *sk = tx_work->sk;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_sw_context_tx *ctx;
 
-	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+	if (unlikely(!tls_ctx))
 		return;
 
+	ctx = tls_sw_ctx_tx(tls_ctx);
+	if (test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask))
+		return;
+
+	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+		return;
 	lock_sock(sk);
 	tls_tx_records(sk, -1);
 	release_sock(sk);
-- 
cgit v1.2.3


From 313ab004805cf52a42673b15852b3842474ccd87 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:17 -0700
Subject: net/tls: remove sock unlock/lock around strp_done()

The tls close() callback currently drops the sock lock to call
strp_done(). Split up the RX cleanup into stopping the strparser
and releasing most resources, syncing strparser and finally
freeing the context.

To avoid the need for a strp_done() call on the cleanup path
of device offload make sure we don't arm the strparser until
we are sure init will be successful.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tls.h    |  7 +++---
 net/tls/tls_device.c |  1 -
 net/tls/tls_main.c   | 61 ++++++++++++++++++++++++++--------------------------
 net/tls/tls_sw.c     | 40 +++++++++++++++++++++++++---------
 4 files changed, 64 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index d4276cb6de53..235508e35fd4 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -107,9 +107,7 @@ struct tls_device {
 enum {
 	TLS_BASE,
 	TLS_SW,
-#ifdef CONFIG_TLS_DEVICE
 	TLS_HW,
-#endif
 	TLS_HW_RECORD,
 	TLS_NUM_CONFIG,
 };
@@ -357,14 +355,17 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
 
 int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
+void tls_sw_strparser_done(struct tls_context *tls_ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
 void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
-void tls_sw_free_resources_tx(struct sock *sk);
+void tls_sw_release_resources_tx(struct sock *sk);
+void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
 void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
+void tls_sw_free_ctx_rx(struct tls_context *tls_ctx);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
 bool tls_sw_stream_read(const struct sock *sk);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 4d67d72f007c..7c0b2b778703 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -1045,7 +1045,6 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
 	rc = tls_set_sw_offload(sk, ctx, 0);
 	if (rc)
 		goto release_ctx;
-	tls_sw_strparser_arm(sk, ctx);
 
 	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
 					     &ctx->crypto_recv.info,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 5c29b410cf7d..d152a00a7a27 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -261,24 +261,9 @@ void tls_ctx_free(struct tls_context *ctx)
 	kfree(ctx);
 }
 
-static void tls_sk_proto_close(struct sock *sk, long timeout)
+static void tls_sk_proto_cleanup(struct sock *sk,
+				 struct tls_context *ctx, long timeo)
 {
-	struct tls_context *ctx = tls_get_ctx(sk);
-	long timeo = sock_sndtimeo(sk, 0);
-	void (*sk_proto_close)(struct sock *sk, long timeout);
-	bool free_ctx = false;
-
-	if (ctx->tx_conf == TLS_SW)
-		tls_sw_cancel_work_tx(ctx);
-
-	lock_sock(sk);
-	sk_proto_close = ctx->sk_proto_close;
-
-	if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) {
-		free_ctx = true;
-		goto skip_tx_cleanup;
-	}
-
 	if (unlikely(sk->sk_write_pending) &&
 	    !wait_on_pending_writer(sk, &timeo))
 		tls_handle_open_record(sk, 0);
@@ -287,7 +272,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	if (ctx->tx_conf == TLS_SW) {
 		kfree(ctx->tx.rec_seq);
 		kfree(ctx->tx.iv);
-		tls_sw_free_resources_tx(sk);
+		tls_sw_release_resources_tx(sk);
 #ifdef CONFIG_TLS_DEVICE
 	} else if (ctx->tx_conf == TLS_HW) {
 		tls_device_free_resources_tx(sk);
@@ -295,26 +280,40 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	}
 
 	if (ctx->rx_conf == TLS_SW)
-		tls_sw_free_resources_rx(sk);
+		tls_sw_release_resources_rx(sk);
 
 #ifdef CONFIG_TLS_DEVICE
 	if (ctx->rx_conf == TLS_HW)
 		tls_device_offload_cleanup_rx(sk);
-
-	if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
-#else
-	{
 #endif
-		tls_ctx_free(ctx);
-		ctx = NULL;
-	}
+}
+
+static void tls_sk_proto_close(struct sock *sk, long timeout)
+{
+	void (*sk_proto_close)(struct sock *sk, long timeout);
+	struct tls_context *ctx = tls_get_ctx(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	bool free_ctx;
+
+	if (ctx->tx_conf == TLS_SW)
+		tls_sw_cancel_work_tx(ctx);
+
+	lock_sock(sk);
+	free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW;
+	sk_proto_close = ctx->sk_proto_close;
+
+	if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE)
+		tls_sk_proto_cleanup(sk, ctx, timeo);
 
-skip_tx_cleanup:
 	release_sock(sk);
+	if (ctx->tx_conf == TLS_SW)
+		tls_sw_free_ctx_tx(ctx);
+	if (ctx->rx_conf == TLS_SW || ctx->rx_conf == TLS_HW)
+		tls_sw_strparser_done(ctx);
+	if (ctx->rx_conf == TLS_SW)
+		tls_sw_free_ctx_rx(ctx);
 	sk_proto_close(sk, timeout);
-	/* free ctx for TLS_HW_RECORD, used by tcp_set_state
-	 * for sk->sk_prot->unhash [tls_hw_unhash]
-	 */
+
 	if (free_ctx)
 		tls_ctx_free(ctx);
 }
@@ -541,9 +540,9 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 			rc = tls_set_sw_offload(sk, ctx, 0);
 			if (rc)
 				goto err_crypto_info;
-			tls_sw_strparser_arm(sk, ctx);
 			conf = TLS_SW;
 		}
+		tls_sw_strparser_arm(sk, ctx);
 	}
 
 	if (tx)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 38c0e53c727d..91d21b048a9b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2063,7 +2063,7 @@ void tls_sw_cancel_work_tx(struct tls_context *tls_ctx)
 	cancel_delayed_work_sync(&ctx->tx_work.work);
 }
 
-void tls_sw_free_resources_tx(struct sock *sk)
+void tls_sw_release_resources_tx(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
@@ -2096,6 +2096,11 @@ void tls_sw_free_resources_tx(struct sock *sk)
 
 	crypto_free_aead(ctx->aead_send);
 	tls_free_open_rec(sk);
+}
+
+void tls_sw_free_ctx_tx(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 
 	kfree(ctx);
 }
@@ -2114,25 +2119,40 @@ void tls_sw_release_resources_rx(struct sock *sk)
 		skb_queue_purge(&ctx->rx_list);
 		crypto_free_aead(ctx->aead_recv);
 		strp_stop(&ctx->strp);
-		write_lock_bh(&sk->sk_callback_lock);
-		sk->sk_data_ready = ctx->saved_data_ready;
-		write_unlock_bh(&sk->sk_callback_lock);
-		release_sock(sk);
-		strp_done(&ctx->strp);
-		lock_sock(sk);
+		/* If tls_sw_strparser_arm() was not called (cleanup paths)
+		 * we still want to strp_stop(), but sk->sk_data_ready was
+		 * never swapped.
+		 */
+		if (ctx->saved_data_ready) {
+			write_lock_bh(&sk->sk_callback_lock);
+			sk->sk_data_ready = ctx->saved_data_ready;
+			write_unlock_bh(&sk->sk_callback_lock);
+		}
 	}
 }
 
-void tls_sw_free_resources_rx(struct sock *sk)
+void tls_sw_strparser_done(struct tls_context *tls_ctx)
 {
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
-	tls_sw_release_resources_rx(sk);
+	strp_done(&ctx->strp);
+}
+
+void tls_sw_free_ctx_rx(struct tls_context *tls_ctx)
+{
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
 	kfree(ctx);
 }
 
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+
+	tls_sw_release_resources_rx(sk);
+	tls_sw_free_ctx_rx(tls_ctx);
+}
+
 /* The work handler to transmitt the encrypted records in tx_list */
 static void tx_work_handler(struct work_struct *work)
 {
-- 
cgit v1.2.3


From 32857cf57f920cdc03b5095f08febec94cf9c36b Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:18 -0700
Subject: net/tls: fix transition through disconnect with close

It is possible (via shutdown()) for TCP socks to go through TCP_CLOSE
state via tcp_disconnect() without actually calling tcp_close which
would then call the tls close callback. Because of this a user could
disconnect a socket then put it in a LISTEN state which would break
our assumptions about sockets always being ESTABLISHED state.

More directly because close() can call unhash() and unhash is
implemented by sockmap if a sockmap socket has TLS enabled we can
incorrectly destroy the psock from unhash() and then call its close
handler again. But because the psock (sockmap socket representation)
is already destroyed we call close handler in sk->prot. However,
in some cases (TLS BASE/BASE case) this will still point at the
sockmap close handler resulting in a circular call and crash reported
by syzbot.

To fix both above issues implement the unhash() routine for TLS.

v4:
 - add note about tls offload still needing the fix;
 - move sk_proto to the cold cache line;
 - split TX context free into "release" and "free",
   otherwise the GC work itself is in already freed
   memory;
 - more TX before RX for consistency;
 - reuse tls_ctx_free();
 - schedule the GC work after we're done with context
   to avoid UAF;
 - don't set the unhash in all modes, all modes "inherit"
   TLS_BASE's callbacks anyway;
 - disable the unhash hook for TLS_HW.

Fixes: 3c4d7559159bf ("tls: kernel TLS support")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 Documentation/networking/tls-offload.rst |  6 ++++
 include/net/tls.h                        |  5 ++-
 net/tls/tls_main.c                       | 55 ++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index 048e5ca44824..8a1eeb393316 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -513,3 +513,9 @@ Redirects leak clear text
 
 In the RX direction, if segment has already been decrypted by the device
 and it gets redirected or mirrored - clear text will be transmitted out.
+
+shutdown() doesn't clear TLS state
+----------------------------------
+
+shutdown() system call allows for a TLS socket to be reused as a different
+connection. Offload doesn't currently handle that.
diff --git a/include/net/tls.h b/include/net/tls.h
index 235508e35fd4..9e425ac2de45 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -271,6 +271,8 @@ struct tls_context {
 	unsigned long flags;
 
 	/* cache cold stuff */
+	struct proto *sk_proto;
+
 	void (*sk_destruct)(struct sock *sk);
 	void (*sk_proto_close)(struct sock *sk, long timeout);
 
@@ -288,6 +290,8 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
+
+	struct work_struct gc;
 };
 
 enum tls_offload_ctx_dir {
@@ -359,7 +363,6 @@ void tls_sw_strparser_done(struct tls_context *tls_ctx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
-void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
 void tls_sw_release_resources_tx(struct sock *sk);
 void tls_sw_free_ctx_tx(struct tls_context *tls_ctx);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d152a00a7a27..48f1c26459d0 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -261,6 +261,33 @@ void tls_ctx_free(struct tls_context *ctx)
 	kfree(ctx);
 }
 
+static void tls_ctx_free_deferred(struct work_struct *gc)
+{
+	struct tls_context *ctx = container_of(gc, struct tls_context, gc);
+
+	/* Ensure any remaining work items are completed. The sk will
+	 * already have lost its tls_ctx reference by the time we get
+	 * here so no xmit operation will actually be performed.
+	 */
+	if (ctx->tx_conf == TLS_SW) {
+		tls_sw_cancel_work_tx(ctx);
+		tls_sw_free_ctx_tx(ctx);
+	}
+
+	if (ctx->rx_conf == TLS_SW) {
+		tls_sw_strparser_done(ctx);
+		tls_sw_free_ctx_rx(ctx);
+	}
+
+	tls_ctx_free(ctx);
+}
+
+static void tls_ctx_free_wq(struct tls_context *ctx)
+{
+	INIT_WORK(&ctx->gc, tls_ctx_free_deferred);
+	schedule_work(&ctx->gc);
+}
+
 static void tls_sk_proto_cleanup(struct sock *sk,
 				 struct tls_context *ctx, long timeo)
 {
@@ -288,6 +315,26 @@ static void tls_sk_proto_cleanup(struct sock *sk,
 #endif
 }
 
+static void tls_sk_proto_unhash(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	struct tls_context *ctx;
+
+	if (unlikely(!icsk->icsk_ulp_data)) {
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+	}
+
+	ctx = tls_get_ctx(sk);
+	tls_sk_proto_cleanup(sk, ctx, timeo);
+	icsk->icsk_ulp_data = NULL;
+
+	if (ctx->sk_proto->unhash)
+		ctx->sk_proto->unhash(sk);
+	tls_ctx_free_wq(ctx);
+}
+
 static void tls_sk_proto_close(struct sock *sk, long timeout)
 {
 	void (*sk_proto_close)(struct sock *sk, long timeout);
@@ -305,6 +352,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE)
 		tls_sk_proto_cleanup(sk, ctx, timeo);
 
+	sk->sk_prot = ctx->sk_proto;
 	release_sock(sk);
 	if (ctx->tx_conf == TLS_SW)
 		tls_sw_free_ctx_tx(ctx);
@@ -608,6 +656,7 @@ static struct tls_context *create_ctx(struct sock *sk)
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
 	ctx->sk_proto_close = sk->sk_prot->close;
+	ctx->unhash = sk->sk_prot->unhash;
 	return ctx;
 }
 
@@ -731,6 +780,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_BASE][TLS_BASE].setsockopt	= tls_setsockopt;
 	prot[TLS_BASE][TLS_BASE].getsockopt	= tls_getsockopt;
 	prot[TLS_BASE][TLS_BASE].close		= tls_sk_proto_close;
+	prot[TLS_BASE][TLS_BASE].unhash		= tls_sk_proto_unhash;
 
 	prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
 	prot[TLS_SW][TLS_BASE].sendmsg		= tls_sw_sendmsg;
@@ -748,16 +798,20 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 
 #ifdef CONFIG_TLS_DEVICE
 	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
+	prot[TLS_HW][TLS_BASE].unhash		= base->unhash;
 	prot[TLS_HW][TLS_BASE].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_BASE].sendpage		= tls_device_sendpage;
 
 	prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
+	prot[TLS_HW][TLS_SW].unhash		= base->unhash;
 	prot[TLS_HW][TLS_SW].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_SW].sendpage		= tls_device_sendpage;
 
 	prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
+	prot[TLS_BASE][TLS_HW].unhash		= base->unhash;
 
 	prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW];
+	prot[TLS_SW][TLS_HW].unhash		= base->unhash;
 
 	prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
@@ -794,6 +848,7 @@ static int tls_init(struct sock *sk)
 	tls_build_proto(sk);
 	ctx->tx_conf = TLS_BASE;
 	ctx->rx_conf = TLS_BASE;
+	ctx->sk_proto = sk->sk_prot;
 	update_sk_prot(sk, ctx);
 out:
 	return rc;
-- 
cgit v1.2.3


From 95fa145479fbc0a0c1fd3274ceb42ec03c042a4a Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Jul 2019 10:29:22 -0700
Subject: bpf: sockmap/tls, close can race with map free

When a map free is called and in parallel a socket is closed we
have two paths that can potentially reset the socket prot ops, the
bpf close() path and the map free path. This creates a problem
with which prot ops should be used from the socket closed side.

If the map_free side completes first then we want to call the
original lowest level ops. However, if the tls path runs first
we want to call the sockmap ops. Additionally there was no locking
around prot updates in TLS code paths so the prot ops could
be changed multiple times once from TLS path and again from sockmap
side potentially leaving ops pointed at either TLS or sockmap
when psock and/or tls context have already been destroyed.

To fix this race first only update ops inside callback lock
so that TLS, sockmap and lowest level all agree on prot state.
Second and a ULP callback update() so that lower layers can
inform the upper layer when they are being removed allowing the
upper layer to reset prot ops.

This gets us close to allowing sockmap and tls to be stacked
in arbitrary order but will save that patch for *next trees.

v4:
 - make sure we don't free things for device;
 - remove the checks which swap the callbacks back
   only if TLS is at the top.

Reported-by: syzbot+06537213db7ba2745c4a@syzkaller.appspotmail.com
Fixes: 02c558b2d5d6 ("bpf: sockmap, support for msg_peek in sk_msg with redirect ingress")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h |  8 +++++++-
 include/net/tcp.h     |  3 +++
 net/core/skmsg.c      |  4 ++--
 net/ipv4/tcp_ulp.c    | 13 +++++++++++++
 net/tls/tls_main.c    | 33 ++++++++++++++++++++++++++++-----
 5 files changed, 53 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 50ced8aba9db..e4b3fb4bb77c 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -354,7 +354,13 @@ static inline void sk_psock_restore_proto(struct sock *sk,
 	sk->sk_write_space = psock->saved_write_space;
 
 	if (psock->sk_proto) {
-		sk->sk_prot = psock->sk_proto;
+		struct inet_connection_sock *icsk = inet_csk(sk);
+		bool has_ulp = !!icsk->icsk_ulp_data;
+
+		if (has_ulp)
+			tcp_update_ulp(sk, psock->sk_proto);
+		else
+			sk->sk_prot = psock->sk_proto;
 		psock->sk_proto = NULL;
 	}
 }
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f42d300f0cfa..c82a23470081 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2103,6 +2103,8 @@ struct tcp_ulp_ops {
 
 	/* initialize ulp */
 	int (*init)(struct sock *sk);
+	/* update ulp */
+	void (*update)(struct sock *sk, struct proto *p);
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
 
@@ -2114,6 +2116,7 @@ void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
+void tcp_update_ulp(struct sock *sk, struct proto *p);
 
 #define MODULE_ALIAS_TCP_ULP(name)				\
 	__MODULE_INFO(alias, alias_userspace, name);		\
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 93bffaad2135..6832eeb4b785 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -585,12 +585,12 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy);
 
 void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
 {
-	rcu_assign_sk_user_data(sk, NULL);
 	sk_psock_cork_free(psock);
 	sk_psock_zap_ingress(psock);
-	sk_psock_restore_proto(sk, psock);
 
 	write_lock_bh(&sk->sk_callback_lock);
+	sk_psock_restore_proto(sk, psock);
+	rcu_assign_sk_user_data(sk, NULL);
 	if (psock->progs.skb_parser)
 		sk_psock_stop_strp(sk, psock);
 	write_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 3d8a1d835471..4849edb62d52 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -96,6 +96,19 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
 	rcu_read_unlock();
 }
 
+void tcp_update_ulp(struct sock *sk, struct proto *proto)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (!icsk->icsk_ulp_ops) {
+		sk->sk_prot = proto;
+		return;
+	}
+
+	if (icsk->icsk_ulp_ops->update)
+		icsk->icsk_ulp_ops->update(sk, proto);
+}
+
 void tcp_cleanup_ulp(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 48f1c26459d0..f208f8455ef2 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -328,7 +328,10 @@ static void tls_sk_proto_unhash(struct sock *sk)
 
 	ctx = tls_get_ctx(sk);
 	tls_sk_proto_cleanup(sk, ctx, timeo);
+	write_lock_bh(&sk->sk_callback_lock);
 	icsk->icsk_ulp_data = NULL;
+	sk->sk_prot = ctx->sk_proto;
+	write_unlock_bh(&sk->sk_callback_lock);
 
 	if (ctx->sk_proto->unhash)
 		ctx->sk_proto->unhash(sk);
@@ -337,7 +340,7 @@ static void tls_sk_proto_unhash(struct sock *sk)
 
 static void tls_sk_proto_close(struct sock *sk, long timeout)
 {
-	void (*sk_proto_close)(struct sock *sk, long timeout);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tls_context *ctx = tls_get_ctx(sk);
 	long timeo = sock_sndtimeo(sk, 0);
 	bool free_ctx;
@@ -347,12 +350,15 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 
 	lock_sock(sk);
 	free_ctx = ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW;
-	sk_proto_close = ctx->sk_proto_close;
 
 	if (ctx->tx_conf != TLS_BASE || ctx->rx_conf != TLS_BASE)
 		tls_sk_proto_cleanup(sk, ctx, timeo);
 
+	write_lock_bh(&sk->sk_callback_lock);
+	if (free_ctx)
+		icsk->icsk_ulp_data = NULL;
 	sk->sk_prot = ctx->sk_proto;
+	write_unlock_bh(&sk->sk_callback_lock);
 	release_sock(sk);
 	if (ctx->tx_conf == TLS_SW)
 		tls_sw_free_ctx_tx(ctx);
@@ -360,7 +366,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 		tls_sw_strparser_done(ctx);
 	if (ctx->rx_conf == TLS_SW)
 		tls_sw_free_ctx_rx(ctx);
-	sk_proto_close(sk, timeout);
+	ctx->sk_proto_close(sk, timeout);
 
 	if (free_ctx)
 		tls_ctx_free(ctx);
@@ -827,7 +833,7 @@ static int tls_init(struct sock *sk)
 	int rc = 0;
 
 	if (tls_hw_prot(sk))
-		goto out;
+		return 0;
 
 	/* The TLS ulp is currently supported only for TCP sockets
 	 * in ESTABLISHED state.
@@ -838,22 +844,38 @@ static int tls_init(struct sock *sk)
 	if (sk->sk_state != TCP_ESTABLISHED)
 		return -ENOTSUPP;
 
+	tls_build_proto(sk);
+
 	/* allocate tls context */
+	write_lock_bh(&sk->sk_callback_lock);
 	ctx = create_ctx(sk);
 	if (!ctx) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
-	tls_build_proto(sk);
 	ctx->tx_conf = TLS_BASE;
 	ctx->rx_conf = TLS_BASE;
 	ctx->sk_proto = sk->sk_prot;
 	update_sk_prot(sk, ctx);
 out:
+	write_unlock_bh(&sk->sk_callback_lock);
 	return rc;
 }
 
+static void tls_update(struct sock *sk, struct proto *p)
+{
+	struct tls_context *ctx;
+
+	ctx = tls_get_ctx(sk);
+	if (likely(ctx)) {
+		ctx->sk_proto_close = p->close;
+		ctx->sk_proto = p;
+	} else {
+		sk->sk_prot = p;
+	}
+}
+
 void tls_register_device(struct tls_device *device)
 {
 	spin_lock_bh(&device_spinlock);
@@ -874,6 +896,7 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.name			= "tls",
 	.owner			= THIS_MODULE,
 	.init			= tls_init,
+	.update			= tls_update,
 };
 
 static int __init tls_register(void)
-- 
cgit v1.2.3


From 9c71b9eb3cb25856e29f0486eae9ee1ba864ba51 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 10:16:44 +0200
Subject: dmaengine: omap-dma: make omap_dma_filter_fn private

With the audio driver no longer referring to this function, it
can be made private to the dmaengine driver itself, and the
header file removed.

Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/lkml/20190307151646.1016966-1-arnd@arndb.de/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20190722081705.2084961-1-arnd@arndb.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/omap-dma.c      |  3 ++-
 include/linux/omap-dma.h       |  2 --
 include/linux/omap-dmaengine.h | 18 ------------------
 3 files changed, 2 insertions(+), 21 deletions(-)
 delete mode 100644 include/linux/omap-dmaengine.h

(limited to 'include')

diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
index ba2489d4ea24..49da402a1927 100644
--- a/drivers/dma/ti/omap-dma.c
+++ b/drivers/dma/ti/omap-dma.c
@@ -202,6 +202,7 @@ static const unsigned es_bytes[] = {
 	[CSDP_DATA_TYPE_32] = 4,
 };
 
+static bool omap_dma_filter_fn(struct dma_chan *chan, void *param);
 static struct of_dma_filter_info omap_dma_info = {
 	.filter_fn = omap_dma_filter_fn,
 };
@@ -1637,7 +1638,7 @@ static struct platform_driver omap_dma_driver = {
 	},
 };
 
-bool omap_dma_filter_fn(struct dma_chan *chan, void *param)
+static bool omap_dma_filter_fn(struct dma_chan *chan, void *param)
 {
 	if (chan->device->dev->driver == &omap_dma_driver.driver) {
 		struct omap_dmadev *od = to_omap_dma_dev(chan->device);
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 840ce551e773..ba3cfbb52312 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -1,8 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_OMAP_DMA_H
 #define __LINUX_OMAP_DMA_H
-#include <linux/omap-dmaengine.h>
-
 /*
  *  Legacy OMAP DMA handling defines and functions
  *
diff --git a/include/linux/omap-dmaengine.h b/include/linux/omap-dmaengine.h
deleted file mode 100644
index b6e42f933c40..000000000000
--- a/include/linux/omap-dmaengine.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * OMAP DMA Engine support
- */
-#ifndef __LINUX_OMAP_DMAENGINE_H
-#define __LINUX_OMAP_DMAENGINE_H
-
-struct dma_chan;
-
-#if defined(CONFIG_DMA_OMAP) || (defined(CONFIG_DMA_OMAP_MODULE) && defined(MODULE))
-bool omap_dma_filter_fn(struct dma_chan *, void *);
-#else
-static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
-{
-	return false;
-}
-#endif
-#endif /* __LINUX_OMAP_DMAENGINE_H */
-- 
cgit v1.2.3


From d2bfe7b5d182dad54aedb9dcdb736e8816ead1c3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 10:16:45 +0200
Subject: dmaengine: edma: make edma_filter_fn private

With the audio driver no longer referring to this function, it
can be made private to the dmaengine driver itself, and the
header file removed.

Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20190722081705.2084961-2-arnd@arndb.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/edma.c |  5 +++--
 include/linux/edma.h  | 29 -----------------------------
 2 files changed, 3 insertions(+), 31 deletions(-)
 delete mode 100644 include/linux/edma.h

(limited to 'include')

diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
index ceabdea40ae0..f2549ee3fb49 100644
--- a/drivers/dma/ti/edma.c
+++ b/drivers/dma/ti/edma.c
@@ -15,7 +15,6 @@
 
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
-#include <linux/edma.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -2185,6 +2184,8 @@ static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
 }
 #endif
 
+static bool edma_filter_fn(struct dma_chan *chan, void *param);
+
 static int edma_probe(struct platform_device *pdev)
 {
 	struct edma_soc_info	*info = pdev->dev.platform_data;
@@ -2524,7 +2525,7 @@ static struct platform_driver edma_tptc_driver = {
 	},
 };
 
-bool edma_filter_fn(struct dma_chan *chan, void *param)
+static bool edma_filter_fn(struct dma_chan *chan, void *param)
 {
 	bool match = false;
 
diff --git a/include/linux/edma.h b/include/linux/edma.h
deleted file mode 100644
index a1307e7827e8..000000000000
--- a/include/linux/edma.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * TI EDMA DMA engine driver
- *
- * Copyright 2012 Texas Instruments
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-#ifndef __LINUX_EDMA_H
-#define __LINUX_EDMA_H
-
-struct dma_chan;
-
-#if defined(CONFIG_TI_EDMA) || defined(CONFIG_TI_EDMA_MODULE)
-bool edma_filter_fn(struct dma_chan *, void *);
-#else
-static inline bool edma_filter_fn(struct dma_chan *chan, void *param)
-{
-	return false;
-}
-#endif
-
-#endif
-- 
cgit v1.2.3


From effa467870c7612012885df4e246bdb8ffd8e44c Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Tue, 16 Jul 2019 22:38:05 +0100
Subject: iommu/vt-d: Don't queue_iova() if there is no flush queue

Intel VT-d driver was reworked to use common deferred flushing
implementation. Previously there was one global per-cpu flush queue,
afterwards - one per domain.

Before deferring a flush, the queue should be allocated and initialized.

Currently only domains with IOMMU_DOMAIN_DMA type initialize their flush
queue. It's probably worth to init it for static or unmanaged domains
too, but it may be arguable - I'm leaving it to iommu folks.

Prevent queuing an iova flush if the domain doesn't have a queue.
The defensive check seems to be worth to keep even if queue would be
initialized for all kinds of domains. And is easy backportable.

On 4.19.43 stable kernel it has a user-visible effect: previously for
devices in si domain there were crashes, on sata devices:

 BUG: spinlock bad magic on CPU#6, swapper/0/1
  lock: 0xffff88844f582008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #1
 Call Trace:
  <IRQ>
  dump_stack+0x61/0x7e
  spin_bug+0x9d/0xa3
  do_raw_spin_lock+0x22/0x8e
  _raw_spin_lock_irqsave+0x32/0x3a
  queue_iova+0x45/0x115
  intel_unmap+0x107/0x113
  intel_unmap_sg+0x6b/0x76
  __ata_qc_complete+0x7f/0x103
  ata_qc_complete+0x9b/0x26a
  ata_qc_complete_multiple+0xd0/0xe3
  ahci_handle_port_interrupt+0x3ee/0x48a
  ahci_handle_port_intr+0x73/0xa9
  ahci_single_level_irq_intr+0x40/0x60
  __handle_irq_event_percpu+0x7f/0x19a
  handle_irq_event_percpu+0x32/0x72
  handle_irq_event+0x38/0x56
  handle_edge_irq+0x102/0x121
  handle_irq+0x147/0x15c
  do_IRQ+0x66/0xf2
  common_interrupt+0xf/0xf
 RIP: 0010:__do_softirq+0x8c/0x2df

The same for usb devices that use ehci-pci:
 BUG: spinlock bad magic on CPU#0, swapper/0/1
  lock: 0xffff88844f402008, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.43 #4
 Call Trace:
  <IRQ>
  dump_stack+0x61/0x7e
  spin_bug+0x9d/0xa3
  do_raw_spin_lock+0x22/0x8e
  _raw_spin_lock_irqsave+0x32/0x3a
  queue_iova+0x77/0x145
  intel_unmap+0x107/0x113
  intel_unmap_page+0xe/0x10
  usb_hcd_unmap_urb_setup_for_dma+0x53/0x9d
  usb_hcd_unmap_urb_for_dma+0x17/0x100
  unmap_urb_for_dma+0x22/0x24
  __usb_hcd_giveback_urb+0x51/0xc3
  usb_giveback_urb_bh+0x97/0xde
  tasklet_action_common.isra.4+0x5f/0xa1
  tasklet_action+0x2d/0x30
  __do_softirq+0x138/0x2df
  irq_exit+0x7d/0x8b
  smp_apic_timer_interrupt+0x10f/0x151
  apic_timer_interrupt+0xf/0x20
  </IRQ>
 RIP: 0010:_raw_spin_unlock_irqrestore+0x17/0x39

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: iommu@lists.linux-foundation.org
Cc: <stable@vger.kernel.org> # 4.14+
Fixes: 13cf01744608 ("iommu/vt-d: Make use of iova deferred flushing")
Signed-off-by: Dmitry Safonov <dima@arista.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c |  3 ++-
 drivers/iommu/iova.c        | 18 ++++++++++++++----
 include/linux/iova.h        |  6 ++++++
 3 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 9b1d62d03370..72c6d647bec9 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3561,7 +3561,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 
 	freelist = domain_unmap(domain, start_pfn, last_pfn);
 
-	if (intel_iommu_strict || (pdev && pdev->untrusted)) {
+	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
+			!has_iova_flush_queue(&domain->iovad)) {
 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
 				      nrpages, !freelist, 0);
 		/* free iova */
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index d499b2621239..8413ae54904a 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -54,9 +54,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 }
 EXPORT_SYMBOL_GPL(init_iova_domain);
 
+bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+	return !!iovad->fq;
+}
+
 static void free_iova_flush_queue(struct iova_domain *iovad)
 {
-	if (!iovad->fq)
+	if (!has_iova_flush_queue(iovad))
 		return;
 
 	if (timer_pending(&iovad->fq_timer))
@@ -74,13 +79,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
 int init_iova_flush_queue(struct iova_domain *iovad,
 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
 {
+	struct iova_fq __percpu *queue;
 	int cpu;
 
 	atomic64_set(&iovad->fq_flush_start_cnt,  0);
 	atomic64_set(&iovad->fq_flush_finish_cnt, 0);
 
-	iovad->fq = alloc_percpu(struct iova_fq);
-	if (!iovad->fq)
+	queue = alloc_percpu(struct iova_fq);
+	if (!queue)
 		return -ENOMEM;
 
 	iovad->flush_cb   = flush_cb;
@@ -89,13 +95,17 @@ int init_iova_flush_queue(struct iova_domain *iovad,
 	for_each_possible_cpu(cpu) {
 		struct iova_fq *fq;
 
-		fq = per_cpu_ptr(iovad->fq, cpu);
+		fq = per_cpu_ptr(queue, cpu);
 		fq->head = 0;
 		fq->tail = 0;
 
 		spin_lock_init(&fq->lock);
 	}
 
+	smp_wmb();
+
+	iovad->fq = queue;
+
 	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
 	atomic_set(&iovad->fq_timer_on, 0);
 
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 781b96ac706f..cd0f1de901a8 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -155,6 +155,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
 void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
 void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	unsigned long start_pfn);
+bool has_iova_flush_queue(struct iova_domain *iovad);
 int init_iova_flush_queue(struct iova_domain *iovad,
 			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@@ -235,6 +236,11 @@ static inline void init_iova_domain(struct iova_domain *iovad,
 {
 }
 
+bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+	return false;
+}
+
 static inline int init_iova_flush_queue(struct iova_domain *iovad,
 					iova_flush_cb flush_cb,
 					iova_entry_dtor entry_dtor)
-- 
cgit v1.2.3


From ae24fb49d01103c80d6ff3b78714259c1c62c958 Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe@linaro.org>
Date: Mon, 22 Jul 2019 15:40:07 +0100
Subject: iommu/virtio: Update to most recent specification

Following specification review a few things were changed in v8 of the
virtio-iommu series [1], but have been omitted when merging the base
driver. Add them now:

* Remove the EXEC flag.
* Add feature bit for the MMIO flag.
* Change domain_bits to domain_range.
* Add NOMEM status flag.

[1] https://lore.kernel.org/linux-iommu/20190530170929.19366-1-jean-philippe.brucker@arm.com/

Fixes: edcd69ab9a32 ("iommu: Add virtio-iommu driver")
Reported-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/virtio-iommu.c      | 40 ++++++++++++++++++++++++++++-----------
 include/uapi/linux/virtio_iommu.h | 32 +++++++++++++++++--------------
 2 files changed, 47 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 433f4d2ee956..80a740df0737 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -2,7 +2,7 @@
 /*
  * Virtio driver for the paravirtualized IOMMU
  *
- * Copyright (C) 2018 Arm Limited
+ * Copyright (C) 2019 Arm Limited
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -47,7 +47,10 @@ struct viommu_dev {
 	/* Device configuration */
 	struct iommu_domain_geometry	geometry;
 	u64				pgsize_bitmap;
-	u8				domain_bits;
+	u32				first_domain;
+	u32				last_domain;
+	/* Supported MAP flags */
+	u32				map_flags;
 	u32				probe_size;
 };
 
@@ -62,6 +65,7 @@ struct viommu_domain {
 	struct viommu_dev		*viommu;
 	struct mutex			mutex; /* protects viommu pointer */
 	unsigned int			id;
+	u32				map_flags;
 
 	spinlock_t			mappings_lock;
 	struct rb_root_cached		mappings;
@@ -113,6 +117,8 @@ static int viommu_get_req_errno(void *buf, size_t len)
 		return -ENOENT;
 	case VIRTIO_IOMMU_S_FAULT:
 		return -EFAULT;
+	case VIRTIO_IOMMU_S_NOMEM:
+		return -ENOMEM;
 	case VIRTIO_IOMMU_S_IOERR:
 	case VIRTIO_IOMMU_S_DEVERR:
 	default:
@@ -607,15 +613,15 @@ static int viommu_domain_finalise(struct viommu_dev *viommu,
 {
 	int ret;
 	struct viommu_domain *vdomain = to_viommu_domain(domain);
-	unsigned int max_domain = viommu->domain_bits > 31 ? ~0 :
-				  (1U << viommu->domain_bits) - 1;
 
 	vdomain->viommu		= viommu;
+	vdomain->map_flags	= viommu->map_flags;
 
 	domain->pgsize_bitmap	= viommu->pgsize_bitmap;
 	domain->geometry	= viommu->geometry;
 
-	ret = ida_alloc_max(&viommu->domain_ids, max_domain, GFP_KERNEL);
+	ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
+			      viommu->last_domain, GFP_KERNEL);
 	if (ret >= 0)
 		vdomain->id = (unsigned int)ret;
 
@@ -710,7 +716,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova,
 		      phys_addr_t paddr, size_t size, int prot)
 {
 	int ret;
-	int flags;
+	u32 flags;
 	struct virtio_iommu_req_map map;
 	struct viommu_domain *vdomain = to_viommu_domain(domain);
 
@@ -718,6 +724,9 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova,
 		(prot & IOMMU_WRITE ? VIRTIO_IOMMU_MAP_F_WRITE : 0) |
 		(prot & IOMMU_MMIO ? VIRTIO_IOMMU_MAP_F_MMIO : 0);
 
+	if (flags & ~vdomain->map_flags)
+		return -EINVAL;
+
 	ret = viommu_add_mapping(vdomain, iova, paddr, size, flags);
 	if (ret)
 		return ret;
@@ -1027,7 +1036,8 @@ static int viommu_probe(struct virtio_device *vdev)
 		goto err_free_vqs;
 	}
 
-	viommu->domain_bits = 32;
+	viommu->map_flags = VIRTIO_IOMMU_MAP_F_READ | VIRTIO_IOMMU_MAP_F_WRITE;
+	viommu->last_domain = ~0U;
 
 	/* Optional features */
 	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_INPUT_RANGE,
@@ -1038,9 +1048,13 @@ static int viommu_probe(struct virtio_device *vdev)
 			     struct virtio_iommu_config, input_range.end,
 			     &input_end);
 
-	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_BITS,
-			     struct virtio_iommu_config, domain_bits,
-			     &viommu->domain_bits);
+	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
+			     struct virtio_iommu_config, domain_range.start,
+			     &viommu->first_domain);
+
+	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_DOMAIN_RANGE,
+			     struct virtio_iommu_config, domain_range.end,
+			     &viommu->last_domain);
 
 	virtio_cread_feature(vdev, VIRTIO_IOMMU_F_PROBE,
 			     struct virtio_iommu_config, probe_size,
@@ -1052,6 +1066,9 @@ static int viommu_probe(struct virtio_device *vdev)
 		.force_aperture	= true,
 	};
 
+	if (virtio_has_feature(vdev, VIRTIO_IOMMU_F_MMIO))
+		viommu->map_flags |= VIRTIO_IOMMU_MAP_F_MMIO;
+
 	viommu_ops.pgsize_bitmap = viommu->pgsize_bitmap;
 
 	virtio_device_ready(vdev);
@@ -1130,9 +1147,10 @@ static void viommu_config_changed(struct virtio_device *vdev)
 
 static unsigned int features[] = {
 	VIRTIO_IOMMU_F_MAP_UNMAP,
-	VIRTIO_IOMMU_F_DOMAIN_BITS,
 	VIRTIO_IOMMU_F_INPUT_RANGE,
+	VIRTIO_IOMMU_F_DOMAIN_RANGE,
 	VIRTIO_IOMMU_F_PROBE,
+	VIRTIO_IOMMU_F_MMIO,
 };
 
 static struct virtio_device_id id_table[] = {
diff --git a/include/uapi/linux/virtio_iommu.h b/include/uapi/linux/virtio_iommu.h
index ba1b460c9944..237e36a280cb 100644
--- a/include/uapi/linux/virtio_iommu.h
+++ b/include/uapi/linux/virtio_iommu.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
 /*
- * Virtio-iommu definition v0.9
+ * Virtio-iommu definition v0.12
  *
- * Copyright (C) 2018 Arm Ltd.
+ * Copyright (C) 2019 Arm Ltd.
  */
 #ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
 #define _UAPI_LINUX_VIRTIO_IOMMU_H
@@ -11,26 +11,31 @@
 
 /* Feature bits */
 #define VIRTIO_IOMMU_F_INPUT_RANGE		0
-#define VIRTIO_IOMMU_F_DOMAIN_BITS		1
+#define VIRTIO_IOMMU_F_DOMAIN_RANGE		1
 #define VIRTIO_IOMMU_F_MAP_UNMAP		2
 #define VIRTIO_IOMMU_F_BYPASS			3
 #define VIRTIO_IOMMU_F_PROBE			4
+#define VIRTIO_IOMMU_F_MMIO			5
 
-struct virtio_iommu_range {
-	__u64					start;
-	__u64					end;
+struct virtio_iommu_range_64 {
+	__le64					start;
+	__le64					end;
+};
+
+struct virtio_iommu_range_32 {
+	__le32					start;
+	__le32					end;
 };
 
 struct virtio_iommu_config {
 	/* Supported page sizes */
-	__u64					page_size_mask;
+	__le64					page_size_mask;
 	/* Supported IOVA range */
-	struct virtio_iommu_range		input_range;
+	struct virtio_iommu_range_64		input_range;
 	/* Max domain ID size */
-	__u8					domain_bits;
-	__u8					padding[3];
+	struct virtio_iommu_range_32		domain_range;
 	/* Probe buffer size */
-	__u32					probe_size;
+	__le32					probe_size;
 };
 
 /* Request types */
@@ -49,6 +54,7 @@ struct virtio_iommu_config {
 #define VIRTIO_IOMMU_S_RANGE			0x05
 #define VIRTIO_IOMMU_S_NOENT			0x06
 #define VIRTIO_IOMMU_S_FAULT			0x07
+#define VIRTIO_IOMMU_S_NOMEM			0x08
 
 struct virtio_iommu_req_head {
 	__u8					type;
@@ -78,12 +84,10 @@ struct virtio_iommu_req_detach {
 
 #define VIRTIO_IOMMU_MAP_F_READ			(1 << 0)
 #define VIRTIO_IOMMU_MAP_F_WRITE		(1 << 1)
-#define VIRTIO_IOMMU_MAP_F_EXEC			(1 << 2)
-#define VIRTIO_IOMMU_MAP_F_MMIO			(1 << 3)
+#define VIRTIO_IOMMU_MAP_F_MMIO			(1 << 2)
 
 #define VIRTIO_IOMMU_MAP_F_MASK			(VIRTIO_IOMMU_MAP_F_READ |	\
 						 VIRTIO_IOMMU_MAP_F_WRITE |	\
-						 VIRTIO_IOMMU_MAP_F_EXEC |	\
 						 VIRTIO_IOMMU_MAP_F_MMIO)
 
 struct virtio_iommu_req_map {
-- 
cgit v1.2.3


From 60c1b220d8bc6baeaf837cd60f94a331b25c26bc Mon Sep 17 00:00:00 2001
From: Atish Patra <atish.patra@wdc.com>
Date: Thu, 27 Jun 2019 12:52:58 -0700
Subject: cpu-topology: Move cpu topology code to common code.

Both RISC-V & ARM64 are using cpu-map device tree to describe
their cpu topology. It's better to move the relevant code to
a common place instead of duplicate code.

To: Will Deacon <will.deacon@arm.com>
To: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Atish Patra <atish.patra@wdc.com>
[Tested on QDF2400]
Tested-by: Jeffrey Hugo <jhugo@codeaurora.org>
[Tested on Juno and other embedded platforms.]
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/arm64/include/asm/topology.h |  23 ---
 arch/arm64/kernel/topology.c      | 303 +-------------------------------------
 drivers/base/arch_topology.c      | 296 +++++++++++++++++++++++++++++++++++++
 include/linux/arch_topology.h     |  28 ++++
 include/linux/topology.h          |   1 +
 5 files changed, 329 insertions(+), 322 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 0524f2438649..a4d945db95a2 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -4,29 +4,6 @@
 
 #include <linux/cpumask.h>
 
-struct cpu_topology {
-	int thread_id;
-	int core_id;
-	int package_id;
-	int llc_id;
-	cpumask_t thread_sibling;
-	cpumask_t core_sibling;
-	cpumask_t llc_sibling;
-};
-
-extern struct cpu_topology cpu_topology[NR_CPUS];
-
-#define topology_physical_package_id(cpu)	(cpu_topology[cpu].package_id)
-#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
-#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
-#define topology_sibling_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
-#define topology_llc_cpumask(cpu)	(&cpu_topology[cpu].llc_sibling)
-
-void init_cpu_topology(void);
-void store_cpu_topology(unsigned int cpuid);
-void remove_cpu_topology(unsigned int cpuid);
-const struct cpumask *cpu_coregroup_mask(int cpu);
-
 #ifdef CONFIG_NUMA
 
 struct pci_bus;
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 0825c4a856e3..6b95c91e7d67 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -14,250 +14,13 @@
 #include <linux/acpi.h>
 #include <linux/arch_topology.h>
 #include <linux/cacheinfo.h>
-#include <linux/cpu.h>
-#include <linux/cpumask.h>
 #include <linux/init.h>
 #include <linux/percpu.h>
-#include <linux/node.h>
-#include <linux/nodemask.h>
-#include <linux/of.h>
-#include <linux/sched.h>
-#include <linux/sched/topology.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/string.h>
 
 #include <asm/cpu.h>
 #include <asm/cputype.h>
 #include <asm/topology.h>
 
-static int __init get_cpu_for_node(struct device_node *node)
-{
-	struct device_node *cpu_node;
-	int cpu;
-
-	cpu_node = of_parse_phandle(node, "cpu", 0);
-	if (!cpu_node)
-		return -1;
-
-	cpu = of_cpu_node_to_id(cpu_node);
-	if (cpu >= 0)
-		topology_parse_cpu_capacity(cpu_node, cpu);
-	else
-		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
-
-	of_node_put(cpu_node);
-	return cpu;
-}
-
-static int __init parse_core(struct device_node *core, int package_id,
-			     int core_id)
-{
-	char name[10];
-	bool leaf = true;
-	int i = 0;
-	int cpu;
-	struct device_node *t;
-
-	do {
-		snprintf(name, sizeof(name), "thread%d", i);
-		t = of_get_child_by_name(core, name);
-		if (t) {
-			leaf = false;
-			cpu = get_cpu_for_node(t);
-			if (cpu >= 0) {
-				cpu_topology[cpu].package_id = package_id;
-				cpu_topology[cpu].core_id = core_id;
-				cpu_topology[cpu].thread_id = i;
-			} else {
-				pr_err("%pOF: Can't get CPU for thread\n",
-				       t);
-				of_node_put(t);
-				return -EINVAL;
-			}
-			of_node_put(t);
-		}
-		i++;
-	} while (t);
-
-	cpu = get_cpu_for_node(core);
-	if (cpu >= 0) {
-		if (!leaf) {
-			pr_err("%pOF: Core has both threads and CPU\n",
-			       core);
-			return -EINVAL;
-		}
-
-		cpu_topology[cpu].package_id = package_id;
-		cpu_topology[cpu].core_id = core_id;
-	} else if (leaf) {
-		pr_err("%pOF: Can't get CPU for leaf core\n", core);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int __init parse_cluster(struct device_node *cluster, int depth)
-{
-	char name[10];
-	bool leaf = true;
-	bool has_cores = false;
-	struct device_node *c;
-	static int package_id __initdata;
-	int core_id = 0;
-	int i, ret;
-
-	/*
-	 * First check for child clusters; we currently ignore any
-	 * information about the nesting of clusters and present the
-	 * scheduler with a flat list of them.
-	 */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "cluster%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			leaf = false;
-			ret = parse_cluster(c, depth + 1);
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	/* Now check for cores */
-	i = 0;
-	do {
-		snprintf(name, sizeof(name), "core%d", i);
-		c = of_get_child_by_name(cluster, name);
-		if (c) {
-			has_cores = true;
-
-			if (depth == 0) {
-				pr_err("%pOF: cpu-map children should be clusters\n",
-				       c);
-				of_node_put(c);
-				return -EINVAL;
-			}
-
-			if (leaf) {
-				ret = parse_core(c, package_id, core_id++);
-			} else {
-				pr_err("%pOF: Non-leaf cluster with core %s\n",
-				       cluster, name);
-				ret = -EINVAL;
-			}
-
-			of_node_put(c);
-			if (ret != 0)
-				return ret;
-		}
-		i++;
-	} while (c);
-
-	if (leaf && !has_cores)
-		pr_warn("%pOF: empty cluster\n", cluster);
-
-	if (leaf)
-		package_id++;
-
-	return 0;
-}
-
-static int __init parse_dt_topology(void)
-{
-	struct device_node *cn, *map;
-	int ret = 0;
-	int cpu;
-
-	cn = of_find_node_by_path("/cpus");
-	if (!cn) {
-		pr_err("No CPU information found in DT\n");
-		return 0;
-	}
-
-	/*
-	 * When topology is provided cpu-map is essentially a root
-	 * cluster with restricted subnodes.
-	 */
-	map = of_get_child_by_name(cn, "cpu-map");
-	if (!map)
-		goto out;
-
-	ret = parse_cluster(map, 0);
-	if (ret != 0)
-		goto out_map;
-
-	topology_normalize_cpu_scale();
-
-	/*
-	 * Check that all cores are in the topology; the SMP code will
-	 * only mark cores described in the DT as possible.
-	 */
-	for_each_possible_cpu(cpu)
-		if (cpu_topology[cpu].package_id == -1)
-			ret = -EINVAL;
-
-out_map:
-	of_node_put(map);
-out:
-	of_node_put(cn);
-	return ret;
-}
-
-/*
- * cpu topology table
- */
-struct cpu_topology cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
-
-const struct cpumask *cpu_coregroup_mask(int cpu)
-{
-	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
-
-	/* Find the smaller of NUMA, core or LLC siblings */
-	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
-		/* not numa in package, lets use the package siblings */
-		core_mask = &cpu_topology[cpu].core_sibling;
-	}
-	if (cpu_topology[cpu].llc_id != -1) {
-		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
-			core_mask = &cpu_topology[cpu].llc_sibling;
-	}
-
-	return core_mask;
-}
-
-static void update_siblings_masks(unsigned int cpuid)
-{
-	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
-	int cpu;
-
-	/* update core and thread sibling masks */
-	for_each_online_cpu(cpu) {
-		cpu_topo = &cpu_topology[cpu];
-
-		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
-			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
-			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
-		}
-
-		if (cpuid_topo->package_id != cpu_topo->package_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
-		if (cpuid_topo->core_id != cpu_topo->core_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
-	}
-}
-
 void store_cpu_topology(unsigned int cpuid)
 {
 	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
@@ -296,59 +59,19 @@ topology_populated:
 	update_siblings_masks(cpuid);
 }
 
-static void clear_cpu_topology(int cpu)
-{
-	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-	cpumask_clear(&cpu_topo->llc_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
-
-	cpumask_clear(&cpu_topo->core_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
-	cpumask_clear(&cpu_topo->thread_sibling);
-	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
-}
-
-static void __init reset_cpu_topology(void)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu) {
-		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
-
-		cpu_topo->thread_id = -1;
-		cpu_topo->core_id = 0;
-		cpu_topo->package_id = -1;
-		cpu_topo->llc_id = -1;
-
-		clear_cpu_topology(cpu);
-	}
-}
-
-void remove_cpu_topology(unsigned int cpu)
-{
-	int sibling;
-
-	for_each_cpu(sibling, topology_core_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
-	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
-	for_each_cpu(sibling, topology_llc_cpumask(cpu))
-		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
-
-	clear_cpu_topology(cpu);
-}
-
 #ifdef CONFIG_ACPI
 /*
  * Propagate the topology information of the processor_topology_node tree to the
  * cpu_topology array.
  */
-static int __init parse_acpi_topology(void)
+int __init parse_acpi_topology(void)
 {
 	bool is_threaded;
 	int cpu, topology_id;
 
+	if (acpi_disabled)
+		return 0;
+
 	is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK;
 
 	for_each_possible_cpu(cpu) {
@@ -384,24 +107,6 @@ static int __init parse_acpi_topology(void)
 
 	return 0;
 }
-
-#else
-static inline int __init parse_acpi_topology(void)
-{
-	return -EINVAL;
-}
 #endif
 
-void __init init_cpu_topology(void)
-{
-	reset_cpu_topology();
 
-	/*
-	 * Discard anything that was parsed if we hit an error so we
-	 * don't use partial information.
-	 */
-	if (!acpi_disabled && parse_acpi_topology())
-		reset_cpu_topology();
-	else if (of_have_populated_dt() && parse_dt_topology())
-		reset_cpu_topology();
-}
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 63c1e76739f1..5dc0e1ddd080 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -15,6 +15,11 @@
 #include <linux/string.h>
 #include <linux/sched/topology.h>
 #include <linux/cpuset.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
 
 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
 
@@ -241,3 +246,294 @@ static void parsing_done_workfn(struct work_struct *work)
 #else
 core_initcall(free_raw_capacity);
 #endif
+
+#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+static int __init get_cpu_for_node(struct device_node *node)
+{
+	struct device_node *cpu_node;
+	int cpu;
+
+	cpu_node = of_parse_phandle(node, "cpu", 0);
+	if (!cpu_node)
+		return -1;
+
+	cpu = of_cpu_node_to_id(cpu_node);
+	if (cpu >= 0)
+		topology_parse_cpu_capacity(cpu_node, cpu);
+	else
+		pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
+
+	of_node_put(cpu_node);
+	return cpu;
+}
+
+static int __init parse_core(struct device_node *core, int package_id,
+			     int core_id)
+{
+	char name[10];
+	bool leaf = true;
+	int i = 0;
+	int cpu;
+	struct device_node *t;
+
+	do {
+		snprintf(name, sizeof(name), "thread%d", i);
+		t = of_get_child_by_name(core, name);
+		if (t) {
+			leaf = false;
+			cpu = get_cpu_for_node(t);
+			if (cpu >= 0) {
+				cpu_topology[cpu].package_id = package_id;
+				cpu_topology[cpu].core_id = core_id;
+				cpu_topology[cpu].thread_id = i;
+			} else {
+				pr_err("%pOF: Can't get CPU for thread\n",
+				       t);
+				of_node_put(t);
+				return -EINVAL;
+			}
+			of_node_put(t);
+		}
+		i++;
+	} while (t);
+
+	cpu = get_cpu_for_node(core);
+	if (cpu >= 0) {
+		if (!leaf) {
+			pr_err("%pOF: Core has both threads and CPU\n",
+			       core);
+			return -EINVAL;
+		}
+
+		cpu_topology[cpu].package_id = package_id;
+		cpu_topology[cpu].core_id = core_id;
+	} else if (leaf) {
+		pr_err("%pOF: Can't get CPU for leaf core\n", core);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init parse_cluster(struct device_node *cluster, int depth)
+{
+	char name[10];
+	bool leaf = true;
+	bool has_cores = false;
+	struct device_node *c;
+	static int package_id __initdata;
+	int core_id = 0;
+	int i, ret;
+
+	/*
+	 * First check for child clusters; we currently ignore any
+	 * information about the nesting of clusters and present the
+	 * scheduler with a flat list of them.
+	 */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "cluster%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			leaf = false;
+			ret = parse_cluster(c, depth + 1);
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		i++;
+	} while (c);
+
+	/* Now check for cores */
+	i = 0;
+	do {
+		snprintf(name, sizeof(name), "core%d", i);
+		c = of_get_child_by_name(cluster, name);
+		if (c) {
+			has_cores = true;
+
+			if (depth == 0) {
+				pr_err("%pOF: cpu-map children should be clusters\n",
+				       c);
+				of_node_put(c);
+				return -EINVAL;
+			}
+
+			if (leaf) {
+				ret = parse_core(c, package_id, core_id++);
+			} else {
+				pr_err("%pOF: Non-leaf cluster with core %s\n",
+				       cluster, name);
+				ret = -EINVAL;
+			}
+
+			of_node_put(c);
+			if (ret != 0)
+				return ret;
+		}
+		i++;
+	} while (c);
+
+	if (leaf && !has_cores)
+		pr_warn("%pOF: empty cluster\n", cluster);
+
+	if (leaf)
+		package_id++;
+
+	return 0;
+}
+
+static int __init parse_dt_topology(void)
+{
+	struct device_node *cn, *map;
+	int ret = 0;
+	int cpu;
+
+	cn = of_find_node_by_path("/cpus");
+	if (!cn) {
+		pr_err("No CPU information found in DT\n");
+		return 0;
+	}
+
+	/*
+	 * When topology is provided cpu-map is essentially a root
+	 * cluster with restricted subnodes.
+	 */
+	map = of_get_child_by_name(cn, "cpu-map");
+	if (!map)
+		goto out;
+
+	ret = parse_cluster(map, 0);
+	if (ret != 0)
+		goto out_map;
+
+	topology_normalize_cpu_scale();
+
+	/*
+	 * Check that all cores are in the topology; the SMP code will
+	 * only mark cores described in the DT as possible.
+	 */
+	for_each_possible_cpu(cpu)
+		if (cpu_topology[cpu].package_id == -1)
+			ret = -EINVAL;
+
+out_map:
+	of_node_put(map);
+out:
+	of_node_put(cn);
+	return ret;
+}
+
+/*
+ * cpu topology table
+ */
+struct cpu_topology cpu_topology[NR_CPUS];
+EXPORT_SYMBOL_GPL(cpu_topology);
+
+const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu));
+
+	/* Find the smaller of NUMA, core or LLC siblings */
+	if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) {
+		/* not numa in package, lets use the package siblings */
+		core_mask = &cpu_topology[cpu].core_sibling;
+	}
+	if (cpu_topology[cpu].llc_id != -1) {
+		if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask))
+			core_mask = &cpu_topology[cpu].llc_sibling;
+	}
+
+	return core_mask;
+}
+
+void update_siblings_masks(unsigned int cpuid)
+{
+	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
+	int cpu;
+
+	/* update core and thread sibling masks */
+	for_each_online_cpu(cpu) {
+		cpu_topo = &cpu_topology[cpu];
+
+		if (cpuid_topo->llc_id == cpu_topo->llc_id) {
+			cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling);
+			cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling);
+		}
+
+		if (cpuid_topo->package_id != cpu_topo->package_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
+		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
+
+		if (cpuid_topo->core_id != cpu_topo->core_id)
+			continue;
+
+		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
+		cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
+	}
+}
+
+static void clear_cpu_topology(int cpu)
+{
+	struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+	cpumask_clear(&cpu_topo->llc_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
+
+	cpumask_clear(&cpu_topo->core_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
+	cpumask_clear(&cpu_topo->thread_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
+}
+
+static void __init reset_cpu_topology(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_topology *cpu_topo = &cpu_topology[cpu];
+
+		cpu_topo->thread_id = -1;
+		cpu_topo->core_id = -1;
+		cpu_topo->package_id = -1;
+		cpu_topo->llc_id = -1;
+
+		clear_cpu_topology(cpu);
+	}
+}
+
+void remove_cpu_topology(unsigned int cpu)
+{
+	int sibling;
+
+	for_each_cpu(sibling, topology_core_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
+	for_each_cpu(sibling, topology_sibling_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+	for_each_cpu(sibling, topology_llc_cpumask(cpu))
+		cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
+
+	clear_cpu_topology(cpu);
+}
+
+__weak int __init parse_acpi_topology(void)
+{
+	return 0;
+}
+
+void __init init_cpu_topology(void)
+{
+	reset_cpu_topology();
+
+	/*
+	 * Discard anything that was parsed if we hit an error so we
+	 * don't use partial information.
+	 */
+	if (parse_acpi_topology())
+		reset_cpu_topology();
+	else if (of_have_populated_dt() && parse_dt_topology())
+		reset_cpu_topology();
+}
+#endif
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 1cfe05ea1d89..ede0ce4623b4 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -33,4 +33,32 @@ unsigned long topology_get_freq_scale(int cpu)
 	return per_cpu(freq_scale, cpu);
 }
 
+struct cpu_topology {
+	int thread_id;
+	int core_id;
+	int package_id;
+	int llc_id;
+	cpumask_t thread_sibling;
+	cpumask_t core_sibling;
+	cpumask_t llc_sibling;
+};
+
+#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY
+extern struct cpu_topology cpu_topology[NR_CPUS];
+
+#define topology_physical_package_id(cpu)	(cpu_topology[cpu].package_id)
+#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
+#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
+#define topology_sibling_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
+#define topology_llc_cpumask(cpu)	(&cpu_topology[cpu].llc_sibling)
+void init_cpu_topology(void);
+void store_cpu_topology(unsigned int cpuid);
+const struct cpumask *cpu_coregroup_mask(int cpu);
+#endif
+
+#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
+void update_siblings_masks(unsigned int cpu);
+#endif
+void remove_cpu_topology(unsigned int cpuid);
+
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 47a3e3c08036..2a19d196af28 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -27,6 +27,7 @@
 #ifndef _LINUX_TOPOLOGY_H
 #define _LINUX_TOPOLOGY_H
 
+#include <linux/arch_topology.h>
 #include <linux/cpumask.h>
 #include <linux/bitops.h>
 #include <linux/mmzone.h>
-- 
cgit v1.2.3


From ca74b316df96d7c40ee3e8301065607c11c60c27 Mon Sep 17 00:00:00 2001
From: Atish Patra <atish.patra@wdc.com>
Date: Thu, 27 Jun 2019 12:52:59 -0700
Subject: arm: Use common cpu_topology structure and functions.

Currently, ARM32 and ARM64 uses different data structures to represent
their cpu topologies. Since, we are moving the ARM64 topology to common
code to be used by other architectures, we can reuse that for ARM32 as
well.

Take this opprtunity to remove the redundant functions from ARM32 and
reuse the common code instead.

To: Russell King <linux@armlinux.org.uk>
Signed-off-by: Atish Patra <atish.patra@wdc.com>
Tested-by: Sudeep Holla <sudeep.holla@arm.com> (on TC2)
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/arm/include/asm/topology.h | 20 --------------
 arch/arm/kernel/topology.c      | 60 +++++------------------------------------
 drivers/base/arch_topology.c    |  4 ++-
 include/linux/arch_topology.h   |  6 ++---
 4 files changed, 11 insertions(+), 79 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/topology.h b/arch/arm/include/asm/topology.h
index 2a786f54d8b8..8a0fae94d45e 100644
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -5,26 +5,6 @@
 #ifdef CONFIG_ARM_CPU_TOPOLOGY
 
 #include <linux/cpumask.h>
-
-struct cputopo_arm {
-	int thread_id;
-	int core_id;
-	int socket_id;
-	cpumask_t thread_sibling;
-	cpumask_t core_sibling;
-};
-
-extern struct cputopo_arm cpu_topology[NR_CPUS];
-
-#define topology_physical_package_id(cpu)	(cpu_topology[cpu].socket_id)
-#define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
-#define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
-#define topology_sibling_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
-
-void init_cpu_topology(void);
-void store_cpu_topology(unsigned int cpuid);
-const struct cpumask *cpu_coregroup_mask(int cpu);
-
 #include <linux/arch_topology.h>
 
 /* Replace task scheduler's default frequency-invariant accounting */
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index d17cb1e6d679..5b9faba03afb 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -177,17 +177,6 @@ static inline void parse_dt_topology(void) {}
 static inline void update_cpu_capacity(unsigned int cpuid) {}
 #endif
 
- /*
- * cpu topology table
- */
-struct cputopo_arm cpu_topology[NR_CPUS];
-EXPORT_SYMBOL_GPL(cpu_topology);
-
-const struct cpumask *cpu_coregroup_mask(int cpu)
-{
-	return &cpu_topology[cpu].core_sibling;
-}
-
 /*
  * The current assumption is that we can power gate each core independently.
  * This will be superseded by DT binding once available.
@@ -197,32 +186,6 @@ const struct cpumask *cpu_corepower_mask(int cpu)
 	return &cpu_topology[cpu].thread_sibling;
 }
 
-static void update_siblings_masks(unsigned int cpuid)
-{
-	struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
-	int cpu;
-
-	/* update core and thread sibling masks */
-	for_each_possible_cpu(cpu) {
-		cpu_topo = &cpu_topology[cpu];
-
-		if (cpuid_topo->socket_id != cpu_topo->socket_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
-		if (cpu != cpuid)
-			cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
-
-		if (cpuid_topo->core_id != cpu_topo->core_id)
-			continue;
-
-		cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling);
-		if (cpu != cpuid)
-			cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling);
-	}
-	smp_wmb();
-}
-
 /*
  * store_cpu_topology is called at boot when only one cpu is running
  * and with the mutex cpu_hotplug.lock locked, when several cpus have booted,
@@ -230,7 +193,7 @@ static void update_siblings_masks(unsigned int cpuid)
  */
 void store_cpu_topology(unsigned int cpuid)
 {
-	struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid];
+	struct cpu_topology *cpuid_topo = &cpu_topology[cpuid];
 	unsigned int mpidr;
 
 	/* If the cpu topology has been already set, just return */
@@ -250,12 +213,12 @@ void store_cpu_topology(unsigned int cpuid)
 			/* core performance interdependency */
 			cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
 			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
-			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+			cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
 		} else {
 			/* largely independent cores */
 			cpuid_topo->thread_id = -1;
 			cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0);
-			cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+			cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1);
 		}
 	} else {
 		/*
@@ -265,7 +228,7 @@ void store_cpu_topology(unsigned int cpuid)
 		 */
 		cpuid_topo->thread_id = -1;
 		cpuid_topo->core_id = 0;
-		cpuid_topo->socket_id = -1;
+		cpuid_topo->package_id = -1;
 	}
 
 	update_siblings_masks(cpuid);
@@ -275,7 +238,7 @@ void store_cpu_topology(unsigned int cpuid)
 	pr_info("CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
 		cpu_topology[cpuid].core_id,
-		cpu_topology[cpuid].socket_id, mpidr);
+		cpu_topology[cpuid].package_id, mpidr);
 }
 
 static inline int cpu_corepower_flags(void)
@@ -298,18 +261,7 @@ static struct sched_domain_topology_level arm_topology[] = {
  */
 void __init init_cpu_topology(void)
 {
-	unsigned int cpu;
-
-	/* init core mask and capacity */
-	for_each_possible_cpu(cpu) {
-		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
-
-		cpu_topo->thread_id = -1;
-		cpu_topo->core_id =  -1;
-		cpu_topo->socket_id = -1;
-		cpumask_clear(&cpu_topo->core_sibling);
-		cpumask_clear(&cpu_topo->thread_sibling);
-	}
+	reset_cpu_topology();
 	smp_wmb();
 
 	parse_dt_topology();
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 5dc0e1ddd080..b54d241a2ff5 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -423,6 +423,7 @@ out:
 	of_node_put(cn);
 	return ret;
 }
+#endif
 
 /*
  * cpu topology table
@@ -488,7 +489,7 @@ static void clear_cpu_topology(int cpu)
 	cpumask_set_cpu(cpu, &cpu_topo->thread_sibling);
 }
 
-static void __init reset_cpu_topology(void)
+void __init reset_cpu_topology(void)
 {
 	unsigned int cpu;
 
@@ -523,6 +524,7 @@ __weak int __init parse_acpi_topology(void)
 	return 0;
 }
 
+#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 void __init init_cpu_topology(void)
 {
 	reset_cpu_topology();
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index ede0ce4623b4..42f2b5126094 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -54,11 +54,9 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
-#endif
-
-#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
 void update_siblings_masks(unsigned int cpu);
-#endif
 void remove_cpu_topology(unsigned int cpuid);
+void reset_cpu_topology(void);
+#endif
 
 #endif /* _LINUX_ARCH_TOPOLOGY_H_ */
-- 
cgit v1.2.3


From 2b74c878b0eae4c32629c2d5ba69a29f69048313 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Mon, 15 Jul 2019 12:45:28 -0400
Subject: IB/hfi1: Unreserve a flushed OPFN request

When an OPFN request is flushed, the request is completed without
unreserving itself from the send queue. Subsequently, when a new
request is post sent, the following warning will be triggered:

WARNING: CPU: 4 PID: 8130 at rdmavt/qp.c:1761 rvt_post_send+0x72a/0x880 [rdmavt]
Call Trace:
[<ffffffffbbb61e41>] dump_stack+0x19/0x1b
[<ffffffffbb497688>] __warn+0xd8/0x100
[<ffffffffbb4977cd>] warn_slowpath_null+0x1d/0x20
[<ffffffffc01c941a>] rvt_post_send+0x72a/0x880 [rdmavt]
[<ffffffffbb4dcabe>] ? account_entity_dequeue+0xae/0xd0
[<ffffffffbb61d645>] ? __kmalloc+0x55/0x230
[<ffffffffc04e1a4c>] ib_uverbs_post_send+0x37c/0x5d0 [ib_uverbs]
[<ffffffffc04e5e36>] ? rdma_lookup_put_uobject+0x26/0x60 [ib_uverbs]
[<ffffffffc04dbce6>] ib_uverbs_write+0x286/0x460 [ib_uverbs]
[<ffffffffbb6f9457>] ? security_file_permission+0x27/0xa0
[<ffffffffbb641650>] vfs_write+0xc0/0x1f0
[<ffffffffbb64246f>] SyS_write+0x7f/0xf0
[<ffffffffbbb74ddb>] system_call_fastpath+0x22/0x27

This patch fixes the problem by moving rvt_qp_wqe_unreserve() into
rvt_qp_complete_swqe() to simplify the code and make it less
error-prone.

Fixes: ca95f802ef51 ("IB/hfi1: Unreserve a reserved request when it is completed")
Link: https://lore.kernel.org/r/20190715164528.74174.31364.stgit@awfm-01.aw.intel.com
Cc: <stable@vger.kernel.org>
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hfi1/rc.c | 2 --
 include/rdma/rdmavt_qp.h        | 9 ++++-----
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 0477c14633ab..024a7c2b6124 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1835,7 +1835,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
 		    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
 		trdma_clean_swqe(qp, wqe);
-		rvt_qp_wqe_unreserve(qp, wqe);
 		trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
 		rvt_qp_complete_swqe(qp,
 				     wqe,
@@ -1882,7 +1881,6 @@ struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
 	if (cmp_psn(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 		trdma_clean_swqe(qp, wqe);
-		rvt_qp_wqe_unreserve(qp, wqe);
 		trace_hfi1_qp_send_completion(qp, wqe, qp->s_last);
 		rvt_qp_complete_swqe(qp,
 				     wqe,
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 0eeea520a853..e06c77d76463 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -608,7 +608,7 @@ static inline void rvt_qp_wqe_reserve(
 /**
  * rvt_qp_wqe_unreserve - clean reserved operation
  * @qp - the rvt qp
- * @wqe - the send wqe
+ * @flags - send wqe flags
  *
  * This decrements the reserve use count.
  *
@@ -620,11 +620,9 @@ static inline void rvt_qp_wqe_reserve(
  * the compiler does not juggle the order of the s_last
  * ring index and the decrementing of s_reserved_used.
  */
-static inline void rvt_qp_wqe_unreserve(
-	struct rvt_qp *qp,
-	struct rvt_swqe *wqe)
+static inline void rvt_qp_wqe_unreserve(struct rvt_qp *qp, int flags)
 {
-	if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) {
+	if (unlikely(flags & RVT_SEND_RESERVE_USED)) {
 		atomic_dec(&qp->s_reserved_used);
 		/* insure no compiler re-order up to s_last change */
 		smp_mb__after_atomic();
@@ -853,6 +851,7 @@ rvt_qp_complete_swqe(struct rvt_qp *qp,
 	u32 byte_len, last;
 	int flags = wqe->wr.send_flags;
 
+	rvt_qp_wqe_unreserve(qp, flags);
 	rvt_put_qp_swqe(qp, wqe);
 
 	need_completion =
-- 
cgit v1.2.3


From 2169e6daa1ffa6e9869fcc56ff7df23c9287f1ec Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 11 Jun 2019 09:49:53 -0400
Subject: media: media/pci: don't set description for ENUM_FMT

The V4L2 core sets the description for the driver in order to ensure
consistent naming.

So drop the strscpy of the description in drivers. Also remove any
description strings in driver-internal structures since those are
no longer needed.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/common/saa7146/saa7146_video.c   | 16 ++-------------
 drivers/media/pci/bt8xx/bttv-driver.c          | 19 -----------------
 drivers/media/pci/bt8xx/bttv-risc.c            |  8 ++++----
 drivers/media/pci/bt8xx/bttvp.h                |  1 -
 drivers/media/pci/cobalt/cobalt-v4l2.c         |  5 -----
 drivers/media/pci/cx23885/cx23885-417.c        |  1 -
 drivers/media/pci/cx23885/cx23885-video.c      |  7 ++-----
 drivers/media/pci/cx23885/cx23885.h            |  1 -
 drivers/media/pci/cx25821/cx25821-video.c      |  7 ++-----
 drivers/media/pci/cx25821/cx25821.h            |  1 -
 drivers/media/pci/cx88/cx88-blackbird.c        |  2 --
 drivers/media/pci/cx88/cx88-video.c            | 17 +++-------------
 drivers/media/pci/cx88/cx88.h                  |  1 -
 drivers/media/pci/dt3155/dt3155.c              |  1 -
 drivers/media/pci/meye/meye.c                  |  3 ---
 drivers/media/pci/saa7134/saa7134-empress.c    |  2 --
 drivers/media/pci/saa7134/saa7134-video.c      | 28 ++++----------------------
 drivers/media/pci/saa7134/saa7134.h            |  1 -
 drivers/media/pci/saa7164/saa7164-encoder.c    |  1 -
 drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c |  7 -------
 drivers/media/pci/solo6x10/solo6x10-v4l2.c     |  2 --
 drivers/media/pci/sta2x11/sta2x11_vip.c        |  2 --
 drivers/media/pci/tw68/tw68-video.c            | 13 ------------
 drivers/media/pci/tw68/tw68.h                  |  1 -
 include/media/drv-intf/saa7146_vv.h            |  1 -
 25 files changed, 17 insertions(+), 131 deletions(-)

(limited to 'include')

diff --git a/drivers/media/common/saa7146/saa7146_video.c b/drivers/media/common/saa7146/saa7146_video.c
index 4c399a42e874..d16122039b0c 100644
--- a/drivers/media/common/saa7146/saa7146_video.c
+++ b/drivers/media/common/saa7146/saa7146_video.c
@@ -20,62 +20,52 @@ MODULE_PARM_DESC(max_memory, "maximum memory usage for capture buffers (default:
 /* format descriptions for capture and preview */
 static struct saa7146_format formats[] = {
 	{
-		.name		= "RGB-8 (3-3-2)",
 		.pixelformat	= V4L2_PIX_FMT_RGB332,
 		.trans		= RGB08_COMPOSED,
 		.depth		= 8,
 		.flags		= 0,
 	}, {
-		.name		= "RGB-16 (5/B-6/G-5/R)",
 		.pixelformat	= V4L2_PIX_FMT_RGB565,
 		.trans		= RGB16_COMPOSED,
 		.depth		= 16,
 		.flags		= 0,
 	}, {
-		.name		= "RGB-24 (B-G-R)",
 		.pixelformat	= V4L2_PIX_FMT_BGR24,
 		.trans		= RGB24_COMPOSED,
 		.depth		= 24,
 		.flags		= 0,
 	}, {
-		.name		= "RGB-32 (B-G-R)",
 		.pixelformat	= V4L2_PIX_FMT_BGR32,
 		.trans		= RGB32_COMPOSED,
 		.depth		= 32,
 		.flags		= 0,
 	}, {
-		.name		= "RGB-32 (R-G-B)",
 		.pixelformat	= V4L2_PIX_FMT_RGB32,
 		.trans		= RGB32_COMPOSED,
 		.depth		= 32,
 		.flags		= 0,
 		.swap		= 0x2,
 	}, {
-		.name		= "Greyscale-8",
 		.pixelformat	= V4L2_PIX_FMT_GREY,
 		.trans		= Y8,
 		.depth		= 8,
 		.flags		= 0,
 	}, {
-		.name		= "YUV 4:2:2 planar (Y-Cb-Cr)",
 		.pixelformat	= V4L2_PIX_FMT_YUV422P,
 		.trans		= YUV422_DECOMPOSED,
 		.depth		= 16,
 		.flags		= FORMAT_BYTE_SWAP|FORMAT_IS_PLANAR,
 	}, {
-		.name		= "YVU 4:2:0 planar (Y-Cb-Cr)",
 		.pixelformat	= V4L2_PIX_FMT_YVU420,
 		.trans		= YUV420_DECOMPOSED,
 		.depth		= 12,
 		.flags		= FORMAT_BYTE_SWAP|FORMAT_IS_PLANAR,
 	}, {
-		.name		= "YUV 4:2:0 planar (Y-Cb-Cr)",
 		.pixelformat	= V4L2_PIX_FMT_YUV420,
 		.trans		= YUV420_DECOMPOSED,
 		.depth		= 12,
 		.flags		= FORMAT_IS_PLANAR,
 	}, {
-		.name		= "YUV 4:2:2 (U-Y-V-Y)",
 		.pixelformat	= V4L2_PIX_FMT_UYVY,
 		.trans		= YUV422_COMPOSED,
 		.depth		= 16,
@@ -147,10 +137,10 @@ int saa7146_start_preview(struct saa7146_fh *fh)
 	}
 	vv->ov.win = fmt.fmt.win;
 
-	DEB_D("%dx%d+%d+%d %s field=%s\n",
+	DEB_D("%dx%d+%d+%d 0x%08x field=%s\n",
 	      vv->ov.win.w.width, vv->ov.win.w.height,
 	      vv->ov.win.w.left, vv->ov.win.w.top,
-	      vv->ov_fmt->name, v4l2_field_names[vv->ov.win.field]);
+	      vv->ov_fmt->pixelformat, v4l2_field_names[vv->ov.win.field]);
 
 	if (0 != (ret = saa7146_enable_overlay(fh))) {
 		DEB_D("enabling overlay failed: %d\n", ret);
@@ -515,8 +505,6 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void *fh, struct v4l2_fmtd
 {
 	if (f->index >= ARRAY_SIZE(formats))
 		return -EINVAL;
-	strscpy((char *)f->description, formats[f->index].name,
-		sizeof(f->description));
 	f->pixelformat = formats[f->index].pixelformat;
 	return 0;
 }
diff --git a/drivers/media/pci/bt8xx/bttv-driver.c b/drivers/media/pci/bt8xx/bttv-driver.c
index 612d1c0010c1..a359da7773a9 100644
--- a/drivers/media/pci/bt8xx/bttv-driver.c
+++ b/drivers/media/pci/bt8xx/bttv-driver.c
@@ -503,77 +503,65 @@ static const unsigned int BTTV_TVNORMS = ARRAY_SIZE(bttv_tvnorms);
    packed pixel formats must come first */
 static const struct bttv_format formats[] = {
 	{
-		.name     = "8 bpp, gray",
 		.fourcc   = V4L2_PIX_FMT_GREY,
 		.btformat = BT848_COLOR_FMT_Y8,
 		.depth    = 8,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "8 bpp, dithered color",
 		.fourcc   = V4L2_PIX_FMT_HI240,
 		.btformat = BT848_COLOR_FMT_RGB8,
 		.depth    = 8,
 		.flags    = FORMAT_FLAGS_PACKED | FORMAT_FLAGS_DITHER,
 	},{
-		.name     = "15 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_RGB555,
 		.btformat = BT848_COLOR_FMT_RGB15,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "15 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB555X,
 		.btformat = BT848_COLOR_FMT_RGB15,
 		.btswap   = 0x03, /* byteswap */
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "16 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_RGB565,
 		.btformat = BT848_COLOR_FMT_RGB16,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "16 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB565X,
 		.btformat = BT848_COLOR_FMT_RGB16,
 		.btswap   = 0x03, /* byteswap */
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "24 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_BGR24,
 		.btformat = BT848_COLOR_FMT_RGB24,
 		.depth    = 24,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "32 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_BGR32,
 		.btformat = BT848_COLOR_FMT_RGB32,
 		.depth    = 32,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "32 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB32,
 		.btformat = BT848_COLOR_FMT_RGB32,
 		.btswap   = 0x0f, /* byte+word swap */
 		.depth    = 32,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "4:2:2, packed, YUYV",
 		.fourcc   = V4L2_PIX_FMT_YUYV,
 		.btformat = BT848_COLOR_FMT_YUY2,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "4:2:2, packed, UYVY",
 		.fourcc   = V4L2_PIX_FMT_UYVY,
 		.btformat = BT848_COLOR_FMT_YUY2,
 		.btswap   = 0x03, /* byteswap */
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	},{
-		.name     = "4:2:2, planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YUV422P,
 		.btformat = BT848_COLOR_FMT_YCrCb422,
 		.depth    = 16,
@@ -581,7 +569,6 @@ static const struct bttv_format formats[] = {
 		.hshift   = 1,
 		.vshift   = 0,
 	},{
-		.name     = "4:2:0, planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YUV420,
 		.btformat = BT848_COLOR_FMT_YCrCb422,
 		.depth    = 12,
@@ -589,7 +576,6 @@ static const struct bttv_format formats[] = {
 		.hshift   = 1,
 		.vshift   = 1,
 	},{
-		.name     = "4:2:0, planar, Y-Cr-Cb",
 		.fourcc   = V4L2_PIX_FMT_YVU420,
 		.btformat = BT848_COLOR_FMT_YCrCb422,
 		.depth    = 12,
@@ -597,7 +583,6 @@ static const struct bttv_format formats[] = {
 		.hshift   = 1,
 		.vshift   = 1,
 	},{
-		.name     = "4:1:1, planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YUV411P,
 		.btformat = BT848_COLOR_FMT_YCrCb411,
 		.depth    = 12,
@@ -605,7 +590,6 @@ static const struct bttv_format formats[] = {
 		.hshift   = 2,
 		.vshift   = 0,
 	},{
-		.name     = "4:1:0, planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YUV410,
 		.btformat = BT848_COLOR_FMT_YCrCb411,
 		.depth    = 9,
@@ -613,7 +597,6 @@ static const struct bttv_format formats[] = {
 		.hshift   = 2,
 		.vshift   = 2,
 	},{
-		.name     = "4:1:0, planar, Y-Cr-Cb",
 		.fourcc   = V4L2_PIX_FMT_YVU410,
 		.btformat = BT848_COLOR_FMT_YCrCb411,
 		.depth    = 9,
@@ -621,7 +604,6 @@ static const struct bttv_format formats[] = {
 		.hshift   = 2,
 		.vshift   = 2,
 	},{
-		.name     = "raw scanlines",
 		.fourcc   = -1,
 		.btformat = BT848_COLOR_FMT_RAW,
 		.depth    = 8,
@@ -2500,7 +2482,6 @@ static int bttv_enum_fmt_cap_ovr(struct v4l2_fmtdesc *f)
 		return -EINVAL;
 
 	f->pixelformat = formats[i].fourcc;
-	strscpy(f->description, formats[i].name, sizeof(f->description));
 
 	return i;
 }
diff --git a/drivers/media/pci/bt8xx/bttv-risc.c b/drivers/media/pci/bt8xx/bttv-risc.c
index 6b59ca337c7f..fc8708047be8 100644
--- a/drivers/media/pci/bt8xx/bttv-risc.c
+++ b/drivers/media/pci/bt8xx/bttv-risc.c
@@ -699,9 +699,9 @@ bttv_buffer_risc(struct bttv *btv, struct bttv_buffer *buf)
 	const struct bttv_tvnorm *tvnorm = bttv_tvnorms + buf->tvnorm;
 	struct videobuf_dmabuf *dma=videobuf_to_dma(&buf->vb);
 
-	dprintk("%d: buffer field: %s  format: %s  size: %dx%d\n",
+	dprintk("%d: buffer field: %s  format: 0x%08x  size: %dx%d\n",
 		btv->c.nr, v4l2_field_names[buf->vb.field],
-		buf->fmt->name, buf->vb.width, buf->vb.height);
+		buf->fmt->fourcc, buf->vb.width, buf->vb.height);
 
 	/* packed pixel modes */
 	if (buf->fmt->flags & FORMAT_FLAGS_PACKED) {
@@ -860,9 +860,9 @@ bttv_overlay_risc(struct bttv *btv,
 		  struct bttv_buffer *buf)
 {
 	/* check interleave, bottom+top fields */
-	dprintk("%d: overlay fields: %s format: %s  size: %dx%d\n",
+	dprintk("%d: overlay fields: %s format: 0x%08x  size: %dx%d\n",
 		btv->c.nr, v4l2_field_names[buf->vb.field],
-		fmt->name, ov->w.width, ov->w.height);
+		fmt->fourcc, ov->w.width, ov->w.height);
 
 	/* calculate geometry */
 	bttv_calc_geo(btv,&buf->geo,ov->w.width,ov->w.height,
diff --git a/drivers/media/pci/bt8xx/bttvp.h b/drivers/media/pci/bt8xx/bttvp.h
index b159d6ddbfcf..4abf43657846 100644
--- a/drivers/media/pci/bt8xx/bttvp.h
+++ b/drivers/media/pci/bt8xx/bttvp.h
@@ -99,7 +99,6 @@ struct bttv_tvnorm {
 extern const struct bttv_tvnorm bttv_tvnorms[];
 
 struct bttv_format {
-	char *name;
 	int  fourcc;          /* video4linux 2      */
 	int  btformat;        /* BT848_COLOR_FMT_*  */
 	int  btswap;          /* BT848_COLOR_CTL_*  */
diff --git a/drivers/media/pci/cobalt/cobalt-v4l2.c b/drivers/media/pci/cobalt/cobalt-v4l2.c
index 39dabd4da60f..ea96f333ee2b 100644
--- a/drivers/media/pci/cobalt/cobalt-v4l2.c
+++ b/drivers/media/pci/cobalt/cobalt-v4l2.c
@@ -688,15 +688,12 @@ static int cobalt_enum_fmt_vid_cap(struct file *file, void *priv_fh,
 {
 	switch (f->index) {
 	case 0:
-		strscpy(f->description, "YUV 4:2:2", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_YUYV;
 		break;
 	case 1:
-		strscpy(f->description, "RGB24", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_RGB24;
 		break;
 	case 2:
-		strscpy(f->description, "RGB32", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_BGR32;
 		break;
 	default:
@@ -893,11 +890,9 @@ static int cobalt_enum_fmt_vid_out(struct file *file, void *priv_fh,
 {
 	switch (f->index) {
 	case 0:
-		strscpy(f->description, "YUV 4:2:2", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_YUYV;
 		break;
 	case 1:
-		strscpy(f->description, "RGB32", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_BGR32;
 		break;
 	default:
diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c
index 82f96a4091ac..2327fe612610 100644
--- a/drivers/media/pci/cx23885/cx23885-417.c
+++ b/drivers/media/pci/cx23885/cx23885-417.c
@@ -1339,7 +1339,6 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (f->index != 0)
 		return -EINVAL;
 
-	strscpy(f->description, "MPEG", sizeof(f->description));
 	f->pixelformat = V4L2_PIX_FMT_MPEG;
 
 	return 0;
diff --git a/drivers/media/pci/cx23885/cx23885-video.c b/drivers/media/pci/cx23885/cx23885-video.c
index b254473db9a3..8098b15493de 100644
--- a/drivers/media/pci/cx23885/cx23885-video.c
+++ b/drivers/media/pci/cx23885/cx23885-video.c
@@ -67,7 +67,6 @@ MODULE_PARM_DESC(vid_limit, "capture memory limit in megabytes");
 #define FORMAT_FLAGS_PACKED       0x01
 static struct cx23885_fmt formats[] = {
 	{
-		.name     = "4:2:2, packed, YUYV",
 		.fourcc   = V4L2_PIX_FMT_YUYV,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
@@ -411,9 +410,9 @@ static int buffer_prepare(struct vb2_buffer *vb)
 	default:
 		BUG();
 	}
-	dprintk(2, "[%p/%d] buffer_init - %dx%d %dbpp \"%s\" - dma=0x%08lx\n",
+	dprintk(2, "[%p/%d] buffer_init - %dx%d %dbpp 0x%08x - dma=0x%08lx\n",
 		buf, buf->vb.vb2_buf.index,
-		dev->width, dev->height, dev->fmt->depth, dev->fmt->name,
+		dev->width, dev->height, dev->fmt->depth, dev->fmt->fourcc,
 		(unsigned long)buf->risc.dma);
 	return 0;
 }
@@ -647,8 +646,6 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (unlikely(f->index >= ARRAY_SIZE(formats)))
 		return -EINVAL;
 
-	strscpy(f->description, formats[f->index].name,
-		sizeof(f->description));
 	f->pixelformat = formats[f->index].fourcc;
 
 	return 0;
diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
index 9da66fdd5a0d..a95a2e4c6a0d 100644
--- a/drivers/media/pci/cx23885/cx23885.h
+++ b/drivers/media/pci/cx23885/cx23885.h
@@ -127,7 +127,6 @@
 	V4L2_STD_PAL_60 |  V4L2_STD_SECAM_L   |  V4L2_STD_SECAM_DK)
 
 struct cx23885_fmt {
-	char  *name;
 	u32   fourcc;          /* v4l2 format id */
 	int   depth;
 	int   flags;
diff --git a/drivers/media/pci/cx25821/cx25821-video.c b/drivers/media/pci/cx25821/cx25821-video.c
index de7641170478..a10261da0db6 100644
--- a/drivers/media/pci/cx25821/cx25821-video.c
+++ b/drivers/media/pci/cx25821/cx25821-video.c
@@ -35,12 +35,10 @@ MODULE_PARM_DESC(irq_debug, "enable debug messages [IRQ handler]");
 
 static const struct cx25821_fmt formats[] = {
 	{
-		.name = "4:1:1, packed, Y41P",
 		.fourcc = V4L2_PIX_FMT_Y41P,
 		.depth = 12,
 		.flags = FORMAT_FLAGS_PACKED,
 	}, {
-		.name = "4:2:2, packed, YUYV",
 		.fourcc = V4L2_PIX_FMT_YUYV,
 		.depth = 16,
 		.flags = FORMAT_FLAGS_PACKED,
@@ -215,9 +213,9 @@ static int cx25821_buffer_prepare(struct vb2_buffer *vb)
 		break;
 	}
 
-	dprintk(2, "[%p/%d] buffer_prep - %dx%d %dbpp \"%s\" - dma=0x%08lx\n",
+	dprintk(2, "[%p/%d] buffer_prep - %dx%d %dbpp 0x%08x - dma=0x%08lx\n",
 		buf, buf->vb.vb2_buf.index, chan->width, chan->height,
-		chan->fmt->depth, chan->fmt->name,
+		chan->fmt->depth, chan->fmt->fourcc,
 		(unsigned long)buf->risc.dma);
 
 	return ret;
@@ -311,7 +309,6 @@ static int cx25821_vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 	if (unlikely(f->index >= ARRAY_SIZE(formats)))
 		return -EINVAL;
 
-	strscpy(f->description, formats[f->index].name, sizeof(f->description));
 	f->pixelformat = formats[f->index].fourcc;
 
 	return 0;
diff --git a/drivers/media/pci/cx25821/cx25821.h b/drivers/media/pci/cx25821/cx25821.h
index 47dbaae78509..017307984094 100644
--- a/drivers/media/pci/cx25821/cx25821.h
+++ b/drivers/media/pci/cx25821/cx25821.h
@@ -83,7 +83,6 @@
 #define VID_CHANNEL_NUM 8
 
 struct cx25821_fmt {
-	char *name;
 	u32 fourcc;		/* v4l2 format id */
 	int depth;
 	int flags;
diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c
index 200d68827073..d3da7f4297af 100644
--- a/drivers/media/pci/cx88/cx88-blackbird.c
+++ b/drivers/media/pci/cx88/cx88-blackbird.c
@@ -805,9 +805,7 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (f->index != 0)
 		return -EINVAL;
 
-	strscpy(f->description, "MPEG", sizeof(f->description));
 	f->pixelformat = V4L2_PIX_FMT_MPEG;
-	f->flags = V4L2_FMT_FLAG_COMPRESSED;
 	return 0;
 }
 
diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index e59a74514c7c..dcc0f02aeb70 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c
@@ -69,62 +69,52 @@ MODULE_PARM_DESC(irq_debug, "enable debug messages [IRQ handler]");
 
 static const struct cx8800_fmt formats[] = {
 	{
-		.name     = "8 bpp, gray",
 		.fourcc   = V4L2_PIX_FMT_GREY,
 		.cxformat = ColorFormatY8,
 		.depth    = 8,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "15 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_RGB555,
 		.cxformat = ColorFormatRGB15,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "15 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB555X,
 		.cxformat = ColorFormatRGB15 | ColorFormatBSWAP,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "16 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_RGB565,
 		.cxformat = ColorFormatRGB16,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "16 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB565X,
 		.cxformat = ColorFormatRGB16 | ColorFormatBSWAP,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "24 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_BGR24,
 		.cxformat = ColorFormatRGB24,
 		.depth    = 24,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "32 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_BGR32,
 		.cxformat = ColorFormatRGB32,
 		.depth    = 32,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "32 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB32,
 		.cxformat = ColorFormatRGB32 | ColorFormatBSWAP |
 			    ColorFormatWSWAP,
 		.depth    = 32,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "4:2:2, packed, YUYV",
 		.fourcc   = V4L2_PIX_FMT_YUYV,
 		.cxformat = ColorFormatYUY2,
 		.depth    = 16,
 		.flags    = FORMAT_FLAGS_PACKED,
 	}, {
-		.name     = "4:2:2, packed, UYVY",
 		.fourcc   = V4L2_PIX_FMT_UYVY,
 		.cxformat = ColorFormatYUY2 | ColorFormatBSWAP,
 		.depth    = 16,
@@ -489,9 +479,9 @@ static int buffer_prepare(struct vb2_buffer *vb)
 		break;
 	}
 	dprintk(2,
-		"[%p/%d] buffer_prepare - %dx%d %dbpp \"%s\" - dma=0x%08lx\n",
-		buf, buf->vb.vb2_buf.index,
-		core->width, core->height, dev->fmt->depth, dev->fmt->name,
+		"[%p/%d] %s - %dx%d %dbpp 0x%08x - dma=0x%08lx\n",
+		buf, buf->vb.vb2_buf.index, __func__,
+		core->width, core->height, dev->fmt->depth, dev->fmt->fourcc,
 		(unsigned long)buf->risc.dma);
 	return 0;
 }
@@ -829,7 +819,6 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (unlikely(f->index >= ARRAY_SIZE(formats)))
 		return -EINVAL;
 
-	strscpy(f->description, formats[f->index].name, sizeof(f->description));
 	f->pixelformat = formats[f->index].fourcc;
 
 	return 0;
diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h
index a70a50dc3edf..744a22328ebc 100644
--- a/drivers/media/pci/cx88/cx88.h
+++ b/drivers/media/pci/cx88/cx88.h
@@ -99,7 +99,6 @@ static inline unsigned int norm_maxh(v4l2_std_id norm)
 /* static data                                                 */
 
 struct cx8800_fmt {
-	const char  *name;
 	u32   fourcc;          /* v4l2 format id */
 	int   depth;
 	int   flags;
diff --git a/drivers/media/pci/dt3155/dt3155.c b/drivers/media/pci/dt3155/dt3155.c
index b4cdda50e742..7480f0d3ad0f 100644
--- a/drivers/media/pci/dt3155/dt3155.c
+++ b/drivers/media/pci/dt3155/dt3155.c
@@ -306,7 +306,6 @@ static int dt3155_enum_fmt_vid_cap(struct file *filp,
 	if (f->index)
 		return -EINVAL;
 	f->pixelformat = V4L2_PIX_FMT_GREY;
-	strscpy(f->description, "8-bit Greyscale", sizeof(f->description));
 	return 0;
 }
 
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index 8218810c899e..0e61c81356ef 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -1104,12 +1104,9 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void *fh,
 	if (f->index == 0) {
 		/* standard YUV 422 capture */
 		f->flags = 0;
-		strscpy(f->description, "YUV422", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_YUYV;
 	} else {
 		/* compressed MJPEG capture */
-		f->flags = V4L2_FMT_FLAG_COMPRESSED;
-		strscpy(f->description, "MJPEG", sizeof(f->description));
 		f->pixelformat = V4L2_PIX_FMT_MJPEG;
 	}
 
diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c
index 1a41a56afec6..cb65d345fd3e 100644
--- a/drivers/media/pci/saa7134/saa7134-empress.c
+++ b/drivers/media/pci/saa7134/saa7134-empress.c
@@ -91,9 +91,7 @@ static int empress_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (f->index != 0)
 		return -EINVAL;
 
-	strscpy(f->description, "MPEG TS", sizeof(f->description));
 	f->pixelformat = V4L2_PIX_FMT_MPEG;
-	f->flags = V4L2_FMT_FLAG_COMPRESSED;
 	return 0;
 }
 
diff --git a/drivers/media/pci/saa7134/saa7134-video.c b/drivers/media/pci/saa7134/saa7134-video.c
index 606df51bb636..342cabf48064 100644
--- a/drivers/media/pci/saa7134/saa7134-video.c
+++ b/drivers/media/pci/saa7134/saa7134-video.c
@@ -90,70 +90,58 @@ static int video_out[][9] = {
 
 static struct saa7134_format formats[] = {
 	{
-		.name     = "8 bpp gray",
 		.fourcc   = V4L2_PIX_FMT_GREY,
 		.depth    = 8,
 		.pm       = 0x06,
 	},{
-		.name     = "15 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_RGB555,
 		.depth    = 16,
 		.pm       = 0x13 | 0x80,
 	},{
-		.name     = "15 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB555X,
 		.depth    = 16,
 		.pm       = 0x13 | 0x80,
 		.bswap    = 1,
 	},{
-		.name     = "16 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_RGB565,
 		.depth    = 16,
 		.pm       = 0x10 | 0x80,
 	},{
-		.name     = "16 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB565X,
 		.depth    = 16,
 		.pm       = 0x10 | 0x80,
 		.bswap    = 1,
 	},{
-		.name     = "24 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_BGR24,
 		.depth    = 24,
 		.pm       = 0x11,
 	},{
-		.name     = "24 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB24,
 		.depth    = 24,
 		.pm       = 0x11,
 		.bswap    = 1,
 	},{
-		.name     = "32 bpp RGB, le",
 		.fourcc   = V4L2_PIX_FMT_BGR32,
 		.depth    = 32,
 		.pm       = 0x12,
 	},{
-		.name     = "32 bpp RGB, be",
 		.fourcc   = V4L2_PIX_FMT_RGB32,
 		.depth    = 32,
 		.pm       = 0x12,
 		.bswap    = 1,
 		.wswap    = 1,
 	},{
-		.name     = "4:2:2 packed, YUYV",
 		.fourcc   = V4L2_PIX_FMT_YUYV,
 		.depth    = 16,
 		.pm       = 0x00,
 		.bswap    = 1,
 		.yuv      = 1,
 	},{
-		.name     = "4:2:2 packed, UYVY",
 		.fourcc   = V4L2_PIX_FMT_UYVY,
 		.depth    = 16,
 		.pm       = 0x00,
 		.yuv      = 1,
 	},{
-		.name     = "4:2:2 planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YUV422P,
 		.depth    = 16,
 		.pm       = 0x09,
@@ -162,7 +150,6 @@ static struct saa7134_format formats[] = {
 		.hshift   = 1,
 		.vshift   = 0,
 	},{
-		.name     = "4:2:0 planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YUV420,
 		.depth    = 12,
 		.pm       = 0x0a,
@@ -171,7 +158,6 @@ static struct saa7134_format formats[] = {
 		.hshift   = 1,
 		.vshift   = 1,
 	},{
-		.name     = "4:2:0 planar, Y-Cb-Cr",
 		.fourcc   = V4L2_PIX_FMT_YVU420,
 		.depth    = 12,
 		.pm       = 0x0a,
@@ -720,10 +706,10 @@ static int start_preview(struct saa7134_dev *dev)
 		return err;
 
 	dev->ovfield = dev->win.field;
-	video_dbg("start_preview %dx%d+%d+%d %s field=%s\n",
-		dev->win.w.width, dev->win.w.height,
-		dev->win.w.left, dev->win.w.top,
-		dev->ovfmt->name, v4l2_field_names[dev->ovfield]);
+	video_dbg("%s %dx%d+%d+%d 0x%08x field=%s\n", __func__,
+		  dev->win.w.width, dev->win.w.height,
+		  dev->win.w.left, dev->win.w.top,
+		  dev->ovfmt->fourcc, v4l2_field_names[dev->ovfield]);
 
 	/* setup window + clipping */
 	set_size(dev, TASK_B, dev->win.w.width, dev->win.w.height,
@@ -1780,9 +1766,6 @@ static int saa7134_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (f->index >= FORMATS)
 		return -EINVAL;
 
-	strscpy(f->description, formats[f->index].name,
-		sizeof(f->description));
-
 	f->pixelformat = formats[f->index].fourcc;
 
 	return 0;
@@ -1799,9 +1782,6 @@ static int saa7134_enum_fmt_vid_overlay(struct file *file, void  *priv,
 	if ((f->index >= FORMATS) || formats[f->index].planar)
 		return -EINVAL;
 
-	strscpy(f->description, formats[f->index].name,
-		sizeof(f->description));
-
 	f->pixelformat = formats[f->index].fourcc;
 
 	return 0;
diff --git a/drivers/media/pci/saa7134/saa7134.h b/drivers/media/pci/saa7134/saa7134.h
index 6324f174c6f9..77c325e64a97 100644
--- a/drivers/media/pci/saa7134/saa7134.h
+++ b/drivers/media/pci/saa7134/saa7134.h
@@ -98,7 +98,6 @@ struct saa7134_tvaudio {
 };
 
 struct saa7134_format {
-	char           *name;
 	unsigned int   fourcc;
 	unsigned int   depth;
 	unsigned int   pm;
diff --git a/drivers/media/pci/saa7164/saa7164-encoder.c b/drivers/media/pci/saa7164/saa7164-encoder.c
index 43fdaa2d32bd..3fca7257a720 100644
--- a/drivers/media/pci/saa7164/saa7164-encoder.c
+++ b/drivers/media/pci/saa7164/saa7164-encoder.c
@@ -503,7 +503,6 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (f->index != 0)
 		return -EINVAL;
 
-	strscpy(f->description, "MPEG", sizeof(f->description));
 	f->pixelformat = V4L2_PIX_FMT_MPEG;
 
 	return 0;
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
index 609100a46ff8..a02f08459e14 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2-enc.c
@@ -822,25 +822,18 @@ static int solo_enc_enum_fmt_cap(struct file *file, void *priv,
 		switch (dev_type) {
 		case SOLO_DEV_6010:
 			f->pixelformat = V4L2_PIX_FMT_MPEG4;
-			strscpy(f->description, "MPEG-4 part 2",
-				sizeof(f->description));
 			break;
 		case SOLO_DEV_6110:
 			f->pixelformat = V4L2_PIX_FMT_H264;
-			strscpy(f->description, "H.264", sizeof(f->description));
 			break;
 		}
 		break;
 	case 1:
 		f->pixelformat = V4L2_PIX_FMT_MJPEG;
-		strscpy(f->description, "MJPEG", sizeof(f->description));
 		break;
 	default:
 		return -EINVAL;
 	}
-
-	f->flags = V4L2_FMT_FLAG_COMPRESSED;
-
 	return 0;
 }
 
diff --git a/drivers/media/pci/solo6x10/solo6x10-v4l2.c b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
index a968f75920b5..2efa539d6075 100644
--- a/drivers/media/pci/solo6x10/solo6x10-v4l2.c
+++ b/drivers/media/pci/solo6x10/solo6x10-v4l2.c
@@ -458,8 +458,6 @@ static int solo_enum_fmt_cap(struct file *file, void *priv,
 		return -EINVAL;
 
 	f->pixelformat = V4L2_PIX_FMT_UYVY;
-	strscpy(f->description, "UYUV 4:2:2 Packed", sizeof(f->description));
-
 	return 0;
 }
 
diff --git a/drivers/media/pci/sta2x11/sta2x11_vip.c b/drivers/media/pci/sta2x11/sta2x11_vip.c
index e52e29814378..fd3de3bb0c89 100644
--- a/drivers/media/pci/sta2x11/sta2x11_vip.c
+++ b/drivers/media/pci/sta2x11/sta2x11_vip.c
@@ -560,9 +560,7 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 	if (f->index != 0)
 		return -EINVAL;
 
-	strscpy(f->description, "4:2:2, packed, UYVY", sizeof(f->description));
 	f->pixelformat = V4L2_PIX_FMT_UYVY;
-	f->flags = 0;
 	return 0;
 }
 
diff --git a/drivers/media/pci/tw68/tw68-video.c b/drivers/media/pci/tw68/tw68-video.c
index 8e0952d65ad4..99e74c22d3be 100644
--- a/drivers/media/pci/tw68/tw68-video.c
+++ b/drivers/media/pci/tw68/tw68-video.c
@@ -34,53 +34,43 @@
  */
 static const struct tw68_format formats[] = {
 	{
-		.name		= "15 bpp RGB, le",
 		.fourcc		= V4L2_PIX_FMT_RGB555,
 		.depth		= 16,
 		.twformat	= ColorFormatRGB15,
 	}, {
-		.name		= "15 bpp RGB, be",
 		.fourcc		= V4L2_PIX_FMT_RGB555X,
 		.depth		= 16,
 		.twformat	= ColorFormatRGB15 | ColorFormatBSWAP,
 	}, {
-		.name		= "16 bpp RGB, le",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.depth		= 16,
 		.twformat	= ColorFormatRGB16,
 	}, {
-		.name		= "16 bpp RGB, be",
 		.fourcc		= V4L2_PIX_FMT_RGB565X,
 		.depth		= 16,
 		.twformat	= ColorFormatRGB16 | ColorFormatBSWAP,
 	}, {
-		.name		= "24 bpp RGB, le",
 		.fourcc		= V4L2_PIX_FMT_BGR24,
 		.depth		= 24,
 		.twformat	= ColorFormatRGB24,
 	}, {
-		.name		= "24 bpp RGB, be",
 		.fourcc		= V4L2_PIX_FMT_RGB24,
 		.depth		= 24,
 		.twformat	= ColorFormatRGB24 | ColorFormatBSWAP,
 	}, {
-		.name		= "32 bpp RGB, le",
 		.fourcc		= V4L2_PIX_FMT_BGR32,
 		.depth		= 32,
 		.twformat	= ColorFormatRGB32,
 	}, {
-		.name		= "32 bpp RGB, be",
 		.fourcc		= V4L2_PIX_FMT_RGB32,
 		.depth		= 32,
 		.twformat	= ColorFormatRGB32 | ColorFormatBSWAP |
 				  ColorFormatWSWAP,
 	}, {
-		.name		= "4:2:2 packed, YUYV",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.depth		= 16,
 		.twformat	= ColorFormatYUY2,
 	}, {
-		.name		= "4:2:2 packed, UYVY",
 		.fourcc		= V4L2_PIX_FMT_UYVY,
 		.depth		= 16,
 		.twformat	= ColorFormatYUY2 | ColorFormatBSWAP,
@@ -774,9 +764,6 @@ static int tw68_enum_fmt_vid_cap(struct file *file, void  *priv,
 	if (f->index >= FORMATS)
 		return -EINVAL;
 
-	strscpy(f->description, formats[f->index].name,
-		sizeof(f->description));
-
 	f->pixelformat = formats[f->index].fourcc;
 
 	return 0;
diff --git a/drivers/media/pci/tw68/tw68.h b/drivers/media/pci/tw68/tw68.h
index 7021290d726a..a1f422d6e600 100644
--- a/drivers/media/pci/tw68/tw68.h
+++ b/drivers/media/pci/tw68/tw68.h
@@ -85,7 +85,6 @@ struct tw68_tvnorm {
 };
 
 struct tw68_format {
-	char	*name;
 	u32	fourcc;
 	u32	depth;
 	u32	twformat;
diff --git a/include/media/drv-intf/saa7146_vv.h b/include/media/drv-intf/saa7146_vv.h
index b34d86bb0664..635805fb35e8 100644
--- a/include/media/drv-intf/saa7146_vv.h
+++ b/include/media/drv-intf/saa7146_vv.h
@@ -32,7 +32,6 @@ struct	saa7146_video_dma {
 #define FORMAT_IS_PLANAR	0x2
 
 struct saa7146_format {
-	char	*name;
 	u32	pixelformat;
 	u32	trans;
 	u8	depth;
-- 
cgit v1.2.3


From 642ac63d166d07e43396a4d8c48ab24cb072cb18 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 11 Jun 2019 10:02:43 -0400
Subject: media: drivers/staging/media: don't set description for ENUM_FMT

The V4L2 core sets the format description and flags for the driver in order
to ensure consistent naming.

So drop the strscpy of the description in drivers. Also remove any
description strings in driver-internal structures since those are
no longer needed.

Note that bcm2835-camera.c: the formats array still stores the flags
field for compressed formats since that information is used elsewhere
in the driver. But enum_fmt doesn't use it anymore, since the core
will set the COMPRESSED flag correctly.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/staging/media/omap4iss/iss_video.c         | 40 ++++++++++------------
 drivers/staging/media/omap4iss/iss_video.h         |  2 --
 drivers/staging/media/soc_camera/soc_camera.c      |  2 --
 .../vc04_services/bcm2835-camera/bcm2835-camera.c  | 29 ----------------
 .../vc04_services/bcm2835-camera/mmal-common.h     |  1 -
 include/media/drv-intf/soc_mediabus.h              |  2 --
 6 files changed, 19 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c
index c307707480f7..54144dc9f509 100644
--- a/drivers/staging/media/omap4iss/iss_video.c
+++ b/drivers/staging/media/omap4iss/iss_video.c
@@ -31,61 +31,61 @@
 static struct iss_format_info formats[] = {
 	{ MEDIA_BUS_FMT_Y8_1X8, MEDIA_BUS_FMT_Y8_1X8,
 	  MEDIA_BUS_FMT_Y8_1X8, MEDIA_BUS_FMT_Y8_1X8,
-	  V4L2_PIX_FMT_GREY, 8, "Greyscale 8 bpp", },
+	  V4L2_PIX_FMT_GREY, 8, },
 	{ MEDIA_BUS_FMT_Y10_1X10, MEDIA_BUS_FMT_Y10_1X10,
 	  MEDIA_BUS_FMT_Y10_1X10, MEDIA_BUS_FMT_Y8_1X8,
-	  V4L2_PIX_FMT_Y10, 10, "Greyscale 10 bpp", },
+	  V4L2_PIX_FMT_Y10, 10, },
 	{ MEDIA_BUS_FMT_Y12_1X12, MEDIA_BUS_FMT_Y10_1X10,
 	  MEDIA_BUS_FMT_Y12_1X12, MEDIA_BUS_FMT_Y8_1X8,
-	  V4L2_PIX_FMT_Y12, 12, "Greyscale 12 bpp", },
+	  V4L2_PIX_FMT_Y12, 12, },
 	{ MEDIA_BUS_FMT_SBGGR8_1X8, MEDIA_BUS_FMT_SBGGR8_1X8,
 	  MEDIA_BUS_FMT_SBGGR8_1X8, MEDIA_BUS_FMT_SBGGR8_1X8,
-	  V4L2_PIX_FMT_SBGGR8, 8, "BGGR Bayer 8 bpp", },
+	  V4L2_PIX_FMT_SBGGR8, 8, },
 	{ MEDIA_BUS_FMT_SGBRG8_1X8, MEDIA_BUS_FMT_SGBRG8_1X8,
 	  MEDIA_BUS_FMT_SGBRG8_1X8, MEDIA_BUS_FMT_SGBRG8_1X8,
-	  V4L2_PIX_FMT_SGBRG8, 8, "GBRG Bayer 8 bpp", },
+	  V4L2_PIX_FMT_SGBRG8, 8, },
 	{ MEDIA_BUS_FMT_SGRBG8_1X8, MEDIA_BUS_FMT_SGRBG8_1X8,
 	  MEDIA_BUS_FMT_SGRBG8_1X8, MEDIA_BUS_FMT_SGRBG8_1X8,
-	  V4L2_PIX_FMT_SGRBG8, 8, "GRBG Bayer 8 bpp", },
+	  V4L2_PIX_FMT_SGRBG8, 8, },
 	{ MEDIA_BUS_FMT_SRGGB8_1X8, MEDIA_BUS_FMT_SRGGB8_1X8,
 	  MEDIA_BUS_FMT_SRGGB8_1X8, MEDIA_BUS_FMT_SRGGB8_1X8,
-	  V4L2_PIX_FMT_SRGGB8, 8, "RGGB Bayer 8 bpp", },
+	  V4L2_PIX_FMT_SRGGB8, 8, },
 	{ MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8, MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8,
 	  MEDIA_BUS_FMT_SGRBG10_1X10, 0,
-	  V4L2_PIX_FMT_SGRBG10DPCM8, 8, "GRBG Bayer 10 bpp DPCM8",  },
+	  V4L2_PIX_FMT_SGRBG10DPCM8, 8, },
 	{ MEDIA_BUS_FMT_SBGGR10_1X10, MEDIA_BUS_FMT_SBGGR10_1X10,
 	  MEDIA_BUS_FMT_SBGGR10_1X10, MEDIA_BUS_FMT_SBGGR8_1X8,
-	  V4L2_PIX_FMT_SBGGR10, 10, "BGGR Bayer 10 bpp", },
+	  V4L2_PIX_FMT_SBGGR10, 10, },
 	{ MEDIA_BUS_FMT_SGBRG10_1X10, MEDIA_BUS_FMT_SGBRG10_1X10,
 	  MEDIA_BUS_FMT_SGBRG10_1X10, MEDIA_BUS_FMT_SGBRG8_1X8,
-	  V4L2_PIX_FMT_SGBRG10, 10, "GBRG Bayer 10 bpp", },
+	  V4L2_PIX_FMT_SGBRG10, 10, },
 	{ MEDIA_BUS_FMT_SGRBG10_1X10, MEDIA_BUS_FMT_SGRBG10_1X10,
 	  MEDIA_BUS_FMT_SGRBG10_1X10, MEDIA_BUS_FMT_SGRBG8_1X8,
-	  V4L2_PIX_FMT_SGRBG10, 10, "GRBG Bayer 10 bpp", },
+	  V4L2_PIX_FMT_SGRBG10, 10, },
 	{ MEDIA_BUS_FMT_SRGGB10_1X10, MEDIA_BUS_FMT_SRGGB10_1X10,
 	  MEDIA_BUS_FMT_SRGGB10_1X10, MEDIA_BUS_FMT_SRGGB8_1X8,
-	  V4L2_PIX_FMT_SRGGB10, 10, "RGGB Bayer 10 bpp", },
+	  V4L2_PIX_FMT_SRGGB10, 10, },
 	{ MEDIA_BUS_FMT_SBGGR12_1X12, MEDIA_BUS_FMT_SBGGR10_1X10,
 	  MEDIA_BUS_FMT_SBGGR12_1X12, MEDIA_BUS_FMT_SBGGR8_1X8,
-	  V4L2_PIX_FMT_SBGGR12, 12, "BGGR Bayer 12 bpp", },
+	  V4L2_PIX_FMT_SBGGR12, 12, },
 	{ MEDIA_BUS_FMT_SGBRG12_1X12, MEDIA_BUS_FMT_SGBRG10_1X10,
 	  MEDIA_BUS_FMT_SGBRG12_1X12, MEDIA_BUS_FMT_SGBRG8_1X8,
-	  V4L2_PIX_FMT_SGBRG12, 12, "GBRG Bayer 12 bpp", },
+	  V4L2_PIX_FMT_SGBRG12, 12, },
 	{ MEDIA_BUS_FMT_SGRBG12_1X12, MEDIA_BUS_FMT_SGRBG10_1X10,
 	  MEDIA_BUS_FMT_SGRBG12_1X12, MEDIA_BUS_FMT_SGRBG8_1X8,
-	  V4L2_PIX_FMT_SGRBG12, 12, "GRBG Bayer 12 bpp", },
+	  V4L2_PIX_FMT_SGRBG12, 12, },
 	{ MEDIA_BUS_FMT_SRGGB12_1X12, MEDIA_BUS_FMT_SRGGB10_1X10,
 	  MEDIA_BUS_FMT_SRGGB12_1X12, MEDIA_BUS_FMT_SRGGB8_1X8,
-	  V4L2_PIX_FMT_SRGGB12, 12, "RGGB Bayer 12 bpp", },
+	  V4L2_PIX_FMT_SRGGB12, 12, },
 	{ MEDIA_BUS_FMT_UYVY8_1X16, MEDIA_BUS_FMT_UYVY8_1X16,
 	  MEDIA_BUS_FMT_UYVY8_1X16, 0,
-	  V4L2_PIX_FMT_UYVY, 16, "YUV 4:2:2 (UYVY)", },
+	  V4L2_PIX_FMT_UYVY, 16, },
 	{ MEDIA_BUS_FMT_YUYV8_1X16, MEDIA_BUS_FMT_YUYV8_1X16,
 	  MEDIA_BUS_FMT_YUYV8_1X16, 0,
-	  V4L2_PIX_FMT_YUYV, 16, "YUV 4:2:2 (YUYV)", },
+	  V4L2_PIX_FMT_YUYV, 16, },
 	{ MEDIA_BUS_FMT_YUYV8_1_5X8, MEDIA_BUS_FMT_YUYV8_1_5X8,
 	  MEDIA_BUS_FMT_YUYV8_1_5X8, 0,
-	  V4L2_PIX_FMT_NV12, 8, "YUV 4:2:0 (NV12)", },
+	  V4L2_PIX_FMT_NV12, 8, },
 };
 
 const struct iss_format_info *
@@ -563,8 +563,6 @@ iss_video_enum_format(struct file *file, void *fh, struct v4l2_fmtdesc *f)
 
 		if (index == 0) {
 			f->pixelformat = info->pixelformat;
-			strscpy(f->description, info->description,
-				sizeof(f->description));
 			return 0;
 		}
 
diff --git a/drivers/staging/media/omap4iss/iss_video.h b/drivers/staging/media/omap4iss/iss_video.h
index f22489edb562..8b3dd92021e1 100644
--- a/drivers/staging/media/omap4iss/iss_video.h
+++ b/drivers/staging/media/omap4iss/iss_video.h
@@ -36,7 +36,6 @@ struct v4l2_pix_format;
  *	shifted to be 8 bits per pixel. =0 if format is not shiftable.
  * @pixelformat: V4L2 pixel format FCC identifier
  * @bpp: Bits per pixel
- * @description: Human-readable format description
  */
 struct iss_format_info {
 	u32 code;
@@ -45,7 +44,6 @@ struct iss_format_info {
 	u32 flavor;
 	u32 pixelformat;
 	unsigned int bpp;
-	const char *description;
 };
 
 enum iss_pipeline_stream_state {
diff --git a/drivers/staging/media/soc_camera/soc_camera.c b/drivers/staging/media/soc_camera/soc_camera.c
index a6232dcd59bc..7b9448e3c9ba 100644
--- a/drivers/staging/media/soc_camera/soc_camera.c
+++ b/drivers/staging/media/soc_camera/soc_camera.c
@@ -869,8 +869,6 @@ static int soc_camera_enum_fmt_vid_cap(struct file *file, void  *priv,
 
 	format = icd->user_formats[f->index].host_fmt;
 
-	if (format->name)
-		strscpy(f->description, format->name, sizeof(f->description));
 	f->pixelformat = format->fourcc;
 	return 0;
 }
diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c
index ea54cc27e645..d4d1e44b16b2 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c
+++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c
@@ -75,34 +75,27 @@ static const struct v4l2_fract
 /* video formats */
 static struct mmal_fmt formats[] = {
 	{
-		.name = "4:2:0, planar, YUV",
 		.fourcc = V4L2_PIX_FMT_YUV420,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_I420,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
 		.remove_padding = 1,
 	}, {
-		.name = "4:2:2, packed, YUYV",
 		.fourcc = V4L2_PIX_FMT_YUYV,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_YUYV,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
 		.remove_padding = 0,
 	}, {
-		.name = "RGB24 (LE)",
 		.fourcc = V4L2_PIX_FMT_RGB24,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_RGB24,
 		.depth = 24,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 3,
 		.remove_padding = 0,
 	}, {
-		.name = "JPEG",
 		.fourcc = V4L2_PIX_FMT_JPEG,
 		.flags = V4L2_FMT_FLAG_COMPRESSED,
 		.mmal = MMAL_ENCODING_JPEG,
@@ -111,7 +104,6 @@ static struct mmal_fmt formats[] = {
 		.ybbp = 0,
 		.remove_padding = 0,
 	}, {
-		.name = "H264",
 		.fourcc = V4L2_PIX_FMT_H264,
 		.flags = V4L2_FMT_FLAG_COMPRESSED,
 		.mmal = MMAL_ENCODING_H264,
@@ -120,7 +112,6 @@ static struct mmal_fmt formats[] = {
 		.ybbp = 0,
 		.remove_padding = 0,
 	}, {
-		.name = "MJPEG",
 		.fourcc = V4L2_PIX_FMT_MJPEG,
 		.flags = V4L2_FMT_FLAG_COMPRESSED,
 		.mmal = MMAL_ENCODING_MJPEG,
@@ -129,72 +120,56 @@ static struct mmal_fmt formats[] = {
 		.ybbp = 0,
 		.remove_padding = 0,
 	}, {
-		.name = "4:2:2, packed, YVYU",
 		.fourcc = V4L2_PIX_FMT_YVYU,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_YVYU,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
 		.remove_padding = 0,
 	}, {
-		.name = "4:2:2, packed, VYUY",
 		.fourcc = V4L2_PIX_FMT_VYUY,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_VYUY,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
 		.remove_padding = 0,
 	}, {
-		.name = "4:2:2, packed, UYVY",
 		.fourcc = V4L2_PIX_FMT_UYVY,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_UYVY,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
 		.remove_padding = 0,
 	}, {
-		.name = "4:2:0, planar, NV12",
 		.fourcc = V4L2_PIX_FMT_NV12,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_NV12,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
 		.remove_padding = 1,
 	}, {
-		.name = "RGB24 (BE)",
 		.fourcc = V4L2_PIX_FMT_BGR24,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_BGR24,
 		.depth = 24,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 3,
 		.remove_padding = 0,
 	}, {
-		.name = "4:2:0, planar, YVU",
 		.fourcc = V4L2_PIX_FMT_YVU420,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_YV12,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
 		.remove_padding = 1,
 	}, {
-		.name = "4:2:0, planar, NV21",
 		.fourcc = V4L2_PIX_FMT_NV21,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_NV21,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
 		.remove_padding = 1,
 	}, {
-		.name = "RGB32 (BE)",
 		.fourcc = V4L2_PIX_FMT_BGR32,
-		.flags = 0,
 		.mmal = MMAL_ENCODING_BGRA,
 		.depth = 32,
 		.mmal_component = COMP_CAMERA,
@@ -716,9 +691,7 @@ static int vidioc_enum_fmt_vid_overlay(struct file *file, void *priv,
 
 	fmt = &formats[f->index];
 
-	strlcpy((char *)f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
-	f->flags = fmt->flags;
 
 	return 0;
 }
@@ -919,9 +892,7 @@ static int vidioc_enum_fmt_vid_cap(struct file *file, void *priv,
 
 	fmt = &formats[f->index];
 
-	strlcpy((char *)f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
-	f->flags = fmt->flags;
 
 	return 0;
 }
diff --git a/drivers/staging/vc04_services/bcm2835-camera/mmal-common.h b/drivers/staging/vc04_services/bcm2835-camera/mmal-common.h
index 6f56c517d850..ff5398737b4a 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/mmal-common.h
+++ b/drivers/staging/vc04_services/bcm2835-camera/mmal-common.h
@@ -26,7 +26,6 @@ struct mmal_msg_context;
 
 /* mapping between v4l and mmal video modes */
 struct mmal_fmt {
-	char  *name;
 	u32   fourcc;          /* v4l2 format id */
 	int   flags;           /* v4l2 flags field */
 	u32   mmal;
diff --git a/include/media/drv-intf/soc_mediabus.h b/include/media/drv-intf/soc_mediabus.h
index 73de3bd0c605..361f8852c9fc 100644
--- a/include/media/drv-intf/soc_mediabus.h
+++ b/include/media/drv-intf/soc_mediabus.h
@@ -66,7 +66,6 @@ enum soc_mbus_layout {
 
 /**
  * struct soc_mbus_pixelfmt - Data format on the media bus
- * @name:		Name of the format
  * @fourcc:		Fourcc code, that will be obtained if the data is
  *			stored in memory in the following way:
  * @packing:		Type of sample-packing, that has to be used
@@ -74,7 +73,6 @@ enum soc_mbus_layout {
  * @bits_per_sample:	How many bits the bridge has to sample
  */
 struct soc_mbus_pixelfmt {
-	const char		*name;
 	u32			fourcc;
 	enum soc_mbus_packing	packing;
 	enum soc_mbus_order	order;
-- 
cgit v1.2.3


From 59fe916c84f891aab35019adc45377a10f5690b1 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 11 Jun 2019 10:25:15 -0400
Subject: media: media/platform: don't set description in ENUM_FMT

The V4L2 core sets the format description and flags for the driver in order
to ensure consistent naming.

So drop the strscpy of the description in drivers. Also remove any
description strings in driver-internal structures since those are
no longer needed.

And in am437x-vpfe.c drop an unnecessary f->type assignment in
vpfe_enum_fmt().

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Benoit Parrot <bparrot@ti.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
[hverkuil-cisco@xs4all.nl: addressed some small suggestions from Laurent]
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/am437x/am437x-vpfe.c        | 18 ++-------------
 drivers/media/platform/davinci/vpbe_display.c      | 14 ++---------
 drivers/media/platform/davinci/vpif_capture.c      | 11 ++-------
 drivers/media/platform/davinci/vpif_display.c      |  4 ----
 drivers/media/platform/exynos-gsc/gsc-core.c       | 22 ------------------
 drivers/media/platform/exynos-gsc/gsc-core.h       |  2 --
 drivers/media/platform/exynos4-is/fimc-capture.c   |  3 ---
 drivers/media/platform/exynos4-is/fimc-core.c      | 20 ----------------
 drivers/media/platform/exynos4-is/fimc-isp-video.c |  1 -
 drivers/media/platform/exynos4-is/fimc-isp.c       |  3 ---
 drivers/media/platform/exynos4-is/fimc-lite.c      |  8 -------
 drivers/media/platform/exynos4-is/fimc-m2m.c       |  1 -
 drivers/media/platform/m2m-deinterlace.c           |  4 ----
 drivers/media/platform/marvell-ccic/mcam-core.c    | 10 --------
 drivers/media/platform/mx2_emmaprp.c               |  4 ----
 drivers/media/platform/omap/omap_vout.c            |  7 ------
 drivers/media/platform/s3c-camif/camif-capture.c   | 11 ++++-----
 drivers/media/platform/s3c-camif/camif-core.c      |  6 -----
 drivers/media/platform/s3c-camif/camif-core.h      |  1 -
 drivers/media/platform/s5p-g2d/g2d.c               | 10 +-------
 drivers/media/platform/s5p-g2d/g2d.h               |  1 -
 drivers/media/platform/s5p-jpeg/jpeg-core.c        | 27 ----------------------
 drivers/media/platform/s5p-jpeg/jpeg-core.h        |  2 --
 drivers/media/platform/s5p-mfc/s5p_mfc_common.h    |  1 -
 drivers/media/platform/s5p-mfc/s5p_mfc_dec.c       | 19 +--------------
 drivers/media/platform/s5p-mfc/s5p_mfc_enc.c       | 14 +----------
 drivers/media/platform/sh_veu.c                    | 19 +++++++--------
 drivers/media/platform/sh_vou.c                    | 12 ++--------
 drivers/media/platform/ti-vpe/vpe.c                | 12 ----------
 drivers/media/platform/via-camera.c                |  4 ----
 drivers/media/platform/xilinx/xilinx-dma.c         |  2 --
 drivers/media/platform/xilinx/xilinx-vip.c         | 16 ++++++-------
 drivers/media/platform/xilinx/xilinx-vip.h         |  2 --
 include/media/drv-intf/exynos-fimc.h               |  2 --
 34 files changed, 31 insertions(+), 262 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
index fe7b937eb5f2..7582c26f8459 100644
--- a/drivers/media/platform/am437x/am437x-vpfe.c
+++ b/drivers/media/platform/am437x/am437x-vpfe.c
@@ -76,7 +76,6 @@ struct bus_format {
 
 /*
  * struct vpfe_fmt - VPFE media bus format information
- * @name: V4L2 format description
  * @code: V4L2 media bus format code
  * @shifted: V4L2 media bus format code for the same pixel layout but
  *	shifted to be 8 bits per pixel. =0 if format is not shiftable.
@@ -86,7 +85,6 @@ struct bus_format {
  * @supported: Indicates format supported by subdev
  */
 struct vpfe_fmt {
-	const char *name;
 	u32 fourcc;
 	u32 code;
 	struct bus_format l;
@@ -97,7 +95,6 @@ struct vpfe_fmt {
 
 static struct vpfe_fmt formats[] = {
 	{
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.code		= MEDIA_BUS_FMT_YUYV8_2X8,
 		.l.width	= 10,
@@ -106,7 +103,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 2,
 		.supported	= false,
 	}, {
-		.name		= "YUV 4:2:2 packed, CbYCrY",
 		.fourcc		= V4L2_PIX_FMT_UYVY,
 		.code		= MEDIA_BUS_FMT_UYVY8_2X8,
 		.l.width	= 10,
@@ -115,7 +111,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 2,
 		.supported	= false,
 	}, {
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_YVYU,
 		.code		= MEDIA_BUS_FMT_YVYU8_2X8,
 		.l.width	= 10,
@@ -124,7 +119,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 2,
 		.supported	= false,
 	}, {
-		.name		= "YUV 4:2:2 packed, CrYCbY",
 		.fourcc		= V4L2_PIX_FMT_VYUY,
 		.code		= MEDIA_BUS_FMT_VYUY8_2X8,
 		.l.width	= 10,
@@ -133,7 +127,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 2,
 		.supported	= false,
 	}, {
-		.name		= "RAW8 BGGR",
 		.fourcc		= V4L2_PIX_FMT_SBGGR8,
 		.code		= MEDIA_BUS_FMT_SBGGR8_1X8,
 		.l.width	= 10,
@@ -142,7 +135,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 1,
 		.supported	= false,
 	}, {
-		.name		= "RAW8 GBRG",
 		.fourcc		= V4L2_PIX_FMT_SGBRG8,
 		.code		= MEDIA_BUS_FMT_SGBRG8_1X8,
 		.l.width	= 10,
@@ -151,7 +143,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 1,
 		.supported	= false,
 	}, {
-		.name		= "RAW8 GRBG",
 		.fourcc		= V4L2_PIX_FMT_SGRBG8,
 		.code		= MEDIA_BUS_FMT_SGRBG8_1X8,
 		.l.width	= 10,
@@ -160,7 +151,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 1,
 		.supported	= false,
 	}, {
-		.name		= "RAW8 RGGB",
 		.fourcc		= V4L2_PIX_FMT_SRGGB8,
 		.code		= MEDIA_BUS_FMT_SRGGB8_1X8,
 		.l.width	= 10,
@@ -169,7 +159,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 1,
 		.supported	= false,
 	}, {
-		.name		= "RGB565 (LE)",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.code		= MEDIA_BUS_FMT_RGB565_2X8_LE,
 		.l.width	= 10,
@@ -178,7 +167,6 @@ static struct vpfe_fmt formats[] = {
 		.s.bpp		= 2,
 		.supported	= false,
 	}, {
-		.name		= "RGB565 (BE)",
 		.fourcc		= V4L2_PIX_FMT_RGB565X,
 		.code		= MEDIA_BUS_FMT_RGB565_2X8_BE,
 		.l.width	= 10,
@@ -1540,12 +1528,10 @@ static int vpfe_enum_fmt(struct file *file, void  *priv,
 	if (!fmt)
 		return -EINVAL;
 
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
-	f->type = vpfe->fmt.type;
 
-	vpfe_dbg(1, vpfe, "vpfe_enum_format: mbus index: %d code: %x pixelformat: %s [%s]\n",
-		f->index, fmt->code, print_fourcc(fmt->fourcc), fmt->name);
+	vpfe_dbg(1, vpfe, "vpfe_enum_format: mbus index: %d code: %x pixelformat: %s\n",
+		 f->index, fmt->code, print_fourcc(fmt->fourcc));
 
 	return 0;
 }
diff --git a/drivers/media/platform/davinci/vpbe_display.c b/drivers/media/platform/davinci/vpbe_display.c
index 000b191c42d8..ed10a9bbf96b 100644
--- a/drivers/media/platform/davinci/vpbe_display.c
+++ b/drivers/media/platform/davinci/vpbe_display.c
@@ -792,7 +792,6 @@ static int vpbe_display_enum_fmt(struct file *file, void  *priv,
 {
 	struct vpbe_layer *layer = video_drvdata(file);
 	struct vpbe_device *vpbe_dev = layer->disp_dev->vpbe_dev;
-	unsigned int index = 0;
 
 	v4l2_dbg(1, debug, &vpbe_dev->v4l2_dev,
 				"VIDIOC_ENUM_FMT, layer id = %d\n",
@@ -803,19 +802,10 @@ static int vpbe_display_enum_fmt(struct file *file, void  *priv,
 	}
 
 	/* Fill in the information about format */
-	index = fmt->index;
-	memset(fmt, 0, sizeof(*fmt));
-	fmt->index = index;
-	fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-	if (index == 0) {
-		strscpy(fmt->description, "YUV 4:2:2 - UYVY",
-			sizeof(fmt->description));
+	if (fmt->index == 0)
 		fmt->pixelformat = V4L2_PIX_FMT_UYVY;
-	} else {
-		strscpy(fmt->description, "Y/CbCr 4:2:0",
-			sizeof(fmt->description));
+	else
 		fmt->pixelformat = V4L2_PIX_FMT_NV12;
-	}
 
 	return 0;
 }
diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c
index f0f7ef638c56..621d28470d2b 100644
--- a/drivers/media/platform/davinci/vpif_capture.c
+++ b/drivers/media/platform/davinci/vpif_capture.c
@@ -938,17 +938,10 @@ static int vpif_enum_fmt_vid_cap(struct file *file, void  *priv,
 	}
 
 	/* Fill in the information about format */
-	if (ch->vpifparams.iface.if_type == VPIF_IF_RAW_BAYER) {
-		fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-		strscpy(fmt->description, "Raw Mode -Bayer Pattern GrRBGb",
-			sizeof(fmt->description));
+	if (ch->vpifparams.iface.if_type == VPIF_IF_RAW_BAYER)
 		fmt->pixelformat = V4L2_PIX_FMT_SBGGR8;
-	} else {
-		fmt->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
-		strscpy(fmt->description, "YCbCr4:2:2 Semi-Planar",
-			sizeof(fmt->description));
+	else
 		fmt->pixelformat = V4L2_PIX_FMT_NV16;
-	}
 	return 0;
 }
 
diff --git a/drivers/media/platform/davinci/vpif_display.c b/drivers/media/platform/davinci/vpif_display.c
index a69897c68a50..be32f87001cd 100644
--- a/drivers/media/platform/davinci/vpif_display.c
+++ b/drivers/media/platform/davinci/vpif_display.c
@@ -601,11 +601,7 @@ static int vpif_enum_fmt_vid_out(struct file *file, void  *priv,
 		return -EINVAL;
 
 	/* Fill in the information about format */
-	fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-	strscpy(fmt->description, "YCbCr4:2:2 YC Planar",
-		sizeof(fmt->description));
 	fmt->pixelformat = V4L2_PIX_FMT_YUV422P;
-	fmt->flags = 0;
 	return 0;
 }
 
diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c
index 854869f0024e..f6650b45bc3d 100644
--- a/drivers/media/platform/exynos-gsc/gsc-core.c
+++ b/drivers/media/platform/exynos-gsc/gsc-core.c
@@ -27,21 +27,18 @@
 
 static const struct gsc_fmt gsc_formats[] = {
 	{
-		.name		= "RGB565",
 		.pixelformat	= V4L2_PIX_FMT_RGB565X,
 		.depth		= { 16 },
 		.color		= GSC_RGB,
 		.num_planes	= 1,
 		.num_comp	= 1,
 	}, {
-		.name		= "BGRX-8-8-8-8, 32 bpp",
 		.pixelformat	= V4L2_PIX_FMT_BGR32,
 		.depth		= { 32 },
 		.color		= GSC_RGB,
 		.num_planes	= 1,
 		.num_comp	= 1,
 	}, {
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.pixelformat	= V4L2_PIX_FMT_YUYV,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -51,7 +48,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_comp	= 1,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 	}, {
-		.name		= "YUV 4:2:2 packed, CbYCrY",
 		.pixelformat	= V4L2_PIX_FMT_UYVY,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -61,7 +57,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_comp	= 1,
 		.mbus_code	= MEDIA_BUS_FMT_UYVY8_2X8,
 	}, {
-		.name		= "YUV 4:2:2 packed, CrYCbY",
 		.pixelformat	= V4L2_PIX_FMT_VYUY,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -71,7 +66,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_comp	= 1,
 		.mbus_code	= MEDIA_BUS_FMT_VYUY8_2X8,
 	}, {
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.pixelformat	= V4L2_PIX_FMT_YVYU,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -81,7 +75,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_comp	= 1,
 		.mbus_code	= MEDIA_BUS_FMT_YVYU8_2X8,
 	}, {
-		.name		= "YUV 4:4:4 planar, YCbYCr",
 		.pixelformat	= V4L2_PIX_FMT_YUV32,
 		.depth		= { 32 },
 		.color		= GSC_YUV444,
@@ -90,7 +83,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 1,
 	}, {
-		.name		= "YUV 4:2:2 planar, Y/Cb/Cr",
 		.pixelformat	= V4L2_PIX_FMT_YUV422P,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -99,7 +91,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 3,
 	}, {
-		.name		= "YUV 4:2:2 planar, Y/CbCr",
 		.pixelformat	= V4L2_PIX_FMT_NV16,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -108,7 +99,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:2 non-contig, Y/CbCr",
 		.pixelformat	= V4L2_PIX_FMT_NV16M,
 		.depth		= { 8, 8 },
 		.color		= GSC_YUV422,
@@ -117,7 +107,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 2,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:2 planar, Y/CrCb",
 		.pixelformat	= V4L2_PIX_FMT_NV61,
 		.depth		= { 16 },
 		.color		= GSC_YUV422,
@@ -126,7 +115,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:2 non-contig, Y/CrCb",
 		.pixelformat	= V4L2_PIX_FMT_NV61M,
 		.depth		= { 8, 8 },
 		.color		= GSC_YUV422,
@@ -135,7 +123,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 2,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:0 planar, YCbCr",
 		.pixelformat	= V4L2_PIX_FMT_YUV420,
 		.depth		= { 12 },
 		.color		= GSC_YUV420,
@@ -144,7 +131,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 3,
 	}, {
-		.name		= "YUV 4:2:0 planar, YCrCb",
 		.pixelformat	= V4L2_PIX_FMT_YVU420,
 		.depth		= { 12 },
 		.color		= GSC_YUV420,
@@ -154,7 +140,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_comp	= 3,
 
 	}, {
-		.name		= "YUV 4:2:0 planar, Y/CbCr",
 		.pixelformat	= V4L2_PIX_FMT_NV12,
 		.depth		= { 12 },
 		.color		= GSC_YUV420,
@@ -163,7 +148,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:0 planar, Y/CrCb",
 		.pixelformat	= V4L2_PIX_FMT_NV21,
 		.depth		= { 12 },
 		.color		= GSC_YUV420,
@@ -172,7 +156,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 1,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 2p, Y/CrCb",
 		.pixelformat	= V4L2_PIX_FMT_NV21M,
 		.depth		= { 8, 4 },
 		.color		= GSC_YUV420,
@@ -181,7 +164,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 2,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 2p, Y/CbCr",
 		.pixelformat	= V4L2_PIX_FMT_NV12M,
 		.depth		= { 8, 4 },
 		.color		= GSC_YUV420,
@@ -190,7 +172,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 2,
 		.num_comp	= 2,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 3p, Y/Cb/Cr",
 		.pixelformat	= V4L2_PIX_FMT_YUV420M,
 		.depth		= { 8, 2, 2 },
 		.color		= GSC_YUV420,
@@ -199,7 +180,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 3,
 		.num_comp	= 3,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 3p, Y/Cr/Cb",
 		.pixelformat	= V4L2_PIX_FMT_YVU420M,
 		.depth		= { 8, 2, 2 },
 		.color		= GSC_YUV420,
@@ -208,7 +188,6 @@ static const struct gsc_fmt gsc_formats[] = {
 		.num_planes	= 3,
 		.num_comp	= 3,
 	}, {
-		.name		= "YUV 4:2:0 n.c. 2p, Y/CbCr tiled",
 		.pixelformat	= V4L2_PIX_FMT_NV12MT_16X16,
 		.depth		= { 8, 4 },
 		.color		= GSC_YUV420,
@@ -335,7 +314,6 @@ int gsc_enum_fmt(struct v4l2_fmtdesc *f)
 	if (!fmt)
 		return -EINVAL;
 
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->pixelformat;
 
 	return 0;
diff --git a/drivers/media/platform/exynos-gsc/gsc-core.h b/drivers/media/platform/exynos-gsc/gsc-core.h
index 772183b090c2..8e5a9acb78aa 100644
--- a/drivers/media/platform/exynos-gsc/gsc-core.h
+++ b/drivers/media/platform/exynos-gsc/gsc-core.h
@@ -103,7 +103,6 @@ enum gsc_yuv_fmt {
 /**
  * struct gsc_fmt - the driver's internal color format data
  * @mbus_code: Media Bus pixel code, -1 if not applicable
- * @name: format description
  * @pixelformat: the fourcc code for this format, 0 if not applicable
  * @yorder: Y/C order
  * @corder: Chrominance order control
@@ -114,7 +113,6 @@ enum gsc_yuv_fmt {
  */
 struct gsc_fmt {
 	u32 mbus_code;
-	char	*name;
 	u32	pixelformat;
 	u32	color;
 	u32	yorder;
diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c
index 66510365dd5d..121d609ff856 100644
--- a/drivers/media/platform/exynos4-is/fimc-capture.c
+++ b/drivers/media/platform/exynos4-is/fimc-capture.c
@@ -738,10 +738,7 @@ static int fimc_cap_enum_fmt(struct file *file, void *priv,
 			       f->index);
 	if (!fmt)
 		return -EINVAL;
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
-	if (fmt->fourcc == MEDIA_BUS_FMT_JPEG_1X8)
-		f->flags |= V4L2_FMT_FLAG_COMPRESSED;
 	return 0;
 }
 
diff --git a/drivers/media/platform/exynos4-is/fimc-core.c b/drivers/media/platform/exynos4-is/fimc-core.c
index 7006f54bfee2..cde60fbb23a8 100644
--- a/drivers/media/platform/exynos4-is/fimc-core.c
+++ b/drivers/media/platform/exynos4-is/fimc-core.c
@@ -36,7 +36,6 @@ static char *fimc_clocks[MAX_FIMC_CLOCKS] = {
 
 static struct fimc_fmt fimc_formats[] = {
 	{
-		.name		= "RGB565",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_RGB565,
@@ -44,7 +43,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 1,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "BGR666",
 		.fourcc		= V4L2_PIX_FMT_BGR666,
 		.depth		= { 32 },
 		.color		= FIMC_FMT_RGB666,
@@ -52,7 +50,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 1,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "BGRA8888, 32 bpp",
 		.fourcc		= V4L2_PIX_FMT_BGR32,
 		.depth		= { 32 },
 		.color		= FIMC_FMT_RGB888,
@@ -60,7 +57,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 1,
 		.flags		= FMT_FLAGS_M2M | FMT_HAS_ALPHA,
 	}, {
-		.name		= "ARGB1555",
 		.fourcc		= V4L2_PIX_FMT_RGB555,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_RGB555,
@@ -68,7 +64,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 1,
 		.flags		= FMT_FLAGS_M2M_OUT | FMT_HAS_ALPHA,
 	}, {
-		.name		= "ARGB4444",
 		.fourcc		= V4L2_PIX_FMT_RGB444,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_RGB444,
@@ -76,11 +71,9 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 1,
 		.flags		= FMT_FLAGS_M2M_OUT | FMT_HAS_ALPHA,
 	}, {
-		.name		= "YUV 4:4:4",
 		.mbus_code	= MEDIA_BUS_FMT_YUV10_1X30,
 		.flags		= FMT_FLAGS_WRITEBACK,
 	}, {
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_YCBYCR422,
@@ -89,7 +82,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.flags		= FMT_FLAGS_M2M | FMT_FLAGS_CAM,
 	}, {
-		.name		= "YUV 4:2:2 packed, CbYCrY",
 		.fourcc		= V4L2_PIX_FMT_UYVY,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_CBYCRY422,
@@ -98,7 +90,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_UYVY8_2X8,
 		.flags		= FMT_FLAGS_M2M | FMT_FLAGS_CAM,
 	}, {
-		.name		= "YUV 4:2:2 packed, CrYCbY",
 		.fourcc		= V4L2_PIX_FMT_VYUY,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_CRYCBY422,
@@ -107,7 +98,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_VYUY8_2X8,
 		.flags		= FMT_FLAGS_M2M | FMT_FLAGS_CAM,
 	}, {
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_YVYU,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_YCRYCB422,
@@ -116,7 +106,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_YVYU8_2X8,
 		.flags		= FMT_FLAGS_M2M | FMT_FLAGS_CAM,
 	}, {
-		.name		= "YUV 4:2:2 planar, Y/Cb/Cr",
 		.fourcc		= V4L2_PIX_FMT_YUV422P,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_YCBYCR422,
@@ -124,7 +113,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 3,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:2 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV16,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_YCBYCR422,
@@ -132,7 +120,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 2,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:2 planar, Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV61,
 		.depth		= { 16 },
 		.color		= FIMC_FMT_YCRYCB422,
@@ -140,7 +127,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 2,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:0 planar, YCbCr",
 		.fourcc		= V4L2_PIX_FMT_YUV420,
 		.depth		= { 12 },
 		.color		= FIMC_FMT_YCBCR420,
@@ -148,7 +134,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 3,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:0 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12,
 		.depth		= { 12 },
 		.color		= FIMC_FMT_YCBCR420,
@@ -156,7 +141,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 2,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 2p, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12M,
 		.color		= FIMC_FMT_YCBCR420,
 		.depth		= { 8, 4 },
@@ -164,7 +148,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 2,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 3p, Y/Cb/Cr",
 		.fourcc		= V4L2_PIX_FMT_YUV420M,
 		.color		= FIMC_FMT_YCBCR420,
 		.depth		= { 8, 2, 2 },
@@ -172,7 +155,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 3,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "YUV 4:2:0 non-contig. 2p, tiled",
 		.fourcc		= V4L2_PIX_FMT_NV12MT,
 		.color		= FIMC_FMT_YCBCR420,
 		.depth		= { 8, 4 },
@@ -180,7 +162,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.colplanes	= 2,
 		.flags		= FMT_FLAGS_M2M,
 	}, {
-		.name		= "JPEG encoded data",
 		.fourcc		= V4L2_PIX_FMT_JPEG,
 		.color		= FIMC_FMT_JPEG,
 		.depth		= { 8 },
@@ -189,7 +170,6 @@ static struct fimc_fmt fimc_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_JPEG_1X8,
 		.flags		= FMT_FLAGS_CAM | FMT_FLAGS_COMPRESSED,
 	}, {
-		.name		= "S5C73MX interleaved UYVY/JPEG",
 		.fourcc		= V4L2_PIX_FMT_S5C_UYVY_JPG,
 		.color		= FIMC_FMT_YUYV_JPEG,
 		.depth		= { 8 },
diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c
index a75f932a289a..378cc302e1f8 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp-video.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c
@@ -362,7 +362,6 @@ static int isp_video_enum_fmt(struct file *file, void *priv,
 	if (WARN_ON(fmt == NULL))
 		return -EINVAL;
 
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
 
 	return 0;
diff --git a/drivers/media/platform/exynos4-is/fimc-isp.c b/drivers/media/platform/exynos4-is/fimc-isp.c
index 907b83e6649d..cde0d254ec1c 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp.c
@@ -33,21 +33,18 @@ module_param_named(debug_isp, fimc_isp_debug, int, S_IRUGO | S_IWUSR);
 
 static const struct fimc_fmt fimc_isp_formats[FIMC_ISP_NUM_FORMATS] = {
 	{
-		.name		= "RAW8 (GRBG)",
 		.fourcc		= V4L2_PIX_FMT_SGRBG8,
 		.depth		= { 8 },
 		.color		= FIMC_FMT_RAW8,
 		.memplanes	= 1,
 		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
 	}, {
-		.name		= "RAW10 (GRBG)",
 		.fourcc		= V4L2_PIX_FMT_SGRBG10,
 		.depth		= { 10 },
 		.color		= FIMC_FMT_RAW10,
 		.memplanes	= 1,
 		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
 	}, {
-		.name		= "RAW12 (GRBG)",
 		.fourcc		= V4L2_PIX_FMT_SGRBG12,
 		.depth		= { 12 },
 		.color		= FIMC_FMT_RAW12,
diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c
index c1f0aee02e5e..e87c6a09205b 100644
--- a/drivers/media/platform/exynos4-is/fimc-lite.c
+++ b/drivers/media/platform/exynos4-is/fimc-lite.c
@@ -39,7 +39,6 @@ module_param(debug, int, 0644);
 
 static const struct fimc_fmt fimc_lite_formats[] = {
 	{
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.depth		= { 16 },
@@ -48,7 +47,6 @@ static const struct fimc_fmt fimc_lite_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.flags		= FMT_FLAGS_YUV,
 	}, {
-		.name		= "YUV 4:2:2 packed, CbYCrY",
 		.fourcc		= V4L2_PIX_FMT_UYVY,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.depth		= { 16 },
@@ -57,7 +55,6 @@ static const struct fimc_fmt fimc_lite_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_UYVY8_2X8,
 		.flags		= FMT_FLAGS_YUV,
 	}, {
-		.name		= "YUV 4:2:2 packed, CrYCbY",
 		.fourcc		= V4L2_PIX_FMT_VYUY,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.depth		= { 16 },
@@ -66,7 +63,6 @@ static const struct fimc_fmt fimc_lite_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_VYUY8_2X8,
 		.flags		= FMT_FLAGS_YUV,
 	}, {
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_YVYU,
 		.colorspace	= V4L2_COLORSPACE_JPEG,
 		.depth		= { 16 },
@@ -75,7 +71,6 @@ static const struct fimc_fmt fimc_lite_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_YVYU8_2X8,
 		.flags		= FMT_FLAGS_YUV,
 	}, {
-		.name		= "RAW8 (GRBG)",
 		.fourcc		= V4L2_PIX_FMT_SGRBG8,
 		.colorspace	= V4L2_COLORSPACE_SRGB,
 		.depth		= { 8 },
@@ -84,7 +79,6 @@ static const struct fimc_fmt fimc_lite_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_SGRBG8_1X8,
 		.flags		= FMT_FLAGS_RAW_BAYER,
 	}, {
-		.name		= "RAW10 (GRBG)",
 		.fourcc		= V4L2_PIX_FMT_SGRBG10,
 		.colorspace	= V4L2_COLORSPACE_SRGB,
 		.depth		= { 16 },
@@ -93,7 +87,6 @@ static const struct fimc_fmt fimc_lite_formats[] = {
 		.mbus_code	= MEDIA_BUS_FMT_SGRBG10_1X10,
 		.flags		= FMT_FLAGS_RAW_BAYER,
 	}, {
-		.name		= "RAW12 (GRBG)",
 		.fourcc		= V4L2_PIX_FMT_SGRBG12,
 		.colorspace	= V4L2_COLORSPACE_SRGB,
 		.depth		= { 16 },
@@ -667,7 +660,6 @@ static int fimc_lite_enum_fmt(struct file *file, void *priv,
 		return -EINVAL;
 
 	fmt = &fimc_lite_formats[f->index];
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
 
 	return 0;
diff --git a/drivers/media/platform/exynos4-is/fimc-m2m.c b/drivers/media/platform/exynos4-is/fimc-m2m.c
index 62e876fc3555..c70c2cbe3eb1 100644
--- a/drivers/media/platform/exynos4-is/fimc-m2m.c
+++ b/drivers/media/platform/exynos4-is/fimc-m2m.c
@@ -247,7 +247,6 @@ static int fimc_m2m_enum_fmt(struct file *file, void *priv,
 	if (!fmt)
 		return -EINVAL;
 
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
 	return 0;
 }
diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c
index beb7fd7442fb..e9310eeec6cf 100644
--- a/drivers/media/platform/m2m-deinterlace.c
+++ b/drivers/media/platform/m2m-deinterlace.c
@@ -37,7 +37,6 @@ module_param(debug, bool, 0644);
 	v4l2_dbg(1, debug, &dev->v4l2_dev, "%s: " fmt, __func__, ## arg)
 
 struct deinterlace_fmt {
-	char	*name;
 	u32	fourcc;
 	/* Types the format can be used for */
 	u32	types;
@@ -45,12 +44,10 @@ struct deinterlace_fmt {
 
 static struct deinterlace_fmt formats[] = {
 	{
-		.name	= "YUV 4:2:0 Planar",
 		.fourcc	= V4L2_PIX_FMT_YUV420,
 		.types	= MEM2MEM_CAPTURE | MEM2MEM_OUTPUT,
 	},
 	{
-		.name	= "YUYV 4:2:2",
 		.fourcc	= V4L2_PIX_FMT_YUYV,
 		.types	= MEM2MEM_CAPTURE | MEM2MEM_OUTPUT,
 	},
@@ -470,7 +467,6 @@ static int enum_fmt(struct v4l2_fmtdesc *f, u32 type)
 	if (i < NUM_FORMATS) {
 		/* Format found */
 		fmt = &formats[i];
-		strscpy(f->description, fmt->name, sizeof(f->description));
 		f->pixelformat = fmt->fourcc;
 		return 0;
 	}
diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c
index dc30c48d4671..30ac454e702e 100644
--- a/drivers/media/platform/marvell-ccic/mcam-core.c
+++ b/drivers/media/platform/marvell-ccic/mcam-core.c
@@ -98,56 +98,48 @@ MODULE_PARM_DESC(buffer_mode,
 	container_of(notifier, struct mcam_camera, notifier)
 
 static struct mcam_format_struct {
-	__u8 *desc;
 	__u32 pixelformat;
 	int bpp;   /* Bytes per pixel */
 	bool planar;
 	u32 mbus_code;
 } mcam_formats[] = {
 	{
-		.desc		= "YUYV 4:2:2",
 		.pixelformat	= V4L2_PIX_FMT_YUYV,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.bpp		= 2,
 		.planar		= false,
 	},
 	{
-		.desc		= "YVYU 4:2:2",
 		.pixelformat	= V4L2_PIX_FMT_YVYU,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.bpp		= 2,
 		.planar		= false,
 	},
 	{
-		.desc		= "YUV 4:2:0 PLANAR",
 		.pixelformat	= V4L2_PIX_FMT_YUV420,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.bpp		= 1,
 		.planar		= true,
 	},
 	{
-		.desc		= "YVU 4:2:0 PLANAR",
 		.pixelformat	= V4L2_PIX_FMT_YVU420,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.bpp		= 1,
 		.planar		= true,
 	},
 	{
-		.desc		= "XRGB 444",
 		.pixelformat	= V4L2_PIX_FMT_XRGB444,
 		.mbus_code	= MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE,
 		.bpp		= 2,
 		.planar		= false,
 	},
 	{
-		.desc		= "RGB 565",
 		.pixelformat	= V4L2_PIX_FMT_RGB565,
 		.mbus_code	= MEDIA_BUS_FMT_RGB565_2X8_LE,
 		.bpp		= 2,
 		.planar		= false,
 	},
 	{
-		.desc		= "Raw RGB Bayer",
 		.pixelformat	= V4L2_PIX_FMT_SBGGR8,
 		.mbus_code	= MEDIA_BUS_FMT_SBGGR8_1X8,
 		.bpp		= 1,
@@ -1369,8 +1361,6 @@ static int mcam_vidioc_enum_fmt_vid_cap(struct file *filp,
 {
 	if (fmt->index >= N_MCAM_FMTS)
 		return -EINVAL;
-	strscpy(fmt->description, mcam_formats[fmt->index].desc,
-		sizeof(fmt->description));
 	fmt->pixelformat = mcam_formats[fmt->index].pixelformat;
 	return 0;
 }
diff --git a/drivers/media/platform/mx2_emmaprp.c b/drivers/media/platform/mx2_emmaprp.c
index 333324c75027..4d4225ab1589 100644
--- a/drivers/media/platform/mx2_emmaprp.c
+++ b/drivers/media/platform/mx2_emmaprp.c
@@ -145,7 +145,6 @@ module_param(debug, bool, 0644);
 #define PRP_INTR_ST_CH2OVF	(1 << 8)
 
 struct emmaprp_fmt {
-	char	*name;
 	u32	fourcc;
 	/* Types the format can be used for */
 	u32	types;
@@ -153,12 +152,10 @@ struct emmaprp_fmt {
 
 static struct emmaprp_fmt formats[] = {
 	{
-		.name	= "YUV 4:2:0 Planar",
 		.fourcc	= V4L2_PIX_FMT_YUV420,
 		.types	= MEM2MEM_CAPTURE,
 	},
 	{
-		.name	= "4:2:2, packed, YUYV",
 		.fourcc	= V4L2_PIX_FMT_YUYV,
 		.types	= MEM2MEM_OUTPUT,
 	},
@@ -409,7 +406,6 @@ static int enum_fmt(struct v4l2_fmtdesc *f, u32 type)
 	if (i < NUM_FORMATS) {
 		/* Format found */
 		fmt = &formats[i];
-		strscpy(f->description, fmt->name, sizeof(f->description) - 1);
 		f->pixelformat = fmt->fourcc;
 		return 0;
 	}
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index cb6a9e3946b6..1f6742536c46 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -114,14 +114,12 @@ static const struct v4l2_fmtdesc omap_formats[] = {
 		 *      Byte 0                    Byte 1
 		 *      g2 g1 g0 b4 b3 b2 b1 b0   r4 r3 r2 r1 r0 g5 g4 g3
 		 */
-		.description = "RGB565, le",
 		.pixelformat = V4L2_PIX_FMT_RGB565,
 	},
 	{
 		/* Note:  V4L2 defines RGB32 as: RGB-8-8-8-8  we use
 		 *  this for RGB24 unpack mode, the last 8 bits are ignored
 		 * */
-		.description = "RGB32, le",
 		.pixelformat = V4L2_PIX_FMT_RGB32,
 	},
 	{
@@ -129,15 +127,12 @@ static const struct v4l2_fmtdesc omap_formats[] = {
 		 *        this for RGB24 packed mode
 		 *
 		 */
-		.description = "RGB24, le",
 		.pixelformat = V4L2_PIX_FMT_RGB24,
 	},
 	{
-		.description = "YUYV (YUV 4:2:2), packed",
 		.pixelformat = V4L2_PIX_FMT_YUYV,
 	},
 	{
-		.description = "UYVY, packed",
 		.pixelformat = V4L2_PIX_FMT_UYVY,
 	},
 };
@@ -1060,8 +1055,6 @@ static int vidioc_enum_fmt_vid_out(struct file *file, void *fh,
 		return -EINVAL;
 
 	fmt->flags = omap_formats[index].flags;
-	strscpy(fmt->description, omap_formats[index].description,
-		sizeof(fmt->description));
 	fmt->pixelformat = omap_formats[index].pixelformat;
 
 	return 0;
diff --git a/drivers/media/platform/s3c-camif/camif-capture.c b/drivers/media/platform/s3c-camif/camif-capture.c
index a876d0873ebc..2191fdded9da 100644
--- a/drivers/media/platform/s3c-camif/camif-capture.c
+++ b/drivers/media/platform/s3c-camif/camif-capture.c
@@ -685,10 +685,7 @@ static int s3c_camif_vidioc_enum_fmt(struct file *file, void *priv,
 	if (!fmt)
 		return -EINVAL;
 
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
-
-	pr_debug("fmt(%d): %s\n", f->index, f->description);
 	return 0;
 }
 
@@ -802,10 +799,10 @@ static int s3c_camif_vidioc_s_fmt(struct file *file, void *priv,
 	if (vp->owner == NULL)
 		vp->owner = priv;
 
-	pr_debug("%ux%u. payload: %u. fmt: %s. %d %d. sizeimage: %d. bpl: %d\n",
-		out_frame->f_width, out_frame->f_height, vp->payload, fmt->name,
-		pix->width * pix->height * fmt->depth, fmt->depth,
-		pix->sizeimage, pix->bytesperline);
+	pr_debug("%ux%u. payload: %u. fmt: 0x%08x. %d %d. sizeimage: %d. bpl: %d\n",
+		 out_frame->f_width, out_frame->f_height, vp->payload,
+		 fmt->fourcc, pix->width * pix->height * fmt->depth,
+		 fmt->depth, pix->sizeimage, pix->bytesperline);
 
 	return 0;
 }
diff --git a/drivers/media/platform/s3c-camif/camif-core.c b/drivers/media/platform/s3c-camif/camif-core.c
index b05ce0149ca1..f0acd6edcbba 100644
--- a/drivers/media/platform/s3c-camif/camif-core.c
+++ b/drivers/media/platform/s3c-camif/camif-core.c
@@ -42,7 +42,6 @@ static char *camif_clocks[CLK_MAX_NUM] = {
 
 static const struct camif_fmt camif_formats[] = {
 	{
-		.name		= "YUV 4:2:2 planar, Y/Cb/Cr",
 		.fourcc		= V4L2_PIX_FMT_YUV422P,
 		.depth		= 16,
 		.ybpp		= 1,
@@ -51,7 +50,6 @@ static const struct camif_fmt camif_formats[] = {
 		.flags		= FMT_FL_S3C24XX_CODEC |
 				  FMT_FL_S3C64XX,
 	}, {
-		.name		= "YUV 4:2:0 planar, Y/Cb/Cr",
 		.fourcc		= V4L2_PIX_FMT_YUV420,
 		.depth		= 12,
 		.ybpp		= 1,
@@ -60,7 +58,6 @@ static const struct camif_fmt camif_formats[] = {
 		.flags		= FMT_FL_S3C24XX_CODEC |
 				  FMT_FL_S3C64XX,
 	}, {
-		.name		= "YVU 4:2:0 planar, Y/Cr/Cb",
 		.fourcc		= V4L2_PIX_FMT_YVU420,
 		.depth		= 12,
 		.ybpp		= 1,
@@ -69,7 +66,6 @@ static const struct camif_fmt camif_formats[] = {
 		.flags		= FMT_FL_S3C24XX_CODEC |
 				  FMT_FL_S3C64XX,
 	}, {
-		.name		= "RGB565, 16 bpp",
 		.fourcc		= V4L2_PIX_FMT_RGB565X,
 		.depth		= 16,
 		.ybpp		= 2,
@@ -78,7 +74,6 @@ static const struct camif_fmt camif_formats[] = {
 		.flags		= FMT_FL_S3C24XX_PREVIEW |
 				  FMT_FL_S3C64XX,
 	}, {
-		.name		= "XRGB8888, 32 bpp",
 		.fourcc		= V4L2_PIX_FMT_RGB32,
 		.depth		= 32,
 		.ybpp		= 4,
@@ -87,7 +82,6 @@ static const struct camif_fmt camif_formats[] = {
 		.flags		= FMT_FL_S3C24XX_PREVIEW |
 				  FMT_FL_S3C64XX,
 	}, {
-		.name		= "BGR666",
 		.fourcc		= V4L2_PIX_FMT_BGR666,
 		.depth		= 32,
 		.ybpp		= 4,
diff --git a/drivers/media/platform/s3c-camif/camif-core.h b/drivers/media/platform/s3c-camif/camif-core.h
index efdc00b4ec6f..f937e638490f 100644
--- a/drivers/media/platform/s3c-camif/camif-core.h
+++ b/drivers/media/platform/s3c-camif/camif-core.h
@@ -89,7 +89,6 @@ enum img_fmt {
  * @ybpp:      number of luminance bytes per pixel
  */
 struct camif_fmt {
-	char *name;
 	u32 fourcc;
 	u32 color;
 	u16 colplanes;
diff --git a/drivers/media/platform/s5p-g2d/g2d.c b/drivers/media/platform/s5p-g2d/g2d.c
index 152d192d5c3f..b3fc0ff8a5ec 100644
--- a/drivers/media/platform/s5p-g2d/g2d.c
+++ b/drivers/media/platform/s5p-g2d/g2d.c
@@ -29,31 +29,26 @@
 
 static struct g2d_fmt formats[] = {
 	{
-		.name	= "XRGB_8888",
 		.fourcc	= V4L2_PIX_FMT_RGB32,
 		.depth	= 32,
 		.hw	= COLOR_MODE(ORDER_XRGB, MODE_XRGB_8888),
 	},
 	{
-		.name	= "RGB_565",
 		.fourcc	= V4L2_PIX_FMT_RGB565X,
 		.depth	= 16,
 		.hw	= COLOR_MODE(ORDER_XRGB, MODE_RGB_565),
 	},
 	{
-		.name	= "XRGB_1555",
 		.fourcc	= V4L2_PIX_FMT_RGB555X,
 		.depth	= 16,
 		.hw	= COLOR_MODE(ORDER_XRGB, MODE_XRGB_1555),
 	},
 	{
-		.name	= "XRGB_4444",
 		.fourcc	= V4L2_PIX_FMT_RGB444,
 		.depth	= 16,
 		.hw	= COLOR_MODE(ORDER_XRGB, MODE_XRGB_4444),
 	},
 	{
-		.name	= "PACKED_RGB_888",
 		.fourcc	= V4L2_PIX_FMT_RGB24,
 		.depth	= 24,
 		.hw	= COLOR_MODE(ORDER_XRGB, MODE_PACKED_RGB_888),
@@ -303,12 +298,9 @@ static int vidioc_querycap(struct file *file, void *priv,
 
 static int vidioc_enum_fmt(struct file *file, void *prv, struct v4l2_fmtdesc *f)
 {
-	struct g2d_fmt *fmt;
 	if (f->index >= NUM_FORMATS)
 		return -EINVAL;
-	fmt = &formats[f->index];
-	f->pixelformat = fmt->fourcc;
-	strscpy(f->description, fmt->name, sizeof(f->description));
+	f->pixelformat = formats[f->index].fourcc;
 	return 0;
 }
 
diff --git a/drivers/media/platform/s5p-g2d/g2d.h b/drivers/media/platform/s5p-g2d/g2d.h
index def0ec0dabeb..c2309c1370da 100644
--- a/drivers/media/platform/s5p-g2d/g2d.h
+++ b/drivers/media/platform/s5p-g2d/g2d.h
@@ -61,7 +61,6 @@ struct g2d_ctx {
 };
 
 struct g2d_fmt {
-	char	*name;
 	u32	fourcc;
 	int	depth;
 	u32	hw;
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c
index a3bc884b7df1..20cad061e908 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.c
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c
@@ -35,7 +35,6 @@
 
 static struct s5p_jpeg_fmt sjpeg_formats[] = {
 	{
-		.name		= "JPEG JFIF",
 		.fourcc		= V4L2_PIX_FMT_JPEG,
 		.flags		= SJPEG_FMT_FLAG_ENC_CAPTURE |
 				  SJPEG_FMT_FLAG_DEC_OUTPUT |
@@ -44,7 +43,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 				  SJPEG_FMT_FLAG_EXYNOS4,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -57,7 +55,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -70,7 +67,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCbYCr",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -83,7 +79,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_YVYU,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -96,7 +91,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_YVYU,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -109,7 +103,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_UYVY,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -122,7 +115,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 packed, YCrYCb",
 		.fourcc		= V4L2_PIX_FMT_VYUY,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -135,7 +127,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "RGB565",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -148,7 +139,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "RGB565",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -161,7 +151,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "RGB565X",
 		.fourcc		= V4L2_PIX_FMT_RGB565X,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -174,7 +163,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "RGB565",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.depth		= 16,
 		.colplanes	= 1,
@@ -186,7 +174,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "ARGB8888, 32 bpp",
 		.fourcc		= V4L2_PIX_FMT_RGB32,
 		.depth		= 32,
 		.colplanes	= 1,
@@ -199,7 +186,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "ARGB8888, 32 bpp",
 		.fourcc		= V4L2_PIX_FMT_RGB32,
 		.depth		= 32,
 		.colplanes	= 1,
@@ -212,7 +198,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "YUV 4:4:4 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV24,
 		.depth		= 24,
 		.colplanes	= 2,
@@ -225,7 +210,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "YUV 4:4:4 planar, Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV42,
 		.depth		= 24,
 		.colplanes	= 2,
@@ -238,7 +222,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_444,
 	},
 	{
-		.name		= "YUV 4:2:2 planar, Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV61,
 		.depth		= 16,
 		.colplanes	= 2,
@@ -251,7 +234,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:2 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV16,
 		.depth		= 16,
 		.colplanes	= 2,
@@ -264,7 +246,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_422,
 	},
 	{
-		.name		= "YUV 4:2:0 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12,
 		.depth		= 12,
 		.colplanes	= 2,
@@ -277,7 +258,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "YUV 4:2:0 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12,
 		.depth		= 12,
 		.colplanes	= 2,
@@ -290,7 +270,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "YUV 4:2:0 planar, Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12,
 		.depth		= 12,
 		.colplanes	= 2,
@@ -303,7 +282,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "YUV 4:2:0 planar, Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV21,
 		.depth		= 12,
 		.colplanes	= 2,
@@ -316,7 +294,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "YUV 4:2:0 planar, Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV21,
 		.depth		= 12,
 		.colplanes	= 2,
@@ -330,7 +307,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "YUV 4:2:0 contiguous 3-planar, Y/Cb/Cr",
 		.fourcc		= V4L2_PIX_FMT_YUV420,
 		.depth		= 12,
 		.colplanes	= 3,
@@ -343,7 +319,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "YUV 4:2:0 contiguous 3-planar, Y/Cb/Cr",
 		.fourcc		= V4L2_PIX_FMT_YUV420,
 		.depth		= 12,
 		.colplanes	= 3,
@@ -356,7 +331,6 @@ static struct s5p_jpeg_fmt sjpeg_formats[] = {
 		.subsampling	= V4L2_JPEG_CHROMA_SUBSAMPLING_420,
 	},
 	{
-		.name		= "Gray",
 		.fourcc		= V4L2_PIX_FMT_GREY,
 		.depth		= 8,
 		.colplanes	= 1,
@@ -1314,7 +1288,6 @@ static int enum_fmt(struct s5p_jpeg_ctx *ctx,
 	if (i >= n)
 		return -EINVAL;
 
-	strscpy(f->description, sjpeg_formats[i].name, sizeof(f->description));
 	f->pixelformat = sjpeg_formats[i].fourcc;
 
 	return 0;
diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.h b/drivers/media/platform/s5p-jpeg/jpeg-core.h
index 34f87f6c02f2..3bc52f83f5bc 100644
--- a/drivers/media/platform/s5p-jpeg/jpeg-core.h
+++ b/drivers/media/platform/s5p-jpeg/jpeg-core.h
@@ -150,7 +150,6 @@ struct s5p_jpeg_variant {
 
 /**
  * struct jpeg_fmt - driver's internal color format data
- * @name:	format description
  * @fourcc:	the fourcc code, 0 if not applicable
  * @depth:	number of bits per pixel
  * @colplanes:	number of color planes (1 for packed formats)
@@ -159,7 +158,6 @@ struct s5p_jpeg_variant {
  * @flags:	flags describing format applicability
  */
 struct s5p_jpeg_fmt {
-	char	*name;
 	u32	fourcc;
 	int	depth;
 	int	colplanes;
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h
index 5dc086516360..96d1ecd1521b 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h
@@ -718,7 +718,6 @@ struct s5p_mfc_ctx {
  *			used by the MFC
  */
 struct s5p_mfc_fmt {
-	char *name;
 	u32 fourcc;
 	u32 codec_mode;
 	enum s5p_mfc_fmt_type type;
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
index 4017c8b471f4..61e144a35201 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
@@ -29,7 +29,6 @@
 
 static struct s5p_mfc_fmt formats[] = {
 	{
-		.name		= "4:2:0 2 Planes 16x16 Tiles",
 		.fourcc		= V4L2_PIX_FMT_NV12MT_16X16,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -37,7 +36,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V6_BIT | MFC_V7_BIT,
 	},
 	{
-		.name		= "4:2:0 2 Planes 64x32 Tiles",
 		.fourcc		= V4L2_PIX_FMT_NV12MT,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -45,7 +43,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5_BIT,
 	},
 	{
-		.name		= "4:2:0 2 Planes Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12M,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -53,7 +50,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V6PLUS_BITS,
 	},
 	{
-		.name		= "4:2:0 2 Planes Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV21M,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -61,7 +57,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V6PLUS_BITS,
 	},
 	{
-		.name		= "H264 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_H264,
 		.codec_mode	= S5P_MFC_CODEC_H264_DEC,
 		.type		= MFC_FMT_DEC,
@@ -69,7 +64,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "H264/MVC Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_H264_MVC,
 		.codec_mode	= S5P_MFC_CODEC_H264_MVC_DEC,
 		.type		= MFC_FMT_DEC,
@@ -77,7 +71,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V6PLUS_BITS,
 	},
 	{
-		.name		= "H263 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_H263,
 		.codec_mode	= S5P_MFC_CODEC_H263_DEC,
 		.type		= MFC_FMT_DEC,
@@ -85,7 +78,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "MPEG1 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_MPEG1,
 		.codec_mode	= S5P_MFC_CODEC_MPEG2_DEC,
 		.type		= MFC_FMT_DEC,
@@ -93,7 +85,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "MPEG2 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_MPEG2,
 		.codec_mode	= S5P_MFC_CODEC_MPEG2_DEC,
 		.type		= MFC_FMT_DEC,
@@ -101,7 +92,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "MPEG4 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_MPEG4,
 		.codec_mode	= S5P_MFC_CODEC_MPEG4_DEC,
 		.type		= MFC_FMT_DEC,
@@ -109,7 +99,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "XviD Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_XVID,
 		.codec_mode	= S5P_MFC_CODEC_MPEG4_DEC,
 		.type		= MFC_FMT_DEC,
@@ -117,7 +106,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "VC1 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_VC1_ANNEX_G,
 		.codec_mode	= S5P_MFC_CODEC_VC1_DEC,
 		.type		= MFC_FMT_DEC,
@@ -125,7 +113,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "VC1 RCV Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_VC1_ANNEX_L,
 		.codec_mode	= S5P_MFC_CODEC_VC1RCV_DEC,
 		.type		= MFC_FMT_DEC,
@@ -133,7 +120,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "VP8 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_VP8,
 		.codec_mode	= S5P_MFC_CODEC_VP8_DEC,
 		.type		= MFC_FMT_DEC,
@@ -279,7 +265,6 @@ static int vidioc_enum_fmt(struct file *file, struct v4l2_fmtdesc *f,
 							bool out)
 {
 	struct s5p_mfc_dev *dev = video_drvdata(file);
-	struct s5p_mfc_fmt *fmt;
 	int i, j = 0;
 
 	for (i = 0; i < ARRAY_SIZE(formats); ++i) {
@@ -296,9 +281,7 @@ static int vidioc_enum_fmt(struct file *file, struct v4l2_fmtdesc *f,
 	}
 	if (i == ARRAY_SIZE(formats))
 		return -EINVAL;
-	fmt = &formats[i];
-	strscpy(f->description, fmt->name, sizeof(f->description));
-	f->pixelformat = fmt->fourcc;
+	f->pixelformat = formats[i].fourcc;
 	return 0;
 }
 
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
index 97e76480e942..912fe0c5ab18 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
@@ -32,7 +32,6 @@
 
 static struct s5p_mfc_fmt formats[] = {
 	{
-		.name		= "4:2:0 2 Planes 16x16 Tiles",
 		.fourcc		= V4L2_PIX_FMT_NV12MT_16X16,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -40,7 +39,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V6_BIT | MFC_V7_BIT,
 	},
 	{
-		.name		= "4:2:0 2 Planes 64x32 Tiles",
 		.fourcc		= V4L2_PIX_FMT_NV12MT,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -48,7 +46,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5_BIT,
 	},
 	{
-		.name		= "4:2:0 2 Planes Y/CbCr",
 		.fourcc		= V4L2_PIX_FMT_NV12M,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -56,7 +53,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "4:2:0 2 Planes Y/CrCb",
 		.fourcc		= V4L2_PIX_FMT_NV21M,
 		.codec_mode	= S5P_MFC_CODEC_NONE,
 		.type		= MFC_FMT_RAW,
@@ -64,7 +60,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V6PLUS_BITS,
 	},
 	{
-		.name		= "H264 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_H264,
 		.codec_mode	= S5P_MFC_CODEC_H264_ENC,
 		.type		= MFC_FMT_ENC,
@@ -72,7 +67,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "MPEG4 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_MPEG4,
 		.codec_mode	= S5P_MFC_CODEC_MPEG4_ENC,
 		.type		= MFC_FMT_ENC,
@@ -80,7 +74,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "H263 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_H263,
 		.codec_mode	= S5P_MFC_CODEC_H263_ENC,
 		.type		= MFC_FMT_ENC,
@@ -88,7 +81,6 @@ static struct s5p_mfc_fmt formats[] = {
 		.versions	= MFC_V5PLUS_BITS,
 	},
 	{
-		.name		= "VP8 Encoded Stream",
 		.fourcc		= V4L2_PIX_FMT_VP8,
 		.codec_mode	= S5P_MFC_CODEC_VP8_ENC,
 		.type		= MFC_FMT_ENC,
@@ -1320,7 +1312,6 @@ static int vidioc_enum_fmt(struct file *file, struct v4l2_fmtdesc *f,
 							bool out)
 {
 	struct s5p_mfc_dev *dev = video_drvdata(file);
-	struct s5p_mfc_fmt *fmt;
 	int i, j = 0;
 
 	for (i = 0; i < ARRAY_SIZE(formats); ++i) {
@@ -1332,10 +1323,7 @@ static int vidioc_enum_fmt(struct file *file, struct v4l2_fmtdesc *f,
 			continue;
 
 		if (j == f->index) {
-			fmt = &formats[i];
-			strscpy(f->description, fmt->name,
-				sizeof(f->description));
-			f->pixelformat = fmt->fourcc;
+			f->pixelformat = formats[i].fourcc;
 			return 0;
 		}
 		++j;
diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c
index 5a9ba05c996e..26360af2325e 100644
--- a/drivers/media/platform/sh_veu.c
+++ b/drivers/media/platform/sh_veu.c
@@ -86,7 +86,6 @@ struct sh_veu_file {
 };
 
 struct sh_veu_format {
-	char *name;
 	u32 fourcc;
 	unsigned int depth;
 	unsigned int ydepth;
@@ -144,14 +143,14 @@ enum sh_veu_fmt_idx {
  * aligned for NV24.
  */
 static const struct sh_veu_format sh_veu_fmt[] = {
-	[SH_VEU_FMT_NV12]   = { .ydepth = 8, .depth = 12, .name = "NV12", .fourcc = V4L2_PIX_FMT_NV12 },
-	[SH_VEU_FMT_NV16]   = { .ydepth = 8, .depth = 16, .name = "NV16", .fourcc = V4L2_PIX_FMT_NV16 },
-	[SH_VEU_FMT_NV24]   = { .ydepth = 8, .depth = 24, .name = "NV24", .fourcc = V4L2_PIX_FMT_NV24 },
-	[SH_VEU_FMT_RGB332] = { .ydepth = 8, .depth = 8, .name = "RGB332", .fourcc = V4L2_PIX_FMT_RGB332 },
-	[SH_VEU_FMT_RGB444] = { .ydepth = 16, .depth = 16, .name = "RGB444", .fourcc = V4L2_PIX_FMT_RGB444 },
-	[SH_VEU_FMT_RGB565] = { .ydepth = 16, .depth = 16, .name = "RGB565", .fourcc = V4L2_PIX_FMT_RGB565 },
-	[SH_VEU_FMT_RGB666] = { .ydepth = 32, .depth = 32, .name = "BGR666", .fourcc = V4L2_PIX_FMT_BGR666 },
-	[SH_VEU_FMT_RGB24]  = { .ydepth = 24, .depth = 24, .name = "RGB24", .fourcc = V4L2_PIX_FMT_RGB24 },
+	[SH_VEU_FMT_NV12]   = { .ydepth = 8, .depth = 12, .fourcc = V4L2_PIX_FMT_NV12 },
+	[SH_VEU_FMT_NV16]   = { .ydepth = 8, .depth = 16, .fourcc = V4L2_PIX_FMT_NV16 },
+	[SH_VEU_FMT_NV24]   = { .ydepth = 8, .depth = 24, .fourcc = V4L2_PIX_FMT_NV24 },
+	[SH_VEU_FMT_RGB332] = { .ydepth = 8, .depth = 8, .fourcc = V4L2_PIX_FMT_RGB332 },
+	[SH_VEU_FMT_RGB444] = { .ydepth = 16, .depth = 16, .fourcc = V4L2_PIX_FMT_RGB444 },
+	[SH_VEU_FMT_RGB565] = { .ydepth = 16, .depth = 16, .fourcc = V4L2_PIX_FMT_RGB565 },
+	[SH_VEU_FMT_RGB666] = { .ydepth = 32, .depth = 32, .fourcc = V4L2_PIX_FMT_BGR666 },
+	[SH_VEU_FMT_RGB24]  = { .ydepth = 24, .depth = 24, .fourcc = V4L2_PIX_FMT_RGB24 },
 };
 
 #define DEFAULT_IN_VFMT (struct sh_veu_vfmt){						\
@@ -359,8 +358,6 @@ static int sh_veu_enum_fmt(struct v4l2_fmtdesc *f, const int *fmt, int fmt_num)
 	if (f->index >= fmt_num)
 		return -EINVAL;
 
-	strscpy(f->description, sh_veu_fmt[fmt[f->index]].name,
-		sizeof(f->description));
 	f->pixelformat = sh_veu_fmt[fmt[f->index]].fourcc;
 	return 0;
 }
diff --git a/drivers/media/platform/sh_vou.c b/drivers/media/platform/sh_vou.c
index 5799aa4b9323..4fc1b4e11b70 100644
--- a/drivers/media/platform/sh_vou.c
+++ b/drivers/media/platform/sh_vou.c
@@ -138,7 +138,6 @@ static void sh_vou_reg_ab_set(struct sh_vou_device *vou_dev, unsigned int reg,
 
 struct sh_vou_fmt {
 	u32		pfmt;
-	char		*desc;
 	unsigned char	bpp;
 	unsigned char	bpl;
 	unsigned char	rgb;
@@ -152,7 +151,6 @@ static struct sh_vou_fmt vou_fmt[] = {
 		.pfmt	= V4L2_PIX_FMT_NV12,
 		.bpp	= 12,
 		.bpl	= 1,
-		.desc	= "YVU420 planar",
 		.yf	= 0,
 		.rgb	= 0,
 	},
@@ -160,7 +158,6 @@ static struct sh_vou_fmt vou_fmt[] = {
 		.pfmt	= V4L2_PIX_FMT_NV16,
 		.bpp	= 16,
 		.bpl	= 1,
-		.desc	= "YVYU planar",
 		.yf	= 1,
 		.rgb	= 0,
 	},
@@ -168,7 +165,6 @@ static struct sh_vou_fmt vou_fmt[] = {
 		.pfmt	= V4L2_PIX_FMT_RGB24,
 		.bpp	= 24,
 		.bpl	= 3,
-		.desc	= "RGB24",
 		.pkf	= 2,
 		.rgb	= 1,
 	},
@@ -176,7 +172,6 @@ static struct sh_vou_fmt vou_fmt[] = {
 		.pfmt	= V4L2_PIX_FMT_RGB565,
 		.bpp	= 16,
 		.bpl	= 2,
-		.desc	= "RGB565",
 		.pkf	= 3,
 		.rgb	= 1,
 	},
@@ -184,7 +179,6 @@ static struct sh_vou_fmt vou_fmt[] = {
 		.pfmt	= V4L2_PIX_FMT_RGB565X,
 		.bpp	= 16,
 		.bpl	= 2,
-		.desc	= "RGB565 byteswapped",
 		.pkf	= 3,
 		.rgb	= 1,
 	},
@@ -398,9 +392,6 @@ static int sh_vou_enum_fmt_vid_out(struct file *file, void  *priv,
 
 	dev_dbg(vou_dev->v4l2_dev.dev, "%s()\n", __func__);
 
-	fmt->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-	strscpy(fmt->description, vou_fmt[fmt->index].desc,
-		sizeof(fmt->description));
 	fmt->pixelformat = vou_fmt[fmt->index].pfmt;
 
 	return 0;
@@ -494,7 +485,8 @@ static void sh_vou_configure_geometry(struct sh_vou_device *vou_dev,
 	if (h_idx)
 		vouvcr |= (1 << 14) | vou_scale_v_fld[h_idx - 1];
 
-	dev_dbg(vou_dev->v4l2_dev.dev, "%s: scaling 0x%x\n", fmt->desc, vouvcr);
+	dev_dbg(vou_dev->v4l2_dev.dev, "0x%08x: scaling 0x%x\n",
+		fmt->pfmt, vouvcr);
 
 	/* To produce a colour bar for testing set bit 23 of VOUVCR */
 	sh_vou_reg_ab_write(vou_dev, VOUVCR, vouvcr);
diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c
index dda04498ac56..60b575bb44c4 100644
--- a/drivers/media/platform/ti-vpe/vpe.c
+++ b/drivers/media/platform/ti-vpe/vpe.c
@@ -224,7 +224,6 @@ static const struct vpe_port_data port_data[11] = {
 
 /* driver info for each of the supported video formats */
 struct vpe_fmt {
-	char	*name;			/* human-readable name */
 	u32	fourcc;			/* standard format identifier */
 	u8	types;			/* CAPTURE and/or OUTPUT */
 	u8	coplanar;		/* set for unpacked Luma and Chroma */
@@ -234,7 +233,6 @@ struct vpe_fmt {
 
 static struct vpe_fmt vpe_formats[] = {
 	{
-		.name		= "NV16 YUV 422 co-planar",
 		.fourcc		= V4L2_PIX_FMT_NV16,
 		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 		.coplanar	= 1,
@@ -243,7 +241,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "NV12 YUV 420 co-planar",
 		.fourcc		= V4L2_PIX_FMT_NV12,
 		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 		.coplanar	= 1,
@@ -252,7 +249,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "YUYV 422 packed",
 		.fourcc		= V4L2_PIX_FMT_YUYV,
 		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 		.coplanar	= 0,
@@ -260,7 +256,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "UYVY 422 packed",
 		.fourcc		= V4L2_PIX_FMT_UYVY,
 		.types		= VPE_FMT_TYPE_CAPTURE | VPE_FMT_TYPE_OUTPUT,
 		.coplanar	= 0,
@@ -268,7 +263,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "RGB888 packed",
 		.fourcc		= V4L2_PIX_FMT_RGB24,
 		.types		= VPE_FMT_TYPE_CAPTURE,
 		.coplanar	= 0,
@@ -276,7 +270,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "ARGB32",
 		.fourcc		= V4L2_PIX_FMT_RGB32,
 		.types		= VPE_FMT_TYPE_CAPTURE,
 		.coplanar	= 0,
@@ -284,7 +277,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "BGR888 packed",
 		.fourcc		= V4L2_PIX_FMT_BGR24,
 		.types		= VPE_FMT_TYPE_CAPTURE,
 		.coplanar	= 0,
@@ -292,7 +284,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "ABGR32",
 		.fourcc		= V4L2_PIX_FMT_BGR32,
 		.types		= VPE_FMT_TYPE_CAPTURE,
 		.coplanar	= 0,
@@ -300,7 +291,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "RGB565",
 		.fourcc		= V4L2_PIX_FMT_RGB565,
 		.types		= VPE_FMT_TYPE_CAPTURE,
 		.coplanar	= 0,
@@ -308,7 +298,6 @@ static struct vpe_fmt vpe_formats[] = {
 				  },
 	},
 	{
-		.name		= "RGB5551",
 		.fourcc		= V4L2_PIX_FMT_RGB555,
 		.types		= VPE_FMT_TYPE_CAPTURE,
 		.coplanar	= 0,
@@ -1514,7 +1503,6 @@ static int __enum_fmt(struct v4l2_fmtdesc *f, u32 type)
 	if (!fmt)
 		return -EINVAL;
 
-	strscpy(f->description, fmt->name, sizeof(f->description));
 	f->pixelformat = fmt->fourcc;
 	return 0;
 }
diff --git a/drivers/media/platform/via-camera.c b/drivers/media/platform/via-camera.c
index 038de7a2027a..d5f811820be9 100644
--- a/drivers/media/platform/via-camera.c
+++ b/drivers/media/platform/via-camera.c
@@ -142,13 +142,11 @@ static struct via_camera *via_cam_info;
  * now this information must be managed at this level too.
  */
 static struct via_format {
-	__u8 *desc;
 	__u32 pixelformat;
 	int bpp;   /* Bytes per pixel */
 	u32 mbus_code;
 } via_formats[] = {
 	{
-		.desc		= "YUYV 4:2:2",
 		.pixelformat	= V4L2_PIX_FMT_YUYV,
 		.mbus_code	= MEDIA_BUS_FMT_YUYV8_2X8,
 		.bpp		= 2,
@@ -860,8 +858,6 @@ static int viacam_enum_fmt_vid_cap(struct file *filp, void *priv,
 {
 	if (fmt->index >= N_VIA_FMTS)
 		return -EINVAL;
-	strscpy(fmt->description, via_formats[fmt->index].desc,
-		sizeof(fmt->description));
 	fmt->pixelformat = via_formats[fmt->index].pixelformat;
 	return 0;
 }
diff --git a/drivers/media/platform/xilinx/xilinx-dma.c b/drivers/media/platform/xilinx/xilinx-dma.c
index c9d5fdb2d407..7bd2600cdc9a 100644
--- a/drivers/media/platform/xilinx/xilinx-dma.c
+++ b/drivers/media/platform/xilinx/xilinx-dma.c
@@ -524,8 +524,6 @@ xvip_dma_enum_format(struct file *file, void *fh, struct v4l2_fmtdesc *f)
 		return -EINVAL;
 
 	f->pixelformat = dma->format.pixelformat;
-	strscpy(f->description, dma->fmtinfo->description,
-		sizeof(f->description));
 
 	return 0;
 }
diff --git a/drivers/media/platform/xilinx/xilinx-vip.c b/drivers/media/platform/xilinx/xilinx-vip.c
index 08a825c3a3f6..6ad61b08a31a 100644
--- a/drivers/media/platform/xilinx/xilinx-vip.c
+++ b/drivers/media/platform/xilinx/xilinx-vip.c
@@ -25,21 +25,21 @@
 
 static const struct xvip_video_format xvip_video_formats[] = {
 	{ XVIP_VF_YUV_422, 8, NULL, MEDIA_BUS_FMT_UYVY8_1X16,
-	  2, V4L2_PIX_FMT_YUYV, "4:2:2, packed, YUYV" },
+	  2, V4L2_PIX_FMT_YUYV },
 	{ XVIP_VF_YUV_444, 8, NULL, MEDIA_BUS_FMT_VUY8_1X24,
-	  3, V4L2_PIX_FMT_YUV444, "4:4:4, packed, YUYV" },
+	  3, V4L2_PIX_FMT_YUV444 },
 	{ XVIP_VF_RBG, 8, NULL, MEDIA_BUS_FMT_RBG888_1X24,
-	  3, 0, NULL },
+	  3, 0 },
 	{ XVIP_VF_MONO_SENSOR, 8, "mono", MEDIA_BUS_FMT_Y8_1X8,
-	  1, V4L2_PIX_FMT_GREY, "Greyscale 8-bit" },
+	  1, V4L2_PIX_FMT_GREY },
 	{ XVIP_VF_MONO_SENSOR, 8, "rggb", MEDIA_BUS_FMT_SRGGB8_1X8,
-	  1, V4L2_PIX_FMT_SRGGB8, "Bayer 8-bit RGGB" },
+	  1, V4L2_PIX_FMT_SRGGB8 },
 	{ XVIP_VF_MONO_SENSOR, 8, "grbg", MEDIA_BUS_FMT_SGRBG8_1X8,
-	  1, V4L2_PIX_FMT_SGRBG8, "Bayer 8-bit GRBG" },
+	  1, V4L2_PIX_FMT_SGRBG8 },
 	{ XVIP_VF_MONO_SENSOR, 8, "gbrg", MEDIA_BUS_FMT_SGBRG8_1X8,
-	  1, V4L2_PIX_FMT_SGBRG8, "Bayer 8-bit GBRG" },
+	  1, V4L2_PIX_FMT_SGBRG8 },
 	{ XVIP_VF_MONO_SENSOR, 8, "bggr", MEDIA_BUS_FMT_SBGGR8_1X8,
-	  1, V4L2_PIX_FMT_SBGGR8, "Bayer 8-bit BGGR" },
+	  1, V4L2_PIX_FMT_SBGGR8 },
 };
 
 /**
diff --git a/drivers/media/platform/xilinx/xilinx-vip.h b/drivers/media/platform/xilinx/xilinx-vip.h
index ba939dd52818..47da39211ae4 100644
--- a/drivers/media/platform/xilinx/xilinx-vip.h
+++ b/drivers/media/platform/xilinx/xilinx-vip.h
@@ -108,7 +108,6 @@ struct xvip_device {
  * @code: media bus format code
  * @bpp: bytes per pixel (when stored in memory)
  * @fourcc: V4L2 pixel format FCC identifier
- * @description: format description, suitable for userspace
  */
 struct xvip_video_format {
 	unsigned int vf_code;
@@ -117,7 +116,6 @@ struct xvip_video_format {
 	unsigned int code;
 	unsigned int bpp;
 	u32 fourcc;
-	const char *description;
 };
 
 const struct xvip_video_format *xvip_get_format_by_code(unsigned int code);
diff --git a/include/media/drv-intf/exynos-fimc.h b/include/media/drv-intf/exynos-fimc.h
index 59703439bb37..6b9ef631d6bb 100644
--- a/include/media/drv-intf/exynos-fimc.h
+++ b/include/media/drv-intf/exynos-fimc.h
@@ -87,7 +87,6 @@ struct fimc_source_info {
 /**
  * struct fimc_fmt - color format data structure
  * @mbus_code: media bus pixel code, -1 if not applicable
- * @name: format description
  * @fourcc: fourcc code for this format, 0 if not applicable
  * @color: the driver's private color format id
  * @memplanes: number of physically non-contiguous data planes
@@ -99,7 +98,6 @@ struct fimc_source_info {
  */
 struct fimc_fmt {
 	u32 mbus_code;
-	char	*name;
 	u32	fourcc;
 	u32	color;
 	u16	memplanes;
-- 
cgit v1.2.3


From 66b5f1c439843bcbab01cc7f3854ae2742f3d1e3 Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Thu, 18 Jul 2019 23:30:03 -0700
Subject: net-ipv6-ndisc: add support for RFC7710 RA Captive Portal Identifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is trivial since we already have support for the entirely
identical (from the kernel's point of view) RDNSS and DNSSL that
also contain opaque data that needs to be passed down to userspace.

As specified in RFC7710, Captive Portal option contains a URL.
8-bit identifier of the option type as assigned by the IANA is 37.
This option should also be treated as userland.

Hence, treat ND option 37 as userland (Captive Portal support)

See:
  https://tools.ietf.org/html/rfc7710
  https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml

Fixes: e35f30c131a56
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: Remin Nguyen Van <reminv@google.com>
Cc: Alexey I. Froloff <raorn@raorn.name>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ndisc.h | 1 +
 net/ipv6/ndisc.c    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 366150053043..b2f715ca0567 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -40,6 +40,7 @@ enum {
 	ND_OPT_RDNSS = 25,		/* RFC5006 */
 	ND_OPT_DNSSL = 31,		/* RFC6106 */
 	ND_OPT_6CO = 34,		/* RFC6775 */
+	ND_OPT_CAPTIVE_PORTAL = 37,	/* RFC7710 */
 	__ND_OPT_MAX
 };
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 083cc1c94cd3..53caf59c591e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -196,6 +196,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev,
 {
 	return opt->nd_opt_type == ND_OPT_RDNSS ||
 		opt->nd_opt_type == ND_OPT_DNSSL ||
+		opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
 		ndisc_ops_is_useropt(dev, opt->nd_opt_type);
 }
 
-- 
cgit v1.2.3


From a57d6acaf352d91e52271704f45c72e14cd2d98a Mon Sep 17 00:00:00 2001
From: Pawel Osciak <posciak@chromium.org>
Date: Thu, 11 Jul 2019 16:26:42 -0400
Subject: media: uapi: Add VP8 stateless decoder API

Add the parsed VP8 frame pixel format and controls, to be used
with the new stateless decoder API for VP8 to provide parameters
for accelerator (aka stateless) codecs.

Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Signed-off-by: Pawel Osciak <posciak@chromium.org>
Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/biblio.rst            |  10 +
 Documentation/media/uapi/v4l/ext-ctrls-codec.rst   | 323 +++++++++++++++++++++
 Documentation/media/uapi/v4l/pixfmt-compressed.rst |  20 ++
 drivers/media/v4l2-core/v4l2-ctrls.c               |  10 +
 drivers/media/v4l2-core/v4l2-ioctl.c               |   1 +
 include/media/v4l2-ctrls.h                         |   3 +
 include/media/vp8-ctrls.h                          | 110 +++++++
 7 files changed, 477 insertions(+)
 create mode 100644 include/media/vp8-ctrls.h

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/biblio.rst b/Documentation/media/uapi/v4l/biblio.rst
index 8f4eb8823d82..ad2ff258afa8 100644
--- a/Documentation/media/uapi/v4l/biblio.rst
+++ b/Documentation/media/uapi/v4l/biblio.rst
@@ -395,3 +395,13 @@ colimg
 :title:     Color Imaging: Fundamentals and Applications
 
 :author:    Erik Reinhard et al.
+
+.. _vp8:
+
+VP8
+===
+
+
+:title:     RFC 6386: "VP8 Data Format and Decoding Guide"
+
+:author:    J. Bankoski et al.
diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index d6ea2ffd65c5..c5f39dd50043 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -2234,6 +2234,329 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     Quantization parameter for a P frame for FWHT. Valid range: from 1
     to 31.
 
+.. _v4l2-mpeg-vp8:
+
+``V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER (struct)``
+    Specifies the frame parameters for the associated VP8 parsed frame data.
+    This includes the necessary parameters for
+    configuring a stateless hardware decoding pipeline for VP8.
+    The bitstream parameters are defined according to :ref:`vp8`.
+
+    .. note::
+
+       This compound control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_ctrl_vp8_frame_header
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{5.8cm}|p{4.8cm}|p{6.6cm}|
+
+.. flat-table:: struct v4l2_ctrl_vp8_frame_header
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - struct :c:type:`v4l2_vp8_segment_header`
+      - ``segment_header``
+      - Structure with segment-based adjustments metadata.
+    * - struct :c:type:`v4l2_vp8_loopfilter_header`
+      - ``loopfilter_header``
+      - Structure with loop filter level adjustments metadata.
+    * - struct :c:type:`v4l2_vp8_quantization_header`
+      - ``quant_header``
+      - Structure with VP8 dequantization indices metadata.
+    * - struct :c:type:`v4l2_vp8_entropy_header`
+      - ``entropy_header``
+      - Structure with VP8 entropy coder probabilities metadata.
+    * - struct :c:type:`v4l2_vp8_entropy_coder_state`
+      - ``coder_state``
+      - Structure with VP8 entropy coder state.
+    * - __u16
+      - ``width``
+      - The width of the frame. Must be set for all frames.
+    * - __u16
+      - ``height``
+      - The height of the frame. Must be set for all frames.
+    * - __u8
+      - ``horizontal_scale``
+      - Horizontal scaling factor.
+    * - __u8
+      - ``vertical_scaling factor``
+      - Vertical scale.
+    * - __u8
+      - ``version``
+      - Bitstream version.
+    * - __u8
+      - ``prob_skip_false``
+      - Indicates the probability that the macroblock is not skipped.
+    * - __u8
+      - ``prob_intra``
+      - Indicates the probability that a macroblock is intra-predicted.
+    * - __u8
+      - ``prob_last``
+      - Indicates the probability that the last reference frame is used
+        for inter-prediction
+    * - __u8
+      - ``prob_gf``
+      - Indicates the probability that the golden reference frame is used
+        for inter-prediction
+    * - __u8
+      - ``num_dct_parts``
+      - Number of DCT coefficients partitions. Must be one of: 1, 2, 4, or 8.
+    * - __u32
+      - ``first_part_size``
+      - Size of the first partition, i.e. the control partition.
+    * - __u32
+      - ``first_part_header_bits``
+      - Size in bits of the first partition header portion.
+    * - __u32
+      - ``dct_part_sizes[8]``
+      - DCT coefficients sizes.
+    * - __u64
+      - ``last_frame_ts``
+      - Timestamp for the V4L2 capture buffer to use as last reference frame, used
+        with inter-coded frames. The timestamp refers to the ``timestamp`` field in
+	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
+	function to convert the struct :c:type:`timeval` in struct
+	:c:type:`v4l2_buffer` to a __u64.
+    * - __u64
+      - ``golden_frame_ts``
+      - Timestamp for the V4L2 capture buffer to use as last reference frame, used
+        with inter-coded frames. The timestamp refers to the ``timestamp`` field in
+	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
+	function to convert the struct :c:type:`timeval` in struct
+	:c:type:`v4l2_buffer` to a __u64.
+    * - __u64
+      - ``alt_frame_ts``
+      - Timestamp for the V4L2 capture buffer to use as alternate reference frame, used
+        with inter-coded frames. The timestamp refers to the ``timestamp`` field in
+	struct :c:type:`v4l2_buffer`. Use the :c:func:`v4l2_timeval_to_ns()`
+	function to convert the struct :c:type:`timeval` in struct
+	:c:type:`v4l2_buffer` to a __u64.
+    * - __u64
+      - ``flags``
+      - See :ref:`Frame Header Flags <vp8_frame_header_flags>`
+
+.. _vp8_frame_header_flags:
+
+``Frame Header Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_VP8_FRAME_HEADER_FLAG_KEY_FRAME``
+      - 0x01
+      - Indicates if the frame is a key frame.
+    * - ``V4L2_VP8_FRAME_HEADER_FLAG_EXPERIMENTAL``
+      - 0x02
+      - Experimental bitstream.
+    * - ``V4L2_VP8_FRAME_HEADER_FLAG_SHOW_FRAME``
+      - 0x04
+      - Show frame flag, indicates if the frame is for display.
+    * - ``V4L2_VP8_FRAME_HEADER_FLAG_MB_NO_SKIP_COEFF``
+      - 0x08
+      - Enable/disable skipping of macroblocks with no non-zero coefficients.
+    * - ``V4L2_VP8_FRAME_HEADER_FLAG_SIGN_BIAS_GOLDEN``
+      - 0x10
+      - Sign of motion vectors when the golden frame is referenced.
+    * - ``V4L2_VP8_FRAME_HEADER_FLAG_SIGN_BIAS_ALT``
+      - 0x20
+      - Sign of motion vectors when the alt frame is referenced.
+
+.. c:type:: v4l2_vp8_entropy_coder_state
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.5cm}|p{6.3cm}|p{9.4cm}|
+
+.. flat-table:: struct v4l2_vp8_entropy_coder_state
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``range``
+      -
+    * - __u8
+      - ``value``
+      -
+    * - __u8
+      - ``bit_count``
+      -
+    * - __u8
+      - ``padding``
+      - Applications and drivers must set this to zero.
+
+.. c:type:: v4l2_vp8_segment_header
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.5cm}|p{6.3cm}|p{9.4cm}|
+
+.. flat-table:: struct v4l2_vp8_segment_header
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __s8
+      - ``quant_update[4]``
+      - Signed quantizer value update.
+    * - __s8
+      - ``lf_update[4]``
+      - Signed loop filter level value update.
+    * - __u8
+      - ``segment_probs[3]``
+      - Segment probabilities.
+    * - __u8
+      - ``padding``
+      - Applications and drivers must set this to zero.
+    * - __u32
+      - ``flags``
+      - See :ref:`Segment Header Flags <vp8_segment_header_flags>`
+
+.. _vp8_segment_header_flags:
+
+``Segment Header Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_VP8_SEGMENT_HEADER_FLAG_ENABLED``
+      - 0x01
+      - Enable/disable segment-based adjustments.
+    * - ``V4L2_VP8_SEGMENT_HEADER_FLAG_UPDATE_MAP``
+      - 0x02
+      - Indicates if the macroblock segmentation map is updated in this frame.
+    * - ``V4L2_VP8_SEGMENT_HEADER_FLAG_UPDATE_FEATURE_DATA``
+      - 0x04
+      - Indicates if the segment feature data is updated in this frame.
+    * - ``V4L2_VP8_SEGMENT_HEADER_FLAG_DELTA_VALUE_MODE``
+      - 0x08
+      - If is set, the segment feature data mode is delta-value.
+        If cleared, it's absolute-value.
+
+.. c:type:: v4l2_vp8_loopfilter_header
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.5cm}|p{6.3cm}|p{9.4cm}|
+
+.. flat-table:: struct v4l2_vp8_loopfilter_header
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __s8
+      - ``ref_frm_delta[4]``
+      - Reference adjustment (signed) delta value.
+    * - __s8
+      - ``mb_mode_delta[4]``
+      - Macroblock prediction mode adjustment (signed) delta value.
+    * - __u8
+      - ``sharpness_level``
+      - Sharpness level
+    * - __u8
+      - ``level``
+      - Filter level
+    * - __u16
+      - ``padding``
+      - Applications and drivers must set this to zero.
+    * - __u32
+      - ``flags``
+      - See :ref:`Loopfilter Header Flags <vp8_loopfilter_header_flags>`
+
+.. _vp8_loopfilter_header_flags:
+
+``Loopfilter Header Flags``
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_VP8_LF_HEADER_ADJ_ENABLE``
+      - 0x01
+      - Enable/disable macroblock-level loop filter adjustment.
+    * - ``V4L2_VP8_LF_HEADER_DELTA_UPDATE``
+      - 0x02
+      - Indicates if the delta values used in an adjustment are updated.
+    * - ``V4L2_VP8_LF_FILTER_TYPE_SIMPLE``
+      - 0x04
+      - If set, indicates the filter type is simple.
+        If cleared, the filter type is normal.
+
+.. c:type:: v4l2_vp8_quantization_header
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.5cm}|p{6.3cm}|p{9.4cm}|
+
+.. flat-table:: struct v4l2_vp8_quantization_header
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``y_ac_qi``
+      - Luma AC coefficient table index.
+    * - __s8
+      - ``y_dc_delta``
+      - Luma DC delta vaue.
+    * - __s8
+      - ``y2_dc_delta``
+      - Y2 block DC delta value.
+    * - __s8
+      - ``y2_ac_delta``
+      - Y2 block AC delta value.
+    * - __s8
+      - ``uv_dc_delta``
+      - Chroma DC delta value.
+    * - __s8
+      - ``uv_ac_delta``
+      - Chroma AC delta value.
+    * - __u16
+      - ``padding``
+      - Applications and drivers must set this to zero.
+
+.. c:type:: v4l2_vp8_entropy_header
+
+.. cssclass:: longtable
+
+.. tabularcolumns:: |p{1.5cm}|p{6.3cm}|p{9.4cm}|
+
+.. flat-table:: struct v4l2_vp8_entropy_header
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - __u8
+      - ``coeff_probs[4][8][3][11]``
+      - Coefficient update probabilities.
+    * - __u8
+      - ``y_mode_probs[4]``
+      - Luma mode update probabilities.
+    * - __u8
+      - ``uv_mode_probs[3]``
+      - Chroma mode update probabilities.
+    * - __u8
+      - ``mv_probs[2][19]``
+      - MV decoding update probabilities.
+    * - __u8
+      - ``padding[3]``
+      - Applications and drivers must set this to zero.
+
 .. raw:: latex
 
     \normalsize
diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index 4b701fc7653e..f52a7b67023d 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -133,6 +133,26 @@ Compressed Formats
       - ``V4L2_PIX_FMT_VP8``
       - 'VP80'
       - VP8 video elementary stream.
+    * .. _V4L2-PIX-FMT-VP8-FRAME:
+
+      - ``V4L2_PIX_FMT_VP8_FRAME``
+      - 'VP8F'
+      - VP8 parsed frame, as extracted from the container.
+	This format is adapted for stateless video decoders that implement a
+	VP8 pipeline (using the :ref:`mem2mem` and :ref:`media-request-api`).
+	Metadata associated with the frame to decode is required to be passed
+	through the ``V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER`` control.
+	See the :ref:`associated Codec Control IDs <v4l2-mpeg-vp8>`.
+	Exactly one output and one capture buffer must be provided for use with
+	this pixel format. The output buffer must contain the appropriate number
+	of macroblocks to decode a full corresponding frame to the matching
+	capture buffer.
+
+	.. note::
+
+	   This format is not yet part of the public kernel API and it
+	   is expected to change.
+
     * .. _V4L2-PIX-FMT-VP9:
 
       - ``V4L2_PIX_FMT_VP9``
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 739418aa9108..b2c9f5816c4a 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -885,6 +885,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP:		return "VPX P-Frame QP Value";
 	case V4L2_CID_MPEG_VIDEO_VP8_PROFILE:			return "VP8 Profile";
 	case V4L2_CID_MPEG_VIDEO_VP9_PROFILE:			return "VP9 Profile";
+	case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER:		return "VP8 Frame Header";
 
 	/* HEVC controls */
 	case V4L2_CID_MPEG_VIDEO_HEVC_I_FRAME_QP:		return "HEVC I-Frame QP Value";
@@ -1345,6 +1346,9 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS:
 		*type = V4L2_CTRL_TYPE_H264_DECODE_PARAMS;
 		break;
+	case V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER:
+		*type = V4L2_CTRL_TYPE_VP8_FRAME_HEADER;
+		break;
 	default:
 		*type = V4L2_CTRL_TYPE_INTEGER;
 		break;
@@ -1690,6 +1694,9 @@ static int std_validate_compound(const struct v4l2_ctrl *ctrl, u32 idx,
 	case V4L2_CTRL_TYPE_H264_SLICE_PARAMS:
 	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
 		break;
+
+	case V4L2_CTRL_TYPE_VP8_FRAME_HEADER:
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2360,6 +2367,9 @@ static struct v4l2_ctrl *v4l2_ctrl_new(struct v4l2_ctrl_handler *hdl,
 	case V4L2_CTRL_TYPE_H264_DECODE_PARAMS:
 		elem_size = sizeof(struct v4l2_ctrl_h264_decode_params);
 		break;
+	case V4L2_CTRL_TYPE_VP8_FRAME_HEADER:
+		elem_size = sizeof(struct v4l2_ctrl_vp8_frame_header);
+		break;
 	default:
 		if (type < V4L2_CTRL_COMPOUND_TYPES)
 			elem_size = sizeof(s32);
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 20cc23ef730e..80efc581e3f9 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1348,6 +1348,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 		case V4L2_PIX_FMT_VC1_ANNEX_G:	descr = "VC-1 (SMPTE 412M Annex G)"; break;
 		case V4L2_PIX_FMT_VC1_ANNEX_L:	descr = "VC-1 (SMPTE 412M Annex L)"; break;
 		case V4L2_PIX_FMT_VP8:		descr = "VP8"; break;
+		case V4L2_PIX_FMT_VP8_FRAME:    descr = "VP8 Frame"; break;
 		case V4L2_PIX_FMT_VP9:		descr = "VP9"; break;
 		case V4L2_PIX_FMT_HEVC:		descr = "HEVC"; break; /* aka H.265 */
 		case V4L2_PIX_FMT_FWHT:		descr = "FWHT"; break; /* used in vicodec */
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index b4433483af23..6e9dc9c44bb1 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -20,6 +20,7 @@
 #include <media/mpeg2-ctrls.h>
 #include <media/fwht-ctrls.h>
 #include <media/h264-ctrls.h>
+#include <media/vp8-ctrls.h>
 
 /* forward references */
 struct file;
@@ -48,6 +49,7 @@ struct poll_table_struct;
  * @p_h264_scaling_matrix:	Pointer to a struct v4l2_ctrl_h264_scaling_matrix.
  * @p_h264_slice_params:	Pointer to a struct v4l2_ctrl_h264_slice_params.
  * @p_h264_decode_params:	Pointer to a struct v4l2_ctrl_h264_decode_params.
+ * @p_vp8_frame_header:		Pointer to a VP8 frame header structure.
  * @p:				Pointer to a compound value.
  */
 union v4l2_ctrl_ptr {
@@ -65,6 +67,7 @@ union v4l2_ctrl_ptr {
 	struct v4l2_ctrl_h264_scaling_matrix *p_h264_scaling_matrix;
 	struct v4l2_ctrl_h264_slice_params *p_h264_slice_params;
 	struct v4l2_ctrl_h264_decode_params *p_h264_decode_params;
+	struct v4l2_ctrl_vp8_frame_header *p_vp8_frame_header;
 	void *p;
 };
 
diff --git a/include/media/vp8-ctrls.h b/include/media/vp8-ctrls.h
new file mode 100644
index 000000000000..6cc2eeea4c90
--- /dev/null
+++ b/include/media/vp8-ctrls.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * These are the VP8 state controls for use with stateless VP8
+ * codec drivers.
+ *
+ * It turns out that these structs are not stable yet and will undergo
+ * more changes. So keep them private until they are stable and ready to
+ * become part of the official public API.
+ */
+
+#ifndef _VP8_CTRLS_H_
+#define _VP8_CTRLS_H_
+
+#define V4L2_PIX_FMT_VP8_FRAME v4l2_fourcc('V', 'P', '8', 'F')
+
+#define V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER (V4L2_CID_MPEG_BASE + 2000)
+#define V4L2_CTRL_TYPE_VP8_FRAME_HEADER 0x301
+
+#define V4L2_VP8_SEGMENT_HEADER_FLAG_ENABLED              0x01
+#define V4L2_VP8_SEGMENT_HEADER_FLAG_UPDATE_MAP           0x02
+#define V4L2_VP8_SEGMENT_HEADER_FLAG_UPDATE_FEATURE_DATA  0x04
+#define V4L2_VP8_SEGMENT_HEADER_FLAG_DELTA_VALUE_MODE     0x08
+
+struct v4l2_vp8_segment_header {
+	__s8 quant_update[4];
+	__s8 lf_update[4];
+	__u8 segment_probs[3];
+	__u8 padding;
+	__u32 flags;
+};
+
+#define V4L2_VP8_LF_HEADER_ADJ_ENABLE	0x01
+#define V4L2_VP8_LF_HEADER_DELTA_UPDATE	0x02
+#define V4L2_VP8_LF_FILTER_TYPE_SIMPLE	0x04
+struct v4l2_vp8_loopfilter_header {
+	__s8 ref_frm_delta[4];
+	__s8 mb_mode_delta[4];
+	__u8 sharpness_level;
+	__u8 level;
+	__u16 padding;
+	__u32 flags;
+};
+
+struct v4l2_vp8_quantization_header {
+	__u8 y_ac_qi;
+	__s8 y_dc_delta;
+	__s8 y2_dc_delta;
+	__s8 y2_ac_delta;
+	__s8 uv_dc_delta;
+	__s8 uv_ac_delta;
+	__u16 padding;
+};
+
+struct v4l2_vp8_entropy_header {
+	__u8 coeff_probs[4][8][3][11];
+	__u8 y_mode_probs[4];
+	__u8 uv_mode_probs[3];
+	__u8 mv_probs[2][19];
+	__u8 padding[3];
+};
+
+struct v4l2_vp8_entropy_coder_state {
+	__u8 range;
+	__u8 value;
+	__u8 bit_count;
+	__u8 padding;
+};
+
+#define V4L2_VP8_FRAME_HEADER_FLAG_KEY_FRAME		0x01
+#define V4L2_VP8_FRAME_HEADER_FLAG_EXPERIMENTAL		0x02
+#define V4L2_VP8_FRAME_HEADER_FLAG_SHOW_FRAME		0x04
+#define V4L2_VP8_FRAME_HEADER_FLAG_MB_NO_SKIP_COEFF	0x08
+#define V4L2_VP8_FRAME_HEADER_FLAG_SIGN_BIAS_GOLDEN	0x10
+#define V4L2_VP8_FRAME_HEADER_FLAG_SIGN_BIAS_ALT	0x20
+
+#define VP8_FRAME_IS_KEY_FRAME(hdr) \
+	(!!((hdr)->flags & V4L2_VP8_FRAME_HEADER_FLAG_KEY_FRAME))
+
+struct v4l2_ctrl_vp8_frame_header {
+	struct v4l2_vp8_segment_header segment_header;
+	struct v4l2_vp8_loopfilter_header lf_header;
+	struct v4l2_vp8_quantization_header quant_header;
+	struct v4l2_vp8_entropy_header entropy_header;
+	struct v4l2_vp8_entropy_coder_state coder_state;
+
+	__u16 width;
+	__u16 height;
+
+	__u8 horizontal_scale;
+	__u8 vertical_scale;
+
+	__u8 version;
+	__u8 prob_skip_false;
+	__u8 prob_intra;
+	__u8 prob_last;
+	__u8 prob_gf;
+	__u8 num_dct_parts;
+
+	__u32 first_part_size;
+	__u32 first_part_header_bits;
+	__u32 dct_part_sizes[8];
+
+	__u64 last_frame_ts;
+	__u64 golden_frame_ts;
+	__u64 alt_frame_ts;
+
+	__u64 flags;
+};
+
+#endif
-- 
cgit v1.2.3


From a81431e7d1077a84695fc7be487fc2b4930f8c3d Mon Sep 17 00:00:00 2001
From: Sean Young <sean@mess.org>
Date: Fri, 12 Jul 2019 18:46:59 -0400
Subject: media: rc: remove unused #define RC_PROTO_BIT_ALL

This lists all the protocols that the kernel knows about, however there
are no users.

Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/rc-map.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index bebd3c4c6338..3a7f8728f6ec 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -38,22 +38,6 @@
 #define RC_PROTO_BIT_RCMM32		BIT_ULL(RC_PROTO_RCMM32)
 #define RC_PROTO_BIT_XBOX_DVD		BIT_ULL(RC_PROTO_XBOX_DVD)
 
-#define RC_PROTO_BIT_ALL \
-			(RC_PROTO_BIT_UNKNOWN | RC_PROTO_BIT_OTHER | \
-			 RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \
-			 RC_PROTO_BIT_RC5_SZ | RC_PROTO_BIT_JVC | \
-			 RC_PROTO_BIT_SONY12 | RC_PROTO_BIT_SONY15 | \
-			 RC_PROTO_BIT_SONY20 | RC_PROTO_BIT_NEC | \
-			 RC_PROTO_BIT_NECX | RC_PROTO_BIT_NEC32 | \
-			 RC_PROTO_BIT_SANYO | \
-			 RC_PROTO_BIT_MCIR2_KBD | RC_PROTO_BIT_MCIR2_MSE | \
-			 RC_PROTO_BIT_RC6_0 | RC_PROTO_BIT_RC6_6A_20 | \
-			 RC_PROTO_BIT_RC6_6A_24 | RC_PROTO_BIT_RC6_6A_32 | \
-			 RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_SHARP | \
-			 RC_PROTO_BIT_XMP | RC_PROTO_BIT_CEC | \
-			 RC_PROTO_BIT_IMON | RC_PROTO_BIT_RCMM12 | \
-			 RC_PROTO_BIT_RCMM24 | RC_PROTO_BIT_RCMM32 | \
-			 RC_PROTO_BIT_XBOX_DVD)
 /* All rc protocols for which we have decoders */
 #define RC_PROTO_BIT_ALL_IR_DECODER \
 			(RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC5X_20 | \
-- 
cgit v1.2.3


From 66193b24514c91aeda88da744554b2665471aeae Mon Sep 17 00:00:00 2001
From: Jan Pieter van Woerkom <jp@jpvw.nl>
Date: Wed, 17 Jul 2019 14:09:10 -0400
Subject: media: dvbsky: add support for Mygica T230C v2

Adds support for the "Mygica T230C v2" to the dvbsky driver.

Signed-off-by: Jan Pieter van Woerkom <jp@jpvw.nl>
Tested-by: Frank Rysanek <Frantisek.Rysanek@post.cz>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/usb/dvb-usb-v2/dvbsky.c | 5 +++++
 include/media/dvb-usb-ids.h           | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/media/usb/dvb-usb-v2/dvbsky.c b/drivers/media/usb/dvb-usb-v2/dvbsky.c
index 8610487f2d72..bbfe1cfdc013 100644
--- a/drivers/media/usb/dvb-usb-v2/dvbsky.c
+++ b/drivers/media/usb/dvb-usb-v2/dvbsky.c
@@ -540,6 +540,8 @@ static int dvbsky_mygica_t230c_attach(struct dvb_usb_adapter *adap)
 	si2168_config.i2c_adapter = &i2c_adapter;
 	si2168_config.fe = &adap->fe[0];
 	si2168_config.ts_mode = SI2168_TS_PARALLEL;
+	if (le16_to_cpu(d->udev->descriptor.idProduct) == USB_PID_MYGICA_T230C2)
+		si2168_config.ts_mode |= SI2168_TS_CLK_MANUAL;
 	si2168_config.ts_clock_inv = 1;
 
 	state->i2c_client_demod = dvb_module_probe("si2168", NULL,
@@ -779,6 +781,9 @@ static const struct usb_device_id dvbsky_id_table[] = {
 	{ DVB_USB_DEVICE(USB_VID_CONEXANT, USB_PID_MYGICA_T230C,
 		&mygica_t230c_props, "MyGica Mini DVB-T2 USB Stick T230C",
 		RC_MAP_TOTAL_MEDIA_IN_HAND_02) },
+	{ DVB_USB_DEVICE(USB_VID_CONEXANT, USB_PID_MYGICA_T230C2,
+		&mygica_t230c_props, "MyGica Mini DVB-T2 USB Stick T230C v2",
+		RC_MAP_TOTAL_MEDIA_IN_HAND_02) },
 	{ }
 };
 MODULE_DEVICE_TABLE(usb, dvbsky_id_table);
diff --git a/include/media/dvb-usb-ids.h b/include/media/dvb-usb-ids.h
index 52875e3eee71..7ce4e8332421 100644
--- a/include/media/dvb-usb-ids.h
+++ b/include/media/dvb-usb-ids.h
@@ -388,6 +388,7 @@
 #define USB_PID_MYGICA_D689				0xd811
 #define USB_PID_MYGICA_T230				0xc688
 #define USB_PID_MYGICA_T230C				0xc689
+#define USB_PID_MYGICA_T230C2				0xc68a
 #define USB_PID_ELGATO_EYETV_DIVERSITY			0x0011
 #define USB_PID_ELGATO_EYETV_DTT			0x0021
 #define USB_PID_ELGATO_EYETV_DTT_2			0x003f
-- 
cgit v1.2.3


From cf949bbe22bee8749078e0b810ee2dc60a983746 Mon Sep 17 00:00:00 2001
From: Avri Altman <avri.altman@wdc.com>
Date: Wed, 12 Jun 2019 16:34:37 +0300
Subject: scsi: ufs: uapi: Fix SPDX license identifier

Added 'WITH Linux-syscall-note' exception which is the officially assigned
exception identifier for the kernel syscall exception.  This exception
makes it possible to include GPL headers into non GPL code without
confusing license compliance tools.

Fixes: a851b2bd3632 (scsi: uapi: ufs: Make utp_upiu_req visible to user space)
Signed-off-by: Avri Altman <avri.altman@wdc.com>
Reviewed-by: Pedro Sousa <pedrom.sousa@synopsys.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/uapi/scsi/scsi_bsg_ufs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/scsi/scsi_bsg_ufs.h b/include/uapi/scsi/scsi_bsg_ufs.h
index 17c7abd0803a..9988db6ad244 100644
--- a/include/uapi/scsi/scsi_bsg_ufs.h
+++ b/include/uapi/scsi/scsi_bsg_ufs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * UFS Transport SGIO v4 BSG Message Support
  *
-- 
cgit v1.2.3


From 8930a6c207918d5a5675eedab06a71096b1a3d47 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 30 May 2019 13:28:10 +0200
Subject: scsi: core: add support for request batching

This allows a list of requests to be issued, with the LLD only writing the
hardware doorbell when necessary, after the last request was prepared.
This is more efficient if we have lists of requests to issue, particularly
on virtualized hardware, where writing the doorbell is more expensive than
on real hardware.

The use case for this is plugged IO, where blk-mq flushes a batch of
requests all at once.

The API is the same as for blk-mq, just with blk-mq concepts tweaked to
fit the SCSI subsystem API: the "last" flag in blk_mq_queue_data becomes a
flag in scsi_cmnd, while the queue_num in the commit_rqs callback is
extracted from the hctx and passed as a parameter.

The only complication is that blk-mq uses different plugging heuristics
depending on whether commit_rqs is present or not.  So we have two
different sets of blk_mq_ops and pick one depending on whether the
scsi_host template uses commit_rqs or not.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c  | 37 ++++++++++++++++++++++++++++++++++---
 include/scsi/scsi_cmnd.h |  1 +
 include/scsi/scsi_host.h | 16 ++++++++++++++--
 3 files changed, 49 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9381171c2fc0..c72bce2f0cf1 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1666,10 +1666,11 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		blk_mq_start_request(req);
 	}
 
+	cmd->flags &= SCMD_PRESERVED_FLAGS;
 	if (sdev->simple_tags)
 		cmd->flags |= SCMD_TAGGED;
-	else
-		cmd->flags &= ~SCMD_TAGGED;
+	if (bd->last)
+		cmd->flags |= SCMD_LAST;
 
 	scsi_init_cmd_errh(cmd);
 	cmd->scsi_done = scsi_mq_done;
@@ -1807,10 +1808,37 @@ void __scsi_init_queue(struct Scsi_Host *shost, struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(__scsi_init_queue);
 
+static const struct blk_mq_ops scsi_mq_ops_no_commit = {
+	.get_budget	= scsi_mq_get_budget,
+	.put_budget	= scsi_mq_put_budget,
+	.queue_rq	= scsi_queue_rq,
+	.complete	= scsi_softirq_done,
+	.timeout	= scsi_timeout,
+#ifdef CONFIG_BLK_DEBUG_FS
+	.show_rq	= scsi_show_rq,
+#endif
+	.init_request	= scsi_mq_init_request,
+	.exit_request	= scsi_mq_exit_request,
+	.initialize_rq_fn = scsi_initialize_rq,
+	.busy		= scsi_mq_lld_busy,
+	.map_queues	= scsi_map_queues,
+};
+
+
+static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct scsi_device *sdev = q->queuedata;
+	struct Scsi_Host *shost = sdev->host;
+
+	shost->hostt->commit_rqs(shost, hctx->queue_num);
+}
+
 static const struct blk_mq_ops scsi_mq_ops = {
 	.get_budget	= scsi_mq_get_budget,
 	.put_budget	= scsi_mq_put_budget,
 	.queue_rq	= scsi_queue_rq,
+	.commit_rqs	= scsi_commit_rqs,
 	.complete	= scsi_softirq_done,
 	.timeout	= scsi_timeout,
 #ifdef CONFIG_BLK_DEBUG_FS
@@ -1846,7 +1874,10 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
 			sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT;
 
 	memset(&shost->tag_set, 0, sizeof(shost->tag_set));
-	shost->tag_set.ops = &scsi_mq_ops;
+	if (shost->hostt->commit_rqs)
+		shost->tag_set.ops = &scsi_mq_ops;
+	else
+		shost->tag_set.ops = &scsi_mq_ops_no_commit;
 	shost->tag_set.nr_hw_queues = shost->nr_hw_queues ? : 1;
 	shost->tag_set.queue_depth = shost->can_queue;
 	shost->tag_set.cmd_size = cmd_size;
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 76ed5e4acd38..91bd749a02f7 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -57,6 +57,7 @@ struct scsi_pointer {
 #define SCMD_TAGGED		(1 << 0)
 #define SCMD_UNCHECKED_ISA_DMA	(1 << 1)
 #define SCMD_INITIALIZED	(1 << 2)
+#define SCMD_LAST		(1 << 3)
 /* flags preserved across unprep / reprep */
 #define SCMD_PRESERVED_FLAGS	(SCMD_UNCHECKED_ISA_DMA | SCMD_INITIALIZED)
 
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index cc139dbd71e5..31e0d6ca1eba 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -80,8 +80,10 @@ struct scsi_host_template {
 	 * command block to the LLDD.  When the driver finished
 	 * processing the command the done callback is invoked.
 	 *
-	 * If queuecommand returns 0, then the HBA has accepted the
-	 * command.  The done() function must be called on the command
+	 * If queuecommand returns 0, then the driver has accepted the
+	 * command.  It must also push it to the HBA if the scsi_cmnd
+	 * flag SCMD_LAST is set, or if the driver does not implement
+	 * commit_rqs.  The done() function must be called on the command
 	 * when the driver has finished with it. (you may call done on the
 	 * command before queuecommand returns, but in this case you
 	 * *must* return 0 from queuecommand).
@@ -109,6 +111,16 @@ struct scsi_host_template {
 	 */
 	int (* queuecommand)(struct Scsi_Host *, struct scsi_cmnd *);
 
+	/*
+	 * The commit_rqs function is used to trigger a hardware
+	 * doorbell after some requests have been queued with
+	 * queuecommand, when an error is encountered before sending
+	 * the request with SCMD_LAST set.
+	 *
+	 * STATUS: OPTIONAL
+	 */
+	void (*commit_rqs)(struct Scsi_Host *, u16);
+
 	/*
 	 * This is an error handling strategy routine.  You don't need to
 	 * define one of these if you don't want to - there is a default
-- 
cgit v1.2.3


From 6ee82ef04e38b0d8b09b04bc1b068673deed6582 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Mon, 1 Jul 2019 13:46:51 +0200
Subject: clk: Add missing documentation of devm_clk_bulk_get_optional()
 argument

Fix an incomplete devm_clk_bulk_get_optional() function documentation
by adding description of the num_clks argument as in other *clk_bulk*
functions.

Fixes: 9bd5ef0bd874 ("clk: Add devm_clk_bulk_get_optional() function")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 3c096c7a51dc..853a8f181394 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -359,6 +359,7 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
 /**
  * devm_clk_bulk_get_optional - managed get multiple optional consumer clocks
  * @dev: device for clock "consumer"
+ * @num_clks: the number of clk_bulk_data
  * @clks: pointer to the clk_bulk_data table of consumer
  *
  * Behaves the same as devm_clk_bulk_get() except where there is no clock
-- 
cgit v1.2.3


From af311ff9a69189a03548efd5a47d4bb44644fd45 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 17 May 2019 14:09:22 -0700
Subject: firmware: qcom_scm: Cleanup code in qcom_scm_assign_mem()

There are some questionable coding styles in this function. It looks
quite odd to deref a pointer with array indexing that only uses the
first element. Also, destroying an input/output variable halfway through
the function and then overwriting it on success is not clear. It's
better to use a local variable and the kernel macros to step through
each bit set in a bitmask and clearly show where outputs are set.

Cc: Ian Jackson <ian.jackson@citrix.com>
Cc: Julien Grall <julien.grall@arm.com>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Avaneesh Kumar Dwivedi <akdwived@codeaurora.org>
Tested-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
[bjorn: Changed for_each_set_bit() size to BITS_PER_LONG]
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/firmware/qcom_scm.c | 34 ++++++++++++++++------------------
 include/linux/qcom_scm.h    |  9 +++++----
 2 files changed, 21 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c
index c7e63af91567..4802ab170fe5 100644
--- a/drivers/firmware/qcom_scm.c
+++ b/drivers/firmware/qcom_scm.c
@@ -434,7 +434,8 @@ EXPORT_SYMBOL(qcom_scm_set_remote_state);
  */
 int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
 			unsigned int *srcvm,
-			struct qcom_scm_vmperm *newvm, int dest_cnt)
+			const struct qcom_scm_vmperm *newvm,
+			unsigned int dest_cnt)
 {
 	struct qcom_scm_current_perm_info *destvm;
 	struct qcom_scm_mem_map_info *mem_to_map;
@@ -449,11 +450,10 @@ int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
 	int next_vm;
 	__le32 *src;
 	void *ptr;
-	int ret;
-	int len;
-	int i;
+	int ret, i, b;
+	unsigned long srcvm_bits = *srcvm;
 
-	src_sz = hweight_long(*srcvm) * sizeof(*src);
+	src_sz = hweight_long(srcvm_bits) * sizeof(*src);
 	mem_to_map_sz = sizeof(*mem_to_map);
 	dest_sz = dest_cnt * sizeof(*destvm);
 	ptr_sz = ALIGN(src_sz, SZ_64) + ALIGN(mem_to_map_sz, SZ_64) +
@@ -466,28 +466,26 @@ int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
 
 	/* Fill source vmid detail */
 	src = ptr;
-	len = hweight_long(*srcvm);
-	for (i = 0; i < len; i++) {
-		src[i] = cpu_to_le32(ffs(*srcvm) - 1);
-		*srcvm ^= 1 << (ffs(*srcvm) - 1);
-	}
+	i = 0;
+	for_each_set_bit(b, &srcvm_bits, BITS_PER_LONG)
+		src[i++] = cpu_to_le32(b);
 
 	/* Fill details of mem buff to map */
 	mem_to_map = ptr + ALIGN(src_sz, SZ_64);
 	mem_to_map_phys = ptr_phys + ALIGN(src_sz, SZ_64);
-	mem_to_map[0].mem_addr = cpu_to_le64(mem_addr);
-	mem_to_map[0].mem_size = cpu_to_le64(mem_sz);
+	mem_to_map->mem_addr = cpu_to_le64(mem_addr);
+	mem_to_map->mem_size = cpu_to_le64(mem_sz);
 
 	next_vm = 0;
 	/* Fill details of next vmid detail */
 	destvm = ptr + ALIGN(mem_to_map_sz, SZ_64) + ALIGN(src_sz, SZ_64);
 	dest_phys = ptr_phys + ALIGN(mem_to_map_sz, SZ_64) + ALIGN(src_sz, SZ_64);
-	for (i = 0; i < dest_cnt; i++) {
-		destvm[i].vmid = cpu_to_le32(newvm[i].vmid);
-		destvm[i].perm = cpu_to_le32(newvm[i].perm);
-		destvm[i].ctx = 0;
-		destvm[i].ctx_size = 0;
-		next_vm |= BIT(newvm[i].vmid);
+	for (i = 0; i < dest_cnt; i++, destvm++, newvm++) {
+		destvm->vmid = cpu_to_le32(newvm->vmid);
+		destvm->perm = cpu_to_le32(newvm->perm);
+		destvm->ctx = 0;
+		destvm->ctx_size = 0;
+		next_vm |= BIT(newvm->vmid);
 	}
 
 	ret = __qcom_scm_assign_mem(__scm->dev, mem_to_map_phys, mem_to_map_sz,
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 3f12cc77fb58..2d5eff506e13 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -49,8 +49,9 @@ extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
 extern int qcom_scm_pas_auth_and_reset(u32 peripheral);
 extern int qcom_scm_pas_shutdown(u32 peripheral);
 extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
-			       unsigned int *src, struct qcom_scm_vmperm *newvm,
-			       int dest_cnt);
+			       unsigned int *src,
+			       const struct qcom_scm_vmperm *newvm,
+			       unsigned int dest_cnt);
 extern void qcom_scm_cpu_power_down(u32 flags);
 extern u32 qcom_scm_get_version(void);
 extern int qcom_scm_set_remote_state(u32 state, u32 id);
@@ -87,8 +88,8 @@ qcom_scm_pas_auth_and_reset(u32 peripheral) { return -ENODEV; }
 static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
 static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
 				      unsigned int *src,
-				      struct qcom_scm_vmperm *newvm,
-				      int dest_cnt) { return -ENODEV; }
+				      const struct qcom_scm_vmperm *newvm,
+				      unsigned int dest_cnt) { return -ENODEV; }
 static inline void qcom_scm_cpu_power_down(u32 flags) {}
 static inline u32 qcom_scm_get_version(void) { return 0; }
 static inline u32
-- 
cgit v1.2.3


From d8e18a516f8f67404c0d21af8c93d0474fba0876 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 22 Jul 2019 20:08:26 -0700
Subject: net: Use skb accessors in network core

In preparation for unifying the skb_frag and bio_vec, use the fine
accessors which already exist and use skb_frag_t instead of
struct skb_frag_struct.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 net/core/skbuff.c      | 24 ++++++++++++++----------
 net/core/tso.c         |  8 ++++----
 net/ipv4/tcp.c         | 14 ++++++++------
 net/kcm/kcmsock.c      |  8 ++++----
 net/tls/tls_device.c   | 14 +++++++-------
 6 files changed, 38 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8af86d995d6..f9078e7edb53 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3166,7 +3166,7 @@ static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
 	if (skb_zcopy(skb))
 		return false;
 	if (i) {
-		const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		return page == skb_frag_page(frag) &&
 		       off == frag->page_offset + skb_frag_size(frag);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0338820ee0ec..ba9a36903503 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2485,19 +2485,19 @@ do_frag_list:
 	for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
 		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
 
-		if (offset < frag->size)
+		if (offset < skb_frag_size(frag))
 			break;
 
-		offset -= frag->size;
+		offset -= skb_frag_size(frag);
 	}
 
 	for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) {
 		skb_frag_t *frag  = &skb_shinfo(skb)->frags[fragidx];
 
-		slen = min_t(size_t, len, frag->size - offset);
+		slen = min_t(size_t, len, skb_frag_size(frag) - offset);
 
 		while (slen) {
-			ret = kernel_sendpage_locked(sk, frag->page.p,
+			ret = kernel_sendpage_locked(sk, skb_frag_page(frag),
 						     frag->page_offset + offset,
 						     slen, MSG_DONTWAIT);
 			if (ret <= 0)
@@ -2975,11 +2975,15 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
 	skb_zerocopy_clone(to, from, GFP_ATOMIC);
 
 	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
+		int size;
+
 		if (!len)
 			break;
 		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
-		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
-		len -= skb_shinfo(to)->frags[j].size;
+		size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]),
+					len);
+		skb_frag_size_set(&skb_shinfo(to)->frags[j], size);
+		len -= size;
 		skb_frag_ref(to, j);
 		j++;
 	}
@@ -3293,7 +3297,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb)
 int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 {
 	int from, to, merge, todo;
-	struct skb_frag_struct *fragfrom, *fragto;
+	skb_frag_t *fragfrom, *fragto;
 
 	BUG_ON(shiftlen > skb->len);
 
@@ -3625,10 +3629,10 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
 	struct page *page;
 
 	page = virt_to_head_page(frag_skb->head);
-	head_frag.page.p = page;
+	__skb_frag_set_page(&head_frag, page);
 	head_frag.page_offset = frag_skb->data -
 		(unsigned char *)page_address(page);
-	head_frag.size = skb_headlen(frag_skb);
+	skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
 	return head_frag;
 }
 
@@ -4021,7 +4025,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 
 		pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
 
-		frag->page.p	  = page;
+		__skb_frag_set_page(frag, page);
 		frag->page_offset = first_offset;
 		skb_frag_size_set(frag, first_size);
 
diff --git a/net/core/tso.c b/net/core/tso.c
index 43f4eba61933..d4d5c077ad72 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -55,8 +55,8 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size)
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
 
 		/* Move to next segment */
-		tso->size = frag->size;
-		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->size = skb_frag_size(frag);
+		tso->data = skb_frag_address(frag);
 		tso->next_frag_idx++;
 	}
 }
@@ -79,8 +79,8 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso)
 		skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx];
 
 		/* Move to next segment */
-		tso->size = frag->size;
-		tso->data = page_address(frag->page.p) + frag->page_offset;
+		tso->size = skb_frag_size(frag);
+		tso->data = skb_frag_address(frag);
 		tso->next_frag_idx++;
 	}
 }
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 776905899ac0..f62f0e7e3cdd 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1776,19 +1776,21 @@ static int tcp_zerocopy_receive(struct sock *sk,
 				break;
 			frags = skb_shinfo(skb)->frags;
 			while (offset) {
-				if (frags->size > offset)
+				if (skb_frag_size(frags) > offset)
 					goto out;
-				offset -= frags->size;
+				offset -= skb_frag_size(frags);
 				frags++;
 			}
 		}
-		if (frags->size != PAGE_SIZE || frags->page_offset) {
+		if (skb_frag_size(frags) != PAGE_SIZE || frags->page_offset) {
 			int remaining = zc->recv_skip_hint;
+			int size = skb_frag_size(frags);
 
-			while (remaining && (frags->size != PAGE_SIZE ||
+			while (remaining && (size != PAGE_SIZE ||
 					     frags->page_offset)) {
-				remaining -= frags->size;
+				remaining -= size;
 				frags++;
+				size = skb_frag_size(frags);
 			}
 			zc->recv_skip_hint -= remaining;
 			break;
@@ -3781,7 +3783,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 		return 1;
 
 	for (i = 0; i < shi->nr_frags; ++i) {
-		const struct skb_frag_struct *f = &shi->frags[i];
+		const skb_frag_t *f = &shi->frags[i];
 		unsigned int offset = f->page_offset;
 		struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
 
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 5dbc0c48f8cb..05f63c4300e9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -635,15 +635,15 @@ do_frag_list:
 			frag_offset = 0;
 do_frag:
 			frag = &skb_shinfo(skb)->frags[fragidx];
-			if (WARN_ON(!frag->size)) {
+			if (WARN_ON(!skb_frag_size(frag))) {
 				ret = -EINVAL;
 				goto out;
 			}
 
 			ret = kernel_sendpage(psock->sk->sk_socket,
-					      frag->page.p,
+					      skb_frag_page(frag),
 					      frag->page_offset + frag_offset,
-					      frag->size - frag_offset,
+					      skb_frag_size(frag) - frag_offset,
 					      MSG_DONTWAIT);
 			if (ret <= 0) {
 				if (ret == -EAGAIN) {
@@ -678,7 +678,7 @@ do_frag:
 			sent += ret;
 			frag_offset += ret;
 			KCM_STATS_ADD(psock->stats.tx_bytes, ret);
-			if (frag_offset < frag->size) {
+			if (frag_offset < skb_frag_size(frag)) {
 				/* Not finished with this frag */
 				goto do_frag;
 			}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 7c0b2b778703..4ec8a06fa5d1 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -243,14 +243,14 @@ static void tls_append_frag(struct tls_record_info *record,
 	skb_frag_t *frag;
 
 	frag = &record->frags[record->num_frags - 1];
-	if (frag->page.p == pfrag->page &&
-	    frag->page_offset + frag->size == pfrag->offset) {
-		frag->size += size;
+	if (skb_frag_page(frag) == pfrag->page &&
+	    frag->page_offset + skb_frag_size(frag) == pfrag->offset) {
+		skb_frag_size_add(frag, size);
 	} else {
 		++frag;
-		frag->page.p = pfrag->page;
+		__skb_frag_set_page(frag, pfrag->page);
 		frag->page_offset = pfrag->offset;
-		frag->size = size;
+		skb_frag_size_set(frag, size);
 		++record->num_frags;
 		get_page(pfrag->page);
 	}
@@ -301,8 +301,8 @@ static int tls_push_record(struct sock *sk,
 		frag = &record->frags[i];
 		sg_unmark_end(&offload_ctx->sg_tx_data[i]);
 		sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag),
-			    frag->size, frag->page_offset);
-		sk_mem_charge(sk, frag->size);
+			    skb_frag_size(frag), frag->page_offset);
+		sk_mem_charge(sk, skb_frag_size(frag));
 		get_page(skb_frag_page(frag));
 	}
 	sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]);
-- 
cgit v1.2.3


From b656722906efe434f0befe1d4ae4bb7a66fdc124 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 22 Jul 2019 20:08:27 -0700
Subject: net: Increase the size of skb_frag_t

To increase commonality between block and net, we are going to replace
the skb_frag_t with the bio_vec.  This patch increases the size of
skb_frag_t on 32-bit machines from 8 bytes to 12 bytes.  The size is
unchanged on 64-bit machines.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f9078e7edb53..7910935410e6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -314,13 +314,8 @@ struct skb_frag_struct {
 	struct {
 		struct page *p;
 	} page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
 	__u32 page_offset;
 	__u32 size;
-#else
-	__u16 page_offset;
-	__u16 size;
-#endif
 };
 
 /**
-- 
cgit v1.2.3


From f58ecf1b7d58911921014ffd12c77a4ad33ade71 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 22 Jul 2019 20:08:28 -0700
Subject: net: Reorder the contents of skb_frag_t

Match the layout of bio_vec.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7910935410e6..b9dc8b4f24b1 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -314,8 +314,8 @@ struct skb_frag_struct {
 	struct {
 		struct page *p;
 	} page;
-	__u32 page_offset;
 	__u32 size;
+	__u32 page_offset;
 };
 
 /**
-- 
cgit v1.2.3


From 1dfa5bd38545c6f6a8b6c496e58db93f80da1076 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 22 Jul 2019 20:08:29 -0700
Subject: net: Rename skb_frag page to bv_page

One step closer to turning the skb_frag_t into a bio_vec.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 +++++-------
 net/core/skbuff.c      |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b9dc8b4f24b1..8076e2ba8349 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -311,9 +311,7 @@ extern int sysctl_max_skb_frags;
 typedef struct skb_frag_struct skb_frag_t;
 
 struct skb_frag_struct {
-	struct {
-		struct page *p;
-	} page;
+	struct page *bv_page;
 	__u32 size;
 	__u32 page_offset;
 };
@@ -374,7 +372,7 @@ static inline bool skb_frag_must_loop(struct page *p)
  *	skb_frag_foreach_page - loop over pages in a fragment
  *
  *	@f:		skb frag to operate on
- *	@f_off:		offset from start of f->page.p
+ *	@f_off:		offset from start of f->bv_page
  *	@f_len:		length from f_off to loop over
  *	@p:		(temp var) current page
  *	@p_off:		(temp var) offset from start of current page,
@@ -2084,7 +2082,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 	 * that not all callers have unique ownership of the page but rely
 	 * on page_is_pfmemalloc doing the right thing(tm).
 	 */
-	frag->page.p		  = page;
+	frag->bv_page		  = page;
 	frag->page_offset	  = off;
 	skb_frag_size_set(frag, size);
 
@@ -2872,7 +2870,7 @@ static inline void skb_propagate_pfmemalloc(struct page *page,
  */
 static inline struct page *skb_frag_page(const skb_frag_t *frag)
 {
-	return frag->page.p;
+	return frag->bv_page;
 }
 
 /**
@@ -2958,7 +2956,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
  */
 static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page)
 {
-	frag->page.p = page;
+	frag->bv_page = page;
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ba9a36903503..0b788df5a75b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3364,7 +3364,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen)
 
 		} else {
 			__skb_frag_ref(fragfrom);
-			fragto->page = fragfrom->page;
+			fragto->bv_page = fragfrom->bv_page;
 			fragto->page_offset = fragfrom->page_offset;
 			skb_frag_size_set(fragto, todo);
 
-- 
cgit v1.2.3


From b8b576a16f79efbdde49348147f491b176537d88 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 22 Jul 2019 20:08:30 -0700
Subject: net: Rename skb_frag_t size to bv_len

Improved compatibility with bvec

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8076e2ba8349..e849e411d1f3 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -312,7 +312,7 @@ typedef struct skb_frag_struct skb_frag_t;
 
 struct skb_frag_struct {
 	struct page *bv_page;
-	__u32 size;
+	unsigned int bv_len;
 	__u32 page_offset;
 };
 
@@ -322,7 +322,7 @@ struct skb_frag_struct {
  */
 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 {
-	return frag->size;
+	return frag->bv_len;
 }
 
 /**
@@ -332,7 +332,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
  */
 static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
 {
-	frag->size = size;
+	frag->bv_len = size;
 }
 
 /**
@@ -342,7 +342,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
  */
 static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
 {
-	frag->size += delta;
+	frag->bv_len += delta;
 }
 
 /**
@@ -352,7 +352,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
  */
 static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
 {
-	frag->size -= delta;
+	frag->bv_len -= delta;
 }
 
 /**
-- 
cgit v1.2.3


From 8842d285bafa9ff7719f4107b6545a11dcd41995 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 22 Jul 2019 20:08:31 -0700
Subject: net: Convert skb_frag_t to bio_vec

There are a lot of users of frag->page_offset, so use a union
to avoid converting those users today.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bvec.h   | 5 ++++-
 include/linux/skbuff.h | 9 ++-------
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index a032f01e928c..7f2b2ea9399c 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -18,7 +18,10 @@
 struct bio_vec {
 	struct page	*bv_page;
 	unsigned int	bv_len;
-	unsigned int	bv_offset;
+	union {
+		__u32		page_offset;
+		unsigned int	bv_offset;
+	};
 };
 
 struct bvec_iter {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e849e411d1f3..718742b1c505 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -14,6 +14,7 @@
 #include <linux/compiler.h>
 #include <linux/time.h>
 #include <linux/bug.h>
+#include <linux/bvec.h>
 #include <linux/cache.h>
 #include <linux/rbtree.h>
 #include <linux/socket.h>
@@ -308,13 +309,7 @@ extern int sysctl_max_skb_frags;
  */
 #define GSO_BY_FRAGS	0xFFFF
 
-typedef struct skb_frag_struct skb_frag_t;
-
-struct skb_frag_struct {
-	struct page *bv_page;
-	unsigned int bv_len;
-	__u32 page_offset;
-};
+typedef struct bio_vec skb_frag_t;
 
 /**
  * skb_frag_size - Returns the size of a skb fragment
-- 
cgit v1.2.3


From 3a79bc63d90750f737ab9d7219bd3091d2fd6d84 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Jul 2019 13:03:20 +0200
Subject: PCI: irq: Introduce rearm_wake_irq()

Introduce a new function, rearm_wake_irq(), allowing a wakeup IRQ
to be armed for systen wakeup detection again without running any
action handlers associated with it after it has been armed for
wakeup detection and triggered.

That is useful for IRQs, like ACPI SCI, that may deliver wakeup
as well as non-wakeup interrupts when armed for systen wakeup
detection.  In those cases, it may be possible to determine whether
or not the delivered interrupt is a systen wakeup one without
running the entire action handler (or handlers, if the IRQ is
shared) for the IRQ, and if the interrupt turns out to be a
non-wakeup one, the IRQ can be rearmed with the help of the
new function.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h |  1 +
 kernel/irq/pm.c           | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5b8328a99b2a..0e9cdb3efda7 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -238,6 +238,7 @@ extern void teardown_percpu_nmi(unsigned int irq);
 /* The following three functions are for the core kernel use only. */
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
+extern void rearm_wake_irq(unsigned int irq);
 
 /**
  * struct irq_affinity_notify - context for notification of IRQ affinity changes
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index d6961d3c6f9e..8f557fa1f4fe 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -176,6 +176,26 @@ static void resume_irqs(bool want_early)
 	}
 }
 
+/**
+ * rearm_wake_irq - rearm a wakeup interrupt line after signaling wakeup
+ * @irq: Interrupt to rearm
+ */
+void rearm_wake_irq(unsigned int irq)
+{
+	unsigned long flags;
+	struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL);
+
+	if (!desc || !(desc->istate & IRQS_SUSPENDED) ||
+	    !irqd_is_wakeup_set(&desc->irq_data))
+		return;
+
+	desc->istate &= ~IRQS_SUSPENDED;
+	irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED);
+	__enable_irq(desc);
+
+	irq_put_desc_busunlock(desc, flags);
+}
+
 /**
  * irq_pm_syscore_ops - enable interrupt lines early
  *
-- 
cgit v1.2.3


From 6921de898ba8f2ec91cfea70e7160b89c477382e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Jul 2019 13:03:28 +0200
Subject: ACPICA: Return u32 from acpi_dispatch_gpe()

In some cases it is useful to know whether or not the
acpi_ev_detect_gpe() called by acpi_dispatch_gpe() has found
the GPE to be active, so return the return value of it (whose
data type is u32) from latter.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/acpica/evxfgpe.c | 6 +++---
 include/acpi/acpixf.h         | 8 +++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 710488ec59e9..04a40d563dd6 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -644,17 +644,17 @@ ACPI_EXPORT_SYMBOL(acpi_get_gpe_status)
  * PARAMETERS:  gpe_device          - Parent GPE Device. NULL for GPE0/GPE1
  *              gpe_number          - GPE level within the GPE block
  *
- * RETURN:      None
+ * RETURN:      INTERRUPT_HANDLED or INTERRUPT_NOT_HANDLED
  *
  * DESCRIPTION: Detect and dispatch a General Purpose Event to either a function
  *              (e.g. EC) or method (e.g. _Lxx/_Exx) handler.
  *
  ******************************************************************************/
-void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)
+u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number)
 {
 	ACPI_FUNCTION_TRACE(acpi_dispatch_gpe);
 
-	acpi_ev_detect_gpe(gpe_device, NULL, gpe_number);
+	return acpi_ev_detect_gpe(gpe_device, NULL, gpe_number);
 }
 
 ACPI_EXPORT_SYMBOL(acpi_dispatch_gpe)
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3845c8fcc94e..4ed603a3b448 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -297,6 +297,9 @@ ACPI_GLOBAL(u8, acpi_gbl_system_awake_and_running);
 #define ACPI_HW_DEPENDENT_RETURN_OK(prototype) \
 	ACPI_EXTERNAL_RETURN_OK(prototype)
 
+#define ACPI_HW_DEPENDENT_RETURN_UINT32(prototype) \
+	ACPI_EXTERNAL_RETURN_UINT32(prototype)
+
 #define ACPI_HW_DEPENDENT_RETURN_VOID(prototype) \
 	ACPI_EXTERNAL_RETURN_VOID(prototype)
 
@@ -307,6 +310,9 @@ ACPI_GLOBAL(u8, acpi_gbl_system_awake_and_running);
 #define ACPI_HW_DEPENDENT_RETURN_OK(prototype) \
 	static ACPI_INLINE prototype {return(AE_OK);}
 
+#define ACPI_HW_DEPENDENT_RETURN_UINT32(prototype) \
+	static ACPI_INLINE prototype {return(0);}
+
 #define ACPI_HW_DEPENDENT_RETURN_VOID(prototype) \
 	static ACPI_INLINE prototype {return;}
 
@@ -738,7 +744,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
 						     u32 gpe_number,
 						     acpi_event_status
 						     *event_status))
-ACPI_HW_DEPENDENT_RETURN_VOID(void acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number))
+ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void))
-- 
cgit v1.2.3


From 56b991849009f5def0443bfb2f48c8321d888e15 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Jul 2019 23:52:03 +0200
Subject: PM: sleep: Simplify suspend-to-idle control flow

After commit 33e4f80ee69b ("ACPI / PM: Ignore spurious SCI wakeups
from suspend-to-idle") the "noirq" phases of device suspend and
resume may run for multiple times during suspend-to-idle, if there
are spurious system wakeup events while suspended.  However, this
is complicated and fragile and actually unnecessary.

The main reason for doing this is that on some systems the EC may
signal system wakeup events (power button events, for example) as
well as events that should not cause the system to resume (spurious
system wakeup events).  Thus, in order to determine whether or not
a given event signaled by the EC while suspended is a proper system
wakeup one, the EC GPE needs to be dispatched and to start with that
was achieved by allowing the ACPI SCI action handler to run, which
was only possible after calling resume_device_irqs().

However, dispatching the EC GPE this way turned out to take too much
time in some cases and some EC events might be missed due to that, so
commit 68e22011856f ("ACPI: EC: Dispatch the EC GPE directly on
s2idle wake") started to dispatch the EC GPE right after a wakeup
event has been detected, so in fact the full ACPI SCI action handler
doesn't need to run any more to deal with the wakeups coming from the
EC.

Use this observation to simplify the suspend-to-idle control flow
so that the "noirq" phases of device suspend and resume are each
run only once in every suspend-to-idle cycle, which is reported to
significantly reduce power drawn by some systems when suspended to
idle (by allowing them to reach a deep platform-wide low-power state
through the suspend-to-idle flow).  [What appears to happen is that
the "noirq" resume of devices after a spurious EC wakeup brings some
devices into a state in which they prevent the platform from reaching
the deep low-power state going forward, even after a subsequent
"noirq" suspend phase, and on some systems the EC triggers such
wakeups already when the "noirq" suspend of devices is running for
the first time in the given suspend/resume cycle, so the platform
cannot reach the deep low-power state at all.]

First, make acpi_s2idle_wake() use the acpi_ec_dispatch_gpe() return
value to determine whether or not the wakeup may have been triggered
by the EC (in which case the system wakeup is canceled and ACPI
events are processed in order to determine whether or not the event
is a proper system wakeup one) and use rearm_wake_irq() (introduced
by a previous change) in it to rearm the ACPI SCI for system wakeup
detection in case the system will remain suspended.

Second, drop acpi_s2idle_sync(), which is not needed any more, and
the corresponding global platform suspend-to-idle callback.

Next, drop the pm_wakeup_pending() check (which is an optimization
only) from __device_suspend_noirq() to prevent it from returning
errors on system wakeups occurring before the "noirq" phase of
device suspend is complete (as in the case of suspend-to-idle it is
not known whether or not these wakeups are suprious at that point),
in order to avoid having to carry out a "noirq" resume of devices
on a spurious system wakeup.

Finally, change the code flow in s2idle_loop() to (1) run the
"noirq" suspend of devices once before starting the loop, (2) check
for spurious EC wakeups (via the platform ->wake callback) for the
first time before calling s2idle_enter(), and (3) run the "noirq"
resume of devices once after leaving the loop.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/acpi/sleep.c      | 47 +++++++++++++++++++++--------------------
 drivers/base/power/main.c |  5 -----
 include/linux/suspend.h   |  1 -
 kernel/power/suspend.c    | 53 ++++++++++++++++++++---------------------------
 4 files changed, 48 insertions(+), 58 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 3debe1a42655..970ae7c7a3f7 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -986,33 +986,37 @@ static void acpi_s2idle_wake(void)
 		lpi_check_constraints();
 
 	/*
-	 * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means
-	 * that the SCI has triggered while suspended, so cancel the wakeup in
-	 * case it has not been a wakeup event (the GPEs will be checked later).
+	 * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has
+	 * not triggered while suspended, so bail out.
 	 */
-	if (acpi_sci_irq_valid() &&
-	    !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) {
+	if (!acpi_sci_irq_valid() ||
+	    irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq)))
+		return;
+
+	/*
+	 * If there are EC events to process, the wakeup may be a spurious one
+	 * coming from the EC.
+	 */
+	if (acpi_ec_dispatch_gpe()) {
+		/*
+		 * Cancel the wakeup and process all pending events in case
+		 * there are any wakeup ones in there.
+		 *
+		 * Note that if any non-EC GPEs are active at this point, the
+		 * SCI will retrigger after the rearming below, so no events
+		 * should be missed by canceling the wakeup here.
+		 */
 		pm_system_cancel_wakeup();
 		/*
-		 * On some platforms with the LPS0 _DSM device noirq resume
-		 * takes too much time for EC wakeup events to survive, so look
-		 * for them now.
+		 * The EC driver uses the system workqueue and an additional
+		 * special one, so those need to be flushed too.
 		 */
-		acpi_ec_dispatch_gpe();
+		acpi_os_wait_events_complete(); /* synchronize EC GPE processing */
+		acpi_ec_flush_work();
+		acpi_os_wait_events_complete(); /* synchronize Notify handling */
 	}
-}
 
-static void acpi_s2idle_sync(void)
-{
-	/*
-	 * Process all pending events in case there are any wakeup ones.
-	 *
-	 * The EC driver uses the system workqueue and an additional special
-	 * one, so those need to be flushed too.
-	 */
-	acpi_os_wait_events_complete();	/* synchronize SCI IRQ handling */
-	acpi_ec_flush_work();
-	acpi_os_wait_events_complete();	/* synchronize Notify handling */
+	rearm_wake_irq(acpi_sci_irq);
 }
 
 static void acpi_s2idle_restore(void)
@@ -1044,7 +1048,6 @@ static const struct platform_s2idle_ops acpi_s2idle_ops = {
 	.begin = acpi_s2idle_begin,
 	.prepare = acpi_s2idle_prepare,
 	.wake = acpi_s2idle_wake,
-	.sync = acpi_s2idle_sync,
 	.restore = acpi_s2idle_restore,
 	.end = acpi_s2idle_end,
 };
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 7fb2c39bc725..f08332fab531 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1291,11 +1291,6 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 	if (async_error)
 		goto Complete;
 
-	if (pm_wakeup_pending()) {
-		async_error = -EBUSY;
-		goto Complete;
-	}
-
 	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 9c0ad1a3a727..66ce3871ed61 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -191,7 +191,6 @@ struct platform_s2idle_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
 	void (*wake)(void);
-	void (*sync)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c874a7026e24..907b2be0372f 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -119,48 +119,41 @@ static void s2idle_enter(void)
 
 static void s2idle_loop(void)
 {
+	int error;
+
+	dpm_noirq_begin();
+	error = dpm_noirq_suspend_devices(PMSG_SUSPEND);
+	if (error)
+		goto resume;
+
 	pm_pr_dbg("suspend-to-idle\n");
 
+	/*
+	 * Suspend-to-idle equals:
+	 * frozen processes + suspended devices + idle processors.
+	 * Thus s2idle_enter() should be called right after all devices have
+	 * been suspended.
+	 *
+	 * Wakeups during the noirq suspend of devices may be spurious, so try
+	 * to avoid them upfront.
+	 */
 	for (;;) {
-		int error;
-
-		dpm_noirq_begin();
-
-		/*
-		 * Suspend-to-idle equals
-		 * frozen processes + suspended devices + idle processors.
-		 * Thus s2idle_enter() should be called right after
-		 * all devices have been suspended.
-		 *
-		 * Wakeups during the noirq suspend of devices may be spurious,
-		 * so prevent them from terminating the loop right away.
-		 */
-		error = dpm_noirq_suspend_devices(PMSG_SUSPEND);
-		if (!error)
-			s2idle_enter();
-		else if (error == -EBUSY && pm_wakeup_pending())
-			error = 0;
-
-		if (!error && s2idle_ops && s2idle_ops->wake)
+		if (s2idle_ops && s2idle_ops->wake)
 			s2idle_ops->wake();
 
-		dpm_noirq_resume_devices(PMSG_RESUME);
-
-		dpm_noirq_end();
-
-		if (error)
-			break;
-
-		if (s2idle_ops && s2idle_ops->sync)
-			s2idle_ops->sync();
-
 		if (pm_wakeup_pending())
 			break;
 
 		pm_wakeup_clear(false);
+
+		s2idle_enter();
 	}
 
 	pm_pr_dbg("resume from suspend-to-idle\n");
+
+resume:
+	dpm_noirq_resume_devices(PMSG_RESUME);
+	dpm_noirq_end();
 }
 
 void s2idle_wake(void)
-- 
cgit v1.2.3


From b605c44c30b59990e806f930c37bd288b9d901a5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Jul 2019 23:52:18 +0200
Subject: PM: sleep: Drop dpm_noirq_begin() and dpm_noirq_end()

Note that after previous changes dpm_noirq_begin() and
dpm_noirq_end() each have only one caller, so move the code from
them to their respective callers and drop them.

Also note that dpm_noirq_resume_devices() and
dpm_noirq_suspend_devices() need not be exported any more, so make
them both static.

This change is not expected to alter functionality.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 drivers/base/power/main.c | 30 ++++++++++++------------------
 include/linux/pm.h        |  4 ----
 2 files changed, 12 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index f08332fab531..134a8af51511 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -716,7 +716,7 @@ static void async_resume_noirq(void *data, async_cookie_t cookie)
 	put_device(dev);
 }
 
-void dpm_noirq_resume_devices(pm_message_t state)
+static void dpm_noirq_resume_devices(pm_message_t state)
 {
 	struct device *dev;
 	ktime_t starttime = ktime_get();
@@ -760,13 +760,6 @@ void dpm_noirq_resume_devices(pm_message_t state)
 	trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false);
 }
 
-void dpm_noirq_end(void)
-{
-	resume_device_irqs();
-	device_wakeup_disarm_wake_irqs();
-	cpuidle_resume();
-}
-
 /**
  * dpm_resume_noirq - Execute "noirq resume" callbacks for all devices.
  * @state: PM transition of the system being carried out.
@@ -777,7 +770,11 @@ void dpm_noirq_end(void)
 void dpm_resume_noirq(pm_message_t state)
 {
 	dpm_noirq_resume_devices(state);
-	dpm_noirq_end();
+
+	resume_device_irqs();
+	device_wakeup_disarm_wake_irqs();
+
+	cpuidle_resume();
 }
 
 static pm_callback_t dpm_subsys_resume_early_cb(struct device *dev,
@@ -1357,14 +1354,7 @@ static int device_suspend_noirq(struct device *dev)
 	return __device_suspend_noirq(dev, pm_transition, false);
 }
 
-void dpm_noirq_begin(void)
-{
-	cpuidle_pause();
-	device_wakeup_arm_wake_irqs();
-	suspend_device_irqs();
-}
-
-int dpm_noirq_suspend_devices(pm_message_t state)
+static int dpm_noirq_suspend_devices(pm_message_t state)
 {
 	ktime_t starttime = ktime_get();
 	int error = 0;
@@ -1421,7 +1411,11 @@ int dpm_suspend_noirq(pm_message_t state)
 {
 	int ret;
 
-	dpm_noirq_begin();
+	cpuidle_pause();
+
+	device_wakeup_arm_wake_irqs();
+	suspend_device_irqs();
+
 	ret = dpm_noirq_suspend_devices(state);
 	if (ret)
 		dpm_resume_noirq(resume_event(state));
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3619a870eaa4..4c441be03079 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -712,8 +712,6 @@ struct dev_pm_domain {
 extern void device_pm_lock(void);
 extern void dpm_resume_start(pm_message_t state);
 extern void dpm_resume_end(pm_message_t state);
-extern void dpm_noirq_resume_devices(pm_message_t state);
-extern void dpm_noirq_end(void);
 extern void dpm_resume_noirq(pm_message_t state);
 extern void dpm_resume_early(pm_message_t state);
 extern void dpm_resume(pm_message_t state);
@@ -722,8 +720,6 @@ extern void dpm_complete(pm_message_t state);
 extern void device_pm_unlock(void);
 extern int dpm_suspend_end(pm_message_t state);
 extern int dpm_suspend_start(pm_message_t state);
-extern void dpm_noirq_begin(void);
-extern int dpm_noirq_suspend_devices(pm_message_t state);
 extern int dpm_suspend_noirq(pm_message_t state);
 extern int dpm_suspend_late(pm_message_t state);
 extern int dpm_suspend(pm_message_t state);
-- 
cgit v1.2.3


From 201c1db90cd643282185a00770f12f95da330eca Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 23 Jul 2019 09:51:00 +0200
Subject: iommu/iova: Fix compilation error with !CONFIG_IOMMU_IOVA

The stub function for !CONFIG_IOMMU_IOVA needs to be
'static inline'.

Fixes: effa467870c76 ('iommu/vt-d: Don't queue_iova() if there is no flush queue')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/iova.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/iova.h b/include/linux/iova.h
index cd0f1de901a8..a0637abffee8 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -236,7 +236,7 @@ static inline void init_iova_domain(struct iova_domain *iovad,
 {
 }
 
-bool has_iova_flush_queue(struct iova_domain *iovad)
+static inline bool has_iova_flush_queue(struct iova_domain *iovad)
 {
 	return false;
 }
-- 
cgit v1.2.3


From bca031e2c8aa22a978a2452bf959e27e9fa73dc7 Mon Sep 17 00:00:00 2001
From: Zenghui Yu <yuzenghui@huawei.com>
Date: Thu, 18 Jul 2019 08:15:10 +0000
Subject: KVM: arm/arm64: Introduce kvm_pmu_vcpu_init() to setup PMU counter
 index

We use "pmc->idx" and the "chained" bitmap to determine if the pmc is
chained, in kvm_pmu_pmc_is_chained().  But idx might be uninitialized
(and random) when we doing this decision, through a KVM_ARM_VCPU_INIT
ioctl -> kvm_pmu_vcpu_reset(). And the test_bit() against this random
idx will potentially hit a KASAN BUG [1].

In general, idx is the static property of a PMU counter that is not
expected to be modified across resets, as suggested by Julien.  It
looks more reasonable if we can setup the PMU counter idx for a vcpu
in its creation time. Introduce a new function - kvm_pmu_vcpu_init()
for this basic setup. Oh, and the KASAN BUG will get fixed this way.

[1] https://www.spinics.net/lists/kvm-arm/msg36700.html

Fixes: 80f393a23be6 ("KVM: arm/arm64: Support chained PMU counters")
Suggested-by: Andrew Murray <andrew.murray@arm.com>
Suggested-by: Julien Thierry <julien.thierry@arm.com>
Acked-by: Julien Thierry <julien.thierry@arm.com>
Signed-off-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_pmu.h |  2 ++
 virt/kvm/arm/arm.c    |  2 ++
 virt/kvm/arm/pmu.c    | 18 +++++++++++++++---
 3 files changed, 19 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 16c769a7f979..6db030439e29 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -34,6 +34,7 @@ struct kvm_pmu {
 u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx);
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
 u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
+void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
@@ -71,6 +72,7 @@ static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
 {
 	return 0;
 }
+static inline void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f645c0fbf7ec..c704fa696184 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -340,6 +340,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
 
+	kvm_pmu_vcpu_init(vcpu);
+
 	kvm_arm_reset_debug_ptr(vcpu);
 
 	return kvm_vgic_vcpu_init(vcpu);
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 3dd8238ed246..362a01886bab 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -214,6 +214,20 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 	kvm_pmu_release_perf_event(pmc);
 }
 
+/**
+ * kvm_pmu_vcpu_init - assign pmu counter idx for cpu
+ * @vcpu: The vcpu pointer
+ *
+ */
+void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	int i;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+
+	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
+		pmu->pmc[i].idx = i;
+}
+
 /**
  * kvm_pmu_vcpu_reset - reset pmu state for cpu
  * @vcpu: The vcpu pointer
@@ -224,10 +238,8 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
 	int i;
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 
-	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
 		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
-		pmu->pmc[i].idx = i;
-	}
 
 	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
 }
-- 
cgit v1.2.3


From 4475f8c4ab7b248991a60d9c02808dbb813d6be8 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Mon, 22 Jul 2019 10:24:33 +0100
Subject: ALSA: compress: Fix regression on compressed capture streams

A previous fix to the stop handling on compressed capture streams causes
some knock on issues. The previous fix updated snd_compr_drain_notify to
set the state back to PREPARED for capture streams. This causes some
issues however as the handling for snd_compr_poll differs between the
two states and some user-space applications were relying on the poll
failing after the stream had been stopped.

To correct this regression whilst still fixing the original problem the
patch was addressing, update the capture handling to skip the PREPARED
state rather than skipping the SETUP state as it has done until now.

Fixes: 4f2ab5e1d13d ("ALSA: compress: Fix stop handling on compressed capture streams")
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Acked-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/compress_driver.h |  5 +----
 sound/core/compress_offload.c   | 16 +++++++++++-----
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/sound/compress_driver.h b/include/sound/compress_driver.h
index c5188ff724d1..bc88d6f964da 100644
--- a/include/sound/compress_driver.h
+++ b/include/sound/compress_driver.h
@@ -173,10 +173,7 @@ static inline void snd_compr_drain_notify(struct snd_compr_stream *stream)
 	if (snd_BUG_ON(!stream))
 		return;
 
-	if (stream->direction == SND_COMPRESS_PLAYBACK)
-		stream->runtime->state = SNDRV_PCM_STATE_SETUP;
-	else
-		stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+	stream->runtime->state = SNDRV_PCM_STATE_SETUP;
 
 	wake_up(&stream->runtime->sleep);
 }
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 99b882158705..d79aee6b9edd 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -574,10 +574,7 @@ snd_compr_set_params(struct snd_compr_stream *stream, unsigned long arg)
 		stream->metadata_set = false;
 		stream->next_track = false;
 
-		if (stream->direction == SND_COMPRESS_PLAYBACK)
-			stream->runtime->state = SNDRV_PCM_STATE_SETUP;
-		else
-			stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+		stream->runtime->state = SNDRV_PCM_STATE_SETUP;
 	} else {
 		return -EPERM;
 	}
@@ -693,8 +690,17 @@ static int snd_compr_start(struct snd_compr_stream *stream)
 {
 	int retval;
 
-	if (stream->runtime->state != SNDRV_PCM_STATE_PREPARED)
+	switch (stream->runtime->state) {
+	case SNDRV_PCM_STATE_SETUP:
+		if (stream->direction != SND_COMPRESS_CAPTURE)
+			return -EPERM;
+		break;
+	case SNDRV_PCM_STATE_PREPARED:
+		break;
+	default:
 		return -EPERM;
+	}
+
 	retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_START);
 	if (!retval)
 		stream->runtime->state = SNDRV_PCM_STATE_RUNNING;
-- 
cgit v1.2.3


From 6298b78742be6593d372ed1b5bfa5397e1393595 Mon Sep 17 00:00:00 2001
From: Janusz Jankowski <janusz.jankowski@linux.intel.com>
Date: Mon, 22 Jul 2019 09:14:02 -0500
Subject: ASoC: SOF: Intel: ssp: BCLK delay parameter

Some codecs require BCLK to be on for some time, before sending
any data. SOF can enable BCLK and then wait for guaranteed time,
before starting DMA on SSP start.

Signed-off-by: Janusz Jankowski <janusz.jankowski@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190722141402.7194-22-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai-intel.h   | 3 +++
 include/uapi/sound/sof/abi.h    | 2 +-
 include/uapi/sound/sof/tokens.h | 1 +
 sound/soc/sof/topology.c        | 3 +++
 4 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/sof/dai-intel.h b/include/sound/sof/dai-intel.h
index 4bb8ee138ba7..a81afd3fbd41 100644
--- a/include/sound/sof/dai-intel.h
+++ b/include/sound/sof/dai-intel.h
@@ -76,6 +76,9 @@ struct sof_ipc_dai_ssp_params {
 	uint16_t tdm_per_slot_padding_flag;
 	uint32_t clks_control;
 	uint32_t quirks;
+	uint32_t bclk_delay;	/* guaranteed time (ms) for which BCLK
+				 * will be driven, before sending data
+				 */
 } __packed;
 
 /* HDA Configuration Request - SOF_IPC_DAI_HDA_CONFIG */
diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h
index 4a9c24434f42..dff70a42445a 100644
--- a/include/uapi/sound/sof/abi.h
+++ b/include/uapi/sound/sof/abi.h
@@ -26,7 +26,7 @@
 
 /* SOF ABI version major, minor and patch numbers */
 #define SOF_ABI_MAJOR 3
-#define SOF_ABI_MINOR 8
+#define SOF_ABI_MINOR 9
 #define SOF_ABI_PATCH 0
 
 /* SOF ABI version number. Format within 32bit word is MMmmmppp */
diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index dc1b27daaac6..6435240cef13 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -75,6 +75,7 @@
 #define SOF_TKN_INTEL_SSP_FRAME_PULSE_WIDTH	503
 #define SOF_TKN_INTEL_SSP_QUIRKS		504
 #define SOF_TKN_INTEL_SSP_TDM_PADDING_PER_SLOT	505
+#define SOF_TKN_INTEL_SSP_BCLK_DELAY		506
 
 /* DMIC */
 #define SOF_TKN_INTEL_DMIC_DRIVER_VERSION	600
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 432ae343f960..12b7d900b9c2 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -748,6 +748,9 @@ static const struct sof_topology_token ssp_tokens[] = {
 		get_token_u16,
 		offsetof(struct sof_ipc_dai_ssp_params,
 			 tdm_per_slot_padding_flag), 0},
+	{SOF_TKN_INTEL_SSP_BCLK_DELAY, SND_SOC_TPLG_TUPLE_TYPE_WORD,
+		get_token_u32,
+		offsetof(struct sof_ipc_dai_ssp_params, bclk_delay), 0},
 
 };
 
-- 
cgit v1.2.3


From 967b109096b2c2b7ef45a3fce44908efe05779a9 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 23 Jul 2019 10:07:40 +1000
Subject: media: uapi: new file needs types.h

Today's linux-next build (x86_64 allmodconfig) failed like this:

	include/media/vp8-ctrls.h:25:2: error: unknown type name '__s8'
	  __s8 quant_update[4];
	  ^~~~
	...
	include/media/vp8-ctrls.h:107:2: error: unknown type name '__u64'
	  __u64 flags;
	  ^~~~~

Caused by commit a57d6acaf352 ("media: uapi: Add VP8 stateless decoder API")

Fixes: a57d6acaf352 ("media: uapi: Add VP8 stateless decoder API")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/vp8-ctrls.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/media/vp8-ctrls.h b/include/media/vp8-ctrls.h
index 6cc2eeea4c90..53cba826e482 100644
--- a/include/media/vp8-ctrls.h
+++ b/include/media/vp8-ctrls.h
@@ -11,6 +11,8 @@
 #ifndef _VP8_CTRLS_H_
 #define _VP8_CTRLS_H_
 
+#include <linux/types.h>
+
 #define V4L2_PIX_FMT_VP8_FRAME v4l2_fourcc('V', 'P', '8', 'F')
 
 #define V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER (V4L2_CID_MPEG_BASE + 2000)
-- 
cgit v1.2.3


From 327fe1d42b83f8a06b33ba30159582b49af5fc8e Mon Sep 17 00:00:00 2001
From: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Date: Tue, 23 Jul 2019 00:27:41 -0300
Subject: block: blk-mq: Remove blk_mq_sched_started_request and
 started_request

blk_mq_sched_completed_request is a function that checks if the elevator
related to the request has started_request implemented, but currently, none of
the available IO schedulers implement started_request, so remove both.

Signed-off-by: Marcos Paulo de Souza <marcos.souza.org@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.h     | 9 ---------
 block/blk-mq.c           | 2 --
 include/linux/elevator.h | 1 -
 3 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index cf22ab00fefb..126021fc3a11 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -61,15 +61,6 @@ static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 		e->type->ops.completed_request(rq, now);
 }
 
-static inline void blk_mq_sched_started_request(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.started_request)
-		e->type->ops.started_request(rq);
-}
-
 static inline void blk_mq_sched_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2bc2c0705660..f78d3287dd82 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -669,8 +669,6 @@ void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
-	blk_mq_sched_started_request(rq);
-
 	trace_block_rq_issue(q, rq);
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 17cd0078377c..1dd014c9c87b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -45,7 +45,6 @@ struct elevator_mq_ops {
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
 	void (*completed_request)(struct request *, u64);
-	void (*started_request)(struct request *);
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
 	struct request *(*next_request)(struct request_queue *, struct request *);
-- 
cgit v1.2.3


From 06532750010e06dd4b6d69983773677df7fc5291 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Mon, 22 Jul 2019 18:51:49 +0200
Subject: dma-mapping: use dma_get_mask in dma_addressing_limited

We currently have cases where the dma_addressing_limited() gets
called with dma_mask unset. This causes a NULL pointer dereference.

Use dma_get_mask() accessor to prevent the crash.

Fixes: b866455423e0 ("dma-mapping: add a dma_addressing_limited helper")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-mapping.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index e11b115dd0e4..f7d1eea32c78 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -689,8 +689,8 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
  */
 static inline bool dma_addressing_limited(struct device *dev)
 {
-	return min_not_zero(*dev->dma_mask, dev->bus_dma_mask) <
-		dma_get_required_mask(dev);
+	return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) <
+			    dma_get_required_mask(dev);
 }
 
 #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
-- 
cgit v1.2.3


From aa6166c2ac28392d64f2d8b3acfb56c8fe657147 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:04 +0900
Subject: ASoC: soc-dai: mv soc_dai_hw_params() to soc-dai

Sometimes ALSA SoC naming is very random.
Current soc_dai_hw_params() should use snd_soc_dai_xxx() style.
And then, 1st parameter should be dai. Otherwise it is confusable.
 - soc_dai_hw_params(..., dai);
 + snd_soc_dai_hw_params(dai, ...);

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87zhl6hn5b.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  4 ++++
 include/sound/soc.h     |  4 ----
 sound/soc/soc-dai.c     | 30 ++++++++++++++++++++++++++++++
 sound/soc/soc-dapm.c    |  4 ++--
 sound/soc/soc-pcm.c     | 35 +++--------------------------------
 5 files changed, 39 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index f5d70041108f..3773262a1b77 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -145,6 +145,10 @@ int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai,
 
 int snd_soc_dai_is_dummy(struct snd_soc_dai *dai);
 
+int snd_soc_dai_hw_params(struct snd_soc_dai *dai,
+			  struct snd_pcm_substream *substream,
+			  struct snd_pcm_hw_params *params);
+
 struct snd_soc_dai_ops {
 	/*
 	 * DAI clocking configuration, all optional.
diff --git a/include/sound/soc.h b/include/sound/soc.h
index 4e8071269639..d770606732cd 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -505,10 +505,6 @@ int snd_soc_params_to_bclk(struct snd_pcm_hw_params *parms);
 int snd_soc_set_runtime_hwparams(struct snd_pcm_substream *substream,
 	const struct snd_pcm_hardware *hw);
 
-int soc_dai_hw_params(struct snd_pcm_substream *substream,
-		      struct snd_pcm_hw_params *params,
-		      struct snd_soc_dai *dai);
-
 /* Jack reporting */
 int snd_soc_card_jack_new(struct snd_soc_card *card, const char *id, int type,
 	struct snd_soc_jack *jack, struct snd_soc_jack_pin *pins,
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index a1009ead40de..f883d27d136f 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -252,3 +252,33 @@ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute,
 		return -ENOTSUPP;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dai_digital_mute);
+
+int snd_soc_dai_hw_params(struct snd_soc_dai *dai,
+			  struct snd_pcm_substream *substream,
+			  struct snd_pcm_hw_params *params)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	int ret;
+
+	/* perform any topology hw_params fixups before DAI  */
+	if (rtd->dai_link->be_hw_params_fixup) {
+		ret = rtd->dai_link->be_hw_params_fixup(rtd, params);
+		if (ret < 0) {
+			dev_err(rtd->dev,
+				"ASoC: hw_params topology fixup failed %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (dai->driver->ops->hw_params) {
+		ret = dai->driver->ops->hw_params(substream, params, dai);
+		if (ret < 0) {
+			dev_err(dai->dev, "ASoC: can't set %s hw params: %d\n",
+				dai->name, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index f013b24c050a..8fc6a01f5d8b 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3839,7 +3839,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 				}
 			}
 			source->active++;
-			ret = soc_dai_hw_params(&substream, params, source);
+			ret = snd_soc_dai_hw_params(source, &substream, params);
 			if (ret < 0)
 				goto out;
 
@@ -3861,7 +3861,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 				}
 			}
 			sink->active++;
-			ret = soc_dai_hw_params(&substream, params, sink);
+			ret = snd_soc_dai_hw_params(sink, &substream, params);
 			if (ret < 0)
 				goto out;
 
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 4878d22ebd8c..420cc94e0a46 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -877,36 +877,6 @@ static void soc_pcm_codec_params_fixup(struct snd_pcm_hw_params *params,
 	interval->max = channels;
 }
 
-int soc_dai_hw_params(struct snd_pcm_substream *substream,
-		      struct snd_pcm_hw_params *params,
-		      struct snd_soc_dai *dai)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	int ret;
-
-	/* perform any topology hw_params fixups before DAI  */
-	if (rtd->dai_link->be_hw_params_fixup) {
-		ret = rtd->dai_link->be_hw_params_fixup(rtd, params);
-		if (ret < 0) {
-			dev_err(rtd->dev,
-				"ASoC: hw_params topology fixup failed %d\n",
-				ret);
-			return ret;
-		}
-	}
-
-	if (dai->driver->ops->hw_params) {
-		ret = dai->driver->ops->hw_params(substream, params, dai);
-		if (ret < 0) {
-			dev_err(dai->dev, "ASoC: can't set %s hw params: %d\n",
-				dai->name, ret);
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
 static int soc_pcm_components_hw_free(struct snd_pcm_substream *substream,
 				      struct snd_soc_component *last)
 {
@@ -989,7 +959,8 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 			soc_pcm_codec_params_fixup(&codec_params,
 						   codec_dai->rx_mask);
 
-		ret = soc_dai_hw_params(substream, &codec_params, codec_dai);
+		ret = snd_soc_dai_hw_params(codec_dai, substream,
+					    &codec_params);
 		if(ret < 0)
 			goto codec_err;
 
@@ -1001,7 +972,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 		snd_soc_dapm_update_dai(substream, &codec_params, codec_dai);
 	}
 
-	ret = soc_dai_hw_params(substream, params, cpu_dai);
+	ret = snd_soc_dai_hw_params(cpu_dai, substream, params);
 	if (ret < 0)
 		goto interface_err;
 
-- 
cgit v1.2.3


From 846faaed9df7899e74311db3aec0a41a2f6bc345 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:19 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_hw_free()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_hw_free() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87y30qhn4w.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     |  7 +++++++
 sound/soc/soc-dapm.c    |  7 ++-----
 sound/soc/soc-pcm.c     | 12 ++++--------
 4 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 3773262a1b77..5222b6a758f2 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -148,6 +148,8 @@ int snd_soc_dai_is_dummy(struct snd_soc_dai *dai);
 int snd_soc_dai_hw_params(struct snd_soc_dai *dai,
 			  struct snd_pcm_substream *substream,
 			  struct snd_pcm_hw_params *params);
+void snd_soc_dai_hw_free(struct snd_soc_dai *dai,
+			 struct snd_pcm_substream *substream);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index f883d27d136f..39a685e6acd5 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -282,3 +282,10 @@ int snd_soc_dai_hw_params(struct snd_soc_dai *dai,
 
 	return 0;
 }
+
+void snd_soc_dai_hw_free(struct snd_soc_dai *dai,
+			 struct snd_pcm_substream *substream)
+{
+	if (dai->driver->ops->hw_free)
+		dai->driver->ops->hw_free(substream, dai);
+}
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 8fc6a01f5d8b..0783b05133ad 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3898,9 +3898,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 		snd_soc_dapm_widget_for_each_source_path(w, path) {
 			source = path->source->priv;
 
-			if (source->driver->ops->hw_free)
-				source->driver->ops->hw_free(&substream,
-							     source);
+			snd_soc_dai_hw_free(source, &substream);
 
 			source->active--;
 			if (source->driver->ops->shutdown)
@@ -3912,8 +3910,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 		snd_soc_dapm_widget_for_each_sink_path(w, path) {
 			sink = path->sink->priv;
 
-			if (sink->driver->ops->hw_free)
-				sink->driver->ops->hw_free(&substream, sink);
+			snd_soc_dai_hw_free(sink, &substream);
 
 			sink->active--;
 			if (sink->driver->ops->shutdown)
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 420cc94e0a46..58fc4e98ab59 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1011,8 +1011,7 @@ out:
 component_err:
 	soc_pcm_components_hw_free(substream, component);
 
-	if (cpu_dai->driver->ops->hw_free)
-		cpu_dai->driver->ops->hw_free(substream, cpu_dai);
+	snd_soc_dai_hw_free(cpu_dai, substream);
 	cpu_dai->rate = 0;
 
 interface_err:
@@ -1023,8 +1022,7 @@ codec_err:
 		if (!snd_soc_dai_stream_valid(codec_dai, substream->stream))
 			continue;
 
-		if (codec_dai->driver->ops->hw_free)
-			codec_dai->driver->ops->hw_free(substream, codec_dai);
+		snd_soc_dai_hw_free(codec_dai, substream);
 		codec_dai->rate = 0;
 	}
 
@@ -1083,12 +1081,10 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
 		if (!snd_soc_dai_stream_valid(codec_dai, substream->stream))
 			continue;
 
-		if (codec_dai->driver->ops->hw_free)
-			codec_dai->driver->ops->hw_free(substream, codec_dai);
+		snd_soc_dai_hw_free(codec_dai, substream);
 	}
 
-	if (cpu_dai->driver->ops->hw_free)
-		cpu_dai->driver->ops->hw_free(substream, cpu_dai);
+	snd_soc_dai_hw_free(cpu_dai, substream);
 
 	mutex_unlock(&rtd->pcm_mutex);
 	return 0;
-- 
cgit v1.2.3


From 5a52a04531486e2ab069b7882432c8b266db36e6 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:32 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_startup()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_startup() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87wogahn4i.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     | 11 +++++++++++
 sound/soc/soc-dapm.c    | 28 ++++++++++------------------
 sound/soc/soc-pcm.c     | 27 +++++++++++----------------
 4 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 5222b6a758f2..0d16c5bb20bb 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -150,6 +150,8 @@ int snd_soc_dai_hw_params(struct snd_soc_dai *dai,
 			  struct snd_pcm_hw_params *params);
 void snd_soc_dai_hw_free(struct snd_soc_dai *dai,
 			 struct snd_pcm_substream *substream);
+int snd_soc_dai_startup(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 39a685e6acd5..6e196636e42f 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -289,3 +289,14 @@ void snd_soc_dai_hw_free(struct snd_soc_dai *dai,
 	if (dai->driver->ops->hw_free)
 		dai->driver->ops->hw_free(substream, dai);
 }
+
+int snd_soc_dai_startup(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream)
+{
+	int ret = 0;
+
+	if (dai->driver->ops->startup)
+		ret = dai->driver->ops->startup(substream, dai);
+
+	return ret;
+}
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 0783b05133ad..71bfd049480a 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3828,15 +3828,11 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 		snd_soc_dapm_widget_for_each_source_path(w, path) {
 			source = path->source->priv;
 
-			if (source->driver->ops->startup) {
-				ret = source->driver->ops->startup(&substream,
-								   source);
-				if (ret < 0) {
-					dev_err(source->dev,
-						"ASoC: startup() failed: %d\n",
-						ret);
-					goto out;
-				}
+			ret = snd_soc_dai_startup(source, &substream);
+			if (ret < 0) {
+				dev_err(source->dev,
+					"ASoC: startup() failed: %d\n", ret);
+				goto out;
 			}
 			source->active++;
 			ret = snd_soc_dai_hw_params(source, &substream, params);
@@ -3850,15 +3846,11 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 		snd_soc_dapm_widget_for_each_sink_path(w, path) {
 			sink = path->sink->priv;
 
-			if (sink->driver->ops->startup) {
-				ret = sink->driver->ops->startup(&substream,
-								 sink);
-				if (ret < 0) {
-					dev_err(sink->dev,
-						"ASoC: startup() failed: %d\n",
-						ret);
-					goto out;
-				}
+			ret = snd_soc_dai_startup(sink, &substream);
+			if (ret < 0) {
+				dev_err(sink->dev,
+					"ASoC: startup() failed: %d\n", ret);
+				goto out;
 			}
 			sink->active++;
 			ret = snd_soc_dai_hw_params(sink, &substream, params);
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 58fc4e98ab59..9c8713a3eef1 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -535,13 +535,11 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
 
 	/* startup the audio subsystem */
-	if (cpu_dai->driver->ops->startup) {
-		ret = cpu_dai->driver->ops->startup(substream, cpu_dai);
-		if (ret < 0) {
-			dev_err(cpu_dai->dev, "ASoC: can't open interface"
-				" %s: %d\n", cpu_dai->name, ret);
-			goto out;
-		}
+	ret = snd_soc_dai_startup(cpu_dai, substream);
+	if (ret < 0) {
+		dev_err(cpu_dai->dev, "ASoC: can't open interface %s: %d\n",
+			cpu_dai->name, ret);
+		goto out;
 	}
 
 	ret = soc_pcm_components_open(substream, &component);
@@ -549,15 +547,12 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 		goto component_err;
 
 	for_each_rtd_codec_dai(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->startup) {
-			ret = codec_dai->driver->ops->startup(substream,
-							      codec_dai);
-			if (ret < 0) {
-				dev_err(codec_dai->dev,
-					"ASoC: can't open codec %s: %d\n",
-					codec_dai->name, ret);
-				goto codec_dai_err;
-			}
+		ret = snd_soc_dai_startup(codec_dai, substream);
+		if (ret < 0) {
+			dev_err(codec_dai->dev,
+				"ASoC: can't open codec %s: %d\n",
+				codec_dai->name, ret);
+			goto codec_dai_err;
 		}
 
 		if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK)
-- 
cgit v1.2.3


From 330fcb5135e0588b1ea3b0bbab587d1317c1cf7b Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:39 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_shutdown()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_shutdown() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87v9vuhn4b.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     |  7 +++++++
 sound/soc/soc-dapm.c    |  7 ++-----
 sound/soc/soc-pcm.c     | 18 ++++++------------
 4 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 0d16c5bb20bb..32545d457b3d 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -152,6 +152,8 @@ void snd_soc_dai_hw_free(struct snd_soc_dai *dai,
 			 struct snd_pcm_substream *substream);
 int snd_soc_dai_startup(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream);
+void snd_soc_dai_shutdown(struct snd_soc_dai *dai,
+			  struct snd_pcm_substream *substream);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 6e196636e42f..67ff6cc1fe02 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -300,3 +300,10 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai,
 
 	return ret;
 }
+
+void snd_soc_dai_shutdown(struct snd_soc_dai *dai,
+			 struct snd_pcm_substream *substream)
+{
+	if (dai->driver->ops->shutdown)
+		dai->driver->ops->shutdown(substream, dai);
+}
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 71bfd049480a..1d04612601ad 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3893,9 +3893,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 			snd_soc_dai_hw_free(source, &substream);
 
 			source->active--;
-			if (source->driver->ops->shutdown)
-				source->driver->ops->shutdown(&substream,
-							      source);
+			snd_soc_dai_shutdown(source, &substream);
 		}
 
 		substream.stream = SNDRV_PCM_STREAM_PLAYBACK;
@@ -3905,8 +3903,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w,
 			snd_soc_dai_hw_free(sink, &substream);
 
 			sink->active--;
-			if (sink->driver->ops->shutdown)
-				sink->driver->ops->shutdown(&substream, sink);
+			snd_soc_dai_shutdown(sink, &substream);
 		}
 		break;
 
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 9c8713a3eef1..ed5ae23c7104 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -641,16 +641,13 @@ machine_err:
 	i = rtd->num_codecs;
 
 codec_dai_err:
-	for_each_rtd_codec_dai_rollback(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->shutdown)
-			codec_dai->driver->ops->shutdown(substream, codec_dai);
-	}
+	for_each_rtd_codec_dai_rollback(rtd, i, codec_dai)
+		snd_soc_dai_shutdown(codec_dai, substream);
 
 component_err:
 	soc_pcm_components_close(substream, component);
 
-	if (cpu_dai->driver->ops->shutdown)
-		cpu_dai->driver->ops->shutdown(substream, cpu_dai);
+	snd_soc_dai_shutdown(cpu_dai, substream);
 out:
 	mutex_unlock(&rtd->pcm_mutex);
 
@@ -728,13 +725,10 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
 
 	snd_soc_dai_digital_mute(cpu_dai, 1, substream->stream);
 
-	if (cpu_dai->driver->ops->shutdown)
-		cpu_dai->driver->ops->shutdown(substream, cpu_dai);
+	snd_soc_dai_shutdown(cpu_dai, substream);
 
-	for_each_rtd_codec_dai(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->shutdown)
-			codec_dai->driver->ops->shutdown(substream, codec_dai);
-	}
+	for_each_rtd_codec_dai(rtd, i, codec_dai)
+		snd_soc_dai_shutdown(codec_dai, substream);
 
 	if (rtd->dai_link->ops->shutdown)
 		rtd->dai_link->ops->shutdown(substream);
-- 
cgit v1.2.3


From 4beb8e109d30d339d44308a767dd6f5614492f3e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:45 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_prepare()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_prepare() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87tvbehn46.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     | 11 +++++++++++
 sound/soc/soc-pcm.c     | 27 +++++++++++----------------
 3 files changed, 24 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 32545d457b3d..c7dff6a0b5b9 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -154,6 +154,8 @@ int snd_soc_dai_startup(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream);
 void snd_soc_dai_shutdown(struct snd_soc_dai *dai,
 			  struct snd_pcm_substream *substream);
+int snd_soc_dai_prepare(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 67ff6cc1fe02..cb810888c563 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -307,3 +307,14 @@ void snd_soc_dai_shutdown(struct snd_soc_dai *dai,
 	if (dai->driver->ops->shutdown)
 		dai->driver->ops->shutdown(substream, dai);
 }
+
+int snd_soc_dai_prepare(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream)
+{
+	int ret = 0;
+
+	if (dai->driver->ops->prepare)
+		ret = dai->driver->ops->prepare(substream, dai);
+
+	return ret;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index ed5ae23c7104..d7611af90dce 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -814,27 +814,22 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	}
 
 	for_each_rtd_codec_dai(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->prepare) {
-			ret = codec_dai->driver->ops->prepare(substream,
-							      codec_dai);
-			if (ret < 0) {
-				dev_err(codec_dai->dev,
-					"ASoC: codec DAI prepare error: %d\n",
-					ret);
-				goto out;
-			}
-		}
-	}
-
-	if (cpu_dai->driver->ops->prepare) {
-		ret = cpu_dai->driver->ops->prepare(substream, cpu_dai);
+		ret = snd_soc_dai_prepare(codec_dai, substream);
 		if (ret < 0) {
-			dev_err(cpu_dai->dev,
-				"ASoC: cpu DAI prepare error: %d\n", ret);
+			dev_err(codec_dai->dev,
+				"ASoC: codec DAI prepare error: %d\n",
+				ret);
 			goto out;
 		}
 	}
 
+	ret = snd_soc_dai_prepare(cpu_dai, substream);
+	if (ret < 0) {
+		dev_err(cpu_dai->dev,
+			"ASoC: cpu DAI prepare error: %d\n", ret);
+		goto out;
+	}
+
 	/* cancel any delayed stream shutdown that is pending */
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK &&
 	    rtd->pop_wait) {
-- 
cgit v1.2.3


From 95aef35533844f35544851b0cdc1fc154b603307 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:51 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_trigger()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_trigger() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87sgqyhn40.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     | 12 ++++++++++++
 sound/soc/soc-pcm.c     | 17 ++++++-----------
 3 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index c7dff6a0b5b9..72b8e76f1cc4 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -156,6 +156,8 @@ void snd_soc_dai_shutdown(struct snd_soc_dai *dai,
 			  struct snd_pcm_substream *substream);
 int snd_soc_dai_prepare(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream);
+int snd_soc_dai_trigger(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream, int cmd);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index cb810888c563..18c447e169f6 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -318,3 +318,15 @@ int snd_soc_dai_prepare(struct snd_soc_dai *dai,
 
 	return ret;
 }
+
+int snd_soc_dai_trigger(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream,
+			int cmd)
+{
+	int ret = 0;
+
+	if (dai->driver->ops->trigger)
+		ret = dai->driver->ops->trigger(substream, cmd, dai);
+
+	return ret;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index d7611af90dce..a628b08f966e 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1084,12 +1084,9 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 	int i, ret;
 
 	for_each_rtd_codec_dai(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->trigger) {
-			ret = codec_dai->driver->ops->trigger(substream,
-							      cmd, codec_dai);
-			if (ret < 0)
-				return ret;
-		}
+		ret = snd_soc_dai_trigger(codec_dai, substream, cmd);
+		if (ret < 0)
+			return ret;
 	}
 
 	for_each_rtdcom(rtd, rtdcom) {
@@ -1104,11 +1101,9 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 			return ret;
 	}
 
-	if (cpu_dai->driver->ops->trigger) {
-		ret = cpu_dai->driver->ops->trigger(substream, cmd, cpu_dai);
-		if (ret < 0)
-			return ret;
-	}
+	snd_soc_dai_trigger(cpu_dai, substream, cmd);
+	if (ret < 0)
+		return ret;
 
 	if (rtd->dai_link->ops->trigger) {
 		ret = rtd->dai_link->ops->trigger(substream, cmd);
-- 
cgit v1.2.3


From 5c0769af4caf8fbdad2e9c0051ab0081b8e22b0a Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:33:56 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_bespoke_trigger()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_bespoke_trigger() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87r26ihn3u.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     | 12 ++++++++++++
 sound/soc/soc-pcm.c     | 16 ++++++----------
 3 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 72b8e76f1cc4..6a5566d459ad 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -158,6 +158,8 @@ int snd_soc_dai_prepare(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream);
 int snd_soc_dai_trigger(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream, int cmd);
+int snd_soc_dai_bespoke_trigger(struct snd_soc_dai *dai,
+			struct snd_pcm_substream *substream, int cmd);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 18c447e169f6..6f466cfcbeef 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -330,3 +330,15 @@ int snd_soc_dai_trigger(struct snd_soc_dai *dai,
 
 	return ret;
 }
+
+int snd_soc_dai_bespoke_trigger(struct snd_soc_dai *dai,
+				struct snd_pcm_substream *substream,
+				int cmd)
+{
+	int ret = 0;
+
+	if (dai->driver->ops->bespoke_trigger)
+		ret = dai->driver->ops->bespoke_trigger(substream, cmd, dai);
+
+	return ret;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index a628b08f966e..a10627f1ceff 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1123,19 +1123,15 @@ static int soc_pcm_bespoke_trigger(struct snd_pcm_substream *substream,
 	int i, ret;
 
 	for_each_rtd_codec_dai(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->bespoke_trigger) {
-			ret = codec_dai->driver->ops->bespoke_trigger(substream,
-								cmd, codec_dai);
-			if (ret < 0)
-				return ret;
-		}
-	}
-
-	if (cpu_dai->driver->ops->bespoke_trigger) {
-		ret = cpu_dai->driver->ops->bespoke_trigger(substream, cmd, cpu_dai);
+		ret = snd_soc_dai_bespoke_trigger(codec_dai, substream, cmd);
 		if (ret < 0)
 			return ret;
 	}
+
+	snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd);
+	if (ret < 0)
+		return ret;
+
 	return 0;
 }
 /*
-- 
cgit v1.2.3


From 1dea80d4b2bd3b53c58f008ca2bcd73182583711 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:34:09 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_delay()

Current ALSA SoC is directly using dai->driver->ops->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_delay() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87o91mhn3i.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-dai.c     | 11 +++++++++++
 sound/soc/soc-pcm.c     |  9 +++------
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 6a5566d459ad..7cfed3034511 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -160,6 +160,8 @@ int snd_soc_dai_trigger(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream, int cmd);
 int snd_soc_dai_bespoke_trigger(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream, int cmd);
+snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
+				    struct snd_pcm_substream *substream);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 6f466cfcbeef..5b5b979cd1f3 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -342,3 +342,14 @@ int snd_soc_dai_bespoke_trigger(struct snd_soc_dai *dai,
 
 	return ret;
 }
+
+snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
+				    struct snd_pcm_substream *substream)
+{
+	int delay = 0;
+
+	if (dai->driver->ops->delay)
+		delay = dai->driver->ops->delay(substream, dai);
+
+	return delay;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index a10627f1ceff..f3137723301c 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1169,14 +1169,11 @@ static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream)
 	/* base delay if assigned in pointer callback */
 	delay = runtime->delay;
 
-	if (cpu_dai->driver->ops->delay)
-		delay += cpu_dai->driver->ops->delay(substream, cpu_dai);
+	delay += snd_soc_dai_delay(cpu_dai, substream);
 
 	for_each_rtd_codec_dai(rtd, i, codec_dai) {
-		if (codec_dai->driver->ops->delay)
-			codec_delay = max(codec_delay,
-					codec_dai->driver->ops->delay(substream,
-								    codec_dai));
+		codec_delay = max(codec_delay,
+				  snd_soc_dai_delay(codec_dai, substream));
 	}
 	delay += codec_delay;
 
-- 
cgit v1.2.3


From e0f2262292d0c8160cfd9a8c40425107fb65ab29 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:34:29 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_suspend()

Current ALSA SoC is directly using dai->driver->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_suspend() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87muh6hn2x.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 1 +
 sound/soc/soc-core.c    | 8 ++++----
 sound/soc/soc-dai.c     | 6 ++++++
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 7cfed3034511..6c5604a7dbc2 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -162,6 +162,7 @@ int snd_soc_dai_bespoke_trigger(struct snd_soc_dai *dai,
 			struct snd_pcm_substream *substream, int cmd);
 snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
 				    struct snd_pcm_substream *substream);
+void snd_soc_dai_suspend(struct snd_soc_dai *dai);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 6e8c5c8eeaec..7493afb2371c 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -511,8 +511,8 @@ int snd_soc_suspend(struct device *dev)
 		if (rtd->dai_link->ignore_suspend)
 			continue;
 
-		if (cpu_dai->driver->suspend && !cpu_dai->driver->bus_control)
-			cpu_dai->driver->suspend(cpu_dai);
+		if (!cpu_dai->driver->bus_control)
+			snd_soc_dai_suspend(cpu_dai);
 	}
 
 	/* close any waiting streams */
@@ -584,8 +584,8 @@ int snd_soc_suspend(struct device *dev)
 		if (rtd->dai_link->ignore_suspend)
 			continue;
 
-		if (cpu_dai->driver->suspend && cpu_dai->driver->bus_control)
-			cpu_dai->driver->suspend(cpu_dai);
+		if (cpu_dai->driver->bus_control)
+			snd_soc_dai_suspend(cpu_dai);
 
 		/* deactivate pins to sleep state */
 		pinctrl_pm_select_sleep_state(cpu_dai->dev);
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 5b5b979cd1f3..3373598e0682 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -353,3 +353,9 @@ snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
 
 	return delay;
 }
+
+void snd_soc_dai_suspend(struct snd_soc_dai *dai)
+{
+	if (dai->driver->suspend)
+		dai->driver->suspend(dai);
+}
-- 
cgit v1.2.3


From 24b09d051164680f0a1d1910efe21ce36ad5c1ca Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:34:43 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_resume()

Current ALSA SoC is directly using dai->driver->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_resume() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87lfwqhn2j.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 1 +
 sound/soc/soc-core.c    | 8 ++++----
 sound/soc/soc-dai.c     | 6 ++++++
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 6c5604a7dbc2..ed78e34a814e 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -163,6 +163,7 @@ int snd_soc_dai_bespoke_trigger(struct snd_soc_dai *dai,
 snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
 				    struct snd_pcm_substream *substream);
 void snd_soc_dai_suspend(struct snd_soc_dai *dai);
+void snd_soc_dai_resume(struct snd_soc_dai *dai);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 7493afb2371c..5c02f90cea69 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -631,8 +631,8 @@ static void soc_resume_deferred(struct work_struct *work)
 		if (rtd->dai_link->ignore_suspend)
 			continue;
 
-		if (cpu_dai->driver->resume && cpu_dai->driver->bus_control)
-			cpu_dai->driver->resume(cpu_dai);
+		if (cpu_dai->driver->bus_control)
+			snd_soc_dai_resume(cpu_dai);
 	}
 
 	for_each_card_components(card, component) {
@@ -678,8 +678,8 @@ static void soc_resume_deferred(struct work_struct *work)
 		if (rtd->dai_link->ignore_suspend)
 			continue;
 
-		if (cpu_dai->driver->resume && !cpu_dai->driver->bus_control)
-			cpu_dai->driver->resume(cpu_dai);
+		if (!cpu_dai->driver->bus_control)
+			snd_soc_dai_resume(cpu_dai);
 	}
 
 	if (card->resume_post)
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 3373598e0682..ddb6f217c0ed 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -359,3 +359,9 @@ void snd_soc_dai_suspend(struct snd_soc_dai *dai)
 	if (dai->driver->suspend)
 		dai->driver->suspend(dai);
 }
+
+void snd_soc_dai_resume(struct snd_soc_dai *dai)
+{
+	if (dai->driver->resume)
+		dai->driver->resume(dai);
+}
-- 
cgit v1.2.3


From cfd9b5fbfe1e8763018aea2600aa0d6ff015ebfc Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:34:56 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_probe()

Current ALSA SoC is directly using dai->driver->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_probe() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87k1cahn26.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  1 +
 sound/soc/soc-core.c    | 15 +++++++--------
 sound/soc/soc-dai.c     |  7 +++++++
 3 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index ed78e34a814e..da8d8b889089 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -164,6 +164,7 @@ snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
 				    struct snd_pcm_substream *substream);
 void snd_soc_dai_suspend(struct snd_soc_dai *dai);
 void snd_soc_dai_resume(struct snd_soc_dai *dai);
+int snd_soc_dai_probe(struct snd_soc_dai *dai);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 5c02f90cea69..3e73468225f9 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1434,18 +1434,17 @@ static int soc_probe_link_components(struct snd_soc_card *card,
 
 static int soc_probe_dai(struct snd_soc_dai *dai, int order)
 {
+	int ret;
+
 	if (dai->probed ||
 	    dai->driver->probe_order != order)
 		return 0;
 
-	if (dai->driver->probe) {
-		int ret = dai->driver->probe(dai);
-
-		if (ret < 0) {
-			dev_err(dai->dev, "ASoC: failed to probe DAI %s: %d\n",
-				dai->name, ret);
-			return ret;
-		}
+	ret = snd_soc_dai_probe(dai);
+	if (ret < 0) {
+		dev_err(dai->dev, "ASoC: failed to probe DAI %s: %d\n",
+			dai->name, ret);
+		return ret;
 	}
 
 	dai->probed = 1;
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index ddb6f217c0ed..55c1fac99613 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -365,3 +365,10 @@ void snd_soc_dai_resume(struct snd_soc_dai *dai)
 	if (dai->driver->resume)
 		dai->driver->resume(dai);
 }
+
+int snd_soc_dai_probe(struct snd_soc_dai *dai)
+{
+	if (dai->driver->probe)
+		return dai->driver->probe(dai);
+	return 0;
+}
-- 
cgit v1.2.3


From dcdab5820edd6123911dbd767ee1e389008b6a83 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:35:05 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_remove()

Current ALSA SoC is directly using dai->driver->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_remvoe() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87imruhn1x.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  1 +
 sound/soc/soc-core.c    | 13 ++++++-------
 sound/soc/soc-dai.c     |  7 +++++++
 3 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index da8d8b889089..2a11f177ce01 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -165,6 +165,7 @@ snd_pcm_sframes_t snd_soc_dai_delay(struct snd_soc_dai *dai,
 void snd_soc_dai_suspend(struct snd_soc_dai *dai);
 void snd_soc_dai_resume(struct snd_soc_dai *dai);
 int snd_soc_dai_probe(struct snd_soc_dai *dai);
+int snd_soc_dai_remove(struct snd_soc_dai *dai);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 3e73468225f9..727fd342b3fb 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -992,13 +992,12 @@ static void soc_remove_dai(struct snd_soc_dai *dai, int order)
 	    dai->driver->remove_order != order)
 		return;
 
-	if (dai->driver->remove) {
-		err = dai->driver->remove(dai);
-		if (err < 0)
-			dev_err(dai->dev,
-				"ASoC: failed to remove %s: %d\n",
-				dai->name, err);
-	}
+	err = snd_soc_dai_remove(dai);
+	if (err < 0)
+		dev_err(dai->dev,
+			"ASoC: failed to remove %s: %d\n",
+			dai->name, err);
+
 	dai->probed = 0;
 }
 
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 55c1fac99613..384765c747da 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -372,3 +372,10 @@ int snd_soc_dai_probe(struct snd_soc_dai *dai)
 		return dai->driver->probe(dai);
 	return 0;
 }
+
+int snd_soc_dai_remove(struct snd_soc_dai *dai)
+{
+	if (dai->driver->remove)
+		return dai->driver->remove(dai);
+	return 0;
+}
-- 
cgit v1.2.3


From b423c4202135f7794e0a9c55a884f5933d8e7156 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:35:29 +0900
Subject: ASoC: soc-dai: add snd_soc_dai_compress_new()

Current ALSA SoC is directly using dai->driver->xxx,
thus, it has deep nested bracket, and it makes code unreadable.
This patch adds new snd_soc_dai_compress_new() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87h87ehn1a.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h |  2 ++
 sound/soc/soc-core.c    | 15 ++++++++-------
 sound/soc/soc-dai.c     |  8 ++++++++
 3 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 2a11f177ce01..0f8b09520020 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -166,6 +166,8 @@ void snd_soc_dai_suspend(struct snd_soc_dai *dai);
 void snd_soc_dai_resume(struct snd_soc_dai *dai);
 int snd_soc_dai_probe(struct snd_soc_dai *dai);
 int snd_soc_dai_remove(struct snd_soc_dai *dai);
+int snd_soc_dai_compress_new(struct snd_soc_dai *dai,
+			     struct snd_soc_pcm_runtime *rtd, int num);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 727fd342b3fb..458b090f026a 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1545,15 +1545,16 @@ static int soc_probe_link_dais(struct snd_soc_card *card,
 			num = rtd->dai_link->id;
 	}
 
-	if (cpu_dai->driver->compress_new) {
-		/* create compress_device" */
-		ret = cpu_dai->driver->compress_new(rtd, num);
-		if (ret < 0) {
+	/* create compress_device if possible */
+	ret = snd_soc_dai_compress_new(cpu_dai, rtd, num);
+	if (ret != -ENOTSUPP) {
+		if (ret < 0)
 			dev_err(card->dev, "ASoC: can't create compress %s\n",
 					 dai_link->stream_name);
-			return ret;
-		}
-	} else if (!dai_link->params) {
+		return ret;
+	}
+
+	if (!dai_link->params) {
 		/* create the pcm */
 		ret = soc_new_pcm(rtd, num);
 		if (ret < 0) {
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index 384765c747da..e6f161b9f975 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -379,3 +379,11 @@ int snd_soc_dai_remove(struct snd_soc_dai *dai)
 		return dai->driver->remove(dai);
 	return 0;
 }
+
+int snd_soc_dai_compress_new(struct snd_soc_dai *dai,
+			     struct snd_soc_pcm_runtime *rtd, int num)
+{
+	if (dai->driver->compress_new)
+		return dai->driver->compress_new(rtd, num);
+	return -ENOTSUPP;
+}
-- 
cgit v1.2.3


From 467fece8fbc6774a3a3bd0981e1a342fb5022706 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 22 Jul 2019 10:36:16 +0900
Subject: ASoC: soc-dai: move snd_soc_dai_stream_valid() to soc-dai.c

snd_soc_dai_stream_valid() is function to check stream validity.
But, some code is using it, some code are checking stream->channels_min
directly. Doing samethings by different method is confusable.
This patch uses same funcntion for same purpose.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87ftmyhmzz.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h  |  1 +
 sound/soc/soc-compress.c |  9 ++++-----
 sound/soc/soc-dai.c      | 18 ++++++++++++++++++
 sound/soc/soc-pcm.c      | 39 ++++++++++-----------------------------
 4 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index 0f8b09520020..dc48fe081a20 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -168,6 +168,7 @@ int snd_soc_dai_probe(struct snd_soc_dai *dai);
 int snd_soc_dai_remove(struct snd_soc_dai *dai);
 int snd_soc_dai_compress_new(struct snd_soc_dai *dai,
 			     struct snd_soc_pcm_runtime *rtd, int num);
+bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream);
 
 struct snd_soc_dai_ops {
 	/*
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index ddef4ff677ce..289211069a1e 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -872,14 +872,13 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
 	}
 
 	/* check client and interface hw capabilities */
-	if (codec_dai->driver->playback.channels_min)
+	if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) &&
+	    snd_soc_dai_stream_valid(cpu_dai,   SNDRV_PCM_STREAM_PLAYBACK))
 		playback = 1;
-	if (codec_dai->driver->capture.channels_min)
+	if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_CAPTURE) &&
+	    snd_soc_dai_stream_valid(cpu_dai,   SNDRV_PCM_STREAM_CAPTURE))
 		capture = 1;
 
-	capture = capture && cpu_dai->driver->capture.channels_min;
-	playback = playback && cpu_dai->driver->playback.channels_min;
-
 	/*
 	 * Compress devices are unidirectional so only one of the directions
 	 * should be set, check for that (xor)
diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c
index e6f161b9f975..1c7f63871c1d 100644
--- a/sound/soc/soc-dai.c
+++ b/sound/soc/soc-dai.c
@@ -387,3 +387,21 @@ int snd_soc_dai_compress_new(struct snd_soc_dai *dai,
 		return dai->driver->compress_new(rtd, num);
 	return -ENOTSUPP;
 }
+
+/*
+ * snd_soc_dai_stream_valid() - check if a DAI supports the given stream
+ *
+ * Returns true if the DAI supports the indicated stream type.
+ */
+bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int dir)
+{
+	struct snd_soc_pcm_stream *stream;
+
+	if (dir == SNDRV_PCM_STREAM_PLAYBACK)
+		stream = &dai->driver->playback;
+	else
+		stream = &dai->driver->capture;
+
+	/* If the codec specifies any channels at all, it supports the stream */
+	return stream->channels_min;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index f3137723301c..fabeac164a6c 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -29,24 +29,6 @@
 
 #define DPCM_MAX_BE_USERS	8
 
-/*
- * snd_soc_dai_stream_valid() - check if a DAI supports the given stream
- *
- * Returns true if the DAI supports the indicated stream type.
- */
-static bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream)
-{
-	struct snd_soc_pcm_stream *codec_stream;
-
-	if (stream == SNDRV_PCM_STREAM_PLAYBACK)
-		codec_stream = &dai->driver->playback;
-	else
-		codec_stream = &dai->driver->capture;
-
-	/* If the codec specifies any channels at all, it supports the stream */
-	return codec_stream->channels_min;
-}
-
 /**
  * snd_soc_runtime_activate() - Increment active count for PCM runtime components
  * @rtd: ASoC PCM runtime that is activated
@@ -2688,8 +2670,8 @@ static int soc_dpcm_fe_runtime_update(struct snd_soc_pcm_runtime *fe, int new)
 		new ? "new" : "old", fe->dai_link->name);
 
 	/* skip if FE doesn't have playback capability */
-	if (!fe->cpu_dai->driver->playback.channels_min ||
-	    !fe->codec_dai->driver->playback.channels_min)
+	if (!snd_soc_dai_stream_valid(fe->cpu_dai,   SNDRV_PCM_STREAM_PLAYBACK) ||
+	    !snd_soc_dai_stream_valid(fe->codec_dai, SNDRV_PCM_STREAM_PLAYBACK))
 		goto capture;
 
 	/* skip if FE isn't currently playing */
@@ -2719,8 +2701,8 @@ static int soc_dpcm_fe_runtime_update(struct snd_soc_pcm_runtime *fe, int new)
 
 capture:
 	/* skip if FE doesn't have capture capability */
-	if (!fe->cpu_dai->driver->capture.channels_min ||
-	    !fe->codec_dai->driver->capture.channels_min)
+	if (!snd_soc_dai_stream_valid(fe->cpu_dai,   SNDRV_PCM_STREAM_CAPTURE) ||
+	    !snd_soc_dai_stream_valid(fe->codec_dai, SNDRV_PCM_STREAM_CAPTURE))
 		return 0;
 
 	/* skip if FE isn't currently capturing */
@@ -3030,14 +3012,13 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 		capture = rtd->dai_link->dpcm_capture;
 	} else {
 		for_each_rtd_codec_dai(rtd, i, codec_dai) {
-			if (codec_dai->driver->playback.channels_min)
+			if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_PLAYBACK) &&
+			    snd_soc_dai_stream_valid(cpu_dai,   SNDRV_PCM_STREAM_PLAYBACK))
 				playback = 1;
-			if (codec_dai->driver->capture.channels_min)
+			if (snd_soc_dai_stream_valid(codec_dai, SNDRV_PCM_STREAM_CAPTURE) &&
+			    snd_soc_dai_stream_valid(cpu_dai,   SNDRV_PCM_STREAM_CAPTURE))
 				capture = 1;
 		}
-
-		capture = capture && cpu_dai->driver->capture.channels_min;
-		playback = playback && cpu_dai->driver->playback.channels_min;
 	}
 
 	if (rtd->dai_link->playback_only) {
@@ -3375,11 +3356,11 @@ static ssize_t dpcm_state_read_file(struct file *file, char __user *user_buf,
 	if (!buf)
 		return -ENOMEM;
 
-	if (fe->cpu_dai->driver->playback.channels_min)
+	if (snd_soc_dai_stream_valid(fe->cpu_dai, SNDRV_PCM_STREAM_PLAYBACK))
 		offset += dpcm_show_state(fe, SNDRV_PCM_STREAM_PLAYBACK,
 					buf + offset, out_count - offset);
 
-	if (fe->cpu_dai->driver->capture.channels_min)
+	if (snd_soc_dai_stream_valid(fe->cpu_dai, SNDRV_PCM_STREAM_CAPTURE))
 		offset += dpcm_show_state(fe, SNDRV_PCM_STREAM_CAPTURE,
 					buf + offset, out_count - offset);
 
-- 
cgit v1.2.3


From 6749d59016981bca6d7000e40bdb08eed78dfa6f Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Tue, 23 Jul 2019 15:33:59 +0100
Subject: net: sched: include mpls actions in hardware intermediate
 representation

A recent addition to TC actions is the ability to manipulate the MPLS
headers on packets.

In preparation to offload such actions to hardware, update the IR code to
accept and prepare the new actions.

Note that no driver currently impliments the MPLS dec_ttl action so this
is not included.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h   | 19 +++++++++++
 include/net/tc_act/tc_mpls.h | 75 ++++++++++++++++++++++++++++++++++++++++++++
 net/sched/cls_api.c          | 25 +++++++++++++++
 3 files changed, 119 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index b16d21636d69..00b9aab5fdc1 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -131,6 +131,9 @@ enum flow_action_id {
 	FLOW_ACTION_SAMPLE,
 	FLOW_ACTION_POLICE,
 	FLOW_ACTION_CT,
+	FLOW_ACTION_MPLS_PUSH,
+	FLOW_ACTION_MPLS_POP,
+	FLOW_ACTION_MPLS_MANGLE,
 };
 
 /* This is mirroring enum pedit_header_type definition for easy mapping between
@@ -184,6 +187,22 @@ struct flow_action_entry {
 			int action;
 			u16 zone;
 		} ct;
+		struct {				/* FLOW_ACTION_MPLS_PUSH */
+			u32		label;
+			__be16		proto;
+			u8		tc;
+			u8		bos;
+			u8		ttl;
+		} mpls_push;
+		struct {				/* FLOW_ACTION_MPLS_POP */
+			__be16		proto;
+		} mpls_pop;
+		struct {				/* FLOW_ACTION_MPLS_MANGLE */
+			u32		label;
+			u8		tc;
+			u8		bos;
+			u8		ttl;
+		} mpls_mangle;
 	};
 };
 
diff --git a/include/net/tc_act/tc_mpls.h b/include/net/tc_act/tc_mpls.h
index 4bc3d9250ef0..721de4f5733a 100644
--- a/include/net/tc_act/tc_mpls.h
+++ b/include/net/tc_act/tc_mpls.h
@@ -27,4 +27,79 @@ struct tcf_mpls {
 };
 #define to_mpls(a) ((struct tcf_mpls *)a)
 
+static inline bool is_tcf_mpls(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->id == TCA_ID_MPLS)
+		return true;
+#endif
+	return false;
+}
+
+static inline u32 tcf_mpls_action(const struct tc_action *a)
+{
+	u32 tcfm_action;
+
+	rcu_read_lock();
+	tcfm_action = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_action;
+	rcu_read_unlock();
+
+	return tcfm_action;
+}
+
+static inline __be16 tcf_mpls_proto(const struct tc_action *a)
+{
+	__be16 tcfm_proto;
+
+	rcu_read_lock();
+	tcfm_proto = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_proto;
+	rcu_read_unlock();
+
+	return tcfm_proto;
+}
+
+static inline u32 tcf_mpls_label(const struct tc_action *a)
+{
+	u32 tcfm_label;
+
+	rcu_read_lock();
+	tcfm_label = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_label;
+	rcu_read_unlock();
+
+	return tcfm_label;
+}
+
+static inline u8 tcf_mpls_tc(const struct tc_action *a)
+{
+	u8 tcfm_tc;
+
+	rcu_read_lock();
+	tcfm_tc = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_tc;
+	rcu_read_unlock();
+
+	return tcfm_tc;
+}
+
+static inline u8 tcf_mpls_bos(const struct tc_action *a)
+{
+	u8 tcfm_bos;
+
+	rcu_read_lock();
+	tcfm_bos = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_bos;
+	rcu_read_unlock();
+
+	return tcfm_bos;
+}
+
+static inline u8 tcf_mpls_ttl(const struct tc_action *a)
+{
+	u8 tcfm_ttl;
+
+	rcu_read_lock();
+	tcfm_ttl = rcu_dereference(to_mpls(a)->mpls_p)->tcfm_ttl;
+	rcu_read_unlock();
+
+	return tcfm_ttl;
+}
+
 #endif /* __NET_TC_MPLS_H */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index efd3cfb80a2a..3565d9aa09aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -36,6 +36,7 @@
 #include <net/tc_act/tc_sample.h>
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_ct.h>
+#include <net/tc_act/tc_mpls.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -3269,6 +3270,30 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->id = FLOW_ACTION_CT;
 			entry->ct.action = tcf_ct_action(act);
 			entry->ct.zone = tcf_ct_zone(act);
+		} else if (is_tcf_mpls(act)) {
+			switch (tcf_mpls_action(act)) {
+			case TCA_MPLS_ACT_PUSH:
+				entry->id = FLOW_ACTION_MPLS_PUSH;
+				entry->mpls_push.proto = tcf_mpls_proto(act);
+				entry->mpls_push.label = tcf_mpls_label(act);
+				entry->mpls_push.tc = tcf_mpls_tc(act);
+				entry->mpls_push.bos = tcf_mpls_bos(act);
+				entry->mpls_push.ttl = tcf_mpls_ttl(act);
+				break;
+			case TCA_MPLS_ACT_POP:
+				entry->id = FLOW_ACTION_MPLS_POP;
+				entry->mpls_pop.proto = tcf_mpls_proto(act);
+				break;
+			case TCA_MPLS_ACT_MODIFY:
+				entry->id = FLOW_ACTION_MPLS_MANGLE;
+				entry->mpls_mangle.label = tcf_mpls_label(act);
+				entry->mpls_mangle.tc = tcf_mpls_tc(act);
+				entry->mpls_mangle.bos = tcf_mpls_bos(act);
+				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
+				break;
+			default:
+				goto err_out;
+			}
 		} else {
 			goto err_out;
 		}
-- 
cgit v1.2.3


From d9b8aadaffa65809d146cf0f8632a22a946367d7 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 19 Jul 2019 11:18:15 +0200
Subject: bpf: fix narrower loads on s390

The very first check in test_pkt_md_access is failing on s390, which
happens because loading a part of a struct __sk_buff field produces
an incorrect result.

The preprocessed code of the check is:

{
	__u8 tmp = *((volatile __u8 *)&skb->len +
		((sizeof(skb->len) - sizeof(__u8)) / sizeof(__u8)));
	if (tmp != ((*(volatile __u32 *)&skb->len) & 0xFF)) return 2;
};

clang generates the following code for it:

      0:	71 21 00 03 00 00 00 00	r2 = *(u8 *)(r1 + 3)
      1:	61 31 00 00 00 00 00 00	r3 = *(u32 *)(r1 + 0)
      2:	57 30 00 00 00 00 00 ff	r3 &= 255
      3:	5d 23 00 1d 00 00 00 00	if r2 != r3 goto +29 <LBB0_10>

Finally, verifier transforms it to:

  0: (61) r2 = *(u32 *)(r1 +104)
  1: (bc) w2 = w2
  2: (74) w2 >>= 24
  3: (bc) w2 = w2
  4: (54) w2 &= 255
  5: (bc) w2 = w2

The problem is that when verifier emits the code to replace a partial
load of a struct __sk_buff field (*(u8 *)(r1 + 3)) with a full load of
struct sk_buff field (*(u32 *)(r1 + 104)), an optional shift and a
bitwise AND, it assumes that the machine is little endian and
incorrectly decides to use a shift.

Adjust shift count calculation to account for endianness.

Fixes: 31fd85816dbe ("bpf: permits narrower load from bpf program context fields")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 13 +++++++++++++
 kernel/bpf/verifier.c  |  4 ++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ff65d22cf336..92c6e31fb008 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -24,6 +24,7 @@
 
 #include <net/sch_generic.h>
 
+#include <asm/byteorder.h>
 #include <uapi/linux/filter.h>
 #include <uapi/linux/bpf.h>
 
@@ -747,6 +748,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 	return size <= size_default && (size & (size - 1)) == 0;
 }
 
+static inline u8
+bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
+{
+	u8 load_off = off & (size_default - 1);
+
+#ifdef __LITTLE_ENDIAN
+	return load_off * 8;
+#else
+	return (size_default - (load_off + size)) * 8;
+#endif
+}
+
 #define bpf_ctx_wide_access_ok(off, size, type, field)			\
 	(size == sizeof(__u64) &&					\
 	off >= offsetof(type, field) &&					\
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5900cbb966b1..c84d83f86141 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8616,8 +8616,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 
 		if (is_narrower_load && size < target_size) {
-			u8 shift = (off & (size_default - 1)) * 8;
-
+			u8 shift = bpf_ctx_narrow_load_shift(off, size,
+							     size_default);
 			if (ctx_field_size <= 4) {
 				if (shift)
 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
-- 
cgit v1.2.3


From 4f77e0956bd99901af2bfc6641437ff8bff8d4a4 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Mon, 15 Jul 2019 14:34:16 -0600
Subject: PCI: Remove pci_block_cfg_access() et al (unused)

Remove the following unused functions from include/linux/pci.h:

  pci_block_cfg_access()
  pci_block_cfg_access_in_atomic()
  pci_unblock_cfg_access()

These were added by fb51ccbf217c ("PCI: Rework config space blocking
services"), though no callers were added.

Link: https://lore.kernel.org/r/20190715203416.37547-1-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
---
 include/linux/pci.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..61b64fa1b0b1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1747,11 +1747,6 @@ static inline void pci_release_regions(struct pci_dev *dev) { }
 
 static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; }
 
-static inline void pci_block_cfg_access(struct pci_dev *dev) { }
-static inline int pci_block_cfg_access_in_atomic(struct pci_dev *dev)
-{ return 0; }
-static inline void pci_unblock_cfg_access(struct pci_dev *dev) { }
-
 static inline struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
 { return NULL; }
 static inline struct pci_dev *pci_get_slot(struct pci_bus *bus,
-- 
cgit v1.2.3


From 5523ca8f624dc9268bda109d37cbdc3efb5e79be Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 21 Jul 2019 14:50:39 +0200
Subject: scsi: fcoe: fix a typo

#define relative to FCOE CTLR start with FCOE_CTLR, except
FCOE_CTRL_SOL_TOV.

This is likely a typo and CTRL should be CTLR here as well.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fcoe/fcoe_ctlr.c | 2 +-
 include/scsi/libfcoe.h        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 590ec8009f52..1a85fe9e4b7b 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -1019,7 +1019,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 {
 	struct fcoe_fcf *fcf;
 	struct fcoe_fcf new;
-	unsigned long sol_tov = msecs_to_jiffies(FCOE_CTRL_SOL_TOV);
+	unsigned long sol_tov = msecs_to_jiffies(FCOE_CTLR_SOL_TOV);
 	int first = 0;
 	int mtu_valid;
 	int found = 0;
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index c50fb297e265..dc14b52577f7 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -31,7 +31,7 @@
  * FIP tunable parameters.
  */
 #define FCOE_CTLR_START_DELAY	2000	/* mS after first adv. to choose FCF */
-#define FCOE_CTRL_SOL_TOV	2000	/* min. solicitation interval (mS) */
+#define FCOE_CTLR_SOL_TOV	2000	/* min. solicitation interval (mS) */
 #define FCOE_CTLR_FCF_LIMIT	20	/* max. number of FCF entries */
 #define FCOE_CTLR_VN2VN_LOGIN_LIMIT 3	/* max. VN2VN rport login retries */
 
-- 
cgit v1.2.3


From 60649d4e0af6c26b6c423dea9c57f39e823fc0c5 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 23 Jul 2019 14:08:47 +0200
Subject: can: remove obsolete empty ioctl() handler

With commit c7cbdbf29f488a ("net: rework SIOCGSTAMP ioctl handling") the only
ioctl function in can_ioctl() has been removed.

As this SIOCGSTAMP ioctl command is now handled in net/socket.c we can entirely
remove the CAN specific ioctl functions.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h | 1 -
 net/can/af_can.c         | 9 ---------
 net/can/bcm.c            | 2 +-
 net/can/raw.c            | 2 +-
 4 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 6099bc18bd0c..f8284a94a13d 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -57,6 +57,5 @@ extern void can_rx_unregister(struct net *net, struct net_device *dev,
 			      void *data);
 
 extern int can_send(struct sk_buff *skb, int loop);
-extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
 #endif /* !_CAN_CORE_H */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 80281ef2ccbd..9c86de2da45e 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -87,15 +87,6 @@ static atomic_t skbcounter = ATOMIC_INIT(0);
  * af_can socket functions
  */
 
-int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	default:
-		return -ENOIOCTLCMD;
-	}
-}
-EXPORT_SYMBOL(can_ioctl);
-
 static void can_sock_destruct(struct sock *sk)
 {
 	skb_queue_purge(&sk->sk_receive_queue);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a34ee52f19ea..1eecf4d3e8d2 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1688,7 +1688,7 @@ static const struct proto_ops bcm_ops = {
 	.accept        = sock_no_accept,
 	.getname       = sock_no_getname,
 	.poll          = datagram_poll,
-	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
+	.ioctl         = sock_no_ioctl,
 	.gettstamp     = sock_gettstamp,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
diff --git a/net/can/raw.c b/net/can/raw.c
index afcbff063a67..bbbe3dd0abe9 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -845,7 +845,7 @@ static const struct proto_ops raw_ops = {
 	.accept        = sock_no_accept,
 	.getname       = raw_getname,
 	.poll          = datagram_poll,
-	.ioctl         = can_ioctl,	/* use can_ioctl() from af_can.c */
+	.ioctl         = sock_no_ioctl,
 	.gettstamp     = sock_gettstamp,
 	.listen        = sock_no_listen,
 	.shutdown      = sock_no_shutdown,
-- 
cgit v1.2.3


From fba76a58452694b9b13c07e48839fa84c75f57af Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 23 Jul 2019 15:17:55 +0200
Subject: can: Add SPDX license identifiers for CAN subsystem

Add missing SPDX identifiers for the CAN network layer and correct the SPDX
license for two of its include files to make sure the BSD-3-Clause applies
for the entire subsystem.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h | 2 +-
 include/linux/can/skb.h  | 2 +-
 net/can/af_can.c         | 1 +
 net/can/af_can.h         | 1 +
 net/can/bcm.c            | 1 +
 net/can/gw.c             | 1 +
 net/can/proc.c           | 1 +
 net/can/raw.c            | 1 +
 8 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index f8284a94a13d..708c10d3417a 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * linux/can/core.h
  *
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index b3379a97245c..a954def26c0d 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * linux/can/skb.h
  *
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 9c86de2da45e..76cf83b2bd40 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
 /*
  * af_can.c - Protocol family CAN core module
  *            (used by different CAN protocol modules)
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 9cb3719632bd..ef21f7c6bc80 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * Copyright (c) 2002-2007 Volkswagen Group Electronic Research
  * All rights reserved.
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 1eecf4d3e8d2..8da986b19d88 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /*
  * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content
  *
diff --git a/net/can/gw.c b/net/can/gw.c
index 5275ddf580bc..8abae841d504 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /*
  * gw.c - CAN frame Gateway/Router/Bridge with netlink interface
  *
diff --git a/net/can/proc.c b/net/can/proc.c
index 70fea17bb04c..edb822c31902 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /*
  * proc.c - procfs support for Protocol family CAN core module
  *
diff --git a/net/can/raw.c b/net/can/raw.c
index bbbe3dd0abe9..ff720272f7b7 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /*
  * raw.c - Raw sockets for protocol family CAN
  *
-- 
cgit v1.2.3


From 2f5947dfcaecb99f2dd559156eecbeb7b95e4c02 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 24 Jul 2019 09:24:49 +0200
Subject: Documentation: move Documentation/virtual to Documentation/virt

Renaming docs seems to be en vogue at the moment, so fix on of the
grossly misnamed directories.  We usually never use "virtual" as
a shortcut for virtualization in the kernel, but always virt,
as seen in the virt/ top-level directory.  Fix up the documentation
to match that.

Fixes: ed16648eb5b8 ("Move kvm, uml, and lguest subdirectories under a common "virtual" directory, I.E:")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/admin-guide/kernel-parameters.txt    |    2 +-
 Documentation/virt/index.rst                       |   18 +
 Documentation/virt/kvm/amd-memory-encryption.rst   |  250 +
 Documentation/virt/kvm/api.txt                     | 5296 ++++++++++++++++++++
 Documentation/virt/kvm/arm/hyp-abi.txt             |   53 +
 Documentation/virt/kvm/arm/psci.txt                |   61 +
 Documentation/virt/kvm/cpuid.rst                   |  107 +
 Documentation/virt/kvm/devices/README              |    1 +
 Documentation/virt/kvm/devices/arm-vgic-its.txt    |  181 +
 Documentation/virt/kvm/devices/arm-vgic-v3.txt     |  251 +
 Documentation/virt/kvm/devices/arm-vgic.txt        |  127 +
 Documentation/virt/kvm/devices/mpic.txt            |   53 +
 Documentation/virt/kvm/devices/s390_flic.txt       |  163 +
 Documentation/virt/kvm/devices/vcpu.txt            |   62 +
 Documentation/virt/kvm/devices/vfio.txt            |   36 +
 Documentation/virt/kvm/devices/vm.txt              |  270 +
 Documentation/virt/kvm/devices/xics.txt            |   66 +
 Documentation/virt/kvm/devices/xive.txt            |  197 +
 Documentation/virt/kvm/halt-polling.txt            |  136 +
 Documentation/virt/kvm/hypercalls.txt              |  154 +
 Documentation/virt/kvm/index.rst                   |   11 +
 Documentation/virt/kvm/locking.txt                 |  215 +
 Documentation/virt/kvm/mmu.txt                     |  449 ++
 Documentation/virt/kvm/msr.txt                     |  284 ++
 Documentation/virt/kvm/nested-vmx.txt              |  240 +
 Documentation/virt/kvm/ppc-pv.txt                  |  212 +
 Documentation/virt/kvm/review-checklist.txt        |   38 +
 Documentation/virt/kvm/s390-diag.txt               |   83 +
 Documentation/virt/kvm/timekeeping.txt             |  612 +++
 Documentation/virt/kvm/vcpu-requests.rst           |  307 ++
 Documentation/virt/paravirt_ops.rst                |   35 +
 Documentation/virt/uml/UserModeLinux-HOWTO.txt     | 4589 +++++++++++++++++
 Documentation/virtual/index.rst                    |   18 -
 .../virtual/kvm/amd-memory-encryption.rst          |  250 -
 Documentation/virtual/kvm/api.txt                  | 5296 --------------------
 Documentation/virtual/kvm/arm/hyp-abi.txt          |   53 -
 Documentation/virtual/kvm/arm/psci.txt             |   61 -
 Documentation/virtual/kvm/cpuid.rst                |  107 -
 Documentation/virtual/kvm/devices/README           |    1 -
 Documentation/virtual/kvm/devices/arm-vgic-its.txt |  181 -
 Documentation/virtual/kvm/devices/arm-vgic-v3.txt  |  251 -
 Documentation/virtual/kvm/devices/arm-vgic.txt     |  127 -
 Documentation/virtual/kvm/devices/mpic.txt         |   53 -
 Documentation/virtual/kvm/devices/s390_flic.txt    |  163 -
 Documentation/virtual/kvm/devices/vcpu.txt         |   62 -
 Documentation/virtual/kvm/devices/vfio.txt         |   36 -
 Documentation/virtual/kvm/devices/vm.txt           |  270 -
 Documentation/virtual/kvm/devices/xics.txt         |   66 -
 Documentation/virtual/kvm/devices/xive.txt         |  197 -
 Documentation/virtual/kvm/halt-polling.txt         |  136 -
 Documentation/virtual/kvm/hypercalls.txt           |  154 -
 Documentation/virtual/kvm/index.rst                |   11 -
 Documentation/virtual/kvm/locking.txt              |  215 -
 Documentation/virtual/kvm/mmu.txt                  |  449 --
 Documentation/virtual/kvm/msr.txt                  |  284 --
 Documentation/virtual/kvm/nested-vmx.txt           |  240 -
 Documentation/virtual/kvm/ppc-pv.txt               |  212 -
 Documentation/virtual/kvm/review-checklist.txt     |   38 -
 Documentation/virtual/kvm/s390-diag.txt            |   83 -
 Documentation/virtual/kvm/timekeeping.txt          |  612 ---
 Documentation/virtual/kvm/vcpu-requests.rst        |  307 --
 Documentation/virtual/paravirt_ops.rst             |   35 -
 Documentation/virtual/uml/UserModeLinux-HOWTO.txt  | 4589 -----------------
 MAINTAINERS                                        |    6 +-
 arch/powerpc/include/uapi/asm/kvm_para.h           |    2 +-
 arch/x86/kvm/mmu.c                                 |    2 +-
 include/uapi/linux/kvm.h                           |    4 +-
 tools/include/uapi/linux/kvm.h                     |    4 +-
 virt/kvm/arm/arm.c                                 |    2 +-
 virt/kvm/arm/vgic/vgic-mmio-v3.c                   |    2 +-
 virt/kvm/arm/vgic/vgic.h                           |    4 +-
 71 files changed, 14571 insertions(+), 14571 deletions(-)
 create mode 100644 Documentation/virt/index.rst
 create mode 100644 Documentation/virt/kvm/amd-memory-encryption.rst
 create mode 100644 Documentation/virt/kvm/api.txt
 create mode 100644 Documentation/virt/kvm/arm/hyp-abi.txt
 create mode 100644 Documentation/virt/kvm/arm/psci.txt
 create mode 100644 Documentation/virt/kvm/cpuid.rst
 create mode 100644 Documentation/virt/kvm/devices/README
 create mode 100644 Documentation/virt/kvm/devices/arm-vgic-its.txt
 create mode 100644 Documentation/virt/kvm/devices/arm-vgic-v3.txt
 create mode 100644 Documentation/virt/kvm/devices/arm-vgic.txt
 create mode 100644 Documentation/virt/kvm/devices/mpic.txt
 create mode 100644 Documentation/virt/kvm/devices/s390_flic.txt
 create mode 100644 Documentation/virt/kvm/devices/vcpu.txt
 create mode 100644 Documentation/virt/kvm/devices/vfio.txt
 create mode 100644 Documentation/virt/kvm/devices/vm.txt
 create mode 100644 Documentation/virt/kvm/devices/xics.txt
 create mode 100644 Documentation/virt/kvm/devices/xive.txt
 create mode 100644 Documentation/virt/kvm/halt-polling.txt
 create mode 100644 Documentation/virt/kvm/hypercalls.txt
 create mode 100644 Documentation/virt/kvm/index.rst
 create mode 100644 Documentation/virt/kvm/locking.txt
 create mode 100644 Documentation/virt/kvm/mmu.txt
 create mode 100644 Documentation/virt/kvm/msr.txt
 create mode 100644 Documentation/virt/kvm/nested-vmx.txt
 create mode 100644 Documentation/virt/kvm/ppc-pv.txt
 create mode 100644 Documentation/virt/kvm/review-checklist.txt
 create mode 100644 Documentation/virt/kvm/s390-diag.txt
 create mode 100644 Documentation/virt/kvm/timekeeping.txt
 create mode 100644 Documentation/virt/kvm/vcpu-requests.rst
 create mode 100644 Documentation/virt/paravirt_ops.rst
 create mode 100644 Documentation/virt/uml/UserModeLinux-HOWTO.txt
 delete mode 100644 Documentation/virtual/index.rst
 delete mode 100644 Documentation/virtual/kvm/amd-memory-encryption.rst
 delete mode 100644 Documentation/virtual/kvm/api.txt
 delete mode 100644 Documentation/virtual/kvm/arm/hyp-abi.txt
 delete mode 100644 Documentation/virtual/kvm/arm/psci.txt
 delete mode 100644 Documentation/virtual/kvm/cpuid.rst
 delete mode 100644 Documentation/virtual/kvm/devices/README
 delete mode 100644 Documentation/virtual/kvm/devices/arm-vgic-its.txt
 delete mode 100644 Documentation/virtual/kvm/devices/arm-vgic-v3.txt
 delete mode 100644 Documentation/virtual/kvm/devices/arm-vgic.txt
 delete mode 100644 Documentation/virtual/kvm/devices/mpic.txt
 delete mode 100644 Documentation/virtual/kvm/devices/s390_flic.txt
 delete mode 100644 Documentation/virtual/kvm/devices/vcpu.txt
 delete mode 100644 Documentation/virtual/kvm/devices/vfio.txt
 delete mode 100644 Documentation/virtual/kvm/devices/vm.txt
 delete mode 100644 Documentation/virtual/kvm/devices/xics.txt
 delete mode 100644 Documentation/virtual/kvm/devices/xive.txt
 delete mode 100644 Documentation/virtual/kvm/halt-polling.txt
 delete mode 100644 Documentation/virtual/kvm/hypercalls.txt
 delete mode 100644 Documentation/virtual/kvm/index.rst
 delete mode 100644 Documentation/virtual/kvm/locking.txt
 delete mode 100644 Documentation/virtual/kvm/mmu.txt
 delete mode 100644 Documentation/virtual/kvm/msr.txt
 delete mode 100644 Documentation/virtual/kvm/nested-vmx.txt
 delete mode 100644 Documentation/virtual/kvm/ppc-pv.txt
 delete mode 100644 Documentation/virtual/kvm/review-checklist.txt
 delete mode 100644 Documentation/virtual/kvm/s390-diag.txt
 delete mode 100644 Documentation/virtual/kvm/timekeeping.txt
 delete mode 100644 Documentation/virtual/kvm/vcpu-requests.rst
 delete mode 100644 Documentation/virtual/paravirt_ops.rst
 delete mode 100644 Documentation/virtual/uml/UserModeLinux-HOWTO.txt

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 099c5a4be95b..8a8880cec34b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2532,7 +2532,7 @@
 			mem_encrypt=on:		Activate SME
 			mem_encrypt=off:	Do not activate SME
 
-			Refer to Documentation/virtual/kvm/amd-memory-encryption.rst
+			Refer to Documentation/virt/kvm/amd-memory-encryption.rst
 			for details on when memory encryption can be activated.
 
 	mem_sleep_default=	[SUSPEND] Default system suspend mode:
diff --git a/Documentation/virt/index.rst b/Documentation/virt/index.rst
new file mode 100644
index 000000000000..062ffb527043
--- /dev/null
+++ b/Documentation/virt/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Linux Virtualization Support
+============================
+
+.. toctree::
+   :maxdepth: 2
+
+   kvm/index
+   paravirt_ops
+
+.. only:: html and subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst
new file mode 100644
index 000000000000..d18c97b4e140
--- /dev/null
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -0,0 +1,250 @@
+======================================
+Secure Encrypted Virtualization (SEV)
+======================================
+
+Overview
+========
+
+Secure Encrypted Virtualization (SEV) is a feature found on AMD processors.
+
+SEV is an extension to the AMD-V architecture which supports running
+virtual machines (VMs) under the control of a hypervisor. When enabled,
+the memory contents of a VM will be transparently encrypted with a key
+unique to that VM.
+
+The hypervisor can determine the SEV support through the CPUID
+instruction. The CPUID function 0x8000001f reports information related
+to SEV::
+
+	0x8000001f[eax]:
+			Bit[1] 	indicates support for SEV
+	    ...
+		  [ecx]:
+			Bits[31:0]  Number of encrypted guests supported simultaneously
+
+If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
+(MSR_K7_HWCR) can be used to determine if it can be enabled::
+
+	0xc001_0010:
+		Bit[23]	   1 = memory encryption can be enabled
+			   0 = memory encryption can not be enabled
+
+	0xc001_0015:
+		Bit[0]	   1 = memory encryption can be enabled
+			   0 = memory encryption can not be enabled
+
+When SEV support is available, it can be enabled in a specific VM by
+setting the SEV bit before executing VMRUN.::
+
+	VMCB[0x90]:
+		Bit[1]	    1 = SEV is enabled
+			    0 = SEV is disabled
+
+SEV hardware uses ASIDs to associate a memory encryption key with a VM.
+Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
+defined in the CPUID 0x8000001f[ecx] field.
+
+SEV Key Management
+==================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+KVM implements the following commands to support common lifecycle events of SEV
+guests, such as launching, running, snapshotting, migrating and decommissioning.
+
+1. KVM_SEV_INIT
+---------------
+
+The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
+context. In a typical workflow, this command should be the first command issued.
+
+Returns: 0 on success, -negative on error
+
+2. KVM_SEV_LAUNCH_START
+-----------------------
+
+The KVM_SEV_LAUNCH_START command is used for creating the memory encryption
+context. To create the encryption context, user must provide a guest policy,
+the owner's public Diffie-Hellman (PDH) key and session information.
+
+Parameters: struct  kvm_sev_launch_start (in/out)
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_start {
+                __u32 handle;           /* if zero then firmware creates a new handle */
+                __u32 policy;           /* guest's policy */
+
+                __u64 dh_uaddr;         /* userspace address pointing to the guest owner's PDH key */
+                __u32 dh_len;
+
+                __u64 session_addr;     /* userspace address which points to the guest session information */
+                __u32 session_len;
+        };
+
+On success, the 'handle' field contains a new handle and on error, a negative value.
+
+For more details, see SEV spec Section 6.2.
+
+3. KVM_SEV_LAUNCH_UPDATE_DATA
+-----------------------------
+
+The KVM_SEV_LAUNCH_UPDATE_DATA is used for encrypting a memory region. It also
+calculates a measurement of the memory contents. The measurement is a signature
+of the memory contents that can be sent to the guest owner as an attestation
+that the memory was encrypted correctly by the firmware.
+
+Parameters (in): struct  kvm_sev_launch_update_data
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_update {
+                __u64 uaddr;    /* userspace address to be encrypted (must be 16-byte aligned) */
+                __u32 len;      /* length of the data to be encrypted (must be 16-byte aligned) */
+        };
+
+For more details, see SEV spec Section 6.3.
+
+4. KVM_SEV_LAUNCH_MEASURE
+-------------------------
+
+The KVM_SEV_LAUNCH_MEASURE command is used to retrieve the measurement of the
+data encrypted by the KVM_SEV_LAUNCH_UPDATE_DATA command. The guest owner may
+wait to provide the guest with confidential information until it can verify the
+measurement. Since the guest owner knows the initial contents of the guest at
+boot, the measurement can be verified by comparing it to what the guest owner
+expects.
+
+Parameters (in): struct  kvm_sev_launch_measure
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_measure {
+                __u64 uaddr;    /* where to copy the measurement */
+                __u32 len;      /* length of measurement blob */
+        };
+
+For more details on the measurement verification flow, see SEV spec Section 6.4.
+
+5. KVM_SEV_LAUNCH_FINISH
+------------------------
+
+After completion of the launch flow, the KVM_SEV_LAUNCH_FINISH command can be
+issued to make the guest ready for the execution.
+
+Returns: 0 on success, -negative on error
+
+6. KVM_SEV_GUEST_STATUS
+-----------------------
+
+The KVM_SEV_GUEST_STATUS command is used to retrieve status information about a
+SEV-enabled guest.
+
+Parameters (out): struct kvm_sev_guest_status
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_guest_status {
+                __u32 handle;   /* guest handle */
+                __u32 policy;   /* guest policy */
+                __u8 state;     /* guest state (see enum below) */
+        };
+
+SEV guest state:
+
+::
+
+        enum {
+        SEV_STATE_INVALID = 0;
+        SEV_STATE_LAUNCHING,    /* guest is currently being launched */
+        SEV_STATE_SECRET,       /* guest is being launched and ready to accept the ciphertext data */
+        SEV_STATE_RUNNING,      /* guest is fully launched and running */
+        SEV_STATE_RECEIVING,    /* guest is being migrated in from another SEV machine */
+        SEV_STATE_SENDING       /* guest is getting migrated out to another SEV machine */
+        };
+
+7. KVM_SEV_DBG_DECRYPT
+----------------------
+
+The KVM_SEV_DEBUG_DECRYPT command can be used by the hypervisor to request the
+firmware to decrypt the data at the given memory region.
+
+Parameters (in): struct kvm_sev_dbg
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_dbg {
+                __u64 src_uaddr;        /* userspace address of data to decrypt */
+                __u64 dst_uaddr;        /* userspace address of destination */
+                __u32 len;              /* length of memory region to decrypt */
+        };
+
+The command returns an error if the guest policy does not allow debugging.
+
+8. KVM_SEV_DBG_ENCRYPT
+----------------------
+
+The KVM_SEV_DEBUG_ENCRYPT command can be used by the hypervisor to request the
+firmware to encrypt the data at the given memory region.
+
+Parameters (in): struct kvm_sev_dbg
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_dbg {
+                __u64 src_uaddr;        /* userspace address of data to encrypt */
+                __u64 dst_uaddr;        /* userspace address of destination */
+                __u32 len;              /* length of memory region to encrypt */
+        };
+
+The command returns an error if the guest policy does not allow debugging.
+
+9. KVM_SEV_LAUNCH_SECRET
+------------------------
+
+The KVM_SEV_LAUNCH_SECRET command can be used by the hypervisor to inject secret
+data after the measurement has been validated by the guest owner.
+
+Parameters (in): struct kvm_sev_launch_secret
+
+Returns: 0 on success, -negative on error
+
+::
+
+        struct kvm_sev_launch_secret {
+                __u64 hdr_uaddr;        /* userspace address containing the packet header */
+                __u32 hdr_len;
+
+                __u64 guest_uaddr;      /* the guest memory region where the secret should be injected */
+                __u32 guest_len;
+
+                __u64 trans_uaddr;      /* the hypervisor memory region which contains the secret */
+                __u32 trans_len;
+        };
+
+References
+==========
+
+
+See [white-paper]_, [api-spec]_, [amd-apm]_ and [kvm-forum]_ for more info.
+
+.. [white-paper] http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
+.. [api-spec] http://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
+.. [amd-apm] http://support.amd.com/TechDocs/24593.pdf (section 15.34)
+.. [kvm-forum]  http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf
diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
new file mode 100644
index 000000000000..2d067767b617
--- /dev/null
+++ b/Documentation/virt/kvm/api.txt
@@ -0,0 +1,5296 @@
+The Definitive KVM (Kernel-based Virtual Machine) API Documentation
+===================================================================
+
+1. General description
+----------------------
+
+The kvm API is a set of ioctls that are issued to control various aspects
+of a virtual machine.  The ioctls belong to three classes:
+
+ - System ioctls: These query and set global attributes which affect the
+   whole kvm subsystem.  In addition a system ioctl is used to create
+   virtual machines.
+
+ - VM ioctls: These query and set attributes that affect an entire virtual
+   machine, for example memory layout.  In addition a VM ioctl is used to
+   create virtual cpus (vcpus) and devices.
+
+   VM ioctls must be issued from the same process (address space) that was
+   used to create the VM.
+
+ - vcpu ioctls: These query and set attributes that control the operation
+   of a single virtual cpu.
+
+   vcpu ioctls should be issued from the same thread that was used to create
+   the vcpu, except for asynchronous vcpu ioctl that are marked as such in
+   the documentation.  Otherwise, the first ioctl after switching threads
+   could see a performance impact.
+
+ - device ioctls: These query and set attributes that control the operation
+   of a single device.
+
+   device ioctls must be issued from the same process (address space) that
+   was used to create the VM.
+
+2. File descriptors
+-------------------
+
+The kvm API is centered around file descriptors.  An initial
+open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
+can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
+handle will create a VM file descriptor which can be used to issue VM
+ioctls.  A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will
+create a virtual cpu or device and return a file descriptor pointing to
+the new resource.  Finally, ioctls on a vcpu or device fd can be used
+to control the vcpu or device.  For vcpus, this includes the important
+task of actually running guest code.
+
+In general file descriptors can be migrated among processes by means
+of fork() and the SCM_RIGHTS facility of unix domain socket.  These
+kinds of tricks are explicitly not supported by kvm.  While they will
+not cause harm to the host, their actual behavior is not guaranteed by
+the API.  See "General description" for details on the ioctl usage
+model that is supported by KVM.
+
+It is important to note that althought VM ioctls may only be issued from
+the process that created the VM, a VM's lifecycle is associated with its
+file descriptor, not its creator (process).  In other words, the VM and
+its resources, *including the associated address space*, are not freed
+until the last reference to the VM's file descriptor has been released.
+For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
+not be freed until both the parent (original) process and its child have
+put their references to the VM's file descriptor.
+
+Because a VM's resources are not freed until the last reference to its
+file descriptor is released, creating additional references to a VM via
+via fork(), dup(), etc... without careful consideration is strongly
+discouraged and may have unwanted side effects, e.g. memory allocated
+by and on behalf of the VM's process may not be freed/unaccounted when
+the VM is shut down.
+
+
+3. Extensions
+-------------
+
+As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
+incompatible change are allowed.  However, there is an extension
+facility that allows backward-compatible extensions to the API to be
+queried and used.
+
+The extension mechanism is not based on the Linux version number.
+Instead, kvm defines extension identifiers and a facility to query
+whether a particular extension identifier is available.  If it is, a
+set of ioctls is available for application use.
+
+
+4. API description
+------------------
+
+This section describes ioctls that can be used to control kvm guests.
+For each ioctl, the following information is provided along with a
+description:
+
+  Capability: which KVM extension provides this ioctl.  Can be 'basic',
+      which means that is will be provided by any kernel that supports
+      API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
+      means availability needs to be checked with KVM_CHECK_EXTENSION
+      (see section 4.4), or 'none' which means that while not all kernels
+      support this ioctl, there's no capability bit to check its
+      availability: for kernels that don't support the ioctl,
+      the ioctl returns -ENOTTY.
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Type: system, vm, or vcpu.
+
+  Parameters: what parameters are accepted by the ioctl.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+
+4.1 KVM_GET_API_VERSION
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: the constant KVM_API_VERSION (=12)
+
+This identifies the API version as the stable kvm API. It is not
+expected that this number will change.  However, Linux 2.6.20 and
+2.6.21 report earlier versions; these are not documented and not
+supported.  Applications should refuse to run if KVM_GET_API_VERSION
+returns a value other than 12.  If this check passes, all ioctls
+described as 'basic' will be available.
+
+
+4.2 KVM_CREATE_VM
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: machine type identifier (KVM_VM_*)
+Returns: a VM fd that can be used to control the new virtual machine.
+
+The new VM has no virtual cpus and no memory.
+You probably want to use 0 as machine type.
+
+In order to create user controlled virtual machines on S390, check
+KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
+privileged user (CAP_SYS_ADMIN).
+
+To use hardware assisted virtualization on MIPS (VZ ASE) rather than
+the default trap & emulate implementation (which changes the virtual
+memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
+flag KVM_VM_MIPS_VZ.
+
+
+On arm64, the physical address size for a VM (IPA Size limit) is limited
+to 40bits by default. The limit can be configured if the host supports the
+extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
+KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
+identifier, where IPA_Bits is the maximum width of any physical
+address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
+machine type identifier.
+
+e.g, to configure a guest to use 48bit physical address size :
+
+    vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
+
+The requested size (IPA_Bits) must be :
+  0 - Implies default size, 40bits (for backward compatibility)
+
+  or
+
+  N - Implies N bits, where N is a positive integer such that,
+      32 <= N <= Host_IPA_Limit
+
+Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
+is dependent on the CPU capability and the kernel configuration. The limit can
+be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
+ioctl() at run-time.
+
+Please note that configuring the IPA size does not affect the capability
+exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
+size of the address translated by the stage2 level (guest physical to
+host physical address translations).
+
+
+4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
+
+Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_msr_list (in/out)
+Returns: 0 on success; -1 on error
+Errors:
+  EFAULT:    the msr index list cannot be read from or written to
+  E2BIG:     the msr index list is to be to fit in the array specified by
+             the user.
+
+struct kvm_msr_list {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 indices[0];
+};
+
+The user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
+indices array with their numbers.
+
+KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported.  The list
+varies by kvm version and host processor, but does not change otherwise.
+
+Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
+not returned in the MSR list, as different vcpus can have a different number
+of banks, as set via the KVM_X86_SETUP_MCE ioctl.
+
+KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
+to the KVM_GET_MSRS system ioctl.  This lets userspace probe host capabilities
+and processor features that are exposed via MSRs (e.g., VMX capabilities).
+This list also varies by kvm version and host processor, but does not change
+otherwise.
+
+
+4.4 KVM_CHECK_EXTENSION
+
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
+Architectures: all
+Type: system ioctl, vm ioctl
+Parameters: extension identifier (KVM_CAP_*)
+Returns: 0 if unsupported; 1 (or some other positive integer) if supported
+
+The API allows the application to query about extensions to the core
+kvm API.  Userspace passes an extension identifier (an integer) and
+receives an integer that describes the extension availability.
+Generally 0 means no and 1 means yes, but some extensions may report
+additional information in the integer return value.
+
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
+
+4.5 KVM_GET_VCPU_MMAP_SIZE
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: size of vcpu mmap area, in bytes
+
+The KVM_RUN ioctl (cf.) communicates with userspace via a shared
+memory region.  This ioctl returns the size of that region.  See the
+KVM_RUN documentation for details.
+
+
+4.6 KVM_SET_MEMORY_REGION
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_memory_region (in)
+Returns: 0 on success, -1 on error
+
+This ioctl is obsolete and has been removed.
+
+
+4.7 KVM_CREATE_VCPU
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: vcpu id (apic id on x86)
+Returns: vcpu fd on success, -1 on error
+
+This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
+The vcpu id is an integer in the range [0, max_vcpu_id).
+
+The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
+the KVM_CHECK_EXTENSION ioctl() at run-time.
+The maximum possible value for max_vcpus can be retrieved using the
+KVM_CAP_MAX_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
+cpus max.
+If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
+same as the value returned from KVM_CAP_NR_VCPUS.
+
+The maximum possible value for max_vcpu_id can be retrieved using the
+KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id
+is the same as the value returned from KVM_CAP_MAX_VCPUS.
+
+On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
+threads in one or more virtual CPU cores.  (This is because the
+hardware requires all the hardware threads in a CPU core to be in the
+same partition.)  The KVM_CAP_PPC_SMT capability indicates the number
+of vcpus per virtual core (vcore).  The vcore id is obtained by
+dividing the vcpu id by the number of vcpus per vcore.  The vcpus in a
+given vcore will always be in the same physical core as each other
+(though that might be a different physical core from time to time).
+Userspace can control the threading (SMT) mode of the guest by its
+allocation of vcpu ids.  For example, if userspace wants
+single-threaded guest vcpus, it should make all vcpu ids be a multiple
+of the number of vcpus per vcore.
+
+For virtual cpus that have been created with S390 user controlled virtual
+machines, the resulting vcpu fd can be memory mapped at page offset
+KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
+cpu's hardware control block.
+
+
+4.8 KVM_GET_DIRTY_LOG (vm ioctl)
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_dirty_log (in/out)
+Returns: 0 on success, -1 on error
+
+/* for KVM_GET_DIRTY_LOG */
+struct kvm_dirty_log {
+	__u32 slot;
+	__u32 padding;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding;
+	};
+};
+
+Given a memory slot, return a bitmap containing any pages dirtied
+since the last call to this ioctl.  Bit 0 is the first page in the
+memory slot.  Ensure the entire structure is cleared to avoid padding
+issues.
+
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
+the address space for which you want to return the dirty bitmap.
+They must be less than the value that KVM_CHECK_EXTENSION returns for
+the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+
+The bits in the dirty bitmap are cleared before the ioctl returns, unless
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled.  For more information,
+see the description of the capability.
+
+4.9 KVM_SET_MEMORY_ALIAS
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_memory_alias (in)
+Returns: 0 (success), -1 (error)
+
+This ioctl is obsolete and has been removed.
+
+
+4.10 KVM_RUN
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+Errors:
+  EINTR:     an unmasked signal is pending
+
+This ioctl is used to run a guest virtual cpu.  While there are no
+explicit parameters, there is an implicit parameter block that can be
+obtained by mmap()ing the vcpu fd at offset 0, with the size given by
+KVM_GET_VCPU_MMAP_SIZE.  The parameter block is formatted as a 'struct
+kvm_run' (see below).
+
+
+4.11 KVM_GET_REGS
+
+Capability: basic
+Architectures: all except ARM, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_regs (out)
+Returns: 0 on success, -1 on error
+
+Reads the general purpose registers from the vcpu.
+
+/* x86 */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 rax, rbx, rcx, rdx;
+	__u64 rsi, rdi, rsp, rbp;
+	__u64 r8,  r9,  r10, r11;
+	__u64 r12, r13, r14, r15;
+	__u64 rip, rflags;
+};
+
+/* mips */
+struct kvm_regs {
+	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+	__u64 gpr[32];
+	__u64 hi;
+	__u64 lo;
+	__u64 pc;
+};
+
+
+4.12 KVM_SET_REGS
+
+Capability: basic
+Architectures: all except ARM, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_regs (in)
+Returns: 0 on success, -1 on error
+
+Writes the general purpose registers into the vcpu.
+
+See KVM_GET_REGS for the data structure.
+
+
+4.13 KVM_GET_SREGS
+
+Capability: basic
+Architectures: x86, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (out)
+Returns: 0 on success, -1 on error
+
+Reads special registers from the vcpu.
+
+/* x86 */
+struct kvm_sregs {
+	struct kvm_segment cs, ds, es, fs, gs, ss;
+	struct kvm_segment tr, ldt;
+	struct kvm_dtable gdt, idt;
+	__u64 cr0, cr2, cr3, cr4, cr8;
+	__u64 efer;
+	__u64 apic_base;
+	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+/* ppc -- see arch/powerpc/include/uapi/asm/kvm.h */
+
+interrupt_bitmap is a bitmap of pending external interrupts.  At most
+one bit may be set.  This interrupt has been acknowledged by the APIC
+but not yet injected into the cpu core.
+
+
+4.14 KVM_SET_SREGS
+
+Capability: basic
+Architectures: x86, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (in)
+Returns: 0 on success, -1 on error
+
+Writes special registers into the vcpu.  See KVM_GET_SREGS for the
+data structures.
+
+
+4.15 KVM_TRANSLATE
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_translation (in/out)
+Returns: 0 on success, -1 on error
+
+Translates a virtual address according to the vcpu's current address
+translation mode.
+
+struct kvm_translation {
+	/* in */
+	__u64 linear_address;
+
+	/* out */
+	__u64 physical_address;
+	__u8  valid;
+	__u8  writeable;
+	__u8  usermode;
+	__u8  pad[5];
+};
+
+
+4.16 KVM_INTERRUPT
+
+Capability: basic
+Architectures: x86, ppc, mips
+Type: vcpu ioctl
+Parameters: struct kvm_interrupt (in)
+Returns: 0 on success, negative on failure.
+
+Queues a hardware interrupt vector to be injected.
+
+/* for KVM_INTERRUPT */
+struct kvm_interrupt {
+	/* in */
+	__u32 irq;
+};
+
+X86:
+
+Returns: 0 on success,
+	 -EEXIST if an interrupt is already enqueued
+	 -EINVAL the the irq number is invalid
+	 -ENXIO if the PIC is in the kernel
+	 -EFAULT if the pointer is invalid
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ioctl is useful if the in-kernel PIC is not used.
+
+PPC:
+
+Queues an external interrupt to be injected. This ioctl is overleaded
+with 3 different irq values:
+
+a) KVM_INTERRUPT_SET
+
+  This injects an edge type external interrupt into the guest once it's ready
+  to receive interrupts. When injected, the interrupt is done.
+
+b) KVM_INTERRUPT_UNSET
+
+  This unsets any pending interrupt.
+
+  Only available with KVM_CAP_PPC_UNSET_IRQ.
+
+c) KVM_INTERRUPT_SET_LEVEL
+
+  This injects a level type external interrupt into the guest context. The
+  interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
+  is triggered.
+
+  Only available with KVM_CAP_PPC_IRQ_LEVEL.
+
+Note that any value for 'irq' other than the ones stated above is invalid
+and incurs unexpected behavior.
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
+MIPS:
+
+Queues an external interrupt to be injected into the virtual CPU. A negative
+interrupt number dequeues the interrupt.
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
+
+4.17 KVM_DEBUG_GUEST
+
+Capability: basic
+Architectures: none
+Type: vcpu ioctl
+Parameters: none)
+Returns: -1 on error
+
+Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
+
+
+4.18 KVM_GET_MSRS
+
+Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
+Architectures: x86
+Type: system ioctl, vcpu ioctl
+Parameters: struct kvm_msrs (in/out)
+Returns: number of msrs successfully returned;
+        -1 on error
+
+When used as a system ioctl:
+Reads the values of MSR-based features that are available for the VM.  This
+is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
+The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
+in a system ioctl.
+
+When used as a vcpu ioctl:
+Reads model-specific registers from the vcpu.  Supported msr indices can
+be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
+
+struct kvm_msrs {
+	__u32 nmsrs; /* number of msrs in entries */
+	__u32 pad;
+
+	struct kvm_msr_entry entries[0];
+};
+
+struct kvm_msr_entry {
+	__u32 index;
+	__u32 reserved;
+	__u64 data;
+};
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array) and the 'index' member of each array entry.
+kvm will fill in the 'data' member.
+
+
+4.19 KVM_SET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in)
+Returns: 0 on success, -1 on error
+
+Writes model-specific registers to the vcpu.  See KVM_GET_MSRS for the
+data structures.
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array), and the 'index' and 'data' members of each
+array entry.
+
+
+4.20 KVM_SET_CPUID
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_cpuid (in)
+Returns: 0 on success, -1 on error
+
+Defines the vcpu responses to the cpuid instruction.  Applications
+should use the KVM_SET_CPUID2 ioctl if available.
+
+
+struct kvm_cpuid_entry {
+	__u32 function;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry entries[0];
+};
+
+
+4.21 KVM_SET_SIGNAL_MASK
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_signal_mask (in)
+Returns: 0 on success, -1 on error
+
+Defines which signals are blocked during execution of KVM_RUN.  This
+signal mask temporarily overrides the threads signal mask.  Any
+unblocked signal received (except SIGKILL and SIGSTOP, which retain
+their traditional behaviour) will cause KVM_RUN to return with -EINTR.
+
+Note the signal will only be delivered if not blocked by the original
+signal mask.
+
+/* for KVM_SET_SIGNAL_MASK */
+struct kvm_signal_mask {
+	__u32 len;
+	__u8  sigset[0];
+};
+
+
+4.22 KVM_GET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (out)
+Returns: 0 on success, -1 on error
+
+Reads the floating point state from the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+
+4.23 KVM_SET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (in)
+Returns: 0 on success, -1 on error
+
+Writes the floating point state to the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+	__u8  fpr[8][16];
+	__u16 fcw;
+	__u16 fsw;
+	__u8  ftwx;  /* in fxsave format */
+	__u8  pad1;
+	__u16 last_opcode;
+	__u64 last_ip;
+	__u64 last_dp;
+	__u8  xmm[16][16];
+	__u32 mxcsr;
+	__u32 pad2;
+};
+
+
+4.24 KVM_CREATE_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
+Architectures: x86, ARM, arm64, s390
+Type: vm ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Creates an interrupt controller model in the kernel.
+On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
+future vcpus to have a local APIC.  IRQ routing for GSIs 0-15 is set to both
+PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
+On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
+KVM_CREATE_DEVICE, which also supports creating a GICv2.  Using
+KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
+On s390, a dummy irq routing table is created.
+
+Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
+before KVM_CREATE_IRQCHIP can be used.
+
+
+4.25 KVM_IRQ_LINE
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, arm, arm64
+Type: vm ioctl
+Parameters: struct kvm_irq_level
+Returns: 0 on success, -1 on error
+
+Sets the level of a GSI input to the interrupt controller model in the kernel.
+On some architectures it is required that an interrupt controller model has
+been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
+interrupts require the level to be set to 1 and then back to 0.
+
+On real hardware, interrupt pins can be active-low or active-high.  This
+does not matter for the level field of struct kvm_irq_level: 1 always
+means active (asserted), 0 means inactive (deasserted).
+
+x86 allows the operating system to program the interrupt polarity
+(active-low/active-high) for level-triggered interrupts, and KVM used
+to consider the polarity.  However, due to bitrot in the handling of
+active-low interrupts, the above convention is now valid on x86 too.
+This is signaled by KVM_CAP_X86_IOAPIC_POLARITY_IGNORED.  Userspace
+should not present interrupts to the guest as active-low unless this
+capability is present (or unless it is not using the in-kernel irqchip,
+of course).
+
+
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus.  The irq field is interpreted
+like this:
+
+  bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
+  field: | irq_type  | vcpu_index |     irq_id     |
+
+The irq_type field has the following values:
+- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
+- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+               (the vcpu_index field is ignored)
+- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+
+(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
+
+In both cases, level is used to assert/deassert the line.
+
+struct kvm_irq_level {
+	union {
+		__u32 irq;     /* GSI */
+		__s32 status;  /* not used for KVM_IRQ_LEVEL */
+	};
+	__u32 level;           /* 0 or 1 */
+};
+
+
+4.26 KVM_GET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in/out)
+Returns: 0 on success, -1 on error
+
+Reads the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP into a buffer provided by the caller.
+
+struct kvm_irqchip {
+	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+	__u32 pad;
+        union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+
+4.27 KVM_SET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP from a buffer provided by the caller.
+
+struct kvm_irqchip {
+	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+	__u32 pad;
+        union {
+		char dummy[512];  /* reserving space */
+		struct kvm_pic_state pic;
+		struct kvm_ioapic_state ioapic;
+	} chip;
+};
+
+
+4.28 KVM_XEN_HVM_CONFIG
+
+Capability: KVM_CAP_XEN_HVM
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_xen_hvm_config (in)
+Returns: 0 on success, -1 on error
+
+Sets the MSR that the Xen HVM guest uses to initialize its hypercall
+page, and provides the starting address and size of the hypercall
+blobs in userspace.  When the guest writes the MSR, kvm copies one
+page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
+memory.
+
+struct kvm_xen_hvm_config {
+	__u32 flags;
+	__u32 msr;
+	__u64 blob_addr_32;
+	__u64 blob_addr_64;
+	__u8 blob_size_32;
+	__u8 blob_size_64;
+	__u8 pad2[30];
+};
+
+
+4.29 KVM_GET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (out)
+Returns: 0 on success, -1 on error
+
+Gets the current timestamp of kvmclock as seen by the current guest. In
+conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
+such as migration.
+
+When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
+set of bits that KVM can return in struct kvm_clock_data's flag member.
+
+The only flag defined now is KVM_CLOCK_TSC_STABLE.  If set, the returned
+value is the exact kvmclock value seen by all VCPUs at the instant
+when KVM_GET_CLOCK was called.  If clear, the returned value is simply
+CLOCK_MONOTONIC plus a constant offset; the offset can be modified
+with KVM_SET_CLOCK.  KVM will try to make all VCPUs follow this clock,
+but the exact value read by each VCPU could differ, because the host
+TSC is not stable.
+
+struct kvm_clock_data {
+	__u64 clock;  /* kvmclock current value */
+	__u32 flags;
+	__u32 pad[9];
+};
+
+
+4.30 KVM_SET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (in)
+Returns: 0 on success, -1 on error
+
+Sets the current timestamp of kvmclock to the value specified in its parameter.
+In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
+such as migration.
+
+struct kvm_clock_data {
+	__u64 clock;  /* kvmclock current value */
+	__u32 flags;
+	__u32 pad[9];
+};
+
+
+4.31 KVM_GET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Extended by: KVM_CAP_INTR_SHADOW
+Architectures: x86, arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_event (out)
+Returns: 0 on success, -1 on error
+
+X86:
+
+Gets currently pending exceptions, interrupts, and NMIs as well as related
+states of the vcpu.
+
+struct kvm_vcpu_events {
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 has_error_code;
+		__u8 pending;
+		__u32 error_code;
+	} exception;
+	struct {
+		__u8 injected;
+		__u8 nr;
+		__u8 soft;
+		__u8 shadow;
+	} interrupt;
+	struct {
+		__u8 injected;
+		__u8 pending;
+		__u8 masked;
+		__u8 pad;
+	} nmi;
+	__u32 sipi_vector;
+	__u32 flags;
+	struct {
+		__u8 smm;
+		__u8 pending;
+		__u8 smm_inside_nmi;
+		__u8 latched_init;
+	} smi;
+	__u8 reserved[27];
+	__u8 exception_has_payload;
+	__u64 exception_payload;
+};
+
+The following bits are defined in the flags field:
+
+- KVM_VCPUEVENT_VALID_SHADOW may be set to signal that
+  interrupt.shadow contains a valid state.
+
+- KVM_VCPUEVENT_VALID_SMM may be set to signal that smi contains a
+  valid state.
+
+- KVM_VCPUEVENT_VALID_PAYLOAD may be set to signal that the
+  exception_has_payload, exception_payload, and exception.pending
+  fields contain a valid state. This bit will be set whenever
+  KVM_CAP_EXCEPTION_PAYLOAD is enabled.
+
+ARM/ARM64:
+
+If the guest accesses a device that is being emulated by the host kernel in
+such a way that a real device would generate a physical SError, KVM may make
+a virtual SError pending for that VCPU. This system error interrupt remains
+pending until the guest takes the exception by unmasking PSTATE.A.
+
+Running the VCPU may cause it to take a pending SError, or make an access that
+causes an SError to become pending. The event's description is only valid while
+the VPCU is not running.
+
+This API provides a way to read and write the pending 'event' state that is not
+visible to the guest. To save, restore or migrate a VCPU the struct representing
+the state can be read then written using this GET/SET API, along with the other
+guest-visible registers. It is not possible to 'cancel' an SError that has been
+made pending.
+
+A device being emulated in user-space may also wish to generate an SError. To do
+this the events structure can be populated by user-space. The current state
+should be read first, to ensure no existing SError is pending. If an existing
+SError is pending, the architecture's 'Multiple SError interrupts' rules should
+be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and
+Serviceability (RAS) Specification").
+
+SError exceptions always have an ESR value. Some CPUs have the ability to
+specify what the virtual SError's ESR value should be. These systems will
+advertise KVM_CAP_ARM_INJECT_SERROR_ESR. In this case exception.has_esr will
+always have a non-zero value when read, and the agent making an SError pending
+should specify the ISS field in the lower 24 bits of exception.serror_esr. If
+the system supports KVM_CAP_ARM_INJECT_SERROR_ESR, but user-space sets the events
+with exception.has_esr as zero, KVM will choose an ESR.
+
+Specifying exception.has_esr on a system that does not support it will return
+-EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
+will return -EINVAL.
+
+struct kvm_vcpu_events {
+	struct {
+		__u8 serror_pending;
+		__u8 serror_has_esr;
+		/* Align it to 8 bytes */
+		__u8 pad[6];
+		__u64 serror_esr;
+	} exception;
+	__u32 reserved[12];
+};
+
+4.32 KVM_SET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Extended by: KVM_CAP_INTR_SHADOW
+Architectures: x86, arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_event (in)
+Returns: 0 on success, -1 on error
+
+X86:
+
+Set pending exceptions, interrupts, and NMIs as well as related states of the
+vcpu.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
+Fields that may be modified asynchronously by running VCPUs can be excluded
+from the update. These fields are nmi.pending, sipi_vector, smi.smm,
+smi.pending. Keep the corresponding bits in the flags field cleared to
+suppress overwriting the current in-kernel state. The bits are:
+
+KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
+KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
+KVM_VCPUEVENT_VALID_SMM         - transfer the smi sub-struct.
+
+If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
+the flags field to signal that interrupt.shadow contains a valid state and
+shall be written into the VCPU.
+
+KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
+
+If KVM_CAP_EXCEPTION_PAYLOAD is enabled, KVM_VCPUEVENT_VALID_PAYLOAD
+can be set in the flags field to signal that the
+exception_has_payload, exception_payload, and exception.pending fields
+contain a valid state and shall be written into the VCPU.
+
+ARM/ARM64:
+
+Set the pending SError exception state for this VCPU. It is not possible to
+'cancel' an Serror that has been made pending.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
+
+4.33 KVM_GET_DEBUGREGS
+
+Capability: KVM_CAP_DEBUGREGS
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_debugregs (out)
+Returns: 0 on success, -1 on error
+
+Reads debug registers from the vcpu.
+
+struct kvm_debugregs {
+	__u64 db[4];
+	__u64 dr6;
+	__u64 dr7;
+	__u64 flags;
+	__u64 reserved[9];
+};
+
+
+4.34 KVM_SET_DEBUGREGS
+
+Capability: KVM_CAP_DEBUGREGS
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_debugregs (in)
+Returns: 0 on success, -1 on error
+
+Writes debug registers into the vcpu.
+
+See KVM_GET_DEBUGREGS for the data structure. The flags field is unused
+yet and must be cleared on entry.
+
+
+4.35 KVM_SET_USER_MEMORY_REGION
+
+Capability: KVM_CAP_USER_MEMORY
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_userspace_memory_region (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_userspace_memory_region {
+	__u32 slot;
+	__u32 flags;
+	__u64 guest_phys_addr;
+	__u64 memory_size; /* bytes */
+	__u64 userspace_addr; /* start of the userspace allocated memory */
+};
+
+/* for kvm_memory_region::flags */
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)
+
+This ioctl allows the user to create, modify or delete a guest physical
+memory slot.  Bits 0-15 of "slot" specify the slot id and this value
+should be less than the maximum number of user memory slots supported per
+VM.  The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS.
+Slots may not overlap in guest physical address space.
+
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
+specifies the address space which is being modified.  They must be
+less than the value that KVM_CHECK_EXTENSION returns for the
+KVM_CAP_MULTI_ADDRESS_SPACE capability.  Slots in separate address spaces
+are unrelated; the restriction on overlapping slots only applies within
+each address space.
+
+Deleting a slot is done by passing zero for memory_size.  When changing
+an existing slot, it may be moved in the guest physical memory space,
+or its flags may be modified, but it may not be resized.
+
+Memory for the region is taken starting at the address denoted by the
+field userspace_addr, which must point at user addressable memory for
+the entire memory slot size.  Any object may back this memory, including
+anonymous memory, ordinary files, and hugetlbfs.
+
+It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
+be identical.  This allows large pages in the guest to be backed by large
+pages in the host.
+
+The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
+KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
+writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
+use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
+to make a new slot read-only.  In this case, writes to this memory will be
+posted to userspace as KVM_EXIT_MMIO exits.
+
+When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
+the memory region are automatically reflected into the guest.  For example, an
+mmap() that affects the region will be made visible immediately.  Another
+example is madvise(MADV_DROP).
+
+It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
+The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
+allocation and is deprecated.
+
+
+4.36 KVM_SET_TSS_ADDR
+
+Capability: KVM_CAP_SET_TSS_ADDR
+Architectures: x86
+Type: vm ioctl
+Parameters: unsigned long tss_address (in)
+Returns: 0 on success, -1 on error
+
+This ioctl defines the physical address of a three-page region in the guest
+physical address space.  The region must be within the first 4GB of the
+guest physical address space and must not conflict with any memory slot
+or any mmio address.  The guest may malfunction if it accesses this memory
+region.
+
+This ioctl is required on Intel-based hosts.  This is needed on Intel hardware
+because of a quirk in the virtualization implementation (see the internals
+documentation when it pops into existence).
+
+
+4.37 KVM_ENABLE_CAP
+
+Capability: KVM_CAP_ENABLE_CAP
+Architectures: mips, ppc, s390
+Type: vcpu ioctl
+Parameters: struct kvm_enable_cap (in)
+Returns: 0 on success; -1 on error
+
+Capability: KVM_CAP_ENABLE_CAP_VM
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_enable_cap (in)
+Returns: 0 on success; -1 on error
+
++Not all extensions are enabled by default. Using this ioctl the application
+can enable an extension, making it available to the guest.
+
+On systems that do not support this ioctl, it always fails. On systems that
+do support it, it only works for extensions that are supported for enablement.
+
+To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
+be used.
+
+struct kvm_enable_cap {
+       /* in */
+       __u32 cap;
+
+The capability that is supposed to get enabled.
+
+       __u32 flags;
+
+A bitfield indicating future enhancements. Has to be 0 for now.
+
+       __u64 args[4];
+
+Arguments for enabling a feature. If a feature needs initial values to
+function properly, this is the place to put them.
+
+       __u8  pad[64];
+};
+
+The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
+for vm-wide capabilities.
+
+4.38 KVM_GET_MP_STATE
+
+Capability: KVM_CAP_MP_STATE
+Architectures: x86, s390, arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_mp_state (out)
+Returns: 0 on success; -1 on error
+
+struct kvm_mp_state {
+	__u32 mp_state;
+};
+
+Returns the vcpu's current "multiprocessing state" (though also valid on
+uniprocessor guests).
+
+Possible values are:
+
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86,arm/arm64]
+ - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
+                                 which has not yet received an INIT signal [x86]
+ - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
+                                 now ready for a SIPI [x86]
+ - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
+                                 is waiting for an interrupt [x86]
+ - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
+ - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390,arm/arm64]
+ - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
+ - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
+                                 [s390]
+ - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
+                                 [s390]
+
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
+
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
+
+4.39 KVM_SET_MP_STATE
+
+Capability: KVM_CAP_MP_STATE
+Architectures: x86, s390, arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_mp_state (in)
+Returns: 0 on success; -1 on error
+
+Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
+arguments.
+
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
+
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
+
+4.40 KVM_SET_IDENTITY_MAP_ADDR
+
+Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
+Architectures: x86
+Type: vm ioctl
+Parameters: unsigned long identity (in)
+Returns: 0 on success, -1 on error
+
+This ioctl defines the physical address of a one-page region in the guest
+physical address space.  The region must be within the first 4GB of the
+guest physical address space and must not conflict with any memory slot
+or any mmio address.  The guest may malfunction if it accesses this memory
+region.
+
+Setting the address to 0 will result in resetting the address to its default
+(0xfffbc000).
+
+This ioctl is required on Intel-based hosts.  This is needed on Intel hardware
+because of a quirk in the virtualization implementation (see the internals
+documentation when it pops into existence).
+
+Fails if any VCPU has already been created.
+
+4.41 KVM_SET_BOOT_CPU_ID
+
+Capability: KVM_CAP_SET_BOOT_CPU_ID
+Architectures: x86
+Type: vm ioctl
+Parameters: unsigned long vcpu_id
+Returns: 0 on success, -1 on error
+
+Define which vcpu is the Bootstrap Processor (BSP).  Values are the same
+as the vcpu id in KVM_CREATE_VCPU.  If this ioctl is not called, the default
+is vcpu 0.
+
+
+4.42 KVM_GET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+	__u32 region[1024];
+};
+
+This ioctl would copy current vcpu's xsave struct to the userspace.
+
+
+4.43 KVM_SET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+	__u32 region[1024];
+};
+
+This ioctl would copy userspace's xsave struct to the kernel.
+
+
+4.44 KVM_GET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+	__u32 xcr;
+	__u32 reserved;
+	__u64 value;
+};
+
+struct kvm_xcrs {
+	__u32 nr_xcrs;
+	__u32 flags;
+	struct kvm_xcr xcrs[KVM_MAX_XCRS];
+	__u64 padding[16];
+};
+
+This ioctl would copy current vcpu's xcrs to the userspace.
+
+
+4.45 KVM_SET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+	__u32 xcr;
+	__u32 reserved;
+	__u64 value;
+};
+
+struct kvm_xcrs {
+	__u32 nr_xcrs;
+	__u32 flags;
+	struct kvm_xcr xcrs[KVM_MAX_XCRS];
+	__u64 padding[16];
+};
+
+This ioctl would set vcpu's xcr to the value userspace specified.
+
+
+4.46 KVM_GET_SUPPORTED_CPUID
+
+Capability: KVM_CAP_EXT_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are supported by both the
+hardware and kvm in its default configuration.  Userspace can use the
+information returned by this ioctl to construct cpuid information (for
+KVM_SET_CPUID2) that is consistent with hardware, kernel, and
+userspace capabilities, and with user requirements (for example, the
+user may wish to constrain cpuid to emulate older hardware, or for
+feature consistency across a cluster).
+
+Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may
+expose cpuid features (e.g. MONITOR) which are not supported by kvm in
+its default configuration. If userspace enables such capabilities, it
+is responsible for modifying the results of this ioctl appropriately.
+
+Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
+with the 'nent' field indicating the number of entries in the variable-size
+array 'entries'.  If the number of entries is too low to describe the cpu
+capabilities, an error (E2BIG) is returned.  If the number is too high,
+the 'nent' field is adjusted and an error (ENOMEM) is returned.  If the
+number is just right, the 'nent' field is adjusted to the number of valid
+entries in the 'entries' array, which is then filled.
+
+The entries returned are the host cpuid as returned by the cpuid instruction,
+with unknown or unsupported features masked out.  Some features (for example,
+x2apic), may not be present in the host cpu, but are exposed by kvm if it can
+emulate them efficiently. The fields in each entry are defined as follows:
+
+  function: the eax value used to obtain the entry
+  index: the ecx value used to obtain the entry (for entries that are
+         affected by ecx)
+  flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+         this function/index combination
+
+The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
+as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
+support.  Instead it is reported via
+
+  ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
+
+if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
+feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
+
+
+4.47 KVM_PPC_GET_PVINFO
+
+Capability: KVM_CAP_PPC_GET_PVINFO
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_pvinfo (out)
+Returns: 0 on success, !0 on error
+
+struct kvm_ppc_pvinfo {
+	__u32 flags;
+	__u32 hcall[4];
+	__u8  pad[108];
+};
+
+This ioctl fetches PV specific information that need to be passed to the guest
+using the device tree or other means from vm context.
+
+The hcall array defines 4 instructions that make up a hypercall.
+
+If any additional field gets added to this structure later on, a bit for that
+additional piece of information will be set in the flags bitmap.
+
+The flags bitmap is defined as:
+
+   /* the host supports the ePAPR idle hcall
+   #define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
+
+4.52 KVM_SET_GSI_ROUTING
+
+Capability: KVM_CAP_IRQ_ROUTING
+Architectures: x86 s390 arm arm64
+Type: vm ioctl
+Parameters: struct kvm_irq_routing (in)
+Returns: 0 on success, -1 on error
+
+Sets the GSI routing table entries, overwriting any previously set entries.
+
+On arm/arm64, GSI routing has the following limitation:
+- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
+
+struct kvm_irq_routing {
+	__u32 nr;
+	__u32 flags;
+	struct kvm_irq_routing_entry entries[0];
+};
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_entry {
+	__u32 gsi;
+	__u32 type;
+	__u32 flags;
+	__u32 pad;
+	union {
+		struct kvm_irq_routing_irqchip irqchip;
+		struct kvm_irq_routing_msi msi;
+		struct kvm_irq_routing_s390_adapter adapter;
+		struct kvm_irq_routing_hv_sint hv_sint;
+		__u32 pad[8];
+	} u;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
+#define KVM_IRQ_ROUTING_HV_SINT 4
+
+flags:
+- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry
+  type, specifies that the devid field contains a valid value.  The per-VM
+  KVM_CAP_MSI_DEVID capability advertises the requirement to provide
+  the device ID.  If this capability is not available, userspace should
+  never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
+- zero otherwise
+
+struct kvm_irq_routing_irqchip {
+	__u32 irqchip;
+	__u32 pin;
+};
+
+struct kvm_irq_routing_msi {
+	__u32 address_lo;
+	__u32 address_hi;
+	__u32 data;
+	union {
+		__u32 pad;
+		__u32 devid;
+	};
+};
+
+If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
+for the device that wrote the MSI message.  For PCI, this is usually a
+BFD identifier in the lower 16 bits.
+
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
+address_hi must be zero.
+
+struct kvm_irq_routing_s390_adapter {
+	__u64 ind_addr;
+	__u64 summary_addr;
+	__u64 ind_offset;
+	__u32 summary_offset;
+	__u32 adapter_id;
+};
+
+struct kvm_irq_routing_hv_sint {
+	__u32 vcpu;
+	__u32 sint;
+};
+
+
+4.55 KVM_SET_TSC_KHZ
+
+Capability: KVM_CAP_TSC_CONTROL
+Architectures: x86
+Type: vcpu ioctl
+Parameters: virtual tsc_khz
+Returns: 0 on success, -1 on error
+
+Specifies the tsc frequency for the virtual machine. The unit of the
+frequency is KHz.
+
+
+4.56 KVM_GET_TSC_KHZ
+
+Capability: KVM_CAP_GET_TSC_KHZ
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: virtual tsc-khz on success, negative value on error
+
+Returns the tsc frequency of the guest. The unit of the return value is
+KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
+error.
+
+
+4.57 KVM_GET_LAPIC
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_lapic_state (out)
+Returns: 0 on success, -1 on error
+
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+	char regs[KVM_APIC_REG_SIZE];
+};
+
+Reads the Local APIC registers and copies them into the input argument.  The
+data format and layout are the same as documented in the architecture manual.
+
+If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is
+enabled, then the format of APIC_ID register depends on the APIC mode
+(reported by MSR_IA32_APICBASE) of its VCPU.  x2APIC stores APIC ID in
+the APIC_ID register (bytes 32-35).  xAPIC only allows an 8-bit APIC ID
+which is stored in bits 31-24 of the APIC register, or equivalently in
+byte 35 of struct kvm_lapic_state's regs field.  KVM_GET_LAPIC must then
+be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR.
+
+If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state
+always uses xAPIC format.
+
+
+4.58 KVM_SET_LAPIC
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_lapic_state (in)
+Returns: 0 on success, -1 on error
+
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+	char regs[KVM_APIC_REG_SIZE];
+};
+
+Copies the input argument into the Local APIC registers.  The data format
+and layout are the same as documented in the architecture manual.
+
+The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
+regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
+See the note in KVM_GET_LAPIC.
+
+
+4.59 KVM_IOEVENTFD
+
+Capability: KVM_CAP_IOEVENTFD
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_ioeventfd (in)
+Returns: 0 on success, !0 on error
+
+This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
+within the guest.  A guest write in the registered address will signal the
+provided event instead of triggering an exit.
+
+struct kvm_ioeventfd {
+	__u64 datamatch;
+	__u64 addr;        /* legal pio/mmio address */
+	__u32 len;         /* 0, 1, 2, 4, or 8 bytes    */
+	__s32 fd;
+	__u32 flags;
+	__u8  pad[36];
+};
+
+For the special case of virtio-ccw devices on s390, the ioevent is matched
+to a subchannel/virtqueue tuple instead.
+
+The following flags are defined:
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
+
+If datamatch flag is set, the event will be signaled only if the written value
+to the registered address is equal to datamatch in struct kvm_ioeventfd.
+
+For virtio-ccw devices, addr contains the subchannel id and datamatch the
+virtqueue index.
+
+With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
+the kernel will ignore the length of guest write and may get a faster vmexit.
+The speedup may only apply to specific architectures, but the ioeventfd will
+work anyway.
+
+4.60 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+	__u64 bitmap;
+	__u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array.  This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: the bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything.  It must
+be set to the number of set bits in the bitmap.
+
+
+4.62 KVM_CREATE_SPAPR_TCE
+
+Capability: KVM_CAP_SPAPR_TCE
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_create_spapr_tce (in)
+Returns: file descriptor for manipulating the created TCE table
+
+This creates a virtual TCE (translation control entry) table, which
+is an IOMMU for PAPR-style virtual I/O.  It is used to translate
+logical addresses used in virtual I/O into guest physical addresses,
+and provides a scatter/gather capability for PAPR virtual I/O.
+
+/* for KVM_CAP_SPAPR_TCE */
+struct kvm_create_spapr_tce {
+	__u64 liobn;
+	__u32 window_size;
+};
+
+The liobn field gives the logical IO bus number for which to create a
+TCE table.  The window_size field specifies the size of the DMA window
+which this TCE table will translate - the table will contain one 64
+bit TCE entry for every 4kiB of the DMA window.
+
+When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
+table has been created using this ioctl(), the kernel will handle it
+in real mode, updating the TCE table.  H_PUT_TCE calls for other
+liobns will cause a vm exit and must be handled by userspace.
+
+The return value is a file descriptor which can be passed to mmap(2)
+to map the created TCE table into userspace.  This lets userspace read
+the entries written by kernel-handled H_PUT_TCE calls, and also lets
+userspace update the TCE table directly which is useful in some
+circumstances.
+
+
+4.63 KVM_ALLOCATE_RMA
+
+Capability: KVM_CAP_PPC_RMA
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_allocate_rma (out)
+Returns: file descriptor for mapping the allocated RMA
+
+This allocates a Real Mode Area (RMA) from the pool allocated at boot
+time by the kernel.  An RMA is a physically-contiguous, aligned region
+of memory used on older POWER processors to provide the memory which
+will be accessed by real-mode (MMU off) accesses in a KVM guest.
+POWER processors support a set of sizes for the RMA that usually
+includes 64MB, 128MB, 256MB and some larger powers of two.
+
+/* for KVM_ALLOCATE_RMA */
+struct kvm_allocate_rma {
+	__u64 rma_size;
+};
+
+The return value is a file descriptor which can be passed to mmap(2)
+to map the allocated RMA into userspace.  The mapped area can then be
+passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the
+RMA for a virtual machine.  The size of the RMA in bytes (which is
+fixed at host kernel boot time) is returned in the rma_size field of
+the argument structure.
+
+The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl
+is supported; 2 if the processor requires all virtual machines to have
+an RMA, or 1 if the processor can use an RMA but doesn't require it,
+because it supports the Virtual RMA (VRMA) facility.
+
+
+4.64 KVM_NMI
+
+Capability: KVM_CAP_USER_NMI
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Queues an NMI on the thread's vcpu.  Note this is well defined only
+when KVM_CREATE_IRQCHIP has not been called, since this is an interface
+between the virtual cpu core and virtual local APIC.  After KVM_CREATE_IRQCHIP
+has been called, this interface is completely emulated within the kernel.
+
+To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the
+following algorithm:
+
+  - pause the vcpu
+  - read the local APIC's state (KVM_GET_LAPIC)
+  - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1)
+  - if so, issue KVM_NMI
+  - resume the vcpu
+
+Some guests configure the LINT1 NMI input to cause a panic, aiding in
+debugging.
+
+
+4.65 KVM_S390_UCAS_MAP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+	struct kvm_s390_ucas_mapping {
+		__u64 user_addr;
+		__u64 vcpu_addr;
+		__u64 length;
+	};
+
+This ioctl maps the memory at "user_addr" with the length "length" to
+the vcpu's address space starting at "vcpu_addr". All parameters need to
+be aligned by 1 megabyte.
+
+
+4.66 KVM_S390_UCAS_UNMAP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+	struct kvm_s390_ucas_mapping {
+		__u64 user_addr;
+		__u64 vcpu_addr;
+		__u64 length;
+	};
+
+This ioctl unmaps the memory in the vcpu's address space starting at
+"vcpu_addr" with the length "length". The field "user_addr" is ignored.
+All parameters need to be aligned by 1 megabyte.
+
+
+4.67 KVM_S390_VCPU_FAULT
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: vcpu absolute address (in)
+Returns: 0 in case of success
+
+This call creates a page table entry on the virtual cpu's address space
+(for user controlled virtual machines) or the virtual machine's address
+space (for regular virtual machines). This only works for minor faults,
+thus it's recommended to access subject memory page via the user page
+table upfront. This is useful to handle validity intercepts for user
+controlled virtual machines to fault in the virtual cpu's lowcore pages
+prior to calling the KVM_RUN ioctl.
+
+
+4.68 KVM_SET_ONE_REG
+
+Capability: KVM_CAP_ONE_REG
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_one_reg (in)
+Returns: 0 on success, negative value on failure
+Errors:
+  ENOENT:   no such register
+  EINVAL:   invalid register ID, or no such register
+  EPERM:    (arm64) register access not allowed before vcpu finalization
+(These error codes are indicative only: do not rely on a specific error
+code being returned in a specific situation.)
+
+struct kvm_one_reg {
+       __u64 id;
+       __u64 addr;
+};
+
+Using this ioctl, a single vcpu register can be set to a specific value
+defined by user space with the passed in struct kvm_one_reg, where id
+refers to the register identifier as described below and addr is a pointer
+to a variable with the respective size. There can be architecture agnostic
+and architecture specific registers. Each have their own range of operation
+and their own constants and width. To keep track of the implemented
+registers, find a list below:
+
+  Arch  |           Register            | Width (bits)
+        |                               |
+  PPC   | KVM_REG_PPC_HIOR              | 64
+  PPC   | KVM_REG_PPC_IAC1              | 64
+  PPC   | KVM_REG_PPC_IAC2              | 64
+  PPC   | KVM_REG_PPC_IAC3              | 64
+  PPC   | KVM_REG_PPC_IAC4              | 64
+  PPC   | KVM_REG_PPC_DAC1              | 64
+  PPC   | KVM_REG_PPC_DAC2              | 64
+  PPC   | KVM_REG_PPC_DABR              | 64
+  PPC   | KVM_REG_PPC_DSCR              | 64
+  PPC   | KVM_REG_PPC_PURR              | 64
+  PPC   | KVM_REG_PPC_SPURR             | 64
+  PPC   | KVM_REG_PPC_DAR               | 64
+  PPC   | KVM_REG_PPC_DSISR             | 32
+  PPC   | KVM_REG_PPC_AMR               | 64
+  PPC   | KVM_REG_PPC_UAMOR             | 64
+  PPC   | KVM_REG_PPC_MMCR0             | 64
+  PPC   | KVM_REG_PPC_MMCR1             | 64
+  PPC   | KVM_REG_PPC_MMCRA             | 64
+  PPC   | KVM_REG_PPC_MMCR2             | 64
+  PPC   | KVM_REG_PPC_MMCRS             | 64
+  PPC   | KVM_REG_PPC_SIAR              | 64
+  PPC   | KVM_REG_PPC_SDAR              | 64
+  PPC   | KVM_REG_PPC_SIER              | 64
+  PPC   | KVM_REG_PPC_PMC1              | 32
+  PPC   | KVM_REG_PPC_PMC2              | 32
+  PPC   | KVM_REG_PPC_PMC3              | 32
+  PPC   | KVM_REG_PPC_PMC4              | 32
+  PPC   | KVM_REG_PPC_PMC5              | 32
+  PPC   | KVM_REG_PPC_PMC6              | 32
+  PPC   | KVM_REG_PPC_PMC7              | 32
+  PPC   | KVM_REG_PPC_PMC8              | 32
+  PPC   | KVM_REG_PPC_FPR0              | 64
+          ...
+  PPC   | KVM_REG_PPC_FPR31             | 64
+  PPC   | KVM_REG_PPC_VR0               | 128
+          ...
+  PPC   | KVM_REG_PPC_VR31              | 128
+  PPC   | KVM_REG_PPC_VSR0              | 128
+          ...
+  PPC   | KVM_REG_PPC_VSR31             | 128
+  PPC   | KVM_REG_PPC_FPSCR             | 64
+  PPC   | KVM_REG_PPC_VSCR              | 32
+  PPC   | KVM_REG_PPC_VPA_ADDR          | 64
+  PPC   | KVM_REG_PPC_VPA_SLB           | 128
+  PPC   | KVM_REG_PPC_VPA_DTL           | 128
+  PPC   | KVM_REG_PPC_EPCR              | 32
+  PPC   | KVM_REG_PPC_EPR               | 32
+  PPC   | KVM_REG_PPC_TCR               | 32
+  PPC   | KVM_REG_PPC_TSR               | 32
+  PPC   | KVM_REG_PPC_OR_TSR            | 32
+  PPC   | KVM_REG_PPC_CLEAR_TSR         | 32
+  PPC   | KVM_REG_PPC_MAS0              | 32
+  PPC   | KVM_REG_PPC_MAS1              | 32
+  PPC   | KVM_REG_PPC_MAS2              | 64
+  PPC   | KVM_REG_PPC_MAS7_3            | 64
+  PPC   | KVM_REG_PPC_MAS4              | 32
+  PPC   | KVM_REG_PPC_MAS6              | 32
+  PPC   | KVM_REG_PPC_MMUCFG            | 32
+  PPC   | KVM_REG_PPC_TLB0CFG           | 32
+  PPC   | KVM_REG_PPC_TLB1CFG           | 32
+  PPC   | KVM_REG_PPC_TLB2CFG           | 32
+  PPC   | KVM_REG_PPC_TLB3CFG           | 32
+  PPC   | KVM_REG_PPC_TLB0PS            | 32
+  PPC   | KVM_REG_PPC_TLB1PS            | 32
+  PPC   | KVM_REG_PPC_TLB2PS            | 32
+  PPC   | KVM_REG_PPC_TLB3PS            | 32
+  PPC   | KVM_REG_PPC_EPTCFG            | 32
+  PPC   | KVM_REG_PPC_ICP_STATE         | 64
+  PPC   | KVM_REG_PPC_VP_STATE          | 128
+  PPC   | KVM_REG_PPC_TB_OFFSET         | 64
+  PPC   | KVM_REG_PPC_SPMC1             | 32
+  PPC   | KVM_REG_PPC_SPMC2             | 32
+  PPC   | KVM_REG_PPC_IAMR              | 64
+  PPC   | KVM_REG_PPC_TFHAR             | 64
+  PPC   | KVM_REG_PPC_TFIAR             | 64
+  PPC   | KVM_REG_PPC_TEXASR            | 64
+  PPC   | KVM_REG_PPC_FSCR              | 64
+  PPC   | KVM_REG_PPC_PSPB              | 32
+  PPC   | KVM_REG_PPC_EBBHR             | 64
+  PPC   | KVM_REG_PPC_EBBRR             | 64
+  PPC   | KVM_REG_PPC_BESCR             | 64
+  PPC   | KVM_REG_PPC_TAR               | 64
+  PPC   | KVM_REG_PPC_DPDES             | 64
+  PPC   | KVM_REG_PPC_DAWR              | 64
+  PPC   | KVM_REG_PPC_DAWRX             | 64
+  PPC   | KVM_REG_PPC_CIABR             | 64
+  PPC   | KVM_REG_PPC_IC                | 64
+  PPC   | KVM_REG_PPC_VTB               | 64
+  PPC   | KVM_REG_PPC_CSIGR             | 64
+  PPC   | KVM_REG_PPC_TACR              | 64
+  PPC   | KVM_REG_PPC_TCSCR             | 64
+  PPC   | KVM_REG_PPC_PID               | 64
+  PPC   | KVM_REG_PPC_ACOP              | 64
+  PPC   | KVM_REG_PPC_VRSAVE            | 32
+  PPC   | KVM_REG_PPC_LPCR              | 32
+  PPC   | KVM_REG_PPC_LPCR_64           | 64
+  PPC   | KVM_REG_PPC_PPR               | 64
+  PPC   | KVM_REG_PPC_ARCH_COMPAT       | 32
+  PPC   | KVM_REG_PPC_DABRX             | 32
+  PPC   | KVM_REG_PPC_WORT              | 64
+  PPC	| KVM_REG_PPC_SPRG9             | 64
+  PPC	| KVM_REG_PPC_DBSR              | 32
+  PPC   | KVM_REG_PPC_TIDR              | 64
+  PPC   | KVM_REG_PPC_PSSCR             | 64
+  PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
+  PPC   | KVM_REG_PPC_PTCR              | 64
+  PPC   | KVM_REG_PPC_TM_GPR0           | 64
+          ...
+  PPC   | KVM_REG_PPC_TM_GPR31          | 64
+  PPC   | KVM_REG_PPC_TM_VSR0           | 128
+          ...
+  PPC   | KVM_REG_PPC_TM_VSR63          | 128
+  PPC   | KVM_REG_PPC_TM_CR             | 64
+  PPC   | KVM_REG_PPC_TM_LR             | 64
+  PPC   | KVM_REG_PPC_TM_CTR            | 64
+  PPC   | KVM_REG_PPC_TM_FPSCR          | 64
+  PPC   | KVM_REG_PPC_TM_AMR            | 64
+  PPC   | KVM_REG_PPC_TM_PPR            | 64
+  PPC   | KVM_REG_PPC_TM_VRSAVE         | 64
+  PPC   | KVM_REG_PPC_TM_VSCR           | 32
+  PPC   | KVM_REG_PPC_TM_DSCR           | 64
+  PPC   | KVM_REG_PPC_TM_TAR            | 64
+  PPC   | KVM_REG_PPC_TM_XER            | 64
+        |                               |
+  MIPS  | KVM_REG_MIPS_R0               | 64
+          ...
+  MIPS  | KVM_REG_MIPS_R31              | 64
+  MIPS  | KVM_REG_MIPS_HI               | 64
+  MIPS  | KVM_REG_MIPS_LO               | 64
+  MIPS  | KVM_REG_MIPS_PC               | 64
+  MIPS  | KVM_REG_MIPS_CP0_INDEX        | 32
+  MIPS  | KVM_REG_MIPS_CP0_ENTRYLO0     | 64
+  MIPS  | KVM_REG_MIPS_CP0_ENTRYLO1     | 64
+  MIPS  | KVM_REG_MIPS_CP0_CONTEXT      | 64
+  MIPS  | KVM_REG_MIPS_CP0_CONTEXTCONFIG| 32
+  MIPS  | KVM_REG_MIPS_CP0_USERLOCAL    | 64
+  MIPS  | KVM_REG_MIPS_CP0_XCONTEXTCONFIG| 64
+  MIPS  | KVM_REG_MIPS_CP0_PAGEMASK     | 32
+  MIPS  | KVM_REG_MIPS_CP0_PAGEGRAIN    | 32
+  MIPS  | KVM_REG_MIPS_CP0_SEGCTL0      | 64
+  MIPS  | KVM_REG_MIPS_CP0_SEGCTL1      | 64
+  MIPS  | KVM_REG_MIPS_CP0_SEGCTL2      | 64
+  MIPS  | KVM_REG_MIPS_CP0_PWBASE       | 64
+  MIPS  | KVM_REG_MIPS_CP0_PWFIELD      | 64
+  MIPS  | KVM_REG_MIPS_CP0_PWSIZE       | 64
+  MIPS  | KVM_REG_MIPS_CP0_WIRED        | 32
+  MIPS  | KVM_REG_MIPS_CP0_PWCTL        | 32
+  MIPS  | KVM_REG_MIPS_CP0_HWRENA       | 32
+  MIPS  | KVM_REG_MIPS_CP0_BADVADDR     | 64
+  MIPS  | KVM_REG_MIPS_CP0_BADINSTR     | 32
+  MIPS  | KVM_REG_MIPS_CP0_BADINSTRP    | 32
+  MIPS  | KVM_REG_MIPS_CP0_COUNT        | 32
+  MIPS  | KVM_REG_MIPS_CP0_ENTRYHI      | 64
+  MIPS  | KVM_REG_MIPS_CP0_COMPARE      | 32
+  MIPS  | KVM_REG_MIPS_CP0_STATUS       | 32
+  MIPS  | KVM_REG_MIPS_CP0_INTCTL       | 32
+  MIPS  | KVM_REG_MIPS_CP0_CAUSE        | 32
+  MIPS  | KVM_REG_MIPS_CP0_EPC          | 64
+  MIPS  | KVM_REG_MIPS_CP0_PRID         | 32
+  MIPS  | KVM_REG_MIPS_CP0_EBASE        | 64
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG       | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG1      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG2      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG3      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG4      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG5      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG7      | 32
+  MIPS  | KVM_REG_MIPS_CP0_XCONTEXT     | 64
+  MIPS  | KVM_REG_MIPS_CP0_ERROREPC     | 64
+  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH1    | 64
+  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH2    | 64
+  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH3    | 64
+  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH4    | 64
+  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH5    | 64
+  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH6    | 64
+  MIPS  | KVM_REG_MIPS_CP0_MAAR(0..63)  | 64
+  MIPS  | KVM_REG_MIPS_COUNT_CTL        | 64
+  MIPS  | KVM_REG_MIPS_COUNT_RESUME     | 64
+  MIPS  | KVM_REG_MIPS_COUNT_HZ         | 64
+  MIPS  | KVM_REG_MIPS_FPR_32(0..31)    | 32
+  MIPS  | KVM_REG_MIPS_FPR_64(0..31)    | 64
+  MIPS  | KVM_REG_MIPS_VEC_128(0..31)   | 128
+  MIPS  | KVM_REG_MIPS_FCR_IR           | 32
+  MIPS  | KVM_REG_MIPS_FCR_CSR          | 32
+  MIPS  | KVM_REG_MIPS_MSA_IR           | 32
+  MIPS  | KVM_REG_MIPS_MSA_CSR          | 32
+
+ARM registers are mapped using the lower 32 bits.  The upper 16 of that
+is the register group type, or coprocessor number:
+
+ARM core registers have the following id bit patterns:
+  0x4020 0000 0010 <index into the kvm_regs struct:16>
+
+ARM 32-bit CP15 registers have the following id bit patterns:
+  0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
+
+ARM 64-bit CP15 registers have the following id bit patterns:
+  0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
+
+ARM CCSIDR registers are demultiplexed by CSSELR value:
+  0x4020 0000 0011 00 <csselr:8>
+
+ARM 32-bit VFP control registers have the following id bit patterns:
+  0x4020 0000 0012 1 <regno:12>
+
+ARM 64-bit FP registers have the following id bit patterns:
+  0x4030 0000 0012 0 <regno:12>
+
+ARM firmware pseudo-registers have the following bit pattern:
+  0x4030 0000 0014 <regno:16>
+
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 of
+that is the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+  0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+Specifically:
+    Encoding            Register  Bits  kvm_regs member
+----------------------------------------------------------------
+  0x6030 0000 0010 0000 X0          64  regs.regs[0]
+  0x6030 0000 0010 0002 X1          64  regs.regs[1]
+    ...
+  0x6030 0000 0010 003c X30         64  regs.regs[30]
+  0x6030 0000 0010 003e SP          64  regs.sp
+  0x6030 0000 0010 0040 PC          64  regs.pc
+  0x6030 0000 0010 0042 PSTATE      64  regs.pstate
+  0x6030 0000 0010 0044 SP_EL1      64  sp_el1
+  0x6030 0000 0010 0046 ELR_EL1     64  elr_el1
+  0x6030 0000 0010 0048 SPSR_EL1    64  spsr[KVM_SPSR_EL1] (alias SPSR_SVC)
+  0x6030 0000 0010 004a SPSR_ABT    64  spsr[KVM_SPSR_ABT]
+  0x6030 0000 0010 004c SPSR_UND    64  spsr[KVM_SPSR_UND]
+  0x6030 0000 0010 004e SPSR_IRQ    64  spsr[KVM_SPSR_IRQ]
+  0x6060 0000 0010 0050 SPSR_FIQ    64  spsr[KVM_SPSR_FIQ]
+  0x6040 0000 0010 0054 V0         128  fp_regs.vregs[0]    (*)
+  0x6040 0000 0010 0058 V1         128  fp_regs.vregs[1]    (*)
+    ...
+  0x6040 0000 0010 00d0 V31        128  fp_regs.vregs[31]   (*)
+  0x6020 0000 0010 00d4 FPSR        32  fp_regs.fpsr
+  0x6020 0000 0010 00d5 FPCR        32  fp_regs.fpcr
+
+(*) These encodings are not accepted for SVE-enabled vcpus.  See
+    KVM_ARM_VCPU_INIT.
+
+    The equivalent register content can be accessed via bits [127:0] of
+    the corresponding SVE Zn registers instead for vcpus that have SVE
+    enabled (see below).
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+  0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+  0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
+arm64 firmware pseudo-registers have the following bit pattern:
+  0x6030 0000 0014 <regno:16>
+
+arm64 SVE registers have the following bit patterns:
+  0x6080 0000 0015 00 <n:5> <slice:5>   Zn bits[2048*slice + 2047 : 2048*slice]
+  0x6050 0000 0015 04 <n:4> <slice:5>   Pn bits[256*slice + 255 : 256*slice]
+  0x6050 0000 0015 060 <slice:5>        FFR bits[256*slice + 255 : 256*slice]
+  0x6060 0000 0015 ffff                 KVM_REG_ARM64_SVE_VLS pseudo-register
+
+Access to register IDs where 2048 * slice >= 128 * max_vq will fail with
+ENOENT.  max_vq is the vcpu's maximum supported vector length in 128-bit
+quadwords: see (**) below.
+
+These registers are only accessible on vcpus for which SVE is enabled.
+See KVM_ARM_VCPU_INIT for details.
+
+In addition, except for KVM_REG_ARM64_SVE_VLS, these registers are not
+accessible until the vcpu's SVE configuration has been finalized
+using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).  See KVM_ARM_VCPU_INIT
+and KVM_ARM_VCPU_FINALIZE for more information about this procedure.
+
+KVM_REG_ARM64_SVE_VLS is a pseudo-register that allows the set of vector
+lengths supported by the vcpu to be discovered and configured by
+userspace.  When transferred to or from user memory via KVM_GET_ONE_REG
+or KVM_SET_ONE_REG, the value of this register is of type
+__u64[KVM_ARM64_SVE_VLS_WORDS], and encodes the set of vector lengths as
+follows:
+
+__u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS];
+
+if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX &&
+    ((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >>
+		((vq - KVM_ARM64_SVE_VQ_MIN) % 64)) & 1))
+	/* Vector length vq * 16 bytes supported */
+else
+	/* Vector length vq * 16 bytes not supported */
+
+(**) The maximum value vq for which the above condition is true is
+max_vq.  This is the maximum vector length available to the guest on
+this vcpu, and determines which register slices are visible through
+this ioctl interface.
+
+(See Documentation/arm64/sve.rst for an explanation of the "vq"
+nomenclature.)
+
+KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
+KVM_ARM_VCPU_INIT initialises it to the best set of vector lengths that
+the host supports.
+
+Userspace may subsequently modify it if desired until the vcpu's SVE
+configuration is finalized using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).
+
+Apart from simply removing all vector lengths from the host set that
+exceed some value, support for arbitrarily chosen sets of vector lengths
+is hardware-dependent and may not be available.  Attempting to configure
+an invalid set of vector lengths via KVM_SET_ONE_REG will fail with
+EINVAL.
+
+After the vcpu's SVE configuration is finalized, further attempts to
+write this register will fail with EPERM.
+
+
+MIPS registers are mapped using the lower 32 bits.  The upper 16 of that is
+the register group type:
+
+MIPS core registers (see above) have the following id bit patterns:
+  0x7030 0000 0000 <reg:16>
+
+MIPS CP0 registers (see KVM_REG_MIPS_CP0_* above) have the following id bit
+patterns depending on whether they're 32-bit or 64-bit registers:
+  0x7020 0000 0001 00 <reg:5> <sel:3>   (32-bit)
+  0x7030 0000 0001 00 <reg:5> <sel:3>   (64-bit)
+
+Note: KVM_REG_MIPS_CP0_ENTRYLO0 and KVM_REG_MIPS_CP0_ENTRYLO1 are the MIPS64
+versions of the EntryLo registers regardless of the word size of the host
+hardware, host kernel, guest, and whether XPA is present in the guest, i.e.
+with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and
+the PFNX field starting at bit 30.
+
+MIPS MAARs (see KVM_REG_MIPS_CP0_MAAR(*) above) have the following id bit
+patterns:
+  0x7030 0000 0001 01 <reg:8>
+
+MIPS KVM control registers (see above) have the following id bit patterns:
+  0x7030 0000 0002 <reg:16>
+
+MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
+id bit patterns depending on the size of the register being accessed. They are
+always accessed according to the current guest FPU mode (Status.FR and
+Config5.FRE), i.e. as the guest would see them, and they become unpredictable
+if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
+registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
+overlap the FPU registers:
+  0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
+  0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
+  0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
+
+MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 01 <0:3> <reg:5>
+
+MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 02 <0:3> <reg:5>
+
+
+4.69 KVM_GET_ONE_REG
+
+Capability: KVM_CAP_ONE_REG
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_one_reg (in and out)
+Returns: 0 on success, negative value on failure
+Errors include:
+  ENOENT:   no such register
+  EINVAL:   invalid register ID, or no such register
+  EPERM:    (arm64) register access not allowed before vcpu finalization
+(These error codes are indicative only: do not rely on a specific error
+code being returned in a specific situation.)
+
+This ioctl allows to receive the value of a single register implemented
+in a vcpu. The register to read is indicated by the "id" field of the
+kvm_one_reg struct passed in. On success, the register value can be found
+at the memory location pointed to by "addr".
+
+The list of registers accessible using this interface is identical to the
+list in 4.68.
+
+
+4.70 KVM_KVMCLOCK_CTRL
+
+Capability: KVM_CAP_KVMCLOCK_CTRL
+Architectures: Any that implement pvclocks (currently x86 only)
+Type: vcpu ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This signals to the host kernel that the specified guest is being paused by
+userspace.  The host will set a flag in the pvclock structure that is checked
+from the soft lockup watchdog.  The flag is part of the pvclock structure that
+is shared between guest and host, specifically the second bit of the flags
+field of the pvclock_vcpu_time_info structure.  It will be set exclusively by
+the host and read/cleared exclusively by the guest.  The guest operation of
+checking and clearing the flag must an atomic operation so
+load-link/store-conditional, or equivalent must be used.  There are two cases
+where the guest will clear the flag: when the soft lockup watchdog timer resets
+itself or when a soft lockup is detected.  This ioctl can be called any time
+after pausing the vcpu, but before it is resumed.
+
+
+4.71 KVM_SIGNAL_MSI
+
+Capability: KVM_CAP_SIGNAL_MSI
+Architectures: x86 arm arm64
+Type: vm ioctl
+Parameters: struct kvm_msi (in)
+Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
+
+Directly inject a MSI message. Only valid with in-kernel irqchip that handles
+MSI messages.
+
+struct kvm_msi {
+	__u32 address_lo;
+	__u32 address_hi;
+	__u32 data;
+	__u32 flags;
+	__u32 devid;
+	__u8  pad[12];
+};
+
+flags: KVM_MSI_VALID_DEVID: devid contains a valid value.  The per-VM
+  KVM_CAP_MSI_DEVID capability advertises the requirement to provide
+  the device ID.  If this capability is not available, userspace
+  should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
+
+If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
+for the device that wrote the MSI message.  For PCI, this is usually a
+BFD identifier in the lower 16 bits.
+
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
+address_hi must be zero.
+
+
+4.71 KVM_CREATE_PIT2
+
+Capability: KVM_CAP_PIT2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_config (in)
+Returns: 0 on success, -1 on error
+
+Creates an in-kernel device model for the i8254 PIT. This call is only valid
+after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following
+parameters have to be passed:
+
+struct kvm_pit_config {
+	__u32 flags;
+	__u32 pad[15];
+};
+
+Valid flags are:
+
+#define KVM_PIT_SPEAKER_DUMMY     1 /* emulate speaker port stub */
+
+PIT timer interrupts may use a per-VM kernel thread for injection. If it
+exists, this thread will have a name of the following pattern:
+
+kvm-pit/<owner-process-pid>
+
+When running a guest with elevated priorities, the scheduling parameters of
+this thread may have to be adjusted accordingly.
+
+This IOCTL replaces the obsolete KVM_CREATE_PIT.
+
+
+4.72 KVM_GET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (out)
+Returns: 0 on success, -1 on error
+
+Retrieves the state of the in-kernel PIT model. Only valid after
+KVM_CREATE_PIT2. The state is returned in the following structure:
+
+struct kvm_pit_state2 {
+	struct kvm_pit_channel_state channels[3];
+	__u32 flags;
+	__u32 reserved[9];
+};
+
+Valid flags are:
+
+/* disable PIT in HPET legacy mode */
+#define KVM_PIT_FLAGS_HPET_LEGACY  0x00000001
+
+This IOCTL replaces the obsolete KVM_GET_PIT.
+
+
+4.73 KVM_SET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
+See KVM_GET_PIT2 for details on struct kvm_pit_state2.
+
+This IOCTL replaces the obsolete KVM_SET_PIT.
+
+
+4.74 KVM_PPC_GET_SMMU_INFO
+
+Capability: KVM_CAP_PPC_GET_SMMU_INFO
+Architectures: powerpc
+Type: vm ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This populates and returns a structure describing the features of
+the "Server" class MMU emulation supported by KVM.
+This can in turn be used by userspace to generate the appropriate
+device-tree properties for the guest operating system.
+
+The structure contains some global information, followed by an
+array of supported segment page sizes:
+
+      struct kvm_ppc_smmu_info {
+	     __u64 flags;
+	     __u32 slb_size;
+	     __u32 pad;
+	     struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+      };
+
+The supported flags are:
+
+    - KVM_PPC_PAGE_SIZES_REAL:
+        When that flag is set, guest page sizes must "fit" the backing
+        store page sizes. When not set, any page size in the list can
+        be used regardless of how they are backed by userspace.
+
+    - KVM_PPC_1T_SEGMENTS
+        The emulated MMU supports 1T segments in addition to the
+        standard 256M ones.
+
+    - KVM_PPC_NO_HASH
+	This flag indicates that HPT guests are not supported by KVM,
+	thus all guests must use radix MMU mode.
+
+The "slb_size" field indicates how many SLB entries are supported
+
+The "sps" array contains 8 entries indicating the supported base
+page sizes for a segment in increasing order. Each entry is defined
+as follow:
+
+   struct kvm_ppc_one_seg_page_size {
+	__u32 page_shift;	/* Base page shift of segment (or 0) */
+	__u32 slb_enc;		/* SLB encoding for BookS */
+	struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+   };
+
+An entry with a "page_shift" of 0 is unused. Because the array is
+organized in increasing order, a lookup can stop when encoutering
+such an entry.
+
+The "slb_enc" field provides the encoding to use in the SLB for the
+page size. The bits are in positions such as the value can directly
+be OR'ed into the "vsid" argument of the slbmte instruction.
+
+The "enc" array is a list which for each of those segment base page
+size provides the list of supported actual page sizes (which can be
+only larger or equal to the base page size), along with the
+corresponding encoding in the hash PTE. Similarly, the array is
+8 entries sorted by increasing sizes and an entry with a "0" shift
+is an empty entry and a terminator:
+
+   struct kvm_ppc_one_page_size {
+	__u32 page_shift;	/* Page shift (or 0) */
+	__u32 pte_enc;		/* Encoding in the HPTE (>>12) */
+   };
+
+The "pte_enc" field provides a value that can OR'ed into the hash
+PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
+into the hash PTE second double word).
+
+4.75 KVM_IRQFD
+
+Capability: KVM_CAP_IRQFD
+Architectures: x86 s390 arm arm64
+Type: vm ioctl
+Parameters: struct kvm_irqfd (in)
+Returns: 0 on success, -1 on error
+
+Allows setting an eventfd to directly trigger a guest interrupt.
+kvm_irqfd.fd specifies the file descriptor to use as the eventfd and
+kvm_irqfd.gsi specifies the irqchip pin toggled by this event.  When
+an event is triggered on the eventfd, an interrupt is injected into
+the guest using the specified gsi pin.  The irqfd is removed using
+the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
+and kvm_irqfd.gsi.
+
+With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
+mechanism allowing emulation of level-triggered, irqfd-based
+interrupts.  When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
+additional eventfd in the kvm_irqfd.resamplefd field.  When operating
+in resample mode, posting of an interrupt through kvm_irq.fd asserts
+the specified gsi in the irqchip.  When the irqchip is resampled, such
+as from an EOI, the gsi is de-asserted and the user is notified via
+kvm_irqfd.resamplefd.  It is the user's responsibility to re-queue
+the interrupt if the device making use of it still requires service.
+Note that closing the resamplefd is not sufficient to disable the
+irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
+and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
+
+On arm/arm64, gsi routing being supported, the following can happen:
+- in case no routing entry is associated to this gsi, injection fails
+- in case the gsi is associated to an irqchip routing entry,
+  irqchip.pin + 32 corresponds to the injected SPI ID.
+- in case the gsi is associated to an MSI routing entry, the MSI
+  message and device ID are translated into an LPI (support restricted
+  to GICv3 ITS in-kernel emulation).
+
+4.76 KVM_PPC_ALLOCATE_HTAB
+
+Capability: KVM_CAP_PPC_ALLOC_HTAB
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to u32 containing hash table order (in/out)
+Returns: 0 on success, -1 on error
+
+This requests the host kernel to allocate an MMU hash table for a
+guest using the PAPR paravirtualization interface.  This only does
+anything if the kernel is configured to use the Book 3S HV style of
+virtualization.  Otherwise the capability doesn't exist and the ioctl
+returns an ENOTTY error.  The rest of this description assumes Book 3S
+HV.
+
+There must be no vcpus running when this ioctl is called; if there
+are, it will do nothing and return an EBUSY error.
+
+The parameter is a pointer to a 32-bit unsigned integer variable
+containing the order (log base 2) of the desired size of the hash
+table, which must be between 18 and 46.  On successful return from the
+ioctl, the value will not be changed by the kernel.
+
+If no hash table has been allocated when any vcpu is asked to run
+(with the KVM_RUN ioctl), the host kernel will allocate a
+default-sized hash table (16 MB).
+
+If this ioctl is called when a hash table has already been allocated,
+with a different order from the existing hash table, the existing hash
+table will be freed and a new one allocated.  If this is ioctl is
+called when a hash table has already been allocated of the same order
+as specified, the kernel will clear out the existing hash table (zero
+all HPTEs).  In either case, if the guest is using the virtualized
+real-mode area (VRMA) facility, the kernel will re-create the VMRA
+HPTEs on the next KVM_RUN of any vcpu.
+
+4.77 KVM_S390_INTERRUPT
+
+Capability: basic
+Architectures: s390
+Type: vm ioctl, vcpu ioctl
+Parameters: struct kvm_s390_interrupt (in)
+Returns: 0 on success, -1 on error
+
+Allows to inject an interrupt to the guest. Interrupts can be floating
+(vm ioctl) or per cpu (vcpu ioctl), depending on the interrupt type.
+
+Interrupt parameters are passed via kvm_s390_interrupt:
+
+struct kvm_s390_interrupt {
+	__u32 type;
+	__u32 parm;
+	__u64 parm64;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
+KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
+KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
+KVM_S390_RESTART (vcpu) - restart
+KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
+KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
+KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
+			   parameters in parm and parm64
+KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
+KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm
+KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
+KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an
+    I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
+    I/O interruption parameters in parm (subchannel) and parm64 (intparm,
+    interruption subclass)
+KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
+                           machine check interrupt code in parm64 (note that
+                           machine checks needing further payload are not
+                           supported by this ioctl)
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
+4.78 KVM_PPC_GET_HTAB_FD
+
+Capability: KVM_CAP_PPC_HTAB_FD
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to struct kvm_get_htab_fd (in)
+Returns: file descriptor number (>= 0) on success, -1 on error
+
+This returns a file descriptor that can be used either to read out the
+entries in the guest's hashed page table (HPT), or to write entries to
+initialize the HPT.  The returned fd can only be written to if the
+KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
+can only be read if that bit is clear.  The argument struct looks like
+this:
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+	__u64	flags;
+	__u64	start_index;
+	__u64	reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY	((__u64)0x1)
+#define KVM_GET_HTAB_WRITE		((__u64)0x2)
+
+The `start_index' field gives the index in the HPT of the entry at
+which to start reading.  It is ignored when writing.
+
+Reads on the fd will initially supply information about all
+"interesting" HPT entries.  Interesting entries are those with the
+bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
+all entries.  When the end of the HPT is reached, the read() will
+return.  If read() is called again on the fd, it will start again from
+the beginning of the HPT, but will only return HPT entries that have
+changed since they were last read.
+
+Data read or written is structured as a header (8 bytes) followed by a
+series of valid HPT entries (16 bytes) each.  The header indicates how
+many valid HPT entries there are and how many invalid entries follow
+the valid entries.  The invalid entries are not represented explicitly
+in the stream.  The header format is:
+
+struct kvm_get_htab_header {
+	__u32	index;
+	__u16	n_valid;
+	__u16	n_invalid;
+};
+
+Writes to the fd create HPT entries starting at the index given in the
+header; first `n_valid' valid entries with contents from the data
+written, then `n_invalid' invalid entries, invalidating any previously
+valid entries found.
+
+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device type is unknown or unsupported
+  EEXIST: Device already created, and this type of device may not
+          be instantiated multiple times
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
+Creates an emulated device in the kernel.  The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags.  Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
+  KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+Type: device ioctl, vm ioctl, vcpu ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+          or hardware support is missing.
+  EPERM:  The attribute cannot (currently) be accessed this way
+          (e.g. read-only attribute, or attribute that only makes
+          sense when the device is in a different state)
+
+  Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state.  The
+semantics are device-specific.  See individual device documentation in
+the "devices" directory.  As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
+  KVM_CAP_VCPU_ATTRIBUTES for vcpu device
+Type: device ioctl, vm ioctl, vcpu ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+          or hardware support is missing.
+
+Tests whether a device supports a particular attribute.  A successful
+return indicates the attribute is implemented.  It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state.  "addr" is ignored.
+
+4.82 KVM_ARM_VCPU_INIT
+
+Capability: basic
+Architectures: arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_init (in)
+Returns: 0 on success; -1 on error
+Errors:
+  EINVAL:    the target is unknown, or the combination of features is invalid.
+  ENOENT:    a features bit specified is unknown.
+
+This tells KVM what type of CPU to present to the guest, and what
+optional features it should have.  This will cause a reset of the cpu
+registers to their initial values.  If this is not called, KVM_RUN will
+return ENOEXEC for that vcpu.
+
+Note that because some registers reflect machine topology, all vcpus
+should be created before this ioctl is invoked.
+
+Userspace can call this function multiple times for a given vcpu, including
+after the vcpu has been run. This will reset the vcpu to its initial
+state. All calls to this function after the initial call must use the same
+target and same set of feature flags, otherwise EINVAL will be returned.
+
+Possible features:
+	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
+	  Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
+	  and execute guest code when KVM_RUN is called.
+	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
+	- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 (or a future revision
+          backward compatible with v0.2) for the CPU.
+	  Depends on KVM_CAP_ARM_PSCI_0_2.
+	- KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
+	  Depends on KVM_CAP_ARM_PMU_V3.
+
+	- KVM_ARM_VCPU_PTRAUTH_ADDRESS: Enables Address Pointer authentication
+	  for arm64 only.
+	  Depends on KVM_CAP_ARM_PTRAUTH_ADDRESS.
+	  If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
+	  both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
+	  KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
+	  requested.
+
+	- KVM_ARM_VCPU_PTRAUTH_GENERIC: Enables Generic Pointer authentication
+	  for arm64 only.
+	  Depends on KVM_CAP_ARM_PTRAUTH_GENERIC.
+	  If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
+	  both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
+	  KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
+	  requested.
+
+	- KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only).
+	  Depends on KVM_CAP_ARM_SVE.
+	  Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
+
+	   * After KVM_ARM_VCPU_INIT:
+
+	      - KVM_REG_ARM64_SVE_VLS may be read using KVM_GET_ONE_REG: the
+	        initial value of this pseudo-register indicates the best set of
+	        vector lengths possible for a vcpu on this host.
+
+	   * Before KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
+
+	      - KVM_RUN and KVM_GET_REG_LIST are not available;
+
+	      - KVM_GET_ONE_REG and KVM_SET_ONE_REG cannot be used to access
+	        the scalable archietctural SVE registers
+	        KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() or
+	        KVM_REG_ARM64_SVE_FFR;
+
+	      - KVM_REG_ARM64_SVE_VLS may optionally be written using
+	        KVM_SET_ONE_REG, to modify the set of vector lengths available
+	        for the vcpu.
+
+	   * After KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
+
+	      - the KVM_REG_ARM64_SVE_VLS pseudo-register is immutable, and can
+	        no longer be written using KVM_SET_ONE_REG.
+
+4.83 KVM_ARM_PREFERRED_TARGET
+
+Capability: basic
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct struct kvm_vcpu_init (out)
+Returns: 0 on success; -1 on error
+Errors:
+  ENODEV:    no preferred target available for the host
+
+This queries KVM for preferred CPU target type which can be emulated
+by KVM on underlying host.
+
+The ioctl returns struct kvm_vcpu_init instance containing information
+about preferred CPU target type and recommended features for it.  The
+kvm_vcpu_init->features bitmap returned will have feature bits set if
+the preferred target recommends setting these features, but this is
+not mandatory.
+
+The information returned by this ioctl can be used to prepare an instance
+of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in
+in VCPU matching underlying host.
+
+
+4.84 KVM_GET_REG_LIST
+
+Capability: basic
+Architectures: arm, arm64, mips
+Type: vcpu ioctl
+Parameters: struct kvm_reg_list (in/out)
+Returns: 0 on success; -1 on error
+Errors:
+  E2BIG:     the reg index list is too big to fit in the array specified by
+             the user (the number required will be written into n).
+
+struct kvm_reg_list {
+	__u64 n; /* number of registers in reg[] */
+	__u64 reg[0];
+};
+
+This ioctl returns the guest registers that are supported for the
+KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
+
+
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
+
+Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct kvm_arm_device_address (in)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device id is unknown
+  ENXIO:  Device not supported on current system
+  EEXIST: Address already set
+  E2BIG:  Address outside guest physical address space
+  EBUSY:  Address overlaps with other device range
+
+struct kvm_arm_device_addr {
+	__u64 id;
+	__u64 addr;
+};
+
+Specify a device address in the guest's physical address space where guests
+can access emulated or directly exposed devices, which the host kernel needs
+to know about. The id field is an architecture specific identifier for a
+specific device.
+
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
+
+  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
+  field: |        0x00000000      |     device id   |  addr type id  |
+
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id.  When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
+
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
+
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
+4.87 KVM_SET_GUEST_DEBUG
+
+Capability: KVM_CAP_SET_GUEST_DEBUG
+Architectures: x86, s390, ppc, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_guest_debug (in)
+Returns: 0 on success; -1 on error
+
+struct kvm_guest_debug {
+       __u32 control;
+       __u32 pad;
+       struct kvm_guest_debug_arch arch;
+};
+
+Set up the processor specific debug registers and configure vcpu for
+handling guest debug events. There are two parts to the structure, the
+first a control bitfield indicates the type of debug events to handle
+when running. Common control bits are:
+
+  - KVM_GUESTDBG_ENABLE:        guest debugging is enabled
+  - KVM_GUESTDBG_SINGLESTEP:    the next run should single-step
+
+The top 16 bits of the control field are architecture specific control
+flags which can include the following:
+
+  - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86, arm64]
+  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390, arm64]
+  - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
+  - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
+  - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
+
+For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
+are enabled in memory so we need to ensure breakpoint exceptions are
+correctly trapped and the KVM run loop exits at the breakpoint and not
+running off into the normal guest vector. For KVM_GUESTDBG_USE_HW_BP
+we need to ensure the guest vCPUs architecture specific registers are
+updated to the correct (supplied) values.
+
+The second part of the structure is architecture specific and
+typically contains a set of debug registers.
+
+For arm64 the number of debug registers is implementation defined and
+can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
+KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
+indicating the number of supported registers.
+
+When debug events exit the main run loop with the reason
+KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
+structure containing architecture specific debug information.
+
+4.88 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 flags;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+  function: the eax value used to obtain the entry
+  index: the ecx value used to obtain the entry (for entries that are
+         affected by ecx)
+  flags: an OR of zero or more of the following:
+        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+           if the index field is valid
+        KVM_CPUID_FLAG_STATEFUL_FUNC:
+           if cpuid for this function returns different values for successive
+           invocations; there will be several entries with the same function,
+           all with this flag set
+        KVM_CPUID_FLAG_STATE_READ_NEXT:
+           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+           the first entry to be read by a cpu
+   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+         this function/index combination
+
+4.89 KVM_S390_MEM_OP
+
+Capability: KVM_CAP_S390_MEM_OP
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_mem_op (in)
+Returns: = 0 on success,
+         < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+         > 0 if an exception occurred while walking the page tables
+
+Read or write data from/to the logical (virtual) memory of a VCPU.
+
+Parameters are specified via the following structure:
+
+struct kvm_s390_mem_op {
+	__u64 gaddr;		/* the guest address */
+	__u64 flags;		/* flags */
+	__u32 size;		/* amount of bytes */
+	__u32 op;		/* type of operation */
+	__u64 buf;		/* buffer in userspace */
+	__u8 ar;		/* the access register number */
+	__u8 reserved[31];	/* should be set to 0 */
+};
+
+The type of operation is specified in the "op" field. It is either
+KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+whether the corresponding memory access would create an access exception
+(without touching the data in the memory at the destination). In case an
+access exception occurred while walking the MMU tables of the guest, the
+ioctl returns a positive error number to indicate the type of exception.
+This exception is also raised directly at the corresponding VCPU if the
+flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+
+The start address of the memory region has to be specified in the "gaddr"
+field, and the length of the region in the "size" field. "buf" is the buffer
+supplied by the userspace application where the read data should be written
+to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
+is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
+when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
+register number to be used.
+
+The "reserved" field is meant for future extensions. It is not used by
+KVM with the currently defined set of flags.
+
+4.90 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
+         keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+	__u64 start_gfn;
+	__u64 count;
+	__u64 skeydata_addr;
+	__u32 flags;
+	__u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
+
+4.91 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
+4.92 KVM_S390_IRQ
+
+Capability: KVM_CAP_S390_INJECT_IRQ
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq (in)
+Returns: 0 on success, -1 on error
+Errors:
+  EINVAL: interrupt type is invalid
+          type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+          type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
+            than the maximum of VCPUs
+  EBUSY:  type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
+          type is KVM_S390_SIGP_STOP and a stop irq is already pending
+          type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
+            is already pending
+
+Allows to inject an interrupt to the guest.
+
+Using struct kvm_s390_irq as a parameter allows
+to inject additional payload which is not
+possible via KVM_S390_INTERRUPT.
+
+Interrupt parameters are passed via kvm_s390_irq:
+
+struct kvm_s390_irq {
+	__u64 type;
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
+		struct kvm_s390_mchk_info mchk;
+		char reserved[64];
+	} u;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+KVM_S390_RESTART - restart; no parameters
+KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+
+This is an asynchronous vcpu ioctl and can be invoked from any thread.
+
+4.94 KVM_S390_GET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (out)
+Returns: >= number of bytes copied into buffer,
+         -EINVAL if buffer size is 0,
+         -ENOBUFS if buffer size is too small to fit all pending interrupts,
+         -EFAULT if the buffer address was invalid
+
+This ioctl allows userspace to retrieve the complete state of all currently
+pending interrupts in a single buffer. Use cases include migration
+and introspection. The parameter structure contains the address of a
+userspace buffer and its length:
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;        /* will stay unused for compatibility reasons */
+	__u32 len;
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
+};
+
+Userspace passes in the above struct and for each pending interrupt a
+struct kvm_s390_irq is copied to the provided buffer.
+
+The structure contains a flags and a reserved field for future extensions. As
+the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and
+reserved, these fields can not be used in the future without breaking
+compatibility.
+
+If -ENOBUFS is returned the buffer provided was too small and userspace
+may retry with a bigger buffer.
+
+4.95 KVM_S390_SET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (in)
+Returns: 0 on success,
+         -EFAULT if the buffer address was invalid,
+         -EINVAL for an invalid buffer length (see below),
+         -EBUSY if there were already interrupts pending,
+         errors occurring when actually injecting the
+          interrupt. See KVM_S390_IRQ.
+
+This ioctl allows userspace to set the complete state of all cpu-local
+interrupts currently pending for the vcpu. It is intended for restoring
+interrupt state after a migration. The input parameter is a userspace buffer
+containing a struct kvm_s390_irq_state:
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;        /* will stay unused for compatibility reasons */
+	__u32 len;
+	__u32 reserved[4];  /* will stay unused for compatibility reasons */
+};
+
+The restrictions for flags and reserved apply as well.
+(see KVM_S390_GET_IRQ_STATE)
+
+The userspace memory referenced by buf contains a struct kvm_s390_irq
+for each interrupt to be injected into the guest.
+If one of the interrupts could not be injected for some reason the
+ioctl aborts.
+
+len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
+and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
+which is the maximum number of possibly pending cpu-local interrupts.
+
+4.96 KVM_SMI
+
+Capability: KVM_CAP_X86_SMM
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Queues an SMI on the thread's vcpu.
+
+4.97 KVM_CAP_PPC_MULTITCE
+
+Capability: KVM_CAP_PPC_MULTITCE
+Architectures: ppc
+Type: vm
+
+This capability means the kernel is capable of handling hypercalls
+H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
+space. This significantly accelerates DMA operations for PPC KVM guests.
+User space should expect that its handlers for these hypercalls
+are not going to be called if user space previously registered LIOBN
+in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
+
+In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+The hypercalls mentioned above may or may not be processed successfully
+in the kernel based fast path. If they can not be handled by the kernel,
+they will get passed on to user space. So user space still has to have
+an implementation for these despite the in kernel acceleration.
+
+This capability is always enabled.
+
+4.98 KVM_CREATE_SPAPR_TCE_64
+
+Capability: KVM_CAP_SPAPR_TCE_64
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_create_spapr_tce_64 (in)
+Returns: file descriptor for manipulating the created TCE table
+
+This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
+windows, described in 4.62 KVM_CREATE_SPAPR_TCE
+
+This capability uses extended struct in ioctl interface:
+
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+	__u64 liobn;
+	__u32 page_shift;
+	__u32 flags;
+	__u64 offset;	/* in pages */
+	__u64 size; 	/* in pages */
+};
+
+The aim of extension is to support an additional bigger DMA window with
+a variable page size.
+KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
+a bus offset of the corresponding DMA window, @size and @offset are numbers
+of IOMMU pages.
+
+@flags are not used at the moment.
+
+The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
+
+4.99 KVM_REINJECT_CONTROL
+
+Capability: KVM_CAP_REINJECT_CONTROL
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_reinject_control (in)
+Returns: 0 on success,
+         -EFAULT if struct kvm_reinject_control cannot be read,
+         -ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier.
+
+i8254 (PIT) has two modes, reinject and !reinject.  The default is reinject,
+where KVM queues elapsed i8254 ticks and monitors completion of interrupt from
+vector(s) that i8254 injects.  Reinject mode dequeues a tick and injects its
+interrupt whenever there isn't a pending interrupt from i8254.
+!reinject mode injects an interrupt as soon as a tick arrives.
+
+struct kvm_reinject_control {
+	__u8 pit_reinject;
+	__u8 reserved[31];
+};
+
+pit_reinject = 0 (!reinject mode) is recommended, unless running an old
+operating system that uses the PIT for timing (e.g. Linux 2.4.x).
+
+4.100 KVM_PPC_CONFIGURE_V3_MMU
+
+Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_mmuv3_cfg (in)
+Returns: 0 on success,
+         -EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
+         -EINVAL if the configuration is invalid
+
+This ioctl controls whether the guest will use radix or HPT (hashed
+page table) translation, and sets the pointer to the process table for
+the guest.
+
+struct kvm_ppc_mmuv3_cfg {
+	__u64	flags;
+	__u64	process_table;
+};
+
+There are two bits that can be set in flags; KVM_PPC_MMUV3_RADIX and
+KVM_PPC_MMUV3_GTSE.  KVM_PPC_MMUV3_RADIX, if set, configures the guest
+to use radix tree translation, and if clear, to use HPT translation.
+KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
+to be able to use the global TLB and SLB invalidation instructions;
+if clear, the guest may not use these instructions.
+
+The process_table field specifies the address and size of the guest
+process table, which is in the guest's space.  This field is formatted
+as the second doubleword of the partition table entry, as defined in
+the Power ISA V3.00, Book III section 5.7.6.1.
+
+4.101 KVM_PPC_GET_RMMU_INFO
+
+Capability: KVM_CAP_PPC_RADIX_MMU
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_rmmu_info (out)
+Returns: 0 on success,
+	 -EFAULT if struct kvm_ppc_rmmu_info cannot be written,
+	 -EINVAL if no useful information can be returned
+
+This ioctl returns a structure containing two things: (a) a list
+containing supported radix tree geometries, and (b) a list that maps
+page sizes to put in the "AP" (actual page size) field for the tlbie
+(TLB invalidate entry) instruction.
+
+struct kvm_ppc_rmmu_info {
+	struct kvm_ppc_radix_geom {
+		__u8	page_shift;
+		__u8	level_bits[4];
+		__u8	pad[3];
+	}	geometries[8];
+	__u32	ap_encodings[8];
+};
+
+The geometries[] field gives up to 8 supported geometries for the
+radix page table, in terms of the log base 2 of the smallest page
+size, and the number of bits indexed at each level of the tree, from
+the PTE level up to the PGD level in that order.  Any unused entries
+will have 0 in the page_shift field.
+
+The ap_encodings gives the supported page sizes and their AP field
+encodings, encoded with the AP value in the top 3 bits and the log
+base 2 of the page size in the bottom 6 bits.
+
+4.102 KVM_PPC_RESIZE_HPT_PREPARE
+
+Capability: KVM_CAP_SPAPR_RESIZE_HPT
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_ppc_resize_hpt (in)
+Returns: 0 on successful completion,
+	 >0 if a new HPT is being prepared, the value is an estimated
+             number of milliseconds until preparation is complete
+         -EFAULT if struct kvm_reinject_control cannot be read,
+	 -EINVAL if the supplied shift or flags are invalid
+	 -ENOMEM if unable to allocate the new HPT
+	 -ENOSPC if there was a hash collision when moving existing
+                  HPT entries to the new HPT
+	 -EIO on other error conditions
+
+Used to implement the PAPR extension for runtime resizing of a guest's
+Hashed Page Table (HPT).  Specifically this starts, stops or monitors
+the preparation of a new potential HPT for the guest, essentially
+implementing the H_RESIZE_HPT_PREPARE hypercall.
+
+If called with shift > 0 when there is no pending HPT for the guest,
+this begins preparation of a new pending HPT of size 2^(shift) bytes.
+It then returns a positive integer with the estimated number of
+milliseconds until preparation is complete.
+
+If called when there is a pending HPT whose size does not match that
+requested in the parameters, discards the existing pending HPT and
+creates a new one as above.
+
+If called when there is a pending HPT of the size requested, will:
+  * If preparation of the pending HPT is already complete, return 0
+  * If preparation of the pending HPT has failed, return an error
+    code, then discard the pending HPT.
+  * If preparation of the pending HPT is still in progress, return an
+    estimated number of milliseconds until preparation is complete.
+
+If called with shift == 0, discards any currently pending HPT and
+returns 0 (i.e. cancels any in-progress preparation).
+
+flags is reserved for future expansion, currently setting any bits in
+flags will result in an -EINVAL.
+
+Normally this will be called repeatedly with the same parameters until
+it returns <= 0.  The first call will initiate preparation, subsequent
+ones will monitor preparation until it completes or fails.
+
+struct kvm_ppc_resize_hpt {
+	__u64 flags;
+	__u32 shift;
+	__u32 pad;
+};
+
+4.103 KVM_PPC_RESIZE_HPT_COMMIT
+
+Capability: KVM_CAP_SPAPR_RESIZE_HPT
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_ppc_resize_hpt (in)
+Returns: 0 on successful completion,
+         -EFAULT if struct kvm_reinject_control cannot be read,
+	 -EINVAL if the supplied shift or flags are invalid
+	 -ENXIO is there is no pending HPT, or the pending HPT doesn't
+                 have the requested size
+	 -EBUSY if the pending HPT is not fully prepared
+	 -ENOSPC if there was a hash collision when moving existing
+                  HPT entries to the new HPT
+	 -EIO on other error conditions
+
+Used to implement the PAPR extension for runtime resizing of a guest's
+Hashed Page Table (HPT).  Specifically this requests that the guest be
+transferred to working with the new HPT, essentially implementing the
+H_RESIZE_HPT_COMMIT hypercall.
+
+This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
+returned 0 with the same parameters.  In other cases
+KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
+-EBUSY, though others may be possible if the preparation was started,
+but failed).
+
+This will have undefined effects on the guest if it has not already
+placed itself in a quiescent state where no vcpu will make MMU enabled
+memory accesses.
+
+On succsful completion, the pending HPT will become the guest's active
+HPT and the previous HPT will be discarded.
+
+On failure, the guest will still be operating on its previous HPT.
+
+struct kvm_ppc_resize_hpt {
+	__u64 flags;
+	__u32 shift;
+	__u32 pad;
+};
+
+4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: system ioctl
+Parameters: u64 mce_cap (out)
+Returns: 0 on success, -1 on error
+
+Returns supported MCE capabilities. The u64 mce_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register. Supported
+capabilities will have the corresponding bits set.
+
+4.105 KVM_X86_SETUP_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: u64 mcg_cap (in)
+Returns: 0 on success,
+         -EFAULT if u64 mcg_cap cannot be read,
+         -EINVAL if the requested number of banks is invalid,
+         -EINVAL if requested MCE capability is not supported.
+
+Initializes MCE support for use. The u64 mcg_cap parameter
+has the same format as the MSR_IA32_MCG_CAP register and
+specifies which capabilities should be enabled. The maximum
+supported number of error-reporting banks can be retrieved when
+checking for KVM_CAP_MCE. The supported capabilities can be
+retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED.
+
+4.106 KVM_X86_SET_MCE
+
+Capability: KVM_CAP_MCE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_x86_mce (in)
+Returns: 0 on success,
+         -EFAULT if struct kvm_x86_mce cannot be read,
+         -EINVAL if the bank number is invalid,
+         -EINVAL if VAL bit is not set in status field.
+
+Inject a machine check error (MCE) into the guest. The input
+parameter is:
+
+struct kvm_x86_mce {
+	__u64 status;
+	__u64 addr;
+	__u64 misc;
+	__u64 mcg_status;
+	__u8 bank;
+	__u8 pad1[7];
+	__u64 pad2[3];
+};
+
+If the MCE being reported is an uncorrected error, KVM will
+inject it as an MCE exception into the guest. If the guest
+MCG_STATUS register reports that an MCE is in progress, KVM
+causes an KVM_EXIT_SHUTDOWN vmexit.
+
+Otherwise, if the MCE is a corrected error, KVM will just
+store it in the corresponding bank (provided this bank is
+not holding a previously reported uncorrected error).
+
+4.107 KVM_S390_GET_CMMA_BITS
+
+Capability: KVM_CAP_S390_CMMA_MIGRATION
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_cmma_log (in, out)
+Returns: 0 on success, a negative value on error
+
+This ioctl is used to get the values of the CMMA bits on the s390
+architecture. It is meant to be used in two scenarios:
+- During live migration to save the CMMA values. Live migration needs
+  to be enabled via the KVM_REQ_START_MIGRATION VM property.
+- To non-destructively peek at the CMMA values, with the flag
+  KVM_S390_CMMA_PEEK set.
+
+The ioctl takes parameters via the kvm_s390_cmma_log struct. The desired
+values are written to a buffer whose location is indicated via the "values"
+member in the kvm_s390_cmma_log struct.  The values in the input struct are
+also updated as needed.
+Each CMMA value takes up one byte.
+
+struct kvm_s390_cmma_log {
+	__u64 start_gfn;
+	__u32 count;
+	__u32 flags;
+	union {
+		__u64 remaining;
+		__u64 mask;
+	};
+	__u64 values;
+};
+
+start_gfn is the number of the first guest frame whose CMMA values are
+to be retrieved,
+
+count is the length of the buffer in bytes,
+
+values points to the buffer where the result will be written to.
+
+If count is greater than KVM_S390_SKEYS_MAX, then it is considered to be
+KVM_S390_SKEYS_MAX. KVM_S390_SKEYS_MAX is re-used for consistency with
+other ioctls.
+
+The result is written in the buffer pointed to by the field values, and
+the values of the input parameter are updated as follows.
+
+Depending on the flags, different actions are performed. The only
+supported flag so far is KVM_S390_CMMA_PEEK.
+
+The default behaviour if KVM_S390_CMMA_PEEK is not set is:
+start_gfn will indicate the first page frame whose CMMA bits were dirty.
+It is not necessarily the same as the one passed as input, as clean pages
+are skipped.
+
+count will indicate the number of bytes actually written in the buffer.
+It can (and very often will) be smaller than the input value, since the
+buffer is only filled until 16 bytes of clean values are found (which
+are then not copied in the buffer). Since a CMMA migration block needs
+the base address and the length, for a total of 16 bytes, we will send
+back some clean data if there is some dirty data afterwards, as long as
+the size of the clean data does not exceed the size of the header. This
+allows to minimize the amount of data to be saved or transferred over
+the network at the expense of more roundtrips to userspace. The next
+invocation of the ioctl will skip over all the clean values, saving
+potentially more than just the 16 bytes we found.
+
+If KVM_S390_CMMA_PEEK is set:
+the existing storage attributes are read even when not in migration
+mode, and no other action is performed;
+
+the output start_gfn will be equal to the input start_gfn,
+
+the output count will be equal to the input count, except if the end of
+memory has been reached.
+
+In both cases:
+the field "remaining" will indicate the total number of dirty CMMA values
+still remaining, or 0 if KVM_S390_CMMA_PEEK is set and migration mode is
+not enabled.
+
+mask is unused.
+
+values points to the userspace buffer where the result will be stored.
+
+This ioctl can fail with -ENOMEM if not enough memory can be allocated to
+complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
+KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
+-EFAULT if the userspace address is invalid or if no page table is
+present for the addresses (e.g. when using hugepages).
+
+4.108 KVM_S390_SET_CMMA_BITS
+
+Capability: KVM_CAP_S390_CMMA_MIGRATION
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_cmma_log (in)
+Returns: 0 on success, a negative value on error
+
+This ioctl is used to set the values of the CMMA bits on the s390
+architecture. It is meant to be used during live migration to restore
+the CMMA values, but there are no restrictions on its use.
+The ioctl takes parameters via the kvm_s390_cmma_values struct.
+Each CMMA value takes up one byte.
+
+struct kvm_s390_cmma_log {
+	__u64 start_gfn;
+	__u32 count;
+	__u32 flags;
+	union {
+		__u64 remaining;
+		__u64 mask;
+	};
+	__u64 values;
+};
+
+start_gfn indicates the starting guest frame number,
+
+count indicates how many values are to be considered in the buffer,
+
+flags is not used and must be 0.
+
+mask indicates which PGSTE bits are to be considered.
+
+remaining is not used.
+
+values points to the buffer in userspace where to store the values.
+
+This ioctl can fail with -ENOMEM if not enough memory can be allocated to
+complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
+the count field is too large (e.g. more than KVM_S390_CMMA_SIZE_MAX) or
+if the flags field was not 0, with -EFAULT if the userspace address is
+invalid, if invalid pages are written to (e.g. after the end of memory)
+or if no page table is present for the addresses (e.g. when using
+hugepages).
+
+4.109 KVM_PPC_GET_CPU_CHAR
+
+Capability: KVM_CAP_PPC_GET_CPU_CHAR
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_ppc_cpu_char (out)
+Returns: 0 on successful completion
+	 -EFAULT if struct kvm_ppc_cpu_char cannot be written
+
+This ioctl gives userspace information about certain characteristics
+of the CPU relating to speculative execution of instructions and
+possible information leakage resulting from speculative execution (see
+CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754).  The information is
+returned in struct kvm_ppc_cpu_char, which looks like this:
+
+struct kvm_ppc_cpu_char {
+	__u64	character;		/* characteristics of the CPU */
+	__u64	behaviour;		/* recommended software behaviour */
+	__u64	character_mask;		/* valid bits in character */
+	__u64	behaviour_mask;		/* valid bits in behaviour */
+};
+
+For extensibility, the character_mask and behaviour_mask fields
+indicate which bits of character and behaviour have been filled in by
+the kernel.  If the set of defined bits is extended in future then
+userspace will be able to tell whether it is running on a kernel that
+knows about the new bits.
+
+The character field describes attributes of the CPU which can help
+with preventing inadvertent information disclosure - specifically,
+whether there is an instruction to flash-invalidate the L1 data cache
+(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
+to a mode where entries can only be used by the thread that created
+them, whether the bcctr[l] instruction prevents speculation, and
+whether a speculation barrier instruction (ori 31,31,0) is provided.
+
+The behaviour field describes actions that software should take to
+prevent inadvertent information disclosure, and thus describes which
+vulnerabilities the hardware is subject to; specifically whether the
+L1 data cache should be flushed when returning to user mode from the
+kernel, and whether a speculation barrier should be placed between an
+array bounds check and the array access.
+
+These fields use the same bit definitions as the new
+H_GET_CPU_CHARACTERISTICS hypercall.
+
+4.110 KVM_MEMORY_ENCRYPT_OP
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: an opaque platform specific structure (in/out)
+Returns: 0 on success; -1 on error
+
+If the platform supports creating encrypted VMs then this ioctl can be used
+for issuing platform-specific memory encryption commands to manage those
+encrypted VMs.
+
+Currently, this ioctl is used for issuing Secure Encrypted Virtualization
+(SEV) commands on AMD Processors. The SEV commands are defined in
+Documentation/virt/kvm/amd-memory-encryption.rst.
+
+4.111 KVM_MEMORY_ENCRYPT_REG_REGION
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_enc_region (in)
+Returns: 0 on success; -1 on error
+
+This ioctl can be used to register a guest memory region which may
+contain encrypted data (e.g. guest RAM, SMRAM etc).
+
+It is used in the SEV-enabled guest. When encryption is enabled, a guest
+memory region may contain encrypted data. The SEV memory encryption
+engine uses a tweak such that two identical plaintext pages, each at
+different locations will have differing ciphertexts. So swapping or
+moving ciphertext of those pages will not result in plaintext being
+swapped. So relocating (or migrating) physical backing pages for the SEV
+guest will require some additional steps.
+
+Note: The current SEV key management spec does not provide commands to
+swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
+memory region registered with the ioctl.
+
+4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_enc_region (in)
+Returns: 0 on success; -1 on error
+
+This ioctl can be used to unregister the guest memory region registered
+with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
+
+4.113 KVM_HYPERV_EVENTFD
+
+Capability: KVM_CAP_HYPERV_EVENTFD
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_hyperv_eventfd (in)
+
+This ioctl (un)registers an eventfd to receive notifications from the guest on
+the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
+causing a user exit.  SIGNAL_EVENT hypercall with non-zero event flag number
+(bits 24-31) still triggers a KVM_EXIT_HYPERV_HCALL user exit.
+
+struct kvm_hyperv_eventfd {
+	__u32 conn_id;
+	__s32 fd;
+	__u32 flags;
+	__u32 padding[3];
+};
+
+The conn_id field should fit within 24 bits:
+
+#define KVM_HYPERV_CONN_ID_MASK		0x00ffffff
+
+The acceptable values for the flags field are:
+
+#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
+
+Returns: 0 on success,
+	-EINVAL if conn_id or flags is outside the allowed range
+	-ENOENT on deassign if the conn_id isn't registered
+	-EEXIST on assign if the conn_id is already registered
+
+4.114 KVM_GET_NESTED_STATE
+
+Capability: KVM_CAP_NESTED_STATE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_nested_state (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  E2BIG:     the total state size exceeds the value of 'size' specified by
+             the user; the size required will be written into size.
+
+struct kvm_nested_state {
+	__u16 flags;
+	__u16 format;
+	__u32 size;
+
+	union {
+		struct kvm_vmx_nested_state_hdr vmx;
+		struct kvm_svm_nested_state_hdr svm;
+
+		/* Pad the header to 128 bytes.  */
+		__u8 pad[120];
+	} hdr;
+
+	union {
+		struct kvm_vmx_nested_state_data vmx[0];
+		struct kvm_svm_nested_state_data svm[0];
+	} data;
+};
+
+#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
+#define KVM_STATE_NESTED_EVMCS		0x00000004
+
+#define KVM_STATE_NESTED_FORMAT_VMX		0
+#define KVM_STATE_NESTED_FORMAT_SVM		1
+
+#define KVM_STATE_NESTED_VMX_VMCS_SIZE		0x1000
+
+#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE	0x00000001
+#define KVM_STATE_NESTED_VMX_SMM_VMXON		0x00000002
+
+struct kvm_vmx_nested_state_hdr {
+	__u64 vmxon_pa;
+	__u64 vmcs12_pa;
+
+	struct {
+		__u16 flags;
+	} smm;
+};
+
+struct kvm_vmx_nested_state_data {
+	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+};
+
+This ioctl copies the vcpu's nested virtualization state from the kernel to
+userspace.
+
+The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE
+to the KVM_CHECK_EXTENSION ioctl().
+
+4.115 KVM_SET_NESTED_STATE
+
+Capability: KVM_CAP_NESTED_STATE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_nested_state (in)
+Returns: 0 on success, -1 on error
+
+This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
+For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
+
+4.116 KVM_(UN)REGISTER_COALESCED_MMIO
+
+Capability: KVM_CAP_COALESCED_MMIO (for coalesced mmio)
+	    KVM_CAP_COALESCED_PIO (for coalesced pio)
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_coalesced_mmio_zone
+Returns: 0 on success, < 0 on error
+
+Coalesced I/O is a performance optimization that defers hardware
+register write emulation so that userspace exits are avoided.  It is
+typically used to reduce the overhead of emulating frequently accessed
+hardware registers.
+
+When a hardware register is configured for coalesced I/O, write accesses
+do not exit to userspace and their value is recorded in a ring buffer
+that is shared between kernel and userspace.
+
+Coalesced I/O is used if one or more write accesses to a hardware
+register can be deferred until a read or a write to another hardware
+register on the same device.  This last access will cause a vmexit and
+userspace will process accesses from the ring buffer before emulating
+it. That will avoid exiting to userspace on repeated writes.
+
+Coalesced pio is based on coalesced mmio. There is little difference
+between coalesced mmio and pio except that coalesced pio records accesses
+to I/O ports.
+
+4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
+
+Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
+Architectures: x86, arm, arm64, mips
+Type: vm ioctl
+Parameters: struct kvm_dirty_log (in)
+Returns: 0 on success, -1 on error
+
+/* for KVM_CLEAR_DIRTY_LOG */
+struct kvm_clear_dirty_log {
+	__u32 slot;
+	__u32 num_pages;
+	__u64 first_page;
+	union {
+		void __user *dirty_bitmap; /* one bit per page */
+		__u64 padding;
+	};
+};
+
+The ioctl clears the dirty status of pages in a memory slot, according to
+the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
+field.  Bit 0 of the bitmap corresponds to page "first_page" in the
+memory slot, and num_pages is the size in bits of the input bitmap.
+first_page must be a multiple of 64; num_pages must also be a multiple of
+64 unless first_page + num_pages is the size of the memory slot.  For each
+bit that is set in the input bitmap, the corresponding page is marked "clean"
+in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
+(for example via write-protection, or by clearing the dirty bit in
+a page table entry).
+
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
+the address space for which you want to return the dirty bitmap.
+They must be less than the value that KVM_CHECK_EXTENSION returns for
+the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+
+This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
+is enabled; for more information, see the description of the capability.
+However, it can always be used as long as KVM_CHECK_EXTENSION confirms
+that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
+
+4.118 KVM_GET_SUPPORTED_HV_CPUID
+
+Capability: KVM_CAP_HYPERV_CPUID
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+	__u32 nent;
+	__u32 padding;
+	struct kvm_cpuid_entry2 entries[0];
+};
+
+struct kvm_cpuid_entry2 {
+	__u32 function;
+	__u32 index;
+	__u32 flags;
+	__u32 eax;
+	__u32 ebx;
+	__u32 ecx;
+	__u32 edx;
+	__u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features leaves related to Hyper-V emulation in
+KVM.  Userspace can use the information returned by this ioctl to construct
+cpuid information presented to guests consuming Hyper-V enlightenments (e.g.
+Windows or Hyper-V guests).
+
+CPUID feature leaves returned by this ioctl are defined by Hyper-V Top Level
+Functional Specification (TLFS). These leaves can't be obtained with
+KVM_GET_SUPPORTED_CPUID ioctl because some of them intersect with KVM feature
+leaves (0x40000000, 0x40000001).
+
+Currently, the following list of CPUID leaves are returned:
+ HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
+ HYPERV_CPUID_INTERFACE
+ HYPERV_CPUID_VERSION
+ HYPERV_CPUID_FEATURES
+ HYPERV_CPUID_ENLIGHTMENT_INFO
+ HYPERV_CPUID_IMPLEMENT_LIMITS
+ HYPERV_CPUID_NESTED_FEATURES
+
+HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
+enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
+
+Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
+with the 'nent' field indicating the number of entries in the variable-size
+array 'entries'.  If the number of entries is too low to describe all Hyper-V
+feature leaves, an error (E2BIG) is returned. If the number is more or equal
+to the number of Hyper-V feature leaves, the 'nent' field is adjusted to the
+number of valid entries in the 'entries' array, which is then filled.
+
+'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
+userspace should not expect to get any particular value there.
+
+4.119 KVM_ARM_VCPU_FINALIZE
+
+Architectures: arm, arm64
+Type: vcpu ioctl
+Parameters: int feature (in)
+Returns: 0 on success, -1 on error
+Errors:
+  EPERM:     feature not enabled, needs configuration, or already finalized
+  EINVAL:    feature unknown or not present
+
+Recognised values for feature:
+  arm64      KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE)
+
+Finalizes the configuration of the specified vcpu feature.
+
+The vcpu must already have been initialised, enabling the affected feature, by
+means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set in
+features[].
+
+For affected vcpu features, this is a mandatory step that must be performed
+before the vcpu is fully usable.
+
+Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be
+configured by use of ioctls such as KVM_SET_ONE_REG.  The exact configuration
+that should be performaned and how to do it are feature-dependent.
+
+Other calls that depend on a particular feature being finalized, such as
+KVM_RUN, KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with
+-EPERM unless the feature has already been finalized by means of a
+KVM_ARM_VCPU_FINALIZE call.
+
+See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
+using this ioctl.
+
+4.120 KVM_SET_PMU_EVENT_FILTER
+
+Capability: KVM_CAP_PMU_EVENT_FILTER
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pmu_event_filter (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_pmu_event_filter {
+	__u32 action;
+	__u32 nevents;
+	__u32 fixed_counter_bitmap;
+	__u32 flags;
+	__u32 pad[4];
+	__u64 events[0];
+};
+
+This ioctl restricts the set of PMU events that the guest can program.
+The argument holds a list of events which will be allowed or denied.
+The eventsel+umask of each event the guest attempts to program is compared
+against the events field to determine whether the guest should have access.
+The events field only controls general purpose counters; fixed purpose
+counters are controlled by the fixed_counter_bitmap.
+
+No flags are defined yet, the field must be zero.
+
+Valid values for 'action':
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
+
+5. The kvm_run structure
+------------------------
+
+Application code obtains a pointer to the kvm_run structure by
+mmap()ing a vcpu fd.  From that point, application code can control
+execution by changing fields in kvm_run prior to calling the KVM_RUN
+ioctl, and obtain information about the reason KVM_RUN returned by
+looking up structure members.
+
+struct kvm_run {
+	/* in */
+	__u8 request_interrupt_window;
+
+Request that KVM_RUN return when it becomes possible to inject external
+interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.
+
+	__u8 immediate_exit;
+
+This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
+exits immediately, returning -EINTR.  In the common scenario where a
+signal is used to "kick" a VCPU out of KVM_RUN, this field can be used
+to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability.
+Rather than blocking the signal outside KVM_RUN, userspace can set up
+a signal handler that sets run->immediate_exit to a non-zero value.
+
+This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available.
+
+	__u8 padding1[6];
+
+	/* out */
+	__u32 exit_reason;
+
+When KVM_RUN has returned successfully (return value 0), this informs
+application code why KVM_RUN has returned.  Allowable values for this
+field are detailed below.
+
+	__u8 ready_for_interrupt_injection;
+
+If request_interrupt_window has been specified, this field indicates
+an interrupt can be injected now with KVM_INTERRUPT.
+
+	__u8 if_flag;
+
+The value of the current interrupt flag.  Only valid if in-kernel
+local APIC is not used.
+
+	__u16 flags;
+
+More architecture-specific flags detailing state of the VCPU that may
+affect the device's behavior.  The only currently defined flag is
+KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
+VCPU is in system management mode.
+
+	/* in (pre_kvm_run), out (post_kvm_run) */
+	__u64 cr8;
+
+The value of the cr8 register.  Only valid if in-kernel local APIC is
+not used.  Both input and output.
+
+	__u64 apic_base;
+
+The value of the APIC BASE msr.  Only valid if in-kernel local
+APIC is not used.  Both input and output.
+
+	union {
+		/* KVM_EXIT_UNKNOWN */
+		struct {
+			__u64 hardware_exit_reason;
+		} hw;
+
+If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
+reasons.  Further architecture-specific information is available in
+hardware_exit_reason.
+
+		/* KVM_EXIT_FAIL_ENTRY */
+		struct {
+			__u64 hardware_entry_failure_reason;
+		} fail_entry;
+
+If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
+to unknown reasons.  Further architecture-specific information is
+available in hardware_entry_failure_reason.
+
+		/* KVM_EXIT_EXCEPTION */
+		struct {
+			__u32 exception;
+			__u32 error_code;
+		} ex;
+
+Unused.
+
+		/* KVM_EXIT_IO */
+		struct {
+#define KVM_EXIT_IO_IN  0
+#define KVM_EXIT_IO_OUT 1
+			__u8 direction;
+			__u8 size; /* bytes */
+			__u16 port;
+			__u32 count;
+			__u64 data_offset; /* relative to kvm_run start */
+		} io;
+
+If exit_reason is KVM_EXIT_IO, then the vcpu has
+executed a port I/O instruction which could not be satisfied by kvm.
+data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
+where kvm expects application code to place the data for the next
+KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
+
+		/* KVM_EXIT_DEBUG */
+		struct {
+			struct kvm_debug_exit_arch arch;
+		} debug;
+
+If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
+for which architecture specific information is returned.
+
+		/* KVM_EXIT_MMIO */
+		struct {
+			__u64 phys_addr;
+			__u8  data[8];
+			__u32 len;
+			__u8  is_write;
+		} mmio;
+
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
+executed a memory-mapped I/O instruction which could not be satisfied
+by kvm.  The 'data' member contains the written data if 'is_write' is
+true, and should be filled by application code otherwise.
+
+The 'data' member contains, in its first 'len' bytes, the value as it would
+appear if the VCPU performed a load or store of the appropriate width directly
+to the byte array.
+
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
+      KVM_EXIT_EPR the corresponding
+operations are complete (and guest state is consistent) only after userspace
+has re-entered the kernel with KVM_RUN.  The kernel side will first finish
+incomplete operations and then check for pending signals.  Userspace
+can re-enter the guest with an unmasked signal pending to complete
+pending operations.
+
+		/* KVM_EXIT_HYPERCALL */
+		struct {
+			__u64 nr;
+			__u64 args[6];
+			__u64 ret;
+			__u32 longmode;
+			__u32 pad;
+		} hypercall;
+
+Unused.  This was once used for 'hypercall to userspace'.  To implement
+such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+
+		/* KVM_EXIT_TPR_ACCESS */
+		struct {
+			__u64 rip;
+			__u32 is_write;
+			__u32 pad;
+		} tpr_access;
+
+To be documented (KVM_TPR_ACCESS_REPORTING).
+
+		/* KVM_EXIT_S390_SIEIC */
+		struct {
+			__u8 icptcode;
+			__u64 mask; /* psw upper half */
+			__u64 addr; /* psw lower half */
+			__u16 ipa;
+			__u32 ipb;
+		} s390_sieic;
+
+s390 specific.
+
+		/* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR       1
+#define KVM_S390_RESET_CLEAR     2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT  8
+#define KVM_S390_RESET_IPL       16
+		__u64 s390_reset_flags;
+
+s390 specific.
+
+		/* KVM_EXIT_S390_UCONTROL */
+		struct {
+			__u64 trans_exc_code;
+			__u32 pgm_code;
+		} s390_ucontrol;
+
+s390 specific. A page fault has occurred for a user controlled virtual
+machine (KVM_VM_S390_UNCONTROL) on it's host page table that cannot be
+resolved by the kernel.
+The program code and the translation exception code that were placed
+in the cpu's lowcore are presented here as defined by the z Architecture
+Principles of Operation Book in the Chapter for Dynamic Address Translation
+(DAT)
+
+		/* KVM_EXIT_DCR */
+		struct {
+			__u32 dcrn;
+			__u32 data;
+			__u8  is_write;
+		} dcr;
+
+Deprecated - was used for 440 KVM.
+
+		/* KVM_EXIT_OSI */
+		struct {
+			__u64 gprs[32];
+		} osi;
+
+MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch
+hypercalls and exit with this exit struct that contains all the guest gprs.
+
+If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall.
+Userspace can now handle the hypercall and when it's done modify the gprs as
+necessary. Upon guest entry all guest GPRs will then be replaced by the values
+in this struct.
+
+		/* KVM_EXIT_PAPR_HCALL */
+		struct {
+			__u64 nr;
+			__u64 ret;
+			__u64 args[9];
+		} papr_hcall;
+
+This is used on 64-bit PowerPC when emulating a pSeries partition,
+e.g. with the 'pseries' machine type in qemu.  It occurs when the
+guest does a hypercall using the 'sc 1' instruction.  The 'nr' field
+contains the hypercall number (from the guest R3), and 'args' contains
+the arguments (from the guest R4 - R12).  Userspace should put the
+return code in 'ret' and any extra returned values in args[].
+The possible hypercalls are defined in the Power Architecture Platform
+Requirements (PAPR) document available from www.power.org (free
+developer registration required to access it).
+
+		/* KVM_EXIT_S390_TSCH */
+		struct {
+			__u16 subchannel_id;
+			__u16 subchannel_nr;
+			__u32 io_int_parm;
+			__u32 io_int_word;
+			__u32 ipb;
+			__u8 dequeued;
+		} s390_tsch;
+
+s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled
+and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O
+interrupt for the target subchannel has been dequeued and subchannel_id,
+subchannel_nr, io_int_parm and io_int_word contain the parameters for that
+interrupt. ipb is needed for instruction parameter decoding.
+
+		/* KVM_EXIT_EPR */
+		struct {
+			__u32 epr;
+		} epr;
+
+On FSL BookE PowerPC chips, the interrupt controller has a fast patch
+interrupt acknowledge path to the core. When the core successfully
+delivers an interrupt, it automatically populates the EPR register with
+the interrupt vector number and acknowledges the interrupt inside
+the interrupt controller.
+
+In case the interrupt controller lives in user space, we need to do
+the interrupt acknowledge cycle through it to fetch the next to be
+delivered interrupt vector using this exit.
+
+It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
+external interrupt has just been delivered into the guest. User space
+should put the acknowledged interrupt vector into the 'epr' field.
+
+		/* KVM_EXIT_SYSTEM_EVENT */
+		struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN       1
+#define KVM_SYSTEM_EVENT_RESET          2
+#define KVM_SYSTEM_EVENT_CRASH          3
+			__u32 type;
+			__u64 flags;
+		} system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
+Valid values for 'type' are:
+  KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+   VM. Userspace is not obliged to honour this, and if it does honour
+   this does not need to destroy the VM synchronously (ie it may call
+   KVM_RUN again before shutdown finally occurs).
+  KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+   As with SHUTDOWN, userspace can choose to ignore the request, or
+   to schedule the reset to occur in the future and may call KVM_RUN again.
+  KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest
+   has requested a crash condition maintenance. Userspace can choose
+   to ignore the request, or to gather VM memory core dump and/or
+   reset/shutdown of the VM.
+
+		/* KVM_EXIT_IOAPIC_EOI */
+		struct {
+			__u8 vector;
+		} eoi;
+
+Indicates that the VCPU's in-kernel local APIC received an EOI for a
+level-triggered IOAPIC interrupt.  This exit only triggers when the
+IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+the userspace IOAPIC should process the EOI and retrigger the interrupt if
+it is still asserted.  Vector is the LAPIC interrupt vector for which the
+EOI was received.
+
+		struct kvm_hyperv_exit {
+#define KVM_EXIT_HYPERV_SYNIC          1
+#define KVM_EXIT_HYPERV_HCALL          2
+			__u32 type;
+			union {
+				struct {
+					__u32 msr;
+					__u64 control;
+					__u64 evt_page;
+					__u64 msg_page;
+				} synic;
+				struct {
+					__u64 input;
+					__u64 result;
+					__u64 params[2];
+				} hcall;
+			} u;
+		};
+		/* KVM_EXIT_HYPERV */
+                struct kvm_hyperv_exit hyperv;
+Indicates that the VCPU exits into userspace to process some tasks
+related to Hyper-V emulation.
+Valid values for 'type' are:
+	KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
+Hyper-V SynIC state change. Notification is used to remap SynIC
+event/message pages and to enable/disable SynIC messages/events processing
+in userspace.
+
+		/* Fix the size of the union. */
+		char padding[256];
+	};
+
+	/*
+	 * shared registers between kvm and userspace.
+	 * kvm_valid_regs specifies the register classes set by the host
+	 * kvm_dirty_regs specified the register classes dirtied by userspace
+	 * struct kvm_sync_regs is architecture specific, as well as the
+	 * bits for kvm_valid_regs and kvm_dirty_regs
+	 */
+	__u64 kvm_valid_regs;
+	__u64 kvm_dirty_regs;
+	union {
+		struct kvm_sync_regs regs;
+		char padding[SYNC_REGS_SIZE_BYTES];
+	} s;
+
+If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access
+certain guest registers without having to call SET/GET_*REGS. Thus we can
+avoid some system call overhead if userspace has to handle the exit.
+Userspace can query the validity of the structure by checking
+kvm_valid_regs for specific bits. These bits are architecture specific
+and usually define the validity of a groups of registers. (e.g. one bit
+ for general purpose registers)
+
+Please note that the kernel is allowed to use the kvm_run structure as the
+primary storage for certain register types. Therefore, the kernel may use the
+values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
+
+};
+
+
+
+6. Capabilities that can be enabled on vCPUs
+--------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual CPU or
+the virtual machine when enabled. To enable them, please see section 4.37.
+Below you can find a list of capabilities and what their effect on the vCPU or
+the virtual machine is when enabling them.
+
+The following information is provided along with the description:
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Target: whether this is a per-vcpu or per-vm capability.
+
+  Parameters: what parameters are accepted by the capability.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+
+6.1 KVM_CAP_PPC_OSI
+
+Architectures: ppc
+Target: vcpu
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables interception of OSI hypercalls that otherwise would
+be treated as normal system calls to be injected into the guest. OSI hypercalls
+were invented by Mac-on-Linux to have a standardized communication mechanism
+between the guest and the host.
+
+When this capability is enabled, KVM_EXIT_OSI can occur.
+
+
+6.2 KVM_CAP_PPC_PAPR
+
+Architectures: ppc
+Target: vcpu
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables interception of PAPR hypercalls. PAPR hypercalls are
+done using the hypercall instruction "sc 1".
+
+It also sets the guest privilege level to "supervisor" mode. Usually the guest
+runs in "hypervisor" privilege mode with a few missing features.
+
+In addition to the above, it changes the semantics of SDR1. In this mode, the
+HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
+HTAB invisible to the guest.
+
+When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
+
+
+6.3 KVM_CAP_SW_TLB
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the address of a struct kvm_config_tlb
+Returns: 0 on success; -1 on error
+
+struct kvm_config_tlb {
+	__u64 params;
+	__u64 array;
+	__u32 mmu_type;
+	__u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM.  The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures.  The "array_len" field is an
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array.  It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM.  Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ - The "params" field is of type "struct kvm_book3e_206_tlb_params".
+ - The "array" field points to an array of type "struct
+   kvm_book3e_206_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+   entries in the second TLB.
+ - Within a TLB, entries are ordered first by increasing set number.  Within a
+   set, entries are ordered by way (increasing ESEL).
+ - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
+   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+ - The tsize field of mas1 shall be set to 4K on TLB0, even though the
+   hardware ignores this value for TLB0.
+
+6.4 KVM_CAP_S390_CSS_SUPPORT
+
+Architectures: s390
+Target: vcpu
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables support for handling of channel I/O instructions.
+
+TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are
+handled in-kernel, while the other I/O instructions are passed to userspace.
+
+When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST
+SUBCHANNEL intercepts.
+
+Note that even though this capability is enabled per-vcpu, the complete
+virtual machine is affected.
+
+6.5 KVM_CAP_PPC_EPR
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] defines whether the proxy facility is active
+Returns: 0 on success; -1 on error
+
+This capability enables or disables the delivery of interrupts through the
+external proxy facility.
+
+When enabled (args[0] != 0), every time the guest gets an external interrupt
+delivered, it automatically exits into user space with a KVM_EXIT_EPR exit
+to receive the topmost interrupt vector.
+
+When disabled (args[0] == 0), behavior is as if this facility is unsupported.
+
+When this capability is enabled, KVM_EXIT_EPR can occur.
+
+6.6 KVM_CAP_IRQ_MPIC
+
+Architectures: ppc
+Parameters: args[0] is the MPIC device fd
+            args[1] is the MPIC CPU number for this vcpu
+
+This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XICS device fd
+            args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
+
+6.8 KVM_CAP_S390_IRQCHIP
+
+Architectures: s390
+Target: vm
+Parameters: none
+
+This capability enables the in-kernel irqchip for s390. Please refer to
+"4.24 KVM_CREATE_IRQCHIP" for details.
+
+6.9 KVM_CAP_MIPS_FPU
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the host Floating Point Unit by the guest. It
+allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
+done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
+(depending on the current guest FPU register mode), and the Status.FR,
+Config5.FRE bits are accessible via the KVM API and also from the guest,
+depending on them being supported by the FPU.
+
+6.10 KVM_CAP_MIPS_MSA
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
+It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
+Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
+accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
+the guest.
+
+6.74 KVM_CAP_SYNC_REGS
+Architectures: s390, x86
+Target: s390: always enabled, x86: vcpu
+Parameters: none
+Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
+sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
+
+As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
+KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
+without having to call SET/GET_*REGS". This reduces overhead by eliminating
+repeated ioctl calls for setting and/or getting register values. This is
+particularly important when userspace is making synchronous guest state
+modifications, e.g. when emulating and/or intercepting instructions in
+userspace.
+
+For s390 specifics, please refer to the source code.
+
+For x86:
+- the register sets to be copied out to kvm_run are selectable
+  by userspace (rather that all sets being copied out for every exit).
+- vcpu_events are available in addition to regs and sregs.
+
+For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to
+function as an input bit-array field set by userspace to indicate the
+specific register sets to be copied out on the next exit.
+
+To indicate when userspace has modified values that should be copied into
+the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set.
+This is done using the same bitflags as for the 'kvm_valid_regs' field.
+If the dirty bit is not set, then the register set values will not be copied
+into the vCPU even if they've been modified.
+
+Unused bitfields in the bitarrays must be set to zero.
+
+struct kvm_sync_regs {
+        struct kvm_regs regs;
+        struct kvm_sregs sregs;
+        struct kvm_vcpu_events events;
+};
+
+6.75 KVM_CAP_PPC_IRQ_XIVE
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XIVE device fd
+            args[1] is the XIVE CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XIVE device.
+
+7. Capabilities that can be enabled on VMs
+------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual
+machine when enabled. To enable them, please see section 4.37. Below
+you can find a list of capabilities and what their effect on the VM
+is when enabling them.
+
+The following information is provided along with the description:
+
+  Architectures: which instruction set architectures provide this ioctl.
+      x86 includes both i386 and x86_64.
+
+  Parameters: what parameters are accepted by the capability.
+
+  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
+      are not detailed, but errors with specific meanings are.
+
+
+7.1 KVM_CAP_PPC_ENABLE_HCALL
+
+Architectures: ppc
+Parameters: args[0] is the sPAPR hcall number
+	    args[1] is 0 to disable, 1 to enable in-kernel handling
+
+This capability controls whether individual sPAPR hypercalls (hcalls)
+get handled by the kernel or not.  Enabling or disabling in-kernel
+handling of an hcall is effective across the VM.  On creation, an
+initial set of hcalls are enabled for in-kernel handling, which
+consists of those hcalls for which in-kernel handlers were implemented
+before this capability was implemented.  If disabled, the kernel will
+not to attempt to handle the hcall, but will always exit to userspace
+to handle it.  Note that it may not make sense to enable some and
+disable others of a group of related hcalls, but KVM does not prevent
+userspace from doing that.
+
+If the hcall number specified is not one that has an in-kernel
+implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
+error.
+
+7.2 KVM_CAP_S390_USER_SIGP
+
+Architectures: s390
+Parameters: none
+
+This capability controls which SIGP orders will be handled completely in user
+space. With this capability enabled, all fast orders will be handled completely
+in the kernel:
+- SENSE
+- SENSE RUNNING
+- EXTERNAL CALL
+- EMERGENCY SIGNAL
+- CONDITIONAL EMERGENCY SIGNAL
+
+All other orders will be handled completely in user space.
+
+Only privileged operation exceptions will be checked for in the kernel (or even
+in the hardware prior to interception). If this capability is not enabled, the
+old way of handling SIGP orders is used (partially in kernel and user space).
+
+7.3 KVM_CAP_S390_VECTOR_REGISTERS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, negative value on error
+
+Allows use of the vector registers introduced with z13 processor, and
+provides for the synchronization between host and user space.  Will
+return -EINVAL if the machine does not support vectors.
+
+7.4 KVM_CAP_S390_USER_STSI
+
+Architectures: s390
+Parameters: none
+
+This capability allows post-handlers for the STSI instruction. After
+initial handling in the kernel, KVM exits to user space with
+KVM_EXIT_S390_STSI to allow user space to insert further data.
+
+Before exiting to userspace, kvm handlers should fill in s390_stsi field of
+vcpu->run:
+struct {
+	__u64 addr;
+	__u8 ar;
+	__u8 reserved;
+	__u8 fc;
+	__u8 sel1;
+	__u16 sel2;
+} s390_stsi;
+
+@addr - guest address of STSI SYSIB
+@fc   - function code
+@sel1 - selector 1
+@sel2 - selector 2
+@ar   - access register number
+
+KVM handlers should exit to userspace with rc = -EREMOTE.
+
+7.5 KVM_CAP_SPLIT_IRQCHIP
+
+Architectures: x86
+Parameters: args[0] - number of routes reserved for userspace IOAPICs
+Returns: 0 on success, -1 on error
+
+Create a local apic for each processor in the kernel. This can be used
+instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
+IOAPIC and PIC (and also the PIT, even though this has to be enabled
+separately).
+
+This capability also enables in kernel routing of interrupt requests;
+when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are
+used in the IRQ routing table.  The first args[0] MSI routes are reserved
+for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
+a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
+
+Fails if VCPU has already been created, or if the irqchip is already in the
+kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
+
+7.6 KVM_CAP_S390_RI
+
+Architectures: s390
+Parameters: none
+
+Allows use of runtime-instrumentation introduced with zEC12 processor.
+Will return -EINVAL if the machine does not support runtime-instrumentation.
+Will return -EBUSY if a VCPU has already been created.
+
+7.7 KVM_CAP_X2APIC_API
+
+Architectures: x86
+Parameters: args[0] - features that should be enabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid features
+
+Valid feature flags in args[0] are
+
+#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
+
+Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
+KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
+allowing the use of 32-bit APIC IDs.  See KVM_CAP_X2APIC_API in their
+respective sections.
+
+KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK must be enabled for x2APIC to work
+in logical mode or with more than 255 VCPUs.  Otherwise, KVM treats 0xff
+as a broadcast even in x2APIC mode in order to support physical x2APIC
+without interrupt remapping.  This is undesirable in logical mode,
+where 0xff represents CPUs 0-7 in cluster 0.
+
+7.8 KVM_CAP_S390_USER_INSTR0
+
+Architectures: s390
+Parameters: none
+
+With this capability enabled, all illegal instructions 0x0000 (2 bytes) will
+be intercepted and forwarded to user space. User space can use this
+mechanism e.g. to realize 2-byte software breakpoints. The kernel will
+not inject an operating exception for these instructions, user space has
+to take care of that.
+
+This capability can be enabled dynamically even if VCPUs were already
+created and are running.
+
+7.9 KVM_CAP_S390_GS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success; -EINVAL if the machine does not support
+	 guarded storage; -EBUSY if a VCPU has already been created.
+
+Allows use of guarded storage for the KVM guest.
+
+7.10 KVM_CAP_S390_AIS
+
+Architectures: s390
+Parameters: none
+
+Allow use of adapter-interruption suppression.
+Returns: 0 on success; -EBUSY if a VCPU has already been created.
+
+7.11 KVM_CAP_PPC_SMT
+
+Architectures: ppc
+Parameters: vsmt_mode, flags
+
+Enabling this capability on a VM provides userspace with a way to set
+the desired virtual SMT mode (i.e. the number of virtual CPUs per
+virtual core).  The virtual SMT mode, vsmt_mode, must be a power of 2
+between 1 and 8.  On POWER8, vsmt_mode must also be no greater than
+the number of threads per subcore for the host.  Currently flags must
+be 0.  A successful call to enable this capability will result in
+vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is
+subsequently queried for the VM.  This capability is only supported by
+HV KVM, and can only be set before any VCPUs have been created.
+The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT
+modes are available.
+
+7.12 KVM_CAP_PPC_FWNMI
+
+Architectures: ppc
+Parameters: none
+
+With this capability a machine check exception in the guest address
+space will cause KVM to exit the guest with NMI exit reason. This
+enables QEMU to build error log and branch to guest kernel registered
+machine check handling routine. Without this capability KVM will
+branch to guests' 0x200 interrupt vector.
+
+7.13 KVM_CAP_X86_DISABLE_EXITS
+
+Architectures: x86
+Parameters: args[0] defines which exits are disabled
+Returns: 0 on success, -EINVAL when args[0] contains invalid exits
+
+Valid bits in args[0] are
+
+#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE            (1 << 2)
+#define KVM_X86_DISABLE_EXITS_CSTATE           (1 << 3)
+
+Enabling this capability on a VM provides userspace with a way to no
+longer intercept some instructions for improved latency in some
+workloads, and is suggested when vCPUs are associated to dedicated
+physical CPUs.  More bits can be added in the future; userspace can
+just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
+all such vmexits.
+
+Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
+
+7.14 KVM_CAP_S390_HPAGE_1M
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, -EINVAL if hpage module parameter was not set
+	 or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
+	 flag set
+
+With this capability the KVM support for memory backing with 1m pages
+through hugetlbfs can be enabled for a VM. After the capability is
+enabled, cmma can't be enabled anymore and pfmfi and the storage key
+interpretation are disabled. If cmma has already been enabled or the
+hpage module parameter is not set to 1, -EINVAL is returned.
+
+While it is generally possible to create a huge page backed VM without
+this capability, the VM will not be able to run.
+
+7.15 KVM_CAP_MSR_PLATFORM_INFO
+
+Architectures: x86
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
+a #GP would be raised when the guest tries to access. Currently, this
+capability does not enable write permissions of this MSR for the guest.
+
+7.16 KVM_CAP_PPC_NESTED_HV
+
+Architectures: ppc
+Parameters: none
+Returns: 0 on success, -EINVAL when the implementation doesn't support
+	 nested-HV virtualization.
+
+HV-KVM on POWER9 and later systems allows for "nested-HV"
+virtualization, which provides a way for a guest VM to run guests that
+can run using the CPU's supervisor mode (privileged non-hypervisor
+state).  Enabling this capability on a VM depends on the CPU having
+the necessary functionality and on the facility being enabled with a
+kvm-hv module parameter.
+
+7.17 KVM_CAP_EXCEPTION_PAYLOAD
+
+Architectures: x86
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability enabled, CR2 will not be modified prior to the
+emulated VM-exit when L1 intercepts a #PF exception that occurs in
+L2. Similarly, for kvm-intel only, DR6 will not be modified prior to
+the emulated VM-exit when L1 intercepts a #DB exception that occurs in
+L2. As a result, when KVM_GET_VCPU_EVENTS reports a pending #PF (or
+#DB) exception for L2, exception.has_payload will be set and the
+faulting address (or the new DR6 bits*) will be reported in the
+exception_payload field. Similarly, when userspace injects a #PF (or
+#DB) into L2 using KVM_SET_VCPU_EVENTS, it is expected to set
+exception.has_payload and to put the faulting address (or the new DR6
+bits*) in the exception_payload field.
+
+This capability also enables exception.pending in struct
+kvm_vcpu_events, which allows userspace to distinguish between pending
+and injected exceptions.
+
+
+* For the new DR6 bits, note that bit 16 is set iff the #DB exception
+  will clear DR6.RTM.
+
+7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
+
+Architectures: x86, arm, arm64, mips
+Parameters: args[0] whether feature should be enabled or not
+
+With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
+clear and write-protect all pages that are returned as dirty.
+Rather, userspace will have to do this operation separately using
+KVM_CLEAR_DIRTY_LOG.
+
+At the cost of a slightly more complicated operation, this provides better
+scalability and responsiveness for two reasons.  First,
+KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather
+than requiring to sync a full memslot; this ensures that KVM does not
+take spinlocks for an extended period of time.  Second, in some cases a
+large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
+userspace actually using the data in the page.  Pages can be modified
+during this time, which is inefficint for both the guest and userspace:
+the guest will incur a higher penalty due to write protection faults,
+while userspace can see false reports of dirty pages.  Manual reprotection
+helps reducing this time, improving guest performance and reducing the
+number of dirty log false positives.
+
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
+it hard or impossible to use it correctly.  The availability of
+KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed.
+Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT.
+
+8. Other capabilities.
+----------------------
+
+This section lists capabilities that give information about other
+features of the KVM implementation.
+
+8.1 KVM_CAP_PPC_HWRNG
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+H_RANDOM hypercall backed by a hardware random-number generator.
+If present, the kernel H_RANDOM handler can be enabled for guest use
+with the KVM_CAP_PPC_ENABLE_HCALL capability.
+
+8.2 KVM_CAP_HYPERV_SYNIC
+
+Architectures: x86
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
+used to support Windows Hyper-V based guest paravirt drivers(VMBus).
+
+In order to use SynIC, it has to be activated by setting this
+capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
+will disable the use of APIC hardware virtualization even if supported
+by the CPU, as it's incompatible with SynIC auto-EOI behavior.
+
+8.3 KVM_CAP_PPC_RADIX_MMU
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel can support guests using the
+radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
+processor).
+
+8.4 KVM_CAP_PPC_HASH_MMU_V3
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel can support guests using the
+hashed page table MMU defined in Power ISA V3.00 (as implemented in
+the POWER9 processor), including in-memory segment tables.
+
+8.5 KVM_CAP_MIPS_VZ
+
+Architectures: mips
+
+This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
+it is available, means that full hardware assisted virtualization capabilities
+of the hardware are available for use through KVM. An appropriate
+KVM_VM_MIPS_* type must be passed to KVM_CREATE_VM to create a VM which
+utilises it.
+
+If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
+available, it means that the VM is using full hardware assisted virtualization
+capabilities of the hardware. This is useful to check after creating a VM with
+KVM_VM_MIPS_DEFAULT.
+
+The value returned by KVM_CHECK_EXTENSION should be compared against known
+values (see below). All other values are reserved. This is to allow for the
+possibility of other hardware assisted virtualization implementations which
+may be incompatible with the MIPS VZ ASE.
+
+ 0: The trap & emulate implementation is in use to run guest code in user
+    mode. Guest virtual memory segments are rearranged to fit the guest in the
+    user mode address space.
+
+ 1: The MIPS VZ ASE is in use, providing full hardware assisted
+    virtualization, including standard guest virtual memory segments.
+
+8.6 KVM_CAP_MIPS_TE
+
+Architectures: mips
+
+This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
+it is available, means that the trap & emulate implementation is available to
+run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware
+assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed
+to KVM_CREATE_VM to create a VM which utilises it.
+
+If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
+available, it means that the VM is using trap & emulate.
+
+8.7 KVM_CAP_MIPS_64BIT
+
+Architectures: mips
+
+This capability indicates the supported architecture type of the guest, i.e. the
+supported register and address width.
+
+The values returned when this capability is checked by KVM_CHECK_EXTENSION on a
+kvm VM handle correspond roughly to the CP0_Config.AT register field, and should
+be checked specifically against known values (see below). All other values are
+reserved.
+
+ 0: MIPS32 or microMIPS32.
+    Both registers and addresses are 32-bits wide.
+    It will only be possible to run 32-bit guest code.
+
+ 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments.
+    Registers are 64-bits wide, but addresses are 32-bits wide.
+    64-bit guest code may run but cannot access MIPS64 memory segments.
+    It will also be possible to run 32-bit guest code.
+
+ 2: MIPS64 or microMIPS64 with access to all address segments.
+    Both registers and addresses are 64-bits wide.
+    It will be possible to run 64-bit or 32-bit guest code.
+
+8.9 KVM_CAP_ARM_USER_IRQ
+
+Architectures: arm, arm64
+This capability, if KVM_CHECK_EXTENSION indicates that it is available, means
+that if userspace creates a VM without an in-kernel interrupt controller, it
+will be notified of changes to the output level of in-kernel emulated devices,
+which can generate virtual interrupts, presented to the VM.
+For such VMs, on every return to userspace, the kernel
+updates the vcpu's run->s.regs.device_irq_level field to represent the actual
+output level of the device.
+
+Whenever kvm detects a change in the device output level, kvm guarantees at
+least one return to userspace before running the VM.  This exit could either
+be a KVM_EXIT_INTR or any other exit event, like KVM_EXIT_MMIO. This way,
+userspace can always sample the device output level and re-compute the state of
+the userspace interrupt controller.  Userspace should always check the state
+of run->s.regs.device_irq_level on every kvm exit.
+The value in run->s.regs.device_irq_level can represent both level and edge
+triggered interrupt signals, depending on the device.  Edge triggered interrupt
+signals will exit to userspace with the bit in run->s.regs.device_irq_level
+set exactly once per edge signal.
+
+The field run->s.regs.device_irq_level is available independent of
+run->kvm_valid_regs or run->kvm_dirty_regs bits.
+
+If KVM_CAP_ARM_USER_IRQ is supported, the KVM_CHECK_EXTENSION ioctl returns a
+number larger than 0 indicating the version of this capability is implemented
+and thereby which bits in in run->s.regs.device_irq_level can signal values.
+
+Currently the following bits are defined for the device_irq_level bitmap:
+
+  KVM_CAP_ARM_USER_IRQ >= 1:
+
+    KVM_ARM_DEV_EL1_VTIMER -  EL1 virtual timer
+    KVM_ARM_DEV_EL1_PTIMER -  EL1 physical timer
+    KVM_ARM_DEV_PMU        -  ARM PMU overflow interrupt signal
+
+Future versions of kvm may implement additional events. These will get
+indicated by returning a higher number from KVM_CHECK_EXTENSION and will be
+listed above.
+
+8.10 KVM_CAP_PPC_SMT_POSSIBLE
+
+Architectures: ppc
+
+Querying this capability returns a bitmap indicating the possible
+virtual SMT modes that can be set using KVM_CAP_PPC_SMT.  If bit N
+(counting from the right) is set, then a virtual SMT mode of 2^N is
+available.
+
+8.11 KVM_CAP_HYPERV_SYNIC2
+
+Architectures: x86
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+8.12 KVM_CAP_HYPERV_VP_INDEX
+
+Architectures: x86
+
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr.  Its
+value is used to denote the target vcpu for a SynIC interrupt.  For
+compatibilty, KVM initializes this msr to KVM's internal vcpu index.  When this
+capability is absent, userspace can still query this msr's value.
+
+8.13 KVM_CAP_S390_AIS_MIGRATION
+
+Architectures: s390
+Parameters: none
+
+This capability indicates if the flic device will be able to get/set the
+AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
+to discover this without having to create a flic device.
+
+8.14 KVM_CAP_S390_PSW
+
+Architectures: s390
+
+This capability indicates that the PSW is exposed via the kvm_run structure.
+
+8.15 KVM_CAP_S390_GMAP
+
+Architectures: s390
+
+This capability indicates that the user space memory used as guest mapping can
+be anywhere in the user memory address space, as long as the memory slots are
+aligned and sized to a segment (1MB) boundary.
+
+8.16 KVM_CAP_S390_COW
+
+Architectures: s390
+
+This capability indicates that the user space memory used as guest mapping can
+use copy-on-write semantics as well as dirty pages tracking via read-only page
+tables.
+
+8.17 KVM_CAP_S390_BPB
+
+Architectures: s390
+
+This capability indicates that kvm will implement the interfaces to handle
+reset, migration and nested KVM for branch prediction blocking. The stfle
+facility 82 should not be provided to the guest without this capability.
+
+8.18 KVM_CAP_HYPERV_TLBFLUSH
+
+Architectures: x86
+
+This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
+hypercalls:
+HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
+HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
+
+8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
+
+Architectures: arm, arm64
+
+This capability indicates that userspace can specify (via the
+KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
+takes a virtual SError interrupt exception.
+If KVM advertises this capability, userspace can only specify the ISS field for
+the ESR syndrome. Other parts of the ESR, such as the EC are generated by the
+CPU when the exception is taken. If this virtual SError is taken to EL1 using
+AArch64, this value will be reported in the ISS field of ESR_ELx.
+
+See KVM_CAP_VCPU_EVENTS for more details.
+8.20 KVM_CAP_HYPERV_SEND_IPI
+
+Architectures: x86
+
+This capability indicates that KVM supports paravirtualized Hyper-V IPI send
+hypercalls:
+HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
diff --git a/Documentation/virt/kvm/arm/hyp-abi.txt b/Documentation/virt/kvm/arm/hyp-abi.txt
new file mode 100644
index 000000000000..a20a0bee268d
--- /dev/null
+++ b/Documentation/virt/kvm/arm/hyp-abi.txt
@@ -0,0 +1,53 @@
+* Internal ABI between the kernel and HYP
+
+This file documents the interaction between the Linux kernel and the
+hypervisor layer when running Linux as a hypervisor (for example
+KVM). It doesn't cover the interaction of the kernel with the
+hypervisor when running as a guest (under Xen, KVM or any other
+hypervisor), or any hypervisor-specific interaction when the kernel is
+used as a host.
+
+On arm and arm64 (without VHE), the kernel doesn't run in hypervisor
+mode, but still needs to interact with it, allowing a built-in
+hypervisor to be either installed or torn down.
+
+In order to achieve this, the kernel must be booted at HYP (arm) or
+EL2 (arm64), allowing it to install a set of stubs before dropping to
+SVC/EL1. These stubs are accessible by using a 'hvc #0' instruction,
+and only act on individual CPUs.
+
+Unless specified otherwise, any built-in hypervisor must implement
+these functions (see arch/arm{,64}/include/asm/virt.h):
+
+* r0/x0 = HVC_SET_VECTORS
+  r1/x1 = vectors
+
+  Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
+  must be a physical address, and respect the alignment requirements
+  of the architecture. Only implemented by the initial stubs, not by
+  Linux hypervisors.
+
+* r0/x0 = HVC_RESET_VECTORS
+
+  Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initials
+  stubs' exception vector value. This effectively disables an existing
+  hypervisor.
+
+* r0/x0 = HVC_SOFT_RESTART
+  r1/x1 = restart address
+  x2 = x0's value when entering the next payload (arm64)
+  x3 = x1's value when entering the next payload (arm64)
+  x4 = x2's value when entering the next payload (arm64)
+
+  Mask all exceptions, disable the MMU, move the arguments into place
+  (arm64 only), and jump to the restart address while at HYP/EL2. This
+  hypercall is not expected to return to its caller.
+
+Any other value of r0/x0 triggers a hypervisor-specific handling,
+which is not documented here.
+
+The return value of a stub hypercall is held by r0/x0, and is 0 on
+success, and HVC_STUB_ERR on error. A stub hypercall is allowed to
+clobber any of the caller-saved registers (x0-x18 on arm64, r0-r3 and
+ip on arm). It is thus recommended to use a function call to perform
+the hypercall.
diff --git a/Documentation/virt/kvm/arm/psci.txt b/Documentation/virt/kvm/arm/psci.txt
new file mode 100644
index 000000000000..559586fc9d37
--- /dev/null
+++ b/Documentation/virt/kvm/arm/psci.txt
@@ -0,0 +1,61 @@
+KVM implements the PSCI (Power State Coordination Interface)
+specification in order to provide services such as CPU on/off, reset
+and power-off to the guest.
+
+The PSCI specification is regularly updated to provide new features,
+and KVM implements these updates if they make sense from a virtualization
+point of view.
+
+This means that a guest booted on two different versions of KVM can
+observe two different "firmware" revisions. This could cause issues if
+a given guest is tied to a particular PSCI revision (unlikely), or if
+a migration causes a different PSCI version to be exposed out of the
+blue to an unsuspecting guest.
+
+In order to remedy this situation, KVM exposes a set of "firmware
+pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
+interface. These registers can be saved/restored by userspace, and set
+to a convenient value if required.
+
+The following register is defined:
+
+* KVM_REG_ARM_PSCI_VERSION:
+
+  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
+    (and thus has already been initialized)
+  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
+    highest PSCI version implemented by KVM and compatible with v0.2)
+  - Allows any PSCI version implemented by KVM and compatible with
+    v0.2 to be set with SET_ONE_REG
+  - Affects the whole VM (even if the register view is per-vcpu)
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
+  Holds the state of the firmware support to mitigate CVE-2017-5715, as
+  offered by KVM to the guest via a HVC call. The workaround is described
+  under SMCCC_ARCH_WORKAROUND_1 in [1].
+  Accepted values are:
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
+      firmware support for the workaround. The mitigation status for the
+      guest is unknown.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
+      available to the guest and required for the mitigation.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
+      is available to the guest, but it is not needed on this VCPU.
+
+* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
+  Holds the state of the firmware support to mitigate CVE-2018-3639, as
+  offered by KVM to the guest via a HVC call. The workaround is described
+  under SMCCC_ARCH_WORKAROUND_2 in [1].
+  Accepted values are:
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
+      available. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
+      unknown. KVM does not offer firmware support for the workaround.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
+      and can be disabled by a vCPU. If
+      KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
+      this vCPU.
+    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
+      always active on this vCPU or it is not needed.
+
+[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
diff --git a/Documentation/virt/kvm/cpuid.rst b/Documentation/virt/kvm/cpuid.rst
new file mode 100644
index 000000000000..01b081f6e7ea
--- /dev/null
+++ b/Documentation/virt/kvm/cpuid.rst
@@ -0,0 +1,107 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+KVM CPUID bits
+==============
+
+:Author: Glauber Costa <glommer@gmail.com>
+
+A guest running on a kvm host, can check some of its features using
+cpuid. This is not always guaranteed to work, since userspace can
+mask-out some, or even all KVM-related cpuid features before launching
+a guest.
+
+KVM cpuid functions are:
+
+function: KVM_CPUID_SIGNATURE (0x40000000)
+
+returns::
+
+   eax = 0x40000001
+   ebx = 0x4b4d564b
+   ecx = 0x564b4d56
+   edx = 0x4d
+
+Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
+This function queries the presence of KVM cpuid leafs.
+
+function: define KVM_CPUID_FEATURES (0x40000001)
+
+returns::
+
+          ebx, ecx
+          eax = an OR'ed group of (1 << flag)
+
+where ``flag`` is defined as below:
+
+================================= =========== ================================
+flag                              value       meaning
+================================= =========== ================================
+KVM_FEATURE_CLOCKSOURCE           0           kvmclock available at msrs
+                                              0x11 and 0x12
+
+KVM_FEATURE_NOP_IO_DELAY          1           not necessary to perform delays
+                                              on PIO operations
+
+KVM_FEATURE_MMU_OP                2           deprecated
+
+KVM_FEATURE_CLOCKSOURCE2          3           kvmclock available at msrs
+
+                                              0x4b564d00 and 0x4b564d01
+KVM_FEATURE_ASYNC_PF              4           async pf can be enabled by
+                                              writing to msr 0x4b564d02
+
+KVM_FEATURE_STEAL_TIME            5           steal time can be enabled by
+                                              writing to msr 0x4b564d03
+
+KVM_FEATURE_PV_EOI                6           paravirtualized end of interrupt
+                                              handler can be enabled by
+                                              writing to msr 0x4b564d04
+
+KVM_FEATURE_PV_UNHAULT            7           guest checks this feature bit
+                                              before enabling paravirtualized
+                                              spinlock support
+
+KVM_FEATURE_PV_TLB_FLUSH          9           guest checks this feature bit
+                                              before enabling paravirtualized
+                                              tlb flush
+
+KVM_FEATURE_ASYNC_PF_VMEXIT       10          paravirtualized async PF VM EXIT
+                                              can be enabled by setting bit 2
+                                              when writing to msr 0x4b564d02
+
+KVM_FEATURE_PV_SEND_IPI           11          guest checks this feature bit
+                                              before enabling paravirtualized
+                                              sebd IPIs
+
+KVM_FEATURE_PV_POLL_CONTROL       12          host-side polling on HLT can
+                                              be disabled by writing
+                                              to msr 0x4b564d05.
+
+KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
+                                              before using paravirtualized
+                                              sched yield.
+
+KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
+                                              per-cpu warps are expeced in
+                                              kvmclock
+================================= =========== ================================
+
+::
+
+      edx = an OR'ed group of (1 << flag)
+
+Where ``flag`` here is defined as below:
+
+================== ============ =================================
+flag               value        meaning
+================== ============ =================================
+KVM_HINTS_REALTIME 0            guest checks this feature bit to
+                                determine that vCPUs are never
+                                preempted for an unlimited time
+                                allowing optimizations
+================== ============ =================================
diff --git a/Documentation/virt/kvm/devices/README b/Documentation/virt/kvm/devices/README
new file mode 100644
index 000000000000..34a69834124a
--- /dev/null
+++ b/Documentation/virt/kvm/devices/README
@@ -0,0 +1 @@
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
diff --git a/Documentation/virt/kvm/devices/arm-vgic-its.txt b/Documentation/virt/kvm/devices/arm-vgic-its.txt
new file mode 100644
index 000000000000..eeaa95b893a8
--- /dev/null
+++ b/Documentation/virt/kvm/devices/arm-vgic-its.txt
@@ -0,0 +1,181 @@
+ARM Virtual Interrupt Translation Service (ITS)
+===============================================
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_VGIC_ITS    ARM Interrupt Translation Service Controller
+
+The ITS allows MSI(-X) interrupts to be injected into guests. This extension is
+optional.  Creating a virtual ITS controller also requires a host GICv3 (see
+arm-vgic-v3.txt), but does not depend on having physical ITS controllers.
+
+There can be multiple ITS controllers per guest, each of them has to have
+a separate, non-overlapping MMIO region.
+
+
+Groups:
+  KVM_DEV_ARM_VGIC_GRP_ADDR
+  Attributes:
+    KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
+      Base address in the guest physical address space of the GICv3 ITS
+      control register frame.
+      This address needs to be 64K aligned and the region covers 128K.
+  Errors:
+    -E2BIG:  Address outside of addressable IPA range
+    -EINVAL: Incorrectly aligned address
+    -EEXIST: Address already configured
+    -EFAULT: Invalid user pointer for attr->addr.
+    -ENODEV: Incorrect attribute or the ITS is not supported.
+
+
+  KVM_DEV_ARM_VGIC_GRP_CTRL
+  Attributes:
+    KVM_DEV_ARM_VGIC_CTRL_INIT
+      request the initialization of the ITS, no additional parameter in
+      kvm_device_attr.addr.
+
+    KVM_DEV_ARM_ITS_CTRL_RESET
+      reset the ITS, no additional parameter in kvm_device_attr.addr.
+      See "ITS Reset State" section.
+
+    KVM_DEV_ARM_ITS_SAVE_TABLES
+      save the ITS table data into guest RAM, at the location provisioned
+      by the guest in corresponding registers/table entries.
+
+      The layout of the tables in guest memory defines an ABI. The entries
+      are laid out in little endian format as described in the last paragraph.
+
+    KVM_DEV_ARM_ITS_RESTORE_TABLES
+      restore the ITS tables from guest RAM to ITS internal structures.
+
+      The GICV3 must be restored before the ITS and all ITS registers but
+      the GITS_CTLR must be restored before restoring the ITS tables.
+
+      The GITS_IIDR read-only register must also be restored before
+      calling KVM_DEV_ARM_ITS_RESTORE_TABLES as the IIDR revision field
+      encodes the ABI revision.
+
+      The expected ordering when restoring the GICv3/ITS is described in section
+      "ITS Restore Sequence".
+
+  Errors:
+    -ENXIO:  ITS not properly configured as required prior to setting
+             this attribute
+    -ENOMEM: Memory shortage when allocating ITS internal data
+    -EINVAL: Inconsistent restored data
+    -EFAULT: Invalid guest ram access
+    -EBUSY:  One or more VCPUS are running
+    -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
+	     state is not available
+
+  KVM_DEV_ARM_VGIC_GRP_ITS_REGS
+  Attributes:
+      The attr field of kvm_device_attr encodes the offset of the
+      ITS register, relative to the ITS control frame base address
+      (ITS_base).
+
+      kvm_device_attr.addr points to a __u64 value whatever the width
+      of the addressed register (32/64 bits). 64 bit registers can only
+      be accessed with full length.
+
+      Writes to read-only registers are ignored by the kernel except for:
+      - GITS_CREADR. It must be restored otherwise commands in the queue
+        will be re-executed after restoring CWRITER. GITS_CREADR must be
+        restored before restoring the GITS_CTLR which is likely to enable the
+        ITS. Also it must be restored after GITS_CBASER since a write to
+        GITS_CBASER resets GITS_CREADR.
+      - GITS_IIDR. The Revision field encodes the table layout ABI revision.
+        In the future we might implement direct injection of virtual LPIs.
+        This will require an upgrade of the table layout and an evolution of
+        the ABI. GITS_IIDR must be restored before calling
+        KVM_DEV_ARM_ITS_RESTORE_TABLES.
+
+      For other registers, getting or setting a register has the same
+      effect as reading/writing the register on real hardware.
+  Errors:
+    -ENXIO: Offset does not correspond to any supported register
+    -EFAULT: Invalid user pointer for attr->addr
+    -EINVAL: Offset is not 64-bit aligned
+    -EBUSY: one or more VCPUS are running
+
+ ITS Restore Sequence:
+ -------------------------
+
+The following ordering must be followed when restoring the GIC and the ITS:
+a) restore all guest memory and create vcpus
+b) restore all redistributors
+c) provide the ITS base address
+   (KVM_DEV_ARM_VGIC_GRP_ADDR)
+d) restore the ITS in the following order:
+   1. Restore GITS_CBASER
+   2. Restore all other GITS_ registers, except GITS_CTLR!
+   3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
+   4. Restore GITS_CTLR
+
+Then vcpus can be started.
+
+ ITS Table ABI REV0:
+ -------------------
+
+ Revision 0 of the ABI only supports the features of a virtual GICv3, and does
+ not support a virtual GICv4 with support for direct injection of virtual
+ interrupts for nested hypervisors.
+
+ The device table and ITT are indexed by the DeviceID and EventID,
+ respectively. The collection table is not indexed by CollectionID, and the
+ entries in the collection are listed in no particular order.
+ All entries are 8 bytes.
+
+ Device Table Entry (DTE):
+
+ bits:     | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
+ values:   | V |   next    | ITT_addr |  Size   |
+
+ where;
+ - V indicates whether the entry is valid. If not, other fields
+   are not meaningful.
+ - next: equals to 0 if this entry is the last one; otherwise it
+   corresponds to the DeviceID offset to the next DTE, capped by
+   2^14 -1.
+ - ITT_addr matches bits [51:8] of the ITT address (256 Byte aligned).
+ - Size specifies the supported number of bits for the EventID,
+   minus one
+
+ Collection Table Entry (CTE):
+
+ bits:     | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 |
+ values:   | V |    RES0    |  RDBase   |    ICID     |
+
+ where:
+ - V indicates whether the entry is valid. If not, other fields are
+   not meaningful.
+ - RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
+ - RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
+ - ICID is the collection ID
+
+ Interrupt Translation Entry (ITE):
+
+ bits:     | 63 ... 48 | 47 ... 16 | 15 ... 0 |
+ values:   |    next   |   pINTID  |  ICID    |
+
+ where:
+ - next: equals to 0 if this entry is the last one; otherwise it corresponds
+   to the EventID offset to the next ITE capped by 2^16 -1.
+ - pINTID is the physical LPI ID; if zero, it means the entry is not valid
+   and other fields are not meaningful.
+ - ICID is the collection ID
+
+ ITS Reset State:
+ ----------------
+
+RESET returns the ITS to the same state that it was when first created and
+initialized. When the RESET command returns, the following things are
+guaranteed:
+
+- The ITS is not enabled and quiescent
+  GITS_CTLR.Enabled = 0 .Quiescent=1
+- There is no internally cached state
+- No collection or device table are used
+  GITS_BASER<n>.Valid = 0
+- GITS_CBASER = 0, GITS_CREADR = 0, GITS_CWRITER = 0
+- The ABI version is unchanged and remains the one set when the ITS
+  device was first created.
diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.txt b/Documentation/virt/kvm/devices/arm-vgic-v3.txt
new file mode 100644
index 000000000000..ff290b43c8e5
--- /dev/null
+++ b/Documentation/virt/kvm/devices/arm-vgic-v3.txt
@@ -0,0 +1,251 @@
+ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
+==============================================================
+
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
+
+Only one VGIC instance may be instantiated through this API.  The created VGIC
+will act as the VM interrupt controller, requiring emulated user-space devices
+to inject interrupts to the VGIC instead of directly to CPUs.  It is not
+possible to create both a GICv3 and GICv2 on the same VM.
+
+Creating a guest GICv3 device requires a host GICv3 as well.
+
+
+Groups:
+  KVM_DEV_ARM_VGIC_GRP_ADDR
+  Attributes:
+    KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
+      Base address in the guest physical address space of the GICv3 distributor
+      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+      This address needs to be 64K aligned and the region covers 64 KByte.
+
+    KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit)
+      Base address in the guest physical address space of the GICv3
+      redistributor register mappings. There are two 64K pages for each
+      VCPU and all of the redistributor pages are contiguous.
+      Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+      This address needs to be 64K aligned.
+
+    KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
+      The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
+      bits:     | 63   ....  52  |  51   ....   16 | 15 - 12  |11 - 0
+      values:   |     count      |       base      |  flags   | index
+      - index encodes the unique redistributor region index
+      - flags: reserved for future use, currently 0
+      - base field encodes bits [51:16] of the guest physical base address
+        of the first redistributor in the region.
+      - count encodes the number of redistributors in the region. Must be
+        greater than 0.
+      There are two 64K pages for each redistributor in the region and
+      redistributors are laid out contiguously within the region. Regions
+      are filled with redistributors in the index order. The sum of all
+      region count fields must be greater than or equal to the number of
+      VCPUs. Redistributor regions must be registered in the incremental
+      index order, starting from index 0.
+      The characteristics of a specific redistributor region can be read
+      by presetting the index field in the attr data.
+      Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+
+  It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and
+  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
+
+  Errors:
+    -E2BIG:  Address outside of addressable IPA range
+    -EINVAL: Incorrectly aligned address, bad redistributor region
+             count/index, mixed redistributor region attribute usage
+    -EEXIST: Address already configured
+    -ENOENT: Attempt to read the characteristics of a non existing
+             redistributor region
+    -ENXIO:  The group or attribute is unknown/unsupported for this device
+             or hardware support is missing.
+    -EFAULT: Invalid user pointer for attr->addr.
+
+
+  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+  KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  32  |  31   ....    0 |
+    values:   |      mpidr     |      offset     |
+
+    All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
+    __u32 value.  64-bit registers must be accessed by separately accessing the
+    lower and higher word.
+
+    Writes to read-only registers are ignored by the kernel.
+
+    KVM_DEV_ARM_VGIC_GRP_DIST_REGS accesses the main distributor registers.
+    KVM_DEV_ARM_VGIC_GRP_REDIST_REGS accesses the redistributor of the CPU
+    specified by the mpidr.
+
+    The offset is relative to the "[Re]Distributor base address" as defined
+    in the GICv3/4 specs.  Getting or setting such a register has the same
+    effect as reading or writing the register on real hardware, except for the
+    following registers: GICD_STATUSR, GICR_STATUSR, GICD_ISPENDR,
+    GICR_ISPENDR0, GICD_ICPENDR, and GICR_ICPENDR0.  These registers behave
+    differently when accessed via this interface compared to their
+    architecturally defined behavior to allow software a full view of the
+    VGIC's internal state.
+
+    The mpidr field is used to specify which
+    redistributor is accessed.  The mpidr is ignored for the distributor.
+
+    The mpidr encoding is based on the affinity information in the
+    architecture defined MPIDR, and the field is encoded as follows:
+      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
+      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
+
+    Note that distributor fields are not banked, but return the same value
+    regardless of the mpidr used to access the register.
+
+    GICD_IIDR.Revision is updated when the KVM implementation is changed in a
+    way directly observable by the guest or userspace.  Userspace should read
+    GICD_IIDR from KVM and write back the read value to confirm its expected
+    behavior is aligned with the KVM implementation.  Userspace should set
+    GICD_IIDR before setting any other registers to ensure the expected
+    behavior.
+
+
+    The GICD_STATUSR and GICR_STATUSR registers are architecturally defined such
+    that a write of a clear bit has no effect, whereas a write with a set bit
+    clears that value.  To allow userspace to freely set the values of these two
+    registers, setting the attributes with the register offsets for these two
+    registers simply sets the non-reserved bits to the value written.
+
+
+    Accesses (reads and writes) to the GICD_ISPENDR register region and
+    GICR_ISPENDR0 registers get/set the value of the latched pending state for
+    the interrupts.
+
+    This is identical to the value returned by a guest read from ISPENDR for an
+    edge triggered interrupt, but may differ for level triggered interrupts.
+    For edge triggered interrupts, once an interrupt becomes pending (whether
+    because of an edge detected on the input line or because of a guest write
+    to ISPENDR) this state is "latched", and only cleared when either the
+    interrupt is activated or when the guest writes to ICPENDR. A level
+    triggered interrupt may be pending either because the level input is held
+    high by a device, or because of a guest write to the ISPENDR register. Only
+    ISPENDR writes are latched; if the device lowers the line level then the
+    interrupt is no longer pending unless the guest also wrote to ISPENDR, and
+    conversely writes to ICPENDR or activations of the interrupt do not clear
+    the pending status if the line level is still being held high.  (These
+    rules are documented in the GICv3 specification descriptions of the ICPENDR
+    and ISPENDR registers.) For a level triggered interrupt the value accessed
+    here is that of the latch which is set by ISPENDR and cleared by ICPENDR or
+    interrupt activation, whereas the value returned by a guest read from
+    ISPENDR is the logical OR of the latch value and the input line level.
+
+    Raw access to the latch state is provided to userspace so that it can save
+    and restore the entire GIC internal state (which is defined by the
+    combination of the current input line level and the latch state, and cannot
+    be deduced from purely the line level and the value of the ISPENDR
+    registers).
+
+    Accesses to GICD_ICPENDR register region and GICR_ICPENDR0 registers have
+    RAZ/WI semantics, meaning that reads always return 0 and writes are always
+    ignored.
+
+  Errors:
+    -ENXIO: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
+
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63      ....       32 | 31  ....  16 | 15  ....  0 |
+    values:   |         mpidr         |      RES     |    instr    |
+
+    The mpidr field encodes the CPU ID based on the affinity information in the
+    architecture defined MPIDR, and the field is encoded as follows:
+      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
+      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
+
+    The instr field encodes the system register to access based on the fields
+    defined in the A64 instruction set encoding for system register access
+    (RES means the bits are reserved for future use and should be zero):
+
+      | 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
+      |   Op 0    |    Op1    |    CRn   |   CRm   |   Op2   |
+
+    All system regs accessed through this API are (rw, 64-bit) and
+    kvm_device_attr.addr points to a __u64 value.
+
+    KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS accesses the CPU interface registers for the
+    CPU specified by the mpidr field.
+
+    CPU interface registers access is not implemented for AArch32 mode.
+    Error -ENXIO is returned when accessed in AArch32 mode.
+  Errors:
+    -ENXIO: Getting or setting this register is not yet supported
+    -EBUSY: VCPU is running
+    -EINVAL: Invalid mpidr or register value supplied
+
+
+  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+  Attributes:
+    A value describing the number of interrupts (SGI, PPI and SPI) for
+    this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+    kvm_device_attr.addr points to a __u32 value.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already be set.
+
+
+  KVM_DEV_ARM_VGIC_GRP_CTRL
+  Attributes:
+    KVM_DEV_ARM_VGIC_CTRL_INIT
+      request the initialization of the VGIC, no additional parameter in
+      kvm_device_attr.addr.
+    KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
+      save all LPI pending bits into guest RAM pending tables.
+
+      The first kB of the pending table is not altered by this operation.
+  Errors:
+    -ENXIO: VGIC not properly configured as required prior to calling
+     this attribute
+    -ENODEV: no online VCPU
+    -ENOMEM: memory shortage when allocating vgic internal data
+    -EFAULT: Invalid guest ram access
+    -EBUSY:  One or more VCPUS are running
+
+
+  KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
+  Attributes:
+    The attr field of kvm_device_attr encodes the following values:
+    bits:     | 63      ....       32 | 31   ....    10 | 9  ....  0 |
+    values:   |         mpidr         |      info       |   vINTID   |
+
+    The vINTID specifies which set of IRQs is reported on.
+
+    The info field specifies which information userspace wants to get or set
+    using this interface.  Currently we support the following info values:
+
+      VGIC_LEVEL_INFO_LINE_LEVEL:
+	Get/Set the input level of the IRQ line for a set of 32 contiguously
+	numbered interrupts.
+	vINTID must be a multiple of 32.
+
+	kvm_device_attr.addr points to a __u32 value which will contain a
+	bitmap where a set bit means the interrupt level is asserted.
+
+	Bit[n] indicates the status for interrupt vINTID + n.
+
+    SGIs and any interrupt with a higher ID than the number of interrupts
+    supported, will be RAZ/WI.  LPIs are always edge-triggered and are
+    therefore not supported by this interface.
+
+    PPIs are reported per VCPU as specified in the mpidr field, and SPIs are
+    reported with the same value regardless of the mpidr specified.
+
+    The mpidr field encodes the CPU ID based on the affinity information in the
+    architecture defined MPIDR, and the field is encoded as follows:
+      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
+      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
+  Errors:
+    -EINVAL: vINTID is not multiple of 32 or
+     info field is not VGIC_LEVEL_INFO_LINE_LEVEL
diff --git a/Documentation/virt/kvm/devices/arm-vgic.txt b/Documentation/virt/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..97b6518148f8
--- /dev/null
+++ b/Documentation/virt/kvm/devices/arm-vgic.txt
@@ -0,0 +1,127 @@
+ARM Virtual Generic Interrupt Controller v2 (VGIC)
+==================================================
+
+Device types supported:
+  KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP API.  The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+GICv3 implementations with hardware compatibility support allow creating a
+guest GICv2 through this interface.  For information on creating a guest GICv3
+device and guest ITS devices, see arm-vgic-v3.txt.  It is not possible to
+create both a GICv3 and GICv2 device on the same VM.
+
+
+Groups:
+  KVM_DEV_ARM_VGIC_GRP_ADDR
+  Attributes:
+    KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+      Base address in the guest physical address space of the GIC distributor
+      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+      This address needs to be 4K aligned and the region covers 4 KByte.
+
+    KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+      Base address in the guest physical address space of the GIC virtual cpu
+      interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+      This address needs to be 4K aligned and the region covers 4 KByte.
+  Errors:
+    -E2BIG:  Address outside of addressable IPA range
+    -EINVAL: Incorrectly aligned address
+    -EEXIST: Address already configured
+    -ENXIO:  The group or attribute is unknown/unsupported for this device
+             or hardware support is missing.
+    -EFAULT: Invalid user pointer for attr->addr.
+
+  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
+    values:   |    reserved   | vcpu_index |      offset     |
+
+    All distributor regs are (rw, 32-bit)
+
+    The offset is relative to the "Distributor base address" as defined in the
+    GICv2 specs.  Getting or setting such a register has the same effect as
+    reading or writing the register on the actual hardware from the cpu whose
+    index is specified with the vcpu_index field.  Note that most distributor
+    fields are not banked, but return the same value regardless of the
+    vcpu_index used to access the register.
+
+    GICD_IIDR.Revision is updated when the KVM implementation of an emulated
+    GICv2 is changed in a way directly observable by the guest or userspace.
+    Userspace should read GICD_IIDR from KVM and write back the read value to
+    confirm its expected behavior is aligned with the KVM implementation.
+    Userspace should set GICD_IIDR before setting any other registers (both
+    KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
+    the expected behavior. Unless GICD_IIDR has been set from userspace, writes
+    to the interrupt group registers (GICD_IGROUPR) are ignored.
+  Errors:
+    -ENXIO: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
+
+  KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+  Attributes:
+    The attr field of kvm_device_attr encodes two values:
+    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
+    values:   |    reserved   | vcpu_index |      offset     |
+
+    All CPU interface regs are (rw, 32-bit)
+
+    The offset specifies the offset from the "CPU interface base address" as
+    defined in the GICv2 specs.  Getting or setting such a register has the
+    same effect as reading or writing the register on the actual hardware.
+
+    The Active Priorities Registers APRn are implementation defined, so we set a
+    fixed format for our implementation that fits with the model of a "GICv2
+    implementation without the security extensions" which we present to the
+    guest.  This interface always exposes four register APR[0-3] describing the
+    maximum possible 128 preemption levels.  The semantics of the register
+    indicate if any interrupts in a given preemption level are in the active
+    state by setting the corresponding bit.
+
+    Thus, preemption level X has one or more active interrupts if and only if:
+
+      APRn[X mod 32] == 0b1,  where n = X / 32
+
+    Bits for undefined preemption levels are RAZ/WI.
+
+    Note that this differs from a CPU's view of the APRs on hardware in which
+    a GIC without the security extensions expose group 0 and group 1 active
+    priorities in separate register groups, whereas we show a combined view
+    similar to GICv2's GICH_APR.
+
+    For historical reasons and to provide ABI compatibility with userspace we
+    export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
+    field in the lower 5 bits of a word, meaning that userspace must always
+    use the lower 5 bits to communicate with the KVM device and must shift the
+    value left by 3 places to obtain the actual priority mask level.
+
+  Errors:
+    -ENXIO: Getting or setting this register is not yet supported
+    -EBUSY: One or more VCPUs are running
+    -EINVAL: Invalid vcpu_index supplied
+
+  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+  Attributes:
+    A value describing the number of interrupts (SGI, PPI and SPI) for
+    this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+  Errors:
+    -EINVAL: Value set is out of the expected range
+    -EBUSY: Value has already be set, or GIC has already been initialized
+            with default values.
+
+  KVM_DEV_ARM_VGIC_GRP_CTRL
+  Attributes:
+    KVM_DEV_ARM_VGIC_CTRL_INIT
+      request the initialization of the VGIC or ITS, no additional parameter
+      in kvm_device_attr.addr.
+  Errors:
+    -ENXIO: VGIC not properly configured as required prior to calling
+     this attribute
+    -ENODEV: no online VCPU
+    -ENOMEM: memory shortage when allocating vgic internal data
diff --git a/Documentation/virt/kvm/devices/mpic.txt b/Documentation/virt/kvm/devices/mpic.txt
new file mode 100644
index 000000000000..8257397adc3c
--- /dev/null
+++ b/Documentation/virt/kvm/devices/mpic.txt
@@ -0,0 +1,53 @@
+MPIC interrupt controller
+=========================
+
+Device types supported:
+  KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0
+  KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2
+
+Only one MPIC instance, of any type, may be instantiated.  The created
+MPIC will act as the system interrupt controller, connecting to each
+vcpu's interrupt inputs.
+
+Groups:
+  KVM_DEV_MPIC_GRP_MISC
+  Attributes:
+    KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
+      Base address of the 256 KiB MPIC register space.  Must be
+      naturally aligned.  A value of zero disables the mapping.
+      Reset value is zero.
+
+  KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
+    Access an MPIC register, as if the access were made from the guest.
+    "attr" is the byte offset into the MPIC register space.  Accesses
+    must be 4-byte aligned.
+
+    MSIs may be signaled by using this attribute group to write
+    to the relevant MSIIR.
+
+  KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
+    IRQ input line for each standard openpic source.  0 is inactive and 1
+    is active, regardless of interrupt sense.
+
+    For edge-triggered interrupts:  Writing 1 is considered an activating
+    edge, and writing 0 is ignored.  Reading returns 1 if a previously
+    signaled edge has not been acknowledged, and 0 otherwise.
+
+    "attr" is the IRQ number.  IRQ numbers for standard sources are the
+    byte offset of the relevant IVPR from EIVPR0, divided by 32.
+
+IRQ Routing:
+
+  The MPIC emulation supports IRQ routing. Only a single MPIC device can
+  be instantiated. Once that device has been created, it's available as
+  irqchip id 0.
+
+  This irqchip 0 has 256 interrupt pins, which expose the interrupts in
+  the main array of interrupt sources (a.k.a. "SRC" interrupts).
+
+  The numbering is the same as the MPIC device tree binding -- based on
+  the register offset from the beginning of the sources array, without
+  regard to any subdivisions in chip documentation such as "internal"
+  or "external" interrupts.
+
+  Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.
diff --git a/Documentation/virt/kvm/devices/s390_flic.txt b/Documentation/virt/kvm/devices/s390_flic.txt
new file mode 100644
index 000000000000..a4e20a090174
--- /dev/null
+++ b/Documentation/virt/kvm/devices/s390_flic.txt
@@ -0,0 +1,163 @@
+FLIC (floating interrupt controller)
+====================================
+
+FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
+machine check interruptions. All interrupts are stored in a per-vm list of
+pending interrupts. FLIC performs operations on this list.
+
+Only one FLIC instance may be instantiated.
+
+FLIC provides support to
+- add interrupts (KVM_DEV_FLIC_ENQUEUE)
+- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
+- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+- purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
+- enable/disable for the guest transparent async page faults
+- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
+- modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM)
+- inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT)
+- get/set all AIS mode states (KVM_DEV_FLIC_AISM_ALL)
+
+Groups:
+  KVM_DEV_FLIC_ENQUEUE
+    Passes a buffer and length into the kernel which are then injected into
+    the list of pending interrupts.
+    attr->addr contains the pointer to the buffer and attr->attr contains
+    the length of the buffer.
+    The format of the data structure kvm_s390_irq as it is copied from userspace
+    is defined in usr/include/linux/kvm.h.
+
+  KVM_DEV_FLIC_GET_ALL_IRQS
+    Copies all floating interrupts into a buffer provided by userspace.
+    When the buffer is too small it returns -ENOMEM, which is the indication
+    for userspace to try again with a bigger buffer.
+    -ENOBUFS is returned when the allocation of a kernelspace buffer has
+    failed.
+    -EFAULT is returned when copying data to userspace failed.
+    All interrupts remain pending, i.e. are not deleted from the list of
+    currently pending interrupts.
+    attr->addr contains the userspace address of the buffer into which all
+    interrupt data will be copied.
+    attr->attr contains the size of the buffer in bytes.
+
+  KVM_DEV_FLIC_CLEAR_IRQS
+    Simply deletes all elements from the list of currently pending floating
+    interrupts.  No interrupts are injected into the guest.
+
+  KVM_DEV_FLIC_CLEAR_IO_IRQ
+    Deletes one (if any) I/O interrupt for a subchannel identified by the
+    subsystem identification word passed via the buffer specified by
+    attr->addr (address) and attr->attr (length).
+
+  KVM_DEV_FLIC_APF_ENABLE
+    Enables async page faults for the guest. So in case of a major page fault
+    the host is allowed to handle this async and continues the guest.
+
+  KVM_DEV_FLIC_APF_DISABLE_WAIT
+    Disables async page faults for the guest and waits until already pending
+    async page faults are done. This is necessary to trigger a completion interrupt
+    for every init interrupt before migrating the interrupt list.
+
+  KVM_DEV_FLIC_ADAPTER_REGISTER
+    Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
+    describing the adapter to register:
+
+struct kvm_s390_io_adapter {
+	__u32 id;
+	__u8 isc;
+	__u8 maskable;
+	__u8 swap;
+	__u8 flags;
+};
+
+   id contains the unique id for the adapter, isc the I/O interruption subclass
+   to use, maskable whether this adapter may be masked (interrupts turned off),
+   swap whether the indicators need to be byte swapped, and flags contains
+   further characteristics of the adapter.
+   Currently defined values for 'flags' are:
+   - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
+     (adapter-interrupt-suppression) facility. This flag only has an effect if
+     the AIS capability is enabled.
+   Unknown flag values are ignored.
+
+
+  KVM_DEV_FLIC_ADAPTER_MODIFY
+    Modifies attributes of an existing I/O adapter interrupt source. Takes
+    a kvm_s390_io_adapter_req specifying the adapter and the operation:
+
+struct kvm_s390_io_adapter_req {
+	__u32 id;
+	__u8 type;
+	__u8 mask;
+	__u16 pad0;
+	__u64 addr;
+};
+
+    id specifies the adapter and type the operation. The supported operations
+    are:
+
+    KVM_S390_IO_ADAPTER_MASK
+      mask or unmask the adapter, as specified in mask
+
+    KVM_S390_IO_ADAPTER_MAP
+      perform a gmap translation for the guest address provided in addr,
+      pin a userspace page for the translated address and add it to the
+      list of mappings
+      Note: A new mapping will be created unconditionally; therefore,
+            the calling code should avoid making duplicate mappings.
+
+    KVM_S390_IO_ADAPTER_UNMAP
+      release a userspace page for the translated address specified in addr
+      from the list of mappings
+
+  KVM_DEV_FLIC_AISM
+    modify the adapter-interruption-suppression mode for a given isc if the
+    AIS capability is enabled. Takes a kvm_s390_ais_req describing:
+
+struct kvm_s390_ais_req {
+	__u8 isc;
+	__u16 mode;
+};
+
+    isc contains the target I/O interruption subclass, mode the target
+    adapter-interruption-suppression mode. The following modes are
+    currently supported:
+    - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
+      is always allowed;
+    - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
+      injection is only allowed once and the following adapter interrupts
+      will be suppressed until the mode is set again to ALL-Interruptions
+      or SINGLE-Interruption mode.
+
+  KVM_DEV_FLIC_AIRQ_INJECT
+    Inject adapter interrupts on a specified adapter.
+    attr->attr contains the unique id for the adapter, which allows for
+    adapter-specific checks and actions.
+    For adapters subject to AIS, handle the airq injection suppression for
+    an isc according to the adapter-interruption-suppression mode on condition
+    that the AIS capability is enabled.
+
+  KVM_DEV_FLIC_AISM_ALL
+    Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
+    a kvm_s390_ais_all describing:
+
+struct kvm_s390_ais_all {
+       __u8 simm; /* Single-Interruption-Mode mask */
+       __u8 nimm; /* No-Interruption-Mode mask *
+};
+
+    simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
+    No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
+    to an ISC (MSB0 bit 0 to ISC 0 and so on). The combination of simm bit and
+    nimm bit presents AIS mode for a ISC.
+
+    KVM_DEV_FLIC_AISM_ALL is indicated by KVM_CAP_S390_AIS_MIGRATION.
+
+Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
+FLIC with an unknown group or attribute gives the error code EINVAL (instead of
+ENXIO, as specified in the API documentation). It is not possible to conclude
+that a FLIC operation is unavailable based on the error code resulting from a
+usage attempt.
+
+Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
+schid is specified.
diff --git a/Documentation/virt/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt
new file mode 100644
index 000000000000..2b5dab16c4f2
--- /dev/null
+++ b/Documentation/virt/kvm/devices/vcpu.txt
@@ -0,0 +1,62 @@
+Generic vcpu interface
+====================================
+
+The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
+kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
+
+The groups and attributes per virtual cpu, if any, are architecture specific.
+
+1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
+Architectures: ARM64
+
+1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
+Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
+            pointer to an int
+Returns: -EBUSY: The PMU overflow interrupt is already set
+         -ENXIO: The overflow interrupt not set when attempting to get it
+         -ENODEV: PMUv3 not supported
+         -EINVAL: Invalid PMU overflow interrupt number supplied or
+                  trying to set the IRQ number without using an in-kernel
+                  irqchip.
+
+A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
+number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
+type must be same for each vcpu. As a PPI, the interrupt number is the same for
+all vcpus, while as an SPI it must be a separate number per vcpu.
+
+1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
+Parameters: no additional parameter in kvm_device_attr.addr
+Returns: -ENODEV: PMUv3 not supported or GIC not initialized
+         -ENXIO: PMUv3 not properly configured or in-kernel irqchip not
+                 configured as required prior to calling this attribute
+         -EBUSY: PMUv3 already initialized
+
+Request the initialization of the PMUv3.  If using the PMUv3 with an in-kernel
+virtual GIC implementation, this must be done after initializing the in-kernel
+irqchip.
+
+
+2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
+Architectures: ARM,ARM64
+
+2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
+2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
+Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
+            pointer to an int
+Returns: -EINVAL: Invalid timer interrupt number
+         -EBUSY:  One or more VCPUs has already run
+
+A value describing the architected timer interrupt number when connected to an
+in-kernel virtual GIC.  These must be a PPI (16 <= intid < 32).  Setting the
+attribute overrides the default values (see below).
+
+KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
+KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
+
+Setting the same PPI for different timers will prevent the VCPUs from running.
+Setting the interrupt number on a VCPU configures all VCPUs created at that
+time to use the number provided for a given timer, overwriting any previously
+configured values on other VCPUs.  Userspace should configure the interrupt
+numbers on at least one VCPU after creating all VCPUs and before running any
+VCPUs.
diff --git a/Documentation/virt/kvm/devices/vfio.txt b/Documentation/virt/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..528c77c8022c
--- /dev/null
+++ b/Documentation/virt/kvm/devices/vfio.txt
@@ -0,0 +1,36 @@
+VFIO virtual device
+===================
+
+Device types supported:
+  KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM.  The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM.  As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence.  When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+  KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+	kvm_device_attr.addr points to an int32_t file descriptor
+	for the VFIO group.
+  KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+	kvm_device_attr.addr points to an int32_t file descriptor
+	for the VFIO group.
+  KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
+	allocated by sPAPR KVM.
+	kvm_device_attr.addr points to a struct:
+
+	struct kvm_vfio_spapr_tce {
+		__s32	groupfd;
+		__s32	tablefd;
+	};
+
+	where
+	@groupfd is a file descriptor for a VFIO group;
+	@tablefd is a file descriptor for a TCE table allocated via
+		KVM_CREATE_SPAPR_TCE.
diff --git a/Documentation/virt/kvm/devices/vm.txt b/Documentation/virt/kvm/devices/vm.txt
new file mode 100644
index 000000000000..4ffb82b02468
--- /dev/null
+++ b/Documentation/virt/kvm/devices/vm.txt
@@ -0,0 +1,270 @@
+Generic vm interface
+====================================
+
+The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
+struct kvm_device_attr as other devices, but targets VM-wide settings
+and controls.
+
+The groups and attributes per virtual machine, if any, are architecture
+specific.
+
+1. GROUP: KVM_S390_VM_MEM_CTRL
+Architectures: s390
+
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
+Parameters: none
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
+
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
+
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
+Returns: -EINVAL if CMMA was not enabled
+         0 otherwise
+
+Clear the CMMA status for all guest pages, so any pages the guest marked
+as unused are again used any may not be reclaimed by the host.
+
+1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
+Parameters: in attr->addr the address for the new limit of guest memory
+Returns: -EFAULT if the given address is not accessible
+         -EINVAL if the virtual machine is of type UCONTROL
+         -E2BIG if the given guest memory is to big for that machine
+         -EBUSY if a vcpu is already defined
+         -ENOMEM if not enough memory is available for a new shadow guest mapping
+          0 otherwise
+
+Allows userspace to query the actual limit and set a new limit for
+the maximum guest memory size. The limit will be rounded up to
+2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
+the number of page table levels. In the case that there is no limit we will set
+the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
+
+2. GROUP: KVM_S390_VM_CPU_MODEL
+Architectures: s390
+
+2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
+
+Allows user space to retrieve machine and kvm specific cpu related information:
+
+struct kvm_s390_vm_cpu_machine {
+       __u64 cpuid;           # CPUID of host
+       __u32 ibc;             # IBC level range offered by host
+       __u8  pad[4];
+       __u64 fac_mask[256];   # set of cpu facilities enabled by KVM
+       __u64 fac_list[256];   # set of cpu facilities offered by host
+}
+
+Parameters: address of buffer to store the machine related cpu data
+            of type struct kvm_s390_vm_cpu_machine*
+Returns:    -EFAULT if the given address is not accessible from kernel space
+	    -ENOMEM if not enough memory is available to process the ioctl
+	    0 in case of success
+
+2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
+
+Allows user space to retrieve or request to change cpu related information for a vcpu:
+
+struct kvm_s390_vm_cpu_processor {
+       __u64 cpuid;           # CPUID currently (to be) used by this vcpu
+       __u16 ibc;             # IBC level currently (to be) used by this vcpu
+       __u8  pad[6];
+       __u64 fac_list[256];   # set of cpu facilities currently (to be) used
+                              # by this vcpu
+}
+
+KVM does not enforce or limit the cpu model data in any form. Take the information
+retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
+setups. Instruction interceptions triggered by additionally set facility bits that
+are not handled by KVM need to by imlemented in the VM driver code.
+
+Parameters: address of buffer to store/set the processor related cpu
+	    data of type struct kvm_s390_vm_cpu_processor*.
+Returns:    -EBUSY in case 1 or more vcpus are already activated (only in write case)
+	    -EFAULT if the given address is not accessible from kernel space
+	    -ENOMEM if not enough memory is available to process the ioctl
+	    0 in case of success
+
+2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
+
+Allows user space to retrieve available cpu features. A feature is available if
+provided by the hardware and supported by kvm. In theory, cpu features could
+even be completely emulated by kvm.
+
+struct kvm_s390_vm_cpu_feat {
+        __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
+};
+
+Parameters: address of a buffer to load the feature list from.
+Returns:    -EFAULT if the given address is not accessible from kernel space.
+	    0 in case of success.
+
+2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
+
+Allows user space to retrieve or change enabled cpu features for all VCPUs of a
+VM. Features that are not available cannot be enabled.
+
+See 2.3. for a description of the parameter struct.
+
+Parameters: address of a buffer to store/load the feature list from.
+Returns:    -EFAULT if the given address is not accessible from kernel space.
+	    -EINVAL if a cpu feature that is not available is to be enabled.
+	    -EBUSY if at least one VCPU has already been defined.
+	    0 in case of success.
+
+2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
+
+Allows user space to retrieve available cpu subfunctions without any filtering
+done by a set IBC. These subfunctions are indicated to the guest VCPU via
+query or "test bit" subfunctions and used e.g. by cpacf functions, plo and ptff.
+
+A subfunction block is only valid if KVM_S390_VM_CPU_MACHINE contains the
+STFL(E) bit introducing the affected instruction. If the affected instruction
+indicates subfunctions via a "query subfunction", the response block is
+contained in the returned struct. If the affected instruction
+indicates subfunctions via a "test bit" mechanism, the subfunction codes are
+contained in the returned struct in MSB 0 bit numbering.
+
+struct kvm_s390_vm_cpu_subfunc {
+       u8 plo[32];           # always valid (ESA/390 feature)
+       u8 ptff[16];          # valid with TOD-clock steering
+       u8 kmac[16];          # valid with Message-Security-Assist
+       u8 kmc[16];           # valid with Message-Security-Assist
+       u8 km[16];            # valid with Message-Security-Assist
+       u8 kimd[16];          # valid with Message-Security-Assist
+       u8 klmd[16];          # valid with Message-Security-Assist
+       u8 pckmo[16];         # valid with Message-Security-Assist-Extension 3
+       u8 kmctr[16];         # valid with Message-Security-Assist-Extension 4
+       u8 kmf[16];           # valid with Message-Security-Assist-Extension 4
+       u8 kmo[16];           # valid with Message-Security-Assist-Extension 4
+       u8 pcc[16];           # valid with Message-Security-Assist-Extension 4
+       u8 ppno[16];          # valid with Message-Security-Assist-Extension 5
+       u8 kma[16];           # valid with Message-Security-Assist-Extension 8
+       u8 kdsa[16];          # valid with Message-Security-Assist-Extension 9
+       u8 reserved[1792];    # reserved for future instructions
+};
+
+Parameters: address of a buffer to load the subfunction blocks from.
+Returns:    -EFAULT if the given address is not accessible from kernel space.
+	    0 in case of success.
+
+2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
+
+Allows user space to retrieve or change cpu subfunctions to be indicated for
+all VCPUs of a VM. This attribute will only be available if kernel and
+hardware support are in place.
+
+The kernel uses the configured subfunction blocks for indication to
+the guest. A subfunction block will only be used if the associated STFL(E) bit
+has not been disabled by user space (so the instruction to be queried is
+actually available for the guest).
+
+As long as no data has been written, a read will fail. The IBC will be used
+to determine available subfunctions in this case, this will guarantee backward
+compatibility.
+
+See 2.5. for a description of the parameter struct.
+
+Parameters: address of a buffer to store/load the subfunction blocks from.
+Returns:    -EFAULT if the given address is not accessible from kernel space.
+	    -EINVAL when reading, if there was no write yet.
+	    -EBUSY if at least one VCPU has already been defined.
+	    0 in case of success.
+
+3. GROUP: KVM_S390_VM_TOD
+Architectures: s390
+
+3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
+
+Allows user space to set/get the TOD clock extension (u8) (superseded by
+KVM_S390_VM_TOD_EXT).
+
+Parameters: address of a buffer in user space to store the data (u8) to
+Returns:    -EFAULT if the given address is not accessible from kernel space
+	    -EINVAL if setting the TOD clock extension to != 0 is not supported
+
+3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
+
+Allows user space to set/get bits 0-63 of the TOD clock register as defined in
+the POP (u64).
+
+Parameters: address of a buffer in user space to store the data (u64) to
+Returns:    -EFAULT if the given address is not accessible from kernel space
+
+3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
+Allows user space to set/get bits 0-63 of the TOD clock register as defined in
+the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
+also allows user space to get/set it. If the guest CPU model does not support
+it, it is stored as 0 and not allowed to be set to a value != 0.
+
+Parameters: address of a buffer in user space to store the data
+            (kvm_s390_vm_tod_clock) to
+Returns:    -EFAULT if the given address is not accessible from kernel space
+	    -EINVAL if setting the TOD clock extension to != 0 is not supported
+
+4. GROUP: KVM_S390_VM_CRYPTO
+Architectures: s390
+
+4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
+
+Allows user space to enable aes key wrapping, including generating a new
+wrapping key.
+
+Parameters: none
+Returns:    0
+
+4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
+
+Allows user space to enable dea key wrapping, including generating a new
+wrapping key.
+
+Parameters: none
+Returns:    0
+
+4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
+
+Allows user space to disable aes key wrapping, clearing the wrapping key.
+
+Parameters: none
+Returns:    0
+
+4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
+
+Allows user space to disable dea key wrapping, clearing the wrapping key.
+
+Parameters: none
+Returns:    0
+
+5. GROUP: KVM_S390_VM_MIGRATION
+Architectures: s390
+
+5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
+
+Allows userspace to stop migration mode, needed for PGSTE migration.
+Setting this attribute when migration mode is not active will have no
+effects.
+
+Parameters: none
+Returns:    0
+
+5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
+
+Allows userspace to start migration mode, needed for PGSTE migration.
+Setting this attribute when migration mode is already active will have
+no effects.
+
+Parameters: none
+Returns:    -ENOMEM if there is not enough free memory to start migration mode
+	    -EINVAL if the state of the VM is invalid (e.g. no memory defined)
+	    0 in case of success.
+
+5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
+
+Allows userspace to query the status of migration mode.
+
+Parameters: address of a buffer in user space to store the data (u64) to;
+	    the data itself is either 0 if migration mode is disabled or 1
+	    if it is enabled
+Returns:    -EFAULT if the given address is not accessible from kernel space
+	    0 in case of success.
diff --git a/Documentation/virt/kvm/devices/xics.txt b/Documentation/virt/kvm/devices/xics.txt
new file mode 100644
index 000000000000..42864935ac5d
--- /dev/null
+++ b/Documentation/virt/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called "servers",
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
+capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
diff --git a/Documentation/virt/kvm/devices/xive.txt b/Documentation/virt/kvm/devices/xive.txt
new file mode 100644
index 000000000000..9a24a4525253
--- /dev/null
+++ b/Documentation/virt/kvm/devices/xive.txt
@@ -0,0 +1,197 @@
+POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
+==========================================================
+
+Device types supported:
+  KVM_DEV_TYPE_XIVE     POWER9 XIVE Interrupt Controller generation 1
+
+This device acts as a VM interrupt controller. It provides the KVM
+interface to configure the interrupt sources of a VM in the underlying
+POWER9 XIVE interrupt controller.
+
+Only one XIVE instance may be instantiated. A guest XIVE device
+requires a POWER9 host and the guest OS should have support for the
+XIVE native exploitation interrupt mode. If not, it should run using
+the legacy interrupt mode, referred as XICS (POWER7/8).
+
+* Device Mappings
+
+  The KVM device exposes different MMIO ranges of the XIVE HW which
+  are required for interrupt management. These are exposed to the
+  guest in VMAs populated with a custom VM fault handler.
+
+  1. Thread Interrupt Management Area (TIMA)
+
+  Each thread has an associated Thread Interrupt Management context
+  composed of a set of registers. These registers let the thread
+  handle priority management and interrupt acknowledgment. The most
+  important are :
+
+      - Interrupt Pending Buffer     (IPB)
+      - Current Processor Priority   (CPPR)
+      - Notification Source Register (NSR)
+
+  They are exposed to software in four different pages each proposing
+  a view with a different privilege. The first page is for the
+  physical thread context and the second for the hypervisor. Only the
+  third (operating system) and the fourth (user level) are exposed the
+  guest.
+
+  2. Event State Buffer (ESB)
+
+  Each source is associated with an Event State Buffer (ESB) with
+  either a pair of even/odd pair of pages which provides commands to
+  manage the source: to trigger, to EOI, to turn off the source for
+  instance.
+
+  3. Device pass-through
+
+  When a device is passed-through into the guest, the source
+  interrupts are from a different HW controller (PHB4) and the ESB
+  pages exposed to the guest should accommadate this change.
+
+  The passthru_irq helpers, kvmppc_xive_set_mapped() and
+  kvmppc_xive_clr_mapped() are called when the device HW irqs are
+  mapped into or unmapped from the guest IRQ number space. The KVM
+  device extends these helpers to clear the ESB pages of the guest IRQ
+  number being mapped and then lets the VM fault handler repopulate.
+  The handler will insert the ESB page corresponding to the HW
+  interrupt of the device being passed-through or the initial IPI ESB
+  page if the device has being removed.
+
+  The ESB remapping is fully transparent to the guest and the OS
+  device driver. All handling is done within VFIO and the above
+  helpers in KVM-PPC.
+
+* Groups:
+
+  1. KVM_DEV_XIVE_GRP_CTRL
+  Provides global controls on the device
+  Attributes:
+    1.1 KVM_DEV_XIVE_RESET (write only)
+    Resets the interrupt controller configuration for sources and event
+    queues. To be used by kexec and kdump.
+    Errors: none
+
+    1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
+    Sync all the sources and queues and mark the EQ pages dirty. This
+    to make sure that a consistent memory state is captured when
+    migrating the VM.
+    Errors: none
+
+  2. KVM_DEV_XIVE_GRP_SOURCE (write only)
+  Initializes a new source in the XIVE device and mask it.
+  Attributes:
+    Interrupt source number  (64-bit)
+  The kvm_device_attr.addr points to a __u64 value:
+  bits:     | 63   ....  2 |   1   |   0
+  values:   |    unused    | level | type
+  - type:  0:MSI 1:LSI
+  - level: assertion level in case of an LSI.
+  Errors:
+    -E2BIG:  Interrupt source number is out of range
+    -ENOMEM: Could not create a new source block
+    -EFAULT: Invalid user pointer for attr->addr.
+    -ENXIO:  Could not allocate underlying HW interrupt
+
+  3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
+  Configures source targeting
+  Attributes:
+    Interrupt source number  (64-bit)
+  The kvm_device_attr.addr points to a __u64 value:
+  bits:     | 63   ....  33 |  32  | 31 .. 3 |  2 .. 0
+  values:   |    eisn       | mask |  server | priority
+  - priority: 0-7 interrupt priority level
+  - server: CPU number chosen to handle the interrupt
+  - mask: mask flag (unused)
+  - eisn: Effective Interrupt Source Number
+  Errors:
+    -ENOENT: Unknown source number
+    -EINVAL: Not initialized source number
+    -EINVAL: Invalid priority
+    -EINVAL: Invalid CPU number.
+    -EFAULT: Invalid user pointer for attr->addr.
+    -ENXIO:  CPU event queues not configured or configuration of the
+             underlying HW interrupt failed
+    -EBUSY:  No CPU available to serve interrupt
+
+  4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
+  Configures an event queue of a CPU
+  Attributes:
+    EQ descriptor identifier (64-bit)
+  The EQ descriptor identifier is a tuple (server, priority) :
+  bits:     | 63   ....  32 | 31 .. 3 |  2 .. 0
+  values:   |    unused     |  server | priority
+  The kvm_device_attr.addr points to :
+    struct kvm_ppc_xive_eq {
+	__u32 flags;
+	__u32 qshift;
+	__u64 qaddr;
+	__u32 qtoggle;
+	__u32 qindex;
+	__u8  pad[40];
+    };
+  - flags: queue flags
+    KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
+	forces notification without using the coalescing mechanism
+	provided by the XIVE END ESBs.
+  - qshift: queue size (power of 2)
+  - qaddr: real address of queue
+  - qtoggle: current queue toggle bit
+  - qindex: current queue index
+  - pad: reserved for future use
+  Errors:
+    -ENOENT: Invalid CPU number
+    -EINVAL: Invalid priority
+    -EINVAL: Invalid flags
+    -EINVAL: Invalid queue size
+    -EINVAL: Invalid queue address
+    -EFAULT: Invalid user pointer for attr->addr.
+    -EIO:    Configuration of the underlying HW failed
+
+  5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
+  Synchronize the source to flush event notifications
+  Attributes:
+    Interrupt source number  (64-bit)
+  Errors:
+    -ENOENT: Unknown source number
+    -EINVAL: Not initialized source number
+
+* VCPU state
+
+  The XIVE IC maintains VP interrupt state in an internal structure
+  called the NVT. When a VP is not dispatched on a HW processor
+  thread, this structure can be updated by HW if the VP is the target
+  of an event notification.
+
+  It is important for migration to capture the cached IPB from the NVT
+  as it synthesizes the priorities of the pending interrupts. We
+  capture a bit more to report debug information.
+
+  KVM_REG_PPC_VP_STATE (2 * 64bits)
+  bits:     |  63  ....  32  |  31  ....  0  |
+  values:   |   TIMA word0   |   TIMA word1  |
+  bits:     | 127       ..........       64  |
+  values:   |            unused              |
+
+* Migration:
+
+  Saving the state of a VM using the XIVE native exploitation mode
+  should follow a specific sequence. When the VM is stopped :
+
+  1. Mask all sources (PQ=01) to stop the flow of events.
+
+  2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
+  flush any in-flight event notification and to stabilize the EQs. At
+  this stage, the EQ pages are marked dirty to make sure they are
+  transferred in the migration sequence.
+
+  3. Capture the state of the source targeting, the EQs configuration
+  and the state of thread interrupt context registers.
+
+  Restore is similar :
+
+  1. Restore the EQ configuration. As targeting depends on it.
+  2. Restore targeting
+  3. Restore the thread interrupt contexts
+  4. Restore the source states
+  5. Let the vCPU run
diff --git a/Documentation/virt/kvm/halt-polling.txt b/Documentation/virt/kvm/halt-polling.txt
new file mode 100644
index 000000000000..4f791b128dd2
--- /dev/null
+++ b/Documentation/virt/kvm/halt-polling.txt
@@ -0,0 +1,136 @@
+The KVM halt polling system
+===========================
+
+The KVM halt polling system provides a feature within KVM whereby the latency
+of a guest can, under some circumstances, be reduced by polling in the host
+for some time period after the guest has elected to no longer run by cedeing.
+That is, when a guest vcpu has ceded, or in the case of powerpc when all of the
+vcpus of a single vcore have ceded, the host kernel polls for wakeup conditions
+before giving up the cpu to the scheduler in order to let something else run.
+
+Polling provides a latency advantage in cases where the guest can be run again
+very quickly by at least saving us a trip through the scheduler, normally on
+the order of a few micro-seconds, although performance benefits are workload
+dependant. In the event that no wakeup source arrives during the polling
+interval or some other task on the runqueue is runnable the scheduler is
+invoked. Thus halt polling is especially useful on workloads with very short
+wakeup periods where the time spent halt polling is minimised and the time
+savings of not invoking the scheduler are distinguishable.
+
+The generic halt polling code is implemented in:
+
+	virt/kvm/kvm_main.c: kvm_vcpu_block()
+
+The powerpc kvm-hv specific case is implemented in:
+
+	arch/powerpc/kvm/book3s_hv.c: kvmppc_vcore_blocked()
+
+Halt Polling Interval
+=====================
+
+The maximum time for which to poll before invoking the scheduler, referred to
+as the halt polling interval, is increased and decreased based on the perceived
+effectiveness of the polling in an attempt to limit pointless polling.
+This value is stored in either the vcpu struct:
+
+	kvm_vcpu->halt_poll_ns
+
+or in the case of powerpc kvm-hv, in the vcore struct:
+
+	kvmppc_vcore->halt_poll_ns
+
+Thus this is a per vcpu (or vcore) value.
+
+During polling if a wakeup source is received within the halt polling interval,
+the interval is left unchanged. In the event that a wakeup source isn't
+received during the polling interval (and thus schedule is invoked) there are
+two options, either the polling interval and total block time[0] were less than
+the global max polling interval (see module params below), or the total block
+time was greater than the global max polling interval.
+
+In the event that both the polling interval and total block time were less than
+the global max polling interval then the polling interval can be increased in
+the hope that next time during the longer polling interval the wake up source
+will be received while the host is polling and the latency benefits will be
+received. The polling interval is grown in the function grow_halt_poll_ns() and
+is multiplied by the module parameters halt_poll_ns_grow and
+halt_poll_ns_grow_start.
+
+In the event that the total block time was greater than the global max polling
+interval then the host will never poll for long enough (limited by the global
+max) to wakeup during the polling interval so it may as well be shrunk in order
+to avoid pointless polling. The polling interval is shrunk in the function
+shrink_halt_poll_ns() and is divided by the module parameter
+halt_poll_ns_shrink, or set to 0 iff halt_poll_ns_shrink == 0.
+
+It is worth noting that this adjustment process attempts to hone in on some
+steady state polling interval but will only really do a good job for wakeups
+which come at an approximately constant rate, otherwise there will be constant
+adjustment of the polling interval.
+
+[0] total block time: the time between when the halt polling function is
+		      invoked and a wakeup source received (irrespective of
+		      whether the scheduler is invoked within that function).
+
+Module Parameters
+=================
+
+The kvm module has 3 tuneable module parameters to adjust the global max
+polling interval as well as the rate at which the polling interval is grown and
+shrunk. These variables are defined in include/linux/kvm_host.h and as module
+parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
+powerpc kvm-hv case.
+
+Module Parameter	|   Description		    |	     Default Value
+--------------------------------------------------------------------------------
+halt_poll_ns		| The global max polling    | KVM_HALT_POLL_NS_DEFAULT
+			| interval which defines    |
+			| the ceiling value of the  |
+			| polling interval for      | (per arch value)
+			| each vcpu.		    |
+--------------------------------------------------------------------------------
+halt_poll_ns_grow	| The value by which the    | 2
+			| halt polling interval is  |
+			| multiplied in the	    |
+			| grow_halt_poll_ns()	    |
+			| function.		    |
+--------------------------------------------------------------------------------
+halt_poll_ns_grow_start | The initial value to grow | 10000
+			| to from zero in the	    |
+			| grow_halt_poll_ns()	    |
+			| function.		    |
+--------------------------------------------------------------------------------
+halt_poll_ns_shrink	| The value by which the    | 0
+			| halt polling interval is  |
+			| divided in the	    |
+			| shrink_halt_poll_ns()	    |
+			| function.		    |
+--------------------------------------------------------------------------------
+
+These module parameters can be set from the debugfs files in:
+
+	/sys/module/kvm/parameters/
+
+Note: that these module parameters are system wide values and are not able to
+      be tuned on a per vm basis.
+
+Further Notes
+=============
+
+- Care should be taken when setting the halt_poll_ns module parameter as a
+large value has the potential to drive the cpu usage to 100% on a machine which
+would be almost entirely idle otherwise. This is because even if a guest has
+wakeups during which very little work is done and which are quite far apart, if
+the period is shorter than the global max polling interval (halt_poll_ns) then
+the host will always poll for the entire block time and thus cpu utilisation
+will go to 100%.
+
+- Halt polling essentially presents a trade off between power usage and latency
+and the module parameters should be used to tune the affinity for this. Idle
+cpu time is essentially converted to host kernel time with the aim of decreasing
+latency when entering the guest.
+
+- Halt polling will only be conducted by the host when no other tasks are
+runnable on that cpu, otherwise the polling will cease immediately and
+schedule will be invoked to allow that other task to run. Thus this doesn't
+allow a guest to denial of service the cpu.
diff --git a/Documentation/virt/kvm/hypercalls.txt b/Documentation/virt/kvm/hypercalls.txt
new file mode 100644
index 000000000000..5f6d291bd004
--- /dev/null
+++ b/Documentation/virt/kvm/hypercalls.txt
@@ -0,0 +1,154 @@
+Linux KVM Hypercall:
+===================
+X86:
+ KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
+ instruction. The hypervisor can replace it with instructions that are
+ guaranteed to be supported.
+
+ Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+ The hypercall number should be placed in rax and the return value will be
+ placed in rax.  No other registers will be clobbered unless explicitly stated
+ by the particular hypercall.
+
+S390:
+  R2-R7 are used for parameters 1-6. In addition, R1 is used for hypercall
+  number. The return value is written to R2.
+
+  S390 uses diagnose instruction as hypercall (0x500) along with hypercall
+  number in R1.
+
+  For further information on the S390 diagnose call as supported by KVM,
+  refer to Documentation/virt/kvm/s390-diag.txt.
+
+ PowerPC:
+  It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
+  Return value is placed in R3.
+
+  KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
+  property inside the device tree's /hypervisor node.
+  For more information refer to Documentation/virt/kvm/ppc-pv.txt
+
+MIPS:
+  KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
+  number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and
+  the return value is placed in $2 (v0).
+
+KVM Hypercalls Documentation
+===========================
+The template for each hypercall is:
+1. Hypercall name.
+2. Architecture(s)
+3. Status (deprecated, obsolete, active)
+4. Purpose
+
+1. KVM_HC_VAPIC_POLL_IRQ
+------------------------
+Architecture: x86
+Status: active
+Purpose: Trigger guest exit so that the host can check for pending
+interrupts on reentry.
+
+2. KVM_HC_MMU_OP
+------------------------
+Architecture: x86
+Status: deprecated.
+Purpose: Support MMU operations such as writing to PTE,
+flushing TLB, release PT.
+
+3. KVM_HC_FEATURES
+------------------------
+Architecture: PPC
+Status: active
+Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
+used to enumerate which hypercalls are available. On PPC, either device tree
+based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
+mechanism (which is this hypercall) can be used.
+
+4. KVM_HC_PPC_MAP_MAGIC_PAGE
+------------------------
+Architecture: PPC
+Status: active
+Purpose: To enable communication between the hypervisor and guest there is a
+shared page that contains parts of supervisor visible register state.
+The guest can map this shared page to access its supervisor register through
+memory using this hypercall.
+
+5. KVM_HC_KICK_CPU
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to wakeup a vcpu from HLT state
+Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
+kernel mode for an event to occur (ex: a spinlock to become available) can
+execute HLT instruction once it has busy-waited for more than a threshold
+time-interval. Execution of HLT instruction would cause the hypervisor to put
+the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
+same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
+specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
+is used in the hypercall for future use.
+
+
+6. KVM_HC_CLOCK_PAIRING
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to synchronize host and guest clocks.
+Usage:
+
+a0: guest physical address where host copies
+"struct kvm_clock_offset" structure.
+
+a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
+is supported (corresponding to the host's CLOCK_REALTIME clock).
+
+		struct kvm_clock_pairing {
+			__s64 sec;
+			__s64 nsec;
+			__u64 tsc;
+			__u32 flags;
+			__u32 pad[9];
+		};
+
+       Where:
+               * sec: seconds from clock_type clock.
+               * nsec: nanoseconds from clock_type clock.
+               * tsc: guest TSC value used to calculate sec/nsec pair
+               * flags: flags, unused (0) at the moment.
+
+The hypercall lets a guest compute a precise timestamp across
+host and guest.  The guest can use the returned TSC value to
+compute the CLOCK_REALTIME for its clock, at the same instant.
+
+Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
+or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------------
+Architecture: x86
+Status: active
+Purpose: Send IPIs to multiple vCPUs.
+
+a0: lower part of the bitmap of destination APIC IDs
+a1: higher part of the bitmap of destination APIC IDs
+a2: the lowest APIC ID in bitmap
+a3: APIC ICR
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+128 destinations per hypercall in 64-bit mode and 64 vCPUs per
+hypercall in 32-bit mode.  The destinations are represented by a
+bitmap contained in the first two arguments (a0 and a1). Bit 0 of
+a0 corresponds to the APIC ID in the third argument (a2), bit 1
+corresponds to the APIC ID a2+1, and so on.
+
+Returns the number of CPUs to which the IPIs were delivered successfully.
+
+7. KVM_HC_SCHED_YIELD
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to yield if the IPI target vCPU is preempted
+
+a0: destination APIC ID
+
+Usage example: When sending a call-function IPI-many to vCPUs, yield if
+any of the IPI target vCPUs was preempted.
diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst
new file mode 100644
index 000000000000..0b206a06f5be
--- /dev/null
+++ b/Documentation/virt/kvm/index.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+KVM
+===
+
+.. toctree::
+   :maxdepth: 2
+
+   amd-memory-encryption
+   cpuid
diff --git a/Documentation/virt/kvm/locking.txt b/Documentation/virt/kvm/locking.txt
new file mode 100644
index 000000000000..635cd6eaf714
--- /dev/null
+++ b/Documentation/virt/kvm/locking.txt
@@ -0,0 +1,215 @@
+KVM Lock Overview
+=================
+
+1. Acquisition Orders
+---------------------
+
+The acquisition orders for mutexes are as follows:
+
+- kvm->lock is taken outside vcpu->mutex
+
+- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
+
+- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
+  them together is quite rare.
+
+On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
+Everything else is a leaf: no other lock is taken inside the critical
+sections.
+
+2: Exception
+------------
+
+Fast page fault:
+
+Fast page fault is the fast path which fixes the guest page fault out of
+the mmu-lock on x86. Currently, the page fault can be fast in one of the
+following two cases:
+
+1. Access Tracking: The SPTE is not present, but it is marked for access
+tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
+restore the saved R/X bits. This is described in more detail later below.
+
+2. Write-Protection: The SPTE is present and the fault is
+caused by write-protect. That means we just need to change the W bit of the 
+spte.
+
+What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
+SPTE_MMU_WRITEABLE bit on the spte:
+- SPTE_HOST_WRITEABLE means the gfn is writable on host.
+- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
+  the gfn is writable on guest mmu and it is not write-protected by shadow
+  page write-protection.
+
+On fast page fault path, we will use cmpxchg to atomically set the spte W
+bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or 
+restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
+is safe because whenever changing these bits can be detected by cmpxchg.
+
+But we need carefully check these cases:
+1): The mapping from gfn to pfn
+The mapping from gfn to pfn may be changed since we can only ensure the pfn
+is not changed during cmpxchg. This is a ABA problem, for example, below case
+will happen:
+
+At the beginning:
+gpte = gfn1
+gfn1 is mapped to pfn1 on host
+spte is the shadow page table entry corresponding with gpte and
+spte = pfn1
+
+   VCPU 0                           VCPU0
+on fast page fault path:
+
+   old_spte = *spte;
+                                 pfn1 is swapped out:
+                                    spte = 0;
+
+                                 pfn1 is re-alloced for gfn2.
+
+                                 gpte is changed to point to
+                                 gfn2 by the guest:
+                                    spte = pfn1;
+
+   if (cmpxchg(spte, old_spte, old_spte+W)
+	mark_page_dirty(vcpu->kvm, gfn1)
+             OOPS!!!
+
+We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
+
+For direct sp, we can easily avoid it since the spte of direct sp is fixed
+to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
+to pin gfn to pfn, because after gfn_to_pfn_atomic():
+- We have held the refcount of pfn that means the pfn can not be freed and
+  be reused for another gfn.
+- The pfn is writable that means it can not be shared between different gfns
+  by KSM.
+
+Then, we can ensure the dirty bitmaps is correctly set for a gfn.
+
+Currently, to simplify the whole things, we disable fast page fault for
+indirect shadow page.
+
+2): Dirty bit tracking
+In the origin code, the spte can be fast updated (non-atomically) if the
+spte is read-only and the Accessed bit has already been set since the
+Accessed bit and Dirty bit can not be lost.
+
+But it is not true after fast page fault since the spte can be marked
+writable between reading spte and updating spte. Like below case:
+
+At the beginning:
+spte.W = 0
+spte.Accessed = 1
+
+   VCPU 0                                       VCPU0
+In mmu_spte_clear_track_bits():
+
+   old_spte = *spte;
+
+   /* 'if' condition is satisfied. */
+   if (old_spte.Accessed == 1 &&
+        old_spte.W == 0)
+      spte = 0ull;
+                                         on fast page fault path:
+                                             spte.W = 1
+                                         memory write on the spte:
+                                             spte.Dirty = 1
+
+
+   else
+      old_spte = xchg(spte, 0ull)
+
+
+   if (old_spte.Accessed == 1)
+      kvm_set_pfn_accessed(spte.pfn);
+   if (old_spte.Dirty == 1)
+      kvm_set_pfn_dirty(spte.pfn);
+      OOPS!!!
+
+The Dirty bit is lost in this case.
+
+In order to avoid this kind of issue, we always treat the spte as "volatile"
+if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
+the spte is always atomically updated in this case.
+
+3): flush tlbs due to spte updated
+If the spte is updated from writable to readonly, we should flush all TLBs,
+otherwise rmap_write_protect will find a read-only spte, even though the
+writable spte might be cached on a CPU's TLB.
+
+As mentioned before, the spte can be updated to writable out of mmu-lock on
+fast page fault path, in order to easily audit the path, we see if TLBs need
+be flushed caused by this reason in mmu_spte_update() since this is a common
+function to update spte (present -> present).
+
+Since the spte is "volatile" if it can be updated out of mmu-lock, we always
+atomically update the spte, the race caused by fast page fault can be avoided,
+See the comments in spte_has_volatile_bits() and mmu_spte_update().
+
+Lockless Access Tracking:
+
+This is used for Intel CPUs that are using EPT but do not support the EPT A/D
+bits. In this case, when the KVM MMU notifier is called to track accesses to a
+page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
+by clearing the RWX bits in the PTE and storing the original R & X bits in
+some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
+PTE (using the ignored bit 62). When the VM tries to access the page later on,
+a fault is generated and the fast page fault mechanism described above is used
+to atomically restore the PTE to a Present state. The W bit is not saved when
+the PTE is marked for access tracking and during restoration to the Present
+state, the W bit is set depending on whether or not it was a write access. If
+it wasn't, then the W bit will remain clear until a write access happens, at 
+which time it will be set using the Dirty tracking mechanism described above.
+
+3. Reference
+------------
+
+Name:		kvm_lock
+Type:		mutex
+Arch:		any
+Protects:	- vm_list
+
+Name:		kvm_count_lock
+Type:		raw_spinlock_t
+Arch:		any
+Protects:	- hardware virtualization enable/disable
+Comment:	'raw' because hardware enabling/disabling must be atomic /wrt
+		migration.
+
+Name:		kvm_arch::tsc_write_lock
+Type:		raw_spinlock
+Arch:		x86
+Protects:	- kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
+		- tsc offset in vmcb
+Comment:	'raw' because updating the tsc offsets must not be preempted.
+
+Name:		kvm->mmu_lock
+Type:		spinlock_t
+Arch:		any
+Protects:	-shadow page/shadow tlb entry
+Comment:	it is a spinlock since it is used in mmu notifier.
+
+Name:		kvm->srcu
+Type:		srcu lock
+Arch:		any
+Protects:	- kvm->memslots
+		- kvm->buses
+Comment:	The srcu read lock must be held while accessing memslots (e.g.
+		when using gfn_to_* functions) and while accessing in-kernel
+		MMIO/PIO address->device structure mapping (kvm->buses).
+		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
+		if it is needed by multiple functions.
+
+Name:		blocked_vcpu_on_cpu_lock
+Type:		spinlock_t
+Arch:		x86
+Protects:	blocked_vcpu_on_cpu
+Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts.
+		When VT-d posted-interrupts is supported and the VM has assigned
+		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
+		protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
+		wakeup notification event since external interrupts from the
+		assigned devices happens, we will find the vCPU on the list to
+		wakeup.
diff --git a/Documentation/virt/kvm/mmu.txt b/Documentation/virt/kvm/mmu.txt
new file mode 100644
index 000000000000..1b9880dfba0a
--- /dev/null
+++ b/Documentation/virt/kvm/mmu.txt
@@ -0,0 +1,449 @@
+The x86 kvm shadow mmu
+======================
+
+The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible
+for presenting a standard x86 mmu to the guest, while translating guest
+physical addresses to host physical addresses.
+
+The mmu code attempts to satisfy the following requirements:
+
+- correctness: the guest should not be able to determine that it is running
+               on an emulated mmu except for timing (we attempt to comply
+               with the specification, not emulate the characteristics of
+               a particular implementation such as tlb size)
+- security:    the guest must not be able to touch host memory not assigned
+               to it
+- performance: minimize the performance penalty imposed by the mmu
+- scaling:     need to scale to large memory and large vcpu guests
+- hardware:    support the full range of x86 virtualization hardware
+- integration: Linux memory management code must be in control of guest memory
+               so that swapping, page migration, page merging, transparent
+               hugepages, and similar features work without change
+- dirty tracking: report writes to guest memory to enable live migration
+               and framebuffer-based displays
+- footprint:   keep the amount of pinned kernel memory low (most memory
+               should be shrinkable)
+- reliability:  avoid multipage or GFP_ATOMIC allocations
+
+Acronyms
+========
+
+pfn   host page frame number
+hpa   host physical address
+hva   host virtual address
+gfn   guest frame number
+gpa   guest physical address
+gva   guest virtual address
+ngpa  nested guest physical address
+ngva  nested guest virtual address
+pte   page table entry (used also to refer generically to paging structure
+      entries)
+gpte  guest pte (referring to gfns)
+spte  shadow pte (referring to pfns)
+tdp   two dimensional paging (vendor neutral term for NPT and EPT)
+
+Virtual and real hardware supported
+===================================
+
+The mmu supports first-generation mmu hardware, which allows an atomic switch
+of the current paging mode and cr3 during guest entry, as well as
+two-dimensional paging (AMD's NPT and Intel's EPT).  The emulated hardware
+it exposes is the traditional 2/3/4 level x86 mmu, with support for global
+pages, pae, pse, pse36, cr0.wp, and 1GB pages. Emulated hardware also
+able to expose NPT capable hardware on NPT capable hosts.
+
+Translation
+===========
+
+The primary job of the mmu is to program the processor's mmu to translate
+addresses for the guest.  Different translations are required at different
+times:
+
+- when guest paging is disabled, we translate guest physical addresses to
+  host physical addresses (gpa->hpa)
+- when guest paging is enabled, we translate guest virtual addresses, to
+  guest physical addresses, to host physical addresses (gva->gpa->hpa)
+- when the guest launches a guest of its own, we translate nested guest
+  virtual addresses, to nested guest physical addresses, to guest physical
+  addresses, to host physical addresses (ngva->ngpa->gpa->hpa)
+
+The primary challenge is to encode between 1 and 3 translations into hardware
+that support only 1 (traditional) and 2 (tdp) translations.  When the
+number of required translations matches the hardware, the mmu operates in
+direct mode; otherwise it operates in shadow mode (see below).
+
+Memory
+======
+
+Guest memory (gpa) is part of the user address space of the process that is
+using kvm.  Userspace defines the translation between guest addresses and user
+addresses (gpa->hva); note that two gpas may alias to the same hva, but not
+vice versa.
+
+These hvas may be backed using any method available to the host: anonymous
+memory, file backed memory, and device memory.  Memory might be paged by the
+host at any time.
+
+Events
+======
+
+The mmu is driven by events, some from the guest, some from the host.
+
+Guest generated events:
+- writes to control registers (especially cr3)
+- invlpg/invlpga instruction execution
+- access to missing or protected translations
+
+Host generated events:
+- changes in the gpa->hpa translation (either through gpa->hva changes or
+  through hva->hpa changes)
+- memory pressure (the shrinker)
+
+Shadow pages
+============
+
+The principal data structure is the shadow page, 'struct kvm_mmu_page'.  A
+shadow page contains 512 sptes, which can be either leaf or nonleaf sptes.  A
+shadow page may contain a mix of leaf and nonleaf sptes.
+
+A nonleaf spte allows the hardware mmu to reach the leaf pages and
+is not related to a translation directly.  It points to other shadow pages.
+
+A leaf spte corresponds to either one or two translations encoded into
+one paging structure entry.  These are always the lowest level of the
+translation stack, with optional higher level translations left to NPT/EPT.
+Leaf ptes point at guest pages.
+
+The following table shows translations encoded by leaf ptes, with higher-level
+translations in parentheses:
+
+ Non-nested guests:
+  nonpaging:     gpa->hpa
+  paging:        gva->gpa->hpa
+  paging, tdp:   (gva->)gpa->hpa
+ Nested guests:
+  non-tdp:       ngva->gpa->hpa  (*)
+  tdp:           (ngva->)ngpa->gpa->hpa
+
+(*) the guest hypervisor will encode the ngva->gpa translation into its page
+    tables if npt is not present
+
+Shadow pages contain the following information:
+  role.level:
+    The level in the shadow paging hierarchy that this shadow page belongs to.
+    1=4k sptes, 2=2M sptes, 3=1G sptes, etc.
+  role.direct:
+    If set, leaf sptes reachable from this page are for a linear range.
+    Examples include real mode translation, large guest pages backed by small
+    host pages, and gpa->hpa translations when NPT or EPT is active.
+    The linear range starts at (gfn << PAGE_SHIFT) and its size is determined
+    by role.level (2MB for first level, 1GB for second level, 0.5TB for third
+    level, 256TB for fourth level)
+    If clear, this page corresponds to a guest page table denoted by the gfn
+    field.
+  role.quadrant:
+    When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
+    sptes.  That means a guest page table contains more ptes than the host,
+    so multiple shadow pages are needed to shadow one guest page.
+    For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
+    first or second 512-gpte block in the guest page table.  For second-level
+    page tables, each 32-bit gpte is converted to two 64-bit sptes
+    (since each first-level guest page is shadowed by two first-level
+    shadow pages) so role.quadrant takes values in the range 0..3.  Each
+    quadrant maps 1GB virtual address space.
+  role.access:
+    Inherited guest access permissions in the form uwx.  Note execute
+    permission is positive, not negative.
+  role.invalid:
+    The page is invalid and should not be used.  It is a root page that is
+    currently pinned (by a cpu hardware register pointing to it); once it is
+    unpinned it will be destroyed.
+  role.gpte_is_8_bytes:
+    Reflects the size of the guest PTE for which the page is valid, i.e. '1'
+    if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
+  role.nxe:
+    Contains the value of efer.nxe for which the page is valid.
+  role.cr0_wp:
+    Contains the value of cr0.wp for which the page is valid.
+  role.smep_andnot_wp:
+    Contains the value of cr4.smep && !cr0.wp for which the page is valid
+    (pages for which this is true are different from other pages; see the
+    treatment of cr0.wp=0 below).
+  role.smap_andnot_wp:
+    Contains the value of cr4.smap && !cr0.wp for which the page is valid
+    (pages for which this is true are different from other pages; see the
+    treatment of cr0.wp=0 below).
+  role.ept_sp:
+    This is a virtual flag to denote a shadowed nested EPT page.  ept_sp
+    is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination.
+  role.smm:
+    Is 1 if the page is valid in system management mode.  This field
+    determines which of the kvm_memslots array was used to build this
+    shadow page; it is also used to go back from a struct kvm_mmu_page
+    to a memslot, through the kvm_memslots_for_spte_role macro and
+    __gfn_to_memslot.
+  role.ad_disabled:
+    Is 1 if the MMU instance cannot use A/D bits.  EPT did not have A/D
+    bits before Haswell; shadow EPT page tables also cannot use A/D bits
+    if the L1 hypervisor does not enable them.
+  gfn:
+    Either the guest page table containing the translations shadowed by this
+    page, or the base page frame for linear translations.  See role.direct.
+  spt:
+    A pageful of 64-bit sptes containing the translations for this page.
+    Accessed by both kvm and hardware.
+    The page pointed to by spt will have its page->private pointing back
+    at the shadow page structure.
+    sptes in spt point either at guest pages, or at lower-level shadow pages.
+    Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point
+    at __pa(sp2->spt).  sp2 will point back at sp1 through parent_pte.
+    The spt array forms a DAG structure with the shadow page as a node, and
+    guest pages as leaves.
+  gfns:
+    An array of 512 guest frame numbers, one for each present pte.  Used to
+    perform a reverse map from a pte to a gfn. When role.direct is set, any
+    element of this array can be calculated from the gfn field when used, in
+    this case, the array of gfns is not allocated. See role.direct and gfn.
+  root_count:
+    A counter keeping track of how many hardware registers (guest cr3 or
+    pdptrs) are now pointing at the page.  While this counter is nonzero, the
+    page cannot be destroyed.  See role.invalid.
+  parent_ptes:
+    The reverse mapping for the pte/ptes pointing at this page's spt. If
+    parent_ptes bit 0 is zero, only one spte points at this page and
+    parent_ptes points at this single spte, otherwise, there exists multiple
+    sptes pointing at this page and (parent_ptes & ~0x1) points at a data
+    structure with a list of parent sptes.
+  unsync:
+    If true, then the translations in this page may not match the guest's
+    translation.  This is equivalent to the state of the tlb when a pte is
+    changed but before the tlb entry is flushed.  Accordingly, unsync ptes
+    are synchronized when the guest executes invlpg or flushes its tlb by
+    other means.  Valid for leaf pages.
+  unsync_children:
+    How many sptes in the page point at pages that are unsync (or have
+    unsynchronized children).
+  unsync_child_bitmap:
+    A bitmap indicating which sptes in spt point (directly or indirectly) at
+    pages that may be unsynchronized.  Used to quickly locate all unsychronized
+    pages reachable from a given page.
+  clear_spte_count:
+    Only present on 32-bit hosts, where a 64-bit spte cannot be written
+    atomically.  The reader uses this while running out of the MMU lock
+    to detect in-progress updates and retry them until the writer has
+    finished the write.
+  write_flooding_count:
+    A guest may write to a page table many times, causing a lot of
+    emulations if the page needs to be write-protected (see "Synchronized
+    and unsynchronized pages" below).  Leaf pages can be unsynchronized
+    so that they do not trigger frequent emulation, but this is not
+    possible for non-leafs.  This field counts the number of emulations
+    since the last time the page table was actually used; if emulation
+    is triggered too frequently on this page, KVM will unmap the page
+    to avoid emulation in the future.
+
+Reverse map
+===========
+
+The mmu maintains a reverse mapping whereby all ptes mapping a page can be
+reached given its gfn.  This is used, for example, when swapping out a page.
+
+Synchronized and unsynchronized pages
+=====================================
+
+The guest uses two events to synchronize its tlb and page tables: tlb flushes
+and page invalidations (invlpg).
+
+A tlb flush means that we need to synchronize all sptes reachable from the
+guest's cr3.  This is expensive, so we keep all guest page tables write
+protected, and synchronize sptes to gptes when a gpte is written.
+
+A special case is when a guest page table is reachable from the current
+guest cr3.  In this case, the guest is obliged to issue an invlpg instruction
+before using the translation.  We take advantage of that by removing write
+protection from the guest page, and allowing the guest to modify it freely.
+We synchronize modified gptes when the guest invokes invlpg.  This reduces
+the amount of emulation we have to do when the guest modifies multiple gptes,
+or when the a guest page is no longer used as a page table and is used for
+random guest data.
+
+As a side effect we have to resynchronize all reachable unsynchronized shadow
+pages on a tlb flush.
+
+
+Reaction to events
+==================
+
+- guest page fault (or npt page fault, or ept violation)
+
+This is the most complicated event.  The cause of a page fault can be:
+
+  - a true guest fault (the guest translation won't allow the access) (*)
+  - access to a missing translation
+  - access to a protected translation
+    - when logging dirty pages, memory is write protected
+    - synchronized shadow pages are write protected (*)
+  - access to untranslatable memory (mmio)
+
+  (*) not applicable in direct mode
+
+Handling a page fault is performed as follows:
+
+ - if the RSV bit of the error code is set, the page fault is caused by guest
+   accessing MMIO and cached MMIO information is available.
+   - walk shadow page table
+   - check for valid generation number in the spte (see "Fast invalidation of
+     MMIO sptes" below)
+   - cache the information to vcpu->arch.mmio_gva, vcpu->arch.access and
+     vcpu->arch.mmio_gfn, and call the emulator
+ - If both P bit and R/W bit of error code are set, this could possibly
+   be handled as a "fast page fault" (fixed without taking the MMU lock).  See
+   the description in Documentation/virt/kvm/locking.txt.
+ - if needed, walk the guest page tables to determine the guest translation
+   (gva->gpa or ngpa->gpa)
+   - if permissions are insufficient, reflect the fault back to the guest
+ - determine the host page
+   - if this is an mmio request, there is no host page; cache the info to
+     vcpu->arch.mmio_gva, vcpu->arch.access and vcpu->arch.mmio_gfn
+ - walk the shadow page table to find the spte for the translation,
+   instantiating missing intermediate page tables as necessary
+   - If this is an mmio request, cache the mmio info to the spte and set some
+     reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
+ - try to unsynchronize the page
+   - if successful, we can let the guest continue and modify the gpte
+ - emulate the instruction
+   - if failed, unshadow the page and let the guest continue
+ - update any translations that were modified by the instruction
+
+invlpg handling:
+
+  - walk the shadow page hierarchy and drop affected translations
+  - try to reinstantiate the indicated translation in the hope that the
+    guest will use it in the near future
+
+Guest control register updates:
+
+- mov to cr3
+  - look up new shadow roots
+  - synchronize newly reachable shadow pages
+
+- mov to cr0/cr4/efer
+  - set up mmu context for new paging mode
+  - look up new shadow roots
+  - synchronize newly reachable shadow pages
+
+Host translation updates:
+
+  - mmu notifier called with updated hva
+  - look up affected sptes through reverse map
+  - drop (or update) translations
+
+Emulating cr0.wp
+================
+
+If tdp is not enabled, the host must keep cr0.wp=1 so page write protection
+works for the guest kernel, not guest guest userspace.  When the guest
+cr0.wp=1, this does not present a problem.  However when the guest cr0.wp=0,
+we cannot map the permissions for gpte.u=1, gpte.w=0 to any spte (the
+semantics require allowing any guest kernel access plus user read access).
+
+We handle this by mapping the permissions to two possible sptes, depending
+on fault type:
+
+- kernel write fault: spte.u=0, spte.w=1 (allows full kernel access,
+  disallows user access)
+- read fault: spte.u=1, spte.w=0 (allows full read access, disallows kernel
+  write access)
+
+(user write faults generate a #PF)
+
+In the first case there are two additional complications:
+- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
+  the kernel may now execute it.  We handle this by also setting spte.nx.
+  If we get a user fetch or read fault, we'll change spte.u=1 and
+  spte.nx=gpte.nx back.  For this to work, KVM forces EFER.NX to 1 when
+  shadow paging is in use.
+- if CR4.SMAP is disabled: since the page has been changed to a kernel
+  page, it can not be reused when CR4.SMAP is enabled. We set
+  CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
+  here we do not care the case that CR4.SMAP is enabled since KVM will
+  directly inject #PF to guest due to failed permission check.
+
+To prevent an spte that was converted into a kernel page with cr0.wp=0
+from being written by the kernel after cr0.wp has changed to 1, we make
+the value of cr0.wp part of the page role.  This means that an spte created
+with one value of cr0.wp cannot be used when cr0.wp has a different value -
+it will simply be missed by the shadow page lookup code.  A similar issue
+exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after
+changing cr4.smep to 1.  To avoid this, the value of !cr0.wp && cr4.smep
+is also made a part of the page role.
+
+Large pages
+===========
+
+The mmu supports all combinations of large and small guest and host pages.
+Supported page sizes include 4k, 2M, 4M, and 1G.  4M pages are treated as
+two separate 2M pages, on both guest and host, since the mmu always uses PAE
+paging.
+
+To instantiate a large spte, four constraints must be satisfied:
+
+- the spte must point to a large host page
+- the guest pte must be a large pte of at least equivalent size (if tdp is
+  enabled, there is no guest pte and this condition is satisfied)
+- if the spte will be writeable, the large page frame may not overlap any
+  write-protected pages
+- the guest page must be wholly contained by a single memory slot
+
+To check the last two conditions, the mmu maintains a ->disallow_lpage set of
+arrays for each memory slot and large page size.  Every write protected page
+causes its disallow_lpage to be incremented, thus preventing instantiation of
+a large spte.  The frames at the end of an unaligned memory slot have
+artificially inflated ->disallow_lpages so they can never be instantiated.
+
+Fast invalidation of MMIO sptes
+===============================
+
+As mentioned in "Reaction to events" above, kvm will cache MMIO
+information in leaf sptes.  When a new memslot is added or an existing
+memslot is changed, this information may become stale and needs to be
+invalidated.  This also needs to hold the MMU lock while walking all
+shadow pages, and is made more scalable with a similar technique.
+
+MMIO sptes have a few spare bits, which are used to store a
+generation number.  The global generation number is stored in
+kvm_memslots(kvm)->generation, and increased whenever guest memory info
+changes.
+
+When KVM finds an MMIO spte, it checks the generation number of the spte.
+If the generation number of the spte does not equal the global generation
+number, it will ignore the cached MMIO information and handle the page
+fault through the slow path.
+
+Since only 19 bits are used to store generation-number on mmio spte, all
+pages are zapped when there is an overflow.
+
+Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+times, the last one happening when the generation number is retrieved and
+stored into the MMIO spte.  Thus, the MMIO spte might be created based on
+out-of-date information, but with an up-to-date generation number.
+
+To avoid this, the generation number is incremented again after synchronize_srcu
+returns; thus, bit 63 of kvm_memslots(kvm)->generation set to 1 only during a
+memslot update, while some SRCU readers might be using the old copy.  We do not
+want to use an MMIO sptes created with an odd generation number, and we can do
+this without losing a bit in the MMIO spte.  The "update in-progress" bit of the
+generation is not stored in MMIO spte, and is so is implicitly zero when the
+generation is extracted out of the spte.  If KVM is unlucky and creates an MMIO
+spte while an update is in-progress, the next access to the spte will always be
+a cache miss.  For example, a subsequent access during the update window will
+miss due to the in-progress flag diverging, while an access after the update
+window closes will have a higher generation number (as compared to the spte).
+
+
+Further reading
+===============
+
+- NPT presentation from KVM Forum 2008
+  http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
+
diff --git a/Documentation/virt/kvm/msr.txt b/Documentation/virt/kvm/msr.txt
new file mode 100644
index 000000000000..df1f4338b3ca
--- /dev/null
+++ b/Documentation/virt/kvm/msr.txt
@@ -0,0 +1,284 @@
+KVM-specific MSRs.
+Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
+=====================================================
+
+KVM makes use of some custom MSRs to service some requests.
+
+Custom MSRs have a range reserved for them, that goes from
+0x4b564d00 to 0x4b564dff. There are MSRs outside this area,
+but they are deprecated and their use is discouraged.
+
+Custom MSR list
+--------
+
+The current supported Custom MSR list is:
+
+MSR_KVM_WALL_CLOCK_NEW:   0x4b564d00
+
+	data: 4-byte alignment physical address of a memory area which must be
+	in guest RAM. This memory is expected to hold a copy of the following
+	structure:
+
+	struct pvclock_wall_clock {
+		u32   version;
+		u32   sec;
+		u32   nsec;
+	} __attribute__((__packed__));
+
+	whose data will be filled in by the hypervisor. The hypervisor is only
+	guaranteed to update this data at the moment of MSR write.
+	Users that want to reliably query this information more than once have
+	to write more than once to this MSR. Fields have the following meanings:
+
+		version: guest has to check version before and after grabbing
+		time information and check that they are both equal and even.
+		An odd version indicates an in-progress update.
+
+		sec: number of seconds for wallclock at time of boot.
+
+		nsec: number of nanoseconds for wallclock at time of boot.
+
+	In order to get the current wallclock time, the system_time from
+	MSR_KVM_SYSTEM_TIME_NEW needs to be added.
+
+	Note that although MSRs are per-CPU entities, the effect of this
+	particular MSR is global.
+
+	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
+	leaf prior to usage.
+
+MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
+
+	data: 4-byte aligned physical address of a memory area which must be in
+	guest RAM, plus an enable bit in bit 0. This memory is expected to hold
+	a copy of the following structure:
+
+	struct pvclock_vcpu_time_info {
+		u32   version;
+		u32   pad0;
+		u64   tsc_timestamp;
+		u64   system_time;
+		u32   tsc_to_system_mul;
+		s8    tsc_shift;
+		u8    flags;
+		u8    pad[2];
+	} __attribute__((__packed__)); /* 32 bytes */
+
+	whose data will be filled in by the hypervisor periodically. Only one
+	write, or registration, is needed for each VCPU. The interval between
+	updates of this structure is arbitrary and implementation-dependent.
+	The hypervisor may update this structure at any time it sees fit until
+	anything with bit0 == 0 is written to it.
+
+	Fields have the following meanings:
+
+		version: guest has to check version before and after grabbing
+		time information and check that they are both equal and even.
+		An odd version indicates an in-progress update.
+
+		tsc_timestamp: the tsc value at the current VCPU at the time
+		of the update of this structure. Guests can subtract this value
+		from current tsc to derive a notion of elapsed time since the
+		structure update.
+
+		system_time: a host notion of monotonic time, including sleep
+		time at the time this structure was last updated. Unit is
+		nanoseconds.
+
+		tsc_to_system_mul: multiplier to be used when converting
+		tsc-related quantity to nanoseconds
+
+		tsc_shift: shift to be used when converting tsc-related
+		quantity to nanoseconds. This shift will ensure that
+		multiplication with tsc_to_system_mul does not overflow.
+		A positive value denotes a left shift, a negative value
+		a right shift.
+
+		The conversion from tsc to nanoseconds involves an additional
+		right shift by 32 bits. With this information, guests can
+		derive per-CPU time by doing:
+
+			time = (current_tsc - tsc_timestamp)
+			if (tsc_shift >= 0)
+				time <<= tsc_shift;
+			else
+				time >>= -tsc_shift;
+			time = (time * tsc_to_system_mul) >> 32
+			time = time + system_time
+
+		flags: bits in this field indicate extended capabilities
+		coordinated between the guest and the hypervisor. Availability
+		of specific flags has to be checked in 0x40000001 cpuid leaf.
+		Current flags are:
+
+		 flag bit   | cpuid bit    | meaning
+		-------------------------------------------------------------
+			    |	           | time measures taken across
+		     0      |	   24      | multiple cpus are guaranteed to
+			    |		   | be monotonic
+		-------------------------------------------------------------
+			    |		   | guest vcpu has been paused by
+		     1	    |	  N/A	   | the host
+			    |		   | See 4.70 in api.txt
+		-------------------------------------------------------------
+
+	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
+	leaf prior to usage.
+
+
+MSR_KVM_WALL_CLOCK:  0x11
+
+	data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
+
+	This MSR falls outside the reserved KVM range and may be removed in the
+	future. Its usage is deprecated.
+
+	Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
+	leaf prior to usage.
+
+MSR_KVM_SYSTEM_TIME: 0x12
+
+	data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
+
+	This MSR falls outside the reserved KVM range and may be removed in the
+	future. Its usage is deprecated.
+
+	Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
+	leaf prior to usage.
+
+	The suggested algorithm for detecting kvmclock presence is then:
+
+		if (!kvm_para_available())    /* refer to cpuid.txt */
+			return NON_PRESENT;
+
+		flags = cpuid_eax(0x40000001);
+		if (flags & 3) {
+			msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
+			msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
+			return PRESENT;
+		} else if (flags & 0) {
+			msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
+			msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+			return PRESENT;
+		} else
+			return NON_PRESENT;
+
+MSR_KVM_ASYNC_PF_EN: 0x4b564d02
+	data: Bits 63-6 hold 64-byte aligned physical address of a
+	64 byte memory area which must be in guest RAM and must be
+	zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
+	when asynchronous page faults are enabled on the vcpu 0 when
+	disabled. Bit 1 is 1 if asynchronous page faults can be injected
+	when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+	are delivered to L1 as #PF vmexits.  Bit 2 can be set only if
+	KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
+
+	First 4 byte of 64 byte memory location will be written to by
+	the hypervisor at the time of asynchronous page fault (APF)
+	injection to indicate type of asynchronous page fault. Value
+	of 1 means that the page referred to by the page fault is not
+	present. Value 2 means that the page is now available. Disabling
+	interrupt inhibits APFs. Guest must not enable interrupt
+	before the reason is read, or it may be overwritten by another
+	APF. Since APF uses the same exception vector as regular page
+	fault guest must reset the reason to 0 before it does
+	something that can generate normal page fault.  If during page
+	fault APF reason is 0 it means that this is regular page
+	fault.
+
+	During delivery of type 1 APF cr2 contains a token that will
+	be used to notify a guest when missing page becomes
+	available. When page becomes available type 2 APF is sent with
+	cr2 set to the token associated with the page. There is special
+	kind of token 0xffffffff which tells vcpu that it should wake
+	up all processes waiting for APFs and no individual type 2 APFs
+	will be sent.
+
+	If APF is disabled while there are outstanding APFs, they will
+	not be delivered.
+
+	Currently type 2 APF will be always delivered on the same vcpu as
+	type 1 was, but guest should not rely on that.
+
+MSR_KVM_STEAL_TIME: 0x4b564d03
+
+	data: 64-byte alignment physical address of a memory area which must be
+	in guest RAM, plus an enable bit in bit 0. This memory is expected to
+	hold a copy of the following structure:
+
+	struct kvm_steal_time {
+		__u64 steal;
+		__u32 version;
+		__u32 flags;
+		__u8  preempted;
+		__u8  u8_pad[3];
+		__u32 pad[11];
+	}
+
+	whose data will be filled in by the hypervisor periodically. Only one
+	write, or registration, is needed for each VCPU. The interval between
+	updates of this structure is arbitrary and implementation-dependent.
+	The hypervisor may update this structure at any time it sees fit until
+	anything with bit0 == 0 is written to it. Guest is required to make sure
+	this structure is initialized to zero.
+
+	Fields have the following meanings:
+
+		version: a sequence counter. In other words, guest has to check
+		this field before and after grabbing time information and make
+		sure they are both equal and even. An odd version indicates an
+		in-progress update.
+
+		flags: At this point, always zero. May be used to indicate
+		changes in this structure in the future.
+
+		steal: the amount of time in which this vCPU did not run, in
+		nanoseconds. Time during which the vcpu is idle, will not be
+		reported as steal time.
+
+		preempted: indicate the vCPU who owns this struct is running or
+		not. Non-zero values mean the vCPU has been preempted. Zero
+		means the vCPU is not preempted. NOTE, it is always zero if the
+		the hypervisor doesn't support this field.
+
+MSR_KVM_EOI_EN: 0x4b564d04
+	data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
+	when disabled.  Bit 1 is reserved and must be zero.  When PV end of
+	interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
+	physical address of a 4 byte memory area which must be in guest RAM and
+	must be zeroed.
+
+	The first, least significant bit of 4 byte memory location will be
+	written to by the hypervisor, typically at the time of interrupt
+	injection.  Value of 1 means that guest can skip writing EOI to the apic
+	(using MSR or MMIO write); instead, it is sufficient to signal
+	EOI by clearing the bit in guest memory - this location will
+	later be polled by the hypervisor.
+	Value of 0 means that the EOI write is required.
+
+	It is always safe for the guest to ignore the optimization and perform
+	the APIC EOI write anyway.
+
+	Hypervisor is guaranteed to only modify this least
+	significant bit while in the current VCPU context, this means that
+	guest does not need to use either lock prefix or memory ordering
+	primitives to synchronise with the hypervisor.
+
+	However, hypervisor can set and clear this memory bit at any time:
+	therefore to make sure hypervisor does not interrupt the
+	guest and clear the least significant bit in the memory area
+	in the window between guest testing it to detect
+	whether it can skip EOI apic write and between guest
+	clearing it to signal EOI to the hypervisor,
+	guest must both read the least significant bit in the memory area and
+	clear it using a single CPU instruction, such as test and clear, or
+	compare and exchange.
+
+MSR_KVM_POLL_CONTROL: 0x4b564d05
+	Control host-side polling.
+
+	data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
+
+	KVM guests can request the host not to poll on HLT, for example if
+	they are performing polling themselves.
+
diff --git a/Documentation/virt/kvm/nested-vmx.txt b/Documentation/virt/kvm/nested-vmx.txt
new file mode 100644
index 000000000000..97eb1353e962
--- /dev/null
+++ b/Documentation/virt/kvm/nested-vmx.txt
@@ -0,0 +1,240 @@
+Nested VMX
+==========
+
+Overview
+---------
+
+On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions)
+to easily and efficiently run guest operating systems. Normally, these guests
+*cannot* themselves be hypervisors running their own guests, because in VMX,
+guests cannot use VMX instructions.
+
+The "Nested VMX" feature adds this missing capability - of running guest
+hypervisors (which use VMX) with their own nested guests. It does so by
+allowing a guest to use VMX instructions, and correctly and efficiently
+emulating them using the single level of VMX available in the hardware.
+
+We describe in much greater detail the theory behind the nested VMX feature,
+its implementation and its performance characteristics, in the OSDI 2010 paper
+"The Turtles Project: Design and Implementation of Nested Virtualization",
+available at:
+
+	http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
+
+
+Terminology
+-----------
+
+Single-level virtualization has two levels - the host (KVM) and the guests.
+In nested virtualization, we have three levels: The host (KVM), which we call
+L0, the guest hypervisor, which we call L1, and its nested guest, which we
+call L2.
+
+
+Running nested VMX
+------------------
+
+The nested VMX feature is disabled by default. It can be enabled by giving
+the "nested=1" option to the kvm-intel module.
+
+No modifications are required to user space (qemu). However, qemu's default
+emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
+explicitly enabled, by giving qemu one of the following options:
+
+     -cpu host              (emulated CPU has all features of the real CPU)
+
+     -cpu qemu64,+vmx       (add just the vmx feature to a named CPU type)
+
+
+ABIs
+----
+
+Nested VMX aims to present a standard and (eventually) fully-functional VMX
+implementation for the a guest hypervisor to use. As such, the official
+specification of the ABI that it provides is Intel's VMX specification,
+namely volume 3B of their "Intel 64 and IA-32 Architectures Software
+Developer's Manual". Not all of VMX's features are currently fully supported,
+but the goal is to eventually support them all, starting with the VMX features
+which are used in practice by popular hypervisors (KVM and others).
+
+As a VMX implementation, nested VMX presents a VMCS structure to L1.
+As mandated by the spec, other than the two fields revision_id and abort,
+this structure is *opaque* to its user, who is not supposed to know or care
+about its internal structure. Rather, the structure is accessed through the
+VMREAD and VMWRITE instructions.
+Still, for debugging purposes, KVM developers might be interested to know the
+internals of this structure; This is struct vmcs12 from arch/x86/kvm/vmx.c.
+
+The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we
+also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS
+which L0 builds to actually run L2 - how this is done is explained in the
+aforementioned paper.
+
+For convenience, we repeat the content of struct vmcs12 here. If the internals
+of this structure changes, this can break live migration across KVM versions.
+VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
+struct shadow_vmcs is ever changed.
+
+	typedef u64 natural_width;
+	struct __packed vmcs12 {
+		/* According to the Intel spec, a VMCS region must start with
+		 * these two user-visible fields */
+		u32 revision_id;
+		u32 abort;
+
+		u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
+		u32 padding[7]; /* room for future expansion */
+
+		u64 io_bitmap_a;
+		u64 io_bitmap_b;
+		u64 msr_bitmap;
+		u64 vm_exit_msr_store_addr;
+		u64 vm_exit_msr_load_addr;
+		u64 vm_entry_msr_load_addr;
+		u64 tsc_offset;
+		u64 virtual_apic_page_addr;
+		u64 apic_access_addr;
+		u64 ept_pointer;
+		u64 guest_physical_address;
+		u64 vmcs_link_pointer;
+		u64 guest_ia32_debugctl;
+		u64 guest_ia32_pat;
+		u64 guest_ia32_efer;
+		u64 guest_pdptr0;
+		u64 guest_pdptr1;
+		u64 guest_pdptr2;
+		u64 guest_pdptr3;
+		u64 host_ia32_pat;
+		u64 host_ia32_efer;
+		u64 padding64[8]; /* room for future expansion */
+		natural_width cr0_guest_host_mask;
+		natural_width cr4_guest_host_mask;
+		natural_width cr0_read_shadow;
+		natural_width cr4_read_shadow;
+		natural_width cr3_target_value0;
+		natural_width cr3_target_value1;
+		natural_width cr3_target_value2;
+		natural_width cr3_target_value3;
+		natural_width exit_qualification;
+		natural_width guest_linear_address;
+		natural_width guest_cr0;
+		natural_width guest_cr3;
+		natural_width guest_cr4;
+		natural_width guest_es_base;
+		natural_width guest_cs_base;
+		natural_width guest_ss_base;
+		natural_width guest_ds_base;
+		natural_width guest_fs_base;
+		natural_width guest_gs_base;
+		natural_width guest_ldtr_base;
+		natural_width guest_tr_base;
+		natural_width guest_gdtr_base;
+		natural_width guest_idtr_base;
+		natural_width guest_dr7;
+		natural_width guest_rsp;
+		natural_width guest_rip;
+		natural_width guest_rflags;
+		natural_width guest_pending_dbg_exceptions;
+		natural_width guest_sysenter_esp;
+		natural_width guest_sysenter_eip;
+		natural_width host_cr0;
+		natural_width host_cr3;
+		natural_width host_cr4;
+		natural_width host_fs_base;
+		natural_width host_gs_base;
+		natural_width host_tr_base;
+		natural_width host_gdtr_base;
+		natural_width host_idtr_base;
+		natural_width host_ia32_sysenter_esp;
+		natural_width host_ia32_sysenter_eip;
+		natural_width host_rsp;
+		natural_width host_rip;
+		natural_width paddingl[8]; /* room for future expansion */
+		u32 pin_based_vm_exec_control;
+		u32 cpu_based_vm_exec_control;
+		u32 exception_bitmap;
+		u32 page_fault_error_code_mask;
+		u32 page_fault_error_code_match;
+		u32 cr3_target_count;
+		u32 vm_exit_controls;
+		u32 vm_exit_msr_store_count;
+		u32 vm_exit_msr_load_count;
+		u32 vm_entry_controls;
+		u32 vm_entry_msr_load_count;
+		u32 vm_entry_intr_info_field;
+		u32 vm_entry_exception_error_code;
+		u32 vm_entry_instruction_len;
+		u32 tpr_threshold;
+		u32 secondary_vm_exec_control;
+		u32 vm_instruction_error;
+		u32 vm_exit_reason;
+		u32 vm_exit_intr_info;
+		u32 vm_exit_intr_error_code;
+		u32 idt_vectoring_info_field;
+		u32 idt_vectoring_error_code;
+		u32 vm_exit_instruction_len;
+		u32 vmx_instruction_info;
+		u32 guest_es_limit;
+		u32 guest_cs_limit;
+		u32 guest_ss_limit;
+		u32 guest_ds_limit;
+		u32 guest_fs_limit;
+		u32 guest_gs_limit;
+		u32 guest_ldtr_limit;
+		u32 guest_tr_limit;
+		u32 guest_gdtr_limit;
+		u32 guest_idtr_limit;
+		u32 guest_es_ar_bytes;
+		u32 guest_cs_ar_bytes;
+		u32 guest_ss_ar_bytes;
+		u32 guest_ds_ar_bytes;
+		u32 guest_fs_ar_bytes;
+		u32 guest_gs_ar_bytes;
+		u32 guest_ldtr_ar_bytes;
+		u32 guest_tr_ar_bytes;
+		u32 guest_interruptibility_info;
+		u32 guest_activity_state;
+		u32 guest_sysenter_cs;
+		u32 host_ia32_sysenter_cs;
+		u32 padding32[8]; /* room for future expansion */
+		u16 virtual_processor_id;
+		u16 guest_es_selector;
+		u16 guest_cs_selector;
+		u16 guest_ss_selector;
+		u16 guest_ds_selector;
+		u16 guest_fs_selector;
+		u16 guest_gs_selector;
+		u16 guest_ldtr_selector;
+		u16 guest_tr_selector;
+		u16 host_es_selector;
+		u16 host_cs_selector;
+		u16 host_ss_selector;
+		u16 host_ds_selector;
+		u16 host_fs_selector;
+		u16 host_gs_selector;
+		u16 host_tr_selector;
+	};
+
+
+Authors
+-------
+
+These patches were written by:
+     Abel Gordon, abelg <at> il.ibm.com
+     Nadav Har'El, nyh <at> il.ibm.com
+     Orit Wasserman, oritw <at> il.ibm.com
+     Ben-Ami Yassor, benami <at> il.ibm.com
+     Muli Ben-Yehuda, muli <at> il.ibm.com
+
+With contributions by:
+     Anthony Liguori, aliguori <at> us.ibm.com
+     Mike Day, mdday <at> us.ibm.com
+     Michael Factor, factor <at> il.ibm.com
+     Zvi Dubitzky, dubi <at> il.ibm.com
+
+And valuable reviews by:
+     Avi Kivity, avi <at> redhat.com
+     Gleb Natapov, gleb <at> redhat.com
+     Marcelo Tosatti, mtosatti <at> redhat.com
+     Kevin Tian, kevin.tian <at> intel.com
+     and others.
diff --git a/Documentation/virt/kvm/ppc-pv.txt b/Documentation/virt/kvm/ppc-pv.txt
new file mode 100644
index 000000000000..e26115ce4258
--- /dev/null
+++ b/Documentation/virt/kvm/ppc-pv.txt
@@ -0,0 +1,212 @@
+The PPC KVM paravirtual interface
+=================================
+
+The basic execution principle by which KVM on PowerPC works is to run all kernel
+space code in PR=1 which is user space. This way we trap all privileged
+instructions and can emulate them accordingly.
+
+Unfortunately that is also the downfall. There are quite some privileged
+instructions that needlessly return us to the hypervisor even though they
+could be handled differently.
+
+This is what the PPC PV interface helps with. It takes privileged instructions
+and transforms them into unprivileged ones with some help from the hypervisor.
+This cuts down virtualization costs by about 50% on some of my benchmarks.
+
+The code for that interface can be found in arch/powerpc/kernel/kvm*
+
+Querying for existence
+======================
+
+To find out if we're running on KVM or not, we leverage the device tree. When
+Linux is running on KVM, a node /hypervisor exists. That node contains a
+compatible property with the value "linux,kvm".
+
+Once you determined you're running under a PV capable KVM, you can now use
+hypercalls as described below.
+
+KVM hypercalls
+==============
+
+Inside the device tree's /hypervisor node there's a property called
+'hypercall-instructions'. This property contains at most 4 opcodes that make
+up the hypercall. To call a hypercall, just call these instructions.
+
+The parameters are as follows:
+
+	Register	IN			OUT
+
+	r0		-			volatile
+	r3		1st parameter		Return code
+	r4		2nd parameter		1st output value
+	r5		3rd parameter		2nd output value
+	r6		4th parameter		3rd output value
+	r7		5th parameter		4th output value
+	r8		6th parameter		5th output value
+	r9		7th parameter		6th output value
+	r10		8th parameter		7th output value
+	r11		hypercall number	8th output value
+	r12		-			volatile
+
+Hypercall definitions are shared in generic code, so the same hypercall numbers
+apply for x86 and powerpc alike with the exception that each KVM hypercall
+also needs to be ORed with the KVM vendor code which is (42 << 16).
+
+Return codes can be as follows:
+
+	Code		Meaning
+
+	0		Success
+	12		Hypercall not implemented
+	<0		Error
+
+The magic page
+==============
+
+To enable communication between the hypervisor and guest there is a new shared
+page that contains parts of supervisor visible register state. The guest can
+map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
+
+With this hypercall issued the guest always gets the magic page mapped at the
+desired location. The first parameter indicates the effective address when the
+MMU is enabled. The second parameter indicates the address in real mode, if
+applicable to the target. For now, we always map the page to -4096. This way we
+can access it using absolute load and store functions. The following
+instruction reads the first field of the magic page:
+
+	ld	rX, -4096(0)
+
+The interface is designed to be extensible should there be need later to add
+additional registers to the magic page. If you add fields to the magic page,
+also define a new hypercall feature to indicate that the host can give you more
+registers. Only if the host supports the additional features, make use of them.
+
+The magic page layout is described by struct kvm_vcpu_arch_shared
+in arch/powerpc/include/asm/kvm_para.h.
+
+Magic page features
+===================
+
+When mapping the magic page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE,
+a second return value is passed to the guest. This second return value contains
+a bitmap of available features inside the magic page.
+
+The following enhancements to the magic page are currently available:
+
+  KVM_MAGIC_FEAT_SR		Maps SR registers r/w in the magic page
+  KVM_MAGIC_FEAT_MAS0_TO_SPRG7	Maps MASn, ESR, PIR and high SPRGs
+
+For enhanced features in the magic page, please check for the existence of the
+feature before using them!
+
+Magic page flags
+================
+
+In addition to features that indicate whether a host is capable of a particular
+feature we also have a channel for a guest to tell the guest whether it's capable
+of something. This is what we call "flags".
+
+Flags are passed to the host in the low 12 bits of the Effective Address.
+
+The following flags are currently available for a guest to expose:
+
+  MAGIC_PAGE_FLAG_NOT_MAPPED_NX Guest handles NX bits correctly wrt magic page
+
+MSR bits
+========
+
+The MSR contains bits that require hypervisor intervention and bits that do
+not require direct hypervisor intervention because they only get interpreted
+when entering the guest or don't have any impact on the hypervisor's behavior.
+
+The following bits are safe to be set inside the guest:
+
+  MSR_EE
+  MSR_RI
+
+If any other bit changes in the MSR, please still use mtmsr(d).
+
+Patched instructions
+====================
+
+The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions
+respectively on 32 bit systems with an added offset of 4 to accommodate for big
+endianness.
+
+The following is a list of mapping the Linux kernel performs when running as
+guest. Implementing any of those mappings is optional, as the instruction traps
+also act on the shared page. So calling privileged instructions still works as
+before.
+
+From			To
+====			==
+
+mfmsr	rX		ld	rX, magic_page->msr
+mfsprg	rX, 0		ld	rX, magic_page->sprg0
+mfsprg	rX, 1		ld	rX, magic_page->sprg1
+mfsprg	rX, 2		ld	rX, magic_page->sprg2
+mfsprg	rX, 3		ld	rX, magic_page->sprg3
+mfsrr0	rX		ld	rX, magic_page->srr0
+mfsrr1	rX		ld	rX, magic_page->srr1
+mfdar	rX		ld	rX, magic_page->dar
+mfdsisr	rX		lwz	rX, magic_page->dsisr
+
+mtmsr	rX		std	rX, magic_page->msr
+mtsprg	0, rX		std	rX, magic_page->sprg0
+mtsprg	1, rX		std	rX, magic_page->sprg1
+mtsprg	2, rX		std	rX, magic_page->sprg2
+mtsprg	3, rX		std	rX, magic_page->sprg3
+mtsrr0	rX		std	rX, magic_page->srr0
+mtsrr1	rX		std	rX, magic_page->srr1
+mtdar	rX		std	rX, magic_page->dar
+mtdsisr	rX		stw	rX, magic_page->dsisr
+
+tlbsync			nop
+
+mtmsrd	rX, 0		b	<special mtmsr section>
+mtmsr	rX		b	<special mtmsr section>
+
+mtmsrd	rX, 1		b	<special mtmsrd section>
+
+[Book3S only]
+mtsrin	rX, rY		b	<special mtsrin section>
+
+[BookE only]
+wrteei	[0|1]		b	<special wrteei section>
+
+
+Some instructions require more logic to determine what's going on than a load
+or store instruction can deliver. To enable patching of those, we keep some
+RAM around where we can live translate instructions to. What happens is the
+following:
+
+	1) copy emulation code to memory
+	2) patch that code to fit the emulated instruction
+	3) patch that code to return to the original pc + 4
+	4) patch the original instruction to branch to the new code
+
+That way we can inject an arbitrary amount of code as replacement for a single
+instruction. This allows us to check for pending interrupts when setting EE=1
+for example.
+
+Hypercall ABIs in KVM on PowerPC
+=================================
+1) KVM hypercalls (ePAPR)
+
+These are ePAPR compliant hypercall implementation (mentioned above). Even
+generic hypercalls are implemented here, like the ePAPR idle hcall. These are
+available on all targets.
+
+2) PAPR hypercalls
+
+PAPR hypercalls are needed to run server PowerPC PAPR guests (-M pseries in QEMU).
+These are the same hypercalls that pHyp, the POWER hypervisor implements. Some of
+them are handled in the kernel, some are handled in user space. This is only
+available on book3s_64.
+
+3) OSI hypercalls
+
+Mac-on-Linux is another user of KVM on PowerPC, which has its own hypercall (long
+before KVM). This is supported to maintain compatibility. All these hypercalls get
+forwarded to user space. This is only useful on book3s_32, but can be used with
+book3s_64 as well.
diff --git a/Documentation/virt/kvm/review-checklist.txt b/Documentation/virt/kvm/review-checklist.txt
new file mode 100644
index 000000000000..499af499e296
--- /dev/null
+++ b/Documentation/virt/kvm/review-checklist.txt
@@ -0,0 +1,38 @@
+Review checklist for kvm patches
+================================
+
+1.  The patch must follow Documentation/process/coding-style.rst and
+    Documentation/process/submitting-patches.rst.
+
+2.  Patches should be against kvm.git master branch.
+
+3.  If the patch introduces or modifies a new userspace API:
+    - the API must be documented in Documentation/virt/kvm/api.txt
+    - the API must be discoverable using KVM_CHECK_EXTENSION
+
+4.  New state must include support for save/restore.
+
+5.  New features must default to off (userspace should explicitly request them).
+    Performance improvements can and should default to on.
+
+6.  New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2
+
+7.  Emulator changes should be accompanied by unit tests for qemu-kvm.git
+    kvm/test directory.
+
+8.  Changes should be vendor neutral when possible.  Changes to common code
+    are better than duplicating changes to vendor code.
+
+9.  Similarly, prefer changes to arch independent code than to arch dependent
+    code.
+
+10. User/kernel interfaces and guest/host interfaces must be 64-bit clean
+    (all variables and sizes naturally aligned on 64-bit; use specific types
+    only - u64 rather than ulong).
+
+11. New guest visible features must either be documented in a hardware manual
+    or be accompanied by documentation.
+
+12. Features must be robust against reset and kexec - for example, shared
+    host/guest memory must be unshared to prevent the host from writing to
+    guest memory that the guest has not reserved for this purpose.
diff --git a/Documentation/virt/kvm/s390-diag.txt b/Documentation/virt/kvm/s390-diag.txt
new file mode 100644
index 000000000000..7c52e5f8b210
--- /dev/null
+++ b/Documentation/virt/kvm/s390-diag.txt
@@ -0,0 +1,83 @@
+The s390 DIAGNOSE call on KVM
+=============================
+
+KVM on s390 supports the DIAGNOSE call for making hypercalls, both for
+native hypercalls and for selected hypercalls found on other s390
+hypervisors.
+
+Note that bits are numbered as by the usual s390 convention (most significant
+bit on the left).
+
+
+General remarks
+---------------
+
+DIAGNOSE calls by the guest cause a mandatory intercept. This implies
+all supported DIAGNOSE calls need to be handled by either KVM or its
+userspace.
+
+All DIAGNOSE calls supported by KVM use the RS-a format:
+
+--------------------------------------
+|  '83'  | R1 | R3 | B2 |     D2     |
+--------------------------------------
+0        8    12   16   20           31
+
+The second-operand address (obtained by the base/displacement calculation)
+is not used to address data. Instead, bits 48-63 of this address specify
+the function code, and bits 0-47 are ignored.
+
+The supported DIAGNOSE function codes vary by the userspace used. For
+DIAGNOSE function codes not specific to KVM, please refer to the
+documentation for the s390 hypervisors defining them.
+
+
+DIAGNOSE function code 'X'500' - KVM virtio functions
+-----------------------------------------------------
+
+If the function code specifies 0x500, various virtio-related functions
+are performed.
+
+General register 1 contains the virtio subfunction code. Supported
+virtio subfunctions depend on KVM's userspace. Generally, userspace
+provides either s390-virtio (subcodes 0-2) or virtio-ccw (subcode 3).
+
+Upon completion of the DIAGNOSE instruction, general register 2 contains
+the function's return code, which is either a return code or a subcode
+specific value.
+
+Subcode 0 - s390-virtio notification and early console printk
+    Handled by userspace.
+
+Subcode 1 - s390-virtio reset
+    Handled by userspace.
+
+Subcode 2 - s390-virtio set status
+    Handled by userspace.
+
+Subcode 3 - virtio-ccw notification
+    Handled by either userspace or KVM (ioeventfd case).
+
+    General register 2 contains a subchannel-identification word denoting
+    the subchannel of the virtio-ccw proxy device to be notified.
+
+    General register 3 contains the number of the virtqueue to be notified.
+
+    General register 4 contains a 64bit identifier for KVM usage (the
+    kvm_io_bus cookie). If general register 4 does not contain a valid
+    identifier, it is ignored.
+
+    After completion of the DIAGNOSE call, general register 2 may contain
+    a 64bit identifier (in the kvm_io_bus cookie case), or a negative
+    error value, if an internal error occurred.
+
+    See also the virtio standard for a discussion of this hypercall.
+
+
+DIAGNOSE function code 'X'501 - KVM breakpoint
+----------------------------------------------
+
+If the function code specifies 0x501, breakpoint functions may be performed.
+This function code is handled by userspace.
+
+This diagnose function code has no subfunctions and uses no parameters.
diff --git a/Documentation/virt/kvm/timekeeping.txt b/Documentation/virt/kvm/timekeeping.txt
new file mode 100644
index 000000000000..76808a17ad84
--- /dev/null
+++ b/Documentation/virt/kvm/timekeeping.txt
@@ -0,0 +1,612 @@
+
+	Timekeeping Virtualization for X86-Based Architectures
+
+	Zachary Amsden <zamsden@redhat.com>
+	Copyright (c) 2010, Red Hat.  All rights reserved.
+
+1) Overview
+2) Timing Devices
+3) TSC Hardware
+4) Virtualization Problems
+
+=========================================================================
+
+1) Overview
+
+One of the most complicated parts of the X86 platform, and specifically,
+the virtualization of this platform is the plethora of timing devices available
+and the complexity of emulating those devices.  In addition, virtualization of
+time introduces a new set of challenges because it introduces a multiplexed
+division of time beyond the control of the guest CPU.
+
+First, we will describe the various timekeeping hardware available, then
+present some of the problems which arise and solutions available, giving
+specific recommendations for certain classes of KVM guests.
+
+The purpose of this document is to collect data and information relevant to
+timekeeping which may be difficult to find elsewhere, specifically,
+information relevant to KVM and hardware-based virtualization.
+
+=========================================================================
+
+2) Timing Devices
+
+First we discuss the basic hardware devices available.  TSC and the related
+KVM clock are special enough to warrant a full exposition and are described in
+the following section.
+
+2.1) i8254 - PIT
+
+One of the first timer devices available is the programmable interrupt timer,
+or PIT.  The PIT has a fixed frequency 1.193182 MHz base clock and three
+channels which can be programmed to deliver periodic or one-shot interrupts.
+These three channels can be configured in different modes and have individual
+counters.  Channel 1 and 2 were not available for general use in the original
+IBM PC, and historically were connected to control RAM refresh and the PC
+speaker.  Now the PIT is typically integrated as part of an emulated chipset
+and a separate physical PIT is not used.
+
+The PIT uses I/O ports 0x40 - 0x43.  Access to the 16-bit counters is done
+using single or multiple byte access to the I/O ports.  There are 6 modes
+available, but not all modes are available to all timers, as only timer 2
+has a connected gate input, required for modes 1 and 5.  The gate line is
+controlled by port 61h, bit 0, as illustrated in the following diagram.
+
+ --------------             ----------------
+|              |           |                |
+|  1.1932 MHz  |---------->| CLOCK      OUT | ---------> IRQ 0
+|    Clock     |   |       |                |
+ --------------    |    +->| GATE  TIMER 0  |
+                   |        ----------------
+                   |
+                   |        ----------------
+                   |       |                |
+                   |------>| CLOCK      OUT | ---------> 66.3 KHZ DRAM
+                   |       |                |            (aka /dev/null)
+                   |    +->| GATE  TIMER 1  |
+                   |        ----------------
+                   |
+                   |        ----------------
+                   |       |                |
+                   |------>| CLOCK      OUT | ---------> Port 61h, bit 5
+                           |                |      |
+Port 61h, bit 0 ---------->| GATE  TIMER 2  |       \_.----   ____
+                            ----------------         _|    )--|LPF|---Speaker
+                                                    / *----   \___/
+Port 61h, bit 1 -----------------------------------/
+
+The timer modes are now described.
+
+Mode 0: Single Timeout.   This is a one-shot software timeout that counts down
+ when the gate is high (always true for timers 0 and 1).  When the count
+ reaches zero, the output goes high.
+
+Mode 1: Triggered One-shot.  The output is initially set high.  When the gate
+ line is set high, a countdown is initiated (which does not stop if the gate is
+ lowered), during which the output is set low.  When the count reaches zero,
+ the output goes high.
+
+Mode 2: Rate Generator.  The output is initially set high.  When the countdown
+ reaches 1, the output goes low for one count and then returns high.  The value
+ is reloaded and the countdown automatically resumes.  If the gate line goes
+ low, the count is halted.  If the output is low when the gate is lowered, the
+ output automatically goes high (this only affects timer 2).
+
+Mode 3: Square Wave.   This generates a high / low square wave.  The count
+ determines the length of the pulse, which alternates between high and low
+ when zero is reached.  The count only proceeds when gate is high and is
+ automatically reloaded on reaching zero.  The count is decremented twice at
+ each clock to generate a full high / low cycle at the full periodic rate.
+ If the count is even, the clock remains high for N/2 counts and low for N/2
+ counts; if the clock is odd, the clock is high for (N+1)/2 counts and low
+ for (N-1)/2 counts.  Only even values are latched by the counter, so odd
+ values are not observed when reading.  This is the intended mode for timer 2,
+ which generates sine-like tones by low-pass filtering the square wave output.
+
+Mode 4: Software Strobe.  After programming this mode and loading the counter,
+ the output remains high until the counter reaches zero.  Then the output
+ goes low for 1 clock cycle and returns high.  The counter is not reloaded.
+ Counting only occurs when gate is high.
+
+Mode 5: Hardware Strobe.  After programming and loading the counter, the
+ output remains high.  When the gate is raised, a countdown is initiated
+ (which does not stop if the gate is lowered).  When the counter reaches zero,
+ the output goes low for 1 clock cycle and then returns high.  The counter is
+ not reloaded.
+
+In addition to normal binary counting, the PIT supports BCD counting.  The
+command port, 0x43 is used to set the counter and mode for each of the three
+timers.
+
+PIT commands, issued to port 0x43, using the following bit encoding:
+
+Bit 7-4: Command (See table below)
+Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
+Bit 0  : Binary (0) / BCD (1)
+
+Command table:
+
+0000 - Latch Timer 0 count for port 0x40
+	sample and hold the count to be read in port 0x40;
+	additional commands ignored until counter is read;
+	mode bits ignored.
+
+0001 - Set Timer 0 LSB mode for port 0x40
+	set timer to read LSB only and force MSB to zero;
+	mode bits set timer mode
+
+0010 - Set Timer 0 MSB mode for port 0x40
+	set timer to read MSB only and force LSB to zero;
+	mode bits set timer mode
+
+0011 - Set Timer 0 16-bit mode for port 0x40
+	set timer to read / write LSB first, then MSB;
+	mode bits set timer mode
+
+0100 - Latch Timer 1 count for port 0x41 - as described above
+0101 - Set Timer 1 LSB mode for port 0x41 - as described above
+0110 - Set Timer 1 MSB mode for port 0x41 - as described above
+0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
+
+1000 - Latch Timer 2 count for port 0x42 - as described above
+1001 - Set Timer 2 LSB mode for port 0x42 - as described above
+1010 - Set Timer 2 MSB mode for port 0x42 - as described above
+1011 - Set Timer 2 16-bit mode for port 0x42 as described above
+
+1101 - General counter latch
+	Latch combination of counters into corresponding ports
+	Bit 3 = Counter 2
+	Bit 2 = Counter 1
+	Bit 1 = Counter 0
+	Bit 0 = Unused
+
+1110 - Latch timer status
+	Latch combination of counter mode into corresponding ports
+	Bit 3 = Counter 2
+	Bit 2 = Counter 1
+	Bit 1 = Counter 0
+
+	The output of ports 0x40-0x42 following this command will be:
+
+	Bit 7 = Output pin
+	Bit 6 = Count loaded (0 if timer has expired)
+	Bit 5-4 = Read / Write mode
+	    01 = MSB only
+	    10 = LSB only
+	    11 = LSB / MSB (16-bit)
+	Bit 3-1 = Mode
+	Bit 0 = Binary (0) / BCD mode (1)
+
+2.2) RTC
+
+The second device which was available in the original PC was the MC146818 real
+time clock.  The original device is now obsolete, and usually emulated by the
+system chipset, sometimes by an HPET and some frankenstein IRQ routing.
+
+The RTC is accessed through CMOS variables, which uses an index register to
+control which bytes are read.  Since there is only one index register, read
+of the CMOS and read of the RTC require lock protection (in addition, it is
+dangerous to allow userspace utilities such as hwclock to have direct RTC
+access, as they could corrupt kernel reads and writes of CMOS memory).
+
+The RTC generates an interrupt which is usually routed to IRQ 8.  The interrupt
+can function as a periodic timer, an additional once a day alarm, and can issue
+interrupts after an update of the CMOS registers by the MC146818 is complete.
+The type of interrupt is signalled in the RTC status registers.
+
+The RTC will update the current time fields by battery power even while the
+system is off.  The current time fields should not be read while an update is
+in progress, as indicated in the status register.
+
+The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
+programmed to a 32kHz divider if the RTC is to count seconds.
+
+This is the RAM map originally used for the RTC/CMOS:
+
+Location    Size    Description
+------------------------------------------
+00h         byte    Current second (BCD)
+01h         byte    Seconds alarm (BCD)
+02h         byte    Current minute (BCD)
+03h         byte    Minutes alarm (BCD)
+04h         byte    Current hour (BCD)
+05h         byte    Hours alarm (BCD)
+06h         byte    Current day of week (BCD)
+07h         byte    Current day of month (BCD)
+08h         byte    Current month (BCD)
+09h         byte    Current year (BCD)
+0Ah         byte    Register A
+                       bit 7   = Update in progress
+                       bit 6-4 = Divider for clock
+                                  000 = 4.194 MHz
+                                  001 = 1.049 MHz
+                                  010 = 32 kHz
+                                  10X = test modes
+                                  110 = reset / disable
+                                  111 = reset / disable
+                       bit 3-0 = Rate selection for periodic interrupt
+                                  000 = periodic timer disabled
+                                  001 = 3.90625 uS
+                                  010 = 7.8125 uS
+                                  011 = .122070 mS
+                                  100 = .244141 mS
+                                     ...
+                                 1101 = 125 mS
+                                 1110 = 250 mS
+                                 1111 = 500 mS
+0Bh         byte    Register B
+                       bit 7   = Run (0) / Halt (1)
+                       bit 6   = Periodic interrupt enable
+                       bit 5   = Alarm interrupt enable
+                       bit 4   = Update-ended interrupt enable
+                       bit 3   = Square wave interrupt enable
+                       bit 2   = BCD calendar (0) / Binary (1)
+                       bit 1   = 12-hour mode (0) / 24-hour mode (1)
+                       bit 0   = 0 (DST off) / 1 (DST enabled)
+OCh         byte    Register C (read only)
+                       bit 7   = interrupt request flag (IRQF)
+                       bit 6   = periodic interrupt flag (PF)
+                       bit 5   = alarm interrupt flag (AF)
+                       bit 4   = update interrupt flag (UF)
+                       bit 3-0 = reserved
+ODh         byte    Register D (read only)
+                       bit 7   = RTC has power
+                       bit 6-0 = reserved
+32h         byte    Current century BCD (*)
+  (*) location vendor specific and now determined from ACPI global tables
+
+2.3) APIC
+
+On Pentium and later processors, an on-board timer is available to each CPU
+as part of the Advanced Programmable Interrupt Controller.  The APIC is
+accessed through memory-mapped registers and provides interrupt service to each
+CPU, used for IPIs and local timer interrupts.
+
+Although in theory the APIC is a safe and stable source for local interrupts,
+in practice, many bugs and glitches have occurred due to the special nature of
+the APIC CPU-local memory-mapped hardware.  Beware that CPU errata may affect
+the use of the APIC and that workarounds may be required.  In addition, some of
+these workarounds pose unique constraints for virtualization - requiring either
+extra overhead incurred from extra reads of memory-mapped I/O or additional
+functionality that may be more computationally expensive to implement.
+
+Since the APIC is documented quite well in the Intel and AMD manuals, we will
+avoid repetition of the detail here.  It should be pointed out that the APIC
+timer is programmed through the LVT (local vector timer) register, is capable
+of one-shot or periodic operation, and is based on the bus clock divided down
+by the programmable divider register.
+
+2.4) HPET
+
+HPET is quite complex, and was originally intended to replace the PIT / RTC
+support of the X86 PC.  It remains to be seen whether that will be the case, as
+the de facto standard of PC hardware is to emulate these older devices.  Some
+systems designated as legacy free may support only the HPET as a hardware timer
+device.
+
+The HPET spec is rather loose and vague, requiring at least 3 hardware timers,
+but allowing implementation freedom to support many more.  It also imposes no
+fixed rate on the timer frequency, but does impose some extremal values on
+frequency, error and slew.
+
+In general, the HPET is recommended as a high precision (compared to PIT /RTC)
+time source which is independent of local variation (as there is only one HPET
+in any given system).  The HPET is also memory-mapped, and its presence is
+indicated through ACPI tables by the BIOS.
+
+Detailed specification of the HPET is beyond the current scope of this
+document, as it is also very well documented elsewhere.
+
+2.5) Offboard Timers
+
+Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
+timing chips built into the cards which may have registers which are accessible
+to kernel or user drivers.  To the author's knowledge, using these to generate
+a clocksource for a Linux or other kernel has not yet been attempted and is in
+general frowned upon as not playing by the agreed rules of the game.  Such a
+timer device would require additional support to be virtualized properly and is
+not considered important at this time as no known operating system does this.
+
+=========================================================================
+
+3) TSC Hardware
+
+The TSC or time stamp counter is relatively simple in theory; it counts
+instruction cycles issued by the processor, which can be used as a measure of
+time.  In practice, due to a number of problems, it is the most complicated
+timekeeping device to use.
+
+The TSC is represented internally as a 64-bit MSR which can be read with the
+RDMSR, RDTSC, or RDTSCP (when available) instructions.  In the past, hardware
+limitations made it possible to write the TSC, but generally on old hardware it
+was only possible to write the low 32-bits of the 64-bit counter, and the upper
+32-bits of the counter were cleared.  Now, however, on Intel processors family
+0Fh, for models 3, 4 and 6, and family 06h, models e and f, this restriction
+has been lifted and all 64-bits are writable.  On AMD systems, the ability to
+write the TSC MSR is not an architectural guarantee.
+
+The TSC is accessible from CPL-0 and conditionally, for CPL > 0 software by
+means of the CR4.TSD bit, which when enabled, disables CPL > 0 TSC access.
+
+Some vendors have implemented an additional instruction, RDTSCP, which returns
+atomically not just the TSC, but an indicator which corresponds to the
+processor number.  This can be used to index into an array of TSC variables to
+determine offset information in SMP systems where TSCs are not synchronized.
+The presence of this instruction must be determined by consulting CPUID feature
+bits.
+
+Both VMX and SVM provide extension fields in the virtualization hardware which
+allows the guest visible TSC to be offset by a constant.  Newer implementations
+promise to allow the TSC to additionally be scaled, but this hardware is not
+yet widely available.
+
+3.1) TSC synchronization
+
+The TSC is a CPU-local clock in most implementations.  This means, on SMP
+platforms, the TSCs of different CPUs may start at different times depending
+on when the CPUs are powered on.  Generally, CPUs on the same die will share
+the same clock, however, this is not always the case.
+
+The BIOS may attempt to resynchronize the TSCs during the poweron process and
+the operating system or other system software may attempt to do this as well.
+Several hardware limitations make the problem worse - if it is not possible to
+write the full 64-bits of the TSC, it may be impossible to match the TSC in
+newly arriving CPUs to that of the rest of the system, resulting in
+unsynchronized TSCs.  This may be done by BIOS or system software, but in
+practice, getting a perfectly synchronized TSC will not be possible unless all
+values are read from the same clock, which generally only is possible on single
+socket systems or those with special hardware support.
+
+3.2) TSC and CPU hotplug
+
+As touched on already, CPUs which arrive later than the boot time of the system
+may not have a TSC value that is synchronized with the rest of the system.
+Either system software, BIOS, or SMM code may actually try to establish the TSC
+to a value matching the rest of the system, but a perfect match is usually not
+a guarantee.  This can have the effect of bringing a system from a state where
+TSC is synchronized back to a state where TSC synchronization flaws, however
+small, may be exposed to the OS and any virtualization environment.
+
+3.3) TSC and multi-socket / NUMA
+
+Multi-socket systems, especially large multi-socket systems are likely to have
+individual clocksources rather than a single, universally distributed clock.
+Since these clocks are driven by different crystals, they will not have
+perfectly matched frequency, and temperature and electrical variations will
+cause the CPU clocks, and thus the TSCs to drift over time.  Depending on the
+exact clock and bus design, the drift may or may not be fixed in absolute
+error, and may accumulate over time.
+
+In addition, very large systems may deliberately slew the clocks of individual
+cores.  This technique, known as spread-spectrum clocking, reduces EMI at the
+clock frequency and harmonics of it, which may be required to pass FCC
+standards for telecommunications and computer equipment.
+
+It is recommended not to trust the TSCs to remain synchronized on NUMA or
+multiple socket systems for these reasons.
+
+3.4) TSC and C-states
+
+C-states, or idling states of the processor, especially C1E and deeper sleep
+states may be problematic for TSC as well.  The TSC may stop advancing in such
+a state, resulting in a TSC which is behind that of other CPUs when execution
+is resumed.  Such CPUs must be detected and flagged by the operating system
+based on CPU and chipset identifications.
+
+The TSC in such a case may be corrected by catching it up to a known external
+clocksource.
+
+3.5) TSC frequency change / P-states
+
+To make things slightly more interesting, some CPUs may change frequency.  They
+may or may not run the TSC at the same rate, and because the frequency change
+may be staggered or slewed, at some points in time, the TSC rate may not be
+known other than falling within a range of values.  In this case, the TSC will
+not be a stable time source, and must be calibrated against a known, stable,
+external clock to be a usable source of time.
+
+Whether the TSC runs at a constant rate or scales with the P-state is model
+dependent and must be determined by inspecting CPUID, chipset or vendor
+specific MSR fields.
+
+In addition, some vendors have known bugs where the P-state is actually
+compensated for properly during normal operation, but when the processor is
+inactive, the P-state may be raised temporarily to service cache misses from
+other processors.  In such cases, the TSC on halted CPUs could advance faster
+than that of non-halted processors.  AMD Turion processors are known to have
+this problem.
+
+3.6) TSC and STPCLK / T-states
+
+External signals given to the processor may also have the effect of stopping
+the TSC.  This is typically done for thermal emergency power control to prevent
+an overheating condition, and typically, there is no way to detect that this
+condition has happened.
+
+3.7) TSC virtualization - VMX
+
+VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
+instructions, which is enough for full virtualization of TSC in any manner.  In
+addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
+field specified in the VMCS.  Special instructions must be used to read and
+write the VMCS field.
+
+3.8) TSC virtualization - SVM
+
+SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
+instructions, which is enough for full virtualization of TSC in any manner.  In
+addition, SVM allows passing through the host TSC plus an additional offset
+field specified in the SVM control block.
+
+3.9) TSC feature bits in Linux
+
+In summary, there is no way to guarantee the TSC remains in perfect
+synchronization unless it is explicitly guaranteed by the architecture.  Even
+if so, the TSCs in multi-sockets or NUMA systems may still run independently
+despite being locally consistent.
+
+The following feature bits are used by Linux to signal various TSC attributes,
+but they can only be taken to be meaningful for UP or single node systems.
+
+X86_FEATURE_TSC 		: The TSC is available in hardware
+X86_FEATURE_RDTSCP		: The RDTSCP instruction is available
+X86_FEATURE_CONSTANT_TSC 	: The TSC rate is unchanged with P-states
+X86_FEATURE_NONSTOP_TSC		: The TSC does not stop in C-states
+X86_FEATURE_TSC_RELIABLE	: TSC sync checks are skipped (VMware)
+
+4) Virtualization Problems
+
+Timekeeping is especially problematic for virtualization because a number of
+challenges arise.  The most obvious problem is that time is now shared between
+the host and, potentially, a number of virtual machines.  Thus the virtual
+operating system does not run with 100% usage of the CPU, despite the fact that
+it may very well make that assumption.  It may expect it to remain true to very
+exacting bounds when interrupt sources are disabled, but in reality only its
+virtual interrupt sources are disabled, and the machine may still be preempted
+at any time.  This causes problems as the passage of real time, the injection
+of machine interrupts and the associated clock sources are no longer completely
+synchronized with real time.
+
+This same problem can occur on native hardware to a degree, as SMM mode may
+steal cycles from the naturally on X86 systems when SMM mode is used by the
+BIOS, but not in such an extreme fashion.  However, the fact that SMM mode may
+cause similar problems to virtualization makes it a good justification for
+solving many of these problems on bare metal.
+
+4.1) Interrupt clocking
+
+One of the most immediate problems that occurs with legacy operating systems
+is that the system timekeeping routines are often designed to keep track of
+time by counting periodic interrupts.  These interrupts may come from the PIT
+or the RTC, but the problem is the same: the host virtualization engine may not
+be able to deliver the proper number of interrupts per second, and so guest
+time may fall behind.  This is especially problematic if a high interrupt rate
+is selected, such as 1000 HZ, which is unfortunately the default for many Linux
+guests.
+
+There are three approaches to solving this problem; first, it may be possible
+to simply ignore it.  Guests which have a separate time source for tracking
+'wall clock' or 'real time' may not need any adjustment of their interrupts to
+maintain proper time.  If this is not sufficient, it may be necessary to inject
+additional interrupts into the guest in order to increase the effective
+interrupt rate.  This approach leads to complications in extreme conditions,
+where host load or guest lag is too much to compensate for, and thus another
+solution to the problem has risen: the guest may need to become aware of lost
+ticks and compensate for them internally.  Although promising in theory, the
+implementation of this policy in Linux has been extremely error prone, and a
+number of buggy variants of lost tick compensation are distributed across
+commonly used Linux systems.
+
+Windows uses periodic RTC clocking as a means of keeping time internally, and
+thus requires interrupt slewing to keep proper time.  It does use a low enough
+rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
+practice.
+
+4.2) TSC sampling and serialization
+
+As the highest precision time source available, the cycle counter of the CPU
+has aroused much interest from developers.  As explained above, this timer has
+many problems unique to its nature as a local, potentially unstable and
+potentially unsynchronized source.  One issue which is not unique to the TSC,
+but is highlighted because of its very precise nature is sampling delay.  By
+definition, the counter, once read is already old.  However, it is also
+possible for the counter to be read ahead of the actual use of the result.
+This is a consequence of the superscalar execution of the instruction stream,
+which may execute instructions out of order.  Such execution is called
+non-serialized.  Forcing serialized execution is necessary for precise
+measurement with the TSC, and requires a serializing instruction, such as CPUID
+or an MSR read.
+
+Since CPUID may actually be virtualized by a trap and emulate mechanism, this
+serialization can pose a performance issue for hardware virtualization.  An
+accurate time stamp counter reading may therefore not always be available, and
+it may be necessary for an implementation to guard against "backwards" reads of
+the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
+system.
+
+4.3) Timespec aliasing
+
+Additionally, this lack of serialization from the TSC poses another challenge
+when using results of the TSC when measured against another time source.  As
+the TSC is much higher precision, many possible values of the TSC may be read
+while another clock is still expressing the same value.
+
+That is, you may read (T,T+10) while external clock C maintains the same value.
+Due to non-serialized reads, you may actually end up with a range which
+fluctuates - from (T-1.. T+10).  Thus, any time calculated from a TSC, but
+calibrated against an external value may have a range of valid values.
+Re-calibrating this computation may actually cause time, as computed after the
+calibration, to go backwards, compared with time computed before the
+calibration.
+
+This problem is particularly pronounced with an internal time source in Linux,
+the kernel time, which is expressed in the theoretically high resolution
+timespec - but which advances in much larger granularity intervals, sometimes
+at the rate of jiffies, and possibly in catchup modes, at a much larger step.
+
+This aliasing requires care in the computation and recalibration of kvmclock
+and any other values derived from TSC computation (such as TSC virtualization
+itself).
+
+4.4) Migration
+
+Migration of a virtual machine raises problems for timekeeping in two ways.
+First, the migration itself may take time, during which interrupts cannot be
+delivered, and after which, the guest time may need to be caught up.  NTP may
+be able to help to some degree here, as the clock correction required is
+typically small enough to fall in the NTP-correctable window.
+
+An additional concern is that timers based off the TSC (or HPET, if the raw bus
+clock is exposed) may now be running at different rates, requiring compensation
+in some way in the hypervisor by virtualizing these timers.  In addition,
+migrating to a faster machine may preclude the use of a passthrough TSC, as a
+faster clock cannot be made visible to a guest without the potential of time
+advancing faster than usual.  A slower clock is less of a problem, as it can
+always be caught up to the original rate.  KVM clock avoids these problems by
+simply storing multipliers and offsets against the TSC for the guest to convert
+back into nanosecond resolution values.
+
+4.5) Scheduling
+
+Since scheduling may be based on precise timing and firing of interrupts, the
+scheduling algorithms of an operating system may be adversely affected by
+virtualization.  In theory, the effect is random and should be universally
+distributed, but in contrived as well as real scenarios (guest device access,
+causes of virtualization exits, possible context switch), this may not always
+be the case.  The effect of this has not been well studied.
+
+In an attempt to work around this, several implementations have provided a
+paravirtualized scheduler clock, which reveals the true amount of CPU time for
+which a virtual machine has been running.
+
+4.6) Watchdogs
+
+Watchdog timers, such as the lock detector in Linux may fire accidentally when
+running under hardware virtualization due to timer interrupts being delayed or
+misinterpretation of the passage of real time.  Usually, these warnings are
+spurious and can be ignored, but in some circumstances it may be necessary to
+disable such detection.
+
+4.7) Delays and precision timing
+
+Precise timing and delays may not be possible in a virtualized system.  This
+can happen if the system is controlling physical hardware, or issues delays to
+compensate for slower I/O to and from devices.  The first issue is not solvable
+in general for a virtualized system; hardware control software can't be
+adequately virtualized without a full real-time operating system, which would
+require an RT aware virtualization platform.
+
+The second issue may cause performance problems, but this is unlikely to be a
+significant issue.  In many cases these delays may be eliminated through
+configuration or paravirtualization.
+
+4.8) Covert channels and leaks
+
+In addition to the above problems, time information will inevitably leak to the
+guest about the host in anything but a perfect implementation of virtualized
+time.  This may allow the guest to infer the presence of a hypervisor (as in a
+red-pill type detection), and it may allow information to leak between guests
+by using CPU utilization itself as a signalling channel.  Preventing such
+problems would require completely isolated virtual time which may not track
+real time any longer.  This may be useful in certain security or QA contexts,
+but in general isn't recommended for real-world deployment scenarios.
diff --git a/Documentation/virt/kvm/vcpu-requests.rst b/Documentation/virt/kvm/vcpu-requests.rst
new file mode 100644
index 000000000000..5feb3706a7ae
--- /dev/null
+++ b/Documentation/virt/kvm/vcpu-requests.rst
@@ -0,0 +1,307 @@
+=================
+KVM VCPU Requests
+=================
+
+Overview
+========
+
+KVM supports an internal API enabling threads to request a VCPU thread to
+perform some activity.  For example, a thread may request a VCPU to flush
+its TLB with a VCPU request.  The API consists of the following functions::
+
+  /* Check if any requests are pending for VCPU @vcpu. */
+  bool kvm_request_pending(struct kvm_vcpu *vcpu);
+
+  /* Check if VCPU @vcpu has request @req pending. */
+  bool kvm_test_request(int req, struct kvm_vcpu *vcpu);
+
+  /* Clear request @req for VCPU @vcpu. */
+  void kvm_clear_request(int req, struct kvm_vcpu *vcpu);
+
+  /*
+   * Check if VCPU @vcpu has request @req pending. When the request is
+   * pending it will be cleared and a memory barrier, which pairs with
+   * another in kvm_make_request(), will be issued.
+   */
+  bool kvm_check_request(int req, struct kvm_vcpu *vcpu);
+
+  /*
+   * Make request @req of VCPU @vcpu. Issues a memory barrier, which pairs
+   * with another in kvm_check_request(), prior to setting the request.
+   */
+  void kvm_make_request(int req, struct kvm_vcpu *vcpu);
+
+  /* Make request @req of all VCPUs of the VM with struct kvm @kvm. */
+  bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
+
+Typically a requester wants the VCPU to perform the activity as soon
+as possible after making the request.  This means most requests
+(kvm_make_request() calls) are followed by a call to kvm_vcpu_kick(),
+and kvm_make_all_cpus_request() has the kicking of all VCPUs built
+into it.
+
+VCPU Kicks
+----------
+
+The goal of a VCPU kick is to bring a VCPU thread out of guest mode in
+order to perform some KVM maintenance.  To do so, an IPI is sent, forcing
+a guest mode exit.  However, a VCPU thread may not be in guest mode at the
+time of the kick.  Therefore, depending on the mode and state of the VCPU
+thread, there are two other actions a kick may take.  All three actions
+are listed below:
+
+1) Send an IPI.  This forces a guest mode exit.
+2) Waking a sleeping VCPU.  Sleeping VCPUs are VCPU threads outside guest
+   mode that wait on waitqueues.  Waking them removes the threads from
+   the waitqueues, allowing the threads to run again.  This behavior
+   may be suppressed, see KVM_REQUEST_NO_WAKEUP below.
+3) Nothing.  When the VCPU is not in guest mode and the VCPU thread is not
+   sleeping, then there is nothing to do.
+
+VCPU Mode
+---------
+
+VCPUs have a mode state, ``vcpu->mode``, that is used to track whether the
+guest is running in guest mode or not, as well as some specific
+outside guest mode states.  The architecture may use ``vcpu->mode`` to
+ensure VCPU requests are seen by VCPUs (see "Ensuring Requests Are Seen"),
+as well as to avoid sending unnecessary IPIs (see "IPI Reduction"), and
+even to ensure IPI acknowledgements are waited upon (see "Waiting for
+Acknowledgements").  The following modes are defined:
+
+OUTSIDE_GUEST_MODE
+
+  The VCPU thread is outside guest mode.
+
+IN_GUEST_MODE
+
+  The VCPU thread is in guest mode.
+
+EXITING_GUEST_MODE
+
+  The VCPU thread is transitioning from IN_GUEST_MODE to
+  OUTSIDE_GUEST_MODE.
+
+READING_SHADOW_PAGE_TABLES
+
+  The VCPU thread is outside guest mode, but it wants the sender of
+  certain VCPU requests, namely KVM_REQ_TLB_FLUSH, to wait until the VCPU
+  thread is done reading the page tables.
+
+VCPU Request Internals
+======================
+
+VCPU requests are simply bit indices of the ``vcpu->requests`` bitmap.
+This means general bitops, like those documented in [atomic-ops]_ could
+also be used, e.g. ::
+
+  clear_bit(KVM_REQ_UNHALT & KVM_REQUEST_MASK, &vcpu->requests);
+
+However, VCPU request users should refrain from doing so, as it would
+break the abstraction.  The first 8 bits are reserved for architecture
+independent requests, all additional bits are available for architecture
+dependent requests.
+
+Architecture Independent Requests
+---------------------------------
+
+KVM_REQ_TLB_FLUSH
+
+  KVM's common MMU notifier may need to flush all of a guest's TLB
+  entries, calling kvm_flush_remote_tlbs() to do so.  Architectures that
+  choose to use the common kvm_flush_remote_tlbs() implementation will
+  need to handle this VCPU request.
+
+KVM_REQ_MMU_RELOAD
+
+  When shadow page tables are used and memory slots are removed it's
+  necessary to inform each VCPU to completely refresh the tables.  This
+  request is used for that.
+
+KVM_REQ_PENDING_TIMER
+
+  This request may be made from a timer handler run on the host on behalf
+  of a VCPU.  It informs the VCPU thread to inject a timer interrupt.
+
+KVM_REQ_UNHALT
+
+  This request may be made from the KVM common function kvm_vcpu_block(),
+  which is used to emulate an instruction that causes a CPU to halt until
+  one of an architectural specific set of events and/or interrupts is
+  received (determined by checking kvm_arch_vcpu_runnable()).  When that
+  event or interrupt arrives kvm_vcpu_block() makes the request.  This is
+  in contrast to when kvm_vcpu_block() returns due to any other reason,
+  such as a pending signal, which does not indicate the VCPU's halt
+  emulation should stop, and therefore does not make the request.
+
+KVM_REQUEST_MASK
+----------------
+
+VCPU requests should be masked by KVM_REQUEST_MASK before using them with
+bitops.  This is because only the lower 8 bits are used to represent the
+request's number.  The upper bits are used as flags.  Currently only two
+flags are defined.
+
+VCPU Request Flags
+------------------
+
+KVM_REQUEST_NO_WAKEUP
+
+  This flag is applied to requests that only need immediate attention
+  from VCPUs running in guest mode.  That is, sleeping VCPUs do not need
+  to be awaken for these requests.  Sleeping VCPUs will handle the
+  requests when they are awaken later for some other reason.
+
+KVM_REQUEST_WAIT
+
+  When requests with this flag are made with kvm_make_all_cpus_request(),
+  then the caller will wait for each VCPU to acknowledge its IPI before
+  proceeding.  This flag only applies to VCPUs that would receive IPIs.
+  If, for example, the VCPU is sleeping, so no IPI is necessary, then
+  the requesting thread does not wait.  This means that this flag may be
+  safely combined with KVM_REQUEST_NO_WAKEUP.  See "Waiting for
+  Acknowledgements" for more information about requests with
+  KVM_REQUEST_WAIT.
+
+VCPU Requests with Associated State
+===================================
+
+Requesters that want the receiving VCPU to handle new state need to ensure
+the newly written state is observable to the receiving VCPU thread's CPU
+by the time it observes the request.  This means a write memory barrier
+must be inserted after writing the new state and before setting the VCPU
+request bit.  Additionally, on the receiving VCPU thread's side, a
+corresponding read barrier must be inserted after reading the request bit
+and before proceeding to read the new state associated with it.  See
+scenario 3, Message and Flag, of [lwn-mb]_ and the kernel documentation
+[memory-barriers]_.
+
+The pair of functions, kvm_check_request() and kvm_make_request(), provide
+the memory barriers, allowing this requirement to be handled internally by
+the API.
+
+Ensuring Requests Are Seen
+==========================
+
+When making requests to VCPUs, we want to avoid the receiving VCPU
+executing in guest mode for an arbitrary long time without handling the
+request.  We can be sure this won't happen as long as we ensure the VCPU
+thread checks kvm_request_pending() before entering guest mode and that a
+kick will send an IPI to force an exit from guest mode when necessary.
+Extra care must be taken to cover the period after the VCPU thread's last
+kvm_request_pending() check and before it has entered guest mode, as kick
+IPIs will only trigger guest mode exits for VCPU threads that are in guest
+mode or at least have already disabled interrupts in order to prepare to
+enter guest mode.  This means that an optimized implementation (see "IPI
+Reduction") must be certain when it's safe to not send the IPI.  One
+solution, which all architectures except s390 apply, is to:
+
+- set ``vcpu->mode`` to IN_GUEST_MODE between disabling the interrupts and
+  the last kvm_request_pending() check;
+- enable interrupts atomically when entering the guest.
+
+This solution also requires memory barriers to be placed carefully in both
+the requesting thread and the receiving VCPU.  With the memory barriers we
+can exclude the possibility of a VCPU thread observing
+!kvm_request_pending() on its last check and then not receiving an IPI for
+the next request made of it, even if the request is made immediately after
+the check.  This is done by way of the Dekker memory barrier pattern
+(scenario 10 of [lwn-mb]_).  As the Dekker pattern requires two variables,
+this solution pairs ``vcpu->mode`` with ``vcpu->requests``.  Substituting
+them into the pattern gives::
+
+  CPU1                                    CPU2
+  =================                       =================
+  local_irq_disable();
+  WRITE_ONCE(vcpu->mode, IN_GUEST_MODE);  kvm_make_request(REQ, vcpu);
+  smp_mb();                               smp_mb();
+  if (kvm_request_pending(vcpu)) {        if (READ_ONCE(vcpu->mode) ==
+                                              IN_GUEST_MODE) {
+      ...abort guest entry...                 ...send IPI...
+  }                                       }
+
+As stated above, the IPI is only useful for VCPU threads in guest mode or
+that have already disabled interrupts.  This is why this specific case of
+the Dekker pattern has been extended to disable interrupts before setting
+``vcpu->mode`` to IN_GUEST_MODE.  WRITE_ONCE() and READ_ONCE() are used to
+pedantically implement the memory barrier pattern, guaranteeing the
+compiler doesn't interfere with ``vcpu->mode``'s carefully planned
+accesses.
+
+IPI Reduction
+-------------
+
+As only one IPI is needed to get a VCPU to check for any/all requests,
+then they may be coalesced.  This is easily done by having the first IPI
+sending kick also change the VCPU mode to something !IN_GUEST_MODE.  The
+transitional state, EXITING_GUEST_MODE, is used for this purpose.
+
+Waiting for Acknowledgements
+----------------------------
+
+Some requests, those with the KVM_REQUEST_WAIT flag set, require IPIs to
+be sent, and the acknowledgements to be waited upon, even when the target
+VCPU threads are in modes other than IN_GUEST_MODE.  For example, one case
+is when a target VCPU thread is in READING_SHADOW_PAGE_TABLES mode, which
+is set after disabling interrupts.  To support these cases, the
+KVM_REQUEST_WAIT flag changes the condition for sending an IPI from
+checking that the VCPU is IN_GUEST_MODE to checking that it is not
+OUTSIDE_GUEST_MODE.
+
+Request-less VCPU Kicks
+-----------------------
+
+As the determination of whether or not to send an IPI depends on the
+two-variable Dekker memory barrier pattern, then it's clear that
+request-less VCPU kicks are almost never correct.  Without the assurance
+that a non-IPI generating kick will still result in an action by the
+receiving VCPU, as the final kvm_request_pending() check does for
+request-accompanying kicks, then the kick may not do anything useful at
+all.  If, for instance, a request-less kick was made to a VCPU that was
+just about to set its mode to IN_GUEST_MODE, meaning no IPI is sent, then
+the VCPU thread may continue its entry without actually having done
+whatever it was the kick was meant to initiate.
+
+One exception is x86's posted interrupt mechanism.  In this case, however,
+even the request-less VCPU kick is coupled with the same
+local_irq_disable() + smp_mb() pattern described above; the ON bit
+(Outstanding Notification) in the posted interrupt descriptor takes the
+role of ``vcpu->requests``.  When sending a posted interrupt, PIR.ON is
+set before reading ``vcpu->mode``; dually, in the VCPU thread,
+vmx_sync_pir_to_irr() reads PIR after setting ``vcpu->mode`` to
+IN_GUEST_MODE.
+
+Additional Considerations
+=========================
+
+Sleeping VCPUs
+--------------
+
+VCPU threads may need to consider requests before and/or after calling
+functions that may put them to sleep, e.g. kvm_vcpu_block().  Whether they
+do or not, and, if they do, which requests need consideration, is
+architecture dependent.  kvm_vcpu_block() calls kvm_arch_vcpu_runnable()
+to check if it should awaken.  One reason to do so is to provide
+architectures a function where requests may be checked if necessary.
+
+Clearing Requests
+-----------------
+
+Generally it only makes sense for the receiving VCPU thread to clear a
+request.  However, in some circumstances, such as when the requesting
+thread and the receiving VCPU thread are executed serially, such as when
+they are the same thread, or when they are using some form of concurrency
+control to temporarily execute synchronously, then it's possible to know
+that the request may be cleared immediately, rather than waiting for the
+receiving VCPU thread to handle the request in VCPU RUN.  The only current
+examples of this are kvm_vcpu_block() calls made by VCPUs to block
+themselves.  A possible side-effect of that call is to make the
+KVM_REQ_UNHALT request, which may then be cleared immediately when the
+VCPU returns from the call.
+
+References
+==========
+
+.. [atomic-ops] Documentation/core-api/atomic_ops.rst
+.. [memory-barriers] Documentation/memory-barriers.txt
+.. [lwn-mb] https://lwn.net/Articles/573436/
diff --git a/Documentation/virt/paravirt_ops.rst b/Documentation/virt/paravirt_ops.rst
new file mode 100644
index 000000000000..6b789d27cead
--- /dev/null
+++ b/Documentation/virt/paravirt_ops.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Paravirt_ops
+============
+
+Linux provides support for different hypervisor virtualization technologies.
+Historically different binary kernels would be required in order to support
+different hypervisors, this restriction was removed with pv_ops.
+Linux pv_ops is a virtualization API which enables support for different
+hypervisors. It allows each hypervisor to override critical operations and
+allows a single kernel binary to run on all supported execution environments
+including native machine -- without any hypervisors.
+
+pv_ops provides a set of function pointers which represent operations
+corresponding to low level critical instructions and high level
+functionalities in various areas. pv-ops allows for optimizations at run
+time by enabling binary patching of the low-ops critical operations
+at boot time.
+
+pv_ops operations are classified into three categories:
+
+- simple indirect call
+   These operations correspond to high level functionality where it is
+   known that the overhead of indirect call isn't very important.
+
+- indirect call which allows optimization with binary patch
+   Usually these operations correspond to low level critical instructions. They
+   are called frequently and are performance critical. The overhead is
+   very important.
+
+- a set of macros for hand written assembly code
+   Hand written assembly codes (.S files) also need paravirtualization
+   because they include sensitive instructions or some of code paths in
+   them are very performance critical.
diff --git a/Documentation/virt/uml/UserModeLinux-HOWTO.txt b/Documentation/virt/uml/UserModeLinux-HOWTO.txt
new file mode 100644
index 000000000000..87b80f589e1c
--- /dev/null
+++ b/Documentation/virt/uml/UserModeLinux-HOWTO.txt
@@ -0,0 +1,4589 @@
+  User Mode Linux HOWTO
+  User Mode Linux Core Team
+  Mon Nov 18 14:16:16 EST 2002
+
+  This document describes the use and abuse of Jeff Dike's User Mode
+  Linux: a port of the Linux kernel as a normal Intel Linux process.
+  ______________________________________________________________________
+
+  Table of Contents
+
+  1. Introduction
+
+     1.1 How is User Mode Linux Different?
+     1.2 Why Would I Want User Mode Linux?
+
+  2. Compiling the kernel and modules
+
+     2.1 Compiling the kernel
+     2.2 Compiling and installing kernel modules
+     2.3 Compiling and installing uml_utilities
+
+  3. Running UML and logging in
+
+     3.1 Running UML
+     3.2 Logging in
+     3.3 Examples
+
+  4. UML on 2G/2G hosts
+
+     4.1 Introduction
+     4.2 The problem
+     4.3 The solution
+
+  5. Setting up serial lines and consoles
+
+     5.1 Specifying the device
+     5.2 Specifying the channel
+     5.3 Examples
+
+  6. Setting up the network
+
+     6.1 General setup
+     6.2 Userspace daemons
+     6.3 Specifying ethernet addresses
+     6.4 UML interface setup
+     6.5 Multicast
+     6.6 TUN/TAP with the uml_net helper
+     6.7 TUN/TAP with a preconfigured tap device
+     6.8 Ethertap
+     6.9 The switch daemon
+     6.10 Slip
+     6.11 Slirp
+     6.12 pcap
+     6.13 Setting up the host yourself
+
+  7. Sharing Filesystems between Virtual Machines
+
+     7.1 A warning
+     7.2 Using layered block devices
+     7.3 Note!
+     7.4 Another warning
+     7.5 uml_moo : Merging a COW file with its backing file
+
+  8. Creating filesystems
+
+     8.1 Create the filesystem file
+     8.2 Assign the file to a UML device
+     8.3 Creating and mounting the filesystem
+
+  9. Host file access
+
+     9.1 Using hostfs
+     9.2 hostfs as the root filesystem
+     9.3 Building hostfs
+
+  10. The Management Console
+     10.1 version
+     10.2 halt and reboot
+     10.3 config
+     10.4 remove
+     10.5 sysrq
+     10.6 help
+     10.7 cad
+     10.8 stop
+     10.9 go
+
+  11. Kernel debugging
+
+     11.1 Starting the kernel under gdb
+     11.2 Examining sleeping processes
+     11.3 Running ddd on UML
+     11.4 Debugging modules
+     11.5 Attaching gdb to the kernel
+     11.6 Using alternate debuggers
+
+  12. Kernel debugging examples
+
+     12.1 The case of the hung fsck
+     12.2 Episode 2: The case of the hung fsck
+
+  13. What to do when UML doesn't work
+
+     13.1 Strange compilation errors when you build from source
+     13.2 (obsolete)
+     13.3 A variety of panics and hangs with /tmp on a reiserfs  filesystem
+     13.4 The compile fails with errors about conflicting types for 'open', 'dup', and 'waitpid'
+     13.5 UML doesn't work when /tmp is an NFS filesystem
+     13.6 UML hangs on boot when compiled with gprof support
+     13.7 syslogd dies with a SIGTERM on startup
+     13.8 TUN/TAP networking doesn't work on a 2.4 host
+     13.9 You can network to the host but not to other machines on the net
+     13.10 I have no root and I want to scream
+     13.11 UML build conflict between ptrace.h and ucontext.h
+     13.12 The UML BogoMips is exactly half the host's BogoMips
+     13.13 When you run UML, it immediately segfaults
+     13.14 xterms appear, then immediately disappear
+     13.15 Any other panic, hang, or strange behavior
+
+  14. Diagnosing Problems
+
+     14.1 Case 1 : Normal kernel panics
+     14.2 Case 2 : Tracing thread panics
+     14.3 Case 3 : Tracing thread panics caused by other threads
+     14.4 Case 4 : Hangs
+
+  15. Thanks
+
+     15.1 Code and Documentation
+     15.2 Flushing out bugs
+     15.3 Buglets and clean-ups
+     15.4 Case Studies
+     15.5 Other contributions
+
+
+  ______________________________________________________________________
+
+  1.  Introduction
+
+  Welcome to User Mode Linux.  It's going to be fun.
+
+
+
+  1.1.  How is User Mode Linux Different?
+
+  Normally, the Linux Kernel talks straight to your hardware (video
+  card, keyboard, hard drives, etc), and any programs which run ask the
+  kernel to operate the hardware, like so:
+
+
+
+         +-----------+-----------+----+
+         | Process 1 | Process 2 | ...|
+         +-----------+-----------+----+
+         |       Linux Kernel         |
+         +----------------------------+
+         |         Hardware           |
+         +----------------------------+
+
+
+
+
+  The User Mode Linux Kernel is different; instead of talking to the
+  hardware, it talks to a `real' Linux kernel (called the `host kernel'
+  from now on), like any other program.  Programs can then run inside
+  User-Mode Linux as if they were running under a normal kernel, like
+  so:
+
+
+
+                     +----------------+
+                     | Process 2 | ...|
+         +-----------+----------------+
+         | Process 1 | User-Mode Linux|
+         +----------------------------+
+         |       Linux Kernel         |
+         +----------------------------+
+         |         Hardware           |
+         +----------------------------+
+
+
+
+
+
+  1.2.  Why Would I Want User Mode Linux?
+
+
+  1. If User Mode Linux crashes, your host kernel is still fine.
+
+  2. You can run a usermode kernel as a non-root user.
+
+  3. You can debug the User Mode Linux like any normal process.
+
+  4. You can run gprof (profiling) and gcov (coverage testing).
+
+  5. You can play with your kernel without breaking things.
+
+  6. You can use it as a sandbox for testing new apps.
+
+  7. You can try new development kernels safely.
+
+  8. You can run different distributions simultaneously.
+
+  9. It's extremely fun.
+
+
+
+
+
+  2.  Compiling the kernel and modules
+
+
+
+
+  2.1.  Compiling the kernel
+
+
+  Compiling the user mode kernel is just like compiling any other
+  kernel.  Let's go through the steps, using 2.4.0-prerelease (current
+  as of this writing) as an example:
+
+
+  1. Download the latest UML patch from
+
+     the download page <http://user-mode-linux.sourceforge.net/
+
+     In this example, the file is uml-patch-2.4.0-prerelease.bz2.
+
+
+  2. Download the matching kernel from your favourite kernel mirror,
+     such as:
+
+     ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2
+     <ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2>
+     .
+
+
+  3. Make a directory and unpack the kernel into it.
+
+
+
+       host%
+       mkdir ~/uml
+
+
+
+
+
+
+       host%
+       cd ~/uml
+
+
+
+
+
+
+       host%
+       tar -xzvf linux-2.4.0-prerelease.tar.bz2
+
+
+
+
+
+
+  4. Apply the patch using
+
+
+
+       host%
+       cd ~/uml/linux
+
+
+
+       host%
+       bzcat uml-patch-2.4.0-prerelease.bz2 | patch -p1
+
+
+
+
+
+
+  5. Run your favorite config; `make xconfig ARCH=um' is the most
+     convenient.  `make config ARCH=um' and 'make menuconfig ARCH=um'
+     will work as well.  The defaults will give you a useful kernel.  If
+     you want to change something, go ahead, it probably won't hurt
+     anything.
+
+
+     Note:  If the host is configured with a 2G/2G address space split
+     rather than the usual 3G/1G split, then the packaged UML binaries
+     will not run.  They will immediately segfault.  See ``UML on 2G/2G
+     hosts''  for the scoop on running UML on your system.
+
+
+
+  6. Finish with `make linux ARCH=um': the result is a file called
+     `linux' in the top directory of your source tree.
+
+  Make sure that you don't build this kernel in /usr/src/linux.  On some
+  distributions, /usr/include/asm is a link into this pool.  The user-
+  mode build changes the other end of that link, and things that include
+  <asm/anything.h> stop compiling.
+
+  The sources are also available from cvs at the project's cvs page,
+  which has directions on getting the sources. You can also browse the
+  CVS pool from there.
+
+  If you get the CVS sources, you will have to check them out into an
+  empty directory. You will then have to copy each file into the
+  corresponding directory in the appropriate kernel pool.
+
+  If you don't have the latest kernel pool, you can get the
+  corresponding user-mode sources with
+
+
+       host% cvs co -r v_2_3_x linux
+
+
+
+
+  where 'x' is the version in your pool. Note that you will not get the
+  bug fixes and enhancements that have gone into subsequent releases.
+
+
+  2.2.  Compiling and installing kernel modules
+
+  UML modules are built in the same way as the native kernel (with the
+  exception of the 'ARCH=um' that you always need for UML):
+
+
+       host% make modules ARCH=um
+
+
+
+
+  Any modules that you want to load into this kernel need to be built in
+  the user-mode pool.  Modules from the native kernel won't work.
+
+  You can install them by using ftp or something to copy them into the
+  virtual machine and dropping them into /lib/modules/`uname -r`.
+
+  You can also get the kernel build process to install them as follows:
+
+  1. with the kernel not booted, mount the root filesystem in the top
+     level of the kernel pool:
+
+
+       host% mount root_fs mnt -o loop
+
+
+
+
+
+
+  2. run
+
+
+       host%
+       make modules_install INSTALL_MOD_PATH=`pwd`/mnt ARCH=um
+
+
+
+
+
+
+  3. unmount the filesystem
+
+
+       host% umount mnt
+
+
+
+
+
+
+  4. boot the kernel on it
+
+
+  When the system is booted, you can use insmod as usual to get the
+  modules into the kernel.  A number of things have been loaded into UML
+  as modules, especially filesystems and network protocols and filters,
+  so most symbols which need to be exported probably already are.
+  However, if you do find symbols that need exporting, let  us
+  <http://user-mode-linux.sourceforge.net/>  know, and
+  they'll be "taken care of".
+
+
+
+  2.3.  Compiling and installing uml_utilities
+
+  Many features of the UML kernel require a user-space helper program,
+  so a uml_utilities package is distributed separately from the kernel
+  patch which provides these helpers. Included within this is:
+
+  o  port-helper - Used by consoles which connect to xterms or ports
+
+  o  tunctl - Configuration tool to create and delete tap devices
+
+  o  uml_net - Setuid binary for automatic tap device configuration
+
+  o  uml_switch - User-space virtual switch required for daemon
+     transport
+
+     The uml_utilities tree is compiled with:
+
+
+       host#
+       make && make install
+
+
+
+
+  Note that UML kernel patches may require a specific version of the
+  uml_utilities distribution. If you don't keep up with the mailing
+  lists, ensure that you have the latest release of uml_utilities if you
+  are experiencing problems with your UML kernel, particularly when
+  dealing with consoles or command-line switches to the helper programs
+
+
+
+
+
+
+
+
+  3.  Running UML and logging in
+
+
+
+  3.1.  Running UML
+
+  It runs on 2.2.15 or later, and all 2.4 kernels.
+
+
+  Booting UML is straightforward.  Simply run 'linux': it will try to
+  mount the file `root_fs' in the current directory.  You do not need to
+  run it as root.  If your root filesystem is not named `root_fs', then
+  you need to put a `ubd0=root_fs_whatever' switch on the linux command
+  line.
+
+
+  You will need a filesystem to boot UML from.  There are a number
+  available for download from  here  <http://user-mode-
+  linux.sourceforge.net/> .  There are also  several tools
+  <http://user-mode-linux.sourceforge.net/>  which can be
+  used to generate UML-compatible filesystem images from media.
+  The kernel will boot up and present you with a login prompt.
+
+
+  Note:  If the host is configured with a 2G/2G address space split
+  rather than the usual 3G/1G split, then the packaged UML binaries will
+  not run.  They will immediately segfault.  See ``UML on 2G/2G hosts''
+  for the scoop on running UML on your system.
+
+
+
+  3.2.  Logging in
+
+
+
+  The prepackaged filesystems have a root account with password 'root'
+  and a user account with password 'user'.  The login banner will
+  generally tell you how to log in.  So, you log in and you will find
+  yourself inside a little virtual machine. Our filesystems have a
+  variety of commands and utilities installed (and it is fairly easy to
+  add more), so you will have a lot of tools with which to poke around
+  the system.
+
+  There are a couple of other ways to log in:
+
+  o  On a virtual console
+
+
+
+     Each virtual console that is configured (i.e. the device exists in
+     /dev and /etc/inittab runs a getty on it) will come up in its own
+     xterm.  If you get tired of the xterms, read ``Setting up serial
+     lines and consoles''  to see how to attach the consoles to
+     something else, like host ptys.
+
+
+
+  o  Over the serial line
+
+
+     In the boot output, find a line that looks like:
+
+
+
+       serial line 0 assigned pty /dev/ptyp1
+
+
+
+
+  Attach your favorite terminal program to the corresponding tty.  I.e.
+  for minicom, the command would be
+
+
+       host% minicom -o -p /dev/ttyp1
+
+
+
+
+
+
+  o  Over the net
+
+
+     If the network is running, then you can telnet to the virtual
+     machine and log in to it.  See ``Setting up the network''  to learn
+     about setting up a virtual network.
+
+  When you're done using it, run halt, and the kernel will bring itself
+  down and the process will exit.
+
+
+  3.3.  Examples
+
+  Here are some examples of UML in action:
+
+  o  A login session <http://user-mode-linux.sourceforge.net/login.html>
+
+  o  A virtual network <http://user-mode-linux.sourceforge.net/net.html>
+
+
+
+
+
+
+
+  4.  UML on 2G/2G hosts
+
+
+
+
+  4.1.  Introduction
+
+
+  Most Linux machines are configured so that the kernel occupies the
+  upper 1G (0xc0000000 - 0xffffffff) of the 4G address space and
+  processes use the lower 3G (0x00000000 - 0xbfffffff).  However, some
+  machine are configured with a 2G/2G split, with the kernel occupying
+  the upper 2G (0x80000000 - 0xffffffff) and processes using the lower
+  2G (0x00000000 - 0x7fffffff).
+
+
+
+
+  4.2.  The problem
+
+
+  The prebuilt UML binaries on this site will not run on 2G/2G hosts
+  because UML occupies the upper .5G of the 3G process address space
+  (0xa0000000 - 0xbfffffff).  Obviously, on 2G/2G hosts, this is right
+  in the middle of the kernel address space, so UML won't even load - it
+  will immediately segfault.
+
+
+
+
+  4.3.  The solution
+
+
+  The fix for this is to rebuild UML from source after enabling
+  CONFIG_HOST_2G_2G (under 'General Setup').  This will cause UML to
+  load itself in the top .5G of that smaller process address space,
+  where it will run fine.  See ``Compiling the kernel and modules''  if
+  you need help building UML from source.
+
+
+
+
+
+
+
+
+
+
+  5.  Setting up serial lines and consoles
+
+
+  It is possible to attach UML serial lines and consoles to many types
+  of host I/O channels by specifying them on the command line.
+
+
+  You can attach them to host ptys, ttys, file descriptors, and ports.
+  This allows you to do things like
+
+  o  have a UML console appear on an unused host console,
+
+  o  hook two virtual machines together by having one attach to a pty
+     and having the other attach to the corresponding tty
+
+  o  make a virtual machine accessible from the net by attaching a
+     console to a port on the host.
+
+
+  The general format of the command line option is device=channel.
+
+
+
+  5.1.  Specifying the device
+
+  Devices are specified with "con" or "ssl" (console or serial line,
+  respectively), optionally with a device number if you are talking
+  about a specific device.
+
+
+  Using just "con" or "ssl" describes all of the consoles or serial
+  lines.  If you want to talk about console #3 or serial line #10, they
+  would be "con3" and "ssl10", respectively.
+
+
+  A specific device name will override a less general "con=" or "ssl=".
+  So, for example, you can assign a pty to each of the serial lines
+  except for the first two like this:
+
+
+        ssl=pty ssl0=tty:/dev/tty0 ssl1=tty:/dev/tty1
+
+
+
+
+  The specificity of the device name is all that matters; order on the
+  command line is irrelevant.
+
+
+
+  5.2.  Specifying the channel
+
+  There are a number of different types of channels to attach a UML
+  device to, each with a different way of specifying exactly what to
+  attach to.
+
+  o  pseudo-terminals - device=pty pts terminals - device=pts
+
+
+     This will cause UML to allocate a free host pseudo-terminal for the
+     device.  The terminal that it got will be announced in the boot
+     log.  You access it by attaching a terminal program to the
+     corresponding tty:
+
+  o  screen /dev/pts/n
+
+  o  screen /dev/ttyxx
+
+  o  minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
+     devices
+
+  o  kermit - start it up, 'open' the device, then 'connect'
+
+
+
+
+
+  o  terminals - device=tty:tty device file
+
+
+     This will make UML attach the device to the specified tty (i.e
+
+
+        con1=tty:/dev/tty3
+
+
+
+
+  will attach UML's console 1 to the host's /dev/tty3).  If the tty that
+  you specify is the slave end of a tty/pty pair, something else must
+  have already opened the corresponding pty in order for this to work.
+
+
+
+
+
+  o  xterms - device=xterm
+
+
+     UML will run an xterm and the device will be attached to it.
+
+
+
+
+
+  o  Port - device=port:port number
+
+
+     This will attach the UML devices to the specified host port.
+     Attaching console 1 to the host's port 9000 would be done like
+     this:
+
+
+        con1=port:9000
+
+
+
+
+  Attaching all the serial lines to that port would be done similarly:
+
+
+        ssl=port:9000
+
+
+
+
+  You access these devices by telnetting to that port.  Each active tel-
+  net session gets a different device.  If there are more telnets to a
+  port than UML devices attached to it, then the extra telnet sessions
+  will block until an existing telnet detaches, or until another device
+  becomes active (i.e. by being activated in /etc/inittab).
+
+  This channel has the advantage that you can both attach multiple UML
+  devices to it and know how to access them without reading the UML boot
+  log.  It is also unique in allowing access to a UML from remote
+  machines without requiring that the UML be networked.  This could be
+  useful in allowing public access to UMLs because they would be
+  accessible from the net, but wouldn't need any kind of network
+  filtering or access control because they would have no network access.
+
+
+  If you attach the main console to a portal, then the UML boot will
+  appear to hang.  In reality, it's waiting for a telnet to connect, at
+  which point the boot will proceed.
+
+
+
+
+
+  o  already-existing file descriptors - device=file descriptor
+
+
+     If you set up a file descriptor on the UML command line, you can
+     attach a UML device to it.  This is most commonly used to put the
+     main console back on stdin and stdout after assigning all the other
+     consoles to something else:
+
+
+        con0=fd:0,fd:1 con=pts
+
+
+
+
+
+
+
+
+  o  Nothing - device=null
+
+
+     This allows the device to be opened, in contrast to 'none', but
+     reads will block, and writes will succeed and the data will be
+     thrown out.
+
+
+
+
+
+  o  None - device=none
+
+
+     This causes the device to disappear.
+
+
+
+  You can also specify different input and output channels for a device
+  by putting a comma between them:
+
+
+        ssl3=tty:/dev/tty2,xterm
+
+
+
+
+  will cause serial line 3 to accept input on the host's /dev/tty2 and
+  display output on an xterm.  That's a silly example - the most common
+  use of this syntax is to reattach the main console to stdin and stdout
+  as shown above.
+
+
+  If you decide to move the main console away from stdin/stdout, the
+  initial boot output will appear in the terminal that you're running
+  UML in.  However, once the console driver has been officially
+  initialized, then the boot output will start appearing wherever you
+  specified that console 0 should be.  That device will receive all
+  subsequent output.
+
+
+
+  5.3.  Examples
+
+  There are a number of interesting things you can do with this
+  capability.
+
+
+  First, this is how you get rid of those bleeding console xterms by
+  attaching them to host ptys:
+
+
+        con=pty con0=fd:0,fd:1
+
+
+
+
+  This will make a UML console take over an unused host virtual console,
+  so that when you switch to it, you will see the UML login prompt
+  rather than the host login prompt:
+
+
+        con1=tty:/dev/tty6
+
+
+
+
+  You can attach two virtual machines together with what amounts to a
+  serial line as follows:
+
+  Run one UML with a serial line attached to a pty -
+
+
+        ssl1=pty
+
+
+
+
+  Look at the boot log to see what pty it got (this example will assume
+  that it got /dev/ptyp1).
+
+  Boot the other UML with a serial line attached to the corresponding
+  tty -
+
+
+        ssl1=tty:/dev/ttyp1
+
+
+
+
+  Log in, make sure that it has no getty on that serial line, attach a
+  terminal program like minicom to it, and you should see the login
+  prompt of the other virtual machine.
+
+
+  6.  Setting up the network
+
+
+
+  This page describes how to set up the various transports and to
+  provide a UML instance with network access to the host, other machines
+  on the local net, and the rest of the net.
+
+
+  As of 2.4.5, UML networking has been completely redone to make it much
+  easier to set up, fix bugs, and add new features.
+
+
+  There is a new helper, uml_net, which does the host setup that
+  requires root privileges.
+
+
+  There are currently five transport types available for a UML virtual
+  machine to exchange packets with other hosts:
+
+  o  ethertap
+
+  o  TUN/TAP
+
+  o  Multicast
+
+  o  a switch daemon
+
+  o  slip
+
+  o  slirp
+
+  o  pcap
+
+     The TUN/TAP, ethertap, slip, and slirp transports allow a UML
+     instance to exchange packets with the host.  They may be directed
+     to the host or the host may just act as a router to provide access
+     to other physical or virtual machines.
+
+
+  The pcap transport is a synthetic read-only interface, using the
+  libpcap binary to collect packets from interfaces on the host and
+  filter them.  This is useful for building preconfigured traffic
+  monitors or sniffers.
+
+
+  The daemon and multicast transports provide a completely virtual
+  network to other virtual machines.  This network is completely
+  disconnected from the physical network unless one of the virtual
+  machines on it is acting as a gateway.
+
+
+  With so many host transports, which one should you use?  Here's when
+  you should use each one:
+
+  o  ethertap - if you want access to the host networking and it is
+     running 2.2
+
+  o  TUN/TAP - if you want access to the host networking and it is
+     running 2.4.  Also, the TUN/TAP transport is able to use a
+     preconfigured device, allowing it to avoid using the setuid uml_net
+     helper, which is a security advantage.
+
+  o  Multicast - if you want a purely virtual network and you don't want
+     to set up anything but the UML
+
+  o  a switch daemon - if you want a purely virtual network and you
+     don't mind running the daemon in order to get somewhat better
+     performance
+
+  o  slip - there is no particular reason to run the slip backend unless
+     ethertap and TUN/TAP are just not available for some reason
+
+  o  slirp - if you don't have root access on the host to setup
+     networking, or if you don't want to allocate an IP to your UML
+
+  o  pcap - not much use for actual network connectivity, but great for
+     monitoring traffic on the host
+
+     Ethertap is available on 2.4 and works fine.  TUN/TAP is preferred
+     to it because it has better performance and ethertap is officially
+     considered obsolete in 2.4.  Also, the root helper only needs to
+     run occasionally for TUN/TAP, rather than handling every packet, as
+     it does with ethertap.  This is a slight security advantage since
+     it provides fewer opportunities for a nasty UML user to somehow
+     exploit the helper's root privileges.
+
+
+  6.1.  General setup
+
+  First, you must have the virtual network enabled in your UML.  If are
+  running a prebuilt kernel from this site, everything is already
+  enabled.  If you build the kernel yourself, under the "Network device
+  support" menu, enable "Network device support", and then the three
+  transports.
+
+
+  The next step is to provide a network device to the virtual machine.
+  This is done by describing it on the kernel command line.
+
+  The general format is
+
+
+       eth <n> = <transport> , <transport args>
+
+
+
+
+  For example, a virtual ethernet device may be attached to a host
+  ethertap device as follows:
+
+
+       eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
+
+
+
+
+  This sets up eth0 inside the virtual machine to attach itself to the
+  host /dev/tap0, assigns it an ethernet address, and assigns the host
+  tap0 interface an IP address.
+
+
+
+  Note that the IP address you assign to the host end of the tap device
+  must be different than the IP you assign to the eth device inside UML.
+  If you are short on IPs and don't want to consume two per UML, then
+  you can reuse the host's eth IP address for the host ends of the tap
+  devices.  Internally, the UMLs must still get unique IPs for their eth
+  devices.  You can also give the UMLs non-routable IPs (192.168.x.x or
+  10.x.x.x) and have the host masquerade them.  This will let outgoing
+  connections work, but incoming connections won't without more work,
+  such as port forwarding from the host.
+  Also note that when you configure the host side of an interface, it is
+  only acting as a gateway.  It will respond to pings sent to it
+  locally, but is not useful to do that since it's a host interface.
+  You are not talking to the UML when you ping that interface and get a
+  response.
+
+
+  You can also add devices to a UML and remove them at runtime.  See the
+  ``The Management Console''  page for details.
+
+
+  The sections below describe this in more detail.
+
+
+  Once you've decided how you're going to set up the devices, you boot
+  UML, log in, configure the UML side of the devices, and set up routes
+  to the outside world.  At that point, you will be able to talk to any
+  other machines, physical or virtual, on the net.
+
+
+  If ifconfig inside UML fails and the network refuses to come up, run
+  tell you what went wrong.
+
+
+
+  6.2.  Userspace daemons
+
+  You will likely need the setuid helper, or the switch daemon, or both.
+  They are both installed with the RPM and deb, so if you've installed
+  either, you can skip the rest of this section.
+
+
+  If not, then you need to check them out of CVS, build them, and
+  install them.  The helper is uml_net, in CVS /tools/uml_net, and the
+  daemon is uml_switch, in CVS /tools/uml_router.  They are both built
+  with a plain 'make'.  Both need to be installed in a directory that's
+  in your path - /usr/bin is recommend.  On top of that, uml_net needs
+  to be setuid root.
+
+
+
+  6.3.  Specifying ethernet addresses
+
+  Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
+  allow you to specify hardware addresses for the virtual ethernet
+  devices.  This is generally not necessary.  If you don't have a
+  specific reason to do it, you probably shouldn't.  If one is not
+  specified on the command line, the driver will assign one based on the
+  device IP address.  It will provide the address fe:fd:nn:nn:nn:nn
+  where nn.nn.nn.nn is the device IP address.  This is nearly always
+  sufficient to guarantee a unique hardware address for the device.  A
+  couple of exceptions are:
+
+  o  Another set of virtual ethernet devices are on the same network and
+     they are assigned hardware addresses using a different scheme which
+     may conflict with the UML IP address-based scheme
+
+  o  You aren't going to use the device for IP networking, so you don't
+     assign the device an IP address
+
+     If you let the driver provide the hardware address, you should make
+     sure that the device IP address is known before the interface is
+     brought up.  So, inside UML, this will guarantee that:
+
+
+
+  UML#
+  ifconfig eth0 192.168.0.250 up
+
+
+
+
+  If you decide to assign the hardware address yourself, make sure that
+  the first byte of the address is even.  Addresses with an odd first
+  byte are broadcast addresses, which you don't want assigned to a
+  device.
+
+
+
+  6.4.  UML interface setup
+
+  Once the network devices have been described on the command line, you
+  should boot UML and log in.
+
+
+  The first thing to do is bring the interface up:
+
+
+       UML# ifconfig ethn ip-address up
+
+
+
+
+  You should be able to ping the host at this point.
+
+
+  To reach the rest of the world, you should set a default route to the
+  host:
+
+
+       UML# route add default gw host ip
+
+
+
+
+  Again, with host ip of 192.168.0.4:
+
+
+       UML# route add default gw 192.168.0.4
+
+
+
+
+  This page used to recommend setting a network route to your local net.
+  This is wrong, because it will cause UML to try to figure out hardware
+  addresses of the local machines by arping on the interface to the
+  host.  Since that interface is basically a single strand of ethernet
+  with two nodes on it (UML and the host) and arp requests don't cross
+  networks, they will fail to elicit any responses.  So, what you want
+  is for UML to just blindly throw all packets at the host and let it
+  figure out what to do with them, which is what leaving out the network
+  route and adding the default route does.
+
+
+  Note: If you can't communicate with other hosts on your physical
+  ethernet, it's probably because of a network route that's
+  automatically set up.  If you run 'route -n' and see a route that
+  looks like this:
+
+
+
+
+  Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
+  192.168.0.0     0.0.0.0         255.255.255.0   U     0      0      0   eth0
+
+
+
+
+  with a mask that's not 255.255.255.255, then replace it with a route
+  to your host:
+
+
+       UML#
+       route del -net 192.168.0.0 dev eth0 netmask 255.255.255.0
+
+
+
+
+
+
+       UML#
+       route add -host 192.168.0.4 dev eth0
+
+
+
+
+  This, plus the default route to the host, will allow UML to exchange
+  packets with any machine on your ethernet.
+
+
+
+  6.5.  Multicast
+
+  The simplest way to set up a virtual network between multiple UMLs is
+  to use the mcast transport.  This was written by Harald Welte and is
+  present in UML version 2.4.5-5um and later.  Your system must have
+  multicast enabled in the kernel and there must be a multicast-capable
+  network device on the host.  Normally, this is eth0, but if there is
+  no ethernet card on the host, then you will likely get strange error
+  messages when you bring the device up inside UML.
+
+
+  To use it, run two UMLs with
+
+
+        eth0=mcast
+
+
+
+
+  on their command lines.  Log in, configure the ethernet device in each
+  machine with different IP addresses:
+
+
+       UML1# ifconfig eth0 192.168.0.254
+
+
+
+
+
+
+       UML2# ifconfig eth0 192.168.0.253
+
+
+
+
+  and they should be able to talk to each other.
+
+  The full set of command line options for this transport are
+
+
+
+       ethn=mcast,ethernet address,multicast
+       address,multicast port,ttl
+
+
+
+
+  Harald's original README is here <http://user-mode-linux.source-
+  forge.net/>  and explains these in detail, as well as
+  some other issues.
+
+  There is also a related point-to-point only "ucast" transport.
+  This is useful when your network does not support multicast, and
+  all network connections are simple point to point links.
+
+  The full set of command line options for this transport are
+
+
+       ethn=ucast,ethernet address,remote address,listen port,remote port
+
+
+
+
+  6.6.  TUN/TAP with the uml_net helper
+
+  TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
+  host.  The TUN/TAP backend has been in UML since 2.4.9-3um.
+
+
+  The easiest way to get up and running is to let the setuid uml_net
+  helper do the host setup for you.  This involves insmod-ing the tun.o
+  module if necessary, configuring the device, and setting up IP
+  forwarding, routing, and proxy arp.  If you are new to UML networking,
+  do this first.  If you're concerned about the security implications of
+  the setuid helper, use it to get up and running, then read the next
+  section to see how to have UML use a preconfigured tap device, which
+  avoids the use of uml_net.
+
+
+  If you specify an IP address for the host side of the device, the
+  uml_net helper will do all necessary setup on the host - the only
+  requirement is that TUN/TAP be available, either built in to the host
+  kernel or as the tun.o module.
+
+  The format of the command line switch to attach a device to a TUN/TAP
+  device is
+
+
+       eth <n> =tuntap,,, <IP address>
+
+
+
+
+  For example, this argument will attach the UML's eth0 to the next
+  available tap device and assign an ethernet address to it based on its
+  IP address
+
+
+       eth0=tuntap,,,192.168.0.254
+
+
+
+
+
+
+  Note that the IP address that must be used for the eth device inside
+  UML is fixed by the routing and proxy arp that is set up on the
+  TUN/TAP device on the host.  You can use a different one, but it won't
+  work because reply packets won't reach the UML.  This is a feature.
+  It prevents a nasty UML user from doing things like setting the UML IP
+  to the same as the network's nameserver or mail server.
+
+
+  There are a couple potential problems with running the TUN/TAP
+  transport on a 2.4 host kernel
+
+  o  TUN/TAP seems not to work on 2.4.3 and earlier.  Upgrade the host
+     kernel or use the ethertap transport.
+
+  o  With an upgraded kernel, TUN/TAP may fail with
+
+
+       File descriptor in bad state
+
+
+
+
+  This is due to a header mismatch between the upgraded kernel and the
+  kernel that was originally installed on the machine.  The fix is to
+  make sure that /usr/src/linux points to the headers for the running
+  kernel.
+
+  These were pointed out by Tim Robinson <timro at trkr dot net> in
+  <http://www.geocrawler.com/> name="this uml-
+  user post"> .
+
+
+
+  6.7.  TUN/TAP with a preconfigured tap device
+
+  If you prefer not to have UML use uml_net (which is somewhat
+  insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
+  beforehand.  The setup needs to be done as root, but once that's done,
+  there is no need for root assistance.  Setting up the device is done
+  as follows:
+
+  o  Create the device with tunctl (available from the UML utilities
+     tarball)
+
+
+
+
+       host#  tunctl -u uid
+
+
+
+
+  where uid is the user id or username that UML will be run as.  This
+  will tell you what device was created.
+
+  o  Configure the device IP (change IP addresses and device name to
+     suit)
+
+
+
+
+       host#  ifconfig tap0 192.168.0.254 up
+
+
+
+
+
+  o  Set up routing and arping if desired - this is my recipe, there are
+     other ways of doing the same thing
+
+
+       host#
+       bash -c 'echo 1 > /proc/sys/net/ipv4/ip_forward'
+
+       host#
+       route add -host 192.168.0.253 dev tap0
+
+
+
+
+
+
+       host#
+       bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap0/proxy_arp'
+
+
+
+
+
+
+       host#
+       arp -Ds 192.168.0.253 eth0 pub
+
+
+
+
+  Note that this must be done every time the host boots - this configu-
+  ration is not stored across host reboots.  So, it's probably a good
+  idea to stick it in an rc file.  An even better idea would be a little
+  utility which reads the information from a config file and sets up
+  devices at boot time.
+
+  o  Rather than using up two IPs and ARPing for one of them, you can
+     also provide direct access to your LAN by the UML by using a
+     bridge.
+
+
+       host#
+       brctl addbr br0
+
+
+
+
+
+
+       host#
+       ifconfig eth0 0.0.0.0 promisc up
+
+
+
+
+
+
+       host#
+       ifconfig tap0 0.0.0.0 promisc up
+
+
+
+
+
+
+       host#
+       ifconfig br0 192.168.0.1 netmask 255.255.255.0 up
+
+
+
+
+
+
+
+  host#
+  brctl stp br0 off
+
+
+
+
+
+
+       host#
+       brctl setfd br0 1
+
+
+
+
+
+
+       host#
+       brctl sethello br0 1
+
+
+
+
+
+
+       host#
+       brctl addif br0 eth0
+
+
+
+
+
+
+       host#
+       brctl addif br0 tap0
+
+
+
+
+  Note that 'br0' should be setup using ifconfig with the existing IP
+  address of eth0, as eth0 no longer has its own IP.
+
+  o
+
+
+     Also, the /dev/net/tun device must be writable by the user running
+     UML in order for the UML to use the device that's been configured
+     for it.  The simplest thing to do is
+
+
+       host#  chmod 666 /dev/net/tun
+
+
+
+
+  Making it world-writable looks bad, but it seems not to be
+  exploitable as a security hole.  However, it does allow anyone to cre-
+  ate useless tap devices (useless because they can't configure them),
+  which is a DOS attack.  A somewhat more secure alternative would to be
+  to create a group containing all the users who have preconfigured tap
+  devices and chgrp /dev/net/tun to that group with mode 664 or 660.
+
+
+  o  Once the device is set up, run UML with 'eth0=tuntap,device name'
+     (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
+     mconsole config command).
+
+  o  Bring the eth device up in UML and you're in business.
+
+     If you don't want that tap device any more, you can make it non-
+     persistent with
+
+
+       host#  tunctl -d tap device
+
+
+
+
+  Finally, tunctl has a -b (for brief mode) switch which causes it to
+  output only the name of the tap device it created.  This makes it
+  suitable for capture by a script:
+
+
+       host#  TAP=`tunctl -u 1000 -b`
+
+
+
+
+
+
+  6.8.  Ethertap
+
+  Ethertap is the general mechanism on 2.2 for userspace processes to
+  exchange packets with the kernel.
+
+
+
+  To use this transport, you need to describe the virtual network device
+  on the UML command line.  The general format for this is
+
+
+       eth <n> =ethertap, <device> , <ethernet address> , <tap IP address>
+
+
+
+
+  So, the previous example
+
+
+       eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
+
+
+
+
+  attaches the UML eth0 device to the host /dev/tap0, assigns it the
+  ethernet address fe:fd:0:0:0:1, and assigns the IP address
+  192.168.0.254 to the tap device.
+
+
+
+  The tap device is mandatory, but the others are optional.  If the
+  ethernet address is omitted, one will be assigned to it.
+
+
+  The presence of the tap IP address will cause the helper to run and do
+  whatever host setup is needed to allow the virtual machine to
+  communicate with the outside world.  If you're not sure you know what
+  you're doing, this is the way to go.
+
+
+  If it is absent, then you must configure the tap device and whatever
+  arping and routing you will need on the host.  However, even in this
+  case, the uml_net helper still needs to be in your path and it must be
+  setuid root if you're not running UML as root.  This is because the
+  tap device doesn't support SIGIO, which UML needs in order to use
+  something as a source of input.  So, the helper is used as a
+  convenient asynchronous IO thread.
+
+  If you're using the uml_net helper, you can ignore the following host
+  setup - uml_net will do it for you.  You just need to make sure you
+  have ethertap available, either built in to the host kernel or
+  available as a module.
+
+
+  If you want to set things up yourself, you need to make sure that the
+  appropriate /dev entry exists.  If it doesn't, become root and create
+  it as follows:
+
+
+       mknod /dev/tap <minor>  c 36  <minor>  + 16
+
+
+
+
+  For example, this is how to create /dev/tap0:
+
+
+       mknod /dev/tap0 c 36 0 + 16
+
+
+
+
+  You also need to make sure that the host kernel has ethertap support.
+  If ethertap is enabled as a module, you apparently need to insmod
+  ethertap once for each ethertap device you want to enable.  So,
+
+
+       host#
+       insmod ethertap
+
+
+
+
+  will give you the tap0 interface.  To get the tap1 interface, you need
+  to run
+
+
+       host#
+       insmod ethertap unit=1 -o ethertap1
+
+
+
+
+
+
+
+  6.9.  The switch daemon
+
+  Note: This is the daemon formerly known as uml_router, but which was
+  renamed so the network weenies of the world would stop growling at me.
+
+
+  The switch daemon, uml_switch, provides a mechanism for creating a
+  totally virtual network.  By default, it provides no connection to the
+  host network (but see -tap, below).
+
+
+  The first thing you need to do is run the daemon.  Running it with no
+  arguments will make it listen on a default pair of unix domain
+  sockets.
+
+
+  If you want it to listen on a different pair of sockets, use
+
+
+        -unix control socket data socket
+
+
+
+
+
+  If you want it to act as a hub rather than a switch, use
+
+
+        -hub
+
+
+
+
+
+  If you want the switch to be connected to host networking (allowing
+  the umls to get access to the outside world through the host), use
+
+
+        -tap tap0
+
+
+
+
+
+  Note that the tap device must be preconfigured (see "TUN/TAP with a
+  preconfigured tap device", above).  If you're using a different tap
+  device than tap0, specify that instead of tap0.
+
+
+  uml_switch can be backgrounded as follows
+
+
+       host%
+       uml_switch [ options ] < /dev/null > /dev/null
+
+
+
+
+  The reason it doesn't background by default is that it listens to
+  stdin for EOF.  When it sees that, it exits.
+
+
+  The general format of the kernel command line switch is
+
+
+
+       ethn=daemon,ethernet address,socket
+       type,control socket,data socket
+
+
+
+
+  You can leave off everything except the 'daemon'.  You only need to
+  specify the ethernet address if the one that will be assigned to it
+  isn't acceptable for some reason.  The rest of the arguments describe
+  how to communicate with the daemon.  You should only specify them if
+  you told the daemon to use different sockets than the default.  So, if
+  you ran the daemon with no arguments, running the UML on the same
+  machine with
+       eth0=daemon
+
+
+
+
+  will cause the eth0 driver to attach itself to the daemon correctly.
+
+
+
+  6.10.  Slip
+
+  Slip is another, less general, mechanism for a process to communicate
+  with the host networking.  In contrast to the ethertap interface,
+  which exchanges ethernet frames with the host and can be used to
+  transport any higher-level protocol, it can only be used to transport
+  IP.
+
+
+  The general format of the command line switch is
+
+
+
+       ethn=slip,slip IP
+
+
+
+
+  The slip IP argument is the IP address that will be assigned to the
+  host end of the slip device.  If it is specified, the helper will run
+  and will set up the host so that the virtual machine can reach it and
+  the rest of the network.
+
+
+  There are some oddities with this interface that you should be aware
+  of.  You should only specify one slip device on a given virtual
+  machine, and its name inside UML will be 'umn', not 'eth0' or whatever
+  you specified on the command line.  These problems will be fixed at
+  some point.
+
+
+
+  6.11.  Slirp
+
+  slirp uses an external program, usually /usr/bin/slirp, to provide IP
+  only networking connectivity through the host. This is similar to IP
+  masquerading with a firewall, although the translation is performed in
+  user-space, rather than by the kernel.  As slirp does not set up any
+  interfaces on the host, or changes routing, slirp does not require
+  root access or setuid binaries on the host.
+
+
+  The general format of the command line switch for slirp is:
+
+
+
+       ethn=slirp,ethernet address,slirp path
+
+
+
+
+  The ethernet address is optional, as UML will set up the interface
+  with an ethernet address based upon the initial IP address of the
+  interface.  The slirp path is generally /usr/bin/slirp, although it
+  will depend on distribution.
+
+
+  The slirp program can have a number of options passed to the command
+  line and we can't add them to the UML command line, as they will be
+  parsed incorrectly.  Instead, a wrapper shell script can be written or
+  the options inserted into the  /.slirprc file.  More information on
+  all of the slirp options can be found in its man pages.
+
+
+  The eth0 interface on UML should be set up with the IP 10.2.0.15,
+  although you can use anything as long as it is not used by a network
+  you will be connecting to. The default route on UML should be set to
+  use
+
+
+       UML#
+       route add default dev eth0
+
+
+
+
+  slirp provides a number of useful IP addresses which can be used by
+  UML, such as 10.0.2.3 which is an alias for the DNS server specified
+  in /etc/resolv.conf on the host or the IP given in the 'dns' option
+  for slirp.
+
+
+  Even with a baudrate setting higher than 115200, the slirp connection
+  is limited to 115200. If you need it to go faster, the slirp binary
+  needs to be compiled with FULL_BOLT defined in config.h.
+
+
+
+  6.12.  pcap
+
+  The pcap transport is attached to a UML ethernet device on the command
+  line or with uml_mconsole with the following syntax:
+
+
+
+       ethn=pcap,host interface,filter
+       expression,option1,option2
+
+
+
+
+  The expression and options are optional.
+
+
+  The interface is whatever network device on the host you want to
+  sniff.  The expression is a pcap filter expression, which is also what
+  tcpdump uses, so if you know how to specify tcpdump filters, you will
+  use the same expressions here.  The options are up to two of
+  'promisc', control whether pcap puts the host interface into
+  promiscuous mode. 'optimize' and 'nooptimize' control whether the pcap
+  expression optimizer is used.
+
+
+  Example:
+
+
+
+       eth0=pcap,eth0,tcp
+
+       eth1=pcap,eth0,!tcp
+
+
+
+  will cause the UML eth0 to emit all tcp packets on the host eth0 and
+  the UML eth1 to emit all non-tcp packets on the host eth0.
+
+
+
+  6.13.  Setting up the host yourself
+
+  If you don't specify an address for the host side of the ethertap or
+  slip device, UML won't do any setup on the host.  So this is what is
+  needed to get things working (the examples use a host-side IP of
+  192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
+  own network):
+
+  o  The device needs to be configured with its IP address.  Tap devices
+     are also configured with an mtu of 1484.  Slip devices are
+     configured with a point-to-point address pointing at the UML ip
+     address.
+
+
+       host#  ifconfig tap0 arp mtu 1484 192.168.0.251 up
+
+
+
+
+
+
+       host#
+       ifconfig sl0 192.168.0.251 pointopoint 192.168.0.250 up
+
+
+
+
+
+  o  If a tap device is being set up, a route is set to the UML IP.
+
+
+       UML# route add -host 192.168.0.250 gw 192.168.0.251
+
+
+
+
+
+  o  To allow other hosts on your network to see the virtual machine,
+     proxy arp is set up for it.
+
+
+       host#  arp -Ds 192.168.0.250 eth0 pub
+
+
+
+
+
+  o  Finally, the host is set up to route packets.
+
+
+       host#  echo 1 > /proc/sys/net/ipv4/ip_forward
+
+
+
+
+
+
+
+
+
+
+  7.  Sharing Filesystems between Virtual Machines
+
+
+
+
+  7.1.  A warning
+
+  Don't attempt to share filesystems simply by booting two UMLs from the
+  same file.  That's the same thing as booting two physical machines
+  from a shared disk.  It will result in filesystem corruption.
+
+
+
+  7.2.  Using layered block devices
+
+  The way to share a filesystem between two virtual machines is to use
+  the copy-on-write (COW) layering capability of the ubd block driver.
+  As of 2.4.6-2um, the driver supports layering a read-write private
+  device over a read-only shared device.  A machine's writes are stored
+  in the private device, while reads come from either device - the
+  private one if the requested block is valid in it, the shared one if
+  not.  Using this scheme, the majority of data which is unchanged is
+  shared between an arbitrary number of virtual machines, each of which
+  has a much smaller file containing the changes that it has made.  With
+  a large number of UMLs booting from a large root filesystem, this
+  leads to a huge disk space saving.  It will also help performance,
+  since the host will be able to cache the shared data using a much
+  smaller amount of memory, so UML disk requests will be served from the
+  host's memory rather than its disks.
+
+
+
+
+  To add a copy-on-write layer to an existing block device file, simply
+  add the name of the COW file to the appropriate ubd switch:
+
+
+        ubd0=root_fs_cow,root_fs_debian_22
+
+
+
+
+  where 'root_fs_cow' is the private COW file and 'root_fs_debian_22' is
+  the existing shared filesystem.  The COW file need not exist.  If it
+  doesn't, the driver will create and initialize it.  Once the COW file
+  has been initialized, it can be used on its own on the command line:
+
+
+        ubd0=root_fs_cow
+
+
+
+
+  The name of the backing file is stored in the COW file header, so it
+  would be redundant to continue specifying it on the command line.
+
+
+
+  7.3.  Note!
+
+  When checking the size of the COW file in order to see the gobs of
+  space that you're saving, make sure you use 'ls -ls' to see the actual
+  disk consumption rather than the length of the file.  The COW file is
+  sparse, so the length will be very different from the disk usage.
+  Here is a 'ls -l' of a COW file and backing file from one boot and
+  shutdown:
+       host% ls -l cow.debian debian2.2
+       -rw-r--r--    1 jdike    jdike    492504064 Aug  6 21:16 cow.debian
+       -rwxrw-rw-    1 jdike    jdike    537919488 Aug  6 20:42 debian2.2
+
+
+
+
+  Doesn't look like much saved space, does it?  Well, here's 'ls -ls':
+
+
+       host% ls -ls cow.debian debian2.2
+          880 -rw-r--r--    1 jdike    jdike    492504064 Aug  6 21:16 cow.debian
+       525832 -rwxrw-rw-    1 jdike    jdike    537919488 Aug  6 20:42 debian2.2
+
+
+
+
+  Now, you can see that the COW file has less than a meg of disk, rather
+  than 492 meg.
+
+
+
+  7.4.  Another warning
+
+  Once a filesystem is being used as a readonly backing file for a COW
+  file, do not boot directly from it or modify it in any way.  Doing so
+  will invalidate any COW files that are using it.  The mtime and size
+  of the backing file are stored in the COW file header at its creation,
+  and they must continue to match.  If they don't, the driver will
+  refuse to use the COW file.
+
+
+
+
+  If you attempt to evade this restriction by changing either the
+  backing file or the COW header by hand, you will get a corrupted
+  filesystem.
+
+
+
+
+  Among other things, this means that upgrading the distribution in a
+  backing file and expecting that all of the COW files using it will see
+  the upgrade will not work.
+
+
+
+
+  7.5.  uml_moo : Merging a COW file with its backing file
+
+  Depending on how you use UML and COW devices, it may be advisable to
+  merge the changes in the COW file into the backing file every once in
+  a while.
+
+
+
+
+  The utility that does this is uml_moo.  Its usage is
+
+
+       host% uml_moo COW file new backing file
+
+
+
+
+  There's no need to specify the backing file since that information is
+  already in the COW file header.  If you're paranoid, boot the new
+  merged file, and if you're happy with it, move it over the old backing
+  file.
+
+
+
+
+  uml_moo creates a new backing file by default as a safety measure.  It
+  also has a destructive merge option which will merge the COW file
+  directly into its current backing file.  This is really only usable
+  when the backing file only has one COW file associated with it.  If
+  there are multiple COWs associated with a backing file, a -d merge of
+  one of them will invalidate all of the others.  However, it is
+  convenient if you're short of disk space, and it should also be
+  noticeably faster than a non-destructive merge.
+
+
+
+
+  uml_moo is installed with the UML deb and RPM.  If you didn't install
+  UML from one of those packages, you can also get it from the UML
+  utilities <http://user-mode-linux.sourceforge.net/
+  utilities>  tar file in tools/moo.
+
+
+
+
+
+
+
+
+  8.  Creating filesystems
+
+
+  You may want to create and mount new UML filesystems, either because
+  your root filesystem isn't large enough or because you want to use a
+  filesystem other than ext2.
+
+
+  This was written on the occasion of reiserfs being included in the
+  2.4.1 kernel pool, and therefore the 2.4.1 UML, so the examples will
+  talk about reiserfs.  This information is generic, and the examples
+  should be easy to translate to the filesystem of your choice.
+
+
+  8.1.  Create the filesystem file
+
+  dd is your friend.  All you need to do is tell dd to create an empty
+  file of the appropriate size.  I usually make it sparse to save time
+  and to avoid allocating disk space until it's actually used.  For
+  example, the following command will create a sparse 100 meg file full
+  of zeroes.
+
+
+       host%
+       dd if=/dev/zero of=new_filesystem seek=100 count=1 bs=1M
+
+
+
+
+
+
+  8.2.  Assign the file to a UML device
+
+  Add an argument like the following to the UML command line:
+
+  ubd4=new_filesystem
+
+
+
+
+  making sure that you use an unassigned ubd device number.
+
+
+
+  8.3.  Creating and mounting the filesystem
+
+  Make sure that the filesystem is available, either by being built into
+  the kernel, or available as a module, then boot up UML and log in.  If
+  the root filesystem doesn't have the filesystem utilities (mkfs, fsck,
+  etc), then get them into UML by way of the net or hostfs.
+
+
+  Make the new filesystem on the device assigned to the new file:
+
+
+       host#  mkreiserfs /dev/ubd/4
+
+
+       <----------- MKREISERFSv2 ----------->
+
+       ReiserFS version 3.6.25
+       Block size 4096 bytes
+       Block count 25856
+       Used blocks 8212
+               Journal - 8192 blocks (18-8209), journal header is in block 8210
+               Bitmaps: 17
+               Root block 8211
+       Hash function "r5"
+       ATTENTION: ALL DATA WILL BE LOST ON '/dev/ubd/4'! (y/n)y
+       journal size 8192 (from 18)
+       Initializing journal - 0%....20%....40%....60%....80%....100%
+       Syncing..done.
+
+
+
+
+  Now, mount it:
+
+
+       UML#
+       mount /dev/ubd/4 /mnt
+
+
+
+
+  and you're in business.
+
+
+
+
+
+
+
+
+
+  9.  Host file access
+
+
+  If you want to access files on the host machine from inside UML, you
+  can treat it as a separate machine and either nfs mount directories
+  from the host or copy files into the virtual machine with scp or rcp.
+  However, since UML is running on the host, it can access those
+  files just like any other process and make them available inside the
+  virtual machine without needing to use the network.
+
+
+  This is now possible with the hostfs virtual filesystem.  With it, you
+  can mount a host directory into the UML filesystem and access the
+  files contained in it just as you would on the host.
+
+
+  9.1.  Using hostfs
+
+  To begin with, make sure that hostfs is available inside the virtual
+  machine with
+
+
+       UML# cat /proc/filesystems
+
+
+
+  .  hostfs should be listed.  If it's not, either rebuild the kernel
+  with hostfs configured into it or make sure that hostfs is built as a
+  module and available inside the virtual machine, and insmod it.
+
+
+  Now all you need to do is run mount:
+
+
+       UML# mount none /mnt/host -t hostfs
+
+
+
+
+  will mount the host's / on the virtual machine's /mnt/host.
+
+
+  If you don't want to mount the host root directory, then you can
+  specify a subdirectory to mount with the -o switch to mount:
+
+
+       UML# mount none /mnt/home -t hostfs -o /home
+
+
+
+
+  will mount the hosts's /home on the virtual machine's /mnt/home.
+
+
+
+  9.2.  hostfs as the root filesystem
+
+  It's possible to boot from a directory hierarchy on the host using
+  hostfs rather than using the standard filesystem in a file.
+
+  To start, you need that hierarchy.  The easiest way is to loop mount
+  an existing root_fs file:
+
+
+       host#  mount root_fs uml_root_dir -o loop
+
+
+
+
+  You need to change the filesystem type of / in etc/fstab to be
+  'hostfs', so that line looks like this:
+
+  /dev/ubd/0       /        hostfs      defaults          1   1
+
+
+
+
+  Then you need to chown to yourself all the files in that directory
+  that are owned by root.  This worked for me:
+
+
+       host#  find . -uid 0 -exec chown jdike {} \;
+
+
+
+
+  Next, make sure that your UML kernel has hostfs compiled in, not as a
+  module.  Then run UML with the boot device pointing at that directory:
+
+
+        ubd0=/path/to/uml/root/directory
+
+
+
+
+  UML should then boot as it does normally.
+
+
+  9.3.  Building hostfs
+
+  If you need to build hostfs because it's not in your kernel, you have
+  two choices:
+
+
+
+  o  Compiling hostfs into the kernel:
+
+
+     Reconfigure the kernel and set the 'Host filesystem' option under
+
+
+  o  Compiling hostfs as a module:
+
+
+     Reconfigure the kernel and set the 'Host filesystem' option under
+     be in arch/um/fs/hostfs/hostfs.o.  Install that in
+     /lib/modules/`uname -r`/fs in the virtual machine, boot it up, and
+
+
+       UML# insmod hostfs
+
+
+
+
+
+
+
+
+
+
+
+
+  10.  The Management Console
+
+
+
+  The UML management console is a low-level interface to the kernel,
+  somewhat like the i386 SysRq interface.  Since there is a full-blown
+  operating system under UML, there is much greater flexibility possible
+  than with the SysRq mechanism.
+
+
+  There are a number of things you can do with the mconsole interface:
+
+  o  get the kernel version
+
+  o  add and remove devices
+
+  o  halt or reboot the machine
+
+  o  Send SysRq commands
+
+  o  Pause and resume the UML
+
+
+  You need the mconsole client (uml_mconsole) which is present in CVS
+  (/tools/mconsole) in 2.4.5-9um and later, and will be in the RPM in
+  2.4.6.
+
+
+  You also need CONFIG_MCONSOLE (under 'General Setup') enabled in UML.
+  When you boot UML, you'll see a line like:
+
+
+       mconsole initialized on /home/jdike/.uml/umlNJ32yL/mconsole
+
+
+
+
+  If you specify a unique machine id one the UML command line, i.e.
+
+
+        umid=debian
+
+
+
+
+  you'll see this
+
+
+       mconsole initialized on /home/jdike/.uml/debian/mconsole
+
+
+
+
+  That file is the socket that uml_mconsole will use to communicate with
+  UML.  Run it with either the umid or the full path as its argument:
+
+
+       host% uml_mconsole debian
+
+
+
+
+  or
+
+
+       host% uml_mconsole /home/jdike/.uml/debian/mconsole
+
+
+
+
+  You'll get a prompt, at which you can run one of these commands:
+
+  o  version
+
+  o  halt
+
+  o  reboot
+
+  o  config
+
+  o  remove
+
+  o  sysrq
+
+  o  help
+
+  o  cad
+
+  o  stop
+
+  o  go
+
+
+  10.1.  version
+
+  This takes no arguments.  It prints the UML version.
+
+
+       (mconsole)  version
+       OK Linux usermode 2.4.5-9um #1 Wed Jun 20 22:47:08 EDT 2001 i686
+
+
+
+
+  There are a couple actual uses for this.  It's a simple no-op which
+  can be used to check that a UML is running.  It's also a way of
+  sending an interrupt to the UML.  This is sometimes useful on SMP
+  hosts, where there's a bug which causes signals to UML to be lost,
+  often causing it to appear to hang.  Sending such a UML the mconsole
+  version command is a good way to 'wake it up' before networking has
+  been enabled, as it does not do anything to the function of the UML.
+
+
+
+  10.2.  halt and reboot
+
+  These take no arguments.  They shut the machine down immediately, with
+  no syncing of disks and no clean shutdown of userspace.  So, they are
+  pretty close to crashing the machine.
+
+
+       (mconsole)  halt
+       OK
+
+
+
+
+
+
+  10.3.  config
+
+  "config" adds a new device to the virtual machine.  Currently the ubd
+  and network drivers support this.  It takes one argument, which is the
+  device to add, with the same syntax as the kernel command line.
+
+
+
+
+  (mconsole)
+  config ubd3=/home/jdike/incoming/roots/root_fs_debian22
+
+  OK
+  (mconsole)  config eth1=mcast
+  OK
+
+
+
+
+
+
+  10.4.  remove
+
+  "remove" deletes a device from the system.  Its argument is just the
+  name of the device to be removed. The device must be idle in whatever
+  sense the driver considers necessary.  In the case of the ubd driver,
+  the removed block device must not be mounted, swapped on, or otherwise
+  open, and in the case of the network driver, the device must be down.
+
+
+       (mconsole)  remove ubd3
+       OK
+       (mconsole)  remove eth1
+       OK
+
+
+
+
+
+
+  10.5.  sysrq
+
+  This takes one argument, which is a single letter.  It calls the
+  generic kernel's SysRq driver, which does whatever is called for by
+  that argument.  See the SysRq documentation in
+  Documentation/admin-guide/sysrq.rst in your favorite kernel tree to
+  see what letters are valid and what they do.
+
+
+
+  10.6.  help
+
+  "help" returns a string listing the valid commands and what each one
+  does.
+
+
+
+  10.7.  cad
+
+  This invokes the Ctl-Alt-Del action on init.  What exactly this ends
+  up doing is up to /etc/inittab.  Normally, it reboots the machine.
+  With UML, this is usually not desired, so if a halt would be better,
+  then find the section of inittab that looks like this
+
+
+       # What to do when CTRL-ALT-DEL is pressed.
+       ca:12345:ctrlaltdel:/sbin/shutdown -t1 -a -r now
+
+
+
+
+  and change the command to halt.
+
+
+
+  10.8.  stop
+
+  This puts the UML in a loop reading mconsole requests until a 'go'
+  mconsole command is received. This is very useful for making backups
+  of UML filesystems, as the UML can be stopped, then synced via 'sysrq
+  s', so that everything is written to the filesystem. You can then copy
+  the filesystem and then send the UML 'go' via mconsole.
+
+
+  Note that a UML running with more than one CPU will have problems
+  after you send the 'stop' command, as only one CPU will be held in a
+  mconsole loop and all others will continue as normal.  This is a bug,
+  and will be fixed.
+
+
+
+  10.9.  go
+
+  This resumes a UML after being paused by a 'stop' command. Note that
+  when the UML has resumed, TCP connections may have timed out and if
+  the UML is paused for a long period of time, crond might go a little
+  crazy, running all the jobs it didn't do earlier.
+
+
+
+
+
+
+
+
+  11.  Kernel debugging
+
+
+  Note: The interface that makes debugging, as described here, possible
+  is present in 2.4.0-test6 kernels and later.
+
+
+  Since the user-mode kernel runs as a normal Linux process, it is
+  possible to debug it with gdb almost like any other process.  It is
+  slightly different because the kernel's threads are already being
+  ptraced for system call interception, so gdb can't ptrace them.
+  However, a mechanism has been added to work around that problem.
+
+
+  In order to debug the kernel, you need build it from source.  See
+  ``Compiling the kernel and modules''  for information on doing that.
+  Make sure that you enable CONFIG_DEBUGSYM and CONFIG_PT_PROXY during
+  the config.  These will compile the kernel with -g, and enable the
+  ptrace proxy so that gdb works with UML, respectively.
+
+
+
+
+  11.1.  Starting the kernel under gdb
+
+  You can have the kernel running under the control of gdb from the
+  beginning by putting 'debug' on the command line.  You will get an
+  xterm with gdb running inside it.  The kernel will send some commands
+  to gdb which will leave it stopped at the beginning of start_kernel.
+  At this point, you can get things going with 'next', 'step', or
+  'cont'.
+
+
+  There is a transcript of a debugging session  here <debug-
+  session.html> , with breakpoints being set in the scheduler and in an
+  interrupt handler.
+  11.2.  Examining sleeping processes
+
+  Not every bug is evident in the currently running process.  Sometimes,
+  processes hang in the kernel when they shouldn't because they've
+  deadlocked on a semaphore or something similar.  In this case, when
+  you ^C gdb and get a backtrace, you will see the idle thread, which
+  isn't very relevant.
+
+
+  What you want is the stack of whatever process is sleeping when it
+  shouldn't be.  You need to figure out which process that is, which is
+  generally fairly easy.  Then you need to get its host process id,
+  which you can do either by looking at ps on the host or at
+  task.thread.extern_pid in gdb.
+
+
+  Now what you do is this:
+
+  o  detach from the current thread
+
+
+       (UML gdb)  det
+
+
+
+
+
+  o  attach to the thread you are interested in
+
+
+       (UML gdb)  att <host pid>
+
+
+
+
+
+  o  look at its stack and anything else of interest
+
+
+       (UML gdb)  bt
+
+
+
+
+  Note that you can't do anything at this point that requires that a
+  process execute, e.g. calling a function
+
+  o  when you're done looking at that process, reattach to the current
+     thread and continue it
+
+
+       (UML gdb)
+       att 1
+
+
+
+
+
+
+       (UML gdb)
+       c
+
+
+
+
+  Here, specifying any pid which is not the process id of a UML thread
+  will cause gdb to reattach to the current thread.  I commonly use 1,
+  but any other invalid pid would work.
+
+
+
+  11.3.  Running ddd on UML
+
+  ddd works on UML, but requires a special kludge.  The process goes
+  like this:
+
+  o  Start ddd
+
+
+       host% ddd linux
+
+
+
+
+
+  o  With ps, get the pid of the gdb that ddd started.  You can ask the
+     gdb to tell you, but for some reason that confuses things and
+     causes a hang.
+
+  o  run UML with 'debug=parent gdb-pid=<pid>' added to the command line
+     - it will just sit there after you hit return
+
+  o  type 'att 1' to the ddd gdb and you will see something like
+
+
+       0xa013dc51 in __kill ()
+
+
+       (gdb)
+
+
+
+
+
+  o  At this point, type 'c', UML will boot up, and you can use ddd just
+     as you do on any other process.
+
+
+
+  11.4.  Debugging modules
+
+  gdb has support for debugging code which is dynamically loaded into
+  the process.  This support is what is needed to debug kernel modules
+  under UML.
+
+
+  Using that support is somewhat complicated.  You have to tell gdb what
+  object file you just loaded into UML and where in memory it is.  Then,
+  it can read the symbol table, and figure out where all the symbols are
+  from the load address that you provided.  It gets more interesting
+  when you load the module again (i.e. after an rmmod).  You have to
+  tell gdb to forget about all its symbols, including the main UML ones
+  for some reason, then load then all back in again.
+
+
+  There's an easy way and a hard way to do this.  The easy way is to use
+  the umlgdb expect script written by Chandan Kudige.  It basically
+  automates the process for you.
+
+
+  First, you must tell it where your modules are.  There is a list in
+  the script that looks like this:
+       set MODULE_PATHS {
+       "fat" "/usr/src/uml/linux-2.4.18/fs/fat/fat.o"
+       "isofs" "/usr/src/uml/linux-2.4.18/fs/isofs/isofs.o"
+       "minix" "/usr/src/uml/linux-2.4.18/fs/minix/minix.o"
+       }
+
+
+
+
+  You change that to list the names and paths of the modules that you
+  are going to debug.  Then you run it from the toplevel directory of
+  your UML pool and it basically tells you what to do:
+
+
+
+
+                   ******** GDB pid is 21903 ********
+       Start UML as: ./linux <kernel switches> debug gdb-pid=21903
+
+
+
+       GNU gdb 5.0rh-5 Red Hat Linux 7.1
+       Copyright 2001 Free Software Foundation, Inc.
+       GDB is free software, covered by the GNU General Public License, and you are
+       welcome to change it and/or distribute copies of it under certain conditions.
+       Type "show copying" to see the conditions.
+       There is absolutely no warranty for GDB.  Type "show warranty" for details.
+       This GDB was configured as "i386-redhat-linux"...
+       (gdb) b sys_init_module
+       Breakpoint 1 at 0xa0011923: file module.c, line 349.
+       (gdb) att 1
+
+
+
+
+  After you run UML and it sits there doing nothing, you hit return at
+  the 'att 1' and continue it:
+
+
+       Attaching to program: /home/jdike/linux/2.4/um/./linux, process 1
+       0xa00f4221 in __kill ()
+       (UML gdb)  c
+       Continuing.
+
+
+
+
+  At this point, you debug normally.  When you insmod something, the
+  expect magic will kick in and you'll see something like:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+   *** Module hostfs loaded ***
+  Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs",
+      mod_user=0x8070e00) at module.c:349
+  349             char *name, *n_name, *name_tmp = NULL;
+  (UML gdb)  finish
+  Run till exit from #0  sys_init_module (name_user=0x805abb0 "hostfs",
+      mod_user=0x8070e00) at module.c:349
+  0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411
+  411             else res = EXECUTE_SYSCALL(syscall, regs);
+  Value returned is $1 = 0
+  (UML gdb)
+  p/x (int)module_list + module_list->size_of_struct
+
+  $2 = 0xa9021054
+  (UML gdb)  symbol-file ./linux
+  Load new symbol table from "./linux"? (y or n) y
+  Reading symbols from ./linux...
+  done.
+  (UML gdb)
+  add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054
+
+  add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at
+          .text_addr = 0xa9021054
+   (y or n) y
+
+  Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o...
+  done.
+  (UML gdb)  p *module_list
+  $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs",
+    size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1,
+    nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0,
+    init = 0xa90221f0 <init_hostfs>, cleanup = 0xa902222c <exit_hostfs>,
+    ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0,
+    persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0,
+    kallsyms_end = 0x0,
+    archdata_start = 0x1b855 <Address 0x1b855 out of bounds>,
+    archdata_end = 0xe5890000 <Address 0xe5890000 out of bounds>,
+    kernel_data = 0xf689c35d <Address 0xf689c35d out of bounds>}
+  >> Finished loading symbols for hostfs ...
+
+
+
+
+  That's the easy way.  It's highly recommended.  The hard way is
+  described below in case you're interested in what's going on.
+
+
+  Boot the kernel under the debugger and load the module with insmod or
+  modprobe.  With gdb, do:
+
+
+       (UML gdb)  p module_list
+
+
+
+
+  This is a list of modules that have been loaded into the kernel, with
+  the most recently loaded module first.  Normally, the module you want
+  is at module_list.  If it's not, walk down the next links, looking at
+  the name fields until find the module you want to debug.  Take the
+  address of that structure, and add module.size_of_struct (which in
+  2.4.10 kernels is 96 (0x60)) to it.  Gdb can make this hard addition
+  for you :-):
+
+
+
+  (UML gdb)
+  printf "%#x\n", (int)module_list module_list->size_of_struct
+
+
+
+
+  The offset from the module start occasionally changes (before 2.4.0,
+  it was module.size_of_struct + 4), so it's a good idea to check the
+  init and cleanup addresses once in a while, as describe below.  Now
+  do:
+
+
+       (UML gdb)
+       add-symbol-file /path/to/module/on/host that_address
+
+
+
+
+  Tell gdb you really want to do it, and you're in business.
+
+
+  If there's any doubt that you got the offset right, like breakpoints
+  appear not to work, or they're appearing in the wrong place, you can
+  check it by looking at the module structure.  The init and cleanup
+  fields should look like:
+
+
+       init = 0x588066b0 <init_hostfs>, cleanup = 0x588066c0 <exit_hostfs>
+
+
+
+
+  with no offsets on the symbol names.  If the names are right, but they
+  are offset, then the offset tells you how much you need to add to the
+  address you gave to add-symbol-file.
+
+
+  When you want to load in a new version of the module, you need to get
+  gdb to forget about the old one.  The only way I've found to do that
+  is to tell gdb to forget about all symbols that it knows about:
+
+
+       (UML gdb)  symbol-file
+
+
+
+
+  Then reload the symbols from the kernel binary:
+
+
+       (UML gdb)  symbol-file /path/to/kernel
+
+
+
+
+  and repeat the process above.  You'll also need to re-enable break-
+  points.  They were disabled when you dumped all the symbols because
+  gdb couldn't figure out where they should go.
+
+
+
+  11.5.  Attaching gdb to the kernel
+
+  If you don't have the kernel running under gdb, you can attach gdb to
+  it later by sending the tracing thread a SIGUSR1.  The first line of
+  the console output identifies its pid:
+       tracing thread pid = 20093
+
+
+
+
+  When you send it the signal:
+
+
+       host% kill -USR1 20093
+
+
+
+
+  you will get an xterm with gdb running in it.
+
+
+  If you have the mconsole compiled into UML, then the mconsole client
+  can be used to start gdb:
+
+
+       (mconsole)  (mconsole) config gdb=xterm
+
+
+
+
+  will fire up an xterm with gdb running in it.
+
+
+
+  11.6.  Using alternate debuggers
+
+  UML has support for attaching to an already running debugger rather
+  than starting gdb itself.  This is present in CVS as of 17 Apr 2001.
+  I sent it to Alan for inclusion in the ac tree, and it will be in my
+  2.4.4 release.
+
+
+  This is useful when gdb is a subprocess of some UI, such as emacs or
+  ddd.  It can also be used to run debuggers other than gdb on UML.
+  Below is an example of using strace as an alternate debugger.
+
+
+  To do this, you need to get the pid of the debugger and pass it in
+  with the
+
+
+  If you are using gdb under some UI, then tell it to 'att 1', and
+  you'll find yourself attached to UML.
+
+
+  If you are using something other than gdb as your debugger, then
+  you'll need to get it to do the equivalent of 'att 1' if it doesn't do
+  it automatically.
+
+
+  An example of an alternate debugger is strace.  You can strace the
+  actual kernel as follows:
+
+  o  Run the following in a shell
+
+
+       host%
+       sh -c 'echo pid=$$; echo -n hit return; read x; exec strace -p 1 -o strace.out'
+
+
+
+  o  Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
+     by the previous command
+
+  o  Hit return in the shell, and UML will start running, and strace
+     output will start accumulating in the output file.
+
+     Note that this is different from running
+
+
+       host% strace ./linux
+
+
+
+
+  That will strace only the main UML thread, the tracing thread, which
+  doesn't do any of the actual kernel work.  It just oversees the vir-
+  tual machine.  In contrast, using strace as described above will show
+  you the low-level activity of the virtual machine.
+
+
+
+
+
+  12.  Kernel debugging examples
+
+  12.1.  The case of the hung fsck
+
+  When booting up the kernel, fsck failed, and dropped me into a shell
+  to fix things up.  I ran fsck -y, which hung:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  Setting hostname uml                    [ OK ]
+  Checking root filesystem
+  /dev/fhd0 was not cleanly unmounted, check forced.
+  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
+
+  /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
+          (i.e., without -a or -p options)
+  [ FAILED ]
+
+  *** An error occurred during the file system check.
+  *** Dropping you to a shell; the system will reboot
+  *** when you leave the shell.
+  Give root password for maintenance
+  (or type Control-D for normal startup):
+
+  [root@uml /root]# fsck -y /dev/fhd0
+  fsck -y /dev/fhd0
+  Parallelizing fsck version 1.14 (9-Jan-1999)
+  e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
+  /dev/fhd0 contains a file system with errors, check forced.
+  Pass 1: Checking inodes, blocks, and sizes
+  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.  Ignore error? yes
+
+  Inode 19780, i_blocks is 1548, should be 540.  Fix? yes
+
+  Pass 2: Checking directory structure
+  Error reading block 49405 (Attempt to read block from filesystem resulted in short read).  Ignore error? yes
+
+  Directory inode 11858, block 0, offset 0: directory corrupted
+  Salvage? yes
+
+  Missing '.' in directory inode 11858.
+  Fix? yes
+
+  Missing '..' in directory inode 11858.
+  Fix? yes
+
+
+
+
+
+  The standard drill in this sort of situation is to fire up gdb on the
+  signal thread, which, in this case, was pid 1935.  In another window,
+  I run gdb and attach pid 1935.
+
+
+
+
+       ~/linux/2.3.26/um 1016: gdb linux
+       GNU gdb 4.17.0.11 with Linux support
+       Copyright 1998 Free Software Foundation, Inc.
+       GDB is free software, covered by the GNU General Public License, and you are
+       welcome to change it and/or distribute copies of it under certain conditions.
+       Type "show copying" to see the conditions.
+       There is absolutely no warranty for GDB.  Type "show warranty" for details.
+       This GDB was configured as "i386-redhat-linux"...
+
+       (gdb) att 1935
+       Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1935
+       0x100756d9 in __wait4 ()
+
+
+
+
+
+
+  Let's see what's currently running:
+
+
+
+       (gdb) p current_task.pid
+       $1 = 0
+
+
+
+
+
+  It's the idle thread, which means that fsck went to sleep for some
+  reason and never woke up.
+
+
+  Let's guess that the last process in the process list is fsck:
+
+
+
+       (gdb) p current_task.prev_task.comm
+       $13 = "fsck.ext2\000\000\000\000\000\000"
+
+
+
+
+
+  It is, so let's see what it thinks it's up to:
+
+
+
+       (gdb) p current_task.prev_task.thread
+       $14 = {extern_pid = 1980, tracing = 0, want_tracing = 0, forking = 0,
+         kernel_stack_page = 0, signal_stack = 1342627840, syscall = {id = 4, args = {
+             3, 134973440, 1024, 0, 1024}, have_result = 0, result = 50590720},
+         request = {op = 2, u = {exec = {ip = 1350467584, sp = 2952789424}, fork = {
+               regs = {1350467584, 2952789424, 0 <repeats 15 times>}, sigstack = 0,
+               pid = 0}, switch_to = 0x507e8000, thread = {proc = 0x507e8000,
+               arg = 0xaffffdb0, flags = 0, new_pid = 0}, input_request = {
+               op = 1350467584, fd = -1342177872, proc = 0, pid = 0}}}}
+
+
+
+
+
+  The interesting things here are the fact that its .thread.syscall.id
+  is __NR_write (see the big switch in arch/um/kernel/syscall_kern.c or
+  the defines in include/asm-um/arch/unistd.h), and that it never
+  returned.  Also, its .request.op is OP_SWITCH (see
+  arch/um/include/user_util.h).  These mean that it went into a write,
+  and, for some reason, called schedule().
+
+
+  The fact that it never returned from write means that its stack should
+  be fairly interesting.  Its pid is 1980 (.thread.extern_pid).  That
+  process is being ptraced by the signal thread, so it must be detached
+  before gdb can attach it:
+
+
+
+
+
+
+
+
+
+
+  (gdb) call detach(1980)
+
+  Program received signal SIGSEGV, Segmentation fault.
+  <function called from gdb>
+  The program being debugged stopped while in a function called from GDB.
+  When the function (detach) is done executing, GDB will silently
+  stop (instead of continuing to evaluate the expression containing
+  the function call).
+  (gdb) call detach(1980)
+  $15 = 0
+
+
+
+
+
+  The first detach segfaults for some reason, and the second one
+  succeeds.
+
+
+  Now I detach from the signal thread, attach to the fsck thread, and
+  look at its stack:
+
+
+       (gdb) det
+       Detaching from program: /home/dike/linux/2.3.26/um/linux Pid 1935
+       (gdb) att 1980
+       Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1980
+       0x10070451 in __kill ()
+       (gdb) bt
+       #0  0x10070451 in __kill ()
+       #1  0x10068ccd in usr1_pid (pid=1980) at process.c:30
+       #2  0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000)
+           at process_kern.c:156
+       #3  0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
+           at process_kern.c:161
+       #4  0x10001d12 in schedule () at core.c:777
+       #5  0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
+       #6  0x1006aa10 in __down_failed () at semaphore.c:157
+       #7  0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
+       #8  0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
+       #9  <signal handler called>
+       #10 0x10155404 in errno ()
+       #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
+       #12 0x1006c5d8 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+       #13 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
+       #14 <signal handler called>
+       #15 0xc0fd in ?? ()
+       #16 0x10016647 in sys_write (fd=3,
+           buf=0x80b8800 <Address 0x80b8800 out of bounds>, count=1024)
+           at read_write.c:159
+       #17 0x1006d5b3 in execute_syscall (syscall=4, args=0x5006ef08)
+           at syscall_kern.c:254
+       #18 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35
+       #19 <signal handler called>
+       #20 0x400dc8b0 in ?? ()
+
+
+
+
+
+  The interesting things here are :
+
+  o  There are two segfaults on this stack (frames 9 and 14)
+
+  o  The first faulting address (frame 11) is 0x50000800
+
+  (gdb) p (void *)1342179328
+  $16 = (void *) 0x50000800
+
+
+
+
+
+  The initial faulting address is interesting because it is on the idle
+  thread's stack.  I had been seeing the idle thread segfault for no
+  apparent reason, and the cause looked like stack corruption.  In hopes
+  of catching the culprit in the act, I had turned off all protections
+  to that stack while the idle thread wasn't running.  This apparently
+  tripped that trap.
+
+
+  However, the more immediate problem is that second segfault and I'm
+  going to concentrate on that.  First, I want to see where the fault
+  happened, so I have to go look at the sigcontent struct in frame 8:
+
+
+
+       (gdb) up
+       #1  0x10068ccd in usr1_pid (pid=1980) at process.c:30
+       30        kill(pid, SIGUSR1);
+       (gdb)
+       #2  0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000)
+           at process_kern.c:156
+       156       usr1_pid(getpid());
+       (gdb)
+       #3  0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
+           at process_kern.c:161
+       161       _switch_to(prev, next);
+       (gdb)
+       #4  0x10001d12 in schedule () at core.c:777
+       777             switch_to(prev, next, prev);
+       (gdb)
+       #5  0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
+       71                      schedule();
+       (gdb)
+       #6  0x1006aa10 in __down_failed () at semaphore.c:157
+       157     }
+       (gdb)
+       #7  0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
+       174       segv(sc->cr2, sc->err & 2);
+       (gdb)
+       #8  0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
+       182       segv_handler(sc);
+       (gdb) p *sc
+       Cannot access memory at address 0x0.
+
+
+
+
+  That's not very useful, so I'll try a more manual method:
+
+
+       (gdb) p *((struct sigcontext *) (&sig + 1))
+       $19 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
+         __dsh = 0, edi = 1342179328, esi = 1350378548, ebp = 1342630440,
+         esp = 1342630420, ebx = 1348150624, edx = 1280, ecx = 0, eax = 0,
+         trapno = 14, err = 4, eip = 268480945, cs = 35, __csh = 0, eflags = 66118,
+         esp_at_signal = 1342630420, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
+         cr2 = 1280}
+
+
+
+  The ip is in handle_mm_fault:
+
+
+       (gdb) p (void *)268480945
+       $20 = (void *) 0x1000b1b1
+       (gdb) i sym $20
+       handle_mm_fault + 57 in section .text
+
+
+
+
+
+  Specifically, it's in pte_alloc:
+
+
+       (gdb) i line *$20
+       Line 124 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b1b1 <handle_mm_fault+57>
+          and ends at 0x1000b1b7 <handle_mm_fault+63>.
+
+
+
+
+
+  To find where in handle_mm_fault this is, I'll jump forward in the
+  code until I see an address in that procedure:
+
+
+
+       (gdb) i line *0x1000b1c0
+       Line 126 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b1b7 <handle_mm_fault+63>
+          and ends at 0x1000b1c3 <handle_mm_fault+75>.
+       (gdb) i line *0x1000b1d0
+       Line 131 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b1d0 <handle_mm_fault+88>
+          and ends at 0x1000b1da <handle_mm_fault+98>.
+       (gdb) i line *0x1000b1e0
+       Line 61 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b1da <handle_mm_fault+98>
+          and ends at 0x1000b1e1 <handle_mm_fault+105>.
+       (gdb) i line *0x1000b1f0
+       Line 134 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b1f0 <handle_mm_fault+120>
+          and ends at 0x1000b200 <handle_mm_fault+136>.
+       (gdb) i line *0x1000b200
+       Line 135 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b200 <handle_mm_fault+136>
+          and ends at 0x1000b208 <handle_mm_fault+144>.
+       (gdb) i line *0x1000b210
+       Line 139 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+          starts at address 0x1000b210 <handle_mm_fault+152>
+          and ends at 0x1000b219 <handle_mm_fault+161>.
+       (gdb) i line *0x1000b220
+       Line 1168 of "memory.c" starts at address 0x1000b21e <handle_mm_fault+166>
+          and ends at 0x1000b222 <handle_mm_fault+170>.
+
+
+
+
+
+  Something is apparently wrong with the page tables or vma_structs, so
+  lets go back to frame 11 and have a look at them:
+
+
+
+  #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
+  50        handle_mm_fault(current, vma, address, is_write);
+  (gdb) call pgd_offset_proc(vma->vm_mm, address)
+  $22 = (pgd_t *) 0x80a548c
+
+
+
+
+
+  That's pretty bogus.  Page tables aren't supposed to be in process
+  text or data areas.  Let's see what's in the vma:
+
+
+       (gdb) p *vma
+       $23 = {vm_mm = 0x507d2434, vm_start = 0, vm_end = 134512640,
+         vm_next = 0x80a4f8c, vm_page_prot = {pgprot = 0}, vm_flags = 31200,
+         vm_avl_height = 2058, vm_avl_left = 0x80a8c94, vm_avl_right = 0x80d1000,
+         vm_next_share = 0xaffffdb0, vm_pprev_share = 0xaffffe63,
+         vm_ops = 0xaffffe7a, vm_pgoff = 2952789626, vm_file = 0xafffffec,
+         vm_private_data = 0x62}
+       (gdb) p *vma.vm_mm
+       $24 = {mmap = 0x507d2434, mmap_avl = 0x0, mmap_cache = 0x8048000,
+         pgd = 0x80a4f8c, mm_users = {counter = 0}, mm_count = {counter = 134904288},
+         map_count = 134909076, mmap_sem = {count = {counter = 135073792},
+           sleepers = -1342177872, wait = {lock = <optimized out or zero length>,
+             task_list = {next = 0xaffffe63, prev = 0xaffffe7a},
+             __magic = -1342177670, __creator = -1342177300}, __magic = 98},
+         page_table_lock = {}, context = 138, start_code = 0, end_code = 0,
+         start_data = 0, end_data = 0, start_brk = 0, brk = 0, start_stack = 0,
+         arg_start = 0, arg_end = 0, env_start = 0, env_end = 0, rss = 1350381536,
+         total_vm = 0, locked_vm = 0, def_flags = 0, cpu_vm_mask = 0, swap_cnt = 0,
+         swap_address = 0, segments = 0x0}
+
+
+
+
+
+  This also pretty bogus.  With all of the 0x80xxxxx and 0xaffffxxx
+  addresses, this is looking like a stack was plonked down on top of
+  these structures.  Maybe it's a stack overflow from the next page:
+
+
+
+       (gdb) p vma
+       $25 = (struct vm_area_struct *) 0x507d2434
+
+
+
+
+
+  That's towards the lower quarter of the page, so that would have to
+  have been pretty heavy stack overflow:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  (gdb) x/100x $25
+  0x507d2434:     0x507d2434      0x00000000      0x08048000      0x080a4f8c
+  0x507d2444:     0x00000000      0x080a79e0      0x080a8c94      0x080d1000
+  0x507d2454:     0xaffffdb0      0xaffffe63      0xaffffe7a      0xaffffe7a
+  0x507d2464:     0xafffffec      0x00000062      0x0000008a      0x00000000
+  0x507d2474:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2484:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2494:     0x00000000      0x00000000      0x507d2fe0      0x00000000
+  0x507d24a4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d24b4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d24c4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d24d4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d24e4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d24f4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2504:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2514:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2524:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2534:     0x00000000      0x00000000      0x507d25dc      0x00000000
+  0x507d2544:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2554:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2564:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2574:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2584:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d2594:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d25a4:     0x00000000      0x00000000      0x00000000      0x00000000
+  0x507d25b4:     0x00000000      0x00000000      0x00000000      0x00000000
+
+
+
+
+
+  It's not stack overflow.  The only "stack-like" piece of this data is
+  the vma_struct itself.
+
+
+  At this point, I don't see any avenues to pursue, so I just have to
+  admit that I have no idea what's going on.  What I will do, though, is
+  stick a trap on the segfault handler which will stop if it sees any
+  writes to the idle thread's stack.  That was the thing that happened
+  first, and it may be that if I can catch it immediately, what's going
+  on will be somewhat clearer.
+
+
+  12.2.  Episode 2: The case of the hung fsck
+
+  After setting a trap in the SEGV handler for accesses to the signal
+  thread's stack, I reran the kernel.
+
+
+  fsck hung again, this time by hitting the trap:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  Setting hostname uml                            [ OK ]
+  Checking root filesystem
+  /dev/fhd0 contains a file system with errors, check forced.
+  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
+
+  /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
+          (i.e., without -a or -p options)
+  [ FAILED ]
+
+  *** An error occurred during the file system check.
+  *** Dropping you to a shell; the system will reboot
+  *** when you leave the shell.
+  Give root password for maintenance
+  (or type Control-D for normal startup):
+
+  [root@uml /root]# fsck -y /dev/fhd0
+  fsck -y /dev/fhd0
+  Parallelizing fsck version 1.14 (9-Jan-1999)
+  e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
+  /dev/fhd0 contains a file system with errors, check forced.
+  Pass 1: Checking inodes, blocks, and sizes
+  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.  Ignore error? yes
+
+  Pass 2: Checking directory structure
+  Error reading block 49405 (Attempt to read block from filesystem resulted in short read).  Ignore error? yes
+
+  Directory inode 11858, block 0, offset 0: directory corrupted
+  Salvage? yes
+
+  Missing '.' in directory inode 11858.
+  Fix? yes
+
+  Missing '..' in directory inode 11858.
+  Fix? yes
+
+  Untested (4127) [100fe44c]: trap_kern.c line 31
+
+
+
+
+
+  I need to get the signal thread to detach from pid 4127 so that I can
+  attach to it with gdb.  This is done by sending it a SIGUSR1, which is
+  caught by the signal thread, which detaches the process:
+
+
+       kill -USR1 4127
+
+
+
+
+
+  Now I can run gdb on it:
+
+
+
+
+
+
+
+
+
+
+
+
+
+  ~/linux/2.3.26/um 1034: gdb linux
+  GNU gdb 4.17.0.11 with Linux support
+  Copyright 1998 Free Software Foundation, Inc.
+  GDB is free software, covered by the GNU General Public License, and you are
+  welcome to change it and/or distribute copies of it under certain conditions.
+  Type "show copying" to see the conditions.
+  There is absolutely no warranty for GDB.  Type "show warranty" for details.
+  This GDB was configured as "i386-redhat-linux"...
+  (gdb) att 4127
+  Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127
+  0x10075891 in __libc_nanosleep ()
+
+
+
+
+
+  The backtrace shows that it was in a write and that the fault address
+  (address in frame 3) is 0x50000800, which is right in the middle of
+  the signal thread's stack page:
+
+
+       (gdb) bt
+       #0  0x10075891 in __libc_nanosleep ()
+       #1  0x1007584d in __sleep (seconds=1000000)
+           at ../sysdeps/unix/sysv/linux/sleep.c:78
+       #2  0x1006ce9a in stop () at user_util.c:191
+       #3  0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
+       #4  0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+       #5  0x1006c63c in kern_segv_handler (sig=11) at trap_user.c:182
+       #6  <signal handler called>
+       #7  0xc0fd in ?? ()
+       #8  0x10016647 in sys_write (fd=3, buf=0x80b8800 "R.", count=1024)
+           at read_write.c:159
+       #9  0x1006d603 in execute_syscall (syscall=4, args=0x5006ef08)
+           at syscall_kern.c:254
+       #10 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35
+       #11 <signal handler called>
+       #12 0x400dc8b0 in ?? ()
+       #13 <signal handler called>
+       #14 0x400dc8b0 in ?? ()
+       #15 0x80545fd in ?? ()
+       #16 0x804daae in ?? ()
+       #17 0x8054334 in ?? ()
+       #18 0x804d23e in ?? ()
+       #19 0x8049632 in ?? ()
+       #20 0x80491d2 in ?? ()
+       #21 0x80596b5 in ?? ()
+       (gdb) p (void *)1342179328
+       $3 = (void *) 0x50000800
+
+
+
+
+
+  Going up the stack to the segv_handler frame and looking at where in
+  the code the access happened shows that it happened near line 110 of
+  block_dev.c:
+
+
+
+
+
+
+
+
+
+  (gdb) up
+  #1  0x1007584d in __sleep (seconds=1000000)
+      at ../sysdeps/unix/sysv/linux/sleep.c:78
+  ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory.
+  (gdb)
+  #2  0x1006ce9a in stop () at user_util.c:191
+  191       while(1) sleep(1000000);
+  (gdb)
+  #3  0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
+  31          KERN_UNTESTED();
+  (gdb)
+  #4  0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+  174       segv(sc->cr2, sc->err & 2);
+  (gdb) p *sc
+  $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
+    __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484,
+    esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14,
+    err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070,
+    esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
+    cr2 = 1342179328}
+  (gdb) p (void *)268550834
+  $2 = (void *) 0x1001c2b2
+  (gdb) i sym $2
+  block_write + 1090 in section .text
+  (gdb) i line *$2
+  Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h"
+     starts at address 0x1001c2a1 <block_write+1073>
+     and ends at 0x1001c2bf <block_write+1103>.
+  (gdb) i line *0x1001c2c0
+  Line 110 of "block_dev.c" starts at address 0x1001c2bf <block_write+1103>
+     and ends at 0x1001c2e3 <block_write+1139>.
+
+
+
+
+
+  Looking at the source shows that the fault happened during a call to
+  copy_from_user to copy the data into the kernel:
+
+
+       107             count -= chars;
+       108             copy_from_user(p,buf,chars);
+       109             p += chars;
+       110             buf += chars;
+
+
+
+
+
+  p is the pointer which must contain 0x50000800, since buf contains
+  0x80b8800 (frame 8 above).  It is defined as:
+
+
+                       p = offset + bh->b_data;
+
+
+
+
+
+  I need to figure out what bh is, and it just so happens that bh is
+  passed as an argument to mark_buffer_uptodate and mark_buffer_dirty a
+  few lines later, so I do a little disassembly:
+
+
+
+
+  (gdb) disas 0x1001c2bf 0x1001c2e0
+  Dump of assembler code from 0x1001c2bf to 0x1001c2d0:
+  0x1001c2bf <block_write+1103>:  addl   %eax,0xc(%ebp)
+  0x1001c2c2 <block_write+1106>:  movl   0xfffffdd4(%ebp),%edx
+  0x1001c2c8 <block_write+1112>:  btsl   $0x0,0x18(%edx)
+  0x1001c2cd <block_write+1117>:  btsl   $0x1,0x18(%edx)
+  0x1001c2d2 <block_write+1122>:  sbbl   %ecx,%ecx
+  0x1001c2d4 <block_write+1124>:  testl  %ecx,%ecx
+  0x1001c2d6 <block_write+1126>:  jne    0x1001c2e3 <block_write+1139>
+  0x1001c2d8 <block_write+1128>:  pushl  $0x0
+  0x1001c2da <block_write+1130>:  pushl  %edx
+  0x1001c2db <block_write+1131>:  call   0x1001819c <__mark_buffer_dirty>
+  End of assembler dump.
+
+
+
+
+
+  At that point, bh is in %edx (address 0x1001c2da), which is calculated
+  at 0x1001c2c2 as %ebp + 0xfffffdd4, so I figure exactly what that is,
+  taking %ebp from the sigcontext_struct above:
+
+
+       (gdb) p (void *)1342631484
+       $5 = (void *) 0x5006ee3c
+       (gdb) p 0x5006ee3c+0xfffffdd4
+       $6 = 1342630928
+       (gdb) p (void *)$6
+       $7 = (void *) 0x5006ec10
+       (gdb) p *((void **)$7)
+       $8 = (void *) 0x50100200
+
+
+
+
+
+  Now, I look at the structure to see what's in it, and particularly,
+  what its b_data field contains:
+
+
+       (gdb) p *((struct buffer_head *)0x50100200)
+       $13 = {b_next = 0x50289380, b_blocknr = 49405, b_size = 1024, b_list = 0,
+         b_dev = 15872, b_count = {counter = 1}, b_rdev = 15872, b_state = 24,
+         b_flushtime = 0, b_next_free = 0x501001a0, b_prev_free = 0x50100260,
+         b_this_page = 0x501001a0, b_reqnext = 0x0, b_pprev = 0x507fcf58,
+         b_data = 0x50000800 "", b_page = 0x50004000,
+         b_end_io = 0x10017f60 <end_buffer_io_sync>, b_dev_id = 0x0,
+         b_rsector = 98810, b_wait = {lock = <optimized out or zero length>,
+           task_list = {next = 0x50100248, prev = 0x50100248}, __magic = 1343226448,
+           __creator = 0}, b_kiobuf = 0x0}
+
+
+
+
+
+  The b_data field is indeed 0x50000800, so the question becomes how
+  that happened.  The rest of the structure looks fine, so this probably
+  is not a case of data corruption.  It happened on purpose somehow.
+
+
+  The b_page field is a pointer to the page_struct representing the
+  0x50000000 page.  Looking at it shows the kernel's idea of the state
+  of that page:
+
+
+
+  (gdb) p *$13.b_page
+  $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0,
+    index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = {
+      next = 0x50008460, prev = 0x50019350}, wait = {
+      lock = <optimized out or zero length>, task_list = {next = 0x50004024,
+        prev = 0x50004024}, __magic = 1342193708, __creator = 0},
+    pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280,
+    zone = 0x100c5160}
+
+
+
+
+
+  Some sanity-checking: the virtual field shows the "virtual" address of
+  this page, which in this kernel is the same as its "physical" address,
+  and the page_struct itself should be mem_map[0], since it represents
+  the first page of memory:
+
+
+
+       (gdb) p (void *)1342177280
+       $18 = (void *) 0x50000000
+       (gdb) p mem_map
+       $19 = (mem_map_t *) 0x50004000
+
+
+
+
+
+  These check out fine.
+
+
+  Now to check out the page_struct itself.  In particular, the flags
+  field shows whether the page is considered free or not:
+
+
+       (gdb) p (void *)132
+       $21 = (void *) 0x84
+
+
+
+
+
+  The "reserved" bit is the high bit, which is definitely not set, so
+  the kernel considers the signal stack page to be free and available to
+  be used.
+
+
+  At this point, I jump to conclusions and start looking at my early
+  boot code, because that's where that page is supposed to be reserved.
+
+
+  In my setup_arch procedure, I have the following code which looks just
+  fine:
+
+
+
+       bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn);
+       free_bootmem(__pa(low_physmem) + bootmap_size, high_physmem - low_physmem);
+
+
+
+
+
+  Two stack pages have already been allocated, and low_physmem points to
+  the third page, which is the beginning of free memory.
+  The init_bootmem call declares the entire memory to the boot memory
+  manager, which marks it all reserved.  The free_bootmem call frees up
+  all of it, except for the first two pages.  This looks correct to me.
+
+
+  So, I decide to see init_bootmem run and make sure that it is marking
+  those first two pages as reserved.  I never get that far.
+
+
+  Stepping into init_bootmem, and looking at bootmem_map before looking
+  at what it contains shows the following:
+
+
+
+       (gdb) p bootmem_map
+       $3 = (void *) 0x50000000
+
+
+
+
+
+  Aha!  The light dawns.  That first page is doing double duty as a
+  stack and as the boot memory map.  The last thing that the boot memory
+  manager does is to free the pages used by its memory map, so this page
+  is getting freed even its marked as reserved.
+
+
+  The fix was to initialize the boot memory manager before allocating
+  those two stack pages, and then allocate them through the boot memory
+  manager.  After doing this, and fixing a couple of subsequent buglets,
+  the stack corruption problem disappeared.
+
+
+
+
+
+  13.  What to do when UML doesn't work
+
+
+
+
+  13.1.  Strange compilation errors when you build from source
+
+  As of test11, it is necessary to have "ARCH=um" in the environment or
+  on the make command line for all steps in building UML, including
+  clean, distclean, or mrproper, config, menuconfig, or xconfig, dep,
+  and linux.  If you forget for any of them, the i386 build seems to
+  contaminate the UML build.  If this happens, start from scratch with
+
+
+       host%
+       make mrproper ARCH=um
+
+
+
+
+  and repeat the build process with ARCH=um on all the steps.
+
+
+  See ``Compiling the kernel and modules''  for more details.
+
+
+  Another cause of strange compilation errors is building UML in
+  /usr/src/linux.  If you do this, the first thing you need to do is
+  clean up the mess you made.  The /usr/src/linux/asm link will now
+  point to /usr/src/linux/asm-um.  Make it point back to
+  /usr/src/linux/asm-i386.  Then, move your UML pool someplace else and
+  build it there.  Also see below, where a more specific set of symptoms
+  is described.
+
+
+
+  13.3.  A variety of panics and hangs with /tmp on a reiserfs  filesys-
+  tem
+
+  I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
+  Panics preceded by
+
+
+       Detaching pid nnnn
+
+
+
+  are diagnostic of this problem.  This is a reiserfs bug which causes a
+  thread to occasionally read stale data from a mmapped page shared with
+  another thread.  The fix is to upgrade the filesystem or to have /tmp
+  be an ext2 filesystem.
+
+
+
+  13.4.  The compile fails with errors about conflicting types for
+  'open', 'dup', and 'waitpid'
+
+  This happens when you build in /usr/src/linux.  The UML build makes
+  the include/asm link point to include/asm-um.  /usr/include/asm points
+  to /usr/src/linux/include/asm, so when that link gets moved, files
+  which need to include the asm-i386 versions of headers get the
+  incompatible asm-um versions.  The fix is to move the include/asm link
+  back to include/asm-i386 and to do UML builds someplace else.
+
+
+
+  13.5.  UML doesn't work when /tmp is an NFS filesystem
+
+  This seems to be a similar situation with the ReiserFS problem above.
+  Some versions of NFS seems not to handle mmap correctly, which UML
+  depends on.  The workaround is have /tmp be a non-NFS directory.
+
+
+  13.6.  UML hangs on boot when compiled with gprof support
+
+  If you build UML with gprof support and, early in the boot, it does
+  this
+
+
+       kernel BUG at page_alloc.c:100!
+
+
+
+
+  you have a buggy gcc.  You can work around the problem by removing
+  UM_FASTCALL from CFLAGS in arch/um/Makefile-i386.  This will open up
+  another bug, but that one is fairly hard to reproduce.
+
+
+
+  13.7.  syslogd dies with a SIGTERM on startup
+
+  The exact boot error depends on the distribution that you're booting,
+  but Debian produces this:
+
+
+       /etc/rc2.d/S10sysklogd: line 49:    93 Terminated
+       start-stop-daemon --start --quiet --exec /sbin/syslogd -- $SYSLOGD
+
+
+
+
+  This is a syslogd bug.  There's a race between a parent process
+  installing a signal handler and its child sending the signal.  See
+  this uml-devel post <http://www.geocrawler.com/lists/3/Source-
+  Forge/709/0/6612801>  for the details.
+
+
+
+  13.8.  TUN/TAP networking doesn't work on a 2.4 host
+
+  There are a couple of problems which were
+  <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed
+  out">  by Tim Robinson <timro at trkr dot net>
+
+  o  It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
+     The fix is to upgrade to something more recent and then read the
+     next item.
+
+  o  If you see
+
+
+       File descriptor in bad state
+
+
+
+  when you bring up the device inside UML, you have a header mismatch
+  between the original kernel and the upgraded one.  Make /usr/src/linux
+  point at the new headers.  This will only be a problem if you build
+  uml_net yourself.
+
+
+
+  13.9.  You can network to the host but not to other machines on the
+  net
+
+  If you can connect to the host, and the host can connect to UML, but
+  you cannot connect to any other machines, then you may need to enable
+  IP Masquerading on the host.  Usually this is only experienced when
+  using private IP addresses (192.168.x.x or 10.x.x.x) for host/UML
+  networking, rather than the public address space that your host is
+  connected to.  UML does not enable IP Masquerading, so you will need
+  to create a static rule to enable it:
+
+
+       host%
+       iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
+
+
+
+
+  Replace eth0 with the interface that you use to talk to the rest of
+  the world.
+
+
+  Documentation on IP Masquerading, and SNAT, can be found at
+  www.netfilter.org  <http://www.netfilter.org> .
+
+
+  If you can reach the local net, but not the outside Internet, then
+  that is usually a routing problem.  The UML needs a default route:
+
+
+       UML#
+       route add default gw gateway IP
+
+
+
+
+  The gateway IP can be any machine on the local net that knows how to
+  reach the outside world.  Usually, this is the host or the local net-
+  work's gateway.
+
+
+  Occasionally, we hear from someone who can reach some machines, but
+  not others on the same net, or who can reach some ports on other
+  machines, but not others.  These are usually caused by strange
+  firewalling somewhere between the UML and the other box.  You track
+  this down by running tcpdump on every interface the packets travel
+  over and see where they disappear.  When you find a machine that takes
+  the packets in, but does not send them onward, that's the culprit.
+
+
+
+  13.10.  I have no root and I want to scream
+
+  Thanks to Birgit Wahlich for telling me about this strange one.  It
+  turns out that there's a limit of six environment variables on the
+  kernel command line.  When that limit is reached or exceeded, argument
+  processing stops, which means that the 'root=' argument that UML
+  usually adds is not seen.  So, the filesystem has no idea what the
+  root device is, so it panics.
+
+
+  The fix is to put less stuff on the command line.  Glomming all your
+  setup variables into one is probably the best way to go.
+
+
+
+  13.11.  UML build conflict between ptrace.h and ucontext.h
+
+  On some older systems, /usr/include/asm/ptrace.h and
+  /usr/include/sys/ucontext.h define the same names.  So, when they're
+  included together, the defines from one completely mess up the parsing
+  of the other, producing errors like:
+       /usr/include/sys/ucontext.h:47: parse error before
+       `10'
+
+
+
+
+  plus a pile of warnings.
+
+
+  This is a libc botch, which has since been fixed, and I don't see any
+  way around it besides upgrading.
+
+
+
+  13.12.  The UML BogoMips is exactly half the host's BogoMips
+
+  On i386 kernels, there are two ways of running the loop that is used
+  to calculate the BogoMips rating, using the TSC if it's there or using
+  a one-instruction loop.  The TSC produces twice the BogoMips as the
+  loop.  UML uses the loop, since it has nothing resembling a TSC, and
+  will get almost exactly the same BogoMips as a host using the loop.
+  However, on a host with a TSC, its BogoMips will be double the loop
+  BogoMips, and therefore double the UML BogoMips.
+
+
+
+  13.13.  When you run UML, it immediately segfaults
+
+  If the host is configured with the 2G/2G address space split, that's
+  why.  See ``UML on 2G/2G hosts''  for the details on getting UML to
+  run on your host.
+
+
+
+  13.14.  xterms appear, then immediately disappear
+
+  If you're running an up to date kernel with an old release of
+  uml_utilities, the port-helper program will not work properly, so
+  xterms will exit straight after they appear. The solution is to
+  upgrade to the latest release of uml_utilities.  Usually this problem
+  occurs when you have installed a packaged release of UML then compiled
+  your own development kernel without upgrading the uml_utilities from
+  the source distribution.
+
+
+
+  13.15.  Any other panic, hang, or strange behavior
+
+  If you're seeing truly strange behavior, such as hangs or panics that
+  happen in random places, or you try running the debugger to see what's
+  happening and it acts strangely, then it could be a problem in the
+  host kernel.  If you're not running a stock Linus or -ac kernel, then
+  try that.  An early version of the preemption patch and a 2.4.10 SuSE
+  kernel have caused very strange problems in UML.
+
+
+  Otherwise, let me know about it.  Send a message to one of the UML
+  mailing lists - either the developer list - user-mode-linux-devel at
+  lists dot sourceforge dot net (subscription info) or the user list -
+  user-mode-linux-user at lists dot sourceforge do net (subscription
+  info), whichever you prefer.  Don't assume that everyone knows about
+  it and that a fix is imminent.
+
+
+  If you want to be super-helpful, read ``Diagnosing Problems'' and
+  follow the instructions contained therein.
+  14.  Diagnosing Problems
+
+
+  If you get UML to crash, hang, or otherwise misbehave, you should
+  report this on one of the project mailing lists, either the developer
+  list - user-mode-linux-devel at lists dot sourceforge dot net
+  (subscription info) or the user list - user-mode-linux-user at lists
+  dot sourceforge dot net (subscription info).  When you do, it is
+  likely that I will want more information.  So, it would be helpful to
+  read the stuff below, do whatever is applicable in your case, and
+  report the results to the list.
+
+
+  For any diagnosis, you're going to need to build a debugging kernel.
+  The binaries from this site aren't debuggable.  If you haven't done
+  this before, read about ``Compiling the kernel and modules''  and
+  ``Kernel debugging''  UML first.
+
+
+  14.1.  Case 1 : Normal kernel panics
+
+  The most common case is for a normal thread to panic.  To debug this,
+  you will need to run it under the debugger (add 'debug' to the command
+  line).  An xterm will start up with gdb running inside it.  Continue
+  it when it stops in start_kernel and make it crash.  Now ^C gdb and
+
+
+  If the panic was a "Kernel mode fault", then there will be a segv
+  frame on the stack and I'm going to want some more information.  The
+  stack might look something like this:
+
+
+       (UML gdb)  backtrace
+       #0  0x1009bf76 in __sigprocmask (how=1, set=0x5f347940, oset=0x0)
+           at ../sysdeps/unix/sysv/linux/sigprocmask.c:49
+       #1  0x10091411 in change_sig (signal=10, on=1) at process.c:218
+       #2  0x10094785 in timer_handler (sig=26) at time_kern.c:32
+       #3  0x1009bf38 in __restore ()
+           at ../sysdeps/unix/sysv/linux/i386/sigaction.c:125
+       #4  0x1009534c in segv (address=8, ip=268849158, is_write=2, is_user=0)
+           at trap_kern.c:66
+       #5  0x10095c04 in segv_handler (sig=11) at trap_user.c:285
+       #6  0x1009bf38 in __restore ()
+
+
+
+
+  I'm going to want to see the symbol and line information for the value
+  of ip in the segv frame.  In this case, you would do the following:
+
+
+       (UML gdb)  i sym 268849158
+
+
+
+
+  and
+
+
+       (UML gdb)  i line *268849158
+
+
+
+
+  The reason for this is the __restore frame right above the segv_han-
+  dler frame is hiding the frame that actually segfaulted.  So, I have
+  to get that information from the faulting ip.
+
+
+  14.2.  Case 2 : Tracing thread panics
+
+  The less common and more painful case is when the tracing thread
+  panics.  In this case, the kernel debugger will be useless because it
+  needs a healthy tracing thread in order to work.  The first thing to
+  do is get a backtrace from the tracing thread.  This is done by
+  figuring out what its pid is, firing up gdb, and attaching it to that
+  pid.  You can figure out the tracing thread pid by looking at the
+  first line of the console output, which will look like this:
+
+
+       tracing thread pid = 15851
+
+
+
+
+  or by running ps on the host and finding the line that looks like
+  this:
+
+
+       jdike 15851 4.5 0.4 132568 1104 pts/0 S 21:34 0:05 ./linux [(tracing thread)]
+
+
+
+
+  If the panic was 'segfault in signals', then follow the instructions
+  above for collecting information about the location of the seg fault.
+
+
+  If the tracing thread flaked out all by itself, then send that
+  backtrace in and wait for our crack debugging team to fix the problem.
+
+
+  14.3.  Case 3 : Tracing thread panics caused by other threads
+
+  However, there are cases where the misbehavior of another thread
+  caused the problem.  The most common panic of this type is:
+
+
+       wait_for_stop failed to wait for  <pid>  to stop with  <signal number>
+
+
+
+
+  In this case, you'll need to get a backtrace from the process men-
+  tioned in the panic, which is complicated by the fact that the kernel
+  debugger is defunct and without some fancy footwork, another gdb can't
+  attach to it.  So, this is how the fancy footwork goes:
+
+  In a shell:
+
+
+       host% kill -STOP pid
+
+
+
+
+  Run gdb on the tracing thread as described in case 2 and do:
+
+
+       (host gdb)  call detach(pid)
+
+
+  If you get a segfault, do it again.  It always works the second time.
+
+  Detach from the tracing thread and attach to that other thread:
+
+
+       (host gdb)  detach
+
+
+
+
+
+
+       (host gdb)  attach pid
+
+
+
+
+  If gdb hangs when attaching to that process, go back to a shell and
+  do:
+
+
+       host%
+       kill -CONT pid
+
+
+
+
+  And then get the backtrace:
+
+
+       (host gdb)  backtrace
+
+
+
+
+
+  14.4.  Case 4 : Hangs
+
+  Hangs seem to be fairly rare, but they sometimes happen.  When a hang
+  happens, we need a backtrace from the offending process.  Run the
+  kernel debugger as described in case 1 and get a backtrace.  If the
+  current process is not the idle thread, then send in the backtrace.
+  You can tell that it's the idle thread if the stack looks like this:
+
+
+       #0  0x100b1401 in __libc_nanosleep ()
+       #1  0x100a2885 in idle_sleep (secs=10) at time.c:122
+       #2  0x100a546f in do_idle () at process_kern.c:445
+       #3  0x100a5508 in cpu_idle () at process_kern.c:471
+       #4  0x100ec18f in start_kernel () at init/main.c:592
+       #5  0x100a3e10 in start_kernel_proc (unused=0x0) at um_arch.c:71
+       #6  0x100a383f in signal_tramp (arg=0x100a3dd8) at trap_user.c:50
+
+
+
+
+  If this is the case, then some other process is at fault, and went to
+  sleep when it shouldn't have.  Run ps on the host and figure out which
+  process should not have gone to sleep and stayed asleep.  Then attach
+  to it with gdb and get a backtrace as described in case 3.
+
+
+
+
+
+
+  15.  Thanks
+
+
+  A number of people have helped this project in various ways, and this
+  page gives recognition where recognition is due.
+
+
+  If you're listed here and you would prefer a real link on your name,
+  or no link at all, instead of the despammed email address pseudo-link,
+  let me know.
+
+
+  If you're not listed here and you think maybe you should be, please
+  let me know that as well.  I try to get everyone, but sometimes my
+  bookkeeping lapses and I forget about contributions.
+
+
+  15.1.  Code and Documentation
+
+  Rusty Russell <rusty at linuxcare.com.au>  -
+
+  o  wrote the  HOWTO <http://user-mode-
+     linux.sourceforge.net/UserModeLinux-HOWTO.html>
+
+  o  prodded me into making this project official and putting it on
+     SourceForge
+
+  o  came up with the way cool UML logo <http://user-mode-
+     linux.sourceforge.net/uml-small.png>
+
+  o  redid the config process
+
+
+  Peter Moulder <reiter at netspace.net.au>  - Fixed my config and build
+  processes, and added some useful code to the block driver
+
+
+  Bill Stearns <wstearns at pobox.com>  -
+
+  o  HOWTO updates
+
+  o  lots of bug reports
+
+  o  lots of testing
+
+  o  dedicated a box (uml.ists.dartmouth.edu) to support UML development
+
+  o  wrote the mkrootfs script, which allows bootable filesystems of
+     RPM-based distributions to be cranked out
+
+  o  cranked out a large number of filesystems with said script
+
+
+  Jim Leu <jleu at mindspring.com>  - Wrote the virtual ethernet driver
+  and associated usermode tools
+
+  Lars Brinkhoff <http://lars.nocrew.org/>  - Contributed the ptrace
+  proxy from his own  project <http://a386.nocrew.org/> to allow easier
+  kernel debugging
+
+
+  Andrea Arcangeli <andrea at suse.de>  - Redid some of the early boot
+  code so that it would work on machines with Large File Support
+
+
+  Chris Emerson <http://www.chiark.greenend.org.uk/~cemerson/>  - Did
+  the first UML port to Linux/ppc
+
+
+  Harald Welte <laforge at gnumonks.org>  - Wrote the multicast
+  transport for the network driver
+
+
+  Jorgen Cederlof - Added special file support to hostfs
+
+
+  Greg Lonnon  <glonnon at ridgerun dot com>  - Changed the ubd driver
+  to allow it to layer a COW file on a shared read-only filesystem and
+  wrote the iomem emulation support
+
+
+  Henrik Nordstrom <http://hem.passagen.se/hno/>  - Provided a variety
+  of patches, fixes, and clues
+
+
+  Lennert Buytenhek - Contributed various patches, a rewrite of the
+  network driver, the first implementation of the mconsole driver, and
+  did the bulk of the work needed to get SMP working again.
+
+
+  Yon Uriarte - Fixed the TUN/TAP network backend while I slept.
+
+
+  Adam Heath - Made a bunch of nice cleanups to the initialization code,
+  plus various other small patches.
+
+
+  Matt Zimmerman - Matt volunteered to be the UML Debian maintainer and
+  is doing a real nice job of it.  He also noticed and fixed a number of
+  actually and potentially exploitable security holes in uml_net.  Plus
+  the occasional patch.  I like patches.
+
+
+  James McMechan - James seems to have taken over maintenance of the ubd
+  driver and is doing a nice job of it.
+
+
+  Chandan Kudige - wrote the umlgdb script which automates the reloading
+  of module symbols.
+
+
+  Steve Schmidtke - wrote the UML slirp transport and hostaudio drivers,
+  enabling UML processes to access audio devices on the host. He also
+  submitted patches for the slip transport and lots of other things.
+
+
+  David Coulson <http://davidcoulson.net>  -
+
+  o  Set up the usermodelinux.org <http://usermodelinux.org>  site,
+     which is a great way of keeping the UML user community on top of
+     UML goings-on.
+
+  o  Site documentation and updates
+
+  o  Nifty little UML management daemon  UMLd
+     <http://uml.openconsultancy.com/umld/>
+
+  o  Lots of testing and bug reports
+
+
+
+
+  15.2.  Flushing out bugs
+
+
+
+  o  Yuri Pudgorodsky
+
+  o  Gerald Britton
+
+  o  Ian Wehrman
+
+  o  Gord Lamb
+
+  o  Eugene Koontz
+
+  o  John H. Hartman
+
+  o  Anders Karlsson
+
+  o  Daniel Phillips
+
+  o  John Fremlin
+
+  o  Rainer Burgstaller
+
+  o  James Stevenson
+
+  o  Matt Clay
+
+  o  Cliff Jefferies
+
+  o  Geoff Hoff
+
+  o  Lennert Buytenhek
+
+  o  Al Viro
+
+  o  Frank Klingenhoefer
+
+  o  Livio Baldini Soares
+
+  o  Jon Burgess
+
+  o  Petru Paler
+
+  o  Paul
+
+  o  Chris Reahard
+
+  o  Sverker Nilsson
+
+  o  Gong Su
+
+  o  johan verrept
+
+  o  Bjorn Eriksson
+
+  o  Lorenzo Allegrucci
+
+  o  Muli Ben-Yehuda
+
+  o  David Mansfield
+
+  o  Howard Goff
+
+  o  Mike Anderson
+
+  o  John Byrne
+
+  o  Sapan J. Batia
+
+  o  Iris Huang
+
+  o  Jan Hudec
+
+  o  Voluspa
+
+
+
+
+  15.3.  Buglets and clean-ups
+
+
+
+  o  Dave Zarzycki
+
+  o  Adam Lazur
+
+  o  Boria Feigin
+
+  o  Brian J. Murrell
+
+  o  JS
+
+  o  Roman Zippel
+
+  o  Wil Cooley
+
+  o  Ayelet Shemesh
+
+  o  Will Dyson
+
+  o  Sverker Nilsson
+
+  o  dvorak
+
+  o  v.naga srinivas
+
+  o  Shlomi Fish
+
+  o  Roger Binns
+
+  o  johan verrept
+
+  o  MrChuoi
+
+  o  Peter Cleve
+
+  o  Vincent Guffens
+
+  o  Nathan Scott
+
+  o  Patrick Caulfield
+
+  o  jbearce
+
+  o  Catalin Marinas
+
+  o  Shane Spencer
+
+  o  Zou Min
+
+
+  o  Ryan Boder
+
+  o  Lorenzo Colitti
+
+  o  Gwendal Grignou
+
+  o  Andre' Breiler
+
+  o  Tsutomu Yasuda
+
+
+
+  15.4.  Case Studies
+
+
+  o  Jon Wright
+
+  o  William McEwan
+
+  o  Michael Richardson
+
+
+
+  15.5.  Other contributions
+
+
+  Bill Carr <Bill.Carr at compaq.com>  made the Red Hat mkrootfs script
+  work with RH 6.2.
+
+  Michael Jennings <mikejen at hevanet.com>  sent in some material which
+  is now gracing the top of the  index  page <http://user-mode-
+  linux.sourceforge.net/>  of this site.
+
+  SGI <http://www.sgi.com>  (and more specifically Ralf Baechle <ralf at
+  uni-koblenz.de> ) gave me an account on oss.sgi.com
+  <http://www.oss.sgi.com> .  The bandwidth there made it possible to
+  produce most of the filesystems available on the project download
+  page.
+
+  Laurent Bonnaud <Laurent.Bonnaud at inpg.fr>  took the old grotty
+  Debian filesystem that I've been distributing and updated it to 2.2.
+  It is now available by itself here.
+
+  Rik van Riel gave me some ftp space on ftp.nl.linux.org so I can make
+  releases even when Sourceforge is broken.
+
+  Rodrigo de Castro looked at my broken pte code and told me what was
+  wrong with it, letting me fix a long-standing (several weeks) and
+  serious set of bugs.
+
+  Chris Reahard built a specialized root filesystem for running a DNS
+  server jailed inside UML.  It's available from the download
+  <http://user-mode-linux.sourceforge.net/dl-sf.html>  page in the Jail
+  Filesystems section.
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Documentation/virtual/index.rst b/Documentation/virtual/index.rst
deleted file mode 100644
index 062ffb527043..000000000000
--- a/Documentation/virtual/index.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-============================
-Linux Virtualization Support
-============================
-
-.. toctree::
-   :maxdepth: 2
-
-   kvm/index
-   paravirt_ops
-
-.. only:: html and subproject
-
-   Indices
-   =======
-
-   * :ref:`genindex`
diff --git a/Documentation/virtual/kvm/amd-memory-encryption.rst b/Documentation/virtual/kvm/amd-memory-encryption.rst
deleted file mode 100644
index d18c97b4e140..000000000000
--- a/Documentation/virtual/kvm/amd-memory-encryption.rst
+++ /dev/null
@@ -1,250 +0,0 @@
-======================================
-Secure Encrypted Virtualization (SEV)
-======================================
-
-Overview
-========
-
-Secure Encrypted Virtualization (SEV) is a feature found on AMD processors.
-
-SEV is an extension to the AMD-V architecture which supports running
-virtual machines (VMs) under the control of a hypervisor. When enabled,
-the memory contents of a VM will be transparently encrypted with a key
-unique to that VM.
-
-The hypervisor can determine the SEV support through the CPUID
-instruction. The CPUID function 0x8000001f reports information related
-to SEV::
-
-	0x8000001f[eax]:
-			Bit[1] 	indicates support for SEV
-	    ...
-		  [ecx]:
-			Bits[31:0]  Number of encrypted guests supported simultaneously
-
-If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
-(MSR_K7_HWCR) can be used to determine if it can be enabled::
-
-	0xc001_0010:
-		Bit[23]	   1 = memory encryption can be enabled
-			   0 = memory encryption can not be enabled
-
-	0xc001_0015:
-		Bit[0]	   1 = memory encryption can be enabled
-			   0 = memory encryption can not be enabled
-
-When SEV support is available, it can be enabled in a specific VM by
-setting the SEV bit before executing VMRUN.::
-
-	VMCB[0x90]:
-		Bit[1]	    1 = SEV is enabled
-			    0 = SEV is disabled
-
-SEV hardware uses ASIDs to associate a memory encryption key with a VM.
-Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
-defined in the CPUID 0x8000001f[ecx] field.
-
-SEV Key Management
-==================
-
-The SEV guest key management is handled by a separate processor called the AMD
-Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
-key management interface to perform common hypervisor activities such as
-encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
-information, see the SEV Key Management spec [api-spec]_
-
-KVM implements the following commands to support common lifecycle events of SEV
-guests, such as launching, running, snapshotting, migrating and decommissioning.
-
-1. KVM_SEV_INIT
----------------
-
-The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
-context. In a typical workflow, this command should be the first command issued.
-
-Returns: 0 on success, -negative on error
-
-2. KVM_SEV_LAUNCH_START
------------------------
-
-The KVM_SEV_LAUNCH_START command is used for creating the memory encryption
-context. To create the encryption context, user must provide a guest policy,
-the owner's public Diffie-Hellman (PDH) key and session information.
-
-Parameters: struct  kvm_sev_launch_start (in/out)
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_launch_start {
-                __u32 handle;           /* if zero then firmware creates a new handle */
-                __u32 policy;           /* guest's policy */
-
-                __u64 dh_uaddr;         /* userspace address pointing to the guest owner's PDH key */
-                __u32 dh_len;
-
-                __u64 session_addr;     /* userspace address which points to the guest session information */
-                __u32 session_len;
-        };
-
-On success, the 'handle' field contains a new handle and on error, a negative value.
-
-For more details, see SEV spec Section 6.2.
-
-3. KVM_SEV_LAUNCH_UPDATE_DATA
------------------------------
-
-The KVM_SEV_LAUNCH_UPDATE_DATA is used for encrypting a memory region. It also
-calculates a measurement of the memory contents. The measurement is a signature
-of the memory contents that can be sent to the guest owner as an attestation
-that the memory was encrypted correctly by the firmware.
-
-Parameters (in): struct  kvm_sev_launch_update_data
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_launch_update {
-                __u64 uaddr;    /* userspace address to be encrypted (must be 16-byte aligned) */
-                __u32 len;      /* length of the data to be encrypted (must be 16-byte aligned) */
-        };
-
-For more details, see SEV spec Section 6.3.
-
-4. KVM_SEV_LAUNCH_MEASURE
--------------------------
-
-The KVM_SEV_LAUNCH_MEASURE command is used to retrieve the measurement of the
-data encrypted by the KVM_SEV_LAUNCH_UPDATE_DATA command. The guest owner may
-wait to provide the guest with confidential information until it can verify the
-measurement. Since the guest owner knows the initial contents of the guest at
-boot, the measurement can be verified by comparing it to what the guest owner
-expects.
-
-Parameters (in): struct  kvm_sev_launch_measure
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_launch_measure {
-                __u64 uaddr;    /* where to copy the measurement */
-                __u32 len;      /* length of measurement blob */
-        };
-
-For more details on the measurement verification flow, see SEV spec Section 6.4.
-
-5. KVM_SEV_LAUNCH_FINISH
-------------------------
-
-After completion of the launch flow, the KVM_SEV_LAUNCH_FINISH command can be
-issued to make the guest ready for the execution.
-
-Returns: 0 on success, -negative on error
-
-6. KVM_SEV_GUEST_STATUS
------------------------
-
-The KVM_SEV_GUEST_STATUS command is used to retrieve status information about a
-SEV-enabled guest.
-
-Parameters (out): struct kvm_sev_guest_status
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_guest_status {
-                __u32 handle;   /* guest handle */
-                __u32 policy;   /* guest policy */
-                __u8 state;     /* guest state (see enum below) */
-        };
-
-SEV guest state:
-
-::
-
-        enum {
-        SEV_STATE_INVALID = 0;
-        SEV_STATE_LAUNCHING,    /* guest is currently being launched */
-        SEV_STATE_SECRET,       /* guest is being launched and ready to accept the ciphertext data */
-        SEV_STATE_RUNNING,      /* guest is fully launched and running */
-        SEV_STATE_RECEIVING,    /* guest is being migrated in from another SEV machine */
-        SEV_STATE_SENDING       /* guest is getting migrated out to another SEV machine */
-        };
-
-7. KVM_SEV_DBG_DECRYPT
-----------------------
-
-The KVM_SEV_DEBUG_DECRYPT command can be used by the hypervisor to request the
-firmware to decrypt the data at the given memory region.
-
-Parameters (in): struct kvm_sev_dbg
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_dbg {
-                __u64 src_uaddr;        /* userspace address of data to decrypt */
-                __u64 dst_uaddr;        /* userspace address of destination */
-                __u32 len;              /* length of memory region to decrypt */
-        };
-
-The command returns an error if the guest policy does not allow debugging.
-
-8. KVM_SEV_DBG_ENCRYPT
-----------------------
-
-The KVM_SEV_DEBUG_ENCRYPT command can be used by the hypervisor to request the
-firmware to encrypt the data at the given memory region.
-
-Parameters (in): struct kvm_sev_dbg
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_dbg {
-                __u64 src_uaddr;        /* userspace address of data to encrypt */
-                __u64 dst_uaddr;        /* userspace address of destination */
-                __u32 len;              /* length of memory region to encrypt */
-        };
-
-The command returns an error if the guest policy does not allow debugging.
-
-9. KVM_SEV_LAUNCH_SECRET
-------------------------
-
-The KVM_SEV_LAUNCH_SECRET command can be used by the hypervisor to inject secret
-data after the measurement has been validated by the guest owner.
-
-Parameters (in): struct kvm_sev_launch_secret
-
-Returns: 0 on success, -negative on error
-
-::
-
-        struct kvm_sev_launch_secret {
-                __u64 hdr_uaddr;        /* userspace address containing the packet header */
-                __u32 hdr_len;
-
-                __u64 guest_uaddr;      /* the guest memory region where the secret should be injected */
-                __u32 guest_len;
-
-                __u64 trans_uaddr;      /* the hypervisor memory region which contains the secret */
-                __u32 trans_len;
-        };
-
-References
-==========
-
-
-See [white-paper]_, [api-spec]_, [amd-apm]_ and [kvm-forum]_ for more info.
-
-.. [white-paper] http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_Memory_Encryption_Whitepaper_v7-Public.pdf
-.. [api-spec] http://support.amd.com/TechDocs/55766_SEV-KM_API_Specification.pdf
-.. [amd-apm] http://support.amd.com/TechDocs/24593.pdf (section 15.34)
-.. [kvm-forum]  http://www.linux-kvm.org/images/7/74/02x08A-Thomas_Lendacky-AMDs_Virtualizatoin_Memory_Encryption_Technology.pdf
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
deleted file mode 100644
index e54a3f51ddc5..000000000000
--- a/Documentation/virtual/kvm/api.txt
+++ /dev/null
@@ -1,5296 +0,0 @@
-The Definitive KVM (Kernel-based Virtual Machine) API Documentation
-===================================================================
-
-1. General description
-----------------------
-
-The kvm API is a set of ioctls that are issued to control various aspects
-of a virtual machine.  The ioctls belong to three classes:
-
- - System ioctls: These query and set global attributes which affect the
-   whole kvm subsystem.  In addition a system ioctl is used to create
-   virtual machines.
-
- - VM ioctls: These query and set attributes that affect an entire virtual
-   machine, for example memory layout.  In addition a VM ioctl is used to
-   create virtual cpus (vcpus) and devices.
-
-   VM ioctls must be issued from the same process (address space) that was
-   used to create the VM.
-
- - vcpu ioctls: These query and set attributes that control the operation
-   of a single virtual cpu.
-
-   vcpu ioctls should be issued from the same thread that was used to create
-   the vcpu, except for asynchronous vcpu ioctl that are marked as such in
-   the documentation.  Otherwise, the first ioctl after switching threads
-   could see a performance impact.
-
- - device ioctls: These query and set attributes that control the operation
-   of a single device.
-
-   device ioctls must be issued from the same process (address space) that
-   was used to create the VM.
-
-2. File descriptors
--------------------
-
-The kvm API is centered around file descriptors.  An initial
-open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
-can be used to issue system ioctls.  A KVM_CREATE_VM ioctl on this
-handle will create a VM file descriptor which can be used to issue VM
-ioctls.  A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will
-create a virtual cpu or device and return a file descriptor pointing to
-the new resource.  Finally, ioctls on a vcpu or device fd can be used
-to control the vcpu or device.  For vcpus, this includes the important
-task of actually running guest code.
-
-In general file descriptors can be migrated among processes by means
-of fork() and the SCM_RIGHTS facility of unix domain socket.  These
-kinds of tricks are explicitly not supported by kvm.  While they will
-not cause harm to the host, their actual behavior is not guaranteed by
-the API.  See "General description" for details on the ioctl usage
-model that is supported by KVM.
-
-It is important to note that althought VM ioctls may only be issued from
-the process that created the VM, a VM's lifecycle is associated with its
-file descriptor, not its creator (process).  In other words, the VM and
-its resources, *including the associated address space*, are not freed
-until the last reference to the VM's file descriptor has been released.
-For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will
-not be freed until both the parent (original) process and its child have
-put their references to the VM's file descriptor.
-
-Because a VM's resources are not freed until the last reference to its
-file descriptor is released, creating additional references to a VM via
-via fork(), dup(), etc... without careful consideration is strongly
-discouraged and may have unwanted side effects, e.g. memory allocated
-by and on behalf of the VM's process may not be freed/unaccounted when
-the VM is shut down.
-
-
-3. Extensions
--------------
-
-As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
-incompatible change are allowed.  However, there is an extension
-facility that allows backward-compatible extensions to the API to be
-queried and used.
-
-The extension mechanism is not based on the Linux version number.
-Instead, kvm defines extension identifiers and a facility to query
-whether a particular extension identifier is available.  If it is, a
-set of ioctls is available for application use.
-
-
-4. API description
-------------------
-
-This section describes ioctls that can be used to control kvm guests.
-For each ioctl, the following information is provided along with a
-description:
-
-  Capability: which KVM extension provides this ioctl.  Can be 'basic',
-      which means that is will be provided by any kernel that supports
-      API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
-      means availability needs to be checked with KVM_CHECK_EXTENSION
-      (see section 4.4), or 'none' which means that while not all kernels
-      support this ioctl, there's no capability bit to check its
-      availability: for kernels that don't support the ioctl,
-      the ioctl returns -ENOTTY.
-
-  Architectures: which instruction set architectures provide this ioctl.
-      x86 includes both i386 and x86_64.
-
-  Type: system, vm, or vcpu.
-
-  Parameters: what parameters are accepted by the ioctl.
-
-  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
-      are not detailed, but errors with specific meanings are.
-
-
-4.1 KVM_GET_API_VERSION
-
-Capability: basic
-Architectures: all
-Type: system ioctl
-Parameters: none
-Returns: the constant KVM_API_VERSION (=12)
-
-This identifies the API version as the stable kvm API. It is not
-expected that this number will change.  However, Linux 2.6.20 and
-2.6.21 report earlier versions; these are not documented and not
-supported.  Applications should refuse to run if KVM_GET_API_VERSION
-returns a value other than 12.  If this check passes, all ioctls
-described as 'basic' will be available.
-
-
-4.2 KVM_CREATE_VM
-
-Capability: basic
-Architectures: all
-Type: system ioctl
-Parameters: machine type identifier (KVM_VM_*)
-Returns: a VM fd that can be used to control the new virtual machine.
-
-The new VM has no virtual cpus and no memory.
-You probably want to use 0 as machine type.
-
-In order to create user controlled virtual machines on S390, check
-KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
-privileged user (CAP_SYS_ADMIN).
-
-To use hardware assisted virtualization on MIPS (VZ ASE) rather than
-the default trap & emulate implementation (which changes the virtual
-memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
-flag KVM_VM_MIPS_VZ.
-
-
-On arm64, the physical address size for a VM (IPA Size limit) is limited
-to 40bits by default. The limit can be configured if the host supports the
-extension KVM_CAP_ARM_VM_IPA_SIZE. When supported, use
-KVM_VM_TYPE_ARM_IPA_SIZE(IPA_Bits) to set the size in the machine type
-identifier, where IPA_Bits is the maximum width of any physical
-address used by the VM. The IPA_Bits is encoded in bits[7-0] of the
-machine type identifier.
-
-e.g, to configure a guest to use 48bit physical address size :
-
-    vm_fd = ioctl(dev_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(48));
-
-The requested size (IPA_Bits) must be :
-  0 - Implies default size, 40bits (for backward compatibility)
-
-  or
-
-  N - Implies N bits, where N is a positive integer such that,
-      32 <= N <= Host_IPA_Limit
-
-Host_IPA_Limit is the maximum possible value for IPA_Bits on the host and
-is dependent on the CPU capability and the kernel configuration. The limit can
-be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
-ioctl() at run-time.
-
-Please note that configuring the IPA size does not affect the capability
-exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
-size of the address translated by the stage2 level (guest physical to
-host physical address translations).
-
-
-4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
-
-Capability: basic, KVM_CAP_GET_MSR_FEATURES for KVM_GET_MSR_FEATURE_INDEX_LIST
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_msr_list (in/out)
-Returns: 0 on success; -1 on error
-Errors:
-  EFAULT:    the msr index list cannot be read from or written to
-  E2BIG:     the msr index list is to be to fit in the array specified by
-             the user.
-
-struct kvm_msr_list {
-	__u32 nmsrs; /* number of msrs in entries */
-	__u32 indices[0];
-};
-
-The user fills in the size of the indices array in nmsrs, and in return
-kvm adjusts nmsrs to reflect the actual number of msrs and fills in the
-indices array with their numbers.
-
-KVM_GET_MSR_INDEX_LIST returns the guest msrs that are supported.  The list
-varies by kvm version and host processor, but does not change otherwise.
-
-Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
-not returned in the MSR list, as different vcpus can have a different number
-of banks, as set via the KVM_X86_SETUP_MCE ioctl.
-
-KVM_GET_MSR_FEATURE_INDEX_LIST returns the list of MSRs that can be passed
-to the KVM_GET_MSRS system ioctl.  This lets userspace probe host capabilities
-and processor features that are exposed via MSRs (e.g., VMX capabilities).
-This list also varies by kvm version and host processor, but does not change
-otherwise.
-
-
-4.4 KVM_CHECK_EXTENSION
-
-Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
-Architectures: all
-Type: system ioctl, vm ioctl
-Parameters: extension identifier (KVM_CAP_*)
-Returns: 0 if unsupported; 1 (or some other positive integer) if supported
-
-The API allows the application to query about extensions to the core
-kvm API.  Userspace passes an extension identifier (an integer) and
-receives an integer that describes the extension availability.
-Generally 0 means no and 1 means yes, but some extensions may report
-additional information in the integer return value.
-
-Based on their initialization different VMs may have different capabilities.
-It is thus encouraged to use the vm ioctl to query for capabilities (available
-with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
-
-4.5 KVM_GET_VCPU_MMAP_SIZE
-
-Capability: basic
-Architectures: all
-Type: system ioctl
-Parameters: none
-Returns: size of vcpu mmap area, in bytes
-
-The KVM_RUN ioctl (cf.) communicates with userspace via a shared
-memory region.  This ioctl returns the size of that region.  See the
-KVM_RUN documentation for details.
-
-
-4.6 KVM_SET_MEMORY_REGION
-
-Capability: basic
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_memory_region (in)
-Returns: 0 on success, -1 on error
-
-This ioctl is obsolete and has been removed.
-
-
-4.7 KVM_CREATE_VCPU
-
-Capability: basic
-Architectures: all
-Type: vm ioctl
-Parameters: vcpu id (apic id on x86)
-Returns: vcpu fd on success, -1 on error
-
-This API adds a vcpu to a virtual machine. No more than max_vcpus may be added.
-The vcpu id is an integer in the range [0, max_vcpu_id).
-
-The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
-the KVM_CHECK_EXTENSION ioctl() at run-time.
-The maximum possible value for max_vcpus can be retrieved using the
-KVM_CAP_MAX_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time.
-
-If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
-cpus max.
-If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
-same as the value returned from KVM_CAP_NR_VCPUS.
-
-The maximum possible value for max_vcpu_id can be retrieved using the
-KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time.
-
-If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id
-is the same as the value returned from KVM_CAP_MAX_VCPUS.
-
-On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
-threads in one or more virtual CPU cores.  (This is because the
-hardware requires all the hardware threads in a CPU core to be in the
-same partition.)  The KVM_CAP_PPC_SMT capability indicates the number
-of vcpus per virtual core (vcore).  The vcore id is obtained by
-dividing the vcpu id by the number of vcpus per vcore.  The vcpus in a
-given vcore will always be in the same physical core as each other
-(though that might be a different physical core from time to time).
-Userspace can control the threading (SMT) mode of the guest by its
-allocation of vcpu ids.  For example, if userspace wants
-single-threaded guest vcpus, it should make all vcpu ids be a multiple
-of the number of vcpus per vcore.
-
-For virtual cpus that have been created with S390 user controlled virtual
-machines, the resulting vcpu fd can be memory mapped at page offset
-KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
-cpu's hardware control block.
-
-
-4.8 KVM_GET_DIRTY_LOG (vm ioctl)
-
-Capability: basic
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_dirty_log (in/out)
-Returns: 0 on success, -1 on error
-
-/* for KVM_GET_DIRTY_LOG */
-struct kvm_dirty_log {
-	__u32 slot;
-	__u32 padding;
-	union {
-		void __user *dirty_bitmap; /* one bit per page */
-		__u64 padding;
-	};
-};
-
-Given a memory slot, return a bitmap containing any pages dirtied
-since the last call to this ioctl.  Bit 0 is the first page in the
-memory slot.  Ensure the entire structure is cleared to avoid padding
-issues.
-
-If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
-the address space for which you want to return the dirty bitmap.
-They must be less than the value that KVM_CHECK_EXTENSION returns for
-the KVM_CAP_MULTI_ADDRESS_SPACE capability.
-
-The bits in the dirty bitmap are cleared before the ioctl returns, unless
-KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is enabled.  For more information,
-see the description of the capability.
-
-4.9 KVM_SET_MEMORY_ALIAS
-
-Capability: basic
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_memory_alias (in)
-Returns: 0 (success), -1 (error)
-
-This ioctl is obsolete and has been removed.
-
-
-4.10 KVM_RUN
-
-Capability: basic
-Architectures: all
-Type: vcpu ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
-Errors:
-  EINTR:     an unmasked signal is pending
-
-This ioctl is used to run a guest virtual cpu.  While there are no
-explicit parameters, there is an implicit parameter block that can be
-obtained by mmap()ing the vcpu fd at offset 0, with the size given by
-KVM_GET_VCPU_MMAP_SIZE.  The parameter block is formatted as a 'struct
-kvm_run' (see below).
-
-
-4.11 KVM_GET_REGS
-
-Capability: basic
-Architectures: all except ARM, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_regs (out)
-Returns: 0 on success, -1 on error
-
-Reads the general purpose registers from the vcpu.
-
-/* x86 */
-struct kvm_regs {
-	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
-	__u64 rax, rbx, rcx, rdx;
-	__u64 rsi, rdi, rsp, rbp;
-	__u64 r8,  r9,  r10, r11;
-	__u64 r12, r13, r14, r15;
-	__u64 rip, rflags;
-};
-
-/* mips */
-struct kvm_regs {
-	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
-	__u64 gpr[32];
-	__u64 hi;
-	__u64 lo;
-	__u64 pc;
-};
-
-
-4.12 KVM_SET_REGS
-
-Capability: basic
-Architectures: all except ARM, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_regs (in)
-Returns: 0 on success, -1 on error
-
-Writes the general purpose registers into the vcpu.
-
-See KVM_GET_REGS for the data structure.
-
-
-4.13 KVM_GET_SREGS
-
-Capability: basic
-Architectures: x86, ppc
-Type: vcpu ioctl
-Parameters: struct kvm_sregs (out)
-Returns: 0 on success, -1 on error
-
-Reads special registers from the vcpu.
-
-/* x86 */
-struct kvm_sregs {
-	struct kvm_segment cs, ds, es, fs, gs, ss;
-	struct kvm_segment tr, ldt;
-	struct kvm_dtable gdt, idt;
-	__u64 cr0, cr2, cr3, cr4, cr8;
-	__u64 efer;
-	__u64 apic_base;
-	__u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
-};
-
-/* ppc -- see arch/powerpc/include/uapi/asm/kvm.h */
-
-interrupt_bitmap is a bitmap of pending external interrupts.  At most
-one bit may be set.  This interrupt has been acknowledged by the APIC
-but not yet injected into the cpu core.
-
-
-4.14 KVM_SET_SREGS
-
-Capability: basic
-Architectures: x86, ppc
-Type: vcpu ioctl
-Parameters: struct kvm_sregs (in)
-Returns: 0 on success, -1 on error
-
-Writes special registers into the vcpu.  See KVM_GET_SREGS for the
-data structures.
-
-
-4.15 KVM_TRANSLATE
-
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_translation (in/out)
-Returns: 0 on success, -1 on error
-
-Translates a virtual address according to the vcpu's current address
-translation mode.
-
-struct kvm_translation {
-	/* in */
-	__u64 linear_address;
-
-	/* out */
-	__u64 physical_address;
-	__u8  valid;
-	__u8  writeable;
-	__u8  usermode;
-	__u8  pad[5];
-};
-
-
-4.16 KVM_INTERRUPT
-
-Capability: basic
-Architectures: x86, ppc, mips
-Type: vcpu ioctl
-Parameters: struct kvm_interrupt (in)
-Returns: 0 on success, negative on failure.
-
-Queues a hardware interrupt vector to be injected.
-
-/* for KVM_INTERRUPT */
-struct kvm_interrupt {
-	/* in */
-	__u32 irq;
-};
-
-X86:
-
-Returns: 0 on success,
-	 -EEXIST if an interrupt is already enqueued
-	 -EINVAL the the irq number is invalid
-	 -ENXIO if the PIC is in the kernel
-	 -EFAULT if the pointer is invalid
-
-Note 'irq' is an interrupt vector, not an interrupt pin or line. This
-ioctl is useful if the in-kernel PIC is not used.
-
-PPC:
-
-Queues an external interrupt to be injected. This ioctl is overleaded
-with 3 different irq values:
-
-a) KVM_INTERRUPT_SET
-
-  This injects an edge type external interrupt into the guest once it's ready
-  to receive interrupts. When injected, the interrupt is done.
-
-b) KVM_INTERRUPT_UNSET
-
-  This unsets any pending interrupt.
-
-  Only available with KVM_CAP_PPC_UNSET_IRQ.
-
-c) KVM_INTERRUPT_SET_LEVEL
-
-  This injects a level type external interrupt into the guest context. The
-  interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
-  is triggered.
-
-  Only available with KVM_CAP_PPC_IRQ_LEVEL.
-
-Note that any value for 'irq' other than the ones stated above is invalid
-and incurs unexpected behavior.
-
-This is an asynchronous vcpu ioctl and can be invoked from any thread.
-
-MIPS:
-
-Queues an external interrupt to be injected into the virtual CPU. A negative
-interrupt number dequeues the interrupt.
-
-This is an asynchronous vcpu ioctl and can be invoked from any thread.
-
-
-4.17 KVM_DEBUG_GUEST
-
-Capability: basic
-Architectures: none
-Type: vcpu ioctl
-Parameters: none)
-Returns: -1 on error
-
-Support for this has been removed.  Use KVM_SET_GUEST_DEBUG instead.
-
-
-4.18 KVM_GET_MSRS
-
-Capability: basic (vcpu), KVM_CAP_GET_MSR_FEATURES (system)
-Architectures: x86
-Type: system ioctl, vcpu ioctl
-Parameters: struct kvm_msrs (in/out)
-Returns: number of msrs successfully returned;
-        -1 on error
-
-When used as a system ioctl:
-Reads the values of MSR-based features that are available for the VM.  This
-is similar to KVM_GET_SUPPORTED_CPUID, but it returns MSR indices and values.
-The list of msr-based features can be obtained using KVM_GET_MSR_FEATURE_INDEX_LIST
-in a system ioctl.
-
-When used as a vcpu ioctl:
-Reads model-specific registers from the vcpu.  Supported msr indices can
-be obtained using KVM_GET_MSR_INDEX_LIST in a system ioctl.
-
-struct kvm_msrs {
-	__u32 nmsrs; /* number of msrs in entries */
-	__u32 pad;
-
-	struct kvm_msr_entry entries[0];
-};
-
-struct kvm_msr_entry {
-	__u32 index;
-	__u32 reserved;
-	__u64 data;
-};
-
-Application code should set the 'nmsrs' member (which indicates the
-size of the entries array) and the 'index' member of each array entry.
-kvm will fill in the 'data' member.
-
-
-4.19 KVM_SET_MSRS
-
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_msrs (in)
-Returns: 0 on success, -1 on error
-
-Writes model-specific registers to the vcpu.  See KVM_GET_MSRS for the
-data structures.
-
-Application code should set the 'nmsrs' member (which indicates the
-size of the entries array), and the 'index' and 'data' members of each
-array entry.
-
-
-4.20 KVM_SET_CPUID
-
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_cpuid (in)
-Returns: 0 on success, -1 on error
-
-Defines the vcpu responses to the cpuid instruction.  Applications
-should use the KVM_SET_CPUID2 ioctl if available.
-
-
-struct kvm_cpuid_entry {
-	__u32 function;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding;
-};
-
-/* for KVM_SET_CPUID */
-struct kvm_cpuid {
-	__u32 nent;
-	__u32 padding;
-	struct kvm_cpuid_entry entries[0];
-};
-
-
-4.21 KVM_SET_SIGNAL_MASK
-
-Capability: basic
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_signal_mask (in)
-Returns: 0 on success, -1 on error
-
-Defines which signals are blocked during execution of KVM_RUN.  This
-signal mask temporarily overrides the threads signal mask.  Any
-unblocked signal received (except SIGKILL and SIGSTOP, which retain
-their traditional behaviour) will cause KVM_RUN to return with -EINTR.
-
-Note the signal will only be delivered if not blocked by the original
-signal mask.
-
-/* for KVM_SET_SIGNAL_MASK */
-struct kvm_signal_mask {
-	__u32 len;
-	__u8  sigset[0];
-};
-
-
-4.22 KVM_GET_FPU
-
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_fpu (out)
-Returns: 0 on success, -1 on error
-
-Reads the floating point state from the vcpu.
-
-/* for KVM_GET_FPU and KVM_SET_FPU */
-struct kvm_fpu {
-	__u8  fpr[8][16];
-	__u16 fcw;
-	__u16 fsw;
-	__u8  ftwx;  /* in fxsave format */
-	__u8  pad1;
-	__u16 last_opcode;
-	__u64 last_ip;
-	__u64 last_dp;
-	__u8  xmm[16][16];
-	__u32 mxcsr;
-	__u32 pad2;
-};
-
-
-4.23 KVM_SET_FPU
-
-Capability: basic
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_fpu (in)
-Returns: 0 on success, -1 on error
-
-Writes the floating point state to the vcpu.
-
-/* for KVM_GET_FPU and KVM_SET_FPU */
-struct kvm_fpu {
-	__u8  fpr[8][16];
-	__u16 fcw;
-	__u16 fsw;
-	__u8  ftwx;  /* in fxsave format */
-	__u8  pad1;
-	__u16 last_opcode;
-	__u64 last_ip;
-	__u64 last_dp;
-	__u8  xmm[16][16];
-	__u32 mxcsr;
-	__u32 pad2;
-};
-
-
-4.24 KVM_CREATE_IRQCHIP
-
-Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-Architectures: x86, ARM, arm64, s390
-Type: vm ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
-
-Creates an interrupt controller model in the kernel.
-On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
-future vcpus to have a local APIC.  IRQ routing for GSIs 0-15 is set to both
-PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
-On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
-KVM_CREATE_DEVICE, which also supports creating a GICv2.  Using
-KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
-On s390, a dummy irq routing table is created.
-
-Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
-before KVM_CREATE_IRQCHIP can be used.
-
-
-4.25 KVM_IRQ_LINE
-
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86, arm, arm64
-Type: vm ioctl
-Parameters: struct kvm_irq_level
-Returns: 0 on success, -1 on error
-
-Sets the level of a GSI input to the interrupt controller model in the kernel.
-On some architectures it is required that an interrupt controller model has
-been previously created with KVM_CREATE_IRQCHIP.  Note that edge-triggered
-interrupts require the level to be set to 1 and then back to 0.
-
-On real hardware, interrupt pins can be active-low or active-high.  This
-does not matter for the level field of struct kvm_irq_level: 1 always
-means active (asserted), 0 means inactive (deasserted).
-
-x86 allows the operating system to program the interrupt polarity
-(active-low/active-high) for level-triggered interrupts, and KVM used
-to consider the polarity.  However, due to bitrot in the handling of
-active-low interrupts, the above convention is now valid on x86 too.
-This is signaled by KVM_CAP_X86_IOAPIC_POLARITY_IGNORED.  Userspace
-should not present interrupts to the guest as active-low unless this
-capability is present (or unless it is not using the in-kernel irqchip,
-of course).
-
-
-ARM/arm64 can signal an interrupt either at the CPU level, or at the
-in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
-use PPIs designated for specific cpus.  The irq field is interpreted
-like this:
-
-  bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
-  field: | irq_type  | vcpu_index |     irq_id     |
-
-The irq_type field has the following values:
-- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
-- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
-               (the vcpu_index field is ignored)
-- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
-
-(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
-
-In both cases, level is used to assert/deassert the line.
-
-struct kvm_irq_level {
-	union {
-		__u32 irq;     /* GSI */
-		__s32 status;  /* not used for KVM_IRQ_LEVEL */
-	};
-	__u32 level;           /* 0 or 1 */
-};
-
-
-4.26 KVM_GET_IRQCHIP
-
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_irqchip (in/out)
-Returns: 0 on success, -1 on error
-
-Reads the state of a kernel interrupt controller created with
-KVM_CREATE_IRQCHIP into a buffer provided by the caller.
-
-struct kvm_irqchip {
-	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
-	__u32 pad;
-        union {
-		char dummy[512];  /* reserving space */
-		struct kvm_pic_state pic;
-		struct kvm_ioapic_state ioapic;
-	} chip;
-};
-
-
-4.27 KVM_SET_IRQCHIP
-
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_irqchip (in)
-Returns: 0 on success, -1 on error
-
-Sets the state of a kernel interrupt controller created with
-KVM_CREATE_IRQCHIP from a buffer provided by the caller.
-
-struct kvm_irqchip {
-	__u32 chip_id;  /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
-	__u32 pad;
-        union {
-		char dummy[512];  /* reserving space */
-		struct kvm_pic_state pic;
-		struct kvm_ioapic_state ioapic;
-	} chip;
-};
-
-
-4.28 KVM_XEN_HVM_CONFIG
-
-Capability: KVM_CAP_XEN_HVM
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_xen_hvm_config (in)
-Returns: 0 on success, -1 on error
-
-Sets the MSR that the Xen HVM guest uses to initialize its hypercall
-page, and provides the starting address and size of the hypercall
-blobs in userspace.  When the guest writes the MSR, kvm copies one
-page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
-memory.
-
-struct kvm_xen_hvm_config {
-	__u32 flags;
-	__u32 msr;
-	__u64 blob_addr_32;
-	__u64 blob_addr_64;
-	__u8 blob_size_32;
-	__u8 blob_size_64;
-	__u8 pad2[30];
-};
-
-
-4.29 KVM_GET_CLOCK
-
-Capability: KVM_CAP_ADJUST_CLOCK
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_clock_data (out)
-Returns: 0 on success, -1 on error
-
-Gets the current timestamp of kvmclock as seen by the current guest. In
-conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
-such as migration.
-
-When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
-set of bits that KVM can return in struct kvm_clock_data's flag member.
-
-The only flag defined now is KVM_CLOCK_TSC_STABLE.  If set, the returned
-value is the exact kvmclock value seen by all VCPUs at the instant
-when KVM_GET_CLOCK was called.  If clear, the returned value is simply
-CLOCK_MONOTONIC plus a constant offset; the offset can be modified
-with KVM_SET_CLOCK.  KVM will try to make all VCPUs follow this clock,
-but the exact value read by each VCPU could differ, because the host
-TSC is not stable.
-
-struct kvm_clock_data {
-	__u64 clock;  /* kvmclock current value */
-	__u32 flags;
-	__u32 pad[9];
-};
-
-
-4.30 KVM_SET_CLOCK
-
-Capability: KVM_CAP_ADJUST_CLOCK
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_clock_data (in)
-Returns: 0 on success, -1 on error
-
-Sets the current timestamp of kvmclock to the value specified in its parameter.
-In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
-such as migration.
-
-struct kvm_clock_data {
-	__u64 clock;  /* kvmclock current value */
-	__u32 flags;
-	__u32 pad[9];
-};
-
-
-4.31 KVM_GET_VCPU_EVENTS
-
-Capability: KVM_CAP_VCPU_EVENTS
-Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_vcpu_event (out)
-Returns: 0 on success, -1 on error
-
-X86:
-
-Gets currently pending exceptions, interrupts, and NMIs as well as related
-states of the vcpu.
-
-struct kvm_vcpu_events {
-	struct {
-		__u8 injected;
-		__u8 nr;
-		__u8 has_error_code;
-		__u8 pending;
-		__u32 error_code;
-	} exception;
-	struct {
-		__u8 injected;
-		__u8 nr;
-		__u8 soft;
-		__u8 shadow;
-	} interrupt;
-	struct {
-		__u8 injected;
-		__u8 pending;
-		__u8 masked;
-		__u8 pad;
-	} nmi;
-	__u32 sipi_vector;
-	__u32 flags;
-	struct {
-		__u8 smm;
-		__u8 pending;
-		__u8 smm_inside_nmi;
-		__u8 latched_init;
-	} smi;
-	__u8 reserved[27];
-	__u8 exception_has_payload;
-	__u64 exception_payload;
-};
-
-The following bits are defined in the flags field:
-
-- KVM_VCPUEVENT_VALID_SHADOW may be set to signal that
-  interrupt.shadow contains a valid state.
-
-- KVM_VCPUEVENT_VALID_SMM may be set to signal that smi contains a
-  valid state.
-
-- KVM_VCPUEVENT_VALID_PAYLOAD may be set to signal that the
-  exception_has_payload, exception_payload, and exception.pending
-  fields contain a valid state. This bit will be set whenever
-  KVM_CAP_EXCEPTION_PAYLOAD is enabled.
-
-ARM/ARM64:
-
-If the guest accesses a device that is being emulated by the host kernel in
-such a way that a real device would generate a physical SError, KVM may make
-a virtual SError pending for that VCPU. This system error interrupt remains
-pending until the guest takes the exception by unmasking PSTATE.A.
-
-Running the VCPU may cause it to take a pending SError, or make an access that
-causes an SError to become pending. The event's description is only valid while
-the VPCU is not running.
-
-This API provides a way to read and write the pending 'event' state that is not
-visible to the guest. To save, restore or migrate a VCPU the struct representing
-the state can be read then written using this GET/SET API, along with the other
-guest-visible registers. It is not possible to 'cancel' an SError that has been
-made pending.
-
-A device being emulated in user-space may also wish to generate an SError. To do
-this the events structure can be populated by user-space. The current state
-should be read first, to ensure no existing SError is pending. If an existing
-SError is pending, the architecture's 'Multiple SError interrupts' rules should
-be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and
-Serviceability (RAS) Specification").
-
-SError exceptions always have an ESR value. Some CPUs have the ability to
-specify what the virtual SError's ESR value should be. These systems will
-advertise KVM_CAP_ARM_INJECT_SERROR_ESR. In this case exception.has_esr will
-always have a non-zero value when read, and the agent making an SError pending
-should specify the ISS field in the lower 24 bits of exception.serror_esr. If
-the system supports KVM_CAP_ARM_INJECT_SERROR_ESR, but user-space sets the events
-with exception.has_esr as zero, KVM will choose an ESR.
-
-Specifying exception.has_esr on a system that does not support it will return
--EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
-will return -EINVAL.
-
-struct kvm_vcpu_events {
-	struct {
-		__u8 serror_pending;
-		__u8 serror_has_esr;
-		/* Align it to 8 bytes */
-		__u8 pad[6];
-		__u64 serror_esr;
-	} exception;
-	__u32 reserved[12];
-};
-
-4.32 KVM_SET_VCPU_EVENTS
-
-Capability: KVM_CAP_VCPU_EVENTS
-Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_vcpu_event (in)
-Returns: 0 on success, -1 on error
-
-X86:
-
-Set pending exceptions, interrupts, and NMIs as well as related states of the
-vcpu.
-
-See KVM_GET_VCPU_EVENTS for the data structure.
-
-Fields that may be modified asynchronously by running VCPUs can be excluded
-from the update. These fields are nmi.pending, sipi_vector, smi.smm,
-smi.pending. Keep the corresponding bits in the flags field cleared to
-suppress overwriting the current in-kernel state. The bits are:
-
-KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
-KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
-KVM_VCPUEVENT_VALID_SMM         - transfer the smi sub-struct.
-
-If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
-the flags field to signal that interrupt.shadow contains a valid state and
-shall be written into the VCPU.
-
-KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
-
-If KVM_CAP_EXCEPTION_PAYLOAD is enabled, KVM_VCPUEVENT_VALID_PAYLOAD
-can be set in the flags field to signal that the
-exception_has_payload, exception_payload, and exception.pending fields
-contain a valid state and shall be written into the VCPU.
-
-ARM/ARM64:
-
-Set the pending SError exception state for this VCPU. It is not possible to
-'cancel' an Serror that has been made pending.
-
-See KVM_GET_VCPU_EVENTS for the data structure.
-
-
-4.33 KVM_GET_DEBUGREGS
-
-Capability: KVM_CAP_DEBUGREGS
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_debugregs (out)
-Returns: 0 on success, -1 on error
-
-Reads debug registers from the vcpu.
-
-struct kvm_debugregs {
-	__u64 db[4];
-	__u64 dr6;
-	__u64 dr7;
-	__u64 flags;
-	__u64 reserved[9];
-};
-
-
-4.34 KVM_SET_DEBUGREGS
-
-Capability: KVM_CAP_DEBUGREGS
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_debugregs (in)
-Returns: 0 on success, -1 on error
-
-Writes debug registers into the vcpu.
-
-See KVM_GET_DEBUGREGS for the data structure. The flags field is unused
-yet and must be cleared on entry.
-
-
-4.35 KVM_SET_USER_MEMORY_REGION
-
-Capability: KVM_CAP_USER_MEMORY
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_userspace_memory_region (in)
-Returns: 0 on success, -1 on error
-
-struct kvm_userspace_memory_region {
-	__u32 slot;
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size; /* bytes */
-	__u64 userspace_addr; /* start of the userspace allocated memory */
-};
-
-/* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
-#define KVM_MEM_READONLY	(1UL << 1)
-
-This ioctl allows the user to create, modify or delete a guest physical
-memory slot.  Bits 0-15 of "slot" specify the slot id and this value
-should be less than the maximum number of user memory slots supported per
-VM.  The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS.
-Slots may not overlap in guest physical address space.
-
-If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
-specifies the address space which is being modified.  They must be
-less than the value that KVM_CHECK_EXTENSION returns for the
-KVM_CAP_MULTI_ADDRESS_SPACE capability.  Slots in separate address spaces
-are unrelated; the restriction on overlapping slots only applies within
-each address space.
-
-Deleting a slot is done by passing zero for memory_size.  When changing
-an existing slot, it may be moved in the guest physical memory space,
-or its flags may be modified, but it may not be resized.
-
-Memory for the region is taken starting at the address denoted by the
-field userspace_addr, which must point at user addressable memory for
-the entire memory slot size.  Any object may back this memory, including
-anonymous memory, ordinary files, and hugetlbfs.
-
-It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
-be identical.  This allows large pages in the guest to be backed by large
-pages in the host.
-
-The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
-KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
-writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
-use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
-to make a new slot read-only.  In this case, writes to this memory will be
-posted to userspace as KVM_EXIT_MMIO exits.
-
-When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
-the memory region are automatically reflected into the guest.  For example, an
-mmap() that affects the region will be made visible immediately.  Another
-example is madvise(MADV_DROP).
-
-It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
-The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
-allocation and is deprecated.
-
-
-4.36 KVM_SET_TSS_ADDR
-
-Capability: KVM_CAP_SET_TSS_ADDR
-Architectures: x86
-Type: vm ioctl
-Parameters: unsigned long tss_address (in)
-Returns: 0 on success, -1 on error
-
-This ioctl defines the physical address of a three-page region in the guest
-physical address space.  The region must be within the first 4GB of the
-guest physical address space and must not conflict with any memory slot
-or any mmio address.  The guest may malfunction if it accesses this memory
-region.
-
-This ioctl is required on Intel-based hosts.  This is needed on Intel hardware
-because of a quirk in the virtualization implementation (see the internals
-documentation when it pops into existence).
-
-
-4.37 KVM_ENABLE_CAP
-
-Capability: KVM_CAP_ENABLE_CAP
-Architectures: mips, ppc, s390
-Type: vcpu ioctl
-Parameters: struct kvm_enable_cap (in)
-Returns: 0 on success; -1 on error
-
-Capability: KVM_CAP_ENABLE_CAP_VM
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_enable_cap (in)
-Returns: 0 on success; -1 on error
-
-+Not all extensions are enabled by default. Using this ioctl the application
-can enable an extension, making it available to the guest.
-
-On systems that do not support this ioctl, it always fails. On systems that
-do support it, it only works for extensions that are supported for enablement.
-
-To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
-be used.
-
-struct kvm_enable_cap {
-       /* in */
-       __u32 cap;
-
-The capability that is supposed to get enabled.
-
-       __u32 flags;
-
-A bitfield indicating future enhancements. Has to be 0 for now.
-
-       __u64 args[4];
-
-Arguments for enabling a feature. If a feature needs initial values to
-function properly, this is the place to put them.
-
-       __u8  pad[64];
-};
-
-The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
-for vm-wide capabilities.
-
-4.38 KVM_GET_MP_STATE
-
-Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_mp_state (out)
-Returns: 0 on success; -1 on error
-
-struct kvm_mp_state {
-	__u32 mp_state;
-};
-
-Returns the vcpu's current "multiprocessing state" (though also valid on
-uniprocessor guests).
-
-Possible values are:
-
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86,arm/arm64]
- - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal [x86]
- - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI [x86]
- - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt [x86]
- - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390,arm/arm64]
- - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
- - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
-                                 [s390]
- - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
-                                 [s390]
-
-On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
-in-kernel irqchip, the multiprocessing state must be maintained by userspace on
-these architectures.
-
-For arm/arm64:
-
-The only states that are valid are KVM_MP_STATE_STOPPED and
-KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
-
-4.39 KVM_SET_MP_STATE
-
-Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390, arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_mp_state (in)
-Returns: 0 on success; -1 on error
-
-Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
-arguments.
-
-On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
-in-kernel irqchip, the multiprocessing state must be maintained by userspace on
-these architectures.
-
-For arm/arm64:
-
-The only states that are valid are KVM_MP_STATE_STOPPED and
-KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
-
-4.40 KVM_SET_IDENTITY_MAP_ADDR
-
-Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
-Architectures: x86
-Type: vm ioctl
-Parameters: unsigned long identity (in)
-Returns: 0 on success, -1 on error
-
-This ioctl defines the physical address of a one-page region in the guest
-physical address space.  The region must be within the first 4GB of the
-guest physical address space and must not conflict with any memory slot
-or any mmio address.  The guest may malfunction if it accesses this memory
-region.
-
-Setting the address to 0 will result in resetting the address to its default
-(0xfffbc000).
-
-This ioctl is required on Intel-based hosts.  This is needed on Intel hardware
-because of a quirk in the virtualization implementation (see the internals
-documentation when it pops into existence).
-
-Fails if any VCPU has already been created.
-
-4.41 KVM_SET_BOOT_CPU_ID
-
-Capability: KVM_CAP_SET_BOOT_CPU_ID
-Architectures: x86
-Type: vm ioctl
-Parameters: unsigned long vcpu_id
-Returns: 0 on success, -1 on error
-
-Define which vcpu is the Bootstrap Processor (BSP).  Values are the same
-as the vcpu id in KVM_CREATE_VCPU.  If this ioctl is not called, the default
-is vcpu 0.
-
-
-4.42 KVM_GET_XSAVE
-
-Capability: KVM_CAP_XSAVE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xsave (out)
-Returns: 0 on success, -1 on error
-
-struct kvm_xsave {
-	__u32 region[1024];
-};
-
-This ioctl would copy current vcpu's xsave struct to the userspace.
-
-
-4.43 KVM_SET_XSAVE
-
-Capability: KVM_CAP_XSAVE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xsave (in)
-Returns: 0 on success, -1 on error
-
-struct kvm_xsave {
-	__u32 region[1024];
-};
-
-This ioctl would copy userspace's xsave struct to the kernel.
-
-
-4.44 KVM_GET_XCRS
-
-Capability: KVM_CAP_XCRS
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xcrs (out)
-Returns: 0 on success, -1 on error
-
-struct kvm_xcr {
-	__u32 xcr;
-	__u32 reserved;
-	__u64 value;
-};
-
-struct kvm_xcrs {
-	__u32 nr_xcrs;
-	__u32 flags;
-	struct kvm_xcr xcrs[KVM_MAX_XCRS];
-	__u64 padding[16];
-};
-
-This ioctl would copy current vcpu's xcrs to the userspace.
-
-
-4.45 KVM_SET_XCRS
-
-Capability: KVM_CAP_XCRS
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_xcrs (in)
-Returns: 0 on success, -1 on error
-
-struct kvm_xcr {
-	__u32 xcr;
-	__u32 reserved;
-	__u64 value;
-};
-
-struct kvm_xcrs {
-	__u32 nr_xcrs;
-	__u32 flags;
-	struct kvm_xcr xcrs[KVM_MAX_XCRS];
-	__u64 padding[16];
-};
-
-This ioctl would set vcpu's xcr to the value userspace specified.
-
-
-4.46 KVM_GET_SUPPORTED_CPUID
-
-Capability: KVM_CAP_EXT_CPUID
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
-
-struct kvm_cpuid2 {
-	__u32 nent;
-	__u32 padding;
-	struct kvm_cpuid_entry2 entries[0];
-};
-
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
-
-struct kvm_cpuid_entry2 {
-	__u32 function;
-	__u32 index;
-	__u32 flags;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding[3];
-};
-
-This ioctl returns x86 cpuid features which are supported by both the
-hardware and kvm in its default configuration.  Userspace can use the
-information returned by this ioctl to construct cpuid information (for
-KVM_SET_CPUID2) that is consistent with hardware, kernel, and
-userspace capabilities, and with user requirements (for example, the
-user may wish to constrain cpuid to emulate older hardware, or for
-feature consistency across a cluster).
-
-Note that certain capabilities, such as KVM_CAP_X86_DISABLE_EXITS, may
-expose cpuid features (e.g. MONITOR) which are not supported by kvm in
-its default configuration. If userspace enables such capabilities, it
-is responsible for modifying the results of this ioctl appropriately.
-
-Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
-with the 'nent' field indicating the number of entries in the variable-size
-array 'entries'.  If the number of entries is too low to describe the cpu
-capabilities, an error (E2BIG) is returned.  If the number is too high,
-the 'nent' field is adjusted and an error (ENOMEM) is returned.  If the
-number is just right, the 'nent' field is adjusted to the number of valid
-entries in the 'entries' array, which is then filled.
-
-The entries returned are the host cpuid as returned by the cpuid instruction,
-with unknown or unsupported features masked out.  Some features (for example,
-x2apic), may not be present in the host cpu, but are exposed by kvm if it can
-emulate them efficiently. The fields in each entry are defined as follows:
-
-  function: the eax value used to obtain the entry
-  index: the ecx value used to obtain the entry (for entries that are
-         affected by ecx)
-  flags: an OR of zero or more of the following:
-        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
-           if the index field is valid
-        KVM_CPUID_FLAG_STATEFUL_FUNC:
-           if cpuid for this function returns different values for successive
-           invocations; there will be several entries with the same function,
-           all with this flag set
-        KVM_CPUID_FLAG_STATE_READ_NEXT:
-           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
-           the first entry to be read by a cpu
-   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
-         this function/index combination
-
-The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
-as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
-support.  Instead it is reported via
-
-  ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
-
-if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
-feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
-
-
-4.47 KVM_PPC_GET_PVINFO
-
-Capability: KVM_CAP_PPC_GET_PVINFO
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_ppc_pvinfo (out)
-Returns: 0 on success, !0 on error
-
-struct kvm_ppc_pvinfo {
-	__u32 flags;
-	__u32 hcall[4];
-	__u8  pad[108];
-};
-
-This ioctl fetches PV specific information that need to be passed to the guest
-using the device tree or other means from vm context.
-
-The hcall array defines 4 instructions that make up a hypercall.
-
-If any additional field gets added to this structure later on, a bit for that
-additional piece of information will be set in the flags bitmap.
-
-The flags bitmap is defined as:
-
-   /* the host supports the ePAPR idle hcall
-   #define KVM_PPC_PVINFO_FLAGS_EV_IDLE   (1<<0)
-
-4.52 KVM_SET_GSI_ROUTING
-
-Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 s390 arm arm64
-Type: vm ioctl
-Parameters: struct kvm_irq_routing (in)
-Returns: 0 on success, -1 on error
-
-Sets the GSI routing table entries, overwriting any previously set entries.
-
-On arm/arm64, GSI routing has the following limitation:
-- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
-
-struct kvm_irq_routing {
-	__u32 nr;
-	__u32 flags;
-	struct kvm_irq_routing_entry entries[0];
-};
-
-No flags are specified so far, the corresponding field must be set to zero.
-
-struct kvm_irq_routing_entry {
-	__u32 gsi;
-	__u32 type;
-	__u32 flags;
-	__u32 pad;
-	union {
-		struct kvm_irq_routing_irqchip irqchip;
-		struct kvm_irq_routing_msi msi;
-		struct kvm_irq_routing_s390_adapter adapter;
-		struct kvm_irq_routing_hv_sint hv_sint;
-		__u32 pad[8];
-	} u;
-};
-
-/* gsi routing entry types */
-#define KVM_IRQ_ROUTING_IRQCHIP 1
-#define KVM_IRQ_ROUTING_MSI 2
-#define KVM_IRQ_ROUTING_S390_ADAPTER 3
-#define KVM_IRQ_ROUTING_HV_SINT 4
-
-flags:
-- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry
-  type, specifies that the devid field contains a valid value.  The per-VM
-  KVM_CAP_MSI_DEVID capability advertises the requirement to provide
-  the device ID.  If this capability is not available, userspace should
-  never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
-- zero otherwise
-
-struct kvm_irq_routing_irqchip {
-	__u32 irqchip;
-	__u32 pin;
-};
-
-struct kvm_irq_routing_msi {
-	__u32 address_lo;
-	__u32 address_hi;
-	__u32 data;
-	union {
-		__u32 pad;
-		__u32 devid;
-	};
-};
-
-If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
-for the device that wrote the MSI message.  For PCI, this is usually a
-BFD identifier in the lower 16 bits.
-
-On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
-feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
-address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
-address_hi must be zero.
-
-struct kvm_irq_routing_s390_adapter {
-	__u64 ind_addr;
-	__u64 summary_addr;
-	__u64 ind_offset;
-	__u32 summary_offset;
-	__u32 adapter_id;
-};
-
-struct kvm_irq_routing_hv_sint {
-	__u32 vcpu;
-	__u32 sint;
-};
-
-
-4.55 KVM_SET_TSC_KHZ
-
-Capability: KVM_CAP_TSC_CONTROL
-Architectures: x86
-Type: vcpu ioctl
-Parameters: virtual tsc_khz
-Returns: 0 on success, -1 on error
-
-Specifies the tsc frequency for the virtual machine. The unit of the
-frequency is KHz.
-
-
-4.56 KVM_GET_TSC_KHZ
-
-Capability: KVM_CAP_GET_TSC_KHZ
-Architectures: x86
-Type: vcpu ioctl
-Parameters: none
-Returns: virtual tsc-khz on success, negative value on error
-
-Returns the tsc frequency of the guest. The unit of the return value is
-KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
-error.
-
-
-4.57 KVM_GET_LAPIC
-
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_lapic_state (out)
-Returns: 0 on success, -1 on error
-
-#define KVM_APIC_REG_SIZE 0x400
-struct kvm_lapic_state {
-	char regs[KVM_APIC_REG_SIZE];
-};
-
-Reads the Local APIC registers and copies them into the input argument.  The
-data format and layout are the same as documented in the architecture manual.
-
-If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is
-enabled, then the format of APIC_ID register depends on the APIC mode
-(reported by MSR_IA32_APICBASE) of its VCPU.  x2APIC stores APIC ID in
-the APIC_ID register (bytes 32-35).  xAPIC only allows an 8-bit APIC ID
-which is stored in bits 31-24 of the APIC register, or equivalently in
-byte 35 of struct kvm_lapic_state's regs field.  KVM_GET_LAPIC must then
-be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR.
-
-If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state
-always uses xAPIC format.
-
-
-4.58 KVM_SET_LAPIC
-
-Capability: KVM_CAP_IRQCHIP
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_lapic_state (in)
-Returns: 0 on success, -1 on error
-
-#define KVM_APIC_REG_SIZE 0x400
-struct kvm_lapic_state {
-	char regs[KVM_APIC_REG_SIZE];
-};
-
-Copies the input argument into the Local APIC registers.  The data format
-and layout are the same as documented in the architecture manual.
-
-The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's
-regs field) depends on the state of the KVM_CAP_X2APIC_API capability.
-See the note in KVM_GET_LAPIC.
-
-
-4.59 KVM_IOEVENTFD
-
-Capability: KVM_CAP_IOEVENTFD
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_ioeventfd (in)
-Returns: 0 on success, !0 on error
-
-This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
-within the guest.  A guest write in the registered address will signal the
-provided event instead of triggering an exit.
-
-struct kvm_ioeventfd {
-	__u64 datamatch;
-	__u64 addr;        /* legal pio/mmio address */
-	__u32 len;         /* 0, 1, 2, 4, or 8 bytes    */
-	__s32 fd;
-	__u32 flags;
-	__u8  pad[36];
-};
-
-For the special case of virtio-ccw devices on s390, the ioevent is matched
-to a subchannel/virtqueue tuple instead.
-
-The following flags are defined:
-
-#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
-#define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
-#define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
-#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
-	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
-
-If datamatch flag is set, the event will be signaled only if the written value
-to the registered address is equal to datamatch in struct kvm_ioeventfd.
-
-For virtio-ccw devices, addr contains the subchannel id and datamatch the
-virtqueue index.
-
-With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
-the kernel will ignore the length of guest write and may get a faster vmexit.
-The speedup may only apply to specific architectures, but the ioeventfd will
-work anyway.
-
-4.60 KVM_DIRTY_TLB
-
-Capability: KVM_CAP_SW_TLB
-Architectures: ppc
-Type: vcpu ioctl
-Parameters: struct kvm_dirty_tlb (in)
-Returns: 0 on success, -1 on error
-
-struct kvm_dirty_tlb {
-	__u64 bitmap;
-	__u32 num_dirty;
-};
-
-This must be called whenever userspace has changed an entry in the shared
-TLB, prior to calling KVM_RUN on the associated vcpu.
-
-The "bitmap" field is the userspace address of an array.  This array
-consists of a number of bits, equal to the total number of TLB entries as
-determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
-nearest multiple of 64.
-
-Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
-array.
-
-The array is little-endian: the bit 0 is the least significant bit of the
-first byte, bit 8 is the least significant bit of the second byte, etc.
-This avoids any complications with differing word sizes.
-
-The "num_dirty" field is a performance hint for KVM to determine whether it
-should skip processing the bitmap and just invalidate everything.  It must
-be set to the number of set bits in the bitmap.
-
-
-4.62 KVM_CREATE_SPAPR_TCE
-
-Capability: KVM_CAP_SPAPR_TCE
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_create_spapr_tce (in)
-Returns: file descriptor for manipulating the created TCE table
-
-This creates a virtual TCE (translation control entry) table, which
-is an IOMMU for PAPR-style virtual I/O.  It is used to translate
-logical addresses used in virtual I/O into guest physical addresses,
-and provides a scatter/gather capability for PAPR virtual I/O.
-
-/* for KVM_CAP_SPAPR_TCE */
-struct kvm_create_spapr_tce {
-	__u64 liobn;
-	__u32 window_size;
-};
-
-The liobn field gives the logical IO bus number for which to create a
-TCE table.  The window_size field specifies the size of the DMA window
-which this TCE table will translate - the table will contain one 64
-bit TCE entry for every 4kiB of the DMA window.
-
-When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
-table has been created using this ioctl(), the kernel will handle it
-in real mode, updating the TCE table.  H_PUT_TCE calls for other
-liobns will cause a vm exit and must be handled by userspace.
-
-The return value is a file descriptor which can be passed to mmap(2)
-to map the created TCE table into userspace.  This lets userspace read
-the entries written by kernel-handled H_PUT_TCE calls, and also lets
-userspace update the TCE table directly which is useful in some
-circumstances.
-
-
-4.63 KVM_ALLOCATE_RMA
-
-Capability: KVM_CAP_PPC_RMA
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_allocate_rma (out)
-Returns: file descriptor for mapping the allocated RMA
-
-This allocates a Real Mode Area (RMA) from the pool allocated at boot
-time by the kernel.  An RMA is a physically-contiguous, aligned region
-of memory used on older POWER processors to provide the memory which
-will be accessed by real-mode (MMU off) accesses in a KVM guest.
-POWER processors support a set of sizes for the RMA that usually
-includes 64MB, 128MB, 256MB and some larger powers of two.
-
-/* for KVM_ALLOCATE_RMA */
-struct kvm_allocate_rma {
-	__u64 rma_size;
-};
-
-The return value is a file descriptor which can be passed to mmap(2)
-to map the allocated RMA into userspace.  The mapped area can then be
-passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the
-RMA for a virtual machine.  The size of the RMA in bytes (which is
-fixed at host kernel boot time) is returned in the rma_size field of
-the argument structure.
-
-The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl
-is supported; 2 if the processor requires all virtual machines to have
-an RMA, or 1 if the processor can use an RMA but doesn't require it,
-because it supports the Virtual RMA (VRMA) facility.
-
-
-4.64 KVM_NMI
-
-Capability: KVM_CAP_USER_NMI
-Architectures: x86
-Type: vcpu ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
-
-Queues an NMI on the thread's vcpu.  Note this is well defined only
-when KVM_CREATE_IRQCHIP has not been called, since this is an interface
-between the virtual cpu core and virtual local APIC.  After KVM_CREATE_IRQCHIP
-has been called, this interface is completely emulated within the kernel.
-
-To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the
-following algorithm:
-
-  - pause the vcpu
-  - read the local APIC's state (KVM_GET_LAPIC)
-  - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1)
-  - if so, issue KVM_NMI
-  - resume the vcpu
-
-Some guests configure the LINT1 NMI input to cause a panic, aiding in
-debugging.
-
-
-4.65 KVM_S390_UCAS_MAP
-
-Capability: KVM_CAP_S390_UCONTROL
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_ucas_mapping (in)
-Returns: 0 in case of success
-
-The parameter is defined like this:
-	struct kvm_s390_ucas_mapping {
-		__u64 user_addr;
-		__u64 vcpu_addr;
-		__u64 length;
-	};
-
-This ioctl maps the memory at "user_addr" with the length "length" to
-the vcpu's address space starting at "vcpu_addr". All parameters need to
-be aligned by 1 megabyte.
-
-
-4.66 KVM_S390_UCAS_UNMAP
-
-Capability: KVM_CAP_S390_UCONTROL
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_ucas_mapping (in)
-Returns: 0 in case of success
-
-The parameter is defined like this:
-	struct kvm_s390_ucas_mapping {
-		__u64 user_addr;
-		__u64 vcpu_addr;
-		__u64 length;
-	};
-
-This ioctl unmaps the memory in the vcpu's address space starting at
-"vcpu_addr" with the length "length". The field "user_addr" is ignored.
-All parameters need to be aligned by 1 megabyte.
-
-
-4.67 KVM_S390_VCPU_FAULT
-
-Capability: KVM_CAP_S390_UCONTROL
-Architectures: s390
-Type: vcpu ioctl
-Parameters: vcpu absolute address (in)
-Returns: 0 in case of success
-
-This call creates a page table entry on the virtual cpu's address space
-(for user controlled virtual machines) or the virtual machine's address
-space (for regular virtual machines). This only works for minor faults,
-thus it's recommended to access subject memory page via the user page
-table upfront. This is useful to handle validity intercepts for user
-controlled virtual machines to fault in the virtual cpu's lowcore pages
-prior to calling the KVM_RUN ioctl.
-
-
-4.68 KVM_SET_ONE_REG
-
-Capability: KVM_CAP_ONE_REG
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_one_reg (in)
-Returns: 0 on success, negative value on failure
-Errors:
-  ENOENT:   no such register
-  EINVAL:   invalid register ID, or no such register
-  EPERM:    (arm64) register access not allowed before vcpu finalization
-(These error codes are indicative only: do not rely on a specific error
-code being returned in a specific situation.)
-
-struct kvm_one_reg {
-       __u64 id;
-       __u64 addr;
-};
-
-Using this ioctl, a single vcpu register can be set to a specific value
-defined by user space with the passed in struct kvm_one_reg, where id
-refers to the register identifier as described below and addr is a pointer
-to a variable with the respective size. There can be architecture agnostic
-and architecture specific registers. Each have their own range of operation
-and their own constants and width. To keep track of the implemented
-registers, find a list below:
-
-  Arch  |           Register            | Width (bits)
-        |                               |
-  PPC   | KVM_REG_PPC_HIOR              | 64
-  PPC   | KVM_REG_PPC_IAC1              | 64
-  PPC   | KVM_REG_PPC_IAC2              | 64
-  PPC   | KVM_REG_PPC_IAC3              | 64
-  PPC   | KVM_REG_PPC_IAC4              | 64
-  PPC   | KVM_REG_PPC_DAC1              | 64
-  PPC   | KVM_REG_PPC_DAC2              | 64
-  PPC   | KVM_REG_PPC_DABR              | 64
-  PPC   | KVM_REG_PPC_DSCR              | 64
-  PPC   | KVM_REG_PPC_PURR              | 64
-  PPC   | KVM_REG_PPC_SPURR             | 64
-  PPC   | KVM_REG_PPC_DAR               | 64
-  PPC   | KVM_REG_PPC_DSISR             | 32
-  PPC   | KVM_REG_PPC_AMR               | 64
-  PPC   | KVM_REG_PPC_UAMOR             | 64
-  PPC   | KVM_REG_PPC_MMCR0             | 64
-  PPC   | KVM_REG_PPC_MMCR1             | 64
-  PPC   | KVM_REG_PPC_MMCRA             | 64
-  PPC   | KVM_REG_PPC_MMCR2             | 64
-  PPC   | KVM_REG_PPC_MMCRS             | 64
-  PPC   | KVM_REG_PPC_SIAR              | 64
-  PPC   | KVM_REG_PPC_SDAR              | 64
-  PPC   | KVM_REG_PPC_SIER              | 64
-  PPC   | KVM_REG_PPC_PMC1              | 32
-  PPC   | KVM_REG_PPC_PMC2              | 32
-  PPC   | KVM_REG_PPC_PMC3              | 32
-  PPC   | KVM_REG_PPC_PMC4              | 32
-  PPC   | KVM_REG_PPC_PMC5              | 32
-  PPC   | KVM_REG_PPC_PMC6              | 32
-  PPC   | KVM_REG_PPC_PMC7              | 32
-  PPC   | KVM_REG_PPC_PMC8              | 32
-  PPC   | KVM_REG_PPC_FPR0              | 64
-          ...
-  PPC   | KVM_REG_PPC_FPR31             | 64
-  PPC   | KVM_REG_PPC_VR0               | 128
-          ...
-  PPC   | KVM_REG_PPC_VR31              | 128
-  PPC   | KVM_REG_PPC_VSR0              | 128
-          ...
-  PPC   | KVM_REG_PPC_VSR31             | 128
-  PPC   | KVM_REG_PPC_FPSCR             | 64
-  PPC   | KVM_REG_PPC_VSCR              | 32
-  PPC   | KVM_REG_PPC_VPA_ADDR          | 64
-  PPC   | KVM_REG_PPC_VPA_SLB           | 128
-  PPC   | KVM_REG_PPC_VPA_DTL           | 128
-  PPC   | KVM_REG_PPC_EPCR              | 32
-  PPC   | KVM_REG_PPC_EPR               | 32
-  PPC   | KVM_REG_PPC_TCR               | 32
-  PPC   | KVM_REG_PPC_TSR               | 32
-  PPC   | KVM_REG_PPC_OR_TSR            | 32
-  PPC   | KVM_REG_PPC_CLEAR_TSR         | 32
-  PPC   | KVM_REG_PPC_MAS0              | 32
-  PPC   | KVM_REG_PPC_MAS1              | 32
-  PPC   | KVM_REG_PPC_MAS2              | 64
-  PPC   | KVM_REG_PPC_MAS7_3            | 64
-  PPC   | KVM_REG_PPC_MAS4              | 32
-  PPC   | KVM_REG_PPC_MAS6              | 32
-  PPC   | KVM_REG_PPC_MMUCFG            | 32
-  PPC   | KVM_REG_PPC_TLB0CFG           | 32
-  PPC   | KVM_REG_PPC_TLB1CFG           | 32
-  PPC   | KVM_REG_PPC_TLB2CFG           | 32
-  PPC   | KVM_REG_PPC_TLB3CFG           | 32
-  PPC   | KVM_REG_PPC_TLB0PS            | 32
-  PPC   | KVM_REG_PPC_TLB1PS            | 32
-  PPC   | KVM_REG_PPC_TLB2PS            | 32
-  PPC   | KVM_REG_PPC_TLB3PS            | 32
-  PPC   | KVM_REG_PPC_EPTCFG            | 32
-  PPC   | KVM_REG_PPC_ICP_STATE         | 64
-  PPC   | KVM_REG_PPC_VP_STATE          | 128
-  PPC   | KVM_REG_PPC_TB_OFFSET         | 64
-  PPC   | KVM_REG_PPC_SPMC1             | 32
-  PPC   | KVM_REG_PPC_SPMC2             | 32
-  PPC   | KVM_REG_PPC_IAMR              | 64
-  PPC   | KVM_REG_PPC_TFHAR             | 64
-  PPC   | KVM_REG_PPC_TFIAR             | 64
-  PPC   | KVM_REG_PPC_TEXASR            | 64
-  PPC   | KVM_REG_PPC_FSCR              | 64
-  PPC   | KVM_REG_PPC_PSPB              | 32
-  PPC   | KVM_REG_PPC_EBBHR             | 64
-  PPC   | KVM_REG_PPC_EBBRR             | 64
-  PPC   | KVM_REG_PPC_BESCR             | 64
-  PPC   | KVM_REG_PPC_TAR               | 64
-  PPC   | KVM_REG_PPC_DPDES             | 64
-  PPC   | KVM_REG_PPC_DAWR              | 64
-  PPC   | KVM_REG_PPC_DAWRX             | 64
-  PPC   | KVM_REG_PPC_CIABR             | 64
-  PPC   | KVM_REG_PPC_IC                | 64
-  PPC   | KVM_REG_PPC_VTB               | 64
-  PPC   | KVM_REG_PPC_CSIGR             | 64
-  PPC   | KVM_REG_PPC_TACR              | 64
-  PPC   | KVM_REG_PPC_TCSCR             | 64
-  PPC   | KVM_REG_PPC_PID               | 64
-  PPC   | KVM_REG_PPC_ACOP              | 64
-  PPC   | KVM_REG_PPC_VRSAVE            | 32
-  PPC   | KVM_REG_PPC_LPCR              | 32
-  PPC   | KVM_REG_PPC_LPCR_64           | 64
-  PPC   | KVM_REG_PPC_PPR               | 64
-  PPC   | KVM_REG_PPC_ARCH_COMPAT       | 32
-  PPC   | KVM_REG_PPC_DABRX             | 32
-  PPC   | KVM_REG_PPC_WORT              | 64
-  PPC	| KVM_REG_PPC_SPRG9             | 64
-  PPC	| KVM_REG_PPC_DBSR              | 32
-  PPC   | KVM_REG_PPC_TIDR              | 64
-  PPC   | KVM_REG_PPC_PSSCR             | 64
-  PPC   | KVM_REG_PPC_DEC_EXPIRY        | 64
-  PPC   | KVM_REG_PPC_PTCR              | 64
-  PPC   | KVM_REG_PPC_TM_GPR0           | 64
-          ...
-  PPC   | KVM_REG_PPC_TM_GPR31          | 64
-  PPC   | KVM_REG_PPC_TM_VSR0           | 128
-          ...
-  PPC   | KVM_REG_PPC_TM_VSR63          | 128
-  PPC   | KVM_REG_PPC_TM_CR             | 64
-  PPC   | KVM_REG_PPC_TM_LR             | 64
-  PPC   | KVM_REG_PPC_TM_CTR            | 64
-  PPC   | KVM_REG_PPC_TM_FPSCR          | 64
-  PPC   | KVM_REG_PPC_TM_AMR            | 64
-  PPC   | KVM_REG_PPC_TM_PPR            | 64
-  PPC   | KVM_REG_PPC_TM_VRSAVE         | 64
-  PPC   | KVM_REG_PPC_TM_VSCR           | 32
-  PPC   | KVM_REG_PPC_TM_DSCR           | 64
-  PPC   | KVM_REG_PPC_TM_TAR            | 64
-  PPC   | KVM_REG_PPC_TM_XER            | 64
-        |                               |
-  MIPS  | KVM_REG_MIPS_R0               | 64
-          ...
-  MIPS  | KVM_REG_MIPS_R31              | 64
-  MIPS  | KVM_REG_MIPS_HI               | 64
-  MIPS  | KVM_REG_MIPS_LO               | 64
-  MIPS  | KVM_REG_MIPS_PC               | 64
-  MIPS  | KVM_REG_MIPS_CP0_INDEX        | 32
-  MIPS  | KVM_REG_MIPS_CP0_ENTRYLO0     | 64
-  MIPS  | KVM_REG_MIPS_CP0_ENTRYLO1     | 64
-  MIPS  | KVM_REG_MIPS_CP0_CONTEXT      | 64
-  MIPS  | KVM_REG_MIPS_CP0_CONTEXTCONFIG| 32
-  MIPS  | KVM_REG_MIPS_CP0_USERLOCAL    | 64
-  MIPS  | KVM_REG_MIPS_CP0_XCONTEXTCONFIG| 64
-  MIPS  | KVM_REG_MIPS_CP0_PAGEMASK     | 32
-  MIPS  | KVM_REG_MIPS_CP0_PAGEGRAIN    | 32
-  MIPS  | KVM_REG_MIPS_CP0_SEGCTL0      | 64
-  MIPS  | KVM_REG_MIPS_CP0_SEGCTL1      | 64
-  MIPS  | KVM_REG_MIPS_CP0_SEGCTL2      | 64
-  MIPS  | KVM_REG_MIPS_CP0_PWBASE       | 64
-  MIPS  | KVM_REG_MIPS_CP0_PWFIELD      | 64
-  MIPS  | KVM_REG_MIPS_CP0_PWSIZE       | 64
-  MIPS  | KVM_REG_MIPS_CP0_WIRED        | 32
-  MIPS  | KVM_REG_MIPS_CP0_PWCTL        | 32
-  MIPS  | KVM_REG_MIPS_CP0_HWRENA       | 32
-  MIPS  | KVM_REG_MIPS_CP0_BADVADDR     | 64
-  MIPS  | KVM_REG_MIPS_CP0_BADINSTR     | 32
-  MIPS  | KVM_REG_MIPS_CP0_BADINSTRP    | 32
-  MIPS  | KVM_REG_MIPS_CP0_COUNT        | 32
-  MIPS  | KVM_REG_MIPS_CP0_ENTRYHI      | 64
-  MIPS  | KVM_REG_MIPS_CP0_COMPARE      | 32
-  MIPS  | KVM_REG_MIPS_CP0_STATUS       | 32
-  MIPS  | KVM_REG_MIPS_CP0_INTCTL       | 32
-  MIPS  | KVM_REG_MIPS_CP0_CAUSE        | 32
-  MIPS  | KVM_REG_MIPS_CP0_EPC          | 64
-  MIPS  | KVM_REG_MIPS_CP0_PRID         | 32
-  MIPS  | KVM_REG_MIPS_CP0_EBASE        | 64
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG       | 32
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG1      | 32
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG2      | 32
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG3      | 32
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG4      | 32
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG5      | 32
-  MIPS  | KVM_REG_MIPS_CP0_CONFIG7      | 32
-  MIPS  | KVM_REG_MIPS_CP0_XCONTEXT     | 64
-  MIPS  | KVM_REG_MIPS_CP0_ERROREPC     | 64
-  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH1    | 64
-  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH2    | 64
-  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH3    | 64
-  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH4    | 64
-  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH5    | 64
-  MIPS  | KVM_REG_MIPS_CP0_KSCRATCH6    | 64
-  MIPS  | KVM_REG_MIPS_CP0_MAAR(0..63)  | 64
-  MIPS  | KVM_REG_MIPS_COUNT_CTL        | 64
-  MIPS  | KVM_REG_MIPS_COUNT_RESUME     | 64
-  MIPS  | KVM_REG_MIPS_COUNT_HZ         | 64
-  MIPS  | KVM_REG_MIPS_FPR_32(0..31)    | 32
-  MIPS  | KVM_REG_MIPS_FPR_64(0..31)    | 64
-  MIPS  | KVM_REG_MIPS_VEC_128(0..31)   | 128
-  MIPS  | KVM_REG_MIPS_FCR_IR           | 32
-  MIPS  | KVM_REG_MIPS_FCR_CSR          | 32
-  MIPS  | KVM_REG_MIPS_MSA_IR           | 32
-  MIPS  | KVM_REG_MIPS_MSA_CSR          | 32
-
-ARM registers are mapped using the lower 32 bits.  The upper 16 of that
-is the register group type, or coprocessor number:
-
-ARM core registers have the following id bit patterns:
-  0x4020 0000 0010 <index into the kvm_regs struct:16>
-
-ARM 32-bit CP15 registers have the following id bit patterns:
-  0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
-
-ARM 64-bit CP15 registers have the following id bit patterns:
-  0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
-
-ARM CCSIDR registers are demultiplexed by CSSELR value:
-  0x4020 0000 0011 00 <csselr:8>
-
-ARM 32-bit VFP control registers have the following id bit patterns:
-  0x4020 0000 0012 1 <regno:12>
-
-ARM 64-bit FP registers have the following id bit patterns:
-  0x4030 0000 0012 0 <regno:12>
-
-ARM firmware pseudo-registers have the following bit pattern:
-  0x4030 0000 0014 <regno:16>
-
-
-arm64 registers are mapped using the lower 32 bits. The upper 16 of
-that is the register group type, or coprocessor number:
-
-arm64 core/FP-SIMD registers have the following id bit patterns. Note
-that the size of the access is variable, as the kvm_regs structure
-contains elements ranging from 32 to 128 bits. The index is a 32bit
-value in the kvm_regs structure seen as a 32bit array.
-  0x60x0 0000 0010 <index into the kvm_regs struct:16>
-
-Specifically:
-    Encoding            Register  Bits  kvm_regs member
-----------------------------------------------------------------
-  0x6030 0000 0010 0000 X0          64  regs.regs[0]
-  0x6030 0000 0010 0002 X1          64  regs.regs[1]
-    ...
-  0x6030 0000 0010 003c X30         64  regs.regs[30]
-  0x6030 0000 0010 003e SP          64  regs.sp
-  0x6030 0000 0010 0040 PC          64  regs.pc
-  0x6030 0000 0010 0042 PSTATE      64  regs.pstate
-  0x6030 0000 0010 0044 SP_EL1      64  sp_el1
-  0x6030 0000 0010 0046 ELR_EL1     64  elr_el1
-  0x6030 0000 0010 0048 SPSR_EL1    64  spsr[KVM_SPSR_EL1] (alias SPSR_SVC)
-  0x6030 0000 0010 004a SPSR_ABT    64  spsr[KVM_SPSR_ABT]
-  0x6030 0000 0010 004c SPSR_UND    64  spsr[KVM_SPSR_UND]
-  0x6030 0000 0010 004e SPSR_IRQ    64  spsr[KVM_SPSR_IRQ]
-  0x6060 0000 0010 0050 SPSR_FIQ    64  spsr[KVM_SPSR_FIQ]
-  0x6040 0000 0010 0054 V0         128  fp_regs.vregs[0]    (*)
-  0x6040 0000 0010 0058 V1         128  fp_regs.vregs[1]    (*)
-    ...
-  0x6040 0000 0010 00d0 V31        128  fp_regs.vregs[31]   (*)
-  0x6020 0000 0010 00d4 FPSR        32  fp_regs.fpsr
-  0x6020 0000 0010 00d5 FPCR        32  fp_regs.fpcr
-
-(*) These encodings are not accepted for SVE-enabled vcpus.  See
-    KVM_ARM_VCPU_INIT.
-
-    The equivalent register content can be accessed via bits [127:0] of
-    the corresponding SVE Zn registers instead for vcpus that have SVE
-    enabled (see below).
-
-arm64 CCSIDR registers are demultiplexed by CSSELR value:
-  0x6020 0000 0011 00 <csselr:8>
-
-arm64 system registers have the following id bit patterns:
-  0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
-
-arm64 firmware pseudo-registers have the following bit pattern:
-  0x6030 0000 0014 <regno:16>
-
-arm64 SVE registers have the following bit patterns:
-  0x6080 0000 0015 00 <n:5> <slice:5>   Zn bits[2048*slice + 2047 : 2048*slice]
-  0x6050 0000 0015 04 <n:4> <slice:5>   Pn bits[256*slice + 255 : 256*slice]
-  0x6050 0000 0015 060 <slice:5>        FFR bits[256*slice + 255 : 256*slice]
-  0x6060 0000 0015 ffff                 KVM_REG_ARM64_SVE_VLS pseudo-register
-
-Access to register IDs where 2048 * slice >= 128 * max_vq will fail with
-ENOENT.  max_vq is the vcpu's maximum supported vector length in 128-bit
-quadwords: see (**) below.
-
-These registers are only accessible on vcpus for which SVE is enabled.
-See KVM_ARM_VCPU_INIT for details.
-
-In addition, except for KVM_REG_ARM64_SVE_VLS, these registers are not
-accessible until the vcpu's SVE configuration has been finalized
-using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).  See KVM_ARM_VCPU_INIT
-and KVM_ARM_VCPU_FINALIZE for more information about this procedure.
-
-KVM_REG_ARM64_SVE_VLS is a pseudo-register that allows the set of vector
-lengths supported by the vcpu to be discovered and configured by
-userspace.  When transferred to or from user memory via KVM_GET_ONE_REG
-or KVM_SET_ONE_REG, the value of this register is of type
-__u64[KVM_ARM64_SVE_VLS_WORDS], and encodes the set of vector lengths as
-follows:
-
-__u64 vector_lengths[KVM_ARM64_SVE_VLS_WORDS];
-
-if (vq >= SVE_VQ_MIN && vq <= SVE_VQ_MAX &&
-    ((vector_lengths[(vq - KVM_ARM64_SVE_VQ_MIN) / 64] >>
-		((vq - KVM_ARM64_SVE_VQ_MIN) % 64)) & 1))
-	/* Vector length vq * 16 bytes supported */
-else
-	/* Vector length vq * 16 bytes not supported */
-
-(**) The maximum value vq for which the above condition is true is
-max_vq.  This is the maximum vector length available to the guest on
-this vcpu, and determines which register slices are visible through
-this ioctl interface.
-
-(See Documentation/arm64/sve.rst for an explanation of the "vq"
-nomenclature.)
-
-KVM_REG_ARM64_SVE_VLS is only accessible after KVM_ARM_VCPU_INIT.
-KVM_ARM_VCPU_INIT initialises it to the best set of vector lengths that
-the host supports.
-
-Userspace may subsequently modify it if desired until the vcpu's SVE
-configuration is finalized using KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE).
-
-Apart from simply removing all vector lengths from the host set that
-exceed some value, support for arbitrarily chosen sets of vector lengths
-is hardware-dependent and may not be available.  Attempting to configure
-an invalid set of vector lengths via KVM_SET_ONE_REG will fail with
-EINVAL.
-
-After the vcpu's SVE configuration is finalized, further attempts to
-write this register will fail with EPERM.
-
-
-MIPS registers are mapped using the lower 32 bits.  The upper 16 of that is
-the register group type:
-
-MIPS core registers (see above) have the following id bit patterns:
-  0x7030 0000 0000 <reg:16>
-
-MIPS CP0 registers (see KVM_REG_MIPS_CP0_* above) have the following id bit
-patterns depending on whether they're 32-bit or 64-bit registers:
-  0x7020 0000 0001 00 <reg:5> <sel:3>   (32-bit)
-  0x7030 0000 0001 00 <reg:5> <sel:3>   (64-bit)
-
-Note: KVM_REG_MIPS_CP0_ENTRYLO0 and KVM_REG_MIPS_CP0_ENTRYLO1 are the MIPS64
-versions of the EntryLo registers regardless of the word size of the host
-hardware, host kernel, guest, and whether XPA is present in the guest, i.e.
-with the RI and XI bits (if they exist) in bits 63 and 62 respectively, and
-the PFNX field starting at bit 30.
-
-MIPS MAARs (see KVM_REG_MIPS_CP0_MAAR(*) above) have the following id bit
-patterns:
-  0x7030 0000 0001 01 <reg:8>
-
-MIPS KVM control registers (see above) have the following id bit patterns:
-  0x7030 0000 0002 <reg:16>
-
-MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
-id bit patterns depending on the size of the register being accessed. They are
-always accessed according to the current guest FPU mode (Status.FR and
-Config5.FRE), i.e. as the guest would see them, and they become unpredictable
-if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
-registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
-overlap the FPU registers:
-  0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
-  0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
-  0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
-
-MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
-following id bit patterns:
-  0x7020 0000 0003 01 <0:3> <reg:5>
-
-MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
-following id bit patterns:
-  0x7020 0000 0003 02 <0:3> <reg:5>
-
-
-4.69 KVM_GET_ONE_REG
-
-Capability: KVM_CAP_ONE_REG
-Architectures: all
-Type: vcpu ioctl
-Parameters: struct kvm_one_reg (in and out)
-Returns: 0 on success, negative value on failure
-Errors include:
-  ENOENT:   no such register
-  EINVAL:   invalid register ID, or no such register
-  EPERM:    (arm64) register access not allowed before vcpu finalization
-(These error codes are indicative only: do not rely on a specific error
-code being returned in a specific situation.)
-
-This ioctl allows to receive the value of a single register implemented
-in a vcpu. The register to read is indicated by the "id" field of the
-kvm_one_reg struct passed in. On success, the register value can be found
-at the memory location pointed to by "addr".
-
-The list of registers accessible using this interface is identical to the
-list in 4.68.
-
-
-4.70 KVM_KVMCLOCK_CTRL
-
-Capability: KVM_CAP_KVMCLOCK_CTRL
-Architectures: Any that implement pvclocks (currently x86 only)
-Type: vcpu ioctl
-Parameters: None
-Returns: 0 on success, -1 on error
-
-This signals to the host kernel that the specified guest is being paused by
-userspace.  The host will set a flag in the pvclock structure that is checked
-from the soft lockup watchdog.  The flag is part of the pvclock structure that
-is shared between guest and host, specifically the second bit of the flags
-field of the pvclock_vcpu_time_info structure.  It will be set exclusively by
-the host and read/cleared exclusively by the guest.  The guest operation of
-checking and clearing the flag must an atomic operation so
-load-link/store-conditional, or equivalent must be used.  There are two cases
-where the guest will clear the flag: when the soft lockup watchdog timer resets
-itself or when a soft lockup is detected.  This ioctl can be called any time
-after pausing the vcpu, but before it is resumed.
-
-
-4.71 KVM_SIGNAL_MSI
-
-Capability: KVM_CAP_SIGNAL_MSI
-Architectures: x86 arm arm64
-Type: vm ioctl
-Parameters: struct kvm_msi (in)
-Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
-
-Directly inject a MSI message. Only valid with in-kernel irqchip that handles
-MSI messages.
-
-struct kvm_msi {
-	__u32 address_lo;
-	__u32 address_hi;
-	__u32 data;
-	__u32 flags;
-	__u32 devid;
-	__u8  pad[12];
-};
-
-flags: KVM_MSI_VALID_DEVID: devid contains a valid value.  The per-VM
-  KVM_CAP_MSI_DEVID capability advertises the requirement to provide
-  the device ID.  If this capability is not available, userspace
-  should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
-
-If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
-for the device that wrote the MSI message.  For PCI, this is usually a
-BFD identifier in the lower 16 bits.
-
-On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
-feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
-address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
-address_hi must be zero.
-
-
-4.71 KVM_CREATE_PIT2
-
-Capability: KVM_CAP_PIT2
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pit_config (in)
-Returns: 0 on success, -1 on error
-
-Creates an in-kernel device model for the i8254 PIT. This call is only valid
-after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following
-parameters have to be passed:
-
-struct kvm_pit_config {
-	__u32 flags;
-	__u32 pad[15];
-};
-
-Valid flags are:
-
-#define KVM_PIT_SPEAKER_DUMMY     1 /* emulate speaker port stub */
-
-PIT timer interrupts may use a per-VM kernel thread for injection. If it
-exists, this thread will have a name of the following pattern:
-
-kvm-pit/<owner-process-pid>
-
-When running a guest with elevated priorities, the scheduling parameters of
-this thread may have to be adjusted accordingly.
-
-This IOCTL replaces the obsolete KVM_CREATE_PIT.
-
-
-4.72 KVM_GET_PIT2
-
-Capability: KVM_CAP_PIT_STATE2
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pit_state2 (out)
-Returns: 0 on success, -1 on error
-
-Retrieves the state of the in-kernel PIT model. Only valid after
-KVM_CREATE_PIT2. The state is returned in the following structure:
-
-struct kvm_pit_state2 {
-	struct kvm_pit_channel_state channels[3];
-	__u32 flags;
-	__u32 reserved[9];
-};
-
-Valid flags are:
-
-/* disable PIT in HPET legacy mode */
-#define KVM_PIT_FLAGS_HPET_LEGACY  0x00000001
-
-This IOCTL replaces the obsolete KVM_GET_PIT.
-
-
-4.73 KVM_SET_PIT2
-
-Capability: KVM_CAP_PIT_STATE2
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pit_state2 (in)
-Returns: 0 on success, -1 on error
-
-Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
-See KVM_GET_PIT2 for details on struct kvm_pit_state2.
-
-This IOCTL replaces the obsolete KVM_SET_PIT.
-
-
-4.74 KVM_PPC_GET_SMMU_INFO
-
-Capability: KVM_CAP_PPC_GET_SMMU_INFO
-Architectures: powerpc
-Type: vm ioctl
-Parameters: None
-Returns: 0 on success, -1 on error
-
-This populates and returns a structure describing the features of
-the "Server" class MMU emulation supported by KVM.
-This can in turn be used by userspace to generate the appropriate
-device-tree properties for the guest operating system.
-
-The structure contains some global information, followed by an
-array of supported segment page sizes:
-
-      struct kvm_ppc_smmu_info {
-	     __u64 flags;
-	     __u32 slb_size;
-	     __u32 pad;
-	     struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
-      };
-
-The supported flags are:
-
-    - KVM_PPC_PAGE_SIZES_REAL:
-        When that flag is set, guest page sizes must "fit" the backing
-        store page sizes. When not set, any page size in the list can
-        be used regardless of how they are backed by userspace.
-
-    - KVM_PPC_1T_SEGMENTS
-        The emulated MMU supports 1T segments in addition to the
-        standard 256M ones.
-
-    - KVM_PPC_NO_HASH
-	This flag indicates that HPT guests are not supported by KVM,
-	thus all guests must use radix MMU mode.
-
-The "slb_size" field indicates how many SLB entries are supported
-
-The "sps" array contains 8 entries indicating the supported base
-page sizes for a segment in increasing order. Each entry is defined
-as follow:
-
-   struct kvm_ppc_one_seg_page_size {
-	__u32 page_shift;	/* Base page shift of segment (or 0) */
-	__u32 slb_enc;		/* SLB encoding for BookS */
-	struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
-   };
-
-An entry with a "page_shift" of 0 is unused. Because the array is
-organized in increasing order, a lookup can stop when encoutering
-such an entry.
-
-The "slb_enc" field provides the encoding to use in the SLB for the
-page size. The bits are in positions such as the value can directly
-be OR'ed into the "vsid" argument of the slbmte instruction.
-
-The "enc" array is a list which for each of those segment base page
-size provides the list of supported actual page sizes (which can be
-only larger or equal to the base page size), along with the
-corresponding encoding in the hash PTE. Similarly, the array is
-8 entries sorted by increasing sizes and an entry with a "0" shift
-is an empty entry and a terminator:
-
-   struct kvm_ppc_one_page_size {
-	__u32 page_shift;	/* Page shift (or 0) */
-	__u32 pte_enc;		/* Encoding in the HPTE (>>12) */
-   };
-
-The "pte_enc" field provides a value that can OR'ed into the hash
-PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
-into the hash PTE second double word).
-
-4.75 KVM_IRQFD
-
-Capability: KVM_CAP_IRQFD
-Architectures: x86 s390 arm arm64
-Type: vm ioctl
-Parameters: struct kvm_irqfd (in)
-Returns: 0 on success, -1 on error
-
-Allows setting an eventfd to directly trigger a guest interrupt.
-kvm_irqfd.fd specifies the file descriptor to use as the eventfd and
-kvm_irqfd.gsi specifies the irqchip pin toggled by this event.  When
-an event is triggered on the eventfd, an interrupt is injected into
-the guest using the specified gsi pin.  The irqfd is removed using
-the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
-and kvm_irqfd.gsi.
-
-With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
-mechanism allowing emulation of level-triggered, irqfd-based
-interrupts.  When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
-additional eventfd in the kvm_irqfd.resamplefd field.  When operating
-in resample mode, posting of an interrupt through kvm_irq.fd asserts
-the specified gsi in the irqchip.  When the irqchip is resampled, such
-as from an EOI, the gsi is de-asserted and the user is notified via
-kvm_irqfd.resamplefd.  It is the user's responsibility to re-queue
-the interrupt if the device making use of it still requires service.
-Note that closing the resamplefd is not sufficient to disable the
-irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
-and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
-
-On arm/arm64, gsi routing being supported, the following can happen:
-- in case no routing entry is associated to this gsi, injection fails
-- in case the gsi is associated to an irqchip routing entry,
-  irqchip.pin + 32 corresponds to the injected SPI ID.
-- in case the gsi is associated to an MSI routing entry, the MSI
-  message and device ID are translated into an LPI (support restricted
-  to GICv3 ITS in-kernel emulation).
-
-4.76 KVM_PPC_ALLOCATE_HTAB
-
-Capability: KVM_CAP_PPC_ALLOC_HTAB
-Architectures: powerpc
-Type: vm ioctl
-Parameters: Pointer to u32 containing hash table order (in/out)
-Returns: 0 on success, -1 on error
-
-This requests the host kernel to allocate an MMU hash table for a
-guest using the PAPR paravirtualization interface.  This only does
-anything if the kernel is configured to use the Book 3S HV style of
-virtualization.  Otherwise the capability doesn't exist and the ioctl
-returns an ENOTTY error.  The rest of this description assumes Book 3S
-HV.
-
-There must be no vcpus running when this ioctl is called; if there
-are, it will do nothing and return an EBUSY error.
-
-The parameter is a pointer to a 32-bit unsigned integer variable
-containing the order (log base 2) of the desired size of the hash
-table, which must be between 18 and 46.  On successful return from the
-ioctl, the value will not be changed by the kernel.
-
-If no hash table has been allocated when any vcpu is asked to run
-(with the KVM_RUN ioctl), the host kernel will allocate a
-default-sized hash table (16 MB).
-
-If this ioctl is called when a hash table has already been allocated,
-with a different order from the existing hash table, the existing hash
-table will be freed and a new one allocated.  If this is ioctl is
-called when a hash table has already been allocated of the same order
-as specified, the kernel will clear out the existing hash table (zero
-all HPTEs).  In either case, if the guest is using the virtualized
-real-mode area (VRMA) facility, the kernel will re-create the VMRA
-HPTEs on the next KVM_RUN of any vcpu.
-
-4.77 KVM_S390_INTERRUPT
-
-Capability: basic
-Architectures: s390
-Type: vm ioctl, vcpu ioctl
-Parameters: struct kvm_s390_interrupt (in)
-Returns: 0 on success, -1 on error
-
-Allows to inject an interrupt to the guest. Interrupts can be floating
-(vm ioctl) or per cpu (vcpu ioctl), depending on the interrupt type.
-
-Interrupt parameters are passed via kvm_s390_interrupt:
-
-struct kvm_s390_interrupt {
-	__u32 type;
-	__u32 parm;
-	__u64 parm64;
-};
-
-type can be one of the following:
-
-KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
-KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
-KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
-KVM_S390_RESTART (vcpu) - restart
-KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
-KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
-KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
-			   parameters in parm and parm64
-KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
-KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm
-KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
-KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an
-    I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
-    I/O interruption parameters in parm (subchannel) and parm64 (intparm,
-    interruption subclass)
-KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
-                           machine check interrupt code in parm64 (note that
-                           machine checks needing further payload are not
-                           supported by this ioctl)
-
-This is an asynchronous vcpu ioctl and can be invoked from any thread.
-
-4.78 KVM_PPC_GET_HTAB_FD
-
-Capability: KVM_CAP_PPC_HTAB_FD
-Architectures: powerpc
-Type: vm ioctl
-Parameters: Pointer to struct kvm_get_htab_fd (in)
-Returns: file descriptor number (>= 0) on success, -1 on error
-
-This returns a file descriptor that can be used either to read out the
-entries in the guest's hashed page table (HPT), or to write entries to
-initialize the HPT.  The returned fd can only be written to if the
-KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
-can only be read if that bit is clear.  The argument struct looks like
-this:
-
-/* For KVM_PPC_GET_HTAB_FD */
-struct kvm_get_htab_fd {
-	__u64	flags;
-	__u64	start_index;
-	__u64	reserved[2];
-};
-
-/* Values for kvm_get_htab_fd.flags */
-#define KVM_GET_HTAB_BOLTED_ONLY	((__u64)0x1)
-#define KVM_GET_HTAB_WRITE		((__u64)0x2)
-
-The `start_index' field gives the index in the HPT of the entry at
-which to start reading.  It is ignored when writing.
-
-Reads on the fd will initially supply information about all
-"interesting" HPT entries.  Interesting entries are those with the
-bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
-all entries.  When the end of the HPT is reached, the read() will
-return.  If read() is called again on the fd, it will start again from
-the beginning of the HPT, but will only return HPT entries that have
-changed since they were last read.
-
-Data read or written is structured as a header (8 bytes) followed by a
-series of valid HPT entries (16 bytes) each.  The header indicates how
-many valid HPT entries there are and how many invalid entries follow
-the valid entries.  The invalid entries are not represented explicitly
-in the stream.  The header format is:
-
-struct kvm_get_htab_header {
-	__u32	index;
-	__u16	n_valid;
-	__u16	n_invalid;
-};
-
-Writes to the fd create HPT entries starting at the index given in the
-header; first `n_valid' valid entries with contents from the data
-written, then `n_invalid' invalid entries, invalidating any previously
-valid entries found.
-
-4.79 KVM_CREATE_DEVICE
-
-Capability: KVM_CAP_DEVICE_CTRL
-Type: vm ioctl
-Parameters: struct kvm_create_device (in/out)
-Returns: 0 on success, -1 on error
-Errors:
-  ENODEV: The device type is unknown or unsupported
-  EEXIST: Device already created, and this type of device may not
-          be instantiated multiple times
-
-  Other error conditions may be defined by individual device types or
-  have their standard meanings.
-
-Creates an emulated device in the kernel.  The file descriptor returned
-in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
-
-If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
-device type is supported (not necessarily whether it can be created
-in the current vm).
-
-Individual devices should not define flags.  Attributes should be used
-for specifying any behavior that is not implied by the device type
-number.
-
-struct kvm_create_device {
-	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
-	__u32	fd;	/* out: device handle */
-	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
-};
-
-4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
-
-Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
-  KVM_CAP_VCPU_ATTRIBUTES for vcpu device
-Type: device ioctl, vm ioctl, vcpu ioctl
-Parameters: struct kvm_device_attr
-Returns: 0 on success, -1 on error
-Errors:
-  ENXIO:  The group or attribute is unknown/unsupported for this device
-          or hardware support is missing.
-  EPERM:  The attribute cannot (currently) be accessed this way
-          (e.g. read-only attribute, or attribute that only makes
-          sense when the device is in a different state)
-
-  Other error conditions may be defined by individual device types.
-
-Gets/sets a specified piece of device configuration and/or state.  The
-semantics are device-specific.  See individual device documentation in
-the "devices" directory.  As with ONE_REG, the size of the data
-transferred is defined by the particular attribute.
-
-struct kvm_device_attr {
-	__u32	flags;		/* no flags currently defined */
-	__u32	group;		/* device-defined */
-	__u64	attr;		/* group-defined */
-	__u64	addr;		/* userspace address of attr data */
-};
-
-4.81 KVM_HAS_DEVICE_ATTR
-
-Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
-  KVM_CAP_VCPU_ATTRIBUTES for vcpu device
-Type: device ioctl, vm ioctl, vcpu ioctl
-Parameters: struct kvm_device_attr
-Returns: 0 on success, -1 on error
-Errors:
-  ENXIO:  The group or attribute is unknown/unsupported for this device
-          or hardware support is missing.
-
-Tests whether a device supports a particular attribute.  A successful
-return indicates the attribute is implemented.  It does not necessarily
-indicate that the attribute can be read or written in the device's
-current state.  "addr" is ignored.
-
-4.82 KVM_ARM_VCPU_INIT
-
-Capability: basic
-Architectures: arm, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_vcpu_init (in)
-Returns: 0 on success; -1 on error
-Errors:
-  EINVAL:    the target is unknown, or the combination of features is invalid.
-  ENOENT:    a features bit specified is unknown.
-
-This tells KVM what type of CPU to present to the guest, and what
-optional features it should have.  This will cause a reset of the cpu
-registers to their initial values.  If this is not called, KVM_RUN will
-return ENOEXEC for that vcpu.
-
-Note that because some registers reflect machine topology, all vcpus
-should be created before this ioctl is invoked.
-
-Userspace can call this function multiple times for a given vcpu, including
-after the vcpu has been run. This will reset the vcpu to its initial
-state. All calls to this function after the initial call must use the same
-target and same set of feature flags, otherwise EINVAL will be returned.
-
-Possible features:
-	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
-	  Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
-	  and execute guest code when KVM_RUN is called.
-	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
-	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
-	- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 (or a future revision
-          backward compatible with v0.2) for the CPU.
-	  Depends on KVM_CAP_ARM_PSCI_0_2.
-	- KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU.
-	  Depends on KVM_CAP_ARM_PMU_V3.
-
-	- KVM_ARM_VCPU_PTRAUTH_ADDRESS: Enables Address Pointer authentication
-	  for arm64 only.
-	  Depends on KVM_CAP_ARM_PTRAUTH_ADDRESS.
-	  If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
-	  both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
-	  KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
-	  requested.
-
-	- KVM_ARM_VCPU_PTRAUTH_GENERIC: Enables Generic Pointer authentication
-	  for arm64 only.
-	  Depends on KVM_CAP_ARM_PTRAUTH_GENERIC.
-	  If KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC are
-	  both present, then both KVM_ARM_VCPU_PTRAUTH_ADDRESS and
-	  KVM_ARM_VCPU_PTRAUTH_GENERIC must be requested or neither must be
-	  requested.
-
-	- KVM_ARM_VCPU_SVE: Enables SVE for the CPU (arm64 only).
-	  Depends on KVM_CAP_ARM_SVE.
-	  Requires KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
-
-	   * After KVM_ARM_VCPU_INIT:
-
-	      - KVM_REG_ARM64_SVE_VLS may be read using KVM_GET_ONE_REG: the
-	        initial value of this pseudo-register indicates the best set of
-	        vector lengths possible for a vcpu on this host.
-
-	   * Before KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
-
-	      - KVM_RUN and KVM_GET_REG_LIST are not available;
-
-	      - KVM_GET_ONE_REG and KVM_SET_ONE_REG cannot be used to access
-	        the scalable archietctural SVE registers
-	        KVM_REG_ARM64_SVE_ZREG(), KVM_REG_ARM64_SVE_PREG() or
-	        KVM_REG_ARM64_SVE_FFR;
-
-	      - KVM_REG_ARM64_SVE_VLS may optionally be written using
-	        KVM_SET_ONE_REG, to modify the set of vector lengths available
-	        for the vcpu.
-
-	   * After KVM_ARM_VCPU_FINALIZE(KVM_ARM_VCPU_SVE):
-
-	      - the KVM_REG_ARM64_SVE_VLS pseudo-register is immutable, and can
-	        no longer be written using KVM_SET_ONE_REG.
-
-4.83 KVM_ARM_PREFERRED_TARGET
-
-Capability: basic
-Architectures: arm, arm64
-Type: vm ioctl
-Parameters: struct struct kvm_vcpu_init (out)
-Returns: 0 on success; -1 on error
-Errors:
-  ENODEV:    no preferred target available for the host
-
-This queries KVM for preferred CPU target type which can be emulated
-by KVM on underlying host.
-
-The ioctl returns struct kvm_vcpu_init instance containing information
-about preferred CPU target type and recommended features for it.  The
-kvm_vcpu_init->features bitmap returned will have feature bits set if
-the preferred target recommends setting these features, but this is
-not mandatory.
-
-The information returned by this ioctl can be used to prepare an instance
-of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in
-in VCPU matching underlying host.
-
-
-4.84 KVM_GET_REG_LIST
-
-Capability: basic
-Architectures: arm, arm64, mips
-Type: vcpu ioctl
-Parameters: struct kvm_reg_list (in/out)
-Returns: 0 on success; -1 on error
-Errors:
-  E2BIG:     the reg index list is too big to fit in the array specified by
-             the user (the number required will be written into n).
-
-struct kvm_reg_list {
-	__u64 n; /* number of registers in reg[] */
-	__u64 reg[0];
-};
-
-This ioctl returns the guest registers that are supported for the
-KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
-
-
-4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
-
-Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm, arm64
-Type: vm ioctl
-Parameters: struct kvm_arm_device_address (in)
-Returns: 0 on success, -1 on error
-Errors:
-  ENODEV: The device id is unknown
-  ENXIO:  Device not supported on current system
-  EEXIST: Address already set
-  E2BIG:  Address outside guest physical address space
-  EBUSY:  Address overlaps with other device range
-
-struct kvm_arm_device_addr {
-	__u64 id;
-	__u64 addr;
-};
-
-Specify a device address in the guest's physical address space where guests
-can access emulated or directly exposed devices, which the host kernel needs
-to know about. The id field is an architecture specific identifier for a
-specific device.
-
-ARM/arm64 divides the id field into two parts, a device id and an
-address type id specific to the individual device.
-
-  bits:  | 63        ...       32 | 31    ...    16 | 15    ...    0 |
-  field: |        0x00000000      |     device id   |  addr type id  |
-
-ARM/arm64 currently only require this when using the in-kernel GIC
-support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
-as the device id.  When setting the base address for the guest's
-mapping of the VGIC virtual CPU and distributor interface, the ioctl
-must be called after calling KVM_CREATE_IRQCHIP, but before calling
-KVM_RUN on any of the VCPUs.  Calling this ioctl twice for any of the
-base addresses will return -EEXIST.
-
-Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
-should be used instead.
-
-
-4.86 KVM_PPC_RTAS_DEFINE_TOKEN
-
-Capability: KVM_CAP_PPC_RTAS
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_rtas_token_args
-Returns: 0 on success, -1 on error
-
-Defines a token value for a RTAS (Run Time Abstraction Services)
-service in order to allow it to be handled in the kernel.  The
-argument struct gives the name of the service, which must be the name
-of a service that has a kernel-side implementation.  If the token
-value is non-zero, it will be associated with that service, and
-subsequent RTAS calls by the guest specifying that token will be
-handled by the kernel.  If the token value is 0, then any token
-associated with the service will be forgotten, and subsequent RTAS
-calls by the guest for that service will be passed to userspace to be
-handled.
-
-4.87 KVM_SET_GUEST_DEBUG
-
-Capability: KVM_CAP_SET_GUEST_DEBUG
-Architectures: x86, s390, ppc, arm64
-Type: vcpu ioctl
-Parameters: struct kvm_guest_debug (in)
-Returns: 0 on success; -1 on error
-
-struct kvm_guest_debug {
-       __u32 control;
-       __u32 pad;
-       struct kvm_guest_debug_arch arch;
-};
-
-Set up the processor specific debug registers and configure vcpu for
-handling guest debug events. There are two parts to the structure, the
-first a control bitfield indicates the type of debug events to handle
-when running. Common control bits are:
-
-  - KVM_GUESTDBG_ENABLE:        guest debugging is enabled
-  - KVM_GUESTDBG_SINGLESTEP:    the next run should single-step
-
-The top 16 bits of the control field are architecture specific control
-flags which can include the following:
-
-  - KVM_GUESTDBG_USE_SW_BP:     using software breakpoints [x86, arm64]
-  - KVM_GUESTDBG_USE_HW_BP:     using hardware breakpoints [x86, s390, arm64]
-  - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
-  - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
-  - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
-
-For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
-are enabled in memory so we need to ensure breakpoint exceptions are
-correctly trapped and the KVM run loop exits at the breakpoint and not
-running off into the normal guest vector. For KVM_GUESTDBG_USE_HW_BP
-we need to ensure the guest vCPUs architecture specific registers are
-updated to the correct (supplied) values.
-
-The second part of the structure is architecture specific and
-typically contains a set of debug registers.
-
-For arm64 the number of debug registers is implementation defined and
-can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
-KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
-indicating the number of supported registers.
-
-When debug events exit the main run loop with the reason
-KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
-structure containing architecture specific debug information.
-
-4.88 KVM_GET_EMULATED_CPUID
-
-Capability: KVM_CAP_EXT_EMUL_CPUID
-Architectures: x86
-Type: system ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
-
-struct kvm_cpuid2 {
-	__u32 nent;
-	__u32 flags;
-	struct kvm_cpuid_entry2 entries[0];
-};
-
-The member 'flags' is used for passing flags from userspace.
-
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX		BIT(0)
-#define KVM_CPUID_FLAG_STATEFUL_FUNC		BIT(1)
-#define KVM_CPUID_FLAG_STATE_READ_NEXT		BIT(2)
-
-struct kvm_cpuid_entry2 {
-	__u32 function;
-	__u32 index;
-	__u32 flags;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding[3];
-};
-
-This ioctl returns x86 cpuid features which are emulated by
-kvm.Userspace can use the information returned by this ioctl to query
-which features are emulated by kvm instead of being present natively.
-
-Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
-structure with the 'nent' field indicating the number of entries in
-the variable-size array 'entries'. If the number of entries is too low
-to describe the cpu capabilities, an error (E2BIG) is returned. If the
-number is too high, the 'nent' field is adjusted and an error (ENOMEM)
-is returned. If the number is just right, the 'nent' field is adjusted
-to the number of valid entries in the 'entries' array, which is then
-filled.
-
-The entries returned are the set CPUID bits of the respective features
-which kvm emulates, as returned by the CPUID instruction, with unknown
-or unsupported feature bits cleared.
-
-Features like x2apic, for example, may not be present in the host cpu
-but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
-emulated efficiently and thus not included here.
-
-The fields in each entry are defined as follows:
-
-  function: the eax value used to obtain the entry
-  index: the ecx value used to obtain the entry (for entries that are
-         affected by ecx)
-  flags: an OR of zero or more of the following:
-        KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
-           if the index field is valid
-        KVM_CPUID_FLAG_STATEFUL_FUNC:
-           if cpuid for this function returns different values for successive
-           invocations; there will be several entries with the same function,
-           all with this flag set
-        KVM_CPUID_FLAG_STATE_READ_NEXT:
-           for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
-           the first entry to be read by a cpu
-   eax, ebx, ecx, edx: the values returned by the cpuid instruction for
-         this function/index combination
-
-4.89 KVM_S390_MEM_OP
-
-Capability: KVM_CAP_S390_MEM_OP
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_mem_op (in)
-Returns: = 0 on success,
-         < 0 on generic error (e.g. -EFAULT or -ENOMEM),
-         > 0 if an exception occurred while walking the page tables
-
-Read or write data from/to the logical (virtual) memory of a VCPU.
-
-Parameters are specified via the following structure:
-
-struct kvm_s390_mem_op {
-	__u64 gaddr;		/* the guest address */
-	__u64 flags;		/* flags */
-	__u32 size;		/* amount of bytes */
-	__u32 op;		/* type of operation */
-	__u64 buf;		/* buffer in userspace */
-	__u8 ar;		/* the access register number */
-	__u8 reserved[31];	/* should be set to 0 */
-};
-
-The type of operation is specified in the "op" field. It is either
-KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
-KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
-KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
-whether the corresponding memory access would create an access exception
-(without touching the data in the memory at the destination). In case an
-access exception occurred while walking the MMU tables of the guest, the
-ioctl returns a positive error number to indicate the type of exception.
-This exception is also raised directly at the corresponding VCPU if the
-flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
-
-The start address of the memory region has to be specified in the "gaddr"
-field, and the length of the region in the "size" field. "buf" is the buffer
-supplied by the userspace application where the read data should be written
-to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
-is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
-when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
-register number to be used.
-
-The "reserved" field is meant for future extensions. It is not used by
-KVM with the currently defined set of flags.
-
-4.90 KVM_S390_GET_SKEYS
-
-Capability: KVM_CAP_S390_SKEYS
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_skeys
-Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
-         keys, negative value on error
-
-This ioctl is used to get guest storage key values on the s390
-architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
-
-struct kvm_s390_skeys {
-	__u64 start_gfn;
-	__u64 count;
-	__u64 skeydata_addr;
-	__u32 flags;
-	__u32 reserved[9];
-};
-
-The start_gfn field is the number of the first guest frame whose storage keys
-you want to get.
-
-The count field is the number of consecutive frames (starting from start_gfn)
-whose storage keys to get. The count field must be at least 1 and the maximum
-allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
-will cause the ioctl to return -EINVAL.
-
-The skeydata_addr field is the address to a buffer large enough to hold count
-bytes. This buffer will be filled with storage key data by the ioctl.
-
-4.91 KVM_S390_SET_SKEYS
-
-Capability: KVM_CAP_S390_SKEYS
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_skeys
-Returns: 0 on success, negative value on error
-
-This ioctl is used to set guest storage key values on the s390
-architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
-See section on KVM_S390_GET_SKEYS for struct definition.
-
-The start_gfn field is the number of the first guest frame whose storage keys
-you want to set.
-
-The count field is the number of consecutive frames (starting from start_gfn)
-whose storage keys to get. The count field must be at least 1 and the maximum
-allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
-will cause the ioctl to return -EINVAL.
-
-The skeydata_addr field is the address to a buffer containing count bytes of
-storage keys. Each byte in the buffer will be set as the storage key for a
-single frame starting at start_gfn for count frames.
-
-Note: If any architecturally invalid key value is found in the given data then
-the ioctl will return -EINVAL.
-
-4.92 KVM_S390_IRQ
-
-Capability: KVM_CAP_S390_INJECT_IRQ
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_irq (in)
-Returns: 0 on success, -1 on error
-Errors:
-  EINVAL: interrupt type is invalid
-          type is KVM_S390_SIGP_STOP and flag parameter is invalid value
-          type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
-            than the maximum of VCPUs
-  EBUSY:  type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
-          type is KVM_S390_SIGP_STOP and a stop irq is already pending
-          type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
-            is already pending
-
-Allows to inject an interrupt to the guest.
-
-Using struct kvm_s390_irq as a parameter allows
-to inject additional payload which is not
-possible via KVM_S390_INTERRUPT.
-
-Interrupt parameters are passed via kvm_s390_irq:
-
-struct kvm_s390_irq {
-	__u64 type;
-	union {
-		struct kvm_s390_io_info io;
-		struct kvm_s390_ext_info ext;
-		struct kvm_s390_pgm_info pgm;
-		struct kvm_s390_emerg_info emerg;
-		struct kvm_s390_extcall_info extcall;
-		struct kvm_s390_prefix_info prefix;
-		struct kvm_s390_stop_info stop;
-		struct kvm_s390_mchk_info mchk;
-		char reserved[64];
-	} u;
-};
-
-type can be one of the following:
-
-KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
-KVM_S390_PROGRAM_INT - program check; parameters in .pgm
-KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
-KVM_S390_RESTART - restart; no parameters
-KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
-KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
-KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
-KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
-KVM_S390_MCHK - machine check interrupt; parameters in .mchk
-
-This is an asynchronous vcpu ioctl and can be invoked from any thread.
-
-4.94 KVM_S390_GET_IRQ_STATE
-
-Capability: KVM_CAP_S390_IRQ_STATE
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_irq_state (out)
-Returns: >= number of bytes copied into buffer,
-         -EINVAL if buffer size is 0,
-         -ENOBUFS if buffer size is too small to fit all pending interrupts,
-         -EFAULT if the buffer address was invalid
-
-This ioctl allows userspace to retrieve the complete state of all currently
-pending interrupts in a single buffer. Use cases include migration
-and introspection. The parameter structure contains the address of a
-userspace buffer and its length:
-
-struct kvm_s390_irq_state {
-	__u64 buf;
-	__u32 flags;        /* will stay unused for compatibility reasons */
-	__u32 len;
-	__u32 reserved[4];  /* will stay unused for compatibility reasons */
-};
-
-Userspace passes in the above struct and for each pending interrupt a
-struct kvm_s390_irq is copied to the provided buffer.
-
-The structure contains a flags and a reserved field for future extensions. As
-the kernel never checked for flags == 0 and QEMU never pre-zeroed flags and
-reserved, these fields can not be used in the future without breaking
-compatibility.
-
-If -ENOBUFS is returned the buffer provided was too small and userspace
-may retry with a bigger buffer.
-
-4.95 KVM_S390_SET_IRQ_STATE
-
-Capability: KVM_CAP_S390_IRQ_STATE
-Architectures: s390
-Type: vcpu ioctl
-Parameters: struct kvm_s390_irq_state (in)
-Returns: 0 on success,
-         -EFAULT if the buffer address was invalid,
-         -EINVAL for an invalid buffer length (see below),
-         -EBUSY if there were already interrupts pending,
-         errors occurring when actually injecting the
-          interrupt. See KVM_S390_IRQ.
-
-This ioctl allows userspace to set the complete state of all cpu-local
-interrupts currently pending for the vcpu. It is intended for restoring
-interrupt state after a migration. The input parameter is a userspace buffer
-containing a struct kvm_s390_irq_state:
-
-struct kvm_s390_irq_state {
-	__u64 buf;
-	__u32 flags;        /* will stay unused for compatibility reasons */
-	__u32 len;
-	__u32 reserved[4];  /* will stay unused for compatibility reasons */
-};
-
-The restrictions for flags and reserved apply as well.
-(see KVM_S390_GET_IRQ_STATE)
-
-The userspace memory referenced by buf contains a struct kvm_s390_irq
-for each interrupt to be injected into the guest.
-If one of the interrupts could not be injected for some reason the
-ioctl aborts.
-
-len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
-and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
-which is the maximum number of possibly pending cpu-local interrupts.
-
-4.96 KVM_SMI
-
-Capability: KVM_CAP_X86_SMM
-Architectures: x86
-Type: vcpu ioctl
-Parameters: none
-Returns: 0 on success, -1 on error
-
-Queues an SMI on the thread's vcpu.
-
-4.97 KVM_CAP_PPC_MULTITCE
-
-Capability: KVM_CAP_PPC_MULTITCE
-Architectures: ppc
-Type: vm
-
-This capability means the kernel is capable of handling hypercalls
-H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
-space. This significantly accelerates DMA operations for PPC KVM guests.
-User space should expect that its handlers for these hypercalls
-are not going to be called if user space previously registered LIOBN
-in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
-
-In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
-user space might have to advertise it for the guest. For example,
-IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
-present in the "ibm,hypertas-functions" device-tree property.
-
-The hypercalls mentioned above may or may not be processed successfully
-in the kernel based fast path. If they can not be handled by the kernel,
-they will get passed on to user space. So user space still has to have
-an implementation for these despite the in kernel acceleration.
-
-This capability is always enabled.
-
-4.98 KVM_CREATE_SPAPR_TCE_64
-
-Capability: KVM_CAP_SPAPR_TCE_64
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_create_spapr_tce_64 (in)
-Returns: file descriptor for manipulating the created TCE table
-
-This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit
-windows, described in 4.62 KVM_CREATE_SPAPR_TCE
-
-This capability uses extended struct in ioctl interface:
-
-/* for KVM_CAP_SPAPR_TCE_64 */
-struct kvm_create_spapr_tce_64 {
-	__u64 liobn;
-	__u32 page_shift;
-	__u32 flags;
-	__u64 offset;	/* in pages */
-	__u64 size; 	/* in pages */
-};
-
-The aim of extension is to support an additional bigger DMA window with
-a variable page size.
-KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and
-a bus offset of the corresponding DMA window, @size and @offset are numbers
-of IOMMU pages.
-
-@flags are not used at the moment.
-
-The rest of functionality is identical to KVM_CREATE_SPAPR_TCE.
-
-4.99 KVM_REINJECT_CONTROL
-
-Capability: KVM_CAP_REINJECT_CONTROL
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_reinject_control (in)
-Returns: 0 on success,
-         -EFAULT if struct kvm_reinject_control cannot be read,
-         -ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier.
-
-i8254 (PIT) has two modes, reinject and !reinject.  The default is reinject,
-where KVM queues elapsed i8254 ticks and monitors completion of interrupt from
-vector(s) that i8254 injects.  Reinject mode dequeues a tick and injects its
-interrupt whenever there isn't a pending interrupt from i8254.
-!reinject mode injects an interrupt as soon as a tick arrives.
-
-struct kvm_reinject_control {
-	__u8 pit_reinject;
-	__u8 reserved[31];
-};
-
-pit_reinject = 0 (!reinject mode) is recommended, unless running an old
-operating system that uses the PIT for timing (e.g. Linux 2.4.x).
-
-4.100 KVM_PPC_CONFIGURE_V3_MMU
-
-Capability: KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_ppc_mmuv3_cfg (in)
-Returns: 0 on success,
-         -EFAULT if struct kvm_ppc_mmuv3_cfg cannot be read,
-         -EINVAL if the configuration is invalid
-
-This ioctl controls whether the guest will use radix or HPT (hashed
-page table) translation, and sets the pointer to the process table for
-the guest.
-
-struct kvm_ppc_mmuv3_cfg {
-	__u64	flags;
-	__u64	process_table;
-};
-
-There are two bits that can be set in flags; KVM_PPC_MMUV3_RADIX and
-KVM_PPC_MMUV3_GTSE.  KVM_PPC_MMUV3_RADIX, if set, configures the guest
-to use radix tree translation, and if clear, to use HPT translation.
-KVM_PPC_MMUV3_GTSE, if set and if KVM permits it, configures the guest
-to be able to use the global TLB and SLB invalidation instructions;
-if clear, the guest may not use these instructions.
-
-The process_table field specifies the address and size of the guest
-process table, which is in the guest's space.  This field is formatted
-as the second doubleword of the partition table entry, as defined in
-the Power ISA V3.00, Book III section 5.7.6.1.
-
-4.101 KVM_PPC_GET_RMMU_INFO
-
-Capability: KVM_CAP_PPC_RADIX_MMU
-Architectures: ppc
-Type: vm ioctl
-Parameters: struct kvm_ppc_rmmu_info (out)
-Returns: 0 on success,
-	 -EFAULT if struct kvm_ppc_rmmu_info cannot be written,
-	 -EINVAL if no useful information can be returned
-
-This ioctl returns a structure containing two things: (a) a list
-containing supported radix tree geometries, and (b) a list that maps
-page sizes to put in the "AP" (actual page size) field for the tlbie
-(TLB invalidate entry) instruction.
-
-struct kvm_ppc_rmmu_info {
-	struct kvm_ppc_radix_geom {
-		__u8	page_shift;
-		__u8	level_bits[4];
-		__u8	pad[3];
-	}	geometries[8];
-	__u32	ap_encodings[8];
-};
-
-The geometries[] field gives up to 8 supported geometries for the
-radix page table, in terms of the log base 2 of the smallest page
-size, and the number of bits indexed at each level of the tree, from
-the PTE level up to the PGD level in that order.  Any unused entries
-will have 0 in the page_shift field.
-
-The ap_encodings gives the supported page sizes and their AP field
-encodings, encoded with the AP value in the top 3 bits and the log
-base 2 of the page size in the bottom 6 bits.
-
-4.102 KVM_PPC_RESIZE_HPT_PREPARE
-
-Capability: KVM_CAP_SPAPR_RESIZE_HPT
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_ppc_resize_hpt (in)
-Returns: 0 on successful completion,
-	 >0 if a new HPT is being prepared, the value is an estimated
-             number of milliseconds until preparation is complete
-         -EFAULT if struct kvm_reinject_control cannot be read,
-	 -EINVAL if the supplied shift or flags are invalid
-	 -ENOMEM if unable to allocate the new HPT
-	 -ENOSPC if there was a hash collision when moving existing
-                  HPT entries to the new HPT
-	 -EIO on other error conditions
-
-Used to implement the PAPR extension for runtime resizing of a guest's
-Hashed Page Table (HPT).  Specifically this starts, stops or monitors
-the preparation of a new potential HPT for the guest, essentially
-implementing the H_RESIZE_HPT_PREPARE hypercall.
-
-If called with shift > 0 when there is no pending HPT for the guest,
-this begins preparation of a new pending HPT of size 2^(shift) bytes.
-It then returns a positive integer with the estimated number of
-milliseconds until preparation is complete.
-
-If called when there is a pending HPT whose size does not match that
-requested in the parameters, discards the existing pending HPT and
-creates a new one as above.
-
-If called when there is a pending HPT of the size requested, will:
-  * If preparation of the pending HPT is already complete, return 0
-  * If preparation of the pending HPT has failed, return an error
-    code, then discard the pending HPT.
-  * If preparation of the pending HPT is still in progress, return an
-    estimated number of milliseconds until preparation is complete.
-
-If called with shift == 0, discards any currently pending HPT and
-returns 0 (i.e. cancels any in-progress preparation).
-
-flags is reserved for future expansion, currently setting any bits in
-flags will result in an -EINVAL.
-
-Normally this will be called repeatedly with the same parameters until
-it returns <= 0.  The first call will initiate preparation, subsequent
-ones will monitor preparation until it completes or fails.
-
-struct kvm_ppc_resize_hpt {
-	__u64 flags;
-	__u32 shift;
-	__u32 pad;
-};
-
-4.103 KVM_PPC_RESIZE_HPT_COMMIT
-
-Capability: KVM_CAP_SPAPR_RESIZE_HPT
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_ppc_resize_hpt (in)
-Returns: 0 on successful completion,
-         -EFAULT if struct kvm_reinject_control cannot be read,
-	 -EINVAL if the supplied shift or flags are invalid
-	 -ENXIO is there is no pending HPT, or the pending HPT doesn't
-                 have the requested size
-	 -EBUSY if the pending HPT is not fully prepared
-	 -ENOSPC if there was a hash collision when moving existing
-                  HPT entries to the new HPT
-	 -EIO on other error conditions
-
-Used to implement the PAPR extension for runtime resizing of a guest's
-Hashed Page Table (HPT).  Specifically this requests that the guest be
-transferred to working with the new HPT, essentially implementing the
-H_RESIZE_HPT_COMMIT hypercall.
-
-This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
-returned 0 with the same parameters.  In other cases
-KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
--EBUSY, though others may be possible if the preparation was started,
-but failed).
-
-This will have undefined effects on the guest if it has not already
-placed itself in a quiescent state where no vcpu will make MMU enabled
-memory accesses.
-
-On succsful completion, the pending HPT will become the guest's active
-HPT and the previous HPT will be discarded.
-
-On failure, the guest will still be operating on its previous HPT.
-
-struct kvm_ppc_resize_hpt {
-	__u64 flags;
-	__u32 shift;
-	__u32 pad;
-};
-
-4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
-
-Capability: KVM_CAP_MCE
-Architectures: x86
-Type: system ioctl
-Parameters: u64 mce_cap (out)
-Returns: 0 on success, -1 on error
-
-Returns supported MCE capabilities. The u64 mce_cap parameter
-has the same format as the MSR_IA32_MCG_CAP register. Supported
-capabilities will have the corresponding bits set.
-
-4.105 KVM_X86_SETUP_MCE
-
-Capability: KVM_CAP_MCE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: u64 mcg_cap (in)
-Returns: 0 on success,
-         -EFAULT if u64 mcg_cap cannot be read,
-         -EINVAL if the requested number of banks is invalid,
-         -EINVAL if requested MCE capability is not supported.
-
-Initializes MCE support for use. The u64 mcg_cap parameter
-has the same format as the MSR_IA32_MCG_CAP register and
-specifies which capabilities should be enabled. The maximum
-supported number of error-reporting banks can be retrieved when
-checking for KVM_CAP_MCE. The supported capabilities can be
-retrieved with KVM_X86_GET_MCE_CAP_SUPPORTED.
-
-4.106 KVM_X86_SET_MCE
-
-Capability: KVM_CAP_MCE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_x86_mce (in)
-Returns: 0 on success,
-         -EFAULT if struct kvm_x86_mce cannot be read,
-         -EINVAL if the bank number is invalid,
-         -EINVAL if VAL bit is not set in status field.
-
-Inject a machine check error (MCE) into the guest. The input
-parameter is:
-
-struct kvm_x86_mce {
-	__u64 status;
-	__u64 addr;
-	__u64 misc;
-	__u64 mcg_status;
-	__u8 bank;
-	__u8 pad1[7];
-	__u64 pad2[3];
-};
-
-If the MCE being reported is an uncorrected error, KVM will
-inject it as an MCE exception into the guest. If the guest
-MCG_STATUS register reports that an MCE is in progress, KVM
-causes an KVM_EXIT_SHUTDOWN vmexit.
-
-Otherwise, if the MCE is a corrected error, KVM will just
-store it in the corresponding bank (provided this bank is
-not holding a previously reported uncorrected error).
-
-4.107 KVM_S390_GET_CMMA_BITS
-
-Capability: KVM_CAP_S390_CMMA_MIGRATION
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_cmma_log (in, out)
-Returns: 0 on success, a negative value on error
-
-This ioctl is used to get the values of the CMMA bits on the s390
-architecture. It is meant to be used in two scenarios:
-- During live migration to save the CMMA values. Live migration needs
-  to be enabled via the KVM_REQ_START_MIGRATION VM property.
-- To non-destructively peek at the CMMA values, with the flag
-  KVM_S390_CMMA_PEEK set.
-
-The ioctl takes parameters via the kvm_s390_cmma_log struct. The desired
-values are written to a buffer whose location is indicated via the "values"
-member in the kvm_s390_cmma_log struct.  The values in the input struct are
-also updated as needed.
-Each CMMA value takes up one byte.
-
-struct kvm_s390_cmma_log {
-	__u64 start_gfn;
-	__u32 count;
-	__u32 flags;
-	union {
-		__u64 remaining;
-		__u64 mask;
-	};
-	__u64 values;
-};
-
-start_gfn is the number of the first guest frame whose CMMA values are
-to be retrieved,
-
-count is the length of the buffer in bytes,
-
-values points to the buffer where the result will be written to.
-
-If count is greater than KVM_S390_SKEYS_MAX, then it is considered to be
-KVM_S390_SKEYS_MAX. KVM_S390_SKEYS_MAX is re-used for consistency with
-other ioctls.
-
-The result is written in the buffer pointed to by the field values, and
-the values of the input parameter are updated as follows.
-
-Depending on the flags, different actions are performed. The only
-supported flag so far is KVM_S390_CMMA_PEEK.
-
-The default behaviour if KVM_S390_CMMA_PEEK is not set is:
-start_gfn will indicate the first page frame whose CMMA bits were dirty.
-It is not necessarily the same as the one passed as input, as clean pages
-are skipped.
-
-count will indicate the number of bytes actually written in the buffer.
-It can (and very often will) be smaller than the input value, since the
-buffer is only filled until 16 bytes of clean values are found (which
-are then not copied in the buffer). Since a CMMA migration block needs
-the base address and the length, for a total of 16 bytes, we will send
-back some clean data if there is some dirty data afterwards, as long as
-the size of the clean data does not exceed the size of the header. This
-allows to minimize the amount of data to be saved or transferred over
-the network at the expense of more roundtrips to userspace. The next
-invocation of the ioctl will skip over all the clean values, saving
-potentially more than just the 16 bytes we found.
-
-If KVM_S390_CMMA_PEEK is set:
-the existing storage attributes are read even when not in migration
-mode, and no other action is performed;
-
-the output start_gfn will be equal to the input start_gfn,
-
-the output count will be equal to the input count, except if the end of
-memory has been reached.
-
-In both cases:
-the field "remaining" will indicate the total number of dirty CMMA values
-still remaining, or 0 if KVM_S390_CMMA_PEEK is set and migration mode is
-not enabled.
-
-mask is unused.
-
-values points to the userspace buffer where the result will be stored.
-
-This ioctl can fail with -ENOMEM if not enough memory can be allocated to
-complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
-KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
--EFAULT if the userspace address is invalid or if no page table is
-present for the addresses (e.g. when using hugepages).
-
-4.108 KVM_S390_SET_CMMA_BITS
-
-Capability: KVM_CAP_S390_CMMA_MIGRATION
-Architectures: s390
-Type: vm ioctl
-Parameters: struct kvm_s390_cmma_log (in)
-Returns: 0 on success, a negative value on error
-
-This ioctl is used to set the values of the CMMA bits on the s390
-architecture. It is meant to be used during live migration to restore
-the CMMA values, but there are no restrictions on its use.
-The ioctl takes parameters via the kvm_s390_cmma_values struct.
-Each CMMA value takes up one byte.
-
-struct kvm_s390_cmma_log {
-	__u64 start_gfn;
-	__u32 count;
-	__u32 flags;
-	union {
-		__u64 remaining;
-		__u64 mask;
-	};
-	__u64 values;
-};
-
-start_gfn indicates the starting guest frame number,
-
-count indicates how many values are to be considered in the buffer,
-
-flags is not used and must be 0.
-
-mask indicates which PGSTE bits are to be considered.
-
-remaining is not used.
-
-values points to the buffer in userspace where to store the values.
-
-This ioctl can fail with -ENOMEM if not enough memory can be allocated to
-complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
-the count field is too large (e.g. more than KVM_S390_CMMA_SIZE_MAX) or
-if the flags field was not 0, with -EFAULT if the userspace address is
-invalid, if invalid pages are written to (e.g. after the end of memory)
-or if no page table is present for the addresses (e.g. when using
-hugepages).
-
-4.109 KVM_PPC_GET_CPU_CHAR
-
-Capability: KVM_CAP_PPC_GET_CPU_CHAR
-Architectures: powerpc
-Type: vm ioctl
-Parameters: struct kvm_ppc_cpu_char (out)
-Returns: 0 on successful completion
-	 -EFAULT if struct kvm_ppc_cpu_char cannot be written
-
-This ioctl gives userspace information about certain characteristics
-of the CPU relating to speculative execution of instructions and
-possible information leakage resulting from speculative execution (see
-CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754).  The information is
-returned in struct kvm_ppc_cpu_char, which looks like this:
-
-struct kvm_ppc_cpu_char {
-	__u64	character;		/* characteristics of the CPU */
-	__u64	behaviour;		/* recommended software behaviour */
-	__u64	character_mask;		/* valid bits in character */
-	__u64	behaviour_mask;		/* valid bits in behaviour */
-};
-
-For extensibility, the character_mask and behaviour_mask fields
-indicate which bits of character and behaviour have been filled in by
-the kernel.  If the set of defined bits is extended in future then
-userspace will be able to tell whether it is running on a kernel that
-knows about the new bits.
-
-The character field describes attributes of the CPU which can help
-with preventing inadvertent information disclosure - specifically,
-whether there is an instruction to flash-invalidate the L1 data cache
-(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
-to a mode where entries can only be used by the thread that created
-them, whether the bcctr[l] instruction prevents speculation, and
-whether a speculation barrier instruction (ori 31,31,0) is provided.
-
-The behaviour field describes actions that software should take to
-prevent inadvertent information disclosure, and thus describes which
-vulnerabilities the hardware is subject to; specifically whether the
-L1 data cache should be flushed when returning to user mode from the
-kernel, and whether a speculation barrier should be placed between an
-array bounds check and the array access.
-
-These fields use the same bit definitions as the new
-H_GET_CPU_CHARACTERISTICS hypercall.
-
-4.110 KVM_MEMORY_ENCRYPT_OP
-
-Capability: basic
-Architectures: x86
-Type: system
-Parameters: an opaque platform specific structure (in/out)
-Returns: 0 on success; -1 on error
-
-If the platform supports creating encrypted VMs then this ioctl can be used
-for issuing platform-specific memory encryption commands to manage those
-encrypted VMs.
-
-Currently, this ioctl is used for issuing Secure Encrypted Virtualization
-(SEV) commands on AMD Processors. The SEV commands are defined in
-Documentation/virtual/kvm/amd-memory-encryption.rst.
-
-4.111 KVM_MEMORY_ENCRYPT_REG_REGION
-
-Capability: basic
-Architectures: x86
-Type: system
-Parameters: struct kvm_enc_region (in)
-Returns: 0 on success; -1 on error
-
-This ioctl can be used to register a guest memory region which may
-contain encrypted data (e.g. guest RAM, SMRAM etc).
-
-It is used in the SEV-enabled guest. When encryption is enabled, a guest
-memory region may contain encrypted data. The SEV memory encryption
-engine uses a tweak such that two identical plaintext pages, each at
-different locations will have differing ciphertexts. So swapping or
-moving ciphertext of those pages will not result in plaintext being
-swapped. So relocating (or migrating) physical backing pages for the SEV
-guest will require some additional steps.
-
-Note: The current SEV key management spec does not provide commands to
-swap or migrate (move) ciphertext pages. Hence, for now we pin the guest
-memory region registered with the ioctl.
-
-4.112 KVM_MEMORY_ENCRYPT_UNREG_REGION
-
-Capability: basic
-Architectures: x86
-Type: system
-Parameters: struct kvm_enc_region (in)
-Returns: 0 on success; -1 on error
-
-This ioctl can be used to unregister the guest memory region registered
-with KVM_MEMORY_ENCRYPT_REG_REGION ioctl above.
-
-4.113 KVM_HYPERV_EVENTFD
-
-Capability: KVM_CAP_HYPERV_EVENTFD
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_hyperv_eventfd (in)
-
-This ioctl (un)registers an eventfd to receive notifications from the guest on
-the specified Hyper-V connection id through the SIGNAL_EVENT hypercall, without
-causing a user exit.  SIGNAL_EVENT hypercall with non-zero event flag number
-(bits 24-31) still triggers a KVM_EXIT_HYPERV_HCALL user exit.
-
-struct kvm_hyperv_eventfd {
-	__u32 conn_id;
-	__s32 fd;
-	__u32 flags;
-	__u32 padding[3];
-};
-
-The conn_id field should fit within 24 bits:
-
-#define KVM_HYPERV_CONN_ID_MASK		0x00ffffff
-
-The acceptable values for the flags field are:
-
-#define KVM_HYPERV_EVENTFD_DEASSIGN	(1 << 0)
-
-Returns: 0 on success,
-	-EINVAL if conn_id or flags is outside the allowed range
-	-ENOENT on deassign if the conn_id isn't registered
-	-EEXIST on assign if the conn_id is already registered
-
-4.114 KVM_GET_NESTED_STATE
-
-Capability: KVM_CAP_NESTED_STATE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_nested_state (in/out)
-Returns: 0 on success, -1 on error
-Errors:
-  E2BIG:     the total state size exceeds the value of 'size' specified by
-             the user; the size required will be written into size.
-
-struct kvm_nested_state {
-	__u16 flags;
-	__u16 format;
-	__u32 size;
-
-	union {
-		struct kvm_vmx_nested_state_hdr vmx;
-		struct kvm_svm_nested_state_hdr svm;
-
-		/* Pad the header to 128 bytes.  */
-		__u8 pad[120];
-	} hdr;
-
-	union {
-		struct kvm_vmx_nested_state_data vmx[0];
-		struct kvm_svm_nested_state_data svm[0];
-	} data;
-};
-
-#define KVM_STATE_NESTED_GUEST_MODE	0x00000001
-#define KVM_STATE_NESTED_RUN_PENDING	0x00000002
-#define KVM_STATE_NESTED_EVMCS		0x00000004
-
-#define KVM_STATE_NESTED_FORMAT_VMX		0
-#define KVM_STATE_NESTED_FORMAT_SVM		1
-
-#define KVM_STATE_NESTED_VMX_VMCS_SIZE		0x1000
-
-#define KVM_STATE_NESTED_VMX_SMM_GUEST_MODE	0x00000001
-#define KVM_STATE_NESTED_VMX_SMM_VMXON		0x00000002
-
-struct kvm_vmx_nested_state_hdr {
-	__u64 vmxon_pa;
-	__u64 vmcs12_pa;
-
-	struct {
-		__u16 flags;
-	} smm;
-};
-
-struct kvm_vmx_nested_state_data {
-	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
-	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
-};
-
-This ioctl copies the vcpu's nested virtualization state from the kernel to
-userspace.
-
-The maximum size of the state can be retrieved by passing KVM_CAP_NESTED_STATE
-to the KVM_CHECK_EXTENSION ioctl().
-
-4.115 KVM_SET_NESTED_STATE
-
-Capability: KVM_CAP_NESTED_STATE
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_nested_state (in)
-Returns: 0 on success, -1 on error
-
-This copies the vcpu's kvm_nested_state struct from userspace to the kernel.
-For the definition of struct kvm_nested_state, see KVM_GET_NESTED_STATE.
-
-4.116 KVM_(UN)REGISTER_COALESCED_MMIO
-
-Capability: KVM_CAP_COALESCED_MMIO (for coalesced mmio)
-	    KVM_CAP_COALESCED_PIO (for coalesced pio)
-Architectures: all
-Type: vm ioctl
-Parameters: struct kvm_coalesced_mmio_zone
-Returns: 0 on success, < 0 on error
-
-Coalesced I/O is a performance optimization that defers hardware
-register write emulation so that userspace exits are avoided.  It is
-typically used to reduce the overhead of emulating frequently accessed
-hardware registers.
-
-When a hardware register is configured for coalesced I/O, write accesses
-do not exit to userspace and their value is recorded in a ring buffer
-that is shared between kernel and userspace.
-
-Coalesced I/O is used if one or more write accesses to a hardware
-register can be deferred until a read or a write to another hardware
-register on the same device.  This last access will cause a vmexit and
-userspace will process accesses from the ring buffer before emulating
-it. That will avoid exiting to userspace on repeated writes.
-
-Coalesced pio is based on coalesced mmio. There is little difference
-between coalesced mmio and pio except that coalesced pio records accesses
-to I/O ports.
-
-4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
-
-Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
-Architectures: x86, arm, arm64, mips
-Type: vm ioctl
-Parameters: struct kvm_dirty_log (in)
-Returns: 0 on success, -1 on error
-
-/* for KVM_CLEAR_DIRTY_LOG */
-struct kvm_clear_dirty_log {
-	__u32 slot;
-	__u32 num_pages;
-	__u64 first_page;
-	union {
-		void __user *dirty_bitmap; /* one bit per page */
-		__u64 padding;
-	};
-};
-
-The ioctl clears the dirty status of pages in a memory slot, according to
-the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
-field.  Bit 0 of the bitmap corresponds to page "first_page" in the
-memory slot, and num_pages is the size in bits of the input bitmap.
-first_page must be a multiple of 64; num_pages must also be a multiple of
-64 unless first_page + num_pages is the size of the memory slot.  For each
-bit that is set in the input bitmap, the corresponding page is marked "clean"
-in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
-(for example via write-protection, or by clearing the dirty bit in
-a page table entry).
-
-If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
-the address space for which you want to return the dirty bitmap.
-They must be less than the value that KVM_CHECK_EXTENSION returns for
-the KVM_CAP_MULTI_ADDRESS_SPACE capability.
-
-This ioctl is mostly useful when KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
-is enabled; for more information, see the description of the capability.
-However, it can always be used as long as KVM_CHECK_EXTENSION confirms
-that KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 is present.
-
-4.118 KVM_GET_SUPPORTED_HV_CPUID
-
-Capability: KVM_CAP_HYPERV_CPUID
-Architectures: x86
-Type: vcpu ioctl
-Parameters: struct kvm_cpuid2 (in/out)
-Returns: 0 on success, -1 on error
-
-struct kvm_cpuid2 {
-	__u32 nent;
-	__u32 padding;
-	struct kvm_cpuid_entry2 entries[0];
-};
-
-struct kvm_cpuid_entry2 {
-	__u32 function;
-	__u32 index;
-	__u32 flags;
-	__u32 eax;
-	__u32 ebx;
-	__u32 ecx;
-	__u32 edx;
-	__u32 padding[3];
-};
-
-This ioctl returns x86 cpuid features leaves related to Hyper-V emulation in
-KVM.  Userspace can use the information returned by this ioctl to construct
-cpuid information presented to guests consuming Hyper-V enlightenments (e.g.
-Windows or Hyper-V guests).
-
-CPUID feature leaves returned by this ioctl are defined by Hyper-V Top Level
-Functional Specification (TLFS). These leaves can't be obtained with
-KVM_GET_SUPPORTED_CPUID ioctl because some of them intersect with KVM feature
-leaves (0x40000000, 0x40000001).
-
-Currently, the following list of CPUID leaves are returned:
- HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
- HYPERV_CPUID_INTERFACE
- HYPERV_CPUID_VERSION
- HYPERV_CPUID_FEATURES
- HYPERV_CPUID_ENLIGHTMENT_INFO
- HYPERV_CPUID_IMPLEMENT_LIMITS
- HYPERV_CPUID_NESTED_FEATURES
-
-HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
-enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
-
-Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
-with the 'nent' field indicating the number of entries in the variable-size
-array 'entries'.  If the number of entries is too low to describe all Hyper-V
-feature leaves, an error (E2BIG) is returned. If the number is more or equal
-to the number of Hyper-V feature leaves, the 'nent' field is adjusted to the
-number of valid entries in the 'entries' array, which is then filled.
-
-'index' and 'flags' fields in 'struct kvm_cpuid_entry2' are currently reserved,
-userspace should not expect to get any particular value there.
-
-4.119 KVM_ARM_VCPU_FINALIZE
-
-Architectures: arm, arm64
-Type: vcpu ioctl
-Parameters: int feature (in)
-Returns: 0 on success, -1 on error
-Errors:
-  EPERM:     feature not enabled, needs configuration, or already finalized
-  EINVAL:    feature unknown or not present
-
-Recognised values for feature:
-  arm64      KVM_ARM_VCPU_SVE (requires KVM_CAP_ARM_SVE)
-
-Finalizes the configuration of the specified vcpu feature.
-
-The vcpu must already have been initialised, enabling the affected feature, by
-means of a successful KVM_ARM_VCPU_INIT call with the appropriate flag set in
-features[].
-
-For affected vcpu features, this is a mandatory step that must be performed
-before the vcpu is fully usable.
-
-Between KVM_ARM_VCPU_INIT and KVM_ARM_VCPU_FINALIZE, the feature may be
-configured by use of ioctls such as KVM_SET_ONE_REG.  The exact configuration
-that should be performaned and how to do it are feature-dependent.
-
-Other calls that depend on a particular feature being finalized, such as
-KVM_RUN, KVM_GET_REG_LIST, KVM_GET_ONE_REG and KVM_SET_ONE_REG, will fail with
--EPERM unless the feature has already been finalized by means of a
-KVM_ARM_VCPU_FINALIZE call.
-
-See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
-using this ioctl.
-
-4.120 KVM_SET_PMU_EVENT_FILTER
-
-Capability: KVM_CAP_PMU_EVENT_FILTER
-Architectures: x86
-Type: vm ioctl
-Parameters: struct kvm_pmu_event_filter (in)
-Returns: 0 on success, -1 on error
-
-struct kvm_pmu_event_filter {
-	__u32 action;
-	__u32 nevents;
-	__u32 fixed_counter_bitmap;
-	__u32 flags;
-	__u32 pad[4];
-	__u64 events[0];
-};
-
-This ioctl restricts the set of PMU events that the guest can program.
-The argument holds a list of events which will be allowed or denied.
-The eventsel+umask of each event the guest attempts to program is compared
-against the events field to determine whether the guest should have access.
-The events field only controls general purpose counters; fixed purpose
-counters are controlled by the fixed_counter_bitmap.
-
-No flags are defined yet, the field must be zero.
-
-Valid values for 'action':
-#define KVM_PMU_EVENT_ALLOW 0
-#define KVM_PMU_EVENT_DENY 1
-
-
-5. The kvm_run structure
-------------------------
-
-Application code obtains a pointer to the kvm_run structure by
-mmap()ing a vcpu fd.  From that point, application code can control
-execution by changing fields in kvm_run prior to calling the KVM_RUN
-ioctl, and obtain information about the reason KVM_RUN returned by
-looking up structure members.
-
-struct kvm_run {
-	/* in */
-	__u8 request_interrupt_window;
-
-Request that KVM_RUN return when it becomes possible to inject external
-interrupts into the guest.  Useful in conjunction with KVM_INTERRUPT.
-
-	__u8 immediate_exit;
-
-This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN
-exits immediately, returning -EINTR.  In the common scenario where a
-signal is used to "kick" a VCPU out of KVM_RUN, this field can be used
-to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability.
-Rather than blocking the signal outside KVM_RUN, userspace can set up
-a signal handler that sets run->immediate_exit to a non-zero value.
-
-This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available.
-
-	__u8 padding1[6];
-
-	/* out */
-	__u32 exit_reason;
-
-When KVM_RUN has returned successfully (return value 0), this informs
-application code why KVM_RUN has returned.  Allowable values for this
-field are detailed below.
-
-	__u8 ready_for_interrupt_injection;
-
-If request_interrupt_window has been specified, this field indicates
-an interrupt can be injected now with KVM_INTERRUPT.
-
-	__u8 if_flag;
-
-The value of the current interrupt flag.  Only valid if in-kernel
-local APIC is not used.
-
-	__u16 flags;
-
-More architecture-specific flags detailing state of the VCPU that may
-affect the device's behavior.  The only currently defined flag is
-KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
-VCPU is in system management mode.
-
-	/* in (pre_kvm_run), out (post_kvm_run) */
-	__u64 cr8;
-
-The value of the cr8 register.  Only valid if in-kernel local APIC is
-not used.  Both input and output.
-
-	__u64 apic_base;
-
-The value of the APIC BASE msr.  Only valid if in-kernel local
-APIC is not used.  Both input and output.
-
-	union {
-		/* KVM_EXIT_UNKNOWN */
-		struct {
-			__u64 hardware_exit_reason;
-		} hw;
-
-If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
-reasons.  Further architecture-specific information is available in
-hardware_exit_reason.
-
-		/* KVM_EXIT_FAIL_ENTRY */
-		struct {
-			__u64 hardware_entry_failure_reason;
-		} fail_entry;
-
-If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
-to unknown reasons.  Further architecture-specific information is
-available in hardware_entry_failure_reason.
-
-		/* KVM_EXIT_EXCEPTION */
-		struct {
-			__u32 exception;
-			__u32 error_code;
-		} ex;
-
-Unused.
-
-		/* KVM_EXIT_IO */
-		struct {
-#define KVM_EXIT_IO_IN  0
-#define KVM_EXIT_IO_OUT 1
-			__u8 direction;
-			__u8 size; /* bytes */
-			__u16 port;
-			__u32 count;
-			__u64 data_offset; /* relative to kvm_run start */
-		} io;
-
-If exit_reason is KVM_EXIT_IO, then the vcpu has
-executed a port I/O instruction which could not be satisfied by kvm.
-data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
-where kvm expects application code to place the data for the next
-KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
-
-		/* KVM_EXIT_DEBUG */
-		struct {
-			struct kvm_debug_exit_arch arch;
-		} debug;
-
-If the exit_reason is KVM_EXIT_DEBUG, then a vcpu is processing a debug event
-for which architecture specific information is returned.
-
-		/* KVM_EXIT_MMIO */
-		struct {
-			__u64 phys_addr;
-			__u8  data[8];
-			__u32 len;
-			__u8  is_write;
-		} mmio;
-
-If exit_reason is KVM_EXIT_MMIO, then the vcpu has
-executed a memory-mapped I/O instruction which could not be satisfied
-by kvm.  The 'data' member contains the written data if 'is_write' is
-true, and should be filled by application code otherwise.
-
-The 'data' member contains, in its first 'len' bytes, the value as it would
-appear if the VCPU performed a load or store of the appropriate width directly
-to the byte array.
-
-NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
-      KVM_EXIT_EPR the corresponding
-operations are complete (and guest state is consistent) only after userspace
-has re-entered the kernel with KVM_RUN.  The kernel side will first finish
-incomplete operations and then check for pending signals.  Userspace
-can re-enter the guest with an unmasked signal pending to complete
-pending operations.
-
-		/* KVM_EXIT_HYPERCALL */
-		struct {
-			__u64 nr;
-			__u64 args[6];
-			__u64 ret;
-			__u32 longmode;
-			__u32 pad;
-		} hypercall;
-
-Unused.  This was once used for 'hypercall to userspace'.  To implement
-such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
-Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
-
-		/* KVM_EXIT_TPR_ACCESS */
-		struct {
-			__u64 rip;
-			__u32 is_write;
-			__u32 pad;
-		} tpr_access;
-
-To be documented (KVM_TPR_ACCESS_REPORTING).
-
-		/* KVM_EXIT_S390_SIEIC */
-		struct {
-			__u8 icptcode;
-			__u64 mask; /* psw upper half */
-			__u64 addr; /* psw lower half */
-			__u16 ipa;
-			__u32 ipb;
-		} s390_sieic;
-
-s390 specific.
-
-		/* KVM_EXIT_S390_RESET */
-#define KVM_S390_RESET_POR       1
-#define KVM_S390_RESET_CLEAR     2
-#define KVM_S390_RESET_SUBSYSTEM 4
-#define KVM_S390_RESET_CPU_INIT  8
-#define KVM_S390_RESET_IPL       16
-		__u64 s390_reset_flags;
-
-s390 specific.
-
-		/* KVM_EXIT_S390_UCONTROL */
-		struct {
-			__u64 trans_exc_code;
-			__u32 pgm_code;
-		} s390_ucontrol;
-
-s390 specific. A page fault has occurred for a user controlled virtual
-machine (KVM_VM_S390_UNCONTROL) on it's host page table that cannot be
-resolved by the kernel.
-The program code and the translation exception code that were placed
-in the cpu's lowcore are presented here as defined by the z Architecture
-Principles of Operation Book in the Chapter for Dynamic Address Translation
-(DAT)
-
-		/* KVM_EXIT_DCR */
-		struct {
-			__u32 dcrn;
-			__u32 data;
-			__u8  is_write;
-		} dcr;
-
-Deprecated - was used for 440 KVM.
-
-		/* KVM_EXIT_OSI */
-		struct {
-			__u64 gprs[32];
-		} osi;
-
-MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch
-hypercalls and exit with this exit struct that contains all the guest gprs.
-
-If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall.
-Userspace can now handle the hypercall and when it's done modify the gprs as
-necessary. Upon guest entry all guest GPRs will then be replaced by the values
-in this struct.
-
-		/* KVM_EXIT_PAPR_HCALL */
-		struct {
-			__u64 nr;
-			__u64 ret;
-			__u64 args[9];
-		} papr_hcall;
-
-This is used on 64-bit PowerPC when emulating a pSeries partition,
-e.g. with the 'pseries' machine type in qemu.  It occurs when the
-guest does a hypercall using the 'sc 1' instruction.  The 'nr' field
-contains the hypercall number (from the guest R3), and 'args' contains
-the arguments (from the guest R4 - R12).  Userspace should put the
-return code in 'ret' and any extra returned values in args[].
-The possible hypercalls are defined in the Power Architecture Platform
-Requirements (PAPR) document available from www.power.org (free
-developer registration required to access it).
-
-		/* KVM_EXIT_S390_TSCH */
-		struct {
-			__u16 subchannel_id;
-			__u16 subchannel_nr;
-			__u32 io_int_parm;
-			__u32 io_int_word;
-			__u32 ipb;
-			__u8 dequeued;
-		} s390_tsch;
-
-s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled
-and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O
-interrupt for the target subchannel has been dequeued and subchannel_id,
-subchannel_nr, io_int_parm and io_int_word contain the parameters for that
-interrupt. ipb is needed for instruction parameter decoding.
-
-		/* KVM_EXIT_EPR */
-		struct {
-			__u32 epr;
-		} epr;
-
-On FSL BookE PowerPC chips, the interrupt controller has a fast patch
-interrupt acknowledge path to the core. When the core successfully
-delivers an interrupt, it automatically populates the EPR register with
-the interrupt vector number and acknowledges the interrupt inside
-the interrupt controller.
-
-In case the interrupt controller lives in user space, we need to do
-the interrupt acknowledge cycle through it to fetch the next to be
-delivered interrupt vector using this exit.
-
-It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
-external interrupt has just been delivered into the guest. User space
-should put the acknowledged interrupt vector into the 'epr' field.
-
-		/* KVM_EXIT_SYSTEM_EVENT */
-		struct {
-#define KVM_SYSTEM_EVENT_SHUTDOWN       1
-#define KVM_SYSTEM_EVENT_RESET          2
-#define KVM_SYSTEM_EVENT_CRASH          3
-			__u32 type;
-			__u64 flags;
-		} system_event;
-
-If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
-a system-level event using some architecture specific mechanism (hypercall
-or some special instruction). In case of ARM/ARM64, this is triggered using
-HVC instruction based PSCI call from the vcpu. The 'type' field describes
-the system-level event type. The 'flags' field describes architecture
-specific flags for the system-level event.
-
-Valid values for 'type' are:
-  KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
-   VM. Userspace is not obliged to honour this, and if it does honour
-   this does not need to destroy the VM synchronously (ie it may call
-   KVM_RUN again before shutdown finally occurs).
-  KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
-   As with SHUTDOWN, userspace can choose to ignore the request, or
-   to schedule the reset to occur in the future and may call KVM_RUN again.
-  KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest
-   has requested a crash condition maintenance. Userspace can choose
-   to ignore the request, or to gather VM memory core dump and/or
-   reset/shutdown of the VM.
-
-		/* KVM_EXIT_IOAPIC_EOI */
-		struct {
-			__u8 vector;
-		} eoi;
-
-Indicates that the VCPU's in-kernel local APIC received an EOI for a
-level-triggered IOAPIC interrupt.  This exit only triggers when the
-IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
-the userspace IOAPIC should process the EOI and retrigger the interrupt if
-it is still asserted.  Vector is the LAPIC interrupt vector for which the
-EOI was received.
-
-		struct kvm_hyperv_exit {
-#define KVM_EXIT_HYPERV_SYNIC          1
-#define KVM_EXIT_HYPERV_HCALL          2
-			__u32 type;
-			union {
-				struct {
-					__u32 msr;
-					__u64 control;
-					__u64 evt_page;
-					__u64 msg_page;
-				} synic;
-				struct {
-					__u64 input;
-					__u64 result;
-					__u64 params[2];
-				} hcall;
-			} u;
-		};
-		/* KVM_EXIT_HYPERV */
-                struct kvm_hyperv_exit hyperv;
-Indicates that the VCPU exits into userspace to process some tasks
-related to Hyper-V emulation.
-Valid values for 'type' are:
-	KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about
-Hyper-V SynIC state change. Notification is used to remap SynIC
-event/message pages and to enable/disable SynIC messages/events processing
-in userspace.
-
-		/* Fix the size of the union. */
-		char padding[256];
-	};
-
-	/*
-	 * shared registers between kvm and userspace.
-	 * kvm_valid_regs specifies the register classes set by the host
-	 * kvm_dirty_regs specified the register classes dirtied by userspace
-	 * struct kvm_sync_regs is architecture specific, as well as the
-	 * bits for kvm_valid_regs and kvm_dirty_regs
-	 */
-	__u64 kvm_valid_regs;
-	__u64 kvm_dirty_regs;
-	union {
-		struct kvm_sync_regs regs;
-		char padding[SYNC_REGS_SIZE_BYTES];
-	} s;
-
-If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access
-certain guest registers without having to call SET/GET_*REGS. Thus we can
-avoid some system call overhead if userspace has to handle the exit.
-Userspace can query the validity of the structure by checking
-kvm_valid_regs for specific bits. These bits are architecture specific
-and usually define the validity of a groups of registers. (e.g. one bit
- for general purpose registers)
-
-Please note that the kernel is allowed to use the kvm_run structure as the
-primary storage for certain register types. Therefore, the kernel may use the
-values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
-
-};
-
-
-
-6. Capabilities that can be enabled on vCPUs
---------------------------------------------
-
-There are certain capabilities that change the behavior of the virtual CPU or
-the virtual machine when enabled. To enable them, please see section 4.37.
-Below you can find a list of capabilities and what their effect on the vCPU or
-the virtual machine is when enabling them.
-
-The following information is provided along with the description:
-
-  Architectures: which instruction set architectures provide this ioctl.
-      x86 includes both i386 and x86_64.
-
-  Target: whether this is a per-vcpu or per-vm capability.
-
-  Parameters: what parameters are accepted by the capability.
-
-  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
-      are not detailed, but errors with specific meanings are.
-
-
-6.1 KVM_CAP_PPC_OSI
-
-Architectures: ppc
-Target: vcpu
-Parameters: none
-Returns: 0 on success; -1 on error
-
-This capability enables interception of OSI hypercalls that otherwise would
-be treated as normal system calls to be injected into the guest. OSI hypercalls
-were invented by Mac-on-Linux to have a standardized communication mechanism
-between the guest and the host.
-
-When this capability is enabled, KVM_EXIT_OSI can occur.
-
-
-6.2 KVM_CAP_PPC_PAPR
-
-Architectures: ppc
-Target: vcpu
-Parameters: none
-Returns: 0 on success; -1 on error
-
-This capability enables interception of PAPR hypercalls. PAPR hypercalls are
-done using the hypercall instruction "sc 1".
-
-It also sets the guest privilege level to "supervisor" mode. Usually the guest
-runs in "hypervisor" privilege mode with a few missing features.
-
-In addition to the above, it changes the semantics of SDR1. In this mode, the
-HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
-HTAB invisible to the guest.
-
-When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
-
-
-6.3 KVM_CAP_SW_TLB
-
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] is the address of a struct kvm_config_tlb
-Returns: 0 on success; -1 on error
-
-struct kvm_config_tlb {
-	__u64 params;
-	__u64 array;
-	__u32 mmu_type;
-	__u32 array_len;
-};
-
-Configures the virtual CPU's TLB array, establishing a shared memory area
-between userspace and KVM.  The "params" and "array" fields are userspace
-addresses of mmu-type-specific data structures.  The "array_len" field is an
-safety mechanism, and should be set to the size in bytes of the memory that
-userspace has reserved for the array.  It must be at least the size dictated
-by "mmu_type" and "params".
-
-While KVM_RUN is active, the shared region is under control of KVM.  Its
-contents are undefined, and any modification by userspace results in
-boundedly undefined behavior.
-
-On return from KVM_RUN, the shared region will reflect the current state of
-the guest's TLB.  If userspace makes any changes, it must call KVM_DIRTY_TLB
-to tell KVM which entries have been changed, prior to calling KVM_RUN again
-on this vcpu.
-
-For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
- - The "params" field is of type "struct kvm_book3e_206_tlb_params".
- - The "array" field points to an array of type "struct
-   kvm_book3e_206_tlb_entry".
- - The array consists of all entries in the first TLB, followed by all
-   entries in the second TLB.
- - Within a TLB, entries are ordered first by increasing set number.  Within a
-   set, entries are ordered by way (increasing ESEL).
- - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
-   where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
- - The tsize field of mas1 shall be set to 4K on TLB0, even though the
-   hardware ignores this value for TLB0.
-
-6.4 KVM_CAP_S390_CSS_SUPPORT
-
-Architectures: s390
-Target: vcpu
-Parameters: none
-Returns: 0 on success; -1 on error
-
-This capability enables support for handling of channel I/O instructions.
-
-TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are
-handled in-kernel, while the other I/O instructions are passed to userspace.
-
-When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST
-SUBCHANNEL intercepts.
-
-Note that even though this capability is enabled per-vcpu, the complete
-virtual machine is affected.
-
-6.5 KVM_CAP_PPC_EPR
-
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] defines whether the proxy facility is active
-Returns: 0 on success; -1 on error
-
-This capability enables or disables the delivery of interrupts through the
-external proxy facility.
-
-When enabled (args[0] != 0), every time the guest gets an external interrupt
-delivered, it automatically exits into user space with a KVM_EXIT_EPR exit
-to receive the topmost interrupt vector.
-
-When disabled (args[0] == 0), behavior is as if this facility is unsupported.
-
-When this capability is enabled, KVM_EXIT_EPR can occur.
-
-6.6 KVM_CAP_IRQ_MPIC
-
-Architectures: ppc
-Parameters: args[0] is the MPIC device fd
-            args[1] is the MPIC CPU number for this vcpu
-
-This capability connects the vcpu to an in-kernel MPIC device.
-
-6.7 KVM_CAP_IRQ_XICS
-
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] is the XICS device fd
-            args[1] is the XICS CPU number (server ID) for this vcpu
-
-This capability connects the vcpu to an in-kernel XICS device.
-
-6.8 KVM_CAP_S390_IRQCHIP
-
-Architectures: s390
-Target: vm
-Parameters: none
-
-This capability enables the in-kernel irqchip for s390. Please refer to
-"4.24 KVM_CREATE_IRQCHIP" for details.
-
-6.9 KVM_CAP_MIPS_FPU
-
-Architectures: mips
-Target: vcpu
-Parameters: args[0] is reserved for future use (should be 0).
-
-This capability allows the use of the host Floating Point Unit by the guest. It
-allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
-done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
-(depending on the current guest FPU register mode), and the Status.FR,
-Config5.FRE bits are accessible via the KVM API and also from the guest,
-depending on them being supported by the FPU.
-
-6.10 KVM_CAP_MIPS_MSA
-
-Architectures: mips
-Target: vcpu
-Parameters: args[0] is reserved for future use (should be 0).
-
-This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
-It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
-Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
-accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
-the guest.
-
-6.74 KVM_CAP_SYNC_REGS
-Architectures: s390, x86
-Target: s390: always enabled, x86: vcpu
-Parameters: none
-Returns: x86: KVM_CHECK_EXTENSION returns a bit-array indicating which register
-sets are supported (bitfields defined in arch/x86/include/uapi/asm/kvm.h).
-
-As described above in the kvm_sync_regs struct info in section 5 (kvm_run):
-KVM_CAP_SYNC_REGS "allow[s] userspace to access certain guest registers
-without having to call SET/GET_*REGS". This reduces overhead by eliminating
-repeated ioctl calls for setting and/or getting register values. This is
-particularly important when userspace is making synchronous guest state
-modifications, e.g. when emulating and/or intercepting instructions in
-userspace.
-
-For s390 specifics, please refer to the source code.
-
-For x86:
-- the register sets to be copied out to kvm_run are selectable
-  by userspace (rather that all sets being copied out for every exit).
-- vcpu_events are available in addition to regs and sregs.
-
-For x86, the 'kvm_valid_regs' field of struct kvm_run is overloaded to
-function as an input bit-array field set by userspace to indicate the
-specific register sets to be copied out on the next exit.
-
-To indicate when userspace has modified values that should be copied into
-the vCPU, the all architecture bitarray field, 'kvm_dirty_regs' must be set.
-This is done using the same bitflags as for the 'kvm_valid_regs' field.
-If the dirty bit is not set, then the register set values will not be copied
-into the vCPU even if they've been modified.
-
-Unused bitfields in the bitarrays must be set to zero.
-
-struct kvm_sync_regs {
-        struct kvm_regs regs;
-        struct kvm_sregs sregs;
-        struct kvm_vcpu_events events;
-};
-
-6.75 KVM_CAP_PPC_IRQ_XIVE
-
-Architectures: ppc
-Target: vcpu
-Parameters: args[0] is the XIVE device fd
-            args[1] is the XIVE CPU number (server ID) for this vcpu
-
-This capability connects the vcpu to an in-kernel XIVE device.
-
-7. Capabilities that can be enabled on VMs
-------------------------------------------
-
-There are certain capabilities that change the behavior of the virtual
-machine when enabled. To enable them, please see section 4.37. Below
-you can find a list of capabilities and what their effect on the VM
-is when enabling them.
-
-The following information is provided along with the description:
-
-  Architectures: which instruction set architectures provide this ioctl.
-      x86 includes both i386 and x86_64.
-
-  Parameters: what parameters are accepted by the capability.
-
-  Returns: the return value.  General error numbers (EBADF, ENOMEM, EINVAL)
-      are not detailed, but errors with specific meanings are.
-
-
-7.1 KVM_CAP_PPC_ENABLE_HCALL
-
-Architectures: ppc
-Parameters: args[0] is the sPAPR hcall number
-	    args[1] is 0 to disable, 1 to enable in-kernel handling
-
-This capability controls whether individual sPAPR hypercalls (hcalls)
-get handled by the kernel or not.  Enabling or disabling in-kernel
-handling of an hcall is effective across the VM.  On creation, an
-initial set of hcalls are enabled for in-kernel handling, which
-consists of those hcalls for which in-kernel handlers were implemented
-before this capability was implemented.  If disabled, the kernel will
-not to attempt to handle the hcall, but will always exit to userspace
-to handle it.  Note that it may not make sense to enable some and
-disable others of a group of related hcalls, but KVM does not prevent
-userspace from doing that.
-
-If the hcall number specified is not one that has an in-kernel
-implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
-error.
-
-7.2 KVM_CAP_S390_USER_SIGP
-
-Architectures: s390
-Parameters: none
-
-This capability controls which SIGP orders will be handled completely in user
-space. With this capability enabled, all fast orders will be handled completely
-in the kernel:
-- SENSE
-- SENSE RUNNING
-- EXTERNAL CALL
-- EMERGENCY SIGNAL
-- CONDITIONAL EMERGENCY SIGNAL
-
-All other orders will be handled completely in user space.
-
-Only privileged operation exceptions will be checked for in the kernel (or even
-in the hardware prior to interception). If this capability is not enabled, the
-old way of handling SIGP orders is used (partially in kernel and user space).
-
-7.3 KVM_CAP_S390_VECTOR_REGISTERS
-
-Architectures: s390
-Parameters: none
-Returns: 0 on success, negative value on error
-
-Allows use of the vector registers introduced with z13 processor, and
-provides for the synchronization between host and user space.  Will
-return -EINVAL if the machine does not support vectors.
-
-7.4 KVM_CAP_S390_USER_STSI
-
-Architectures: s390
-Parameters: none
-
-This capability allows post-handlers for the STSI instruction. After
-initial handling in the kernel, KVM exits to user space with
-KVM_EXIT_S390_STSI to allow user space to insert further data.
-
-Before exiting to userspace, kvm handlers should fill in s390_stsi field of
-vcpu->run:
-struct {
-	__u64 addr;
-	__u8 ar;
-	__u8 reserved;
-	__u8 fc;
-	__u8 sel1;
-	__u16 sel2;
-} s390_stsi;
-
-@addr - guest address of STSI SYSIB
-@fc   - function code
-@sel1 - selector 1
-@sel2 - selector 2
-@ar   - access register number
-
-KVM handlers should exit to userspace with rc = -EREMOTE.
-
-7.5 KVM_CAP_SPLIT_IRQCHIP
-
-Architectures: x86
-Parameters: args[0] - number of routes reserved for userspace IOAPICs
-Returns: 0 on success, -1 on error
-
-Create a local apic for each processor in the kernel. This can be used
-instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
-IOAPIC and PIC (and also the PIT, even though this has to be enabled
-separately).
-
-This capability also enables in kernel routing of interrupt requests;
-when KVM_CAP_SPLIT_IRQCHIP only routes of KVM_IRQ_ROUTING_MSI type are
-used in the IRQ routing table.  The first args[0] MSI routes are reserved
-for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
-a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
-
-Fails if VCPU has already been created, or if the irqchip is already in the
-kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
-
-7.6 KVM_CAP_S390_RI
-
-Architectures: s390
-Parameters: none
-
-Allows use of runtime-instrumentation introduced with zEC12 processor.
-Will return -EINVAL if the machine does not support runtime-instrumentation.
-Will return -EBUSY if a VCPU has already been created.
-
-7.7 KVM_CAP_X2APIC_API
-
-Architectures: x86
-Parameters: args[0] - features that should be enabled
-Returns: 0 on success, -EINVAL when args[0] contains invalid features
-
-Valid feature flags in args[0] are
-
-#define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
-#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK  (1ULL << 1)
-
-Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of
-KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC,
-allowing the use of 32-bit APIC IDs.  See KVM_CAP_X2APIC_API in their
-respective sections.
-
-KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK must be enabled for x2APIC to work
-in logical mode or with more than 255 VCPUs.  Otherwise, KVM treats 0xff
-as a broadcast even in x2APIC mode in order to support physical x2APIC
-without interrupt remapping.  This is undesirable in logical mode,
-where 0xff represents CPUs 0-7 in cluster 0.
-
-7.8 KVM_CAP_S390_USER_INSTR0
-
-Architectures: s390
-Parameters: none
-
-With this capability enabled, all illegal instructions 0x0000 (2 bytes) will
-be intercepted and forwarded to user space. User space can use this
-mechanism e.g. to realize 2-byte software breakpoints. The kernel will
-not inject an operating exception for these instructions, user space has
-to take care of that.
-
-This capability can be enabled dynamically even if VCPUs were already
-created and are running.
-
-7.9 KVM_CAP_S390_GS
-
-Architectures: s390
-Parameters: none
-Returns: 0 on success; -EINVAL if the machine does not support
-	 guarded storage; -EBUSY if a VCPU has already been created.
-
-Allows use of guarded storage for the KVM guest.
-
-7.10 KVM_CAP_S390_AIS
-
-Architectures: s390
-Parameters: none
-
-Allow use of adapter-interruption suppression.
-Returns: 0 on success; -EBUSY if a VCPU has already been created.
-
-7.11 KVM_CAP_PPC_SMT
-
-Architectures: ppc
-Parameters: vsmt_mode, flags
-
-Enabling this capability on a VM provides userspace with a way to set
-the desired virtual SMT mode (i.e. the number of virtual CPUs per
-virtual core).  The virtual SMT mode, vsmt_mode, must be a power of 2
-between 1 and 8.  On POWER8, vsmt_mode must also be no greater than
-the number of threads per subcore for the host.  Currently flags must
-be 0.  A successful call to enable this capability will result in
-vsmt_mode being returned when the KVM_CAP_PPC_SMT capability is
-subsequently queried for the VM.  This capability is only supported by
-HV KVM, and can only be set before any VCPUs have been created.
-The KVM_CAP_PPC_SMT_POSSIBLE capability indicates which virtual SMT
-modes are available.
-
-7.12 KVM_CAP_PPC_FWNMI
-
-Architectures: ppc
-Parameters: none
-
-With this capability a machine check exception in the guest address
-space will cause KVM to exit the guest with NMI exit reason. This
-enables QEMU to build error log and branch to guest kernel registered
-machine check handling routine. Without this capability KVM will
-branch to guests' 0x200 interrupt vector.
-
-7.13 KVM_CAP_X86_DISABLE_EXITS
-
-Architectures: x86
-Parameters: args[0] defines which exits are disabled
-Returns: 0 on success, -EINVAL when args[0] contains invalid exits
-
-Valid bits in args[0] are
-
-#define KVM_X86_DISABLE_EXITS_MWAIT            (1 << 0)
-#define KVM_X86_DISABLE_EXITS_HLT              (1 << 1)
-#define KVM_X86_DISABLE_EXITS_PAUSE            (1 << 2)
-#define KVM_X86_DISABLE_EXITS_CSTATE           (1 << 3)
-
-Enabling this capability on a VM provides userspace with a way to no
-longer intercept some instructions for improved latency in some
-workloads, and is suggested when vCPUs are associated to dedicated
-physical CPUs.  More bits can be added in the future; userspace can
-just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
-all such vmexits.
-
-Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
-
-7.14 KVM_CAP_S390_HPAGE_1M
-
-Architectures: s390
-Parameters: none
-Returns: 0 on success, -EINVAL if hpage module parameter was not set
-	 or cmma is enabled, or the VM has the KVM_VM_S390_UCONTROL
-	 flag set
-
-With this capability the KVM support for memory backing with 1m pages
-through hugetlbfs can be enabled for a VM. After the capability is
-enabled, cmma can't be enabled anymore and pfmfi and the storage key
-interpretation are disabled. If cmma has already been enabled or the
-hpage module parameter is not set to 1, -EINVAL is returned.
-
-While it is generally possible to create a huge page backed VM without
-this capability, the VM will not be able to run.
-
-7.15 KVM_CAP_MSR_PLATFORM_INFO
-
-Architectures: x86
-Parameters: args[0] whether feature should be enabled or not
-
-With this capability, a guest may read the MSR_PLATFORM_INFO MSR. Otherwise,
-a #GP would be raised when the guest tries to access. Currently, this
-capability does not enable write permissions of this MSR for the guest.
-
-7.16 KVM_CAP_PPC_NESTED_HV
-
-Architectures: ppc
-Parameters: none
-Returns: 0 on success, -EINVAL when the implementation doesn't support
-	 nested-HV virtualization.
-
-HV-KVM on POWER9 and later systems allows for "nested-HV"
-virtualization, which provides a way for a guest VM to run guests that
-can run using the CPU's supervisor mode (privileged non-hypervisor
-state).  Enabling this capability on a VM depends on the CPU having
-the necessary functionality and on the facility being enabled with a
-kvm-hv module parameter.
-
-7.17 KVM_CAP_EXCEPTION_PAYLOAD
-
-Architectures: x86
-Parameters: args[0] whether feature should be enabled or not
-
-With this capability enabled, CR2 will not be modified prior to the
-emulated VM-exit when L1 intercepts a #PF exception that occurs in
-L2. Similarly, for kvm-intel only, DR6 will not be modified prior to
-the emulated VM-exit when L1 intercepts a #DB exception that occurs in
-L2. As a result, when KVM_GET_VCPU_EVENTS reports a pending #PF (or
-#DB) exception for L2, exception.has_payload will be set and the
-faulting address (or the new DR6 bits*) will be reported in the
-exception_payload field. Similarly, when userspace injects a #PF (or
-#DB) into L2 using KVM_SET_VCPU_EVENTS, it is expected to set
-exception.has_payload and to put the faulting address (or the new DR6
-bits*) in the exception_payload field.
-
-This capability also enables exception.pending in struct
-kvm_vcpu_events, which allows userspace to distinguish between pending
-and injected exceptions.
-
-
-* For the new DR6 bits, note that bit 16 is set iff the #DB exception
-  will clear DR6.RTM.
-
-7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
-
-Architectures: x86, arm, arm64, mips
-Parameters: args[0] whether feature should be enabled or not
-
-With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
-clear and write-protect all pages that are returned as dirty.
-Rather, userspace will have to do this operation separately using
-KVM_CLEAR_DIRTY_LOG.
-
-At the cost of a slightly more complicated operation, this provides better
-scalability and responsiveness for two reasons.  First,
-KVM_CLEAR_DIRTY_LOG ioctl can operate on a 64-page granularity rather
-than requiring to sync a full memslot; this ensures that KVM does not
-take spinlocks for an extended period of time.  Second, in some cases a
-large amount of time can pass between a call to KVM_GET_DIRTY_LOG and
-userspace actually using the data in the page.  Pages can be modified
-during this time, which is inefficint for both the guest and userspace:
-the guest will incur a higher penalty due to write protection faults,
-while userspace can see false reports of dirty pages.  Manual reprotection
-helps reducing this time, improving guest performance and reducing the
-number of dirty log false positives.
-
-KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 was previously available under the name
-KVM_CAP_MANUAL_DIRTY_LOG_PROTECT, but the implementation had bugs that make
-it hard or impossible to use it correctly.  The availability of
-KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 signals that those bugs are fixed.
-Userspace should not try to use KVM_CAP_MANUAL_DIRTY_LOG_PROTECT.
-
-8. Other capabilities.
-----------------------
-
-This section lists capabilities that give information about other
-features of the KVM implementation.
-
-8.1 KVM_CAP_PPC_HWRNG
-
-Architectures: ppc
-
-This capability, if KVM_CHECK_EXTENSION indicates that it is
-available, means that that the kernel has an implementation of the
-H_RANDOM hypercall backed by a hardware random-number generator.
-If present, the kernel H_RANDOM handler can be enabled for guest use
-with the KVM_CAP_PPC_ENABLE_HCALL capability.
-
-8.2 KVM_CAP_HYPERV_SYNIC
-
-Architectures: x86
-This capability, if KVM_CHECK_EXTENSION indicates that it is
-available, means that that the kernel has an implementation of the
-Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is
-used to support Windows Hyper-V based guest paravirt drivers(VMBus).
-
-In order to use SynIC, it has to be activated by setting this
-capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this
-will disable the use of APIC hardware virtualization even if supported
-by the CPU, as it's incompatible with SynIC auto-EOI behavior.
-
-8.3 KVM_CAP_PPC_RADIX_MMU
-
-Architectures: ppc
-
-This capability, if KVM_CHECK_EXTENSION indicates that it is
-available, means that that the kernel can support guests using the
-radix MMU defined in Power ISA V3.00 (as implemented in the POWER9
-processor).
-
-8.4 KVM_CAP_PPC_HASH_MMU_V3
-
-Architectures: ppc
-
-This capability, if KVM_CHECK_EXTENSION indicates that it is
-available, means that that the kernel can support guests using the
-hashed page table MMU defined in Power ISA V3.00 (as implemented in
-the POWER9 processor), including in-memory segment tables.
-
-8.5 KVM_CAP_MIPS_VZ
-
-Architectures: mips
-
-This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
-it is available, means that full hardware assisted virtualization capabilities
-of the hardware are available for use through KVM. An appropriate
-KVM_VM_MIPS_* type must be passed to KVM_CREATE_VM to create a VM which
-utilises it.
-
-If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
-available, it means that the VM is using full hardware assisted virtualization
-capabilities of the hardware. This is useful to check after creating a VM with
-KVM_VM_MIPS_DEFAULT.
-
-The value returned by KVM_CHECK_EXTENSION should be compared against known
-values (see below). All other values are reserved. This is to allow for the
-possibility of other hardware assisted virtualization implementations which
-may be incompatible with the MIPS VZ ASE.
-
- 0: The trap & emulate implementation is in use to run guest code in user
-    mode. Guest virtual memory segments are rearranged to fit the guest in the
-    user mode address space.
-
- 1: The MIPS VZ ASE is in use, providing full hardware assisted
-    virtualization, including standard guest virtual memory segments.
-
-8.6 KVM_CAP_MIPS_TE
-
-Architectures: mips
-
-This capability, if KVM_CHECK_EXTENSION on the main kvm handle indicates that
-it is available, means that the trap & emulate implementation is available to
-run guest code in user mode, even if KVM_CAP_MIPS_VZ indicates that hardware
-assisted virtualisation is also available. KVM_VM_MIPS_TE (0) must be passed
-to KVM_CREATE_VM to create a VM which utilises it.
-
-If KVM_CHECK_EXTENSION on a kvm VM handle indicates that this capability is
-available, it means that the VM is using trap & emulate.
-
-8.7 KVM_CAP_MIPS_64BIT
-
-Architectures: mips
-
-This capability indicates the supported architecture type of the guest, i.e. the
-supported register and address width.
-
-The values returned when this capability is checked by KVM_CHECK_EXTENSION on a
-kvm VM handle correspond roughly to the CP0_Config.AT register field, and should
-be checked specifically against known values (see below). All other values are
-reserved.
-
- 0: MIPS32 or microMIPS32.
-    Both registers and addresses are 32-bits wide.
-    It will only be possible to run 32-bit guest code.
-
- 1: MIPS64 or microMIPS64 with access only to 32-bit compatibility segments.
-    Registers are 64-bits wide, but addresses are 32-bits wide.
-    64-bit guest code may run but cannot access MIPS64 memory segments.
-    It will also be possible to run 32-bit guest code.
-
- 2: MIPS64 or microMIPS64 with access to all address segments.
-    Both registers and addresses are 64-bits wide.
-    It will be possible to run 64-bit or 32-bit guest code.
-
-8.9 KVM_CAP_ARM_USER_IRQ
-
-Architectures: arm, arm64
-This capability, if KVM_CHECK_EXTENSION indicates that it is available, means
-that if userspace creates a VM without an in-kernel interrupt controller, it
-will be notified of changes to the output level of in-kernel emulated devices,
-which can generate virtual interrupts, presented to the VM.
-For such VMs, on every return to userspace, the kernel
-updates the vcpu's run->s.regs.device_irq_level field to represent the actual
-output level of the device.
-
-Whenever kvm detects a change in the device output level, kvm guarantees at
-least one return to userspace before running the VM.  This exit could either
-be a KVM_EXIT_INTR or any other exit event, like KVM_EXIT_MMIO. This way,
-userspace can always sample the device output level and re-compute the state of
-the userspace interrupt controller.  Userspace should always check the state
-of run->s.regs.device_irq_level on every kvm exit.
-The value in run->s.regs.device_irq_level can represent both level and edge
-triggered interrupt signals, depending on the device.  Edge triggered interrupt
-signals will exit to userspace with the bit in run->s.regs.device_irq_level
-set exactly once per edge signal.
-
-The field run->s.regs.device_irq_level is available independent of
-run->kvm_valid_regs or run->kvm_dirty_regs bits.
-
-If KVM_CAP_ARM_USER_IRQ is supported, the KVM_CHECK_EXTENSION ioctl returns a
-number larger than 0 indicating the version of this capability is implemented
-and thereby which bits in in run->s.regs.device_irq_level can signal values.
-
-Currently the following bits are defined for the device_irq_level bitmap:
-
-  KVM_CAP_ARM_USER_IRQ >= 1:
-
-    KVM_ARM_DEV_EL1_VTIMER -  EL1 virtual timer
-    KVM_ARM_DEV_EL1_PTIMER -  EL1 physical timer
-    KVM_ARM_DEV_PMU        -  ARM PMU overflow interrupt signal
-
-Future versions of kvm may implement additional events. These will get
-indicated by returning a higher number from KVM_CHECK_EXTENSION and will be
-listed above.
-
-8.10 KVM_CAP_PPC_SMT_POSSIBLE
-
-Architectures: ppc
-
-Querying this capability returns a bitmap indicating the possible
-virtual SMT modes that can be set using KVM_CAP_PPC_SMT.  If bit N
-(counting from the right) is set, then a virtual SMT mode of 2^N is
-available.
-
-8.11 KVM_CAP_HYPERV_SYNIC2
-
-Architectures: x86
-
-This capability enables a newer version of Hyper-V Synthetic interrupt
-controller (SynIC).  The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
-doesn't clear SynIC message and event flags pages when they are enabled by
-writing to the respective MSRs.
-
-8.12 KVM_CAP_HYPERV_VP_INDEX
-
-Architectures: x86
-
-This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr.  Its
-value is used to denote the target vcpu for a SynIC interrupt.  For
-compatibilty, KVM initializes this msr to KVM's internal vcpu index.  When this
-capability is absent, userspace can still query this msr's value.
-
-8.13 KVM_CAP_S390_AIS_MIGRATION
-
-Architectures: s390
-Parameters: none
-
-This capability indicates if the flic device will be able to get/set the
-AIS states for migration via the KVM_DEV_FLIC_AISM_ALL attribute and allows
-to discover this without having to create a flic device.
-
-8.14 KVM_CAP_S390_PSW
-
-Architectures: s390
-
-This capability indicates that the PSW is exposed via the kvm_run structure.
-
-8.15 KVM_CAP_S390_GMAP
-
-Architectures: s390
-
-This capability indicates that the user space memory used as guest mapping can
-be anywhere in the user memory address space, as long as the memory slots are
-aligned and sized to a segment (1MB) boundary.
-
-8.16 KVM_CAP_S390_COW
-
-Architectures: s390
-
-This capability indicates that the user space memory used as guest mapping can
-use copy-on-write semantics as well as dirty pages tracking via read-only page
-tables.
-
-8.17 KVM_CAP_S390_BPB
-
-Architectures: s390
-
-This capability indicates that kvm will implement the interfaces to handle
-reset, migration and nested KVM for branch prediction blocking. The stfle
-facility 82 should not be provided to the guest without this capability.
-
-8.18 KVM_CAP_HYPERV_TLBFLUSH
-
-Architectures: x86
-
-This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
-hypercalls:
-HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
-HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
-
-8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
-
-Architectures: arm, arm64
-
-This capability indicates that userspace can specify (via the
-KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
-takes a virtual SError interrupt exception.
-If KVM advertises this capability, userspace can only specify the ISS field for
-the ESR syndrome. Other parts of the ESR, such as the EC are generated by the
-CPU when the exception is taken. If this virtual SError is taken to EL1 using
-AArch64, this value will be reported in the ISS field of ESR_ELx.
-
-See KVM_CAP_VCPU_EVENTS for more details.
-8.20 KVM_CAP_HYPERV_SEND_IPI
-
-Architectures: x86
-
-This capability indicates that KVM supports paravirtualized Hyper-V IPI send
-hypercalls:
-HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
diff --git a/Documentation/virtual/kvm/arm/hyp-abi.txt b/Documentation/virtual/kvm/arm/hyp-abi.txt
deleted file mode 100644
index a20a0bee268d..000000000000
--- a/Documentation/virtual/kvm/arm/hyp-abi.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-* Internal ABI between the kernel and HYP
-
-This file documents the interaction between the Linux kernel and the
-hypervisor layer when running Linux as a hypervisor (for example
-KVM). It doesn't cover the interaction of the kernel with the
-hypervisor when running as a guest (under Xen, KVM or any other
-hypervisor), or any hypervisor-specific interaction when the kernel is
-used as a host.
-
-On arm and arm64 (without VHE), the kernel doesn't run in hypervisor
-mode, but still needs to interact with it, allowing a built-in
-hypervisor to be either installed or torn down.
-
-In order to achieve this, the kernel must be booted at HYP (arm) or
-EL2 (arm64), allowing it to install a set of stubs before dropping to
-SVC/EL1. These stubs are accessible by using a 'hvc #0' instruction,
-and only act on individual CPUs.
-
-Unless specified otherwise, any built-in hypervisor must implement
-these functions (see arch/arm{,64}/include/asm/virt.h):
-
-* r0/x0 = HVC_SET_VECTORS
-  r1/x1 = vectors
-
-  Set HVBAR/VBAR_EL2 to 'vectors' to enable a hypervisor. 'vectors'
-  must be a physical address, and respect the alignment requirements
-  of the architecture. Only implemented by the initial stubs, not by
-  Linux hypervisors.
-
-* r0/x0 = HVC_RESET_VECTORS
-
-  Turn HYP/EL2 MMU off, and reset HVBAR/VBAR_EL2 to the initials
-  stubs' exception vector value. This effectively disables an existing
-  hypervisor.
-
-* r0/x0 = HVC_SOFT_RESTART
-  r1/x1 = restart address
-  x2 = x0's value when entering the next payload (arm64)
-  x3 = x1's value when entering the next payload (arm64)
-  x4 = x2's value when entering the next payload (arm64)
-
-  Mask all exceptions, disable the MMU, move the arguments into place
-  (arm64 only), and jump to the restart address while at HYP/EL2. This
-  hypercall is not expected to return to its caller.
-
-Any other value of r0/x0 triggers a hypervisor-specific handling,
-which is not documented here.
-
-The return value of a stub hypercall is held by r0/x0, and is 0 on
-success, and HVC_STUB_ERR on error. A stub hypercall is allowed to
-clobber any of the caller-saved registers (x0-x18 on arm64, r0-r3 and
-ip on arm). It is thus recommended to use a function call to perform
-the hypercall.
diff --git a/Documentation/virtual/kvm/arm/psci.txt b/Documentation/virtual/kvm/arm/psci.txt
deleted file mode 100644
index 559586fc9d37..000000000000
--- a/Documentation/virtual/kvm/arm/psci.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-KVM implements the PSCI (Power State Coordination Interface)
-specification in order to provide services such as CPU on/off, reset
-and power-off to the guest.
-
-The PSCI specification is regularly updated to provide new features,
-and KVM implements these updates if they make sense from a virtualization
-point of view.
-
-This means that a guest booted on two different versions of KVM can
-observe two different "firmware" revisions. This could cause issues if
-a given guest is tied to a particular PSCI revision (unlikely), or if
-a migration causes a different PSCI version to be exposed out of the
-blue to an unsuspecting guest.
-
-In order to remedy this situation, KVM exposes a set of "firmware
-pseudo-registers" that can be manipulated using the GET/SET_ONE_REG
-interface. These registers can be saved/restored by userspace, and set
-to a convenient value if required.
-
-The following register is defined:
-
-* KVM_REG_ARM_PSCI_VERSION:
-
-  - Only valid if the vcpu has the KVM_ARM_VCPU_PSCI_0_2 feature set
-    (and thus has already been initialized)
-  - Returns the current PSCI version on GET_ONE_REG (defaulting to the
-    highest PSCI version implemented by KVM and compatible with v0.2)
-  - Allows any PSCI version implemented by KVM and compatible with
-    v0.2 to be set with SET_ONE_REG
-  - Affects the whole VM (even if the register view is per-vcpu)
-
-* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
-  Holds the state of the firmware support to mitigate CVE-2017-5715, as
-  offered by KVM to the guest via a HVC call. The workaround is described
-  under SMCCC_ARCH_WORKAROUND_1 in [1].
-  Accepted values are:
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
-      firmware support for the workaround. The mitigation status for the
-      guest is unknown.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
-      available to the guest and required for the mitigation.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
-      is available to the guest, but it is not needed on this VCPU.
-
-* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
-  Holds the state of the firmware support to mitigate CVE-2018-3639, as
-  offered by KVM to the guest via a HVC call. The workaround is described
-  under SMCCC_ARCH_WORKAROUND_2 in [1].
-  Accepted values are:
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
-      available. KVM does not offer firmware support for the workaround.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
-      unknown. KVM does not offer firmware support for the workaround.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
-      and can be disabled by a vCPU. If
-      KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
-      this vCPU.
-    KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
-      always active on this vCPU or it is not needed.
-
-[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf
diff --git a/Documentation/virtual/kvm/cpuid.rst b/Documentation/virtual/kvm/cpuid.rst
deleted file mode 100644
index 01b081f6e7ea..000000000000
--- a/Documentation/virtual/kvm/cpuid.rst
+++ /dev/null
@@ -1,107 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-==============
-KVM CPUID bits
-==============
-
-:Author: Glauber Costa <glommer@gmail.com>
-
-A guest running on a kvm host, can check some of its features using
-cpuid. This is not always guaranteed to work, since userspace can
-mask-out some, or even all KVM-related cpuid features before launching
-a guest.
-
-KVM cpuid functions are:
-
-function: KVM_CPUID_SIGNATURE (0x40000000)
-
-returns::
-
-   eax = 0x40000001
-   ebx = 0x4b4d564b
-   ecx = 0x564b4d56
-   edx = 0x4d
-
-Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
-The value in eax corresponds to the maximum cpuid function present in this leaf,
-and will be updated if more functions are added in the future.
-Note also that old hosts set eax value to 0x0. This should
-be interpreted as if the value was 0x40000001.
-This function queries the presence of KVM cpuid leafs.
-
-function: define KVM_CPUID_FEATURES (0x40000001)
-
-returns::
-
-          ebx, ecx
-          eax = an OR'ed group of (1 << flag)
-
-where ``flag`` is defined as below:
-
-================================= =========== ================================
-flag                              value       meaning
-================================= =========== ================================
-KVM_FEATURE_CLOCKSOURCE           0           kvmclock available at msrs
-                                              0x11 and 0x12
-
-KVM_FEATURE_NOP_IO_DELAY          1           not necessary to perform delays
-                                              on PIO operations
-
-KVM_FEATURE_MMU_OP                2           deprecated
-
-KVM_FEATURE_CLOCKSOURCE2          3           kvmclock available at msrs
-
-                                              0x4b564d00 and 0x4b564d01
-KVM_FEATURE_ASYNC_PF              4           async pf can be enabled by
-                                              writing to msr 0x4b564d02
-
-KVM_FEATURE_STEAL_TIME            5           steal time can be enabled by
-                                              writing to msr 0x4b564d03
-
-KVM_FEATURE_PV_EOI                6           paravirtualized end of interrupt
-                                              handler can be enabled by
-                                              writing to msr 0x4b564d04
-
-KVM_FEATURE_PV_UNHAULT            7           guest checks this feature bit
-                                              before enabling paravirtualized
-                                              spinlock support
-
-KVM_FEATURE_PV_TLB_FLUSH          9           guest checks this feature bit
-                                              before enabling paravirtualized
-                                              tlb flush
-
-KVM_FEATURE_ASYNC_PF_VMEXIT       10          paravirtualized async PF VM EXIT
-                                              can be enabled by setting bit 2
-                                              when writing to msr 0x4b564d02
-
-KVM_FEATURE_PV_SEND_IPI           11          guest checks this feature bit
-                                              before enabling paravirtualized
-                                              sebd IPIs
-
-KVM_FEATURE_PV_POLL_CONTROL       12          host-side polling on HLT can
-                                              be disabled by writing
-                                              to msr 0x4b564d05.
-
-KVM_FEATURE_PV_SCHED_YIELD        13          guest checks this feature bit
-                                              before using paravirtualized
-                                              sched yield.
-
-KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24          host will warn if no guest-side
-                                              per-cpu warps are expeced in
-                                              kvmclock
-================================= =========== ================================
-
-::
-
-      edx = an OR'ed group of (1 << flag)
-
-Where ``flag`` here is defined as below:
-
-================== ============ =================================
-flag               value        meaning
-================== ============ =================================
-KVM_HINTS_REALTIME 0            guest checks this feature bit to
-                                determine that vCPUs are never
-                                preempted for an unlimited time
-                                allowing optimizations
-================== ============ =================================
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README
deleted file mode 100644
index 34a69834124a..000000000000
--- a/Documentation/virtual/kvm/devices/README
+++ /dev/null
@@ -1 +0,0 @@
-This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
deleted file mode 100644
index eeaa95b893a8..000000000000
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ /dev/null
@@ -1,181 +0,0 @@
-ARM Virtual Interrupt Translation Service (ITS)
-===============================================
-
-Device types supported:
-  KVM_DEV_TYPE_ARM_VGIC_ITS    ARM Interrupt Translation Service Controller
-
-The ITS allows MSI(-X) interrupts to be injected into guests. This extension is
-optional.  Creating a virtual ITS controller also requires a host GICv3 (see
-arm-vgic-v3.txt), but does not depend on having physical ITS controllers.
-
-There can be multiple ITS controllers per guest, each of them has to have
-a separate, non-overlapping MMIO region.
-
-
-Groups:
-  KVM_DEV_ARM_VGIC_GRP_ADDR
-  Attributes:
-    KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit)
-      Base address in the guest physical address space of the GICv3 ITS
-      control register frame.
-      This address needs to be 64K aligned and the region covers 128K.
-  Errors:
-    -E2BIG:  Address outside of addressable IPA range
-    -EINVAL: Incorrectly aligned address
-    -EEXIST: Address already configured
-    -EFAULT: Invalid user pointer for attr->addr.
-    -ENODEV: Incorrect attribute or the ITS is not supported.
-
-
-  KVM_DEV_ARM_VGIC_GRP_CTRL
-  Attributes:
-    KVM_DEV_ARM_VGIC_CTRL_INIT
-      request the initialization of the ITS, no additional parameter in
-      kvm_device_attr.addr.
-
-    KVM_DEV_ARM_ITS_CTRL_RESET
-      reset the ITS, no additional parameter in kvm_device_attr.addr.
-      See "ITS Reset State" section.
-
-    KVM_DEV_ARM_ITS_SAVE_TABLES
-      save the ITS table data into guest RAM, at the location provisioned
-      by the guest in corresponding registers/table entries.
-
-      The layout of the tables in guest memory defines an ABI. The entries
-      are laid out in little endian format as described in the last paragraph.
-
-    KVM_DEV_ARM_ITS_RESTORE_TABLES
-      restore the ITS tables from guest RAM to ITS internal structures.
-
-      The GICV3 must be restored before the ITS and all ITS registers but
-      the GITS_CTLR must be restored before restoring the ITS tables.
-
-      The GITS_IIDR read-only register must also be restored before
-      calling KVM_DEV_ARM_ITS_RESTORE_TABLES as the IIDR revision field
-      encodes the ABI revision.
-
-      The expected ordering when restoring the GICv3/ITS is described in section
-      "ITS Restore Sequence".
-
-  Errors:
-    -ENXIO:  ITS not properly configured as required prior to setting
-             this attribute
-    -ENOMEM: Memory shortage when allocating ITS internal data
-    -EINVAL: Inconsistent restored data
-    -EFAULT: Invalid guest ram access
-    -EBUSY:  One or more VCPUS are running
-    -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
-	     state is not available
-
-  KVM_DEV_ARM_VGIC_GRP_ITS_REGS
-  Attributes:
-      The attr field of kvm_device_attr encodes the offset of the
-      ITS register, relative to the ITS control frame base address
-      (ITS_base).
-
-      kvm_device_attr.addr points to a __u64 value whatever the width
-      of the addressed register (32/64 bits). 64 bit registers can only
-      be accessed with full length.
-
-      Writes to read-only registers are ignored by the kernel except for:
-      - GITS_CREADR. It must be restored otherwise commands in the queue
-        will be re-executed after restoring CWRITER. GITS_CREADR must be
-        restored before restoring the GITS_CTLR which is likely to enable the
-        ITS. Also it must be restored after GITS_CBASER since a write to
-        GITS_CBASER resets GITS_CREADR.
-      - GITS_IIDR. The Revision field encodes the table layout ABI revision.
-        In the future we might implement direct injection of virtual LPIs.
-        This will require an upgrade of the table layout and an evolution of
-        the ABI. GITS_IIDR must be restored before calling
-        KVM_DEV_ARM_ITS_RESTORE_TABLES.
-
-      For other registers, getting or setting a register has the same
-      effect as reading/writing the register on real hardware.
-  Errors:
-    -ENXIO: Offset does not correspond to any supported register
-    -EFAULT: Invalid user pointer for attr->addr
-    -EINVAL: Offset is not 64-bit aligned
-    -EBUSY: one or more VCPUS are running
-
- ITS Restore Sequence:
- -------------------------
-
-The following ordering must be followed when restoring the GIC and the ITS:
-a) restore all guest memory and create vcpus
-b) restore all redistributors
-c) provide the ITS base address
-   (KVM_DEV_ARM_VGIC_GRP_ADDR)
-d) restore the ITS in the following order:
-   1. Restore GITS_CBASER
-   2. Restore all other GITS_ registers, except GITS_CTLR!
-   3. Load the ITS table data (KVM_DEV_ARM_ITS_RESTORE_TABLES)
-   4. Restore GITS_CTLR
-
-Then vcpus can be started.
-
- ITS Table ABI REV0:
- -------------------
-
- Revision 0 of the ABI only supports the features of a virtual GICv3, and does
- not support a virtual GICv4 with support for direct injection of virtual
- interrupts for nested hypervisors.
-
- The device table and ITT are indexed by the DeviceID and EventID,
- respectively. The collection table is not indexed by CollectionID, and the
- entries in the collection are listed in no particular order.
- All entries are 8 bytes.
-
- Device Table Entry (DTE):
-
- bits:     | 63| 62 ... 49 | 48 ... 5 | 4 ... 0 |
- values:   | V |   next    | ITT_addr |  Size   |
-
- where;
- - V indicates whether the entry is valid. If not, other fields
-   are not meaningful.
- - next: equals to 0 if this entry is the last one; otherwise it
-   corresponds to the DeviceID offset to the next DTE, capped by
-   2^14 -1.
- - ITT_addr matches bits [51:8] of the ITT address (256 Byte aligned).
- - Size specifies the supported number of bits for the EventID,
-   minus one
-
- Collection Table Entry (CTE):
-
- bits:     | 63| 62 ..  52  | 51 ... 16 | 15  ...   0 |
- values:   | V |    RES0    |  RDBase   |    ICID     |
-
- where:
- - V indicates whether the entry is valid. If not, other fields are
-   not meaningful.
- - RES0: reserved field with Should-Be-Zero-or-Preserved behavior.
- - RDBase is the PE number (GICR_TYPER.Processor_Number semantic),
- - ICID is the collection ID
-
- Interrupt Translation Entry (ITE):
-
- bits:     | 63 ... 48 | 47 ... 16 | 15 ... 0 |
- values:   |    next   |   pINTID  |  ICID    |
-
- where:
- - next: equals to 0 if this entry is the last one; otherwise it corresponds
-   to the EventID offset to the next ITE capped by 2^16 -1.
- - pINTID is the physical LPI ID; if zero, it means the entry is not valid
-   and other fields are not meaningful.
- - ICID is the collection ID
-
- ITS Reset State:
- ----------------
-
-RESET returns the ITS to the same state that it was when first created and
-initialized. When the RESET command returns, the following things are
-guaranteed:
-
-- The ITS is not enabled and quiescent
-  GITS_CTLR.Enabled = 0 .Quiescent=1
-- There is no internally cached state
-- No collection or device table are used
-  GITS_BASER<n>.Valid = 0
-- GITS_CBASER = 0, GITS_CREADR = 0, GITS_CWRITER = 0
-- The ABI version is unchanged and remains the one set when the ITS
-  device was first created.
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
deleted file mode 100644
index ff290b43c8e5..000000000000
--- a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+++ /dev/null
@@ -1,251 +0,0 @@
-ARM Virtual Generic Interrupt Controller v3 and later (VGICv3)
-==============================================================
-
-
-Device types supported:
-  KVM_DEV_TYPE_ARM_VGIC_V3     ARM Generic Interrupt Controller v3.0
-
-Only one VGIC instance may be instantiated through this API.  The created VGIC
-will act as the VM interrupt controller, requiring emulated user-space devices
-to inject interrupts to the VGIC instead of directly to CPUs.  It is not
-possible to create both a GICv3 and GICv2 on the same VM.
-
-Creating a guest GICv3 device requires a host GICv3 as well.
-
-
-Groups:
-  KVM_DEV_ARM_VGIC_GRP_ADDR
-  Attributes:
-    KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
-      Base address in the guest physical address space of the GICv3 distributor
-      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
-      This address needs to be 64K aligned and the region covers 64 KByte.
-
-    KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit)
-      Base address in the guest physical address space of the GICv3
-      redistributor register mappings. There are two 64K pages for each
-      VCPU and all of the redistributor pages are contiguous.
-      Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
-      This address needs to be 64K aligned.
-
-    KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION (rw, 64-bit)
-      The attribute data pointed to by kvm_device_attr.addr is a __u64 value:
-      bits:     | 63   ....  52  |  51   ....   16 | 15 - 12  |11 - 0
-      values:   |     count      |       base      |  flags   | index
-      - index encodes the unique redistributor region index
-      - flags: reserved for future use, currently 0
-      - base field encodes bits [51:16] of the guest physical base address
-        of the first redistributor in the region.
-      - count encodes the number of redistributors in the region. Must be
-        greater than 0.
-      There are two 64K pages for each redistributor in the region and
-      redistributors are laid out contiguously within the region. Regions
-      are filled with redistributors in the index order. The sum of all
-      region count fields must be greater than or equal to the number of
-      VCPUs. Redistributor regions must be registered in the incremental
-      index order, starting from index 0.
-      The characteristics of a specific redistributor region can be read
-      by presetting the index field in the attr data.
-      Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
-
-  It is invalid to mix calls with KVM_VGIC_V3_ADDR_TYPE_REDIST and
-  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION attributes.
-
-  Errors:
-    -E2BIG:  Address outside of addressable IPA range
-    -EINVAL: Incorrectly aligned address, bad redistributor region
-             count/index, mixed redistributor region attribute usage
-    -EEXIST: Address already configured
-    -ENOENT: Attempt to read the characteristics of a non existing
-             redistributor region
-    -ENXIO:  The group or attribute is unknown/unsupported for this device
-             or hardware support is missing.
-    -EFAULT: Invalid user pointer for attr->addr.
-
-
-  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
-  KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
-  Attributes:
-    The attr field of kvm_device_attr encodes two values:
-    bits:     | 63   ....  32  |  31   ....    0 |
-    values:   |      mpidr     |      offset     |
-
-    All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a
-    __u32 value.  64-bit registers must be accessed by separately accessing the
-    lower and higher word.
-
-    Writes to read-only registers are ignored by the kernel.
-
-    KVM_DEV_ARM_VGIC_GRP_DIST_REGS accesses the main distributor registers.
-    KVM_DEV_ARM_VGIC_GRP_REDIST_REGS accesses the redistributor of the CPU
-    specified by the mpidr.
-
-    The offset is relative to the "[Re]Distributor base address" as defined
-    in the GICv3/4 specs.  Getting or setting such a register has the same
-    effect as reading or writing the register on real hardware, except for the
-    following registers: GICD_STATUSR, GICR_STATUSR, GICD_ISPENDR,
-    GICR_ISPENDR0, GICD_ICPENDR, and GICR_ICPENDR0.  These registers behave
-    differently when accessed via this interface compared to their
-    architecturally defined behavior to allow software a full view of the
-    VGIC's internal state.
-
-    The mpidr field is used to specify which
-    redistributor is accessed.  The mpidr is ignored for the distributor.
-
-    The mpidr encoding is based on the affinity information in the
-    architecture defined MPIDR, and the field is encoded as follows:
-      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
-      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
-
-    Note that distributor fields are not banked, but return the same value
-    regardless of the mpidr used to access the register.
-
-    GICD_IIDR.Revision is updated when the KVM implementation is changed in a
-    way directly observable by the guest or userspace.  Userspace should read
-    GICD_IIDR from KVM and write back the read value to confirm its expected
-    behavior is aligned with the KVM implementation.  Userspace should set
-    GICD_IIDR before setting any other registers to ensure the expected
-    behavior.
-
-
-    The GICD_STATUSR and GICR_STATUSR registers are architecturally defined such
-    that a write of a clear bit has no effect, whereas a write with a set bit
-    clears that value.  To allow userspace to freely set the values of these two
-    registers, setting the attributes with the register offsets for these two
-    registers simply sets the non-reserved bits to the value written.
-
-
-    Accesses (reads and writes) to the GICD_ISPENDR register region and
-    GICR_ISPENDR0 registers get/set the value of the latched pending state for
-    the interrupts.
-
-    This is identical to the value returned by a guest read from ISPENDR for an
-    edge triggered interrupt, but may differ for level triggered interrupts.
-    For edge triggered interrupts, once an interrupt becomes pending (whether
-    because of an edge detected on the input line or because of a guest write
-    to ISPENDR) this state is "latched", and only cleared when either the
-    interrupt is activated or when the guest writes to ICPENDR. A level
-    triggered interrupt may be pending either because the level input is held
-    high by a device, or because of a guest write to the ISPENDR register. Only
-    ISPENDR writes are latched; if the device lowers the line level then the
-    interrupt is no longer pending unless the guest also wrote to ISPENDR, and
-    conversely writes to ICPENDR or activations of the interrupt do not clear
-    the pending status if the line level is still being held high.  (These
-    rules are documented in the GICv3 specification descriptions of the ICPENDR
-    and ISPENDR registers.) For a level triggered interrupt the value accessed
-    here is that of the latch which is set by ISPENDR and cleared by ICPENDR or
-    interrupt activation, whereas the value returned by a guest read from
-    ISPENDR is the logical OR of the latch value and the input line level.
-
-    Raw access to the latch state is provided to userspace so that it can save
-    and restore the entire GIC internal state (which is defined by the
-    combination of the current input line level and the latch state, and cannot
-    be deduced from purely the line level and the value of the ISPENDR
-    registers).
-
-    Accesses to GICD_ICPENDR register region and GICR_ICPENDR0 registers have
-    RAZ/WI semantics, meaning that reads always return 0 and writes are always
-    ignored.
-
-  Errors:
-    -ENXIO: Getting or setting this register is not yet supported
-    -EBUSY: One or more VCPUs are running
-
-
-  KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
-  Attributes:
-    The attr field of kvm_device_attr encodes two values:
-    bits:     | 63      ....       32 | 31  ....  16 | 15  ....  0 |
-    values:   |         mpidr         |      RES     |    instr    |
-
-    The mpidr field encodes the CPU ID based on the affinity information in the
-    architecture defined MPIDR, and the field is encoded as follows:
-      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
-      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
-
-    The instr field encodes the system register to access based on the fields
-    defined in the A64 instruction set encoding for system register access
-    (RES means the bits are reserved for future use and should be zero):
-
-      | 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 |
-      |   Op 0    |    Op1    |    CRn   |   CRm   |   Op2   |
-
-    All system regs accessed through this API are (rw, 64-bit) and
-    kvm_device_attr.addr points to a __u64 value.
-
-    KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS accesses the CPU interface registers for the
-    CPU specified by the mpidr field.
-
-    CPU interface registers access is not implemented for AArch32 mode.
-    Error -ENXIO is returned when accessed in AArch32 mode.
-  Errors:
-    -ENXIO: Getting or setting this register is not yet supported
-    -EBUSY: VCPU is running
-    -EINVAL: Invalid mpidr or register value supplied
-
-
-  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
-  Attributes:
-    A value describing the number of interrupts (SGI, PPI and SPI) for
-    this GIC instance, ranging from 64 to 1024, in increments of 32.
-
-    kvm_device_attr.addr points to a __u32 value.
-
-  Errors:
-    -EINVAL: Value set is out of the expected range
-    -EBUSY: Value has already be set.
-
-
-  KVM_DEV_ARM_VGIC_GRP_CTRL
-  Attributes:
-    KVM_DEV_ARM_VGIC_CTRL_INIT
-      request the initialization of the VGIC, no additional parameter in
-      kvm_device_attr.addr.
-    KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES
-      save all LPI pending bits into guest RAM pending tables.
-
-      The first kB of the pending table is not altered by this operation.
-  Errors:
-    -ENXIO: VGIC not properly configured as required prior to calling
-     this attribute
-    -ENODEV: no online VCPU
-    -ENOMEM: memory shortage when allocating vgic internal data
-    -EFAULT: Invalid guest ram access
-    -EBUSY:  One or more VCPUS are running
-
-
-  KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO
-  Attributes:
-    The attr field of kvm_device_attr encodes the following values:
-    bits:     | 63      ....       32 | 31   ....    10 | 9  ....  0 |
-    values:   |         mpidr         |      info       |   vINTID   |
-
-    The vINTID specifies which set of IRQs is reported on.
-
-    The info field specifies which information userspace wants to get or set
-    using this interface.  Currently we support the following info values:
-
-      VGIC_LEVEL_INFO_LINE_LEVEL:
-	Get/Set the input level of the IRQ line for a set of 32 contiguously
-	numbered interrupts.
-	vINTID must be a multiple of 32.
-
-	kvm_device_attr.addr points to a __u32 value which will contain a
-	bitmap where a set bit means the interrupt level is asserted.
-
-	Bit[n] indicates the status for interrupt vINTID + n.
-
-    SGIs and any interrupt with a higher ID than the number of interrupts
-    supported, will be RAZ/WI.  LPIs are always edge-triggered and are
-    therefore not supported by this interface.
-
-    PPIs are reported per VCPU as specified in the mpidr field, and SPIs are
-    reported with the same value regardless of the mpidr specified.
-
-    The mpidr field encodes the CPU ID based on the affinity information in the
-    architecture defined MPIDR, and the field is encoded as follows:
-      | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
-      |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
-  Errors:
-    -EINVAL: vINTID is not multiple of 32 or
-     info field is not VGIC_LEVEL_INFO_LINE_LEVEL
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
deleted file mode 100644
index 97b6518148f8..000000000000
--- a/Documentation/virtual/kvm/devices/arm-vgic.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-ARM Virtual Generic Interrupt Controller v2 (VGIC)
-==================================================
-
-Device types supported:
-  KVM_DEV_TYPE_ARM_VGIC_V2     ARM Generic Interrupt Controller v2.0
-
-Only one VGIC instance may be instantiated through either this API or the
-legacy KVM_CREATE_IRQCHIP API.  The created VGIC will act as the VM interrupt
-controller, requiring emulated user-space devices to inject interrupts to the
-VGIC instead of directly to CPUs.
-
-GICv3 implementations with hardware compatibility support allow creating a
-guest GICv2 through this interface.  For information on creating a guest GICv3
-device and guest ITS devices, see arm-vgic-v3.txt.  It is not possible to
-create both a GICv3 and GICv2 device on the same VM.
-
-
-Groups:
-  KVM_DEV_ARM_VGIC_GRP_ADDR
-  Attributes:
-    KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
-      Base address in the guest physical address space of the GIC distributor
-      register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
-      This address needs to be 4K aligned and the region covers 4 KByte.
-
-    KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
-      Base address in the guest physical address space of the GIC virtual cpu
-      interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
-      This address needs to be 4K aligned and the region covers 4 KByte.
-  Errors:
-    -E2BIG:  Address outside of addressable IPA range
-    -EINVAL: Incorrectly aligned address
-    -EEXIST: Address already configured
-    -ENXIO:  The group or attribute is unknown/unsupported for this device
-             or hardware support is missing.
-    -EFAULT: Invalid user pointer for attr->addr.
-
-  KVM_DEV_ARM_VGIC_GRP_DIST_REGS
-  Attributes:
-    The attr field of kvm_device_attr encodes two values:
-    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
-    values:   |    reserved   | vcpu_index |      offset     |
-
-    All distributor regs are (rw, 32-bit)
-
-    The offset is relative to the "Distributor base address" as defined in the
-    GICv2 specs.  Getting or setting such a register has the same effect as
-    reading or writing the register on the actual hardware from the cpu whose
-    index is specified with the vcpu_index field.  Note that most distributor
-    fields are not banked, but return the same value regardless of the
-    vcpu_index used to access the register.
-
-    GICD_IIDR.Revision is updated when the KVM implementation of an emulated
-    GICv2 is changed in a way directly observable by the guest or userspace.
-    Userspace should read GICD_IIDR from KVM and write back the read value to
-    confirm its expected behavior is aligned with the KVM implementation.
-    Userspace should set GICD_IIDR before setting any other registers (both
-    KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
-    the expected behavior. Unless GICD_IIDR has been set from userspace, writes
-    to the interrupt group registers (GICD_IGROUPR) are ignored.
-  Errors:
-    -ENXIO: Getting or setting this register is not yet supported
-    -EBUSY: One or more VCPUs are running
-    -EINVAL: Invalid vcpu_index supplied
-
-  KVM_DEV_ARM_VGIC_GRP_CPU_REGS
-  Attributes:
-    The attr field of kvm_device_attr encodes two values:
-    bits:     | 63   ....  40 | 39 ..  32  |  31   ....    0 |
-    values:   |    reserved   | vcpu_index |      offset     |
-
-    All CPU interface regs are (rw, 32-bit)
-
-    The offset specifies the offset from the "CPU interface base address" as
-    defined in the GICv2 specs.  Getting or setting such a register has the
-    same effect as reading or writing the register on the actual hardware.
-
-    The Active Priorities Registers APRn are implementation defined, so we set a
-    fixed format for our implementation that fits with the model of a "GICv2
-    implementation without the security extensions" which we present to the
-    guest.  This interface always exposes four register APR[0-3] describing the
-    maximum possible 128 preemption levels.  The semantics of the register
-    indicate if any interrupts in a given preemption level are in the active
-    state by setting the corresponding bit.
-
-    Thus, preemption level X has one or more active interrupts if and only if:
-
-      APRn[X mod 32] == 0b1,  where n = X / 32
-
-    Bits for undefined preemption levels are RAZ/WI.
-
-    Note that this differs from a CPU's view of the APRs on hardware in which
-    a GIC without the security extensions expose group 0 and group 1 active
-    priorities in separate register groups, whereas we show a combined view
-    similar to GICv2's GICH_APR.
-
-    For historical reasons and to provide ABI compatibility with userspace we
-    export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
-    field in the lower 5 bits of a word, meaning that userspace must always
-    use the lower 5 bits to communicate with the KVM device and must shift the
-    value left by 3 places to obtain the actual priority mask level.
-
-  Errors:
-    -ENXIO: Getting or setting this register is not yet supported
-    -EBUSY: One or more VCPUs are running
-    -EINVAL: Invalid vcpu_index supplied
-
-  KVM_DEV_ARM_VGIC_GRP_NR_IRQS
-  Attributes:
-    A value describing the number of interrupts (SGI, PPI and SPI) for
-    this GIC instance, ranging from 64 to 1024, in increments of 32.
-
-  Errors:
-    -EINVAL: Value set is out of the expected range
-    -EBUSY: Value has already be set, or GIC has already been initialized
-            with default values.
-
-  KVM_DEV_ARM_VGIC_GRP_CTRL
-  Attributes:
-    KVM_DEV_ARM_VGIC_CTRL_INIT
-      request the initialization of the VGIC or ITS, no additional parameter
-      in kvm_device_attr.addr.
-  Errors:
-    -ENXIO: VGIC not properly configured as required prior to calling
-     this attribute
-    -ENODEV: no online VCPU
-    -ENOMEM: memory shortage when allocating vgic internal data
diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt
deleted file mode 100644
index 8257397adc3c..000000000000
--- a/Documentation/virtual/kvm/devices/mpic.txt
+++ /dev/null
@@ -1,53 +0,0 @@
-MPIC interrupt controller
-=========================
-
-Device types supported:
-  KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0
-  KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2
-
-Only one MPIC instance, of any type, may be instantiated.  The created
-MPIC will act as the system interrupt controller, connecting to each
-vcpu's interrupt inputs.
-
-Groups:
-  KVM_DEV_MPIC_GRP_MISC
-  Attributes:
-    KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
-      Base address of the 256 KiB MPIC register space.  Must be
-      naturally aligned.  A value of zero disables the mapping.
-      Reset value is zero.
-
-  KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
-    Access an MPIC register, as if the access were made from the guest.
-    "attr" is the byte offset into the MPIC register space.  Accesses
-    must be 4-byte aligned.
-
-    MSIs may be signaled by using this attribute group to write
-    to the relevant MSIIR.
-
-  KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
-    IRQ input line for each standard openpic source.  0 is inactive and 1
-    is active, regardless of interrupt sense.
-
-    For edge-triggered interrupts:  Writing 1 is considered an activating
-    edge, and writing 0 is ignored.  Reading returns 1 if a previously
-    signaled edge has not been acknowledged, and 0 otherwise.
-
-    "attr" is the IRQ number.  IRQ numbers for standard sources are the
-    byte offset of the relevant IVPR from EIVPR0, divided by 32.
-
-IRQ Routing:
-
-  The MPIC emulation supports IRQ routing. Only a single MPIC device can
-  be instantiated. Once that device has been created, it's available as
-  irqchip id 0.
-
-  This irqchip 0 has 256 interrupt pins, which expose the interrupts in
-  the main array of interrupt sources (a.k.a. "SRC" interrupts).
-
-  The numbering is the same as the MPIC device tree binding -- based on
-  the register offset from the beginning of the sources array, without
-  regard to any subdivisions in chip documentation such as "internal"
-  or "external" interrupts.
-
-  Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
deleted file mode 100644
index a4e20a090174..000000000000
--- a/Documentation/virtual/kvm/devices/s390_flic.txt
+++ /dev/null
@@ -1,163 +0,0 @@
-FLIC (floating interrupt controller)
-====================================
-
-FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
-machine check interruptions. All interrupts are stored in a per-vm list of
-pending interrupts. FLIC performs operations on this list.
-
-Only one FLIC instance may be instantiated.
-
-FLIC provides support to
-- add interrupts (KVM_DEV_FLIC_ENQUEUE)
-- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
-- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
-- purge one pending floating I/O interrupt (KVM_DEV_FLIC_CLEAR_IO_IRQ)
-- enable/disable for the guest transparent async page faults
-- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
-- modify AIS (adapter-interruption-suppression) mode state (KVM_DEV_FLIC_AISM)
-- inject adapter interrupts on a specified adapter (KVM_DEV_FLIC_AIRQ_INJECT)
-- get/set all AIS mode states (KVM_DEV_FLIC_AISM_ALL)
-
-Groups:
-  KVM_DEV_FLIC_ENQUEUE
-    Passes a buffer and length into the kernel which are then injected into
-    the list of pending interrupts.
-    attr->addr contains the pointer to the buffer and attr->attr contains
-    the length of the buffer.
-    The format of the data structure kvm_s390_irq as it is copied from userspace
-    is defined in usr/include/linux/kvm.h.
-
-  KVM_DEV_FLIC_GET_ALL_IRQS
-    Copies all floating interrupts into a buffer provided by userspace.
-    When the buffer is too small it returns -ENOMEM, which is the indication
-    for userspace to try again with a bigger buffer.
-    -ENOBUFS is returned when the allocation of a kernelspace buffer has
-    failed.
-    -EFAULT is returned when copying data to userspace failed.
-    All interrupts remain pending, i.e. are not deleted from the list of
-    currently pending interrupts.
-    attr->addr contains the userspace address of the buffer into which all
-    interrupt data will be copied.
-    attr->attr contains the size of the buffer in bytes.
-
-  KVM_DEV_FLIC_CLEAR_IRQS
-    Simply deletes all elements from the list of currently pending floating
-    interrupts.  No interrupts are injected into the guest.
-
-  KVM_DEV_FLIC_CLEAR_IO_IRQ
-    Deletes one (if any) I/O interrupt for a subchannel identified by the
-    subsystem identification word passed via the buffer specified by
-    attr->addr (address) and attr->attr (length).
-
-  KVM_DEV_FLIC_APF_ENABLE
-    Enables async page faults for the guest. So in case of a major page fault
-    the host is allowed to handle this async and continues the guest.
-
-  KVM_DEV_FLIC_APF_DISABLE_WAIT
-    Disables async page faults for the guest and waits until already pending
-    async page faults are done. This is necessary to trigger a completion interrupt
-    for every init interrupt before migrating the interrupt list.
-
-  KVM_DEV_FLIC_ADAPTER_REGISTER
-    Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
-    describing the adapter to register:
-
-struct kvm_s390_io_adapter {
-	__u32 id;
-	__u8 isc;
-	__u8 maskable;
-	__u8 swap;
-	__u8 flags;
-};
-
-   id contains the unique id for the adapter, isc the I/O interruption subclass
-   to use, maskable whether this adapter may be masked (interrupts turned off),
-   swap whether the indicators need to be byte swapped, and flags contains
-   further characteristics of the adapter.
-   Currently defined values for 'flags' are:
-   - KVM_S390_ADAPTER_SUPPRESSIBLE: adapter is subject to AIS
-     (adapter-interrupt-suppression) facility. This flag only has an effect if
-     the AIS capability is enabled.
-   Unknown flag values are ignored.
-
-
-  KVM_DEV_FLIC_ADAPTER_MODIFY
-    Modifies attributes of an existing I/O adapter interrupt source. Takes
-    a kvm_s390_io_adapter_req specifying the adapter and the operation:
-
-struct kvm_s390_io_adapter_req {
-	__u32 id;
-	__u8 type;
-	__u8 mask;
-	__u16 pad0;
-	__u64 addr;
-};
-
-    id specifies the adapter and type the operation. The supported operations
-    are:
-
-    KVM_S390_IO_ADAPTER_MASK
-      mask or unmask the adapter, as specified in mask
-
-    KVM_S390_IO_ADAPTER_MAP
-      perform a gmap translation for the guest address provided in addr,
-      pin a userspace page for the translated address and add it to the
-      list of mappings
-      Note: A new mapping will be created unconditionally; therefore,
-            the calling code should avoid making duplicate mappings.
-
-    KVM_S390_IO_ADAPTER_UNMAP
-      release a userspace page for the translated address specified in addr
-      from the list of mappings
-
-  KVM_DEV_FLIC_AISM
-    modify the adapter-interruption-suppression mode for a given isc if the
-    AIS capability is enabled. Takes a kvm_s390_ais_req describing:
-
-struct kvm_s390_ais_req {
-	__u8 isc;
-	__u16 mode;
-};
-
-    isc contains the target I/O interruption subclass, mode the target
-    adapter-interruption-suppression mode. The following modes are
-    currently supported:
-    - KVM_S390_AIS_MODE_ALL: ALL-Interruptions Mode, i.e. airq injection
-      is always allowed;
-    - KVM_S390_AIS_MODE_SINGLE: SINGLE-Interruption Mode, i.e. airq
-      injection is only allowed once and the following adapter interrupts
-      will be suppressed until the mode is set again to ALL-Interruptions
-      or SINGLE-Interruption mode.
-
-  KVM_DEV_FLIC_AIRQ_INJECT
-    Inject adapter interrupts on a specified adapter.
-    attr->attr contains the unique id for the adapter, which allows for
-    adapter-specific checks and actions.
-    For adapters subject to AIS, handle the airq injection suppression for
-    an isc according to the adapter-interruption-suppression mode on condition
-    that the AIS capability is enabled.
-
-  KVM_DEV_FLIC_AISM_ALL
-    Gets or sets the adapter-interruption-suppression mode for all ISCs. Takes
-    a kvm_s390_ais_all describing:
-
-struct kvm_s390_ais_all {
-       __u8 simm; /* Single-Interruption-Mode mask */
-       __u8 nimm; /* No-Interruption-Mode mask *
-};
-
-    simm contains Single-Interruption-Mode mask for all ISCs, nimm contains
-    No-Interruption-Mode mask for all ISCs. Each bit in simm and nimm corresponds
-    to an ISC (MSB0 bit 0 to ISC 0 and so on). The combination of simm bit and
-    nimm bit presents AIS mode for a ISC.
-
-    KVM_DEV_FLIC_AISM_ALL is indicated by KVM_CAP_S390_AIS_MIGRATION.
-
-Note: The KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR device ioctls executed on
-FLIC with an unknown group or attribute gives the error code EINVAL (instead of
-ENXIO, as specified in the API documentation). It is not possible to conclude
-that a FLIC operation is unavailable based on the error code resulting from a
-usage attempt.
-
-Note: The KVM_DEV_FLIC_CLEAR_IO_IRQ ioctl will return EINVAL in case a zero
-schid is specified.
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt
deleted file mode 100644
index 2b5dab16c4f2..000000000000
--- a/Documentation/virtual/kvm/devices/vcpu.txt
+++ /dev/null
@@ -1,62 +0,0 @@
-Generic vcpu interface
-====================================
-
-The virtual cpu "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
-KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same struct
-kvm_device_attr as other devices, but targets VCPU-wide settings and controls.
-
-The groups and attributes per virtual cpu, if any, are architecture specific.
-
-1. GROUP: KVM_ARM_VCPU_PMU_V3_CTRL
-Architectures: ARM64
-
-1.1. ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_IRQ
-Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a
-            pointer to an int
-Returns: -EBUSY: The PMU overflow interrupt is already set
-         -ENXIO: The overflow interrupt not set when attempting to get it
-         -ENODEV: PMUv3 not supported
-         -EINVAL: Invalid PMU overflow interrupt number supplied or
-                  trying to set the IRQ number without using an in-kernel
-                  irqchip.
-
-A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt
-number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt
-type must be same for each vcpu. As a PPI, the interrupt number is the same for
-all vcpus, while as an SPI it must be a separate number per vcpu.
-
-1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT
-Parameters: no additional parameter in kvm_device_attr.addr
-Returns: -ENODEV: PMUv3 not supported or GIC not initialized
-         -ENXIO: PMUv3 not properly configured or in-kernel irqchip not
-                 configured as required prior to calling this attribute
-         -EBUSY: PMUv3 already initialized
-
-Request the initialization of the PMUv3.  If using the PMUv3 with an in-kernel
-virtual GIC implementation, this must be done after initializing the in-kernel
-irqchip.
-
-
-2. GROUP: KVM_ARM_VCPU_TIMER_CTRL
-Architectures: ARM,ARM64
-
-2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER
-2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER
-Parameters: in kvm_device_attr.addr the address for the timer interrupt is a
-            pointer to an int
-Returns: -EINVAL: Invalid timer interrupt number
-         -EBUSY:  One or more VCPUs has already run
-
-A value describing the architected timer interrupt number when connected to an
-in-kernel virtual GIC.  These must be a PPI (16 <= intid < 32).  Setting the
-attribute overrides the default values (see below).
-
-KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27)
-KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30)
-
-Setting the same PPI for different timers will prevent the VCPUs from running.
-Setting the interrupt number on a VCPU configures all VCPUs created at that
-time to use the number provided for a given timer, overwriting any previously
-configured values on other VCPUs.  Userspace should configure the interrupt
-numbers on at least one VCPU after creating all VCPUs and before running any
-VCPUs.
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
deleted file mode 100644
index 528c77c8022c..000000000000
--- a/Documentation/virtual/kvm/devices/vfio.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-VFIO virtual device
-===================
-
-Device types supported:
-  KVM_DEV_TYPE_VFIO
-
-Only one VFIO instance may be created per VM.  The created device
-tracks VFIO groups in use by the VM and features of those groups
-important to the correctness and acceleration of the VM.  As groups
-are enabled and disabled for use by the VM, KVM should be updated
-about their presence.  When registered with KVM, a reference to the
-VFIO-group is held by KVM.
-
-Groups:
-  KVM_DEV_VFIO_GROUP
-
-KVM_DEV_VFIO_GROUP attributes:
-  KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
-	kvm_device_attr.addr points to an int32_t file descriptor
-	for the VFIO group.
-  KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
-	kvm_device_attr.addr points to an int32_t file descriptor
-	for the VFIO group.
-  KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE: attaches a guest visible TCE table
-	allocated by sPAPR KVM.
-	kvm_device_attr.addr points to a struct:
-
-	struct kvm_vfio_spapr_tce {
-		__s32	groupfd;
-		__s32	tablefd;
-	};
-
-	where
-	@groupfd is a file descriptor for a VFIO group;
-	@tablefd is a file descriptor for a TCE table allocated via
-		KVM_CREATE_SPAPR_TCE.
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
deleted file mode 100644
index 4ffb82b02468..000000000000
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ /dev/null
@@ -1,270 +0,0 @@
-Generic vm interface
-====================================
-
-The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
-KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
-struct kvm_device_attr as other devices, but targets VM-wide settings
-and controls.
-
-The groups and attributes per virtual machine, if any, are architecture
-specific.
-
-1. GROUP: KVM_S390_VM_MEM_CTRL
-Architectures: s390
-
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
-Parameters: none
-Returns: -EBUSY if a vcpu is already defined, otherwise 0
-
-Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
-
-1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
-Parameters: none
-Returns: -EINVAL if CMMA was not enabled
-         0 otherwise
-
-Clear the CMMA status for all guest pages, so any pages the guest marked
-as unused are again used any may not be reclaimed by the host.
-
-1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
-Parameters: in attr->addr the address for the new limit of guest memory
-Returns: -EFAULT if the given address is not accessible
-         -EINVAL if the virtual machine is of type UCONTROL
-         -E2BIG if the given guest memory is to big for that machine
-         -EBUSY if a vcpu is already defined
-         -ENOMEM if not enough memory is available for a new shadow guest mapping
-          0 otherwise
-
-Allows userspace to query the actual limit and set a new limit for
-the maximum guest memory size. The limit will be rounded up to
-2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
-the number of page table levels. In the case that there is no limit we will set
-the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX).
-
-2. GROUP: KVM_S390_VM_CPU_MODEL
-Architectures: s390
-
-2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
-
-Allows user space to retrieve machine and kvm specific cpu related information:
-
-struct kvm_s390_vm_cpu_machine {
-       __u64 cpuid;           # CPUID of host
-       __u32 ibc;             # IBC level range offered by host
-       __u8  pad[4];
-       __u64 fac_mask[256];   # set of cpu facilities enabled by KVM
-       __u64 fac_list[256];   # set of cpu facilities offered by host
-}
-
-Parameters: address of buffer to store the machine related cpu data
-            of type struct kvm_s390_vm_cpu_machine*
-Returns:    -EFAULT if the given address is not accessible from kernel space
-	    -ENOMEM if not enough memory is available to process the ioctl
-	    0 in case of success
-
-2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
-
-Allows user space to retrieve or request to change cpu related information for a vcpu:
-
-struct kvm_s390_vm_cpu_processor {
-       __u64 cpuid;           # CPUID currently (to be) used by this vcpu
-       __u16 ibc;             # IBC level currently (to be) used by this vcpu
-       __u8  pad[6];
-       __u64 fac_list[256];   # set of cpu facilities currently (to be) used
-                              # by this vcpu
-}
-
-KVM does not enforce or limit the cpu model data in any form. Take the information
-retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
-setups. Instruction interceptions triggered by additionally set facility bits that
-are not handled by KVM need to by imlemented in the VM driver code.
-
-Parameters: address of buffer to store/set the processor related cpu
-	    data of type struct kvm_s390_vm_cpu_processor*.
-Returns:    -EBUSY in case 1 or more vcpus are already activated (only in write case)
-	    -EFAULT if the given address is not accessible from kernel space
-	    -ENOMEM if not enough memory is available to process the ioctl
-	    0 in case of success
-
-2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o)
-
-Allows user space to retrieve available cpu features. A feature is available if
-provided by the hardware and supported by kvm. In theory, cpu features could
-even be completely emulated by kvm.
-
-struct kvm_s390_vm_cpu_feat {
-        __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering
-};
-
-Parameters: address of a buffer to load the feature list from.
-Returns:    -EFAULT if the given address is not accessible from kernel space.
-	    0 in case of success.
-
-2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w)
-
-Allows user space to retrieve or change enabled cpu features for all VCPUs of a
-VM. Features that are not available cannot be enabled.
-
-See 2.3. for a description of the parameter struct.
-
-Parameters: address of a buffer to store/load the feature list from.
-Returns:    -EFAULT if the given address is not accessible from kernel space.
-	    -EINVAL if a cpu feature that is not available is to be enabled.
-	    -EBUSY if at least one VCPU has already been defined.
-	    0 in case of success.
-
-2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o)
-
-Allows user space to retrieve available cpu subfunctions without any filtering
-done by a set IBC. These subfunctions are indicated to the guest VCPU via
-query or "test bit" subfunctions and used e.g. by cpacf functions, plo and ptff.
-
-A subfunction block is only valid if KVM_S390_VM_CPU_MACHINE contains the
-STFL(E) bit introducing the affected instruction. If the affected instruction
-indicates subfunctions via a "query subfunction", the response block is
-contained in the returned struct. If the affected instruction
-indicates subfunctions via a "test bit" mechanism, the subfunction codes are
-contained in the returned struct in MSB 0 bit numbering.
-
-struct kvm_s390_vm_cpu_subfunc {
-       u8 plo[32];           # always valid (ESA/390 feature)
-       u8 ptff[16];          # valid with TOD-clock steering
-       u8 kmac[16];          # valid with Message-Security-Assist
-       u8 kmc[16];           # valid with Message-Security-Assist
-       u8 km[16];            # valid with Message-Security-Assist
-       u8 kimd[16];          # valid with Message-Security-Assist
-       u8 klmd[16];          # valid with Message-Security-Assist
-       u8 pckmo[16];         # valid with Message-Security-Assist-Extension 3
-       u8 kmctr[16];         # valid with Message-Security-Assist-Extension 4
-       u8 kmf[16];           # valid with Message-Security-Assist-Extension 4
-       u8 kmo[16];           # valid with Message-Security-Assist-Extension 4
-       u8 pcc[16];           # valid with Message-Security-Assist-Extension 4
-       u8 ppno[16];          # valid with Message-Security-Assist-Extension 5
-       u8 kma[16];           # valid with Message-Security-Assist-Extension 8
-       u8 kdsa[16];          # valid with Message-Security-Assist-Extension 9
-       u8 reserved[1792];    # reserved for future instructions
-};
-
-Parameters: address of a buffer to load the subfunction blocks from.
-Returns:    -EFAULT if the given address is not accessible from kernel space.
-	    0 in case of success.
-
-2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w)
-
-Allows user space to retrieve or change cpu subfunctions to be indicated for
-all VCPUs of a VM. This attribute will only be available if kernel and
-hardware support are in place.
-
-The kernel uses the configured subfunction blocks for indication to
-the guest. A subfunction block will only be used if the associated STFL(E) bit
-has not been disabled by user space (so the instruction to be queried is
-actually available for the guest).
-
-As long as no data has been written, a read will fail. The IBC will be used
-to determine available subfunctions in this case, this will guarantee backward
-compatibility.
-
-See 2.5. for a description of the parameter struct.
-
-Parameters: address of a buffer to store/load the subfunction blocks from.
-Returns:    -EFAULT if the given address is not accessible from kernel space.
-	    -EINVAL when reading, if there was no write yet.
-	    -EBUSY if at least one VCPU has already been defined.
-	    0 in case of success.
-
-3. GROUP: KVM_S390_VM_TOD
-Architectures: s390
-
-3.1. ATTRIBUTE: KVM_S390_VM_TOD_HIGH
-
-Allows user space to set/get the TOD clock extension (u8) (superseded by
-KVM_S390_VM_TOD_EXT).
-
-Parameters: address of a buffer in user space to store the data (u8) to
-Returns:    -EFAULT if the given address is not accessible from kernel space
-	    -EINVAL if setting the TOD clock extension to != 0 is not supported
-
-3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
-
-Allows user space to set/get bits 0-63 of the TOD clock register as defined in
-the POP (u64).
-
-Parameters: address of a buffer in user space to store the data (u64) to
-Returns:    -EFAULT if the given address is not accessible from kernel space
-
-3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
-Allows user space to set/get bits 0-63 of the TOD clock register as defined in
-the POP (u64). If the guest CPU model supports the TOD clock extension (u8), it
-also allows user space to get/set it. If the guest CPU model does not support
-it, it is stored as 0 and not allowed to be set to a value != 0.
-
-Parameters: address of a buffer in user space to store the data
-            (kvm_s390_vm_tod_clock) to
-Returns:    -EFAULT if the given address is not accessible from kernel space
-	    -EINVAL if setting the TOD clock extension to != 0 is not supported
-
-4. GROUP: KVM_S390_VM_CRYPTO
-Architectures: s390
-
-4.1. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_AES_KW (w/o)
-
-Allows user space to enable aes key wrapping, including generating a new
-wrapping key.
-
-Parameters: none
-Returns:    0
-
-4.2. ATTRIBUTE: KVM_S390_VM_CRYPTO_ENABLE_DEA_KW (w/o)
-
-Allows user space to enable dea key wrapping, including generating a new
-wrapping key.
-
-Parameters: none
-Returns:    0
-
-4.3. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_AES_KW (w/o)
-
-Allows user space to disable aes key wrapping, clearing the wrapping key.
-
-Parameters: none
-Returns:    0
-
-4.4. ATTRIBUTE: KVM_S390_VM_CRYPTO_DISABLE_DEA_KW (w/o)
-
-Allows user space to disable dea key wrapping, clearing the wrapping key.
-
-Parameters: none
-Returns:    0
-
-5. GROUP: KVM_S390_VM_MIGRATION
-Architectures: s390
-
-5.1. ATTRIBUTE: KVM_S390_VM_MIGRATION_STOP (w/o)
-
-Allows userspace to stop migration mode, needed for PGSTE migration.
-Setting this attribute when migration mode is not active will have no
-effects.
-
-Parameters: none
-Returns:    0
-
-5.2. ATTRIBUTE: KVM_S390_VM_MIGRATION_START (w/o)
-
-Allows userspace to start migration mode, needed for PGSTE migration.
-Setting this attribute when migration mode is already active will have
-no effects.
-
-Parameters: none
-Returns:    -ENOMEM if there is not enough free memory to start migration mode
-	    -EINVAL if the state of the VM is invalid (e.g. no memory defined)
-	    0 in case of success.
-
-5.3. ATTRIBUTE: KVM_S390_VM_MIGRATION_STATUS (r/o)
-
-Allows userspace to query the status of migration mode.
-
-Parameters: address of a buffer in user space to store the data (u64) to;
-	    the data itself is either 0 if migration mode is disabled or 1
-	    if it is enabled
-Returns:    -EFAULT if the given address is not accessible from kernel space
-	    0 in case of success.
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt
deleted file mode 100644
index 42864935ac5d..000000000000
--- a/Documentation/virtual/kvm/devices/xics.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-XICS interrupt controller
-
-Device type supported: KVM_DEV_TYPE_XICS
-
-Groups:
-  KVM_DEV_XICS_SOURCES
-  Attributes: One per interrupt source, indexed by the source number.
-
-This device emulates the XICS (eXternal Interrupt Controller
-Specification) defined in PAPR.  The XICS has a set of interrupt
-sources, each identified by a 20-bit source number, and a set of
-Interrupt Control Presentation (ICP) entities, also called "servers",
-each associated with a virtual CPU.
-
-The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
-capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
-the interrupt server number (i.e. the vcpu number from the XICS's
-point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
-64 bits of state which can be read and written using the
-KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
-state word has the following bitfields, starting at the
-least-significant end of the word:
-
-* Unused, 16 bits
-
-* Pending interrupt priority, 8 bits
-  Zero is the highest priority, 255 means no interrupt is pending.
-
-* Pending IPI (inter-processor interrupt) priority, 8 bits
-  Zero is the highest priority, 255 means no IPI is pending.
-
-* Pending interrupt source number, 24 bits
-  Zero means no interrupt pending, 2 means an IPI is pending
-
-* Current processor priority, 8 bits
-  Zero is the highest priority, meaning no interrupts can be
-  delivered, and 255 is the lowest priority.
-
-Each source has 64 bits of state that can be read and written using
-the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
-KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
-the interrupt source number.  The 64 bit state word has the following
-bitfields, starting from the least-significant end of the word:
-
-* Destination (server number), 32 bits
-  This specifies where the interrupt should be sent, and is the
-  interrupt server number specified for the destination vcpu.
-
-* Priority, 8 bits
-  This is the priority specified for this interrupt source, where 0 is
-  the highest priority and 255 is the lowest.  An interrupt with a
-  priority of 255 will never be delivered.
-
-* Level sensitive flag, 1 bit
-  This bit is 1 for a level-sensitive interrupt source, or 0 for
-  edge-sensitive (or MSI).
-
-* Masked flag, 1 bit
-  This bit is set to 1 if the interrupt is masked (cannot be delivered
-  regardless of its priority), for example by the ibm,int-off RTAS
-  call, or 0 if it is not masked.
-
-* Pending flag, 1 bit
-  This bit is 1 if the source has a pending interrupt, otherwise 0.
-
-Only one XICS instance may be created per VM.
diff --git a/Documentation/virtual/kvm/devices/xive.txt b/Documentation/virtual/kvm/devices/xive.txt
deleted file mode 100644
index 9a24a4525253..000000000000
--- a/Documentation/virtual/kvm/devices/xive.txt
+++ /dev/null
@@ -1,197 +0,0 @@
-POWER9 eXternal Interrupt Virtualization Engine (XIVE Gen1)
-==========================================================
-
-Device types supported:
-  KVM_DEV_TYPE_XIVE     POWER9 XIVE Interrupt Controller generation 1
-
-This device acts as a VM interrupt controller. It provides the KVM
-interface to configure the interrupt sources of a VM in the underlying
-POWER9 XIVE interrupt controller.
-
-Only one XIVE instance may be instantiated. A guest XIVE device
-requires a POWER9 host and the guest OS should have support for the
-XIVE native exploitation interrupt mode. If not, it should run using
-the legacy interrupt mode, referred as XICS (POWER7/8).
-
-* Device Mappings
-
-  The KVM device exposes different MMIO ranges of the XIVE HW which
-  are required for interrupt management. These are exposed to the
-  guest in VMAs populated with a custom VM fault handler.
-
-  1. Thread Interrupt Management Area (TIMA)
-
-  Each thread has an associated Thread Interrupt Management context
-  composed of a set of registers. These registers let the thread
-  handle priority management and interrupt acknowledgment. The most
-  important are :
-
-      - Interrupt Pending Buffer     (IPB)
-      - Current Processor Priority   (CPPR)
-      - Notification Source Register (NSR)
-
-  They are exposed to software in four different pages each proposing
-  a view with a different privilege. The first page is for the
-  physical thread context and the second for the hypervisor. Only the
-  third (operating system) and the fourth (user level) are exposed the
-  guest.
-
-  2. Event State Buffer (ESB)
-
-  Each source is associated with an Event State Buffer (ESB) with
-  either a pair of even/odd pair of pages which provides commands to
-  manage the source: to trigger, to EOI, to turn off the source for
-  instance.
-
-  3. Device pass-through
-
-  When a device is passed-through into the guest, the source
-  interrupts are from a different HW controller (PHB4) and the ESB
-  pages exposed to the guest should accommadate this change.
-
-  The passthru_irq helpers, kvmppc_xive_set_mapped() and
-  kvmppc_xive_clr_mapped() are called when the device HW irqs are
-  mapped into or unmapped from the guest IRQ number space. The KVM
-  device extends these helpers to clear the ESB pages of the guest IRQ
-  number being mapped and then lets the VM fault handler repopulate.
-  The handler will insert the ESB page corresponding to the HW
-  interrupt of the device being passed-through or the initial IPI ESB
-  page if the device has being removed.
-
-  The ESB remapping is fully transparent to the guest and the OS
-  device driver. All handling is done within VFIO and the above
-  helpers in KVM-PPC.
-
-* Groups:
-
-  1. KVM_DEV_XIVE_GRP_CTRL
-  Provides global controls on the device
-  Attributes:
-    1.1 KVM_DEV_XIVE_RESET (write only)
-    Resets the interrupt controller configuration for sources and event
-    queues. To be used by kexec and kdump.
-    Errors: none
-
-    1.2 KVM_DEV_XIVE_EQ_SYNC (write only)
-    Sync all the sources and queues and mark the EQ pages dirty. This
-    to make sure that a consistent memory state is captured when
-    migrating the VM.
-    Errors: none
-
-  2. KVM_DEV_XIVE_GRP_SOURCE (write only)
-  Initializes a new source in the XIVE device and mask it.
-  Attributes:
-    Interrupt source number  (64-bit)
-  The kvm_device_attr.addr points to a __u64 value:
-  bits:     | 63   ....  2 |   1   |   0
-  values:   |    unused    | level | type
-  - type:  0:MSI 1:LSI
-  - level: assertion level in case of an LSI.
-  Errors:
-    -E2BIG:  Interrupt source number is out of range
-    -ENOMEM: Could not create a new source block
-    -EFAULT: Invalid user pointer for attr->addr.
-    -ENXIO:  Could not allocate underlying HW interrupt
-
-  3. KVM_DEV_XIVE_GRP_SOURCE_CONFIG (write only)
-  Configures source targeting
-  Attributes:
-    Interrupt source number  (64-bit)
-  The kvm_device_attr.addr points to a __u64 value:
-  bits:     | 63   ....  33 |  32  | 31 .. 3 |  2 .. 0
-  values:   |    eisn       | mask |  server | priority
-  - priority: 0-7 interrupt priority level
-  - server: CPU number chosen to handle the interrupt
-  - mask: mask flag (unused)
-  - eisn: Effective Interrupt Source Number
-  Errors:
-    -ENOENT: Unknown source number
-    -EINVAL: Not initialized source number
-    -EINVAL: Invalid priority
-    -EINVAL: Invalid CPU number.
-    -EFAULT: Invalid user pointer for attr->addr.
-    -ENXIO:  CPU event queues not configured or configuration of the
-             underlying HW interrupt failed
-    -EBUSY:  No CPU available to serve interrupt
-
-  4. KVM_DEV_XIVE_GRP_EQ_CONFIG (read-write)
-  Configures an event queue of a CPU
-  Attributes:
-    EQ descriptor identifier (64-bit)
-  The EQ descriptor identifier is a tuple (server, priority) :
-  bits:     | 63   ....  32 | 31 .. 3 |  2 .. 0
-  values:   |    unused     |  server | priority
-  The kvm_device_attr.addr points to :
-    struct kvm_ppc_xive_eq {
-	__u32 flags;
-	__u32 qshift;
-	__u64 qaddr;
-	__u32 qtoggle;
-	__u32 qindex;
-	__u8  pad[40];
-    };
-  - flags: queue flags
-    KVM_XIVE_EQ_ALWAYS_NOTIFY (required)
-	forces notification without using the coalescing mechanism
-	provided by the XIVE END ESBs.
-  - qshift: queue size (power of 2)
-  - qaddr: real address of queue
-  - qtoggle: current queue toggle bit
-  - qindex: current queue index
-  - pad: reserved for future use
-  Errors:
-    -ENOENT: Invalid CPU number
-    -EINVAL: Invalid priority
-    -EINVAL: Invalid flags
-    -EINVAL: Invalid queue size
-    -EINVAL: Invalid queue address
-    -EFAULT: Invalid user pointer for attr->addr.
-    -EIO:    Configuration of the underlying HW failed
-
-  5. KVM_DEV_XIVE_GRP_SOURCE_SYNC (write only)
-  Synchronize the source to flush event notifications
-  Attributes:
-    Interrupt source number  (64-bit)
-  Errors:
-    -ENOENT: Unknown source number
-    -EINVAL: Not initialized source number
-
-* VCPU state
-
-  The XIVE IC maintains VP interrupt state in an internal structure
-  called the NVT. When a VP is not dispatched on a HW processor
-  thread, this structure can be updated by HW if the VP is the target
-  of an event notification.
-
-  It is important for migration to capture the cached IPB from the NVT
-  as it synthesizes the priorities of the pending interrupts. We
-  capture a bit more to report debug information.
-
-  KVM_REG_PPC_VP_STATE (2 * 64bits)
-  bits:     |  63  ....  32  |  31  ....  0  |
-  values:   |   TIMA word0   |   TIMA word1  |
-  bits:     | 127       ..........       64  |
-  values:   |            unused              |
-
-* Migration:
-
-  Saving the state of a VM using the XIVE native exploitation mode
-  should follow a specific sequence. When the VM is stopped :
-
-  1. Mask all sources (PQ=01) to stop the flow of events.
-
-  2. Sync the XIVE device with the KVM control KVM_DEV_XIVE_EQ_SYNC to
-  flush any in-flight event notification and to stabilize the EQs. At
-  this stage, the EQ pages are marked dirty to make sure they are
-  transferred in the migration sequence.
-
-  3. Capture the state of the source targeting, the EQs configuration
-  and the state of thread interrupt context registers.
-
-  Restore is similar :
-
-  1. Restore the EQ configuration. As targeting depends on it.
-  2. Restore targeting
-  3. Restore the thread interrupt contexts
-  4. Restore the source states
-  5. Let the vCPU run
diff --git a/Documentation/virtual/kvm/halt-polling.txt b/Documentation/virtual/kvm/halt-polling.txt
deleted file mode 100644
index 4f791b128dd2..000000000000
--- a/Documentation/virtual/kvm/halt-polling.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-The KVM halt polling system
-===========================
-
-The KVM halt polling system provides a feature within KVM whereby the latency
-of a guest can, under some circumstances, be reduced by polling in the host
-for some time period after the guest has elected to no longer run by cedeing.
-That is, when a guest vcpu has ceded, or in the case of powerpc when all of the
-vcpus of a single vcore have ceded, the host kernel polls for wakeup conditions
-before giving up the cpu to the scheduler in order to let something else run.
-
-Polling provides a latency advantage in cases where the guest can be run again
-very quickly by at least saving us a trip through the scheduler, normally on
-the order of a few micro-seconds, although performance benefits are workload
-dependant. In the event that no wakeup source arrives during the polling
-interval or some other task on the runqueue is runnable the scheduler is
-invoked. Thus halt polling is especially useful on workloads with very short
-wakeup periods where the time spent halt polling is minimised and the time
-savings of not invoking the scheduler are distinguishable.
-
-The generic halt polling code is implemented in:
-
-	virt/kvm/kvm_main.c: kvm_vcpu_block()
-
-The powerpc kvm-hv specific case is implemented in:
-
-	arch/powerpc/kvm/book3s_hv.c: kvmppc_vcore_blocked()
-
-Halt Polling Interval
-=====================
-
-The maximum time for which to poll before invoking the scheduler, referred to
-as the halt polling interval, is increased and decreased based on the perceived
-effectiveness of the polling in an attempt to limit pointless polling.
-This value is stored in either the vcpu struct:
-
-	kvm_vcpu->halt_poll_ns
-
-or in the case of powerpc kvm-hv, in the vcore struct:
-
-	kvmppc_vcore->halt_poll_ns
-
-Thus this is a per vcpu (or vcore) value.
-
-During polling if a wakeup source is received within the halt polling interval,
-the interval is left unchanged. In the event that a wakeup source isn't
-received during the polling interval (and thus schedule is invoked) there are
-two options, either the polling interval and total block time[0] were less than
-the global max polling interval (see module params below), or the total block
-time was greater than the global max polling interval.
-
-In the event that both the polling interval and total block time were less than
-the global max polling interval then the polling interval can be increased in
-the hope that next time during the longer polling interval the wake up source
-will be received while the host is polling and the latency benefits will be
-received. The polling interval is grown in the function grow_halt_poll_ns() and
-is multiplied by the module parameters halt_poll_ns_grow and
-halt_poll_ns_grow_start.
-
-In the event that the total block time was greater than the global max polling
-interval then the host will never poll for long enough (limited by the global
-max) to wakeup during the polling interval so it may as well be shrunk in order
-to avoid pointless polling. The polling interval is shrunk in the function
-shrink_halt_poll_ns() and is divided by the module parameter
-halt_poll_ns_shrink, or set to 0 iff halt_poll_ns_shrink == 0.
-
-It is worth noting that this adjustment process attempts to hone in on some
-steady state polling interval but will only really do a good job for wakeups
-which come at an approximately constant rate, otherwise there will be constant
-adjustment of the polling interval.
-
-[0] total block time: the time between when the halt polling function is
-		      invoked and a wakeup source received (irrespective of
-		      whether the scheduler is invoked within that function).
-
-Module Parameters
-=================
-
-The kvm module has 3 tuneable module parameters to adjust the global max
-polling interval as well as the rate at which the polling interval is grown and
-shrunk. These variables are defined in include/linux/kvm_host.h and as module
-parameters in virt/kvm/kvm_main.c, or arch/powerpc/kvm/book3s_hv.c in the
-powerpc kvm-hv case.
-
-Module Parameter	|   Description		    |	     Default Value
---------------------------------------------------------------------------------
-halt_poll_ns		| The global max polling    | KVM_HALT_POLL_NS_DEFAULT
-			| interval which defines    |
-			| the ceiling value of the  |
-			| polling interval for      | (per arch value)
-			| each vcpu.		    |
---------------------------------------------------------------------------------
-halt_poll_ns_grow	| The value by which the    | 2
-			| halt polling interval is  |
-			| multiplied in the	    |
-			| grow_halt_poll_ns()	    |
-			| function.		    |
---------------------------------------------------------------------------------
-halt_poll_ns_grow_start | The initial value to grow | 10000
-			| to from zero in the	    |
-			| grow_halt_poll_ns()	    |
-			| function.		    |
---------------------------------------------------------------------------------
-halt_poll_ns_shrink	| The value by which the    | 0
-			| halt polling interval is  |
-			| divided in the	    |
-			| shrink_halt_poll_ns()	    |
-			| function.		    |
---------------------------------------------------------------------------------
-
-These module parameters can be set from the debugfs files in:
-
-	/sys/module/kvm/parameters/
-
-Note: that these module parameters are system wide values and are not able to
-      be tuned on a per vm basis.
-
-Further Notes
-=============
-
-- Care should be taken when setting the halt_poll_ns module parameter as a
-large value has the potential to drive the cpu usage to 100% on a machine which
-would be almost entirely idle otherwise. This is because even if a guest has
-wakeups during which very little work is done and which are quite far apart, if
-the period is shorter than the global max polling interval (halt_poll_ns) then
-the host will always poll for the entire block time and thus cpu utilisation
-will go to 100%.
-
-- Halt polling essentially presents a trade off between power usage and latency
-and the module parameters should be used to tune the affinity for this. Idle
-cpu time is essentially converted to host kernel time with the aim of decreasing
-latency when entering the guest.
-
-- Halt polling will only be conducted by the host when no other tasks are
-runnable on that cpu, otherwise the polling will cease immediately and
-schedule will be invoked to allow that other task to run. Thus this doesn't
-allow a guest to denial of service the cpu.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
deleted file mode 100644
index da210651f714..000000000000
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-Linux KVM Hypercall:
-===================
-X86:
- KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
- instruction. The hypervisor can replace it with instructions that are
- guaranteed to be supported.
-
- Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
- The hypercall number should be placed in rax and the return value will be
- placed in rax.  No other registers will be clobbered unless explicitly stated
- by the particular hypercall.
-
-S390:
-  R2-R7 are used for parameters 1-6. In addition, R1 is used for hypercall
-  number. The return value is written to R2.
-
-  S390 uses diagnose instruction as hypercall (0x500) along with hypercall
-  number in R1.
-
-  For further information on the S390 diagnose call as supported by KVM,
-  refer to Documentation/virtual/kvm/s390-diag.txt.
-
- PowerPC:
-  It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
-  Return value is placed in R3.
-
-  KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
-  property inside the device tree's /hypervisor node.
-  For more information refer to Documentation/virtual/kvm/ppc-pv.txt
-
-MIPS:
-  KVM hypercalls use the HYPCALL instruction with code 0 and the hypercall
-  number in $2 (v0). Up to four arguments may be placed in $4-$7 (a0-a3) and
-  the return value is placed in $2 (v0).
-
-KVM Hypercalls Documentation
-===========================
-The template for each hypercall is:
-1. Hypercall name.
-2. Architecture(s)
-3. Status (deprecated, obsolete, active)
-4. Purpose
-
-1. KVM_HC_VAPIC_POLL_IRQ
-------------------------
-Architecture: x86
-Status: active
-Purpose: Trigger guest exit so that the host can check for pending
-interrupts on reentry.
-
-2. KVM_HC_MMU_OP
-------------------------
-Architecture: x86
-Status: deprecated.
-Purpose: Support MMU operations such as writing to PTE,
-flushing TLB, release PT.
-
-3. KVM_HC_FEATURES
-------------------------
-Architecture: PPC
-Status: active
-Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
-used to enumerate which hypercalls are available. On PPC, either device tree
-based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
-mechanism (which is this hypercall) can be used.
-
-4. KVM_HC_PPC_MAP_MAGIC_PAGE
-------------------------
-Architecture: PPC
-Status: active
-Purpose: To enable communication between the hypervisor and guest there is a
-shared page that contains parts of supervisor visible register state.
-The guest can map this shared page to access its supervisor register through
-memory using this hypercall.
-
-5. KVM_HC_KICK_CPU
-------------------------
-Architecture: x86
-Status: active
-Purpose: Hypercall used to wakeup a vcpu from HLT state
-Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
-kernel mode for an event to occur (ex: a spinlock to become available) can
-execute HLT instruction once it has busy-waited for more than a threshold
-time-interval. Execution of HLT instruction would cause the hypervisor to put
-the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
-same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
-specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
-is used in the hypercall for future use.
-
-
-6. KVM_HC_CLOCK_PAIRING
-------------------------
-Architecture: x86
-Status: active
-Purpose: Hypercall used to synchronize host and guest clocks.
-Usage:
-
-a0: guest physical address where host copies
-"struct kvm_clock_offset" structure.
-
-a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0)
-is supported (corresponding to the host's CLOCK_REALTIME clock).
-
-		struct kvm_clock_pairing {
-			__s64 sec;
-			__s64 nsec;
-			__u64 tsc;
-			__u32 flags;
-			__u32 pad[9];
-		};
-
-       Where:
-               * sec: seconds from clock_type clock.
-               * nsec: nanoseconds from clock_type clock.
-               * tsc: guest TSC value used to calculate sec/nsec pair
-               * flags: flags, unused (0) at the moment.
-
-The hypercall lets a guest compute a precise timestamp across
-host and guest.  The guest can use the returned TSC value to
-compute the CLOCK_REALTIME for its clock, at the same instant.
-
-Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
-or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
-
-6. KVM_HC_SEND_IPI
-------------------------
-Architecture: x86
-Status: active
-Purpose: Send IPIs to multiple vCPUs.
-
-a0: lower part of the bitmap of destination APIC IDs
-a1: higher part of the bitmap of destination APIC IDs
-a2: the lowest APIC ID in bitmap
-a3: APIC ICR
-
-The hypercall lets a guest send multicast IPIs, with at most 128
-128 destinations per hypercall in 64-bit mode and 64 vCPUs per
-hypercall in 32-bit mode.  The destinations are represented by a
-bitmap contained in the first two arguments (a0 and a1). Bit 0 of
-a0 corresponds to the APIC ID in the third argument (a2), bit 1
-corresponds to the APIC ID a2+1, and so on.
-
-Returns the number of CPUs to which the IPIs were delivered successfully.
-
-7. KVM_HC_SCHED_YIELD
-------------------------
-Architecture: x86
-Status: active
-Purpose: Hypercall used to yield if the IPI target vCPU is preempted
-
-a0: destination APIC ID
-
-Usage example: When sending a call-function IPI-many to vCPUs, yield if
-any of the IPI target vCPUs was preempted.
diff --git a/Documentation/virtual/kvm/index.rst b/Documentation/virtual/kvm/index.rst
deleted file mode 100644
index 0b206a06f5be..000000000000
--- a/Documentation/virtual/kvm/index.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-===
-KVM
-===
-
-.. toctree::
-   :maxdepth: 2
-
-   amd-memory-encryption
-   cpuid
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
deleted file mode 100644
index 635cd6eaf714..000000000000
--- a/Documentation/virtual/kvm/locking.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-KVM Lock Overview
-=================
-
-1. Acquisition Orders
----------------------
-
-The acquisition orders for mutexes are as follows:
-
-- kvm->lock is taken outside vcpu->mutex
-
-- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
-
-- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
-  them together is quite rare.
-
-On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
-
-Everything else is a leaf: no other lock is taken inside the critical
-sections.
-
-2: Exception
-------------
-
-Fast page fault:
-
-Fast page fault is the fast path which fixes the guest page fault out of
-the mmu-lock on x86. Currently, the page fault can be fast in one of the
-following two cases:
-
-1. Access Tracking: The SPTE is not present, but it is marked for access
-tracking i.e. the SPTE_SPECIAL_MASK is set. That means we need to
-restore the saved R/X bits. This is described in more detail later below.
-
-2. Write-Protection: The SPTE is present and the fault is
-caused by write-protect. That means we just need to change the W bit of the 
-spte.
-
-What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
-SPTE_MMU_WRITEABLE bit on the spte:
-- SPTE_HOST_WRITEABLE means the gfn is writable on host.
-- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
-  the gfn is writable on guest mmu and it is not write-protected by shadow
-  page write-protection.
-
-On fast page fault path, we will use cmpxchg to atomically set the spte W
-bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, or 
-restore the saved R/X bits if VMX_EPT_TRACK_ACCESS mask is set, or both. This
-is safe because whenever changing these bits can be detected by cmpxchg.
-
-But we need carefully check these cases:
-1): The mapping from gfn to pfn
-The mapping from gfn to pfn may be changed since we can only ensure the pfn
-is not changed during cmpxchg. This is a ABA problem, for example, below case
-will happen:
-
-At the beginning:
-gpte = gfn1
-gfn1 is mapped to pfn1 on host
-spte is the shadow page table entry corresponding with gpte and
-spte = pfn1
-
-   VCPU 0                           VCPU0
-on fast page fault path:
-
-   old_spte = *spte;
-                                 pfn1 is swapped out:
-                                    spte = 0;
-
-                                 pfn1 is re-alloced for gfn2.
-
-                                 gpte is changed to point to
-                                 gfn2 by the guest:
-                                    spte = pfn1;
-
-   if (cmpxchg(spte, old_spte, old_spte+W)
-	mark_page_dirty(vcpu->kvm, gfn1)
-             OOPS!!!
-
-We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
-
-For direct sp, we can easily avoid it since the spte of direct sp is fixed
-to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
-to pin gfn to pfn, because after gfn_to_pfn_atomic():
-- We have held the refcount of pfn that means the pfn can not be freed and
-  be reused for another gfn.
-- The pfn is writable that means it can not be shared between different gfns
-  by KSM.
-
-Then, we can ensure the dirty bitmaps is correctly set for a gfn.
-
-Currently, to simplify the whole things, we disable fast page fault for
-indirect shadow page.
-
-2): Dirty bit tracking
-In the origin code, the spte can be fast updated (non-atomically) if the
-spte is read-only and the Accessed bit has already been set since the
-Accessed bit and Dirty bit can not be lost.
-
-But it is not true after fast page fault since the spte can be marked
-writable between reading spte and updating spte. Like below case:
-
-At the beginning:
-spte.W = 0
-spte.Accessed = 1
-
-   VCPU 0                                       VCPU0
-In mmu_spte_clear_track_bits():
-
-   old_spte = *spte;
-
-   /* 'if' condition is satisfied. */
-   if (old_spte.Accessed == 1 &&
-        old_spte.W == 0)
-      spte = 0ull;
-                                         on fast page fault path:
-                                             spte.W = 1
-                                         memory write on the spte:
-                                             spte.Dirty = 1
-
-
-   else
-      old_spte = xchg(spte, 0ull)
-
-
-   if (old_spte.Accessed == 1)
-      kvm_set_pfn_accessed(spte.pfn);
-   if (old_spte.Dirty == 1)
-      kvm_set_pfn_dirty(spte.pfn);
-      OOPS!!!
-
-The Dirty bit is lost in this case.
-
-In order to avoid this kind of issue, we always treat the spte as "volatile"
-if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
-the spte is always atomically updated in this case.
-
-3): flush tlbs due to spte updated
-If the spte is updated from writable to readonly, we should flush all TLBs,
-otherwise rmap_write_protect will find a read-only spte, even though the
-writable spte might be cached on a CPU's TLB.
-
-As mentioned before, the spte can be updated to writable out of mmu-lock on
-fast page fault path, in order to easily audit the path, we see if TLBs need
-be flushed caused by this reason in mmu_spte_update() since this is a common
-function to update spte (present -> present).
-
-Since the spte is "volatile" if it can be updated out of mmu-lock, we always
-atomically update the spte, the race caused by fast page fault can be avoided,
-See the comments in spte_has_volatile_bits() and mmu_spte_update().
-
-Lockless Access Tracking:
-
-This is used for Intel CPUs that are using EPT but do not support the EPT A/D
-bits. In this case, when the KVM MMU notifier is called to track accesses to a
-page (via kvm_mmu_notifier_clear_flush_young), it marks the PTE as not-present
-by clearing the RWX bits in the PTE and storing the original R & X bits in
-some unused/ignored bits. In addition, the SPTE_SPECIAL_MASK is also set on the
-PTE (using the ignored bit 62). When the VM tries to access the page later on,
-a fault is generated and the fast page fault mechanism described above is used
-to atomically restore the PTE to a Present state. The W bit is not saved when
-the PTE is marked for access tracking and during restoration to the Present
-state, the W bit is set depending on whether or not it was a write access. If
-it wasn't, then the W bit will remain clear until a write access happens, at 
-which time it will be set using the Dirty tracking mechanism described above.
-
-3. Reference
-------------
-
-Name:		kvm_lock
-Type:		mutex
-Arch:		any
-Protects:	- vm_list
-
-Name:		kvm_count_lock
-Type:		raw_spinlock_t
-Arch:		any
-Protects:	- hardware virtualization enable/disable
-Comment:	'raw' because hardware enabling/disabling must be atomic /wrt
-		migration.
-
-Name:		kvm_arch::tsc_write_lock
-Type:		raw_spinlock
-Arch:		x86
-Protects:	- kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
-		- tsc offset in vmcb
-Comment:	'raw' because updating the tsc offsets must not be preempted.
-
-Name:		kvm->mmu_lock
-Type:		spinlock_t
-Arch:		any
-Protects:	-shadow page/shadow tlb entry
-Comment:	it is a spinlock since it is used in mmu notifier.
-
-Name:		kvm->srcu
-Type:		srcu lock
-Arch:		any
-Protects:	- kvm->memslots
-		- kvm->buses
-Comment:	The srcu read lock must be held while accessing memslots (e.g.
-		when using gfn_to_* functions) and while accessing in-kernel
-		MMIO/PIO address->device structure mapping (kvm->buses).
-		The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
-		if it is needed by multiple functions.
-
-Name:		blocked_vcpu_on_cpu_lock
-Type:		spinlock_t
-Arch:		x86
-Protects:	blocked_vcpu_on_cpu
-Comment:	This is a per-CPU lock and it is used for VT-d posted-interrupts.
-		When VT-d posted-interrupts is supported and the VM has assigned
-		devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu
-		protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues
-		wakeup notification event since external interrupts from the
-		assigned devices happens, we will find the vCPU on the list to
-		wakeup.
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
deleted file mode 100644
index 2efe0efc516e..000000000000
--- a/Documentation/virtual/kvm/mmu.txt
+++ /dev/null
@@ -1,449 +0,0 @@
-The x86 kvm shadow mmu
-======================
-
-The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible
-for presenting a standard x86 mmu to the guest, while translating guest
-physical addresses to host physical addresses.
-
-The mmu code attempts to satisfy the following requirements:
-
-- correctness: the guest should not be able to determine that it is running
-               on an emulated mmu except for timing (we attempt to comply
-               with the specification, not emulate the characteristics of
-               a particular implementation such as tlb size)
-- security:    the guest must not be able to touch host memory not assigned
-               to it
-- performance: minimize the performance penalty imposed by the mmu
-- scaling:     need to scale to large memory and large vcpu guests
-- hardware:    support the full range of x86 virtualization hardware
-- integration: Linux memory management code must be in control of guest memory
-               so that swapping, page migration, page merging, transparent
-               hugepages, and similar features work without change
-- dirty tracking: report writes to guest memory to enable live migration
-               and framebuffer-based displays
-- footprint:   keep the amount of pinned kernel memory low (most memory
-               should be shrinkable)
-- reliability:  avoid multipage or GFP_ATOMIC allocations
-
-Acronyms
-========
-
-pfn   host page frame number
-hpa   host physical address
-hva   host virtual address
-gfn   guest frame number
-gpa   guest physical address
-gva   guest virtual address
-ngpa  nested guest physical address
-ngva  nested guest virtual address
-pte   page table entry (used also to refer generically to paging structure
-      entries)
-gpte  guest pte (referring to gfns)
-spte  shadow pte (referring to pfns)
-tdp   two dimensional paging (vendor neutral term for NPT and EPT)
-
-Virtual and real hardware supported
-===================================
-
-The mmu supports first-generation mmu hardware, which allows an atomic switch
-of the current paging mode and cr3 during guest entry, as well as
-two-dimensional paging (AMD's NPT and Intel's EPT).  The emulated hardware
-it exposes is the traditional 2/3/4 level x86 mmu, with support for global
-pages, pae, pse, pse36, cr0.wp, and 1GB pages. Emulated hardware also
-able to expose NPT capable hardware on NPT capable hosts.
-
-Translation
-===========
-
-The primary job of the mmu is to program the processor's mmu to translate
-addresses for the guest.  Different translations are required at different
-times:
-
-- when guest paging is disabled, we translate guest physical addresses to
-  host physical addresses (gpa->hpa)
-- when guest paging is enabled, we translate guest virtual addresses, to
-  guest physical addresses, to host physical addresses (gva->gpa->hpa)
-- when the guest launches a guest of its own, we translate nested guest
-  virtual addresses, to nested guest physical addresses, to guest physical
-  addresses, to host physical addresses (ngva->ngpa->gpa->hpa)
-
-The primary challenge is to encode between 1 and 3 translations into hardware
-that support only 1 (traditional) and 2 (tdp) translations.  When the
-number of required translations matches the hardware, the mmu operates in
-direct mode; otherwise it operates in shadow mode (see below).
-
-Memory
-======
-
-Guest memory (gpa) is part of the user address space of the process that is
-using kvm.  Userspace defines the translation between guest addresses and user
-addresses (gpa->hva); note that two gpas may alias to the same hva, but not
-vice versa.
-
-These hvas may be backed using any method available to the host: anonymous
-memory, file backed memory, and device memory.  Memory might be paged by the
-host at any time.
-
-Events
-======
-
-The mmu is driven by events, some from the guest, some from the host.
-
-Guest generated events:
-- writes to control registers (especially cr3)
-- invlpg/invlpga instruction execution
-- access to missing or protected translations
-
-Host generated events:
-- changes in the gpa->hpa translation (either through gpa->hva changes or
-  through hva->hpa changes)
-- memory pressure (the shrinker)
-
-Shadow pages
-============
-
-The principal data structure is the shadow page, 'struct kvm_mmu_page'.  A
-shadow page contains 512 sptes, which can be either leaf or nonleaf sptes.  A
-shadow page may contain a mix of leaf and nonleaf sptes.
-
-A nonleaf spte allows the hardware mmu to reach the leaf pages and
-is not related to a translation directly.  It points to other shadow pages.
-
-A leaf spte corresponds to either one or two translations encoded into
-one paging structure entry.  These are always the lowest level of the
-translation stack, with optional higher level translations left to NPT/EPT.
-Leaf ptes point at guest pages.
-
-The following table shows translations encoded by leaf ptes, with higher-level
-translations in parentheses:
-
- Non-nested guests:
-  nonpaging:     gpa->hpa
-  paging:        gva->gpa->hpa
-  paging, tdp:   (gva->)gpa->hpa
- Nested guests:
-  non-tdp:       ngva->gpa->hpa  (*)
-  tdp:           (ngva->)ngpa->gpa->hpa
-
-(*) the guest hypervisor will encode the ngva->gpa translation into its page
-    tables if npt is not present
-
-Shadow pages contain the following information:
-  role.level:
-    The level in the shadow paging hierarchy that this shadow page belongs to.
-    1=4k sptes, 2=2M sptes, 3=1G sptes, etc.
-  role.direct:
-    If set, leaf sptes reachable from this page are for a linear range.
-    Examples include real mode translation, large guest pages backed by small
-    host pages, and gpa->hpa translations when NPT or EPT is active.
-    The linear range starts at (gfn << PAGE_SHIFT) and its size is determined
-    by role.level (2MB for first level, 1GB for second level, 0.5TB for third
-    level, 256TB for fourth level)
-    If clear, this page corresponds to a guest page table denoted by the gfn
-    field.
-  role.quadrant:
-    When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit
-    sptes.  That means a guest page table contains more ptes than the host,
-    so multiple shadow pages are needed to shadow one guest page.
-    For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
-    first or second 512-gpte block in the guest page table.  For second-level
-    page tables, each 32-bit gpte is converted to two 64-bit sptes
-    (since each first-level guest page is shadowed by two first-level
-    shadow pages) so role.quadrant takes values in the range 0..3.  Each
-    quadrant maps 1GB virtual address space.
-  role.access:
-    Inherited guest access permissions in the form uwx.  Note execute
-    permission is positive, not negative.
-  role.invalid:
-    The page is invalid and should not be used.  It is a root page that is
-    currently pinned (by a cpu hardware register pointing to it); once it is
-    unpinned it will be destroyed.
-  role.gpte_is_8_bytes:
-    Reflects the size of the guest PTE for which the page is valid, i.e. '1'
-    if 64-bit gptes are in use, '0' if 32-bit gptes are in use.
-  role.nxe:
-    Contains the value of efer.nxe for which the page is valid.
-  role.cr0_wp:
-    Contains the value of cr0.wp for which the page is valid.
-  role.smep_andnot_wp:
-    Contains the value of cr4.smep && !cr0.wp for which the page is valid
-    (pages for which this is true are different from other pages; see the
-    treatment of cr0.wp=0 below).
-  role.smap_andnot_wp:
-    Contains the value of cr4.smap && !cr0.wp for which the page is valid
-    (pages for which this is true are different from other pages; see the
-    treatment of cr0.wp=0 below).
-  role.ept_sp:
-    This is a virtual flag to denote a shadowed nested EPT page.  ept_sp
-    is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination.
-  role.smm:
-    Is 1 if the page is valid in system management mode.  This field
-    determines which of the kvm_memslots array was used to build this
-    shadow page; it is also used to go back from a struct kvm_mmu_page
-    to a memslot, through the kvm_memslots_for_spte_role macro and
-    __gfn_to_memslot.
-  role.ad_disabled:
-    Is 1 if the MMU instance cannot use A/D bits.  EPT did not have A/D
-    bits before Haswell; shadow EPT page tables also cannot use A/D bits
-    if the L1 hypervisor does not enable them.
-  gfn:
-    Either the guest page table containing the translations shadowed by this
-    page, or the base page frame for linear translations.  See role.direct.
-  spt:
-    A pageful of 64-bit sptes containing the translations for this page.
-    Accessed by both kvm and hardware.
-    The page pointed to by spt will have its page->private pointing back
-    at the shadow page structure.
-    sptes in spt point either at guest pages, or at lower-level shadow pages.
-    Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point
-    at __pa(sp2->spt).  sp2 will point back at sp1 through parent_pte.
-    The spt array forms a DAG structure with the shadow page as a node, and
-    guest pages as leaves.
-  gfns:
-    An array of 512 guest frame numbers, one for each present pte.  Used to
-    perform a reverse map from a pte to a gfn. When role.direct is set, any
-    element of this array can be calculated from the gfn field when used, in
-    this case, the array of gfns is not allocated. See role.direct and gfn.
-  root_count:
-    A counter keeping track of how many hardware registers (guest cr3 or
-    pdptrs) are now pointing at the page.  While this counter is nonzero, the
-    page cannot be destroyed.  See role.invalid.
-  parent_ptes:
-    The reverse mapping for the pte/ptes pointing at this page's spt. If
-    parent_ptes bit 0 is zero, only one spte points at this page and
-    parent_ptes points at this single spte, otherwise, there exists multiple
-    sptes pointing at this page and (parent_ptes & ~0x1) points at a data
-    structure with a list of parent sptes.
-  unsync:
-    If true, then the translations in this page may not match the guest's
-    translation.  This is equivalent to the state of the tlb when a pte is
-    changed but before the tlb entry is flushed.  Accordingly, unsync ptes
-    are synchronized when the guest executes invlpg or flushes its tlb by
-    other means.  Valid for leaf pages.
-  unsync_children:
-    How many sptes in the page point at pages that are unsync (or have
-    unsynchronized children).
-  unsync_child_bitmap:
-    A bitmap indicating which sptes in spt point (directly or indirectly) at
-    pages that may be unsynchronized.  Used to quickly locate all unsychronized
-    pages reachable from a given page.
-  clear_spte_count:
-    Only present on 32-bit hosts, where a 64-bit spte cannot be written
-    atomically.  The reader uses this while running out of the MMU lock
-    to detect in-progress updates and retry them until the writer has
-    finished the write.
-  write_flooding_count:
-    A guest may write to a page table many times, causing a lot of
-    emulations if the page needs to be write-protected (see "Synchronized
-    and unsynchronized pages" below).  Leaf pages can be unsynchronized
-    so that they do not trigger frequent emulation, but this is not
-    possible for non-leafs.  This field counts the number of emulations
-    since the last time the page table was actually used; if emulation
-    is triggered too frequently on this page, KVM will unmap the page
-    to avoid emulation in the future.
-
-Reverse map
-===========
-
-The mmu maintains a reverse mapping whereby all ptes mapping a page can be
-reached given its gfn.  This is used, for example, when swapping out a page.
-
-Synchronized and unsynchronized pages
-=====================================
-
-The guest uses two events to synchronize its tlb and page tables: tlb flushes
-and page invalidations (invlpg).
-
-A tlb flush means that we need to synchronize all sptes reachable from the
-guest's cr3.  This is expensive, so we keep all guest page tables write
-protected, and synchronize sptes to gptes when a gpte is written.
-
-A special case is when a guest page table is reachable from the current
-guest cr3.  In this case, the guest is obliged to issue an invlpg instruction
-before using the translation.  We take advantage of that by removing write
-protection from the guest page, and allowing the guest to modify it freely.
-We synchronize modified gptes when the guest invokes invlpg.  This reduces
-the amount of emulation we have to do when the guest modifies multiple gptes,
-or when the a guest page is no longer used as a page table and is used for
-random guest data.
-
-As a side effect we have to resynchronize all reachable unsynchronized shadow
-pages on a tlb flush.
-
-
-Reaction to events
-==================
-
-- guest page fault (or npt page fault, or ept violation)
-
-This is the most complicated event.  The cause of a page fault can be:
-
-  - a true guest fault (the guest translation won't allow the access) (*)
-  - access to a missing translation
-  - access to a protected translation
-    - when logging dirty pages, memory is write protected
-    - synchronized shadow pages are write protected (*)
-  - access to untranslatable memory (mmio)
-
-  (*) not applicable in direct mode
-
-Handling a page fault is performed as follows:
-
- - if the RSV bit of the error code is set, the page fault is caused by guest
-   accessing MMIO and cached MMIO information is available.
-   - walk shadow page table
-   - check for valid generation number in the spte (see "Fast invalidation of
-     MMIO sptes" below)
-   - cache the information to vcpu->arch.mmio_gva, vcpu->arch.access and
-     vcpu->arch.mmio_gfn, and call the emulator
- - If both P bit and R/W bit of error code are set, this could possibly
-   be handled as a "fast page fault" (fixed without taking the MMU lock).  See
-   the description in Documentation/virtual/kvm/locking.txt.
- - if needed, walk the guest page tables to determine the guest translation
-   (gva->gpa or ngpa->gpa)
-   - if permissions are insufficient, reflect the fault back to the guest
- - determine the host page
-   - if this is an mmio request, there is no host page; cache the info to
-     vcpu->arch.mmio_gva, vcpu->arch.access and vcpu->arch.mmio_gfn
- - walk the shadow page table to find the spte for the translation,
-   instantiating missing intermediate page tables as necessary
-   - If this is an mmio request, cache the mmio info to the spte and set some
-     reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
- - try to unsynchronize the page
-   - if successful, we can let the guest continue and modify the gpte
- - emulate the instruction
-   - if failed, unshadow the page and let the guest continue
- - update any translations that were modified by the instruction
-
-invlpg handling:
-
-  - walk the shadow page hierarchy and drop affected translations
-  - try to reinstantiate the indicated translation in the hope that the
-    guest will use it in the near future
-
-Guest control register updates:
-
-- mov to cr3
-  - look up new shadow roots
-  - synchronize newly reachable shadow pages
-
-- mov to cr0/cr4/efer
-  - set up mmu context for new paging mode
-  - look up new shadow roots
-  - synchronize newly reachable shadow pages
-
-Host translation updates:
-
-  - mmu notifier called with updated hva
-  - look up affected sptes through reverse map
-  - drop (or update) translations
-
-Emulating cr0.wp
-================
-
-If tdp is not enabled, the host must keep cr0.wp=1 so page write protection
-works for the guest kernel, not guest guest userspace.  When the guest
-cr0.wp=1, this does not present a problem.  However when the guest cr0.wp=0,
-we cannot map the permissions for gpte.u=1, gpte.w=0 to any spte (the
-semantics require allowing any guest kernel access plus user read access).
-
-We handle this by mapping the permissions to two possible sptes, depending
-on fault type:
-
-- kernel write fault: spte.u=0, spte.w=1 (allows full kernel access,
-  disallows user access)
-- read fault: spte.u=1, spte.w=0 (allows full read access, disallows kernel
-  write access)
-
-(user write faults generate a #PF)
-
-In the first case there are two additional complications:
-- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
-  the kernel may now execute it.  We handle this by also setting spte.nx.
-  If we get a user fetch or read fault, we'll change spte.u=1 and
-  spte.nx=gpte.nx back.  For this to work, KVM forces EFER.NX to 1 when
-  shadow paging is in use.
-- if CR4.SMAP is disabled: since the page has been changed to a kernel
-  page, it can not be reused when CR4.SMAP is enabled. We set
-  CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
-  here we do not care the case that CR4.SMAP is enabled since KVM will
-  directly inject #PF to guest due to failed permission check.
-
-To prevent an spte that was converted into a kernel page with cr0.wp=0
-from being written by the kernel after cr0.wp has changed to 1, we make
-the value of cr0.wp part of the page role.  This means that an spte created
-with one value of cr0.wp cannot be used when cr0.wp has a different value -
-it will simply be missed by the shadow page lookup code.  A similar issue
-exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after
-changing cr4.smep to 1.  To avoid this, the value of !cr0.wp && cr4.smep
-is also made a part of the page role.
-
-Large pages
-===========
-
-The mmu supports all combinations of large and small guest and host pages.
-Supported page sizes include 4k, 2M, 4M, and 1G.  4M pages are treated as
-two separate 2M pages, on both guest and host, since the mmu always uses PAE
-paging.
-
-To instantiate a large spte, four constraints must be satisfied:
-
-- the spte must point to a large host page
-- the guest pte must be a large pte of at least equivalent size (if tdp is
-  enabled, there is no guest pte and this condition is satisfied)
-- if the spte will be writeable, the large page frame may not overlap any
-  write-protected pages
-- the guest page must be wholly contained by a single memory slot
-
-To check the last two conditions, the mmu maintains a ->disallow_lpage set of
-arrays for each memory slot and large page size.  Every write protected page
-causes its disallow_lpage to be incremented, thus preventing instantiation of
-a large spte.  The frames at the end of an unaligned memory slot have
-artificially inflated ->disallow_lpages so they can never be instantiated.
-
-Fast invalidation of MMIO sptes
-===============================
-
-As mentioned in "Reaction to events" above, kvm will cache MMIO
-information in leaf sptes.  When a new memslot is added or an existing
-memslot is changed, this information may become stale and needs to be
-invalidated.  This also needs to hold the MMU lock while walking all
-shadow pages, and is made more scalable with a similar technique.
-
-MMIO sptes have a few spare bits, which are used to store a
-generation number.  The global generation number is stored in
-kvm_memslots(kvm)->generation, and increased whenever guest memory info
-changes.
-
-When KVM finds an MMIO spte, it checks the generation number of the spte.
-If the generation number of the spte does not equal the global generation
-number, it will ignore the cached MMIO information and handle the page
-fault through the slow path.
-
-Since only 19 bits are used to store generation-number on mmio spte, all
-pages are zapped when there is an overflow.
-
-Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
-times, the last one happening when the generation number is retrieved and
-stored into the MMIO spte.  Thus, the MMIO spte might be created based on
-out-of-date information, but with an up-to-date generation number.
-
-To avoid this, the generation number is incremented again after synchronize_srcu
-returns; thus, bit 63 of kvm_memslots(kvm)->generation set to 1 only during a
-memslot update, while some SRCU readers might be using the old copy.  We do not
-want to use an MMIO sptes created with an odd generation number, and we can do
-this without losing a bit in the MMIO spte.  The "update in-progress" bit of the
-generation is not stored in MMIO spte, and is so is implicitly zero when the
-generation is extracted out of the spte.  If KVM is unlucky and creates an MMIO
-spte while an update is in-progress, the next access to the spte will always be
-a cache miss.  For example, a subsequent access during the update window will
-miss due to the in-progress flag diverging, while an access after the update
-window closes will have a higher generation number (as compared to the spte).
-
-
-Further reading
-===============
-
-- NPT presentation from KVM Forum 2008
-  http://www.linux-kvm.org/images/c/c8/KvmForum2008%24kdf2008_21.pdf
-
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
deleted file mode 100644
index df1f4338b3ca..000000000000
--- a/Documentation/virtual/kvm/msr.txt
+++ /dev/null
@@ -1,284 +0,0 @@
-KVM-specific MSRs.
-Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
-=====================================================
-
-KVM makes use of some custom MSRs to service some requests.
-
-Custom MSRs have a range reserved for them, that goes from
-0x4b564d00 to 0x4b564dff. There are MSRs outside this area,
-but they are deprecated and their use is discouraged.
-
-Custom MSR list
---------
-
-The current supported Custom MSR list is:
-
-MSR_KVM_WALL_CLOCK_NEW:   0x4b564d00
-
-	data: 4-byte alignment physical address of a memory area which must be
-	in guest RAM. This memory is expected to hold a copy of the following
-	structure:
-
-	struct pvclock_wall_clock {
-		u32   version;
-		u32   sec;
-		u32   nsec;
-	} __attribute__((__packed__));
-
-	whose data will be filled in by the hypervisor. The hypervisor is only
-	guaranteed to update this data at the moment of MSR write.
-	Users that want to reliably query this information more than once have
-	to write more than once to this MSR. Fields have the following meanings:
-
-		version: guest has to check version before and after grabbing
-		time information and check that they are both equal and even.
-		An odd version indicates an in-progress update.
-
-		sec: number of seconds for wallclock at time of boot.
-
-		nsec: number of nanoseconds for wallclock at time of boot.
-
-	In order to get the current wallclock time, the system_time from
-	MSR_KVM_SYSTEM_TIME_NEW needs to be added.
-
-	Note that although MSRs are per-CPU entities, the effect of this
-	particular MSR is global.
-
-	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
-	leaf prior to usage.
-
-MSR_KVM_SYSTEM_TIME_NEW:  0x4b564d01
-
-	data: 4-byte aligned physical address of a memory area which must be in
-	guest RAM, plus an enable bit in bit 0. This memory is expected to hold
-	a copy of the following structure:
-
-	struct pvclock_vcpu_time_info {
-		u32   version;
-		u32   pad0;
-		u64   tsc_timestamp;
-		u64   system_time;
-		u32   tsc_to_system_mul;
-		s8    tsc_shift;
-		u8    flags;
-		u8    pad[2];
-	} __attribute__((__packed__)); /* 32 bytes */
-
-	whose data will be filled in by the hypervisor periodically. Only one
-	write, or registration, is needed for each VCPU. The interval between
-	updates of this structure is arbitrary and implementation-dependent.
-	The hypervisor may update this structure at any time it sees fit until
-	anything with bit0 == 0 is written to it.
-
-	Fields have the following meanings:
-
-		version: guest has to check version before and after grabbing
-		time information and check that they are both equal and even.
-		An odd version indicates an in-progress update.
-
-		tsc_timestamp: the tsc value at the current VCPU at the time
-		of the update of this structure. Guests can subtract this value
-		from current tsc to derive a notion of elapsed time since the
-		structure update.
-
-		system_time: a host notion of monotonic time, including sleep
-		time at the time this structure was last updated. Unit is
-		nanoseconds.
-
-		tsc_to_system_mul: multiplier to be used when converting
-		tsc-related quantity to nanoseconds
-
-		tsc_shift: shift to be used when converting tsc-related
-		quantity to nanoseconds. This shift will ensure that
-		multiplication with tsc_to_system_mul does not overflow.
-		A positive value denotes a left shift, a negative value
-		a right shift.
-
-		The conversion from tsc to nanoseconds involves an additional
-		right shift by 32 bits. With this information, guests can
-		derive per-CPU time by doing:
-
-			time = (current_tsc - tsc_timestamp)
-			if (tsc_shift >= 0)
-				time <<= tsc_shift;
-			else
-				time >>= -tsc_shift;
-			time = (time * tsc_to_system_mul) >> 32
-			time = time + system_time
-
-		flags: bits in this field indicate extended capabilities
-		coordinated between the guest and the hypervisor. Availability
-		of specific flags has to be checked in 0x40000001 cpuid leaf.
-		Current flags are:
-
-		 flag bit   | cpuid bit    | meaning
-		-------------------------------------------------------------
-			    |	           | time measures taken across
-		     0      |	   24      | multiple cpus are guaranteed to
-			    |		   | be monotonic
-		-------------------------------------------------------------
-			    |		   | guest vcpu has been paused by
-		     1	    |	  N/A	   | the host
-			    |		   | See 4.70 in api.txt
-		-------------------------------------------------------------
-
-	Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
-	leaf prior to usage.
-
-
-MSR_KVM_WALL_CLOCK:  0x11
-
-	data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
-
-	This MSR falls outside the reserved KVM range and may be removed in the
-	future. Its usage is deprecated.
-
-	Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
-	leaf prior to usage.
-
-MSR_KVM_SYSTEM_TIME: 0x12
-
-	data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
-
-	This MSR falls outside the reserved KVM range and may be removed in the
-	future. Its usage is deprecated.
-
-	Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
-	leaf prior to usage.
-
-	The suggested algorithm for detecting kvmclock presence is then:
-
-		if (!kvm_para_available())    /* refer to cpuid.txt */
-			return NON_PRESENT;
-
-		flags = cpuid_eax(0x40000001);
-		if (flags & 3) {
-			msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
-			msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
-			return PRESENT;
-		} else if (flags & 0) {
-			msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
-			msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
-			return PRESENT;
-		} else
-			return NON_PRESENT;
-
-MSR_KVM_ASYNC_PF_EN: 0x4b564d02
-	data: Bits 63-6 hold 64-byte aligned physical address of a
-	64 byte memory area which must be in guest RAM and must be
-	zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
-	when asynchronous page faults are enabled on the vcpu 0 when
-	disabled. Bit 1 is 1 if asynchronous page faults can be injected
-	when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
-	are delivered to L1 as #PF vmexits.  Bit 2 can be set only if
-	KVM_FEATURE_ASYNC_PF_VMEXIT is present in CPUID.
-
-	First 4 byte of 64 byte memory location will be written to by
-	the hypervisor at the time of asynchronous page fault (APF)
-	injection to indicate type of asynchronous page fault. Value
-	of 1 means that the page referred to by the page fault is not
-	present. Value 2 means that the page is now available. Disabling
-	interrupt inhibits APFs. Guest must not enable interrupt
-	before the reason is read, or it may be overwritten by another
-	APF. Since APF uses the same exception vector as regular page
-	fault guest must reset the reason to 0 before it does
-	something that can generate normal page fault.  If during page
-	fault APF reason is 0 it means that this is regular page
-	fault.
-
-	During delivery of type 1 APF cr2 contains a token that will
-	be used to notify a guest when missing page becomes
-	available. When page becomes available type 2 APF is sent with
-	cr2 set to the token associated with the page. There is special
-	kind of token 0xffffffff which tells vcpu that it should wake
-	up all processes waiting for APFs and no individual type 2 APFs
-	will be sent.
-
-	If APF is disabled while there are outstanding APFs, they will
-	not be delivered.
-
-	Currently type 2 APF will be always delivered on the same vcpu as
-	type 1 was, but guest should not rely on that.
-
-MSR_KVM_STEAL_TIME: 0x4b564d03
-
-	data: 64-byte alignment physical address of a memory area which must be
-	in guest RAM, plus an enable bit in bit 0. This memory is expected to
-	hold a copy of the following structure:
-
-	struct kvm_steal_time {
-		__u64 steal;
-		__u32 version;
-		__u32 flags;
-		__u8  preempted;
-		__u8  u8_pad[3];
-		__u32 pad[11];
-	}
-
-	whose data will be filled in by the hypervisor periodically. Only one
-	write, or registration, is needed for each VCPU. The interval between
-	updates of this structure is arbitrary and implementation-dependent.
-	The hypervisor may update this structure at any time it sees fit until
-	anything with bit0 == 0 is written to it. Guest is required to make sure
-	this structure is initialized to zero.
-
-	Fields have the following meanings:
-
-		version: a sequence counter. In other words, guest has to check
-		this field before and after grabbing time information and make
-		sure they are both equal and even. An odd version indicates an
-		in-progress update.
-
-		flags: At this point, always zero. May be used to indicate
-		changes in this structure in the future.
-
-		steal: the amount of time in which this vCPU did not run, in
-		nanoseconds. Time during which the vcpu is idle, will not be
-		reported as steal time.
-
-		preempted: indicate the vCPU who owns this struct is running or
-		not. Non-zero values mean the vCPU has been preempted. Zero
-		means the vCPU is not preempted. NOTE, it is always zero if the
-		the hypervisor doesn't support this field.
-
-MSR_KVM_EOI_EN: 0x4b564d04
-	data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
-	when disabled.  Bit 1 is reserved and must be zero.  When PV end of
-	interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
-	physical address of a 4 byte memory area which must be in guest RAM and
-	must be zeroed.
-
-	The first, least significant bit of 4 byte memory location will be
-	written to by the hypervisor, typically at the time of interrupt
-	injection.  Value of 1 means that guest can skip writing EOI to the apic
-	(using MSR or MMIO write); instead, it is sufficient to signal
-	EOI by clearing the bit in guest memory - this location will
-	later be polled by the hypervisor.
-	Value of 0 means that the EOI write is required.
-
-	It is always safe for the guest to ignore the optimization and perform
-	the APIC EOI write anyway.
-
-	Hypervisor is guaranteed to only modify this least
-	significant bit while in the current VCPU context, this means that
-	guest does not need to use either lock prefix or memory ordering
-	primitives to synchronise with the hypervisor.
-
-	However, hypervisor can set and clear this memory bit at any time:
-	therefore to make sure hypervisor does not interrupt the
-	guest and clear the least significant bit in the memory area
-	in the window between guest testing it to detect
-	whether it can skip EOI apic write and between guest
-	clearing it to signal EOI to the hypervisor,
-	guest must both read the least significant bit in the memory area and
-	clear it using a single CPU instruction, such as test and clear, or
-	compare and exchange.
-
-MSR_KVM_POLL_CONTROL: 0x4b564d05
-	Control host-side polling.
-
-	data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
-
-	KVM guests can request the host not to poll on HLT, for example if
-	they are performing polling themselves.
-
diff --git a/Documentation/virtual/kvm/nested-vmx.txt b/Documentation/virtual/kvm/nested-vmx.txt
deleted file mode 100644
index 97eb1353e962..000000000000
--- a/Documentation/virtual/kvm/nested-vmx.txt
+++ /dev/null
@@ -1,240 +0,0 @@
-Nested VMX
-==========
-
-Overview
----------
-
-On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions)
-to easily and efficiently run guest operating systems. Normally, these guests
-*cannot* themselves be hypervisors running their own guests, because in VMX,
-guests cannot use VMX instructions.
-
-The "Nested VMX" feature adds this missing capability - of running guest
-hypervisors (which use VMX) with their own nested guests. It does so by
-allowing a guest to use VMX instructions, and correctly and efficiently
-emulating them using the single level of VMX available in the hardware.
-
-We describe in much greater detail the theory behind the nested VMX feature,
-its implementation and its performance characteristics, in the OSDI 2010 paper
-"The Turtles Project: Design and Implementation of Nested Virtualization",
-available at:
-
-	http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
-
-
-Terminology
------------
-
-Single-level virtualization has two levels - the host (KVM) and the guests.
-In nested virtualization, we have three levels: The host (KVM), which we call
-L0, the guest hypervisor, which we call L1, and its nested guest, which we
-call L2.
-
-
-Running nested VMX
-------------------
-
-The nested VMX feature is disabled by default. It can be enabled by giving
-the "nested=1" option to the kvm-intel module.
-
-No modifications are required to user space (qemu). However, qemu's default
-emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
-explicitly enabled, by giving qemu one of the following options:
-
-     -cpu host              (emulated CPU has all features of the real CPU)
-
-     -cpu qemu64,+vmx       (add just the vmx feature to a named CPU type)
-
-
-ABIs
-----
-
-Nested VMX aims to present a standard and (eventually) fully-functional VMX
-implementation for the a guest hypervisor to use. As such, the official
-specification of the ABI that it provides is Intel's VMX specification,
-namely volume 3B of their "Intel 64 and IA-32 Architectures Software
-Developer's Manual". Not all of VMX's features are currently fully supported,
-but the goal is to eventually support them all, starting with the VMX features
-which are used in practice by popular hypervisors (KVM and others).
-
-As a VMX implementation, nested VMX presents a VMCS structure to L1.
-As mandated by the spec, other than the two fields revision_id and abort,
-this structure is *opaque* to its user, who is not supposed to know or care
-about its internal structure. Rather, the structure is accessed through the
-VMREAD and VMWRITE instructions.
-Still, for debugging purposes, KVM developers might be interested to know the
-internals of this structure; This is struct vmcs12 from arch/x86/kvm/vmx.c.
-
-The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we
-also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS
-which L0 builds to actually run L2 - how this is done is explained in the
-aforementioned paper.
-
-For convenience, we repeat the content of struct vmcs12 here. If the internals
-of this structure changes, this can break live migration across KVM versions.
-VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
-struct shadow_vmcs is ever changed.
-
-	typedef u64 natural_width;
-	struct __packed vmcs12 {
-		/* According to the Intel spec, a VMCS region must start with
-		 * these two user-visible fields */
-		u32 revision_id;
-		u32 abort;
-
-		u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
-		u32 padding[7]; /* room for future expansion */
-
-		u64 io_bitmap_a;
-		u64 io_bitmap_b;
-		u64 msr_bitmap;
-		u64 vm_exit_msr_store_addr;
-		u64 vm_exit_msr_load_addr;
-		u64 vm_entry_msr_load_addr;
-		u64 tsc_offset;
-		u64 virtual_apic_page_addr;
-		u64 apic_access_addr;
-		u64 ept_pointer;
-		u64 guest_physical_address;
-		u64 vmcs_link_pointer;
-		u64 guest_ia32_debugctl;
-		u64 guest_ia32_pat;
-		u64 guest_ia32_efer;
-		u64 guest_pdptr0;
-		u64 guest_pdptr1;
-		u64 guest_pdptr2;
-		u64 guest_pdptr3;
-		u64 host_ia32_pat;
-		u64 host_ia32_efer;
-		u64 padding64[8]; /* room for future expansion */
-		natural_width cr0_guest_host_mask;
-		natural_width cr4_guest_host_mask;
-		natural_width cr0_read_shadow;
-		natural_width cr4_read_shadow;
-		natural_width cr3_target_value0;
-		natural_width cr3_target_value1;
-		natural_width cr3_target_value2;
-		natural_width cr3_target_value3;
-		natural_width exit_qualification;
-		natural_width guest_linear_address;
-		natural_width guest_cr0;
-		natural_width guest_cr3;
-		natural_width guest_cr4;
-		natural_width guest_es_base;
-		natural_width guest_cs_base;
-		natural_width guest_ss_base;
-		natural_width guest_ds_base;
-		natural_width guest_fs_base;
-		natural_width guest_gs_base;
-		natural_width guest_ldtr_base;
-		natural_width guest_tr_base;
-		natural_width guest_gdtr_base;
-		natural_width guest_idtr_base;
-		natural_width guest_dr7;
-		natural_width guest_rsp;
-		natural_width guest_rip;
-		natural_width guest_rflags;
-		natural_width guest_pending_dbg_exceptions;
-		natural_width guest_sysenter_esp;
-		natural_width guest_sysenter_eip;
-		natural_width host_cr0;
-		natural_width host_cr3;
-		natural_width host_cr4;
-		natural_width host_fs_base;
-		natural_width host_gs_base;
-		natural_width host_tr_base;
-		natural_width host_gdtr_base;
-		natural_width host_idtr_base;
-		natural_width host_ia32_sysenter_esp;
-		natural_width host_ia32_sysenter_eip;
-		natural_width host_rsp;
-		natural_width host_rip;
-		natural_width paddingl[8]; /* room for future expansion */
-		u32 pin_based_vm_exec_control;
-		u32 cpu_based_vm_exec_control;
-		u32 exception_bitmap;
-		u32 page_fault_error_code_mask;
-		u32 page_fault_error_code_match;
-		u32 cr3_target_count;
-		u32 vm_exit_controls;
-		u32 vm_exit_msr_store_count;
-		u32 vm_exit_msr_load_count;
-		u32 vm_entry_controls;
-		u32 vm_entry_msr_load_count;
-		u32 vm_entry_intr_info_field;
-		u32 vm_entry_exception_error_code;
-		u32 vm_entry_instruction_len;
-		u32 tpr_threshold;
-		u32 secondary_vm_exec_control;
-		u32 vm_instruction_error;
-		u32 vm_exit_reason;
-		u32 vm_exit_intr_info;
-		u32 vm_exit_intr_error_code;
-		u32 idt_vectoring_info_field;
-		u32 idt_vectoring_error_code;
-		u32 vm_exit_instruction_len;
-		u32 vmx_instruction_info;
-		u32 guest_es_limit;
-		u32 guest_cs_limit;
-		u32 guest_ss_limit;
-		u32 guest_ds_limit;
-		u32 guest_fs_limit;
-		u32 guest_gs_limit;
-		u32 guest_ldtr_limit;
-		u32 guest_tr_limit;
-		u32 guest_gdtr_limit;
-		u32 guest_idtr_limit;
-		u32 guest_es_ar_bytes;
-		u32 guest_cs_ar_bytes;
-		u32 guest_ss_ar_bytes;
-		u32 guest_ds_ar_bytes;
-		u32 guest_fs_ar_bytes;
-		u32 guest_gs_ar_bytes;
-		u32 guest_ldtr_ar_bytes;
-		u32 guest_tr_ar_bytes;
-		u32 guest_interruptibility_info;
-		u32 guest_activity_state;
-		u32 guest_sysenter_cs;
-		u32 host_ia32_sysenter_cs;
-		u32 padding32[8]; /* room for future expansion */
-		u16 virtual_processor_id;
-		u16 guest_es_selector;
-		u16 guest_cs_selector;
-		u16 guest_ss_selector;
-		u16 guest_ds_selector;
-		u16 guest_fs_selector;
-		u16 guest_gs_selector;
-		u16 guest_ldtr_selector;
-		u16 guest_tr_selector;
-		u16 host_es_selector;
-		u16 host_cs_selector;
-		u16 host_ss_selector;
-		u16 host_ds_selector;
-		u16 host_fs_selector;
-		u16 host_gs_selector;
-		u16 host_tr_selector;
-	};
-
-
-Authors
--------
-
-These patches were written by:
-     Abel Gordon, abelg <at> il.ibm.com
-     Nadav Har'El, nyh <at> il.ibm.com
-     Orit Wasserman, oritw <at> il.ibm.com
-     Ben-Ami Yassor, benami <at> il.ibm.com
-     Muli Ben-Yehuda, muli <at> il.ibm.com
-
-With contributions by:
-     Anthony Liguori, aliguori <at> us.ibm.com
-     Mike Day, mdday <at> us.ibm.com
-     Michael Factor, factor <at> il.ibm.com
-     Zvi Dubitzky, dubi <at> il.ibm.com
-
-And valuable reviews by:
-     Avi Kivity, avi <at> redhat.com
-     Gleb Natapov, gleb <at> redhat.com
-     Marcelo Tosatti, mtosatti <at> redhat.com
-     Kevin Tian, kevin.tian <at> intel.com
-     and others.
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
deleted file mode 100644
index e26115ce4258..000000000000
--- a/Documentation/virtual/kvm/ppc-pv.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-The PPC KVM paravirtual interface
-=================================
-
-The basic execution principle by which KVM on PowerPC works is to run all kernel
-space code in PR=1 which is user space. This way we trap all privileged
-instructions and can emulate them accordingly.
-
-Unfortunately that is also the downfall. There are quite some privileged
-instructions that needlessly return us to the hypervisor even though they
-could be handled differently.
-
-This is what the PPC PV interface helps with. It takes privileged instructions
-and transforms them into unprivileged ones with some help from the hypervisor.
-This cuts down virtualization costs by about 50% on some of my benchmarks.
-
-The code for that interface can be found in arch/powerpc/kernel/kvm*
-
-Querying for existence
-======================
-
-To find out if we're running on KVM or not, we leverage the device tree. When
-Linux is running on KVM, a node /hypervisor exists. That node contains a
-compatible property with the value "linux,kvm".
-
-Once you determined you're running under a PV capable KVM, you can now use
-hypercalls as described below.
-
-KVM hypercalls
-==============
-
-Inside the device tree's /hypervisor node there's a property called
-'hypercall-instructions'. This property contains at most 4 opcodes that make
-up the hypercall. To call a hypercall, just call these instructions.
-
-The parameters are as follows:
-
-	Register	IN			OUT
-
-	r0		-			volatile
-	r3		1st parameter		Return code
-	r4		2nd parameter		1st output value
-	r5		3rd parameter		2nd output value
-	r6		4th parameter		3rd output value
-	r7		5th parameter		4th output value
-	r8		6th parameter		5th output value
-	r9		7th parameter		6th output value
-	r10		8th parameter		7th output value
-	r11		hypercall number	8th output value
-	r12		-			volatile
-
-Hypercall definitions are shared in generic code, so the same hypercall numbers
-apply for x86 and powerpc alike with the exception that each KVM hypercall
-also needs to be ORed with the KVM vendor code which is (42 << 16).
-
-Return codes can be as follows:
-
-	Code		Meaning
-
-	0		Success
-	12		Hypercall not implemented
-	<0		Error
-
-The magic page
-==============
-
-To enable communication between the hypervisor and guest there is a new shared
-page that contains parts of supervisor visible register state. The guest can
-map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
-
-With this hypercall issued the guest always gets the magic page mapped at the
-desired location. The first parameter indicates the effective address when the
-MMU is enabled. The second parameter indicates the address in real mode, if
-applicable to the target. For now, we always map the page to -4096. This way we
-can access it using absolute load and store functions. The following
-instruction reads the first field of the magic page:
-
-	ld	rX, -4096(0)
-
-The interface is designed to be extensible should there be need later to add
-additional registers to the magic page. If you add fields to the magic page,
-also define a new hypercall feature to indicate that the host can give you more
-registers. Only if the host supports the additional features, make use of them.
-
-The magic page layout is described by struct kvm_vcpu_arch_shared
-in arch/powerpc/include/asm/kvm_para.h.
-
-Magic page features
-===================
-
-When mapping the magic page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE,
-a second return value is passed to the guest. This second return value contains
-a bitmap of available features inside the magic page.
-
-The following enhancements to the magic page are currently available:
-
-  KVM_MAGIC_FEAT_SR		Maps SR registers r/w in the magic page
-  KVM_MAGIC_FEAT_MAS0_TO_SPRG7	Maps MASn, ESR, PIR and high SPRGs
-
-For enhanced features in the magic page, please check for the existence of the
-feature before using them!
-
-Magic page flags
-================
-
-In addition to features that indicate whether a host is capable of a particular
-feature we also have a channel for a guest to tell the guest whether it's capable
-of something. This is what we call "flags".
-
-Flags are passed to the host in the low 12 bits of the Effective Address.
-
-The following flags are currently available for a guest to expose:
-
-  MAGIC_PAGE_FLAG_NOT_MAPPED_NX Guest handles NX bits correctly wrt magic page
-
-MSR bits
-========
-
-The MSR contains bits that require hypervisor intervention and bits that do
-not require direct hypervisor intervention because they only get interpreted
-when entering the guest or don't have any impact on the hypervisor's behavior.
-
-The following bits are safe to be set inside the guest:
-
-  MSR_EE
-  MSR_RI
-
-If any other bit changes in the MSR, please still use mtmsr(d).
-
-Patched instructions
-====================
-
-The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions
-respectively on 32 bit systems with an added offset of 4 to accommodate for big
-endianness.
-
-The following is a list of mapping the Linux kernel performs when running as
-guest. Implementing any of those mappings is optional, as the instruction traps
-also act on the shared page. So calling privileged instructions still works as
-before.
-
-From			To
-====			==
-
-mfmsr	rX		ld	rX, magic_page->msr
-mfsprg	rX, 0		ld	rX, magic_page->sprg0
-mfsprg	rX, 1		ld	rX, magic_page->sprg1
-mfsprg	rX, 2		ld	rX, magic_page->sprg2
-mfsprg	rX, 3		ld	rX, magic_page->sprg3
-mfsrr0	rX		ld	rX, magic_page->srr0
-mfsrr1	rX		ld	rX, magic_page->srr1
-mfdar	rX		ld	rX, magic_page->dar
-mfdsisr	rX		lwz	rX, magic_page->dsisr
-
-mtmsr	rX		std	rX, magic_page->msr
-mtsprg	0, rX		std	rX, magic_page->sprg0
-mtsprg	1, rX		std	rX, magic_page->sprg1
-mtsprg	2, rX		std	rX, magic_page->sprg2
-mtsprg	3, rX		std	rX, magic_page->sprg3
-mtsrr0	rX		std	rX, magic_page->srr0
-mtsrr1	rX		std	rX, magic_page->srr1
-mtdar	rX		std	rX, magic_page->dar
-mtdsisr	rX		stw	rX, magic_page->dsisr
-
-tlbsync			nop
-
-mtmsrd	rX, 0		b	<special mtmsr section>
-mtmsr	rX		b	<special mtmsr section>
-
-mtmsrd	rX, 1		b	<special mtmsrd section>
-
-[Book3S only]
-mtsrin	rX, rY		b	<special mtsrin section>
-
-[BookE only]
-wrteei	[0|1]		b	<special wrteei section>
-
-
-Some instructions require more logic to determine what's going on than a load
-or store instruction can deliver. To enable patching of those, we keep some
-RAM around where we can live translate instructions to. What happens is the
-following:
-
-	1) copy emulation code to memory
-	2) patch that code to fit the emulated instruction
-	3) patch that code to return to the original pc + 4
-	4) patch the original instruction to branch to the new code
-
-That way we can inject an arbitrary amount of code as replacement for a single
-instruction. This allows us to check for pending interrupts when setting EE=1
-for example.
-
-Hypercall ABIs in KVM on PowerPC
-=================================
-1) KVM hypercalls (ePAPR)
-
-These are ePAPR compliant hypercall implementation (mentioned above). Even
-generic hypercalls are implemented here, like the ePAPR idle hcall. These are
-available on all targets.
-
-2) PAPR hypercalls
-
-PAPR hypercalls are needed to run server PowerPC PAPR guests (-M pseries in QEMU).
-These are the same hypercalls that pHyp, the POWER hypervisor implements. Some of
-them are handled in the kernel, some are handled in user space. This is only
-available on book3s_64.
-
-3) OSI hypercalls
-
-Mac-on-Linux is another user of KVM on PowerPC, which has its own hypercall (long
-before KVM). This is supported to maintain compatibility. All these hypercalls get
-forwarded to user space. This is only useful on book3s_32, but can be used with
-book3s_64 as well.
diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt
deleted file mode 100644
index a83b27635fdd..000000000000
--- a/Documentation/virtual/kvm/review-checklist.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-Review checklist for kvm patches
-================================
-
-1.  The patch must follow Documentation/process/coding-style.rst and
-    Documentation/process/submitting-patches.rst.
-
-2.  Patches should be against kvm.git master branch.
-
-3.  If the patch introduces or modifies a new userspace API:
-    - the API must be documented in Documentation/virtual/kvm/api.txt
-    - the API must be discoverable using KVM_CHECK_EXTENSION
-
-4.  New state must include support for save/restore.
-
-5.  New features must default to off (userspace should explicitly request them).
-    Performance improvements can and should default to on.
-
-6.  New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2
-
-7.  Emulator changes should be accompanied by unit tests for qemu-kvm.git
-    kvm/test directory.
-
-8.  Changes should be vendor neutral when possible.  Changes to common code
-    are better than duplicating changes to vendor code.
-
-9.  Similarly, prefer changes to arch independent code than to arch dependent
-    code.
-
-10. User/kernel interfaces and guest/host interfaces must be 64-bit clean
-    (all variables and sizes naturally aligned on 64-bit; use specific types
-    only - u64 rather than ulong).
-
-11. New guest visible features must either be documented in a hardware manual
-    or be accompanied by documentation.
-
-12. Features must be robust against reset and kexec - for example, shared
-    host/guest memory must be unshared to prevent the host from writing to
-    guest memory that the guest has not reserved for this purpose.
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt
deleted file mode 100644
index 7c52e5f8b210..000000000000
--- a/Documentation/virtual/kvm/s390-diag.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-The s390 DIAGNOSE call on KVM
-=============================
-
-KVM on s390 supports the DIAGNOSE call for making hypercalls, both for
-native hypercalls and for selected hypercalls found on other s390
-hypervisors.
-
-Note that bits are numbered as by the usual s390 convention (most significant
-bit on the left).
-
-
-General remarks
----------------
-
-DIAGNOSE calls by the guest cause a mandatory intercept. This implies
-all supported DIAGNOSE calls need to be handled by either KVM or its
-userspace.
-
-All DIAGNOSE calls supported by KVM use the RS-a format:
-
---------------------------------------
-|  '83'  | R1 | R3 | B2 |     D2     |
---------------------------------------
-0        8    12   16   20           31
-
-The second-operand address (obtained by the base/displacement calculation)
-is not used to address data. Instead, bits 48-63 of this address specify
-the function code, and bits 0-47 are ignored.
-
-The supported DIAGNOSE function codes vary by the userspace used. For
-DIAGNOSE function codes not specific to KVM, please refer to the
-documentation for the s390 hypervisors defining them.
-
-
-DIAGNOSE function code 'X'500' - KVM virtio functions
------------------------------------------------------
-
-If the function code specifies 0x500, various virtio-related functions
-are performed.
-
-General register 1 contains the virtio subfunction code. Supported
-virtio subfunctions depend on KVM's userspace. Generally, userspace
-provides either s390-virtio (subcodes 0-2) or virtio-ccw (subcode 3).
-
-Upon completion of the DIAGNOSE instruction, general register 2 contains
-the function's return code, which is either a return code or a subcode
-specific value.
-
-Subcode 0 - s390-virtio notification and early console printk
-    Handled by userspace.
-
-Subcode 1 - s390-virtio reset
-    Handled by userspace.
-
-Subcode 2 - s390-virtio set status
-    Handled by userspace.
-
-Subcode 3 - virtio-ccw notification
-    Handled by either userspace or KVM (ioeventfd case).
-
-    General register 2 contains a subchannel-identification word denoting
-    the subchannel of the virtio-ccw proxy device to be notified.
-
-    General register 3 contains the number of the virtqueue to be notified.
-
-    General register 4 contains a 64bit identifier for KVM usage (the
-    kvm_io_bus cookie). If general register 4 does not contain a valid
-    identifier, it is ignored.
-
-    After completion of the DIAGNOSE call, general register 2 may contain
-    a 64bit identifier (in the kvm_io_bus cookie case), or a negative
-    error value, if an internal error occurred.
-
-    See also the virtio standard for a discussion of this hypercall.
-
-
-DIAGNOSE function code 'X'501 - KVM breakpoint
-----------------------------------------------
-
-If the function code specifies 0x501, breakpoint functions may be performed.
-This function code is handled by userspace.
-
-This diagnose function code has no subfunctions and uses no parameters.
diff --git a/Documentation/virtual/kvm/timekeeping.txt b/Documentation/virtual/kvm/timekeeping.txt
deleted file mode 100644
index 76808a17ad84..000000000000
--- a/Documentation/virtual/kvm/timekeeping.txt
+++ /dev/null
@@ -1,612 +0,0 @@
-
-	Timekeeping Virtualization for X86-Based Architectures
-
-	Zachary Amsden <zamsden@redhat.com>
-	Copyright (c) 2010, Red Hat.  All rights reserved.
-
-1) Overview
-2) Timing Devices
-3) TSC Hardware
-4) Virtualization Problems
-
-=========================================================================
-
-1) Overview
-
-One of the most complicated parts of the X86 platform, and specifically,
-the virtualization of this platform is the plethora of timing devices available
-and the complexity of emulating those devices.  In addition, virtualization of
-time introduces a new set of challenges because it introduces a multiplexed
-division of time beyond the control of the guest CPU.
-
-First, we will describe the various timekeeping hardware available, then
-present some of the problems which arise and solutions available, giving
-specific recommendations for certain classes of KVM guests.
-
-The purpose of this document is to collect data and information relevant to
-timekeeping which may be difficult to find elsewhere, specifically,
-information relevant to KVM and hardware-based virtualization.
-
-=========================================================================
-
-2) Timing Devices
-
-First we discuss the basic hardware devices available.  TSC and the related
-KVM clock are special enough to warrant a full exposition and are described in
-the following section.
-
-2.1) i8254 - PIT
-
-One of the first timer devices available is the programmable interrupt timer,
-or PIT.  The PIT has a fixed frequency 1.193182 MHz base clock and three
-channels which can be programmed to deliver periodic or one-shot interrupts.
-These three channels can be configured in different modes and have individual
-counters.  Channel 1 and 2 were not available for general use in the original
-IBM PC, and historically were connected to control RAM refresh and the PC
-speaker.  Now the PIT is typically integrated as part of an emulated chipset
-and a separate physical PIT is not used.
-
-The PIT uses I/O ports 0x40 - 0x43.  Access to the 16-bit counters is done
-using single or multiple byte access to the I/O ports.  There are 6 modes
-available, but not all modes are available to all timers, as only timer 2
-has a connected gate input, required for modes 1 and 5.  The gate line is
-controlled by port 61h, bit 0, as illustrated in the following diagram.
-
- --------------             ----------------
-|              |           |                |
-|  1.1932 MHz  |---------->| CLOCK      OUT | ---------> IRQ 0
-|    Clock     |   |       |                |
- --------------    |    +->| GATE  TIMER 0  |
-                   |        ----------------
-                   |
-                   |        ----------------
-                   |       |                |
-                   |------>| CLOCK      OUT | ---------> 66.3 KHZ DRAM
-                   |       |                |            (aka /dev/null)
-                   |    +->| GATE  TIMER 1  |
-                   |        ----------------
-                   |
-                   |        ----------------
-                   |       |                |
-                   |------>| CLOCK      OUT | ---------> Port 61h, bit 5
-                           |                |      |
-Port 61h, bit 0 ---------->| GATE  TIMER 2  |       \_.----   ____
-                            ----------------         _|    )--|LPF|---Speaker
-                                                    / *----   \___/
-Port 61h, bit 1 -----------------------------------/
-
-The timer modes are now described.
-
-Mode 0: Single Timeout.   This is a one-shot software timeout that counts down
- when the gate is high (always true for timers 0 and 1).  When the count
- reaches zero, the output goes high.
-
-Mode 1: Triggered One-shot.  The output is initially set high.  When the gate
- line is set high, a countdown is initiated (which does not stop if the gate is
- lowered), during which the output is set low.  When the count reaches zero,
- the output goes high.
-
-Mode 2: Rate Generator.  The output is initially set high.  When the countdown
- reaches 1, the output goes low for one count and then returns high.  The value
- is reloaded and the countdown automatically resumes.  If the gate line goes
- low, the count is halted.  If the output is low when the gate is lowered, the
- output automatically goes high (this only affects timer 2).
-
-Mode 3: Square Wave.   This generates a high / low square wave.  The count
- determines the length of the pulse, which alternates between high and low
- when zero is reached.  The count only proceeds when gate is high and is
- automatically reloaded on reaching zero.  The count is decremented twice at
- each clock to generate a full high / low cycle at the full periodic rate.
- If the count is even, the clock remains high for N/2 counts and low for N/2
- counts; if the clock is odd, the clock is high for (N+1)/2 counts and low
- for (N-1)/2 counts.  Only even values are latched by the counter, so odd
- values are not observed when reading.  This is the intended mode for timer 2,
- which generates sine-like tones by low-pass filtering the square wave output.
-
-Mode 4: Software Strobe.  After programming this mode and loading the counter,
- the output remains high until the counter reaches zero.  Then the output
- goes low for 1 clock cycle and returns high.  The counter is not reloaded.
- Counting only occurs when gate is high.
-
-Mode 5: Hardware Strobe.  After programming and loading the counter, the
- output remains high.  When the gate is raised, a countdown is initiated
- (which does not stop if the gate is lowered).  When the counter reaches zero,
- the output goes low for 1 clock cycle and then returns high.  The counter is
- not reloaded.
-
-In addition to normal binary counting, the PIT supports BCD counting.  The
-command port, 0x43 is used to set the counter and mode for each of the three
-timers.
-
-PIT commands, issued to port 0x43, using the following bit encoding:
-
-Bit 7-4: Command (See table below)
-Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
-Bit 0  : Binary (0) / BCD (1)
-
-Command table:
-
-0000 - Latch Timer 0 count for port 0x40
-	sample and hold the count to be read in port 0x40;
-	additional commands ignored until counter is read;
-	mode bits ignored.
-
-0001 - Set Timer 0 LSB mode for port 0x40
-	set timer to read LSB only and force MSB to zero;
-	mode bits set timer mode
-
-0010 - Set Timer 0 MSB mode for port 0x40
-	set timer to read MSB only and force LSB to zero;
-	mode bits set timer mode
-
-0011 - Set Timer 0 16-bit mode for port 0x40
-	set timer to read / write LSB first, then MSB;
-	mode bits set timer mode
-
-0100 - Latch Timer 1 count for port 0x41 - as described above
-0101 - Set Timer 1 LSB mode for port 0x41 - as described above
-0110 - Set Timer 1 MSB mode for port 0x41 - as described above
-0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
-
-1000 - Latch Timer 2 count for port 0x42 - as described above
-1001 - Set Timer 2 LSB mode for port 0x42 - as described above
-1010 - Set Timer 2 MSB mode for port 0x42 - as described above
-1011 - Set Timer 2 16-bit mode for port 0x42 as described above
-
-1101 - General counter latch
-	Latch combination of counters into corresponding ports
-	Bit 3 = Counter 2
-	Bit 2 = Counter 1
-	Bit 1 = Counter 0
-	Bit 0 = Unused
-
-1110 - Latch timer status
-	Latch combination of counter mode into corresponding ports
-	Bit 3 = Counter 2
-	Bit 2 = Counter 1
-	Bit 1 = Counter 0
-
-	The output of ports 0x40-0x42 following this command will be:
-
-	Bit 7 = Output pin
-	Bit 6 = Count loaded (0 if timer has expired)
-	Bit 5-4 = Read / Write mode
-	    01 = MSB only
-	    10 = LSB only
-	    11 = LSB / MSB (16-bit)
-	Bit 3-1 = Mode
-	Bit 0 = Binary (0) / BCD mode (1)
-
-2.2) RTC
-
-The second device which was available in the original PC was the MC146818 real
-time clock.  The original device is now obsolete, and usually emulated by the
-system chipset, sometimes by an HPET and some frankenstein IRQ routing.
-
-The RTC is accessed through CMOS variables, which uses an index register to
-control which bytes are read.  Since there is only one index register, read
-of the CMOS and read of the RTC require lock protection (in addition, it is
-dangerous to allow userspace utilities such as hwclock to have direct RTC
-access, as they could corrupt kernel reads and writes of CMOS memory).
-
-The RTC generates an interrupt which is usually routed to IRQ 8.  The interrupt
-can function as a periodic timer, an additional once a day alarm, and can issue
-interrupts after an update of the CMOS registers by the MC146818 is complete.
-The type of interrupt is signalled in the RTC status registers.
-
-The RTC will update the current time fields by battery power even while the
-system is off.  The current time fields should not be read while an update is
-in progress, as indicated in the status register.
-
-The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
-programmed to a 32kHz divider if the RTC is to count seconds.
-
-This is the RAM map originally used for the RTC/CMOS:
-
-Location    Size    Description
-------------------------------------------
-00h         byte    Current second (BCD)
-01h         byte    Seconds alarm (BCD)
-02h         byte    Current minute (BCD)
-03h         byte    Minutes alarm (BCD)
-04h         byte    Current hour (BCD)
-05h         byte    Hours alarm (BCD)
-06h         byte    Current day of week (BCD)
-07h         byte    Current day of month (BCD)
-08h         byte    Current month (BCD)
-09h         byte    Current year (BCD)
-0Ah         byte    Register A
-                       bit 7   = Update in progress
-                       bit 6-4 = Divider for clock
-                                  000 = 4.194 MHz
-                                  001 = 1.049 MHz
-                                  010 = 32 kHz
-                                  10X = test modes
-                                  110 = reset / disable
-                                  111 = reset / disable
-                       bit 3-0 = Rate selection for periodic interrupt
-                                  000 = periodic timer disabled
-                                  001 = 3.90625 uS
-                                  010 = 7.8125 uS
-                                  011 = .122070 mS
-                                  100 = .244141 mS
-                                     ...
-                                 1101 = 125 mS
-                                 1110 = 250 mS
-                                 1111 = 500 mS
-0Bh         byte    Register B
-                       bit 7   = Run (0) / Halt (1)
-                       bit 6   = Periodic interrupt enable
-                       bit 5   = Alarm interrupt enable
-                       bit 4   = Update-ended interrupt enable
-                       bit 3   = Square wave interrupt enable
-                       bit 2   = BCD calendar (0) / Binary (1)
-                       bit 1   = 12-hour mode (0) / 24-hour mode (1)
-                       bit 0   = 0 (DST off) / 1 (DST enabled)
-OCh         byte    Register C (read only)
-                       bit 7   = interrupt request flag (IRQF)
-                       bit 6   = periodic interrupt flag (PF)
-                       bit 5   = alarm interrupt flag (AF)
-                       bit 4   = update interrupt flag (UF)
-                       bit 3-0 = reserved
-ODh         byte    Register D (read only)
-                       bit 7   = RTC has power
-                       bit 6-0 = reserved
-32h         byte    Current century BCD (*)
-  (*) location vendor specific and now determined from ACPI global tables
-
-2.3) APIC
-
-On Pentium and later processors, an on-board timer is available to each CPU
-as part of the Advanced Programmable Interrupt Controller.  The APIC is
-accessed through memory-mapped registers and provides interrupt service to each
-CPU, used for IPIs and local timer interrupts.
-
-Although in theory the APIC is a safe and stable source for local interrupts,
-in practice, many bugs and glitches have occurred due to the special nature of
-the APIC CPU-local memory-mapped hardware.  Beware that CPU errata may affect
-the use of the APIC and that workarounds may be required.  In addition, some of
-these workarounds pose unique constraints for virtualization - requiring either
-extra overhead incurred from extra reads of memory-mapped I/O or additional
-functionality that may be more computationally expensive to implement.
-
-Since the APIC is documented quite well in the Intel and AMD manuals, we will
-avoid repetition of the detail here.  It should be pointed out that the APIC
-timer is programmed through the LVT (local vector timer) register, is capable
-of one-shot or periodic operation, and is based on the bus clock divided down
-by the programmable divider register.
-
-2.4) HPET
-
-HPET is quite complex, and was originally intended to replace the PIT / RTC
-support of the X86 PC.  It remains to be seen whether that will be the case, as
-the de facto standard of PC hardware is to emulate these older devices.  Some
-systems designated as legacy free may support only the HPET as a hardware timer
-device.
-
-The HPET spec is rather loose and vague, requiring at least 3 hardware timers,
-but allowing implementation freedom to support many more.  It also imposes no
-fixed rate on the timer frequency, but does impose some extremal values on
-frequency, error and slew.
-
-In general, the HPET is recommended as a high precision (compared to PIT /RTC)
-time source which is independent of local variation (as there is only one HPET
-in any given system).  The HPET is also memory-mapped, and its presence is
-indicated through ACPI tables by the BIOS.
-
-Detailed specification of the HPET is beyond the current scope of this
-document, as it is also very well documented elsewhere.
-
-2.5) Offboard Timers
-
-Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
-timing chips built into the cards which may have registers which are accessible
-to kernel or user drivers.  To the author's knowledge, using these to generate
-a clocksource for a Linux or other kernel has not yet been attempted and is in
-general frowned upon as not playing by the agreed rules of the game.  Such a
-timer device would require additional support to be virtualized properly and is
-not considered important at this time as no known operating system does this.
-
-=========================================================================
-
-3) TSC Hardware
-
-The TSC or time stamp counter is relatively simple in theory; it counts
-instruction cycles issued by the processor, which can be used as a measure of
-time.  In practice, due to a number of problems, it is the most complicated
-timekeeping device to use.
-
-The TSC is represented internally as a 64-bit MSR which can be read with the
-RDMSR, RDTSC, or RDTSCP (when available) instructions.  In the past, hardware
-limitations made it possible to write the TSC, but generally on old hardware it
-was only possible to write the low 32-bits of the 64-bit counter, and the upper
-32-bits of the counter were cleared.  Now, however, on Intel processors family
-0Fh, for models 3, 4 and 6, and family 06h, models e and f, this restriction
-has been lifted and all 64-bits are writable.  On AMD systems, the ability to
-write the TSC MSR is not an architectural guarantee.
-
-The TSC is accessible from CPL-0 and conditionally, for CPL > 0 software by
-means of the CR4.TSD bit, which when enabled, disables CPL > 0 TSC access.
-
-Some vendors have implemented an additional instruction, RDTSCP, which returns
-atomically not just the TSC, but an indicator which corresponds to the
-processor number.  This can be used to index into an array of TSC variables to
-determine offset information in SMP systems where TSCs are not synchronized.
-The presence of this instruction must be determined by consulting CPUID feature
-bits.
-
-Both VMX and SVM provide extension fields in the virtualization hardware which
-allows the guest visible TSC to be offset by a constant.  Newer implementations
-promise to allow the TSC to additionally be scaled, but this hardware is not
-yet widely available.
-
-3.1) TSC synchronization
-
-The TSC is a CPU-local clock in most implementations.  This means, on SMP
-platforms, the TSCs of different CPUs may start at different times depending
-on when the CPUs are powered on.  Generally, CPUs on the same die will share
-the same clock, however, this is not always the case.
-
-The BIOS may attempt to resynchronize the TSCs during the poweron process and
-the operating system or other system software may attempt to do this as well.
-Several hardware limitations make the problem worse - if it is not possible to
-write the full 64-bits of the TSC, it may be impossible to match the TSC in
-newly arriving CPUs to that of the rest of the system, resulting in
-unsynchronized TSCs.  This may be done by BIOS or system software, but in
-practice, getting a perfectly synchronized TSC will not be possible unless all
-values are read from the same clock, which generally only is possible on single
-socket systems or those with special hardware support.
-
-3.2) TSC and CPU hotplug
-
-As touched on already, CPUs which arrive later than the boot time of the system
-may not have a TSC value that is synchronized with the rest of the system.
-Either system software, BIOS, or SMM code may actually try to establish the TSC
-to a value matching the rest of the system, but a perfect match is usually not
-a guarantee.  This can have the effect of bringing a system from a state where
-TSC is synchronized back to a state where TSC synchronization flaws, however
-small, may be exposed to the OS and any virtualization environment.
-
-3.3) TSC and multi-socket / NUMA
-
-Multi-socket systems, especially large multi-socket systems are likely to have
-individual clocksources rather than a single, universally distributed clock.
-Since these clocks are driven by different crystals, they will not have
-perfectly matched frequency, and temperature and electrical variations will
-cause the CPU clocks, and thus the TSCs to drift over time.  Depending on the
-exact clock and bus design, the drift may or may not be fixed in absolute
-error, and may accumulate over time.
-
-In addition, very large systems may deliberately slew the clocks of individual
-cores.  This technique, known as spread-spectrum clocking, reduces EMI at the
-clock frequency and harmonics of it, which may be required to pass FCC
-standards for telecommunications and computer equipment.
-
-It is recommended not to trust the TSCs to remain synchronized on NUMA or
-multiple socket systems for these reasons.
-
-3.4) TSC and C-states
-
-C-states, or idling states of the processor, especially C1E and deeper sleep
-states may be problematic for TSC as well.  The TSC may stop advancing in such
-a state, resulting in a TSC which is behind that of other CPUs when execution
-is resumed.  Such CPUs must be detected and flagged by the operating system
-based on CPU and chipset identifications.
-
-The TSC in such a case may be corrected by catching it up to a known external
-clocksource.
-
-3.5) TSC frequency change / P-states
-
-To make things slightly more interesting, some CPUs may change frequency.  They
-may or may not run the TSC at the same rate, and because the frequency change
-may be staggered or slewed, at some points in time, the TSC rate may not be
-known other than falling within a range of values.  In this case, the TSC will
-not be a stable time source, and must be calibrated against a known, stable,
-external clock to be a usable source of time.
-
-Whether the TSC runs at a constant rate or scales with the P-state is model
-dependent and must be determined by inspecting CPUID, chipset or vendor
-specific MSR fields.
-
-In addition, some vendors have known bugs where the P-state is actually
-compensated for properly during normal operation, but when the processor is
-inactive, the P-state may be raised temporarily to service cache misses from
-other processors.  In such cases, the TSC on halted CPUs could advance faster
-than that of non-halted processors.  AMD Turion processors are known to have
-this problem.
-
-3.6) TSC and STPCLK / T-states
-
-External signals given to the processor may also have the effect of stopping
-the TSC.  This is typically done for thermal emergency power control to prevent
-an overheating condition, and typically, there is no way to detect that this
-condition has happened.
-
-3.7) TSC virtualization - VMX
-
-VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
-instructions, which is enough for full virtualization of TSC in any manner.  In
-addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
-field specified in the VMCS.  Special instructions must be used to read and
-write the VMCS field.
-
-3.8) TSC virtualization - SVM
-
-SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
-instructions, which is enough for full virtualization of TSC in any manner.  In
-addition, SVM allows passing through the host TSC plus an additional offset
-field specified in the SVM control block.
-
-3.9) TSC feature bits in Linux
-
-In summary, there is no way to guarantee the TSC remains in perfect
-synchronization unless it is explicitly guaranteed by the architecture.  Even
-if so, the TSCs in multi-sockets or NUMA systems may still run independently
-despite being locally consistent.
-
-The following feature bits are used by Linux to signal various TSC attributes,
-but they can only be taken to be meaningful for UP or single node systems.
-
-X86_FEATURE_TSC 		: The TSC is available in hardware
-X86_FEATURE_RDTSCP		: The RDTSCP instruction is available
-X86_FEATURE_CONSTANT_TSC 	: The TSC rate is unchanged with P-states
-X86_FEATURE_NONSTOP_TSC		: The TSC does not stop in C-states
-X86_FEATURE_TSC_RELIABLE	: TSC sync checks are skipped (VMware)
-
-4) Virtualization Problems
-
-Timekeeping is especially problematic for virtualization because a number of
-challenges arise.  The most obvious problem is that time is now shared between
-the host and, potentially, a number of virtual machines.  Thus the virtual
-operating system does not run with 100% usage of the CPU, despite the fact that
-it may very well make that assumption.  It may expect it to remain true to very
-exacting bounds when interrupt sources are disabled, but in reality only its
-virtual interrupt sources are disabled, and the machine may still be preempted
-at any time.  This causes problems as the passage of real time, the injection
-of machine interrupts and the associated clock sources are no longer completely
-synchronized with real time.
-
-This same problem can occur on native hardware to a degree, as SMM mode may
-steal cycles from the naturally on X86 systems when SMM mode is used by the
-BIOS, but not in such an extreme fashion.  However, the fact that SMM mode may
-cause similar problems to virtualization makes it a good justification for
-solving many of these problems on bare metal.
-
-4.1) Interrupt clocking
-
-One of the most immediate problems that occurs with legacy operating systems
-is that the system timekeeping routines are often designed to keep track of
-time by counting periodic interrupts.  These interrupts may come from the PIT
-or the RTC, but the problem is the same: the host virtualization engine may not
-be able to deliver the proper number of interrupts per second, and so guest
-time may fall behind.  This is especially problematic if a high interrupt rate
-is selected, such as 1000 HZ, which is unfortunately the default for many Linux
-guests.
-
-There are three approaches to solving this problem; first, it may be possible
-to simply ignore it.  Guests which have a separate time source for tracking
-'wall clock' or 'real time' may not need any adjustment of their interrupts to
-maintain proper time.  If this is not sufficient, it may be necessary to inject
-additional interrupts into the guest in order to increase the effective
-interrupt rate.  This approach leads to complications in extreme conditions,
-where host load or guest lag is too much to compensate for, and thus another
-solution to the problem has risen: the guest may need to become aware of lost
-ticks and compensate for them internally.  Although promising in theory, the
-implementation of this policy in Linux has been extremely error prone, and a
-number of buggy variants of lost tick compensation are distributed across
-commonly used Linux systems.
-
-Windows uses periodic RTC clocking as a means of keeping time internally, and
-thus requires interrupt slewing to keep proper time.  It does use a low enough
-rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
-practice.
-
-4.2) TSC sampling and serialization
-
-As the highest precision time source available, the cycle counter of the CPU
-has aroused much interest from developers.  As explained above, this timer has
-many problems unique to its nature as a local, potentially unstable and
-potentially unsynchronized source.  One issue which is not unique to the TSC,
-but is highlighted because of its very precise nature is sampling delay.  By
-definition, the counter, once read is already old.  However, it is also
-possible for the counter to be read ahead of the actual use of the result.
-This is a consequence of the superscalar execution of the instruction stream,
-which may execute instructions out of order.  Such execution is called
-non-serialized.  Forcing serialized execution is necessary for precise
-measurement with the TSC, and requires a serializing instruction, such as CPUID
-or an MSR read.
-
-Since CPUID may actually be virtualized by a trap and emulate mechanism, this
-serialization can pose a performance issue for hardware virtualization.  An
-accurate time stamp counter reading may therefore not always be available, and
-it may be necessary for an implementation to guard against "backwards" reads of
-the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
-system.
-
-4.3) Timespec aliasing
-
-Additionally, this lack of serialization from the TSC poses another challenge
-when using results of the TSC when measured against another time source.  As
-the TSC is much higher precision, many possible values of the TSC may be read
-while another clock is still expressing the same value.
-
-That is, you may read (T,T+10) while external clock C maintains the same value.
-Due to non-serialized reads, you may actually end up with a range which
-fluctuates - from (T-1.. T+10).  Thus, any time calculated from a TSC, but
-calibrated against an external value may have a range of valid values.
-Re-calibrating this computation may actually cause time, as computed after the
-calibration, to go backwards, compared with time computed before the
-calibration.
-
-This problem is particularly pronounced with an internal time source in Linux,
-the kernel time, which is expressed in the theoretically high resolution
-timespec - but which advances in much larger granularity intervals, sometimes
-at the rate of jiffies, and possibly in catchup modes, at a much larger step.
-
-This aliasing requires care in the computation and recalibration of kvmclock
-and any other values derived from TSC computation (such as TSC virtualization
-itself).
-
-4.4) Migration
-
-Migration of a virtual machine raises problems for timekeeping in two ways.
-First, the migration itself may take time, during which interrupts cannot be
-delivered, and after which, the guest time may need to be caught up.  NTP may
-be able to help to some degree here, as the clock correction required is
-typically small enough to fall in the NTP-correctable window.
-
-An additional concern is that timers based off the TSC (or HPET, if the raw bus
-clock is exposed) may now be running at different rates, requiring compensation
-in some way in the hypervisor by virtualizing these timers.  In addition,
-migrating to a faster machine may preclude the use of a passthrough TSC, as a
-faster clock cannot be made visible to a guest without the potential of time
-advancing faster than usual.  A slower clock is less of a problem, as it can
-always be caught up to the original rate.  KVM clock avoids these problems by
-simply storing multipliers and offsets against the TSC for the guest to convert
-back into nanosecond resolution values.
-
-4.5) Scheduling
-
-Since scheduling may be based on precise timing and firing of interrupts, the
-scheduling algorithms of an operating system may be adversely affected by
-virtualization.  In theory, the effect is random and should be universally
-distributed, but in contrived as well as real scenarios (guest device access,
-causes of virtualization exits, possible context switch), this may not always
-be the case.  The effect of this has not been well studied.
-
-In an attempt to work around this, several implementations have provided a
-paravirtualized scheduler clock, which reveals the true amount of CPU time for
-which a virtual machine has been running.
-
-4.6) Watchdogs
-
-Watchdog timers, such as the lock detector in Linux may fire accidentally when
-running under hardware virtualization due to timer interrupts being delayed or
-misinterpretation of the passage of real time.  Usually, these warnings are
-spurious and can be ignored, but in some circumstances it may be necessary to
-disable such detection.
-
-4.7) Delays and precision timing
-
-Precise timing and delays may not be possible in a virtualized system.  This
-can happen if the system is controlling physical hardware, or issues delays to
-compensate for slower I/O to and from devices.  The first issue is not solvable
-in general for a virtualized system; hardware control software can't be
-adequately virtualized without a full real-time operating system, which would
-require an RT aware virtualization platform.
-
-The second issue may cause performance problems, but this is unlikely to be a
-significant issue.  In many cases these delays may be eliminated through
-configuration or paravirtualization.
-
-4.8) Covert channels and leaks
-
-In addition to the above problems, time information will inevitably leak to the
-guest about the host in anything but a perfect implementation of virtualized
-time.  This may allow the guest to infer the presence of a hypervisor (as in a
-red-pill type detection), and it may allow information to leak between guests
-by using CPU utilization itself as a signalling channel.  Preventing such
-problems would require completely isolated virtual time which may not track
-real time any longer.  This may be useful in certain security or QA contexts,
-but in general isn't recommended for real-world deployment scenarios.
diff --git a/Documentation/virtual/kvm/vcpu-requests.rst b/Documentation/virtual/kvm/vcpu-requests.rst
deleted file mode 100644
index 5feb3706a7ae..000000000000
--- a/Documentation/virtual/kvm/vcpu-requests.rst
+++ /dev/null
@@ -1,307 +0,0 @@
-=================
-KVM VCPU Requests
-=================
-
-Overview
-========
-
-KVM supports an internal API enabling threads to request a VCPU thread to
-perform some activity.  For example, a thread may request a VCPU to flush
-its TLB with a VCPU request.  The API consists of the following functions::
-
-  /* Check if any requests are pending for VCPU @vcpu. */
-  bool kvm_request_pending(struct kvm_vcpu *vcpu);
-
-  /* Check if VCPU @vcpu has request @req pending. */
-  bool kvm_test_request(int req, struct kvm_vcpu *vcpu);
-
-  /* Clear request @req for VCPU @vcpu. */
-  void kvm_clear_request(int req, struct kvm_vcpu *vcpu);
-
-  /*
-   * Check if VCPU @vcpu has request @req pending. When the request is
-   * pending it will be cleared and a memory barrier, which pairs with
-   * another in kvm_make_request(), will be issued.
-   */
-  bool kvm_check_request(int req, struct kvm_vcpu *vcpu);
-
-  /*
-   * Make request @req of VCPU @vcpu. Issues a memory barrier, which pairs
-   * with another in kvm_check_request(), prior to setting the request.
-   */
-  void kvm_make_request(int req, struct kvm_vcpu *vcpu);
-
-  /* Make request @req of all VCPUs of the VM with struct kvm @kvm. */
-  bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
-
-Typically a requester wants the VCPU to perform the activity as soon
-as possible after making the request.  This means most requests
-(kvm_make_request() calls) are followed by a call to kvm_vcpu_kick(),
-and kvm_make_all_cpus_request() has the kicking of all VCPUs built
-into it.
-
-VCPU Kicks
-----------
-
-The goal of a VCPU kick is to bring a VCPU thread out of guest mode in
-order to perform some KVM maintenance.  To do so, an IPI is sent, forcing
-a guest mode exit.  However, a VCPU thread may not be in guest mode at the
-time of the kick.  Therefore, depending on the mode and state of the VCPU
-thread, there are two other actions a kick may take.  All three actions
-are listed below:
-
-1) Send an IPI.  This forces a guest mode exit.
-2) Waking a sleeping VCPU.  Sleeping VCPUs are VCPU threads outside guest
-   mode that wait on waitqueues.  Waking them removes the threads from
-   the waitqueues, allowing the threads to run again.  This behavior
-   may be suppressed, see KVM_REQUEST_NO_WAKEUP below.
-3) Nothing.  When the VCPU is not in guest mode and the VCPU thread is not
-   sleeping, then there is nothing to do.
-
-VCPU Mode
----------
-
-VCPUs have a mode state, ``vcpu->mode``, that is used to track whether the
-guest is running in guest mode or not, as well as some specific
-outside guest mode states.  The architecture may use ``vcpu->mode`` to
-ensure VCPU requests are seen by VCPUs (see "Ensuring Requests Are Seen"),
-as well as to avoid sending unnecessary IPIs (see "IPI Reduction"), and
-even to ensure IPI acknowledgements are waited upon (see "Waiting for
-Acknowledgements").  The following modes are defined:
-
-OUTSIDE_GUEST_MODE
-
-  The VCPU thread is outside guest mode.
-
-IN_GUEST_MODE
-
-  The VCPU thread is in guest mode.
-
-EXITING_GUEST_MODE
-
-  The VCPU thread is transitioning from IN_GUEST_MODE to
-  OUTSIDE_GUEST_MODE.
-
-READING_SHADOW_PAGE_TABLES
-
-  The VCPU thread is outside guest mode, but it wants the sender of
-  certain VCPU requests, namely KVM_REQ_TLB_FLUSH, to wait until the VCPU
-  thread is done reading the page tables.
-
-VCPU Request Internals
-======================
-
-VCPU requests are simply bit indices of the ``vcpu->requests`` bitmap.
-This means general bitops, like those documented in [atomic-ops]_ could
-also be used, e.g. ::
-
-  clear_bit(KVM_REQ_UNHALT & KVM_REQUEST_MASK, &vcpu->requests);
-
-However, VCPU request users should refrain from doing so, as it would
-break the abstraction.  The first 8 bits are reserved for architecture
-independent requests, all additional bits are available for architecture
-dependent requests.
-
-Architecture Independent Requests
----------------------------------
-
-KVM_REQ_TLB_FLUSH
-
-  KVM's common MMU notifier may need to flush all of a guest's TLB
-  entries, calling kvm_flush_remote_tlbs() to do so.  Architectures that
-  choose to use the common kvm_flush_remote_tlbs() implementation will
-  need to handle this VCPU request.
-
-KVM_REQ_MMU_RELOAD
-
-  When shadow page tables are used and memory slots are removed it's
-  necessary to inform each VCPU to completely refresh the tables.  This
-  request is used for that.
-
-KVM_REQ_PENDING_TIMER
-
-  This request may be made from a timer handler run on the host on behalf
-  of a VCPU.  It informs the VCPU thread to inject a timer interrupt.
-
-KVM_REQ_UNHALT
-
-  This request may be made from the KVM common function kvm_vcpu_block(),
-  which is used to emulate an instruction that causes a CPU to halt until
-  one of an architectural specific set of events and/or interrupts is
-  received (determined by checking kvm_arch_vcpu_runnable()).  When that
-  event or interrupt arrives kvm_vcpu_block() makes the request.  This is
-  in contrast to when kvm_vcpu_block() returns due to any other reason,
-  such as a pending signal, which does not indicate the VCPU's halt
-  emulation should stop, and therefore does not make the request.
-
-KVM_REQUEST_MASK
-----------------
-
-VCPU requests should be masked by KVM_REQUEST_MASK before using them with
-bitops.  This is because only the lower 8 bits are used to represent the
-request's number.  The upper bits are used as flags.  Currently only two
-flags are defined.
-
-VCPU Request Flags
-------------------
-
-KVM_REQUEST_NO_WAKEUP
-
-  This flag is applied to requests that only need immediate attention
-  from VCPUs running in guest mode.  That is, sleeping VCPUs do not need
-  to be awaken for these requests.  Sleeping VCPUs will handle the
-  requests when they are awaken later for some other reason.
-
-KVM_REQUEST_WAIT
-
-  When requests with this flag are made with kvm_make_all_cpus_request(),
-  then the caller will wait for each VCPU to acknowledge its IPI before
-  proceeding.  This flag only applies to VCPUs that would receive IPIs.
-  If, for example, the VCPU is sleeping, so no IPI is necessary, then
-  the requesting thread does not wait.  This means that this flag may be
-  safely combined with KVM_REQUEST_NO_WAKEUP.  See "Waiting for
-  Acknowledgements" for more information about requests with
-  KVM_REQUEST_WAIT.
-
-VCPU Requests with Associated State
-===================================
-
-Requesters that want the receiving VCPU to handle new state need to ensure
-the newly written state is observable to the receiving VCPU thread's CPU
-by the time it observes the request.  This means a write memory barrier
-must be inserted after writing the new state and before setting the VCPU
-request bit.  Additionally, on the receiving VCPU thread's side, a
-corresponding read barrier must be inserted after reading the request bit
-and before proceeding to read the new state associated with it.  See
-scenario 3, Message and Flag, of [lwn-mb]_ and the kernel documentation
-[memory-barriers]_.
-
-The pair of functions, kvm_check_request() and kvm_make_request(), provide
-the memory barriers, allowing this requirement to be handled internally by
-the API.
-
-Ensuring Requests Are Seen
-==========================
-
-When making requests to VCPUs, we want to avoid the receiving VCPU
-executing in guest mode for an arbitrary long time without handling the
-request.  We can be sure this won't happen as long as we ensure the VCPU
-thread checks kvm_request_pending() before entering guest mode and that a
-kick will send an IPI to force an exit from guest mode when necessary.
-Extra care must be taken to cover the period after the VCPU thread's last
-kvm_request_pending() check and before it has entered guest mode, as kick
-IPIs will only trigger guest mode exits for VCPU threads that are in guest
-mode or at least have already disabled interrupts in order to prepare to
-enter guest mode.  This means that an optimized implementation (see "IPI
-Reduction") must be certain when it's safe to not send the IPI.  One
-solution, which all architectures except s390 apply, is to:
-
-- set ``vcpu->mode`` to IN_GUEST_MODE between disabling the interrupts and
-  the last kvm_request_pending() check;
-- enable interrupts atomically when entering the guest.
-
-This solution also requires memory barriers to be placed carefully in both
-the requesting thread and the receiving VCPU.  With the memory barriers we
-can exclude the possibility of a VCPU thread observing
-!kvm_request_pending() on its last check and then not receiving an IPI for
-the next request made of it, even if the request is made immediately after
-the check.  This is done by way of the Dekker memory barrier pattern
-(scenario 10 of [lwn-mb]_).  As the Dekker pattern requires two variables,
-this solution pairs ``vcpu->mode`` with ``vcpu->requests``.  Substituting
-them into the pattern gives::
-
-  CPU1                                    CPU2
-  =================                       =================
-  local_irq_disable();
-  WRITE_ONCE(vcpu->mode, IN_GUEST_MODE);  kvm_make_request(REQ, vcpu);
-  smp_mb();                               smp_mb();
-  if (kvm_request_pending(vcpu)) {        if (READ_ONCE(vcpu->mode) ==
-                                              IN_GUEST_MODE) {
-      ...abort guest entry...                 ...send IPI...
-  }                                       }
-
-As stated above, the IPI is only useful for VCPU threads in guest mode or
-that have already disabled interrupts.  This is why this specific case of
-the Dekker pattern has been extended to disable interrupts before setting
-``vcpu->mode`` to IN_GUEST_MODE.  WRITE_ONCE() and READ_ONCE() are used to
-pedantically implement the memory barrier pattern, guaranteeing the
-compiler doesn't interfere with ``vcpu->mode``'s carefully planned
-accesses.
-
-IPI Reduction
--------------
-
-As only one IPI is needed to get a VCPU to check for any/all requests,
-then they may be coalesced.  This is easily done by having the first IPI
-sending kick also change the VCPU mode to something !IN_GUEST_MODE.  The
-transitional state, EXITING_GUEST_MODE, is used for this purpose.
-
-Waiting for Acknowledgements
-----------------------------
-
-Some requests, those with the KVM_REQUEST_WAIT flag set, require IPIs to
-be sent, and the acknowledgements to be waited upon, even when the target
-VCPU threads are in modes other than IN_GUEST_MODE.  For example, one case
-is when a target VCPU thread is in READING_SHADOW_PAGE_TABLES mode, which
-is set after disabling interrupts.  To support these cases, the
-KVM_REQUEST_WAIT flag changes the condition for sending an IPI from
-checking that the VCPU is IN_GUEST_MODE to checking that it is not
-OUTSIDE_GUEST_MODE.
-
-Request-less VCPU Kicks
------------------------
-
-As the determination of whether or not to send an IPI depends on the
-two-variable Dekker memory barrier pattern, then it's clear that
-request-less VCPU kicks are almost never correct.  Without the assurance
-that a non-IPI generating kick will still result in an action by the
-receiving VCPU, as the final kvm_request_pending() check does for
-request-accompanying kicks, then the kick may not do anything useful at
-all.  If, for instance, a request-less kick was made to a VCPU that was
-just about to set its mode to IN_GUEST_MODE, meaning no IPI is sent, then
-the VCPU thread may continue its entry without actually having done
-whatever it was the kick was meant to initiate.
-
-One exception is x86's posted interrupt mechanism.  In this case, however,
-even the request-less VCPU kick is coupled with the same
-local_irq_disable() + smp_mb() pattern described above; the ON bit
-(Outstanding Notification) in the posted interrupt descriptor takes the
-role of ``vcpu->requests``.  When sending a posted interrupt, PIR.ON is
-set before reading ``vcpu->mode``; dually, in the VCPU thread,
-vmx_sync_pir_to_irr() reads PIR after setting ``vcpu->mode`` to
-IN_GUEST_MODE.
-
-Additional Considerations
-=========================
-
-Sleeping VCPUs
---------------
-
-VCPU threads may need to consider requests before and/or after calling
-functions that may put them to sleep, e.g. kvm_vcpu_block().  Whether they
-do or not, and, if they do, which requests need consideration, is
-architecture dependent.  kvm_vcpu_block() calls kvm_arch_vcpu_runnable()
-to check if it should awaken.  One reason to do so is to provide
-architectures a function where requests may be checked if necessary.
-
-Clearing Requests
------------------
-
-Generally it only makes sense for the receiving VCPU thread to clear a
-request.  However, in some circumstances, such as when the requesting
-thread and the receiving VCPU thread are executed serially, such as when
-they are the same thread, or when they are using some form of concurrency
-control to temporarily execute synchronously, then it's possible to know
-that the request may be cleared immediately, rather than waiting for the
-receiving VCPU thread to handle the request in VCPU RUN.  The only current
-examples of this are kvm_vcpu_block() calls made by VCPUs to block
-themselves.  A possible side-effect of that call is to make the
-KVM_REQ_UNHALT request, which may then be cleared immediately when the
-VCPU returns from the call.
-
-References
-==========
-
-.. [atomic-ops] Documentation/core-api/atomic_ops.rst
-.. [memory-barriers] Documentation/memory-barriers.txt
-.. [lwn-mb] https://lwn.net/Articles/573436/
diff --git a/Documentation/virtual/paravirt_ops.rst b/Documentation/virtual/paravirt_ops.rst
deleted file mode 100644
index 6b789d27cead..000000000000
--- a/Documentation/virtual/paravirt_ops.rst
+++ /dev/null
@@ -1,35 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-============
-Paravirt_ops
-============
-
-Linux provides support for different hypervisor virtualization technologies.
-Historically different binary kernels would be required in order to support
-different hypervisors, this restriction was removed with pv_ops.
-Linux pv_ops is a virtualization API which enables support for different
-hypervisors. It allows each hypervisor to override critical operations and
-allows a single kernel binary to run on all supported execution environments
-including native machine -- without any hypervisors.
-
-pv_ops provides a set of function pointers which represent operations
-corresponding to low level critical instructions and high level
-functionalities in various areas. pv-ops allows for optimizations at run
-time by enabling binary patching of the low-ops critical operations
-at boot time.
-
-pv_ops operations are classified into three categories:
-
-- simple indirect call
-   These operations correspond to high level functionality where it is
-   known that the overhead of indirect call isn't very important.
-
-- indirect call which allows optimization with binary patch
-   Usually these operations correspond to low level critical instructions. They
-   are called frequently and are performance critical. The overhead is
-   very important.
-
-- a set of macros for hand written assembly code
-   Hand written assembly codes (.S files) also need paravirtualization
-   because they include sensitive instructions or some of code paths in
-   them are very performance critical.
diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
deleted file mode 100644
index 87b80f589e1c..000000000000
--- a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
+++ /dev/null
@@ -1,4589 +0,0 @@
-  User Mode Linux HOWTO
-  User Mode Linux Core Team
-  Mon Nov 18 14:16:16 EST 2002
-
-  This document describes the use and abuse of Jeff Dike's User Mode
-  Linux: a port of the Linux kernel as a normal Intel Linux process.
-  ______________________________________________________________________
-
-  Table of Contents
-
-  1. Introduction
-
-     1.1 How is User Mode Linux Different?
-     1.2 Why Would I Want User Mode Linux?
-
-  2. Compiling the kernel and modules
-
-     2.1 Compiling the kernel
-     2.2 Compiling and installing kernel modules
-     2.3 Compiling and installing uml_utilities
-
-  3. Running UML and logging in
-
-     3.1 Running UML
-     3.2 Logging in
-     3.3 Examples
-
-  4. UML on 2G/2G hosts
-
-     4.1 Introduction
-     4.2 The problem
-     4.3 The solution
-
-  5. Setting up serial lines and consoles
-
-     5.1 Specifying the device
-     5.2 Specifying the channel
-     5.3 Examples
-
-  6. Setting up the network
-
-     6.1 General setup
-     6.2 Userspace daemons
-     6.3 Specifying ethernet addresses
-     6.4 UML interface setup
-     6.5 Multicast
-     6.6 TUN/TAP with the uml_net helper
-     6.7 TUN/TAP with a preconfigured tap device
-     6.8 Ethertap
-     6.9 The switch daemon
-     6.10 Slip
-     6.11 Slirp
-     6.12 pcap
-     6.13 Setting up the host yourself
-
-  7. Sharing Filesystems between Virtual Machines
-
-     7.1 A warning
-     7.2 Using layered block devices
-     7.3 Note!
-     7.4 Another warning
-     7.5 uml_moo : Merging a COW file with its backing file
-
-  8. Creating filesystems
-
-     8.1 Create the filesystem file
-     8.2 Assign the file to a UML device
-     8.3 Creating and mounting the filesystem
-
-  9. Host file access
-
-     9.1 Using hostfs
-     9.2 hostfs as the root filesystem
-     9.3 Building hostfs
-
-  10. The Management Console
-     10.1 version
-     10.2 halt and reboot
-     10.3 config
-     10.4 remove
-     10.5 sysrq
-     10.6 help
-     10.7 cad
-     10.8 stop
-     10.9 go
-
-  11. Kernel debugging
-
-     11.1 Starting the kernel under gdb
-     11.2 Examining sleeping processes
-     11.3 Running ddd on UML
-     11.4 Debugging modules
-     11.5 Attaching gdb to the kernel
-     11.6 Using alternate debuggers
-
-  12. Kernel debugging examples
-
-     12.1 The case of the hung fsck
-     12.2 Episode 2: The case of the hung fsck
-
-  13. What to do when UML doesn't work
-
-     13.1 Strange compilation errors when you build from source
-     13.2 (obsolete)
-     13.3 A variety of panics and hangs with /tmp on a reiserfs  filesystem
-     13.4 The compile fails with errors about conflicting types for 'open', 'dup', and 'waitpid'
-     13.5 UML doesn't work when /tmp is an NFS filesystem
-     13.6 UML hangs on boot when compiled with gprof support
-     13.7 syslogd dies with a SIGTERM on startup
-     13.8 TUN/TAP networking doesn't work on a 2.4 host
-     13.9 You can network to the host but not to other machines on the net
-     13.10 I have no root and I want to scream
-     13.11 UML build conflict between ptrace.h and ucontext.h
-     13.12 The UML BogoMips is exactly half the host's BogoMips
-     13.13 When you run UML, it immediately segfaults
-     13.14 xterms appear, then immediately disappear
-     13.15 Any other panic, hang, or strange behavior
-
-  14. Diagnosing Problems
-
-     14.1 Case 1 : Normal kernel panics
-     14.2 Case 2 : Tracing thread panics
-     14.3 Case 3 : Tracing thread panics caused by other threads
-     14.4 Case 4 : Hangs
-
-  15. Thanks
-
-     15.1 Code and Documentation
-     15.2 Flushing out bugs
-     15.3 Buglets and clean-ups
-     15.4 Case Studies
-     15.5 Other contributions
-
-
-  ______________________________________________________________________
-
-  1.  Introduction
-
-  Welcome to User Mode Linux.  It's going to be fun.
-
-
-
-  1.1.  How is User Mode Linux Different?
-
-  Normally, the Linux Kernel talks straight to your hardware (video
-  card, keyboard, hard drives, etc), and any programs which run ask the
-  kernel to operate the hardware, like so:
-
-
-
-         +-----------+-----------+----+
-         | Process 1 | Process 2 | ...|
-         +-----------+-----------+----+
-         |       Linux Kernel         |
-         +----------------------------+
-         |         Hardware           |
-         +----------------------------+
-
-
-
-
-  The User Mode Linux Kernel is different; instead of talking to the
-  hardware, it talks to a `real' Linux kernel (called the `host kernel'
-  from now on), like any other program.  Programs can then run inside
-  User-Mode Linux as if they were running under a normal kernel, like
-  so:
-
-
-
-                     +----------------+
-                     | Process 2 | ...|
-         +-----------+----------------+
-         | Process 1 | User-Mode Linux|
-         +----------------------------+
-         |       Linux Kernel         |
-         +----------------------------+
-         |         Hardware           |
-         +----------------------------+
-
-
-
-
-
-  1.2.  Why Would I Want User Mode Linux?
-
-
-  1. If User Mode Linux crashes, your host kernel is still fine.
-
-  2. You can run a usermode kernel as a non-root user.
-
-  3. You can debug the User Mode Linux like any normal process.
-
-  4. You can run gprof (profiling) and gcov (coverage testing).
-
-  5. You can play with your kernel without breaking things.
-
-  6. You can use it as a sandbox for testing new apps.
-
-  7. You can try new development kernels safely.
-
-  8. You can run different distributions simultaneously.
-
-  9. It's extremely fun.
-
-
-
-
-
-  2.  Compiling the kernel and modules
-
-
-
-
-  2.1.  Compiling the kernel
-
-
-  Compiling the user mode kernel is just like compiling any other
-  kernel.  Let's go through the steps, using 2.4.0-prerelease (current
-  as of this writing) as an example:
-
-
-  1. Download the latest UML patch from
-
-     the download page <http://user-mode-linux.sourceforge.net/
-
-     In this example, the file is uml-patch-2.4.0-prerelease.bz2.
-
-
-  2. Download the matching kernel from your favourite kernel mirror,
-     such as:
-
-     ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2
-     <ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2>
-     .
-
-
-  3. Make a directory and unpack the kernel into it.
-
-
-
-       host%
-       mkdir ~/uml
-
-
-
-
-
-
-       host%
-       cd ~/uml
-
-
-
-
-
-
-       host%
-       tar -xzvf linux-2.4.0-prerelease.tar.bz2
-
-
-
-
-
-
-  4. Apply the patch using
-
-
-
-       host%
-       cd ~/uml/linux
-
-
-
-       host%
-       bzcat uml-patch-2.4.0-prerelease.bz2 | patch -p1
-
-
-
-
-
-
-  5. Run your favorite config; `make xconfig ARCH=um' is the most
-     convenient.  `make config ARCH=um' and 'make menuconfig ARCH=um'
-     will work as well.  The defaults will give you a useful kernel.  If
-     you want to change something, go ahead, it probably won't hurt
-     anything.
-
-
-     Note:  If the host is configured with a 2G/2G address space split
-     rather than the usual 3G/1G split, then the packaged UML binaries
-     will not run.  They will immediately segfault.  See ``UML on 2G/2G
-     hosts''  for the scoop on running UML on your system.
-
-
-
-  6. Finish with `make linux ARCH=um': the result is a file called
-     `linux' in the top directory of your source tree.
-
-  Make sure that you don't build this kernel in /usr/src/linux.  On some
-  distributions, /usr/include/asm is a link into this pool.  The user-
-  mode build changes the other end of that link, and things that include
-  <asm/anything.h> stop compiling.
-
-  The sources are also available from cvs at the project's cvs page,
-  which has directions on getting the sources. You can also browse the
-  CVS pool from there.
-
-  If you get the CVS sources, you will have to check them out into an
-  empty directory. You will then have to copy each file into the
-  corresponding directory in the appropriate kernel pool.
-
-  If you don't have the latest kernel pool, you can get the
-  corresponding user-mode sources with
-
-
-       host% cvs co -r v_2_3_x linux
-
-
-
-
-  where 'x' is the version in your pool. Note that you will not get the
-  bug fixes and enhancements that have gone into subsequent releases.
-
-
-  2.2.  Compiling and installing kernel modules
-
-  UML modules are built in the same way as the native kernel (with the
-  exception of the 'ARCH=um' that you always need for UML):
-
-
-       host% make modules ARCH=um
-
-
-
-
-  Any modules that you want to load into this kernel need to be built in
-  the user-mode pool.  Modules from the native kernel won't work.
-
-  You can install them by using ftp or something to copy them into the
-  virtual machine and dropping them into /lib/modules/`uname -r`.
-
-  You can also get the kernel build process to install them as follows:
-
-  1. with the kernel not booted, mount the root filesystem in the top
-     level of the kernel pool:
-
-
-       host% mount root_fs mnt -o loop
-
-
-
-
-
-
-  2. run
-
-
-       host%
-       make modules_install INSTALL_MOD_PATH=`pwd`/mnt ARCH=um
-
-
-
-
-
-
-  3. unmount the filesystem
-
-
-       host% umount mnt
-
-
-
-
-
-
-  4. boot the kernel on it
-
-
-  When the system is booted, you can use insmod as usual to get the
-  modules into the kernel.  A number of things have been loaded into UML
-  as modules, especially filesystems and network protocols and filters,
-  so most symbols which need to be exported probably already are.
-  However, if you do find symbols that need exporting, let  us
-  <http://user-mode-linux.sourceforge.net/>  know, and
-  they'll be "taken care of".
-
-
-
-  2.3.  Compiling and installing uml_utilities
-
-  Many features of the UML kernel require a user-space helper program,
-  so a uml_utilities package is distributed separately from the kernel
-  patch which provides these helpers. Included within this is:
-
-  o  port-helper - Used by consoles which connect to xterms or ports
-
-  o  tunctl - Configuration tool to create and delete tap devices
-
-  o  uml_net - Setuid binary for automatic tap device configuration
-
-  o  uml_switch - User-space virtual switch required for daemon
-     transport
-
-     The uml_utilities tree is compiled with:
-
-
-       host#
-       make && make install
-
-
-
-
-  Note that UML kernel patches may require a specific version of the
-  uml_utilities distribution. If you don't keep up with the mailing
-  lists, ensure that you have the latest release of uml_utilities if you
-  are experiencing problems with your UML kernel, particularly when
-  dealing with consoles or command-line switches to the helper programs
-
-
-
-
-
-
-
-
-  3.  Running UML and logging in
-
-
-
-  3.1.  Running UML
-
-  It runs on 2.2.15 or later, and all 2.4 kernels.
-
-
-  Booting UML is straightforward.  Simply run 'linux': it will try to
-  mount the file `root_fs' in the current directory.  You do not need to
-  run it as root.  If your root filesystem is not named `root_fs', then
-  you need to put a `ubd0=root_fs_whatever' switch on the linux command
-  line.
-
-
-  You will need a filesystem to boot UML from.  There are a number
-  available for download from  here  <http://user-mode-
-  linux.sourceforge.net/> .  There are also  several tools
-  <http://user-mode-linux.sourceforge.net/>  which can be
-  used to generate UML-compatible filesystem images from media.
-  The kernel will boot up and present you with a login prompt.
-
-
-  Note:  If the host is configured with a 2G/2G address space split
-  rather than the usual 3G/1G split, then the packaged UML binaries will
-  not run.  They will immediately segfault.  See ``UML on 2G/2G hosts''
-  for the scoop on running UML on your system.
-
-
-
-  3.2.  Logging in
-
-
-
-  The prepackaged filesystems have a root account with password 'root'
-  and a user account with password 'user'.  The login banner will
-  generally tell you how to log in.  So, you log in and you will find
-  yourself inside a little virtual machine. Our filesystems have a
-  variety of commands and utilities installed (and it is fairly easy to
-  add more), so you will have a lot of tools with which to poke around
-  the system.
-
-  There are a couple of other ways to log in:
-
-  o  On a virtual console
-
-
-
-     Each virtual console that is configured (i.e. the device exists in
-     /dev and /etc/inittab runs a getty on it) will come up in its own
-     xterm.  If you get tired of the xterms, read ``Setting up serial
-     lines and consoles''  to see how to attach the consoles to
-     something else, like host ptys.
-
-
-
-  o  Over the serial line
-
-
-     In the boot output, find a line that looks like:
-
-
-
-       serial line 0 assigned pty /dev/ptyp1
-
-
-
-
-  Attach your favorite terminal program to the corresponding tty.  I.e.
-  for minicom, the command would be
-
-
-       host% minicom -o -p /dev/ttyp1
-
-
-
-
-
-
-  o  Over the net
-
-
-     If the network is running, then you can telnet to the virtual
-     machine and log in to it.  See ``Setting up the network''  to learn
-     about setting up a virtual network.
-
-  When you're done using it, run halt, and the kernel will bring itself
-  down and the process will exit.
-
-
-  3.3.  Examples
-
-  Here are some examples of UML in action:
-
-  o  A login session <http://user-mode-linux.sourceforge.net/login.html>
-
-  o  A virtual network <http://user-mode-linux.sourceforge.net/net.html>
-
-
-
-
-
-
-
-  4.  UML on 2G/2G hosts
-
-
-
-
-  4.1.  Introduction
-
-
-  Most Linux machines are configured so that the kernel occupies the
-  upper 1G (0xc0000000 - 0xffffffff) of the 4G address space and
-  processes use the lower 3G (0x00000000 - 0xbfffffff).  However, some
-  machine are configured with a 2G/2G split, with the kernel occupying
-  the upper 2G (0x80000000 - 0xffffffff) and processes using the lower
-  2G (0x00000000 - 0x7fffffff).
-
-
-
-
-  4.2.  The problem
-
-
-  The prebuilt UML binaries on this site will not run on 2G/2G hosts
-  because UML occupies the upper .5G of the 3G process address space
-  (0xa0000000 - 0xbfffffff).  Obviously, on 2G/2G hosts, this is right
-  in the middle of the kernel address space, so UML won't even load - it
-  will immediately segfault.
-
-
-
-
-  4.3.  The solution
-
-
-  The fix for this is to rebuild UML from source after enabling
-  CONFIG_HOST_2G_2G (under 'General Setup').  This will cause UML to
-  load itself in the top .5G of that smaller process address space,
-  where it will run fine.  See ``Compiling the kernel and modules''  if
-  you need help building UML from source.
-
-
-
-
-
-
-
-
-
-
-  5.  Setting up serial lines and consoles
-
-
-  It is possible to attach UML serial lines and consoles to many types
-  of host I/O channels by specifying them on the command line.
-
-
-  You can attach them to host ptys, ttys, file descriptors, and ports.
-  This allows you to do things like
-
-  o  have a UML console appear on an unused host console,
-
-  o  hook two virtual machines together by having one attach to a pty
-     and having the other attach to the corresponding tty
-
-  o  make a virtual machine accessible from the net by attaching a
-     console to a port on the host.
-
-
-  The general format of the command line option is device=channel.
-
-
-
-  5.1.  Specifying the device
-
-  Devices are specified with "con" or "ssl" (console or serial line,
-  respectively), optionally with a device number if you are talking
-  about a specific device.
-
-
-  Using just "con" or "ssl" describes all of the consoles or serial
-  lines.  If you want to talk about console #3 or serial line #10, they
-  would be "con3" and "ssl10", respectively.
-
-
-  A specific device name will override a less general "con=" or "ssl=".
-  So, for example, you can assign a pty to each of the serial lines
-  except for the first two like this:
-
-
-        ssl=pty ssl0=tty:/dev/tty0 ssl1=tty:/dev/tty1
-
-
-
-
-  The specificity of the device name is all that matters; order on the
-  command line is irrelevant.
-
-
-
-  5.2.  Specifying the channel
-
-  There are a number of different types of channels to attach a UML
-  device to, each with a different way of specifying exactly what to
-  attach to.
-
-  o  pseudo-terminals - device=pty pts terminals - device=pts
-
-
-     This will cause UML to allocate a free host pseudo-terminal for the
-     device.  The terminal that it got will be announced in the boot
-     log.  You access it by attaching a terminal program to the
-     corresponding tty:
-
-  o  screen /dev/pts/n
-
-  o  screen /dev/ttyxx
-
-  o  minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
-     devices
-
-  o  kermit - start it up, 'open' the device, then 'connect'
-
-
-
-
-
-  o  terminals - device=tty:tty device file
-
-
-     This will make UML attach the device to the specified tty (i.e
-
-
-        con1=tty:/dev/tty3
-
-
-
-
-  will attach UML's console 1 to the host's /dev/tty3).  If the tty that
-  you specify is the slave end of a tty/pty pair, something else must
-  have already opened the corresponding pty in order for this to work.
-
-
-
-
-
-  o  xterms - device=xterm
-
-
-     UML will run an xterm and the device will be attached to it.
-
-
-
-
-
-  o  Port - device=port:port number
-
-
-     This will attach the UML devices to the specified host port.
-     Attaching console 1 to the host's port 9000 would be done like
-     this:
-
-
-        con1=port:9000
-
-
-
-
-  Attaching all the serial lines to that port would be done similarly:
-
-
-        ssl=port:9000
-
-
-
-
-  You access these devices by telnetting to that port.  Each active tel-
-  net session gets a different device.  If there are more telnets to a
-  port than UML devices attached to it, then the extra telnet sessions
-  will block until an existing telnet detaches, or until another device
-  becomes active (i.e. by being activated in /etc/inittab).
-
-  This channel has the advantage that you can both attach multiple UML
-  devices to it and know how to access them without reading the UML boot
-  log.  It is also unique in allowing access to a UML from remote
-  machines without requiring that the UML be networked.  This could be
-  useful in allowing public access to UMLs because they would be
-  accessible from the net, but wouldn't need any kind of network
-  filtering or access control because they would have no network access.
-
-
-  If you attach the main console to a portal, then the UML boot will
-  appear to hang.  In reality, it's waiting for a telnet to connect, at
-  which point the boot will proceed.
-
-
-
-
-
-  o  already-existing file descriptors - device=file descriptor
-
-
-     If you set up a file descriptor on the UML command line, you can
-     attach a UML device to it.  This is most commonly used to put the
-     main console back on stdin and stdout after assigning all the other
-     consoles to something else:
-
-
-        con0=fd:0,fd:1 con=pts
-
-
-
-
-
-
-
-
-  o  Nothing - device=null
-
-
-     This allows the device to be opened, in contrast to 'none', but
-     reads will block, and writes will succeed and the data will be
-     thrown out.
-
-
-
-
-
-  o  None - device=none
-
-
-     This causes the device to disappear.
-
-
-
-  You can also specify different input and output channels for a device
-  by putting a comma between them:
-
-
-        ssl3=tty:/dev/tty2,xterm
-
-
-
-
-  will cause serial line 3 to accept input on the host's /dev/tty2 and
-  display output on an xterm.  That's a silly example - the most common
-  use of this syntax is to reattach the main console to stdin and stdout
-  as shown above.
-
-
-  If you decide to move the main console away from stdin/stdout, the
-  initial boot output will appear in the terminal that you're running
-  UML in.  However, once the console driver has been officially
-  initialized, then the boot output will start appearing wherever you
-  specified that console 0 should be.  That device will receive all
-  subsequent output.
-
-
-
-  5.3.  Examples
-
-  There are a number of interesting things you can do with this
-  capability.
-
-
-  First, this is how you get rid of those bleeding console xterms by
-  attaching them to host ptys:
-
-
-        con=pty con0=fd:0,fd:1
-
-
-
-
-  This will make a UML console take over an unused host virtual console,
-  so that when you switch to it, you will see the UML login prompt
-  rather than the host login prompt:
-
-
-        con1=tty:/dev/tty6
-
-
-
-
-  You can attach two virtual machines together with what amounts to a
-  serial line as follows:
-
-  Run one UML with a serial line attached to a pty -
-
-
-        ssl1=pty
-
-
-
-
-  Look at the boot log to see what pty it got (this example will assume
-  that it got /dev/ptyp1).
-
-  Boot the other UML with a serial line attached to the corresponding
-  tty -
-
-
-        ssl1=tty:/dev/ttyp1
-
-
-
-
-  Log in, make sure that it has no getty on that serial line, attach a
-  terminal program like minicom to it, and you should see the login
-  prompt of the other virtual machine.
-
-
-  6.  Setting up the network
-
-
-
-  This page describes how to set up the various transports and to
-  provide a UML instance with network access to the host, other machines
-  on the local net, and the rest of the net.
-
-
-  As of 2.4.5, UML networking has been completely redone to make it much
-  easier to set up, fix bugs, and add new features.
-
-
-  There is a new helper, uml_net, which does the host setup that
-  requires root privileges.
-
-
-  There are currently five transport types available for a UML virtual
-  machine to exchange packets with other hosts:
-
-  o  ethertap
-
-  o  TUN/TAP
-
-  o  Multicast
-
-  o  a switch daemon
-
-  o  slip
-
-  o  slirp
-
-  o  pcap
-
-     The TUN/TAP, ethertap, slip, and slirp transports allow a UML
-     instance to exchange packets with the host.  They may be directed
-     to the host or the host may just act as a router to provide access
-     to other physical or virtual machines.
-
-
-  The pcap transport is a synthetic read-only interface, using the
-  libpcap binary to collect packets from interfaces on the host and
-  filter them.  This is useful for building preconfigured traffic
-  monitors or sniffers.
-
-
-  The daemon and multicast transports provide a completely virtual
-  network to other virtual machines.  This network is completely
-  disconnected from the physical network unless one of the virtual
-  machines on it is acting as a gateway.
-
-
-  With so many host transports, which one should you use?  Here's when
-  you should use each one:
-
-  o  ethertap - if you want access to the host networking and it is
-     running 2.2
-
-  o  TUN/TAP - if you want access to the host networking and it is
-     running 2.4.  Also, the TUN/TAP transport is able to use a
-     preconfigured device, allowing it to avoid using the setuid uml_net
-     helper, which is a security advantage.
-
-  o  Multicast - if you want a purely virtual network and you don't want
-     to set up anything but the UML
-
-  o  a switch daemon - if you want a purely virtual network and you
-     don't mind running the daemon in order to get somewhat better
-     performance
-
-  o  slip - there is no particular reason to run the slip backend unless
-     ethertap and TUN/TAP are just not available for some reason
-
-  o  slirp - if you don't have root access on the host to setup
-     networking, or if you don't want to allocate an IP to your UML
-
-  o  pcap - not much use for actual network connectivity, but great for
-     monitoring traffic on the host
-
-     Ethertap is available on 2.4 and works fine.  TUN/TAP is preferred
-     to it because it has better performance and ethertap is officially
-     considered obsolete in 2.4.  Also, the root helper only needs to
-     run occasionally for TUN/TAP, rather than handling every packet, as
-     it does with ethertap.  This is a slight security advantage since
-     it provides fewer opportunities for a nasty UML user to somehow
-     exploit the helper's root privileges.
-
-
-  6.1.  General setup
-
-  First, you must have the virtual network enabled in your UML.  If are
-  running a prebuilt kernel from this site, everything is already
-  enabled.  If you build the kernel yourself, under the "Network device
-  support" menu, enable "Network device support", and then the three
-  transports.
-
-
-  The next step is to provide a network device to the virtual machine.
-  This is done by describing it on the kernel command line.
-
-  The general format is
-
-
-       eth <n> = <transport> , <transport args>
-
-
-
-
-  For example, a virtual ethernet device may be attached to a host
-  ethertap device as follows:
-
-
-       eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
-
-
-
-
-  This sets up eth0 inside the virtual machine to attach itself to the
-  host /dev/tap0, assigns it an ethernet address, and assigns the host
-  tap0 interface an IP address.
-
-
-
-  Note that the IP address you assign to the host end of the tap device
-  must be different than the IP you assign to the eth device inside UML.
-  If you are short on IPs and don't want to consume two per UML, then
-  you can reuse the host's eth IP address for the host ends of the tap
-  devices.  Internally, the UMLs must still get unique IPs for their eth
-  devices.  You can also give the UMLs non-routable IPs (192.168.x.x or
-  10.x.x.x) and have the host masquerade them.  This will let outgoing
-  connections work, but incoming connections won't without more work,
-  such as port forwarding from the host.
-  Also note that when you configure the host side of an interface, it is
-  only acting as a gateway.  It will respond to pings sent to it
-  locally, but is not useful to do that since it's a host interface.
-  You are not talking to the UML when you ping that interface and get a
-  response.
-
-
-  You can also add devices to a UML and remove them at runtime.  See the
-  ``The Management Console''  page for details.
-
-
-  The sections below describe this in more detail.
-
-
-  Once you've decided how you're going to set up the devices, you boot
-  UML, log in, configure the UML side of the devices, and set up routes
-  to the outside world.  At that point, you will be able to talk to any
-  other machines, physical or virtual, on the net.
-
-
-  If ifconfig inside UML fails and the network refuses to come up, run
-  tell you what went wrong.
-
-
-
-  6.2.  Userspace daemons
-
-  You will likely need the setuid helper, or the switch daemon, or both.
-  They are both installed with the RPM and deb, so if you've installed
-  either, you can skip the rest of this section.
-
-
-  If not, then you need to check them out of CVS, build them, and
-  install them.  The helper is uml_net, in CVS /tools/uml_net, and the
-  daemon is uml_switch, in CVS /tools/uml_router.  They are both built
-  with a plain 'make'.  Both need to be installed in a directory that's
-  in your path - /usr/bin is recommend.  On top of that, uml_net needs
-  to be setuid root.
-
-
-
-  6.3.  Specifying ethernet addresses
-
-  Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
-  allow you to specify hardware addresses for the virtual ethernet
-  devices.  This is generally not necessary.  If you don't have a
-  specific reason to do it, you probably shouldn't.  If one is not
-  specified on the command line, the driver will assign one based on the
-  device IP address.  It will provide the address fe:fd:nn:nn:nn:nn
-  where nn.nn.nn.nn is the device IP address.  This is nearly always
-  sufficient to guarantee a unique hardware address for the device.  A
-  couple of exceptions are:
-
-  o  Another set of virtual ethernet devices are on the same network and
-     they are assigned hardware addresses using a different scheme which
-     may conflict with the UML IP address-based scheme
-
-  o  You aren't going to use the device for IP networking, so you don't
-     assign the device an IP address
-
-     If you let the driver provide the hardware address, you should make
-     sure that the device IP address is known before the interface is
-     brought up.  So, inside UML, this will guarantee that:
-
-
-
-  UML#
-  ifconfig eth0 192.168.0.250 up
-
-
-
-
-  If you decide to assign the hardware address yourself, make sure that
-  the first byte of the address is even.  Addresses with an odd first
-  byte are broadcast addresses, which you don't want assigned to a
-  device.
-
-
-
-  6.4.  UML interface setup
-
-  Once the network devices have been described on the command line, you
-  should boot UML and log in.
-
-
-  The first thing to do is bring the interface up:
-
-
-       UML# ifconfig ethn ip-address up
-
-
-
-
-  You should be able to ping the host at this point.
-
-
-  To reach the rest of the world, you should set a default route to the
-  host:
-
-
-       UML# route add default gw host ip
-
-
-
-
-  Again, with host ip of 192.168.0.4:
-
-
-       UML# route add default gw 192.168.0.4
-
-
-
-
-  This page used to recommend setting a network route to your local net.
-  This is wrong, because it will cause UML to try to figure out hardware
-  addresses of the local machines by arping on the interface to the
-  host.  Since that interface is basically a single strand of ethernet
-  with two nodes on it (UML and the host) and arp requests don't cross
-  networks, they will fail to elicit any responses.  So, what you want
-  is for UML to just blindly throw all packets at the host and let it
-  figure out what to do with them, which is what leaving out the network
-  route and adding the default route does.
-
-
-  Note: If you can't communicate with other hosts on your physical
-  ethernet, it's probably because of a network route that's
-  automatically set up.  If you run 'route -n' and see a route that
-  looks like this:
-
-
-
-
-  Destination     Gateway         Genmask         Flags Metric Ref    Use Iface
-  192.168.0.0     0.0.0.0         255.255.255.0   U     0      0      0   eth0
-
-
-
-
-  with a mask that's not 255.255.255.255, then replace it with a route
-  to your host:
-
-
-       UML#
-       route del -net 192.168.0.0 dev eth0 netmask 255.255.255.0
-
-
-
-
-
-
-       UML#
-       route add -host 192.168.0.4 dev eth0
-
-
-
-
-  This, plus the default route to the host, will allow UML to exchange
-  packets with any machine on your ethernet.
-
-
-
-  6.5.  Multicast
-
-  The simplest way to set up a virtual network between multiple UMLs is
-  to use the mcast transport.  This was written by Harald Welte and is
-  present in UML version 2.4.5-5um and later.  Your system must have
-  multicast enabled in the kernel and there must be a multicast-capable
-  network device on the host.  Normally, this is eth0, but if there is
-  no ethernet card on the host, then you will likely get strange error
-  messages when you bring the device up inside UML.
-
-
-  To use it, run two UMLs with
-
-
-        eth0=mcast
-
-
-
-
-  on their command lines.  Log in, configure the ethernet device in each
-  machine with different IP addresses:
-
-
-       UML1# ifconfig eth0 192.168.0.254
-
-
-
-
-
-
-       UML2# ifconfig eth0 192.168.0.253
-
-
-
-
-  and they should be able to talk to each other.
-
-  The full set of command line options for this transport are
-
-
-
-       ethn=mcast,ethernet address,multicast
-       address,multicast port,ttl
-
-
-
-
-  Harald's original README is here <http://user-mode-linux.source-
-  forge.net/>  and explains these in detail, as well as
-  some other issues.
-
-  There is also a related point-to-point only "ucast" transport.
-  This is useful when your network does not support multicast, and
-  all network connections are simple point to point links.
-
-  The full set of command line options for this transport are
-
-
-       ethn=ucast,ethernet address,remote address,listen port,remote port
-
-
-
-
-  6.6.  TUN/TAP with the uml_net helper
-
-  TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
-  host.  The TUN/TAP backend has been in UML since 2.4.9-3um.
-
-
-  The easiest way to get up and running is to let the setuid uml_net
-  helper do the host setup for you.  This involves insmod-ing the tun.o
-  module if necessary, configuring the device, and setting up IP
-  forwarding, routing, and proxy arp.  If you are new to UML networking,
-  do this first.  If you're concerned about the security implications of
-  the setuid helper, use it to get up and running, then read the next
-  section to see how to have UML use a preconfigured tap device, which
-  avoids the use of uml_net.
-
-
-  If you specify an IP address for the host side of the device, the
-  uml_net helper will do all necessary setup on the host - the only
-  requirement is that TUN/TAP be available, either built in to the host
-  kernel or as the tun.o module.
-
-  The format of the command line switch to attach a device to a TUN/TAP
-  device is
-
-
-       eth <n> =tuntap,,, <IP address>
-
-
-
-
-  For example, this argument will attach the UML's eth0 to the next
-  available tap device and assign an ethernet address to it based on its
-  IP address
-
-
-       eth0=tuntap,,,192.168.0.254
-
-
-
-
-
-
-  Note that the IP address that must be used for the eth device inside
-  UML is fixed by the routing and proxy arp that is set up on the
-  TUN/TAP device on the host.  You can use a different one, but it won't
-  work because reply packets won't reach the UML.  This is a feature.
-  It prevents a nasty UML user from doing things like setting the UML IP
-  to the same as the network's nameserver or mail server.
-
-
-  There are a couple potential problems with running the TUN/TAP
-  transport on a 2.4 host kernel
-
-  o  TUN/TAP seems not to work on 2.4.3 and earlier.  Upgrade the host
-     kernel or use the ethertap transport.
-
-  o  With an upgraded kernel, TUN/TAP may fail with
-
-
-       File descriptor in bad state
-
-
-
-
-  This is due to a header mismatch between the upgraded kernel and the
-  kernel that was originally installed on the machine.  The fix is to
-  make sure that /usr/src/linux points to the headers for the running
-  kernel.
-
-  These were pointed out by Tim Robinson <timro at trkr dot net> in
-  <http://www.geocrawler.com/> name="this uml-
-  user post"> .
-
-
-
-  6.7.  TUN/TAP with a preconfigured tap device
-
-  If you prefer not to have UML use uml_net (which is somewhat
-  insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
-  beforehand.  The setup needs to be done as root, but once that's done,
-  there is no need for root assistance.  Setting up the device is done
-  as follows:
-
-  o  Create the device with tunctl (available from the UML utilities
-     tarball)
-
-
-
-
-       host#  tunctl -u uid
-
-
-
-
-  where uid is the user id or username that UML will be run as.  This
-  will tell you what device was created.
-
-  o  Configure the device IP (change IP addresses and device name to
-     suit)
-
-
-
-
-       host#  ifconfig tap0 192.168.0.254 up
-
-
-
-
-
-  o  Set up routing and arping if desired - this is my recipe, there are
-     other ways of doing the same thing
-
-
-       host#
-       bash -c 'echo 1 > /proc/sys/net/ipv4/ip_forward'
-
-       host#
-       route add -host 192.168.0.253 dev tap0
-
-
-
-
-
-
-       host#
-       bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap0/proxy_arp'
-
-
-
-
-
-
-       host#
-       arp -Ds 192.168.0.253 eth0 pub
-
-
-
-
-  Note that this must be done every time the host boots - this configu-
-  ration is not stored across host reboots.  So, it's probably a good
-  idea to stick it in an rc file.  An even better idea would be a little
-  utility which reads the information from a config file and sets up
-  devices at boot time.
-
-  o  Rather than using up two IPs and ARPing for one of them, you can
-     also provide direct access to your LAN by the UML by using a
-     bridge.
-
-
-       host#
-       brctl addbr br0
-
-
-
-
-
-
-       host#
-       ifconfig eth0 0.0.0.0 promisc up
-
-
-
-
-
-
-       host#
-       ifconfig tap0 0.0.0.0 promisc up
-
-
-
-
-
-
-       host#
-       ifconfig br0 192.168.0.1 netmask 255.255.255.0 up
-
-
-
-
-
-
-
-  host#
-  brctl stp br0 off
-
-
-
-
-
-
-       host#
-       brctl setfd br0 1
-
-
-
-
-
-
-       host#
-       brctl sethello br0 1
-
-
-
-
-
-
-       host#
-       brctl addif br0 eth0
-
-
-
-
-
-
-       host#
-       brctl addif br0 tap0
-
-
-
-
-  Note that 'br0' should be setup using ifconfig with the existing IP
-  address of eth0, as eth0 no longer has its own IP.
-
-  o
-
-
-     Also, the /dev/net/tun device must be writable by the user running
-     UML in order for the UML to use the device that's been configured
-     for it.  The simplest thing to do is
-
-
-       host#  chmod 666 /dev/net/tun
-
-
-
-
-  Making it world-writable looks bad, but it seems not to be
-  exploitable as a security hole.  However, it does allow anyone to cre-
-  ate useless tap devices (useless because they can't configure them),
-  which is a DOS attack.  A somewhat more secure alternative would to be
-  to create a group containing all the users who have preconfigured tap
-  devices and chgrp /dev/net/tun to that group with mode 664 or 660.
-
-
-  o  Once the device is set up, run UML with 'eth0=tuntap,device name'
-     (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
-     mconsole config command).
-
-  o  Bring the eth device up in UML and you're in business.
-
-     If you don't want that tap device any more, you can make it non-
-     persistent with
-
-
-       host#  tunctl -d tap device
-
-
-
-
-  Finally, tunctl has a -b (for brief mode) switch which causes it to
-  output only the name of the tap device it created.  This makes it
-  suitable for capture by a script:
-
-
-       host#  TAP=`tunctl -u 1000 -b`
-
-
-
-
-
-
-  6.8.  Ethertap
-
-  Ethertap is the general mechanism on 2.2 for userspace processes to
-  exchange packets with the kernel.
-
-
-
-  To use this transport, you need to describe the virtual network device
-  on the UML command line.  The general format for this is
-
-
-       eth <n> =ethertap, <device> , <ethernet address> , <tap IP address>
-
-
-
-
-  So, the previous example
-
-
-       eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
-
-
-
-
-  attaches the UML eth0 device to the host /dev/tap0, assigns it the
-  ethernet address fe:fd:0:0:0:1, and assigns the IP address
-  192.168.0.254 to the tap device.
-
-
-
-  The tap device is mandatory, but the others are optional.  If the
-  ethernet address is omitted, one will be assigned to it.
-
-
-  The presence of the tap IP address will cause the helper to run and do
-  whatever host setup is needed to allow the virtual machine to
-  communicate with the outside world.  If you're not sure you know what
-  you're doing, this is the way to go.
-
-
-  If it is absent, then you must configure the tap device and whatever
-  arping and routing you will need on the host.  However, even in this
-  case, the uml_net helper still needs to be in your path and it must be
-  setuid root if you're not running UML as root.  This is because the
-  tap device doesn't support SIGIO, which UML needs in order to use
-  something as a source of input.  So, the helper is used as a
-  convenient asynchronous IO thread.
-
-  If you're using the uml_net helper, you can ignore the following host
-  setup - uml_net will do it for you.  You just need to make sure you
-  have ethertap available, either built in to the host kernel or
-  available as a module.
-
-
-  If you want to set things up yourself, you need to make sure that the
-  appropriate /dev entry exists.  If it doesn't, become root and create
-  it as follows:
-
-
-       mknod /dev/tap <minor>  c 36  <minor>  + 16
-
-
-
-
-  For example, this is how to create /dev/tap0:
-
-
-       mknod /dev/tap0 c 36 0 + 16
-
-
-
-
-  You also need to make sure that the host kernel has ethertap support.
-  If ethertap is enabled as a module, you apparently need to insmod
-  ethertap once for each ethertap device you want to enable.  So,
-
-
-       host#
-       insmod ethertap
-
-
-
-
-  will give you the tap0 interface.  To get the tap1 interface, you need
-  to run
-
-
-       host#
-       insmod ethertap unit=1 -o ethertap1
-
-
-
-
-
-
-
-  6.9.  The switch daemon
-
-  Note: This is the daemon formerly known as uml_router, but which was
-  renamed so the network weenies of the world would stop growling at me.
-
-
-  The switch daemon, uml_switch, provides a mechanism for creating a
-  totally virtual network.  By default, it provides no connection to the
-  host network (but see -tap, below).
-
-
-  The first thing you need to do is run the daemon.  Running it with no
-  arguments will make it listen on a default pair of unix domain
-  sockets.
-
-
-  If you want it to listen on a different pair of sockets, use
-
-
-        -unix control socket data socket
-
-
-
-
-
-  If you want it to act as a hub rather than a switch, use
-
-
-        -hub
-
-
-
-
-
-  If you want the switch to be connected to host networking (allowing
-  the umls to get access to the outside world through the host), use
-
-
-        -tap tap0
-
-
-
-
-
-  Note that the tap device must be preconfigured (see "TUN/TAP with a
-  preconfigured tap device", above).  If you're using a different tap
-  device than tap0, specify that instead of tap0.
-
-
-  uml_switch can be backgrounded as follows
-
-
-       host%
-       uml_switch [ options ] < /dev/null > /dev/null
-
-
-
-
-  The reason it doesn't background by default is that it listens to
-  stdin for EOF.  When it sees that, it exits.
-
-
-  The general format of the kernel command line switch is
-
-
-
-       ethn=daemon,ethernet address,socket
-       type,control socket,data socket
-
-
-
-
-  You can leave off everything except the 'daemon'.  You only need to
-  specify the ethernet address if the one that will be assigned to it
-  isn't acceptable for some reason.  The rest of the arguments describe
-  how to communicate with the daemon.  You should only specify them if
-  you told the daemon to use different sockets than the default.  So, if
-  you ran the daemon with no arguments, running the UML on the same
-  machine with
-       eth0=daemon
-
-
-
-
-  will cause the eth0 driver to attach itself to the daemon correctly.
-
-
-
-  6.10.  Slip
-
-  Slip is another, less general, mechanism for a process to communicate
-  with the host networking.  In contrast to the ethertap interface,
-  which exchanges ethernet frames with the host and can be used to
-  transport any higher-level protocol, it can only be used to transport
-  IP.
-
-
-  The general format of the command line switch is
-
-
-
-       ethn=slip,slip IP
-
-
-
-
-  The slip IP argument is the IP address that will be assigned to the
-  host end of the slip device.  If it is specified, the helper will run
-  and will set up the host so that the virtual machine can reach it and
-  the rest of the network.
-
-
-  There are some oddities with this interface that you should be aware
-  of.  You should only specify one slip device on a given virtual
-  machine, and its name inside UML will be 'umn', not 'eth0' or whatever
-  you specified on the command line.  These problems will be fixed at
-  some point.
-
-
-
-  6.11.  Slirp
-
-  slirp uses an external program, usually /usr/bin/slirp, to provide IP
-  only networking connectivity through the host. This is similar to IP
-  masquerading with a firewall, although the translation is performed in
-  user-space, rather than by the kernel.  As slirp does not set up any
-  interfaces on the host, or changes routing, slirp does not require
-  root access or setuid binaries on the host.
-
-
-  The general format of the command line switch for slirp is:
-
-
-
-       ethn=slirp,ethernet address,slirp path
-
-
-
-
-  The ethernet address is optional, as UML will set up the interface
-  with an ethernet address based upon the initial IP address of the
-  interface.  The slirp path is generally /usr/bin/slirp, although it
-  will depend on distribution.
-
-
-  The slirp program can have a number of options passed to the command
-  line and we can't add them to the UML command line, as they will be
-  parsed incorrectly.  Instead, a wrapper shell script can be written or
-  the options inserted into the  /.slirprc file.  More information on
-  all of the slirp options can be found in its man pages.
-
-
-  The eth0 interface on UML should be set up with the IP 10.2.0.15,
-  although you can use anything as long as it is not used by a network
-  you will be connecting to. The default route on UML should be set to
-  use
-
-
-       UML#
-       route add default dev eth0
-
-
-
-
-  slirp provides a number of useful IP addresses which can be used by
-  UML, such as 10.0.2.3 which is an alias for the DNS server specified
-  in /etc/resolv.conf on the host or the IP given in the 'dns' option
-  for slirp.
-
-
-  Even with a baudrate setting higher than 115200, the slirp connection
-  is limited to 115200. If you need it to go faster, the slirp binary
-  needs to be compiled with FULL_BOLT defined in config.h.
-
-
-
-  6.12.  pcap
-
-  The pcap transport is attached to a UML ethernet device on the command
-  line or with uml_mconsole with the following syntax:
-
-
-
-       ethn=pcap,host interface,filter
-       expression,option1,option2
-
-
-
-
-  The expression and options are optional.
-
-
-  The interface is whatever network device on the host you want to
-  sniff.  The expression is a pcap filter expression, which is also what
-  tcpdump uses, so if you know how to specify tcpdump filters, you will
-  use the same expressions here.  The options are up to two of
-  'promisc', control whether pcap puts the host interface into
-  promiscuous mode. 'optimize' and 'nooptimize' control whether the pcap
-  expression optimizer is used.
-
-
-  Example:
-
-
-
-       eth0=pcap,eth0,tcp
-
-       eth1=pcap,eth0,!tcp
-
-
-
-  will cause the UML eth0 to emit all tcp packets on the host eth0 and
-  the UML eth1 to emit all non-tcp packets on the host eth0.
-
-
-
-  6.13.  Setting up the host yourself
-
-  If you don't specify an address for the host side of the ethertap or
-  slip device, UML won't do any setup on the host.  So this is what is
-  needed to get things working (the examples use a host-side IP of
-  192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
-  own network):
-
-  o  The device needs to be configured with its IP address.  Tap devices
-     are also configured with an mtu of 1484.  Slip devices are
-     configured with a point-to-point address pointing at the UML ip
-     address.
-
-
-       host#  ifconfig tap0 arp mtu 1484 192.168.0.251 up
-
-
-
-
-
-
-       host#
-       ifconfig sl0 192.168.0.251 pointopoint 192.168.0.250 up
-
-
-
-
-
-  o  If a tap device is being set up, a route is set to the UML IP.
-
-
-       UML# route add -host 192.168.0.250 gw 192.168.0.251
-
-
-
-
-
-  o  To allow other hosts on your network to see the virtual machine,
-     proxy arp is set up for it.
-
-
-       host#  arp -Ds 192.168.0.250 eth0 pub
-
-
-
-
-
-  o  Finally, the host is set up to route packets.
-
-
-       host#  echo 1 > /proc/sys/net/ipv4/ip_forward
-
-
-
-
-
-
-
-
-
-
-  7.  Sharing Filesystems between Virtual Machines
-
-
-
-
-  7.1.  A warning
-
-  Don't attempt to share filesystems simply by booting two UMLs from the
-  same file.  That's the same thing as booting two physical machines
-  from a shared disk.  It will result in filesystem corruption.
-
-
-
-  7.2.  Using layered block devices
-
-  The way to share a filesystem between two virtual machines is to use
-  the copy-on-write (COW) layering capability of the ubd block driver.
-  As of 2.4.6-2um, the driver supports layering a read-write private
-  device over a read-only shared device.  A machine's writes are stored
-  in the private device, while reads come from either device - the
-  private one if the requested block is valid in it, the shared one if
-  not.  Using this scheme, the majority of data which is unchanged is
-  shared between an arbitrary number of virtual machines, each of which
-  has a much smaller file containing the changes that it has made.  With
-  a large number of UMLs booting from a large root filesystem, this
-  leads to a huge disk space saving.  It will also help performance,
-  since the host will be able to cache the shared data using a much
-  smaller amount of memory, so UML disk requests will be served from the
-  host's memory rather than its disks.
-
-
-
-
-  To add a copy-on-write layer to an existing block device file, simply
-  add the name of the COW file to the appropriate ubd switch:
-
-
-        ubd0=root_fs_cow,root_fs_debian_22
-
-
-
-
-  where 'root_fs_cow' is the private COW file and 'root_fs_debian_22' is
-  the existing shared filesystem.  The COW file need not exist.  If it
-  doesn't, the driver will create and initialize it.  Once the COW file
-  has been initialized, it can be used on its own on the command line:
-
-
-        ubd0=root_fs_cow
-
-
-
-
-  The name of the backing file is stored in the COW file header, so it
-  would be redundant to continue specifying it on the command line.
-
-
-
-  7.3.  Note!
-
-  When checking the size of the COW file in order to see the gobs of
-  space that you're saving, make sure you use 'ls -ls' to see the actual
-  disk consumption rather than the length of the file.  The COW file is
-  sparse, so the length will be very different from the disk usage.
-  Here is a 'ls -l' of a COW file and backing file from one boot and
-  shutdown:
-       host% ls -l cow.debian debian2.2
-       -rw-r--r--    1 jdike    jdike    492504064 Aug  6 21:16 cow.debian
-       -rwxrw-rw-    1 jdike    jdike    537919488 Aug  6 20:42 debian2.2
-
-
-
-
-  Doesn't look like much saved space, does it?  Well, here's 'ls -ls':
-
-
-       host% ls -ls cow.debian debian2.2
-          880 -rw-r--r--    1 jdike    jdike    492504064 Aug  6 21:16 cow.debian
-       525832 -rwxrw-rw-    1 jdike    jdike    537919488 Aug  6 20:42 debian2.2
-
-
-
-
-  Now, you can see that the COW file has less than a meg of disk, rather
-  than 492 meg.
-
-
-
-  7.4.  Another warning
-
-  Once a filesystem is being used as a readonly backing file for a COW
-  file, do not boot directly from it or modify it in any way.  Doing so
-  will invalidate any COW files that are using it.  The mtime and size
-  of the backing file are stored in the COW file header at its creation,
-  and they must continue to match.  If they don't, the driver will
-  refuse to use the COW file.
-
-
-
-
-  If you attempt to evade this restriction by changing either the
-  backing file or the COW header by hand, you will get a corrupted
-  filesystem.
-
-
-
-
-  Among other things, this means that upgrading the distribution in a
-  backing file and expecting that all of the COW files using it will see
-  the upgrade will not work.
-
-
-
-
-  7.5.  uml_moo : Merging a COW file with its backing file
-
-  Depending on how you use UML and COW devices, it may be advisable to
-  merge the changes in the COW file into the backing file every once in
-  a while.
-
-
-
-
-  The utility that does this is uml_moo.  Its usage is
-
-
-       host% uml_moo COW file new backing file
-
-
-
-
-  There's no need to specify the backing file since that information is
-  already in the COW file header.  If you're paranoid, boot the new
-  merged file, and if you're happy with it, move it over the old backing
-  file.
-
-
-
-
-  uml_moo creates a new backing file by default as a safety measure.  It
-  also has a destructive merge option which will merge the COW file
-  directly into its current backing file.  This is really only usable
-  when the backing file only has one COW file associated with it.  If
-  there are multiple COWs associated with a backing file, a -d merge of
-  one of them will invalidate all of the others.  However, it is
-  convenient if you're short of disk space, and it should also be
-  noticeably faster than a non-destructive merge.
-
-
-
-
-  uml_moo is installed with the UML deb and RPM.  If you didn't install
-  UML from one of those packages, you can also get it from the UML
-  utilities <http://user-mode-linux.sourceforge.net/
-  utilities>  tar file in tools/moo.
-
-
-
-
-
-
-
-
-  8.  Creating filesystems
-
-
-  You may want to create and mount new UML filesystems, either because
-  your root filesystem isn't large enough or because you want to use a
-  filesystem other than ext2.
-
-
-  This was written on the occasion of reiserfs being included in the
-  2.4.1 kernel pool, and therefore the 2.4.1 UML, so the examples will
-  talk about reiserfs.  This information is generic, and the examples
-  should be easy to translate to the filesystem of your choice.
-
-
-  8.1.  Create the filesystem file
-
-  dd is your friend.  All you need to do is tell dd to create an empty
-  file of the appropriate size.  I usually make it sparse to save time
-  and to avoid allocating disk space until it's actually used.  For
-  example, the following command will create a sparse 100 meg file full
-  of zeroes.
-
-
-       host%
-       dd if=/dev/zero of=new_filesystem seek=100 count=1 bs=1M
-
-
-
-
-
-
-  8.2.  Assign the file to a UML device
-
-  Add an argument like the following to the UML command line:
-
-  ubd4=new_filesystem
-
-
-
-
-  making sure that you use an unassigned ubd device number.
-
-
-
-  8.3.  Creating and mounting the filesystem
-
-  Make sure that the filesystem is available, either by being built into
-  the kernel, or available as a module, then boot up UML and log in.  If
-  the root filesystem doesn't have the filesystem utilities (mkfs, fsck,
-  etc), then get them into UML by way of the net or hostfs.
-
-
-  Make the new filesystem on the device assigned to the new file:
-
-
-       host#  mkreiserfs /dev/ubd/4
-
-
-       <----------- MKREISERFSv2 ----------->
-
-       ReiserFS version 3.6.25
-       Block size 4096 bytes
-       Block count 25856
-       Used blocks 8212
-               Journal - 8192 blocks (18-8209), journal header is in block 8210
-               Bitmaps: 17
-               Root block 8211
-       Hash function "r5"
-       ATTENTION: ALL DATA WILL BE LOST ON '/dev/ubd/4'! (y/n)y
-       journal size 8192 (from 18)
-       Initializing journal - 0%....20%....40%....60%....80%....100%
-       Syncing..done.
-
-
-
-
-  Now, mount it:
-
-
-       UML#
-       mount /dev/ubd/4 /mnt
-
-
-
-
-  and you're in business.
-
-
-
-
-
-
-
-
-
-  9.  Host file access
-
-
-  If you want to access files on the host machine from inside UML, you
-  can treat it as a separate machine and either nfs mount directories
-  from the host or copy files into the virtual machine with scp or rcp.
-  However, since UML is running on the host, it can access those
-  files just like any other process and make them available inside the
-  virtual machine without needing to use the network.
-
-
-  This is now possible with the hostfs virtual filesystem.  With it, you
-  can mount a host directory into the UML filesystem and access the
-  files contained in it just as you would on the host.
-
-
-  9.1.  Using hostfs
-
-  To begin with, make sure that hostfs is available inside the virtual
-  machine with
-
-
-       UML# cat /proc/filesystems
-
-
-
-  .  hostfs should be listed.  If it's not, either rebuild the kernel
-  with hostfs configured into it or make sure that hostfs is built as a
-  module and available inside the virtual machine, and insmod it.
-
-
-  Now all you need to do is run mount:
-
-
-       UML# mount none /mnt/host -t hostfs
-
-
-
-
-  will mount the host's / on the virtual machine's /mnt/host.
-
-
-  If you don't want to mount the host root directory, then you can
-  specify a subdirectory to mount with the -o switch to mount:
-
-
-       UML# mount none /mnt/home -t hostfs -o /home
-
-
-
-
-  will mount the hosts's /home on the virtual machine's /mnt/home.
-
-
-
-  9.2.  hostfs as the root filesystem
-
-  It's possible to boot from a directory hierarchy on the host using
-  hostfs rather than using the standard filesystem in a file.
-
-  To start, you need that hierarchy.  The easiest way is to loop mount
-  an existing root_fs file:
-
-
-       host#  mount root_fs uml_root_dir -o loop
-
-
-
-
-  You need to change the filesystem type of / in etc/fstab to be
-  'hostfs', so that line looks like this:
-
-  /dev/ubd/0       /        hostfs      defaults          1   1
-
-
-
-
-  Then you need to chown to yourself all the files in that directory
-  that are owned by root.  This worked for me:
-
-
-       host#  find . -uid 0 -exec chown jdike {} \;
-
-
-
-
-  Next, make sure that your UML kernel has hostfs compiled in, not as a
-  module.  Then run UML with the boot device pointing at that directory:
-
-
-        ubd0=/path/to/uml/root/directory
-
-
-
-
-  UML should then boot as it does normally.
-
-
-  9.3.  Building hostfs
-
-  If you need to build hostfs because it's not in your kernel, you have
-  two choices:
-
-
-
-  o  Compiling hostfs into the kernel:
-
-
-     Reconfigure the kernel and set the 'Host filesystem' option under
-
-
-  o  Compiling hostfs as a module:
-
-
-     Reconfigure the kernel and set the 'Host filesystem' option under
-     be in arch/um/fs/hostfs/hostfs.o.  Install that in
-     /lib/modules/`uname -r`/fs in the virtual machine, boot it up, and
-
-
-       UML# insmod hostfs
-
-
-
-
-
-
-
-
-
-
-
-
-  10.  The Management Console
-
-
-
-  The UML management console is a low-level interface to the kernel,
-  somewhat like the i386 SysRq interface.  Since there is a full-blown
-  operating system under UML, there is much greater flexibility possible
-  than with the SysRq mechanism.
-
-
-  There are a number of things you can do with the mconsole interface:
-
-  o  get the kernel version
-
-  o  add and remove devices
-
-  o  halt or reboot the machine
-
-  o  Send SysRq commands
-
-  o  Pause and resume the UML
-
-
-  You need the mconsole client (uml_mconsole) which is present in CVS
-  (/tools/mconsole) in 2.4.5-9um and later, and will be in the RPM in
-  2.4.6.
-
-
-  You also need CONFIG_MCONSOLE (under 'General Setup') enabled in UML.
-  When you boot UML, you'll see a line like:
-
-
-       mconsole initialized on /home/jdike/.uml/umlNJ32yL/mconsole
-
-
-
-
-  If you specify a unique machine id one the UML command line, i.e.
-
-
-        umid=debian
-
-
-
-
-  you'll see this
-
-
-       mconsole initialized on /home/jdike/.uml/debian/mconsole
-
-
-
-
-  That file is the socket that uml_mconsole will use to communicate with
-  UML.  Run it with either the umid or the full path as its argument:
-
-
-       host% uml_mconsole debian
-
-
-
-
-  or
-
-
-       host% uml_mconsole /home/jdike/.uml/debian/mconsole
-
-
-
-
-  You'll get a prompt, at which you can run one of these commands:
-
-  o  version
-
-  o  halt
-
-  o  reboot
-
-  o  config
-
-  o  remove
-
-  o  sysrq
-
-  o  help
-
-  o  cad
-
-  o  stop
-
-  o  go
-
-
-  10.1.  version
-
-  This takes no arguments.  It prints the UML version.
-
-
-       (mconsole)  version
-       OK Linux usermode 2.4.5-9um #1 Wed Jun 20 22:47:08 EDT 2001 i686
-
-
-
-
-  There are a couple actual uses for this.  It's a simple no-op which
-  can be used to check that a UML is running.  It's also a way of
-  sending an interrupt to the UML.  This is sometimes useful on SMP
-  hosts, where there's a bug which causes signals to UML to be lost,
-  often causing it to appear to hang.  Sending such a UML the mconsole
-  version command is a good way to 'wake it up' before networking has
-  been enabled, as it does not do anything to the function of the UML.
-
-
-
-  10.2.  halt and reboot
-
-  These take no arguments.  They shut the machine down immediately, with
-  no syncing of disks and no clean shutdown of userspace.  So, they are
-  pretty close to crashing the machine.
-
-
-       (mconsole)  halt
-       OK
-
-
-
-
-
-
-  10.3.  config
-
-  "config" adds a new device to the virtual machine.  Currently the ubd
-  and network drivers support this.  It takes one argument, which is the
-  device to add, with the same syntax as the kernel command line.
-
-
-
-
-  (mconsole)
-  config ubd3=/home/jdike/incoming/roots/root_fs_debian22
-
-  OK
-  (mconsole)  config eth1=mcast
-  OK
-
-
-
-
-
-
-  10.4.  remove
-
-  "remove" deletes a device from the system.  Its argument is just the
-  name of the device to be removed. The device must be idle in whatever
-  sense the driver considers necessary.  In the case of the ubd driver,
-  the removed block device must not be mounted, swapped on, or otherwise
-  open, and in the case of the network driver, the device must be down.
-
-
-       (mconsole)  remove ubd3
-       OK
-       (mconsole)  remove eth1
-       OK
-
-
-
-
-
-
-  10.5.  sysrq
-
-  This takes one argument, which is a single letter.  It calls the
-  generic kernel's SysRq driver, which does whatever is called for by
-  that argument.  See the SysRq documentation in
-  Documentation/admin-guide/sysrq.rst in your favorite kernel tree to
-  see what letters are valid and what they do.
-
-
-
-  10.6.  help
-
-  "help" returns a string listing the valid commands and what each one
-  does.
-
-
-
-  10.7.  cad
-
-  This invokes the Ctl-Alt-Del action on init.  What exactly this ends
-  up doing is up to /etc/inittab.  Normally, it reboots the machine.
-  With UML, this is usually not desired, so if a halt would be better,
-  then find the section of inittab that looks like this
-
-
-       # What to do when CTRL-ALT-DEL is pressed.
-       ca:12345:ctrlaltdel:/sbin/shutdown -t1 -a -r now
-
-
-
-
-  and change the command to halt.
-
-
-
-  10.8.  stop
-
-  This puts the UML in a loop reading mconsole requests until a 'go'
-  mconsole command is received. This is very useful for making backups
-  of UML filesystems, as the UML can be stopped, then synced via 'sysrq
-  s', so that everything is written to the filesystem. You can then copy
-  the filesystem and then send the UML 'go' via mconsole.
-
-
-  Note that a UML running with more than one CPU will have problems
-  after you send the 'stop' command, as only one CPU will be held in a
-  mconsole loop and all others will continue as normal.  This is a bug,
-  and will be fixed.
-
-
-
-  10.9.  go
-
-  This resumes a UML after being paused by a 'stop' command. Note that
-  when the UML has resumed, TCP connections may have timed out and if
-  the UML is paused for a long period of time, crond might go a little
-  crazy, running all the jobs it didn't do earlier.
-
-
-
-
-
-
-
-
-  11.  Kernel debugging
-
-
-  Note: The interface that makes debugging, as described here, possible
-  is present in 2.4.0-test6 kernels and later.
-
-
-  Since the user-mode kernel runs as a normal Linux process, it is
-  possible to debug it with gdb almost like any other process.  It is
-  slightly different because the kernel's threads are already being
-  ptraced for system call interception, so gdb can't ptrace them.
-  However, a mechanism has been added to work around that problem.
-
-
-  In order to debug the kernel, you need build it from source.  See
-  ``Compiling the kernel and modules''  for information on doing that.
-  Make sure that you enable CONFIG_DEBUGSYM and CONFIG_PT_PROXY during
-  the config.  These will compile the kernel with -g, and enable the
-  ptrace proxy so that gdb works with UML, respectively.
-
-
-
-
-  11.1.  Starting the kernel under gdb
-
-  You can have the kernel running under the control of gdb from the
-  beginning by putting 'debug' on the command line.  You will get an
-  xterm with gdb running inside it.  The kernel will send some commands
-  to gdb which will leave it stopped at the beginning of start_kernel.
-  At this point, you can get things going with 'next', 'step', or
-  'cont'.
-
-
-  There is a transcript of a debugging session  here <debug-
-  session.html> , with breakpoints being set in the scheduler and in an
-  interrupt handler.
-  11.2.  Examining sleeping processes
-
-  Not every bug is evident in the currently running process.  Sometimes,
-  processes hang in the kernel when they shouldn't because they've
-  deadlocked on a semaphore or something similar.  In this case, when
-  you ^C gdb and get a backtrace, you will see the idle thread, which
-  isn't very relevant.
-
-
-  What you want is the stack of whatever process is sleeping when it
-  shouldn't be.  You need to figure out which process that is, which is
-  generally fairly easy.  Then you need to get its host process id,
-  which you can do either by looking at ps on the host or at
-  task.thread.extern_pid in gdb.
-
-
-  Now what you do is this:
-
-  o  detach from the current thread
-
-
-       (UML gdb)  det
-
-
-
-
-
-  o  attach to the thread you are interested in
-
-
-       (UML gdb)  att <host pid>
-
-
-
-
-
-  o  look at its stack and anything else of interest
-
-
-       (UML gdb)  bt
-
-
-
-
-  Note that you can't do anything at this point that requires that a
-  process execute, e.g. calling a function
-
-  o  when you're done looking at that process, reattach to the current
-     thread and continue it
-
-
-       (UML gdb)
-       att 1
-
-
-
-
-
-
-       (UML gdb)
-       c
-
-
-
-
-  Here, specifying any pid which is not the process id of a UML thread
-  will cause gdb to reattach to the current thread.  I commonly use 1,
-  but any other invalid pid would work.
-
-
-
-  11.3.  Running ddd on UML
-
-  ddd works on UML, but requires a special kludge.  The process goes
-  like this:
-
-  o  Start ddd
-
-
-       host% ddd linux
-
-
-
-
-
-  o  With ps, get the pid of the gdb that ddd started.  You can ask the
-     gdb to tell you, but for some reason that confuses things and
-     causes a hang.
-
-  o  run UML with 'debug=parent gdb-pid=<pid>' added to the command line
-     - it will just sit there after you hit return
-
-  o  type 'att 1' to the ddd gdb and you will see something like
-
-
-       0xa013dc51 in __kill ()
-
-
-       (gdb)
-
-
-
-
-
-  o  At this point, type 'c', UML will boot up, and you can use ddd just
-     as you do on any other process.
-
-
-
-  11.4.  Debugging modules
-
-  gdb has support for debugging code which is dynamically loaded into
-  the process.  This support is what is needed to debug kernel modules
-  under UML.
-
-
-  Using that support is somewhat complicated.  You have to tell gdb what
-  object file you just loaded into UML and where in memory it is.  Then,
-  it can read the symbol table, and figure out where all the symbols are
-  from the load address that you provided.  It gets more interesting
-  when you load the module again (i.e. after an rmmod).  You have to
-  tell gdb to forget about all its symbols, including the main UML ones
-  for some reason, then load then all back in again.
-
-
-  There's an easy way and a hard way to do this.  The easy way is to use
-  the umlgdb expect script written by Chandan Kudige.  It basically
-  automates the process for you.
-
-
-  First, you must tell it where your modules are.  There is a list in
-  the script that looks like this:
-       set MODULE_PATHS {
-       "fat" "/usr/src/uml/linux-2.4.18/fs/fat/fat.o"
-       "isofs" "/usr/src/uml/linux-2.4.18/fs/isofs/isofs.o"
-       "minix" "/usr/src/uml/linux-2.4.18/fs/minix/minix.o"
-       }
-
-
-
-
-  You change that to list the names and paths of the modules that you
-  are going to debug.  Then you run it from the toplevel directory of
-  your UML pool and it basically tells you what to do:
-
-
-
-
-                   ******** GDB pid is 21903 ********
-       Start UML as: ./linux <kernel switches> debug gdb-pid=21903
-
-
-
-       GNU gdb 5.0rh-5 Red Hat Linux 7.1
-       Copyright 2001 Free Software Foundation, Inc.
-       GDB is free software, covered by the GNU General Public License, and you are
-       welcome to change it and/or distribute copies of it under certain conditions.
-       Type "show copying" to see the conditions.
-       There is absolutely no warranty for GDB.  Type "show warranty" for details.
-       This GDB was configured as "i386-redhat-linux"...
-       (gdb) b sys_init_module
-       Breakpoint 1 at 0xa0011923: file module.c, line 349.
-       (gdb) att 1
-
-
-
-
-  After you run UML and it sits there doing nothing, you hit return at
-  the 'att 1' and continue it:
-
-
-       Attaching to program: /home/jdike/linux/2.4/um/./linux, process 1
-       0xa00f4221 in __kill ()
-       (UML gdb)  c
-       Continuing.
-
-
-
-
-  At this point, you debug normally.  When you insmod something, the
-  expect magic will kick in and you'll see something like:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-   *** Module hostfs loaded ***
-  Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs",
-      mod_user=0x8070e00) at module.c:349
-  349             char *name, *n_name, *name_tmp = NULL;
-  (UML gdb)  finish
-  Run till exit from #0  sys_init_module (name_user=0x805abb0 "hostfs",
-      mod_user=0x8070e00) at module.c:349
-  0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411
-  411             else res = EXECUTE_SYSCALL(syscall, regs);
-  Value returned is $1 = 0
-  (UML gdb)
-  p/x (int)module_list + module_list->size_of_struct
-
-  $2 = 0xa9021054
-  (UML gdb)  symbol-file ./linux
-  Load new symbol table from "./linux"? (y or n) y
-  Reading symbols from ./linux...
-  done.
-  (UML gdb)
-  add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054
-
-  add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at
-          .text_addr = 0xa9021054
-   (y or n) y
-
-  Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o...
-  done.
-  (UML gdb)  p *module_list
-  $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs",
-    size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1,
-    nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0,
-    init = 0xa90221f0 <init_hostfs>, cleanup = 0xa902222c <exit_hostfs>,
-    ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0,
-    persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0,
-    kallsyms_end = 0x0,
-    archdata_start = 0x1b855 <Address 0x1b855 out of bounds>,
-    archdata_end = 0xe5890000 <Address 0xe5890000 out of bounds>,
-    kernel_data = 0xf689c35d <Address 0xf689c35d out of bounds>}
-  >> Finished loading symbols for hostfs ...
-
-
-
-
-  That's the easy way.  It's highly recommended.  The hard way is
-  described below in case you're interested in what's going on.
-
-
-  Boot the kernel under the debugger and load the module with insmod or
-  modprobe.  With gdb, do:
-
-
-       (UML gdb)  p module_list
-
-
-
-
-  This is a list of modules that have been loaded into the kernel, with
-  the most recently loaded module first.  Normally, the module you want
-  is at module_list.  If it's not, walk down the next links, looking at
-  the name fields until find the module you want to debug.  Take the
-  address of that structure, and add module.size_of_struct (which in
-  2.4.10 kernels is 96 (0x60)) to it.  Gdb can make this hard addition
-  for you :-):
-
-
-
-  (UML gdb)
-  printf "%#x\n", (int)module_list module_list->size_of_struct
-
-
-
-
-  The offset from the module start occasionally changes (before 2.4.0,
-  it was module.size_of_struct + 4), so it's a good idea to check the
-  init and cleanup addresses once in a while, as describe below.  Now
-  do:
-
-
-       (UML gdb)
-       add-symbol-file /path/to/module/on/host that_address
-
-
-
-
-  Tell gdb you really want to do it, and you're in business.
-
-
-  If there's any doubt that you got the offset right, like breakpoints
-  appear not to work, or they're appearing in the wrong place, you can
-  check it by looking at the module structure.  The init and cleanup
-  fields should look like:
-
-
-       init = 0x588066b0 <init_hostfs>, cleanup = 0x588066c0 <exit_hostfs>
-
-
-
-
-  with no offsets on the symbol names.  If the names are right, but they
-  are offset, then the offset tells you how much you need to add to the
-  address you gave to add-symbol-file.
-
-
-  When you want to load in a new version of the module, you need to get
-  gdb to forget about the old one.  The only way I've found to do that
-  is to tell gdb to forget about all symbols that it knows about:
-
-
-       (UML gdb)  symbol-file
-
-
-
-
-  Then reload the symbols from the kernel binary:
-
-
-       (UML gdb)  symbol-file /path/to/kernel
-
-
-
-
-  and repeat the process above.  You'll also need to re-enable break-
-  points.  They were disabled when you dumped all the symbols because
-  gdb couldn't figure out where they should go.
-
-
-
-  11.5.  Attaching gdb to the kernel
-
-  If you don't have the kernel running under gdb, you can attach gdb to
-  it later by sending the tracing thread a SIGUSR1.  The first line of
-  the console output identifies its pid:
-       tracing thread pid = 20093
-
-
-
-
-  When you send it the signal:
-
-
-       host% kill -USR1 20093
-
-
-
-
-  you will get an xterm with gdb running in it.
-
-
-  If you have the mconsole compiled into UML, then the mconsole client
-  can be used to start gdb:
-
-
-       (mconsole)  (mconsole) config gdb=xterm
-
-
-
-
-  will fire up an xterm with gdb running in it.
-
-
-
-  11.6.  Using alternate debuggers
-
-  UML has support for attaching to an already running debugger rather
-  than starting gdb itself.  This is present in CVS as of 17 Apr 2001.
-  I sent it to Alan for inclusion in the ac tree, and it will be in my
-  2.4.4 release.
-
-
-  This is useful when gdb is a subprocess of some UI, such as emacs or
-  ddd.  It can also be used to run debuggers other than gdb on UML.
-  Below is an example of using strace as an alternate debugger.
-
-
-  To do this, you need to get the pid of the debugger and pass it in
-  with the
-
-
-  If you are using gdb under some UI, then tell it to 'att 1', and
-  you'll find yourself attached to UML.
-
-
-  If you are using something other than gdb as your debugger, then
-  you'll need to get it to do the equivalent of 'att 1' if it doesn't do
-  it automatically.
-
-
-  An example of an alternate debugger is strace.  You can strace the
-  actual kernel as follows:
-
-  o  Run the following in a shell
-
-
-       host%
-       sh -c 'echo pid=$$; echo -n hit return; read x; exec strace -p 1 -o strace.out'
-
-
-
-  o  Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
-     by the previous command
-
-  o  Hit return in the shell, and UML will start running, and strace
-     output will start accumulating in the output file.
-
-     Note that this is different from running
-
-
-       host% strace ./linux
-
-
-
-
-  That will strace only the main UML thread, the tracing thread, which
-  doesn't do any of the actual kernel work.  It just oversees the vir-
-  tual machine.  In contrast, using strace as described above will show
-  you the low-level activity of the virtual machine.
-
-
-
-
-
-  12.  Kernel debugging examples
-
-  12.1.  The case of the hung fsck
-
-  When booting up the kernel, fsck failed, and dropped me into a shell
-  to fix things up.  I ran fsck -y, which hung:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  Setting hostname uml                    [ OK ]
-  Checking root filesystem
-  /dev/fhd0 was not cleanly unmounted, check forced.
-  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
-
-  /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
-          (i.e., without -a or -p options)
-  [ FAILED ]
-
-  *** An error occurred during the file system check.
-  *** Dropping you to a shell; the system will reboot
-  *** when you leave the shell.
-  Give root password for maintenance
-  (or type Control-D for normal startup):
-
-  [root@uml /root]# fsck -y /dev/fhd0
-  fsck -y /dev/fhd0
-  Parallelizing fsck version 1.14 (9-Jan-1999)
-  e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
-  /dev/fhd0 contains a file system with errors, check forced.
-  Pass 1: Checking inodes, blocks, and sizes
-  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.  Ignore error? yes
-
-  Inode 19780, i_blocks is 1548, should be 540.  Fix? yes
-
-  Pass 2: Checking directory structure
-  Error reading block 49405 (Attempt to read block from filesystem resulted in short read).  Ignore error? yes
-
-  Directory inode 11858, block 0, offset 0: directory corrupted
-  Salvage? yes
-
-  Missing '.' in directory inode 11858.
-  Fix? yes
-
-  Missing '..' in directory inode 11858.
-  Fix? yes
-
-
-
-
-
-  The standard drill in this sort of situation is to fire up gdb on the
-  signal thread, which, in this case, was pid 1935.  In another window,
-  I run gdb and attach pid 1935.
-
-
-
-
-       ~/linux/2.3.26/um 1016: gdb linux
-       GNU gdb 4.17.0.11 with Linux support
-       Copyright 1998 Free Software Foundation, Inc.
-       GDB is free software, covered by the GNU General Public License, and you are
-       welcome to change it and/or distribute copies of it under certain conditions.
-       Type "show copying" to see the conditions.
-       There is absolutely no warranty for GDB.  Type "show warranty" for details.
-       This GDB was configured as "i386-redhat-linux"...
-
-       (gdb) att 1935
-       Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1935
-       0x100756d9 in __wait4 ()
-
-
-
-
-
-
-  Let's see what's currently running:
-
-
-
-       (gdb) p current_task.pid
-       $1 = 0
-
-
-
-
-
-  It's the idle thread, which means that fsck went to sleep for some
-  reason and never woke up.
-
-
-  Let's guess that the last process in the process list is fsck:
-
-
-
-       (gdb) p current_task.prev_task.comm
-       $13 = "fsck.ext2\000\000\000\000\000\000"
-
-
-
-
-
-  It is, so let's see what it thinks it's up to:
-
-
-
-       (gdb) p current_task.prev_task.thread
-       $14 = {extern_pid = 1980, tracing = 0, want_tracing = 0, forking = 0,
-         kernel_stack_page = 0, signal_stack = 1342627840, syscall = {id = 4, args = {
-             3, 134973440, 1024, 0, 1024}, have_result = 0, result = 50590720},
-         request = {op = 2, u = {exec = {ip = 1350467584, sp = 2952789424}, fork = {
-               regs = {1350467584, 2952789424, 0 <repeats 15 times>}, sigstack = 0,
-               pid = 0}, switch_to = 0x507e8000, thread = {proc = 0x507e8000,
-               arg = 0xaffffdb0, flags = 0, new_pid = 0}, input_request = {
-               op = 1350467584, fd = -1342177872, proc = 0, pid = 0}}}}
-
-
-
-
-
-  The interesting things here are the fact that its .thread.syscall.id
-  is __NR_write (see the big switch in arch/um/kernel/syscall_kern.c or
-  the defines in include/asm-um/arch/unistd.h), and that it never
-  returned.  Also, its .request.op is OP_SWITCH (see
-  arch/um/include/user_util.h).  These mean that it went into a write,
-  and, for some reason, called schedule().
-
-
-  The fact that it never returned from write means that its stack should
-  be fairly interesting.  Its pid is 1980 (.thread.extern_pid).  That
-  process is being ptraced by the signal thread, so it must be detached
-  before gdb can attach it:
-
-
-
-
-
-
-
-
-
-
-  (gdb) call detach(1980)
-
-  Program received signal SIGSEGV, Segmentation fault.
-  <function called from gdb>
-  The program being debugged stopped while in a function called from GDB.
-  When the function (detach) is done executing, GDB will silently
-  stop (instead of continuing to evaluate the expression containing
-  the function call).
-  (gdb) call detach(1980)
-  $15 = 0
-
-
-
-
-
-  The first detach segfaults for some reason, and the second one
-  succeeds.
-
-
-  Now I detach from the signal thread, attach to the fsck thread, and
-  look at its stack:
-
-
-       (gdb) det
-       Detaching from program: /home/dike/linux/2.3.26/um/linux Pid 1935
-       (gdb) att 1980
-       Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1980
-       0x10070451 in __kill ()
-       (gdb) bt
-       #0  0x10070451 in __kill ()
-       #1  0x10068ccd in usr1_pid (pid=1980) at process.c:30
-       #2  0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000)
-           at process_kern.c:156
-       #3  0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
-           at process_kern.c:161
-       #4  0x10001d12 in schedule () at core.c:777
-       #5  0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
-       #6  0x1006aa10 in __down_failed () at semaphore.c:157
-       #7  0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
-       #8  0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
-       #9  <signal handler called>
-       #10 0x10155404 in errno ()
-       #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
-       #12 0x1006c5d8 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
-       #13 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
-       #14 <signal handler called>
-       #15 0xc0fd in ?? ()
-       #16 0x10016647 in sys_write (fd=3,
-           buf=0x80b8800 <Address 0x80b8800 out of bounds>, count=1024)
-           at read_write.c:159
-       #17 0x1006d5b3 in execute_syscall (syscall=4, args=0x5006ef08)
-           at syscall_kern.c:254
-       #18 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35
-       #19 <signal handler called>
-       #20 0x400dc8b0 in ?? ()
-
-
-
-
-
-  The interesting things here are :
-
-  o  There are two segfaults on this stack (frames 9 and 14)
-
-  o  The first faulting address (frame 11) is 0x50000800
-
-  (gdb) p (void *)1342179328
-  $16 = (void *) 0x50000800
-
-
-
-
-
-  The initial faulting address is interesting because it is on the idle
-  thread's stack.  I had been seeing the idle thread segfault for no
-  apparent reason, and the cause looked like stack corruption.  In hopes
-  of catching the culprit in the act, I had turned off all protections
-  to that stack while the idle thread wasn't running.  This apparently
-  tripped that trap.
-
-
-  However, the more immediate problem is that second segfault and I'm
-  going to concentrate on that.  First, I want to see where the fault
-  happened, so I have to go look at the sigcontent struct in frame 8:
-
-
-
-       (gdb) up
-       #1  0x10068ccd in usr1_pid (pid=1980) at process.c:30
-       30        kill(pid, SIGUSR1);
-       (gdb)
-       #2  0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000)
-           at process_kern.c:156
-       156       usr1_pid(getpid());
-       (gdb)
-       #3  0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
-           at process_kern.c:161
-       161       _switch_to(prev, next);
-       (gdb)
-       #4  0x10001d12 in schedule () at core.c:777
-       777             switch_to(prev, next, prev);
-       (gdb)
-       #5  0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
-       71                      schedule();
-       (gdb)
-       #6  0x1006aa10 in __down_failed () at semaphore.c:157
-       157     }
-       (gdb)
-       #7  0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
-       174       segv(sc->cr2, sc->err & 2);
-       (gdb)
-       #8  0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
-       182       segv_handler(sc);
-       (gdb) p *sc
-       Cannot access memory at address 0x0.
-
-
-
-
-  That's not very useful, so I'll try a more manual method:
-
-
-       (gdb) p *((struct sigcontext *) (&sig + 1))
-       $19 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
-         __dsh = 0, edi = 1342179328, esi = 1350378548, ebp = 1342630440,
-         esp = 1342630420, ebx = 1348150624, edx = 1280, ecx = 0, eax = 0,
-         trapno = 14, err = 4, eip = 268480945, cs = 35, __csh = 0, eflags = 66118,
-         esp_at_signal = 1342630420, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
-         cr2 = 1280}
-
-
-
-  The ip is in handle_mm_fault:
-
-
-       (gdb) p (void *)268480945
-       $20 = (void *) 0x1000b1b1
-       (gdb) i sym $20
-       handle_mm_fault + 57 in section .text
-
-
-
-
-
-  Specifically, it's in pte_alloc:
-
-
-       (gdb) i line *$20
-       Line 124 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b1b1 <handle_mm_fault+57>
-          and ends at 0x1000b1b7 <handle_mm_fault+63>.
-
-
-
-
-
-  To find where in handle_mm_fault this is, I'll jump forward in the
-  code until I see an address in that procedure:
-
-
-
-       (gdb) i line *0x1000b1c0
-       Line 126 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b1b7 <handle_mm_fault+63>
-          and ends at 0x1000b1c3 <handle_mm_fault+75>.
-       (gdb) i line *0x1000b1d0
-       Line 131 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b1d0 <handle_mm_fault+88>
-          and ends at 0x1000b1da <handle_mm_fault+98>.
-       (gdb) i line *0x1000b1e0
-       Line 61 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b1da <handle_mm_fault+98>
-          and ends at 0x1000b1e1 <handle_mm_fault+105>.
-       (gdb) i line *0x1000b1f0
-       Line 134 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b1f0 <handle_mm_fault+120>
-          and ends at 0x1000b200 <handle_mm_fault+136>.
-       (gdb) i line *0x1000b200
-       Line 135 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b200 <handle_mm_fault+136>
-          and ends at 0x1000b208 <handle_mm_fault+144>.
-       (gdb) i line *0x1000b210
-       Line 139 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
-          starts at address 0x1000b210 <handle_mm_fault+152>
-          and ends at 0x1000b219 <handle_mm_fault+161>.
-       (gdb) i line *0x1000b220
-       Line 1168 of "memory.c" starts at address 0x1000b21e <handle_mm_fault+166>
-          and ends at 0x1000b222 <handle_mm_fault+170>.
-
-
-
-
-
-  Something is apparently wrong with the page tables or vma_structs, so
-  lets go back to frame 11 and have a look at them:
-
-
-
-  #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
-  50        handle_mm_fault(current, vma, address, is_write);
-  (gdb) call pgd_offset_proc(vma->vm_mm, address)
-  $22 = (pgd_t *) 0x80a548c
-
-
-
-
-
-  That's pretty bogus.  Page tables aren't supposed to be in process
-  text or data areas.  Let's see what's in the vma:
-
-
-       (gdb) p *vma
-       $23 = {vm_mm = 0x507d2434, vm_start = 0, vm_end = 134512640,
-         vm_next = 0x80a4f8c, vm_page_prot = {pgprot = 0}, vm_flags = 31200,
-         vm_avl_height = 2058, vm_avl_left = 0x80a8c94, vm_avl_right = 0x80d1000,
-         vm_next_share = 0xaffffdb0, vm_pprev_share = 0xaffffe63,
-         vm_ops = 0xaffffe7a, vm_pgoff = 2952789626, vm_file = 0xafffffec,
-         vm_private_data = 0x62}
-       (gdb) p *vma.vm_mm
-       $24 = {mmap = 0x507d2434, mmap_avl = 0x0, mmap_cache = 0x8048000,
-         pgd = 0x80a4f8c, mm_users = {counter = 0}, mm_count = {counter = 134904288},
-         map_count = 134909076, mmap_sem = {count = {counter = 135073792},
-           sleepers = -1342177872, wait = {lock = <optimized out or zero length>,
-             task_list = {next = 0xaffffe63, prev = 0xaffffe7a},
-             __magic = -1342177670, __creator = -1342177300}, __magic = 98},
-         page_table_lock = {}, context = 138, start_code = 0, end_code = 0,
-         start_data = 0, end_data = 0, start_brk = 0, brk = 0, start_stack = 0,
-         arg_start = 0, arg_end = 0, env_start = 0, env_end = 0, rss = 1350381536,
-         total_vm = 0, locked_vm = 0, def_flags = 0, cpu_vm_mask = 0, swap_cnt = 0,
-         swap_address = 0, segments = 0x0}
-
-
-
-
-
-  This also pretty bogus.  With all of the 0x80xxxxx and 0xaffffxxx
-  addresses, this is looking like a stack was plonked down on top of
-  these structures.  Maybe it's a stack overflow from the next page:
-
-
-
-       (gdb) p vma
-       $25 = (struct vm_area_struct *) 0x507d2434
-
-
-
-
-
-  That's towards the lower quarter of the page, so that would have to
-  have been pretty heavy stack overflow:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  (gdb) x/100x $25
-  0x507d2434:     0x507d2434      0x00000000      0x08048000      0x080a4f8c
-  0x507d2444:     0x00000000      0x080a79e0      0x080a8c94      0x080d1000
-  0x507d2454:     0xaffffdb0      0xaffffe63      0xaffffe7a      0xaffffe7a
-  0x507d2464:     0xafffffec      0x00000062      0x0000008a      0x00000000
-  0x507d2474:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2484:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2494:     0x00000000      0x00000000      0x507d2fe0      0x00000000
-  0x507d24a4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d24b4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d24c4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d24d4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d24e4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d24f4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2504:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2514:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2524:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2534:     0x00000000      0x00000000      0x507d25dc      0x00000000
-  0x507d2544:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2554:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2564:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2574:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2584:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d2594:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d25a4:     0x00000000      0x00000000      0x00000000      0x00000000
-  0x507d25b4:     0x00000000      0x00000000      0x00000000      0x00000000
-
-
-
-
-
-  It's not stack overflow.  The only "stack-like" piece of this data is
-  the vma_struct itself.
-
-
-  At this point, I don't see any avenues to pursue, so I just have to
-  admit that I have no idea what's going on.  What I will do, though, is
-  stick a trap on the segfault handler which will stop if it sees any
-  writes to the idle thread's stack.  That was the thing that happened
-  first, and it may be that if I can catch it immediately, what's going
-  on will be somewhat clearer.
-
-
-  12.2.  Episode 2: The case of the hung fsck
-
-  After setting a trap in the SEGV handler for accesses to the signal
-  thread's stack, I reran the kernel.
-
-
-  fsck hung again, this time by hitting the trap:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  Setting hostname uml                            [ OK ]
-  Checking root filesystem
-  /dev/fhd0 contains a file system with errors, check forced.
-  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
-
-  /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
-          (i.e., without -a or -p options)
-  [ FAILED ]
-
-  *** An error occurred during the file system check.
-  *** Dropping you to a shell; the system will reboot
-  *** when you leave the shell.
-  Give root password for maintenance
-  (or type Control-D for normal startup):
-
-  [root@uml /root]# fsck -y /dev/fhd0
-  fsck -y /dev/fhd0
-  Parallelizing fsck version 1.14 (9-Jan-1999)
-  e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
-  /dev/fhd0 contains a file system with errors, check forced.
-  Pass 1: Checking inodes, blocks, and sizes
-  Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.  Ignore error? yes
-
-  Pass 2: Checking directory structure
-  Error reading block 49405 (Attempt to read block from filesystem resulted in short read).  Ignore error? yes
-
-  Directory inode 11858, block 0, offset 0: directory corrupted
-  Salvage? yes
-
-  Missing '.' in directory inode 11858.
-  Fix? yes
-
-  Missing '..' in directory inode 11858.
-  Fix? yes
-
-  Untested (4127) [100fe44c]: trap_kern.c line 31
-
-
-
-
-
-  I need to get the signal thread to detach from pid 4127 so that I can
-  attach to it with gdb.  This is done by sending it a SIGUSR1, which is
-  caught by the signal thread, which detaches the process:
-
-
-       kill -USR1 4127
-
-
-
-
-
-  Now I can run gdb on it:
-
-
-
-
-
-
-
-
-
-
-
-
-
-  ~/linux/2.3.26/um 1034: gdb linux
-  GNU gdb 4.17.0.11 with Linux support
-  Copyright 1998 Free Software Foundation, Inc.
-  GDB is free software, covered by the GNU General Public License, and you are
-  welcome to change it and/or distribute copies of it under certain conditions.
-  Type "show copying" to see the conditions.
-  There is absolutely no warranty for GDB.  Type "show warranty" for details.
-  This GDB was configured as "i386-redhat-linux"...
-  (gdb) att 4127
-  Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127
-  0x10075891 in __libc_nanosleep ()
-
-
-
-
-
-  The backtrace shows that it was in a write and that the fault address
-  (address in frame 3) is 0x50000800, which is right in the middle of
-  the signal thread's stack page:
-
-
-       (gdb) bt
-       #0  0x10075891 in __libc_nanosleep ()
-       #1  0x1007584d in __sleep (seconds=1000000)
-           at ../sysdeps/unix/sysv/linux/sleep.c:78
-       #2  0x1006ce9a in stop () at user_util.c:191
-       #3  0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
-       #4  0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
-       #5  0x1006c63c in kern_segv_handler (sig=11) at trap_user.c:182
-       #6  <signal handler called>
-       #7  0xc0fd in ?? ()
-       #8  0x10016647 in sys_write (fd=3, buf=0x80b8800 "R.", count=1024)
-           at read_write.c:159
-       #9  0x1006d603 in execute_syscall (syscall=4, args=0x5006ef08)
-           at syscall_kern.c:254
-       #10 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35
-       #11 <signal handler called>
-       #12 0x400dc8b0 in ?? ()
-       #13 <signal handler called>
-       #14 0x400dc8b0 in ?? ()
-       #15 0x80545fd in ?? ()
-       #16 0x804daae in ?? ()
-       #17 0x8054334 in ?? ()
-       #18 0x804d23e in ?? ()
-       #19 0x8049632 in ?? ()
-       #20 0x80491d2 in ?? ()
-       #21 0x80596b5 in ?? ()
-       (gdb) p (void *)1342179328
-       $3 = (void *) 0x50000800
-
-
-
-
-
-  Going up the stack to the segv_handler frame and looking at where in
-  the code the access happened shows that it happened near line 110 of
-  block_dev.c:
-
-
-
-
-
-
-
-
-
-  (gdb) up
-  #1  0x1007584d in __sleep (seconds=1000000)
-      at ../sysdeps/unix/sysv/linux/sleep.c:78
-  ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory.
-  (gdb)
-  #2  0x1006ce9a in stop () at user_util.c:191
-  191       while(1) sleep(1000000);
-  (gdb)
-  #3  0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
-  31          KERN_UNTESTED();
-  (gdb)
-  #4  0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
-  174       segv(sc->cr2, sc->err & 2);
-  (gdb) p *sc
-  $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
-    __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484,
-    esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14,
-    err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070,
-    esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
-    cr2 = 1342179328}
-  (gdb) p (void *)268550834
-  $2 = (void *) 0x1001c2b2
-  (gdb) i sym $2
-  block_write + 1090 in section .text
-  (gdb) i line *$2
-  Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h"
-     starts at address 0x1001c2a1 <block_write+1073>
-     and ends at 0x1001c2bf <block_write+1103>.
-  (gdb) i line *0x1001c2c0
-  Line 110 of "block_dev.c" starts at address 0x1001c2bf <block_write+1103>
-     and ends at 0x1001c2e3 <block_write+1139>.
-
-
-
-
-
-  Looking at the source shows that the fault happened during a call to
-  copy_from_user to copy the data into the kernel:
-
-
-       107             count -= chars;
-       108             copy_from_user(p,buf,chars);
-       109             p += chars;
-       110             buf += chars;
-
-
-
-
-
-  p is the pointer which must contain 0x50000800, since buf contains
-  0x80b8800 (frame 8 above).  It is defined as:
-
-
-                       p = offset + bh->b_data;
-
-
-
-
-
-  I need to figure out what bh is, and it just so happens that bh is
-  passed as an argument to mark_buffer_uptodate and mark_buffer_dirty a
-  few lines later, so I do a little disassembly:
-
-
-
-
-  (gdb) disas 0x1001c2bf 0x1001c2e0
-  Dump of assembler code from 0x1001c2bf to 0x1001c2d0:
-  0x1001c2bf <block_write+1103>:  addl   %eax,0xc(%ebp)
-  0x1001c2c2 <block_write+1106>:  movl   0xfffffdd4(%ebp),%edx
-  0x1001c2c8 <block_write+1112>:  btsl   $0x0,0x18(%edx)
-  0x1001c2cd <block_write+1117>:  btsl   $0x1,0x18(%edx)
-  0x1001c2d2 <block_write+1122>:  sbbl   %ecx,%ecx
-  0x1001c2d4 <block_write+1124>:  testl  %ecx,%ecx
-  0x1001c2d6 <block_write+1126>:  jne    0x1001c2e3 <block_write+1139>
-  0x1001c2d8 <block_write+1128>:  pushl  $0x0
-  0x1001c2da <block_write+1130>:  pushl  %edx
-  0x1001c2db <block_write+1131>:  call   0x1001819c <__mark_buffer_dirty>
-  End of assembler dump.
-
-
-
-
-
-  At that point, bh is in %edx (address 0x1001c2da), which is calculated
-  at 0x1001c2c2 as %ebp + 0xfffffdd4, so I figure exactly what that is,
-  taking %ebp from the sigcontext_struct above:
-
-
-       (gdb) p (void *)1342631484
-       $5 = (void *) 0x5006ee3c
-       (gdb) p 0x5006ee3c+0xfffffdd4
-       $6 = 1342630928
-       (gdb) p (void *)$6
-       $7 = (void *) 0x5006ec10
-       (gdb) p *((void **)$7)
-       $8 = (void *) 0x50100200
-
-
-
-
-
-  Now, I look at the structure to see what's in it, and particularly,
-  what its b_data field contains:
-
-
-       (gdb) p *((struct buffer_head *)0x50100200)
-       $13 = {b_next = 0x50289380, b_blocknr = 49405, b_size = 1024, b_list = 0,
-         b_dev = 15872, b_count = {counter = 1}, b_rdev = 15872, b_state = 24,
-         b_flushtime = 0, b_next_free = 0x501001a0, b_prev_free = 0x50100260,
-         b_this_page = 0x501001a0, b_reqnext = 0x0, b_pprev = 0x507fcf58,
-         b_data = 0x50000800 "", b_page = 0x50004000,
-         b_end_io = 0x10017f60 <end_buffer_io_sync>, b_dev_id = 0x0,
-         b_rsector = 98810, b_wait = {lock = <optimized out or zero length>,
-           task_list = {next = 0x50100248, prev = 0x50100248}, __magic = 1343226448,
-           __creator = 0}, b_kiobuf = 0x0}
-
-
-
-
-
-  The b_data field is indeed 0x50000800, so the question becomes how
-  that happened.  The rest of the structure looks fine, so this probably
-  is not a case of data corruption.  It happened on purpose somehow.
-
-
-  The b_page field is a pointer to the page_struct representing the
-  0x50000000 page.  Looking at it shows the kernel's idea of the state
-  of that page:
-
-
-
-  (gdb) p *$13.b_page
-  $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0,
-    index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = {
-      next = 0x50008460, prev = 0x50019350}, wait = {
-      lock = <optimized out or zero length>, task_list = {next = 0x50004024,
-        prev = 0x50004024}, __magic = 1342193708, __creator = 0},
-    pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280,
-    zone = 0x100c5160}
-
-
-
-
-
-  Some sanity-checking: the virtual field shows the "virtual" address of
-  this page, which in this kernel is the same as its "physical" address,
-  and the page_struct itself should be mem_map[0], since it represents
-  the first page of memory:
-
-
-
-       (gdb) p (void *)1342177280
-       $18 = (void *) 0x50000000
-       (gdb) p mem_map
-       $19 = (mem_map_t *) 0x50004000
-
-
-
-
-
-  These check out fine.
-
-
-  Now to check out the page_struct itself.  In particular, the flags
-  field shows whether the page is considered free or not:
-
-
-       (gdb) p (void *)132
-       $21 = (void *) 0x84
-
-
-
-
-
-  The "reserved" bit is the high bit, which is definitely not set, so
-  the kernel considers the signal stack page to be free and available to
-  be used.
-
-
-  At this point, I jump to conclusions and start looking at my early
-  boot code, because that's where that page is supposed to be reserved.
-
-
-  In my setup_arch procedure, I have the following code which looks just
-  fine:
-
-
-
-       bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn);
-       free_bootmem(__pa(low_physmem) + bootmap_size, high_physmem - low_physmem);
-
-
-
-
-
-  Two stack pages have already been allocated, and low_physmem points to
-  the third page, which is the beginning of free memory.
-  The init_bootmem call declares the entire memory to the boot memory
-  manager, which marks it all reserved.  The free_bootmem call frees up
-  all of it, except for the first two pages.  This looks correct to me.
-
-
-  So, I decide to see init_bootmem run and make sure that it is marking
-  those first two pages as reserved.  I never get that far.
-
-
-  Stepping into init_bootmem, and looking at bootmem_map before looking
-  at what it contains shows the following:
-
-
-
-       (gdb) p bootmem_map
-       $3 = (void *) 0x50000000
-
-
-
-
-
-  Aha!  The light dawns.  That first page is doing double duty as a
-  stack and as the boot memory map.  The last thing that the boot memory
-  manager does is to free the pages used by its memory map, so this page
-  is getting freed even its marked as reserved.
-
-
-  The fix was to initialize the boot memory manager before allocating
-  those two stack pages, and then allocate them through the boot memory
-  manager.  After doing this, and fixing a couple of subsequent buglets,
-  the stack corruption problem disappeared.
-
-
-
-
-
-  13.  What to do when UML doesn't work
-
-
-
-
-  13.1.  Strange compilation errors when you build from source
-
-  As of test11, it is necessary to have "ARCH=um" in the environment or
-  on the make command line for all steps in building UML, including
-  clean, distclean, or mrproper, config, menuconfig, or xconfig, dep,
-  and linux.  If you forget for any of them, the i386 build seems to
-  contaminate the UML build.  If this happens, start from scratch with
-
-
-       host%
-       make mrproper ARCH=um
-
-
-
-
-  and repeat the build process with ARCH=um on all the steps.
-
-
-  See ``Compiling the kernel and modules''  for more details.
-
-
-  Another cause of strange compilation errors is building UML in
-  /usr/src/linux.  If you do this, the first thing you need to do is
-  clean up the mess you made.  The /usr/src/linux/asm link will now
-  point to /usr/src/linux/asm-um.  Make it point back to
-  /usr/src/linux/asm-i386.  Then, move your UML pool someplace else and
-  build it there.  Also see below, where a more specific set of symptoms
-  is described.
-
-
-
-  13.3.  A variety of panics and hangs with /tmp on a reiserfs  filesys-
-  tem
-
-  I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
-  Panics preceded by
-
-
-       Detaching pid nnnn
-
-
-
-  are diagnostic of this problem.  This is a reiserfs bug which causes a
-  thread to occasionally read stale data from a mmapped page shared with
-  another thread.  The fix is to upgrade the filesystem or to have /tmp
-  be an ext2 filesystem.
-
-
-
-  13.4.  The compile fails with errors about conflicting types for
-  'open', 'dup', and 'waitpid'
-
-  This happens when you build in /usr/src/linux.  The UML build makes
-  the include/asm link point to include/asm-um.  /usr/include/asm points
-  to /usr/src/linux/include/asm, so when that link gets moved, files
-  which need to include the asm-i386 versions of headers get the
-  incompatible asm-um versions.  The fix is to move the include/asm link
-  back to include/asm-i386 and to do UML builds someplace else.
-
-
-
-  13.5.  UML doesn't work when /tmp is an NFS filesystem
-
-  This seems to be a similar situation with the ReiserFS problem above.
-  Some versions of NFS seems not to handle mmap correctly, which UML
-  depends on.  The workaround is have /tmp be a non-NFS directory.
-
-
-  13.6.  UML hangs on boot when compiled with gprof support
-
-  If you build UML with gprof support and, early in the boot, it does
-  this
-
-
-       kernel BUG at page_alloc.c:100!
-
-
-
-
-  you have a buggy gcc.  You can work around the problem by removing
-  UM_FASTCALL from CFLAGS in arch/um/Makefile-i386.  This will open up
-  another bug, but that one is fairly hard to reproduce.
-
-
-
-  13.7.  syslogd dies with a SIGTERM on startup
-
-  The exact boot error depends on the distribution that you're booting,
-  but Debian produces this:
-
-
-       /etc/rc2.d/S10sysklogd: line 49:    93 Terminated
-       start-stop-daemon --start --quiet --exec /sbin/syslogd -- $SYSLOGD
-
-
-
-
-  This is a syslogd bug.  There's a race between a parent process
-  installing a signal handler and its child sending the signal.  See
-  this uml-devel post <http://www.geocrawler.com/lists/3/Source-
-  Forge/709/0/6612801>  for the details.
-
-
-
-  13.8.  TUN/TAP networking doesn't work on a 2.4 host
-
-  There are a couple of problems which were
-  <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed
-  out">  by Tim Robinson <timro at trkr dot net>
-
-  o  It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
-     The fix is to upgrade to something more recent and then read the
-     next item.
-
-  o  If you see
-
-
-       File descriptor in bad state
-
-
-
-  when you bring up the device inside UML, you have a header mismatch
-  between the original kernel and the upgraded one.  Make /usr/src/linux
-  point at the new headers.  This will only be a problem if you build
-  uml_net yourself.
-
-
-
-  13.9.  You can network to the host but not to other machines on the
-  net
-
-  If you can connect to the host, and the host can connect to UML, but
-  you cannot connect to any other machines, then you may need to enable
-  IP Masquerading on the host.  Usually this is only experienced when
-  using private IP addresses (192.168.x.x or 10.x.x.x) for host/UML
-  networking, rather than the public address space that your host is
-  connected to.  UML does not enable IP Masquerading, so you will need
-  to create a static rule to enable it:
-
-
-       host%
-       iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
-
-
-
-
-  Replace eth0 with the interface that you use to talk to the rest of
-  the world.
-
-
-  Documentation on IP Masquerading, and SNAT, can be found at
-  www.netfilter.org  <http://www.netfilter.org> .
-
-
-  If you can reach the local net, but not the outside Internet, then
-  that is usually a routing problem.  The UML needs a default route:
-
-
-       UML#
-       route add default gw gateway IP
-
-
-
-
-  The gateway IP can be any machine on the local net that knows how to
-  reach the outside world.  Usually, this is the host or the local net-
-  work's gateway.
-
-
-  Occasionally, we hear from someone who can reach some machines, but
-  not others on the same net, or who can reach some ports on other
-  machines, but not others.  These are usually caused by strange
-  firewalling somewhere between the UML and the other box.  You track
-  this down by running tcpdump on every interface the packets travel
-  over and see where they disappear.  When you find a machine that takes
-  the packets in, but does not send them onward, that's the culprit.
-
-
-
-  13.10.  I have no root and I want to scream
-
-  Thanks to Birgit Wahlich for telling me about this strange one.  It
-  turns out that there's a limit of six environment variables on the
-  kernel command line.  When that limit is reached or exceeded, argument
-  processing stops, which means that the 'root=' argument that UML
-  usually adds is not seen.  So, the filesystem has no idea what the
-  root device is, so it panics.
-
-
-  The fix is to put less stuff on the command line.  Glomming all your
-  setup variables into one is probably the best way to go.
-
-
-
-  13.11.  UML build conflict between ptrace.h and ucontext.h
-
-  On some older systems, /usr/include/asm/ptrace.h and
-  /usr/include/sys/ucontext.h define the same names.  So, when they're
-  included together, the defines from one completely mess up the parsing
-  of the other, producing errors like:
-       /usr/include/sys/ucontext.h:47: parse error before
-       `10'
-
-
-
-
-  plus a pile of warnings.
-
-
-  This is a libc botch, which has since been fixed, and I don't see any
-  way around it besides upgrading.
-
-
-
-  13.12.  The UML BogoMips is exactly half the host's BogoMips
-
-  On i386 kernels, there are two ways of running the loop that is used
-  to calculate the BogoMips rating, using the TSC if it's there or using
-  a one-instruction loop.  The TSC produces twice the BogoMips as the
-  loop.  UML uses the loop, since it has nothing resembling a TSC, and
-  will get almost exactly the same BogoMips as a host using the loop.
-  However, on a host with a TSC, its BogoMips will be double the loop
-  BogoMips, and therefore double the UML BogoMips.
-
-
-
-  13.13.  When you run UML, it immediately segfaults
-
-  If the host is configured with the 2G/2G address space split, that's
-  why.  See ``UML on 2G/2G hosts''  for the details on getting UML to
-  run on your host.
-
-
-
-  13.14.  xterms appear, then immediately disappear
-
-  If you're running an up to date kernel with an old release of
-  uml_utilities, the port-helper program will not work properly, so
-  xterms will exit straight after they appear. The solution is to
-  upgrade to the latest release of uml_utilities.  Usually this problem
-  occurs when you have installed a packaged release of UML then compiled
-  your own development kernel without upgrading the uml_utilities from
-  the source distribution.
-
-
-
-  13.15.  Any other panic, hang, or strange behavior
-
-  If you're seeing truly strange behavior, such as hangs or panics that
-  happen in random places, or you try running the debugger to see what's
-  happening and it acts strangely, then it could be a problem in the
-  host kernel.  If you're not running a stock Linus or -ac kernel, then
-  try that.  An early version of the preemption patch and a 2.4.10 SuSE
-  kernel have caused very strange problems in UML.
-
-
-  Otherwise, let me know about it.  Send a message to one of the UML
-  mailing lists - either the developer list - user-mode-linux-devel at
-  lists dot sourceforge dot net (subscription info) or the user list -
-  user-mode-linux-user at lists dot sourceforge do net (subscription
-  info), whichever you prefer.  Don't assume that everyone knows about
-  it and that a fix is imminent.
-
-
-  If you want to be super-helpful, read ``Diagnosing Problems'' and
-  follow the instructions contained therein.
-  14.  Diagnosing Problems
-
-
-  If you get UML to crash, hang, or otherwise misbehave, you should
-  report this on one of the project mailing lists, either the developer
-  list - user-mode-linux-devel at lists dot sourceforge dot net
-  (subscription info) or the user list - user-mode-linux-user at lists
-  dot sourceforge dot net (subscription info).  When you do, it is
-  likely that I will want more information.  So, it would be helpful to
-  read the stuff below, do whatever is applicable in your case, and
-  report the results to the list.
-
-
-  For any diagnosis, you're going to need to build a debugging kernel.
-  The binaries from this site aren't debuggable.  If you haven't done
-  this before, read about ``Compiling the kernel and modules''  and
-  ``Kernel debugging''  UML first.
-
-
-  14.1.  Case 1 : Normal kernel panics
-
-  The most common case is for a normal thread to panic.  To debug this,
-  you will need to run it under the debugger (add 'debug' to the command
-  line).  An xterm will start up with gdb running inside it.  Continue
-  it when it stops in start_kernel and make it crash.  Now ^C gdb and
-
-
-  If the panic was a "Kernel mode fault", then there will be a segv
-  frame on the stack and I'm going to want some more information.  The
-  stack might look something like this:
-
-
-       (UML gdb)  backtrace
-       #0  0x1009bf76 in __sigprocmask (how=1, set=0x5f347940, oset=0x0)
-           at ../sysdeps/unix/sysv/linux/sigprocmask.c:49
-       #1  0x10091411 in change_sig (signal=10, on=1) at process.c:218
-       #2  0x10094785 in timer_handler (sig=26) at time_kern.c:32
-       #3  0x1009bf38 in __restore ()
-           at ../sysdeps/unix/sysv/linux/i386/sigaction.c:125
-       #4  0x1009534c in segv (address=8, ip=268849158, is_write=2, is_user=0)
-           at trap_kern.c:66
-       #5  0x10095c04 in segv_handler (sig=11) at trap_user.c:285
-       #6  0x1009bf38 in __restore ()
-
-
-
-
-  I'm going to want to see the symbol and line information for the value
-  of ip in the segv frame.  In this case, you would do the following:
-
-
-       (UML gdb)  i sym 268849158
-
-
-
-
-  and
-
-
-       (UML gdb)  i line *268849158
-
-
-
-
-  The reason for this is the __restore frame right above the segv_han-
-  dler frame is hiding the frame that actually segfaulted.  So, I have
-  to get that information from the faulting ip.
-
-
-  14.2.  Case 2 : Tracing thread panics
-
-  The less common and more painful case is when the tracing thread
-  panics.  In this case, the kernel debugger will be useless because it
-  needs a healthy tracing thread in order to work.  The first thing to
-  do is get a backtrace from the tracing thread.  This is done by
-  figuring out what its pid is, firing up gdb, and attaching it to that
-  pid.  You can figure out the tracing thread pid by looking at the
-  first line of the console output, which will look like this:
-
-
-       tracing thread pid = 15851
-
-
-
-
-  or by running ps on the host and finding the line that looks like
-  this:
-
-
-       jdike 15851 4.5 0.4 132568 1104 pts/0 S 21:34 0:05 ./linux [(tracing thread)]
-
-
-
-
-  If the panic was 'segfault in signals', then follow the instructions
-  above for collecting information about the location of the seg fault.
-
-
-  If the tracing thread flaked out all by itself, then send that
-  backtrace in and wait for our crack debugging team to fix the problem.
-
-
-  14.3.  Case 3 : Tracing thread panics caused by other threads
-
-  However, there are cases where the misbehavior of another thread
-  caused the problem.  The most common panic of this type is:
-
-
-       wait_for_stop failed to wait for  <pid>  to stop with  <signal number>
-
-
-
-
-  In this case, you'll need to get a backtrace from the process men-
-  tioned in the panic, which is complicated by the fact that the kernel
-  debugger is defunct and without some fancy footwork, another gdb can't
-  attach to it.  So, this is how the fancy footwork goes:
-
-  In a shell:
-
-
-       host% kill -STOP pid
-
-
-
-
-  Run gdb on the tracing thread as described in case 2 and do:
-
-
-       (host gdb)  call detach(pid)
-
-
-  If you get a segfault, do it again.  It always works the second time.
-
-  Detach from the tracing thread and attach to that other thread:
-
-
-       (host gdb)  detach
-
-
-
-
-
-
-       (host gdb)  attach pid
-
-
-
-
-  If gdb hangs when attaching to that process, go back to a shell and
-  do:
-
-
-       host%
-       kill -CONT pid
-
-
-
-
-  And then get the backtrace:
-
-
-       (host gdb)  backtrace
-
-
-
-
-
-  14.4.  Case 4 : Hangs
-
-  Hangs seem to be fairly rare, but they sometimes happen.  When a hang
-  happens, we need a backtrace from the offending process.  Run the
-  kernel debugger as described in case 1 and get a backtrace.  If the
-  current process is not the idle thread, then send in the backtrace.
-  You can tell that it's the idle thread if the stack looks like this:
-
-
-       #0  0x100b1401 in __libc_nanosleep ()
-       #1  0x100a2885 in idle_sleep (secs=10) at time.c:122
-       #2  0x100a546f in do_idle () at process_kern.c:445
-       #3  0x100a5508 in cpu_idle () at process_kern.c:471
-       #4  0x100ec18f in start_kernel () at init/main.c:592
-       #5  0x100a3e10 in start_kernel_proc (unused=0x0) at um_arch.c:71
-       #6  0x100a383f in signal_tramp (arg=0x100a3dd8) at trap_user.c:50
-
-
-
-
-  If this is the case, then some other process is at fault, and went to
-  sleep when it shouldn't have.  Run ps on the host and figure out which
-  process should not have gone to sleep and stayed asleep.  Then attach
-  to it with gdb and get a backtrace as described in case 3.
-
-
-
-
-
-
-  15.  Thanks
-
-
-  A number of people have helped this project in various ways, and this
-  page gives recognition where recognition is due.
-
-
-  If you're listed here and you would prefer a real link on your name,
-  or no link at all, instead of the despammed email address pseudo-link,
-  let me know.
-
-
-  If you're not listed here and you think maybe you should be, please
-  let me know that as well.  I try to get everyone, but sometimes my
-  bookkeeping lapses and I forget about contributions.
-
-
-  15.1.  Code and Documentation
-
-  Rusty Russell <rusty at linuxcare.com.au>  -
-
-  o  wrote the  HOWTO <http://user-mode-
-     linux.sourceforge.net/UserModeLinux-HOWTO.html>
-
-  o  prodded me into making this project official and putting it on
-     SourceForge
-
-  o  came up with the way cool UML logo <http://user-mode-
-     linux.sourceforge.net/uml-small.png>
-
-  o  redid the config process
-
-
-  Peter Moulder <reiter at netspace.net.au>  - Fixed my config and build
-  processes, and added some useful code to the block driver
-
-
-  Bill Stearns <wstearns at pobox.com>  -
-
-  o  HOWTO updates
-
-  o  lots of bug reports
-
-  o  lots of testing
-
-  o  dedicated a box (uml.ists.dartmouth.edu) to support UML development
-
-  o  wrote the mkrootfs script, which allows bootable filesystems of
-     RPM-based distributions to be cranked out
-
-  o  cranked out a large number of filesystems with said script
-
-
-  Jim Leu <jleu at mindspring.com>  - Wrote the virtual ethernet driver
-  and associated usermode tools
-
-  Lars Brinkhoff <http://lars.nocrew.org/>  - Contributed the ptrace
-  proxy from his own  project <http://a386.nocrew.org/> to allow easier
-  kernel debugging
-
-
-  Andrea Arcangeli <andrea at suse.de>  - Redid some of the early boot
-  code so that it would work on machines with Large File Support
-
-
-  Chris Emerson <http://www.chiark.greenend.org.uk/~cemerson/>  - Did
-  the first UML port to Linux/ppc
-
-
-  Harald Welte <laforge at gnumonks.org>  - Wrote the multicast
-  transport for the network driver
-
-
-  Jorgen Cederlof - Added special file support to hostfs
-
-
-  Greg Lonnon  <glonnon at ridgerun dot com>  - Changed the ubd driver
-  to allow it to layer a COW file on a shared read-only filesystem and
-  wrote the iomem emulation support
-
-
-  Henrik Nordstrom <http://hem.passagen.se/hno/>  - Provided a variety
-  of patches, fixes, and clues
-
-
-  Lennert Buytenhek - Contributed various patches, a rewrite of the
-  network driver, the first implementation of the mconsole driver, and
-  did the bulk of the work needed to get SMP working again.
-
-
-  Yon Uriarte - Fixed the TUN/TAP network backend while I slept.
-
-
-  Adam Heath - Made a bunch of nice cleanups to the initialization code,
-  plus various other small patches.
-
-
-  Matt Zimmerman - Matt volunteered to be the UML Debian maintainer and
-  is doing a real nice job of it.  He also noticed and fixed a number of
-  actually and potentially exploitable security holes in uml_net.  Plus
-  the occasional patch.  I like patches.
-
-
-  James McMechan - James seems to have taken over maintenance of the ubd
-  driver and is doing a nice job of it.
-
-
-  Chandan Kudige - wrote the umlgdb script which automates the reloading
-  of module symbols.
-
-
-  Steve Schmidtke - wrote the UML slirp transport and hostaudio drivers,
-  enabling UML processes to access audio devices on the host. He also
-  submitted patches for the slip transport and lots of other things.
-
-
-  David Coulson <http://davidcoulson.net>  -
-
-  o  Set up the usermodelinux.org <http://usermodelinux.org>  site,
-     which is a great way of keeping the UML user community on top of
-     UML goings-on.
-
-  o  Site documentation and updates
-
-  o  Nifty little UML management daemon  UMLd
-     <http://uml.openconsultancy.com/umld/>
-
-  o  Lots of testing and bug reports
-
-
-
-
-  15.2.  Flushing out bugs
-
-
-
-  o  Yuri Pudgorodsky
-
-  o  Gerald Britton
-
-  o  Ian Wehrman
-
-  o  Gord Lamb
-
-  o  Eugene Koontz
-
-  o  John H. Hartman
-
-  o  Anders Karlsson
-
-  o  Daniel Phillips
-
-  o  John Fremlin
-
-  o  Rainer Burgstaller
-
-  o  James Stevenson
-
-  o  Matt Clay
-
-  o  Cliff Jefferies
-
-  o  Geoff Hoff
-
-  o  Lennert Buytenhek
-
-  o  Al Viro
-
-  o  Frank Klingenhoefer
-
-  o  Livio Baldini Soares
-
-  o  Jon Burgess
-
-  o  Petru Paler
-
-  o  Paul
-
-  o  Chris Reahard
-
-  o  Sverker Nilsson
-
-  o  Gong Su
-
-  o  johan verrept
-
-  o  Bjorn Eriksson
-
-  o  Lorenzo Allegrucci
-
-  o  Muli Ben-Yehuda
-
-  o  David Mansfield
-
-  o  Howard Goff
-
-  o  Mike Anderson
-
-  o  John Byrne
-
-  o  Sapan J. Batia
-
-  o  Iris Huang
-
-  o  Jan Hudec
-
-  o  Voluspa
-
-
-
-
-  15.3.  Buglets and clean-ups
-
-
-
-  o  Dave Zarzycki
-
-  o  Adam Lazur
-
-  o  Boria Feigin
-
-  o  Brian J. Murrell
-
-  o  JS
-
-  o  Roman Zippel
-
-  o  Wil Cooley
-
-  o  Ayelet Shemesh
-
-  o  Will Dyson
-
-  o  Sverker Nilsson
-
-  o  dvorak
-
-  o  v.naga srinivas
-
-  o  Shlomi Fish
-
-  o  Roger Binns
-
-  o  johan verrept
-
-  o  MrChuoi
-
-  o  Peter Cleve
-
-  o  Vincent Guffens
-
-  o  Nathan Scott
-
-  o  Patrick Caulfield
-
-  o  jbearce
-
-  o  Catalin Marinas
-
-  o  Shane Spencer
-
-  o  Zou Min
-
-
-  o  Ryan Boder
-
-  o  Lorenzo Colitti
-
-  o  Gwendal Grignou
-
-  o  Andre' Breiler
-
-  o  Tsutomu Yasuda
-
-
-
-  15.4.  Case Studies
-
-
-  o  Jon Wright
-
-  o  William McEwan
-
-  o  Michael Richardson
-
-
-
-  15.5.  Other contributions
-
-
-  Bill Carr <Bill.Carr at compaq.com>  made the Red Hat mkrootfs script
-  work with RH 6.2.
-
-  Michael Jennings <mikejen at hevanet.com>  sent in some material which
-  is now gracing the top of the  index  page <http://user-mode-
-  linux.sourceforge.net/>  of this site.
-
-  SGI <http://www.sgi.com>  (and more specifically Ralf Baechle <ralf at
-  uni-koblenz.de> ) gave me an account on oss.sgi.com
-  <http://www.oss.sgi.com> .  The bandwidth there made it possible to
-  produce most of the filesystems available on the project download
-  page.
-
-  Laurent Bonnaud <Laurent.Bonnaud at inpg.fr>  took the old grotty
-  Debian filesystem that I've been distributing and updated it to 2.2.
-  It is now available by itself here.
-
-  Rik van Riel gave me some ftp space on ftp.nl.linux.org so I can make
-  releases even when Sourceforge is broken.
-
-  Rodrigo de Castro looked at my broken pte code and told me what was
-  wrong with it, letting me fix a long-standing (several weeks) and
-  serious set of bugs.
-
-  Chris Reahard built a specialized root filesystem for running a DNS
-  server jailed inside UML.  It's available from the download
-  <http://user-mode-linux.sourceforge.net/dl-sf.html>  page in the Jail
-  Filesystems section.
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/MAINTAINERS b/MAINTAINERS
index debbb7b97c98..1aec93695040 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8727,7 +8727,7 @@ L:	kvm@vger.kernel.org
 W:	http://www.linux-kvm.org
 T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
 S:	Supported
-F:	Documentation/virtual/kvm/
+F:	Documentation/virt/kvm/
 F:	include/trace/events/kvm.h
 F:	include/uapi/asm-generic/kvm*
 F:	include/uapi/linux/kvm*
@@ -12054,7 +12054,7 @@ M:	Juergen Gross <jgross@suse.com>
 M:	Alok Kataria <akataria@vmware.com>
 L:	virtualization@lists.linux-foundation.org
 S:	Supported
-F:	Documentation/virtual/paravirt_ops.txt
+F:	Documentation/virt/paravirt_ops.txt
 F:	arch/*/kernel/paravirt*
 F:	arch/*/include/asm/paravirt*.h
 F:	include/linux/hypervisor.h
@@ -16745,7 +16745,7 @@ W:	http://user-mode-linux.sourceforge.net
 Q:	https://patchwork.ozlabs.org/project/linux-um/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git
 S:	Maintained
-F:	Documentation/virtual/uml/
+F:	Documentation/virt/uml/
 F:	arch/um/
 F:	arch/x86/um/
 F:	fs/hostfs/
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 01555c6ae0f5..be48c2215fa2 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -31,7 +31,7 @@
  * Struct fields are always 32 or 64 bit aligned, depending on them being 32
  * or 64 bit wide respectively.
  *
- * See Documentation/virtual/kvm/ppc-pv.txt
+ * See Documentation/virt/kvm/ppc-pv.txt
  */
 struct kvm_vcpu_arch_shared {
 	__u64 scratch1;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8f72526e2f68..24843cf49579 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3466,7 +3466,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 		/*
 		 * Currently, fast page fault only works for direct mapping
 		 * since the gfn is not stable for indirect shadow page. See
-		 * Documentation/virtual/kvm/locking.txt to get more detail.
+		 * Documentation/virt/kvm/locking.txt to get more detail.
 		 */
 		fault_handled = fast_pf_fix_direct_spte(vcpu, sp,
 							iterator.sptep, spte,
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index a7c19540ce21..5e3f12d5359e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
-	 * For ARM: See Documentation/virtual/kvm/api.txt
+	 * For ARM: See Documentation/virt/kvm/api.txt
 	 */
 	union {
 		__u32 irq;
@@ -1086,7 +1086,7 @@ struct kvm_xen_hvm_config {
  *
  * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
  * the irqfd to operate in resampling mode for level triggered interrupt
- * emulation.  See Documentation/virtual/kvm/api.txt.
+ * emulation.  See Documentation/virt/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index c2152f3dd02d..e7c67be7c15f 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
-	 * For ARM: See Documentation/virtual/kvm/api.txt
+	 * For ARM: See Documentation/virt/kvm/api.txt
 	 */
 	union {
 		__u32 irq;
@@ -1085,7 +1085,7 @@ struct kvm_xen_hvm_config {
  *
  * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
  * the irqfd to operate in resampling mode for level triggered interrupt
- * emulation.  See Documentation/virtual/kvm/api.txt.
+ * emulation.  See Documentation/virt/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f645c0fbf7ec..acc43242a310 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -727,7 +727,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 		 * Ensure we set mode to IN_GUEST_MODE after we disable
 		 * interrupts and before the final VCPU requests check.
 		 * See the comment in kvm_vcpu_exiting_guest_mode() and
-		 * Documentation/virtual/kvm/vcpu-requests.rst
+		 * Documentation/virt/kvm/vcpu-requests.rst
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 936962abc38d..c45e2d7e942f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -250,7 +250,7 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
 	 * pending state of interrupt is latched in pending_latch variable.
 	 * Userspace will save and restore pending state and line_level
 	 * separately.
-	 * Refer to Documentation/virtual/kvm/devices/arm-vgic-v3.txt
+	 * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.txt
 	 * for handling of ISPENDR and ICPENDR.
 	 */
 	for (i = 0; i < len * 8; i++) {
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 57205beaa981..3b7525deec80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -42,7 +42,7 @@
 			    VGIC_AFFINITY_LEVEL(val, 3))
 
 /*
- * As per Documentation/virtual/kvm/devices/arm-vgic-v3.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-v3.txt,
  * below macros are defined for CPUREG encoding.
  */
 #define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK   0x000000000000c000
@@ -63,7 +63,7 @@
 				      KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
 
 /*
- * As per Documentation/virtual/kvm/devices/arm-vgic-its.txt,
+ * As per Documentation/virt/kvm/devices/arm-vgic-its.txt,
  * below macros are defined for ITS table entry encoding.
  */
 #define KVM_ITS_CTE_VALID_SHIFT		63
-- 
cgit v1.2.3


From 6d1bcb957be2850e0776f24c289e1f87c256baeb Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:43:07 +0100
Subject: iommu: Remove empty iommu_tlb_range_add() callback from iommu_ops

Commit add02cfdc9bc ("iommu: Introduce Interface for IOMMU TLB Flushing")
added three new TLB flushing operations to the IOMMU API so that the
underlying driver operations can be batched when unmapping large regions
of IO virtual address space.

However, the ->iotlb_range_add() callback has not been implemented by
any IOMMU drivers (amd_iommu.c implements it as an empty function, which
incurs the overhead of an indirect branch). Instead, drivers either flush
the entire IOTLB in the ->iotlb_sync() callback or perform the necessary
invalidation during ->unmap().

Attempting to implement ->iotlb_range_add() for arm-smmu-v3.c revealed
two major issues:

  1. The page size used to map the region in the page-table is not known,
     and so it is not generally possible to issue TLB flushes in the most
     efficient manner.

  2. The only mutable state passed to the callback is a pointer to the
     iommu_domain, which can be accessed concurrently and therefore
     requires expensive synchronisation to keep track of the outstanding
     flushes.

Remove the callback entirely in preparation for extending ->unmap() and
->iotlb_sync() to update a token on the caller's stack.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/amd_iommu.c       |  6 ------
 drivers/iommu/iommu.c           |  3 ---
 drivers/vfio/vfio_iommu_type1.c |  1 -
 include/linux/iommu.h           | 15 ---------------
 4 files changed, 25 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b607a92791d3..f93b148cf55e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3196,11 +3196,6 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
 	domain_flush_complete(dom);
 }
 
-static void amd_iommu_iotlb_range_add(struct iommu_domain *domain,
-				      unsigned long iova, size_t size)
-{
-}
-
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.domain_alloc = amd_iommu_domain_alloc,
@@ -3219,7 +3214,6 @@ const struct iommu_ops amd_iommu_ops = {
 	.is_attach_deferred = amd_iommu_is_attach_deferred,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
 	.flush_iotlb_all = amd_iommu_flush_iotlb_all,
-	.iotlb_range_add = amd_iommu_iotlb_range_add,
 	.iotlb_sync = amd_iommu_flush_iotlb_all,
 };
 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0c674d80c37f..6d7b25fe2474 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1903,9 +1903,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 		if (!unmapped_page)
 			break;
 
-		if (sync && ops->iotlb_range_add)
-			ops->iotlb_range_add(domain, iova, pgsize);
-
 		pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
 			 iova, unmapped_page);
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 054391f30fa8..fad7fd8c167c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -696,7 +696,6 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
 		if (!unmapped) {
 			kfree(entry);
 		} else {
-			iommu_tlb_range_add(domain->domain, *iova, unmapped);
 			entry->iova = *iova;
 			entry->phys = phys;
 			entry->len  = unmapped;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index fdc355ccc570..1e21431262d9 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -201,7 +201,6 @@ struct iommu_sva_ops {
  * @map: map a physically contiguous memory region to an iommu domain
  * @unmap: unmap a physically contiguous memory region from an iommu domain
  * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
- * @iotlb_range_add: Add a given iova range to the flush queue for this domain
  * @iotlb_sync_map: Sync mappings created recently using @map to the hardware
  * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
  *            queue
@@ -244,8 +243,6 @@ struct iommu_ops {
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     size_t size);
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
-	void (*iotlb_range_add)(struct iommu_domain *domain,
-				unsigned long iova, size_t size);
 	void (*iotlb_sync_map)(struct iommu_domain *domain);
 	void (*iotlb_sync)(struct iommu_domain *domain);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
@@ -476,13 +473,6 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
 		domain->ops->flush_iotlb_all(domain);
 }
 
-static inline void iommu_tlb_range_add(struct iommu_domain *domain,
-				       unsigned long iova, size_t size)
-{
-	if (domain->ops->iotlb_range_add)
-		domain->ops->iotlb_range_add(domain, iova, size);
-}
-
 static inline void iommu_tlb_sync(struct iommu_domain *domain)
 {
 	if (domain->ops->iotlb_sync)
@@ -637,11 +627,6 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
 {
 }
 
-static inline void iommu_tlb_range_add(struct iommu_domain *domain,
-				       unsigned long iova, size_t size)
-{
-}
-
 static inline void iommu_tlb_sync(struct iommu_domain *domain)
 {
 }
-- 
cgit v1.2.3


From 298f78895b081911e0b3605f07d79ebd3d4cf7b0 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:43:34 +0100
Subject: iommu/io-pgtable: Rename iommu_gather_ops to iommu_flush_ops

In preparation for TLB flush gathering in the IOMMU API, rename the
iommu_gather_ops structure in io-pgtable to iommu_flush_ops, which
better describes its purpose and avoids the potential for confusion
between different levels of the API.

$ find linux/ -type f -name '*.[ch]' | xargs sed -i 's/gather_ops/flush_ops/g'

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +-
 drivers/iommu/arm-smmu-v3.c             | 4 ++--
 drivers/iommu/arm-smmu.c                | 8 ++++----
 drivers/iommu/io-pgtable-arm-v7s.c      | 2 +-
 drivers/iommu/io-pgtable-arm.c          | 2 +-
 drivers/iommu/ipmmu-vmsa.c              | 4 ++--
 drivers/iommu/msm_iommu.c               | 4 ++--
 drivers/iommu/mtk_iommu.c               | 4 ++--
 drivers/iommu/qcom_iommu.c              | 4 ++--
 include/linux/io-pgtable.h              | 6 +++---
 10 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 92ac995dd9c6..17bceb11e708 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -257,7 +257,7 @@ static void mmu_tlb_sync_context(void *cookie)
 	// TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X
 }
 
-static const struct iommu_gather_ops mmu_tlb_ops = {
+static const struct iommu_flush_ops mmu_tlb_ops = {
 	.tlb_flush_all	= mmu_tlb_inv_context_s1,
 	.tlb_add_flush	= mmu_tlb_inv_range_nosync,
 	.tlb_sync	= mmu_tlb_sync_context,
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index a9a9fabd3968..7e137e1e28f1 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1603,7 +1603,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	} while (size -= granule);
 }
 
-static const struct iommu_gather_ops arm_smmu_gather_ops = {
+static const struct iommu_flush_ops arm_smmu_flush_ops = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync,
@@ -1796,7 +1796,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
 		.ias		= ias,
 		.oas		= oas,
 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
-		.tlb		= &arm_smmu_gather_ops,
+		.tlb		= &arm_smmu_flush_ops,
 		.iommu_dev	= smmu->dev,
 	};
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 64977c131ee6..dc08db347ef3 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -251,7 +251,7 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct io_pgtable_ops		*pgtbl_ops;
-	const struct iommu_gather_ops	*tlb_ops;
+	const struct iommu_flush_ops	*tlb_ops;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
 	bool				non_strict;
@@ -547,19 +547,19 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
 	writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
 }
 
-static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
+static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync_context,
 };
 
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
 	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync_context,
 };
 
-static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
+static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
 	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync_vmid,
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index a62733c6a632..116f97ee991e 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -817,7 +817,7 @@ static void dummy_tlb_sync(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static const struct iommu_gather_ops dummy_tlb_ops = {
+static const struct iommu_flush_ops dummy_tlb_ops = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0d6633921c1e..402f913b6f6d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1081,7 +1081,7 @@ static void dummy_tlb_sync(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static const struct iommu_gather_ops dummy_tlb_ops __initconst = {
+static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index ad0098c0c87c..2c14a2c65b22 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -367,7 +367,7 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
 	/* The hardware doesn't support selective TLB flush. */
 }
 
-static const struct iommu_gather_ops ipmmu_gather_ops = {
+static const struct iommu_flush_ops ipmmu_flush_ops = {
 	.tlb_flush_all = ipmmu_tlb_flush_all,
 	.tlb_add_flush = ipmmu_tlb_add_flush,
 	.tlb_sync = ipmmu_tlb_flush_all,
@@ -480,7 +480,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
 	domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K;
 	domain->cfg.ias = 32;
 	domain->cfg.oas = 40;
-	domain->cfg.tlb = &ipmmu_gather_ops;
+	domain->cfg.tlb = &ipmmu_flush_ops;
 	domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32);
 	domain->io_domain.geometry.force_aperture = true;
 	/*
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index b25e2eb9e038..8b602384a385 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -178,7 +178,7 @@ static void __flush_iotlb_sync(void *cookie)
 	 */
 }
 
-static const struct iommu_gather_ops msm_iommu_gather_ops = {
+static const struct iommu_flush_ops msm_iommu_flush_ops = {
 	.tlb_flush_all = __flush_iotlb,
 	.tlb_add_flush = __flush_iotlb_range,
 	.tlb_sync = __flush_iotlb_sync,
@@ -345,7 +345,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv)
 		.pgsize_bitmap = msm_iommu_ops.pgsize_bitmap,
 		.ias = 32,
 		.oas = 32,
-		.tlb = &msm_iommu_gather_ops,
+		.tlb = &msm_iommu_flush_ops,
 		.iommu_dev = priv->dev,
 	};
 
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 82e4be4dfdaf..fed77658d67e 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -188,7 +188,7 @@ static void mtk_iommu_tlb_sync(void *cookie)
 	}
 }
 
-static const struct iommu_gather_ops mtk_iommu_gather_ops = {
+static const struct iommu_flush_ops mtk_iommu_flush_ops = {
 	.tlb_flush_all = mtk_iommu_tlb_flush_all,
 	.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
 	.tlb_sync = mtk_iommu_tlb_sync,
@@ -267,7 +267,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
 		.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
 		.ias = 32,
 		.oas = 32,
-		.tlb = &mtk_iommu_gather_ops,
+		.tlb = &mtk_iommu_flush_ops,
 		.iommu_dev = data->dev,
 	};
 
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 34d0b9783b3e..fd9d9f4da735 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -164,7 +164,7 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	}
 }
 
-static const struct iommu_gather_ops qcom_gather_ops = {
+static const struct iommu_flush_ops qcom_flush_ops = {
 	.tlb_flush_all	= qcom_iommu_tlb_inv_context,
 	.tlb_add_flush	= qcom_iommu_tlb_inv_range_nosync,
 	.tlb_sync	= qcom_iommu_tlb_sync,
@@ -215,7 +215,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain,
 		.pgsize_bitmap	= qcom_iommu_ops.pgsize_bitmap,
 		.ias		= 32,
 		.oas		= 40,
-		.tlb		= &qcom_gather_ops,
+		.tlb		= &qcom_flush_ops,
 		.iommu_dev	= qcom_iommu->dev,
 	};
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b5a450a3bb47..6292ea15d674 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -17,7 +17,7 @@ enum io_pgtable_fmt {
 };
 
 /**
- * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management.
+ * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management.
  *
  * @tlb_flush_all: Synchronously invalidate the entire TLB context.
  * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
@@ -28,7 +28,7 @@ enum io_pgtable_fmt {
  * Note that these can all be called in atomic context and must therefore
  * not block.
  */
-struct iommu_gather_ops {
+struct iommu_flush_ops {
 	void (*tlb_flush_all)(void *cookie);
 	void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
 			      bool leaf, void *cookie);
@@ -84,7 +84,7 @@ struct io_pgtable_cfg {
 	unsigned int			ias;
 	unsigned int			oas;
 	bool				coherent_walk;
-	const struct iommu_gather_ops	*tlb;
+	const struct iommu_flush_ops	*tlb;
 	struct device			*iommu_dev;
 
 	/* Low-level data specific to the table format */
-- 
cgit v1.2.3


From a7d20dc19d9ea7012227be5144353012ffa3ddc4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:43:48 +0100
Subject: iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes

To permit batching of TLB flushes across multiple calls to the IOMMU
driver's ->unmap() implementation, introduce a new structure for
tracking the address range to be flushed and the granularity at which
the flushing is required.

This is hooked into the IOMMU API and its caller are updated to make use
of the new structure. Subsequent patches will plumb this into the IOMMU
drivers as well, but for now the gathering information is ignored.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/dma-iommu.c       |  9 +++++++--
 drivers/iommu/iommu.c           | 19 +++++++++++-------
 drivers/vfio/vfio_iommu_type1.c | 26 ++++++++++++++++---------
 include/linux/iommu.h           | 43 +++++++++++++++++++++++++++++++++++++----
 4 files changed, 75 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a7f9c3edbcb2..80beb1f5994a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -444,13 +444,18 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	size_t iova_off = iova_offset(iovad, dma_addr);
+	struct iommu_iotlb_gather iotlb_gather;
+	size_t unmapped;
 
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
+	iommu_iotlb_gather_init(&iotlb_gather);
+
+	unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather);
+	WARN_ON(unmapped != size);
 
-	WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
 	if (!cookie->fq_domain)
-		iommu_tlb_sync(domain);
+		iommu_tlb_sync(domain, &iotlb_gather);
 	iommu_dma_free_iova(cookie, dma_addr, size);
 }
 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6d7b25fe2474..d67222fdfe44 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1862,7 +1862,7 @@ EXPORT_SYMBOL_GPL(iommu_map);
 
 static size_t __iommu_unmap(struct iommu_domain *domain,
 			    unsigned long iova, size_t size,
-			    bool sync)
+			    struct iommu_iotlb_gather *iotlb_gather)
 {
 	const struct iommu_ops *ops = domain->ops;
 	size_t unmapped_page, unmapped = 0;
@@ -1910,9 +1910,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 		unmapped += unmapped_page;
 	}
 
-	if (sync && ops->iotlb_sync)
-		ops->iotlb_sync(domain);
-
 	trace_unmap(orig_iova, size, unmapped);
 	return unmapped;
 }
@@ -1920,14 +1917,22 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 size_t iommu_unmap(struct iommu_domain *domain,
 		   unsigned long iova, size_t size)
 {
-	return __iommu_unmap(domain, iova, size, true);
+	struct iommu_iotlb_gather iotlb_gather;
+	size_t ret;
+
+	iommu_iotlb_gather_init(&iotlb_gather);
+	ret = __iommu_unmap(domain, iova, size, &iotlb_gather);
+	iommu_tlb_sync(domain, &iotlb_gather);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
 size_t iommu_unmap_fast(struct iommu_domain *domain,
-			unsigned long iova, size_t size)
+			unsigned long iova, size_t size,
+			struct iommu_iotlb_gather *iotlb_gather)
 {
-	return __iommu_unmap(domain, iova, size, false);
+	return __iommu_unmap(domain, iova, size, iotlb_gather);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index fad7fd8c167c..ad830abe1021 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -650,12 +650,13 @@ unpin_exit:
 }
 
 static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain,
-				struct list_head *regions)
+			    struct list_head *regions,
+			    struct iommu_iotlb_gather *iotlb_gather)
 {
 	long unlocked = 0;
 	struct vfio_regions *entry, *next;
 
-	iommu_tlb_sync(domain->domain);
+	iommu_tlb_sync(domain->domain, iotlb_gather);
 
 	list_for_each_entry_safe(entry, next, regions, list) {
 		unlocked += vfio_unpin_pages_remote(dma,
@@ -685,13 +686,15 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
 			       struct vfio_dma *dma, dma_addr_t *iova,
 			       size_t len, phys_addr_t phys, long *unlocked,
 			       struct list_head *unmapped_list,
-			       int *unmapped_cnt)
+			       int *unmapped_cnt,
+			       struct iommu_iotlb_gather *iotlb_gather)
 {
 	size_t unmapped = 0;
 	struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 
 	if (entry) {
-		unmapped = iommu_unmap_fast(domain->domain, *iova, len);
+		unmapped = iommu_unmap_fast(domain->domain, *iova, len,
+					    iotlb_gather);
 
 		if (!unmapped) {
 			kfree(entry);
@@ -711,8 +714,8 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain,
 	 * or in case of errors.
 	 */
 	if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) {
-		*unlocked += vfio_sync_unpin(dma, domain,
-					     unmapped_list);
+		*unlocked += vfio_sync_unpin(dma, domain, unmapped_list,
+					     iotlb_gather);
 		*unmapped_cnt = 0;
 	}
 
@@ -743,6 +746,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
 	struct vfio_domain *domain, *d;
 	LIST_HEAD(unmapped_region_list);
+	struct iommu_iotlb_gather iotlb_gather;
 	int unmapped_region_cnt = 0;
 	long unlocked = 0;
 
@@ -767,6 +771,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		cond_resched();
 	}
 
+	iommu_iotlb_gather_init(&iotlb_gather);
 	while (iova < end) {
 		size_t unmapped, len;
 		phys_addr_t phys, next;
@@ -795,7 +800,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		 */
 		unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys,
 					    &unlocked, &unmapped_region_list,
-					    &unmapped_region_cnt);
+					    &unmapped_region_cnt,
+					    &iotlb_gather);
 		if (!unmapped) {
 			unmapped = unmap_unpin_slow(domain, dma, &iova, len,
 						    phys, &unlocked);
@@ -806,8 +812,10 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 
 	dma->iommu_mapped = false;
 
-	if (unmapped_region_cnt)
-		unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list);
+	if (unmapped_region_cnt) {
+		unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list,
+					    &iotlb_gather);
+	}
 
 	if (do_accounting) {
 		vfio_lock_acct(dma, -unlocked, true);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 1e21431262d9..aaf073010a9a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -191,6 +191,23 @@ struct iommu_sva_ops {
 
 #ifdef CONFIG_IOMMU_API
 
+/**
+ * struct iommu_iotlb_gather - Range information for a pending IOTLB flush
+ *
+ * @start: IOVA representing the start of the range to be flushed
+ * @end: IOVA representing the end of the range to be flushed (exclusive)
+ * @pgsize: The interval at which to perform the flush
+ *
+ * This structure is intended to be updated by multiple calls to the
+ * ->unmap() function in struct iommu_ops before eventually being passed
+ * into ->iotlb_sync().
+ */
+struct iommu_iotlb_gather {
+	unsigned long		start;
+	unsigned long		end;
+	size_t			pgsize;
+};
+
 /**
  * struct iommu_ops - iommu ops and capabilities
  * @capable: check capability
@@ -375,6 +392,13 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
 	return (struct iommu_device *)dev_get_drvdata(dev);
 }
 
+static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
+{
+	*gather = (struct iommu_iotlb_gather) {
+		.start	= ULONG_MAX,
+	};
+}
+
 #define IOMMU_GROUP_NOTIFY_ADD_DEVICE		1 /* Device added */
 #define IOMMU_GROUP_NOTIFY_DEL_DEVICE		2 /* Pre Device removed */
 #define IOMMU_GROUP_NOTIFY_BIND_DRIVER		3 /* Pre Driver bind */
@@ -399,7 +423,8 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 			  size_t size);
 extern size_t iommu_unmap_fast(struct iommu_domain *domain,
-			       unsigned long iova, size_t size);
+			       unsigned long iova, size_t size,
+			       struct iommu_iotlb_gather *iotlb_gather);
 extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 			   struct scatterlist *sg,unsigned int nents, int prot);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
@@ -473,10 +498,13 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
 		domain->ops->flush_iotlb_all(domain);
 }
 
-static inline void iommu_tlb_sync(struct iommu_domain *domain)
+static inline void iommu_tlb_sync(struct iommu_domain *domain,
+				  struct iommu_iotlb_gather *iotlb_gather)
 {
 	if (domain->ops->iotlb_sync)
 		domain->ops->iotlb_sync(domain);
+
+	iommu_iotlb_gather_init(iotlb_gather);
 }
 
 /* PCI device grouping function */
@@ -557,6 +585,7 @@ struct iommu_group {};
 struct iommu_fwspec {};
 struct iommu_device {};
 struct iommu_fault_param {};
+struct iommu_iotlb_gather {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
@@ -611,7 +640,8 @@ static inline size_t iommu_unmap(struct iommu_domain *domain,
 }
 
 static inline size_t iommu_unmap_fast(struct iommu_domain *domain,
-				      unsigned long iova, int gfp_order)
+				      unsigned long iova, int gfp_order,
+				      struct iommu_iotlb_gather *iotlb_gather)
 {
 	return 0;
 }
@@ -627,7 +657,8 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain)
 {
 }
 
-static inline void iommu_tlb_sync(struct iommu_domain *domain)
+static inline void iommu_tlb_sync(struct iommu_domain *domain,
+				  struct iommu_iotlb_gather *iotlb_gather)
 {
 }
 
@@ -812,6 +843,10 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev)
 	return NULL;
 }
 
+static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
+{
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }
-- 
cgit v1.2.3


From 4fcf8544fc677fc8af135f1d86b3ba69c4ad429d Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:43:57 +0100
Subject: iommu: Introduce iommu_iotlb_gather_add_page()

Introduce a helper function for drivers to use when updating an
iommu_iotlb_gather structure in response to an ->unmap() call, rather
than having to open-code the logic in every page-table implementation.

Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/iommu.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index aaf073010a9a..ad41aee55bc6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -507,6 +507,31 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain,
 	iommu_iotlb_gather_init(iotlb_gather);
 }
 
+static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
+					       struct iommu_iotlb_gather *gather,
+					       unsigned long iova, size_t size)
+{
+	unsigned long start = iova, end = start + size;
+
+	/*
+	 * If the new page is disjoint from the current range or is mapped at
+	 * a different granularity, then sync the TLB so that the gather
+	 * structure can be rewritten.
+	 */
+	if (gather->pgsize != size ||
+	    end < gather->start || start > gather->end) {
+		if (gather->pgsize)
+			iommu_tlb_sync(domain, gather);
+		gather->pgsize = size;
+	}
+
+	if (gather->end < end)
+		gather->end = end;
+
+	if (gather->start > start)
+		gather->start = start;
+}
+
 /* PCI device grouping function */
 extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
@@ -847,6 +872,12 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather)
 {
 }
 
+static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain,
+					       struct iommu_iotlb_gather *gather,
+					       unsigned long iova, size_t size)
+{
+}
+
 static inline void iommu_device_unregister(struct iommu_device *iommu)
 {
 }
-- 
cgit v1.2.3


From 511885d7061eda3eb1faf3f57dcc936ff75863f1 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Wed, 24 Jul 2019 08:23:23 -0700
Subject: lib/timerqueue: Rely on rbtree semantics for next timer

Simplify the timerqueue code by using cached rbtrees and rely on the tree
leftmost node semantics to get the timer with earliest expiration time.
This is a drop in conversion, and therefore semantics remain untouched.

The runtime overhead of cached rbtrees is be pretty much the same as the
current head->next method, noting that when removing the leftmost node,
a common operation for the timerqueue, the rb_next(leftmost) is O(1) as
well, so the next timer will either be the right node or its parent.
Therefore no extra pointer chasing. Finally, the size of the struct
timerqueue_head remains the same.

Passes several hours of rcutorture.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190724152323.bojciei3muvfxalm@linux-r8p5
---
 include/linux/timerqueue.h | 13 ++++++-------
 lib/timerqueue.c           | 30 ++++++++++++------------------
 2 files changed, 18 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index 78b8cc73f12f..aff122f1062a 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -12,8 +12,7 @@ struct timerqueue_node {
 };
 
 struct timerqueue_head {
-	struct rb_root head;
-	struct timerqueue_node *next;
+	struct rb_root_cached rb_root;
 };
 
 
@@ -29,13 +28,14 @@ extern struct timerqueue_node *timerqueue_iterate_next(
  *
  * @head: head of timerqueue
  *
- * Returns a pointer to the timer node that has the
- * earliest expiration time.
+ * Returns a pointer to the timer node that has the earliest expiration time.
  */
 static inline
 struct timerqueue_node *timerqueue_getnext(struct timerqueue_head *head)
 {
-	return head->next;
+	struct rb_node *leftmost = rb_first_cached(&head->rb_root);
+
+	return rb_entry(leftmost, struct timerqueue_node, node);
 }
 
 static inline void timerqueue_init(struct timerqueue_node *node)
@@ -45,7 +45,6 @@ static inline void timerqueue_init(struct timerqueue_node *node)
 
 static inline void timerqueue_init_head(struct timerqueue_head *head)
 {
-	head->head = RB_ROOT;
-	head->next = NULL;
+	head->rb_root = RB_ROOT_CACHED;
 }
 #endif /* _LINUX_TIMERQUEUE_H */
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index bc7e64df27df..c52710964593 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -26,9 +26,10 @@
  */
 bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
-	struct rb_node **p = &head->head.rb_node;
+	struct rb_node **p = &head->rb_root.rb_root.rb_node;
 	struct rb_node *parent = NULL;
-	struct timerqueue_node  *ptr;
+	struct timerqueue_node *ptr;
+	bool leftmost = true;
 
 	/* Make sure we don't add nodes that are already added */
 	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
@@ -36,19 +37,17 @@ bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 	while (*p) {
 		parent = *p;
 		ptr = rb_entry(parent, struct timerqueue_node, node);
-		if (node->expires < ptr->expires)
+		if (node->expires < ptr->expires) {
 			p = &(*p)->rb_left;
-		else
+		} else {
 			p = &(*p)->rb_right;
+			leftmost = false;
+		}
 	}
 	rb_link_node(&node->node, parent, p);
-	rb_insert_color(&node->node, &head->head);
+	rb_insert_color_cached(&node->node, &head->rb_root, leftmost);
 
-	if (!head->next || node->expires < head->next->expires) {
-		head->next = node;
-		return true;
-	}
-	return false;
+	return leftmost;
 }
 EXPORT_SYMBOL_GPL(timerqueue_add);
 
@@ -65,15 +64,10 @@ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 {
 	WARN_ON_ONCE(RB_EMPTY_NODE(&node->node));
 
-	/* update next pointer */
-	if (head->next == node) {
-		struct rb_node *rbn = rb_next(&node->node);
-
-		head->next = rb_entry_safe(rbn, struct timerqueue_node, node);
-	}
-	rb_erase(&node->node, &head->head);
+	rb_erase_cached(&node->node, &head->rb_root);
 	RB_CLEAR_NODE(&node->node);
-	return head->next != NULL;
+
+	return !RB_EMPTY_ROOT(&head->rb_root.rb_root);
 }
 EXPORT_SYMBOL_GPL(timerqueue_del);
 
-- 
cgit v1.2.3


From d7852fbd0f0423937fa287a598bfde188bb68c22 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 11 Jul 2019 09:54:40 -0700
Subject: access: avoid the RCU grace period for the temporary subjective
 credentials

It turns out that 'access()' (and 'faccessat()') can cause a lot of RCU
work because it installs a temporary credential that gets allocated and
freed for each system call.

The allocation and freeing overhead is mostly benign, but because
credentials can be accessed under the RCU read lock, the freeing
involves a RCU grace period.

Which is not a huge deal normally, but if you have a lot of access()
calls, this causes a fair amount of seconday damage: instead of having a
nice alloc/free patterns that hits in hot per-CPU slab caches, you have
all those delayed free's, and on big machines with hundreds of cores,
the RCU overhead can end up being enormous.

But it turns out that all of this is entirely unnecessary.  Exactly
because access() only installs the credential as the thread-local
subjective credential, the temporary cred pointer doesn't actually need
to be RCU free'd at all.  Once we're done using it, we can just free it
synchronously and avoid all the RCU overhead.

So add a 'non_rcu' flag to 'struct cred', which can be set by users that
know they only use it in non-RCU context (there are other potential
users for this).  We can make it a union with the rcu freeing list head
that we need for the RCU case, so this doesn't need any extra storage.

Note that this also makes 'get_current_cred()' clear the new non_rcu
flag, in case we have filesystems that take a long-term reference to the
cred and then expect the RCU delayed freeing afterwards.  It's not
entirely clear that this is required, but it makes for clear semantics:
the subjective cred remains non-RCU as long as you only access it
synchronously using the thread-local accessors, but you _can_ use it as
a generic cred if you want to.

It is possible that we should just remove the whole RCU markings for
->cred entirely.  Only ->real_cred is really supposed to be accessed
through RCU, and the long-term cred copies that nfs uses might want to
explicitly re-enable RCU freeing if required, rather than have
get_current_cred() do it implicitly.

But this is a "minimal semantic changes" change for the immediate
problem.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Jan Glauber <jglauber@marvell.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Jayachandran Chandrasekharan Nair <jnair@marvell.com>
Cc: Greg KH <greg@kroah.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/open.c            | 19 +++++++++++++++++++
 include/linux/cred.h |  8 +++++++-
 kernel/cred.c        | 21 +++++++++++++++++++--
 3 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..a59abe3c669a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -374,6 +374,25 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
 				override_cred->cap_permitted;
 	}
 
+	/*
+	 * The new set of credentials can *only* be used in
+	 * task-synchronous circumstances, and does not need
+	 * RCU freeing, unless somebody then takes a separate
+	 * reference to it.
+	 *
+	 * NOTE! This is _only_ true because this credential
+	 * is used purely for override_creds() that installs
+	 * it as the subjective cred. Other threads will be
+	 * accessing ->real_cred, not the subjective cred.
+	 *
+	 * If somebody _does_ make a copy of this (using the
+	 * 'get_current_cred()' function), that will clear the
+	 * non_rcu field, because now that other user may be
+	 * expecting RCU freeing. But normal thread-synchronous
+	 * cred accesses will keep things non-RCY.
+	 */
+	override_cred->non_rcu = 1;
+
 	old_cred = override_creds(override_cred);
 retry:
 	res = user_path_at(dfd, filename, lookup_flags, &path);
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 7eb43a038330..f7a30e0099be 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -145,7 +145,11 @@ struct cred {
 	struct user_struct *user;	/* real user ID subscription */
 	struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
 	struct group_info *group_info;	/* supplementary groups for euid/fsgid */
-	struct rcu_head	rcu;		/* RCU deletion hook */
+	/* RCU deletion */
+	union {
+		int non_rcu;			/* Can we skip RCU deletion? */
+		struct rcu_head	rcu;		/* RCU deletion hook */
+	};
 } __randomize_layout;
 
 extern void __put_cred(struct cred *);
@@ -246,6 +250,7 @@ static inline const struct cred *get_cred(const struct cred *cred)
 	if (!cred)
 		return cred;
 	validate_creds(cred);
+	nonconst_cred->non_rcu = 0;
 	return get_new_cred(nonconst_cred);
 }
 
@@ -257,6 +262,7 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred)
 	if (!atomic_inc_not_zero(&nonconst_cred->usage))
 		return NULL;
 	validate_creds(cred);
+	nonconst_cred->non_rcu = 0;
 	return cred;
 }
 
diff --git a/kernel/cred.c b/kernel/cred.c
index c73a87a4df13..153ae369e024 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -144,7 +144,10 @@ void __put_cred(struct cred *cred)
 	BUG_ON(cred == current->cred);
 	BUG_ON(cred == current->real_cred);
 
-	call_rcu(&cred->rcu, put_cred_rcu);
+	if (cred->non_rcu)
+		put_cred_rcu(&cred->rcu);
+	else
+		call_rcu(&cred->rcu, put_cred_rcu);
 }
 EXPORT_SYMBOL(__put_cred);
 
@@ -256,6 +259,7 @@ struct cred *prepare_creds(void)
 	old = task->cred;
 	memcpy(new, old, sizeof(struct cred));
 
+	new->non_rcu = 0;
 	atomic_set(&new->usage, 1);
 	set_cred_subscribers(new, 0);
 	get_group_info(new->group_info);
@@ -535,7 +539,19 @@ const struct cred *override_creds(const struct cred *new)
 
 	validate_creds(old);
 	validate_creds(new);
-	get_cred(new);
+
+	/*
+	 * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
+	 *
+	 * That means that we do not clear the 'non_rcu' flag, since
+	 * we are only installing the cred into the thread-synchronous
+	 * '->cred' pointer, not the '->real_cred' pointer that is
+	 * visible to other threads under RCU.
+	 *
+	 * Also note that we did validate_creds() manually, not depending
+	 * on the validation in 'get_cred()'.
+	 */
+	get_new_cred((struct cred *)new);
 	alter_cred_subscribers(new, 1);
 	rcu_assign_pointer(current->cred, new);
 	alter_cred_subscribers(old, -1);
@@ -672,6 +688,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	validate_creds(old);
 
 	*new = *old;
+	new->non_rcu = 0;
 	atomic_set(&new->usage, 1);
 	set_cred_subscribers(new, 0);
 	get_uid(new->user);
-- 
cgit v1.2.3


From 71d8e94dabee7fceac473d87445a03e848469a71 Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Wed, 26 Jun 2019 17:33:09 -0700
Subject: fpga: altera-pr-ip: Make alt_pr_unregister function void

Make alt_pr_unregister function void, since it always returns 0,
and nothing would act on the value anyways.

Signed-off-by: Moritz Fischer <mdf@kernel.org>
---
 drivers/fpga/altera-pr-ip-core-plat.c  | 4 +++-
 drivers/fpga/altera-pr-ip-core.c       | 4 +---
 include/linux/fpga/altera-pr-ip-core.h | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/fpga/altera-pr-ip-core-plat.c b/drivers/fpga/altera-pr-ip-core-plat.c
index b293d83143f1..99b9cc0e70f0 100644
--- a/drivers/fpga/altera-pr-ip-core-plat.c
+++ b/drivers/fpga/altera-pr-ip-core-plat.c
@@ -32,7 +32,9 @@ static int alt_pr_platform_remove(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 
-	return alt_pr_unregister(dev);
+	alt_pr_unregister(dev);
+
+	return 0;
 }
 
 static const struct of_device_id alt_pr_of_match[] = {
diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c
index a7a3bf0b5202..2cf25fd5e897 100644
--- a/drivers/fpga/altera-pr-ip-core.c
+++ b/drivers/fpga/altera-pr-ip-core.c
@@ -201,15 +201,13 @@ int alt_pr_register(struct device *dev, void __iomem *reg_base)
 }
 EXPORT_SYMBOL_GPL(alt_pr_register);
 
-int alt_pr_unregister(struct device *dev)
+void alt_pr_unregister(struct device *dev)
 {
 	struct fpga_manager *mgr = dev_get_drvdata(dev);
 
 	dev_dbg(dev, "%s\n", __func__);
 
 	fpga_mgr_unregister(mgr);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(alt_pr_unregister);
 
diff --git a/include/linux/fpga/altera-pr-ip-core.h b/include/linux/fpga/altera-pr-ip-core.h
index 7d4664730d60..0b08ac20ab16 100644
--- a/include/linux/fpga/altera-pr-ip-core.h
+++ b/include/linux/fpga/altera-pr-ip-core.h
@@ -13,6 +13,6 @@
 #include <linux/io.h>
 
 int alt_pr_register(struct device *dev, void __iomem *reg_base);
-int alt_pr_unregister(struct device *dev);
+void alt_pr_unregister(struct device *dev);
 
 #endif /* _ALT_PR_IP_CORE_H */
-- 
cgit v1.2.3


From 3b51c44bd6936e86a7180abd9aebc4387a479253 Mon Sep 17 00:00:00 2001
From: Atif Niyaz <atifniyaz@google.com>
Date: Wed, 24 Jul 2019 22:26:31 +0300
Subject: Input: allow drivers specify timestamp for input events

Currently, evdev stamps events with timestamps acquired in evdev_events()
However, this timestamping may not be accurate in terms of measuring
when the actual event happened.

Let's allow individual drivers specify timestamp in order to provide a more
accurate sense of time for the event. It is expected that drivers will set the
timestamp in their hard interrupt routine.

Signed-off-by: Atif Niyaz <atifniyaz@google.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/evdev.c | 35 ++++++++---------------------------
 drivers/input/input.c | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/input.h | 14 ++++++++++++++
 3 files changed, 62 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 867c2cfd0038..d7dd6fcf2db0 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -25,13 +25,6 @@
 #include <linux/cdev.h>
 #include "input-compat.h"
 
-enum evdev_clock_type {
-	EV_CLK_REAL = 0,
-	EV_CLK_MONO,
-	EV_CLK_BOOT,
-	EV_CLK_MAX
-};
-
 struct evdev {
 	int open;
 	struct input_handle handle;
@@ -53,7 +46,7 @@ struct evdev_client {
 	struct fasync_struct *fasync;
 	struct evdev *evdev;
 	struct list_head node;
-	unsigned int clk_type;
+	enum input_clock_type clk_type;
 	bool revoked;
 	unsigned long *evmasks[EV_CNT];
 	unsigned int bufsize;
@@ -149,17 +142,10 @@ static void __evdev_flush_queue(struct evdev_client *client, unsigned int type)
 
 static void __evdev_queue_syn_dropped(struct evdev_client *client)
 {
+	ktime_t *ev_time = input_get_timestamp(client->evdev->handle.dev);
+	struct timespec64 ts = ktime_to_timespec64(ev_time[client->clk_type]);
 	struct input_event ev;
-	ktime_t time;
-	struct timespec64 ts;
 
-	time = client->clk_type == EV_CLK_REAL ?
-			ktime_get_real() :
-			client->clk_type == EV_CLK_MONO ?
-				ktime_get() :
-				ktime_get_boottime();
-
-	ts = ktime_to_timespec64(time);
 	ev.input_event_sec = ts.tv_sec;
 	ev.input_event_usec = ts.tv_nsec / NSEC_PER_USEC;
 	ev.type = EV_SYN;
@@ -188,18 +174,18 @@ static void evdev_queue_syn_dropped(struct evdev_client *client)
 static int evdev_set_clk_type(struct evdev_client *client, unsigned int clkid)
 {
 	unsigned long flags;
-	unsigned int clk_type;
+	enum input_clock_type clk_type;
 
 	switch (clkid) {
 
 	case CLOCK_REALTIME:
-		clk_type = EV_CLK_REAL;
+		clk_type = INPUT_CLK_REAL;
 		break;
 	case CLOCK_MONOTONIC:
-		clk_type = EV_CLK_MONO;
+		clk_type = INPUT_CLK_MONO;
 		break;
 	case CLOCK_BOOTTIME:
-		clk_type = EV_CLK_BOOT;
+		clk_type = INPUT_CLK_BOOT;
 		break;
 	default:
 		return -EINVAL;
@@ -307,12 +293,7 @@ static void evdev_events(struct input_handle *handle,
 {
 	struct evdev *evdev = handle->private;
 	struct evdev_client *client;
-	ktime_t ev_time[EV_CLK_MAX];
-
-	ev_time[EV_CLK_MONO] = ktime_get();
-	ev_time[EV_CLK_REAL] = ktime_mono_to_real(ev_time[EV_CLK_MONO]);
-	ev_time[EV_CLK_BOOT] = ktime_mono_to_any(ev_time[EV_CLK_MONO],
-						 TK_OFFS_BOOT);
+	ktime_t *ev_time = input_get_timestamp(handle->dev);
 
 	rcu_read_lock();
 
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 7f3c5fcb9ed6..7494a0dede79 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1894,6 +1894,46 @@ void input_free_device(struct input_dev *dev)
 }
 EXPORT_SYMBOL(input_free_device);
 
+/**
+ * input_set_timestamp - set timestamp for input events
+ * @dev: input device to set timestamp for
+ * @timestamp: the time at which the event has occurred
+ *   in CLOCK_MONOTONIC
+ *
+ * This function is intended to provide to the input system a more
+ * accurate time of when an event actually occurred. The driver should
+ * call this function as soon as a timestamp is acquired ensuring
+ * clock conversions in input_set_timestamp are done correctly.
+ *
+ * The system entering suspend state between timestamp acquisition and
+ * calling input_set_timestamp can result in inaccurate conversions.
+ */
+void input_set_timestamp(struct input_dev *dev, ktime_t timestamp)
+{
+	dev->timestamp[INPUT_CLK_MONO] = timestamp;
+	dev->timestamp[INPUT_CLK_REAL] = ktime_mono_to_real(timestamp);
+	dev->timestamp[INPUT_CLK_BOOT] = ktime_mono_to_any(timestamp,
+							   TK_OFFS_BOOT);
+}
+EXPORT_SYMBOL(input_set_timestamp);
+
+/**
+ * input_get_timestamp - get timestamp for input events
+ * @dev: input device to get timestamp from
+ *
+ * A valid timestamp is a timestamp of non-zero value.
+ */
+ktime_t *input_get_timestamp(struct input_dev *dev)
+{
+	const ktime_t invalid_timestamp = ktime_set(0, 0);
+
+	if (!ktime_compare(dev->timestamp[INPUT_CLK_MONO], invalid_timestamp))
+		input_set_timestamp(dev, ktime_get());
+
+	return dev->timestamp;
+}
+EXPORT_SYMBOL(input_get_timestamp);
+
 /**
  * input_set_capability - mark device as capable of a certain event
  * @dev: device that is capable of emitting or accepting event
diff --git a/include/linux/input.h b/include/linux/input.h
index 510e78558c10..e95a439d8bd5 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -33,6 +33,13 @@ struct input_value {
 	__s32 value;
 };
 
+enum input_clock_type {
+	INPUT_CLK_REAL = 0,
+	INPUT_CLK_MONO,
+	INPUT_CLK_BOOT,
+	INPUT_CLK_MAX
+};
+
 /**
  * struct input_dev - represents an input device
  * @name: name of the device
@@ -114,6 +121,8 @@ struct input_value {
  * @vals: array of values queued in the current frame
  * @devres_managed: indicates that devices is managed with devres framework
  *	and needs not be explicitly unregistered or freed.
+ * @timestamp: storage for a timestamp set by input_set_timestamp called
+ *  by a driver
  */
 struct input_dev {
 	const char *name;
@@ -184,6 +193,8 @@ struct input_dev {
 	struct input_value *vals;
 
 	bool devres_managed;
+
+	ktime_t timestamp[INPUT_CLK_MAX];
 };
 #define to_input_dev(d) container_of(d, struct input_dev, dev)
 
@@ -382,6 +393,9 @@ void input_close_device(struct input_handle *);
 
 int input_flush_device(struct input_handle *handle, struct file *file);
 
+void input_set_timestamp(struct input_dev *dev, ktime_t timestamp);
+ktime_t *input_get_timestamp(struct input_dev *dev);
+
 void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value);
 void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value);
 
-- 
cgit v1.2.3


From d9c5252295218df4cfe64353aa860d7b5c8700ef Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 25 Jul 2019 16:58:31 +0900
Subject: treewide: add "WITH Linux-syscall-note" to SPDX tag of uapi headers

UAPI headers licensed under GPL are supposed to have exception
"WITH Linux-syscall-note" so that they can be included into non-GPL
user space application code.

The exception note is missing in some UAPI headers.

Some of them slipped in by the treewide conversion commit b24413180f56
("License cleanup: add SPDX GPL-2.0 license identifier to files with
no license"). Just run:

  $ git show --oneline b24413180f56 -- arch/x86/include/uapi/asm/

I believe they are not intentional, and should be fixed too.

This patch was generated by the following script:

  git grep -l --not -e Linux-syscall-note --and -e SPDX-License-Identifier \
    -- :arch/*/include/uapi/asm/*.h :include/uapi/ :^*/Kbuild |
  while read file
  do
          sed -i -e '/[[:space:]]OR[[:space:]]/s/\(GPL-[^[:space:]]*\)/(\1 WITH Linux-syscall-note)/g' \
          -e '/[[:space:]]or[[:space:]]/s/\(GPL-[^[:space:]]*\)/(\1 WITH Linux-syscall-note)/g' \
          -e '/[[:space:]]OR[[:space:]]/!{/[[:space:]]or[[:space:]]/!s/\(GPL-[^[:space:]]*\)/\1 WITH Linux-syscall-note/g}' $file
  done

After this patch is applied, there are 5 UAPI headers that do not contain
"WITH Linux-syscall-note". They are kept untouched since this exception
applies only to GPL variants.

  $ git grep --not -e Linux-syscall-note --and -e SPDX-License-Identifier \
    -- :arch/*/include/uapi/asm/*.h :include/uapi/ :^*/Kbuild
  include/uapi/drm/panfrost_drm.h:/* SPDX-License-Identifier: MIT */
  include/uapi/linux/batman_adv.h:/* SPDX-License-Identifier: MIT */
  include/uapi/linux/qemu_fw_cfg.h:/* SPDX-License-Identifier: BSD-3-Clause */
  include/uapi/linux/vbox_err.h:/* SPDX-License-Identifier: MIT */
  include/uapi/linux/virtio_iommu.h:/* SPDX-License-Identifier: BSD-3-Clause */

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/uapi/asm/bpf_perf_event.h   | 2 +-
 arch/csky/include/uapi/asm/byteorder.h         | 2 +-
 arch/csky/include/uapi/asm/cachectl.h          | 2 +-
 arch/csky/include/uapi/asm/perf_regs.h         | 2 +-
 arch/csky/include/uapi/asm/ptrace.h            | 2 +-
 arch/csky/include/uapi/asm/sigcontext.h        | 2 +-
 arch/csky/include/uapi/asm/unistd.h            | 2 +-
 arch/nds32/include/uapi/asm/auxvec.h           | 2 +-
 arch/nds32/include/uapi/asm/byteorder.h        | 2 +-
 arch/nds32/include/uapi/asm/cachectl.h         | 2 +-
 arch/nds32/include/uapi/asm/fp_udfiex_crtl.h   | 2 +-
 arch/nds32/include/uapi/asm/param.h            | 2 +-
 arch/nds32/include/uapi/asm/ptrace.h           | 2 +-
 arch/nds32/include/uapi/asm/sigcontext.h       | 2 +-
 arch/nds32/include/uapi/asm/unistd.h           | 2 +-
 arch/powerpc/include/uapi/asm/bpf_perf_event.h | 2 +-
 arch/riscv/include/uapi/asm/auxvec.h           | 2 +-
 arch/riscv/include/uapi/asm/bitsperlong.h      | 2 +-
 arch/riscv/include/uapi/asm/byteorder.h        | 2 +-
 arch/riscv/include/uapi/asm/hwcap.h            | 2 +-
 arch/riscv/include/uapi/asm/ptrace.h           | 2 +-
 arch/riscv/include/uapi/asm/sigcontext.h       | 2 +-
 arch/riscv/include/uapi/asm/ucontext.h         | 2 +-
 arch/s390/include/uapi/asm/bpf_perf_event.h    | 2 +-
 arch/s390/include/uapi/asm/ipl.h               | 2 +-
 arch/sh/include/uapi/asm/setup.h               | 2 +-
 arch/sh/include/uapi/asm/types.h               | 2 +-
 arch/sparc/include/uapi/asm/oradax.h           | 2 +-
 arch/x86/include/uapi/asm/byteorder.h          | 2 +-
 arch/x86/include/uapi/asm/hwcap2.h             | 2 +-
 arch/x86/include/uapi/asm/sigcontext32.h       | 2 +-
 arch/x86/include/uapi/asm/types.h              | 2 +-
 include/uapi/linux/bpfilter.h                  | 2 +-
 include/uapi/linux/ipmi_bmc.h                  | 2 +-
 include/uapi/linux/isst_if.h                   | 2 +-
 include/uapi/linux/netfilter/nf_synproxy.h     | 2 +-
 include/uapi/linux/psp-sev.h                   | 2 +-
 include/uapi/linux/rxrpc.h                     | 2 +-
 include/uapi/linux/usb/g_uvc.h                 | 2 +-
 include/uapi/linux/vbox_vmmdev_types.h         | 2 +-
 include/uapi/linux/vboxguest.h                 | 2 +-
 include/uapi/linux/virtio_pmem.h               | 2 +-
 include/uapi/linux/vmcore.h                    | 2 +-
 include/uapi/linux/wmi.h                       | 2 +-
 include/uapi/misc/fastrpc.h                    | 2 +-
 include/uapi/rdma/rvt-abi.h                    | 2 +-
 include/uapi/rdma/siw-abi.h                    | 2 +-
 include/uapi/scsi/scsi_bsg_ufs.h               | 2 +-
 include/uapi/sound/skl-tplg-interface.h        | 2 +-
 49 files changed, 49 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/include/uapi/asm/bpf_perf_event.h b/arch/arm64/include/uapi/asm/bpf_perf_event.h
index b551b741653d..5e1e648aeec4 100644
--- a/arch/arm64/include/uapi/asm/bpf_perf_event.h
+++ b/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
diff --git a/arch/csky/include/uapi/asm/byteorder.h b/arch/csky/include/uapi/asm/byteorder.h
index b079ec715cdf..d150cd664873 100644
--- a/arch/csky/include/uapi/asm/byteorder.h
+++ b/arch/csky/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_BYTEORDER_H
diff --git a/arch/csky/include/uapi/asm/cachectl.h b/arch/csky/include/uapi/asm/cachectl.h
index ddf2f39aa925..ed7fad1ea20d 100644
--- a/arch/csky/include/uapi/asm/cachectl.h
+++ b/arch/csky/include/uapi/asm/cachectl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 
 #ifndef __ASM_CSKY_CACHECTL_H
 #define __ASM_CSKY_CACHECTL_H
diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h
index ee323d818592..49d4e147a559 100644
--- a/arch/csky/include/uapi/asm/perf_regs.h
+++ b/arch/csky/include/uapi/asm/perf_regs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef _ASM_CSKY_PERF_REGS_H
diff --git a/arch/csky/include/uapi/asm/ptrace.h b/arch/csky/include/uapi/asm/ptrace.h
index 4e248d5b86ef..66b2268e324e 100644
--- a/arch/csky/include/uapi/asm/ptrace.h
+++ b/arch/csky/include/uapi/asm/ptrace.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef _CSKY_PTRACE_H
diff --git a/arch/csky/include/uapi/asm/sigcontext.h b/arch/csky/include/uapi/asm/sigcontext.h
index e81e7ff11e36..670c020f2cb8 100644
--- a/arch/csky/include/uapi/asm/sigcontext.h
+++ b/arch/csky/include/uapi/asm/sigcontext.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_SIGCONTEXT_H
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index ec60e49cea66..211c983c7282 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h
index b5d58ea8decb..bc0b92ab8c15 100644
--- a/arch/nds32/include/uapi/asm/auxvec.h
+++ b/arch/nds32/include/uapi/asm/auxvec.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_AUXVEC_H
diff --git a/arch/nds32/include/uapi/asm/byteorder.h b/arch/nds32/include/uapi/asm/byteorder.h
index 511e653c709d..c264ef12c49c 100644
--- a/arch/nds32/include/uapi/asm/byteorder.h
+++ b/arch/nds32/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_BYTEORDER_H__
diff --git a/arch/nds32/include/uapi/asm/cachectl.h b/arch/nds32/include/uapi/asm/cachectl.h
index 73793662815c..31b9b439d819 100644
--- a/arch/nds32/include/uapi/asm/cachectl.h
+++ b/arch/nds32/include/uapi/asm/cachectl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 1994, 1995, 1996 by Ralf Baechle
 // Copyright (C) 2005-2017 Andes Technology Corporation
 #ifndef	_ASM_CACHECTL
diff --git a/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h b/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
index d54a5d6c6538..f17396db16ec 100644
--- a/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
+++ b/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /* Copyright (C) 2005-2019 Andes Technology Corporation */
 #ifndef	_FP_UDF_IEX_CRTL_H
 #define	_FP_UDF_IEX_CRTL_H
diff --git a/arch/nds32/include/uapi/asm/param.h b/arch/nds32/include/uapi/asm/param.h
index 2977534a6bd3..48d00328d328 100644
--- a/arch/nds32/include/uapi/asm/param.h
+++ b/arch/nds32/include/uapi/asm/param.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_PARAM_H
diff --git a/arch/nds32/include/uapi/asm/ptrace.h b/arch/nds32/include/uapi/asm/ptrace.h
index 1a6e01c00e6f..d76217c7c010 100644
--- a/arch/nds32/include/uapi/asm/ptrace.h
+++ b/arch/nds32/include/uapi/asm/ptrace.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __UAPI_ASM_NDS32_PTRACE_H
diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h
index dc89af7ddcc3..6c1e6648878f 100644
--- a/arch/nds32/include/uapi/asm/sigcontext.h
+++ b/arch/nds32/include/uapi/asm/sigcontext.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMNDS32_SIGCONTEXT_H
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index a0b2f7b9c0f2..410795e280fe 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #define __ARCH_WANT_STAT64
diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
index b551b741653d..5e1e648aeec4 100644
--- a/arch/powerpc/include/uapi/asm/bpf_perf_event.h
+++ b/arch/powerpc/include/uapi/asm/bpf_perf_event.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h
index 62716653554b..d86cb17bbabe 100644
--- a/arch/riscv/include/uapi/asm/auxvec.h
+++ b/arch/riscv/include/uapi/asm/auxvec.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2015 Regents of the University of California
diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h
index 0b9b58b57ff6..7d0b32e3b701 100644
--- a/arch/riscv/include/uapi/asm/bitsperlong.h
+++ b/arch/riscv/include/uapi/asm/bitsperlong.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2015 Regents of the University of California
diff --git a/arch/riscv/include/uapi/asm/byteorder.h b/arch/riscv/include/uapi/asm/byteorder.h
index 1920debc09c0..f671e16bf6af 100644
--- a/arch/riscv/include/uapi/asm/byteorder.h
+++ b/arch/riscv/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2015 Regents of the University of California
diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h
index 7d786145183b..4e7646077056 100644
--- a/arch/riscv/include/uapi/asm/hwcap.h
+++ b/arch/riscv/include/uapi/asm/hwcap.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copied from arch/arm64/include/asm/hwcap.h
  *
diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h
index 92d8f7cd8f84..882547f6bd5c 100644
--- a/arch/riscv/include/uapi/asm/ptrace.h
+++ b/arch/riscv/include/uapi/asm/ptrace.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copyright (C) 2012 Regents of the University of California
  */
diff --git a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h
index 053f809e52ce..84f2dfcfdbce 100644
--- a/arch/riscv/include/uapi/asm/sigcontext.h
+++ b/arch/riscv/include/uapi/asm/sigcontext.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copyright (C) 2012 Regents of the University of California
  */
diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h
index b58e00cee2ec..411dd7b52ed6 100644
--- a/arch/riscv/include/uapi/asm/ucontext.h
+++ b/arch/riscv/include/uapi/asm/ucontext.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Copyright (C) 2012 ARM Ltd.
  * Copyright (C) 2017 SiFive, Inc.
diff --git a/arch/s390/include/uapi/asm/bpf_perf_event.h b/arch/s390/include/uapi/asm/bpf_perf_event.h
index cefe7c7cd4f6..3ed42ff6da94 100644
--- a/arch/s390/include/uapi/asm/bpf_perf_event.h
+++ b/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
 #define _UAPI__ASM_BPF_PERF_EVENT_H__
 
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
index fd32b1cd80d2..451ba7d08905 100644
--- a/arch/s390/include/uapi/asm/ipl.h
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _ASM_S390_UAPI_IPL_H
 #define _ASM_S390_UAPI_IPL_H
 
diff --git a/arch/sh/include/uapi/asm/setup.h b/arch/sh/include/uapi/asm/setup.h
index 1170dd2fb998..4bd19f80f9b0 100644
--- a/arch/sh/include/uapi/asm/setup.h
+++ b/arch/sh/include/uapi/asm/setup.h
@@ -1,2 +1,2 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #include <asm-generic/setup.h>
diff --git a/arch/sh/include/uapi/asm/types.h b/arch/sh/include/uapi/asm/types.h
index f83795fdc0da..68100e108ea6 100644
--- a/arch/sh/include/uapi/asm/types.h
+++ b/arch/sh/include/uapi/asm/types.h
@@ -1,2 +1,2 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #include <asm-generic/types.h>
diff --git a/arch/sparc/include/uapi/asm/oradax.h b/arch/sparc/include/uapi/asm/oradax.h
index 64c67f2ea33f..0dace69058ab 100644
--- a/arch/sparc/include/uapi/asm/oradax.h
+++ b/arch/sparc/include/uapi/asm/oradax.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
 /*
  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
  */
diff --git a/arch/x86/include/uapi/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h
index 484e3cfd7ef2..149143cab9ff 100644
--- a/arch/x86/include/uapi/asm/byteorder.h
+++ b/arch/x86/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _ASM_X86_BYTEORDER_H
 #define _ASM_X86_BYTEORDER_H
 
diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h
index 6ebaae90e207..8b2effe6efb8 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _ASM_X86_HWCAP2_H
 #define _ASM_X86_HWCAP2_H
 
diff --git a/arch/x86/include/uapi/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index 6b18e88de8a6..7114801d0499 100644
--- a/arch/x86/include/uapi/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _ASM_X86_SIGCONTEXT32_H
 #define _ASM_X86_SIGCONTEXT32_H
 
diff --git a/arch/x86/include/uapi/asm/types.h b/arch/x86/include/uapi/asm/types.h
index df55e1ddb0c9..9d5c11a24279 100644
--- a/arch/x86/include/uapi/asm/types.h
+++ b/arch/x86/include/uapi/asm/types.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _ASM_X86_TYPES_H
 #define _ASM_X86_TYPES_H
 
diff --git a/include/uapi/linux/bpfilter.h b/include/uapi/linux/bpfilter.h
index 2ec3cc99ea4c..cbc1f5813f50 100644
--- a/include/uapi/linux/bpfilter.h
+++ b/include/uapi/linux/bpfilter.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_LINUX_BPFILTER_H
 #define _UAPI_LINUX_BPFILTER_H
 
diff --git a/include/uapi/linux/ipmi_bmc.h b/include/uapi/linux/ipmi_bmc.h
index 1670f0944227..782a03eb1086 100644
--- a/include/uapi/linux/ipmi_bmc.h
+++ b/include/uapi/linux/ipmi_bmc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * Copyright (c) 2015-2018, Intel Corporation.
  */
diff --git a/include/uapi/linux/isst_if.h b/include/uapi/linux/isst_if.h
index d10b832c58c5..0a52b7b093d3 100644
--- a/include/uapi/linux/isst_if.h
+++ b/include/uapi/linux/isst_if.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * Intel Speed Select Interface: OS to hardware Interface
  * Copyright (c) 2019, Intel Corporation.
diff --git a/include/uapi/linux/netfilter/nf_synproxy.h b/include/uapi/linux/netfilter/nf_synproxy.h
index 6f3791c8946f..00d787f0260e 100644
--- a/include/uapi/linux/netfilter/nf_synproxy.h
+++ b/include/uapi/linux/netfilter/nf_synproxy.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _NF_SYNPROXY_H
 #define _NF_SYNPROXY_H
 
diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h
index 8654b2442f6a..592a0c1b77c9 100644
--- a/include/uapi/linux/psp-sev.h
+++ b/include/uapi/linux/psp-sev.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  * Userspace interface for AMD Secure Encrypted Virtualization (SEV)
  * platform management commands.
diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h
index 782069dcf607..4accfa7e266d 100644
--- a/include/uapi/linux/rxrpc.h
+++ b/include/uapi/linux/rxrpc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
 /* Types and definitions for AF_RXRPC.
  *
  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
diff --git a/include/uapi/linux/usb/g_uvc.h b/include/uapi/linux/usb/g_uvc.h
index 3c9ee3020cbb..652f169a019e 100644
--- a/include/uapi/linux/usb/g_uvc.h
+++ b/include/uapi/linux/usb/g_uvc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * g_uvc.h  --  USB Video Class Gadget driver API
  *
diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h
index 26f39816af14..c27289fd619a 100644
--- a/include/uapi/linux/vbox_vmmdev_types.h
+++ b/include/uapi/linux/vbox_vmmdev_types.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR CDDL-1.0) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR CDDL-1.0) */
 /*
  * Virtual Device for Guest <-> VMM/Host communication, type definitions
  * which are also used for the vboxguest ioctl interface / by vboxsf
diff --git a/include/uapi/linux/vboxguest.h b/include/uapi/linux/vboxguest.h
index 612f0c7d3558..9cec58a6a5ea 100644
--- a/include/uapi/linux/vboxguest.h
+++ b/include/uapi/linux/vboxguest.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR CDDL-1.0) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR CDDL-1.0) */
 /*
  * VBoxGuest - VirtualBox Guest Additions Driver Interface.
  *
diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h
index 9a63ed6d062f..b022787ffb94 100644
--- a/include/uapi/linux/virtio_pmem.h
+++ b/include/uapi/linux/virtio_pmem.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
 /*
  * Definitions for virtio-pmem devices.
  *
diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h
index 022619668e0e..3e9da91866ff 100644
--- a/include/uapi/linux/vmcore.h
+++ b/include/uapi/linux/vmcore.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef _UAPI_VMCORE_H
 #define _UAPI_VMCORE_H
 
diff --git a/include/uapi/linux/wmi.h b/include/uapi/linux/wmi.h
index c36f2d7675a4..7085c5dca9fa 100644
--- a/include/uapi/linux/wmi.h
+++ b/include/uapi/linux/wmi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
 /*
  *  User API methods for ACPI-WMI mapping driver
  *
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 6d701af9fc42..fb792e882cef 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 
 #ifndef __QCOM_FASTRPC_H__
 #define __QCOM_FASTRPC_H__
diff --git a/include/uapi/rdma/rvt-abi.h b/include/uapi/rdma/rvt-abi.h
index 7328293c715c..7c05a02d2be5 100644
--- a/include/uapi/rdma/rvt-abi.h
+++ b/include/uapi/rdma/rvt-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 
 /*
  * This file contains defines, structures, etc. that are used
diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h
index 3dd8071ace7b..7de68f1dc707 100644
--- a/include/uapi/rdma/siw-abi.h
+++ b/include/uapi/rdma/siw-abi.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) or BSD-3-Clause */
 
 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
 /* Copyright (c) 2008-2019, IBM Corporation */
diff --git a/include/uapi/scsi/scsi_bsg_ufs.h b/include/uapi/scsi/scsi_bsg_ufs.h
index 17c7abd0803a..9988db6ad244 100644
--- a/include/uapi/scsi/scsi_bsg_ufs.h
+++ b/include/uapi/scsi/scsi_bsg_ufs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * UFS Transport SGIO v4 BSG Message Support
  *
diff --git a/include/uapi/sound/skl-tplg-interface.h b/include/uapi/sound/skl-tplg-interface.h
index f39352cef382..9eee32f5e407 100644
--- a/include/uapi/sound/skl-tplg-interface.h
+++ b/include/uapi/sound/skl-tplg-interface.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  * skl-tplg-interface.h - Intel DSP FW private data interface
  *
-- 
cgit v1.2.3


From dc3bf49ea330414724e429e4e9b291899c134e3b Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Thu, 25 Jul 2019 16:58:32 +0900
Subject: treewide: remove SPDX "WITH Linux-syscall-note" from kernel-space
 headers again

The "WITH Linux-syscall-note" exception exists for headers exported to
user space. It is strange to add it to non-exported headers.

Commit 687a3e4d8e61 ("treewide: remove SPDX "WITH Linux-syscall-note"
from kernel-space headers") did cleanups some months ago, but it looks
like we need to do this periodically.

This patch was generated by the following script:

  git grep -l -e Linux-syscall-note \
    -- :*.h :^arch/*/include/uapi/asm/*.h :^include/uapi/ :^tools |
  while read file
  do
          sed -i -e 's/(\(GPL-[^[:space:]]*\) WITH Linux-syscall-note)/\1/g' \
          -e 's/ WITH Linux-syscall-note//g' $file
  done

I did not commit drivers/staging/android/uapi/ion.h . This header is
not currently exported, but somebody may plan to move it to include/uapi/
when the time comes. I am not sure. Anyway, it will be better to check
the license inconsistency in drivers/staging/android/uapi/.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/sound/sof/control.h   | 2 +-
 include/sound/sof/dai-intel.h | 2 +-
 include/sound/sof/dai.h       | 2 +-
 include/sound/sof/header.h    | 2 +-
 include/sound/sof/info.h      | 2 +-
 include/sound/sof/pm.h        | 2 +-
 include/sound/sof/stream.h    | 2 +-
 include/sound/sof/topology.h  | 2 +-
 include/sound/sof/trace.h     | 2 +-
 include/sound/sof/xtensa.h    | 2 +-
 samples/vfio-mdev/mdpy-defs.h | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/sound/sof/control.h b/include/sound/sof/control.h
index bded69e696d4..6080ea0facd7 100644
--- a/include/sound/sof/control.h
+++ b/include/sound/sof/control.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/dai-intel.h b/include/sound/sof/dai-intel.h
index 4bb8ee138ba7..65e4c20e567c 100644
--- a/include/sound/sof/dai-intel.h
+++ b/include/sound/sof/dai-intel.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 3d174e20aa53..5b8de1b1983c 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/header.h b/include/sound/sof/header.h
index 12867bbd4372..10f00c08dbb7 100644
--- a/include/sound/sof/header.h
+++ b/include/sound/sof/header.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/info.h b/include/sound/sof/info.h
index 16528d2b4a50..a9156b4a062c 100644
--- a/include/sound/sof/info.h
+++ b/include/sound/sof/info.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/pm.h b/include/sound/sof/pm.h
index 8ae3ad45bdf7..003879401d63 100644
--- a/include/sound/sof/pm.h
+++ b/include/sound/sof/pm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/stream.h b/include/sound/sof/stream.h
index 643f175cb479..0b71b381b952 100644
--- a/include/sound/sof/stream.h
+++ b/include/sound/sof/stream.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/topology.h b/include/sound/sof/topology.h
index 41dcabf89899..c47b36240920 100644
--- a/include/sound/sof/topology.h
+++ b/include/sound/sof/topology.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/trace.h b/include/sound/sof/trace.h
index 9257d5473d97..fda6e8f6ead4 100644
--- a/include/sound/sof/trace.h
+++ b/include/sound/sof/trace.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/include/sound/sof/xtensa.h b/include/sound/sof/xtensa.h
index d25c764b10e8..dd53d36b34e1 100644
--- a/include/sound/sof/xtensa.h
+++ b/include/sound/sof/xtensa.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
 /*
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
diff --git a/samples/vfio-mdev/mdpy-defs.h b/samples/vfio-mdev/mdpy-defs.h
index 96b3b1b49d34..eb26421b6429 100644
--- a/samples/vfio-mdev/mdpy-defs.h
+++ b/samples/vfio-mdev/mdpy-defs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Simple pci display device.
  *
-- 
cgit v1.2.3


From 110f87a6a5f65e825852632b9557f463e65251d7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 21 Jul 2019 23:49:08 +0900
Subject: usb: host: oxu210hp-hcd: remove include/linux/oxu210hp.h

struct oxu210hp_platform_data is defined, but not used at all.

$ git grep oxu210hp_platform_data
include/linux/oxu210hp.h:struct oxu210hp_platform_data {

include/linux/oxu210hp.h exists just for defining an unused structure,
so it can go away.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Link: https://lore.kernel.org/r/20190721144909.5295-1-yamada.masahiro@socionext.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/oxu210hp.h | 2 --
 include/linux/oxu210hp.h    | 8 --------
 2 files changed, 10 deletions(-)
 delete mode 100644 include/linux/oxu210hp.h

(limited to 'include')

diff --git a/drivers/usb/host/oxu210hp.h b/drivers/usb/host/oxu210hp.h
index 437044147862..67ebea4993b6 100644
--- a/drivers/usb/host/oxu210hp.h
+++ b/drivers/usb/host/oxu210hp.h
@@ -444,5 +444,3 @@ enum ehci_timer_action {
 	TIMER_ASYNC_SHRINK,
 	TIMER_ASYNC_OFF,
 };
-
-#include <linux/oxu210hp.h>
diff --git a/include/linux/oxu210hp.h b/include/linux/oxu210hp.h
deleted file mode 100644
index 94cd25165c08..000000000000
--- a/include/linux/oxu210hp.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* platform data for the OXU210HP HCD */
-
-struct oxu210hp_platform_data {
-	unsigned int bus16:1;
-	unsigned int use_hcd_otg:1;
-	unsigned int use_hcd_sph:1;
-};
-- 
cgit v1.2.3


From 4a2b8560e3dff8637ccb09524650864f60ebab7f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 22 Jul 2019 08:51:46 +0200
Subject: tty: serial: netx: Delete driver

The Netx ARM machine was deleted from the kernel. This driver
had no users and has to go.

Cc: Robert Schwebel <r.schwebel@pengutronix.de>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20190722065146.4844-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig       |  19 -
 drivers/tty/serial/Makefile      |   1 -
 drivers/tty/serial/netx-serial.c | 733 ---------------------------------------
 include/uapi/linux/serial_core.h |   3 -
 4 files changed, 756 deletions(-)
 delete mode 100644 drivers/tty/serial/netx-serial.c

(limited to 'include')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index fd385c8c53a5..3083dbae35f7 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1035,25 +1035,6 @@ config SERIAL_VT8500_CONSOLE
 	depends on SERIAL_VT8500=y
 	select SERIAL_CORE_CONSOLE
 
-config SERIAL_NETX
-	tristate "NetX serial port support"
-	depends on ARCH_NETX
-	select SERIAL_CORE
-	help
-	  If you have a machine based on a Hilscher NetX SoC you
-	  can enable its onboard serial port by enabling this option.
-
-          To compile this driver as a module, choose M here: the
-          module will be called netx-serial.
-
-config SERIAL_NETX_CONSOLE
-	bool "Console on NetX serial port"
-	depends on SERIAL_NETX=y
-	select SERIAL_CORE_CONSOLE
-	help
-	  If you have enabled the serial port on the Hilscher NetX SoC
-	  you can make it the console by answering Y to this option.
-
 config SERIAL_OMAP
 	tristate "OMAP serial port support"
 	depends on ARCH_OMAP2PLUS
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 7cd7cabfa6c4..15a0fccadf7e 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o
 obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
 obj-$(CONFIG_SERIAL_MSM) += msm_serial.o
 obj-$(CONFIG_SERIAL_QCOM_GENI) += qcom_geni_serial.o
-obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o
 obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o
diff --git a/drivers/tty/serial/netx-serial.c b/drivers/tty/serial/netx-serial.c
deleted file mode 100644
index b3556863491f..000000000000
--- a/drivers/tty/serial/netx-serial.c
+++ /dev/null
@@ -1,733 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#if defined(CONFIG_SERIAL_NETX_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
-#define SUPPORT_SYSRQ
-#endif
-
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/platform_device.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/serial_core.h>
-#include <linux/serial.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-
-/* We've been assigned a range on the "Low-density serial ports" major */
-#define SERIAL_NX_MAJOR	204
-#define MINOR_START	170
-
-enum uart_regs {
-	UART_DR              = 0x00,
-	UART_SR              = 0x04,
-	UART_LINE_CR         = 0x08,
-	UART_BAUDDIV_MSB     = 0x0c,
-	UART_BAUDDIV_LSB     = 0x10,
-	UART_CR              = 0x14,
-	UART_FR              = 0x18,
-	UART_IIR             = 0x1c,
-	UART_ILPR            = 0x20,
-	UART_RTS_CR          = 0x24,
-	UART_RTS_LEAD        = 0x28,
-	UART_RTS_TRAIL       = 0x2c,
-	UART_DRV_ENABLE      = 0x30,
-	UART_BRM_CR          = 0x34,
-	UART_RXFIFO_IRQLEVEL = 0x38,
-	UART_TXFIFO_IRQLEVEL = 0x3c,
-};
-
-#define SR_FE (1<<0)
-#define SR_PE (1<<1)
-#define SR_BE (1<<2)
-#define SR_OE (1<<3)
-
-#define LINE_CR_BRK       (1<<0)
-#define LINE_CR_PEN       (1<<1)
-#define LINE_CR_EPS       (1<<2)
-#define LINE_CR_STP2      (1<<3)
-#define LINE_CR_FEN       (1<<4)
-#define LINE_CR_5BIT      (0<<5)
-#define LINE_CR_6BIT      (1<<5)
-#define LINE_CR_7BIT      (2<<5)
-#define LINE_CR_8BIT      (3<<5)
-#define LINE_CR_BITS_MASK (3<<5)
-
-#define CR_UART_EN (1<<0)
-#define CR_SIREN   (1<<1)
-#define CR_SIRLP   (1<<2)
-#define CR_MSIE    (1<<3)
-#define CR_RIE     (1<<4)
-#define CR_TIE     (1<<5)
-#define CR_RTIE    (1<<6)
-#define CR_LBE     (1<<7)
-
-#define FR_CTS  (1<<0)
-#define FR_DSR  (1<<1)
-#define FR_DCD  (1<<2)
-#define FR_BUSY (1<<3)
-#define FR_RXFE (1<<4)
-#define FR_TXFF (1<<5)
-#define FR_RXFF (1<<6)
-#define FR_TXFE (1<<7)
-
-#define IIR_MIS (1<<0)
-#define IIR_RIS (1<<1)
-#define IIR_TIS (1<<2)
-#define IIR_RTIS (1<<3)
-#define IIR_MASK 0xf
-
-#define RTS_CR_AUTO (1<<0)
-#define RTS_CR_RTS  (1<<1)
-#define RTS_CR_COUNT (1<<2)
-#define RTS_CR_MOD2  (1<<3)
-#define RTS_CR_RTS_POL (1<<4)
-#define RTS_CR_CTS_CTR (1<<5)
-#define RTS_CR_CTS_POL (1<<6)
-#define RTS_CR_STICK   (1<<7)
-
-#define UART_PORT_SIZE 0x40
-#define DRIVER_NAME "netx-uart"
-
-struct netx_port {
-	struct uart_port	port;
-};
-
-static void netx_stop_tx(struct uart_port *port)
-{
-	unsigned int val;
-	val = readl(port->membase + UART_CR);
-	writel(val & ~CR_TIE,  port->membase + UART_CR);
-}
-
-static void netx_stop_rx(struct uart_port *port)
-{
-	unsigned int val;
-	val = readl(port->membase + UART_CR);
-	writel(val & ~CR_RIE,  port->membase + UART_CR);
-}
-
-static void netx_enable_ms(struct uart_port *port)
-{
-	unsigned int val;
-	val = readl(port->membase + UART_CR);
-	writel(val | CR_MSIE, port->membase + UART_CR);
-}
-
-static inline void netx_transmit_buffer(struct uart_port *port)
-{
-	struct circ_buf *xmit = &port->state->xmit;
-
-	if (port->x_char) {
-		writel(port->x_char, port->membase + UART_DR);
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if (uart_tx_stopped(port) || uart_circ_empty(xmit)) {
-		netx_stop_tx(port);
-		return;
-	}
-
-	do {
-		/* send xmit->buf[xmit->tail]
-		 * out the port here */
-		writel(xmit->buf[xmit->tail], port->membase + UART_DR);
-		xmit->tail = (xmit->tail + 1) &
-		         (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (!(readl(port->membase + UART_FR) & FR_TXFF));
-
-	if (uart_circ_empty(xmit))
-		netx_stop_tx(port);
-}
-
-static void netx_start_tx(struct uart_port *port)
-{
-	writel(
-	    readl(port->membase + UART_CR) | CR_TIE, port->membase + UART_CR);
-
-	if (!(readl(port->membase + UART_FR) & FR_TXFF))
-		netx_transmit_buffer(port);
-}
-
-static unsigned int netx_tx_empty(struct uart_port *port)
-{
-	return readl(port->membase + UART_FR) & FR_BUSY ? 0 : TIOCSER_TEMT;
-}
-
-static void netx_txint(struct uart_port *port)
-{
-	struct circ_buf *xmit = &port->state->xmit;
-
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		netx_stop_tx(port);
-		return;
-	}
-
-	netx_transmit_buffer(port);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-}
-
-static void netx_rxint(struct uart_port *port, unsigned long *flags)
-{
-	unsigned char rx, flg, status;
-
-	while (!(readl(port->membase + UART_FR) & FR_RXFE)) {
-		rx = readl(port->membase + UART_DR);
-		flg = TTY_NORMAL;
-		port->icount.rx++;
-		status = readl(port->membase + UART_SR);
-		if (status & SR_BE) {
-			writel(0, port->membase + UART_SR);
-			if (uart_handle_break(port))
-				continue;
-		}
-
-		if (unlikely(status & (SR_FE | SR_PE | SR_OE))) {
-
-			if (status & SR_PE)
-				port->icount.parity++;
-			else if (status & SR_FE)
-				port->icount.frame++;
-			if (status & SR_OE)
-				port->icount.overrun++;
-
-			status &= port->read_status_mask;
-
-			if (status & SR_BE)
-				flg = TTY_BREAK;
-			else if (status & SR_PE)
-				flg = TTY_PARITY;
-			else if (status & SR_FE)
-				flg = TTY_FRAME;
-		}
-
-		if (uart_handle_sysrq_char(port, rx))
-			continue;
-
-		uart_insert_char(port, status, SR_OE, rx, flg);
-	}
-
-	spin_unlock_irqrestore(&port->lock, *flags);
-	tty_flip_buffer_push(&port->state->port);
-	spin_lock_irqsave(&port->lock, *flags);
-}
-
-static irqreturn_t netx_int(int irq, void *dev_id)
-{
-	struct uart_port *port = dev_id;
-	unsigned long flags;
-	unsigned char status;
-
-	spin_lock_irqsave(&port->lock,flags);
-
-	status = readl(port->membase + UART_IIR) & IIR_MASK;
-	while (status) {
-		if (status & IIR_RIS)
-			netx_rxint(port, &flags);
-		if (status & IIR_TIS)
-			netx_txint(port);
-		if (status & IIR_MIS) {
-			if (readl(port->membase + UART_FR) & FR_CTS)
-				uart_handle_cts_change(port, 1);
-			else
-				uart_handle_cts_change(port, 0);
-		}
-		writel(0, port->membase + UART_IIR);
-		status = readl(port->membase + UART_IIR) & IIR_MASK;
-	}
-
-	spin_unlock_irqrestore(&port->lock,flags);
-	return IRQ_HANDLED;
-}
-
-static unsigned int netx_get_mctrl(struct uart_port *port)
-{
-	unsigned int ret = TIOCM_DSR | TIOCM_CAR;
-
-	if (readl(port->membase + UART_FR) & FR_CTS)
-		ret |= TIOCM_CTS;
-
-	return ret;
-}
-
-static void netx_set_mctrl(struct uart_port *port, unsigned int mctrl)
-{
-	unsigned int val;
-
-	/* FIXME: Locking needed ? */
-	if (mctrl & TIOCM_RTS) {
-		val = readl(port->membase + UART_RTS_CR);
-		writel(val | RTS_CR_RTS, port->membase + UART_RTS_CR);
-	}
-}
-
-static void netx_break_ctl(struct uart_port *port, int break_state)
-{
-	unsigned int line_cr;
-	spin_lock_irq(&port->lock);
-
-	line_cr = readl(port->membase + UART_LINE_CR);
-	if (break_state != 0)
-		line_cr |= LINE_CR_BRK;
-	else
-		line_cr &= ~LINE_CR_BRK;
-	writel(line_cr, port->membase + UART_LINE_CR);
-
-	spin_unlock_irq(&port->lock);
-}
-
-static int netx_startup(struct uart_port *port)
-{
-	int ret;
-
-	ret = request_irq(port->irq, netx_int, 0,
-			     DRIVER_NAME, port);
-	if (ret) {
-		dev_err(port->dev, "unable to grab irq%d\n",port->irq);
-		goto exit;
-	}
-
-	writel(readl(port->membase + UART_LINE_CR) | LINE_CR_FEN,
-		port->membase + UART_LINE_CR);
-
-	writel(CR_MSIE | CR_RIE | CR_TIE | CR_RTIE | CR_UART_EN,
-		port->membase + UART_CR);
-
-exit:
-	return ret;
-}
-
-static void netx_shutdown(struct uart_port *port)
-{
-	writel(0, port->membase + UART_CR) ;
-
-	free_irq(port->irq, port);
-}
-
-static void
-netx_set_termios(struct uart_port *port, struct ktermios *termios,
-		   struct ktermios *old)
-{
-	unsigned int baud, quot;
-	unsigned char old_cr;
-	unsigned char line_cr = LINE_CR_FEN;
-	unsigned char rts_cr = 0;
-
-	switch (termios->c_cflag & CSIZE) {
-	case CS5:
-		line_cr |= LINE_CR_5BIT;
-		break;
-	case CS6:
-		line_cr |= LINE_CR_6BIT;
-		break;
-	case CS7:
-		line_cr |= LINE_CR_7BIT;
-		break;
-	case CS8:
-		line_cr |= LINE_CR_8BIT;
-		break;
-	}
-
-	if (termios->c_cflag & CSTOPB)
-		line_cr |= LINE_CR_STP2;
-
-	if (termios->c_cflag & PARENB) {
-		line_cr |= LINE_CR_PEN;
-		if (!(termios->c_cflag & PARODD))
-			line_cr |= LINE_CR_EPS;
-	}
-
-	if (termios->c_cflag & CRTSCTS)
-		rts_cr = RTS_CR_AUTO | RTS_CR_CTS_CTR | RTS_CR_RTS_POL;
-
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-	quot = baud * 4096;
-	quot /= 1000;
-	quot *= 256;
-	quot /= 100000;
-
-	spin_lock_irq(&port->lock);
-
-	uart_update_timeout(port, termios->c_cflag, baud);
-
-	old_cr = readl(port->membase + UART_CR);
-
-	/* disable interrupts */
-	writel(old_cr & ~(CR_MSIE | CR_RIE | CR_TIE | CR_RTIE),
-		port->membase + UART_CR);
-
-	/* drain transmitter */
-	while (readl(port->membase + UART_FR) & FR_BUSY);
-
-	/* disable UART */
-	writel(old_cr & ~CR_UART_EN, port->membase + UART_CR);
-
-	/* modem status interrupts */
-	old_cr &= ~CR_MSIE;
-	if (UART_ENABLE_MS(port, termios->c_cflag))
-		old_cr |= CR_MSIE;
-
-	writel((quot>>8) & 0xff, port->membase + UART_BAUDDIV_MSB);
-	writel(quot & 0xff, port->membase + UART_BAUDDIV_LSB);
-	writel(line_cr, port->membase + UART_LINE_CR);
-
-	writel(rts_cr, port->membase + UART_RTS_CR);
-
-	/*
-	 * Characters to ignore
-	 */
-	port->ignore_status_mask = 0;
-	if (termios->c_iflag & IGNPAR)
-		port->ignore_status_mask |= SR_PE;
-	if (termios->c_iflag & IGNBRK) {
-		port->ignore_status_mask |= SR_BE;
-		/*
-		 * If we're ignoring parity and break indicators,
-		 * ignore overruns too (for real raw support).
-		 */
-		if (termios->c_iflag & IGNPAR)
-			port->ignore_status_mask |= SR_PE;
-	}
-
-	port->read_status_mask = 0;
-	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
-		port->read_status_mask |= SR_BE;
-	if (termios->c_iflag & INPCK)
-		port->read_status_mask |= SR_PE | SR_FE;
-
-	writel(old_cr, port->membase + UART_CR);
-
-	spin_unlock_irq(&port->lock);
-}
-
-static const char *netx_type(struct uart_port *port)
-{
-	return port->type == PORT_NETX ? "NETX" : NULL;
-}
-
-static void netx_release_port(struct uart_port *port)
-{
-	release_mem_region(port->mapbase, UART_PORT_SIZE);
-}
-
-static int netx_request_port(struct uart_port *port)
-{
-	return request_mem_region(port->mapbase, UART_PORT_SIZE,
-			DRIVER_NAME) != NULL ? 0 : -EBUSY;
-}
-
-static void netx_config_port(struct uart_port *port, int flags)
-{
-	if (flags & UART_CONFIG_TYPE && netx_request_port(port) == 0)
-		port->type = PORT_NETX;
-}
-
-static int
-netx_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
-	int ret = 0;
-
-	if (ser->type != PORT_UNKNOWN && ser->type != PORT_NETX)
-		ret = -EINVAL;
-
-	return ret;
-}
-
-static struct uart_ops netx_pops = {
-	.tx_empty	= netx_tx_empty,
-	.set_mctrl	= netx_set_mctrl,
-	.get_mctrl	= netx_get_mctrl,
-	.stop_tx	= netx_stop_tx,
-	.start_tx	= netx_start_tx,
-	.stop_rx	= netx_stop_rx,
-	.enable_ms	= netx_enable_ms,
-	.break_ctl	= netx_break_ctl,
-	.startup	= netx_startup,
-	.shutdown	= netx_shutdown,
-	.set_termios	= netx_set_termios,
-	.type		= netx_type,
-	.release_port	= netx_release_port,
-	.request_port	= netx_request_port,
-	.config_port	= netx_config_port,
-	.verify_port	= netx_verify_port,
-};
-
-static struct netx_port netx_ports[] = {
-	{
-	.port = {
-		.type = PORT_NETX,
-		.iotype = UPIO_MEM,
-		.membase = (char __iomem *)io_p2v(NETX_PA_UART0),
-		.mapbase = NETX_PA_UART0,
-		.irq = NETX_IRQ_UART0,
-		.uartclk = 100000000,
-		.fifosize = 16,
-		.flags = UPF_BOOT_AUTOCONF,
-		.ops = &netx_pops,
-		.line = 0,
-	},
-	}, {
-	.port = {
-		.type = PORT_NETX,
-		.iotype = UPIO_MEM,
-		.membase = (char __iomem *)io_p2v(NETX_PA_UART1),
-		.mapbase = NETX_PA_UART1,
-		.irq = NETX_IRQ_UART1,
-		.uartclk = 100000000,
-		.fifosize = 16,
-		.flags = UPF_BOOT_AUTOCONF,
-		.ops = &netx_pops,
-		.line = 1,
-	},
-	}, {
-	.port = {
-		.type = PORT_NETX,
-		.iotype = UPIO_MEM,
-		.membase = (char __iomem *)io_p2v(NETX_PA_UART2),
-		.mapbase = NETX_PA_UART2,
-		.irq = NETX_IRQ_UART2,
-		.uartclk = 100000000,
-		.fifosize = 16,
-		.flags = UPF_BOOT_AUTOCONF,
-		.ops = &netx_pops,
-		.line = 2,
-	},
-	}
-};
-
-#ifdef CONFIG_SERIAL_NETX_CONSOLE
-
-static void netx_console_putchar(struct uart_port *port, int ch)
-{
-	while (readl(port->membase + UART_FR) & FR_BUSY);
-	writel(ch, port->membase + UART_DR);
-}
-
-static void
-netx_console_write(struct console *co, const char *s, unsigned int count)
-{
-	struct uart_port *port = &netx_ports[co->index].port;
-	unsigned char cr_save;
-
-	cr_save = readl(port->membase + UART_CR);
-	writel(cr_save | CR_UART_EN, port->membase + UART_CR);
-
-	uart_console_write(port, s, count, netx_console_putchar);
-
-	while (readl(port->membase + UART_FR) & FR_BUSY);
-	writel(cr_save, port->membase + UART_CR);
-}
-
-static void __init
-netx_console_get_options(struct uart_port *port, int *baud,
-			int *parity, int *bits, int *flow)
-{
-	unsigned char line_cr;
-
-	*baud = (readl(port->membase + UART_BAUDDIV_MSB) << 8) |
-		readl(port->membase + UART_BAUDDIV_LSB);
-	*baud *= 1000;
-	*baud /= 4096;
-	*baud *= 1000;
-	*baud /= 256;
-	*baud *= 100;
-
-	line_cr = readl(port->membase + UART_LINE_CR);
-	*parity = 'n';
-	if (line_cr & LINE_CR_PEN) {
-		if (line_cr & LINE_CR_EPS)
-			*parity = 'e';
-		else
-			*parity = 'o';
-	}
-
-	switch (line_cr & LINE_CR_BITS_MASK) {
-	case LINE_CR_8BIT:
-		*bits = 8;
-		break;
-	case LINE_CR_7BIT:
-		*bits = 7;
-		break;
-	case LINE_CR_6BIT:
-		*bits = 6;
-		break;
-	case LINE_CR_5BIT:
-		*bits = 5;
-		break;
-	}
-
-	if (readl(port->membase + UART_RTS_CR) & RTS_CR_AUTO)
-		*flow = 'r';
-}
-
-static int __init
-netx_console_setup(struct console *co, char *options)
-{
-	struct netx_port *sport;
-	int baud = 9600;
-	int bits = 8;
-	int parity = 'n';
-	int flow = 'n';
-
-	/*
-	 * Check whether an invalid uart number has been specified, and
-	 * if so, search for the first available port that does have
-	 * console support.
-	 */
-	if (co->index == -1 || co->index >= ARRAY_SIZE(netx_ports))
-		co->index = 0;
-	sport = &netx_ports[co->index];
-
-	if (options) {
-		uart_parse_options(options, &baud, &parity, &bits, &flow);
-	} else {
-		/* if the UART is enabled, assume it has been correctly setup
-		 * by the bootloader and get the options
-		 */
-		if (readl(sport->port.membase + UART_CR) & CR_UART_EN) {
-			netx_console_get_options(&sport->port, &baud,
-			&parity, &bits, &flow);
-		}
-
-	}
-
-	return uart_set_options(&sport->port, co, baud, parity, bits, flow);
-}
-
-static struct uart_driver netx_reg;
-static struct console netx_console = {
-	.name		= "ttyNX",
-	.write		= netx_console_write,
-	.device		= uart_console_device,
-	.setup		= netx_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-	.data		= &netx_reg,
-};
-
-static int __init netx_console_init(void)
-{
-	register_console(&netx_console);
-	return 0;
-}
-console_initcall(netx_console_init);
-
-#define NETX_CONSOLE	&netx_console
-#else
-#define NETX_CONSOLE	NULL
-#endif
-
-static struct uart_driver netx_reg = {
-	.owner          = THIS_MODULE,
-	.driver_name    = DRIVER_NAME,
-	.dev_name       = "ttyNX",
-	.major          = SERIAL_NX_MAJOR,
-	.minor          = MINOR_START,
-	.nr             = ARRAY_SIZE(netx_ports),
-	.cons           = NETX_CONSOLE,
-};
-
-static int serial_netx_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	struct netx_port *sport = platform_get_drvdata(pdev);
-
-	if (sport)
-		uart_suspend_port(&netx_reg, &sport->port);
-
-	return 0;
-}
-
-static int serial_netx_resume(struct platform_device *pdev)
-{
-	struct netx_port *sport = platform_get_drvdata(pdev);
-
-	if (sport)
-		uart_resume_port(&netx_reg, &sport->port);
-
-	return 0;
-}
-
-static int serial_netx_probe(struct platform_device *pdev)
-{
-	struct uart_port *port = &netx_ports[pdev->id].port;
-
-	dev_info(&pdev->dev, "initialising\n");
-
-	port->dev = &pdev->dev;
-
-	writel(1, port->membase + UART_RXFIFO_IRQLEVEL);
-	uart_add_one_port(&netx_reg, &netx_ports[pdev->id].port);
-	platform_set_drvdata(pdev, &netx_ports[pdev->id]);
-
-	return 0;
-}
-
-static int serial_netx_remove(struct platform_device *pdev)
-{
-	struct netx_port *sport = platform_get_drvdata(pdev);
-
-	if (sport)
-		uart_remove_one_port(&netx_reg, &sport->port);
-
-	return 0;
-}
-
-static struct platform_driver serial_netx_driver = {
-	.probe          = serial_netx_probe,
-	.remove         = serial_netx_remove,
-
-	.suspend	= serial_netx_suspend,
-	.resume		= serial_netx_resume,
-
-	.driver		= {
-		.name   = DRIVER_NAME,
-	},
-};
-
-static int __init netx_serial_init(void)
-{
-	int ret;
-
-	printk(KERN_INFO "Serial: NetX driver\n");
-
-	ret = uart_register_driver(&netx_reg);
-	if (ret)
-		return ret;
-
-	ret = platform_driver_register(&serial_netx_driver);
-	if (ret != 0)
-		uart_unregister_driver(&netx_reg);
-
-	return 0;
-}
-
-static void __exit netx_serial_exit(void)
-{
-	platform_driver_unregister(&serial_netx_driver);
-	uart_unregister_driver(&netx_reg);
-}
-
-module_init(netx_serial_init);
-module_exit(netx_serial_exit);
-
-MODULE_AUTHOR("Sascha Hauer");
-MODULE_DESCRIPTION("NetX serial port driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 5642c05e0da0..3cc3af1c2ee1 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -150,9 +150,6 @@
 
 #define PORT_PNX8XXX	70
 
-/* Hilscher netx */
-#define PORT_NETX	71
-
 /* SUN4V Hypervisor Console */
 #define PORT_SUNHV	72
 
-- 
cgit v1.2.3


From 173f6eacc8a89ae4b08b166735930e6c46951a81 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Sat, 20 Jul 2019 07:47:07 -0400
Subject: media: v4l: ctrls: Add debug messages

Currently, the v4l2 control code is a bit silent on errors.
Add debug messages on (hopefully) most of the error paths.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/kapi/v4l2-dev.rst      |   1 +
 drivers/media/platform/omap3isp/ispvideo.c |   4 +-
 drivers/media/v4l2-core/v4l2-ctrls.c       | 126 +++++++++++++++++++++++------
 drivers/media/v4l2-core/v4l2-ioctl.c       |  18 +++--
 drivers/media/v4l2-core/v4l2-subdev.c      |   6 +-
 include/media/v4l2-ctrls.h                 |   9 ++-
 include/media/v4l2-ioctl.h                 |   2 +
 7 files changed, 127 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/kapi/v4l2-dev.rst b/Documentation/media/kapi/v4l2-dev.rst
index b359f1804bbe..4c5a15c53dbf 100644
--- a/Documentation/media/kapi/v4l2-dev.rst
+++ b/Documentation/media/kapi/v4l2-dev.rst
@@ -288,6 +288,7 @@ Mask  Description
 0x08  Log the read and write file operations and the VIDIOC_QBUF and
       VIDIOC_DQBUF ioctls.
 0x10  Log the poll file operation.
+0x20  Log error and messages in the control operations.
 ===== ================================================================
 
 Video device cleanup
diff --git a/drivers/media/platform/omap3isp/ispvideo.c b/drivers/media/platform/omap3isp/ispvideo.c
index b52c6fe825ac..ee183c35ff3b 100644
--- a/drivers/media/platform/omap3isp/ispvideo.c
+++ b/drivers/media/platform/omap3isp/ispvideo.c
@@ -1020,8 +1020,8 @@ static int isp_video_check_external_subdevs(struct isp_video *video,
 
 	ctrls.count = 1;
 	ctrls.controls = &ctrl;
-
-	ret = v4l2_g_ext_ctrls(pipe->external->ctrl_handler, NULL, &ctrls);
+	ret = v4l2_g_ext_ctrls(pipe->external->ctrl_handler, &video->video,
+			       NULL, &ctrls);
 	if (ret < 0) {
 		dev_warn(isp->dev, "no pixel rate control in subdev %s\n",
 			 pipe->external->name);
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 13236c191796..76fa2db0e8fb 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -6,6 +6,8 @@
 
  */
 
+#define pr_fmt(fmt) "v4l2-ctrls: " fmt
+
 #include <linux/ctype.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
@@ -16,6 +18,12 @@
 #include <media/v4l2-event.h>
 #include <media/v4l2-dev.h>
 
+#define dprintk(vdev, fmt, arg...) do {					\
+	if (!WARN_ON(!(vdev)) && ((vdev)->dev_debug & V4L2_DEV_DEBUG_CTRL)) \
+		printk(KERN_DEBUG pr_fmt("%s: %s: " fmt),		\
+		       __func__, video_device_node_name(vdev), ##arg);	\
+} while (0)
+
 #define has_op(master, op) \
 	(master->ops && master->ops->op)
 #define call_op(master, op) \
@@ -3260,6 +3268,7 @@ static int v4l2_ctrl_request_bind(struct media_request *req,
 static int prepare_ext_ctrls(struct v4l2_ctrl_handler *hdl,
 			     struct v4l2_ext_controls *cs,
 			     struct v4l2_ctrl_helper *helpers,
+			     struct video_device *vdev,
 			     bool get)
 {
 	struct v4l2_ctrl_helper *h;
@@ -3277,20 +3286,31 @@ static int prepare_ext_ctrls(struct v4l2_ctrl_handler *hdl,
 		if (cs->which &&
 		    cs->which != V4L2_CTRL_WHICH_DEF_VAL &&
 		    cs->which != V4L2_CTRL_WHICH_REQUEST_VAL &&
-		    V4L2_CTRL_ID2WHICH(id) != cs->which)
+		    V4L2_CTRL_ID2WHICH(id) != cs->which) {
+			dprintk(vdev,
+				"invalid which 0x%x or control id 0x%x\n",
+				cs->which, id);
 			return -EINVAL;
+		}
 
 		/* Old-style private controls are not allowed for
 		   extended controls */
-		if (id >= V4L2_CID_PRIVATE_BASE)
+		if (id >= V4L2_CID_PRIVATE_BASE) {
+			dprintk(vdev,
+				"old-style private controls not allowed\n");
 			return -EINVAL;
+		}
 		ref = find_ref_lock(hdl, id);
-		if (ref == NULL)
+		if (ref == NULL) {
+			dprintk(vdev, "cannot find control id 0x%x\n", id);
 			return -EINVAL;
+		}
 		h->ref = ref;
 		ctrl = ref->ctrl;
-		if (ctrl->flags & V4L2_CTRL_FLAG_DISABLED)
+		if (ctrl->flags & V4L2_CTRL_FLAG_DISABLED) {
+			dprintk(vdev, "control id 0x%x is disabled\n", id);
 			return -EINVAL;
+		}
 
 		if (ctrl->cluster[0]->ncontrols > 1)
 			have_clusters = true;
@@ -3300,10 +3320,17 @@ static int prepare_ext_ctrls(struct v4l2_ctrl_handler *hdl,
 			unsigned tot_size = ctrl->elems * ctrl->elem_size;
 
 			if (c->size < tot_size) {
+				/*
+				 * In the get case the application first
+				 * queries to obtain the size of the control.
+				 */
 				if (get) {
 					c->size = tot_size;
 					return -ENOSPC;
 				}
+				dprintk(vdev,
+					"pointer control id 0x%x size too small, %d bytes but %d bytes needed\n",
+					id, c->size, tot_size);
 				return -EFAULT;
 			}
 			c->size = tot_size;
@@ -3364,7 +3391,8 @@ static int class_check(struct v4l2_ctrl_handler *hdl, u32 which)
 
 /* Get extended controls. Allocates the helpers array if needed. */
 static int v4l2_g_ext_ctrls_common(struct v4l2_ctrl_handler *hdl,
-				   struct v4l2_ext_controls *cs)
+				   struct v4l2_ext_controls *cs,
+				   struct video_device *vdev)
 {
 	struct v4l2_ctrl_helper helper[4];
 	struct v4l2_ctrl_helper *helpers = helper;
@@ -3390,7 +3418,7 @@ static int v4l2_g_ext_ctrls_common(struct v4l2_ctrl_handler *hdl,
 			return -ENOMEM;
 	}
 
-	ret = prepare_ext_ctrls(hdl, cs, helpers, true);
+	ret = prepare_ext_ctrls(hdl, cs, helpers, vdev, true);
 	cs->error_idx = cs->count;
 
 	for (i = 0; !ret && i < cs->count; i++)
@@ -3483,8 +3511,8 @@ v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl,
 	return obj;
 }
 
-int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct media_device *mdev,
-		     struct v4l2_ext_controls *cs)
+int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
+		     struct media_device *mdev, struct v4l2_ext_controls *cs)
 {
 	struct media_request_object *obj = NULL;
 	struct media_request *req = NULL;
@@ -3520,7 +3548,7 @@ int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct media_device *mdev,
 				   req_obj);
 	}
 
-	ret = v4l2_g_ext_ctrls_common(hdl, cs);
+	ret = v4l2_g_ext_ctrls_common(hdl, cs, vdev);
 
 	if (obj) {
 		media_request_unlock_for_access(req);
@@ -3663,7 +3691,9 @@ static int try_or_set_cluster(struct v4l2_fh *fh, struct v4l2_ctrl *master,
 
 /* Validate controls. */
 static int validate_ctrls(struct v4l2_ext_controls *cs,
-			  struct v4l2_ctrl_helper *helpers, bool set)
+			  struct v4l2_ctrl_helper *helpers,
+			  struct video_device *vdev,
+			  bool set)
 {
 	unsigned i;
 	int ret = 0;
@@ -3675,16 +3705,24 @@ static int validate_ctrls(struct v4l2_ext_controls *cs,
 
 		cs->error_idx = i;
 
-		if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY)
+		if (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY) {
+			dprintk(vdev,
+				"control id 0x%x is read-only\n",
+				ctrl->id);
 			return -EACCES;
+		}
 		/* This test is also done in try_set_control_cluster() which
 		   is called in atomic context, so that has the final say,
 		   but it makes sense to do an up-front check as well. Once
 		   an error occurs in try_set_control_cluster() some other
 		   controls may have been set already and we want to do a
 		   best-effort to avoid that. */
-		if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED))
+		if (set && (ctrl->flags & V4L2_CTRL_FLAG_GRABBED)) {
+			dprintk(vdev,
+				"control id 0x%x is grabbed, cannot set\n",
+				ctrl->id);
 			return -EBUSY;
+		}
 		/*
 		 * Skip validation for now if the payload needs to be copied
 		 * from userspace into kernelspace. We'll validate those later.
@@ -3719,7 +3757,8 @@ static void update_from_auto_cluster(struct v4l2_ctrl *master)
 /* Try or try-and-set controls */
 static int try_set_ext_ctrls_common(struct v4l2_fh *fh,
 				    struct v4l2_ctrl_handler *hdl,
-				    struct v4l2_ext_controls *cs, bool set)
+				    struct v4l2_ext_controls *cs,
+				    struct video_device *vdev, bool set)
 {
 	struct v4l2_ctrl_helper helper[4];
 	struct v4l2_ctrl_helper *helpers = helper;
@@ -3729,13 +3768,19 @@ static int try_set_ext_ctrls_common(struct v4l2_fh *fh,
 	cs->error_idx = cs->count;
 
 	/* Default value cannot be changed */
-	if (cs->which == V4L2_CTRL_WHICH_DEF_VAL)
+	if (cs->which == V4L2_CTRL_WHICH_DEF_VAL) {
+		dprintk(vdev, "%s: cannot change default value\n",
+			video_device_node_name(vdev));
 		return -EINVAL;
+	}
 
 	cs->which = V4L2_CTRL_ID2WHICH(cs->which);
 
-	if (hdl == NULL)
+	if (hdl == NULL) {
+		dprintk(vdev, "%s: invalid null control handler\n",
+			video_device_node_name(vdev));
 		return -EINVAL;
+	}
 
 	if (cs->count == 0)
 		return class_check(hdl, cs->which);
@@ -3746,9 +3791,9 @@ static int try_set_ext_ctrls_common(struct v4l2_fh *fh,
 		if (!helpers)
 			return -ENOMEM;
 	}
-	ret = prepare_ext_ctrls(hdl, cs, helpers, false);
+	ret = prepare_ext_ctrls(hdl, cs, helpers, vdev, false);
 	if (!ret)
-		ret = validate_ctrls(cs, helpers, set);
+		ret = validate_ctrls(cs, helpers, vdev, set);
 	if (ret && set)
 		cs->error_idx = cs->count;
 	for (i = 0; !ret && i < cs->count; i++) {
@@ -3833,7 +3878,9 @@ static int try_set_ext_ctrls_common(struct v4l2_fh *fh,
 }
 
 static int try_set_ext_ctrls(struct v4l2_fh *fh,
-			     struct v4l2_ctrl_handler *hdl, struct media_device *mdev,
+			     struct v4l2_ctrl_handler *hdl,
+			     struct video_device *vdev,
+			     struct media_device *mdev,
 			     struct v4l2_ext_controls *cs, bool set)
 {
 	struct media_request_object *obj = NULL;
@@ -3841,21 +3888,39 @@ static int try_set_ext_ctrls(struct v4l2_fh *fh,
 	int ret;
 
 	if (cs->which == V4L2_CTRL_WHICH_REQUEST_VAL) {
-		if (!mdev || cs->request_fd < 0)
+		if (!mdev) {
+			dprintk(vdev, "%s: missing media device\n",
+				video_device_node_name(vdev));
+			return -EINVAL;
+		}
+
+		if (cs->request_fd < 0) {
+			dprintk(vdev, "%s: invalid request fd %d\n",
+				video_device_node_name(vdev), cs->request_fd);
 			return -EINVAL;
+		}
 
 		req = media_request_get_by_fd(mdev, cs->request_fd);
-		if (IS_ERR(req))
+		if (IS_ERR(req)) {
+			dprintk(vdev, "%s: cannot find request fd %d\n",
+				video_device_node_name(vdev), cs->request_fd);
 			return PTR_ERR(req);
+		}
 
 		ret = media_request_lock_for_update(req);
 		if (ret) {
+			dprintk(vdev, "%s: cannot lock request fd %d\n",
+				video_device_node_name(vdev), cs->request_fd);
 			media_request_put(req);
 			return ret;
 		}
 
 		obj = v4l2_ctrls_find_req_obj(hdl, req, set);
 		if (IS_ERR(obj)) {
+			dprintk(vdev,
+				"%s: cannot find request object for request fd %d\n",
+				video_device_node_name(vdev),
+				cs->request_fd);
 			media_request_unlock_for_update(req);
 			media_request_put(req);
 			return PTR_ERR(obj);
@@ -3864,7 +3929,11 @@ static int try_set_ext_ctrls(struct v4l2_fh *fh,
 				   req_obj);
 	}
 
-	ret = try_set_ext_ctrls_common(fh, hdl, cs, set);
+	ret = try_set_ext_ctrls_common(fh, hdl, cs, vdev, set);
+	if (ret)
+		dprintk(vdev,
+			"%s: try_set_ext_ctrls_common failed (%d)\n",
+			video_device_node_name(vdev), ret);
 
 	if (obj) {
 		media_request_unlock_for_update(req);
@@ -3875,17 +3944,22 @@ static int try_set_ext_ctrls(struct v4l2_fh *fh,
 	return ret;
 }
 
-int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct media_device *mdev,
+int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl,
+		       struct video_device *vdev,
+		       struct media_device *mdev,
 		       struct v4l2_ext_controls *cs)
 {
-	return try_set_ext_ctrls(NULL, hdl, mdev, cs, false);
+	return try_set_ext_ctrls(NULL, hdl, vdev, mdev, cs, false);
 }
 EXPORT_SYMBOL(v4l2_try_ext_ctrls);
 
-int v4l2_s_ext_ctrls(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
-		     struct media_device *mdev, struct v4l2_ext_controls *cs)
+int v4l2_s_ext_ctrls(struct v4l2_fh *fh,
+		     struct v4l2_ctrl_handler *hdl,
+		     struct video_device *vdev,
+		     struct media_device *mdev,
+		     struct v4l2_ext_controls *cs)
 {
-	return try_set_ext_ctrls(fh, hdl, mdev, cs, true);
+	return try_set_ext_ctrls(fh, hdl, vdev, mdev, cs, true);
 }
 EXPORT_SYMBOL(v4l2_s_ext_ctrls);
 
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index e36629ae2203..38765af9ad2d 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2186,9 +2186,11 @@ static int v4l_g_ext_ctrls(const struct v4l2_ioctl_ops *ops,
 
 	p->error_idx = p->count;
 	if (vfh && vfh->ctrl_handler)
-		return v4l2_g_ext_ctrls(vfh->ctrl_handler, vfd->v4l2_dev->mdev, p);
+		return v4l2_g_ext_ctrls(vfh->ctrl_handler,
+					vfd, vfd->v4l2_dev->mdev, p);
 	if (vfd->ctrl_handler)
-		return v4l2_g_ext_ctrls(vfd->ctrl_handler, vfd->v4l2_dev->mdev, p);
+		return v4l2_g_ext_ctrls(vfd->ctrl_handler,
+					vfd, vfd->v4l2_dev->mdev, p);
 	if (ops->vidioc_g_ext_ctrls == NULL)
 		return -ENOTTY;
 	return check_ext_ctrls(p, 0) ? ops->vidioc_g_ext_ctrls(file, fh, p) :
@@ -2205,9 +2207,11 @@ static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops,
 
 	p->error_idx = p->count;
 	if (vfh && vfh->ctrl_handler)
-		return v4l2_s_ext_ctrls(vfh, vfh->ctrl_handler, vfd->v4l2_dev->mdev, p);
+		return v4l2_s_ext_ctrls(vfh, vfh->ctrl_handler,
+					vfd, vfd->v4l2_dev->mdev, p);
 	if (vfd->ctrl_handler)
-		return v4l2_s_ext_ctrls(NULL, vfd->ctrl_handler, vfd->v4l2_dev->mdev, p);
+		return v4l2_s_ext_ctrls(NULL, vfd->ctrl_handler,
+					vfd, vfd->v4l2_dev->mdev, p);
 	if (ops->vidioc_s_ext_ctrls == NULL)
 		return -ENOTTY;
 	return check_ext_ctrls(p, 0) ? ops->vidioc_s_ext_ctrls(file, fh, p) :
@@ -2224,9 +2228,11 @@ static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops,
 
 	p->error_idx = p->count;
 	if (vfh && vfh->ctrl_handler)
-		return v4l2_try_ext_ctrls(vfh->ctrl_handler, vfd->v4l2_dev->mdev, p);
+		return v4l2_try_ext_ctrls(vfh->ctrl_handler,
+					  vfd, vfd->v4l2_dev->mdev, p);
 	if (vfd->ctrl_handler)
-		return v4l2_try_ext_ctrls(vfd->ctrl_handler, vfd->v4l2_dev->mdev, p);
+		return v4l2_try_ext_ctrls(vfd->ctrl_handler,
+					  vfd, vfd->v4l2_dev->mdev, p);
 	if (ops->vidioc_try_ext_ctrls == NULL)
 		return -ENOTTY;
 	return check_ext_ctrls(p, 0) ? ops->vidioc_try_ext_ctrls(file, fh, p) :
diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c
index 25c73c13cc7e..f725cd9b66b9 100644
--- a/drivers/media/v4l2-core/v4l2-subdev.c
+++ b/drivers/media/v4l2-core/v4l2-subdev.c
@@ -372,19 +372,19 @@ static long subdev_do_ioctl(struct file *file, unsigned int cmd, void *arg)
 		if (!vfh->ctrl_handler)
 			return -ENOTTY;
 		return v4l2_g_ext_ctrls(vfh->ctrl_handler,
-					sd->v4l2_dev->mdev, arg);
+					vdev, sd->v4l2_dev->mdev, arg);
 
 	case VIDIOC_S_EXT_CTRLS:
 		if (!vfh->ctrl_handler)
 			return -ENOTTY;
 		return v4l2_s_ext_ctrls(vfh, vfh->ctrl_handler,
-					sd->v4l2_dev->mdev, arg);
+					vdev, sd->v4l2_dev->mdev, arg);
 
 	case VIDIOC_TRY_EXT_CTRLS:
 		if (!vfh->ctrl_handler)
 			return -ENOTTY;
 		return v4l2_try_ext_ctrls(vfh->ctrl_handler,
-					  sd->v4l2_dev->mdev, arg);
+					  vdev, sd->v4l2_dev->mdev, arg);
 
 	case VIDIOC_DQEVENT:
 		if (!(sd->flags & V4L2_SUBDEV_FL_HAS_EVENTS))
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 6e9dc9c44bb1..570ff4b0205a 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -1268,25 +1268,28 @@ int v4l2_s_ctrl(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
  *	:ref:`VIDIOC_G_EXT_CTRLS <vidioc_g_ext_ctrls>` ioctl
  *
  * @hdl: pointer to &struct v4l2_ctrl_handler
+ * @vdev: pointer to &struct video_device
  * @mdev: pointer to &struct media_device
  * @c: pointer to &struct v4l2_ext_controls
  *
  * If hdl == NULL then they will all return -EINVAL.
  */
-int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct media_device *mdev,
-		     struct v4l2_ext_controls *c);
+int v4l2_g_ext_ctrls(struct v4l2_ctrl_handler *hdl, struct video_device *vdev,
+		     struct media_device *mdev, struct v4l2_ext_controls *c);
 
 /**
  * v4l2_try_ext_ctrls - Helper function to implement
  *	:ref:`VIDIOC_TRY_EXT_CTRLS <vidioc_g_ext_ctrls>` ioctl
  *
  * @hdl: pointer to &struct v4l2_ctrl_handler
+ * @vdev: pointer to &struct video_device
  * @mdev: pointer to &struct media_device
  * @c: pointer to &struct v4l2_ext_controls
  *
  * If hdl == NULL then they will all return -EINVAL.
  */
 int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl,
+		       struct video_device *vdev,
 		       struct media_device *mdev,
 		       struct v4l2_ext_controls *c);
 
@@ -1296,12 +1299,14 @@ int v4l2_try_ext_ctrls(struct v4l2_ctrl_handler *hdl,
  *
  * @fh: pointer to &struct v4l2_fh
  * @hdl: pointer to &struct v4l2_ctrl_handler
+ * @vdev: pointer to &struct video_device
  * @mdev: pointer to &struct media_device
  * @c: pointer to &struct v4l2_ext_controls
  *
  * If hdl == NULL then they will all return -EINVAL.
  */
 int v4l2_s_ext_ctrls(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl,
+		     struct video_device *vdev,
 		     struct media_device *mdev,
 		     struct v4l2_ext_controls *c);
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 400f2e46c108..4bba65a59d46 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -602,6 +602,8 @@ struct v4l2_ioctl_ops {
 #define V4L2_DEV_DEBUG_STREAMING	0x08
 /* Log poll() */
 #define V4L2_DEV_DEBUG_POLL		0x10
+/* Log controls */
+#define V4L2_DEV_DEBUG_CTRL		0x20
 
 /*  Video standard functions  */
 
-- 
cgit v1.2.3


From ee484875af0080a0c8621e58facf06f92c658a4b Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 23 Jul 2019 08:58:01 -0400
Subject: media: davinci/vpfe_capture.c: drop unused format descriptions

Simplify vpfe_pixel_format to just contain the pixelformat and bpp fields.
All others are unused.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/davinci/vpfe_capture.c | 51 +++++----------------------
 include/media/davinci/vpfe_capture.h          |  2 +-
 2 files changed, 10 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c
index 852fc357e19d..916ed743d716 100644
--- a/drivers/media/platform/davinci/vpfe_capture.c
+++ b/drivers/media/platform/davinci/vpfe_capture.c
@@ -119,57 +119,27 @@ static const struct vpfe_standard vpfe_standards[] = {
 /* Used when raw Bayer image from ccdc is directly captured to SDRAM */
 static const struct vpfe_pixel_format vpfe_pix_fmts[] = {
 	{
-		.fmtdesc = {
-			.index = 0,
-			.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.description = "Bayer GrRBGb 8bit A-Law compr.",
-			.pixelformat = V4L2_PIX_FMT_SBGGR8,
-		},
+		.pixelformat = V4L2_PIX_FMT_SBGGR8,
 		.bpp = 1,
 	},
 	{
-		.fmtdesc = {
-			.index = 1,
-			.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.description = "Bayer GrRBGb - 16bit",
-			.pixelformat = V4L2_PIX_FMT_SBGGR16,
-		},
+		.pixelformat = V4L2_PIX_FMT_SBGGR16,
 		.bpp = 2,
 	},
 	{
-		.fmtdesc = {
-			.index = 2,
-			.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.description = "Bayer GrRBGb 8bit DPCM compr.",
-			.pixelformat = V4L2_PIX_FMT_SGRBG10DPCM8,
-		},
+		.pixelformat = V4L2_PIX_FMT_SGRBG10DPCM8,
 		.bpp = 1,
 	},
 	{
-		.fmtdesc = {
-			.index = 3,
-			.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.description = "YCbCr 4:2:2 Interleaved UYVY",
-			.pixelformat = V4L2_PIX_FMT_UYVY,
-		},
+		.pixelformat = V4L2_PIX_FMT_UYVY,
 		.bpp = 2,
 	},
 	{
-		.fmtdesc = {
-			.index = 4,
-			.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.description = "YCbCr 4:2:2 Interleaved YUYV",
-			.pixelformat = V4L2_PIX_FMT_YUYV,
-		},
+		.pixelformat = V4L2_PIX_FMT_YUYV,
 		.bpp = 2,
 	},
 	{
-		.fmtdesc = {
-			.index = 5,
-			.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
-			.description = "Y/CbCr 4:2:0 - Semi planar",
-			.pixelformat = V4L2_PIX_FMT_NV12,
-		},
+		.pixelformat = V4L2_PIX_FMT_NV12,
 		.bpp = 1,
 	},
 };
@@ -183,7 +153,7 @@ static const struct vpfe_pixel_format *vpfe_lookup_pix_format(u32 pix_format)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(vpfe_pix_fmts); i++) {
-		if (pix_format == vpfe_pix_fmts[i].fmtdesc.pixelformat)
+		if (pix_format == vpfe_pix_fmts[i].pixelformat)
 			return &vpfe_pix_fmts[i];
 	}
 	return NULL;
@@ -782,7 +752,7 @@ static const struct vpfe_pixel_format *
 	temp = 0;
 	found = 0;
 	while (ccdc_dev->hw_ops.enum_pix(&pix, temp) >= 0) {
-		if (vpfe_pix_fmt->fmtdesc.pixelformat == pix) {
+		if (vpfe_pix_fmt->pixelformat == pix) {
 			found = 1;
 			break;
 		}
@@ -899,7 +869,6 @@ static int vpfe_enum_fmt_vid_cap(struct file *file, void  *priv,
 {
 	struct vpfe_device *vpfe_dev = video_drvdata(file);
 	const struct vpfe_pixel_format *pix_fmt;
-	int temp_index;
 	u32 pix;
 
 	v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev, "vpfe_enum_fmt_vid_cap\n");
@@ -910,9 +879,7 @@ static int vpfe_enum_fmt_vid_cap(struct file *file, void  *priv,
 	/* Fill in the information about format */
 	pix_fmt = vpfe_lookup_pix_format(pix);
 	if (pix_fmt) {
-		temp_index = fmt->index;
-		*fmt = pix_fmt->fmtdesc;
-		fmt->index = temp_index;
+		fmt->pixelformat = fmt->pixelformat;
 		return 0;
 	}
 	return -EINVAL;
diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h
index 2c5b3eacf527..4ad53031e2f7 100644
--- a/include/media/davinci/vpfe_capture.h
+++ b/include/media/davinci/vpfe_capture.h
@@ -32,7 +32,7 @@
 #define CAPTURE_DRV_NAME		"vpfe-capture"
 
 struct vpfe_pixel_format {
-	struct v4l2_fmtdesc fmtdesc;
+	u32 pixelformat;
 	/* bytes per pixel */
 	int bpp;
 };
-- 
cgit v1.2.3


From 615c164da0eb42cbfb1688cb429cc4d5039db5d8 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Fri, 5 Jul 2019 17:14:21 +0300
Subject: intel_th: msu: Introduce buffer interface

Introduces a concept of external buffers, which is a mechanism for creating
trace sinks that would receive trace data from MSC buffers and transfer it
elsewhere.

A external buffer can implement its own window allocation/deallocation if
it has to. It must provide a callback that's used to notify it when a
window fills up, so that it can then start a DMA transaction from that
window 'elsewhere'. This window remains in a 'locked' state and won't be
used for storing new trace data until the buffer 'unlocks' it with a
provided API call, at which point the window can be used again for storing
trace data.

This relies on a functional "last block" interrupt, so not all versions of
Trace Hub can use this feature, which does not reflect on existing users.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20190705141425.19894-2-alexander.shishkin@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-bus-intel_th-devices-msc     |   3 +-
 MAINTAINERS                                        |   1 +
 drivers/hwtracing/intel_th/msu.c                   | 388 +++++++++++++++++++--
 drivers/hwtracing/intel_th/msu.h                   |  20 +-
 include/linux/intel_th.h                           |  79 +++++
 5 files changed, 461 insertions(+), 30 deletions(-)
 create mode 100644 include/linux/intel_th.h

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
index f54ae244f3f1..456cb62b384c 100644
--- a/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
+++ b/Documentation/ABI/testing/sysfs-bus-intel_th-devices-msc
@@ -12,7 +12,8 @@ Description:	(RW) Configure MSC operating mode:
 		  - "single", for contiguous buffer mode (high-order alloc);
 		  - "multi", for multiblock mode;
 		  - "ExI", for DCI handler mode;
-		  - "debug", for debug mode.
+		  - "debug", for debug mode;
+		  - any of the currently loaded buffer sinks.
 		If operating mode changes, existing buffer is deallocated,
 		provided there are no active users and tracing is not enabled,
 		otherwise the write will fail.
diff --git a/MAINTAINERS b/MAINTAINERS
index 783569e3c4b4..c8c506b8423b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8360,6 +8360,7 @@ M:	Alexander Shishkin <alexander.shishkin@linux.intel.com>
 S:	Supported
 F:	Documentation/trace/intel_th.rst
 F:	drivers/hwtracing/intel_th/
+F:	include/linux/intel_th.h
 
 INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT)
 M:	Ning Sun <ning.sun@intel.com>
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 8ab28e5fb366..08413e6a075f 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -17,21 +17,48 @@
 #include <linux/mm.h>
 #include <linux/fs.h>
 #include <linux/io.h>
+#include <linux/workqueue.h>
 #include <linux/dma-mapping.h>
 
 #ifdef CONFIG_X86
 #include <asm/set_memory.h>
 #endif
 
+#include <linux/intel_th.h>
 #include "intel_th.h"
 #include "msu.h"
 
 #define msc_dev(x) (&(x)->thdev->dev)
 
+/*
+ * Lockout state transitions:
+ *   READY -> INUSE -+-> LOCKED -+-> READY -> etc.
+ *                   \-----------/
+ * WIN_READY:	window can be used by HW
+ * WIN_INUSE:	window is in use
+ * WIN_LOCKED:	window is filled up and is being processed by the buffer
+ * handling code
+ *
+ * All state transitions happen automatically, except for the LOCKED->READY,
+ * which needs to be signalled by the buffer code by calling
+ * intel_th_msc_window_unlock().
+ *
+ * When the interrupt handler has to switch to the next window, it checks
+ * whether it's READY, and if it is, it performs the switch and tracing
+ * continues. If it's LOCKED, it stops the trace.
+ */
+enum lockout_state {
+	WIN_READY = 0,
+	WIN_INUSE,
+	WIN_LOCKED
+};
+
 /**
  * struct msc_window - multiblock mode window descriptor
  * @entry:	window list linkage (msc::win_list)
  * @pgoff:	page offset into the buffer that this window starts at
+ * @lockout:	lockout state, see comment below
+ * @lo_lock:	lockout state serialization
  * @nr_blocks:	number of blocks (pages) in this window
  * @nr_segs:	number of segments in this window (<= @nr_blocks)
  * @_sgt:	array of block descriptors
@@ -40,6 +67,8 @@
 struct msc_window {
 	struct list_head	entry;
 	unsigned long		pgoff;
+	enum lockout_state	lockout;
+	spinlock_t		lo_lock;
 	unsigned int		nr_blocks;
 	unsigned int		nr_segs;
 	struct msc		*msc;
@@ -77,6 +106,8 @@ struct msc_iter {
  * struct msc - MSC device representation
  * @reg_base:		register window base address
  * @thdev:		intel_th_device pointer
+ * @mbuf:		MSU buffer, if assigned
+ * @mbuf_priv		MSU buffer's private data, if @mbuf
  * @win_list:		list of windows in multiblock mode
  * @single_sgt:		single mode buffer
  * @cur_win:		current window
@@ -100,6 +131,10 @@ struct msc {
 	void __iomem		*msu_base;
 	struct intel_th_device	*thdev;
 
+	const struct msu_buffer	*mbuf;
+	void			*mbuf_priv;
+
+	struct work_struct	work;
 	struct list_head	win_list;
 	struct sg_table		single_sgt;
 	struct msc_window	*cur_win;
@@ -126,6 +161,101 @@ struct msc {
 	unsigned int		index;
 };
 
+static LIST_HEAD(msu_buffer_list);
+static struct mutex msu_buffer_mutex;
+
+/**
+ * struct msu_buffer_entry - internal MSU buffer bookkeeping
+ * @entry:	link to msu_buffer_list
+ * @mbuf:	MSU buffer object
+ * @owner:	module that provides this MSU buffer
+ */
+struct msu_buffer_entry {
+	struct list_head	entry;
+	const struct msu_buffer	*mbuf;
+	struct module		*owner;
+};
+
+static struct msu_buffer_entry *__msu_buffer_entry_find(const char *name)
+{
+	struct msu_buffer_entry *mbe;
+
+	lockdep_assert_held(&msu_buffer_mutex);
+
+	list_for_each_entry(mbe, &msu_buffer_list, entry) {
+		if (!strcmp(mbe->mbuf->name, name))
+			return mbe;
+	}
+
+	return NULL;
+}
+
+static const struct msu_buffer *
+msu_buffer_get(const char *name)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(name);
+	if (mbe && !try_module_get(mbe->owner))
+		mbe = NULL;
+	mutex_unlock(&msu_buffer_mutex);
+
+	return mbe ? mbe->mbuf : NULL;
+}
+
+static void msu_buffer_put(const struct msu_buffer *mbuf)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(mbuf->name);
+	if (mbe)
+		module_put(mbe->owner);
+	mutex_unlock(&msu_buffer_mutex);
+}
+
+int intel_th_msu_buffer_register(const struct msu_buffer *mbuf,
+				 struct module *owner)
+{
+	struct msu_buffer_entry *mbe;
+	int ret = 0;
+
+	mbe = kzalloc(sizeof(*mbe), GFP_KERNEL);
+	if (!mbe)
+		return -ENOMEM;
+
+	mutex_lock(&msu_buffer_mutex);
+	if (__msu_buffer_entry_find(mbuf->name)) {
+		ret = -EEXIST;
+		kfree(mbe);
+		goto unlock;
+	}
+
+	mbe->mbuf = mbuf;
+	mbe->owner = owner;
+	list_add_tail(&mbe->entry, &msu_buffer_list);
+unlock:
+	mutex_unlock(&msu_buffer_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(intel_th_msu_buffer_register);
+
+void intel_th_msu_buffer_unregister(const struct msu_buffer *mbuf)
+{
+	struct msu_buffer_entry *mbe;
+
+	mutex_lock(&msu_buffer_mutex);
+	mbe = __msu_buffer_entry_find(mbuf->name);
+	if (mbe) {
+		list_del(&mbe->entry);
+		kfree(mbe);
+	}
+	mutex_unlock(&msu_buffer_mutex);
+}
+EXPORT_SYMBOL_GPL(intel_th_msu_buffer_unregister);
+
 static inline bool msc_block_is_empty(struct msc_block_desc *bdesc)
 {
 	/* header hasn't been written */
@@ -188,6 +318,25 @@ static struct msc_window *msc_next_window(struct msc_window *win)
 	return list_next_entry(win, entry);
 }
 
+static size_t msc_win_total_sz(struct msc_window *win)
+{
+	unsigned int blk;
+	size_t size = 0;
+
+	for (blk = 0; blk < win->nr_segs; blk++) {
+		struct msc_block_desc *bdesc = msc_win_block(win, blk);
+
+		if (msc_block_wrapped(bdesc))
+			return win->nr_blocks << PAGE_SHIFT;
+
+		size += msc_total_sz(bdesc);
+		if (msc_block_last_written(bdesc))
+			break;
+	}
+
+	return size;
+}
+
 /**
  * msc_find_window() - find a window matching a given sg_table
  * @msc:	MSC device
@@ -527,6 +676,9 @@ static int intel_th_msu_init(struct msc *msc)
 	if (!msc->do_irq)
 		return 0;
 
+	if (!msc->mbuf)
+		return 0;
+
 	mintctl = ioread32(msc->msu_base + REG_MSU_MINTCTL);
 	mintctl |= msc->index ? M1BLIE : M0BLIE;
 	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
@@ -554,6 +706,44 @@ static void intel_th_msu_deinit(struct msc *msc)
 	iowrite32(mintctl, msc->msu_base + REG_MSU_MINTCTL);
 }
 
+static int msc_win_set_lockout(struct msc_window *win,
+			       enum lockout_state expect,
+			       enum lockout_state new)
+{
+	enum lockout_state old;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!win->msc->mbuf)
+		return 0;
+
+	spin_lock_irqsave(&win->lo_lock, flags);
+	old = win->lockout;
+
+	if (old != expect) {
+		ret = -EINVAL;
+		dev_warn_ratelimited(msc_dev(win->msc),
+				     "expected lockout state %d, got %d\n",
+				     expect, old);
+		goto unlock;
+	}
+
+	win->lockout = new;
+
+unlock:
+	spin_unlock_irqrestore(&win->lo_lock, flags);
+
+	if (ret) {
+		if (expect == WIN_READY && old == WIN_LOCKED)
+			return -EBUSY;
+
+		/* from intel_th_msc_window_unlock(), don't warn if not locked */
+		if (expect == WIN_LOCKED && old == new)
+			return 0;
+	}
+
+	return ret;
+}
 /**
  * msc_configure() - set up MSC hardware
  * @msc:	the MSC device to configure
@@ -571,8 +761,12 @@ static int msc_configure(struct msc *msc)
 	if (msc->mode > MSC_MODE_MULTI)
 		return -ENOTSUPP;
 
-	if (msc->mode == MSC_MODE_MULTI)
+	if (msc->mode == MSC_MODE_MULTI) {
+		if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE))
+			return -EBUSY;
+
 		msc_buffer_clear_hw_header(msc);
+	}
 
 	reg = msc->base_addr >> PAGE_SHIFT;
 	iowrite32(reg, msc->reg_base + REG_MSU_MSC0BAR);
@@ -594,10 +788,14 @@ static int msc_configure(struct msc *msc)
 
 	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
 
+	intel_th_msu_init(msc);
+
 	msc->thdev->output.multiblock = msc->mode == MSC_MODE_MULTI;
 	intel_th_trace_enable(msc->thdev);
 	msc->enabled = 1;
 
+	if (msc->mbuf && msc->mbuf->activate)
+		msc->mbuf->activate(msc->mbuf_priv);
 
 	return 0;
 }
@@ -611,10 +809,17 @@ static int msc_configure(struct msc *msc)
  */
 static void msc_disable(struct msc *msc)
 {
+	struct msc_window *win = msc->cur_win;
 	u32 reg;
 
 	lockdep_assert_held(&msc->buf_mutex);
 
+	if (msc->mode == MSC_MODE_MULTI)
+		msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
+
+	if (msc->mbuf && msc->mbuf->deactivate)
+		msc->mbuf->deactivate(msc->mbuf_priv);
+	intel_th_msu_deinit(msc);
 	intel_th_trace_disable(msc->thdev);
 
 	if (msc->mode == MSC_MODE_SINGLE) {
@@ -630,6 +835,11 @@ static void msc_disable(struct msc *msc)
 	reg = ioread32(msc->reg_base + REG_MSU_MSC0CTL);
 	reg &= ~MSC_EN;
 	iowrite32(reg, msc->reg_base + REG_MSU_MSC0CTL);
+
+	if (msc->mbuf && msc->mbuf->ready)
+		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
+				 msc_win_total_sz(win));
+
 	msc->enabled = 0;
 
 	iowrite32(0, msc->reg_base + REG_MSU_MSC0BAR);
@@ -640,6 +850,10 @@ static void msc_disable(struct msc *msc)
 
 	reg = ioread32(msc->reg_base + REG_MSU_MSC0STS);
 	dev_dbg(msc_dev(msc), "MSCnSTS: %08x\n", reg);
+
+	reg = ioread32(msc->reg_base + REG_MSU_MSUSTS);
+	reg &= msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
+	iowrite32(reg, msc->reg_base + REG_MSU_MSUSTS);
 }
 
 static int intel_th_msc_activate(struct intel_th_device *thdev)
@@ -856,6 +1070,8 @@ static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
 
 	win->msc = msc;
 	win->sgt = &win->_sgt;
+	win->lockout = WIN_READY;
+	spin_lock_init(&win->lo_lock);
 
 	if (!list_empty(&msc->win_list)) {
 		struct msc_window *prev = list_last_entry(&msc->win_list,
@@ -865,8 +1081,13 @@ static int msc_buffer_win_alloc(struct msc *msc, unsigned int nr_blocks)
 		win->pgoff = prev->pgoff + prev->nr_blocks;
 	}
 
-	ret = __msc_buffer_win_alloc(win, nr_blocks);
-	if (ret < 0)
+	if (msc->mbuf && msc->mbuf->alloc_window)
+		ret = msc->mbuf->alloc_window(msc->mbuf_priv, &win->sgt,
+					      nr_blocks << PAGE_SHIFT);
+	else
+		ret = __msc_buffer_win_alloc(win, nr_blocks);
+
+	if (ret <= 0)
 		goto err_nomem;
 
 	msc_buffer_set_uc(win, ret);
@@ -925,7 +1146,10 @@ static void msc_buffer_win_free(struct msc *msc, struct msc_window *win)
 
 	msc_buffer_set_wb(win);
 
-	__msc_buffer_win_free(msc, win);
+	if (msc->mbuf && msc->mbuf->free_window)
+		msc->mbuf->free_window(msc->mbuf_priv, win->sgt);
+	else
+		__msc_buffer_win_free(msc, win);
 
 	kfree(win);
 }
@@ -1462,18 +1686,77 @@ static void msc_win_switch(struct msc *msc)
 	intel_th_trace_switch(msc->thdev);
 }
 
+/**
+ * intel_th_msc_window_unlock - put the window back in rotation
+ * @dev:	MSC device to which this relates
+ * @sgt:	buffer's sg_table for the window, does nothing if NULL
+ */
+void intel_th_msc_window_unlock(struct device *dev, struct sg_table *sgt)
+{
+	struct msc *msc = dev_get_drvdata(dev);
+	struct msc_window *win;
+
+	if (!sgt)
+		return;
+
+	win = msc_find_window(msc, sgt, false);
+	if (!win)
+		return;
+
+	msc_win_set_lockout(win, WIN_LOCKED, WIN_READY);
+}
+EXPORT_SYMBOL_GPL(intel_th_msc_window_unlock);
+
+static void msc_work(struct work_struct *work)
+{
+	struct msc *msc = container_of(work, struct msc, work);
+
+	intel_th_msc_deactivate(msc->thdev);
+}
+
 static irqreturn_t intel_th_msc_interrupt(struct intel_th_device *thdev)
 {
 	struct msc *msc = dev_get_drvdata(&thdev->dev);
 	u32 msusts = ioread32(msc->msu_base + REG_MSU_MSUSTS);
 	u32 mask = msc->index ? MSUSTS_MSC1BLAST : MSUSTS_MSC0BLAST;
+	struct msc_window *win, *next_win;
 
-	if (!(msusts & mask)) {
-		if (msc->enabled)
-			return IRQ_HANDLED;
+	if (!msc->do_irq || !msc->mbuf)
 		return IRQ_NONE;
+
+	msusts &= mask;
+
+	if (!msusts)
+		return msc->enabled ? IRQ_HANDLED : IRQ_NONE;
+
+	iowrite32(msusts, msc->msu_base + REG_MSU_MSUSTS);
+
+	if (!msc->enabled)
+		return IRQ_NONE;
+
+	/* grab the window before we do the switch */
+	win = msc->cur_win;
+	if (!win)
+		return IRQ_HANDLED;
+	next_win = msc_next_window(win);
+	if (!next_win)
+		return IRQ_HANDLED;
+
+	/* next window: if READY, proceed, if LOCKED, stop the trace */
+	if (msc_win_set_lockout(next_win, WIN_READY, WIN_INUSE)) {
+		schedule_work(&msc->work);
+		return IRQ_HANDLED;
 	}
 
+	/* current window: INUSE -> LOCKED */
+	msc_win_set_lockout(win, WIN_INUSE, WIN_LOCKED);
+
+	msc_win_switch(msc);
+
+	if (msc->mbuf && msc->mbuf->ready)
+		msc->mbuf->ready(msc->mbuf_priv, win->sgt,
+				 msc_win_total_sz(win));
+
 	return IRQ_HANDLED;
 }
 
@@ -1511,21 +1794,43 @@ wrap_store(struct device *dev, struct device_attribute *attr, const char *buf,
 
 static DEVICE_ATTR_RW(wrap);
 
+static void msc_buffer_unassign(struct msc *msc)
+{
+	lockdep_assert_held(&msc->buf_mutex);
+
+	if (!msc->mbuf)
+		return;
+
+	msc->mbuf->unassign(msc->mbuf_priv);
+	msu_buffer_put(msc->mbuf);
+	msc->mbuf_priv = NULL;
+	msc->mbuf = NULL;
+}
+
 static ssize_t
 mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct msc *msc = dev_get_drvdata(dev);
+	const char *mode = msc_mode[msc->mode];
+	ssize_t ret;
+
+	mutex_lock(&msc->buf_mutex);
+	if (msc->mbuf)
+		mode = msc->mbuf->name;
+	ret = scnprintf(buf, PAGE_SIZE, "%s\n", mode);
+	mutex_unlock(&msc->buf_mutex);
 
-	return scnprintf(buf, PAGE_SIZE, "%s\n", msc_mode[msc->mode]);
+	return ret;
 }
 
 static ssize_t
 mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
 	   size_t size)
 {
+	const struct msu_buffer *mbuf = NULL;
 	struct msc *msc = dev_get_drvdata(dev);
 	size_t len = size;
-	char *cp;
+	char *cp, *mode;
 	int i, ret;
 
 	if (!capable(CAP_SYS_RAWIO))
@@ -1535,17 +1840,59 @@ mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
 	if (cp)
 		len = cp - buf;
 
-	for (i = 0; i < ARRAY_SIZE(msc_mode); i++)
-		if (!strncmp(msc_mode[i], buf, len))
-			goto found;
+	mode = kstrndup(buf, len, GFP_KERNEL);
+	i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode);
+	if (i >= 0)
+		goto found;
+
+	/* Buffer sinks only work with a usable IRQ */
+	if (!msc->do_irq) {
+		kfree(mode);
+		return -EINVAL;
+	}
+
+	mbuf = msu_buffer_get(mode);
+	kfree(mode);
+	if (mbuf)
+		goto found;
 
 	return -EINVAL;
 
 found:
 	mutex_lock(&msc->buf_mutex);
+	ret = 0;
+
+	/* Same buffer: do nothing */
+	if (mbuf && mbuf == msc->mbuf) {
+		/* put the extra reference we just got */
+		msu_buffer_put(mbuf);
+		goto unlock;
+	}
+
 	ret = msc_buffer_unlocked_free_unless_used(msc);
-	if (!ret)
-		msc->mode = i;
+	if (ret)
+		goto unlock;
+
+	if (mbuf) {
+		void *mbuf_priv = mbuf->assign(dev, &i);
+
+		if (!mbuf_priv) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
+
+		msc_buffer_unassign(msc);
+		msc->mbuf_priv = mbuf_priv;
+		msc->mbuf = mbuf;
+	} else {
+		msc_buffer_unassign(msc);
+	}
+
+	msc->mode = i;
+
+unlock:
+	if (ret && mbuf)
+		msu_buffer_put(mbuf);
 	mutex_unlock(&msc->buf_mutex);
 
 	return ret ? ret : size;
@@ -1667,7 +2014,12 @@ win_switch_store(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	mutex_lock(&msc->buf_mutex);
-	if (msc->mode != MSC_MODE_MULTI)
+	/*
+	 * Window switch can only happen in the "multi" mode.
+	 * If a external buffer is engaged, they have the full
+	 * control over window switching.
+	 */
+	if (msc->mode != MSC_MODE_MULTI || msc->mbuf)
 		ret = -ENOTSUPP;
 	else
 		msc_win_switch(msc);
@@ -1720,10 +2072,7 @@ static int intel_th_msc_probe(struct intel_th_device *thdev)
 	msc->reg_base = base + msc->index * 0x100;
 	msc->msu_base = base;
 
-	err = intel_th_msu_init(msc);
-	if (err)
-		return err;
-
+	INIT_WORK(&msc->work, msc_work);
 	err = intel_th_msc_init(msc);
 	if (err)
 		return err;
@@ -1739,7 +2088,6 @@ static void intel_th_msc_remove(struct intel_th_device *thdev)
 	int ret;
 
 	intel_th_msc_deactivate(thdev);
-	intel_th_msu_deinit(msc);
 
 	/*
 	 * Buffers should not be used at this point except if the
diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h
index 574c16004cb2..3f527dd4d727 100644
--- a/drivers/hwtracing/intel_th/msu.h
+++ b/drivers/hwtracing/intel_th/msu.h
@@ -44,14 +44,6 @@ enum {
 #define M0BLIE		BIT(16)
 #define M1BLIE		BIT(24)
 
-/* MSC operating modes (MSC_MODE) */
-enum {
-	MSC_MODE_SINGLE	= 0,
-	MSC_MODE_MULTI,
-	MSC_MODE_EXI,
-	MSC_MODE_DEBUG,
-};
-
 /* MSCnSTS bits */
 #define MSCSTS_WRAPSTAT	BIT(1)	/* Wrap occurred */
 #define MSCSTS_PLE	BIT(2)	/* Pipeline Empty */
@@ -93,6 +85,16 @@ static inline unsigned long msc_data_sz(struct msc_block_desc *bdesc)
 	return bdesc->valid_dw * 4 - MSC_BDESC;
 }
 
+static inline unsigned long msc_total_sz(struct msc_block_desc *bdesc)
+{
+	return bdesc->valid_dw * 4;
+}
+
+static inline unsigned long msc_block_sz(struct msc_block_desc *bdesc)
+{
+	return bdesc->block_sz * 64 - MSC_BDESC;
+}
+
 static inline bool msc_block_wrapped(struct msc_block_desc *bdesc)
 {
 	if (bdesc->hw_tag & (MSC_HW_TAG_BLOCKWRAP | MSC_HW_TAG_WINWRAP))
@@ -104,7 +106,7 @@ static inline bool msc_block_wrapped(struct msc_block_desc *bdesc)
 static inline bool msc_block_last_written(struct msc_block_desc *bdesc)
 {
 	if ((bdesc->hw_tag & MSC_HW_TAG_ENDBIT) ||
-	    (msc_data_sz(bdesc) != DATA_IN_PAGE))
+	    (msc_data_sz(bdesc) != msc_block_sz(bdesc)))
 		return true;
 
 	return false;
diff --git a/include/linux/intel_th.h b/include/linux/intel_th.h
new file mode 100644
index 000000000000..9b7f4c22499c
--- /dev/null
+++ b/include/linux/intel_th.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel(R) Trace Hub data structures for implementing buffer sinks.
+ *
+ * Copyright (C) 2019 Intel Corporation.
+ */
+
+#ifndef _INTEL_TH_H_
+#define _INTEL_TH_H_
+
+#include <linux/scatterlist.h>
+
+/* MSC operating modes (MSC_MODE) */
+enum {
+	MSC_MODE_SINGLE	= 0,
+	MSC_MODE_MULTI,
+	MSC_MODE_EXI,
+	MSC_MODE_DEBUG,
+};
+
+struct msu_buffer {
+	const char	*name;
+	/*
+	 * ->assign() called when buffer 'mode' is set to this driver
+	 *   (aka mode_store())
+	 * @device:	struct device * of the msc
+	 * @mode:	allows the driver to set HW mode (see the enum above)
+	 * Returns:	a pointer to a private structure associated with this
+	 *		msc or NULL in case of error. This private structure
+	 *		will then be passed into all other callbacks.
+	 */
+	void	*(*assign)(struct device *dev, int *mode);
+	/* ->unassign():	some other mode is selected, clean up */
+	void	(*unassign)(void *priv);
+	/*
+	 * ->alloc_window(): allocate memory for the window of a given
+	 *		size
+	 * @sgt:	pointer to sg_table, can be overridden by the buffer
+	 *		driver, or kept intact
+	 * Returns:	number of sg table entries <= number of pages;
+	 *		0 is treated as an allocation failure.
+	 */
+	int	(*alloc_window)(void *priv, struct sg_table **sgt,
+				size_t size);
+	void	(*free_window)(void *priv, struct sg_table *sgt);
+	/* ->activate():	trace has started */
+	void	(*activate)(void *priv);
+	/* ->deactivate():	trace is about to stop */
+	void	(*deactivate)(void *priv);
+	/*
+	 * ->ready():	window @sgt is filled up to the last block OR
+	 *		tracing is stopped by the user; this window contains
+	 *		@bytes data. The window in question transitions into
+	 *		the "LOCKED" state, indicating that it can't be used
+	 *		by hardware. To clear this state and make the window
+	 *		available to the hardware again, call
+	 *		intel_th_msc_window_unlock().
+	 */
+	int	(*ready)(void *priv, struct sg_table *sgt, size_t bytes);
+};
+
+int intel_th_msu_buffer_register(const struct msu_buffer *mbuf,
+				 struct module *owner);
+void intel_th_msu_buffer_unregister(const struct msu_buffer *mbuf);
+void intel_th_msc_window_unlock(struct device *dev, struct sg_table *sgt);
+
+#define module_intel_th_msu_buffer(__buffer) \
+static int __init __buffer##_init(void) \
+{ \
+	return intel_th_msu_buffer_register(&(__buffer), THIS_MODULE); \
+} \
+module_init(__buffer##_init); \
+static void __exit __buffer##_exit(void) \
+{ \
+	intel_th_msu_buffer_unregister(&(__buffer)); \
+} \
+module_exit(__buffer##_exit);
+
+#endif /* _INTEL_TH_H_ */
-- 
cgit v1.2.3


From 515db266a9dace92b0cbaed9a6044dd5304b8ca9 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 16 Jul 2019 17:21:06 +0200
Subject: driver core: Remove device link creation limitation

If device_link_add() is called for a consumer/supplier pair with an
existing device link between them and the existing link's type is
not in agreement with the flags passed to that function by its
caller, NULL will be returned.  That is seriously inconvenient,
because it forces the callers of device_link_add() to worry about
what others may or may not do even if that is not relevant to them
for any other reasons.

It turns out, however, that this limitation can be made go away
relatively easily.

The underlying observation is that if DL_FLAG_STATELESS has been
passed to device_link_add() in flags for the given consumer/supplier
pair at least once, calling either device_link_del() or
device_link_remove() to release the link returned by it should work,
but there are no other requirements associated with that flag.  In
turn, if at least one of the callers of device_link_add() for the
given consumer/supplier pair has not passed DL_FLAG_STATELESS to it
in flags, the driver core should track the status of the link and act
on it as appropriate (ie. the link should be treated as "managed").
This means that DL_FLAG_STATELESS needs to be set for managed device
links and it should be valid to call device_link_del() or
device_link_remove() to drop references to them in certain
sutiations.

To allow that to happen, introduce a new (internal) device link flag
called DL_FLAG_MANAGED and make device_link_add() set it automatically
whenever DL_FLAG_STATELESS is not passed to it.  Also make it take
additional references to existing device links that were previously
stateless (that is, with DL_FLAG_STATELESS set and DL_FLAG_MANAGED
unset) and will need to be managed going forward and initialize
their status (which has been DL_STATE_NONE so far).

Accordingly, when a managed device link is dropped automatically
by the driver core, make it clear DL_FLAG_MANAGED, reset the link's
status back to DL_STATE_NONE and drop the reference to it associated
with DL_FLAG_MANAGED instead of just deleting it right away (to
allow it to stay around in case it still needs to be released
explicitly by someone).

With that, since setting DL_FLAG_STATELESS doesn't mean that the
device link in question is not managed any more, replace all of the
status-tracking checks against DL_FLAG_STATELESS with analogous
checks against DL_FLAG_MANAGED and update the documentation to
reflect these changes.

While at it, make device_link_add() reject flags that it does not
recognize, including DL_FLAG_MANAGED.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Saravana Kannan <saravanak@google.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Review-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/2305283.AStDPdUUnE@kreacher
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/device_link.rst |   4 +-
 drivers/base/core.c                      | 176 +++++++++++++++++--------------
 drivers/base/power/runtime.c             |   4 +-
 include/linux/device.h                   |   4 +-
 4 files changed, 106 insertions(+), 82 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst
index ae1e3d0394b0..1b5020ec6517 100644
--- a/Documentation/driver-api/device_link.rst
+++ b/Documentation/driver-api/device_link.rst
@@ -78,8 +78,8 @@ typically deleted in its ``->remove`` callback for symmetry.  That way, if the
 driver is compiled as a module, the device link is added on module load and
 orderly deleted on unload.  The same restrictions that apply to device link
 addition (e.g. exclusion of a parallel suspend/resume transition) apply equally
-to deletion.  Device links with ``DL_FLAG_STATELESS`` unset (i.e. managed
-device links) are deleted automatically by the driver core.
+to deletion.  Device links managed by the driver core are deleted automatically
+by it.
 
 Several flags may be specified on device link addition, two of which
 have already been mentioned above:  ``DL_FLAG_STATELESS`` to express that no
diff --git a/drivers/base/core.c b/drivers/base/core.c
index da84a73f2ba6..21cd08162219 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -124,6 +124,50 @@ static int device_is_dependent(struct device *dev, void *target)
 	return ret;
 }
 
+static void device_link_init_status(struct device_link *link,
+				    struct device *consumer,
+				    struct device *supplier)
+{
+	switch (supplier->links.status) {
+	case DL_DEV_PROBING:
+		switch (consumer->links.status) {
+		case DL_DEV_PROBING:
+			/*
+			 * A consumer driver can create a link to a supplier
+			 * that has not completed its probing yet as long as it
+			 * knows that the supplier is already functional (for
+			 * example, it has just acquired some resources from the
+			 * supplier).
+			 */
+			link->status = DL_STATE_CONSUMER_PROBE;
+			break;
+		default:
+			link->status = DL_STATE_DORMANT;
+			break;
+		}
+		break;
+	case DL_DEV_DRIVER_BOUND:
+		switch (consumer->links.status) {
+		case DL_DEV_PROBING:
+			link->status = DL_STATE_CONSUMER_PROBE;
+			break;
+		case DL_DEV_DRIVER_BOUND:
+			link->status = DL_STATE_ACTIVE;
+			break;
+		default:
+			link->status = DL_STATE_AVAILABLE;
+			break;
+		}
+		break;
+	case DL_DEV_UNBINDING:
+		link->status = DL_STATE_SUPPLIER_UNBIND;
+		break;
+	default:
+		link->status = DL_STATE_DORMANT;
+		break;
+	}
+}
+
 static int device_reorder_to_tail(struct device *dev, void *not_used)
 {
 	struct device_link *link;
@@ -165,6 +209,10 @@ void device_pm_move_to_tail(struct device *dev)
 	device_links_read_unlock(idx);
 }
 
+#define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \
+			       DL_FLAG_AUTOREMOVE_SUPPLIER | \
+			       DL_FLAG_AUTOPROBE_CONSUMER)
+
 /**
  * device_link_add - Create a link between two devices.
  * @consumer: Consumer end of the link.
@@ -179,9 +227,9 @@ void device_pm_move_to_tail(struct device *dev)
  * of the link.  If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be
  * ignored.
  *
- * If DL_FLAG_STATELESS is set in @flags, the link is not going to be managed by
- * the driver core and, in particular, the caller of this function is expected
- * to drop the reference to the link acquired by it directly.
+ * If DL_FLAG_STATELESS is set in @flags, the caller of this function is
+ * expected to release the link returned by it directly with the help of either
+ * device_link_del() or device_link_remove().
  *
  * If that flag is not set, however, the caller of this function is handing the
  * management of the link over to the driver core entirely and its return value
@@ -201,9 +249,16 @@ void device_pm_move_to_tail(struct device *dev)
  * be used to request the driver core to automaticall probe for a consmer
  * driver after successfully binding a driver to the supplier device.
  *
- * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER
- * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and
- * will cause NULL to be returned upfront.
+ * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER,
+ * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at
+ * the same time is invalid and will cause NULL to be returned upfront.
+ * However, if a device link between the given @consumer and @supplier pair
+ * exists already when this function is called for them, the existing link will
+ * be returned regardless of its current type and status (the link's flags may
+ * be modified then).  The caller of this function is then expected to treat
+ * the link as though it has just been created, so (in particular) if
+ * DL_FLAG_STATELESS was passed in @flags, the link needs to be released
+ * explicitly when not needed any more (as stated above).
  *
  * A side effect of the link creation is re-ordering of dpm_list and the
  * devices_kset list by moving the consumer device and all devices depending
@@ -220,10 +275,8 @@ struct device_link *device_link_add(struct device *consumer,
 	struct device_link *link;
 
 	if (!consumer || !supplier ||
-	    (flags & DL_FLAG_STATELESS &&
-	     flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
-		      DL_FLAG_AUTOREMOVE_SUPPLIER |
-		      DL_FLAG_AUTOPROBE_CONSUMER)) ||
+	    (flags & ~(DL_FLAG_STATELESS | DL_MANAGED_LINK_FLAGS)) ||
+	    (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) ||
 	    (flags & DL_FLAG_AUTOPROBE_CONSUMER &&
 	     flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
 		      DL_FLAG_AUTOREMOVE_SUPPLIER)))
@@ -236,6 +289,9 @@ struct device_link *device_link_add(struct device *consumer,
 		}
 	}
 
+	if (!(flags & DL_FLAG_STATELESS))
+		flags |= DL_FLAG_MANAGED;
+
 	device_links_write_lock();
 	device_pm_lock();
 
@@ -262,15 +318,6 @@ struct device_link *device_link_add(struct device *consumer,
 		if (link->consumer != consumer)
 			continue;
 
-		/*
-		 * Don't return a stateless link if the caller wants a stateful
-		 * one and vice versa.
-		 */
-		if (WARN_ON((flags & DL_FLAG_STATELESS) != (link->flags & DL_FLAG_STATELESS))) {
-			link = NULL;
-			goto out;
-		}
-
 		if (flags & DL_FLAG_PM_RUNTIME) {
 			if (!(link->flags & DL_FLAG_PM_RUNTIME)) {
 				pm_runtime_new_link(consumer);
@@ -281,6 +328,7 @@ struct device_link *device_link_add(struct device *consumer,
 		}
 
 		if (flags & DL_FLAG_STATELESS) {
+			link->flags |= DL_FLAG_STATELESS;
 			kref_get(&link->kref);
 			goto out;
 		}
@@ -299,6 +347,11 @@ struct device_link *device_link_add(struct device *consumer,
 			link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER |
 					 DL_FLAG_AUTOREMOVE_SUPPLIER);
 		}
+		if (!(link->flags & DL_FLAG_MANAGED)) {
+			kref_get(&link->kref);
+			link->flags |= DL_FLAG_MANAGED;
+			device_link_init_status(link, consumer, supplier);
+		}
 		goto out;
 	}
 
@@ -325,48 +378,10 @@ struct device_link *device_link_add(struct device *consumer,
 	kref_init(&link->kref);
 
 	/* Determine the initial link state. */
-	if (flags & DL_FLAG_STATELESS) {
+	if (flags & DL_FLAG_STATELESS)
 		link->status = DL_STATE_NONE;
-	} else {
-		switch (supplier->links.status) {
-		case DL_DEV_PROBING:
-			switch (consumer->links.status) {
-			case DL_DEV_PROBING:
-				/*
-				 * A consumer driver can create a link to a
-				 * supplier that has not completed its probing
-				 * yet as long as it knows that the supplier is
-				 * already functional (for example, it has just
-				 * acquired some resources from the supplier).
-				 */
-				link->status = DL_STATE_CONSUMER_PROBE;
-				break;
-			default:
-				link->status = DL_STATE_DORMANT;
-				break;
-			}
-			break;
-		case DL_DEV_DRIVER_BOUND:
-			switch (consumer->links.status) {
-			case DL_DEV_PROBING:
-				link->status = DL_STATE_CONSUMER_PROBE;
-				break;
-			case DL_DEV_DRIVER_BOUND:
-				link->status = DL_STATE_ACTIVE;
-				break;
-			default:
-				link->status = DL_STATE_AVAILABLE;
-				break;
-			}
-			break;
-		case DL_DEV_UNBINDING:
-			link->status = DL_STATE_SUPPLIER_UNBIND;
-			break;
-		default:
-			link->status = DL_STATE_DORMANT;
-			break;
-		}
-	}
+	else
+		device_link_init_status(link, consumer, supplier);
 
 	/*
 	 * Some callers expect the link creation during consumer driver probe to
@@ -528,7 +543,7 @@ static void device_links_missing_supplier(struct device *dev)
  * mark the link as "consumer probe in progress" to make the supplier removal
  * wait for us to complete (or bad things may happen).
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 int device_links_check_suppliers(struct device *dev)
 {
@@ -538,7 +553,7 @@ int device_links_check_suppliers(struct device *dev)
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.suppliers, c_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		if (link->status != DL_STATE_AVAILABLE) {
@@ -563,7 +578,7 @@ int device_links_check_suppliers(struct device *dev)
  *
  * Also change the status of @dev's links to suppliers to "active".
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 void device_links_driver_bound(struct device *dev)
 {
@@ -572,7 +587,7 @@ void device_links_driver_bound(struct device *dev)
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.consumers, s_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		/*
@@ -593,7 +608,7 @@ void device_links_driver_bound(struct device *dev)
 	}
 
 	list_for_each_entry(link, &dev->links.suppliers, c_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
@@ -605,6 +620,13 @@ void device_links_driver_bound(struct device *dev)
 	device_links_write_unlock();
 }
 
+static void device_link_drop_managed(struct device_link *link)
+{
+	link->flags &= ~DL_FLAG_MANAGED;
+	WRITE_ONCE(link->status, DL_STATE_NONE);
+	kref_put(&link->kref, __device_link_del);
+}
+
 /**
  * __device_links_no_driver - Update links of a device without a driver.
  * @dev: Device without a drvier.
@@ -615,18 +637,18 @@ void device_links_driver_bound(struct device *dev)
  * unless they already are in the "supplier unbind in progress" state in which
  * case they need not be updated.
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 static void __device_links_no_driver(struct device *dev)
 {
 	struct device_link *link, *ln;
 
 	list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER)
-			__device_link_del(&link->kref);
+			device_link_drop_managed(link);
 		else if (link->status == DL_STATE_CONSUMER_PROBE ||
 			 link->status == DL_STATE_ACTIVE)
 			WRITE_ONCE(link->status, DL_STATE_AVAILABLE);
@@ -643,7 +665,7 @@ static void __device_links_no_driver(struct device *dev)
  * %__device_links_no_driver() to update links to suppliers for it as
  * appropriate.
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 void device_links_no_driver(struct device *dev)
 {
@@ -652,7 +674,7 @@ void device_links_no_driver(struct device *dev)
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.consumers, s_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		/*
@@ -680,7 +702,7 @@ void device_links_no_driver(struct device *dev)
  * invoke %__device_links_no_driver() to update links to suppliers for it as
  * appropriate.
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 void device_links_driver_cleanup(struct device *dev)
 {
@@ -689,7 +711,7 @@ void device_links_driver_cleanup(struct device *dev)
 	device_links_write_lock();
 
 	list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER);
@@ -702,7 +724,7 @@ void device_links_driver_cleanup(struct device *dev)
 		 */
 		if (link->status == DL_STATE_SUPPLIER_UNBIND &&
 		    link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER)
-			__device_link_del(&link->kref);
+			device_link_drop_managed(link);
 
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
@@ -724,7 +746,7 @@ void device_links_driver_cleanup(struct device *dev)
  *
  * Return 'false' if there are no probing or active consumers.
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 bool device_links_busy(struct device *dev)
 {
@@ -734,7 +756,7 @@ bool device_links_busy(struct device *dev)
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.consumers, s_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		if (link->status == DL_STATE_CONSUMER_PROBE
@@ -764,7 +786,7 @@ bool device_links_busy(struct device *dev)
  * driver to unbind and start over (the consumer will not re-probe as we have
  * changed the state of the link already).
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links without the DL_FLAG_MANAGED flag set are ignored.
  */
 void device_links_unbind_consumers(struct device *dev)
 {
@@ -776,7 +798,7 @@ void device_links_unbind_consumers(struct device *dev)
 	list_for_each_entry(link, &dev->links.consumers, s_node) {
 		enum device_link_state status;
 
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		status = link->status;
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index b75335508d2c..45a8fbe6987a 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1624,7 +1624,7 @@ void pm_runtime_remove(struct device *dev)
  * runtime PM references to the device, drop the usage counter of the device
  * (as many times as needed).
  *
- * Links with the DL_FLAG_STATELESS flag set are ignored.
+ * Links with the DL_FLAG_MANAGED flag unset are ignored.
  *
  * Since the device is guaranteed to be runtime-active at the point this is
  * called, nothing else needs to be done here.
@@ -1641,7 +1641,7 @@ void pm_runtime_clean_up_links(struct device *dev)
 	idx = device_links_read_lock();
 
 	list_for_each_entry_rcu(link, &dev->links.consumers, s_node) {
-		if (link->flags & DL_FLAG_STATELESS)
+		if (!(link->flags & DL_FLAG_MANAGED))
 			continue;
 
 		while (refcount_dec_not_one(&link->rpm_active))
diff --git a/include/linux/device.h b/include/linux/device.h
index c330b75c6c57..c1c489921e4c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -833,12 +833,13 @@ enum device_link_state {
 /*
  * Device link flags.
  *
- * STATELESS: The core won't track the presence of supplier/consumer drivers.
+ * STATELESS: The core will not remove this link automatically.
  * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind.
  * PM_RUNTIME: If set, the runtime PM framework will use this link.
  * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation.
  * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind.
  * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds.
+ * MANAGED: The core tracks presence of supplier/consumer drivers (internal).
  */
 #define DL_FLAG_STATELESS		BIT(0)
 #define DL_FLAG_AUTOREMOVE_CONSUMER	BIT(1)
@@ -846,6 +847,7 @@ enum device_link_state {
 #define DL_FLAG_RPM_ACTIVE		BIT(3)
 #define DL_FLAG_AUTOREMOVE_SUPPLIER	BIT(4)
 #define DL_FLAG_AUTOPROBE_CONSUMER	BIT(5)
+#define DL_FLAG_MANAGED			BIT(6)
 
 /**
  * struct device_link - Device link representation.
-- 
cgit v1.2.3


From 016413d967061fc2eb6798a487b3022bef7698a6 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Thu, 4 Apr 2019 19:43:29 -0400
Subject: media: v4l2-async: Get fwnode reference when putting it to the
 notifier's list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The v4l2_async_notifier_add_fwnode_subdev() did not take a reference of
the added fwnode, relying on the caller to handle that instead, in essence
putting the fwnode to be added if there was an error.

As the reference is eventually released during the notifier cleanup, this
is not intuitive nor logical. Improve this by always getting a reference
when the function succeeds, and the caller releasing the reference when it
does not *itself* need it anymore.

Luckily, perhaps, there were just a handful of callers using the function.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Tested-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/platform/am437x/am437x-vpfe.c   |  5 ++---
 drivers/media/platform/davinci/vpif_capture.c | 13 ++++++-------
 drivers/media/platform/qcom/camss/camss.c     |  2 +-
 drivers/media/platform/xilinx/xilinx-vipp.c   |  2 +-
 drivers/media/v4l2-core/v4l2-async.c          |  3 ++-
 drivers/media/v4l2-core/v4l2-fwnode.c         | 23 ++++++-----------------
 include/media/v4l2-async.h                    |  5 +++--
 7 files changed, 21 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c
index 105237edbb58..3b1d60ca859b 100644
--- a/drivers/media/platform/am437x/am437x-vpfe.c
+++ b/drivers/media/platform/am437x/am437x-vpfe.c
@@ -2489,10 +2489,9 @@ vpfe_get_pdata(struct vpfe_device *vpfe)
 		pdata->asd[i] = v4l2_async_notifier_add_fwnode_subdev(
 			&vpfe->notifier, of_fwnode_handle(rem),
 			sizeof(struct v4l2_async_subdev));
-		if (IS_ERR(pdata->asd[i])) {
-			of_node_put(rem);
+		of_node_put(rem);
+		if (IS_ERR(pdata->asd[i]))
 			goto cleanup;
-		}
 	}
 
 	of_node_put(endpoint);
diff --git a/drivers/media/platform/davinci/vpif_capture.c b/drivers/media/platform/davinci/vpif_capture.c
index 4bbb4fdeae92..71f4fe882d13 100644
--- a/drivers/media/platform/davinci/vpif_capture.c
+++ b/drivers/media/platform/davinci/vpif_capture.c
@@ -1502,6 +1502,7 @@ static struct vpif_capture_config *
 vpif_capture_get_pdata(struct platform_device *pdev)
 {
 	struct device_node *endpoint = NULL;
+	struct device_node *rem = NULL;
 	struct vpif_capture_config *pdata;
 	struct vpif_subdev_info *sdinfo;
 	struct vpif_capture_chan_config *chan;
@@ -1532,7 +1533,6 @@ vpif_capture_get_pdata(struct platform_device *pdev)
 
 	for (i = 0; i < VPIF_CAPTURE_NUM_CHANNELS; i++) {
 		struct v4l2_fwnode_endpoint bus_cfg = { .bus_type = 0 };
-		struct device_node *rem;
 		unsigned int flags;
 		int err;
 
@@ -1554,10 +1554,8 @@ vpif_capture_get_pdata(struct platform_device *pdev)
 					    VPIF_CAPTURE_NUM_CHANNELS,
 					    sizeof(*chan->inputs),
 					    GFP_KERNEL);
-		if (!chan->inputs) {
-			of_node_put(rem);
+		if (!chan->inputs)
 			goto err_cleanup;
-		}
 
 		chan->input_count++;
 		chan->inputs[i].input.type = V4L2_INPUT_TYPE_CAMERA;
@@ -1589,10 +1587,10 @@ vpif_capture_get_pdata(struct platform_device *pdev)
 		pdata->asd[i] = v4l2_async_notifier_add_fwnode_subdev(
 			&vpif_obj.notifier, of_fwnode_handle(rem),
 			sizeof(struct v4l2_async_subdev));
-		if (IS_ERR(pdata->asd[i])) {
-			of_node_put(rem);
+		if (IS_ERR(pdata->asd[i]))
 			goto err_cleanup;
-		}
+
+		of_node_put(rem);
 	}
 
 done:
@@ -1604,6 +1602,7 @@ done:
 	return pdata;
 
 err_cleanup:
+	of_node_put(rem);
 	of_node_put(endpoint);
 	v4l2_async_notifier_cleanup(&vpif_obj.notifier);
 
diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c
index 63da18773d24..3fdc9f964a3c 100644
--- a/drivers/media/platform/qcom/camss/camss.c
+++ b/drivers/media/platform/qcom/camss/camss.c
@@ -486,9 +486,9 @@ static int camss_of_parse_ports(struct camss *camss)
 		asd = v4l2_async_notifier_add_fwnode_subdev(
 			&camss->notifier, of_fwnode_handle(remote),
 			sizeof(*csd));
+		of_node_put(remote);
 		if (IS_ERR(asd)) {
 			ret = PTR_ERR(asd);
-			of_node_put(remote);
 			goto err_cleanup;
 		}
 
diff --git a/drivers/media/platform/xilinx/xilinx-vipp.c b/drivers/media/platform/xilinx/xilinx-vipp.c
index edce0402155d..cc2856efea59 100644
--- a/drivers/media/platform/xilinx/xilinx-vipp.c
+++ b/drivers/media/platform/xilinx/xilinx-vipp.c
@@ -385,9 +385,9 @@ static int xvip_graph_parse_one(struct xvip_composite_device *xdev,
 		asd = v4l2_async_notifier_add_fwnode_subdev(
 			&xdev->notifier, remote,
 			sizeof(struct xvip_graph_entity));
+		fwnode_handle_put(remote);
 		if (IS_ERR(asd)) {
 			ret = PTR_ERR(asd);
-			fwnode_handle_put(remote);
 			goto err_notifier_cleanup;
 		}
 	}
diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c
index 8d307b538f52..7d364c545a40 100644
--- a/drivers/media/v4l2-core/v4l2-async.c
+++ b/drivers/media/v4l2-core/v4l2-async.c
@@ -593,10 +593,11 @@ v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier,
 		return ERR_PTR(-ENOMEM);
 
 	asd->match_type = V4L2_ASYNC_MATCH_FWNODE;
-	asd->match.fwnode = fwnode;
+	asd->match.fwnode = fwnode_handle_get(fwnode);
 
 	ret = v4l2_async_notifier_add_subdev(notifier, asd);
 	if (ret) {
+		fwnode_handle_put(fwnode);
 		kfree(asd);
 		return ERR_PTR(ret);
 	}
diff --git a/drivers/media/v4l2-core/v4l2-fwnode.c b/drivers/media/v4l2-core/v4l2-fwnode.c
index 6972c5af25c6..3bd1888787eb 100644
--- a/drivers/media/v4l2-core/v4l2-fwnode.c
+++ b/drivers/media/v4l2-core/v4l2-fwnode.c
@@ -775,23 +775,17 @@ static int v4l2_fwnode_reference_parse(struct device *dev,
 		asd = v4l2_async_notifier_add_fwnode_subdev(notifier,
 							    args.fwnode,
 							    sizeof(*asd));
+		fwnode_handle_put(args.fwnode);
 		if (IS_ERR(asd)) {
-			ret = PTR_ERR(asd);
 			/* not an error if asd already exists */
-			if (ret == -EEXIST) {
-				fwnode_handle_put(args.fwnode);
+			if (PTR_ERR(asd) == -EEXIST)
 				continue;
-			}
 
-			goto error;
+			return PTR_ERR(asd);
 		}
 	}
 
 	return 0;
-
-error:
-	fwnode_handle_put(args.fwnode);
-	return ret;
 }
 
 /*
@@ -1081,23 +1075,18 @@ v4l2_fwnode_reference_parse_int_props(struct device *dev,
 
 		asd = v4l2_async_notifier_add_fwnode_subdev(notifier, fwnode,
 							    sizeof(*asd));
+		fwnode_handle_put(fwnode);
 		if (IS_ERR(asd)) {
 			ret = PTR_ERR(asd);
 			/* not an error if asd already exists */
-			if (ret == -EEXIST) {
-				fwnode_handle_put(fwnode);
+			if (ret == -EEXIST)
 				continue;
-			}
 
-			goto error;
+			return PTR_ERR(asd);
 		}
 	}
 
 	return !fwnode || PTR_ERR(fwnode) == -ENOENT ? 0 : PTR_ERR(fwnode);
-
-error:
-	fwnode_handle_put(fwnode);
-	return ret;
 }
 
 int v4l2_async_notifier_parse_fwnode_sensor_common(struct device *dev,
diff --git a/include/media/v4l2-async.h b/include/media/v4l2-async.h
index 2e3d93f742a3..a95f7fd8969e 100644
--- a/include/media/v4l2-async.h
+++ b/include/media/v4l2-async.h
@@ -172,8 +172,9 @@ int v4l2_async_notifier_add_subdev(struct v4l2_async_notifier *notifier,
  *		     the driver's async sub-device struct, i.e. both
  *		     begin at the same memory address.
  *
- * Allocate a fwnode-matched asd of size asd_struct_size, and add it
- * to the notifiers @asd_list.
+ * Allocate a fwnode-matched asd of size asd_struct_size, and add it to the
+ * notifiers @asd_list. The function also gets a reference of the fwnode which
+ * is released later at notifier cleanup time.
  */
 struct v4l2_async_subdev *
 v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier,
-- 
cgit v1.2.3


From 820342aca05188c9af4b468d6c41b3327161f7ad Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Thu, 28 Feb 2019 08:25:28 -0500
Subject: media: v4l2-async: Add v4l2_async_notifier_add_fwnode_remote_subdev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v4l2_async_notifier_add_fwnode_remote_subdev is a convenience function for
parsing information on V4L2 fwnode subdevs.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Tested-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-async.c | 23 +++++++++++++++++++++++
 include/media/v4l2-async.h           | 25 +++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c
index 7d364c545a40..dc4f470ea6a7 100644
--- a/drivers/media/v4l2-core/v4l2-async.c
+++ b/drivers/media/v4l2-core/v4l2-async.c
@@ -606,6 +606,29 @@ v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier,
 }
 EXPORT_SYMBOL_GPL(v4l2_async_notifier_add_fwnode_subdev);
 
+int
+v4l2_async_notifier_add_fwnode_remote_subdev(struct v4l2_async_notifier *notif,
+					     struct fwnode_handle *endpoint,
+					     struct v4l2_async_subdev *asd)
+{
+	struct fwnode_handle *remote;
+	int ret;
+
+	remote = fwnode_graph_get_remote_port_parent(endpoint);
+	if (!remote)
+		return -ENOTCONN;
+
+	asd->match_type = V4L2_ASYNC_MATCH_FWNODE;
+	asd->match.fwnode = remote;
+
+	ret = v4l2_async_notifier_add_subdev(notif, asd);
+	if (ret)
+		fwnode_handle_put(remote);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(v4l2_async_notifier_add_fwnode_remote_subdev);
+
 struct v4l2_async_subdev *
 v4l2_async_notifier_add_i2c_subdev(struct v4l2_async_notifier *notifier,
 				   int adapter_id, unsigned short address,
diff --git a/include/media/v4l2-async.h b/include/media/v4l2-async.h
index a95f7fd8969e..8319284c93cb 100644
--- a/include/media/v4l2-async.h
+++ b/include/media/v4l2-async.h
@@ -181,6 +181,31 @@ v4l2_async_notifier_add_fwnode_subdev(struct v4l2_async_notifier *notifier,
 				      struct fwnode_handle *fwnode,
 				      unsigned int asd_struct_size);
 
+/**
+ * v4l2_async_notifier_add_fwnode_remote_subdev - Allocate and add a fwnode
+ *						  remote async subdev to the
+ *						  notifier's master asd_list.
+ *
+ * @notif: pointer to &struct v4l2_async_notifier
+ * @endpoint: local endpoint pointing to the remote sub-device to be matched
+ * @asd: Async sub-device struct allocated by the caller. The &struct
+ *	 v4l2_async_subdev shall be the first member of the driver's async
+ *	 sub-device struct, i.e. both begin at the same memory address.
+ *
+ * Gets the remote endpoint of a given local endpoint, set it up for fwnode
+ * matching and adds the async sub-device to the notifier's @asd_list. The
+ * function also gets a reference of the fwnode which is released later at
+ * notifier cleanup time.
+ *
+ * This is just like @v4l2_async_notifier_add_fwnode_subdev, but with the
+ * exception that the fwnode refers to a local endpoint, not the remote one, and
+ * the function relies on the caller to allocate the async sub-device struct.
+ */
+int
+v4l2_async_notifier_add_fwnode_remote_subdev(struct v4l2_async_notifier *notif,
+					     struct fwnode_handle *endpoint,
+					     struct v4l2_async_subdev *asd);
+
 /**
  * v4l2_async_notifier_add_i2c_subdev - Allocate and add an i2c async
  *				subdev to the notifier's master asd_list.
-- 
cgit v1.2.3


From 16d51a590a8ce3befb1308e0e7ab77f3b661af33 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 16 Jul 2019 17:20:45 +0200
Subject: sched/fair: Don't free p->numa_faults with concurrent readers

When going through execve(), zero out the NUMA fault statistics instead of
freeing them.

During execve, the task is reachable through procfs and the scheduler. A
concurrent /proc/*/sched reader can read data from a freed ->numa_faults
allocation (confirmed by KASAN) and write it back to userspace.
I believe that it would also be possible for a use-after-free read to occur
through a race between a NUMA fault and execve(): task_numa_fault() can
lead to task_numa_compare(), which invokes task_weight() on the currently
running task of a different CPU.

Another way to fix this would be to make ->numa_faults RCU-managed or add
extra locking, but it seems easier to wipe the NUMA fault statistics on
execve.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Fixes: 82727018b0d3 ("sched/numa: Call task_numa_free() from do_execve()")
Link: https://lkml.kernel.org/r/20190716152047.14424-1-jannh@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                            |  2 +-
 include/linux/sched/numa_balancing.h |  4 ++--
 kernel/fork.c                        |  2 +-
 kernel/sched/fair.c                  | 24 ++++++++++++++++++++----
 4 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index c71cbfe6826a..f7f6a140856a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1828,7 +1828,7 @@ static int __do_execve_file(int fd, struct filename *filename,
 	membarrier_execve(current);
 	rseq_execve(current);
 	acct_update_integrals(current);
-	task_numa_free(current);
+	task_numa_free(current, false);
 	free_bprm(bprm);
 	kfree(pathbuf);
 	if (filename)
diff --git a/include/linux/sched/numa_balancing.h b/include/linux/sched/numa_balancing.h
index e7dd04a84ba8..3988762efe15 100644
--- a/include/linux/sched/numa_balancing.h
+++ b/include/linux/sched/numa_balancing.h
@@ -19,7 +19,7 @@
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
 extern void set_numabalancing_state(bool enabled);
-extern void task_numa_free(struct task_struct *p);
+extern void task_numa_free(struct task_struct *p, bool final);
 extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
 					int src_nid, int dst_cpu);
 #else
@@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
 static inline void set_numabalancing_state(bool enabled)
 {
 }
-static inline void task_numa_free(struct task_struct *p)
+static inline void task_numa_free(struct task_struct *p, bool final)
 {
 }
 static inline bool should_numa_migrate_memory(struct task_struct *p,
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..2852d0e76ea3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -726,7 +726,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(tsk == current);
 
 	cgroup_free(tsk);
-	task_numa_free(tsk);
+	task_numa_free(tsk, true);
 	security_task_free(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 036be95a87e9..6adb0e0f5feb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2353,13 +2353,23 @@ no_join:
 	return;
 }
 
-void task_numa_free(struct task_struct *p)
+/*
+ * Get rid of NUMA staticstics associated with a task (either current or dead).
+ * If @final is set, the task is dead and has reached refcount zero, so we can
+ * safely free all relevant data structures. Otherwise, there might be
+ * concurrent reads from places like load balancing and procfs, and we should
+ * reset the data back to default state without freeing ->numa_faults.
+ */
+void task_numa_free(struct task_struct *p, bool final)
 {
 	struct numa_group *grp = p->numa_group;
-	void *numa_faults = p->numa_faults;
+	unsigned long *numa_faults = p->numa_faults;
 	unsigned long flags;
 	int i;
 
+	if (!numa_faults)
+		return;
+
 	if (grp) {
 		spin_lock_irqsave(&grp->lock, flags);
 		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
@@ -2372,8 +2382,14 @@ void task_numa_free(struct task_struct *p)
 		put_numa_group(grp);
 	}
 
-	p->numa_faults = NULL;
-	kfree(numa_faults);
+	if (final) {
+		p->numa_faults = NULL;
+		kfree(numa_faults);
+	} else {
+		p->total_numa_faults = 0;
+		for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
+			numa_faults[i] = 0;
+	}
 }
 
 /*
-- 
cgit v1.2.3


From cb361d8cdef69990f6b4504dc1fd9a594d983c97 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 16 Jul 2019 17:20:47 +0200
Subject: sched/fair: Use RCU accessors consistently for ->numa_group

The old code used RCU annotations and accessors inconsistently for
->numa_group, which can lead to use-after-frees and NULL dereferences.

Let all accesses to ->numa_group use proper RCU helpers to prevent such
issues.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Fixes: 8c8a743c5087 ("sched/numa: Use {cpu, pid} to create task groups for shared faults")
Link: https://lkml.kernel.org/r/20190716152047.14424-3-jannh@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  10 ++++-
 kernel/sched/fair.c   | 120 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 90 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8dc1811487f5..9f51932bd543 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1092,7 +1092,15 @@ struct task_struct {
 	u64				last_sum_exec_runtime;
 	struct callback_head		numa_work;
 
-	struct numa_group		*numa_group;
+	/*
+	 * This pointer is only modified for current in syscall and
+	 * pagefault context (and for tasks being destroyed), so it can be read
+	 * from any of the following contexts:
+	 *  - RCU read-side critical section
+	 *  - current->numa_group from everywhere
+	 *  - task's runqueue locked, task not running
+	 */
+	struct numa_group __rcu		*numa_group;
 
 	/*
 	 * numa_faults is an array split into four regions:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6adb0e0f5feb..bc9cfeaac8bd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1086,6 +1086,21 @@ struct numa_group {
 	unsigned long faults[0];
 };
 
+/*
+ * For functions that can be called in multiple contexts that permit reading
+ * ->numa_group (see struct task_struct for locking rules).
+ */
+static struct numa_group *deref_task_numa_group(struct task_struct *p)
+{
+	return rcu_dereference_check(p->numa_group, p == current ||
+		(lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+}
+
+static struct numa_group *deref_curr_numa_group(struct task_struct *p)
+{
+	return rcu_dereference_protected(p->numa_group, p == current);
+}
+
 static inline unsigned long group_faults_priv(struct numa_group *ng);
 static inline unsigned long group_faults_shared(struct numa_group *ng);
 
@@ -1129,10 +1144,12 @@ static unsigned int task_scan_start(struct task_struct *p)
 {
 	unsigned long smin = task_scan_min(p);
 	unsigned long period = smin;
+	struct numa_group *ng;
 
 	/* Scale the maximum scan period with the amount of shared memory. */
-	if (p->numa_group) {
-		struct numa_group *ng = p->numa_group;
+	rcu_read_lock();
+	ng = rcu_dereference(p->numa_group);
+	if (ng) {
 		unsigned long shared = group_faults_shared(ng);
 		unsigned long private = group_faults_priv(ng);
 
@@ -1140,6 +1157,7 @@ static unsigned int task_scan_start(struct task_struct *p)
 		period *= shared + 1;
 		period /= private + shared + 1;
 	}
+	rcu_read_unlock();
 
 	return max(smin, period);
 }
@@ -1148,13 +1166,14 @@ static unsigned int task_scan_max(struct task_struct *p)
 {
 	unsigned long smin = task_scan_min(p);
 	unsigned long smax;
+	struct numa_group *ng;
 
 	/* Watch for min being lower than max due to floor calculations */
 	smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
 
 	/* Scale the maximum scan period with the amount of shared memory. */
-	if (p->numa_group) {
-		struct numa_group *ng = p->numa_group;
+	ng = deref_curr_numa_group(p);
+	if (ng) {
 		unsigned long shared = group_faults_shared(ng);
 		unsigned long private = group_faults_priv(ng);
 		unsigned long period = smax;
@@ -1186,7 +1205,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
 	p->numa_scan_period		= sysctl_numa_balancing_scan_delay;
 	p->numa_work.next		= &p->numa_work;
 	p->numa_faults			= NULL;
-	p->numa_group			= NULL;
+	RCU_INIT_POINTER(p->numa_group, NULL);
 	p->last_task_numa_placement	= 0;
 	p->last_sum_exec_runtime	= 0;
 
@@ -1233,7 +1252,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
 
 pid_t task_numa_group_id(struct task_struct *p)
 {
-	return p->numa_group ? p->numa_group->gid : 0;
+	struct numa_group *ng;
+	pid_t gid = 0;
+
+	rcu_read_lock();
+	ng = rcu_dereference(p->numa_group);
+	if (ng)
+		gid = ng->gid;
+	rcu_read_unlock();
+
+	return gid;
 }
 
 /*
@@ -1258,11 +1286,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
 
 static inline unsigned long group_faults(struct task_struct *p, int nid)
 {
-	if (!p->numa_group)
+	struct numa_group *ng = deref_task_numa_group(p);
+
+	if (!ng)
 		return 0;
 
-	return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
-		p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
+	return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
+		ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
 }
 
 static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
@@ -1400,12 +1430,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
 static inline unsigned long group_weight(struct task_struct *p, int nid,
 					 int dist)
 {
+	struct numa_group *ng = deref_task_numa_group(p);
 	unsigned long faults, total_faults;
 
-	if (!p->numa_group)
+	if (!ng)
 		return 0;
 
-	total_faults = p->numa_group->total_faults;
+	total_faults = ng->total_faults;
 
 	if (!total_faults)
 		return 0;
@@ -1419,7 +1450,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid,
 bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 				int src_nid, int dst_cpu)
 {
-	struct numa_group *ng = p->numa_group;
+	struct numa_group *ng = deref_curr_numa_group(p);
 	int dst_nid = cpu_to_node(dst_cpu);
 	int last_cpupid, this_cpupid;
 
@@ -1600,13 +1631,14 @@ static bool load_too_imbalanced(long src_load, long dst_load,
 static void task_numa_compare(struct task_numa_env *env,
 			      long taskimp, long groupimp, bool maymove)
 {
+	struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
 	struct rq *dst_rq = cpu_rq(env->dst_cpu);
+	long imp = p_ng ? groupimp : taskimp;
 	struct task_struct *cur;
 	long src_load, dst_load;
-	long load;
-	long imp = env->p->numa_group ? groupimp : taskimp;
-	long moveimp = imp;
 	int dist = env->dist;
+	long moveimp = imp;
+	long load;
 
 	if (READ_ONCE(dst_rq->numa_migrate_on))
 		return;
@@ -1645,21 +1677,22 @@ static void task_numa_compare(struct task_numa_env *env,
 	 * If dst and source tasks are in the same NUMA group, or not
 	 * in any group then look only at task weights.
 	 */
-	if (cur->numa_group == env->p->numa_group) {
+	cur_ng = rcu_dereference(cur->numa_group);
+	if (cur_ng == p_ng) {
 		imp = taskimp + task_weight(cur, env->src_nid, dist) -
 		      task_weight(cur, env->dst_nid, dist);
 		/*
 		 * Add some hysteresis to prevent swapping the
 		 * tasks within a group over tiny differences.
 		 */
-		if (cur->numa_group)
+		if (cur_ng)
 			imp -= imp / 16;
 	} else {
 		/*
 		 * Compare the group weights. If a task is all by itself
 		 * (not part of a group), use the task weight instead.
 		 */
-		if (cur->numa_group && env->p->numa_group)
+		if (cur_ng && p_ng)
 			imp += group_weight(cur, env->src_nid, dist) -
 			       group_weight(cur, env->dst_nid, dist);
 		else
@@ -1757,11 +1790,12 @@ static int task_numa_migrate(struct task_struct *p)
 		.best_imp = 0,
 		.best_cpu = -1,
 	};
+	unsigned long taskweight, groupweight;
 	struct sched_domain *sd;
+	long taskimp, groupimp;
+	struct numa_group *ng;
 	struct rq *best_rq;
-	unsigned long taskweight, groupweight;
 	int nid, ret, dist;
-	long taskimp, groupimp;
 
 	/*
 	 * Pick the lowest SD_NUMA domain, as that would have the smallest
@@ -1807,7 +1841,8 @@ static int task_numa_migrate(struct task_struct *p)
 	 *   multiple NUMA nodes; in order to better consolidate the group,
 	 *   we need to check other locations.
 	 */
-	if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
+	ng = deref_curr_numa_group(p);
+	if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
 		for_each_online_node(nid) {
 			if (nid == env.src_nid || nid == p->numa_preferred_nid)
 				continue;
@@ -1840,7 +1875,7 @@ static int task_numa_migrate(struct task_struct *p)
 	 * A task that migrated to a second choice node will be better off
 	 * trying for a better one later. Do not set the preferred node here.
 	 */
-	if (p->numa_group) {
+	if (ng) {
 		if (env.best_cpu == -1)
 			nid = env.src_nid;
 		else
@@ -2135,6 +2170,7 @@ static void task_numa_placement(struct task_struct *p)
 	unsigned long total_faults;
 	u64 runtime, period;
 	spinlock_t *group_lock = NULL;
+	struct numa_group *ng;
 
 	/*
 	 * The p->mm->numa_scan_seq field gets updated without
@@ -2152,8 +2188,9 @@ static void task_numa_placement(struct task_struct *p)
 	runtime = numa_get_avg_runtime(p, &period);
 
 	/* If the task is part of a group prevent parallel updates to group stats */
-	if (p->numa_group) {
-		group_lock = &p->numa_group->lock;
+	ng = deref_curr_numa_group(p);
+	if (ng) {
+		group_lock = &ng->lock;
 		spin_lock_irq(group_lock);
 	}
 
@@ -2194,7 +2231,7 @@ static void task_numa_placement(struct task_struct *p)
 			p->numa_faults[cpu_idx] += f_diff;
 			faults += p->numa_faults[mem_idx];
 			p->total_numa_faults += diff;
-			if (p->numa_group) {
+			if (ng) {
 				/*
 				 * safe because we can only change our own group
 				 *
@@ -2202,14 +2239,14 @@ static void task_numa_placement(struct task_struct *p)
 				 * nid and priv in a specific region because it
 				 * is at the beginning of the numa_faults array.
 				 */
-				p->numa_group->faults[mem_idx] += diff;
-				p->numa_group->faults_cpu[mem_idx] += f_diff;
-				p->numa_group->total_faults += diff;
-				group_faults += p->numa_group->faults[mem_idx];
+				ng->faults[mem_idx] += diff;
+				ng->faults_cpu[mem_idx] += f_diff;
+				ng->total_faults += diff;
+				group_faults += ng->faults[mem_idx];
 			}
 		}
 
-		if (!p->numa_group) {
+		if (!ng) {
 			if (faults > max_faults) {
 				max_faults = faults;
 				max_nid = nid;
@@ -2220,8 +2257,8 @@ static void task_numa_placement(struct task_struct *p)
 		}
 	}
 
-	if (p->numa_group) {
-		numa_group_count_active_nodes(p->numa_group);
+	if (ng) {
+		numa_group_count_active_nodes(ng);
 		spin_unlock_irq(group_lock);
 		max_nid = preferred_group_nid(p, max_nid);
 	}
@@ -2255,7 +2292,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 	int cpu = cpupid_to_cpu(cpupid);
 	int i;
 
-	if (unlikely(!p->numa_group)) {
+	if (unlikely(!deref_curr_numa_group(p))) {
 		unsigned int size = sizeof(struct numa_group) +
 				    4*nr_node_ids*sizeof(unsigned long);
 
@@ -2291,7 +2328,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
 	if (!grp)
 		goto no_join;
 
-	my_grp = p->numa_group;
+	my_grp = deref_curr_numa_group(p);
 	if (grp == my_grp)
 		goto no_join;
 
@@ -2362,7 +2399,8 @@ no_join:
  */
 void task_numa_free(struct task_struct *p, bool final)
 {
-	struct numa_group *grp = p->numa_group;
+	/* safe: p either is current or is being freed by current */
+	struct numa_group *grp = rcu_dereference_raw(p->numa_group);
 	unsigned long *numa_faults = p->numa_faults;
 	unsigned long flags;
 	int i;
@@ -2442,7 +2480,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 	 * actively using should be counted as local. This allows the
 	 * scan rate to slow down when a workload has settled down.
 	 */
-	ng = p->numa_group;
+	ng = deref_curr_numa_group(p);
 	if (!priv && !local && ng && ng->active_nodes > 1 &&
 				numa_is_active_node(cpu_node, ng) &&
 				numa_is_active_node(mem_node, ng))
@@ -10460,18 +10498,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
 {
 	int node;
 	unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
+	struct numa_group *ng;
 
+	rcu_read_lock();
+	ng = rcu_dereference(p->numa_group);
 	for_each_online_node(node) {
 		if (p->numa_faults) {
 			tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
 			tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
 		}
-		if (p->numa_group) {
-			gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
-			gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
+		if (ng) {
+			gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
+			gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
 		}
 		print_numa_stats(m, node, tsf, tpf, gsf, gpf);
 	}
+	rcu_read_unlock();
 }
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
-- 
cgit v1.2.3


From 364f6afc4f5537b79cf454eb35cae92920676075 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 22 Jul 2019 11:24:40 -0700
Subject: locking/lockdep: Make it clear that what lock_class::key points at is
 not modified

This patch does not change the behavior of the lockdep code.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/r/20190722182443.216015-2-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h            | 2 +-
 kernel/locking/lockdep.c           | 2 +-
 kernel/locking/lockdep_internals.h | 3 ++-
 kernel/locking/lockdep_proc.c      | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 0b0d7259276d..cdb3c2f06092 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -97,7 +97,7 @@ struct lock_class {
 	 */
 	struct list_head		locks_after, locks_before;
 
-	struct lockdep_subclass_key	*key;
+	const struct lockdep_subclass_key *key;
 	unsigned int			subclass;
 	unsigned int			dep_gen_id;
 
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 4861cf8e274b..af6627866191 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -511,7 +511,7 @@ static const char *usage_str[] =
 };
 #endif
 
-const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
+const char *__get_key_name(const struct lockdep_subclass_key *key, char *str)
 {
 	return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str);
 }
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index cc83568d5012..2e518369add4 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -116,7 +116,8 @@ extern struct lock_chain lock_chains[];
 extern void get_usage_chars(struct lock_class *class,
 			    char usage[LOCK_USAGE_CHARS]);
 
-extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
+extern const char *__get_key_name(const struct lockdep_subclass_key *key,
+				  char *str);
 
 struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i);
 
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index bda006f8a88b..ed9842425cac 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -399,7 +399,7 @@ static void seq_lock_time(struct seq_file *m, struct lock_time *lt)
 
 static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
 {
-	struct lockdep_subclass_key *ckey;
+	const struct lockdep_subclass_key *ckey;
 	struct lock_class_stats *stats;
 	struct lock_class *class;
 	const char *cname;
-- 
cgit v1.2.3


From a2970421640bd9b6a78f2685d7750a791abdfd4e Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 22 Jul 2019 11:24:41 -0700
Subject: stacktrace: Constify 'entries' arguments

Make it clear to humans and to the compiler that the stack trace
('entries') arguments are not modified.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Link: https://lkml.kernel.org/r/20190722182443.216015-3-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/stacktrace.h | 4 ++--
 kernel/stacktrace.c        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index f0cfd12cb45e..83bd8cb475d7 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -9,9 +9,9 @@ struct task_struct;
 struct pt_regs;
 
 #ifdef CONFIG_STACKTRACE
-void stack_trace_print(unsigned long *trace, unsigned int nr_entries,
+void stack_trace_print(const unsigned long *trace, unsigned int nr_entries,
 		       int spaces);
-int stack_trace_snprint(char *buf, size_t size, unsigned long *entries,
+int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries,
 			unsigned int nr_entries, int spaces);
 unsigned int stack_trace_save(unsigned long *store, unsigned int size,
 			      unsigned int skipnr);
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index f5440abb7532..6d1f68b7e528 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -20,7 +20,7 @@
  * @nr_entries:	Number of entries in the storage array
  * @spaces:	Number of leading spaces to print
  */
-void stack_trace_print(unsigned long *entries, unsigned int nr_entries,
+void stack_trace_print(const unsigned long *entries, unsigned int nr_entries,
 		       int spaces)
 {
 	unsigned int i;
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(stack_trace_print);
  *
  * Return: Number of bytes printed.
  */
-int stack_trace_snprint(char *buf, size_t size, unsigned long *entries,
+int stack_trace_snprint(char *buf, size_t size, const unsigned long *entries,
 			unsigned int nr_entries, int spaces)
 {
 	unsigned int generated, i, total = 0;
-- 
cgit v1.2.3


From 12593b7467f9130b64a6d4b6a26ed4ec217b6784 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 22 Jul 2019 11:24:42 -0700
Subject: locking/lockdep: Reduce space occupied by stack traces

Although commit 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys
for workqueues") unregisters dynamic lockdep keys when a workqueue is
destroyed, a side effect of that commit is that all stack traces
associated with the lockdep key are leaked when a workqueue is destroyed.
Fix this by storing each unique stack trace once. Other changes in this
patch are:

- Use NULL instead of { .nr_entries = 0 } to represent 'no trace'.
- Store a pointer to a stack trace in struct lock_class and struct
  lock_list instead of storing 'nr_entries' and 'offset'.

This patch avoids that the following program triggers the "BUG:
MAX_STACK_TRACE_ENTRIES too low!" complaint:

	#include <fcntl.h>
	#include <unistd.h>

	int main()
	{
		for (;;) {
			int fd = open("/dev/infiniband/rdma_cm", O_RDWR);
			close(fd);
		}
	}

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reported-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Yuyang Du <duyuyang@gmail.com>
Link: https://lkml.kernel.org/r/20190722182443.216015-4-bvanassche@acm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h            |   9 +--
 kernel/locking/lockdep.c           | 128 ++++++++++++++++++++++++++-----------
 kernel/locking/lockdep_internals.h |   2 +
 3 files changed, 95 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index cdb3c2f06092..b8a835fd611b 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -66,10 +66,7 @@ struct lock_class_key {
 
 extern struct lock_class_key __lockdep_no_validate__;
 
-struct lock_trace {
-	unsigned int		nr_entries;
-	unsigned int		offset;
-};
+struct lock_trace;
 
 #define LOCKSTAT_POINTS		4
 
@@ -105,7 +102,7 @@ struct lock_class {
 	 * IRQ/softirq usage tracking bits:
 	 */
 	unsigned long			usage_mask;
-	struct lock_trace		usage_traces[XXX_LOCK_USAGE_STATES];
+	const struct lock_trace		*usage_traces[XXX_LOCK_USAGE_STATES];
 
 	/*
 	 * Generation counter, when doing certain classes of graph walking,
@@ -193,7 +190,7 @@ struct lock_list {
 	struct list_head		entry;
 	struct lock_class		*class;
 	struct lock_class		*links_to;
-	struct lock_trace		trace;
+	const struct lock_trace		*trace;
 	int				distance;
 
 	/*
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index af6627866191..1a96869cb2f0 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -449,33 +449,72 @@ static void print_lockdep_off(const char *bug_msg)
 unsigned long nr_stack_trace_entries;
 
 #ifdef CONFIG_PROVE_LOCKING
+/**
+ * struct lock_trace - single stack backtrace
+ * @hash_entry:	Entry in a stack_trace_hash[] list.
+ * @hash:	jhash() of @entries.
+ * @nr_entries:	Number of entries in @entries.
+ * @entries:	Actual stack backtrace.
+ */
+struct lock_trace {
+	struct hlist_node	hash_entry;
+	u32			hash;
+	u32			nr_entries;
+	unsigned long		entries[0] __aligned(sizeof(unsigned long));
+};
+#define LOCK_TRACE_SIZE_IN_LONGS				\
+	(sizeof(struct lock_trace) / sizeof(unsigned long))
 /*
- * Stack-trace: tightly packed array of stack backtrace
- * addresses. Protected by the graph_lock.
+ * Stack-trace: sequence of lock_trace structures. Protected by the graph_lock.
  */
 static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
+static struct hlist_head stack_trace_hash[STACK_TRACE_HASH_SIZE];
+
+static bool traces_identical(struct lock_trace *t1, struct lock_trace *t2)
+{
+	return t1->hash == t2->hash && t1->nr_entries == t2->nr_entries &&
+		memcmp(t1->entries, t2->entries,
+		       t1->nr_entries * sizeof(t1->entries[0])) == 0;
+}
 
-static int save_trace(struct lock_trace *trace)
+static struct lock_trace *save_trace(void)
 {
-	unsigned long *entries = stack_trace + nr_stack_trace_entries;
+	struct lock_trace *trace, *t2;
+	struct hlist_head *hash_head;
+	u32 hash;
 	unsigned int max_entries;
 
-	trace->offset = nr_stack_trace_entries;
-	max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
-	trace->nr_entries = stack_trace_save(entries, max_entries, 3);
-	nr_stack_trace_entries += trace->nr_entries;
+	BUILD_BUG_ON_NOT_POWER_OF_2(STACK_TRACE_HASH_SIZE);
+	BUILD_BUG_ON(LOCK_TRACE_SIZE_IN_LONGS >= MAX_STACK_TRACE_ENTRIES);
+
+	trace = (struct lock_trace *)(stack_trace + nr_stack_trace_entries);
+	max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries -
+		LOCK_TRACE_SIZE_IN_LONGS;
+	trace->nr_entries = stack_trace_save(trace->entries, max_entries, 3);
 
-	if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
+	if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES -
+	    LOCK_TRACE_SIZE_IN_LONGS - 1) {
 		if (!debug_locks_off_graph_unlock())
-			return 0;
+			return NULL;
 
 		print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
 		dump_stack();
 
-		return 0;
+		return NULL;
 	}
 
-	return 1;
+	hash = jhash(trace->entries, trace->nr_entries *
+		     sizeof(trace->entries[0]), 0);
+	trace->hash = hash;
+	hash_head = stack_trace_hash + (hash & (STACK_TRACE_HASH_SIZE - 1));
+	hlist_for_each_entry(t2, hash_head, hash_entry) {
+		if (traces_identical(trace, t2))
+			return t2;
+	}
+	nr_stack_trace_entries += LOCK_TRACE_SIZE_IN_LONGS + trace->nr_entries;
+	hlist_add_head(&trace->hash_entry, hash_head);
+
+	return trace;
 }
 #endif
 
@@ -1235,7 +1274,7 @@ static struct lock_list *alloc_list_entry(void)
 static int add_lock_to_list(struct lock_class *this,
 			    struct lock_class *links_to, struct list_head *head,
 			    unsigned long ip, int distance,
-			    struct lock_trace *trace)
+			    const struct lock_trace *trace)
 {
 	struct lock_list *entry;
 	/*
@@ -1249,7 +1288,7 @@ static int add_lock_to_list(struct lock_class *this,
 	entry->class = this;
 	entry->links_to = links_to;
 	entry->distance = distance;
-	entry->trace = *trace;
+	entry->trace = trace;
 	/*
 	 * Both allocation and removal are done under the graph lock; but
 	 * iteration is under RCU-sched; see look_up_lock_class() and
@@ -1470,11 +1509,10 @@ static inline int __bfs_backwards(struct lock_list *src_entry,
 
 }
 
-static void print_lock_trace(struct lock_trace *trace, unsigned int spaces)
+static void print_lock_trace(const struct lock_trace *trace,
+			     unsigned int spaces)
 {
-	unsigned long *entries = stack_trace + trace->offset;
-
-	stack_trace_print(entries, trace->nr_entries, spaces);
+	stack_trace_print(trace->entries, trace->nr_entries, spaces);
 }
 
 /*
@@ -1489,7 +1527,7 @@ print_circular_bug_entry(struct lock_list *target, int depth)
 	printk("\n-> #%u", depth);
 	print_lock_name(target->class);
 	printk(KERN_CONT ":\n");
-	print_lock_trace(&target->trace, 6);
+	print_lock_trace(target->trace, 6);
 }
 
 static void
@@ -1592,7 +1630,8 @@ static noinline void print_circular_bug(struct lock_list *this,
 	if (!debug_locks_off_graph_unlock() || debug_locks_silent)
 		return;
 
-	if (!save_trace(&this->trace))
+	this->trace = save_trace();
+	if (!this->trace)
 		return;
 
 	depth = get_lock_depth(target);
@@ -1715,7 +1754,7 @@ check_path(struct lock_class *target, struct lock_list *src_entry,
  */
 static noinline int
 check_noncircular(struct held_lock *src, struct held_lock *target,
-		  struct lock_trace *trace)
+		  struct lock_trace **const trace)
 {
 	int ret;
 	struct lock_list *uninitialized_var(target_entry);
@@ -1729,13 +1768,13 @@ check_noncircular(struct held_lock *src, struct held_lock *target,
 	ret = check_path(hlock_class(target), &src_entry, &target_entry);
 
 	if (unlikely(!ret)) {
-		if (!trace->nr_entries) {
+		if (!*trace) {
 			/*
 			 * If save_trace fails here, the printing might
 			 * trigger a WARN but because of the !nr_entries it
 			 * should not do bad things.
 			 */
-			save_trace(trace);
+			*trace = save_trace();
 		}
 
 		print_circular_bug(&src_entry, target_entry, src, target);
@@ -1859,7 +1898,7 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 
 			len += printk("%*s   %s", depth, "", usage_str[bit]);
 			len += printk(KERN_CONT " at:\n");
-			print_lock_trace(class->usage_traces + bit, len);
+			print_lock_trace(class->usage_traces[bit], len);
 		}
 	}
 	printk("%*s }\n", depth, "");
@@ -1884,7 +1923,7 @@ print_shortest_lock_dependencies(struct lock_list *leaf,
 	do {
 		print_lock_class_header(entry->class, depth);
 		printk("%*s ... acquired at:\n", depth, "");
-		print_lock_trace(&entry->trace, 2);
+		print_lock_trace(entry->trace, 2);
 		printk("\n");
 
 		if (depth == 0 && (entry != root)) {
@@ -1995,14 +2034,14 @@ print_bad_irq_dependency(struct task_struct *curr,
 	print_lock_name(backwards_entry->class);
 	pr_warn("\n... which became %s-irq-safe at:\n", irqclass);
 
-	print_lock_trace(backwards_entry->class->usage_traces + bit1, 1);
+	print_lock_trace(backwards_entry->class->usage_traces[bit1], 1);
 
 	pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass);
 	print_lock_name(forwards_entry->class);
 	pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass);
 	pr_warn("...");
 
-	print_lock_trace(forwards_entry->class->usage_traces + bit2, 1);
+	print_lock_trace(forwards_entry->class->usage_traces[bit2], 1);
 
 	pr_warn("\nother info that might help us debug this:\n\n");
 	print_irq_lock_scenario(backwards_entry, forwards_entry,
@@ -2011,13 +2050,15 @@ print_bad_irq_dependency(struct task_struct *curr,
 	lockdep_print_held_locks(curr);
 
 	pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
-	if (!save_trace(&prev_root->trace))
+	prev_root->trace = save_trace();
+	if (!prev_root->trace)
 		return;
 	print_shortest_lock_dependencies(backwards_entry, prev_root);
 
 	pr_warn("\nthe dependencies between the lock to be acquired");
 	pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
-	if (!save_trace(&next_root->trace))
+	next_root->trace = save_trace();
+	if (!next_root->trace)
 		return;
 	print_shortest_lock_dependencies(forwards_entry, next_root);
 
@@ -2369,7 +2410,8 @@ check_deadlock(struct task_struct *curr, struct held_lock *next)
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance, struct lock_trace *trace)
+	       struct held_lock *next, int distance,
+	       struct lock_trace **const trace)
 {
 	struct lock_list *entry;
 	int ret;
@@ -2444,8 +2486,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		return ret;
 #endif
 
-	if (!trace->nr_entries && !save_trace(trace))
-		return 0;
+	if (!*trace) {
+		*trace = save_trace();
+		if (!*trace)
+			return 0;
+	}
 
 	/*
 	 * Ok, all validations passed, add the new lock
@@ -2453,14 +2498,14 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 	 */
 	ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
 			       &hlock_class(prev)->locks_after,
-			       next->acquire_ip, distance, trace);
+			       next->acquire_ip, distance, *trace);
 
 	if (!ret)
 		return 0;
 
 	ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
 			       &hlock_class(next)->locks_before,
-			       next->acquire_ip, distance, trace);
+			       next->acquire_ip, distance, *trace);
 	if (!ret)
 		return 0;
 
@@ -2476,7 +2521,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
-	struct lock_trace trace = { .nr_entries = 0 };
+	struct lock_trace *trace = NULL;
 	int depth = curr->lockdep_depth;
 	struct held_lock *hlock;
 
@@ -3015,7 +3060,7 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
 	print_lock(this);
 
 	pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]);
-	print_lock_trace(hlock_class(this)->usage_traces + prev_bit, 1);
+	print_lock_trace(hlock_class(this)->usage_traces[prev_bit], 1);
 
 	print_irqtrace_events(curr);
 	pr_warn("\nother info that might help us debug this:\n");
@@ -3096,7 +3141,8 @@ print_irq_inversion_bug(struct task_struct *curr,
 	lockdep_print_held_locks(curr);
 
 	pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
-	if (!save_trace(&root->trace))
+	root->trace = save_trace();
+	if (!root->trace)
 		return;
 	print_shortest_lock_dependencies(other, root);
 
@@ -3580,7 +3626,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 
 	hlock_class(this)->usage_mask |= new_mask;
 
-	if (!save_trace(hlock_class(this)->usage_traces + new_bit))
+	if (!(hlock_class(this)->usage_traces[new_bit] = save_trace()))
 		return 0;
 
 	switch (new_bit) {
@@ -5157,6 +5203,12 @@ void __init lockdep_init(void)
 		) / 1024
 		);
 
+#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+	printk(" memory used for stack traces: %zu kB\n",
+	       (sizeof(stack_trace) + sizeof(stack_trace_hash)) / 1024
+	       );
+#endif
+
 	printk(" per task-struct memory footprint: %zu bytes\n",
 	       sizeof(((struct task_struct *)NULL)->held_locks));
 }
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index 2e518369add4..93a008bf77db 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -92,6 +92,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
 #define MAX_LOCKDEP_ENTRIES	16384UL
 #define MAX_LOCKDEP_CHAINS_BITS	15
 #define MAX_STACK_TRACE_ENTRIES	262144UL
+#define STACK_TRACE_HASH_SIZE	8192
 #else
 #define MAX_LOCKDEP_ENTRIES	32768UL
 
@@ -102,6 +103,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ =
  * addresses. Protected by the hash_lock.
  */
 #define MAX_STACK_TRACE_ENTRIES	524288UL
+#define STACK_TRACE_HASH_SIZE	16384
 #endif
 
 #define MAX_LOCKDEP_CHAINS	(1UL << MAX_LOCKDEP_CHAINS_BITS)
-- 
cgit v1.2.3


From e797bda3fd29137f6c151dfa10ea6a61c17895ce Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 22 Jul 2019 20:47:16 +0200
Subject: smp/hotplug: Track booted once CPUs in a cpumask

The booted once information which is required to deal with the MCE
broadcast issue on X86 correctly is stored in the per cpu hotplug state,
which is perfectly fine for the intended purpose.

X86 needs that information for supporting NMI broadcasting via shortcuts,
but retrieving it from per cpu data is cumbersome.

Move it to a cpumask so the information can be checked against the
cpu_present_mask quickly.

No functional change intended.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190722105219.818822855@linutronix.de
---
 include/linux/cpumask.h |  2 ++
 kernel/cpu.c            | 11 +++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 21755471b1c3..693124900f0a 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -115,6 +115,8 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_active(cpu)		((cpu) == 0)
 #endif
 
+extern cpumask_t cpus_booted_once_mask;
+
 static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e84c0873559e..05778e32674a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -62,7 +62,6 @@ struct cpuhp_cpu_state {
 	bool			rollback;
 	bool			single;
 	bool			bringup;
-	bool			booted_once;
 	struct hlist_node	*node;
 	struct hlist_node	*last;
 	enum cpuhp_state	cb_state;
@@ -76,6 +75,10 @@ static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
 	.fail = CPUHP_INVALID,
 };
 
+#ifdef CONFIG_SMP
+cpumask_t cpus_booted_once_mask;
+#endif
+
 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
 static struct lockdep_map cpuhp_state_up_map =
 	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
@@ -433,7 +436,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
 	 * CPU. Otherwise, a broadacasted MCE observing CR4.MCE=0b on any
 	 * core will shutdown the machine.
 	 */
-	return !per_cpu(cpuhp_state, cpu).booted_once;
+	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
@@ -1066,7 +1069,7 @@ void notify_cpu_starting(unsigned int cpu)
 	int ret;
 
 	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
-	st->booted_once = true;
+	cpumask_set_cpu(cpu, &cpus_booted_once_mask);
 	while (st->state < target) {
 		st->state++;
 		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
@@ -2334,7 +2337,7 @@ void __init boot_cpu_init(void)
 void __init boot_cpu_hotplug_init(void)
 {
 #ifdef CONFIG_SMP
-	this_cpu_write(cpuhp_state.booted_once, true);
+	cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
 #endif
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }
-- 
cgit v1.2.3


From b9fa6442f7043e2cdd247905d4f3b80f2e9605cb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 22 Jul 2019 20:47:24 +0200
Subject: cpumask: Implement cpumask_or_equal()

The IPI code of x86 needs to evaluate whether the target cpumask is equal
to the cpu_online_mask or equal except for the calling CPU.

To replace the current implementation which requires the usage of a
temporary cpumask, which might involve allocations, add a new function
which compares a cpumask to the result of two other cpumasks which are
or'ed together before comparison.

This allows to make the required decision in one go and the calling code
then can check for the calling CPU being set in the target mask with
cpumask_test_cpu().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190722105220.585449120@linutronix.de
---
 include/linux/bitmap.h  | 23 +++++++++++++++++++++++
 include/linux/cpumask.h | 14 ++++++++++++++
 lib/bitmap.c            | 20 ++++++++++++++++++++
 3 files changed, 57 insertions(+)

(limited to 'include')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index f58e97446abc..90528f12bdfa 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -120,6 +120,10 @@ extern int __bitmap_empty(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_full(const unsigned long *bitmap, unsigned int nbits);
 extern int __bitmap_equal(const unsigned long *bitmap1,
 			  const unsigned long *bitmap2, unsigned int nbits);
+extern bool __pure __bitmap_or_equal(const unsigned long *src1,
+				     const unsigned long *src2,
+				     const unsigned long *src3,
+				     unsigned int nbits);
 extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
 			unsigned int nbits);
 extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
@@ -321,6 +325,25 @@ static inline int bitmap_equal(const unsigned long *src1,
 	return __bitmap_equal(src1, src2, nbits);
 }
 
+/**
+ * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
+ * @src1:	Pointer to bitmap 1
+ * @src2:	Pointer to bitmap 2 will be or'ed with bitmap 1
+ * @src3:	Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ *
+ * Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
+ */
+static inline bool bitmap_or_equal(const unsigned long *src1,
+				   const unsigned long *src2,
+				   const unsigned long *src3,
+				   unsigned int nbits)
+{
+	if (!small_const_nbits(nbits))
+		return __bitmap_or_equal(src1, src2, src3, nbits);
+
+	return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits));
+}
+
 static inline int bitmap_intersects(const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 693124900f0a..0c7db5efe66c 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -475,6 +475,20 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
 						 nr_cpumask_bits);
 }
 
+/**
+ * cpumask_or_equal - *src1p | *src2p == *src3p
+ * @src1p: the first input
+ * @src2p: the second input
+ * @src3p: the third input
+ */
+static inline bool cpumask_or_equal(const struct cpumask *src1p,
+				    const struct cpumask *src2p,
+				    const struct cpumask *src3p)
+{
+	return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p),
+			       cpumask_bits(src3p), nr_cpumask_bits);
+}
+
 /**
  * cpumask_intersects - (*src1p & *src2p) != 0
  * @src1p: the first input
diff --git a/lib/bitmap.c b/lib/bitmap.c
index bbe2589e8497..f9e834841e94 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -59,6 +59,26 @@ int __bitmap_equal(const unsigned long *bitmap1,
 }
 EXPORT_SYMBOL(__bitmap_equal);
 
+bool __bitmap_or_equal(const unsigned long *bitmap1,
+		       const unsigned long *bitmap2,
+		       const unsigned long *bitmap3,
+		       unsigned int bits)
+{
+	unsigned int k, lim = bits / BITS_PER_LONG;
+	unsigned long tmp;
+
+	for (k = 0; k < lim; ++k) {
+		if ((bitmap1[k] | bitmap2[k]) != bitmap3[k])
+			return false;
+	}
+
+	if (!(bits % BITS_PER_LONG))
+		return true;
+
+	tmp = (bitmap1[k] | bitmap2[k]) ^ bitmap3[k];
+	return (tmp & BITMAP_LAST_WORD_MASK(bits)) == 0;
+}
+
 void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
 {
 	unsigned int k, lim = BITS_TO_LONGS(bits);
-- 
cgit v1.2.3


From 0c09ab96fc820109d63097a2adcbbd20836b655f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 9 Jul 2019 16:23:40 +0200
Subject: cpu/hotplug: Cache number of online CPUs

Re-evaluating the bitmap wheight of the online cpus bitmap in every
invocation of num_online_cpus() over and over is a pretty useless
exercise. Especially when num_online_cpus() is used in code paths
like the IPI delivery of x86 or the membarrier code.

Cache the number of online CPUs in the core and just return the cached
variable. The accessor function provides only a snapshot when used without
protection against concurrent CPU hotplug.

The storage needs to use an atomic_t because the kexec and reboot code
(ab)use set_cpu_online() in their 'shutdown' handlers without any form of
serialization as pointed out by Mathieu. Regular CPU hotplug usage is
properly serialized.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907091622590.1634@nanos.tec.linutronix.de
---
 include/linux/cpumask.h | 25 ++++++++++++++++---------
 kernel/cpu.c            | 24 ++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 0c7db5efe66c..b5a5a1ed9efd 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/bitmap.h>
+#include <linux/atomic.h>
 #include <linux/bug.h>
 
 /* Don't assign or return these: may not be this big! */
@@ -95,8 +96,21 @@ extern struct cpumask __cpu_active_mask;
 #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
 #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
 
+extern atomic_t __num_online_cpus;
+
 #if NR_CPUS > 1
-#define num_online_cpus()	cpumask_weight(cpu_online_mask)
+/**
+ * num_online_cpus() - Read the number of online CPUs
+ *
+ * Despite the fact that __num_online_cpus is of type atomic_t, this
+ * interface gives only a momentary snapshot and is not protected against
+ * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
+ * region.
+ */
+static inline unsigned int num_online_cpus(void)
+{
+	return atomic_read(&__num_online_cpus);
+}
 #define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
 #define num_present_cpus()	cpumask_weight(cpu_present_mask)
 #define num_active_cpus()	cpumask_weight(cpu_active_mask)
@@ -821,14 +835,7 @@ set_cpu_present(unsigned int cpu, bool present)
 		cpumask_clear_cpu(cpu, &__cpu_present_mask);
 }
 
-static inline void
-set_cpu_online(unsigned int cpu, bool online)
-{
-	if (online)
-		cpumask_set_cpu(cpu, &__cpu_online_mask);
-	else
-		cpumask_clear_cpu(cpu, &__cpu_online_mask);
-}
+void set_cpu_online(unsigned int cpu, bool online);
 
 static inline void
 set_cpu_active(unsigned int cpu, bool active)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 05778e32674a..e1967e9eddc2 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2298,6 +2298,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
 struct cpumask __cpu_active_mask __read_mostly;
 EXPORT_SYMBOL(__cpu_active_mask);
 
+atomic_t __num_online_cpus __read_mostly;
+EXPORT_SYMBOL(__num_online_cpus);
+
 void init_cpu_present(const struct cpumask *src)
 {
 	cpumask_copy(&__cpu_present_mask, src);
@@ -2313,6 +2316,27 @@ void init_cpu_online(const struct cpumask *src)
 	cpumask_copy(&__cpu_online_mask, src);
 }
 
+void set_cpu_online(unsigned int cpu, bool online)
+{
+	/*
+	 * atomic_inc/dec() is required to handle the horrid abuse of this
+	 * function by the reboot and kexec code which invoke it from
+	 * IPI/NMI broadcasts when shutting down CPUs. Invocation from
+	 * regular CPU hotplug is properly serialized.
+	 *
+	 * Note, that the fact that __num_online_cpus is of type atomic_t
+	 * does not protect readers which are not serialized against
+	 * concurrent hotplug operations.
+	 */
+	if (online) {
+		if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask))
+			atomic_inc(&__num_online_cpus);
+	} else {
+		if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask))
+			atomic_dec(&__num_online_cpus);
+	}
+}
+
 /*
  * Activate the first processor.
  */
-- 
cgit v1.2.3


From 7b3c92b85a65c2db1f542265bc98e1f9e3056eba Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 4 Jul 2019 15:13:23 -0700
Subject: sched/core: Convert get_task_struct() to return the task

Returning the pointer that was passed in allows us to write
slightly more idiomatic code.  Convert a few users.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190704221323.24290-1-willy@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/task.h        | 6 +++++-
 kernel/events/core.c              | 9 +++------
 kernel/irq/manage.c               | 3 +--
 kernel/locking/rtmutex.c          | 6 ++----
 kernel/trace/trace_sched_wakeup.c | 3 +--
 5 files changed, 12 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0497091e40c1..3d90ed8f75f0 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -105,7 +105,11 @@ extern void sched_exec(void);
 #define sched_exec()   {}
 #endif
 
-#define get_task_struct(tsk) do { refcount_inc(&(tsk)->usage); } while(0)
+static inline struct task_struct *get_task_struct(struct task_struct *t)
+{
+	refcount_inc(&t->usage);
+	return t;
+}
 
 extern void __put_task_struct(struct task_struct *t);
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 026a14541a38..ea5e8139fe62 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4089,10 +4089,8 @@ alloc_perf_context(struct pmu *pmu, struct task_struct *task)
 		return NULL;
 
 	__perf_event_init_context(ctx);
-	if (task) {
-		ctx->task = task;
-		get_task_struct(task);
-	}
+	if (task)
+		ctx->task = get_task_struct(task);
 	ctx->pmu = pmu;
 
 	return ctx;
@@ -10355,8 +10353,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		 * and we cannot use the ctx information because we need the
 		 * pmu before we get a ctx.
 		 */
-		get_task_struct(task);
-		event->hw.target = task;
+		event->hw.target = get_task_struct(task);
 	}
 
 	event->clock = &local_clock;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e8f7f179bf77..9d50fbe5531a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1255,8 +1255,7 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
 	 * the thread dies to avoid that the interrupt code
 	 * references an already freed task_struct.
 	 */
-	get_task_struct(t);
-	new->thread = t;
+	new->thread = get_task_struct(t);
 	/*
 	 * Tell the thread to set its affinity. This is
 	 * important for shared interrupt handlers as we do
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index fa83d36e30c6..2874bf556162 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -628,8 +628,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 		}
 
 		/* [10] Grab the next task, i.e. owner of @lock */
-		task = rt_mutex_owner(lock);
-		get_task_struct(task);
+		task = get_task_struct(rt_mutex_owner(lock));
 		raw_spin_lock(&task->pi_lock);
 
 		/*
@@ -709,8 +708,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
 	}
 
 	/* [10] Grab the next task, i.e. the owner of @lock */
-	task = rt_mutex_owner(lock);
-	get_task_struct(task);
+	task = get_task_struct(rt_mutex_owner(lock));
 	raw_spin_lock(&task->pi_lock);
 
 	/* [11] requeue the pi waiters if necessary */
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 743b2b520d34..5e43b9664eca 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -579,8 +579,7 @@ probe_wakeup(void *ignore, struct task_struct *p)
 	else
 		tracing_dl = 0;
 
-	wakeup_task = p;
-	get_task_struct(wakeup_task);
+	wakeup_task = get_task_struct(p);
 
 	local_save_flags(flags);
 
-- 
cgit v1.2.3


From c22645f4c8f021fb1c5e7189eb1f968132cc0844 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Fri, 19 Jul 2019 15:59:53 +0200
Subject: sched/topology: Add partition_sched_domains_locked()

Introduce the partition_sched_domains_locked() function by taking
the mutex locking code out of the original function.  That way
the work done by partition_sched_domains_locked() can be reused
without dropping the mutex lock.

No change of functionality is introduced by this patch.

Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bristot@redhat.com
Cc: claudio@evidence.eu.com
Cc: lizefan@huawei.com
Cc: longman@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: rostedt@goodmis.org
Cc: tommaso.cucinotta@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-2-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/topology.h | 10 ++++++++++
 kernel/sched/topology.c        | 17 +++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 7863bb62d2ab..f341163fedc9 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -150,6 +150,10 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
 	return to_cpumask(sd->span);
 }
 
+extern void partition_sched_domains_locked(int ndoms_new,
+					   cpumask_var_t doms_new[],
+					   struct sched_domain_attr *dattr_new);
+
 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 				    struct sched_domain_attr *dattr_new);
 
@@ -194,6 +198,12 @@ extern void set_sched_topology(struct sched_domain_topology_level *tl);
 
 struct sched_domain_attr;
 
+static inline void
+partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
+			       struct sched_domain_attr *dattr_new)
+{
+}
+
 static inline void
 partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 			struct sched_domain_attr *dattr_new)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 4eea2c9bc732..5a174ae6ecf3 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2169,16 +2169,16 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * ndoms_new == 0 is a special case for destroying existing domains,
  * and it will not create the default domain.
  *
- * Call with hotplug lock held
+ * Call with hotplug lock and sched_domains_mutex held
  */
-void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
-			     struct sched_domain_attr *dattr_new)
+void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
+				    struct sched_domain_attr *dattr_new)
 {
 	bool __maybe_unused has_eas = false;
 	int i, j, n;
 	int new_topology;
 
-	mutex_lock(&sched_domains_mutex);
+	lockdep_assert_held(&sched_domains_mutex);
 
 	/* Always unregister in case we don't destroy any domains: */
 	unregister_sched_domain_sysctl();
@@ -2261,6 +2261,15 @@ match3:
 	ndoms_cur = ndoms_new;
 
 	register_sched_domain_sysctl();
+}
 
+/*
+ * Call with hotplug lock held
+ */
+void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+			     struct sched_domain_attr *dattr_new)
+{
+	mutex_lock(&sched_domains_mutex);
+	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
 	mutex_unlock(&sched_domains_mutex);
 }
-- 
cgit v1.2.3


From f9a25f776d780bfa3279f0b6e5f5cf3224997976 Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Fri, 19 Jul 2019 15:59:55 +0200
Subject: cpusets: Rebuild root domain deadline accounting information

When the topology of root domains is modified by CPUset or CPUhotplug
operations information about the current deadline bandwidth held in the
root domain is lost.

This patch addresses the issue by recalculating the lost deadline
bandwidth information by circling through the deadline tasks held in
CPUsets and adding their current load to the root domain they are
associated with.

Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
[ Various additional modifications. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bristot@redhat.com
Cc: claudio@evidence.eu.com
Cc: lizefan@huawei.com
Cc: longman@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: rostedt@goodmis.org
Cc: tj@kernel.org
Cc: tommaso.cucinotta@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-4-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cgroup.h         |  1 +
 include/linux/sched.h          |  5 ++++
 include/linux/sched/deadline.h |  8 ++++++
 kernel/cgroup/cgroup.c         |  2 +-
 kernel/cgroup/cpuset.c         | 64 +++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/deadline.c        | 30 ++++++++++++++++++++
 kernel/sched/sched.h           |  3 --
 kernel/sched/topology.c        | 13 ++++++++-
 8 files changed, 120 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f6b048902d6c..3ba3e6da13a6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -150,6 +150,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
 					struct cgroup_subsys_state **dst_cssp);
 
+void cgroup_enable_task_cg_lists(void);
 void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
 			 struct css_task_iter *it);
 struct task_struct *css_task_iter_next(struct css_task_iter *it);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..b94ad92dfbe6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -295,6 +295,11 @@ enum uclamp_id {
 	UCLAMP_CNT
 };
 
+#ifdef CONFIG_SMP
+extern struct root_domain def_root_domain;
+extern struct mutex sched_domains_mutex;
+#endif
+
 struct sched_info {
 #ifdef CONFIG_SCHED_INFO
 	/* Cumulative counters: */
diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h
index 0cb034331cbb..1aff00b65f3c 100644
--- a/include/linux/sched/deadline.h
+++ b/include/linux/sched/deadline.h
@@ -24,3 +24,11 @@ static inline bool dl_time_before(u64 a, u64 b)
 {
 	return (s64)(a - b) < 0;
 }
+
+#ifdef CONFIG_SMP
+
+struct root_domain;
+extern void dl_add_task_root_domain(struct task_struct *p);
+extern void dl_clear_root_domain(struct root_domain *rd);
+
+#endif /* CONFIG_SMP */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 753afbca549f..4b5bc452176c 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1891,7 +1891,7 @@ static int cgroup_reconfigure(struct fs_context *fc)
  */
 static bool use_task_css_set_links __read_mostly;
 
-static void cgroup_enable_task_cg_lists(void)
+void cgroup_enable_task_cg_lists(void)
 {
 	struct task_struct *p, *g;
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5aa37531ce76..846cbdb68566 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -45,6 +45,7 @@
 #include <linux/proc_fs.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/sched/deadline.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
 #include <linux/seq_file.h>
@@ -894,6 +895,67 @@ done:
 	return ndoms;
 }
 
+static void update_tasks_root_domain(struct cpuset *cs)
+{
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	css_task_iter_start(&cs->css, 0, &it);
+
+	while ((task = css_task_iter_next(&it)))
+		dl_add_task_root_domain(task);
+
+	css_task_iter_end(&it);
+}
+
+static void rebuild_root_domains(void)
+{
+	struct cpuset *cs = NULL;
+	struct cgroup_subsys_state *pos_css;
+
+	lockdep_assert_held(&cpuset_mutex);
+	lockdep_assert_cpus_held();
+	lockdep_assert_held(&sched_domains_mutex);
+
+	cgroup_enable_task_cg_lists();
+
+	rcu_read_lock();
+
+	/*
+	 * Clear default root domain DL accounting, it will be computed again
+	 * if a task belongs to it.
+	 */
+	dl_clear_root_domain(&def_root_domain);
+
+	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
+
+		if (cpumask_empty(cs->effective_cpus)) {
+			pos_css = css_rightmost_descendant(pos_css);
+			continue;
+		}
+
+		css_get(&cs->css);
+
+		rcu_read_unlock();
+
+		update_tasks_root_domain(cs);
+
+		rcu_read_lock();
+		css_put(&cs->css);
+	}
+	rcu_read_unlock();
+}
+
+static void
+partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
+				    struct sched_domain_attr *dattr_new)
+{
+	mutex_lock(&sched_domains_mutex);
+	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
+	rebuild_root_domains();
+	mutex_unlock(&sched_domains_mutex);
+}
+
 /*
  * Rebuild scheduler domains.
  *
@@ -931,7 +993,7 @@ static void rebuild_sched_domains_locked(void)
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
-	partition_sched_domains(ndoms, doms, attr);
+	partition_and_rebuild_sched_domains(ndoms, doms, attr);
 out:
 	put_online_cpus();
 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index ef5b9f6b1d42..0f9d2180be23 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2283,6 +2283,36 @@ void __init init_sched_dl_class(void)
 					GFP_KERNEL, cpu_to_node(i));
 }
 
+void dl_add_task_root_domain(struct task_struct *p)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+	struct dl_bw *dl_b;
+
+	rq = task_rq_lock(p, &rf);
+	if (!dl_task(p))
+		goto unlock;
+
+	dl_b = &rq->rd->dl_bw;
+	raw_spin_lock(&dl_b->lock);
+
+	__dl_add(dl_b, p->dl.dl_bw, cpumask_weight(rq->rd->span));
+
+	raw_spin_unlock(&dl_b->lock);
+
+unlock:
+	task_rq_unlock(rq, p, &rf);
+}
+
+void dl_clear_root_domain(struct root_domain *rd)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&rd->dl_bw.lock, flags);
+	rd->dl_bw.total_bw = 0;
+	raw_spin_unlock_irqrestore(&rd->dl_bw.lock, flags);
+}
+
 #endif /* CONFIG_SMP */
 
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 16126efd14ed..7583faddba33 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -778,9 +778,6 @@ struct root_domain {
 	struct perf_domain __rcu *pd;
 };
 
-extern struct root_domain def_root_domain;
-extern struct mutex sched_domains_mutex;
-
 extern void init_defrootdomain(void);
 extern int sched_init_domains(const struct cpumask *cpu_map);
 extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 5a174ae6ecf3..8f83e8e3ea9a 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2203,8 +2203,19 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < n && !new_topology; j++) {
 			if (cpumask_equal(doms_cur[i], doms_new[j]) &&
-			    dattrs_equal(dattr_cur, i, dattr_new, j))
+			    dattrs_equal(dattr_cur, i, dattr_new, j)) {
+				struct root_domain *rd;
+
+				/*
+				 * This domain won't be destroyed and as such
+				 * its dl_bw->total_bw needs to be cleared.  It
+				 * will be recomputed in function
+				 * update_tasks_root_domain().
+				 */
+				rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
+				dl_clear_root_domain(rd);
 				goto match1;
+			}
 		}
 		/* No match - a current sched domain not in new doms_new[] */
 		detach_destroy_domains(doms_cur[i]);
-- 
cgit v1.2.3


From d74b27d63a8bebe2fe634944e4ebdc7b10db7a39 Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@redhat.com>
Date: Fri, 19 Jul 2019 15:59:58 +0200
Subject: cgroup/cpuset: Change cpuset_rwsem and hotplug lock order

cpuset_rwsem is going to be acquired from sched_setscheduler() with a
following patch. There are however paths (e.g., spawn_ksoftirqd) in
which sched_scheduler() is eventually called while holding hotplug lock;
this creates a dependecy between hotplug lock (to be always acquired
first) and cpuset_rwsem (to be always acquired after hotplug lock).

Fix paths which currently take the two locks in the wrong order (after
a following patch is applied).

Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bristot@redhat.com
Cc: claudio@evidence.eu.com
Cc: lizefan@huawei.com
Cc: longman@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: mathieu.poirier@linaro.org
Cc: rostedt@goodmis.org
Cc: tj@kernel.org
Cc: tommaso.cucinotta@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-7-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuset.h |  8 ++++----
 kernel/cgroup/cpuset.c | 22 +++++++++++++++++-----
 2 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 934633a05d20..7f1478c26a33 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
 
 static inline void cpuset_inc(void)
 {
-	static_branch_inc(&cpusets_pre_enable_key);
-	static_branch_inc(&cpusets_enabled_key);
+	static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
+	static_branch_inc_cpuslocked(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
-	static_branch_dec(&cpusets_enabled_key);
-	static_branch_dec(&cpusets_pre_enable_key);
+	static_branch_dec_cpuslocked(&cpusets_enabled_key);
+	static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
 }
 
 extern int cpuset_init(void);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e1a8d168c5e9..5c5014caa23c 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -973,8 +973,8 @@ static void rebuild_sched_domains_locked(void)
 	cpumask_var_t *doms;
 	int ndoms;
 
+	lockdep_assert_cpus_held();
 	percpu_rwsem_assert_held(&cpuset_rwsem);
-	get_online_cpus();
 
 	/*
 	 * We have raced with CPU hotplug. Don't do anything to avoid
@@ -983,19 +983,17 @@ static void rebuild_sched_domains_locked(void)
 	 */
 	if (!top_cpuset.nr_subparts_cpus &&
 	    !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
-		goto out;
+		return;
 
 	if (top_cpuset.nr_subparts_cpus &&
 	   !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask))
-		goto out;
+		return;
 
 	/* Generate domain masks and attrs */
 	ndoms = generate_sched_domains(&doms, &attr);
 
 	/* Have scheduler rebuild the domains */
 	partition_and_rebuild_sched_domains(ndoms, doms, attr);
-out:
-	put_online_cpus();
 }
 #else /* !CONFIG_SMP */
 static void rebuild_sched_domains_locked(void)
@@ -1005,9 +1003,11 @@ static void rebuild_sched_domains_locked(void)
 
 void rebuild_sched_domains(void)
 {
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 	rebuild_sched_domains_locked();
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 }
 
 /**
@@ -2245,6 +2245,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = 0;
 
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs)) {
 		retval = -ENODEV;
@@ -2282,6 +2283,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	}
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 	return retval;
 }
 
@@ -2292,6 +2294,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;
 
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -2306,6 +2309,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 	}
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 	return retval;
 }
 
@@ -2344,6 +2348,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	kernfs_break_active_protection(of->kn);
 	flush_work(&cpuset_hotplug_work);
 
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -2369,6 +2374,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	free_cpuset(trialcs);
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 	kernfs_unbreak_active_protection(of->kn);
 	css_put(&cs->css);
 	flush_workqueue(cpuset_migrate_mm_wq);
@@ -2499,6 +2505,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
 		return -EINVAL;
 
 	css_get(&cs->css);
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -2506,6 +2513,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
 	retval = update_prstate(cs, val);
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 	css_put(&cs->css);
 	return retval ?: nbytes;
 }
@@ -2711,6 +2719,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	if (!parent)
 		return 0;
 
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 
 	set_bit(CS_ONLINE, &cs->flags);
@@ -2763,6 +2772,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	spin_unlock_irq(&callback_lock);
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 	return 0;
 }
 
@@ -2781,6 +2791,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
 	struct cpuset *cs = css_cs(css);
 
+	get_online_cpus();
 	percpu_down_write(&cpuset_rwsem);
 
 	if (is_partition_root(cs))
@@ -2801,6 +2812,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 	clear_bit(CS_ONLINE, &cs->flags);
 
 	percpu_up_write(&cpuset_rwsem);
+	put_online_cpus();
 }
 
 static void cpuset_css_free(struct cgroup_subsys_state *css)
-- 
cgit v1.2.3


From 710da3c8ea7dfbd327920afd3831d8c82c42789d Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@redhat.com>
Date: Fri, 19 Jul 2019 16:00:00 +0200
Subject: sched/core: Prevent race condition between cpuset and
 __sched_setscheduler()

No synchronisation mechanism exists between the cpuset subsystem and
calls to function __sched_setscheduler(). As such, it is possible that
new root domains are created on the cpuset side while a deadline
acceptance test is carried out in __sched_setscheduler(), leading to a
potential oversell of CPU bandwidth.

Grab cpuset_rwsem read lock from core scheduler, so to prevent
situations such as the one described above from happening.

The only exception is normalize_rt_tasks() which needs to work under
tasklist_lock and can't therefore grab cpuset_rwsem. We are fine with
this, as this function is only called by sysrq and, if that gets
triggered, DEADLINE guarantees are already gone out of the window
anyway.

Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bristot@redhat.com
Cc: claudio@evidence.eu.com
Cc: lizefan@huawei.com
Cc: longman@redhat.com
Cc: luca.abeni@santannapisa.it
Cc: mathieu.poirier@linaro.org
Cc: rostedt@goodmis.org
Cc: tj@kernel.org
Cc: tommaso.cucinotta@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-9-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/cpuset.h |  5 +++++
 kernel/cgroup/cpuset.c | 11 +++++++++++
 kernel/sched/core.c    | 20 +++++++++++++++++---
 3 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 7f1478c26a33..04c20de66afc 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -55,6 +55,8 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void cpuset_read_lock(void);
+extern void cpuset_read_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -176,6 +178,9 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void cpuset_read_lock(void) { }
+static inline void cpuset_read_unlock(void) { }
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
 				       struct cpumask *mask)
 {
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 5c5014caa23c..c52bc91f882b 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -334,6 +334,17 @@ static struct cpuset top_cpuset = {
  */
 
 DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem);
+
+void cpuset_read_lock(void)
+{
+	percpu_down_read(&cpuset_rwsem);
+}
+
+void cpuset_read_unlock(void)
+{
+	percpu_up_read(&cpuset_rwsem);
+}
+
 static DEFINE_SPINLOCK(callback_lock);
 
 static struct workqueue_struct *cpuset_migrate_mm_wq;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1af3d2dc6b29..1bceb22dac18 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4698,6 +4698,9 @@ recheck:
 			return retval;
 	}
 
+	if (pi)
+		cpuset_read_lock();
+
 	/*
 	 * Make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
@@ -4772,6 +4775,8 @@ change:
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
 		task_rq_unlock(rq, p, &rf);
+		if (pi)
+			cpuset_read_unlock();
 		goto recheck;
 	}
 
@@ -4832,8 +4837,10 @@ change:
 	preempt_disable();
 	task_rq_unlock(rq, p, &rf);
 
-	if (pi)
+	if (pi) {
+		cpuset_read_unlock();
 		rt_mutex_adjust_pi(p);
+	}
 
 	/* Run balance callbacks after we've adjusted the PI chain: */
 	balance_callback(rq);
@@ -4843,6 +4850,8 @@ change:
 
 unlock:
 	task_rq_unlock(rq, p, &rf);
+	if (pi)
+		cpuset_read_unlock();
 	return retval;
 }
 
@@ -4927,10 +4936,15 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 	rcu_read_lock();
 	retval = -ESRCH;
 	p = find_process_by_pid(pid);
-	if (p != NULL)
-		retval = sched_setscheduler(p, policy, &lparam);
+	if (likely(p))
+		get_task_struct(p);
 	rcu_read_unlock();
 
+	if (likely(p)) {
+		retval = sched_setscheduler(p, policy, &lparam);
+		put_task_struct(p);
+	}
+
 	return retval;
 }
 
-- 
cgit v1.2.3


From 39289bfc221423b27570e7c9157b690828e786cb Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 22 Jul 2019 17:01:30 +0000
Subject: RDMA: Make most headers compile stand alone

So that rdma can work with CONFIG_KERNEL_HEADER_TEST and
CONFIG_HEADERS_CHECK.

Link: https://lore.kernel.org/r/20190722170126.GA16453@ziepe.ca
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/Kbuild               | 6 ------
 include/rdma/ib.h            | 2 ++
 include/rdma/iw_portmap.h    | 3 +++
 include/rdma/opa_port_info.h | 2 ++
 include/rdma/rdmavt_cq.h     | 1 +
 include/rdma/signature.h     | 2 ++
 6 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/Kbuild b/include/Kbuild
index c38f0d46b267..fc2aa4e20658 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -945,12 +945,6 @@ header-test-			+= net/xdp.h
 header-test-			+= net/xdp_priv.h
 header-test-			+= pcmcia/cistpl.h
 header-test-			+= pcmcia/ds.h
-header-test-			+= rdma/ib.h
-header-test-			+= rdma/iw_portmap.h
-header-test-			+= rdma/opa_port_info.h
-header-test-			+= rdma/rdmavt_cq.h
-header-test-			+= rdma/restrack.h
-header-test-			+= rdma/signature.h
 header-test-			+= rdma/tid_rdma_defs.h
 header-test-			+= scsi/fc/fc_encaps.h
 header-test-			+= scsi/fc/fc_fc2.h
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 4f385ec54f80..fe2fc9e91588 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -36,6 +36,8 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/cred.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
 
 struct ib_addr {
 	union {
diff --git a/include/rdma/iw_portmap.h b/include/rdma/iw_portmap.h
index b9fee7feeeb5..c89535047c42 100644
--- a/include/rdma/iw_portmap.h
+++ b/include/rdma/iw_portmap.h
@@ -33,6 +33,9 @@
 #ifndef _IW_PORTMAP_H
 #define _IW_PORTMAP_H
 
+#include <linux/socket.h>
+#include <linux/netlink.h>
+
 #define IWPM_ULIBNAME_SIZE	32
 #define IWPM_DEVNAME_SIZE	32
 #define IWPM_IFNAME_SIZE	16
diff --git a/include/rdma/opa_port_info.h b/include/rdma/opa_port_info.h
index 7147a9263011..bdbfe25d3854 100644
--- a/include/rdma/opa_port_info.h
+++ b/include/rdma/opa_port_info.h
@@ -33,6 +33,8 @@
 #if !defined(OPA_PORT_INFO_H)
 #define OPA_PORT_INFO_H
 
+#include <rdma/opa_smi.h>
+
 #define OPA_PORT_LINK_MODE_NOP	0		/* No change */
 #define OPA_PORT_LINK_MODE_OPA	4		/* Port mode is OPA */
 
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
index 04c519ef6d71..574eb7278f46 100644
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -53,6 +53,7 @@
 
 #include <linux/kthread.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
 
 /*
  * Define an ib_cq_notify value that is not valid so we know when CQ
diff --git a/include/rdma/signature.h b/include/rdma/signature.h
index f24cc2a1d3c5..d16b0fcc8344 100644
--- a/include/rdma/signature.h
+++ b/include/rdma/signature.h
@@ -6,6 +6,8 @@
 #ifndef _RDMA_SIGNATURE_H_
 #define _RDMA_SIGNATURE_H_
 
+#include <linux/types.h>
+
 enum ib_signature_prot_cap {
 	IB_PROT_T10DIF_TYPE_1 = 1,
 	IB_PROT_T10DIF_TYPE_2 = 1 << 1,
-- 
cgit v1.2.3


From 8732d85a69a0411f16a4b78df8fdc7b09c50a849 Mon Sep 17 00:00:00 2001
From: Mattias Jacobsson <2pi@mok.nu>
Date: Fri, 19 Jul 2019 19:51:45 +0200
Subject: platform/x86: wmi: add missing struct parameter description

Add a description for the context parameter in the struct wmi_device_id.

Reported-by: kbuild test robot <lkp@intel.com>
Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id")
Signed-off-by: Mattias Jacobsson <2pi@mok.nu>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/mod_devicetable.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b2c1648f7e5d..5714fd35a83c 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -814,6 +814,7 @@ struct tee_client_device_id {
 /**
  * struct wmi_device_id - WMI device identifier
  * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @context: pointer to driver specific data
  */
 struct wmi_device_id {
 	const char guid_string[UUID_STRING_LEN+1];
-- 
cgit v1.2.3


From 1d2fedd8561dc469a7503855ee602f4bb3eccfa7 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Tue, 23 Jul 2019 10:02:05 +0300
Subject: RDMA/core: Support netlink commands in non init_net net namespaces

Now that IB core supports RDMA device binding with specific net namespace,
enable IB core to accept netlink commands in non init_net namespaces.

This is done by having per net namespace netlink socket.

At present only netlink device handling client RDMA_NL_NLDEV supports
device handling in multiple net namespaces.  Hence do not accept netlink
messages for other clients in non init_net net namespaces.

Link: https://lore.kernel.org/r/20190723070205.6247-1-leon@kernel.org
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/addr.c      |  2 +-
 drivers/infiniband/core/core_priv.h | 24 +++++++++++++-
 drivers/infiniband/core/device.c    | 38 ++++++++--------------
 drivers/infiniband/core/iwpm_msg.c  |  8 ++---
 drivers/infiniband/core/iwpm_util.c |  6 ++--
 drivers/infiniband/core/netlink.c   | 63 +++++++++++++++++++++++++------------
 drivers/infiniband/core/nldev.c     | 20 ++++++------
 drivers/infiniband/core/sa_query.c  |  2 +-
 include/rdma/rdma_netlink.h         | 10 ++++--
 9 files changed, 105 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 9b76a8fcdd24..1dd467bed8fc 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -183,7 +183,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
 
 	/* Repair the nlmsg header length */
 	nlmsg_end(skb, nlh);
-	rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
+	rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL);
 
 	/* Make the request retry, so when we get the response from userspace
 	 * we will have something.
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 888d89ce81df..589ed805e0ad 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -36,6 +36,8 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/cgroup_rdma.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/opa_addr.h>
@@ -54,8 +56,26 @@ struct pkey_index_qp_list {
 	struct list_head    qp_list;
 };
 
+/**
+ * struct rdma_dev_net - rdma net namespace metadata for a net
+ * @nl_sock:	Pointer to netlink socket
+ * @net:	Pointer to owner net namespace
+ * @id:		xarray id to identify the net namespace.
+ */
+struct rdma_dev_net {
+	struct sock *nl_sock;
+	possible_net_t net;
+	u32 id;
+};
+
 extern const struct attribute_group ib_dev_attr_group;
 extern bool ib_devices_shared_netns;
+extern unsigned int rdma_dev_net_id;
+
+static inline struct rdma_dev_net *rdma_net_to_dev_net(struct net *net)
+{
+	return net_generic(net, rdma_dev_net_id);
+}
 
 int ib_device_register_sysfs(struct ib_device *device);
 void ib_device_unregister_sysfs(struct ib_device *device);
@@ -179,7 +199,6 @@ void ib_mad_cleanup(void);
 int ib_sa_init(void);
 void ib_sa_cleanup(void);
 
-int rdma_nl_init(void);
 void rdma_nl_exit(void);
 
 int ib_nl_handle_resolve_resp(struct sk_buff *skb,
@@ -362,4 +381,7 @@ void ib_port_unregister_module_stat(struct kobject *kobj);
 
 int ib_device_set_netns_put(struct sk_buff *skb,
 			    struct ib_device *dev, u32 ns_fd);
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet);
+void rdma_nl_net_exit(struct rdma_dev_net *rnet);
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 309210d2d4a8..c3576c7d2e8f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -39,7 +39,6 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include <net/net_namespace.h>
-#include <net/netns/generic.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/hashtable.h>
@@ -105,17 +104,7 @@ static DECLARE_RWSEM(clients_rwsem);
  */
 #define CLIENT_DATA_REGISTERED XA_MARK_1
 
-/**
- * struct rdma_dev_net - rdma net namespace metadata for a net
- * @net:	Pointer to owner net namespace
- * @id:		xarray id to identify the net namespace.
- */
-struct rdma_dev_net {
-	possible_net_t net;
-	u32 id;
-};
-
-static unsigned int rdma_dev_net_id;
+unsigned int rdma_dev_net_id;
 
 /*
  * A list of net namespaces is maintained in an xarray. This is necessary
@@ -1050,7 +1039,7 @@ int rdma_compatdev_set(u8 enable)
 
 static void rdma_dev_exit_net(struct net *net)
 {
-	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
 	struct ib_device *dev;
 	unsigned long index;
 	int ret;
@@ -1084,25 +1073,32 @@ static void rdma_dev_exit_net(struct net *net)
 	}
 	up_read(&devices_rwsem);
 
+	rdma_nl_net_exit(rnet);
 	xa_erase(&rdma_nets, rnet->id);
 }
 
 static __net_init int rdma_dev_init_net(struct net *net)
 {
-	struct rdma_dev_net *rnet = net_generic(net, rdma_dev_net_id);
+	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
 	unsigned long index;
 	struct ib_device *dev;
 	int ret;
 
+	write_pnet(&rnet->net, net);
+
+	ret = rdma_nl_net_init(rnet);
+	if (ret)
+		return ret;
+
 	/* No need to create any compat devices in default init_net. */
 	if (net_eq(net, &init_net))
 		return 0;
 
-	write_pnet(&rnet->net, net);
-
 	ret = xa_alloc(&rdma_nets, &rnet->id, rnet, xa_limit_32b, GFP_KERNEL);
-	if (ret)
+	if (ret) {
+		rdma_nl_net_exit(rnet);
 		return ret;
+	}
 
 	down_read(&devices_rwsem);
 	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
@@ -2629,12 +2625,6 @@ static int __init ib_core_init(void)
 		goto err_comp_unbound;
 	}
 
-	ret = rdma_nl_init();
-	if (ret) {
-		pr_warn("Couldn't init IB netlink interface: err %d\n", ret);
-		goto err_sysfs;
-	}
-
 	ret = addr_init();
 	if (ret) {
 		pr_warn("Could't init IB address resolution\n");
@@ -2680,8 +2670,6 @@ err_mad:
 err_addr:
 	addr_cleanup();
 err_ibnl:
-	rdma_nl_exit();
-err_sysfs:
 	class_unregister(&ib_class);
 err_comp_unbound:
 	destroy_workqueue(ib_comp_unbound_wq);
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 2452b0ddcf0d..f1a873d4e842 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -112,7 +112,7 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
 	pr_debug("%s: Multicasting a nlmsg (dev = %s ifname = %s iwpm = %s)\n",
 		__func__, pm_msg->dev_name, pm_msg->if_name, iwpm_ulib_name);
 
-	ret = rdma_nl_multicast(skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
+	ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_IWPM, GFP_KERNEL);
 	if (ret) {
 		skb = NULL; /* skb is freed in the netlink send-op handling */
 		iwpm_user_pid = IWPM_PID_UNAVAILABLE;
@@ -202,7 +202,7 @@ int iwpm_add_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	nlmsg_end(skb, nlh);
 	nlmsg_request->req_buffer = pm_msg;
 
-	ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+	ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
 	if (ret) {
 		skb = NULL; /* skb is freed in the netlink send-op handling */
 		iwpm_user_pid = IWPM_PID_UNDEFINED;
@@ -297,7 +297,7 @@ int iwpm_add_and_query_mapping(struct iwpm_sa_data *pm_msg, u8 nl_client)
 	nlmsg_end(skb, nlh);
 	nlmsg_request->req_buffer = pm_msg;
 
-	ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+	ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
 	if (ret) {
 		skb = NULL; /* skb is freed in the netlink send-op handling */
 		err_str = "Unable to send a nlmsg";
@@ -364,7 +364,7 @@ int iwpm_remove_mapping(struct sockaddr_storage *local_addr, u8 nl_client)
 
 	nlmsg_end(skb, nlh);
 
-	ret = rdma_nl_unicast_wait(skb, iwpm_user_pid);
+	ret = rdma_nl_unicast_wait(&init_net, skb, iwpm_user_pid);
 	if (ret) {
 		skb = NULL; /* skb is freed in the netlink send-op handling */
 		iwpm_user_pid = IWPM_PID_UNDEFINED;
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index 41929bb83739..c7ad3499228c 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -645,7 +645,7 @@ static int send_mapinfo_num(u32 mapping_num, u8 nl_client, int iwpm_pid)
 
 	nlmsg_end(skb, nlh);
 
-	ret = rdma_nl_unicast(skb, iwpm_pid);
+	ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
 	if (ret) {
 		skb = NULL;
 		err_str = "Unable to send a nlmsg";
@@ -674,7 +674,7 @@ static int send_nlmsg_done(struct sk_buff *skb, u8 nl_client, int iwpm_pid)
 		return -ENOMEM;
 	}
 	nlh->nlmsg_type = NLMSG_DONE;
-	ret = rdma_nl_unicast(skb, iwpm_pid);
+	ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
 	if (ret)
 		pr_warn("%s Unable to send a nlmsg\n", __func__);
 	return ret;
@@ -824,7 +824,7 @@ int iwpm_send_hello(u8 nl_client, int iwpm_pid, u16 abi_version)
 		goto hello_num_error;
 	nlmsg_end(skb, nlh);
 
-	ret = rdma_nl_unicast(skb, iwpm_pid);
+	ret = rdma_nl_unicast(&init_net, skb, iwpm_pid);
 	if (ret) {
 		skb = NULL;
 		err_str = "Unable to send a nlmsg";
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index eecfc0b377c9..67a76aca2dd6 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -36,20 +36,22 @@
 #include <linux/export.h>
 #include <net/netlink.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/sock.h>
 #include <rdma/rdma_netlink.h>
 #include <linux/module.h>
 #include "core_priv.h"
 
 static DEFINE_MUTEX(rdma_nl_mutex);
-static struct sock *nls;
 static struct {
 	const struct rdma_nl_cbs   *cb_table;
 } rdma_nl_types[RDMA_NL_NUM_CLIENTS];
 
 bool rdma_nl_chk_listeners(unsigned int group)
 {
-	return netlink_has_listeners(nls, group);
+	struct rdma_dev_net *rnet = rdma_net_to_dev_net(&init_net);
+
+	return netlink_has_listeners(rnet->nl_sock, group);
 }
 EXPORT_SYMBOL(rdma_nl_chk_listeners);
 
@@ -73,13 +75,21 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
 	return (op < max_num_ops[type]) ? true : false;
 }
 
-static bool is_nl_valid(unsigned int type, unsigned int op)
+static bool
+is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
 {
 	const struct rdma_nl_cbs *cb_table;
 
 	if (!is_nl_msg_valid(type, op))
 		return false;
 
+	/*
+	 * Currently only NLDEV client is supporting netlink commands in
+	 * non init_net net namespace.
+	 */
+	if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
+		return false;
+
 	if (!rdma_nl_types[type].cb_table) {
 		mutex_unlock(&rdma_nl_mutex);
 		request_module("rdma-netlink-subsys-%d", type);
@@ -161,7 +171,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	unsigned int op = RDMA_NL_GET_OP(type);
 	const struct rdma_nl_cbs *cb_table;
 
-	if (!is_nl_valid(index, op))
+	if (!is_nl_valid(skb, index, op))
 		return -EINVAL;
 
 	cb_table = rdma_nl_types[index].cb_table;
@@ -185,7 +195,7 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			.dump = cb_table[op].dump,
 		};
 		if (c.dump)
-			return netlink_dump_start(nls, skb, nlh, &c);
+			return netlink_dump_start(skb->sk, skb, nlh, &c);
 		return -EINVAL;
 	}
 
@@ -258,52 +268,65 @@ static void rdma_nl_rcv(struct sk_buff *skb)
 	mutex_unlock(&rdma_nl_mutex);
 }
 
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid)
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
 {
+	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
 	int err;
 
-	err = netlink_unicast(nls, skb, pid, MSG_DONTWAIT);
+	err = netlink_unicast(rnet->nl_sock, skb, pid, MSG_DONTWAIT);
 	return (err < 0) ? err : 0;
 }
 EXPORT_SYMBOL(rdma_nl_unicast);
 
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid)
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid)
 {
+	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
 	int err;
 
-	err = netlink_unicast(nls, skb, pid, 0);
+	err = netlink_unicast(rnet->nl_sock, skb, pid, 0);
 	return (err < 0) ? err : 0;
 }
 EXPORT_SYMBOL(rdma_nl_unicast_wait);
 
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags)
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+		      unsigned int group, gfp_t flags)
 {
-	return nlmsg_multicast(nls, skb, 0, group, flags);
+	struct rdma_dev_net *rnet = rdma_net_to_dev_net(net);
+
+	return nlmsg_multicast(rnet->nl_sock, skb, 0, group, flags);
 }
 EXPORT_SYMBOL(rdma_nl_multicast);
 
-int __init rdma_nl_init(void)
+void rdma_nl_exit(void)
+{
+	int idx;
+
+	for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+		WARN(rdma_nl_types[idx].cb_table,
+		     "Nelink client %d wasn't released prior to unloading %s\n",
+		     idx, KBUILD_MODNAME);
+}
+
+int rdma_nl_net_init(struct rdma_dev_net *rnet)
 {
+	struct net *net = read_pnet(&rnet->net);
 	struct netlink_kernel_cfg cfg = {
 		.input	= rdma_nl_rcv,
 	};
+	struct sock *nls;
 
-	nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
+	nls = netlink_kernel_create(net, NETLINK_RDMA, &cfg);
 	if (!nls)
 		return -ENOMEM;
 
 	nls->sk_sndtimeo = 10 * HZ;
+	rnet->nl_sock = nls;
 	return 0;
 }
 
-void rdma_nl_exit(void)
+void rdma_nl_net_exit(struct rdma_dev_net *rnet)
 {
-	int idx;
-
-	for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
-		rdma_nl_unregister(idx);
-
-	netlink_kernel_release(nls);
+	netlink_kernel_release(rnet->nl_sock);
 }
 
 MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 783e465e7c41..e287b71a1cfd 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -832,7 +832,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	nlmsg_end(msg, nlh);
 
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
 	nlmsg_free(msg);
@@ -972,7 +972,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
 
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
 	nlmsg_free(msg);
@@ -1074,7 +1074,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
 	nlmsg_free(msg);
@@ -1251,7 +1251,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_free:
 	nlmsg_free(msg);
@@ -1596,7 +1596,7 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
 	put_device(data.cdev);
 	if (ibdev)
 		ib_device_put(ibdev);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 out_data:
 	put_device(data.cdev);
@@ -1636,7 +1636,7 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return err;
 	}
 	nlmsg_end(msg, nlh);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 }
 
 static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -1734,7 +1734,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
 	rdma_counter_unbind_qpn(device, port, qpn, cntn);
@@ -1802,7 +1802,7 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_fill:
 	rdma_counter_bind_qpn(device, port, qpn, cntn);
@@ -1893,7 +1893,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
 	mutex_unlock(&stats->lock);
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_table:
 	nla_nest_cancel(msg, table_attr);
@@ -1961,7 +1961,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);
-	return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
+	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
 
 err_msg:
 	nlmsg_free(msg);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 7d8071c7e564..17fc2936c077 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -860,7 +860,7 @@ static int ib_nl_send_msg(struct ib_sa_query *query, gfp_t gfp_mask)
 	/* Repair the nlmsg header length */
 	nlmsg_end(skb, nlh);
 
-	return rdma_nl_multicast(skb, RDMA_NL_GROUP_LS, gfp_mask);
+	return rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_mask);
 }
 
 static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask)
diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h
index 6631624e4d7c..ab22759de7ea 100644
--- a/include/rdma/rdma_netlink.h
+++ b/include/rdma/rdma_netlink.h
@@ -76,28 +76,32 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 /**
  * Send the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @pid: Userspace netlink process ID
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_unicast(struct sk_buff *skb, u32 pid);
+int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid);
 
 /**
  * Send, with wait/1 retry, the supplied skb to a specific userspace PID.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @pid: Userspace netlink process ID
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_unicast_wait(struct sk_buff *skb, __u32 pid);
+int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid);
 
 /**
  * Send the supplied skb to a netlink group.
+ * @net: Net namespace in which to send the skb
  * @skb: The netlink skb
  * @group: Netlink group ID
  * @flags: allocation flags
  * Returns 0 on success or a negative error code.
  */
-int rdma_nl_multicast(struct sk_buff *skb, unsigned int group, gfp_t flags);
+int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
+		      unsigned int group, gfp_t flags);
 
 /**
  * Check if there are any listeners to the netlink group
-- 
cgit v1.2.3


From 0058eb589881056b49a4ba15dfa3f1b8db53991c Mon Sep 17 00:00:00 2001
From: Michal Kalderon <michal.kalderon@marvell.com>
Date: Tue, 9 Jul 2019 17:17:33 +0300
Subject: qed*: Change dpi_addr to be denoted with __iomem

Several casts were required around dpi_addr parameter in qed_rdma_if.h
This is an address on the doorbell bar and should therefore be marked with
__iomem.

Link: https://lore.kernel.org/r/20190709141735.19193-5-michal.kalderon@marvell.com
Reported-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/qedr/main.c          | 2 +-
 drivers/infiniband/hw/qedr/qedr.h          | 2 +-
 drivers/net/ethernet/qlogic/qed/qed_rdma.c | 5 ++---
 include/linux/qed/qed_rdma_if.h            | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 533157a2a3be..ca5f6f65c929 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -820,7 +820,7 @@ static int qedr_init_hw(struct qedr_dev *dev)
 	if (rc)
 		goto out;
 
-	dev->db_addr = (void __iomem *)(uintptr_t)out_params.dpi_addr;
+	dev->db_addr = out_params.dpi_addr;
 	dev->db_phys_addr = out_params.dpi_phys_addr;
 	dev->db_size = out_params.dpi_size;
 	dev->dpi = out_params.dpi;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index a92ca22e5de1..0cfd849b13d6 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -229,7 +229,7 @@ struct qedr_ucontext {
 	struct ib_ucontext ibucontext;
 	struct qedr_dev *dev;
 	struct qedr_pd *pd;
-	u64 dpi_addr;
+	void __iomem *dpi_addr;
 	u64 dpi_phys_addr;
 	u32 dpi_size;
 	u16 dpi;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index f900fde448db..95eba277f6e2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -799,9 +799,8 @@ static int qed_rdma_add_user(void *rdma_cxt,
 	/* Calculate the corresponding DPI address */
 	dpi_start_offset = p_hwfn->dpi_start_offset;
 
-	out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells +
-				     dpi_start_offset +
-				     ((out_params->dpi) * p_hwfn->dpi_size));
+	out_params->dpi_addr = p_hwfn->doorbells + dpi_start_offset +
+			       out_params->dpi * p_hwfn->dpi_size;
 
 	out_params->dpi_phys_addr = p_hwfn->db_phys_addr +
 				    dpi_start_offset +
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index 898f595ea3d6..74efca15fde7 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -225,7 +225,7 @@ struct qed_rdma_start_in_params {
 
 struct qed_rdma_add_user_out_params {
 	u16 dpi;
-	u64 dpi_addr;
+	void __iomem *dpi_addr;
 	u64 dpi_phys_addr;
 	u32 dpi_size;
 	u16 wid_count;
-- 
cgit v1.2.3


From b2b998c0f944993c9ef435569651e407d607af41 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Date: Sun, 9 Jun 2019 20:19:03 +0200
Subject: leds: class: Improve LED and LED flash class registration API

Replace of_led_classdev_register() with led_classdev_register_ext(), which
accepts easily extendable struct led_init_data, instead of the fixed
struct device_node argument. The latter can be now passed in an fwnode
property of the struct led_init_data.

The modification is driven by the need for passing additional arguments
required for the forthcoming generic mechanism for composing LED names.
Currently the LED name is conveyed in the "name" char pointer property of
the struct led_classdev. This is redundant since LED class device name
is accessible throughout the whole LED class device life time via
associated struct device's kobj->name property.

The change will not break any existing clients since the patch alters
also existing led_classdev{_flash}_register() macro wrappers, that pass
NULL in place of init_data, which leads to using legacy name
initialization path basing on the struct led_classdev's "name" property.

Three existing users of devm_of_led_classdev_registers() are modified
to use devm_led_classdev_register(), which will not impact their
operation since they in fact didn't need to pass struct device_node on
registration from the beginning.

Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: Baolin Wang <baolin.wang@linaro.org>
Cc: Dan Murphy <dmurphy@ti.com>
Cc: Daniel Mack <daniel@zonque.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Oleh Kravchenko <oleg@kaa.org.ua>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Simon Shields <simon@lineageos.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/led-class-flash.c  |  9 +++++----
 drivers/leds/led-class.c        | 29 +++++++++++++++++------------
 drivers/leds/leds-cr0014114.c   |  3 +--
 drivers/leds/leds-gpio.c        |  2 +-
 drivers/leds/leds-max77650.c    |  2 +-
 drivers/leds/leds-pwm.c         |  3 +--
 include/linux/led-class-flash.h | 15 ++++++++++-----
 include/linux/leds.h            | 34 ++++++++++++++++++++++++----------
 8 files changed, 60 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/leds/led-class-flash.c b/drivers/leds/led-class-flash.c
index 94980c654d89..60c3de5c6b9f 100644
--- a/drivers/leds/led-class-flash.c
+++ b/drivers/leds/led-class-flash.c
@@ -282,8 +282,9 @@ static void led_flash_init_sysfs_groups(struct led_classdev_flash *fled_cdev)
 	led_cdev->groups = flash_groups;
 }
 
-int led_classdev_flash_register(struct device *parent,
-				struct led_classdev_flash *fled_cdev)
+int led_classdev_flash_register_ext(struct device *parent,
+				    struct led_classdev_flash *fled_cdev,
+				    struct led_init_data *init_data)
 {
 	struct led_classdev *led_cdev;
 	const struct led_flash_ops *ops;
@@ -309,13 +310,13 @@ int led_classdev_flash_register(struct device *parent,
 	}
 
 	/* Register led class device */
-	ret = led_classdev_register(parent, led_cdev);
+	ret = led_classdev_register_ext(parent, led_cdev, init_data);
 	if (ret < 0)
 		return ret;
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(led_classdev_flash_register);
+EXPORT_SYMBOL_GPL(led_classdev_flash_register_ext);
 
 void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev)
 {
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 4793e77808e2..242122f49333 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -14,6 +14,7 @@
 #include <linux/leds.h>
 #include <linux/list.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
@@ -242,14 +243,16 @@ static int led_classdev_next_name(const char *init_name, char *name,
 }
 
 /**
- * of_led_classdev_register - register a new object of led_classdev class.
+ * led_classdev_register_ext - register a new object of led_classdev class
+ *			       with init data.
  *
  * @parent: parent of LED device
  * @led_cdev: the led_classdev structure for this device.
- * @np: DT node describing this LED
+ * @init_data: LED class device initialization data
  */
-int of_led_classdev_register(struct device *parent, struct device_node *np,
-			    struct led_classdev *led_cdev)
+int led_classdev_register_ext(struct device *parent,
+			      struct led_classdev *led_cdev,
+			      struct led_init_data *init_data)
 {
 	char name[LED_MAX_NAME_SIZE];
 	int ret;
@@ -266,7 +269,8 @@ int of_led_classdev_register(struct device *parent, struct device_node *np,
 		mutex_unlock(&led_cdev->led_access);
 		return PTR_ERR(led_cdev->dev);
 	}
-	led_cdev->dev->of_node = np;
+	if (init_data && init_data->fwnode)
+		led_cdev->dev->of_node = to_of_node(init_data->fwnode);
 
 	if (ret)
 		dev_warn(parent, "Led %s renamed to %s due to name collision",
@@ -311,7 +315,7 @@ int of_led_classdev_register(struct device *parent, struct device_node *np,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(of_led_classdev_register);
+EXPORT_SYMBOL_GPL(led_classdev_register_ext);
 
 /**
  * led_classdev_unregister - unregisters a object of led_properties class.
@@ -356,14 +360,15 @@ static void devm_led_classdev_release(struct device *dev, void *res)
 }
 
 /**
- * devm_of_led_classdev_register - resource managed led_classdev_register()
+ * devm_led_classdev_register_ext - resource managed led_classdev_register_ext()
  *
  * @parent: parent of LED device
  * @led_cdev: the led_classdev structure for this device.
+ * @init_data: LED class device initialization data
  */
-int devm_of_led_classdev_register(struct device *parent,
-				  struct device_node *np,
-				  struct led_classdev *led_cdev)
+int devm_led_classdev_register_ext(struct device *parent,
+				   struct led_classdev *led_cdev,
+				   struct led_init_data *init_data)
 {
 	struct led_classdev **dr;
 	int rc;
@@ -372,7 +377,7 @@ int devm_of_led_classdev_register(struct device *parent,
 	if (!dr)
 		return -ENOMEM;
 
-	rc = of_led_classdev_register(parent, np, led_cdev);
+	rc = led_classdev_register_ext(parent, led_cdev, init_data);
 	if (rc) {
 		devres_free(dr);
 		return rc;
@@ -383,7 +388,7 @@ int devm_of_led_classdev_register(struct device *parent,
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(devm_of_led_classdev_register);
+EXPORT_SYMBOL_GPL(devm_led_classdev_register_ext);
 
 static int devm_led_classdev_match(struct device *dev, void *res, void *data)
 {
diff --git a/drivers/leds/leds-cr0014114.c b/drivers/leds/leds-cr0014114.c
index 0e4262462cb9..1c82152764d2 100644
--- a/drivers/leds/leds-cr0014114.c
+++ b/drivers/leds/leds-cr0014114.c
@@ -207,8 +207,7 @@ static int cr0014114_probe_dt(struct cr0014114 *priv)
 		led->ldev.max_brightness	  = CR_MAX_BRIGHTNESS;
 		led->ldev.brightness_set_blocking = cr0014114_set_sync;
 
-		ret = devm_of_led_classdev_register(priv->dev, np,
-						    &led->ldev);
+		ret = devm_led_classdev_register(priv->dev, &led->ldev);
 		if (ret) {
 			dev_err(priv->dev,
 				"failed to register LED device %s, err %d",
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index bdc98ddca1dc..8f463c912db8 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -108,7 +108,7 @@ static int create_gpio_led(const struct gpio_led *template,
 	if (ret < 0)
 		return ret;
 
-	return devm_of_led_classdev_register(parent, np, &led_dat->cdev);
+	return devm_led_classdev_register(parent, &led_dat->cdev);
 }
 
 struct gpio_leds_priv {
diff --git a/drivers/leds/leds-max77650.c b/drivers/leds/leds-max77650.c
index 04738324b3e6..5a14f9775b0e 100644
--- a/drivers/leds/leds-max77650.c
+++ b/drivers/leds/leds-max77650.c
@@ -118,7 +118,7 @@ static int max77650_led_probe(struct platform_device *pdev)
 		of_property_read_string(child, "linux,default-trigger",
 					&led->cdev.default_trigger);
 
-		rv = devm_of_led_classdev_register(dev, child, &led->cdev);
+		rv = devm_led_classdev_register(dev, &led->cdev);
 		if (rv)
 			goto err_node_put;
 
diff --git a/drivers/leds/leds-pwm.c b/drivers/leds/leds-pwm.c
index 48d068f80f11..d0e1f2710351 100644
--- a/drivers/leds/leds-pwm.c
+++ b/drivers/leds/leds-pwm.c
@@ -111,8 +111,7 @@ static int led_pwm_add(struct device *dev, struct led_pwm_priv *priv,
 	if (!led_data->period && (led->pwm_period_ns > 0))
 		led_data->period = led->pwm_period_ns;
 
-	ret = devm_of_led_classdev_register(dev, to_of_node(fwnode),
-					    &led_data->cdev);
+	ret = devm_led_classdev_register(dev, &led_data->cdev);
 	if (ret == 0) {
 		priv->num_leds++;
 		led_pwm_set(&led_data->cdev, led_data->cdev.brightness);
diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h
index f52713f0a269..1e824963af17 100644
--- a/include/linux/led-class-flash.h
+++ b/include/linux/led-class-flash.h
@@ -86,15 +86,20 @@ static inline struct led_classdev_flash *lcdev_to_flcdev(
 }
 
 /**
- * led_classdev_flash_register - register a new object of led_classdev class
- *				 with support for flash LEDs
- * @parent: the flash LED to register
+ * led_classdev_flash_register_ext - register a new object of LED class with
+ *				     init data and with support for flash LEDs
+ * @parent: LED flash controller device this flash LED is driven by
  * @fled_cdev: the led_classdev_flash structure for this device
+ * @init_data: the LED class flash device initialization data
  *
  * Returns: 0 on success or negative error value on failure
  */
-extern int led_classdev_flash_register(struct device *parent,
-				struct led_classdev_flash *fled_cdev);
+extern int led_classdev_flash_register_ext(struct device *parent,
+					struct led_classdev_flash *fled_cdev,
+					struct led_init_data *init_data);
+
+#define led_classdev_flash_register(parent, fled_cdev)		\
+	led_classdev_flash_register_ext(parent, fled_cdev, NULL)
 
 /**
  * led_classdev_flash_unregister - unregisters an object of led_classdev class
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 9b2bf574a17a..2bbe8a38fe39 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -30,6 +30,11 @@ enum led_brightness {
 	LED_FULL	= 255,
 };
 
+struct led_init_data {
+	/* device fwnode handle */
+	struct fwnode_handle *fwnode;
+};
+
 struct led_classdev {
 	const char		*name;
 	enum led_brightness	 brightness;
@@ -125,16 +130,25 @@ struct led_classdev {
 	struct mutex		led_access;
 };
 
-extern int of_led_classdev_register(struct device *parent,
-				    struct device_node *np,
-				    struct led_classdev *led_cdev);
-#define led_classdev_register(parent, led_cdev)				\
-	of_led_classdev_register(parent, NULL, led_cdev)
-extern int devm_of_led_classdev_register(struct device *parent,
-					 struct device_node *np,
-					 struct led_classdev *led_cdev);
-#define devm_led_classdev_register(parent, led_cdev)			\
-	devm_of_led_classdev_register(parent, NULL, led_cdev)
+/**
+ * led_classdev_register_ext - register a new object of LED class with
+ *			       init data
+ * @parent: LED controller device this LED is driven by
+ * @led_cdev: the led_classdev structure for this device
+ * @init_data: the LED class device initialization data
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+extern int led_classdev_register_ext(struct device *parent,
+				     struct led_classdev *led_cdev,
+				     struct led_init_data *init_data);
+#define led_classdev_register(parent, led_cdev)			\
+	led_classdev_register_ext(parent, led_cdev, NULL)
+extern int devm_led_classdev_register_ext(struct device *parent,
+					  struct led_classdev *led_cdev,
+					  struct led_init_data *init_data);
+#define devm_led_classdev_register(parent, led_cdev)		\
+	devm_led_classdev_register_ext(parent, led_cdev, NULL)
 extern void led_classdev_unregister(struct led_classdev *led_cdev);
 extern void devm_led_classdev_unregister(struct device *parent,
 					 struct led_classdev *led_cdev);
-- 
cgit v1.2.3


From 853a78a7d6c7734f5aaa9c2a866cbd1f2a658892 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Date: Sun, 9 Jun 2019 20:19:04 +0200
Subject: dt-bindings: leds: Add LED_COLOR_ID definitions

Add common LED color identifiers.

Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: Baolin Wang <baolin.wang@linaro.org>
Cc: Daniel Mack <daniel@zonque.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Oleh Kravchenko <oleg@kaa.org.ua>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Simon Shields <simon@lineageos.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Dan Murphy <dmurphy@ti.com>
---
 include/dt-bindings/leds/common.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/leds/common.h b/include/dt-bindings/leds/common.h
index e171d0a6beb2..217ee9e0dd6c 100644
--- a/include/dt-bindings/leds/common.h
+++ b/include/dt-bindings/leds/common.h
@@ -19,4 +19,15 @@
 #define LEDS_BOOST_ADAPTIVE	1
 #define LEDS_BOOST_FIXED	2
 
+/* Standard LED colors */
+#define LED_COLOR_ID_WHITE	0
+#define LED_COLOR_ID_RED	1
+#define LED_COLOR_ID_GREEN	2
+#define LED_COLOR_ID_BLUE	3
+#define LED_COLOR_ID_AMBER	4
+#define LED_COLOR_ID_VIOLET	5
+#define LED_COLOR_ID_YELLOW	6
+#define LED_COLOR_ID_IR		7
+#define LED_COLOR_ID_MAX	8
+
 #endif /* __DT_BINDINGS_LEDS_H */
-- 
cgit v1.2.3


From 2f430310f7b4216408aaae57c55a596091c47846 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Date: Sun, 9 Jun 2019 20:19:04 +0200
Subject: dt-bindings: leds: Add LED_FUNCTION definitions

Add initial set of common LED function definitions.

Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: Baolin Wang <baolin.wang@linaro.org>
Cc: Daniel Mack <daniel@zonque.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Oleh Kravchenko <oleg@kaa.org.ua>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Simon Shields <simon@lineageos.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Dan Murphy <dmurphy@ti.com>
---
 include/dt-bindings/leds/common.h | 44 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/leds/common.h b/include/dt-bindings/leds/common.h
index 217ee9e0dd6c..9e1256a7c1bf 100644
--- a/include/dt-bindings/leds/common.h
+++ b/include/dt-bindings/leds/common.h
@@ -3,8 +3,9 @@
  * This header provides macros for the common LEDs device tree bindings.
  *
  * Copyright (C) 2015, Samsung Electronics Co., Ltd.
- *
  * Author: Jacek Anaszewski <j.anaszewski@samsung.com>
+ *
+ * Copyright (C) 2019 Jacek Anaszewski <jacek.anaszewski@gmail.com>
  */
 
 #ifndef __DT_BINDINGS_LEDS_H
@@ -30,4 +31,45 @@
 #define LED_COLOR_ID_IR		7
 #define LED_COLOR_ID_MAX	8
 
+/* Standard LED functions */
+#define LED_FUNCTION_ACTIVITY "activity"
+#define LED_FUNCTION_ALARM "alarm"
+#define LED_FUNCTION_BACKLIGHT "backlight"
+#define LED_FUNCTION_BLUETOOTH "bluetooth"
+#define LED_FUNCTION_BOOT "boot"
+#define LED_FUNCTION_CPU "cpu"
+#define LED_FUNCTION_CAPSLOCK "capslock"
+#define LED_FUNCTION_CHARGING "charging"
+#define LED_FUNCTION_DEBUG "debug"
+#define LED_FUNCTION_DISK "disk"
+#define LED_FUNCTION_DISK_ACTIVITY "disk-activity"
+#define LED_FUNCTION_DISK_ERR "disk-err"
+#define LED_FUNCTION_DISK_READ "disk-read"
+#define LED_FUNCTION_DISK_WRITE "disk-write"
+#define LED_FUNCTION_FAULT "fault"
+#define LED_FUNCTION_FLASH "flash"
+#define LED_FUNCTION_HEARTBEAT "heartbeat"
+#define LED_FUNCTION_INDICATOR "indicator"
+#define LED_FUNCTION_KBD_BACKLIGHT "kbd_backlight"
+#define LED_FUNCTION_LAN "lan"
+#define LED_FUNCTION_MAIL "mail"
+#define LED_FUNCTION_MTD "mtd"
+#define LED_FUNCTION_MICMUTE "micmute"
+#define LED_FUNCTION_MUTE "mute"
+#define LED_FUNCTION_NUMLOCK "numlock"
+#define LED_FUNCTION_PANIC "panic"
+#define LED_FUNCTION_PROGRAMMING "programming"
+#define LED_FUNCTION_POWER "power"
+#define LED_FUNCTION_RX "rx"
+#define LED_FUNCTION_SD "sd"
+#define LED_FUNCTION_SCROLLLOCK "scrolllock"
+#define LED_FUNCTION_STANDBY "standby"
+#define LED_FUNCTION_STATUS "status"
+#define LED_FUNCTION_TORCH "torch"
+#define LED_FUNCTION_TX "tx"
+#define LED_FUNCTION_USB "usb"
+#define LED_FUNCTION_WAN "wan"
+#define LED_FUNCTION_WLAN "wlan"
+#define LED_FUNCTION_WPS "wps"
+
 #endif /* __DT_BINDINGS_LEDS_H */
-- 
cgit v1.2.3


From bb4e9af0348dfeafd66c7e7f82e8a0983fe5390c Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Date: Sun, 9 Jun 2019 20:19:04 +0200
Subject: leds: core: Add support for composing LED class device names

Add generic support for composing LED class device name. The newly
introduced led_compose_name() function composes device name according
to either <color:function> or <devicename:color:function> pattern,
depending on the configuration of initialization data.

Backward compatibility with in-driver hard-coded LED class device
names is assured thanks to the default_label and devicename properties
of newly introduced struct led_init_data.

In case none of the aforementioned properties was found, then, for OF
nodes, the node name is adopted for LED class device name.

At the occassion of amending the Documentation/leds/leds-class.txt
unify spelling: colour -> color.

Alongside these changes added is a new tool - tools/leds/get_led_device_info.sh.
The tool allows retrieving details of a LED class device's parent device,
which proves that using vendor or product name for devicename part
of LED name doesn't convey any added value since that information had been
already available in sysfs. The script performs also basic validation
of a LED class device name.

Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: Baolin Wang <baolin.wang@linaro.org>
Cc: Dan Murphy <dmurphy@ti.com>
Cc: Daniel Mack <daniel@zonque.org>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Oleh Kravchenko <oleg@kaa.org.ua>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Simon Shields <simon@lineageos.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 Documentation/leds/leds-class.rst |  70 ++++++++++++-
 drivers/leds/led-class.c          |  20 +++-
 drivers/leds/led-core.c           | 127 ++++++++++++++++++++++++
 drivers/leds/leds.h               |   1 +
 include/linux/leds.h              |  45 +++++++++
 tools/leds/get_led_device_info.sh | 201 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 458 insertions(+), 6 deletions(-)
 create mode 100755 tools/leds/get_led_device_info.sh

(limited to 'include')

diff --git a/Documentation/leds/leds-class.rst b/Documentation/leds/leds-class.rst
index df0120a1ee3c..a0708d3f3d0b 100644
--- a/Documentation/leds/leds-class.rst
+++ b/Documentation/leds/leds-class.rst
@@ -43,9 +43,73 @@ LED Device Naming
 
 Is currently of the form:
 
-	"devicename:colour:function"
-
-There have been calls for LED properties such as colour to be exported as
+	"devicename:color:function"
+
+- devicename:
+        it should refer to a unique identifier created by the kernel,
+        like e.g. phyN for network devices or inputN for input devices, rather
+        than to the hardware; the information related to the product and the bus
+        to which given device is hooked is available in sysfs and can be
+        retrieved using get_led_device_info.sh script from tools/leds; generally
+        this section is expected mostly for LEDs that are somehow associated with
+        other devices.
+
+- color:
+        one of LED_COLOR_ID_* definitions from the header
+        include/dt-bindings/leds/common.h.
+
+- function:
+        one of LED_FUNCTION_* definitions from the header
+        include/dt-bindings/leds/common.h.
+
+If required color or function is missing, please submit a patch
+to linux-leds@vger.kernel.org.
+
+It is possible that more than one LED with the same color and function will
+be required for given platform, differing only with an ordinal number.
+In this case it is preferable to just concatenate the predefined LED_FUNCTION_*
+name with required "-N" suffix in the driver. fwnode based drivers can use
+function-enumerator property for that and then the concatenation will be handled
+automatically by the LED core upon LED class device registration.
+
+LED subsystem has also a protection against name clash, that may occur
+when LED class device is created by a driver of hot-pluggable device and
+it doesn't provide unique devicename section. In this case numerical
+suffix (e.g. "_1", "_2", "_3" etc.) is added to the requested LED class
+device name.
+
+There might be still LED class drivers around using vendor or product name
+for devicename, but this approach is now deprecated as it doesn't convey
+any added value. Product information can be found in other places in sysfs
+(see tools/leds/get_led_device_info.sh).
+
+Examples of proper LED names:
+
+  - "red:disk"
+  - "white:flash"
+  - "red:indicator"
+  - "phy1:green:wlan"
+  - "phy3::wlan"
+  - ":kbd_backlight"
+  - "input5::kbd_backlight"
+  - "input3::numlock"
+  - "input3::scrolllock"
+  - "input3::capslock"
+  - "mmc1::status"
+  - "white:status"
+
+get_led_device_info.sh script can be used for verifying if the LED name
+meets the requirements pointed out here. It performs validation of the LED class
+devicename sections and gives hints on expected value for a section in case
+the validation fails for it. So far the script supports validation
+of associations between LEDs and following types of devices:
+
+        - input devices
+        - ieee80211 compliant USB devices
+
+The script is open to extensions.
+
+There have been calls for LED properties such as color to be exported as
 individual led class attributes. As a solution which doesn't incur as much
 overhead, I suggest these become part of the device name. The naming scheme
 above leaves scope for further attributes should they be needed. If sections
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 242122f49333..508d6477d0b8 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -254,17 +254,31 @@ int led_classdev_register_ext(struct device *parent,
 			      struct led_classdev *led_cdev,
 			      struct led_init_data *init_data)
 {
-	char name[LED_MAX_NAME_SIZE];
+	char composed_name[LED_MAX_NAME_SIZE];
+	char final_name[LED_MAX_NAME_SIZE];
+	const char *proposed_name = composed_name;
 	int ret;
 
-	ret = led_classdev_next_name(led_cdev->name, name, sizeof(name));
+	if (init_data) {
+		if (init_data->devname_mandatory && !init_data->devicename) {
+			dev_err(parent, "Mandatory device name is missing");
+			return -EINVAL;
+		}
+		ret = led_compose_name(parent, init_data, composed_name);
+		if (ret < 0)
+			return ret;
+	} else {
+		proposed_name = led_cdev->name;
+	}
+
+	ret = led_classdev_next_name(proposed_name, final_name, sizeof(final_name));
 	if (ret < 0)
 		return ret;
 
 	mutex_init(&led_cdev->led_access);
 	mutex_lock(&led_cdev->led_access);
 	led_cdev->dev = device_create_with_groups(leds_class, parent, 0,
-				led_cdev, led_cdev->groups, "%s", name);
+				led_cdev, led_cdev->groups, "%s", final_name);
 	if (IS_ERR(led_cdev->dev)) {
 		mutex_unlock(&led_cdev->led_access);
 		return PTR_ERR(led_cdev->dev);
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 7107cd7e87cf..f0c1c403f678 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -13,8 +13,10 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
+#include <linux/property.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
+#include <uapi/linux/uleds.h>
 #include "leds.h"
 
 DECLARE_RWSEM(leds_list_lock);
@@ -23,6 +25,18 @@ EXPORT_SYMBOL_GPL(leds_list_lock);
 LIST_HEAD(leds_list);
 EXPORT_SYMBOL_GPL(leds_list);
 
+const char * const led_colors[LED_COLOR_ID_MAX] = {
+	[LED_COLOR_ID_WHITE] = "white",
+	[LED_COLOR_ID_RED] = "red",
+	[LED_COLOR_ID_GREEN] = "green",
+	[LED_COLOR_ID_BLUE] = "blue",
+	[LED_COLOR_ID_AMBER] = "amber",
+	[LED_COLOR_ID_VIOLET] = "violet",
+	[LED_COLOR_ID_YELLOW] = "yellow",
+	[LED_COLOR_ID_IR] = "ir",
+};
+EXPORT_SYMBOL_GPL(led_colors);
+
 static int __led_set_brightness(struct led_classdev *led_cdev,
 				enum led_brightness value)
 {
@@ -353,3 +367,116 @@ void led_sysfs_enable(struct led_classdev *led_cdev)
 	led_cdev->flags &= ~LED_SYSFS_DISABLE;
 }
 EXPORT_SYMBOL_GPL(led_sysfs_enable);
+
+static void led_parse_fwnode_props(struct device *dev,
+				   struct fwnode_handle *fwnode,
+				   struct led_properties *props)
+{
+	int ret;
+
+	if (!fwnode)
+		return;
+
+	if (fwnode_property_present(fwnode, "label")) {
+		ret = fwnode_property_read_string(fwnode, "label", &props->label);
+		if (ret)
+			dev_err(dev, "Error parsing 'label' property (%d)\n", ret);
+		return;
+	}
+
+	if (fwnode_property_present(fwnode, "color")) {
+		ret = fwnode_property_read_u32(fwnode, "color", &props->color);
+		if (ret)
+			dev_err(dev, "Error parsing 'color' property (%d)\n", ret);
+		else if (props->color >= LED_COLOR_ID_MAX)
+			dev_err(dev, "LED color identifier out of range\n");
+		else
+			props->color_present = true;
+	}
+
+
+	if (!fwnode_property_present(fwnode, "function"))
+		return;
+
+	ret = fwnode_property_read_string(fwnode, "function", &props->function);
+	if (ret) {
+		dev_err(dev,
+			"Error parsing 'function' property (%d)\n",
+			ret);
+	}
+
+	if (!fwnode_property_present(fwnode, "function-enumerator"))
+		return;
+
+	ret = fwnode_property_read_u32(fwnode, "function-enumerator",
+				       &props->func_enum);
+	if (ret) {
+		dev_err(dev,
+			"Error parsing 'function-enumerator' property (%d)\n",
+			ret);
+	} else {
+		props->func_enum_present = true;
+	}
+}
+
+int led_compose_name(struct device *dev, struct led_init_data *init_data,
+		     char *led_classdev_name)
+{
+	struct led_properties props = {};
+	struct fwnode_handle *fwnode = init_data->fwnode;
+	const char *devicename = init_data->devicename;
+
+	if (!led_classdev_name)
+		return -EINVAL;
+
+	led_parse_fwnode_props(dev, fwnode, &props);
+
+	if (props.label) {
+		/*
+		 * If init_data.devicename is NULL, then it indicates that
+		 * DT label should be used as-is for LED class device name.
+		 * Otherwise the label is prepended with devicename to compose
+		 * the final LED class device name.
+		 */
+		if (!devicename) {
+			strscpy(led_classdev_name, props.label,
+				LED_MAX_NAME_SIZE);
+		} else {
+			snprintf(led_classdev_name, LED_MAX_NAME_SIZE, "%s:%s",
+				 devicename, props.label);
+		}
+	} else if (props.function || props.color_present) {
+		char tmp_buf[LED_MAX_NAME_SIZE];
+
+		if (props.func_enum_present) {
+			snprintf(tmp_buf, LED_MAX_NAME_SIZE, "%s:%s-%d",
+				 props.color_present ? led_colors[props.color] : "",
+				 props.function ?: "", props.func_enum);
+		} else {
+			snprintf(tmp_buf, LED_MAX_NAME_SIZE, "%s:%s",
+				 props.color_present ? led_colors[props.color] : "",
+				 props.function ?: "");
+		}
+		if (init_data->devname_mandatory) {
+			snprintf(led_classdev_name, LED_MAX_NAME_SIZE, "%s:%s",
+				 devicename, tmp_buf);
+		} else {
+			strscpy(led_classdev_name, tmp_buf, LED_MAX_NAME_SIZE);
+
+		}
+	} else if (init_data->default_label) {
+		if (!devicename) {
+			dev_err(dev, "Legacy LED naming requires devicename segment");
+			return -EINVAL;
+		}
+		snprintf(led_classdev_name, LED_MAX_NAME_SIZE, "%s:%s",
+			 devicename, init_data->default_label);
+	} else if (is_of_node(fwnode)) {
+		strscpy(led_classdev_name, to_of_node(fwnode)->name,
+			LED_MAX_NAME_SIZE);
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(led_compose_name);
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index 47b229469069..0b577cece8f7 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -27,5 +27,6 @@ void led_set_brightness_nosleep(struct led_classdev *led_cdev,
 extern struct rw_semaphore leds_list_lock;
 extern struct list_head leds_list;
 extern struct list_head trigger_list;
+extern const char * const led_colors[LED_COLOR_ID_MAX];
 
 #endif	/* __LEDS_H_INCLUDED */
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 2bbe8a38fe39..d101fd13e18e 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -8,6 +8,7 @@
 #ifndef __LINUX_LEDS_H_INCLUDED
 #define __LINUX_LEDS_H_INCLUDED
 
+#include <dt-bindings/leds/common.h>
 #include <linux/device.h>
 #include <linux/kernfs.h>
 #include <linux/list.h>
@@ -33,6 +34,25 @@ enum led_brightness {
 struct led_init_data {
 	/* device fwnode handle */
 	struct fwnode_handle *fwnode;
+	/*
+	 * default <color:function> tuple, for backward compatibility
+	 * with in-driver hard-coded LED names used as a fallback when
+	 * DT "label" property is absent; it should be set to NULL
+	 * in new LED class drivers.
+	 */
+	const char *default_label;
+	/*
+	 * string to be used for devicename section of LED class device
+	 * either for label based LED name composition path or for fwnode
+	 * based when devname_mandatory is true
+	 */
+	const char *devicename;
+	/*
+	 * indicates if LED name should always comprise devicename section;
+	 * only LEDs exposed by drivers of hot-pluggable devices should
+	 * set it to true
+	 */
+	bool devname_mandatory;
 };
 
 struct led_classdev {
@@ -257,6 +277,22 @@ extern void led_sysfs_disable(struct led_classdev *led_cdev);
  */
 extern void led_sysfs_enable(struct led_classdev *led_cdev);
 
+/**
+ * led_compose_name - compose LED class device name
+ * @dev: LED controller device object
+ * @child: child fwnode_handle describing a LED or a group of synchronized LEDs;
+ *	   it must be provided only for fwnode based LEDs
+ * @led_classdev_name: composed LED class device name
+ *
+ * Create LED class device name basing on the provided init_data argument.
+ * The name can have <devicename:color:function> or <color:function>.
+ * form, depending on the init_data configuration.
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+extern int led_compose_name(struct device *dev, struct led_init_data *init_data,
+			    char *led_classdev_name);
+
 /**
  * led_sysfs_is_disabled - check if LED sysfs interface is disabled
  * @led_cdev: the LED to query
@@ -434,6 +470,15 @@ struct led_platform_data {
 	struct led_info	*leds;
 };
 
+struct led_properties {
+	u32		color;
+	bool		color_present;
+	const char	*function;
+	u32		func_enum;
+	bool		func_enum_present;
+	const char	*label;
+};
+
 struct gpio_desc;
 typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state,
 				unsigned long *delay_on,
diff --git a/tools/leds/get_led_device_info.sh b/tools/leds/get_led_device_info.sh
new file mode 100755
index 000000000000..ccfa442db5fe
--- /dev/null
+++ b/tools/leds/get_led_device_info.sh
@@ -0,0 +1,201 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+led_common_defs_path="include/dt-bindings/leds/common.h"
+
+num_args=$#
+if [ $num_args -eq 1 ]; then
+        linux_top=$(dirname `realpath $0` | awk -F/     \
+                        '{                              \
+                                i=1;                    \
+                                while (i <= NF - 2) {   \
+                                        printf $i"/";   \
+                                        i++;            \
+                                };                      \
+                        }')
+	led_defs_path=$linux_top/$led_common_defs_path
+elif [ $num_args -eq 2 ]; then
+        led_defs_path=`realpath $2`
+else
+	echo "Usage: get_led_device_info.sh LED_CDEV_PATH [LED_COMMON_DEFS_PATH]"
+	exit 1
+fi
+
+if [ ! -f $led_defs_path ]; then
+	echo "$led_defs_path doesn't exist"
+	exit 1
+fi
+
+led_cdev_path=`echo $1 | sed s'/\/$//'`
+
+ls "$led_cdev_path/brightness" > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+	echo "Device \"$led_cdev_path\" does not exist."
+	exit 1
+fi
+
+bus=`readlink $led_cdev_path/device/subsystem | sed s'/.*\///'`
+usb_subdev=`readlink $led_cdev_path | grep usb | sed s'/\(.*usb[0-9]*\/[0-9]*-[0-9]*\)\/.*/\1/'`
+ls "$led_cdev_path/device/of_node/compatible" > /dev/null 2>&1
+of_node_missing=$?
+
+if [ "$bus" = "input" ]; then
+	input_node=`readlink $led_cdev_path/device | sed s'/.*\///'`
+	if [ ! -z "$usb_subdev" ]; then
+		bus="usb"
+	fi
+fi
+
+if [ "$bus" = "usb" ]; then
+	usb_interface=`readlink $led_cdev_path | sed s'/.*\(usb[0-9]*\)/\1/' | cut -d\/ -f3`
+	cd $led_cdev_path/../$usb_subdev
+	driver=`readlink $usb_interface/driver | sed s'/.*\///'`
+	if [ -d "$usb_interface/ieee80211" ]; then
+		wifi_phy=`ls -l $usb_interface/ieee80211 | grep phy | awk '{print $9}'`
+	fi
+	idVendor=`cat idVendor`
+	idProduct=`cat idProduct`
+	manufacturer=`cat manufacturer`
+	product=`cat product`
+elif [ "$bus" = "input" ]; then
+	cd $led_cdev_path
+	product=`cat device/name`
+	driver=`cat device/device/driver/description`
+elif [ $of_node_missing -eq 0 ]; then
+	cd $led_cdev_path
+	compatible=`cat device/of_node/compatible`
+	if [ "$compatible" = "gpio-leds" ]; then
+		driver="leds-gpio"
+	elif [ "$compatible" = "pwm-leds" ]; then
+		driver="leds-pwm"
+	else
+		manufacturer=`echo $compatible | awk -F, '{print $1}'`
+		product=`echo $compatible | awk -F, '{print $2}'`
+	fi
+else
+	echo "Unknown device type."
+	exit 1
+fi
+
+printf "\n#####################################\n"
+printf "# LED class device hardware details #\n"
+printf "#####################################\n\n"
+
+printf "bus:\t\t\t$bus\n"
+
+if [ ! -z "$idVendor" ]; then
+	printf "idVendor:\t\t$idVendor\n"
+fi
+
+if [ ! -z "$idProduct" ]; then
+	printf "idProduct:\t\t$idProduct\n"
+fi
+
+if [ ! -z "$manufacturer" ]; then
+	printf "manufacturer:\t\t$manufacturer\n"
+fi
+
+if [ ! -z "$product" ]; then
+	printf "product:\t\t$product\n"
+fi
+
+if [ ! -z "$driver" ]; then
+	printf "driver:\t\t\t$driver\n"
+fi
+
+if [ ! -z "$input_node" ]; then
+	printf "associated input node:\t$input_node\n"
+fi
+
+printf "\n####################################\n"
+printf "# LED class device name validation #\n"
+printf "####################################\n\n"
+
+led_name=`echo $led_cdev_path | sed s'/.*\///'`
+
+num_sections=`echo $led_name | awk -F: '{print NF}'`
+
+if [ $num_sections -eq 1 ]; then
+	printf "\":\" delimiter not detected.\t[ FAILED ]\n"
+	exit 1
+elif [ $num_sections -eq 2 ]; then
+	color=`echo $led_name | cut -d: -f1`
+	function=`echo $led_name | cut -d: -f2`
+elif [ $num_sections -eq 3 ]; then
+	devicename=`echo $led_name | cut -d: -f1`
+	color=`echo $led_name | cut -d: -f2`
+	function=`echo $led_name | cut -d: -f3`
+else
+	printf "Detected %d sections in the LED class device name - should the script be updated?\n" $num_sections
+	exit 1
+fi
+
+S_DEV="devicename"
+S_CLR="color     "
+S_FUN="function  "
+status_tab=20
+
+print_msg_ok()
+{
+	local section_name="$1"
+	local section_val="$2"
+	local msg="$3"
+	printf "$section_name :\t%-${status_tab}.${status_tab}s %s %s\n" "$section_val" "[ OK ]    " "$msg"
+}
+
+print_msg_failed()
+{
+	local section_name="$1"
+	local section_val="$2"
+	local msg="$3"
+	printf "$section_name :\t%-${status_tab}.${status_tab}s %s %s\n" "$section_val" "[ FAILED ]" "$msg"
+}
+
+if [ ! -z "$input_node" ]; then
+	expected_devname=$input_node
+elif [ ! -z "$wifi_phy" ]; then
+	expected_devname=$wifi_phy
+fi
+
+if [ ! -z "$devicename" ]; then
+	if [ ! -z "$expected_devname" ]; then
+		if [ "$devicename" = "$expected_devname" ]; then
+			print_msg_ok "$S_DEV" "$devicename"
+		else
+			print_msg_failed "$S_DEV" "$devicename" "Expected: $expected_devname"
+		fi
+	else
+		if [ "$devicename" = "$manufacturer" ]; then
+			print_msg_failed "$S_DEV" "$devicename" "Redundant: use of vendor name is discouraged"
+		elif [ "$devicename" = "$product" ]; then
+			print_msg_failed "$S_DEV" "$devicename" "Redundant: use of product name is discouraged"
+		else
+			print_msg_failed "$S_DEV" "$devicename" "Unknown devicename - should the script be updated?"
+		fi
+	fi
+elif [ ! -z "$expected_devname" ]; then
+	print_msg_failed "$S_DEV" "blank" "Expected: $expected_devname"
+fi
+
+if [ ! -z "$color" ]; then
+	color_upper=`echo $color | tr [:lower:] [:upper:]`
+	color_id_definition=$(cat $led_defs_path | grep "_$color_upper\s" | awk '{print $2}')
+	if [ ! -z "$color_id_definition" ]; then
+		print_msg_ok "$S_CLR" "$color" "Matching definition: $color_id_definition"
+	else
+		print_msg_failed "$S_CLR" "$color" "Definition not found in $led_defs_path"
+	fi
+
+fi
+
+if [ ! -z "$function" ]; then
+	# strip optional enumerator
+	function=`echo $function | sed s'/\(.*\)-[0-9]*$/\1/'`
+	fun_definition=$(cat $led_defs_path | grep "\"$function\"" | awk '{print $2}')
+	if [ ! -z "$fun_definition" ]; then
+		print_msg_ok "$S_FUN" "$function" "Matching definition: $fun_definition"
+	else
+		print_msg_failed "$S_FUN" "$function" "Definition not found in $led_defs_path"
+	fi
+
+fi
-- 
cgit v1.2.3


From f8be17b81d44aed1f9ea68c3fc70f501c9616e2d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 23 Jul 2019 10:22:48 +0300
Subject: lib/dim: Fix -Wunused-const-variable warnings

DIM causes to the following warnings during kernel compilation
which indicates that tx_profile and rx_profile are supposed to
be declared in *.c and not in *.h files.

In file included from ./include/rdma/ib_verbs.h:64,
                 from ./include/linux/mlx5/device.h:37,
                 from ./include/linux/mlx5/driver.h:51,
                 from ./include/linux/mlx5/vport.h:36,
                 from drivers/infiniband/hw/mlx5/ib_virt.c:34:
./include/linux/dim.h:326:1: warning: _tx_profile_ defined but not used [-Wunused-const-variable=]
  326 | tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
      | ^~~~~~~~~~
./include/linux/dim.h:320:1: warning: _rx_profile_ defined but not used [-Wunused-const-variable=]
  320 | rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
      | ^~~~~~~~~~

Fixes: 4f75da3666c0 ("linux/dim: Move implementation to .c files")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dim.h | 56 -----------------------------------------------------
 lib/dim/net_dim.c   | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/linux/dim.h b/include/linux/dim.h
index d3a0fbfff2bb..9fa4b3f88c39 100644
--- a/include/linux/dim.h
+++ b/include/linux/dim.h
@@ -272,62 +272,6 @@ dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
 
 /* Net DIM */
 
-/*
- * Net DIM profiles:
- *        There are different set of profiles for each CQ period mode.
- *        There are different set of profiles for RX/TX CQs.
- *        Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
- */
-#define NET_DIM_PARAMS_NUM_PROFILES 5
-#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
-#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
-#define NET_DIM_DEF_PROFILE_CQE 1
-#define NET_DIM_DEF_PROFILE_EQE 1
-
-#define NET_DIM_RX_EQE_PROFILES { \
-	{1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-	{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
-}
-
-#define NET_DIM_RX_CQE_PROFILES { \
-	{2,  256},             \
-	{8,  128},             \
-	{16, 64},              \
-	{32, 64},              \
-	{64, 64}               \
-}
-
-#define NET_DIM_TX_EQE_PROFILES { \
-	{1,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{8,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{32,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{64,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
-	{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}   \
-}
-
-#define NET_DIM_TX_CQE_PROFILES { \
-	{5,  128},  \
-	{8,  64},  \
-	{16, 32},  \
-	{32, 32},  \
-	{64, 32}   \
-}
-
-static const struct dim_cq_moder
-rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
-	NET_DIM_RX_EQE_PROFILES,
-	NET_DIM_RX_CQE_PROFILES,
-};
-
-static const struct dim_cq_moder
-tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
-	NET_DIM_TX_EQE_PROFILES,
-	NET_DIM_TX_CQE_PROFILES,
-};
-
 /**
  *	net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
  *	@cq_period_mode: CQ period mode
diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c
index 5bcc902c5388..a4db51c21266 100644
--- a/lib/dim/net_dim.c
+++ b/lib/dim/net_dim.c
@@ -5,6 +5,62 @@
 
 #include <linux/dim.h>
 
+/*
+ * Net DIM profiles:
+ *        There are different set of profiles for each CQ period mode.
+ *        There are different set of profiles for RX/TX CQs.
+ *        Each profile size must be of NET_DIM_PARAMS_NUM_PROFILES
+ */
+#define NET_DIM_PARAMS_NUM_PROFILES 5
+#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
+#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
+#define NET_DIM_DEF_PROFILE_CQE 1
+#define NET_DIM_DEF_PROFILE_EQE 1
+
+#define NET_DIM_RX_EQE_PROFILES { \
+	{1,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{8,   NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{64,  NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+	{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
+}
+
+#define NET_DIM_RX_CQE_PROFILES { \
+	{2,  256},             \
+	{8,  128},             \
+	{16, 64},              \
+	{32, 64},              \
+	{64, 64}               \
+}
+
+#define NET_DIM_TX_EQE_PROFILES { \
+	{1,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+	{8,   NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+	{32,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+	{64,  NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE},  \
+	{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}   \
+}
+
+#define NET_DIM_TX_CQE_PROFILES { \
+	{5,  128},  \
+	{8,  64},  \
+	{16, 32},  \
+	{32, 32},  \
+	{64, 32}   \
+}
+
+static const struct dim_cq_moder
+rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+	NET_DIM_RX_EQE_PROFILES,
+	NET_DIM_RX_CQE_PROFILES,
+};
+
+static const struct dim_cq_moder
+tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+	NET_DIM_TX_EQE_PROFILES,
+	NET_DIM_TX_CQE_PROFILES,
+};
+
 struct dim_cq_moder
 net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
 {
-- 
cgit v1.2.3


From 02712bc3250849c1cf99d626aea98f610e695f34 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 24 Jul 2019 08:52:53 +0200
Subject: mm/hmm: move hmm_vma_range_done and hmm_vma_fault to nouveau

These two functions are marked as a legacy APIs to get rid of, but seem to
suit the current nouveau flow.  Move it to the only user in preparation
for fixing a locking bug involving caller and callee.  All comments
referring to the old API have been removed as this now is a driver private
helper.

Link: https://lore.kernel.org/r/20190724065258.16603-3-hch@lst.de
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/nouveau/nouveau_svm.c | 46 +++++++++++++++++++++++++++--
 include/linux/hmm.h                   | 54 -----------------------------------
 2 files changed, 44 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 8c92374afcf2..6c1b04de0db8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -475,6 +475,48 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm,
 		fault->inst, fault->addr, fault->access);
 }
 
+static inline bool
+nouveau_range_done(struct hmm_range *range)
+{
+	bool ret = hmm_range_valid(range);
+
+	hmm_range_unregister(range);
+	return ret;
+}
+
+static int
+nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range,
+		    bool block)
+{
+	long ret;
+
+	range->default_flags = 0;
+	range->pfn_flags_mask = -1UL;
+
+	ret = hmm_range_register(range, mirror,
+				 range->start, range->end,
+				 PAGE_SHIFT);
+	if (ret)
+		return (int)ret;
+
+	if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
+		up_read(&range->vma->vm_mm->mmap_sem);
+		return -EAGAIN;
+	}
+
+	ret = hmm_range_fault(range, block);
+	if (ret <= 0) {
+		if (ret == -EBUSY || !ret) {
+			up_read(&range->vma->vm_mm->mmap_sem);
+			ret = -EBUSY;
+		} else if (ret == -EAGAIN)
+			ret = -EBUSY;
+		hmm_range_unregister(range);
+		return ret;
+	}
+	return 0;
+}
+
 static int
 nouveau_svm_fault(struct nvif_notify *notify)
 {
@@ -649,10 +691,10 @@ nouveau_svm_fault(struct nvif_notify *notify)
 		range.values = nouveau_svm_pfn_values;
 		range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
 again:
-		ret = hmm_vma_fault(&svmm->mirror, &range, true);
+		ret = nouveau_range_fault(&svmm->mirror, &range, true);
 		if (ret == 0) {
 			mutex_lock(&svmm->mutex);
-			if (!hmm_vma_range_done(&range)) {
+			if (!nouveau_range_done(&range)) {
 				mutex_unlock(&svmm->mutex);
 				goto again;
 			}
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index b8a08b2a10ca..7ef56dc18050 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -484,60 +484,6 @@ long hmm_range_dma_unmap(struct hmm_range *range,
  */
 #define HMM_RANGE_DEFAULT_TIMEOUT 1000
 
-/* This is a temporary helper to avoid merge conflict between trees. */
-static inline bool hmm_vma_range_done(struct hmm_range *range)
-{
-	bool ret = hmm_range_valid(range);
-
-	hmm_range_unregister(range);
-	return ret;
-}
-
-/* This is a temporary helper to avoid merge conflict between trees. */
-static inline int hmm_vma_fault(struct hmm_mirror *mirror,
-				struct hmm_range *range, bool block)
-{
-	long ret;
-
-	/*
-	 * With the old API the driver must set each individual entries with
-	 * the requested flags (valid, write, ...). So here we set the mask to
-	 * keep intact the entries provided by the driver and zero out the
-	 * default_flags.
-	 */
-	range->default_flags = 0;
-	range->pfn_flags_mask = -1UL;
-
-	ret = hmm_range_register(range, mirror,
-				 range->start, range->end,
-				 PAGE_SHIFT);
-	if (ret)
-		return (int)ret;
-
-	if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
-		/*
-		 * The mmap_sem was taken by driver we release it here and
-		 * returns -EAGAIN which correspond to mmap_sem have been
-		 * drop in the old API.
-		 */
-		up_read(&range->vma->vm_mm->mmap_sem);
-		return -EAGAIN;
-	}
-
-	ret = hmm_range_fault(range, block);
-	if (ret <= 0) {
-		if (ret == -EBUSY || !ret) {
-			/* Same as above, drop mmap_sem to match old API. */
-			up_read(&range->vma->vm_mm->mmap_sem);
-			ret = -EBUSY;
-		} else if (ret == -EAGAIN)
-			ret = -EBUSY;
-		hmm_range_unregister(range);
-		return ret;
-	}
-	return 0;
-}
-
 /* Below are for HMM internal use only! Not to be used by device driver! */
 static inline void hmm_mm_init(struct mm_struct *mm)
 {
-- 
cgit v1.2.3


From f32471e2cf87112b8f5dc10469b27c39c1a41722 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 24 Jul 2019 08:52:57 +0200
Subject: mm/hmm: remove the legacy hmm_pfn_* APIs

Switch the one remaining user in nouveau over to its replacement, and
remove all the wrappers.

Link: https://lore.kernel.org/r/20190724065258.16603-7-hch@lst.de
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c |  2 +-
 include/linux/hmm.h                    | 34 ----------------------------------
 2 files changed, 1 insertion(+), 35 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 1333220787a1..345c63cb752a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -845,7 +845,7 @@ nouveau_dmem_convert_pfn(struct nouveau_drm *drm,
 		struct page *page;
 		uint64_t addr;
 
-		page = hmm_pfn_to_page(range, range->pfns[i]);
+		page = hmm_device_entry_to_page(range, range->pfns[i]);
 		if (page == NULL)
 			continue;
 
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 7ef56dc18050..9f32586684c9 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -290,40 +290,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
 		range->flags[HMM_PFN_VALID];
 }
 
-/*
- * Old API:
- * hmm_pfn_to_page()
- * hmm_pfn_to_pfn()
- * hmm_pfn_from_page()
- * hmm_pfn_from_pfn()
- *
- * This are the OLD API please use new API, it is here to avoid cross-tree
- * merge painfullness ie we convert things to new API in stages.
- */
-static inline struct page *hmm_pfn_to_page(const struct hmm_range *range,
-					   uint64_t pfn)
-{
-	return hmm_device_entry_to_page(range, pfn);
-}
-
-static inline unsigned long hmm_pfn_to_pfn(const struct hmm_range *range,
-					   uint64_t pfn)
-{
-	return hmm_device_entry_to_pfn(range, pfn);
-}
-
-static inline uint64_t hmm_pfn_from_page(const struct hmm_range *range,
-					 struct page *page)
-{
-	return hmm_device_entry_from_page(range, page);
-}
-
-static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
-					unsigned long pfn)
-{
-	return hmm_device_entry_from_pfn(range, pfn);
-}
-
 /*
  * Mirroring: how to synchronize device page table with CPU page table.
  *
-- 
cgit v1.2.3


From 7a32f2962c56d9d8a836b4469855caeee8766bd4 Mon Sep 17 00:00:00 2001
From: Edward Srouji <edwards@mellanox.com>
Date: Tue, 23 Jul 2019 10:12:55 +0300
Subject: net/mlx5: Fix modify_cq_in alignment

Fix modify_cq_in alignment to match the device specification.
After this fix the 'cq_umem_valid' field will be in the right offset.

Cc: <stable@vger.kernel.org> # 4.19
Fixes: bd37197554eb ("net/mlx5: Update mlx5_ifc with DEVX UID bits")
Signed-off-by: Edward Srouji <edwards@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b3d5752657d9..ec571fd7fcf8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5975,10 +5975,12 @@ struct mlx5_ifc_modify_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x40];
+	u8         reserved_at_280[0x60];
 
 	u8         cq_umem_valid[0x1];
-	u8         reserved_at_2c1[0x5bf];
+	u8         reserved_at_2e1[0x1f];
+
+	u8         reserved_at_300[0x580];
 
 	u8         pas[0][0x40];
 };
-- 
cgit v1.2.3


From 90bb769291161cf25a818d69cf608c181654473e Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@mellanox.com>
Date: Sat, 6 Jul 2019 18:06:15 +0300
Subject: net/mlx5e: Prevent encap flow counter update async to user query

This patch prevents a race between user invoked cached counters
query and a neighbor last usage updater.

The cached flow counter stats can be queried by calling
"mlx5_fc_query_cached" which provides the number of bytes and
packets that passed via this flow since the last time this counter
was queried.
It does so by reducting the last saved stats from the current, cached
stats and then updating the last saved stats with the cached stats.
It also provide the lastuse value for that flow.

Since "mlx5e_tc_update_neigh_used_value" needs to retrieve the
last usage time of encapsulation flows, it calls the flow counter
query method periodically and async to user queries of the flow counter
using cls_flower.
This call is causing the driver to update the last reported bytes and
packets from the cache and therefore, future user queries of the flow
stats will return lower than expected number for bytes and packets
since the last saved stats in the driver was updated async to the last
saved stats in cls_flower.

This causes wrong stats presentation of encapsulation flows to user.

Since the neighbor usage updater only needs the lastuse stats from the
cached counter, the fix is to use a dedicated lastuse query call that
returns the lastuse value without synching between the cached stats and
the last saved stats.

Fixes: f6dfb4c3f216 ("net/mlx5e: Update neighbour 'used' state using HW flow rules counters")
Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c       | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 5 +++++
 include/linux/mlx5/fs.h                               | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cc096f6011d9..7ecfc53cf5f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1230,13 +1230,13 @@ static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
-	u64 bytes, packets, lastuse = 0;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5e_encap_entry *e;
 	struct mlx5_fc *counter;
 	struct neigh_table *tbl;
 	bool neigh_used = false;
 	struct neighbour *n;
+	u64 lastuse;
 
 	if (m_neigh->family == AF_INET)
 		tbl = &arp_tbl;
@@ -1256,7 +1256,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 					    encaps[efi->index]);
 			if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 				counter = mlx5e_tc_get_counter(flow);
-				mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
+				lastuse = mlx5_fc_query_lastuse(counter);
 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 					neigh_used = true;
 					break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index b3762123a69c..1834d9f3aa1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -369,6 +369,11 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
 }
 EXPORT_SYMBOL(mlx5_fc_query);
 
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter)
+{
+	return counter->cache.lastuse;
+}
+
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse)
 {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 04a569568eac..f049af3f3cd8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -220,6 +220,7 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
+u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
-- 
cgit v1.2.3


From 086f95682114fd2d1790bd3226e76cbae9a2d192 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 25 Jul 2019 15:52:25 -0700
Subject: bpf/flow_dissector: pass input flags to BPF flow dissector program

C flow dissector supports input flags that tell it to customize parsing
by either stopping early or trying to parse as deep as possible. Pass
those flags to the BPF flow dissector so it can make the same
decisions. In the next commits I'll add support for those flags to
our reference bpf_flow.c

v3:
* Export copy of flow dissector flags instead of moving (Alexei Starovoitov)

Acked-by: Petar Penkov <ppenkov@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Petar Penkov <ppenkov@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skbuff.h    |  2 +-
 include/uapi/linux/bpf.h  |  5 +++++
 net/bpf/test_run.c        |  2 +-
 net/core/flow_dissector.c | 12 ++++++++++--
 4 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 718742b1c505..9b7a8038beec 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1271,7 +1271,7 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
 
 struct bpf_flow_dissector;
 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
-		      __be16 proto, int nhoff, int hlen);
+		      __be16 proto, int nhoff, int hlen, unsigned int flags);
 
 bool __skb_flow_dissect(const struct net *net,
 			const struct sk_buff *skb,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fa1c753dcdbc..88b9d743036f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3507,6 +3507,10 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		(1U << 1)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
+
 struct bpf_flow_keys {
 	__u16	nhoff;
 	__u16	thoff;
@@ -3528,6 +3532,7 @@ struct bpf_flow_keys {
 			__u32	ipv6_dst[4];	/* in6_addr; network order */
 		};
 	};
+	__u32	flags;
 };
 
 struct bpf_func_info {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 80e6f3a6864d..4e41d15a1098 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -419,7 +419,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	time_start = ktime_get_ns();
 	for (i = 0; i < repeat; i++) {
 		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
-					  size);
+					  size, 0);
 
 		if (signal_pending(current)) {
 			preempt_enable();
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3e6fedb57bc1..50ed1a688709 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -784,7 +784,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 }
 
 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
-		      __be16 proto, int nhoff, int hlen)
+		      __be16 proto, int nhoff, int hlen, unsigned int flags)
 {
 	struct bpf_flow_keys *flow_keys = ctx->flow_keys;
 	u32 result;
@@ -795,6 +795,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 	flow_keys->nhoff = nhoff;
 	flow_keys->thoff = flow_keys->nhoff;
 
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG !=
+		     (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG);
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL !=
+		     (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+	BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP !=
+		     (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+	flow_keys->flags = flags;
+
 	preempt_disable();
 	result = BPF_PROG_RUN(prog, ctx);
 	preempt_enable();
@@ -914,7 +922,7 @@ bool __skb_flow_dissect(const struct net *net,
 			}
 
 			ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff,
-					       hlen);
+					       hlen, flags);
 			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
 						 target_container);
 			rcu_read_unlock();
-- 
cgit v1.2.3


From 71c99e32b926159ea628352751f66383d7d04d17 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 25 Jul 2019 15:52:30 -0700
Subject: bpf/flow_dissector: support ipv6 flow_label and
 BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL

Add support for exporting ipv6 flow label via bpf_flow_keys.
Export flow label from bpf_flow.c and also return early when
BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL is passed.

Acked-by: Petar Penkov <ppenkov@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Petar Penkov <ppenkov@google.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h                           |  1 +
 net/core/flow_dissector.c                          |  9 +++++
 tools/include/uapi/linux/bpf.h                     |  1 +
 .../selftests/bpf/prog_tests/flow_dissector.c      | 46 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_flow.c       | 10 +++++
 5 files changed, 67 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 88b9d743036f..e985f07a98ed 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3533,6 +3533,7 @@ struct bpf_flow_keys {
 		};
 	};
 	__u32	flags;
+	__be32	flow_label;
 };
 
 struct bpf_func_info {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 50ed1a688709..9741b593ea53 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -737,6 +737,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 	struct flow_dissector_key_basic *key_basic;
 	struct flow_dissector_key_addrs *key_addrs;
 	struct flow_dissector_key_ports *key_ports;
+	struct flow_dissector_key_tags *key_tags;
 
 	key_control = skb_flow_dissector_target(flow_dissector,
 						FLOW_DISSECTOR_KEY_CONTROL,
@@ -781,6 +782,14 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
 		key_ports->src = flow_keys->sport;
 		key_ports->dst = flow_keys->dport;
 	}
+
+	if (dissector_uses_key(flow_dissector,
+			       FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+		key_tags = skb_flow_dissector_target(flow_dissector,
+						     FLOW_DISSECTOR_KEY_FLOW_LABEL,
+						     target_container);
+		key_tags->flow_label = ntohl(flow_keys->flow_label);
+	}
 }
 
 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2e4b0848d795..3d7fc67ec1b8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3530,6 +3530,7 @@ struct bpf_flow_keys {
 		};
 	};
 	__u32	flags;
+	__be32	flow_label;
 };
 
 struct bpf_func_info {
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 8e8c18aced9b..ef83f145a6f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -20,6 +20,7 @@
 	      "is_encap=%u/%u "						\
 	      "ip_proto=0x%x/0x%x "					\
 	      "n_proto=0x%x/0x%x "					\
+	      "flow_label=0x%x/0x%x "					\
 	      "sport=%u/%u "						\
 	      "dport=%u/%u\n",						\
 	      got.nhoff, expected.nhoff,				\
@@ -30,6 +31,7 @@
 	      got.is_encap, expected.is_encap,				\
 	      got.ip_proto, expected.ip_proto,				\
 	      got.n_proto, expected.n_proto,				\
+	      got.flow_label, expected.flow_label,			\
 	      got.sport, expected.sport,				\
 	      got.dport, expected.dport)
 
@@ -257,6 +259,50 @@ struct test tests[] = {
 			.is_first_frag = true,
 		},
 	},
+	{
+		.name = "ipv6-flow-label",
+		.pkt.ipv6 = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+			.iph.nexthdr = IPPROTO_TCP,
+			.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+			.iph.flow_lbl = { 0xb, 0xee, 0xef },
+			.tcp.doff = 5,
+			.tcp.source = 80,
+			.tcp.dest = 8080,
+		},
+		.keys = {
+			.nhoff = ETH_HLEN,
+			.thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+			.addr_proto = ETH_P_IPV6,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+			.sport = 80,
+			.dport = 8080,
+			.flow_label = __bpf_constant_htonl(0xbeeef),
+		},
+	},
+	{
+		.name = "ipv6-no-flow-label",
+		.pkt.ipv6 = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+			.iph.nexthdr = IPPROTO_TCP,
+			.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+			.iph.flow_lbl = { 0xb, 0xee, 0xef },
+			.tcp.doff = 5,
+			.tcp.source = 80,
+			.tcp.dest = 8080,
+		},
+		.keys = {
+			.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+			.nhoff = ETH_HLEN,
+			.thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+			.addr_proto = ETH_P_IPV6,
+			.ip_proto = IPPROTO_TCP,
+			.n_proto = __bpf_constant_htons(ETH_P_IPV6),
+			.flow_label = __bpf_constant_htonl(0xbeeef),
+		},
+		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+	},
 };
 
 static int create_tap(const char *ifname)
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index f931cd3db9d4..7fbfa22f33df 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -83,6 +83,12 @@ static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
 	return ret;
 }
 
+#define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
+static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
+{
+	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
+}
+
 static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
 							 __u16 hdr_size,
 							 void *buffer)
@@ -308,6 +314,10 @@ PROG(IPV6)(struct __sk_buff *skb)
 
 	keys->thoff += sizeof(struct ipv6hdr);
 	keys->ip_proto = ip6h->nexthdr;
+	keys->flow_label = ip6_flowlabel(ip6h);
+
+	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+		return export_flow_keys(keys, BPF_OK);
 
 	return parse_ipv6_proto(skb, ip6h->nexthdr);
 }
-- 
cgit v1.2.3


From 9552389c465ed1ded39edf4a5642a861b53c2955 Mon Sep 17 00:00:00 2001
From: Gilad Ben-Yossef <gilad@benyossef.com>
Date: Tue, 2 Jul 2019 14:39:20 +0300
Subject: crypto: fips - add FIPS test failure notification chain

Crypto test failures in FIPS mode cause an immediate panic, but
on some system the cryptographic boundary extends beyond just
the Linux controlled domain.

Add a simple atomic notification chain to allow interested parties
to register to receive notification prior to us kicking the bucket.

Signed-off-by: Gilad Ben-Yossef <gilad@benyossef.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/fips.c        | 11 +++++++++++
 crypto/testmgr.c     |  4 +++-
 include/linux/fips.h |  7 +++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/crypto/fips.c b/crypto/fips.c
index c0b3a3c3452d..7b1d8caee669 100644
--- a/crypto/fips.c
+++ b/crypto/fips.c
@@ -11,10 +11,14 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sysctl.h>
+#include <linux/notifier.h>
 
 int fips_enabled;
 EXPORT_SYMBOL_GPL(fips_enabled);
 
+ATOMIC_NOTIFIER_HEAD(fips_fail_notif_chain);
+EXPORT_SYMBOL_GPL(fips_fail_notif_chain);
+
 /* Process kernel command-line parameter at boot time. fips=0 or fips=1 */
 static int fips_enable(char *str)
 {
@@ -58,6 +62,13 @@ static void crypto_proc_fips_exit(void)
 	unregister_sysctl_table(crypto_sysctls);
 }
 
+void fips_fail_notify(void)
+{
+	if (fips_enabled)
+		atomic_notifier_call_chain(&fips_fail_notif_chain, 0, NULL);
+}
+EXPORT_SYMBOL_GPL(fips_fail_notify);
+
 static int __init fips_init(void)
 {
 	crypto_proc_fips_init();
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d0b5b33806a6..8ba1e75cd973 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -5240,9 +5240,11 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
 					     type, mask);
 
 test_done:
-	if (rc && (fips_enabled || panic_on_fail))
+	if (rc && (fips_enabled || panic_on_fail)) {
+		fips_fail_notify();
 		panic("alg: self-tests for %s (%s) failed in %s mode!\n",
 		      driver, alg, fips_enabled ? "fips" : "panic_on_fail");
+	}
 
 	if (fips_enabled && !rc)
 		pr_info("alg: self-tests for %s (%s) passed\n", driver, alg);
diff --git a/include/linux/fips.h b/include/linux/fips.h
index afeeece92302..c6961e932fef 100644
--- a/include/linux/fips.h
+++ b/include/linux/fips.h
@@ -4,8 +4,15 @@
 
 #ifdef CONFIG_CRYPTO_FIPS
 extern int fips_enabled;
+extern struct atomic_notifier_head fips_fail_notif_chain;
+
+void fips_fail_notify(void);
+
 #else
 #define fips_enabled 0
+
+static inline void fips_fail_notify(void) {}
+
 #endif
 
 #endif
-- 
cgit v1.2.3


From e59c1c98745637796df824c0177f279b6e9cad94 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 2 Jul 2019 21:41:22 +0200
Subject: crypto: aes - create AES library based on the fixed time AES code

Take the existing small footprint and mostly time invariant C code
and turn it into a AES library that can be used for non-performance
critical, casual use of AES, and as a fallback for, e.g., SIMD code
that needs a secondary path that can be taken in contexts where the
SIMD unit is off limits (e.g., in hard interrupts taken from kernel
context)

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig       |   4 +
 crypto/aes_ti.c      | 303 +-------------------------------------------
 include/crypto/aes.h |  34 +++++
 lib/crypto/Makefile  |   3 +
 lib/crypto/aes.c     | 350 +++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 394 insertions(+), 300 deletions(-)
 create mode 100644 lib/crypto/aes.c

(limited to 'include')

diff --git a/crypto/Kconfig b/crypto/Kconfig
index e801450bcb1c..091ebbbc9655 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1066,6 +1066,9 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
 
 comment "Ciphers"
 
+config CRYPTO_LIB_AES
+	tristate
+
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -1089,6 +1092,7 @@ config CRYPTO_AES
 config CRYPTO_AES_TI
 	tristate "Fixed time AES cipher"
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_AES
 	help
 	  This is a generic implementation of AES that attempts to eliminate
 	  data dependent latencies as much as possible without affecting
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index b3ebdc5679cb..205c2c257d49 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -8,249 +8,19 @@
 #include <crypto/aes.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
-#include <asm/unaligned.h>
-
-/*
- * Emit the sbox as volatile const to prevent the compiler from doing
- * constant folding on sbox references involving fixed indexes.
- */
-static volatile const u8 __cacheline_aligned __aesti_sbox[] = {
-	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
-	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
-	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
-	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
-	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
-	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
-	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
-	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
-	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
-	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
-	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
-	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
-	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
-	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
-	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
-	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
-	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
-	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
-	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
-	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
-	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
-	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
-	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
-	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
-	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
-	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
-	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
-	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
-	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
-	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
-	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
-	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
-};
-
-static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = {
-	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
-	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
-	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
-	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
-	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
-	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
-	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
-	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
-	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
-	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
-	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
-	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
-	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
-	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
-	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
-	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
-	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
-	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
-	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
-	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
-	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
-	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
-	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
-	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
-	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
-	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
-	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
-	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
-	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
-	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
-	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
-	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
-};
-
-static u32 mul_by_x(u32 w)
-{
-	u32 x = w & 0x7f7f7f7f;
-	u32 y = w & 0x80808080;
-
-	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
-	return (x << 1) ^ (y >> 7) * 0x1b;
-}
-
-static u32 mul_by_x2(u32 w)
-{
-	u32 x = w & 0x3f3f3f3f;
-	u32 y = w & 0x80808080;
-	u32 z = w & 0x40404040;
-
-	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
-	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
-}
-
-static u32 mix_columns(u32 x)
-{
-	/*
-	 * Perform the following matrix multiplication in GF(2^8)
-	 *
-	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
-	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
-	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
-	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
-	 */
-	u32 y = mul_by_x(x) ^ ror32(x, 16);
-
-	return y ^ ror32(x ^ y, 8);
-}
-
-static u32 inv_mix_columns(u32 x)
-{
-	/*
-	 * Perform the following matrix multiplication in GF(2^8)
-	 *
-	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
-	 * | 0x9 0xe 0xb 0xd |   | x[1] |
-	 * | 0xd 0x9 0xe 0xb | x | x[2] |
-	 * | 0xb 0xd 0x9 0xe |   | x[3] |
-	 *
-	 * which can conveniently be reduced to
-	 *
-	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
-	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
-	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
-	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
-	 */
-	u32 y = mul_by_x2(x);
-
-	return mix_columns(x ^ y ^ ror32(y, 16));
-}
-
-static __always_inline u32 subshift(u32 in[], int pos)
-{
-	return (__aesti_sbox[in[pos] & 0xff]) ^
-	       (__aesti_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
-	       (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
-	       (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
-}
-
-static __always_inline u32 inv_subshift(u32 in[], int pos)
-{
-	return (__aesti_inv_sbox[in[pos] & 0xff]) ^
-	       (__aesti_inv_sbox[(in[(pos + 3) % 4] >>  8) & 0xff] <<  8) ^
-	       (__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
-	       (__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
-}
-
-static u32 subw(u32 in)
-{
-	return (__aesti_sbox[in & 0xff]) ^
-	       (__aesti_sbox[(in >>  8) & 0xff] <<  8) ^
-	       (__aesti_sbox[(in >> 16) & 0xff] << 16) ^
-	       (__aesti_sbox[(in >> 24) & 0xff] << 24);
-}
-
-static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
-			    unsigned int key_len)
-{
-	u32 kwords = key_len / sizeof(u32);
-	u32 rc, i, j;
-
-	if (key_len != AES_KEYSIZE_128 &&
-	    key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256)
-		return -EINVAL;
-
-	ctx->key_length = key_len;
-
-	for (i = 0; i < kwords; i++)
-		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
-
-	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
-		u32 *rki = ctx->key_enc + (i * kwords);
-		u32 *rko = rki + kwords;
-
-		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
-		rko[1] = rko[0] ^ rki[1];
-		rko[2] = rko[1] ^ rki[2];
-		rko[3] = rko[2] ^ rki[3];
-
-		if (key_len == 24) {
-			if (i >= 7)
-				break;
-			rko[4] = rko[3] ^ rki[4];
-			rko[5] = rko[4] ^ rki[5];
-		} else if (key_len == 32) {
-			if (i >= 6)
-				break;
-			rko[4] = subw(rko[3]) ^ rki[4];
-			rko[5] = rko[4] ^ rki[5];
-			rko[6] = rko[5] ^ rki[6];
-			rko[7] = rko[6] ^ rki[7];
-		}
-	}
-
-	/*
-	 * Generate the decryption keys for the Equivalent Inverse Cipher.
-	 * This involves reversing the order of the round keys, and applying
-	 * the Inverse Mix Columns transformation to all but the first and
-	 * the last one.
-	 */
-	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
-	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
-	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
-	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
-
-	for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
-		ctx->key_dec[i]     = inv_mix_columns(ctx->key_enc[j]);
-		ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
-		ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
-		ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
-	}
-
-	ctx->key_dec[i]     = ctx->key_enc[0];
-	ctx->key_dec[i + 1] = ctx->key_enc[1];
-	ctx->key_dec[i + 2] = ctx->key_enc[2];
-	ctx->key_dec[i + 3] = ctx->key_enc[3];
-
-	return 0;
-}
 
 static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			 unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	return aesti_expand_key(ctx, in_key, key_len);
+	return aes_expandkey(ctx, in_key, key_len);
 }
 
 static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *rkp = ctx->key_enc + 4;
-	int rounds = 6 + ctx->key_length / 4;
-	u32 st0[4], st1[4];
 	unsigned long flags;
-	int round;
-
-	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
-	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
-	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
-	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
 
 	/*
 	 * Temporarily disable interrupts to avoid races where cachelines are
@@ -258,36 +28,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	 */
 	local_irq_save(flags);
 
-	/*
-	 * Force the compiler to emit data independent Sbox references,
-	 * by xoring the input with Sbox values that are known to add up
-	 * to zero. This pulls the entire Sbox into the D-cache before any
-	 * data dependent lookups are done.
-	 */
-	st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[ 64] ^ __aesti_sbox[134] ^ __aesti_sbox[195];
-	st0[1] ^= __aesti_sbox[16] ^ __aesti_sbox[ 82] ^ __aesti_sbox[158] ^ __aesti_sbox[221];
-	st0[2] ^= __aesti_sbox[32] ^ __aesti_sbox[ 96] ^ __aesti_sbox[160] ^ __aesti_sbox[234];
-	st0[3] ^= __aesti_sbox[48] ^ __aesti_sbox[112] ^ __aesti_sbox[186] ^ __aesti_sbox[241];
-
-	for (round = 0;; round += 2, rkp += 8) {
-		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
-		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
-		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
-		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
-
-		if (round == rounds - 2)
-			break;
-
-		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
-		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
-		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
-		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
-	}
-
-	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
-	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
-	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
-	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+	aes_encrypt(ctx, out, in);
 
 	local_irq_restore(flags);
 }
@@ -295,16 +36,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *rkp = ctx->key_dec + 4;
-	int rounds = 6 + ctx->key_length / 4;
-	u32 st0[4], st1[4];
 	unsigned long flags;
-	int round;
-
-	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
-	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
-	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
-	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
 
 	/*
 	 * Temporarily disable interrupts to avoid races where cachelines are
@@ -312,36 +44,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	 */
 	local_irq_save(flags);
 
-	/*
-	 * Force the compiler to emit data independent Sbox references,
-	 * by xoring the input with Sbox values that are known to add up
-	 * to zero. This pulls the entire Sbox into the D-cache before any
-	 * data dependent lookups are done.
-	 */
-	st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[ 64] ^ __aesti_inv_sbox[129] ^ __aesti_inv_sbox[200];
-	st0[1] ^= __aesti_inv_sbox[16] ^ __aesti_inv_sbox[ 83] ^ __aesti_inv_sbox[150] ^ __aesti_inv_sbox[212];
-	st0[2] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[ 96] ^ __aesti_inv_sbox[160] ^ __aesti_inv_sbox[236];
-	st0[3] ^= __aesti_inv_sbox[48] ^ __aesti_inv_sbox[112] ^ __aesti_inv_sbox[187] ^ __aesti_inv_sbox[247];
-
-	for (round = 0;; round += 2, rkp += 8) {
-		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
-		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
-		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
-		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
-
-		if (round == rounds - 2)
-			break;
-
-		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
-		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
-		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
-		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
-	}
-
-	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
-	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
-	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
-	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+	aes_decrypt(ctx, out, in);
 
 	local_irq_restore(flags);
 }
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 0fdb542c70cd..d0067fca0cd0 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -37,4 +37,38 @@ int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);
 int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 		unsigned int key_len);
+
+/**
+ * aes_expandkey - Expands the AES key as described in FIPS-197
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
+ * key schedule plus a 16 bytes key which is used before the first round).
+ * The decryption key is prepared for the "Equivalent Inverse Cipher" as
+ * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
+ * for the initial combination, the second slot for the first round and so on.
+ */
+int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		  unsigned int key_len);
+
+/**
+ * aes_encrypt - Encrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the ciphertext
+ * @in:		Buffer containing the plaintext
+ */
+void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+/**
+ * aes_decrypt - Decrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the plaintext
+ * @in:		Buffer containing the ciphertext
+ */
+void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
 #endif
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 88195c34932d..42a91c62d96d 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -1,4 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
+obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
+libaes-y := aes.o
+
 obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
 libarc4-y := arc4.o
diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
new file mode 100644
index 000000000000..9928b23e0a8a
--- /dev/null
+++ b/lib/crypto/aes.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017-2019 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+
+/*
+ * Emit the sbox as volatile const to prevent the compiler from doing
+ * constant folding on sbox references involving fixed indexes.
+ */
+static volatile const u8 __cacheline_aligned aes_sbox[] = {
+	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+};
+
+static volatile const u8 __cacheline_aligned aes_inv_sbox[] = {
+	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
+};
+
+static u32 mul_by_x(u32 w)
+{
+	u32 x = w & 0x7f7f7f7f;
+	u32 y = w & 0x80808080;
+
+	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
+	return (x << 1) ^ (y >> 7) * 0x1b;
+}
+
+static u32 mul_by_x2(u32 w)
+{
+	u32 x = w & 0x3f3f3f3f;
+	u32 y = w & 0x80808080;
+	u32 z = w & 0x40404040;
+
+	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
+	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
+}
+
+static u32 mix_columns(u32 x)
+{
+	/*
+	 * Perform the following matrix multiplication in GF(2^8)
+	 *
+	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
+	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
+	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
+	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
+	 */
+	u32 y = mul_by_x(x) ^ ror32(x, 16);
+
+	return y ^ ror32(x ^ y, 8);
+}
+
+static u32 inv_mix_columns(u32 x)
+{
+	/*
+	 * Perform the following matrix multiplication in GF(2^8)
+	 *
+	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
+	 * | 0x9 0xe 0xb 0xd |   | x[1] |
+	 * | 0xd 0x9 0xe 0xb | x | x[2] |
+	 * | 0xb 0xd 0x9 0xe |   | x[3] |
+	 *
+	 * which can conveniently be reduced to
+	 *
+	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
+	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
+	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
+	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
+	 */
+	u32 y = mul_by_x2(x);
+
+	return mix_columns(x ^ y ^ ror32(y, 16));
+}
+
+static __always_inline u32 subshift(u32 in[], int pos)
+{
+	return (aes_sbox[in[pos] & 0xff]) ^
+	       (aes_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
+	       (aes_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (aes_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
+}
+
+static __always_inline u32 inv_subshift(u32 in[], int pos)
+{
+	return (aes_inv_sbox[in[pos] & 0xff]) ^
+	       (aes_inv_sbox[(in[(pos + 3) % 4] >>  8) & 0xff] <<  8) ^
+	       (aes_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (aes_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
+}
+
+static u32 subw(u32 in)
+{
+	return (aes_sbox[in & 0xff]) ^
+	       (aes_sbox[(in >>  8) & 0xff] <<  8) ^
+	       (aes_sbox[(in >> 16) & 0xff] << 16) ^
+	       (aes_sbox[(in >> 24) & 0xff] << 24);
+}
+
+/**
+ * aes_expandkey - Expands the AES key as described in FIPS-197
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
+ * key schedule plus a 16 bytes key which is used before the first round).
+ * The decryption key is prepared for the "Equivalent Inverse Cipher" as
+ * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
+ * for the initial combination, the second slot for the first round and so on.
+ */
+int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		  unsigned int key_len)
+{
+	u32 kwords = key_len / sizeof(u32);
+	u32 rc, i, j;
+
+	if (key_len != AES_KEYSIZE_128 &&
+	    key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256)
+		return -EINVAL;
+
+	ctx->key_length = key_len;
+
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
+
+	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
+		u32 *rki = ctx->key_enc + (i * kwords);
+		u32 *rko = rki + kwords;
+
+		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
+		rko[1] = rko[0] ^ rki[1];
+		rko[2] = rko[1] ^ rki[2];
+		rko[3] = rko[2] ^ rki[3];
+
+		if (key_len == AES_KEYSIZE_192) {
+			if (i >= 7)
+				break;
+			rko[4] = rko[3] ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+		} else if (key_len == AES_KEYSIZE_256) {
+			if (i >= 6)
+				break;
+			rko[4] = subw(rko[3]) ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+			rko[6] = rko[5] ^ rki[6];
+			rko[7] = rko[6] ^ rki[7];
+		}
+	}
+
+	/*
+	 * Generate the decryption keys for the Equivalent Inverse Cipher.
+	 * This involves reversing the order of the round keys, and applying
+	 * the Inverse Mix Columns transformation to all but the first and
+	 * the last one.
+	 */
+	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
+	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
+	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
+	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
+
+	for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
+		ctx->key_dec[i]     = inv_mix_columns(ctx->key_enc[j]);
+		ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
+		ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
+		ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
+	}
+
+	ctx->key_dec[i]     = ctx->key_enc[0];
+	ctx->key_dec[i + 1] = ctx->key_enc[1];
+	ctx->key_dec[i + 2] = ctx->key_enc[2];
+	ctx->key_dec[i + 3] = ctx->key_enc[3];
+
+	return 0;
+}
+EXPORT_SYMBOL(aes_expandkey);
+
+/**
+ * aes_encrypt - Encrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the ciphertext
+ * @in:		Buffer containing the plaintext
+ */
+void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
+{
+	const u32 *rkp = ctx->key_enc + 4;
+	int rounds = 6 + ctx->key_length / 4;
+	u32 st0[4], st1[4];
+	int round;
+
+	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
+	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
+	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
+	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
+
+	/*
+	 * Force the compiler to emit data independent Sbox references,
+	 * by xoring the input with Sbox values that are known to add up
+	 * to zero. This pulls the entire Sbox into the D-cache before any
+	 * data dependent lookups are done.
+	 */
+	st0[0] ^= aes_sbox[ 0] ^ aes_sbox[ 64] ^ aes_sbox[134] ^ aes_sbox[195];
+	st0[1] ^= aes_sbox[16] ^ aes_sbox[ 82] ^ aes_sbox[158] ^ aes_sbox[221];
+	st0[2] ^= aes_sbox[32] ^ aes_sbox[ 96] ^ aes_sbox[160] ^ aes_sbox[234];
+	st0[3] ^= aes_sbox[48] ^ aes_sbox[112] ^ aes_sbox[186] ^ aes_sbox[241];
+
+	for (round = 0;; round += 2, rkp += 8) {
+		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
+		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
+		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
+		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
+
+		if (round == rounds - 2)
+			break;
+
+		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
+		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
+		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
+		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
+	}
+
+	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
+	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
+	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
+	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+}
+EXPORT_SYMBOL(aes_encrypt);
+
+/**
+ * aes_decrypt - Decrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the plaintext
+ * @in:		Buffer containing the ciphertext
+ */
+void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
+{
+	const u32 *rkp = ctx->key_dec + 4;
+	int rounds = 6 + ctx->key_length / 4;
+	u32 st0[4], st1[4];
+	int round;
+
+	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
+	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
+	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
+	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
+
+	/*
+	 * Force the compiler to emit data independent Sbox references,
+	 * by xoring the input with Sbox values that are known to add up
+	 * to zero. This pulls the entire Sbox into the D-cache before any
+	 * data dependent lookups are done.
+	 */
+	st0[0] ^= aes_inv_sbox[ 0] ^ aes_inv_sbox[ 64] ^ aes_inv_sbox[129] ^ aes_inv_sbox[200];
+	st0[1] ^= aes_inv_sbox[16] ^ aes_inv_sbox[ 83] ^ aes_inv_sbox[150] ^ aes_inv_sbox[212];
+	st0[2] ^= aes_inv_sbox[32] ^ aes_inv_sbox[ 96] ^ aes_inv_sbox[160] ^ aes_inv_sbox[236];
+	st0[3] ^= aes_inv_sbox[48] ^ aes_inv_sbox[112] ^ aes_inv_sbox[187] ^ aes_inv_sbox[247];
+
+	for (round = 0;; round += 2, rkp += 8) {
+		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
+		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
+		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
+		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
+
+		if (round == rounds - 2)
+			break;
+
+		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
+		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
+		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
+		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
+	}
+
+	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
+	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
+	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
+	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+}
+EXPORT_SYMBOL(aes_decrypt);
+
+MODULE_DESCRIPTION("Generic AES library");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From 5bb12d7825adf0e80b849a273834f3131a6cc4e1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 2 Jul 2019 21:41:33 +0200
Subject: crypto: aes-generic - drop key expansion routine in favor of library
 version

Drop aes-generic's version of crypto_aes_expand_key(), and switch to
the key expansion routine provided by the AES library. AES key expansion
is not performance critical, and it is better to have a single version
shared by all AES implementations.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig       |   1 +
 crypto/aes_generic.c | 153 +--------------------------------------------------
 include/crypto/aes.h |   2 -
 3 files changed, 3 insertions(+), 153 deletions(-)

(limited to 'include')

diff --git a/crypto/Kconfig b/crypto/Kconfig
index df6f0be66574..80ea118600ab 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1072,6 +1072,7 @@ config CRYPTO_LIB_AES
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_AES
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
 	  algorithm.
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 3aa4a715c216..426deb437f19 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -1125,155 +1125,6 @@ EXPORT_SYMBOL_GPL(crypto_fl_tab);
 EXPORT_SYMBOL_GPL(crypto_it_tab);
 EXPORT_SYMBOL_GPL(crypto_il_tab);
 
-/* initialise the key schedule from the user supplied key */
-
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y, x)	do {		\
-	u	= star_x(x);		\
-	v	= star_x(u);		\
-	w	= star_x(v);		\
-	t	= w ^ (x);		\
-	(y)	= u ^ v ^ w;		\
-	(y)	^= ror32(u ^ t, 8) ^	\
-		ror32(v ^ t, 16) ^	\
-		ror32(t, 24);		\
-} while (0)
-
-#define ls_box(x)		\
-	crypto_fl_tab[0][byte(x, 0)] ^	\
-	crypto_fl_tab[1][byte(x, 1)] ^	\
-	crypto_fl_tab[2][byte(x, 2)] ^	\
-	crypto_fl_tab[3][byte(x, 3)]
-
-#define loop4(i)	do {		\
-	t = ror32(t, 8);		\
-	t = ls_box(t) ^ rco_tab[i];	\
-	t ^= ctx->key_enc[4 * i];		\
-	ctx->key_enc[4 * i + 4] = t;		\
-	t ^= ctx->key_enc[4 * i + 1];		\
-	ctx->key_enc[4 * i + 5] = t;		\
-	t ^= ctx->key_enc[4 * i + 2];		\
-	ctx->key_enc[4 * i + 6] = t;		\
-	t ^= ctx->key_enc[4 * i + 3];		\
-	ctx->key_enc[4 * i + 7] = t;		\
-} while (0)
-
-#define loop6(i)	do {		\
-	t = ror32(t, 8);		\
-	t = ls_box(t) ^ rco_tab[i];	\
-	t ^= ctx->key_enc[6 * i];		\
-	ctx->key_enc[6 * i + 6] = t;		\
-	t ^= ctx->key_enc[6 * i + 1];		\
-	ctx->key_enc[6 * i + 7] = t;		\
-	t ^= ctx->key_enc[6 * i + 2];		\
-	ctx->key_enc[6 * i + 8] = t;		\
-	t ^= ctx->key_enc[6 * i + 3];		\
-	ctx->key_enc[6 * i + 9] = t;		\
-	t ^= ctx->key_enc[6 * i + 4];		\
-	ctx->key_enc[6 * i + 10] = t;		\
-	t ^= ctx->key_enc[6 * i + 5];		\
-	ctx->key_enc[6 * i + 11] = t;		\
-} while (0)
-
-#define loop8tophalf(i)	do {			\
-	t = ror32(t, 8);			\
-	t = ls_box(t) ^ rco_tab[i];		\
-	t ^= ctx->key_enc[8 * i];			\
-	ctx->key_enc[8 * i + 8] = t;			\
-	t ^= ctx->key_enc[8 * i + 1];			\
-	ctx->key_enc[8 * i + 9] = t;			\
-	t ^= ctx->key_enc[8 * i + 2];			\
-	ctx->key_enc[8 * i + 10] = t;			\
-	t ^= ctx->key_enc[8 * i + 3];			\
-	ctx->key_enc[8 * i + 11] = t;			\
-} while (0)
-
-#define loop8(i)	do {				\
-	loop8tophalf(i);				\
-	t  = ctx->key_enc[8 * i + 4] ^ ls_box(t);	\
-	ctx->key_enc[8 * i + 12] = t;			\
-	t ^= ctx->key_enc[8 * i + 5];			\
-	ctx->key_enc[8 * i + 13] = t;			\
-	t ^= ctx->key_enc[8 * i + 6];			\
-	ctx->key_enc[8 * i + 14] = t;			\
-	t ^= ctx->key_enc[8 * i + 7];			\
-	ctx->key_enc[8 * i + 15] = t;			\
-} while (0)
-
-/**
- * crypto_aes_expand_key - Expands the AES key as described in FIPS-197
- * @ctx:	The location where the computed key will be stored.
- * @in_key:	The supplied key.
- * @key_len:	The length of the supplied key.
- *
- * Returns 0 on success. The function fails only if an invalid key size (or
- * pointer) is supplied.
- * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
- * key schedule plus a 16 bytes key which is used before the first round).
- * The decryption key is prepared for the "Equivalent Inverse Cipher" as
- * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
- * for the initial combination, the second slot for the first round and so on.
- */
-int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
-		unsigned int key_len)
-{
-	u32 i, t, u, v, w, j;
-
-	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
-			key_len != AES_KEYSIZE_256)
-		return -EINVAL;
-
-	ctx->key_length = key_len;
-
-	ctx->key_enc[0] = get_unaligned_le32(in_key);
-	ctx->key_enc[1] = get_unaligned_le32(in_key + 4);
-	ctx->key_enc[2] = get_unaligned_le32(in_key + 8);
-	ctx->key_enc[3] = get_unaligned_le32(in_key + 12);
-
-	ctx->key_dec[key_len + 24] = ctx->key_enc[0];
-	ctx->key_dec[key_len + 25] = ctx->key_enc[1];
-	ctx->key_dec[key_len + 26] = ctx->key_enc[2];
-	ctx->key_dec[key_len + 27] = ctx->key_enc[3];
-
-	switch (key_len) {
-	case AES_KEYSIZE_128:
-		t = ctx->key_enc[3];
-		for (i = 0; i < 10; ++i)
-			loop4(i);
-		break;
-
-	case AES_KEYSIZE_192:
-		ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
-		t = ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
-		for (i = 0; i < 8; ++i)
-			loop6(i);
-		break;
-
-	case AES_KEYSIZE_256:
-		ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
-		ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
-		ctx->key_enc[6] = get_unaligned_le32(in_key + 24);
-		t = ctx->key_enc[7] = get_unaligned_le32(in_key + 28);
-		for (i = 0; i < 6; ++i)
-			loop8(i);
-		loop8tophalf(i);
-		break;
-	}
-
-	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
-	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
-	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
-	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
-
-	for (i = 4; i < key_len + 24; ++i) {
-		j = key_len + 24 - (i & ~3) + (i & 3);
-		imix_col(ctx->key_dec[j], ctx->key_enc[i]);
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_aes_expand_key);
-
 /**
  * crypto_aes_set_key - Set the AES key.
  * @tfm:	The %crypto_tfm that is used in the context.
@@ -1281,7 +1132,7 @@ EXPORT_SYMBOL_GPL(crypto_aes_expand_key);
  * @key_len:	The size of the key.
  *
  * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
- * is set. The function uses crypto_aes_expand_key() to expand the key.
+ * is set. The function uses aes_expand_key() to expand the key.
  * &crypto_aes_ctx _must_ be the private data embedded in @tfm which is
  * retrieved with crypto_tfm_ctx().
  */
@@ -1292,7 +1143,7 @@ int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	u32 *flags = &tfm->crt_flags;
 	int ret;
 
-	ret = crypto_aes_expand_key(ctx, in_key, key_len);
+	ret = aes_expandkey(ctx, in_key, key_len);
 	if (!ret)
 		return 0;
 
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index d0067fca0cd0..0a64a977f9b3 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -35,8 +35,6 @@ extern const u32 crypto_il_tab[4][256] ____cacheline_aligned;
 
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);
-int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
-		unsigned int key_len);
 
 /**
  * aes_expandkey - Expands the AES key as described in FIPS-197
-- 
cgit v1.2.3


From d9ec772d9550b6e513c51bc4e6fa1e3ffb50181c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 2 Jul 2019 21:41:34 +0200
Subject: crypto: ctr - add helper for performing a CTR encryption walk

Add a static inline helper modeled after crypto_cbc_encrypt_walk()
that can be reused for SIMD algorithms that need to implement a
non-SIMD fallback for performing CTR encryption.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/ctr.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include')

diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h
index 06984a26c8cf..a1c66d1001af 100644
--- a/include/crypto/ctr.h
+++ b/include/crypto/ctr.h
@@ -8,8 +8,58 @@
 #ifndef _CRYPTO_CTR_H
 #define _CRYPTO_CTR_H
 
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
 #define CTR_RFC3686_NONCE_SIZE 4
 #define CTR_RFC3686_IV_SIZE 8
 #define CTR_RFC3686_BLOCK_SIZE 16
 
+static inline int crypto_ctr_encrypt_walk(struct skcipher_request *req,
+					  void (*fn)(struct crypto_skcipher *,
+						     const u8 *, u8 *))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	int blocksize = crypto_skcipher_chunksize(tfm);
+	u8 buf[MAX_CIPHER_BLOCKSIZE];
+	struct skcipher_walk walk;
+	int err;
+
+	/* avoid integer division due to variable blocksize parameter */
+	if (WARN_ON_ONCE(!is_power_of_2(blocksize)))
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			tail = walk.nbytes & (blocksize - 1);
+			nbytes -= tail;
+		}
+
+		do {
+			int bsize = min(nbytes, blocksize);
+
+			fn(tfm, walk.iv, buf);
+
+			crypto_xor_cpy(dst, src, buf, bsize);
+			crypto_inc(walk.iv, blocksize);
+
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
+
 #endif  /* _CRYPTO_CTR_H */
-- 
cgit v1.2.3


From 1e25ca02a0619c10bc0eae975846926902381ce5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 2 Jul 2019 21:41:45 +0200
Subject: crypto: aes-generic - unexport last-round AES tables

The versions of the AES lookup tables that are only used during the last
round are never used outside of the driver, so there is no need to
export their symbols.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/aes_generic.c | 6 ++----
 include/crypto/aes.h | 2 --
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 426deb437f19..71a5c190d360 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -328,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] ____cacheline_aligned = {
 	}
 };
 
-__visible const u32 crypto_fl_tab[4][256] ____cacheline_aligned = {
+static const u32 crypto_fl_tab[4][256] ____cacheline_aligned = {
 	{
 		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
 		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -856,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] ____cacheline_aligned = {
 	}
 };
 
-__visible const u32 crypto_il_tab[4][256] ____cacheline_aligned = {
+static const u32 crypto_il_tab[4][256] ____cacheline_aligned = {
 	{
 		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
 		0x00000030, 0x00000036, 0x000000a5, 0x00000038,
@@ -1121,9 +1121,7 @@ __visible const u32 crypto_il_tab[4][256] ____cacheline_aligned = {
 };
 
 EXPORT_SYMBOL_GPL(crypto_ft_tab);
-EXPORT_SYMBOL_GPL(crypto_fl_tab);
 EXPORT_SYMBOL_GPL(crypto_it_tab);
-EXPORT_SYMBOL_GPL(crypto_il_tab);
 
 /**
  * crypto_aes_set_key - Set the AES key.
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 0a64a977f9b3..df8426fd8051 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -29,9 +29,7 @@ struct crypto_aes_ctx {
 };
 
 extern const u32 crypto_ft_tab[4][256] ____cacheline_aligned;
-extern const u32 crypto_fl_tab[4][256] ____cacheline_aligned;
 extern const u32 crypto_it_tab[4][256] ____cacheline_aligned;
-extern const u32 crypto_il_tab[4][256] ____cacheline_aligned;
 
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);
-- 
cgit v1.2.3


From 9467a3150cf4afca638673e099af71e8c493a3a0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 2 Jul 2019 21:41:46 +0200
Subject: crypto: lib/aes - export sbox and inverse sbox

There are a few copies of the AES S-boxes floating around, so export
the ones from the AES library so that we can reuse them in other
modules.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/aes.h | 3 +++
 lib/crypto/aes.c     | 6 ++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index df8426fd8051..8e0f4cf948e5 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -67,4 +67,7 @@ void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
  */
 void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
 
+extern const u8 crypto_aes_sbox[];
+extern const u8 crypto_aes_inv_sbox[];
+
 #endif
diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index 9928b23e0a8a..4e100af38c51 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -82,6 +82,12 @@ static volatile const u8 __cacheline_aligned aes_inv_sbox[] = {
 	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
 };
 
+extern const u8 crypto_aes_sbox[256] __alias(aes_sbox);
+extern const u8 crypto_aes_inv_sbox[256] __alias(aes_inv_sbox);
+
+EXPORT_SYMBOL(crypto_aes_sbox);
+EXPORT_SYMBOL(crypto_aes_inv_sbox);
+
 static u32 mul_by_x(u32 w)
 {
 	u32 x = w & 0x7f7f7f7f;
-- 
cgit v1.2.3


From 5cb97700beaa005ceb2a127b6f53536a4544c9d8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 3 Jul 2019 10:55:06 +0200
Subject: crypto: morus - remove generic and x86 implementations

MORUS was not selected as a winner in the CAESAR competition, which
is not surprising since it is considered to be cryptographically
broken [0]. (Note that this is not an implementation defect, but a
flaw in the underlying algorithm). Since it is unlikely to be in use
currently, let's remove it before we're stuck with it.

[0] https://eprint.iacr.org/2019/172.pdf

Reviewed-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile              |   13 -
 arch/x86/crypto/morus1280-avx2-asm.S  |  619 ------------
 arch/x86/crypto/morus1280-avx2-glue.c |   62 --
 arch/x86/crypto/morus1280-sse2-asm.S  |  893 -----------------
 arch/x86/crypto/morus1280-sse2-glue.c |   61 --
 arch/x86/crypto/morus1280_glue.c      |  205 ----
 arch/x86/crypto/morus640-sse2-asm.S   |  612 ------------
 arch/x86/crypto/morus640-sse2-glue.c  |   61 --
 arch/x86/crypto/morus640_glue.c       |  200 ----
 crypto/Kconfig                        |   56 --
 crypto/Makefile                       |    2 -
 crypto/morus1280.c                    |  542 -----------
 crypto/morus640.c                     |  533 ----------
 crypto/testmgr.c                      |   12 -
 crypto/testmgr.h                      | 1707 ---------------------------------
 include/crypto/morus1280_glue.h       |   97 --
 include/crypto/morus640_glue.h        |   97 --
 include/crypto/morus_common.h         |   18 -
 18 files changed, 5790 deletions(-)
 delete mode 100644 arch/x86/crypto/morus1280-avx2-asm.S
 delete mode 100644 arch/x86/crypto/morus1280-avx2-glue.c
 delete mode 100644 arch/x86/crypto/morus1280-sse2-asm.S
 delete mode 100644 arch/x86/crypto/morus1280-sse2-glue.c
 delete mode 100644 arch/x86/crypto/morus1280_glue.c
 delete mode 100644 arch/x86/crypto/morus640-sse2-asm.S
 delete mode 100644 arch/x86/crypto/morus640-sse2-glue.c
 delete mode 100644 arch/x86/crypto/morus640_glue.c
 delete mode 100644 crypto/morus1280.c
 delete mode 100644 crypto/morus640.c
 delete mode 100644 include/crypto/morus1280_glue.h
 delete mode 100644 include/crypto/morus640_glue.h
 delete mode 100644 include/crypto/morus_common.h

(limited to 'include')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index b96a14e67ab0..6f1d825fbb09 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -39,12 +39,6 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
 obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
 obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
 
-obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
-obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
-
-obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
-obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
-
 obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
 
@@ -62,8 +56,6 @@ endif
 ifeq ($(avx2_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
-
-	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
 endif
 
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -81,9 +73,6 @@ aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
 aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
 aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
 
-morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
-morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
-
 nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
 
 ifeq ($(avx_supported),yes)
@@ -102,8 +91,6 @@ ifeq ($(avx2_supported),yes)
 	chacha-x86_64-y += chacha-avx2-x86_64.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 
-	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
-
 	nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
 endif
 
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
deleted file mode 100644
index 5413fee33481..000000000000
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ /dev/null
@@ -1,619 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * AVX2 implementation of MORUS-1280
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define SHUFFLE_MASK(i0, i1, i2, i3) \
-	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
-
-#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
-#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
-#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
-
-#define STATE0		%ymm0
-#define STATE0_LOW	%xmm0
-#define STATE1		%ymm1
-#define STATE2		%ymm2
-#define STATE3		%ymm3
-#define STATE4		%ymm4
-#define KEY		%ymm5
-#define MSG		%ymm5
-#define MSG_LOW		%xmm5
-#define T0		%ymm6
-#define T0_LOW		%xmm6
-#define T1		%ymm7
-
-.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
-.align 32
-.Lmorus1280_const:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
-.align 32
-.Lmorus1280_counter:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-
-.text
-
-.macro morus1280_round s0, s1, s2, s3, s4, b, w
-	vpand \s1, \s2, T0
-	vpxor T0, \s0, \s0
-	vpxor \s3, \s0, \s0
-	vpsllq $\b, \s0, T0
-	vpsrlq $(64 - \b), \s0, \s0
-	vpxor T0, \s0, \s0
-	vpermq $\w, \s3, \s3
-.endm
-
-/*
- * __morus1280_update: internal ABI
- * input:
- *   STATE[0-4] - input state
- *   MSG        - message block
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update:
-	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
-	vpxor MSG, STATE1, STATE1
-	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
-	vpxor MSG, STATE2, STATE2
-	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
-	vpxor MSG, STATE3, STATE3
-	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2,  7, MASK2
-	vpxor MSG, STATE4, STATE4
-	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3,  4, MASK1
-	ret
-ENDPROC(__morus1280_update)
-
-/*
- * __morus1280_update_zero: internal ABI
- * input:
- *   STATE[0-4] - input state
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update_zero:
-	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
-	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
-	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
-	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2,  7, MASK2
-	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3,  4, MASK1
-	ret
-ENDPROC(__morus1280_update_zero)
-
-/*
- * __load_partial: internal ABI
- * input:
- *   %rsi - src
- *   %rcx - bytes
- * output:
- *   MSG  - message block
- * changed:
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	vpxor MSG, MSG, MSG
-
-	mov %rcx, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov %rcx, %r8
-	and $0x1E, %r8
-	add %rsi, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov %rcx, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov %rcx, %r8
-	and $0x1C, %r8
-	add %rsi, %r8
-	shl $16, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov %rcx, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov %rcx, %r8
-	and $0x18, %r8
-	add %rsi, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG_LOW
-
-	mov %rcx, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov %rcx, %r8
-	and $0x10, %r8
-	add %rsi, %r8
-	pshufd $MASK2, MSG_LOW, MSG_LOW
-	pinsrq $0, (%r8), MSG_LOW
-
-.Lld_partial_8:
-	mov %rcx, %r8
-	and $0x10, %r8
-	jz .Lld_partial_16
-
-	vpermq $MASK2, MSG, MSG
-	movdqu (%rsi), MSG_LOW
-
-.Lld_partial_16:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   %rdx - dst
- *   %rcx - bytes
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov %rcx, %r8
-	mov %rdx, %r9
-
-	cmp $16, %r8
-	jl .Lst_partial_16
-
-	movdqu T0_LOW, (%r9)
-	vpermq $MASK2, T0, T0
-
-	sub $16, %r8
-	add $16, %r9
-
-.Lst_partial_16:
-	movq T0_LOW, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	pextrq $1, T0_LOW, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $16, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-/*
- * void crypto_morus1280_avx2_init(void *state, const void *key,
- *                                 const void *iv);
- */
-ENTRY(crypto_morus1280_avx2_init)
-	FRAME_BEGIN
-
-	/* load IV: */
-	vpxor STATE0, STATE0, STATE0
-	movdqu (%rdx), STATE0_LOW
-	/* load key: */
-	vmovdqu (%rsi), KEY
-	vmovdqa KEY, STATE1
-	/* load all ones: */
-	vpcmpeqd STATE2, STATE2, STATE2
-	/* load all zeros: */
-	vpxor STATE3, STATE3, STATE3
-	/* load the constant: */
-	vmovdqa .Lmorus1280_const, STATE4
-
-	/* update 16 times with zero: */
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-
-	/* xor-in the key again after updates: */
-	vpxor KEY, STATE1, STATE1
-
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_init)
-
-/*
- * void crypto_morus1280_avx2_ad(void *state, const void *data,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_ad)
-	FRAME_BEGIN
-
-	cmp $32, %rdx
-	jb .Lad_out
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	mov %rsi,  %r8
-	and $0x1F, %r8
-	jnz .Lad_u_loop
-
-.align 4
-.Lad_a_loop:
-	vmovdqa (%rsi), MSG
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_a_loop
-
-	jmp .Lad_cont
-.align 4
-.Lad_u_loop:
-	vmovdqu (%rsi), MSG
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_u_loop
-
-.Lad_cont:
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_ad)
-
-/*
- * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_enc)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Lenc_out
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	mov %rsi,  %r8
-	or  %rdx,  %r8
-	and $0x1F, %r8
-	jnz .Lenc_u_loop
-
-.align 4
-.Lenc_a_loop:
-	vmovdqa (%rsi), MSG
-	vmovdqa MSG, T0
-	vpxor STATE0, T0, T0
-	vpermq $MASK3, STATE1, T1
-	vpxor T1, T0, T0
-	vpand STATE2, STATE3, T1
-	vpxor T1, T0, T0
-	vmovdqa T0, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_a_loop
-
-	jmp .Lenc_cont
-.align 4
-.Lenc_u_loop:
-	vmovdqu (%rsi), MSG
-	vmovdqa MSG, T0
-	vpxor STATE0, T0, T0
-	vpermq $MASK3, STATE1, T1
-	vpxor T1, T0, T0
-	vpand STATE2, STATE3, T1
-	vpxor T1, T0, T0
-	vmovdqu T0, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_u_loop
-
-.Lenc_cont:
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_enc)
-
-/*
- * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_enc_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	/* encrypt message: */
-	call __load_partial
-
-	vmovdqa MSG, T0
-	vpxor STATE0, T0, T0
-	vpermq $MASK3, STATE1, T1
-	vpxor T1, T0, T0
-	vpand STATE2, STATE3, T1
-	vpxor T1, T0, T0
-
-	call __store_partial
-
-	call __morus1280_update
-
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_enc_tail)
-
-/*
- * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_dec)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Ldec_out
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	mov %rsi,  %r8
-	or  %rdx,  %r8
-	and $0x1F, %r8
-	jnz .Ldec_u_loop
-
-.align 4
-.Ldec_a_loop:
-	vmovdqa (%rsi), MSG
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqa MSG, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_a_loop
-
-	jmp .Ldec_cont
-.align 4
-.Ldec_u_loop:
-	vmovdqu (%rsi), MSG
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqu MSG, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_u_loop
-
-.Ldec_cont:
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_dec)
-
-/*
- * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_dec_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	/* decrypt message: */
-	call __load_partial
-
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqa MSG, T0
-
-	call __store_partial
-
-	/* mask with byte count: */
-	movq %rcx, T0_LOW
-	vpbroadcastb T0_LOW, T0
-	vmovdqa .Lmorus1280_counter, T1
-	vpcmpgtb T1, T0, T0
-	vpand T0, MSG, MSG
-
-	call __morus1280_update
-
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_dec_tail)
-
-/*
- * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
- *                                  u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_morus1280_avx2_final)
-	FRAME_BEGIN
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	/* xor state[0] into state[4]: */
-	vpxor STATE0, STATE4, STATE4
-
-	/* prepare length block: */
-	vpxor MSG, MSG, MSG
-	vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
-	vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
-	vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
-
-	/* update state: */
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-
-	/* xor tag: */
-	vmovdqu (%rsi), MSG
-
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqu MSG, (%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_final)
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
deleted file mode 100644
index 2d000d66ba4c..000000000000
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Glue for AVX2 implementation
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/morus1280_glue.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
-					   const void *iv);
-asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
-					 unsigned int length);
-
-asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
-					  void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
-					  void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
-					    u64 assoclen, u64 cryptlen);
-
-MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400);
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_morus1280_avx2_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_morus1280_avx2_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg);
-}
-
-module_init(crypto_morus1280_avx2_module_init);
-module_exit(crypto_morus1280_avx2_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
-MODULE_ALIAS_CRYPTO("morus1280");
-MODULE_ALIAS_CRYPTO("morus1280-avx2");
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
deleted file mode 100644
index 0eece772866b..000000000000
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ /dev/null
@@ -1,893 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * SSE2 implementation of MORUS-1280
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define SHUFFLE_MASK(i0, i1, i2, i3) \
-	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
-
-#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
-
-#define STATE0_LO	%xmm0
-#define STATE0_HI	%xmm1
-#define STATE1_LO	%xmm2
-#define STATE1_HI	%xmm3
-#define STATE2_LO	%xmm4
-#define STATE2_HI	%xmm5
-#define STATE3_LO	%xmm6
-#define STATE3_HI	%xmm7
-#define STATE4_LO	%xmm8
-#define STATE4_HI	%xmm9
-#define KEY_LO		%xmm10
-#define KEY_HI		%xmm11
-#define MSG_LO		%xmm10
-#define MSG_HI		%xmm11
-#define T0_LO		%xmm12
-#define T0_HI		%xmm13
-#define T1_LO		%xmm14
-#define T1_HI		%xmm15
-
-.section .rodata.cst16.morus640_const, "aM", @progbits, 16
-.align 16
-.Lmorus640_const_0:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-.Lmorus640_const_1:
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
-.align 16
-.Lmorus640_counter_0:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-.Lmorus640_counter_1:
-	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-
-.text
-
-.macro rol1 hi, lo
-	/*
-	 * HI_1 | HI_0 || LO_1 | LO_0
-	 *  ==>
-	 * HI_0 | HI_1 || LO_1 | LO_0
-	 *  ==>
-	 * HI_0 | LO_1 || LO_0 | HI_1
-	 */
-	pshufd $MASK2, \hi, \hi
-	movdqa \hi, T0_LO
-	punpcklqdq \lo, T0_LO
-	punpckhqdq \hi, \lo
-	movdqa \lo, \hi
-	movdqa T0_LO, \lo
-.endm
-
-.macro rol2 hi, lo
-	movdqa \lo, T0_LO
-	movdqa \hi, \lo
-	movdqa T0_LO, \hi
-.endm
-
-.macro rol3 hi, lo
-	/*
-	 * HI_1 | HI_0 || LO_1 | LO_0
-	 *  ==>
-	 * HI_0 | HI_1 || LO_1 | LO_0
-	 *  ==>
-	 * LO_0 | HI_1 || HI_0 | LO_1
-	 */
-	pshufd $MASK2, \hi, \hi
-	movdqa \lo, T0_LO
-	punpckhqdq \hi, T0_LO
-	punpcklqdq \lo, \hi
-	movdqa T0_LO, \lo
-.endm
-
-.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
-	movdqa \s1_l, T0_LO
-	pand \s2_l, T0_LO
-	pxor T0_LO, \s0_l
-
-	movdqa \s1_h, T0_LO
-	pand \s2_h, T0_LO
-	pxor T0_LO, \s0_h
-
-	pxor \s3_l, \s0_l
-	pxor \s3_h, \s0_h
-
-	movdqa \s0_l, T0_LO
-	psllq $\b, T0_LO
-	psrlq $(64 - \b), \s0_l
-	pxor T0_LO, \s0_l
-
-	movdqa \s0_h, T0_LO
-	psllq $\b, T0_LO
-	psrlq $(64 - \b), \s0_h
-	pxor T0_LO, \s0_h
-
-	\w \s3_h, \s3_l
-.endm
-
-/*
- * __morus1280_update: internal ABI
- * input:
- *   STATE[0-4] - input state
- *   MSG        - message block
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update:
-	morus1280_round \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		13, rol1
-	pxor MSG_LO, STATE1_LO
-	pxor MSG_HI, STATE1_HI
-	morus1280_round \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		46, rol2
-	pxor MSG_LO, STATE2_LO
-	pxor MSG_HI, STATE2_HI
-	morus1280_round \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		38, rol3
-	pxor MSG_LO, STATE3_LO
-	pxor MSG_HI, STATE3_HI
-	morus1280_round \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		7, rol2
-	pxor MSG_LO, STATE4_LO
-	pxor MSG_HI, STATE4_HI
-	morus1280_round \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		4, rol1
-	ret
-ENDPROC(__morus1280_update)
-
-/*
- * __morus1280_update_zero: internal ABI
- * input:
- *   STATE[0-4] - input state
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update_zero:
-	morus1280_round \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		13, rol1
-	morus1280_round \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		46, rol2
-	morus1280_round \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		38, rol3
-	morus1280_round \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		7, rol2
-	morus1280_round \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		4, rol1
-	ret
-ENDPROC(__morus1280_update_zero)
-
-/*
- * __load_partial: internal ABI
- * input:
- *   %rsi - src
- *   %rcx - bytes
- * output:
- *   MSG  - message block
- * changed:
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	pxor MSG_LO, MSG_LO
-	pxor MSG_HI, MSG_HI
-
-	mov %rcx, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov %rcx, %r8
-	and $0x1E, %r8
-	add %rsi, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov %rcx, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov %rcx, %r8
-	and $0x1C, %r8
-	add %rsi, %r8
-	shl $16, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov %rcx, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov %rcx, %r8
-	and $0x18, %r8
-	add %rsi, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG_LO
-
-	mov %rcx, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov %rcx, %r8
-	and $0x10, %r8
-	add %rsi, %r8
-	pslldq $8, MSG_LO
-	movq (%r8), T0_LO
-	pxor T0_LO, MSG_LO
-
-.Lld_partial_8:
-	mov %rcx, %r8
-	and $0x10, %r8
-	jz .Lld_partial_16
-
-	movdqa MSG_LO, MSG_HI
-	movdqu (%rsi), MSG_LO
-
-.Lld_partial_16:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   %rdx - dst
- *   %rcx - bytes
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov %rcx, %r8
-	mov %rdx, %r9
-
-	cmp $16, %r8
-	jl .Lst_partial_16
-
-	movdqu T0_LO, (%r9)
-	movdqa T0_HI, T0_LO
-
-	sub $16, %r8
-	add $16, %r9
-
-.Lst_partial_16:
-	movq T0_LO, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0_LO
-	movq T0_LO, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $16, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-/*
- * void crypto_morus1280_sse2_init(void *state, const void *key,
- *                                 const void *iv);
- */
-ENTRY(crypto_morus1280_sse2_init)
-	FRAME_BEGIN
-
-	/* load IV: */
-	pxor STATE0_HI, STATE0_HI
-	movdqu (%rdx), STATE0_LO
-	/* load key: */
-	movdqu  0(%rsi), KEY_LO
-	movdqu 16(%rsi), KEY_HI
-	movdqa KEY_LO, STATE1_LO
-	movdqa KEY_HI, STATE1_HI
-	/* load all ones: */
-	pcmpeqd STATE2_LO, STATE2_LO
-	pcmpeqd STATE2_HI, STATE2_HI
-	/* load all zeros: */
-	pxor STATE3_LO, STATE3_LO
-	pxor STATE3_HI, STATE3_HI
-	/* load the constant: */
-	movdqa .Lmorus640_const_0, STATE4_LO
-	movdqa .Lmorus640_const_1, STATE4_HI
-
-	/* update 16 times with zero: */
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-
-	/* xor-in the key again after updates: */
-	pxor KEY_LO, STATE1_LO
-	pxor KEY_HI, STATE1_HI
-
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_init)
-
-/*
- * void crypto_morus1280_sse2_ad(void *state, const void *data,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_ad)
-	FRAME_BEGIN
-
-	cmp $32, %rdx
-	jb .Lad_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	mov %rsi, %r8
-	and $0xF, %r8
-	jnz .Lad_u_loop
-
-.align 4
-.Lad_a_loop:
-	movdqa  0(%rsi), MSG_LO
-	movdqa 16(%rsi), MSG_HI
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_a_loop
-
-	jmp .Lad_cont
-.align 4
-.Lad_u_loop:
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_u_loop
-
-.Lad_cont:
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_ad)
-
-/*
- * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_enc)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Lenc_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Lenc_u_loop
-
-.align 4
-.Lenc_a_loop:
-	movdqa  0(%rsi), MSG_LO
-	movdqa 16(%rsi), MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	pxor STATE0_LO, T0_LO
-	pxor STATE0_HI, T0_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	movdqa T0_LO,  0(%rdx)
-	movdqa T0_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_a_loop
-
-	jmp .Lenc_cont
-.align 4
-.Lenc_u_loop:
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	pxor STATE0_LO, T0_LO
-	pxor STATE0_HI, T0_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	movdqu T0_LO,  0(%rdx)
-	movdqu T0_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_u_loop
-
-.Lenc_cont:
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_enc)
-
-/*
- * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_enc_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	/* encrypt message: */
-	call __load_partial
-
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	pxor STATE0_LO, T0_LO
-	pxor STATE0_HI, T0_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-
-	call __store_partial
-
-	call __morus1280_update
-
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_enc_tail)
-
-/*
- * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_dec)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Ldec_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Ldec_u_loop
-
-.align 4
-.Ldec_a_loop:
-	movdqa  0(%rsi), MSG_LO
-	movdqa 16(%rsi), MSG_HI
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa MSG_LO,  0(%rdx)
-	movdqa MSG_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_a_loop
-
-	jmp .Ldec_cont
-.align 4
-.Ldec_u_loop:
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqu MSG_LO,  0(%rdx)
-	movdqu MSG_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_u_loop
-
-.Ldec_cont:
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_dec)
-
-/*
- * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_dec_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	/* decrypt message: */
-	call __load_partial
-
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-
-	call __store_partial
-
-	/* mask with byte count: */
-	movq %rcx, T0_LO
-	punpcklbw T0_LO, T0_LO
-	punpcklbw T0_LO, T0_LO
-	punpcklbw T0_LO, T0_LO
-	punpcklbw T0_LO, T0_LO
-	movdqa T0_LO, T0_HI
-	movdqa .Lmorus640_counter_0, T1_LO
-	movdqa .Lmorus640_counter_1, T1_HI
-	pcmpgtb T1_LO, T0_LO
-	pcmpgtb T1_HI, T0_HI
-	pand T0_LO, MSG_LO
-	pand T0_HI, MSG_HI
-
-	call __morus1280_update
-
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_dec_tail)
-
-/*
- * void crypto_morus1280_sse2_final(void *state, void *tag_xor,
- *                                  u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_morus1280_sse2_final)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	/* xor state[0] into state[4]: */
-	pxor STATE0_LO, STATE4_LO
-	pxor STATE0_HI, STATE4_HI
-
-	/* prepare length block: */
-	movq %rdx, MSG_LO
-	movq %rcx, T0_LO
-	pslldq $8, T0_LO
-	pxor T0_LO, MSG_LO
-	psllq $3, MSG_LO /* multiply by 8 (to get bit count) */
-	pxor MSG_HI, MSG_HI
-
-	/* update state: */
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-
-	/* xor tag: */
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T0_LO
-	movdqa STATE1_HI, T0_HI
-	rol3 T0_HI, T0_LO
-	pxor T0_LO, MSG_LO
-	pxor T0_HI, MSG_HI
-	movdqa STATE2_LO, T0_LO
-	movdqa STATE2_HI, T0_HI
-	pand STATE3_LO, T0_LO
-	pand STATE3_HI, T0_HI
-	pxor T0_LO, MSG_LO
-	pxor T0_HI, MSG_HI
-
-	movdqu MSG_LO,  0(%rsi)
-	movdqu MSG_HI, 16(%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_final)
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
deleted file mode 100644
index aada9d774293..000000000000
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Glue for SSE2 implementation
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/morus1280_glue.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
-					   const void *iv);
-asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
-					 unsigned int length);
-
-asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
-					  void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
-					  void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
-					    u64 assoclen, u64 cryptlen);
-
-MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350);
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_morus1280_sse2_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_morus1280_sse2_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg);
-}
-
-module_init(crypto_morus1280_sse2_module_init);
-module_exit(crypto_morus1280_sse2_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
-MODULE_ALIAS_CRYPTO("morus1280");
-MODULE_ALIAS_CRYPTO("morus1280-sse2");
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
deleted file mode 100644
index ffbde8b22838..000000000000
--- a/arch/x86/crypto/morus1280_glue.c
+++ /dev/null
@@ -1,205 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Common x86 SIMD glue skeleton
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus1280_glue.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <asm/fpu/api.h>
-
-struct morus1280_state {
-	struct morus1280_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus1280_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_blocks)(void *state, const void *src, void *dst,
-			     unsigned int length);
-	void (*crypt_tail)(void *state, const void *src, void *dst,
-			   unsigned int length);
-};
-
-static void crypto_morus1280_glue_process_ad(
-		struct morus1280_state *state,
-		const struct morus1280_glue_ops *ops,
-		struct scatterlist *sg_src, unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus1280_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS1280_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			ops->ad(state, src, left);
-			src += left & ~(MORUS1280_BLOCK_SIZE - 1);
-			left &= MORUS1280_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
-		ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
-	}
-}
-
-static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
-						struct morus1280_ops ops,
-						struct skcipher_walk *walk)
-{
-	while (walk->nbytes >= MORUS1280_BLOCK_SIZE) {
-		ops.crypt_blocks(state, walk->src.virt.addr,
-				 walk->dst.virt.addr,
-				 round_down(walk->nbytes,
-					    MORUS1280_BLOCK_SIZE));
-		skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE);
-	}
-
-	if (walk->nbytes) {
-		ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
-			       walk->nbytes);
-		skcipher_walk_done(walk, 0);
-	}
-}
-
-int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				 unsigned int keylen)
-{
-	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen == MORUS1280_BLOCK_SIZE) {
-		memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
-	} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
-		memcpy(ctx->key.bytes, key, keylen);
-		memcpy(ctx->key.bytes + keylen, key, keylen);
-	} else {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
-
-int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
-				      unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
-
-static void crypto_morus1280_glue_crypt(struct aead_request *req,
-					struct morus1280_ops ops,
-					unsigned int cryptlen,
-					struct morus1280_block *tag_xor)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_state state;
-	struct skcipher_walk walk;
-
-	ops.skcipher_walk_init(&walk, req, true);
-
-	kernel_fpu_begin();
-
-	ctx->ops->init(&state, &ctx->key, req->iv);
-	crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
-	crypto_morus1280_glue_process_crypt(&state, ops, &walk);
-	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
-
-	kernel_fpu_end();
-}
-
-int crypto_morus1280_glue_encrypt(struct aead_request *req)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_blocks = ctx->ops->enc,
-		.crypt_tail = ctx->ops->enc_tail,
-	};
-
-	struct morus1280_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
-
-int crypto_morus1280_glue_decrypt(struct aead_request *req)
-{
-	static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_blocks = ctx->ops->dec,
-		.crypt_tail = ctx->ops->dec_tail,
-	};
-
-	struct morus1280_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
-
-	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
-
-void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
-				    const struct morus1280_glue_ops *ops)
-{
-	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
-	ctx->ops = ops;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
deleted file mode 100644
index a60891101bbd..000000000000
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ /dev/null
@@ -1,612 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * SSE2 implementation of MORUS-640
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define SHUFFLE_MASK(i0, i1, i2, i3) \
-	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
-
-#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
-#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
-#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
-
-#define STATE0	%xmm0
-#define STATE1	%xmm1
-#define STATE2	%xmm2
-#define STATE3	%xmm3
-#define STATE4	%xmm4
-#define KEY	%xmm5
-#define MSG	%xmm5
-#define T0	%xmm6
-#define T1	%xmm7
-
-.section .rodata.cst16.morus640_const, "aM", @progbits, 32
-.align 16
-.Lmorus640_const_0:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-.Lmorus640_const_1:
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
-.align 16
-.Lmorus640_counter:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-
-.text
-
-.macro morus640_round s0, s1, s2, s3, s4, b, w
-	movdqa \s1, T0
-	pand \s2, T0
-	pxor T0, \s0
-	pxor \s3, \s0
-	movdqa \s0, T0
-	pslld $\b, T0
-	psrld $(32 - \b), \s0
-	pxor T0, \s0
-	pshufd $\w, \s3, \s3
-.endm
-
-/*
- * __morus640_update: internal ABI
- * input:
- *   STATE[0-4] - input state
- *   MSG        - message block
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus640_update:
-	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
-	pxor MSG, STATE1
-	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
-	pxor MSG, STATE2
-	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
-	pxor MSG, STATE3
-	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
-	pxor MSG, STATE4
-	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
-	ret
-ENDPROC(__morus640_update)
-
-
-/*
- * __morus640_update_zero: internal ABI
- * input:
- *   STATE[0-4] - input state
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus640_update_zero:
-	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
-	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
-	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
-	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
-	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
-	ret
-ENDPROC(__morus640_update_zero)
-
-/*
- * __load_partial: internal ABI
- * input:
- *   %rsi - src
- *   %rcx - bytes
- * output:
- *   MSG  - message block
- * changed:
- *   T0
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	pxor MSG, MSG
-
-	mov %rcx, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov %rcx, %r8
-	and $0x1E, %r8
-	add %rsi, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov %rcx, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov %rcx, %r8
-	and $0x1C, %r8
-	add %rsi, %r8
-	shl $16, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov %rcx, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov %rcx, %r8
-	and $0x18, %r8
-	add %rsi, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG
-
-	mov %rcx, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov %rcx, %r8
-	and $0x10, %r8
-	add %rsi, %r8
-	pslldq $8, MSG
-	movq (%r8), T0
-	pxor T0, MSG
-
-.Lld_partial_8:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   %rdx - dst
- *   %rcx - bytes
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov %rcx, %r8
-	mov %rdx, %r9
-
-	movq T0, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0
-	movq T0, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $16, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-/*
- * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
- */
-ENTRY(crypto_morus640_sse2_init)
-	FRAME_BEGIN
-
-	/* load IV: */
-	movdqu (%rdx), STATE0
-	/* load key: */
-	movdqu (%rsi), KEY
-	movdqa KEY, STATE1
-	/* load all ones: */
-	pcmpeqd STATE2, STATE2
-	/* load the constants: */
-	movdqa .Lmorus640_const_0, STATE3
-	movdqa .Lmorus640_const_1, STATE4
-
-	/* update 16 times with zero: */
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-
-	/* xor-in the key again after updates: */
-	pxor KEY, STATE1
-
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_init)
-
-/*
- * void crypto_morus640_sse2_ad(void *state, const void *data,
- *                              unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_ad)
-	FRAME_BEGIN
-
-	cmp $16, %rdx
-	jb .Lad_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	mov %rsi, %r8
-	and $0xF, %r8
-	jnz .Lad_u_loop
-
-.align 4
-.Lad_a_loop:
-	movdqa (%rsi), MSG
-	call __morus640_update
-	sub $16, %rdx
-	add $16, %rsi
-	cmp $16, %rdx
-	jge .Lad_a_loop
-
-	jmp .Lad_cont
-.align 4
-.Lad_u_loop:
-	movdqu (%rsi), MSG
-	call __morus640_update
-	sub $16, %rdx
-	add $16, %rsi
-	cmp $16, %rdx
-	jge .Lad_u_loop
-
-.Lad_cont:
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_ad)
-
-/*
- * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_enc)
-	FRAME_BEGIN
-
-	cmp $16, %rcx
-	jb .Lenc_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Lenc_u_loop
-
-.align 4
-.Lenc_a_loop:
-	movdqa (%rsi), MSG
-	movdqa MSG, T0
-	pxor STATE0, T0
-	pshufd $MASK3, STATE1, T1
-	pxor T1, T0
-	movdqa STATE2, T1
-	pand STATE3, T1
-	pxor T1, T0
-	movdqa T0, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Lenc_a_loop
-
-	jmp .Lenc_cont
-.align 4
-.Lenc_u_loop:
-	movdqu (%rsi), MSG
-	movdqa MSG, T0
-	pxor STATE0, T0
-	pshufd $MASK3, STATE1, T1
-	pxor T1, T0
-	movdqa STATE2, T1
-	pand STATE3, T1
-	pxor T1, T0
-	movdqu T0, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Lenc_u_loop
-
-.Lenc_cont:
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_enc)
-
-/*
- * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
- *                                    unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_enc_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	/* encrypt message: */
-	call __load_partial
-
-	movdqa MSG, T0
-	pxor STATE0, T0
-	pshufd $MASK3, STATE1, T1
-	pxor T1, T0
-	movdqa STATE2, T1
-	pand STATE3, T1
-	pxor T1, T0
-
-	call __store_partial
-
-	call __morus640_update
-
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_enc_tail)
-
-/*
- * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_dec)
-	FRAME_BEGIN
-
-	cmp $16, %rcx
-	jb .Ldec_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Ldec_u_loop
-
-.align 4
-.Ldec_a_loop:
-	movdqa (%rsi), MSG
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-	movdqa MSG, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Ldec_a_loop
-
-	jmp .Ldec_cont
-.align 4
-.Ldec_u_loop:
-	movdqu (%rsi), MSG
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-	movdqu MSG, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Ldec_u_loop
-
-.Ldec_cont:
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_dec)
-
-/*
- * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
- *                                    unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_dec_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	/* decrypt message: */
-	call __load_partial
-
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-	movdqa MSG, T0
-
-	call __store_partial
-
-	/* mask with byte count: */
-	movq %rcx, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	movdqa .Lmorus640_counter, T1
-	pcmpgtb T1, T0
-	pand T0, MSG
-
-	call __morus640_update
-
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_dec_tail)
-
-/*
- * void crypto_morus640_sse2_final(void *state, void *tag_xor,
- *	                           u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_morus640_sse2_final)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	/* xor state[0] into state[4]: */
-	pxor STATE0, STATE4
-
-	/* prepare length block: */
-	movq %rdx, MSG
-	movq %rcx, T0
-	pslldq $8, T0
-	pxor T0, MSG
-	psllq $3, MSG /* multiply by 8 (to get bit count) */
-
-	/* update state: */
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-
-	/* xor tag: */
-	movdqu (%rsi), MSG
-
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-
-	movdqu MSG, (%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_final)
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
deleted file mode 100644
index 8ef68134aef4..000000000000
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *   Glue for SSE2 implementation
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/morus640_glue.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
-					  const void *iv);
-asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
-					unsigned int length);
-
-asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
-					 void *dst, unsigned int length);
-asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
-					 void *dst, unsigned int length);
-
-asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
-					      void *dst, unsigned int length);
-asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
-					      void *dst, unsigned int length);
-
-asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
-					   u64 assoclen, u64 cryptlen);
-
-MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400);
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_morus640_sse2_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_morus640_sse2_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg);
-}
-
-module_init(crypto_morus640_sse2_module_init);
-module_exit(crypto_morus640_sse2_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
-MODULE_ALIAS_CRYPTO("morus640");
-MODULE_ALIAS_CRYPTO("morus640-sse2");
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
deleted file mode 100644
index d8b5fd6cef29..000000000000
--- a/arch/x86/crypto/morus640_glue.c
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *   Common x86 SIMD glue skeleton
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus640_glue.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <asm/fpu/api.h>
-
-struct morus640_state {
-	struct morus640_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus640_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_blocks)(void *state, const void *src, void *dst,
-			     unsigned int length);
-	void (*crypt_tail)(void *state, const void *src, void *dst,
-			   unsigned int length);
-};
-
-static void crypto_morus640_glue_process_ad(
-		struct morus640_state *state,
-		const struct morus640_glue_ops *ops,
-		struct scatterlist *sg_src, unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus640_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS640_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS640_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			ops->ad(state, src, left);
-			src += left & ~(MORUS640_BLOCK_SIZE - 1);
-			left &= MORUS640_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
-		ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
-	}
-}
-
-static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
-					       struct morus640_ops ops,
-					       struct skcipher_walk *walk)
-{
-	while (walk->nbytes >= MORUS640_BLOCK_SIZE) {
-		ops.crypt_blocks(state, walk->src.virt.addr,
-				 walk->dst.virt.addr,
-				 round_down(walk->nbytes, MORUS640_BLOCK_SIZE));
-		skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE);
-	}
-
-	if (walk->nbytes) {
-		ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
-			       walk->nbytes);
-		skcipher_walk_done(walk, 0);
-	}
-}
-
-int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				unsigned int keylen)
-{
-	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen != MORUS640_BLOCK_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
-
-int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
-				     unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
-
-static void crypto_morus640_glue_crypt(struct aead_request *req,
-				       struct morus640_ops ops,
-				       unsigned int cryptlen,
-				       struct morus640_block *tag_xor)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_state state;
-	struct skcipher_walk walk;
-
-	ops.skcipher_walk_init(&walk, req, true);
-
-	kernel_fpu_begin();
-
-	ctx->ops->init(&state, &ctx->key, req->iv);
-	crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
-	crypto_morus640_glue_process_crypt(&state, ops, &walk);
-	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
-
-	kernel_fpu_end();
-}
-
-int crypto_morus640_glue_encrypt(struct aead_request *req)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_blocks = ctx->ops->enc,
-		.crypt_tail = ctx->ops->enc_tail,
-	};
-
-	struct morus640_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
-
-int crypto_morus640_glue_decrypt(struct aead_request *req)
-{
-	static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_blocks = ctx->ops->dec,
-		.crypt_tail = ctx->ops->dec_tail,
-	};
-
-	struct morus640_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
-
-	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
-
-void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
-				   const struct morus640_glue_ops *ops)
-{
-	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
-	ctx->ops = ops;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 80ea118600ab..160dc1a9c7ec 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -344,62 +344,6 @@ config CRYPTO_AEGIS256_AESNI_SSE2
 	help
 	 AESNI+SSE2 implementation of the AEGIS-256 dedicated AEAD algorithm.
 
-config CRYPTO_MORUS640
-	tristate "MORUS-640 AEAD algorithm"
-	select CRYPTO_AEAD
-	help
-	  Support for the MORUS-640 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS640_GLUE
-	tristate
-	depends on X86
-	select CRYPTO_AEAD
-	select CRYPTO_SIMD
-	help
-	  Common glue for SIMD optimizations of the MORUS-640 dedicated AEAD
-	  algorithm.
-
-config CRYPTO_MORUS640_SSE2
-	tristate "MORUS-640 AEAD algorithm (x86_64 SSE2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_MORUS640_GLUE
-	help
-	  SSE2 implementation of the MORUS-640 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS1280
-	tristate "MORUS-1280 AEAD algorithm"
-	select CRYPTO_AEAD
-	help
-	  Support for the MORUS-1280 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS1280_GLUE
-	tristate
-	depends on X86
-	select CRYPTO_AEAD
-	select CRYPTO_SIMD
-	help
-	  Common glue for SIMD optimizations of the MORUS-1280 dedicated AEAD
-	  algorithm.
-
-config CRYPTO_MORUS1280_SSE2
-	tristate "MORUS-1280 AEAD algorithm (x86_64 SSE2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_MORUS1280_GLUE
-	help
-	  SSE2 optimizedimplementation of the MORUS-1280 dedicated AEAD
-	  algorithm.
-
-config CRYPTO_MORUS1280_AVX2
-	tristate "MORUS-1280 AEAD algorithm (x86_64 AVX2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_MORUS1280_GLUE
-	help
-	  AVX2 optimized implementation of the MORUS-1280 dedicated AEAD
-	  algorithm.
-
 config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
diff --git a/crypto/Makefile b/crypto/Makefile
index 9479e1a45d8c..4bc1951d3787 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -92,8 +92,6 @@ obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
 obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
 obj-$(CONFIG_CRYPTO_AEGIS128L) += aegis128l.o
 obj-$(CONFIG_CRYPTO_AEGIS256) += aegis256.o
-obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
-obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
diff --git a/crypto/morus1280.c b/crypto/morus1280.c
deleted file mode 100644
index f8734c6576af..000000000000
--- a/crypto/morus1280.c
+++ /dev/null
@@ -1,542 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus_common.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#define MORUS1280_WORD_SIZE 8
-#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
-#define MORUS1280_BLOCK_ALIGN (__alignof__(__le64))
-#define MORUS1280_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS1280_BLOCK_ALIGN)
-
-struct morus1280_block {
-	u64 words[MORUS_BLOCK_WORDS];
-};
-
-union morus1280_block_in {
-	__le64 words[MORUS_BLOCK_WORDS];
-	u8 bytes[MORUS1280_BLOCK_SIZE];
-};
-
-struct morus1280_state {
-	struct morus1280_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus1280_ctx {
-	struct morus1280_block key;
-};
-
-struct morus1280_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct morus1280_state *state,
-			    u8 *dst, const u8 *src, unsigned int size);
-};
-
-static const struct morus1280_block crypto_morus1280_const[1] = {
-	{ .words = {
-		U64_C(0x0d08050302010100),
-		U64_C(0x6279e99059372215),
-		U64_C(0xf12fc26d55183ddb),
-		U64_C(0xdd28b57342311120),
-	} },
-};
-
-static void crypto_morus1280_round(struct morus1280_block *b0,
-				   struct morus1280_block *b1,
-				   struct morus1280_block *b2,
-				   struct morus1280_block *b3,
-				   struct morus1280_block *b4,
-				   const struct morus1280_block *m,
-				   unsigned int b, unsigned int w)
-{
-	unsigned int i;
-	struct morus1280_block tmp;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		b0->words[i] ^= b1->words[i] & b2->words[i];
-		b0->words[i] ^= b3->words[i];
-		b0->words[i] ^= m->words[i];
-		b0->words[i] = rol64(b0->words[i], b);
-	}
-
-	tmp = *b3;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
-}
-
-static void crypto_morus1280_update(struct morus1280_state *state,
-				    const struct morus1280_block *m)
-{
-	static const struct morus1280_block z = {};
-
-	struct morus1280_block *s = state->s;
-
-	crypto_morus1280_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 13, 1);
-	crypto_morus1280_round(&s[1], &s[2], &s[3], &s[4], &s[0], m,  46, 2);
-	crypto_morus1280_round(&s[2], &s[3], &s[4], &s[0], &s[1], m,  38, 3);
-	crypto_morus1280_round(&s[3], &s[4], &s[0], &s[1], &s[2], m,   7, 2);
-	crypto_morus1280_round(&s[4], &s[0], &s[1], &s[2], &s[3], m,   4, 1);
-}
-
-static void crypto_morus1280_load_a(struct morus1280_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = le64_to_cpu(*(const __le64 *)src);
-		src += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_load_u(struct morus1280_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = get_unaligned_le64(src);
-		src += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_load(struct morus1280_block *dst, const u8 *src)
-{
-	if (MORUS1280_ALIGNED(src))
-		crypto_morus1280_load_a(dst, src);
-	else
-		crypto_morus1280_load_u(dst, src);
-}
-
-static void crypto_morus1280_store_a(u8 *dst, const struct morus1280_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		*(__le64 *)dst = cpu_to_le64(src->words[i]);
-		dst += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_store_u(u8 *dst, const struct morus1280_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		put_unaligned_le64(src->words[i], dst);
-		dst += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_store(u8 *dst, const struct morus1280_block *src)
-{
-	if (MORUS1280_ALIGNED(dst))
-		crypto_morus1280_store_a(dst, src);
-	else
-		crypto_morus1280_store_u(dst, src);
-}
-
-static void crypto_morus1280_ad(struct morus1280_state *state, const u8 *src,
-				unsigned int size)
-{
-	struct morus1280_block m;
-
-	if (MORUS1280_ALIGNED(src)) {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_a(&m, src);
-			crypto_morus1280_update(state, &m);
-
-			size -= MORUS1280_BLOCK_SIZE;
-			src += MORUS1280_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_u(&m, src);
-			crypto_morus1280_update(state, &m);
-
-			size -= MORUS1280_BLOCK_SIZE;
-			src += MORUS1280_BLOCK_SIZE;
-		}
-	}
-}
-
-static void crypto_morus1280_core(const struct morus1280_state *state,
-				  struct morus1280_block *blk)
-{
-	unsigned int i;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
-
-        for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		blk->words[i] ^= state->s[0].words[i];
-		blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
-	}
-}
-
-static void crypto_morus1280_encrypt_chunk(struct morus1280_state *state,
-					   u8 *dst, const u8 *src,
-					   unsigned int size)
-{
-	struct morus1280_block c, m;
-
-	if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_a(&m, src);
-			c = m;
-			crypto_morus1280_core(state, &c);
-			crypto_morus1280_store_a(dst, &c);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_u(&m, src);
-			c = m;
-			crypto_morus1280_core(state, &c);
-			crypto_morus1280_store_u(dst, &c);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus1280_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
-
-		crypto_morus1280_load_a(&m, tail.bytes);
-		c = m;
-		crypto_morus1280_core(state, &c);
-		crypto_morus1280_store_a(tail.bytes, &c);
-		crypto_morus1280_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus1280_decrypt_chunk(struct morus1280_state *state,
-					   u8 *dst, const u8 *src,
-					   unsigned int size)
-{
-	struct morus1280_block m;
-
-	if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_a(&m, src);
-			crypto_morus1280_core(state, &m);
-			crypto_morus1280_store_a(dst, &m);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_u(&m, src);
-			crypto_morus1280_core(state, &m);
-			crypto_morus1280_store_u(dst, &m);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus1280_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
-
-		crypto_morus1280_load_a(&m, tail.bytes);
-		crypto_morus1280_core(state, &m);
-		crypto_morus1280_store_a(tail.bytes, &m);
-		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
-		crypto_morus1280_load_a(&m, tail.bytes);
-		crypto_morus1280_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus1280_init(struct morus1280_state *state,
-				  const struct morus1280_block *key,
-				  const u8 *iv)
-{
-	static const struct morus1280_block z = {};
-
-	union morus1280_block_in tmp;
-	unsigned int i;
-
-	memcpy(tmp.bytes, iv, MORUS_NONCE_SIZE);
-	memset(tmp.bytes + MORUS_NONCE_SIZE, 0,
-	       MORUS1280_BLOCK_SIZE - MORUS_NONCE_SIZE);
-
-	crypto_morus1280_load(&state->s[0], tmp.bytes);
-	state->s[1] = *key;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[2].words[i] = U64_C(0xFFFFFFFFFFFFFFFF);
-	state->s[3] = z;
-	state->s[4] = crypto_morus1280_const[0];
-
-	for (i = 0; i < 16; i++)
-		crypto_morus1280_update(state, &z);
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[1].words[i] ^= key->words[i];
-}
-
-static void crypto_morus1280_process_ad(struct morus1280_state *state,
-					struct scatterlist *sg_src,
-					unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus1280_block m;
-	union morus1280_block_in buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS1280_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-
-				crypto_morus1280_load_a(&m, buf.bytes);
-				crypto_morus1280_update(state, &m);
-
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_morus1280_ad(state, src, left);
-			src += left & ~(MORUS1280_BLOCK_SIZE - 1);
-			left &= MORUS1280_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
-
-		crypto_morus1280_load_a(&m, buf.bytes);
-		crypto_morus1280_update(state, &m);
-	}
-}
-
-static void crypto_morus1280_process_crypt(struct morus1280_state *state,
-					   struct aead_request *req,
-					   const struct morus1280_ops *ops)
-{
-	struct skcipher_walk walk;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-}
-
-static void crypto_morus1280_final(struct morus1280_state *state,
-				   struct morus1280_block *tag_xor,
-				   u64 assoclen, u64 cryptlen)
-{
-	struct morus1280_block tmp;
-	unsigned int i;
-
-	tmp.words[0] = assoclen * 8;
-	tmp.words[1] = cryptlen * 8;
-	tmp.words[2] = 0;
-	tmp.words[3] = 0;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[4].words[i] ^= state->s[0].words[i];
-
-	for (i = 0; i < 10; i++)
-		crypto_morus1280_update(state, &tmp);
-
-	crypto_morus1280_core(state, tag_xor);
-}
-
-static int crypto_morus1280_setkey(struct crypto_aead *aead, const u8 *key,
-				   unsigned int keylen)
-{
-	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
-	union morus1280_block_in tmp;
-
-	if (keylen == MORUS1280_BLOCK_SIZE)
-		crypto_morus1280_load(&ctx->key, key);
-	else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
-		memcpy(tmp.bytes, key, keylen);
-		memcpy(tmp.bytes + keylen, key, keylen);
-
-		crypto_morus1280_load(&ctx->key, tmp.bytes);
-	} else {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int crypto_morus1280_setauthsize(struct crypto_aead *tfm,
-					unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-
-static void crypto_morus1280_crypt(struct aead_request *req,
-				   struct morus1280_block *tag_xor,
-				   unsigned int cryptlen,
-				   const struct morus1280_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_state state;
-
-	crypto_morus1280_init(&state, &ctx->key, req->iv);
-	crypto_morus1280_process_ad(&state, req->src, req->assoclen);
-	crypto_morus1280_process_crypt(&state, req, ops);
-	crypto_morus1280_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_morus1280_encrypt(struct aead_request *req)
-{
-	static const struct morus1280_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_morus1280_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_block tag = {};
-	union morus1280_block_in tag_out;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus1280_store(tag_out.bytes, &tag);
-
-	scatterwalk_map_and_copy(tag_out.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-
-static int crypto_morus1280_decrypt(struct aead_request *req)
-{
-	static const struct morus1280_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_morus1280_decrypt_chunk,
-	};
-	static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union morus1280_block_in tag_in;
-	struct morus1280_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag_in.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus1280_load(&tag, tag_in.bytes);
-	crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus1280_store(tag_in.bytes, &tag);
-
-	return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_morus1280_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_morus1280_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
-static struct aead_alg crypto_morus1280_alg = {
-	.setkey = crypto_morus1280_setkey,
-	.setauthsize = crypto_morus1280_setauthsize,
-	.encrypt = crypto_morus1280_encrypt,
-	.decrypt = crypto_morus1280_decrypt,
-	.init = crypto_morus1280_init_tfm,
-	.exit = crypto_morus1280_exit_tfm,
-
-	.ivsize = MORUS_NONCE_SIZE,
-	.maxauthsize = MORUS_MAX_AUTH_SIZE,
-	.chunksize = MORUS1280_BLOCK_SIZE,
-
-	.base = {
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct morus1280_ctx),
-		.cra_alignmask = 0,
-
-		.cra_priority = 100,
-
-		.cra_name = "morus1280",
-		.cra_driver_name = "morus1280-generic",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-
-static int __init crypto_morus1280_module_init(void)
-{
-	return crypto_register_aead(&crypto_morus1280_alg);
-}
-
-static void __exit crypto_morus1280_module_exit(void)
-{
-	crypto_unregister_aead(&crypto_morus1280_alg);
-}
-
-subsys_initcall(crypto_morus1280_module_init);
-module_exit(crypto_morus1280_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm");
-MODULE_ALIAS_CRYPTO("morus1280");
-MODULE_ALIAS_CRYPTO("morus1280-generic");
diff --git a/crypto/morus640.c b/crypto/morus640.c
deleted file mode 100644
index ae5aa9482cb4..000000000000
--- a/crypto/morus640.c
+++ /dev/null
@@ -1,533 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus_common.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#define MORUS640_WORD_SIZE 4
-#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
-#define MORUS640_BLOCK_ALIGN (__alignof__(__le32))
-#define MORUS640_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS640_BLOCK_ALIGN)
-
-struct morus640_block {
-	u32 words[MORUS_BLOCK_WORDS];
-};
-
-union morus640_block_in {
-	__le32 words[MORUS_BLOCK_WORDS];
-	u8 bytes[MORUS640_BLOCK_SIZE];
-};
-
-struct morus640_state {
-	struct morus640_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus640_ctx {
-	struct morus640_block key;
-};
-
-struct morus640_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct morus640_state *state,
-			    u8 *dst, const u8 *src, unsigned int size);
-};
-
-static const struct morus640_block crypto_morus640_const[2] = {
-	{ .words = {
-		U32_C(0x02010100),
-		U32_C(0x0d080503),
-		U32_C(0x59372215),
-		U32_C(0x6279e990),
-	} },
-	{ .words = {
-		U32_C(0x55183ddb),
-		U32_C(0xf12fc26d),
-		U32_C(0x42311120),
-		U32_C(0xdd28b573),
-	} },
-};
-
-static void crypto_morus640_round(struct morus640_block *b0,
-				  struct morus640_block *b1,
-				  struct morus640_block *b2,
-				  struct morus640_block *b3,
-				  struct morus640_block *b4,
-				  const struct morus640_block *m,
-				  unsigned int b, unsigned int w)
-{
-	unsigned int i;
-	struct morus640_block tmp;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		b0->words[i] ^= b1->words[i] & b2->words[i];
-		b0->words[i] ^= b3->words[i];
-		b0->words[i] ^= m->words[i];
-		b0->words[i] = rol32(b0->words[i], b);
-	}
-
-	tmp = *b3;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
-}
-
-static void crypto_morus640_update(struct morus640_state *state,
-				   const struct morus640_block *m)
-{
-	static const struct morus640_block z = {};
-
-	struct morus640_block *s = state->s;
-
-	crypto_morus640_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z,  5, 1);
-	crypto_morus640_round(&s[1], &s[2], &s[3], &s[4], &s[0], m,  31, 2);
-	crypto_morus640_round(&s[2], &s[3], &s[4], &s[0], &s[1], m,   7, 3);
-	crypto_morus640_round(&s[3], &s[4], &s[0], &s[1], &s[2], m,  22, 2);
-	crypto_morus640_round(&s[4], &s[0], &s[1], &s[2], &s[3], m,  13, 1);
-}
-
-static void crypto_morus640_load_a(struct morus640_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = le32_to_cpu(*(const __le32 *)src);
-		src += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_load_u(struct morus640_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = get_unaligned_le32(src);
-		src += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_load(struct morus640_block *dst, const u8 *src)
-{
-	if (MORUS640_ALIGNED(src))
-		crypto_morus640_load_a(dst, src);
-	else
-		crypto_morus640_load_u(dst, src);
-}
-
-static void crypto_morus640_store_a(u8 *dst, const struct morus640_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		*(__le32 *)dst = cpu_to_le32(src->words[i]);
-		dst += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_store_u(u8 *dst, const struct morus640_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		put_unaligned_le32(src->words[i], dst);
-		dst += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_store(u8 *dst, const struct morus640_block *src)
-{
-	if (MORUS640_ALIGNED(dst))
-		crypto_morus640_store_a(dst, src);
-	else
-		crypto_morus640_store_u(dst, src);
-}
-
-static void crypto_morus640_ad(struct morus640_state *state, const u8 *src,
-			       unsigned int size)
-{
-	struct morus640_block m;
-
-	if (MORUS640_ALIGNED(src)) {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_a(&m, src);
-			crypto_morus640_update(state, &m);
-
-			size -= MORUS640_BLOCK_SIZE;
-			src += MORUS640_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_u(&m, src);
-			crypto_morus640_update(state, &m);
-
-			size -= MORUS640_BLOCK_SIZE;
-			src += MORUS640_BLOCK_SIZE;
-		}
-	}
-}
-
-static void crypto_morus640_core(const struct morus640_state *state,
-				 struct morus640_block *blk)
-{
-	unsigned int i;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		blk->words[i] ^= state->s[0].words[i];
-		blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
-	}
-}
-
-static void crypto_morus640_encrypt_chunk(struct morus640_state *state, u8 *dst,
-					  const u8 *src, unsigned int size)
-{
-	struct morus640_block c, m;
-
-	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_a(&m, src);
-			c = m;
-			crypto_morus640_core(state, &c);
-			crypto_morus640_store_a(dst, &c);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_u(&m, src);
-			c = m;
-			crypto_morus640_core(state, &c);
-			crypto_morus640_store_u(dst, &c);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus640_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
-
-		crypto_morus640_load_a(&m, tail.bytes);
-		c = m;
-		crypto_morus640_core(state, &c);
-		crypto_morus640_store_a(tail.bytes, &c);
-		crypto_morus640_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
-					  const u8 *src, unsigned int size)
-{
-	struct morus640_block m;
-
-	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_a(&m, src);
-			crypto_morus640_core(state, &m);
-			crypto_morus640_store_a(dst, &m);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_u(&m, src);
-			crypto_morus640_core(state, &m);
-			crypto_morus640_store_u(dst, &m);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus640_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
-
-		crypto_morus640_load_a(&m, tail.bytes);
-		crypto_morus640_core(state, &m);
-		crypto_morus640_store_a(tail.bytes, &m);
-		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
-		crypto_morus640_load_a(&m, tail.bytes);
-		crypto_morus640_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus640_init(struct morus640_state *state,
-				 const struct morus640_block *key,
-				 const u8 *iv)
-{
-	static const struct morus640_block z = {};
-
-	unsigned int i;
-
-	crypto_morus640_load(&state->s[0], iv);
-	state->s[1] = *key;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[2].words[i] = U32_C(0xFFFFFFFF);
-	state->s[3] = crypto_morus640_const[0];
-	state->s[4] = crypto_morus640_const[1];
-
-	for (i = 0; i < 16; i++)
-		crypto_morus640_update(state, &z);
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[1].words[i] ^= key->words[i];
-}
-
-static void crypto_morus640_process_ad(struct morus640_state *state,
-				       struct scatterlist *sg_src,
-				       unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus640_block m;
-	union morus640_block_in buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS640_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS640_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-
-				crypto_morus640_load_a(&m, buf.bytes);
-				crypto_morus640_update(state, &m);
-
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_morus640_ad(state, src, left);
-			src += left & ~(MORUS640_BLOCK_SIZE - 1);
-			left &= MORUS640_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
-
-		crypto_morus640_load_a(&m, buf.bytes);
-		crypto_morus640_update(state, &m);
-	}
-}
-
-static void crypto_morus640_process_crypt(struct morus640_state *state,
-					  struct aead_request *req,
-					  const struct morus640_ops *ops)
-{
-	struct skcipher_walk walk;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-}
-
-static void crypto_morus640_final(struct morus640_state *state,
-				  struct morus640_block *tag_xor,
-				  u64 assoclen, u64 cryptlen)
-{
-	struct morus640_block tmp;
-	unsigned int i;
-
-	tmp.words[0] = lower_32_bits(assoclen * 8);
-	tmp.words[1] = upper_32_bits(assoclen * 8);
-	tmp.words[2] = lower_32_bits(cryptlen * 8);
-	tmp.words[3] = upper_32_bits(cryptlen * 8);
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[4].words[i] ^= state->s[0].words[i];
-
-	for (i = 0; i < 10; i++)
-		crypto_morus640_update(state, &tmp);
-
-	crypto_morus640_core(state, tag_xor);
-}
-
-static int crypto_morus640_setkey(struct crypto_aead *aead, const u8 *key,
-				  unsigned int keylen)
-{
-	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen != MORUS640_BLOCK_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	crypto_morus640_load(&ctx->key, key);
-	return 0;
-}
-
-static int crypto_morus640_setauthsize(struct crypto_aead *tfm,
-				       unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-
-static void crypto_morus640_crypt(struct aead_request *req,
-				  struct morus640_block *tag_xor,
-				  unsigned int cryptlen,
-				  const struct morus640_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_state state;
-
-	crypto_morus640_init(&state, &ctx->key, req->iv);
-	crypto_morus640_process_ad(&state, req->src, req->assoclen);
-	crypto_morus640_process_crypt(&state, req, ops);
-	crypto_morus640_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_morus640_encrypt(struct aead_request *req)
-{
-	static const struct morus640_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_morus640_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_block tag = {};
-	union morus640_block_in tag_out;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus640_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus640_store(tag_out.bytes, &tag);
-
-	scatterwalk_map_and_copy(tag_out.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-
-static int crypto_morus640_decrypt(struct aead_request *req)
-{
-	static const struct morus640_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_morus640_decrypt_chunk,
-	};
-	static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union morus640_block_in tag_in;
-	struct morus640_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag_in.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus640_load(&tag, tag_in.bytes);
-	crypto_morus640_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus640_store(tag_in.bytes, &tag);
-
-	return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_morus640_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_morus640_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
-static struct aead_alg crypto_morus640_alg = {
-	.setkey = crypto_morus640_setkey,
-	.setauthsize = crypto_morus640_setauthsize,
-	.encrypt = crypto_morus640_encrypt,
-	.decrypt = crypto_morus640_decrypt,
-	.init = crypto_morus640_init_tfm,
-	.exit = crypto_morus640_exit_tfm,
-
-	.ivsize = MORUS_NONCE_SIZE,
-	.maxauthsize = MORUS_MAX_AUTH_SIZE,
-	.chunksize = MORUS640_BLOCK_SIZE,
-
-	.base = {
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct morus640_ctx),
-		.cra_alignmask = 0,
-
-		.cra_priority = 100,
-
-		.cra_name = "morus640",
-		.cra_driver_name = "morus640-generic",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-static int __init crypto_morus640_module_init(void)
-{
-	return crypto_register_aead(&crypto_morus640_alg);
-}
-
-static void __exit crypto_morus640_module_exit(void)
-{
-	crypto_unregister_aead(&crypto_morus640_alg);
-}
-
-subsys_initcall(crypto_morus640_module_init);
-module_exit(crypto_morus640_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-640 AEAD algorithm");
-MODULE_ALIAS_CRYPTO("morus640");
-MODULE_ALIAS_CRYPTO("morus640-generic");
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 5fe90ea46319..6258581aa628 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4768,18 +4768,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.hash = __VECS(michael_mic_tv_template)
 		}
-	}, {
-		.alg = "morus1280",
-		.test = alg_test_aead,
-		.suite = {
-			.aead = __VECS(morus1280_tv_template)
-		}
-	}, {
-		.alg = "morus640",
-		.test = alg_test_aead,
-		.suite = {
-			.aead = __VECS(morus640_tv_template)
-		}
 	}, {
 		.alg = "nhpoly1305",
 		.test = alg_test_hash,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index b743ca6d12cc..fca03fa018fc 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -20472,1713 +20472,6 @@ static const struct aead_testvec aegis256_tv_template[] = {
 	},
 };
 
-/*
- * MORUS-640 test vectors - generated via reference implementation from
- * SUPERCOP (https://bench.cr.yp.to/supercop.html):
- *
- *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
- *   (see crypto_aead/morus640128v2/)
- */
-static const struct aead_testvec morus640_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 16,
-		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
-			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x89\x62\x7d\xf3\x07\x9d\x52\x05"
-			  "\x53\xc3\x04\x60\x93\xb4\x37\x9a",
-		.clen	= 16,
-	}, {
-		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
-			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
-		.klen	= 16,
-		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
-			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x69",
-		.plen	= 1,
-		.ctext	= "\xa8\x8d\xe4\x90\xb5\x50\x8f\x78"
-			  "\xb6\x10\x9a\x59\x5f\x61\x37\x70"
-			  "\x09",
-		.clen	= 17,
-	}, {
-		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
-			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
-		.klen	= 16,
-		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
-			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
-			  "\x62\x58\xe9\x8f\xef\xa4\x17",
-		.plen	= 15,
-		.ctext	= "\x76\xdd\xb9\x05\x3d\xce\x61\x38"
-			  "\xf3\xef\xf7\xe5\xd7\xfd\x70\xa5"
-			  "\xcf\x9d\x64\xb8\x0a\x9f\xfd\x8b"
-			  "\xd4\x6e\xfe\xd9\xc8\x63\x4b",
-		.clen	= 31,
-	}, {
-		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
-			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
-		.klen	= 16,
-		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
-			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
-			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97",
-		.plen	= 16,
-		.ctext	= "\xdc\x72\xe8\x14\xfb\x63\xad\x72"
-			  "\x1f\x57\x9a\x1f\x88\x81\xdb\xd6"
-			  "\xc1\x91\x9d\xb9\x25\xc4\x99\x4c"
-			  "\x97\xcd\x8a\x0c\x9d\x68\x00\x1c",
-		.clen	= 32,
-	}, {
-		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
-			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
-		.klen	= 16,
-		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
-			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
-			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
-			  "\x09",
-		.plen	= 17,
-		.ctext	= "\x6b\x4f\x3b\x90\x9a\xa2\xb3\x82"
-			  "\x0a\xb8\x55\xee\xeb\x73\x4d\x7f"
-			  "\x54\x11\x3a\x8a\x31\xa3\xb5\xf2"
-			  "\xcd\x49\xdb\xf3\xee\x26\xbd\xa2"
-			  "\x0d",
-		.clen	= 33,
-	}, {
-		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
-			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
-		.klen	= 16,
-		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
-			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
-			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
-			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
-			  "\x57\x05\x01\x1c\x66\x22\xd3",
-		.plen	= 31,
-		.ctext	= "\x59\xd1\x0f\x6b\xee\x27\x84\x92"
-			  "\xb7\xa9\xb5\xdd\x02\xa4\x12\xa5"
-			  "\x50\x32\xb4\x9a\x2e\x35\x83\x55"
-			  "\x36\x12\x12\xed\xa3\x31\xc5\x30"
-			  "\xa7\xe2\x4a\x6d\x05\x59\x43\x91"
-			  "\x75\xfa\x6c\x17\xc6\x73\xca",
-		.clen	= 47,
-	}, {
-		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
-			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
-		.klen	= 16,
-		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
-			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
-			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
-			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
-			  "\x19\x33\xe0\xf4\x40\x81\x72\x28",
-		.plen	= 32,
-		.ctext	= "\xdb\x49\x68\x0f\x91\x5b\x21\xb1"
-			  "\xcf\x50\xb2\x4c\x32\xe1\xa6\x69"
-			  "\xc0\xfb\x44\x1f\xa0\x9a\xeb\x39"
-			  "\x1b\xde\x68\x38\xcc\x27\x52\xc5"
-			  "\xf6\x3e\x74\xea\x66\x5b\x5f\x0c"
-			  "\x65\x9e\x58\xe6\x52\xa2\xfe\x59",
-		.clen	= 48,
-	}, {
-		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
-			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
-		.klen	= 16,
-		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
-			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
-		.assoc	= "\xc5",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x56\xe7\x24\x52\xdd\x95\x60\x5b"
-			  "\x09\x48\x39\x69\x9c\xb3\x62\x46",
-		.clen	= 16,
-	}, {
-		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
-			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
-		.klen	= 16,
-		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
-			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
-		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
-			  "\x47\x3e\xe9\xd4\xcc\xb5\x76",
-		.alen	= 15,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xdd\xfa\x6c\x1f\x5d\x86\x87\x01"
-			  "\x13\xe5\x73\x46\x46\xf2\x5c\xe1",
-		.clen	= 16,
-	}, {
-		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
-			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
-		.klen	= 16,
-		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
-			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
-		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
-			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
-		.alen	= 16,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xa6\x1b\xb9\xd7\x5e\x3c\xcf\xac"
-			  "\xa9\x21\x45\x0b\x16\x52\xf7\xe1",
-		.clen	= 16,
-	}, {
-		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
-			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
-		.klen	= 16,
-		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
-			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
-		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
-			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
-			  "\x3c",
-		.alen	= 17,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x15\xff\xde\x3b\x34\xfc\xf6\xf9"
-			  "\xbb\xa8\x62\xad\x0a\xf5\x48\x60",
-		.clen	= 16,
-	}, {
-		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
-			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
-		.klen	= 16,
-		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
-			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
-		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
-			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
-			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
-			  "\xaf\x0b\x02\x38\xcc\x44\xa7",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xd2\x9d\xf8\x3b\xd7\x84\xe9\x2d"
-			  "\x4b\xef\x75\x16\x0a\x99\xae\x6b",
-		.clen	= 16,
-	}, {
-		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
-			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
-		.klen	= 16,
-		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
-			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
-		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
-			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
-			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
-			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xe4\x8d\xa7\xa7\x45\xc1\x31\x4f"
-			  "\xce\xfb\xaf\xd6\xc2\xe6\xee\xc0",
-		.clen	= 16,
-	}, {
-		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
-			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
-		.klen	= 16,
-		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
-			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
-		.assoc	= "\x31",
-		.alen	= 1,
-		.ptext	= "\x40",
-		.plen	= 1,
-		.ctext	= "\xe2\x67\x38\x4f\xb9\xad\x7d\x38"
-			  "\x01\xfe\x84\x14\x85\xf8\xd1\xe3"
-			  "\x22",
-		.clen	= 17,
-	}, {
-		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
-			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
-		.klen	= 16,
-		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
-			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
-		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
-			  "\x4d\x5b\x15\x3c\xa8\x8f\x06",
-		.alen	= 15,
-		.ptext	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
-			  "\x6d\x92\x42\x61\xa7\x58\x37",
-		.plen	= 15,
-		.ctext	= "\x77\x32\x61\xeb\xb4\x33\x29\x92"
-			  "\x29\x95\xc5\x8e\x85\x76\xab\xfc"
-			  "\x07\x95\xa7\x44\x74\xf7\x22\xff"
-			  "\xd8\xd8\x36\x3d\x8a\x7f\x9e",
-		.clen	= 31,
-	}, {
-		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
-			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
-		.klen	= 16,
-		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
-			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
-		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
-			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
-		.alen	= 16,
-		.ptext	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
-			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
-		.plen	= 16,
-		.ctext	= "\xd8\xfd\x44\x45\xf6\x42\x12\x38"
-			  "\xf2\x0b\xea\x4f\x9e\x11\x61\x07"
-			  "\x48\x67\x98\x18\x9b\xd0\x0c\x59"
-			  "\x67\xa4\x11\xb3\x2b\xd6\xc1\x70",
-		.clen	= 32,
-	}, {
-		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
-			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
-		.klen	= 16,
-		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
-			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
-		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
-			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
-			  "\x3b",
-		.alen	= 17,
-		.ptext	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
-			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
-			  "\x05",
-		.plen	= 17,
-		.ctext	= "\xb1\xab\x53\x4e\xc7\x40\x16\xb6"
-			  "\x71\x3a\x00\x9f\x41\x88\xb0\xb2"
-			  "\x71\x83\x85\x5f\xc8\x79\x0a\x99"
-			  "\x99\xdc\x89\x1c\x88\xd2\x3e\xf9"
-			  "\x83",
-		.clen	= 33,
-	}, {
-		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
-			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
-		.klen	= 16,
-		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
-			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
-		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
-			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
-			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
-			  "\x38\x1d\x3b\x4a\xe9\x7e\x62",
-		.alen	= 31,
-		.ptext	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
-			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
-			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
-			  "\x68\x28\x73\x40\x9f\x96\x4a",
-		.plen	= 31,
-		.ctext	= "\x29\xc4\xf0\x03\xc1\x86\xdf\x06"
-			  "\x5c\x7b\xef\x64\x87\x00\xd1\x37"
-			  "\xa7\x08\xbc\x7f\x8f\x41\x54\xd0"
-			  "\x3e\xf1\xc3\xa2\x96\x84\xdd\x2a"
-			  "\x2d\x21\x30\xf9\x02\xdb\x06\x0c"
-			  "\xf1\x5a\x66\x69\xe0\xca\x83",
-		.clen	= 47,
-	}, {
-		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
-			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
-		.klen	= 16,
-		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
-			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
-		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
-			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
-			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
-			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
-		.alen	= 32,
-		.ptext	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
-			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
-			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
-			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
-		.plen	= 32,
-		.ctext	= "\xe2\x2e\x44\xdf\xd3\x60\x6d\xb2"
-			  "\x70\x57\x37\xc5\xc2\x4f\x8d\x14"
-			  "\xc6\xbf\x8b\xec\xf5\x62\x67\xf2"
-			  "\x2f\xa1\xe6\xd6\xa7\xb1\x8c\x54"
-			  "\xe5\x6b\x49\xf9\x6e\x90\xc3\xaa"
-			  "\x7a\x00\x2e\x4d\x7f\x31\x2e\x81",
-		.clen	= 48,
-	}, {
-		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
-			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
-		.klen	= 16,
-		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
-			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
-		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
-			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
-			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
-			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
-			  "\x1a",
-		.alen	= 33,
-		.ptext	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
-			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
-			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
-			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
-			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
-			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
-			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
-			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
-			  "\x8a",
-		.plen	= 65,
-		.ctext	= "\xc7\xca\x26\x61\x57\xee\xa2\xb9"
-			  "\xb1\x37\xde\x95\x06\x90\x11\x08"
-			  "\x4d\x30\x9f\x24\xc0\x56\xb7\xe1"
-			  "\x0b\x9f\xd2\x57\xe9\xd2\xb1\x76"
-			  "\x56\x9a\xb4\x58\xc5\x08\xfc\xb5"
-			  "\xf2\x31\x9b\xc9\xcd\xb3\x64\xdb"
-			  "\x6f\x50\xbf\xf4\x73\x9d\xfb\x6b"
-			  "\xef\x35\x25\x48\xed\xcf\x29\xa8"
-			  "\xac\xc3\xb9\xcb\x61\x8f\x73\x92"
-			  "\x2c\x7a\x6f\xda\xf9\x09\x6f\xe1"
-			  "\xc4",
-		.clen	= 81,
-	}, {
-		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
-			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
-		.klen	= 16,
-		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
-			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
-		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
-			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
-			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
-			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
-			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
-			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
-			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
-			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
-			  "\x21",
-		.alen	= 65,
-		.ptext	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
-			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
-			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
-			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
-			  "\xac",
-		.plen	= 33,
-		.ctext	= "\x57\xcd\x3d\x46\xc5\xf9\x68\x3b"
-			  "\x2c\x0f\xb4\x7e\x7b\x64\x3e\x40"
-			  "\xf3\x78\x63\x34\x89\x79\x39\x6b"
-			  "\x61\x64\x4a\x9a\xfa\x70\xa4\xd3"
-			  "\x54\x0b\xea\x05\xa6\x95\x64\xed"
-			  "\x3d\x69\xa2\x0c\x27\x56\x2f\x34"
-			  "\x66",
-		.clen	= 49,
-	}, {
-		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
-			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
-		.klen	= 16,
-		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
-			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
-		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
-			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
-		.alen	= 16,
-		.ptext	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
-			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
-		.plen	= 16,
-		.ctext	= "\xfc\x85\x06\x28\x8f\xe8\x23\x1f"
-			  "\x33\x98\x87\xde\x08\xb6\xb6\xae"
-			  "\x3e\xa4\xf8\x19\xf1\x92\x60\x39"
-			  "\xb9\x6b\x3f\xdf\xc8\xcb\x30",
-		.clen	= 31,
-	}, {
-		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
-			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
-		.klen	= 16,
-		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
-			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
-		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
-			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
-		.alen	= 16,
-		.ptext	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
-			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
-		.plen	= 16,
-		.ctext	= "\x74\x7d\x70\x07\xe9\xba\x01\xee"
-			  "\x6c\xc6\x6f\x50\x25\x33\xbe\x50"
-			  "\x17\xb8\x17\x62\xed\x80\xa2\xf5"
-			  "\x03\xde\x85\x71\x5d\x34",
-		.clen	= 30,
-	}, {
-		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
-			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
-		.klen	= 16,
-		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
-			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
-		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
-			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
-		.alen	= 16,
-		.ptext	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
-			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
-		.plen	= 16,
-		.ctext	= "\xf4\xb3\x85\xf9\xac\xde\xb1\x38"
-			  "\x29\xfd\x6c\x7c\x49\xe5\x1d\xaf"
-			  "\xba\xea\xd4\xfa\x3f\x11\x33\x98",
-		.clen	= 24,
-	}, {
-		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
-			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
-		.klen	= 16,
-		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
-			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
-		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
-			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98",
-		.alen	= 16,
-		.ptext	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
-			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a",
-		.plen	= 16,
-		.ctext	= "\xe6\x5c\x49\x4f\x78\xf3\x62\x86"
-			  "\xe1\xb7\xa5\xc3\x32\x88\x3c\x8c"
-			  "\x6e",
-		.clen	= 17,
-	},
-};
-
-/*
- * MORUS-1280 test vectors - generated via reference implementation from
- * SUPERCOP (https://bench.cr.yp.to/supercop.html):
- *
- *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
- *   (see crypto_aead/morus1280128v2/ and crypto_aead/morus1280256v2/ )
- */
-static const struct aead_testvec morus1280_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 16,
-		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
-			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x91\x85\x0f\xf5\x52\x9e\xce\xce"
-			  "\x65\x99\xc7\xbf\xd3\x76\xe8\x98",
-		.clen	= 16,
-	}, {
-		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
-			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
-		.klen	= 16,
-		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
-			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x69",
-		.plen	= 1,
-		.ctext	= "\x88\xc3\x4c\xf0\x2f\x43\x76\x13"
-			  "\x96\xda\x76\x34\x33\x4e\xd5\x39"
-			  "\x73",
-		.clen	= 17,
-	}, {
-		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
-			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
-		.klen	= 16,
-		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
-			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
-			  "\x62\x58\xe9\x8f\xef\xa4\x17\x91"
-			  "\xb4\x96\x9f\x6b\xce\x38\xa5\x46"
-			  "\x13\x7d\x64\x93\xd7\x05\xf5",
-		.plen	= 31,
-		.ctext	= "\x3e\x5c\x3b\x58\x3b\x7d\x2a\x22"
-			  "\x75\x0b\x24\xa6\x0e\xc3\xde\x52"
-			  "\x97\x0b\x64\xd4\xce\x90\x52\xf7"
-			  "\xef\xdb\x6a\x38\xd2\xa8\xa1\x0d"
-			  "\xe0\x61\x33\x24\xc6\x4d\x51\xbc"
-			  "\xa4\x21\x74\xcf\x19\x16\x59",
-		.clen	= 47,
-	}, {
-		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
-			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
-		.klen	= 16,
-		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
-			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
-			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97"
-			  "\xde\x58\xab\xf0\xd3\xd8\x27\x60"
-			  "\xd5\xaa\x43\x6b\xb1\x64\x95\xa4",
-		.plen	= 32,
-		.ctext	= "\x30\x82\x9c\x2b\x67\xcb\xf9\x1f"
-			  "\xde\x9f\x77\xb2\xda\x92\x61\x5c"
-			  "\x09\x0b\x2d\x9a\x26\xaa\x1c\x06"
-			  "\xab\x74\xb7\x2b\x95\x5f\x9f\xa1"
-			  "\x9a\xff\x50\xa0\xa2\xff\xc5\xad"
-			  "\x21\x8e\x84\x5c\x12\x61\xb2\xae",
-		.clen	= 48,
-	}, {
-		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
-			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
-		.klen	= 16,
-		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
-			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
-			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
-			  "\x09\x1a\xb7\x74\xd8\x78\xa9\x79"
-			  "\x96\xd8\x22\x43\x8c\xc3\x34\x7b"
-			  "\xc4",
-		.plen	= 33,
-		.ctext	= "\x67\x5d\x8e\x45\xc8\x39\xf5\x17"
-			  "\xc1\x1d\x2a\xdd\x88\x67\xda\x1f"
-			  "\x6d\xe8\x37\x28\x5a\xc1\x5e\x9f"
-			  "\xa6\xec\xc6\x92\x05\x4b\xc0\xa3"
-			  "\x63\xef\x88\xa4\x9b\x0a\x5c\xed"
-			  "\x2b\x6a\xac\x63\x52\xaa\x10\x94"
-			  "\xd0",
-		.clen	= 49,
-	}, {
-		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
-			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
-		.klen	= 16,
-		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
-			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
-			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
-			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
-			  "\x57\x05\x01\x1c\x66\x22\xd3\x51"
-			  "\xd3\xdf\x18\xc9\x30\x66\xed\xb1"
-			  "\x96\x58\xd5\x8c\x64\x8c\x7c\xf5"
-			  "\x01\xd0\x74\x5f\x9b\xaa\xf6\xd1"
-			  "\xe6\x16\xa2\xac\xde\x47\x40",
-		.plen	= 63,
-		.ctext	= "\x7d\x61\x1a\x35\x20\xcc\x07\x88"
-			  "\x03\x98\x87\xcf\xc0\x6e\x4d\x19"
-			  "\xe3\xd4\x0b\xfb\x29\x8f\x49\x1a"
-			  "\x3a\x06\x77\xce\x71\x2c\xcd\xdd"
-			  "\xed\xf6\xc9\xbe\xa6\x3b\xb8\xfc"
-			  "\x6c\xbe\x77\xed\x74\x0e\x20\x85"
-			  "\xd0\x65\xde\x24\x6f\xe3\x25\xc5"
-			  "\xdf\x5b\x0f\xbd\x8a\x88\x78\xc9"
-			  "\xe5\x81\x37\xde\x84\x7a\xf6\x84"
-			  "\x99\x7a\x72\x9c\x54\x31\xa1",
-		.clen	= 79,
-	}, {
-		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
-			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
-		.klen	= 16,
-		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
-			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
-			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
-			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
-			  "\x19\x33\xe0\xf4\x40\x81\x72\x28"
-			  "\xe1\x8b\x1c\xf8\x91\x78\xff\xaf"
-			  "\xb0\x68\x69\xf2\x27\x35\x91\x84"
-			  "\x2e\x37\x5b\x00\x04\xff\x16\x9c"
-			  "\xb5\x19\x39\xeb\xd9\xcd\x29\x9a",
-		.plen	= 64,
-		.ctext	= "\x05\xc5\xb1\xf9\x1b\xb9\xab\x2c"
-			  "\xa5\x07\x12\xa7\x12\x39\x60\x66"
-			  "\x30\x81\x4a\x03\x78\x28\x45\x52"
-			  "\xd2\x2b\x24\xfd\x8b\xa5\xb7\x66"
-			  "\x6f\x45\xd7\x3b\x67\x6f\x51\xb9"
-			  "\xc0\x3d\x6c\xca\x1e\xae\xff\xb6"
-			  "\x79\xa9\xe4\x82\x5d\x4c\x2d\xdf"
-			  "\xeb\x71\x40\xc9\x2c\x40\x45\x6d"
-			  "\x73\x77\x01\xf3\x4f\xf3\x9d\x2a"
-			  "\x5d\x57\xa8\xa1\x18\xa2\xad\xcb",
-		.clen	= 80,
-	}, {
-		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
-			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
-		.klen	= 16,
-		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
-			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
-		.assoc	= "\xc5",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x4d\xbf\x11\xac\x7f\x97\x0b\x2e"
-			  "\x89\x3b\x9d\x0f\x83\x1c\x08\xc3",
-		.clen	= 16,
-	}, {
-		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
-			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
-		.klen	= 16,
-		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
-			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
-		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
-			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
-			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
-			  "\x6b\x83\x66\xaf\x3e\x27\xc9",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x5b\xc0\x8d\x54\xe4\xec\xbe\x38"
-			  "\x03\x12\xf9\xcc\x9e\x46\x42\x92",
-		.clen	= 16,
-	}, {
-		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
-			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
-		.klen	= 16,
-		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
-			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
-		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
-			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
-			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
-			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x48\xc5\xc3\x4c\x40\x2e\x2f\xc2"
-			  "\x6d\x65\xe0\x67\x9c\x1d\xa0\xf0",
-		.clen	= 16,
-	}, {
-		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
-			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
-		.klen	= 16,
-		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
-			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
-		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
-			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
-			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
-			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc"
-			  "\x97",
-		.alen	= 33,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x28\x64\x78\x51\x55\xd8\x56\x4a"
-			  "\x58\x3e\xf7\xbe\xee\x21\xfe\x94",
-		.clen	= 16,
-	}, {
-		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
-			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
-		.klen	= 16,
-		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
-			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
-		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
-			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
-			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
-			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3"
-			  "\xa6\xbf\x31\x93\x60\xcd\xda\x63"
-			  "\x2c\xb1\xaa\x19\xc8\x19\xf8\xeb"
-			  "\x03\xa1\xe8\xbe\x37\x54\xec\xa2"
-			  "\xcd\x2c\x45\x58\xbd\x8e\x80",
-		.alen	= 63,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xb3\xa6\x00\x4e\x09\x20\xac\x21"
-			  "\x77\x72\x69\x76\x2d\x36\xe5\xc8",
-		.clen	= 16,
-	}, {
-		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
-			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
-		.klen	= 16,
-		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
-			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
-		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
-			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
-			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
-			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a"
-			  "\xb4\x6b\x35\xc2\xc1\xdf\xed\x60"
-			  "\x46\xc1\x3e\x7f\x8c\xc2\x0e\x7a"
-			  "\x30\x08\xd0\x5f\xa0\xaa\x0c\x6d"
-			  "\x9c\x2f\xdb\x97\xb8\x15\x69\x01",
-		.alen	= 64,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x65\x33\x7b\xa1\x63\xf4\x20\xdd"
-			  "\xe4\xb9\x4a\xaa\x9a\x21\xaa\x14",
-		.clen	= 16,
-	}, {
-		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
-			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
-		.klen	= 16,
-		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
-			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
-		.assoc	= "\x31",
-		.alen	= 1,
-		.ptext	= "\x40",
-		.plen	= 1,
-		.ctext	= "\x1d\x47\x17\x34\x86\xf5\x54\x1a"
-			  "\x6d\x28\xb8\x5d\x6c\xcf\xa0\xb9"
-			  "\xbf",
-		.clen	= 17,
-	}, {
-		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
-			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
-		.klen	= 16,
-		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
-			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
-		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
-			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
-			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
-			  "\xf4\x94\x9f\xc1\x5a\x61\x85",
-		.alen	= 31,
-		.ptext	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
-			  "\x6d\x92\x42\x61\xa7\x58\x37\xdb"
-			  "\xb0\xb2\x2b\x9f\x0b\xb8\xbd\x7a"
-			  "\x24\xa0\xd6\xb7\x11\x79\x6c",
-		.plen	= 31,
-		.ctext	= "\x78\x90\x52\xae\x0f\xf7\x2e\xef"
-			  "\x63\x09\x08\x58\xb5\x56\xbd\x72"
-			  "\x6e\x42\xcf\x27\x04\x7c\xdb\x92"
-			  "\x18\xe9\xa4\x33\x90\xba\x62\xb5"
-			  "\x70\xd3\x88\x9b\x4f\x05\xa7\x51"
-			  "\x85\x87\x17\x09\x42\xed\x4e",
-		.clen	= 47,
-	}, {
-		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
-			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
-		.klen	= 16,
-		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
-			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
-		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
-			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
-			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
-			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
-		.alen	= 32,
-		.ptext	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
-			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2"
-			  "\xdb\x74\x36\x23\x11\x58\x3f\x93"
-			  "\xe5\xcd\xb5\x90\xeb\xd8\x0c\xb3",
-		.plen	= 32,
-		.ctext	= "\x1d\x2c\x57\xe0\x50\x38\x3d\x41"
-			  "\x2e\x71\xc8\x3b\x92\x43\x58\xaf"
-			  "\x5a\xfb\xad\x8f\xd9\xd5\x8a\x5e"
-			  "\xdb\xf3\xcd\x3a\x2b\xe1\x2c\x1a"
-			  "\xb0\xed\xe3\x0c\x6e\xf9\xf2\xd6"
-			  "\x90\xe6\xb1\x0e\xa5\x8a\xac\xb7",
-		.clen	= 48,
-	}, {
-		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
-			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
-		.klen	= 16,
-		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
-			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
-		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
-			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
-			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
-			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4"
-			  "\xee",
-		.alen	= 33,
-		.ptext	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
-			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
-			  "\x05\x36\x42\xa7\x16\xf8\xc1\xad"
-			  "\xa7\xfb\x94\x68\xc5\x37\xab\x8a"
-			  "\x72",
-		.plen	= 33,
-		.ctext	= "\x59\x10\x84\x1c\x83\x4c\x8b\xfc"
-			  "\xfd\x2e\x4b\x46\x84\xff\x78\x4e"
-			  "\x50\xda\x5c\xb9\x61\x1d\xf5\xb9"
-			  "\xfe\xbb\x7f\xae\x8c\xc1\x24\xbd"
-			  "\x8c\x6f\x1f\x9b\xce\xc6\xc1\x37"
-			  "\x08\x06\x5a\xe5\x96\x10\x95\xc2"
-			  "\x5e",
-		.clen	= 49,
-	}, {
-		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
-			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
-		.klen	= 16,
-		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
-			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
-		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
-			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
-			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
-			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa"
-			  "\xfd\xc9\x4a\xa9\xa9\x39\x4b\x54"
-			  "\xc8\x0e\x24\x7f\x5e\x10\x7a\x45"
-			  "\x10\x0b\x56\x85\xad\x54\xaa\x66"
-			  "\xa8\x43\xcd\xd4\x9b\xb7\xfa",
-		.alen	= 63,
-		.ptext	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
-			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
-			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
-			  "\x68\x28\x73\x40\x9f\x96\x4a\x60"
-			  "\x80\xf4\x4b\xf4\xc1\x3d\xd0\x93"
-			  "\xcf\x12\xc9\x59\x8f\x7a\x7f\xa8"
-			  "\x1b\xa5\x50\xed\x87\xa9\x72\x59"
-			  "\x9c\x44\xb2\xa4\x99\x98\x34",
-		.plen	= 63,
-		.ctext	= "\x9a\x12\xbc\xdf\x72\xa8\x56\x22"
-			  "\x49\x2d\x07\x92\xfc\x3d\x6d\x5f"
-			  "\xef\x36\x19\xae\x91\xfa\xd6\x63"
-			  "\x46\xea\x8a\x39\x14\x21\xa6\x37"
-			  "\x18\xfc\x97\x3e\x16\xa5\x4d\x39"
-			  "\x45\x2e\x69\xcc\x9c\x5f\xdf\x6d"
-			  "\x5e\xa2\xbf\xac\x83\x32\x72\x52"
-			  "\x58\x58\x23\x40\xfd\xa5\xc2\xe6"
-			  "\xe9\x5a\x50\x98\x00\x58\xc9\x86"
-			  "\x4f\x20\x37\xdb\x7b\x22\xa3",
-		.clen	= 79,
-	}, {
-		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
-			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
-		.klen	= 16,
-		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
-			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
-		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
-			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
-			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
-			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81"
-			  "\x0b\x76\x4f\xd7\x0a\x4b\x5e\x51"
-			  "\xe3\x1d\xb9\xe5\x21\xb9\x8f\xd4"
-			  "\x3d\x72\x3e\x26\x16\xa9\xca\x32"
-			  "\x77\x47\x63\x14\x95\x3d\xe4\x34",
-		.alen	= 64,
-		.ptext	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
-			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
-			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
-			  "\x29\x56\x52\x19\x79\xf5\xe9\x37"
-			  "\x8f\xa1\x50\x23\x22\x4f\xe3\x91"
-			  "\xe9\x21\x5e\xbf\x52\x23\x95\x37"
-			  "\x48\x0c\x38\x8f\xf0\xff\x92\x24"
-			  "\x6b\x47\x49\xe3\x94\x1f\x1e\x01",
-		.plen	= 64,
-		.ctext	= "\xe6\xeb\x92\x5a\x5b\xf0\x2d\xbb"
-			  "\x23\xec\x35\xe3\xae\xc9\xfb\x0b"
-			  "\x90\x14\x46\xeb\xa8\x8d\xb0\x9b"
-			  "\x39\xda\x8b\x48\xec\xb2\x00\x4e"
-			  "\x80\x6f\x46\x4f\x9b\x1e\xbb\x35"
-			  "\xea\x5a\xbc\xa2\x36\xa5\x89\x45"
-			  "\xc2\xd6\xd7\x15\x0b\xf6\x6c\x56"
-			  "\xec\x99\x7d\x61\xb3\x15\x93\xed"
-			  "\x83\x1e\xd9\x48\x84\x0b\x37\xfe"
-			  "\x95\x74\x44\xd5\x54\xa6\x27\x06",
-		.clen	= 80,
-	}, {
-		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
-			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
-		.klen	= 16,
-		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
-			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
-		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
-			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
-			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
-			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
-			  "\x1a\x22\x53\x05\x6b\x5c\x71\x4f"
-			  "\xfd\x2d\x4d\x4c\xe5\x62\xa5\x63"
-			  "\x6a\xda\x26\xc8\x7f\xff\xea\xfd"
-			  "\x46\x4a\xfa\x53\x8f\xc4\xcd\x68"
-			  "\x58",
-		.alen	= 65,
-		.ptext	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
-			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
-			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
-			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
-			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
-			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
-			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
-			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
-			  "\x8a\xdf\x27\xa0\xe4\x60\x99\xae"
-			  "\x8e\x43\xd9\x39\x7b\x10\x40\x67"
-			  "\x5c\x7e\xc9\x70\x63\x34\xca\x59"
-			  "\xfe\x86\xbc\xb7\x9c\x39\xf3\x6d"
-			  "\x6a\x41\x64\x6f\x16\x7f\x65\x7e"
-			  "\x89\x84\x68\xeb\xb0\x51\xbe\x55"
-			  "\x33\x16\x59\x6c\x3b\xef\x88\xad"
-			  "\x2f\xab\xbc\x25\x76\x87\x41\x2f"
-			  "\x36",
-		.plen	= 129,
-		.ctext	= "\x89\x24\x27\x86\xdc\xd7\x6b\xd9"
-			  "\xd1\xcd\xdc\x16\xdd\x2c\xc1\xfb"
-			  "\x52\xb5\xb3\xab\x50\x99\x3f\xa0"
-			  "\x38\xa4\x74\xa5\x04\x15\x63\x05"
-			  "\x8f\x54\x81\x06\x5a\x6b\xa4\x63"
-			  "\x6d\xa7\x21\xcb\xff\x42\x30\x8e"
-			  "\x3b\xd1\xca\x3f\x4b\x1a\xb8\xc3"
-			  "\x42\x01\xe6\xbc\x75\x15\x87\xee"
-			  "\xc9\x8e\x65\x01\xd9\xd8\xb5\x9f"
-			  "\x48\x86\xa6\x5f\x2c\xc7\xb5\xb0"
-			  "\xed\x5d\x14\x7c\x3f\x40\xb1\x0b"
-			  "\x72\xef\x94\x8d\x7a\x85\x56\xe5"
-			  "\x56\x08\x15\x56\xba\xaf\xbd\xf0"
-			  "\x20\xef\xa0\xf6\xa9\xad\xa2\xc9"
-			  "\x1c\x3b\x28\x51\x7e\x77\xb2\x18"
-			  "\x4f\x61\x64\x37\x22\x36\x6d\x78"
-			  "\xed\xed\x35\xe8\x83\xa5\xec\x25"
-			  "\x6b\xff\x5f\x1a\x09\x96\x3d\xdc"
-			  "\x20",
-		.clen	= 145,
-	}, {
-		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
-			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
-		.klen	= 16,
-		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
-			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
-		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
-			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
-			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
-			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
-			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
-			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
-			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
-			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
-			  "\x21\xf7\x42\x89\xac\x12\x2a\x54"
-			  "\x69\xee\x18\xc7\x8d\xed\xe8\xfd"
-			  "\xbb\x04\x28\xe6\x8a\x3c\x98\xc1"
-			  "\x04\x2d\xa9\xa1\x24\x83\xff\xe9"
-			  "\x55\x7a\xf0\xd1\xf6\x63\x05\xe1"
-			  "\xd9\x1e\x75\x72\xc1\x9f\xae\x32"
-			  "\xe1\x6b\xcd\x9e\x61\x19\x23\x86"
-			  "\xd9\xd2\xaf\x8e\xd5\xd3\xa8\xa9"
-			  "\x51",
-		.alen	= 129,
-		.ptext	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
-			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
-			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
-			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
-			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
-			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
-			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
-			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
-			  "\x54",
-		.plen	= 65,
-		.ctext	= "\x36\x78\xb9\x22\xde\x62\x35\x55"
-			  "\x1a\x7a\xf5\x45\xbc\xd7\x15\x82"
-			  "\x01\xe9\x5a\x07\xea\x46\xaf\x91"
-			  "\xcb\x73\xa5\xee\xe1\xb4\xbf\xc2"
-			  "\xdb\xd2\x9d\x59\xde\xfc\x83\x00"
-			  "\xf5\x46\xac\x97\xd5\x57\xa9\xb9"
-			  "\x1f\x8c\xe8\xca\x68\x8b\x91\x0c"
-			  "\x01\xbe\x0a\xaf\x7c\xf6\x67\xa4"
-			  "\xbf\xbc\x88\x3f\x5d\xd1\xf9\x19"
-			  "\x0f\x9d\xb2\xaf\xb9\x6e\x17\xdf"
-			  "\xa2",
-		.clen	= 81,
-	}, {
-		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
-			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
-		.klen	= 16,
-		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
-			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
-		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
-			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
-			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
-			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
-		.alen	= 32,
-		.ptext	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
-			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07"
-			  "\xd9\x02\x7c\x3d\x2f\x18\x4b\x2d"
-			  "\x6e\xde\xee\xa2\x08\x12\xc7\xba",
-		.plen	= 32,
-		.ctext	= "\x08\x1b\x95\x0e\x41\x95\x02\x4b"
-			  "\x9c\xbb\xa8\xd0\x7c\xd3\x44\x6e"
-			  "\x89\x14\x33\x70\x0a\xbc\xea\x39"
-			  "\x88\xaa\x2b\xd5\x73\x11\x55\xf5"
-			  "\x33\x33\x9c\xd7\x42\x34\x49\x8e"
-			  "\x2f\x03\x30\x05\x47\xaf\x34",
-		.clen	= 47,
-	}, {
-		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
-			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
-		.klen	= 16,
-		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
-			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
-		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
-			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
-			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
-			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
-		.alen	= 32,
-		.ptext	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
-			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d"
-			  "\x03\xc4\x88\xc1\x35\xb8\xcd\x47"
-			  "\x2f\x0c\xcd\x7a\xe2\x71\x66\x91",
-		.plen	= 32,
-		.ctext	= "\x97\xca\xf4\xe0\x8d\x89\xbf\x68"
-			  "\x0c\x60\xb9\x27\xdf\xaa\x41\xc6"
-			  "\x25\xd8\xf7\x1f\x10\x15\x48\x61"
-			  "\x4c\x95\x00\xdf\x51\x9b\x7f\xe6"
-			  "\x24\x40\x9e\xbe\x3b\xeb\x1b\x98"
-			  "\xb9\x9c\xe5\xef\xf2\x05",
-		.clen	= 46,
-	}, {
-		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
-			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
-		.klen	= 16,
-		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
-			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
-		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
-			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
-			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
-			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
-		.alen	= 32,
-		.ptext	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
-			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13"
-			  "\x2e\x86\x93\x45\x3a\x58\x4f\x61"
-			  "\xf0\x3a\xac\x53\xbc\xd0\x06\x68",
-		.plen	= 32,
-		.ctext	= "\x63\x4c\x2a\x8e\xb4\x6b\x63\x0d"
-			  "\xb5\xec\x9b\x4e\x12\x23\xa3\xcf"
-			  "\x1a\x5a\x70\x15\x5a\x10\x40\x51"
-			  "\xca\x47\x4c\x9d\xc9\x97\xf4\x77"
-			  "\xdb\xc8\x10\x2d\xdc\x65\x20\x3f",
-		.clen	= 40,
-	}, {
-		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
-			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
-		.klen	= 16,
-		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
-			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
-		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
-			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98"
-			  "\x8d\x98\x1c\xa8\xfe\x50\xf0\x74"
-			  "\x81\x5c\x53\x35\xe0\x17\xbd\x88",
-		.alen	= 32,
-		.ptext	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
-			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a"
-			  "\x58\x49\x9f\xc9\x3f\xf8\xd1\x7a"
-			  "\xb2\x67\x8b\x2b\x96\x2f\xa5\x3e",
-		.plen	= 32,
-		.ctext	= "\xf1\x62\x44\xc7\x5f\x19\xca\x43"
-			  "\x47\x2c\xaf\x68\x82\xbd\x51\xef"
-			  "\x3d\x65\xd8\x45\x2d\x06\x07\x78"
-			  "\x08\x2e\xb3\x23\xcd\x81\x12\x55"
-			  "\x1a",
-		.clen	= 33,
-	}, {
-		.key	= "\xe9\x95\xa2\x8f\x93\x13\x7b\xb7"
-			  "\x96\x4e\x63\x33\x69\x8d\x02\x9b"
-			  "\x23\xf9\x22\xeb\x80\xa0\xb1\x81"
-			  "\xe2\x73\xc3\x21\x4d\x47\x8d\xf4",
-		.klen	= 32,
-		.iv	= "\xf8\x5e\x31\xf7\xd7\xb2\x25\x3e"
-			  "\xb7\x85\x90\x58\x67\x57\x33\x1d",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xdf\x2f\x83\xc0\x45\x4a\x2c\xcf"
-			  "\xb9\xd2\x41\xf6\x80\xa1\x52\x70",
-		.clen	= 16,
-	}, {
-		.key	= "\x25\xba\xdc\x2e\xa3\x8f\x24\xd3"
-			  "\x17\x29\x15\xc5\x63\xb2\xc5\xa1"
-			  "\x4d\xbc\x2d\x6f\x85\x40\x33\x9a"
-			  "\xa3\xa0\xa1\xfa\x27\xa6\x2c\xca",
-		.klen	= 32,
-		.iv	= "\x34\x83\x6a\x96\xe7\x2d\xce\x5a"
-			  "\x38\x5f\x42\xe9\x61\x7b\xf5\x23",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x53",
-		.plen	= 1,
-		.ctext	= "\x01\xd8\x55\x3c\xc0\x5a\x4b\xc7"
-			  "\x01\xf4\x08\xe3\x0d\xf7\xf0\x78"
-			  "\x53",
-		.clen	= 17,
-	}, {
-		.key	= "\x62\xdf\x16\xcd\xb3\x0a\xcc\xef"
-			  "\x98\x03\xc7\x56\x5d\xd6\x87\xa8"
-			  "\x77\x7e\x39\xf3\x8a\xe0\xb5\xb4"
-			  "\x65\xce\x80\xd2\x01\x05\xcb\xa1",
-		.klen	= 32,
-		.iv	= "\x71\xa8\xa4\x35\xf7\xa9\x76\x75"
-			  "\xb8\x39\xf4\x7a\x5b\x9f\xb8\x29",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x8f\x3a\xc1\x05\x7f\xe7\xcb\x83"
-			  "\xf9\xa6\x4d\xc3\x58\x31\x19\x2c"
-			  "\xd7\x90\xc2\x56\x4e\xd8\x57\xc7"
-			  "\xf6\xf0\x27\xb4\x25\x4c\x83",
-		.plen	= 31,
-		.ctext	= "\xc2\x4b\x41\x0f\x2d\xb9\x62\x07"
-			  "\xff\x8e\x74\xf8\xa1\xa6\xd5\x37"
-			  "\xa5\x64\x31\x5c\xca\x73\x9b\x43"
-			  "\xe6\x70\x63\x46\x95\xcb\xf7\xb5"
-			  "\x20\x8c\x75\x7a\x2a\x17\x2f\xa9"
-			  "\xb8\x4d\x11\x42\xd1\xf8\xf1",
-		.clen	= 47,
-	}, {
-		.key	= "\x9e\x03\x4f\x6d\xc3\x86\x75\x0a"
-			  "\x19\xdd\x79\xe8\x57\xfb\x4a\xae"
-			  "\xa2\x40\x45\x77\x90\x80\x37\xce"
-			  "\x26\xfb\x5f\xaa\xdb\x64\x6b\x77",
-		.klen	= 32,
-		.iv	= "\xae\xcc\xde\xd5\x07\x25\x1f\x91"
-			  "\x39\x14\xa6\x0c\x55\xc4\x7b\x30",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xcc\x5f\xfb\xa4\x8f\x63\x74\x9f"
-			  "\x7a\x81\xff\x55\x52\x56\xdc\x33"
-			  "\x01\x52\xcd\xdb\x53\x78\xd9\xe1"
-			  "\xb7\x1d\x06\x8d\xff\xab\x22\x98",
-		.plen	= 32,
-		.ctext	= "\xbb\x01\x7c\xd1\x2c\x33\x7b\x37"
-			  "\x0a\xee\xc4\x30\x19\xd7\x3a\x6f"
-			  "\xf8\x2b\x67\xf5\x3b\x84\x87\x2a"
-			  "\xfb\x07\x7a\x82\xb5\xe4\x85\x26"
-			  "\x1e\xa8\xe5\x04\x54\xce\xe5\x5f"
-			  "\xb5\x3f\xc1\xd5\x7f\xbd\xd2\xa6",
-		.clen	= 48,
-	}, {
-		.key	= "\xdb\x28\x89\x0c\xd3\x01\x1e\x26"
-			  "\x9a\xb7\x2b\x79\x51\x1f\x0d\xb4"
-			  "\xcc\x03\x50\xfc\x95\x20\xb9\xe7"
-			  "\xe8\x29\x3e\x83\xb5\xc3\x0a\x4e",
-		.klen	= 32,
-		.iv	= "\xea\xf1\x18\x74\x17\xa0\xc8\xad"
-			  "\xba\xee\x58\x9d\x4f\xe8\x3d\x36",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x08\x84\x34\x44\x9f\xde\x1c\xbb"
-			  "\xfb\x5b\xb1\xe6\x4c\x7a\x9f\x39"
-			  "\x2c\x14\xd9\x5f\x59\x18\x5b\xfb"
-			  "\x79\x4b\xe5\x65\xd9\x0a\xc1\x6f"
-			  "\x2e",
-		.plen	= 33,
-		.ctext	= "\xc2\xf4\x40\x55\xf9\x59\xff\x73"
-			  "\x08\xf5\x98\x92\x0c\x7b\x35\x9a"
-			  "\xa8\xf4\x42\x7e\x6f\x93\xca\x22"
-			  "\x23\x06\x1e\xf8\x89\x22\xf4\x46"
-			  "\x7c\x7c\x67\x75\xab\xe5\x75\xaa"
-			  "\x15\xd7\x83\x19\xfd\x31\x59\x5b"
-			  "\x32",
-		.clen	= 49,
-	}, {
-		.key	= "\x17\x4d\xc3\xab\xe3\x7d\xc7\x42"
-			  "\x1b\x91\xdd\x0a\x4b\x43\xcf\xba"
-			  "\xf6\xc5\x5c\x80\x9a\xc0\x3b\x01"
-			  "\xa9\x56\x1d\x5b\x8f\x22\xa9\x25",
-		.klen	= 32,
-		.iv	= "\x27\x16\x51\x13\x27\x1c\x71\xc9"
-			  "\x3b\xc8\x0a\x2f\x49\x0c\x00\x3c",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x45\xa8\x6e\xe3\xaf\x5a\xc5\xd7"
-			  "\x7c\x35\x63\x77\x46\x9f\x61\x3f"
-			  "\x56\xd7\xe4\xe3\x5e\xb8\xdc\x14"
-			  "\x3a\x79\xc4\x3e\xb3\x69\x61\x46"
-			  "\x3c\xb6\x83\x4e\xb4\x26\xc7\x73"
-			  "\x22\xda\x52\x8b\x7d\x11\x98\xea"
-			  "\x62\xe1\x14\x1e\xdc\xfe\x0f\xad"
-			  "\x20\x76\x5a\xdc\x4e\x71\x13",
-		.plen	= 63,
-		.ctext	= "\xc9\x82\x3b\x4b\x87\x84\xa5\xdb"
-			  "\xa0\x8c\xd3\x3e\x7f\x8d\xe8\x28"
-			  "\x2a\xdc\xfa\x01\x84\x87\x9a\x70"
-			  "\x81\x75\x37\x0a\xd2\x75\xa9\xb6"
-			  "\x21\x72\xee\x7e\x65\x95\xe5\xcc"
-			  "\x01\xb7\x39\xa6\x51\x15\xca\xff"
-			  "\x61\xdc\x97\x38\xcc\xf4\xca\xc7"
-			  "\x83\x9b\x05\x11\x72\x60\xf0\xb4"
-			  "\x7e\x06\xab\x0a\xc0\xbb\x59\x23"
-			  "\xaa\x2d\xfc\x4e\x35\x05\x59",
-		.clen	= 79,
-	}, {
-		.key	= "\x54\x71\xfd\x4b\xf3\xf9\x6f\x5e"
-			  "\x9c\x6c\x8f\x9c\x45\x68\x92\xc1"
-			  "\x21\x87\x67\x04\x9f\x60\xbd\x1b"
-			  "\x6a\x84\xfc\x34\x6a\x81\x48\xfb",
-		.klen	= 32,
-		.iv	= "\x63\x3b\x8b\xb3\x37\x98\x1a\xe5"
-			  "\xbc\xa2\xbc\xc0\x43\x31\xc2\x42",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x81\xcd\xa8\x82\xbf\xd6\x6e\xf3"
-			  "\xfd\x0f\x15\x09\x40\xc3\x24\x45"
-			  "\x81\x99\xf0\x67\x63\x58\x5e\x2e"
-			  "\xfb\xa6\xa3\x16\x8d\xc8\x00\x1c"
-			  "\x4b\x62\x87\x7c\x15\x38\xda\x70"
-			  "\x3d\xea\xe7\xf2\x40\xba\xae\x79"
-			  "\x8f\x48\xfc\xbf\x45\x53\x2e\x78"
-			  "\xef\x79\xf0\x1b\x49\xf7\xfd\x9c",
-		.plen	= 64,
-		.ctext	= "\x11\x7c\x7d\xef\xce\x29\x95\xec"
-			  "\x7e\x9f\x42\xa6\x26\x07\xa1\x75"
-			  "\x2f\x4e\x09\x9a\xf6\x6b\xc2\xfa"
-			  "\x0d\xd0\x17\xdc\x25\x1e\x9b\xdc"
-			  "\x5f\x8c\x1c\x60\x15\x4f\x9b\x20"
-			  "\x7b\xff\xcd\x82\x60\x84\xf4\xa5"
-			  "\x20\x9a\x05\x19\x5b\x02\x0a\x72"
-			  "\x43\x11\x26\x58\xcf\xc5\x41\xcf"
-			  "\x13\xcc\xde\x32\x92\xfa\x86\xf2"
-			  "\xaf\x16\xe8\x8f\xca\xb6\xfd\x54",
-		.clen	= 80,
-	}, {
-		.key	= "\x90\x96\x36\xea\x03\x74\x18\x7a"
-			  "\x1d\x46\x42\x2d\x3f\x8c\x54\xc7"
-			  "\x4b\x4a\x73\x89\xa4\x00\x3f\x34"
-			  "\x2c\xb1\xdb\x0c\x44\xe0\xe8\xd2",
-		.klen	= 32,
-		.iv	= "\xa0\x5f\xc5\x52\x47\x13\xc2\x01"
-			  "\x3d\x7c\x6e\x52\x3d\x55\x85\x48",
-		.assoc	= "\xaf",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x9b\xc5\x3b\x20\x0a\x88\x56\xbe"
-			  "\x69\xdf\xc4\xc4\x02\x46\x3a\xf0",
-		.clen	= 16,
-	}, {
-		.key	= "\xcd\xbb\x70\x89\x13\xf0\xc1\x95"
-			  "\x9e\x20\xf4\xbf\x39\xb1\x17\xcd"
-			  "\x76\x0c\x7f\x0d\xa9\xa0\xc1\x4e"
-			  "\xed\xdf\xb9\xe4\x1e\x3f\x87\xa8",
-		.klen	= 32,
-		.iv	= "\xdc\x84\xfe\xf1\x58\x8f\x6b\x1c"
-			  "\xbe\x57\x20\xe3\x37\x7a\x48\x4f",
-		.assoc	= "\xeb\x4d\x8d\x59\x9c\x2e\x15\xa3"
-			  "\xde\x8d\x4d\x07\x36\x43\x78\xd0"
-			  "\x0b\x6d\x84\x4f\x2c\xf0\x82\x5b"
-			  "\x4e\xf6\x29\xd1\x8b\x6f\x56",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xe0\x6d\xa1\x07\x98\x2f\x40\x2d"
-			  "\x2e\x9a\xd6\x61\x43\xc0\x74\x69",
-		.clen	= 16,
-	}, {
-		.key	= "\x0a\xe0\xaa\x29\x24\x6c\x6a\xb1"
-			  "\x1f\xfa\xa6\x50\x33\xd5\xda\xd3"
-			  "\xa0\xce\x8a\x91\xae\x40\x43\x68"
-			  "\xae\x0d\x98\xbd\xf8\x9e\x26\x7f",
-		.klen	= 32,
-		.iv	= "\x19\xa9\x38\x91\x68\x0b\x14\x38"
-			  "\x3f\x31\xd2\x74\x31\x9e\x0a\x55",
-		.assoc	= "\x28\x72\xc7\xf8\xac\xaa\xbe\xbf"
-			  "\x5f\x67\xff\x99\x30\x67\x3b\xd6"
-			  "\x35\x2f\x90\xd3\x31\x90\x04\x74"
-			  "\x0f\x23\x08\xa9\x65\xce\xf6\xea",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xb9\x57\x13\x3e\x82\x31\x61\x65"
-			  "\x0d\x7f\x6c\x96\x93\x5c\x50\xe2",
-		.clen	= 16,
-	}, {
-		.key	= "\x46\x04\xe3\xc8\x34\xe7\x12\xcd"
-			  "\xa0\xd4\x58\xe2\x2d\xf9\x9c\xda"
-			  "\xca\x91\x96\x15\xb4\xe0\xc5\x81"
-			  "\x70\x3a\x77\x95\xd2\xfd\xc5\x55",
-		.klen	= 32,
-		.iv	= "\x55\xcd\x72\x30\x78\x86\xbd\x54"
-			  "\xc0\x0b\x84\x06\x2b\xc2\xcd\x5b",
-		.assoc	= "\x64\x97\x00\x98\xbc\x25\x67\xdb"
-			  "\xe0\x41\xb1\x2a\x2a\x8c\xfe\xdd"
-			  "\x5f\xf2\x9c\x58\x36\x30\x86\x8e"
-			  "\xd1\x51\xe6\x81\x3f\x2d\x95\xc1"
-			  "\x01",
-		.alen	= 33,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x81\x96\x34\xde\xbb\x36\xdd\x3e"
-			  "\x4e\x5e\xcb\x44\x21\xb8\x3f\xf1",
-		.clen	= 16,
-	}, {
-		.key	= "\x83\x29\x1d\x67\x44\x63\xbb\xe9"
-			  "\x20\xaf\x0a\x73\x27\x1e\x5f\xe0"
-			  "\xf5\x53\xa1\x9a\xb9\x80\x47\x9b"
-			  "\x31\x68\x56\x6e\xac\x5c\x65\x2c",
-		.klen	= 32,
-		.iv	= "\x92\xf2\xac\xcf\x88\x02\x65\x70"
-			  "\x41\xe5\x36\x97\x25\xe7\x90\x61",
-		.assoc	= "\xa1\xbb\x3a\x37\xcc\xa1\x10\xf7"
-			  "\x61\x1c\x63\xbc\x24\xb0\xc0\xe3"
-			  "\x8a\xb4\xa7\xdc\x3b\xd0\x08\xa8"
-			  "\x92\x7f\xc5\x5a\x19\x8c\x34\x97"
-			  "\x0f\x95\x9b\x18\xe4\x8d\xb4\x24"
-			  "\xb9\x33\x28\x18\xe1\x9d\x14\xe0"
-			  "\x64\xb2\x89\x7d\x78\xa8\x05\x7e"
-			  "\x07\x8c\xfc\x88\x2d\xb8\x53",
-		.alen	= 63,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x2e\x99\xb6\x79\x57\x56\x80\x36"
-			  "\x8e\xc4\x1c\x12\x7d\x71\x36\x0c",
-		.clen	= 16,
-	}, {
-		.key	= "\xbf\x4e\x57\x07\x54\xdf\x64\x05"
-			  "\xa1\x89\xbc\x04\x21\x42\x22\xe6"
-			  "\x1f\x15\xad\x1e\xbe\x20\xc9\xb4"
-			  "\xf3\x95\x35\x46\x86\xbb\x04\x03",
-		.klen	= 32,
-		.iv	= "\xce\x17\xe5\x6f\x98\x7e\x0e\x8c"
-			  "\xc2\xbf\xe8\x29\x1f\x0b\x52\x68",
-		.assoc	= "\xdd\xe0\x74\xd6\xdc\x1d\xb8\x13"
-			  "\xe2\xf6\x15\x4d\x1e\xd4\x83\xe9"
-			  "\xb4\x76\xb3\x60\x40\x70\x8a\xc1"
-			  "\x53\xac\xa4\x32\xf3\xeb\xd3\x6e"
-			  "\x1e\x42\xa0\x46\x45\x9f\xc7\x22"
-			  "\xd3\x43\xbc\x7e\xa5\x47\x2a\x6f"
-			  "\x91\x19\x70\x1e\xe1\xfe\x25\x49"
-			  "\xd6\x8f\x93\xc7\x28\x3f\x3d\x03",
-		.alen	= 64,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x7b\x25\x3d\x47\xd4\xa7\x08\xce"
-			  "\x3b\x89\x40\x36\xba\x6d\x0e\xa2",
-		.clen	= 16,
-	}, {
-		.key	= "\xfc\x72\x90\xa6\x64\x5a\x0d\x21"
-			  "\x22\x63\x6e\x96\x1b\x67\xe4\xec"
-			  "\x49\xd7\xb9\xa2\xc3\xc0\x4b\xce"
-			  "\xb4\xc3\x14\x1e\x61\x1a\xa3\xd9",
-		.klen	= 32,
-		.iv	= "\x0b\x3c\x1f\x0e\xa8\xf9\xb7\xa7"
-			  "\x42\x9a\x9a\xba\x19\x30\x15\x6e",
-		.assoc	= "\x1a",
-		.alen	= 1,
-		.ptext	= "\x29",
-		.plen	= 1,
-		.ctext	= "\xe6\x09\x6f\x95\x9a\x18\xc8\xf6"
-			  "\x17\x75\x81\x16\xdf\x26\xff\x67"
-			  "\x92",
-		.clen	= 17,
-	}, {
-		.key	= "\x38\x97\xca\x45\x74\xd6\xb6\x3c"
-			  "\xa3\x3d\x20\x27\x15\x8b\xa7\xf2"
-			  "\x74\x9a\xc4\x27\xc8\x60\xcd\xe8"
-			  "\x75\xf0\xf2\xf7\x3b\x79\x42\xb0",
-		.klen	= 32,
-		.iv	= "\x47\x60\x59\xad\xb8\x75\x60\xc3"
-			  "\xc3\x74\x4c\x4c\x13\x54\xd8\x74",
-		.assoc	= "\x56\x29\xe7\x15\xfc\x14\x0a\x4a"
-			  "\xe4\xaa\x79\x70\x12\x1d\x08\xf6"
-			  "\x09\xfb\xca\x69\x4b\xb0\x8e\xf5"
-			  "\xd6\x07\x62\xe3\xa8\xa9\x12",
-		.alen	= 31,
-		.ptext	= "\x66\xf3\x75\x7d\x40\xb3\xb4\xd1"
-			  "\x04\xe1\xa6\x94\x10\xe6\x39\x77"
-			  "\xd3\xac\x4d\x8a\x8c\x58\x6e\xfb"
-			  "\x06\x13\x9a\xd9\x5e\xc0\xfa",
-		.plen	= 31,
-		.ctext	= "\x82\xc0\x56\xf0\xd7\xc4\xc9\xfd"
-			  "\x3c\xd1\x2a\xd4\x15\x86\x9d\xda"
-			  "\xea\x6c\x6f\xa1\x33\xb0\x7a\x01"
-			  "\x57\xe7\xf3\x7b\x73\xe7\x54\x10"
-			  "\xc6\x91\xe2\xc6\xa0\x69\xe7\xe6"
-			  "\x76\xc3\xf5\x3a\x76\xfd\x4a",
-		.clen	= 47,
-	}, {
-		.key	= "\x75\xbc\x04\xe5\x84\x52\x5e\x58"
-			  "\x24\x17\xd2\xb9\x0e\xaf\x6a\xf9"
-			  "\x9e\x5c\xd0\xab\xcd\x00\x4f\x01"
-			  "\x37\x1e\xd1\xcf\x15\xd8\xe2\x86",
-		.klen	= 32,
-		.iv	= "\x84\x85\x92\x4d\xc8\xf1\x08\xdf"
-			  "\x44\x4e\xff\xdd\x0d\x78\x9a\x7a",
-		.assoc	= "\x93\x4e\x21\xb4\x0c\x90\xb3\x66"
-			  "\x65\x84\x2b\x01\x0b\x42\xcb\xfc"
-			  "\x33\xbd\xd6\xed\x50\x50\x10\x0e"
-			  "\x97\x35\x41\xbb\x82\x08\xb1\xf2",
-		.alen	= 32,
-		.ptext	= "\xa2\x17\xaf\x1c\x50\x2e\x5d\xed"
-			  "\x85\xbb\x58\x26\x0a\x0b\xfc\x7d"
-			  "\xfe\x6e\x59\x0e\x91\xf8\xf0\x15"
-			  "\xc8\x40\x78\xb1\x38\x1f\x99\xa7",
-		.plen	= 32,
-		.ctext	= "\x01\x47\x8e\x6c\xf6\x64\x89\x3a"
-			  "\x71\xce\xe4\xaa\x45\x70\xe6\x84"
-			  "\x62\x48\x08\x64\x86\x6a\xdf\xec"
-			  "\xb4\xa0\xfb\x34\x03\x0c\x19\xf4"
-			  "\x2b\x7b\x36\x73\xec\x54\xa9\x1e"
-			  "\x30\x85\xdb\xe4\xac\xe9\x2c\xca",
-		.clen	= 48,
-	}, {
-		.key	= "\xb1\xe1\x3e\x84\x94\xcd\x07\x74"
-			  "\xa5\xf2\x84\x4a\x08\xd4\x2c\xff"
-			  "\xc8\x1e\xdb\x2f\xd2\xa0\xd1\x1b"
-			  "\xf8\x4c\xb0\xa8\xef\x37\x81\x5d",
-		.klen	= 32,
-		.iv	= "\xc0\xaa\xcc\xec\xd8\x6c\xb1\xfb"
-			  "\xc5\x28\xb1\x6e\x07\x9d\x5d\x81",
-		.assoc	= "\xd0\x73\x5a\x54\x1d\x0b\x5b\x82"
-			  "\xe5\x5f\xdd\x93\x05\x66\x8e\x02"
-			  "\x5e\x80\xe1\x71\x55\xf0\x92\x28"
-			  "\x59\x62\x20\x94\x5c\x67\x50\xc8"
-			  "\x58",
-		.alen	= 33,
-		.ptext	= "\xdf\x3c\xe9\xbc\x61\xaa\x06\x09"
-			  "\x06\x95\x0a\xb7\x04\x2f\xbe\x84"
-			  "\x28\x30\x64\x92\x96\x98\x72\x2e"
-			  "\x89\x6e\x57\x8a\x13\x7e\x38\x7e"
-			  "\xdb",
-		.plen	= 33,
-		.ctext	= "\x85\xe0\xf8\x0f\x8e\x49\xe3\x60"
-			  "\xcb\x4a\x54\x94\xcf\xf5\x7e\x34"
-			  "\xe9\xf8\x80\x65\x53\xd0\x72\x70"
-			  "\x4f\x7d\x9d\xd1\x15\x6f\xb9\x2c"
-			  "\xfa\xe8\xdd\xac\x2e\xe1\x3f\x67"
-			  "\x63\x0f\x1a\x59\xb7\x89\xdb\xf4"
-			  "\xc3",
-		.clen	= 49,
-	}, {
-		.key	= "\xee\x05\x77\x23\xa5\x49\xb0\x90"
-			  "\x26\xcc\x36\xdc\x02\xf8\xef\x05"
-			  "\xf3\xe1\xe7\xb3\xd8\x40\x53\x35"
-			  "\xb9\x79\x8f\x80\xc9\x96\x20\x33",
-		.klen	= 32,
-		.iv	= "\xfd\xce\x06\x8b\xe9\xe8\x5a\x17"
-			  "\x46\x02\x63\x00\x01\xc1\x20\x87",
-		.assoc	= "\x0c\x98\x94\xf3\x2d\x87\x04\x9e"
-			  "\x66\x39\x8f\x24\xff\x8a\x50\x08"
-			  "\x88\x42\xed\xf6\x5a\x90\x14\x42"
-			  "\x1a\x90\xfe\x6c\x36\xc6\xf0\x9f"
-			  "\x66\xa0\xb5\x2d\x2c\xf8\x25\x15"
-			  "\x55\x90\xa2\x7e\x77\x94\x96\x3a"
-			  "\x71\x1c\xf7\x44\xee\xa8\xc3\x42"
-			  "\xe2\xa3\x84\x04\x0b\xe1\xce",
-		.alen	= 63,
-		.ptext	= "\x1b\x61\x23\x5b\x71\x26\xae\x25"
-			  "\x87\x6f\xbc\x49\xfe\x53\x81\x8a"
-			  "\x53\xf2\x70\x17\x9b\x38\xf4\x48"
-			  "\x4b\x9b\x36\x62\xed\xdd\xd8\x54"
-			  "\xea\xcb\xb6\x79\x45\xfc\xaa\x54"
-			  "\x5c\x94\x47\x58\xa7\xff\x9c\x9e"
-			  "\x7c\xb6\xf1\xac\xc8\xfd\x8b\x35"
-			  "\xd5\xa4\x6a\xd4\x09\xc2\x08",
-		.plen	= 63,
-		.ctext	= "\x00\xe5\x5b\x87\x5c\x20\x22\x8a"
-			  "\xda\x1f\xd3\xff\xbb\xb2\xb0\xf8"
-			  "\xef\xe9\xeb\x9e\x7c\x80\xf4\x2b"
-			  "\x59\xc0\x79\xbc\x17\xa0\x15\x01"
-			  "\xf5\x72\xfb\x5a\xe7\xaf\x07\xe3"
-			  "\x1b\x49\x21\x34\x23\x63\x55\x5e"
-			  "\xee\x4f\x34\x17\xfa\xfe\xa5\x0c"
-			  "\xed\x0b\x23\xea\x9b\xda\x57\x2f"
-			  "\xf6\xa9\xae\x0d\x4e\x40\x96\x45"
-			  "\x7f\xfa\xf0\xbf\xc4\x98\x78",
-		.clen	= 79,
-	}, {
-		.key	= "\x2a\x2a\xb1\xc3\xb5\xc5\x59\xac"
-			  "\xa7\xa6\xe8\x6d\xfc\x1d\xb2\x0b"
-			  "\x1d\xa3\xf3\x38\xdd\xe0\xd5\x4e"
-			  "\x7b\xa7\x6e\x58\xa3\xf5\xbf\x0a",
-		.klen	= 32,
-		.iv	= "\x39\xf3\x3f\x2b\xf9\x64\x03\x33"
-			  "\xc7\xdd\x15\x91\xfb\xe6\xe2\x8d",
-		.assoc	= "\x49\xbc\xce\x92\x3d\x02\xad\xba"
-			  "\xe7\x13\x41\xb6\xf9\xaf\x13\x0f"
-			  "\xb2\x04\xf8\x7a\x5f\x30\x96\x5b"
-			  "\xdc\xbd\xdd\x44\x10\x25\x8f\x75"
-			  "\x75\x4d\xb9\x5b\x8e\x0a\x38\x13"
-			  "\x6f\x9f\x36\xe4\x3a\x3e\xac\xc9"
-			  "\x9d\x83\xde\xe5\x57\xfd\xe3\x0e"
-			  "\xb1\xa7\x1b\x44\x05\x67\xb7\x37",
-		.alen	= 64,
-		.ptext	= "\x58\x85\x5c\xfa\x81\xa1\x57\x40"
-			  "\x08\x4a\x6e\xda\xf8\x78\x44\x90"
-			  "\x7d\xb5\x7b\x9b\xa1\xd8\x76\x62"
-			  "\x0c\xc9\x15\x3b\xc7\x3c\x77\x2b"
-			  "\xf8\x78\xba\xa7\xa6\x0e\xbd\x52"
-			  "\x76\xa3\xdc\xbe\x6b\xa8\xb1\x2d"
-			  "\xa9\x1d\xd8\x4e\x31\x53\xab\x00"
-			  "\xa5\xa7\x01\x13\x04\x49\xf2\x04",
-		.plen	= 64,
-		.ctext	= "\x28\xdd\xb9\x4a\x12\xc7\x0a\xe1"
-			  "\x58\x06\x1a\x9b\x8c\x67\xdf\xeb"
-			  "\x35\x35\x60\x9d\x06\x40\x65\xc1"
-			  "\x93\xe8\xb3\x82\x50\x29\xdd\xb5"
-			  "\x2b\xcb\xde\x18\x78\x6b\x42\xbe"
-			  "\x6d\x24\xd0\xb2\x7d\xd7\x08\x8f"
-			  "\x4a\x18\x98\xad\x8c\xf2\x97\xb4"
-			  "\xf4\x77\xe4\xbf\x41\x3b\xc4\x06"
-			  "\xce\x9e\x34\x81\xf0\x89\x11\x13"
-			  "\x02\x65\xa1\x7c\xdf\x07\x33\x06",
-		.clen	= 80,
-	}, {
-		.key	= "\x67\x4f\xeb\x62\xc5\x40\x01\xc7"
-			  "\x28\x80\x9a\xfe\xf6\x41\x74\x12"
-			  "\x48\x65\xfe\xbc\xe2\x80\x57\x68"
-			  "\x3c\xd4\x4d\x31\x7d\x54\x5f\xe1",
-		.klen	= 32,
-		.iv	= "\x76\x18\x79\xca\x09\xdf\xac\x4e"
-			  "\x48\xb7\xc7\x23\xf5\x0a\xa5\x93",
-		.assoc	= "\x85\xe1\x08\x32\x4d\x7e\x56\xd5"
-			  "\x68\xed\xf3\x47\xf3\xd3\xd6\x15"
-			  "\xdd\xc7\x04\xfe\x64\xd0\x18\x75"
-			  "\x9d\xeb\xbc\x1d\xea\x84\x2e\x4c"
-			  "\x83\xf9\xbe\x8a\xef\x1c\x4b\x10"
-			  "\x89\xaf\xcb\x4b\xfe\xe7\xc1\x58"
-			  "\xca\xea\xc6\x87\xc0\x53\x03\xd9"
-			  "\x80\xaa\xb2\x83\xff\xee\xa1\x6a"
-			  "\x04",
-		.alen	= 65,
-		.ptext	= "\x94\xaa\x96\x9a\x91\x1d\x00\x5c"
-			  "\x88\x24\x20\x6b\xf2\x9c\x06\x96"
-			  "\xa7\x77\x87\x1f\xa6\x78\xf8\x7b"
-			  "\xcd\xf6\xf4\x13\xa1\x9b\x16\x02"
-			  "\x07\x24\xbf\xd5\x08\x20\xd0\x4f"
-			  "\x90\xb3\x70\x24\x2f\x51\xc7\xbb"
-			  "\xd6\x84\xc0\xef\x9a\xa8\xca\xcc"
-			  "\x74\xab\x97\x53\xfe\xd0\xdb\x37"
-			  "\x37\x6a\x0e\x9f\x3f\xa3\x2a\xe3"
-			  "\x1b\x34\x6d\x51\x72\x2b\x17\xe7"
-			  "\x4d\xaa\x2c\x18\xda\xa3\x33\x89"
-			  "\x2a\x9f\xf4\xd2\xed\x76\x3d\x3f"
-			  "\x3c\x15\x9d\x8e\x4f\x3c\x27\xb0"
-			  "\x42\x3f\x2f\x8a\xd4\xc2\x10\xb2"
-			  "\x27\x7f\xe3\x34\x80\x02\x49\x4b"
-			  "\x07\x68\x22\x2a\x88\x25\x53\xb2"
-			  "\x2f",
-		.plen	= 129,
-		.ctext	= "\x85\x39\x69\x35\xfb\xf9\xb0\xa6"
-			  "\x85\x43\x88\xd0\xd7\x78\x60\x19"
-			  "\x3e\x1f\xb1\xa4\xd6\xc5\x96\xec"
-			  "\xf7\x84\x85\xc7\x27\x0f\x74\x57"
-			  "\x28\x9e\xdd\x90\x3c\x43\x12\xc5"
-			  "\x51\x3d\x39\x8f\xa5\xf4\xe0\x0b"
-			  "\x57\x04\xf1\x6d\xfe\x9b\x84\x27"
-			  "\xe8\xeb\x4d\xda\x02\x0a\xc5\x49"
-			  "\x1a\x55\x5e\x50\x56\x4d\x94\xda"
-			  "\x20\xf8\x12\x54\x50\xb3\x11\xda"
-			  "\xed\x44\x27\x67\xd5\xd1\x8b\x4b"
-			  "\x38\x67\x56\x65\x59\xda\xe6\x97"
-			  "\x81\xae\x2f\x92\x3b\xae\x22\x1c"
-			  "\x91\x59\x38\x18\x00\xe8\xba\x92"
-			  "\x04\x19\x56\xdf\xb0\x82\xeb\x6f"
-			  "\x2e\xdb\x54\x3c\x4b\xbb\x60\x90"
-			  "\x4c\x50\x10\x62\xba\x7a\xb1\x68"
-			  "\x37\xd7\x87\x4e\xe4\x66\x09\x1f"
-			  "\xa5",
-		.clen	= 145,
-	}, {
-		.key	= "\xa3\x73\x24\x01\xd5\xbc\xaa\xe3"
-			  "\xa9\x5a\x4c\x90\xf0\x65\x37\x18"
-			  "\x72\x28\x0a\x40\xe7\x20\xd9\x82"
-			  "\xfe\x02\x2b\x09\x57\xb3\xfe\xb7",
-		.klen	= 32,
-		.iv	= "\xb3\x3d\xb3\x69\x19\x5b\x54\x6a"
-			  "\xc9\x91\x79\xb4\xef\x2e\x68\x99",
-		.assoc	= "\xc2\x06\x41\xd1\x5d\xfa\xff\xf1"
-			  "\xe9\xc7\xa5\xd9\xed\xf8\x98\x1b"
-			  "\x07\x89\x10\x82\x6a\x70\x9a\x8f"
-			  "\x5e\x19\x9b\xf5\xc5\xe3\xcd\x22"
-			  "\x92\xa5\xc2\xb8\x51\x2e\x5e\x0e"
-			  "\xa4\xbe\x5f\xb1\xc1\x90\xd7\xe7"
-			  "\xf7\x52\xae\x28\x29\xa8\x22\xa4"
-			  "\x4f\xae\x48\xc2\xfa\x75\x8b\x9e"
-			  "\xce\x83\x2a\x88\x07\x55\xbb\x89"
-			  "\xf6\xdf\xac\xdf\x83\x08\xbf\x7d"
-			  "\xac\x30\x8b\x8e\x02\xac\x00\xf1"
-			  "\x30\x46\xe1\xbc\x75\xbf\x49\xbb"
-			  "\x26\x4e\x29\xf0\x2f\x21\xc6\x13"
-			  "\x92\xd9\x3d\x11\xe4\x10\x00\x8e"
-			  "\xd4\xd4\x58\x65\xa6\x2b\xe3\x25"
-			  "\xb1\x8f\x15\x93\xe7\x71\xb9\x2c"
-			  "\x4b",
-		.alen	= 129,
-		.ptext	= "\xd1\xcf\xd0\x39\xa1\x99\xa9\x78"
-			  "\x09\xfe\xd2\xfd\xec\xc1\xc9\x9d"
-			  "\xd2\x39\x93\xa3\xab\x18\x7a\x95"
-			  "\x8f\x24\xd3\xeb\x7b\xfa\xb5\xd8"
-			  "\x15\xd1\xc3\x04\x69\x32\xe3\x4d"
-			  "\xaa\xc2\x04\x8b\xf2\xfa\xdc\x4a"
-			  "\x02\xeb\xa8\x90\x03\xfd\xea\x97"
-			  "\x43\xaf\x2e\x92\xf8\x57\xc5\x6a"
-			  "\x00",
-		.plen	= 65,
-		.ctext	= "\x7d\xde\x53\x22\xe4\x23\x3b\x30"
-			  "\x78\xde\x35\x90\x7a\xd9\x0b\x93"
-			  "\xf6\x0e\x0b\xed\x40\xee\x10\x9c"
-			  "\x96\x3a\xd3\x34\xb2\xd0\x67\xcf"
-			  "\x63\x7f\x2d\x0c\xcf\x96\xec\x64"
-			  "\x1a\x87\xcc\x7d\x2c\x5e\x81\x4b"
-			  "\xd2\x8f\x4c\x7c\x00\xb1\xb4\xe0"
-			  "\x87\x4d\xb1\xbc\xd8\x78\x2c\x17"
-			  "\xf2\x3b\xd8\x28\x40\xe2\x76\xf6"
-			  "\x20\x13\x83\x46\xaf\xff\xe3\x0f"
-			  "\x72",
-		.clen	= 81,
-	}, {
-		.key	= "\xe0\x98\x5e\xa1\xe5\x38\x53\xff"
-			  "\x2a\x35\xfe\x21\xea\x8a\xfa\x1e"
-			  "\x9c\xea\x15\xc5\xec\xc0\x5b\x9b"
-			  "\xbf\x2f\x0a\xe1\x32\x12\x9d\x8e",
-		.klen	= 32,
-		.iv	= "\xef\x61\xed\x08\x29\xd7\xfd\x86"
-			  "\x4a\x6b\x2b\x46\xe9\x53\x2a\xa0",
-		.assoc	= "\xfe\x2a\x7b\x70\x6d\x75\xa7\x0d"
-			  "\x6a\xa2\x57\x6a\xe7\x1c\x5b\x21"
-			  "\x31\x4b\x1b\x07\x6f\x10\x1c\xa8"
-			  "\x20\x46\x7a\xce\x9f\x42\x6d\xf9",
-		.alen	= 32,
-		.ptext	= "\x0d\xf4\x09\xd8\xb1\x14\x51\x94"
-			  "\x8a\xd8\x84\x8e\xe6\xe5\x8c\xa3"
-			  "\xfc\xfc\x9e\x28\xb0\xb8\xfc\xaf"
-			  "\x50\x52\xb1\xc4\x55\x59\x55\xaf",
-		.plen	= 32,
-		.ctext	= "\x5a\xcd\x8c\x57\xf2\x6a\xb6\xbe"
-			  "\x53\xc7\xaa\x9a\x60\x74\x9c\xc4"
-			  "\xa2\xc2\xd0\x6d\xe1\x03\x63\xdc"
-			  "\xbb\x51\x7e\x9c\x89\x73\xde\x4e"
-			  "\x24\xf8\x52\x7c\x15\x41\x0e\xba"
-			  "\x69\x0e\x36\x5f\x2f\x22\x8c",
-		.clen	= 47,
-	}, {
-		.key	= "\x1c\xbd\x98\x40\xf5\xb3\xfc\x1b"
-			  "\xaa\x0f\xb0\xb3\xe4\xae\xbc\x24"
-			  "\xc7\xac\x21\x49\xf1\x60\xdd\xb5"
-			  "\x80\x5d\xe9\xba\x0c\x71\x3c\x64",
-		.klen	= 32,
-		.iv	= "\x2c\x86\x26\xa8\x39\x52\xa6\xa2"
-			  "\xcb\x45\xdd\xd7\xe3\x77\xed\xa6",
-		.assoc	= "\x3b\x4f\xb5\x10\x7d\xf1\x50\x29"
-			  "\xeb\x7c\x0a\xfb\xe1\x40\x1e\x27"
-			  "\x5c\x0d\x27\x8b\x74\xb0\x9e\xc2"
-			  "\xe1\x74\x59\xa6\x79\xa1\x0c\xd0",
-		.alen	= 32,
-		.ptext	= "\x4a\x18\x43\x77\xc1\x90\xfa\xb0"
-			  "\x0b\xb2\x36\x20\xe0\x09\x4e\xa9"
-			  "\x26\xbe\xaa\xac\xb5\x58\x7e\xc8"
-			  "\x11\x7f\x90\x9c\x2f\xb8\xf4\x85",
-		.plen	= 32,
-		.ctext	= "\x47\xd6\xce\x78\xd6\xbf\x4a\x51"
-			  "\xb8\xda\x92\x3c\xfd\xda\xac\x8e"
-			  "\x8d\x88\xd7\x4d\x90\xe5\xeb\xa1"
-			  "\xab\xd6\x7c\x76\xad\xea\x7d\x76"
-			  "\x53\xee\xb0\xcd\xd0\x02\xbb\x70"
-			  "\x5b\x6f\x7b\xe2\x8c\xe8",
-		.clen	= 46,
-	}, {
-		.key	= "\x59\xe1\xd2\xdf\x05\x2f\xa4\x37"
-			  "\x2b\xe9\x63\x44\xde\xd3\x7f\x2b"
-			  "\xf1\x6f\x2d\xcd\xf6\x00\x5f\xcf"
-			  "\x42\x8a\xc8\x92\xe6\xd0\xdc\x3b",
-		.klen	= 32,
-		.iv	= "\x68\xab\x60\x47\x49\xce\x4f\xbe"
-			  "\x4c\x20\x8f\x68\xdd\x9c\xb0\xac",
-		.assoc	= "\x77\x74\xee\xaf\x8d\x6d\xf9\x45"
-			  "\x6c\x56\xbc\x8d\xdb\x65\xe0\x2e"
-			  "\x86\xd0\x32\x0f\x79\x50\x20\xdb"
-			  "\xa2\xa1\x37\x7e\x53\x00\xab\xa6",
-		.alen	= 32,
-		.ptext	= "\x86\x3d\x7d\x17\xd1\x0c\xa3\xcc"
-			  "\x8c\x8d\xe8\xb1\xda\x2e\x11\xaf"
-			  "\x51\x80\xb5\x30\xba\xf8\x00\xe2"
-			  "\xd3\xad\x6f\x75\x09\x18\x93\x5c",
-		.plen	= 32,
-		.ctext	= "\x9f\xa9\x2b\xa4\x8f\x00\x05\x2b"
-			  "\xe7\x68\x81\x51\xbb\xfb\xdf\x60"
-			  "\xbb\xac\xe8\xc1\xdc\x68\xae\x68"
-			  "\x3a\xcd\x7a\x06\x49\xfe\x80\x11"
-			  "\xe6\x61\x99\xe2\xdd\xbe\x2c\xbf",
-		.clen	= 40,
-	}, {
-		.key	= "\x96\x06\x0b\x7f\x15\xab\x4d\x53"
-			  "\xac\xc3\x15\xd6\xd8\xf7\x42\x31"
-			  "\x1b\x31\x38\x51\xfc\xa0\xe1\xe8"
-			  "\x03\xb8\xa7\x6b\xc0\x2f\x7b\x11",
-		.klen	= 32,
-		.iv	= "\xa5\xcf\x9a\xe6\x59\x4a\xf7\xd9"
-			  "\xcd\xfa\x41\xfa\xd7\xc0\x72\xb2",
-		.assoc	= "\xb4\x99\x28\x4e\x9d\xe8\xa2\x60"
-			  "\xed\x30\x6e\x1e\xd5\x89\xa3\x34"
-			  "\xb1\x92\x3e\x93\x7e\xf0\xa2\xf5"
-			  "\x64\xcf\x16\x57\x2d\x5f\x4a\x7d",
-		.alen	= 32,
-		.ptext	= "\xc3\x62\xb7\xb6\xe2\x87\x4c\xe7"
-			  "\x0d\x67\x9a\x43\xd4\x52\xd4\xb5"
-			  "\x7b\x43\xc1\xb5\xbf\x98\x82\xfc"
-			  "\x94\xda\x4e\x4d\xe4\x77\x32\x32",
-		.plen	= 32,
-		.ctext	= "\xe2\x34\xfa\x25\xfd\xfb\x89\x5e"
-			  "\x5b\x4e\x0b\x15\x6e\x39\xfb\x0c"
-			  "\x73\xc7\xd9\x6b\xbe\xce\x9b\x70"
-			  "\xc7\x4f\x96\x16\x03\xfc\xea\xfb"
-			  "\x56",
-		.clen	= 33,
-	},
-};
-
 /*
  * All key wrapping test vectors taken from
  * http://csrc.nist.gov/groups/STM/cavp/documents/mac/kwtestvectors.zip
diff --git a/include/crypto/morus1280_glue.h b/include/crypto/morus1280_glue.h
deleted file mode 100644
index 5cefddb1991f..000000000000
--- a/include/crypto/morus1280_glue.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Common glue skeleton -- header file
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#ifndef _CRYPTO_MORUS1280_GLUE_H
-#define _CRYPTO_MORUS1280_GLUE_H
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/aead.h>
-#include <crypto/morus_common.h>
-
-#define MORUS1280_WORD_SIZE 8
-#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
-
-struct morus1280_block {
-	u8 bytes[MORUS1280_BLOCK_SIZE];
-};
-
-struct morus1280_glue_ops {
-	void (*init)(void *state, const void *key, const void *iv);
-	void (*ad)(void *state, const void *data, unsigned int length);
-	void (*enc)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec)(void *state, const void *src, void *dst, unsigned int length);
-	void (*enc_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*final)(void *state, void *tag_xor, u64 assoclen, u64 cryptlen);
-};
-
-struct morus1280_ctx {
-	const struct morus1280_glue_ops *ops;
-	struct morus1280_block key;
-};
-
-void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
-				    const struct morus1280_glue_ops *ops);
-int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				 unsigned int keylen);
-int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
-				      unsigned int authsize);
-int crypto_morus1280_glue_encrypt(struct aead_request *req);
-int crypto_morus1280_glue_decrypt(struct aead_request *req);
-
-#define MORUS1280_DECLARE_ALG(id, driver_name, priority) \
-	static const struct morus1280_glue_ops crypto_morus1280_##id##_ops = {\
-		.init = crypto_morus1280_##id##_init, \
-		.ad = crypto_morus1280_##id##_ad, \
-		.enc = crypto_morus1280_##id##_enc, \
-		.enc_tail = crypto_morus1280_##id##_enc_tail, \
-		.dec = crypto_morus1280_##id##_dec, \
-		.dec_tail = crypto_morus1280_##id##_dec_tail, \
-		.final = crypto_morus1280_##id##_final, \
-	}; \
-	\
-	static int crypto_morus1280_##id##_init_tfm(struct crypto_aead *tfm) \
-	{ \
-		crypto_morus1280_glue_init_ops(tfm, &crypto_morus1280_##id##_ops); \
-		return 0; \
-	} \
-	\
-	static void crypto_morus1280_##id##_exit_tfm(struct crypto_aead *tfm) \
-	{ \
-	} \
-	\
-	static struct aead_alg crypto_morus1280_##id##_alg = { \
-		.setkey = crypto_morus1280_glue_setkey, \
-		.setauthsize = crypto_morus1280_glue_setauthsize, \
-		.encrypt = crypto_morus1280_glue_encrypt, \
-		.decrypt = crypto_morus1280_glue_decrypt, \
-		.init = crypto_morus1280_##id##_init_tfm, \
-		.exit = crypto_morus1280_##id##_exit_tfm, \
-		\
-		.ivsize = MORUS_NONCE_SIZE, \
-		.maxauthsize = MORUS_MAX_AUTH_SIZE, \
-		.chunksize = MORUS1280_BLOCK_SIZE, \
-		\
-		.base = { \
-			.cra_flags = CRYPTO_ALG_INTERNAL, \
-			.cra_blocksize = 1, \
-			.cra_ctxsize = sizeof(struct morus1280_ctx), \
-			.cra_alignmask = 0, \
-			.cra_priority = priority, \
-			\
-			.cra_name = "__morus1280", \
-			.cra_driver_name = "__"driver_name, \
-			\
-			.cra_module = THIS_MODULE, \
-		} \
-	}
-
-#endif /* _CRYPTO_MORUS1280_GLUE_H */
diff --git a/include/crypto/morus640_glue.h b/include/crypto/morus640_glue.h
deleted file mode 100644
index 0ee6266cb26c..000000000000
--- a/include/crypto/morus640_glue.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *   Common glue skeleton -- header file
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#ifndef _CRYPTO_MORUS640_GLUE_H
-#define _CRYPTO_MORUS640_GLUE_H
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/aead.h>
-#include <crypto/morus_common.h>
-
-#define MORUS640_WORD_SIZE 4
-#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
-
-struct morus640_block {
-	u8 bytes[MORUS640_BLOCK_SIZE];
-};
-
-struct morus640_glue_ops {
-	void (*init)(void *state, const void *key, const void *iv);
-	void (*ad)(void *state, const void *data, unsigned int length);
-	void (*enc)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec)(void *state, const void *src, void *dst, unsigned int length);
-	void (*enc_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*final)(void *state, void *tag_xor, u64 assoclen, u64 cryptlen);
-};
-
-struct morus640_ctx {
-	const struct morus640_glue_ops *ops;
-	struct morus640_block key;
-};
-
-void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
-				   const struct morus640_glue_ops *ops);
-int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				unsigned int keylen);
-int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
-				     unsigned int authsize);
-int crypto_morus640_glue_encrypt(struct aead_request *req);
-int crypto_morus640_glue_decrypt(struct aead_request *req);
-
-#define MORUS640_DECLARE_ALG(id, driver_name, priority) \
-	static const struct morus640_glue_ops crypto_morus640_##id##_ops = {\
-		.init = crypto_morus640_##id##_init, \
-		.ad = crypto_morus640_##id##_ad, \
-		.enc = crypto_morus640_##id##_enc, \
-		.enc_tail = crypto_morus640_##id##_enc_tail, \
-		.dec = crypto_morus640_##id##_dec, \
-		.dec_tail = crypto_morus640_##id##_dec_tail, \
-		.final = crypto_morus640_##id##_final, \
-	}; \
-	\
-	static int crypto_morus640_##id##_init_tfm(struct crypto_aead *tfm) \
-	{ \
-		crypto_morus640_glue_init_ops(tfm, &crypto_morus640_##id##_ops); \
-		return 0; \
-	} \
-	\
-	static void crypto_morus640_##id##_exit_tfm(struct crypto_aead *tfm) \
-	{ \
-	} \
-	\
-	static struct aead_alg crypto_morus640_##id##_alg = {\
-		.setkey = crypto_morus640_glue_setkey, \
-		.setauthsize = crypto_morus640_glue_setauthsize, \
-		.encrypt = crypto_morus640_glue_encrypt, \
-		.decrypt = crypto_morus640_glue_decrypt, \
-		.init = crypto_morus640_##id##_init_tfm, \
-		.exit = crypto_morus640_##id##_exit_tfm, \
-		\
-		.ivsize = MORUS_NONCE_SIZE, \
-		.maxauthsize = MORUS_MAX_AUTH_SIZE, \
-		.chunksize = MORUS640_BLOCK_SIZE, \
-		\
-		.base = { \
-			.cra_flags = CRYPTO_ALG_INTERNAL, \
-			.cra_blocksize = 1, \
-			.cra_ctxsize = sizeof(struct morus640_ctx), \
-			.cra_alignmask = 0, \
-			.cra_priority = priority, \
-			\
-			.cra_name = "__morus640", \
-			.cra_driver_name = "__"driver_name, \
-			\
-			.cra_module = THIS_MODULE, \
-		} \
-	}
-
-#endif /* _CRYPTO_MORUS640_GLUE_H */
diff --git a/include/crypto/morus_common.h b/include/crypto/morus_common.h
deleted file mode 100644
index 969510a9a56c..000000000000
--- a/include/crypto/morus_common.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * The MORUS Authenticated-Encryption Algorithm
- *   Common definitions
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#ifndef _CRYPTO_MORUS_COMMON_H
-#define _CRYPTO_MORUS_COMMON_H
-
-#define MORUS_BLOCK_WORDS 4
-#define MORUS_STATE_BLOCKS 5
-#define MORUS_NONCE_SIZE 16
-#define MORUS_MAX_AUTH_SIZE 16
-
-#endif /* _CRYPTO_MORUS_COMMON_H */
-- 
cgit v1.2.3


From 17a8f868ae3e85a173843b1ac65e744e8585bc5a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 8 Jul 2019 11:24:56 +0530
Subject: opp: Return genpd virtual devices from dev_pm_opp_attach_genpd()

The cpufreq drivers don't need to do runtime PM operations on the
virtual devices returned by dev_pm_domain_attach_by_name() and so the
virtual devices weren't shared with the callers of
dev_pm_opp_attach_genpd() earlier.

But the IO device drivers would want to do that. This patch updates the
prototype of dev_pm_opp_attach_genpd() to accept another argument to
return the pointer to the array of genpd virtual devices.

Reported-by: Rajendra Nayak <rnayak@codeaurora.org>
Tested-by: Rajendra Nayak <rnayak@codeaurora.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 6 +++++-
 include/linux/pm_opp.h | 4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 08b830f84d8c..bdb435822401 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1771,6 +1771,7 @@ static void _opp_detach_genpd(struct opp_table *opp_table)
  * dev_pm_opp_attach_genpd - Attach genpd(s) for the device and save virtual device pointer
  * @dev: Consumer device for which the genpd is getting attached.
  * @names: Null terminated array of pointers containing names of genpd to attach.
+ * @virt_devs: Pointer to return the array of virtual devices.
  *
  * Multiple generic power domains for a device are supported with the help of
  * virtual genpd devices, which are created for each consumer device - genpd
@@ -1788,7 +1789,8 @@ static void _opp_detach_genpd(struct opp_table *opp_table)
  * The order of entries in the names array must match the order in which
  * "required-opps" are added in DT.
  */
-struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names)
+struct opp_table *dev_pm_opp_attach_genpd(struct device *dev,
+		const char **names, struct device ***virt_devs)
 {
 	struct opp_table *opp_table;
 	struct device *virt_dev;
@@ -1842,6 +1844,8 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names
 		name++;
 	}
 
+	if (virt_devs)
+		*virt_devs = opp_table->genpd_virt_devs;
 	mutex_unlock(&opp_table->genpd_virt_dev_lock);
 
 	return opp_table;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index af5021f27cb7..5bdceca5125d 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -128,7 +128,7 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name);
 void dev_pm_opp_put_clkname(struct opp_table *opp_table);
 struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
 void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table);
-struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names);
+struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs);
 void dev_pm_opp_detach_genpd(struct opp_table *opp_table);
 int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
@@ -292,7 +292,7 @@ static inline struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const
 
 static inline void dev_pm_opp_put_clkname(struct opp_table *opp_table) {}
 
-static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names)
+static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
-- 
cgit v1.2.3


From 71419d84c216cee8da3b19fb843b4242f112cde4 Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@linaro.org>
Date: Thu, 25 Jul 2019 12:41:29 +0200
Subject: opp: Add dev_pm_opp_find_level_exact()

Since the performance states in the OPP table are unique, implement a
dev_pm_opp_find_level_exact() in order to be able to fetch a specific OPP.

Signed-off-by: Niklas Cassel <niklas.cassel@linaro.org>
[ Viresh: Updated commit log ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_opp.h |  8 ++++++++
 2 files changed, 56 insertions(+)

(limited to 'include')

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index bdb435822401..0ee8c0133d3e 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -401,6 +401,54 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
 
+/**
+ * dev_pm_opp_find_level_exact() - search for an exact level
+ * @dev:		device for which we do this operation
+ * @level:		level to search for
+ *
+ * Return: Searches for exact match in the opp table and returns pointer to the
+ * matching opp if found, else returns ERR_PTR in case of error and should
+ * be handled using IS_ERR. Error return values can be:
+ * EINVAL:	for bad pointer
+ * ERANGE:	no match found for search
+ * ENODEV:	if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+					       unsigned int level)
+{
+	struct opp_table *opp_table;
+	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
+
+	opp_table = _find_opp_table(dev);
+	if (IS_ERR(opp_table)) {
+		int r = PTR_ERR(opp_table);
+
+		dev_err(dev, "%s: OPP table not found (%d)\n", __func__, r);
+		return ERR_PTR(r);
+	}
+
+	mutex_lock(&opp_table->lock);
+
+	list_for_each_entry(temp_opp, &opp_table->opp_list, node) {
+		if (temp_opp->level == level) {
+			opp = temp_opp;
+
+			/* Increment the reference count of OPP */
+			dev_pm_opp_get(opp);
+			break;
+		}
+	}
+
+	mutex_unlock(&opp_table->lock);
+	dev_pm_opp_put_opp_table(opp_table);
+
+	return opp;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_level_exact);
+
 static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table,
 						   unsigned long *freq)
 {
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5bdceca5125d..b8197ab014f2 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -96,6 +96,8 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev);
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 					      unsigned long freq,
 					      bool available);
+struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+					       unsigned int level);
 
 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					      unsigned long *freq);
@@ -200,6 +202,12 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 	return ERR_PTR(-ENOTSUPP);
 }
 
+static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+					unsigned int level)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 					unsigned long *freq)
 {
-- 
cgit v1.2.3


From 5db4c4b9559f8cddd5f7f74e58c7b8f172120e6d Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Tue, 23 Jul 2019 21:00:01 +0300
Subject: mac80211: pass the vif to cancel_remain_on_channel

This low level driver can find it useful to get the vif
when a remain on channel session is cancelled.

iwlwifi will need this soon.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Link: https://lore.kernel.org/r/20190723180001.5828-1-emmanuel.grumbach@intel.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath10k/mac.c             | 3 ++-
 drivers/net/wireless/ath/ath9k/main.c             | 3 ++-
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 ++-
 drivers/net/wireless/mac80211_hwsim.c             | 3 ++-
 drivers/net/wireless/rsi/rsi_91x_mac80211.c       | 3 ++-
 drivers/net/wireless/ti/wlcore/main.c             | 3 ++-
 include/net/mac80211.h                            | 3 ++-
 net/mac80211/driver-ops.h                         | 8 +++++---
 net/mac80211/offchannel.c                         | 5 +++--
 net/mac80211/trace.h                              | 7 ++++---
 10 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0606416dc971..12dad659bf68 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -6970,7 +6970,8 @@ exit:
 	return ret;
 }
 
-static int ath10k_cancel_remain_on_channel(struct ieee80211_hw *hw)
+static int ath10k_cancel_remain_on_channel(struct ieee80211_hw *hw,
+					   struct ieee80211_vif *vif)
 {
 	struct ath10k *ar = hw->priv;
 
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index f23cb2f3d296..34121fbf32e3 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -2392,7 +2392,8 @@ out:
 	return ret;
 }
 
-static int ath9k_cancel_remain_on_channel(struct ieee80211_hw *hw)
+static int ath9k_cancel_remain_on_channel(struct ieee80211_hw *hw,
+					  struct ieee80211_vif *vif)
 {
 	struct ath_softc *sc = hw->priv;
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 55cd49ccbf0b..e63623251d61 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -4010,7 +4010,8 @@ out_unlock:
 	return ret;
 }
 
-static int iwl_mvm_cancel_roc(struct ieee80211_hw *hw)
+static int iwl_mvm_cancel_roc(struct ieee80211_hw *hw,
+			      struct ieee80211_vif *vif)
 {
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index c4611eb4eb86..0869f924e60c 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2216,7 +2216,8 @@ static int mac80211_hwsim_roc(struct ieee80211_hw *hw,
 	return 0;
 }
 
-static int mac80211_hwsim_croc(struct ieee80211_hw *hw)
+static int mac80211_hwsim_croc(struct ieee80211_hw *hw,
+			       struct ieee80211_vif *vif)
 {
 	struct mac80211_hwsim_data *hwsim = hw->priv;
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 49df3bb08d41..ce5e92d82efc 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -1818,7 +1818,8 @@ out:
 	return status;
 }
 
-static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw)
+static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw,
+				   struct ieee80211_vif *vif)
 {
 	struct rsi_hw *adapter = hw->priv;
 	struct rsi_common *common = adapter->priv;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index b74dc8bc9755..547ad538d8b6 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -5749,7 +5749,8 @@ static void wlcore_roc_complete_work(struct work_struct *work)
 		ieee80211_remain_on_channel_expired(wl->hw);
 }
 
-static int wlcore_op_cancel_remain_on_channel(struct ieee80211_hw *hw)
+static int wlcore_op_cancel_remain_on_channel(struct ieee80211_hw *hw,
+					      struct ieee80211_vif *vif)
 {
 	struct wl1271 *wl = hw->priv;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d26da013f7c0..e39bf85ae4c2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3914,7 +3914,8 @@ struct ieee80211_ops {
 				 struct ieee80211_channel *chan,
 				 int duration,
 				 enum ieee80211_roc_type type);
-	int (*cancel_remain_on_channel)(struct ieee80211_hw *hw);
+	int (*cancel_remain_on_channel)(struct ieee80211_hw *hw,
+					struct ieee80211_vif *vif);
 	int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx);
 	void (*get_ringparam)(struct ieee80211_hw *hw,
 			      u32 *tx, u32 *tx_max, u32 *rx, u32 *rx_max);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c2d8b5451a5e..2c9b3eb8b652 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -692,14 +692,16 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local,
 	return ret;
 }
 
-static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local)
+static inline int
+drv_cancel_remain_on_channel(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *sdata)
 {
 	int ret;
 
 	might_sleep();
 
-	trace_drv_cancel_remain_on_channel(local);
-	ret = local->ops->cancel_remain_on_channel(&local->hw);
+	trace_drv_cancel_remain_on_channel(local, sdata);
+	ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif);
 	trace_drv_return_int(local, ret);
 
 	return ret;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 60ef8972b254..c710504ccf1a 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -8,6 +8,7 @@
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2019 Intel Corporation
  */
 #include <linux/export.h>
 #include <net/mac80211.h>
@@ -732,7 +733,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local,
 	}
 
 	if (local->ops->remain_on_channel) {
-		ret = drv_cancel_remain_on_channel(local);
+		ret = drv_cancel_remain_on_channel(local, roc->sdata);
 		if (WARN_ON_ONCE(ret)) {
 			mutex_unlock(&local->mtx);
 			return ret;
@@ -991,7 +992,7 @@ void ieee80211_roc_purge(struct ieee80211_local *local,
 		if (roc->started) {
 			if (local->ops->remain_on_channel) {
 				/* can race, so ignore return value */
-				drv_cancel_remain_on_channel(local);
+				drv_cancel_remain_on_channel(local, sdata);
 				ieee80211_roc_notify_destroy(roc);
 			} else {
 				roc->abort = true;
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 3bb4459b52c7..4768322dc202 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -1242,9 +1242,10 @@ TRACE_EVENT(drv_remain_on_channel,
 	)
 );
 
-DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel,
-	TP_PROTO(struct ieee80211_local *local),
-	TP_ARGS(local)
+DEFINE_EVENT(local_sdata_evt, drv_cancel_remain_on_channel,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata),
+	TP_ARGS(local, sdata)
 );
 
 TRACE_EVENT(drv_set_ringparam,
-- 
cgit v1.2.3


From 612fcfd9b31f08858d2a2e1279adda367e1ade00 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 12 Jun 2019 16:26:58 +0200
Subject: mac80211: remove unused and unneeded remove_sta_debugfs callback

The remove_sta_debugfs callback in struct rate_control_ops is no longer
used by any driver, as there is no need for it (the debugfs directory is
already removed recursivly by the mac80211 core.)  Because no one needs
it, just remove it to keep anyone else from accidentally using it in the
future.

Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20190612142658.12792-5-gregkh@linuxfoundation.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h  | 1 -
 net/mac80211/rate.h     | 9 ---------
 net/mac80211/sta_info.c | 1 -
 3 files changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index e39bf85ae4c2..6cc5b25edf9d 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5946,7 +5946,6 @@ struct rate_control_ops {
 
 	void (*add_sta_debugfs)(void *priv, void *priv_sta,
 				struct dentry *dir);
-	void (*remove_sta_debugfs)(void *priv, void *priv_sta);
 
 	u32 (*get_expected_throughput)(void *priv_sta);
 };
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 5d5348bc41ec..5397c6dad056 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -60,15 +60,6 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta)
 #endif
 }
 
-static inline void rate_control_remove_sta_debugfs(struct sta_info *sta)
-{
-#ifdef CONFIG_MAC80211_DEBUGFS
-	struct rate_control_ref *ref = sta->rate_ctrl;
-	if (ref && ref->ops->remove_sta_debugfs)
-		ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv);
-#endif
-}
-
 void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata);
 
 /* Get a reference to the rate control algorithm. If `name' is NULL, get the
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 95eb8220e2e4..fb6614f57cbc 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1065,7 +1065,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
 	cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL);
 	kfree(sinfo);
 
-	rate_control_remove_sta_debugfs(sta);
 	ieee80211_sta_debugfs_remove(sta);
 
 	cleanup_single_sta(sta);
-- 
cgit v1.2.3


From fb0e76abe34bd67756dbdf4d5982b7dc54afa1d8 Mon Sep 17 00:00:00 2001
From: Erik Stromdahl <erik.stromdahl@gmail.com>
Date: Mon, 17 Jun 2019 22:01:39 +0200
Subject: mac80211: add tx dequeue function for process context

Since ieee80211_tx_dequeue() must not be called with softirqs enabled
(i.e. from process context without proper disable of bottom halves),
we add a wrapper that disables bottom halves before calling
ieee80211_tx_dequeue()

The new function is named ieee80211_tx_dequeue_ni() just as all other
from-process-context versions found in mac80211.

The documentation of ieee80211_tx_dequeue() is also updated so it
mentions that the function should not be called from process context.

Signed-off-by: Erik Stromdahl <erik.stromdahl@gmail.com>
Link: https://lore.kernel.org/r/20190617200140.6189-1-erik.stromdahl@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 26 ++++++++++++++++++++++++++
 net/mac80211/tx.c      |  2 ++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6cc5b25edf9d..fbe1c29e3349 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6234,10 +6234,36 @@ void ieee80211_unreserve_tid(struct ieee80211_sta *sta, u8 tid);
  * but for the duration of the frame handling.
  * However, also note that while in the wake_tx_queue() method,
  * rcu_read_lock() is already held.
+ *
+ * softirqs must also be disabled when this function is called.
+ * In process context, use ieee80211_tx_dequeue_ni() instead.
  */
 struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 				     struct ieee80211_txq *txq);
 
+/**
+ * ieee80211_tx_dequeue_ni - dequeue a packet from a software tx queue
+ * (in process context)
+ *
+ * Like ieee80211_tx_dequeue() but can be called in process context
+ * (internally disables bottom halves).
+ *
+ * @hw: pointer as obtained from ieee80211_alloc_hw()
+ * @txq: pointer obtained from station or virtual interface, or from
+ *	ieee80211_next_txq()
+ */
+static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw,
+						      struct ieee80211_txq *txq)
+{
+	struct sk_buff *skb;
+
+	local_bh_disable();
+	skb = ieee80211_tx_dequeue(hw, txq);
+	local_bh_enable();
+
+	return skb;
+}
+
 /**
  * ieee80211_next_txq - get next tx queue to pull packets from
  *
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f13eb2f61ccf..fb8870d9eba3 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3546,6 +3546,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
 	ieee80211_tx_result r;
 	struct ieee80211_vif *vif = txq->vif;
 
+	WARN_ON_ONCE(softirq_count() == 0);
+
 begin:
 	spin_lock_bh(&fq->lock);
 
-- 
cgit v1.2.3


From 3e47bf1ca4c363ba8b1f99c4c3dcda13d2979954 Mon Sep 17 00:00:00 2001
From: Alexander Wetzel <alexander@wetzel-home.de>
Date: Sat, 29 Jun 2019 21:50:13 +0200
Subject: mac80211: Simplify Extended Key ID API

1) Drop IEEE80211_HW_EXT_KEY_ID_NATIVE and let drivers directly set
   the NL80211_EXT_FEATURE_EXT_KEY_ID flag.

2) Drop IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT and simply assume all
   drivers are unable to handle A-MPDU key borders.

The new Extended Key ID API now requires all mac80211 drivers to set
NL80211_EXT_FEATURE_EXT_KEY_ID when they implement set_key() and can
handle Extended Key ID. For drivers not providing set_key() mac80211
itself enables Extended Key ID support, using the internal SW crypto
services.

Signed-off-by: Alexander Wetzel <alexander@wetzel-home.de>
Link: https://lore.kernel.org/r/20190629195015.19680-2-alexander@wetzel-home.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  8 --------
 net/mac80211/debugfs.c |  2 --
 net/mac80211/key.c     | 18 ++++++++----------
 net/mac80211/main.c    | 18 ++++++------------
 4 files changed, 14 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index fbe1c29e3349..58941893a13f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2268,12 +2268,6 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID
  *	only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set.
  *
- * @IEEE80211_HW_EXT_KEY_ID_NATIVE: Driver and hardware are supporting Extended
- *	Key ID and can handle two unicast keys per station for Rx and Tx.
- *
- * @IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT: The card/driver can't handle
- *	active Tx A-MPDU sessions with Extended Key IDs during rekey.
- *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2325,8 +2319,6 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN,
 	IEEE80211_HW_SUPPORTS_MULTI_BSSID,
 	IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
-	IEEE80211_HW_EXT_KEY_ID_NATIVE,
-	IEEE80211_HW_NO_AMPDU_KEYBORDER_SUPPORT,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2e7f75938c51..47435f57e086 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -271,8 +271,6 @@ static const char *hw_flag_names[] = {
 	FLAG(TX_STATUS_NO_AMPDU_LEN),
 	FLAG(SUPPORTS_MULTI_BSSID),
 	FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
-	FLAG(EXT_KEY_ID_NATIVE),
-	FLAG(NO_AMPDU_KEYBORDER_SUPPORT),
 #undef FLAG
 };
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index dd60f6428049..92c3affb0eb0 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -270,8 +270,7 @@ int ieee80211_set_tx_key(struct ieee80211_key *key)
 
 	sta->ptk_idx = key->conf.keyidx;
 
-	if (ieee80211_hw_check(&local->hw, NO_AMPDU_KEYBORDER_SUPPORT))
-		clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
+	clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
 	ieee80211_check_fast_xmit(sta);
 
 	return 0;
@@ -289,16 +288,15 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
 	if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) {
 		/* Extended Key ID key install, initial one or rekey */
 
-		if (sta->ptk_idx != INVALID_PTK_KEYIDX &&
-		    ieee80211_hw_check(&local->hw,
-				       NO_AMPDU_KEYBORDER_SUPPORT)) {
+		if (sta->ptk_idx != INVALID_PTK_KEYIDX) {
 			/* Aggregation Sessions with Extended Key ID must not
 			 * mix MPDUs with different keyIDs within one A-MPDU.
-			 * Tear down any running Tx aggregation and all new
-			 * Rx/Tx aggregation request during rekey if the driver
-			 * asks us to do so. (Blocking Tx only would be
-			 * sufficient but WLAN_STA_BLOCK_BA gets the job done
-			 * for the few ms we need it.)
+			 * Tear down running Tx aggregation sessions and block
+			 * new Rx/Tx aggregation requests during rekey to
+			 * ensure there are no A-MPDUs for the driver to
+			 * aggregate. (Blocking Tx only would be sufficient but
+			 * WLAN_STA_BLOCK_BA gets the job done for the few ms
+			 * we need it.)
 			 */
 			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
 			mutex_lock(&sta->ampdu_mlme.mtx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4c2702f128f3..29b9d57df1a3 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1048,21 +1048,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 		}
 	}
 
-	/* Enable Extended Key IDs when driver allowed it, or when it
-	 * supports neither HW crypto nor A-MPDUs
+	/* Mac80211 and therefore all drivers using SW crypto only
+	 * are able to handle PTK rekeys and Extended Key ID.
 	 */
-	if ((!local->ops->set_key &&
-	     !ieee80211_hw_check(hw, AMPDU_AGGREGATION)) ||
-	    ieee80211_hw_check(&local->hw, EXT_KEY_ID_NATIVE))
-		wiphy_ext_feature_set(local->hw.wiphy,
-				      NL80211_EXT_FEATURE_EXT_KEY_ID);
-
-	/* Mac80211 and therefore all cards only using SW crypto are able to
-	 * handle PTK rekeys correctly
-	 */
-	if (!local->ops->set_key)
+	if (!local->ops->set_key) {
 		wiphy_ext_feature_set(local->hw.wiphy,
 				      NL80211_EXT_FEATURE_CAN_REPLACE_PTK0);
+		wiphy_ext_feature_set(local->hw.wiphy,
+				      NL80211_EXT_FEATURE_EXT_KEY_ID);
+	}
 
 	/*
 	 * Calculate scan IE length -- we need this to alloc
-- 
cgit v1.2.3


From dc3998ec5cf2d377f2e85ba16b6a15affec98a0a Mon Sep 17 00:00:00 2001
From: Alexander Wetzel <alexander@wetzel-home.de>
Date: Sat, 29 Jun 2019 21:50:14 +0200
Subject: mac80211: AMPDU handling for rekeys with Extended Key ID

Extended Key ID allows A-MPDU sessions while rekeying as long as each
A-MPDU aggregates only MPDUs with one keyid together.

Drivers able to segregate MPDUs accordingly can tell mac80211 to not
stop A-MPDU sessions when rekeying by setting the new flag
IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT.

Signed-off-by: Alexander Wetzel <alexander@wetzel-home.de>
Link: https://lore.kernel.org/r/20190629195015.19680-3-alexander@wetzel-home.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h |  5 +++++
 net/mac80211/debugfs.c |  1 +
 net/mac80211/key.c     | 14 ++++++++------
 3 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 58941893a13f..0187d84031fc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2268,6 +2268,10 @@ struct ieee80211_txq {
  * @IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID: Hardware supports multi BSSID
  *	only for HE APs. Applies if @IEEE80211_HW_SUPPORTS_MULTI_BSSID is set.
  *
+ * @IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT: The card and driver is only
+ *	aggregating MPDUs with the same keyid, allowing mac80211 to keep Tx
+ *	A-MPDU sessions active while rekeying with Extended Key ID.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2319,6 +2323,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_TX_STATUS_NO_AMPDU_LEN,
 	IEEE80211_HW_SUPPORTS_MULTI_BSSID,
 	IEEE80211_HW_SUPPORTS_ONLY_HE_MULTI_BSSID,
+	IEEE80211_HW_AMPDU_KEYBORDER_SUPPORT,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 47435f57e086..568b3b276931 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -271,6 +271,7 @@ static const char *hw_flag_names[] = {
 	FLAG(TX_STATUS_NO_AMPDU_LEN),
 	FLAG(SUPPORTS_MULTI_BSSID),
 	FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID),
+	FLAG(AMPDU_KEYBORDER_SUPPORT),
 #undef FLAG
 };
 
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 92c3affb0eb0..7dfee848abac 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -270,7 +270,8 @@ int ieee80211_set_tx_key(struct ieee80211_key *key)
 
 	sta->ptk_idx = key->conf.keyidx;
 
-	clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
+	if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT))
+		clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
 	ieee80211_check_fast_xmit(sta);
 
 	return 0;
@@ -288,15 +289,16 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old,
 	if (new->conf.flags & IEEE80211_KEY_FLAG_NO_AUTO_TX) {
 		/* Extended Key ID key install, initial one or rekey */
 
-		if (sta->ptk_idx != INVALID_PTK_KEYIDX) {
+		if (sta->ptk_idx != INVALID_PTK_KEYIDX &&
+		    !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) {
 			/* Aggregation Sessions with Extended Key ID must not
 			 * mix MPDUs with different keyIDs within one A-MPDU.
 			 * Tear down running Tx aggregation sessions and block
 			 * new Rx/Tx aggregation requests during rekey to
-			 * ensure there are no A-MPDUs for the driver to
-			 * aggregate. (Blocking Tx only would be sufficient but
-			 * WLAN_STA_BLOCK_BA gets the job done for the few ms
-			 * we need it.)
+			 * ensure there are no A-MPDUs when the driver is not
+			 * supporting A-MPDU key borders. (Blocking Tx only
+			 * would be sufficient but WLAN_STA_BLOCK_BA gets the
+			 * job done for the few ms we need it.)
 			 */
 			set_sta_flag(sta, WLAN_STA_BLOCK_BA);
 			mutex_lock(&sta->ampdu_mlme.mtx);
-- 
cgit v1.2.3


From 2aa485e1148557215337731b2c79f5569edcbbab Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Sat, 13 Jul 2019 18:36:41 +0200
Subject: mac80211: add support for parsing ADDBA_EXT IEs

ADDBA_EXT IEs can be used to negotiate the BA fragmentation level.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190713163642.18491-2-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  | 8 ++++++++
 net/mac80211/ieee80211_i.h | 1 +
 net/mac80211/util.c        | 7 +++++++
 3 files changed, 16 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 8511fadc0935..f36144eda5d6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -881,6 +881,14 @@ struct ieee80211_tpc_report_ie {
 	u8 link_margin;
 } __packed;
 
+#define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK	GENMASK(2, 1)
+#define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT	1
+#define IEEE80211_ADDBA_EXT_NO_FRAG		BIT(0)
+
+struct ieee80211_addba_ext_ie {
+	u8 data;
+} __packed;
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 004e2e3adb88..c67da3575e74 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1506,6 +1506,7 @@ struct ieee802_11_elems {
 	u8 max_bssid_indicator;
 	u8 dtim_count;
 	u8 dtim_period;
+	const struct ieee80211_addba_ext_ie *addba_ext_ie;
 
 	/* length of them, respectively */
 	u8 ext_capab_len;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1b224fa27367..3441558ef2d2 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1200,6 +1200,13 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 
 			elems->cisco_dtpc_elem = pos;
 			break;
+		case WLAN_EID_ADDBA_EXT:
+			if (elen != sizeof(struct ieee80211_addba_ext_ie)) {
+				elem_parse_failed = true;
+				break;
+			}
+			elems->addba_ext_ie = (void *)pos;
+			break;
 		case WLAN_EID_TIMEOUT_INTERVAL:
 			if (elen >= sizeof(struct ieee80211_timeout_interval_ie))
 				elems->timeout_int = (void *)pos;
-- 
cgit v1.2.3


From cbe77dde4757446bbe333299b0c91d48b8d575a2 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Sun, 14 Jul 2019 17:44:14 +0200
Subject: mac80211: add xmit rate to struct ieee80211_tx_status

Right now struct ieee80211_tx_rate cannot hold HE rates. Lets use
struct ieee80211_tx_status instead. This will also make the code
future-proof for when we have EHT.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190714154419.11854-2-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0187d84031fc..6fe4381ba0ef 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1058,11 +1058,13 @@ struct ieee80211_tx_info {
  * @sta: Station that the packet was transmitted for
  * @info: Basic tx status information
  * @skb: Packet skb (can be NULL if not provided by the driver)
+ * @rate: The TX rate that was used when sending the packet
  */
 struct ieee80211_tx_status {
 	struct ieee80211_sta *sta;
 	struct ieee80211_tx_info *info;
 	struct sk_buff *skb;
+	struct rate_info *rate;
 };
 
 /**
-- 
cgit v1.2.3


From e6f4051123fd33901e9655a675b22aefcdc5d277 Mon Sep 17 00:00:00 2001
From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Date: Mon, 22 Jul 2019 12:44:50 +0530
Subject: {nl,mac}80211: fix interface combinations on crypto controlled
 devices

Commit 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on
crypto controlled devices") has introduced a change which allows
4addr operation on crypto controlled devices (ex: ath10k). This
change has inadvertently impacted the interface combinations logic
on such devices.

General rule is that software interfaces like AP/VLAN should not be
listed under supported interface combinations and should not be
considered during validation of these combinations; because of the
aforementioned change, AP/VLAN interfaces(if present) will be checked
against interfaces supported by the device and blocks valid interface
combinations.

Consider a case where an AP and AP/VLAN are up and running; when a
second AP device is brought up on the same physical device, this AP
will be checked against the AP/VLAN interface (which will not be
part of supported interface combinations of the device) and blocks
second AP to come up.

Add a new API cfg80211_iftype_allowed() to fix the problem, this
API works for all devices with/without SW crypto control.

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Fixes: 33d915d9e8ce ("{nl,mac}80211: allow 4addr AP operation on crypto controlled devices")
Link: https://lore.kernel.org/r/1563779690-9716-1-git-send-email-mpubbise@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 15 +++++++++++++++
 net/mac80211/util.c    |  7 +++----
 net/wireless/core.c    |  6 ++----
 net/wireless/nl80211.c |  4 +---
 net/wireless/util.c    | 27 +++++++++++++++++++++++++--
 5 files changed, 46 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 45850a8391d9..26e2ad2c7027 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -7320,6 +7320,21 @@ void cfg80211_pmsr_complete(struct wireless_dev *wdev,
 			    struct cfg80211_pmsr_request *req,
 			    gfp_t gfp);
 
+/**
+ * cfg80211_iftype_allowed - check whether the interface can be allowed
+ * @wiphy: the wiphy
+ * @iftype: interface type
+ * @is_4addr: use_4addr flag, must be '0' when check_swif is '1'
+ * @check_swif: check iftype against software interfaces
+ *
+ * Check whether the interface is allowed to operate; additionally, this API
+ * can be used to check iftype against the software interfaces when
+ * check_swif is '1'.
+ */
+bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
+			     bool is_4addr, u8 check_swif);
+
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1b224fa27367..ad1e58184c4e 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3796,9 +3796,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
 	}
 
 	/* Always allow software iftypes */
-	if (local->hw.wiphy->software_iftypes & BIT(iftype) ||
-	    (iftype == NL80211_IFTYPE_AP_VLAN &&
-	     local->hw.wiphy->flags & WIPHY_FLAG_4ADDR_AP)) {
+	if (cfg80211_iftype_allowed(local->hw.wiphy, iftype, 0, 1)) {
 		if (radar_detect)
 			return -EINVAL;
 		return 0;
@@ -3833,7 +3831,8 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
 
 		if (sdata_iter == sdata ||
 		    !ieee80211_sdata_running(sdata_iter) ||
-		    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
+		    cfg80211_iftype_allowed(local->hw.wiphy,
+					    wdev_iter->iftype, 0, 1))
 			continue;
 
 		params.iftype_num[wdev_iter->iftype]++;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 45d9afcff6d5..32b3c719fdfc 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1410,10 +1410,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
 		}
 		break;
 	case NETDEV_PRE_UP:
-		if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype)) &&
-		    !(wdev->iftype == NL80211_IFTYPE_AP_VLAN &&
-		      rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP &&
-		      wdev->use_4addr))
+		if (!cfg80211_iftype_allowed(wdev->wiphy, wdev->iftype,
+					     wdev->use_4addr, 0))
 			return notifier_from_errno(-EOPNOTSUPP);
 
 		if (rfkill_blocked(rdev->rfkill))
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fc83dd179c1a..fd05ae1437a9 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3484,9 +3484,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
 			return err;
 	}
 
-	if (!(rdev->wiphy.interface_modes & (1 << type)) &&
-	    !(type == NL80211_IFTYPE_AP_VLAN && params.use_4addr &&
-	      rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP))
+	if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0))
 		return -EOPNOTSUPP;
 
 	err = nl80211_parse_mon_options(rdev, type, info, &params);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 1c39d6a2e850..d0e35b7b9e35 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1697,7 +1697,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 	for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
 		num_interfaces += params->iftype_num[iftype];
 		if (params->iftype_num[iftype] > 0 &&
-		    !(wiphy->software_iftypes & BIT(iftype)))
+		    !cfg80211_iftype_allowed(wiphy, iftype, 0, 1))
 			used_iftypes |= BIT(iftype);
 	}
 
@@ -1719,7 +1719,7 @@ int cfg80211_iter_combinations(struct wiphy *wiphy,
 			return -ENOMEM;
 
 		for (iftype = 0; iftype < NUM_NL80211_IFTYPES; iftype++) {
-			if (wiphy->software_iftypes & BIT(iftype))
+			if (cfg80211_iftype_allowed(wiphy, iftype, 0, 1))
 				continue;
 			for (j = 0; j < c->n_limits; j++) {
 				all_iftypes |= limits[j].types;
@@ -2072,3 +2072,26 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 	return max_vht_nss;
 }
 EXPORT_SYMBOL(ieee80211_get_vht_max_nss);
+
+bool cfg80211_iftype_allowed(struct wiphy *wiphy, enum nl80211_iftype iftype,
+			     bool is_4addr, u8 check_swif)
+
+{
+	bool is_vlan = iftype == NL80211_IFTYPE_AP_VLAN;
+
+	switch (check_swif) {
+	case 0:
+		if (is_vlan && is_4addr)
+			return wiphy->flags & WIPHY_FLAG_4ADDR_AP;
+		return wiphy->interface_modes & BIT(iftype);
+	case 1:
+		if (!(wiphy->software_iftypes & BIT(iftype)) && is_vlan)
+			return wiphy->flags & WIPHY_FLAG_4ADDR_AP;
+		return wiphy->software_iftypes & BIT(iftype);
+	default:
+		break;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(cfg80211_iftype_allowed);
-- 
cgit v1.2.3


From 91b05a7e7d8033a90a64f5fc0e3808db423e420a Mon Sep 17 00:00:00 2001
From: Ondrej Mosnacek <omosnace@redhat.com>
Date: Tue, 9 Jul 2019 13:11:24 +0200
Subject: crypto: user - make NETLINK_CRYPTO work inside netns

Currently, NETLINK_CRYPTO works only in the init network namespace. It
doesn't make much sense to cut it out of the other network namespaces,
so do the minor plumbing work necessary to make it work in any network
namespace. Code inspired by net/core/sock_diag.c.

Tested using kcapi-dgst from libkcapi [1]:
Before:
    # unshare -n kcapi-dgst -c sha256 </dev/null | wc -c
    libkcapi - Error: Netlink error: sendmsg failed
    libkcapi - Error: Netlink error: sendmsg failed
    libkcapi - Error: NETLINK_CRYPTO: cannot obtain cipher information for hmac(sha512) (is required crypto_user.c patch missing? see documentation)
    0

After:
    # unshare -n kcapi-dgst -c sha256 </dev/null | wc -c
    32

[1] https://github.com/smuellerDD/libkcapi

Signed-off-by: Ondrej Mosnacek <omosnace@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_user_base.c            | 37 ++++++++++++++++++++++++------------
 crypto/crypto_user_stat.c            |  4 +++-
 include/crypto/internal/cryptouser.h |  2 --
 include/net/net_namespace.h          |  3 +++
 4 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index c65e39005ce2..910e0b46012e 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -10,9 +10,10 @@
 #include <linux/crypto.h>
 #include <linux/cryptouser.h>
 #include <linux/sched.h>
-#include <net/netlink.h>
 #include <linux/security.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
+#include <net/sock.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
@@ -25,9 +26,6 @@
 
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
-/* The crypto netlink socket */
-struct sock *crypto_nlsk;
-
 struct crypto_dump_info {
 	struct sk_buff *in_skb;
 	struct sk_buff *out_skb;
@@ -186,6 +184,7 @@ out:
 static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 			 struct nlattr **attrs)
 {
+	struct net *net = sock_net(in_skb->sk);
 	struct crypto_user_alg *p = nlmsg_data(in_nlh);
 	struct crypto_alg *alg;
 	struct sk_buff *skb;
@@ -217,7 +216,7 @@ drop_alg:
 	if (err)
 		return err;
 
-	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+	return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
 
 static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb)
@@ -420,6 +419,7 @@ static const struct crypto_link {
 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			       struct netlink_ext_ack *extack)
 {
+	struct net *net = sock_net(skb->sk);
 	struct nlattr *attrs[CRYPTOCFGA_MAX+1];
 	const struct crypto_link *link;
 	int type, err;
@@ -450,7 +450,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 				.done = link->done,
 				.min_dump_alloc = min(dump_alloc, 65535UL),
 			};
-			err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+			err = netlink_dump_start(net->crypto_nlsk, skb, nlh, &c);
 		}
 
 		return err;
@@ -474,22 +474,35 @@ static void crypto_netlink_rcv(struct sk_buff *skb)
 	mutex_unlock(&crypto_cfg_mutex);
 }
 
-static int __init crypto_user_init(void)
+static int __net_init crypto_netlink_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
 		.input	= crypto_netlink_rcv,
 	};
 
-	crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, &cfg);
-	if (!crypto_nlsk)
-		return -ENOMEM;
+	net->crypto_nlsk = netlink_kernel_create(net, NETLINK_CRYPTO, &cfg);
+	return net->crypto_nlsk == NULL ? -ENOMEM : 0;
+}
 
-	return 0;
+static void __net_exit crypto_netlink_exit(struct net *net)
+{
+	netlink_kernel_release(net->crypto_nlsk);
+	net->crypto_nlsk = NULL;
+}
+
+static struct pernet_operations crypto_netlink_net_ops = {
+	.init = crypto_netlink_init,
+	.exit = crypto_netlink_exit,
+};
+
+static int __init crypto_user_init(void)
+{
+	return register_pernet_subsys(&crypto_netlink_net_ops);
 }
 
 static void __exit crypto_user_exit(void)
 {
-	netlink_kernel_release(crypto_nlsk);
+	unregister_pernet_subsys(&crypto_netlink_net_ops);
 }
 
 module_init(crypto_user_init);
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index a03f326a63d3..8bad88413de1 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -10,6 +10,7 @@
 #include <linux/cryptouser.h>
 #include <linux/sched.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
@@ -298,6 +299,7 @@ out:
 int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		      struct nlattr **attrs)
 {
+	struct net *net = sock_net(in_skb->sk);
 	struct crypto_user_alg *p = nlmsg_data(in_nlh);
 	struct crypto_alg *alg;
 	struct sk_buff *skb;
@@ -329,7 +331,7 @@ drop_alg:
 	if (err)
 		return err;
 
-	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+	return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h
index 8c602b187c58..40623f4457df 100644
--- a/include/crypto/internal/cryptouser.h
+++ b/include/crypto/internal/cryptouser.h
@@ -1,8 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <net/netlink.h>
 
-extern struct sock *crypto_nlsk;
-
 struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
 
 #ifdef CONFIG_CRYPTO_STATS
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 4a9da951a794..85bc1de5dece 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -170,6 +170,9 @@ struct net {
 #endif
 #ifdef CONFIG_XDP_SOCKETS
 	struct netns_xdp	xdp;
+#endif
+#if IS_ENABLED(CONFIG_CRYPTO_USER)
+	struct sock		*crypto_nlsk;
 #endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
-- 
cgit v1.2.3


From 1f961807925032daa90267d8a23ff730e7ede07a Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Thu, 25 Jul 2019 17:56:44 -0700
Subject: mm/hmm: replace hmm_update with mmu_notifier_range

The hmm_mirror_ops callback function sync_cpu_device_pagetables() passes a
struct hmm_update which is a simplified version of struct
mmu_notifier_range. This is unnecessary so replace hmm_update with
mmu_notifier_range directly.

Link: https://lore.kernel.org/r/20190726005650.2566-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
[jgg: white space tuning]
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 14 ++++++++------
 drivers/gpu/drm/nouveau/nouveau_svm.c  |  4 ++--
 include/linux/hmm.h                    | 34 ++++++----------------------------
 mm/hmm.c                               | 13 ++++---------
 4 files changed, 20 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 3971c201f320..b698b423b25d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -195,13 +195,14 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
-			const struct hmm_update *update)
+static int
+amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
+			      const struct mmu_notifier_range *update)
 {
 	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 	unsigned long start = update->start;
 	unsigned long end = update->end;
-	bool blockable = update->blockable;
+	bool blockable = mmu_notifier_range_blockable(update);
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
@@ -243,13 +244,14 @@ static int amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static int amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
-			const struct hmm_update *update)
+static int
+amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
+			      const struct mmu_notifier_range *update)
 {
 	struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
 	unsigned long start = update->start;
 	unsigned long end = update->end;
-	bool blockable = update->blockable;
+	bool blockable = mmu_notifier_range_blockable(update);
 	struct interval_tree_node *it;
 
 	/* notification is exclusive, but interval is inclusive */
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 545100f7c594..79b29c918717 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -252,13 +252,13 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
 
 static int
 nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
-					const struct hmm_update *update)
+					const struct mmu_notifier_range *update)
 {
 	struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
 	unsigned long start = update->start;
 	unsigned long limit = update->end;
 
-	if (!update->blockable)
+	if (!mmu_notifier_range_blockable(update))
 		return -EAGAIN;
 
 	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 9f32586684c9..4f870f7017cb 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -340,29 +340,6 @@ static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
 
 struct hmm_mirror;
 
-/*
- * enum hmm_update_event - type of update
- * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
- */
-enum hmm_update_event {
-	HMM_UPDATE_INVALIDATE,
-};
-
-/*
- * struct hmm_update - HMM update information for callback
- *
- * @start: virtual start address of the range to update
- * @end: virtual end address of the range to update
- * @event: event triggering the update (what is happening)
- * @blockable: can the callback block/sleep ?
- */
-struct hmm_update {
-	unsigned long start;
-	unsigned long end;
-	enum hmm_update_event event;
-	bool blockable;
-};
-
 /*
  * struct hmm_mirror_ops - HMM mirror device operations callback
  *
@@ -383,9 +360,9 @@ struct hmm_mirror_ops {
 	/* sync_cpu_device_pagetables() - synchronize page tables
 	 *
 	 * @mirror: pointer to struct hmm_mirror
-	 * @update: update information (see struct hmm_update)
-	 * Return: -EAGAIN if update.blockable false and callback need to
-	 *          block, 0 otherwise.
+	 * @update: update information (see struct mmu_notifier_range)
+	 * Return: -EAGAIN if mmu_notifier_range_blockable(update) is false
+	 * and callback needs to block, 0 otherwise.
 	 *
 	 * This callback ultimately originates from mmu_notifiers when the CPU
 	 * page table is updated. The device driver must update its page table
@@ -396,8 +373,9 @@ struct hmm_mirror_ops {
 	 * page tables are completely updated (TLBs flushed, etc); this is a
 	 * synchronous call.
 	 */
-	int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-					  const struct hmm_update *update);
+	int (*sync_cpu_device_pagetables)(
+		struct hmm_mirror *mirror,
+		const struct mmu_notifier_range *update);
 };
 
 /*
diff --git a/mm/hmm.c b/mm/hmm.c
index 54b3a4162ae9..4040b4427635 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -165,7 +165,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 {
 	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
 	struct hmm_mirror *mirror;
-	struct hmm_update update;
 	struct hmm_range *range;
 	unsigned long flags;
 	int ret = 0;
@@ -173,15 +172,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 	if (!kref_get_unless_zero(&hmm->kref))
 		return 0;
 
-	update.start = nrange->start;
-	update.end = nrange->end;
-	update.event = HMM_UPDATE_INVALIDATE;
-	update.blockable = mmu_notifier_range_blockable(nrange);
-
 	spin_lock_irqsave(&hmm->ranges_lock, flags);
 	hmm->notifiers++;
 	list_for_each_entry(range, &hmm->ranges, list) {
-		if (update.end < range->start || update.start >= range->end)
+		if (nrange->end < range->start || nrange->start >= range->end)
 			continue;
 
 		range->valid = false;
@@ -198,9 +192,10 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 	list_for_each_entry(mirror, &hmm->mirrors, list) {
 		int rc;
 
-		rc = mirror->ops->sync_cpu_device_pagetables(mirror, &update);
+		rc = mirror->ops->sync_cpu_device_pagetables(mirror, nrange);
 		if (rc) {
-			if (WARN_ON(update.blockable || rc != -EAGAIN))
+			if (WARN_ON(mmu_notifier_range_blockable(nrange) ||
+			    rc != -EAGAIN))
 				continue;
 			ret = -EAGAIN;
 			break;
-- 
cgit v1.2.3


From 9a4903e49e495bfd2650862dfae4178bebe4db9c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 25 Jul 2019 17:56:46 -0700
Subject: mm/hmm: replace the block argument to hmm_range_fault with a flags
 value

This allows easier expansion to other flags, and also makes the callers a
little easier to read.

Link: https://lore.kernel.org/r/20190726005650.2566-4-rcampbell@nvidia.com
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  2 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c   |  2 +-
 include/linux/hmm.h                     | 11 ++++-
 mm/hmm.c                                | 74 ++++++++++++++++-----------------
 4 files changed, 48 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e51b48ac48eb..12a59ac83f72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -832,7 +832,7 @@ retry:
 
 	down_read(&mm->mmap_sem);
 
-	r = hmm_range_fault(range, true);
+	r = hmm_range_fault(range, 0);
 	if (unlikely(r < 0)) {
 		if (likely(r == -EAGAIN)) {
 			/*
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 79b29c918717..49b520c60fc5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -505,7 +505,7 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range)
 		return -EBUSY;
 	}
 
-	ret = hmm_range_fault(range, true);
+	ret = hmm_range_fault(range, 0);
 	if (ret <= 0) {
 		if (ret == 0)
 			ret = -EBUSY;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 4f870f7017cb..89a141060e5b 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -407,12 +407,19 @@ int hmm_range_register(struct hmm_range *range,
 		       unsigned long end,
 		       unsigned page_shift);
 void hmm_range_unregister(struct hmm_range *range);
+
+/*
+ * Retry fault if non-blocking, drop mmap_sem and return -EAGAIN in that case.
+ */
+#define HMM_FAULT_ALLOW_RETRY		(1 << 0)
+
 long hmm_range_snapshot(struct hmm_range *range);
-long hmm_range_fault(struct hmm_range *range, bool block);
+long hmm_range_fault(struct hmm_range *range, unsigned int flags);
+
 long hmm_range_dma_map(struct hmm_range *range,
 		       struct device *device,
 		       dma_addr_t *daddrs,
-		       bool block);
+		       unsigned int flags);
 long hmm_range_dma_unmap(struct hmm_range *range,
 			 struct vm_area_struct *vma,
 			 struct device *device,
diff --git a/mm/hmm.c b/mm/hmm.c
index 362944b0fbca..84f2791d3510 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -281,7 +281,7 @@ struct hmm_vma_walk {
 	struct dev_pagemap	*pgmap;
 	unsigned long		last;
 	bool			fault;
-	bool			block;
+	unsigned int		flags;
 };
 
 static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
@@ -293,8 +293,11 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
 	struct vm_area_struct *vma = walk->vma;
 	vm_fault_t ret;
 
-	flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
-	flags |= write_fault ? FAULT_FLAG_WRITE : 0;
+	if (hmm_vma_walk->flags & HMM_FAULT_ALLOW_RETRY)
+		flags |= FAULT_FLAG_ALLOW_RETRY;
+	if (write_fault)
+		flags |= FAULT_FLAG_WRITE;
+
 	ret = handle_mm_fault(vma, addr, flags);
 	if (ret & VM_FAULT_RETRY) {
 		/* Note, handle_mm_fault did up_read(&mm->mmap_sem)) */
@@ -1012,26 +1015,26 @@ long hmm_range_snapshot(struct hmm_range *range)
 }
 EXPORT_SYMBOL(hmm_range_snapshot);
 
-/*
- * hmm_range_fault() - try to fault some address in a virtual address range
- * @range: range being faulted
- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
- * Return: number of valid pages in range->pfns[] (from range start
- *          address). This may be zero. If the return value is negative,
- *          then one of the following values may be returned:
+/**
+ * hmm_range_fault - try to fault some address in a virtual address range
+ * @range:	range being faulted
+ * @flags:	HMM_FAULT_* flags
  *
- *           -EINVAL  invalid arguments or mm or virtual address are in an
- *                    invalid vma (for instance device file vma).
- *           -ENOMEM: Out of memory.
- *           -EPERM:  Invalid permission (for instance asking for write and
- *                    range is read only).
- *           -EAGAIN: If you need to retry and mmap_sem was drop. This can only
- *                    happens if block argument is false.
- *           -EBUSY:  If the the range is being invalidated and you should wait
- *                    for invalidation to finish.
- *           -EFAULT: Invalid (ie either no valid vma or it is illegal to access
- *                    that range), number of valid pages in range->pfns[] (from
- *                    range start address).
+ * Return: the number of valid pages in range->pfns[] (from range start
+ * address), which may be zero.  On error one of the following status codes
+ * can be returned:
+ *
+ * -EINVAL:	Invalid arguments or mm or virtual address is in an invalid vma
+ *		(e.g., device file vma).
+ * -ENOMEM:	Out of memory.
+ * -EPERM:	Invalid permission (e.g., asking for write and range is read
+ *		only).
+ * -EAGAIN:	A page fault needs to be retried and mmap_sem was dropped.
+ * -EBUSY:	The range has been invalidated and the caller needs to wait for
+ *		the invalidation to finish.
+ * -EFAULT:	Invalid (i.e., either no valid vma or it is illegal to access
+ *		that range) number of valid pages in range->pfns[] (from
+ *              range start address).
  *
  * This is similar to a regular CPU page fault except that it will not trigger
  * any memory migration if the memory being faulted is not accessible by CPUs
@@ -1040,7 +1043,7 @@ EXPORT_SYMBOL(hmm_range_snapshot);
  * On error, for one virtual address in the range, the function will mark the
  * corresponding HMM pfn entry with an error flag.
  */
-long hmm_range_fault(struct hmm_range *range, bool block)
+long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
 	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
 	unsigned long start = range->start, end;
@@ -1086,7 +1089,7 @@ long hmm_range_fault(struct hmm_range *range, bool block)
 		hmm_vma_walk.pgmap = NULL;
 		hmm_vma_walk.last = start;
 		hmm_vma_walk.fault = true;
-		hmm_vma_walk.block = block;
+		hmm_vma_walk.flags = flags;
 		hmm_vma_walk.range = range;
 		mm_walk.private = &hmm_vma_walk;
 		end = min(range->end, vma->vm_end);
@@ -1125,25 +1128,22 @@ long hmm_range_fault(struct hmm_range *range, bool block)
 EXPORT_SYMBOL(hmm_range_fault);
 
 /**
- * hmm_range_dma_map() - hmm_range_fault() and dma map page all in one.
- * @range: range being faulted
- * @device: device against to dma map page to
- * @daddrs: dma address of mapped pages
- * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
- * Return: number of pages mapped on success, -EAGAIN if mmap_sem have been
- *          drop and you need to try again, some other error value otherwise
+ * hmm_range_dma_map - hmm_range_fault() and dma map page all in one.
+ * @range:	range being faulted
+ * @device:	device to map page to
+ * @daddrs:	array of dma addresses for the mapped pages
+ * @flags:	HMM_FAULT_*
  *
- * Note same usage pattern as hmm_range_fault().
+ * Return: the number of pages mapped on success (including zero), or any
+ * status return from hmm_range_fault() otherwise.
  */
-long hmm_range_dma_map(struct hmm_range *range,
-		       struct device *device,
-		       dma_addr_t *daddrs,
-		       bool block)
+long hmm_range_dma_map(struct hmm_range *range, struct device *device,
+		dma_addr_t *daddrs, unsigned int flags)
 {
 	unsigned long i, npages, mapped;
 	long ret;
 
-	ret = hmm_range_fault(range, block);
+	ret = hmm_range_fault(range, flags);
 	if (ret <= 0)
 		return ret ? ret : -EBUSY;
 
-- 
cgit v1.2.3


From d45d464b118f428229d91769c8a3cc1e2e0bb4d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 25 Jul 2019 17:56:47 -0700
Subject: mm/hmm: merge hmm_range_snapshot into hmm_range_fault

Add a HMM_FAULT_SNAPSHOT flag so that hmm_range_snapshot can be merged
into the almost identical hmm_range_fault function.

Link: https://lore.kernel.org/r/20190726005650.2566-5-rcampbell@nvidia.com
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst | 17 +++++-----
 include/linux/hmm.h      |  4 ++-
 mm/hmm.c                 | 85 ++----------------------------------------------
 3 files changed, 13 insertions(+), 93 deletions(-)

(limited to 'include')

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 710ce1c701bf..ddcb5ca8b296 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -192,15 +192,14 @@ read only, or fully unmap, etc.). The device must complete the update before
 the driver callback returns.
 
 When the device driver wants to populate a range of virtual addresses, it can
-use either::
+use::
 
-  long hmm_range_snapshot(struct hmm_range *range);
-  long hmm_range_fault(struct hmm_range *range, bool block);
+  long hmm_range_fault(struct hmm_range *range, unsigned int flags);
 
-The first one (hmm_range_snapshot()) will only fetch present CPU page table
+With the HMM_RANGE_SNAPSHOT flag, it will only fetch present CPU page table
 entries and will not trigger a page fault on missing or non-present entries.
-The second one does trigger a page fault on missing or read-only entries if
-write access is requested (see below). Page faults use the generic mm page
+Without that flag, it does trigger a page fault on missing or read-only entries
+if write access is requested (see below). Page faults use the generic mm page
 fault code path just like a CPU page fault.
 
 Both functions copy CPU page table entries into their pfns array argument. Each
@@ -227,20 +226,20 @@ The usage pattern is::
 
       /*
        * Just wait for range to be valid, safe to ignore return value as we
-       * will use the return value of hmm_range_snapshot() below under the
+       * will use the return value of hmm_range_fault() below under the
        * mmap_sem to ascertain the validity of the range.
        */
       hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
 
  again:
       down_read(&mm->mmap_sem);
-      ret = hmm_range_snapshot(&range);
+      ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
       if (ret) {
           up_read(&mm->mmap_sem);
           if (ret == -EBUSY) {
             /*
              * No need to check hmm_range_wait_until_valid() return value
-             * on retry we will get proper error with hmm_range_snapshot()
+             * on retry we will get proper error with hmm_range_fault()
              */
             hmm_range_wait_until_valid(&range, TIMEOUT_IN_MSEC);
             goto again;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 89a141060e5b..90dc5944b1bc 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -413,7 +413,9 @@ void hmm_range_unregister(struct hmm_range *range);
  */
 #define HMM_FAULT_ALLOW_RETRY		(1 << 0)
 
-long hmm_range_snapshot(struct hmm_range *range);
+/* Don't fault in missing PTEs, just snapshot the current state. */
+#define HMM_FAULT_SNAPSHOT		(1 << 1)
+
 long hmm_range_fault(struct hmm_range *range, unsigned int flags);
 
 long hmm_range_dma_map(struct hmm_range *range,
diff --git a/mm/hmm.c b/mm/hmm.c
index 84f2791d3510..1bc014cddd78 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -280,7 +280,6 @@ struct hmm_vma_walk {
 	struct hmm_range	*range;
 	struct dev_pagemap	*pgmap;
 	unsigned long		last;
-	bool			fault;
 	unsigned int		flags;
 };
 
@@ -373,7 +372,7 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 {
 	struct hmm_range *range = hmm_vma_walk->range;
 
-	if (!hmm_vma_walk->fault)
+	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
 		return;
 
 	/*
@@ -418,7 +417,7 @@ static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 {
 	unsigned long i;
 
-	if (!hmm_vma_walk->fault) {
+	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
 		*fault = *write_fault = false;
 		return;
 	}
@@ -936,85 +935,6 @@ void hmm_range_unregister(struct hmm_range *range)
 }
 EXPORT_SYMBOL(hmm_range_unregister);
 
-/*
- * hmm_range_snapshot() - snapshot CPU page table for a range
- * @range: range
- * Return: -EINVAL if invalid argument, -ENOMEM out of memory, -EPERM invalid
- *          permission (for instance asking for write and range is read only),
- *          -EBUSY if you need to retry, -EFAULT invalid (ie either no valid
- *          vma or it is illegal to access that range), number of valid pages
- *          in range->pfns[] (from range start address).
- *
- * This snapshots the CPU page table for a range of virtual addresses. Snapshot
- * validity is tracked by range struct. See in include/linux/hmm.h for example
- * on how to use.
- */
-long hmm_range_snapshot(struct hmm_range *range)
-{
-	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
-	unsigned long start = range->start, end;
-	struct hmm_vma_walk hmm_vma_walk;
-	struct hmm *hmm = range->hmm;
-	struct vm_area_struct *vma;
-	struct mm_walk mm_walk;
-
-	lockdep_assert_held(&hmm->mm->mmap_sem);
-	do {
-		/* If range is no longer valid force retry. */
-		if (!range->valid)
-			return -EBUSY;
-
-		vma = find_vma(hmm->mm, start);
-		if (vma == NULL || (vma->vm_flags & device_vma))
-			return -EFAULT;
-
-		if (is_vm_hugetlb_page(vma)) {
-			if (huge_page_shift(hstate_vma(vma)) !=
-				    range->page_shift &&
-			    range->page_shift != PAGE_SHIFT)
-				return -EINVAL;
-		} else {
-			if (range->page_shift != PAGE_SHIFT)
-				return -EINVAL;
-		}
-
-		if (!(vma->vm_flags & VM_READ)) {
-			/*
-			 * If vma do not allow read access, then assume that it
-			 * does not allow write access, either. HMM does not
-			 * support architecture that allow write without read.
-			 */
-			hmm_pfns_clear(range, range->pfns,
-				range->start, range->end);
-			return -EPERM;
-		}
-
-		range->vma = vma;
-		hmm_vma_walk.pgmap = NULL;
-		hmm_vma_walk.last = start;
-		hmm_vma_walk.fault = false;
-		hmm_vma_walk.range = range;
-		mm_walk.private = &hmm_vma_walk;
-		end = min(range->end, vma->vm_end);
-
-		mm_walk.vma = vma;
-		mm_walk.mm = vma->vm_mm;
-		mm_walk.pte_entry = NULL;
-		mm_walk.test_walk = NULL;
-		mm_walk.hugetlb_entry = NULL;
-		mm_walk.pud_entry = hmm_vma_walk_pud;
-		mm_walk.pmd_entry = hmm_vma_walk_pmd;
-		mm_walk.pte_hole = hmm_vma_walk_hole;
-		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
-
-		walk_page_range(start, end, &mm_walk);
-		start = end;
-	} while (start < range->end);
-
-	return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-}
-EXPORT_SYMBOL(hmm_range_snapshot);
-
 /**
  * hmm_range_fault - try to fault some address in a virtual address range
  * @range:	range being faulted
@@ -1088,7 +1008,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		range->vma = vma;
 		hmm_vma_walk.pgmap = NULL;
 		hmm_vma_walk.last = start;
-		hmm_vma_walk.fault = true;
 		hmm_vma_walk.flags = flags;
 		hmm_vma_walk.range = range;
 		mm_walk.private = &hmm_vma_walk;
-- 
cgit v1.2.3


From ef11a931bd1c57b02fe2603ff95a392a73041f9e Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 18 Jun 2019 08:19:14 +0200
Subject: mac80211: HE: add Spatial Reuse element parsing support

Add support to mac80211 for parsing SPR elements as per
P802.11ax_D4.0 section 9.4.2.241.

Signed-off-by: Shashidhar Lakkavalli <slakkavalli@datto.com>
Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190618061915.7102-2-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/util.c        |  4 ++++
 3 files changed, 54 insertions(+)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f36144eda5d6..a656f31262f1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1633,6 +1633,18 @@ struct ieee80211_he_operation {
 	u8 optional[0];
 } __packed;
 
+/**
+ * struct ieee80211_he_spr - HE spatial reuse element
+ *
+ * This structure is the "HE spatial reuse element" element as
+ * described in P802.11ax_D4.0 section 9.4.2.241
+ */
+struct ieee80211_he_spr {
+	u8 he_sr_control;
+	/* Optional 0 to 19 bytes: depends on @he_sr_control */
+	u8 optional[0];
+} __packed;
+
 /**
  * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field
  *
@@ -2071,6 +2083,42 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 	return oper_len;
 }
 
+/* HE Spatial Reuse defines */
+#define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT			0x4
+#define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT		0x8
+
+/*
+ * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size
+ * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the the byte
+ *	after the ext ID byte. It is assumed that he_spr_ie has at least
+ *	sizeof(struct ieee80211_he_spr) bytes, the caller must have validated
+ *	this
+ * @return the actual size of the IE data (not including header), or 0 on error
+ */
+static inline u8
+ieee80211_he_spr_size(const u8 *he_spr_ie)
+{
+	struct ieee80211_he_spr *he_spr = (void *)he_spr_ie;
+	u8 spr_len = sizeof(struct ieee80211_he_spr);
+	u32 he_spr_params;
+
+	/* Make sure the input is not NULL */
+	if (!he_spr_ie)
+		return 0;
+
+	/* Calc required length */
+	he_spr_params = le32_to_cpu(he_spr->he_sr_control);
+	if (he_spr_params & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
+		spr_len++;
+	if (he_spr_params & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT)
+		spr_len += 18;
+
+	/* Add the first byte (extension ID) to the total length */
+	spr_len++;
+
+	return spr_len;
+}
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
@@ -2493,6 +2541,7 @@ enum ieee80211_eid_ext {
 	WLAN_EID_EXT_HE_OPERATION = 36,
 	WLAN_EID_EXT_UORA = 37,
 	WLAN_EID_EXT_HE_MU_EDCA = 38,
+	WLAN_EID_EXT_HE_SPR = 39,
 	WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52,
 	WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55,
 	WLAN_EID_EXT_NON_INHERITANCE = 56,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4c80c0ed67a7..472c5a40317d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1480,6 +1480,7 @@ struct ieee802_11_elems {
 	const struct ieee80211_meshconf_ie *mesh_config;
 	const u8 *he_cap;
 	const struct ieee80211_he_operation *he_operation;
+	const struct ieee80211_he_spr *he_spr;
 	const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
 	const u8 *uora_element;
 	const u8 *mesh_id;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3441558ef2d2..3e2aeb1a75b4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1240,6 +1240,10 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
 				   WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION &&
 				   elen == 3) {
 				elems->mbssid_config_ie = (void *)&pos[1];
+			} else if (pos[0] == WLAN_EID_EXT_HE_SPR &&
+				   elen >= sizeof(*elems->he_spr) &&
+				   elen >= ieee80211_he_spr_size(&pos[1])) {
+				elems->he_spr = (void *)&pos[1];
 			}
 			break;
 		default:
-- 
cgit v1.2.3


From a0b4496a43681cbeec03a38e1b685c80c0d7405d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 16 Jul 2019 00:09:19 +0200
Subject: mac80211: add IEEE80211_KEY_FLAG_GENERATE_MMIE to ieee80211_key_flags

Add IEEE80211_KEY_FLAG_GENERATE_MMIE flag to ieee80211_key_flags in order
to allow the driver to notify mac80211 to generate MMIE and that it
requires sequence number generation only.
This is a preliminary patch to add BIP_CMAC_128 hw support to mt7615
driver

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/dfe275f9aa0f1cc6b33085f9efd5d8447f68ad13.1563228405.git.lorenzo@kernel.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 4 ++++
 net/mac80211/wpa.c     | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6fe4381ba0ef..9effd286c1ae 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1704,6 +1704,9 @@ struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif);
  *	a TKIP key if it only requires MIC space. Do not set together with
  *	@IEEE80211_KEY_FLAG_GENERATE_MMIC on the same key.
  * @IEEE80211_KEY_FLAG_NO_AUTO_TX: Key needs explicit Tx activation.
+ * @IEEE80211_KEY_FLAG_GENERATE_MMIE: This flag should be set by the driver
+ *	for a AES_CMAC key to indicate that it requires sequence number
+ *	generation only
  */
 enum ieee80211_key_flags {
 	IEEE80211_KEY_FLAG_GENERATE_IV_MGMT	= BIT(0),
@@ -1716,6 +1719,7 @@ enum ieee80211_key_flags {
 	IEEE80211_KEY_FLAG_RESERVE_TAILROOM	= BIT(7),
 	IEEE80211_KEY_FLAG_PUT_MIC_SPACE	= BIT(8),
 	IEEE80211_KEY_FLAG_NO_AUTO_TX		= BIT(9),
+	IEEE80211_KEY_FLAG_GENERATE_MMIE	= BIT(10),
 };
 
 /**
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index ee72779729e5..91bf32af55e9 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -946,7 +946,8 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
 
 	info = IEEE80211_SKB_CB(skb);
 
-	if (info->control.hw_key)
+	if (info->control.hw_key &&
+	    !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE))
 		return TX_CONTINUE;
 
 	if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
@@ -962,6 +963,9 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
 
 	bip_ipn_set64(mmie->sequence_number, pn64);
 
+	if (info->control.hw_key)
+		return TX_CONTINUE;
+
 	bip_aad(skb, aad);
 
 	/*
-- 
cgit v1.2.3


From 7a113110fc8cdda14023c0bffc7bd8b5f3da1edf Mon Sep 17 00:00:00 2001
From: Denis Kenzior <denkenz@gmail.com>
Date: Mon, 22 Jul 2019 06:33:10 -0500
Subject: nl80211: document uapi for CMD_FRAME_WAIT_CANCEL

Commit 1c38c7f22068 ("nl80211: send event when CMD_FRAME duration
expires") added the possibility of NL80211_CMD_FRAME_WAIT_CANCEL
being sent whenever the off-channel wait time associated with a
CMD_FRAME completes.  Document this in the uapi/linux/nl80211.h file.

Signed-off-by: Denis Kenzior <denkenz@gmail.com>
Link: https://lore.kernel.org/r/20190722113312.14031-1-denkenz@gmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index beb9a9d0c00a..c45587c2cf44 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -657,7 +657,9 @@
  *	is used during CSA period.
  * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this
  *	command may be used with the corresponding cookie to cancel the wait
- *	time if it is known that it is no longer necessary.
+ *	time if it is known that it is no longer necessary.  This command is
+ *	also sent as an event whenever the driver has completed the off-channel
+ *	wait time.
  * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility.
  * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame
  *	transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies
-- 
cgit v1.2.3


From cc374377a19d2a49d693997b62dc3a6f5fac6d61 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Thu, 25 Jul 2019 17:56:50 -0700
Subject: mm/hmm: remove hmm_range vma

Since hmm_range_fault() doesn't use the struct hmm_range vma field, remove
it.

Link: https://lore.kernel.org/r/20190726005650.2566-8-rcampbell@nvidia.com
Suggested-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/nouveau/nouveau_svm.c | 7 +++----
 include/linux/hmm.h                   | 1 -
 mm/hmm.c                              | 1 -
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 49b520c60fc5..a74530b5a523 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -496,12 +496,12 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range)
 				 range->start, range->end,
 				 PAGE_SHIFT);
 	if (ret) {
-		up_read(&range->vma->vm_mm->mmap_sem);
+		up_read(&range->hmm->mm->mmap_sem);
 		return (int)ret;
 	}
 
 	if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
-		up_read(&range->vma->vm_mm->mmap_sem);
+		up_read(&range->hmm->mm->mmap_sem);
 		return -EBUSY;
 	}
 
@@ -509,7 +509,7 @@ nouveau_range_fault(struct hmm_mirror *mirror, struct hmm_range *range)
 	if (ret <= 0) {
 		if (ret == 0)
 			ret = -EBUSY;
-		up_read(&range->vma->vm_mm->mmap_sem);
+		up_read(&range->hmm->mm->mmap_sem);
 		hmm_range_unregister(range);
 		return ret;
 	}
@@ -682,7 +682,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
 			 args.i.p.addr + args.i.p.size, fn - fi);
 
 		/* Have HMM fault pages within the fault window to the GPU. */
-		range.vma = vma;
 		range.start = args.i.p.addr;
 		range.end = args.i.p.addr + args.i.p.size;
 		range.pfns = args.phys;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 90dc5944b1bc..82265118d94a 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -164,7 +164,6 @@ enum hmm_pfn_value_e {
  */
 struct hmm_range {
 	struct hmm		*hmm;
-	struct vm_area_struct	*vma;
 	struct list_head	list;
 	unsigned long		start;
 	unsigned long		end;
diff --git a/mm/hmm.c b/mm/hmm.c
index 6111c0a3c12d..9a908902e4cc 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1002,7 +1002,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 			return -EPERM;
 		}
 
-		range->vma = vma;
 		hmm_vma_walk.pgmap = NULL;
 		hmm_vma_walk.last = start;
 		hmm_vma_walk.flags = flags;
-- 
cgit v1.2.3


From a7cf3d24ee6081930feb4c830a7f6f16ebe31c49 Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Thu, 25 Jul 2019 12:07:12 -0600
Subject: net: qualcomm: rmnet: Fix incorrect UL checksum offload logic

The udp_ip4_ind bit is set only for IPv4 UDP non-fragmented packets
so that the hardware can flip the checksum to 0xFFFF if the computed
checksum is 0 per RFC768.

However, this bit had to be set for IPv6 UDP non fragmented packets
as well per hardware requirements. Otherwise, IPv6 UDP packets
with computed checksum as 0 were transmitted by hardware and were
dropped in the network.

In addition to setting this bit for IPv6 UDP, the field is also
appropriately renamed to udp_ind as part of this change.

Fixes: 5eb5f8608ef1 ("net: qualcomm: rmnet: Add support for TX checksum offload")
Cc: Sean Tranchetti <stranche@codeaurora.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c | 13 +++++++++----
 include/linux/if_rmnet.h                             |  4 ++--
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 60189923737a..21d38167f961 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -206,9 +206,9 @@ rmnet_map_ipv4_ul_csum_header(void *iphdr,
 	ul_header->csum_insert_offset = skb->csum_offset;
 	ul_header->csum_enabled = 1;
 	if (ip4h->protocol == IPPROTO_UDP)
-		ul_header->udp_ip4_ind = 1;
+		ul_header->udp_ind = 1;
 	else
-		ul_header->udp_ip4_ind = 0;
+		ul_header->udp_ind = 0;
 
 	/* Changing remaining fields to network order */
 	hdr++;
@@ -239,6 +239,7 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 			      struct rmnet_map_ul_csum_header *ul_header,
 			      struct sk_buff *skb)
 {
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr;
 	__be16 *hdr = (__be16 *)ul_header, offset;
 
 	offset = htons((__force u16)(skb_transport_header(skb) -
@@ -246,7 +247,11 @@ rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
 	ul_header->csum_start_offset = offset;
 	ul_header->csum_insert_offset = skb->csum_offset;
 	ul_header->csum_enabled = 1;
-	ul_header->udp_ip4_ind = 0;
+
+	if (ip6h->nexthdr == IPPROTO_UDP)
+		ul_header->udp_ind = 1;
+	else
+		ul_header->udp_ind = 0;
 
 	/* Changing remaining fields to network order */
 	hdr++;
@@ -419,7 +424,7 @@ sw_csum:
 	ul_header->csum_start_offset = 0;
 	ul_header->csum_insert_offset = 0;
 	ul_header->csum_enabled = 0;
-	ul_header->udp_ip4_ind = 0;
+	ul_header->udp_ind = 0;
 
 	priv->stats.csum_sw++;
 }
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
index b4f5403383fc..9661416a9bb4 100644
--- a/include/linux/if_rmnet.h
+++ b/include/linux/if_rmnet.h
@@ -41,11 +41,11 @@ struct rmnet_map_ul_csum_header {
 	__be16 csum_start_offset;
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	u16 csum_insert_offset:14;
-	u16 udp_ip4_ind:1;
+	u16 udp_ind:1;
 	u16 csum_enabled:1;
 #elif defined (__BIG_ENDIAN_BITFIELD)
 	u16 csum_enabled:1;
-	u16 udp_ip4_ind:1;
+	u16 udp_ind:1;
 	u16 csum_insert_offset:14;
 #else
 #error	"Please fix <asm/byteorder.h>"
-- 
cgit v1.2.3


From f1765a1819ff3489db9500c6d464e682e6844a14 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 26 Jul 2019 12:17:44 +0200
Subject: of: Fix typo in kerneldoc

"Findfrom" is not a word. Replace the function synopsis by something
that makes sense.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/of.h b/include/linux/of.h
index 0cf857012f11..844f89e1b039 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -1164,7 +1164,7 @@ static inline int of_property_read_string_index(const struct device_node *np,
 }
 
 /**
- * of_property_read_bool - Findfrom a property
+ * of_property_read_bool - Find a property
  * @np:		device node from which the property value is to be read.
  * @propname:	name of the property to be searched.
  *
-- 
cgit v1.2.3


From 934d24a5e1508e73c0001afb54a3916e4270428f Mon Sep 17 00:00:00 2001
From: Vitor Soares <Vitor.Soares@synopsys.com>
Date: Fri, 19 Jul 2019 15:30:54 +0200
Subject: i3c: move i3c_device_match_id to device.c and export it

Some I3C device drivers need to know which entry matches the
i3c_device object passed to the probe function

Let's move i3c_device_match_id() to device.c and export it so it can be
used by drivers.

Signed-off-by: Vitor Soares <vitor.soares@synopsys.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/i3c/device.c       | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/i3c/master.c       | 45 ---------------------------------------
 include/linux/i3c/device.h |  4 ++++
 3 files changed, 57 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c
index 69cc040c3a1c..9e2e1406f85e 100644
--- a/drivers/i3c/device.c
+++ b/drivers/i3c/device.c
@@ -200,6 +200,59 @@ struct i3c_device *dev_to_i3cdev(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_to_i3cdev);
 
+/**
+ * i3c_device_match_id() - Returns the i3c_device_id entry matching @i3cdev
+ * @i3cdev: I3C device
+ * @id_table: I3C device match table
+ *
+ * Return: a pointer to an i3c_device_id object or NULL if there's no match.
+ */
+const struct i3c_device_id *
+i3c_device_match_id(struct i3c_device *i3cdev,
+		    const struct i3c_device_id *id_table)
+{
+	struct i3c_device_info devinfo;
+	const struct i3c_device_id *id;
+
+	i3c_device_get_info(i3cdev, &devinfo);
+
+	/*
+	 * The lower 32bits of the provisional ID is just filled with a random
+	 * value, try to match using DCR info.
+	 */
+	if (!I3C_PID_RND_LOWER_32BITS(devinfo.pid)) {
+		u16 manuf = I3C_PID_MANUF_ID(devinfo.pid);
+		u16 part = I3C_PID_PART_ID(devinfo.pid);
+		u16 ext_info = I3C_PID_EXTRA_INFO(devinfo.pid);
+
+		/* First try to match by manufacturer/part ID. */
+		for (id = id_table; id->match_flags != 0; id++) {
+			if ((id->match_flags & I3C_MATCH_MANUF_AND_PART) !=
+			    I3C_MATCH_MANUF_AND_PART)
+				continue;
+
+			if (manuf != id->manuf_id || part != id->part_id)
+				continue;
+
+			if ((id->match_flags & I3C_MATCH_EXTRA_INFO) &&
+			    ext_info != id->extra_info)
+				continue;
+
+			return id;
+		}
+	}
+
+	/* Fallback to DCR match. */
+	for (id = id_table; id->match_flags != 0; id++) {
+		if ((id->match_flags & I3C_MATCH_DCR) &&
+		    id->dcr == devinfo.dcr)
+			return id;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(i3c_device_match_id);
+
 /**
  * i3c_driver_register_with_owner() - register an I3C device driver
  *
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index d6f8b038a896..c58729081899 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -276,51 +276,6 @@ static const struct device_type i3c_device_type = {
 	.uevent = i3c_device_uevent,
 };
 
-static const struct i3c_device_id *
-i3c_device_match_id(struct i3c_device *i3cdev,
-		    const struct i3c_device_id *id_table)
-{
-	struct i3c_device_info devinfo;
-	const struct i3c_device_id *id;
-
-	i3c_device_get_info(i3cdev, &devinfo);
-
-	/*
-	 * The lower 32bits of the provisional ID is just filled with a random
-	 * value, try to match using DCR info.
-	 */
-	if (!I3C_PID_RND_LOWER_32BITS(devinfo.pid)) {
-		u16 manuf = I3C_PID_MANUF_ID(devinfo.pid);
-		u16 part = I3C_PID_PART_ID(devinfo.pid);
-		u16 ext_info = I3C_PID_EXTRA_INFO(devinfo.pid);
-
-		/* First try to match by manufacturer/part ID. */
-		for (id = id_table; id->match_flags != 0; id++) {
-			if ((id->match_flags & I3C_MATCH_MANUF_AND_PART) !=
-			    I3C_MATCH_MANUF_AND_PART)
-				continue;
-
-			if (manuf != id->manuf_id || part != id->part_id)
-				continue;
-
-			if ((id->match_flags & I3C_MATCH_EXTRA_INFO) &&
-			    ext_info != id->extra_info)
-				continue;
-
-			return id;
-		}
-	}
-
-	/* Fallback to DCR match. */
-	for (id = id_table; id->match_flags != 0; id++) {
-		if ((id->match_flags & I3C_MATCH_DCR) &&
-		    id->dcr == devinfo.dcr)
-			return id;
-	}
-
-	return NULL;
-}
-
 static int i3c_device_match(struct device *dev, struct device_driver *drv)
 {
 	struct i3c_device *i3cdev;
diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h
index 5ecb055fd375..de102e4418ab 100644
--- a/include/linux/i3c/device.h
+++ b/include/linux/i3c/device.h
@@ -188,6 +188,10 @@ static inline struct i3c_driver *drv_to_i3cdrv(struct device_driver *drv)
 struct device *i3cdev_to_dev(struct i3c_device *i3cdev);
 struct i3c_device *dev_to_i3cdev(struct device *dev);
 
+const struct i3c_device_id *
+i3c_device_match_id(struct i3c_device *i3cdev,
+		    const struct i3c_device_id *id_table);
+
 static inline void i3cdev_set_drvdata(struct i3c_device *i3cdev,
 				      void *data)
 {
-- 
cgit v1.2.3


From 6fc4dbcf0276279d488c5fbbfabe94734134f4fa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 18 Jul 2019 23:01:46 +0800
Subject: padata: Replace delayed timer with immediate workqueue in
 padata_reorder

The function padata_reorder will use a timer when it cannot progress
while completed jobs are outstanding (pd->reorder_objects > 0).  This
is suboptimal as if we do end up using the timer then it would have
introduced a gratuitous delay of one second.

In fact we can easily distinguish between whether completed jobs
are outstanding and whether we can make progress.  All we have to
do is look at the next pqueue list.

This patch does that by replacing pd->processed with pd->cpu so
that the next pqueue is more accessible.

A work queue is used instead of the original try_again to avoid
hogging the CPU.

Note that we don't bother removing the work queue in
padata_flush_queues because the whole premise is broken.  You
cannot flush async crypto requests so it makes no sense to even
try.  A subsequent patch will fix it by replacing it with a ref
counting scheme.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h | 13 +++----
 kernel/padata.c        | 97 ++++++++++----------------------------------------
 2 files changed, 22 insertions(+), 88 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 56f09e36f770..8da673861d99 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -12,7 +12,6 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/timer.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
 
@@ -73,18 +72,14 @@ struct padata_serial_queue {
  * @serial: List to wait for serialization after reordering.
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
- * @pd: Backpointer to the internal control structure.
  * @work: work struct for parallelization.
- * @reorder_work: work struct for reordering.
  * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
 struct padata_parallel_queue {
        struct padata_list    parallel;
        struct padata_list    reorder;
-       struct parallel_data *pd;
        struct work_struct    work;
-       struct work_struct    reorder_work;
        atomic_t              num_obj;
        int                   cpu_index;
 };
@@ -110,10 +105,10 @@ struct padata_cpumask {
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
+ * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
+ * @reorder_work: work struct for reordering.
  * @lock: Reorder lock.
- * @processed: Number of already processed objects.
- * @timer: Reorder timer.
  */
 struct parallel_data {
 	struct padata_instance		*pinst;
@@ -122,10 +117,10 @@ struct parallel_data {
 	atomic_t			reorder_objects;
 	atomic_t			refcnt;
 	atomic_t			seq_nr;
+	int				cpu;
 	struct padata_cpumask		cpumask;
+	struct work_struct		reorder_work;
 	spinlock_t                      lock ____cacheline_aligned;
-	unsigned int			processed;
-	struct timer_list		timer;
 };
 
 /**
diff --git a/kernel/padata.c b/kernel/padata.c
index 15a8ad63f4ff..fbafca18597f 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -165,23 +165,12 @@ EXPORT_SYMBOL(padata_do_parallel);
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-	int cpu, num_cpus;
-	unsigned int next_nr, next_index;
 	struct padata_parallel_queue *next_queue;
 	struct padata_priv *padata;
 	struct padata_list *reorder;
+	int cpu = pd->cpu;
 
-	num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
-	/*
-	 * Calculate the percpu reorder queue and the sequence
-	 * number of the next object.
-	 */
-	next_nr = pd->processed;
-	next_index = next_nr % num_cpus;
-	cpu = padata_index_to_cpu(pd, next_index);
 	next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
 	reorder = &next_queue->reorder;
 
 	spin_lock(&reorder->lock);
@@ -192,7 +181,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 		list_del_init(&padata->list);
 		atomic_dec(&pd->reorder_objects);
 
-		pd->processed++;
+		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
+					    false);
 
 		spin_unlock(&reorder->lock);
 		goto out;
@@ -215,6 +205,7 @@ static void padata_reorder(struct parallel_data *pd)
 	struct padata_priv *padata;
 	struct padata_serial_queue *squeue;
 	struct padata_instance *pinst = pd->pinst;
+	struct padata_parallel_queue *next_queue;
 
 	/*
 	 * We need to ensure that only one cpu can work on dequeueing of
@@ -246,7 +237,6 @@ static void padata_reorder(struct parallel_data *pd)
 		 * so exit immediately.
 		 */
 		if (PTR_ERR(padata) == -ENODATA) {
-			del_timer(&pd->timer);
 			spin_unlock_bh(&pd->lock);
 			return;
 		}
@@ -265,70 +255,29 @@ static void padata_reorder(struct parallel_data *pd)
 
 	/*
 	 * The next object that needs serialization might have arrived to
-	 * the reorder queues in the meantime, we will be called again
-	 * from the timer function if no one else cares for it.
+	 * the reorder queues in the meantime.
 	 *
-	 * Ensure reorder_objects is read after pd->lock is dropped so we see
-	 * an increment from another task in padata_do_serial.  Pairs with
+	 * Ensure reorder queue is read after pd->lock is dropped so we see
+	 * new objects from another task in padata_do_serial.  Pairs with
 	 * smp_mb__after_atomic in padata_do_serial.
 	 */
 	smp_mb();
-	if (atomic_read(&pd->reorder_objects)
-			&& !(pinst->flags & PADATA_RESET))
-		mod_timer(&pd->timer, jiffies + HZ);
-	else
-		del_timer(&pd->timer);
 
-	return;
+	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+	if (!list_empty(&next_queue->reorder.list))
+		queue_work(pinst->wq, &pd->reorder_work);
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
 {
-	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 
 	local_bh_disable();
-	pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
-	pd = pqueue->pd;
+	pd = container_of(work, struct parallel_data, reorder_work);
 	padata_reorder(pd);
 	local_bh_enable();
 }
 
-static void padata_reorder_timer(struct timer_list *t)
-{
-	struct parallel_data *pd = from_timer(pd, t, timer);
-	unsigned int weight;
-	int target_cpu, cpu;
-
-	cpu = get_cpu();
-
-	/* We don't lock pd here to not interfere with parallel processing
-	 * padata_reorder() calls on other CPUs. We just need any CPU out of
-	 * the cpumask.pcpu set. It would be nice if it's the right one but
-	 * it doesn't matter if we're off to the next one by using an outdated
-	 * pd->processed value.
-	 */
-	weight = cpumask_weight(pd->cpumask.pcpu);
-	target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
-	/* ensure to call the reorder callback on the correct CPU */
-	if (cpu != target_cpu) {
-		struct padata_parallel_queue *pqueue;
-		struct padata_instance *pinst;
-
-		/* The timer function is serialized wrt itself -- no locking
-		 * needed.
-		 */
-		pinst = pd->pinst;
-		pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
-		queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
-	} else {
-		padata_reorder(pd);
-	}
-
-	put_cpu();
-}
-
 static void padata_serial_worker(struct work_struct *serial_work)
 {
 	struct padata_serial_queue *squeue;
@@ -376,9 +325,8 @@ void padata_do_serial(struct padata_priv *padata)
 
 	cpu = get_cpu();
 
-	/* We need to run on the same CPU padata_do_parallel(.., padata, ..)
-	 * was called on -- or, at least, enqueue the padata object into the
-	 * correct per-cpu queue.
+	/* We need to enqueue the padata object into the correct
+	 * per-cpu queue.
 	 */
 	if (cpu != padata->cpu) {
 		reorder_via_wq = 1;
@@ -388,12 +336,12 @@ void padata_do_serial(struct padata_priv *padata)
 	pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
 	spin_lock(&pqueue->reorder.lock);
-	atomic_inc(&pd->reorder_objects);
 	list_add_tail(&padata->list, &pqueue->reorder.list);
+	atomic_inc(&pd->reorder_objects);
 	spin_unlock(&pqueue->reorder.lock);
 
 	/*
-	 * Ensure the atomic_inc of reorder_objects above is ordered correctly
+	 * Ensure the addition to the reorder list is ordered correctly
 	 * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
 	 * in padata_reorder.
 	 */
@@ -401,13 +349,7 @@ void padata_do_serial(struct padata_priv *padata)
 
 	put_cpu();
 
-	/* If we're running on the wrong CPU, call padata_reorder() via a
-	 * kernel worker.
-	 */
-	if (reorder_via_wq)
-		queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
-	else
-		padata_reorder(pd);
+	padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);
 
@@ -463,14 +405,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
 			continue;
 		}
 
-		pqueue->pd = pd;
 		pqueue->cpu_index = cpu_index;
 		cpu_index++;
 
 		__padata_list_init(&pqueue->reorder);
 		__padata_list_init(&pqueue->parallel);
 		INIT_WORK(&pqueue->work, padata_parallel_worker);
-		INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
 		atomic_set(&pqueue->num_obj, 0);
 	}
 }
@@ -498,12 +438,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 
 	padata_init_pqueues(pd);
 	padata_init_squeues(pd);
-	timer_setup(&pd->timer, padata_reorder_timer, 0);
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
 	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
+	pd->cpu = cpumask_first(pcpumask);
+	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
 
 	return pd;
 
@@ -538,8 +479,6 @@ static void padata_flush_queues(struct parallel_data *pd)
 		flush_work(&pqueue->work);
 	}
 
-	del_timer_sync(&pd->timer);
-
 	if (atomic_read(&pd->reorder_objects))
 		padata_reorder(pd);
 
-- 
cgit v1.2.3


From 8dfa20fcfbeb245642dfe3a43f8a3735d9aed42a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 19 Jul 2019 23:09:18 -0700
Subject: crypto: ghash - add comment and improve help text

To help avoid confusion, add a comment to ghash-generic.c which explains
the convention that the kernel's implementation of GHASH uses.

Also update the Kconfig help text and module descriptions to call GHASH
a "hash function" rather than a "message digest", since the latter
normally means a real cryptographic hash function, which GHASH is not.

Cc: Pascal Van Leeuwen <pvanleeuwen@verimatrix.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Pascal Van Leeuwen <pvanleeuwen@verimatrix.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/ghash-ce-glue.c            |  2 +-
 arch/s390/crypto/ghash_s390.c              |  2 +-
 arch/x86/crypto/ghash-clmulni-intel_glue.c |  3 +--
 crypto/Kconfig                             | 11 ++++++-----
 crypto/ghash-generic.c                     | 31 +++++++++++++++++++++++++++---
 drivers/crypto/Kconfig                     |  6 +++---
 include/crypto/ghash.h                     |  2 +-
 7 files changed, 41 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index bb906b5f1eb3..c691077679a6 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -18,7 +18,7 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index eeeb6a7737a4..a3e7400e031c 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -153,4 +153,4 @@ module_exit(ghash_mod_exit);
 MODULE_ALIAS_CRYPTO("ghash");
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
+MODULE_DESCRIPTION("GHASH hash function, s390 implementation");
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index ac76fe88ac4f..04d72a5a8ce9 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -357,6 +357,5 @@ module_init(ghash_pclmulqdqni_mod_init);
 module_exit(ghash_pclmulqdqni_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
-		   "accelerated by PCLMULQDQ-NI");
+MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 2e7f08ba0675..455a3354e291 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -647,11 +647,12 @@ config CRYPTO_VPMSUM_TESTER
 	  Unless you are testing these algorithms, you don't need this.
 
 config CRYPTO_GHASH
-	tristate "GHASH digest algorithm"
+	tristate "GHASH hash function"
 	select CRYPTO_GF128MUL
 	select CRYPTO_HASH
 	help
-	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
+	  GHASH is the hash function used in GCM (Galois/Counter Mode).
+	  It is not a general-purpose cryptographic hash function.
 
 config CRYPTO_POLY1305
 	tristate "Poly1305 authenticator algorithm"
@@ -976,12 +977,12 @@ config CRYPTO_WP512
 	  <http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html>
 
 config CRYPTO_GHASH_CLMUL_NI_INTEL
-	tristate "GHASH digest algorithm (CLMUL-NI accelerated)"
+	tristate "GHASH hash function (CLMUL-NI accelerated)"
 	depends on X86 && 64BIT
 	select CRYPTO_CRYPTD
 	help
-	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
-	  The implementation is accelerated by CLMUL-NI of Intel.
+	  This is the x86_64 CLMUL-NI accelerated implementation of
+	  GHASH, the hash function used in GCM (Galois/Counter mode).
 
 comment "Ciphers"
 
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index dad9e1f91a78..5027b3461c92 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -1,12 +1,37 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * GHASH: digest algorithm for GCM (Galois/Counter Mode).
+ * GHASH: hash function for GCM (Galois/Counter Mode).
  *
  * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
  * Copyright (c) 2009 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+/*
+ * GHASH is a keyed hash function used in GCM authentication tag generation.
+ *
+ * The original GCM paper [1] presents GHASH as a function GHASH(H, A, C) which
+ * takes a 16-byte hash key H, additional authenticated data A, and a ciphertext
+ * C.  It formats A and C into a single byte string X, interprets X as a
+ * polynomial over GF(2^128), and evaluates this polynomial at the point H.
+ *
+ * However, the NIST standard for GCM [2] presents GHASH as GHASH(H, X) where X
+ * is the already-formatted byte string containing both A and C.
+ *
+ * "ghash" in the Linux crypto API uses the 'X' (pre-formatted) convention,
+ * since the API supports only a single data stream per hash.  Thus, the
+ * formatting of 'A' and 'C' is done in the "gcm" template, not in "ghash".
+ *
+ * The reason "ghash" is separate from "gcm" is to allow "gcm" to use an
+ * accelerated "ghash" when a standalone accelerated "gcm(aes)" is unavailable.
+ * It is generally inappropriate to use "ghash" for other purposes, since it is
+ * an "ε-almost-XOR-universal hash function", not a cryptographic hash function.
+ * It can only be used securely in crypto modes specially designed to use it.
  *
- * The algorithm implementation is copied from gcm.c.
+ * [1] The Galois/Counter Mode of Operation (GCM)
+ *     (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.694.695&rep=rep1&type=pdf)
+ * [2] Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC
+ *     (https://csrc.nist.gov/publications/detail/sp/800-38d/final)
  */
 
 #include <crypto/algapi.h>
@@ -156,6 +181,6 @@ subsys_initcall(ghash_mod_init);
 module_exit(ghash_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm");
+MODULE_DESCRIPTION("GHASH hash function");
 MODULE_ALIAS_CRYPTO("ghash");
 MODULE_ALIAS_CRYPTO("ghash-generic");
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 69d1bbd5d9bf..b8c50871f11b 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -189,12 +189,12 @@ config S390_PRNG
 	  It is available as of z9.
 
 config CRYPTO_GHASH_S390
-	tristate "GHASH digest algorithm"
+	tristate "GHASH hash function"
 	depends on S390
 	select CRYPTO_HASH
 	help
-	  This is the s390 hardware accelerated implementation of the
-	  GHASH message digest algorithm for GCM (Galois/Counter Mode).
+	  This is the s390 hardware accelerated implementation of GHASH,
+	  the hash function used in GCM (Galois/Counter mode).
 
 	  It is available as of z196.
 
diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h
index 9136301062a5..f832c9f2aca3 100644
--- a/include/crypto/ghash.h
+++ b/include/crypto/ghash.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Common values for GHASH algorithms
+ * Common values for the GHASH hash function
  */
 
 #ifndef __CRYPTO_GHASH_H__
-- 
cgit v1.2.3


From ed1f2e85da79274f3dc4092953f1359eb732f0c6 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Thu, 18 Jul 2019 16:28:24 -0700
Subject: iio: cros_ec: Add calibscale for 3d MEMS

Add calibration scale support to accel, gyro and magnetometer.

Check on eve with current firmware, check reading calibscale returns 1.0,
check with newer firmware values are applied.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../iio/common/cros_ec_sensors/cros_ec_sensors.c   | 51 +++++++++++++++++++---
 .../common/cros_ec_sensors/cros_ec_sensors_core.c  |  2 +-
 drivers/iio/light/cros_ec_light_prox.c             | 12 ++---
 drivers/iio/pressure/cros_ec_baro.c                |  2 -
 include/linux/iio/common/cros_ec_sensors_core.h    |  5 ++-
 5 files changed, 57 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
index 17af4e0fd5f8..2af09606c438 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
@@ -63,10 +63,35 @@ static int cros_ec_sensors_read(struct iio_dev *indio_dev,
 
 		/* Save values */
 		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
-			st->core.calib[i] =
+			st->core.calib[i].offset =
 				st->core.resp->sensor_offset.offset[i];
 		ret = IIO_VAL_INT;
-		*val = st->core.calib[idx];
+		*val = st->core.calib[idx].offset;
+		break;
+	case IIO_CHAN_INFO_CALIBSCALE:
+		st->core.param.cmd = MOTIONSENSE_CMD_SENSOR_SCALE;
+		st->core.param.sensor_offset.flags = 0;
+
+		ret = cros_ec_motion_send_host_cmd(&st->core, 0);
+		if (ret == -EPROTO) {
+			/* Reading calibscale is not supported on older EC. */
+			*val = 1;
+			*val2 = 0;
+			ret = IIO_VAL_INT_PLUS_MICRO;
+			break;
+		} else if (ret) {
+			break;
+		}
+
+		/* Save values */
+		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
+			st->core.calib[i].scale =
+				st->core.resp->sensor_scale.scale[i];
+
+		*val = st->core.calib[idx].scale >> 15;
+		*val2 = ((st->core.calib[idx].scale & 0x7FFF) * 1000000LL) /
+			MOTION_SENSE_DEFAULT_SCALE;
+		ret = IIO_VAL_INT_PLUS_MICRO;
 		break;
 	case IIO_CHAN_INFO_SCALE:
 		st->core.param.cmd = MOTIONSENSE_CMD_SENSOR_RANGE;
@@ -134,7 +159,7 @@ static int cros_ec_sensors_write(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_CALIBBIAS:
-		st->core.calib[idx] = val;
+		st->core.calib[idx].offset = val;
 
 		/* Send to EC for each axis, even if not complete */
 		st->core.param.cmd = MOTIONSENSE_CMD_SENSOR_OFFSET;
@@ -142,10 +167,25 @@ static int cros_ec_sensors_write(struct iio_dev *indio_dev,
 			MOTION_SENSE_SET_OFFSET;
 		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
 			st->core.param.sensor_offset.offset[i] =
-				st->core.calib[i];
+				st->core.calib[i].offset;
 		st->core.param.sensor_offset.temp =
 			EC_MOTION_SENSE_INVALID_CALIB_TEMP;
 
+		ret = cros_ec_motion_send_host_cmd(&st->core, 0);
+		break;
+	case IIO_CHAN_INFO_CALIBSCALE:
+		st->core.calib[idx].scale = val;
+		/* Send to EC for each axis, even if not complete */
+
+		st->core.param.cmd = MOTIONSENSE_CMD_SENSOR_SCALE;
+		st->core.param.sensor_offset.flags =
+			MOTION_SENSE_SET_OFFSET;
+		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
+			st->core.param.sensor_scale.scale[i] =
+				st->core.calib[i].scale;
+		st->core.param.sensor_scale.temp =
+			EC_MOTION_SENSE_INVALID_CALIB_TEMP;
+
 		ret = cros_ec_motion_send_host_cmd(&st->core, 0);
 		break;
 	case IIO_CHAN_INFO_SCALE:
@@ -206,7 +246,8 @@ static int cros_ec_sensors_probe(struct platform_device *pdev)
 		/* Common part */
 		channel->info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_CALIBBIAS);
+			BIT(IIO_CHAN_INFO_CALIBBIAS) |
+			BIT(IIO_CHAN_INFO_CALIBSCALE);
 		channel->info_mask_shared_by_all =
 			BIT(IIO_CHAN_INFO_SCALE) |
 			BIT(IIO_CHAN_INFO_FREQUENCY) |
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
index 130362ca421b..96d5aa1f4bd5 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -118,7 +118,7 @@ static ssize_t cros_ec_sensors_calibrate(struct iio_dev *indio_dev,
 	} else {
 		/* Save values */
 		for (i = CROS_EC_SENSOR_X; i < CROS_EC_SENSOR_MAX_AXIS; i++)
-			st->calib[i] = st->resp->perform_calib.offset[i];
+			st->calib[i].offset = st->resp->perform_calib.offset[i];
 	}
 	mutex_unlock(&st->cmd_lock);
 
diff --git a/drivers/iio/light/cros_ec_light_prox.c b/drivers/iio/light/cros_ec_light_prox.c
index 308ee6ff2e22..b81746a99f1f 100644
--- a/drivers/iio/light/cros_ec_light_prox.c
+++ b/drivers/iio/light/cros_ec_light_prox.c
@@ -88,9 +88,10 @@ static int cros_ec_light_prox_read(struct iio_dev *indio_dev,
 		}
 
 		/* Save values */
-		st->core.calib[0] = st->core.resp->sensor_offset.offset[0];
+		st->core.calib[0].offset =
+			st->core.resp->sensor_offset.offset[0];
 
-		*val = st->core.calib[idx];
+		*val = st->core.calib[idx].offset;
 		break;
 	case IIO_CHAN_INFO_CALIBSCALE:
 		/*
@@ -134,11 +135,12 @@ static int cros_ec_light_prox_write(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_CALIBBIAS:
-		st->core.calib[idx] = val;
+		st->core.calib[idx].offset = val;
 		/* Send to EC for each axis, even if not complete */
 		st->core.param.cmd = MOTIONSENSE_CMD_SENSOR_OFFSET;
 		st->core.param.sensor_offset.flags = MOTION_SENSE_SET_OFFSET;
-		st->core.param.sensor_offset.offset[0] = st->core.calib[0];
+		st->core.param.sensor_offset.offset[0] =
+			st->core.calib[0].offset;
 		st->core.param.sensor_offset.temp =
 					EC_MOTION_SENSE_INVALID_CALIB_TEMP;
 		if (cros_ec_motion_send_host_cmd(&st->core, 0))
@@ -205,8 +207,6 @@ static int cros_ec_light_prox_probe(struct platform_device *pdev)
 	channel->ext_info = cros_ec_sensors_ext_info;
 	channel->scan_type.sign = 'u';
 
-	state->core.calib[0] = 0;
-
 	/* Sensor specific */
 	switch (state->core.type) {
 	case MOTIONSENSE_TYPE_LIGHT:
diff --git a/drivers/iio/pressure/cros_ec_baro.c b/drivers/iio/pressure/cros_ec_baro.c
index 034ce98d6e97..d3acba7ba582 100644
--- a/drivers/iio/pressure/cros_ec_baro.c
+++ b/drivers/iio/pressure/cros_ec_baro.c
@@ -152,8 +152,6 @@ static int cros_ec_baro_probe(struct platform_device *pdev)
 	channel->ext_info = cros_ec_sensors_ext_info;
 	channel->scan_type.sign = 'u';
 
-	state->core.calib[0] = 0;
-
 	/* Sensor specific */
 	switch (state->core.type) {
 	case MOTIONSENSE_TYPE_BARO:
diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h
index 0c636b9fe8d7..bb03a252bd04 100644
--- a/include/linux/iio/common/cros_ec_sensors_core.h
+++ b/include/linux/iio/common/cros_ec_sensors_core.h
@@ -62,7 +62,10 @@ struct cros_ec_sensors_core_state {
 	enum motionsensor_type type;
 	enum motionsensor_location loc;
 
-	s16 calib[CROS_EC_SENSOR_MAX_AXIS];
+	struct calib_data {
+		s16 offset;
+		u16 scale;
+	} calib[CROS_EC_SENSOR_MAX_AXIS];
 
 	u8 samples[CROS_EC_SAMPLE_SIZE];
 
-- 
cgit v1.2.3


From 1a981c0586c038710227eb740350f291e77ce365 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 26 Jul 2019 12:27:40 +0200
Subject: net: stmmac: Make MDIO bus reset optional

The Tegra EQOS driver already resets the MDIO bus at probe time via the
reset GPIO specified in the phy-reset-gpios device tree property. There
is no need to reset the bus again later on.

This avoids the need to query the device tree for the snps,reset GPIO,
which is not part of the Tegra EQOS device tree bindings. This quiesces
an error message from the generic bus reset code if it doesn't find the
snps,reset related delays.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 3 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c       | 4 +++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c        | 1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c   | 8 +++++++-
 include/linux/stmmac.h                                  | 1 +
 5 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 3a14cdd01f5f..66933332c68e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -333,6 +333,9 @@ static void *tegra_eqos_probe(struct platform_device *pdev,
 	usleep_range(2000, 4000);
 	gpiod_set_value(eqos->reset, 0);
 
+	/* MDIO bus was already reset just above */
+	data->mdio_bus_data->needs_reset = false;
+
 	eqos->rst = devm_reset_control_get(&pdev->dev, "eqos");
 	if (IS_ERR(eqos->rst)) {
 		err = PTR_ERR(eqos->rst);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 4304c1abc5d1..40c42637ad75 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -348,7 +348,9 @@ int stmmac_mdio_register(struct net_device *ndev)
 		max_addr = PHY_MAX_ADDR;
 	}
 
-	new_bus->reset = &stmmac_mdio_reset;
+	if (mdio_bus_data->needs_reset)
+		new_bus->reset = &stmmac_mdio_reset;
+
 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
 		 new_bus->name, priv->plat->bus_id);
 	new_bus->priv = ndev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 86f9c07a38cf..d5d08e11c353 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -63,6 +63,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
 	plat->has_gmac = 1;
 	plat->force_sf_dma_mode = 1;
 
+	plat->mdio_bus_data->needs_reset = true;
 	plat->mdio_bus_data->phy_mask = 0;
 
 	/* Set default value for multicast hash bins */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 73fc2524372e..333b09564b88 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -342,10 +342,16 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 		mdio = true;
 	}
 
-	if (mdio)
+	if (mdio) {
 		plat->mdio_bus_data =
 			devm_kzalloc(dev, sizeof(struct stmmac_mdio_bus_data),
 				     GFP_KERNEL);
+		if (!plat->mdio_bus_data)
+			return -ENOMEM;
+
+		plat->mdio_bus_data->needs_reset = true;
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 7d06241582dd..7b3e354bcd3c 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -81,6 +81,7 @@ struct stmmac_mdio_bus_data {
 	unsigned int phy_mask;
 	int *irqs;
 	int probed_phy_irq;
+	bool needs_reset;
 };
 
 struct stmmac_dma_cfg {
-- 
cgit v1.2.3


From a090965b882333500d8780e2c1762e17782f413f Mon Sep 17 00:00:00 2001
From: Denis Ciocca <denis.ciocca@st.com>
Date: Thu, 18 Jul 2019 15:53:43 -0700
Subject: iio:common:st_sensors: add st_sensors_get_settings_index() helper
 function

Extract from st_sensors_check_device_support() function the code that
is used to get the specific settings for a device. This will be used
as generic extractor by each ST driver.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/common/st_sensors/st_sensors_core.c | 49 +++++++++++++++++--------
 include/linux/iio/common/st_sensors.h           |  4 ++
 2 files changed, 38 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 8b22dc241482..3610ca9eaa87 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -633,28 +633,47 @@ static int st_sensors_init_interface_mode(struct iio_dev *indio_dev,
 	return 0;
 }
 
+/*
+ * st_sensors_get_settings_index() - get index of the sensor settings for a
+ *				     specific device from list of settings
+ * @name: device name buffer reference.
+ * @list: sensor settings list.
+ * @list_length: length of sensor settings list.
+ *
+ * Return: non negative number on success (valid index),
+ *	   negative error code otherwise.
+ */
+int st_sensors_get_settings_index(const char *name,
+				  const struct st_sensor_settings *list,
+				  const int list_length)
+{
+	int i, n;
+
+	for (i = 0; i < list_length; i++) {
+		for (n = 0; n < ST_SENSORS_MAX_4WAI; n++) {
+			if (strcmp(name, list[i].sensors_supported[n]) == 0)
+				return i;
+		}
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(st_sensors_get_settings_index);
+
 int st_sensors_check_device_support(struct iio_dev *indio_dev,
 			int num_sensors_list,
 			const struct st_sensor_settings *sensor_settings)
 {
-	int i, n, err = 0;
-	u8 wai;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	int i, err;
+	u8 wai;
 
-	for (i = 0; i < num_sensors_list; i++) {
-		for (n = 0; n < ST_SENSORS_MAX_4WAI; n++) {
-			if (strcmp(indio_dev->name,
-				sensor_settings[i].sensors_supported[n]) == 0) {
-				break;
-			}
-		}
-		if (n < ST_SENSORS_MAX_4WAI)
-			break;
-	}
-	if (i == num_sensors_list) {
+	i = st_sensors_get_settings_index(indio_dev->name,
+					  sensor_settings, num_sensors_list);
+	if (i < 0) {
 		dev_err(&indio_dev->dev, "device name %s not recognized.\n",
-							indio_dev->name);
-		return -ENODEV;
+			indio_dev->name);
+		return i;
 	}
 
 	err = st_sensors_init_interface_mode(indio_dev, &sensor_settings[i]);
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 2948ac99e2da..17fbf3e9b013 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -334,6 +334,10 @@ int st_sensors_set_fullscale_by_gain(struct iio_dev *indio_dev, int scale);
 int st_sensors_read_info_raw(struct iio_dev *indio_dev,
 				struct iio_chan_spec const *ch, int *val);
 
+int st_sensors_get_settings_index(const char *name,
+				  const struct st_sensor_settings *list,
+				  const int list_length);
+
 int st_sensors_check_device_support(struct iio_dev *indio_dev,
 	int num_sensors_list, const struct st_sensor_settings *sensor_settings);
 
-- 
cgit v1.2.3


From 1ecd245e0eb23d1c3803474eba75589743d0d1fe Mon Sep 17 00:00:00 2001
From: Denis Ciocca <denis.ciocca@st.com>
Date: Thu, 18 Jul 2019 15:53:52 -0700
Subject: iio: move 3-wire spi initialization to st_sensors_spi

Some devices need to be configured with special bit in order to
use spi 3-wire. This was done during device identification phase.
Instead, let's move this part as spi specific.
Doing this the check_device_support function becomes a simple
device id check, so let's rename it.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c               |  4 +-
 drivers/iio/accel/st_accel_spi.c                |  4 +-
 drivers/iio/common/st_sensors/st_sensors_core.c | 64 ++++++------------------
 drivers/iio/common/st_sensors/st_sensors_spi.c  | 65 ++++++++++++++++++++++++-
 drivers/iio/gyro/st_gyro_core.c                 |  4 +-
 drivers/iio/gyro/st_gyro_spi.c                  |  4 +-
 drivers/iio/magnetometer/st_magn_core.c         |  4 +-
 drivers/iio/magnetometer/st_magn_spi.c          |  4 +-
 drivers/iio/pressure/st_pressure_core.c         |  4 +-
 drivers/iio/pressure/st_pressure_spi.c          |  4 +-
 include/linux/iio/common/st_sensors.h           |  3 +-
 include/linux/iio/common/st_sensors_spi.h       |  4 +-
 12 files changed, 97 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 6fc490ffef7e..630909702a19 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1183,9 +1183,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 	if (err)
 		return err;
 
-	err = st_sensors_check_device_support(indio_dev,
-					ARRAY_SIZE(st_accel_sensors_settings),
-					st_accel_sensors_settings);
+	err = st_sensors_verify_id(indio_dev);
 	if (err < 0)
 		goto st_accel_power_off;
 
diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c
index c0556db9d60a..8af7027d5598 100644
--- a/drivers/iio/accel/st_accel_spi.c
+++ b/drivers/iio/accel/st_accel_spi.c
@@ -124,7 +124,9 @@ static int st_accel_spi_probe(struct spi_device *spi)
 	adata = iio_priv(indio_dev);
 	adata->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_spi_configure(indio_dev, spi, adata);
+	err = st_sensors_spi_configure(indio_dev, spi);
+	if (err < 0)
+		return err;
 
 	err = st_accel_common_probe(indio_dev);
 	if (err < 0)
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 3610ca9eaa87..f05bf7cf0594 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -608,31 +608,6 @@ out:
 }
 EXPORT_SYMBOL(st_sensors_read_info_raw);
 
-static int st_sensors_init_interface_mode(struct iio_dev *indio_dev,
-			const struct st_sensor_settings *sensor_settings)
-{
-	struct st_sensor_data *sdata = iio_priv(indio_dev);
-	struct device_node *np = sdata->dev->of_node;
-	struct st_sensors_platform_data *pdata;
-
-	pdata = (struct st_sensors_platform_data *)sdata->dev->platform_data;
-	if (((np && of_property_read_bool(np, "spi-3wire")) ||
-	     (pdata && pdata->spi_3wire)) && sensor_settings->sim.addr) {
-		int err;
-
-		err = sdata->tf->write_byte(&sdata->tb, sdata->dev,
-					    sensor_settings->sim.addr,
-					    sensor_settings->sim.value);
-		if (err < 0) {
-			dev_err(&indio_dev->dev,
-				"failed to init interface mode\n");
-			return err;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * st_sensors_get_settings_index() - get index of the sensor settings for a
  *				     specific device from list of settings
@@ -660,36 +635,30 @@ int st_sensors_get_settings_index(const char *name,
 }
 EXPORT_SYMBOL(st_sensors_get_settings_index);
 
-int st_sensors_check_device_support(struct iio_dev *indio_dev,
-			int num_sensors_list,
-			const struct st_sensor_settings *sensor_settings)
+/*
+ * st_sensors_verify_id() - verify sensor ID (WhoAmI) is matching with the
+ *			    expected value
+ * @indio_dev: IIO device reference.
+ *
+ * Return: 0 on success (valid sensor ID), else a negative error code.
+ */
+int st_sensors_verify_id(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
-	int i, err;
+	int err;
 	u8 wai;
 
-	i = st_sensors_get_settings_index(indio_dev->name,
-					  sensor_settings, num_sensors_list);
-	if (i < 0) {
-		dev_err(&indio_dev->dev, "device name %s not recognized.\n",
-			indio_dev->name);
-		return i;
-	}
-
-	err = st_sensors_init_interface_mode(indio_dev, &sensor_settings[i]);
-	if (err < 0)
-		return err;
-
-	if (sensor_settings[i].wai_addr) {
+	if (sdata->sensor_settings->wai_addr) {
 		err = sdata->tf->read_byte(&sdata->tb, sdata->dev,
-					   sensor_settings[i].wai_addr, &wai);
+					   sdata->sensor_settings->wai_addr,
+					   &wai);
 		if (err < 0) {
 			dev_err(&indio_dev->dev,
 				"failed to read Who-Am-I register.\n");
 			return err;
 		}
 
-		if (sensor_settings[i].wai != wai) {
+		if (sdata->sensor_settings->wai != wai) {
 			dev_err(&indio_dev->dev,
 				"%s: WhoAmI mismatch (0x%x).\n",
 				indio_dev->name, wai);
@@ -697,12 +666,9 @@ int st_sensors_check_device_support(struct iio_dev *indio_dev,
 		}
 	}
 
-	sdata->sensor_settings =
-			(struct st_sensor_settings *)&sensor_settings[i];
-
-	return i;
+	return 0;
 }
-EXPORT_SYMBOL(st_sensors_check_device_support);
+EXPORT_SYMBOL(st_sensors_verify_id);
 
 ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev,
 				struct device_attribute *attr, char *buf)
diff --git a/drivers/iio/common/st_sensors/st_sensors_spi.c b/drivers/iio/common/st_sensors/st_sensors_spi.c
index 2213843f02cb..a57cd648975c 100644
--- a/drivers/iio/common/st_sensors/st_sensors_spi.c
+++ b/drivers/iio/common/st_sensors/st_sensors_spi.c
@@ -102,9 +102,68 @@ static const struct st_sensor_transfer_function st_sensors_tf_spi = {
 	.read_multiple_byte = st_sensors_spi_read_multiple_byte,
 };
 
-void st_sensors_spi_configure(struct iio_dev *indio_dev,
-			struct spi_device *spi, struct st_sensor_data *sdata)
+/*
+ * st_sensors_is_spi_3_wire() - check if SPI 3-wire mode has been selected
+ * @spi: spi device reference.
+ *
+ * Return: true if SPI 3-wire mode is selected, false otherwise.
+ */
+static bool st_sensors_is_spi_3_wire(struct spi_device *spi)
+{
+	struct device_node *np = spi->dev.of_node;
+	struct st_sensors_platform_data *pdata;
+
+	pdata = (struct st_sensors_platform_data *)spi->dev.platform_data;
+	if ((np && of_property_read_bool(np, "spi-3wire")) ||
+	    (pdata && pdata->spi_3wire)) {
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * st_sensors_configure_spi_3_wire() - configure SPI 3-wire if needed
+ * @spi: spi device reference.
+ * @settings: sensor specific settings reference.
+ *
+ * Return: 0 on success, else a negative error code.
+ */
+static int st_sensors_configure_spi_3_wire(struct spi_device *spi,
+					   struct st_sensor_settings *settings)
+{
+	if (settings->sim.addr) {
+		u8 buffer[] = {
+			settings->sim.addr,
+			settings->sim.value
+		};
+
+		return spi_write(spi, buffer, 2);
+	}
+
+	return 0;
+}
+
+/*
+ * st_sensors_spi_configure() - configure SPI interface
+ * @indio_dev: IIO device reference.
+ * @spi: spi device reference.
+ *
+ * Return: 0 on success, else a negative error code.
+ */
+int st_sensors_spi_configure(struct iio_dev *indio_dev,
+			     struct spi_device *spi)
 {
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	int err;
+
+	if (st_sensors_is_spi_3_wire(spi)) {
+		err = st_sensors_configure_spi_3_wire(spi,
+						      sdata->sensor_settings);
+		if (err < 0)
+			return err;
+	}
+
 	spi_set_drvdata(spi, indio_dev);
 
 	indio_dev->dev.parent = &spi->dev;
@@ -113,6 +172,8 @@ void st_sensors_spi_configure(struct iio_dev *indio_dev,
 	sdata->dev = &spi->dev;
 	sdata->tf = &st_sensors_tf_spi;
 	sdata->get_irq_data_ready = st_sensors_spi_get_irq;
+
+	return 0;
 }
 EXPORT_SYMBOL(st_sensors_spi_configure);
 
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 5cc63d41d855..4b87e79aa744 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -400,9 +400,7 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 	if (err)
 		return err;
 
-	err = st_sensors_check_device_support(indio_dev,
-					ARRAY_SIZE(st_gyro_sensors_settings),
-					st_gyro_sensors_settings);
+	err = st_sensors_verify_id(indio_dev);
 	if (err < 0)
 		goto st_gyro_power_off;
 
diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c
index bb7082055f85..b5c624251231 100644
--- a/drivers/iio/gyro/st_gyro_spi.c
+++ b/drivers/iio/gyro/st_gyro_spi.c
@@ -91,7 +91,9 @@ static int st_gyro_spi_probe(struct spi_device *spi)
 	gdata = iio_priv(indio_dev);
 	gdata->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_spi_configure(indio_dev, spi, gdata);
+	err = st_sensors_spi_configure(indio_dev, spi);
+	if (err < 0)
+		return err;
 
 	err = st_gyro_common_probe(indio_dev);
 	if (err < 0)
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 43a49a52c81a..3f313aefece6 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -502,9 +502,7 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 	if (err)
 		return err;
 
-	err = st_sensors_check_device_support(indio_dev,
-					ARRAY_SIZE(st_magn_sensors_settings),
-					st_magn_sensors_settings);
+	err = st_sensors_verify_id(indio_dev);
 	if (err < 0)
 		goto st_magn_power_off;
 
diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c
index a3045afc6b53..fbf909bde841 100644
--- a/drivers/iio/magnetometer/st_magn_spi.c
+++ b/drivers/iio/magnetometer/st_magn_spi.c
@@ -73,7 +73,9 @@ static int st_magn_spi_probe(struct spi_device *spi)
 	mdata = iio_priv(indio_dev);
 	mdata->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_spi_configure(indio_dev, spi, mdata);
+	err = st_sensors_spi_configure(indio_dev, spi);
+	if (err < 0)
+		return err;
 
 	err = st_magn_common_probe(indio_dev);
 	if (err < 0)
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 35d80ff27464..a783fc075c26 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -698,9 +698,7 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 	if (err)
 		return err;
 
-	err = st_sensors_check_device_support(indio_dev,
-					ARRAY_SIZE(st_press_sensors_settings),
-					st_press_sensors_settings);
+	err = st_sensors_verify_id(indio_dev);
 	if (err < 0)
 		goto st_press_power_off;
 
diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c
index 3e8c1ffe001e..7c8b70221e70 100644
--- a/drivers/iio/pressure/st_pressure_spi.c
+++ b/drivers/iio/pressure/st_pressure_spi.c
@@ -83,7 +83,9 @@ static int st_press_spi_probe(struct spi_device *spi)
 	press_data = iio_priv(indio_dev);
 	press_data->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_spi_configure(indio_dev, spi, press_data);
+	err = st_sensors_spi_configure(indio_dev, spi);
+	if (err < 0)
+		return err;
 
 	err = st_press_common_probe(indio_dev);
 	if (err < 0)
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 17fbf3e9b013..566b955e2980 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -338,8 +338,7 @@ int st_sensors_get_settings_index(const char *name,
 				  const struct st_sensor_settings *list,
 				  const int list_length);
 
-int st_sensors_check_device_support(struct iio_dev *indio_dev,
-	int num_sensors_list, const struct st_sensor_settings *sensor_settings);
+int st_sensors_verify_id(struct iio_dev *indio_dev);
 
 ssize_t st_sensors_sysfs_sampling_frequency_avail(struct device *dev,
 				struct device_attribute *attr, char *buf);
diff --git a/include/linux/iio/common/st_sensors_spi.h b/include/linux/iio/common/st_sensors_spi.h
index 6020f7167859..90b25f087f06 100644
--- a/include/linux/iio/common/st_sensors_spi.h
+++ b/include/linux/iio/common/st_sensors_spi.h
@@ -13,7 +13,7 @@
 #include <linux/spi/spi.h>
 #include <linux/iio/common/st_sensors.h>
 
-void st_sensors_spi_configure(struct iio_dev *indio_dev,
-			struct spi_device *spi, struct st_sensor_data *sdata);
+int st_sensors_spi_configure(struct iio_dev *indio_dev,
+			     struct spi_device *spi);
 
 #endif /* ST_SENSORS_SPI_H */
-- 
cgit v1.2.3


From 062809ef7733209312562e87cefc84a470430929 Mon Sep 17 00:00:00 2001
From: Denis Ciocca <denis.ciocca@st.com>
Date: Thu, 18 Jul 2019 15:53:53 -0700
Subject: iio: make st_sensors drivers use regmap

This patch is meant to replace the i2c/spi transfer functions with
regmap. SPI framework requires DMA safe buffers so let's add GFP_DMA
flag for memory allocation used by bulk_read functions.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_buffer.c                |   3 +-
 drivers/iio/accel/st_accel_core.c                  |   3 -
 drivers/iio/accel/st_accel_i2c.c                   |   4 +-
 drivers/iio/common/st_sensors/st_sensors_buffer.c  |  10 +--
 drivers/iio/common/st_sensors/st_sensors_core.c    |  39 +++-----
 drivers/iio/common/st_sensors/st_sensors_i2c.c     |  73 ++++++++-------
 drivers/iio/common/st_sensors/st_sensors_spi.c     | 100 +++++----------------
 drivers/iio/common/st_sensors/st_sensors_trigger.c |  10 +--
 drivers/iio/gyro/st_gyro_buffer.c                  |   3 +-
 drivers/iio/gyro/st_gyro_core.c                    |   3 -
 drivers/iio/gyro/st_gyro_i2c.c                     |   4 +-
 drivers/iio/magnetometer/st_magn_buffer.c          |   3 +-
 drivers/iio/magnetometer/st_magn_core.c            |   3 -
 drivers/iio/magnetometer/st_magn_i2c.c             |   4 +-
 drivers/iio/pressure/st_pressure_buffer.c          |   3 +-
 drivers/iio/pressure/st_pressure_core.c            |   3 -
 drivers/iio/pressure/st_pressure_i2c.c             |   4 +-
 include/linux/iio/common/st_sensors.h              |  40 +--------
 include/linux/iio/common/st_sensors_i2c.h          |   4 +-
 19 files changed, 104 insertions(+), 212 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c
index 0205c0167cdd..05f9aea431e2 100644
--- a/drivers/iio/accel/st_accel_buffer.c
+++ b/drivers/iio/accel/st_accel_buffer.c
@@ -39,7 +39,8 @@ static int st_accel_buffer_postenable(struct iio_dev *indio_dev)
 	int err;
 	struct st_sensor_data *adata = iio_priv(indio_dev);
 
-	adata->buffer_data = kmalloc(indio_dev->scan_bytes, GFP_KERNEL);
+	adata->buffer_data = kmalloc(indio_dev->scan_bytes,
+				     GFP_DMA | GFP_KERNEL);
 	if (adata->buffer_data == NULL) {
 		err = -ENOMEM;
 		goto allocate_memory_error;
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 630909702a19..0b17004cb12e 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -13,7 +13,6 @@
 #include <linux/acpi.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/gpio.h>
@@ -1177,7 +1176,6 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &accel_info;
-	mutex_init(&adata->tb.buf_lock);
 
 	err = st_sensors_power_enable(indio_dev);
 	if (err)
@@ -1188,7 +1186,6 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 		goto st_accel_power_off;
 
 	adata->num_data_channels = ST_ACCEL_NUMBER_DATA_CHANNELS;
-	adata->multiread_bit = adata->sensor_settings->multi_read_bit;
 	indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS;
 
 	channels_size = indio_dev->num_channels * sizeof(struct iio_chan_spec);
diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
index a92cf776031e..50fa0fc32baa 100644
--- a/drivers/iio/accel/st_accel_i2c.c
+++ b/drivers/iio/accel/st_accel_i2c.c
@@ -174,7 +174,9 @@ static int st_accel_i2c_probe(struct i2c_client *client)
 	adata = iio_priv(indio_dev);
 	adata->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_i2c_configure(indio_dev, client, adata);
+	ret = st_sensors_i2c_configure(indio_dev, client);
+	if (ret < 0)
+		return ret;
 
 	ret = st_accel_common_probe(indio_dev);
 	if (ret < 0)
diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c
index 4a68669dc555..eee30130ae23 100644
--- a/drivers/iio/common/st_sensors/st_sensors_buffer.c
+++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c
@@ -17,15 +17,16 @@
 #include <linux/iio/trigger_consumer.h>
 #include <linux/iio/triggered_buffer.h>
 #include <linux/irqreturn.h>
+#include <linux/regmap.h>
 
 #include <linux/iio/common/st_sensors.h>
 
 
 static int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf)
 {
-	int i;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
 	unsigned int num_data_channels = sdata->num_data_channels;
+	int i;
 
 	for_each_set_bit(i, indio_dev->active_scan_mask, num_data_channels) {
 		const struct iio_chan_spec *channel = &indio_dev->channels[i];
@@ -36,11 +37,8 @@ static int st_sensors_get_buffer_element(struct iio_dev *indio_dev, u8 *buf)
 			channel->scan_type.storagebits >> 3;
 
 		buf = PTR_ALIGN(buf, storage_bytes);
-		if (sdata->tf->read_multiple_byte(&sdata->tb, sdata->dev,
-						  channel->address,
-						  bytes_to_read, buf,
-						  sdata->multiread_bit) <
-		    bytes_to_read)
+		if (regmap_bulk_read(sdata->regmap, channel->address,
+				     buf, bytes_to_read) < 0)
 			return -EIO;
 
 		/* Advance the buffer pointer */
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index f05bf7cf0594..4a3064fb6cd9 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -15,6 +15,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/regmap.h>
 #include <asm/unaligned.h>
 #include <linux/iio/common/st_sensors.h>
 
@@ -28,19 +29,10 @@ static inline u32 st_sensors_get_unaligned_le24(const u8 *p)
 int st_sensors_write_data_with_mask(struct iio_dev *indio_dev,
 				    u8 reg_addr, u8 mask, u8 data)
 {
-	int err;
-	u8 new_data;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
 
-	err = sdata->tf->read_byte(&sdata->tb, sdata->dev, reg_addr, &new_data);
-	if (err < 0)
-		goto st_sensors_write_data_with_mask_error;
-
-	new_data = ((new_data & (~mask)) | ((data << __ffs(mask)) & mask));
-	err = sdata->tf->write_byte(&sdata->tb, sdata->dev, reg_addr, new_data);
-
-st_sensors_write_data_with_mask_error:
-	return err;
+	return regmap_update_bits(sdata->regmap,
+				  reg_addr, mask, data << __ffs(mask));
 }
 
 int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev,
@@ -48,19 +40,15 @@ int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev,
 				  unsigned *readval)
 {
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
-	u8 readdata;
 	int err;
 
 	if (!readval)
-		return sdata->tf->write_byte(&sdata->tb, sdata->dev,
-					     (u8)reg, (u8)writeval);
+		return regmap_write(sdata->regmap, reg, writeval);
 
-	err = sdata->tf->read_byte(&sdata->tb, sdata->dev, (u8)reg, &readdata);
+	err = regmap_read(sdata->regmap, reg, readval);
 	if (err < 0)
 		return err;
 
-	*readval = (unsigned)readdata;
-
 	return 0;
 }
 EXPORT_SYMBOL(st_sensors_debugfs_reg_access);
@@ -545,7 +533,7 @@ st_sensors_match_scale_error:
 EXPORT_SYMBOL(st_sensors_set_fullscale_by_gain);
 
 static int st_sensors_read_axis_data(struct iio_dev *indio_dev,
-				struct iio_chan_spec const *ch, int *data)
+				     struct iio_chan_spec const *ch, int *data)
 {
 	int err;
 	u8 *outdata;
@@ -554,13 +542,12 @@ static int st_sensors_read_axis_data(struct iio_dev *indio_dev,
 
 	byte_for_channel = DIV_ROUND_UP(ch->scan_type.realbits +
 					ch->scan_type.shift, 8);
-	outdata = kmalloc(byte_for_channel, GFP_KERNEL);
+	outdata = kmalloc(byte_for_channel, GFP_DMA | GFP_KERNEL);
 	if (!outdata)
 		return -ENOMEM;
 
-	err = sdata->tf->read_multiple_byte(&sdata->tb, sdata->dev,
-				ch->address, byte_for_channel,
-				outdata, sdata->multiread_bit);
+	err = regmap_bulk_read(sdata->regmap, ch->address,
+			       outdata, byte_for_channel);
 	if (err < 0)
 		goto st_sensors_free_memory;
 
@@ -645,13 +632,11 @@ EXPORT_SYMBOL(st_sensors_get_settings_index);
 int st_sensors_verify_id(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
-	int err;
-	u8 wai;
+	int wai, err;
 
 	if (sdata->sensor_settings->wai_addr) {
-		err = sdata->tf->read_byte(&sdata->tb, sdata->dev,
-					   sdata->sensor_settings->wai_addr,
-					   &wai);
+		err = regmap_read(sdata->regmap,
+				  sdata->sensor_settings->wai_addr, &wai);
 		if (err < 0) {
 			dev_err(&indio_dev->dev,
 				"failed to read Who-Am-I register.\n");
diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c
index b1c9812407e7..9240625534df 100644
--- a/drivers/iio/common/st_sensors/st_sensors_i2c.c
+++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c
@@ -13,6 +13,7 @@
 #include <linux/iio/iio.h>
 #include <linux/of_device.h>
 #include <linux/acpi.h>
+#include <linux/regmap.h>
 
 #include <linux/iio/common/st_sensors_i2c.h>
 
@@ -26,55 +27,51 @@ static unsigned int st_sensors_i2c_get_irq(struct iio_dev *indio_dev)
 	return to_i2c_client(sdata->dev)->irq;
 }
 
-static int st_sensors_i2c_read_byte(struct st_sensor_transfer_buffer *tb,
-				struct device *dev, u8 reg_addr, u8 *res_byte)
-{
-	int err;
-
-	err = i2c_smbus_read_byte_data(to_i2c_client(dev), reg_addr);
-	if (err < 0)
-		goto st_accel_i2c_read_byte_error;
-
-	*res_byte = err & 0xff;
-
-st_accel_i2c_read_byte_error:
-	return err < 0 ? err : 0;
-}
-
-static int st_sensors_i2c_read_multiple_byte(
-		struct st_sensor_transfer_buffer *tb, struct device *dev,
-			u8 reg_addr, int len, u8 *data, bool multiread_bit)
-{
-	if (multiread_bit)
-		reg_addr |= ST_SENSORS_I2C_MULTIREAD;
-
-	return i2c_smbus_read_i2c_block_data_or_emulated(to_i2c_client(dev),
-							 reg_addr, len, data);
-}
-
-static int st_sensors_i2c_write_byte(struct st_sensor_transfer_buffer *tb,
-				struct device *dev, u8 reg_addr, u8 data)
-{
-	return i2c_smbus_write_byte_data(to_i2c_client(dev), reg_addr, data);
-}
+static const struct regmap_config st_sensors_i2c_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
 
-static const struct st_sensor_transfer_function st_sensors_tf_i2c = {
-	.read_byte = st_sensors_i2c_read_byte,
-	.write_byte = st_sensors_i2c_write_byte,
-	.read_multiple_byte = st_sensors_i2c_read_multiple_byte,
+static const struct regmap_config st_sensors_i2c_regmap_multiread_bit_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.read_flag_mask = ST_SENSORS_I2C_MULTIREAD,
 };
 
-void st_sensors_i2c_configure(struct iio_dev *indio_dev,
-		struct i2c_client *client, struct st_sensor_data *sdata)
+/*
+ * st_sensors_i2c_configure() - configure I2C interface
+ * @indio_dev: IIO device reference.
+ * @client: i2c client reference.
+ *
+ * Return: 0 on success, else a negative error code.
+ */
+int st_sensors_i2c_configure(struct iio_dev *indio_dev,
+			     struct i2c_client *client)
 {
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	const struct regmap_config *config;
+
+	if (sdata->sensor_settings->multi_read_bit)
+		config = &st_sensors_i2c_regmap_multiread_bit_config;
+	else
+		config = &st_sensors_i2c_regmap_config;
+
+	sdata->regmap = devm_regmap_init_i2c(client, config);
+	if (IS_ERR(sdata->regmap)) {
+		dev_err(&client->dev, "Failed to register i2c regmap (%d)\n",
+			(int)PTR_ERR(sdata->regmap));
+		return PTR_ERR(sdata->regmap);
+	}
+
 	i2c_set_clientdata(client, indio_dev);
 
 	indio_dev->dev.parent = &client->dev;
 	indio_dev->name = client->name;
 
 	sdata->dev = &client->dev;
-	sdata->tf = &st_sensors_tf_i2c;
 	sdata->get_irq_data_ready = st_sensors_i2c_get_irq;
+
+	return 0;
 }
 EXPORT_SYMBOL(st_sensors_i2c_configure);
 
diff --git a/drivers/iio/common/st_sensors/st_sensors_spi.c b/drivers/iio/common/st_sensors/st_sensors_spi.c
index a57cd648975c..9c0661a283d0 100644
--- a/drivers/iio/common/st_sensors/st_sensors_spi.c
+++ b/drivers/iio/common/st_sensors/st_sensors_spi.c
@@ -11,12 +11,12 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/iio/iio.h>
+#include <linux/regmap.h>
 
 #include <linux/iio/common/st_sensors_spi.h>
-
+#include "st_sensors_core.h"
 
 #define ST_SENSORS_SPI_MULTIREAD	0xc0
-#define ST_SENSORS_SPI_READ		0x80
 
 static unsigned int st_sensors_spi_get_irq(struct iio_dev *indio_dev)
 {
@@ -25,81 +25,15 @@ static unsigned int st_sensors_spi_get_irq(struct iio_dev *indio_dev)
 	return to_spi_device(sdata->dev)->irq;
 }
 
-static int st_sensors_spi_read(struct st_sensor_transfer_buffer *tb,
-	struct device *dev, u8 reg_addr, int len, u8 *data, bool multiread_bit)
-{
-	int err;
-
-	struct spi_transfer xfers[] = {
-		{
-			.tx_buf = tb->tx_buf,
-			.bits_per_word = 8,
-			.len = 1,
-		},
-		{
-			.rx_buf = tb->rx_buf,
-			.bits_per_word = 8,
-			.len = len,
-		}
-	};
-
-	mutex_lock(&tb->buf_lock);
-	if ((multiread_bit) && (len > 1))
-		tb->tx_buf[0] = reg_addr | ST_SENSORS_SPI_MULTIREAD;
-	else
-		tb->tx_buf[0] = reg_addr | ST_SENSORS_SPI_READ;
-
-	err = spi_sync_transfer(to_spi_device(dev), xfers, ARRAY_SIZE(xfers));
-	if (err)
-		goto acc_spi_read_error;
-
-	memcpy(data, tb->rx_buf, len);
-	mutex_unlock(&tb->buf_lock);
-	return len;
-
-acc_spi_read_error:
-	mutex_unlock(&tb->buf_lock);
-	return err;
-}
-
-static int st_sensors_spi_read_byte(struct st_sensor_transfer_buffer *tb,
-				struct device *dev, u8 reg_addr, u8 *res_byte)
-{
-	return st_sensors_spi_read(tb, dev, reg_addr, 1, res_byte, false);
-}
-
-static int st_sensors_spi_read_multiple_byte(
-	struct st_sensor_transfer_buffer *tb, struct device *dev,
-			u8 reg_addr, int len, u8 *data, bool multiread_bit)
-{
-	return st_sensors_spi_read(tb, dev, reg_addr, len, data, multiread_bit);
-}
-
-static int st_sensors_spi_write_byte(struct st_sensor_transfer_buffer *tb,
-				struct device *dev, u8 reg_addr, u8 data)
-{
-	int err;
-
-	struct spi_transfer xfers = {
-		.tx_buf = tb->tx_buf,
-		.bits_per_word = 8,
-		.len = 2,
-	};
-
-	mutex_lock(&tb->buf_lock);
-	tb->tx_buf[0] = reg_addr;
-	tb->tx_buf[1] = data;
-
-	err = spi_sync_transfer(to_spi_device(dev), &xfers, 1);
-	mutex_unlock(&tb->buf_lock);
-
-	return err;
-}
+static const struct regmap_config st_sensors_spi_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+};
 
-static const struct st_sensor_transfer_function st_sensors_tf_spi = {
-	.read_byte = st_sensors_spi_read_byte,
-	.write_byte = st_sensors_spi_write_byte,
-	.read_multiple_byte = st_sensors_spi_read_multiple_byte,
+static const struct regmap_config st_sensors_spi_regmap_multiread_bit_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.read_flag_mask = ST_SENSORS_SPI_MULTIREAD,
 };
 
 /*
@@ -155,6 +89,7 @@ int st_sensors_spi_configure(struct iio_dev *indio_dev,
 			     struct spi_device *spi)
 {
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	const struct regmap_config *config;
 	int err;
 
 	if (st_sensors_is_spi_3_wire(spi)) {
@@ -164,13 +99,24 @@ int st_sensors_spi_configure(struct iio_dev *indio_dev,
 			return err;
 	}
 
+	if (sdata->sensor_settings->multi_read_bit)
+		config = &st_sensors_spi_regmap_multiread_bit_config;
+	else
+		config = &st_sensors_spi_regmap_config;
+
+	sdata->regmap = devm_regmap_init_spi(spi, config);
+	if (IS_ERR(sdata->regmap)) {
+		dev_err(&spi->dev, "Failed to register spi regmap (%d)\n",
+			(int)PTR_ERR(sdata->regmap));
+		return PTR_ERR(sdata->regmap);
+	}
+
 	spi_set_drvdata(spi, indio_dev);
 
 	indio_dev->dev.parent = &spi->dev;
 	indio_dev->name = spi->modalias;
 
 	sdata->dev = &spi->dev;
-	sdata->tf = &st_sensors_tf_spi;
 	sdata->get_irq_data_ready = st_sensors_spi_get_irq;
 
 	return 0;
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 630c8cb35e8b..bed7b8682b17 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -13,6 +13,7 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/trigger.h>
 #include <linux/interrupt.h>
+#include <linux/regmap.h>
 #include <linux/iio/common/st_sensors.h>
 #include "st_sensors_core.h"
 
@@ -26,8 +27,7 @@
 static int st_sensors_new_samples_available(struct iio_dev *indio_dev,
 					    struct st_sensor_data *sdata)
 {
-	u8 status;
-	int ret;
+	int ret, status;
 
 	/* How would I know if I can't check it? */
 	if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr)
@@ -37,9 +37,9 @@ static int st_sensors_new_samples_available(struct iio_dev *indio_dev,
 	if (!indio_dev->active_scan_mask)
 		return 0;
 
-	ret = sdata->tf->read_byte(&sdata->tb, sdata->dev,
-			sdata->sensor_settings->drdy_irq.stat_drdy.addr,
-			&status);
+	ret = regmap_read(sdata->regmap,
+			  sdata->sensor_settings->drdy_irq.stat_drdy.addr,
+			  &status);
 	if (ret < 0) {
 		dev_err(sdata->dev,
 			"error checking samples available\n");
diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c
index 6e362f735e92..21360681d4dd 100644
--- a/drivers/iio/gyro/st_gyro_buffer.c
+++ b/drivers/iio/gyro/st_gyro_buffer.c
@@ -39,7 +39,8 @@ static int st_gyro_buffer_postenable(struct iio_dev *indio_dev)
 	int err;
 	struct st_sensor_data *gdata = iio_priv(indio_dev);
 
-	gdata->buffer_data = kmalloc(indio_dev->scan_bytes, GFP_KERNEL);
+	gdata->buffer_data = kmalloc(indio_dev->scan_bytes,
+				     GFP_DMA | GFP_KERNEL);
 	if (gdata->buffer_data == NULL) {
 		err = -ENOMEM;
 		goto allocate_memory_error;
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 4b87e79aa744..02e42c945181 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/gpio.h>
@@ -394,7 +393,6 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &gyro_info;
-	mutex_init(&gdata->tb.buf_lock);
 
 	err = st_sensors_power_enable(indio_dev);
 	if (err)
@@ -405,7 +403,6 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 		goto st_gyro_power_off;
 
 	gdata->num_data_channels = ST_GYRO_NUMBER_DATA_CHANNELS;
-	gdata->multiread_bit = gdata->sensor_settings->multi_read_bit;
 	indio_dev->channels = gdata->sensor_settings->ch;
 	indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS;
 
diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c
index fa71e94b76f4..05a1a0874bd5 100644
--- a/drivers/iio/gyro/st_gyro_i2c.c
+++ b/drivers/iio/gyro/st_gyro_i2c.c
@@ -87,7 +87,9 @@ static int st_gyro_i2c_probe(struct i2c_client *client,
 	gdata = iio_priv(indio_dev);
 	gdata->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_i2c_configure(indio_dev, client, gdata);
+	err = st_sensors_i2c_configure(indio_dev, client);
+	if (err < 0)
+		return err;
 
 	err = st_gyro_common_probe(indio_dev);
 	if (err < 0)
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index 11d7806655bc..9dba93539a99 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -34,7 +34,8 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
 	int err;
 	struct st_sensor_data *mdata = iio_priv(indio_dev);
 
-	mdata->buffer_data = kmalloc(indio_dev->scan_bytes, GFP_KERNEL);
+	mdata->buffer_data = kmalloc(indio_dev->scan_bytes,
+				     GFP_DMA | GFP_KERNEL);
 	if (mdata->buffer_data == NULL) {
 		err = -ENOMEM;
 		goto allocate_memory_error;
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 3f313aefece6..804353a483c7 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/gpio.h>
@@ -496,7 +495,6 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &magn_info;
-	mutex_init(&mdata->tb.buf_lock);
 
 	err = st_sensors_power_enable(indio_dev);
 	if (err)
@@ -507,7 +505,6 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 		goto st_magn_power_off;
 
 	mdata->num_data_channels = ST_MAGN_NUMBER_DATA_CHANNELS;
-	mdata->multiread_bit = mdata->sensor_settings->multi_read_bit;
 	indio_dev->channels = mdata->sensor_settings->ch;
 	indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS;
 
diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c
index d5d565639bed..fdba480a12be 100644
--- a/drivers/iio/magnetometer/st_magn_i2c.c
+++ b/drivers/iio/magnetometer/st_magn_i2c.c
@@ -79,7 +79,9 @@ static int st_magn_i2c_probe(struct i2c_client *client,
 	mdata = iio_priv(indio_dev);
 	mdata->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_i2c_configure(indio_dev, client, mdata);
+	err = st_sensors_i2c_configure(indio_dev, client);
+	if (err < 0)
+		return err;
 
 	err = st_magn_common_probe(indio_dev);
 	if (err < 0)
diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c
index 4566e08a64a1..f21b630abaa0 100644
--- a/drivers/iio/pressure/st_pressure_buffer.c
+++ b/drivers/iio/pressure/st_pressure_buffer.c
@@ -39,7 +39,8 @@ static int st_press_buffer_postenable(struct iio_dev *indio_dev)
 	int err;
 	struct st_sensor_data *press_data = iio_priv(indio_dev);
 
-	press_data->buffer_data = kmalloc(indio_dev->scan_bytes, GFP_KERNEL);
+	press_data->buffer_data = kmalloc(indio_dev->scan_bytes,
+					  GFP_DMA | GFP_KERNEL);
 	if (press_data->buffer_data == NULL) {
 		err = -ENOMEM;
 		goto allocate_memory_error;
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index a783fc075c26..9ef92a501286 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -12,7 +12,6 @@
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/i2c.h>
 #include <linux/gpio.h>
@@ -692,7 +691,6 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &press_info;
-	mutex_init(&press_data->tb.buf_lock);
 
 	err = st_sensors_power_enable(indio_dev);
 	if (err)
@@ -709,7 +707,6 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 	 * element.
 	 */
 	press_data->num_data_channels = press_data->sensor_settings->num_ch - 1;
-	press_data->multiread_bit = press_data->sensor_settings->multi_read_bit;
 	indio_dev->channels = press_data->sensor_settings->ch;
 	indio_dev->num_channels = press_data->sensor_settings->num_ch;
 
diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c
index 466e7dde5eae..71d2ed6b4948 100644
--- a/drivers/iio/pressure/st_pressure_i2c.c
+++ b/drivers/iio/pressure/st_pressure_i2c.c
@@ -112,7 +112,9 @@ static int st_press_i2c_probe(struct i2c_client *client,
 	press_data = iio_priv(indio_dev);
 	press_data->sensor_settings = (struct st_sensor_settings *)settings;
 
-	st_sensors_i2c_configure(indio_dev, client, press_data);
+	ret = st_sensors_i2c_configure(indio_dev, client);
+	if (ret < 0)
+		return ret;
 
 	ret = st_press_common_probe(indio_dev);
 	if (ret < 0)
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 566b955e2980..28fc1f9fa7d5 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -16,6 +16,7 @@
 #include <linux/iio/trigger.h>
 #include <linux/bitops.h>
 #include <linux/regulator/consumer.h>
+#include <linux/regmap.h>
 
 #include <linux/platform_data/st_sensors_pdata.h>
 
@@ -169,36 +170,6 @@ struct st_sensor_data_ready_irq {
 	} ig1;
 };
 
-/**
- * struct st_sensor_transfer_buffer - ST sensor device I/O buffer
- * @buf_lock: Mutex to protect rx and tx buffers.
- * @tx_buf: Buffer used by SPI transfer function to send data to the sensors.
- *	This buffer is used to avoid DMA not-aligned issue.
- * @rx_buf: Buffer used by SPI transfer to receive data from sensors.
- *	This buffer is used to avoid DMA not-aligned issue.
- */
-struct st_sensor_transfer_buffer {
-	struct mutex buf_lock;
-	u8 rx_buf[ST_SENSORS_RX_MAX_LENGTH];
-	u8 tx_buf[ST_SENSORS_TX_MAX_LENGTH] ____cacheline_aligned;
-};
-
-/**
- * struct st_sensor_transfer_function - ST sensor device I/O function
- * @read_byte: Function used to read one byte.
- * @write_byte: Function used to write one byte.
- * @read_multiple_byte: Function used to read multiple byte.
- */
-struct st_sensor_transfer_function {
-	int (*read_byte) (struct st_sensor_transfer_buffer *tb,
-				struct device *dev, u8 reg_addr, u8 *res_byte);
-	int (*write_byte) (struct st_sensor_transfer_buffer *tb,
-				struct device *dev, u8 reg_addr, u8 data);
-	int (*read_multiple_byte) (struct st_sensor_transfer_buffer *tb,
-		struct device *dev, u8 reg_addr, int len, u8 *data,
-							bool multiread_bit);
-};
-
 /**
  * struct st_sensor_settings - ST specific sensor settings
  * @wai: Contents of WhoAmI register.
@@ -242,16 +213,14 @@ struct st_sensor_settings {
  * @current_fullscale: Maximum range of measure by the sensor.
  * @vdd: Pointer to sensor's Vdd power supply
  * @vdd_io: Pointer to sensor's Vdd-IO power supply
+ * @regmap: Pointer to specific sensor regmap configuration.
  * @enabled: Status of the sensor (false->off, true->on).
- * @multiread_bit: Use or not particular bit for [I2C/SPI] multiread.
  * @buffer_data: Data used by buffer part.
  * @odr: Output data rate of the sensor [Hz].
  * num_data_channels: Number of data channels used in buffer.
  * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2).
  * @int_pin_open_drain: Set the interrupt/DRDY to open drain.
  * @get_irq_data_ready: Function to get the IRQ used for data ready signal.
- * @tf: Transfer function structure used by I/O operations.
- * @tb: Transfer buffers and mutex used by I/O operations.
  * @edge_irq: the IRQ triggers on edges and need special handling.
  * @hw_irq_trigger: if we're using the hardware interrupt on the sensor.
  * @hw_timestamp: Latest timestamp from the interrupt handler, when in use.
@@ -264,9 +233,9 @@ struct st_sensor_data {
 	struct st_sensor_fullscale_avl *current_fullscale;
 	struct regulator *vdd;
 	struct regulator *vdd_io;
+	struct regmap *regmap;
 
 	bool enabled;
-	bool multiread_bit;
 
 	char *buffer_data;
 
@@ -278,9 +247,6 @@ struct st_sensor_data {
 
 	unsigned int (*get_irq_data_ready) (struct iio_dev *indio_dev);
 
-	const struct st_sensor_transfer_function *tf;
-	struct st_sensor_transfer_buffer tb;
-
 	bool edge_irq;
 	bool hw_irq_trigger;
 	s64 hw_timestamp;
diff --git a/include/linux/iio/common/st_sensors_i2c.h b/include/linux/iio/common/st_sensors_i2c.h
index 5ada89944698..01e424e2af4f 100644
--- a/include/linux/iio/common/st_sensors_i2c.h
+++ b/include/linux/iio/common/st_sensors_i2c.h
@@ -14,8 +14,8 @@
 #include <linux/iio/common/st_sensors.h>
 #include <linux/of.h>
 
-void st_sensors_i2c_configure(struct iio_dev *indio_dev,
-		struct i2c_client *client, struct st_sensor_data *sdata);
+int st_sensors_i2c_configure(struct iio_dev *indio_dev,
+			     struct i2c_client *client);
 
 #ifdef CONFIG_ACPI
 int st_sensors_match_acpi_device(struct device *dev);
-- 
cgit v1.2.3


From dca39af8831e65adc13e0f9f8bdca3a746185072 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Tue, 23 Jul 2019 10:36:38 +0300
Subject: iio: imu: adis: Add support for SPI transfer cs_change_delay

The ADIS16460 requires a higher delay before the next transfer. Since the
SPI framework supports configuring the delay before the next transfer, this
driver will become the first user of it.

The support for this functionality in ADIS16460 requires an addition to the
ADIS lib to support the `cs_change_delay` functionality from the SPI
subsystem.

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/adis.c       | 12 ++++++++++++
 include/linux/iio/imu/adis.h |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index 30281e91dbf9..1631c255deab 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -39,18 +39,24 @@ int adis_write_reg(struct adis *adis, unsigned int reg,
 			.len = 2,
 			.cs_change = 1,
 			.delay_usecs = adis->data->write_delay,
+			.cs_change_delay = adis->data->cs_change_delay,
+			.cs_change_delay_unit = SPI_DELAY_UNIT_USECS,
 		}, {
 			.tx_buf = adis->tx + 2,
 			.bits_per_word = 8,
 			.len = 2,
 			.cs_change = 1,
 			.delay_usecs = adis->data->write_delay,
+			.cs_change_delay = adis->data->cs_change_delay,
+			.cs_change_delay_unit = SPI_DELAY_UNIT_USECS,
 		}, {
 			.tx_buf = adis->tx + 4,
 			.bits_per_word = 8,
 			.len = 2,
 			.cs_change = 1,
 			.delay_usecs = adis->data->write_delay,
+			.cs_change_delay = adis->data->cs_change_delay,
+			.cs_change_delay_unit = SPI_DELAY_UNIT_USECS,
 		}, {
 			.tx_buf = adis->tx + 6,
 			.bits_per_word = 8,
@@ -133,12 +139,16 @@ int adis_read_reg(struct adis *adis, unsigned int reg,
 			.len = 2,
 			.cs_change = 1,
 			.delay_usecs = adis->data->write_delay,
+			.cs_change_delay = adis->data->cs_change_delay,
+			.cs_change_delay_unit = SPI_DELAY_UNIT_USECS,
 		}, {
 			.tx_buf = adis->tx + 2,
 			.bits_per_word = 8,
 			.len = 2,
 			.cs_change = 1,
 			.delay_usecs = adis->data->read_delay,
+			.cs_change_delay = adis->data->cs_change_delay,
+			.cs_change_delay_unit = SPI_DELAY_UNIT_USECS,
 		}, {
 			.tx_buf = adis->tx + 4,
 			.rx_buf = adis->rx,
@@ -146,6 +156,8 @@ int adis_read_reg(struct adis *adis, unsigned int reg,
 			.len = 2,
 			.cs_change = 1,
 			.delay_usecs = adis->data->read_delay,
+			.cs_change_delay = adis->data->cs_change_delay,
+			.cs_change_delay_unit = SPI_DELAY_UNIT_USECS,
 		}, {
 			.rx_buf = adis->rx + 2,
 			.bits_per_word = 8,
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 3428d06b2f44..4c53815bb729 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -26,6 +26,7 @@ struct adis_burst;
  * struct adis_data - ADIS chip variant specific data
  * @read_delay: SPI delay for read operations in us
  * @write_delay: SPI delay for write operations in us
+ * @cs_change_delay: SPI delay between CS changes in us
  * @glob_cmd_reg: Register address of the GLOB_CMD register
  * @msc_ctrl_reg: Register address of the MSC_CTRL register
  * @diag_stat_reg: Register address of the DIAG_STAT register
@@ -35,6 +36,7 @@ struct adis_burst;
 struct adis_data {
 	unsigned int read_delay;
 	unsigned int write_delay;
+	unsigned int cs_change_delay;
 
 	unsigned int glob_cmd_reg;
 	unsigned int msc_ctrl_reg;
-- 
cgit v1.2.3


From 12bf745c9afb6755acba186091bcbb61229f4165 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Mon, 15 Jul 2019 16:14:51 -0700
Subject: iio: cros_ec: Add sign vector in core for backward compatibility

To allow cros_ec iio core library to be used with legacy device, add a
vector to rotate sensor data if necessary: legacy devices are not
reporting data in HTML5/Android sensor referential.

Check the data is not rotated on recent chromebooks that use the HTML5
standard to present sensor data.

Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c | 4 ++++
 include/linux/iio/common/cros_ec_sensors_core.h           | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
index d02660b1f0fe..3b491cf7be95 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -101,6 +101,9 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
 		}
 		state->type = state->resp->info.type;
 		state->loc = state->resp->info.location;
+
+		/* Set sign vector, only used for backward compatibility. */
+		memset(state->sign, 1, CROS_EC_SENSOR_MAX_AXIS);
 	}
 
 	return 0;
@@ -303,6 +306,7 @@ static int cros_ec_sensors_read_data_unsafe(struct iio_dev *indio_dev,
 		if (ret < 0)
 			return ret;
 
+		*data *= st->sign[i];
 		data++;
 	}
 
diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h
index bb03a252bd04..6f13c73bac3d 100644
--- a/include/linux/iio/common/cros_ec_sensors_core.h
+++ b/include/linux/iio/common/cros_ec_sensors_core.h
@@ -66,7 +66,7 @@ struct cros_ec_sensors_core_state {
 		s16 offset;
 		u16 scale;
 	} calib[CROS_EC_SENSOR_MAX_AXIS];
-
+	s8 sign[CROS_EC_SENSOR_MAX_AXIS];
 	u8 samples[CROS_EC_SAMPLE_SIZE];
 
 	int (*read_ec_sensors_data)(struct iio_dev *indio_dev,
-- 
cgit v1.2.3


From ae7b02ad2f32d39d1434655f346c04a16c1aa703 Mon Sep 17 00:00:00 2001
From: Fabien Lahoudere <fabien.lahoudere@collabora.com>
Date: Tue, 16 Jul 2019 11:11:06 +0200
Subject: iio: common: cros_ec_sensors: Expose cros_ec_sensors frequency range
 via iio sysfs

Embedded controller return minimum and maximum frequencies, unfortunately
we have no way to know the step for all available frequencies.
Even if not complete, we can return a list of known values using the
standard read_avail callback (IIO_CHAN_INFO_SAMP_FREQ) to provide them to
userland.

Now cros_ec_* sensors provides frequencies values in sysfs like this:
"0 min max". 0 is always true to disable the sensor.

Default frequencies are provided for earlier protocol.

Signed-off-by: Nick Vaccaro <nvaccaro@chromium.org>
Signed-off-by: Fabien Lahoudere <fabien.lahoudere@collabora.com>
[rebased on top of iio/testing and solved conflicts]
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../iio/common/cros_ec_sensors/cros_ec_sensors.c   |  3 +
 .../common/cros_ec_sensors/cros_ec_sensors_core.c  | 65 ++++++++++++++++++++++
 drivers/iio/light/cros_ec_light_prox.c             |  3 +
 include/linux/iio/common/cros_ec_sensors_core.h    | 21 +++++++
 4 files changed, 92 insertions(+)

(limited to 'include')

diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
index 2af09606c438..6a4919cd83c3 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
@@ -215,6 +215,7 @@ static int cros_ec_sensors_write(struct iio_dev *indio_dev,
 static const struct iio_info ec_sensors_info = {
 	.read_raw = &cros_ec_sensors_read,
 	.write_raw = &cros_ec_sensors_write,
+	.read_avail = &cros_ec_sensors_core_read_avail,
 };
 
 static int cros_ec_sensors_probe(struct platform_device *pdev)
@@ -252,6 +253,8 @@ static int cros_ec_sensors_probe(struct platform_device *pdev)
 			BIT(IIO_CHAN_INFO_SCALE) |
 			BIT(IIO_CHAN_INFO_FREQUENCY) |
 			BIT(IIO_CHAN_INFO_SAMP_FREQ);
+		channel->info_mask_shared_by_all_available =
+			BIT(IIO_CHAN_INFO_SAMP_FREQ);
 		channel->scan_type.realbits = CROS_EC_SENSOR_BITS;
 		channel->scan_type.storagebits = CROS_EC_SENSOR_BITS;
 		channel->scan_index = i;
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
index 3b491cf7be95..fd833295bb17 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -50,6 +50,37 @@ static int cros_ec_get_host_cmd_version_mask(struct cros_ec_device *ec_dev,
 	return ret;
 }
 
+static void get_default_min_max_freq(enum motionsensor_type type,
+				     u32 *min_freq,
+				     u32 *max_freq)
+{
+	switch (type) {
+	case MOTIONSENSE_TYPE_ACCEL:
+	case MOTIONSENSE_TYPE_GYRO:
+		*min_freq = 12500;
+		*max_freq = 100000;
+		break;
+	case MOTIONSENSE_TYPE_MAG:
+		*min_freq = 5000;
+		*max_freq = 25000;
+		break;
+	case MOTIONSENSE_TYPE_PROX:
+	case MOTIONSENSE_TYPE_LIGHT:
+		*min_freq = 100;
+		*max_freq = 50000;
+		break;
+	case MOTIONSENSE_TYPE_BARO:
+		*min_freq = 250;
+		*max_freq = 20000;
+		break;
+	case MOTIONSENSE_TYPE_ACTIVITY:
+	default:
+		*min_freq = 0;
+		*max_freq = 0;
+		break;
+	}
+}
+
 int cros_ec_sensors_core_init(struct platform_device *pdev,
 			      struct iio_dev *indio_dev,
 			      bool physical_device)
@@ -104,6 +135,19 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
 
 		/* Set sign vector, only used for backward compatibility. */
 		memset(state->sign, 1, CROS_EC_SENSOR_MAX_AXIS);
+
+		/* 0 is a correct value used to stop the device */
+		state->frequencies[0] = 0;
+		if (state->msg->version < 3) {
+			get_default_min_max_freq(state->resp->info.type,
+						 &state->frequencies[1],
+						 &state->frequencies[2]);
+		} else {
+			state->frequencies[1] =
+			    state->resp->info_3.min_frequency;
+			state->frequencies[2] =
+			    state->resp->info_3.max_frequency;
+		}
 	}
 
 	return 0;
@@ -471,6 +515,27 @@ int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st,
 }
 EXPORT_SYMBOL_GPL(cros_ec_sensors_core_read);
 
+int cros_ec_sensors_core_read_avail(struct iio_dev *indio_dev,
+				    struct iio_chan_spec const *chan,
+				    const int **vals,
+				    int *type,
+				    int *length,
+				    long mask)
+{
+	struct cros_ec_sensors_core_state *state = iio_priv(indio_dev);
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		*length = ARRAY_SIZE(state->frequencies);
+		*vals = (const int *)&state->frequencies;
+		*type = IIO_VAL_INT;
+		return IIO_AVAIL_LIST;
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(cros_ec_sensors_core_read_avail);
+
 int cros_ec_sensors_core_write(struct cros_ec_sensors_core_state *st,
 			       struct iio_chan_spec const *chan,
 			       int val, int val2, long mask)
diff --git a/drivers/iio/light/cros_ec_light_prox.c b/drivers/iio/light/cros_ec_light_prox.c
index 965f346d7d64..44313a009928 100644
--- a/drivers/iio/light/cros_ec_light_prox.c
+++ b/drivers/iio/light/cros_ec_light_prox.c
@@ -162,6 +162,7 @@ static int cros_ec_light_prox_write(struct iio_dev *indio_dev,
 static const struct iio_info cros_ec_light_prox_info = {
 	.read_raw = &cros_ec_light_prox_read,
 	.write_raw = &cros_ec_light_prox_write,
+	.read_avail = &cros_ec_sensors_core_read_avail,
 };
 
 static int cros_ec_light_prox_probe(struct platform_device *pdev)
@@ -196,6 +197,8 @@ static int cros_ec_light_prox_probe(struct platform_device *pdev)
 	channel->info_mask_shared_by_all =
 		BIT(IIO_CHAN_INFO_SAMP_FREQ) |
 		BIT(IIO_CHAN_INFO_FREQUENCY);
+	channel->info_mask_shared_by_all_available =
+		BIT(IIO_CHAN_INFO_SAMP_FREQ);
 	channel->scan_type.realbits = CROS_EC_SENSOR_BITS;
 	channel->scan_type.storagebits = CROS_EC_SENSOR_BITS;
 	channel->scan_type.shift = 0;
diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h
index 6f13c73bac3d..ea1f50ce2e49 100644
--- a/include/linux/iio/common/cros_ec_sensors_core.h
+++ b/include/linux/iio/common/cros_ec_sensors_core.h
@@ -73,6 +73,9 @@ struct cros_ec_sensors_core_state {
 				    unsigned long scan_mask, s16 *data);
 
 	int curr_sampl_freq;
+
+	/* Table of known available frequencies : 0, Min and Max in mHz */
+	int frequencies[3];
 };
 
 /**
@@ -153,6 +156,24 @@ int cros_ec_sensors_core_read(struct cros_ec_sensors_core_state *st,
 			      struct iio_chan_spec const *chan,
 			      int *val, int *val2, long mask);
 
+/**
+ * cros_ec_sensors_core_read_avail() - get available values
+ * @indio_dev:		pointer to state information for device
+ * @chan:	channel specification structure table
+ * @vals:	list of available values
+ * @type:	type of data returned
+ * @length:	number of data returned in the array
+ * @mask:	specifies which values to be requested
+ *
+ * Return:	an error code, IIO_AVAIL_RANGE or IIO_AVAIL_LIST
+ */
+int cros_ec_sensors_core_read_avail(struct iio_dev *indio_dev,
+				    struct iio_chan_spec const *chan,
+				    const int **vals,
+				    int *type,
+				    int *length,
+				    long mask);
+
 /**
  * cros_ec_sensors_core_write() - function to write a value to the sensor
  * @st:		pointer to state information for device
-- 
cgit v1.2.3


From ffe0bbabb0cffceceae07484fde1ec2a63b1537c Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Mon, 8 Jul 2019 10:23:43 +0200
Subject: gpio: don't WARN() on NULL descs if gpiolib is disabled

If gpiolib is disabled, we use the inline stubs from gpio/consumer.h
instead of regular definitions of GPIO API. The stubs for 'optional'
variants of gpiod_get routines return NULL in this case as if the
relevant GPIO wasn't found. This is correct so far.

Calling other (non-gpio_get) stubs from this header triggers a warning
because the GPIO descriptor couldn't have been requested. The warning
however is unconditional (WARN_ON(1)) and is emitted even if the passed
descriptor pointer is NULL.

We don't want to force the users of 'optional' gpio_get to check the
returned pointer before calling e.g. gpiod_set_value() so let's only
WARN on non-NULL descriptors.

Cc: stable@vger.kernel.org
Reported-by: Claus H. Stovgaard <cst@phaseone.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 include/linux/gpio/consumer.h | 64 +++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 9ddcf50a3c59..a7f08fb0f865 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -247,7 +247,7 @@ static inline void gpiod_put(struct gpio_desc *desc)
 	might_sleep();
 
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 
 static inline void devm_gpiod_unhinge(struct device *dev,
@@ -256,7 +256,7 @@ static inline void devm_gpiod_unhinge(struct device *dev,
 	might_sleep();
 
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 
 static inline void gpiod_put_array(struct gpio_descs *descs)
@@ -264,7 +264,7 @@ static inline void gpiod_put_array(struct gpio_descs *descs)
 	might_sleep();
 
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(descs);
 }
 
 static inline struct gpio_desc *__must_check
@@ -317,7 +317,7 @@ static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
 	might_sleep();
 
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 
 static inline void devm_gpiod_put_array(struct device *dev,
@@ -326,32 +326,32 @@ static inline void devm_gpiod_put_array(struct device *dev,
 	might_sleep();
 
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(descs);
 }
 
 
 static inline int gpiod_get_direction(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -ENOSYS;
 }
 static inline int gpiod_direction_input(struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -ENOSYS;
 }
 static inline int gpiod_direction_output(struct gpio_desc *desc, int value)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -ENOSYS;
 }
 static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -ENOSYS;
 }
 
@@ -359,7 +359,7 @@ static inline int gpiod_direction_output_raw(struct gpio_desc *desc, int value)
 static inline int gpiod_get_value(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return 0;
 }
 static inline int gpiod_get_array_value(unsigned int array_size,
@@ -368,13 +368,13 @@ static inline int gpiod_get_array_value(unsigned int array_size,
 					unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 static inline void gpiod_set_value(struct gpio_desc *desc, int value)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 static inline int gpiod_set_array_value(unsigned int array_size,
 					struct gpio_desc **desc_array,
@@ -382,13 +382,13 @@ static inline int gpiod_set_array_value(unsigned int array_size,
 					unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 static inline int gpiod_get_raw_value(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return 0;
 }
 static inline int gpiod_get_raw_array_value(unsigned int array_size,
@@ -397,13 +397,13 @@ static inline int gpiod_get_raw_array_value(unsigned int array_size,
 					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 static inline int gpiod_set_raw_array_value(unsigned int array_size,
 					    struct gpio_desc **desc_array,
@@ -411,14 +411,14 @@ static inline int gpiod_set_raw_array_value(unsigned int array_size,
 					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 
 static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return 0;
 }
 static inline int gpiod_get_array_value_cansleep(unsigned int array_size,
@@ -427,13 +427,13 @@ static inline int gpiod_get_array_value_cansleep(unsigned int array_size,
 				     unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 static inline int gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
@@ -441,13 +441,13 @@ static inline int gpiod_set_array_value_cansleep(unsigned int array_size,
 					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return 0;
 }
 static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
@@ -456,14 +456,14 @@ static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 					       unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
 						int value)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 }
 static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						struct gpio_desc **desc_array,
@@ -471,41 +471,41 @@ static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc_array);
 	return 0;
 }
 
 static inline int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -ENOSYS;
 }
 
 static inline int gpiod_set_transitory(struct gpio_desc *desc, bool transitory)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -ENOSYS;
 }
 
 static inline int gpiod_is_active_low(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return 0;
 }
 static inline int gpiod_cansleep(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return 0;
 }
 
 static inline int gpiod_to_irq(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -EINVAL;
 }
 
@@ -513,7 +513,7 @@ static inline int gpiod_set_consumer_name(struct gpio_desc *desc,
 					  const char *name)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -EINVAL;
 }
 
@@ -525,7 +525,7 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio)
 static inline int desc_to_gpio(const struct gpio_desc *desc)
 {
 	/* GPIO can never have been requested */
-	WARN_ON(1);
+	WARN_ON(desc);
 	return -EINVAL;
 }
 
-- 
cgit v1.2.3


From 085771ec14b9bdb843fe9283d4703ced395d1b0b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:20 -0700
Subject: fs-verity: add UAPI header

Add the UAPI header for fs-verity, including two ioctls:

- FS_IOC_ENABLE_VERITY
- FS_IOC_MEASURE_VERITY

These ioctls are documented in the "User API" section of
Documentation/filesystems/fsverity.rst.

Examples of using these ioctls can be found in fsverity-utils
(https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/fsverity-utils.git).

I've also written xfstests that test these ioctls
(https://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/xfstests-dev.git/log/?h=fsverity).

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/ioctl/ioctl-number.rst |  1 +
 include/uapi/linux/fsverity.h        | 39 ++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 include/uapi/linux/fsverity.h

(limited to 'include')

diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
index 7f8dcae7a230..bef79cd4c6b4 100644
--- a/Documentation/ioctl/ioctl-number.rst
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -233,6 +233,7 @@ Code  Seq#    Include File                                           Comments
 'f'   00-0F  fs/ext4/ext4.h                                          conflict!
 'f'   00-0F  linux/fs.h                                              conflict!
 'f'   00-0F  fs/ocfs2/ocfs2_fs.h                                     conflict!
+'f'   81-8F  linux/fsverity.h
 'g'   00-0F  linux/usb/gadgetfs.h
 'g'   20-2F  linux/usb/g_printer.h
 'h'   00-7F                                                          conflict! Charon filesystem
diff --git a/include/uapi/linux/fsverity.h b/include/uapi/linux/fsverity.h
new file mode 100644
index 000000000000..57d1d7fc0c34
--- /dev/null
+++ b/include/uapi/linux/fsverity.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * fs-verity user API
+ *
+ * These ioctls can be used on filesystems that support fs-verity.  See the
+ * "User API" section of Documentation/filesystems/fsverity.rst.
+ *
+ * Copyright 2019 Google LLC
+ */
+#ifndef _UAPI_LINUX_FSVERITY_H
+#define _UAPI_LINUX_FSVERITY_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define FS_VERITY_HASH_ALG_SHA256	1
+
+struct fsverity_enable_arg {
+	__u32 version;
+	__u32 hash_algorithm;
+	__u32 block_size;
+	__u32 salt_size;
+	__u64 salt_ptr;
+	__u32 sig_size;
+	__u32 __reserved1;
+	__u64 sig_ptr;
+	__u64 __reserved2[11];
+};
+
+struct fsverity_digest {
+	__u16 digest_algorithm;
+	__u16 digest_size; /* input/output */
+	__u8 digest[];
+};
+
+#define FS_IOC_ENABLE_VERITY	_IOW('f', 133, struct fsverity_enable_arg)
+#define FS_IOC_MEASURE_VERITY	_IOWR('f', 134, struct fsverity_digest)
+
+#endif /* _UAPI_LINUX_FSVERITY_H */
-- 
cgit v1.2.3


From fe9918d3b228b3e8c726849d1486933f46b9069e Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:21 -0700
Subject: fs: uapi: define verity bit for FS_IOC_GETFLAGS

Add FS_VERITY_FL to the flags for FS_IOC_GETFLAGS, so that applications
can easily determine whether a file is a verity file at the same time as
they're checking other file flags.  This flag will be gettable only;
FS_IOC_SETFLAGS won't allow setting it, since an ioctl must be used
instead to provide more parameters.

This flag matches the on-disk bit that was already allocated for ext4.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/uapi/linux/fs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 59c71fa8c553..df261b7e0587 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -306,6 +306,7 @@ struct fscrypt_key {
 #define FS_TOPDIR_FL			0x00020000 /* Top of directory hierarchies*/
 #define FS_HUGE_FILE_FL			0x00040000 /* Reserved for ext4 */
 #define FS_EXTENT_FL			0x00080000 /* Extents */
+#define FS_VERITY_FL			0x00100000 /* Verity protected inode */
 #define FS_EA_INODE_FL			0x00200000 /* Inode used for large EA */
 #define FS_EOFBLOCKS_FL			0x00400000 /* Reserved for ext4 */
 #define FS_NOCOW_FL			0x00800000 /* Do not cow file */
-- 
cgit v1.2.3


From 5585f2af737ae3d7454cb0ae77b995cd3ac7e43c Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:21 -0700
Subject: fs-verity: add inode and superblock fields

Analogous to fs/crypto/, add fields to the VFS inode and superblock for
use by the fs/verity/ support layer:

- ->s_vop: points to the fsverity_operations if the filesystem supports
  fs-verity, otherwise is NULL.

- ->i_verity_info: points to cached fs-verity information for the inode
  after someone opens it, otherwise is NULL.

- S_VERITY: bit in ->i_flags that identifies verity inodes, even when
  they haven't been opened yet and thus still have NULL ->i_verity_info.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/fs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 56b8e358af5c..b3a0f5bfb06d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -64,6 +64,8 @@ struct workqueue_struct;
 struct iov_iter;
 struct fscrypt_info;
 struct fscrypt_operations;
+struct fsverity_info;
+struct fsverity_operations;
 struct fs_context;
 struct fs_parameter_description;
 
@@ -723,6 +725,10 @@ struct inode {
 	struct fscrypt_info	*i_crypt_info;
 #endif
 
+#ifdef CONFIG_FS_VERITY
+	struct fsverity_info	*i_verity_info;
+#endif
+
 	void			*i_private; /* fs or device private pointer */
 } __randomize_layout;
 
@@ -1427,6 +1433,9 @@ struct super_block {
 	const struct xattr_handler **s_xattr;
 #ifdef CONFIG_FS_ENCRYPTION
 	const struct fscrypt_operations	*s_cop;
+#endif
+#ifdef CONFIG_FS_VERITY
+	const struct fsverity_operations *s_vop;
 #endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
@@ -1965,6 +1974,7 @@ struct super_operations {
 #endif
 #define S_ENCRYPTED	16384	/* Encrypted file (using fs/crypto/) */
 #define S_CASEFOLD	32768	/* Casefolded file */
+#define S_VERITY	65536	/* Verity file (using fs/verity/) */
 
 /*
  * Note that nosuid etc flags are inode-specific: setting some file-system
@@ -2006,6 +2016,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
 #define IS_DAX(inode)		((inode)->i_flags & S_DAX)
 #define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
 #define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
+#define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
 
 #define IS_WHITEOUT(inode)	(S_ISCHR(inode->i_mode) && \
 				 (inode)->i_rdev == WHITEOUT_DEV)
-- 
cgit v1.2.3


From fd2d1acfcadfe2e42567afaec5e989b38061a7d2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:22 -0700
Subject: fs-verity: add the hook for file ->open()

Add the fsverity_file_open() function, which prepares an fs-verity file
to be read from.  If not already done, it loads the fs-verity descriptor
from the filesystem and sets up an fsverity_info structure for the inode
which describes the Merkle tree and contains the file measurement.  It
also denies all attempts to open verity files for writing.

This commit also begins the include/linux/fsverity.h header, which
declares the interface between fs/verity/ and filesystems.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/Makefile           |   3 +-
 fs/verity/fsverity_private.h |  54 +++++++-
 fs/verity/init.c             |   6 +
 fs/verity/open.c             | 318 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsverity.h     |  71 ++++++++++
 5 files changed, 449 insertions(+), 3 deletions(-)
 create mode 100644 fs/verity/open.c
 create mode 100644 include/linux/fsverity.h

(limited to 'include')

diff --git a/fs/verity/Makefile b/fs/verity/Makefile
index 398f3f85fa18..e6a8951c493a 100644
--- a/fs/verity/Makefile
+++ b/fs/verity/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_FS_VERITY) += hash_algs.o \
-			   init.o
+			   init.o \
+			   open.o
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index 9697aaebb5dc..c79746ff335e 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -15,8 +15,7 @@
 #define pr_fmt(fmt) "fs-verity: " fmt
 
 #include <crypto/sha.h>
-#include <linux/fs.h>
-#include <uapi/linux/fsverity.h>
+#include <linux/fsverity.h>
 
 struct ahash_request;
 
@@ -59,6 +58,40 @@ struct merkle_tree_params {
 	u64 level_start[FS_VERITY_MAX_LEVELS];
 };
 
+/**
+ * fsverity_info - cached verity metadata for an inode
+ *
+ * When a verity file is first opened, an instance of this struct is allocated
+ * and stored in ->i_verity_info; it remains until the inode is evicted.  It
+ * caches information about the Merkle tree that's needed to efficiently verify
+ * data read from the file.  It also caches the file measurement.  The Merkle
+ * tree pages themselves are not cached here, but the filesystem may cache them.
+ */
+struct fsverity_info {
+	struct merkle_tree_params tree_params;
+	u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE];
+	u8 measurement[FS_VERITY_MAX_DIGEST_SIZE];
+	const struct inode *inode;
+};
+
+/*
+ * Merkle tree properties.  The file measurement is the hash of this structure.
+ */
+struct fsverity_descriptor {
+	__u8 version;		/* must be 1 */
+	__u8 hash_algorithm;	/* Merkle tree hash algorithm */
+	__u8 log_blocksize;	/* log2 of size of data and tree blocks */
+	__u8 salt_size;		/* size of salt in bytes; 0 if none */
+	__le32 sig_size;	/* reserved, must be 0 */
+	__le64 data_size;	/* size of file the Merkle tree is built over */
+	__u8 root_hash[64];	/* Merkle tree root hash */
+	__u8 salt[32];		/* salt prepended to each hashed block */
+	__u8 __reserved[144];	/* must be 0's */
+};
+
+/* Arbitrary limit to bound the kmalloc() size.  Can be changed. */
+#define FS_VERITY_MAX_DESCRIPTOR_SIZE	16384
+
 /* hash_algs.c */
 
 extern struct fsverity_hash_alg fsverity_hash_algs[];
@@ -85,4 +118,21 @@ fsverity_msg(const struct inode *inode, const char *level,
 #define fsverity_err(inode, fmt, ...)		\
 	fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__)
 
+/* open.c */
+
+int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
+				     const struct inode *inode,
+				     unsigned int hash_algorithm,
+				     unsigned int log_blocksize,
+				     const u8 *salt, size_t salt_size);
+
+struct fsverity_info *fsverity_create_info(const struct inode *inode,
+					   const void *desc, size_t desc_size);
+
+void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
+
+void fsverity_free_info(struct fsverity_info *vi);
+
+int __init fsverity_init_info_cache(void);
+
 #endif /* _FSVERITY_PRIVATE_H */
diff --git a/fs/verity/init.c b/fs/verity/init.c
index 40076bbe452a..fff1fd634335 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -33,8 +33,14 @@ void fsverity_msg(const struct inode *inode, const char *level,
 
 static int __init fsverity_init(void)
 {
+	int err;
+
 	fsverity_check_hash_algs();
 
+	err = fsverity_init_info_cache();
+	if (err)
+		return err;
+
 	pr_debug("Initialized fs-verity\n");
 	return 0;
 }
diff --git a/fs/verity/open.c b/fs/verity/open.c
new file mode 100644
index 000000000000..8013f77f907e
--- /dev/null
+++ b/fs/verity/open.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/open.c: opening fs-verity files
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/slab.h>
+
+static struct kmem_cache *fsverity_info_cachep;
+
+/**
+ * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
+ * @params: the parameters struct to initialize
+ * @inode: the inode for which the Merkle tree is being built
+ * @hash_algorithm: number of hash algorithm to use
+ * @log_blocksize: log base 2 of block size to use
+ * @salt: pointer to salt (optional)
+ * @salt_size: size of salt, possibly 0
+ *
+ * Validate the hash algorithm and block size, then compute the tree topology
+ * (num levels, num blocks in each level, etc.) and initialize @params.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
+				     const struct inode *inode,
+				     unsigned int hash_algorithm,
+				     unsigned int log_blocksize,
+				     const u8 *salt, size_t salt_size)
+{
+	const struct fsverity_hash_alg *hash_alg;
+	int err;
+	u64 blocks;
+	u64 offset;
+	int level;
+
+	memset(params, 0, sizeof(*params));
+
+	hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
+	if (IS_ERR(hash_alg))
+		return PTR_ERR(hash_alg);
+	params->hash_alg = hash_alg;
+	params->digest_size = hash_alg->digest_size;
+
+	params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
+							salt_size);
+	if (IS_ERR(params->hashstate)) {
+		err = PTR_ERR(params->hashstate);
+		params->hashstate = NULL;
+		fsverity_err(inode, "Error %d preparing hash state", err);
+		goto out_err;
+	}
+
+	if (log_blocksize != PAGE_SHIFT) {
+		fsverity_warn(inode, "Unsupported log_blocksize: %u",
+			      log_blocksize);
+		err = -EINVAL;
+		goto out_err;
+	}
+	params->log_blocksize = log_blocksize;
+	params->block_size = 1 << log_blocksize;
+
+	if (WARN_ON(!is_power_of_2(params->digest_size))) {
+		err = -EINVAL;
+		goto out_err;
+	}
+	if (params->block_size < 2 * params->digest_size) {
+		fsverity_warn(inode,
+			      "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
+			      params->block_size, hash_alg->name);
+		err = -EINVAL;
+		goto out_err;
+	}
+	params->log_arity = params->log_blocksize - ilog2(params->digest_size);
+	params->hashes_per_block = 1 << params->log_arity;
+
+	pr_debug("Merkle tree uses %s with %u-byte blocks (%u hashes/block), salt=%*phN\n",
+		 hash_alg->name, params->block_size, params->hashes_per_block,
+		 (int)salt_size, salt);
+
+	/*
+	 * Compute the number of levels in the Merkle tree and create a map from
+	 * level to the starting block of that level.  Level 'num_levels - 1' is
+	 * the root and is stored first.  Level 0 is the level directly "above"
+	 * the data blocks and is stored last.
+	 */
+
+	/* Compute number of levels and the number of blocks in each level */
+	blocks = (inode->i_size + params->block_size - 1) >> log_blocksize;
+	pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks);
+	while (blocks > 1) {
+		if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
+			fsverity_err(inode, "Too many levels in Merkle tree");
+			err = -EINVAL;
+			goto out_err;
+		}
+		blocks = (blocks + params->hashes_per_block - 1) >>
+			 params->log_arity;
+		/* temporarily using level_start[] to store blocks in level */
+		params->level_start[params->num_levels++] = blocks;
+	}
+
+	/* Compute the starting block of each level */
+	offset = 0;
+	for (level = (int)params->num_levels - 1; level >= 0; level--) {
+		blocks = params->level_start[level];
+		params->level_start[level] = offset;
+		pr_debug("Level %d is %llu blocks starting at index %llu\n",
+			 level, blocks, offset);
+		offset += blocks;
+	}
+
+	params->tree_size = offset << log_blocksize;
+	return 0;
+
+out_err:
+	kfree(params->hashstate);
+	memset(params, 0, sizeof(*params));
+	return err;
+}
+
+/* Compute the file measurement by hashing the fsverity_descriptor. */
+static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg,
+				    const struct fsverity_descriptor *desc,
+				    u8 *measurement)
+{
+	return fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), measurement);
+}
+
+/*
+ * Validate the given fsverity_descriptor and create a new fsverity_info from
+ * it.
+ */
+struct fsverity_info *fsverity_create_info(const struct inode *inode,
+					   const void *_desc, size_t desc_size)
+{
+	const struct fsverity_descriptor *desc = _desc;
+	struct fsverity_info *vi;
+	int err;
+
+	if (desc_size < sizeof(*desc)) {
+		fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
+			     desc_size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (desc->version != 1) {
+		fsverity_err(inode, "Unrecognized descriptor version: %u",
+			     desc->version);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (desc->sig_size ||
+	    memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
+		fsverity_err(inode, "Reserved bits set in descriptor");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (desc->salt_size > sizeof(desc->salt)) {
+		fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (le64_to_cpu(desc->data_size) != inode->i_size) {
+		fsverity_err(inode,
+			     "Wrong data_size: %llu (desc) != %lld (inode)",
+			     le64_to_cpu(desc->data_size), inode->i_size);
+		return ERR_PTR(-EINVAL);
+	}
+
+	vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
+	if (!vi)
+		return ERR_PTR(-ENOMEM);
+	vi->inode = inode;
+
+	err = fsverity_init_merkle_tree_params(&vi->tree_params, inode,
+					       desc->hash_algorithm,
+					       desc->log_blocksize,
+					       desc->salt, desc->salt_size);
+	if (err) {
+		fsverity_err(inode,
+			     "Error %d initializing Merkle tree parameters",
+			     err);
+		goto out;
+	}
+
+	memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);
+
+	err = compute_file_measurement(vi->tree_params.hash_alg, desc,
+				       vi->measurement);
+	if (err) {
+		fsverity_err(inode, "Error %d computing file measurement", err);
+		goto out;
+	}
+	pr_debug("Computed file measurement: %s:%*phN\n",
+		 vi->tree_params.hash_alg->name,
+		 vi->tree_params.digest_size, vi->measurement);
+out:
+	if (err) {
+		fsverity_free_info(vi);
+		vi = ERR_PTR(err);
+	}
+	return vi;
+}
+
+void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
+{
+	/*
+	 * Multiple processes may race to set ->i_verity_info, so use cmpxchg.
+	 * This pairs with the READ_ONCE() in fsverity_get_info().
+	 */
+	if (cmpxchg(&inode->i_verity_info, NULL, vi) != NULL)
+		fsverity_free_info(vi);
+}
+
+void fsverity_free_info(struct fsverity_info *vi)
+{
+	if (!vi)
+		return;
+	kfree(vi->tree_params.hashstate);
+	kmem_cache_free(fsverity_info_cachep, vi);
+}
+
+/* Ensure the inode has an ->i_verity_info */
+static int ensure_verity_info(struct inode *inode)
+{
+	struct fsverity_info *vi = fsverity_get_info(inode);
+	struct fsverity_descriptor *desc;
+	int res;
+
+	if (vi)
+		return 0;
+
+	res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
+	if (res < 0) {
+		fsverity_err(inode,
+			     "Error %d getting verity descriptor size", res);
+		return res;
+	}
+	if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
+		fsverity_err(inode, "Verity descriptor is too large (%d bytes)",
+			     res);
+		return -EMSGSIZE;
+	}
+	desc = kmalloc(res, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
+	if (res < 0) {
+		fsverity_err(inode, "Error %d reading verity descriptor", res);
+		goto out_free_desc;
+	}
+
+	vi = fsverity_create_info(inode, desc, res);
+	if (IS_ERR(vi)) {
+		res = PTR_ERR(vi);
+		goto out_free_desc;
+	}
+
+	fsverity_set_info(inode, vi);
+	res = 0;
+out_free_desc:
+	kfree(desc);
+	return res;
+}
+
+/**
+ * fsverity_file_open() - prepare to open a verity file
+ * @inode: the inode being opened
+ * @filp: the struct file being set up
+ *
+ * When opening a verity file, deny the open if it is for writing.  Otherwise,
+ * set up the inode's ->i_verity_info if not already done.
+ *
+ * When combined with fscrypt, this must be called after fscrypt_file_open().
+ * Otherwise, we won't have the key set up to decrypt the verity metadata.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_file_open(struct inode *inode, struct file *filp)
+{
+	if (!IS_VERITY(inode))
+		return 0;
+
+	if (filp->f_mode & FMODE_WRITE) {
+		pr_debug("Denying opening verity file (ino %lu) for write\n",
+			 inode->i_ino);
+		return -EPERM;
+	}
+
+	return ensure_verity_info(inode);
+}
+EXPORT_SYMBOL_GPL(fsverity_file_open);
+
+/**
+ * fsverity_cleanup_inode() - free the inode's verity info, if present
+ *
+ * Filesystems must call this on inode eviction to free ->i_verity_info.
+ */
+void fsverity_cleanup_inode(struct inode *inode)
+{
+	fsverity_free_info(inode->i_verity_info);
+	inode->i_verity_info = NULL;
+}
+EXPORT_SYMBOL_GPL(fsverity_cleanup_inode);
+
+int __init fsverity_init_info_cache(void)
+{
+	fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
+						   SLAB_RECLAIM_ACCOUNT,
+						   measurement);
+	if (!fsverity_info_cachep)
+		return -ENOMEM;
+	return 0;
+}
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
new file mode 100644
index 000000000000..09b04dab6452
--- /dev/null
+++ b/include/linux/fsverity.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * fs-verity: read-only file-based authenticity protection
+ *
+ * This header declares the interface between the fs/verity/ support layer and
+ * filesystems that support fs-verity.
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef _LINUX_FSVERITY_H
+#define _LINUX_FSVERITY_H
+
+#include <linux/fs.h>
+#include <uapi/linux/fsverity.h>
+
+/* Verity operations for filesystems */
+struct fsverity_operations {
+
+	/**
+	 * Get the verity descriptor of the given inode.
+	 *
+	 * @inode: an inode with the S_VERITY flag set
+	 * @buf: buffer in which to place the verity descriptor
+	 * @bufsize: size of @buf, or 0 to retrieve the size only
+	 *
+	 * If bufsize == 0, then the size of the verity descriptor is returned.
+	 * Otherwise the verity descriptor is written to 'buf' and its actual
+	 * size is returned; -ERANGE is returned if it's too large.  This may be
+	 * called by multiple processes concurrently on the same inode.
+	 *
+	 * Return: the size on success, -errno on failure
+	 */
+	int (*get_verity_descriptor)(struct inode *inode, void *buf,
+				     size_t bufsize);
+};
+
+#ifdef CONFIG_FS_VERITY
+
+static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
+{
+	/* pairs with the cmpxchg() in fsverity_set_info() */
+	return READ_ONCE(inode->i_verity_info);
+}
+
+/* open.c */
+
+extern int fsverity_file_open(struct inode *inode, struct file *filp);
+extern void fsverity_cleanup_inode(struct inode *inode);
+
+#else /* !CONFIG_FS_VERITY */
+
+static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
+{
+	return NULL;
+}
+
+/* open.c */
+
+static inline int fsverity_file_open(struct inode *inode, struct file *filp)
+{
+	return IS_VERITY(inode) ? -EOPNOTSUPP : 0;
+}
+
+static inline void fsverity_cleanup_inode(struct inode *inode)
+{
+}
+
+#endif	/* !CONFIG_FS_VERITY */
+
+#endif	/* _LINUX_FSVERITY_H */
-- 
cgit v1.2.3


From c1d9b584e2cf3f0562d8fcf34574c044d17853a1 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:22 -0700
Subject: fs-verity: add the hook for file ->setattr()

Add a function fsverity_prepare_setattr() which filesystems that support
fs-verity must call to deny truncates of verity files.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/open.c         | 21 +++++++++++++++++++++
 include/linux/fsverity.h |  7 +++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/fs/verity/open.c b/fs/verity/open.c
index 8013f77f907e..2cb2fe8082bf 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -295,6 +295,27 @@ int fsverity_file_open(struct inode *inode, struct file *filp)
 }
 EXPORT_SYMBOL_GPL(fsverity_file_open);
 
+/**
+ * fsverity_prepare_setattr() - prepare to change a verity inode's attributes
+ * @dentry: dentry through which the inode is being changed
+ * @attr: attributes to change
+ *
+ * Verity files are immutable, so deny truncates.  This isn't covered by the
+ * open-time check because sys_truncate() takes a path, not a file descriptor.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	if (IS_VERITY(d_inode(dentry)) && (attr->ia_valid & ATTR_SIZE)) {
+		pr_debug("Denying truncate of verity file (ino %lu)\n",
+			 d_inode(dentry)->i_ino);
+		return -EPERM;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsverity_prepare_setattr);
+
 /**
  * fsverity_cleanup_inode() - free the inode's verity info, if present
  *
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 09b04dab6452..cbd0f84e1620 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -46,6 +46,7 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
 /* open.c */
 
 extern int fsverity_file_open(struct inode *inode, struct file *filp);
+extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
 extern void fsverity_cleanup_inode(struct inode *inode);
 
 #else /* !CONFIG_FS_VERITY */
@@ -62,6 +63,12 @@ static inline int fsverity_file_open(struct inode *inode, struct file *filp)
 	return IS_VERITY(inode) ? -EOPNOTSUPP : 0;
 }
 
+static inline int fsverity_prepare_setattr(struct dentry *dentry,
+					   struct iattr *attr)
+{
+	return IS_VERITY(d_inode(dentry)) ? -EOPNOTSUPP : 0;
+}
+
 static inline void fsverity_cleanup_inode(struct inode *inode)
 {
 }
-- 
cgit v1.2.3


From 8a1d0f9cacc997bedc017056a94f35dc823394ed Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:22 -0700
Subject: fs-verity: add data verification hooks for ->readpages()

Add functions that verify data pages that have been read from a
fs-verity file, against that file's Merkle tree.  These will be called
from filesystems' ->readpage() and ->readpages() methods.

Since data verification can block, a workqueue is provided for these
methods to enqueue verification work from their bio completion callback.

See the "Verifying data" section of
Documentation/filesystems/fsverity.rst for more information.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/Makefile           |   3 +-
 fs/verity/fsverity_private.h |   5 +
 fs/verity/init.c             |   8 ++
 fs/verity/open.c             |   6 +
 fs/verity/verify.c           | 275 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsverity.h     |  56 +++++++++
 6 files changed, 352 insertions(+), 1 deletion(-)
 create mode 100644 fs/verity/verify.c

(limited to 'include')

diff --git a/fs/verity/Makefile b/fs/verity/Makefile
index e6a8951c493a..7fa628cd5eba 100644
--- a/fs/verity/Makefile
+++ b/fs/verity/Makefile
@@ -2,4 +2,5 @@
 
 obj-$(CONFIG_FS_VERITY) += hash_algs.o \
 			   init.o \
-			   open.o
+			   open.o \
+			   verify.o
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index c79746ff335e..eaa2b3b93bbf 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -134,5 +134,10 @@ void fsverity_set_info(struct inode *inode, struct fsverity_info *vi);
 void fsverity_free_info(struct fsverity_info *vi);
 
 int __init fsverity_init_info_cache(void);
+void __init fsverity_exit_info_cache(void);
+
+/* verify.c */
+
+int __init fsverity_init_workqueue(void);
 
 #endif /* _FSVERITY_PRIVATE_H */
diff --git a/fs/verity/init.c b/fs/verity/init.c
index fff1fd634335..b593805aafcc 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -41,7 +41,15 @@ static int __init fsverity_init(void)
 	if (err)
 		return err;
 
+	err = fsverity_init_workqueue();
+	if (err)
+		goto err_exit_info_cache;
+
 	pr_debug("Initialized fs-verity\n");
 	return 0;
+
+err_exit_info_cache:
+	fsverity_exit_info_cache();
+	return err;
 }
 late_initcall(fsverity_init)
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 2cb2fe8082bf..3636a1ed8e2c 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -337,3 +337,9 @@ int __init fsverity_init_info_cache(void)
 		return -ENOMEM;
 	return 0;
 }
+
+void __init fsverity_exit_info_cache(void)
+{
+	kmem_cache_destroy(fsverity_info_cachep);
+	fsverity_info_cachep = NULL;
+}
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
new file mode 100644
index 000000000000..62ab8f6a8ea1
--- /dev/null
+++ b/fs/verity/verify.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/verify.c: data verification functions, i.e. hooks for ->readpages()
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <crypto/hash.h>
+#include <linux/bio.h>
+#include <linux/ratelimit.h>
+
+static struct workqueue_struct *fsverity_read_workqueue;
+
+/**
+ * hash_at_level() - compute the location of the block's hash at the given level
+ *
+ * @params:	(in) the Merkle tree parameters
+ * @dindex:	(in) the index of the data block being verified
+ * @level:	(in) the level of hash we want (0 is leaf level)
+ * @hindex:	(out) the index of the hash block containing the wanted hash
+ * @hoffset:	(out) the byte offset to the wanted hash within the hash block
+ */
+static void hash_at_level(const struct merkle_tree_params *params,
+			  pgoff_t dindex, unsigned int level, pgoff_t *hindex,
+			  unsigned int *hoffset)
+{
+	pgoff_t position;
+
+	/* Offset of the hash within the level's region, in hashes */
+	position = dindex >> (level * params->log_arity);
+
+	/* Index of the hash block in the tree overall */
+	*hindex = params->level_start[level] + (position >> params->log_arity);
+
+	/* Offset of the wanted hash (in bytes) within the hash block */
+	*hoffset = (position & ((1 << params->log_arity) - 1)) <<
+		   (params->log_blocksize - params->log_arity);
+}
+
+/* Extract a hash from a hash page */
+static void extract_hash(struct page *hpage, unsigned int hoffset,
+			 unsigned int hsize, u8 *out)
+{
+	void *virt = kmap_atomic(hpage);
+
+	memcpy(out, virt + hoffset, hsize);
+	kunmap_atomic(virt);
+}
+
+static inline int cmp_hashes(const struct fsverity_info *vi,
+			     const u8 *want_hash, const u8 *real_hash,
+			     pgoff_t index, int level)
+{
+	const unsigned int hsize = vi->tree_params.digest_size;
+
+	if (memcmp(want_hash, real_hash, hsize) == 0)
+		return 0;
+
+	fsverity_err(vi->inode,
+		     "FILE CORRUPTED! index=%lu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
+		     index, level,
+		     vi->tree_params.hash_alg->name, hsize, want_hash,
+		     vi->tree_params.hash_alg->name, hsize, real_hash);
+	return -EBADMSG;
+}
+
+/*
+ * Verify a single data page against the file's Merkle tree.
+ *
+ * In principle, we need to verify the entire path to the root node.  However,
+ * for efficiency the filesystem may cache the hash pages.  Therefore we need
+ * only ascend the tree until an already-verified page is seen, as indicated by
+ * the PageChecked bit being set; then verify the path to that page.
+ *
+ * This code currently only supports the case where the verity block size is
+ * equal to PAGE_SIZE.  Doing otherwise would be possible but tricky, since we
+ * wouldn't be able to use the PageChecked bit.
+ *
+ * Note that multiple processes may race to verify a hash page and mark it
+ * Checked, but it doesn't matter; the result will be the same either way.
+ *
+ * Return: true if the page is valid, else false.
+ */
+static bool verify_page(struct inode *inode, const struct fsverity_info *vi,
+			struct ahash_request *req, struct page *data_page)
+{
+	const struct merkle_tree_params *params = &vi->tree_params;
+	const unsigned int hsize = params->digest_size;
+	const pgoff_t index = data_page->index;
+	int level;
+	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
+	const u8 *want_hash;
+	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
+	struct page *hpages[FS_VERITY_MAX_LEVELS];
+	unsigned int hoffsets[FS_VERITY_MAX_LEVELS];
+	int err;
+
+	if (WARN_ON_ONCE(!PageLocked(data_page) || PageUptodate(data_page)))
+		return false;
+
+	pr_debug_ratelimited("Verifying data page %lu...\n", index);
+
+	/*
+	 * Starting at the leaf level, ascend the tree saving hash pages along
+	 * the way until we find a verified hash page, indicated by PageChecked;
+	 * or until we reach the root.
+	 */
+	for (level = 0; level < params->num_levels; level++) {
+		pgoff_t hindex;
+		unsigned int hoffset;
+		struct page *hpage;
+
+		hash_at_level(params, index, level, &hindex, &hoffset);
+
+		pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n",
+				     level, hindex, hoffset);
+
+		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
+								  hindex);
+		if (IS_ERR(hpage)) {
+			err = PTR_ERR(hpage);
+			fsverity_err(inode,
+				     "Error %d reading Merkle tree page %lu",
+				     err, hindex);
+			goto out;
+		}
+
+		if (PageChecked(hpage)) {
+			extract_hash(hpage, hoffset, hsize, _want_hash);
+			want_hash = _want_hash;
+			put_page(hpage);
+			pr_debug_ratelimited("Hash page already checked, want %s:%*phN\n",
+					     params->hash_alg->name,
+					     hsize, want_hash);
+			goto descend;
+		}
+		pr_debug_ratelimited("Hash page not yet checked\n");
+		hpages[level] = hpage;
+		hoffsets[level] = hoffset;
+	}
+
+	want_hash = vi->root_hash;
+	pr_debug("Want root hash: %s:%*phN\n",
+		 params->hash_alg->name, hsize, want_hash);
+descend:
+	/* Descend the tree verifying hash pages */
+	for (; level > 0; level--) {
+		struct page *hpage = hpages[level - 1];
+		unsigned int hoffset = hoffsets[level - 1];
+
+		err = fsverity_hash_page(params, inode, req, hpage, real_hash);
+		if (err)
+			goto out;
+		err = cmp_hashes(vi, want_hash, real_hash, index, level - 1);
+		if (err)
+			goto out;
+		SetPageChecked(hpage);
+		extract_hash(hpage, hoffset, hsize, _want_hash);
+		want_hash = _want_hash;
+		put_page(hpage);
+		pr_debug("Verified hash page at level %d, now want %s:%*phN\n",
+			 level - 1, params->hash_alg->name, hsize, want_hash);
+	}
+
+	/* Finally, verify the data page */
+	err = fsverity_hash_page(params, inode, req, data_page, real_hash);
+	if (err)
+		goto out;
+	err = cmp_hashes(vi, want_hash, real_hash, index, -1);
+out:
+	for (; level > 0; level--)
+		put_page(hpages[level - 1]);
+
+	return err == 0;
+}
+
+/**
+ * fsverity_verify_page() - verify a data page
+ *
+ * Verify a page that has just been read from a verity file.  The page must be a
+ * pagecache page that is still locked and not yet uptodate.
+ *
+ * Return: true if the page is valid, else false.
+ */
+bool fsverity_verify_page(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	const struct fsverity_info *vi = inode->i_verity_info;
+	struct ahash_request *req;
+	bool valid;
+
+	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
+	if (unlikely(!req))
+		return false;
+
+	valid = verify_page(inode, vi, req, page);
+
+	ahash_request_free(req);
+
+	return valid;
+}
+EXPORT_SYMBOL_GPL(fsverity_verify_page);
+
+#ifdef CONFIG_BLOCK
+/**
+ * fsverity_verify_bio() - verify a 'read' bio that has just completed
+ *
+ * Verify a set of pages that have just been read from a verity file.  The pages
+ * must be pagecache pages that are still locked and not yet uptodate.  Pages
+ * that fail verification are set to the Error state.  Verification is skipped
+ * for pages already in the Error state, e.g. due to fscrypt decryption failure.
+ *
+ * This is a helper function for use by the ->readpages() method of filesystems
+ * that issue bios to read data directly into the page cache.  Filesystems that
+ * populate the page cache without issuing bios (e.g. non block-based
+ * filesystems) must instead call fsverity_verify_page() directly on each page.
+ * All filesystems must also call fsverity_verify_page() on holes.
+ */
+void fsverity_verify_bio(struct bio *bio)
+{
+	struct inode *inode = bio_first_page_all(bio)->mapping->host;
+	const struct fsverity_info *vi = inode->i_verity_info;
+	struct ahash_request *req;
+	struct bio_vec *bv;
+	struct bvec_iter_all iter_all;
+
+	req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS);
+	if (unlikely(!req)) {
+		bio_for_each_segment_all(bv, bio, iter_all)
+			SetPageError(bv->bv_page);
+		return;
+	}
+
+	bio_for_each_segment_all(bv, bio, iter_all) {
+		struct page *page = bv->bv_page;
+
+		if (!PageError(page) && !verify_page(inode, vi, req, page))
+			SetPageError(page);
+	}
+
+	ahash_request_free(req);
+}
+EXPORT_SYMBOL_GPL(fsverity_verify_bio);
+#endif /* CONFIG_BLOCK */
+
+/**
+ * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
+ *
+ * Enqueue verification work for asynchronous processing.
+ */
+void fsverity_enqueue_verify_work(struct work_struct *work)
+{
+	queue_work(fsverity_read_workqueue, work);
+}
+EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
+
+int __init fsverity_init_workqueue(void)
+{
+	/*
+	 * Use an unbound workqueue to allow bios to be verified in parallel
+	 * even when they happen to complete on the same CPU.  This sacrifices
+	 * locality, but it's worthwhile since hashing is CPU-intensive.
+	 *
+	 * Also use a high-priority workqueue to prioritize verification work,
+	 * which blocks reads from completing, over regular application tasks.
+	 */
+	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
+						  WQ_UNBOUND | WQ_HIGHPRI,
+						  num_online_cpus());
+	if (!fsverity_read_workqueue)
+		return -ENOMEM;
+	return 0;
+}
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index cbd0f84e1620..95c257cd7ff0 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -33,6 +33,23 @@ struct fsverity_operations {
 	 */
 	int (*get_verity_descriptor)(struct inode *inode, void *buf,
 				     size_t bufsize);
+
+	/**
+	 * Read a Merkle tree page of the given inode.
+	 *
+	 * @inode: the inode
+	 * @index: 0-based index of the page within the Merkle tree
+	 *
+	 * This can be called at any time on an open verity file, as well as
+	 * between ->begin_enable_verity() and ->end_enable_verity().  It may be
+	 * called by multiple processes concurrently, even with the same page.
+	 *
+	 * Note that this must retrieve a *page*, not necessarily a *block*.
+	 *
+	 * Return: the page on success, ERR_PTR() on failure
+	 */
+	struct page *(*read_merkle_tree_page)(struct inode *inode,
+					      pgoff_t index);
 };
 
 #ifdef CONFIG_FS_VERITY
@@ -49,6 +66,12 @@ extern int fsverity_file_open(struct inode *inode, struct file *filp);
 extern int fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr);
 extern void fsverity_cleanup_inode(struct inode *inode);
 
+/* verify.c */
+
+extern bool fsverity_verify_page(struct page *page);
+extern void fsverity_verify_bio(struct bio *bio);
+extern void fsverity_enqueue_verify_work(struct work_struct *work);
+
 #else /* !CONFIG_FS_VERITY */
 
 static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
@@ -73,6 +96,39 @@ static inline void fsverity_cleanup_inode(struct inode *inode)
 {
 }
 
+/* verify.c */
+
+static inline bool fsverity_verify_page(struct page *page)
+{
+	WARN_ON(1);
+	return false;
+}
+
+static inline void fsverity_verify_bio(struct bio *bio)
+{
+	WARN_ON(1);
+}
+
+static inline void fsverity_enqueue_verify_work(struct work_struct *work)
+{
+	WARN_ON(1);
+}
+
 #endif	/* !CONFIG_FS_VERITY */
 
+/**
+ * fsverity_active() - do reads from the inode need to go through fs-verity?
+ *
+ * This checks whether ->i_verity_info has been set.
+ *
+ * Filesystems call this from ->readpages() to check whether the pages need to
+ * be verified or not.  Don't use IS_VERITY() for this purpose; it's subject to
+ * a race condition where the file is being read concurrently with
+ * FS_IOC_ENABLE_VERITY completing.  (S_VERITY is set before ->i_verity_info.)
+ */
+static inline bool fsverity_active(const struct inode *inode)
+{
+	return fsverity_get_info(inode) != NULL;
+}
+
 #endif	/* _LINUX_FSVERITY_H */
-- 
cgit v1.2.3


From 91826ba13855f73e252fef68369b3b0e1ed25253 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 29 Jul 2019 00:51:38 +0900
Subject: netfilter: add include guard to xt_connlabel.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_connlabel.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/xt_connlabel.h b/include/uapi/linux/netfilter/xt_connlabel.h
index 2312f0ec07b2..323f0dfc2a4e 100644
--- a/include/uapi/linux/netfilter/xt_connlabel.h
+++ b/include/uapi/linux/netfilter/xt_connlabel.h
@@ -1,4 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_XT_CONNLABEL_H
+#define _UAPI_XT_CONNLABEL_H
+
 #include <linux/types.h>
 
 #define XT_CONNLABEL_MAXBIT 127
@@ -11,3 +15,5 @@ struct xt_connlabel_mtinfo {
 	__u16 bit;
 	__u16 options;
 };
+
+#endif /* _UAPI_XT_CONNLABEL_H */
-- 
cgit v1.2.3


From 90d4962cfc87a6994a19db0d76e1fa214dd67267 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 29 Jul 2019 12:23:41 +0200
Subject: mac80211: fix ieee80211_he_oper_size() comment

Johannes mentioned that the comment should not reference mac80211 as other
subsystems might call the helper.

Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20190729102342.8659-1-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a656f31262f1..9c81895c63c1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2053,8 +2053,8 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
  * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
  * @he_oper_ie: byte data of the He Operations IE, stating from the the byte
  *	after the ext ID byte. It is assumed that he_oper_ie has at least
- *	sizeof(struct ieee80211_he_operation) bytes, checked already in
- *	ieee802_11_parse_elems_crc()
+ *	sizeof(struct ieee80211_he_operation) bytes, the caller must have
+ *	validated this.
  * @return the actual size of the IE data (not including header), or 0 on error
  */
 static inline u8
-- 
cgit v1.2.3


From 697f6c507c74991057eb6df3cfb46579ca136467 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 29 Jul 2019 12:23:42 +0200
Subject: mac80211: propagate HE operation info into bss_conf

Upon a successful assoc a station shall store the content of the HE
operation element inside bss_conf so that the driver can setup the
hardware accordingly.

Signed-off-by: Shashidhar Lakkavalli <slakkavalli@datto.com>
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20190729102342.8659-2-john@phrozen.org
[use struct copy]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  2 ++
 net/mac80211/he.c          | 15 +++++++++++++++
 net/mac80211/ieee80211_i.h |  4 ++++
 net/mac80211/mlme.c        |  1 +
 4 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9effd286c1ae..bd91388797fc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -600,6 +600,7 @@ struct ieee80211_ftm_responder_params {
  *	nontransmitted BSSIDs
  * @profile_periodicity: the least number of beacon frames need to be received
  *	in order to discover all the nontransmitted BSSIDs in the set.
+ * @he_operation: HE operation information of the AP we are connected to
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -661,6 +662,7 @@ struct ieee80211_bss_conf {
 	u8 bssid_indicator;
 	bool ema_ap;
 	u8 profile_periodicity;
+	struct ieee80211_he_operation he_operation;
 };
 
 /**
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 219650591c79..f910f730ad0d 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -50,3 +50,18 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 
 	he_cap->has_he = true;
 }
+
+void
+ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
+			const struct ieee80211_he_operation *he_op_ie_elem)
+{
+	struct ieee80211_he_operation *he_operation =
+					&vif->bss_conf.he_operation;
+
+	if (!he_op_ie_elem) {
+		memset(he_operation, 0, sizeof(*he_operation));
+		return;
+	}
+
+	vif->bss_conf.he_operation = *he_op_ie_elem;
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a5818ff0e2a4..ba1bc245678e 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1873,6 +1873,10 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 				  const u8 *he_cap_ie, u8 he_cap_len,
 				  struct sta_info *sta);
 
+void
+ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
+			const struct ieee80211_he_operation *he_op_ie_elem);
+
 /* Spectrum management */
 void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 				       struct ieee80211_mgmt *mgmt,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a99ad0325309..2b8a7428973d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3381,6 +3381,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		if (elems.uora_element)
 			bss_conf->uora_ocw_range = elems.uora_element[0];
 
+		ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation);
 		/* TODO: OPEN: what happens if BSS color disable is set? */
 	}
 
-- 
cgit v1.2.3


From 2ab45876756fb6c132ae801b0939e0474f84c426 Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Mon, 29 Jul 2019 12:45:12 +0200
Subject: mac80211: add support for the ADDBA extension element

HE allows peers to negotiate the aggregation fragmentation level to be used
during transmission. The level can be 1-3. The Ext element is added behind
the ADDBA request inside the action frame. The responder will then reply
with the same level or a lower one if the requested one is not supported.
This patch only handles the negotiation part as the ADDBA frames get passed
to the ATH11k firmware, which does the rest of the magic for us aswell as
generating the requests.

Signed-off-by: Shashidhar Lakkavalli <slakkavalli@datto.com>
Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190729104512.27615-1-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h  |  2 ++
 net/mac80211/agg-rx.c      | 70 ++++++++++++++++++++++++++++++++++++++++------
 net/mac80211/ht.c          |  2 +-
 net/mac80211/ieee80211_i.h |  3 +-
 4 files changed, 66 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9c81895c63c1..7d3f2ced92d1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -981,6 +981,8 @@ struct ieee80211_mgmt {
 					__le16 capab;
 					__le16 timeout;
 					__le16 start_seq_num;
+					/* followed by BA Extension */
+					u8 variable[0];
 				} __packed addba_req;
 				struct{
 					u8 action_code;
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 01b0dad24500..0e1bb43973b8 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -178,17 +178,52 @@ static void sta_rx_agg_reorder_timer_expired(struct timer_list *t)
 	rcu_read_unlock();
 }
 
-static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
+static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata,
+				   struct sk_buff *skb,
+				   const struct ieee80211_addba_ext_ie *req)
+{
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_addba_ext_ie *resp;
+	const struct ieee80211_sta_he_cap *he_cap;
+	u8 frag_level, cap_frag_level;
+	u8 *pos;
+
+	sband = ieee80211_get_sband(sdata);
+	he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type);
+	if (!he_cap)
+		return;
+
+	pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie));
+	*pos++ = WLAN_EID_ADDBA_EXT;
+	*pos++ = sizeof(struct ieee80211_addba_ext_ie);
+	resp = (struct ieee80211_addba_ext_ie *)pos;
+	resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG;
+
+	frag_level = u32_get_bits(req->data,
+				  IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+	cap_frag_level = u32_get_bits(he_cap->he_cap_elem.mac_cap_info[0],
+				      IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK);
+	if (frag_level > cap_frag_level)
+		frag_level = cap_frag_level;
+	resp->data |= u8_encode_bits(frag_level,
+				     IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK);
+}
+
+static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid,
 				      u8 dialog_token, u16 status, u16 policy,
-				      u16 buf_size, u16 timeout)
+				      u16 buf_size, u16 timeout,
+				      const struct ieee80211_addba_ext_ie *addbaext)
 {
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
 	struct ieee80211_mgmt *mgmt;
 	bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU);
 	u16 capab;
 
-	skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
+	skb = dev_alloc_skb(sizeof(*mgmt) +
+		    2 + sizeof(struct ieee80211_addba_ext_ie) +
+		    local->hw.extra_tx_headroom);
 	if (!skb)
 		return;
 
@@ -222,13 +257,17 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
 	mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
 	mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
 
+	if (sta->sta.he_cap.has_he && addbaext)
+		ieee80211_add_addbaext(sdata, skb, addbaext);
+
 	ieee80211_tx_skb(sdata, skb);
 }
 
 void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 				      u8 dialog_token, u16 timeout,
 				      u16 start_seq_num, u16 ba_policy, u16 tid,
-				      u16 buf_size, bool tx, bool auto_seq)
+				      u16 buf_size, bool tx, bool auto_seq,
+				      const struct ieee80211_addba_ext_ie *addbaext)
 {
 	struct ieee80211_local *local = sta->sdata->local;
 	struct tid_ampdu_rx *tid_agg_rx;
@@ -410,21 +449,22 @@ end:
 	}
 
 	if (tx)
-		ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
+		ieee80211_send_addba_resp(sta, sta->sta.addr, tid,
 					  dialog_token, status, 1, buf_size,
-					  timeout);
+					  timeout, addbaext);
 }
 
 static void __ieee80211_start_rx_ba_session(struct sta_info *sta,
 					    u8 dialog_token, u16 timeout,
 					    u16 start_seq_num, u16 ba_policy,
 					    u16 tid, u16 buf_size, bool tx,
-					    bool auto_seq)
+					    bool auto_seq,
+					    const struct ieee80211_addba_ext_ie *addbaext)
 {
 	mutex_lock(&sta->ampdu_mlme.mtx);
 	___ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
 					 start_seq_num, ba_policy, tid,
-					 buf_size, tx, auto_seq);
+					 buf_size, tx, auto_seq, addbaext);
 	mutex_unlock(&sta->ampdu_mlme.mtx);
 }
 
@@ -434,7 +474,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 				     size_t len)
 {
 	u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num;
+	struct ieee802_11_elems elems = { 0 };
 	u8 dialog_token;
+	int ies_len;
 
 	/* extract session parameters from addba request frame */
 	dialog_token = mgmt->u.action.u.addba_req.dialog_token;
@@ -447,9 +489,19 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
 	buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
 
+	ies_len = len - offsetof(struct ieee80211_mgmt,
+				 u.action.u.addba_req.variable);
+	if (ies_len) {
+		ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable,
+                                ies_len, true, &elems, mgmt->bssid, NULL);
+		if (elems.parse_error)
+			return;
+	}
+
 	__ieee80211_start_rx_ba_session(sta, dialog_token, timeout,
 					start_seq_num, ba_policy, tid,
-					buf_size, true, false);
+					buf_size, true, false,
+					elems.addba_ext_ie);
 }
 
 void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d5a500b2a448..a2e4d6b8fd98 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -359,7 +359,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
 				       sta->ampdu_mlme.tid_rx_manage_offl))
 			___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid,
 							 IEEE80211_MAX_AMPDU_BUF_HT,
-							 false, true);
+							 false, true, NULL);
 
 		if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS,
 				       sta->ampdu_mlme.tid_rx_manage_offl))
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ba1bc245678e..38769f5c3da4 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1807,7 +1807,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 				      u8 dialog_token, u16 timeout,
 				      u16 start_seq_num, u16 ba_policy, u16 tid,
-				      u16 buf_size, bool tx, bool auto_seq);
+				      u16 buf_size, bool tx, bool auto_seq,
+				      const struct ieee80211_addba_ext_ie *addbaext);
 void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
 					 enum ieee80211_agg_stop_reason reason);
 void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3


From 3b0b278312ba7d6c1eb8b2fb48d459fb7f341a20 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 29 Jul 2019 16:35:03 +0300
Subject: NFC: nxp-nci: Get rid of platform data

Legacy platform data must go away. We are on the safe side here since
there are no users of it in the kernel.

If anyone by any odd reason needs it the GPIO lookup tables and
built-in device properties at your service.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                           |  1 -
 drivers/nfc/nxp-nci/core.c            |  1 -
 drivers/nfc/nxp-nci/i2c.c             |  9 +--------
 drivers/nfc/nxp-nci/nxp-nci.h         |  1 -
 include/linux/platform_data/nxp-nci.h | 19 -------------------
 5 files changed, 1 insertion(+), 30 deletions(-)
 delete mode 100644 include/linux/platform_data/nxp-nci.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 9cc156c58f0c..ee663e0e2f2e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11327,7 +11327,6 @@ F:	include/net/nfc/
 F:	include/uapi/linux/nfc.h
 F:	drivers/nfc/
 F:	include/linux/platform_data/nfcmrvl.h
-F:	include/linux/platform_data/nxp-nci.h
 F:	Documentation/devicetree/bindings/net/nfc/
 
 NFS, SUNRPC, AND LOCKD CLIENTS
diff --git a/drivers/nfc/nxp-nci/core.c b/drivers/nfc/nxp-nci/core.c
index 8dafc696719f..aed18ca60170 100644
--- a/drivers/nfc/nxp-nci/core.c
+++ b/drivers/nfc/nxp-nci/core.c
@@ -14,7 +14,6 @@
 #include <linux/gpio.h>
 #include <linux/module.h>
 #include <linux/nfc.h>
-#include <linux/platform_data/nxp-nci.h>
 
 #include <net/nfc/nci_core.h>
 
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index 5db71869f04b..47b3b7e612e6 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -23,7 +23,6 @@
 #include <linux/gpio/consumer.h>
 #include <linux/of_gpio.h>
 #include <linux/of_irq.h>
-#include <linux/platform_data/nxp-nci.h>
 #include <asm/unaligned.h>
 
 #include <net/nfc/nfc.h>
@@ -304,7 +303,6 @@ static int nxp_nci_i2c_probe(struct i2c_client *client,
 			    const struct i2c_device_id *id)
 {
 	struct nxp_nci_i2c_phy *phy;
-	struct nxp_nci_nfc_platform_data *pdata;
 	int r;
 
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
@@ -323,17 +321,12 @@ static int nxp_nci_i2c_probe(struct i2c_client *client,
 	phy->i2c_dev = client;
 	i2c_set_clientdata(client, phy);
 
-	pdata = client->dev.platform_data;
-
-	if (!pdata && client->dev.of_node) {
+	if (client->dev.of_node) {
 		r = nxp_nci_i2c_parse_devtree(client);
 		if (r < 0) {
 			nfc_err(&client->dev, "Failed to get DT data\n");
 			goto probe_exit;
 		}
-	} else if (pdata) {
-		phy->gpio_en = pdata->gpio_en;
-		phy->gpio_fw = pdata->gpio_fw;
 	} else if (ACPI_HANDLE(&client->dev)) {
 		r = nxp_nci_i2c_acpi_config(phy);
 		if (r < 0)
diff --git a/drivers/nfc/nxp-nci/nxp-nci.h b/drivers/nfc/nxp-nci/nxp-nci.h
index 6fe7c45544bf..ae3fb2735a4e 100644
--- a/drivers/nfc/nxp-nci/nxp-nci.h
+++ b/drivers/nfc/nxp-nci/nxp-nci.h
@@ -14,7 +14,6 @@
 #include <linux/completion.h>
 #include <linux/firmware.h>
 #include <linux/nfc.h>
-#include <linux/platform_data/nxp-nci.h>
 
 #include <net/nfc/nci_core.h>
 
diff --git a/include/linux/platform_data/nxp-nci.h b/include/linux/platform_data/nxp-nci.h
deleted file mode 100644
index 97827ad468e2..000000000000
--- a/include/linux/platform_data/nxp-nci.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Generic platform data for the NXP NCI NFC chips.
- *
- * Copyright (C) 2014  NXP Semiconductors  All rights reserved.
- *
- * Authors: Clément Perrochaud <clement.perrochaud@nxp.com>
- */
-
-#ifndef _NXP_NCI_H_
-#define _NXP_NCI_H_
-
-struct nxp_nci_nfc_platform_data {
-	unsigned int gpio_en;
-	unsigned int gpio_fw;
-	unsigned int irq;
-};
-
-#endif /* _NXP_NCI_H_ */
-- 
cgit v1.2.3


From 56f8af5e9d38f120cba2c2adb0786fa2dbc901a4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:44:06 +0100
Subject: iommu: Pass struct iommu_iotlb_gather to ->unmap() and ->iotlb_sync()

To allow IOMMU drivers to batch up TLB flushing operations and postpone
them until ->iotlb_sync() is called, extend the prototypes for the
->unmap() and ->iotlb_sync() IOMMU ops callbacks to take a pointer to
the current iommu_iotlb_gather structure.

All affected IOMMU drivers are updated, but there should be no
functional change since the extra parameter is ignored for now.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/amd_iommu.c      | 11 +++++++++--
 drivers/iommu/arm-smmu-v3.c    |  7 ++++---
 drivers/iommu/arm-smmu.c       |  5 +++--
 drivers/iommu/exynos-iommu.c   |  3 ++-
 drivers/iommu/intel-iommu.c    |  3 ++-
 drivers/iommu/iommu.c          |  2 +-
 drivers/iommu/ipmmu-vmsa.c     | 12 +++++++++---
 drivers/iommu/msm_iommu.c      |  2 +-
 drivers/iommu/mtk_iommu.c      | 13 ++++++++++---
 drivers/iommu/mtk_iommu_v1.c   |  3 ++-
 drivers/iommu/omap-iommu.c     |  2 +-
 drivers/iommu/qcom_iommu.c     | 12 +++++++++---
 drivers/iommu/rockchip-iommu.c |  2 +-
 drivers/iommu/s390-iommu.c     |  3 ++-
 drivers/iommu/tegra-gart.c     | 12 +++++++++---
 drivers/iommu/tegra-smmu.c     |  2 +-
 drivers/iommu/virtio-iommu.c   |  5 +++--
 include/linux/iommu.h          |  7 ++++---
 18 files changed, 73 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f93b148cf55e..29eeea914660 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3055,7 +3055,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 }
 
 static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
-			   size_t page_size)
+			      size_t page_size,
+			      struct iommu_iotlb_gather *gather)
 {
 	struct protection_domain *domain = to_pdomain(dom);
 	size_t unmap_size;
@@ -3196,6 +3197,12 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
 	domain_flush_complete(dom);
 }
 
+static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
+				 struct iommu_iotlb_gather *gather)
+{
+	amd_iommu_flush_iotlb_all(domain);
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.domain_alloc = amd_iommu_domain_alloc,
@@ -3214,7 +3221,7 @@ const struct iommu_ops amd_iommu_ops = {
 	.is_attach_deferred = amd_iommu_is_attach_deferred,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
 	.flush_iotlb_all = amd_iommu_flush_iotlb_all,
-	.iotlb_sync = amd_iommu_flush_iotlb_all,
+	.iotlb_sync = amd_iommu_iotlb_sync,
 };
 
 /*****************************************************************************
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 7e137e1e28f1..80753b8ca054 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1985,8 +1985,8 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 	return ops->map(ops, iova, paddr, size, prot);
 }
 
-static size_t
-arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
+static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+			     size_t size, struct iommu_iotlb_gather *gather)
 {
 	int ret;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -2010,7 +2010,8 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 		arm_smmu_tlb_inv_context(smmu_domain);
 }
 
-static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+				struct iommu_iotlb_gather *gather)
 {
 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index dc08db347ef3..e535ae2a9e65 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1301,7 +1301,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-			     size_t size)
+			     size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
@@ -1329,7 +1329,8 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 	}
 }
 
-static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
+				struct iommu_iotlb_gather *gather)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index b0c1e5f9daae..cf5af34cb681 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1130,7 +1130,8 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain
 }
 
 static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
-				 unsigned long l_iova, size_t size)
+				 unsigned long l_iova, size_t size,
+				 struct iommu_iotlb_gather *gather)
 {
 	struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
 	sysmmu_iova_t iova = (sysmmu_iova_t)l_iova;
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ac4172c02244..b9fb8d6ddc6e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5147,7 +5147,8 @@ static int intel_iommu_map(struct iommu_domain *domain,
 }
 
 static size_t intel_iommu_unmap(struct iommu_domain *domain,
-				unsigned long iova, size_t size)
+				unsigned long iova, size_t size,
+				struct iommu_iotlb_gather *gather)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct page *freelist = NULL;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index d67222fdfe44..70bfbcc09248 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1899,7 +1899,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
 	while (unmapped < size) {
 		size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
 
-		unmapped_page = ops->unmap(domain, iova, pgsize);
+		unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather);
 		if (!unmapped_page)
 			break;
 
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 2c14a2c65b22..a9332b893ce2 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -733,14 +733,14 @@ static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
 }
 
 static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
-			  size_t size)
+			  size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
 	return domain->iop->unmap(domain->iop, iova, size);
 }
 
-static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
+static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
@@ -748,6 +748,12 @@ static void ipmmu_iotlb_sync(struct iommu_domain *io_domain)
 		ipmmu_tlb_flush_all(domain);
 }
 
+static void ipmmu_iotlb_sync(struct iommu_domain *io_domain,
+			     struct iommu_iotlb_gather *gather)
+{
+	ipmmu_flush_iotlb_all(io_domain);
+}
+
 static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
 				      dma_addr_t iova)
 {
@@ -957,7 +963,7 @@ static const struct iommu_ops ipmmu_ops = {
 	.detach_dev = ipmmu_detach_device,
 	.map = ipmmu_map,
 	.unmap = ipmmu_unmap,
-	.flush_iotlb_all = ipmmu_iotlb_sync,
+	.flush_iotlb_all = ipmmu_flush_iotlb_all,
 	.iotlb_sync = ipmmu_iotlb_sync,
 	.iova_to_phys = ipmmu_iova_to_phys,
 	.add_device = ipmmu_add_device,
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 8b602384a385..681ab3d3376d 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -509,7 +509,7 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			      size_t len)
+			      size_t len, struct iommu_iotlb_gather *gather)
 {
 	struct msm_priv *priv = to_msm_priv(domain);
 	unsigned long flags;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index fed77658d67e..c870f1674903 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -371,7 +371,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t mtk_iommu_unmap(struct iommu_domain *domain,
-			      unsigned long iova, size_t size)
+			      unsigned long iova, size_t size,
+			      struct iommu_iotlb_gather *gather)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
 	unsigned long flags;
@@ -384,7 +385,13 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
 	return unmapsz;
 }
 
-static void mtk_iommu_iotlb_sync(struct iommu_domain *domain)
+static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
+}
+
+static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
+				 struct iommu_iotlb_gather *gather)
 {
 	mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data());
 }
@@ -490,7 +497,7 @@ static const struct iommu_ops mtk_iommu_ops = {
 	.detach_dev	= mtk_iommu_detach_device,
 	.map		= mtk_iommu_map,
 	.unmap		= mtk_iommu_unmap,
-	.flush_iotlb_all = mtk_iommu_iotlb_sync,
+	.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
 	.iotlb_sync	= mtk_iommu_iotlb_sync,
 	.iova_to_phys	= mtk_iommu_iova_to_phys,
 	.add_device	= mtk_iommu_add_device,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index abeeac488372..7b92ddd5d9fd 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -324,7 +324,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t mtk_iommu_unmap(struct iommu_domain *domain,
-			      unsigned long iova, size_t size)
+			      unsigned long iova, size_t size,
+			      struct iommu_iotlb_gather *gather)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
 	unsigned long flags;
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index dfb961d8c21b..8039bc5ee425 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1149,7 +1149,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
 }
 
 static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
-			       size_t size)
+			       size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
 	struct device *dev = omap_domain->dev;
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index fd9d9f4da735..a7432991fa04 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -417,7 +417,7 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			       size_t size)
+			       size_t size, struct iommu_iotlb_gather *gather)
 {
 	size_t ret;
 	unsigned long flags;
@@ -441,7 +441,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 	return ret;
 }
 
-static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
+static void qcom_iommu_flush_iotlb_all(struct iommu_domain *domain)
 {
 	struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain);
 	struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops,
@@ -454,6 +454,12 @@ static void qcom_iommu_iotlb_sync(struct iommu_domain *domain)
 	pm_runtime_put_sync(qcom_domain->iommu->dev);
 }
 
+static void qcom_iommu_iotlb_sync(struct iommu_domain *domain,
+				  struct iommu_iotlb_gather *gather)
+{
+	qcom_iommu_flush_iotlb_all(domain);
+}
+
 static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain,
 					   dma_addr_t iova)
 {
@@ -581,7 +587,7 @@ static const struct iommu_ops qcom_iommu_ops = {
 	.detach_dev	= qcom_iommu_detach_dev,
 	.map		= qcom_iommu_map,
 	.unmap		= qcom_iommu_unmap,
-	.flush_iotlb_all = qcom_iommu_iotlb_sync,
+	.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
 	.iotlb_sync	= qcom_iommu_iotlb_sync,
 	.iova_to_phys	= qcom_iommu_iova_to_phys,
 	.add_device	= qcom_iommu_add_device,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index dc26d74d79c2..26290f310f90 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -794,7 +794,7 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
 }
 
 static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
-			     size_t size)
+			     size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
 	unsigned long flags;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 22d4db302c1c..3b0b18e23187 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -314,7 +314,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 }
 
 static size_t s390_iommu_unmap(struct iommu_domain *domain,
-			       unsigned long iova, size_t size)
+			       unsigned long iova, size_t size,
+			       struct iommu_iotlb_gather *gather)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	int flags = ZPCI_PTE_INVALID;
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index 6d40bc1b38bf..3924f7c05544 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -207,7 +207,7 @@ static inline int __gart_iommu_unmap(struct gart_device *gart,
 }
 
 static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			       size_t bytes)
+			       size_t bytes, struct iommu_iotlb_gather *gather)
 {
 	struct gart_device *gart = gart_handle;
 	int err;
@@ -273,11 +273,17 @@ static int gart_iommu_of_xlate(struct device *dev,
 	return 0;
 }
 
-static void gart_iommu_sync(struct iommu_domain *domain)
+static void gart_iommu_sync_map(struct iommu_domain *domain)
 {
 	FLUSH_GART_REGS(gart_handle);
 }
 
+static void gart_iommu_sync(struct iommu_domain *domain,
+			    struct iommu_iotlb_gather *gather)
+{
+	gart_iommu_sync_map(domain);
+}
+
 static const struct iommu_ops gart_iommu_ops = {
 	.capable	= gart_iommu_capable,
 	.domain_alloc	= gart_iommu_domain_alloc,
@@ -292,7 +298,7 @@ static const struct iommu_ops gart_iommu_ops = {
 	.iova_to_phys	= gart_iommu_iova_to_phys,
 	.pgsize_bitmap	= GART_IOMMU_PGSIZES,
 	.of_xlate	= gart_iommu_of_xlate,
-	.iotlb_sync_map	= gart_iommu_sync,
+	.iotlb_sync_map	= gart_iommu_sync_map,
 	.iotlb_sync	= gart_iommu_sync,
 };
 
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index c4a652b227f8..7293fc3f796d 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -680,7 +680,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
-			       size_t size)
+			       size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct tegra_smmu_as *as = to_smmu_as(domain);
 	dma_addr_t pte_dma;
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 433f4d2ee956..5f9f91a4d7f3 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -742,7 +742,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			   size_t size)
+			   size_t size, struct iommu_iotlb_gather *gather)
 {
 	int ret = 0;
 	size_t unmapped;
@@ -788,7 +788,8 @@ static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain,
 	return paddr;
 }
 
-static void viommu_iotlb_sync(struct iommu_domain *domain)
+static void viommu_iotlb_sync(struct iommu_domain *domain,
+			      struct iommu_iotlb_gather *gather)
 {
 	struct viommu_domain *vdomain = to_viommu_domain(domain);
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ad41aee55bc6..64ebaff33455 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -258,10 +258,11 @@ struct iommu_ops {
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
-		     size_t size);
+		     size_t size, struct iommu_iotlb_gather *iotlb_gather);
 	void (*flush_iotlb_all)(struct iommu_domain *domain);
 	void (*iotlb_sync_map)(struct iommu_domain *domain);
-	void (*iotlb_sync)(struct iommu_domain *domain);
+	void (*iotlb_sync)(struct iommu_domain *domain,
+			   struct iommu_iotlb_gather *iotlb_gather);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
 	int (*add_device)(struct device *dev);
 	void (*remove_device)(struct device *dev);
@@ -502,7 +503,7 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain,
 				  struct iommu_iotlb_gather *iotlb_gather)
 {
 	if (domain->ops->iotlb_sync)
-		domain->ops->iotlb_sync(domain);
+		domain->ops->iotlb_sync(domain, iotlb_gather);
 
 	iommu_iotlb_gather_init(iotlb_gather);
 }
-- 
cgit v1.2.3


From 3445545b2248300319b6965208e77140c960c3fd Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:44:16 +0100
Subject: iommu/io-pgtable: Introduce tlb_flush_walk() and tlb_flush_leaf()

In preparation for deferring TLB flushes to iommu_tlb_sync(), introduce
two new synchronous invalidation helpers to the io-pgtable API, which
allow the unmap() code to force invalidation in cases where it cannot be
deferred (e.g. when replacing a table with a block or when TLBI_ON_MAP
is set).

Signed-off-by: Will Deacon <will@kernel.org>
---
 include/linux/io-pgtable.h | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 6292ea15d674..27275575b305 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -19,17 +19,31 @@ enum io_pgtable_fmt {
 /**
  * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management.
  *
- * @tlb_flush_all: Synchronously invalidate the entire TLB context.
- * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range.
- * @tlb_sync:      Ensure any queued TLB invalidation has taken effect, and
- *                 any corresponding page table updates are visible to the
- *                 IOMMU.
+ * @tlb_flush_all:  Synchronously invalidate the entire TLB context.
+ * @tlb_flush_walk: Synchronously invalidate all intermediate TLB state
+ *                  (sometimes referred to as the "walk cache") for a virtual
+ *                  address range.
+ * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual
+ *                  address range.
+ * @tlb_add_flush:  Optional callback to queue up leaf TLB invalidation for a
+ *                  virtual address range.  This function exists purely as an
+ *                  optimisation for IOMMUs that cannot batch TLB invalidation
+ *                  operations efficiently and are therefore better suited to
+ *                  issuing them early rather than deferring them until
+ *                  iommu_tlb_sync().
+ * @tlb_sync:       Ensure any queued TLB invalidation has taken effect, and
+ *                  any corresponding page table updates are visible to the
+ *                  IOMMU.
  *
  * Note that these can all be called in atomic context and must therefore
  * not block.
  */
 struct iommu_flush_ops {
 	void (*tlb_flush_all)(void *cookie);
+	void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule,
+			       void *cookie);
+	void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule,
+			       void *cookie);
 	void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
 			      bool leaf, void *cookie);
 	void (*tlb_sync)(void *cookie);
-- 
cgit v1.2.3


From 10b7a7d912697afd681a0bcfced9e05543aded35 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:44:32 +0100
Subject: iommu/io-pgtable-arm: Call ->tlb_flush_walk() and ->tlb_flush_leaf()

Now that all IOMMU drivers using the io-pgtable API implement the
->tlb_flush_walk() and ->tlb_flush_leaf() callbacks, we can use them in
the io-pgtable code instead of ->tlb_add_flush() immediately followed by
->tlb_sync().

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 25 +++++++++++++++----------
 drivers/iommu/io-pgtable-arm.c     | 17 ++++++++++++-----
 include/linux/io-pgtable.h         | 14 ++++++++++++++
 3 files changed, 41 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 116f97ee991e..8d4914fe73bc 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -493,9 +493,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
 	 * a chance for anything to kick off a table walk for the new iova.
 	 */
 	if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
-		io_pgtable_tlb_add_flush(iop, iova, size,
-					 ARM_V7S_BLOCK_SIZE(2), false);
-		io_pgtable_tlb_sync(iop);
+		io_pgtable_tlb_flush_walk(iop, iova, size,
+					  ARM_V7S_BLOCK_SIZE(2));
 	} else {
 		wmb();
 	}
@@ -541,8 +540,7 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
 	__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
 
 	size *= ARM_V7S_CONT_PAGES;
-	io_pgtable_tlb_add_flush(iop, iova, size, size, true);
-	io_pgtable_tlb_sync(iop);
+	io_pgtable_tlb_flush_leaf(iop, iova, size, size);
 	return pte;
 }
 
@@ -637,9 +635,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 		for (i = 0; i < num_entries; i++) {
 			if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
 				/* Also flush any partial walks */
-				io_pgtable_tlb_add_flush(iop, iova, blk_size,
-					ARM_V7S_BLOCK_SIZE(lvl + 1), false);
-				io_pgtable_tlb_sync(iop);
+				io_pgtable_tlb_flush_walk(iop, iova, blk_size,
+						ARM_V7S_BLOCK_SIZE(lvl + 1));
 				ptep = iopte_deref(pte[i], lvl);
 				__arm_v7s_free_table(ptep, lvl + 1, data);
 			} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
@@ -805,13 +802,19 @@ static void dummy_tlb_flush_all(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
-				size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
+			    void *cookie)
 {
 	WARN_ON(cookie != cfg_cookie);
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
+static void dummy_tlb_add_flush(unsigned long iova, size_t size,
+				size_t granule, bool leaf, void *cookie)
+{
+	dummy_tlb_flush(iova, size, granule, cookie);
+}
+
 static void dummy_tlb_sync(void *cookie)
 {
 	WARN_ON(cookie != cfg_cookie);
@@ -819,6 +822,8 @@ static void dummy_tlb_sync(void *cookie)
 
 static const struct iommu_flush_ops dummy_tlb_ops = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
+	.tlb_flush_walk	= dummy_tlb_flush,
+	.tlb_flush_leaf	= dummy_tlb_flush,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
 };
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 402f913b6f6d..b58338c86323 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -611,9 +611,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 
 		if (!iopte_leaf(pte, lvl, iop->fmt)) {
 			/* Also flush any partial walks */
-			io_pgtable_tlb_add_flush(iop, iova, size,
-						ARM_LPAE_GRANULE(data), false);
-			io_pgtable_tlb_sync(iop);
+			io_pgtable_tlb_flush_walk(iop, iova, size,
+						  ARM_LPAE_GRANULE(data));
 			ptep = iopte_deref(pte, data);
 			__arm_lpae_free_pgtable(data, lvl + 1, ptep);
 		} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
@@ -1069,13 +1068,19 @@ static void dummy_tlb_flush_all(void *cookie)
 	WARN_ON(cookie != cfg_cookie);
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
-				size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
+			    void *cookie)
 {
 	WARN_ON(cookie != cfg_cookie);
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
+static void dummy_tlb_add_flush(unsigned long iova, size_t size,
+				size_t granule, bool leaf, void *cookie)
+{
+	dummy_tlb_flush(iova, size, granule, cookie);
+}
+
 static void dummy_tlb_sync(void *cookie)
 {
 	WARN_ON(cookie != cfg_cookie);
@@ -1083,6 +1088,8 @@ static void dummy_tlb_sync(void *cookie)
 
 static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
+	.tlb_flush_walk	= dummy_tlb_flush,
+	.tlb_flush_leaf	= dummy_tlb_flush,
 	.tlb_add_flush	= dummy_tlb_add_flush,
 	.tlb_sync	= dummy_tlb_sync,
 };
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 27275575b305..0618aac59e74 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -198,6 +198,20 @@ static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
 	iop->cfg.tlb->tlb_flush_all(iop->cookie);
 }
 
+static inline void
+io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova,
+			  size_t size, size_t granule)
+{
+	iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie);
+}
+
+static inline void
+io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,
+			  size_t size, size_t granule)
+{
+	iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);
+}
+
 static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
 		unsigned long iova, size_t size, size_t granule, bool leaf)
 {
-- 
cgit v1.2.3


From abfd6fe0cd535d31ee83b668be6eb59ce6a8469d Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:44:41 +0100
Subject: iommu/io-pgtable: Replace ->tlb_add_flush() with ->tlb_add_page()

The ->tlb_add_flush() callback in the io-pgtable API now looks a bit
silly:

  - It takes a size and a granule, which are always the same
  - It takes a 'bool leaf', which is always true
  - It only ever flushes a single page

With that in mind, replace it with an optional ->tlb_add_page() callback
that drops the useless parameters.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c |  5 --
 drivers/iommu/arm-smmu-v3.c             |  8 ++-
 drivers/iommu/arm-smmu.c                | 88 +++++++++++++++++++++------------
 drivers/iommu/io-pgtable-arm-v7s.c      | 12 ++---
 drivers/iommu/io-pgtable-arm.c          | 11 ++---
 drivers/iommu/ipmmu-vmsa.c              |  7 ---
 drivers/iommu/msm_iommu.c               |  7 ++-
 drivers/iommu/mtk_iommu.c               |  8 ++-
 drivers/iommu/qcom_iommu.c              |  8 ++-
 include/linux/io-pgtable.h              | 22 ++++-----
 10 files changed, 105 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 651858147bd6..ff9af320cacc 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -247,10 +247,6 @@ static void mmu_tlb_inv_context_s1(void *cookie)
 	mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
 }
 
-static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
-				     size_t granule, bool leaf, void *cookie)
-{}
-
 static void mmu_tlb_sync_context(void *cookie)
 {
 	//struct panfrost_device *pfdev = cookie;
@@ -273,7 +269,6 @@ static const struct iommu_flush_ops mmu_tlb_ops = {
 	.tlb_flush_all	= mmu_tlb_inv_context_s1,
 	.tlb_flush_walk = mmu_tlb_flush_walk,
 	.tlb_flush_leaf = mmu_tlb_flush_leaf,
-	.tlb_add_flush	= mmu_tlb_inv_range_nosync,
 	.tlb_sync	= mmu_tlb_sync_context,
 };
 
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 79819b003b07..98c90a1b4b22 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1603,6 +1603,12 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	} while (size -= granule);
 }
 
+static void arm_smmu_tlb_inv_page_nosync(unsigned long iova, size_t granule,
+					 void *cookie)
+{
+	arm_smmu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
+}
+
 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
 				  size_t granule, void *cookie)
 {
@@ -1627,7 +1633,7 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = {
 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
-	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
+	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
 	.tlb_sync	= arm_smmu_tlb_sync,
 };
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index e9f01b860ae3..f056164a94b0 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -248,10 +248,16 @@ enum arm_smmu_domain_stage {
 	ARM_SMMU_DOMAIN_BYPASS,
 };
 
+struct arm_smmu_flush_ops {
+	struct iommu_flush_ops		tlb;
+	void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
+			      bool leaf, void *cookie)
+};
+
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct io_pgtable_ops		*pgtbl_ops;
-	const struct iommu_flush_ops	*tlb_ops;
+	const struct arm_smmu_flush_ops	*flush_ops;
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
 	bool				non_strict;
@@ -551,42 +557,62 @@ static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
 				  size_t granule, void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
+	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
 
-	smmu_domain->tlb_ops->tlb_add_flush(iova, size, granule, false, cookie);
-	smmu_domain->tlb_ops->tlb_sync(cookie);
+	ops->tlb_inv_range(iova, size, granule, false, cookie);
+	ops->tlb.tlb_sync(cookie);
 }
 
 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
 				  size_t granule, void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
+	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
+
+	ops->tlb_inv_range(iova, size, granule, true, cookie);
+	ops->tlb.tlb_sync(cookie);
+}
+
+static void arm_smmu_tlb_add_page(unsigned long iova, size_t granule,
+				  void *cookie)
+{
+	struct arm_smmu_domain *smmu_domain = cookie;
+	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
 
-	smmu_domain->tlb_ops->tlb_add_flush(iova, size, granule, true, cookie);
-	smmu_domain->tlb_ops->tlb_sync(cookie);
+	ops->tlb_inv_range(iova, granule, granule, true, cookie);
 }
 
-static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
-	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
-	.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
-	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
-	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
-	.tlb_sync	= arm_smmu_tlb_sync_context,
+static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
+	.tlb = {
+		.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
+		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
+		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
+		.tlb_add_page	= arm_smmu_tlb_add_page,
+		.tlb_sync	= arm_smmu_tlb_sync_context,
+	},
+	.tlb_inv_range		= arm_smmu_tlb_inv_range_nosync,
 };
 
-static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
-	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
-	.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
-	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
-	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
-	.tlb_sync	= arm_smmu_tlb_sync_context,
+static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
+	.tlb = {
+		.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
+		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
+		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
+		.tlb_add_page	= arm_smmu_tlb_add_page,
+		.tlb_sync	= arm_smmu_tlb_sync_context,
+	},
+	.tlb_inv_range		= arm_smmu_tlb_inv_range_nosync,
 };
 
-static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
-	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
-	.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
-	.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
-	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
-	.tlb_sync	= arm_smmu_tlb_sync_vmid,
+static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
+	.tlb = {
+		.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
+		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
+		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
+		.tlb_add_page	= arm_smmu_tlb_add_page,
+		.tlb_sync	= arm_smmu_tlb_sync_vmid,
+	},
+	.tlb_inv_range		= arm_smmu_tlb_inv_vmid_nosync,
 };
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -866,7 +892,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			ias = min(ias, 32UL);
 			oas = min(oas, 32UL);
 		}
-		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 		/*
@@ -886,9 +912,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 			oas = min(oas, 40UL);
 		}
 		if (smmu->version == ARM_SMMU_V2)
-			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
 		else
-			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
 		break;
 	default:
 		ret = -EINVAL;
@@ -917,7 +943,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 		.ias		= ias,
 		.oas		= oas,
 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
-		.tlb		= smmu_domain->tlb_ops,
+		.tlb		= &smmu_domain->flush_ops->tlb,
 		.iommu_dev	= smmu->dev,
 	};
 
@@ -1346,9 +1372,9 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-	if (smmu_domain->tlb_ops) {
+	if (smmu_domain->flush_ops) {
 		arm_smmu_rpm_get(smmu);
-		smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+		smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain);
 		arm_smmu_rpm_put(smmu);
 	}
 }
@@ -1359,9 +1385,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 
-	if (smmu_domain->tlb_ops) {
+	if (smmu_domain->flush_ops) {
 		arm_smmu_rpm_get(smmu);
-		smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+		smmu_domain->flush_ops->tlb.tlb_sync(smmu_domain);
 		arm_smmu_rpm_put(smmu);
 	}
 }
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 8d4914fe73bc..b3f975c95f76 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -584,7 +584,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 		return __arm_v7s_unmap(data, iova, size, 2, tablep);
 	}
 
-	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+	io_pgtable_tlb_add_page(&data->iop, iova, size);
 	return size;
 }
 
@@ -647,8 +647,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 				 */
 				smp_wmb();
 			} else {
-				io_pgtable_tlb_add_flush(iop, iova, blk_size,
-							 blk_size, true);
+				io_pgtable_tlb_add_page(iop, iova, blk_size);
 			}
 			iova += blk_size;
 		}
@@ -809,10 +808,9 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
-				size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie)
 {
-	dummy_tlb_flush(iova, size, granule, cookie);
+	dummy_tlb_flush(iova, granule, granule, cookie);
 }
 
 static void dummy_tlb_sync(void *cookie)
@@ -824,7 +822,7 @@ static const struct iommu_flush_ops dummy_tlb_ops = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_flush_walk	= dummy_tlb_flush,
 	.tlb_flush_leaf	= dummy_tlb_flush,
-	.tlb_add_flush	= dummy_tlb_add_flush,
+	.tlb_add_page	= dummy_tlb_add_page,
 	.tlb_sync	= dummy_tlb_sync,
 };
 
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index b58338c86323..a5c0db01533e 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -582,7 +582,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 
 		tablep = iopte_deref(pte, data);
 	} else if (unmap_idx >= 0) {
-		io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+		io_pgtable_tlb_add_page(&data->iop, iova, size);
 		return size;
 	}
 
@@ -623,7 +623,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			 */
 			smp_wmb();
 		} else {
-			io_pgtable_tlb_add_flush(iop, iova, size, size, true);
+			io_pgtable_tlb_add_page(iop, iova, size);
 		}
 
 		return size;
@@ -1075,10 +1075,9 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
-static void dummy_tlb_add_flush(unsigned long iova, size_t size,
-				size_t granule, bool leaf, void *cookie)
+static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie)
 {
-	dummy_tlb_flush(iova, size, granule, cookie);
+	dummy_tlb_flush(iova, granule, granule, cookie);
 }
 
 static void dummy_tlb_sync(void *cookie)
@@ -1090,7 +1089,7 @@ static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_flush_walk	= dummy_tlb_flush,
 	.tlb_flush_leaf	= dummy_tlb_flush,
-	.tlb_add_flush	= dummy_tlb_add_flush,
+	.tlb_add_page	= dummy_tlb_add_page,
 	.tlb_sync	= dummy_tlb_sync,
 };
 
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 9cc7bcb7e39d..c4da271af90e 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -367,17 +367,10 @@ static void ipmmu_tlb_flush(unsigned long iova, size_t size,
 	ipmmu_tlb_flush_all(cookie);
 }
 
-static void ipmmu_tlb_add_flush(unsigned long iova, size_t size,
-				size_t granule, bool leaf, void *cookie)
-{
-	/* The hardware doesn't support selective TLB flush. */
-}
-
 static const struct iommu_flush_ops ipmmu_flush_ops = {
 	.tlb_flush_all = ipmmu_tlb_flush_all,
 	.tlb_flush_walk = ipmmu_tlb_flush,
 	.tlb_flush_leaf = ipmmu_tlb_flush,
-	.tlb_add_flush = ipmmu_tlb_add_flush,
 	.tlb_sync = ipmmu_tlb_flush_all,
 };
 
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 64132093751a..2cd83295a841 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -192,11 +192,16 @@ static void __flush_iotlb_leaf(unsigned long iova, size_t size,
 	__flush_iotlb_sync(cookie);
 }
 
+static void __flush_iotlb_page(unsigned long iova, size_t granule, void *cookie)
+{
+	__flush_iotlb_range(iova, granule, granule, true, cookie);
+}
+
 static const struct iommu_flush_ops msm_iommu_flush_ops = {
 	.tlb_flush_all = __flush_iotlb,
 	.tlb_flush_walk = __flush_iotlb_walk,
 	.tlb_flush_leaf = __flush_iotlb_leaf,
-	.tlb_add_flush = __flush_iotlb_range,
+	.tlb_add_page = __flush_iotlb_page,
 	.tlb_sync = __flush_iotlb_sync,
 };
 
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 85a7176bf9ae..a0b4b4dc4b90 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -202,11 +202,17 @@ static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
 	mtk_iommu_tlb_sync(cookie);
 }
 
+static void mtk_iommu_tlb_flush_page_nosync(unsigned long iova, size_t granule,
+					    void *cookie)
+{
+	mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
+}
+
 static const struct iommu_flush_ops mtk_iommu_flush_ops = {
 	.tlb_flush_all = mtk_iommu_tlb_flush_all,
 	.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
 	.tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
-	.tlb_add_flush = mtk_iommu_tlb_add_flush_nosync,
+	.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
 	.tlb_sync = mtk_iommu_tlb_sync,
 };
 
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 643079e52e69..7d8411dee4cf 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -178,11 +178,17 @@ static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
 	qcom_iommu_tlb_sync(cookie);
 }
 
+static void qcom_iommu_tlb_add_page(unsigned long iova, size_t granule,
+				    void *cookie)
+{
+	qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
+}
+
 static const struct iommu_flush_ops qcom_flush_ops = {
 	.tlb_flush_all	= qcom_iommu_tlb_inv_context,
 	.tlb_flush_walk = qcom_iommu_tlb_flush_walk,
 	.tlb_flush_leaf = qcom_iommu_tlb_flush_leaf,
-	.tlb_add_flush	= qcom_iommu_tlb_inv_range_nosync,
+	.tlb_add_page	= qcom_iommu_tlb_add_page,
 	.tlb_sync	= qcom_iommu_tlb_sync,
 };
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 0618aac59e74..99e04bd2baa1 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -25,12 +25,11 @@ enum io_pgtable_fmt {
  *                  address range.
  * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual
  *                  address range.
- * @tlb_add_flush:  Optional callback to queue up leaf TLB invalidation for a
- *                  virtual address range.  This function exists purely as an
- *                  optimisation for IOMMUs that cannot batch TLB invalidation
- *                  operations efficiently and are therefore better suited to
- *                  issuing them early rather than deferring them until
- *                  iommu_tlb_sync().
+ * @tlb_add_page:   Optional callback to queue up leaf TLB invalidation for a
+ *                  single page. This function exists purely as an optimisation
+ *                  for IOMMUs that cannot batch TLB invalidation operations
+ *                  efficiently and are therefore better suited to issuing them
+ *                  early rather than deferring them until iommu_tlb_sync().
  * @tlb_sync:       Ensure any queued TLB invalidation has taken effect, and
  *                  any corresponding page table updates are visible to the
  *                  IOMMU.
@@ -44,8 +43,7 @@ struct iommu_flush_ops {
 			       void *cookie);
 	void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule,
 			       void *cookie);
-	void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
-			      bool leaf, void *cookie);
+	void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie);
 	void (*tlb_sync)(void *cookie);
 };
 
@@ -212,10 +210,12 @@ io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,
 	iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie);
 }
 
-static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
-		unsigned long iova, size_t size, size_t granule, bool leaf)
+static inline void
+io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova,
+			size_t granule)
 {
-	iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
+	if (iop->cfg.tlb->tlb_add_page)
+		iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie);
 }
 
 static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
-- 
cgit v1.2.3


From e953f7f2fa78d1c7fd064171f88457c6b1e21af9 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:44:50 +0100
Subject: iommu/io-pgtable: Remove unused ->tlb_sync() callback

The ->tlb_sync() callback is no longer used, so it can be removed.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c |  1 -
 drivers/iommu/arm-smmu-v3.c             |  8 --------
 drivers/iommu/arm-smmu.c                | 17 +++++++++--------
 drivers/iommu/io-pgtable-arm-v7s.c      |  6 ------
 drivers/iommu/io-pgtable-arm.c          |  6 ------
 drivers/iommu/ipmmu-vmsa.c              |  1 -
 drivers/iommu/msm_iommu.c               | 20 +++++++-------------
 drivers/iommu/mtk_iommu.c               |  1 -
 drivers/iommu/qcom_iommu.c              |  1 -
 include/linux/io-pgtable.h              |  9 ---------
 10 files changed, 16 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index ff9af320cacc..de22a2276e00 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -269,7 +269,6 @@ static const struct iommu_flush_ops mmu_tlb_ops = {
 	.tlb_flush_all	= mmu_tlb_inv_context_s1,
 	.tlb_flush_walk = mmu_tlb_flush_walk,
 	.tlb_flush_leaf = mmu_tlb_flush_leaf,
-	.tlb_sync	= mmu_tlb_sync_context,
 };
 
 static const char *access_type_name(struct panfrost_device *pfdev,
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 98c90a1b4b22..231093413ff9 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1545,13 +1545,6 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
 }
 
 /* IO_PGTABLE API */
-static void arm_smmu_tlb_sync(void *cookie)
-{
-	struct arm_smmu_domain *smmu_domain = cookie;
-
-	arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
-}
-
 static void arm_smmu_tlb_inv_context(void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
@@ -1634,7 +1627,6 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = {
 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
 	.tlb_flush_leaf = arm_smmu_tlb_inv_leaf,
 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
-	.tlb_sync	= arm_smmu_tlb_sync,
 };
 
 /* IOMMU API */
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f056164a94b0..07a267c437d6 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -251,7 +251,8 @@ enum arm_smmu_domain_stage {
 struct arm_smmu_flush_ops {
 	struct iommu_flush_ops		tlb;
 	void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule,
-			      bool leaf, void *cookie)
+			      bool leaf, void *cookie);
+	void (*tlb_sync)(void *cookie);
 };
 
 struct arm_smmu_domain {
@@ -539,7 +540,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
  * almost negligible, but the benefit of getting the first one in as far ahead
  * of the sync as possible is significant, hence we don't just make this a
- * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think.
+ * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
  */
 static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
 					 size_t granule, bool leaf, void *cookie)
@@ -560,7 +561,7 @@ static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
 	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
 
 	ops->tlb_inv_range(iova, size, granule, false, cookie);
-	ops->tlb.tlb_sync(cookie);
+	ops->tlb_sync(cookie);
 }
 
 static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
@@ -570,7 +571,7 @@ static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
 	const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops;
 
 	ops->tlb_inv_range(iova, size, granule, true, cookie);
-	ops->tlb.tlb_sync(cookie);
+	ops->tlb_sync(cookie);
 }
 
 static void arm_smmu_tlb_add_page(unsigned long iova, size_t granule,
@@ -588,9 +589,9 @@ static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = {
 		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
 		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
 		.tlb_add_page	= arm_smmu_tlb_add_page,
-		.tlb_sync	= arm_smmu_tlb_sync_context,
 	},
 	.tlb_inv_range		= arm_smmu_tlb_inv_range_nosync,
+	.tlb_sync		= arm_smmu_tlb_sync_context,
 };
 
 static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
@@ -599,9 +600,9 @@ static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
 		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
 		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
 		.tlb_add_page	= arm_smmu_tlb_add_page,
-		.tlb_sync	= arm_smmu_tlb_sync_context,
 	},
 	.tlb_inv_range		= arm_smmu_tlb_inv_range_nosync,
+	.tlb_sync		= arm_smmu_tlb_sync_context,
 };
 
 static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
@@ -610,9 +611,9 @@ static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
 		.tlb_flush_walk	= arm_smmu_tlb_inv_walk,
 		.tlb_flush_leaf	= arm_smmu_tlb_inv_leaf,
 		.tlb_add_page	= arm_smmu_tlb_add_page,
-		.tlb_sync	= arm_smmu_tlb_sync_vmid,
 	},
 	.tlb_inv_range		= arm_smmu_tlb_inv_vmid_nosync,
+	.tlb_sync		= arm_smmu_tlb_sync_vmid,
 };
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -1387,7 +1388,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
 
 	if (smmu_domain->flush_ops) {
 		arm_smmu_rpm_get(smmu);
-		smmu_domain->flush_ops->tlb.tlb_sync(smmu_domain);
+		smmu_domain->flush_ops->tlb_sync(smmu_domain);
 		arm_smmu_rpm_put(smmu);
 	}
 }
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index b3f975c95f76..203894fb6765 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -813,17 +813,11 @@ static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie)
 	dummy_tlb_flush(iova, granule, granule, cookie);
 }
 
-static void dummy_tlb_sync(void *cookie)
-{
-	WARN_ON(cookie != cfg_cookie);
-}
-
 static const struct iommu_flush_ops dummy_tlb_ops = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_flush_walk	= dummy_tlb_flush,
 	.tlb_flush_leaf	= dummy_tlb_flush,
 	.tlb_add_page	= dummy_tlb_add_page,
-	.tlb_sync	= dummy_tlb_sync,
 };
 
 #define __FAIL(ops)	({				\
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index a5c0db01533e..f35516744965 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -1080,17 +1080,11 @@ static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie)
 	dummy_tlb_flush(iova, granule, granule, cookie);
 }
 
-static void dummy_tlb_sync(void *cookie)
-{
-	WARN_ON(cookie != cfg_cookie);
-}
-
 static const struct iommu_flush_ops dummy_tlb_ops __initconst = {
 	.tlb_flush_all	= dummy_tlb_flush_all,
 	.tlb_flush_walk	= dummy_tlb_flush,
 	.tlb_flush_leaf	= dummy_tlb_flush,
 	.tlb_add_page	= dummy_tlb_add_page,
-	.tlb_sync	= dummy_tlb_sync,
 };
 
 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index c4da271af90e..a2b8eff4c1f7 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -371,7 +371,6 @@ static const struct iommu_flush_ops ipmmu_flush_ops = {
 	.tlb_flush_all = ipmmu_tlb_flush_all,
 	.tlb_flush_walk = ipmmu_tlb_flush,
 	.tlb_flush_leaf = ipmmu_tlb_flush,
-	.tlb_sync = ipmmu_tlb_flush_all,
 };
 
 /* -----------------------------------------------------------------------------
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 2cd83295a841..ccfc7ed230ef 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -168,28 +168,16 @@ fail:
 	return;
 }
 
-static void __flush_iotlb_sync(void *cookie)
-{
-	/*
-	 * Nothing is needed here, the barrier to guarantee
-	 * completion of the tlb sync operation is implicitly
-	 * taken care when the iommu client does a writel before
-	 * kick starting the other master.
-	 */
-}
-
 static void __flush_iotlb_walk(unsigned long iova, size_t size,
 			       size_t granule, void *cookie)
 {
 	__flush_iotlb_range(iova, size, granule, false, cookie);
-	__flush_iotlb_sync(cookie);
 }
 
 static void __flush_iotlb_leaf(unsigned long iova, size_t size,
 			       size_t granule, void *cookie)
 {
 	__flush_iotlb_range(iova, size, granule, true, cookie);
-	__flush_iotlb_sync(cookie);
 }
 
 static void __flush_iotlb_page(unsigned long iova, size_t granule, void *cookie)
@@ -202,7 +190,6 @@ static const struct iommu_flush_ops msm_iommu_flush_ops = {
 	.tlb_flush_walk = __flush_iotlb_walk,
 	.tlb_flush_leaf = __flush_iotlb_leaf,
 	.tlb_add_page = __flush_iotlb_page,
-	.tlb_sync = __flush_iotlb_sync,
 };
 
 static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end)
@@ -712,6 +699,13 @@ static struct iommu_ops msm_iommu_ops = {
 	.detach_dev = msm_iommu_detach_dev,
 	.map = msm_iommu_map,
 	.unmap = msm_iommu_unmap,
+	/*
+	 * Nothing is needed here, the barrier to guarantee
+	 * completion of the tlb sync operation is implicitly
+	 * taken care when the iommu client does a writel before
+	 * kick starting the other master.
+	 */
+	.iotlb_sync = NULL,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.add_device = msm_iommu_add_device,
 	.remove_device = msm_iommu_remove_device,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index a0b4b4dc4b90..3785750bdb44 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -213,7 +213,6 @@ static const struct iommu_flush_ops mtk_iommu_flush_ops = {
 	.tlb_flush_walk = mtk_iommu_tlb_flush_walk,
 	.tlb_flush_leaf = mtk_iommu_tlb_flush_leaf,
 	.tlb_add_page = mtk_iommu_tlb_flush_page_nosync,
-	.tlb_sync = mtk_iommu_tlb_sync,
 };
 
 static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 7d8411dee4cf..0b8a6d6bb475 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -189,7 +189,6 @@ static const struct iommu_flush_ops qcom_flush_ops = {
 	.tlb_flush_walk = qcom_iommu_tlb_flush_walk,
 	.tlb_flush_leaf = qcom_iommu_tlb_flush_leaf,
 	.tlb_add_page	= qcom_iommu_tlb_add_page,
-	.tlb_sync	= qcom_iommu_tlb_sync,
 };
 
 static irqreturn_t qcom_iommu_fault(int irq, void *dev)
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 99e04bd2baa1..843310484fe2 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -30,9 +30,6 @@ enum io_pgtable_fmt {
  *                  for IOMMUs that cannot batch TLB invalidation operations
  *                  efficiently and are therefore better suited to issuing them
  *                  early rather than deferring them until iommu_tlb_sync().
- * @tlb_sync:       Ensure any queued TLB invalidation has taken effect, and
- *                  any corresponding page table updates are visible to the
- *                  IOMMU.
  *
  * Note that these can all be called in atomic context and must therefore
  * not block.
@@ -44,7 +41,6 @@ struct iommu_flush_ops {
 	void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule,
 			       void *cookie);
 	void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie);
-	void (*tlb_sync)(void *cookie);
 };
 
 /**
@@ -218,11 +214,6 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova,
 		iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie);
 }
 
-static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
-{
-	iop->cfg.tlb->tlb_sync(iop->cookie);
-}
-
 /**
  * struct io_pgtable_init_fns - Alloc/free a set of page tables for a
  *                              particular format.
-- 
cgit v1.2.3


From a2d3a382d6c682e22b263c9e7f0d857c3fa6c9d6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:44:58 +0100
Subject: iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->unmap()

Update the io-pgtable ->unmap() function to take an iommu_iotlb_gather
pointer as an argument, and update the callers as appropriate.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +-
 drivers/iommu/arm-smmu-v3.c             | 2 +-
 drivers/iommu/arm-smmu.c                | 2 +-
 drivers/iommu/io-pgtable-arm-v7s.c      | 6 +++---
 drivers/iommu/io-pgtable-arm.c          | 7 +++----
 drivers/iommu/ipmmu-vmsa.c              | 2 +-
 drivers/iommu/msm_iommu.c               | 2 +-
 drivers/iommu/mtk_iommu.c               | 2 +-
 drivers/iommu/qcom_iommu.c              | 2 +-
 include/linux/io-pgtable.h              | 4 +++-
 10 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index de22a2276e00..6e8145c36e93 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -222,7 +222,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo)
 		size_t unmapped_page;
 		size_t pgsize = get_pgsize(iova, len - unmapped_len);
 
-		unmapped_page = ops->unmap(ops, iova, pgsize);
+		unmapped_page = ops->unmap(ops, iova, pgsize, NULL);
 		if (!unmapped_page)
 			break;
 
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 231093413ff9..8e2e53079f48 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2015,7 +2015,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 	if (!ops)
 		return 0;
 
-	ret = ops->unmap(ops, iova, size);
+	ret = ops->unmap(ops, iova, size, gather);
 	if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size))
 		return 0;
 
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 07a267c437d6..f6689956ab6e 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1362,7 +1362,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 		return 0;
 
 	arm_smmu_rpm_get(smmu);
-	ret = ops->unmap(ops, iova, size);
+	ret = ops->unmap(ops, iova, size, gather);
 	arm_smmu_rpm_put(smmu);
 
 	return ret;
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 203894fb6765..a7776e982b6c 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -666,7 +666,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 }
 
 static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			    size_t size)
+			    size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 
@@ -892,7 +892,7 @@ static int __init arm_v7s_do_selftests(void)
 	size = 1UL << __ffs(cfg.pgsize_bitmap);
 	while (i < loopnr) {
 		iova_start = i * SZ_16M;
-		if (ops->unmap(ops, iova_start + size, size) != size)
+		if (ops->unmap(ops, iova_start + size, size, NULL) != size)
 			return __FAIL(ops);
 
 		/* Remap of partial unmap */
@@ -910,7 +910,7 @@ static int __init arm_v7s_do_selftests(void)
 	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
 		size = 1UL << i;
 
-		if (ops->unmap(ops, iova, size) != size)
+		if (ops->unmap(ops, iova, size, NULL) != size)
 			return __FAIL(ops);
 
 		if (ops->iova_to_phys(ops, iova + 42))
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f35516744965..325430f8a0a1 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -12,7 +12,6 @@
 #include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/io-pgtable.h>
-#include <linux/iommu.h>
 #include <linux/kernel.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
@@ -642,7 +641,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 }
 
 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			     size_t size)
+			     size_t size, struct iommu_iotlb_gather *gather)
 {
 	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	arm_lpae_iopte *ptep = data->pgd;
@@ -1167,7 +1166,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 
 		/* Partial unmap */
 		size = 1UL << __ffs(cfg->pgsize_bitmap);
-		if (ops->unmap(ops, SZ_1G + size, size) != size)
+		if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
 			return __FAIL(ops, i);
 
 		/* Remap of partial unmap */
@@ -1182,7 +1181,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
 			size = 1UL << j;
 
-			if (ops->unmap(ops, iova, size) != size)
+			if (ops->unmap(ops, iova, size, NULL) != size)
 				return __FAIL(ops, i);
 
 			if (ops->iova_to_phys(ops, iova + 42))
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index a2b8eff4c1f7..76a8ec343d53 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -737,7 +737,7 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
-	return domain->iop->unmap(domain->iop, iova, size);
+	return domain->iop->unmap(domain->iop, iova, size, gather);
 }
 
 static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index ccfc7ed230ef..8a0dcaf0a9e9 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -523,7 +523,7 @@ static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 	unsigned long flags;
 
 	spin_lock_irqsave(&priv->pgtlock, flags);
-	len = priv->iop->unmap(priv->iop, iova, len);
+	len = priv->iop->unmap(priv->iop, iova, len, gather);
 	spin_unlock_irqrestore(&priv->pgtlock, flags);
 
 	return len;
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 3785750bdb44..b73cffd63262 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -400,7 +400,7 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain,
 	size_t unmapsz;
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	unmapsz = dom->iop->unmap(dom->iop, iova, size);
+	unmapsz = dom->iop->unmap(dom->iop, iova, size, gather);
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
 	return unmapsz;
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 0b8a6d6bb475..48b288ef74b4 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -455,7 +455,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 	 */
 	pm_runtime_get_sync(qcom_domain->iommu->dev);
 	spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
-	ret = ops->unmap(ops, iova, size);
+	ret = ops->unmap(ops, iova, size, gather);
 	spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
 	pm_runtime_put_sync(qcom_domain->iommu->dev);
 
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 843310484fe2..fe27d93c8ad9 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -1,7 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __IO_PGTABLE_H
 #define __IO_PGTABLE_H
+
 #include <linux/bitops.h>
+#include <linux/iommu.h>
 
 /*
  * Public API for use by IOMMU drivers
@@ -136,7 +138,7 @@ struct io_pgtable_ops {
 	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
 	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
-			size_t size);
+			size_t size, struct iommu_iotlb_gather *gather);
 	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
 				    unsigned long iova);
 };
-- 
cgit v1.2.3


From 3951c41af4a65ba418e6b1b973d398552bedb84f Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Tue, 2 Jul 2019 16:45:15 +0100
Subject: iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->tlb_add_page()

With all the pieces in place, we can finally propagate the
iommu_iotlb_gather structure from the call to unmap() down to the IOMMU
drivers' implementation of ->tlb_add_page(). Currently everybody ignores
it, but the machinery is now there to defer invalidation.

Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm-smmu-v3.c        |  3 ++-
 drivers/iommu/arm-smmu.c           |  3 ++-
 drivers/iommu/io-pgtable-arm-v7s.c | 23 ++++++++++++++---------
 drivers/iommu/io-pgtable-arm.c     | 22 ++++++++++++++--------
 drivers/iommu/msm_iommu.c          |  3 ++-
 drivers/iommu/mtk_iommu.c          |  3 ++-
 drivers/iommu/qcom_iommu.c         |  3 ++-
 include/linux/io-pgtable.h         | 16 +++++++++-------
 8 files changed, 47 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 8e2e53079f48..d1ebc7103065 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1596,7 +1596,8 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
 	} while (size -= granule);
 }
 
-static void arm_smmu_tlb_inv_page_nosync(unsigned long iova, size_t granule,
+static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
+					 unsigned long iova, size_t granule,
 					 void *cookie)
 {
 	arm_smmu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index f6689956ab6e..5598d0ff71a8 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -574,7 +574,8 @@ static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size,
 	ops->tlb_sync(cookie);
 }
 
-static void arm_smmu_tlb_add_page(unsigned long iova, size_t granule,
+static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather,
+				  unsigned long iova, size_t granule,
 				  void *cookie)
 {
 	struct arm_smmu_domain *smmu_domain = cookie;
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index a7776e982b6c..18e7d212c7de 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -362,7 +362,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
 	return false;
 }
 
-static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
+static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *,
+			      struct iommu_iotlb_gather *, unsigned long,
 			      size_t, int, arm_v7s_iopte *);
 
 static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
@@ -383,7 +384,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
 			size_t sz = ARM_V7S_BLOCK_SIZE(lvl);
 
 			tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
-			if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
+			if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz,
 						    sz, lvl, tblp) != sz))
 				return -EINVAL;
 		} else if (ptep[i]) {
@@ -545,6 +546,7 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
 }
 
 static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
+				      struct iommu_iotlb_gather *gather,
 				      unsigned long iova, size_t size,
 				      arm_v7s_iopte blk_pte,
 				      arm_v7s_iopte *ptep)
@@ -581,14 +583,15 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
 			return 0;
 
 		tablep = iopte_deref(pte, 1);
-		return __arm_v7s_unmap(data, iova, size, 2, tablep);
+		return __arm_v7s_unmap(data, gather, iova, size, 2, tablep);
 	}
 
-	io_pgtable_tlb_add_page(&data->iop, iova, size);
+	io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
 	return size;
 }
 
 static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
+			      struct iommu_iotlb_gather *gather,
 			      unsigned long iova, size_t size, int lvl,
 			      arm_v7s_iopte *ptep)
 {
@@ -647,7 +650,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 				 */
 				smp_wmb();
 			} else {
-				io_pgtable_tlb_add_page(iop, iova, blk_size);
+				io_pgtable_tlb_add_page(iop, gather, iova, blk_size);
 			}
 			iova += blk_size;
 		}
@@ -657,12 +660,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
-		return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
+		return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0],
+					       ptep);
 	}
 
 	/* Keep on walkin' */
 	ptep = iopte_deref(pte[0], lvl);
-	return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
+	return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
 static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
@@ -673,7 +677,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	if (WARN_ON(upper_32_bits(iova)))
 		return 0;
 
-	return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
+	return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd);
 }
 
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
@@ -808,7 +812,8 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
-static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie)
+static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+			       unsigned long iova, size_t granule, void *cookie)
 {
 	dummy_tlb_flush(iova, granule, granule, cookie);
 }
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 325430f8a0a1..4c91359057c5 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -289,6 +289,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+			       struct iommu_iotlb_gather *gather,
 			       unsigned long iova, size_t size, int lvl,
 			       arm_lpae_iopte *ptep);
 
@@ -334,8 +335,10 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
 		size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
 
 		tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data);
-		if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz))
+		if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) {
+			WARN_ON(1);
 			return -EINVAL;
+		}
 	}
 
 	__arm_lpae_init_pte(data, paddr, prot, lvl, ptep);
@@ -536,6 +539,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 }
 
 static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
+				       struct iommu_iotlb_gather *gather,
 				       unsigned long iova, size_t size,
 				       arm_lpae_iopte blk_pte, int lvl,
 				       arm_lpae_iopte *ptep)
@@ -581,14 +585,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
 
 		tablep = iopte_deref(pte, data);
 	} else if (unmap_idx >= 0) {
-		io_pgtable_tlb_add_page(&data->iop, iova, size);
+		io_pgtable_tlb_add_page(&data->iop, gather, iova, size);
 		return size;
 	}
 
-	return __arm_lpae_unmap(data, iova, size, lvl, tablep);
+	return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep);
 }
 
 static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
+			       struct iommu_iotlb_gather *gather,
 			       unsigned long iova, size_t size, int lvl,
 			       arm_lpae_iopte *ptep)
 {
@@ -622,7 +627,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 			 */
 			smp_wmb();
 		} else {
-			io_pgtable_tlb_add_page(iop, iova, size);
+			io_pgtable_tlb_add_page(iop, gather, iova, size);
 		}
 
 		return size;
@@ -631,13 +636,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
 		 * Insert a table at the next level to map the old region,
 		 * minus the part we want to unmap
 		 */
-		return arm_lpae_split_blk_unmap(data, iova, size, pte,
+		return arm_lpae_split_blk_unmap(data, gather, iova, size, pte,
 						lvl + 1, ptep);
 	}
 
 	/* Keep on walkin' */
 	ptep = iopte_deref(pte, data);
-	return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep);
+	return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep);
 }
 
 static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
@@ -650,7 +655,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
 		return 0;
 
-	return __arm_lpae_unmap(data, iova, size, lvl, ptep);
+	return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep);
 }
 
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
@@ -1074,7 +1079,8 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule,
 	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
 }
 
-static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie)
+static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather,
+			       unsigned long iova, size_t granule, void *cookie)
 {
 	dummy_tlb_flush(iova, granule, granule, cookie);
 }
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 8a0dcaf0a9e9..4c0be5b75c28 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -180,7 +180,8 @@ static void __flush_iotlb_leaf(unsigned long iova, size_t size,
 	__flush_iotlb_range(iova, size, granule, true, cookie);
 }
 
-static void __flush_iotlb_page(unsigned long iova, size_t granule, void *cookie)
+static void __flush_iotlb_page(struct iommu_iotlb_gather *gather,
+			       unsigned long iova, size_t granule, void *cookie)
 {
 	__flush_iotlb_range(iova, granule, granule, true, cookie);
 }
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index b73cffd63262..0827d51936fa 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -202,7 +202,8 @@ static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
 	mtk_iommu_tlb_sync(cookie);
 }
 
-static void mtk_iommu_tlb_flush_page_nosync(unsigned long iova, size_t granule,
+static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather,
+					    unsigned long iova, size_t granule,
 					    void *cookie)
 {
 	mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie);
diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c
index 48b288ef74b4..eac760cdbb28 100644
--- a/drivers/iommu/qcom_iommu.c
+++ b/drivers/iommu/qcom_iommu.c
@@ -178,7 +178,8 @@ static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size,
 	qcom_iommu_tlb_sync(cookie);
 }
 
-static void qcom_iommu_tlb_add_page(unsigned long iova, size_t granule,
+static void qcom_iommu_tlb_add_page(struct iommu_iotlb_gather *gather,
+				    unsigned long iova, size_t granule,
 				    void *cookie)
 {
 	qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie);
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index fe27d93c8ad9..6b1b8be3ebec 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -28,10 +28,10 @@ enum io_pgtable_fmt {
  * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual
  *                  address range.
  * @tlb_add_page:   Optional callback to queue up leaf TLB invalidation for a
- *                  single page. This function exists purely as an optimisation
- *                  for IOMMUs that cannot batch TLB invalidation operations
- *                  efficiently and are therefore better suited to issuing them
- *                  early rather than deferring them until iommu_tlb_sync().
+ *                  single page.  IOMMUs that cannot batch TLB invalidation
+ *                  operations efficiently will typically issue them here, but
+ *                  others may decide to update the iommu_iotlb_gather structure
+ *                  and defer the invalidation until iommu_tlb_sync() instead.
  *
  * Note that these can all be called in atomic context and must therefore
  * not block.
@@ -42,7 +42,8 @@ struct iommu_flush_ops {
 			       void *cookie);
 	void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule,
 			       void *cookie);
-	void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie);
+	void (*tlb_add_page)(struct iommu_iotlb_gather *gather,
+			     unsigned long iova, size_t granule, void *cookie);
 };
 
 /**
@@ -209,11 +210,12 @@ io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova,
 }
 
 static inline void
-io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova,
+io_pgtable_tlb_add_page(struct io_pgtable *iop,
+			struct iommu_iotlb_gather * gather, unsigned long iova,
 			size_t granule)
 {
 	if (iop->cfg.tlb->tlb_add_page)
-		iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie);
+		iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie);
 }
 
 /**
-- 
cgit v1.2.3


From 156189a6d7a797d6fa0b13dd6b4f32b11d234a99 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 29 Jul 2019 01:40:15 +0900
Subject: leds: netxbig: remove legacy board-file support

Since commit ebc278f15759 ("ARM: mvebu: remove static LED setup for
netxbig boards"), no one in upstream passes in the platform data to
this driver.

Squash leds-kirkwood-netxbig.h into the driver, and remove the legacy
board-file support.

Link: https://lkml.org/lkml/2019/7/20/83
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 drivers/leds/Kconfig                               |  1 +
 drivers/leds/leds-netxbig.c                        | 70 +++++++++++++++-------
 .../linux/platform_data/leds-kirkwood-netxbig.h    | 54 -----------------
 3 files changed, 51 insertions(+), 74 deletions(-)
 delete mode 100644 include/linux/platform_data/leds-kirkwood-netxbig.h

(limited to 'include')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index f7a3dd7ecf3d..1988de1d64c0 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -590,6 +590,7 @@ config LEDS_NETXBIG
 	tristate "LED support for Big Network series LEDs"
 	depends on LEDS_CLASS
 	depends on MACH_KIRKWOOD
+	depends on OF_GPIO
 	default y
 	help
 	  This option enables support for LEDs found on the LaCie 2Big
diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c
index 10497a466775..0944cb111c34 100644
--- a/drivers/leds/leds-netxbig.c
+++ b/drivers/leds/leds-netxbig.c
@@ -15,7 +15,48 @@
 #include <linux/gpio.h>
 #include <linux/of_gpio.h>
 #include <linux/leds.h>
-#include <linux/platform_data/leds-kirkwood-netxbig.h>
+
+struct netxbig_gpio_ext {
+	unsigned int	*addr;
+	int		num_addr;
+	unsigned int	*data;
+	int		num_data;
+	unsigned int	enable;
+};
+
+enum netxbig_led_mode {
+	NETXBIG_LED_OFF,
+	NETXBIG_LED_ON,
+	NETXBIG_LED_SATA,
+	NETXBIG_LED_TIMER1,
+	NETXBIG_LED_TIMER2,
+	NETXBIG_LED_MODE_NUM,
+};
+
+#define NETXBIG_LED_INVALID_MODE NETXBIG_LED_MODE_NUM
+
+struct netxbig_led_timer {
+	unsigned long		delay_on;
+	unsigned long		delay_off;
+	enum netxbig_led_mode	mode;
+};
+
+struct netxbig_led {
+	const char	*name;
+	const char	*default_trigger;
+	int		mode_addr;
+	int		*mode_val;
+	int		bright_addr;
+	int		bright_max;
+};
+
+struct netxbig_led_platform_data {
+	struct netxbig_gpio_ext	*gpio_ext;
+	struct netxbig_led_timer *timer;
+	int			num_timer;
+	struct netxbig_led	*leds;
+	int			num_leds;
+};
 
 /*
  * GPIO extension bus.
@@ -306,7 +347,6 @@ static int create_netxbig_led(struct platform_device *pdev,
 	return devm_led_classdev_register(&pdev->dev, &led_dat->cdev);
 }
 
-#ifdef CONFIG_OF_GPIO
 static int gpio_ext_get_of_pdata(struct device *dev, struct device_node *np,
 				 struct netxbig_gpio_ext *gpio_ext)
 {
@@ -522,30 +562,20 @@ static const struct of_device_id of_netxbig_leds_match[] = {
 	{},
 };
 MODULE_DEVICE_TABLE(of, of_netxbig_leds_match);
-#else
-static inline int
-netxbig_leds_get_of_pdata(struct device *dev,
-			  struct netxbig_led_platform_data *pdata)
-{
-	return -ENODEV;
-}
-#endif /* CONFIG_OF_GPIO */
 
 static int netxbig_led_probe(struct platform_device *pdev)
 {
-	struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct netxbig_led_platform_data *pdata;
 	struct netxbig_led_data *leds_data;
 	int i;
 	int ret;
 
-	if (!pdata) {
-		pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
-		if (!pdata)
-			return -ENOMEM;
-		ret = netxbig_leds_get_of_pdata(&pdev->dev, pdata);
-		if (ret)
-			return ret;
-	}
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+	ret = netxbig_leds_get_of_pdata(&pdev->dev, pdata);
+	if (ret)
+		return ret;
 
 	leds_data = devm_kcalloc(&pdev->dev,
 				 pdata->num_leds, sizeof(*leds_data),
@@ -571,7 +601,7 @@ static struct platform_driver netxbig_led_driver = {
 	.probe		= netxbig_led_probe,
 	.driver		= {
 		.name		= "leds-netxbig",
-		.of_match_table	= of_match_ptr(of_netxbig_leds_match),
+		.of_match_table	= of_netxbig_leds_match,
 	},
 };
 
diff --git a/include/linux/platform_data/leds-kirkwood-netxbig.h b/include/linux/platform_data/leds-kirkwood-netxbig.h
deleted file mode 100644
index 3c85a735c380..000000000000
--- a/include/linux/platform_data/leds-kirkwood-netxbig.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Platform data structure for netxbig LED driver
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-
-#ifndef __LEDS_KIRKWOOD_NETXBIG_H
-#define __LEDS_KIRKWOOD_NETXBIG_H
-
-struct netxbig_gpio_ext {
-	unsigned	*addr;
-	int		num_addr;
-	unsigned	*data;
-	int		num_data;
-	unsigned	enable;
-};
-
-enum netxbig_led_mode {
-	NETXBIG_LED_OFF,
-	NETXBIG_LED_ON,
-	NETXBIG_LED_SATA,
-	NETXBIG_LED_TIMER1,
-	NETXBIG_LED_TIMER2,
-	NETXBIG_LED_MODE_NUM,
-};
-
-#define NETXBIG_LED_INVALID_MODE NETXBIG_LED_MODE_NUM
-
-struct netxbig_led_timer {
-	unsigned long		delay_on;
-	unsigned long		delay_off;
-	enum netxbig_led_mode	mode;
-};
-
-struct netxbig_led {
-	const char	*name;
-	const char	*default_trigger;
-	int		mode_addr;
-	int		*mode_val;
-	int		bright_addr;
-	int		bright_max;
-};
-
-struct netxbig_led_platform_data {
-	struct netxbig_gpio_ext	*gpio_ext;
-	struct netxbig_led_timer *timer;
-	int			num_timer;
-	struct netxbig_led	*leds;
-	int			num_leds;
-};
-
-#endif /* __LEDS_KIRKWOOD_NETXBIG_H */
-- 
cgit v1.2.3


From 6dbff13ca8a2ad2fddd904c2e789dd5e59a8644c Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Fri, 26 Jul 2019 18:06:52 +0200
Subject: include/bpf.h: Remove map_insert_ctx() stubs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When we changed the device and CPU maps to use linked lists instead of
bitmaps, we also removed the need for the map_insert_ctx() helpers to keep
track of the bitmaps inside each map. However, it seems I forgot to remove
the function definitions stubs, so remove those here.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 18f4cc2c6acd..bfdb54dd2ad1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -713,7 +713,6 @@ struct xdp_buff;
 struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
-void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -721,7 +720,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
-void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
 void __cpu_map_flush(struct bpf_map *map);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -801,10 +799,6 @@ static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
 	return NULL;
 }
 
-static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
-{
-}
-
 static inline void __dev_map_flush(struct bpf_map *map)
 {
 }
@@ -834,10 +828,6 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 	return NULL;
 }
 
-static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index)
-{
-}
-
 static inline void __cpu_map_flush(struct bpf_map *map)
 {
 }
-- 
cgit v1.2.3


From 6f9d451ab1a33728adb72d7ff66a7b374d665176 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Fri, 26 Jul 2019 18:06:55 +0200
Subject: xdp: Add devmap_hash map type for looking up devices by hashed index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A common pattern when using xdp_redirect_map() is to create a device map
where the lookup key is simply ifindex. Because device maps are arrays,
this leaves holes in the map, and the map has to be sized to fit the
largest ifindex, regardless of how many devices actually are actually
needed in the map.

This patch adds a second type of device map where the key is looked up
using a hashmap, instead of being used as an array index. This allows maps
to be densely packed, so they can be smaller.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h        |   7 ++
 include/linux/bpf_types.h  |   1 +
 include/trace/events/xdp.h |   3 +-
 include/uapi/linux/bpf.h   |   1 +
 kernel/bpf/devmap.c        | 200 +++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c      |   2 +
 net/core/filter.c          |   9 +-
 7 files changed, 220 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bfdb54dd2ad1..f9a506147c8a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -713,6 +713,7 @@ struct xdp_buff;
 struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_flush(struct bpf_map *map);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
@@ -799,6 +800,12 @@ static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
 	return NULL;
 }
 
+static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map,
+							     u32 key)
+{
+	return NULL;
+}
+
 static inline void __dev_map_flush(struct bpf_map *map)
 {
 }
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index eec5aeeeaf92..36a9c2325176 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -62,6 +62,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 #ifdef CONFIG_NET
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 #if defined(CONFIG_BPF_STREAM_PARSER)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 68899fdc985b..8c8420230a10 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -175,7 +175,8 @@ struct _bpf_dtab_netdev {
 #endif /* __DEVMAP_OBJ_TYPE */
 
 #define devmap_ifindex(fwd, map)				\
-	((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
+	((map->map_type == BPF_MAP_TYPE_DEVMAP ||		\
+	  map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ?		\
 	  ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)
 
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)		\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e985f07a98ed..6bbef0c7f585 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -134,6 +134,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_QUEUE,
 	BPF_MAP_TYPE_STACK,
 	BPF_MAP_TYPE_SK_STORAGE,
+	BPF_MAP_TYPE_DEVMAP_HASH,
 };
 
 /* Note that tracing related programs such as
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a0501266bdb8..9af048a932b5 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -37,6 +37,12 @@
  * notifier hook walks the map we know that new dev references can not be
  * added by the user because core infrastructure ensures dev_get_by_index()
  * calls will fail at this point.
+ *
+ * The devmap_hash type is a map type which interprets keys as ifindexes and
+ * indexes these using a hashmap. This allows maps that use ifindex as key to be
+ * densely packed instead of having holes in the lookup array for unused
+ * ifindexes. The setup and packet enqueue/send code is shared between the two
+ * types of devmap; only the lookup and insertion is different.
  */
 #include <linux/bpf.h>
 #include <net/xdp.h>
@@ -59,6 +65,7 @@ struct xdp_bulk_queue {
 
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
+	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
 	struct xdp_bulk_queue __percpu *bulkq;
 	struct rcu_head rcu;
@@ -70,11 +77,30 @@ struct bpf_dtab {
 	struct bpf_dtab_netdev **netdev_map;
 	struct list_head __percpu *flush_list;
 	struct list_head list;
+
+	/* these are only used for DEVMAP_HASH type maps */
+	struct hlist_head *dev_index_head;
+	spinlock_t index_lock;
+	unsigned int items;
+	u32 n_buckets;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
 
+static struct hlist_head *dev_map_create_hash(unsigned int entries)
+{
+	int i;
+	struct hlist_head *hash;
+
+	hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
+	if (hash != NULL)
+		for (i = 0; i < entries; i++)
+			INIT_HLIST_HEAD(&hash[i]);
+
+	return hash;
+}
+
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
 	int err, cpu;
@@ -97,6 +123,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += sizeof(struct list_head) * num_possible_cpus();
 
+	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
+
+		if (!dtab->n_buckets) /* Overflow check */
+			return -EINVAL;
+		cost += sizeof(struct hlist_head) * dtab->n_buckets;
+	}
+
 	/* if map size is larger than memlock limit, reject it */
 	err = bpf_map_charge_init(&dtab->map.memory, cost);
 	if (err)
@@ -115,8 +149,18 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 	if (!dtab->netdev_map)
 		goto free_percpu;
 
+	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
+		if (!dtab->dev_index_head)
+			goto free_map_area;
+
+		spin_lock_init(&dtab->index_lock);
+	}
+
 	return 0;
 
+free_map_area:
+	bpf_map_area_free(dtab->netdev_map);
 free_percpu:
 	free_percpu(dtab->flush_list);
 free_charge:
@@ -198,6 +242,7 @@ static void dev_map_free(struct bpf_map *map)
 
 	free_percpu(dtab->flush_list);
 	bpf_map_area_free(dtab->netdev_map);
+	kfree(dtab->dev_index_head);
 	kfree(dtab);
 }
 
@@ -218,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 	return 0;
 }
 
+static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
+						    int idx)
+{
+	return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
+}
+
+struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct hlist_head *head = dev_map_index_hash(dtab, key);
+	struct bpf_dtab_netdev *dev;
+
+	hlist_for_each_entry_rcu(dev, head, index_hlist)
+		if (dev->idx == key)
+			return dev;
+
+	return NULL;
+}
+
+static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
+				    void *next_key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	u32 idx, *next = next_key;
+	struct bpf_dtab_netdev *dev, *next_dev;
+	struct hlist_head *head;
+	int i = 0;
+
+	if (!key)
+		goto find_first;
+
+	idx = *(u32 *)key;
+
+	dev = __dev_map_hash_lookup_elem(map, idx);
+	if (!dev)
+		goto find_first;
+
+	next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
+				    struct bpf_dtab_netdev, index_hlist);
+
+	if (next_dev) {
+		*next = next_dev->idx;
+		return 0;
+	}
+
+	i = idx & (dtab->n_buckets - 1);
+	i++;
+
+ find_first:
+	for (; i < dtab->n_buckets; i++) {
+		head = dev_map_index_hash(dtab, i);
+
+		next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					    struct bpf_dtab_netdev,
+					    index_hlist);
+		if (next_dev) {
+			*next = next_dev->idx;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
 static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
 		       bool in_napi_ctx)
 {
@@ -373,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 	return dev ? &dev->ifindex : NULL;
 }
 
+static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
+								*(u32 *)key);
+	struct net_device *dev = obj ? obj->dev : NULL;
+
+	return dev ? &dev->ifindex : NULL;
+}
+
 static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
 {
 	if (dev->dev->netdev_ops->ndo_xdp_xmit) {
@@ -422,6 +540,28 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
+static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *old_dev;
+	int k = *(u32 *)key;
+	unsigned long flags;
+	int ret = -ENOENT;
+
+	spin_lock_irqsave(&dtab->index_lock, flags);
+
+	old_dev = __dev_map_hash_lookup_elem(map, k);
+	if (old_dev) {
+		dtab->items--;
+		hlist_del_init_rcu(&old_dev->index_hlist);
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&dtab->index_lock, flags);
+
+	return ret;
+}
+
 static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    struct bpf_dtab *dtab,
 						    u32 ifindex,
@@ -502,6 +642,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 				     map, key, value, map_flags);
 }
 
+static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
+				     void *key, void *value, u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_dtab_netdev *dev, *old_dev;
+	u32 ifindex = *(u32 *)value;
+	u32 idx = *(u32 *)key;
+	unsigned long flags;
+
+	if (unlikely(map_flags > BPF_EXIST || !ifindex))
+		return -EINVAL;
+
+	old_dev = __dev_map_hash_lookup_elem(map, idx);
+	if (old_dev && (map_flags & BPF_NOEXIST))
+		return -EEXIST;
+
+	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	spin_lock_irqsave(&dtab->index_lock, flags);
+
+	if (old_dev) {
+		hlist_del_rcu(&old_dev->index_hlist);
+	} else {
+		if (dtab->items >= dtab->map.max_entries) {
+			spin_unlock_irqrestore(&dtab->index_lock, flags);
+			call_rcu(&dev->rcu, __dev_map_entry_free);
+			return -E2BIG;
+		}
+		dtab->items++;
+	}
+
+	hlist_add_head_rcu(&dev->index_hlist,
+			   dev_map_index_hash(dtab, idx));
+	spin_unlock_irqrestore(&dtab->index_lock, flags);
+
+	if (old_dev)
+		call_rcu(&old_dev->rcu, __dev_map_entry_free);
+
+	return 0;
+}
+
+static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
+				   u64 map_flags)
+{
+	return __dev_map_hash_update_elem(current->nsproxy->net_ns,
+					 map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
 	.map_alloc = dev_map_alloc,
 	.map_free = dev_map_free,
@@ -512,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = {
 	.map_check_btf = map_check_no_btf,
 };
 
+const struct bpf_map_ops dev_map_hash_ops = {
+	.map_alloc = dev_map_alloc,
+	.map_free = dev_map_free,
+	.map_get_next_key = dev_map_hash_get_next_key,
+	.map_lookup_elem = dev_map_hash_lookup_elem,
+	.map_update_elem = dev_map_hash_update_elem,
+	.map_delete_elem = dev_map_hash_delete_elem,
+	.map_check_btf = map_check_no_btf,
+};
+
 static int dev_map_notification(struct notifier_block *notifier,
 				ulong event, void *ptr)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5900cbb966b1..cef851cd5c36 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_MAP_TYPE_DEVMAP:
+	case BPF_MAP_TYPE_DEVMAP_HASH:
 		if (func_id != BPF_FUNC_redirect_map &&
 		    func_id != BPF_FUNC_map_lookup_elem)
 			goto error;
@@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		break;
 	case BPF_FUNC_redirect_map:
 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
+		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
 			goto error;
diff --git a/net/core/filter.c b/net/core/filter.c
index 3961437ccc50..1eee70f80fba 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
 	int err;
 
 	switch (map->map_type) {
-	case BPF_MAP_TYPE_DEVMAP: {
+	case BPF_MAP_TYPE_DEVMAP:
+	case BPF_MAP_TYPE_DEVMAP_HASH: {
 		struct bpf_dtab_netdev *dst = fwd;
 
 		err = dev_map_enqueue(dst, xdp, dev_rx);
@@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void)
 	if (map) {
 		switch (map->map_type) {
 		case BPF_MAP_TYPE_DEVMAP:
+		case BPF_MAP_TYPE_DEVMAP_HASH:
 			__dev_map_flush(map);
 			break;
 		case BPF_MAP_TYPE_CPUMAP:
@@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
 	switch (map->map_type) {
 	case BPF_MAP_TYPE_DEVMAP:
 		return __dev_map_lookup_elem(map, index);
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		return __dev_map_hash_lookup_elem(map, index);
 	case BPF_MAP_TYPE_CPUMAP:
 		return __cpu_map_lookup_elem(map, index);
 	case BPF_MAP_TYPE_XSKMAP:
@@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 	ri->tgt_value = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+	if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
+	    map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
 		struct bpf_dtab_netdev *dst = fwd;
 
 		err = dev_map_generic_redirect(dst, skb, xdp_prog);
-- 
cgit v1.2.3


From f3e4ff28b8685d856f381ee6bcf88b6149a6db5b Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 24 Jul 2019 11:00:54 +0200
Subject: scsi: libfc: Whitespace cleanup in libfc.h

No functional change.

[mkp: typo]

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libfc.h | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 2d64b53f947c..9b87e1a1c646 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -115,7 +115,7 @@ struct fc_disc_port {
 	struct fc_lport    *lp;
 	struct list_head   peers;
 	struct work_struct rport_work;
-	u32                port_id;
+	u32		   port_id;
 };
 
 /**
@@ -155,14 +155,14 @@ struct fc_rport_operations {
  */
 struct fc_rport_libfc_priv {
 	struct fc_lport		   *local_port;
-	enum fc_rport_state        rp_state;
+	enum fc_rport_state	   rp_state;
 	u16			   flags;
 	#define FC_RP_FLAGS_REC_SUPPORTED	(1 << 0)
 	#define FC_RP_FLAGS_RETRY		(1 << 1)
 	#define FC_RP_STARTED			(1 << 2)
 	#define FC_RP_FLAGS_CONF_REQ		(1 << 3)
-	unsigned int	           e_d_tov;
-	unsigned int	           r_a_tov;
+	unsigned int		   e_d_tov;
+	unsigned int		   r_a_tov;
 };
 
 /**
@@ -191,24 +191,24 @@ struct fc_rport_priv {
 	struct fc_lport		    *local_port;
 	struct fc_rport		    *rport;
 	struct kref		    kref;
-	enum fc_rport_state         rp_state;
+	enum fc_rport_state	    rp_state;
 	struct fc_rport_identifiers ids;
 	u16			    flags;
-	u16		            max_seq;
+	u16			    max_seq;
 	u16			    disc_id;
 	u16			    maxframe_size;
-	unsigned int	            retries;
-	unsigned int	            major_retries;
-	unsigned int	            e_d_tov;
-	unsigned int	            r_a_tov;
-	struct mutex                rp_mutex;
+	unsigned int		    retries;
+	unsigned int		    major_retries;
+	unsigned int		    e_d_tov;
+	unsigned int		    r_a_tov;
+	struct mutex		    rp_mutex;
 	struct delayed_work	    retry_work;
-	enum fc_rport_event         event;
+	enum fc_rport_event	    event;
 	struct fc_rport_operations  *ops;
-	struct list_head            peers;
-	struct work_struct          event_work;
+	struct list_head	    peers;
+	struct work_struct	    event_work;
 	u32			    supported_classes;
-	u16                         prli_count;
+	u16			    prli_count;
 	struct rcu_head		    rcu;
 	u16			    sp_features;
 	u8			    spp_type;
@@ -618,12 +618,12 @@ struct libfc_function_template {
  * @disc_callback: Callback routine called when discovery completes
  */
 struct fc_disc {
-	unsigned char         retry_count;
-	unsigned char         pending;
-	unsigned char         requested;
-	unsigned short        seq_count;
-	unsigned char         buf_len;
-	u16                   disc_id;
+	unsigned char	      retry_count;
+	unsigned char	      pending;
+	unsigned char	      requested;
+	unsigned short	      seq_count;
+	unsigned char	      buf_len;
+	u16		      disc_id;
 
 	struct list_head      rports;
 	void		      *priv;
@@ -697,7 +697,7 @@ struct fc_lport {
 	struct fc_rport_priv	       *ms_rdata;
 	struct fc_rport_priv	       *ptp_rdata;
 	void			       *scsi_priv;
-	struct fc_disc                 disc;
+	struct fc_disc		       disc;
 
 	/* Virtual port information */
 	struct list_head	       vports;
@@ -715,7 +715,7 @@ struct fc_lport {
 	u8			       retry_count;
 
 	/* Fabric information */
-	u32                            port_id;
+	u32			       port_id;
 	u64			       wwpn;
 	u64			       wwnn;
 	unsigned int		       service_params;
@@ -743,11 +743,11 @@ struct fc_lport {
 	struct fc_ns_fts	       fcts;
 
 	/* Miscellaneous */
-	struct mutex                   lp_mutex;
-	struct list_head               list;
+	struct mutex		       lp_mutex;
+	struct list_head	       list;
 	struct delayed_work	       retry_work;
 	void			       *prov[FC_FC4_PROV_SIZE];
-	struct list_head               lport_list;
+	struct list_head	       lport_list;
 };
 
 /**
-- 
cgit v1.2.3


From 023358b136d490ca91735ac6490db3741af5a8bd Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 24 Jul 2019 11:00:55 +0200
Subject: scsi: fcoe: Embed fc_rport_priv in fcoe_rport structure

Gcc-9 complains for a memset across pointer boundaries, which happens as
the code tries to allocate a flexible array on the stack.  Turns out we
cannot do this without relying on gcc-isms, so with this patch we'll embed
the fc_rport_priv structure into fcoe_rport, can use the normal
'container_of' outcast, and will only have to do a memset over one
structure.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fcoe/fcoe_ctlr.c | 51 +++++++++++++++++--------------------------
 drivers/scsi/libfc/fc_rport.c |  5 ++++-
 include/scsi/libfcoe.h        |  1 +
 3 files changed, 25 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 1a85fe9e4b7b..fc32b5d76821 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2005,7 +2005,7 @@ EXPORT_SYMBOL_GPL(fcoe_wwn_from_mac);
  */
 static inline struct fcoe_rport *fcoe_ctlr_rport(struct fc_rport_priv *rdata)
 {
-	return (struct fcoe_rport *)(rdata + 1);
+	return container_of(rdata, struct fcoe_rport, rdata);
 }
 
 /**
@@ -2269,7 +2269,7 @@ static void fcoe_ctlr_vn_start(struct fcoe_ctlr *fip)
  */
 static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip,
 			      struct sk_buff *skb,
-			      struct fc_rport_priv *rdata)
+			      struct fcoe_rport *frport)
 {
 	struct fip_header *fiph;
 	struct fip_desc *desc = NULL;
@@ -2277,16 +2277,12 @@ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip,
 	struct fip_wwn_desc *wwn = NULL;
 	struct fip_vn_desc *vn = NULL;
 	struct fip_size_desc *size = NULL;
-	struct fcoe_rport *frport;
 	size_t rlen;
 	size_t dlen;
 	u32 desc_mask = 0;
 	u32 dtype;
 	u8 sub;
 
-	memset(rdata, 0, sizeof(*rdata) + sizeof(*frport));
-	frport = fcoe_ctlr_rport(rdata);
-
 	fiph = (struct fip_header *)skb->data;
 	frport->flags = ntohs(fiph->fip_flags);
 
@@ -2349,15 +2345,17 @@ static int fcoe_ctlr_vn_parse(struct fcoe_ctlr *fip,
 			if (dlen != sizeof(struct fip_wwn_desc))
 				goto len_err;
 			wwn = (struct fip_wwn_desc *)desc;
-			rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn);
+			frport->rdata.ids.node_name =
+				get_unaligned_be64(&wwn->fd_wwn);
 			break;
 		case FIP_DT_VN_ID:
 			if (dlen != sizeof(struct fip_vn_desc))
 				goto len_err;
 			vn = (struct fip_vn_desc *)desc;
 			memcpy(frport->vn_mac, vn->fd_mac, ETH_ALEN);
-			rdata->ids.port_id = ntoh24(vn->fd_fc_id);
-			rdata->ids.port_name = get_unaligned_be64(&vn->fd_wwpn);
+			frport->rdata.ids.port_id = ntoh24(vn->fd_fc_id);
+			frport->rdata.ids.port_name =
+				get_unaligned_be64(&vn->fd_wwpn);
 			break;
 		case FIP_DT_FC4F:
 			if (dlen != sizeof(struct fip_fc4_feat))
@@ -2738,10 +2736,7 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 {
 	struct fip_header *fiph;
 	enum fip_vn2vn_subcode sub;
-	struct {
-		struct fc_rport_priv rdata;
-		struct fcoe_rport frport;
-	} buf;
+	struct fcoe_rport frport = { };
 	int rc, vlan_id = 0;
 
 	fiph = (struct fip_header *)skb->data;
@@ -2757,7 +2752,7 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 		goto drop;
 	}
 
-	rc = fcoe_ctlr_vn_parse(fip, skb, &buf.rdata);
+	rc = fcoe_ctlr_vn_parse(fip, skb, &frport);
 	if (rc) {
 		LIBFCOE_FIP_DBG(fip, "vn_recv vn_parse error %d\n", rc);
 		goto drop;
@@ -2766,19 +2761,19 @@ static int fcoe_ctlr_vn_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 	mutex_lock(&fip->ctlr_mutex);
 	switch (sub) {
 	case FIP_SC_VN_PROBE_REQ:
-		fcoe_ctlr_vn_probe_req(fip, &buf.rdata);
+		fcoe_ctlr_vn_probe_req(fip, &frport.rdata);
 		break;
 	case FIP_SC_VN_PROBE_REP:
-		fcoe_ctlr_vn_probe_reply(fip, &buf.rdata);
+		fcoe_ctlr_vn_probe_reply(fip, &frport.rdata);
 		break;
 	case FIP_SC_VN_CLAIM_NOTIFY:
-		fcoe_ctlr_vn_claim_notify(fip, &buf.rdata);
+		fcoe_ctlr_vn_claim_notify(fip, &frport.rdata);
 		break;
 	case FIP_SC_VN_CLAIM_REP:
-		fcoe_ctlr_vn_claim_resp(fip, &buf.rdata);
+		fcoe_ctlr_vn_claim_resp(fip, &frport.rdata);
 		break;
 	case FIP_SC_VN_BEACON:
-		fcoe_ctlr_vn_beacon(fip, &buf.rdata);
+		fcoe_ctlr_vn_beacon(fip, &frport.rdata);
 		break;
 	default:
 		LIBFCOE_FIP_DBG(fip, "vn_recv unknown subcode %d\n", sub);
@@ -2802,22 +2797,18 @@ drop:
  */
 static int fcoe_ctlr_vlan_parse(struct fcoe_ctlr *fip,
 			      struct sk_buff *skb,
-			      struct fc_rport_priv *rdata)
+			      struct fcoe_rport *frport)
 {
 	struct fip_header *fiph;
 	struct fip_desc *desc = NULL;
 	struct fip_mac_desc *macd = NULL;
 	struct fip_wwn_desc *wwn = NULL;
-	struct fcoe_rport *frport;
 	size_t rlen;
 	size_t dlen;
 	u32 desc_mask = 0;
 	u32 dtype;
 	u8 sub;
 
-	memset(rdata, 0, sizeof(*rdata) + sizeof(*frport));
-	frport = fcoe_ctlr_rport(rdata);
-
 	fiph = (struct fip_header *)skb->data;
 	frport->flags = ntohs(fiph->fip_flags);
 
@@ -2871,7 +2862,8 @@ static int fcoe_ctlr_vlan_parse(struct fcoe_ctlr *fip,
 			if (dlen != sizeof(struct fip_wwn_desc))
 				goto len_err;
 			wwn = (struct fip_wwn_desc *)desc;
-			rdata->ids.node_name = get_unaligned_be64(&wwn->fd_wwn);
+			frport->rdata.ids.node_name =
+				get_unaligned_be64(&wwn->fd_wwn);
 			break;
 		default:
 			LIBFCOE_FIP_DBG(fip, "unexpected descriptor type %x "
@@ -2982,22 +2974,19 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb)
 {
 	struct fip_header *fiph;
 	enum fip_vlan_subcode sub;
-	struct {
-		struct fc_rport_priv rdata;
-		struct fcoe_rport frport;
-	} buf;
+	struct fcoe_rport frport = { };
 	int rc;
 
 	fiph = (struct fip_header *)skb->data;
 	sub = fiph->fip_subcode;
-	rc = fcoe_ctlr_vlan_parse(fip, skb, &buf.rdata);
+	rc = fcoe_ctlr_vlan_parse(fip, skb, &frport);
 	if (rc) {
 		LIBFCOE_FIP_DBG(fip, "vlan_recv vlan_parse error %d\n", rc);
 		goto drop;
 	}
 	mutex_lock(&fip->ctlr_mutex);
 	if (sub == FIP_SC_VL_REQ)
-		fcoe_ctlr_vlan_disc_reply(fip, &buf.rdata);
+		fcoe_ctlr_vlan_disc_reply(fip, &frport.rdata);
 	mutex_unlock(&fip->ctlr_mutex);
 
 drop:
diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index e0f3852fdad1..da6e97d8dc3b 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -128,6 +128,7 @@ EXPORT_SYMBOL(fc_rport_lookup);
 struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, u32 port_id)
 {
 	struct fc_rport_priv *rdata;
+	size_t rport_priv_size = sizeof(*rdata);
 
 	lockdep_assert_held(&lport->disc.disc_mutex);
 
@@ -135,7 +136,9 @@ struct fc_rport_priv *fc_rport_create(struct fc_lport *lport, u32 port_id)
 	if (rdata)
 		return rdata;
 
-	rdata = kzalloc(sizeof(*rdata) + lport->rport_priv_size, GFP_KERNEL);
+	if (lport->rport_priv_size > 0)
+		rport_priv_size = lport->rport_priv_size;
+	rdata = kzalloc(rport_priv_size, GFP_KERNEL);
 	if (!rdata)
 		return NULL;
 
diff --git a/include/scsi/libfcoe.h b/include/scsi/libfcoe.h
index dc14b52577f7..2568cb0627ec 100644
--- a/include/scsi/libfcoe.h
+++ b/include/scsi/libfcoe.h
@@ -229,6 +229,7 @@ struct fcoe_fcf {
  * @vn_mac:	VN_Node assigned MAC address for data
  */
 struct fcoe_rport {
+	struct fc_rport_priv rdata;
 	unsigned long time;
 	u16 fcoe_len;
 	u16 flags;
-- 
cgit v1.2.3


From 9091373ab7ea27cad381ce71aa37de6b9e687e81 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 30 Jul 2019 14:43:47 +0900
Subject: gpio: remove less important #ifdef around declarations

The whole struct/function declarations in this header are surrounded
by #ifdef.

As far as I understood, the motivation of this is probably to break
the build earlier if a driver misses to select or depend on correct
CONFIG options in Kconfig.

Since commit 94bed2a9c4ae ("Add -Werror-implicit-function-declaration")
no one cannot call functions that have not been declared.

So, I see some benefit in doing this in the cost of uglier headers.

In reality, it would not be so easy to catch missed 'select' or
'depends on' because GPIOLIB, GPIOLIB_IRQCHIP etc. are already selected
by someone else eventually. So, this kind of error, if any, will be
caught by randconfig bots.

In summary, I am not a big fan of cluttered #ifdef nesting, and this
does not matter for normal developers. The code readability wins.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 include/linux/gpio/driver.h | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 6a0e420915a3..f28f534f451a 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -20,9 +20,6 @@ struct module;
 enum gpiod_flags;
 enum gpio_lookup_flags;
 
-#ifdef CONFIG_GPIOLIB
-
-#ifdef CONFIG_GPIOLIB_IRQCHIP
 /**
  * struct gpio_irq_chip - GPIO interrupt controller
  */
@@ -161,7 +158,6 @@ struct gpio_irq_chip {
 	 */
 	void		(*irq_disable)(struct irq_data *data);
 };
-#endif /* CONFIG_GPIOLIB_IRQCHIP */
 
 /**
  * struct gpio_chip - abstract a GPIO controller
@@ -441,16 +437,12 @@ bool gpiochip_line_is_valid(const struct gpio_chip *chip, unsigned int offset);
 /* get driver data */
 void *gpiochip_get_data(struct gpio_chip *chip);
 
-struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
-
 struct bgpio_pdata {
 	const char *label;
 	int base;
 	int ngpio;
 };
 
-#if IS_ENABLED(CONFIG_GPIO_GENERIC)
-
 int bgpio_init(struct gpio_chip *gc, struct device *dev,
 	       unsigned long sz, void __iomem *dat, void __iomem *set,
 	       void __iomem *clr, void __iomem *dirout, void __iomem *dirin,
@@ -463,10 +455,6 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev,
 #define BGPIOF_READ_OUTPUT_REG_SET	BIT(4) /* reg_set stores output value */
 #define BGPIOF_NO_OUTPUT		BIT(5) /* only input */
 
-#endif /* CONFIG_GPIO_GENERIC */
-
-#ifdef CONFIG_GPIOLIB_IRQCHIP
-
 int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 		     irq_hw_number_t hwirq);
 void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq);
@@ -555,15 +543,11 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip,
 }
 #endif /* CONFIG_LOCKDEP */
 
-#endif /* CONFIG_GPIOLIB_IRQCHIP */
-
 int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset);
 void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset);
 int gpiochip_generic_config(struct gpio_chip *chip, unsigned offset,
 			    unsigned long config);
 
-#ifdef CONFIG_PINCTRL
-
 /**
  * struct gpio_pin_range - pin range controlled by a gpio chip
  * @node: list for maintaining set of pin ranges, used internally
@@ -576,6 +560,8 @@ struct gpio_pin_range {
 	struct pinctrl_gpio_range range;
 };
 
+#ifdef CONFIG_PINCTRL
+
 int gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 			   unsigned int gpio_offset, unsigned int pin_offset,
 			   unsigned int npins);
@@ -586,8 +572,6 @@ void gpiochip_remove_pin_ranges(struct gpio_chip *chip);
 
 #else /* ! CONFIG_PINCTRL */
 
-struct pinctrl_dev;
-
 static inline int
 gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 		       unsigned int gpio_offset, unsigned int pin_offset,
@@ -619,6 +603,11 @@ void gpiochip_free_own_desc(struct gpio_desc *desc);
 void devprop_gpiochip_set_names(struct gpio_chip *chip,
 				const struct fwnode_handle *fwnode);
 
+
+#ifdef CONFIG_GPIOLIB
+
+struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
+
 #else /* CONFIG_GPIOLIB */
 
 static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
@@ -630,4 +619,4 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
 
 #endif /* CONFIG_GPIOLIB */
 
-#endif
+#endif /* __LINUX_GPIO_DRIVER_H */
-- 
cgit v1.2.3


From 10a08fd65ec1a68ccd86b19ec822ed5f2e50113f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 30 Jul 2019 11:55:59 +0200
Subject: ACPI: PM: Set up EC GPE for system wakeup from drivers that need it

The EC GPE needs to be set up for system wakeup only if there is a
driver depending on it, either intel-hid or intel-vbtn, bound to a
button device that is expected to wake up the system from sleep (such
as the power button on some Dell systems, like the XPS13 9360).  It
doesn't need to be set up for waking up the system from sleep in any
other cases and whether or not it is expected to wake up the system
from sleep doesn't depend on whether or not the LPS0 device is
present in the ACPI namespace.

For this reason, rearrange the ACPI suspend-to-idle code to make the
drivers depending on the EC GPE wakeup take care of setting it up and
decouple that from the LPS0 device handling.

While at it, make intel-hid and intel-vbtn prepare for system wakeup
only if they are allowed to wake up the system from sleep by user
space (via sysfs).

[Note that acpi_ec_mark_gpe_for_wake() and acpi_ec_set_gpe_wake_mask()
 are there to prevent the EC GPE from being disabled by the
 acpi_enable_all_wakeup_gpes() call in acpi_s2idle_prepare(), so on
 systems with either intel-hid or intel-vbtn this change doesn't
 affect any interactions with the hardware or platform firmware.]

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
---
 drivers/acpi/ec.c                 |  7 ++++++-
 drivers/acpi/internal.h           |  2 --
 drivers/acpi/sleep.c              | 13 ++-----------
 drivers/platform/x86/intel-hid.c  | 20 ++++++++++++++++----
 drivers/platform/x86/intel-vbtn.c | 20 ++++++++++++++++----
 include/linux/acpi.h              |  4 ++++
 include/linux/suspend.h           |  1 +
 7 files changed, 45 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 58c7ad402d8d..b996ca5f253f 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
+#include <linux/suspend.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <asm/io.h>
@@ -1048,17 +1049,21 @@ void acpi_ec_unblock_transactions(void)
 		acpi_ec_start(first_ec, true);
 }
 
+#ifdef CONFIG_PM_SLEEP
 void acpi_ec_mark_gpe_for_wake(void)
 {
 	if (first_ec && !ec_no_wakeup)
 		acpi_mark_gpe_for_wake(NULL, first_ec->gpe);
 }
+EXPORT_SYMBOL_GPL(acpi_ec_mark_gpe_for_wake);
 
 void acpi_ec_set_gpe_wake_mask(u8 action)
 {
-	if (first_ec && !ec_no_wakeup)
+	if (pm_suspend_no_platform() && first_ec && !ec_no_wakeup)
 		acpi_set_gpe_wake_mask(NULL, first_ec->gpe, action);
 }
+EXPORT_SYMBOL_GPL(acpi_ec_set_gpe_wake_mask);
+#endif
 
 bool acpi_ec_dispatch_gpe(void)
 {
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 1b5f9ac06ea8..bcc080511197 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -194,8 +194,6 @@ void acpi_ec_ecdt_probe(void);
 void acpi_ec_dsdt_probe(void);
 void acpi_ec_block_transactions(void);
 void acpi_ec_unblock_transactions(void);
-void acpi_ec_mark_gpe_for_wake(void);
-void acpi_ec_set_gpe_wake_mask(u8 action);
 bool acpi_ec_dispatch_gpe(void);
 int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
 			      acpi_handle handle, acpi_ec_query_func func,
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 970ae7c7a3f7..9cb0532f7471 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -930,8 +930,6 @@ static int lps0_device_attach(struct acpi_device *adev,
 
 		acpi_handle_debug(adev->handle, "_DSM function mask: 0x%x\n",
 				  bitmask);
-
-		acpi_ec_mark_gpe_for_wake();
 	} else {
 		acpi_handle_debug(adev->handle,
 				  "_DSM function 0 evaluation failed\n");
@@ -960,8 +958,6 @@ static int acpi_s2idle_prepare(void)
 	if (lps0_device_handle) {
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
-
-		acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE);
 	}
 
 	if (acpi_sci_irq_valid())
@@ -979,10 +975,7 @@ static int acpi_s2idle_prepare(void)
 
 static void acpi_s2idle_wake(void)
 {
-	if (!lps0_device_handle)
-		return;
-
-	if (pm_debug_messages_on)
+	if (lps0_device_handle && pm_debug_messages_on)
 		lpi_check_constraints();
 
 	/*
@@ -1031,8 +1024,6 @@ static void acpi_s2idle_restore(void)
 		disable_irq_wake(acpi_sci_irq);
 
 	if (lps0_device_handle) {
-		acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE);
-
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
 		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
 	}
@@ -1081,7 +1072,7 @@ bool acpi_s2idle_wakeup(void)
 
 bool acpi_sleep_no_ec_events(void)
 {
-	return !s2idle_in_progress || !lps0_device_handle;
+	return !s2idle_in_progress;
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c
index bc0d55a59015..e51c72b92cbd 100644
--- a/drivers/platform/x86/intel-hid.c
+++ b/drivers/platform/x86/intel-hid.c
@@ -253,9 +253,12 @@ static void intel_button_array_enable(struct device *device, bool enable)
 
 static int intel_hid_pm_prepare(struct device *device)
 {
-	struct intel_hid_priv *priv = dev_get_drvdata(device);
+	if (device_may_wakeup(device)) {
+		struct intel_hid_priv *priv = dev_get_drvdata(device);
 
-	priv->wakeup_mode = true;
+		priv->wakeup_mode = true;
+		acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE);
+	}
 	return 0;
 }
 
@@ -270,9 +273,12 @@ static int intel_hid_pl_suspend_handler(struct device *device)
 
 static int intel_hid_pl_resume_handler(struct device *device)
 {
-	struct intel_hid_priv *priv = dev_get_drvdata(device);
+	if (device_may_wakeup(device)) {
+		struct intel_hid_priv *priv = dev_get_drvdata(device);
 
-	priv->wakeup_mode = false;
+		acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE);
+		priv->wakeup_mode = false;
+	}
 	if (pm_resume_via_firmware()) {
 		intel_hid_set_enable(device, true);
 		intel_button_array_enable(device, true);
@@ -491,6 +497,12 @@ static int intel_hid_probe(struct platform_device *device)
 	}
 
 	device_init_wakeup(&device->dev, true);
+	/*
+	 * In order for system wakeup to work, the EC GPE has to be marked as
+	 * a wakeup one, so do that here (this setting will persist, but it has
+	 * no effect until the wakeup mask is set for the EC GPE).
+	 */
+	acpi_ec_mark_gpe_for_wake();
 	return 0;
 
 err_remove_notify:
diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index a0d0cecff55f..ab84e1bbdedd 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -176,6 +176,12 @@ static int intel_vbtn_probe(struct platform_device *device)
 		return -EBUSY;
 
 	device_init_wakeup(&device->dev, true);
+	/*
+	 * In order for system wakeup to work, the EC GPE has to be marked as
+	 * a wakeup one, so do that here (this setting will persist, but it has
+	 * no effect until the wakeup mask is set for the EC GPE).
+	 */
+	acpi_ec_mark_gpe_for_wake();
 	return 0;
 }
 
@@ -195,17 +201,23 @@ static int intel_vbtn_remove(struct platform_device *device)
 
 static int intel_vbtn_pm_prepare(struct device *dev)
 {
-	struct intel_vbtn_priv *priv = dev_get_drvdata(dev);
+	if (device_may_wakeup(dev)) {
+		struct intel_vbtn_priv *priv = dev_get_drvdata(dev);
 
-	priv->wakeup_mode = true;
+		priv->wakeup_mode = true;
+		acpi_ec_set_gpe_wake_mask(ACPI_GPE_ENABLE);
+	}
 	return 0;
 }
 
 static int intel_vbtn_pm_resume(struct device *dev)
 {
-	struct intel_vbtn_priv *priv = dev_get_drvdata(dev);
+	if (device_may_wakeup(dev)) {
+		struct intel_vbtn_priv *priv = dev_get_drvdata(dev);
 
-	priv->wakeup_mode = false;
+		acpi_ec_set_gpe_wake_mask(ACPI_GPE_DISABLE);
+		priv->wakeup_mode = false;
+	}
 	return 0;
 }
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 9426b9aaed86..e65a4c5bbeae 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -931,6 +931,8 @@ int acpi_subsys_suspend_noirq(struct device *dev);
 int acpi_subsys_suspend(struct device *dev);
 int acpi_subsys_freeze(struct device *dev);
 int acpi_subsys_poweroff(struct device *dev);
+void acpi_ec_mark_gpe_for_wake(void);
+void acpi_ec_set_gpe_wake_mask(u8 action);
 #else
 static inline int acpi_subsys_prepare(struct device *dev) { return 0; }
 static inline void acpi_subsys_complete(struct device *dev) {}
@@ -939,6 +941,8 @@ static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; }
 static inline int acpi_subsys_suspend(struct device *dev) { return 0; }
 static inline int acpi_subsys_freeze(struct device *dev) { return 0; }
 static inline int acpi_subsys_poweroff(struct device *dev) { return 0; }
+static inline void acpi_ec_mark_gpe_for_wake(void) {}
+static inline void acpi_ec_set_gpe_wake_mask(u8 action) {}
 #endif
 
 #ifdef CONFIG_ACPI
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 66ce3871ed61..f0c4a8445140 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -335,6 +335,7 @@ static inline void pm_set_suspend_via_firmware(void) {}
 static inline void pm_set_resume_via_firmware(void) {}
 static inline bool pm_suspend_via_firmware(void) { return false; }
 static inline bool pm_resume_via_firmware(void) { return false; }
+static inline bool pm_suspend_no_platform(void) { return false; }
 static inline bool pm_suspend_default_s2idle(void) { return false; }
 
 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
-- 
cgit v1.2.3


From 6cda08a20dbde45b021091230c8a359fa08c5103 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:32 +0100
Subject: drivers: Introduce device lookup variants by name

Add a helper to match the device name for device lookup. Also
reuse this generic exported helper for the existing bus_find_device_by_name().
and add similar variants for driver/class.

Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexander Aring <alex.aring@gmail.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Murphy <dmurphy@ti.com>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: linux-leds@vger.kernel.org
Cc: linux-rtc@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Cc: linux-wpan@vger.kernel.org
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Peter Oberparleiter <oberpar@linux.ibm.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Stefan Schmidt <stefan@datenfreihafen.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20190723221838.12024-2-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/bus.c               | 24 -----------------------
 drivers/base/core.c              |  6 ++++++
 drivers/hwtracing/stm/core.c     |  9 +--------
 drivers/leds/led-class.c         |  9 +--------
 drivers/rtc/interface.c          | 11 +----------
 drivers/s390/cio/ccwgroup.c      | 10 +---------
 drivers/s390/cio/device.c        | 15 +-------------
 drivers/s390/crypto/zcrypt_api.c | 11 +----------
 drivers/usb/roles/class.c        |  8 +-------
 drivers/usb/typec/class.c        |  8 +-------
 include/linux/device.h           | 42 +++++++++++++++++++++++++++++++++++++---
 net/ieee802154/core.c            |  7 +------
 12 files changed, 54 insertions(+), 106 deletions(-)

(limited to 'include')

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index df3cac739813..a1d1e8256324 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -342,30 +342,6 @@ struct device *bus_find_device(struct bus_type *bus,
 }
 EXPORT_SYMBOL_GPL(bus_find_device);
 
-static int match_name(struct device *dev, const void *data)
-{
-	const char *name = data;
-
-	return sysfs_streq(name, dev_name(dev));
-}
-
-/**
- * bus_find_device_by_name - device iterator for locating a particular device of a specific name
- * @bus: bus type
- * @start: Device to begin with
- * @name: name of the device to match
- *
- * This is similar to the bus_find_device() function above, but it handles
- * searching by a name automatically, no need to write another strcmp matching
- * function.
- */
-struct device *bus_find_device_by_name(struct bus_type *bus,
-				       struct device *start, const char *name)
-{
-	return bus_find_device(bus, start, (void *)name, match_name);
-}
-EXPORT_SYMBOL_GPL(bus_find_device_by_name);
-
 /**
  * subsys_find_device_by_id - find a device with a specific enumeration number
  * @subsys: subsystem
diff --git a/drivers/base/core.c b/drivers/base/core.c
index da84a73f2ba6..fb83647d685a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3357,6 +3357,12 @@ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2)
 }
 EXPORT_SYMBOL_GPL(device_set_of_node_from_dev);
 
+int device_match_name(struct device *dev, const void *name)
+{
+	return sysfs_streq(dev_name(dev), name);
+}
+EXPORT_SYMBOL_GPL(device_match_name);
+
 int device_match_of_node(struct device *dev, const void *np)
 {
 	return dev->of_node == np;
diff --git a/drivers/hwtracing/stm/core.c b/drivers/hwtracing/stm/core.c
index e55b902560de..2b6bd42632e8 100644
--- a/drivers/hwtracing/stm/core.c
+++ b/drivers/hwtracing/stm/core.c
@@ -89,13 +89,6 @@ static struct class stm_class = {
 	.dev_groups	= stm_groups,
 };
 
-static int stm_dev_match(struct device *dev, const void *data)
-{
-	const char *name = data;
-
-	return sysfs_streq(name, dev_name(dev));
-}
-
 /**
  * stm_find_device() - find stm device by name
  * @buf:	character buffer containing the name
@@ -116,7 +109,7 @@ struct stm_device *stm_find_device(const char *buf)
 	if (!stm_core_up)
 		return NULL;
 
-	dev = class_find_device(&stm_class, NULL, buf, stm_dev_match);
+	dev = class_find_device_by_name(&stm_class, buf);
 	if (!dev)
 		return NULL;
 
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 4793e77808e2..d54c8e4d8954 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -213,13 +213,6 @@ static int led_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(leds_class_dev_pm_ops, led_suspend, led_resume);
 
-static int match_name(struct device *dev, const void *data)
-{
-	if (!dev_name(dev))
-		return 0;
-	return !strcmp(dev_name(dev), (char *)data);
-}
-
 static int led_classdev_next_name(const char *init_name, char *name,
 				  size_t len)
 {
@@ -230,7 +223,7 @@ static int led_classdev_next_name(const char *init_name, char *name,
 	strlcpy(name, init_name, len);
 
 	while ((ret < len) &&
-	       (dev = class_find_device(leds_class, NULL, name, match_name))) {
+	       (dev = class_find_device_by_name(leds_class, name))) {
 		put_device(dev);
 		ret = snprintf(name, len, "%s_%u", init_name, ++i);
 	}
diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 72b7ddc43116..c93ef33b01d3 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -663,21 +663,12 @@ void rtc_update_irq(struct rtc_device *rtc,
 }
 EXPORT_SYMBOL_GPL(rtc_update_irq);
 
-static int __rtc_match(struct device *dev, const void *data)
-{
-	const char *name = data;
-
-	if (strcmp(dev_name(dev), name) == 0)
-		return 1;
-	return 0;
-}
-
 struct rtc_device *rtc_class_open(const char *name)
 {
 	struct device *dev;
 	struct rtc_device *rtc = NULL;
 
-	dev = class_find_device(rtc_class, NULL, name, __rtc_match);
+	dev = class_find_device_by_name(rtc_class, name);
 	if (dev)
 		rtc = to_rtc_device(dev);
 
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index c522e9313c50..d843e362c167 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -608,13 +608,6 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver)
 }
 EXPORT_SYMBOL(ccwgroup_driver_unregister);
 
-static int __ccwgroupdev_check_busid(struct device *dev, const void *id)
-{
-	const char *bus_id = id;
-
-	return (strcmp(bus_id, dev_name(dev)) == 0);
-}
-
 /**
  * get_ccwgroupdev_by_busid() - obtain device from a bus id
  * @gdrv: driver the device is owned by
@@ -631,8 +624,7 @@ struct ccwgroup_device *get_ccwgroupdev_by_busid(struct ccwgroup_driver *gdrv,
 {
 	struct device *dev;
 
-	dev = driver_find_device(&gdrv->driver, NULL, bus_id,
-				 __ccwgroupdev_check_busid);
+	dev = driver_find_device_by_name(&gdrv->driver, bus_id);
 
 	return dev ? to_ccwgroupdev(dev) : NULL;
 }
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index c421899be20f..131430bd48d9 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -1695,18 +1695,6 @@ int ccw_device_force_console(struct ccw_device *cdev)
 EXPORT_SYMBOL_GPL(ccw_device_force_console);
 #endif
 
-/*
- * get ccw_device matching the busid, but only if owned by cdrv
- */
-static int
-__ccwdev_check_busid(struct device *dev, const void *id)
-{
-	const char *bus_id = id;
-
-	return (strcmp(bus_id, dev_name(dev)) == 0);
-}
-
-
 /**
  * get_ccwdev_by_busid() - obtain device from a bus id
  * @cdrv: driver the device is owned by
@@ -1723,8 +1711,7 @@ struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv,
 {
 	struct device *dev;
 
-	dev = driver_find_device(&cdrv->driver, NULL, (void *)bus_id,
-				 __ccwdev_check_busid);
+	dev = driver_find_device_by_name(&cdrv->driver, bus_id);
 
 	return dev ? to_ccwdev(dev) : NULL;
 }
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 1058b4b5cc1e..38a5a47b8c9c 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -133,12 +133,6 @@ struct zcdn_device {
 static int zcdn_create(const char *name);
 static int zcdn_destroy(const char *name);
 
-/* helper function, matches the name for find_zcdndev_by_name() */
-static int __match_zcdn_name(struct device *dev, const void *data)
-{
-	return strcmp(dev_name(dev), (const char *)data) == 0;
-}
-
 /* helper function, matches the devt value for find_zcdndev_by_devt() */
 static int __match_zcdn_devt(struct device *dev, const void *data)
 {
@@ -152,10 +146,7 @@ static int __match_zcdn_devt(struct device *dev, const void *data)
  */
 static inline struct zcdn_device *find_zcdndev_by_name(const char *name)
 {
-	struct device *dev =
-		class_find_device(zcrypt_class, NULL,
-				  (void *) name,
-				  __match_zcdn_name);
+	struct device *dev = class_find_device_by_name(zcrypt_class, name);
 
 	return dev ? to_zcdn_dev(dev) : NULL;
 }
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index 86defca6623e..c8efe60e2465 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -90,11 +90,6 @@ static int switch_fwnode_match(struct device *dev, const void *fwnode)
 	return dev_fwnode(dev) == fwnode;
 }
 
-static int switch_name_match(struct device *dev, const void *name)
-{
-	return !strcmp((const char *)name, dev_name(dev));
-}
-
 static void *usb_role_switch_match(struct device_connection *con, int ep,
 				   void *data)
 {
@@ -107,8 +102,7 @@ static void *usb_role_switch_match(struct device_connection *con, int ep,
 		dev = class_find_device(role_class, NULL, con->fwnode,
 					switch_fwnode_match);
 	} else {
-		dev = class_find_device(role_class, NULL, con->endpoint[ep],
-					switch_name_match);
+		dev = class_find_device_by_name(role_class, con->endpoint[ep]);
 	}
 
 	return dev ? to_role_switch(dev) : ERR_PTR(-EPROBE_DEFER);
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index a18285a990a8..9b0d15b487e5 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -210,11 +210,6 @@ static int typec_port_fwnode_match(struct device *dev, const void *fwnode)
 	return dev_fwnode(dev) == fwnode;
 }
 
-static int typec_port_name_match(struct device *dev, const void *name)
-{
-	return !strcmp((const char *)name, dev_name(dev));
-}
-
 static void *typec_port_match(struct device_connection *con, int ep, void *data)
 {
 	struct device *dev;
@@ -227,8 +222,7 @@ static void *typec_port_match(struct device_connection *con, int ep, void *data)
 		return class_find_device(typec_class, NULL, con->fwnode,
 					 typec_port_fwnode_match);
 
-	dev = class_find_device(typec_class, NULL, con->endpoint[ep],
-				typec_port_name_match);
+	dev = class_find_device_by_name(typec_class, con->endpoint[ep]);
 
 	return dev ? dev : ERR_PTR(-EPROBE_DEFER);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index c330b75c6c57..3ba376b8b456 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -164,6 +164,7 @@ void subsys_dev_iter_init(struct subsys_dev_iter *iter,
 struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter);
 void subsys_dev_iter_exit(struct subsys_dev_iter *iter);
 
+int device_match_name(struct device *dev, const void *name);
 int device_match_of_node(struct device *dev, const void *np);
 
 int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
@@ -171,9 +172,20 @@ int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
 struct device *bus_find_device(struct bus_type *bus, struct device *start,
 			       const void *data,
 			       int (*match)(struct device *dev, const void *data));
-struct device *bus_find_device_by_name(struct bus_type *bus,
-				       struct device *start,
-				       const char *name);
+/**
+ * bus_find_device_by_name - device iterator for locating a particular device
+ * of a specific name.
+ * @bus: bus type
+ * @start: Device to begin with
+ * @name: name of the device to match
+ */
+static inline struct device *bus_find_device_by_name(struct bus_type *bus,
+						     struct device *start,
+						     const char *name)
+{
+	return bus_find_device(bus, start, name, device_match_name);
+}
+
 struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
 					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
@@ -342,6 +354,18 @@ struct device *driver_find_device(struct device_driver *drv,
 				  struct device *start, const void *data,
 				  int (*match)(struct device *dev, const void *data));
 
+/**
+ * driver_find_device_by_name - device iterator for locating a particular device
+ * of a specific name.
+ * @driver: the driver we're iterating
+ * @name: name of the device to match
+ */
+static inline struct device *driver_find_device_by_name(struct device_driver *drv,
+							const char *name)
+{
+	return driver_find_device(drv, NULL, name, device_match_name);
+}
+
 void driver_deferred_probe_add(struct device *dev);
 int driver_deferred_probe_check_state(struct device *dev);
 int driver_deferred_probe_check_state_continue(struct device *dev);
@@ -471,6 +495,18 @@ extern struct device *class_find_device(struct class *class,
 					struct device *start, const void *data,
 					int (*match)(struct device *, const void *));
 
+/**
+ * class_find_device_by_name - device iterator for locating a particular device
+ * of a specific name.
+ * @class: class type
+ * @name: name of the device to match
+ */
+static inline struct device *class_find_device_by_name(struct class *class,
+						       const char *name)
+{
+	return class_find_device(class, NULL, name, device_match_name);
+}
+
 struct class_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct class *class, struct class_attribute *attr,
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 60b7ac56a1f5..de259b5170ab 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -23,11 +23,6 @@
 LIST_HEAD(cfg802154_rdev_list);
 int cfg802154_rdev_list_generation;
 
-static int wpan_phy_match(struct device *dev, const void *data)
-{
-	return !strcmp(dev_name(dev), (const char *)data);
-}
-
 struct wpan_phy *wpan_phy_find(const char *str)
 {
 	struct device *dev;
@@ -35,7 +30,7 @@ struct wpan_phy *wpan_phy_find(const char *str)
 	if (WARN_ON(!str))
 		return NULL;
 
-	dev = class_find_device(&wpan_phy_class, NULL, str, wpan_phy_match);
+	dev = class_find_device_by_name(&wpan_phy_class, str);
 	if (!dev)
 		return NULL;
 
-- 
cgit v1.2.3


From cfba5de9b99f8bbb8b4ea11b3049784e78b8759b Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:33 +0100
Subject: drivers: Introduce device lookup variants by of_node

Introduce wrappers for {bus/driver/class}_find_device() to
locate devices by its of_node.

Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: dri-devel@lists.freedesktop.org
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: devicetree@vger.kernel.org
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Heiko Stuebner <heiko@sntech.de>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: linux-i2c@vger.kernel.org
Cc: linux-rockchip@lists.infradead.org
Cc: linux-spi@vger.kernel.org
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Alan Tull <atull@kernel.org>
Cc: linux-fpga@vger.kernel.org
Cc: Peter Rosin <peda@axentia.se>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Thor Thayer <thor.thayer@linux.intel.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Peter Rosin <peda@axentia.se>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Wolfram Sang <wsa@the-dreams.de> # I2C part
Acked-by: Moritz Fischer <mdf@kernel.org> # For FPGA part
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20190723221838.12024-3-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/amba/tegra-ahb.c              | 11 +----------
 drivers/fpga/fpga-bridge.c            |  8 +-------
 drivers/fpga/fpga-mgr.c               |  8 +-------
 drivers/gpu/drm/drm_mipi_dsi.c        |  7 +------
 drivers/i2c/i2c-core-of.c             |  7 +------
 drivers/mfd/altera-sysmgr.c           | 14 ++-----------
 drivers/mux/core.c                    |  7 +------
 drivers/net/phy/mdio_bus.c            |  9 +--------
 drivers/nvmem/core.c                  |  7 +------
 drivers/of/of_mdio.c                  |  8 +-------
 drivers/of/platform.c                 |  7 +------
 drivers/regulator/of_regulator.c      |  7 +------
 drivers/spi/spi.c                     | 20 ++++---------------
 include/linux/device.h                | 37 +++++++++++++++++++++++++++++++++++
 sound/soc/rockchip/rk3399_gru_sound.c |  9 ++-------
 15 files changed, 56 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/drivers/amba/tegra-ahb.c b/drivers/amba/tegra-ahb.c
index aa64eece77a6..57d3b2e2d007 100644
--- a/drivers/amba/tegra-ahb.c
+++ b/drivers/amba/tegra-ahb.c
@@ -134,22 +134,13 @@ static inline void gizmo_writel(struct tegra_ahb *ahb, u32 value, u32 offset)
 }
 
 #ifdef CONFIG_TEGRA_IOMMU_SMMU
-static int tegra_ahb_match_by_smmu(struct device *dev, const void *data)
-{
-	struct tegra_ahb *ahb = dev_get_drvdata(dev);
-	const struct device_node *dn = data;
-
-	return (ahb->dev->of_node == dn) ? 1 : 0;
-}
-
 int tegra_ahb_enable_smmu(struct device_node *dn)
 {
 	struct device *dev;
 	u32 val;
 	struct tegra_ahb *ahb;
 
-	dev = driver_find_device(&tegra_ahb_driver.driver, NULL, dn,
-				 tegra_ahb_match_by_smmu);
+	dev = driver_find_device_by_of_node(&tegra_ahb_driver.driver, dn);
 	if (!dev)
 		return -EPROBE_DEFER;
 	ahb = dev_get_drvdata(dev);
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index 80bd8f1b2aa6..4bab9028940a 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -19,11 +19,6 @@ static struct class *fpga_bridge_class;
 /* Lock for adding/removing bridges to linked lists*/
 static spinlock_t bridge_list_lock;
 
-static int fpga_bridge_of_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /**
  * fpga_bridge_enable - Enable transactions on the bridge
  *
@@ -104,8 +99,7 @@ struct fpga_bridge *of_fpga_bridge_get(struct device_node *np,
 {
 	struct device *dev;
 
-	dev = class_find_device(fpga_bridge_class, NULL, np,
-				fpga_bridge_of_node_match);
+	dev = class_find_device_by_of_node(fpga_bridge_class, np);
 	if (!dev)
 		return ERR_PTR(-ENODEV);
 
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index c3866816456a..e05104f5e40c 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -482,11 +482,6 @@ struct fpga_manager *fpga_mgr_get(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(fpga_mgr_get);
 
-static int fpga_mgr_of_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /**
  * of_fpga_mgr_get - Given a device node, get a reference to a fpga mgr.
  *
@@ -498,8 +493,7 @@ struct fpga_manager *of_fpga_mgr_get(struct device_node *node)
 {
 	struct device *dev;
 
-	dev = class_find_device(fpga_mgr_class, NULL, node,
-				fpga_mgr_of_node_match);
+	dev = class_find_device_by_of_node(fpga_mgr_class, node);
 	if (!dev)
 		return ERR_PTR(-ENODEV);
 
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index ad19df0686c9..bd2498bbd74a 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -93,11 +93,6 @@ static struct bus_type mipi_dsi_bus_type = {
 	.pm = &mipi_dsi_device_pm_ops,
 };
 
-static int of_device_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /**
  * of_find_mipi_dsi_device_by_node() - find the MIPI DSI device matching a
  *    device tree node
@@ -110,7 +105,7 @@ struct mipi_dsi_device *of_find_mipi_dsi_device_by_node(struct device_node *np)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&mipi_dsi_bus_type, NULL, np, of_device_match);
+	dev = bus_find_device_by_of_node(&mipi_dsi_bus_type, np);
 
 	return dev ? to_mipi_dsi_device(dev) : NULL;
 }
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index d1c48dec7118..6f632d543fcc 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -113,11 +113,6 @@ void of_i2c_register_devices(struct i2c_adapter *adap)
 	of_node_put(bus);
 }
 
-static int of_dev_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 static int of_dev_or_parent_node_match(struct device *dev, const void *data)
 {
 	if (dev->of_node == data)
@@ -135,7 +130,7 @@ struct i2c_client *of_find_i2c_device_by_node(struct device_node *node)
 	struct device *dev;
 	struct i2c_client *client;
 
-	dev = bus_find_device(&i2c_bus_type, NULL, node, of_dev_node_match);
+	dev = bus_find_device_by_of_node(&i2c_bus_type, node);
 	if (!dev)
 		return NULL;
 
diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c
index 2ee14d8a6d31..d2a13a547a3c 100644
--- a/drivers/mfd/altera-sysmgr.c
+++ b/drivers/mfd/altera-sysmgr.c
@@ -87,16 +87,6 @@ static struct regmap_config altr_sysmgr_regmap_cfg = {
 	.use_single_write = true,
 };
 
-/**
- * sysmgr_match_phandle
- * Matching function used by driver_find_device().
- * Return: True if match is found, otherwise false.
- */
-static int sysmgr_match_phandle(struct device *dev, const void *data)
-{
-	return dev->of_node == (const struct device_node *)data;
-}
-
 /**
  * altr_sysmgr_regmap_lookup_by_phandle
  * Find the sysmgr previous configured in probe() and return regmap property.
@@ -117,8 +107,8 @@ struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np,
 	if (!sysmgr_np)
 		return ERR_PTR(-ENODEV);
 
-	dev = driver_find_device(&altr_sysmgr_driver.driver, NULL,
-				 (void *)sysmgr_np, sysmgr_match_phandle);
+	dev = driver_find_device_by_of_node(&altr_sysmgr_driver.driver,
+					    (void *)sysmgr_np);
 	of_node_put(sysmgr_np);
 	if (!dev)
 		return ERR_PTR(-EPROBE_DEFER);
diff --git a/drivers/mux/core.c b/drivers/mux/core.c
index d1271c1ee23c..1fb22388e7e0 100644
--- a/drivers/mux/core.c
+++ b/drivers/mux/core.c
@@ -405,17 +405,12 @@ int mux_control_deselect(struct mux_control *mux)
 }
 EXPORT_SYMBOL_GPL(mux_control_deselect);
 
-static int of_dev_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /* Note this function returns a reference to the mux_chip dev. */
 static struct mux_chip *of_find_mux_chip_by_node(struct device_node *np)
 {
 	struct device *dev;
 
-	dev = class_find_device(&mux_class, NULL, np, of_dev_node_match);
+	dev = class_find_device_by_of_node(&mux_class, np);
 
 	return dev ? to_mux_chip(dev) : NULL;
 }
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index bd04fe762056..ce940871331e 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -262,11 +262,6 @@ static struct class mdio_bus_class = {
 };
 
 #if IS_ENABLED(CONFIG_OF_MDIO)
-/* Helper function for of_mdio_find_bus */
-static int of_mdio_bus_match(struct device *dev, const void *mdio_bus_np)
-{
-	return dev->of_node == mdio_bus_np;
-}
 /**
  * of_mdio_find_bus - Given an mii_bus node, find the mii_bus.
  * @mdio_bus_np: Pointer to the mii_bus.
@@ -287,9 +282,7 @@ struct mii_bus *of_mdio_find_bus(struct device_node *mdio_bus_np)
 	if (!mdio_bus_np)
 		return NULL;
 
-	d = class_find_device(&mdio_bus_class, NULL,  mdio_bus_np,
-			      of_mdio_bus_match);
-
+	d = class_find_device_by_of_node(&mdio_bus_class, mdio_bus_np);
 	return d ? to_mii_bus(d) : NULL;
 }
 EXPORT_SYMBOL(of_mdio_find_bus);
diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ac5d945be88a..057d1ff87d5d 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -76,11 +76,6 @@ static struct bus_type nvmem_bus_type = {
 	.name		= "nvmem",
 };
 
-static int of_nvmem_match(struct device *dev, const void *nvmem_np)
-{
-	return dev->of_node == nvmem_np;
-}
-
 static struct nvmem_device *of_nvmem_find(struct device_node *nvmem_np)
 {
 	struct device *d;
@@ -88,7 +83,7 @@ static struct nvmem_device *of_nvmem_find(struct device_node *nvmem_np)
 	if (!nvmem_np)
 		return NULL;
 
-	d = bus_find_device(&nvmem_bus_type, NULL, nvmem_np, of_nvmem_match);
+	d = bus_find_device_by_of_node(&nvmem_bus_type, nvmem_np);
 
 	if (!d)
 		return NULL;
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 44f53496cab1..000b95787df1 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -280,12 +280,6 @@ unregister:
 }
 EXPORT_SYMBOL(of_mdiobus_register);
 
-/* Helper function for of_phy_find_device */
-static int of_phy_match(struct device *dev, const void *phy_np)
-{
-	return dev->of_node == phy_np;
-}
-
 /**
  * of_phy_find_device - Give a PHY node, find the phy_device
  * @phy_np: Pointer to the phy's device tree node
@@ -301,7 +295,7 @@ struct phy_device *of_phy_find_device(struct device_node *phy_np)
 	if (!phy_np)
 		return NULL;
 
-	d = bus_find_device(&mdio_bus_type, NULL, phy_np, of_phy_match);
+	d = bus_find_device_by_of_node(&mdio_bus_type, phy_np);
 	if (d) {
 		mdiodev = to_mdio_device(d);
 		if (mdiodev->flags & MDIO_DEVICE_FLAG_PHY)
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 7801e25e6895..b47a2292fe8e 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -37,11 +37,6 @@ static const struct of_device_id of_skipped_node_table[] = {
 	{} /* Empty terminated list */
 };
 
-static int of_dev_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /**
  * of_find_device_by_node - Find the platform_device associated with a node
  * @np: Pointer to device tree node
@@ -55,7 +50,7 @@ struct platform_device *of_find_device_by_node(struct device_node *np)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&platform_bus_type, NULL, np, of_dev_node_match);
+	dev = bus_find_device_by_of_node(&platform_bus_type, np);
 	return dev ? to_platform_device(dev) : NULL;
 }
 EXPORT_SYMBOL(of_find_device_by_node);
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 397918ebba55..20dcc9c03adc 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -460,16 +460,11 @@ error:
 	return NULL;
 }
 
-static int of_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 struct regulator_dev *of_find_regulator_by_node(struct device_node *np)
 {
 	struct device *dev;
 
-	dev = class_find_device(&regulator_class, NULL, np, of_node_match);
+	dev = class_find_device_by_of_node(&regulator_class, np);
 
 	return dev ? dev_to_rdev(dev) : NULL;
 }
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 75ac046cae52..a591da87981a 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3652,37 +3652,25 @@ EXPORT_SYMBOL_GPL(spi_write_then_read);
 /*-------------------------------------------------------------------------*/
 
 #if IS_ENABLED(CONFIG_OF)
-static int __spi_of_device_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /* must call put_device() when done with returned spi_device device */
 struct spi_device *of_find_spi_device_by_node(struct device_node *node)
 {
-	struct device *dev = bus_find_device(&spi_bus_type, NULL, node,
-						__spi_of_device_match);
+	struct device *dev = bus_find_device_by_of_node(&spi_bus_type, node);
+
 	return dev ? to_spi_device(dev) : NULL;
 }
 EXPORT_SYMBOL_GPL(of_find_spi_device_by_node);
 #endif /* IS_ENABLED(CONFIG_OF) */
 
 #if IS_ENABLED(CONFIG_OF_DYNAMIC)
-static int __spi_of_controller_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 /* the spi controllers are not using spi_bus, so we find it with another way */
 static struct spi_controller *of_find_spi_controller_by_node(struct device_node *node)
 {
 	struct device *dev;
 
-	dev = class_find_device(&spi_master_class, NULL, node,
-				__spi_of_controller_match);
+	dev = class_find_device_by_of_node(&spi_master_class, node);
 	if (!dev && IS_ENABLED(CONFIG_SPI_SLAVE))
-		dev = class_find_device(&spi_slave_class, NULL, node,
-					__spi_of_controller_match);
+		dev = class_find_device_by_of_node(&spi_slave_class, node);
 	if (!dev)
 		return NULL;
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 3ba376b8b456..29d8d7ad41e6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -186,6 +186,18 @@ static inline struct device *bus_find_device_by_name(struct bus_type *bus,
 	return bus_find_device(bus, start, name, device_match_name);
 }
 
+/**
+ * bus_find_device_by_of_node : device iterator for locating a particular device
+ * matching the of_node.
+ * @bus: bus type
+ * @np: of_node of the device to match.
+ */
+static inline struct device *
+bus_find_device_by_of_node(struct bus_type *bus, const struct device_node *np)
+{
+	return bus_find_device(bus, NULL, np, device_match_of_node);
+}
+
 struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
 					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
@@ -366,6 +378,19 @@ static inline struct device *driver_find_device_by_name(struct device_driver *dr
 	return driver_find_device(drv, NULL, name, device_match_name);
 }
 
+/**
+ * driver_find_device_by_of_node- device iterator for locating a particular device
+ * by of_node pointer.
+ * @driver: the driver we're iterating
+ * @np: of_node pointer to match.
+ */
+static inline struct device *
+driver_find_device_by_of_node(struct device_driver *drv,
+			      const struct device_node *np)
+{
+	return driver_find_device(drv, NULL, np, device_match_of_node);
+}
+
 void driver_deferred_probe_add(struct device *dev);
 int driver_deferred_probe_check_state(struct device *dev);
 int driver_deferred_probe_check_state_continue(struct device *dev);
@@ -507,6 +532,18 @@ static inline struct device *class_find_device_by_name(struct class *class,
 	return class_find_device(class, NULL, name, device_match_name);
 }
 
+/**
+ * class_find_device_by_of_node : device iterator for locating a particular device
+ * matching the of_node.
+ * @class: class type
+ * @np: of_node of the device to match.
+ */
+static inline struct device *
+class_find_device_by_of_node(struct class *class, const struct device_node *np)
+{
+	return class_find_device(class, NULL, np, device_match_of_node);
+}
+
 struct class_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct class *class, struct class_attribute *attr,
diff --git a/sound/soc/rockchip/rk3399_gru_sound.c b/sound/soc/rockchip/rk3399_gru_sound.c
index c16b0ffe8cfc..d951100bf770 100644
--- a/sound/soc/rockchip/rk3399_gru_sound.c
+++ b/sound/soc/rockchip/rk3399_gru_sound.c
@@ -422,11 +422,6 @@ static const struct dailink_match_data dailink_match[] = {
 	},
 };
 
-static int of_dev_node_match(struct device *dev, const void *data)
-{
-	return dev->of_node == data;
-}
-
 static int rockchip_sound_codec_node_match(struct device_node *np_codec)
 {
 	struct device *dev;
@@ -438,8 +433,8 @@ static int rockchip_sound_codec_node_match(struct device_node *np_codec)
 			continue;
 
 		if (dailink_match[i].bus_type) {
-			dev = bus_find_device(dailink_match[i].bus_type, NULL,
-					      np_codec, of_dev_node_match);
+			dev = bus_find_device_by_of_node(dailink_match[i].bus_type,
+							 np_codec);
 			if (!dev)
 				continue;
 			put_device(dev);
-- 
cgit v1.2.3


From 67843bbaf36eb087714f40e783ee78e99e9e4b86 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:34 +0100
Subject: drivers: Introduce device lookup variants by fwnode

Add a helper to match the firmware node handle of a device and provide
wrappers for {bus/class/driver}_find_device() APIs to avoid proliferation
of duplicate custom match functions.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: linux-usb@vger.kernel.org
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Joe Perches <joe@perches.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Acked-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20190723221838.12024-4-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c                                |  6 ++++
 drivers/base/devcon.c                              |  8 +----
 drivers/hwtracing/coresight/coresight-platform.c   | 11 ++----
 drivers/hwtracing/coresight/coresight-priv.h       |  2 --
 drivers/hwtracing/coresight/coresight.c            |  4 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c         |  8 +----
 drivers/iommu/arm-smmu-v3.c                        |  9 ++---
 drivers/iommu/arm-smmu.c                           |  9 ++---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c |  8 +----
 drivers/usb/roles/class.c                          |  8 +----
 drivers/usb/typec/class.c                          |  8 +----
 include/linux/device.h                             | 39 ++++++++++++++++++++++
 12 files changed, 57 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index fb83647d685a..e8f81a667545 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3368,3 +3368,9 @@ int device_match_of_node(struct device *dev, const void *np)
 	return dev->of_node == np;
 }
 EXPORT_SYMBOL_GPL(device_match_of_node);
+
+int device_match_fwnode(struct device *dev, const void *fwnode)
+{
+	return dev_fwnode(dev) == fwnode;
+}
+EXPORT_SYMBOL_GPL(device_match_fwnode);
diff --git a/drivers/base/devcon.c b/drivers/base/devcon.c
index 09f28479b243..1d488dc5dd0c 100644
--- a/drivers/base/devcon.c
+++ b/drivers/base/devcon.c
@@ -133,19 +133,13 @@ static struct bus_type *generic_match_buses[] = {
 	NULL,
 };
 
-static int device_fwnode_match(struct device *dev, const void *fwnode)
-{
-	return dev_fwnode(dev) == fwnode;
-}
-
 static void *device_connection_fwnode_match(struct device_connection *con)
 {
 	struct bus_type *bus;
 	struct device *dev;
 
 	for (bus = generic_match_buses[0]; bus; bus++) {
-		dev = bus_find_device(bus, NULL, (void *)con->fwnode,
-				      device_fwnode_match);
+		dev = bus_find_device_by_fwnode(bus, con->fwnode);
 		if (dev && !strncmp(dev_name(dev), con->id, strlen(con->id)))
 			return dev;
 
diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c
index dad7d96c5943..3c5bee429105 100644
--- a/drivers/hwtracing/coresight/coresight-platform.c
+++ b/drivers/hwtracing/coresight/coresight-platform.c
@@ -37,11 +37,6 @@ static int coresight_alloc_conns(struct device *dev,
 	return 0;
 }
 
-int coresight_device_fwnode_match(struct device *dev, const void *fwnode)
-{
-	return dev_fwnode(dev) == fwnode;
-}
-
 static struct device *
 coresight_find_device_by_fwnode(struct fwnode_handle *fwnode)
 {
@@ -51,8 +46,7 @@ coresight_find_device_by_fwnode(struct fwnode_handle *fwnode)
 	 * If we have a non-configurable replicator, it will be found on the
 	 * platform bus.
 	 */
-	dev = bus_find_device(&platform_bus_type, NULL,
-			      fwnode, coresight_device_fwnode_match);
+	dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
 	if (dev)
 		return dev;
 
@@ -60,8 +54,7 @@ coresight_find_device_by_fwnode(struct fwnode_handle *fwnode)
 	 * We have a configurable component - circle through the AMBA bus
 	 * looking for the device that matches the endpoint node.
 	 */
-	return bus_find_device(&amba_bustype, NULL,
-			       fwnode, coresight_device_fwnode_match);
+	return bus_find_device_by_fwnode(&amba_bustype, fwnode);
 }
 
 #ifdef CONFIG_OF
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 7d401790dd7e..61d7f9ff054d 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -202,6 +202,4 @@ static inline void *coresight_get_uci_data(const struct amba_id *id)
 
 void coresight_release_platform_data(struct coresight_platform_data *pdata);
 
-int coresight_device_fwnode_match(struct device *dev, const void *fwnode);
-
 #endif
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 55db77f6410b..6453c67a4d01 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -1046,9 +1046,7 @@ static void coresight_fixup_device_conns(struct coresight_device *csdev)
 		struct coresight_connection *conn = &csdev->pdata->conns[i];
 		struct device *dev = NULL;
 
-		dev = bus_find_device(&coresight_bustype, NULL,
-				      (void *)conn->child_fwnode,
-				      coresight_device_fwnode_match);
+		dev = bus_find_device_by_fwnode(&coresight_bustype, conn->child_fwnode);
 		if (dev) {
 			conn->child_dev = to_coresight_device(dev);
 			/* and put reference from 'bus_find_device()' */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 81e6dedb1e02..fa05e943038a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -4499,19 +4499,13 @@ static const struct acpi_device_id hns_roce_acpi_match[] = {
 };
 MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match);
 
-static int hns_roce_node_match(struct device *dev, const void *fwnode)
-{
-	return dev->fwnode == fwnode;
-}
-
 static struct
 platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode)
 {
 	struct device *dev;
 
 	/* get the 'device' corresponding to the matching 'fwnode' */
-	dev = bus_find_device(&platform_bus_type, NULL,
-			      fwnode, hns_roce_node_match);
+	dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
 	/* get the platform device */
 	return dev ? to_platform_device(dev) : NULL;
 }
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index a9a9fabd3968..6f0e13fa5e1a 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -2034,16 +2034,11 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
 
 static struct platform_driver arm_smmu_driver;
 
-static int arm_smmu_match_node(struct device *dev, const void *data)
-{
-	return dev->fwnode == data;
-}
-
 static
 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
 {
-	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
-						fwnode, arm_smmu_match_node);
+	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
+							  fwnode);
 	put_device(dev);
 	return dev ? dev_get_drvdata(dev) : NULL;
 }
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 64977c131ee6..aa06498f291d 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1426,16 +1426,11 @@ static bool arm_smmu_capable(enum iommu_cap cap)
 	}
 }
 
-static int arm_smmu_match_node(struct device *dev, const void *data)
-{
-	return dev->fwnode == data;
-}
-
 static
 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
 {
-	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
-						fwnode, arm_smmu_match_node);
+	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
+							  fwnode);
 	put_device(dev);
 	return dev ? dev_get_drvdata(dev) : NULL;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index bb6586d0e5af..ed3829ae4ef1 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -754,17 +754,11 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 	return (void *)misc_op;
 }
 
-static int hns_dsaf_dev_match(struct device *dev, const void *fwnode)
-{
-	return dev->fwnode == fwnode;
-}
-
 struct
 platform_device *hns_dsaf_find_platform_device(struct fwnode_handle *fwnode)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&platform_bus_type, NULL,
-			      fwnode, hns_dsaf_dev_match);
+	dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
 	return dev ? to_platform_device(dev) : NULL;
 }
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index c8efe60e2465..0526efbc4922 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -85,11 +85,6 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw)
 }
 EXPORT_SYMBOL_GPL(usb_role_switch_get_role);
 
-static int switch_fwnode_match(struct device *dev, const void *fwnode)
-{
-	return dev_fwnode(dev) == fwnode;
-}
-
 static void *usb_role_switch_match(struct device_connection *con, int ep,
 				   void *data)
 {
@@ -99,8 +94,7 @@ static void *usb_role_switch_match(struct device_connection *con, int ep,
 		if (con->id && !fwnode_property_present(con->fwnode, con->id))
 			return NULL;
 
-		dev = class_find_device(role_class, NULL, con->fwnode,
-					switch_fwnode_match);
+		dev = class_find_device_by_fwnode(role_class, con->fwnode);
 	} else {
 		dev = class_find_device_by_name(role_class, con->endpoint[ep]);
 	}
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 9b0d15b487e5..94a3eda62add 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -205,11 +205,6 @@ static void typec_altmode_put_partner(struct altmode *altmode)
 	put_device(&adev->dev);
 }
 
-static int typec_port_fwnode_match(struct device *dev, const void *fwnode)
-{
-	return dev_fwnode(dev) == fwnode;
-}
-
 static void *typec_port_match(struct device_connection *con, int ep, void *data)
 {
 	struct device *dev;
@@ -219,8 +214,7 @@ static void *typec_port_match(struct device_connection *con, int ep, void *data)
 	 * we need to return ERR_PTR(-PROBE_DEFER) when there is no device.
 	 */
 	if (con->fwnode)
-		return class_find_device(typec_class, NULL, con->fwnode,
-					 typec_port_fwnode_match);
+		return class_find_device_by_fwnode(typec_class, con->fwnode);
 
 	dev = class_find_device_by_name(typec_class, con->endpoint[ep]);
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 29d8d7ad41e6..7133fc1c285d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -166,6 +166,7 @@ void subsys_dev_iter_exit(struct subsys_dev_iter *iter);
 
 int device_match_name(struct device *dev, const void *name);
 int device_match_of_node(struct device *dev, const void *np);
+int device_match_fwnode(struct device *dev, const void *fwnode);
 
 int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
 		     int (*fn)(struct device *dev, void *data));
@@ -198,6 +199,18 @@ bus_find_device_by_of_node(struct bus_type *bus, const struct device_node *np)
 	return bus_find_device(bus, NULL, np, device_match_of_node);
 }
 
+/**
+ * bus_find_device_by_fwnode : device iterator for locating a particular device
+ * matching the fwnode.
+ * @bus: bus type
+ * @fwnode: fwnode of the device to match.
+ */
+static inline struct device *
+bus_find_device_by_fwnode(struct bus_type *bus, const struct fwnode_handle *fwnode)
+{
+	return bus_find_device(bus, NULL, fwnode, device_match_fwnode);
+}
+
 struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
 					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
@@ -391,6 +404,19 @@ driver_find_device_by_of_node(struct device_driver *drv,
 	return driver_find_device(drv, NULL, np, device_match_of_node);
 }
 
+/**
+ * driver_find_device_by_fwnode- device iterator for locating a particular device
+ * by fwnode pointer.
+ * @driver: the driver we're iterating
+ * @fwnode: fwnode pointer to match.
+ */
+static inline struct device *
+driver_find_device_by_fwnode(struct device_driver *drv,
+			     const struct fwnode_handle *fwnode)
+{
+	return driver_find_device(drv, NULL, fwnode, device_match_fwnode);
+}
+
 void driver_deferred_probe_add(struct device *dev);
 int driver_deferred_probe_check_state(struct device *dev);
 int driver_deferred_probe_check_state_continue(struct device *dev);
@@ -544,6 +570,19 @@ class_find_device_by_of_node(struct class *class, const struct device_node *np)
 	return class_find_device(class, NULL, np, device_match_of_node);
 }
 
+/**
+ * class_find_device_by_fwnode : device iterator for locating a particular device
+ * matching the fwnode.
+ * @class: class type
+ * @fwnode: fwnode of the device to match.
+ */
+static inline struct device *
+class_find_device_by_fwnode(struct class *class,
+			    const struct fwnode_handle *fwnode)
+{
+	return class_find_device(class, NULL, fwnode, device_match_fwnode);
+}
+
 struct class_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct class *class, struct class_attribute *attr,
-- 
cgit v1.2.3


From 4495dfdd6193d9712b7b8f5d699d89d5996e6aaa Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:35 +0100
Subject: drivers: Introduce device lookup variants by device type

Add a helper to match a device by its type and provide wrappers
for {bus/class/driver}_find_device() APIs.

Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: linux-usb@vger.kernel.org
Cc: Oliver Neukum <oneukum@suse.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tomas Winkler <tomas.winkler@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20190723221838.12024-5-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c               | 15 +++++++--------
 drivers/hwtracing/intel_th/core.c | 10 +---------
 drivers/misc/mei/main.c           |  9 +--------
 drivers/s390/crypto/zcrypt_api.c  | 11 +----------
 drivers/tty/tty_io.c              |  8 +-------
 drivers/usb/core/devio.c          |  8 +-------
 include/linux/device.h            | 37 +++++++++++++++++++++++++++++++++++++
 7 files changed, 49 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index e8f81a667545..3abc32b60c0a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2867,13 +2867,6 @@ struct device *device_create_with_groups(struct class *class,
 }
 EXPORT_SYMBOL_GPL(device_create_with_groups);
 
-static int __match_devt(struct device *dev, const void *data)
-{
-	const dev_t *devt = data;
-
-	return dev->devt == *devt;
-}
-
 /**
  * device_destroy - removes a device that was created with device_create()
  * @class: pointer to the struct class that this device was registered with
@@ -2886,7 +2879,7 @@ void device_destroy(struct class *class, dev_t devt)
 {
 	struct device *dev;
 
-	dev = class_find_device(class, NULL, &devt, __match_devt);
+	dev = class_find_device_by_devt(class, devt);
 	if (dev) {
 		put_device(dev);
 		device_unregister(dev);
@@ -3374,3 +3367,9 @@ int device_match_fwnode(struct device *dev, const void *fwnode)
 	return dev_fwnode(dev) == fwnode;
 }
 EXPORT_SYMBOL_GPL(device_match_fwnode);
+
+int device_match_devt(struct device *dev, const void *pdevt)
+{
+	return dev->devt == *(dev_t *)pdevt;
+}
+EXPORT_SYMBOL_GPL(device_match_devt);
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index 55922896d862..d5c1821b31c6 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -789,12 +789,6 @@ static int intel_th_populate(struct intel_th *th)
 	return 0;
 }
 
-static int match_devt(struct device *dev, const void *data)
-{
-	dev_t devt = (dev_t)(unsigned long)(void *)data;
-	return dev->devt == devt;
-}
-
 static int intel_th_output_open(struct inode *inode, struct file *file)
 {
 	const struct file_operations *fops;
@@ -802,9 +796,7 @@ static int intel_th_output_open(struct inode *inode, struct file *file)
 	struct device *dev;
 	int err;
 
-	dev = bus_find_device(&intel_th_bus, NULL,
-			      (void *)(unsigned long)inode->i_rdev,
-			      match_devt);
+	dev = bus_find_device_by_devt(&intel_th_bus, inode->i_rdev);
 	if (!dev || !dev->driver)
 		return -ENODEV;
 
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index f894d1f8a53e..7310b476323c 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -858,13 +858,6 @@ static ssize_t dev_state_show(struct device *device,
 }
 static DEVICE_ATTR_RO(dev_state);
 
-static int match_devt(struct device *dev, const void *data)
-{
-	const dev_t *devt = data;
-
-	return dev->devt == *devt;
-}
-
 /**
  * dev_set_devstate: set to new device state and notify sysfs file.
  *
@@ -880,7 +873,7 @@ void mei_set_devstate(struct mei_device *dev, enum mei_dev_state state)
 
 	dev->dev_state = state;
 
-	clsdev = class_find_device(mei_class, NULL, &dev->cdev.dev, match_devt);
+	clsdev = class_find_device_by_devt(mei_class, dev->cdev.dev);
 	if (clsdev) {
 		sysfs_notify(&clsdev->kobj, NULL, "dev_state");
 		put_device(clsdev);
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 38a5a47b8c9c..150f6236c9bb 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -133,12 +133,6 @@ struct zcdn_device {
 static int zcdn_create(const char *name);
 static int zcdn_destroy(const char *name);
 
-/* helper function, matches the devt value for find_zcdndev_by_devt() */
-static int __match_zcdn_devt(struct device *dev, const void *data)
-{
-	return dev->devt == *((dev_t *) data);
-}
-
 /*
  * Find zcdn device by name.
  * Returns reference to the zcdn device which needs to be released
@@ -158,10 +152,7 @@ static inline struct zcdn_device *find_zcdndev_by_name(const char *name)
  */
 static inline struct zcdn_device *find_zcdndev_by_devt(dev_t devt)
 {
-	struct device *dev =
-		class_find_device(zcrypt_class, NULL,
-				  (void *) &devt,
-				  __match_zcdn_devt);
+	struct device *dev = class_find_device_by_devt(zcrypt_class, devt);
 
 	return dev ? to_zcdn_dev(dev) : NULL;
 }
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 566728fbaf3c..802c1210558f 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2952,17 +2952,11 @@ void do_SAK(struct tty_struct *tty)
 
 EXPORT_SYMBOL(do_SAK);
 
-static int dev_match_devt(struct device *dev, const void *data)
-{
-	const dev_t *devt = data;
-	return dev->devt == *devt;
-}
-
 /* Must put_device() after it's unused! */
 static struct device *tty_get_device(struct tty_struct *tty)
 {
 	dev_t devt = tty_devnum(tty);
-	return class_find_device(tty_class, NULL, &devt, dev_match_devt);
+	return class_find_device_by_devt(tty_class, devt);
 }
 
 
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index b265ab5405f9..60268aee93a8 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -942,17 +942,11 @@ error:
 	return ret;
 }
 
-static int match_devt(struct device *dev, const void *data)
-{
-	return dev->devt == (dev_t)(unsigned long)(void *)data;
-}
-
 static struct usb_device *usbdev_lookup_by_devt(dev_t devt)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&usb_bus_type, NULL,
-			      (void *) (unsigned long) devt, match_devt);
+	dev = bus_find_device_by_devt(&usb_bus_type, devt);
 	if (!dev)
 		return NULL;
 	return to_usb_device(dev);
diff --git a/include/linux/device.h b/include/linux/device.h
index 7133fc1c285d..93b2f55ef44e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -167,6 +167,7 @@ void subsys_dev_iter_exit(struct subsys_dev_iter *iter);
 int device_match_name(struct device *dev, const void *name);
 int device_match_of_node(struct device *dev, const void *np);
 int device_match_fwnode(struct device *dev, const void *fwnode);
+int device_match_devt(struct device *dev, const void *pdevt);
 
 int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
 		     int (*fn)(struct device *dev, void *data));
@@ -211,6 +212,18 @@ bus_find_device_by_fwnode(struct bus_type *bus, const struct fwnode_handle *fwno
 	return bus_find_device(bus, NULL, fwnode, device_match_fwnode);
 }
 
+/**
+ * bus_find_device_by_devt : device iterator for locating a particular device
+ * matching the device type.
+ * @bus: bus type
+ * @devt: device type of the device to match.
+ */
+static inline struct device *bus_find_device_by_devt(struct bus_type *bus,
+						     dev_t devt)
+{
+	return bus_find_device(bus, NULL, &devt, device_match_devt);
+}
+
 struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
 					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
@@ -417,6 +430,18 @@ driver_find_device_by_fwnode(struct device_driver *drv,
 	return driver_find_device(drv, NULL, fwnode, device_match_fwnode);
 }
 
+/**
+ * driver_find_device_by_devt- device iterator for locating a particular device
+ * by devt.
+ * @driver: the driver we're iterating
+ * @devt: devt pointer to match.
+ */
+static inline struct device *driver_find_device_by_devt(struct device_driver *drv,
+							dev_t devt)
+{
+	return driver_find_device(drv, NULL, &devt, device_match_devt);
+}
+
 void driver_deferred_probe_add(struct device *dev);
 int driver_deferred_probe_check_state(struct device *dev);
 int driver_deferred_probe_check_state_continue(struct device *dev);
@@ -583,6 +608,18 @@ class_find_device_by_fwnode(struct class *class,
 	return class_find_device(class, NULL, fwnode, device_match_fwnode);
 }
 
+/**
+ * class_find_device_by_devt : device iterator for locating a particular device
+ * matching the device type.
+ * @class: class type
+ * @devt: device type of the device to match.
+ */
+static inline struct device *class_find_device_by_devt(struct class *class,
+						       dev_t devt)
+{
+	return class_find_device(class, NULL, &devt, device_match_devt);
+}
+
 struct class_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct class *class, struct class_attribute *attr,
-- 
cgit v1.2.3


From 00500147cbd3fc51353d0d003eaa9d31c72c0d50 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:36 +0100
Subject: drivers: Introduce device lookup variants by ACPI_COMPANION device

Add a generic helper to match a device by the ACPI_COMPANION device
and provide wrappers for the device lookup APIs.

Cc: Len Brown <lenb@kernel.org>
Cc: linux-acpi@vger.kernel.org
Cc: linux-spi@vger.kernel.org
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: linux-i2c@vger.kernel.org
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Wolfram Sang <wsa@the-dreams.de> # I2C parts
Link: https://lore.kernel.org/r/20190723221838.12024-6-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c         |  6 +++++
 drivers/i2c/i2c-core-acpi.c | 11 ++------
 drivers/spi/spi.c           |  8 +-----
 include/linux/device.h      | 65 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 74 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3abc32b60c0a..57d71bc2c559 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3373,3 +3373,9 @@ int device_match_devt(struct device *dev, const void *pdevt)
 	return dev->devt == *(dev_t *)pdevt;
 }
 EXPORT_SYMBOL_GPL(device_match_devt);
+
+int device_match_acpi_dev(struct device *dev, const void *adev)
+{
+	return ACPI_COMPANION(dev) == adev;
+}
+EXPORT_SYMBOL(device_match_acpi_dev);
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 4dbbc9a35f65..bc80aafb521f 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -354,17 +354,11 @@ static int i2c_acpi_find_match_adapter(struct device *dev, const void *data)
 	return ACPI_HANDLE(dev) == (acpi_handle)data;
 }
 
-static int i2c_acpi_find_match_device(struct device *dev, const void *data)
-{
-	return ACPI_COMPANION(dev) == data;
-}
 
 struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
 {
-	struct device *dev;
+	struct device *dev = bus_find_device_by_acpi_dev(&i2c_bus_type, handle);
 
-	dev = bus_find_device(&i2c_bus_type, NULL, handle,
-			      i2c_acpi_find_match_adapter);
 	return dev ? i2c_verify_adapter(dev) : NULL;
 }
 EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle);
@@ -373,8 +367,7 @@ static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&i2c_bus_type, NULL, adev,
-			      i2c_acpi_find_match_device);
+	dev = bus_find_device_by_acpi_dev(&i2c_bus_type, adev);
 	return dev ? i2c_verify_client(dev) : NULL;
 }
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a591da87981a..c486a6f84c2c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3741,11 +3741,6 @@ static int spi_acpi_controller_match(struct device *dev, const void *data)
 	return ACPI_COMPANION(dev->parent) == data;
 }
 
-static int spi_acpi_device_match(struct device *dev, const void *data)
-{
-	return ACPI_COMPANION(dev) == data;
-}
-
 static struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev)
 {
 	struct device *dev;
@@ -3765,8 +3760,7 @@ static struct spi_device *acpi_spi_find_device_by_adev(struct acpi_device *adev)
 {
 	struct device *dev;
 
-	dev = bus_find_device(&spi_bus_type, NULL, adev, spi_acpi_device_match);
-
+	dev = bus_find_device_by_acpi_dev(&spi_bus_type, adev);
 	return dev ? to_spi_device(dev) : NULL;
 }
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 93b2f55ef44e..7514ef3d3f1a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -168,6 +168,7 @@ int device_match_name(struct device *dev, const void *name);
 int device_match_of_node(struct device *dev, const void *np);
 int device_match_fwnode(struct device *dev, const void *fwnode);
 int device_match_devt(struct device *dev, const void *pdevt);
+int device_match_acpi_dev(struct device *dev, const void *adev);
 
 int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
 		     int (*fn)(struct device *dev, void *data));
@@ -224,6 +225,28 @@ static inline struct device *bus_find_device_by_devt(struct bus_type *bus,
 	return bus_find_device(bus, NULL, &devt, device_match_devt);
 }
 
+#ifdef CONFIG_ACPI
+struct acpi_device;
+
+/**
+ * bus_find_device_by_acpi_dev : device iterator for locating a particular device
+ * matching the ACPI COMPANION device.
+ * @bus: bus type
+ * @adev: ACPI COMPANION device to match.
+ */
+static inline struct device *
+bus_find_device_by_acpi_dev(struct bus_type *bus, const struct acpi_device *adev)
+{
+	return bus_find_device(bus, NULL, adev, device_match_acpi_dev);
+}
+#else
+static inline struct device *
+bus_find_device_by_acpi_dev(struct bus_type *bus, const void *adev)
+{
+	return NULL;
+}
+#endif
+
 struct device *subsys_find_device_by_id(struct bus_type *bus, unsigned int id,
 					struct device *hint);
 int bus_for_each_drv(struct bus_type *bus, struct device_driver *start,
@@ -442,6 +465,27 @@ static inline struct device *driver_find_device_by_devt(struct device_driver *dr
 	return driver_find_device(drv, NULL, &devt, device_match_devt);
 }
 
+#ifdef CONFIG_ACPI
+/**
+ * driver_find_device_by_acpi_dev : device iterator for locating a particular
+ * device matching the ACPI_COMPANION device.
+ * @driver: the driver we're iterating
+ * @adev: ACPI_COMPANION device to match.
+ */
+static inline struct device *
+driver_find_device_by_acpi_dev(struct device_driver *drv,
+			       const struct acpi_device *adev)
+{
+	return driver_find_device(drv, NULL, adev, device_match_acpi_dev);
+}
+#else
+static inline struct device *
+driver_find_device_by_acpi_dev(struct device_driver *drv, const void *adev)
+{
+	return NULL;
+}
+#endif
+
 void driver_deferred_probe_add(struct device *dev);
 int driver_deferred_probe_check_state(struct device *dev);
 int driver_deferred_probe_check_state_continue(struct device *dev);
@@ -620,6 +664,27 @@ static inline struct device *class_find_device_by_devt(struct class *class,
 	return class_find_device(class, NULL, &devt, device_match_devt);
 }
 
+#ifdef CONFIG_ACPI
+struct acpi_device;
+/**
+ * class_find_device_by_acpi_dev : device iterator for locating a particular
+ * device matching the ACPI_COMPANION device.
+ * @class: class type
+ * @adev: ACPI_COMPANION device to match.
+ */
+static inline struct device *
+class_find_device_by_acpi_dev(struct class *class, const struct acpi_device *adev)
+{
+	return class_find_device(class, NULL, adev, device_match_acpi_dev);
+}
+#else
+static inline struct device *
+class_find_device_by_acpi_dev(struct class *class, const void *adev)
+{
+	return NULL;
+}
+#endif
+
 struct class_attribute {
 	struct attribute attr;
 	ssize_t (*show)(struct class *class, struct class_attribute *attr,
-- 
cgit v1.2.3


From 6bf85ba9e55f659ddc0747bf1bb504ec6d15f525 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:37 +0100
Subject: drivers: Add generic helper to match any device

Add a generic helper to match any/all devices. Using this
introduce new wrappers {bus/driver/class}_find_next_device().

Cc: Elie Morisse <syniurge@gmail.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Nehal Shah <nehal-bakulchandra.shah@amd.com>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Shyam Sundar S K <shyam-sundar.s-k@amd.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com> # PCI
Link: https://lore.kernel.org/r/20190723221838.12024-7-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c                  |  6 ++++++
 drivers/i2c/busses/i2c-amd-mp2-pci.c |  8 +-------
 drivers/pci/probe.c                  |  7 +------
 drivers/s390/cio/ccwgroup.c          |  8 +-------
 drivers/scsi/scsi_proc.c             |  9 ++-------
 include/linux/device.h               | 17 +++++++++++++++++
 6 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 57d71bc2c559..e22e29b3dc97 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -3379,3 +3379,9 @@ int device_match_acpi_dev(struct device *dev, const void *adev)
 	return ACPI_COMPANION(dev) == adev;
 }
 EXPORT_SYMBOL(device_match_acpi_dev);
+
+int device_match_any(struct device *dev, const void *unused)
+{
+	return 1;
+}
+EXPORT_SYMBOL_GPL(device_match_any);
diff --git a/drivers/i2c/busses/i2c-amd-mp2-pci.c b/drivers/i2c/busses/i2c-amd-mp2-pci.c
index c7fe3b44a860..5e4800d72e00 100644
--- a/drivers/i2c/busses/i2c-amd-mp2-pci.c
+++ b/drivers/i2c/busses/i2c-amd-mp2-pci.c
@@ -457,18 +457,12 @@ static struct pci_driver amd_mp2_pci_driver = {
 };
 module_pci_driver(amd_mp2_pci_driver);
 
-static int amd_mp2_device_match(struct device *dev, const void *data)
-{
-	return 1;
-}
-
 struct amd_mp2_dev *amd_mp2_find_device(void)
 {
 	struct device *dev;
 	struct pci_dev *pci_dev;
 
-	dev = driver_find_device(&amd_mp2_pci_driver.driver, NULL, NULL,
-				 amd_mp2_device_match);
+	dev = driver_find_next_device(&amd_mp2_pci_driver.driver, NULL);
 	if (!dev)
 		return NULL;
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index a3c7338fad86..dbeeb385fb9f 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -64,11 +64,6 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
 	return &r->res;
 }
 
-static int find_anything(struct device *dev, const void *data)
-{
-	return 1;
-}
-
 /*
  * Some device drivers need know if PCI is initiated.
  * Basically, we think PCI is not initiated when there
@@ -79,7 +74,7 @@ int no_pci_devices(void)
 	struct device *dev;
 	int no_devices;
 
-	dev = bus_find_device(&pci_bus_type, NULL, NULL, find_anything);
+	dev = bus_find_next_device(&pci_bus_type, NULL);
 	no_devices = (dev == NULL);
 	put_device(dev);
 	return no_devices;
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index d843e362c167..0005ec9285aa 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -581,11 +581,6 @@ int ccwgroup_driver_register(struct ccwgroup_driver *cdriver)
 }
 EXPORT_SYMBOL(ccwgroup_driver_register);
 
-static int __ccwgroup_match_all(struct device *dev, const void *data)
-{
-	return 1;
-}
-
 /**
  * ccwgroup_driver_unregister() - deregister a ccw group driver
  * @cdriver: driver to be deregistered
@@ -597,8 +592,7 @@ void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver)
 	struct device *dev;
 
 	/* We don't want ccwgroup devices to live longer than their driver. */
-	while ((dev = driver_find_device(&cdriver->driver, NULL, NULL,
-					 __ccwgroup_match_all))) {
+	while ((dev = driver_find_next_device(&cdriver->driver, NULL))) {
 		struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
 
 		ccwgroup_ungroup(gdev);
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index c074631086a4..5b313226f11c 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -372,15 +372,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
 	return err;
 }
 
-static int always_match(struct device *dev, const void *data)
-{
-	return 1;
-}
-
 static inline struct device *next_scsi_device(struct device *start)
 {
-	struct device *next = bus_find_device(&scsi_bus_type, start, NULL,
-					      always_match);
+	struct device *next = bus_find_next_device(&scsi_bus_type, start);
+
 	put_device(start);
 	return next;
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 7514ef3d3f1a..8ae3f4b47293 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -169,6 +169,7 @@ int device_match_of_node(struct device *dev, const void *np);
 int device_match_fwnode(struct device *dev, const void *fwnode);
 int device_match_devt(struct device *dev, const void *pdevt);
 int device_match_acpi_dev(struct device *dev, const void *adev);
+int device_match_any(struct device *dev, const void *unused);
 
 int bus_for_each_dev(struct bus_type *bus, struct device *start, void *data,
 		     int (*fn)(struct device *dev, void *data));
@@ -225,6 +226,16 @@ static inline struct device *bus_find_device_by_devt(struct bus_type *bus,
 	return bus_find_device(bus, NULL, &devt, device_match_devt);
 }
 
+/**
+ * bus_find_next_device - Find the next device after a given device in a
+ * given bus.
+ */
+static inline struct device *
+bus_find_next_device(struct bus_type *bus,struct device *cur)
+{
+	return bus_find_device(bus, cur, NULL, device_match_any);
+}
+
 #ifdef CONFIG_ACPI
 struct acpi_device;
 
@@ -465,6 +476,12 @@ static inline struct device *driver_find_device_by_devt(struct device_driver *dr
 	return driver_find_device(drv, NULL, &devt, device_match_devt);
 }
 
+static inline struct device *driver_find_next_device(struct device_driver *drv,
+						     struct device *start)
+{
+	return driver_find_device(drv, start, NULL, device_match_any);
+}
+
 #ifdef CONFIG_ACPI
 /**
  * driver_find_device_by_acpi_dev : device iterator for locating a particular
-- 
cgit v1.2.3


From 36f3313d6bff91ab2a9e47698c27d15363640a4e Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Tue, 23 Jul 2019 23:18:38 +0100
Subject: platform: Add platform_find_device_by_driver() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide a helper to lookup platform devices by matching device
driver in order to avoid drivers trying to use platform bus
internals.

Cc: Eric Anholt <eric@anholt.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Heiko Stübner" <heiko@sntech.de>
Cc: Inki Dae <inki.dae@samsung.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sandy Huang <hjc@rock-chips.com>
Cc: Seung-Woo Kim <sw0312.kim@samsung.com>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20190723221838.12024-8-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c                     | 14 ++++++++++++++
 drivers/gpu/drm/exynos/exynos_drm_drv.c     |  9 +++------
 drivers/gpu/drm/mcde/mcde_drv.c             |  3 +--
 drivers/gpu/drm/rockchip/rockchip_drm_drv.c |  3 +--
 drivers/gpu/drm/vc4/vc4_drv.c               |  3 +--
 include/linux/platform_device.h             |  3 +++
 6 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 506a0175a5a7..a174ce5ea17c 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1197,6 +1197,20 @@ struct bus_type platform_bus_type = {
 };
 EXPORT_SYMBOL_GPL(platform_bus_type);
 
+/**
+ * platform_find_device_by_driver - Find a platform device with a given
+ * driver.
+ * @start: The device to start the search from.
+ * @drv: The device driver to look for.
+ */
+struct device *platform_find_device_by_driver(struct device *start,
+					      const struct device_driver *drv)
+{
+	return bus_find_device(&platform_bus_type, start, drv,
+			       (void *)platform_match);
+}
+EXPORT_SYMBOL_GPL(platform_find_device_by_driver);
+
 int __init platform_bus_init(void)
 {
 	int error;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 58baf49d9926..badab94be2d6 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -242,9 +242,7 @@ static struct component_match *exynos_drm_match_add(struct device *dev)
 		if (!info->driver || !(info->flags & DRM_COMPONENT_DRIVER))
 			continue;
 
-		while ((d = bus_find_device(&platform_bus_type, p,
-					    &info->driver->driver,
-					    (void *)platform_bus_type.match))) {
+		while ((d = platform_find_device_by_driver(p, &info->driver->driver))) {
 			put_device(p);
 
 			if (!(info->flags & DRM_FIMC_DEVICE) ||
@@ -412,9 +410,8 @@ static void exynos_drm_unregister_devices(void)
 		if (!info->driver || !(info->flags & DRM_VIRTUAL_DEVICE))
 			continue;
 
-		while ((dev = bus_find_device(&platform_bus_type, NULL,
-					    &info->driver->driver,
-					    (void *)platform_bus_type.match))) {
+		while ((dev = platform_find_device_by_driver(NULL,
+						&info->driver->driver))) {
 			put_device(dev);
 			platform_device_unregister(to_platform_device(dev));
 		}
diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c
index baf63fb6850a..c07abf9e201c 100644
--- a/drivers/gpu/drm/mcde/mcde_drv.c
+++ b/drivers/gpu/drm/mcde/mcde_drv.c
@@ -477,8 +477,7 @@ static int mcde_probe(struct platform_device *pdev)
 		struct device_driver *drv = &mcde_component_drivers[i]->driver;
 		struct device *p = NULL, *d;
 
-		while ((d = bus_find_device(&platform_bus_type, p, drv,
-					    (void *)platform_bus_type.match))) {
+		while ((d = platform_find_device_by_driver(p, drv))) {
 			put_device(p);
 			component_match_add(dev, &match, mcde_compare_dev, d);
 			p = d;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 53d2c5bd61dc..38dc26376961 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -330,8 +330,7 @@ static struct component_match *rockchip_drm_match_add(struct device *dev)
 		struct device *p = NULL, *d;
 
 		do {
-			d = bus_find_device(&platform_bus_type, p, &drv->driver,
-					    (void *)platform_bus_type.match);
+			d = platform_find_device_by_driver(p, &drv->driver);
 			put_device(p);
 			p = d;
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index bf11930e40e1..1551c8253bec 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -237,8 +237,7 @@ static void vc4_match_add_drivers(struct device *dev,
 		struct device_driver *drv = &drivers[i]->driver;
 		struct device *p = NULL, *d;
 
-		while ((d = bus_find_device(&platform_bus_type, p, drv,
-					    (void *)platform_bus_type.match))) {
+		while ((d = platform_find_device_by_driver(p, drv))) {
 			put_device(p);
 			component_match_add(dev, match, compare_dev, d);
 			p = d;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 9bc36b589827..37e15a935a42 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -51,6 +51,9 @@ extern struct device platform_bus;
 extern void arch_setup_pdev_archdata(struct platform_device *);
 extern struct resource *platform_get_resource(struct platform_device *,
 					      unsigned int, unsigned int);
+extern struct device *
+platform_find_device_by_driver(struct device *start,
+			       const struct device_driver *drv);
 extern void __iomem *
 devm_platform_ioremap_resource(struct platform_device *pdev,
 			       unsigned int index);
-- 
cgit v1.2.3


From 2889ffcfc2522d6d25e5bda704275064062bbb21 Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Mon, 29 Jul 2019 16:27:37 +0800
Subject: platform/x86: asus-wmi: cleanup AGFN fan handling

The asus-wmi driver currently uses the "AGFN" interface and
the FAN_CTRL device for fan control. According to the spec, this
interface is very dated and marked as pending removal from products
currently in development.

Clean up the way that the AGFN fan is detected and handled, also
preparing the driver for the introduction of an alternate fan
control method needed to support recent Asus products.

Not anticipating further development of this interface, simplify
the code by dropping any notion of being able to control multiple
AGFN fans (this was already limited to just a single fan through only
exposing a single fan in sysfs).

Check for the presence of AGFN fans at probe time, simplifying the code
flow in asus_hwmon_sysfs_is_visible().

Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c            | 238 +++++++++++++----------------
 include/linux/platform_data/x86/asus-wmi.h |   4 +-
 2 files changed, 109 insertions(+), 133 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 0c6a810fcb72..fc2939ac1cfe 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -65,6 +65,8 @@ MODULE_LICENSE("GPL");
 #define ASUS_FAN_MFUN			0x13
 #define ASUS_FAN_SFUN_READ		0x06
 #define ASUS_FAN_SFUN_WRITE		0x07
+
+/* Based on standard hwmon pwmX_enable values */
 #define ASUS_FAN_CTRL_MANUAL		1
 #define ASUS_FAN_CTRL_AUTO		2
 
@@ -120,7 +122,7 @@ struct agfn_args {
 } __packed;
 
 /* struct used for calling fan read and write methods */
-struct fan_args {
+struct agfn_fan_args {
 	struct agfn_args agfn;	/* common fields */
 	u8 fan;			/* fan number: 0: set auto mode 1: 1st fan */
 	u32 speed;		/* read: RPM/100 - write: 0-255 */
@@ -148,6 +150,11 @@ struct asus_rfkill {
 	u32 dev_id;
 };
 
+enum fan_type {
+	FAN_TYPE_NONE = 0,
+	FAN_TYPE_AGFN,		/* deprecated on newer platforms */
+};
+
 struct asus_wmi {
 	int dsts_id;
 	int spec;
@@ -178,9 +185,9 @@ struct asus_wmi {
 	struct asus_rfkill gps;
 	struct asus_rfkill uwb;
 
-	bool asus_hwmon_fan_manual_mode;
-	int asus_hwmon_num_fans;
-	int asus_hwmon_pwm;
+	enum fan_type fan_type;
+	int fan_pwm_mode;
+	int agfn_pwm;
 
 	bool fan_boost_mode_available;
 	u8 fan_boost_mode_mask;
@@ -1125,10 +1132,10 @@ static void asus_wmi_set_als(void)
 
 /* Hwmon device ***************************************************************/
 
-static int asus_hwmon_agfn_fan_speed_read(struct asus_wmi *asus, int fan,
+static int asus_agfn_fan_speed_read(struct asus_wmi *asus, int fan,
 					  int *speed)
 {
-	struct fan_args args = {
+	struct agfn_fan_args args = {
 		.agfn.len = sizeof(args),
 		.agfn.mfun = ASUS_FAN_MFUN,
 		.agfn.sfun = ASUS_FAN_SFUN_READ,
@@ -1152,10 +1159,10 @@ static int asus_hwmon_agfn_fan_speed_read(struct asus_wmi *asus, int fan,
 	return 0;
 }
 
-static int asus_hwmon_agfn_fan_speed_write(struct asus_wmi *asus, int fan,
+static int asus_agfn_fan_speed_write(struct asus_wmi *asus, int fan,
 				     int *speed)
 {
-	struct fan_args args = {
+	struct agfn_fan_args args = {
 		.agfn.len = sizeof(args),
 		.agfn.mfun = ASUS_FAN_MFUN,
 		.agfn.sfun = ASUS_FAN_SFUN_WRITE,
@@ -1175,7 +1182,7 @@ static int asus_hwmon_agfn_fan_speed_write(struct asus_wmi *asus, int fan,
 		return -ENXIO;
 
 	if (speed && fan == 1)
-		asus->asus_hwmon_pwm = *speed;
+		asus->agfn_pwm = *speed;
 
 	return 0;
 }
@@ -1184,87 +1191,75 @@ static int asus_hwmon_agfn_fan_speed_write(struct asus_wmi *asus, int fan,
  * Check if we can read the speed of one fan. If true we assume we can also
  * control it.
  */
-static int asus_hwmon_get_fan_number(struct asus_wmi *asus, int *num_fans)
+static bool asus_wmi_has_agfn_fan(struct asus_wmi *asus)
 {
 	int status;
-	int speed = 0;
+	int speed;
+	u32 value;
 
-	*num_fans = 0;
+	status = asus_agfn_fan_speed_read(asus, 1, &speed);
+	if (status != 0)
+		return false;
 
-	status = asus_hwmon_agfn_fan_speed_read(asus, 1, &speed);
-	if (!status)
-		*num_fans = 1;
+	status = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_FAN_CTRL, &value);
+	if (status != 0)
+		return false;
 
-	return 0;
+	/*
+	 * We need to find a better way, probably using sfun,
+	 * bits or spec ...
+	 * Currently we disable it if:
+	 * - ASUS_WMI_UNSUPPORTED_METHOD is returned
+	 * - reverved bits are non-zero
+	 * - sfun and presence bit are not set
+	 */
+	return !(value == ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000
+		 || (!asus->sfun && !(value & ASUS_WMI_DSTS_PRESENCE_BIT)));
 }
 
-static int asus_hwmon_fan_set_auto(struct asus_wmi *asus)
+static int asus_fan_set_auto(struct asus_wmi *asus)
 {
 	int status;
 
-	status = asus_hwmon_agfn_fan_speed_write(asus, 0, NULL);
+	status = asus_agfn_fan_speed_write(asus, 0, NULL);
 	if (status)
 		return -ENXIO;
 
-	asus->asus_hwmon_fan_manual_mode = false;
-
 	return 0;
 }
 
-static int asus_hwmon_fan_rpm_show(struct device *dev, int fan)
+static ssize_t pwm1_show(struct device *dev,
+			       struct device_attribute *attr,
+			       char *buf)
 {
 	struct asus_wmi *asus = dev_get_drvdata(dev);
-	int value;
-	int ret;
-
-	/* no speed readable on manual mode */
-	if (asus->asus_hwmon_fan_manual_mode)
-		return -ENXIO;
-
-	ret = asus_hwmon_agfn_fan_speed_read(asus, fan+1, &value);
-	if (ret) {
-		pr_warn("reading fan speed failed: %d\n", ret);
-		return -ENXIO;
-	}
-
-	return value;
-}
-
-static void asus_hwmon_pwm_show(struct asus_wmi *asus, int fan, int *value)
-{
 	int err;
+	int value;
 
-	if (asus->asus_hwmon_pwm >= 0) {
-		*value = asus->asus_hwmon_pwm;
-		return;
-	}
+	/* If we already set a value then just return it */
+	if (asus->agfn_pwm >= 0)
+		return sprintf(buf, "%d\n", asus->agfn_pwm);
 
-	err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_FAN_CTRL, value);
+	/*
+	 * If we haven't set already set a value through the AGFN interface,
+	 * we read a current value through the (now-deprecated) FAN_CTRL device.
+	 */
+	err = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_FAN_CTRL, &value);
 	if (err < 0)
-		return;
+		return err;
 
-	*value &= 0xFF;
-
-	if (*value == 1) /* Low Speed */
-		*value = 85;
-	else if (*value == 2)
-		*value = 170;
-	else if (*value == 3)
-		*value = 255;
-	else if (*value) {
-		pr_err("Unknown fan speed %#x\n", *value);
-		*value = -1;
+	value &= 0xFF;
+
+	if (value == 1) /* Low Speed */
+		value = 85;
+	else if (value == 2)
+		value = 170;
+	else if (value == 3)
+		value = 255;
+	else if (value) {
+		pr_err("Unknown fan speed %#x\n", value);
+		value = -1;
 	}
-}
-
-static ssize_t pwm1_show(struct device *dev,
-			       struct device_attribute *attr,
-			       char *buf)
-{
-	struct asus_wmi *asus = dev_get_drvdata(dev);
-	int value;
-
-	asus_hwmon_pwm_show(asus, 0, &value);
 
 	return sprintf(buf, "%d\n", value);
 }
@@ -1284,11 +1279,11 @@ static ssize_t pwm1_store(struct device *dev,
 
 	value = clamp(value, 0, 255);
 
-	state = asus_hwmon_agfn_fan_speed_write(asus, 1, &value);
+	state = asus_agfn_fan_speed_write(asus, 1, &value);
 	if (state)
 		pr_warn("Setting fan speed failed: %d\n", state);
 	else
-		asus->asus_hwmon_fan_manual_mode = true;
+		asus->fan_pwm_mode = ASUS_FAN_CTRL_MANUAL;
 
 	return count;
 }
@@ -1297,10 +1292,21 @@ static ssize_t fan1_input_show(struct device *dev,
 					struct device_attribute *attr,
 					char *buf)
 {
-	int value = asus_hwmon_fan_rpm_show(dev, 0);
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+	int value;
+	int ret;
 
-	return sprintf(buf, "%d\n", value < 0 ? -1 : value*100);
+	/* no speed readable on manual mode */
+	if (asus->fan_pwm_mode == ASUS_FAN_CTRL_MANUAL)
+		return -ENXIO;
+
+	ret = asus_agfn_fan_speed_read(asus, 1, &value);
+	if (ret) {
+		pr_warn("reading fan speed failed: %d\n", ret);
+		return -ENXIO;
+	}
 
+	return sprintf(buf, "%d\n", value < 0 ? -1 : value*100);
 }
 
 static ssize_t pwm1_enable_show(struct device *dev,
@@ -1309,10 +1315,7 @@ static ssize_t pwm1_enable_show(struct device *dev,
 {
 	struct asus_wmi *asus = dev_get_drvdata(dev);
 
-	if (asus->asus_hwmon_fan_manual_mode)
-		return sprintf(buf, "%d\n", ASUS_FAN_CTRL_MANUAL);
-
-	return sprintf(buf, "%d\n", ASUS_FAN_CTRL_AUTO);
+	return sprintf(buf, "%d\n", asus->fan_pwm_mode);
 }
 
 static ssize_t pwm1_enable_store(struct device *dev,
@@ -1329,14 +1332,21 @@ static ssize_t pwm1_enable_store(struct device *dev,
 	if (ret)
 		return ret;
 
-	if (state == ASUS_FAN_CTRL_MANUAL)
-		asus->asus_hwmon_fan_manual_mode = true;
-	else
-		status = asus_hwmon_fan_set_auto(asus);
+	switch (state) {
+	case ASUS_FAN_CTRL_MANUAL:
+		break;
 
-	if (status)
-		return status;
+	case ASUS_FAN_CTRL_AUTO:
+		status = asus_fan_set_auto(asus);
+		if (status)
+			return status;
+		break;
 
+	default:
+		return -EINVAL;
+	}
+
+	asus->fan_pwm_mode = state;
 	return count;
 }
 
@@ -1389,59 +1399,31 @@ static umode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct asus_wmi *asus = dev_get_drvdata(dev->parent);
-	int dev_id = -1;
-	int fan_attr = -1;
 	u32 value = ASUS_WMI_UNSUPPORTED_METHOD;
-	bool ok = true;
-
-	if (attr == &dev_attr_pwm1.attr)
-		dev_id = ASUS_WMI_DEVID_FAN_CTRL;
-	else if (attr == &dev_attr_temp1_input.attr)
-		dev_id = ASUS_WMI_DEVID_THERMAL_CTRL;
-
 
 	if (attr == &dev_attr_fan1_input.attr
 	    || attr == &dev_attr_fan1_label.attr
 	    || attr == &dev_attr_pwm1.attr
 	    || attr == &dev_attr_pwm1_enable.attr) {
-		fan_attr = 1;
-	}
-
-	if (dev_id != -1) {
-		int err = asus_wmi_get_devstate(asus, dev_id, &value);
+		if (asus->fan_type == FAN_TYPE_NONE)
+			return 0;
+	} else if (attr == &dev_attr_temp1_input.attr) {
+		int err = asus_wmi_get_devstate(asus,
+						ASUS_WMI_DEVID_THERMAL_CTRL,
+						&value);
 
-		if (err < 0 && fan_attr == -1)
+		if (err < 0)
 			return 0; /* can't return negative here */
-	}
 
-	if (dev_id == ASUS_WMI_DEVID_FAN_CTRL) {
-		/*
-		 * We need to find a better way, probably using sfun,
-		 * bits or spec ...
-		 * Currently we disable it if:
-		 * - ASUS_WMI_UNSUPPORTED_METHOD is returned
-		 * - reverved bits are non-zero
-		 * - sfun and presence bit are not set
-		 */
-		if (value == ASUS_WMI_UNSUPPORTED_METHOD || value & 0xFFF80000
-		    || (!asus->sfun && !(value & ASUS_WMI_DSTS_PRESENCE_BIT)))
-			ok = false;
-		else
-			ok = fan_attr <= asus->asus_hwmon_num_fans;
-	} else if (dev_id == ASUS_WMI_DEVID_THERMAL_CTRL) {
 		/*
 		 * If the temperature value in deci-Kelvin is near the absolute
 		 * zero temperature, something is clearly wrong
 		 */
 		if (value == 0 || value == 1)
-			ok = false;
-	} else if (fan_attr <= asus->asus_hwmon_num_fans && fan_attr != -1) {
-		ok = true;
-	} else {
-		ok = false;
+			return 0;
 	}
 
-	return ok ? attr->mode : 0;
+	return attr->mode;
 }
 
 static const struct attribute_group hwmon_attribute_group = {
@@ -1467,21 +1449,16 @@ static int asus_wmi_hwmon_init(struct asus_wmi *asus)
 
 static int asus_wmi_fan_init(struct asus_wmi *asus)
 {
-	int status;
-
-	asus->asus_hwmon_pwm = -1;
-	asus->asus_hwmon_num_fans = -1;
-	asus->asus_hwmon_fan_manual_mode = false;
+	asus->fan_type = FAN_TYPE_NONE;
+	asus->agfn_pwm = -1;
 
-	status = asus_hwmon_get_fan_number(asus, &asus->asus_hwmon_num_fans);
-	if (status) {
-		asus->asus_hwmon_num_fans = 0;
-		pr_warn("Could not determine number of fans: %d\n", status);
-		return -ENXIO;
+	if (asus_wmi_has_agfn_fan(asus)) {
+		asus->fan_type = FAN_TYPE_AGFN;
+		asus_fan_set_auto(asus);
+		asus->fan_pwm_mode = ASUS_FAN_CTRL_AUTO;
 	}
 
-	pr_info("Number of fans: %d\n", asus->asus_hwmon_num_fans);
-	return 0;
+	return asus->fan_type != FAN_TYPE_NONE;
 }
 
 /* Fan mode *******************************************************************/
@@ -2333,7 +2310,6 @@ static int asus_wmi_add(struct platform_device *pdev)
 		goto fail_input;
 
 	err = asus_wmi_fan_init(asus); /* probably no problems on error */
-	asus_hwmon_fan_set_auto(asus);
 
 	err = asus_wmi_hwmon_init(asus);
 	if (err)
@@ -2425,7 +2401,7 @@ static int asus_wmi_remove(struct platform_device *device)
 	asus_wmi_rfkill_exit(asus);
 	asus_wmi_debugfs_exit(asus);
 	asus_wmi_sysfs_exit(asus->platform_device);
-	asus_hwmon_fan_set_auto(asus);
+	asus_fan_set_auto(asus);
 
 	kfree(asus);
 	return 0;
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 4802cd2c7309..5ae9c062a1f6 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -12,7 +12,7 @@
 #define ASUS_WMI_METHODID_GPID		0x44495047 /* Get Panel ID?? (Resol) */
 #define ASUS_WMI_METHODID_QMOD		0x444F4D51 /* Quiet MODe */
 #define ASUS_WMI_METHODID_SPLV		0x4C425053 /* Set Panel Light Value */
-#define ASUS_WMI_METHODID_AGFN		0x4E464741 /* FaN? */
+#define ASUS_WMI_METHODID_AGFN		0x4E464741 /* Atk Generic FuNction */
 #define ASUS_WMI_METHODID_SFUN		0x4E554653 /* FUNCtionalities */
 #define ASUS_WMI_METHODID_SDSP		0x50534453 /* Set DiSPlay output */
 #define ASUS_WMI_METHODID_GDSP		0x50534447 /* Get DiSPlay output */
@@ -72,7 +72,7 @@
 
 /* Fan, Thermal */
 #define ASUS_WMI_DEVID_THERMAL_CTRL	0x00110011
-#define ASUS_WMI_DEVID_FAN_CTRL		0x00110012
+#define ASUS_WMI_DEVID_FAN_CTRL		0x00110012 /* deprecated */
 
 /* Power */
 #define ASUS_WMI_DEVID_PROCESSOR_STATE	0x00120012
-- 
cgit v1.2.3


From e3168b874321d04c160c9eb937919eb926ae232f Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Mon, 29 Jul 2019 16:27:39 +0800
Subject: platform/x86: asus-wmi: fix CPU fan control on recent products

Previously, asus-wmi was using the AGFN interface and FAN_CTRL device
for CPU fan control. However, this code has been found to be not fully
working on some recent products, and having checked the spec, these
interfaces are marked as being removed from future products currently
in development.

The replacement appears to be the CPU_FAN device, added in spec version
8.3 (March 2014) and present on many modern Asus laptops.

Add support for this device, and use it whenever it is detected.
The older approach based on AGFN and FAN_CTRL is used as a fallback
on products that do not have such device.

Other than switching between automatic and full speed, there is
no fan speed control through this new interface.

Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c            | 125 +++++++++++++++++++++++------
 include/linux/platform_data/x86/asus-wmi.h |   1 +
 2 files changed, 101 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 25f1e256c442..34dfbed65332 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -67,6 +67,7 @@ MODULE_LICENSE("GPL");
 #define ASUS_FAN_SFUN_WRITE		0x07
 
 /* Based on standard hwmon pwmX_enable values */
+#define ASUS_FAN_CTRL_FULLSPEED		0
 #define ASUS_FAN_CTRL_MANUAL		1
 #define ASUS_FAN_CTRL_AUTO		2
 
@@ -153,6 +154,7 @@ struct asus_rfkill {
 enum fan_type {
 	FAN_TYPE_NONE = 0,
 	FAN_TYPE_AGFN,		/* deprecated on newer platforms */
+	FAN_TYPE_SPEC83,	/* starting in Spec 8.3, use CPU_FAN_CTRL */
 };
 
 struct asus_wmi {
@@ -1211,10 +1213,29 @@ static bool asus_wmi_has_agfn_fan(struct asus_wmi *asus)
 static int asus_fan_set_auto(struct asus_wmi *asus)
 {
 	int status;
+	u32 retval;
 
-	status = asus_agfn_fan_speed_write(asus, 0, NULL);
-	if (status)
+	switch (asus->fan_type) {
+	case FAN_TYPE_SPEC83:
+		status = asus_wmi_set_devstate(ASUS_WMI_DEVID_CPU_FAN_CTRL,
+					       0, &retval);
+		if (status)
+			return status;
+
+		if (retval != 1)
+			return -EIO;
+		break;
+
+	case FAN_TYPE_AGFN:
+		status = asus_agfn_fan_speed_write(asus, 0, NULL);
+		if (status)
+			return -ENXIO;
+		break;
+
+	default:
 		return -ENXIO;
+	}
+
 
 	return 0;
 }
@@ -1287,13 +1308,29 @@ static ssize_t fan1_input_show(struct device *dev,
 	int value;
 	int ret;
 
-	/* no speed readable on manual mode */
-	if (asus->fan_pwm_mode == ASUS_FAN_CTRL_MANUAL)
-		return -ENXIO;
+	switch (asus->fan_type) {
+	case FAN_TYPE_SPEC83:
+		ret = asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL,
+					    &value);
+		if (ret < 0)
+			return ret;
 
-	ret = asus_agfn_fan_speed_read(asus, 1, &value);
-	if (ret) {
-		pr_warn("reading fan speed failed: %d\n", ret);
+		value &= 0xffff;
+		break;
+
+	case FAN_TYPE_AGFN:
+		/* no speed readable on manual mode */
+		if (asus->fan_pwm_mode == ASUS_FAN_CTRL_MANUAL)
+			return -ENXIO;
+
+		ret = asus_agfn_fan_speed_read(asus, 1, &value);
+		if (ret) {
+			pr_warn("reading fan speed failed: %d\n", ret);
+			return -ENXIO;
+		}
+		break;
+
+	default:
 		return -ENXIO;
 	}
 
@@ -1306,6 +1343,15 @@ static ssize_t pwm1_enable_show(struct device *dev,
 {
 	struct asus_wmi *asus = dev_get_drvdata(dev);
 
+	/*
+	 * Just read back the cached pwm mode.
+	 *
+	 * For the CPU_FAN device, the spec indicates that we should be
+	 * able to read the device status and consult bit 19 to see if we
+	 * are in Full On or Automatic mode. However, this does not work
+	 * in practice on X532FL at least (the bit is always 0) and there's
+	 * also nothing in the DSDT to indicate that this behaviour exists.
+	 */
 	return sprintf(buf, "%d\n", asus->fan_pwm_mode);
 }
 
@@ -1316,25 +1362,48 @@ static ssize_t pwm1_enable_store(struct device *dev,
 	struct asus_wmi *asus = dev_get_drvdata(dev);
 	int status = 0;
 	int state;
+	int value;
 	int ret;
+	u32 retval;
 
 	ret = kstrtouint(buf, 10, &state);
 
 	if (ret)
 		return ret;
 
-	switch (state) {
-	case ASUS_FAN_CTRL_MANUAL:
-		break;
+	if (asus->fan_type == FAN_TYPE_SPEC83) {
+		switch (state) { /* standard documented hwmon values */
+		case ASUS_FAN_CTRL_FULLSPEED:
+			value = 1;
+			break;
+		case ASUS_FAN_CTRL_AUTO:
+			value = 0;
+			break;
+		default:
+			return -EINVAL;
+		}
 
-	case ASUS_FAN_CTRL_AUTO:
-		status = asus_fan_set_auto(asus);
-		if (status)
-			return status;
-		break;
+		ret = asus_wmi_set_devstate(ASUS_WMI_DEVID_CPU_FAN_CTRL,
+					    value, &retval);
+		if (ret)
+			return ret;
 
-	default:
-		return -EINVAL;
+		if (retval != 1)
+			return -EIO;
+	} else if (asus->fan_type == FAN_TYPE_AGFN) {
+		switch (state) {
+		case ASUS_FAN_CTRL_MANUAL:
+			break;
+
+		case ASUS_FAN_CTRL_AUTO:
+			status = asus_fan_set_auto(asus);
+			if (status)
+				return status;
+			break;
+
+		default:
+			return -EINVAL;
+		}
 	}
 
 	asus->fan_pwm_mode = state;
@@ -1392,9 +1461,11 @@ static umode_t asus_hwmon_sysfs_is_visible(struct kobject *kobj,
 	struct asus_wmi *asus = dev_get_drvdata(dev->parent);
 	u32 value = ASUS_WMI_UNSUPPORTED_METHOD;
 
-	if (attr == &dev_attr_fan1_input.attr
+	if (attr == &dev_attr_pwm1.attr) {
+		if (asus->fan_type != FAN_TYPE_AGFN)
+			return 0;
+	} else if (attr == &dev_attr_fan1_input.attr
 	    || attr == &dev_attr_fan1_label.attr
-	    || attr == &dev_attr_pwm1.attr
 	    || attr == &dev_attr_pwm1_enable.attr) {
 		if (asus->fan_type == FAN_TYPE_NONE)
 			return 0;
@@ -1443,13 +1514,17 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
 	asus->fan_type = FAN_TYPE_NONE;
 	asus->agfn_pwm = -1;
 
-	if (asus_wmi_has_agfn_fan(asus)) {
+	if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_CPU_FAN_CTRL))
+		asus->fan_type = FAN_TYPE_SPEC83;
+	else if (asus_wmi_has_agfn_fan(asus))
 		asus->fan_type = FAN_TYPE_AGFN;
-		asus_fan_set_auto(asus);
-		asus->fan_pwm_mode = ASUS_FAN_CTRL_AUTO;
-	}
 
-	return asus->fan_type != FAN_TYPE_NONE;
+	if (asus->fan_type == FAN_TYPE_NONE)
+		return -ENODEV;
+
+	asus_fan_set_auto(asus);
+	asus->fan_pwm_mode = ASUS_FAN_CTRL_AUTO;
+	return 0;
 }
 
 /* Fan mode *******************************************************************/
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 5ae9c062a1f6..409e16064f4b 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -73,6 +73,7 @@
 /* Fan, Thermal */
 #define ASUS_WMI_DEVID_THERMAL_CTRL	0x00110011
 #define ASUS_WMI_DEVID_FAN_CTRL		0x00110012 /* deprecated */
+#define ASUS_WMI_DEVID_CPU_FAN_CTRL	0x00110013
 
 /* Power */
 #define ASUS_WMI_DEVID_PROCESSOR_STATE	0x00120012
-- 
cgit v1.2.3


From c776dd50196a07a0ef943fcb74e8c86f9a5a20a8 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:38 -0600
Subject: PCI: Make PCI_PM_* delay times private

These delay time definitions:

  #define PCI_PM_D2_DELAY         200
  #define PCI_PM_D3_WAIT          10
  #define PCI_PM_D3COLD_WAIT      100
  #define PCI_PM_BUS_WAIT         50

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-2-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 5 +++++
 include/linux/pci.h | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 1be03a97cb92..708413632429 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -39,6 +39,11 @@ int pci_probe_reset_function(struct pci_dev *dev);
 int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
 int pci_bus_error_reset(struct pci_dev *dev);
 
+#define PCI_PM_D2_DELAY         200
+#define PCI_PM_D3_WAIT          10
+#define PCI_PM_D3COLD_WAIT      100
+#define PCI_PM_BUS_WAIT         50
+
 /**
  * struct pci_platform_pm_ops - Firmware PM callbacks
  *
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 61b64fa1b0b1..9c2c6e161cfd 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -145,11 +145,6 @@ static inline const char *pci_power_name(pci_power_t state)
 	return pci_power_names[1 + (__force int) state];
 }
 
-#define PCI_PM_D2_DELAY		200
-#define PCI_PM_D3_WAIT		10
-#define PCI_PM_D3COLD_WAIT	100
-#define PCI_PM_BUS_WAIT		50
-
 /**
  * typedef pci_channel_state_t
  *
-- 
cgit v1.2.3


From 669696ebbccc40e8096a2ee9809c028fc1538001 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:39 -0600
Subject: PCI: Make pci_check_pme_status(), pci_pme_wakeup_bus() private

These interfaces:

  bool pci_check_pme_status(struct pci_dev *dev);
  void pci_pme_wakeup_bus(struct pci_bus *bus);

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-3-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 2 ++
 include/linux/pci.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 708413632429..ad1fe54ab8ee 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -89,6 +89,8 @@ void pci_power_up(struct pci_dev *dev);
 void pci_disable_enabled_device(struct pci_dev *dev);
 int pci_finish_runtime_suspend(struct pci_dev *dev);
 void pcie_clear_root_pme_status(struct pci_dev *dev);
+bool pci_check_pme_status(struct pci_dev *dev);
+void pci_pme_wakeup_bus(struct pci_bus *bus);
 int __pci_pme_wakeup(struct pci_dev *dev, void *ign);
 void pci_pme_restore(struct pci_dev *dev);
 bool pci_dev_need_resume(struct pci_dev *dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9c2c6e161cfd..b13fb829171e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1236,8 +1236,6 @@ int pci_wake_from_d3(struct pci_dev *dev, bool enable);
 int pci_prepare_to_sleep(struct pci_dev *dev);
 int pci_back_from_sleep(struct pci_dev *dev);
 bool pci_dev_run_wake(struct pci_dev *dev);
-bool pci_check_pme_status(struct pci_dev *dev);
-void pci_pme_wakeup_bus(struct pci_bus *bus);
 void pci_d3cold_enable(struct pci_dev *dev);
 void pci_d3cold_disable(struct pci_dev *dev);
 bool pcie_relaxed_ordering_enabled(struct pci_dev *dev);
-- 
cgit v1.2.3


From 975e1ac173058b8710e5979e97fc1397233301f3 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:40 -0600
Subject: PCI: Make pci_get_host_bridge_device(), pci_put_host_bridge_device()
 private

These interfaces:

  struct device *pci_get_host_bridge_device(struct pci_dev *dev);
  void pci_put_host_bridge_device(struct device *dev);

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-4-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 3 +++
 include/linux/pci.h | 3 ---
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index ad1fe54ab8ee..b06c750e08d7 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -243,6 +243,9 @@ enum pci_bar_type {
 	pci_bar_mem64,		/* A 64-bit memory BAR */
 };
 
+struct device *pci_get_host_bridge_device(struct pci_dev *dev);
+void pci_put_host_bridge_device(struct device *dev);
+
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign);
 bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl,
 				int crs_timeout);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b13fb829171e..ffb904e7895d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -644,9 +644,6 @@ static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev)
 	return dev->bus->self;
 }
 
-struct device *pci_get_host_bridge_device(struct pci_dev *dev);
-void pci_put_host_bridge_device(struct device *dev);
-
 #ifdef CONFIG_PCI_MSI
 static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev)
 {
-- 
cgit v1.2.3


From 440589dd1068fb36b2fe10ccddf97e43267e3b8d Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:41 -0600
Subject: PCI: Make pci_save_vc_state(), pci_restore_vc_state(), etc private

These Virtual Channel interfaces:

  int pci_save_vc_state(struct pci_dev *dev);
  void pci_restore_vc_state(struct pci_dev *dev);
  void pci_allocate_vc_save_buffers(struct pci_dev *dev);

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-5-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 5 +++++
 include/linux/pci.h | 5 -----
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b06c750e08d7..19cda6e6fccd 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -130,6 +130,11 @@ void pci_vpd_release(struct pci_dev *dev);
 void pcie_vpd_create_sysfs_dev_files(struct pci_dev *dev);
 void pcie_vpd_remove_sysfs_dev_files(struct pci_dev *dev);
 
+/* PCI Virtual Channel */
+int pci_save_vc_state(struct pci_dev *dev);
+void pci_restore_vc_state(struct pci_dev *dev);
+void pci_allocate_vc_save_buffers(struct pci_dev *dev);
+
 /* PCI /proc functions */
 #ifdef CONFIG_PROC_FS
 int pci_proc_attach_device(struct pci_dev *dev);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ffb904e7895d..3ff458f89190 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1239,11 +1239,6 @@ bool pcie_relaxed_ordering_enabled(struct pci_dev *dev);
 void pci_wakeup_bus(struct pci_bus *bus);
 void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state);
 
-/* PCI Virtual Channel */
-int pci_save_vc_state(struct pci_dev *dev);
-void pci_restore_vc_state(struct pci_dev *dev);
-void pci_allocate_vc_save_buffers(struct pci_dev *dev);
-
 /* For use by arch with custom probe code */
 void set_pcie_port_type(struct pci_dev *pdev);
 void set_pcie_hotplug_bridge(struct pci_dev *pdev);
-- 
cgit v1.2.3


From 003d3b2c5f835d897b3b10a13a9e1340630e93f1 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:42 -0600
Subject: PCI: Make pci_hotplug_io_size, mem_size, and bus_size private

These symbols:

  extern unsigned long pci_hotplug_io_size;
  extern unsigned long pci_hotplug_mem_size;
  extern unsigned long pci_hotplug_bus_size;

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-6-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 3 +++
 include/linux/pci.h | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 19cda6e6fccd..9698fa805e28 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -208,6 +208,9 @@ extern const struct attribute_group *pcibus_groups[];
 extern const struct device_type pci_dev_type;
 extern const struct attribute_group *pci_bus_groups[];
 
+extern unsigned long pci_hotplug_io_size;
+extern unsigned long pci_hotplug_mem_size;
+extern unsigned long pci_hotplug_bus_size;
 
 /**
  * pci_match_one_device - Tell if a PCI device structure has a matching
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 3ff458f89190..b5592b9b7f38 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2010,10 +2010,6 @@ extern unsigned long pci_cardbus_mem_size;
 extern u8 pci_dfl_cache_line_size;
 extern u8 pci_cache_line_size;
 
-extern unsigned long pci_hotplug_io_size;
-extern unsigned long pci_hotplug_mem_size;
-extern unsigned long pci_hotplug_bus_size;
-
 /* Architecture-specific versions may override these (weak) */
 void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
-- 
cgit v1.2.3


From ecd29c1a38af4ba6e61382591dfe93f06f14ba25 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:43 -0600
Subject: PCI: Make pci_bus_get(), pci_bus_put() private

These interfaces:

  struct pci_bus *pci_bus_get(struct pci_bus *bus);
  void pci_bus_put(struct pci_bus *bus);

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-7-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 2 ++
 include/linux/pci.h | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9698fa805e28..81e94c9f1c12 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -274,6 +274,8 @@ bool pci_bus_clip_resource(struct pci_dev *dev, int idx);
 
 void pci_reassigndev_resource_alignment(struct pci_dev *dev);
 void pci_disable_bridge_window(struct pci_dev *dev);
+struct pci_bus *pci_bus_get(struct pci_bus *bus);
+void pci_bus_put(struct pci_bus *bus);
 
 /* PCIe link information */
 #define PCIE_SPEED2STR(speed) \
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b5592b9b7f38..4d1b6f07f8f1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1282,8 +1282,6 @@ int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *);
 void pci_release_selected_regions(struct pci_dev *, int);
 
 /* drivers/pci/bus.c */
-struct pci_bus *pci_bus_get(struct pci_bus *bus);
-void pci_bus_put(struct pci_bus *bus);
 void pci_add_resource(struct list_head *resources, struct resource *res);
 void pci_add_resource_offset(struct list_head *resources, struct resource *res,
 			     resource_size_t offset);
-- 
cgit v1.2.3


From 5da78d95785daa9454336a88b2f86a8998f6c739 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:44 -0600
Subject: PCI: Make pcie_update_link_speed() private

This interface:

  void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);

is only used in drivers/pci/ and does not need to be seen by the rest of
the kernel.  Move it to drivers/pci/pci.h so it's private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-8-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 1 +
 include/linux/pci.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 81e94c9f1c12..8459663358c5 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -299,6 +299,7 @@ u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed,
 			   enum pcie_link_width *width);
 void __pcie_print_link_status(struct pci_dev *dev, bool verbose);
 void pcie_report_downtraining(struct pci_dev *dev);
+void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 
 /* Single Root I/O Virtualization */
 struct pci_sriov {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4d1b6f07f8f1..8301352a937f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -987,7 +987,6 @@ struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge);
 struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
 				int busnr);
-void pcie_update_link_speed(struct pci_bus *bus, u16 link_status);
 struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr,
 				 const char *name,
 				 struct hotplug_slot *hotplug);
-- 
cgit v1.2.3


From b92b512a435da01b52de07e3dcc2f07a4ad404de Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:45 -0600
Subject: PCI: Make pci_ats_init() private

This interface:

  void pci_ats_init(struct pci_dev *dev);

is only used in drivers/pci/ and does not need to be seen by the rest of
the kernel.  Move it to drivers/pci/pci.h so it's private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-9-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 7 ++++---
 include/linux/pci.h | 2 --
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 8459663358c5..2bb59afb540e 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -439,11 +439,12 @@ static inline void pci_restore_dpc_state(struct pci_dev *dev) {}
 #endif
 
 #ifdef CONFIG_PCI_ATS
+/* Address Translation Service */
+void pci_ats_init(struct pci_dev *dev);
 void pci_restore_ats_state(struct pci_dev *dev);
 #else
-static inline void pci_restore_ats_state(struct pci_dev *dev)
-{
-}
+static inline void pci_ats_init(struct pci_dev *d) { }
+static inline void pci_restore_ats_state(struct pci_dev *dev) { }
 #endif /* CONFIG_PCI_ATS */
 
 #ifdef CONFIG_PCI_IOV
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 8301352a937f..9b8d0c2a1c9a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1761,13 +1761,11 @@ static inline bool pci_ats_disabled(void) { return true; }
 
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
-void pci_ats_init(struct pci_dev *dev);
 int pci_enable_ats(struct pci_dev *dev, int ps);
 void pci_disable_ats(struct pci_dev *dev);
 int pci_ats_queue_depth(struct pci_dev *dev);
 int pci_ats_page_aligned(struct pci_dev *dev);
 #else
-static inline void pci_ats_init(struct pci_dev *d) { }
 static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; }
 static inline void pci_disable_ats(struct pci_dev *d) { }
 static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; }
-- 
cgit v1.2.3


From 72bde9ced373ca1e27332fd020d248151319a3d6 Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:46 -0600
Subject: PCI: Make pcie_set_ecrc_checking(), pcie_ecrc_get_policy() private

These interfaces:

  void pcie_set_ecrc_checking(struct pci_dev *dev);
  void pcie_ecrc_get_policy(char *str);

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-10-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 8 ++++++++
 include/linux/pci.h | 8 --------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 2bb59afb540e..d3d006c95041 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -541,6 +541,14 @@ static inline void pcie_aspm_create_sysfs_dev_files(struct pci_dev *pdev) { }
 static inline void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) { }
 #endif
 
+#ifdef CONFIG_PCIE_ECRC
+void pcie_set_ecrc_checking(struct pci_dev *dev);
+void pcie_ecrc_get_policy(char *str);
+#else
+static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
+static inline void pcie_ecrc_get_policy(char *str) { }
+#endif
+
 #ifdef CONFIG_PCIE_PTM
 void pci_ptm_init(struct pci_dev *dev);
 #else
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9b8d0c2a1c9a..65502c564661 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1559,14 +1559,6 @@ bool pci_aer_available(void);
 static inline bool pci_aer_available(void) { return false; }
 #endif
 
-#ifdef CONFIG_PCIE_ECRC
-void pcie_set_ecrc_checking(struct pci_dev *dev);
-void pcie_ecrc_get_policy(char *str);
-#else
-static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
-static inline void pcie_ecrc_get_policy(char *str) { }
-#endif
-
 bool pci_ats_disabled(void);
 
 #ifdef CONFIG_PCIE_PTM
-- 
cgit v1.2.3


From ac6c26da29c12fa511c877c273ed5c939dc9e96c Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:47 -0600
Subject: PCI: Make pci_enable_ptm() private

This interface:

  int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);

is only used in drivers/pci/ and does not need to be seen by the rest of
the kernel.  Move it to drivers/pci/pci.h so it's private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-11-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 3 +++
 include/linux/pci.h | 7 -------
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index d3d006c95041..8935fc1ff446 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -551,8 +551,11 @@ static inline void pcie_ecrc_get_policy(char *str) { }
 
 #ifdef CONFIG_PCIE_PTM
 void pci_ptm_init(struct pci_dev *dev);
+int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
 #else
 static inline void pci_ptm_init(struct pci_dev *dev) { }
+static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
+{ return -EINVAL; }
 #endif
 
 struct pci_dev_reset_methods {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 65502c564661..ef73b0c87f01 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1561,13 +1561,6 @@ static inline bool pci_aer_available(void) { return false; }
 
 bool pci_ats_disabled(void);
 
-#ifdef CONFIG_PCIE_PTM
-int pci_enable_ptm(struct pci_dev *dev, u8 *granularity);
-#else
-static inline int pci_enable_ptm(struct pci_dev *dev, u8 *granularity)
-{ return -EINVAL; }
-#endif
-
 void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
-- 
cgit v1.2.3


From 621f7e354fd8f99db0e3f7a0e8cda238caee9f3a Mon Sep 17 00:00:00 2001
From: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Date: Wed, 24 Jul 2019 17:38:48 -0600
Subject: PCI: Make pci_set_of_node(), etc private

These interfaces:

  void pci_set_of_node(struct pci_dev *dev);
  void pci_release_of_node(struct pci_dev *dev);
  void pci_set_bus_of_node(struct pci_bus *bus);
  void pci_release_bus_of_node(struct pci_bus *bus);

are only used in drivers/pci/ and do not need to be seen by the rest of the
kernel.  Move them to drivers/pci/pci.h so they're private to the PCI
subsystem.

Link: https://lore.kernel.org/r/20190724233848.73327-12-skunberg.kelsey@gmail.com
Signed-off-by: Kelsey Skunberg <skunberg.kelsey@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.h   | 9 +++++++++
 include/linux/pci.h | 8 --------
 2 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 8935fc1ff446..61bbfd611140 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -592,6 +592,10 @@ struct device_node;
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
 int of_pci_get_max_link_speed(struct device_node *node);
+void pci_set_of_node(struct pci_dev *dev);
+void pci_release_of_node(struct pci_dev *dev);
+void pci_set_bus_of_node(struct pci_bus *bus);
+void pci_release_bus_of_node(struct pci_bus *bus);
 
 #else
 static inline int
@@ -611,6 +615,11 @@ of_pci_get_max_link_speed(struct device_node *node)
 {
 	return -EINVAL;
 }
+
+static inline void pci_set_of_node(struct pci_dev *dev) { }
+static inline void pci_release_of_node(struct pci_dev *dev) { }
+static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
+static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
 #endif /* CONFIG_OF */
 
 #if defined(CONFIG_OF_ADDRESS)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ef73b0c87f01..5a26b7db71b3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2259,10 +2259,6 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 #ifdef CONFIG_OF
 struct device_node;
 struct irq_domain;
-void pci_set_of_node(struct pci_dev *dev);
-void pci_release_of_node(struct pci_dev *dev);
-void pci_set_bus_of_node(struct pci_bus *bus);
-void pci_release_bus_of_node(struct pci_bus *bus);
 struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus);
 int pci_parse_request_of_pci_ranges(struct device *dev,
 				    struct list_head *resources,
@@ -2272,10 +2268,6 @@ int pci_parse_request_of_pci_ranges(struct device *dev,
 struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus);
 
 #else	/* CONFIG_OF */
-static inline void pci_set_of_node(struct pci_dev *dev) { }
-static inline void pci_release_of_node(struct pci_dev *dev) { }
-static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
-static inline void pci_release_bus_of_node(struct pci_bus *bus) { }
 static inline struct irq_domain *
 pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
 static inline int pci_parse_request_of_pci_ranges(struct device *dev,
-- 
cgit v1.2.3


From 259231a045616c4101d023a8f4dcc8379af265a6 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 3 Jul 2019 20:51:26 -0300
Subject: cpuidle: add poll_limit_ns to cpuidle_device structure

Add a poll_limit_ns variable to cpuidle_device structure.

Calculate and configure it in the new cpuidle_poll_time
function, in case its zero.

Individual governors are allowed to override this value.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c    | 30 ++++++++++++++++++++++++++++++
 drivers/cpuidle/poll_state.c | 11 ++---------
 drivers/cpuidle/sysfs.c      |  7 +++++++
 include/linux/cpuidle.h      |  6 ++++++
 4 files changed, 45 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0f4b7c45df3e..0895b988fa92 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -361,6 +361,36 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
 		cpuidle_curr_governor->reflect(dev, index);
 }
 
+/**
+ * cpuidle_poll_time - return amount of time to poll for,
+ * governors can override dev->poll_limit_ns if necessary
+ *
+ * @drv:   the cpuidle driver tied with the cpu
+ * @dev:   the cpuidle device
+ *
+ */
+u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+		      struct cpuidle_device *dev)
+{
+	int i;
+	u64 limit_ns;
+
+	if (dev->poll_limit_ns)
+		return dev->poll_limit_ns;
+
+	limit_ns = TICK_NSEC;
+	for (i = 1; i < drv->state_count; i++) {
+		if (drv->states[i].disabled || dev->states_usage[i].disable)
+			continue;
+
+		limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
+	}
+
+	dev->poll_limit_ns = limit_ns;
+
+	return dev->poll_limit_ns;
+}
+
 /**
  * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
  */
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 02b9315a9e96..c8fa5f41dfc4 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -20,16 +20,9 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 	local_irq_enable();
 	if (!current_set_polling_and_test()) {
 		unsigned int loop_count = 0;
-		u64 limit = TICK_NSEC;
-		int i;
+		u64 limit;
 
-		for (i = 1; i < drv->state_count; i++) {
-			if (drv->states[i].disabled || dev->states_usage[i].disable)
-				continue;
-
-			limit = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
-			break;
-		}
+		limit = cpuidle_poll_time(drv, dev);
 
 		while (!need_resched()) {
 			cpu_relax();
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index eb20adb5de23..2bb2683b493c 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -334,6 +334,7 @@ struct cpuidle_state_kobj {
 	struct cpuidle_state_usage *state_usage;
 	struct completion kobj_unregister;
 	struct kobject kobj;
+	struct cpuidle_device *device;
 };
 
 #ifdef CONFIG_SUSPEND
@@ -391,6 +392,7 @@ static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *k
 #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
 #define kobj_to_state(k) (kobj_to_state_obj(k)->state)
 #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage)
+#define kobj_to_device(k) (kobj_to_state_obj(k)->device)
 #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
 
 static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr,
@@ -414,10 +416,14 @@ static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr,
 	struct cpuidle_state *state = kobj_to_state(kobj);
 	struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
 	struct cpuidle_state_attr *cattr = attr_to_stateattr(attr);
+	struct cpuidle_device *dev = kobj_to_device(kobj);
 
 	if (cattr->store)
 		ret = cattr->store(state, state_usage, buf, size);
 
+	/* reset poll time cache */
+	dev->poll_limit_ns = 0;
+
 	return ret;
 }
 
@@ -468,6 +474,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
 		}
 		kobj->state = &drv->states[i];
 		kobj->state_usage = &device->states_usage[i];
+		kobj->device = device;
 		init_completion(&kobj->kobj_unregister);
 
 		ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index bb9a0db89f1a..b484dd69ec21 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -86,6 +86,7 @@ struct cpuidle_device {
 	ktime_t			next_hrtimer;
 
 	int			last_residency;
+	u64			poll_limit_ns;
 	struct cpuidle_state_usage	states_usage[CPUIDLE_STATE_MAX];
 	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
 	struct cpuidle_driver_kobj *kobj_driver;
@@ -132,6 +133,8 @@ extern int cpuidle_select(struct cpuidle_driver *drv,
 extern int cpuidle_enter(struct cpuidle_driver *drv,
 			 struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
+extern u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+			     struct cpuidle_device *dev);
 
 extern int cpuidle_register_driver(struct cpuidle_driver *drv);
 extern struct cpuidle_driver *cpuidle_get_driver(void);
@@ -166,6 +169,9 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {return -ENODEV; }
 static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { }
+extern u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+			     struct cpuidle_device *dev)
+{return 0; }
 static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
 {return -ENODEV; }
 static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
-- 
cgit v1.2.3


From 7d4daeedd575bbc3c40c87fc6708a8b88c50fe7e Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 3 Jul 2019 20:51:27 -0300
Subject: governors: unify last_state_idx

Since this field is shared by all governors, move it to
cpuidle device structure.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/ladder.c | 21 ++++++++++-----------
 drivers/cpuidle/governors/menu.c   |  5 ++---
 drivers/cpuidle/governors/teo.c    | 12 ++++++------
 include/linux/cpuidle.h            |  1 +
 4 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index f0dddc66af26..428eeb832fe7 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -38,7 +38,6 @@ struct ladder_device_state {
 
 struct ladder_device {
 	struct ladder_device_state states[CPUIDLE_STATE_MAX];
-	int last_state_idx;
 };
 
 static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
@@ -49,12 +48,13 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
  * @old_idx: the current state index
  * @new_idx: the new target state index
  */
-static inline void ladder_do_selection(struct ladder_device *ldev,
+static inline void ladder_do_selection(struct cpuidle_device *dev,
+				       struct ladder_device *ldev,
 				       int old_idx, int new_idx)
 {
 	ldev->states[old_idx].stats.promotion_count = 0;
 	ldev->states[old_idx].stats.demotion_count = 0;
-	ldev->last_state_idx = new_idx;
+	dev->last_state_idx = new_idx;
 }
 
 /**
@@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct ladder_device_state *last_state;
-	int last_residency, last_idx = ldev->last_state_idx;
+	int last_residency, last_idx = dev->last_state_idx;
 	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
 	int latency_req = cpuidle_governor_latency_req(dev->cpu);
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
-		ladder_do_selection(ldev, last_idx, 0);
+		ladder_do_selection(dev, ldev, last_idx, 0);
 		return 0;
 	}
 
@@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
-			ladder_do_selection(ldev, last_idx, last_idx + 1);
+			ladder_do_selection(dev, ldev, last_idx, last_idx + 1);
 			return last_idx + 1;
 		}
 	}
@@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 			if (drv->states[i].exit_latency <= latency_req)
 				break;
 		}
-		ladder_do_selection(ldev, last_idx, i);
+		ladder_do_selection(dev, ldev, last_idx, i);
 		return i;
 	}
 
@@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;
 		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
-			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			ladder_do_selection(dev, ldev, last_idx, last_idx - 1);
 			return last_idx - 1;
 		}
 	}
@@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
 	struct ladder_device_state *lstate;
 	struct cpuidle_state *state;
 
-	ldev->last_state_idx = first_idx;
+	dev->last_state_idx = first_idx;
 
 	for (i = first_idx; i < drv->state_count; i++) {
 		state = &drv->states[i];
@@ -167,9 +167,8 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
  */
 static void ladder_reflect(struct cpuidle_device *dev, int index)
 {
-	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	if (index > 0)
-		ldev->last_state_idx = index;
+		dev->last_state_idx = index;
 }
 
 static struct cpuidle_governor ladder_governor = {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e9a28c7846d6..dace4c7f830c 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -117,7 +117,6 @@
  */
 
 struct menu_device {
-	int		last_state_idx;
 	int             needs_update;
 	int             tick_wakeup;
 
@@ -455,7 +454,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 
-	data->last_state_idx = index;
+	dev->last_state_idx = index;
 	data->needs_update = 1;
 	data->tick_wakeup = tick_nohz_idle_got_tick();
 }
@@ -468,7 +467,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
-	int last_idx = data->last_state_idx;
+	int last_idx = dev->last_state_idx;
 	struct cpuidle_state *target = &drv->states[last_idx];
 	unsigned int measured_us;
 	unsigned int new_factor;
diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index 7d05efdbd3c6..a2fd81067a13 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -96,7 +96,6 @@ struct teo_idle_state {
  * @time_span_ns: Time between idle state selection and post-wakeup update.
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @states: Idle states data corresponding to this CPU.
- * @last_state: Idle state entered by the CPU last time.
  * @interval_idx: Index of the most recent saved idle interval.
  * @intervals: Saved idle duration values.
  */
@@ -104,7 +103,6 @@ struct teo_cpu {
 	u64 time_span_ns;
 	u64 sleep_length_ns;
 	struct teo_idle_state states[CPUIDLE_STATE_MAX];
-	int last_state;
 	int interval_idx;
 	unsigned int intervals[INTERVALS];
 };
@@ -130,7 +128,9 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 */
 		measured_us = sleep_length_us;
 	} else {
-		unsigned int lat = drv->states[cpu_data->last_state].exit_latency;
+		unsigned int lat;
+
+		lat = drv->states[dev->last_state_idx].exit_latency;
 
 		measured_us = ktime_to_us(cpu_data->time_span_ns);
 		/*
@@ -245,9 +245,9 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int max_early_idx, idx, i;
 	ktime_t delta_tick;
 
-	if (cpu_data->last_state >= 0) {
+	if (dev->last_state_idx >= 0) {
 		teo_update(drv, dev);
-		cpu_data->last_state = -1;
+		dev->last_state_idx = -1;
 	}
 
 	cpu_data->time_span_ns = local_clock();
@@ -394,7 +394,7 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 
-	cpu_data->last_state = state;
+	dev->last_state_idx = state;
 	/*
 	 * If the wakeup was not "natural", but triggered by one of the safety
 	 * nets, assume that the CPU might have been idle for the entire sleep
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b484dd69ec21..ba535a1a47d5 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -85,6 +85,7 @@ struct cpuidle_device {
 	unsigned int		cpu;
 	ktime_t			next_hrtimer;
 
+	int			last_state_idx;
 	int			last_residency;
 	u64			poll_limit_ns;
 	struct cpuidle_state_usage	states_usage[CPUIDLE_STATE_MAX];
-- 
cgit v1.2.3


From a1c4423b02b2121108e3ea9580741e0f26309a48 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Wed, 3 Jul 2019 20:51:29 -0300
Subject: cpuidle-haltpoll: disable host side polling when kvm virtualized

When performing guest side polling, it is not necessary to
also perform host side polling.

So disable host side polling, via the new MSR interface,
when loading cpuidle-haltpoll driver.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/Kconfig                        |  7 ++++++
 arch/x86/include/asm/cpuidle_haltpoll.h |  8 +++++++
 arch/x86/kernel/kvm.c                   | 42 +++++++++++++++++++++++++++++++++
 drivers/cpuidle/cpuidle-haltpoll.c      |  9 ++++++-
 include/linux/cpuidle_haltpoll.h        | 16 +++++++++++++
 5 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/cpuidle_haltpoll.h
 create mode 100644 include/linux/cpuidle_haltpoll.h

(limited to 'include')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 222855cc0158..05e78acb187c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -794,6 +794,7 @@ config KVM_GUEST
 	bool "KVM Guest support (including kvmclock)"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
+	select ARCH_CPUIDLE_HALTPOLL
 	default y
 	---help---
 	  This option enables various optimizations for running under the KVM
@@ -802,6 +803,12 @@ config KVM_GUEST
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config ARCH_CPUIDLE_HALTPOLL
+        def_bool n
+        prompt "Disable host haltpoll when loading haltpoll driver"
+        help
+	  If virtualized under KVM, disable host haltpoll.
+
 config PVH
 	bool "Support for running PVH guests"
 	---help---
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..ff8607d81526
--- /dev/null
+++ b/arch/x86/include/asm/cpuidle_haltpoll.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_HALTPOLL_H
+#define _ARCH_HALTPOLL_H
+
+void arch_haltpoll_enable(void);
+void arch_haltpoll_disable(void);
+
+#endif
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b7f34fe2171e..f48401be8ce0 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -875,3 +875,45 @@ void __init kvm_spinlock_init(void)
 }
 
 #endif	/* CONFIG_PARAVIRT_SPINLOCKS */
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+
+static void kvm_disable_host_haltpoll(void *i)
+{
+	wrmsrl(MSR_KVM_POLL_CONTROL, 0);
+}
+
+static void kvm_enable_host_haltpoll(void *i)
+{
+	wrmsrl(MSR_KVM_POLL_CONTROL, 1);
+}
+
+void arch_haltpoll_enable(void)
+{
+	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
+		printk(KERN_ERR "kvm: host does not support poll control\n");
+		printk(KERN_ERR "kvm: host upgrade recommended\n");
+		return;
+	}
+
+	preempt_disable();
+	/* Enable guest halt poll disables host halt poll */
+	kvm_disable_host_haltpoll(NULL);
+	smp_call_function(kvm_disable_host_haltpoll, NULL, 1);
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
+
+void arch_haltpoll_disable(void)
+{
+	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
+		return;
+
+	preempt_disable();
+	/* Enable guest halt poll disables host halt poll */
+	kvm_enable_host_haltpoll(NULL);
+	smp_call_function(kvm_enable_host_haltpoll, NULL, 1);
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
+#endif
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 35cfb53e9287..9ac093dcbb01 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/sched/idle.h>
 #include <linux/kvm_para.h>
+#include <linux/cpuidle_haltpoll.h>
 
 static int default_enter_idle(struct cpuidle_device *dev,
 			      struct cpuidle_driver *drv, int index)
@@ -47,6 +48,7 @@ static struct cpuidle_driver haltpoll_driver = {
 
 static int __init haltpoll_init(void)
 {
+	int ret;
 	struct cpuidle_driver *drv = &haltpoll_driver;
 
 	cpuidle_poll_state_init(drv);
@@ -54,11 +56,16 @@ static int __init haltpoll_init(void)
 	if (!kvm_para_available())
 		return 0;
 
-	return cpuidle_register(&haltpoll_driver, NULL);
+	ret = cpuidle_register(&haltpoll_driver, NULL);
+	if (ret == 0)
+		arch_haltpoll_enable();
+
+	return ret;
 }
 
 static void __exit haltpoll_exit(void)
 {
+	arch_haltpoll_disable();
 	cpuidle_unregister(&haltpoll_driver);
 }
 
diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h
new file mode 100644
index 000000000000..fe5954c2409e
--- /dev/null
+++ b/include/linux/cpuidle_haltpoll.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _CPUIDLE_HALTPOLL_H
+#define _CPUIDLE_HALTPOLL_H
+
+#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
+#include <asm/cpuidle_haltpoll.h>
+#else
+static inline void arch_haltpoll_enable(void)
+{
+}
+
+static inline void arch_haltpoll_disable(void)
+{
+}
+#endif
+#endif
-- 
cgit v1.2.3


From 64a38e840ce5940253208eaba40265c73decc4ee Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Fri, 26 Jul 2019 18:33:01 -0400
Subject: SUNRPC: Track writers of the 'channel' file to improve
 cache_listeners_exist

The sunrpc cache interface is susceptible to being fooled by a rogue
process just reading a 'channel' file.  If this happens the kernel
may think a valid daemon exists to service the cache when it does not.
For example, the following may fool the kernel:
cat /proc/net/rpc/auth.unix.gid/channel

Change the tracking of readers to writers when considering whether a
listener exists as all valid daemon processes either open a channel
file O_RDWR or O_WRONLY.  While this does not prevent a rogue process
from "stealing" a message from the kernel, it does at least improve
the kernels perception of whether a valid process servicing the cache
exists.

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h |  6 +++---
 net/sunrpc/cache.c           | 12 ++++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index c7f38e897174..f7d086b77a21 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -107,9 +107,9 @@ struct cache_detail {
 	/* fields for communication over channel */
 	struct list_head	queue;
 
-	atomic_t		readers;		/* how many time is /chennel open */
-	time_t			last_close;		/* if no readers, when did last close */
-	time_t			last_warn;		/* when we last warned about no readers */
+	atomic_t		writers;		/* how many time is /channel open */
+	time_t			last_close;		/* if no writers, when did last close */
+	time_t			last_warn;		/* when we last warned about no writers */
 
 	union {
 		struct proc_dir_entry	*procfs;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 6f1528f271ee..a6a6190ad37a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -373,7 +373,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
 	cd->entries = 0;
-	atomic_set(&cd->readers, 0);
+	atomic_set(&cd->writers, 0);
 	cd->last_close = 0;
 	cd->last_warn = -1;
 	list_add(&cd->others, &cache_list);
@@ -1029,11 +1029,13 @@ static int cache_open(struct inode *inode, struct file *filp,
 		}
 		rp->offset = 0;
 		rp->q.reader = 1;
-		atomic_inc(&cd->readers);
+
 		spin_lock(&queue_lock);
 		list_add(&rp->q.list, &cd->queue);
 		spin_unlock(&queue_lock);
 	}
+	if (filp->f_mode & FMODE_WRITE)
+		atomic_inc(&cd->writers);
 	filp->private_data = rp;
 	return 0;
 }
@@ -1062,8 +1064,10 @@ static int cache_release(struct inode *inode, struct file *filp,
 		filp->private_data = NULL;
 		kfree(rp);
 
+	}
+	if (filp->f_mode & FMODE_WRITE) {
+		atomic_dec(&cd->writers);
 		cd->last_close = seconds_since_boot();
-		atomic_dec(&cd->readers);
 	}
 	module_put(cd->owner);
 	return 0;
@@ -1171,7 +1175,7 @@ static void warn_no_listener(struct cache_detail *detail)
 
 static bool cache_listeners_exist(struct cache_detail *detail)
 {
-	if (atomic_read(&detail->readers))
+	if (atomic_read(&detail->writers))
 		return true;
 	if (detail->last_close == 0)
 		/* This cache was never opened */
-- 
cgit v1.2.3


From 509ce4c85bd055ee1013bc853b5d543428b0f017 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 29 Jul 2019 00:27:39 +0900
Subject: ppdev: add header include guard

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Link: https://lore.kernel.org/r/20190728152739.9249-1-yamada.masahiro@socionext.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/ppdev.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ppdev.h b/include/uapi/linux/ppdev.h
index 8fe3c64d149e..eb895b83f2bd 100644
--- a/include/uapi/linux/ppdev.h
+++ b/include/uapi/linux/ppdev.h
@@ -15,6 +15,9 @@
  * Added PPGETMODES/PPGETMODE/PPGETPHASE, Fred Barnes <frmb2@ukc.ac.uk>, 03/01/2001
  */
 
+#ifndef _UAPI_LINUX_PPDEV_H
+#define _UAPI_LINUX_PPDEV_H
+
 #define PP_IOCTL	'p'
 
 /* Set mode for read/write (e.g. IEEE1284_MODE_EPP) */
@@ -97,4 +100,4 @@ struct ppdev_frob_struct {
 /* only masks user-visible flags */
 #define PP_FLAGMASK	(PP_FASTWRITE | PP_FASTREAD | PP_W91284PIC)
 
-
+#endif /* _UAPI_LINUX_PPDEV_H */
-- 
cgit v1.2.3


From 89e524c04fa966330e2e80ab2bc50b9944c5847a Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 30 Jul 2019 13:10:14 +0200
Subject: loop: Fix mount(2) failure due to race with LOOP_SET_FD

Commit 33ec3e53e7b1 ("loop: Don't change loop device under exclusive
opener") made LOOP_SET_FD ioctl acquire exclusive block device reference
while it updates loop device binding. However this can make perfectly
valid mount(2) fail with EBUSY due to racing LOOP_SET_FD holding
temporarily the exclusive bdev reference in cases like this:

for i in {a..z}{a..z}; do
        dd if=/dev/zero of=$i.image bs=1k count=0 seek=1024
        mkfs.ext2 $i.image
        mkdir mnt$i
done

echo "Run"
for i in {a..z}{a..z}; do
        mount -o loop -t ext2 $i.image mnt$i &
done

Fix the problem by not getting full exclusive bdev reference in
LOOP_SET_FD but instead just mark the bdev as being claimed while we
update the binding information. This just blocks new exclusive openers
instead of failing them with EBUSY thus fixing the problem.

Fixes: 33ec3e53e7b1 ("loop: Don't change loop device under exclusive opener")
Cc: stable@vger.kernel.org
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 16 +++++-----
 fs/block_dev.c       | 83 ++++++++++++++++++++++++++++++++++++----------------
 include/linux/fs.h   |  6 ++++
 3 files changed, 73 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 44c9985f352a..3036883fc9f8 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -924,6 +924,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	struct file	*file;
 	struct inode	*inode;
 	struct address_space *mapping;
+	struct block_device *claimed_bdev = NULL;
 	int		lo_flags = 0;
 	int		error;
 	loff_t		size;
@@ -942,10 +943,11 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	 * here to avoid changing device under exclusive owner.
 	 */
 	if (!(mode & FMODE_EXCL)) {
-		bdgrab(bdev);
-		error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd);
-		if (error)
+		claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
+		if (IS_ERR(claimed_bdev)) {
+			error = PTR_ERR(claimed_bdev);
 			goto out_putf;
+		}
 	}
 
 	error = mutex_lock_killable(&loop_ctl_mutex);
@@ -1015,15 +1017,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	mutex_unlock(&loop_ctl_mutex);
 	if (partscan)
 		loop_reread_partitions(lo, bdev);
-	if (!(mode & FMODE_EXCL))
-		blkdev_put(bdev, mode | FMODE_EXCL);
+	if (claimed_bdev)
+		bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
 	return 0;
 
 out_unlock:
 	mutex_unlock(&loop_ctl_mutex);
 out_bdev:
-	if (!(mode & FMODE_EXCL))
-		blkdev_put(bdev, mode | FMODE_EXCL);
+	if (claimed_bdev)
+		bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
 out_putf:
 	fput(file);
 out:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c2a85b587922..22591bad9353 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1181,8 +1181,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
  * Pointer to the block device containing @bdev on success, ERR_PTR()
  * value on failure.
  */
-static struct block_device *bd_start_claiming(struct block_device *bdev,
-					      void *holder)
+struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
 {
 	struct gendisk *disk;
 	struct block_device *whole;
@@ -1229,6 +1228,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
 		return ERR_PTR(err);
 	}
 }
+EXPORT_SYMBOL(bd_start_claiming);
+
+static void bd_clear_claiming(struct block_device *whole, void *holder)
+{
+	lockdep_assert_held(&bdev_lock);
+	/* tell others that we're done */
+	BUG_ON(whole->bd_claiming != holder);
+	whole->bd_claiming = NULL;
+	wake_up_bit(&whole->bd_claiming, 0);
+}
+
+/**
+ * bd_finish_claiming - finish claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Finish exclusive open of a block device. Mark the device as exlusively
+ * open by the holder and wake up all waiters for exclusive open to finish.
+ */
+void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
+			void *holder)
+{
+	spin_lock(&bdev_lock);
+	BUG_ON(!bd_may_claim(bdev, whole, holder));
+	/*
+	 * Note that for a whole device bd_holders will be incremented twice,
+	 * and bd_holder will be set to bd_may_claim before being set to holder
+	 */
+	whole->bd_holders++;
+	whole->bd_holder = bd_may_claim;
+	bdev->bd_holders++;
+	bdev->bd_holder = holder;
+	bd_clear_claiming(whole, holder);
+	spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_finish_claiming);
+
+/**
+ * bd_abort_claiming - abort claiming of a block device
+ * @bdev: block device of interest
+ * @whole: whole block device (returned from bd_start_claiming())
+ * @holder: holder that has claimed @bdev
+ *
+ * Abort claiming of a block device when the exclusive open failed. This can be
+ * also used when exclusive open is not actually desired and we just needed
+ * to block other exclusive openers for a while.
+ */
+void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
+		       void *holder)
+{
+	spin_lock(&bdev_lock);
+	bd_clear_claiming(whole, holder);
+	spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_abort_claiming);
 
 #ifdef CONFIG_SYSFS
 struct bd_holder_disk {
@@ -1698,29 +1753,7 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
-		spin_lock(&bdev_lock);
-
-		if (!res) {
-			BUG_ON(!bd_may_claim(bdev, whole, holder));
-			/*
-			 * Note that for a whole device bd_holders
-			 * will be incremented twice, and bd_holder
-			 * will be set to bd_may_claim before being
-			 * set to holder
-			 */
-			whole->bd_holders++;
-			whole->bd_holder = bd_may_claim;
-			bdev->bd_holders++;
-			bdev->bd_holder = holder;
-		}
-
-		/* tell others that we're done */
-		BUG_ON(whole->bd_claiming != holder);
-		whole->bd_claiming = NULL;
-		wake_up_bit(&whole->bd_claiming, 0);
-
-		spin_unlock(&bdev_lock);
-
+		bd_finish_claiming(bdev, whole, holder);
 		/*
 		 * Block event polling for write claims if requested.  Any
 		 * write holder makes the write_holder state stick until
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 56b8e358af5c..997a530ff4e9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2598,6 +2598,12 @@ extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
 					       void *holder);
 extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
 					      void *holder);
+extern struct block_device *bd_start_claiming(struct block_device *bdev,
+					      void *holder);
+extern void bd_finish_claiming(struct block_device *bdev,
+			       struct block_device *whole, void *holder);
+extern void bd_abort_claiming(struct block_device *bdev,
+			      struct block_device *whole, void *holder);
 extern void blkdev_put(struct block_device *bdev, fmode_t mode);
 extern int __blkdev_reread_part(struct block_device *bdev);
 extern int blkdev_reread_part(struct block_device *bdev);
-- 
cgit v1.2.3


From 7240b60c98d6309363a9f8d5a4ecd5b0626f2aff Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Tue, 30 Jul 2019 07:40:32 -0700
Subject: linux: Add skb_frag_t page_offset accessors

Add skb_frag_off(), skb_frag_off_add(), skb_frag_off_set(),
and skb_frag_off_copy() accessors for page_offset.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 67 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 718742b1c505..2957cdd6c032 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -312,7 +312,7 @@ extern int sysctl_max_skb_frags;
 typedef struct bio_vec skb_frag_t;
 
 /**
- * skb_frag_size - Returns the size of a skb fragment
+ * skb_frag_size() - Returns the size of a skb fragment
  * @frag: skb fragment
  */
 static inline unsigned int skb_frag_size(const skb_frag_t *frag)
@@ -321,7 +321,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 }
 
 /**
- * skb_frag_size_set - Sets the size of a skb fragment
+ * skb_frag_size_set() - Sets the size of a skb fragment
  * @frag: skb fragment
  * @size: size of fragment
  */
@@ -331,7 +331,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
 }
 
 /**
- * skb_frag_size_add - Incrementes the size of a skb fragment by %delta
+ * skb_frag_size_add() - Increments the size of a skb fragment by @delta
  * @frag: skb fragment
  * @delta: value to add
  */
@@ -341,7 +341,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
 }
 
 /**
- * skb_frag_size_sub - Decrements the size of a skb fragment by %delta
+ * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta
  * @frag: skb fragment
  * @delta: value to subtract
  */
@@ -2857,6 +2857,46 @@ static inline void skb_propagate_pfmemalloc(struct page *page,
 		skb->pfmemalloc = true;
 }
 
+/**
+ * skb_frag_off() - Returns the offset of a skb fragment
+ * @frag: the paged fragment
+ */
+static inline unsigned int skb_frag_off(const skb_frag_t *frag)
+{
+	return frag->page_offset;
+}
+
+/**
+ * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
+ * @frag: skb fragment
+ * @delta: value to add
+ */
+static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
+{
+	frag->page_offset += delta;
+}
+
+/**
+ * skb_frag_off_set() - Sets the offset of a skb fragment
+ * @frag: skb fragment
+ * @offset: offset of fragment
+ */
+static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
+{
+	frag->page_offset = offset;
+}
+
+/**
+ * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
+ * @fragto: skb fragment where offset is set
+ * @fragfrom: skb fragment offset is copied from
+ */
+static inline void skb_frag_off_copy(skb_frag_t *fragto,
+				     const skb_frag_t *fragfrom)
+{
+	fragto->page_offset = fragfrom->page_offset;
+}
+
 /**
  * skb_frag_page - retrieve the page referred to by a paged fragment
  * @frag: the paged fragment
@@ -2923,7 +2963,7 @@ static inline void skb_frag_unref(struct sk_buff *skb, int f)
  */
 static inline void *skb_frag_address(const skb_frag_t *frag)
 {
-	return page_address(skb_frag_page(frag)) + frag->page_offset;
+	return page_address(skb_frag_page(frag)) + skb_frag_off(frag);
 }
 
 /**
@@ -2939,7 +2979,18 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
 	if (unlikely(!ptr))
 		return NULL;
 
-	return ptr + frag->page_offset;
+	return ptr + skb_frag_off(frag);
+}
+
+/**
+ * skb_frag_page_copy() - sets the page in a fragment from another fragment
+ * @fragto: skb fragment where page is set
+ * @fragfrom: skb fragment page is copied from
+ */
+static inline void skb_frag_page_copy(skb_frag_t *fragto,
+				      const skb_frag_t *fragfrom)
+{
+	fragto->bv_page = fragfrom->bv_page;
 }
 
 /**
@@ -2987,7 +3038,7 @@ static inline dma_addr_t skb_frag_dma_map(struct device *dev,
 					  enum dma_data_direction dir)
 {
 	return dma_map_page(dev, skb_frag_page(frag),
-			    frag->page_offset + offset, size, dir);
+			    skb_frag_off(frag) + offset, size, dir);
 }
 
 static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
@@ -3157,7 +3208,7 @@ static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
 
 		return page == skb_frag_page(frag) &&
-		       off == frag->page_offset + skb_frag_size(frag);
+		       off == skb_frag_off(frag) + skb_frag_size(frag);
 	}
 	return false;
 }
-- 
cgit v1.2.3


From 65c84f148e359ed398dcc9ed736131103f40896b Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Tue, 30 Jul 2019 07:40:34 -0700
Subject: linux: Remove bvec page_offset, use bv_offset

Now that page_offset is referenced through accessors, remove
the union, and use bv_offset.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bvec.h   |  5 +----
 include/linux/skbuff.h | 10 +++++-----
 2 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 7f2b2ea9399c..a032f01e928c 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -18,10 +18,7 @@
 struct bio_vec {
 	struct page	*bv_page;
 	unsigned int	bv_len;
-	union {
-		__u32		page_offset;
-		unsigned int	bv_offset;
-	};
+	unsigned int	bv_offset;
 };
 
 struct bvec_iter {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2957cdd6c032..3aef8d82ea59 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2078,7 +2078,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 	 * on page_is_pfmemalloc doing the right thing(tm).
 	 */
 	frag->bv_page		  = page;
-	frag->page_offset	  = off;
+	frag->bv_offset		  = off;
 	skb_frag_size_set(frag, size);
 
 	page = compound_head(page);
@@ -2863,7 +2863,7 @@ static inline void skb_propagate_pfmemalloc(struct page *page,
  */
 static inline unsigned int skb_frag_off(const skb_frag_t *frag)
 {
-	return frag->page_offset;
+	return frag->bv_offset;
 }
 
 /**
@@ -2873,7 +2873,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
  */
 static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
 {
-	frag->page_offset += delta;
+	frag->bv_offset += delta;
 }
 
 /**
@@ -2883,7 +2883,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
  */
 static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
 {
-	frag->page_offset = offset;
+	frag->bv_offset = offset;
 }
 
 /**
@@ -2894,7 +2894,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
 static inline void skb_frag_off_copy(skb_frag_t *fragto,
 				     const skb_frag_t *fragfrom)
 {
-	fragto->page_offset = fragfrom->page_offset;
+	fragto->bv_offset = fragfrom->bv_offset;
 }
 
 /**
-- 
cgit v1.2.3


From 055d88242a6046a1ceac3167290f054c72571cd9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 30 Jul 2019 21:25:20 +0200
Subject: compat_ioctl: pppoe: fix PPPOEIOCSFWD handling

Support for handling the PPPOEIOCSFWD ioctl in compat mode was added in
linux-2.5.69 along with hundreds of other commands, but was always broken
sincen only the structure is compatible, but the command number is not,
due to the size being sizeof(size_t), or at first sizeof(sizeof((struct
sockaddr_pppox)), which is different on 64-bit architectures.

Guillaume Nault adds:

  And the implementation was broken until 2016 (see 29e73269aa4d ("pppoe:
  fix reference counting in PPPoE proxy")), and nobody ever noticed. I
  should probably have removed this ioctl entirely instead of fixing it.
  Clearly, it has never been used.

Fix it by adding a compat_ioctl handler for all pppoe variants that
translates the command number and then calls the regular ioctl function.

All other ioctl commands handled by pppoe are compatible between 32-bit
and 64-bit, and require compat_ptr() conversion.

This should apply to all stable kernels.

Acked-by: Guillaume Nault <g.nault@alphalink.fr>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/pppoe.c  |  3 +++
 drivers/net/ppp/pppox.c  | 13 +++++++++++++
 drivers/net/ppp/pptp.c   |  3 +++
 fs/compat_ioctl.c        |  3 ---
 include/linux/if_pppox.h |  3 +++
 net/l2tp/l2tp_ppp.c      |  3 +++
 6 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 1d902ecb4aa8..a44dd3c8af63 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1115,6 +1115,9 @@ static const struct proto_ops pppoe_ops = {
 	.recvmsg	= pppoe_recvmsg,
 	.mmap		= sock_no_mmap,
 	.ioctl		= pppox_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= pppox_compat_ioctl,
+#endif
 };
 
 static const struct pppox_proto pppoe_proto = {
diff --git a/drivers/net/ppp/pppox.c b/drivers/net/ppp/pppox.c
index 5ef422a43d70..08364f10a43f 100644
--- a/drivers/net/ppp/pppox.c
+++ b/drivers/net/ppp/pppox.c
@@ -17,6 +17,7 @@
 #include <linux/string.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/compat.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/net.h>
@@ -98,6 +99,18 @@ int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
 EXPORT_SYMBOL(pppox_ioctl);
 
+#ifdef CONFIG_COMPAT
+int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	if (cmd == PPPOEIOCSFWD32)
+		cmd = PPPOEIOCSFWD;
+
+	return pppox_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
+}
+
+EXPORT_SYMBOL(pppox_compat_ioctl);
+#endif
+
 static int pppox_create(struct net *net, struct socket *sock, int protocol,
 			int kern)
 {
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index a8e52c8e4128..734de7de03f7 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -623,6 +623,9 @@ static const struct proto_ops pptp_ops = {
 	.recvmsg    = sock_no_recvmsg,
 	.mmap       = sock_no_mmap,
 	.ioctl      = pppox_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = pppox_compat_ioctl,
+#endif
 };
 
 static const struct pppox_proto pppox_pptp_proto = {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6e30949d9f77..a7ec2d3dff92 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -638,9 +638,6 @@ COMPATIBLE_IOCTL(PPPIOCDISCONN)
 COMPATIBLE_IOCTL(PPPIOCATTCHAN)
 COMPATIBLE_IOCTL(PPPIOCGCHAN)
 COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
-/* PPPOX */
-COMPATIBLE_IOCTL(PPPOEIOCSFWD)
-COMPATIBLE_IOCTL(PPPOEIOCDFWD)
 /* Big A */
 /* sparc only */
 /* Big Q for sound/OSS */
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 8b728750a625..69e813bcb947 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -80,6 +80,9 @@ extern int register_pppox_proto(int proto_num, const struct pppox_proto *pp);
 extern void unregister_pppox_proto(int proto_num);
 extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */
 extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+extern int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+
+#define PPPOEIOCSFWD32    _IOW(0xB1 ,0, compat_size_t)
 
 /* PPPoX socket states */
 enum {
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1d0e5904dedf..c54cb59593ef 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1681,6 +1681,9 @@ static const struct proto_ops pppol2tp_ops = {
 	.recvmsg	= pppol2tp_recvmsg,
 	.mmap		= sock_no_mmap,
 	.ioctl		= pppox_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = pppox_compat_ioctl,
+#endif
 };
 
 static const struct pppox_proto pppol2tp_proto = {
-- 
cgit v1.2.3


From b74494872555d1f7888dfd9225700a363f4a84fc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Jul 2019 20:30:49 +0200
Subject: hrtimer: Remove task argument from hrtimer_init_sleeper()

All callers hand in 'current' and that's the only task pointer which
actually makes sense. Remove the task argument and set current in the
function.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.791885290@linutronix.de
---
 block/blk-mq.c                 | 2 +-
 drivers/staging/android/vsoc.c | 2 +-
 include/linux/hrtimer.h        | 3 +--
 include/linux/wait.h           | 2 +-
 kernel/futex.c                 | 2 +-
 kernel/time/hrtimer.c          | 8 ++++----
 net/core/pktgen.c              | 2 +-
 7 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b038ec680e84..5f647cb8c695 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3418,7 +3418,7 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs, current);
+	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index 00a1ec7b9154..ce480bcf20d2 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -442,7 +442,7 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
 
-		hrtimer_init_sleeper(to, current);
+		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 4971100a8cab..3c74f89367c4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -463,8 +463,7 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
-				 struct task_struct *tsk);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
 
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
 						const enum hrtimer_mode mode);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b6f77cf60dd7..d57832774ca6 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -489,7 +489,7 @@ do {										\
 	struct hrtimer_sleeper __t;						\
 										\
 	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t, current);					\
+	hrtimer_init_sleeper(&__t);						\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
diff --git a/kernel/futex.c b/kernel/futex.c
index 6d50728ef2e7..5e9842ea4012 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -490,7 +490,7 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
 			      CLOCK_REALTIME : CLOCK_MONOTONIC,
 			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout, current);
+	hrtimer_init_sleeper(timeout);
 
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5ee77f1a8a92..de895d86800c 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1639,10 +1639,10 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
 {
 	sl->timer.function = hrtimer_wakeup;
-	sl->task = task;
+	sl->task = current;
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
@@ -1669,7 +1669,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t, current);
+	hrtimer_init_sleeper(t);
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
@@ -1930,7 +1930,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 	hrtimer_init_on_stack(&t.timer, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
-	hrtimer_init_sleeper(&t, current);
+	hrtimer_init_sleeper(&t);
 
 	hrtimer_start_expires(&t.timer, mode);
 
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bb9915291644..7f3cf2381f27 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2171,7 +2171,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
 		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t, current);
+		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
-- 
cgit v1.2.3


From 473c7391ce731adb482c03e420964676ed8b494d Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 30 Jul 2019 17:43:30 +0200
Subject: vsock/virtio: limit the memory used per-socket

Since virtio-vsock was introduced, the buffers filled by the host
and pushed to the guest using the vring, are directly queued in
a per-socket list. These buffers are preallocated by the guest
with a fixed size (4 KB).

The maximum amount of memory used by each socket should be
controlled by the credit mechanism.
The default credit available per-socket is 256 KB, but if we use
only 1 byte per packet, the guest can queue up to 262144 of 4 KB
buffers, using up to 1 GB of memory per-socket. In addition, the
guest will continue to fill the vring with new 4 KB free buffers
to avoid starvation of other sockets.

This patch mitigates this issue copying the payload of small
packets (< 128 bytes) into the buffer of last packet queued, in
order to avoid wasting memory.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/vhost/vsock.c                   |  2 ++
 include/linux/virtio_vsock.h            |  1 +
 net/vmw_vsock/virtio_transport.c        |  1 +
 net/vmw_vsock/virtio_transport_common.c | 60 ++++++++++++++++++++++++++++-----
 4 files changed, 55 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 6a50e1d0529c..6c8390a2af52 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -329,6 +329,8 @@ vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
 		return NULL;
 	}
 
+	pkt->buf_len = pkt->len;
+
 	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
 	if (nbytes != pkt->len) {
 		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index e223e2632edd..7d973903f52e 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -52,6 +52,7 @@ struct virtio_vsock_pkt {
 	/* socket refcnt not held, only use for cancellation */
 	struct vsock_sock *vsk;
 	void *buf;
+	u32 buf_len;
 	u32 len;
 	u32 off;
 	bool reply;
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 0815d1357861..082a30936690 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
 			break;
 		}
 
+		pkt->buf_len = buf_len;
 		pkt->len = buf_len;
 
 		sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 6f1a8aff65c5..095221f94786 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -26,6 +26,9 @@
 /* How long to wait for graceful shutdown of a connection */
 #define VSOCK_CLOSE_TIMEOUT (8 * HZ)
 
+/* Threshold for detecting small packets to copy */
+#define GOOD_COPY_LEN  128
+
 static const struct virtio_transport *virtio_transport_get_ops(void)
 {
 	const struct vsock_transport *t = vsock_core_get_transport();
@@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
 		pkt->buf = kmalloc(len, GFP_KERNEL);
 		if (!pkt->buf)
 			goto out_pkt;
+
+		pkt->buf_len = len;
+
 		err = memcpy_from_msg(pkt->buf, info->msg, len);
 		if (err)
 			goto out;
@@ -841,24 +847,60 @@ destroy:
 	return err;
 }
 
+static void
+virtio_transport_recv_enqueue(struct vsock_sock *vsk,
+			      struct virtio_vsock_pkt *pkt)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	bool free_pkt = false;
+
+	pkt->len = le32_to_cpu(pkt->hdr.len);
+	pkt->off = 0;
+
+	spin_lock_bh(&vvs->rx_lock);
+
+	virtio_transport_inc_rx_pkt(vvs, pkt);
+
+	/* Try to copy small packets into the buffer of last packet queued,
+	 * to avoid wasting memory queueing the entire buffer with a small
+	 * payload.
+	 */
+	if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
+		struct virtio_vsock_pkt *last_pkt;
+
+		last_pkt = list_last_entry(&vvs->rx_queue,
+					   struct virtio_vsock_pkt, list);
+
+		/* If there is space in the last packet queued, we copy the
+		 * new packet in its buffer.
+		 */
+		if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+			memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
+			       pkt->len);
+			last_pkt->len += pkt->len;
+			free_pkt = true;
+			goto out;
+		}
+	}
+
+	list_add_tail(&pkt->list, &vvs->rx_queue);
+
+out:
+	spin_unlock_bh(&vvs->rx_lock);
+	if (free_pkt)
+		virtio_transport_free_pkt(pkt);
+}
+
 static int
 virtio_transport_recv_connected(struct sock *sk,
 				struct virtio_vsock_pkt *pkt)
 {
 	struct vsock_sock *vsk = vsock_sk(sk);
-	struct virtio_vsock_sock *vvs = vsk->trans;
 	int err = 0;
 
 	switch (le16_to_cpu(pkt->hdr.op)) {
 	case VIRTIO_VSOCK_OP_RW:
-		pkt->len = le32_to_cpu(pkt->hdr.len);
-		pkt->off = 0;
-
-		spin_lock_bh(&vvs->rx_lock);
-		virtio_transport_inc_rx_pkt(vvs, pkt);
-		list_add_tail(&pkt->list, &vvs->rx_queue);
-		spin_unlock_bh(&vvs->rx_lock);
-
+		virtio_transport_recv_enqueue(vsk, pkt);
 		sk->sk_data_ready(sk);
 		return err;
 	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
-- 
cgit v1.2.3


From b89d882dc9fc279c8acbf1df71d51b22394186d5 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 30 Jul 2019 17:43:31 +0200
Subject: vsock/virtio: reduce credit update messages

In order to reduce the number of credit update messages,
we send them only when the space available seen by the
transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_vsock.h            |  1 +
 net/vmw_vsock/virtio_transport_common.c | 16 +++++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 7d973903f52e..49fc9d20bc43 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -41,6 +41,7 @@ struct virtio_vsock_sock {
 
 	/* Protected by rx_lock */
 	u32 fwd_cnt;
+	u32 last_fwd_cnt;
 	u32 rx_bytes;
 	struct list_head rx_queue;
 };
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 095221f94786..a85559d4d974 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -211,6 +211,7 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
 {
 	spin_lock_bh(&vvs->tx_lock);
+	vvs->last_fwd_cnt = vvs->fwd_cnt;
 	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
 	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
 	spin_unlock_bh(&vvs->tx_lock);
@@ -261,6 +262,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 	struct virtio_vsock_sock *vvs = vsk->trans;
 	struct virtio_vsock_pkt *pkt;
 	size_t bytes, total = 0;
+	u32 free_space;
 	int err = -EFAULT;
 
 	spin_lock_bh(&vvs->rx_lock);
@@ -291,11 +293,19 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 			virtio_transport_free_pkt(pkt);
 		}
 	}
+
+	free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+
 	spin_unlock_bh(&vvs->rx_lock);
 
-	/* Send a credit pkt to peer */
-	virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
-					    NULL);
+	/* We send a credit update only when the space available seen
+	 * by the transmitter is less than VIRTIO_VSOCK_MAX_PKT_BUF_SIZE
+	 */
+	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
+		virtio_transport_send_credit_update(vsk,
+						    VIRTIO_VSOCK_TYPE_STREAM,
+						    NULL);
+	}
 
 	return total;
 
-- 
cgit v1.2.3


From 9632e9f61bc4191411c47933abe5f2d93c578f5e Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 30 Jul 2019 17:43:32 +0200
Subject: vsock/virtio: fix locking in virtio_transport_inc_tx_pkt()

fwd_cnt and last_fwd_cnt are protected by rx_lock, so we should use
the same spinlock also if we are in the TX path.

Move also buf_alloc under the same lock.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_vsock.h            | 2 +-
 net/vmw_vsock/virtio_transport_common.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 49fc9d20bc43..4c7781f4b29b 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -35,7 +35,6 @@ struct virtio_vsock_sock {
 
 	/* Protected by tx_lock */
 	u32 tx_cnt;
-	u32 buf_alloc;
 	u32 peer_fwd_cnt;
 	u32 peer_buf_alloc;
 
@@ -43,6 +42,7 @@ struct virtio_vsock_sock {
 	u32 fwd_cnt;
 	u32 last_fwd_cnt;
 	u32 rx_bytes;
+	u32 buf_alloc;
 	struct list_head rx_queue;
 };
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a85559d4d974..34a2b42313b7 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -210,11 +210,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
 
 void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
 {
-	spin_lock_bh(&vvs->tx_lock);
+	spin_lock_bh(&vvs->rx_lock);
 	vvs->last_fwd_cnt = vvs->fwd_cnt;
 	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
 	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
-	spin_unlock_bh(&vvs->tx_lock);
+	spin_unlock_bh(&vvs->rx_lock);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
 
-- 
cgit v1.2.3


From 016e43a26bab0126e33c9682f9d9d05eca9f0386 Mon Sep 17 00:00:00 2001
From: Tristram Ha <Tristram.Ha@microchip.com>
Date: Mon, 29 Jul 2019 19:49:46 +0200
Subject: net: dsa: ksz: Add KSZ8795 tag code

Add DSA tag code for Microchip KSZ8795 switch. The switch is simpler
and the tag is only 1 byte, instead of 2 as is the case with KSZ9477.

Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: David S. Miller <davem@davemloft.net>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Tristram Ha <Tristram.Ha@microchip.com>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Cc: Woojung Huh <woojung.huh@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  2 ++
 net/dsa/tag_ksz.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 1e8650fa8acc..147b757ef8ea 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -41,6 +41,7 @@ struct phylink_link_state;
 #define DSA_TAG_PROTO_TRAILER_VALUE		11
 #define DSA_TAG_PROTO_8021Q_VALUE		12
 #define DSA_TAG_PROTO_SJA1105_VALUE		13
+#define DSA_TAG_PROTO_KSZ8795_VALUE		14
 
 enum dsa_tag_protocol {
 	DSA_TAG_PROTO_NONE		= DSA_TAG_PROTO_NONE_VALUE,
@@ -57,6 +58,7 @@ enum dsa_tag_protocol {
 	DSA_TAG_PROTO_TRAILER		= DSA_TAG_PROTO_TRAILER_VALUE,
 	DSA_TAG_PROTO_8021Q		= DSA_TAG_PROTO_8021Q_VALUE,
 	DSA_TAG_PROTO_SJA1105		= DSA_TAG_PROTO_SJA1105_VALUE,
+	DSA_TAG_PROTO_KSZ8795		= DSA_TAG_PROTO_KSZ8795_VALUE,
 };
 
 struct packet_type;
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index b4872b87d4a6..73605bcbb385 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -69,6 +69,67 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
 	return skb;
 }
 
+/*
+ * For Ingress (Host -> KSZ8795), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
+ *
+ * For Egress (KSZ8795 -> Host), 1 byte is added before FCS.
+ * ---------------------------------------------------------------------------
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * ---------------------------------------------------------------------------
+ * tag0 : zero-based value represents port
+ *	  (eg, 0x00=port1, 0x02=port3, 0x06=port7)
+ */
+
+#define KSZ8795_INGRESS_TAG_LEN		1
+
+#define KSZ8795_TAIL_TAG_OVERRIDE	BIT(6)
+#define KSZ8795_TAIL_TAG_LOOKUP		BIT(7)
+
+static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct sk_buff *nskb;
+	u8 *tag;
+	u8 *addr;
+
+	nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN);
+	if (!nskb)
+		return NULL;
+
+	/* Tag encoding */
+	tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN);
+	addr = skb_mac_header(nskb);
+
+	*tag = 1 << dp->index;
+	if (is_link_local_ether_addr(addr))
+		*tag |= KSZ8795_TAIL_TAG_OVERRIDE;
+
+	return nskb;
+}
+
+static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev,
+				  struct packet_type *pt)
+{
+	u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
+
+	return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN);
+}
+
+static const struct dsa_device_ops ksz8795_netdev_ops = {
+	.name	= "ksz8795",
+	.proto	= DSA_TAG_PROTO_KSZ8795,
+	.xmit	= ksz8795_xmit,
+	.rcv	= ksz8795_rcv,
+	.overhead = KSZ8795_INGRESS_TAG_LEN,
+};
+
+DSA_TAG_DRIVER(ksz8795_netdev_ops);
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795);
+
 /*
  * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
  * ---------------------------------------------------------------------------
@@ -183,6 +244,7 @@ DSA_TAG_DRIVER(ksz9893_netdev_ops);
 MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893);
 
 static struct dsa_tag_driver *dsa_tag_driver_array[] = {
+	&DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops),
 	&DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops),
 	&DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops),
 };
-- 
cgit v1.2.3


From fce04b1ce8e326bf2cafaea4a0c91dbc3e99e403 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 30 Jul 2019 13:43:35 +0300
Subject: gpiolib: of: Reshuffle contents of consumer.h for new library layout

Kernel build bot reported a compilation error after the commit

  f626d6dfb709 ("gpio: of: Break out OF-only code"):

drivers/gpio/gpiolib-devres.o: In function `devm_gpiod_get_from_of_node':
gpiolib-devres.c:(.text+0x19a): undefined reference to `gpiod_get_from_of_node'

This happens due to move the latter under umbrella of CONFIG_OF_GPIO while
customer.h contains staled data.

Fix it by reshuffling contents of consumer.h to satisfy build dependencies.

Reported-by: kbuild test robot <lkp@intel.com>
Fixes: f626d6dfb709 ("gpio: of: Break out OF-only code"):
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20190730104337.21235-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 78 ++++++++++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 9ddcf50a3c59..6019d4d128c5 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -170,18 +170,8 @@ struct gpio_desc *gpio_to_desc(unsigned gpio);
 int desc_to_gpio(const struct gpio_desc *desc);
 
 /* Child properties interface */
-struct device_node;
 struct fwnode_handle;
 
-struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
-					 const char *propname, int index,
-					 enum gpiod_flags dflags,
-					 const char *label);
-struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
-					      struct device_node *node,
-					      const char *propname, int index,
-					      enum gpiod_flags dflags,
-					      const char *label);
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 					 const char *propname, int index,
 					 enum gpiod_flags dflags,
@@ -530,28 +520,8 @@ static inline int desc_to_gpio(const struct gpio_desc *desc)
 }
 
 /* Child properties interface */
-struct device_node;
 struct fwnode_handle;
 
-static inline
-struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
-					 const char *propname, int index,
-					 enum gpiod_flags dflags,
-					 const char *label)
-{
-	return ERR_PTR(-ENOSYS);
-}
-
-static inline
-struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
-					      struct device_node *node,
-					      const char *propname, int index,
-					      enum gpiod_flags dflags,
-					      const char *label)
-{
-	return ERR_PTR(-ENOSYS);
-}
-
 static inline
 struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 					 const char *propname, int index,
@@ -584,6 +554,54 @@ struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
 						      flags, label);
 }
 
+#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO)
+struct device_node;
+
+struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
+					 const char *propname, int index,
+					 enum gpiod_flags dflags,
+					 const char *label);
+
+#else  /* CONFIG_GPIOLIB && CONFIG_OF_GPIO */
+
+struct device_node;
+
+static inline
+struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
+					 const char *propname, int index,
+					 enum gpiod_flags dflags,
+					 const char *label)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+#endif /* CONFIG_GPIOLIB && CONFIG_OF_GPIO */
+
+#ifdef CONFIG_GPIOLIB
+struct device_node;
+
+struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
+					      struct device_node *node,
+					      const char *propname, int index,
+					      enum gpiod_flags dflags,
+					      const char *label);
+
+#else  /* CONFIG_GPIOLIB */
+
+struct device_node;
+
+static inline
+struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
+					      struct device_node *node,
+					      const char *propname, int index,
+					      enum gpiod_flags dflags,
+					      const char *label)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+#endif /* CONFIG_GPIOLIB */
+
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
 
 int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
-- 
cgit v1.2.3


From 2838bf941b120ec846a3903db13e319368d51b08 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 30 Jul 2019 13:43:37 +0300
Subject: gpiolib-acpi: Move acpi_dev_add_driver_gpios() et al to consumer.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The API, which belongs to GPIO library, is foreign to ACPI headers. Earlier
we moved out I²C out of the latter, and now it's time for
acpi_dev_add_driver_gpios() et al.

For time being the acpi_gpio_get_irq_resource() and acpi_dev_gpio_irq_get()
are left untouched as they need more thought about.

Note, it requires uninline acpi_dev_remove_driver_gpios() to keep purity of
consumer.h.

Cc: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Cc: Liam Girdwood <liam.r.girdwood@linux.intel.com>
Cc: Jie Yang <yang.jie@linux.intel.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: alsa-devel@alsa-project.org (moderated list:INTEL ASoC DRIVERS)
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20190730104337.21235-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c                  |  7 ++++
 include/linux/acpi.h                         | 51 -------------------------
 include/linux/gpio/consumer.h                | 57 ++++++++++++++++++++++++++++
 sound/soc/intel/boards/bytcht_cx2072x.c      |  1 +
 sound/soc/intel/boards/cht_bsw_max98090_ti.c |  1 +
 sound/soc/intel/boards/cht_bsw_rt5672.c      |  1 +
 6 files changed, 67 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index d54fc6e7c8a9..fdee8afa5339 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -383,6 +383,13 @@ int acpi_dev_add_driver_gpios(struct acpi_device *adev,
 }
 EXPORT_SYMBOL_GPL(acpi_dev_add_driver_gpios);
 
+void acpi_dev_remove_driver_gpios(struct acpi_device *adev)
+{
+	if (adev)
+		adev->driver_gpios = NULL;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_remove_driver_gpios);
+
 static void devm_acpi_dev_release_driver_gpios(struct device *dev, void *res)
 {
 	acpi_dev_remove_driver_gpios(ACPI_COMPANION(dev));
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 9426b9aaed86..e40e1e27ed8e 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -994,62 +994,11 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c
 #endif
 #endif
 
-struct acpi_gpio_params {
-	unsigned int crs_entry_index;
-	unsigned int line_index;
-	bool active_low;
-};
-
-struct acpi_gpio_mapping {
-	const char *name;
-	const struct acpi_gpio_params *data;
-	unsigned int size;
-
-/* Ignore IoRestriction field */
-#define ACPI_GPIO_QUIRK_NO_IO_RESTRICTION	BIT(0)
-/*
- * When ACPI GPIO mapping table is in use the index parameter inside it
- * refers to the GPIO resource in _CRS method. That index has no
- * distinction of actual type of the resource. When consumer wants to
- * get GpioIo type explicitly, this quirk may be used.
- */
-#define ACPI_GPIO_QUIRK_ONLY_GPIOIO		BIT(1)
-
-	unsigned int quirks;
-};
-
 #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB)
-int acpi_dev_add_driver_gpios(struct acpi_device *adev,
-			      const struct acpi_gpio_mapping *gpios);
-
-static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev)
-{
-	if (adev)
-		adev->driver_gpios = NULL;
-}
-
-int devm_acpi_dev_add_driver_gpios(struct device *dev,
-				   const struct acpi_gpio_mapping *gpios);
-void devm_acpi_dev_remove_driver_gpios(struct device *dev);
-
 bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
 				struct acpi_resource_gpio **agpio);
 int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index);
 #else
-static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev,
-			      const struct acpi_gpio_mapping *gpios)
-{
-	return -ENXIO;
-}
-static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) {}
-
-static inline int devm_acpi_dev_add_driver_gpios(struct device *dev,
-			      const struct acpi_gpio_mapping *gpios)
-{
-	return -ENXIO;
-}
-static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {}
-
 static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
 					      struct acpi_resource_gpio **agpio)
 {
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 6019d4d128c5..aac617371b60 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -602,6 +602,63 @@ struct gpio_desc *devm_gpiod_get_from_of_node(struct device *dev,
 
 #endif /* CONFIG_GPIOLIB */
 
+struct acpi_gpio_params {
+	unsigned int crs_entry_index;
+	unsigned int line_index;
+	bool active_low;
+};
+
+struct acpi_gpio_mapping {
+	const char *name;
+	const struct acpi_gpio_params *data;
+	unsigned int size;
+
+/* Ignore IoRestriction field */
+#define ACPI_GPIO_QUIRK_NO_IO_RESTRICTION	BIT(0)
+/*
+ * When ACPI GPIO mapping table is in use the index parameter inside it
+ * refers to the GPIO resource in _CRS method. That index has no
+ * distinction of actual type of the resource. When consumer wants to
+ * get GpioIo type explicitly, this quirk may be used.
+ */
+#define ACPI_GPIO_QUIRK_ONLY_GPIOIO		BIT(1)
+
+	unsigned int quirks;
+};
+
+#if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_ACPI)
+
+struct acpi_device;
+
+int acpi_dev_add_driver_gpios(struct acpi_device *adev,
+			      const struct acpi_gpio_mapping *gpios);
+void acpi_dev_remove_driver_gpios(struct acpi_device *adev);
+
+int devm_acpi_dev_add_driver_gpios(struct device *dev,
+				   const struct acpi_gpio_mapping *gpios);
+void devm_acpi_dev_remove_driver_gpios(struct device *dev);
+
+#else  /* CONFIG_GPIOLIB && CONFIG_ACPI */
+
+struct acpi_device;
+
+static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev,
+			      const struct acpi_gpio_mapping *gpios)
+{
+	return -ENXIO;
+}
+static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) {}
+
+static inline int devm_acpi_dev_add_driver_gpios(struct device *dev,
+			      const struct acpi_gpio_mapping *gpios)
+{
+	return -ENXIO;
+}
+static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {}
+
+#endif /* CONFIG_GPIOLIB && CONFIG_ACPI */
+
+
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS)
 
 int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
diff --git a/sound/soc/intel/boards/bytcht_cx2072x.c b/sound/soc/intel/boards/bytcht_cx2072x.c
index 54ac2fd41925..67f06c95eec5 100644
--- a/sound/soc/intel/boards/bytcht_cx2072x.c
+++ b/sound/soc/intel/boards/bytcht_cx2072x.c
@@ -6,6 +6,7 @@
 
 #include <linux/acpi.h>
 #include <linux/device.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/sound/soc/intel/boards/cht_bsw_max98090_ti.c b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
index 33eb72545be6..05db311b579e 100644
--- a/sound/soc/intel/boards/cht_bsw_max98090_ti.c
+++ b/sound/soc/intel/boards/cht_bsw_max98090_ti.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/dmi.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c
index 4977b5a65eb8..9d657421730a 100644
--- a/sound/soc/intel/boards/cht_bsw_rt5672.c
+++ b/sound/soc/intel/boards/cht_bsw_rt5672.c
@@ -8,6 +8,7 @@
  *          Mengdong Lin <mengdong.lin@intel.com>
  */
 
+#include <linux/gpio/consumer.h>
 #include <linux/input.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-- 
cgit v1.2.3


From b1d45c23284e55a379f85554a27a548b7988d47a Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sat, 20 Jul 2019 19:39:43 +0900
Subject: tracing: Fix header include guards in trace event headers

These include guards are broken.

Match the #if !define() and #define lines so that they work correctly.

Link: http://lkml.kernel.org/r/20190720103943.16982-1-yamada.masahiro@socionext.com

Fixes: f54d1867005c3 ("dma-buf: Rename struct fence to dma_fence")
Fixes: 2e26ca7150a4f ("tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one header")
Fixes: e543002f77f46 ("qdisc: add tracepoint qdisc:qdisc_dequeue for dequeued SKBs")
Fixes: 95f295f9fe081 ("dmaengine: tegra: add tracepoints to driver")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/events/dma_fence.h     | 2 +-
 include/trace/events/napi.h          | 4 ++--
 include/trace/events/qdisc.h         | 4 ++--
 include/trace/events/tegra_apb_dma.h | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/dma_fence.h b/include/trace/events/dma_fence.h
index 2212adda8f77..64e92d56c6a8 100644
--- a/include/trace/events/dma_fence.h
+++ b/include/trace/events/dma_fence.h
@@ -2,7 +2,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM dma_fence
 
-#if !defined(_TRACE_FENCE_H) || defined(TRACE_HEADER_MULTI_READ)
+#if !defined(_TRACE_DMA_FENCE_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_DMA_FENCE_H
 
 #include <linux/tracepoint.h>
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index f3a12566bed0..6678cf8b235b 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -3,7 +3,7 @@
 #define TRACE_SYSTEM napi
 
 #if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_NAPI_H_
+#define _TRACE_NAPI_H
 
 #include <linux/netdevice.h>
 #include <linux/tracepoint.h>
@@ -38,7 +38,7 @@ TRACE_EVENT(napi_poll,
 
 #undef NO_DEV
 
-#endif /* _TRACE_NAPI_H_ */
+#endif /* _TRACE_NAPI_H */
 
 /* This part must be outside protection */
 #include <trace/define_trace.h>
diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index 60d0d8bd336d..0d1a9ebf55ba 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -2,7 +2,7 @@
 #define TRACE_SYSTEM qdisc
 
 #if !defined(_TRACE_QDISC_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_QDISC_H_
+#define _TRACE_QDISC_H
 
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
@@ -44,7 +44,7 @@ TRACE_EVENT(qdisc_dequeue,
 		  __entry->txq_state, __entry->packets, __entry->skbaddr )
 );
 
-#endif /* _TRACE_QDISC_H_ */
+#endif /* _TRACE_QDISC_H */
 
 /* This part must be outside protection */
 #include <trace/define_trace.h>
diff --git a/include/trace/events/tegra_apb_dma.h b/include/trace/events/tegra_apb_dma.h
index 0818f6286110..971cd02d2daf 100644
--- a/include/trace/events/tegra_apb_dma.h
+++ b/include/trace/events/tegra_apb_dma.h
@@ -1,5 +1,5 @@
 #if !defined(_TRACE_TEGRA_APB_DMA_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_TEGRA_APM_DMA_H
+#define _TRACE_TEGRA_APB_DMA_H
 
 #include <linux/tracepoint.h>
 #include <linux/dmaengine.h>
@@ -55,7 +55,7 @@ TRACE_EVENT(tegra_dma_isr,
 	TP_printk("%s: irq %d\n",  __get_str(chan), __entry->irq)
 );
 
-#endif /*  _TRACE_TEGRADMA_H */
+#endif /* _TRACE_TEGRA_APB_DMA_H */
 
 /* This part must be outside protection */
 #include <trace/define_trace.h>
-- 
cgit v1.2.3


From 9349d600fb6a1ca0aaeb515523e1bb5409483d76 Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Mon, 29 Jul 2019 09:59:14 -0700
Subject: tcp: add skb-less helpers to retrieve SYN cookie

This patch allows generation of a SYN cookie before an SKB has been
allocated, as is the case at XDP.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Reviewed-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/tcp.h    | 10 +++++++
 net/ipv4/tcp_input.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c  | 15 +++++++++++
 net/ipv6/tcp_ipv6.c  | 15 +++++++++++
 4 files changed, 113 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e5cf514ba118..fb7e153aecc5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -414,6 +414,16 @@ void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
 		       int estab, struct tcp_fastopen_cookie *foc);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
+/*
+ *	BPF SKB-less helpers
+ */
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+			 struct tcphdr *th, u32 *cookie);
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th);
 /*
  *	TCP v4 functions exported for the inet6 API
  */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8892df6de1d4..706cbb3b2986 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th,
 #endif
 }
 
+/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped
+ * value on success.
+ */
+static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss)
+{
+	const unsigned char *ptr = (const unsigned char *)(th + 1);
+	int length = (th->doff * 4) - sizeof(struct tcphdr);
+	u16 mss = 0;
+
+	while (length > 0) {
+		int opcode = *ptr++;
+		int opsize;
+
+		switch (opcode) {
+		case TCPOPT_EOL:
+			return mss;
+		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
+			length--;
+			continue;
+		default:
+			if (length < 2)
+				return mss;
+			opsize = *ptr++;
+			if (opsize < 2) /* "silly options" */
+				return mss;
+			if (opsize > length)
+				return mss;	/* fail on partial options */
+			if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) {
+				u16 in_mss = get_unaligned_be16(ptr);
+
+				if (in_mss) {
+					if (user_mss && user_mss < in_mss)
+						in_mss = user_mss;
+					mss = in_mss;
+				}
+			}
+			ptr += opsize - 2;
+			length -= opsize;
+		}
+	}
+	return mss;
+}
+
 /* Look for tcp options. Normally only called on SYN and SYNACK packets.
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
@@ -6464,6 +6507,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk,
 	}
 }
 
+/* If a SYN cookie is required and supported, returns a clamped MSS value to be
+ * used for SYN cookie generation.
+ */
+u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
+			  const struct tcp_request_sock_ops *af_ops,
+			  struct sock *sk, struct tcphdr *th)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u16 mss;
+
+	if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+	    !inet_csk_reqsk_queue_is_full(sk))
+		return 0;
+
+	if (!tcp_syn_flood_action(sk, rsk_ops->slab_name))
+		return 0;
+
+	if (sk_acceptq_is_full(sk)) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+		return 0;
+	}
+
+	mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss);
+	if (!mss)
+		mss = af_ops->mss_clamp;
+
+	return mss;
+}
+EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss);
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d57641cb3477..10217393cda6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
 	return sk;
 }
 
+u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph,
+			 struct tcphdr *th, u32 *cookie)
+{
+	u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+	mss = tcp_get_syncookie_mss(&tcp_request_sock_ops,
+				    &tcp_request_sock_ipv4_ops, sk, th);
+	if (mss) {
+		*cookie = __cookie_v4_init_sequence(iph, th, &mss);
+		tcp_synq_overflow(sk);
+	}
+#endif
+	return mss;
+}
+
 /* The socket must have it's spinlock held when we get
  * here, unless it is a TCP_LISTEN socket.
  *
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5da069e91cac..87f44d3250ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
 	return sk;
 }
 
+u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
+			 struct tcphdr *th, u32 *cookie)
+{
+	u16 mss = 0;
+#ifdef CONFIG_SYN_COOKIES
+	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
+				    &tcp_request_sock_ipv6_ops, sk, th);
+	if (mss) {
+		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
+		tcp_synq_overflow(sk);
+	}
+#endif
+	return mss;
+}
+
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	if (skb->protocol == htons(ETH_P_IP))
-- 
cgit v1.2.3


From 70d66244317e958092e9c971b08dd5b7fd29d9cb Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Mon, 29 Jul 2019 09:59:15 -0700
Subject: bpf: add bpf_tcp_gen_syncookie helper

This helper function allows BPF programs to try to generate SYN
cookies, given a reference to a listener socket. The function works
from XDP and with an skb context since bpf_skc_lookup_tcp can lookup a
socket in both cases.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 30 +++++++++++++++++++-
 net/core/filter.c        | 73 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6bbef0c7f585..4393bd4b2419 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2714,6 +2714,33 @@ union bpf_attr {
  *		**-EPERM** if no permission to send the *sig*.
  *
  *		**-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *	Description
+ *		Try to issue a SYN cookie for the packet with corresponding
+ *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ *		*iph* points to the start of the IPv4 or IPv6 header, while
+ *		*iph_len* contains **sizeof**\ (**struct iphdr**) or
+ *		**sizeof**\ (**struct ip6hdr**).
+ *
+ *		*th* points to the start of the TCP header, while *th_len*
+ *		contains the length of the TCP header.
+ *
+ *	Return
+ *		On success, lower 32 bits hold the generated SYN cookie in
+ *		followed by 16 bits which hold the MSS value for that cookie,
+ *		and the top 16 bits are unused.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** SYN cookie cannot be issued due to error
+ *
+ *		**-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2825,7 +2852,8 @@ union bpf_attr {
 	FN(strtoul),			\
 	FN(sk_storage_get),		\
 	FN(sk_storage_delete),		\
-	FN(send_signal),
+	FN(send_signal),		\
+	FN(tcp_gen_syncookie),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 1eee70f80fba..5a2707918629 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5855,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
 	.arg5_type	= ARG_CONST_SIZE,
 };
 
+BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
+	   struct tcphdr *, th, u32, th_len)
+{
+#ifdef CONFIG_SYN_COOKIES
+	u32 cookie;
+	u16 mss;
+
+	if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
+		return -EINVAL;
+
+	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
+		return -EINVAL;
+
+	if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+		return -ENOENT;
+
+	if (!th->syn || th->ack || th->fin || th->rst)
+		return -EINVAL;
+
+	if (unlikely(iph_len < sizeof(struct iphdr)))
+		return -EINVAL;
+
+	/* Both struct iphdr and struct ipv6hdr have the version field at the
+	 * same offset so we can cast to the shorter header (struct iphdr).
+	 */
+	switch (((struct iphdr *)iph)->version) {
+	case 4:
+		if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
+			return -EINVAL;
+
+		mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
+		break;
+
+#if IS_BUILTIN(CONFIG_IPV6)
+	case 6:
+		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
+			return -EINVAL;
+
+		if (sk->sk_family != AF_INET6)
+			return -EINVAL;
+
+		mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
+		break;
+#endif /* CONFIG_IPV6 */
+
+	default:
+		return -EPROTONOSUPPORT;
+	}
+	if (mss <= 0)
+		return -ENOENT;
+
+	return cookie | ((u64)mss << 32);
+#else
+	return -EOPNOTSUPP;
+#endif /* CONFIG_SYN_COOKIES */
+}
+
+static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
+	.func		= bpf_tcp_gen_syncookie,
+	.gpl_only	= true, /* __cookie_v*_init_sequence() is GPL */
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
 #endif /* CONFIG_INET */
 
 bool bpf_helper_changes_pkt_data(void *func)
@@ -6144,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_tcp_check_syncookie_proto;
 	case BPF_FUNC_skb_ecn_set_ce:
 		return &bpf_skb_ecn_set_ce_proto;
+	case BPF_FUNC_tcp_gen_syncookie:
+		return &bpf_tcp_gen_syncookie_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
@@ -6183,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_skc_lookup_tcp_proto;
 	case BPF_FUNC_tcp_check_syncookie:
 		return &bpf_tcp_check_syncookie_proto;
+	case BPF_FUNC_tcp_gen_syncookie:
+		return &bpf_tcp_gen_syncookie_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
-- 
cgit v1.2.3


From a78d14a31666c636a9e00a589032119fb59e3b94 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 09:46:29 +0200
Subject: xen: avoid link error on ARM

Building the privcmd code as a loadable module on ARM, we get
a link error due to the private cache management functions:

ERROR: "__sync_icache_dcache" [drivers/xen/xen-privcmd.ko] undefined!

Move the code into a new that is always built in when Xen is enabled,
as suggested by Juergen Gross and Boris Ostrovsky.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/privcmd.c   | 35 +++++------------------------------
 drivers/xen/xlate_mmu.c | 32 ++++++++++++++++++++++++++++++++
 include/xen/xen-ops.h   |  3 +++
 3 files changed, 40 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 2f5ce7230a43..c6070e70dd73 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -724,25 +724,6 @@ static long privcmd_ioctl_restrict(struct file *file, void __user *udata)
 	return 0;
 }
 
-struct remap_pfn {
-	struct mm_struct *mm;
-	struct page **pages;
-	pgprot_t prot;
-	unsigned long i;
-};
-
-static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data)
-{
-	struct remap_pfn *r = data;
-	struct page *page = r->pages[r->i];
-	pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot));
-
-	set_pte_at(r->mm, addr, ptep, pte);
-	r->i++;
-
-	return 0;
-}
-
 static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
 {
 	struct privcmd_data *data = file->private_data;
@@ -774,7 +755,8 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
 		goto out;
 	}
 
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+	if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
+	    xen_feature(XENFEAT_auto_translated_physmap)) {
 		unsigned int nr = DIV_ROUND_UP(kdata.num, XEN_PFN_PER_PAGE);
 		struct page **pages;
 		unsigned int i;
@@ -808,16 +790,9 @@ static long privcmd_ioctl_mmap_resource(struct file *file, void __user *udata)
 	if (rc)
 		goto out;
 
-	if (xen_feature(XENFEAT_auto_translated_physmap)) {
-		struct remap_pfn r = {
-			.mm = vma->vm_mm,
-			.pages = vma->vm_private_data,
-			.prot = vma->vm_page_prot,
-		};
-
-		rc = apply_to_page_range(r.mm, kdata.addr,
-					 kdata.num << PAGE_SHIFT,
-					 remap_pfn_fn, &r);
+	if (IS_ENABLED(CONFIG_XEN_AUTO_XLATE) &&
+	    xen_feature(XENFEAT_auto_translated_physmap)) {
+		rc = xen_remap_vma_range(vma, kdata.addr, kdata.num << PAGE_SHIFT);
 	} else {
 		unsigned int domid =
 			(xdata.flags & XENMEM_rsrc_acq_caller_owned) ?
diff --git a/drivers/xen/xlate_mmu.c b/drivers/xen/xlate_mmu.c
index ba883a80b3c0..7b1077f0abcb 100644
--- a/drivers/xen/xlate_mmu.c
+++ b/drivers/xen/xlate_mmu.c
@@ -262,3 +262,35 @@ int __init xen_xlate_map_ballooned_pages(xen_pfn_t **gfns, void **virt,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages);
+
+struct remap_pfn {
+	struct mm_struct *mm;
+	struct page **pages;
+	pgprot_t prot;
+	unsigned long i;
+};
+
+static int remap_pfn_fn(pte_t *ptep, unsigned long addr, void *data)
+{
+	struct remap_pfn *r = data;
+	struct page *page = r->pages[r->i];
+	pte_t pte = pte_mkspecial(pfn_pte(page_to_pfn(page), r->prot));
+
+	set_pte_at(r->mm, addr, ptep, pte);
+	r->i++;
+
+	return 0;
+}
+
+/* Used by the privcmd module, but has to be built-in on ARM */
+int xen_remap_vma_range(struct vm_area_struct *vma, unsigned long addr, unsigned long len)
+{
+	struct remap_pfn r = {
+		.mm = vma->vm_mm,
+		.pages = vma->vm_private_data,
+		.prot = vma->vm_page_prot,
+	};
+
+	return apply_to_page_range(vma->vm_mm, addr, len, remap_pfn_fn, &r);
+}
+EXPORT_SYMBOL_GPL(xen_remap_vma_range);
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 4969817124a8..98b30c1613b2 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -109,6 +109,9 @@ static inline int xen_xlate_unmap_gfn_range(struct vm_area_struct *vma,
 }
 #endif
 
+int xen_remap_vma_range(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long len);
+
 /*
  * xen_remap_domain_gfn_array() - map an array of foreign frames by gfn
  * @vma:     VMA to map the pages into
-- 
cgit v1.2.3


From 796e90f42b7e52cf1c88e978e1d5ee69c102d85d Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 30 Jul 2019 18:37:00 +0200
Subject: cfg80211: add support for parsing OBBS_PD attributes

Add the data structure, policy and parsing code allowing userland to send
the OBSS PD information into the kernel.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190730163701.18836-2-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 15 +++++++++++++++
 include/uapi/linux/nl80211.h | 27 ++++++++++++++++++++++++++
 net/wireless/nl80211.c       | 45 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 45850a8391d9..35ec1f0a2bf9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -246,6 +246,19 @@ struct ieee80211_rate {
 	u16 hw_value, hw_value_short;
 };
 
+/**
+ * struct ieee80211_he_obss_pd - AP settings for spatial reuse
+ *
+ * @enable: is the feature enabled.
+ * @min_offset: minimal tx power offset an associated station shall use
+ * @max_offset: maximum tx power offset an associated station shall use
+ */
+struct ieee80211_he_obss_pd {
+	bool enable;
+	u8 min_offset;
+	u8 max_offset;
+};
+
 /**
  * struct ieee80211_sta_ht_cap - STA's HT capabilities
  *
@@ -896,6 +909,7 @@ enum cfg80211_ap_settings_flags {
  * @vht_required: stations must support VHT
  * @twt_responder: Enable Target Wait Time
  * @flags: flags, as defined in enum cfg80211_ap_settings_flags
+ * @he_obss_pd: OBSS Packet Detection settings
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -923,6 +937,7 @@ struct cfg80211_ap_settings {
 	bool ht_required, vht_required;
 	bool twt_responder;
 	u32 flags;
+	struct ieee80211_he_obss_pd he_obss_pd;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c45587c2cf44..822851d369ab 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2358,6 +2358,9 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_TWT_RESPONDER: Enable target wait time responder support.
  *
+ * @NL80211_ATTR_HE_OBSS_PD: nested attribute for OBSS Packet Detection
+ *	functionality.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2815,6 +2818,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_TWT_RESPONDER,
 
+	NL80211_ATTR_HE_OBSS_PD,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -6490,4 +6495,26 @@ enum nl80211_peer_measurement_ftm_resp {
 	NL80211_PMSR_FTM_RESP_ATTR_MAX = NUM_NL80211_PMSR_FTM_RESP_ATTR - 1
 };
 
+/**
+ * enum nl80211_obss_pd_attributes - OBSS packet detection attributes
+ * @__NL80211_HE_OBSS_PD_ATTR_INVALID: Invalid
+ *
+ * @NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET: the OBSS PD minimum tx power offset.
+ * @NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET: the OBSS PD maximum tx power offset.
+ *
+ * @__NL80211_HE_OBSS_PD_ATTR_LAST: Internal
+ * @NL80211_HE_OBSS_PD_ATTR_MAX: highest OBSS PD attribute.
+ */
+enum nl80211_obss_pd_attributes {
+	__NL80211_HE_OBSS_PD_ATTR_INVALID,
+
+	NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET,
+	NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET,
+
+	/* keep last */
+	__NL80211_HE_OBSS_PD_ATTR_LAST,
+	NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1,
+};
+
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1e78ed45a759..3006cfce7158 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -281,6 +281,14 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = {
 		NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy),
 };
 
+static const struct nla_policy
+he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = {
+	[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] =
+		NLA_POLICY_RANGE(NLA_U8, 1, 20),
+	[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] =
+		NLA_POLICY_RANGE(NLA_U8, 1, 20),
+};
+
 const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
@@ -574,6 +582,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY,
 					.len = SAE_PASSWORD_MAX_LEN },
 	[NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG },
+	[NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy),
 };
 
 /* policy for the key attributes */
@@ -4405,6 +4414,34 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev,
 	return 0;
 }
 
+static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
+				    struct ieee80211_he_obss_pd *he_obss_pd)
+{
+	struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1];
+	int err;
+
+	err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs,
+			       he_obss_pd_policy, NULL);
+	if (err)
+		return err;
+
+	if (!tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] ||
+	    !tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET])
+		return -EINVAL;
+
+	he_obss_pd->min_offset =
+		nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]);
+	he_obss_pd->max_offset =
+		nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]);
+
+	if (he_obss_pd->min_offset >= he_obss_pd->max_offset)
+		return -EINVAL;
+
+	he_obss_pd->enable = true;
+
+	return 0;
+}
+
 static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
 					    const u8 *rates)
 {
@@ -4689,6 +4726,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 	params.twt_responder =
 		    nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]);
 
+	if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) {
+		err = nl80211_parse_he_obss_pd(
+					info->attrs[NL80211_ATTR_HE_OBSS_PD],
+					&params.he_obss_pd);
+		if (err)
+			return err;
+	}
+
 	nl80211_calculate_ap_params(&params);
 
 	if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
-- 
cgit v1.2.3


From 1ced169cc1c2f3e054fa14974443383ee02a8b6a Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 30 Jul 2019 18:37:01 +0200
Subject: mac80211: allow setting spatial reuse parameters from bss_conf

Store the OBSS PD parameters inside bss_conf when bringing up an AP and/or
when a station connects to an AP. This allows the driver to configure the
HW accordingly.

Signed-off-by: John Crispin <john@phrozen.org>
Link: https://lore.kernel.org/r/20190730163701.18836-3-john@phrozen.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  4 ++++
 net/mac80211/cfg.c         |  5 ++++-
 net/mac80211/he.c          | 24 ++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  3 +++
 net/mac80211/mlme.c        |  1 +
 5 files changed, 36 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index bd91388797fc..6781d4637557 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -315,6 +315,7 @@ struct ieee80211_vif_chanctx_switch {
  * @BSS_CHANGED_FTM_RESPONDER: fime timing reasurement request responder
  *	functionality changed for this BSS (AP mode).
  * @BSS_CHANGED_TWT: TWT status changed
+ * @BSS_CHANGED_HE_OBSS_PD: OBSS Packet Detection status changed.
  *
  */
 enum ieee80211_bss_change {
@@ -346,6 +347,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_MCAST_RATE		= 1<<25,
 	BSS_CHANGED_FTM_RESPONDER	= 1<<26,
 	BSS_CHANGED_TWT			= 1<<27,
+	BSS_CHANGED_HE_OBSS_PD		= 1<<28,
 
 	/* when adding here, make sure to change ieee80211_reconfig */
 };
@@ -601,6 +603,7 @@ struct ieee80211_ftm_responder_params {
  * @profile_periodicity: the least number of beacon frames need to be received
  *	in order to discover all the nontransmitted BSSIDs in the set.
  * @he_operation: HE operation information of the AP we are connected to
+ * @he_obss_pd: OBSS Packet Detection parameters.
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -663,6 +666,7 @@ struct ieee80211_bss_conf {
 	bool ema_ap;
 	u8 profile_periodicity;
 	struct ieee80211_he_operation he_operation;
+	struct ieee80211_he_obss_pd he_obss_pd;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 10b99b263c4f..ed56b0c6fe19 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -980,7 +980,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 		      BSS_CHANGED_SSID |
 		      BSS_CHANGED_P2P_PS |
 		      BSS_CHANGED_TXPOWER |
-		      BSS_CHANGED_TWT;
+		      BSS_CHANGED_TWT |
+		      BSS_CHANGED_HE_OBSS_PD;
 	int err;
 	int prev_beacon_int;
 
@@ -1051,6 +1052,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 	sdata->vif.bss_conf.enable_beacon = true;
 	sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p;
 	sdata->vif.bss_conf.twt_responder = params->twt_responder;
+	memcpy(&sdata->vif.bss_conf.he_obss_pd, &params->he_obss_pd,
+	       sizeof(struct ieee80211_he_obss_pd));
 
 	sdata->vif.bss_conf.ssid_len = params->ssid_len;
 	if (params->ssid_len)
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index f910f730ad0d..a02abfc424aa 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -65,3 +65,27 @@ ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
 
 	vif->bss_conf.he_operation = *he_op_ie_elem;
 }
+
+void
+ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+				const struct ieee80211_he_spr *he_spr_ie_elem)
+{
+	struct ieee80211_he_obss_pd *he_obss_pd =
+					&vif->bss_conf.he_obss_pd;
+	const u8 *data = he_spr_ie_elem->optional;
+
+	memset(he_obss_pd, 0, sizeof(*he_obss_pd));
+
+	if (!he_spr_ie_elem)
+		return;
+
+	if (he_spr_ie_elem->he_sr_control &
+	    IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT)
+		data++;
+	if (he_spr_ie_elem->he_sr_control &
+	    IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) {
+		he_obss_pd->max_offset = *data++;
+		he_obss_pd->min_offset = *data++;
+		he_obss_pd->enable = true;
+	}
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 38769f5c3da4..791ce58d0f09 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1873,6 +1873,9 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_supported_band *sband,
 				  const u8 *he_cap_ie, u8 he_cap_len,
 				  struct sta_info *sta);
+void
+ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
+				const struct ieee80211_he_spr *he_spr_ie_elem);
 
 void
 ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2b8a7428973d..225633d9e2d4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3382,6 +3382,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 			bss_conf->uora_ocw_range = elems.uora_element[0];
 
 		ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation);
+		ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems.he_spr);
 		/* TODO: OPEN: what happens if BSS color disable is set? */
 	}
 
-- 
cgit v1.2.3


From 7dcddef6f769d7e60691c732eb6d09cdb1d9df76 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 31 Jul 2019 15:29:52 +1000
Subject: cpuidle: header file stubs must be "static inline"

An x86_64 allmodconfig build produces these errors:

x86_64-linux-gnu-ld: kernel/sched/core.o: in function `cpuidle_poll_time':
core.c:(.text+0x230): multiple definition of `cpuidle_poll_time'; arch/x86/=
kernel/process.o:process.c:(.text+0xc0): first defined here

(and more)

Fixes: 259231a04561 ("cpuidle: add poll_limit_ns to cpuidle_device structure")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpuidle.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index ba535a1a47d5..1a9f54eb3aa1 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -170,7 +170,7 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {return -ENODEV; }
 static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { }
-extern u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+static inline u64 cpuidle_poll_time(struct cpuidle_driver *drv,
 			     struct cpuidle_device *dev)
 {return 0; }
 static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
-- 
cgit v1.2.3


From 82cb54856874b1b374f18420be013ff4057700a9 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 29 Jul 2019 12:55:21 +0300
Subject: asm-generic: make simd.h a mandatory include/asm header

The generic aegis128 software crypto driver recently gained support
for using SIMD intrinsics to increase performance, for which it
uncondionally #include's the <asm/simd.h> header. Unfortunately,
this header does not exist on many architectures, resulting in
build failures.

Since asm-generic already has a version of simd.h, let's make it
a mandatory header so that it gets instantiated on all architectures
that don't provide their own version.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/asm-generic/Kbuild | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 6f4536d70b8e..adff14fcb8e4 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -3,3 +3,5 @@
 # asm headers that all architectures except um should have
 # (This file is not included when SRCARCH=um since UML borrows several
 # asm headers from the host architecutre.)
+
+mandatory-y += simd.h
-- 
cgit v1.2.3


From 63643b5902c4bf096b504b0563f5426ba5baef15 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 29 Jul 2019 10:51:47 -0500
Subject: ASoC: Intel: Skylake: move NHLT header to common directory

Prepare move from NHLT code to common directory, starting with header.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/intel-nhlt.h             | 119 +++++++++++++++++++++++++++++++++
 sound/soc/intel/skylake/skl-nhlt.c     |   1 +
 sound/soc/intel/skylake/skl-nhlt.h     | 119 ---------------------------------
 sound/soc/intel/skylake/skl-ssp-clk.c  |   1 +
 sound/soc/intel/skylake/skl-topology.c |   1 +
 sound/soc/intel/skylake/skl.h          |   1 -
 6 files changed, 122 insertions(+), 120 deletions(-)
 create mode 100644 include/sound/intel-nhlt.h
 delete mode 100644 sound/soc/intel/skylake/skl-nhlt.h

(limited to 'include')

diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
new file mode 100644
index 000000000000..f85fbf9c7ce4
--- /dev/null
+++ b/include/sound/intel-nhlt.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  skl-nhlt.h - Intel HDA Platform NHLT header
+ *
+ *  Copyright (C) 2015 Intel Corp
+ *  Author: Sanjiv Kumar <sanjiv.kumar@intel.com>
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef __SKL_NHLT_H__
+#define __SKL_NHLT_H__
+
+#include <linux/acpi.h>
+
+struct wav_fmt {
+	u16 fmt_tag;
+	u16 channels;
+	u32 samples_per_sec;
+	u32 avg_bytes_per_sec;
+	u16 block_align;
+	u16 bits_per_sample;
+	u16 cb_size;
+} __packed;
+
+struct wav_fmt_ext {
+	struct wav_fmt fmt;
+	union samples {
+		u16 valid_bits_per_sample;
+		u16 samples_per_block;
+		u16 reserved;
+	} sample;
+	u32 channel_mask;
+	u8 sub_fmt[16];
+} __packed;
+
+enum nhlt_link_type {
+	NHLT_LINK_HDA = 0,
+	NHLT_LINK_DSP = 1,
+	NHLT_LINK_DMIC = 2,
+	NHLT_LINK_SSP = 3,
+	NHLT_LINK_INVALID
+};
+
+enum nhlt_device_type {
+	NHLT_DEVICE_BT = 0,
+	NHLT_DEVICE_DMIC = 1,
+	NHLT_DEVICE_I2S = 4,
+	NHLT_DEVICE_INVALID
+};
+
+struct nhlt_specific_cfg {
+	u32 size;
+	u8 caps[0];
+} __packed;
+
+struct nhlt_fmt_cfg {
+	struct wav_fmt_ext fmt_ext;
+	struct nhlt_specific_cfg config;
+} __packed;
+
+struct nhlt_fmt {
+	u8 fmt_count;
+	struct nhlt_fmt_cfg fmt_config[0];
+} __packed;
+
+struct nhlt_endpoint {
+	u32  length;
+	u8   linktype;
+	u8   instance_id;
+	u16  vendor_id;
+	u16  device_id;
+	u16  revision_id;
+	u32  subsystem_id;
+	u8   device_type;
+	u8   direction;
+	u8   virtual_bus_id;
+	struct nhlt_specific_cfg config;
+} __packed;
+
+struct nhlt_acpi_table {
+	struct acpi_table_header header;
+	u8 endpoint_count;
+	struct nhlt_endpoint desc[0];
+} __packed;
+
+struct nhlt_resource_desc  {
+	u32 extra;
+	u16 flags;
+	u64 addr_spc_gra;
+	u64 min_addr;
+	u64 max_addr;
+	u64 addr_trans_offset;
+	u64 length;
+} __packed;
+
+#define MIC_ARRAY_2CH 2
+#define MIC_ARRAY_4CH 4
+
+struct nhlt_tdm_config {
+	u8 virtual_slot;
+	u8 config_type;
+} __packed;
+
+struct nhlt_dmic_array_config {
+	struct nhlt_tdm_config tdm_config;
+	u8 array_type;
+} __packed;
+
+enum {
+	NHLT_MIC_ARRAY_2CH_SMALL = 0xa,
+	NHLT_MIC_ARRAY_2CH_BIG = 0xb,
+	NHLT_MIC_ARRAY_4CH_1ST_GEOM = 0xc,
+	NHLT_MIC_ARRAY_4CH_L_SHAPED = 0xd,
+	NHLT_MIC_ARRAY_4CH_2ND_GEOM = 0xe,
+	NHLT_MIC_ARRAY_VENDOR_DEFINED = 0xf,
+};
+
+#endif
diff --git a/sound/soc/intel/skylake/skl-nhlt.c b/sound/soc/intel/skylake/skl-nhlt.c
index 1132109cb992..aabc5d71650e 100644
--- a/sound/soc/intel/skylake/skl-nhlt.c
+++ b/sound/soc/intel/skylake/skl-nhlt.c
@@ -9,6 +9,7 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 #include <linux/pci.h>
+#include <sound/intel-nhlt.h>
 #include "skl.h"
 #include "skl-i2s.h"
 
diff --git a/sound/soc/intel/skylake/skl-nhlt.h b/sound/soc/intel/skylake/skl-nhlt.h
deleted file mode 100644
index f85fbf9c7ce4..000000000000
--- a/sound/soc/intel/skylake/skl-nhlt.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  skl-nhlt.h - Intel HDA Platform NHLT header
- *
- *  Copyright (C) 2015 Intel Corp
- *  Author: Sanjiv Kumar <sanjiv.kumar@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- */
-#ifndef __SKL_NHLT_H__
-#define __SKL_NHLT_H__
-
-#include <linux/acpi.h>
-
-struct wav_fmt {
-	u16 fmt_tag;
-	u16 channels;
-	u32 samples_per_sec;
-	u32 avg_bytes_per_sec;
-	u16 block_align;
-	u16 bits_per_sample;
-	u16 cb_size;
-} __packed;
-
-struct wav_fmt_ext {
-	struct wav_fmt fmt;
-	union samples {
-		u16 valid_bits_per_sample;
-		u16 samples_per_block;
-		u16 reserved;
-	} sample;
-	u32 channel_mask;
-	u8 sub_fmt[16];
-} __packed;
-
-enum nhlt_link_type {
-	NHLT_LINK_HDA = 0,
-	NHLT_LINK_DSP = 1,
-	NHLT_LINK_DMIC = 2,
-	NHLT_LINK_SSP = 3,
-	NHLT_LINK_INVALID
-};
-
-enum nhlt_device_type {
-	NHLT_DEVICE_BT = 0,
-	NHLT_DEVICE_DMIC = 1,
-	NHLT_DEVICE_I2S = 4,
-	NHLT_DEVICE_INVALID
-};
-
-struct nhlt_specific_cfg {
-	u32 size;
-	u8 caps[0];
-} __packed;
-
-struct nhlt_fmt_cfg {
-	struct wav_fmt_ext fmt_ext;
-	struct nhlt_specific_cfg config;
-} __packed;
-
-struct nhlt_fmt {
-	u8 fmt_count;
-	struct nhlt_fmt_cfg fmt_config[0];
-} __packed;
-
-struct nhlt_endpoint {
-	u32  length;
-	u8   linktype;
-	u8   instance_id;
-	u16  vendor_id;
-	u16  device_id;
-	u16  revision_id;
-	u32  subsystem_id;
-	u8   device_type;
-	u8   direction;
-	u8   virtual_bus_id;
-	struct nhlt_specific_cfg config;
-} __packed;
-
-struct nhlt_acpi_table {
-	struct acpi_table_header header;
-	u8 endpoint_count;
-	struct nhlt_endpoint desc[0];
-} __packed;
-
-struct nhlt_resource_desc  {
-	u32 extra;
-	u16 flags;
-	u64 addr_spc_gra;
-	u64 min_addr;
-	u64 max_addr;
-	u64 addr_trans_offset;
-	u64 length;
-} __packed;
-
-#define MIC_ARRAY_2CH 2
-#define MIC_ARRAY_4CH 4
-
-struct nhlt_tdm_config {
-	u8 virtual_slot;
-	u8 config_type;
-} __packed;
-
-struct nhlt_dmic_array_config {
-	struct nhlt_tdm_config tdm_config;
-	u8 array_type;
-} __packed;
-
-enum {
-	NHLT_MIC_ARRAY_2CH_SMALL = 0xa,
-	NHLT_MIC_ARRAY_2CH_BIG = 0xb,
-	NHLT_MIC_ARRAY_4CH_1ST_GEOM = 0xc,
-	NHLT_MIC_ARRAY_4CH_L_SHAPED = 0xd,
-	NHLT_MIC_ARRAY_4CH_2ND_GEOM = 0xe,
-	NHLT_MIC_ARRAY_VENDOR_DEFINED = 0xf,
-};
-
-#endif
diff --git a/sound/soc/intel/skylake/skl-ssp-clk.c b/sound/soc/intel/skylake/skl-ssp-clk.c
index 5bb6e40d4d3e..5bfcd46452f9 100644
--- a/sound/soc/intel/skylake/skl-ssp-clk.c
+++ b/sound/soc/intel/skylake/skl-ssp-clk.c
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
+#include <sound/intel-nhlt.h>
 #include "skl.h"
 #include "skl-ssp-clk.h"
 #include "skl-topology.h"
diff --git a/sound/soc/intel/skylake/skl-topology.c b/sound/soc/intel/skylake/skl-topology.c
index 6241e35213af..f8a501cf5fbd 100644
--- a/sound/soc/intel/skylake/skl-topology.c
+++ b/sound/soc/intel/skylake/skl-topology.c
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/firmware.h>
 #include <linux/uuid.h>
+#include <sound/intel-nhlt.h>
 #include <sound/soc.h>
 #include <sound/soc-topology.h>
 #include <uapi/sound/snd_sst_tokens.h>
diff --git a/sound/soc/intel/skylake/skl.h b/sound/soc/intel/skylake/skl.h
index 6070666a6392..928e8115a1a7 100644
--- a/sound/soc/intel/skylake/skl.h
+++ b/sound/soc/intel/skylake/skl.h
@@ -16,7 +16,6 @@
 #include <sound/hdaudio_ext.h>
 #include <sound/hda_codec.h>
 #include <sound/soc.h>
-#include "skl-nhlt.h"
 #include "skl-ssp-clk.h"
 
 #define SKL_SUSPEND_DELAY 2000
-- 
cgit v1.2.3


From 303681f4356d322232dd5f6d9eb4bc62666064c5 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 29 Jul 2019 10:51:48 -0500
Subject: ALSA: hda: move parts of NHLT code to new module

Move parts of the code outside of the Skylake driver to help detect
the presence of DMICs (which are not supported by the HDaudio legacy
driver).

No functionality change (except for the removal of useless OR
operations), only indentation and checkpatch fixes, making sure
that the code compiles without ACPI and fixing an ACPI leak

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/intel-nhlt.h |  41 ++++++++++++++----
 sound/hda/Kconfig          |   5 +++
 sound/hda/Makefile         |   3 ++
 sound/hda/intel-nhlt.c     | 103 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 144 insertions(+), 8 deletions(-)
 create mode 100644 sound/hda/intel-nhlt.c

(limited to 'include')

diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
index f85fbf9c7ce4..857922f03931 100644
--- a/include/sound/intel-nhlt.h
+++ b/include/sound/intel-nhlt.h
@@ -1,18 +1,17 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- *  skl-nhlt.h - Intel HDA Platform NHLT header
+ *  intel-nhlt.h - Intel HDA Platform NHLT header
  *
- *  Copyright (C) 2015 Intel Corp
- *  Author: Sanjiv Kumar <sanjiv.kumar@intel.com>
- *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *  Copyright (c) 2015-2019 Intel Corporation
  */
-#ifndef __SKL_NHLT_H__
-#define __SKL_NHLT_H__
+
+#ifndef __INTEL_NHLT_H__
+#define __INTEL_NHLT_H__
 
 #include <linux/acpi.h>
 
+#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_INTEL_NHLT)
+
 struct wav_fmt {
 	u16 fmt_tag;
 	u16 channels;
@@ -116,4 +115,30 @@ enum {
 	NHLT_MIC_ARRAY_VENDOR_DEFINED = 0xf,
 };
 
+struct nhlt_acpi_table *intel_nhlt_init(struct device *dev);
+
+void intel_nhlt_free(struct nhlt_acpi_table *addr);
+
+int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt);
+
+#else
+
+struct nhlt_acpi_table;
+
+static inline struct nhlt_acpi_table *intel_nhlt_init(struct device *dev)
+{
+	return NULL;
+}
+
+static inline void intel_nhlt_free(struct nhlt_acpi_table *addr)
+{
+}
+
+static inline int intel_nhlt_get_dmic_geo(struct device *dev,
+					  struct nhlt_acpi_table *nhlt)
+{
+	return 0;
+}
+#endif
+
 #endif
diff --git a/sound/hda/Kconfig b/sound/hda/Kconfig
index f6feced15f17..9ccbcb5a06bd 100644
--- a/sound/hda/Kconfig
+++ b/sound/hda/Kconfig
@@ -29,3 +29,8 @@ config SND_HDA_PREALLOC_SIZE
 
 	  Note that the pre-allocation size can be changed dynamically
 	  via a proc file (/proc/asound/card*/pcm*/sub*/prealloc), too.
+
+config SND_INTEL_NHLT
+	tristate
+	# this config should be selected only for Intel ACPI platforms.
+	# A fallback is provided so that the code compiles in all cases.
\ No newline at end of file
diff --git a/sound/hda/Makefile b/sound/hda/Makefile
index 2160202e2dc1..8560f6ef1b19 100644
--- a/sound/hda/Makefile
+++ b/sound/hda/Makefile
@@ -13,3 +13,6 @@ obj-$(CONFIG_SND_HDA_CORE) += snd-hda-core.o
 
 #extended hda
 obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/
+
+snd-intel-nhlt-objs := intel-nhlt.o
+obj-$(CONFIG_SND_INTEL_NHLT) += snd-intel-nhlt.o
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
new file mode 100644
index 000000000000..7a62e03ba407
--- /dev/null
+++ b/sound/hda/intel-nhlt.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2015-2019 Intel Corporation
+
+#include <linux/acpi.h>
+#include <sound/intel-nhlt.h>
+
+#define NHLT_ACPI_HEADER_SIG	"NHLT"
+
+/* Unique identification for getting NHLT blobs */
+static guid_t osc_guid =
+	GUID_INIT(0xA69F886E, 0x6CEB, 0x4594,
+		  0xA4, 0x1F, 0x7B, 0x5D, 0xCE, 0x24, 0xC5, 0x53);
+
+struct nhlt_acpi_table *intel_nhlt_init(struct device *dev)
+{
+	acpi_handle handle;
+	union acpi_object *obj;
+	struct nhlt_resource_desc *nhlt_ptr;
+	struct nhlt_acpi_table *nhlt_table = NULL;
+
+	handle = ACPI_HANDLE(dev);
+	if (!handle) {
+		dev_err(dev, "Didn't find ACPI_HANDLE\n");
+		return NULL;
+	}
+
+	obj = acpi_evaluate_dsm(handle, &osc_guid, 1, 1, NULL);
+
+	if (!obj)
+		return NULL;
+
+	if (obj->type != ACPI_TYPE_BUFFER) {
+		dev_dbg(dev, "No NHLT table found\n");
+		ACPI_FREE(obj);
+		return NULL;
+	}
+
+	nhlt_ptr = (struct nhlt_resource_desc  *)obj->buffer.pointer;
+	if (nhlt_ptr->length)
+		nhlt_table = (struct nhlt_acpi_table *)
+			memremap(nhlt_ptr->min_addr, nhlt_ptr->length,
+				 MEMREMAP_WB);
+	ACPI_FREE(obj);
+	if (nhlt_table &&
+	    (strncmp(nhlt_table->header.signature,
+		     NHLT_ACPI_HEADER_SIG,
+		     strlen(NHLT_ACPI_HEADER_SIG)) != 0)) {
+		memunmap(nhlt_table);
+		dev_err(dev, "NHLT ACPI header signature incorrect\n");
+		return NULL;
+	}
+	return nhlt_table;
+}
+EXPORT_SYMBOL_GPL(intel_nhlt_init);
+
+void intel_nhlt_free(struct nhlt_acpi_table *nhlt)
+{
+	memunmap((void *)nhlt);
+}
+EXPORT_SYMBOL_GPL(intel_nhlt_free);
+
+int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
+{
+	struct nhlt_endpoint *epnt;
+	struct nhlt_dmic_array_config *cfg;
+	unsigned int dmic_geo = 0;
+	u8 j;
+
+	if (!nhlt)
+		return 0;
+
+	epnt = (struct nhlt_endpoint *)nhlt->desc;
+
+	for (j = 0; j < nhlt->endpoint_count; j++) {
+		if (epnt->linktype == NHLT_LINK_DMIC) {
+			cfg = (struct nhlt_dmic_array_config  *)
+					(epnt->config.caps);
+			switch (cfg->array_type) {
+			case NHLT_MIC_ARRAY_2CH_SMALL:
+			case NHLT_MIC_ARRAY_2CH_BIG:
+				dmic_geo = MIC_ARRAY_2CH;
+				break;
+
+			case NHLT_MIC_ARRAY_4CH_1ST_GEOM:
+			case NHLT_MIC_ARRAY_4CH_L_SHAPED:
+			case NHLT_MIC_ARRAY_4CH_2ND_GEOM:
+				dmic_geo = MIC_ARRAY_4CH;
+				break;
+
+			default:
+				dev_warn(dev, "undefined DMIC array_type 0x%0x\n",
+					 cfg->array_type);
+			}
+		}
+		epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
+	}
+
+	return dmic_geo;
+}
+EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel NHLT driver");
-- 
cgit v1.2.3


From 7a33ea70e1868ee578fe2e9a85dd300efa1a35d5 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 29 Jul 2019 10:51:49 -0500
Subject: ALSA: hda: intel-nhlt: handle NHLT VENDOR_DEFINED DMIC geometry

The NHLT spec defines a VENDOR_DEFINED geometry, which requires
reading additional information to figure out the number of
microphones.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/intel-nhlt.h | 10 ++++++++--
 sound/hda/intel-nhlt.c     |  6 +++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
index 857922f03931..f657fd8fc0ad 100644
--- a/include/sound/intel-nhlt.h
+++ b/include/sound/intel-nhlt.h
@@ -96,16 +96,22 @@ struct nhlt_resource_desc  {
 #define MIC_ARRAY_2CH 2
 #define MIC_ARRAY_4CH 4
 
-struct nhlt_tdm_config {
+struct nhlt_device_specific_config {
 	u8 virtual_slot;
 	u8 config_type;
 } __packed;
 
 struct nhlt_dmic_array_config {
-	struct nhlt_tdm_config tdm_config;
+	struct nhlt_device_specific_config device_config;
 	u8 array_type;
 } __packed;
 
+struct nhlt_vendor_dmic_array_config {
+	struct nhlt_dmic_array_config dmic_config;
+	u8 nb_mics;
+	/* TODO add vendor mic config */
+} __packed;
+
 enum {
 	NHLT_MIC_ARRAY_2CH_SMALL = 0xa,
 	NHLT_MIC_ARRAY_2CH_BIG = 0xb,
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 7a62e03ba407..daede96f28ee 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -63,6 +63,7 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
 {
 	struct nhlt_endpoint *epnt;
 	struct nhlt_dmic_array_config *cfg;
+	struct nhlt_vendor_dmic_array_config *cfg_vendor;
 	unsigned int dmic_geo = 0;
 	u8 j;
 
@@ -86,7 +87,10 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
 			case NHLT_MIC_ARRAY_4CH_2ND_GEOM:
 				dmic_geo = MIC_ARRAY_4CH;
 				break;
-
+			case NHLT_MIC_ARRAY_VENDOR_DEFINED:
+				cfg_vendor = (struct nhlt_vendor_dmic_array_config *)cfg;
+				dmic_geo = cfg_vendor->nb_mics;
+				break;
 			default:
 				dev_warn(dev, "undefined DMIC array_type 0x%0x\n",
 					 cfg->array_type);
-- 
cgit v1.2.3


From c1a280b68d4e6b6db4a65aa7865c22d8789ddf09 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Jul 2019 23:19:37 +0200
Subject: sched/preempt: Use CONFIG_PREEMPTION where appropriate

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the preemption code, scheduler and init task over to use
CONFIG_PREEMPTION.

That's the first step towards RT in that area. The more complex changes are
coming separately.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20190726212124.117528401@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/preempt.h |  4 ++--
 include/linux/preempt.h       |  6 +++---
 include/linux/sched.h         |  6 +++---
 init/init_task.c              |  2 +-
 init/main.c                   |  2 +-
 kernel/sched/core.c           | 14 +++++++-------
 kernel/sched/fair.c           |  2 +-
 kernel/sched/sched.h          |  4 ++--
 8 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index c3046c920063..d683f5e6d791 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -78,11 +78,11 @@ static __always_inline bool should_resched(int preempt_offset)
 			tif_need_resched());
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 extern asmlinkage void preempt_schedule(void);
 #define __preempt_schedule() preempt_schedule()
 extern asmlinkage void preempt_schedule_notrace(void);
 #define __preempt_schedule_notrace() preempt_schedule_notrace()
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #endif /* __ASM_PREEMPT_H */
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index dd92b1a93919..bbb68dba37cc 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -182,7 +182,7 @@ do { \
 
 #define preemptible()	(preempt_count() == 0 && !irqs_disabled())
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #define preempt_enable() \
 do { \
 	barrier(); \
@@ -203,7 +203,7 @@ do { \
 		__preempt_schedule(); \
 } while (0)
 
-#else /* !CONFIG_PREEMPT */
+#else /* !CONFIG_PREEMPTION */
 #define preempt_enable() \
 do { \
 	barrier(); \
@@ -217,7 +217,7 @@ do { \
 } while (0)
 
 #define preempt_check_resched() do { } while (0)
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 #define preempt_disable_notrace() \
 do { \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..6947516a2d3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1767,7 +1767,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  * value indicates whether a reschedule was done in fact.
  * cond_resched_lock() will drop the spinlock before scheduling,
  */
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 extern int _cond_resched(void);
 #else
 static inline int _cond_resched(void) { return 0; }
@@ -1796,12 +1796,12 @@ static inline void cond_resched_rcu(void)
 
 /*
  * Does a critical section need to be broken due to another
- * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * task waiting?: (technically does not depend on CONFIG_PREEMPTION,
  * but a general need for low latency)
  */
 static inline int spin_needbreak(spinlock_t *lock)
 {
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 	return spin_is_contended(lock);
 #else
 	return 0;
diff --git a/init/init_task.c b/init/init_task.c
index 7ab773b9b3cd..bfe06c53b14e 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -174,7 +174,7 @@ struct task_struct init_task
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	.ret_stack	= NULL,
 #endif
-#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPT)
+#if defined(CONFIG_TRACING) && defined(CONFIG_PREEMPTION)
 	.trace_recursion = 0,
 #endif
 #ifdef CONFIG_LIVEPATCH
diff --git a/init/main.c b/init/main.c
index 96f8d5af52d6..653693da8da6 100644
--- a/init/main.c
+++ b/init/main.c
@@ -433,7 +433,7 @@ noinline void __ref rest_init(void)
 
 	/*
 	 * Enable might_sleep() and smp_processor_id() checks.
-	 * They cannot be enabled earlier because with CONFIG_PREEMPT=y
+	 * They cannot be enabled earlier because with CONFIG_PREEMPTION=y
 	 * kernel_thread() would trigger might_sleep() splats. With
 	 * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled
 	 * already, but it's stuck on the kthreadd_done completion.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..604a5e137efe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3581,7 +3581,7 @@ static inline void sched_tick_start(int cpu) { }
 static inline void sched_tick_stop(int cpu) { }
 #endif
 
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+#if defined(CONFIG_PREEMPTION) && (defined(CONFIG_DEBUG_PREEMPT) || \
 				defined(CONFIG_TRACE_PREEMPT_TOGGLE))
 /*
  * If the value passed in is equal to the current preempt count
@@ -3782,7 +3782,7 @@ again:
  *      task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
  *      called on the nearest possible occasion:
  *
- *       - If the kernel is preemptible (CONFIG_PREEMPT=y):
+ *       - If the kernel is preemptible (CONFIG_PREEMPTION=y):
  *
  *         - in syscall or exception context, at the next outmost
  *           preempt_enable(). (this might be as soon as the wake_up()'s
@@ -3791,7 +3791,7 @@ again:
  *         - in IRQ context, return from interrupt-handler to
  *           preemptible context
  *
- *       - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
+ *       - If the kernel is not preemptible (CONFIG_PREEMPTION is not set)
  *         then at the next:
  *
  *          - cond_resched() call
@@ -4033,7 +4033,7 @@ static void __sched notrace preempt_schedule_common(void)
 	} while (need_resched());
 }
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 /*
  * this is the entry point to schedule() from in-kernel preemption
  * off of preempt_enable. Kernel preemptions off return from interrupt
@@ -4105,7 +4105,7 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void)
 }
 EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
 
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 /*
  * this is the entry point to schedule() from kernel preemption
@@ -5416,7 +5416,7 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 int __sched _cond_resched(void)
 {
 	if (should_resched(0)) {
@@ -5433,7 +5433,7 @@ EXPORT_SYMBOL(_cond_resched);
  * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
  * call schedule, and on return reacquire the lock.
  *
- * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
+ * This works OK both with and without CONFIG_PREEMPTION. We do strange low-level
  * operations here to prevent schedule() from being called twice (once via
  * spin_unlock(), once by hand).
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bc9cfeaac8bd..aff9d76d8d65 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7430,7 +7430,7 @@ static int detach_tasks(struct lb_env *env)
 		detached++;
 		env->imbalance -= load;
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 		/*
 		 * NEWIDLE balancing is a source of latency, so preemptible
 		 * kernels will stop after the first task is detached to minimize
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 802b1f3405f2..f2ce6ba1c5d5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1943,7 +1943,7 @@ unsigned long arch_scale_freq_capacity(int cpu)
 #endif
 
 #ifdef CONFIG_SMP
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
 
@@ -1995,7 +1995,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
 	return ret;
 }
 
-#endif /* CONFIG_PREEMPT */
+#endif /* CONFIG_PREEMPTION */
 
 /*
  * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
-- 
cgit v1.2.3


From 01b1d88b09824bea1a75b0bac04dcf50d9893875 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Jul 2019 23:19:38 +0200
Subject: rcu: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Switch the conditionals in RCU to use CONFIG_PREEMPTION.

That's the first step towards RCU on RT. The further tweaks are work in
progress. This neither touches the selftest bits which need a closer look
by Paul.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20190726212124.210156346@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/Kconfig             | 2 +-
 include/linux/rcupdate.h | 2 +-
 include/linux/rcutree.h  | 2 +-
 include/linux/torture.h  | 2 +-
 kernel/rcu/Kconfig       | 8 ++++----
 kernel/rcu/tree.c        | 6 +++---
 kernel/rcu/tree_stall.h  | 6 +++---
 kernel/trace/Kconfig     | 2 +-
 8 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index a7b57dd42c26..c7efbc018f4f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -103,7 +103,7 @@ config STATIC_KEYS_SELFTEST
 config OPTPROBES
 	def_bool y
 	depends on KPROBES && HAVE_OPTPROBES
-	select TASKS_RCU if PREEMPT
+	select TASKS_RCU if PREEMPTION
 
 config KPROBES_ON_FTRACE
 	def_bool y
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f7167478c1d..c4f76a310443 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -578,7 +578,7 @@ do {									      \
  *
  * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU),
  * it is illegal to block while in an RCU read-side critical section.
- * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPT
+ * In preemptible RCU implementations (PREEMPT_RCU) in CONFIG_PREEMPTION
  * kernel builds, RCU read-side critical sections may be preempted,
  * but explicit blocking is illegal.  Finally, in preemptible RCU
  * implementations in real-time (with -rt patchset) kernel builds, RCU
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 735601ac27d3..18b1ed9864b0 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -53,7 +53,7 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
-#ifndef CONFIG_PREEMPT
+#ifndef CONFIG_PREEMPTION
 void rcu_all_qs(void);
 #endif
 
diff --git a/include/linux/torture.h b/include/linux/torture.h
index a620118385bb..6241f59e2d6f 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -86,7 +86,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp);
 #define torture_stop_kthread(n, tp) \
 	_torture_stop_kthread("Stopping " #n " task", &(tp))
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 #define torture_preempt_schedule() preempt_schedule()
 #else
 #define torture_preempt_schedule()
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 480edf328b51..7644eda17d62 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -7,7 +7,7 @@ menu "RCU Subsystem"
 
 config TREE_RCU
 	bool
-	default y if !PREEMPT && SMP
+	default y if !PREEMPTION && SMP
 	help
 	  This option selects the RCU implementation that is
 	  designed for very large SMP system with hundreds or
@@ -16,7 +16,7 @@ config TREE_RCU
 
 config PREEMPT_RCU
 	bool
-	default y if PREEMPT
+	default y if PREEMPTION
 	help
 	  This option selects the RCU implementation that is
 	  designed for very large SMP systems with hundreds or
@@ -28,7 +28,7 @@ config PREEMPT_RCU
 
 config TINY_RCU
 	bool
-	default y if !PREEMPT && !SMP
+	default y if !PREEMPTION && !SMP
 	help
 	  This option selects the RCU implementation that is
 	  designed for UP systems from which real-time response
@@ -70,7 +70,7 @@ config TREE_SRCU
 	  This option selects the full-fledged version of SRCU.
 
 config TASKS_RCU
-	def_bool PREEMPT
+	def_bool PREEMPTION
 	select SRCU
 	help
 	  This option enables a task-based RCU implementation that uses
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a14e5fbbea46..5962636502bc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1881,7 +1881,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 	struct rcu_node *rnp_p;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
-	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
+	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPTION)) ||
 	    WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
 	    rnp->qsmask != 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2205,7 +2205,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
 		mask = 0;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->qsmask == 0) {
-			if (!IS_ENABLED(CONFIG_PREEMPT) ||
+			if (!IS_ENABLED(CONFIG_PREEMPTION) ||
 			    rcu_preempt_blocked_readers_cgp(rnp)) {
 				/*
 				 * No point in scanning bits because they
@@ -2622,7 +2622,7 @@ static int rcu_blocking_is_gp(void)
 {
 	int ret;
 
-	if (IS_ENABLED(CONFIG_PREEMPT))
+	if (IS_ENABLED(CONFIG_PREEMPTION))
 		return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
 	might_sleep();  /* Check for RCU read-side critical section. */
 	preempt_disable();
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 065183391f75..9b92bf18b737 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -163,7 +163,7 @@ static void rcu_iw_handler(struct irq_work *iwp)
 //
 // Printing RCU CPU stall warnings
 
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
 
 /*
  * Dump detailed information for all tasks blocking the current RCU
@@ -215,7 +215,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 	return ndetected;
 }
 
-#else /* #ifdef CONFIG_PREEMPT */
+#else /* #ifdef CONFIG_PREEMPTION */
 
 /*
  * Because preemptible RCU does not exist, we never have to check for
@@ -233,7 +233,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 {
 	return 0;
 }
-#endif /* #else #ifdef CONFIG_PREEMPT */
+#endif /* #else #ifdef CONFIG_PREEMPTION */
 
 /*
  * Dump stacks of all tasks running on stalled CPUs.  First try using
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 98da8998c25c..d2c1fe0b451d 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -146,7 +146,7 @@ config FUNCTION_TRACER
 	select GENERIC_TRACER
 	select CONTEXT_SWITCH_TRACER
 	select GLOB
-	select TASKS_RCU if PREEMPT
+	select TASKS_RCU if PREEMPTION
 	help
 	  Enable the kernel to trace every kernel function. This is done
 	  by using a compiler feature to insert a small, 5-byte No-Operation
-- 
cgit v1.2.3


From 27972765bd0410fc2ef5e86a41de17c71440a2dd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Jul 2019 23:19:39 +0200
Subject: locking/spinlocks: Use CONFIG_PREEMPTION

CONFIG_PREEMPTION is selected by CONFIG_PREEMPT and by
CONFIG_PREEMPT_RT. Both PREEMPT and PREEMPT_RT require the same
functionality which today depends on CONFIG_PREEMPT.

Adjust the comments in the locking code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20190726212124.302995288@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/spinlock.h         | 2 +-
 include/linux/spinlock_api_smp.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index ed7c4d6b8235..031ce8617df8 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -214,7 +214,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
 
 /*
  * Define the various spin_lock methods.  Note we define these
- * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The
+ * regardless of whether CONFIG_SMP or CONFIG_PREEMPTION are set. The
  * various methods are defined as nops in the case they are not
  * required.
  */
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 42dfab89e740..b762eaba4cdf 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -96,7 +96,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
 
 /*
  * If lockdep is enabled then we use the non-preemption spin-ops
- * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * even on CONFIG_PREEMPTION, because lockdep assumes that interrupts are
  * not re-enabled during lock-acquire (which the preempt-spin-ops do):
  */
 #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC)
-- 
cgit v1.2.3


From eaf7b46083a7e341a23ab3d6042e0ccc115b0914 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 26 Jul 2019 09:51:12 -0300
Subject: docs: thermal: add it to the driver API

The file contents mostly describes driver internals.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/index.rst                 |   1 +
 .../driver-api/thermal/cpu-cooling-api.rst         | 107 +++
 .../driver-api/thermal/exynos_thermal.rst          |  90 +++
 .../thermal/exynos_thermal_emulation.rst           |  61 ++
 Documentation/driver-api/thermal/index.rst         |  18 +
 .../driver-api/thermal/intel_powerclamp.rst        | 320 +++++++++
 .../driver-api/thermal/nouveau_thermal.rst         |  96 +++
 .../driver-api/thermal/power_allocator.rst         | 271 +++++++
 Documentation/driver-api/thermal/sysfs-api.rst     | 798 +++++++++++++++++++++
 .../thermal/x86_pkg_temperature_thermal.rst        |  55 ++
 Documentation/thermal/cpu-cooling-api.rst          | 107 ---
 Documentation/thermal/exynos_thermal.rst           |  90 ---
 Documentation/thermal/exynos_thermal_emulation.rst |  61 --
 Documentation/thermal/index.rst                    |  18 -
 Documentation/thermal/intel_powerclamp.rst         | 320 ---------
 Documentation/thermal/nouveau_thermal.rst          |  96 ---
 Documentation/thermal/power_allocator.rst          | 271 -------
 Documentation/thermal/sysfs-api.rst                | 798 ---------------------
 .../thermal/x86_pkg_temperature_thermal.rst        |  55 --
 MAINTAINERS                                        |   2 +-
 include/linux/thermal.h                            |   4 +-
 21 files changed, 1820 insertions(+), 1819 deletions(-)
 create mode 100644 Documentation/driver-api/thermal/cpu-cooling-api.rst
 create mode 100644 Documentation/driver-api/thermal/exynos_thermal.rst
 create mode 100644 Documentation/driver-api/thermal/exynos_thermal_emulation.rst
 create mode 100644 Documentation/driver-api/thermal/index.rst
 create mode 100644 Documentation/driver-api/thermal/intel_powerclamp.rst
 create mode 100644 Documentation/driver-api/thermal/nouveau_thermal.rst
 create mode 100644 Documentation/driver-api/thermal/power_allocator.rst
 create mode 100644 Documentation/driver-api/thermal/sysfs-api.rst
 create mode 100644 Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst
 delete mode 100644 Documentation/thermal/cpu-cooling-api.rst
 delete mode 100644 Documentation/thermal/exynos_thermal.rst
 delete mode 100644 Documentation/thermal/exynos_thermal_emulation.rst
 delete mode 100644 Documentation/thermal/index.rst
 delete mode 100644 Documentation/thermal/intel_powerclamp.rst
 delete mode 100644 Documentation/thermal/nouveau_thermal.rst
 delete mode 100644 Documentation/thermal/power_allocator.rst
 delete mode 100644 Documentation/thermal/sysfs-api.rst
 delete mode 100644 Documentation/thermal/x86_pkg_temperature_thermal.rst

(limited to 'include')

diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
index d12a80f386a6..37ac052ded85 100644
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -65,6 +65,7 @@ available subsections can be seen below.
    dmaengine/index
    slimbus
    soundwire/index
+   thermal/index
    fpga/index
    acpi/index
    backlight/lp855x-driver.rst
diff --git a/Documentation/driver-api/thermal/cpu-cooling-api.rst b/Documentation/driver-api/thermal/cpu-cooling-api.rst
new file mode 100644
index 000000000000..645d914c45a6
--- /dev/null
+++ b/Documentation/driver-api/thermal/cpu-cooling-api.rst
@@ -0,0 +1,107 @@
+=======================
+CPU cooling APIs How To
+=======================
+
+Written by Amit Daniel Kachhap <amit.kachhap@linaro.org>
+
+Updated: 6 Jan 2015
+
+Copyright (c)  2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
+
+0. Introduction
+===============
+
+The generic cpu cooling(freq clipping) provides registration/unregistration APIs
+to the caller. The binding of the cooling devices to the trip point is left for
+the user. The registration APIs returns the cooling device pointer.
+
+1. cpu cooling APIs
+===================
+
+1.1 cpufreq registration/unregistration APIs
+--------------------------------------------
+
+    ::
+
+	struct thermal_cooling_device
+	*cpufreq_cooling_register(struct cpumask *clip_cpus)
+
+    This interface function registers the cpufreq cooling device with the name
+    "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+    cooling devices.
+
+   clip_cpus:
+	cpumask of cpus where the frequency constraints will happen.
+
+    ::
+
+	struct thermal_cooling_device
+	*of_cpufreq_cooling_register(struct cpufreq_policy *policy)
+
+    This interface function registers the cpufreq cooling device with
+    the name "thermal-cpufreq-%x" linking it with a device tree node, in
+    order to bind it via the thermal DT code. This api can support multiple
+    instances of cpufreq cooling devices.
+
+    policy:
+	CPUFreq policy.
+
+
+    ::
+
+	void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+
+    This interface function unregisters the "thermal-cpufreq-%x" cooling device.
+
+    cdev: Cooling device pointer which has to be unregistered.
+
+2. Power models
+===============
+
+The power API registration functions provide a simple power model for
+CPUs.  The current power is calculated as dynamic power (static power isn't
+supported currently).  This power model requires that the operating-points of
+the CPUs are registered using the kernel's opp library and the
+`cpufreq_frequency_table` is assigned to the `struct device` of the
+cpu.  If you are using CONFIG_CPUFREQ_DT then the
+`cpufreq_frequency_table` should already be assigned to the cpu
+device.
+
+The dynamic power consumption of a processor depends on many factors.
+For a given processor implementation the primary factors are:
+
+- The time the processor spends running, consuming dynamic power, as
+  compared to the time in idle states where dynamic consumption is
+  negligible.  Herein we refer to this as 'utilisation'.
+- The voltage and frequency levels as a result of DVFS.  The DVFS
+  level is a dominant factor governing power consumption.
+- In running time the 'execution' behaviour (instruction types, memory
+  access patterns and so forth) causes, in most cases, a second order
+  variation.  In pathological cases this variation can be significant,
+  but typically it is of a much lesser impact than the factors above.
+
+A high level dynamic power consumption model may then be represented as::
+
+	Pdyn = f(run) * Voltage^2 * Frequency * Utilisation
+
+f(run) here represents the described execution behaviour and its
+result has a units of Watts/Hz/Volt^2 (this often expressed in
+mW/MHz/uVolt^2)
+
+The detailed behaviour for f(run) could be modelled on-line.  However,
+in practice, such an on-line model has dependencies on a number of
+implementation specific processor support and characterisation
+factors.  Therefore, in initial implementation that contribution is
+represented as a constant coefficient.  This is a simplification
+consistent with the relative contribution to overall power variation.
+
+In this simplified representation our model becomes::
+
+	Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation
+
+Where `capacitance` is a constant that represents an indicative
+running time dynamic power coefficient in fundamental units of
+mW/MHz/uVolt^2.  Typical values for mobile CPUs might lie in range
+from 100 to 500.  For reference, the approximate values for the SoC in
+ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
+140 for the Cortex-A53 cluster.
diff --git a/Documentation/driver-api/thermal/exynos_thermal.rst b/Documentation/driver-api/thermal/exynos_thermal.rst
new file mode 100644
index 000000000000..5bd556566c70
--- /dev/null
+++ b/Documentation/driver-api/thermal/exynos_thermal.rst
@@ -0,0 +1,90 @@
+========================
+Kernel driver exynos_tmu
+========================
+
+Supported chips:
+
+* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
+
+  Datasheet: Not publicly available
+
+Authors: Donggeun Kim <dg77.kim@samsung.com>
+Authors: Amit Daniel <amit.daniel@samsung.com>
+
+TMU controller Description:
+---------------------------
+
+This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
+
+The chip only exposes the measured 8-bit temperature code value
+through a register.
+Temperature can be taken from the temperature code.
+There are three equations converting from temperature to temperature code.
+
+The three equations are:
+  1. Two point trimming::
+
+	Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1
+
+  2. One point trimming::
+
+	Tc = T + TI1 - 25
+
+  3. No trimming::
+
+	Tc = T + 50
+
+  Tc:
+       Temperature code, T: Temperature,
+  TI1:
+       Trimming info for 25 degree Celsius (stored at TRIMINFO register)
+       Temperature code measured at 25 degree Celsius which is unchanged
+  TI2:
+       Trimming info for 85 degree Celsius (stored at TRIMINFO register)
+       Temperature code measured at 85 degree Celsius which is unchanged
+
+TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
+when temperature exceeds pre-defined levels.
+The maximum number of configurable threshold is five.
+The threshold levels are defined as follows::
+
+  Level_0: current temperature > trigger_level_0 + threshold
+  Level_1: current temperature > trigger_level_1 + threshold
+  Level_2: current temperature > trigger_level_2 + threshold
+  Level_3: current temperature > trigger_level_3 + threshold
+
+The threshold and each trigger_level are set
+through the corresponding registers.
+
+When an interrupt occurs, this driver notify kernel thermal framework
+with the function exynos_report_trigger.
+Although an interrupt condition for level_0 can be set,
+it can be used to synchronize the cooling action.
+
+TMU driver description:
+-----------------------
+
+The exynos thermal driver is structured as::
+
+					Kernel Core thermal framework
+				(thermal_core.c, step_wise.c, cpu_cooling.c)
+								^
+								|
+								|
+  TMU configuration data -----> TMU Driver  <----> Exynos Core thermal wrapper
+  (exynos_tmu_data.c)	      (exynos_tmu.c)	   (exynos_thermal_common.c)
+  (exynos_tmu_data.h)	      (exynos_tmu.h)	   (exynos_thermal_common.h)
+
+a) TMU configuration data:
+		This consist of TMU register offsets/bitfields
+		described through structure exynos_tmu_registers. Also several
+		other platform data (struct exynos_tmu_platform_data) members
+		are used to configure the TMU.
+b) TMU driver:
+		This component initialises the TMU controller and sets different
+		thresholds. It invokes core thermal implementation with the call
+		exynos_report_trigger.
+c) Exynos Core thermal wrapper:
+		This provides 3 wrapper function to use the
+		Kernel core thermal framework. They are exynos_unregister_thermal,
+		exynos_register_thermal and exynos_report_trigger.
diff --git a/Documentation/driver-api/thermal/exynos_thermal_emulation.rst b/Documentation/driver-api/thermal/exynos_thermal_emulation.rst
new file mode 100644
index 000000000000..c21d10838bc5
--- /dev/null
+++ b/Documentation/driver-api/thermal/exynos_thermal_emulation.rst
@@ -0,0 +1,61 @@
+=====================
+Exynos Emulation Mode
+=====================
+
+Copyright (C) 2012 Samsung Electronics
+
+Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
+
+Description
+-----------
+
+Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal
+management unit. Thermal emulation mode supports software debug for
+TMU's operation. User can set temperature manually with software code
+and TMU will read current temperature from user value not from sensor's
+value.
+
+Enabling CONFIG_THERMAL_EMULATION option will make this support
+available. When it's enabled, sysfs node will be created as
+/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp.
+
+The sysfs node, 'emul_node', will contain value 0 for the initial state.
+When you input any temperature you want to update to sysfs node, it
+automatically enable emulation mode and current temperature will be
+changed into it.
+
+(Exynos also supports user changeable delay time which would be used to
+delay of changing temperature. However, this node only uses same delay
+of real sensing time, 938us.)
+
+Exynos emulation mode requires synchronous of value changing and
+enabling. It means when you want to update the any value of delay or
+next temperature, then you have to enable emulation mode at the same
+time. (Or you have to keep the mode enabling.) If you don't, it fails to
+change the value to updated one and just use last succeessful value
+repeatedly. That's why this node gives users the right to change
+termerpature only. Just one interface makes it more simply to use.
+
+Disabling emulation mode only requires writing value 0 to sysfs node.
+
+::
+
+
+  TEMP	120 |
+	    |
+	100 |
+	    |
+	 80 |
+	    |				 +-----------
+	 60 |      			 |	    |
+	    |		   +-------------|          |
+	 40 |              |         	 |          |
+	    |		   |		 |          |
+	 20 |		   |		 |          +----------
+	    |		   |		 |          |          |
+	  0 |______________|_____________|__________|__________|_________
+		   A		 A	    A		       A     TIME
+		   |<----->|	 |<----->|  |<----->|	       |
+		   | 938us |  	 |	 |  |       |          |
+  emulation   : 0  50	   |  	 70      |  20      |          0
+  current temp:   sensor   50		 70         20	      sensor
diff --git a/Documentation/driver-api/thermal/index.rst b/Documentation/driver-api/thermal/index.rst
new file mode 100644
index 000000000000..5ba61d19c6ae
--- /dev/null
+++ b/Documentation/driver-api/thermal/index.rst
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+Thermal
+=======
+
+.. toctree::
+   :maxdepth: 1
+
+   cpu-cooling-api
+   sysfs-api
+   power_allocator
+
+   exynos_thermal
+   exynos_thermal_emulation
+   intel_powerclamp
+   nouveau_thermal
+   x86_pkg_temperature_thermal
diff --git a/Documentation/driver-api/thermal/intel_powerclamp.rst b/Documentation/driver-api/thermal/intel_powerclamp.rst
new file mode 100644
index 000000000000..3f6dfb0b3ea6
--- /dev/null
+++ b/Documentation/driver-api/thermal/intel_powerclamp.rst
@@ -0,0 +1,320 @@
+=======================
+Intel Powerclamp Driver
+=======================
+
+By:
+  - Arjan van de Ven <arjan@linux.intel.com>
+  - Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+.. Contents:
+
+	(*) Introduction
+	    - Goals and Objectives
+
+	(*) Theory of Operation
+	    - Idle Injection
+	    - Calibration
+
+	(*) Performance Analysis
+	    - Effectiveness and Limitations
+	    - Power vs Performance
+	    - Scalability
+	    - Calibration
+	    - Comparison with Alternative Techniques
+
+	(*) Usage and Interfaces
+	    - Generic Thermal Layer (sysfs)
+	    - Kernel APIs (TBD)
+
+INTRODUCTION
+============
+
+Consider the situation where a system’s power consumption must be
+reduced at runtime, due to power budget, thermal constraint, or noise
+level, and where active cooling is not preferred. Software managed
+passive power reduction must be performed to prevent the hardware
+actions that are designed for catastrophic scenarios.
+
+Currently, P-states, T-states (clock modulation), and CPU offlining
+are used for CPU throttling.
+
+On Intel CPUs, C-states provide effective power reduction, but so far
+they’re only used opportunistically, based on workload. With the
+development of intel_powerclamp driver, the method of synchronizing
+idle injection across all online CPU threads was introduced. The goal
+is to achieve forced and controllable C-state residency.
+
+Test/Analysis has been made in the areas of power, performance,
+scalability, and user experience. In many cases, clear advantage is
+shown over taking the CPU offline or modulating the CPU clock.
+
+
+THEORY OF OPERATION
+===================
+
+Idle Injection
+--------------
+
+On modern Intel processors (Nehalem or later), package level C-state
+residency is available in MSRs, thus also available to the kernel.
+
+These MSRs are::
+
+      #define MSR_PKG_C2_RESIDENCY      0x60D
+      #define MSR_PKG_C3_RESIDENCY      0x3F8
+      #define MSR_PKG_C6_RESIDENCY      0x3F9
+      #define MSR_PKG_C7_RESIDENCY      0x3FA
+
+If the kernel can also inject idle time to the system, then a
+closed-loop control system can be established that manages package
+level C-state. The intel_powerclamp driver is conceived as such a
+control system, where the target set point is a user-selected idle
+ratio (based on power reduction), and the error is the difference
+between the actual package level C-state residency ratio and the target idle
+ratio.
+
+Injection is controlled by high priority kernel threads, spawned for
+each online CPU.
+
+These kernel threads, with SCHED_FIFO class, are created to perform
+clamping actions of controlled duty ratio and duration. Each per-CPU
+thread synchronizes its idle time and duration, based on the rounding
+of jiffies, so accumulated errors can be prevented to avoid a jittery
+effect. Threads are also bound to the CPU such that they cannot be
+migrated, unless the CPU is taken offline. In this case, threads
+belong to the offlined CPUs will be terminated immediately.
+
+Running as SCHED_FIFO and relatively high priority, also allows such
+scheme to work for both preemptable and non-preemptable kernels.
+Alignment of idle time around jiffies ensures scalability for HZ
+values. This effect can be better visualized using a Perf timechart.
+The following diagram shows the behavior of kernel thread
+kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
+for a given "duration", then relinquishes the CPU to other tasks,
+until the next time interval.
+
+The NOHZ schedule tick is disabled during idle time, but interrupts
+are not masked. Tests show that the extra wakeups from scheduler tick
+have a dramatic impact on the effectiveness of the powerclamp driver
+on large scale systems (Westmere system with 80 processors).
+
+::
+
+  CPU0
+		    ____________          ____________
+  kidle_inject/0   |   sleep    |  mwait |  sleep     |
+	  _________|            |________|            |_______
+				 duration
+  CPU1
+		    ____________          ____________
+  kidle_inject/1   |   sleep    |  mwait |  sleep     |
+	  _________|            |________|            |_______
+				^
+				|
+				|
+				roundup(jiffies, interval)
+
+Only one CPU is allowed to collect statistics and update global
+control parameters. This CPU is referred to as the controlling CPU in
+this document. The controlling CPU is elected at runtime, with a
+policy that favors BSP, taking into account the possibility of a CPU
+hot-plug.
+
+In terms of dynamics of the idle control system, package level idle
+time is considered largely as a non-causal system where its behavior
+cannot be based on the past or current input. Therefore, the
+intel_powerclamp driver attempts to enforce the desired idle time
+instantly as given input (target idle ratio). After injection,
+powerclamp monitors the actual idle for a given time window and adjust
+the next injection accordingly to avoid over/under correction.
+
+When used in a causal control system, such as a temperature control,
+it is up to the user of this driver to implement algorithms where
+past samples and outputs are included in the feedback. For example, a
+PID-based thermal controller can use the powerclamp driver to
+maintain a desired target temperature, based on integral and
+derivative gains of the past samples.
+
+
+
+Calibration
+-----------
+During scalability testing, it is observed that synchronized actions
+among CPUs become challenging as the number of cores grows. This is
+also true for the ability of a system to enter package level C-states.
+
+To make sure the intel_powerclamp driver scales well, online
+calibration is implemented. The goals for doing such a calibration
+are:
+
+a) determine the effective range of idle injection ratio
+b) determine the amount of compensation needed at each target ratio
+
+Compensation to each target ratio consists of two parts:
+
+	a) steady state error compensation
+	This is to offset the error occurring when the system can
+	enter idle without extra wakeups (such as external interrupts).
+
+	b) dynamic error compensation
+	When an excessive amount of wakeups occurs during idle, an
+	additional idle ratio can be added to quiet interrupts, by
+	slowing down CPU activities.
+
+A debugfs file is provided for the user to examine compensation
+progress and results, such as on a Westmere system::
+
+  [jacob@nex01 ~]$ cat
+  /sys/kernel/debug/intel_powerclamp/powerclamp_calib
+  controlling cpu: 0
+  pct confidence steady dynamic (compensation)
+  0       0       0       0
+  1       1       0       0
+  2       1       1       0
+  3       3       1       0
+  4       3       1       0
+  5       3       1       0
+  6       3       1       0
+  7       3       1       0
+  8       3       1       0
+  ...
+  30      3       2       0
+  31      3       2       0
+  32      3       1       0
+  33      3       2       0
+  34      3       1       0
+  35      3       2       0
+  36      3       1       0
+  37      3       2       0
+  38      3       1       0
+  39      3       2       0
+  40      3       3       0
+  41      3       1       0
+  42      3       2       0
+  43      3       1       0
+  44      3       1       0
+  45      3       2       0
+  46      3       3       0
+  47      3       0       0
+  48      3       2       0
+  49      3       3       0
+
+Calibration occurs during runtime. No offline method is available.
+Steady state compensation is used only when confidence levels of all
+adjacent ratios have reached satisfactory level. A confidence level
+is accumulated based on clean data collected at runtime. Data
+collected during a period without extra interrupts is considered
+clean.
+
+To compensate for excessive amounts of wakeup during idle, additional
+idle time is injected when such a condition is detected. Currently,
+we have a simple algorithm to double the injection ratio. A possible
+enhancement might be to throttle the offending IRQ, such as delaying
+EOI for level triggered interrupts. But it is a challenge to be
+non-intrusive to the scheduler or the IRQ core code.
+
+
+CPU Online/Offline
+------------------
+Per-CPU kernel threads are started/stopped upon receiving
+notifications of CPU hotplug activities. The intel_powerclamp driver
+keeps track of clamping kernel threads, even after they are migrated
+to other CPUs, after a CPU offline event.
+
+
+Performance Analysis
+====================
+This section describes the general performance data collected on
+multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
+
+Effectiveness and Limitations
+-----------------------------
+The maximum range that idle injection is allowed is capped at 50
+percent. As mentioned earlier, since interrupts are allowed during
+forced idle time, excessive interrupts could result in less
+effectiveness. The extreme case would be doing a ping -f to generated
+flooded network interrupts without much CPU acknowledgement. In this
+case, little can be done from the idle injection threads. In most
+normal cases, such as scp a large file, applications can be throttled
+by the powerclamp driver, since slowing down the CPU also slows down
+network protocol processing, which in turn reduces interrupts.
+
+When control parameters change at runtime by the controlling CPU, it
+may take an additional period for the rest of the CPUs to catch up
+with the changes. During this time, idle injection is out of sync,
+thus not able to enter package C- states at the expected ratio. But
+this effect is minor, in that in most cases change to the target
+ratio is updated much less frequently than the idle injection
+frequency.
+
+Scalability
+-----------
+Tests also show a minor, but measurable, difference between the 4P/8P
+Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
+More compensation is needed on Westmere for the same amount of
+target idle ratio. The compensation also increases as the idle ratio
+gets larger. The above reason constitutes the need for the
+calibration code.
+
+On the IVB 8P system, compared to an offline CPU, powerclamp can
+achieve up to 40% better performance per watt. (measured by a spin
+counter summed over per CPU counting threads spawned for all running
+CPUs).
+
+Usage and Interfaces
+====================
+The powerclamp driver is registered to the generic thermal layer as a
+cooling device. Currently, it’s not bound to any thermal zones::
+
+  jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
+  cur_state:0
+  max_state:50
+  type:intel_powerclamp
+
+cur_state allows user to set the desired idle percentage. Writing 0 to
+cur_state will stop idle injection. Writing a value between 1 and
+max_state will start the idle injection. Reading cur_state returns the
+actual and current idle percentage. This may not be the same value
+set by the user in that current idle percentage depends on workload
+and includes natural idle. When idle injection is disabled, reading
+cur_state returns value -1 instead of 0 which is to avoid confusing
+100% busy state with the disabled state.
+
+Example usage:
+- To inject 25% idle time::
+
+	$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
+
+If the system is not busy and has more than 25% idle time already,
+then the powerclamp driver will not start idle injection. Using Top
+will not show idle injection kernel threads.
+
+If the system is busy (spin test below) and has less than 25% natural
+idle time, powerclamp kernel threads will do idle injection. Forced
+idle time is accounted as normal idle in that common code path is
+taken as the idle task.
+
+In this example, 24.1% idle is shown. This helps the system admin or
+user determine the cause of slowdown, when a powerclamp driver is in action::
+
+
+  Tasks: 197 total,   1 running, 196 sleeping,   0 stopped,   0 zombie
+  Cpu(s): 71.2%us,  4.7%sy,  0.0%ni, 24.1%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
+  Mem:   3943228k total,  1689632k used,  2253596k free,    74960k buffers
+  Swap:  4087804k total,        0k used,  4087804k free,   945336k cached
+
+    PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
+   3352 jacob     20   0  262m  644  428 S  286  0.0   0:17.16 spin
+   3341 root     -51   0     0    0    0 D   25  0.0   0:01.62 kidle_inject/0
+   3344 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/3
+   3342 root     -51   0     0    0    0 D   25  0.0   0:01.61 kidle_inject/1
+   3343 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/2
+   2935 jacob     20   0  696m 125m  35m S    5  3.3   0:31.11 firefox
+   1546 root      20   0  158m  20m 6640 S    3  0.5   0:26.97 Xorg
+   2100 jacob     20   0 1223m  88m  30m S    3  2.3   0:23.68 compiz
+
+Tests have shown that by using the powerclamp driver as a cooling
+device, a PID based userspace thermal controller can manage to
+control CPU temperature effectively, when no other thermal influence
+is added. For example, a UltraBook user can compile the kernel under
+certain temperature (below most active trip points).
diff --git a/Documentation/driver-api/thermal/nouveau_thermal.rst b/Documentation/driver-api/thermal/nouveau_thermal.rst
new file mode 100644
index 000000000000..37255fd6735d
--- /dev/null
+++ b/Documentation/driver-api/thermal/nouveau_thermal.rst
@@ -0,0 +1,96 @@
+=====================
+Kernel driver nouveau
+=====================
+
+Supported chips:
+
+* NV43+
+
+Authors: Martin Peres (mupuf) <martin.peres@free.fr>
+
+Description
+-----------
+
+This driver allows to read the GPU core temperature, drive the GPU fan and
+set temperature alarms.
+
+Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau
+cannot access any of the i2c external monitoring chips it may find. If you
+have one of those, temperature and/or fan management through Nouveau's HWMON
+interface is likely not to work. This document may then not cover your situation
+entirely.
+
+Temperature management
+----------------------
+
+Temperature is exposed under as a read-only HWMON attribute temp1_input.
+
+In order to protect the GPU from overheating, Nouveau supports 4 configurable
+temperature thresholds:
+
+ * Fan_boost:
+	Fan speed is set to 100% when reaching this temperature;
+ * Downclock:
+	The GPU will be downclocked to reduce its power dissipation;
+ * Critical:
+	The GPU is put on hold to further lower power dissipation;
+ * Shutdown:
+	Shut the computer down to protect your GPU.
+
+WARNING:
+	Some of these thresholds may not be used by Nouveau depending
+	on your chipset.
+
+The default value for these thresholds comes from the GPU's vbios. These
+thresholds can be configured thanks to the following HWMON attributes:
+
+ * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst;
+ * Downclock: temp1_max and temp1_max_hyst;
+ * Critical: temp1_crit and temp1_crit_hyst;
+ * Shutdown: temp1_emergency and temp1_emergency_hyst.
+
+NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget
+to multiply!
+
+Fan management
+--------------
+
+Not all cards have a drivable fan. If you do, then the following HWMON
+attributes should be available:
+
+ * pwm1_enable:
+	Current fan management mode (NONE, MANUAL or AUTO);
+ * pwm1:
+	Current PWM value (power percentage);
+ * pwm1_min:
+	The minimum PWM speed allowed;
+ * pwm1_max:
+	The maximum PWM speed allowed (bypassed when hitting Fan_boost);
+
+You may also have the following attribute:
+
+ * fan1_input:
+	Speed in RPM of your fan.
+
+Your fan can be driven in different modes:
+
+ * 0: The fan is left untouched;
+ * 1: The fan can be driven in manual (use pwm1 to change the speed);
+ * 2; The fan is driven automatically depending on the temperature.
+
+NOTE:
+  Be sure to use the manual mode if you want to drive the fan speed manually
+
+NOTE2:
+  When operating in manual mode outside the vbios-defined
+  [PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate
+  depending on your hardware.
+
+Bug reports
+-----------
+
+Thermal management on Nouveau is new and may not work on all cards. If you have
+inquiries, please ping mupuf on IRC (#nouveau, freenode).
+
+Bug reports should be filled on Freedesktop's bug tracker. Please follow
+http://nouveau.freedesktop.org/wiki/Bugs
diff --git a/Documentation/driver-api/thermal/power_allocator.rst b/Documentation/driver-api/thermal/power_allocator.rst
new file mode 100644
index 000000000000..67b6a3297238
--- /dev/null
+++ b/Documentation/driver-api/thermal/power_allocator.rst
@@ -0,0 +1,271 @@
+=================================
+Power allocator governor tunables
+=================================
+
+Trip points
+-----------
+
+The governor works optimally with the following two passive trip points:
+
+1.  "switch on" trip point: temperature above which the governor
+    control loop starts operating.  This is the first passive trip
+    point of the thermal zone.
+
+2.  "desired temperature" trip point: it should be higher than the
+    "switch on" trip point.  This the target temperature the governor
+    is controlling for.  This is the last passive trip point of the
+    thermal zone.
+
+PID Controller
+--------------
+
+The power allocator governor implements a
+Proportional-Integral-Derivative controller (PID controller) with
+temperature as the control input and power as the controlled output:
+
+    P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power
+
+where
+   -  e = desired_temperature - current_temperature
+   -  err_integral is the sum of previous errors
+   -  diff_err = e - previous_error
+
+It is similar to the one depicted below::
+
+				      k_d
+				       |
+  current_temp                         |
+       |                               v
+       |              +----------+   +---+
+       |       +----->| diff_err |-->| X |------+
+       |       |      +----------+   +---+      |
+       |       |                                |      tdp        actor
+       |       |                      k_i       |       |  get_requested_power()
+       |       |                       |        |       |        |     |
+       |       |                       |        |       |        |     | ...
+       v       |                       v        v       v        v     v
+     +---+     |      +-------+      +---+    +---+   +---+   +----------+
+     | S |-----+----->| sum e |----->| X |--->| S |-->| S |-->|power     |
+     +---+     |      +-------+      +---+    +---+   +---+   |allocation|
+       ^       |                                ^             +----------+
+       |       |                                |                |     |
+       |       |        +---+                   |                |     |
+       |       +------->| X |-------------------+                v     v
+       |                +---+                               granted performance
+  desired_temperature     ^
+			  |
+			  |
+		      k_po/k_pu
+
+Sustainable power
+-----------------
+
+An estimate of the sustainable dissipatable power (in mW) should be
+provided while registering the thermal zone.  This estimates the
+sustained power that can be dissipated at the desired control
+temperature.  This is the maximum sustained power for allocation at
+the desired maximum temperature.  The actual sustained power can vary
+for a number of reasons.  The closed loop controller will take care of
+variations such as environmental conditions, and some factors related
+to the speed-grade of the silicon.  `sustainable_power` is therefore
+simply an estimate, and may be tuned to affect the aggressiveness of
+the thermal ramp. For reference, the sustainable power of a 4" phone
+is typically 2000mW, while on a 10" tablet is around 4500mW (may vary
+depending on screen size).
+
+If you are using device tree, do add it as a property of the
+thermal-zone.  For example::
+
+	thermal-zones {
+		soc_thermal {
+			polling-delay = <1000>;
+			polling-delay-passive = <100>;
+			sustainable-power = <2500>;
+			...
+
+Instead, if the thermal zone is registered from the platform code, pass a
+`thermal_zone_params` that has a `sustainable_power`.  If no
+`thermal_zone_params` were being passed, then something like below
+will suffice::
+
+	static const struct thermal_zone_params tz_params = {
+		.sustainable_power = 3500,
+	};
+
+and then pass `tz_params` as the 5th parameter to
+`thermal_zone_device_register()`
+
+k_po and k_pu
+-------------
+
+The implementation of the PID controller in the power allocator
+thermal governor allows the configuration of two proportional term
+constants: `k_po` and `k_pu`.  `k_po` is the proportional term
+constant during temperature overshoot periods (current temperature is
+above "desired temperature" trip point).  Conversely, `k_pu` is the
+proportional term constant during temperature undershoot periods
+(current temperature below "desired temperature" trip point).
+
+These controls are intended as the primary mechanism for configuring
+the permitted thermal "ramp" of the system.  For instance, a lower
+`k_pu` value will provide a slower ramp, at the cost of capping
+available capacity at a low temperature.  On the other hand, a high
+value of `k_pu` will result in the governor granting very high power
+while temperature is low, and may lead to temperature overshooting.
+
+The default value for `k_pu` is::
+
+    2 * sustainable_power / (desired_temperature - switch_on_temp)
+
+This means that at `switch_on_temp` the output of the controller's
+proportional term will be 2 * `sustainable_power`.  The default value
+for `k_po` is::
+
+    sustainable_power / (desired_temperature - switch_on_temp)
+
+Focusing on the proportional and feed forward values of the PID
+controller equation we have::
+
+    P_max = k_p * e + sustainable_power
+
+The proportional term is proportional to the difference between the
+desired temperature and the current one.  When the current temperature
+is the desired one, then the proportional component is zero and
+`P_max` = `sustainable_power`.  That is, the system should operate in
+thermal equilibrium under constant load.  `sustainable_power` is only
+an estimate, which is the reason for closed-loop control such as this.
+
+Expanding `k_pu` we get::
+
+    P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) +
+	sustainable_power
+
+where:
+
+    - T_set is the desired temperature
+    - T is the current temperature
+    - T_on is the switch on temperature
+
+When the current temperature is the switch_on temperature, the above
+formula becomes::
+
+    P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) +
+	sustainable_power = 2 * sustainable_power + sustainable_power =
+	3 * sustainable_power
+
+Therefore, the proportional term alone linearly decreases power from
+3 * `sustainable_power` to `sustainable_power` as the temperature
+rises from the switch on temperature to the desired temperature.
+
+k_i and integral_cutoff
+-----------------------
+
+`k_i` configures the PID loop's integral term constant.  This term
+allows the PID controller to compensate for long term drift and for
+the quantized nature of the output control: cooling devices can't set
+the exact power that the governor requests.  When the temperature
+error is below `integral_cutoff`, errors are accumulated in the
+integral term.  This term is then multiplied by `k_i` and the result
+added to the output of the controller.  Typically `k_i` is set low (1
+or 2) and `integral_cutoff` is 0.
+
+k_d
+---
+
+`k_d` configures the PID loop's derivative term constant.  It's
+recommended to leave it as the default: 0.
+
+Cooling device power API
+========================
+
+Cooling devices controlled by this governor must supply the additional
+"power" API in their `cooling_device_ops`.  It consists on three ops:
+
+1. ::
+
+    int get_requested_power(struct thermal_cooling_device *cdev,
+			    struct thermal_zone_device *tz, u32 *power);
+
+
+@cdev:
+	The `struct thermal_cooling_device` pointer
+@tz:
+	thermal zone in which we are currently operating
+@power:
+	pointer in which to store the calculated power
+
+`get_requested_power()` calculates the power requested by the device
+in milliwatts and stores it in @power .  It should return 0 on
+success, -E* on failure.  This is currently used by the power
+allocator governor to calculate how much power to give to each cooling
+device.
+
+2. ::
+
+	int state2power(struct thermal_cooling_device *cdev, struct
+			thermal_zone_device *tz, unsigned long state,
+			u32 *power);
+
+@cdev:
+	The `struct thermal_cooling_device` pointer
+@tz:
+	thermal zone in which we are currently operating
+@state:
+	A cooling device state
+@power:
+	pointer in which to store the equivalent power
+
+Convert cooling device state @state into power consumption in
+milliwatts and store it in @power.  It should return 0 on success, -E*
+on failure.  This is currently used by thermal core to calculate the
+maximum power that an actor can consume.
+
+3. ::
+
+	int power2state(struct thermal_cooling_device *cdev, u32 power,
+			unsigned long *state);
+
+@cdev:
+	The `struct thermal_cooling_device` pointer
+@power:
+	power in milliwatts
+@state:
+	pointer in which to store the resulting state
+
+Calculate a cooling device state that would make the device consume at
+most @power mW and store it in @state.  It should return 0 on success,
+-E* on failure.  This is currently used by the thermal core to convert
+a given power set by the power allocator governor to a state that the
+cooling device can set.  It is a function because this conversion may
+depend on external factors that may change so this function should the
+best conversion given "current circumstances".
+
+Cooling device weights
+----------------------
+
+Weights are a mechanism to bias the allocation among cooling
+devices.  They express the relative power efficiency of different
+cooling devices.  Higher weight can be used to express higher power
+efficiency.  Weighting is relative such that if each cooling device
+has a weight of one they are considered equal.  This is particularly
+useful in heterogeneous systems where two cooling devices may perform
+the same kind of compute, but with different efficiency.  For example,
+a system with two different types of processors.
+
+If the thermal zone is registered using
+`thermal_zone_device_register()` (i.e., platform code), then weights
+are passed as part of the thermal zone's `thermal_bind_parameters`.
+If the platform is registered using device tree, then they are passed
+as the `contribution` property of each map in the `cooling-maps` node.
+
+Limitations of the power allocator governor
+===========================================
+
+The power allocator governor's PID controller works best if there is a
+periodic tick.  If you have a driver that calls
+`thermal_zone_device_update()` (or anything that ends up calling the
+governor's `throttle()` function) repetitively, the governor response
+won't be very good.  Note that this is not particular to this
+governor, step-wise will also misbehave if you call its throttle()
+faster than the normal thermal framework tick (due to interrupts for
+example) as it will overreact.
diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst
new file mode 100644
index 000000000000..fab2c9b36d08
--- /dev/null
+++ b/Documentation/driver-api/thermal/sysfs-api.rst
@@ -0,0 +1,798 @@
+===================================
+Generic Thermal Sysfs driver How To
+===================================
+
+Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
+
+Updated: 2 January 2008
+
+Copyright (c)  2008 Intel Corporation
+
+
+0. Introduction
+===============
+
+The generic thermal sysfs provides a set of interfaces for thermal zone
+devices (sensors) and thermal cooling devices (fan, processor...) to register
+with the thermal management solution and to be a part of it.
+
+This how-to focuses on enabling new thermal zone and cooling devices to
+participate in thermal management.
+This solution is platform independent and any type of thermal zone devices
+and cooling devices should be able to make use of the infrastructure.
+
+The main task of the thermal sysfs driver is to expose thermal zone attributes
+as well as cooling device attributes to the user space.
+An intelligent thermal management application can make decisions based on
+inputs from thermal zone attributes (the current temperature and trip point
+temperature) and throttle appropriate devices.
+
+- `[0-*]`	denotes any positive number starting from 0
+- `[1-*]`	denotes any positive number starting from 1
+
+1. thermal sysfs driver interface functions
+===========================================
+
+1.1 thermal zone device interface
+---------------------------------
+
+    ::
+
+	struct thermal_zone_device
+	*thermal_zone_device_register(char *type,
+				      int trips, int mask, void *devdata,
+				      struct thermal_zone_device_ops *ops,
+				      const struct thermal_zone_params *tzp,
+				      int passive_delay, int polling_delay))
+
+    This interface function adds a new thermal zone device (sensor) to
+    /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the
+    thermal cooling devices registered at the same time.
+
+    type:
+	the thermal zone type.
+    trips:
+	the total number of trip points this thermal zone supports.
+    mask:
+	Bit string: If 'n'th bit is set, then trip point 'n' is writeable.
+    devdata:
+	device private data
+    ops:
+	thermal zone device call-backs.
+
+	.bind:
+		bind the thermal zone device with a thermal cooling device.
+	.unbind:
+		unbind the thermal zone device with a thermal cooling device.
+	.get_temp:
+		get the current temperature of the thermal zone.
+	.set_trips:
+		    set the trip points window. Whenever the current temperature
+		    is updated, the trip points immediately below and above the
+		    current temperature are found.
+	.get_mode:
+		   get the current mode (enabled/disabled) of the thermal zone.
+
+			- "enabled" means the kernel thermal management is
+			  enabled.
+			- "disabled" will prevent kernel thermal driver action
+			  upon trip points so that user applications can take
+			  charge of thermal management.
+	.set_mode:
+		set the mode (enabled/disabled) of the thermal zone.
+	.get_trip_type:
+		get the type of certain trip point.
+	.get_trip_temp:
+			get the temperature above which the certain trip point
+			will be fired.
+	.set_emul_temp:
+			set the emulation temperature which helps in debugging
+			different threshold temperature points.
+    tzp:
+	thermal zone platform parameters.
+    passive_delay:
+	number of milliseconds to wait between polls when
+	performing passive cooling.
+    polling_delay:
+	number of milliseconds to wait between polls when checking
+	whether trip points have been crossed (0 for interrupt driven systems).
+
+    ::
+
+	void thermal_zone_device_unregister(struct thermal_zone_device *tz)
+
+    This interface function removes the thermal zone device.
+    It deletes the corresponding entry from /sys/class/thermal folder and
+    unbinds all the thermal cooling devices it uses.
+
+	::
+
+	   struct thermal_zone_device
+	   *thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
+				void *data,
+				const struct thermal_zone_of_device_ops *ops)
+
+	This interface adds a new sensor to a DT thermal zone.
+	This function will search the list of thermal zones described in
+	device tree and look for the zone that refer to the sensor device
+	pointed by dev->of_node as temperature providers. For the zone
+	pointing to the sensor node, the sensor will be added to the DT
+	thermal zone device.
+
+	The parameters for this interface are:
+
+	dev:
+			Device node of sensor containing valid node pointer in
+			dev->of_node.
+	sensor_id:
+			a sensor identifier, in case the sensor IP has more
+			than one sensors
+	data:
+			a private pointer (owned by the caller) that will be
+			passed back, when a temperature reading is needed.
+	ops:
+			`struct thermal_zone_of_device_ops *`.
+
+			==============  =======================================
+			get_temp	a pointer to a function that reads the
+					sensor temperature. This is mandatory
+					callback provided by sensor driver.
+			set_trips	a pointer to a function that sets a
+					temperature window. When this window is
+					left the driver must inform the thermal
+					core via thermal_zone_device_update.
+			get_trend 	a pointer to a function that reads the
+					sensor temperature trend.
+			set_emul_temp	a pointer to a function that sets
+					sensor emulated temperature.
+			==============  =======================================
+
+	The thermal zone temperature is provided by the get_temp() function
+	pointer of thermal_zone_of_device_ops. When called, it will
+	have the private pointer @data back.
+
+	It returns error pointer if fails otherwise valid thermal zone device
+	handle. Caller should check the return handle with IS_ERR() for finding
+	whether success or not.
+
+	::
+
+	    void thermal_zone_of_sensor_unregister(struct device *dev,
+						   struct thermal_zone_device *tzd)
+
+	This interface unregisters a sensor from a DT thermal zone which was
+	successfully added by interface thermal_zone_of_sensor_register().
+	This function removes the sensor callbacks and private data from the
+	thermal zone device registered with thermal_zone_of_sensor_register()
+	interface. It will also silent the zone by remove the .get_temp() and
+	get_trend() thermal zone device callbacks.
+
+	::
+
+	  struct thermal_zone_device
+	  *devm_thermal_zone_of_sensor_register(struct device *dev,
+				int sensor_id,
+				void *data,
+				const struct thermal_zone_of_device_ops *ops)
+
+	This interface is resource managed version of
+	thermal_zone_of_sensor_register().
+
+	All details of thermal_zone_of_sensor_register() described in
+	section 1.1.3 is applicable here.
+
+	The benefit of using this interface to register sensor is that it
+	is not require to explicitly call thermal_zone_of_sensor_unregister()
+	in error path or during driver unbinding as this is done by driver
+	resource manager.
+
+	::
+
+		void devm_thermal_zone_of_sensor_unregister(struct device *dev,
+						struct thermal_zone_device *tzd)
+
+	This interface is resource managed version of
+	thermal_zone_of_sensor_unregister().
+	All details of thermal_zone_of_sensor_unregister() described in
+	section 1.1.4 is applicable here.
+	Normally this function will not need to be called and the resource
+	management code will ensure that the resource is freed.
+
+	::
+
+		int thermal_zone_get_slope(struct thermal_zone_device *tz)
+
+	This interface is used to read the slope attribute value
+	for the thermal zone device, which might be useful for platform
+	drivers for temperature calculations.
+
+	::
+
+		int thermal_zone_get_offset(struct thermal_zone_device *tz)
+
+	This interface is used to read the offset attribute value
+	for the thermal zone device, which might be useful for platform
+	drivers for temperature calculations.
+
+1.2 thermal cooling device interface
+------------------------------------
+
+
+    ::
+
+	struct thermal_cooling_device
+	*thermal_cooling_device_register(char *name,
+			void *devdata, struct thermal_cooling_device_ops *)
+
+    This interface function adds a new thermal cooling device (fan/processor/...)
+    to /sys/class/thermal/ folder as `cooling_device[0-*]`. It tries to bind itself
+    to all the thermal zone devices registered at the same time.
+
+    name:
+	the cooling device name.
+    devdata:
+	device private data.
+    ops:
+	thermal cooling devices call-backs.
+
+	.get_max_state:
+		get the Maximum throttle state of the cooling device.
+	.get_cur_state:
+		get the Currently requested throttle state of the
+		cooling device.
+	.set_cur_state:
+		set the Current throttle state of the cooling device.
+
+    ::
+
+	void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
+
+    This interface function removes the thermal cooling device.
+    It deletes the corresponding entry from /sys/class/thermal folder and
+    unbinds itself from all the thermal zone devices using it.
+
+1.3 interface for binding a thermal zone device with a thermal cooling device
+-----------------------------------------------------------------------------
+
+    ::
+
+	int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+		int trip, struct thermal_cooling_device *cdev,
+		unsigned long upper, unsigned long lower, unsigned int weight);
+
+    This interface function binds a thermal cooling device to a particular trip
+    point of a thermal zone device.
+
+    This function is usually called in the thermal zone device .bind callback.
+
+    tz:
+	  the thermal zone device
+    cdev:
+	  thermal cooling device
+    trip:
+	  indicates which trip point in this thermal zone the cooling device
+	  is associated with.
+    upper:
+	  the Maximum cooling state for this trip point.
+	  THERMAL_NO_LIMIT means no upper limit,
+	  and the cooling device can be in max_state.
+    lower:
+	  the Minimum cooling state can be used for this trip point.
+	  THERMAL_NO_LIMIT means no lower limit,
+	  and the cooling device can be in cooling state 0.
+    weight:
+	  the influence of this cooling device in this thermal
+	  zone.  See 1.4.1 below for more information.
+
+    ::
+
+	int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
+				int trip, struct thermal_cooling_device *cdev);
+
+    This interface function unbinds a thermal cooling device from a particular
+    trip point of a thermal zone device. This function is usually called in
+    the thermal zone device .unbind callback.
+
+    tz:
+	the thermal zone device
+    cdev:
+	thermal cooling device
+    trip:
+	indicates which trip point in this thermal zone the cooling device
+	is associated with.
+
+1.4 Thermal Zone Parameters
+---------------------------
+
+    ::
+
+	struct thermal_bind_params
+
+    This structure defines the following parameters that are used to bind
+    a zone with a cooling device for a particular trip point.
+
+    .cdev:
+	     The cooling device pointer
+    .weight:
+	     The 'influence' of a particular cooling device on this
+	     zone. This is relative to the rest of the cooling
+	     devices. For example, if all cooling devices have a
+	     weight of 1, then they all contribute the same. You can
+	     use percentages if you want, but it's not mandatory. A
+	     weight of 0 means that this cooling device doesn't
+	     contribute to the cooling of this zone unless all cooling
+	     devices have a weight of 0. If all weights are 0, then
+	     they all contribute the same.
+    .trip_mask:
+	       This is a bit mask that gives the binding relation between
+	       this thermal zone and cdev, for a particular trip point.
+	       If nth bit is set, then the cdev and thermal zone are bound
+	       for trip point n.
+    .binding_limits:
+		     This is an array of cooling state limits. Must have
+		     exactly 2 * thermal_zone.number_of_trip_points. It is an
+		     array consisting of tuples <lower-state upper-state> of
+		     state limits. Each trip will be associated with one state
+		     limit tuple when binding. A NULL pointer means
+		     <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> on all trips.
+		     These limits are used when binding a cdev to a trip point.
+    .match:
+	    This call back returns success(0) if the 'tz and cdev' need to
+	    be bound, as per platform data.
+
+    ::
+
+	struct thermal_zone_params
+
+    This structure defines the platform level parameters for a thermal zone.
+    This data, for each thermal zone should come from the platform layer.
+    This is an optional feature where some platforms can choose not to
+    provide this data.
+
+    .governor_name:
+	       Name of the thermal governor used for this zone
+    .no_hwmon:
+	       a boolean to indicate if the thermal to hwmon sysfs interface
+	       is required. when no_hwmon == false, a hwmon sysfs interface
+	       will be created. when no_hwmon == true, nothing will be done.
+	       In case the thermal_zone_params is NULL, the hwmon interface
+	       will be created (for backward compatibility).
+    .num_tbps:
+	       Number of thermal_bind_params entries for this zone
+    .tbp:
+	       thermal_bind_params entries
+
+2. sysfs attributes structure
+=============================
+
+==	================
+RO	read only value
+WO	write only value
+RW	read/write value
+==	================
+
+Thermal sysfs attributes will be represented under /sys/class/thermal.
+Hwmon sysfs I/F extension is also available under /sys/class/hwmon
+if hwmon is compiled in or built as a module.
+
+Thermal zone device sys I/F, created once it's registered::
+
+  /sys/class/thermal/thermal_zone[0-*]:
+    |---type:			Type of the thermal zone
+    |---temp:			Current temperature
+    |---mode:			Working mode of the thermal zone
+    |---policy:			Thermal governor used for this zone
+    |---available_policies:	Available thermal governors for this zone
+    |---trip_point_[0-*]_temp:	Trip point temperature
+    |---trip_point_[0-*]_type:	Trip point type
+    |---trip_point_[0-*]_hyst:	Hysteresis value for this trip point
+    |---emul_temp:		Emulated temperature set node
+    |---sustainable_power:      Sustainable dissipatable power
+    |---k_po:                   Proportional term during temperature overshoot
+    |---k_pu:                   Proportional term during temperature undershoot
+    |---k_i:                    PID's integral term in the power allocator gov
+    |---k_d:                    PID's derivative term in the power allocator
+    |---integral_cutoff:        Offset above which errors are accumulated
+    |---slope:                  Slope constant applied as linear extrapolation
+    |---offset:                 Offset constant applied as linear extrapolation
+
+Thermal cooling device sys I/F, created once it's registered::
+
+  /sys/class/thermal/cooling_device[0-*]:
+    |---type:			Type of the cooling device(processor/fan/...)
+    |---max_state:		Maximum cooling state of the cooling device
+    |---cur_state:		Current cooling state of the cooling device
+    |---stats:			Directory containing cooling device's statistics
+    |---stats/reset:		Writing any value resets the statistics
+    |---stats/time_in_state_ms:	Time (msec) spent in various cooling states
+    |---stats/total_trans:	Total number of times cooling state is changed
+    |---stats/trans_table:	Cooing state transition table
+
+
+Then next two dynamic attributes are created/removed in pairs. They represent
+the relationship between a thermal zone and its associated cooling device.
+They are created/removed for each successful execution of
+thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device.
+
+::
+
+  /sys/class/thermal/thermal_zone[0-*]:
+    |---cdev[0-*]:		[0-*]th cooling device in current thermal zone
+    |---cdev[0-*]_trip_point:	Trip point that cdev[0-*] is associated with
+    |---cdev[0-*]_weight:       Influence of the cooling device in
+				this thermal zone
+
+Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
+the generic thermal driver also creates a hwmon sysfs I/F for each _type_
+of thermal zone device. E.g. the generic thermal driver registers one hwmon
+class device and build the associated hwmon sysfs I/F for all the registered
+ACPI thermal zones.
+
+::
+
+  /sys/class/hwmon/hwmon[0-*]:
+    |---name:			The type of the thermal zone devices
+    |---temp[1-*]_input:	The current temperature of thermal zone [1-*]
+    |---temp[1-*]_critical:	The critical trip point of thermal zone [1-*]
+
+Please read Documentation/hwmon/sysfs-interface.rst for additional information.
+
+Thermal zone attributes
+-----------------------
+
+type
+	Strings which represent the thermal zone type.
+	This is given by thermal zone driver as part of registration.
+	E.g: "acpitz" indicates it's an ACPI thermal device.
+	In order to keep it consistent with hwmon sys attribute; this should
+	be a short, lowercase string, not containing spaces nor dashes.
+	RO, Required
+
+temp
+	Current temperature as reported by thermal zone (sensor).
+	Unit: millidegree Celsius
+	RO, Required
+
+mode
+	One of the predefined values in [enabled, disabled].
+	This file gives information about the algorithm that is currently
+	managing the thermal zone. It can be either default kernel based
+	algorithm or user space application.
+
+	enabled
+			  enable Kernel Thermal management.
+	disabled
+			  Preventing kernel thermal zone driver actions upon
+			  trip points so that user application can take full
+			  charge of the thermal management.
+
+	RW, Optional
+
+policy
+	One of the various thermal governors used for a particular zone.
+
+	RW, Required
+
+available_policies
+	Available thermal governors which can be used for a particular zone.
+
+	RO, Required
+
+`trip_point_[0-*]_temp`
+	The temperature above which trip point will be fired.
+
+	Unit: millidegree Celsius
+
+	RO, Optional
+
+`trip_point_[0-*]_type`
+	Strings which indicate the type of the trip point.
+
+	E.g. it can be one of critical, hot, passive, `active[0-*]` for ACPI
+	thermal zone.
+
+	RO, Optional
+
+`trip_point_[0-*]_hyst`
+	The hysteresis value for a trip point, represented as an integer
+	Unit: Celsius
+	RW, Optional
+
+`cdev[0-*]`
+	Sysfs link to the thermal cooling device node where the sys I/F
+	for cooling device throttling control represents.
+
+	RO, Optional
+
+`cdev[0-*]_trip_point`
+	The trip point in this thermal zone which `cdev[0-*]` is associated
+	with; -1 means the cooling device is not associated with any trip
+	point.
+
+	RO, Optional
+
+`cdev[0-*]_weight`
+	The influence of `cdev[0-*]` in this thermal zone. This value
+	is relative to the rest of cooling devices in the thermal
+	zone. For example, if a cooling device has a weight double
+	than that of other, it's twice as effective in cooling the
+	thermal zone.
+
+	RW, Optional
+
+passive
+	Attribute is only present for zones in which the passive cooling
+	policy is not supported by native thermal driver. Default is zero
+	and can be set to a temperature (in millidegrees) to enable a
+	passive trip point for the zone. Activation is done by polling with
+	an interval of 1 second.
+
+	Unit: millidegrees Celsius
+
+	Valid values: 0 (disabled) or greater than 1000
+
+	RW, Optional
+
+emul_temp
+	Interface to set the emulated temperature method in thermal zone
+	(sensor). After setting this temperature, the thermal zone may pass
+	this temperature to platform emulation function if registered or
+	cache it locally. This is useful in debugging different temperature
+	threshold and its associated cooling action. This is write only node
+	and writing 0 on this node should disable emulation.
+	Unit: millidegree Celsius
+
+	WO, Optional
+
+	  WARNING:
+	    Be careful while enabling this option on production systems,
+	    because userland can easily disable the thermal policy by simply
+	    flooding this sysfs node with low temperature values.
+
+sustainable_power
+	An estimate of the sustained power that can be dissipated by
+	the thermal zone. Used by the power allocator governor. For
+	more information see Documentation/driver-api/thermal/power_allocator.rst
+
+	Unit: milliwatts
+
+	RW, Optional
+
+k_po
+	The proportional term of the power allocator governor's PID
+	controller during temperature overshoot. Temperature overshoot
+	is when the current temperature is above the "desired
+	temperature" trip point. For more information see
+	Documentation/driver-api/thermal/power_allocator.rst
+
+	RW, Optional
+
+k_pu
+	The proportional term of the power allocator governor's PID
+	controller during temperature undershoot. Temperature undershoot
+	is when the current temperature is below the "desired
+	temperature" trip point. For more information see
+	Documentation/driver-api/thermal/power_allocator.rst
+
+	RW, Optional
+
+k_i
+	The integral term of the power allocator governor's PID
+	controller. This term allows the PID controller to compensate
+	for long term drift. For more information see
+	Documentation/driver-api/thermal/power_allocator.rst
+
+	RW, Optional
+
+k_d
+	The derivative term of the power allocator governor's PID
+	controller. For more information see
+	Documentation/driver-api/thermal/power_allocator.rst
+
+	RW, Optional
+
+integral_cutoff
+	Temperature offset from the desired temperature trip point
+	above which the integral term of the power allocator
+	governor's PID controller starts accumulating errors. For
+	example, if integral_cutoff is 0, then the integral term only
+	accumulates error when temperature is above the desired
+	temperature trip point. For more information see
+	Documentation/driver-api/thermal/power_allocator.rst
+
+	Unit: millidegree Celsius
+
+	RW, Optional
+
+slope
+	The slope constant used in a linear extrapolation model
+	to determine a hotspot temperature based off the sensor's
+	raw readings. It is up to the device driver to determine
+	the usage of these values.
+
+	RW, Optional
+
+offset
+	The offset constant used in a linear extrapolation model
+	to determine a hotspot temperature based off the sensor's
+	raw readings. It is up to the device driver to determine
+	the usage of these values.
+
+	RW, Optional
+
+Cooling device attributes
+-------------------------
+
+type
+	String which represents the type of device, e.g:
+
+	- for generic ACPI: should be "Fan", "Processor" or "LCD"
+	- for memory controller device on intel_menlow platform:
+	  should be "Memory controller".
+
+	RO, Required
+
+max_state
+	The maximum permissible cooling state of this cooling device.
+
+	RO, Required
+
+cur_state
+	The current cooling state of this cooling device.
+	The value can any integer numbers between 0 and max_state:
+
+	- cur_state == 0 means no cooling
+	- cur_state == max_state means the maximum cooling.
+
+	RW, Required
+
+stats/reset
+	Writing any value resets the cooling device's statistics.
+	WO, Required
+
+stats/time_in_state_ms:
+	The amount of time spent by the cooling device in various cooling
+	states. The output will have "<state> <time>" pair in each line, which
+	will mean this cooling device spent <time> msec of time at <state>.
+	Output will have one line for each of the supported states.  usertime
+	units here is 10mS (similar to other time exported in /proc).
+	RO, Required
+
+
+stats/total_trans:
+	A single positive value showing the total number of times the state of a
+	cooling device is changed.
+
+	RO, Required
+
+stats/trans_table:
+	This gives fine grained information about all the cooling state
+	transitions. The cat output here is a two dimensional matrix, where an
+	entry <i,j> (row i, column j) represents the number of transitions from
+	State_i to State_j. If the transition table is bigger than PAGE_SIZE,
+	reading this will return an -EFBIG error.
+	RO, Required
+
+3. A simple implementation
+==========================
+
+ACPI thermal zone may support multiple trip points like critical, hot,
+passive, active. If an ACPI thermal zone supports critical, passive,
+active[0] and active[1] at the same time, it may register itself as a
+thermal_zone_device (thermal_zone1) with 4 trip points in all.
+It has one processor and one fan, which are both registered as
+thermal_cooling_device. Both are considered to have the same
+effectiveness in cooling the thermal zone.
+
+If the processor is listed in _PSL method, and the fan is listed in _AL0
+method, the sys I/F structure will be built like this::
+
+ /sys/class/thermal:
+  |thermal_zone1:
+    |---type:			acpitz
+    |---temp:			37000
+    |---mode:			enabled
+    |---policy:			step_wise
+    |---available_policies:	step_wise fair_share
+    |---trip_point_0_temp:	100000
+    |---trip_point_0_type:	critical
+    |---trip_point_1_temp:	80000
+    |---trip_point_1_type:	passive
+    |---trip_point_2_temp:	70000
+    |---trip_point_2_type:	active0
+    |---trip_point_3_temp:	60000
+    |---trip_point_3_type:	active1
+    |---cdev0:			--->/sys/class/thermal/cooling_device0
+    |---cdev0_trip_point:	1	/* cdev0 can be used for passive */
+    |---cdev0_weight:           1024
+    |---cdev1:			--->/sys/class/thermal/cooling_device3
+    |---cdev1_trip_point:	2	/* cdev1 can be used for active[0]*/
+    |---cdev1_weight:           1024
+
+  |cooling_device0:
+    |---type:			Processor
+    |---max_state:		8
+    |---cur_state:		0
+
+  |cooling_device3:
+    |---type:			Fan
+    |---max_state:		2
+    |---cur_state:		0
+
+ /sys/class/hwmon:
+  |hwmon0:
+    |---name:			acpitz
+    |---temp1_input:		37000
+    |---temp1_crit:		100000
+
+4. Event Notification
+=====================
+
+The framework includes a simple notification mechanism, in the form of a
+netlink event. Netlink socket initialization is done during the _init_
+of the framework. Drivers which intend to use the notification mechanism
+just need to call thermal_generate_netlink_event() with two arguments viz
+(originator, event). The originator is a pointer to struct thermal_zone_device
+from where the event has been originated. An integer which represents the
+thermal zone device will be used in the message to identify the zone. The
+event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
+THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
+crosses any of the configured thresholds.
+
+5. Export Symbol APIs
+=====================
+
+5.1. get_tz_trend
+-----------------
+
+This function returns the trend of a thermal zone, i.e the rate of change
+of temperature of the thermal zone. Ideally, the thermal sensor drivers
+are supposed to implement the callback. If they don't, the thermal
+framework calculated the trend by comparing the previous and the current
+temperature values.
+
+5.2. get_thermal_instance
+-------------------------
+
+This function returns the thermal_instance corresponding to a given
+{thermal_zone, cooling_device, trip_point} combination. Returns NULL
+if such an instance does not exist.
+
+5.3. thermal_notify_framework
+-----------------------------
+
+This function handles the trip events from sensor drivers. It starts
+throttling the cooling devices according to the policy configured.
+For CRITICAL and HOT trip points, this notifies the respective drivers,
+and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
+The throttling policy is based on the configured platform data; if no
+platform data is provided, this uses the step_wise throttling policy.
+
+5.4. thermal_cdev_update
+------------------------
+
+This function serves as an arbitrator to set the state of a cooling
+device. It sets the cooling device to the deepest cooling state if
+possible.
+
+6. thermal_emergency_poweroff
+=============================
+
+On an event of critical trip temperature crossing. Thermal framework
+allows the system to shutdown gracefully by calling orderly_poweroff().
+In the event of a failure of orderly_poweroff() to shut down the system
+we are in danger of keeping the system alive at undesirably high
+temperatures. To mitigate this high risk scenario we program a work
+queue to fire after a pre-determined number of seconds to start
+an emergency shutdown of the device using the kernel_power_off()
+function. In case kernel_power_off() fails then finally
+emergency_restart() is called in the worst case.
+
+The delay should be carefully profiled so as to give adequate time for
+orderly_poweroff(). In case of failure of an orderly_poweroff() the
+emergency poweroff kicks in after the delay has elapsed and shuts down
+the system.
+
+If set to 0 emergency poweroff will not be supported. So a carefully
+profiled non-zero positive value is a must for emergerncy poweroff to be
+triggered.
diff --git a/Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst b/Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst
new file mode 100644
index 000000000000..2ac42ccd236f
--- /dev/null
+++ b/Documentation/driver-api/thermal/x86_pkg_temperature_thermal.rst
@@ -0,0 +1,55 @@
+===================================
+Kernel driver: x86_pkg_temp_thermal
+===================================
+
+Supported chips:
+
+* x86: with package level thermal management
+
+(Verify using: CPUID.06H:EAX[bit 6] =1)
+
+Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Reference
+---------
+
+Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013):
+Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT
+
+Description
+-----------
+
+This driver register CPU digital temperature package level sensor as a thermal
+zone with maximum two user mode configurable trip points. Number of trip points
+depends on the capability of the package. Once the trip point is violated,
+user mode can receive notification via thermal notification mechanism and can
+take any action to control temperature.
+
+
+Threshold management
+--------------------
+Each package will register as a thermal zone under /sys/class/thermal.
+
+Example::
+
+	/sys/class/thermal/thermal_zone1
+
+This contains two trip points:
+
+- trip_point_0_temp
+- trip_point_1_temp
+
+User can set any temperature between 0 to TJ-Max temperature. Temperature units
+are in milli-degree Celsius. Refer to "Documentation/driver-api/thermal/sysfs-api.rst" for
+thermal sys-fs details.
+
+Any value other than 0 in these trip points, can trigger thermal notifications.
+Setting 0, stops sending thermal notifications.
+
+Thermal notifications:
+To get kobject-uevent notifications, set the thermal zone
+policy to "user_space".
+
+For example::
+
+	echo -n "user_space" > policy
diff --git a/Documentation/thermal/cpu-cooling-api.rst b/Documentation/thermal/cpu-cooling-api.rst
deleted file mode 100644
index 645d914c45a6..000000000000
--- a/Documentation/thermal/cpu-cooling-api.rst
+++ /dev/null
@@ -1,107 +0,0 @@
-=======================
-CPU cooling APIs How To
-=======================
-
-Written by Amit Daniel Kachhap <amit.kachhap@linaro.org>
-
-Updated: 6 Jan 2015
-
-Copyright (c)  2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
-
-0. Introduction
-===============
-
-The generic cpu cooling(freq clipping) provides registration/unregistration APIs
-to the caller. The binding of the cooling devices to the trip point is left for
-the user. The registration APIs returns the cooling device pointer.
-
-1. cpu cooling APIs
-===================
-
-1.1 cpufreq registration/unregistration APIs
---------------------------------------------
-
-    ::
-
-	struct thermal_cooling_device
-	*cpufreq_cooling_register(struct cpumask *clip_cpus)
-
-    This interface function registers the cpufreq cooling device with the name
-    "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
-    cooling devices.
-
-   clip_cpus:
-	cpumask of cpus where the frequency constraints will happen.
-
-    ::
-
-	struct thermal_cooling_device
-	*of_cpufreq_cooling_register(struct cpufreq_policy *policy)
-
-    This interface function registers the cpufreq cooling device with
-    the name "thermal-cpufreq-%x" linking it with a device tree node, in
-    order to bind it via the thermal DT code. This api can support multiple
-    instances of cpufreq cooling devices.
-
-    policy:
-	CPUFreq policy.
-
-
-    ::
-
-	void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
-
-    This interface function unregisters the "thermal-cpufreq-%x" cooling device.
-
-    cdev: Cooling device pointer which has to be unregistered.
-
-2. Power models
-===============
-
-The power API registration functions provide a simple power model for
-CPUs.  The current power is calculated as dynamic power (static power isn't
-supported currently).  This power model requires that the operating-points of
-the CPUs are registered using the kernel's opp library and the
-`cpufreq_frequency_table` is assigned to the `struct device` of the
-cpu.  If you are using CONFIG_CPUFREQ_DT then the
-`cpufreq_frequency_table` should already be assigned to the cpu
-device.
-
-The dynamic power consumption of a processor depends on many factors.
-For a given processor implementation the primary factors are:
-
-- The time the processor spends running, consuming dynamic power, as
-  compared to the time in idle states where dynamic consumption is
-  negligible.  Herein we refer to this as 'utilisation'.
-- The voltage and frequency levels as a result of DVFS.  The DVFS
-  level is a dominant factor governing power consumption.
-- In running time the 'execution' behaviour (instruction types, memory
-  access patterns and so forth) causes, in most cases, a second order
-  variation.  In pathological cases this variation can be significant,
-  but typically it is of a much lesser impact than the factors above.
-
-A high level dynamic power consumption model may then be represented as::
-
-	Pdyn = f(run) * Voltage^2 * Frequency * Utilisation
-
-f(run) here represents the described execution behaviour and its
-result has a units of Watts/Hz/Volt^2 (this often expressed in
-mW/MHz/uVolt^2)
-
-The detailed behaviour for f(run) could be modelled on-line.  However,
-in practice, such an on-line model has dependencies on a number of
-implementation specific processor support and characterisation
-factors.  Therefore, in initial implementation that contribution is
-represented as a constant coefficient.  This is a simplification
-consistent with the relative contribution to overall power variation.
-
-In this simplified representation our model becomes::
-
-	Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation
-
-Where `capacitance` is a constant that represents an indicative
-running time dynamic power coefficient in fundamental units of
-mW/MHz/uVolt^2.  Typical values for mobile CPUs might lie in range
-from 100 to 500.  For reference, the approximate values for the SoC in
-ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and
-140 for the Cortex-A53 cluster.
diff --git a/Documentation/thermal/exynos_thermal.rst b/Documentation/thermal/exynos_thermal.rst
deleted file mode 100644
index 5bd556566c70..000000000000
--- a/Documentation/thermal/exynos_thermal.rst
+++ /dev/null
@@ -1,90 +0,0 @@
-========================
-Kernel driver exynos_tmu
-========================
-
-Supported chips:
-
-* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
-
-  Datasheet: Not publicly available
-
-Authors: Donggeun Kim <dg77.kim@samsung.com>
-Authors: Amit Daniel <amit.daniel@samsung.com>
-
-TMU controller Description:
----------------------------
-
-This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
-
-The chip only exposes the measured 8-bit temperature code value
-through a register.
-Temperature can be taken from the temperature code.
-There are three equations converting from temperature to temperature code.
-
-The three equations are:
-  1. Two point trimming::
-
-	Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1
-
-  2. One point trimming::
-
-	Tc = T + TI1 - 25
-
-  3. No trimming::
-
-	Tc = T + 50
-
-  Tc:
-       Temperature code, T: Temperature,
-  TI1:
-       Trimming info for 25 degree Celsius (stored at TRIMINFO register)
-       Temperature code measured at 25 degree Celsius which is unchanged
-  TI2:
-       Trimming info for 85 degree Celsius (stored at TRIMINFO register)
-       Temperature code measured at 85 degree Celsius which is unchanged
-
-TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
-when temperature exceeds pre-defined levels.
-The maximum number of configurable threshold is five.
-The threshold levels are defined as follows::
-
-  Level_0: current temperature > trigger_level_0 + threshold
-  Level_1: current temperature > trigger_level_1 + threshold
-  Level_2: current temperature > trigger_level_2 + threshold
-  Level_3: current temperature > trigger_level_3 + threshold
-
-The threshold and each trigger_level are set
-through the corresponding registers.
-
-When an interrupt occurs, this driver notify kernel thermal framework
-with the function exynos_report_trigger.
-Although an interrupt condition for level_0 can be set,
-it can be used to synchronize the cooling action.
-
-TMU driver description:
------------------------
-
-The exynos thermal driver is structured as::
-
-					Kernel Core thermal framework
-				(thermal_core.c, step_wise.c, cpu_cooling.c)
-								^
-								|
-								|
-  TMU configuration data -----> TMU Driver  <----> Exynos Core thermal wrapper
-  (exynos_tmu_data.c)	      (exynos_tmu.c)	   (exynos_thermal_common.c)
-  (exynos_tmu_data.h)	      (exynos_tmu.h)	   (exynos_thermal_common.h)
-
-a) TMU configuration data:
-		This consist of TMU register offsets/bitfields
-		described through structure exynos_tmu_registers. Also several
-		other platform data (struct exynos_tmu_platform_data) members
-		are used to configure the TMU.
-b) TMU driver:
-		This component initialises the TMU controller and sets different
-		thresholds. It invokes core thermal implementation with the call
-		exynos_report_trigger.
-c) Exynos Core thermal wrapper:
-		This provides 3 wrapper function to use the
-		Kernel core thermal framework. They are exynos_unregister_thermal,
-		exynos_register_thermal and exynos_report_trigger.
diff --git a/Documentation/thermal/exynos_thermal_emulation.rst b/Documentation/thermal/exynos_thermal_emulation.rst
deleted file mode 100644
index c21d10838bc5..000000000000
--- a/Documentation/thermal/exynos_thermal_emulation.rst
+++ /dev/null
@@ -1,61 +0,0 @@
-=====================
-Exynos Emulation Mode
-=====================
-
-Copyright (C) 2012 Samsung Electronics
-
-Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
-
-Description
------------
-
-Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal
-management unit. Thermal emulation mode supports software debug for
-TMU's operation. User can set temperature manually with software code
-and TMU will read current temperature from user value not from sensor's
-value.
-
-Enabling CONFIG_THERMAL_EMULATION option will make this support
-available. When it's enabled, sysfs node will be created as
-/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp.
-
-The sysfs node, 'emul_node', will contain value 0 for the initial state.
-When you input any temperature you want to update to sysfs node, it
-automatically enable emulation mode and current temperature will be
-changed into it.
-
-(Exynos also supports user changeable delay time which would be used to
-delay of changing temperature. However, this node only uses same delay
-of real sensing time, 938us.)
-
-Exynos emulation mode requires synchronous of value changing and
-enabling. It means when you want to update the any value of delay or
-next temperature, then you have to enable emulation mode at the same
-time. (Or you have to keep the mode enabling.) If you don't, it fails to
-change the value to updated one and just use last succeessful value
-repeatedly. That's why this node gives users the right to change
-termerpature only. Just one interface makes it more simply to use.
-
-Disabling emulation mode only requires writing value 0 to sysfs node.
-
-::
-
-
-  TEMP	120 |
-	    |
-	100 |
-	    |
-	 80 |
-	    |				 +-----------
-	 60 |      			 |	    |
-	    |		   +-------------|          |
-	 40 |              |         	 |          |
-	    |		   |		 |          |
-	 20 |		   |		 |          +----------
-	    |		   |		 |          |          |
-	  0 |______________|_____________|__________|__________|_________
-		   A		 A	    A		       A     TIME
-		   |<----->|	 |<----->|  |<----->|	       |
-		   | 938us |  	 |	 |  |       |          |
-  emulation   : 0  50	   |  	 70      |  20      |          0
-  current temp:   sensor   50		 70         20	      sensor
diff --git a/Documentation/thermal/index.rst b/Documentation/thermal/index.rst
deleted file mode 100644
index 8c1c00146cad..000000000000
--- a/Documentation/thermal/index.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-:orphan:
-
-=======
-Thermal
-=======
-
-.. toctree::
-   :maxdepth: 1
-
-   cpu-cooling-api
-   sysfs-api
-   power_allocator
-
-   exynos_thermal
-   exynos_thermal_emulation
-   intel_powerclamp
-   nouveau_thermal
-   x86_pkg_temperature_thermal
diff --git a/Documentation/thermal/intel_powerclamp.rst b/Documentation/thermal/intel_powerclamp.rst
deleted file mode 100644
index 3f6dfb0b3ea6..000000000000
--- a/Documentation/thermal/intel_powerclamp.rst
+++ /dev/null
@@ -1,320 +0,0 @@
-=======================
-Intel Powerclamp Driver
-=======================
-
-By:
-  - Arjan van de Ven <arjan@linux.intel.com>
-  - Jacob Pan <jacob.jun.pan@linux.intel.com>
-
-.. Contents:
-
-	(*) Introduction
-	    - Goals and Objectives
-
-	(*) Theory of Operation
-	    - Idle Injection
-	    - Calibration
-
-	(*) Performance Analysis
-	    - Effectiveness and Limitations
-	    - Power vs Performance
-	    - Scalability
-	    - Calibration
-	    - Comparison with Alternative Techniques
-
-	(*) Usage and Interfaces
-	    - Generic Thermal Layer (sysfs)
-	    - Kernel APIs (TBD)
-
-INTRODUCTION
-============
-
-Consider the situation where a system’s power consumption must be
-reduced at runtime, due to power budget, thermal constraint, or noise
-level, and where active cooling is not preferred. Software managed
-passive power reduction must be performed to prevent the hardware
-actions that are designed for catastrophic scenarios.
-
-Currently, P-states, T-states (clock modulation), and CPU offlining
-are used for CPU throttling.
-
-On Intel CPUs, C-states provide effective power reduction, but so far
-they’re only used opportunistically, based on workload. With the
-development of intel_powerclamp driver, the method of synchronizing
-idle injection across all online CPU threads was introduced. The goal
-is to achieve forced and controllable C-state residency.
-
-Test/Analysis has been made in the areas of power, performance,
-scalability, and user experience. In many cases, clear advantage is
-shown over taking the CPU offline or modulating the CPU clock.
-
-
-THEORY OF OPERATION
-===================
-
-Idle Injection
---------------
-
-On modern Intel processors (Nehalem or later), package level C-state
-residency is available in MSRs, thus also available to the kernel.
-
-These MSRs are::
-
-      #define MSR_PKG_C2_RESIDENCY      0x60D
-      #define MSR_PKG_C3_RESIDENCY      0x3F8
-      #define MSR_PKG_C6_RESIDENCY      0x3F9
-      #define MSR_PKG_C7_RESIDENCY      0x3FA
-
-If the kernel can also inject idle time to the system, then a
-closed-loop control system can be established that manages package
-level C-state. The intel_powerclamp driver is conceived as such a
-control system, where the target set point is a user-selected idle
-ratio (based on power reduction), and the error is the difference
-between the actual package level C-state residency ratio and the target idle
-ratio.
-
-Injection is controlled by high priority kernel threads, spawned for
-each online CPU.
-
-These kernel threads, with SCHED_FIFO class, are created to perform
-clamping actions of controlled duty ratio and duration. Each per-CPU
-thread synchronizes its idle time and duration, based on the rounding
-of jiffies, so accumulated errors can be prevented to avoid a jittery
-effect. Threads are also bound to the CPU such that they cannot be
-migrated, unless the CPU is taken offline. In this case, threads
-belong to the offlined CPUs will be terminated immediately.
-
-Running as SCHED_FIFO and relatively high priority, also allows such
-scheme to work for both preemptable and non-preemptable kernels.
-Alignment of idle time around jiffies ensures scalability for HZ
-values. This effect can be better visualized using a Perf timechart.
-The following diagram shows the behavior of kernel thread
-kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
-for a given "duration", then relinquishes the CPU to other tasks,
-until the next time interval.
-
-The NOHZ schedule tick is disabled during idle time, but interrupts
-are not masked. Tests show that the extra wakeups from scheduler tick
-have a dramatic impact on the effectiveness of the powerclamp driver
-on large scale systems (Westmere system with 80 processors).
-
-::
-
-  CPU0
-		    ____________          ____________
-  kidle_inject/0   |   sleep    |  mwait |  sleep     |
-	  _________|            |________|            |_______
-				 duration
-  CPU1
-		    ____________          ____________
-  kidle_inject/1   |   sleep    |  mwait |  sleep     |
-	  _________|            |________|            |_______
-				^
-				|
-				|
-				roundup(jiffies, interval)
-
-Only one CPU is allowed to collect statistics and update global
-control parameters. This CPU is referred to as the controlling CPU in
-this document. The controlling CPU is elected at runtime, with a
-policy that favors BSP, taking into account the possibility of a CPU
-hot-plug.
-
-In terms of dynamics of the idle control system, package level idle
-time is considered largely as a non-causal system where its behavior
-cannot be based on the past or current input. Therefore, the
-intel_powerclamp driver attempts to enforce the desired idle time
-instantly as given input (target idle ratio). After injection,
-powerclamp monitors the actual idle for a given time window and adjust
-the next injection accordingly to avoid over/under correction.
-
-When used in a causal control system, such as a temperature control,
-it is up to the user of this driver to implement algorithms where
-past samples and outputs are included in the feedback. For example, a
-PID-based thermal controller can use the powerclamp driver to
-maintain a desired target temperature, based on integral and
-derivative gains of the past samples.
-
-
-
-Calibration
------------
-During scalability testing, it is observed that synchronized actions
-among CPUs become challenging as the number of cores grows. This is
-also true for the ability of a system to enter package level C-states.
-
-To make sure the intel_powerclamp driver scales well, online
-calibration is implemented. The goals for doing such a calibration
-are:
-
-a) determine the effective range of idle injection ratio
-b) determine the amount of compensation needed at each target ratio
-
-Compensation to each target ratio consists of two parts:
-
-	a) steady state error compensation
-	This is to offset the error occurring when the system can
-	enter idle without extra wakeups (such as external interrupts).
-
-	b) dynamic error compensation
-	When an excessive amount of wakeups occurs during idle, an
-	additional idle ratio can be added to quiet interrupts, by
-	slowing down CPU activities.
-
-A debugfs file is provided for the user to examine compensation
-progress and results, such as on a Westmere system::
-
-  [jacob@nex01 ~]$ cat
-  /sys/kernel/debug/intel_powerclamp/powerclamp_calib
-  controlling cpu: 0
-  pct confidence steady dynamic (compensation)
-  0       0       0       0
-  1       1       0       0
-  2       1       1       0
-  3       3       1       0
-  4       3       1       0
-  5       3       1       0
-  6       3       1       0
-  7       3       1       0
-  8       3       1       0
-  ...
-  30      3       2       0
-  31      3       2       0
-  32      3       1       0
-  33      3       2       0
-  34      3       1       0
-  35      3       2       0
-  36      3       1       0
-  37      3       2       0
-  38      3       1       0
-  39      3       2       0
-  40      3       3       0
-  41      3       1       0
-  42      3       2       0
-  43      3       1       0
-  44      3       1       0
-  45      3       2       0
-  46      3       3       0
-  47      3       0       0
-  48      3       2       0
-  49      3       3       0
-
-Calibration occurs during runtime. No offline method is available.
-Steady state compensation is used only when confidence levels of all
-adjacent ratios have reached satisfactory level. A confidence level
-is accumulated based on clean data collected at runtime. Data
-collected during a period without extra interrupts is considered
-clean.
-
-To compensate for excessive amounts of wakeup during idle, additional
-idle time is injected when such a condition is detected. Currently,
-we have a simple algorithm to double the injection ratio. A possible
-enhancement might be to throttle the offending IRQ, such as delaying
-EOI for level triggered interrupts. But it is a challenge to be
-non-intrusive to the scheduler or the IRQ core code.
-
-
-CPU Online/Offline
-------------------
-Per-CPU kernel threads are started/stopped upon receiving
-notifications of CPU hotplug activities. The intel_powerclamp driver
-keeps track of clamping kernel threads, even after they are migrated
-to other CPUs, after a CPU offline event.
-
-
-Performance Analysis
-====================
-This section describes the general performance data collected on
-multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
-
-Effectiveness and Limitations
------------------------------
-The maximum range that idle injection is allowed is capped at 50
-percent. As mentioned earlier, since interrupts are allowed during
-forced idle time, excessive interrupts could result in less
-effectiveness. The extreme case would be doing a ping -f to generated
-flooded network interrupts without much CPU acknowledgement. In this
-case, little can be done from the idle injection threads. In most
-normal cases, such as scp a large file, applications can be throttled
-by the powerclamp driver, since slowing down the CPU also slows down
-network protocol processing, which in turn reduces interrupts.
-
-When control parameters change at runtime by the controlling CPU, it
-may take an additional period for the rest of the CPUs to catch up
-with the changes. During this time, idle injection is out of sync,
-thus not able to enter package C- states at the expected ratio. But
-this effect is minor, in that in most cases change to the target
-ratio is updated much less frequently than the idle injection
-frequency.
-
-Scalability
------------
-Tests also show a minor, but measurable, difference between the 4P/8P
-Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
-More compensation is needed on Westmere for the same amount of
-target idle ratio. The compensation also increases as the idle ratio
-gets larger. The above reason constitutes the need for the
-calibration code.
-
-On the IVB 8P system, compared to an offline CPU, powerclamp can
-achieve up to 40% better performance per watt. (measured by a spin
-counter summed over per CPU counting threads spawned for all running
-CPUs).
-
-Usage and Interfaces
-====================
-The powerclamp driver is registered to the generic thermal layer as a
-cooling device. Currently, it’s not bound to any thermal zones::
-
-  jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
-  cur_state:0
-  max_state:50
-  type:intel_powerclamp
-
-cur_state allows user to set the desired idle percentage. Writing 0 to
-cur_state will stop idle injection. Writing a value between 1 and
-max_state will start the idle injection. Reading cur_state returns the
-actual and current idle percentage. This may not be the same value
-set by the user in that current idle percentage depends on workload
-and includes natural idle. When idle injection is disabled, reading
-cur_state returns value -1 instead of 0 which is to avoid confusing
-100% busy state with the disabled state.
-
-Example usage:
-- To inject 25% idle time::
-
-	$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
-
-If the system is not busy and has more than 25% idle time already,
-then the powerclamp driver will not start idle injection. Using Top
-will not show idle injection kernel threads.
-
-If the system is busy (spin test below) and has less than 25% natural
-idle time, powerclamp kernel threads will do idle injection. Forced
-idle time is accounted as normal idle in that common code path is
-taken as the idle task.
-
-In this example, 24.1% idle is shown. This helps the system admin or
-user determine the cause of slowdown, when a powerclamp driver is in action::
-
-
-  Tasks: 197 total,   1 running, 196 sleeping,   0 stopped,   0 zombie
-  Cpu(s): 71.2%us,  4.7%sy,  0.0%ni, 24.1%id,  0.0%wa,  0.0%hi,  0.0%si,  0.0%st
-  Mem:   3943228k total,  1689632k used,  2253596k free,    74960k buffers
-  Swap:  4087804k total,        0k used,  4087804k free,   945336k cached
-
-    PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND
-   3352 jacob     20   0  262m  644  428 S  286  0.0   0:17.16 spin
-   3341 root     -51   0     0    0    0 D   25  0.0   0:01.62 kidle_inject/0
-   3344 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/3
-   3342 root     -51   0     0    0    0 D   25  0.0   0:01.61 kidle_inject/1
-   3343 root     -51   0     0    0    0 D   25  0.0   0:01.60 kidle_inject/2
-   2935 jacob     20   0  696m 125m  35m S    5  3.3   0:31.11 firefox
-   1546 root      20   0  158m  20m 6640 S    3  0.5   0:26.97 Xorg
-   2100 jacob     20   0 1223m  88m  30m S    3  2.3   0:23.68 compiz
-
-Tests have shown that by using the powerclamp driver as a cooling
-device, a PID based userspace thermal controller can manage to
-control CPU temperature effectively, when no other thermal influence
-is added. For example, a UltraBook user can compile the kernel under
-certain temperature (below most active trip points).
diff --git a/Documentation/thermal/nouveau_thermal.rst b/Documentation/thermal/nouveau_thermal.rst
deleted file mode 100644
index 37255fd6735d..000000000000
--- a/Documentation/thermal/nouveau_thermal.rst
+++ /dev/null
@@ -1,96 +0,0 @@
-=====================
-Kernel driver nouveau
-=====================
-
-Supported chips:
-
-* NV43+
-
-Authors: Martin Peres (mupuf) <martin.peres@free.fr>
-
-Description
------------
-
-This driver allows to read the GPU core temperature, drive the GPU fan and
-set temperature alarms.
-
-Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau
-cannot access any of the i2c external monitoring chips it may find. If you
-have one of those, temperature and/or fan management through Nouveau's HWMON
-interface is likely not to work. This document may then not cover your situation
-entirely.
-
-Temperature management
-----------------------
-
-Temperature is exposed under as a read-only HWMON attribute temp1_input.
-
-In order to protect the GPU from overheating, Nouveau supports 4 configurable
-temperature thresholds:
-
- * Fan_boost:
-	Fan speed is set to 100% when reaching this temperature;
- * Downclock:
-	The GPU will be downclocked to reduce its power dissipation;
- * Critical:
-	The GPU is put on hold to further lower power dissipation;
- * Shutdown:
-	Shut the computer down to protect your GPU.
-
-WARNING:
-	Some of these thresholds may not be used by Nouveau depending
-	on your chipset.
-
-The default value for these thresholds comes from the GPU's vbios. These
-thresholds can be configured thanks to the following HWMON attributes:
-
- * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst;
- * Downclock: temp1_max and temp1_max_hyst;
- * Critical: temp1_crit and temp1_crit_hyst;
- * Shutdown: temp1_emergency and temp1_emergency_hyst.
-
-NOTE: Remember that the values are stored as milli degrees Celsius. Don't forget
-to multiply!
-
-Fan management
---------------
-
-Not all cards have a drivable fan. If you do, then the following HWMON
-attributes should be available:
-
- * pwm1_enable:
-	Current fan management mode (NONE, MANUAL or AUTO);
- * pwm1:
-	Current PWM value (power percentage);
- * pwm1_min:
-	The minimum PWM speed allowed;
- * pwm1_max:
-	The maximum PWM speed allowed (bypassed when hitting Fan_boost);
-
-You may also have the following attribute:
-
- * fan1_input:
-	Speed in RPM of your fan.
-
-Your fan can be driven in different modes:
-
- * 0: The fan is left untouched;
- * 1: The fan can be driven in manual (use pwm1 to change the speed);
- * 2; The fan is driven automatically depending on the temperature.
-
-NOTE:
-  Be sure to use the manual mode if you want to drive the fan speed manually
-
-NOTE2:
-  When operating in manual mode outside the vbios-defined
-  [PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate
-  depending on your hardware.
-
-Bug reports
------------
-
-Thermal management on Nouveau is new and may not work on all cards. If you have
-inquiries, please ping mupuf on IRC (#nouveau, freenode).
-
-Bug reports should be filled on Freedesktop's bug tracker. Please follow
-http://nouveau.freedesktop.org/wiki/Bugs
diff --git a/Documentation/thermal/power_allocator.rst b/Documentation/thermal/power_allocator.rst
deleted file mode 100644
index 67b6a3297238..000000000000
--- a/Documentation/thermal/power_allocator.rst
+++ /dev/null
@@ -1,271 +0,0 @@
-=================================
-Power allocator governor tunables
-=================================
-
-Trip points
------------
-
-The governor works optimally with the following two passive trip points:
-
-1.  "switch on" trip point: temperature above which the governor
-    control loop starts operating.  This is the first passive trip
-    point of the thermal zone.
-
-2.  "desired temperature" trip point: it should be higher than the
-    "switch on" trip point.  This the target temperature the governor
-    is controlling for.  This is the last passive trip point of the
-    thermal zone.
-
-PID Controller
---------------
-
-The power allocator governor implements a
-Proportional-Integral-Derivative controller (PID controller) with
-temperature as the control input and power as the controlled output:
-
-    P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power
-
-where
-   -  e = desired_temperature - current_temperature
-   -  err_integral is the sum of previous errors
-   -  diff_err = e - previous_error
-
-It is similar to the one depicted below::
-
-				      k_d
-				       |
-  current_temp                         |
-       |                               v
-       |              +----------+   +---+
-       |       +----->| diff_err |-->| X |------+
-       |       |      +----------+   +---+      |
-       |       |                                |      tdp        actor
-       |       |                      k_i       |       |  get_requested_power()
-       |       |                       |        |       |        |     |
-       |       |                       |        |       |        |     | ...
-       v       |                       v        v       v        v     v
-     +---+     |      +-------+      +---+    +---+   +---+   +----------+
-     | S |-----+----->| sum e |----->| X |--->| S |-->| S |-->|power     |
-     +---+     |      +-------+      +---+    +---+   +---+   |allocation|
-       ^       |                                ^             +----------+
-       |       |                                |                |     |
-       |       |        +---+                   |                |     |
-       |       +------->| X |-------------------+                v     v
-       |                +---+                               granted performance
-  desired_temperature     ^
-			  |
-			  |
-		      k_po/k_pu
-
-Sustainable power
------------------
-
-An estimate of the sustainable dissipatable power (in mW) should be
-provided while registering the thermal zone.  This estimates the
-sustained power that can be dissipated at the desired control
-temperature.  This is the maximum sustained power for allocation at
-the desired maximum temperature.  The actual sustained power can vary
-for a number of reasons.  The closed loop controller will take care of
-variations such as environmental conditions, and some factors related
-to the speed-grade of the silicon.  `sustainable_power` is therefore
-simply an estimate, and may be tuned to affect the aggressiveness of
-the thermal ramp. For reference, the sustainable power of a 4" phone
-is typically 2000mW, while on a 10" tablet is around 4500mW (may vary
-depending on screen size).
-
-If you are using device tree, do add it as a property of the
-thermal-zone.  For example::
-
-	thermal-zones {
-		soc_thermal {
-			polling-delay = <1000>;
-			polling-delay-passive = <100>;
-			sustainable-power = <2500>;
-			...
-
-Instead, if the thermal zone is registered from the platform code, pass a
-`thermal_zone_params` that has a `sustainable_power`.  If no
-`thermal_zone_params` were being passed, then something like below
-will suffice::
-
-	static const struct thermal_zone_params tz_params = {
-		.sustainable_power = 3500,
-	};
-
-and then pass `tz_params` as the 5th parameter to
-`thermal_zone_device_register()`
-
-k_po and k_pu
--------------
-
-The implementation of the PID controller in the power allocator
-thermal governor allows the configuration of two proportional term
-constants: `k_po` and `k_pu`.  `k_po` is the proportional term
-constant during temperature overshoot periods (current temperature is
-above "desired temperature" trip point).  Conversely, `k_pu` is the
-proportional term constant during temperature undershoot periods
-(current temperature below "desired temperature" trip point).
-
-These controls are intended as the primary mechanism for configuring
-the permitted thermal "ramp" of the system.  For instance, a lower
-`k_pu` value will provide a slower ramp, at the cost of capping
-available capacity at a low temperature.  On the other hand, a high
-value of `k_pu` will result in the governor granting very high power
-while temperature is low, and may lead to temperature overshooting.
-
-The default value for `k_pu` is::
-
-    2 * sustainable_power / (desired_temperature - switch_on_temp)
-
-This means that at `switch_on_temp` the output of the controller's
-proportional term will be 2 * `sustainable_power`.  The default value
-for `k_po` is::
-
-    sustainable_power / (desired_temperature - switch_on_temp)
-
-Focusing on the proportional and feed forward values of the PID
-controller equation we have::
-
-    P_max = k_p * e + sustainable_power
-
-The proportional term is proportional to the difference between the
-desired temperature and the current one.  When the current temperature
-is the desired one, then the proportional component is zero and
-`P_max` = `sustainable_power`.  That is, the system should operate in
-thermal equilibrium under constant load.  `sustainable_power` is only
-an estimate, which is the reason for closed-loop control such as this.
-
-Expanding `k_pu` we get::
-
-    P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) +
-	sustainable_power
-
-where:
-
-    - T_set is the desired temperature
-    - T is the current temperature
-    - T_on is the switch on temperature
-
-When the current temperature is the switch_on temperature, the above
-formula becomes::
-
-    P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) +
-	sustainable_power = 2 * sustainable_power + sustainable_power =
-	3 * sustainable_power
-
-Therefore, the proportional term alone linearly decreases power from
-3 * `sustainable_power` to `sustainable_power` as the temperature
-rises from the switch on temperature to the desired temperature.
-
-k_i and integral_cutoff
------------------------
-
-`k_i` configures the PID loop's integral term constant.  This term
-allows the PID controller to compensate for long term drift and for
-the quantized nature of the output control: cooling devices can't set
-the exact power that the governor requests.  When the temperature
-error is below `integral_cutoff`, errors are accumulated in the
-integral term.  This term is then multiplied by `k_i` and the result
-added to the output of the controller.  Typically `k_i` is set low (1
-or 2) and `integral_cutoff` is 0.
-
-k_d
----
-
-`k_d` configures the PID loop's derivative term constant.  It's
-recommended to leave it as the default: 0.
-
-Cooling device power API
-========================
-
-Cooling devices controlled by this governor must supply the additional
-"power" API in their `cooling_device_ops`.  It consists on three ops:
-
-1. ::
-
-    int get_requested_power(struct thermal_cooling_device *cdev,
-			    struct thermal_zone_device *tz, u32 *power);
-
-
-@cdev:
-	The `struct thermal_cooling_device` pointer
-@tz:
-	thermal zone in which we are currently operating
-@power:
-	pointer in which to store the calculated power
-
-`get_requested_power()` calculates the power requested by the device
-in milliwatts and stores it in @power .  It should return 0 on
-success, -E* on failure.  This is currently used by the power
-allocator governor to calculate how much power to give to each cooling
-device.
-
-2. ::
-
-	int state2power(struct thermal_cooling_device *cdev, struct
-			thermal_zone_device *tz, unsigned long state,
-			u32 *power);
-
-@cdev:
-	The `struct thermal_cooling_device` pointer
-@tz:
-	thermal zone in which we are currently operating
-@state:
-	A cooling device state
-@power:
-	pointer in which to store the equivalent power
-
-Convert cooling device state @state into power consumption in
-milliwatts and store it in @power.  It should return 0 on success, -E*
-on failure.  This is currently used by thermal core to calculate the
-maximum power that an actor can consume.
-
-3. ::
-
-	int power2state(struct thermal_cooling_device *cdev, u32 power,
-			unsigned long *state);
-
-@cdev:
-	The `struct thermal_cooling_device` pointer
-@power:
-	power in milliwatts
-@state:
-	pointer in which to store the resulting state
-
-Calculate a cooling device state that would make the device consume at
-most @power mW and store it in @state.  It should return 0 on success,
--E* on failure.  This is currently used by the thermal core to convert
-a given power set by the power allocator governor to a state that the
-cooling device can set.  It is a function because this conversion may
-depend on external factors that may change so this function should the
-best conversion given "current circumstances".
-
-Cooling device weights
-----------------------
-
-Weights are a mechanism to bias the allocation among cooling
-devices.  They express the relative power efficiency of different
-cooling devices.  Higher weight can be used to express higher power
-efficiency.  Weighting is relative such that if each cooling device
-has a weight of one they are considered equal.  This is particularly
-useful in heterogeneous systems where two cooling devices may perform
-the same kind of compute, but with different efficiency.  For example,
-a system with two different types of processors.
-
-If the thermal zone is registered using
-`thermal_zone_device_register()` (i.e., platform code), then weights
-are passed as part of the thermal zone's `thermal_bind_parameters`.
-If the platform is registered using device tree, then they are passed
-as the `contribution` property of each map in the `cooling-maps` node.
-
-Limitations of the power allocator governor
-===========================================
-
-The power allocator governor's PID controller works best if there is a
-periodic tick.  If you have a driver that calls
-`thermal_zone_device_update()` (or anything that ends up calling the
-governor's `throttle()` function) repetitively, the governor response
-won't be very good.  Note that this is not particular to this
-governor, step-wise will also misbehave if you call its throttle()
-faster than the normal thermal framework tick (due to interrupts for
-example) as it will overreact.
diff --git a/Documentation/thermal/sysfs-api.rst b/Documentation/thermal/sysfs-api.rst
deleted file mode 100644
index e4930761d3e5..000000000000
--- a/Documentation/thermal/sysfs-api.rst
+++ /dev/null
@@ -1,798 +0,0 @@
-===================================
-Generic Thermal Sysfs driver How To
-===================================
-
-Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
-
-Updated: 2 January 2008
-
-Copyright (c)  2008 Intel Corporation
-
-
-0. Introduction
-===============
-
-The generic thermal sysfs provides a set of interfaces for thermal zone
-devices (sensors) and thermal cooling devices (fan, processor...) to register
-with the thermal management solution and to be a part of it.
-
-This how-to focuses on enabling new thermal zone and cooling devices to
-participate in thermal management.
-This solution is platform independent and any type of thermal zone devices
-and cooling devices should be able to make use of the infrastructure.
-
-The main task of the thermal sysfs driver is to expose thermal zone attributes
-as well as cooling device attributes to the user space.
-An intelligent thermal management application can make decisions based on
-inputs from thermal zone attributes (the current temperature and trip point
-temperature) and throttle appropriate devices.
-
-- `[0-*]`	denotes any positive number starting from 0
-- `[1-*]`	denotes any positive number starting from 1
-
-1. thermal sysfs driver interface functions
-===========================================
-
-1.1 thermal zone device interface
----------------------------------
-
-    ::
-
-	struct thermal_zone_device
-	*thermal_zone_device_register(char *type,
-				      int trips, int mask, void *devdata,
-				      struct thermal_zone_device_ops *ops,
-				      const struct thermal_zone_params *tzp,
-				      int passive_delay, int polling_delay))
-
-    This interface function adds a new thermal zone device (sensor) to
-    /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the
-    thermal cooling devices registered at the same time.
-
-    type:
-	the thermal zone type.
-    trips:
-	the total number of trip points this thermal zone supports.
-    mask:
-	Bit string: If 'n'th bit is set, then trip point 'n' is writeable.
-    devdata:
-	device private data
-    ops:
-	thermal zone device call-backs.
-
-	.bind:
-		bind the thermal zone device with a thermal cooling device.
-	.unbind:
-		unbind the thermal zone device with a thermal cooling device.
-	.get_temp:
-		get the current temperature of the thermal zone.
-	.set_trips:
-		    set the trip points window. Whenever the current temperature
-		    is updated, the trip points immediately below and above the
-		    current temperature are found.
-	.get_mode:
-		   get the current mode (enabled/disabled) of the thermal zone.
-
-			- "enabled" means the kernel thermal management is
-			  enabled.
-			- "disabled" will prevent kernel thermal driver action
-			  upon trip points so that user applications can take
-			  charge of thermal management.
-	.set_mode:
-		set the mode (enabled/disabled) of the thermal zone.
-	.get_trip_type:
-		get the type of certain trip point.
-	.get_trip_temp:
-			get the temperature above which the certain trip point
-			will be fired.
-	.set_emul_temp:
-			set the emulation temperature which helps in debugging
-			different threshold temperature points.
-    tzp:
-	thermal zone platform parameters.
-    passive_delay:
-	number of milliseconds to wait between polls when
-	performing passive cooling.
-    polling_delay:
-	number of milliseconds to wait between polls when checking
-	whether trip points have been crossed (0 for interrupt driven systems).
-
-    ::
-
-	void thermal_zone_device_unregister(struct thermal_zone_device *tz)
-
-    This interface function removes the thermal zone device.
-    It deletes the corresponding entry from /sys/class/thermal folder and
-    unbinds all the thermal cooling devices it uses.
-
-	::
-
-	   struct thermal_zone_device
-	   *thermal_zone_of_sensor_register(struct device *dev, int sensor_id,
-				void *data,
-				const struct thermal_zone_of_device_ops *ops)
-
-	This interface adds a new sensor to a DT thermal zone.
-	This function will search the list of thermal zones described in
-	device tree and look for the zone that refer to the sensor device
-	pointed by dev->of_node as temperature providers. For the zone
-	pointing to the sensor node, the sensor will be added to the DT
-	thermal zone device.
-
-	The parameters for this interface are:
-
-	dev:
-			Device node of sensor containing valid node pointer in
-			dev->of_node.
-	sensor_id:
-			a sensor identifier, in case the sensor IP has more
-			than one sensors
-	data:
-			a private pointer (owned by the caller) that will be
-			passed back, when a temperature reading is needed.
-	ops:
-			`struct thermal_zone_of_device_ops *`.
-
-			==============  =======================================
-			get_temp	a pointer to a function that reads the
-					sensor temperature. This is mandatory
-					callback provided by sensor driver.
-			set_trips	a pointer to a function that sets a
-					temperature window. When this window is
-					left the driver must inform the thermal
-					core via thermal_zone_device_update.
-			get_trend 	a pointer to a function that reads the
-					sensor temperature trend.
-			set_emul_temp	a pointer to a function that sets
-					sensor emulated temperature.
-			==============  =======================================
-
-	The thermal zone temperature is provided by the get_temp() function
-	pointer of thermal_zone_of_device_ops. When called, it will
-	have the private pointer @data back.
-
-	It returns error pointer if fails otherwise valid thermal zone device
-	handle. Caller should check the return handle with IS_ERR() for finding
-	whether success or not.
-
-	::
-
-	    void thermal_zone_of_sensor_unregister(struct device *dev,
-						   struct thermal_zone_device *tzd)
-
-	This interface unregisters a sensor from a DT thermal zone which was
-	successfully added by interface thermal_zone_of_sensor_register().
-	This function removes the sensor callbacks and private data from the
-	thermal zone device registered with thermal_zone_of_sensor_register()
-	interface. It will also silent the zone by remove the .get_temp() and
-	get_trend() thermal zone device callbacks.
-
-	::
-
-	  struct thermal_zone_device
-	  *devm_thermal_zone_of_sensor_register(struct device *dev,
-				int sensor_id,
-				void *data,
-				const struct thermal_zone_of_device_ops *ops)
-
-	This interface is resource managed version of
-	thermal_zone_of_sensor_register().
-
-	All details of thermal_zone_of_sensor_register() described in
-	section 1.1.3 is applicable here.
-
-	The benefit of using this interface to register sensor is that it
-	is not require to explicitly call thermal_zone_of_sensor_unregister()
-	in error path or during driver unbinding as this is done by driver
-	resource manager.
-
-	::
-
-		void devm_thermal_zone_of_sensor_unregister(struct device *dev,
-						struct thermal_zone_device *tzd)
-
-	This interface is resource managed version of
-	thermal_zone_of_sensor_unregister().
-	All details of thermal_zone_of_sensor_unregister() described in
-	section 1.1.4 is applicable here.
-	Normally this function will not need to be called and the resource
-	management code will ensure that the resource is freed.
-
-	::
-
-		int thermal_zone_get_slope(struct thermal_zone_device *tz)
-
-	This interface is used to read the slope attribute value
-	for the thermal zone device, which might be useful for platform
-	drivers for temperature calculations.
-
-	::
-
-		int thermal_zone_get_offset(struct thermal_zone_device *tz)
-
-	This interface is used to read the offset attribute value
-	for the thermal zone device, which might be useful for platform
-	drivers for temperature calculations.
-
-1.2 thermal cooling device interface
-------------------------------------
-
-
-    ::
-
-	struct thermal_cooling_device
-	*thermal_cooling_device_register(char *name,
-			void *devdata, struct thermal_cooling_device_ops *)
-
-    This interface function adds a new thermal cooling device (fan/processor/...)
-    to /sys/class/thermal/ folder as `cooling_device[0-*]`. It tries to bind itself
-    to all the thermal zone devices registered at the same time.
-
-    name:
-	the cooling device name.
-    devdata:
-	device private data.
-    ops:
-	thermal cooling devices call-backs.
-
-	.get_max_state:
-		get the Maximum throttle state of the cooling device.
-	.get_cur_state:
-		get the Currently requested throttle state of the
-		cooling device.
-	.set_cur_state:
-		set the Current throttle state of the cooling device.
-
-    ::
-
-	void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
-
-    This interface function removes the thermal cooling device.
-    It deletes the corresponding entry from /sys/class/thermal folder and
-    unbinds itself from all the thermal zone devices using it.
-
-1.3 interface for binding a thermal zone device with a thermal cooling device
------------------------------------------------------------------------------
-
-    ::
-
-	int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
-		int trip, struct thermal_cooling_device *cdev,
-		unsigned long upper, unsigned long lower, unsigned int weight);
-
-    This interface function binds a thermal cooling device to a particular trip
-    point of a thermal zone device.
-
-    This function is usually called in the thermal zone device .bind callback.
-
-    tz:
-	  the thermal zone device
-    cdev:
-	  thermal cooling device
-    trip:
-	  indicates which trip point in this thermal zone the cooling device
-	  is associated with.
-    upper:
-	  the Maximum cooling state for this trip point.
-	  THERMAL_NO_LIMIT means no upper limit,
-	  and the cooling device can be in max_state.
-    lower:
-	  the Minimum cooling state can be used for this trip point.
-	  THERMAL_NO_LIMIT means no lower limit,
-	  and the cooling device can be in cooling state 0.
-    weight:
-	  the influence of this cooling device in this thermal
-	  zone.  See 1.4.1 below for more information.
-
-    ::
-
-	int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
-				int trip, struct thermal_cooling_device *cdev);
-
-    This interface function unbinds a thermal cooling device from a particular
-    trip point of a thermal zone device. This function is usually called in
-    the thermal zone device .unbind callback.
-
-    tz:
-	the thermal zone device
-    cdev:
-	thermal cooling device
-    trip:
-	indicates which trip point in this thermal zone the cooling device
-	is associated with.
-
-1.4 Thermal Zone Parameters
----------------------------
-
-    ::
-
-	struct thermal_bind_params
-
-    This structure defines the following parameters that are used to bind
-    a zone with a cooling device for a particular trip point.
-
-    .cdev:
-	     The cooling device pointer
-    .weight:
-	     The 'influence' of a particular cooling device on this
-	     zone. This is relative to the rest of the cooling
-	     devices. For example, if all cooling devices have a
-	     weight of 1, then they all contribute the same. You can
-	     use percentages if you want, but it's not mandatory. A
-	     weight of 0 means that this cooling device doesn't
-	     contribute to the cooling of this zone unless all cooling
-	     devices have a weight of 0. If all weights are 0, then
-	     they all contribute the same.
-    .trip_mask:
-	       This is a bit mask that gives the binding relation between
-	       this thermal zone and cdev, for a particular trip point.
-	       If nth bit is set, then the cdev and thermal zone are bound
-	       for trip point n.
-    .binding_limits:
-		     This is an array of cooling state limits. Must have
-		     exactly 2 * thermal_zone.number_of_trip_points. It is an
-		     array consisting of tuples <lower-state upper-state> of
-		     state limits. Each trip will be associated with one state
-		     limit tuple when binding. A NULL pointer means
-		     <THERMAL_NO_LIMITS THERMAL_NO_LIMITS> on all trips.
-		     These limits are used when binding a cdev to a trip point.
-    .match:
-	    This call back returns success(0) if the 'tz and cdev' need to
-	    be bound, as per platform data.
-
-    ::
-
-	struct thermal_zone_params
-
-    This structure defines the platform level parameters for a thermal zone.
-    This data, for each thermal zone should come from the platform layer.
-    This is an optional feature where some platforms can choose not to
-    provide this data.
-
-    .governor_name:
-	       Name of the thermal governor used for this zone
-    .no_hwmon:
-	       a boolean to indicate if the thermal to hwmon sysfs interface
-	       is required. when no_hwmon == false, a hwmon sysfs interface
-	       will be created. when no_hwmon == true, nothing will be done.
-	       In case the thermal_zone_params is NULL, the hwmon interface
-	       will be created (for backward compatibility).
-    .num_tbps:
-	       Number of thermal_bind_params entries for this zone
-    .tbp:
-	       thermal_bind_params entries
-
-2. sysfs attributes structure
-=============================
-
-==	================
-RO	read only value
-WO	write only value
-RW	read/write value
-==	================
-
-Thermal sysfs attributes will be represented under /sys/class/thermal.
-Hwmon sysfs I/F extension is also available under /sys/class/hwmon
-if hwmon is compiled in or built as a module.
-
-Thermal zone device sys I/F, created once it's registered::
-
-  /sys/class/thermal/thermal_zone[0-*]:
-    |---type:			Type of the thermal zone
-    |---temp:			Current temperature
-    |---mode:			Working mode of the thermal zone
-    |---policy:			Thermal governor used for this zone
-    |---available_policies:	Available thermal governors for this zone
-    |---trip_point_[0-*]_temp:	Trip point temperature
-    |---trip_point_[0-*]_type:	Trip point type
-    |---trip_point_[0-*]_hyst:	Hysteresis value for this trip point
-    |---emul_temp:		Emulated temperature set node
-    |---sustainable_power:      Sustainable dissipatable power
-    |---k_po:                   Proportional term during temperature overshoot
-    |---k_pu:                   Proportional term during temperature undershoot
-    |---k_i:                    PID's integral term in the power allocator gov
-    |---k_d:                    PID's derivative term in the power allocator
-    |---integral_cutoff:        Offset above which errors are accumulated
-    |---slope:                  Slope constant applied as linear extrapolation
-    |---offset:                 Offset constant applied as linear extrapolation
-
-Thermal cooling device sys I/F, created once it's registered::
-
-  /sys/class/thermal/cooling_device[0-*]:
-    |---type:			Type of the cooling device(processor/fan/...)
-    |---max_state:		Maximum cooling state of the cooling device
-    |---cur_state:		Current cooling state of the cooling device
-    |---stats:			Directory containing cooling device's statistics
-    |---stats/reset:		Writing any value resets the statistics
-    |---stats/time_in_state_ms:	Time (msec) spent in various cooling states
-    |---stats/total_trans:	Total number of times cooling state is changed
-    |---stats/trans_table:	Cooing state transition table
-
-
-Then next two dynamic attributes are created/removed in pairs. They represent
-the relationship between a thermal zone and its associated cooling device.
-They are created/removed for each successful execution of
-thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device.
-
-::
-
-  /sys/class/thermal/thermal_zone[0-*]:
-    |---cdev[0-*]:		[0-*]th cooling device in current thermal zone
-    |---cdev[0-*]_trip_point:	Trip point that cdev[0-*] is associated with
-    |---cdev[0-*]_weight:       Influence of the cooling device in
-				this thermal zone
-
-Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
-the generic thermal driver also creates a hwmon sysfs I/F for each _type_
-of thermal zone device. E.g. the generic thermal driver registers one hwmon
-class device and build the associated hwmon sysfs I/F for all the registered
-ACPI thermal zones.
-
-::
-
-  /sys/class/hwmon/hwmon[0-*]:
-    |---name:			The type of the thermal zone devices
-    |---temp[1-*]_input:	The current temperature of thermal zone [1-*]
-    |---temp[1-*]_critical:	The critical trip point of thermal zone [1-*]
-
-Please read Documentation/hwmon/sysfs-interface.rst for additional information.
-
-Thermal zone attributes
------------------------
-
-type
-	Strings which represent the thermal zone type.
-	This is given by thermal zone driver as part of registration.
-	E.g: "acpitz" indicates it's an ACPI thermal device.
-	In order to keep it consistent with hwmon sys attribute; this should
-	be a short, lowercase string, not containing spaces nor dashes.
-	RO, Required
-
-temp
-	Current temperature as reported by thermal zone (sensor).
-	Unit: millidegree Celsius
-	RO, Required
-
-mode
-	One of the predefined values in [enabled, disabled].
-	This file gives information about the algorithm that is currently
-	managing the thermal zone. It can be either default kernel based
-	algorithm or user space application.
-
-	enabled
-			  enable Kernel Thermal management.
-	disabled
-			  Preventing kernel thermal zone driver actions upon
-			  trip points so that user application can take full
-			  charge of the thermal management.
-
-	RW, Optional
-
-policy
-	One of the various thermal governors used for a particular zone.
-
-	RW, Required
-
-available_policies
-	Available thermal governors which can be used for a particular zone.
-
-	RO, Required
-
-`trip_point_[0-*]_temp`
-	The temperature above which trip point will be fired.
-
-	Unit: millidegree Celsius
-
-	RO, Optional
-
-`trip_point_[0-*]_type`
-	Strings which indicate the type of the trip point.
-
-	E.g. it can be one of critical, hot, passive, `active[0-*]` for ACPI
-	thermal zone.
-
-	RO, Optional
-
-`trip_point_[0-*]_hyst`
-	The hysteresis value for a trip point, represented as an integer
-	Unit: Celsius
-	RW, Optional
-
-`cdev[0-*]`
-	Sysfs link to the thermal cooling device node where the sys I/F
-	for cooling device throttling control represents.
-
-	RO, Optional
-
-`cdev[0-*]_trip_point`
-	The trip point in this thermal zone which `cdev[0-*]` is associated
-	with; -1 means the cooling device is not associated with any trip
-	point.
-
-	RO, Optional
-
-`cdev[0-*]_weight`
-	The influence of `cdev[0-*]` in this thermal zone. This value
-	is relative to the rest of cooling devices in the thermal
-	zone. For example, if a cooling device has a weight double
-	than that of other, it's twice as effective in cooling the
-	thermal zone.
-
-	RW, Optional
-
-passive
-	Attribute is only present for zones in which the passive cooling
-	policy is not supported by native thermal driver. Default is zero
-	and can be set to a temperature (in millidegrees) to enable a
-	passive trip point for the zone. Activation is done by polling with
-	an interval of 1 second.
-
-	Unit: millidegrees Celsius
-
-	Valid values: 0 (disabled) or greater than 1000
-
-	RW, Optional
-
-emul_temp
-	Interface to set the emulated temperature method in thermal zone
-	(sensor). After setting this temperature, the thermal zone may pass
-	this temperature to platform emulation function if registered or
-	cache it locally. This is useful in debugging different temperature
-	threshold and its associated cooling action. This is write only node
-	and writing 0 on this node should disable emulation.
-	Unit: millidegree Celsius
-
-	WO, Optional
-
-	  WARNING:
-	    Be careful while enabling this option on production systems,
-	    because userland can easily disable the thermal policy by simply
-	    flooding this sysfs node with low temperature values.
-
-sustainable_power
-	An estimate of the sustained power that can be dissipated by
-	the thermal zone. Used by the power allocator governor. For
-	more information see Documentation/thermal/power_allocator.rst
-
-	Unit: milliwatts
-
-	RW, Optional
-
-k_po
-	The proportional term of the power allocator governor's PID
-	controller during temperature overshoot. Temperature overshoot
-	is when the current temperature is above the "desired
-	temperature" trip point. For more information see
-	Documentation/thermal/power_allocator.rst
-
-	RW, Optional
-
-k_pu
-	The proportional term of the power allocator governor's PID
-	controller during temperature undershoot. Temperature undershoot
-	is when the current temperature is below the "desired
-	temperature" trip point. For more information see
-	Documentation/thermal/power_allocator.rst
-
-	RW, Optional
-
-k_i
-	The integral term of the power allocator governor's PID
-	controller. This term allows the PID controller to compensate
-	for long term drift. For more information see
-	Documentation/thermal/power_allocator.rst
-
-	RW, Optional
-
-k_d
-	The derivative term of the power allocator governor's PID
-	controller. For more information see
-	Documentation/thermal/power_allocator.rst
-
-	RW, Optional
-
-integral_cutoff
-	Temperature offset from the desired temperature trip point
-	above which the integral term of the power allocator
-	governor's PID controller starts accumulating errors. For
-	example, if integral_cutoff is 0, then the integral term only
-	accumulates error when temperature is above the desired
-	temperature trip point. For more information see
-	Documentation/thermal/power_allocator.rst
-
-	Unit: millidegree Celsius
-
-	RW, Optional
-
-slope
-	The slope constant used in a linear extrapolation model
-	to determine a hotspot temperature based off the sensor's
-	raw readings. It is up to the device driver to determine
-	the usage of these values.
-
-	RW, Optional
-
-offset
-	The offset constant used in a linear extrapolation model
-	to determine a hotspot temperature based off the sensor's
-	raw readings. It is up to the device driver to determine
-	the usage of these values.
-
-	RW, Optional
-
-Cooling device attributes
--------------------------
-
-type
-	String which represents the type of device, e.g:
-
-	- for generic ACPI: should be "Fan", "Processor" or "LCD"
-	- for memory controller device on intel_menlow platform:
-	  should be "Memory controller".
-
-	RO, Required
-
-max_state
-	The maximum permissible cooling state of this cooling device.
-
-	RO, Required
-
-cur_state
-	The current cooling state of this cooling device.
-	The value can any integer numbers between 0 and max_state:
-
-	- cur_state == 0 means no cooling
-	- cur_state == max_state means the maximum cooling.
-
-	RW, Required
-
-stats/reset
-	Writing any value resets the cooling device's statistics.
-	WO, Required
-
-stats/time_in_state_ms:
-	The amount of time spent by the cooling device in various cooling
-	states. The output will have "<state> <time>" pair in each line, which
-	will mean this cooling device spent <time> msec of time at <state>.
-	Output will have one line for each of the supported states.  usertime
-	units here is 10mS (similar to other time exported in /proc).
-	RO, Required
-
-
-stats/total_trans:
-	A single positive value showing the total number of times the state of a
-	cooling device is changed.
-
-	RO, Required
-
-stats/trans_table:
-	This gives fine grained information about all the cooling state
-	transitions. The cat output here is a two dimensional matrix, where an
-	entry <i,j> (row i, column j) represents the number of transitions from
-	State_i to State_j. If the transition table is bigger than PAGE_SIZE,
-	reading this will return an -EFBIG error.
-	RO, Required
-
-3. A simple implementation
-==========================
-
-ACPI thermal zone may support multiple trip points like critical, hot,
-passive, active. If an ACPI thermal zone supports critical, passive,
-active[0] and active[1] at the same time, it may register itself as a
-thermal_zone_device (thermal_zone1) with 4 trip points in all.
-It has one processor and one fan, which are both registered as
-thermal_cooling_device. Both are considered to have the same
-effectiveness in cooling the thermal zone.
-
-If the processor is listed in _PSL method, and the fan is listed in _AL0
-method, the sys I/F structure will be built like this::
-
- /sys/class/thermal:
-  |thermal_zone1:
-    |---type:			acpitz
-    |---temp:			37000
-    |---mode:			enabled
-    |---policy:			step_wise
-    |---available_policies:	step_wise fair_share
-    |---trip_point_0_temp:	100000
-    |---trip_point_0_type:	critical
-    |---trip_point_1_temp:	80000
-    |---trip_point_1_type:	passive
-    |---trip_point_2_temp:	70000
-    |---trip_point_2_type:	active0
-    |---trip_point_3_temp:	60000
-    |---trip_point_3_type:	active1
-    |---cdev0:			--->/sys/class/thermal/cooling_device0
-    |---cdev0_trip_point:	1	/* cdev0 can be used for passive */
-    |---cdev0_weight:           1024
-    |---cdev1:			--->/sys/class/thermal/cooling_device3
-    |---cdev1_trip_point:	2	/* cdev1 can be used for active[0]*/
-    |---cdev1_weight:           1024
-
-  |cooling_device0:
-    |---type:			Processor
-    |---max_state:		8
-    |---cur_state:		0
-
-  |cooling_device3:
-    |---type:			Fan
-    |---max_state:		2
-    |---cur_state:		0
-
- /sys/class/hwmon:
-  |hwmon0:
-    |---name:			acpitz
-    |---temp1_input:		37000
-    |---temp1_crit:		100000
-
-4. Event Notification
-=====================
-
-The framework includes a simple notification mechanism, in the form of a
-netlink event. Netlink socket initialization is done during the _init_
-of the framework. Drivers which intend to use the notification mechanism
-just need to call thermal_generate_netlink_event() with two arguments viz
-(originator, event). The originator is a pointer to struct thermal_zone_device
-from where the event has been originated. An integer which represents the
-thermal zone device will be used in the message to identify the zone. The
-event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
-THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
-crosses any of the configured thresholds.
-
-5. Export Symbol APIs
-=====================
-
-5.1. get_tz_trend
------------------
-
-This function returns the trend of a thermal zone, i.e the rate of change
-of temperature of the thermal zone. Ideally, the thermal sensor drivers
-are supposed to implement the callback. If they don't, the thermal
-framework calculated the trend by comparing the previous and the current
-temperature values.
-
-5.2. get_thermal_instance
--------------------------
-
-This function returns the thermal_instance corresponding to a given
-{thermal_zone, cooling_device, trip_point} combination. Returns NULL
-if such an instance does not exist.
-
-5.3. thermal_notify_framework
------------------------------
-
-This function handles the trip events from sensor drivers. It starts
-throttling the cooling devices according to the policy configured.
-For CRITICAL and HOT trip points, this notifies the respective drivers,
-and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
-The throttling policy is based on the configured platform data; if no
-platform data is provided, this uses the step_wise throttling policy.
-
-5.4. thermal_cdev_update
-------------------------
-
-This function serves as an arbitrator to set the state of a cooling
-device. It sets the cooling device to the deepest cooling state if
-possible.
-
-6. thermal_emergency_poweroff
-=============================
-
-On an event of critical trip temperature crossing. Thermal framework
-allows the system to shutdown gracefully by calling orderly_poweroff().
-In the event of a failure of orderly_poweroff() to shut down the system
-we are in danger of keeping the system alive at undesirably high
-temperatures. To mitigate this high risk scenario we program a work
-queue to fire after a pre-determined number of seconds to start
-an emergency shutdown of the device using the kernel_power_off()
-function. In case kernel_power_off() fails then finally
-emergency_restart() is called in the worst case.
-
-The delay should be carefully profiled so as to give adequate time for
-orderly_poweroff(). In case of failure of an orderly_poweroff() the
-emergency poweroff kicks in after the delay has elapsed and shuts down
-the system.
-
-If set to 0 emergency poweroff will not be supported. So a carefully
-profiled non-zero positive value is a must for emergerncy poweroff to be
-triggered.
diff --git a/Documentation/thermal/x86_pkg_temperature_thermal.rst b/Documentation/thermal/x86_pkg_temperature_thermal.rst
deleted file mode 100644
index f134dbd3f5a9..000000000000
--- a/Documentation/thermal/x86_pkg_temperature_thermal.rst
+++ /dev/null
@@ -1,55 +0,0 @@
-===================================
-Kernel driver: x86_pkg_temp_thermal
-===================================
-
-Supported chips:
-
-* x86: with package level thermal management
-
-(Verify using: CPUID.06H:EAX[bit 6] =1)
-
-Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
-
-Reference
----------
-
-Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013):
-Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT
-
-Description
------------
-
-This driver register CPU digital temperature package level sensor as a thermal
-zone with maximum two user mode configurable trip points. Number of trip points
-depends on the capability of the package. Once the trip point is violated,
-user mode can receive notification via thermal notification mechanism and can
-take any action to control temperature.
-
-
-Threshold management
---------------------
-Each package will register as a thermal zone under /sys/class/thermal.
-
-Example::
-
-	/sys/class/thermal/thermal_zone1
-
-This contains two trip points:
-
-- trip_point_0_temp
-- trip_point_1_temp
-
-User can set any temperature between 0 to TJ-Max temperature. Temperature units
-are in milli-degree Celsius. Refer to "Documentation/thermal/sysfs-api.rst" for
-thermal sys-fs details.
-
-Any value other than 0 in these trip points, can trigger thermal notifications.
-Setting 0, stops sending thermal notifications.
-
-Thermal notifications:
-To get kobject-uevent notifications, set the thermal zone
-policy to "user_space".
-
-For example::
-
-	echo -n "user_space" > policy
diff --git a/MAINTAINERS b/MAINTAINERS
index 7a07cda43592..6326445952f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15916,7 +15916,7 @@ M:	Viresh Kumar <viresh.kumar@linaro.org>
 M:	Javi Merino <javi.merino@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
-F:	Documentation/thermal/cpu-cooling-api.rst
+F:	Documentation/driver-api/thermal/cpu-cooling-api.rst
 F:	drivers/thermal/cpu_cooling.c
 F:	include/linux/cpu_cooling.h
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 681047f8cc05..e45659c75920 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -251,7 +251,7 @@ struct thermal_bind_params {
 	 * platform characterization. This value is relative to the
 	 * rest of the weights so a cooling device whose weight is
 	 * double that of another cooling device is twice as
-	 * effective. See Documentation/thermal/sysfs-api.rst for more
+	 * effective. See Documentation/driver-api/thermal/sysfs-api.rst for more
 	 * information.
 	 */
 	int weight;
@@ -259,7 +259,7 @@ struct thermal_bind_params {
 	/*
 	 * This is a bit mask that gives the binding relation between this
 	 * thermal zone and cdev, for a particular trip point.
-	 * See Documentation/thermal/sysfs-api.rst for more information.
+	 * See Documentation/driver-api/thermal/sysfs-api.rst for more information.
 	 */
 	int trip_mask;
 
-- 
cgit v1.2.3


From ccf988b66d697efcd0ceccc2398e0d9b909cd17c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 26 Jul 2019 09:51:16 -0300
Subject: docs: i2c: convert to ReST and add to driver-api bookset

Convert each file at I2C subsystem, renaming them to .rst and
adding to the driver-api book.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../devicetree/bindings/i2c/i2c-mux-gpmux.txt      |   2 +-
 Documentation/driver-api/ipmb.rst                  |   2 +-
 Documentation/hwmon/adm1021.rst                    |   2 +-
 Documentation/hwmon/adm1275.rst                    |   2 +-
 Documentation/hwmon/hih6130.rst                    |   2 +-
 Documentation/hwmon/ibm-cffps.rst                  |   2 +-
 Documentation/hwmon/lm25066.rst                    |   2 +-
 Documentation/hwmon/max16064.rst                   |   2 +-
 Documentation/hwmon/max16065.rst                   |   2 +-
 Documentation/hwmon/max20751.rst                   |   2 +-
 Documentation/hwmon/max34440.rst                   |   2 +-
 Documentation/hwmon/max6650.rst                    |   2 +-
 Documentation/hwmon/max8688.rst                    |   2 +-
 Documentation/hwmon/menf21bmc.rst                  |   2 +-
 Documentation/hwmon/pcf8591.rst                    |   2 +-
 Documentation/hwmon/sht3x.rst                      |   2 +-
 Documentation/hwmon/shtc1.rst                      |   2 +-
 Documentation/hwmon/tmp103.rst                     |   2 +-
 Documentation/hwmon/tps40422.rst                   |   2 +-
 Documentation/hwmon/ucd9000.rst                    |   2 +-
 Documentation/hwmon/ucd9200.rst                    |   2 +-
 Documentation/hwmon/via686a.rst                    |   2 +-
 Documentation/hwmon/zl6100.rst                     |   2 +-
 Documentation/i2c/DMA-considerations               |  71 ----
 Documentation/i2c/busses/i2c-ali1535               |  42 --
 Documentation/i2c/busses/i2c-ali1535.rst           |  45 +++
 Documentation/i2c/busses/i2c-ali1563               |  27 --
 Documentation/i2c/busses/i2c-ali1563.rst           |  30 ++
 Documentation/i2c/busses/i2c-ali15x3               | 112 ------
 Documentation/i2c/busses/i2c-ali15x3.rst           | 122 ++++++
 Documentation/i2c/busses/i2c-amd-mp2               |  23 --
 Documentation/i2c/busses/i2c-amd-mp2.rst           |  25 ++
 Documentation/i2c/busses/i2c-amd756                |  25 --
 Documentation/i2c/busses/i2c-amd756.rst            |  29 ++
 Documentation/i2c/busses/i2c-amd8111               |  41 --
 Documentation/i2c/busses/i2c-amd8111.rst           |  43 +++
 Documentation/i2c/busses/i2c-diolan-u2c            |  26 --
 Documentation/i2c/busses/i2c-diolan-u2c.rst        |  29 ++
 Documentation/i2c/busses/i2c-i801                  | 173 ---------
 Documentation/i2c/busses/i2c-i801.rst              | 182 +++++++++
 Documentation/i2c/busses/i2c-ismt                  |  36 --
 Documentation/i2c/busses/i2c-ismt.rst              |  44 +++
 Documentation/i2c/busses/i2c-mlxcpld               |  51 ---
 Documentation/i2c/busses/i2c-mlxcpld.rst           |  57 +++
 Documentation/i2c/busses/i2c-nforce2               |  50 ---
 Documentation/i2c/busses/i2c-nforce2.rst           |  53 +++
 Documentation/i2c/busses/i2c-nvidia-gpu            |  18 -
 Documentation/i2c/busses/i2c-nvidia-gpu.rst        |  20 +
 Documentation/i2c/busses/i2c-ocores                |  68 ----
 Documentation/i2c/busses/i2c-ocores.rst            |  70 ++++
 Documentation/i2c/busses/i2c-parport               | 178 ---------
 Documentation/i2c/busses/i2c-parport-light         |  22 --
 Documentation/i2c/busses/i2c-parport-light.rst     |  24 ++
 Documentation/i2c/busses/i2c-parport.rst           | 190 +++++++++
 Documentation/i2c/busses/i2c-pca-isa               |  23 --
 Documentation/i2c/busses/i2c-pca-isa.rst           |  26 ++
 Documentation/i2c/busses/i2c-piix4                 | 112 ------
 Documentation/i2c/busses/i2c-piix4.rst             | 114 ++++++
 Documentation/i2c/busses/i2c-sis5595               |  59 ---
 Documentation/i2c/busses/i2c-sis5595.rst           |  68 ++++
 Documentation/i2c/busses/i2c-sis630                |  58 ---
 Documentation/i2c/busses/i2c-sis630.rst            |  63 +++
 Documentation/i2c/busses/i2c-sis96x                |  73 ----
 Documentation/i2c/busses/i2c-sis96x.rst            |  82 ++++
 Documentation/i2c/busses/i2c-taos-evm              |  46 ---
 Documentation/i2c/busses/i2c-taos-evm.rst          |  48 +++
 Documentation/i2c/busses/i2c-via                   |  34 --
 Documentation/i2c/busses/i2c-via.rst               |  40 ++
 Documentation/i2c/busses/i2c-viapro                |  73 ----
 Documentation/i2c/busses/i2c-viapro.rst            |  77 ++++
 Documentation/i2c/busses/index.rst                 |  33 ++
 Documentation/i2c/busses/scx200_acb                |  32 --
 Documentation/i2c/busses/scx200_acb.rst            |  37 ++
 Documentation/i2c/dev-interface                    | 213 -----------
 Documentation/i2c/dev-interface.rst                | 219 +++++++++++
 Documentation/i2c/dma-considerations.rst           |  71 ++++
 Documentation/i2c/fault-codes                      | 128 -------
 Documentation/i2c/fault-codes.rst                  | 131 +++++++
 Documentation/i2c/functionality                    | 148 -------
 Documentation/i2c/functionality.rst                | 156 ++++++++
 Documentation/i2c/gpio-fault-injection             | 136 -------
 Documentation/i2c/gpio-fault-injection.rst         | 136 +++++++
 Documentation/i2c/i2c-protocol                     |  88 -----
 Documentation/i2c/i2c-protocol.rst                 |  98 +++++
 Documentation/i2c/i2c-stub                         |  64 ----
 Documentation/i2c/i2c-stub.rst                     |  66 ++++
 Documentation/i2c/i2c-topology                     | 376 ------------------
 Documentation/i2c/i2c-topology.rst                 | 396 +++++++++++++++++++
 Documentation/i2c/index.rst                        |  37 ++
 Documentation/i2c/instantiating-devices            | 248 ------------
 Documentation/i2c/instantiating-devices.rst        | 253 ++++++++++++
 Documentation/i2c/muxes/i2c-mux-gpio               |  83 ----
 Documentation/i2c/muxes/i2c-mux-gpio.rst           |  85 +++++
 Documentation/i2c/old-module-parameters            |  44 ---
 Documentation/i2c/old-module-parameters.rst        |  49 +++
 Documentation/i2c/slave-eeprom-backend             |  14 -
 Documentation/i2c/slave-eeprom-backend.rst         |  14 +
 Documentation/i2c/slave-interface                  | 193 ----------
 Documentation/i2c/slave-interface.rst              | 198 ++++++++++
 Documentation/i2c/smbus-protocol                   | 283 --------------
 Documentation/i2c/smbus-protocol.rst               | 301 +++++++++++++++
 Documentation/i2c/summary                          |  43 ---
 Documentation/i2c/summary.rst                      |  45 +++
 Documentation/i2c/ten-bit-addresses                |  28 --
 Documentation/i2c/ten-bit-addresses.rst            |  33 ++
 Documentation/i2c/upgrading-clients                | 279 --------------
 Documentation/i2c/upgrading-clients.rst            | 285 ++++++++++++++
 Documentation/i2c/writing-clients                  | 403 -------------------
 Documentation/i2c/writing-clients.rst              | 425 +++++++++++++++++++++
 Documentation/index.rst                            |   1 +
 Documentation/spi/spi-sc18is602                    |   2 +-
 MAINTAINERS                                        |  48 +--
 drivers/hwmon/atxp1.c                              |   2 +-
 drivers/hwmon/smm665.c                             |   2 +-
 drivers/i2c/Kconfig                                |   4 +-
 drivers/i2c/busses/Kconfig                         |   2 +-
 drivers/i2c/busses/i2c-i801.c                      |   2 +-
 drivers/i2c/busses/i2c-taos-evm.c                  |   2 +-
 drivers/i2c/i2c-core-base.c                        |   4 +-
 drivers/iio/dummy/iio_simple_dummy.c               |   2 +-
 drivers/rtc/rtc-ds1374.c                           |   2 +-
 include/linux/i2c.h                                |   2 +-
 122 files changed, 4610 insertions(+), 4304 deletions(-)
 delete mode 100644 Documentation/i2c/DMA-considerations
 delete mode 100644 Documentation/i2c/busses/i2c-ali1535
 create mode 100644 Documentation/i2c/busses/i2c-ali1535.rst
 delete mode 100644 Documentation/i2c/busses/i2c-ali1563
 create mode 100644 Documentation/i2c/busses/i2c-ali1563.rst
 delete mode 100644 Documentation/i2c/busses/i2c-ali15x3
 create mode 100644 Documentation/i2c/busses/i2c-ali15x3.rst
 delete mode 100644 Documentation/i2c/busses/i2c-amd-mp2
 create mode 100644 Documentation/i2c/busses/i2c-amd-mp2.rst
 delete mode 100644 Documentation/i2c/busses/i2c-amd756
 create mode 100644 Documentation/i2c/busses/i2c-amd756.rst
 delete mode 100644 Documentation/i2c/busses/i2c-amd8111
 create mode 100644 Documentation/i2c/busses/i2c-amd8111.rst
 delete mode 100644 Documentation/i2c/busses/i2c-diolan-u2c
 create mode 100644 Documentation/i2c/busses/i2c-diolan-u2c.rst
 delete mode 100644 Documentation/i2c/busses/i2c-i801
 create mode 100644 Documentation/i2c/busses/i2c-i801.rst
 delete mode 100644 Documentation/i2c/busses/i2c-ismt
 create mode 100644 Documentation/i2c/busses/i2c-ismt.rst
 delete mode 100644 Documentation/i2c/busses/i2c-mlxcpld
 create mode 100644 Documentation/i2c/busses/i2c-mlxcpld.rst
 delete mode 100644 Documentation/i2c/busses/i2c-nforce2
 create mode 100644 Documentation/i2c/busses/i2c-nforce2.rst
 delete mode 100644 Documentation/i2c/busses/i2c-nvidia-gpu
 create mode 100644 Documentation/i2c/busses/i2c-nvidia-gpu.rst
 delete mode 100644 Documentation/i2c/busses/i2c-ocores
 create mode 100644 Documentation/i2c/busses/i2c-ocores.rst
 delete mode 100644 Documentation/i2c/busses/i2c-parport
 delete mode 100644 Documentation/i2c/busses/i2c-parport-light
 create mode 100644 Documentation/i2c/busses/i2c-parport-light.rst
 create mode 100644 Documentation/i2c/busses/i2c-parport.rst
 delete mode 100644 Documentation/i2c/busses/i2c-pca-isa
 create mode 100644 Documentation/i2c/busses/i2c-pca-isa.rst
 delete mode 100644 Documentation/i2c/busses/i2c-piix4
 create mode 100644 Documentation/i2c/busses/i2c-piix4.rst
 delete mode 100644 Documentation/i2c/busses/i2c-sis5595
 create mode 100644 Documentation/i2c/busses/i2c-sis5595.rst
 delete mode 100644 Documentation/i2c/busses/i2c-sis630
 create mode 100644 Documentation/i2c/busses/i2c-sis630.rst
 delete mode 100644 Documentation/i2c/busses/i2c-sis96x
 create mode 100644 Documentation/i2c/busses/i2c-sis96x.rst
 delete mode 100644 Documentation/i2c/busses/i2c-taos-evm
 create mode 100644 Documentation/i2c/busses/i2c-taos-evm.rst
 delete mode 100644 Documentation/i2c/busses/i2c-via
 create mode 100644 Documentation/i2c/busses/i2c-via.rst
 delete mode 100644 Documentation/i2c/busses/i2c-viapro
 create mode 100644 Documentation/i2c/busses/i2c-viapro.rst
 create mode 100644 Documentation/i2c/busses/index.rst
 delete mode 100644 Documentation/i2c/busses/scx200_acb
 create mode 100644 Documentation/i2c/busses/scx200_acb.rst
 delete mode 100644 Documentation/i2c/dev-interface
 create mode 100644 Documentation/i2c/dev-interface.rst
 create mode 100644 Documentation/i2c/dma-considerations.rst
 delete mode 100644 Documentation/i2c/fault-codes
 create mode 100644 Documentation/i2c/fault-codes.rst
 delete mode 100644 Documentation/i2c/functionality
 create mode 100644 Documentation/i2c/functionality.rst
 delete mode 100644 Documentation/i2c/gpio-fault-injection
 create mode 100644 Documentation/i2c/gpio-fault-injection.rst
 delete mode 100644 Documentation/i2c/i2c-protocol
 create mode 100644 Documentation/i2c/i2c-protocol.rst
 delete mode 100644 Documentation/i2c/i2c-stub
 create mode 100644 Documentation/i2c/i2c-stub.rst
 delete mode 100644 Documentation/i2c/i2c-topology
 create mode 100644 Documentation/i2c/i2c-topology.rst
 create mode 100644 Documentation/i2c/index.rst
 delete mode 100644 Documentation/i2c/instantiating-devices
 create mode 100644 Documentation/i2c/instantiating-devices.rst
 delete mode 100644 Documentation/i2c/muxes/i2c-mux-gpio
 create mode 100644 Documentation/i2c/muxes/i2c-mux-gpio.rst
 delete mode 100644 Documentation/i2c/old-module-parameters
 create mode 100644 Documentation/i2c/old-module-parameters.rst
 delete mode 100644 Documentation/i2c/slave-eeprom-backend
 create mode 100644 Documentation/i2c/slave-eeprom-backend.rst
 delete mode 100644 Documentation/i2c/slave-interface
 create mode 100644 Documentation/i2c/slave-interface.rst
 delete mode 100644 Documentation/i2c/smbus-protocol
 create mode 100644 Documentation/i2c/smbus-protocol.rst
 delete mode 100644 Documentation/i2c/summary
 create mode 100644 Documentation/i2c/summary.rst
 delete mode 100644 Documentation/i2c/ten-bit-addresses
 create mode 100644 Documentation/i2c/ten-bit-addresses.rst
 delete mode 100644 Documentation/i2c/upgrading-clients
 create mode 100644 Documentation/i2c/upgrading-clients.rst
 delete mode 100644 Documentation/i2c/writing-clients
 create mode 100644 Documentation/i2c/writing-clients.rst

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.txt
index 2907dab56298..8b444b94e92f 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-gpmux.txt
@@ -42,7 +42,7 @@ Optional properties:
   This means that no unrelated I2C transactions are allowed on the parent I2C
   adapter for the complete multiplexed I2C transaction.
   The properties of mux-locked and parent-locked multiplexers are discussed
-  in more detail in Documentation/i2c/i2c-topology.
+  in more detail in Documentation/i2c/i2c-topology.rst.
 
 For each i2c child node, an I2C child bus will be created. They will
 be numbered based on their order in the device tree.
diff --git a/Documentation/driver-api/ipmb.rst b/Documentation/driver-api/ipmb.rst
index 7e2265144157..3ec3baed84c4 100644
--- a/Documentation/driver-api/ipmb.rst
+++ b/Documentation/driver-api/ipmb.rst
@@ -83,7 +83,7 @@ Instantiate the device
 ----------------------
 
 After loading the driver, you can instantiate the device as
-described in 'Documentation/i2c/instantiating-devices'.
+described in 'Documentation/i2c/instantiating-devices.rst'.
 If you have multiple BMCs, each connected to your Satellite MC via
 a different I2C bus, you can instantiate a device for each of
 those BMCs.
diff --git a/Documentation/hwmon/adm1021.rst b/Documentation/hwmon/adm1021.rst
index 6cbb0f75fe00..116fb2019956 100644
--- a/Documentation/hwmon/adm1021.rst
+++ b/Documentation/hwmon/adm1021.rst
@@ -142,7 +142,7 @@ loading the adm1021 module, then things are good.
 If nothing happens when loading the adm1021 module, and you are certain
 that your specific Xeon processor model includes compatible sensors, you
 will have to explicitly instantiate the sensor chips from user-space. See
-method 4 in Documentation/i2c/instantiating-devices. Possible slave
+method 4 in Documentation/i2c/instantiating-devices.rst. Possible slave
 addresses are 0x18, 0x1a, 0x29, 0x2b, 0x4c, or 0x4e. It is likely that
 only temp2 will be correct and temp1 will have to be ignored.
 
diff --git a/Documentation/hwmon/adm1275.rst b/Documentation/hwmon/adm1275.rst
index 9a1913e5b4d9..49966ed70ec6 100644
--- a/Documentation/hwmon/adm1275.rst
+++ b/Documentation/hwmon/adm1275.rst
@@ -75,7 +75,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 The ADM1075, unlike many other PMBus devices, does not support internal voltage
diff --git a/Documentation/hwmon/hih6130.rst b/Documentation/hwmon/hih6130.rst
index 649bd4be4fc2..e95d373eb693 100644
--- a/Documentation/hwmon/hih6130.rst
+++ b/Documentation/hwmon/hih6130.rst
@@ -27,7 +27,7 @@ The devices communicate with the I2C protocol. All sensors are set to the same
 I2C address 0x27 by default, so an entry with I2C_BOARD_INFO("hih6130", 0x27)
 can be used in the board setup code.
 
-Please see Documentation/i2c/instantiating-devices for details on how to
+Please see Documentation/i2c/instantiating-devices.rst for details on how to
 instantiate I2C devices.
 
 sysfs-Interface
diff --git a/Documentation/hwmon/ibm-cffps.rst b/Documentation/hwmon/ibm-cffps.rst
index 52e74e39463a..ef8f3f806968 100644
--- a/Documentation/hwmon/ibm-cffps.rst
+++ b/Documentation/hwmon/ibm-cffps.rst
@@ -17,7 +17,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 Sysfs entries
diff --git a/Documentation/hwmon/lm25066.rst b/Documentation/hwmon/lm25066.rst
index da15e3094c8c..30e6e77fb3c8 100644
--- a/Documentation/hwmon/lm25066.rst
+++ b/Documentation/hwmon/lm25066.rst
@@ -76,7 +76,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/max16064.rst b/Documentation/hwmon/max16064.rst
index 6d5e9538991f..c06249292557 100644
--- a/Documentation/hwmon/max16064.rst
+++ b/Documentation/hwmon/max16064.rst
@@ -28,7 +28,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/max16065.rst b/Documentation/hwmon/max16065.rst
index fa5c852a178c..45f69f334f25 100644
--- a/Documentation/hwmon/max16065.rst
+++ b/Documentation/hwmon/max16065.rst
@@ -79,7 +79,7 @@ Usage Notes
 
 This driver does not probe for devices, since there is no register which
 can be safely used to identify the chip. You will have to instantiate
-the devices explicitly. Please see Documentation/i2c/instantiating-devices for
+the devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 WARNING: Do not access chip registers using the i2cdump command, and do not use
diff --git a/Documentation/hwmon/max20751.rst b/Documentation/hwmon/max20751.rst
index aa4469be6674..fe701e07eaf5 100644
--- a/Documentation/hwmon/max20751.rst
+++ b/Documentation/hwmon/max20751.rst
@@ -30,7 +30,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/max34440.rst b/Documentation/hwmon/max34440.rst
index 939138e12b02..5744df100a5d 100644
--- a/Documentation/hwmon/max34440.rst
+++ b/Documentation/hwmon/max34440.rst
@@ -83,7 +83,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 For MAX34446, the value of the currX_crit attribute determines if current or
diff --git a/Documentation/hwmon/max6650.rst b/Documentation/hwmon/max6650.rst
index 253482add082..7952b6ecaa2d 100644
--- a/Documentation/hwmon/max6650.rst
+++ b/Documentation/hwmon/max6650.rst
@@ -55,7 +55,7 @@ Usage notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 Module parameters
diff --git a/Documentation/hwmon/max8688.rst b/Documentation/hwmon/max8688.rst
index 009487759c61..71e7f2cbe2e2 100644
--- a/Documentation/hwmon/max8688.rst
+++ b/Documentation/hwmon/max8688.rst
@@ -28,7 +28,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/menf21bmc.rst b/Documentation/hwmon/menf21bmc.rst
index 1f0c6b2235ab..978691d5956d 100644
--- a/Documentation/hwmon/menf21bmc.rst
+++ b/Documentation/hwmon/menf21bmc.rst
@@ -28,7 +28,7 @@ Usage Notes
 This driver is part of the MFD driver named "menf21bmc" and does
 not auto-detect devices.
 You will have to instantiate the MFD driver explicitly.
-Please see Documentation/i2c/instantiating-devices for
+Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 Sysfs entries
diff --git a/Documentation/hwmon/pcf8591.rst b/Documentation/hwmon/pcf8591.rst
index e98bd542a441..5c4e85f53177 100644
--- a/Documentation/hwmon/pcf8591.rst
+++ b/Documentation/hwmon/pcf8591.rst
@@ -68,7 +68,7 @@ Accessing PCF8591 via /sys interface
 The PCF8591 is plainly impossible to detect! Thus the driver won't even
 try. You have to explicitly instantiate the device at the relevant
 address (in the interval [0x48..0x4f]) either through platform data, or
-using the sysfs interface. See Documentation/i2c/instantiating-devices
+using the sysfs interface. See Documentation/i2c/instantiating-devices.rst
 for details.
 
 Directories are being created for each instantiated PCF8591:
diff --git a/Documentation/hwmon/sht3x.rst b/Documentation/hwmon/sht3x.rst
index 978a7117e4b2..95a850d5b2c1 100644
--- a/Documentation/hwmon/sht3x.rst
+++ b/Documentation/hwmon/sht3x.rst
@@ -26,7 +26,7 @@ scaled by 1000, i.e. the value for 31.5 degrees celsius is 31500.
 
 The device communicates with the I2C protocol. Sensors can have the I2C
 addresses 0x44 or 0x45, depending on the wiring. See
-Documentation/i2c/instantiating-devices for methods to instantiate the device.
+Documentation/i2c/instantiating-devices.rst for methods to instantiate the device.
 
 There are two options configurable by means of sht3x_platform_data:
 
diff --git a/Documentation/hwmon/shtc1.rst b/Documentation/hwmon/shtc1.rst
index aa116332ba26..70c1192bbd8c 100644
--- a/Documentation/hwmon/shtc1.rst
+++ b/Documentation/hwmon/shtc1.rst
@@ -36,7 +36,7 @@ humidity is expressed as a percentage. Driver can be used as well for SHTW1
 chip, which has the same electrical interface.
 
 The device communicates with the I2C protocol. All sensors are set to I2C
-address 0x70. See Documentation/i2c/instantiating-devices for methods to
+address 0x70. See Documentation/i2c/instantiating-devices.rst for methods to
 instantiate the device.
 
 There are two options configurable by means of shtc1_platform_data:
diff --git a/Documentation/hwmon/tmp103.rst b/Documentation/hwmon/tmp103.rst
index 15d25806d585..205de6148fcb 100644
--- a/Documentation/hwmon/tmp103.rst
+++ b/Documentation/hwmon/tmp103.rst
@@ -30,4 +30,4 @@ The driver provides the common sysfs-interface for temperatures (see
 Documentation/hwmon/sysfs-interface.rst under Temperatures).
 
 Please refer how to instantiate this driver:
-Documentation/i2c/instantiating-devices
+Documentation/i2c/instantiating-devices.rst
diff --git a/Documentation/hwmon/tps40422.rst b/Documentation/hwmon/tps40422.rst
index b691e30479dd..8fe3e1c3572e 100644
--- a/Documentation/hwmon/tps40422.rst
+++ b/Documentation/hwmon/tps40422.rst
@@ -28,7 +28,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/ucd9000.rst b/Documentation/hwmon/ucd9000.rst
index ebc4f2b3bfea..746f21fcb48c 100644
--- a/Documentation/hwmon/ucd9000.rst
+++ b/Documentation/hwmon/ucd9000.rst
@@ -64,7 +64,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/ucd9200.rst b/Documentation/hwmon/ucd9200.rst
index b819dfd75f71..4f0e7c3ca6f4 100644
--- a/Documentation/hwmon/ucd9200.rst
+++ b/Documentation/hwmon/ucd9200.rst
@@ -40,7 +40,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 
diff --git a/Documentation/hwmon/via686a.rst b/Documentation/hwmon/via686a.rst
index a343c35df740..7ab9ddebcf79 100644
--- a/Documentation/hwmon/via686a.rst
+++ b/Documentation/hwmon/via686a.rst
@@ -40,7 +40,7 @@ all as a 686A.
 
 The Via 686a southbridge has integrated hardware monitor functionality.
 It also has an I2C bus, but this driver only supports the hardware monitor.
-For the I2C bus driver, see <file:Documentation/i2c/busses/i2c-viapro>
+For the I2C bus driver, see <file:Documentation/i2c/busses/i2c-viapro.rst>
 
 The Via 686a implements three temperature sensors, two fan rotation speed
 sensors, five voltage sensors and alarms.
diff --git a/Documentation/hwmon/zl6100.rst b/Documentation/hwmon/zl6100.rst
index 41513bb7fe51..968aff10ce0a 100644
--- a/Documentation/hwmon/zl6100.rst
+++ b/Documentation/hwmon/zl6100.rst
@@ -121,7 +121,7 @@ Usage Notes
 -----------
 
 This driver does not auto-detect devices. You will have to instantiate the
-devices explicitly. Please see Documentation/i2c/instantiating-devices for
+devices explicitly. Please see Documentation/i2c/instantiating-devices.rst for
 details.
 
 .. warning::
diff --git a/Documentation/i2c/DMA-considerations b/Documentation/i2c/DMA-considerations
deleted file mode 100644
index 203002054120..000000000000
--- a/Documentation/i2c/DMA-considerations
+++ /dev/null
@@ -1,71 +0,0 @@
-=================
-Linux I2C and DMA
-=================
-
-Given that i2c is a low-speed bus, over which the majority of messages
-transferred are small, it is not considered a prime user of DMA access. At this
-time of writing, only 10% of I2C bus master drivers have DMA support
-implemented. And the vast majority of transactions are so small that setting up
-DMA for it will likely add more overhead than a plain PIO transfer.
-
-Therefore, it is *not* mandatory that the buffer of an I2C message is DMA safe.
-It does not seem reasonable to apply additional burdens when the feature is so
-rarely used. However, it is recommended to use a DMA-safe buffer if your
-message size is likely applicable for DMA. Most drivers have this threshold
-around 8 bytes (as of today, this is mostly an educated guess, however). For
-any message of 16 byte or larger, it is probably a really good idea. Please
-note that other subsystems you use might add requirements. E.g., if your
-I2C bus master driver is using USB as a bridge, then you need to have DMA
-safe buffers always, because USB requires it.
-
-Clients
--------
-
-For clients, if you use a DMA safe buffer in i2c_msg, set the I2C_M_DMA_SAFE
-flag with it. Then, the I2C core and drivers know they can safely operate DMA
-on it. Note that using this flag is optional. I2C host drivers which are not
-updated to use this flag will work like before. And like before, they risk
-using an unsafe DMA buffer. To improve this situation, using I2C_M_DMA_SAFE in
-more and more clients and host drivers is the planned way forward. Note also
-that setting this flag makes only sense in kernel space. User space data is
-copied into kernel space anyhow. The I2C core makes sure the destination
-buffers in kernel space are always DMA capable. Also, when the core emulates
-SMBus transactions via I2C, the buffers for block transfers are DMA safe. Users
-of i2c_master_send() and i2c_master_recv() functions can now use DMA safe
-variants (i2c_master_send_dmasafe() and i2c_master_recv_dmasafe()) once they
-know their buffers are DMA safe. Users of i2c_transfer() must set the
-I2C_M_DMA_SAFE flag manually.
-
-Masters
--------
-
-Bus master drivers wishing to implement safe DMA can use helper functions from
-the I2C core. One gives you a DMA-safe buffer for a given i2c_msg as long as a
-certain threshold is met::
-
-	dma_buf = i2c_get_dma_safe_msg_buf(msg, threshold_in_byte);
-
-If a buffer is returned, it is either msg->buf for the I2C_M_DMA_SAFE case or a
-bounce buffer. But you don't need to care about that detail, just use the
-returned buffer. If NULL is returned, the threshold was not met or a bounce
-buffer could not be allocated. Fall back to PIO in that case.
-
-In any case, a buffer obtained from above needs to be released. Another helper
-function ensures a potentially used bounce buffer is freed::
-
-	i2c_put_dma_safe_msg_buf(dma_buf, msg, xferred);
-
-The last argument 'xferred' controls if the buffer is synced back to the
-message or not. No syncing is needed in cases setting up DMA had an error and
-there was no data transferred.
-
-The bounce buffer handling from the core is generic and simple. It will always
-allocate a new bounce buffer. If you want a more sophisticated handling (e.g.
-reusing pre-allocated buffers), you are free to implement your own.
-
-Please also check the in-kernel documentation for details. The i2c-sh_mobile
-driver can be used as a reference example how to use the above helpers.
-
-Final note: If you plan to use DMA with I2C (or with anything else, actually)
-make sure you have CONFIG_DMA_API_DEBUG enabled during development. It can help
-you find various issues which can be complex to debug otherwise.
diff --git a/Documentation/i2c/busses/i2c-ali1535 b/Documentation/i2c/busses/i2c-ali1535
deleted file mode 100644
index 5d46342e486a..000000000000
--- a/Documentation/i2c/busses/i2c-ali1535
+++ /dev/null
@@ -1,42 +0,0 @@
-Kernel driver i2c-ali1535
-
-Supported adapters:
-  * Acer Labs, Inc. ALI 1535 (south bridge)
-    Datasheet: Now under NDA
-	http://www.ali.com.tw/
-
-Authors:
-	Frodo Looijaard <frodol@dds.nl>,
-	Philip Edelbrock <phil@netroedge.com>,
-	Mark D. Studebaker <mdsxyz123@yahoo.com>,
-	Dan Eaton <dan.eaton@rocketlogix.com>,
-	Stephen Rousset<stephen.rousset@rocketlogix.com>
-
-Description
------------
-
-This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
-M1535 South Bridge.
-
-The M1535 is a South bridge for portable systems. It is very similar to the
-M15x3 South bridges also produced by Acer Labs Inc.  Some of the registers
-within the part have moved and some have been redefined slightly.
-Additionally, the sequencing of the SMBus transactions has been modified to
-be more consistent with the sequencing recommended by the manufacturer and
-observed through testing.  These changes are reflected in this driver and
-can be identified by comparing this driver to the i2c-ali15x3 driver. For
-an overview of these chips see http://www.acerlabs.com
-
-The SMB controller is part of the M7101 device, which is an ACPI-compliant
-Power Management Unit (PMU).
-
-The whole M7101 device has to be enabled for the SMB to work. You can't
-just enable the SMB alone. The SMB and the ACPI have separate I/O spaces.
-We make sure that the SMB is enabled. We leave the ACPI alone.
-
-
-Features
---------
-
-This driver controls the SMB Host only. This driver does not use
-interrupts.
diff --git a/Documentation/i2c/busses/i2c-ali1535.rst b/Documentation/i2c/busses/i2c-ali1535.rst
new file mode 100644
index 000000000000..6941064730dc
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ali1535.rst
@@ -0,0 +1,45 @@
+=========================
+Kernel driver i2c-ali1535
+=========================
+
+Supported adapters:
+  * Acer Labs, Inc. ALI 1535 (south bridge)
+
+    Datasheet: Now under NDA
+	http://www.ali.com.tw/
+
+Authors:
+	- Frodo Looijaard <frodol@dds.nl>,
+	- Philip Edelbrock <phil@netroedge.com>,
+	- Mark D. Studebaker <mdsxyz123@yahoo.com>,
+	- Dan Eaton <dan.eaton@rocketlogix.com>,
+	- Stephen Rousset<stephen.rousset@rocketlogix.com>
+
+Description
+-----------
+
+This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
+M1535 South Bridge.
+
+The M1535 is a South bridge for portable systems. It is very similar to the
+M15x3 South bridges also produced by Acer Labs Inc.  Some of the registers
+within the part have moved and some have been redefined slightly.
+Additionally, the sequencing of the SMBus transactions has been modified to
+be more consistent with the sequencing recommended by the manufacturer and
+observed through testing.  These changes are reflected in this driver and
+can be identified by comparing this driver to the i2c-ali15x3 driver. For
+an overview of these chips see http://www.acerlabs.com
+
+The SMB controller is part of the M7101 device, which is an ACPI-compliant
+Power Management Unit (PMU).
+
+The whole M7101 device has to be enabled for the SMB to work. You can't
+just enable the SMB alone. The SMB and the ACPI have separate I/O spaces.
+We make sure that the SMB is enabled. We leave the ACPI alone.
+
+
+Features
+--------
+
+This driver controls the SMB Host only. This driver does not use
+interrupts.
diff --git a/Documentation/i2c/busses/i2c-ali1563 b/Documentation/i2c/busses/i2c-ali1563
deleted file mode 100644
index 41b1a077e4c7..000000000000
--- a/Documentation/i2c/busses/i2c-ali1563
+++ /dev/null
@@ -1,27 +0,0 @@
-Kernel driver i2c-ali1563
-
-Supported adapters:
-  * Acer Labs, Inc. ALI 1563 (south bridge)
-    Datasheet: Now under NDA
-	http://www.ali.com.tw/
-
-Author: Patrick Mochel <mochel@digitalimplant.org>
-
-Description
------------
-
-This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
-M1563 South Bridge.
-
-For an overview of these chips see http://www.acerlabs.com
-
-The M1563 southbridge is deceptively similar to the M1533, with a few
-notable exceptions. One of those happens to be the fact they upgraded the
-i2c core to be SMBus 2.0 compliant, and happens to be almost identical to
-the i2c controller found in the Intel 801 south bridges.
-
-Features
---------
-
-This driver controls the SMB Host only. This driver does not use
-interrupts.
diff --git a/Documentation/i2c/busses/i2c-ali1563.rst b/Documentation/i2c/busses/i2c-ali1563.rst
new file mode 100644
index 000000000000..eec32c3ba92a
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ali1563.rst
@@ -0,0 +1,30 @@
+=========================
+Kernel driver i2c-ali1563
+=========================
+
+Supported adapters:
+  * Acer Labs, Inc. ALI 1563 (south bridge)
+
+    Datasheet: Now under NDA
+	http://www.ali.com.tw/
+
+Author: Patrick Mochel <mochel@digitalimplant.org>
+
+Description
+-----------
+
+This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
+M1563 South Bridge.
+
+For an overview of these chips see http://www.acerlabs.com
+
+The M1563 southbridge is deceptively similar to the M1533, with a few
+notable exceptions. One of those happens to be the fact they upgraded the
+i2c core to be SMBus 2.0 compliant, and happens to be almost identical to
+the i2c controller found in the Intel 801 south bridges.
+
+Features
+--------
+
+This driver controls the SMB Host only. This driver does not use
+interrupts.
diff --git a/Documentation/i2c/busses/i2c-ali15x3 b/Documentation/i2c/busses/i2c-ali15x3
deleted file mode 100644
index 42888d8ac124..000000000000
--- a/Documentation/i2c/busses/i2c-ali15x3
+++ /dev/null
@@ -1,112 +0,0 @@
-Kernel driver i2c-ali15x3
-
-Supported adapters:
-  * Acer Labs, Inc. ALI 1533 and 1543C (south bridge)
-    Datasheet: Now under NDA
-	http://www.ali.com.tw/
-
-Authors:
-	Frodo Looijaard <frodol@dds.nl>,
-	Philip Edelbrock <phil@netroedge.com>,
-	Mark D. Studebaker <mdsxyz123@yahoo.com>
-
-Module Parameters
------------------
-
-* force_addr: int
-  Initialize the base address of the i2c controller
-
-
-Notes
------
-
-The force_addr parameter is useful for boards that don't set the address in
-the BIOS. Does not do a PCI force; the device must still be present in
-lspci. Don't use this unless the driver complains that the base address is
-not set.
-
-Example: 'modprobe i2c-ali15x3 force_addr=0xe800'
-
-SMBus periodically hangs on ASUS P5A motherboards and can only be cleared
-by a power cycle. Cause unknown (see Issues below).
-
-
-Description
------------
-
-This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
-M1541 and M1543C South Bridges.
-
-The M1543C is a South bridge for desktop systems.
-The M1541 is a South bridge for portable systems.
-They are part of the following ALI chipsets:
-
- * "Aladdin Pro 2" includes the M1621 Slot 1 North bridge with AGP and
- 		100MHz CPU Front Side bus
- * "Aladdin V" includes the M1541 Socket 7 North bridge with AGP and 100MHz
- 		CPU Front Side bus
-   Some Aladdin V motherboards:
-	Asus P5A
-	Atrend ATC-5220
-	BCM/GVC VP1541
-	Biostar M5ALA
-	Gigabyte GA-5AX (** Generally doesn't work because the BIOS doesn't
-                            enable the 7101 device! **)
-	Iwill XA100 Plus
-	Micronics C200
-	Microstar (MSI) MS-5169
-
-  * "Aladdin IV" includes the M1541 Socket 7 North bridge
-   		with host bus up to 83.3 MHz.
-
-For an overview of these chips see http://www.acerlabs.com. At this time the
-full data sheets on the web site are password protected, however if you
-contact the ALI office in San Jose they may give you the password.
-
-The M1533/M1543C devices appear as FOUR separate devices on the PCI bus. An
-output of lspci will show something similar to the following:
-
-  00:02.0 USB Controller: Acer Laboratories Inc. M5237 (rev 03)
-  00:03.0 Bridge: Acer Laboratories Inc. M7101      <= THIS IS THE ONE WE NEED
-  00:07.0 ISA bridge: Acer Laboratories Inc. M1533 (rev c3)
-  00:0f.0 IDE interface: Acer Laboratories Inc. M5229 (rev c1)
-
-** IMPORTANT **
-** If you have a M1533 or M1543C on the board and you get
-** "ali15x3: Error: Can't detect ali15x3!"
-** then run lspci.
-** If you see the 1533 and 5229 devices but NOT the 7101 device,
-** then you must enable ACPI, the PMU, SMB, or something similar
-** in the BIOS.
-** The driver won't work if it can't find the M7101 device.
-
-The SMB controller is part of the M7101 device, which is an ACPI-compliant
-Power Management Unit (PMU).
-
-The whole M7101 device has to be enabled for the SMB to work. You can't
-just enable the SMB alone. The SMB and the ACPI have separate I/O spaces.
-We make sure that the SMB is enabled. We leave the ACPI alone.
-
-Features
---------
-
-This driver controls the SMB Host only. The SMB Slave
-controller on the M15X3 is not enabled. This driver does not use
-interrupts.
-
-
-Issues
-------
-
-This driver requests the I/O space for only the SMB
-registers. It doesn't use the ACPI region.
-
-On the ASUS P5A motherboard, there are several reports that
-the SMBus will hang and this can only be resolved by
-powering off the computer. It appears to be worse when the board
-gets hot, for example under heavy CPU load, or in the summer.
-There may be electrical problems on this board.
-On the P5A, the W83781D sensor chip is on both the ISA and
-SMBus. Therefore the SMBus hangs can generally be avoided
-by accessing the W83781D on the ISA bus only.
-
diff --git a/Documentation/i2c/busses/i2c-ali15x3.rst b/Documentation/i2c/busses/i2c-ali15x3.rst
new file mode 100644
index 000000000000..d4c1a2a419cb
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ali15x3.rst
@@ -0,0 +1,122 @@
+=========================
+Kernel driver i2c-ali15x3
+=========================
+
+Supported adapters:
+  * Acer Labs, Inc. ALI 1533 and 1543C (south bridge)
+
+    Datasheet: Now under NDA
+	http://www.ali.com.tw/
+
+Authors:
+	- Frodo Looijaard <frodol@dds.nl>,
+	- Philip Edelbrock <phil@netroedge.com>,
+	- Mark D. Studebaker <mdsxyz123@yahoo.com>
+
+Module Parameters
+-----------------
+
+* force_addr: int
+    Initialize the base address of the i2c controller
+
+
+Notes
+-----
+
+The force_addr parameter is useful for boards that don't set the address in
+the BIOS. Does not do a PCI force; the device must still be present in
+lspci. Don't use this unless the driver complains that the base address is
+not set.
+
+Example::
+
+    modprobe i2c-ali15x3 force_addr=0xe800
+
+SMBus periodically hangs on ASUS P5A motherboards and can only be cleared
+by a power cycle. Cause unknown (see Issues below).
+
+
+Description
+-----------
+
+This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
+M1541 and M1543C South Bridges.
+
+The M1543C is a South bridge for desktop systems.
+
+The M1541 is a South bridge for portable systems.
+
+They are part of the following ALI chipsets:
+
+ * "Aladdin Pro 2" includes the M1621 Slot 1 North bridge with AGP and
+   100MHz CPU Front Side bus
+ * "Aladdin V" includes the M1541 Socket 7 North bridge with AGP and 100MHz
+   CPU Front Side bus
+
+   Some Aladdin V motherboards:
+	- Asus P5A
+	- Atrend ATC-5220
+	- BCM/GVC VP1541
+	- Biostar M5ALA
+	- Gigabyte GA-5AX (Generally doesn't work because the BIOS doesn't
+	  enable the 7101 device!)
+	- Iwill XA100 Plus
+	- Micronics C200
+	- Microstar (MSI) MS-5169
+
+  * "Aladdin IV" includes the M1541 Socket 7 North bridge
+    with host bus up to 83.3 MHz.
+
+For an overview of these chips see http://www.acerlabs.com. At this time the
+full data sheets on the web site are password protected, however if you
+contact the ALI office in San Jose they may give you the password.
+
+The M1533/M1543C devices appear as FOUR separate devices on the PCI bus. An
+output of lspci will show something similar to the following::
+
+  00:02.0 USB Controller: Acer Laboratories Inc. M5237 (rev 03)
+  00:03.0 Bridge: Acer Laboratories Inc. M7101      <= THIS IS THE ONE WE NEED
+  00:07.0 ISA bridge: Acer Laboratories Inc. M1533 (rev c3)
+  00:0f.0 IDE interface: Acer Laboratories Inc. M5229 (rev c1)
+
+.. important::
+
+   If you have a M1533 or M1543C on the board and you get
+   "ali15x3: Error: Can't detect ali15x3!"
+   then run lspci.
+
+   If you see the 1533 and 5229 devices but NOT the 7101 device,
+   then you must enable ACPI, the PMU, SMB, or something similar
+   in the BIOS.
+
+   The driver won't work if it can't find the M7101 device.
+
+The SMB controller is part of the M7101 device, which is an ACPI-compliant
+Power Management Unit (PMU).
+
+The whole M7101 device has to be enabled for the SMB to work. You can't
+just enable the SMB alone. The SMB and the ACPI have separate I/O spaces.
+We make sure that the SMB is enabled. We leave the ACPI alone.
+
+Features
+--------
+
+This driver controls the SMB Host only. The SMB Slave
+controller on the M15X3 is not enabled. This driver does not use
+interrupts.
+
+
+Issues
+------
+
+This driver requests the I/O space for only the SMB
+registers. It doesn't use the ACPI region.
+
+On the ASUS P5A motherboard, there are several reports that
+the SMBus will hang and this can only be resolved by
+powering off the computer. It appears to be worse when the board
+gets hot, for example under heavy CPU load, or in the summer.
+There may be electrical problems on this board.
+On the P5A, the W83781D sensor chip is on both the ISA and
+SMBus. Therefore the SMBus hangs can generally be avoided
+by accessing the W83781D on the ISA bus only.
diff --git a/Documentation/i2c/busses/i2c-amd-mp2 b/Documentation/i2c/busses/i2c-amd-mp2
deleted file mode 100644
index 6571487171f4..000000000000
--- a/Documentation/i2c/busses/i2c-amd-mp2
+++ /dev/null
@@ -1,23 +0,0 @@
-Kernel driver i2c-amd-mp2
-
-Supported adapters:
-  * AMD MP2 PCIe interface
-
-Datasheet: not publicly available.
-
-Authors:
-	Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
-	Nehal Shah <nehal-bakulchandra.shah@amd.com>
-	Elie Morisse <syniurge@gmail.com>
-
-Description
------------
-
-The MP2 is an ARM processor programmed as an I2C controller and communicating
-with the x86 host through PCI.
-
-If you see something like this:
-
-03:00.7 MP2 I2C controller: Advanced Micro Devices, Inc. [AMD] Device 15e6
-
-in your 'lspci -v', then this driver is for your device.
diff --git a/Documentation/i2c/busses/i2c-amd-mp2.rst b/Documentation/i2c/busses/i2c-amd-mp2.rst
new file mode 100644
index 000000000000..ebc2fa899325
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-amd-mp2.rst
@@ -0,0 +1,25 @@
+=========================
+Kernel driver i2c-amd-mp2
+=========================
+
+Supported adapters:
+  * AMD MP2 PCIe interface
+
+Datasheet: not publicly available.
+
+Authors:
+	- Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
+	- Nehal Shah <nehal-bakulchandra.shah@amd.com>
+	- Elie Morisse <syniurge@gmail.com>
+
+Description
+-----------
+
+The MP2 is an ARM processor programmed as an I2C controller and communicating
+with the x86 host through PCI.
+
+If you see something like this::
+
+  03:00.7 MP2 I2C controller: Advanced Micro Devices, Inc. [AMD] Device 15e6
+
+in your ``lspci -v``, then this driver is for your device.
diff --git a/Documentation/i2c/busses/i2c-amd756 b/Documentation/i2c/busses/i2c-amd756
deleted file mode 100644
index 67f30874d0bf..000000000000
--- a/Documentation/i2c/busses/i2c-amd756
+++ /dev/null
@@ -1,25 +0,0 @@
-Kernel driver i2c-amd756
-
-Supported adapters:
-  * AMD 756
-  * AMD 766
-  * AMD 768
-  * AMD 8111
-    Datasheets: Publicly available on AMD website
-
-  * nVidia nForce
-    Datasheet: Unavailable
-
-Authors:
-	Frodo Looijaard <frodol@dds.nl>,
-	Philip Edelbrock <phil@netroedge.com> 
-
-Description
------------
-
-This driver supports the AMD 756, 766, 768 and 8111 Peripheral Bus
-Controllers, and the nVidia nForce.
-
-Note that for the 8111, there are two SMBus adapters. The SMBus 1.0 adapter
-is supported by this driver, and the SMBus 2.0 adapter is supported by the
-i2c-amd8111 driver.
diff --git a/Documentation/i2c/busses/i2c-amd756.rst b/Documentation/i2c/busses/i2c-amd756.rst
new file mode 100644
index 000000000000..bc93f392a4fc
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-amd756.rst
@@ -0,0 +1,29 @@
+========================
+Kernel driver i2c-amd756
+========================
+
+Supported adapters:
+  * AMD 756
+  * AMD 766
+  * AMD 768
+  * AMD 8111
+
+    Datasheets: Publicly available on AMD website
+
+  * nVidia nForce
+
+    Datasheet: Unavailable
+
+Authors:
+	- Frodo Looijaard <frodol@dds.nl>,
+	- Philip Edelbrock <phil@netroedge.com>
+
+Description
+-----------
+
+This driver supports the AMD 756, 766, 768 and 8111 Peripheral Bus
+Controllers, and the nVidia nForce.
+
+Note that for the 8111, there are two SMBus adapters. The SMBus 1.0 adapter
+is supported by this driver, and the SMBus 2.0 adapter is supported by the
+i2c-amd8111 driver.
diff --git a/Documentation/i2c/busses/i2c-amd8111 b/Documentation/i2c/busses/i2c-amd8111
deleted file mode 100644
index 460dd6635fd2..000000000000
--- a/Documentation/i2c/busses/i2c-amd8111
+++ /dev/null
@@ -1,41 +0,0 @@
-Kernel driver i2c-adm8111
-
-Supported adapters:
-    * AMD-8111 SMBus 2.0 PCI interface
-
-Datasheets:
-	AMD datasheet not yet available, but almost everything can be found
-	in the publicly available ACPI 2.0 specification, which the adapter
-	follows.
-
-Author: Vojtech Pavlik <vojtech@suse.cz>
-
-Description
------------
-
-If you see something like this:
-
-00:07.2 SMBus: Advanced Micro Devices [AMD] AMD-8111 SMBus 2.0 (rev 02)
-        Subsystem: Advanced Micro Devices [AMD] AMD-8111 SMBus 2.0
-        Flags: medium devsel, IRQ 19
-        I/O ports at d400 [size=32]
-
-in your 'lspci -v', then this driver is for your chipset.
-
-Process Call Support
---------------------
-
-Supported.
-
-SMBus 2.0 Support
------------------
-
-Supported. Both PEC and block process call support is implemented. Slave
-mode or host notification are not yet implemented.
-
-Notes
------
-
-Note that for the 8111, there are two SMBus adapters. The SMBus 2.0 adapter
-is supported by this driver, and the SMBus 1.0 adapter is supported by the
-i2c-amd756 driver.
diff --git a/Documentation/i2c/busses/i2c-amd8111.rst b/Documentation/i2c/busses/i2c-amd8111.rst
new file mode 100644
index 000000000000..d08bf0a7f0ac
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-amd8111.rst
@@ -0,0 +1,43 @@
+=========================
+Kernel driver i2c-adm8111
+=========================
+
+Supported adapters:
+    * AMD-8111 SMBus 2.0 PCI interface
+
+Datasheets:
+	AMD datasheet not yet available, but almost everything can be found
+	in the publicly available ACPI 2.0 specification, which the adapter
+	follows.
+
+Author: Vojtech Pavlik <vojtech@suse.cz>
+
+Description
+-----------
+
+If you see something like this::
+
+  00:07.2 SMBus: Advanced Micro Devices [AMD] AMD-8111 SMBus 2.0 (rev 02)
+          Subsystem: Advanced Micro Devices [AMD] AMD-8111 SMBus 2.0
+          Flags: medium devsel, IRQ 19
+          I/O ports at d400 [size=32]
+
+in your ``lspci -v``, then this driver is for your chipset.
+
+Process Call Support
+--------------------
+
+Supported.
+
+SMBus 2.0 Support
+-----------------
+
+Supported. Both PEC and block process call support is implemented. Slave
+mode or host notification are not yet implemented.
+
+Notes
+-----
+
+Note that for the 8111, there are two SMBus adapters. The SMBus 2.0 adapter
+is supported by this driver, and the SMBus 1.0 adapter is supported by the
+i2c-amd756 driver.
diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c
deleted file mode 100644
index 0d6018c316c7..000000000000
--- a/Documentation/i2c/busses/i2c-diolan-u2c
+++ /dev/null
@@ -1,26 +0,0 @@
-Kernel driver i2c-diolan-u2c
-
-Supported adapters:
-  * Diolan U2C-12 I2C-USB adapter
-    Documentation:
-	http://www.diolan.com/i2c/u2c12.html
-
-Author: Guenter Roeck <linux@roeck-us.net>
-
-Description
------------
-
-This is the driver for the Diolan U2C-12 USB-I2C adapter.
-
-The Diolan U2C-12 I2C-USB Adapter provides a low cost solution to connect
-a computer to I2C slave devices using a USB interface. It also supports
-connectivity to SPI devices.
-
-This driver only supports the I2C interface of U2C-12. The driver does not use
-interrupts.
-
-
-Module parameters
------------------
-
-* frequency: I2C bus frequency
diff --git a/Documentation/i2c/busses/i2c-diolan-u2c.rst b/Documentation/i2c/busses/i2c-diolan-u2c.rst
new file mode 100644
index 000000000000..c18cbdcdf73c
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-diolan-u2c.rst
@@ -0,0 +1,29 @@
+============================
+Kernel driver i2c-diolan-u2c
+============================
+
+Supported adapters:
+  * Diolan U2C-12 I2C-USB adapter
+
+    Documentation:
+	http://www.diolan.com/i2c/u2c12.html
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This is the driver for the Diolan U2C-12 USB-I2C adapter.
+
+The Diolan U2C-12 I2C-USB Adapter provides a low cost solution to connect
+a computer to I2C slave devices using a USB interface. It also supports
+connectivity to SPI devices.
+
+This driver only supports the I2C interface of U2C-12. The driver does not use
+interrupts.
+
+
+Module parameters
+-----------------
+
+* frequency: I2C bus frequency
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
deleted file mode 100644
index f426c13c63a9..000000000000
--- a/Documentation/i2c/busses/i2c-i801
+++ /dev/null
@@ -1,173 +0,0 @@
-Kernel driver i2c-i801
-
-Supported adapters:
-  * Intel 82801AA and 82801AB (ICH and ICH0 - part of the
-    '810' and '810E' chipsets)
-  * Intel 82801BA (ICH2 - part of the '815E' chipset)
-  * Intel 82801CA/CAM (ICH3)
-  * Intel 82801DB (ICH4) (HW PEC supported)
-  * Intel 82801EB/ER (ICH5) (HW PEC supported)
-  * Intel 6300ESB
-  * Intel 82801FB/FR/FW/FRW (ICH6)
-  * Intel 82801G (ICH7)
-  * Intel 631xESB/632xESB (ESB2)
-  * Intel 82801H (ICH8)
-  * Intel 82801I (ICH9)
-  * Intel EP80579 (Tolapai)
-  * Intel 82801JI (ICH10)
-  * Intel 5/3400 Series (PCH)
-  * Intel 6 Series (PCH)
-  * Intel Patsburg (PCH)
-  * Intel DH89xxCC (PCH)
-  * Intel Panther Point (PCH)
-  * Intel Lynx Point (PCH)
-  * Intel Avoton (SOC)
-  * Intel Wellsburg (PCH)
-  * Intel Coleto Creek (PCH)
-  * Intel Wildcat Point (PCH)
-  * Intel BayTrail (SOC)
-  * Intel Braswell (SOC)
-  * Intel Sunrise Point (PCH)
-  * Intel Kaby Lake (PCH)
-  * Intel DNV (SOC)
-  * Intel Broxton (SOC)
-  * Intel Lewisburg (PCH)
-  * Intel Gemini Lake (SOC)
-  * Intel Cannon Lake (PCH)
-  * Intel Cedar Fork (PCH)
-  * Intel Ice Lake (PCH)
-  * Intel Comet Lake (PCH)
-  * Intel Elkhart Lake (PCH)
-  * Intel Tiger Lake (PCH)
-   Datasheets: Publicly available at the Intel website
-
-On Intel Patsburg and later chipsets, both the normal host SMBus controller
-and the additional 'Integrated Device Function' controllers are supported.
-
-Authors: 
-	Mark Studebaker <mdsxyz123@yahoo.com>
-	Jean Delvare <jdelvare@suse.de>
-
-
-Module Parameters
------------------
-
-* disable_features (bit vector)
-Disable selected features normally supported by the device. This makes it
-possible to work around possible driver or hardware bugs if the feature in
-question doesn't work as intended for whatever reason. Bit values:
- 0x01  disable SMBus PEC
- 0x02  disable the block buffer
- 0x08  disable the I2C block read functionality
- 0x10  don't use interrupts
- 0x20  disable SMBus Host Notify
-
-
-Description
------------
-
-The ICH (properly known as the 82801AA), ICH0 (82801AB), ICH2 (82801BA),
-ICH3 (82801CA/CAM) and later devices (PCH) are Intel chips that are a part of
-Intel's '810' chipset for Celeron-based PCs, '810E' chipset for
-Pentium-based PCs, '815E' chipset, and others.
-
-The ICH chips contain at least SEVEN separate PCI functions in TWO logical
-PCI devices. An output of lspci will show something similar to the
-following:
-
-  00:1e.0 PCI bridge: Intel Corporation: Unknown device 2418 (rev 01)
-  00:1f.0 ISA bridge: Intel Corporation: Unknown device 2410 (rev 01)
-  00:1f.1 IDE interface: Intel Corporation: Unknown device 2411 (rev 01)
-  00:1f.2 USB Controller: Intel Corporation: Unknown device 2412 (rev 01)
-  00:1f.3 Unknown class [0c05]: Intel Corporation: Unknown device 2413 (rev 01)
-
-The SMBus controller is function 3 in device 1f. Class 0c05 is SMBus Serial
-Controller.
-
-The ICH chips are quite similar to Intel's PIIX4 chip, at least in the
-SMBus controller.
-
-
-Process Call Support
---------------------
-
-Block process call is supported on the 82801EB (ICH5) and later chips.
-
-
-I2C Block Read Support
-----------------------
-
-I2C block read is supported on the 82801EB (ICH5) and later chips.
-
-
-SMBus 2.0 Support
------------------
-
-The 82801DB (ICH4) and later chips support several SMBus 2.0 features.
-
-
-Interrupt Support
------------------
-
-PCI interrupt support is supported on the 82801EB (ICH5) and later chips.
-
-
-Hidden ICH SMBus
-----------------
-
-If your system has an Intel ICH south bridge, but you do NOT see the
-SMBus device at 00:1f.3 in lspci, and you can't figure out any way in the
-BIOS to enable it, it means it has been hidden by the BIOS code. Asus is
-well known for first doing this on their P4B motherboard, and many other
-boards after that. Some vendor machines are affected as well.
-
-The first thing to try is the "i2c-scmi" ACPI driver. It could be that the
-SMBus was hidden on purpose because it'll be driven by ACPI. If the
-i2c-scmi driver works for you, just forget about the i2c-i801 driver and
-don't try to unhide the ICH SMBus. Even if i2c-scmi doesn't work, you
-better make sure that the SMBus isn't used by the ACPI code. Try loading
-the "fan" and "thermal" drivers, and check in /sys/class/thermal. If you
-find a thermal zone with type "acpitz", it's likely that the ACPI is
-accessing the SMBus and it's safer not to unhide it. Only once you are
-certain that ACPI isn't using the SMBus, you can attempt to unhide it.
-
-In order to unhide the SMBus, we need to change the value of a PCI
-register before the kernel enumerates the PCI devices. This is done in
-drivers/pci/quirks.c, where all affected boards must be listed (see
-function asus_hides_smbus_hostbridge.) If the SMBus device is missing,
-and you think there's something interesting on the SMBus (e.g. a
-hardware monitoring chip), you need to add your board to the list.
-
-The motherboard is identified using the subvendor and subdevice IDs of the
-host bridge PCI device. Get yours with "lspci -n -v -s 00:00.0":
-
-00:00.0 Class 0600: 8086:2570 (rev 02)
-        Subsystem: 1043:80f2
-        Flags: bus master, fast devsel, latency 0
-        Memory at fc000000 (32-bit, prefetchable) [size=32M]
-        Capabilities: [e4] #09 [2106]
-        Capabilities: [a0] AGP version 3.0
-
-Here the host bridge ID is 2570 (82865G/PE/P), the subvendor ID is 1043
-(Asus) and the subdevice ID is 80f2 (P4P800-X). You can find the symbolic
-names for the bridge ID and the subvendor ID in include/linux/pci_ids.h,
-and then add a case for your subdevice ID at the right place in
-drivers/pci/quirks.c. Then please give it very good testing, to make sure
-that the unhidden SMBus doesn't conflict with e.g. ACPI.
-
-If it works, proves useful (i.e. there are usable chips on the SMBus)
-and seems safe, please submit a patch for inclusion into the kernel.
-
-Note: There's a useful script in lm_sensors 2.10.2 and later, named
-unhide_ICH_SMBus (in prog/hotplug), which uses the fakephp driver to
-temporarily unhide the SMBus without having to patch and recompile your
-kernel. It's very convenient if you just want to check if there's
-anything interesting on your hidden ICH SMBus.
-
-
-**********************
-The lm_sensors project gratefully acknowledges the support of Texas
-Instruments in the initial development of this driver.
-
-The lm_sensors project gratefully acknowledges the support of Intel in the
-development of SMBus 2.0 / ICH4 features of this driver.
diff --git a/Documentation/i2c/busses/i2c-i801.rst b/Documentation/i2c/busses/i2c-i801.rst
new file mode 100644
index 000000000000..2a570c214880
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-i801.rst
@@ -0,0 +1,182 @@
+======================
+Kernel driver i2c-i801
+======================
+
+
+Supported adapters:
+  * Intel 82801AA and 82801AB (ICH and ICH0 - part of the
+    '810' and '810E' chipsets)
+  * Intel 82801BA (ICH2 - part of the '815E' chipset)
+  * Intel 82801CA/CAM (ICH3)
+  * Intel 82801DB (ICH4) (HW PEC supported)
+  * Intel 82801EB/ER (ICH5) (HW PEC supported)
+  * Intel 6300ESB
+  * Intel 82801FB/FR/FW/FRW (ICH6)
+  * Intel 82801G (ICH7)
+  * Intel 631xESB/632xESB (ESB2)
+  * Intel 82801H (ICH8)
+  * Intel 82801I (ICH9)
+  * Intel EP80579 (Tolapai)
+  * Intel 82801JI (ICH10)
+  * Intel 5/3400 Series (PCH)
+  * Intel 6 Series (PCH)
+  * Intel Patsburg (PCH)
+  * Intel DH89xxCC (PCH)
+  * Intel Panther Point (PCH)
+  * Intel Lynx Point (PCH)
+  * Intel Avoton (SOC)
+  * Intel Wellsburg (PCH)
+  * Intel Coleto Creek (PCH)
+  * Intel Wildcat Point (PCH)
+  * Intel BayTrail (SOC)
+  * Intel Braswell (SOC)
+  * Intel Sunrise Point (PCH)
+  * Intel Kaby Lake (PCH)
+  * Intel DNV (SOC)
+  * Intel Broxton (SOC)
+  * Intel Lewisburg (PCH)
+  * Intel Gemini Lake (SOC)
+  * Intel Cannon Lake (PCH)
+  * Intel Cedar Fork (PCH)
+  * Intel Ice Lake (PCH)
+  * Intel Comet Lake (PCH)
+  * Intel Elkhart Lake (PCH)
+  * Intel Tiger Lake (PCH)
+
+   Datasheets: Publicly available at the Intel website
+
+On Intel Patsburg and later chipsets, both the normal host SMBus controller
+and the additional 'Integrated Device Function' controllers are supported.
+
+Authors:
+	- Mark Studebaker <mdsxyz123@yahoo.com>
+	- Jean Delvare <jdelvare@suse.de>
+
+
+Module Parameters
+-----------------
+
+* disable_features (bit vector)
+
+Disable selected features normally supported by the device. This makes it
+possible to work around possible driver or hardware bugs if the feature in
+question doesn't work as intended for whatever reason. Bit values:
+
+ ====  =========================================
+ 0x01  disable SMBus PEC
+ 0x02  disable the block buffer
+ 0x08  disable the I2C block read functionality
+ 0x10  don't use interrupts
+ 0x20  disable SMBus Host Notify
+ ====  =========================================
+
+
+Description
+-----------
+
+The ICH (properly known as the 82801AA), ICH0 (82801AB), ICH2 (82801BA),
+ICH3 (82801CA/CAM) and later devices (PCH) are Intel chips that are a part of
+Intel's '810' chipset for Celeron-based PCs, '810E' chipset for
+Pentium-based PCs, '815E' chipset, and others.
+
+The ICH chips contain at least SEVEN separate PCI functions in TWO logical
+PCI devices. An output of lspci will show something similar to the
+following::
+
+  00:1e.0 PCI bridge: Intel Corporation: Unknown device 2418 (rev 01)
+  00:1f.0 ISA bridge: Intel Corporation: Unknown device 2410 (rev 01)
+  00:1f.1 IDE interface: Intel Corporation: Unknown device 2411 (rev 01)
+  00:1f.2 USB Controller: Intel Corporation: Unknown device 2412 (rev 01)
+  00:1f.3 Unknown class [0c05]: Intel Corporation: Unknown device 2413 (rev 01)
+
+The SMBus controller is function 3 in device 1f. Class 0c05 is SMBus Serial
+Controller.
+
+The ICH chips are quite similar to Intel's PIIX4 chip, at least in the
+SMBus controller.
+
+
+Process Call Support
+--------------------
+
+Block process call is supported on the 82801EB (ICH5) and later chips.
+
+
+I2C Block Read Support
+----------------------
+
+I2C block read is supported on the 82801EB (ICH5) and later chips.
+
+
+SMBus 2.0 Support
+-----------------
+
+The 82801DB (ICH4) and later chips support several SMBus 2.0 features.
+
+
+Interrupt Support
+-----------------
+
+PCI interrupt support is supported on the 82801EB (ICH5) and later chips.
+
+
+Hidden ICH SMBus
+----------------
+
+If your system has an Intel ICH south bridge, but you do NOT see the
+SMBus device at 00:1f.3 in lspci, and you can't figure out any way in the
+BIOS to enable it, it means it has been hidden by the BIOS code. Asus is
+well known for first doing this on their P4B motherboard, and many other
+boards after that. Some vendor machines are affected as well.
+
+The first thing to try is the "i2c-scmi" ACPI driver. It could be that the
+SMBus was hidden on purpose because it'll be driven by ACPI. If the
+i2c-scmi driver works for you, just forget about the i2c-i801 driver and
+don't try to unhide the ICH SMBus. Even if i2c-scmi doesn't work, you
+better make sure that the SMBus isn't used by the ACPI code. Try loading
+the "fan" and "thermal" drivers, and check in /sys/class/thermal. If you
+find a thermal zone with type "acpitz", it's likely that the ACPI is
+accessing the SMBus and it's safer not to unhide it. Only once you are
+certain that ACPI isn't using the SMBus, you can attempt to unhide it.
+
+In order to unhide the SMBus, we need to change the value of a PCI
+register before the kernel enumerates the PCI devices. This is done in
+drivers/pci/quirks.c, where all affected boards must be listed (see
+function asus_hides_smbus_hostbridge.) If the SMBus device is missing,
+and you think there's something interesting on the SMBus (e.g. a
+hardware monitoring chip), you need to add your board to the list.
+
+The motherboard is identified using the subvendor and subdevice IDs of the
+host bridge PCI device. Get yours with ``lspci -n -v -s 00:00.0``::
+
+  00:00.0 Class 0600: 8086:2570 (rev 02)
+          Subsystem: 1043:80f2
+          Flags: bus master, fast devsel, latency 0
+          Memory at fc000000 (32-bit, prefetchable) [size=32M]
+          Capabilities: [e4] #09 [2106]
+          Capabilities: [a0] AGP version 3.0
+
+Here the host bridge ID is 2570 (82865G/PE/P), the subvendor ID is 1043
+(Asus) and the subdevice ID is 80f2 (P4P800-X). You can find the symbolic
+names for the bridge ID and the subvendor ID in include/linux/pci_ids.h,
+and then add a case for your subdevice ID at the right place in
+drivers/pci/quirks.c. Then please give it very good testing, to make sure
+that the unhidden SMBus doesn't conflict with e.g. ACPI.
+
+If it works, proves useful (i.e. there are usable chips on the SMBus)
+and seems safe, please submit a patch for inclusion into the kernel.
+
+Note: There's a useful script in lm_sensors 2.10.2 and later, named
+unhide_ICH_SMBus (in prog/hotplug), which uses the fakephp driver to
+temporarily unhide the SMBus without having to patch and recompile your
+kernel. It's very convenient if you just want to check if there's
+anything interesting on your hidden ICH SMBus.
+
+
+----------------------------------------------------------------------------
+
+The lm_sensors project gratefully acknowledges the support of Texas
+Instruments in the initial development of this driver.
+
+The lm_sensors project gratefully acknowledges the support of Intel in the
+development of SMBus 2.0 / ICH4 features of this driver.
diff --git a/Documentation/i2c/busses/i2c-ismt b/Documentation/i2c/busses/i2c-ismt
deleted file mode 100644
index 737355822c0b..000000000000
--- a/Documentation/i2c/busses/i2c-ismt
+++ /dev/null
@@ -1,36 +0,0 @@
-Kernel driver i2c-ismt
-
-Supported adapters:
-  * Intel S12xx series SOCs
-
-Authors:
-	Bill Brown <bill.e.brown@intel.com>
-
-
-Module Parameters
------------------
-
-* bus_speed (unsigned int)
-Allows changing of the bus speed.  Normally, the bus speed is set by the BIOS
-and never needs to be changed.  However, some SMBus analyzers are too slow for
-monitoring the bus during debug, thus the need for this module parameter.
-Specify the bus speed in kHz.
-Available bus frequency settings:
-  0  no change
-  80 kHz
-  100 kHz
-  400 kHz
-  1000 kHz
-
-
-Description
------------
-
-The S12xx series of SOCs have a pair of integrated SMBus 2.0 controllers
-targeted primarily at the microserver and storage markets.
-
-The S12xx series contain a pair of PCI functions.  An output of lspci will show
-something similar to the following:
-
-  00:13.0 System peripheral: Intel Corporation Centerton SMBus 2.0 Controller 0
-  00:13.1 System peripheral: Intel Corporation Centerton SMBus 2.0 Controller 1
diff --git a/Documentation/i2c/busses/i2c-ismt.rst b/Documentation/i2c/busses/i2c-ismt.rst
new file mode 100644
index 000000000000..8e74919a3fdf
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ismt.rst
@@ -0,0 +1,44 @@
+======================
+Kernel driver i2c-ismt
+======================
+
+
+Supported adapters:
+  * Intel S12xx series SOCs
+
+Authors:
+	Bill Brown <bill.e.brown@intel.com>
+
+
+Module Parameters
+-----------------
+
+* bus_speed (unsigned int)
+
+Allows changing of the bus speed.  Normally, the bus speed is set by the BIOS
+and never needs to be changed.  However, some SMBus analyzers are too slow for
+monitoring the bus during debug, thus the need for this module parameter.
+Specify the bus speed in kHz.
+
+Available bus frequency settings:
+
+  ====   =========
+  0      no change
+  80     kHz
+  100    kHz
+  400    kHz
+  1000   kHz
+  ====   =========
+
+
+Description
+-----------
+
+The S12xx series of SOCs have a pair of integrated SMBus 2.0 controllers
+targeted primarily at the microserver and storage markets.
+
+The S12xx series contain a pair of PCI functions.  An output of lspci will show
+something similar to the following::
+
+  00:13.0 System peripheral: Intel Corporation Centerton SMBus 2.0 Controller 0
+  00:13.1 System peripheral: Intel Corporation Centerton SMBus 2.0 Controller 1
diff --git a/Documentation/i2c/busses/i2c-mlxcpld b/Documentation/i2c/busses/i2c-mlxcpld
deleted file mode 100644
index 925904aa9b57..000000000000
--- a/Documentation/i2c/busses/i2c-mlxcpld
+++ /dev/null
@@ -1,51 +0,0 @@
-Driver i2c-mlxcpld
-
-Author: Michael Shych <michaelsh@mellanox.com>
-
-This is the Mellanox I2C controller logic, implemented in Lattice CPLD
-device.
-Device supports:
- - Master mode.
- - One physical bus.
- - Polling mode.
-
-This controller is equipped within the next Mellanox systems:
-"msx6710", "msx6720", "msb7700", "msn2700", "msx1410", "msn2410", "msb7800",
-"msn2740", "msn2100".
-
-The next transaction types are supported:
- - Receive Byte/Block.
- - Send Byte/Block.
- - Read Byte/Block.
- - Write Byte/Block.
-
-Registers:
-CPBLTY		0x0 - capability reg.
-			Bits [6:5] - transaction length. b01 - 72B is supported,
-			36B in other case.
-			Bit 7 - SMBus block read support.
-CTRL		0x1 - control reg.
-			Resets all the registers.
-HALF_CYC	0x4 - cycle reg.
-			Configure the width of I2C SCL half clock cycle (in 4 LPC_CLK
-			units).
-I2C_HOLD	0x5 - hold reg.
-			OE (output enable) is delayed by value set to this register
-			(in LPC_CLK units)
-CMD			0x6 - command reg.
-			Bit 0, 0 = write, 1 = read.
-			Bits [7:1] - the 7bit Address of the I2C device.
-			It should be written last as it triggers an I2C transaction.
-NUM_DATA	0x7 - data size reg.
-			Number of data bytes to write in read transaction
-NUM_ADDR	0x8 - address reg.
-			Number of address bytes to write in read transaction.
-STATUS		0x9 - status reg.
-			Bit 0 - transaction is completed.
-			Bit 4 - ACK/NACK.
-DATAx		0xa - 0x54  - 68 bytes data buffer regs.
-			For write transaction address is specified in four first bytes
-			(DATA1 - DATA4), data starting from DATA4.
-			For read transactions address is sent in a separate transaction and
-			specified in the four first bytes (DATA0 - DATA3). Data is read
-			starting from DATA0.
diff --git a/Documentation/i2c/busses/i2c-mlxcpld.rst b/Documentation/i2c/busses/i2c-mlxcpld.rst
new file mode 100644
index 000000000000..9a0b2916aa71
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-mlxcpld.rst
@@ -0,0 +1,57 @@
+==================
+Driver i2c-mlxcpld
+==================
+
+Author: Michael Shych <michaelsh@mellanox.com>
+
+This is the Mellanox I2C controller logic, implemented in Lattice CPLD
+device.
+
+Device supports:
+ - Master mode.
+ - One physical bus.
+ - Polling mode.
+
+This controller is equipped within the next Mellanox systems:
+"msx6710", "msx6720", "msb7700", "msn2700", "msx1410", "msn2410", "msb7800",
+"msn2740", "msn2100".
+
+The next transaction types are supported:
+ - Receive Byte/Block.
+ - Send Byte/Block.
+ - Read Byte/Block.
+ - Write Byte/Block.
+
+Registers:
+
+=============== === =======================================================================
+CPBLTY		0x0 - capability reg.
+			Bits [6:5] - transaction length. b01 - 72B is supported,
+			36B in other case.
+			Bit 7 - SMBus block read support.
+CTRL		0x1 - control reg.
+			Resets all the registers.
+HALF_CYC	0x4 - cycle reg.
+			Configure the width of I2C SCL half clock cycle (in 4 LPC_CLK
+			units).
+I2C_HOLD	0x5 - hold reg.
+			OE (output enable) is delayed by value set to this register
+			(in LPC_CLK units)
+CMD			0x6 - command reg.
+			Bit 0, 0 = write, 1 = read.
+			Bits [7:1] - the 7bit Address of the I2C device.
+			It should be written last as it triggers an I2C transaction.
+NUM_DATA	0x7 - data size reg.
+			Number of data bytes to write in read transaction
+NUM_ADDR	0x8 - address reg.
+			Number of address bytes to write in read transaction.
+STATUS		0x9 - status reg.
+			Bit 0 - transaction is completed.
+			Bit 4 - ACK/NACK.
+DATAx		0xa - 0x54  - 68 bytes data buffer regs.
+			For write transaction address is specified in four first bytes
+			(DATA1 - DATA4), data starting from DATA4.
+			For read transactions address is sent in a separate transaction and
+			specified in the four first bytes (DATA0 - DATA3). Data is read
+			starting from DATA0.
+=============== === =======================================================================
diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
deleted file mode 100644
index 9698c396b830..000000000000
--- a/Documentation/i2c/busses/i2c-nforce2
+++ /dev/null
@@ -1,50 +0,0 @@
-Kernel driver i2c-nforce2
-
-Supported adapters:
-  * nForce2 MCP                10de:0064 
-  * nForce2 Ultra 400 MCP      10de:0084 
-  * nForce3 Pro150 MCP         10de:00D4 
-  * nForce3 250Gb MCP          10de:00E4 
-  * nForce4 MCP                10de:0052
-  * nForce4 MCP-04             10de:0034
-  * nForce MCP51               10de:0264
-  * nForce MCP55               10de:0368
-  * nForce MCP61               10de:03EB
-  * nForce MCP65               10de:0446
-  * nForce MCP67               10de:0542
-  * nForce MCP73               10de:07D8
-  * nForce MCP78S              10de:0752
-  * nForce MCP79               10de:0AA2
-
-Datasheet: not publicly available, but seems to be similar to the
-           AMD-8111 SMBus 2.0 adapter.
-
-Authors:
-	Hans-Frieder Vogt <hfvogt@gmx.net>,
-	Thomas Leibold <thomas@plx.com>, 
-        Patrick Dreker <patrick@dreker.de>
-	
-Description
------------
-
-i2c-nforce2 is a driver for the SMBuses included in the nVidia nForce2 MCP.
-
-If your 'lspci -v' listing shows something like the following,
-
-00:01.1 SMBus: nVidia Corporation: Unknown device 0064 (rev a2)
-        Subsystem: Asustek Computer, Inc.: Unknown device 0c11
-        Flags: 66Mhz, fast devsel, IRQ 5
-        I/O ports at c000 [size=32]
-        Capabilities: <available only to root>
-
-then this driver should support the SMBuses of your motherboard.
-
-
-Notes
------
-
-The SMBus adapter in the nForce2 chipset seems to be very similar to the
-SMBus 2.0 adapter in the AMD-8111 south bridge. However, I could only get
-the driver to work with direct I/O access, which is different to the EC
-interface of the AMD-8111. Tested on Asus A7N8X. The ACPI DSDT table of the
-Asus A7N8X lists two SMBuses, both of which are supported by this driver.
diff --git a/Documentation/i2c/busses/i2c-nforce2.rst b/Documentation/i2c/busses/i2c-nforce2.rst
new file mode 100644
index 000000000000..83181445268f
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-nforce2.rst
@@ -0,0 +1,53 @@
+=========================
+Kernel driver i2c-nforce2
+=========================
+
+Supported adapters:
+  * nForce2 MCP                10de:0064
+  * nForce2 Ultra 400 MCP      10de:0084
+  * nForce3 Pro150 MCP         10de:00D4
+  * nForce3 250Gb MCP          10de:00E4
+  * nForce4 MCP                10de:0052
+  * nForce4 MCP-04             10de:0034
+  * nForce MCP51               10de:0264
+  * nForce MCP55               10de:0368
+  * nForce MCP61               10de:03EB
+  * nForce MCP65               10de:0446
+  * nForce MCP67               10de:0542
+  * nForce MCP73               10de:07D8
+  * nForce MCP78S              10de:0752
+  * nForce MCP79               10de:0AA2
+
+Datasheet:
+           not publicly available, but seems to be similar to the
+           AMD-8111 SMBus 2.0 adapter.
+
+Authors:
+	- Hans-Frieder Vogt <hfvogt@gmx.net>,
+	- Thomas Leibold <thomas@plx.com>,
+        - Patrick Dreker <patrick@dreker.de>
+
+Description
+-----------
+
+i2c-nforce2 is a driver for the SMBuses included in the nVidia nForce2 MCP.
+
+If your ``lspci -v`` listing shows something like the following::
+
+  00:01.1 SMBus: nVidia Corporation: Unknown device 0064 (rev a2)
+          Subsystem: Asustek Computer, Inc.: Unknown device 0c11
+          Flags: 66Mhz, fast devsel, IRQ 5
+          I/O ports at c000 [size=32]
+          Capabilities: <available only to root>
+
+then this driver should support the SMBuses of your motherboard.
+
+
+Notes
+-----
+
+The SMBus adapter in the nForce2 chipset seems to be very similar to the
+SMBus 2.0 adapter in the AMD-8111 south bridge. However, I could only get
+the driver to work with direct I/O access, which is different to the EC
+interface of the AMD-8111. Tested on Asus A7N8X. The ACPI DSDT table of the
+Asus A7N8X lists two SMBuses, both of which are supported by this driver.
diff --git a/Documentation/i2c/busses/i2c-nvidia-gpu b/Documentation/i2c/busses/i2c-nvidia-gpu
deleted file mode 100644
index 31884d2b2eb5..000000000000
--- a/Documentation/i2c/busses/i2c-nvidia-gpu
+++ /dev/null
@@ -1,18 +0,0 @@
-Kernel driver i2c-nvidia-gpu
-
-Datasheet: not publicly available.
-
-Authors:
-	Ajay Gupta <ajayg@nvidia.com>
-
-Description
------------
-
-i2c-nvidia-gpu is a driver for I2C controller included in NVIDIA Turing
-and later GPUs and it is used to communicate with Type-C controller on GPUs.
-
-If your 'lspci -v' listing shows something like the following,
-
-01:00.3 Serial bus controller [0c80]: NVIDIA Corporation Device 1ad9 (rev a1)
-
-then this driver should support the I2C controller of your GPU.
diff --git a/Documentation/i2c/busses/i2c-nvidia-gpu.rst b/Documentation/i2c/busses/i2c-nvidia-gpu.rst
new file mode 100644
index 000000000000..38fb8a4c8756
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-nvidia-gpu.rst
@@ -0,0 +1,20 @@
+============================
+Kernel driver i2c-nvidia-gpu
+============================
+
+Datasheet: not publicly available.
+
+Authors:
+	Ajay Gupta <ajayg@nvidia.com>
+
+Description
+-----------
+
+i2c-nvidia-gpu is a driver for I2C controller included in NVIDIA Turing
+and later GPUs and it is used to communicate with Type-C controller on GPUs.
+
+If your ``lspci -v`` listing shows something like the following::
+
+  01:00.3 Serial bus controller [0c80]: NVIDIA Corporation Device 1ad9 (rev a1)
+
+then this driver should support the I2C controller of your GPU.
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
deleted file mode 100644
index 9caaf7df1b2f..000000000000
--- a/Documentation/i2c/busses/i2c-ocores
+++ /dev/null
@@ -1,68 +0,0 @@
-Kernel driver i2c-ocores
-
-Supported adapters:
-  * OpenCores.org I2C controller by Richard Herveille (see datasheet link)
-    https://opencores.org/project/i2c/overview
-
-Author: Peter Korsgaard <peter@korsgaard.com>
-
-Description
------------
-
-i2c-ocores is an i2c bus driver for the OpenCores.org I2C controller
-IP core by Richard Herveille.
-
-Usage
------
-
-i2c-ocores uses the platform bus, so you need to provide a struct
-platform_device with the base address and interrupt number. The
-dev.platform_data of the device should also point to a struct
-ocores_i2c_platform_data (see linux/platform_data/i2c-ocores.h) describing the
-distance between registers and the input clock speed.
-There is also a possibility to attach a list of i2c_board_info which
-the i2c-ocores driver will add to the bus upon creation.
-
-E.G. something like:
-
-static struct resource ocores_resources[] = {
-	[0] = {
-		.start	= MYI2C_BASEADDR,
-		.end	= MYI2C_BASEADDR + 8,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= MYI2C_IRQ,
-		.end	= MYI2C_IRQ,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-/* optional board info */
-struct i2c_board_info ocores_i2c_board_info[] = {
-	{
-		I2C_BOARD_INFO("tsc2003", 0x48),
-		.platform_data = &tsc2003_platform_data,
-		.irq = TSC_IRQ
-	},
-	{
-		I2C_BOARD_INFO("adv7180", 0x42 >> 1),
-		.irq = ADV_IRQ
-	}
-};
-
-static struct ocores_i2c_platform_data myi2c_data = {
-	.regstep	= 2,		/* two bytes between registers */
-	.clock_khz	= 50000,	/* input clock of 50MHz */
-	.devices	= ocores_i2c_board_info, /* optional table of devices */
-	.num_devices	= ARRAY_SIZE(ocores_i2c_board_info), /* table size */
-};
-
-static struct platform_device myi2c = {
-	.name			= "ocores-i2c",
-	.dev = {
-		.platform_data	= &myi2c_data,
-	},
-	.num_resources		= ARRAY_SIZE(ocores_resources),
-	.resource		= ocores_resources,
-};
diff --git a/Documentation/i2c/busses/i2c-ocores.rst b/Documentation/i2c/busses/i2c-ocores.rst
new file mode 100644
index 000000000000..f5e175f2a2a6
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ocores.rst
@@ -0,0 +1,70 @@
+========================
+Kernel driver i2c-ocores
+========================
+
+Supported adapters:
+  * OpenCores.org I2C controller by Richard Herveille (see datasheet link)
+    https://opencores.org/project/i2c/overview
+
+Author: Peter Korsgaard <peter@korsgaard.com>
+
+Description
+-----------
+
+i2c-ocores is an i2c bus driver for the OpenCores.org I2C controller
+IP core by Richard Herveille.
+
+Usage
+-----
+
+i2c-ocores uses the platform bus, so you need to provide a struct
+platform_device with the base address and interrupt number. The
+dev.platform_data of the device should also point to a struct
+ocores_i2c_platform_data (see linux/platform_data/i2c-ocores.h) describing the
+distance between registers and the input clock speed.
+There is also a possibility to attach a list of i2c_board_info which
+the i2c-ocores driver will add to the bus upon creation.
+
+E.G. something like::
+
+  static struct resource ocores_resources[] = {
+	[0] = {
+		.start	= MYI2C_BASEADDR,
+		.end	= MYI2C_BASEADDR + 8,
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= MYI2C_IRQ,
+		.end	= MYI2C_IRQ,
+		.flags	= IORESOURCE_IRQ,
+	},
+  };
+
+  /* optional board info */
+  struct i2c_board_info ocores_i2c_board_info[] = {
+	{
+		I2C_BOARD_INFO("tsc2003", 0x48),
+		.platform_data = &tsc2003_platform_data,
+		.irq = TSC_IRQ
+	},
+	{
+		I2C_BOARD_INFO("adv7180", 0x42 >> 1),
+		.irq = ADV_IRQ
+	}
+  };
+
+  static struct ocores_i2c_platform_data myi2c_data = {
+	.regstep	= 2,		/* two bytes between registers */
+	.clock_khz	= 50000,	/* input clock of 50MHz */
+	.devices	= ocores_i2c_board_info, /* optional table of devices */
+	.num_devices	= ARRAY_SIZE(ocores_i2c_board_info), /* table size */
+  };
+
+  static struct platform_device myi2c = {
+	.name			= "ocores-i2c",
+	.dev = {
+		.platform_data	= &myi2c_data,
+	},
+	.num_resources		= ARRAY_SIZE(ocores_resources),
+	.resource		= ocores_resources,
+  };
diff --git a/Documentation/i2c/busses/i2c-parport b/Documentation/i2c/busses/i2c-parport
deleted file mode 100644
index c3dbb3bfd814..000000000000
--- a/Documentation/i2c/busses/i2c-parport
+++ /dev/null
@@ -1,178 +0,0 @@
-Kernel driver i2c-parport
-
-Author: Jean Delvare <jdelvare@suse.de>
-
-This is a unified driver for several i2c-over-parallel-port adapters,
-such as the ones made by Philips, Velleman or ELV. This driver is
-meant as a replacement for the older, individual drivers:
- * i2c-philips-par
- * i2c-elv
- * i2c-velleman
- * video/i2c-parport (NOT the same as this one, dedicated to home brew
-                      teletext adapters)
-
-It currently supports the following devices:
- * (type=0) Philips adapter
- * (type=1) home brew teletext adapter
- * (type=2) Velleman K8000 adapter
- * (type=3) ELV adapter
- * (type=4) Analog Devices ADM1032 evaluation board
- * (type=5) Analog Devices evaluation boards: ADM1025, ADM1030, ADM1031
- * (type=6) Barco LPT->DVI (K5800236) adapter
- * (type=7) One For All JP1 parallel port adapter
- * (type=8) VCT-jig
-
-These devices use different pinout configurations, so you have to tell
-the driver what you have, using the type module parameter. There is no
-way to autodetect the devices. Support for different pinout configurations
-can be easily added when needed.
-
-Earlier kernels defaulted to type=0 (Philips).  But now, if the type
-parameter is missing, the driver will simply fail to initialize.
-
-SMBus alert support is available on adapters which have this line properly
-connected to the parallel port's interrupt pin.
-
-
-Building your own adapter
--------------------------
-
-If you want to build you own i2c-over-parallel-port adapter, here is
-a sample electronics schema (credits go to Sylvain Munaut):
-
-Device                                                      PC
-Side          ___________________Vdd (+)                    Side
-               |    |         |
-              ---  ---       ---
-              | |  | |       | |
-              |R|  |R|       |R|
-              | |  | |       | |
-              ---  ---       ---
-               |    |         |
-               |    |    /|   |
-SCL  ----------x--------o |-----------x-------------------  pin 2
-                    |    \|   |       |
-                    |         |       |
-                    |   |\    |       |
-SDA  ----------x----x---| o---x---------------------------  pin 13
-               |        |/            |
-               |                      |
-               |         /|           |
-               ---------o |----------------x--------------  pin 3
-                         \|           |    |
-                                      |    |
-                                     ---  ---
-                                     | |  | |
-                                     |R|  |R|
-                                     | |  | |
-                                     ---  ---
-                                      |    | 
-                                     ###  ###
-                                     GND  GND
-        
-Remarks:
- - This is the exact pinout and electronics used on the Analog Devices
-   evaluation boards.
-                   /|
- - All inverters -o |- must be 74HC05, they must be open collector output.
-                   \|
- - All resitors are 10k.
- - Pins 18-25 of the parallel port connected to GND.
- - Pins 4-9 (D2-D7) could be used as VDD is the driver drives them high.
-   The ADM1032 evaluation board uses D4-D7. Beware that the amount of
-   current you can draw from the parallel port is limited. Also note that
-   all connected lines MUST BE driven at the same state, else you'll short
-   circuit the output buffers! So plugging the I2C adapter after loading
-   the i2c-parport module might be a good safety since data line state
-   prior to init may be unknown. 
- - This is 5V!
- - Obviously you cannot read SCL (so it's not really standard-compliant).
-   Pretty easy to add, just copy the SDA part and use another input pin.
-   That would give (ELV compatible pinout):
-
-
-Device                                                      PC
-Side          ______________________________Vdd (+)         Side
-               |    |            |    |
-              ---  ---          ---  ---
-              | |  | |          | |  | |
-              |R|  |R|          |R|  |R|
-              | |  | |          | |  | |
-              ---  ---          ---  ---
-               |    |            |    |
-               |    |      |\    |    |
-SCL  ----------x--------x--| o---x------------------------  pin 15
-                    |   |  |/         | 
-                    |   |             |
-                    |   |   /|        |
-                    |   ---o |-------------x--------------  pin 2
-                    |       \|        |    |
-                    |                 |    |
-                    |                 |    |
-                    |      |\         |    |
-SDA  ---------------x---x--| o--------x-------------------  pin 10
-                        |  |/              |
-                        |                  |
-                        |   /|             |
-                        ---o |------------------x---------  pin 3
-                            \|             |    |
-                                           |    |
-                                          ---  ---
-                                          | |  | |
-                                          |R|  |R|
-                                          | |  | |
-                                          ---  ---
-                                           |    | 
-                                          ###  ###
-                                          GND  GND
-
-
-If possible, you should use the same pinout configuration as existing
-adapters do, so you won't even have to change the code.
-
-
-Similar (but different) drivers
--------------------------------
-
-This driver is NOT the same as the i2c-pport driver found in the i2c
-package. The i2c-pport driver makes use of modern parallel port features so
-that you don't need additional electronics. It has other restrictions
-however, and was not ported to Linux 2.6 (yet).
-
-This driver is also NOT the same as the i2c-pcf-epp driver found in the
-lm_sensors package. The i2c-pcf-epp driver doesn't use the parallel port as
-an I2C bus directly. Instead, it uses it to control an external I2C bus
-master. That driver was not ported to Linux 2.6 (yet) either.
-
-
-Legacy documentation for Velleman adapter
------------------------------------------
-
-Useful links:
-Velleman                http://www.velleman.be/
-Velleman K8000 Howto    http://howto.htlw16.ac.at/k8000-howto.html
-
-The project has lead to new libs for the Velleman K8000 and K8005:
-  LIBK8000 v1.99.1 and LIBK8005 v0.21
-With these libs, you can control the K8000 interface card and the K8005
-stepper motor card with the simple commands which are in the original
-Velleman software, like SetIOchannel, ReadADchannel, SendStepCCWFull and
-many more, using /dev/velleman.
-  http://home.wanadoo.nl/hihihi/libk8000.htm
-  http://home.wanadoo.nl/hihihi/libk8005.htm
-  http://struyve.mine.nu:8080/index.php?block=k8000
-  http://sourceforge.net/projects/libk8005/
-
-
-One For All JP1 parallel port adapter
--------------------------------------
-
-The JP1 project revolves around a set of remote controls which expose
-the I2C bus their internal configuration EEPROM lives on via a 6 pin
-jumper in the battery compartment. More details can be found at:
-
-http://www.hifi-remote.com/jp1/
-
-Details of the simple parallel port hardware can be found at:
-
-http://www.hifi-remote.com/jp1/hardware.shtml
diff --git a/Documentation/i2c/busses/i2c-parport-light b/Documentation/i2c/busses/i2c-parport-light
deleted file mode 100644
index 7071b8ba0af4..000000000000
--- a/Documentation/i2c/busses/i2c-parport-light
+++ /dev/null
@@ -1,22 +0,0 @@
-Kernel driver i2c-parport-light
-
-Author: Jean Delvare <jdelvare@suse.de>
-
-This driver is a light version of i2c-parport. It doesn't depend        
-on the parport driver, and uses direct I/O access instead. This might be
-preferred on embedded systems where wasting memory for the clean but heavy
-parport handling is not an option. The drawback is a reduced portability
-and the impossibility to daisy-chain other parallel port devices.                 
-  
-Please see i2c-parport for documentation.
-
-Module parameters:
-
-* type: type of adapter (see i2c-parport or modinfo)
-
-* base: base I/O address
-  Default is 0x378 which is fairly common for parallel ports, at least on PC.
-
-* irq: optional IRQ
-  This must be passed if you want SMBus alert support, assuming your adapter
-  actually supports this.
diff --git a/Documentation/i2c/busses/i2c-parport-light.rst b/Documentation/i2c/busses/i2c-parport-light.rst
new file mode 100644
index 000000000000..e73af975d2c8
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-parport-light.rst
@@ -0,0 +1,24 @@
+===============================
+Kernel driver i2c-parport-light
+===============================
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+This driver is a light version of i2c-parport. It doesn't depend
+on the parport driver, and uses direct I/O access instead. This might be
+preferred on embedded systems where wasting memory for the clean but heavy
+parport handling is not an option. The drawback is a reduced portability
+and the impossibility to daisy-chain other parallel port devices.
+
+Please see i2c-parport for documentation.
+
+Module parameters:
+
+* type: type of adapter (see i2c-parport or modinfo)
+
+* base: base I/O address
+  Default is 0x378 which is fairly common for parallel ports, at least on PC.
+
+* irq: optional IRQ
+  This must be passed if you want SMBus alert support, assuming your adapter
+  actually supports this.
diff --git a/Documentation/i2c/busses/i2c-parport.rst b/Documentation/i2c/busses/i2c-parport.rst
new file mode 100644
index 000000000000..a9b4e8133700
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-parport.rst
@@ -0,0 +1,190 @@
+=========================
+Kernel driver i2c-parport
+=========================
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+This is a unified driver for several i2c-over-parallel-port adapters,
+such as the ones made by Philips, Velleman or ELV. This driver is
+meant as a replacement for the older, individual drivers:
+
+ * i2c-philips-par
+ * i2c-elv
+ * i2c-velleman
+ * video/i2c-parport
+   (NOT the same as this one, dedicated to home brew teletext adapters)
+
+It currently supports the following devices:
+
+ * (type=0) Philips adapter
+ * (type=1) home brew teletext adapter
+ * (type=2) Velleman K8000 adapter
+ * (type=3) ELV adapter
+ * (type=4) Analog Devices ADM1032 evaluation board
+ * (type=5) Analog Devices evaluation boards: ADM1025, ADM1030, ADM1031
+ * (type=6) Barco LPT->DVI (K5800236) adapter
+ * (type=7) One For All JP1 parallel port adapter
+ * (type=8) VCT-jig
+
+These devices use different pinout configurations, so you have to tell
+the driver what you have, using the type module parameter. There is no
+way to autodetect the devices. Support for different pinout configurations
+can be easily added when needed.
+
+Earlier kernels defaulted to type=0 (Philips).  But now, if the type
+parameter is missing, the driver will simply fail to initialize.
+
+SMBus alert support is available on adapters which have this line properly
+connected to the parallel port's interrupt pin.
+
+
+Building your own adapter
+-------------------------
+
+If you want to build you own i2c-over-parallel-port adapter, here is
+a sample electronics schema (credits go to Sylvain Munaut)::
+
+  Device                                                      PC
+  Side          ___________________Vdd (+)                    Side
+                 |    |         |
+                ---  ---       ---
+                | |  | |       | |
+                |R|  |R|       |R|
+                | |  | |       | |
+                ---  ---       ---
+                 |    |         |
+                 |    |    /|   |
+  SCL  ----------x--------o |-----------x-------------------  pin 2
+                      |    \|   |       |
+                      |         |       |
+                      |   |\    |       |
+  SDA  ----------x----x---| o---x---------------------------  pin 13
+                 |        |/            |
+                 |                      |
+                 |         /|           |
+                 ---------o |----------------x--------------  pin 3
+                           \|           |    |
+                                        |    |
+                                       ---  ---
+                                       | |  | |
+                                       |R|  |R|
+                                       | |  | |
+                                       ---  ---
+                                        |    |
+                                       ###  ###
+                                       GND  GND
+
+Remarks:
+ - This is the exact pinout and electronics used on the Analog Devices
+   evaluation boards.
+ - All inverters::
+
+                   /|
+                 -o |-
+                   \|
+
+   must be 74HC05, they must be open collector output.
+ - All resitors are 10k.
+ - Pins 18-25 of the parallel port connected to GND.
+ - Pins 4-9 (D2-D7) could be used as VDD is the driver drives them high.
+   The ADM1032 evaluation board uses D4-D7. Beware that the amount of
+   current you can draw from the parallel port is limited. Also note that
+   all connected lines MUST BE driven at the same state, else you'll short
+   circuit the output buffers! So plugging the I2C adapter after loading
+   the i2c-parport module might be a good safety since data line state
+   prior to init may be unknown.
+ - This is 5V!
+ - Obviously you cannot read SCL (so it's not really standard-compliant).
+   Pretty easy to add, just copy the SDA part and use another input pin.
+   That would give (ELV compatible pinout)::
+
+
+      Device                                                      PC
+      Side          ______________________________Vdd (+)         Side
+                     |    |            |    |
+                    ---  ---          ---  ---
+                    | |  | |          | |  | |
+                    |R|  |R|          |R|  |R|
+                    | |  | |          | |  | |
+                    ---  ---          ---  ---
+                     |    |            |    |
+                     |    |      |\    |    |
+      SCL  ----------x--------x--| o---x------------------------  pin 15
+                          |   |  |/         |
+                          |   |             |
+                          |   |   /|        |
+                          |   ---o |-------------x--------------  pin 2
+                          |       \|        |    |
+                          |                 |    |
+                          |                 |    |
+                          |      |\         |    |
+      SDA  ---------------x---x--| o--------x-------------------  pin 10
+                              |  |/              |
+                              |                  |
+                              |   /|             |
+                              ---o |------------------x---------  pin 3
+                                  \|             |    |
+                                                 |    |
+                                                ---  ---
+                                                | |  | |
+                                                |R|  |R|
+                                                | |  | |
+                                                ---  ---
+                                                 |    |
+                                                ###  ###
+                                                GND  GND
+
+
+If possible, you should use the same pinout configuration as existing
+adapters do, so you won't even have to change the code.
+
+
+Similar (but different) drivers
+-------------------------------
+
+This driver is NOT the same as the i2c-pport driver found in the i2c
+package. The i2c-pport driver makes use of modern parallel port features so
+that you don't need additional electronics. It has other restrictions
+however, and was not ported to Linux 2.6 (yet).
+
+This driver is also NOT the same as the i2c-pcf-epp driver found in the
+lm_sensors package. The i2c-pcf-epp driver doesn't use the parallel port as
+an I2C bus directly. Instead, it uses it to control an external I2C bus
+master. That driver was not ported to Linux 2.6 (yet) either.
+
+
+Legacy documentation for Velleman adapter
+-----------------------------------------
+
+Useful links:
+
+- Velleman                http://www.velleman.be/
+- Velleman K8000 Howto    http://howto.htlw16.ac.at/k8000-howto.html
+
+The project has lead to new libs for the Velleman K8000 and K8005:
+
+  LIBK8000 v1.99.1 and LIBK8005 v0.21
+
+With these libs, you can control the K8000 interface card and the K8005
+stepper motor card with the simple commands which are in the original
+Velleman software, like SetIOchannel, ReadADchannel, SendStepCCWFull and
+many more, using /dev/velleman.
+
+  - http://home.wanadoo.nl/hihihi/libk8000.htm
+  - http://home.wanadoo.nl/hihihi/libk8005.htm
+  - http://struyve.mine.nu:8080/index.php?block=k8000
+  - http://sourceforge.net/projects/libk8005/
+
+
+One For All JP1 parallel port adapter
+-------------------------------------
+
+The JP1 project revolves around a set of remote controls which expose
+the I2C bus their internal configuration EEPROM lives on via a 6 pin
+jumper in the battery compartment. More details can be found at:
+
+http://www.hifi-remote.com/jp1/
+
+Details of the simple parallel port hardware can be found at:
+
+http://www.hifi-remote.com/jp1/hardware.shtml
diff --git a/Documentation/i2c/busses/i2c-pca-isa b/Documentation/i2c/busses/i2c-pca-isa
deleted file mode 100644
index b044e5265488..000000000000
--- a/Documentation/i2c/busses/i2c-pca-isa
+++ /dev/null
@@ -1,23 +0,0 @@
-Kernel driver i2c-pca-isa
-
-Supported adapters:
-This driver supports ISA boards using the Philips PCA 9564
-Parallel bus to I2C bus controller
-
-Author: Ian Campbell <icampbell@arcom.com>, Arcom Control Systems
-
-Module Parameters
------------------
-
-* base int
- I/O base address
-* irq int
- IRQ interrupt
-* clock int
- Clock rate as described in table 1 of PCA9564 datasheet
-
-Description
------------
-
-This driver supports ISA boards using the Philips PCA 9564
-Parallel bus to I2C bus controller
diff --git a/Documentation/i2c/busses/i2c-pca-isa.rst b/Documentation/i2c/busses/i2c-pca-isa.rst
new file mode 100644
index 000000000000..a254010c8055
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-pca-isa.rst
@@ -0,0 +1,26 @@
+=========================
+Kernel driver i2c-pca-isa
+=========================
+
+Supported adapters:
+
+This driver supports ISA boards using the Philips PCA 9564
+Parallel bus to I2C bus controller
+
+Author: Ian Campbell <icampbell@arcom.com>, Arcom Control Systems
+
+Module Parameters
+-----------------
+
+* base int
+    I/O base address
+* irq int
+    IRQ interrupt
+* clock int
+    Clock rate as described in table 1 of PCA9564 datasheet
+
+Description
+-----------
+
+This driver supports ISA boards using the Philips PCA 9564
+Parallel bus to I2C bus controller
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
deleted file mode 100644
index 2703bc3acad0..000000000000
--- a/Documentation/i2c/busses/i2c-piix4
+++ /dev/null
@@ -1,112 +0,0 @@
-Kernel driver i2c-piix4
-
-Supported adapters:
-  * Intel 82371AB PIIX4 and PIIX4E
-  * Intel 82443MX (440MX)
-    Datasheet: Publicly available at the Intel website
-  * ServerWorks OSB4, CSB5, CSB6, HT-1000 and HT-1100 southbridges
-    Datasheet: Only available via NDA from ServerWorks
-  * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
-    Datasheet: Not publicly available
-    SB700 register reference available at:
-    http://support.amd.com/us/Embedded_TechDocs/43009_sb7xx_rrg_pub_1.00.pdf
-  * AMD SP5100 (SB700 derivative found on some server mainboards)
-    Datasheet: Publicly available at the AMD website
-    http://support.amd.com/us/Embedded_TechDocs/44413.pdf
-  * AMD Hudson-2, ML, CZ
-    Datasheet: Not publicly available
-  * Hygon CZ
-    Datasheet: Not publicly available
-  * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
-    Datasheet: Publicly available at the SMSC website http://www.smsc.com
-
-Authors: 
-	Frodo Looijaard <frodol@dds.nl>
-	Philip Edelbrock <phil@netroedge.com>
-
-
-Module Parameters
------------------
-
-* force: int
-  Forcibly enable the PIIX4. DANGEROUS!
-* force_addr: int
-  Forcibly enable the PIIX4 at the given address. EXTREMELY DANGEROUS!
-
-
-Description
------------
-
-The PIIX4 (properly known as the 82371AB) is an Intel chip with a lot of
-functionality. Among other things, it implements the PCI bus. One of its
-minor functions is implementing a System Management Bus. This is a true 
-SMBus - you can not access it on I2C levels. The good news is that it
-natively understands SMBus commands and you do not have to worry about
-timing problems. The bad news is that non-SMBus devices connected to it can
-confuse it mightily. Yes, this is known to happen...
-
-Do 'lspci -v' and see whether it contains an entry like this:
-
-0000:00:02.3 Bridge: Intel Corp. 82371AB/EB/MB PIIX4 ACPI (rev 02)
-	     Flags: medium devsel, IRQ 9
-
-Bus and device numbers may differ, but the function number must be
-identical (like many PCI devices, the PIIX4 incorporates a number of
-different 'functions', which can be considered as separate devices). If you
-find such an entry, you have a PIIX4 SMBus controller.
-
-On some computers (most notably, some Dells), the SMBus is disabled by
-default. If you use the insmod parameter 'force=1', the kernel module will
-try to enable it. THIS IS VERY DANGEROUS! If the BIOS did not set up a
-correct address for this module, you could get in big trouble (read:
-crashes, data corruption, etc.). Try this only as a last resort (try BIOS
-updates first, for example), and backup first! An even more dangerous
-option is 'force_addr=<IOPORT>'. This will not only enable the PIIX4 like
-'force' foes, but it will also set a new base I/O port address. The SMBus
-parts of the PIIX4 needs a range of 8 of these addresses to function
-correctly. If these addresses are already reserved by some other device,
-you will get into big trouble! DON'T USE THIS IF YOU ARE NOT VERY SURE
-ABOUT WHAT YOU ARE DOING!
-
-The PIIX4E is just an new version of the PIIX4; it is supported as well.
-The PIIX/PIIX3 does not implement an SMBus or I2C bus, so you can't use
-this driver on those mainboards.
-
-The ServerWorks Southbridges, the Intel 440MX, and the Victory66 are
-identical to the PIIX4 in I2C/SMBus support.
-
-The AMD SB700, SB800, SP5100 and Hudson-2 chipsets implement two
-PIIX4-compatible SMBus controllers. If your BIOS initializes the
-secondary controller, it will be detected by this driver as
-an "Auxiliary SMBus Host Controller".
-
-If you own Force CPCI735 motherboard or other OSB4 based systems you may need
-to change the SMBus Interrupt Select register so the SMBus controller uses
-the SMI mode.
-
-1) Use lspci command and locate the PCI device with the SMBus controller:
-   00:0f.0 ISA bridge: ServerWorks OSB4 South Bridge (rev 4f)
-   The line may vary for different chipsets. Please consult the driver source
-   for all possible PCI ids (and lspci -n to match them). Lets assume the
-   device is located at 00:0f.0.
-2) Now you just need to change the value in 0xD2 register. Get it first with
-   command: lspci -xxx -s 00:0f.0
-   If the value is 0x3 then you need to change it to 0x1
-   setpci  -s 00:0f.0 d2.b=1
-
-Please note that you don't need to do that in all cases, just when the SMBus is
-not working properly.
-
-
-Hardware-specific issues
-------------------------
-
-This driver will refuse to load on IBM systems with an Intel PIIX4 SMBus.
-Some of these machines have an RFID EEPROM (24RF08) connected to the SMBus,
-which can easily get corrupted due to a state machine bug. These are mostly
-Thinkpad laptops, but desktop systems may also be affected. We have no list
-of all affected systems, so the only safe solution was to prevent access to
-the SMBus on all IBM systems (detected using DMI data.)
-
-For additional information, read:
-http://www.lm-sensors.org/browser/lm-sensors/trunk/README
diff --git a/Documentation/i2c/busses/i2c-piix4.rst b/Documentation/i2c/busses/i2c-piix4.rst
new file mode 100644
index 000000000000..cc9000259223
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-piix4.rst
@@ -0,0 +1,114 @@
+=======================
+Kernel driver i2c-piix4
+=======================
+
+Supported adapters:
+  * Intel 82371AB PIIX4 and PIIX4E
+  * Intel 82443MX (440MX)
+    Datasheet: Publicly available at the Intel website
+  * ServerWorks OSB4, CSB5, CSB6, HT-1000 and HT-1100 southbridges
+    Datasheet: Only available via NDA from ServerWorks
+  * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
+    Datasheet: Not publicly available
+    SB700 register reference available at:
+    http://support.amd.com/us/Embedded_TechDocs/43009_sb7xx_rrg_pub_1.00.pdf
+  * AMD SP5100 (SB700 derivative found on some server mainboards)
+    Datasheet: Publicly available at the AMD website
+    http://support.amd.com/us/Embedded_TechDocs/44413.pdf
+  * AMD Hudson-2, ML, CZ
+    Datasheet: Not publicly available
+  * Hygon CZ
+    Datasheet: Not publicly available
+  * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
+    Datasheet: Publicly available at the SMSC website http://www.smsc.com
+
+Authors:
+	- Frodo Looijaard <frodol@dds.nl>
+	- Philip Edelbrock <phil@netroedge.com>
+
+
+Module Parameters
+-----------------
+
+* force: int
+  Forcibly enable the PIIX4. DANGEROUS!
+* force_addr: int
+  Forcibly enable the PIIX4 at the given address. EXTREMELY DANGEROUS!
+
+
+Description
+-----------
+
+The PIIX4 (properly known as the 82371AB) is an Intel chip with a lot of
+functionality. Among other things, it implements the PCI bus. One of its
+minor functions is implementing a System Management Bus. This is a true
+SMBus - you can not access it on I2C levels. The good news is that it
+natively understands SMBus commands and you do not have to worry about
+timing problems. The bad news is that non-SMBus devices connected to it can
+confuse it mightily. Yes, this is known to happen...
+
+Do ``lspci -v`` and see whether it contains an entry like this::
+
+  0000:00:02.3 Bridge: Intel Corp. 82371AB/EB/MB PIIX4 ACPI (rev 02)
+	       Flags: medium devsel, IRQ 9
+
+Bus and device numbers may differ, but the function number must be
+identical (like many PCI devices, the PIIX4 incorporates a number of
+different 'functions', which can be considered as separate devices). If you
+find such an entry, you have a PIIX4 SMBus controller.
+
+On some computers (most notably, some Dells), the SMBus is disabled by
+default. If you use the insmod parameter 'force=1', the kernel module will
+try to enable it. THIS IS VERY DANGEROUS! If the BIOS did not set up a
+correct address for this module, you could get in big trouble (read:
+crashes, data corruption, etc.). Try this only as a last resort (try BIOS
+updates first, for example), and backup first! An even more dangerous
+option is 'force_addr=<IOPORT>'. This will not only enable the PIIX4 like
+'force' foes, but it will also set a new base I/O port address. The SMBus
+parts of the PIIX4 needs a range of 8 of these addresses to function
+correctly. If these addresses are already reserved by some other device,
+you will get into big trouble! DON'T USE THIS IF YOU ARE NOT VERY SURE
+ABOUT WHAT YOU ARE DOING!
+
+The PIIX4E is just an new version of the PIIX4; it is supported as well.
+The PIIX/PIIX3 does not implement an SMBus or I2C bus, so you can't use
+this driver on those mainboards.
+
+The ServerWorks Southbridges, the Intel 440MX, and the Victory66 are
+identical to the PIIX4 in I2C/SMBus support.
+
+The AMD SB700, SB800, SP5100 and Hudson-2 chipsets implement two
+PIIX4-compatible SMBus controllers. If your BIOS initializes the
+secondary controller, it will be detected by this driver as
+an "Auxiliary SMBus Host Controller".
+
+If you own Force CPCI735 motherboard or other OSB4 based systems you may need
+to change the SMBus Interrupt Select register so the SMBus controller uses
+the SMI mode.
+
+1) Use lspci command and locate the PCI device with the SMBus controller:
+   00:0f.0 ISA bridge: ServerWorks OSB4 South Bridge (rev 4f)
+   The line may vary for different chipsets. Please consult the driver source
+   for all possible PCI ids (and lspci -n to match them). Lets assume the
+   device is located at 00:0f.0.
+2) Now you just need to change the value in 0xD2 register. Get it first with
+   command: lspci -xxx -s 00:0f.0
+   If the value is 0x3 then you need to change it to 0x1:
+   setpci  -s 00:0f.0 d2.b=1
+
+Please note that you don't need to do that in all cases, just when the SMBus is
+not working properly.
+
+
+Hardware-specific issues
+------------------------
+
+This driver will refuse to load on IBM systems with an Intel PIIX4 SMBus.
+Some of these machines have an RFID EEPROM (24RF08) connected to the SMBus,
+which can easily get corrupted due to a state machine bug. These are mostly
+Thinkpad laptops, but desktop systems may also be affected. We have no list
+of all affected systems, so the only safe solution was to prevent access to
+the SMBus on all IBM systems (detected using DMI data.)
+
+For additional information, read:
+http://www.lm-sensors.org/browser/lm-sensors/trunk/README
diff --git a/Documentation/i2c/busses/i2c-sis5595 b/Documentation/i2c/busses/i2c-sis5595
deleted file mode 100644
index ecd21fb49a8f..000000000000
--- a/Documentation/i2c/busses/i2c-sis5595
+++ /dev/null
@@ -1,59 +0,0 @@
-Kernel driver i2c-sis5595
-
-Authors:
-	Frodo Looijaard <frodol@dds.nl>,
-        Mark D. Studebaker <mdsxyz123@yahoo.com>,
-	Philip Edelbrock <phil@netroedge.com>
-
-Supported adapters:
-  * Silicon Integrated Systems Corp. SiS5595 Southbridge
-    Datasheet: Publicly available at the Silicon Integrated Systems Corp. site.
-
-Note: all have mfr. ID 0x1039.
-
-   SUPPORTED            PCI ID
-        5595            0008
-
-   Note: these chips contain a 0008 device which is incompatible with the
-         5595. We recognize these by the presence of the listed
-         "blacklist" PCI ID and refuse to load.
-
-   NOT SUPPORTED        PCI ID          BLACKLIST PCI ID
-         540            0008            0540
-         550            0008            0550
-        5513            0008            5511
-        5581            0008            5597
-        5582            0008            5597
-        5597            0008            5597
-        5598            0008            5597/5598
-         630            0008            0630
-         645            0008            0645
-         646            0008            0646
-         648            0008            0648
-         650            0008            0650
-         651            0008            0651
-         730            0008            0730
-         735            0008            0735
-         745            0008            0745
-         746            0008            0746
-
-Module Parameters
------------------
-
-* force_addr=0xaddr	Set the I/O base address. Useful for boards
-			that don't set the address in the BIOS. Does not do a
-			PCI force; the device must still be present in lspci.
-			Don't use this unless the driver complains that the
-			base address is not set.
-
-Description
------------
-
-i2c-sis5595 is a true SMBus host driver for motherboards with the SiS5595
-southbridges.
-
-WARNING: If you are trying to access the integrated sensors on the SiS5595
-chip, you want the sis5595 driver for those, not this driver. This driver
-is a BUS driver, not a CHIP driver. A BUS driver is used by other CHIP
-drivers to access chips on the bus.
-
diff --git a/Documentation/i2c/busses/i2c-sis5595.rst b/Documentation/i2c/busses/i2c-sis5595.rst
new file mode 100644
index 000000000000..b85630c84a96
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-sis5595.rst
@@ -0,0 +1,68 @@
+=========================
+Kernel driver i2c-sis5595
+=========================
+
+Authors:
+	- Frodo Looijaard <frodol@dds.nl>,
+        - Mark D. Studebaker <mdsxyz123@yahoo.com>,
+	- Philip Edelbrock <phil@netroedge.com>
+
+Supported adapters:
+  * Silicon Integrated Systems Corp. SiS5595 Southbridge
+    Datasheet: Publicly available at the Silicon Integrated Systems Corp. site.
+
+Note: all have mfr. ID 0x1039.
+
+   =========            ======
+   SUPPORTED            PCI ID
+   =========            ======
+        5595            0008
+   =========            ======
+
+   Note: these chips contain a 0008 device which is incompatible with the
+         5595. We recognize these by the presence of the listed
+         "blacklist" PCI ID and refuse to load.
+
+   =============        ======          ================
+   NOT SUPPORTED        PCI ID          BLACKLIST PCI ID
+   =============        ======          ================
+         540            0008            0540
+         550            0008            0550
+        5513            0008            5511
+        5581            0008            5597
+        5582            0008            5597
+        5597            0008            5597
+        5598            0008            5597/5598
+         630            0008            0630
+         645            0008            0645
+         646            0008            0646
+         648            0008            0648
+         650            0008            0650
+         651            0008            0651
+         730            0008            0730
+         735            0008            0735
+         745            0008            0745
+         746            0008            0746
+   =============        ======          ================
+
+Module Parameters
+-----------------
+
+==================	=====================================================
+force_addr=0xaddr	Set the I/O base address. Useful for boards
+			that don't set the address in the BIOS. Does not do a
+			PCI force; the device must still be present in lspci.
+			Don't use this unless the driver complains that the
+			base address is not set.
+==================	=====================================================
+
+Description
+-----------
+
+i2c-sis5595 is a true SMBus host driver for motherboards with the SiS5595
+southbridges.
+
+WARNING: If you are trying to access the integrated sensors on the SiS5595
+chip, you want the sis5595 driver for those, not this driver. This driver
+is a BUS driver, not a CHIP driver. A BUS driver is used by other CHIP
+drivers to access chips on the bus.
diff --git a/Documentation/i2c/busses/i2c-sis630 b/Documentation/i2c/busses/i2c-sis630
deleted file mode 100644
index ee7943631074..000000000000
--- a/Documentation/i2c/busses/i2c-sis630
+++ /dev/null
@@ -1,58 +0,0 @@
-Kernel driver i2c-sis630
-
-Supported adapters:
-  * Silicon Integrated Systems Corp (SiS)
-	630 chipset (Datasheet: available at http://www.sfr-fresh.com/linux)
-	730 chipset
-	964 chipset
-  * Possible other SiS chipsets ?
-
-Author: Alexander Malysh <amalysh@web.de>
-	Amaury Decrême <amaury.decreme@gmail.com> - SiS964 support
-
-Module Parameters
------------------
-
-* force = [1|0] Forcibly enable the SIS630. DANGEROUS!
-		This can be interesting for chipsets not named
-		above to check if it works for you chipset, but DANGEROUS!
-
-* high_clock = [1|0] Forcibly set Host Master Clock to 56KHz (default,
-			what your BIOS use). DANGEROUS! This should be a bit
-			faster, but freeze some systems (i.e. my Laptop).
-			SIS630/730 chip only.
-
-
-Description
------------
-
-This SMBus only driver is known to work on motherboards with the above
-named chipsets.
-
-If you see something like this:
-
-00:00.0 Host bridge: Silicon Integrated Systems [SiS] 630 Host (rev 31)
-00:01.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
-
-or like this:
-
-00:00.0 Host bridge: Silicon Integrated Systems [SiS] 730 Host (rev 02)
-00:01.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
-
-or like this:
-
-00:00.0 Host bridge: Silicon Integrated Systems [SiS] 760/M760 Host (rev 02)
-00:02.0 ISA bridge: Silicon Integrated Systems [SiS] SiS964 [MuTIOL Media IO]
-							LPC Controller (rev 36)
-
-in your 'lspci' output , then this driver is for your chipset.
-
-Thank You
----------
-Philip Edelbrock <phil@netroedge.com>
-- testing SiS730 support
-Mark M. Hoffman <mhoffman@lightlink.com>
-- bug fixes
-
-To anyone else which I forgot here ;), thanks!
-
diff --git a/Documentation/i2c/busses/i2c-sis630.rst b/Documentation/i2c/busses/i2c-sis630.rst
new file mode 100644
index 000000000000..9fcd74b18781
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-sis630.rst
@@ -0,0 +1,63 @@
+========================
+Kernel driver i2c-sis630
+========================
+
+Supported adapters:
+  * Silicon Integrated Systems Corp (SiS)
+	630 chipset (Datasheet: available at http://www.sfr-fresh.com/linux)
+	730 chipset
+	964 chipset
+  * Possible other SiS chipsets ?
+
+Author:
+        - Alexander Malysh <amalysh@web.de>
+	- Amaury Decrême <amaury.decreme@gmail.com> - SiS964 support
+
+Module Parameters
+-----------------
+
+==================      =====================================================
+force = [1|0]           Forcibly enable the SIS630. DANGEROUS!
+                        This can be interesting for chipsets not named
+                        above to check if it works for you chipset,
+                        but DANGEROUS!
+
+high_clock = [1|0]      Forcibly set Host Master Clock to 56KHz (default,
+			what your BIOS use). DANGEROUS! This should be a bit
+			faster, but freeze some systems (i.e. my Laptop).
+			SIS630/730 chip only.
+==================      =====================================================
+
+
+Description
+-----------
+
+This SMBus only driver is known to work on motherboards with the above
+named chipsets.
+
+If you see something like this::
+
+  00:00.0 Host bridge: Silicon Integrated Systems [SiS] 630 Host (rev 31)
+  00:01.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
+
+or like this::
+
+  00:00.0 Host bridge: Silicon Integrated Systems [SiS] 730 Host (rev 02)
+  00:01.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
+
+or like this::
+
+  00:00.0 Host bridge: Silicon Integrated Systems [SiS] 760/M760 Host (rev 02)
+  00:02.0 ISA bridge: Silicon Integrated Systems [SiS] SiS964 [MuTIOL Media IO]
+							LPC Controller (rev 36)
+
+in your ``lspci`` output , then this driver is for your chipset.
+
+Thank You
+---------
+Philip Edelbrock <phil@netroedge.com>
+- testing SiS730 support
+Mark M. Hoffman <mhoffman@lightlink.com>
+- bug fixes
+
+To anyone else which I forgot here ;), thanks!
diff --git a/Documentation/i2c/busses/i2c-sis96x b/Documentation/i2c/busses/i2c-sis96x
deleted file mode 100644
index 0b979f3252a4..000000000000
--- a/Documentation/i2c/busses/i2c-sis96x
+++ /dev/null
@@ -1,73 +0,0 @@
-Kernel driver i2c-sis96x
-
-Replaces 2.4.x i2c-sis645
-
-Supported adapters:
-  * Silicon Integrated Systems Corp (SiS)
-    Any combination of these host bridges:
-	645, 645DX (aka 646), 648, 650, 651, 655, 735, 745, 746
-    and these south bridges:
-    	961, 962, 963(L)
-
-Author: Mark M. Hoffman <mhoffman@lightlink.com>
-
-Description
------------
-
-This SMBus only driver is known to work on motherboards with the above
-named chipset combinations. The driver was developed without benefit of a
-proper datasheet from SiS. The SMBus registers are assumed compatible with
-those of the SiS630, although they are located in a completely different
-place. Thanks to Alexander Malysh <amalysh@web.de> for providing the
-SiS630 datasheet (and  driver).
-
-The command "lspci" as root should produce something like these lines:
-
-00:00.0 Host bridge: Silicon Integrated Systems [SiS]: Unknown device 0645
-00:02.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
-00:02.1 SMBus: Silicon Integrated Systems [SiS]: Unknown device 0016
-
-or perhaps this...
-
-00:00.0 Host bridge: Silicon Integrated Systems [SiS]: Unknown device 0645
-00:02.0 ISA bridge: Silicon Integrated Systems [SiS]: Unknown device 0961
-00:02.1 SMBus: Silicon Integrated Systems [SiS]: Unknown device 0016
-
-(kernel versions later than 2.4.18 may fill in the "Unknown"s)
-
-If you can't see it please look on quirk_sis_96x_smbus
-(drivers/pci/quirks.c) (also if southbridge detection fails)
-
-I suspect that this driver could be made to work for the following SiS
-chipsets as well: 635, and 635T. If anyone owns a board with those chips
-AND is willing to risk crashing & burning an otherwise well-behaved kernel
-in the name of progress... please contact me at <mhoffman@lightlink.com> or
-via the linux-i2c mailing list: <linux-i2c@vger.kernel.org>.  Please send bug
-reports and/or success stories as well.
-
-
-TO DOs
-------
-
-* The driver does not support SMBus block reads/writes; I may add them if a
-scenario is found where they're needed.
-
-
-Thank You
----------
-
-Mark D. Studebaker <mdsxyz123@yahoo.com>
- - design hints and bug fixes
-Alexander Maylsh <amalysh@web.de>
- - ditto, plus an important datasheet... almost the one I really wanted
-Hans-Günter Lütke Uphues <hg_lu@t-online.de>
- - patch for SiS735
-Robert Zwerus <arzie@dds.nl>
- - testing for SiS645DX
-Kianusch Sayah Karadji <kianusch@sk-tech.net>
- - patch for SiS645DX/962
-Ken Healy
- - patch for SiS655
-
-To anyone else who has written w/ feedback, thanks!
-
diff --git a/Documentation/i2c/busses/i2c-sis96x.rst b/Documentation/i2c/busses/i2c-sis96x.rst
new file mode 100644
index 000000000000..437cc1d89588
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-sis96x.rst
@@ -0,0 +1,82 @@
+========================
+Kernel driver i2c-sis96x
+========================
+
+Replaces 2.4.x i2c-sis645
+
+Supported adapters:
+
+  * Silicon Integrated Systems Corp (SiS)
+
+    Any combination of these host bridges:
+	645, 645DX (aka 646), 648, 650, 651, 655, 735, 745, 746
+
+    and these south bridges:
+	961, 962, 963(L)
+
+Author: Mark M. Hoffman <mhoffman@lightlink.com>
+
+Description
+-----------
+
+This SMBus only driver is known to work on motherboards with the above
+named chipset combinations. The driver was developed without benefit of a
+proper datasheet from SiS. The SMBus registers are assumed compatible with
+those of the SiS630, although they are located in a completely different
+place. Thanks to Alexander Malysh <amalysh@web.de> for providing the
+SiS630 datasheet (and  driver).
+
+The command ``lspci`` as root should produce something like these lines::
+
+  00:00.0 Host bridge: Silicon Integrated Systems [SiS]: Unknown device 0645
+  00:02.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
+  00:02.1 SMBus: Silicon Integrated Systems [SiS]: Unknown device 0016
+
+or perhaps this::
+
+  00:00.0 Host bridge: Silicon Integrated Systems [SiS]: Unknown device 0645
+  00:02.0 ISA bridge: Silicon Integrated Systems [SiS]: Unknown device 0961
+  00:02.1 SMBus: Silicon Integrated Systems [SiS]: Unknown device 0016
+
+(kernel versions later than 2.4.18 may fill in the "Unknown"s)
+
+If you can't see it please look on quirk_sis_96x_smbus
+(drivers/pci/quirks.c) (also if southbridge detection fails)
+
+I suspect that this driver could be made to work for the following SiS
+chipsets as well: 635, and 635T. If anyone owns a board with those chips
+AND is willing to risk crashing & burning an otherwise well-behaved kernel
+in the name of progress... please contact me at <mhoffman@lightlink.com> or
+via the linux-i2c mailing list: <linux-i2c@vger.kernel.org>.  Please send bug
+reports and/or success stories as well.
+
+
+TO DOs
+------
+
+* The driver does not support SMBus block reads/writes; I may add them if a
+  scenario is found where they're needed.
+
+
+Thank You
+---------
+
+Mark D. Studebaker <mdsxyz123@yahoo.com>
+ - design hints and bug fixes
+
+Alexander Maylsh <amalysh@web.de>
+ - ditto, plus an important datasheet... almost the one I really wanted
+
+Hans-Günter Lütke Uphues <hg_lu@t-online.de>
+ - patch for SiS735
+
+Robert Zwerus <arzie@dds.nl>
+ - testing for SiS645DX
+
+Kianusch Sayah Karadji <kianusch@sk-tech.net>
+ - patch for SiS645DX/962
+
+Ken Healy
+ - patch for SiS655
+
+To anyone else who has written w/ feedback, thanks!
diff --git a/Documentation/i2c/busses/i2c-taos-evm b/Documentation/i2c/busses/i2c-taos-evm
deleted file mode 100644
index 60299555dcf0..000000000000
--- a/Documentation/i2c/busses/i2c-taos-evm
+++ /dev/null
@@ -1,46 +0,0 @@
-Kernel driver i2c-taos-evm
-
-Author: Jean Delvare <jdelvare@suse.de>
-
-This is a driver for the evaluation modules for TAOS I2C/SMBus chips.
-The modules include an SMBus master with limited capabilities, which can
-be controlled over the serial port. Virtually all evaluation modules
-are supported, but a few lines of code need to be added for each new
-module to instantiate the right I2C chip on the bus. Obviously, a driver
-for the chip in question is also needed.
-
-Currently supported devices are:
-
-* TAOS TSL2550 EVM
-
-For additional information on TAOS products, please see
-  http://www.taosinc.com/
-
-
-Using this driver
------------------
-
-In order to use this driver, you'll need the serport driver, and the
-inputattach tool, which is part of the input-utils package. The following
-commands will tell the kernel that you have a TAOS EVM on the first
-serial port:
-
-# modprobe serport
-# inputattach --taos-evm /dev/ttyS0
-
-
-Technical details
------------------
-
-Only 4 SMBus transaction types are supported by the TAOS evaluation
-modules:
-* Receive Byte
-* Send Byte
-* Read Byte
-* Write Byte
-
-The communication protocol is text-based and pretty simple. It is
-described in a PDF document on the CD which comes with the evaluation
-module. The communication is rather slow, because the serial port has
-to operate at 1200 bps. However, I don't think this is a big concern in
-practice, as these modules are meant for evaluation and testing only.
diff --git a/Documentation/i2c/busses/i2c-taos-evm.rst b/Documentation/i2c/busses/i2c-taos-evm.rst
new file mode 100644
index 000000000000..f342e313ee3d
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-taos-evm.rst
@@ -0,0 +1,48 @@
+==========================
+Kernel driver i2c-taos-evm
+==========================
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+This is a driver for the evaluation modules for TAOS I2C/SMBus chips.
+The modules include an SMBus master with limited capabilities, which can
+be controlled over the serial port. Virtually all evaluation modules
+are supported, but a few lines of code need to be added for each new
+module to instantiate the right I2C chip on the bus. Obviously, a driver
+for the chip in question is also needed.
+
+Currently supported devices are:
+
+* TAOS TSL2550 EVM
+
+For additional information on TAOS products, please see
+  http://www.taosinc.com/
+
+
+Using this driver
+-----------------
+
+In order to use this driver, you'll need the serport driver, and the
+inputattach tool, which is part of the input-utils package. The following
+commands will tell the kernel that you have a TAOS EVM on the first
+serial port::
+
+  # modprobe serport
+  # inputattach --taos-evm /dev/ttyS0
+
+
+Technical details
+-----------------
+
+Only 4 SMBus transaction types are supported by the TAOS evaluation
+modules:
+* Receive Byte
+* Send Byte
+* Read Byte
+* Write Byte
+
+The communication protocol is text-based and pretty simple. It is
+described in a PDF document on the CD which comes with the evaluation
+module. The communication is rather slow, because the serial port has
+to operate at 1200 bps. However, I don't think this is a big concern in
+practice, as these modules are meant for evaluation and testing only.
diff --git a/Documentation/i2c/busses/i2c-via b/Documentation/i2c/busses/i2c-via
deleted file mode 100644
index 343870661ac3..000000000000
--- a/Documentation/i2c/busses/i2c-via
+++ /dev/null
@@ -1,34 +0,0 @@
-Kernel driver i2c-via
-
-Supported adapters:
-  * VIA Technologies, InC. VT82C586B
-    Datasheet: Publicly available at the VIA website
-
-Author: Kyösti Mälkki <kmalkki@cc.hut.fi>
-
-Description
------------
-
-i2c-via is an i2c bus driver for motherboards with VIA chipset.
-
-The following VIA pci chipsets are supported:
- - MVP3, VP3, VP2/97, VPX/97 
- - others with South bridge VT82C586B
-
-Your lspci listing must show this :
-
- Bridge: VIA Technologies, Inc. VT82C586B ACPI (rev 10)
-
-    Problems?
- 
- Q: You have VT82C586B on the motherboard, but not in the listing. 
- 
- A: Go to your BIOS setup, section PCI devices or similar.
-    Turn USB support on, and try again. 
-
- Q: No error messages, but still i2c doesn't seem to work.
-
- A: This can happen. This driver uses the pins VIA recommends in their
-    datasheets, but there are several ways the motherboard manufacturer
-    can actually wire the lines.
-
diff --git a/Documentation/i2c/busses/i2c-via.rst b/Documentation/i2c/busses/i2c-via.rst
new file mode 100644
index 000000000000..846aa17d80a2
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-via.rst
@@ -0,0 +1,40 @@
+=====================
+Kernel driver i2c-via
+=====================
+
+Supported adapters:
+  * VIA Technologies, InC. VT82C586B
+    Datasheet: Publicly available at the VIA website
+
+Author: Kyösti Mälkki <kmalkki@cc.hut.fi>
+
+Description
+-----------
+
+i2c-via is an i2c bus driver for motherboards with VIA chipset.
+
+The following VIA pci chipsets are supported:
+ - MVP3, VP3, VP2/97, VPX/97
+ - others with South bridge VT82C586B
+
+Your ``lspci`` listing must show this ::
+
+ Bridge: VIA Technologies, Inc. VT82C586B ACPI (rev 10)
+
+Problems?
+---------
+
+ Q:
+    You have VT82C586B on the motherboard, but not in the listing.
+
+ A:
+    Go to your BIOS setup, section PCI devices or similar.
+    Turn USB support on, and try again.
+
+ Q:
+    No error messages, but still i2c doesn't seem to work.
+
+ A:
+    This can happen. This driver uses the pins VIA recommends in their
+    datasheets, but there are several ways the motherboard manufacturer
+    can actually wire the lines.
diff --git a/Documentation/i2c/busses/i2c-viapro b/Documentation/i2c/busses/i2c-viapro
deleted file mode 100644
index ab64ce21c254..000000000000
--- a/Documentation/i2c/busses/i2c-viapro
+++ /dev/null
@@ -1,73 +0,0 @@
-Kernel driver i2c-viapro
-
-Supported adapters:
-  * VIA Technologies, Inc. VT82C596A/B
-    Datasheet: Sometimes available at the VIA website
-
-  * VIA Technologies, Inc. VT82C686A/B
-    Datasheet: Sometimes available at the VIA website
-
-  * VIA Technologies, Inc. VT8231, VT8233, VT8233A
-    Datasheet: available on request from VIA
-
-  * VIA Technologies, Inc. VT8235, VT8237R, VT8237A, VT8237S, VT8251
-    Datasheet: available on request and under NDA from VIA
-
-  * VIA Technologies, Inc. CX700
-    Datasheet: available on request and under NDA from VIA
-
-  * VIA Technologies, Inc. VX800/VX820
-    Datasheet: available on http://linux.via.com.tw
-
-  * VIA Technologies, Inc. VX855/VX875
-    Datasheet: available on http://linux.via.com.tw
-
-  * VIA Technologies, Inc. VX900
-    Datasheet: available on http://linux.via.com.tw
-
-Authors:
-	Kyösti Mälkki <kmalkki@cc.hut.fi>,
-	Mark D. Studebaker <mdsxyz123@yahoo.com>,
-	Jean Delvare <jdelvare@suse.de>
-
-Module Parameters
------------------
-
-* force: int
-  Forcibly enable the SMBus controller. DANGEROUS!
-* force_addr: int
-  Forcibly enable the SMBus at the given address. EXTREMELY DANGEROUS!
-
-Description
------------
-
-i2c-viapro is a true SMBus host driver for motherboards with one of the
-supported VIA south bridges.
-
-Your lspci -n listing must show one of these :
-
- device 1106:3050   (VT82C596A function 3)
- device 1106:3051   (VT82C596B function 3)
- device 1106:3057   (VT82C686 function 4)
- device 1106:3074   (VT8233)
- device 1106:3147   (VT8233A)
- device 1106:8235   (VT8231 function 4)
- device 1106:3177   (VT8235)
- device 1106:3227   (VT8237R)
- device 1106:3337   (VT8237A)
- device 1106:3372   (VT8237S)
- device 1106:3287   (VT8251)
- device 1106:8324   (CX700)
- device 1106:8353   (VX800/VX820)
- device 1106:8409   (VX855/VX875)
- device 1106:8410   (VX900)
-
-If none of these show up, you should look in the BIOS for settings like
-enable ACPI / SMBus or even USB.
-
-Except for the oldest chips (VT82C596A/B, VT82C686A and most probably
-VT8231), this driver supports I2C block transactions. Such transactions
-are mainly useful to read from and write to EEPROMs.
-
-The CX700/VX800/VX820 additionally appears to support SMBus PEC, although
-this driver doesn't implement it yet.
diff --git a/Documentation/i2c/busses/i2c-viapro.rst b/Documentation/i2c/busses/i2c-viapro.rst
new file mode 100644
index 000000000000..1762f0cf93d0
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-viapro.rst
@@ -0,0 +1,77 @@
+========================
+Kernel driver i2c-viapro
+========================
+
+Supported adapters:
+  * VIA Technologies, Inc. VT82C596A/B
+    Datasheet: Sometimes available at the VIA website
+
+  * VIA Technologies, Inc. VT82C686A/B
+    Datasheet: Sometimes available at the VIA website
+
+  * VIA Technologies, Inc. VT8231, VT8233, VT8233A
+    Datasheet: available on request from VIA
+
+  * VIA Technologies, Inc. VT8235, VT8237R, VT8237A, VT8237S, VT8251
+    Datasheet: available on request and under NDA from VIA
+
+  * VIA Technologies, Inc. CX700
+    Datasheet: available on request and under NDA from VIA
+
+  * VIA Technologies, Inc. VX800/VX820
+    Datasheet: available on http://linux.via.com.tw
+
+  * VIA Technologies, Inc. VX855/VX875
+    Datasheet: available on http://linux.via.com.tw
+
+  * VIA Technologies, Inc. VX900
+    Datasheet: available on http://linux.via.com.tw
+
+Authors:
+	- Kyösti Mälkki <kmalkki@cc.hut.fi>,
+	- Mark D. Studebaker <mdsxyz123@yahoo.com>,
+	- Jean Delvare <jdelvare@suse.de>
+
+Module Parameters
+-----------------
+
+* force: int
+  Forcibly enable the SMBus controller. DANGEROUS!
+* force_addr: int
+  Forcibly enable the SMBus at the given address. EXTREMELY DANGEROUS!
+
+Description
+-----------
+
+i2c-viapro is a true SMBus host driver for motherboards with one of the
+supported VIA south bridges.
+
+Your ``lspci -n`` listing must show one of these :
+
+ ================   ======================
+ device 1106:3050   (VT82C596A function 3)
+ device 1106:3051   (VT82C596B function 3)
+ device 1106:3057   (VT82C686 function 4)
+ device 1106:3074   (VT8233)
+ device 1106:3147   (VT8233A)
+ device 1106:8235   (VT8231 function 4)
+ device 1106:3177   (VT8235)
+ device 1106:3227   (VT8237R)
+ device 1106:3337   (VT8237A)
+ device 1106:3372   (VT8237S)
+ device 1106:3287   (VT8251)
+ device 1106:8324   (CX700)
+ device 1106:8353   (VX800/VX820)
+ device 1106:8409   (VX855/VX875)
+ device 1106:8410   (VX900)
+ ================   ======================
+
+If none of these show up, you should look in the BIOS for settings like
+enable ACPI / SMBus or even USB.
+
+Except for the oldest chips (VT82C596A/B, VT82C686A and most probably
+VT8231), this driver supports I2C block transactions. Such transactions
+are mainly useful to read from and write to EEPROMs.
+
+The CX700/VX800/VX820 additionally appears to support SMBus PEC, although
+this driver doesn't implement it yet.
diff --git a/Documentation/i2c/busses/index.rst b/Documentation/i2c/busses/index.rst
new file mode 100644
index 000000000000..97ca4d510816
--- /dev/null
+++ b/Documentation/i2c/busses/index.rst
@@ -0,0 +1,33 @@
+. SPDX-License-Identifier: GPL-2.0
+
+===============
+I2C Bus Drivers
+===============
+
+.. toctree::
+   :maxdepth: 1
+
+   i2c-ali1535
+   i2c-ali1563
+   i2c-ali15x3
+   i2c-amd756
+   i2c-amd8111
+   i2c-amd-mp2
+   i2c-diolan-u2c
+   i2c-i801
+   i2c-ismt
+   i2c-mlxcpld
+   i2c-nforce2
+   i2c-nvidia-gpu
+   i2c-ocores
+   i2c-parport-light
+   i2c-parport
+   i2c-pca-isa
+   i2c-piix4
+   i2c-sis5595
+   i2c-sis630
+   i2c-sis96x
+   i2c-taos-evm
+   i2c-viapro
+   i2c-via
+   scx200_acb
diff --git a/Documentation/i2c/busses/scx200_acb b/Documentation/i2c/busses/scx200_acb
deleted file mode 100644
index ce83c871fe95..000000000000
--- a/Documentation/i2c/busses/scx200_acb
+++ /dev/null
@@ -1,32 +0,0 @@
-Kernel driver scx200_acb
-
-Author: Christer Weinigel <wingel@nano-system.com>
-
-The driver supersedes the older, never merged driver named i2c-nscacb.
-
-Module Parameters
------------------
-
-* base: up to 4 ints
-  Base addresses for the ACCESS.bus controllers on SCx200 and SC1100 devices
-
-  By default the driver uses two base addresses 0x820 and 0x840.
-  If you want only one base address, specify the second as 0 so as to
-  override this default.
-
-Description
------------
-
-Enable the use of the ACCESS.bus controller on the Geode SCx200 and
-SC1100 processors and the CS5535 and CS5536 Geode companion devices.
-
-Device-specific notes
----------------------
-
-The SC1100 WRAP boards are known to use base addresses 0x810 and 0x820.
-If the scx200_acb driver is built into the kernel, add the following
-parameter to your boot command line:
-  scx200_acb.base=0x810,0x820
-If the scx200_acb driver is built as a module, add the following line to
-a configuration file in /etc/modprobe.d/ instead:
-  options scx200_acb base=0x810,0x820
diff --git a/Documentation/i2c/busses/scx200_acb.rst b/Documentation/i2c/busses/scx200_acb.rst
new file mode 100644
index 000000000000..8dc7c352508c
--- /dev/null
+++ b/Documentation/i2c/busses/scx200_acb.rst
@@ -0,0 +1,37 @@
+========================
+Kernel driver scx200_acb
+========================
+
+Author: Christer Weinigel <wingel@nano-system.com>
+
+The driver supersedes the older, never merged driver named i2c-nscacb.
+
+Module Parameters
+-----------------
+
+* base: up to 4 ints
+  Base addresses for the ACCESS.bus controllers on SCx200 and SC1100 devices
+
+  By default the driver uses two base addresses 0x820 and 0x840.
+  If you want only one base address, specify the second as 0 so as to
+  override this default.
+
+Description
+-----------
+
+Enable the use of the ACCESS.bus controller on the Geode SCx200 and
+SC1100 processors and the CS5535 and CS5536 Geode companion devices.
+
+Device-specific notes
+---------------------
+
+The SC1100 WRAP boards are known to use base addresses 0x810 and 0x820.
+If the scx200_acb driver is built into the kernel, add the following
+parameter to your boot command line::
+
+  scx200_acb.base=0x810,0x820
+
+If the scx200_acb driver is built as a module, add the following line to
+a configuration file in /etc/modprobe.d/ instead::
+
+  options scx200_acb base=0x810,0x820
diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface
deleted file mode 100644
index fbed645ccd75..000000000000
--- a/Documentation/i2c/dev-interface
+++ /dev/null
@@ -1,213 +0,0 @@
-Usually, i2c devices are controlled by a kernel driver. But it is also
-possible to access all devices on an adapter from userspace, through
-the /dev interface. You need to load module i2c-dev for this.
-
-Each registered i2c adapter gets a number, counting from 0. You can
-examine /sys/class/i2c-dev/ to see what number corresponds to which adapter.
-Alternatively, you can run "i2cdetect -l" to obtain a formatted list of all
-i2c adapters present on your system at a given time. i2cdetect is part of
-the i2c-tools package.
-
-I2C device files are character device files with major device number 89
-and a minor device number corresponding to the number assigned as
-explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ...,
-i2c-10, ...). All 256 minor device numbers are reserved for i2c.
-
-
-C example
-=========
-
-So let's say you want to access an i2c adapter from a C program.
-First, you need to include these two headers:
-
-  #include <linux/i2c-dev.h>
-  #include <i2c/smbus.h>
-
-Now, you have to decide which adapter you want to access. You should
-inspect /sys/class/i2c-dev/ or run "i2cdetect -l" to decide this.
-Adapter numbers are assigned somewhat dynamically, so you can not
-assume much about them. They can even change from one boot to the next.
-
-Next thing, open the device file, as follows:
-
-  int file;
-  int adapter_nr = 2; /* probably dynamically determined */
-  char filename[20];
-
-  snprintf(filename, 19, "/dev/i2c-%d", adapter_nr);
-  file = open(filename, O_RDWR);
-  if (file < 0) {
-    /* ERROR HANDLING; you can check errno to see what went wrong */
-    exit(1);
-  }
-
-When you have opened the device, you must specify with what device
-address you want to communicate:
-
-  int addr = 0x40; /* The I2C address */
-
-  if (ioctl(file, I2C_SLAVE, addr) < 0) {
-    /* ERROR HANDLING; you can check errno to see what went wrong */
-    exit(1);
-  }
-
-Well, you are all set up now. You can now use SMBus commands or plain
-I2C to communicate with your device. SMBus commands are preferred if
-the device supports them. Both are illustrated below.
-
-  __u8 reg = 0x10; /* Device register to access */
-  __s32 res;
-  char buf[10];
-
-  /* Using SMBus commands */
-  res = i2c_smbus_read_word_data(file, reg);
-  if (res < 0) {
-    /* ERROR HANDLING: i2c transaction failed */
-  } else {
-    /* res contains the read word */
-  }
-
-  /*
-   * Using I2C Write, equivalent of
-   * i2c_smbus_write_word_data(file, reg, 0x6543)
-   */
-  buf[0] = reg;
-  buf[1] = 0x43;
-  buf[2] = 0x65;
-  if (write(file, buf, 3) != 3) {
-    /* ERROR HANDLING: i2c transaction failed */
-  }
-
-  /* Using I2C Read, equivalent of i2c_smbus_read_byte(file) */
-  if (read(file, buf, 1) != 1) {
-    /* ERROR HANDLING: i2c transaction failed */
-  } else {
-    /* buf[0] contains the read byte */
-  }
-
-Note that only a subset of the I2C and SMBus protocols can be achieved by
-the means of read() and write() calls. In particular, so-called combined
-transactions (mixing read and write messages in the same transaction)
-aren't supported. For this reason, this interface is almost never used by
-user-space programs.
-
-IMPORTANT: because of the use of inline functions, you *have* to use
-'-O' or some variation when you compile your program!
-
-
-Full interface description
-==========================
-
-The following IOCTLs are defined:
-
-ioctl(file, I2C_SLAVE, long addr)
-  Change slave address. The address is passed in the 7 lower bits of the
-  argument (except for 10 bit addresses, passed in the 10 lower bits in this
-  case).
-
-ioctl(file, I2C_TENBIT, long select)
-  Selects ten bit addresses if select not equals 0, selects normal 7 bit
-  addresses if select equals 0. Default 0.  This request is only valid
-  if the adapter has I2C_FUNC_10BIT_ADDR.
-
-ioctl(file, I2C_PEC, long select)
-  Selects SMBus PEC (packet error checking) generation and verification
-  if select not equals 0, disables if select equals 0. Default 0.
-  Used only for SMBus transactions.  This request only has an effect if the
-  the adapter has I2C_FUNC_SMBUS_PEC; it is still safe if not, it just
-  doesn't have any effect.
-
-ioctl(file, I2C_FUNCS, unsigned long *funcs)
-  Gets the adapter functionality and puts it in *funcs.
-
-ioctl(file, I2C_RDWR, struct i2c_rdwr_ioctl_data *msgset)
-  Do combined read/write transaction without stop in between.
-  Only valid if the adapter has I2C_FUNC_I2C.  The argument is
-  a pointer to a
-
-  struct i2c_rdwr_ioctl_data {
-      struct i2c_msg *msgs;  /* ptr to array of simple messages */
-      int nmsgs;             /* number of messages to exchange */
-  }
-
-  The msgs[] themselves contain further pointers into data buffers.
-  The function will write or read data to or from that buffers depending
-  on whether the I2C_M_RD flag is set in a particular message or not.
-  The slave address and whether to use ten bit address mode has to be
-  set in each message, overriding the values set with the above ioctl's.
-
-ioctl(file, I2C_SMBUS, struct i2c_smbus_ioctl_data *args)
-  If possible, use the provided i2c_smbus_* methods described below instead
-  of issuing direct ioctls.
-
-You can do plain i2c transactions by using read(2) and write(2) calls.
-You do not need to pass the address byte; instead, set it through
-ioctl I2C_SLAVE before you try to access the device.
-
-You can do SMBus level transactions (see documentation file smbus-protocol
-for details) through the following functions:
-  __s32 i2c_smbus_write_quick(int file, __u8 value);
-  __s32 i2c_smbus_read_byte(int file);
-  __s32 i2c_smbus_write_byte(int file, __u8 value);
-  __s32 i2c_smbus_read_byte_data(int file, __u8 command);
-  __s32 i2c_smbus_write_byte_data(int file, __u8 command, __u8 value);
-  __s32 i2c_smbus_read_word_data(int file, __u8 command);
-  __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value);
-  __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value);
-  __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values);
-  __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length,
-                                   __u8 *values);
-All these transactions return -1 on failure; you can read errno to see
-what happened. The 'write' transactions return 0 on success; the
-'read' transactions return the read value, except for read_block, which
-returns the number of values read. The block buffers need not be longer
-than 32 bytes.
-
-The above functions are made available by linking against the libi2c library,
-which is provided by the i2c-tools project.  See:
-https://git.kernel.org/pub/scm/utils/i2c-tools/i2c-tools.git/.
-
-
-Implementation details
-======================
-
-For the interested, here's the code flow which happens inside the kernel
-when you use the /dev interface to I2C:
-
-1* Your program opens /dev/i2c-N and calls ioctl() on it, as described in
-section "C example" above.
-
-2* These open() and ioctl() calls are handled by the i2c-dev kernel
-driver: see i2c-dev.c:i2cdev_open() and i2c-dev.c:i2cdev_ioctl(),
-respectively. You can think of i2c-dev as a generic I2C chip driver
-that can be programmed from user-space.
-
-3* Some ioctl() calls are for administrative tasks and are handled by
-i2c-dev directly. Examples include I2C_SLAVE (set the address of the
-device you want to access) and I2C_PEC (enable or disable SMBus error
-checking on future transactions.)
-
-4* Other ioctl() calls are converted to in-kernel function calls by
-i2c-dev. Examples include I2C_FUNCS, which queries the I2C adapter
-functionality using i2c.h:i2c_get_functionality(), and I2C_SMBUS, which
-performs an SMBus transaction using i2c-core-smbus.c:i2c_smbus_xfer().
-
-The i2c-dev driver is responsible for checking all the parameters that
-come from user-space for validity. After this point, there is no
-difference between these calls that came from user-space through i2c-dev
-and calls that would have been performed by kernel I2C chip drivers
-directly. This means that I2C bus drivers don't need to implement
-anything special to support access from user-space.
-
-5* These i2c.h functions are wrappers to the actual implementation of
-your I2C bus driver. Each adapter must declare callback functions
-implementing these standard calls. i2c.h:i2c_get_functionality() calls
-i2c_adapter.algo->functionality(), while
-i2c-core-smbus.c:i2c_smbus_xfer() calls either
-adapter.algo->smbus_xfer() if it is implemented, or if not,
-i2c-core-smbus.c:i2c_smbus_xfer_emulated() which in turn calls
-i2c_adapter.algo->master_xfer().
-
-After your I2C bus driver has processed these requests, execution runs
-up the call chain, with almost no processing done, except by i2c-dev to
-package the returned data, if any, in suitable format for the ioctl.
diff --git a/Documentation/i2c/dev-interface.rst b/Documentation/i2c/dev-interface.rst
new file mode 100644
index 000000000000..69c23a3c2b1b
--- /dev/null
+++ b/Documentation/i2c/dev-interface.rst
@@ -0,0 +1,219 @@
+====================
+I2C Device Interface
+====================
+
+Usually, i2c devices are controlled by a kernel driver. But it is also
+possible to access all devices on an adapter from userspace, through
+the /dev interface. You need to load module i2c-dev for this.
+
+Each registered i2c adapter gets a number, counting from 0. You can
+examine /sys/class/i2c-dev/ to see what number corresponds to which adapter.
+Alternatively, you can run "i2cdetect -l" to obtain a formatted list of all
+i2c adapters present on your system at a given time. i2cdetect is part of
+the i2c-tools package.
+
+I2C device files are character device files with major device number 89
+and a minor device number corresponding to the number assigned as
+explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ...,
+i2c-10, ...). All 256 minor device numbers are reserved for i2c.
+
+
+C example
+=========
+
+So let's say you want to access an i2c adapter from a C program.
+First, you need to include these two headers::
+
+  #include <linux/i2c-dev.h>
+  #include <i2c/smbus.h>
+
+Now, you have to decide which adapter you want to access. You should
+inspect /sys/class/i2c-dev/ or run "i2cdetect -l" to decide this.
+Adapter numbers are assigned somewhat dynamically, so you can not
+assume much about them. They can even change from one boot to the next.
+
+Next thing, open the device file, as follows::
+
+  int file;
+  int adapter_nr = 2; /* probably dynamically determined */
+  char filename[20];
+
+  snprintf(filename, 19, "/dev/i2c-%d", adapter_nr);
+  file = open(filename, O_RDWR);
+  if (file < 0) {
+    /* ERROR HANDLING; you can check errno to see what went wrong */
+    exit(1);
+  }
+
+When you have opened the device, you must specify with what device
+address you want to communicate::
+
+  int addr = 0x40; /* The I2C address */
+
+  if (ioctl(file, I2C_SLAVE, addr) < 0) {
+    /* ERROR HANDLING; you can check errno to see what went wrong */
+    exit(1);
+  }
+
+Well, you are all set up now. You can now use SMBus commands or plain
+I2C to communicate with your device. SMBus commands are preferred if
+the device supports them. Both are illustrated below::
+
+  __u8 reg = 0x10; /* Device register to access */
+  __s32 res;
+  char buf[10];
+
+  /* Using SMBus commands */
+  res = i2c_smbus_read_word_data(file, reg);
+  if (res < 0) {
+    /* ERROR HANDLING: i2c transaction failed */
+  } else {
+    /* res contains the read word */
+  }
+
+  /*
+   * Using I2C Write, equivalent of
+   * i2c_smbus_write_word_data(file, reg, 0x6543)
+   */
+  buf[0] = reg;
+  buf[1] = 0x43;
+  buf[2] = 0x65;
+  if (write(file, buf, 3) != 3) {
+    /* ERROR HANDLING: i2c transaction failed */
+  }
+
+  /* Using I2C Read, equivalent of i2c_smbus_read_byte(file) */
+  if (read(file, buf, 1) != 1) {
+    /* ERROR HANDLING: i2c transaction failed */
+  } else {
+    /* buf[0] contains the read byte */
+  }
+
+Note that only a subset of the I2C and SMBus protocols can be achieved by
+the means of read() and write() calls. In particular, so-called combined
+transactions (mixing read and write messages in the same transaction)
+aren't supported. For this reason, this interface is almost never used by
+user-space programs.
+
+IMPORTANT: because of the use of inline functions, you *have* to use
+'-O' or some variation when you compile your program!
+
+
+Full interface description
+==========================
+
+The following IOCTLs are defined:
+
+``ioctl(file, I2C_SLAVE, long addr)``
+  Change slave address. The address is passed in the 7 lower bits of the
+  argument (except for 10 bit addresses, passed in the 10 lower bits in this
+  case).
+
+``ioctl(file, I2C_TENBIT, long select)``
+  Selects ten bit addresses if select not equals 0, selects normal 7 bit
+  addresses if select equals 0. Default 0.  This request is only valid
+  if the adapter has I2C_FUNC_10BIT_ADDR.
+
+``ioctl(file, I2C_PEC, long select)``
+  Selects SMBus PEC (packet error checking) generation and verification
+  if select not equals 0, disables if select equals 0. Default 0.
+  Used only for SMBus transactions.  This request only has an effect if the
+  the adapter has I2C_FUNC_SMBUS_PEC; it is still safe if not, it just
+  doesn't have any effect.
+
+``ioctl(file, I2C_FUNCS, unsigned long *funcs)``
+  Gets the adapter functionality and puts it in ``*funcs``.
+
+``ioctl(file, I2C_RDWR, struct i2c_rdwr_ioctl_data *msgset)``
+  Do combined read/write transaction without stop in between.
+  Only valid if the adapter has I2C_FUNC_I2C.  The argument is
+  a pointer to a::
+
+    struct i2c_rdwr_ioctl_data {
+      struct i2c_msg *msgs;  /* ptr to array of simple messages */
+      int nmsgs;             /* number of messages to exchange */
+    }
+
+  The msgs[] themselves contain further pointers into data buffers.
+  The function will write or read data to or from that buffers depending
+  on whether the I2C_M_RD flag is set in a particular message or not.
+  The slave address and whether to use ten bit address mode has to be
+  set in each message, overriding the values set with the above ioctl's.
+
+``ioctl(file, I2C_SMBUS, struct i2c_smbus_ioctl_data *args)``
+  If possible, use the provided ``i2c_smbus_*`` methods described below instead
+  of issuing direct ioctls.
+
+You can do plain i2c transactions by using read(2) and write(2) calls.
+You do not need to pass the address byte; instead, set it through
+ioctl I2C_SLAVE before you try to access the device.
+
+You can do SMBus level transactions (see documentation file smbus-protocol
+for details) through the following functions::
+
+  __s32 i2c_smbus_write_quick(int file, __u8 value);
+  __s32 i2c_smbus_read_byte(int file);
+  __s32 i2c_smbus_write_byte(int file, __u8 value);
+  __s32 i2c_smbus_read_byte_data(int file, __u8 command);
+  __s32 i2c_smbus_write_byte_data(int file, __u8 command, __u8 value);
+  __s32 i2c_smbus_read_word_data(int file, __u8 command);
+  __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value);
+  __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value);
+  __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values);
+  __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length,
+                                   __u8 *values);
+
+All these transactions return -1 on failure; you can read errno to see
+what happened. The 'write' transactions return 0 on success; the
+'read' transactions return the read value, except for read_block, which
+returns the number of values read. The block buffers need not be longer
+than 32 bytes.
+
+The above functions are made available by linking against the libi2c library,
+which is provided by the i2c-tools project.  See:
+https://git.kernel.org/pub/scm/utils/i2c-tools/i2c-tools.git/.
+
+
+Implementation details
+======================
+
+For the interested, here's the code flow which happens inside the kernel
+when you use the /dev interface to I2C:
+
+1) Your program opens /dev/i2c-N and calls ioctl() on it, as described in
+   section "C example" above.
+
+2) These open() and ioctl() calls are handled by the i2c-dev kernel
+   driver: see i2c-dev.c:i2cdev_open() and i2c-dev.c:i2cdev_ioctl(),
+   respectively. You can think of i2c-dev as a generic I2C chip driver
+   that can be programmed from user-space.
+
+3) Some ioctl() calls are for administrative tasks and are handled by
+   i2c-dev directly. Examples include I2C_SLAVE (set the address of the
+   device you want to access) and I2C_PEC (enable or disable SMBus error
+   checking on future transactions.)
+
+4) Other ioctl() calls are converted to in-kernel function calls by
+   i2c-dev. Examples include I2C_FUNCS, which queries the I2C adapter
+   functionality using i2c.h:i2c_get_functionality(), and I2C_SMBUS, which
+   performs an SMBus transaction using i2c-core-smbus.c:i2c_smbus_xfer().
+
+   The i2c-dev driver is responsible for checking all the parameters that
+   come from user-space for validity. After this point, there is no
+   difference between these calls that came from user-space through i2c-dev
+   and calls that would have been performed by kernel I2C chip drivers
+   directly. This means that I2C bus drivers don't need to implement
+   anything special to support access from user-space.
+
+5) These i2c.h functions are wrappers to the actual implementation of
+   your I2C bus driver. Each adapter must declare callback functions
+   implementing these standard calls. i2c.h:i2c_get_functionality() calls
+   i2c_adapter.algo->functionality(), while
+   i2c-core-smbus.c:i2c_smbus_xfer() calls either
+   adapter.algo->smbus_xfer() if it is implemented, or if not,
+   i2c-core-smbus.c:i2c_smbus_xfer_emulated() which in turn calls
+   i2c_adapter.algo->master_xfer().
+
+After your I2C bus driver has processed these requests, execution runs
+up the call chain, with almost no processing done, except by i2c-dev to
+package the returned data, if any, in suitable format for the ioctl.
diff --git a/Documentation/i2c/dma-considerations.rst b/Documentation/i2c/dma-considerations.rst
new file mode 100644
index 000000000000..203002054120
--- /dev/null
+++ b/Documentation/i2c/dma-considerations.rst
@@ -0,0 +1,71 @@
+=================
+Linux I2C and DMA
+=================
+
+Given that i2c is a low-speed bus, over which the majority of messages
+transferred are small, it is not considered a prime user of DMA access. At this
+time of writing, only 10% of I2C bus master drivers have DMA support
+implemented. And the vast majority of transactions are so small that setting up
+DMA for it will likely add more overhead than a plain PIO transfer.
+
+Therefore, it is *not* mandatory that the buffer of an I2C message is DMA safe.
+It does not seem reasonable to apply additional burdens when the feature is so
+rarely used. However, it is recommended to use a DMA-safe buffer if your
+message size is likely applicable for DMA. Most drivers have this threshold
+around 8 bytes (as of today, this is mostly an educated guess, however). For
+any message of 16 byte or larger, it is probably a really good idea. Please
+note that other subsystems you use might add requirements. E.g., if your
+I2C bus master driver is using USB as a bridge, then you need to have DMA
+safe buffers always, because USB requires it.
+
+Clients
+-------
+
+For clients, if you use a DMA safe buffer in i2c_msg, set the I2C_M_DMA_SAFE
+flag with it. Then, the I2C core and drivers know they can safely operate DMA
+on it. Note that using this flag is optional. I2C host drivers which are not
+updated to use this flag will work like before. And like before, they risk
+using an unsafe DMA buffer. To improve this situation, using I2C_M_DMA_SAFE in
+more and more clients and host drivers is the planned way forward. Note also
+that setting this flag makes only sense in kernel space. User space data is
+copied into kernel space anyhow. The I2C core makes sure the destination
+buffers in kernel space are always DMA capable. Also, when the core emulates
+SMBus transactions via I2C, the buffers for block transfers are DMA safe. Users
+of i2c_master_send() and i2c_master_recv() functions can now use DMA safe
+variants (i2c_master_send_dmasafe() and i2c_master_recv_dmasafe()) once they
+know their buffers are DMA safe. Users of i2c_transfer() must set the
+I2C_M_DMA_SAFE flag manually.
+
+Masters
+-------
+
+Bus master drivers wishing to implement safe DMA can use helper functions from
+the I2C core. One gives you a DMA-safe buffer for a given i2c_msg as long as a
+certain threshold is met::
+
+	dma_buf = i2c_get_dma_safe_msg_buf(msg, threshold_in_byte);
+
+If a buffer is returned, it is either msg->buf for the I2C_M_DMA_SAFE case or a
+bounce buffer. But you don't need to care about that detail, just use the
+returned buffer. If NULL is returned, the threshold was not met or a bounce
+buffer could not be allocated. Fall back to PIO in that case.
+
+In any case, a buffer obtained from above needs to be released. Another helper
+function ensures a potentially used bounce buffer is freed::
+
+	i2c_put_dma_safe_msg_buf(dma_buf, msg, xferred);
+
+The last argument 'xferred' controls if the buffer is synced back to the
+message or not. No syncing is needed in cases setting up DMA had an error and
+there was no data transferred.
+
+The bounce buffer handling from the core is generic and simple. It will always
+allocate a new bounce buffer. If you want a more sophisticated handling (e.g.
+reusing pre-allocated buffers), you are free to implement your own.
+
+Please also check the in-kernel documentation for details. The i2c-sh_mobile
+driver can be used as a reference example how to use the above helpers.
+
+Final note: If you plan to use DMA with I2C (or with anything else, actually)
+make sure you have CONFIG_DMA_API_DEBUG enabled during development. It can help
+you find various issues which can be complex to debug otherwise.
diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes
deleted file mode 100644
index 0cee0fc545b4..000000000000
--- a/Documentation/i2c/fault-codes
+++ /dev/null
@@ -1,128 +0,0 @@
-This is a summary of the most important conventions for use of fault
-codes in the I2C/SMBus stack.
-
-
-A "Fault" is not always an "Error"
-----------------------------------
-Not all fault reports imply errors; "page faults" should be a familiar
-example.  Software often retries idempotent operations after transient
-faults.  There may be fancier recovery schemes that are appropriate in
-some cases, such as re-initializing (and maybe resetting).  After such
-recovery, triggered by a fault report, there is no error.
-
-In a similar way, sometimes a "fault" code just reports one defined
-result for an operation ... it doesn't indicate that anything is wrong
-at all, just that the outcome wasn't on the "golden path".
-
-In short, your I2C driver code may need to know these codes in order
-to respond correctly.  Other code may need to rely on YOUR code reporting
-the right fault code, so that it can (in turn) behave correctly.
-
-
-I2C and SMBus fault codes
--------------------------
-These are returned as negative numbers from most calls, with zero or
-some positive number indicating a non-fault return.  The specific
-numbers associated with these symbols differ between architectures,
-though most Linux systems use <asm-generic/errno*.h> numbering.
-
-Note that the descriptions here are not exhaustive.  There are other
-codes that may be returned, and other cases where these codes should
-be returned.  However, drivers should not return other codes for these
-cases (unless the hardware doesn't provide unique fault reports).
-
-Also, codes returned by adapter probe methods follow rules which are
-specific to their host bus (such as PCI, or the platform bus).
-
-
-EAGAIN
-	Returned by I2C adapters when they lose arbitration in master
-	transmit mode:  some other master was transmitting different
-	data at the same time.
-
-	Also returned when trying to invoke an I2C operation in an
-	atomic context, when some task is already using that I2C bus
-	to execute some other operation.
-
-EBADMSG
-	Returned by SMBus logic when an invalid Packet Error Code byte
-	is received.  This code is a CRC covering all bytes in the
-	transaction, and is sent before the terminating STOP.  This
-	fault is only reported on read transactions; the SMBus slave
-	may have a way to report PEC mismatches on writes from the
-	host.  Note that even if PECs are in use, you should not rely
-	on these as the only way to detect incorrect data transfers.
-
-EBUSY
-	Returned by SMBus adapters when the bus was busy for longer
-	than allowed.  This usually indicates some device (maybe the
-	SMBus adapter) needs some fault recovery (such as resetting),
-	or that the reset was attempted but failed.
-
-EINVAL
-	This rather vague error means an invalid parameter has been
-	detected before any I/O operation was started.  Use a more
-	specific fault code when you can.
-
-EIO
-	This rather vague error means something went wrong when
-	performing an I/O operation.  Use a more specific fault
-	code when you can.
-
-ENODEV
-	Returned by driver probe() methods.  This is a bit more
-	specific than ENXIO, implying the problem isn't with the
-	address, but with the device found there.  Driver probes
-	may verify the device returns *correct* responses, and
-	return this as appropriate.  (The driver core will warn
-	about probe faults other than ENXIO and ENODEV.)
-
-ENOMEM
-	Returned by any component that can't allocate memory when
-	it needs to do so.
-
-ENXIO
-	Returned by I2C adapters to indicate that the address phase
-	of a transfer didn't get an ACK.  While it might just mean
-	an I2C device was temporarily not responding, usually it
-	means there's nothing listening at that address.
-
-	Returned by driver probe() methods to indicate that they
-	found no device to bind to.  (ENODEV may also be used.)
-
-EOPNOTSUPP
-	Returned by an adapter when asked to perform an operation
-	that it doesn't, or can't, support.
-
-	For example, this would be returned when an adapter that
-	doesn't support SMBus block transfers is asked to execute
-	one.  In that case, the driver making that request should
-	have verified that functionality was supported before it
-	made that block transfer request.
-
-	Similarly, if an I2C adapter can't execute all legal I2C
-	messages, it should return this when asked to perform a
-	transaction it can't.  (These limitations can't be seen in
-	the adapter's functionality mask, since the assumption is
-	that if an adapter supports I2C it supports all of I2C.)
-
-EPROTO
-	Returned when slave does not conform to the relevant I2C
-	or SMBus (or chip-specific) protocol specifications.  One
-	case is when the length of an SMBus block data response
-	(from the SMBus slave) is outside the range 1-32 bytes.
-
-ESHUTDOWN
-	Returned when a transfer was requested using an adapter
-	which is already suspended.
-
-ETIMEDOUT
-	This is returned by drivers when an operation took too much
-	time, and was aborted before it completed.
-
-	SMBus adapters may return it when an operation took more
-	time than allowed by the SMBus specification; for example,
-	when a slave stretches clocks too far.  I2C has no such
-	timeouts, but it's normal for I2C adapters to impose some
-	arbitrary limits (much longer than SMBus!) too.
-
diff --git a/Documentation/i2c/fault-codes.rst b/Documentation/i2c/fault-codes.rst
new file mode 100644
index 000000000000..80b14e718b52
--- /dev/null
+++ b/Documentation/i2c/fault-codes.rst
@@ -0,0 +1,131 @@
+=====================
+I2C/SMBUS Fault Codes
+=====================
+
+This is a summary of the most important conventions for use of fault
+codes in the I2C/SMBus stack.
+
+
+A "Fault" is not always an "Error"
+----------------------------------
+Not all fault reports imply errors; "page faults" should be a familiar
+example.  Software often retries idempotent operations after transient
+faults.  There may be fancier recovery schemes that are appropriate in
+some cases, such as re-initializing (and maybe resetting).  After such
+recovery, triggered by a fault report, there is no error.
+
+In a similar way, sometimes a "fault" code just reports one defined
+result for an operation ... it doesn't indicate that anything is wrong
+at all, just that the outcome wasn't on the "golden path".
+
+In short, your I2C driver code may need to know these codes in order
+to respond correctly.  Other code may need to rely on YOUR code reporting
+the right fault code, so that it can (in turn) behave correctly.
+
+
+I2C and SMBus fault codes
+-------------------------
+These are returned as negative numbers from most calls, with zero or
+some positive number indicating a non-fault return.  The specific
+numbers associated with these symbols differ between architectures,
+though most Linux systems use <asm-generic/errno*.h> numbering.
+
+Note that the descriptions here are not exhaustive.  There are other
+codes that may be returned, and other cases where these codes should
+be returned.  However, drivers should not return other codes for these
+cases (unless the hardware doesn't provide unique fault reports).
+
+Also, codes returned by adapter probe methods follow rules which are
+specific to their host bus (such as PCI, or the platform bus).
+
+
+EAGAIN
+	Returned by I2C adapters when they lose arbitration in master
+	transmit mode:  some other master was transmitting different
+	data at the same time.
+
+	Also returned when trying to invoke an I2C operation in an
+	atomic context, when some task is already using that I2C bus
+	to execute some other operation.
+
+EBADMSG
+	Returned by SMBus logic when an invalid Packet Error Code byte
+	is received.  This code is a CRC covering all bytes in the
+	transaction, and is sent before the terminating STOP.  This
+	fault is only reported on read transactions; the SMBus slave
+	may have a way to report PEC mismatches on writes from the
+	host.  Note that even if PECs are in use, you should not rely
+	on these as the only way to detect incorrect data transfers.
+
+EBUSY
+	Returned by SMBus adapters when the bus was busy for longer
+	than allowed.  This usually indicates some device (maybe the
+	SMBus adapter) needs some fault recovery (such as resetting),
+	or that the reset was attempted but failed.
+
+EINVAL
+	This rather vague error means an invalid parameter has been
+	detected before any I/O operation was started.  Use a more
+	specific fault code when you can.
+
+EIO
+	This rather vague error means something went wrong when
+	performing an I/O operation.  Use a more specific fault
+	code when you can.
+
+ENODEV
+	Returned by driver probe() methods.  This is a bit more
+	specific than ENXIO, implying the problem isn't with the
+	address, but with the device found there.  Driver probes
+	may verify the device returns *correct* responses, and
+	return this as appropriate.  (The driver core will warn
+	about probe faults other than ENXIO and ENODEV.)
+
+ENOMEM
+	Returned by any component that can't allocate memory when
+	it needs to do so.
+
+ENXIO
+	Returned by I2C adapters to indicate that the address phase
+	of a transfer didn't get an ACK.  While it might just mean
+	an I2C device was temporarily not responding, usually it
+	means there's nothing listening at that address.
+
+	Returned by driver probe() methods to indicate that they
+	found no device to bind to.  (ENODEV may also be used.)
+
+EOPNOTSUPP
+	Returned by an adapter when asked to perform an operation
+	that it doesn't, or can't, support.
+
+	For example, this would be returned when an adapter that
+	doesn't support SMBus block transfers is asked to execute
+	one.  In that case, the driver making that request should
+	have verified that functionality was supported before it
+	made that block transfer request.
+
+	Similarly, if an I2C adapter can't execute all legal I2C
+	messages, it should return this when asked to perform a
+	transaction it can't.  (These limitations can't be seen in
+	the adapter's functionality mask, since the assumption is
+	that if an adapter supports I2C it supports all of I2C.)
+
+EPROTO
+	Returned when slave does not conform to the relevant I2C
+	or SMBus (or chip-specific) protocol specifications.  One
+	case is when the length of an SMBus block data response
+	(from the SMBus slave) is outside the range 1-32 bytes.
+
+ESHUTDOWN
+	Returned when a transfer was requested using an adapter
+	which is already suspended.
+
+ETIMEDOUT
+	This is returned by drivers when an operation took too much
+	time, and was aborted before it completed.
+
+	SMBus adapters may return it when an operation took more
+	time than allowed by the SMBus specification; for example,
+	when a slave stretches clocks too far.  I2C has no such
+	timeouts, but it's normal for I2C adapters to impose some
+	arbitrary limits (much longer than SMBus!) too.
diff --git a/Documentation/i2c/functionality b/Documentation/i2c/functionality
deleted file mode 100644
index 4aae8ed15873..000000000000
--- a/Documentation/i2c/functionality
+++ /dev/null
@@ -1,148 +0,0 @@
-INTRODUCTION
-------------
-
-Because not every I2C or SMBus adapter implements everything in the 
-I2C specifications, a client can not trust that everything it needs
-is implemented when it is given the option to attach to an adapter:
-the client needs some way to check whether an adapter has the needed
-functionality. 
-
-
-FUNCTIONALITY CONSTANTS
------------------------
-
-For the most up-to-date list of functionality constants, please check
-<uapi/linux/i2c.h>!
-
-  I2C_FUNC_I2C                    Plain i2c-level commands (Pure SMBus
-                                  adapters typically can not do these)
-  I2C_FUNC_10BIT_ADDR             Handles the 10-bit address extensions
-  I2C_FUNC_PROTOCOL_MANGLING      Knows about the I2C_M_IGNORE_NAK,
-                                  I2C_M_REV_DIR_ADDR and I2C_M_NO_RD_ACK
-                                  flags (which modify the I2C protocol!)
-  I2C_FUNC_NOSTART                Can skip repeated start sequence
-  I2C_FUNC_SMBUS_QUICK            Handles the SMBus write_quick command
-  I2C_FUNC_SMBUS_READ_BYTE        Handles the SMBus read_byte command
-  I2C_FUNC_SMBUS_WRITE_BYTE       Handles the SMBus write_byte command
-  I2C_FUNC_SMBUS_READ_BYTE_DATA   Handles the SMBus read_byte_data command
-  I2C_FUNC_SMBUS_WRITE_BYTE_DATA  Handles the SMBus write_byte_data command
-  I2C_FUNC_SMBUS_READ_WORD_DATA   Handles the SMBus read_word_data command
-  I2C_FUNC_SMBUS_WRITE_WORD_DATA  Handles the SMBus write_byte_data command
-  I2C_FUNC_SMBUS_PROC_CALL        Handles the SMBus process_call command
-  I2C_FUNC_SMBUS_READ_BLOCK_DATA  Handles the SMBus read_block_data command
-  I2C_FUNC_SMBUS_WRITE_BLOCK_DATA Handles the SMBus write_block_data command
-  I2C_FUNC_SMBUS_READ_I2C_BLOCK   Handles the SMBus read_i2c_block_data command
-  I2C_FUNC_SMBUS_WRITE_I2C_BLOCK  Handles the SMBus write_i2c_block_data command
-
-A few combinations of the above flags are also defined for your convenience:
-
-  I2C_FUNC_SMBUS_BYTE             Handles the SMBus read_byte
-                                  and write_byte commands
-  I2C_FUNC_SMBUS_BYTE_DATA        Handles the SMBus read_byte_data
-                                  and write_byte_data commands
-  I2C_FUNC_SMBUS_WORD_DATA        Handles the SMBus read_word_data
-                                  and write_word_data commands
-  I2C_FUNC_SMBUS_BLOCK_DATA       Handles the SMBus read_block_data
-                                  and write_block_data commands
-  I2C_FUNC_SMBUS_I2C_BLOCK        Handles the SMBus read_i2c_block_data
-                                  and write_i2c_block_data commands
-  I2C_FUNC_SMBUS_EMUL             Handles all SMBus commands that can be
-                                  emulated by a real I2C adapter (using
-                                  the transparent emulation layer)
-
-In kernel versions prior to 3.5 I2C_FUNC_NOSTART was implemented as
-part of I2C_FUNC_PROTOCOL_MANGLING.
-
-
-ADAPTER IMPLEMENTATION
-----------------------
-
-When you write a new adapter driver, you will have to implement a
-function callback `functionality'. Typical implementations are given
-below.
-
-A typical SMBus-only adapter would list all the SMBus transactions it
-supports. This example comes from the i2c-piix4 driver:
-
-  static u32 piix4_func(struct i2c_adapter *adapter)
-  {
-	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
-	       I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
-	       I2C_FUNC_SMBUS_BLOCK_DATA;
-  }
-
-A typical full-I2C adapter would use the following (from the i2c-pxa
-driver):
-
-  static u32 i2c_pxa_functionality(struct i2c_adapter *adap)
-  {
-	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
-  }
-
-I2C_FUNC_SMBUS_EMUL includes all the SMBus transactions (with the
-addition of I2C block transactions) which i2c-core can emulate using
-I2C_FUNC_I2C without any help from the adapter driver. The idea is
-to let the client drivers check for the support of SMBus functions
-without having to care whether the said functions are implemented in
-hardware by the adapter, or emulated in software by i2c-core on top
-of an I2C adapter.
-
-
-CLIENT CHECKING
----------------
-
-Before a client tries to attach to an adapter, or even do tests to check
-whether one of the devices it supports is present on an adapter, it should
-check whether the needed functionality is present. The typical way to do
-this is (from the lm75 driver):
-
-  static int lm75_detect(...)
-  {
-	(...)
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
-				     I2C_FUNC_SMBUS_WORD_DATA))
-		goto exit;
-	(...)
-  }
-
-Here, the lm75 driver checks if the adapter can do both SMBus byte data
-and SMBus word data transactions. If not, then the driver won't work on
-this adapter and there's no point in going on. If the check above is
-successful, then the driver knows that it can call the following
-functions: i2c_smbus_read_byte_data(), i2c_smbus_write_byte_data(),
-i2c_smbus_read_word_data() and i2c_smbus_write_word_data(). As a rule of
-thumb, the functionality constants you test for with
-i2c_check_functionality() should match exactly the i2c_smbus_* functions
-which you driver is calling.
-
-Note that the check above doesn't tell whether the functionalities are
-implemented in hardware by the underlying adapter or emulated in
-software by i2c-core. Client drivers don't have to care about this, as
-i2c-core will transparently implement SMBus transactions on top of I2C
-adapters.
-
-
-CHECKING THROUGH /DEV
----------------------
-
-If you try to access an adapter from a userspace program, you will have
-to use the /dev interface. You will still have to check whether the
-functionality you need is supported, of course. This is done using
-the I2C_FUNCS ioctl. An example, adapted from the i2cdetect program, is
-below:
-
-  int file;
-  if (file = open("/dev/i2c-0", O_RDWR) < 0) {
-	/* Some kind of error handling */
-	exit(1);
-  }
-  if (ioctl(file, I2C_FUNCS, &funcs) < 0) {
-	/* Some kind of error handling */
-	exit(1);
-  }
-  if (!(funcs & I2C_FUNC_SMBUS_QUICK)) {
-	/* Oops, the needed functionality (SMBus write_quick function) is
-           not available! */
-	exit(1);
-  }
-  /* Now it is safe to use the SMBus write_quick command */
diff --git a/Documentation/i2c/functionality.rst b/Documentation/i2c/functionality.rst
new file mode 100644
index 000000000000..377507c56162
--- /dev/null
+++ b/Documentation/i2c/functionality.rst
@@ -0,0 +1,156 @@
+=======================
+I2C/SMBus Functionality
+=======================
+
+INTRODUCTION
+------------
+
+Because not every I2C or SMBus adapter implements everything in the
+I2C specifications, a client can not trust that everything it needs
+is implemented when it is given the option to attach to an adapter:
+the client needs some way to check whether an adapter has the needed
+functionality.
+
+
+FUNCTIONALITY CONSTANTS
+-----------------------
+
+For the most up-to-date list of functionality constants, please check
+<uapi/linux/i2c.h>!
+
+  =============================== ==============================================
+  I2C_FUNC_I2C                    Plain i2c-level commands (Pure SMBus
+                                  adapters typically can not do these)
+  I2C_FUNC_10BIT_ADDR             Handles the 10-bit address extensions
+  I2C_FUNC_PROTOCOL_MANGLING      Knows about the I2C_M_IGNORE_NAK,
+                                  I2C_M_REV_DIR_ADDR and I2C_M_NO_RD_ACK
+                                  flags (which modify the I2C protocol!)
+  I2C_FUNC_NOSTART                Can skip repeated start sequence
+  I2C_FUNC_SMBUS_QUICK            Handles the SMBus write_quick command
+  I2C_FUNC_SMBUS_READ_BYTE        Handles the SMBus read_byte command
+  I2C_FUNC_SMBUS_WRITE_BYTE       Handles the SMBus write_byte command
+  I2C_FUNC_SMBUS_READ_BYTE_DATA   Handles the SMBus read_byte_data command
+  I2C_FUNC_SMBUS_WRITE_BYTE_DATA  Handles the SMBus write_byte_data command
+  I2C_FUNC_SMBUS_READ_WORD_DATA   Handles the SMBus read_word_data command
+  I2C_FUNC_SMBUS_WRITE_WORD_DATA  Handles the SMBus write_byte_data command
+  I2C_FUNC_SMBUS_PROC_CALL        Handles the SMBus process_call command
+  I2C_FUNC_SMBUS_READ_BLOCK_DATA  Handles the SMBus read_block_data command
+  I2C_FUNC_SMBUS_WRITE_BLOCK_DATA Handles the SMBus write_block_data command
+  I2C_FUNC_SMBUS_READ_I2C_BLOCK   Handles the SMBus read_i2c_block_data command
+  I2C_FUNC_SMBUS_WRITE_I2C_BLOCK  Handles the SMBus write_i2c_block_data command
+  =============================== ==============================================
+
+A few combinations of the above flags are also defined for your convenience:
+
+  =========================       ======================================
+  I2C_FUNC_SMBUS_BYTE             Handles the SMBus read_byte
+                                  and write_byte commands
+  I2C_FUNC_SMBUS_BYTE_DATA        Handles the SMBus read_byte_data
+                                  and write_byte_data commands
+  I2C_FUNC_SMBUS_WORD_DATA        Handles the SMBus read_word_data
+                                  and write_word_data commands
+  I2C_FUNC_SMBUS_BLOCK_DATA       Handles the SMBus read_block_data
+                                  and write_block_data commands
+  I2C_FUNC_SMBUS_I2C_BLOCK        Handles the SMBus read_i2c_block_data
+                                  and write_i2c_block_data commands
+  I2C_FUNC_SMBUS_EMUL             Handles all SMBus commands that can be
+                                  emulated by a real I2C adapter (using
+                                  the transparent emulation layer)
+  =========================       ======================================
+
+In kernel versions prior to 3.5 I2C_FUNC_NOSTART was implemented as
+part of I2C_FUNC_PROTOCOL_MANGLING.
+
+
+ADAPTER IMPLEMENTATION
+----------------------
+
+When you write a new adapter driver, you will have to implement a
+function callback ``functionality``. Typical implementations are given
+below.
+
+A typical SMBus-only adapter would list all the SMBus transactions it
+supports. This example comes from the i2c-piix4 driver::
+
+  static u32 piix4_func(struct i2c_adapter *adapter)
+  {
+	return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+	       I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+	       I2C_FUNC_SMBUS_BLOCK_DATA;
+  }
+
+A typical full-I2C adapter would use the following (from the i2c-pxa
+driver)::
+
+  static u32 i2c_pxa_functionality(struct i2c_adapter *adap)
+  {
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+  }
+
+I2C_FUNC_SMBUS_EMUL includes all the SMBus transactions (with the
+addition of I2C block transactions) which i2c-core can emulate using
+I2C_FUNC_I2C without any help from the adapter driver. The idea is
+to let the client drivers check for the support of SMBus functions
+without having to care whether the said functions are implemented in
+hardware by the adapter, or emulated in software by i2c-core on top
+of an I2C adapter.
+
+
+CLIENT CHECKING
+---------------
+
+Before a client tries to attach to an adapter, or even do tests to check
+whether one of the devices it supports is present on an adapter, it should
+check whether the needed functionality is present. The typical way to do
+this is (from the lm75 driver)::
+
+  static int lm75_detect(...)
+  {
+	(...)
+	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
+				     I2C_FUNC_SMBUS_WORD_DATA))
+		goto exit;
+	(...)
+  }
+
+Here, the lm75 driver checks if the adapter can do both SMBus byte data
+and SMBus word data transactions. If not, then the driver won't work on
+this adapter and there's no point in going on. If the check above is
+successful, then the driver knows that it can call the following
+functions: i2c_smbus_read_byte_data(), i2c_smbus_write_byte_data(),
+i2c_smbus_read_word_data() and i2c_smbus_write_word_data(). As a rule of
+thumb, the functionality constants you test for with
+i2c_check_functionality() should match exactly the i2c_smbus_* functions
+which you driver is calling.
+
+Note that the check above doesn't tell whether the functionalities are
+implemented in hardware by the underlying adapter or emulated in
+software by i2c-core. Client drivers don't have to care about this, as
+i2c-core will transparently implement SMBus transactions on top of I2C
+adapters.
+
+
+CHECKING THROUGH /DEV
+---------------------
+
+If you try to access an adapter from a userspace program, you will have
+to use the /dev interface. You will still have to check whether the
+functionality you need is supported, of course. This is done using
+the I2C_FUNCS ioctl. An example, adapted from the i2cdetect program, is
+below::
+
+  int file;
+  if (file = open("/dev/i2c-0", O_RDWR) < 0) {
+	/* Some kind of error handling */
+	exit(1);
+  }
+  if (ioctl(file, I2C_FUNCS, &funcs) < 0) {
+	/* Some kind of error handling */
+	exit(1);
+  }
+  if (!(funcs & I2C_FUNC_SMBUS_QUICK)) {
+	/* Oops, the needed functionality (SMBus write_quick function) is
+           not available! */
+	exit(1);
+  }
+  /* Now it is safe to use the SMBus write_quick command */
diff --git a/Documentation/i2c/gpio-fault-injection b/Documentation/i2c/gpio-fault-injection
deleted file mode 100644
index c87f416d53dd..000000000000
--- a/Documentation/i2c/gpio-fault-injection
+++ /dev/null
@@ -1,136 +0,0 @@
-=========================
-Linux I2C fault injection
-=========================
-
-The GPIO based I2C bus master driver can be configured to provide fault
-injection capabilities. It is then meant to be connected to another I2C bus
-which is driven by the I2C bus master driver under test. The GPIO fault
-injection driver can create special states on the bus which the other I2C bus
-master driver should handle gracefully.
-
-Once the Kconfig option I2C_GPIO_FAULT_INJECTOR is enabled, there will be an
-'i2c-fault-injector' subdirectory in the Kernel debugfs filesystem, usually
-mounted at /sys/kernel/debug. There will be a separate subdirectory per GPIO
-driven I2C bus. Each subdirectory will contain files to trigger the fault
-injection. They will be described now along with their intended use-cases.
-
-Wire states
-===========
-
-"scl"
------
-
-By reading this file, you get the current state of SCL. By writing, you can
-change its state to either force it low or to release it again. So, by using
-"echo 0 > scl" you force SCL low and thus, no communication will be possible
-because the bus master under test will not be able to clock. It should detect
-the condition of SCL being unresponsive and report an error to the upper
-layers.
-
-"sda"
------
-
-By reading this file, you get the current state of SDA. By writing, you can
-change its state to either force it low or to release it again. So, by using
-"echo 0 > sda" you force SDA low and thus, data cannot be transmitted. The bus
-master under test should detect this condition and trigger a bus recovery (see
-I2C specification version 4, section 3.1.16) using the helpers of the Linux I2C
-core (see 'struct bus_recovery_info'). However, the bus recovery will not
-succeed because SDA is still pinned low until you manually release it again
-with "echo 1 > sda". A test with an automatic release can be done with the
-"incomplete transfers" class of fault injectors.
-
-Incomplete transfers
-====================
-
-The following fault injectors create situations where SDA will be held low by a
-device. Bus recovery should be able to fix these situations. But please note:
-there are I2C client devices which detect a stuck SDA on their side and release
-it on their own after a few milliseconds. Also, there might be an external
-device deglitching and monitoring the I2C bus. It could also detect a stuck SDA
-and will init a bus recovery on its own. If you want to implement bus recovery
-in a bus master driver, make sure you checked your hardware setup for such
-devices before. And always verify with a scope or logic analyzer!
-
-"incomplete_address_phase"
---------------------------
-
-This file is write only and you need to write the address of an existing I2C
-client device to it. Then, a read transfer to this device will be started, but
-it will stop at the ACK phase after the address of the client has been
-transmitted. Because the device will ACK its presence, this results in SDA
-being pulled low by the device while SCL is high. So, similar to the "sda" file
-above, the bus master under test should detect this condition and try a bus
-recovery. This time, however, it should succeed and the device should release
-SDA after toggling SCL.
-
-"incomplete_write_byte"
------------------------
-
-Similar to above, this file is write only and you need to write the address of
-an existing I2C client device to it.
-
-The injector will again stop at one ACK phase, so the device will keep SDA low
-because it acknowledges data. However, there are two differences compared to
-'incomplete_address_phase':
-
-a) the message sent out will be a write message
-b) after the address byte, a 0x00 byte will be transferred. Then, stop at ACK.
-
-This is a highly delicate state, the device is set up to write any data to
-register 0x00 (if it has registers) when further clock pulses happen on SCL.
-This is why bus recovery (up to 9 clock pulses) must either check SDA or send
-additional STOP conditions to ensure the bus has been released. Otherwise
-random data will be written to a device!
-
-Lost arbitration
-================
-
-Here, we want to simulate the condition where the master under test loses the
-bus arbitration against another master in a multi-master setup.
-
-"lose_arbitration"
-------------------
-
-This file is write only and you need to write the duration of the arbitration
-intereference (in µs, maximum is 100ms). The calling process will then sleep
-and wait for the next bus clock. The process is interruptible, though.
-
-Arbitration lost is achieved by waiting for SCL going down by the master under
-test and then pulling SDA low for some time. So, the I2C address sent out
-should be corrupted and that should be detected properly. That means that the
-address sent out should have a lot of '1' bits to be able to detect corruption.
-There doesn't need to be a device at this address because arbitration lost
-should be detected beforehand. Also note, that SCL going down is monitored
-using interrupts, so the interrupt latency might cause the first bits to be not
-corrupted. A good starting point for using this fault injector on an otherwise
-idle bus is:
-
-# echo 200 > lose_arbitration &
-# i2cget -y <bus_to_test> 0x3f
-
-Panic during transfer
-=====================
-
-This fault injector will create a Kernel panic once the master under test
-started a transfer. This usually means that the state machine of the bus master
-driver will be ungracefully interrupted and the bus may end up in an unusual
-state. Use this to check if your shutdown/reboot/boot code can handle this
-scenario.
-
-"inject_panic"
---------------
-
-This file is write only and you need to write the delay between the detected
-start of a transmission and the induced Kernel panic (in µs, maximum is 100ms).
-The calling process will then sleep and wait for the next bus clock. The
-process is interruptible, though.
-
-Start of a transfer is detected by waiting for SCL going down by the master
-under test.  A good starting point for using this fault injector is:
-
-# echo 0 > inject_panic &
-# i2cget -y <bus_to_test> <some_address>
-
-Note that there doesn't need to be a device listening to the address you are
-using. Results may vary depending on that, though.
diff --git a/Documentation/i2c/gpio-fault-injection.rst b/Documentation/i2c/gpio-fault-injection.rst
new file mode 100644
index 000000000000..9dca6ec7d266
--- /dev/null
+++ b/Documentation/i2c/gpio-fault-injection.rst
@@ -0,0 +1,136 @@
+=========================
+Linux I2C fault injection
+=========================
+
+The GPIO based I2C bus master driver can be configured to provide fault
+injection capabilities. It is then meant to be connected to another I2C bus
+which is driven by the I2C bus master driver under test. The GPIO fault
+injection driver can create special states on the bus which the other I2C bus
+master driver should handle gracefully.
+
+Once the Kconfig option I2C_GPIO_FAULT_INJECTOR is enabled, there will be an
+'i2c-fault-injector' subdirectory in the Kernel debugfs filesystem, usually
+mounted at /sys/kernel/debug. There will be a separate subdirectory per GPIO
+driven I2C bus. Each subdirectory will contain files to trigger the fault
+injection. They will be described now along with their intended use-cases.
+
+Wire states
+===========
+
+"scl"
+-----
+
+By reading this file, you get the current state of SCL. By writing, you can
+change its state to either force it low or to release it again. So, by using
+"echo 0 > scl" you force SCL low and thus, no communication will be possible
+because the bus master under test will not be able to clock. It should detect
+the condition of SCL being unresponsive and report an error to the upper
+layers.
+
+"sda"
+-----
+
+By reading this file, you get the current state of SDA. By writing, you can
+change its state to either force it low or to release it again. So, by using
+"echo 0 > sda" you force SDA low and thus, data cannot be transmitted. The bus
+master under test should detect this condition and trigger a bus recovery (see
+I2C specification version 4, section 3.1.16) using the helpers of the Linux I2C
+core (see 'struct bus_recovery_info'). However, the bus recovery will not
+succeed because SDA is still pinned low until you manually release it again
+with "echo 1 > sda". A test with an automatic release can be done with the
+"incomplete transfers" class of fault injectors.
+
+Incomplete transfers
+====================
+
+The following fault injectors create situations where SDA will be held low by a
+device. Bus recovery should be able to fix these situations. But please note:
+there are I2C client devices which detect a stuck SDA on their side and release
+it on their own after a few milliseconds. Also, there might be an external
+device deglitching and monitoring the I2C bus. It could also detect a stuck SDA
+and will init a bus recovery on its own. If you want to implement bus recovery
+in a bus master driver, make sure you checked your hardware setup for such
+devices before. And always verify with a scope or logic analyzer!
+
+"incomplete_address_phase"
+--------------------------
+
+This file is write only and you need to write the address of an existing I2C
+client device to it. Then, a read transfer to this device will be started, but
+it will stop at the ACK phase after the address of the client has been
+transmitted. Because the device will ACK its presence, this results in SDA
+being pulled low by the device while SCL is high. So, similar to the "sda" file
+above, the bus master under test should detect this condition and try a bus
+recovery. This time, however, it should succeed and the device should release
+SDA after toggling SCL.
+
+"incomplete_write_byte"
+-----------------------
+
+Similar to above, this file is write only and you need to write the address of
+an existing I2C client device to it.
+
+The injector will again stop at one ACK phase, so the device will keep SDA low
+because it acknowledges data. However, there are two differences compared to
+'incomplete_address_phase':
+
+a) the message sent out will be a write message
+b) after the address byte, a 0x00 byte will be transferred. Then, stop at ACK.
+
+This is a highly delicate state, the device is set up to write any data to
+register 0x00 (if it has registers) when further clock pulses happen on SCL.
+This is why bus recovery (up to 9 clock pulses) must either check SDA or send
+additional STOP conditions to ensure the bus has been released. Otherwise
+random data will be written to a device!
+
+Lost arbitration
+================
+
+Here, we want to simulate the condition where the master under test loses the
+bus arbitration against another master in a multi-master setup.
+
+"lose_arbitration"
+------------------
+
+This file is write only and you need to write the duration of the arbitration
+intereference (in µs, maximum is 100ms). The calling process will then sleep
+and wait for the next bus clock. The process is interruptible, though.
+
+Arbitration lost is achieved by waiting for SCL going down by the master under
+test and then pulling SDA low for some time. So, the I2C address sent out
+should be corrupted and that should be detected properly. That means that the
+address sent out should have a lot of '1' bits to be able to detect corruption.
+There doesn't need to be a device at this address because arbitration lost
+should be detected beforehand. Also note, that SCL going down is monitored
+using interrupts, so the interrupt latency might cause the first bits to be not
+corrupted. A good starting point for using this fault injector on an otherwise
+idle bus is::
+
+  # echo 200 > lose_arbitration &
+  # i2cget -y <bus_to_test> 0x3f
+
+Panic during transfer
+=====================
+
+This fault injector will create a Kernel panic once the master under test
+started a transfer. This usually means that the state machine of the bus master
+driver will be ungracefully interrupted and the bus may end up in an unusual
+state. Use this to check if your shutdown/reboot/boot code can handle this
+scenario.
+
+"inject_panic"
+--------------
+
+This file is write only and you need to write the delay between the detected
+start of a transmission and the induced Kernel panic (in µs, maximum is 100ms).
+The calling process will then sleep and wait for the next bus clock. The
+process is interruptible, though.
+
+Start of a transfer is detected by waiting for SCL going down by the master
+under test.  A good starting point for using this fault injector is::
+
+  # echo 0 > inject_panic &
+  # i2cget -y <bus_to_test> <some_address>
+
+Note that there doesn't need to be a device listening to the address you are
+using. Results may vary depending on that, though.
diff --git a/Documentation/i2c/i2c-protocol b/Documentation/i2c/i2c-protocol
deleted file mode 100644
index ff6d6cee6c7e..000000000000
--- a/Documentation/i2c/i2c-protocol
+++ /dev/null
@@ -1,88 +0,0 @@
-This document describes the i2c protocol. Or will, when it is finished :-)
-
-Key to symbols
-==============
-
-S     (1 bit) : Start bit
-P     (1 bit) : Stop bit
-Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0.
-A, NA (1 bit) : Accept and reverse accept bit.
-Addr  (7 bits): I2C 7 bit address. Note that this can be expanded as usual to
-                get a 10 bit I2C address.
-Comm  (8 bits): Command byte, a data byte which often selects a register on
-                the device.
-Data  (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh
-                for 16 bit data.
-Count (8 bits): A data byte containing the length of a block operation.
-
-[..]: Data sent by I2C device, as opposed to data sent by the host adapter.
-
-
-Simple send transaction
-======================
-
-This corresponds to i2c_master_send.
-
-  S Addr Wr [A] Data [A] Data [A] ... [A] Data [A] P
-
-
-Simple receive transaction
-===========================
-
-This corresponds to i2c_master_recv
-
-  S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
-
-
-Combined transactions
-====================
-
-This corresponds to i2c_transfer
-
-They are just like the above transactions, but instead of a stop bit P
-a start bit S is sent and the transaction continues. An example of
-a byte read, followed by a byte write:
-
-  S Addr Rd [A] [Data] NA S Addr Wr [A] Data [A] P
-
-
-Modified transactions
-=====================
-
-The following modifications to the I2C protocol can also be generated by
-setting these flags for i2c messages. With the exception of I2C_M_NOSTART, they
-are usually only needed to work around device issues:
-
-I2C_M_IGNORE_NAK:
-    Normally message is interrupted immediately if there is [NA] from the
-    client. Setting this flag treats any [NA] as [A], and all of
-    message is sent.
-    These messages may still fail to SCL lo->hi timeout.
-
-I2C_M_NO_RD_ACK:
-    In a read message, master A/NA bit is skipped.
-
-I2C_M_NOSTART:
-    In a combined transaction, no 'S Addr Wr/Rd [A]' is generated at some
-    point. For example, setting I2C_M_NOSTART on the second partial message
-    generates something like:
-      S Addr Rd [A] [Data] NA Data [A] P
-    If you set the I2C_M_NOSTART variable for the first partial message,
-    we do not generate Addr, but we do generate the startbit S. This will
-    probably confuse all other clients on your bus, so don't try this.
-
-    This is often used to gather transmits from multiple data buffers in
-    system memory into something that appears as a single transfer to the
-    I2C device but may also be used between direction changes by some
-    rare devices.
-
-I2C_M_REV_DIR_ADDR:
-    This toggles the Rd/Wr flag. That is, if you want to do a write, but
-    need to emit an Rd instead of a Wr, or vice versa, you set this
-    flag. For example:
-      S Addr Rd [A] Data [A] Data [A] ... [A] Data [A] P
-
-I2C_M_STOP:
-    Force a stop condition (P) after the message. Some I2C related protocols
-    like SCCB require that. Normally, you really don't want to get interrupted
-    between the messages of one transfer.
diff --git a/Documentation/i2c/i2c-protocol.rst b/Documentation/i2c/i2c-protocol.rst
new file mode 100644
index 000000000000..2f8fcf671b2e
--- /dev/null
+++ b/Documentation/i2c/i2c-protocol.rst
@@ -0,0 +1,98 @@
+============
+I2C Protocol
+============
+
+This document describes the i2c protocol. Or will, when it is finished :-)
+
+Key to symbols
+==============
+
+=============== =============================================================
+S     (1 bit) : Start bit
+P     (1 bit) : Stop bit
+Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0.
+A, NA (1 bit) : Accept and reverse accept bit.
+Addr  (7 bits): I2C 7 bit address. Note that this can be expanded as usual to
+                get a 10 bit I2C address.
+Comm  (8 bits): Command byte, a data byte which often selects a register on
+                the device.
+Data  (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh
+                for 16 bit data.
+Count (8 bits): A data byte containing the length of a block operation.
+
+[..]:           Data sent by I2C device, as opposed to data sent by the
+                host adapter.
+=============== =============================================================
+
+
+Simple send transaction
+=======================
+
+This corresponds to i2c_master_send::
+
+  S Addr Wr [A] Data [A] Data [A] ... [A] Data [A] P
+
+
+Simple receive transaction
+==========================
+
+This corresponds to i2c_master_recv::
+
+  S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+
+
+Combined transactions
+=====================
+
+This corresponds to i2c_transfer
+
+They are just like the above transactions, but instead of a stop bit P
+a start bit S is sent and the transaction continues. An example of
+a byte read, followed by a byte write::
+
+  S Addr Rd [A] [Data] NA S Addr Wr [A] Data [A] P
+
+
+Modified transactions
+=====================
+
+The following modifications to the I2C protocol can also be generated by
+setting these flags for i2c messages. With the exception of I2C_M_NOSTART, they
+are usually only needed to work around device issues:
+
+I2C_M_IGNORE_NAK:
+    Normally message is interrupted immediately if there is [NA] from the
+    client. Setting this flag treats any [NA] as [A], and all of
+    message is sent.
+    These messages may still fail to SCL lo->hi timeout.
+
+I2C_M_NO_RD_ACK:
+    In a read message, master A/NA bit is skipped.
+
+I2C_M_NOSTART:
+    In a combined transaction, no 'S Addr Wr/Rd [A]' is generated at some
+    point. For example, setting I2C_M_NOSTART on the second partial message
+    generates something like::
+
+      S Addr Rd [A] [Data] NA Data [A] P
+
+    If you set the I2C_M_NOSTART variable for the first partial message,
+    we do not generate Addr, but we do generate the startbit S. This will
+    probably confuse all other clients on your bus, so don't try this.
+
+    This is often used to gather transmits from multiple data buffers in
+    system memory into something that appears as a single transfer to the
+    I2C device but may also be used between direction changes by some
+    rare devices.
+
+I2C_M_REV_DIR_ADDR:
+    This toggles the Rd/Wr flag. That is, if you want to do a write, but
+    need to emit an Rd instead of a Wr, or vice versa, you set this
+    flag. For example::
+
+      S Addr Rd [A] Data [A] Data [A] ... [A] Data [A] P
+
+I2C_M_STOP:
+    Force a stop condition (P) after the message. Some I2C related protocols
+    like SCCB require that. Normally, you really don't want to get interrupted
+    between the messages of one transfer.
diff --git a/Documentation/i2c/i2c-stub b/Documentation/i2c/i2c-stub
deleted file mode 100644
index a16924fbd289..000000000000
--- a/Documentation/i2c/i2c-stub
+++ /dev/null
@@ -1,64 +0,0 @@
-MODULE: i2c-stub
-
-DESCRIPTION:
-
-This module is a very simple fake I2C/SMBus driver.  It implements six
-types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, (r/w)
-word data, (r/w) I2C block data, and (r/w) SMBus block data.
-
-You need to provide chip addresses as a module parameter when loading this
-driver, which will then only react to SMBus commands to these addresses.
-
-No hardware is needed nor associated with this module.  It will accept write
-quick commands to the specified addresses; it will respond to the other
-commands (also to the specified addresses) by reading from or writing to
-arrays in memory.  It will also spam the kernel logs for every command it
-handles.
-
-A pointer register with auto-increment is implemented for all byte
-operations.  This allows for continuous byte reads like those supported by
-EEPROMs, among others.
-
-SMBus block command support is disabled by default, and must be enabled
-explicitly by setting the respective bits (0x03000000) in the functionality
-module parameter.
-
-SMBus block commands must be written to configure an SMBus command for
-SMBus block operations. Writes can be partial. Block read commands always
-return the number of bytes selected with the largest write so far.
-
-The typical use-case is like this:
-	1. load this module
-	2. use i2cset (from the i2c-tools project) to pre-load some data
-	3. load the target chip driver module
-	4. observe its behavior in the kernel log
-
-There's a script named i2c-stub-from-dump in the i2c-tools package which
-can load register values automatically from a chip dump.
-
-PARAMETERS:
-
-int chip_addr[10]:
-	The SMBus addresses to emulate chips at.
-
-unsigned long functionality:
-	Functionality override, to disable some commands. See I2C_FUNC_*
-	constants in <linux/i2c.h> for the suitable values. For example,
-	value 0x1f0000 would only enable the quick, byte and byte data
-	commands.
-
-u8 bank_reg[10]
-u8 bank_mask[10]
-u8 bank_start[10]
-u8 bank_end[10]:
-	Optional bank settings. They tell which bits in which register
-	select the active bank, as well as the range of banked registers.
-
-CAVEATS:
-
-If your target driver polls some byte or word waiting for it to change, the
-stub could lock it up.  Use i2cset to unlock it.
-
-If you spam it hard enough, printk can be lossy.  This module really wants
-something like relayfs.
-
diff --git a/Documentation/i2c/i2c-stub.rst b/Documentation/i2c/i2c-stub.rst
new file mode 100644
index 000000000000..a6fc6916d6bc
--- /dev/null
+++ b/Documentation/i2c/i2c-stub.rst
@@ -0,0 +1,66 @@
+========
+i2c-stub
+========
+
+Description
+===========
+
+This module is a very simple fake I2C/SMBus driver.  It implements six
+types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, (r/w)
+word data, (r/w) I2C block data, and (r/w) SMBus block data.
+
+You need to provide chip addresses as a module parameter when loading this
+driver, which will then only react to SMBus commands to these addresses.
+
+No hardware is needed nor associated with this module.  It will accept write
+quick commands to the specified addresses; it will respond to the other
+commands (also to the specified addresses) by reading from or writing to
+arrays in memory.  It will also spam the kernel logs for every command it
+handles.
+
+A pointer register with auto-increment is implemented for all byte
+operations.  This allows for continuous byte reads like those supported by
+EEPROMs, among others.
+
+SMBus block command support is disabled by default, and must be enabled
+explicitly by setting the respective bits (0x03000000) in the functionality
+module parameter.
+
+SMBus block commands must be written to configure an SMBus command for
+SMBus block operations. Writes can be partial. Block read commands always
+return the number of bytes selected with the largest write so far.
+
+The typical use-case is like this:
+
+	1. load this module
+	2. use i2cset (from the i2c-tools project) to pre-load some data
+	3. load the target chip driver module
+	4. observe its behavior in the kernel log
+
+There's a script named i2c-stub-from-dump in the i2c-tools package which
+can load register values automatically from a chip dump.
+
+Parameters
+==========
+
+int chip_addr[10]:
+	The SMBus addresses to emulate chips at.
+
+unsigned long functionality:
+	Functionality override, to disable some commands. See I2C_FUNC_*
+	constants in <linux/i2c.h> for the suitable values. For example,
+	value 0x1f0000 would only enable the quick, byte and byte data
+	commands.
+
+u8 bank_reg[10], u8 bank_mask[10], u8 bank_start[10], u8 bank_end[10]:
+	Optional bank settings. They tell which bits in which register
+	select the active bank, as well as the range of banked registers.
+
+Caveats
+=======
+
+If your target driver polls some byte or word waiting for it to change, the
+stub could lock it up.  Use i2cset to unlock it.
+
+If you spam it hard enough, printk can be lossy.  This module really wants
+something like relayfs.
diff --git a/Documentation/i2c/i2c-topology b/Documentation/i2c/i2c-topology
deleted file mode 100644
index f74d78b53d4d..000000000000
--- a/Documentation/i2c/i2c-topology
+++ /dev/null
@@ -1,376 +0,0 @@
-I2C topology
-============
-
-There are a couple of reasons for building more complex i2c topologies
-than a straight-forward i2c bus with one adapter and one or more devices.
-
-1. A mux may be needed on the bus to prevent address collisions.
-
-2. The bus may be accessible from some external bus master, and arbitration
-   may be needed to determine if it is ok to access the bus.
-
-3. A device (particularly RF tuners) may want to avoid the digital noise
-   from the i2c bus, at least most of the time, and sits behind a gate
-   that has to be operated before the device can be accessed.
-
-Etc
-
-These constructs are represented as i2c adapter trees by Linux, where
-each adapter has a parent adapter (except the root adapter) and zero or
-more child adapters. The root adapter is the actual adapter that issues
-i2c transfers, and all adapters with a parent are part of an "i2c-mux"
-object (quoted, since it can also be an arbitrator or a gate).
-
-Depending of the particular mux driver, something happens when there is
-an i2c transfer on one of its child adapters. The mux driver can
-obviously operate a mux, but it can also do arbitration with an external
-bus master or open a gate. The mux driver has two operations for this,
-select and deselect. select is called before the transfer and (the
-optional) deselect is called after the transfer.
-
-
-Locking
-=======
-
-There are two variants of locking available to i2c muxes, they can be
-mux-locked or parent-locked muxes. As is evident from below, it can be
-useful to know if a mux is mux-locked or if it is parent-locked. The
-following list was correct at the time of writing:
-
-In drivers/i2c/muxes/
-i2c-arb-gpio-challenge    Parent-locked
-i2c-mux-gpio              Normally parent-locked, mux-locked iff
-                          all involved gpio pins are controlled by the
-                          same i2c root adapter that they mux.
-i2c-mux-gpmux             Normally parent-locked, mux-locked iff
-                          specified in device-tree.
-i2c-mux-ltc4306           Mux-locked
-i2c-mux-mlxcpld           Parent-locked
-i2c-mux-pca9541           Parent-locked
-i2c-mux-pca954x           Parent-locked
-i2c-mux-pinctrl           Normally parent-locked, mux-locked iff
-                          all involved pinctrl devices are controlled
-                          by the same i2c root adapter that they mux.
-i2c-mux-reg               Parent-locked
-
-In drivers/iio/
-gyro/mpu3050              Mux-locked
-imu/inv_mpu6050/          Mux-locked
-
-In drivers/media/
-dvb-frontends/lgdt3306a   Mux-locked
-dvb-frontends/m88ds3103   Parent-locked
-dvb-frontends/rtl2830     Parent-locked
-dvb-frontends/rtl2832     Mux-locked
-dvb-frontends/si2168      Mux-locked
-usb/cx231xx/              Parent-locked
-
-
-Mux-locked muxes
-----------------
-
-Mux-locked muxes does not lock the entire parent adapter during the
-full select-transfer-deselect transaction, only the muxes on the parent
-adapter are locked. Mux-locked muxes are mostly interesting if the
-select and/or deselect operations must use i2c transfers to complete
-their tasks. Since the parent adapter is not fully locked during the
-full transaction, unrelated i2c transfers may interleave the different
-stages of the transaction. This has the benefit that the mux driver
-may be easier and cleaner to implement, but it has some caveats.
-
-ML1. If you build a topology with a mux-locked mux being the parent
-     of a parent-locked mux, this might break the expectation from the
-     parent-locked mux that the root adapter is locked during the
-     transaction.
-
-ML2. It is not safe to build arbitrary topologies with two (or more)
-     mux-locked muxes that are not siblings, when there are address
-     collisions between the devices on the child adapters of these
-     non-sibling muxes.
-
-     I.e. the select-transfer-deselect transaction targeting e.g. device
-     address 0x42 behind mux-one may be interleaved with a similar
-     operation targeting device address 0x42 behind mux-two. The
-     intension with such a topology would in this hypothetical example
-     be that mux-one and mux-two should not be selected simultaneously,
-     but mux-locked muxes do not guarantee that in all topologies.
-
-ML3. A mux-locked mux cannot be used by a driver for auto-closing
-     gates/muxes, i.e. something that closes automatically after a given
-     number (one, in most cases) of i2c transfers. Unrelated i2c transfers
-     may creep in and close prematurely.
-
-ML4. If any non-i2c operation in the mux driver changes the i2c mux state,
-     the driver has to lock the root adapter during that operation.
-     Otherwise garbage may appear on the bus as seen from devices
-     behind the mux, when an unrelated i2c transfer is in flight during
-     the non-i2c mux-changing operation.
-
-
-Mux-locked Example
-------------------
-
-                   .----------.     .--------.
-    .--------.     |   mux-   |-----| dev D1 |
-    |  root  |--+--|  locked  |     '--------'
-    '--------'  |  |  mux M1  |--.  .--------.
-                |  '----------'  '--| dev D2 |
-                |  .--------.       '--------'
-                '--| dev D3 |
-                   '--------'
-
-When there is an access to D1, this happens:
-
- 1. Someone issues an i2c-transfer to D1.
- 2. M1 locks muxes on its parent (the root adapter in this case).
- 3. M1 calls ->select to ready the mux.
- 4. M1 (presumably) does some i2c-transfers as part of its select.
-    These transfers are normal i2c-transfers that locks the parent
-    adapter.
- 5. M1 feeds the i2c-transfer from step 1 to its parent adapter as a
-    normal i2c-transfer that locks the parent adapter.
- 6. M1 calls ->deselect, if it has one.
- 7. Same rules as in step 4, but for ->deselect.
- 8. M1 unlocks muxes on its parent.
-
-This means that accesses to D2 are lockout out for the full duration
-of the entire operation. But accesses to D3 are possibly interleaved
-at any point.
-
-
-Parent-locked muxes
--------------------
-
-Parent-locked muxes lock the parent adapter during the full select-
-transfer-deselect transaction. The implication is that the mux driver
-has to ensure that any and all i2c transfers through that parent
-adapter during the transaction are unlocked i2c transfers (using e.g.
-__i2c_transfer), or a deadlock will follow. There are a couple of
-caveats.
-
-PL1. If you build a topology with a parent-locked mux being the child
-     of another mux, this might break a possible assumption from the
-     child mux that the root adapter is unused between its select op
-     and the actual transfer (e.g. if the child mux is auto-closing
-     and the parent mux issus i2c-transfers as part of its select).
-     This is especially the case if the parent mux is mux-locked, but
-     it may also happen if the parent mux is parent-locked.
-
-PL2. If select/deselect calls out to other subsystems such as gpio,
-     pinctrl, regmap or iio, it is essential that any i2c transfers
-     caused by these subsystems are unlocked. This can be convoluted to
-     accomplish, maybe even impossible if an acceptably clean solution
-     is sought.
-
-
-Parent-locked Example
----------------------
-
-                   .----------.     .--------.
-    .--------.     |  parent- |-----| dev D1 |
-    |  root  |--+--|  locked  |     '--------'
-    '--------'  |  |  mux M1  |--.  .--------.
-                |  '----------'  '--| dev D2 |
-                |  .--------.       '--------'
-                '--| dev D3 |
-                   '--------'
-
-When there is an access to D1, this happens:
-
- 1. Someone issues an i2c-transfer to D1.
- 2. M1 locks muxes on its parent (the root adapter in this case).
- 3. M1 locks its parent adapter.
- 4. M1 calls ->select to ready the mux.
- 5. If M1 does any i2c-transfers (on this root adapter) as part of
-    its select, those transfers must be unlocked i2c-transfers so
-    that they do not deadlock the root adapter.
- 6. M1 feeds the i2c-transfer from step 1 to the root adapter as an
-    unlocked i2c-transfer, so that it does not deadlock the parent
-    adapter.
- 7. M1 calls ->deselect, if it has one.
- 8. Same rules as in step 5, but for ->deselect.
- 9. M1 unlocks its parent adapter.
-10. M1 unlocks muxes on its parent.
-
-
-This means that accesses to both D2 and D3 are locked out for the full
-duration of the entire operation.
-
-
-Complex Examples
-================
-
-Parent-locked mux as parent of parent-locked mux
-------------------------------------------------
-
-This is a useful topology, but it can be bad.
-
-                   .----------.     .----------.     .--------.
-    .--------.     |  parent- |-----|  parent- |-----| dev D1 |
-    |  root  |--+--|  locked  |     |  locked  |     '--------'
-    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
-                |  '----------'  |  '----------'  '--| dev D2 |
-                |  .--------.    |  .--------.       '--------'
-                '--| dev D4 |    '--| dev D3 |
-                   '--------'       '--------'
-
-When any device is accessed, all other devices are locked out for
-the full duration of the operation (both muxes lock their parent,
-and specifically when M2 requests its parent to lock, M1 passes
-the buck to the root adapter).
-
-This topology is bad if M2 is an auto-closing mux and M1->select
-issues any unlocked i2c transfers on the root adapter that may leak
-through and be seen by the M2 adapter, thus closing M2 prematurely.
-
-
-Mux-locked mux as parent of mux-locked mux
-------------------------------------------
-
-This is a good topology.
-
-                   .----------.     .----------.     .--------.
-    .--------.     |   mux-   |-----|   mux-   |-----| dev D1 |
-    |  root  |--+--|  locked  |     |  locked  |     '--------'
-    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
-                |  '----------'  |  '----------'  '--| dev D2 |
-                |  .--------.    |  .--------.       '--------'
-                '--| dev D4 |    '--| dev D3 |
-                   '--------'       '--------'
-
-When device D1 is accessed, accesses to D2 are locked out for the
-full duration of the operation (muxes on the top child adapter of M1
-are locked). But accesses to D3 and D4 are possibly interleaved at
-any point. Accesses to D3 locks out D1 and D2, but accesses to D4
-are still possibly interleaved.
-
-
-Mux-locked mux as parent of parent-locked mux
----------------------------------------------
-
-This is probably a bad topology.
-
-                   .----------.     .----------.     .--------.
-    .--------.     |   mux-   |-----|  parent- |-----| dev D1 |
-    |  root  |--+--|  locked  |     |  locked  |     '--------'
-    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
-                |  '----------'  |  '----------'  '--| dev D2 |
-                |  .--------.    |  .--------.       '--------'
-                '--| dev D4 |    '--| dev D3 |
-                   '--------'       '--------'
-
-When device D1 is accessed, accesses to D2 and D3 are locked out
-for the full duration of the operation (M1 locks child muxes on the
-root adapter). But accesses to D4 are possibly interleaved at any
-point.
-
-This kind of topology is generally not suitable and should probably
-be avoided. The reason is that M2 probably assumes that there will
-be no i2c transfers during its calls to ->select and ->deselect, and
-if there are, any such transfers might appear on the slave side of M2
-as partial i2c transfers, i.e. garbage or worse. This might cause
-device lockups and/or other problems.
-
-The topology is especially troublesome if M2 is an auto-closing
-mux. In that case, any interleaved accesses to D4 might close M2
-prematurely, as might any i2c-transfers part of M1->select.
-
-But if M2 is not making the above stated assumption, and if M2 is not
-auto-closing, the topology is fine.
-
-
-Parent-locked mux as parent of mux-locked mux
----------------------------------------------
-
-This is a good topology.
-
-                   .----------.     .----------.     .--------.
-    .--------.     |  parent- |-----|   mux-   |-----| dev D1 |
-    |  root  |--+--|  locked  |     |  locked  |     '--------'
-    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
-                |  '----------'  |  '----------'  '--| dev D2 |
-                |  .--------.    |  .--------.       '--------'
-                '--| dev D4 |    '--| dev D3 |
-                   '--------'       '--------'
-
-When D1 is accessed, accesses to D2 are locked out for the full
-duration of the operation (muxes on the top child adapter of M1
-are locked). Accesses to D3 and D4 are possibly interleaved at
-any point, just as is expected for mux-locked muxes.
-
-When D3 or D4 are accessed, everything else is locked out. For D3
-accesses, M1 locks the root adapter. For D4 accesses, the root
-adapter is locked directly.
-
-
-Two mux-locked sibling muxes
-----------------------------
-
-This is a good topology.
-
-                                    .--------.
-                   .----------.  .--| dev D1 |
-                   |   mux-   |--'  '--------'
-                .--|  locked  |     .--------.
-                |  |  mux M1  |-----| dev D2 |
-                |  '----------'     '--------'
-                |  .----------.     .--------.
-    .--------.  |  |   mux-   |-----| dev D3 |
-    |  root  |--+--|  locked  |     '--------'
-    '--------'  |  |  mux M2  |--.  .--------.
-                |  '----------'  '--| dev D4 |
-                |  .--------.       '--------'
-                '--| dev D5 |
-                   '--------'
-
-When D1 is accessed, accesses to D2, D3 and D4 are locked out. But
-accesses to D5 may be interleaved at any time.
-
-
-Two parent-locked sibling muxes
--------------------------------
-
-This is a good topology.
-
-                                    .--------.
-                   .----------.  .--| dev D1 |
-                   |  parent- |--'  '--------'
-                .--|  locked  |     .--------.
-                |  |  mux M1  |-----| dev D2 |
-                |  '----------'     '--------'
-                |  .----------.     .--------.
-    .--------.  |  |  parent- |-----| dev D3 |
-    |  root  |--+--|  locked  |     '--------'
-    '--------'  |  |  mux M2  |--.  .--------.
-                |  '----------'  '--| dev D4 |
-                |  .--------.       '--------'
-                '--| dev D5 |
-                   '--------'
-
-When any device is accessed, accesses to all other devices are locked
-out.
-
-
-Mux-locked and parent-locked sibling muxes
-------------------------------------------
-
-This is a good topology.
-
-                                    .--------.
-                   .----------.  .--| dev D1 |
-                   |   mux-   |--'  '--------'
-                .--|  locked  |     .--------.
-                |  |  mux M1  |-----| dev D2 |
-                |  '----------'     '--------'
-                |  .----------.     .--------.
-    .--------.  |  |  parent- |-----| dev D3 |
-    |  root  |--+--|  locked  |     '--------'
-    '--------'  |  |  mux M2  |--.  .--------.
-                |  '----------'  '--| dev D4 |
-                |  .--------.       '--------'
-                '--| dev D5 |
-                   '--------'
-
-When D1 or D2 are accessed, accesses to D3 and D4 are locked out while
-accesses to D5 may interleave. When D3 or D4 are accessed, accesses to
-all other devices are locked out.
diff --git a/Documentation/i2c/i2c-topology.rst b/Documentation/i2c/i2c-topology.rst
new file mode 100644
index 000000000000..0c1ae95f6a97
--- /dev/null
+++ b/Documentation/i2c/i2c-topology.rst
@@ -0,0 +1,396 @@
+============
+I2C topology
+============
+
+There are a couple of reasons for building more complex i2c topologies
+than a straight-forward i2c bus with one adapter and one or more devices.
+
+1. A mux may be needed on the bus to prevent address collisions.
+
+2. The bus may be accessible from some external bus master, and arbitration
+   may be needed to determine if it is ok to access the bus.
+
+3. A device (particularly RF tuners) may want to avoid the digital noise
+   from the i2c bus, at least most of the time, and sits behind a gate
+   that has to be operated before the device can be accessed.
+
+Etc
+===
+
+These constructs are represented as i2c adapter trees by Linux, where
+each adapter has a parent adapter (except the root adapter) and zero or
+more child adapters. The root adapter is the actual adapter that issues
+i2c transfers, and all adapters with a parent are part of an "i2c-mux"
+object (quoted, since it can also be an arbitrator or a gate).
+
+Depending of the particular mux driver, something happens when there is
+an i2c transfer on one of its child adapters. The mux driver can
+obviously operate a mux, but it can also do arbitration with an external
+bus master or open a gate. The mux driver has two operations for this,
+select and deselect. select is called before the transfer and (the
+optional) deselect is called after the transfer.
+
+
+Locking
+=======
+
+There are two variants of locking available to i2c muxes, they can be
+mux-locked or parent-locked muxes. As is evident from below, it can be
+useful to know if a mux is mux-locked or if it is parent-locked. The
+following list was correct at the time of writing:
+
+In drivers/i2c/muxes/:
+
+======================    =============================================
+i2c-arb-gpio-challenge    Parent-locked
+i2c-mux-gpio              Normally parent-locked, mux-locked iff
+                          all involved gpio pins are controlled by the
+                          same i2c root adapter that they mux.
+i2c-mux-gpmux             Normally parent-locked, mux-locked iff
+                          specified in device-tree.
+i2c-mux-ltc4306           Mux-locked
+i2c-mux-mlxcpld           Parent-locked
+i2c-mux-pca9541           Parent-locked
+i2c-mux-pca954x           Parent-locked
+i2c-mux-pinctrl           Normally parent-locked, mux-locked iff
+                          all involved pinctrl devices are controlled
+                          by the same i2c root adapter that they mux.
+i2c-mux-reg               Parent-locked
+======================    =============================================
+
+In drivers/iio/:
+
+======================    =============================================
+gyro/mpu3050              Mux-locked
+imu/inv_mpu6050/          Mux-locked
+======================    =============================================
+
+In drivers/media/:
+
+=======================   =============================================
+dvb-frontends/lgdt3306a   Mux-locked
+dvb-frontends/m88ds3103   Parent-locked
+dvb-frontends/rtl2830     Parent-locked
+dvb-frontends/rtl2832     Mux-locked
+dvb-frontends/si2168      Mux-locked
+usb/cx231xx/              Parent-locked
+=======================   =============================================
+
+
+Mux-locked muxes
+----------------
+
+Mux-locked muxes does not lock the entire parent adapter during the
+full select-transfer-deselect transaction, only the muxes on the parent
+adapter are locked. Mux-locked muxes are mostly interesting if the
+select and/or deselect operations must use i2c transfers to complete
+their tasks. Since the parent adapter is not fully locked during the
+full transaction, unrelated i2c transfers may interleave the different
+stages of the transaction. This has the benefit that the mux driver
+may be easier and cleaner to implement, but it has some caveats.
+
+==== =====================================================================
+ML1. If you build a topology with a mux-locked mux being the parent
+     of a parent-locked mux, this might break the expectation from the
+     parent-locked mux that the root adapter is locked during the
+     transaction.
+
+ML2. It is not safe to build arbitrary topologies with two (or more)
+     mux-locked muxes that are not siblings, when there are address
+     collisions between the devices on the child adapters of these
+     non-sibling muxes.
+
+     I.e. the select-transfer-deselect transaction targeting e.g. device
+     address 0x42 behind mux-one may be interleaved with a similar
+     operation targeting device address 0x42 behind mux-two. The
+     intension with such a topology would in this hypothetical example
+     be that mux-one and mux-two should not be selected simultaneously,
+     but mux-locked muxes do not guarantee that in all topologies.
+
+ML3. A mux-locked mux cannot be used by a driver for auto-closing
+     gates/muxes, i.e. something that closes automatically after a given
+     number (one, in most cases) of i2c transfers. Unrelated i2c transfers
+     may creep in and close prematurely.
+
+ML4. If any non-i2c operation in the mux driver changes the i2c mux state,
+     the driver has to lock the root adapter during that operation.
+     Otherwise garbage may appear on the bus as seen from devices
+     behind the mux, when an unrelated i2c transfer is in flight during
+     the non-i2c mux-changing operation.
+==== =====================================================================
+
+
+Mux-locked Example
+------------------
+
+
+::
+
+                   .----------.     .--------.
+    .--------.     |   mux-   |-----| dev D1 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  .--------.
+                |  '----------'  '--| dev D2 |
+                |  .--------.       '--------'
+                '--| dev D3 |
+                   '--------'
+
+When there is an access to D1, this happens:
+
+ 1. Someone issues an i2c-transfer to D1.
+ 2. M1 locks muxes on its parent (the root adapter in this case).
+ 3. M1 calls ->select to ready the mux.
+ 4. M1 (presumably) does some i2c-transfers as part of its select.
+    These transfers are normal i2c-transfers that locks the parent
+    adapter.
+ 5. M1 feeds the i2c-transfer from step 1 to its parent adapter as a
+    normal i2c-transfer that locks the parent adapter.
+ 6. M1 calls ->deselect, if it has one.
+ 7. Same rules as in step 4, but for ->deselect.
+ 8. M1 unlocks muxes on its parent.
+
+This means that accesses to D2 are lockout out for the full duration
+of the entire operation. But accesses to D3 are possibly interleaved
+at any point.
+
+
+Parent-locked muxes
+-------------------
+
+Parent-locked muxes lock the parent adapter during the full select-
+transfer-deselect transaction. The implication is that the mux driver
+has to ensure that any and all i2c transfers through that parent
+adapter during the transaction are unlocked i2c transfers (using e.g.
+__i2c_transfer), or a deadlock will follow. There are a couple of
+caveats.
+
+==== ====================================================================
+PL1. If you build a topology with a parent-locked mux being the child
+     of another mux, this might break a possible assumption from the
+     child mux that the root adapter is unused between its select op
+     and the actual transfer (e.g. if the child mux is auto-closing
+     and the parent mux issus i2c-transfers as part of its select).
+     This is especially the case if the parent mux is mux-locked, but
+     it may also happen if the parent mux is parent-locked.
+
+PL2. If select/deselect calls out to other subsystems such as gpio,
+     pinctrl, regmap or iio, it is essential that any i2c transfers
+     caused by these subsystems are unlocked. This can be convoluted to
+     accomplish, maybe even impossible if an acceptably clean solution
+     is sought.
+==== ====================================================================
+
+
+Parent-locked Example
+---------------------
+
+::
+
+                   .----------.     .--------.
+    .--------.     |  parent- |-----| dev D1 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  .--------.
+                |  '----------'  '--| dev D2 |
+                |  .--------.       '--------'
+                '--| dev D3 |
+                   '--------'
+
+When there is an access to D1, this happens:
+
+ 1.  Someone issues an i2c-transfer to D1.
+ 2.  M1 locks muxes on its parent (the root adapter in this case).
+ 3.  M1 locks its parent adapter.
+ 4.  M1 calls ->select to ready the mux.
+ 5.  If M1 does any i2c-transfers (on this root adapter) as part of
+     its select, those transfers must be unlocked i2c-transfers so
+     that they do not deadlock the root adapter.
+ 6.  M1 feeds the i2c-transfer from step 1 to the root adapter as an
+     unlocked i2c-transfer, so that it does not deadlock the parent
+     adapter.
+ 7.  M1 calls ->deselect, if it has one.
+ 8.  Same rules as in step 5, but for ->deselect.
+ 9.  M1 unlocks its parent adapter.
+ 10. M1 unlocks muxes on its parent.
+
+
+This means that accesses to both D2 and D3 are locked out for the full
+duration of the entire operation.
+
+
+Complex Examples
+================
+
+Parent-locked mux as parent of parent-locked mux
+------------------------------------------------
+
+This is a useful topology, but it can be bad::
+
+                   .----------.     .----------.     .--------.
+    .--------.     |  parent- |-----|  parent- |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When any device is accessed, all other devices are locked out for
+the full duration of the operation (both muxes lock their parent,
+and specifically when M2 requests its parent to lock, M1 passes
+the buck to the root adapter).
+
+This topology is bad if M2 is an auto-closing mux and M1->select
+issues any unlocked i2c transfers on the root adapter that may leak
+through and be seen by the M2 adapter, thus closing M2 prematurely.
+
+
+Mux-locked mux as parent of mux-locked mux
+------------------------------------------
+
+This is a good topology::
+
+                   .----------.     .----------.     .--------.
+    .--------.     |   mux-   |-----|   mux-   |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When device D1 is accessed, accesses to D2 are locked out for the
+full duration of the operation (muxes on the top child adapter of M1
+are locked). But accesses to D3 and D4 are possibly interleaved at
+any point. Accesses to D3 locks out D1 and D2, but accesses to D4
+are still possibly interleaved.
+
+
+Mux-locked mux as parent of parent-locked mux
+---------------------------------------------
+
+This is probably a bad topology::
+
+                   .----------.     .----------.     .--------.
+    .--------.     |   mux-   |-----|  parent- |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When device D1 is accessed, accesses to D2 and D3 are locked out
+for the full duration of the operation (M1 locks child muxes on the
+root adapter). But accesses to D4 are possibly interleaved at any
+point.
+
+This kind of topology is generally not suitable and should probably
+be avoided. The reason is that M2 probably assumes that there will
+be no i2c transfers during its calls to ->select and ->deselect, and
+if there are, any such transfers might appear on the slave side of M2
+as partial i2c transfers, i.e. garbage or worse. This might cause
+device lockups and/or other problems.
+
+The topology is especially troublesome if M2 is an auto-closing
+mux. In that case, any interleaved accesses to D4 might close M2
+prematurely, as might any i2c-transfers part of M1->select.
+
+But if M2 is not making the above stated assumption, and if M2 is not
+auto-closing, the topology is fine.
+
+
+Parent-locked mux as parent of mux-locked mux
+---------------------------------------------
+
+This is a good topology::
+
+                   .----------.     .----------.     .--------.
+    .--------.     |  parent- |-----|   mux-   |-----| dev D1 |
+    |  root  |--+--|  locked  |     |  locked  |     '--------'
+    '--------'  |  |  mux M1  |--.  |  mux M2  |--.  .--------.
+                |  '----------'  |  '----------'  '--| dev D2 |
+                |  .--------.    |  .--------.       '--------'
+                '--| dev D4 |    '--| dev D3 |
+                   '--------'       '--------'
+
+When D1 is accessed, accesses to D2 are locked out for the full
+duration of the operation (muxes on the top child adapter of M1
+are locked). Accesses to D3 and D4 are possibly interleaved at
+any point, just as is expected for mux-locked muxes.
+
+When D3 or D4 are accessed, everything else is locked out. For D3
+accesses, M1 locks the root adapter. For D4 accesses, the root
+adapter is locked directly.
+
+
+Two mux-locked sibling muxes
+----------------------------
+
+This is a good topology::
+
+                                    .--------.
+                   .----------.  .--| dev D1 |
+                   |   mux-   |--'  '--------'
+                .--|  locked  |     .--------.
+                |  |  mux M1  |-----| dev D2 |
+                |  '----------'     '--------'
+                |  .----------.     .--------.
+    .--------.  |  |   mux-   |-----| dev D3 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M2  |--.  .--------.
+                |  '----------'  '--| dev D4 |
+                |  .--------.       '--------'
+                '--| dev D5 |
+                   '--------'
+
+When D1 is accessed, accesses to D2, D3 and D4 are locked out. But
+accesses to D5 may be interleaved at any time.
+
+
+Two parent-locked sibling muxes
+-------------------------------
+
+This is a good topology::
+
+                                    .--------.
+                   .----------.  .--| dev D1 |
+                   |  parent- |--'  '--------'
+                .--|  locked  |     .--------.
+                |  |  mux M1  |-----| dev D2 |
+                |  '----------'     '--------'
+                |  .----------.     .--------.
+    .--------.  |  |  parent- |-----| dev D3 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M2  |--.  .--------.
+                |  '----------'  '--| dev D4 |
+                |  .--------.       '--------'
+                '--| dev D5 |
+                   '--------'
+
+When any device is accessed, accesses to all other devices are locked
+out.
+
+
+Mux-locked and parent-locked sibling muxes
+------------------------------------------
+
+This is a good topology::
+
+                                    .--------.
+                   .----------.  .--| dev D1 |
+                   |   mux-   |--'  '--------'
+                .--|  locked  |     .--------.
+                |  |  mux M1  |-----| dev D2 |
+                |  '----------'     '--------'
+                |  .----------.     .--------.
+    .--------.  |  |  parent- |-----| dev D3 |
+    |  root  |--+--|  locked  |     '--------'
+    '--------'  |  |  mux M2  |--.  .--------.
+                |  '----------'  '--| dev D4 |
+                |  .--------.       '--------'
+                '--| dev D5 |
+                   '--------'
+
+When D1 or D2 are accessed, accesses to D3 and D4 are locked out while
+accesses to D5 may interleave. When D3 or D4 are accessed, accesses to
+all other devices are locked out.
diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst
new file mode 100644
index 000000000000..cd8d020f7ac5
--- /dev/null
+++ b/Documentation/i2c/index.rst
@@ -0,0 +1,37 @@
+. SPDX-License-Identifier: GPL-2.0
+
+===================
+I2C/SMBus Subsystem
+===================
+
+.. toctree::
+   :maxdepth: 1
+
+   dev-interface
+   dma-considerations
+   fault-codes
+   functionality
+   gpio-fault-injection
+   i2c-protocol
+   i2c-stub
+   i2c-topology
+   instantiating-devices
+   old-module-parameters
+   slave-eeprom-backend
+   slave-interface
+   smbus-protocol
+   summary
+   ten-bit-addresses
+   upgrading-clients
+   writing-clients
+
+   muxes/i2c-mux-gpio
+
+   busses/index
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
deleted file mode 100644
index 345e9ea8281a..000000000000
--- a/Documentation/i2c/instantiating-devices
+++ /dev/null
@@ -1,248 +0,0 @@
-How to instantiate I2C devices
-==============================
-
-Unlike PCI or USB devices, I2C devices are not enumerated at the hardware
-level. Instead, the software must know which devices are connected on each
-I2C bus segment, and what address these devices are using. For this
-reason, the kernel code must instantiate I2C devices explicitly. There are
-several ways to achieve this, depending on the context and requirements.
-
-
-Method 1a: Declare the I2C devices by bus number
-------------------------------------------------
-
-This method is appropriate when the I2C bus is a system bus as is the case
-for many embedded systems. On such systems, each I2C bus has a number
-which is known in advance. It is thus possible to pre-declare the I2C
-devices which live on this bus. This is done with an array of struct
-i2c_board_info which is registered by calling i2c_register_board_info().
-
-Example (from omap2 h4):
-
-static struct i2c_board_info h4_i2c_board_info[] __initdata = {
-	{
-		I2C_BOARD_INFO("isp1301_omap", 0x2d),
-		.irq		= OMAP_GPIO_IRQ(125),
-	},
-	{	/* EEPROM on mainboard */
-		I2C_BOARD_INFO("24c01", 0x52),
-		.platform_data	= &m24c01,
-	},
-	{	/* EEPROM on cpu card */
-		I2C_BOARD_INFO("24c01", 0x57),
-		.platform_data	= &m24c01,
-	},
-};
-
-static void __init omap_h4_init(void)
-{
-	(...)
-	i2c_register_board_info(1, h4_i2c_board_info,
-			ARRAY_SIZE(h4_i2c_board_info));
-	(...)
-}
-
-The above code declares 3 devices on I2C bus 1, including their respective
-addresses and custom data needed by their drivers. When the I2C bus in
-question is registered, the I2C devices will be instantiated automatically
-by i2c-core.
-
-The devices will be automatically unbound and destroyed when the I2C bus
-they sit on goes away (if ever.)
-
-
-Method 1b: Declare the I2C devices via devicetree
--------------------------------------------------
-
-This method has the same implications as method 1a. The declaration of I2C
-devices is here done via devicetree as subnodes of the master controller.
-
-Example:
-
-	i2c1: i2c@400a0000 {
-		/* ... master properties skipped ... */
-		clock-frequency = <100000>;
-
-		flash@50 {
-			compatible = "atmel,24c256";
-			reg = <0x50>;
-		};
-
-		pca9532: gpio@60 {
-			compatible = "nxp,pca9532";
-			gpio-controller;
-			#gpio-cells = <2>;
-			reg = <0x60>;
-		};
-	};
-
-Here, two devices are attached to the bus using a speed of 100kHz. For
-additional properties which might be needed to set up the device, please refer
-to its devicetree documentation in Documentation/devicetree/bindings/.
-
-
-Method 1c: Declare the I2C devices via ACPI
--------------------------------------------
-
-ACPI can also describe I2C devices. There is special documentation for this
-which is currently located at Documentation/firmware-guide/acpi/enumeration.rst.
-
-
-Method 2: Instantiate the devices explicitly
---------------------------------------------
-
-This method is appropriate when a larger device uses an I2C bus for
-internal communication. A typical case is TV adapters. These can have a
-tuner, a video decoder, an audio decoder, etc. usually connected to the
-main chip by the means of an I2C bus. You won't know the number of the I2C
-bus in advance, so the method 1 described above can't be used. Instead,
-you can instantiate your I2C devices explicitly. This is done by filling
-a struct i2c_board_info and calling i2c_new_device().
-
-Example (from the sfe4001 network driver):
-
-static struct i2c_board_info sfe4001_hwmon_info = {
-	I2C_BOARD_INFO("max6647", 0x4e),
-};
-
-int sfe4001_init(struct efx_nic *efx)
-{
-	(...)
-	efx->board_info.hwmon_client =
-		i2c_new_device(&efx->i2c_adap, &sfe4001_hwmon_info);
-
-	(...)
-}
-
-The above code instantiates 1 I2C device on the I2C bus which is on the
-network adapter in question.
-
-A variant of this is when you don't know for sure if an I2C device is
-present or not (for example for an optional feature which is not present
-on cheap variants of a board but you have no way to tell them apart), or
-it may have different addresses from one board to the next (manufacturer
-changing its design without notice). In this case, you can call
-i2c_new_probed_device() instead of i2c_new_device().
-
-Example (from the nxp OHCI driver):
-
-static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
-
-static int usb_hcd_nxp_probe(struct platform_device *pdev)
-{
-	(...)
-	struct i2c_adapter *i2c_adap;
-	struct i2c_board_info i2c_info;
-
-	(...)
-	i2c_adap = i2c_get_adapter(2);
-	memset(&i2c_info, 0, sizeof(struct i2c_board_info));
-	strscpy(i2c_info.type, "isp1301_nxp", sizeof(i2c_info.type));
-	isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info,
-						   normal_i2c, NULL);
-	i2c_put_adapter(i2c_adap);
-	(...)
-}
-
-The above code instantiates up to 1 I2C device on the I2C bus which is on
-the OHCI adapter in question. It first tries at address 0x2c, if nothing
-is found there it tries address 0x2d, and if still nothing is found, it
-simply gives up.
-
-The driver which instantiated the I2C device is responsible for destroying
-it on cleanup. This is done by calling i2c_unregister_device() on the
-pointer that was earlier returned by i2c_new_device() or
-i2c_new_probed_device().
-
-
-Method 3: Probe an I2C bus for certain devices
-----------------------------------------------
-
-Sometimes you do not have enough information about an I2C device, not even
-to call i2c_new_probed_device(). The typical case is hardware monitoring
-chips on PC mainboards. There are several dozen models, which can live
-at 25 different addresses. Given the huge number of mainboards out there,
-it is next to impossible to build an exhaustive list of the hardware
-monitoring chips being used. Fortunately, most of these chips have
-manufacturer and device ID registers, so they can be identified by
-probing.
-
-In that case, I2C devices are neither declared nor instantiated
-explicitly. Instead, i2c-core will probe for such devices as soon as their
-drivers are loaded, and if any is found, an I2C device will be
-instantiated automatically. In order to prevent any misbehavior of this
-mechanism, the following restrictions apply:
-* The I2C device driver must implement the detect() method, which
-  identifies a supported device by reading from arbitrary registers.
-* Only buses which are likely to have a supported device and agree to be
-  probed, will be probed. For example this avoids probing for hardware
-  monitoring chips on a TV adapter.
-
-Example:
-See lm90_driver and lm90_detect() in drivers/hwmon/lm90.c
-
-I2C devices instantiated as a result of such a successful probe will be
-destroyed automatically when the driver which detected them is removed,
-or when the underlying I2C bus is itself destroyed, whichever happens
-first.
-
-Those of you familiar with the i2c subsystem of 2.4 kernels and early 2.6
-kernels will find out that this method 3 is essentially similar to what
-was done there. Two significant differences are:
-* Probing is only one way to instantiate I2C devices now, while it was the
-  only way back then. Where possible, methods 1 and 2 should be preferred.
-  Method 3 should only be used when there is no other way, as it can have
-  undesirable side effects.
-* I2C buses must now explicitly say which I2C driver classes can probe
-  them (by the means of the class bitfield), while all I2C buses were
-  probed by default back then. The default is an empty class which means
-  that no probing happens. The purpose of the class bitfield is to limit
-  the aforementioned undesirable side effects.
-
-Once again, method 3 should be avoided wherever possible. Explicit device
-instantiation (methods 1 and 2) is much preferred for it is safer and
-faster.
-
-
-Method 4: Instantiate from user-space
--------------------------------------
-
-In general, the kernel should know which I2C devices are connected and
-what addresses they live at. However, in certain cases, it does not, so a
-sysfs interface was added to let the user provide the information. This
-interface is made of 2 attribute files which are created in every I2C bus
-directory: new_device and delete_device. Both files are write only and you
-must write the right parameters to them in order to properly instantiate,
-respectively delete, an I2C device.
-
-File new_device takes 2 parameters: the name of the I2C device (a string)
-and the address of the I2C device (a number, typically expressed in
-hexadecimal starting with 0x, but can also be expressed in decimal.)
-
-File delete_device takes a single parameter: the address of the I2C
-device. As no two devices can live at the same address on a given I2C
-segment, the address is sufficient to uniquely identify the device to be
-deleted.
-
-Example:
-# echo eeprom 0x50 > /sys/bus/i2c/devices/i2c-3/new_device
-
-While this interface should only be used when in-kernel device declaration
-can't be done, there is a variety of cases where it can be helpful:
-* The I2C driver usually detects devices (method 3 above) but the bus
-  segment your device lives on doesn't have the proper class bit set and
-  thus detection doesn't trigger.
-* The I2C driver usually detects devices, but your device lives at an
-  unexpected address.
-* The I2C driver usually detects devices, but your device is not detected,
-  either because the detection routine is too strict, or because your
-  device is not officially supported yet but you know it is compatible.
-* You are developing a driver on a test board, where you soldered the I2C
-  device yourself.
-
-This interface is a replacement for the force_* module parameters some I2C
-drivers implement. Being implemented in i2c-core rather than in each
-device driver individually, it is much more efficient, and also has the
-advantage that you do not have to reload the driver to change a setting.
-You can also instantiate the device before the driver is loaded or even
-available, and you don't need to know what driver the device needs.
diff --git a/Documentation/i2c/instantiating-devices.rst b/Documentation/i2c/instantiating-devices.rst
new file mode 100644
index 000000000000..1238f1fa3382
--- /dev/null
+++ b/Documentation/i2c/instantiating-devices.rst
@@ -0,0 +1,253 @@
+==============================
+How to instantiate I2C devices
+==============================
+
+Unlike PCI or USB devices, I2C devices are not enumerated at the hardware
+level. Instead, the software must know which devices are connected on each
+I2C bus segment, and what address these devices are using. For this
+reason, the kernel code must instantiate I2C devices explicitly. There are
+several ways to achieve this, depending on the context and requirements.
+
+
+Method 1a: Declare the I2C devices by bus number
+------------------------------------------------
+
+This method is appropriate when the I2C bus is a system bus as is the case
+for many embedded systems. On such systems, each I2C bus has a number
+which is known in advance. It is thus possible to pre-declare the I2C
+devices which live on this bus. This is done with an array of struct
+i2c_board_info which is registered by calling i2c_register_board_info().
+
+Example (from omap2 h4)::
+
+  static struct i2c_board_info h4_i2c_board_info[] __initdata = {
+	{
+		I2C_BOARD_INFO("isp1301_omap", 0x2d),
+		.irq		= OMAP_GPIO_IRQ(125),
+	},
+	{	/* EEPROM on mainboard */
+		I2C_BOARD_INFO("24c01", 0x52),
+		.platform_data	= &m24c01,
+	},
+	{	/* EEPROM on cpu card */
+		I2C_BOARD_INFO("24c01", 0x57),
+		.platform_data	= &m24c01,
+	},
+  };
+
+  static void __init omap_h4_init(void)
+  {
+	(...)
+	i2c_register_board_info(1, h4_i2c_board_info,
+			ARRAY_SIZE(h4_i2c_board_info));
+	(...)
+  }
+
+The above code declares 3 devices on I2C bus 1, including their respective
+addresses and custom data needed by their drivers. When the I2C bus in
+question is registered, the I2C devices will be instantiated automatically
+by i2c-core.
+
+The devices will be automatically unbound and destroyed when the I2C bus
+they sit on goes away (if ever.)
+
+
+Method 1b: Declare the I2C devices via devicetree
+-------------------------------------------------
+
+This method has the same implications as method 1a. The declaration of I2C
+devices is here done via devicetree as subnodes of the master controller.
+
+Example::
+
+	i2c1: i2c@400a0000 {
+		/* ... master properties skipped ... */
+		clock-frequency = <100000>;
+
+		flash@50 {
+			compatible = "atmel,24c256";
+			reg = <0x50>;
+		};
+
+		pca9532: gpio@60 {
+			compatible = "nxp,pca9532";
+			gpio-controller;
+			#gpio-cells = <2>;
+			reg = <0x60>;
+		};
+	};
+
+Here, two devices are attached to the bus using a speed of 100kHz. For
+additional properties which might be needed to set up the device, please refer
+to its devicetree documentation in Documentation/devicetree/bindings/.
+
+
+Method 1c: Declare the I2C devices via ACPI
+-------------------------------------------
+
+ACPI can also describe I2C devices. There is special documentation for this
+which is currently located at Documentation/firmware-guide/acpi/enumeration.rst.
+
+
+Method 2: Instantiate the devices explicitly
+--------------------------------------------
+
+This method is appropriate when a larger device uses an I2C bus for
+internal communication. A typical case is TV adapters. These can have a
+tuner, a video decoder, an audio decoder, etc. usually connected to the
+main chip by the means of an I2C bus. You won't know the number of the I2C
+bus in advance, so the method 1 described above can't be used. Instead,
+you can instantiate your I2C devices explicitly. This is done by filling
+a struct i2c_board_info and calling i2c_new_device().
+
+Example (from the sfe4001 network driver)::
+
+  static struct i2c_board_info sfe4001_hwmon_info = {
+	I2C_BOARD_INFO("max6647", 0x4e),
+  };
+
+  int sfe4001_init(struct efx_nic *efx)
+  {
+	(...)
+	efx->board_info.hwmon_client =
+		i2c_new_device(&efx->i2c_adap, &sfe4001_hwmon_info);
+
+	(...)
+  }
+
+The above code instantiates 1 I2C device on the I2C bus which is on the
+network adapter in question.
+
+A variant of this is when you don't know for sure if an I2C device is
+present or not (for example for an optional feature which is not present
+on cheap variants of a board but you have no way to tell them apart), or
+it may have different addresses from one board to the next (manufacturer
+changing its design without notice). In this case, you can call
+i2c_new_probed_device() instead of i2c_new_device().
+
+Example (from the nxp OHCI driver)::
+
+  static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
+
+  static int usb_hcd_nxp_probe(struct platform_device *pdev)
+  {
+	(...)
+	struct i2c_adapter *i2c_adap;
+	struct i2c_board_info i2c_info;
+
+	(...)
+	i2c_adap = i2c_get_adapter(2);
+	memset(&i2c_info, 0, sizeof(struct i2c_board_info));
+	strscpy(i2c_info.type, "isp1301_nxp", sizeof(i2c_info.type));
+	isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info,
+						   normal_i2c, NULL);
+	i2c_put_adapter(i2c_adap);
+	(...)
+  }
+
+The above code instantiates up to 1 I2C device on the I2C bus which is on
+the OHCI adapter in question. It first tries at address 0x2c, if nothing
+is found there it tries address 0x2d, and if still nothing is found, it
+simply gives up.
+
+The driver which instantiated the I2C device is responsible for destroying
+it on cleanup. This is done by calling i2c_unregister_device() on the
+pointer that was earlier returned by i2c_new_device() or
+i2c_new_probed_device().
+
+
+Method 3: Probe an I2C bus for certain devices
+----------------------------------------------
+
+Sometimes you do not have enough information about an I2C device, not even
+to call i2c_new_probed_device(). The typical case is hardware monitoring
+chips on PC mainboards. There are several dozen models, which can live
+at 25 different addresses. Given the huge number of mainboards out there,
+it is next to impossible to build an exhaustive list of the hardware
+monitoring chips being used. Fortunately, most of these chips have
+manufacturer and device ID registers, so they can be identified by
+probing.
+
+In that case, I2C devices are neither declared nor instantiated
+explicitly. Instead, i2c-core will probe for such devices as soon as their
+drivers are loaded, and if any is found, an I2C device will be
+instantiated automatically. In order to prevent any misbehavior of this
+mechanism, the following restrictions apply:
+
+* The I2C device driver must implement the detect() method, which
+  identifies a supported device by reading from arbitrary registers.
+* Only buses which are likely to have a supported device and agree to be
+  probed, will be probed. For example this avoids probing for hardware
+  monitoring chips on a TV adapter.
+
+Example:
+See lm90_driver and lm90_detect() in drivers/hwmon/lm90.c
+
+I2C devices instantiated as a result of such a successful probe will be
+destroyed automatically when the driver which detected them is removed,
+or when the underlying I2C bus is itself destroyed, whichever happens
+first.
+
+Those of you familiar with the i2c subsystem of 2.4 kernels and early 2.6
+kernels will find out that this method 3 is essentially similar to what
+was done there. Two significant differences are:
+
+* Probing is only one way to instantiate I2C devices now, while it was the
+  only way back then. Where possible, methods 1 and 2 should be preferred.
+  Method 3 should only be used when there is no other way, as it can have
+  undesirable side effects.
+* I2C buses must now explicitly say which I2C driver classes can probe
+  them (by the means of the class bitfield), while all I2C buses were
+  probed by default back then. The default is an empty class which means
+  that no probing happens. The purpose of the class bitfield is to limit
+  the aforementioned undesirable side effects.
+
+Once again, method 3 should be avoided wherever possible. Explicit device
+instantiation (methods 1 and 2) is much preferred for it is safer and
+faster.
+
+
+Method 4: Instantiate from user-space
+-------------------------------------
+
+In general, the kernel should know which I2C devices are connected and
+what addresses they live at. However, in certain cases, it does not, so a
+sysfs interface was added to let the user provide the information. This
+interface is made of 2 attribute files which are created in every I2C bus
+directory: new_device and delete_device. Both files are write only and you
+must write the right parameters to them in order to properly instantiate,
+respectively delete, an I2C device.
+
+File new_device takes 2 parameters: the name of the I2C device (a string)
+and the address of the I2C device (a number, typically expressed in
+hexadecimal starting with 0x, but can also be expressed in decimal.)
+
+File delete_device takes a single parameter: the address of the I2C
+device. As no two devices can live at the same address on a given I2C
+segment, the address is sufficient to uniquely identify the device to be
+deleted.
+
+Example::
+
+  # echo eeprom 0x50 > /sys/bus/i2c/devices/i2c-3/new_device
+
+While this interface should only be used when in-kernel device declaration
+can't be done, there is a variety of cases where it can be helpful:
+
+* The I2C driver usually detects devices (method 3 above) but the bus
+  segment your device lives on doesn't have the proper class bit set and
+  thus detection doesn't trigger.
+* The I2C driver usually detects devices, but your device lives at an
+  unexpected address.
+* The I2C driver usually detects devices, but your device is not detected,
+  either because the detection routine is too strict, or because your
+  device is not officially supported yet but you know it is compatible.
+* You are developing a driver on a test board, where you soldered the I2C
+  device yourself.
+
+This interface is a replacement for the force_* module parameters some I2C
+drivers implement. Being implemented in i2c-core rather than in each
+device driver individually, it is much more efficient, and also has the
+advantage that you do not have to reload the driver to change a setting.
+You can also instantiate the device before the driver is loaded or even
+available, and you don't need to know what driver the device needs.
diff --git a/Documentation/i2c/muxes/i2c-mux-gpio b/Documentation/i2c/muxes/i2c-mux-gpio
deleted file mode 100644
index 893ecdfe6e43..000000000000
--- a/Documentation/i2c/muxes/i2c-mux-gpio
+++ /dev/null
@@ -1,83 +0,0 @@
-Kernel driver i2c-mux-gpio
-
-Author: Peter Korsgaard <peter.korsgaard@barco.com>
-
-Description
------------
-
-i2c-mux-gpio is an i2c mux driver providing access to I2C bus segments
-from a master I2C bus and a hardware MUX controlled through GPIO pins.
-
-E.G.:
-
-  ----------              ----------  Bus segment 1   - - - - -
- |          | SCL/SDA    |          |-------------- |           |
- |          |------------|          |
- |          |            |          | Bus segment 2 |           |
- |  Linux   | GPIO 1..N  |   MUX    |---------------   Devices
- |          |------------|          |               |           |
- |          |            |          | Bus segment M
- |          |            |          |---------------|           |
-  ----------              ----------                  - - - - -
-
-SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
-according to the settings of the GPIO pins 1..N.
-
-Usage
------
-
-i2c-mux-gpio uses the platform bus, so you need to provide a struct
-platform_device with the platform_data pointing to a struct
-i2c_mux_gpio_platform_data with the I2C adapter number of the master
-bus, the number of bus segments to create and the GPIO pins used
-to control it. See include/linux/platform_data/i2c-mux-gpio.h for details.
-
-E.G. something like this for a MUX providing 4 bus segments
-controlled through 3 GPIO pins:
-
-#include <linux/platform_data/i2c-mux-gpio.h>
-#include <linux/platform_device.h>
-
-static const unsigned myboard_gpiomux_gpios[] = {
-	AT91_PIN_PC26, AT91_PIN_PC25, AT91_PIN_PC24
-};
-
-static const unsigned myboard_gpiomux_values[] = {
-	0, 1, 2, 3
-};
-
-static struct i2c_mux_gpio_platform_data myboard_i2cmux_data = {
-	.parent		= 1,
-	.base_nr	= 2, /* optional */
-	.values		= myboard_gpiomux_values,
-	.n_values	= ARRAY_SIZE(myboard_gpiomux_values),
-	.gpios		= myboard_gpiomux_gpios,
-	.n_gpios	= ARRAY_SIZE(myboard_gpiomux_gpios),
-	.idle		= 4, /* optional */
-};
-
-static struct platform_device myboard_i2cmux = {
-	.name		= "i2c-mux-gpio",
-	.id		= 0,
-	.dev		= {
-		.platform_data	= &myboard_i2cmux_data,
-	},
-};
-
-If you don't know the absolute GPIO pin numbers at registration time,
-you can instead provide a chip name (.chip_name) and relative GPIO pin
-numbers, and the i2c-mux-gpio driver will do the work for you,
-including deferred probing if the GPIO chip isn't immediately
-available.
-
-Device Registration
--------------------
-
-When registering your i2c-mux-gpio device, you should pass the number
-of any GPIO pin it uses as the device ID. This guarantees that every
-instance has a different ID.
-
-Alternatively, if you don't need a stable device name, you can simply
-pass PLATFORM_DEVID_AUTO as the device ID, and the platform core will
-assign a dynamic ID to your device. If you do not know the absolute
-GPIO pin numbers at registration time, this is even the only option.
diff --git a/Documentation/i2c/muxes/i2c-mux-gpio.rst b/Documentation/i2c/muxes/i2c-mux-gpio.rst
new file mode 100644
index 000000000000..7d27444035c3
--- /dev/null
+++ b/Documentation/i2c/muxes/i2c-mux-gpio.rst
@@ -0,0 +1,85 @@
+==========================
+Kernel driver i2c-mux-gpio
+==========================
+
+Author: Peter Korsgaard <peter.korsgaard@barco.com>
+
+Description
+-----------
+
+i2c-mux-gpio is an i2c mux driver providing access to I2C bus segments
+from a master I2C bus and a hardware MUX controlled through GPIO pins.
+
+E.G.::
+
+  ----------              ----------  Bus segment 1   - - - - -
+ |          | SCL/SDA    |          |-------------- |           |
+ |          |------------|          |
+ |          |            |          | Bus segment 2 |           |
+ |  Linux   | GPIO 1..N  |   MUX    |---------------   Devices
+ |          |------------|          |               |           |
+ |          |            |          | Bus segment M
+ |          |            |          |---------------|           |
+  ----------              ----------                  - - - - -
+
+SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
+according to the settings of the GPIO pins 1..N.
+
+Usage
+-----
+
+i2c-mux-gpio uses the platform bus, so you need to provide a struct
+platform_device with the platform_data pointing to a struct
+i2c_mux_gpio_platform_data with the I2C adapter number of the master
+bus, the number of bus segments to create and the GPIO pins used
+to control it. See include/linux/platform_data/i2c-mux-gpio.h for details.
+
+E.G. something like this for a MUX providing 4 bus segments
+controlled through 3 GPIO pins::
+
+  #include <linux/platform_data/i2c-mux-gpio.h>
+  #include <linux/platform_device.h>
+
+  static const unsigned myboard_gpiomux_gpios[] = {
+	AT91_PIN_PC26, AT91_PIN_PC25, AT91_PIN_PC24
+  };
+
+  static const unsigned myboard_gpiomux_values[] = {
+	0, 1, 2, 3
+  };
+
+  static struct i2c_mux_gpio_platform_data myboard_i2cmux_data = {
+	.parent		= 1,
+	.base_nr	= 2, /* optional */
+	.values		= myboard_gpiomux_values,
+	.n_values	= ARRAY_SIZE(myboard_gpiomux_values),
+	.gpios		= myboard_gpiomux_gpios,
+	.n_gpios	= ARRAY_SIZE(myboard_gpiomux_gpios),
+	.idle		= 4, /* optional */
+  };
+
+  static struct platform_device myboard_i2cmux = {
+	.name		= "i2c-mux-gpio",
+	.id		= 0,
+	.dev		= {
+		.platform_data	= &myboard_i2cmux_data,
+	},
+  };
+
+If you don't know the absolute GPIO pin numbers at registration time,
+you can instead provide a chip name (.chip_name) and relative GPIO pin
+numbers, and the i2c-mux-gpio driver will do the work for you,
+including deferred probing if the GPIO chip isn't immediately
+available.
+
+Device Registration
+-------------------
+
+When registering your i2c-mux-gpio device, you should pass the number
+of any GPIO pin it uses as the device ID. This guarantees that every
+instance has a different ID.
+
+Alternatively, if you don't need a stable device name, you can simply
+pass PLATFORM_DEVID_AUTO as the device ID, and the platform core will
+assign a dynamic ID to your device. If you do not know the absolute
+GPIO pin numbers at registration time, this is even the only option.
diff --git a/Documentation/i2c/old-module-parameters b/Documentation/i2c/old-module-parameters
deleted file mode 100644
index 8e2b629d533c..000000000000
--- a/Documentation/i2c/old-module-parameters
+++ /dev/null
@@ -1,44 +0,0 @@
-I2C device driver binding control from user-space
-=================================================
-
-Up to kernel 2.6.32, many i2c drivers used helper macros provided by
-<linux/i2c.h> which created standard module parameters to let the user
-control how the driver would probe i2c buses and attach to devices. These
-parameters were known as "probe" (to let the driver probe for an extra
-address), "force" (to forcibly attach the driver to a given device) and
-"ignore" (to prevent a driver from probing a given address).
-
-With the conversion of the i2c subsystem to the standard device driver
-binding model, it became clear that these per-module parameters were no
-longer needed, and that a centralized implementation was possible. The new,
-sysfs-based interface is described in the documentation file
-"instantiating-devices", section "Method 4: Instantiate from user-space".
-
-Below is a mapping from the old module parameters to the new interface.
-
-Attaching a driver to an I2C device
------------------------------------
-
-Old method (module parameters):
-# modprobe <driver> probe=1,0x2d
-# modprobe <driver> force=1,0x2d
-# modprobe <driver> force_<device>=1,0x2d
-
-New method (sysfs interface):
-# echo <device> 0x2d > /sys/bus/i2c/devices/i2c-1/new_device
-
-Preventing a driver from attaching to an I2C device
----------------------------------------------------
-
-Old method (module parameters):
-# modprobe <driver> ignore=1,0x2f
-
-New method (sysfs interface):
-# echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device
-# modprobe <driver>
-
-Of course, it is important to instantiate the "dummy" device before loading
-the driver. The dummy device will be handled by i2c-core itself, preventing
-other drivers from binding to it later on. If there is a real device at the
-problematic address, and you want another driver to bind to it, then simply
-pass the name of the device in question instead of "dummy".
diff --git a/Documentation/i2c/old-module-parameters.rst b/Documentation/i2c/old-module-parameters.rst
new file mode 100644
index 000000000000..a1939512ad66
--- /dev/null
+++ b/Documentation/i2c/old-module-parameters.rst
@@ -0,0 +1,49 @@
+=================================================
+I2C device driver binding control from user-space
+=================================================
+
+Up to kernel 2.6.32, many i2c drivers used helper macros provided by
+<linux/i2c.h> which created standard module parameters to let the user
+control how the driver would probe i2c buses and attach to devices. These
+parameters were known as "probe" (to let the driver probe for an extra
+address), "force" (to forcibly attach the driver to a given device) and
+"ignore" (to prevent a driver from probing a given address).
+
+With the conversion of the i2c subsystem to the standard device driver
+binding model, it became clear that these per-module parameters were no
+longer needed, and that a centralized implementation was possible. The new,
+sysfs-based interface is described in the documentation file
+"instantiating-devices", section "Method 4: Instantiate from user-space".
+
+Below is a mapping from the old module parameters to the new interface.
+
+Attaching a driver to an I2C device
+-----------------------------------
+
+Old method (module parameters)::
+
+  # modprobe <driver> probe=1,0x2d
+  # modprobe <driver> force=1,0x2d
+  # modprobe <driver> force_<device>=1,0x2d
+
+New method (sysfs interface)::
+
+  # echo <device> 0x2d > /sys/bus/i2c/devices/i2c-1/new_device
+
+Preventing a driver from attaching to an I2C device
+---------------------------------------------------
+
+Old method (module parameters)::
+
+  # modprobe <driver> ignore=1,0x2f
+
+New method (sysfs interface)::
+
+  # echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device
+  # modprobe <driver>
+
+Of course, it is important to instantiate the "dummy" device before loading
+the driver. The dummy device will be handled by i2c-core itself, preventing
+other drivers from binding to it later on. If there is a real device at the
+problematic address, and you want another driver to bind to it, then simply
+pass the name of the device in question instead of "dummy".
diff --git a/Documentation/i2c/slave-eeprom-backend b/Documentation/i2c/slave-eeprom-backend
deleted file mode 100644
index 04f8d8a9b817..000000000000
--- a/Documentation/i2c/slave-eeprom-backend
+++ /dev/null
@@ -1,14 +0,0 @@
-Linux I2C slave eeprom backend
-==============================
-
-by Wolfram Sang <wsa@sang-engineering.com> in 2014-15
-
-This is a proof-of-concept backend which acts like an EEPROM on the connected
-I2C bus. The memory contents can be modified from userspace via this file
-located in sysfs:
-
-	/sys/bus/i2c/devices/<device-directory>/slave-eeprom
-
-As of 2015, Linux doesn't support poll on binary sysfs files, so there is no
-notification when another master changed the content.
-
diff --git a/Documentation/i2c/slave-eeprom-backend.rst b/Documentation/i2c/slave-eeprom-backend.rst
new file mode 100644
index 000000000000..0b8cd83698e0
--- /dev/null
+++ b/Documentation/i2c/slave-eeprom-backend.rst
@@ -0,0 +1,14 @@
+==============================
+Linux I2C slave eeprom backend
+==============================
+
+by Wolfram Sang <wsa@sang-engineering.com> in 2014-15
+
+This is a proof-of-concept backend which acts like an EEPROM on the connected
+I2C bus. The memory contents can be modified from userspace via this file
+located in sysfs::
+
+	/sys/bus/i2c/devices/<device-directory>/slave-eeprom
+
+As of 2015, Linux doesn't support poll on binary sysfs files, so there is no
+notification when another master changed the content.
diff --git a/Documentation/i2c/slave-interface b/Documentation/i2c/slave-interface
deleted file mode 100644
index 7e2a228f21bc..000000000000
--- a/Documentation/i2c/slave-interface
+++ /dev/null
@@ -1,193 +0,0 @@
-Linux I2C slave interface description
-=====================================
-
-by Wolfram Sang <wsa@sang-engineering.com> in 2014-15
-
-Linux can also be an I2C slave if the I2C controller in use has slave
-functionality. For that to work, one needs slave support in the bus driver plus
-a hardware independent software backend providing the actual functionality. An
-example for the latter is the slave-eeprom driver, which acts as a dual memory
-driver. While another I2C master on the bus can access it like a regular
-EEPROM, the Linux I2C slave can access the content via sysfs and handle data as
-needed. The backend driver and the I2C bus driver communicate via events. Here
-is a small graph visualizing the data flow and the means by which data is
-transported. The dotted line marks only one example. The backend could also
-use a character device, be in-kernel only, or something completely different:
-
-
-              e.g. sysfs        I2C slave events        I/O registers
-  +-----------+   v    +---------+     v     +--------+  v  +------------+
-  | Userspace +........+ Backend +-----------+ Driver +-----+ Controller |
-  +-----------+        +---------+           +--------+     +------------+
-                                                                | |
-  ----------------------------------------------------------------+--  I2C
-  --------------------------------------------------------------+----  Bus
-
-Note: Technically, there is also the I2C core between the backend and the
-driver. However, at this time of writing, the layer is transparent.
-
-
-User manual
-===========
-
-I2C slave backends behave like standard I2C clients. So, you can instantiate
-them as described in the document 'instantiating-devices'. The only difference
-is that i2c slave backends have their own address space. So, you have to add
-0x1000 to the address you would originally request. An example for
-instantiating the slave-eeprom driver from userspace at the 7 bit address 0x64
-on bus 1:
-
-  # echo slave-24c02 0x1064 > /sys/bus/i2c/devices/i2c-1/new_device
-
-Each backend should come with separate documentation to describe its specific
-behaviour and setup.
-
-
-Developer manual
-================
-
-First, the events which are used by the bus driver and the backend will be
-described in detail. After that, some implementation hints for extending bus
-drivers and writing backends will be given.
-
-
-I2C slave events
-----------------
-
-The bus driver sends an event to the backend using the following function:
-
-	ret = i2c_slave_event(client, event, &val)
-
-'client' describes the i2c slave device. 'event' is one of the special event
-types described hereafter. 'val' holds an u8 value for the data byte to be
-read/written and is thus bidirectional. The pointer to val must always be
-provided even if val is not used for an event, i.e. don't use NULL here. 'ret'
-is the return value from the backend. Mandatory events must be provided by the
-bus drivers and must be checked for by backend drivers.
-
-Event types:
-
-* I2C_SLAVE_WRITE_REQUESTED (mandatory)
-
-'val': unused
-'ret': always 0
-
-Another I2C master wants to write data to us. This event should be sent once
-our own address and the write bit was detected. The data did not arrive yet, so
-there is nothing to process or return. Wakeup or initialization probably needs
-to be done, though.
-
-* I2C_SLAVE_READ_REQUESTED (mandatory)
-
-'val': backend returns first byte to be sent
-'ret': always 0
-
-Another I2C master wants to read data from us. This event should be sent once
-our own address and the read bit was detected. After returning, the bus driver
-should transmit the first byte.
-
-* I2C_SLAVE_WRITE_RECEIVED (mandatory)
-
-'val': bus driver delivers received byte
-'ret': 0 if the byte should be acked, some errno if the byte should be nacked
-
-Another I2C master has sent a byte to us which needs to be set in 'val'. If 'ret'
-is zero, the bus driver should ack this byte. If 'ret' is an errno, then the byte
-should be nacked.
-
-* I2C_SLAVE_READ_PROCESSED (mandatory)
-
-'val': backend returns next byte to be sent
-'ret': always 0
-
-The bus driver requests the next byte to be sent to another I2C master in
-'val'. Important: This does not mean that the previous byte has been acked, it
-only means that the previous byte is shifted out to the bus! To ensure seamless
-transmission, most hardware requests the next byte when the previous one is
-still shifted out. If the master sends NACK and stops reading after the byte
-currently shifted out, this byte requested here is never used. It very likely
-needs to be sent again on the next I2C_SLAVE_READ_REQUEST, depending a bit on
-your backend, though.
-
-* I2C_SLAVE_STOP (mandatory)
-
-'val': unused
-'ret': always 0
-
-A stop condition was received. This can happen anytime and the backend should
-reset its state machine for I2C transfers to be able to receive new requests.
-
-
-Software backends
------------------
-
-If you want to write a software backend:
-
-* use a standard i2c_driver and its matching mechanisms
-* write the slave_callback which handles the above slave events
-  (best using a state machine)
-* register this callback via i2c_slave_register()
-
-Check the i2c-slave-eeprom driver as an example.
-
-
-Bus driver support
-------------------
-
-If you want to add slave support to the bus driver:
-
-* implement calls to register/unregister the slave and add those to the
-  struct i2c_algorithm. When registering, you probably need to set the i2c
-  slave address and enable slave specific interrupts. If you use runtime pm, you
-  should use pm_runtime_get_sync() because your device usually needs to be
-  powered on always to be able to detect its slave address. When unregistering,
-  do the inverse of the above.
-
-* Catch the slave interrupts and send appropriate i2c_slave_events to the backend.
-
-Note that most hardware supports being master _and_ slave on the same bus. So,
-if you extend a bus driver, please make sure that the driver supports that as
-well. In almost all cases, slave support does not need to disable the master
-functionality.
-
-Check the i2c-rcar driver as an example.
-
-
-About ACK/NACK
---------------
-
-It is good behaviour to always ACK the address phase, so the master knows if a
-device is basically present or if it mysteriously disappeared. Using NACK to
-state being busy is troublesome. SMBus demands to always ACK the address phase,
-while the I2C specification is more loose on that. Most I2C controllers also
-automatically ACK when detecting their slave addresses, so there is no option
-to NACK them. For those reasons, this API does not support NACK in the address
-phase.
-
-Currently, there is no slave event to report if the master did ACK or NACK a
-byte when it reads from us. We could make this an optional event if the need
-arises. However, cases should be extremely rare because the master is expected
-to send STOP after that and we have an event for that. Also, keep in mind not
-all I2C controllers have the possibility to report that event.
-
-
-About buffers
--------------
-
-During development of this API, the question of using buffers instead of just
-bytes came up. Such an extension might be possible, usefulness is unclear at
-this time of writing. Some points to keep in mind when using buffers:
-
-* Buffers should be opt-in and backend drivers will always have to support
-  byte-based transactions as the ultimate fallback anyhow because this is how
-  the majority of HW works.
-
-* For backends simulating hardware registers, buffers are largely not helpful
-  because after each byte written an action should be immediately triggered.
-  For reads, the data kept in the buffer might get stale if the backend just
-  updated a register because of internal processing.
-
-* A master can send STOP at any time. For partially transferred buffers, this
-  means additional code to handle this exception. Such code tends to be
-  error-prone.
-
diff --git a/Documentation/i2c/slave-interface.rst b/Documentation/i2c/slave-interface.rst
new file mode 100644
index 000000000000..c769bd6a15bf
--- /dev/null
+++ b/Documentation/i2c/slave-interface.rst
@@ -0,0 +1,198 @@
+=====================================
+Linux I2C slave interface description
+=====================================
+
+by Wolfram Sang <wsa@sang-engineering.com> in 2014-15
+
+Linux can also be an I2C slave if the I2C controller in use has slave
+functionality. For that to work, one needs slave support in the bus driver plus
+a hardware independent software backend providing the actual functionality. An
+example for the latter is the slave-eeprom driver, which acts as a dual memory
+driver. While another I2C master on the bus can access it like a regular
+EEPROM, the Linux I2C slave can access the content via sysfs and handle data as
+needed. The backend driver and the I2C bus driver communicate via events. Here
+is a small graph visualizing the data flow and the means by which data is
+transported. The dotted line marks only one example. The backend could also
+use a character device, be in-kernel only, or something completely different::
+
+
+              e.g. sysfs        I2C slave events        I/O registers
+  +-----------+   v    +---------+     v     +--------+  v  +------------+
+  | Userspace +........+ Backend +-----------+ Driver +-----+ Controller |
+  +-----------+        +---------+           +--------+     +------------+
+                                                                | |
+  ----------------------------------------------------------------+--  I2C
+  --------------------------------------------------------------+----  Bus
+
+Note: Technically, there is also the I2C core between the backend and the
+driver. However, at this time of writing, the layer is transparent.
+
+
+User manual
+===========
+
+I2C slave backends behave like standard I2C clients. So, you can instantiate
+them as described in the document 'instantiating-devices'. The only difference
+is that i2c slave backends have their own address space. So, you have to add
+0x1000 to the address you would originally request. An example for
+instantiating the slave-eeprom driver from userspace at the 7 bit address 0x64
+on bus 1::
+
+  # echo slave-24c02 0x1064 > /sys/bus/i2c/devices/i2c-1/new_device
+
+Each backend should come with separate documentation to describe its specific
+behaviour and setup.
+
+
+Developer manual
+================
+
+First, the events which are used by the bus driver and the backend will be
+described in detail. After that, some implementation hints for extending bus
+drivers and writing backends will be given.
+
+
+I2C slave events
+----------------
+
+The bus driver sends an event to the backend using the following function::
+
+	ret = i2c_slave_event(client, event, &val)
+
+'client' describes the i2c slave device. 'event' is one of the special event
+types described hereafter. 'val' holds an u8 value for the data byte to be
+read/written and is thus bidirectional. The pointer to val must always be
+provided even if val is not used for an event, i.e. don't use NULL here. 'ret'
+is the return value from the backend. Mandatory events must be provided by the
+bus drivers and must be checked for by backend drivers.
+
+Event types:
+
+* I2C_SLAVE_WRITE_REQUESTED (mandatory)
+
+  'val': unused
+
+  'ret': always 0
+
+Another I2C master wants to write data to us. This event should be sent once
+our own address and the write bit was detected. The data did not arrive yet, so
+there is nothing to process or return. Wakeup or initialization probably needs
+to be done, though.
+
+* I2C_SLAVE_READ_REQUESTED (mandatory)
+
+  'val': backend returns first byte to be sent
+
+  'ret': always 0
+
+Another I2C master wants to read data from us. This event should be sent once
+our own address and the read bit was detected. After returning, the bus driver
+should transmit the first byte.
+
+* I2C_SLAVE_WRITE_RECEIVED (mandatory)
+
+  'val': bus driver delivers received byte
+
+  'ret': 0 if the byte should be acked, some errno if the byte should be nacked
+
+Another I2C master has sent a byte to us which needs to be set in 'val'. If 'ret'
+is zero, the bus driver should ack this byte. If 'ret' is an errno, then the byte
+should be nacked.
+
+* I2C_SLAVE_READ_PROCESSED (mandatory)
+
+  'val': backend returns next byte to be sent
+
+  'ret': always 0
+
+The bus driver requests the next byte to be sent to another I2C master in
+'val'. Important: This does not mean that the previous byte has been acked, it
+only means that the previous byte is shifted out to the bus! To ensure seamless
+transmission, most hardware requests the next byte when the previous one is
+still shifted out. If the master sends NACK and stops reading after the byte
+currently shifted out, this byte requested here is never used. It very likely
+needs to be sent again on the next I2C_SLAVE_READ_REQUEST, depending a bit on
+your backend, though.
+
+* I2C_SLAVE_STOP (mandatory)
+
+  'val': unused
+
+  'ret': always 0
+
+A stop condition was received. This can happen anytime and the backend should
+reset its state machine for I2C transfers to be able to receive new requests.
+
+
+Software backends
+-----------------
+
+If you want to write a software backend:
+
+* use a standard i2c_driver and its matching mechanisms
+* write the slave_callback which handles the above slave events
+  (best using a state machine)
+* register this callback via i2c_slave_register()
+
+Check the i2c-slave-eeprom driver as an example.
+
+
+Bus driver support
+------------------
+
+If you want to add slave support to the bus driver:
+
+* implement calls to register/unregister the slave and add those to the
+  struct i2c_algorithm. When registering, you probably need to set the i2c
+  slave address and enable slave specific interrupts. If you use runtime pm, you
+  should use pm_runtime_get_sync() because your device usually needs to be
+  powered on always to be able to detect its slave address. When unregistering,
+  do the inverse of the above.
+
+* Catch the slave interrupts and send appropriate i2c_slave_events to the backend.
+
+Note that most hardware supports being master _and_ slave on the same bus. So,
+if you extend a bus driver, please make sure that the driver supports that as
+well. In almost all cases, slave support does not need to disable the master
+functionality.
+
+Check the i2c-rcar driver as an example.
+
+
+About ACK/NACK
+--------------
+
+It is good behaviour to always ACK the address phase, so the master knows if a
+device is basically present or if it mysteriously disappeared. Using NACK to
+state being busy is troublesome. SMBus demands to always ACK the address phase,
+while the I2C specification is more loose on that. Most I2C controllers also
+automatically ACK when detecting their slave addresses, so there is no option
+to NACK them. For those reasons, this API does not support NACK in the address
+phase.
+
+Currently, there is no slave event to report if the master did ACK or NACK a
+byte when it reads from us. We could make this an optional event if the need
+arises. However, cases should be extremely rare because the master is expected
+to send STOP after that and we have an event for that. Also, keep in mind not
+all I2C controllers have the possibility to report that event.
+
+
+About buffers
+-------------
+
+During development of this API, the question of using buffers instead of just
+bytes came up. Such an extension might be possible, usefulness is unclear at
+this time of writing. Some points to keep in mind when using buffers:
+
+* Buffers should be opt-in and backend drivers will always have to support
+  byte-based transactions as the ultimate fallback anyhow because this is how
+  the majority of HW works.
+
+* For backends simulating hardware registers, buffers are largely not helpful
+  because after each byte written an action should be immediately triggered.
+  For reads, the data kept in the buffer might get stale if the backend just
+  updated a register because of internal processing.
+
+* A master can send STOP at any time. For partially transferred buffers, this
+  means additional code to handle this exception. Such code tends to be
+  error-prone.
diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
deleted file mode 100644
index 092d474f5843..000000000000
--- a/Documentation/i2c/smbus-protocol
+++ /dev/null
@@ -1,283 +0,0 @@
-SMBus Protocol Summary
-======================
-
-The following is a summary of the SMBus protocol. It applies to
-all revisions of the protocol (1.0, 1.1, and 2.0).
-Certain protocol features which are not supported by
-this package are briefly described at the end of this document.
-
-Some adapters understand only the SMBus (System Management Bus) protocol,
-which is a subset from the I2C protocol. Fortunately, many devices use
-only the same subset, which makes it possible to put them on an SMBus.
-
-If you write a driver for some I2C device, please try to use the SMBus
-commands if at all possible (if the device uses only that subset of the
-I2C protocol). This makes it possible to use the device driver on both
-SMBus adapters and I2C adapters (the SMBus command set is automatically
-translated to I2C on I2C adapters, but plain I2C commands can not be
-handled at all on most pure SMBus adapters).
-
-Below is a list of SMBus protocol operations, and the functions executing
-them.  Note that the names used in the SMBus protocol specifications usually
-don't match these function names.  For some of the operations which pass a
-single data byte, the functions using SMBus protocol operation names execute
-a different protocol operation entirely.
-
-Each transaction type corresponds to a functionality flag. Before calling a
-transaction function, a device driver should always check (just once) for
-the corresponding functionality flag to ensure that the underlying I2C
-adapter supports the transaction in question. See
-<file:Documentation/i2c/functionality> for the details.
-
-
-Key to symbols
-==============
-
-S     (1 bit) : Start bit
-P     (1 bit) : Stop bit
-Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0.
-A, NA (1 bit) : Accept and reverse accept bit. 
-Addr  (7 bits): I2C 7 bit address. Note that this can be expanded as usual to 
-                get a 10 bit I2C address.
-Comm  (8 bits): Command byte, a data byte which often selects a register on
-                the device.
-Data  (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh
-                for 16 bit data.
-Count (8 bits): A data byte containing the length of a block operation.
-
-[..]: Data sent by I2C device, as opposed to data sent by the host adapter.
-
-
-SMBus Quick Command
-===================
-
-This sends a single bit to the device, at the place of the Rd/Wr bit.
-
-A Addr Rd/Wr [A] P
-
-Functionality flag: I2C_FUNC_SMBUS_QUICK
-
-
-SMBus Receive Byte:  i2c_smbus_read_byte()
-==========================================
-
-This reads a single byte from a device, without specifying a device
-register. Some devices are so simple that this interface is enough; for
-others, it is a shorthand if you want to read the same register as in
-the previous SMBus command.
-
-S Addr Rd [A] [Data] NA P
-
-Functionality flag: I2C_FUNC_SMBUS_READ_BYTE
-
-
-SMBus Send Byte:  i2c_smbus_write_byte()
-========================================
-
-This operation is the reverse of Receive Byte: it sends a single byte
-to a device.  See Receive Byte for more information.
-
-S Addr Wr [A] Data [A] P
-
-Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE
-
-
-SMBus Read Byte:  i2c_smbus_read_byte_data()
-============================================
-
-This reads a single byte from a device, from a designated register.
-The register is specified through the Comm byte.
-
-S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
-
-Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
-
-
-SMBus Read Word:  i2c_smbus_read_word_data()
-============================================
-
-This operation is very like Read Byte; again, data is read from a
-device, from a designated register that is specified through the Comm
-byte. But this time, the data is a complete word (16 bits).
-
-S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
-
-Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
-
-Note the convenience function i2c_smbus_read_word_swapped is
-available for reads where the two data bytes are the other way
-around (not SMBus compliant, but very popular.)
-
-
-SMBus Write Byte:  i2c_smbus_write_byte_data()
-==============================================
-
-This writes a single byte to a device, to a designated register. The
-register is specified through the Comm byte. This is the opposite of
-the Read Byte operation.
-
-S Addr Wr [A] Comm [A] Data [A] P
-
-Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE_DATA
-
-
-SMBus Write Word:  i2c_smbus_write_word_data()
-==============================================
-
-This is the opposite of the Read Word operation. 16 bits
-of data is written to a device, to the designated register that is
-specified through the Comm byte. 
-
-S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] P
-
-Functionality flag: I2C_FUNC_SMBUS_WRITE_WORD_DATA
-
-Note the convenience function i2c_smbus_write_word_swapped is
-available for writes where the two data bytes are the other way
-around (not SMBus compliant, but very popular.)
-
-
-SMBus Process Call:
-===================
-
-This command selects a device register (through the Comm byte), sends
-16 bits of data to it, and reads 16 bits of data in return.
-
-S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] 
-                             S Addr Rd [A] [DataLow] A [DataHigh] NA P
-
-Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
-
-
-SMBus Block Read:  i2c_smbus_read_block_data()
-==============================================
-
-This command reads a block of up to 32 bytes from a device, from a 
-designated register that is specified through the Comm byte. The amount
-of data is specified by the device in the Count byte.
-
-S Addr Wr [A] Comm [A] 
-           S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
-
-Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
-
-
-SMBus Block Write:  i2c_smbus_write_block_data()
-================================================
-
-The opposite of the Block Read command, this writes up to 32 bytes to 
-a device, to a designated register that is specified through the
-Comm byte. The amount of data is specified in the Count byte.
-
-S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P
-
-Functionality flag: I2C_FUNC_SMBUS_WRITE_BLOCK_DATA
-
-
-SMBus Block Write - Block Read Process Call
-===========================================
-
-SMBus Block Write - Block Read Process Call was introduced in
-Revision 2.0 of the specification.
-
-This command selects a device register (through the Comm byte), sends
-1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return.
-
-S Addr Wr [A] Comm [A] Count [A] Data [A] ...
-                             S Addr Rd [A] [Count] A [Data] ... A P
-
-Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
-
-
-SMBus Host Notify
-=================
-
-This command is sent from a SMBus device acting as a master to the
-SMBus host acting as a slave.
-It is the same form as Write Word, with the command code replaced by the
-alerting device's address.
-
-[S] [HostAddr] [Wr] A [DevAddr] A [DataLow] A [DataHigh] A [P]
-
-This is implemented in the following way in the Linux kernel:
-* I2C bus drivers which support SMBus Host Notify should report
-  I2C_FUNC_SMBUS_HOST_NOTIFY.
-* I2C bus drivers trigger SMBus Host Notify by a call to
-  i2c_handle_smbus_host_notify().
-* I2C drivers for devices which can trigger SMBus Host Notify will have
-  client->irq assigned to a Host Notify IRQ if noone else specified an other.
-
-There is currently no way to retrieve the data parameter from the client.
-
-
-Packet Error Checking (PEC)
-===========================
-
-Packet Error Checking was introduced in Revision 1.1 of the specification.
-
-PEC adds a CRC-8 error-checking byte to transfers using it, immediately
-before the terminating STOP.
-
-
-Address Resolution Protocol (ARP)
-=================================
-
-The Address Resolution Protocol was introduced in Revision 2.0 of
-the specification. It is a higher-layer protocol which uses the
-messages above.
-
-ARP adds device enumeration and dynamic address assignment to
-the protocol. All ARP communications use slave address 0x61 and
-require PEC checksums.
-
-
-SMBus Alert
-===========
-
-SMBus Alert was introduced in Revision 1.0 of the specification.
-
-The SMBus alert protocol allows several SMBus slave devices to share a
-single interrupt pin on the SMBus master, while still allowing the master
-to know which slave triggered the interrupt.
-
-This is implemented the following way in the Linux kernel:
-* I2C bus drivers which support SMBus alert should call
-  i2c_setup_smbus_alert() to setup SMBus alert support.
-* I2C drivers for devices which can trigger SMBus alerts should implement
-  the optional alert() callback.
-
-
-I2C Block Transactions
-======================
-
-The following I2C block transactions are supported by the
-SMBus layer and are described here for completeness.
-They are *NOT* defined by the SMBus specification.
-
-I2C block transactions do not limit the number of bytes transferred
-but the SMBus layer places a limit of 32 bytes.
-
-
-I2C Block Read:  i2c_smbus_read_i2c_block_data()
-================================================
-
-This command reads a block of bytes from a device, from a 
-designated register that is specified through the Comm byte.
-
-S Addr Wr [A] Comm [A] 
-           S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
-
-Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
-
-
-I2C Block Write:  i2c_smbus_write_i2c_block_data()
-==================================================
-
-The opposite of the Block Read command, this writes bytes to 
-a device, to a designated register that is specified through the
-Comm byte. Note that command lengths of 0, 2, or more bytes are
-supported as they are indistinguishable from data.
-
-S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P
-
-Functionality flag: I2C_FUNC_SMBUS_WRITE_I2C_BLOCK
diff --git a/Documentation/i2c/smbus-protocol.rst b/Documentation/i2c/smbus-protocol.rst
new file mode 100644
index 000000000000..e30eb1d274c6
--- /dev/null
+++ b/Documentation/i2c/smbus-protocol.rst
@@ -0,0 +1,301 @@
+======================
+SMBus Protocol Summary
+======================
+
+The following is a summary of the SMBus protocol. It applies to
+all revisions of the protocol (1.0, 1.1, and 2.0).
+Certain protocol features which are not supported by
+this package are briefly described at the end of this document.
+
+Some adapters understand only the SMBus (System Management Bus) protocol,
+which is a subset from the I2C protocol. Fortunately, many devices use
+only the same subset, which makes it possible to put them on an SMBus.
+
+If you write a driver for some I2C device, please try to use the SMBus
+commands if at all possible (if the device uses only that subset of the
+I2C protocol). This makes it possible to use the device driver on both
+SMBus adapters and I2C adapters (the SMBus command set is automatically
+translated to I2C on I2C adapters, but plain I2C commands can not be
+handled at all on most pure SMBus adapters).
+
+Below is a list of SMBus protocol operations, and the functions executing
+them.  Note that the names used in the SMBus protocol specifications usually
+don't match these function names.  For some of the operations which pass a
+single data byte, the functions using SMBus protocol operation names execute
+a different protocol operation entirely.
+
+Each transaction type corresponds to a functionality flag. Before calling a
+transaction function, a device driver should always check (just once) for
+the corresponding functionality flag to ensure that the underlying I2C
+adapter supports the transaction in question. See
+<file:Documentation/i2c/functionality.rst> for the details.
+
+
+Key to symbols
+==============
+
+=============== =============================================================
+S     (1 bit) : Start bit
+P     (1 bit) : Stop bit
+Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0.
+A, NA (1 bit) : Accept and reverse accept bit.
+Addr  (7 bits): I2C 7 bit address. Note that this can be expanded as usual to
+                get a 10 bit I2C address.
+Comm  (8 bits): Command byte, a data byte which often selects a register on
+                the device.
+Data  (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh
+                for 16 bit data.
+Count (8 bits): A data byte containing the length of a block operation.
+
+[..]:           Data sent by I2C device, as opposed to data sent by the host
+                adapter.
+=============== =============================================================
+
+
+SMBus Quick Command
+===================
+
+This sends a single bit to the device, at the place of the Rd/Wr bit::
+
+  A Addr Rd/Wr [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_QUICK
+
+
+SMBus Receive Byte:  i2c_smbus_read_byte()
+==========================================
+
+This reads a single byte from a device, without specifying a device
+register. Some devices are so simple that this interface is enough; for
+others, it is a shorthand if you want to read the same register as in
+the previous SMBus command::
+
+  S Addr Rd [A] [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_BYTE
+
+
+SMBus Send Byte:  i2c_smbus_write_byte()
+========================================
+
+This operation is the reverse of Receive Byte: it sends a single byte
+to a device.  See Receive Byte for more information.
+
+::
+
+  S Addr Wr [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE
+
+
+SMBus Read Byte:  i2c_smbus_read_byte_data()
+============================================
+
+This reads a single byte from a device, from a designated register.
+The register is specified through the Comm byte::
+
+  S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
+
+
+SMBus Read Word:  i2c_smbus_read_word_data()
+============================================
+
+This operation is very like Read Byte; again, data is read from a
+device, from a designated register that is specified through the Comm
+byte. But this time, the data is a complete word (16 bits)::
+
+  S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
+
+Note the convenience function i2c_smbus_read_word_swapped is
+available for reads where the two data bytes are the other way
+around (not SMBus compliant, but very popular.)
+
+
+SMBus Write Byte:  i2c_smbus_write_byte_data()
+==============================================
+
+This writes a single byte to a device, to a designated register. The
+register is specified through the Comm byte. This is the opposite of
+the Read Byte operation.
+
+::
+
+  S Addr Wr [A] Comm [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE_DATA
+
+
+SMBus Write Word:  i2c_smbus_write_word_data()
+==============================================
+
+This is the opposite of the Read Word operation. 16 bits
+of data is written to a device, to the designated register that is
+specified through the Comm byte.::
+
+  S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_WORD_DATA
+
+Note the convenience function i2c_smbus_write_word_swapped is
+available for writes where the two data bytes are the other way
+around (not SMBus compliant, but very popular.)
+
+
+SMBus Process Call:
+===================
+
+This command selects a device register (through the Comm byte), sends
+16 bits of data to it, and reads 16 bits of data in return::
+
+  S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A]
+                               S Addr Rd [A] [DataLow] A [DataHigh] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
+
+
+SMBus Block Read:  i2c_smbus_read_block_data()
+==============================================
+
+This command reads a block of up to 32 bytes from a device, from a
+designated register that is specified through the Comm byte. The amount
+of data is specified by the device in the Count byte.
+
+::
+
+  S Addr Wr [A] Comm [A]
+             S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
+
+
+SMBus Block Write:  i2c_smbus_write_block_data()
+================================================
+
+The opposite of the Block Read command, this writes up to 32 bytes to
+a device, to a designated register that is specified through the
+Comm byte. The amount of data is specified in the Count byte.
+
+::
+
+  S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BLOCK_DATA
+
+
+SMBus Block Write - Block Read Process Call
+===========================================
+
+SMBus Block Write - Block Read Process Call was introduced in
+Revision 2.0 of the specification.
+
+This command selects a device register (through the Comm byte), sends
+1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return::
+
+  S Addr Wr [A] Comm [A] Count [A] Data [A] ...
+                               S Addr Rd [A] [Count] A [Data] ... A P
+
+Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
+
+
+SMBus Host Notify
+=================
+
+This command is sent from a SMBus device acting as a master to the
+SMBus host acting as a slave.
+It is the same form as Write Word, with the command code replaced by the
+alerting device's address.
+
+::
+
+  [S] [HostAddr] [Wr] A [DevAddr] A [DataLow] A [DataHigh] A [P]
+
+This is implemented in the following way in the Linux kernel:
+
+* I2C bus drivers which support SMBus Host Notify should report
+  I2C_FUNC_SMBUS_HOST_NOTIFY.
+* I2C bus drivers trigger SMBus Host Notify by a call to
+  i2c_handle_smbus_host_notify().
+* I2C drivers for devices which can trigger SMBus Host Notify will have
+  client->irq assigned to a Host Notify IRQ if noone else specified an other.
+
+There is currently no way to retrieve the data parameter from the client.
+
+
+Packet Error Checking (PEC)
+===========================
+
+Packet Error Checking was introduced in Revision 1.1 of the specification.
+
+PEC adds a CRC-8 error-checking byte to transfers using it, immediately
+before the terminating STOP.
+
+
+Address Resolution Protocol (ARP)
+=================================
+
+The Address Resolution Protocol was introduced in Revision 2.0 of
+the specification. It is a higher-layer protocol which uses the
+messages above.
+
+ARP adds device enumeration and dynamic address assignment to
+the protocol. All ARP communications use slave address 0x61 and
+require PEC checksums.
+
+
+SMBus Alert
+===========
+
+SMBus Alert was introduced in Revision 1.0 of the specification.
+
+The SMBus alert protocol allows several SMBus slave devices to share a
+single interrupt pin on the SMBus master, while still allowing the master
+to know which slave triggered the interrupt.
+
+This is implemented the following way in the Linux kernel:
+
+* I2C bus drivers which support SMBus alert should call
+  i2c_setup_smbus_alert() to setup SMBus alert support.
+* I2C drivers for devices which can trigger SMBus alerts should implement
+  the optional alert() callback.
+
+
+I2C Block Transactions
+======================
+
+The following I2C block transactions are supported by the
+SMBus layer and are described here for completeness.
+They are *NOT* defined by the SMBus specification.
+
+I2C block transactions do not limit the number of bytes transferred
+but the SMBus layer places a limit of 32 bytes.
+
+
+I2C Block Read:  i2c_smbus_read_i2c_block_data()
+================================================
+
+This command reads a block of bytes from a device, from a
+designated register that is specified through the Comm byte::
+
+  S Addr Wr [A] Comm [A]
+             S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
+
+
+I2C Block Write:  i2c_smbus_write_i2c_block_data()
+==================================================
+
+The opposite of the Block Read command, this writes bytes to
+a device, to a designated register that is specified through the
+Comm byte. Note that command lengths of 0, 2, or more bytes are
+supported as they are indistinguishable from data.
+
+::
+
+  S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_I2C_BLOCK
diff --git a/Documentation/i2c/summary b/Documentation/i2c/summary
deleted file mode 100644
index 809541ab352f..000000000000
--- a/Documentation/i2c/summary
+++ /dev/null
@@ -1,43 +0,0 @@
-I2C and SMBus
-=============
-
-I2C (pronounce: I squared C) is a protocol developed by Philips. It is a 
-slow two-wire protocol (variable speed, up to 400 kHz), with a high speed
-extension (3.4 MHz).  It provides an inexpensive bus for connecting many
-types of devices with infrequent or low bandwidth communications needs.
-I2C is widely used with embedded systems.  Some systems use variants that
-don't meet branding requirements, and so are not advertised as being I2C.
-
-SMBus (System Management Bus) is based on the I2C protocol, and is mostly
-a subset of I2C protocols and signaling.  Many I2C devices will work on an
-SMBus, but some SMBus protocols add semantics beyond what is required to
-achieve I2C branding.  Modern PC mainboards rely on SMBus.  The most common
-devices connected through SMBus are RAM modules configured using I2C EEPROMs,
-and hardware monitoring chips.
-
-Because the SMBus is mostly a subset of the generalized I2C bus, we can
-use its protocols on many I2C systems.  However, there are systems that don't
-meet both SMBus and I2C electrical constraints; and others which can't
-implement all the common SMBus protocol semantics or messages.
-
-
-Terminology
-===========
-
-When we talk about I2C, we use the following terms:
-  Bus    -> Algorithm
-            Adapter
-  Device -> Driver
-            Client
-
-An Algorithm driver contains general code that can be used for a whole class
-of I2C adapters. Each specific adapter driver either depends on one algorithm
-driver, or includes its own implementation.
-
-A Driver driver (yes, this sounds ridiculous, sorry) contains the general
-code to access some type of device. Each detected device gets its own
-data in the Client structure. Usually, Driver and Client are more closely
-integrated than Algorithm and Adapter.
-
-For a given configuration, you will need a driver for your I2C bus, and
-drivers for your I2C devices (usually one driver for each device).
diff --git a/Documentation/i2c/summary.rst b/Documentation/i2c/summary.rst
new file mode 100644
index 000000000000..3a24eac17375
--- /dev/null
+++ b/Documentation/i2c/summary.rst
@@ -0,0 +1,45 @@
+=============
+I2C and SMBus
+=============
+
+I2C (pronounce: I squared C) is a protocol developed by Philips. It is a
+slow two-wire protocol (variable speed, up to 400 kHz), with a high speed
+extension (3.4 MHz).  It provides an inexpensive bus for connecting many
+types of devices with infrequent or low bandwidth communications needs.
+I2C is widely used with embedded systems.  Some systems use variants that
+don't meet branding requirements, and so are not advertised as being I2C.
+
+SMBus (System Management Bus) is based on the I2C protocol, and is mostly
+a subset of I2C protocols and signaling.  Many I2C devices will work on an
+SMBus, but some SMBus protocols add semantics beyond what is required to
+achieve I2C branding.  Modern PC mainboards rely on SMBus.  The most common
+devices connected through SMBus are RAM modules configured using I2C EEPROMs,
+and hardware monitoring chips.
+
+Because the SMBus is mostly a subset of the generalized I2C bus, we can
+use its protocols on many I2C systems.  However, there are systems that don't
+meet both SMBus and I2C electrical constraints; and others which can't
+implement all the common SMBus protocol semantics or messages.
+
+
+Terminology
+===========
+
+When we talk about I2C, we use the following terms::
+
+  Bus    -> Algorithm
+            Adapter
+  Device -> Driver
+            Client
+
+An Algorithm driver contains general code that can be used for a whole class
+of I2C adapters. Each specific adapter driver either depends on one algorithm
+driver, or includes its own implementation.
+
+A Driver driver (yes, this sounds ridiculous, sorry) contains the general
+code to access some type of device. Each detected device gets its own
+data in the Client structure. Usually, Driver and Client are more closely
+integrated than Algorithm and Adapter.
+
+For a given configuration, you will need a driver for your I2C bus, and
+drivers for your I2C devices (usually one driver for each device).
diff --git a/Documentation/i2c/ten-bit-addresses b/Documentation/i2c/ten-bit-addresses
deleted file mode 100644
index 7b2d11e53a49..000000000000
--- a/Documentation/i2c/ten-bit-addresses
+++ /dev/null
@@ -1,28 +0,0 @@
-The I2C protocol knows about two kinds of device addresses: normal 7 bit
-addresses, and an extended set of 10 bit addresses. The sets of addresses
-do not intersect: the 7 bit address 0x10 is not the same as the 10 bit
-address 0x10 (though a single device could respond to both of them).
-To avoid ambiguity, the user sees 10 bit addresses mapped to a different
-address space, namely 0xa000-0xa3ff. The leading 0xa (= 10) represents the
-10 bit mode. This is used for creating device names in sysfs. It is also
-needed when instantiating 10 bit devices via the new_device file in sysfs.
-
-I2C messages to and from 10-bit address devices have a different format.
-See the I2C specification for the details.
-
-The current 10 bit address support is minimal. It should work, however
-you can expect some problems along the way:
-* Not all bus drivers support 10-bit addresses. Some don't because the
-  hardware doesn't support them (SMBus doesn't require 10-bit address
-  support for example), some don't because nobody bothered adding the
-  code (or it's there but not working properly.) Software implementation
-  (i2c-algo-bit) is known to work.
-* Some optional features do not support 10-bit addresses. This is the
-  case of automatic detection and instantiation of devices by their,
-  drivers, for example.
-* Many user-space packages (for example i2c-tools) lack support for
-  10-bit addresses.
-
-Note that 10-bit address devices are still pretty rare, so the limitations
-listed above could stay for a long time, maybe even forever if nobody
-needs them to be fixed.
diff --git a/Documentation/i2c/ten-bit-addresses.rst b/Documentation/i2c/ten-bit-addresses.rst
new file mode 100644
index 000000000000..5c765aff16d5
--- /dev/null
+++ b/Documentation/i2c/ten-bit-addresses.rst
@@ -0,0 +1,33 @@
+=====================
+I2C Ten-bit Addresses
+=====================
+
+The I2C protocol knows about two kinds of device addresses: normal 7 bit
+addresses, and an extended set of 10 bit addresses. The sets of addresses
+do not intersect: the 7 bit address 0x10 is not the same as the 10 bit
+address 0x10 (though a single device could respond to both of them).
+To avoid ambiguity, the user sees 10 bit addresses mapped to a different
+address space, namely 0xa000-0xa3ff. The leading 0xa (= 10) represents the
+10 bit mode. This is used for creating device names in sysfs. It is also
+needed when instantiating 10 bit devices via the new_device file in sysfs.
+
+I2C messages to and from 10-bit address devices have a different format.
+See the I2C specification for the details.
+
+The current 10 bit address support is minimal. It should work, however
+you can expect some problems along the way:
+
+* Not all bus drivers support 10-bit addresses. Some don't because the
+  hardware doesn't support them (SMBus doesn't require 10-bit address
+  support for example), some don't because nobody bothered adding the
+  code (or it's there but not working properly.) Software implementation
+  (i2c-algo-bit) is known to work.
+* Some optional features do not support 10-bit addresses. This is the
+  case of automatic detection and instantiation of devices by their,
+  drivers, for example.
+* Many user-space packages (for example i2c-tools) lack support for
+  10-bit addresses.
+
+Note that 10-bit address devices are still pretty rare, so the limitations
+listed above could stay for a long time, maybe even forever if nobody
+needs them to be fixed.
diff --git a/Documentation/i2c/upgrading-clients b/Documentation/i2c/upgrading-clients
deleted file mode 100644
index 96392cc5b5c7..000000000000
--- a/Documentation/i2c/upgrading-clients
+++ /dev/null
@@ -1,279 +0,0 @@
-Upgrading I2C Drivers to the new 2.6 Driver Model
-=================================================
-
-Ben Dooks <ben-linux@fluff.org>
-
-Introduction
-------------
-
-This guide outlines how to alter existing Linux 2.6 client drivers from
-the old to the new new binding methods.
-
-
-Example old-style driver
-------------------------
-
-
-struct example_state {
-	struct i2c_client	client;
-	....
-};
-
-static struct i2c_driver example_driver;
-
-static unsigned short ignore[] = { I2C_CLIENT_END };
-static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END };
-
-I2C_CLIENT_INSMOD;
-
-static int example_attach(struct i2c_adapter *adap, int addr, int kind)
-{
-	struct example_state *state;
-	struct device *dev = &adap->dev;  /* to use for dev_ reports */
-	int ret;
-
-	state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
-	if (state == NULL) {
-		dev_err(dev, "failed to create our state\n");
-		return -ENOMEM;
-	}
-
-	example->client.addr    = addr;
-	example->client.flags   = 0;
-	example->client.adapter = adap;
-
-	i2c_set_clientdata(&state->i2c_client, state);
-	strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name));
-
-	ret = i2c_attach_client(&state->i2c_client);
-	if (ret < 0) {
-		dev_err(dev, "failed to attach client\n");
-		kfree(state);
-		return ret;
-	}
-
-	dev = &state->i2c_client.dev;
-
-	/* rest of the initialisation goes here. */
-
-	dev_info(dev, "example client created\n");
-
-	return 0;
-}
-
-static int example_detach(struct i2c_client *client)
-{
-	struct example_state *state = i2c_get_clientdata(client);
-
-	i2c_detach_client(client);
-	kfree(state);
-	return 0;
-}
-
-static int example_attach_adapter(struct i2c_adapter *adap)
-{
-	return i2c_probe(adap, &addr_data, example_attach);
-}
-
-static struct i2c_driver example_driver = {
- 	.driver		= {
-		.owner		= THIS_MODULE,
-		.name		= "example",
-		.pm		= &example_pm_ops,
-	},
-	.attach_adapter = example_attach_adapter,
-	.detach_client	= example_detach,
-};
-
-
-Updating the client
--------------------
-
-The new style binding model will check against a list of supported
-devices and their associated address supplied by the code registering
-the busses. This means that the driver .attach_adapter and
-.detach_client methods can be removed, along with the addr_data,
-as follows:
-
-- static struct i2c_driver example_driver;
-
-- static unsigned short ignore[] = { I2C_CLIENT_END };
-- static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END };
-
-- I2C_CLIENT_INSMOD;
-
-- static int example_attach_adapter(struct i2c_adapter *adap)
-- {
-- 	return i2c_probe(adap, &addr_data, example_attach);
-- }
-
- static struct i2c_driver example_driver = {
--	.attach_adapter = example_attach_adapter,
--	.detach_client	= example_detach,
- }
-
-Add the probe and remove methods to the i2c_driver, as so:
-
- static struct i2c_driver example_driver = {
-+	.probe		= example_probe,
-+	.remove		= example_remove,
- }
-
-Change the example_attach method to accept the new parameters
-which include the i2c_client that it will be working with:
-
-- static int example_attach(struct i2c_adapter *adap, int addr, int kind)
-+ static int example_probe(struct i2c_client *client,
-+			   const struct i2c_device_id *id)
-
-Change the name of example_attach to example_probe to align it with the
-i2c_driver entry names. The rest of the probe routine will now need to be
-changed as the i2c_client has already been setup for use.
-
-The necessary client fields have already been setup before
-the probe function is called, so the following client setup
-can be removed:
-
--	example->client.addr    = addr;
--	example->client.flags   = 0;
--	example->client.adapter = adap;
--
--	strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name));
-
-The i2c_set_clientdata is now:
-
--	i2c_set_clientdata(&state->client, state);
-+	i2c_set_clientdata(client, state);
-
-The call to i2c_attach_client is no longer needed, if the probe
-routine exits successfully, then the driver will be automatically
-attached by the core. Change the probe routine as so:
-
--	ret = i2c_attach_client(&state->i2c_client);
--	if (ret < 0) {
--		dev_err(dev, "failed to attach client\n");
--		kfree(state);
--		return ret;
--	}
-
-
-Remove the storage of 'struct i2c_client' from the 'struct example_state'
-as we are provided with the i2c_client in our example_probe. Instead we
-store a pointer to it for when it is needed.
-
-struct example_state {
--	struct i2c_client	client;
-+	struct i2c_client	*client;
-
-the new i2c client as so:
-
--	struct device *dev = &adap->dev;  /* to use for dev_ reports */
-+ 	struct device *dev = &i2c_client->dev;  /* to use for dev_ reports */
-
-And remove the change after our client is attached, as the driver no
-longer needs to register a new client structure with the core:
-
--	dev = &state->i2c_client.dev;
-
-In the probe routine, ensure that the new state has the client stored
-in it:
-
-static int example_probe(struct i2c_client *i2c_client,
-			 const struct i2c_device_id *id)
-{
-	struct example_state *state;
- 	struct device *dev = &i2c_client->dev;
-	int ret;
-
-	state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
-	if (state == NULL) {
-		dev_err(dev, "failed to create our state\n");
-		return -ENOMEM;
-	}
-
-+	state->client = i2c_client;
-
-Update the detach method, by changing the name to _remove and
-to delete the i2c_detach_client call. It is possible that you
-can also remove the ret variable as it is not needed for any
-of the core functions.
-
-- static int example_detach(struct i2c_client *client)
-+ static int example_remove(struct i2c_client *client)
-{
-	struct example_state *state = i2c_get_clientdata(client);
-
--	i2c_detach_client(client);
-
-And finally ensure that we have the correct ID table for the i2c-core
-and other utilities:
-
-+ struct i2c_device_id example_idtable[] = {
-+       { "example", 0 },
-+       { }
-+};
-+
-+MODULE_DEVICE_TABLE(i2c, example_idtable);
-
-static struct i2c_driver example_driver = {
- 	.driver		= {
-		.owner		= THIS_MODULE,
-		.name		= "example",
-	},
-+	.id_table	= example_ids,
-
-
-Our driver should now look like this:
-
-struct example_state {
-	struct i2c_client	*client;
-	....
-};
-
-static int example_probe(struct i2c_client *client,
-		     	 const struct i2c_device_id *id)
-{
-	struct example_state *state;
-	struct device *dev = &client->dev;
-
-	state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
-	if (state == NULL) {
-		dev_err(dev, "failed to create our state\n");
-		return -ENOMEM;
-	}
-
-	state->client = client;
-	i2c_set_clientdata(client, state);
-
-	/* rest of the initialisation goes here. */
-
-	dev_info(dev, "example client created\n");
-
-	return 0;
-}
-
-static int example_remove(struct i2c_client *client)
-{
-	struct example_state *state = i2c_get_clientdata(client);
-
-	kfree(state);
-	return 0;
-}
-
-static struct i2c_device_id example_idtable[] = {
-	{ "example", 0 },
-	{ }
-};
-
-MODULE_DEVICE_TABLE(i2c, example_idtable);
-
-static struct i2c_driver example_driver = {
- 	.driver		= {
-		.owner		= THIS_MODULE,
-		.name		= "example",
-		.pm		= &example_pm_ops,
-	},
-	.id_table	= example_idtable,
-	.probe		= example_probe,
-	.remove		= example_remove,
-};
diff --git a/Documentation/i2c/upgrading-clients.rst b/Documentation/i2c/upgrading-clients.rst
new file mode 100644
index 000000000000..27d29032c138
--- /dev/null
+++ b/Documentation/i2c/upgrading-clients.rst
@@ -0,0 +1,285 @@
+=================================================
+Upgrading I2C Drivers to the new 2.6 Driver Model
+=================================================
+
+Ben Dooks <ben-linux@fluff.org>
+
+Introduction
+------------
+
+This guide outlines how to alter existing Linux 2.6 client drivers from
+the old to the new new binding methods.
+
+
+Example old-style driver
+------------------------
+
+::
+
+  struct example_state {
+	struct i2c_client	client;
+	....
+  };
+
+  static struct i2c_driver example_driver;
+
+  static unsigned short ignore[] = { I2C_CLIENT_END };
+  static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END };
+
+  I2C_CLIENT_INSMOD;
+
+  static int example_attach(struct i2c_adapter *adap, int addr, int kind)
+  {
+	struct example_state *state;
+	struct device *dev = &adap->dev;  /* to use for dev_ reports */
+	int ret;
+
+	state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
+	if (state == NULL) {
+		dev_err(dev, "failed to create our state\n");
+		return -ENOMEM;
+	}
+
+	example->client.addr    = addr;
+	example->client.flags   = 0;
+	example->client.adapter = adap;
+
+	i2c_set_clientdata(&state->i2c_client, state);
+	strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name));
+
+	ret = i2c_attach_client(&state->i2c_client);
+	if (ret < 0) {
+		dev_err(dev, "failed to attach client\n");
+		kfree(state);
+		return ret;
+	}
+
+	dev = &state->i2c_client.dev;
+
+	/* rest of the initialisation goes here. */
+
+	dev_info(dev, "example client created\n");
+
+	return 0;
+  }
+
+  static int example_detach(struct i2c_client *client)
+  {
+	struct example_state *state = i2c_get_clientdata(client);
+
+	i2c_detach_client(client);
+	kfree(state);
+	return 0;
+  }
+
+  static int example_attach_adapter(struct i2c_adapter *adap)
+  {
+	return i2c_probe(adap, &addr_data, example_attach);
+  }
+
+  static struct i2c_driver example_driver = {
+	.driver		= {
+		.owner		= THIS_MODULE,
+		.name		= "example",
+		.pm		= &example_pm_ops,
+	},
+	.attach_adapter = example_attach_adapter,
+	.detach_client	= example_detach,
+  };
+
+
+Updating the client
+-------------------
+
+The new style binding model will check against a list of supported
+devices and their associated address supplied by the code registering
+the busses. This means that the driver .attach_adapter and
+.detach_client methods can be removed, along with the addr_data,
+as follows::
+
+  - static struct i2c_driver example_driver;
+
+  - static unsigned short ignore[] = { I2C_CLIENT_END };
+  - static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END };
+
+  - I2C_CLIENT_INSMOD;
+
+  - static int example_attach_adapter(struct i2c_adapter *adap)
+  - {
+  - 	return i2c_probe(adap, &addr_data, example_attach);
+  - }
+
+    static struct i2c_driver example_driver = {
+  -	.attach_adapter = example_attach_adapter,
+  -	.detach_client	= example_detach,
+    }
+
+Add the probe and remove methods to the i2c_driver, as so::
+
+   static struct i2c_driver example_driver = {
+  +	.probe		= example_probe,
+  +	.remove		= example_remove,
+   }
+
+Change the example_attach method to accept the new parameters
+which include the i2c_client that it will be working with::
+
+  - static int example_attach(struct i2c_adapter *adap, int addr, int kind)
+  + static int example_probe(struct i2c_client *client,
+  +			   const struct i2c_device_id *id)
+
+Change the name of example_attach to example_probe to align it with the
+i2c_driver entry names. The rest of the probe routine will now need to be
+changed as the i2c_client has already been setup for use.
+
+The necessary client fields have already been setup before
+the probe function is called, so the following client setup
+can be removed::
+
+  -	example->client.addr    = addr;
+  -	example->client.flags   = 0;
+  -	example->client.adapter = adap;
+  -
+  -	strscpy(client->i2c_client.name, "example", sizeof(client->i2c_client.name));
+
+The i2c_set_clientdata is now::
+
+  -	i2c_set_clientdata(&state->client, state);
+  +	i2c_set_clientdata(client, state);
+
+The call to i2c_attach_client is no longer needed, if the probe
+routine exits successfully, then the driver will be automatically
+attached by the core. Change the probe routine as so::
+
+  -	ret = i2c_attach_client(&state->i2c_client);
+  -	if (ret < 0) {
+  -		dev_err(dev, "failed to attach client\n");
+  -		kfree(state);
+  -		return ret;
+  -	}
+
+
+Remove the storage of 'struct i2c_client' from the 'struct example_state'
+as we are provided with the i2c_client in our example_probe. Instead we
+store a pointer to it for when it is needed.
+
+::
+
+  struct example_state {
+  -	struct i2c_client	client;
+  +	struct i2c_client	*client;
+
+the new i2c client as so::
+
+  -	struct device *dev = &adap->dev;  /* to use for dev_ reports */
+  + 	struct device *dev = &i2c_client->dev;  /* to use for dev_ reports */
+
+And remove the change after our client is attached, as the driver no
+longer needs to register a new client structure with the core::
+
+  -	dev = &state->i2c_client.dev;
+
+In the probe routine, ensure that the new state has the client stored
+in it::
+
+  static int example_probe(struct i2c_client *i2c_client,
+			 const struct i2c_device_id *id)
+  {
+	struct example_state *state;
+	struct device *dev = &i2c_client->dev;
+	int ret;
+
+	state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
+	if (state == NULL) {
+		dev_err(dev, "failed to create our state\n");
+		return -ENOMEM;
+	}
+
+  +	state->client = i2c_client;
+
+Update the detach method, by changing the name to _remove and
+to delete the i2c_detach_client call. It is possible that you
+can also remove the ret variable as it is not needed for any
+of the core functions.
+
+::
+
+  - static int example_detach(struct i2c_client *client)
+  + static int example_remove(struct i2c_client *client)
+  {
+	struct example_state *state = i2c_get_clientdata(client);
+
+  -	i2c_detach_client(client);
+
+And finally ensure that we have the correct ID table for the i2c-core
+and other utilities::
+
+  + struct i2c_device_id example_idtable[] = {
+  +       { "example", 0 },
+  +       { }
+  +};
+  +
+  +MODULE_DEVICE_TABLE(i2c, example_idtable);
+
+  static struct i2c_driver example_driver = {
+	.driver		= {
+		.owner		= THIS_MODULE,
+		.name		= "example",
+	},
+  +	.id_table	= example_ids,
+
+
+Our driver should now look like this::
+
+  struct example_state {
+	struct i2c_client	*client;
+	....
+  };
+
+  static int example_probe(struct i2c_client *client,
+			 const struct i2c_device_id *id)
+  {
+	struct example_state *state;
+	struct device *dev = &client->dev;
+
+	state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
+	if (state == NULL) {
+		dev_err(dev, "failed to create our state\n");
+		return -ENOMEM;
+	}
+
+	state->client = client;
+	i2c_set_clientdata(client, state);
+
+	/* rest of the initialisation goes here. */
+
+	dev_info(dev, "example client created\n");
+
+	return 0;
+  }
+
+  static int example_remove(struct i2c_client *client)
+  {
+	struct example_state *state = i2c_get_clientdata(client);
+
+	kfree(state);
+	return 0;
+  }
+
+  static struct i2c_device_id example_idtable[] = {
+	{ "example", 0 },
+	{ }
+  };
+
+  MODULE_DEVICE_TABLE(i2c, example_idtable);
+
+  static struct i2c_driver example_driver = {
+	.driver		= {
+		.owner		= THIS_MODULE,
+		.name		= "example",
+		.pm		= &example_pm_ops,
+	},
+	.id_table	= example_idtable,
+	.probe		= example_probe,
+	.remove		= example_remove,
+  };
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
deleted file mode 100644
index a755b141fa4a..000000000000
--- a/Documentation/i2c/writing-clients
+++ /dev/null
@@ -1,403 +0,0 @@
-This is a small guide for those who want to write kernel drivers for I2C
-or SMBus devices, using Linux as the protocol host/master (not slave).
-
-To set up a driver, you need to do several things. Some are optional, and
-some things can be done slightly or completely different. Use this as a
-guide, not as a rule book!
-
-
-General remarks
-===============
-
-Try to keep the kernel namespace as clean as possible. The best way to
-do this is to use a unique prefix for all global symbols. This is
-especially important for exported symbols, but it is a good idea to do
-it for non-exported symbols too. We will use the prefix `foo_' in this
-tutorial.
-
-
-The driver structure
-====================
-
-Usually, you will implement a single driver structure, and instantiate
-all clients from it. Remember, a driver structure contains general access
-routines, and should be zero-initialized except for fields with data you
-provide.  A client structure holds device-specific information like the
-driver model device node, and its I2C address.
-
-static struct i2c_device_id foo_idtable[] = {
-	{ "foo", my_id_for_foo },
-	{ "bar", my_id_for_bar },
-	{ }
-};
-
-MODULE_DEVICE_TABLE(i2c, foo_idtable);
-
-static struct i2c_driver foo_driver = {
-	.driver = {
-		.name	= "foo",
-		.pm	= &foo_pm_ops,	/* optional */
-	},
-
-	.id_table	= foo_idtable,
-	.probe		= foo_probe,
-	.remove		= foo_remove,
-	/* if device autodetection is needed: */
-	.class		= I2C_CLASS_SOMETHING,
-	.detect		= foo_detect,
-	.address_list	= normal_i2c,
-
-	.shutdown	= foo_shutdown,	/* optional */
-	.command	= foo_command,	/* optional, deprecated */
-}
-
-The name field is the driver name, and must not contain spaces.  It
-should match the module name (if the driver can be compiled as a module),
-although you can use MODULE_ALIAS (passing "foo" in this example) to add
-another name for the module.  If the driver name doesn't match the module
-name, the module won't be automatically loaded (hotplug/coldplug).
-
-All other fields are for call-back functions which will be explained
-below.
-
-
-Extra client data
-=================
-
-Each client structure has a special `data' field that can point to any
-structure at all.  You should use this to keep device-specific data.
-
-	/* store the value */
-	void i2c_set_clientdata(struct i2c_client *client, void *data);
-
-	/* retrieve the value */
-	void *i2c_get_clientdata(const struct i2c_client *client);
-
-Note that starting with kernel 2.6.34, you don't have to set the `data' field
-to NULL in remove() or if probe() failed anymore. The i2c-core does this
-automatically on these occasions. Those are also the only times the core will
-touch this field.
-
-
-Accessing the client
-====================
-
-Let's say we have a valid client structure. At some time, we will need
-to gather information from the client, or write new information to the
-client.
-
-I have found it useful to define foo_read and foo_write functions for this.
-For some cases, it will be easier to call the i2c functions directly,
-but many chips have some kind of register-value idea that can easily
-be encapsulated.
-
-The below functions are simple examples, and should not be copied
-literally.
-
-int foo_read_value(struct i2c_client *client, u8 reg)
-{
-	if (reg < 0x10)	/* byte-sized register */
-		return i2c_smbus_read_byte_data(client, reg);
-	else		/* word-sized register */
-		return i2c_smbus_read_word_data(client, reg);
-}
-
-int foo_write_value(struct i2c_client *client, u8 reg, u16 value)
-{
-	if (reg == 0x10)	/* Impossible to write - driver error! */
-		return -EINVAL;
-	else if (reg < 0x10)	/* byte-sized register */
-		return i2c_smbus_write_byte_data(client, reg, value);
-	else			/* word-sized register */
-		return i2c_smbus_write_word_data(client, reg, value);
-}
-
-
-Probing and attaching
-=====================
-
-The Linux I2C stack was originally written to support access to hardware
-monitoring chips on PC motherboards, and thus used to embed some assumptions
-that were more appropriate to SMBus (and PCs) than to I2C.  One of these
-assumptions was that most adapters and devices drivers support the SMBUS_QUICK
-protocol to probe device presence.  Another was that devices and their drivers
-can be sufficiently configured using only such probe primitives.
-
-As Linux and its I2C stack became more widely used in embedded systems
-and complex components such as DVB adapters, those assumptions became more
-problematic.  Drivers for I2C devices that issue interrupts need more (and
-different) configuration information, as do drivers handling chip variants
-that can't be distinguished by protocol probing, or which need some board
-specific information to operate correctly.
-
-
-Device/Driver Binding
----------------------
-
-System infrastructure, typically board-specific initialization code or
-boot firmware, reports what I2C devices exist.  For example, there may be
-a table, in the kernel or from the boot loader, identifying I2C devices
-and linking them to board-specific configuration information about IRQs
-and other wiring artifacts, chip type, and so on.  That could be used to
-create i2c_client objects for each I2C device.
-
-I2C device drivers using this binding model work just like any other
-kind of driver in Linux:  they provide a probe() method to bind to
-those devices, and a remove() method to unbind.
-
-	static int foo_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id);
-	static int foo_remove(struct i2c_client *client);
-
-Remember that the i2c_driver does not create those client handles.  The
-handle may be used during foo_probe().  If foo_probe() reports success
-(zero not a negative status code) it may save the handle and use it until
-foo_remove() returns.  That binding model is used by most Linux drivers.
-
-The probe function is called when an entry in the id_table name field
-matches the device's name. It is passed the entry that was matched so
-the driver knows which one in the table matched.
-
-
-Device Creation
----------------
-
-If you know for a fact that an I2C device is connected to a given I2C bus,
-you can instantiate that device by simply filling an i2c_board_info
-structure with the device address and driver name, and calling
-i2c_new_device().  This will create the device, then the driver core will
-take care of finding the right driver and will call its probe() method.
-If a driver supports different device types, you can specify the type you
-want using the type field.  You can also specify an IRQ and platform data
-if needed.
-
-Sometimes you know that a device is connected to a given I2C bus, but you
-don't know the exact address it uses.  This happens on TV adapters for
-example, where the same driver supports dozens of slightly different
-models, and I2C device addresses change from one model to the next.  In
-that case, you can use the i2c_new_probed_device() variant, which is
-similar to i2c_new_device(), except that it takes an additional list of
-possible I2C addresses to probe.  A device is created for the first
-responsive address in the list.  If you expect more than one device to be
-present in the address range, simply call i2c_new_probed_device() that
-many times.
-
-The call to i2c_new_device() or i2c_new_probed_device() typically happens
-in the I2C bus driver. You may want to save the returned i2c_client
-reference for later use.
-
-
-Device Detection
-----------------
-
-Sometimes you do not know in advance which I2C devices are connected to
-a given I2C bus.  This is for example the case of hardware monitoring
-devices on a PC's SMBus.  In that case, you may want to let your driver
-detect supported devices automatically.  This is how the legacy model
-was working, and is now available as an extension to the standard
-driver model.
-
-You simply have to define a detect callback which will attempt to
-identify supported devices (returning 0 for supported ones and -ENODEV
-for unsupported ones), a list of addresses to probe, and a device type
-(or class) so that only I2C buses which may have that type of device
-connected (and not otherwise enumerated) will be probed.  For example,
-a driver for a hardware monitoring chip for which auto-detection is
-needed would set its class to I2C_CLASS_HWMON, and only I2C adapters
-with a class including I2C_CLASS_HWMON would be probed by this driver.
-Note that the absence of matching classes does not prevent the use of
-a device of that type on the given I2C adapter.  All it prevents is
-auto-detection; explicit instantiation of devices is still possible.
-
-Note that this mechanism is purely optional and not suitable for all
-devices.  You need some reliable way to identify the supported devices
-(typically using device-specific, dedicated identification registers),
-otherwise misdetections are likely to occur and things can get wrong
-quickly.  Keep in mind that the I2C protocol doesn't include any
-standard way to detect the presence of a chip at a given address, let
-alone a standard way to identify devices.  Even worse is the lack of
-semantics associated to bus transfers, which means that the same
-transfer can be seen as a read operation by a chip and as a write
-operation by another chip.  For these reasons, explicit device
-instantiation should always be preferred to auto-detection where
-possible.
-
-
-Device Deletion
----------------
-
-Each I2C device which has been created using i2c_new_device() or
-i2c_new_probed_device() can be unregistered by calling
-i2c_unregister_device().  If you don't call it explicitly, it will be
-called automatically before the underlying I2C bus itself is removed, as a
-device can't survive its parent in the device driver model.
-
-
-Initializing the driver
-=======================
-
-When the kernel is booted, or when your foo driver module is inserted,
-you have to do some initializing. Fortunately, just registering the
-driver module is usually enough.
-
-static int __init foo_init(void)
-{
-	return i2c_add_driver(&foo_driver);
-}
-module_init(foo_init);
-
-static void __exit foo_cleanup(void)
-{
-	i2c_del_driver(&foo_driver);
-}
-module_exit(foo_cleanup);
-
-The module_i2c_driver() macro can be used to reduce above code.
-
-module_i2c_driver(foo_driver);
-
-Note that some functions are marked by `__init'.  These functions can
-be removed after kernel booting (or module loading) is completed.
-Likewise, functions marked by `__exit' are dropped by the compiler when
-the code is built into the kernel, as they would never be called.
-
-
-Driver Information
-==================
-
-/* Substitute your own name and email address */
-MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>"
-MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices");
-
-/* a few non-GPL license types are also allowed */
-MODULE_LICENSE("GPL");
-
-
-Power Management
-================
-
-If your I2C device needs special handling when entering a system low
-power state -- like putting a transceiver into a low power mode, or
-activating a system wakeup mechanism -- do that by implementing the
-appropriate callbacks for the dev_pm_ops of the driver (like suspend
-and resume).
-
-These are standard driver model calls, and they work just like they
-would for any other driver stack.  The calls can sleep, and can use
-I2C messaging to the device being suspended or resumed (since their
-parent I2C adapter is active when these calls are issued, and IRQs
-are still enabled).
-
-
-System Shutdown
-===============
-
-If your I2C device needs special handling when the system shuts down
-or reboots (including kexec) -- like turning something off -- use a
-shutdown() method.
-
-Again, this is a standard driver model call, working just like it
-would for any other driver stack:  the calls can sleep, and can use
-I2C messaging.
-
-
-Command function
-================
-
-A generic ioctl-like function call back is supported. You will seldom
-need this, and its use is deprecated anyway, so newer design should not
-use it.
-
-
-Sending and receiving
-=====================
-
-If you want to communicate with your device, there are several functions
-to do this. You can find all of them in <linux/i2c.h>.
-
-If you can choose between plain I2C communication and SMBus level
-communication, please use the latter. All adapters understand SMBus level
-commands, but only some of them understand plain I2C!
-
-
-Plain I2C communication
------------------------
-
-	int i2c_master_send(struct i2c_client *client, const char *buf,
-			    int count);
-	int i2c_master_recv(struct i2c_client *client, char *buf, int count);
-
-These routines read and write some bytes from/to a client. The client
-contains the i2c address, so you do not have to include it. The second
-parameter contains the bytes to read/write, the third the number of bytes
-to read/write (must be less than the length of the buffer, also should be
-less than 64k since msg.len is u16.) Returned is the actual number of bytes
-read/written.
-
-	int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msg,
-			 int num);
-
-This sends a series of messages. Each message can be a read or write,
-and they can be mixed in any way. The transactions are combined: no
-stop bit is sent between transaction. The i2c_msg structure contains
-for each message the client address, the number of bytes of the message
-and the message data itself.
-
-You can read the file `i2c-protocol' for more information about the
-actual I2C protocol.
-
-
-SMBus communication
--------------------
-
-	s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
-			   unsigned short flags, char read_write, u8 command,
-			   int size, union i2c_smbus_data *data);
-
-This is the generic SMBus function. All functions below are implemented
-in terms of it. Never use this function directly!
-
-	s32 i2c_smbus_read_byte(struct i2c_client *client);
-	s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value);
-	s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command);
-	s32 i2c_smbus_write_byte_data(struct i2c_client *client,
-				      u8 command, u8 value);
-	s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command);
-	s32 i2c_smbus_write_word_data(struct i2c_client *client,
-				      u8 command, u16 value);
-	s32 i2c_smbus_read_block_data(struct i2c_client *client,
-				      u8 command, u8 *values);
-	s32 i2c_smbus_write_block_data(struct i2c_client *client,
-				       u8 command, u8 length, const u8 *values);
-	s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client,
-					  u8 command, u8 length, u8 *values);
-	s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
-					   u8 command, u8 length,
-					   const u8 *values);
-
-These ones were removed from i2c-core because they had no users, but could
-be added back later if needed:
-
-	s32 i2c_smbus_write_quick(struct i2c_client *client, u8 value);
-	s32 i2c_smbus_process_call(struct i2c_client *client,
-				   u8 command, u16 value);
-	s32 i2c_smbus_block_process_call(struct i2c_client *client,
-					 u8 command, u8 length, u8 *values);
-
-All these transactions return a negative errno value on failure. The 'write'
-transactions return 0 on success; the 'read' transactions return the read
-value, except for block transactions, which return the number of values
-read. The block buffers need not be longer than 32 bytes.
-
-You can read the file `smbus-protocol' for more information about the
-actual SMBus protocol.
-
-
-General purpose routines
-========================
-
-Below all general purpose routines are listed, that were not mentioned
-before.
-
-	/* Return the adapter number for a specific adapter */
-	int i2c_adapter_id(struct i2c_adapter *adap);
diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst
new file mode 100644
index 000000000000..dddf0a14ab7c
--- /dev/null
+++ b/Documentation/i2c/writing-clients.rst
@@ -0,0 +1,425 @@
+===================
+Writing I2C Clients
+===================
+
+This is a small guide for those who want to write kernel drivers for I2C
+or SMBus devices, using Linux as the protocol host/master (not slave).
+
+To set up a driver, you need to do several things. Some are optional, and
+some things can be done slightly or completely different. Use this as a
+guide, not as a rule book!
+
+
+General remarks
+===============
+
+Try to keep the kernel namespace as clean as possible. The best way to
+do this is to use a unique prefix for all global symbols. This is
+especially important for exported symbols, but it is a good idea to do
+it for non-exported symbols too. We will use the prefix ``foo_`` in this
+tutorial.
+
+
+The driver structure
+====================
+
+Usually, you will implement a single driver structure, and instantiate
+all clients from it. Remember, a driver structure contains general access
+routines, and should be zero-initialized except for fields with data you
+provide.  A client structure holds device-specific information like the
+driver model device node, and its I2C address.
+
+::
+
+  static struct i2c_device_id foo_idtable[] = {
+	{ "foo", my_id_for_foo },
+	{ "bar", my_id_for_bar },
+	{ }
+  };
+
+  MODULE_DEVICE_TABLE(i2c, foo_idtable);
+
+  static struct i2c_driver foo_driver = {
+	.driver = {
+		.name	= "foo",
+		.pm	= &foo_pm_ops,	/* optional */
+	},
+
+	.id_table	= foo_idtable,
+	.probe		= foo_probe,
+	.remove		= foo_remove,
+	/* if device autodetection is needed: */
+	.class		= I2C_CLASS_SOMETHING,
+	.detect		= foo_detect,
+	.address_list	= normal_i2c,
+
+	.shutdown	= foo_shutdown,	/* optional */
+	.command	= foo_command,	/* optional, deprecated */
+  }
+
+The name field is the driver name, and must not contain spaces.  It
+should match the module name (if the driver can be compiled as a module),
+although you can use MODULE_ALIAS (passing "foo" in this example) to add
+another name for the module.  If the driver name doesn't match the module
+name, the module won't be automatically loaded (hotplug/coldplug).
+
+All other fields are for call-back functions which will be explained
+below.
+
+
+Extra client data
+=================
+
+Each client structure has a special ``data`` field that can point to any
+structure at all.  You should use this to keep device-specific data.
+
+::
+
+	/* store the value */
+	void i2c_set_clientdata(struct i2c_client *client, void *data);
+
+	/* retrieve the value */
+	void *i2c_get_clientdata(const struct i2c_client *client);
+
+Note that starting with kernel 2.6.34, you don't have to set the ``data`` field
+to NULL in remove() or if probe() failed anymore. The i2c-core does this
+automatically on these occasions. Those are also the only times the core will
+touch this field.
+
+
+Accessing the client
+====================
+
+Let's say we have a valid client structure. At some time, we will need
+to gather information from the client, or write new information to the
+client.
+
+I have found it useful to define foo_read and foo_write functions for this.
+For some cases, it will be easier to call the i2c functions directly,
+but many chips have some kind of register-value idea that can easily
+be encapsulated.
+
+The below functions are simple examples, and should not be copied
+literally::
+
+  int foo_read_value(struct i2c_client *client, u8 reg)
+  {
+	if (reg < 0x10)	/* byte-sized register */
+		return i2c_smbus_read_byte_data(client, reg);
+	else		/* word-sized register */
+		return i2c_smbus_read_word_data(client, reg);
+  }
+
+  int foo_write_value(struct i2c_client *client, u8 reg, u16 value)
+  {
+	if (reg == 0x10)	/* Impossible to write - driver error! */
+		return -EINVAL;
+	else if (reg < 0x10)	/* byte-sized register */
+		return i2c_smbus_write_byte_data(client, reg, value);
+	else			/* word-sized register */
+		return i2c_smbus_write_word_data(client, reg, value);
+  }
+
+
+Probing and attaching
+=====================
+
+The Linux I2C stack was originally written to support access to hardware
+monitoring chips on PC motherboards, and thus used to embed some assumptions
+that were more appropriate to SMBus (and PCs) than to I2C.  One of these
+assumptions was that most adapters and devices drivers support the SMBUS_QUICK
+protocol to probe device presence.  Another was that devices and their drivers
+can be sufficiently configured using only such probe primitives.
+
+As Linux and its I2C stack became more widely used in embedded systems
+and complex components such as DVB adapters, those assumptions became more
+problematic.  Drivers for I2C devices that issue interrupts need more (and
+different) configuration information, as do drivers handling chip variants
+that can't be distinguished by protocol probing, or which need some board
+specific information to operate correctly.
+
+
+Device/Driver Binding
+---------------------
+
+System infrastructure, typically board-specific initialization code or
+boot firmware, reports what I2C devices exist.  For example, there may be
+a table, in the kernel or from the boot loader, identifying I2C devices
+and linking them to board-specific configuration information about IRQs
+and other wiring artifacts, chip type, and so on.  That could be used to
+create i2c_client objects for each I2C device.
+
+I2C device drivers using this binding model work just like any other
+kind of driver in Linux:  they provide a probe() method to bind to
+those devices, and a remove() method to unbind.
+
+::
+
+	static int foo_probe(struct i2c_client *client,
+			     const struct i2c_device_id *id);
+	static int foo_remove(struct i2c_client *client);
+
+Remember that the i2c_driver does not create those client handles.  The
+handle may be used during foo_probe().  If foo_probe() reports success
+(zero not a negative status code) it may save the handle and use it until
+foo_remove() returns.  That binding model is used by most Linux drivers.
+
+The probe function is called when an entry in the id_table name field
+matches the device's name. It is passed the entry that was matched so
+the driver knows which one in the table matched.
+
+
+Device Creation
+---------------
+
+If you know for a fact that an I2C device is connected to a given I2C bus,
+you can instantiate that device by simply filling an i2c_board_info
+structure with the device address and driver name, and calling
+i2c_new_device().  This will create the device, then the driver core will
+take care of finding the right driver and will call its probe() method.
+If a driver supports different device types, you can specify the type you
+want using the type field.  You can also specify an IRQ and platform data
+if needed.
+
+Sometimes you know that a device is connected to a given I2C bus, but you
+don't know the exact address it uses.  This happens on TV adapters for
+example, where the same driver supports dozens of slightly different
+models, and I2C device addresses change from one model to the next.  In
+that case, you can use the i2c_new_probed_device() variant, which is
+similar to i2c_new_device(), except that it takes an additional list of
+possible I2C addresses to probe.  A device is created for the first
+responsive address in the list.  If you expect more than one device to be
+present in the address range, simply call i2c_new_probed_device() that
+many times.
+
+The call to i2c_new_device() or i2c_new_probed_device() typically happens
+in the I2C bus driver. You may want to save the returned i2c_client
+reference for later use.
+
+
+Device Detection
+----------------
+
+Sometimes you do not know in advance which I2C devices are connected to
+a given I2C bus.  This is for example the case of hardware monitoring
+devices on a PC's SMBus.  In that case, you may want to let your driver
+detect supported devices automatically.  This is how the legacy model
+was working, and is now available as an extension to the standard
+driver model.
+
+You simply have to define a detect callback which will attempt to
+identify supported devices (returning 0 for supported ones and -ENODEV
+for unsupported ones), a list of addresses to probe, and a device type
+(or class) so that only I2C buses which may have that type of device
+connected (and not otherwise enumerated) will be probed.  For example,
+a driver for a hardware monitoring chip for which auto-detection is
+needed would set its class to I2C_CLASS_HWMON, and only I2C adapters
+with a class including I2C_CLASS_HWMON would be probed by this driver.
+Note that the absence of matching classes does not prevent the use of
+a device of that type on the given I2C adapter.  All it prevents is
+auto-detection; explicit instantiation of devices is still possible.
+
+Note that this mechanism is purely optional and not suitable for all
+devices.  You need some reliable way to identify the supported devices
+(typically using device-specific, dedicated identification registers),
+otherwise misdetections are likely to occur and things can get wrong
+quickly.  Keep in mind that the I2C protocol doesn't include any
+standard way to detect the presence of a chip at a given address, let
+alone a standard way to identify devices.  Even worse is the lack of
+semantics associated to bus transfers, which means that the same
+transfer can be seen as a read operation by a chip and as a write
+operation by another chip.  For these reasons, explicit device
+instantiation should always be preferred to auto-detection where
+possible.
+
+
+Device Deletion
+---------------
+
+Each I2C device which has been created using i2c_new_device() or
+i2c_new_probed_device() can be unregistered by calling
+i2c_unregister_device().  If you don't call it explicitly, it will be
+called automatically before the underlying I2C bus itself is removed, as a
+device can't survive its parent in the device driver model.
+
+
+Initializing the driver
+=======================
+
+When the kernel is booted, or when your foo driver module is inserted,
+you have to do some initializing. Fortunately, just registering the
+driver module is usually enough.
+
+::
+
+  static int __init foo_init(void)
+  {
+	return i2c_add_driver(&foo_driver);
+  }
+  module_init(foo_init);
+
+  static void __exit foo_cleanup(void)
+  {
+	i2c_del_driver(&foo_driver);
+  }
+  module_exit(foo_cleanup);
+
+  The module_i2c_driver() macro can be used to reduce above code.
+
+  module_i2c_driver(foo_driver);
+
+Note that some functions are marked by ``__init``.  These functions can
+be removed after kernel booting (or module loading) is completed.
+Likewise, functions marked by ``__exit`` are dropped by the compiler when
+the code is built into the kernel, as they would never be called.
+
+
+Driver Information
+==================
+
+::
+
+  /* Substitute your own name and email address */
+  MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>"
+  MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices");
+
+  /* a few non-GPL license types are also allowed */
+  MODULE_LICENSE("GPL");
+
+
+Power Management
+================
+
+If your I2C device needs special handling when entering a system low
+power state -- like putting a transceiver into a low power mode, or
+activating a system wakeup mechanism -- do that by implementing the
+appropriate callbacks for the dev_pm_ops of the driver (like suspend
+and resume).
+
+These are standard driver model calls, and they work just like they
+would for any other driver stack.  The calls can sleep, and can use
+I2C messaging to the device being suspended or resumed (since their
+parent I2C adapter is active when these calls are issued, and IRQs
+are still enabled).
+
+
+System Shutdown
+===============
+
+If your I2C device needs special handling when the system shuts down
+or reboots (including kexec) -- like turning something off -- use a
+shutdown() method.
+
+Again, this is a standard driver model call, working just like it
+would for any other driver stack:  the calls can sleep, and can use
+I2C messaging.
+
+
+Command function
+================
+
+A generic ioctl-like function call back is supported. You will seldom
+need this, and its use is deprecated anyway, so newer design should not
+use it.
+
+
+Sending and receiving
+=====================
+
+If you want to communicate with your device, there are several functions
+to do this. You can find all of them in <linux/i2c.h>.
+
+If you can choose between plain I2C communication and SMBus level
+communication, please use the latter. All adapters understand SMBus level
+commands, but only some of them understand plain I2C!
+
+
+Plain I2C communication
+-----------------------
+
+::
+
+	int i2c_master_send(struct i2c_client *client, const char *buf,
+			    int count);
+	int i2c_master_recv(struct i2c_client *client, char *buf, int count);
+
+These routines read and write some bytes from/to a client. The client
+contains the i2c address, so you do not have to include it. The second
+parameter contains the bytes to read/write, the third the number of bytes
+to read/write (must be less than the length of the buffer, also should be
+less than 64k since msg.len is u16.) Returned is the actual number of bytes
+read/written.
+
+::
+
+	int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msg,
+			 int num);
+
+This sends a series of messages. Each message can be a read or write,
+and they can be mixed in any way. The transactions are combined: no
+stop bit is sent between transaction. The i2c_msg structure contains
+for each message the client address, the number of bytes of the message
+and the message data itself.
+
+You can read the file ``i2c-protocol`` for more information about the
+actual I2C protocol.
+
+
+SMBus communication
+-------------------
+
+::
+
+	s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+			   unsigned short flags, char read_write, u8 command,
+			   int size, union i2c_smbus_data *data);
+
+This is the generic SMBus function. All functions below are implemented
+in terms of it. Never use this function directly!
+
+::
+
+	s32 i2c_smbus_read_byte(struct i2c_client *client);
+	s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value);
+	s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command);
+	s32 i2c_smbus_write_byte_data(struct i2c_client *client,
+				      u8 command, u8 value);
+	s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command);
+	s32 i2c_smbus_write_word_data(struct i2c_client *client,
+				      u8 command, u16 value);
+	s32 i2c_smbus_read_block_data(struct i2c_client *client,
+				      u8 command, u8 *values);
+	s32 i2c_smbus_write_block_data(struct i2c_client *client,
+				       u8 command, u8 length, const u8 *values);
+	s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client,
+					  u8 command, u8 length, u8 *values);
+	s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
+					   u8 command, u8 length,
+					   const u8 *values);
+
+These ones were removed from i2c-core because they had no users, but could
+be added back later if needed::
+
+	s32 i2c_smbus_write_quick(struct i2c_client *client, u8 value);
+	s32 i2c_smbus_process_call(struct i2c_client *client,
+				   u8 command, u16 value);
+	s32 i2c_smbus_block_process_call(struct i2c_client *client,
+					 u8 command, u8 length, u8 *values);
+
+All these transactions return a negative errno value on failure. The 'write'
+transactions return 0 on success; the 'read' transactions return the read
+value, except for block transactions, which return the number of values
+read. The block buffers need not be longer than 32 bytes.
+
+You can read the file ``smbus-protocol`` for more information about the
+actual SMBus protocol.
+
+
+General purpose routines
+========================
+
+Below all general purpose routines are listed, that were not mentioned
+before::
+
+	/* Return the adapter number for a specific adapter */
+	int i2c_adapter_id(struct i2c_adapter *adap);
diff --git a/Documentation/index.rst b/Documentation/index.rst
index 2df5a3da563c..9b45af84fd29 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -104,6 +104,7 @@ needed).
    fb/index
    fpga/index
    hid/index
+   i2c/index
    iio/index
    infiniband/index
    leds/index
diff --git a/Documentation/spi/spi-sc18is602 b/Documentation/spi/spi-sc18is602
index a45702865a38..0feffd5af411 100644
--- a/Documentation/spi/spi-sc18is602
+++ b/Documentation/spi/spi-sc18is602
@@ -17,7 +17,7 @@ kernel's SPI core subsystem.
 
 The driver does not probe for supported chips, since the SI18IS602/603 does not
 support Chip ID registers. You will have to instantiate the devices explicitly.
-Please see Documentation/i2c/instantiating-devices for details.
+Please see Documentation/i2c/instantiating-devices.rst for details.
 
 
 Usage Notes
diff --git a/MAINTAINERS b/MAINTAINERS
index 6326445952f6..a3d5dd341088 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -666,7 +666,7 @@ ALI1563 I2C DRIVER
 M:	Rudolf Marek <r.marek@assembler.cz>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	Documentation/i2c/busses/i2c-ali1563
+F:	Documentation/i2c/busses/i2c-ali1563.rst
 F:	drivers/i2c/busses/i2c-ali1563.c
 
 ALLEGRO DVT VIDEO IP CORE DRIVER
@@ -6741,7 +6741,7 @@ L:	linux-i2c@vger.kernel.org
 S:	Supported
 F:	drivers/i2c/muxes/i2c-mux-gpio.c
 F:	include/linux/platform_data/i2c-mux-gpio.h
-F:	Documentation/i2c/muxes/i2c-mux-gpio
+F:	Documentation/i2c/muxes/i2c-mux-gpio.rst
 
 GENERIC HDLC (WAN) DRIVERS
 M:	Krzysztof Halasa <khc@pm.waw.pl>
@@ -7497,14 +7497,14 @@ I2C CONTROLLER DRIVER FOR NVIDIA GPU
 M:	Ajay Gupta <ajayg@nvidia.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	Documentation/i2c/busses/i2c-nvidia-gpu
+F:	Documentation/i2c/busses/i2c-nvidia-gpu.rst
 F:	drivers/i2c/busses/i2c-nvidia-gpu.c
 
 I2C MUXES
 M:	Peter Rosin <peda@axentia.se>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	Documentation/i2c/i2c-topology
+F:	Documentation/i2c/i2c-topology.rst
 F:	Documentation/i2c/muxes/
 F:	Documentation/devicetree/bindings/i2c/i2c-mux*
 F:	Documentation/devicetree/bindings/i2c/i2c-arb*
@@ -7524,8 +7524,8 @@ I2C OVER PARALLEL PORT
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	Documentation/i2c/busses/i2c-parport
-F:	Documentation/i2c/busses/i2c-parport-light
+F:	Documentation/i2c/busses/i2c-parport.rst
+F:	Documentation/i2c/busses/i2c-parport-light.rst
 F:	drivers/i2c/busses/i2c-parport.c
 F:	drivers/i2c/busses/i2c-parport-light.c
 
@@ -7559,7 +7559,7 @@ I2C-TAOS-EVM DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	Documentation/i2c/busses/i2c-taos-evm
+F:	Documentation/i2c/busses/i2c-taos-evm.rst
 F:	drivers/i2c/busses/i2c-taos-evm.c
 
 I2C-TINY-USB DRIVER
@@ -7573,19 +7573,19 @@ I2C/SMBUS CONTROLLER DRIVERS FOR PC
 M:	Jean Delvare <jdelvare@suse.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
-F:	Documentation/i2c/busses/i2c-ali1535
-F:	Documentation/i2c/busses/i2c-ali1563
-F:	Documentation/i2c/busses/i2c-ali15x3
-F:	Documentation/i2c/busses/i2c-amd756
-F:	Documentation/i2c/busses/i2c-amd8111
-F:	Documentation/i2c/busses/i2c-i801
-F:	Documentation/i2c/busses/i2c-nforce2
-F:	Documentation/i2c/busses/i2c-piix4
-F:	Documentation/i2c/busses/i2c-sis5595
-F:	Documentation/i2c/busses/i2c-sis630
-F:	Documentation/i2c/busses/i2c-sis96x
-F:	Documentation/i2c/busses/i2c-via
-F:	Documentation/i2c/busses/i2c-viapro
+F:	Documentation/i2c/busses/i2c-ali1535.rst
+F:	Documentation/i2c/busses/i2c-ali1563.rst
+F:	Documentation/i2c/busses/i2c-ali15x3.rst
+F:	Documentation/i2c/busses/i2c-amd756.rst
+F:	Documentation/i2c/busses/i2c-amd8111.rst
+F:	Documentation/i2c/busses/i2c-i801.rst
+F:	Documentation/i2c/busses/i2c-nforce2.rst
+F:	Documentation/i2c/busses/i2c-piix4.rst
+F:	Documentation/i2c/busses/i2c-sis5595.rst
+F:	Documentation/i2c/busses/i2c-sis630.rst
+F:	Documentation/i2c/busses/i2c-sis96x.rst
+F:	Documentation/i2c/busses/i2c-via.rst
+F:	Documentation/i2c/busses/i2c-viapro.rst
 F:	drivers/i2c/busses/i2c-ali1535.c
 F:	drivers/i2c/busses/i2c-ali1563.c
 F:	drivers/i2c/busses/i2c-ali15x3.c
@@ -7614,7 +7614,7 @@ M:	Seth Heasley <seth.heasley@intel.com>
 M:	Neil Horman <nhorman@tuxdriver.com>
 L:	linux-i2c@vger.kernel.org
 F:	drivers/i2c/busses/i2c-ismt.c
-F:	Documentation/i2c/busses/i2c-ismt
+F:	Documentation/i2c/busses/i2c-ismt.rst
 
 I2C/SMBUS STUB DRIVER
 M:	Jean Delvare <jdelvare@suse.com>
@@ -10355,7 +10355,7 @@ L:	linux-i2c@vger.kernel.org
 S:	Supported
 F:	drivers/i2c/busses/i2c-mlxcpld.c
 F:	drivers/i2c/muxes/i2c-mux-mlxcpld.c
-F:	Documentation/i2c/busses/i2c-mlxcpld
+F:	Documentation/i2c/busses/i2c-mlxcpld.rst
 
 MELLANOX MLXCPLD LED DRIVER
 M:	Vadim Pasternak <vadimp@mellanox.com>
@@ -11976,7 +11976,7 @@ M:	Andrew Lunn <andrew@lunn.ch>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 F:	Documentation/devicetree/bindings/i2c/i2c-ocores.txt
-F:	Documentation/i2c/busses/i2c-ocores
+F:	Documentation/i2c/busses/i2c-ocores.rst
 F:	drivers/i2c/busses/i2c-ocores.c
 F:	include/linux/platform_data/i2c-ocores.h
 
@@ -14280,7 +14280,7 @@ F:	net/sctp/
 SCx200 CPU SUPPORT
 M:	Jim Cromie <jim.cromie@gmail.com>
 S:	Odd Fixes
-F:	Documentation/i2c/busses/scx200_acb
+F:	Documentation/i2c/busses/scx200_acb.rst
 F:	arch/x86/platform/scx200/
 F:	drivers/watchdog/scx200_wdt.c
 F:	drivers/i2c/busses/scx200*
diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index e232fa948833..79b8df258371 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -5,7 +5,7 @@
  *
  * The ATXP1 can reside on I2C addresses 0x37 or 0x4e. The chip is
  * not auto-detected by the driver and must be instantiated explicitly.
- * See Documentation/i2c/instantiating-devices for more information.
+ * See Documentation/i2c/instantiating-devices.rst for more information.
  */
 
 #include <linux/kernel.h>
diff --git a/drivers/hwmon/smm665.c b/drivers/hwmon/smm665.c
index d8c91c2cb8cf..9ae0dc28b9cf 100644
--- a/drivers/hwmon/smm665.c
+++ b/drivers/hwmon/smm665.c
@@ -197,7 +197,7 @@ static int smm665_read_adc(struct smm665_data *data, int adc)
 	if (rv != -ENXIO) {
 		/*
 		 * We expect ENXIO to reflect NACK
-		 * (per Documentation/i2c/fault-codes).
+		 * (per Documentation/i2c/fault-codes.rst).
 		 * Everything else is an error.
 		 */
 		dev_dbg(&client->dev,
diff --git a/drivers/i2c/Kconfig b/drivers/i2c/Kconfig
index abedd55a1264..1474e57ecafc 100644
--- a/drivers/i2c/Kconfig
+++ b/drivers/i2c/Kconfig
@@ -54,7 +54,7 @@ config I2C_CHARDEV
 	  Say Y here to use i2c-* device files, usually found in the /dev
 	  directory on your system.  They make it possible to have user-space
 	  programs use the I2C bus.  Information on how to do this is
-	  contained in the file <file:Documentation/i2c/dev-interface>.
+	  contained in the file <file:Documentation/i2c/dev-interface.rst>.
 
 	  This support is also available as a module.  If so, the module 
 	  will be called i2c-dev.
@@ -107,7 +107,7 @@ config I2C_STUB
 	  especially for certain kinds of sensor chips.
 
 	  If you do build this module, be sure to read the notes and warnings
-	  in <file:Documentation/i2c/i2c-stub>.
+	  in <file:Documentation/i2c/i2c-stub.rst>.
 
 	  If you don't know what to do here, definitely say N.
 
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 09367fc014c3..22fcf554257b 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -1206,7 +1206,7 @@ config I2C_PARPORT
 	  and makes it easier to add support for new devices.
 
 	  An adapter type parameter is now mandatory.  Please read the file
-	  Documentation/i2c/busses/i2c-parport for details.
+	  Documentation/i2c/busses/i2c-parport.rst for details.
 
 	  Another driver exists, named i2c-parport-light, which doesn't depend
 	  on the parport driver.  This is meant for embedded systems. Don't say
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index f2956936c3f2..c2f087d20420 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -77,7 +77,7 @@
  * SMBus Host Notify			yes
  * Interrupt processing			yes
  *
- * See the file Documentation/i2c/busses/i2c-i801 for details.
+ * See the file Documentation/i2c/busses/i2c-i801.rst for details.
  */
 
 #include <linux/interrupt.h>
diff --git a/drivers/i2c/busses/i2c-taos-evm.c b/drivers/i2c/busses/i2c-taos-evm.c
index c82e78f57386..37347c93e8e0 100644
--- a/drivers/i2c/busses/i2c-taos-evm.c
+++ b/drivers/i2c/busses/i2c-taos-evm.c
@@ -125,7 +125,7 @@ static int taos_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
 			/*
 			 * Voluntarily dropping error code of kstrtou8 since all
 			 * error code that it could return are invalid according
-			 * to Documentation/i2c/fault-codes.
+			 * to Documentation/i2c/fault-codes.rst.
 			 */
 			if (kstrtou8(p + 1, 16, &data->byte))
 				return -EPROTO;
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index f26ed495d384..2e6dcf8ecbc9 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -2206,7 +2206,7 @@ static int i2c_detect_address(struct i2c_client *temp_client,
 			dev_warn(&adapter->dev,
 				"This adapter will soon drop class based instantiation of devices. "
 				"Please make sure client 0x%02x gets instantiated by other means. "
-				"Check 'Documentation/i2c/instantiating-devices' for details.\n",
+				"Check 'Documentation/i2c/instantiating-devices.rst' for details.\n",
 				info.addr);
 
 		dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n",
@@ -2236,7 +2236,7 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver)
 	if (adapter->class == I2C_CLASS_DEPRECATED) {
 		dev_dbg(&adapter->dev,
 			"This adapter dropped support for I2C classes and won't auto-detect %s devices anymore. "
-			"If you need it, check 'Documentation/i2c/instantiating-devices' for alternatives.\n",
+			"If you need it, check 'Documentation/i2c/instantiating-devices.rst' for alternatives.\n",
 			driver->driver.name);
 		return 0;
 	}
diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c
index 8f99c005458a..d28974ad9e0e 100644
--- a/drivers/iio/dummy/iio_simple_dummy.c
+++ b/drivers/iio/dummy/iio_simple_dummy.c
@@ -693,7 +693,7 @@ static int iio_dummy_remove(struct iio_sw_device *swd)
  * Varies depending on bus type of the device. As there is no device
  * here, call probe directly. For information on device registration
  * i2c:
- * Documentation/i2c/writing-clients
+ * Documentation/i2c/writing-clients.rst
  * spi:
  * Documentation/spi/spi-summary
  */
diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c
index 225a8df1d4e9..367497914c10 100644
--- a/drivers/rtc/rtc-ds1374.c
+++ b/drivers/rtc/rtc-ds1374.c
@@ -14,7 +14,7 @@
  */
 /*
  * It would be more efficient to use i2c msgs/i2c_transfer directly but, as
- * recommened in .../Documentation/i2c/writing-clients section
+ * recommended in .../Documentation/i2c/writing-clients.rst section
  * "Sending and receiving", using SMBus level communication is preferred.
  */
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index fa5552c2307b..c0a78c069117 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -521,7 +521,7 @@ i2c_register_board_info(int busnum, struct i2c_board_info const *info,
  *
  * The return codes from the @master_xfer{_atomic} fields should indicate the
  * type of error code that occurred during the transfer, as documented in the
- * Kernel Documentation file Documentation/i2c/fault-codes.
+ * Kernel Documentation file Documentation/i2c/fault-codes.rst.
  */
 struct i2c_algorithm {
 	/*
-- 
cgit v1.2.3


From ec23eb54fbc7a07405d416d77e8115e575ce3adc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Fri, 26 Jul 2019 09:51:27 -0300
Subject: docs: fs: convert docs without extension to ReST

There are 3 remaining files without an extension inside the fs docs
dir.

Manually convert them to ReST.

In the case of the nfs/exporting.rst file, as the nfs docs
aren't ported yet, I opted to convert and add a :orphan: there,
with should be removed when it gets added into a nfs-specific
part of the fs documentation.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/Locking               | 576 --------------------
 Documentation/filesystems/directory-locking     | 135 -----
 Documentation/filesystems/directory-locking.rst | 145 ++++++
 Documentation/filesystems/index.rst             |   2 +
 Documentation/filesystems/locking.rst           | 665 ++++++++++++++++++++++++
 Documentation/filesystems/nfs/Exporting         | 160 ------
 Documentation/filesystems/nfs/exporting.rst     | 165 ++++++
 Documentation/filesystems/vfs.rst               |   2 +-
 fs/cifs/export.c                                |   2 +-
 fs/exportfs/expfs.c                             |   2 +-
 fs/isofs/export.c                               |   2 +-
 fs/orangefs/file.c                              |   2 +-
 include/linux/dcache.h                          |   2 +-
 include/linux/exportfs.h                        |   2 +-
 14 files changed, 984 insertions(+), 878 deletions(-)
 delete mode 100644 Documentation/filesystems/Locking
 delete mode 100644 Documentation/filesystems/directory-locking
 create mode 100644 Documentation/filesystems/directory-locking.rst
 create mode 100644 Documentation/filesystems/locking.rst
 delete mode 100644 Documentation/filesystems/nfs/Exporting
 create mode 100644 Documentation/filesystems/nfs/exporting.rst

(limited to 'include')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
deleted file mode 100644
index 204dd3ea36bb..000000000000
--- a/Documentation/filesystems/Locking
+++ /dev/null
@@ -1,576 +0,0 @@
-	The text below describes the locking rules for VFS-related methods.
-It is (believed to be) up-to-date. *Please*, if you change anything in
-prototypes or locking protocols - update this file. And update the relevant
-instances in the tree, don't leave that to maintainers of filesystems/devices/
-etc. At the very least, put the list of dubious cases in the end of this file.
-Don't turn it into log - maintainers of out-of-the-tree code are supposed to
-be able to use diff(1).
-	Thing currently missing here: socket operations. Alexey?
-
---------------------------- dentry_operations --------------------------
-prototypes:
-	int (*d_revalidate)(struct dentry *, unsigned int);
-	int (*d_weak_revalidate)(struct dentry *, unsigned int);
-	int (*d_hash)(const struct dentry *, struct qstr *);
-	int (*d_compare)(const struct dentry *,
-			unsigned int, const char *, const struct qstr *);
-	int (*d_delete)(struct dentry *);
-	int (*d_init)(struct dentry *);
-	void (*d_release)(struct dentry *);
-	void (*d_iput)(struct dentry *, struct inode *);
-	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
-	struct vfsmount *(*d_automount)(struct path *path);
-	int (*d_manage)(const struct path *, bool);
-	struct dentry *(*d_real)(struct dentry *, const struct inode *);
-
-locking rules:
-		rename_lock	->d_lock	may block	rcu-walk
-d_revalidate:	no		no		yes (ref-walk)	maybe
-d_weak_revalidate:no		no		yes	 	no
-d_hash		no		no		no		maybe
-d_compare:	yes		no		no		maybe
-d_delete:	no		yes		no		no
-d_init:	no		no		yes		no
-d_release:	no		no		yes		no
-d_prune:        no              yes             no              no
-d_iput:		no		no		yes		no
-d_dname:	no		no		no		no
-d_automount:	no		no		yes		no
-d_manage:	no		no		yes (ref-walk)	maybe
-d_real		no		no		yes 		no
-
---------------------------- inode_operations --------------------------- 
-prototypes:
-	int (*create) (struct inode *,struct dentry *,umode_t, bool);
-	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
-	int (*link) (struct dentry *,struct inode *,struct dentry *);
-	int (*unlink) (struct inode *,struct dentry *);
-	int (*symlink) (struct inode *,struct dentry *,const char *);
-	int (*mkdir) (struct inode *,struct dentry *,umode_t);
-	int (*rmdir) (struct inode *,struct dentry *);
-	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
-	int (*rename) (struct inode *, struct dentry *,
-			struct inode *, struct dentry *, unsigned int);
-	int (*readlink) (struct dentry *, char __user *,int);
-	const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
-	void (*truncate) (struct inode *);
-	int (*permission) (struct inode *, int, unsigned int);
-	int (*get_acl)(struct inode *, int);
-	int (*setattr) (struct dentry *, struct iattr *);
-	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
-	ssize_t (*listxattr) (struct dentry *, char *, size_t);
-	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
-	void (*update_time)(struct inode *, struct timespec *, int);
-	int (*atomic_open)(struct inode *, struct dentry *,
-				struct file *, unsigned open_flag,
-				umode_t create_mode);
-	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
-
-locking rules:
-	all may block
-		i_rwsem(inode)
-lookup:		shared
-create:		exclusive
-link:		exclusive (both)
-mknod:		exclusive
-symlink:	exclusive
-mkdir:		exclusive
-unlink:		exclusive (both)
-rmdir:		exclusive (both)(see below)
-rename:		exclusive (all)	(see below)
-readlink:	no
-get_link:	no
-setattr:	exclusive
-permission:	no (may not block if called in rcu-walk mode)
-get_acl:	no
-getattr:	no
-listxattr:	no
-fiemap:		no
-update_time:	no
-atomic_open:	exclusive
-tmpfile:	no
-
-
-	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
-	exclusive on victim.
-	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
-
-See Documentation/filesystems/directory-locking for more detailed discussion
-of the locking scheme for directory operations.
-
------------------------ xattr_handler operations -----------------------
-prototypes:
-	bool (*list)(struct dentry *dentry);
-	int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
-		   struct inode *inode, const char *name, void *buffer,
-		   size_t size);
-	int (*set)(const struct xattr_handler *handler, struct dentry *dentry,
-		   struct inode *inode, const char *name, const void *buffer,
-		   size_t size, int flags);
-
-locking rules:
-	all may block
-		i_rwsem(inode)
-list:		no
-get:		no
-set:		exclusive
-
---------------------------- super_operations ---------------------------
-prototypes:
-	struct inode *(*alloc_inode)(struct super_block *sb);
-	void (*free_inode)(struct inode *);
-	void (*destroy_inode)(struct inode *);
-	void (*dirty_inode) (struct inode *, int flags);
-	int (*write_inode) (struct inode *, struct writeback_control *wbc);
-	int (*drop_inode) (struct inode *);
-	void (*evict_inode) (struct inode *);
-	void (*put_super) (struct super_block *);
-	int (*sync_fs)(struct super_block *sb, int wait);
-	int (*freeze_fs) (struct super_block *);
-	int (*unfreeze_fs) (struct super_block *);
-	int (*statfs) (struct dentry *, struct kstatfs *);
-	int (*remount_fs) (struct super_block *, int *, char *);
-	void (*umount_begin) (struct super_block *);
-	int (*show_options)(struct seq_file *, struct dentry *);
-	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
-	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
-	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
-
-locking rules:
-	All may block [not true, see below]
-			s_umount
-alloc_inode:
-free_inode:				called from RCU callback
-destroy_inode:
-dirty_inode:
-write_inode:
-drop_inode:				!!!inode->i_lock!!!
-evict_inode:
-put_super:		write
-sync_fs:		read
-freeze_fs:		write
-unfreeze_fs:		write
-statfs:			maybe(read)	(see below)
-remount_fs:		write
-umount_begin:		no
-show_options:		no		(namespace_sem)
-quota_read:		no		(see below)
-quota_write:		no		(see below)
-bdev_try_to_free_page:	no		(see below)
-
-->statfs() has s_umount (shared) when called by ustat(2) (native or
-compat), but that's an accident of bad API; s_umount is used to pin
-the superblock down when we only have dev_t given us by userland to
-identify the superblock.  Everything else (statfs(), fstatfs(), etc.)
-doesn't hold it when calling ->statfs() - superblock is pinned down
-by resolving the pathname passed to syscall.
-->quota_read() and ->quota_write() functions are both guaranteed to
-be the only ones operating on the quota file by the quota code (via
-dqio_sem) (unless an admin really wants to screw up something and
-writes to quota files with quotas on). For other details about locking
-see also dquot_operations section.
-->bdev_try_to_free_page is called from the ->releasepage handler of
-the block device inode.  See there for more details.
-
---------------------------- file_system_type ---------------------------
-prototypes:
-	struct dentry *(*mount) (struct file_system_type *, int,
-		       const char *, void *);
-	void (*kill_sb) (struct super_block *);
-locking rules:
-		may block
-mount		yes
-kill_sb		yes
-
-->mount() returns ERR_PTR or the root dentry; its superblock should be locked
-on return.
-->kill_sb() takes a write-locked superblock, does all shutdown work on it,
-unlocks and drops the reference.
-
---------------------------- address_space_operations --------------------------
-prototypes:
-	int (*writepage)(struct page *page, struct writeback_control *wbc);
-	int (*readpage)(struct file *, struct page *);
-	int (*writepages)(struct address_space *, struct writeback_control *);
-	int (*set_page_dirty)(struct page *page);
-	int (*readpages)(struct file *filp, struct address_space *mapping,
-			struct list_head *pages, unsigned nr_pages);
-	int (*write_begin)(struct file *, struct address_space *mapping,
-				loff_t pos, unsigned len, unsigned flags,
-				struct page **pagep, void **fsdata);
-	int (*write_end)(struct file *, struct address_space *mapping,
-				loff_t pos, unsigned len, unsigned copied,
-				struct page *page, void *fsdata);
-	sector_t (*bmap)(struct address_space *, sector_t);
-	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
-	int (*releasepage) (struct page *, int);
-	void (*freepage)(struct page *);
-	int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
-	bool (*isolate_page) (struct page *, isolate_mode_t);
-	int (*migratepage)(struct address_space *, struct page *, struct page *);
-	void (*putback_page) (struct page *);
-	int (*launder_page)(struct page *);
-	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
-	int (*error_remove_page)(struct address_space *, struct page *);
-	int (*swap_activate)(struct file *);
-	int (*swap_deactivate)(struct file *);
-
-locking rules:
-	All except set_page_dirty and freepage may block
-
-			PageLocked(page)	i_rwsem
-writepage:		yes, unlocks (see below)
-readpage:		yes, unlocks
-writepages:
-set_page_dirty		no
-readpages:
-write_begin:		locks the page		exclusive
-write_end:		yes, unlocks		exclusive
-bmap:
-invalidatepage:		yes
-releasepage:		yes
-freepage:		yes
-direct_IO:
-isolate_page:		yes
-migratepage:		yes (both)
-putback_page:		yes
-launder_page:		yes
-is_partially_uptodate:	yes
-error_remove_page:	yes
-swap_activate:		no
-swap_deactivate:	no
-
-	->write_begin(), ->write_end() and ->readpage() may be called from
-the request handler (/dev/loop).
-
-	->readpage() unlocks the page, either synchronously or via I/O
-completion.
-
-	->readpages() populates the pagecache with the passed pages and starts
-I/O against them.  They come unlocked upon I/O completion.
-
-	->writepage() is used for two purposes: for "memory cleansing" and for
-"sync".  These are quite different operations and the behaviour may differ
-depending upon the mode.
-
-If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
-it *must* start I/O against the page, even if that would involve
-blocking on in-progress I/O.
-
-If writepage is called for memory cleansing (sync_mode ==
-WBC_SYNC_NONE) then its role is to get as much writeout underway as
-possible.  So writepage should try to avoid blocking against
-currently-in-progress I/O.
-
-If the filesystem is not called for "sync" and it determines that it
-would need to block against in-progress I/O to be able to start new I/O
-against the page the filesystem should redirty the page with
-redirty_page_for_writepage(), then unlock the page and return zero.
-This may also be done to avoid internal deadlocks, but rarely.
-
-If the filesystem is called for sync then it must wait on any
-in-progress I/O and then start new I/O.
-
-The filesystem should unlock the page synchronously, before returning to the
-caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
-value. WRITEPAGE_ACTIVATE means that page cannot really be written out
-currently, and VM should stop calling ->writepage() on this page for some
-time. VM does this by moving page to the head of the active list, hence the
-name.
-
-Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
-and return zero, writepage *must* run set_page_writeback() against the page,
-followed by unlocking it.  Once set_page_writeback() has been run against the
-page, write I/O can be submitted and the write I/O completion handler must run
-end_page_writeback() once the I/O is complete.  If no I/O is submitted, the
-filesystem must run end_page_writeback() against the page before returning from
-writepage.
-
-That is: after 2.5.12, pages which are under writeout are *not* locked.  Note,
-if the filesystem needs the page to be locked during writeout, that is ok, too,
-the page is allowed to be unlocked at any point in time between the calls to
-set_page_writeback() and end_page_writeback().
-
-Note, failure to run either redirty_page_for_writepage() or the combination of
-set_page_writeback()/end_page_writeback() on a page submitted to writepage
-will leave the page itself marked clean but it will be tagged as dirty in the
-radix tree.  This incoherency can lead to all sorts of hard-to-debug problems
-in the filesystem like having dirty inodes at umount and losing written data.
-
-	->writepages() is used for periodic writeback and for syscall-initiated
-sync operations.  The address_space should start I/O against at least
-*nr_to_write pages.  *nr_to_write must be decremented for each page which is
-written.  The address_space implementation may write more (or less) pages
-than *nr_to_write asks for, but it should try to be reasonably close.  If
-nr_to_write is NULL, all dirty pages must be written.
-
-writepages should _only_ write pages which are present on
-mapping->io_pages.
-
-	->set_page_dirty() is called from various places in the kernel
-when the target page is marked as needing writeback.  It may be called
-under spinlock (it cannot block) and is sometimes called with the page
-not locked.
-
-	->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
-filesystems and by the swapper. The latter will eventually go away.  Please,
-keep it that way and don't breed new callers.
-
-	->invalidatepage() is called when the filesystem must attempt to drop
-some or all of the buffers from the page when it is being truncated. It
-returns zero on success. If ->invalidatepage is zero, the kernel uses
-block_invalidatepage() instead.
-
-	->releasepage() is called when the kernel is about to try to drop the
-buffers from the page in preparation for freeing it.  It returns zero to
-indicate that the buffers are (or may be) freeable.  If ->releasepage is zero,
-the kernel assumes that the fs has no private interest in the buffers.
-
-	->freepage() is called when the kernel is done dropping the page
-from the page cache.
-
-	->launder_page() may be called prior to releasing a page if
-it is still found to be dirty. It returns zero if the page was successfully
-cleaned, or an error value if not. Note that in order to prevent the page
-getting mapped back in and redirtied, it needs to be kept locked
-across the entire operation.
-
-	->swap_activate will be called with a non-zero argument on
-files backing (non block device backed) swapfiles. A return value
-of zero indicates success, in which case this file can be used for
-backing swapspace. The swapspace operations will be proxied to the
-address space operations.
-
-	->swap_deactivate() will be called in the sys_swapoff()
-path after ->swap_activate() returned success.
-
------------------------ file_lock_operations ------------------------------
-prototypes:
-	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
-	void (*fl_release_private)(struct file_lock *);
-
-
-locking rules:
-			inode->i_lock	may block
-fl_copy_lock:		yes		no
-fl_release_private:	maybe		maybe[1]
-
-[1]:	->fl_release_private for flock or POSIX locks is currently allowed
-to block. Leases however can still be freed while the i_lock is held and
-so fl_release_private called on a lease should not block.
-
------------------------ lock_manager_operations ---------------------------
-prototypes:
-	void (*lm_notify)(struct file_lock *);  /* unblock callback */
-	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
-	void (*lm_break)(struct file_lock *); /* break_lease callback */
-	int (*lm_change)(struct file_lock **, int);
-
-locking rules:
-
-			inode->i_lock	blocked_lock_lock	may block
-lm_notify:		yes		yes			no
-lm_grant:		no		no			no
-lm_break:		yes		no			no
-lm_change		yes		no			no
-
---------------------------- buffer_head -----------------------------------
-prototypes:
-	void (*b_end_io)(struct buffer_head *bh, int uptodate);
-
-locking rules:
-	called from interrupts. In other words, extreme care is needed here.
-bh is locked, but that's all warranties we have here. Currently only RAID1,
-highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
-call this method upon the IO completion.
-
---------------------------- block_device_operations -----------------------
-prototypes:
-	int (*open) (struct block_device *, fmode_t);
-	int (*release) (struct gendisk *, fmode_t);
-	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-	int (*direct_access) (struct block_device *, sector_t, void **,
-				unsigned long *);
-	int (*media_changed) (struct gendisk *);
-	void (*unlock_native_capacity) (struct gendisk *);
-	int (*revalidate_disk) (struct gendisk *);
-	int (*getgeo)(struct block_device *, struct hd_geometry *);
-	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
-
-locking rules:
-			bd_mutex
-open:			yes
-release:		yes
-ioctl:			no
-compat_ioctl:		no
-direct_access:		no
-media_changed:		no
-unlock_native_capacity:	no
-revalidate_disk:	no
-getgeo:			no
-swap_slot_free_notify:	no	(see below)
-
-media_changed, unlock_native_capacity and revalidate_disk are called only from
-check_disk_change().
-
-swap_slot_free_notify is called with swap_lock and sometimes the page lock
-held.
-
-
---------------------------- file_operations -------------------------------
-prototypes:
-	loff_t (*llseek) (struct file *, loff_t, int);
-	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
-	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
-	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
-	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
-	int (*iterate) (struct file *, struct dir_context *);
-	int (*iterate_shared) (struct file *, struct dir_context *);
-	__poll_t (*poll) (struct file *, struct poll_table_struct *);
-	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
-	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
-	int (*mmap) (struct file *, struct vm_area_struct *);
-	int (*open) (struct inode *, struct file *);
-	int (*flush) (struct file *);
-	int (*release) (struct inode *, struct file *);
-	int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
-	int (*fasync) (int, struct file *, int);
-	int (*lock) (struct file *, int, struct file_lock *);
-	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
-			loff_t *);
-	ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
-			loff_t *);
-	ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
-			void __user *);
-	ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
-			loff_t *, int);
-	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
-			unsigned long, unsigned long, unsigned long);
-	int (*check_flags)(int);
-	int (*flock) (struct file *, int, struct file_lock *);
-	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
-			size_t, unsigned int);
-	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
-			size_t, unsigned int);
-	int (*setlease)(struct file *, long, struct file_lock **, void **);
-	long (*fallocate)(struct file *, int, loff_t, loff_t);
-};
-
-locking rules:
-	All may block.
-
-->llseek() locking has moved from llseek to the individual llseek
-implementations.  If your fs is not using generic_file_llseek, you
-need to acquire and release the appropriate locks in your ->llseek().
-For many filesystems, it is probably safe to acquire the inode
-mutex or just to use i_size_read() instead.
-Note: this does not protect the file->f_pos against concurrent modifications
-since this is something the userspace has to take care about.
-
-->iterate() is called with i_rwsem exclusive.
-
-->iterate_shared() is called with i_rwsem at least shared.
-
-->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
-Most instances call fasync_helper(), which does that maintenance, so it's
-not normally something one needs to worry about.  Return values > 0 will be
-mapped to zero in the VFS layer.
-
-->readdir() and ->ioctl() on directories must be changed. Ideally we would
-move ->readdir() to inode_operations and use a separate method for directory
-->ioctl() or kill the latter completely. One of the problems is that for
-anything that resembles union-mount we won't have a struct file for all
-components. And there are other reasons why the current interface is a mess...
-
-->read on directories probably must go away - we should just enforce -EISDIR
-in sys_read() and friends.
-
-->setlease operations should call generic_setlease() before or after setting
-the lease within the individual filesystem to record the result of the
-operation
-
---------------------------- dquot_operations -------------------------------
-prototypes:
-	int (*write_dquot) (struct dquot *);
-	int (*acquire_dquot) (struct dquot *);
-	int (*release_dquot) (struct dquot *);
-	int (*mark_dirty) (struct dquot *);
-	int (*write_info) (struct super_block *, int);
-
-These operations are intended to be more or less wrapping functions that ensure
-a proper locking wrt the filesystem and call the generic quota operations.
-
-What filesystem should expect from the generic quota functions:
-
-		FS recursion	Held locks when called
-write_dquot:	yes		dqonoff_sem or dqptr_sem
-acquire_dquot:	yes		dqonoff_sem or dqptr_sem
-release_dquot:	yes		dqonoff_sem or dqptr_sem
-mark_dirty:	no		-
-write_info:	yes		dqonoff_sem
-
-FS recursion means calling ->quota_read() and ->quota_write() from superblock
-operations.
-
-More details about quota locking can be found in fs/dquot.c.
-
---------------------------- vm_operations_struct -----------------------------
-prototypes:
-	void (*open)(struct vm_area_struct*);
-	void (*close)(struct vm_area_struct*);
-	vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *);
-	vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
-	vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
-	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
-
-locking rules:
-		mmap_sem	PageLocked(page)
-open:		yes
-close:		yes
-fault:		yes		can return with page locked
-map_pages:	yes
-page_mkwrite:	yes		can return with page locked
-pfn_mkwrite:	yes
-access:		yes
-
-	->fault() is called when a previously not present pte is about
-to be faulted in. The filesystem must find and return the page associated
-with the passed in "pgoff" in the vm_fault structure. If it is possible that
-the page may be truncated and/or invalidated, then the filesystem must lock
-the page, then ensure it is not already truncated (the page lock will block
-subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
-locked. The VM will unlock the page.
-
-	->map_pages() is called when VM asks to map easy accessible pages.
-Filesystem should find and map pages associated with offsets from "start_pgoff"
-till "end_pgoff". ->map_pages() is called with page table locked and must
-not block.  If it's not possible to reach a page without blocking,
-filesystem should skip it. Filesystem should use do_set_pte() to setup
-page table entry. Pointer to entry associated with the page is passed in
-"pte" field in vm_fault structure. Pointers to entries for other offsets
-should be calculated relative to "pte".
-
-	->page_mkwrite() is called when a previously read-only pte is
-about to become writeable. The filesystem again must ensure that there are
-no truncate/invalidate races, and then return with the page locked. If
-the page has been truncated, the filesystem should not look up a new page
-like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
-will cause the VM to retry the fault.
-
-	->pfn_mkwrite() is the same as page_mkwrite but when the pte is
-VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
-VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior
-after this call is to make the pte read-write, unless pfn_mkwrite returns
-an error.
-
-	->access() is called when get_user_pages() fails in
-access_process_vm(), typically used to debug a process through
-/proc/pid/mem or ptrace.  This function is needed only for
-VM_IO | VM_PFNMAP VMAs.
-
-================================================================================
-			Dubious stuff
-
-(if you break something or notice that it is broken and do not fix it yourself
-- at least put it here)
diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking
deleted file mode 100644
index 4e32cb961e5b..000000000000
--- a/Documentation/filesystems/directory-locking
+++ /dev/null
@@ -1,135 +0,0 @@
-	Locking scheme used for directory operations is based on two
-kinds of locks - per-inode (->i_rwsem) and per-filesystem
-(->s_vfs_rename_mutex).
-
-	When taking the i_rwsem on multiple non-directory objects, we
-always acquire the locks in order by increasing address.  We'll call
-that "inode pointer" order in the following.
-
-	For our purposes all operations fall in 5 classes:
-
-1) read access.  Locking rules: caller locks directory we are accessing.
-The lock is taken shared.
-
-2) object creation.  Locking rules: same as above, but the lock is taken
-exclusive.
-
-3) object removal.  Locking rules: caller locks parent, finds victim,
-locks victim and calls the method.  Locks are exclusive.
-
-4) rename() that is _not_ cross-directory.  Locking rules: caller locks
-the parent and finds source and target.  In case of exchange (with
-RENAME_EXCHANGE in flags argument) lock both.  In any case,
-if the target already exists, lock it.  If the source is a non-directory,
-lock it.  If we need to lock both, lock them in inode pointer order.
-Then call the method.  All locks are exclusive.
-NB: we might get away with locking the the source (and target in exchange
-case) shared.
-
-5) link creation.  Locking rules:
-	* lock parent
-	* check that source is not a directory
-	* lock source
-	* call the method.
-All locks are exclusive.
-
-6) cross-directory rename.  The trickiest in the whole bunch.  Locking
-rules:
-	* lock the filesystem
-	* lock parents in "ancestors first" order.
-	* find source and target.
-	* if old parent is equal to or is a descendent of target
-		fail with -ENOTEMPTY
-	* if new parent is equal to or is a descendent of source
-		fail with -ELOOP
-	* If it's an exchange, lock both the source and the target.
-	* If the target exists, lock it.  If the source is a non-directory,
-	  lock it.  If we need to lock both, do so in inode pointer order.
-	* call the method.
-All ->i_rwsem are taken exclusive.  Again, we might get away with locking
-the the source (and target in exchange case) shared.
-
-The rules above obviously guarantee that all directories that are going to be
-read, modified or removed by method will be locked by caller.
-
-
-If no directory is its own ancestor, the scheme above is deadlock-free.
-Proof:
-
-	First of all, at any moment we have a partial ordering of the
-objects - A < B iff A is an ancestor of B.
-
-	That ordering can change.  However, the following is true:
-
-(1) if object removal or non-cross-directory rename holds lock on A and
-    attempts to acquire lock on B, A will remain the parent of B until we
-    acquire the lock on B.  (Proof: only cross-directory rename can change
-    the parent of object and it would have to lock the parent).
-
-(2) if cross-directory rename holds the lock on filesystem, order will not
-    change until rename acquires all locks.  (Proof: other cross-directory
-    renames will be blocked on filesystem lock and we don't start changing
-    the order until we had acquired all locks).
-
-(3) locks on non-directory objects are acquired only after locks on
-    directory objects, and are acquired in inode pointer order.
-    (Proof: all operations but renames take lock on at most one
-    non-directory object, except renames, which take locks on source and
-    target in inode pointer order in the case they are not directories.)
-
-	Now consider the minimal deadlock.  Each process is blocked on
-attempt to acquire some lock and already holds at least one lock.  Let's
-consider the set of contended locks.  First of all, filesystem lock is
-not contended, since any process blocked on it is not holding any locks.
-Thus all processes are blocked on ->i_rwsem.
-
-	By (3), any process holding a non-directory lock can only be
-waiting on another non-directory lock with a larger address.  Therefore
-the process holding the "largest" such lock can always make progress, and
-non-directory objects are not included in the set of contended locks.
-
-	Thus link creation can't be a part of deadlock - it can't be
-blocked on source and it means that it doesn't hold any locks.
-
-	Any contended object is either held by cross-directory rename or
-has a child that is also contended.  Indeed, suppose that it is held by
-operation other than cross-directory rename.  Then the lock this operation
-is blocked on belongs to child of that object due to (1).
-
-	It means that one of the operations is cross-directory rename.
-Otherwise the set of contended objects would be infinite - each of them
-would have a contended child and we had assumed that no object is its
-own descendent.  Moreover, there is exactly one cross-directory rename
-(see above).
-
-	Consider the object blocking the cross-directory rename.  One
-of its descendents is locked by cross-directory rename (otherwise we
-would again have an infinite set of contended objects).  But that
-means that cross-directory rename is taking locks out of order.  Due
-to (2) the order hadn't changed since we had acquired filesystem lock.
-But locking rules for cross-directory rename guarantee that we do not
-try to acquire lock on descendent before the lock on ancestor.
-Contradiction.  I.e.  deadlock is impossible.  Q.E.D.
-
-
-	These operations are guaranteed to avoid loop creation.  Indeed,
-the only operation that could introduce loops is cross-directory rename.
-Since the only new (parent, child) pair added by rename() is (new parent,
-source), such loop would have to contain these objects and the rest of it
-would have to exist before rename().  I.e. at the moment of loop creation
-rename() responsible for that would be holding filesystem lock and new parent
-would have to be equal to or a descendent of source.  But that means that
-new parent had been equal to or a descendent of source since the moment when
-we had acquired filesystem lock and rename() would fail with -ELOOP in that
-case.
-
-	While this locking scheme works for arbitrary DAGs, it relies on
-ability to check that directory is a descendent of another object.  Current
-implementation assumes that directory graph is a tree.  This assumption is
-also preserved by all operations (cross-directory rename on a tree that would
-not introduce a cycle will leave it a tree and link() fails for directories).
-
-	Notice that "directory" in the above == "anything that might have
-children", so if we are going to introduce hybrid objects we will need
-either to make sure that link(2) doesn't work for them or to make changes
-in is_subdir() that would make it work even in presence of such beasts.
diff --git a/Documentation/filesystems/directory-locking.rst b/Documentation/filesystems/directory-locking.rst
new file mode 100644
index 000000000000..de12016ee419
--- /dev/null
+++ b/Documentation/filesystems/directory-locking.rst
@@ -0,0 +1,145 @@
+=================
+Directory Locking
+=================
+
+
+Locking scheme used for directory operations is based on two
+kinds of locks - per-inode (->i_rwsem) and per-filesystem
+(->s_vfs_rename_mutex).
+
+When taking the i_rwsem on multiple non-directory objects, we
+always acquire the locks in order by increasing address.  We'll call
+that "inode pointer" order in the following.
+
+For our purposes all operations fall in 5 classes:
+
+1) read access.  Locking rules: caller locks directory we are accessing.
+The lock is taken shared.
+
+2) object creation.  Locking rules: same as above, but the lock is taken
+exclusive.
+
+3) object removal.  Locking rules: caller locks parent, finds victim,
+locks victim and calls the method.  Locks are exclusive.
+
+4) rename() that is _not_ cross-directory.  Locking rules: caller locks
+the parent and finds source and target.  In case of exchange (with
+RENAME_EXCHANGE in flags argument) lock both.  In any case,
+if the target already exists, lock it.  If the source is a non-directory,
+lock it.  If we need to lock both, lock them in inode pointer order.
+Then call the method.  All locks are exclusive.
+NB: we might get away with locking the the source (and target in exchange
+case) shared.
+
+5) link creation.  Locking rules:
+
+	* lock parent
+	* check that source is not a directory
+	* lock source
+	* call the method.
+
+All locks are exclusive.
+
+6) cross-directory rename.  The trickiest in the whole bunch.  Locking
+rules:
+
+	* lock the filesystem
+	* lock parents in "ancestors first" order.
+	* find source and target.
+	* if old parent is equal to or is a descendent of target
+	  fail with -ENOTEMPTY
+	* if new parent is equal to or is a descendent of source
+	  fail with -ELOOP
+	* If it's an exchange, lock both the source and the target.
+	* If the target exists, lock it.  If the source is a non-directory,
+	  lock it.  If we need to lock both, do so in inode pointer order.
+	* call the method.
+
+All ->i_rwsem are taken exclusive.  Again, we might get away with locking
+the the source (and target in exchange case) shared.
+
+The rules above obviously guarantee that all directories that are going to be
+read, modified or removed by method will be locked by caller.
+
+
+If no directory is its own ancestor, the scheme above is deadlock-free.
+
+Proof:
+
+	First of all, at any moment we have a partial ordering of the
+	objects - A < B iff A is an ancestor of B.
+
+	That ordering can change.  However, the following is true:
+
+(1) if object removal or non-cross-directory rename holds lock on A and
+    attempts to acquire lock on B, A will remain the parent of B until we
+    acquire the lock on B.  (Proof: only cross-directory rename can change
+    the parent of object and it would have to lock the parent).
+
+(2) if cross-directory rename holds the lock on filesystem, order will not
+    change until rename acquires all locks.  (Proof: other cross-directory
+    renames will be blocked on filesystem lock and we don't start changing
+    the order until we had acquired all locks).
+
+(3) locks on non-directory objects are acquired only after locks on
+    directory objects, and are acquired in inode pointer order.
+    (Proof: all operations but renames take lock on at most one
+    non-directory object, except renames, which take locks on source and
+    target in inode pointer order in the case they are not directories.)
+
+Now consider the minimal deadlock.  Each process is blocked on
+attempt to acquire some lock and already holds at least one lock.  Let's
+consider the set of contended locks.  First of all, filesystem lock is
+not contended, since any process blocked on it is not holding any locks.
+Thus all processes are blocked on ->i_rwsem.
+
+By (3), any process holding a non-directory lock can only be
+waiting on another non-directory lock with a larger address.  Therefore
+the process holding the "largest" such lock can always make progress, and
+non-directory objects are not included in the set of contended locks.
+
+Thus link creation can't be a part of deadlock - it can't be
+blocked on source and it means that it doesn't hold any locks.
+
+Any contended object is either held by cross-directory rename or
+has a child that is also contended.  Indeed, suppose that it is held by
+operation other than cross-directory rename.  Then the lock this operation
+is blocked on belongs to child of that object due to (1).
+
+It means that one of the operations is cross-directory rename.
+Otherwise the set of contended objects would be infinite - each of them
+would have a contended child and we had assumed that no object is its
+own descendent.  Moreover, there is exactly one cross-directory rename
+(see above).
+
+Consider the object blocking the cross-directory rename.  One
+of its descendents is locked by cross-directory rename (otherwise we
+would again have an infinite set of contended objects).  But that
+means that cross-directory rename is taking locks out of order.  Due
+to (2) the order hadn't changed since we had acquired filesystem lock.
+But locking rules for cross-directory rename guarantee that we do not
+try to acquire lock on descendent before the lock on ancestor.
+Contradiction.  I.e.  deadlock is impossible.  Q.E.D.
+
+
+These operations are guaranteed to avoid loop creation.  Indeed,
+the only operation that could introduce loops is cross-directory rename.
+Since the only new (parent, child) pair added by rename() is (new parent,
+source), such loop would have to contain these objects and the rest of it
+would have to exist before rename().  I.e. at the moment of loop creation
+rename() responsible for that would be holding filesystem lock and new parent
+would have to be equal to or a descendent of source.  But that means that
+new parent had been equal to or a descendent of source since the moment when
+we had acquired filesystem lock and rename() would fail with -ELOOP in that
+case.
+
+While this locking scheme works for arbitrary DAGs, it relies on
+ability to check that directory is a descendent of another object.  Current
+implementation assumes that directory graph is a tree.  This assumption is
+also preserved by all operations (cross-directory rename on a tree that would
+not introduce a cycle will leave it a tree and link() fails for directories).
+
+Notice that "directory" in the above == "anything that might have
+children", so if we are going to introduce hybrid objects we will need
+either to make sure that link(2) doesn't work for them or to make changes
+in is_subdir() that would make it work even in presence of such beasts.
diff --git a/Documentation/filesystems/index.rst b/Documentation/filesystems/index.rst
index 2de2fe2ab078..08320c35d03b 100644
--- a/Documentation/filesystems/index.rst
+++ b/Documentation/filesystems/index.rst
@@ -20,6 +20,8 @@ algorithms work.
    path-lookup
    api-summary
    splice
+   locking
+   directory-locking
 
 Filesystem support layers
 =========================
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
new file mode 100644
index 000000000000..fc3a0704553c
--- /dev/null
+++ b/Documentation/filesystems/locking.rst
@@ -0,0 +1,665 @@
+=======
+Locking
+=======
+
+The text below describes the locking rules for VFS-related methods.
+It is (believed to be) up-to-date. *Please*, if you change anything in
+prototypes or locking protocols - update this file. And update the relevant
+instances in the tree, don't leave that to maintainers of filesystems/devices/
+etc. At the very least, put the list of dubious cases in the end of this file.
+Don't turn it into log - maintainers of out-of-the-tree code are supposed to
+be able to use diff(1).
+
+Thing currently missing here: socket operations. Alexey?
+
+dentry_operations
+=================
+
+prototypes::
+
+	int (*d_revalidate)(struct dentry *, unsigned int);
+	int (*d_weak_revalidate)(struct dentry *, unsigned int);
+	int (*d_hash)(const struct dentry *, struct qstr *);
+	int (*d_compare)(const struct dentry *,
+			unsigned int, const char *, const struct qstr *);
+	int (*d_delete)(struct dentry *);
+	int (*d_init)(struct dentry *);
+	void (*d_release)(struct dentry *);
+	void (*d_iput)(struct dentry *, struct inode *);
+	char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
+	struct vfsmount *(*d_automount)(struct path *path);
+	int (*d_manage)(const struct path *, bool);
+	struct dentry *(*d_real)(struct dentry *, const struct inode *);
+
+locking rules:
+
+================== ===========	========	==============	========
+ops		   rename_lock	->d_lock	may block	rcu-walk
+================== ===========	========	==============	========
+d_revalidate:	   no		no		yes (ref-walk)	maybe
+d_weak_revalidate: no		no		yes	 	no
+d_hash		   no		no		no		maybe
+d_compare:	   yes		no		no		maybe
+d_delete:	   no		yes		no		no
+d_init:		   no		no		yes		no
+d_release:	   no		no		yes		no
+d_prune:           no		yes		no		no
+d_iput:		   no		no		yes		no
+d_dname:	   no		no		no		no
+d_automount:	   no		no		yes		no
+d_manage:	   no		no		yes (ref-walk)	maybe
+d_real		   no		no		yes 		no
+================== ===========	========	==============	========
+
+inode_operations
+================
+
+prototypes::
+
+	int (*create) (struct inode *,struct dentry *,umode_t, bool);
+	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
+	int (*link) (struct dentry *,struct inode *,struct dentry *);
+	int (*unlink) (struct inode *,struct dentry *);
+	int (*symlink) (struct inode *,struct dentry *,const char *);
+	int (*mkdir) (struct inode *,struct dentry *,umode_t);
+	int (*rmdir) (struct inode *,struct dentry *);
+	int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
+	int (*rename) (struct inode *, struct dentry *,
+			struct inode *, struct dentry *, unsigned int);
+	int (*readlink) (struct dentry *, char __user *,int);
+	const char *(*get_link) (struct dentry *, struct inode *, struct delayed_call *);
+	void (*truncate) (struct inode *);
+	int (*permission) (struct inode *, int, unsigned int);
+	int (*get_acl)(struct inode *, int);
+	int (*setattr) (struct dentry *, struct iattr *);
+	int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+	ssize_t (*listxattr) (struct dentry *, char *, size_t);
+	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
+	void (*update_time)(struct inode *, struct timespec *, int);
+	int (*atomic_open)(struct inode *, struct dentry *,
+				struct file *, unsigned open_flag,
+				umode_t create_mode);
+	int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+
+locking rules:
+	all may block
+
+============	=============================================
+ops		i_rwsem(inode)
+============	=============================================
+lookup:		shared
+create:		exclusive
+link:		exclusive (both)
+mknod:		exclusive
+symlink:	exclusive
+mkdir:		exclusive
+unlink:		exclusive (both)
+rmdir:		exclusive (both)(see below)
+rename:		exclusive (all)	(see below)
+readlink:	no
+get_link:	no
+setattr:	exclusive
+permission:	no (may not block if called in rcu-walk mode)
+get_acl:	no
+getattr:	no
+listxattr:	no
+fiemap:		no
+update_time:	no
+atomic_open:	exclusive
+tmpfile:	no
+============	=============================================
+
+
+	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
+	exclusive on victim.
+	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
+
+See Documentation/filesystems/directory-locking.rst for more detailed discussion
+of the locking scheme for directory operations.
+
+xattr_handler operations
+========================
+
+prototypes::
+
+	bool (*list)(struct dentry *dentry);
+	int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
+		   struct inode *inode, const char *name, void *buffer,
+		   size_t size);
+	int (*set)(const struct xattr_handler *handler, struct dentry *dentry,
+		   struct inode *inode, const char *name, const void *buffer,
+		   size_t size, int flags);
+
+locking rules:
+	all may block
+
+=====		==============
+ops		i_rwsem(inode)
+=====		==============
+list:		no
+get:		no
+set:		exclusive
+=====		==============
+
+super_operations
+================
+
+prototypes::
+
+	struct inode *(*alloc_inode)(struct super_block *sb);
+	void (*free_inode)(struct inode *);
+	void (*destroy_inode)(struct inode *);
+	void (*dirty_inode) (struct inode *, int flags);
+	int (*write_inode) (struct inode *, struct writeback_control *wbc);
+	int (*drop_inode) (struct inode *);
+	void (*evict_inode) (struct inode *);
+	void (*put_super) (struct super_block *);
+	int (*sync_fs)(struct super_block *sb, int wait);
+	int (*freeze_fs) (struct super_block *);
+	int (*unfreeze_fs) (struct super_block *);
+	int (*statfs) (struct dentry *, struct kstatfs *);
+	int (*remount_fs) (struct super_block *, int *, char *);
+	void (*umount_begin) (struct super_block *);
+	int (*show_options)(struct seq_file *, struct dentry *);
+	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+
+locking rules:
+	All may block [not true, see below]
+
+======================	============	========================
+ops			s_umount	note
+======================	============	========================
+alloc_inode:
+free_inode:				called from RCU callback
+destroy_inode:
+dirty_inode:
+write_inode:
+drop_inode:				!!!inode->i_lock!!!
+evict_inode:
+put_super:		write
+sync_fs:		read
+freeze_fs:		write
+unfreeze_fs:		write
+statfs:			maybe(read)	(see below)
+remount_fs:		write
+umount_begin:		no
+show_options:		no		(namespace_sem)
+quota_read:		no		(see below)
+quota_write:		no		(see below)
+bdev_try_to_free_page:	no		(see below)
+======================	============	========================
+
+->statfs() has s_umount (shared) when called by ustat(2) (native or
+compat), but that's an accident of bad API; s_umount is used to pin
+the superblock down when we only have dev_t given us by userland to
+identify the superblock.  Everything else (statfs(), fstatfs(), etc.)
+doesn't hold it when calling ->statfs() - superblock is pinned down
+by resolving the pathname passed to syscall.
+
+->quota_read() and ->quota_write() functions are both guaranteed to
+be the only ones operating on the quota file by the quota code (via
+dqio_sem) (unless an admin really wants to screw up something and
+writes to quota files with quotas on). For other details about locking
+see also dquot_operations section.
+
+->bdev_try_to_free_page is called from the ->releasepage handler of
+the block device inode.  See there for more details.
+
+file_system_type
+================
+
+prototypes::
+
+	struct dentry *(*mount) (struct file_system_type *, int,
+		       const char *, void *);
+	void (*kill_sb) (struct super_block *);
+
+locking rules:
+
+=======		=========
+ops		may block
+=======		=========
+mount		yes
+kill_sb		yes
+=======		=========
+
+->mount() returns ERR_PTR or the root dentry; its superblock should be locked
+on return.
+
+->kill_sb() takes a write-locked superblock, does all shutdown work on it,
+unlocks and drops the reference.
+
+address_space_operations
+========================
+prototypes::
+
+	int (*writepage)(struct page *page, struct writeback_control *wbc);
+	int (*readpage)(struct file *, struct page *);
+	int (*writepages)(struct address_space *, struct writeback_control *);
+	int (*set_page_dirty)(struct page *page);
+	int (*readpages)(struct file *filp, struct address_space *mapping,
+			struct list_head *pages, unsigned nr_pages);
+	int (*write_begin)(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned flags,
+				struct page **pagep, void **fsdata);
+	int (*write_end)(struct file *, struct address_space *mapping,
+				loff_t pos, unsigned len, unsigned copied,
+				struct page *page, void *fsdata);
+	sector_t (*bmap)(struct address_space *, sector_t);
+	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+	int (*releasepage) (struct page *, int);
+	void (*freepage)(struct page *);
+	int (*direct_IO)(struct kiocb *, struct iov_iter *iter);
+	bool (*isolate_page) (struct page *, isolate_mode_t);
+	int (*migratepage)(struct address_space *, struct page *, struct page *);
+	void (*putback_page) (struct page *);
+	int (*launder_page)(struct page *);
+	int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
+	int (*error_remove_page)(struct address_space *, struct page *);
+	int (*swap_activate)(struct file *);
+	int (*swap_deactivate)(struct file *);
+
+locking rules:
+	All except set_page_dirty and freepage may block
+
+======================	======================== =========
+ops			PageLocked(page)	 i_rwsem
+======================	======================== =========
+writepage:		yes, unlocks (see below)
+readpage:		yes, unlocks
+writepages:
+set_page_dirty		no
+readpages:
+write_begin:		locks the page		 exclusive
+write_end:		yes, unlocks		 exclusive
+bmap:
+invalidatepage:		yes
+releasepage:		yes
+freepage:		yes
+direct_IO:
+isolate_page:		yes
+migratepage:		yes (both)
+putback_page:		yes
+launder_page:		yes
+is_partially_uptodate:	yes
+error_remove_page:	yes
+swap_activate:		no
+swap_deactivate:	no
+======================	======================== =========
+
+->write_begin(), ->write_end() and ->readpage() may be called from
+the request handler (/dev/loop).
+
+->readpage() unlocks the page, either synchronously or via I/O
+completion.
+
+->readpages() populates the pagecache with the passed pages and starts
+I/O against them.  They come unlocked upon I/O completion.
+
+->writepage() is used for two purposes: for "memory cleansing" and for
+"sync".  These are quite different operations and the behaviour may differ
+depending upon the mode.
+
+If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
+it *must* start I/O against the page, even if that would involve
+blocking on in-progress I/O.
+
+If writepage is called for memory cleansing (sync_mode ==
+WBC_SYNC_NONE) then its role is to get as much writeout underway as
+possible.  So writepage should try to avoid blocking against
+currently-in-progress I/O.
+
+If the filesystem is not called for "sync" and it determines that it
+would need to block against in-progress I/O to be able to start new I/O
+against the page the filesystem should redirty the page with
+redirty_page_for_writepage(), then unlock the page and return zero.
+This may also be done to avoid internal deadlocks, but rarely.
+
+If the filesystem is called for sync then it must wait on any
+in-progress I/O and then start new I/O.
+
+The filesystem should unlock the page synchronously, before returning to the
+caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
+value. WRITEPAGE_ACTIVATE means that page cannot really be written out
+currently, and VM should stop calling ->writepage() on this page for some
+time. VM does this by moving page to the head of the active list, hence the
+name.
+
+Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
+and return zero, writepage *must* run set_page_writeback() against the page,
+followed by unlocking it.  Once set_page_writeback() has been run against the
+page, write I/O can be submitted and the write I/O completion handler must run
+end_page_writeback() once the I/O is complete.  If no I/O is submitted, the
+filesystem must run end_page_writeback() against the page before returning from
+writepage.
+
+That is: after 2.5.12, pages which are under writeout are *not* locked.  Note,
+if the filesystem needs the page to be locked during writeout, that is ok, too,
+the page is allowed to be unlocked at any point in time between the calls to
+set_page_writeback() and end_page_writeback().
+
+Note, failure to run either redirty_page_for_writepage() or the combination of
+set_page_writeback()/end_page_writeback() on a page submitted to writepage
+will leave the page itself marked clean but it will be tagged as dirty in the
+radix tree.  This incoherency can lead to all sorts of hard-to-debug problems
+in the filesystem like having dirty inodes at umount and losing written data.
+
+->writepages() is used for periodic writeback and for syscall-initiated
+sync operations.  The address_space should start I/O against at least
+``*nr_to_write`` pages.  ``*nr_to_write`` must be decremented for each page
+which is written.  The address_space implementation may write more (or less)
+pages than ``*nr_to_write`` asks for, but it should try to be reasonably close.
+If nr_to_write is NULL, all dirty pages must be written.
+
+writepages should _only_ write pages which are present on
+mapping->io_pages.
+
+->set_page_dirty() is called from various places in the kernel
+when the target page is marked as needing writeback.  It may be called
+under spinlock (it cannot block) and is sometimes called with the page
+not locked.
+
+->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
+filesystems and by the swapper. The latter will eventually go away.  Please,
+keep it that way and don't breed new callers.
+
+->invalidatepage() is called when the filesystem must attempt to drop
+some or all of the buffers from the page when it is being truncated. It
+returns zero on success. If ->invalidatepage is zero, the kernel uses
+block_invalidatepage() instead.
+
+->releasepage() is called when the kernel is about to try to drop the
+buffers from the page in preparation for freeing it.  It returns zero to
+indicate that the buffers are (or may be) freeable.  If ->releasepage is zero,
+the kernel assumes that the fs has no private interest in the buffers.
+
+->freepage() is called when the kernel is done dropping the page
+from the page cache.
+
+->launder_page() may be called prior to releasing a page if
+it is still found to be dirty. It returns zero if the page was successfully
+cleaned, or an error value if not. Note that in order to prevent the page
+getting mapped back in and redirtied, it needs to be kept locked
+across the entire operation.
+
+->swap_activate will be called with a non-zero argument on
+files backing (non block device backed) swapfiles. A return value
+of zero indicates success, in which case this file can be used for
+backing swapspace. The swapspace operations will be proxied to the
+address space operations.
+
+->swap_deactivate() will be called in the sys_swapoff()
+path after ->swap_activate() returned success.
+
+file_lock_operations
+====================
+
+prototypes::
+
+	void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+	void (*fl_release_private)(struct file_lock *);
+
+
+locking rules:
+
+===================	=============	=========
+ops			inode->i_lock	may block
+===================	=============	=========
+fl_copy_lock:		yes		no
+fl_release_private:	maybe		maybe[1]_
+===================	=============	=========
+
+.. [1]:
+   ->fl_release_private for flock or POSIX locks is currently allowed
+   to block. Leases however can still be freed while the i_lock is held and
+   so fl_release_private called on a lease should not block.
+
+lock_manager_operations
+=======================
+
+prototypes::
+
+	void (*lm_notify)(struct file_lock *);  /* unblock callback */
+	int (*lm_grant)(struct file_lock *, struct file_lock *, int);
+	void (*lm_break)(struct file_lock *); /* break_lease callback */
+	int (*lm_change)(struct file_lock **, int);
+
+locking rules:
+
+==========		=============	=================	=========
+ops			inode->i_lock	blocked_lock_lock	may block
+==========		=============	=================	=========
+lm_notify:		yes		yes			no
+lm_grant:		no		no			no
+lm_break:		yes		no			no
+lm_change		yes		no			no
+==========		=============	=================	=========
+
+buffer_head
+===========
+
+prototypes::
+
+	void (*b_end_io)(struct buffer_head *bh, int uptodate);
+
+locking rules:
+
+called from interrupts. In other words, extreme care is needed here.
+bh is locked, but that's all warranties we have here. Currently only RAID1,
+highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
+call this method upon the IO completion.
+
+block_device_operations
+=======================
+prototypes::
+
+	int (*open) (struct block_device *, fmode_t);
+	int (*release) (struct gendisk *, fmode_t);
+	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+	int (*direct_access) (struct block_device *, sector_t, void **,
+				unsigned long *);
+	int (*media_changed) (struct gendisk *);
+	void (*unlock_native_capacity) (struct gendisk *);
+	int (*revalidate_disk) (struct gendisk *);
+	int (*getgeo)(struct block_device *, struct hd_geometry *);
+	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
+
+locking rules:
+
+======================= ===================
+ops			bd_mutex
+======================= ===================
+open:			yes
+release:		yes
+ioctl:			no
+compat_ioctl:		no
+direct_access:		no
+media_changed:		no
+unlock_native_capacity:	no
+revalidate_disk:	no
+getgeo:			no
+swap_slot_free_notify:	no	(see below)
+======================= ===================
+
+media_changed, unlock_native_capacity and revalidate_disk are called only from
+check_disk_change().
+
+swap_slot_free_notify is called with swap_lock and sometimes the page lock
+held.
+
+
+file_operations
+===============
+
+prototypes::
+
+	loff_t (*llseek) (struct file *, loff_t, int);
+	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
+	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+	int (*iterate) (struct file *, struct dir_context *);
+	int (*iterate_shared) (struct file *, struct dir_context *);
+	__poll_t (*poll) (struct file *, struct poll_table_struct *);
+	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+	int (*mmap) (struct file *, struct vm_area_struct *);
+	int (*open) (struct inode *, struct file *);
+	int (*flush) (struct file *);
+	int (*release) (struct inode *, struct file *);
+	int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
+	int (*fasync) (int, struct file *, int);
+	int (*lock) (struct file *, int, struct file_lock *);
+	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
+			loff_t *);
+	ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
+			loff_t *);
+	ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
+			void __user *);
+	ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
+			loff_t *, int);
+	unsigned long (*get_unmapped_area)(struct file *, unsigned long,
+			unsigned long, unsigned long, unsigned long);
+	int (*check_flags)(int);
+	int (*flock) (struct file *, int, struct file_lock *);
+	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
+			size_t, unsigned int);
+	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
+			size_t, unsigned int);
+	int (*setlease)(struct file *, long, struct file_lock **, void **);
+	long (*fallocate)(struct file *, int, loff_t, loff_t);
+
+locking rules:
+	All may block.
+
+->llseek() locking has moved from llseek to the individual llseek
+implementations.  If your fs is not using generic_file_llseek, you
+need to acquire and release the appropriate locks in your ->llseek().
+For many filesystems, it is probably safe to acquire the inode
+mutex or just to use i_size_read() instead.
+Note: this does not protect the file->f_pos against concurrent modifications
+since this is something the userspace has to take care about.
+
+->iterate() is called with i_rwsem exclusive.
+
+->iterate_shared() is called with i_rwsem at least shared.
+
+->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
+Most instances call fasync_helper(), which does that maintenance, so it's
+not normally something one needs to worry about.  Return values > 0 will be
+mapped to zero in the VFS layer.
+
+->readdir() and ->ioctl() on directories must be changed. Ideally we would
+move ->readdir() to inode_operations and use a separate method for directory
+->ioctl() or kill the latter completely. One of the problems is that for
+anything that resembles union-mount we won't have a struct file for all
+components. And there are other reasons why the current interface is a mess...
+
+->read on directories probably must go away - we should just enforce -EISDIR
+in sys_read() and friends.
+
+->setlease operations should call generic_setlease() before or after setting
+the lease within the individual filesystem to record the result of the
+operation
+
+dquot_operations
+================
+
+prototypes::
+
+	int (*write_dquot) (struct dquot *);
+	int (*acquire_dquot) (struct dquot *);
+	int (*release_dquot) (struct dquot *);
+	int (*mark_dirty) (struct dquot *);
+	int (*write_info) (struct super_block *, int);
+
+These operations are intended to be more or less wrapping functions that ensure
+a proper locking wrt the filesystem and call the generic quota operations.
+
+What filesystem should expect from the generic quota functions:
+
+==============	============	=========================
+ops		FS recursion	Held locks when called
+==============	============	=========================
+write_dquot:	yes		dqonoff_sem or dqptr_sem
+acquire_dquot:	yes		dqonoff_sem or dqptr_sem
+release_dquot:	yes		dqonoff_sem or dqptr_sem
+mark_dirty:	no		-
+write_info:	yes		dqonoff_sem
+==============	============	=========================
+
+FS recursion means calling ->quota_read() and ->quota_write() from superblock
+operations.
+
+More details about quota locking can be found in fs/dquot.c.
+
+vm_operations_struct
+====================
+
+prototypes::
+
+	void (*open)(struct vm_area_struct*);
+	void (*close)(struct vm_area_struct*);
+	vm_fault_t (*fault)(struct vm_area_struct*, struct vm_fault *);
+	vm_fault_t (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+	vm_fault_t (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+	int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
+
+locking rules:
+
+=============	========	===========================
+ops		mmap_sem	PageLocked(page)
+=============	========	===========================
+open:		yes
+close:		yes
+fault:		yes		can return with page locked
+map_pages:	yes
+page_mkwrite:	yes		can return with page locked
+pfn_mkwrite:	yes
+access:		yes
+=============	========	===========================
+
+->fault() is called when a previously not present pte is about
+to be faulted in. The filesystem must find and return the page associated
+with the passed in "pgoff" in the vm_fault structure. If it is possible that
+the page may be truncated and/or invalidated, then the filesystem must lock
+the page, then ensure it is not already truncated (the page lock will block
+subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
+locked. The VM will unlock the page.
+
+->map_pages() is called when VM asks to map easy accessible pages.
+Filesystem should find and map pages associated with offsets from "start_pgoff"
+till "end_pgoff". ->map_pages() is called with page table locked and must
+not block.  If it's not possible to reach a page without blocking,
+filesystem should skip it. Filesystem should use do_set_pte() to setup
+page table entry. Pointer to entry associated with the page is passed in
+"pte" field in vm_fault structure. Pointers to entries for other offsets
+should be calculated relative to "pte".
+
+->page_mkwrite() is called when a previously read-only pte is
+about to become writeable. The filesystem again must ensure that there are
+no truncate/invalidate races, and then return with the page locked. If
+the page has been truncated, the filesystem should not look up a new page
+like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
+will cause the VM to retry the fault.
+
+->pfn_mkwrite() is the same as page_mkwrite but when the pte is
+VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
+VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior
+after this call is to make the pte read-write, unless pfn_mkwrite returns
+an error.
+
+->access() is called when get_user_pages() fails in
+access_process_vm(), typically used to debug a process through
+/proc/pid/mem or ptrace.  This function is needed only for
+VM_IO | VM_PFNMAP VMAs.
+
+--------------------------------------------------------------------------------
+
+			Dubious stuff
+
+(if you break something or notice that it is broken and do not fix it yourself
+- at least put it here)
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
deleted file mode 100644
index 63889149f532..000000000000
--- a/Documentation/filesystems/nfs/Exporting
+++ /dev/null
@@ -1,160 +0,0 @@
-
-Making Filesystems Exportable
-=============================
-
-Overview
---------
-
-All filesystem operations require a dentry (or two) as a starting
-point.  Local applications have a reference-counted hold on suitable
-dentries via open file descriptors or cwd/root.  However remote
-applications that access a filesystem via a remote filesystem protocol
-such as NFS may not be able to hold such a reference, and so need a
-different way to refer to a particular dentry.  As the alternative
-form of reference needs to be stable across renames, truncates, and
-server-reboot (among other things, though these tend to be the most
-problematic), there is no simple answer like 'filename'.
-
-The mechanism discussed here allows each filesystem implementation to
-specify how to generate an opaque (outside of the filesystem) byte
-string for any dentry, and how to find an appropriate dentry for any
-given opaque byte string.
-This byte string will be called a "filehandle fragment" as it
-corresponds to part of an NFS filehandle.
-
-A filesystem which supports the mapping between filehandle fragments
-and dentries will be termed "exportable".
-
-
-
-Dcache Issues
--------------
-
-The dcache normally contains a proper prefix of any given filesystem
-tree.  This means that if any filesystem object is in the dcache, then
-all of the ancestors of that filesystem object are also in the dcache.
-As normal access is by filename this prefix is created naturally and
-maintained easily (by each object maintaining a reference count on
-its parent).
-
-However when objects are included into the dcache by interpreting a
-filehandle fragment, there is no automatic creation of a path prefix
-for the object.  This leads to two related but distinct features of
-the dcache that are not needed for normal filesystem access.
-
-1/ The dcache must sometimes contain objects that are not part of the
-   proper prefix. i.e that are not connected to the root.
-2/ The dcache must be prepared for a newly found (via ->lookup) directory
-   to already have a (non-connected) dentry, and must be able to move
-   that dentry into place (based on the parent and name in the
-   ->lookup).   This is particularly needed for directories as
-   it is a dcache invariant that directories only have one dentry.
-
-To implement these features, the dcache has:
-
-a/ A dentry flag DCACHE_DISCONNECTED which is set on
-   any dentry that might not be part of the proper prefix.
-   This is set when anonymous dentries are created, and cleared when a
-   dentry is noticed to be a child of a dentry which is in the proper
-   prefix.  If the refcount on a dentry with this flag set
-   becomes zero, the dentry is immediately discarded, rather than being
-   kept in the dcache.  If a dentry that is not already in the dcache
-   is repeatedly accessed by filehandle (as NFSD might do), an new dentry
-   will be a allocated for each access, and discarded at the end of
-   the access.
-
-   Note that such a dentry can acquire children, name, ancestors, etc.
-   without losing DCACHE_DISCONNECTED - that flag is only cleared when
-   subtree is successfully reconnected to root.  Until then dentries
-   in such subtree are retained only as long as there are references;
-   refcount reaching zero means immediate eviction, same as for unhashed
-   dentries.  That guarantees that we won't need to hunt them down upon
-   umount.
-
-b/ A primitive for creation of secondary roots - d_obtain_root(inode).
-   Those do _not_ bear DCACHE_DISCONNECTED.  They are placed on the
-   per-superblock list (->s_roots), so they can be located at umount
-   time for eviction purposes.
-
-c/ Helper routines to allocate anonymous dentries, and to help attach
-   loose directory dentries at lookup time. They are:
-    d_obtain_alias(inode) will return a dentry for the given inode.
-      If the inode already has a dentry, one of those is returned.
-      If it doesn't, a new anonymous (IS_ROOT and
-        DCACHE_DISCONNECTED) dentry is allocated and attached.
-      In the case of a directory, care is taken that only one dentry
-      can ever be attached.
-    d_splice_alias(inode, dentry) will introduce a new dentry into the tree;
-      either the passed-in dentry or a preexisting alias for the given inode
-      (such as an anonymous one created by d_obtain_alias), if appropriate.
-      It returns NULL when the passed-in dentry is used, following the calling
-      convention of ->lookup.
- 
-Filesystem Issues
------------------
-
-For a filesystem to be exportable it must:
- 
-   1/ provide the filehandle fragment routines described below.
-   2/ make sure that d_splice_alias is used rather than d_add
-      when ->lookup finds an inode for a given parent and name.
-
-      If inode is NULL, d_splice_alias(inode, dentry) is equivalent to
-
-		d_add(dentry, inode), NULL
-
-      Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err)
-
-      Typically the ->lookup routine will simply end with a:
-
-		return d_splice_alias(inode, dentry);
-	}
-
-
-
-  A file system implementation declares that instances of the filesystem
-are exportable by setting the s_export_op field in the struct
-super_block.  This field must point to a "struct export_operations"
-struct which has the following members:
-
- encode_fh  (optional)
-    Takes a dentry and creates a filehandle fragment which can later be used
-    to find or create a dentry for the same object.  The default
-    implementation creates a filehandle fragment that encodes a 32bit inode
-    and generation number for the inode encoded, and if necessary the
-    same information for the parent.
-
-  fh_to_dentry (mandatory)
-    Given a filehandle fragment, this should find the implied object and
-    create a dentry for it (possibly with d_obtain_alias).
-
-  fh_to_parent (optional but strongly recommended)
-    Given a filehandle fragment, this should find the parent of the
-    implied object and create a dentry for it (possibly with
-    d_obtain_alias).  May fail if the filehandle fragment is too small.
-
-  get_parent (optional but strongly recommended)
-    When given a dentry for a directory, this should return  a dentry for
-    the parent.  Quite possibly the parent dentry will have been allocated
-    by d_alloc_anon.  The default get_parent function just returns an error
-    so any filehandle lookup that requires finding a parent will fail.
-    ->lookup("..") is *not* used as a default as it can leave ".." entries
-    in the dcache which are too messy to work with.
-
-  get_name (optional)
-    When given a parent dentry and a child dentry, this should find a name
-    in the directory identified by the parent dentry, which leads to the
-    object identified by the child dentry.  If no get_name function is
-    supplied, a default implementation is provided which uses vfs_readdir
-    to find potential names, and matches inode numbers to find the correct
-    match.
-
-
-A filehandle fragment consists of an array of 1 or more 4byte words,
-together with a one byte "type".
-The decode_fh routine should not depend on the stated size that is
-passed to it.  This size may be larger than the original filehandle
-generated by encode_fh, in which case it will have been padded with
-nuls.  Rather, the encode_fh routine should choose a "type" which
-indicates the decode_fh how much of the filehandle is valid, and how
-it should be interpreted.
diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
new file mode 100644
index 000000000000..33d588a01ace
--- /dev/null
+++ b/Documentation/filesystems/nfs/exporting.rst
@@ -0,0 +1,165 @@
+:orphan:
+
+Making Filesystems Exportable
+=============================
+
+Overview
+--------
+
+All filesystem operations require a dentry (or two) as a starting
+point.  Local applications have a reference-counted hold on suitable
+dentries via open file descriptors or cwd/root.  However remote
+applications that access a filesystem via a remote filesystem protocol
+such as NFS may not be able to hold such a reference, and so need a
+different way to refer to a particular dentry.  As the alternative
+form of reference needs to be stable across renames, truncates, and
+server-reboot (among other things, though these tend to be the most
+problematic), there is no simple answer like 'filename'.
+
+The mechanism discussed here allows each filesystem implementation to
+specify how to generate an opaque (outside of the filesystem) byte
+string for any dentry, and how to find an appropriate dentry for any
+given opaque byte string.
+This byte string will be called a "filehandle fragment" as it
+corresponds to part of an NFS filehandle.
+
+A filesystem which supports the mapping between filehandle fragments
+and dentries will be termed "exportable".
+
+
+
+Dcache Issues
+-------------
+
+The dcache normally contains a proper prefix of any given filesystem
+tree.  This means that if any filesystem object is in the dcache, then
+all of the ancestors of that filesystem object are also in the dcache.
+As normal access is by filename this prefix is created naturally and
+maintained easily (by each object maintaining a reference count on
+its parent).
+
+However when objects are included into the dcache by interpreting a
+filehandle fragment, there is no automatic creation of a path prefix
+for the object.  This leads to two related but distinct features of
+the dcache that are not needed for normal filesystem access.
+
+1. The dcache must sometimes contain objects that are not part of the
+   proper prefix. i.e that are not connected to the root.
+2. The dcache must be prepared for a newly found (via ->lookup) directory
+   to already have a (non-connected) dentry, and must be able to move
+   that dentry into place (based on the parent and name in the
+   ->lookup).   This is particularly needed for directories as
+   it is a dcache invariant that directories only have one dentry.
+
+To implement these features, the dcache has:
+
+a. A dentry flag DCACHE_DISCONNECTED which is set on
+   any dentry that might not be part of the proper prefix.
+   This is set when anonymous dentries are created, and cleared when a
+   dentry is noticed to be a child of a dentry which is in the proper
+   prefix.  If the refcount on a dentry with this flag set
+   becomes zero, the dentry is immediately discarded, rather than being
+   kept in the dcache.  If a dentry that is not already in the dcache
+   is repeatedly accessed by filehandle (as NFSD might do), an new dentry
+   will be a allocated for each access, and discarded at the end of
+   the access.
+
+   Note that such a dentry can acquire children, name, ancestors, etc.
+   without losing DCACHE_DISCONNECTED - that flag is only cleared when
+   subtree is successfully reconnected to root.  Until then dentries
+   in such subtree are retained only as long as there are references;
+   refcount reaching zero means immediate eviction, same as for unhashed
+   dentries.  That guarantees that we won't need to hunt them down upon
+   umount.
+
+b. A primitive for creation of secondary roots - d_obtain_root(inode).
+   Those do _not_ bear DCACHE_DISCONNECTED.  They are placed on the
+   per-superblock list (->s_roots), so they can be located at umount
+   time for eviction purposes.
+
+c. Helper routines to allocate anonymous dentries, and to help attach
+   loose directory dentries at lookup time. They are:
+
+    d_obtain_alias(inode) will return a dentry for the given inode.
+      If the inode already has a dentry, one of those is returned.
+
+      If it doesn't, a new anonymous (IS_ROOT and
+      DCACHE_DISCONNECTED) dentry is allocated and attached.
+
+      In the case of a directory, care is taken that only one dentry
+      can ever be attached.
+
+    d_splice_alias(inode, dentry) will introduce a new dentry into the tree;
+      either the passed-in dentry or a preexisting alias for the given inode
+      (such as an anonymous one created by d_obtain_alias), if appropriate.
+      It returns NULL when the passed-in dentry is used, following the calling
+      convention of ->lookup.
+
+Filesystem Issues
+-----------------
+
+For a filesystem to be exportable it must:
+
+   1. provide the filehandle fragment routines described below.
+   2. make sure that d_splice_alias is used rather than d_add
+      when ->lookup finds an inode for a given parent and name.
+
+      If inode is NULL, d_splice_alias(inode, dentry) is equivalent to::
+
+		d_add(dentry, inode), NULL
+
+      Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err)
+
+      Typically the ->lookup routine will simply end with a::
+
+		return d_splice_alias(inode, dentry);
+	}
+
+
+
+A file system implementation declares that instances of the filesystem
+are exportable by setting the s_export_op field in the struct
+super_block.  This field must point to a "struct export_operations"
+struct which has the following members:
+
+ encode_fh  (optional)
+    Takes a dentry and creates a filehandle fragment which can later be used
+    to find or create a dentry for the same object.  The default
+    implementation creates a filehandle fragment that encodes a 32bit inode
+    and generation number for the inode encoded, and if necessary the
+    same information for the parent.
+
+  fh_to_dentry (mandatory)
+    Given a filehandle fragment, this should find the implied object and
+    create a dentry for it (possibly with d_obtain_alias).
+
+  fh_to_parent (optional but strongly recommended)
+    Given a filehandle fragment, this should find the parent of the
+    implied object and create a dentry for it (possibly with
+    d_obtain_alias).  May fail if the filehandle fragment is too small.
+
+  get_parent (optional but strongly recommended)
+    When given a dentry for a directory, this should return  a dentry for
+    the parent.  Quite possibly the parent dentry will have been allocated
+    by d_alloc_anon.  The default get_parent function just returns an error
+    so any filehandle lookup that requires finding a parent will fail.
+    ->lookup("..") is *not* used as a default as it can leave ".." entries
+    in the dcache which are too messy to work with.
+
+  get_name (optional)
+    When given a parent dentry and a child dentry, this should find a name
+    in the directory identified by the parent dentry, which leads to the
+    object identified by the child dentry.  If no get_name function is
+    supplied, a default implementation is provided which uses vfs_readdir
+    to find potential names, and matches inode numbers to find the correct
+    match.
+
+
+A filehandle fragment consists of an array of 1 or more 4byte words,
+together with a one byte "type".
+The decode_fh routine should not depend on the stated size that is
+passed to it.  This size may be larger than the original filehandle
+generated by encode_fh, in which case it will have been padded with
+nuls.  Rather, the encode_fh routine should choose a "type" which
+indicates the decode_fh how much of the filehandle is valid, and how
+it should be interpreted.
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index 0f85ab21c2ca..7d4d09dd5e6d 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -20,7 +20,7 @@ kernel which allows different filesystem implementations to coexist.
 
 VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so on
 are called from a process context.  Filesystem locking is described in
-the document Documentation/filesystems/Locking.
+the document Documentation/filesystems/locking.rst.
 
 
 Directory Entry Cache (dcache)
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index ce8b7f677c58..eb0bb8ca8e63 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
  */
 
  /*
-  * See Documentation/filesystems/nfs/Exporting
+  * See Documentation/filesystems/nfs/exporting.rst
   * and examples in fs/exportfs
   *
   * Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index f0e549783caf..09bc68708d28 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -7,7 +7,7 @@
  * and for mapping back from file handles to dentries.
  *
  * For details on why we do all the strange and hairy things in here
- * take a look at Documentation/filesystems/nfs/Exporting.
+ * take a look at Documentation/filesystems/nfs/exporting.rst.
  */
 #include <linux/exportfs.h>
 #include <linux/fs.h>
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index 85a9093769a9..35768a63fb1d 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -10,7 +10,7 @@
  *
  * The following files are helpful:
  *
- *     Documentation/filesystems/nfs/Exporting
+ *     Documentation/filesystems/nfs/exporting.rst
  *     fs/exportfs/expfs.c.
  */
 
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 960f9a3c012d..a5612abc0936 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -555,7 +555,7 @@ static int orangefs_fsync(struct file *file,
  * Change the file pointer position for an instance of an open file.
  *
  * \note If .llseek is overriden, we must acquire lock as described in
- *       Documentation/filesystems/Locking.
+ *       Documentation/filesystems/locking.rst.
  *
  * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
  * require much changes to the FS
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 9451011ac014..10090f11ab95 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -151,7 +151,7 @@ struct dentry_operations {
 
 /*
  * Locking rules for dentry_operations callbacks are to be found in
- * Documentation/filesystems/Locking. Keep it updated!
+ * Documentation/filesystems/locking.rst. Keep it updated!
  *
  * FUrther descriptions are found in Documentation/filesystems/vfs.rst.
  * Keep it updated too!
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 0d3037419bc7..cf6571fc9c01 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -139,7 +139,7 @@ struct fid {
  * @get_parent:     find the parent of a given directory
  * @commit_metadata: commit metadata changes to stable storage
  *
- * See Documentation/filesystems/nfs/Exporting for details on how to use
+ * See Documentation/filesystems/nfs/exporting.rst for details on how to use
  * this interface correctly.
  *
  * encode_fh:
-- 
cgit v1.2.3


From 9cdd273e29f3b901712ec3c298b1d506861f48e3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Date: Wed, 31 Jul 2019 17:08:50 -0300
Subject: spi: docs: convert to ReST and add it to the kABI bookset

While there's one file there with briefily describes the uAPI,
the documentation was written just like most subsystems: focused
on kernel developers. So, add it together with driver-api books.

Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> # for iio
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/index.rst                 |   1 +
 Documentation/spi/butterfly             |  68 ----
 Documentation/spi/butterfly.rst         |  74 ++++
 Documentation/spi/index.rst             |  22 ++
 Documentation/spi/pxa2xx                | 235 ------------
 Documentation/spi/pxa2xx.rst            | 240 ++++++++++++
 Documentation/spi/spi-lm70llp           |  79 ----
 Documentation/spi/spi-lm70llp.rst       |  84 +++++
 Documentation/spi/spi-sc18is602         |  36 --
 Documentation/spi/spi-sc18is602.rst     |  39 ++
 Documentation/spi/spi-summary           | 631 -------------------------------
 Documentation/spi/spi-summary.rst       | 644 ++++++++++++++++++++++++++++++++
 Documentation/spi/spidev                | 149 --------
 Documentation/spi/spidev.rst            | 163 ++++++++
 drivers/iio/dummy/iio_simple_dummy.c    |   2 +-
 drivers/spi/Kconfig                     |   2 +-
 drivers/spi/spi-butterfly.c             |   2 +-
 drivers/spi/spi-lm70llp.c               |   2 +-
 include/linux/platform_data/sc18is602.h |   2 +-
 19 files changed, 1272 insertions(+), 1203 deletions(-)
 delete mode 100644 Documentation/spi/butterfly
 create mode 100644 Documentation/spi/butterfly.rst
 create mode 100644 Documentation/spi/index.rst
 delete mode 100644 Documentation/spi/pxa2xx
 create mode 100644 Documentation/spi/pxa2xx.rst
 delete mode 100644 Documentation/spi/spi-lm70llp
 create mode 100644 Documentation/spi/spi-lm70llp.rst
 delete mode 100644 Documentation/spi/spi-sc18is602
 create mode 100644 Documentation/spi/spi-sc18is602.rst
 delete mode 100644 Documentation/spi/spi-summary
 create mode 100644 Documentation/spi/spi-summary.rst
 delete mode 100644 Documentation/spi/spidev
 create mode 100644 Documentation/spi/spidev.rst

(limited to 'include')

diff --git a/Documentation/index.rst b/Documentation/index.rst
index 6217acab92db..472b8abe52e9 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -116,6 +116,7 @@ needed).
    power/index
    target/index
    timers/index
+   spi/index
    watchdog/index
    virtual/index
    input/index
diff --git a/Documentation/spi/butterfly b/Documentation/spi/butterfly
deleted file mode 100644
index 9927af7a629c..000000000000
--- a/Documentation/spi/butterfly
+++ /dev/null
@@ -1,68 +0,0 @@
-spi_butterfly - parport-to-butterfly adapter driver
-===================================================
-
-This is a hardware and software project that includes building and using
-a parallel port adapter cable, together with an "AVR Butterfly" to run
-firmware for user interfacing and/or sensors.  A Butterfly is a $US20
-battery powered card with an AVR microcontroller and lots of goodies:
-sensors, LCD, flash, toggle stick, and more.  You can use AVR-GCC to
-develop firmware for this, and flash it using this adapter cable.
-
-You can make this adapter from an old printer cable and solder things
-directly to the Butterfly.  Or (if you have the parts and skills) you
-can come up with something fancier, providing ciruit protection to the
-Butterfly and the printer port, or with a better power supply than two
-signal pins from the printer port.  Or for that matter, you can use
-similar cables to talk to many AVR boards, even a breadboard.
-
-This is more powerful than "ISP programming" cables since it lets kernel
-SPI protocol drivers interact with the AVR, and could even let the AVR
-issue interrupts to them.  Later, your protocol driver should work
-easily with a "real SPI controller", instead of this bitbanger.
-
-
-The first cable connections will hook Linux up to one SPI bus, with the
-AVR and a DataFlash chip; and to the AVR reset line.  This is all you
-need to reflash the firmware, and the pins are the standard Atmel "ISP"
-connector pins (used also on non-Butterfly AVR boards).  On the parport
-side this is like "sp12" programming cables.
-
-	Signal	  Butterfly	  Parport (DB-25)
-	------	  ---------	  ---------------
-	SCK	= J403.PB1/SCK	= pin 2/D0
-	RESET	= J403.nRST	= pin 3/D1
-	VCC	= J403.VCC_EXT	= pin 8/D6
-	MOSI	= J403.PB2/MOSI	= pin 9/D7
-	MISO	= J403.PB3/MISO	= pin 11/S7,nBUSY
-	GND	= J403.GND	= pin 23/GND
-
-Then to let Linux master that bus to talk to the DataFlash chip, you must
-(a) flash new firmware that disables SPI (set PRR.2, and disable pullups
-by clearing PORTB.[0-3]); (b) configure the mtd_dataflash driver; and
-(c) cable in the chipselect.
-
-	Signal	  Butterfly	  Parport (DB-25)
-	------	  ---------	  ---------------
-	VCC	= J400.VCC_EXT	= pin 7/D5
-	SELECT	= J400.PB0/nSS	= pin 17/C3,nSELECT
-	GND	= J400.GND	= pin 24/GND
-
-Or you could flash firmware making the AVR into an SPI slave (keeping the
-DataFlash in reset) and tweak the spi_butterfly driver to make it bind to
-the driver for your custom SPI-based protocol.
-
-The "USI" controller, using J405, can also be used for a second SPI bus.
-That would let you talk to the AVR using custom SPI-with-USI firmware,
-while letting either Linux or the AVR use the DataFlash.  There are plenty
-of spare parport pins to wire this one up, such as:
-
-	Signal	  Butterfly	  Parport (DB-25)
-	------	  ---------	  ---------------
-	SCK	= J403.PE4/USCK	= pin 5/D3
-	MOSI	= J403.PE5/DI	= pin 6/D4
-	MISO	= J403.PE6/DO	= pin 12/S5,nPAPEROUT
-	GND	= J403.GND	= pin 22/GND
-
-	IRQ	= J402.PF4	= pin 10/S6,ACK
-	GND	= J402.GND(P2)	= pin 25/GND
-
diff --git a/Documentation/spi/butterfly.rst b/Documentation/spi/butterfly.rst
new file mode 100644
index 000000000000..e614a589547c
--- /dev/null
+++ b/Documentation/spi/butterfly.rst
@@ -0,0 +1,74 @@
+===================================================
+spi_butterfly - parport-to-butterfly adapter driver
+===================================================
+
+This is a hardware and software project that includes building and using
+a parallel port adapter cable, together with an "AVR Butterfly" to run
+firmware for user interfacing and/or sensors.  A Butterfly is a $US20
+battery powered card with an AVR microcontroller and lots of goodies:
+sensors, LCD, flash, toggle stick, and more.  You can use AVR-GCC to
+develop firmware for this, and flash it using this adapter cable.
+
+You can make this adapter from an old printer cable and solder things
+directly to the Butterfly.  Or (if you have the parts and skills) you
+can come up with something fancier, providing ciruit protection to the
+Butterfly and the printer port, or with a better power supply than two
+signal pins from the printer port.  Or for that matter, you can use
+similar cables to talk to many AVR boards, even a breadboard.
+
+This is more powerful than "ISP programming" cables since it lets kernel
+SPI protocol drivers interact with the AVR, and could even let the AVR
+issue interrupts to them.  Later, your protocol driver should work
+easily with a "real SPI controller", instead of this bitbanger.
+
+
+The first cable connections will hook Linux up to one SPI bus, with the
+AVR and a DataFlash chip; and to the AVR reset line.  This is all you
+need to reflash the firmware, and the pins are the standard Atmel "ISP"
+connector pins (used also on non-Butterfly AVR boards).  On the parport
+side this is like "sp12" programming cables.
+
+	======	  =============	  ===================
+	Signal	  Butterfly	  Parport (DB-25)
+	======	  =============	  ===================
+	SCK	  J403.PB1/SCK	  pin 2/D0
+	RESET	  J403.nRST	  pin 3/D1
+	VCC	  J403.VCC_EXT	  pin 8/D6
+	MOSI	  J403.PB2/MOSI	  pin 9/D7
+	MISO	  J403.PB3/MISO	  pin 11/S7,nBUSY
+	GND	  J403.GND	  pin 23/GND
+	======	  =============	  ===================
+
+Then to let Linux master that bus to talk to the DataFlash chip, you must
+(a) flash new firmware that disables SPI (set PRR.2, and disable pullups
+by clearing PORTB.[0-3]); (b) configure the mtd_dataflash driver; and
+(c) cable in the chipselect.
+
+	======	  ============	  ===================
+	Signal	  Butterfly	  Parport (DB-25)
+	======	  ============	  ===================
+	VCC	  J400.VCC_EXT	  pin 7/D5
+	SELECT	  J400.PB0/nSS	  pin 17/C3,nSELECT
+	GND	  J400.GND	  pin 24/GND
+	======	  ============	  ===================
+
+Or you could flash firmware making the AVR into an SPI slave (keeping the
+DataFlash in reset) and tweak the spi_butterfly driver to make it bind to
+the driver for your custom SPI-based protocol.
+
+The "USI" controller, using J405, can also be used for a second SPI bus.
+That would let you talk to the AVR using custom SPI-with-USI firmware,
+while letting either Linux or the AVR use the DataFlash.  There are plenty
+of spare parport pins to wire this one up, such as:
+
+	======	  =============	  ===================
+	Signal	  Butterfly	  Parport (DB-25)
+	======	  =============	  ===================
+	SCK	  J403.PE4/USCK	  pin 5/D3
+	MOSI	  J403.PE5/DI	  pin 6/D4
+	MISO	  J403.PE6/DO	  pin 12/S5,nPAPEROUT
+	GND	  J403.GND	  pin 22/GND
+
+	IRQ	  J402.PF4	  pin 10/S6,ACK
+	GND	  J402.GND(P2)	  pin 25/GND
+	======	  =============	  ===================
diff --git a/Documentation/spi/index.rst b/Documentation/spi/index.rst
new file mode 100644
index 000000000000..06c34ea11bcf
--- /dev/null
+++ b/Documentation/spi/index.rst
@@ -0,0 +1,22 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================
+Serial Peripheral Interface (SPI)
+=================================
+
+.. toctree::
+   :maxdepth: 1
+
+   spi-summary
+   spidev
+   butterfly
+   pxa2xx
+   spi-lm70llp
+   spi-sc18is602
+
+.. only::  subproject and html
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
deleted file mode 100644
index 551325b66b23..000000000000
--- a/Documentation/spi/pxa2xx
+++ /dev/null
@@ -1,235 +0,0 @@
-PXA2xx SPI on SSP driver HOWTO
-===================================================
-This a mini howto on the pxa2xx_spi driver.  The driver turns a PXA2xx
-synchronous serial port into a SPI master controller
-(see Documentation/spi/spi-summary). The driver has the following features
-
-- Support for any PXA2xx SSP
-- SSP PIO and SSP DMA data transfers.
-- External and Internal (SSPFRM) chip selects.
-- Per slave device (chip) configuration.
-- Full suspend, freeze, resume support.
-
-The driver is built around a "spi_message" fifo serviced by workqueue and a
-tasklet. The workqueue, "pump_messages", drives message fifo and the tasklet
-(pump_transfer) is responsible for queuing SPI transactions and setting up and
-launching the dma/interrupt driven transfers.
-
-Declaring PXA2xx Master Controllers
------------------------------------
-Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
-"platform device".  The master configuration is passed to the driver via a table
-found in include/linux/spi/pxa2xx_spi.h:
-
-struct pxa2xx_spi_controller {
-	u16 num_chipselect;
-	u8 enable_dma;
-};
-
-The "pxa2xx_spi_controller.num_chipselect" field is used to determine the number of
-slave device (chips) attached to this SPI master.
-
-The "pxa2xx_spi_controller.enable_dma" field informs the driver that SSP DMA should
-be used.  This caused the driver to acquire two DMA channels: rx_channel and
-tx_channel.  The rx_channel has a higher DMA service priority the tx_channel.
-See the "PXA2xx Developer Manual" section "DMA Controller".
-
-NSSP MASTER SAMPLE
-------------------
-Below is a sample configuration using the PXA255 NSSP.
-
-static struct resource pxa_spi_nssp_resources[] = {
-	[0] = {
-		.start	= __PREG(SSCR0_P(2)), /* Start address of NSSP */
-		.end	= __PREG(SSCR0_P(2)) + 0x2c, /* Range of registers */
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= IRQ_NSSP, /* NSSP IRQ */
-		.end	= IRQ_NSSP,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct pxa2xx_spi_controller pxa_nssp_master_info = {
-	.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
-	.enable_dma = 1, /* Enables NSSP DMA */
-};
-
-static struct platform_device pxa_spi_nssp = {
-	.name = "pxa2xx-spi", /* MUST BE THIS VALUE, so device match driver */
-	.id = 2, /* Bus number, MUST MATCH SSP number 1..n */
-	.resource = pxa_spi_nssp_resources,
-	.num_resources = ARRAY_SIZE(pxa_spi_nssp_resources),
-	.dev = {
-		.platform_data = &pxa_nssp_master_info, /* Passed to driver */
-	},
-};
-
-static struct platform_device *devices[] __initdata = {
-	&pxa_spi_nssp,
-};
-
-static void __init board_init(void)
-{
-	(void)platform_add_device(devices, ARRAY_SIZE(devices));
-}
-
-Declaring Slave Devices
------------------------
-Typically each SPI slave (chip) is defined in the arch/.../mach-*/board-*.c
-using the "spi_board_info" structure found in "linux/spi/spi.h". See
-"Documentation/spi/spi-summary" for additional information.
-
-Each slave device attached to the PXA must provide slave specific configuration
-information via the structure "pxa2xx_spi_chip" found in
-"include/linux/spi/pxa2xx_spi.h".  The pxa2xx_spi master controller driver
-will uses the configuration whenever the driver communicates with the slave
-device. All fields are optional.
-
-struct pxa2xx_spi_chip {
-	u8 tx_threshold;
-	u8 rx_threshold;
-	u8 dma_burst_size;
-	u32 timeout;
-	u8 enable_loopback;
-	void (*cs_control)(u32 command);
-};
-
-The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
-used to configure the SSP hardware fifo.  These fields are critical to the
-performance of pxa2xx_spi driver and misconfiguration will result in rx
-fifo overruns (especially in PIO mode transfers). Good default values are
-
-	.tx_threshold = 8,
-	.rx_threshold = 8,
-
-The range is 1 to 16 where zero indicates "use default".
-
-The "pxa2xx_spi_chip.dma_burst_size" field is used to configure PXA2xx DMA
-engine and is related the "spi_device.bits_per_word" field.  Read and understand
-the PXA2xx "Developer Manual" sections on the DMA controller and SSP Controllers
-to determine the correct value. An SSP configured for byte-wide transfers would
-use a value of 8. The driver will determine a reasonable default if
-dma_burst_size == 0.
-
-The "pxa2xx_spi_chip.timeout" fields is used to efficiently handle
-trailing bytes in the SSP receiver fifo.  The correct value for this field is
-dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific
-slave device.  Please note that the PXA2xx SSP 1 does not support trailing byte
-timeouts and must busy-wait any trailing bytes.
-
-The "pxa2xx_spi_chip.enable_loopback" field is used to place the SSP porting
-into internal loopback mode.  In this mode the SSP controller internally
-connects the SSPTX pin to the SSPRX pin.  This is useful for initial setup
-testing.
-
-The "pxa2xx_spi_chip.cs_control" field is used to point to a board specific
-function for asserting/deasserting a slave device chip select.  If the field is
-NULL, the pxa2xx_spi master controller driver assumes that the SSP port is
-configured to use SSPFRM instead.
-
-NOTE: the SPI driver cannot control the chip select if SSPFRM is used, so the
-chipselect is dropped after each spi_transfer.  Most devices need chip select
-asserted around the complete message.  Use SSPFRM as a GPIO (through cs_control)
-to accommodate these chips.
-
-
-NSSP SLAVE SAMPLE
------------------
-The pxa2xx_spi_chip structure is passed to the pxa2xx_spi driver in the
-"spi_board_info.controller_data" field. Below is a sample configuration using
-the PXA255 NSSP.
-
-/* Chip Select control for the CS8415A SPI slave device */
-static void cs8415a_cs_control(u32 command)
-{
-	if (command & PXA2XX_CS_ASSERT)
-		GPCR(2) = GPIO_bit(2);
-	else
-		GPSR(2) = GPIO_bit(2);
-}
-
-/* Chip Select control for the CS8405A SPI slave device */
-static void cs8405a_cs_control(u32 command)
-{
-	if (command & PXA2XX_CS_ASSERT)
-		GPCR(3) = GPIO_bit(3);
-	else
-		GPSR(3) = GPIO_bit(3);
-}
-
-static struct pxa2xx_spi_chip cs8415a_chip_info = {
-	.tx_threshold = 8, /* SSP hardward FIFO threshold */
-	.rx_threshold = 8, /* SSP hardward FIFO threshold */
-	.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
-	.timeout = 235, /* See Intel documentation */
-	.cs_control = cs8415a_cs_control, /* Use external chip select */
-};
-
-static struct pxa2xx_spi_chip cs8405a_chip_info = {
-	.tx_threshold = 8, /* SSP hardward FIFO threshold */
-	.rx_threshold = 8, /* SSP hardward FIFO threshold */
-	.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
-	.timeout = 235, /* See Intel documentation */
-	.cs_control = cs8405a_cs_control, /* Use external chip select */
-};
-
-static struct spi_board_info streetracer_spi_board_info[] __initdata = {
-	{
-		.modalias = "cs8415a", /* Name of spi_driver for this device */
-		.max_speed_hz = 3686400, /* Run SSP as fast a possbile */
-		.bus_num = 2, /* Framework bus number */
-		.chip_select = 0, /* Framework chip select */
-		.platform_data = NULL; /* No spi_driver specific config */
-		.controller_data = &cs8415a_chip_info, /* Master chip config */
-		.irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
-	},
-	{
-		.modalias = "cs8405a", /* Name of spi_driver for this device */
-		.max_speed_hz = 3686400, /* Run SSP as fast a possbile */
-		.bus_num = 2, /* Framework bus number */
-		.chip_select = 1, /* Framework chip select */
-		.controller_data = &cs8405a_chip_info, /* Master chip config */
-		.irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
-	},
-};
-
-static void __init streetracer_init(void)
-{
-	spi_register_board_info(streetracer_spi_board_info,
-				ARRAY_SIZE(streetracer_spi_board_info));
-}
-
-
-DMA and PIO I/O Support
------------------------
-The pxa2xx_spi driver supports both DMA and interrupt driven PIO message
-transfers.  The driver defaults to PIO mode and DMA transfers must be enabled
-by setting the "enable_dma" flag in the "pxa2xx_spi_controller" structure.  The DMA
-mode supports both coherent and stream based DMA mappings.
-
-The following logic is used to determine the type of I/O to be used on
-a per "spi_transfer" basis:
-
-if !enable_dma then
-	always use PIO transfers
-
-if spi_message.len > 8191 then
-	print "rate limited" warning
-	use PIO transfers
-
-if spi_message.is_dma_mapped and rx_dma_buf != 0 and tx_dma_buf != 0 then
-	use coherent DMA mode
-
-if rx_buf and tx_buf are aligned on 8 byte boundary then
-	use streaming DMA mode
-
-otherwise
-	use PIO transfer
-
-THANKS TO
----------
-
-David Brownell and others for mentoring the development of this driver.
-
diff --git a/Documentation/spi/pxa2xx.rst b/Documentation/spi/pxa2xx.rst
new file mode 100644
index 000000000000..882d3cc72cc2
--- /dev/null
+++ b/Documentation/spi/pxa2xx.rst
@@ -0,0 +1,240 @@
+==============================
+PXA2xx SPI on SSP driver HOWTO
+==============================
+
+This a mini howto on the pxa2xx_spi driver.  The driver turns a PXA2xx
+synchronous serial port into a SPI master controller
+(see Documentation/spi/spi-summary.rst). The driver has the following features
+
+- Support for any PXA2xx SSP
+- SSP PIO and SSP DMA data transfers.
+- External and Internal (SSPFRM) chip selects.
+- Per slave device (chip) configuration.
+- Full suspend, freeze, resume support.
+
+The driver is built around a "spi_message" fifo serviced by workqueue and a
+tasklet. The workqueue, "pump_messages", drives message fifo and the tasklet
+(pump_transfer) is responsible for queuing SPI transactions and setting up and
+launching the dma/interrupt driven transfers.
+
+Declaring PXA2xx Master Controllers
+-----------------------------------
+Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
+"platform device".  The master configuration is passed to the driver via a table
+found in include/linux/spi/pxa2xx_spi.h::
+
+  struct pxa2xx_spi_controller {
+	u16 num_chipselect;
+	u8 enable_dma;
+  };
+
+The "pxa2xx_spi_controller.num_chipselect" field is used to determine the number of
+slave device (chips) attached to this SPI master.
+
+The "pxa2xx_spi_controller.enable_dma" field informs the driver that SSP DMA should
+be used.  This caused the driver to acquire two DMA channels: rx_channel and
+tx_channel.  The rx_channel has a higher DMA service priority the tx_channel.
+See the "PXA2xx Developer Manual" section "DMA Controller".
+
+NSSP MASTER SAMPLE
+------------------
+Below is a sample configuration using the PXA255 NSSP::
+
+  static struct resource pxa_spi_nssp_resources[] = {
+	[0] = {
+		.start	= __PREG(SSCR0_P(2)), /* Start address of NSSP */
+		.end	= __PREG(SSCR0_P(2)) + 0x2c, /* Range of registers */
+		.flags	= IORESOURCE_MEM,
+	},
+	[1] = {
+		.start	= IRQ_NSSP, /* NSSP IRQ */
+		.end	= IRQ_NSSP,
+		.flags	= IORESOURCE_IRQ,
+	},
+  };
+
+  static struct pxa2xx_spi_controller pxa_nssp_master_info = {
+	.num_chipselect = 1, /* Matches the number of chips attached to NSSP */
+	.enable_dma = 1, /* Enables NSSP DMA */
+  };
+
+  static struct platform_device pxa_spi_nssp = {
+	.name = "pxa2xx-spi", /* MUST BE THIS VALUE, so device match driver */
+	.id = 2, /* Bus number, MUST MATCH SSP number 1..n */
+	.resource = pxa_spi_nssp_resources,
+	.num_resources = ARRAY_SIZE(pxa_spi_nssp_resources),
+	.dev = {
+		.platform_data = &pxa_nssp_master_info, /* Passed to driver */
+	},
+  };
+
+  static struct platform_device *devices[] __initdata = {
+	&pxa_spi_nssp,
+  };
+
+  static void __init board_init(void)
+  {
+	(void)platform_add_device(devices, ARRAY_SIZE(devices));
+  }
+
+Declaring Slave Devices
+-----------------------
+Typically each SPI slave (chip) is defined in the arch/.../mach-*/board-*.c
+using the "spi_board_info" structure found in "linux/spi/spi.h". See
+"Documentation/spi/spi-summary.rst" for additional information.
+
+Each slave device attached to the PXA must provide slave specific configuration
+information via the structure "pxa2xx_spi_chip" found in
+"include/linux/spi/pxa2xx_spi.h".  The pxa2xx_spi master controller driver
+will uses the configuration whenever the driver communicates with the slave
+device. All fields are optional.
+
+::
+
+  struct pxa2xx_spi_chip {
+	u8 tx_threshold;
+	u8 rx_threshold;
+	u8 dma_burst_size;
+	u32 timeout;
+	u8 enable_loopback;
+	void (*cs_control)(u32 command);
+  };
+
+The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
+used to configure the SSP hardware fifo.  These fields are critical to the
+performance of pxa2xx_spi driver and misconfiguration will result in rx
+fifo overruns (especially in PIO mode transfers). Good default values are::
+
+	.tx_threshold = 8,
+	.rx_threshold = 8,
+
+The range is 1 to 16 where zero indicates "use default".
+
+The "pxa2xx_spi_chip.dma_burst_size" field is used to configure PXA2xx DMA
+engine and is related the "spi_device.bits_per_word" field.  Read and understand
+the PXA2xx "Developer Manual" sections on the DMA controller and SSP Controllers
+to determine the correct value. An SSP configured for byte-wide transfers would
+use a value of 8. The driver will determine a reasonable default if
+dma_burst_size == 0.
+
+The "pxa2xx_spi_chip.timeout" fields is used to efficiently handle
+trailing bytes in the SSP receiver fifo.  The correct value for this field is
+dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific
+slave device.  Please note that the PXA2xx SSP 1 does not support trailing byte
+timeouts and must busy-wait any trailing bytes.
+
+The "pxa2xx_spi_chip.enable_loopback" field is used to place the SSP porting
+into internal loopback mode.  In this mode the SSP controller internally
+connects the SSPTX pin to the SSPRX pin.  This is useful for initial setup
+testing.
+
+The "pxa2xx_spi_chip.cs_control" field is used to point to a board specific
+function for asserting/deasserting a slave device chip select.  If the field is
+NULL, the pxa2xx_spi master controller driver assumes that the SSP port is
+configured to use SSPFRM instead.
+
+NOTE: the SPI driver cannot control the chip select if SSPFRM is used, so the
+chipselect is dropped after each spi_transfer.  Most devices need chip select
+asserted around the complete message.  Use SSPFRM as a GPIO (through cs_control)
+to accommodate these chips.
+
+
+NSSP SLAVE SAMPLE
+-----------------
+The pxa2xx_spi_chip structure is passed to the pxa2xx_spi driver in the
+"spi_board_info.controller_data" field. Below is a sample configuration using
+the PXA255 NSSP.
+
+::
+
+  /* Chip Select control for the CS8415A SPI slave device */
+  static void cs8415a_cs_control(u32 command)
+  {
+	if (command & PXA2XX_CS_ASSERT)
+		GPCR(2) = GPIO_bit(2);
+	else
+		GPSR(2) = GPIO_bit(2);
+  }
+
+  /* Chip Select control for the CS8405A SPI slave device */
+  static void cs8405a_cs_control(u32 command)
+  {
+	if (command & PXA2XX_CS_ASSERT)
+		GPCR(3) = GPIO_bit(3);
+	else
+		GPSR(3) = GPIO_bit(3);
+  }
+
+  static struct pxa2xx_spi_chip cs8415a_chip_info = {
+	.tx_threshold = 8, /* SSP hardward FIFO threshold */
+	.rx_threshold = 8, /* SSP hardward FIFO threshold */
+	.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
+	.timeout = 235, /* See Intel documentation */
+	.cs_control = cs8415a_cs_control, /* Use external chip select */
+  };
+
+  static struct pxa2xx_spi_chip cs8405a_chip_info = {
+	.tx_threshold = 8, /* SSP hardward FIFO threshold */
+	.rx_threshold = 8, /* SSP hardward FIFO threshold */
+	.dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
+	.timeout = 235, /* See Intel documentation */
+	.cs_control = cs8405a_cs_control, /* Use external chip select */
+  };
+
+  static struct spi_board_info streetracer_spi_board_info[] __initdata = {
+	{
+		.modalias = "cs8415a", /* Name of spi_driver for this device */
+		.max_speed_hz = 3686400, /* Run SSP as fast a possbile */
+		.bus_num = 2, /* Framework bus number */
+		.chip_select = 0, /* Framework chip select */
+		.platform_data = NULL; /* No spi_driver specific config */
+		.controller_data = &cs8415a_chip_info, /* Master chip config */
+		.irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
+	},
+	{
+		.modalias = "cs8405a", /* Name of spi_driver for this device */
+		.max_speed_hz = 3686400, /* Run SSP as fast a possbile */
+		.bus_num = 2, /* Framework bus number */
+		.chip_select = 1, /* Framework chip select */
+		.controller_data = &cs8405a_chip_info, /* Master chip config */
+		.irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
+	},
+  };
+
+  static void __init streetracer_init(void)
+  {
+	spi_register_board_info(streetracer_spi_board_info,
+				ARRAY_SIZE(streetracer_spi_board_info));
+  }
+
+
+DMA and PIO I/O Support
+-----------------------
+The pxa2xx_spi driver supports both DMA and interrupt driven PIO message
+transfers.  The driver defaults to PIO mode and DMA transfers must be enabled
+by setting the "enable_dma" flag in the "pxa2xx_spi_controller" structure.  The DMA
+mode supports both coherent and stream based DMA mappings.
+
+The following logic is used to determine the type of I/O to be used on
+a per "spi_transfer" basis::
+
+  if !enable_dma then
+	always use PIO transfers
+
+  if spi_message.len > 8191 then
+	print "rate limited" warning
+	use PIO transfers
+
+  if spi_message.is_dma_mapped and rx_dma_buf != 0 and tx_dma_buf != 0 then
+	use coherent DMA mode
+
+  if rx_buf and tx_buf are aligned on 8 byte boundary then
+	use streaming DMA mode
+
+  otherwise
+	use PIO transfer
+
+THANKS TO
+---------
+
+David Brownell and others for mentoring the development of this driver.
diff --git a/Documentation/spi/spi-lm70llp b/Documentation/spi/spi-lm70llp
deleted file mode 100644
index 463f6d01fa15..000000000000
--- a/Documentation/spi/spi-lm70llp
+++ /dev/null
@@ -1,79 +0,0 @@
-spi_lm70llp :  LM70-LLP parport-to-SPI adapter
-==============================================
-
-Supported board/chip:
-  * National Semiconductor LM70 LLP evaluation board
-    Datasheet: http://www.national.com/pf/LM/LM70.html
-
-Author:
-        Kaiwan N Billimoria <kaiwan@designergraphix.com>
-
-Description
------------
-This driver provides glue code connecting a National Semiconductor LM70 LLP
-temperature sensor evaluation board to the kernel's SPI core subsystem.
-
-This is a SPI master controller driver. It can be used in conjunction with
-(layered under) the LM70 logical driver (a "SPI protocol driver").
-In effect, this driver turns the parallel port interface on the eval board
-into a SPI bus with a single device, which will be driven by the generic
-LM70 driver (drivers/hwmon/lm70.c).
-
-
-Hardware Interfacing
---------------------
-The schematic for this particular board (the LM70EVAL-LLP) is
-available (on page 4) here:
-
-  http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf
-
-The hardware interfacing on the LM70 LLP eval board is as follows:
-
-   Parallel                 LM70 LLP
-     Port      Direction   JP2 Header
-   ----------- --------- ----------------
-      D0     2      -         -
-      D1     3     -->      V+   5
-      D2     4     -->      V+   5
-      D3     5     -->      V+   5
-      D4     6     -->      V+   5
-      D5     7     -->      nCS  8
-      D6     8     -->      SCLK 3
-      D7     9     -->      SI/O 5
-     GND    25      -       GND  7
-    Select  13     <--      SI/O 1
-   ----------- --------- ----------------
-
-Note that since the LM70 uses a "3-wire" variant of SPI, the SI/SO pin
-is connected to both pin D7 (as Master Out) and Select (as Master In)
-using an arrangement that lets either the parport or the LM70 pull the
-pin low.  This can't be shared with true SPI devices, but other 3-wire
-devices might share the same SI/SO pin.
-
-The bitbanger routine in this driver (lm70_txrx) is called back from
-the bound "hwmon/lm70" protocol driver through its sysfs hook, using a
-spi_write_then_read() call.  It performs Mode 0 (SPI/Microwire) bitbanging.
-The lm70 driver then inteprets the resulting digital temperature value
-and exports it through sysfs.
-
-A "gotcha": National Semiconductor's LM70 LLP eval board circuit schematic
-shows that the SI/O line from the LM70 chip is connected to the base of a
-transistor Q1 (and also a pullup, and a zener diode to D7); while the
-collector is tied to VCC.
-
-Interpreting this circuit, when the LM70 SI/O line is High (or tristate
-and not grounded by the host via D7), the transistor conducts and switches
-the collector to zero, which is reflected on pin 13 of the DB25 parport
-connector.  When SI/O is Low (driven by the LM70 or the host) on the other
-hand, the transistor is cut off and the voltage tied to it's collector is
-reflected on pin 13 as a High level.
-
-So: the getmiso inline routine in this driver takes this fact into account,
-inverting the value read at pin 13.
-
-
-Thanks to
----------
-o David Brownell for mentoring the SPI-side driver development.
-o Dr.Craig Hollabaugh for the (early) "manual" bitbanging driver version.
-o Nadir Billimoria for help interpreting the circuit schematic.
diff --git a/Documentation/spi/spi-lm70llp.rst b/Documentation/spi/spi-lm70llp.rst
new file mode 100644
index 000000000000..07631aef4343
--- /dev/null
+++ b/Documentation/spi/spi-lm70llp.rst
@@ -0,0 +1,84 @@
+==============================================
+spi_lm70llp :  LM70-LLP parport-to-SPI adapter
+==============================================
+
+Supported board/chip:
+
+  * National Semiconductor LM70 LLP evaluation board
+
+    Datasheet: http://www.national.com/pf/LM/LM70.html
+
+Author:
+        Kaiwan N Billimoria <kaiwan@designergraphix.com>
+
+Description
+-----------
+This driver provides glue code connecting a National Semiconductor LM70 LLP
+temperature sensor evaluation board to the kernel's SPI core subsystem.
+
+This is a SPI master controller driver. It can be used in conjunction with
+(layered under) the LM70 logical driver (a "SPI protocol driver").
+In effect, this driver turns the parallel port interface on the eval board
+into a SPI bus with a single device, which will be driven by the generic
+LM70 driver (drivers/hwmon/lm70.c).
+
+
+Hardware Interfacing
+--------------------
+The schematic for this particular board (the LM70EVAL-LLP) is
+available (on page 4) here:
+
+  http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf
+
+The hardware interfacing on the LM70 LLP eval board is as follows:
+
+   ======== == =========   ==========
+   Parallel                 LM70 LLP
+     Port   .  Direction   JP2 Header
+   ======== == =========   ==========
+      D0     2      -         -
+      D1     3     -->      V+   5
+      D2     4     -->      V+   5
+      D3     5     -->      V+   5
+      D4     6     -->      V+   5
+      D5     7     -->      nCS  8
+      D6     8     -->      SCLK 3
+      D7     9     -->      SI/O 5
+     GND    25      -       GND  7
+    Select  13     <--      SI/O 1
+   ======== == =========   ==========
+
+Note that since the LM70 uses a "3-wire" variant of SPI, the SI/SO pin
+is connected to both pin D7 (as Master Out) and Select (as Master In)
+using an arrangement that lets either the parport or the LM70 pull the
+pin low.  This can't be shared with true SPI devices, but other 3-wire
+devices might share the same SI/SO pin.
+
+The bitbanger routine in this driver (lm70_txrx) is called back from
+the bound "hwmon/lm70" protocol driver through its sysfs hook, using a
+spi_write_then_read() call.  It performs Mode 0 (SPI/Microwire) bitbanging.
+The lm70 driver then inteprets the resulting digital temperature value
+and exports it through sysfs.
+
+A "gotcha": National Semiconductor's LM70 LLP eval board circuit schematic
+shows that the SI/O line from the LM70 chip is connected to the base of a
+transistor Q1 (and also a pullup, and a zener diode to D7); while the
+collector is tied to VCC.
+
+Interpreting this circuit, when the LM70 SI/O line is High (or tristate
+and not grounded by the host via D7), the transistor conducts and switches
+the collector to zero, which is reflected on pin 13 of the DB25 parport
+connector.  When SI/O is Low (driven by the LM70 or the host) on the other
+hand, the transistor is cut off and the voltage tied to it's collector is
+reflected on pin 13 as a High level.
+
+So: the getmiso inline routine in this driver takes this fact into account,
+inverting the value read at pin 13.
+
+
+Thanks to
+---------
+
+- David Brownell for mentoring the SPI-side driver development.
+- Dr.Craig Hollabaugh for the (early) "manual" bitbanging driver version.
+- Nadir Billimoria for help interpreting the circuit schematic.
diff --git a/Documentation/spi/spi-sc18is602 b/Documentation/spi/spi-sc18is602
deleted file mode 100644
index 0feffd5af411..000000000000
--- a/Documentation/spi/spi-sc18is602
+++ /dev/null
@@ -1,36 +0,0 @@
-Kernel driver spi-sc18is602
-===========================
-
-Supported chips:
-  * NXP SI18IS602/602B/603
-    Datasheet: http://www.nxp.com/documents/data_sheet/SC18IS602_602B_603.pdf
-
-Author:
-        Guenter Roeck <linux@roeck-us.net>
-
-
-Description
------------
-
-This driver provides connects a NXP SC18IS602/603 I2C-bus to SPI bridge to the
-kernel's SPI core subsystem.
-
-The driver does not probe for supported chips, since the SI18IS602/603 does not
-support Chip ID registers. You will have to instantiate the devices explicitly.
-Please see Documentation/i2c/instantiating-devices.rst for details.
-
-
-Usage Notes
------------
-
-This driver requires the I2C adapter driver to support raw I2C messages. I2C
-adapter drivers which can only handle the SMBus protocol are not supported.
-
-The maximum SPI message size supported by SC18IS602/603 is 200 bytes. Attempts
-to initiate longer transfers will fail with -EINVAL. EEPROM read operations and
-similar large accesses have to be split into multiple chunks of no more than
-200 bytes per SPI message (128 bytes of data per message is recommended). This
-means that programs such as "cp" or "od", which automatically use large block
-sizes to access a device, can not be used directly to read data from EEPROM.
-Programs such as dd, where the block size can be specified, should be used
-instead.
diff --git a/Documentation/spi/spi-sc18is602.rst b/Documentation/spi/spi-sc18is602.rst
new file mode 100644
index 000000000000..2a31dc722321
--- /dev/null
+++ b/Documentation/spi/spi-sc18is602.rst
@@ -0,0 +1,39 @@
+===========================
+Kernel driver spi-sc18is602
+===========================
+
+Supported chips:
+
+  * NXP SI18IS602/602B/603
+
+    Datasheet: http://www.nxp.com/documents/data_sheet/SC18IS602_602B_603.pdf
+
+Author:
+        Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver provides connects a NXP SC18IS602/603 I2C-bus to SPI bridge to the
+kernel's SPI core subsystem.
+
+The driver does not probe for supported chips, since the SI18IS602/603 does not
+support Chip ID registers. You will have to instantiate the devices explicitly.
+Please see Documentation/i2c/instantiating-devices.rst for details.
+
+
+Usage Notes
+-----------
+
+This driver requires the I2C adapter driver to support raw I2C messages. I2C
+adapter drivers which can only handle the SMBus protocol are not supported.
+
+The maximum SPI message size supported by SC18IS602/603 is 200 bytes. Attempts
+to initiate longer transfers will fail with -EINVAL. EEPROM read operations and
+similar large accesses have to be split into multiple chunks of no more than
+200 bytes per SPI message (128 bytes of data per message is recommended). This
+means that programs such as "cp" or "od", which automatically use large block
+sizes to access a device, can not be used directly to read data from EEPROM.
+Programs such as dd, where the block size can be specified, should be used
+instead.
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
deleted file mode 100644
index 1a63194b74d7..000000000000
--- a/Documentation/spi/spi-summary
+++ /dev/null
@@ -1,631 +0,0 @@
-Overview of Linux kernel SPI support
-====================================
-
-02-Feb-2012
-
-What is SPI?
-------------
-The "Serial Peripheral Interface" (SPI) is a synchronous four wire serial
-link used to connect microcontrollers to sensors, memory, and peripherals.
-It's a simple "de facto" standard, not complicated enough to acquire a
-standardization body.  SPI uses a master/slave configuration.
-
-The three signal wires hold a clock (SCK, often on the order of 10 MHz),
-and parallel data lines with "Master Out, Slave In" (MOSI) or "Master In,
-Slave Out" (MISO) signals.  (Other names are also used.)  There are four
-clocking modes through which data is exchanged; mode-0 and mode-3 are most
-commonly used.  Each clock cycle shifts data out and data in; the clock
-doesn't cycle except when there is a data bit to shift.  Not all data bits
-are used though; not every protocol uses those full duplex capabilities.
-
-SPI masters use a fourth "chip select" line to activate a given SPI slave
-device, so those three signal wires may be connected to several chips
-in parallel.  All SPI slaves support chipselects; they are usually active
-low signals, labeled nCSx for slave 'x' (e.g. nCS0).  Some devices have
-other signals, often including an interrupt to the master.
-
-Unlike serial busses like USB or SMBus, even low level protocols for
-SPI slave functions are usually not interoperable between vendors
-(except for commodities like SPI memory chips).
-
-  - SPI may be used for request/response style device protocols, as with
-    touchscreen sensors and memory chips.
-
-  - It may also be used to stream data in either direction (half duplex),
-    or both of them at the same time (full duplex).
-
-  - Some devices may use eight bit words.  Others may use different word
-    lengths, such as streams of 12-bit or 20-bit digital samples.
-
-  - Words are usually sent with their most significant bit (MSB) first,
-    but sometimes the least significant bit (LSB) goes first instead.
-
-  - Sometimes SPI is used to daisy-chain devices, like shift registers.
-
-In the same way, SPI slaves will only rarely support any kind of automatic
-discovery/enumeration protocol.  The tree of slave devices accessible from
-a given SPI master will normally be set up manually, with configuration
-tables.
-
-SPI is only one of the names used by such four-wire protocols, and
-most controllers have no problem handling "MicroWire" (think of it as
-half-duplex SPI, for request/response protocols), SSP ("Synchronous
-Serial Protocol"), PSP ("Programmable Serial Protocol"), and other
-related protocols.
-
-Some chips eliminate a signal line by combining MOSI and MISO, and
-limiting themselves to half-duplex at the hardware level.  In fact
-some SPI chips have this signal mode as a strapping option.  These
-can be accessed using the same programming interface as SPI, but of
-course they won't handle full duplex transfers.  You may find such
-chips described as using "three wire" signaling: SCK, data, nCSx.
-(That data line is sometimes called MOMI or SISO.)
-
-Microcontrollers often support both master and slave sides of the SPI
-protocol.  This document (and Linux) supports both the master and slave
-sides of SPI interactions.
-
-
-Who uses it?  On what kinds of systems?
----------------------------------------
-Linux developers using SPI are probably writing device drivers for embedded
-systems boards.  SPI is used to control external chips, and it is also a
-protocol supported by every MMC or SD memory card.  (The older "DataFlash"
-cards, predating MMC cards but using the same connectors and card shape,
-support only SPI.)  Some PC hardware uses SPI flash for BIOS code.
-
-SPI slave chips range from digital/analog converters used for analog
-sensors and codecs, to memory, to peripherals like USB controllers
-or Ethernet adapters; and more.
-
-Most systems using SPI will integrate a few devices on a mainboard.
-Some provide SPI links on expansion connectors; in cases where no
-dedicated SPI controller exists, GPIO pins can be used to create a
-low speed "bitbanging" adapter.  Very few systems will "hotplug" an SPI
-controller; the reasons to use SPI focus on low cost and simple operation,
-and if dynamic reconfiguration is important, USB will often be a more
-appropriate low-pincount peripheral bus.
-
-Many microcontrollers that can run Linux integrate one or more I/O
-interfaces with SPI modes.  Given SPI support, they could use MMC or SD
-cards without needing a special purpose MMC/SD/SDIO controller.
-
-
-I'm confused.  What are these four SPI "clock modes"?
------------------------------------------------------
-It's easy to be confused here, and the vendor documentation you'll
-find isn't necessarily helpful.  The four modes combine two mode bits:
-
- - CPOL indicates the initial clock polarity.  CPOL=0 means the
-   clock starts low, so the first (leading) edge is rising, and
-   the second (trailing) edge is falling.  CPOL=1 means the clock
-   starts high, so the first (leading) edge is falling.
-
- - CPHA indicates the clock phase used to sample data; CPHA=0 says
-   sample on the leading edge, CPHA=1 means the trailing edge.
-
-   Since the signal needs to stablize before it's sampled, CPHA=0
-   implies that its data is written half a clock before the first
-   clock edge.  The chipselect may have made it become available.
-
-Chip specs won't always say "uses SPI mode X" in as many words,
-but their timing diagrams will make the CPOL and CPHA modes clear.
-
-In the SPI mode number, CPOL is the high order bit and CPHA is the
-low order bit.  So when a chip's timing diagram shows the clock
-starting low (CPOL=0) and data stabilized for sampling during the
-trailing clock edge (CPHA=1), that's SPI mode 1.
-
-Note that the clock mode is relevant as soon as the chipselect goes
-active.  So the master must set the clock to inactive before selecting
-a slave, and the slave can tell the chosen polarity by sampling the
-clock level when its select line goes active.  That's why many devices
-support for example both modes 0 and 3:  they don't care about polarity,
-and always clock data in/out on rising clock edges.
-
-
-How do these driver programming interfaces work?
-------------------------------------------------
-The <linux/spi/spi.h> header file includes kerneldoc, as does the
-main source code, and you should certainly read that chapter of the
-kernel API document.  This is just an overview, so you get the big
-picture before those details.
-
-SPI requests always go into I/O queues.  Requests for a given SPI device
-are always executed in FIFO order, and complete asynchronously through
-completion callbacks.  There are also some simple synchronous wrappers
-for those calls, including ones for common transaction types like writing
-a command and then reading its response.
-
-There are two types of SPI driver, here called:
-
-  Controller drivers ... controllers may be built into System-On-Chip
-	processors, and often support both Master and Slave roles.
-	These drivers touch hardware registers and may use DMA.
-	Or they can be PIO bitbangers, needing just GPIO pins.
-
-  Protocol drivers ... these pass messages through the controller
-	driver to communicate with a Slave or Master device on the
-	other side of an SPI link.
-
-So for example one protocol driver might talk to the MTD layer to export
-data to filesystems stored on SPI flash like DataFlash; and others might
-control audio interfaces, present touchscreen sensors as input interfaces,
-or monitor temperature and voltage levels during industrial processing.
-And those might all be sharing the same controller driver.
-
-A "struct spi_device" encapsulates the controller-side interface between
-those two types of drivers.
-
-There is a minimal core of SPI programming interfaces, focussing on
-using the driver model to connect controller and protocol drivers using
-device tables provided by board specific initialization code.  SPI
-shows up in sysfs in several locations:
-
-   /sys/devices/.../CTLR ... physical node for a given SPI controller
-
-   /sys/devices/.../CTLR/spiB.C ... spi_device on bus "B",
-	chipselect C, accessed through CTLR.
-
-   /sys/bus/spi/devices/spiB.C ... symlink to that physical
-   	.../CTLR/spiB.C device
-
-   /sys/devices/.../CTLR/spiB.C/modalias ... identifies the driver
-	that should be used with this device (for hotplug/coldplug)
-
-   /sys/bus/spi/drivers/D ... driver for one or more spi*.* devices
-
-   /sys/class/spi_master/spiB ... symlink (or actual device node) to
-	a logical node which could hold class related state for the SPI
-	master controller managing bus "B".  All spiB.* devices share one
-	physical SPI bus segment, with SCLK, MOSI, and MISO.
-
-   /sys/devices/.../CTLR/slave ... virtual file for (un)registering the
-	slave device for an SPI slave controller.
-	Writing the driver name of an SPI slave handler to this file
-	registers the slave device; writing "(null)" unregisters the slave
-	device.
-	Reading from this file shows the name of the slave device ("(null)"
-	if not registered).
-
-   /sys/class/spi_slave/spiB ... symlink (or actual device node) to
-	a logical node which could hold class related state for the SPI
-	slave controller on bus "B".  When registered, a single spiB.*
-	device is present here, possible sharing the physical SPI bus
-	segment with other SPI slave devices.
-
-Note that the actual location of the controller's class state depends
-on whether you enabled CONFIG_SYSFS_DEPRECATED or not.  At this time,
-the only class-specific state is the bus number ("B" in "spiB"), so
-those /sys/class entries are only useful to quickly identify busses.
-
-
-How does board-specific init code declare SPI devices?
-------------------------------------------------------
-Linux needs several kinds of information to properly configure SPI devices.
-That information is normally provided by board-specific code, even for
-chips that do support some of automated discovery/enumeration.
-
-DECLARE CONTROLLERS
-
-The first kind of information is a list of what SPI controllers exist.
-For System-on-Chip (SOC) based boards, these will usually be platform
-devices, and the controller may need some platform_data in order to
-operate properly.  The "struct platform_device" will include resources
-like the physical address of the controller's first register and its IRQ.
-
-Platforms will often abstract the "register SPI controller" operation,
-maybe coupling it with code to initialize pin configurations, so that
-the arch/.../mach-*/board-*.c files for several boards can all share the
-same basic controller setup code.  This is because most SOCs have several
-SPI-capable controllers, and only the ones actually usable on a given
-board should normally be set up and registered.
-
-So for example arch/.../mach-*/board-*.c files might have code like:
-
-	#include <mach/spi.h>	/* for mysoc_spi_data */
-
-	/* if your mach-* infrastructure doesn't support kernels that can
-	 * run on multiple boards, pdata wouldn't benefit from "__init".
-	 */
-	static struct mysoc_spi_data pdata __initdata = { ... };
-
-	static __init board_init(void)
-	{
-		...
-		/* this board only uses SPI controller #2 */
-		mysoc_register_spi(2, &pdata);
-		...
-	}
-
-And SOC-specific utility code might look something like:
-
-	#include <mach/spi.h>
-
-	static struct platform_device spi2 = { ... };
-
-	void mysoc_register_spi(unsigned n, struct mysoc_spi_data *pdata)
-	{
-		struct mysoc_spi_data *pdata2;
-
-		pdata2 = kmalloc(sizeof *pdata2, GFP_KERNEL);
-		*pdata2 = pdata;
-		...
-		if (n == 2) {
-			spi2->dev.platform_data = pdata2;
-			register_platform_device(&spi2);
-
-			/* also: set up pin modes so the spi2 signals are
-			 * visible on the relevant pins ... bootloaders on
-			 * production boards may already have done this, but
-			 * developer boards will often need Linux to do it.
-			 */
-		}
-		...
-	}
-
-Notice how the platform_data for boards may be different, even if the
-same SOC controller is used.  For example, on one board SPI might use
-an external clock, where another derives the SPI clock from current
-settings of some master clock.
-
-
-DECLARE SLAVE DEVICES
-
-The second kind of information is a list of what SPI slave devices exist
-on the target board, often with some board-specific data needed for the
-driver to work correctly.
-
-Normally your arch/.../mach-*/board-*.c files would provide a small table
-listing the SPI devices on each board.  (This would typically be only a
-small handful.)  That might look like:
-
-	static struct ads7846_platform_data ads_info = {
-		.vref_delay_usecs	= 100,
-		.x_plate_ohms		= 580,
-		.y_plate_ohms		= 410,
-	};
-
-	static struct spi_board_info spi_board_info[] __initdata = {
-	{
-		.modalias	= "ads7846",
-		.platform_data	= &ads_info,
-		.mode		= SPI_MODE_0,
-		.irq		= GPIO_IRQ(31),
-		.max_speed_hz	= 120000 /* max sample rate at 3V */ * 16,
-		.bus_num	= 1,
-		.chip_select	= 0,
-	},
-	};
-
-Again, notice how board-specific information is provided; each chip may need
-several types.  This example shows generic constraints like the fastest SPI
-clock to allow (a function of board voltage in this case) or how an IRQ pin
-is wired, plus chip-specific constraints like an important delay that's
-changed by the capacitance at one pin.
-
-(There's also "controller_data", information that may be useful to the
-controller driver.  An example would be peripheral-specific DMA tuning
-data or chipselect callbacks.  This is stored in spi_device later.)
-
-The board_info should provide enough information to let the system work
-without the chip's driver being loaded.  The most troublesome aspect of
-that is likely the SPI_CS_HIGH bit in the spi_device.mode field, since
-sharing a bus with a device that interprets chipselect "backwards" is
-not possible until the infrastructure knows how to deselect it.
-
-Then your board initialization code would register that table with the SPI
-infrastructure, so that it's available later when the SPI master controller
-driver is registered:
-
-	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
-
-Like with other static board-specific setup, you won't unregister those.
-
-The widely used "card" style computers bundle memory, cpu, and little else
-onto a card that's maybe just thirty square centimeters.  On such systems,
-your arch/.../mach-.../board-*.c file would primarily provide information
-about the devices on the mainboard into which such a card is plugged.  That
-certainly includes SPI devices hooked up through the card connectors!
-
-
-NON-STATIC CONFIGURATIONS
-
-Developer boards often play by different rules than product boards, and one
-example is the potential need to hotplug SPI devices and/or controllers.
-
-For those cases you might need to use spi_busnum_to_master() to look
-up the spi bus master, and will likely need spi_new_device() to provide the
-board info based on the board that was hotplugged.  Of course, you'd later
-call at least spi_unregister_device() when that board is removed.
-
-When Linux includes support for MMC/SD/SDIO/DataFlash cards through SPI, those
-configurations will also be dynamic.  Fortunately, such devices all support
-basic device identification probes, so they should hotplug normally.
-
-
-How do I write an "SPI Protocol Driver"?
-----------------------------------------
-Most SPI drivers are currently kernel drivers, but there's also support
-for userspace drivers.  Here we talk only about kernel drivers.
-
-SPI protocol drivers somewhat resemble platform device drivers:
-
-	static struct spi_driver CHIP_driver = {
-		.driver = {
-			.name		= "CHIP",
-			.owner		= THIS_MODULE,
-			.pm		= &CHIP_pm_ops,
-		},
-
-		.probe		= CHIP_probe,
-		.remove		= CHIP_remove,
-	};
-
-The driver core will automatically attempt to bind this driver to any SPI
-device whose board_info gave a modalias of "CHIP".  Your probe() code
-might look like this unless you're creating a device which is managing
-a bus (appearing under /sys/class/spi_master).
-
-	static int CHIP_probe(struct spi_device *spi)
-	{
-		struct CHIP			*chip;
-		struct CHIP_platform_data	*pdata;
-
-		/* assuming the driver requires board-specific data: */
-		pdata = &spi->dev.platform_data;
-		if (!pdata)
-			return -ENODEV;
-
-		/* get memory for driver's per-chip state */
-		chip = kzalloc(sizeof *chip, GFP_KERNEL);
-		if (!chip)
-			return -ENOMEM;
-		spi_set_drvdata(spi, chip);
-
-		... etc
-		return 0;
-	}
-
-As soon as it enters probe(), the driver may issue I/O requests to
-the SPI device using "struct spi_message".  When remove() returns,
-or after probe() fails, the driver guarantees that it won't submit
-any more such messages.
-
-  - An spi_message is a sequence of protocol operations, executed
-    as one atomic sequence.  SPI driver controls include:
-
-      + when bidirectional reads and writes start ... by how its
-        sequence of spi_transfer requests is arranged;
-
-      + which I/O buffers are used ... each spi_transfer wraps a
-        buffer for each transfer direction, supporting full duplex
-        (two pointers, maybe the same one in both cases) and half
-        duplex (one pointer is NULL) transfers;
-
-      + optionally defining short delays after transfers ... using
-        the spi_transfer.delay_usecs setting (this delay can be the
-        only protocol effect, if the buffer length is zero);
-
-      + whether the chipselect becomes inactive after a transfer and
-        any delay ... by using the spi_transfer.cs_change flag;
-
-      + hinting whether the next message is likely to go to this same
-        device ... using the spi_transfer.cs_change flag on the last
-	transfer in that atomic group, and potentially saving costs
-	for chip deselect and select operations.
-
-  - Follow standard kernel rules, and provide DMA-safe buffers in
-    your messages.  That way controller drivers using DMA aren't forced
-    to make extra copies unless the hardware requires it (e.g. working
-    around hardware errata that force the use of bounce buffering).
-
-    If standard dma_map_single() handling of these buffers is inappropriate,
-    you can use spi_message.is_dma_mapped to tell the controller driver
-    that you've already provided the relevant DMA addresses.
-
-  - The basic I/O primitive is spi_async().  Async requests may be
-    issued in any context (irq handler, task, etc) and completion
-    is reported using a callback provided with the message.
-    After any detected error, the chip is deselected and processing
-    of that spi_message is aborted.
-
-  - There are also synchronous wrappers like spi_sync(), and wrappers
-    like spi_read(), spi_write(), and spi_write_then_read().  These
-    may be issued only in contexts that may sleep, and they're all
-    clean (and small, and "optional") layers over spi_async().
-
-  - The spi_write_then_read() call, and convenience wrappers around
-    it, should only be used with small amounts of data where the
-    cost of an extra copy may be ignored.  It's designed to support
-    common RPC-style requests, such as writing an eight bit command
-    and reading a sixteen bit response -- spi_w8r16() being one its
-    wrappers, doing exactly that.
-
-Some drivers may need to modify spi_device characteristics like the
-transfer mode, wordsize, or clock rate.  This is done with spi_setup(),
-which would normally be called from probe() before the first I/O is
-done to the device.  However, that can also be called at any time
-that no message is pending for that device.
-
-While "spi_device" would be the bottom boundary of the driver, the
-upper boundaries might include sysfs (especially for sensor readings),
-the input layer, ALSA, networking, MTD, the character device framework,
-or other Linux subsystems.
-
-Note that there are two types of memory your driver must manage as part
-of interacting with SPI devices.
-
-  - I/O buffers use the usual Linux rules, and must be DMA-safe.
-    You'd normally allocate them from the heap or free page pool.
-    Don't use the stack, or anything that's declared "static".
-
-  - The spi_message and spi_transfer metadata used to glue those
-    I/O buffers into a group of protocol transactions.  These can
-    be allocated anywhere it's convenient, including as part of
-    other allocate-once driver data structures.  Zero-init these.
-
-If you like, spi_message_alloc() and spi_message_free() convenience
-routines are available to allocate and zero-initialize an spi_message
-with several transfers.
-
-
-How do I write an "SPI Master Controller Driver"?
--------------------------------------------------
-An SPI controller will probably be registered on the platform_bus; write
-a driver to bind to the device, whichever bus is involved.
-
-The main task of this type of driver is to provide an "spi_master".
-Use spi_alloc_master() to allocate the master, and spi_master_get_devdata()
-to get the driver-private data allocated for that device.
-
-	struct spi_master	*master;
-	struct CONTROLLER	*c;
-
-	master = spi_alloc_master(dev, sizeof *c);
-	if (!master)
-		return -ENODEV;
-
-	c = spi_master_get_devdata(master);
-
-The driver will initialize the fields of that spi_master, including the
-bus number (maybe the same as the platform device ID) and three methods
-used to interact with the SPI core and SPI protocol drivers.  It will
-also initialize its own internal state.  (See below about bus numbering
-and those methods.)
-
-After you initialize the spi_master, then use spi_register_master() to
-publish it to the rest of the system. At that time, device nodes for the
-controller and any predeclared spi devices will be made available, and
-the driver model core will take care of binding them to drivers.
-
-If you need to remove your SPI controller driver, spi_unregister_master()
-will reverse the effect of spi_register_master().
-
-
-BUS NUMBERING
-
-Bus numbering is important, since that's how Linux identifies a given
-SPI bus (shared SCK, MOSI, MISO).  Valid bus numbers start at zero.  On
-SOC systems, the bus numbers should match the numbers defined by the chip
-manufacturer.  For example, hardware controller SPI2 would be bus number 2,
-and spi_board_info for devices connected to it would use that number.
-
-If you don't have such hardware-assigned bus number, and for some reason
-you can't just assign them, then provide a negative bus number.  That will
-then be replaced by a dynamically assigned number. You'd then need to treat
-this as a non-static configuration (see above).
-
-
-SPI MASTER METHODS
-
-    master->setup(struct spi_device *spi)
-	This sets up the device clock rate, SPI mode, and word sizes.
-	Drivers may change the defaults provided by board_info, and then
-	call spi_setup(spi) to invoke this routine.  It may sleep.
-
-	Unless each SPI slave has its own configuration registers, don't
-	change them right away ... otherwise drivers could corrupt I/O
-	that's in progress for other SPI devices.
-
-		** BUG ALERT:  for some reason the first version of
-		** many spi_master drivers seems to get this wrong.
-		** When you code setup(), ASSUME that the controller
-		** is actively processing transfers for another device.
-
-    master->cleanup(struct spi_device *spi)
-	Your controller driver may use spi_device.controller_state to hold
-	state it dynamically associates with that device.  If you do that,
-	be sure to provide the cleanup() method to free that state.
-
-    master->prepare_transfer_hardware(struct spi_master *master)
-	This will be called by the queue mechanism to signal to the driver
-	that a message is coming in soon, so the subsystem requests the
-	driver to prepare the transfer hardware by issuing this call.
-	This may sleep.
-
-    master->unprepare_transfer_hardware(struct spi_master *master)
-	This will be called by the queue mechanism to signal to the driver
-	that there are no more messages pending in the queue and it may
-	relax the hardware (e.g. by power management calls). This may sleep.
-
-    master->transfer_one_message(struct spi_master *master,
-				 struct spi_message *mesg)
-	The subsystem calls the driver to transfer a single message while
-	queuing transfers that arrive in the meantime. When the driver is
-	finished with this message, it must call
-	spi_finalize_current_message() so the subsystem can issue the next
-	message. This may sleep.
-
-    master->transfer_one(struct spi_master *master, struct spi_device *spi,
-			 struct spi_transfer *transfer)
-	The subsystem calls the driver to transfer a single transfer while
-	queuing transfers that arrive in the meantime. When the driver is
-	finished with this transfer, it must call
-	spi_finalize_current_transfer() so the subsystem can issue the next
-	transfer. This may sleep. Note: transfer_one and transfer_one_message
-	are mutually exclusive; when both are set, the generic subsystem does
-	not call your transfer_one callback.
-
-	Return values:
-	negative errno: error
-	0: transfer is finished
-	1: transfer is still in progress
-
-    master->set_cs_timing(struct spi_device *spi, u8 setup_clk_cycles,
-			      u8 hold_clk_cycles, u8 inactive_clk_cycles)
-	This method allows SPI client drivers to request SPI master controller
-	for configuring device specific CS setup, hold and inactive timing
-	requirements.
-
-    DEPRECATED METHODS
-
-    master->transfer(struct spi_device *spi, struct spi_message *message)
-	This must not sleep. Its responsibility is to arrange that the
-	transfer happens and its complete() callback is issued. The two
-	will normally happen later, after other transfers complete, and
-	if the controller is idle it will need to be kickstarted. This
-	method is not used on queued controllers and must be NULL if
-	transfer_one_message() and (un)prepare_transfer_hardware() are
-	implemented.
-
-
-SPI MESSAGE QUEUE
-
-If you are happy with the standard queueing mechanism provided by the
-SPI subsystem, just implement the queued methods specified above. Using
-the message queue has the upside of centralizing a lot of code and
-providing pure process-context execution of methods. The message queue
-can also be elevated to realtime priority on high-priority SPI traffic.
-
-Unless the queueing mechanism in the SPI subsystem is selected, the bulk
-of the driver will be managing the I/O queue fed by the now deprecated
-function transfer().
-
-That queue could be purely conceptual.  For example, a driver used only
-for low-frequency sensor access might be fine using synchronous PIO.
-
-But the queue will probably be very real, using message->queue, PIO,
-often DMA (especially if the root filesystem is in SPI flash), and
-execution contexts like IRQ handlers, tasklets, or workqueues (such
-as keventd).  Your driver can be as fancy, or as simple, as you need.
-Such a transfer() method would normally just add the message to a
-queue, and then start some asynchronous transfer engine (unless it's
-already running).
-
-
-THANKS TO
----------
-Contributors to Linux-SPI discussions include (in alphabetical order,
-by last name):
-
-Mark Brown
-David Brownell
-Russell King
-Grant Likely
-Dmitry Pervushin
-Stephen Street
-Mark Underwood
-Andrew Victor
-Linus Walleij
-Vitaly Wool
diff --git a/Documentation/spi/spi-summary.rst b/Documentation/spi/spi-summary.rst
new file mode 100644
index 000000000000..f1daffe10d78
--- /dev/null
+++ b/Documentation/spi/spi-summary.rst
@@ -0,0 +1,644 @@
+====================================
+Overview of Linux kernel SPI support
+====================================
+
+02-Feb-2012
+
+What is SPI?
+------------
+The "Serial Peripheral Interface" (SPI) is a synchronous four wire serial
+link used to connect microcontrollers to sensors, memory, and peripherals.
+It's a simple "de facto" standard, not complicated enough to acquire a
+standardization body.  SPI uses a master/slave configuration.
+
+The three signal wires hold a clock (SCK, often on the order of 10 MHz),
+and parallel data lines with "Master Out, Slave In" (MOSI) or "Master In,
+Slave Out" (MISO) signals.  (Other names are also used.)  There are four
+clocking modes through which data is exchanged; mode-0 and mode-3 are most
+commonly used.  Each clock cycle shifts data out and data in; the clock
+doesn't cycle except when there is a data bit to shift.  Not all data bits
+are used though; not every protocol uses those full duplex capabilities.
+
+SPI masters use a fourth "chip select" line to activate a given SPI slave
+device, so those three signal wires may be connected to several chips
+in parallel.  All SPI slaves support chipselects; they are usually active
+low signals, labeled nCSx for slave 'x' (e.g. nCS0).  Some devices have
+other signals, often including an interrupt to the master.
+
+Unlike serial busses like USB or SMBus, even low level protocols for
+SPI slave functions are usually not interoperable between vendors
+(except for commodities like SPI memory chips).
+
+  - SPI may be used for request/response style device protocols, as with
+    touchscreen sensors and memory chips.
+
+  - It may also be used to stream data in either direction (half duplex),
+    or both of them at the same time (full duplex).
+
+  - Some devices may use eight bit words.  Others may use different word
+    lengths, such as streams of 12-bit or 20-bit digital samples.
+
+  - Words are usually sent with their most significant bit (MSB) first,
+    but sometimes the least significant bit (LSB) goes first instead.
+
+  - Sometimes SPI is used to daisy-chain devices, like shift registers.
+
+In the same way, SPI slaves will only rarely support any kind of automatic
+discovery/enumeration protocol.  The tree of slave devices accessible from
+a given SPI master will normally be set up manually, with configuration
+tables.
+
+SPI is only one of the names used by such four-wire protocols, and
+most controllers have no problem handling "MicroWire" (think of it as
+half-duplex SPI, for request/response protocols), SSP ("Synchronous
+Serial Protocol"), PSP ("Programmable Serial Protocol"), and other
+related protocols.
+
+Some chips eliminate a signal line by combining MOSI and MISO, and
+limiting themselves to half-duplex at the hardware level.  In fact
+some SPI chips have this signal mode as a strapping option.  These
+can be accessed using the same programming interface as SPI, but of
+course they won't handle full duplex transfers.  You may find such
+chips described as using "three wire" signaling: SCK, data, nCSx.
+(That data line is sometimes called MOMI or SISO.)
+
+Microcontrollers often support both master and slave sides of the SPI
+protocol.  This document (and Linux) supports both the master and slave
+sides of SPI interactions.
+
+
+Who uses it?  On what kinds of systems?
+---------------------------------------
+Linux developers using SPI are probably writing device drivers for embedded
+systems boards.  SPI is used to control external chips, and it is also a
+protocol supported by every MMC or SD memory card.  (The older "DataFlash"
+cards, predating MMC cards but using the same connectors and card shape,
+support only SPI.)  Some PC hardware uses SPI flash for BIOS code.
+
+SPI slave chips range from digital/analog converters used for analog
+sensors and codecs, to memory, to peripherals like USB controllers
+or Ethernet adapters; and more.
+
+Most systems using SPI will integrate a few devices on a mainboard.
+Some provide SPI links on expansion connectors; in cases where no
+dedicated SPI controller exists, GPIO pins can be used to create a
+low speed "bitbanging" adapter.  Very few systems will "hotplug" an SPI
+controller; the reasons to use SPI focus on low cost and simple operation,
+and if dynamic reconfiguration is important, USB will often be a more
+appropriate low-pincount peripheral bus.
+
+Many microcontrollers that can run Linux integrate one or more I/O
+interfaces with SPI modes.  Given SPI support, they could use MMC or SD
+cards without needing a special purpose MMC/SD/SDIO controller.
+
+
+I'm confused.  What are these four SPI "clock modes"?
+-----------------------------------------------------
+It's easy to be confused here, and the vendor documentation you'll
+find isn't necessarily helpful.  The four modes combine two mode bits:
+
+ - CPOL indicates the initial clock polarity.  CPOL=0 means the
+   clock starts low, so the first (leading) edge is rising, and
+   the second (trailing) edge is falling.  CPOL=1 means the clock
+   starts high, so the first (leading) edge is falling.
+
+ - CPHA indicates the clock phase used to sample data; CPHA=0 says
+   sample on the leading edge, CPHA=1 means the trailing edge.
+
+   Since the signal needs to stablize before it's sampled, CPHA=0
+   implies that its data is written half a clock before the first
+   clock edge.  The chipselect may have made it become available.
+
+Chip specs won't always say "uses SPI mode X" in as many words,
+but their timing diagrams will make the CPOL and CPHA modes clear.
+
+In the SPI mode number, CPOL is the high order bit and CPHA is the
+low order bit.  So when a chip's timing diagram shows the clock
+starting low (CPOL=0) and data stabilized for sampling during the
+trailing clock edge (CPHA=1), that's SPI mode 1.
+
+Note that the clock mode is relevant as soon as the chipselect goes
+active.  So the master must set the clock to inactive before selecting
+a slave, and the slave can tell the chosen polarity by sampling the
+clock level when its select line goes active.  That's why many devices
+support for example both modes 0 and 3:  they don't care about polarity,
+and always clock data in/out on rising clock edges.
+
+
+How do these driver programming interfaces work?
+------------------------------------------------
+The <linux/spi/spi.h> header file includes kerneldoc, as does the
+main source code, and you should certainly read that chapter of the
+kernel API document.  This is just an overview, so you get the big
+picture before those details.
+
+SPI requests always go into I/O queues.  Requests for a given SPI device
+are always executed in FIFO order, and complete asynchronously through
+completion callbacks.  There are also some simple synchronous wrappers
+for those calls, including ones for common transaction types like writing
+a command and then reading its response.
+
+There are two types of SPI driver, here called:
+
+  Controller drivers ...
+        controllers may be built into System-On-Chip
+	processors, and often support both Master and Slave roles.
+	These drivers touch hardware registers and may use DMA.
+	Or they can be PIO bitbangers, needing just GPIO pins.
+
+  Protocol drivers ...
+        these pass messages through the controller
+	driver to communicate with a Slave or Master device on the
+	other side of an SPI link.
+
+So for example one protocol driver might talk to the MTD layer to export
+data to filesystems stored on SPI flash like DataFlash; and others might
+control audio interfaces, present touchscreen sensors as input interfaces,
+or monitor temperature and voltage levels during industrial processing.
+And those might all be sharing the same controller driver.
+
+A "struct spi_device" encapsulates the controller-side interface between
+those two types of drivers.
+
+There is a minimal core of SPI programming interfaces, focussing on
+using the driver model to connect controller and protocol drivers using
+device tables provided by board specific initialization code.  SPI
+shows up in sysfs in several locations::
+
+   /sys/devices/.../CTLR ... physical node for a given SPI controller
+
+   /sys/devices/.../CTLR/spiB.C ... spi_device on bus "B",
+	chipselect C, accessed through CTLR.
+
+   /sys/bus/spi/devices/spiB.C ... symlink to that physical
+	.../CTLR/spiB.C device
+
+   /sys/devices/.../CTLR/spiB.C/modalias ... identifies the driver
+	that should be used with this device (for hotplug/coldplug)
+
+   /sys/bus/spi/drivers/D ... driver for one or more spi*.* devices
+
+   /sys/class/spi_master/spiB ... symlink (or actual device node) to
+	a logical node which could hold class related state for the SPI
+	master controller managing bus "B".  All spiB.* devices share one
+	physical SPI bus segment, with SCLK, MOSI, and MISO.
+
+   /sys/devices/.../CTLR/slave ... virtual file for (un)registering the
+	slave device for an SPI slave controller.
+	Writing the driver name of an SPI slave handler to this file
+	registers the slave device; writing "(null)" unregisters the slave
+	device.
+	Reading from this file shows the name of the slave device ("(null)"
+	if not registered).
+
+   /sys/class/spi_slave/spiB ... symlink (or actual device node) to
+	a logical node which could hold class related state for the SPI
+	slave controller on bus "B".  When registered, a single spiB.*
+	device is present here, possible sharing the physical SPI bus
+	segment with other SPI slave devices.
+
+Note that the actual location of the controller's class state depends
+on whether you enabled CONFIG_SYSFS_DEPRECATED or not.  At this time,
+the only class-specific state is the bus number ("B" in "spiB"), so
+those /sys/class entries are only useful to quickly identify busses.
+
+
+How does board-specific init code declare SPI devices?
+------------------------------------------------------
+Linux needs several kinds of information to properly configure SPI devices.
+That information is normally provided by board-specific code, even for
+chips that do support some of automated discovery/enumeration.
+
+Declare Controllers
+^^^^^^^^^^^^^^^^^^^
+
+The first kind of information is a list of what SPI controllers exist.
+For System-on-Chip (SOC) based boards, these will usually be platform
+devices, and the controller may need some platform_data in order to
+operate properly.  The "struct platform_device" will include resources
+like the physical address of the controller's first register and its IRQ.
+
+Platforms will often abstract the "register SPI controller" operation,
+maybe coupling it with code to initialize pin configurations, so that
+the arch/.../mach-*/board-*.c files for several boards can all share the
+same basic controller setup code.  This is because most SOCs have several
+SPI-capable controllers, and only the ones actually usable on a given
+board should normally be set up and registered.
+
+So for example arch/.../mach-*/board-*.c files might have code like::
+
+	#include <mach/spi.h>	/* for mysoc_spi_data */
+
+	/* if your mach-* infrastructure doesn't support kernels that can
+	 * run on multiple boards, pdata wouldn't benefit from "__init".
+	 */
+	static struct mysoc_spi_data pdata __initdata = { ... };
+
+	static __init board_init(void)
+	{
+		...
+		/* this board only uses SPI controller #2 */
+		mysoc_register_spi(2, &pdata);
+		...
+	}
+
+And SOC-specific utility code might look something like::
+
+	#include <mach/spi.h>
+
+	static struct platform_device spi2 = { ... };
+
+	void mysoc_register_spi(unsigned n, struct mysoc_spi_data *pdata)
+	{
+		struct mysoc_spi_data *pdata2;
+
+		pdata2 = kmalloc(sizeof *pdata2, GFP_KERNEL);
+		*pdata2 = pdata;
+		...
+		if (n == 2) {
+			spi2->dev.platform_data = pdata2;
+			register_platform_device(&spi2);
+
+			/* also: set up pin modes so the spi2 signals are
+			 * visible on the relevant pins ... bootloaders on
+			 * production boards may already have done this, but
+			 * developer boards will often need Linux to do it.
+			 */
+		}
+		...
+	}
+
+Notice how the platform_data for boards may be different, even if the
+same SOC controller is used.  For example, on one board SPI might use
+an external clock, where another derives the SPI clock from current
+settings of some master clock.
+
+Declare Slave Devices
+^^^^^^^^^^^^^^^^^^^^^
+
+The second kind of information is a list of what SPI slave devices exist
+on the target board, often with some board-specific data needed for the
+driver to work correctly.
+
+Normally your arch/.../mach-*/board-*.c files would provide a small table
+listing the SPI devices on each board.  (This would typically be only a
+small handful.)  That might look like::
+
+	static struct ads7846_platform_data ads_info = {
+		.vref_delay_usecs	= 100,
+		.x_plate_ohms		= 580,
+		.y_plate_ohms		= 410,
+	};
+
+	static struct spi_board_info spi_board_info[] __initdata = {
+	{
+		.modalias	= "ads7846",
+		.platform_data	= &ads_info,
+		.mode		= SPI_MODE_0,
+		.irq		= GPIO_IRQ(31),
+		.max_speed_hz	= 120000 /* max sample rate at 3V */ * 16,
+		.bus_num	= 1,
+		.chip_select	= 0,
+	},
+	};
+
+Again, notice how board-specific information is provided; each chip may need
+several types.  This example shows generic constraints like the fastest SPI
+clock to allow (a function of board voltage in this case) or how an IRQ pin
+is wired, plus chip-specific constraints like an important delay that's
+changed by the capacitance at one pin.
+
+(There's also "controller_data", information that may be useful to the
+controller driver.  An example would be peripheral-specific DMA tuning
+data or chipselect callbacks.  This is stored in spi_device later.)
+
+The board_info should provide enough information to let the system work
+without the chip's driver being loaded.  The most troublesome aspect of
+that is likely the SPI_CS_HIGH bit in the spi_device.mode field, since
+sharing a bus with a device that interprets chipselect "backwards" is
+not possible until the infrastructure knows how to deselect it.
+
+Then your board initialization code would register that table with the SPI
+infrastructure, so that it's available later when the SPI master controller
+driver is registered::
+
+	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
+
+Like with other static board-specific setup, you won't unregister those.
+
+The widely used "card" style computers bundle memory, cpu, and little else
+onto a card that's maybe just thirty square centimeters.  On such systems,
+your ``arch/.../mach-.../board-*.c`` file would primarily provide information
+about the devices on the mainboard into which such a card is plugged.  That
+certainly includes SPI devices hooked up through the card connectors!
+
+
+Non-static Configurations
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Developer boards often play by different rules than product boards, and one
+example is the potential need to hotplug SPI devices and/or controllers.
+
+For those cases you might need to use spi_busnum_to_master() to look
+up the spi bus master, and will likely need spi_new_device() to provide the
+board info based on the board that was hotplugged.  Of course, you'd later
+call at least spi_unregister_device() when that board is removed.
+
+When Linux includes support for MMC/SD/SDIO/DataFlash cards through SPI, those
+configurations will also be dynamic.  Fortunately, such devices all support
+basic device identification probes, so they should hotplug normally.
+
+
+How do I write an "SPI Protocol Driver"?
+----------------------------------------
+Most SPI drivers are currently kernel drivers, but there's also support
+for userspace drivers.  Here we talk only about kernel drivers.
+
+SPI protocol drivers somewhat resemble platform device drivers::
+
+	static struct spi_driver CHIP_driver = {
+		.driver = {
+			.name		= "CHIP",
+			.owner		= THIS_MODULE,
+			.pm		= &CHIP_pm_ops,
+		},
+
+		.probe		= CHIP_probe,
+		.remove		= CHIP_remove,
+	};
+
+The driver core will automatically attempt to bind this driver to any SPI
+device whose board_info gave a modalias of "CHIP".  Your probe() code
+might look like this unless you're creating a device which is managing
+a bus (appearing under /sys/class/spi_master).
+
+::
+
+	static int CHIP_probe(struct spi_device *spi)
+	{
+		struct CHIP			*chip;
+		struct CHIP_platform_data	*pdata;
+
+		/* assuming the driver requires board-specific data: */
+		pdata = &spi->dev.platform_data;
+		if (!pdata)
+			return -ENODEV;
+
+		/* get memory for driver's per-chip state */
+		chip = kzalloc(sizeof *chip, GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+		spi_set_drvdata(spi, chip);
+
+		... etc
+		return 0;
+	}
+
+As soon as it enters probe(), the driver may issue I/O requests to
+the SPI device using "struct spi_message".  When remove() returns,
+or after probe() fails, the driver guarantees that it won't submit
+any more such messages.
+
+  - An spi_message is a sequence of protocol operations, executed
+    as one atomic sequence.  SPI driver controls include:
+
+      + when bidirectional reads and writes start ... by how its
+        sequence of spi_transfer requests is arranged;
+
+      + which I/O buffers are used ... each spi_transfer wraps a
+        buffer for each transfer direction, supporting full duplex
+        (two pointers, maybe the same one in both cases) and half
+        duplex (one pointer is NULL) transfers;
+
+      + optionally defining short delays after transfers ... using
+        the spi_transfer.delay_usecs setting (this delay can be the
+        only protocol effect, if the buffer length is zero);
+
+      + whether the chipselect becomes inactive after a transfer and
+        any delay ... by using the spi_transfer.cs_change flag;
+
+      + hinting whether the next message is likely to go to this same
+        device ... using the spi_transfer.cs_change flag on the last
+	transfer in that atomic group, and potentially saving costs
+	for chip deselect and select operations.
+
+  - Follow standard kernel rules, and provide DMA-safe buffers in
+    your messages.  That way controller drivers using DMA aren't forced
+    to make extra copies unless the hardware requires it (e.g. working
+    around hardware errata that force the use of bounce buffering).
+
+    If standard dma_map_single() handling of these buffers is inappropriate,
+    you can use spi_message.is_dma_mapped to tell the controller driver
+    that you've already provided the relevant DMA addresses.
+
+  - The basic I/O primitive is spi_async().  Async requests may be
+    issued in any context (irq handler, task, etc) and completion
+    is reported using a callback provided with the message.
+    After any detected error, the chip is deselected and processing
+    of that spi_message is aborted.
+
+  - There are also synchronous wrappers like spi_sync(), and wrappers
+    like spi_read(), spi_write(), and spi_write_then_read().  These
+    may be issued only in contexts that may sleep, and they're all
+    clean (and small, and "optional") layers over spi_async().
+
+  - The spi_write_then_read() call, and convenience wrappers around
+    it, should only be used with small amounts of data where the
+    cost of an extra copy may be ignored.  It's designed to support
+    common RPC-style requests, such as writing an eight bit command
+    and reading a sixteen bit response -- spi_w8r16() being one its
+    wrappers, doing exactly that.
+
+Some drivers may need to modify spi_device characteristics like the
+transfer mode, wordsize, or clock rate.  This is done with spi_setup(),
+which would normally be called from probe() before the first I/O is
+done to the device.  However, that can also be called at any time
+that no message is pending for that device.
+
+While "spi_device" would be the bottom boundary of the driver, the
+upper boundaries might include sysfs (especially for sensor readings),
+the input layer, ALSA, networking, MTD, the character device framework,
+or other Linux subsystems.
+
+Note that there are two types of memory your driver must manage as part
+of interacting with SPI devices.
+
+  - I/O buffers use the usual Linux rules, and must be DMA-safe.
+    You'd normally allocate them from the heap or free page pool.
+    Don't use the stack, or anything that's declared "static".
+
+  - The spi_message and spi_transfer metadata used to glue those
+    I/O buffers into a group of protocol transactions.  These can
+    be allocated anywhere it's convenient, including as part of
+    other allocate-once driver data structures.  Zero-init these.
+
+If you like, spi_message_alloc() and spi_message_free() convenience
+routines are available to allocate and zero-initialize an spi_message
+with several transfers.
+
+
+How do I write an "SPI Master Controller Driver"?
+-------------------------------------------------
+An SPI controller will probably be registered on the platform_bus; write
+a driver to bind to the device, whichever bus is involved.
+
+The main task of this type of driver is to provide an "spi_master".
+Use spi_alloc_master() to allocate the master, and spi_master_get_devdata()
+to get the driver-private data allocated for that device.
+
+::
+
+	struct spi_master	*master;
+	struct CONTROLLER	*c;
+
+	master = spi_alloc_master(dev, sizeof *c);
+	if (!master)
+		return -ENODEV;
+
+	c = spi_master_get_devdata(master);
+
+The driver will initialize the fields of that spi_master, including the
+bus number (maybe the same as the platform device ID) and three methods
+used to interact with the SPI core and SPI protocol drivers.  It will
+also initialize its own internal state.  (See below about bus numbering
+and those methods.)
+
+After you initialize the spi_master, then use spi_register_master() to
+publish it to the rest of the system. At that time, device nodes for the
+controller and any predeclared spi devices will be made available, and
+the driver model core will take care of binding them to drivers.
+
+If you need to remove your SPI controller driver, spi_unregister_master()
+will reverse the effect of spi_register_master().
+
+
+Bus Numbering
+^^^^^^^^^^^^^
+
+Bus numbering is important, since that's how Linux identifies a given
+SPI bus (shared SCK, MOSI, MISO).  Valid bus numbers start at zero.  On
+SOC systems, the bus numbers should match the numbers defined by the chip
+manufacturer.  For example, hardware controller SPI2 would be bus number 2,
+and spi_board_info for devices connected to it would use that number.
+
+If you don't have such hardware-assigned bus number, and for some reason
+you can't just assign them, then provide a negative bus number.  That will
+then be replaced by a dynamically assigned number. You'd then need to treat
+this as a non-static configuration (see above).
+
+
+SPI Master Methods
+^^^^^^^^^^^^^^^^^^
+
+``master->setup(struct spi_device *spi)``
+	This sets up the device clock rate, SPI mode, and word sizes.
+	Drivers may change the defaults provided by board_info, and then
+	call spi_setup(spi) to invoke this routine.  It may sleep.
+
+	Unless each SPI slave has its own configuration registers, don't
+	change them right away ... otherwise drivers could corrupt I/O
+	that's in progress for other SPI devices.
+
+	.. note::
+
+		BUG ALERT:  for some reason the first version of
+		many spi_master drivers seems to get this wrong.
+		When you code setup(), ASSUME that the controller
+		is actively processing transfers for another device.
+
+``master->cleanup(struct spi_device *spi)``
+	Your controller driver may use spi_device.controller_state to hold
+	state it dynamically associates with that device.  If you do that,
+	be sure to provide the cleanup() method to free that state.
+
+``master->prepare_transfer_hardware(struct spi_master *master)``
+	This will be called by the queue mechanism to signal to the driver
+	that a message is coming in soon, so the subsystem requests the
+	driver to prepare the transfer hardware by issuing this call.
+	This may sleep.
+
+``master->unprepare_transfer_hardware(struct spi_master *master)``
+	This will be called by the queue mechanism to signal to the driver
+	that there are no more messages pending in the queue and it may
+	relax the hardware (e.g. by power management calls). This may sleep.
+
+``master->transfer_one_message(struct spi_master *master, struct spi_message *mesg)``
+	The subsystem calls the driver to transfer a single message while
+	queuing transfers that arrive in the meantime. When the driver is
+	finished with this message, it must call
+	spi_finalize_current_message() so the subsystem can issue the next
+	message. This may sleep.
+
+``master->transfer_one(struct spi_master *master, struct spi_device *spi, struct spi_transfer *transfer)``
+	The subsystem calls the driver to transfer a single transfer while
+	queuing transfers that arrive in the meantime. When the driver is
+	finished with this transfer, it must call
+	spi_finalize_current_transfer() so the subsystem can issue the next
+	transfer. This may sleep. Note: transfer_one and transfer_one_message
+	are mutually exclusive; when both are set, the generic subsystem does
+	not call your transfer_one callback.
+
+	Return values:
+
+	* negative errno: error
+	* 0: transfer is finished
+	* 1: transfer is still in progress
+
+``master->set_cs_timing(struct spi_device *spi, u8 setup_clk_cycles, u8 hold_clk_cycles, u8 inactive_clk_cycles)``
+	This method allows SPI client drivers to request SPI master controller
+	for configuring device specific CS setup, hold and inactive timing
+	requirements.
+
+Deprecated Methods
+^^^^^^^^^^^^^^^^^^
+
+``master->transfer(struct spi_device *spi, struct spi_message *message)``
+	This must not sleep. Its responsibility is to arrange that the
+	transfer happens and its complete() callback is issued. The two
+	will normally happen later, after other transfers complete, and
+	if the controller is idle it will need to be kickstarted. This
+	method is not used on queued controllers and must be NULL if
+	transfer_one_message() and (un)prepare_transfer_hardware() are
+	implemented.
+
+
+SPI Message Queue
+^^^^^^^^^^^^^^^^^
+
+If you are happy with the standard queueing mechanism provided by the
+SPI subsystem, just implement the queued methods specified above. Using
+the message queue has the upside of centralizing a lot of code and
+providing pure process-context execution of methods. The message queue
+can also be elevated to realtime priority on high-priority SPI traffic.
+
+Unless the queueing mechanism in the SPI subsystem is selected, the bulk
+of the driver will be managing the I/O queue fed by the now deprecated
+function transfer().
+
+That queue could be purely conceptual.  For example, a driver used only
+for low-frequency sensor access might be fine using synchronous PIO.
+
+But the queue will probably be very real, using message->queue, PIO,
+often DMA (especially if the root filesystem is in SPI flash), and
+execution contexts like IRQ handlers, tasklets, or workqueues (such
+as keventd).  Your driver can be as fancy, or as simple, as you need.
+Such a transfer() method would normally just add the message to a
+queue, and then start some asynchronous transfer engine (unless it's
+already running).
+
+
+THANKS TO
+---------
+Contributors to Linux-SPI discussions include (in alphabetical order,
+by last name):
+
+- Mark Brown
+- David Brownell
+- Russell King
+- Grant Likely
+- Dmitry Pervushin
+- Stephen Street
+- Mark Underwood
+- Andrew Victor
+- Linus Walleij
+- Vitaly Wool
diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev
deleted file mode 100644
index 3d14035b1766..000000000000
--- a/Documentation/spi/spidev
+++ /dev/null
@@ -1,149 +0,0 @@
-SPI devices have a limited userspace API, supporting basic half-duplex
-read() and write() access to SPI slave devices.  Using ioctl() requests,
-full duplex transfers and device I/O configuration are also available.
-
-	#include <fcntl.h>
-	#include <unistd.h>
-	#include <sys/ioctl.h>
-	#include <linux/types.h>
-	#include <linux/spi/spidev.h>
-
-Some reasons you might want to use this programming interface include:
-
- * Prototyping in an environment that's not crash-prone; stray pointers
-   in userspace won't normally bring down any Linux system.
-
- * Developing simple protocols used to talk to microcontrollers acting
-   as SPI slaves, which you may need to change quite often.
-
-Of course there are drivers that can never be written in userspace, because
-they need to access kernel interfaces (such as IRQ handlers or other layers
-of the driver stack) that are not accessible to userspace.
-
-
-DEVICE CREATION, DRIVER BINDING
-===============================
-The simplest way to arrange to use this driver is to just list it in the
-spi_board_info for a device as the driver it should use:  the "modalias"
-entry is "spidev", matching the name of the driver exposing this API.
-Set up the other device characteristics (bits per word, SPI clocking,
-chipselect polarity, etc) as usual, so you won't always need to override
-them later.
-
-(Sysfs also supports userspace driven binding/unbinding of drivers to
-devices.  That mechanism might be supported here in the future.)
-
-When you do that, the sysfs node for the SPI device will include a child
-device node with a "dev" attribute that will be understood by udev or mdev.
-(Larger systems will have "udev".  Smaller ones may configure "mdev" into
-busybox; it's less featureful, but often enough.)  For a SPI device with
-chipselect C on bus B, you should see:
-
-    /dev/spidevB.C ... character special device, major number 153 with
-	a dynamically chosen minor device number.  This is the node
-	that userspace programs will open, created by "udev" or "mdev".
-
-    /sys/devices/.../spiB.C ... as usual, the SPI device node will
-	be a child of its SPI master controller.
-
-    /sys/class/spidev/spidevB.C ... created when the "spidev" driver
-	binds to that device.  (Directory or symlink, based on whether
-	or not you enabled the "deprecated sysfs files" Kconfig option.)
-
-Do not try to manage the /dev character device special file nodes by hand.
-That's error prone, and you'd need to pay careful attention to system
-security issues; udev/mdev should already be configured securely.
-
-If you unbind the "spidev" driver from that device, those two "spidev" nodes
-(in sysfs and in /dev) should automatically be removed (respectively by the
-kernel and by udev/mdev).  You can unbind by removing the "spidev" driver
-module, which will affect all devices using this driver.  You can also unbind
-by having kernel code remove the SPI device, probably by removing the driver
-for its SPI controller (so its spi_master vanishes).
-
-Since this is a standard Linux device driver -- even though it just happens
-to expose a low level API to userspace -- it can be associated with any number
-of devices at a time.  Just provide one spi_board_info record for each such
-SPI device, and you'll get a /dev device node for each device.
-
-
-BASIC CHARACTER DEVICE API
-==========================
-Normal open() and close() operations on /dev/spidevB.D files work as you
-would expect.
-
-Standard read() and write() operations are obviously only half-duplex, and
-the chipselect is deactivated between those operations.  Full-duplex access,
-and composite operation without chipselect de-activation, is available using
-the SPI_IOC_MESSAGE(N) request.
-
-Several ioctl() requests let your driver read or override the device's current
-settings for data transfer parameters:
-
-    SPI_IOC_RD_MODE, SPI_IOC_WR_MODE ... pass a pointer to a byte which will
-	return (RD) or assign (WR) the SPI transfer mode.  Use the constants
-	SPI_MODE_0..SPI_MODE_3; or if you prefer you can combine SPI_CPOL
-	(clock polarity, idle high iff this is set) or SPI_CPHA (clock phase,
-	sample on trailing edge iff this is set) flags.
-	Note that this request is limited to SPI mode flags that fit in a
-	single byte.
-
-    SPI_IOC_RD_MODE32, SPI_IOC_WR_MODE32 ... pass a pointer to a uin32_t
-	which will return (RD) or assign (WR) the full SPI transfer mode,
-	not limited to the bits that fit in one byte.
-
-    SPI_IOC_RD_LSB_FIRST, SPI_IOC_WR_LSB_FIRST ... pass a pointer to a byte
-	which will return (RD) or assign (WR) the bit justification used to
-	transfer SPI words.  Zero indicates MSB-first; other values indicate
-	the less common LSB-first encoding.  In both cases the specified value
-	is right-justified in each word, so that unused (TX) or undefined (RX)
-	bits are in the MSBs.
-
-    SPI_IOC_RD_BITS_PER_WORD, SPI_IOC_WR_BITS_PER_WORD ... pass a pointer to
-	a byte which will return (RD) or assign (WR) the number of bits in
-	each SPI transfer word.  The value zero signifies eight bits.
-
-    SPI_IOC_RD_MAX_SPEED_HZ, SPI_IOC_WR_MAX_SPEED_HZ ... pass a pointer to a
-	u32 which will return (RD) or assign (WR) the maximum SPI transfer
-	speed, in Hz.  The controller can't necessarily assign that specific
-	clock speed.
-
-NOTES:
-
-    - At this time there is no async I/O support; everything is purely
-      synchronous.
-
-    - There's currently no way to report the actual bit rate used to
-      shift data to/from a given device.
-
-    - From userspace, you can't currently change the chip select polarity;
-      that could corrupt transfers to other devices sharing the SPI bus.
-      Each SPI device is deselected when it's not in active use, allowing
-      other drivers to talk to other devices.
-
-    - There's a limit on the number of bytes each I/O request can transfer
-      to the SPI device.  It defaults to one page, but that can be changed
-      using a module parameter.
-
-    - Because SPI has no low-level transfer acknowledgement, you usually
-      won't see any I/O errors when talking to a non-existent device.
-
-
-FULL DUPLEX CHARACTER DEVICE API
-================================
-
-See the spidev_fdx.c sample program for one example showing the use of the
-full duplex programming interface.  (Although it doesn't perform a full duplex
-transfer.)  The model is the same as that used in the kernel spi_sync()
-request; the individual transfers offer the same capabilities as are
-available to kernel drivers (except that it's not asynchronous).
-
-The example shows one half-duplex RPC-style request and response message.
-These requests commonly require that the chip not be deselected between
-the request and response.  Several such requests could be chained into
-a single kernel request, even allowing the chip to be deselected after
-each response.  (Other protocol options include changing the word size
-and bitrate for each transfer segment.)
-
-To make a full duplex request, provide both rx_buf and tx_buf for the
-same transfer.  It's even OK if those are the same buffer.
diff --git a/Documentation/spi/spidev.rst b/Documentation/spi/spidev.rst
new file mode 100644
index 000000000000..f05dbc5ccdbc
--- /dev/null
+++ b/Documentation/spi/spidev.rst
@@ -0,0 +1,163 @@
+=================
+SPI userspace API
+=================
+
+SPI devices have a limited userspace API, supporting basic half-duplex
+read() and write() access to SPI slave devices.  Using ioctl() requests,
+full duplex transfers and device I/O configuration are also available.
+
+::
+
+	#include <fcntl.h>
+	#include <unistd.h>
+	#include <sys/ioctl.h>
+	#include <linux/types.h>
+	#include <linux/spi/spidev.h>
+
+Some reasons you might want to use this programming interface include:
+
+ * Prototyping in an environment that's not crash-prone; stray pointers
+   in userspace won't normally bring down any Linux system.
+
+ * Developing simple protocols used to talk to microcontrollers acting
+   as SPI slaves, which you may need to change quite often.
+
+Of course there are drivers that can never be written in userspace, because
+they need to access kernel interfaces (such as IRQ handlers or other layers
+of the driver stack) that are not accessible to userspace.
+
+
+DEVICE CREATION, DRIVER BINDING
+===============================
+The simplest way to arrange to use this driver is to just list it in the
+spi_board_info for a device as the driver it should use:  the "modalias"
+entry is "spidev", matching the name of the driver exposing this API.
+Set up the other device characteristics (bits per word, SPI clocking,
+chipselect polarity, etc) as usual, so you won't always need to override
+them later.
+
+(Sysfs also supports userspace driven binding/unbinding of drivers to
+devices.  That mechanism might be supported here in the future.)
+
+When you do that, the sysfs node for the SPI device will include a child
+device node with a "dev" attribute that will be understood by udev or mdev.
+(Larger systems will have "udev".  Smaller ones may configure "mdev" into
+busybox; it's less featureful, but often enough.)  For a SPI device with
+chipselect C on bus B, you should see:
+
+    /dev/spidevB.C ...
+	character special device, major number 153 with
+	a dynamically chosen minor device number.  This is the node
+	that userspace programs will open, created by "udev" or "mdev".
+
+    /sys/devices/.../spiB.C ...
+	as usual, the SPI device node will
+	be a child of its SPI master controller.
+
+    /sys/class/spidev/spidevB.C ...
+	created when the "spidev" driver
+	binds to that device.  (Directory or symlink, based on whether
+	or not you enabled the "deprecated sysfs files" Kconfig option.)
+
+Do not try to manage the /dev character device special file nodes by hand.
+That's error prone, and you'd need to pay careful attention to system
+security issues; udev/mdev should already be configured securely.
+
+If you unbind the "spidev" driver from that device, those two "spidev" nodes
+(in sysfs and in /dev) should automatically be removed (respectively by the
+kernel and by udev/mdev).  You can unbind by removing the "spidev" driver
+module, which will affect all devices using this driver.  You can also unbind
+by having kernel code remove the SPI device, probably by removing the driver
+for its SPI controller (so its spi_master vanishes).
+
+Since this is a standard Linux device driver -- even though it just happens
+to expose a low level API to userspace -- it can be associated with any number
+of devices at a time.  Just provide one spi_board_info record for each such
+SPI device, and you'll get a /dev device node for each device.
+
+
+BASIC CHARACTER DEVICE API
+==========================
+Normal open() and close() operations on /dev/spidevB.D files work as you
+would expect.
+
+Standard read() and write() operations are obviously only half-duplex, and
+the chipselect is deactivated between those operations.  Full-duplex access,
+and composite operation without chipselect de-activation, is available using
+the SPI_IOC_MESSAGE(N) request.
+
+Several ioctl() requests let your driver read or override the device's current
+settings for data transfer parameters:
+
+    SPI_IOC_RD_MODE, SPI_IOC_WR_MODE ...
+	pass a pointer to a byte which will
+	return (RD) or assign (WR) the SPI transfer mode.  Use the constants
+	SPI_MODE_0..SPI_MODE_3; or if you prefer you can combine SPI_CPOL
+	(clock polarity, idle high iff this is set) or SPI_CPHA (clock phase,
+	sample on trailing edge iff this is set) flags.
+	Note that this request is limited to SPI mode flags that fit in a
+	single byte.
+
+    SPI_IOC_RD_MODE32, SPI_IOC_WR_MODE32 ...
+	pass a pointer to a uin32_t
+	which will return (RD) or assign (WR) the full SPI transfer mode,
+	not limited to the bits that fit in one byte.
+
+    SPI_IOC_RD_LSB_FIRST, SPI_IOC_WR_LSB_FIRST ...
+	pass a pointer to a byte
+	which will return (RD) or assign (WR) the bit justification used to
+	transfer SPI words.  Zero indicates MSB-first; other values indicate
+	the less common LSB-first encoding.  In both cases the specified value
+	is right-justified in each word, so that unused (TX) or undefined (RX)
+	bits are in the MSBs.
+
+    SPI_IOC_RD_BITS_PER_WORD, SPI_IOC_WR_BITS_PER_WORD ...
+	pass a pointer to
+	a byte which will return (RD) or assign (WR) the number of bits in
+	each SPI transfer word.  The value zero signifies eight bits.
+
+    SPI_IOC_RD_MAX_SPEED_HZ, SPI_IOC_WR_MAX_SPEED_HZ ...
+	pass a pointer to a
+	u32 which will return (RD) or assign (WR) the maximum SPI transfer
+	speed, in Hz.  The controller can't necessarily assign that specific
+	clock speed.
+
+NOTES:
+
+    - At this time there is no async I/O support; everything is purely
+      synchronous.
+
+    - There's currently no way to report the actual bit rate used to
+      shift data to/from a given device.
+
+    - From userspace, you can't currently change the chip select polarity;
+      that could corrupt transfers to other devices sharing the SPI bus.
+      Each SPI device is deselected when it's not in active use, allowing
+      other drivers to talk to other devices.
+
+    - There's a limit on the number of bytes each I/O request can transfer
+      to the SPI device.  It defaults to one page, but that can be changed
+      using a module parameter.
+
+    - Because SPI has no low-level transfer acknowledgement, you usually
+      won't see any I/O errors when talking to a non-existent device.
+
+
+FULL DUPLEX CHARACTER DEVICE API
+================================
+
+See the spidev_fdx.c sample program for one example showing the use of the
+full duplex programming interface.  (Although it doesn't perform a full duplex
+transfer.)  The model is the same as that used in the kernel spi_sync()
+request; the individual transfers offer the same capabilities as are
+available to kernel drivers (except that it's not asynchronous).
+
+The example shows one half-duplex RPC-style request and response message.
+These requests commonly require that the chip not be deselected between
+the request and response.  Several such requests could be chained into
+a single kernel request, even allowing the chip to be deselected after
+each response.  (Other protocol options include changing the word size
+and bitrate for each transfer segment.)
+
+To make a full duplex request, provide both rx_buf and tx_buf for the
+same transfer.  It's even OK if those are the same buffer.
diff --git a/drivers/iio/dummy/iio_simple_dummy.c b/drivers/iio/dummy/iio_simple_dummy.c
index d28974ad9e0e..6cb02299a215 100644
--- a/drivers/iio/dummy/iio_simple_dummy.c
+++ b/drivers/iio/dummy/iio_simple_dummy.c
@@ -695,7 +695,7 @@ static int iio_dummy_remove(struct iio_sw_device *swd)
  * i2c:
  * Documentation/i2c/writing-clients.rst
  * spi:
- * Documentation/spi/spi-summary
+ * Documentation/spi/spi-summary.rst
  */
 static const struct iio_sw_device_ops iio_dummy_device_ops = {
 	.probe = iio_dummy_probe,
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 3a1d8f1170de..d5a24fe983e7 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -543,7 +543,7 @@ config SPI_PXA2XX
 	help
 	  This enables using a PXA2xx or Sodaville SSP port as a SPI master
 	  controller. The driver can be configured to use any SSP port and
-	  additional documentation can be found a Documentation/spi/pxa2xx.
+	  additional documentation can be found a Documentation/spi/pxa2xx.rst.
 
 config SPI_PXA2XX_PCI
 	def_tristate SPI_PXA2XX && PCI && COMMON_CLK
diff --git a/drivers/spi/spi-butterfly.c b/drivers/spi/spi-butterfly.c
index 8c77d1114ad3..7e71a351f3b7 100644
--- a/drivers/spi/spi-butterfly.c
+++ b/drivers/spi/spi-butterfly.c
@@ -23,7 +23,7 @@
  * with a battery powered AVR microcontroller and lots of goodies.  You
  * can use GCC to develop firmware for this.
  *
- * See Documentation/spi/butterfly for information about how to build
+ * See Documentation/spi/butterfly.rst for information about how to build
  * and use this custom parallel port cable.
  */
 
diff --git a/drivers/spi/spi-lm70llp.c b/drivers/spi/spi-lm70llp.c
index f18f912c9dea..174dba29b1dd 100644
--- a/drivers/spi/spi-lm70llp.c
+++ b/drivers/spi/spi-lm70llp.c
@@ -34,7 +34,7 @@
  * available (on page 4) here:
  *  http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf
  *
- * Also see Documentation/spi/spi-lm70llp.  The SPI<->parport code here is
+ * Also see Documentation/spi/spi-lm70llp.rst.  The SPI<->parport code here is
  * (heavily) based on spi-butterfly by David Brownell.
  *
  * The LM70 LLP connects to the PC parallel port in the following manner:
diff --git a/include/linux/platform_data/sc18is602.h b/include/linux/platform_data/sc18is602.h
index e066d3b0d6d8..0e91489edfe6 100644
--- a/include/linux/platform_data/sc18is602.h
+++ b/include/linux/platform_data/sc18is602.h
@@ -4,7 +4,7 @@
  *
  * Copyright (C) 2012 Guenter Roeck <linux@roeck-us.net>
  *
- * For further information, see the Documentation/spi/spi-sc18is602 file.
+ * For further information, see the Documentation/spi/spi-sc18is602.rst file.
  */
 
 /**
-- 
cgit v1.2.3


From 3247b272048ffefc12c7dcfa3169bd03047a49bc Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Tue, 30 Jul 2019 15:20:41 +0300
Subject: net: bridge: mcast: add delete due to fast-leave mdb flag

In user-space there's no way to distinguish why an mdb entry was deleted
and that is a problem for daemons which would like to keep the mdb in
sync with remote ends (e.g. mlag) but would also like to converge faster.
In almost all cases we'd like to age-out the remote entry for performance
and convergence reasons except when fast-leave is enabled. In that case we
want explicit immediate remote delete, thus add mdb flag which is set only
when the entry is being deleted due to fast-leave.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h | 1 +
 net/bridge/br_mdb.c            | 2 ++
 net/bridge/br_multicast.c      | 2 +-
 net/bridge/br_private.h        | 1 +
 4 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 773e476a8e54..1b3c2b643a02 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -237,6 +237,7 @@ struct br_mdb_entry {
 #define MDB_PERMANENT 1
 	__u8 state;
 #define MDB_FLAGS_OFFLOAD	(1 << 0)
+#define MDB_FLAGS_FAST_LEAVE	(1 << 1)
 	__u8 flags;
 	__u16 vid;
 	struct {
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index bf6acd34234d..428af1abf8cc 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -60,6 +60,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
 	e->flags = 0;
 	if (flags & MDB_PG_FLAGS_OFFLOAD)
 		e->flags |= MDB_FLAGS_OFFLOAD;
+	if (flags & MDB_PG_FLAGS_FAST_LEAVE)
+		e->flags |= MDB_FLAGS_FAST_LEAVE;
 }
 
 static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3d8deac2353d..3d4b2817687f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1393,7 +1393,7 @@ br_multicast_leave_group(struct net_bridge *br,
 			del_timer(&p->timer);
 			kfree_rcu(p, rcu);
 			br_mdb_notify(br->dev, port, group, RTM_DELMDB,
-				      p->flags);
+				      p->flags | MDB_PG_FLAGS_FAST_LEAVE);
 
 			if (!mp->ports && !mp->host_joined &&
 			    netif_running(br->dev))
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e8cf03b43b7d..c4fd307fbfdc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -199,6 +199,7 @@ struct net_bridge_fdb_entry {
 
 #define MDB_PG_FLAGS_PERMANENT	BIT(0)
 #define MDB_PG_FLAGS_OFFLOAD	BIT(1)
+#define MDB_PG_FLAGS_FAST_LEAVE	BIT(2)
 
 struct net_bridge_port_group {
 	struct net_bridge_port		*port;
-- 
cgit v1.2.3


From b877ac9815a8fe7e5f6d7fdde3dc34652408840a Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 14 Jun 2019 07:46:04 +0200
Subject: xen/swiotlb: remember having called xen_create_contiguous_region()

Instead of always calling xen_destroy_contiguous_region() in case the
memory is DMA-able for the used device, do so only in case it has been
made DMA-able via xen_create_contiguous_region() before.

This will avoid a lot of xen_destroy_contiguous_region() calls for
64-bit capable devices.

As the memory in question is owned by swiotlb-xen the PG_owner_priv_1
flag of the first allocated page can be used for remembering.

Signed-off-by: Juergen Gross <jgross@suse.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/swiotlb-xen.c  | 4 +++-
 include/linux/page-flags.h | 4 ++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 37ddcfcfbb21..ceb681cf64bb 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -322,6 +322,7 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			xen_free_coherent_pages(hwdev, size, ret, (dma_addr_t)phys, attrs);
 			return NULL;
 		}
+		SetPageXenRemapped(virt_to_page(ret));
 	}
 	memset(ret, 0, size);
 	return ret;
@@ -346,7 +347,8 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 	size = 1UL << (order + XEN_PAGE_SHIFT);
 
 	if (!WARN_ON((dev_addr + size - 1 > dma_mask) ||
-		     range_straddles_page_boundary(phys, size)))
+		     range_straddles_page_boundary(phys, size)) &&
+	    TestClearPageXenRemapped(virt_to_page(vaddr)))
 		xen_destroy_contiguous_region(phys, order);
 
 	xen_free_coherent_pages(hwdev, size, vaddr, (dma_addr_t)phys, attrs);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index b848517da64c..f91cb8898ff0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -152,6 +152,8 @@ enum pageflags {
 	PG_savepinned = PG_dirty,
 	/* Has a grant mapping of another (foreign) domain's page. */
 	PG_foreign = PG_owner_priv_1,
+	/* Remapped by swiotlb-xen. */
+	PG_xen_remapped = PG_owner_priv_1,
 
 	/* SLOB */
 	PG_slob_free = PG_private,
@@ -329,6 +331,8 @@ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND)
 	TESTSCFLAG(Pinned, pinned, PF_NO_COMPOUND)
 PAGEFLAG(SavePinned, savepinned, PF_NO_COMPOUND);
 PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
+PAGEFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND)
+	TESTCLEARFLAG(XenRemapped, xen_remapped, PF_NO_COMPOUND)
 
 PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
 	__CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
-- 
cgit v1.2.3


From dc607f6bbafd2d8bf1d518c346560ba9577e402f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 6 Jul 2019 18:47:22 +0200
Subject: mfd: aat2870: no need to check return value of debugfs_create
 functions

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

Cc: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20190706164722.18766-3-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/aat2870-core.c  | 13 ++-----------
 include/linux/mfd/aat2870.h |  1 -
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c
index 9f58cb2d3789..78ee4b28fca2 100644
--- a/drivers/mfd/aat2870-core.c
+++ b/drivers/mfd/aat2870-core.c
@@ -321,18 +321,9 @@ static const struct file_operations aat2870_reg_fops = {
 static void aat2870_init_debugfs(struct aat2870_data *aat2870)
 {
 	aat2870->dentry_root = debugfs_create_dir("aat2870", NULL);
-	if (!aat2870->dentry_root) {
-		dev_warn(aat2870->dev,
-			 "Failed to create debugfs root directory\n");
-		return;
-	}
 
-	aat2870->dentry_reg = debugfs_create_file("regs", 0644,
-						  aat2870->dentry_root,
-						  aat2870, &aat2870_reg_fops);
-	if (!aat2870->dentry_reg)
-		dev_warn(aat2870->dev,
-			 "Failed to create debugfs register file\n");
+	debugfs_create_file("regs", 0644, aat2870->dentry_root, aat2870,
+			    &aat2870_reg_fops);
 }
 
 #else
diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h
index af7267c480ee..2445842d482d 100644
--- a/include/linux/mfd/aat2870.h
+++ b/include/linux/mfd/aat2870.h
@@ -136,7 +136,6 @@ struct aat2870_data {
 
 	/* for debugfs */
 	struct dentry *dentry_root;
-	struct dentry *dentry_reg;
 };
 
 struct aat2870_subdev_info {
-- 
cgit v1.2.3


From e2557157a9dd47b0335cac325b4eb438db2320d2 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Fri, 5 Jul 2019 16:54:05 +0800
Subject: dt-bindings: reset: imx7: Add support for i.MX8MM

i.MX8MM can reuse i.MX8MQ's reset driver, update the compatible
property and related info to support i.MX8MM.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../devicetree/bindings/reset/fsl,imx7-src.txt     |  6 +++--
 include/dt-bindings/reset/imx8mq-reset.h           | 28 +++++++++++-----------
 2 files changed, 18 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
index 13e095182db4..c2489e41a801 100644
--- a/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
+++ b/Documentation/devicetree/bindings/reset/fsl,imx7-src.txt
@@ -8,6 +8,7 @@ Required properties:
 - compatible:
 	- For i.MX7 SoCs should be "fsl,imx7d-src", "syscon"
 	- For i.MX8MQ SoCs should be "fsl,imx8mq-src", "syscon"
+	- For i.MX8MM SoCs should be "fsl,imx8mm-src", "fsl,imx8mq-src", "syscon"
 - reg: should be register base and length as documented in the
   datasheet
 - interrupts: Should contain SRC interrupt
@@ -46,5 +47,6 @@ Example:
 
 
 For list of all valid reset indices see
-<dt-bindings/reset/imx7-reset.h> for i.MX7 and
-<dt-bindings/reset/imx8mq-reset.h> for i.MX8MQ
+<dt-bindings/reset/imx7-reset.h> for i.MX7,
+<dt-bindings/reset/imx8mq-reset.h> for i.MX8MQ and
+<dt-bindings/reset/imx8mq-reset.h> for i.MX8MM
diff --git a/include/dt-bindings/reset/imx8mq-reset.h b/include/dt-bindings/reset/imx8mq-reset.h
index 57c592498aa0..f17ef2a972f6 100644
--- a/include/dt-bindings/reset/imx8mq-reset.h
+++ b/include/dt-bindings/reset/imx8mq-reset.h
@@ -38,26 +38,26 @@
 #define IMX8MQ_RESET_PCIEPHY_PERST		27
 #define IMX8MQ_RESET_PCIE_CTRL_APPS_EN		28
 #define IMX8MQ_RESET_PCIE_CTRL_APPS_TURNOFF	29
-#define IMX8MQ_RESET_HDMI_PHY_APB_RESET		30
+#define IMX8MQ_RESET_HDMI_PHY_APB_RESET		30	/* i.MX8MM does NOT support */
 #define IMX8MQ_RESET_DISP_RESET			31
 #define IMX8MQ_RESET_GPU_RESET			32
 #define IMX8MQ_RESET_VPU_RESET			33
-#define IMX8MQ_RESET_PCIEPHY2			34
-#define IMX8MQ_RESET_PCIEPHY2_PERST		35
-#define IMX8MQ_RESET_PCIE2_CTRL_APPS_EN		36
-#define IMX8MQ_RESET_PCIE2_CTRL_APPS_TURNOFF	37
-#define IMX8MQ_RESET_MIPI_CSI1_CORE_RESET	38
-#define IMX8MQ_RESET_MIPI_CSI1_PHY_REF_RESET	39
-#define IMX8MQ_RESET_MIPI_CSI1_ESC_RESET	40
-#define IMX8MQ_RESET_MIPI_CSI2_CORE_RESET	41
-#define IMX8MQ_RESET_MIPI_CSI2_PHY_REF_RESET	42
-#define IMX8MQ_RESET_MIPI_CSI2_ESC_RESET	43
+#define IMX8MQ_RESET_PCIEPHY2			34	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_PCIEPHY2_PERST		35	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_PCIE2_CTRL_APPS_EN		36	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_PCIE2_CTRL_APPS_TURNOFF	37	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_MIPI_CSI1_CORE_RESET	38	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_MIPI_CSI1_PHY_REF_RESET	39	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_MIPI_CSI1_ESC_RESET	40	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_MIPI_CSI2_CORE_RESET	41	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_MIPI_CSI2_PHY_REF_RESET	42	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_MIPI_CSI2_ESC_RESET	43	/* i.MX8MM does NOT support */
 #define IMX8MQ_RESET_DDRC1_PRST			44
 #define IMX8MQ_RESET_DDRC1_CORE_RESET		45
 #define IMX8MQ_RESET_DDRC1_PHY_RESET		46
-#define IMX8MQ_RESET_DDRC2_PRST			47
-#define IMX8MQ_RESET_DDRC2_CORE_RESET		48
-#define IMX8MQ_RESET_DDRC2_PHY_RESET		49
+#define IMX8MQ_RESET_DDRC2_PRST			47	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_DDRC2_CORE_RESET		48	/* i.MX8MM does NOT support */
+#define IMX8MQ_RESET_DDRC2_PHY_RESET		49	/* i.MX8MM does NOT support */
 
 #define IMX8MQ_RESET_NUM			50
 
-- 
cgit v1.2.3


From 942b4c10b11bced65c0e553d8355b58bb3d9cdb2 Mon Sep 17 00:00:00 2001
From: Guido Günther <agx@sigxcpu.org>
Date: Thu, 1 Aug 2019 10:20:59 +0200
Subject: dt-bindings: reset: Fix typo in imx8mq resets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some of the mipi dsi resets were called

  IMX8MQ_RESET_MIPI_DIS__

instead of

  IMX8MQ_RESET_MIPI_DSI__

Since they're DSI related this looks like a typo. This fixes the
only in tree user as well to not break bisecting.

Signed-off-by: Guido Günther <agx@sigxcpu.org>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/reset/reset-imx7.c               | 12 ++++++------
 include/dt-bindings/reset/imx8mq-reset.h |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/reset/reset-imx7.c b/drivers/reset/reset-imx7.c
index 3ecd770f910b..1443a55a0c29 100644
--- a/drivers/reset/reset-imx7.c
+++ b/drivers/reset/reset-imx7.c
@@ -169,9 +169,9 @@ static const struct imx7_src_signal imx8mq_src_signals[IMX8MQ_RESET_NUM] = {
 	[IMX8MQ_RESET_OTG2_PHY_RESET]		= { SRC_USBOPHY2_RCR, BIT(0) },
 	[IMX8MQ_RESET_MIPI_DSI_RESET_BYTE_N]	= { SRC_MIPIPHY_RCR, BIT(1) },
 	[IMX8MQ_RESET_MIPI_DSI_RESET_N]		= { SRC_MIPIPHY_RCR, BIT(2) },
-	[IMX8MQ_RESET_MIPI_DIS_DPI_RESET_N]	= { SRC_MIPIPHY_RCR, BIT(3) },
-	[IMX8MQ_RESET_MIPI_DIS_ESC_RESET_N]	= { SRC_MIPIPHY_RCR, BIT(4) },
-	[IMX8MQ_RESET_MIPI_DIS_PCLK_RESET_N]	= { SRC_MIPIPHY_RCR, BIT(5) },
+	[IMX8MQ_RESET_MIPI_DSI_DPI_RESET_N]	= { SRC_MIPIPHY_RCR, BIT(3) },
+	[IMX8MQ_RESET_MIPI_DSI_ESC_RESET_N]	= { SRC_MIPIPHY_RCR, BIT(4) },
+	[IMX8MQ_RESET_MIPI_DSI_PCLK_RESET_N]	= { SRC_MIPIPHY_RCR, BIT(5) },
 	[IMX8MQ_RESET_PCIEPHY]			= { SRC_PCIEPHY_RCR,
 						    BIT(2) | BIT(1) },
 	[IMX8MQ_RESET_PCIEPHY_PERST]		= { SRC_PCIEPHY_RCR, BIT(3) },
@@ -220,9 +220,9 @@ static int imx8mq_reset_set(struct reset_controller_dev *rcdev,
 
 	case IMX8MQ_RESET_PCIE_CTRL_APPS_EN:
 	case IMX8MQ_RESET_PCIE2_CTRL_APPS_EN:	/* fallthrough */
-	case IMX8MQ_RESET_MIPI_DIS_PCLK_RESET_N:	/* fallthrough */
-	case IMX8MQ_RESET_MIPI_DIS_ESC_RESET_N:	/* fallthrough */
-	case IMX8MQ_RESET_MIPI_DIS_DPI_RESET_N:	/* fallthrough */
+	case IMX8MQ_RESET_MIPI_DSI_PCLK_RESET_N:	/* fallthrough */
+	case IMX8MQ_RESET_MIPI_DSI_ESC_RESET_N:	/* fallthrough */
+	case IMX8MQ_RESET_MIPI_DSI_DPI_RESET_N:	/* fallthrough */
 	case IMX8MQ_RESET_MIPI_DSI_RESET_N:	/* fallthrough */
 	case IMX8MQ_RESET_MIPI_DSI_RESET_BYTE_N:	/* fallthrough */
 		value = assert ? 0 : bit;
diff --git a/include/dt-bindings/reset/imx8mq-reset.h b/include/dt-bindings/reset/imx8mq-reset.h
index f17ef2a972f6..9a301082d361 100644
--- a/include/dt-bindings/reset/imx8mq-reset.h
+++ b/include/dt-bindings/reset/imx8mq-reset.h
@@ -31,9 +31,9 @@
 #define IMX8MQ_RESET_OTG2_PHY_RESET		20
 #define IMX8MQ_RESET_MIPI_DSI_RESET_BYTE_N	21
 #define IMX8MQ_RESET_MIPI_DSI_RESET_N		22
-#define IMX8MQ_RESET_MIPI_DIS_DPI_RESET_N	23
-#define IMX8MQ_RESET_MIPI_DIS_ESC_RESET_N	24
-#define IMX8MQ_RESET_MIPI_DIS_PCLK_RESET_N	25
+#define IMX8MQ_RESET_MIPI_DSI_DPI_RESET_N	23
+#define IMX8MQ_RESET_MIPI_DSI_ESC_RESET_N	24
+#define IMX8MQ_RESET_MIPI_DSI_PCLK_RESET_N	25
 #define IMX8MQ_RESET_PCIEPHY			26
 #define IMX8MQ_RESET_PCIEPHY_PERST		27
 #define IMX8MQ_RESET_PCIE_CTRL_APPS_EN		28
-- 
cgit v1.2.3


From b16a0063654516bde54a95262d02207d79f1bf53 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Thu, 1 Aug 2019 09:54:53 +0200
Subject: dt-bindings: reset: amlogic,meson-gxbb-reset: update with SPDX
 Licence identifier

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 .../dt-bindings/reset/amlogic,meson-gxbb-reset.h   | 51 +---------------------
 1 file changed, 1 insertion(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/reset/amlogic,meson-gxbb-reset.h b/include/dt-bindings/reset/amlogic,meson-gxbb-reset.h
index 524d6077ac1b..ea5058618863 100644
--- a/include/dt-bindings/reset/amlogic,meson-gxbb-reset.h
+++ b/include/dt-bindings/reset/amlogic,meson-gxbb-reset.h
@@ -1,56 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
  * Copyright (c) 2016 BayLibre, SAS.
  * Author: Neil Armstrong <narmstrong@baylibre.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- * The full GNU General Public License is included in this distribution
- * in the file called COPYING.
- *
- * BSD LICENSE
- *
- * Copyright (c) 2016 BayLibre, SAS.
- * Author: Neil Armstrong <narmstrong@baylibre.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #ifndef _DT_BINDINGS_AMLOGIC_MESON_GXBB_RESET_H
 #define _DT_BINDINGS_AMLOGIC_MESON_GXBB_RESET_H
-- 
cgit v1.2.3


From fb0d72c7ac5d1a3578e15cb56b4f453730790ba6 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Thu, 1 Aug 2019 09:54:54 +0200
Subject: dt-bindings: reset: amlogic,meson8b-reset: update with SPDX Licence
 identifier

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/dt-bindings/reset/amlogic,meson8b-reset.h | 51 +----------------------
 1 file changed, 1 insertion(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/dt-bindings/reset/amlogic,meson8b-reset.h b/include/dt-bindings/reset/amlogic,meson8b-reset.h
index 614aff2c7aff..c614438bcbdb 100644
--- a/include/dt-bindings/reset/amlogic,meson8b-reset.h
+++ b/include/dt-bindings/reset/amlogic,meson8b-reset.h
@@ -1,56 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * This file is provided under a dual BSD/GPLv2 license.  When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
  * Copyright (c) 2016 BayLibre, SAS.
  * Author: Neil Armstrong <narmstrong@baylibre.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- * The full GNU General Public License is included in this distribution
- * in the file called COPYING.
- *
- * BSD LICENSE
- *
- * Copyright (c) 2016 BayLibre, SAS.
- * Author: Neil Armstrong <narmstrong@baylibre.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in
- *     the documentation and/or other materials provided with the
- *     distribution.
- *   * Neither the name of Intel Corporation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #ifndef _DT_BINDINGS_AMLOGIC_MESON8B_RESET_H
 #define _DT_BINDINGS_AMLOGIC_MESON8B_RESET_H
-- 
cgit v1.2.3


From fee531d6fc49b9a616525e30955c4cf3b403f632 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 31 Jul 2019 15:17:15 +0200
Subject: ASoC: core: no need to check return value of debugfs_create functions

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

Also, there is no need to store the individual debugfs file name, just
remove the whole directory all at once, saving a local variable.

Note, the soc-pcm "state" file has now moved to a subdirectory, as it is
only a good idea to save the dentries for debugfs directories, not
individual files, as the individual file debugfs functions are changing
to not return a dentry.

Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: alsa-devel@alsa-project.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20190731131716.9764-2-gregkh@linuxfoundation.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  1 -
 sound/soc/soc-core.c | 43 ++++++-------------------------------------
 sound/soc/soc-dapm.c | 30 ++++--------------------------
 sound/soc/soc-pcm.c  | 14 ++++----------
 4 files changed, 14 insertions(+), 74 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index d770606732cd..f0797792dd8d 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1220,7 +1220,6 @@ struct snd_soc_card {
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs_card_root;
-	struct dentry *debugfs_pop_time;
 #endif
 	u32 pop_time;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index da11e44b01aa..ed66d2c68d10 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -165,13 +165,6 @@ static void soc_init_component_debugfs(struct snd_soc_component *component)
 				component->card->debugfs_card_root);
 	}
 
-	if (IS_ERR(component->debugfs_root)) {
-		dev_warn(component->dev,
-			"ASoC: Failed to create component debugfs directory: %ld\n",
-			PTR_ERR(component->debugfs_root));
-		return;
-	}
-
 	snd_soc_dapm_debugfs_init(snd_soc_component_get_dapm(component),
 		component->debugfs_root);
 }
@@ -215,32 +208,15 @@ DEFINE_SHOW_ATTRIBUTE(component_list);
 
 static void soc_init_card_debugfs(struct snd_soc_card *card)
 {
-	if (!snd_soc_debugfs_root)
-		return;
-
 	card->debugfs_card_root = debugfs_create_dir(card->name,
 						     snd_soc_debugfs_root);
-	if (IS_ERR(card->debugfs_card_root)) {
-		dev_warn(card->dev,
-			 "ASoC: Failed to create card debugfs directory: %ld\n",
-			 PTR_ERR(card->debugfs_card_root));
-		card->debugfs_card_root = NULL;
-		return;
-	}
 
-	card->debugfs_pop_time = debugfs_create_u32("dapm_pop_time", 0644,
-						    card->debugfs_card_root,
-						    &card->pop_time);
-	if (IS_ERR(card->debugfs_pop_time))
-		dev_warn(card->dev,
-			 "ASoC: Failed to create pop time debugfs file: %ld\n",
-			 PTR_ERR(card->debugfs_pop_time));
+	debugfs_create_u32("dapm_pop_time", 0644, card->debugfs_card_root,
+			   &card->pop_time);
 }
 
 static void soc_cleanup_card_debugfs(struct snd_soc_card *card)
 {
-	if (!card->debugfs_card_root)
-		return;
 	debugfs_remove_recursive(card->debugfs_card_root);
 	card->debugfs_card_root = NULL;
 }
@@ -248,19 +224,12 @@ static void soc_cleanup_card_debugfs(struct snd_soc_card *card)
 static void snd_soc_debugfs_init(void)
 {
 	snd_soc_debugfs_root = debugfs_create_dir("asoc", NULL);
-	if (IS_ERR_OR_NULL(snd_soc_debugfs_root)) {
-		pr_warn("ASoC: Failed to create debugfs directory\n");
-		snd_soc_debugfs_root = NULL;
-		return;
-	}
 
-	if (!debugfs_create_file("dais", 0444, snd_soc_debugfs_root, NULL,
-				 &dai_list_fops))
-		pr_warn("ASoC: Failed to create DAI list debugfs file\n");
+	debugfs_create_file("dais", 0444, snd_soc_debugfs_root, NULL,
+			    &dai_list_fops);
 
-	if (!debugfs_create_file("components", 0444, snd_soc_debugfs_root, NULL,
-				 &component_list_fops))
-		pr_warn("ASoC: Failed to create component list debugfs file\n");
+	debugfs_create_file("components", 0444, snd_soc_debugfs_root, NULL,
+			    &component_list_fops);
 }
 
 static void snd_soc_debugfs_exit(void)
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index e16838e1bda2..d93c1038fab0 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -2154,50 +2154,28 @@ static const struct file_operations dapm_bias_fops = {
 void snd_soc_dapm_debugfs_init(struct snd_soc_dapm_context *dapm,
 	struct dentry *parent)
 {
-	struct dentry *d;
-
 	if (!parent || IS_ERR(parent))
 		return;
 
 	dapm->debugfs_dapm = debugfs_create_dir("dapm", parent);
 
-	if (IS_ERR(dapm->debugfs_dapm)) {
-		dev_warn(dapm->dev,
-			 "ASoC: Failed to create DAPM debugfs directory %ld\n",
-			 PTR_ERR(dapm->debugfs_dapm));
-		return;
-	}
-
-	d = debugfs_create_file("bias_level", 0444,
-				dapm->debugfs_dapm, dapm,
-				&dapm_bias_fops);
-	if (IS_ERR(d))
-		dev_warn(dapm->dev,
-			 "ASoC: Failed to create bias level debugfs file: %ld\n",
-			 PTR_ERR(d));
+	debugfs_create_file("bias_level", 0444, dapm->debugfs_dapm, dapm,
+			    &dapm_bias_fops);
 }
 
 static void dapm_debugfs_add_widget(struct snd_soc_dapm_widget *w)
 {
 	struct snd_soc_dapm_context *dapm = w->dapm;
-	struct dentry *d;
 
 	if (!dapm->debugfs_dapm || !w->name)
 		return;
 
-	d = debugfs_create_file(w->name, 0444,
-				dapm->debugfs_dapm, w,
-				&dapm_widget_power_fops);
-	if (IS_ERR(d))
-		dev_warn(w->dapm->dev,
-			 "ASoC: Failed to create %s debugfs file: %ld\n",
-			 w->name, PTR_ERR(d));
+	debugfs_create_file(w->name, 0444, dapm->debugfs_dapm, w,
+			    &dapm_widget_power_fops);
 }
 
 static void dapm_debugfs_cleanup(struct snd_soc_dapm_context *dapm)
 {
-	if (!dapm->debugfs_dapm)
-		return;
 	debugfs_remove_recursive(dapm->debugfs_dapm);
 	dapm->debugfs_dapm = NULL;
 }
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 30264bc592f6..ce7297c37537 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1205,9 +1205,9 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
 			stream ? "<-" : "->", be->dai_link->name);
 
 #ifdef CONFIG_DEBUG_FS
-	if (fe->debugfs_dpcm_root)
-		dpcm->debugfs_state = debugfs_create_u32(be->dai_link->name, 0644,
-				fe->debugfs_dpcm_root, &dpcm->state);
+	dpcm->debugfs_state = debugfs_create_dir(be->dai_link->name,
+						 fe->debugfs_dpcm_root);
+	debugfs_create_u32("state", 0644, dpcm->debugfs_state, &dpcm->state);
 #endif
 	return 1;
 }
@@ -1262,7 +1262,7 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream)
 		dpcm_be_reparent(fe, dpcm->be, stream);
 
 #ifdef CONFIG_DEBUG_FS
-		debugfs_remove(dpcm->debugfs_state);
+		debugfs_remove_recursive(dpcm->debugfs_state);
 #endif
 		spin_lock_irqsave(&fe->card->dpcm_lock, flags);
 		list_del(&dpcm->list_be);
@@ -3415,12 +3415,6 @@ void soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd)
 
 	rtd->debugfs_dpcm_root = debugfs_create_dir(rtd->dai_link->name,
 			rtd->card->debugfs_card_root);
-	if (!rtd->debugfs_dpcm_root) {
-		dev_dbg(rtd->dev,
-			 "ASoC: Failed to create dpcm debugfs directory %s\n",
-			 rtd->dai_link->name);
-		return;
-	}
 
 	debugfs_create_file("state", 0444, rtd->debugfs_dpcm_root,
 			    rtd, &dpcm_state_fops);
-- 
cgit v1.2.3


From d9b42dfab513c9130ee0458f2e6febb75c89d1c8 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Wed, 3 Jul 2019 09:58:18 +0200
Subject: drm/client: Support unmapping of DRM client buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DRM clients, such as the fbdev emulation, have their buffer objects
mapped by default. Mapping a buffer implicitly prevents its relocation.
Hence, the buffer may permanently consume video memory while it's
allocated. This is a problem for drivers of low-memory devices, such as
ast, mgag200 or older framebuffer hardware, which will then not have
enough memory to display other content (e.g., X11).

This patch introduces drm_client_buffer_vmap() and _vunmap(). Internal
DRM clients can use these functions to unmap and remap buffer objects
as needed.

There's no reference counting for vmap operations. Callers are expected
to either keep buffers mapped (as it is now), or call vmap and vunmap
in pairs around code that accesses the mapped memory.

v2:
	* remove several duplicated NULL-pointer checks
v3:
	* style and typo fixes

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/315831/
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/drm_client.c | 66 +++++++++++++++++++++++++++++++++++++-------
 include/drm/drm_client.h     |  2 ++
 2 files changed, 58 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c
index 410572f14257..fb107b24baae 100644
--- a/drivers/gpu/drm/drm_client.c
+++ b/drivers/gpu/drm/drm_client.c
@@ -281,6 +281,42 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u
 
 	buffer->gem = obj;
 
+	vaddr = drm_client_buffer_vmap(buffer);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_delete;
+	}
+
+	return buffer;
+
+err_delete:
+	drm_client_buffer_delete(buffer);
+
+	return ERR_PTR(ret);
+}
+
+/**
+ * drm_client_buffer_vmap - Map DRM client buffer into address space
+ * @buffer: DRM client buffer
+ *
+ * This function maps a client buffer into kernel address space. If the
+ * buffer is already mapped, it returns the mapping's address.
+ *
+ * Client buffer mappings are not ref'counted. Each call to
+ * drm_client_buffer_vmap() should be followed by a call to
+ * drm_client_buffer_vunmap(); or the client buffer should be mapped
+ * throughout its lifetime. The latter is the default.
+ *
+ * Returns:
+ *	The mapped memory's address
+ */
+void *drm_client_buffer_vmap(struct drm_client_buffer *buffer)
+{
+	void *vaddr;
+
+	if (buffer->vaddr)
+		return buffer->vaddr;
+
 	/*
 	 * FIXME: The dependency on GEM here isn't required, we could
 	 * convert the driver handle to a dma-buf instead and use the
@@ -289,21 +325,31 @@ drm_client_buffer_create(struct drm_client_dev *client, u32 width, u32 height, u
 	 * fd_install step out of the driver backend hooks, to make that
 	 * final step optional for internal users.
 	 */
-	vaddr = drm_gem_vmap(obj);
-	if (IS_ERR(vaddr)) {
-		ret = PTR_ERR(vaddr);
-		goto err_delete;
-	}
+	vaddr = drm_gem_vmap(buffer->gem);
+	if (IS_ERR(vaddr))
+		return vaddr;
 
 	buffer->vaddr = vaddr;
 
-	return buffer;
-
-err_delete:
-	drm_client_buffer_delete(buffer);
+	return vaddr;
+}
+EXPORT_SYMBOL(drm_client_buffer_vmap);
 
-	return ERR_PTR(ret);
+/**
+ * drm_client_buffer_vunmap - Unmap DRM client buffer
+ * @buffer: DRM client buffer
+ *
+ * This function removes a client buffer's memory mapping. This
+ * function is only required by clients that manage their buffers
+ * by themselves. By default, DRM client buffers are mapped throughout
+ * their entire lifetime.
+ */
+void drm_client_buffer_vunmap(struct drm_client_buffer *buffer)
+{
+	drm_gem_vunmap(buffer->gem, buffer->vaddr);
+	buffer->vaddr = NULL;
 }
+EXPORT_SYMBOL(drm_client_buffer_vunmap);
 
 static void drm_client_buffer_rmfb(struct drm_client_buffer *buffer)
 {
diff --git a/include/drm/drm_client.h b/include/drm/drm_client.h
index 72d51d1e9dd9..5cf2c5dd8b1e 100644
--- a/include/drm/drm_client.h
+++ b/include/drm/drm_client.h
@@ -149,6 +149,8 @@ struct drm_client_buffer {
 struct drm_client_buffer *
 drm_client_framebuffer_create(struct drm_client_dev *client, u32 width, u32 height, u32 format);
 void drm_client_framebuffer_delete(struct drm_client_buffer *buffer);
+void *drm_client_buffer_vmap(struct drm_client_buffer *buffer);
+void drm_client_buffer_vunmap(struct drm_client_buffer *buffer);
 
 int drm_client_modeset_create(struct drm_client_dev *client);
 void drm_client_modeset_free(struct drm_client_dev *client);
-- 
cgit v1.2.3


From 01b947afaa940327e7adf57070a4bf3d0bed9810 Mon Sep 17 00:00:00 2001
From: Thomas Zimmermann <tzimmermann@suse.de>
Date: Fri, 5 Jul 2019 09:31:00 +0200
Subject: drm/fb-helper: Instanciate shadow FB if configured in device's
 mode_config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Generic framebuffer emulation uses a shadow buffer for framebuffers with
dirty() function. If drivers want to use the shadow FB without such a
function, they can now set prefer_shadow or prefer_shadow_fbdev in their
mode_config structures. The former flag is exported to userspace, the
latter flag is fbdev-only.

v3:
	* only schedule dirty worker if fbdev uses shadow fb
	* test shadow fb settings with boolean operators
	* use bool for struct drm_mode_config.prefer_shadow_fbdev
	* fix documentation comments

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Reviewed-by: Noralf Trønnes <noralf@tronnes.org>
Tested-by: Noralf Trønnes <noralf@tronnes.org>
Link: https://patchwork.freedesktop.org/patch/315834/
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/drm_fb_helper.c | 18 +++++++++++++++---
 include/drm/drm_mode_config.h   |  7 +++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 7ba6a0255821..a7ba5b4902d6 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -421,7 +421,9 @@ static void drm_fb_helper_dirty_work(struct work_struct *work)
 				return;
 			drm_fb_helper_dirty_blit_real(helper, &clip_copy);
 		}
-		helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1);
+		if (helper->fb->funcs->dirty)
+			helper->fb->funcs->dirty(helper->fb, NULL, 0, 0,
+						 &clip_copy, 1);
 
 		if (helper->buffer)
 			drm_client_buffer_vunmap(helper->buffer);
@@ -613,6 +615,16 @@ void drm_fb_helper_unlink_fbi(struct drm_fb_helper *fb_helper)
 }
 EXPORT_SYMBOL(drm_fb_helper_unlink_fbi);
 
+static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper)
+{
+	struct drm_device *dev = fb_helper->dev;
+	struct drm_framebuffer *fb = fb_helper->fb;
+
+	return dev->mode_config.prefer_shadow_fbdev ||
+	       dev->mode_config.prefer_shadow ||
+	       fb->funcs->dirty;
+}
+
 static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y,
 				u32 width, u32 height)
 {
@@ -620,7 +632,7 @@ static void drm_fb_helper_dirty(struct fb_info *info, u32 x, u32 y,
 	struct drm_clip_rect *clip = &helper->dirty_clip;
 	unsigned long flags;
 
-	if (!helper->fb->funcs->dirty)
+	if (!drm_fbdev_use_shadow_fb(helper))
 		return;
 
 	spin_lock_irqsave(&helper->dirty_lock, flags);
@@ -2213,7 +2225,7 @@ int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
 
 	drm_fb_helper_fill_info(fbi, fb_helper, sizes);
 
-	if (fb->funcs->dirty) {
+	if (drm_fbdev_use_shadow_fb(fb_helper)) {
 		struct fb_ops *fbops;
 		void *shadow;
 
diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h
index 759d462d028b..f57eea0481e0 100644
--- a/include/drm/drm_mode_config.h
+++ b/include/drm/drm_mode_config.h
@@ -852,6 +852,13 @@ struct drm_mode_config {
 	/* dumb ioctl parameters */
 	uint32_t preferred_depth, prefer_shadow;
 
+	/**
+	 * @prefer_shadow_fbdev:
+	 *
+	 * Hint to framebuffer emulation to prefer shadow-fb rendering.
+	 */
+	bool prefer_shadow_fbdev;
+
 	/**
 	 * @quirk_addfb_prefer_xbgr_30bpp:
 	 *
-- 
cgit v1.2.3


From b9515ecbf6caef2ea911ca59801eff84d034fa48 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Thu, 1 Aug 2019 11:20:25 +0100
Subject: drivers: Fix typo in parameter description for
 driver_find_device_by_acpi_dev

Fix a typo in the comment describing the parameters for the new API, which
triggers the following warning for htmldocs:

include/linux/device.h:479: warning: Function parameter or member 'drv' not described in 'driver_find_device_by_acpi_dev'

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20190801102026.27312-2-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 23ef6eba7213..8e1b2ead5d96 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -467,7 +467,7 @@ driver_find_device_by_fwnode(struct device_driver *drv,
 /**
  * driver_find_device_by_devt- device iterator for locating a particular device
  * by devt.
- * @driver: the driver we're iterating
+ * @drv: the driver we're iterating
  * @devt: devt pointer to match.
  */
 static inline struct device *driver_find_device_by_devt(struct device_driver *drv,
-- 
cgit v1.2.3


From 313b46d831189f593840c625d7972092cb0088fc Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Thu, 1 Aug 2019 11:20:26 +0100
Subject: drivers: Fix htmldocs warnings with bus_find_next_device()

Document the parameters for bus_find_next_device() to avoid
htmldocs build warnings as reported below :

include/linux/device.h:236: warning: Function parameter or member 'bus' not described in 'bus_find_next_device'
include/linux/device.h:236: warning: Function parameter or member 'cur' not described in 'bus_find_next_device'

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20190801102026.27312-3-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 8e1b2ead5d96..bff46ce3bc3b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -229,6 +229,8 @@ static inline struct device *bus_find_device_by_devt(struct bus_type *bus,
 /**
  * bus_find_next_device - Find the next device after a given device in a
  * given bus.
+ * @bus: bus type
+ * @cur: device to begin the search with.
  */
 static inline struct device *
 bus_find_next_device(struct bus_type *bus,struct device *cur)
-- 
cgit v1.2.3


From 5302dd7dd0b6d04c63cdce51d1e9fda9ef0be886 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 31 Jul 2019 15:17:14 -0700
Subject: driver core: Add support for linking devices during device addition

When devices are added, the bus might want to create device links to track
functional dependencies between supplier and consumer devices. This
tracking of supplier-consumer relationship allows optimizing device probe
order and tracking whether all consumers of a supplier are active. The
add_links bus callback is added to support this.

However, when consumer devices are added, they might not have a supplier
device to link to despite needing mandatory resources/functionality from
one or more suppliers. A waiting_for_suppliers list is created to track
such consumers and retry linking them when new devices get added.

Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20190731221721.187713-2-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h | 14 +++++++++
 2 files changed, 97 insertions(+)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 90f2dd4661f5..18e04ca1de13 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,6 +44,8 @@ early_param("sysfs.deprecated", sysfs_deprecated_setup);
 #endif
 
 /* Device links support. */
+static LIST_HEAD(wait_for_suppliers);
+static DEFINE_MUTEX(wfs_lock);
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
@@ -418,6 +420,51 @@ struct device_link *device_link_add(struct device *consumer,
 }
 EXPORT_SYMBOL_GPL(device_link_add);
 
+/**
+ * device_link_wait_for_supplier - Mark device as waiting for supplier
+ * @consumer: Consumer device
+ *
+ * Marks the consumer device as waiting for suppliers to become available. The
+ * consumer device will never be probed until it's unmarked as waiting for
+ * suppliers. The caller is responsible for adding the link to the supplier
+ * once the supplier device is present.
+ *
+ * This function is NOT meant to be called from the probe function of the
+ * consumer but rather from code that creates/adds the consumer device.
+ */
+static void device_link_wait_for_supplier(struct device *consumer)
+{
+	mutex_lock(&wfs_lock);
+	list_add_tail(&consumer->links.needs_suppliers, &wait_for_suppliers);
+	mutex_unlock(&wfs_lock);
+}
+
+/**
+ * device_link_check_waiting_consumers - Try to unmark waiting consumers
+ *
+ * Loops through all consumers waiting on suppliers and tries to add all their
+ * supplier links. If that succeeds, the consumer device is unmarked as waiting
+ * for suppliers. Otherwise, they are left marked as waiting on suppliers,
+ *
+ * The add_links bus callback is expected to return 0 if it has found and added
+ * all the supplier links for the consumer device. It should return an error if
+ * it isn't able to do so.
+ *
+ * The caller of device_link_wait_for_supplier() is expected to call this once
+ * it's aware of potential suppliers becoming available.
+ */
+static void device_link_check_waiting_consumers(void)
+{
+	struct device *dev, *tmp;
+
+	mutex_lock(&wfs_lock);
+	list_for_each_entry_safe(dev, tmp, &wait_for_suppliers,
+				 links.needs_suppliers)
+		if (!dev->bus->add_links(dev))
+			list_del_init(&dev->links.needs_suppliers);
+	mutex_unlock(&wfs_lock);
+}
+
 static void device_link_free(struct device_link *link)
 {
 	while (refcount_dec_not_one(&link->rpm_active))
@@ -552,6 +599,19 @@ int device_links_check_suppliers(struct device *dev)
 	struct device_link *link;
 	int ret = 0;
 
+	/*
+	 * If a device is waiting for one or more suppliers (in
+	 * wait_for_suppliers list), it is not ready to probe yet. So just
+	 * return -EPROBE_DEFER without having to check the links with existing
+	 * suppliers.
+	 */
+	mutex_lock(&wfs_lock);
+	if (!list_empty(&dev->links.needs_suppliers)) {
+		mutex_unlock(&wfs_lock);
+		return -EPROBE_DEFER;
+	}
+	mutex_unlock(&wfs_lock);
+
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.suppliers, c_node) {
@@ -836,6 +896,10 @@ static void device_links_purge(struct device *dev)
 {
 	struct device_link *link, *ln;
 
+	mutex_lock(&wfs_lock);
+	list_del(&dev->links.needs_suppliers);
+	mutex_unlock(&wfs_lock);
+
 	/*
 	 * Delete all of the remaining links from this device to any other
 	 * devices (either consumers or suppliers).
@@ -1697,6 +1761,7 @@ void device_initialize(struct device *dev)
 #endif
 	INIT_LIST_HEAD(&dev->links.consumers);
 	INIT_LIST_HEAD(&dev->links.suppliers);
+	INIT_LIST_HEAD(&dev->links.needs_suppliers);
 	dev->links.status = DL_DEV_NO_DRIVER;
 }
 EXPORT_SYMBOL_GPL(device_initialize);
@@ -2132,6 +2197,24 @@ int device_add(struct device *dev)
 					     BUS_NOTIFY_ADD_DEVICE, dev);
 
 	kobject_uevent(&dev->kobj, KOBJ_ADD);
+
+	/*
+	 * Check if any of the other devices (consumers) have been waiting for
+	 * this device (supplier) to be added so that they can create a device
+	 * link to it.
+	 *
+	 * This needs to happen after device_pm_add() because device_link_add()
+	 * requires the supplier be registered before it's called.
+	 *
+	 * But this also needs to happe before bus_probe_device() to make sure
+	 * waiting consumers can link to it before the driver is bound to the
+	 * device and the driver sync_state callback is called for this device.
+	 */
+	device_link_check_waiting_consumers();
+
+	if (dev->bus && dev->bus->add_links && dev->bus->add_links(dev))
+		device_link_wait_for_supplier(dev);
+
 	bus_probe_device(dev);
 	if (parent)
 		klist_add_tail(&dev->p->knode_parent,
diff --git a/include/linux/device.h b/include/linux/device.h
index bff46ce3bc3b..1e05911325f0 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -78,6 +78,17 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *		-EPROBE_DEFER it will queue the device for deferred probing.
  * @uevent:	Called when a device is added, removed, or a few other things
  *		that generate uevents to add the environment variables.
+ * @add_links:	Called, perhaps multiple times per device, after a device is
+ *		added to this bus.  The function is expected to create device
+ *		links to all the suppliers of the input device that are
+ *		available at the time this function is called.  As in, the
+ *		function should NOT stop at the first failed device link if
+ *		other unlinked supplier devices are present in the system.
+ *
+ *		Return 0 if device links have been successfully created to all
+ *		the suppliers of this device.  Return an error if some of the
+ *		suppliers are not yet available and this function needs to be
+ *		reattempted in the future.
  * @probe:	Called when a new device or driver add to this bus, and callback
  *		the specific driver's probe to initial the matched device.
  * @remove:	Called when a device removed from this bus.
@@ -122,6 +133,7 @@ struct bus_type {
 
 	int (*match)(struct device *dev, struct device_driver *drv);
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
+	int (*add_links)(struct device *dev);
 	int (*probe)(struct device *dev);
 	int (*remove)(struct device *dev);
 	void (*shutdown)(struct device *dev);
@@ -1128,11 +1140,13 @@ enum dl_dev_state {
  * struct dev_links_info - Device data related to device links.
  * @suppliers: List of links to supplier devices.
  * @consumers: List of links to consumer devices.
+ * @needs_suppliers: Hook to global list of devices waiting for suppliers.
  * @status: Driver status information.
  */
 struct dev_links_info {
 	struct list_head suppliers;
 	struct list_head consumers;
+	struct list_head needs_suppliers;
 	enum dl_dev_state status;
 };
 
-- 
cgit v1.2.3


From 134b23eec9e3a3c795a6ceb0efe2fa63e87983b2 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 31 Jul 2019 15:17:15 -0700
Subject: driver core: Add edit_links() callback for drivers

The driver core/bus adding supplier-consumer dependencies by default
enables functional dependencies to be tracked correctly even when the
consumer devices haven't had their drivers registered or loaded (if they
are modules).

However, when the bus incorrectly adds dependencies that it shouldn't
have added, the devices might never probe.

For example, if device-C is a consumer of device-S and they have
phandles to each other in DT, the following could happen:

1.  Device-S get added first.
2.  The bus add_links() callback will (incorrectly) try to link it as
    a consumer of device-C.
3.  Since device-C isn't present, device-S will be put in
    "waiting-for-supplier" list.
4.  Device-C gets added next.
5.  All devices in "waiting-for-supplier" list are retried for linking.
6.  Device-S gets linked as consumer to Device-C.
7.  The bus add_links() callback will (correctly) try to link it as
    a consumer of device-S.
8.  This isn't allowed because it would create a cyclic device links.

Neither devices will get probed since the supplier is marked as
dependent on the consumer. And the consumer will never probe because the
consumer can't get resources from the supplier.

Without this patch, things stay in this broken state. However, with this
patch, the execution will continue like this:

9.  Device-C's driver is loaded.
10. Device-C's driver removes Device-S as a consumer of Device-C.
11. Device-C's driver adds Device-C as a consumer of Device-S.
12. Device-S probes.
14. Device-C probes.

kbuild test robot reported missing documentation for device.has_edit_links
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20190731221721.187713-3-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 24 ++++++++++++++++++++++--
 drivers/base/dd.c      | 29 +++++++++++++++++++++++++++++
 include/linux/device.h | 20 ++++++++++++++++++++
 3 files changed, 71 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 18e04ca1de13..feec8dee1e91 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -439,6 +439,19 @@ static void device_link_wait_for_supplier(struct device *consumer)
 	mutex_unlock(&wfs_lock);
 }
 
+/**
+ * device_link_remove_from_wfs - Unmark device as waiting for supplier
+ * @consumer: Consumer device
+ *
+ * Unmark the consumer device as waiting for suppliers to become available.
+ */
+void device_link_remove_from_wfs(struct device *consumer)
+{
+	mutex_lock(&wfs_lock);
+	list_del_init(&consumer->links.needs_suppliers);
+	mutex_unlock(&wfs_lock);
+}
+
 /**
  * device_link_check_waiting_consumers - Try to unmark waiting consumers
  *
@@ -456,12 +469,19 @@ static void device_link_wait_for_supplier(struct device *consumer)
 static void device_link_check_waiting_consumers(void)
 {
 	struct device *dev, *tmp;
+	int ret;
 
 	mutex_lock(&wfs_lock);
 	list_for_each_entry_safe(dev, tmp, &wait_for_suppliers,
-				 links.needs_suppliers)
-		if (!dev->bus->add_links(dev))
+				 links.needs_suppliers) {
+		ret = 0;
+		if (dev->has_edit_links)
+			ret = driver_edit_links(dev);
+		else if (dev->bus->add_links)
+			ret = dev->bus->add_links(dev);
+		if (!ret)
 			list_del_init(&dev->links.needs_suppliers);
+	}
 	mutex_unlock(&wfs_lock);
 }
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 994a90747420..5e7041ede0d7 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -698,6 +698,12 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 
+	if (drv->edit_links) {
+		if (drv->edit_links(dev))
+			dev->has_edit_links = true;
+		else
+			device_link_remove_from_wfs(dev);
+	}
 	pm_runtime_get_suppliers(dev);
 	if (dev->parent)
 		pm_runtime_get_sync(dev->parent);
@@ -786,6 +792,29 @@ struct device_attach_data {
 	bool have_async;
 };
 
+static int __driver_edit_links(struct device_driver *drv, void *data)
+{
+	struct device *dev = data;
+
+	if (!drv->edit_links)
+		return 0;
+
+	if (driver_match_device(drv, dev) <= 0)
+		return 0;
+
+	return drv->edit_links(dev);
+}
+
+int driver_edit_links(struct device *dev)
+{
+	int ret;
+
+	device_lock(dev);
+	ret = bus_for_each_drv(dev->bus, NULL, dev, __driver_edit_links);
+	device_unlock(dev);
+	return ret;
+}
+
 static int __device_attach_driver(struct device_driver *drv, void *_data)
 {
 	struct device_attach_data *data = _data;
diff --git a/include/linux/device.h b/include/linux/device.h
index 1e05911325f0..d3991810f39d 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -349,6 +349,20 @@ enum probe_type {
  * @probe_type:	Type of the probe (synchronous or asynchronous) to use.
  * @of_match_table: The open firmware table.
  * @acpi_match_table: The ACPI match table.
+ * @edit_links:	Called to allow a matched driver to edit the device links the
+ *		bus might have added incorrectly. This will be useful to handle
+ *		cases where the bus incorrectly adds functional dependencies
+ *		that aren't true or tries to create cyclic dependencies. But
+ *		doesn't correctly handle functional dependencies that are
+ *		missed by the bus as the supplier's sync_state might get to
+ *		execute before the driver for a missing consumer is loaded and
+ *		gets to edit the device links for the consumer.
+ *
+ *		This function might be called multiple times after a new device
+ *		is added.  The function is expected to create all the device
+ *		links for the new device and return 0 if it was completed
+ *		successfully or return an error if it needs to be reattempted
+ *		in the future.
  * @probe:	Called to query the existence of a specific device,
  *		whether this driver can work with it, and bind the driver
  *		to a specific device.
@@ -388,6 +402,7 @@ struct device_driver {
 	const struct of_device_id	*of_match_table;
 	const struct acpi_device_id	*acpi_match_table;
 
+	int (*edit_links)(struct device *dev);
 	int (*probe) (struct device *dev);
 	int (*remove) (struct device *dev);
 	void (*shutdown) (struct device *dev);
@@ -1220,6 +1235,8 @@ struct dev_links_info {
  * @offline:	Set after successful invocation of bus type's .offline().
  * @of_node_reused: Set if the device-tree node is shared with an ancestor
  *              device.
+ * @has_edit_links: This device has a driver than is capable of
+ *		    editing the device links created by driver core.
  * @dma_coherent: this particular device is dma coherent, even if the
  *		architecture supports non-coherent devices.
  *
@@ -1313,6 +1330,7 @@ struct device {
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
+	bool			has_edit_links:1;
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
@@ -1564,6 +1582,7 @@ extern int  __must_check device_attach(struct device *dev);
 extern int __must_check driver_attach(struct device_driver *drv);
 extern void device_initial_probe(struct device *dev);
 extern int __must_check device_reprobe(struct device *dev);
+extern int driver_edit_links(struct device *dev);
 
 extern bool device_is_bound(struct device *dev);
 
@@ -1654,6 +1673,7 @@ struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
 void device_link_remove(void *consumer, struct device *supplier);
+void device_link_remove_from_wfs(struct device *consumer);
 
 #ifndef dev_fmt
 #define dev_fmt(fmt) fmt
-- 
cgit v1.2.3


From 8f8184d6bf676a8680d6f441e40317d166b46f73 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 31 Jul 2019 15:17:17 -0700
Subject: driver core: Add sync_state driver/bus callback

This sync_state driver/bus callback is called once all the consumers
of a supplier have probed successfully.

This allows the supplier device's driver/bus to sync the supplier
device's state to the software state with the guarantee that all the
consumers are actively managing the resources provided by the supplier
device.

To maintain backwards compatibility and ease transition from existing
frameworks and resource cleanup schemes, late_initcall_sync is the
earliest when the sync_state callback might be called.

There is no upper bound on the time by which the sync_state callback
has to be called. This is because if a consumer device never probes,
the supplier has to maintain its resources in the state left by the
bootloader. For example, if the bootloader leaves the display
backlight at a fixed voltage and the backlight driver is never probed,
you don't want the backlight to ever be turned off after boot up.

Also, when multiple devices are added after kernel init, some
suppliers could be added before their consumer devices get added. In
these instances, the supplier devices could get their sync_state
callback called right after they probe because the consumers devices
haven't had a chance to create device links to the suppliers.

To handle this correctly, this change also provides APIs to
pause/resume sync state callbacks so that when multiple devices are
added, their sync_state callback evaluation can be postponed to happen
after all of them are added.

kbuild test robot reported missing documentation for device.state_synced
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20190731221721.187713-5-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h | 26 ++++++++++++++++++++
 2 files changed, 91 insertions(+)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index feec8dee1e91..5b1795cd116a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -46,6 +46,8 @@ early_param("sysfs.deprecated", sysfs_deprecated_setup);
 /* Device links support. */
 static LIST_HEAD(wait_for_suppliers);
 static DEFINE_MUTEX(wfs_lock);
+static LIST_HEAD(deferred_sync);
+static unsigned int supplier_sync_state_disabled;
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
@@ -651,6 +653,62 @@ int device_links_check_suppliers(struct device *dev)
 	return ret;
 }
 
+static void __device_links_supplier_sync_state(struct device *dev)
+{
+	struct device_link *link;
+
+	if (dev->state_synced)
+		return;
+
+	list_for_each_entry(link, &dev->links.consumers, s_node) {
+		if (!(link->flags & DL_FLAG_MANAGED))
+			continue;
+		if (link->status != DL_STATE_ACTIVE)
+			return;
+	}
+
+	if (dev->bus->sync_state)
+		dev->bus->sync_state(dev);
+	else if (dev->driver && dev->driver->sync_state)
+		dev->driver->sync_state(dev);
+
+	dev->state_synced = true;
+}
+
+void device_links_supplier_sync_state_pause(void)
+{
+	device_links_write_lock();
+	supplier_sync_state_disabled++;
+	device_links_write_unlock();
+}
+
+void device_links_supplier_sync_state_resume(void)
+{
+	struct device *dev, *tmp;
+
+	device_links_write_lock();
+	if (!supplier_sync_state_disabled) {
+		WARN(true, "Unmatched sync_state pause/resume!");
+		goto out;
+	}
+	supplier_sync_state_disabled--;
+	if (supplier_sync_state_disabled)
+		goto out;
+
+	list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) {
+		__device_links_supplier_sync_state(dev);
+		list_del_init(&dev->links.defer_sync);
+	}
+out:
+	device_links_write_unlock();
+}
+
+static void __device_links_supplier_defer_sync(struct device *sup)
+{
+	if (list_empty(&sup->links.defer_sync))
+		list_add_tail(&sup->links.defer_sync, &deferred_sync);
+}
+
 /**
  * device_links_driver_bound - Update device links after probing its driver.
  * @dev: Device to update the links for.
@@ -695,6 +753,11 @@ void device_links_driver_bound(struct device *dev)
 
 		WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
 		WRITE_ONCE(link->status, DL_STATE_ACTIVE);
+
+		if (supplier_sync_state_disabled)
+			__device_links_supplier_defer_sync(link->supplier);
+		else
+			__device_links_supplier_sync_state(link->supplier);
 	}
 
 	dev->links.status = DL_DEV_DRIVER_BOUND;
@@ -811,6 +874,7 @@ void device_links_driver_cleanup(struct device *dev)
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
 
+	list_del_init(&dev->links.defer_sync);
 	__device_links_no_driver(dev);
 
 	device_links_write_unlock();
@@ -1782,6 +1846,7 @@ void device_initialize(struct device *dev)
 	INIT_LIST_HEAD(&dev->links.consumers);
 	INIT_LIST_HEAD(&dev->links.suppliers);
 	INIT_LIST_HEAD(&dev->links.needs_suppliers);
+	INIT_LIST_HEAD(&dev->links.defer_sync);
 	dev->links.status = DL_DEV_NO_DRIVER;
 }
 EXPORT_SYMBOL_GPL(device_initialize);
diff --git a/include/linux/device.h b/include/linux/device.h
index d3991810f39d..63a3aafabcd6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -84,6 +84,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *		available at the time this function is called.  As in, the
  *		function should NOT stop at the first failed device link if
  *		other unlinked supplier devices are present in the system.
+ *		This is necessary for the sync_state() callback to work
+ *		correctly.
  *
  *		Return 0 if device links have been successfully created to all
  *		the suppliers of this device.  Return an error if some of the
@@ -91,6 +93,13 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *		reattempted in the future.
  * @probe:	Called when a new device or driver add to this bus, and callback
  *		the specific driver's probe to initial the matched device.
+ * @sync_state:	Called to sync device state to software state after all the
+ *		state tracking consumers linked to this device (present at
+ *		the time of late_initcall) have successfully bound to a
+ *		driver. If the device has no consumers, this function will
+ *		be called at late_initcall_sync level. If the device has
+ *		consumers that are never bound to a driver, this function
+ *		will never get called until they do.
  * @remove:	Called when a device removed from this bus.
  * @shutdown:	Called at shut-down time to quiesce the device.
  *
@@ -135,6 +144,7 @@ struct bus_type {
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
 	int (*add_links)(struct device *dev);
 	int (*probe)(struct device *dev);
+	void (*sync_state)(struct device *dev);
 	int (*remove)(struct device *dev);
 	void (*shutdown)(struct device *dev);
 
@@ -366,6 +376,13 @@ enum probe_type {
  * @probe:	Called to query the existence of a specific device,
  *		whether this driver can work with it, and bind the driver
  *		to a specific device.
+ * @sync_state:	Called to sync device state to software state after all the
+ *		state tracking consumers linked to this device (present at
+ *		the time of late_initcall) have successfully bound to a
+ *		driver. If the device has no consumers, this function will
+ *		be called at late_initcall_sync level. If the device has
+ *		consumers that are never bound to a driver, this function
+ *		will never get called until they do.
  * @remove:	Called when the device is removed from the system to
  *		unbind a device from this driver.
  * @shutdown:	Called at shut-down time to quiesce the device.
@@ -404,6 +421,7 @@ struct device_driver {
 
 	int (*edit_links)(struct device *dev);
 	int (*probe) (struct device *dev);
+	void (*sync_state)(struct device *dev);
 	int (*remove) (struct device *dev);
 	void (*shutdown) (struct device *dev);
 	int (*suspend) (struct device *dev, pm_message_t state);
@@ -1156,12 +1174,14 @@ enum dl_dev_state {
  * @suppliers: List of links to supplier devices.
  * @consumers: List of links to consumer devices.
  * @needs_suppliers: Hook to global list of devices waiting for suppliers.
+ * @defer_sync: Hook to global list of devices that have deferred sync_state.
  * @status: Driver status information.
  */
 struct dev_links_info {
 	struct list_head suppliers;
 	struct list_head consumers;
 	struct list_head needs_suppliers;
+	struct list_head defer_sync;
 	enum dl_dev_state status;
 };
 
@@ -1237,6 +1257,9 @@ struct dev_links_info {
  *              device.
  * @has_edit_links: This device has a driver than is capable of
  *		    editing the device links created by driver core.
+ * @state_synced: The hardware state of this device has been synced to match
+ *		  the software state of this device by calling the driver/bus
+ *		  sync_state() callback.
  * @dma_coherent: this particular device is dma coherent, even if the
  *		architecture supports non-coherent devices.
  *
@@ -1331,6 +1354,7 @@ struct device {
 	bool			offline:1;
 	bool			of_node_reused:1;
 	bool			has_edit_links:1;
+	bool			state_synced:1;
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
@@ -1674,6 +1698,8 @@ struct device_link *device_link_add(struct device *consumer,
 void device_link_del(struct device_link *link);
 void device_link_remove(void *consumer, struct device *supplier);
 void device_link_remove_from_wfs(struct device *consumer);
+void device_links_supplier_sync_state_pause(void);
+void device_links_supplier_sync_state_resume(void);
 
 #ifndef dev_fmt
 #define dev_fmt(fmt) fmt
-- 
cgit v1.2.3


From dbc1625fc9deefb352f6ff26a575ae4b3ddef23a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 26 Jul 2019 20:30:50 +0200
Subject: hrtimer: Consolidate hrtimer_init() + hrtimer_init_sleeper() calls

hrtimer_init_sleeper() calls require prior initialisation of the hrtimer
object which is embedded into the hrtimer_sleeper.

Combine the initialization and spare a function call. Fixup all call sites.

This is also a preparatory change for PREEMPT_RT to do hrtimer sleeper
specific initializations of the embedded hrtimer without modifying any of
the call sites.

No functional change.

[ anna-maria: Minor cleanups ]
[ tglx: Adopted to the removal of the task argument of
  	hrtimer_init_sleeper() and trivial polishing.
	Folded a fix from Stephen Rothwell for the vsoc code ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.887468908@linutronix.de
---
 block/blk-mq.c                 |  3 +--
 drivers/staging/android/vsoc.c |  6 ++----
 include/linux/hrtimer.h        | 17 ++++++++++++++---
 include/linux/wait.h           |  4 ++--
 kernel/futex.c                 |  8 +++-----
 kernel/time/hrtimer.c          | 43 +++++++++++++++++++++++++++++++-----------
 net/core/pktgen.c              |  4 +---
 7 files changed, 55 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5f647cb8c695..df3fafbfe9a9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3415,10 +3415,9 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
 	kt = nsecs;
 
 	mode = HRTIMER_MODE_REL;
-	hrtimer_init_on_stack(&hs.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_sleeper_on_stack(&hs, CLOCK_MONOTONIC, mode);
 	hrtimer_set_expires(&hs.timer, kt);
 
-	hrtimer_init_sleeper(&hs);
 	do {
 		if (blk_mq_rq_state(rq) == MQ_RQ_COMPLETE)
 			break;
diff --git a/drivers/staging/android/vsoc.c b/drivers/staging/android/vsoc.c
index ce480bcf20d2..2d6b3981afb8 100644
--- a/drivers/staging/android/vsoc.c
+++ b/drivers/staging/android/vsoc.c
@@ -437,12 +437,10 @@ static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
 			return -EINVAL;
 		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);
 
-		hrtimer_init_on_stack(&to->timer, CLOCK_MONOTONIC,
-				      HRTIMER_MODE_ABS);
+		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
+					      HRTIMER_MODE_ABS);
 		hrtimer_set_expires_range_ns(&to->timer, wake_time,
 					     current->timer_slack_ns);
-
-		hrtimer_init_sleeper(to);
 	}
 
 	while (1) {
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3c74f89367c4..0df373bed3d7 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -347,10 +347,15 @@ DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 /* Initialize timers: */
 extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
 			 enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+				 enum hrtimer_mode mode);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
 				  enum hrtimer_mode mode);
+extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+					  clockid_t clock_id,
+					  enum hrtimer_mode mode);
 
 extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
 #else
@@ -360,6 +365,14 @@ static inline void hrtimer_init_on_stack(struct hrtimer *timer,
 {
 	hrtimer_init(timer, which_clock, mode);
 }
+
+static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+						 clockid_t clock_id,
+						 enum hrtimer_mode mode)
+{
+	hrtimer_init_sleeper(sl, clock_id, mode);
+}
+
 static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
 #endif
 
@@ -463,10 +476,8 @@ extern long hrtimer_nanosleep(const struct timespec64 *rqtp,
 			      const enum hrtimer_mode mode,
 			      const clockid_t clockid);
 
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl);
-
 extern int schedule_hrtimeout_range(ktime_t *expires, u64 delta,
-						const enum hrtimer_mode mode);
+				    const enum hrtimer_mode mode);
 extern int schedule_hrtimeout_range_clock(ktime_t *expires,
 					  u64 delta,
 					  const enum hrtimer_mode mode,
diff --git a/include/linux/wait.h b/include/linux/wait.h
index d57832774ca6..4707543ef575 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -488,8 +488,8 @@ do {										\
 	int __ret = 0;								\
 	struct hrtimer_sleeper __t;						\
 										\
-	hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);	\
-	hrtimer_init_sleeper(&__t);						\
+	hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC,			\
+				      HRTIMER_MODE_REL);			\
 	if ((timeout) != KTIME_MAX)						\
 		hrtimer_start_range_ns(&__t.timer, timeout,			\
 				       current->timer_slack_ns,			\
diff --git a/kernel/futex.c b/kernel/futex.c
index 5e9842ea4012..c8561aa5338e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -487,11 +487,9 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	if (!time)
 		return NULL;
 
-	hrtimer_init_on_stack(&timeout->timer, (flags & FLAGS_CLOCKRT) ?
-			      CLOCK_REALTIME : CLOCK_MONOTONIC,
-			      HRTIMER_MODE_ABS);
-	hrtimer_init_sleeper(timeout);
-
+	hrtimer_init_sleeper_on_stack(timeout, (flags & FLAGS_CLOCKRT) ?
+				      CLOCK_REALTIME : CLOCK_MONOTONIC,
+				      HRTIMER_MODE_ABS);
 	/*
 	 * If range_ns is 0, calling hrtimer_set_expires_range_ns() is
 	 * effectively the same as calling hrtimer_set_expires().
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index de895d86800c..bb55d62f631e 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -427,6 +427,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
 
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
 	debug_object_free(timer, &hrtimer_debug_descr);
@@ -1639,11 +1650,27 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl)
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
 	sl->task = current;
 }
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
+}
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
@@ -1669,8 +1696,6 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
 		hrtimer_start_expires(&t->timer, mode);
@@ -1707,10 +1732,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	struct hrtimer_sleeper t;
 	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
@@ -1728,7 +1752,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1927,11 +1951,8 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t);
-
 	hrtimer_start_expires(&t.timer, mode);
 
 	if (likely(t.task))
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 7f3cf2381f27..a5905975bc12 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2156,7 +2156,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 	s64 remaining;
 	struct hrtimer_sleeper t;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	hrtimer_set_expires(&t.timer, spin_until);
 
 	remaining = ktime_to_ns(hrtimer_expires_remaining(&t.timer));
@@ -2170,8 +2170,6 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
 			end_time = ktime_get();
 		} while (ktime_compare(end_time, spin_until) < 0);
 	} else {
-		/* see do_nanosleep */
-		hrtimer_init_sleeper(&t);
 		do {
 			set_current_state(TASK_INTERRUPTIBLE);
 			hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
-- 
cgit v1.2.3


From 01656464fce946f70b02a84ab218e562ceb1662e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 Jul 2019 21:03:53 +0200
Subject: hrtimer: Provide hrtimer_sleeper_start_expires()

hrtimer_sleepers will gain a scheduling class dependent treatment on
PREEMPT_RT. Create a wrapper around hrtimer_start_expires() to make that
possible.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h |  3 +++
 kernel/time/hrtimer.c   | 19 +++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 0df373bed3d7..24072a0942c0 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -408,6 +408,9 @@ static inline void hrtimer_start_expires(struct hrtimer *timer,
 	hrtimer_start_range_ns(timer, soft, delta, mode);
 }
 
+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
+				   enum hrtimer_mode mode);
+
 static inline void hrtimer_restart(struct hrtimer *timer)
 {
 	hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index bb55d62f631e..dab1ea1a99d0 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1650,6 +1650,21 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+/**
+ * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
+ * @sl:		sleeper to be started
+ * @mode:	timer mode abs/rel
+ *
+ * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
+ * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
+ */
+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
+				   enum hrtimer_mode mode)
+{
+	hrtimer_start_expires(&sl->timer, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
+
 static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 				   clockid_t clock_id, enum hrtimer_mode mode)
 {
@@ -1698,7 +1713,7 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
-		hrtimer_start_expires(&t->timer, mode);
+		hrtimer_sleeper_start_expires(t, mode);
 
 		if (likely(t->task))
 			freezable_schedule();
@@ -1953,7 +1968,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 
 	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-	hrtimer_start_expires(&t.timer, mode);
+	hrtimer_sleeper_start_expires(&t, mode);
 
 	if (likely(t.task))
 		schedule();
-- 
cgit v1.2.3


From ae6683d815895c2be1e60e1942630fa99488055b Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 26 Jul 2019 20:30:51 +0200
Subject: hrtimer: Introduce HARD expiry mode

On PREEMPT_RT not all hrtimers can be expired in hard interrupt context
even if that is perfectly fine on a PREEMPT_RT=n kernel, e.g. because they
take regular spinlocks. Also for latency reasons PREEMPT_RT tries to defer
most hrtimers' expiry into soft interrupt context.

But there are hrtimers which must be expired in hard interrupt context even
when PREEMPT_RT is enabled:

  - hrtimers which must expiry in hard interrupt context, e.g. scheduler,
    perf, watchdog related hrtimers

  - latency critical hrtimers, e.g. nanosleep, ..., kvm lapic timer

Add a new mode flag HRTIMER_MODE_HARD which allows to mark these timers so
PREEMPT_RT will not move them into softirq expiry mode.

[ tglx: Split out of a larger combo patch. Added changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185752.981398465@linutronix.de
---
 include/linux/hrtimer.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 24072a0942c0..15c2ba6b6316 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -38,6 +38,7 @@ enum hrtimer_mode {
 	HRTIMER_MODE_REL	= 0x01,
 	HRTIMER_MODE_PINNED	= 0x02,
 	HRTIMER_MODE_SOFT	= 0x04,
+	HRTIMER_MODE_HARD	= 0x08,
 
 	HRTIMER_MODE_ABS_PINNED = HRTIMER_MODE_ABS | HRTIMER_MODE_PINNED,
 	HRTIMER_MODE_REL_PINNED = HRTIMER_MODE_REL | HRTIMER_MODE_PINNED,
@@ -48,6 +49,11 @@ enum hrtimer_mode {
 	HRTIMER_MODE_ABS_PINNED_SOFT = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_SOFT,
 	HRTIMER_MODE_REL_PINNED_SOFT = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_SOFT,
 
+	HRTIMER_MODE_ABS_HARD	= HRTIMER_MODE_ABS | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_HARD	= HRTIMER_MODE_REL | HRTIMER_MODE_HARD,
+
+	HRTIMER_MODE_ABS_PINNED_HARD = HRTIMER_MODE_ABS_PINNED | HRTIMER_MODE_HARD,
+	HRTIMER_MODE_REL_PINNED_HARD = HRTIMER_MODE_REL_PINNED | HRTIMER_MODE_HARD,
 };
 
 /*
-- 
cgit v1.2.3


From 621e55ff5b8e0ab5d1063f0eae0ef3960bef8f6e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 31 Jul 2019 11:18:40 +0300
Subject: RDMA/devices: Do not deadlock during client removal

lockdep reports:

   WARNING: possible circular locking dependency detected

   modprobe/302 is trying to acquire lock:
   0000000007c8919c ((wq_completion)ib_cm){+.+.}, at: flush_workqueue+0xdf/0x990

   but task is already holding lock:
   000000002d3d2ca9 (&device->client_data_rwsem){++++}, at: remove_client_context+0x79/0xd0 [ib_core]

   which lock already depends on the new lock.

   the existing dependency chain (in reverse order) is:

   -> #2 (&device->client_data_rwsem){++++}:
          down_read+0x3f/0x160
          ib_get_net_dev_by_params+0xd5/0x200 [ib_core]
          cma_ib_req_handler+0x5f6/0x2090 [rdma_cm]
          cm_process_work+0x29/0x110 [ib_cm]
          cm_req_handler+0x10f5/0x1c00 [ib_cm]
          cm_work_handler+0x54c/0x311d [ib_cm]
          process_one_work+0x4aa/0xa30
          worker_thread+0x62/0x5b0
          kthread+0x1ca/0x1f0
          ret_from_fork+0x24/0x30

   -> #1 ((work_completion)(&(&work->work)->work)){+.+.}:
          process_one_work+0x45f/0xa30
          worker_thread+0x62/0x5b0
          kthread+0x1ca/0x1f0
          ret_from_fork+0x24/0x30

   -> #0 ((wq_completion)ib_cm){+.+.}:
          lock_acquire+0xc8/0x1d0
          flush_workqueue+0x102/0x990
          cm_remove_one+0x30e/0x3c0 [ib_cm]
          remove_client_context+0x94/0xd0 [ib_core]
          disable_device+0x10a/0x1f0 [ib_core]
          __ib_unregister_device+0x5a/0xe0 [ib_core]
          ib_unregister_device+0x21/0x30 [ib_core]
          mlx5_ib_stage_ib_reg_cleanup+0x9/0x10 [mlx5_ib]
          __mlx5_ib_remove+0x3d/0x70 [mlx5_ib]
          mlx5_ib_remove+0x12e/0x140 [mlx5_ib]
          mlx5_remove_device+0x144/0x150 [mlx5_core]
          mlx5_unregister_interface+0x3f/0xf0 [mlx5_core]
          mlx5_ib_cleanup+0x10/0x3a [mlx5_ib]
          __x64_sys_delete_module+0x227/0x350
          do_syscall_64+0xc3/0x6a4
          entry_SYSCALL_64_after_hwframe+0x49/0xbe

Which is due to the read side of the client_data_rwsem being obtained
recursively through a work queue flush during cm client removal.

The lock is being held across the remove in remove_client_context() so
that the function is a fence, once it returns the client is removed. This
is required so that the two callers do not proceed with destruction until
the client completes removal.

Instead of using client_data_rwsem use the existing device unregistration
refcount and add a similar client unregistration (client->uses) refcount.

This will fence the two unregistration paths without holding any locks.

Cc: <stable@vger.kernel.org>
Fixes: 921eab1143aa ("RDMA/devices: Re-organize device.c locking")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190731081841.32345-2-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/device.c | 54 ++++++++++++++++++++++++++++++----------
 include/rdma/ib_verbs.h          |  3 +++
 2 files changed, 44 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 9773145dee09..d86fbabe48d6 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -99,6 +99,12 @@ static LIST_HEAD(client_list);
 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
 static DECLARE_RWSEM(clients_rwsem);
 
+static void ib_client_put(struct ib_client *client)
+{
+	if (refcount_dec_and_test(&client->uses))
+		complete(&client->uses_zero);
+}
+
 /*
  * If client_data is registered then the corresponding client must also still
  * be registered.
@@ -660,6 +666,14 @@ static int add_client_context(struct ib_device *device,
 		return 0;
 
 	down_write(&device->client_data_rwsem);
+	/*
+	 * So long as the client is registered hold both the client and device
+	 * unregistration locks.
+	 */
+	if (!refcount_inc_not_zero(&client->uses))
+		goto out_unlock;
+	refcount_inc(&device->refcount);
+
 	/*
 	 * Another caller to add_client_context got here first and has already
 	 * completely initialized context.
@@ -683,6 +697,9 @@ static int add_client_context(struct ib_device *device,
 	return 0;
 
 out:
+	ib_device_put(device);
+	ib_client_put(client);
+out_unlock:
 	up_write(&device->client_data_rwsem);
 	return ret;
 }
@@ -702,7 +719,7 @@ static void remove_client_context(struct ib_device *device,
 	client_data = xa_load(&device->client_data, client_id);
 	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
 	client = xa_load(&clients, client_id);
-	downgrade_write(&device->client_data_rwsem);
+	up_write(&device->client_data_rwsem);
 
 	/*
 	 * Notice we cannot be holding any exclusive locks when calling the
@@ -712,17 +729,13 @@ static void remove_client_context(struct ib_device *device,
 	 *
 	 * For this reason clients and drivers should not call the
 	 * unregistration functions will holdling any locks.
-	 *
-	 * It tempting to drop the client_data_rwsem too, but this is required
-	 * to ensure that unregister_client does not return until all clients
-	 * are completely unregistered, which is required to avoid module
-	 * unloading races.
 	 */
 	if (client->remove)
 		client->remove(device, client_data);
 
 	xa_erase(&device->client_data, client_id);
-	up_read(&device->client_data_rwsem);
+	ib_device_put(device);
+	ib_client_put(client);
 }
 
 static int alloc_port_data(struct ib_device *device)
@@ -1705,6 +1718,8 @@ int ib_register_client(struct ib_client *client)
 	unsigned long index;
 	int ret;
 
+	refcount_set(&client->uses, 1);
+	init_completion(&client->uses_zero);
 	ret = assign_client_id(client);
 	if (ret)
 		return ret;
@@ -1740,16 +1755,29 @@ void ib_unregister_client(struct ib_client *client)
 	unsigned long index;
 
 	down_write(&clients_rwsem);
+	ib_client_put(client);
 	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
 	up_write(&clients_rwsem);
+
+	/* We do not want to have locks while calling client->remove() */
+	rcu_read_lock();
+	xa_for_each (&devices, index, device) {
+		if (!ib_device_try_get(device))
+			continue;
+		rcu_read_unlock();
+
+		remove_client_context(device, client->client_id);
+
+		ib_device_put(device);
+		rcu_read_lock();
+	}
+	rcu_read_unlock();
+
 	/*
-	 * Every device still known must be serialized to make sure we are
-	 * done with the client callbacks before we return.
+	 * remove_client_context() is not a fence, it can return even though a
+	 * removal is ongoing. Wait until all removals are completed.
 	 */
-	down_read(&devices_rwsem);
-	xa_for_each (&devices, index, device)
-		remove_client_context(device, client->client_id);
-	up_read(&devices_rwsem);
+	wait_for_completion(&client->uses_zero);
 
 	down_write(&clients_rwsem);
 	list_del(&client->list);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c5f8a9f17063..7b80ec822043 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2647,6 +2647,9 @@ struct ib_client {
 			const union ib_gid *gid,
 			const struct sockaddr *addr,
 			void *client_data);
+
+	refcount_t uses;
+	struct completion uses_zero;
 	struct list_head list;
 	u32 client_id;
 
-- 
cgit v1.2.3


From 9cd5881719e9555cae300ec8b389eda3c8101339 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Wed, 31 Jul 2019 11:18:41 +0300
Subject: RDMA/devices: Remove the lock around remove_client_context

Due to the complexity of client->remove() callbacks it is desirable to not
hold any locks while calling them. Remove the last one by tracking only
the highest client ID and running backwards from there over the xarray.

Since the only purpose of that lock was to protect the linked list, we can
drop the lock.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190731081841.32345-3-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/device.c | 48 ++++++++++++++++++++++------------------
 include/rdma/ib_verbs.h          |  1 -
 2 files changed, 27 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d86fbabe48d6..ea8661a00651 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -94,7 +94,7 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
 static DECLARE_RWSEM(devices_rwsem);
 #define DEVICE_REGISTERED XA_MARK_1
 
-static LIST_HEAD(client_list);
+static u32 highest_client_id;
 #define CLIENT_REGISTERED XA_MARK_1
 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
 static DECLARE_RWSEM(clients_rwsem);
@@ -1237,7 +1237,7 @@ static int setup_device(struct ib_device *device)
 
 static void disable_device(struct ib_device *device)
 {
-	struct ib_client *client;
+	u32 cid;
 
 	WARN_ON(!refcount_read(&device->refcount));
 
@@ -1245,10 +1245,19 @@ static void disable_device(struct ib_device *device)
 	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
 	up_write(&devices_rwsem);
 
+	/*
+	 * Remove clients in LIFO order, see assign_client_id. This could be
+	 * more efficient if xarray learns to reverse iterate. Since no new
+	 * clients can be added to this ib_device past this point we only need
+	 * the maximum possible client_id value here.
+	 */
 	down_read(&clients_rwsem);
-	list_for_each_entry_reverse(client, &client_list, list)
-		remove_client_context(device, client->client_id);
+	cid = highest_client_id;
 	up_read(&clients_rwsem);
+	while (cid) {
+		cid--;
+		remove_client_context(device, cid);
+	}
 
 	/* Pairs with refcount_set in enable_device */
 	ib_device_put(device);
@@ -1675,30 +1684,31 @@ static int assign_client_id(struct ib_client *client)
 	/*
 	 * The add/remove callbacks must be called in FIFO/LIFO order. To
 	 * achieve this we assign client_ids so they are sorted in
-	 * registration order, and retain a linked list we can reverse iterate
-	 * to get the LIFO order. The extra linked list can go away if xarray
-	 * learns to reverse iterate.
+	 * registration order.
 	 */
-	if (list_empty(&client_list)) {
-		client->client_id = 0;
-	} else {
-		struct ib_client *last;
-
-		last = list_last_entry(&client_list, struct ib_client, list);
-		client->client_id = last->client_id + 1;
-	}
+	client->client_id = highest_client_id;
 	ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
 	if (ret)
 		goto out;
 
+	highest_client_id++;
 	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
-	list_add_tail(&client->list, &client_list);
 
 out:
 	up_write(&clients_rwsem);
 	return ret;
 }
 
+static void remove_client_id(struct ib_client *client)
+{
+	down_write(&clients_rwsem);
+	xa_erase(&clients, client->client_id);
+	for (; highest_client_id; highest_client_id--)
+		if (xa_load(&clients, highest_client_id - 1))
+			break;
+	up_write(&clients_rwsem);
+}
+
 /**
  * ib_register_client - Register an IB client
  * @client:Client to register
@@ -1778,11 +1788,7 @@ void ib_unregister_client(struct ib_client *client)
 	 * removal is ongoing. Wait until all removals are completed.
 	 */
 	wait_for_completion(&client->uses_zero);
-
-	down_write(&clients_rwsem);
-	list_del(&client->list);
-	xa_erase(&clients, client->client_id);
-	up_write(&clients_rwsem);
+	remove_client_id(client);
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7b80ec822043..4f225175cb91 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2650,7 +2650,6 @@ struct ib_client {
 
 	refcount_t uses;
 	struct completion uses_zero;
-	struct list_head list;
 	u32 client_id;
 
 	/* kverbs are not required by the client */
-- 
cgit v1.2.3


From 6f06e04b67baa1c9da61c8b15b1335a1dbb98bcb Mon Sep 17 00:00:00 2001
From: Gavi Teitz <gavi@mellanox.com>
Date: Mon, 29 Jul 2019 21:12:52 +0000
Subject: net/mlx5: Refactor and optimize flow counter bulk query

Towards introducing the ability to allocate bulks of flow counters,
refactor the flow counter bulk query process, removing functions and
structs whose names indicated being used for flow counter bulk
allocation FW commands, despite them actually only being used to
support bulk querying, and migrate their functionality to correctly
named functions in their natural location, fs_counters.c.

Additionally, optimize the bulk query process by:
 * Extracting the memory used for the query to mlx5_fc_stats so
   that it is only allocated once, and not for each bulk query.
 * Querying all the counters in one function call.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  61 ++--------
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |  13 +--
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 125 +++++++++++----------
 include/linux/mlx5/driver.h                        |   1 +
 4 files changed, 81 insertions(+), 119 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 7ac1249eadc3..51f6972f4c70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -615,67 +615,24 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 	return 0;
 }
 
-struct mlx5_cmd_fc_bulk {
-	u32 id;
-	int num;
-	int outlen;
-	u32 out[0];
-};
-
-struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num)
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len)
 {
-	struct mlx5_cmd_fc_bulk *b;
-	int outlen =
-		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
-		MLX5_ST_SZ_BYTES(traffic_counter) * num;
-
-	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
-	if (!b)
-		return NULL;
-
-	b->id = id;
-	b->num = num;
-	b->outlen = outlen;
-
-	return b;
+	return MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter) * bulk_len;
 }
 
-void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b)
-{
-	kfree(b);
-}
-
-int
-mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+			   u32 *out)
 {
+	int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
 
 	MLX5_SET(query_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
 	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
-	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
-	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
-	return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
-}
-
-void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
-			  struct mlx5_cmd_fc_bulk *b, u32 id,
-			  u64 *packets, u64 *bytes)
-{
-	int index = id - b->id;
-	void *stats;
-
-	if (index < 0 || index >= b->num) {
-		mlx5_core_warn(dev, "Flow counter id (0x%x) out of range (0x%x..0x%x). Counter ignored.\n",
-			       id, b->id, b->id + b->num - 1);
-		return;
-	}
-
-	stats = MLX5_ADDR_OF(query_flow_counter_out, b->out,
-			     flow_statistics[index]);
-	*packets = MLX5_GET64(traffic_counter, stats, packets);
-	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
+	MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index e340f9af2f5a..db49eabba98d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -82,16 +82,9 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 		      u64 *packets, u64 *bytes);
 
-struct mlx5_cmd_fc_bulk;
-
-struct mlx5_cmd_fc_bulk *
-mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num);
-void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b);
-int
-mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b);
-void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
-			  struct mlx5_cmd_fc_bulk *b, u32 id,
-			  u64 *packets, u64 *bytes);
+int mlx5_cmd_fc_get_bulk_query_out_len(int bulk_len);
+int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
+			   u32 *out);
 
 const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type type);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index b3762123a69c..067a4b56498b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -75,7 +75,7 @@ struct mlx5_fc {
  * access to counter list:
  * - create (user context)
  *   - mlx5_fc_create() only adds to an addlist to be used by
- *     mlx5_fc_stats_query_work(). addlist is a lockless single linked list
+ *     mlx5_fc_stats_work(). addlist is a lockless single linked list
  *     that doesn't require any additional synchronization when adding single
  *     node.
  *   - spawn thread to do the actual destroy
@@ -136,72 +136,69 @@ static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
 	spin_unlock(&fc_stats->counters_idr_lock);
 }
 
-/* The function returns the last counter that was queried so the caller
- * function can continue calling it till all counters are queried.
- */
-static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
-					   struct mlx5_fc *first,
-					   u32 last_id)
+static int get_max_bulk_query_len(struct mlx5_core_dev *dev)
 {
-	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
-	struct mlx5_fc *counter = NULL;
-	struct mlx5_cmd_fc_bulk *b;
-	bool more = false;
-	u32 afirst_id;
-	int num;
-	int err;
+	return min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
+			  (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+}
 
-	int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK,
-			     (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk)));
+static void update_counter_cache(int index, u32 *bulk_raw_data,
+				 struct mlx5_fc_cache *cache)
+{
+	void *stats = MLX5_ADDR_OF(query_flow_counter_out, bulk_raw_data,
+			     flow_statistics[index]);
+	u64 packets = MLX5_GET64(traffic_counter, stats, packets);
+	u64 bytes = MLX5_GET64(traffic_counter, stats, octets);
 
-	/* first id must be aligned to 4 when using bulk query */
-	afirst_id = first->id & ~0x3;
+	if (cache->packets == packets)
+		return;
 
-	/* number of counters to query inc. the last counter */
-	num = ALIGN(last_id - afirst_id + 1, 4);
-	if (num > max_bulk) {
-		num = max_bulk;
-		last_id = afirst_id + num - 1;
-	}
+	cache->packets = packets;
+	cache->bytes = bytes;
+	cache->lastuse = jiffies;
+}
 
-	b = mlx5_cmd_fc_bulk_alloc(dev, afirst_id, num);
-	if (!b) {
-		mlx5_core_err(dev, "Error allocating resources for bulk query\n");
-		return NULL;
-	}
+static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
+					      struct mlx5_fc *first,
+					      u32 last_id)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	bool query_more_counters = (first->id <= last_id);
+	int max_bulk_len = get_max_bulk_query_len(dev);
+	u32 *data = fc_stats->bulk_query_out;
+	struct mlx5_fc *counter = first;
+	u32 bulk_base_id;
+	int bulk_len;
+	int err;
 
-	err = mlx5_cmd_fc_bulk_query(dev, b);
-	if (err) {
-		mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
-		goto out;
-	}
+	while (query_more_counters) {
+		/* first id must be aligned to 4 when using bulk query */
+		bulk_base_id = counter->id & ~0x3;
 
-	counter = first;
-	list_for_each_entry_from(counter, &fc_stats->counters, list) {
-		struct mlx5_fc_cache *c = &counter->cache;
-		u64 packets;
-		u64 bytes;
+		/* number of counters to query inc. the last counter */
+		bulk_len = min_t(int, max_bulk_len,
+				 ALIGN(last_id - bulk_base_id + 1, 4));
 
-		if (counter->id > last_id) {
-			more = true;
-			break;
+		err = mlx5_cmd_fc_bulk_query(dev, bulk_base_id, bulk_len,
+					     data);
+		if (err) {
+			mlx5_core_err(dev, "Error doing bulk query: %d\n", err);
+			return;
 		}
+		query_more_counters = false;
 
-		mlx5_cmd_fc_bulk_get(dev, b,
-				     counter->id, &packets, &bytes);
+		list_for_each_entry_from(counter, &fc_stats->counters, list) {
+			int counter_index = counter->id - bulk_base_id;
+			struct mlx5_fc_cache *cache = &counter->cache;
 
-		if (c->packets == packets)
-			continue;
+			if (counter->id >= bulk_base_id + bulk_len) {
+				query_more_counters = true;
+				break;
+			}
 
-		c->packets = packets;
-		c->bytes = bytes;
-		c->lastuse = jiffies;
+			update_counter_cache(counter_index, data, cache);
+		}
 	}
-
-out:
-	mlx5_cmd_fc_bulk_free(b);
-
-	return more ? counter : NULL;
 }
 
 static void mlx5_free_fc(struct mlx5_core_dev *dev,
@@ -244,8 +241,8 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 
 	counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
 				   list);
-	while (counter)
-		counter = mlx5_fc_stats_query(dev, counter, last->id);
+	if (counter)
+		mlx5_fc_stats_query_counter_range(dev, counter, last->id);
 
 	fc_stats->next_query = now + fc_stats->sampling_interval;
 }
@@ -324,6 +321,8 @@ EXPORT_SYMBOL(mlx5_fc_destroy);
 int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	int max_bulk_len;
+	int max_out_len;
 
 	spin_lock_init(&fc_stats->counters_idr_lock);
 	idr_init(&fc_stats->counters_idr);
@@ -331,14 +330,24 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 	init_llist_head(&fc_stats->addlist);
 	init_llist_head(&fc_stats->dellist);
 
+	max_bulk_len = get_max_bulk_query_len(dev);
+	max_out_len = mlx5_cmd_fc_get_bulk_query_out_len(max_bulk_len);
+	fc_stats->bulk_query_out = kzalloc(max_out_len, GFP_KERNEL);
+	if (!fc_stats->bulk_query_out)
+		return -ENOMEM;
+
 	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
 	if (!fc_stats->wq)
-		return -ENOMEM;
+		goto err_wq_create;
 
 	fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
 	INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
 
 	return 0;
+
+err_wq_create:
+	kfree(fc_stats->bulk_query_out);
+	return -ENOMEM;
 }
 
 void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
@@ -352,6 +361,8 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
+	kfree(fc_stats->bulk_query_out);
+
 	idr_destroy(&fc_stats->counters_idr);
 
 	tmplist = llist_del_all(&fc_stats->addlist);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0e6da1840c7d..267b2bc0ca4a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -488,6 +488,7 @@ struct mlx5_fc_stats {
 	struct delayed_work work;
 	unsigned long next_query;
 	unsigned long sampling_interval; /* jiffies */
+	u32 *bulk_query_out;
 };
 
 struct mlx5_events;
-- 
cgit v1.2.3


From 8536a6bf2ea1f3bf4e3159b590fbecce4d854796 Mon Sep 17 00:00:00 2001
From: Gavi Teitz <gavi@mellanox.com>
Date: Mon, 29 Jul 2019 21:12:54 +0000
Subject: net/mlx5: Add flow counter bulk allocation hardware bits and command

Add a handle to invoke the new FW capability of allocating a bulk of
flow counters.

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 10 +++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h |  3 +++
 include/linux/mlx5/mlx5_ifc.h                    | 21 +++++++++++++++++++--
 3 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 51f6972f4c70..b84a225bbe86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -566,7 +566,9 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+			   u32 *id)
 {
 	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
 	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
@@ -574,6 +576,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
 
 	MLX5_SET(alloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+	MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
 
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
@@ -581,6 +584,11 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
 	return err;
 }
 
+int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
+{
+	return mlx5_cmd_fc_bulk_alloc(dev, 0, id);
+}
+
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index db49eabba98d..bc4606306009 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -78,6 +78,9 @@ struct mlx5_flow_cmds {
 };
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
+int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
+			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
+			   u32 *id);
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 		      u64 *packets, u64 *bytes);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b3d5752657d9..196987f14a3f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1040,6 +1040,21 @@ enum {
 	MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
 };
 
+#define MLX5_FC_BULK_SIZE_FACTOR 128
+
+enum mlx5_fc_bulk_alloc_bitmask {
+	MLX5_FC_BULK_128   = (1 << 0),
+	MLX5_FC_BULK_256   = (1 << 1),
+	MLX5_FC_BULK_512   = (1 << 2),
+	MLX5_FC_BULK_1024  = (1 << 3),
+	MLX5_FC_BULK_2048  = (1 << 4),
+	MLX5_FC_BULK_4096  = (1 << 5),
+	MLX5_FC_BULK_8192  = (1 << 6),
+	MLX5_FC_BULK_16384 = (1 << 7),
+};
+
+#define MLX5_FC_BULK_NUM_FCS(fc_enum) (MLX5_FC_BULK_SIZE_FACTOR * (fc_enum))
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_0[0x30];
 	u8         vhca_id[0x10];
@@ -1244,7 +1259,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_2e0[0x7];
 	u8         max_qp_mcg[0x19];
 
-	u8         reserved_at_300[0x18];
+	u8         reserved_at_300[0x10];
+	u8         flow_counter_bulk_alloc[0x8];
 	u8         log_max_mcg[0x8];
 
 	u8         reserved_at_320[0x3];
@@ -7815,7 +7831,8 @@ struct mlx5_ifc_alloc_flow_counter_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x38];
+	u8         flow_counter_bulk[0x8];
 };
 
 struct mlx5_ifc_add_vxlan_udp_dport_out_bits {
-- 
cgit v1.2.3


From 7761f9eef3f09f2f4c579313e0c536770b5ecff4 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Mon, 29 Jul 2019 21:12:56 +0000
Subject: net/mlx5: Fix offset of tisc bits reserved field

First reserved field is off by one instead of reserved_at_1 it should be
reserved_at_2, fix that.

Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 196987f14a3f..9265c84ad353 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2782,7 +2782,7 @@ struct mlx5_ifc_traffic_counter_bits {
 struct mlx5_ifc_tisc_bits {
 	u8         strict_lag_tx_port_affinity[0x1];
 	u8         tls_en[0x1];
-	u8         reserved_at_1[0x2];
+	u8         reserved_at_2[0x2];
 	u8         lag_tx_port_affinity[0x04];
 
 	u8         reserved_at_8[0x4];
-- 
cgit v1.2.3


From 6cedde4513990af7191afa43528533f80e92c989 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Mon, 29 Jul 2019 21:13:00 +0000
Subject: net/mlx5: E-Switch, Verify support QoS element type

Check if firmware supports the requested element type before
attempting to create the element type.
In addition, explicitly specify the request element type and tsar type.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 31 +++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                     |  7 +++++
 2 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 1f3891fde2eb..2927fa1da92f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1393,19 +1393,50 @@ out:
 	return err;
 }
 
+static bool element_type_supported(struct mlx5_eswitch *esw, int type)
+{
+	struct mlx5_core_dev *dev = esw->dev = esw->dev;
+
+	switch (type) {
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_TASR;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+		return MLX5_CAP_QOS(dev, esw_element_type) &
+		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+	}
+	return false;
+}
+
 /* Vport QoS management */
 static int esw_create_tsar(struct mlx5_eswitch *esw)
 {
 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
 	struct mlx5_core_dev *dev = esw->dev;
+	__be32 *attr;
 	int err;
 
 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
 		return 0;
 
+	if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+		return 0;
+
 	if (esw->qos.enabled)
 		return -EEXIST;
 
+	MLX5_SET(scheduling_context, tsar_ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
 	err = mlx5_create_scheduling_element_cmd(dev,
 						 SCHEDULING_HIERARCHY_E_SWITCH,
 						 tsar_ctx,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 9265c84ad353..30d15e80bcc7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2957,6 +2957,13 @@ enum {
 	SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3,
 };
 
+enum {
+	ELEMENT_TYPE_CAP_MASK_TASR		= 1 << 0,
+	ELEMENT_TYPE_CAP_MASK_VPORT		= 1 << 1,
+	ELEMENT_TYPE_CAP_MASK_VPORT_TC		= 1 << 2,
+	ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC	= 1 << 3,
+};
+
 struct mlx5_ifc_scheduling_context_bits {
 	u8         element_type[0x8];
 	u8         reserved_at_8[0x18];
-- 
cgit v1.2.3


From 0ab6a3ddbad40ef5b6b8c2353fd53fa4ecf9c479 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 30 Jul 2019 20:15:25 +0200
Subject: hrtimer: Make enqueue mode check work on RT

hrtimer_start_range_ns() has a WARN_ONCE() which verifies that a timer
which is marker for softirq expiry is not queued in the hard interrupt base
and vice versa.

When PREEMPT_RT is enabled, timers which are not explicitely marked to
expire in hard interrupt context are deferrred to the soft interrupt. So
the regular check would trigger.

Change the check, so when PREEMPT_RT is enabled, it is verified that the
timers marked for hard interrupt expiry are not tried to be queued for soft
interrupt expiry or any of the unmarked and softirq marked is tried to be
expired in hard interrupt context.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h | 3 +++
 kernel/time/hrtimer.c   | 9 +++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 15c2ba6b6316..7d0d0a36a8f4 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -107,6 +107,8 @@ enum hrtimer_restart {
  * @state:	state information (See bit values above)
  * @is_rel:	Set if the timer was armed relative
  * @is_soft:	Set if hrtimer will be expired in soft interrupt context.
+ * @is_hard:	Set if hrtimer will be expired in hard interrupt context
+ *		even on RT.
  *
  * The hrtimer structure must be initialized by hrtimer_init()
  */
@@ -118,6 +120,7 @@ struct hrtimer {
 	u8				state;
 	u8				is_rel;
 	u8				is_soft;
+	u8				is_hard;
 };
 
 /**
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index dab1ea1a99d0..0ace301a56f4 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1107,9 +1107,13 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	/*
 	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
-	 * match.
+	 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
+	 * expiry mode because unmarked timers are moved to softirq expiry.
 	 */
-	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	else
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -1288,6 +1292,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
+	timer->is_hard = !softtimer;
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
 }
-- 
cgit v1.2.3


From f61eff83cec9cfab31fd30a2ca8856be379cdcd5 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Fri, 26 Jul 2019 20:30:59 +0200
Subject: hrtimer: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling hrtimer_cancel() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbound priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If hrtimer_cancel() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the life lock issues.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
hrtimer_cancel() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.737767218@linutronix.de
---
 include/linux/hrtimer.h | 16 +++++++++
 kernel/time/hrtimer.c   | 95 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 105 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 7d0d0a36a8f4..5df4bcff96d5 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -192,6 +192,10 @@ enum  hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
+ * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
+ *			callback to finish.
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -218,6 +222,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_retries;
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
+#endif
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t			softirq_expiry_lock;
+	atomic_t			timer_waiters;
 #endif
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
@@ -350,6 +358,14 @@ extern void hrtimers_resume(void);
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 
+#ifdef CONFIG_PREEMPT_RT
+void hrtimer_cancel_wait_running(const struct hrtimer *timer);
+#else
+static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
+{
+	cpu_relax();
+}
+#endif
 
 /* Exported timer functions: */
 
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index c101f88ae8aa..499122752649 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1162,6 +1162,82 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reaquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	struct hrtimer_clock_base *base = timer->base;
+
+	if (!timer->is_soft || !base || !base->cpu_base) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1172,13 +1248,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1475,6 +1553,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1485,6 +1565,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1494,6 +1575,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1897,6 +1979,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 030dcdd197d77374879bb5603d091eee7d8aba80 Mon Sep 17 00:00:00 2001
From: Anna-Maria Gleixner <anna-maria@linutronix.de>
Date: Fri, 26 Jul 2019 20:31:00 +0200
Subject: timers: Prepare support for PREEMPT_RT

When PREEMPT_RT is enabled, the soft interrupt thread can be preempted.  If
the soft interrupt thread is preempted in the middle of a timer callback,
then calling del_timer_sync() can lead to two issues:

  - If the caller is on a remote CPU then it has to spin wait for the timer
    handler to complete. This can result in unbound priority inversion.

  - If the caller originates from the task which preempted the timer
    handler on the same CPU, then spin waiting for the timer handler to
    complete is never going to end.

To avoid these issues, add a new lock to the timer base which is held
around the execution of the timer callbacks. If del_timer_sync() detects
that the timer callback is currently running, it blocks on the expiry
lock. When the callback is finished, the expiry lock is dropped by the
softirq thread which wakes up the waiter and the system makes progress.

This addresses both the priority inversion and the life lock issues.

This mechanism is not used for timers which are marked IRQSAFE as for those
preemption is disabled accross the callback and therefore this situation
cannot happen. The callbacks for such timers need to be individually
audited for RT compliance.

The same issue can happen in virtual machines when the vCPU which runs a
timer callback is scheduled out. If a second vCPU of the same guest calls
del_timer_sync() it will spin wait for the other vCPU to be scheduled back
in. The expiry lock mechanism would avoid that. It'd be trivial to enable
this when paravirt spinlocks are enabled in a guest, but it's not clear
whether this is an actual problem in the wild, so for now it's an RT only
mechanism.

As the softirq thread can be preempted with PREEMPT_RT=y, the SMP variant
of del_timer_sync() needs to be used on UP as well.

[ tglx: Refactored it for mainline ]

Signed-off-by: Anna-Maria Gleixner <anna-maria@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190726185753.832418500@linutronix.de
---
 include/linux/timer.h |   2 +-
 kernel/time/timer.c   | 103 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 96 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index 282e4f2a532a..1e6650ed066d 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -183,7 +183,7 @@ extern void add_timer(struct timer_list *timer);
 
 extern int try_to_del_timer_sync(struct timer_list *timer);
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
   extern int del_timer_sync(struct timer_list *timer);
 #else
 # define del_timer_sync(t)		del_timer(t)
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 343c7ba33b1c..673c6a0f0c45 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -196,6 +196,10 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
+#ifdef CONFIG_PREEMPT_RT
+	spinlock_t		expiry_lock;
+	atomic_t		timer_waiters;
+#endif
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1227,7 +1231,78 @@ int try_to_del_timer_sync(struct timer_list *timer)
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+	spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+	spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+	spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reaquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+	if (atomic_read(&base->timer_waiters)) {
+		spin_unlock(&base->expiry_lock);
+		spin_lock(&base->expiry_lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+	u32 tf;
+
+	tf = READ_ONCE(timer->flags);
+	if (!(tf & TIMER_MIGRATING)) {
+		struct timer_base *base = get_timer_base(tf);
+
+		/*
+		 * Mark the base as contended and grab the expiry lock,
+		 * which is held by the softirq across the timer
+		 * callback. Drop the lock immediately so the softirq can
+		 * expire the next timer. In theory the timer could already
+		 * be running again, but that's more than unlikely and just
+		 * causes another wait loop.
+		 */
+		atomic_inc(&base->timer_waiters);
+		spin_lock_bh(&base->expiry_lock);
+		atomic_dec(&base->timer_waiters);
+		spin_unlock_bh(&base->expiry_lock);
+	}
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1266,6 +1341,8 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
  */
 int del_timer_sync(struct timer_list *timer)
 {
+	int ret;
+
 #ifdef CONFIG_LOCKDEP
 	unsigned long flags;
 
@@ -1283,12 +1360,17 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+
+	do {
+		ret = try_to_del_timer_sync(timer);
+
+		if (unlikely(ret < 0)) {
+			del_timer_wait_running(timer);
+			cpu_relax();
+		}
+	} while (ret < 0);
+
+	return ret;
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1360,10 +1442,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 		if (timer->flags & TIMER_IRQSAFE) {
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn, baseclk);
+			base->running_timer = NULL;
+			timer_sync_wait_running(base);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1658,6 +1743,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	timer_base_lock_expiry(base);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1684,8 +1770,8 @@ static inline void __run_timers(struct timer_base *base)
 		while (levels--)
 			expire_timers(base, heads + levels);
 	}
-	base->running_timer = NULL;
 	raw_spin_unlock_irq(&base->lock);
+	timer_base_unlock_expiry(base);
 }
 
 /*
@@ -1930,6 +2016,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
+		timer_base_init_expiry_lock(base);
 	}
 }
 
-- 
cgit v1.2.3


From 5d99b32a009e900a561f6a42ea7afe5b21288b8a Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 31 Jul 2019 00:33:54 +0200
Subject: posix-timers: Move rcu_head out of it union

Timer deletion on PREEMPT_RT is prone to priority inversion and live
locks. The hrtimer code has a synchronization mechanism for this. Posix CPU
timers will grow one.

But that mechanism cannot be invoked while holding the k_itimer lock
because that can deadlock against the running timer callback. So the lock
must be dropped which allows the timer to be freed.

The timer free can be prevented by taking RCU readlock before dropping the
lock, but because the rcu_head is part of the 'it' union a concurrent free
will overwrite the hrtimer on which the task is trying to synchronize.

Move the rcu_head out of the union to prevent this.

[ tglx: Fixed up kernel-doc. Rewrote changelog ]

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20190730223828.965541887@linutronix.de
---
 include/linux/posix-timers.h | 5 +++--
 kernel/time/posix-timers.c   | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index b20798fc5191..604cec0e41ba 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -85,7 +85,8 @@ static inline int clockid_to_fd(const clockid_t clk)
  * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
  * @sigq:		Pointer to preallocated sigqueue
  * @it:			Union representing the various posix timer type
- *			internals. Also used for rcu freeing the timer.
+ *			internals.
+ * @rcu:		RCU head for freeing the timer.
  */
 struct k_itimer {
 	struct list_head	list;
@@ -114,8 +115,8 @@ struct k_itimer {
 		struct {
 			struct alarm	alarmtimer;
 		} alarm;
-		struct rcu_head		rcu;
 	} it;
+	struct rcu_head		rcu;
 };
 
 void run_posix_cpu_timers(struct task_struct *task);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index bbe8f9686a70..3e663f982c82 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -442,7 +442,7 @@ static struct k_itimer * alloc_posix_timer(void)
 
 static void k_itimer_rcu_free(struct rcu_head *head)
 {
-	struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+	struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
 
 	kmem_cache_free(posix_timers_cache, tmr);
 }
@@ -459,7 +459,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
 	}
 	put_pid(tmr->it_pid);
 	sigqueue_free(tmr->sigq);
-	call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+	call_rcu(&tmr->rcu, k_itimer_rcu_free);
 }
 
 static int common_timer_create(struct k_itimer *new_timer)
-- 
cgit v1.2.3


From 558101f1b9807b34d8eeefb352d11e642b7e98dd Mon Sep 17 00:00:00 2001
From: Gavi Teitz <gavi@mellanox.com>
Date: Thu, 27 Jun 2019 20:53:03 +0300
Subject: net/mlx5: Add flow counter pool

Add a pool of flow counters, based on flow counter bulks, removing the
need to allocate a new counter via a costly FW command during the flow
creation process. The time it takes to acquire/release a flow counter
is cut from ~50 [us] to ~50 [ns].

The pool is part of the mlx5 driver instance, and provides flow
counters for aging flows. mlx5_fc_create() was modified to provide
counters for aging flows from the pool by default, and
mlx5_destroy_fc() was modified to release counters back to the pool
for later reuse. If bulk allocation is not supported or fails, and for
non-aging flows, the fallback behavior is to allocate and free
individual counters.

The pool is comprised of three lists of flow counter bulks, one of
fully used bulks, one of partially used bulks, and one of unused
bulks. Counters are provided from the partially used bulks first, to
help limit bulk fragmentation.

The pool maintains a threshold, and strives to maintain the amount of
available counters below it. The pool is increased in size when a
counter acquisition request is made and there are no available
counters, and it is decreased in size when the last counter in a bulk
is released and there are more available counters than the threshold.
All pool size changes are done in the context of the
acquiring/releasing process.

The value of the threshold is directly correlated to the amount of
used counters the pool is providing, while constrained by a hard
maximum, and is recalculated every time a bulk is allocated/freed.
This ensures that the pool only consumes large amounts of memory for
available counters if the pool is being used heavily. When fully
populated and at the hard maximum, the buffer of available counters
consumes ~40 [MB].

Signed-off-by: Gavi Teitz <gavi@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 231 ++++++++++++++++++---
 include/linux/mlx5/driver.h                        |  12 ++
 2 files changed, 218 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 3e734e62a6cd..51f1736c455d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -40,6 +40,8 @@
 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
 /* Max number of counters to query in bulk read is 32K */
 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
+#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
+#define MLX5_FC_POOL_USED_BUFF_RATIO 10
 
 struct mlx5_fc_cache {
 	u64 packets;
@@ -65,6 +67,11 @@ struct mlx5_fc {
 	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
 };
 
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev);
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool);
+static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool);
+static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc);
+
 /* locking scheme:
  *
  * It is the responsibility of the user to prevent concurrent calls or bad
@@ -202,13 +209,22 @@ static void mlx5_fc_stats_query_counter_range(struct mlx5_core_dev *dev,
 	}
 }
 
-static void mlx5_free_fc(struct mlx5_core_dev *dev,
-			 struct mlx5_fc *counter)
+static void mlx5_fc_free(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 {
 	mlx5_cmd_fc_free(dev, counter->id);
 	kfree(counter);
 }
 
+static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+	if (counter->bulk)
+		mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter);
+	else
+		mlx5_fc_free(dev, counter);
+}
+
 static void mlx5_fc_stats_work(struct work_struct *work)
 {
 	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
@@ -232,7 +248,7 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 	llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
 		mlx5_fc_stats_remove(dev, counter);
 
-		mlx5_free_fc(dev, counter);
+		mlx5_fc_release(dev, counter);
 	}
 
 	if (time_before(now, fc_stats->next_query) ||
@@ -248,26 +264,56 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 	fc_stats->next_query = now + fc_stats->sampling_interval;
 }
 
-struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+static struct mlx5_fc *mlx5_fc_single_alloc(struct mlx5_core_dev *dev)
 {
-	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	struct mlx5_fc *counter;
 	int err;
 
 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 	if (!counter)
 		return ERR_PTR(-ENOMEM);
-	INIT_LIST_HEAD(&counter->list);
 
 	err = mlx5_cmd_fc_alloc(dev, &counter->id);
-	if (err)
-		goto err_out;
+	if (err) {
+		kfree(counter);
+		return ERR_PTR(err);
+	}
+
+	return counter;
+}
+
+static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct mlx5_fc *counter;
+
+	if (aging && MLX5_CAP_GEN(dev, flow_counter_bulk_alloc) != 0) {
+		counter = mlx5_fc_pool_acquire_counter(&fc_stats->fc_pool);
+		if (!IS_ERR(counter))
+			return counter;
+	}
+
+	return mlx5_fc_single_alloc(dev);
+}
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	int err;
+
+	if (IS_ERR(counter))
+		return counter;
+
+	INIT_LIST_HEAD(&counter->list);
+	counter->aging = aging;
 
 	if (aging) {
 		u32 id = counter->id;
 
 		counter->cache.lastuse = jiffies;
-		counter->aging = true;
+		counter->lastbytes = counter->cache.bytes;
+		counter->lastpackets = counter->cache.packets;
 
 		idr_preload(GFP_KERNEL);
 		spin_lock(&fc_stats->counters_idr_lock);
@@ -288,10 +334,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
 	return counter;
 
 err_out_alloc:
-	mlx5_cmd_fc_free(dev, counter->id);
-err_out:
-	kfree(counter);
-
+	mlx5_fc_release(dev, counter);
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(mlx5_fc_create);
@@ -315,7 +358,7 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 		return;
 	}
 
-	mlx5_free_fc(dev, counter);
+	mlx5_fc_release(dev, counter);
 }
 EXPORT_SYMBOL(mlx5_fc_destroy);
 
@@ -344,6 +387,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 	fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD;
 	INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work);
 
+	mlx5_fc_pool_init(&fc_stats->fc_pool, dev);
 	return 0;
 
 err_wq_create:
@@ -358,6 +402,7 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	struct mlx5_fc *counter;
 	struct mlx5_fc *tmp;
 
+	mlx5_fc_pool_cleanup(&fc_stats->fc_pool);
 	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
@@ -368,10 +413,10 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 
 	tmplist = llist_del_all(&fc_stats->addlist);
 	llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
-		mlx5_free_fc(dev, counter);
+		mlx5_fc_release(dev, counter);
 
 	list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
-		mlx5_free_fc(dev, counter);
+		mlx5_fc_release(dev, counter);
 }
 
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
@@ -417,14 +462,15 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
 /* Flow counter bluks */
 
 struct mlx5_fc_bulk {
+	struct list_head pool_list;
 	u32 base_id;
 	int bulk_len;
 	unsigned long *bitmask;
 	struct mlx5_fc fcs[0];
 };
 
-static void
-mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, u32 id)
+static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
+			 u32 id)
 {
 	counter->bulk = bulk;
 	counter->id = id;
@@ -435,8 +481,7 @@ static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk)
 	return bitmap_weight(bulk->bitmask, bulk->bulk_len);
 }
 
-static struct mlx5_fc_bulk __attribute__((unused))
-*mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
 {
 	enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
 	struct mlx5_fc_bulk *bulk;
@@ -479,7 +524,7 @@ err_alloc_bulk:
 	return ERR_PTR(err);
 }
 
-static int __attribute__((unused))
+static int
 mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
 {
 	if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) {
@@ -494,8 +539,7 @@ mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk)
 	return 0;
 }
 
-static struct mlx5_fc __attribute__((unused))
-*mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
+static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk)
 {
 	int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len);
 
@@ -506,8 +550,7 @@ static struct mlx5_fc __attribute__((unused))
 	return &bulk->fcs[free_fc_index];
 }
 
-static int __attribute__((unused))
-mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
+static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
 {
 	int fc_index = fc->id - bulk->base_id;
 
@@ -517,3 +560,141 @@ mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc)
 	set_bit(fc_index, bulk->bitmask);
 	return 0;
 }
+
+/* Flow counters pool API */
+
+static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev)
+{
+	fc_pool->dev = dev;
+	mutex_init(&fc_pool->pool_lock);
+	INIT_LIST_HEAD(&fc_pool->fully_used);
+	INIT_LIST_HEAD(&fc_pool->partially_used);
+	INIT_LIST_HEAD(&fc_pool->unused);
+	fc_pool->available_fcs = 0;
+	fc_pool->used_fcs = 0;
+	fc_pool->threshold = 0;
+}
+
+static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+	struct mlx5_fc_bulk *bulk;
+	struct mlx5_fc_bulk *tmp;
+
+	list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list)
+		mlx5_fc_bulk_destroy(dev, bulk);
+	list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list)
+		mlx5_fc_bulk_destroy(dev, bulk);
+	list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list)
+		mlx5_fc_bulk_destroy(dev, bulk);
+}
+
+static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool)
+{
+	fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD,
+				   fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO);
+}
+
+static struct mlx5_fc_bulk *
+mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+	struct mlx5_fc_bulk *new_bulk;
+
+	new_bulk = mlx5_fc_bulk_create(dev);
+	if (!IS_ERR(new_bulk))
+		fc_pool->available_fcs += new_bulk->bulk_len;
+	mlx5_fc_pool_update_threshold(fc_pool);
+	return new_bulk;
+}
+
+static void
+mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+
+	fc_pool->available_fcs -= bulk->bulk_len;
+	mlx5_fc_bulk_destroy(dev, bulk);
+	mlx5_fc_pool_update_threshold(fc_pool);
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_from_list(struct list_head *src_list,
+			       struct list_head *next_list,
+			       bool move_non_full_bulk)
+{
+	struct mlx5_fc_bulk *bulk;
+	struct mlx5_fc *fc;
+
+	if (list_empty(src_list))
+		return ERR_PTR(-ENODATA);
+
+	bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list);
+	fc = mlx5_fc_bulk_acquire_fc(bulk);
+	if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0)
+		list_move(&bulk->pool_list, next_list);
+	return fc;
+}
+
+static struct mlx5_fc *
+mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool)
+{
+	struct mlx5_fc_bulk *new_bulk;
+	struct mlx5_fc *fc;
+
+	mutex_lock(&fc_pool->pool_lock);
+
+	fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used,
+					    &fc_pool->fully_used, false);
+	if (IS_ERR(fc))
+		fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused,
+						    &fc_pool->partially_used,
+						    true);
+	if (IS_ERR(fc)) {
+		new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool);
+		if (IS_ERR(new_bulk)) {
+			fc = ERR_CAST(new_bulk);
+			goto out;
+		}
+		fc = mlx5_fc_bulk_acquire_fc(new_bulk);
+		list_add(&new_bulk->pool_list, &fc_pool->partially_used);
+	}
+	fc_pool->available_fcs--;
+	fc_pool->used_fcs++;
+
+out:
+	mutex_unlock(&fc_pool->pool_lock);
+	return fc;
+}
+
+static void
+mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc)
+{
+	struct mlx5_core_dev *dev = fc_pool->dev;
+	struct mlx5_fc_bulk *bulk = fc->bulk;
+	int bulk_free_fcs_amount;
+
+	mutex_lock(&fc_pool->pool_lock);
+
+	if (mlx5_fc_bulk_release_fc(bulk, fc)) {
+		mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n");
+		goto unlock;
+	}
+
+	fc_pool->available_fcs++;
+	fc_pool->used_fcs--;
+
+	bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk);
+	if (bulk_free_fcs_amount == 1)
+		list_move_tail(&bulk->pool_list, &fc_pool->partially_used);
+	if (bulk_free_fcs_amount == bulk->bulk_len) {
+		list_del(&bulk->pool_list);
+		if (fc_pool->available_fcs > fc_pool->threshold)
+			mlx5_fc_pool_free_bulk(fc_pool, bulk);
+		else
+			list_add(&bulk->pool_list, &fc_pool->unused);
+	}
+
+unlock:
+	mutex_unlock(&fc_pool->pool_lock);
+}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 267b2bc0ca4a..d8f348ef9c33 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -477,6 +477,17 @@ struct mlx5_core_sriov {
 	u16			max_vfs;
 };
 
+struct mlx5_fc_pool {
+	struct mlx5_core_dev *dev;
+	struct mutex pool_lock; /* protects pool lists */
+	struct list_head fully_used;
+	struct list_head partially_used;
+	struct list_head unused;
+	int available_fcs;
+	int used_fcs;
+	int threshold;
+};
+
 struct mlx5_fc_stats {
 	spinlock_t counters_idr_lock; /* protects counters_idr */
 	struct idr counters_idr;
@@ -489,6 +500,7 @@ struct mlx5_fc_stats {
 	unsigned long next_query;
 	unsigned long sampling_interval; /* jiffies */
 	u32 *bulk_query_out;
+	struct mlx5_fc_pool fc_pool;
 };
 
 struct mlx5_events;
-- 
cgit v1.2.3


From 3695eae5fee0605f316fbaad0b9e3de791d7dfaf Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Sun, 28 Jul 2019 00:22:29 +0200
Subject: pidfd: add P_PIDFD to waitid()

This adds the P_PIDFD type to waitid().
One of the last remaining bits for the pidfd api is to make it possible
to wait on pidfds. With P_PIDFD added to waitid() the parts of userspace
that want to use the pidfd api to exclusively manage processes can do so
now.

One of the things this will unblock in the future is the ability to make
it possible to retrieve the exit status via waitid(P_PIDFD) for
non-parent processes if handed a _suitable_ pidfd that has this feature
set. This is similar to what you can do on FreeBSD with kqueue(). It
might even end up being possible to wait on a process as a non-parent if
an appropriate property is enabled on the pidfd.

With P_PIDFD no scoping of the process identified by the pidfd is
possible, i.e. it explicitly blocks things such as wait4(-1), wait4(0),
waitid(P_ALL), waitid(P_PGID) etc. It only allows for semantics
equivalent to wait4(pid), waitid(P_PID). Users that need scoping should
rely on pid-based wait*() syscalls for now.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Jann Horn <jannh@google.com>
Cc: Andy Lutomirsky <luto@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Aleksa Sarai <cyphar@cyphar.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lore.kernel.org/r/20190727222229.6516-2-christian@brauner.io
---
 include/linux/pid.h       |  4 ++++
 include/uapi/linux/wait.h |  1 +
 kernel/exit.c             | 33 ++++++++++++++++++++++++++++++---
 kernel/fork.c             |  8 ++++++++
 kernel/signal.c           |  7 +++++--
 5 files changed, 48 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 2a83e434db9d..9645b1194c98 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -72,6 +72,10 @@ extern struct pid init_struct_pid;
 
 extern const struct file_operations pidfd_fops;
 
+struct file;
+
+extern struct pid *pidfd_pid(const struct file *file);
+
 static inline struct pid *get_pid(struct pid *pid)
 {
 	if (pid)
diff --git a/include/uapi/linux/wait.h b/include/uapi/linux/wait.h
index ac49a220cf2a..85b809fc9f11 100644
--- a/include/uapi/linux/wait.h
+++ b/include/uapi/linux/wait.h
@@ -17,6 +17,7 @@
 #define P_ALL		0
 #define P_PID		1
 #define P_PGID		2
+#define P_PIDFD		3
 
 
 #endif /* _UAPI_LINUX_WAIT_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index a75b6a7f458a..64bb6893a37d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1552,6 +1552,23 @@ end:
 	return retval;
 }
 
+static struct pid *pidfd_get_pid(unsigned int fd)
+{
+	struct fd f;
+	struct pid *pid;
+
+	f = fdget(fd);
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+
+	pid = pidfd_pid(f.file);
+	if (!IS_ERR(pid))
+		get_pid(pid);
+
+	fdput(f);
+	return pid;
+}
+
 static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 			  int options, struct rusage *ru)
 {
@@ -1574,19 +1591,29 @@ static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
 		type = PIDTYPE_PID;
 		if (upid <= 0)
 			return -EINVAL;
+
+		pid = find_get_pid(upid);
 		break;
 	case P_PGID:
 		type = PIDTYPE_PGID;
 		if (upid <= 0)
 			return -EINVAL;
+
+		pid = find_get_pid(upid);
+		break;
+	case P_PIDFD:
+		type = PIDTYPE_PID;
+		if (upid < 0)
+			return -EINVAL;
+
+		pid = pidfd_get_pid(upid);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (type < PIDTYPE_MAX)
-		pid = find_get_pid(upid);
-
 	wo.wo_type	= type;
 	wo.wo_pid	= pid;
 	wo.wo_flags	= options;
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..b169e2ca2d84 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1690,6 +1690,14 @@ static inline void rcu_copy_process(struct task_struct *p)
 #endif /* #ifdef CONFIG_TASKS_RCU */
 }
 
+struct pid *pidfd_pid(const struct file *file)
+{
+	if (file->f_op == &pidfd_fops)
+		return file->private_data;
+
+	return ERR_PTR(-EBADF);
+}
+
 static int pidfd_release(struct inode *inode, struct file *file)
 {
 	struct pid *pid = file->private_data;
diff --git a/kernel/signal.c b/kernel/signal.c
index 91b789dd6e72..2e567f64812f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3672,8 +3672,11 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
 
 static struct pid *pidfd_to_pid(const struct file *file)
 {
-	if (file->f_op == &pidfd_fops)
-		return file->private_data;
+	struct pid *pid;
+
+	pid = pidfd_pid(file);
+	if (!IS_ERR(pid))
+		return pid;
 
 	return tgid_pidfd_to_pid(file);
 }
-- 
cgit v1.2.3


From 0a5b99f57873e233ad42ef71e23c629f6ea1fcfe Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Thu, 11 Jul 2019 16:45:41 -0400
Subject: treewide: Rename rcu_dereference_raw_notrace() to _check()

The rcu_dereference_raw_notrace() API name is confusing.  It is equivalent
to rcu_dereference_raw() except that it also does sparse pointer checking.

There are only a few users of rcu_dereference_raw_notrace(). This patches
renames all of them to be rcu_dereference_raw_check() with the "_check()"
indicating sparse checking.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ paulmck: Fix checkpatch warnings about parentheses. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 Documentation/RCU/Design/Requirements/Requirements.html | 2 +-
 arch/powerpc/include/asm/kvm_book3s_64.h                | 2 +-
 include/linux/rculist.h                                 | 6 +++---
 include/linux/rcupdate.h                                | 2 +-
 kernel/trace/ftrace_internal.h                          | 8 ++++----
 kernel/trace/trace.c                                    | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 5a9238a2883c..bdbc84f1b949 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2512,7 +2512,7 @@ disabled across the entire RCU read-side critical section.
 <p>
 It is possible to use tracing on RCU code, but tracing itself
 uses RCU.
-For this reason, <tt>rcu_dereference_raw_notrace()</tt>
+For this reason, <tt>rcu_dereference_raw_check()</tt>
 is provided for use by tracing, which avoids the destructive
 recursion that could otherwise ensue.
 This API is also used by virtualization in some architectures,
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index bb7c8cc77f1a..04b2b927bb5a 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -535,7 +535,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
  */
 static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
 {
-	return rcu_dereference_raw_notrace(kvm->memslots[0]);
+	return rcu_dereference_raw_check(kvm->memslots[0]);
 }
 
 extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index e91ec9ddcd30..932296144131 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -622,7 +622,7 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define hlist_for_each_entry_rcu(pos, head, member)			\
-	for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+	for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
 		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
@@ -642,10 +642,10 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  * not do any RCU debugging or tracing.
  */
 #define hlist_for_each_entry_rcu_notrace(pos, head, member)			\
-	for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\
+	for (pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
-		pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\
+		pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_next_rcu(\
 			&(pos)->member)), typeof(*(pos)), member))
 
 /**
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f7167478c1d..bfcafbc1e301 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -476,7 +476,7 @@ do {									      \
  * The no-tracing version of rcu_dereference_raw() must not call
  * rcu_read_lock_held().
  */
-#define rcu_dereference_raw_notrace(p) __rcu_dereference_check((p), 1, __rcu)
+#define rcu_dereference_raw_check(p) __rcu_dereference_check((p), 1, __rcu)
 
 /**
  * rcu_dereference_protected() - fetch RCU pointer when updates prevented
diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h
index 0515a2096f90..0456e0a3dab1 100644
--- a/kernel/trace/ftrace_internal.h
+++ b/kernel/trace/ftrace_internal.h
@@ -6,22 +6,22 @@
 
 /*
  * Traverse the ftrace_global_list, invoking all entries.  The reason that we
- * can use rcu_dereference_raw_notrace() is that elements removed from this list
+ * can use rcu_dereference_raw_check() is that elements removed from this list
  * are simply leaked, so there is no need to interact with a grace-period
- * mechanism.  The rcu_dereference_raw_notrace() calls are needed to handle
+ * mechanism.  The rcu_dereference_raw_check() calls are needed to handle
  * concurrent insertions into the ftrace_global_list.
  *
  * Silly Alpha and silly pointer-speculation compiler optimizations!
  */
 #define do_for_each_ftrace_op(op, list)			\
-	op = rcu_dereference_raw_notrace(list);			\
+	op = rcu_dereference_raw_check(list);			\
 	do
 
 /*
  * Optimized for just a single item in the list (as that is the normal case).
  */
 #define while_for_each_ftrace_op(op)				\
-	while (likely(op = rcu_dereference_raw_notrace((op)->next)) &&	\
+	while (likely(op = rcu_dereference_raw_check((op)->next)) &&	\
 	       unlikely((op) != &ftrace_list_end))
 
 extern struct ftrace_ops __rcu *ftrace_ops_list;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 525a97fbbc60..642474b26ba7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2642,10 +2642,10 @@ static void ftrace_exports(struct ring_buffer_event *event)
 
 	preempt_disable_notrace();
 
-	export = rcu_dereference_raw_notrace(ftrace_exports_list);
+	export = rcu_dereference_raw_check(ftrace_exports_list);
 	while (export) {
 		trace_process_export(export, event);
-		export = rcu_dereference_raw_notrace(export->next);
+		export = rcu_dereference_raw_check(export->next);
 	}
 
 	preempt_enable_notrace();
-- 
cgit v1.2.3


From 9f00baf74e4b6f79a3a3dfab44fb7bb2e797b551 Mon Sep 17 00:00:00 2001
From: Gary R Hook <gary.hook@amd.com>
Date: Tue, 30 Jul 2019 16:05:24 +0000
Subject: crypto: ccp - Add support for valid authsize values less than 16

AES GCM encryption allows for authsize values of 4, 8, and 12-16 bytes.
Validate the requested authsize, and retain it to save in the request
context.

Fixes: 36cf515b9bbe2 ("crypto: ccp - Enable support for AES GCM on v5 CCPs")
Cc: <stable@vger.kernel.org>
Signed-off-by: Gary R Hook <gary.hook@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-galois.c | 14 ++++++++++++++
 drivers/crypto/ccp/ccp-ops.c               | 26 +++++++++++++++++++++-----
 include/linux/ccp.h                        |  2 ++
 3 files changed, 37 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
index d22631cb2bb3..02eba84028b3 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -58,6 +58,19 @@ static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm,
 				   unsigned int authsize)
 {
+	switch (authsize) {
+	case 16:
+	case 15:
+	case 14:
+	case 13:
+	case 12:
+	case 8:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -104,6 +117,7 @@ static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt)
 	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
 	INIT_LIST_HEAD(&rctx->cmd.entry);
 	rctx->cmd.engine = CCP_ENGINE_AES;
+	rctx->cmd.u.aes.authsize = crypto_aead_authsize(tfm);
 	rctx->cmd.u.aes.type = ctx->u.aes.type;
 	rctx->cmd.u.aes.mode = ctx->u.aes.mode;
 	rctx->cmd.u.aes.action = encrypt;
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 59f9849c3662..ef723e2722a8 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -622,6 +622,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 
 	unsigned long long *final;
 	unsigned int dm_offset;
+	unsigned int authsize;
 	unsigned int jobid;
 	unsigned int ilen;
 	bool in_place = true; /* Default value */
@@ -643,6 +644,21 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 	if (!aes->key) /* Gotta have a key SGL */
 		return -EINVAL;
 
+	/* Zero defaults to 16 bytes, the maximum size */
+	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
+	switch (authsize) {
+	case 16:
+	case 15:
+	case 14:
+	case 13:
+	case 12:
+	case 8:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
 	/* First, decompose the source buffer into AAD & PT,
 	 * and the destination buffer into AAD, CT & tag, or
 	 * the input into CT & tag.
@@ -657,7 +673,7 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
 	} else {
 		/* Input length for decryption includes tag */
-		ilen = aes->src_len - AES_BLOCK_SIZE;
+		ilen = aes->src_len - authsize;
 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
 	}
 
@@ -839,19 +855,19 @@ static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
 
 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
 		/* Put the ciphered tag after the ciphertext. */
-		ccp_get_dm_area(&final_wa, 0, p_tag, 0, AES_BLOCK_SIZE);
+		ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize);
 	} else {
 		/* Does this ciphered tag match the input? */
-		ret = ccp_init_dm_workarea(&tag, cmd_q, AES_BLOCK_SIZE,
+		ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
 					   DMA_BIDIRECTIONAL);
 		if (ret)
 			goto e_tag;
-		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, AES_BLOCK_SIZE);
+		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
 		if (ret)
 			goto e_tag;
 
 		ret = crypto_memneq(tag.address, final_wa.address,
-				    AES_BLOCK_SIZE) ? -EBADMSG : 0;
+				    authsize) ? -EBADMSG : 0;
 		ccp_dm_free(&tag);
 	}
 
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index 7e9c991c95e0..43ed9e77cf81 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -173,6 +173,8 @@ struct ccp_aes_engine {
 	enum ccp_aes_mode mode;
 	enum ccp_aes_action action;
 
+	u32 authsize;
+
 	struct scatterlist *key;
 	u32 key_len;		/* In bytes */
 
-- 
cgit v1.2.3


From 68289c634344431d6f97480eb5384516f37e39b8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 23 Jul 2019 20:43:43 +0900
Subject: crypto: add header include guards

Add header include guards in case they are included multiple times.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sha1_base.h      | 5 +++++
 include/crypto/sha256_base.h    | 5 +++++
 include/crypto/sha512_base.h    | 5 +++++
 include/crypto/sm3_base.h       | 5 +++++
 include/uapi/linux/cryptouser.h | 5 +++++
 5 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h
index 63c14f2dc7bd..20fd1f7468af 100644
--- a/include/crypto/sha1_base.h
+++ b/include/crypto/sha1_base.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
+#ifndef _CRYPTO_SHA1_BASE_H
+#define _CRYPTO_SHA1_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/crypto.h>
@@ -101,3 +104,5 @@ static inline int sha1_base_finish(struct shash_desc *desc, u8 *out)
 	*sctx = (struct sha1_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SHA1_BASE_H */
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
index 59159bc944f5..b50a035a2bc7 100644
--- a/include/crypto/sha256_base.h
+++ b/include/crypto/sha256_base.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
+#ifndef _CRYPTO_SHA256_BASE_H
+#define _CRYPTO_SHA256_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/crypto.h>
@@ -123,3 +126,5 @@ static inline int sha256_base_finish(struct shash_desc *desc, u8 *out)
 	*sctx = (struct sha256_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SHA256_BASE_H */
diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h
index 099be8027f3f..fb19c77494dc 100644
--- a/include/crypto/sha512_base.h
+++ b/include/crypto/sha512_base.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
+#ifndef _CRYPTO_SHA512_BASE_H
+#define _CRYPTO_SHA512_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/crypto.h>
@@ -126,3 +129,5 @@ static inline int sha512_base_finish(struct shash_desc *desc, u8 *out)
 	*sctx = (struct sha512_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SHA512_BASE_H */
diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h
index 31891b0dc7e3..1cbf9aa1fe52 100644
--- a/include/crypto/sm3_base.h
+++ b/include/crypto/sm3_base.h
@@ -6,6 +6,9 @@
  * Written by Gilad Ben-Yossef <gilad@benyossef.com>
  */
 
+#ifndef _CRYPTO_SM3_BASE_H
+#define _CRYPTO_SM3_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sm3.h>
 #include <linux/crypto.h>
@@ -104,3 +107,5 @@ static inline int sm3_base_finish(struct shash_desc *desc, u8 *out)
 	*sctx = (struct sm3_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SM3_BASE_H */
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 4dc1603919ce..5730c67f0617 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -19,6 +19,9 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#ifndef _UAPI_LINUX_CRYPTOUSER_H
+#define _UAPI_LINUX_CRYPTOUSER_H
+
 #include <linux/types.h>
 
 /* Netlink configuration messages.  */
@@ -198,3 +201,5 @@ struct crypto_report_acomp {
 
 #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
 			       sizeof(struct crypto_report_blkcipher))
+
+#endif /* _UAPI_LINUX_CRYPTOUSER_H */
-- 
cgit v1.2.3


From 939f146b89c22c387d51f69b27ef031664f64cbe Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 23 Jul 2019 20:43:44 +0900
Subject: crypto: user - fix potential warnings in cryptouser.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Function definitions in headers are usually marked as 'static inline'.

Since 'inline' is missing for crypto_reportstat(), if it were not
referenced from a .c file that includes this header, it would produce
a warning.

Also, 'struct crypto_user_alg' is not declared in this header.

I included <linux/crytouser.h> instead of adding the forward declaration
as suggested [1].

Detected by compile-testing this header as a standalone unit:

./include/crypto/internal/cryptouser.h:6:44: warning: ‘struct crypto_user_alg’ declared inside parameter list will not be visible outside of this definition or declaration
 struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
                                            ^~~~~~~~~~~~~~~
./include/crypto/internal/cryptouser.h:11:12: warning: ‘crypto_reportstat’ defined but not used [-Wunused-function]
 static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs)
            ^~~~~~~~~~~~~~~~~

[1] https://lkml.org/lkml/2019/6/13/1121

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/cryptouser.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h
index 40623f4457df..fd54074332f5 100644
--- a/include/crypto/internal/cryptouser.h
+++ b/include/crypto/internal/cryptouser.h
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/cryptouser.h>
 #include <net/netlink.h>
 
 struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
@@ -6,7 +7,9 @@ struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
 #ifdef CONFIG_CRYPTO_STATS
 int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs);
 #else
-static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs)
+static inline int crypto_reportstat(struct sk_buff *in_skb,
+				    struct nlmsghdr *in_nlh,
+				    struct nlattr **attrs)
 {
 	return -ENOTSUPP;
 }
-- 
cgit v1.2.3


From f9981bc53825de2c001659d35c870786771f1e90 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Jul 2019 22:19:07 +0200
Subject: crypto: api - Remove redundant #ifdef in crypto_yield()

While looking at CONFIG_PREEMPT dependencies treewide the #ifdef in
crypto_yield() matched.

CONFIG_PREEMPT and CONFIG_PREEMPT_VOLUNTARY are mutually exclusive so the
extra !CONFIG_PREEMPT conditional is redundant.

cond_resched() has only an effect when CONFIG_PREEMPT_VOLUNTARY is set,
otherwise it's a stub which the compiler optimizes out.

Remove the whole conditional.

No functional change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-crypto@vger.kernel.org
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/algapi.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index dc1106af95c3..e5bd302f2c49 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -409,10 +409,8 @@ static inline int crypto_memneq(const void *a, const void *b, size_t size)
 
 static inline void crypto_yield(u32 flags)
 {
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
 	if (flags & CRYPTO_TFM_REQ_MAY_SLEEP)
 		cond_resched();
-#endif
 }
 
 int crypto_register_notifier(struct notifier_block *nb);
-- 
cgit v1.2.3


From 2ef540476e0077ca2f9992fde9a014891000c4b5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 29 Jul 2019 00:32:36 +0900
Subject: hwrng: timeriomem - add include guard to timeriomem-rng.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/timeriomem-rng.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index fd4a6e6ec831..672df7fbf6c1 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -5,6 +5,9 @@
  * Copyright (c) 2009 Alexander Clouter <alex@digriz.org.uk>
  */
 
+#ifndef _LINUX_TIMERIOMEM_RNG_H
+#define _LINUX_TIMERIOMEM_RNG_H
+
 struct timeriomem_rng_data {
 	void __iomem		*address;
 
@@ -14,3 +17,5 @@ struct timeriomem_rng_data {
 	/* bits of entropy per 1024 bits read */
 	unsigned int		quality;
 };
+
+#endif /* _LINUX_TIMERIOMEM_RNG_H */
-- 
cgit v1.2.3


From 23b6904442d08b7dbed7622ed33b236d41a3aa8b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 31 Jul 2019 14:43:40 +0200
Subject: driver core: add dev_groups to all drivers

Add the ability for the driver core to create and remove a list of
attribute groups automatically when the device is bound/unbound from a
specific driver.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Tested-by: Richard Gong <richard.gong@linux.intel.com>
Link: https://lore.kernel.org/r/20190731124349.4474-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c      | 14 ++++++++++++++
 include/linux/device.h |  3 +++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 994a90747420..d811e60610d3 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -554,9 +554,16 @@ re_probe:
 			goto probe_failed;
 	}
 
+	if (device_add_groups(dev, drv->dev_groups)) {
+		dev_err(dev, "device_add_groups() failed\n");
+		goto dev_groups_failed;
+	}
+
 	if (test_remove) {
 		test_remove = false;
 
+		device_remove_groups(dev, drv->dev_groups);
+
 		if (dev->bus->remove)
 			dev->bus->remove(dev);
 		else if (drv->remove)
@@ -584,6 +591,11 @@ re_probe:
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 	goto done;
 
+dev_groups_failed:
+	if (dev->bus->remove)
+		dev->bus->remove(dev);
+	else if (drv->remove)
+		drv->remove(dev);
 probe_failed:
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
@@ -1114,6 +1126,8 @@ static void __device_release_driver(struct device *dev, struct device *parent)
 
 		pm_runtime_put_sync(dev);
 
+		device_remove_groups(dev, drv->dev_groups);
+
 		if (dev->bus && dev->bus->remove)
 			dev->bus->remove(dev);
 		else if (drv->remove)
diff --git a/include/linux/device.h b/include/linux/device.h
index c330b75c6c57..98c00b71b598 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -262,6 +262,8 @@ enum probe_type {
  * @resume:	Called to bring a device from sleep mode.
  * @groups:	Default attributes that get created by the driver core
  *		automatically.
+ * @dev_groups:	Additional attributes attached to device instance once the
+ *		it is bound to the driver.
  * @pm:		Power management operations of the device which matched
  *		this driver.
  * @coredump:	Called when sysfs entry is written to. The device driver
@@ -296,6 +298,7 @@ struct device_driver {
 	int (*suspend) (struct device *dev, pm_message_t state);
 	int (*resume) (struct device *dev);
 	const struct attribute_group **groups;
+	const struct attribute_group **dev_groups;
 
 	const struct dev_pm_ops *pm;
 	void (*coredump) (struct device *dev);
-- 
cgit v1.2.3


From 274840e544225657fbca4f12efa1ee55474bb800 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Tue, 9 Jul 2019 19:09:42 -0400
Subject: drm/ttm: Add release_notify callback to ttm_bo_driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This notifies the driver that a BO is about to be released.

Releasing a BO also invokes the move_notify callback from
ttm_bo_cleanup_memtype_use, but that happens too late for anything
that would add fences to the BO and require a delayed delete.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    |  3 +++
 include/drm/ttm/ttm_bo_driver.h | 10 ++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a7fd5a4955c9..939b9258d513 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -671,6 +671,9 @@ static void ttm_bo_release(struct kref *kref)
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
 
+	if (bo->bdev->driver->release_notify)
+		bo->bdev->driver->release_notify(bo);
+
 	drm_vma_offset_remove(&bdev->vma_manager, &bo->vma_node);
 	ttm_mem_io_lock(man, false);
 	ttm_mem_io_free_vm(bo);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index a2d810a2504d..81077e5b4b7e 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -390,6 +390,16 @@ struct ttm_bo_driver {
 	 * notify driver that a BO was deleted from LRU.
 	 */
 	void (*del_from_lru_notify)(struct ttm_buffer_object *bo);
+
+	/**
+	 * Notify the driver that we're about to release a BO
+	 *
+	 * @bo: BO that is about to be released
+	 *
+	 * Gives the driver a chance to do any cleanup, including
+	 * adding fences that may force a delayed delete
+	 */
+	void (*release_notify)(struct ttm_buffer_object *bo);
 };
 
 /**
-- 
cgit v1.2.3


From d8f4981e2e8a968411105db568f3d48256b2ebbc Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Mon, 8 Jul 2019 20:09:21 -0400
Subject: drm/amdgpu: Add flag to wipe VRAM on release
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This memory allocation flag will be used to indicate BOs containing
sensitive data that should not be leaked to other processes.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 include/uapi/drm/amdgpu_drm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 11cc57322962..c99b4f2482c6 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -128,6 +128,10 @@ extern "C" {
  * for the second page onward should be set to NC.
  */
 #define AMDGPU_GEM_CREATE_MQD_GFX9		(1 << 8)
+/* Flag that BO may contain sensitive data that must be wiped before
+ * releasing the memory
+ */
+#define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE	(1 << 9)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
-- 
cgit v1.2.3


From 9802f5d78b3747467644edc609393f95d0b10e61 Mon Sep 17 00:00:00 2001
From: Xiaojie Yuan <xiaojie.yuan@amd.com>
Date: Mon, 17 Dec 2018 18:00:26 +0800
Subject: drm/amdgpu: add navi12 asic type

Add asic type.

Signed-off-by: Xiaojie Yuan <xiaojie.yuan@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 include/drm/amd_asic_type.h                | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2363e7658d36..6cd8670bc3eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -103,6 +103,7 @@ static const char *amdgpu_asic_name[] = {
 	"ARCTURUS",
 	"NAVI10",
 	"NAVI14",
+	"NAVI12",
 	"LAST",
 };
 
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 0f5a12a99948..737a1e374f0c 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -52,6 +52,7 @@ enum amd_asic_type {
 	CHIP_ARCTURUS,
 	CHIP_NAVI10,
 	CHIP_NAVI14,
+	CHIP_NAVI12,
 	CHIP_LAST,
 };
 
-- 
cgit v1.2.3


From 1e80936a42e16b7a7c4265de666c472256b7cf36 Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Wed, 19 Jun 2019 13:52:43 +0800
Subject: dt-bindings: imx: Add clock binding doc for i.MX8MN

Add the clock binding doc for i.MX8MN.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Reviewed-by: Maxime Ripard <maxime.ripard@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 .../devicetree/bindings/clock/imx8mn-clock.yaml    | 112 +++++++++++
 include/dt-bindings/clock/imx8mn-clock.h           | 215 +++++++++++++++++++++
 2 files changed, 327 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/imx8mn-clock.yaml
 create mode 100644 include/dt-bindings/clock/imx8mn-clock.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml b/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml
new file mode 100644
index 000000000000..622f3658bd9f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx8mn-clock.yaml
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bindings/clock/imx8mn-clock.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX8M Nano Clock Control Module Binding
+
+maintainers:
+  - Anson Huang <Anson.Huang@nxp.com>
+
+description: |
+  NXP i.MX8M Nano clock control module is an integrated clock controller, which
+  generates and supplies to all modules.
+
+properties:
+  compatible:
+    const: fsl,imx8mn-ccm
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: 32k osc
+      - description: 24m osc
+      - description: ext1 clock input
+      - description: ext2 clock input
+      - description: ext3 clock input
+      - description: ext4 clock input
+
+  clock-names:
+    items:
+      - const: osc_32k
+      - const: osc_24m
+      - const: clk_ext1
+      - const: clk_ext2
+      - const: clk_ext3
+      - const: clk_ext4
+
+  '#clock-cells':
+    const: 1
+    description: |
+      The clock consumer should specify the desired clock by having the clock
+      ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx8mn-clock.h
+      for the full list of i.MX8M Nano clock IDs.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - '#clock-cells'
+
+examples:
+  # Clock Control Module node:
+  - |
+    clk: clock-controller@30380000 {
+        compatible = "fsl,imx8mn-ccm";
+        reg = <0x0 0x30380000 0x0 0x10000>;
+        #clock-cells = <1>;
+        clocks = <&osc_32k>, <&osc_24m>, <&clk_ext1>,
+                 <&clk_ext2>, <&clk_ext3>, <&clk_ext4>;
+        clock-names = "osc_32k", "osc_24m", "clk_ext1",
+                      "clk_ext2", "clk_ext3", "clk_ext4";
+    };
+
+  # Required external clocks for Clock Control Module node:
+  - |
+    osc_32k: clock-osc-32k {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <32768>;
+        clock-output-names = "osc_32k";
+    };
+
+    osc_24m: clock-osc-24m {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <24000000>;
+        clock-output-names = "osc_24m";
+    };
+
+    clk_ext1: clock-ext1 {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <133000000>;
+        clock-output-names = "clk_ext1";
+    };
+
+    clk_ext2: clock-ext2 {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <133000000>;
+        clock-output-names = "clk_ext2";
+    };
+
+    clk_ext3: clock-ext3 {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <133000000>;
+        clock-output-names = "clk_ext3";
+    };
+
+    clk_ext4: clock-ext4 {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency= <133000000>;
+        clock-output-names = "clk_ext4";
+    };
+
+...
diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h
new file mode 100644
index 000000000000..5255b1c2420e
--- /dev/null
+++ b/include/dt-bindings/clock/imx8mn-clock.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2019 NXP
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_IMX8MN_H
+#define __DT_BINDINGS_CLOCK_IMX8MN_H
+
+#define IMX8MN_CLK_DUMMY			0
+#define IMX8MN_CLK_32K				1
+#define IMX8MN_CLK_24M				2
+#define IMX8MN_OSC_HDMI_CLK			3
+#define IMX8MN_CLK_EXT1				4
+#define IMX8MN_CLK_EXT2				5
+#define IMX8MN_CLK_EXT3				6
+#define IMX8MN_CLK_EXT4				7
+#define IMX8MN_AUDIO_PLL1_REF_SEL		8
+#define IMX8MN_AUDIO_PLL2_REF_SEL		9
+#define IMX8MN_VIDEO_PLL1_REF_SEL		10
+#define IMX8MN_DRAM_PLL_REF_SEL			11
+#define IMX8MN_GPU_PLL_REF_SEL			12
+#define IMX8MN_VPU_PLL_REF_SEL			13
+#define IMX8MN_ARM_PLL_REF_SEL			14
+#define IMX8MN_SYS_PLL1_REF_SEL			15
+#define IMX8MN_SYS_PLL2_REF_SEL			16
+#define IMX8MN_SYS_PLL3_REF_SEL			17
+#define IMX8MN_AUDIO_PLL1			18
+#define IMX8MN_AUDIO_PLL2			19
+#define IMX8MN_VIDEO_PLL1			20
+#define IMX8MN_DRAM_PLL				21
+#define IMX8MN_GPU_PLL				22
+#define IMX8MN_VPU_PLL				23
+#define IMX8MN_ARM_PLL				24
+#define IMX8MN_SYS_PLL1				25
+#define IMX8MN_SYS_PLL2				26
+#define IMX8MN_SYS_PLL3				27
+#define IMX8MN_AUDIO_PLL1_BYPASS		28
+#define IMX8MN_AUDIO_PLL2_BYPASS		29
+#define IMX8MN_VIDEO_PLL1_BYPASS		30
+#define IMX8MN_DRAM_PLL_BYPASS			31
+#define IMX8MN_GPU_PLL_BYPASS			32
+#define IMX8MN_VPU_PLL_BYPASS			33
+#define IMX8MN_ARM_PLL_BYPASS			34
+#define IMX8MN_SYS_PLL1_BYPASS			35
+#define IMX8MN_SYS_PLL2_BYPASS			36
+#define IMX8MN_SYS_PLL3_BYPASS			37
+#define IMX8MN_AUDIO_PLL1_OUT			38
+#define IMX8MN_AUDIO_PLL2_OUT			39
+#define IMX8MN_VIDEO_PLL1_OUT			40
+#define IMX8MN_DRAM_PLL_OUT			41
+#define IMX8MN_GPU_PLL_OUT			42
+#define IMX8MN_VPU_PLL_OUT			43
+#define IMX8MN_ARM_PLL_OUT			44
+#define IMX8MN_SYS_PLL1_OUT			45
+#define IMX8MN_SYS_PLL2_OUT			46
+#define IMX8MN_SYS_PLL3_OUT			47
+#define IMX8MN_SYS_PLL1_40M			48
+#define IMX8MN_SYS_PLL1_80M			49
+#define IMX8MN_SYS_PLL1_100M			50
+#define IMX8MN_SYS_PLL1_133M			51
+#define IMX8MN_SYS_PLL1_160M			52
+#define IMX8MN_SYS_PLL1_200M			53
+#define IMX8MN_SYS_PLL1_266M			54
+#define IMX8MN_SYS_PLL1_400M			55
+#define IMX8MN_SYS_PLL1_800M			56
+#define IMX8MN_SYS_PLL2_50M			57
+#define IMX8MN_SYS_PLL2_100M			58
+#define IMX8MN_SYS_PLL2_125M			59
+#define IMX8MN_SYS_PLL2_166M			60
+#define IMX8MN_SYS_PLL2_200M			61
+#define IMX8MN_SYS_PLL2_250M			62
+#define IMX8MN_SYS_PLL2_333M			63
+#define IMX8MN_SYS_PLL2_500M			64
+#define IMX8MN_SYS_PLL2_1000M			65
+
+/* CORE CLOCK ROOT */
+#define IMX8MN_CLK_A53_SRC			66
+#define IMX8MN_CLK_GPU_CORE_SRC			67
+#define IMX8MN_CLK_GPU_SHADER_SRC		68
+#define IMX8MN_CLK_A53_CG			69
+#define IMX8MN_CLK_GPU_CORE_CG			70
+#define IMX8MN_CLK_GPU_SHADER_CG		71
+#define IMX8MN_CLK_A53_DIV			72
+#define IMX8MN_CLK_GPU_CORE_DIV			73
+#define IMX8MN_CLK_GPU_SHADER_DIV		74
+
+/* BUS CLOCK ROOT */
+#define IMX8MN_CLK_MAIN_AXI			75
+#define IMX8MN_CLK_ENET_AXI			76
+#define IMX8MN_CLK_NAND_USDHC_BUS		77
+#define IMX8MN_CLK_DISP_AXI			78
+#define IMX8MN_CLK_DISP_APB			79
+#define IMX8MN_CLK_USB_BUS			80
+#define IMX8MN_CLK_GPU_AXI			81
+#define IMX8MN_CLK_GPU_AHB			82
+#define IMX8MN_CLK_NOC				83
+#define IMX8MN_CLK_AHB				84
+#define IMX8MN_CLK_AUDIO_AHB			85
+
+/* IPG CLOCK ROOT */
+#define IMX8MN_CLK_IPG_ROOT			86
+#define IMX8MN_CLK_IPG_AUDIO_ROOT		87
+
+/* IP */
+#define IMX8MN_CLK_DRAM_CORE			88
+#define IMX8MN_CLK_DRAM_ALT			89
+#define IMX8MN_CLK_DRAM_APB			90
+#define IMX8MN_CLK_DRAM_ALT_ROOT		91
+#define IMX8MN_CLK_DISP_PIXEL			92
+#define IMX8MN_CLK_SAI2				93
+#define IMX8MN_CLK_SAI3				94
+#define IMX8MN_CLK_SAI5				95
+#define IMX8MN_CLK_SAI6				96
+#define IMX8MN_CLK_SPDIF1			97
+#define IMX8MN_CLK_ENET_REF			98
+#define IMX8MN_CLK_ENET_TIMER			99
+#define IMX8MN_CLK_ENET_PHY_REF			100
+#define IMX8MN_CLK_NAND				101
+#define IMX8MN_CLK_QSPI				102
+#define IMX8MN_CLK_USDHC1			103
+#define IMX8MN_CLK_USDHC2			104
+#define IMX8MN_CLK_I2C1				105
+#define IMX8MN_CLK_I2C2				106
+#define IMX8MN_CLK_I2C3				107
+#define IMX8MN_CLK_I2C4				118
+#define IMX8MN_CLK_UART1			119
+#define IMX8MN_CLK_UART2			110
+#define IMX8MN_CLK_UART3			111
+#define IMX8MN_CLK_UART4			112
+#define IMX8MN_CLK_USB_CORE_REF			113
+#define IMX8MN_CLK_USB_PHY_REF			114
+#define IMX8MN_CLK_ECSPI1			115
+#define IMX8MN_CLK_ECSPI2			116
+#define IMX8MN_CLK_PWM1				117
+#define IMX8MN_CLK_PWM2				118
+#define IMX8MN_CLK_PWM3				119
+#define IMX8MN_CLK_PWM4				120
+#define IMX8MN_CLK_WDOG				121
+#define IMX8MN_CLK_WRCLK			122
+#define IMX8MN_CLK_CLKO1			123
+#define IMX8MN_CLK_CLKO2			124
+#define IMX8MN_CLK_DSI_CORE			125
+#define IMX8MN_CLK_DSI_PHY_REF			126
+#define IMX8MN_CLK_DSI_DBI			127
+#define IMX8MN_CLK_USDHC3			128
+#define IMX8MN_CLK_CAMERA_PIXEL			129
+#define IMX8MN_CLK_CSI1_PHY_REF			130
+#define IMX8MN_CLK_CSI2_PHY_REF			131
+#define IMX8MN_CLK_CSI2_ESC			132
+#define IMX8MN_CLK_ECSPI3			133
+#define IMX8MN_CLK_PDM				134
+#define IMX8MN_CLK_SAI7				135
+
+#define IMX8MN_CLK_ECSPI1_ROOT			136
+#define IMX8MN_CLK_ECSPI2_ROOT			137
+#define IMX8MN_CLK_ECSPI3_ROOT			138
+#define IMX8MN_CLK_ENET1_ROOT			139
+#define IMX8MN_CLK_GPIO1_ROOT			140
+#define IMX8MN_CLK_GPIO2_ROOT			141
+#define IMX8MN_CLK_GPIO3_ROOT			142
+#define IMX8MN_CLK_GPIO4_ROOT			143
+#define IMX8MN_CLK_GPIO5_ROOT			144
+#define IMX8MN_CLK_I2C1_ROOT			145
+#define IMX8MN_CLK_I2C2_ROOT			146
+#define IMX8MN_CLK_I2C3_ROOT			147
+#define IMX8MN_CLK_I2C4_ROOT			148
+#define IMX8MN_CLK_MU_ROOT			149
+#define IMX8MN_CLK_OCOTP_ROOT			150
+#define IMX8MN_CLK_PWM1_ROOT			151
+#define IMX8MN_CLK_PWM2_ROOT			152
+#define IMX8MN_CLK_PWM3_ROOT			153
+#define IMX8MN_CLK_PWM4_ROOT			154
+#define IMX8MN_CLK_QSPI_ROOT			155
+#define IMX8MN_CLK_NAND_ROOT			156
+#define IMX8MN_CLK_SAI2_ROOT			157
+#define IMX8MN_CLK_SAI2_IPG			158
+#define IMX8MN_CLK_SAI3_ROOT			159
+#define IMX8MN_CLK_SAI3_IPG			160
+#define IMX8MN_CLK_SAI5_ROOT			161
+#define IMX8MN_CLK_SAI5_IPG			162
+#define IMX8MN_CLK_SAI6_ROOT			163
+#define IMX8MN_CLK_SAI6_IPG			164
+#define IMX8MN_CLK_SAI7_ROOT			165
+#define IMX8MN_CLK_SAI7_IPG			166
+#define IMX8MN_CLK_SDMA1_ROOT			167
+#define IMX8MN_CLK_SDMA2_ROOT			168
+#define IMX8MN_CLK_UART1_ROOT			169
+#define IMX8MN_CLK_UART2_ROOT			170
+#define IMX8MN_CLK_UART3_ROOT			171
+#define IMX8MN_CLK_UART4_ROOT			172
+#define IMX8MN_CLK_USB1_CTRL_ROOT		173
+#define IMX8MN_CLK_USDHC1_ROOT			174
+#define IMX8MN_CLK_USDHC2_ROOT			175
+#define IMX8MN_CLK_WDOG1_ROOT			176
+#define IMX8MN_CLK_WDOG2_ROOT			177
+#define IMX8MN_CLK_WDOG3_ROOT			178
+#define IMX8MN_CLK_GPU_BUS_ROOT			179
+#define IMX8MN_CLK_ASRC_ROOT			180
+#define IMX8MN_CLK_GPU3D_ROOT			181
+#define IMX8MN_CLK_PDM_ROOT			182
+#define IMX8MN_CLK_PDM_IPG			183
+#define IMX8MN_CLK_DISP_AXI_ROOT		184
+#define IMX8MN_CLK_DISP_APB_ROOT		185
+#define IMX8MN_CLK_DISP_PIXEL_ROOT		186
+#define IMX8MN_CLK_CAMERA_PIXEL_ROOT		187
+#define IMX8MN_CLK_USDHC3_ROOT			188
+#define IMX8MN_CLK_SDMA3_ROOT			189
+#define IMX8MN_CLK_TMU_ROOT			190
+#define IMX8MN_CLK_ARM				191
+#define IMX8MN_CLK_NAND_USDHC_BUS_RAWNAND_CLK	192
+#define IMX8MN_CLK_GPU_CORE_ROOT		193
+
+#define IMX8MN_CLK_END				194
+
+#endif
-- 
cgit v1.2.3


From 6ad7cb7122cee4d3e672e58bf542b070b9363c15 Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@nxp.com>
Date: Tue, 2 Jul 2019 18:20:07 +0300
Subject: clk: imx8: Add DSP related clocks

i.MX8QXP contains Hifi4 DSP. There are four clocks
associated with DSP:
  * dsp_lpcg_core_clk
  * dsp_lpcg_ipg_clk
  * dsp_lpcg_adb_aclk
  * ocram_lpcg_ipg_clk

Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/clk/imx/clk-imx8qxp-lpcg.c     | 5 +++++
 include/dt-bindings/clock/imx8-clock.h | 6 +++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c
index fb6edf1b8aa2..c0aff7ca6374 100644
--- a/drivers/clk/imx/clk-imx8qxp-lpcg.c
+++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c
@@ -72,6 +72,11 @@ static const struct imx8qxp_lpcg_data imx8qxp_lpcg_adma[] = {
 	{ IMX_ADMA_LPCG_I2C2_CLK, "i2c2_lpcg_clk", "i2c2_clk", 0, ADMA_LPI2C_2_LPCG, 0, 0, },
 	{ IMX_ADMA_LPCG_I2C3_IPG_CLK, "i2c3_lpcg_ipg_clk", "dma_ipg_clk_root", 0, ADMA_LPI2C_3_LPCG, 16, 0, },
 	{ IMX_ADMA_LPCG_I2C3_CLK, "i2c3_lpcg_clk", "i2c3_clk", 0, ADMA_LPI2C_3_LPCG, 0, 0, },
+
+	{ IMX_ADMA_LPCG_DSP_CORE_CLK, "dsp_lpcg_core_clk", "dma_ipg_clk_root", 0, ADMA_HIFI_LPCG, 28, 0, },
+	{ IMX_ADMA_LPCG_DSP_IPG_CLK, "dsp_lpcg_ipg_clk", "dma_ipg_clk_root", 0, ADMA_HIFI_LPCG, 20, 0, },
+	{ IMX_ADMA_LPCG_DSP_ADB_CLK, "dsp_lpcg_adb_clk", "dma_ipg_clk_root", 0, ADMA_HIFI_LPCG, 16, 0, },
+	{ IMX_ADMA_LPCG_OCRAM_IPG_CLK, "ocram_lpcg_ipg_clk", "dma_ipg_clk_root", 0, ADMA_OCRAM_LPCG, 16, 0, },
 };
 
 static const struct imx8qxp_ss_lpcg imx8qxp_ss_adma = {
diff --git a/include/dt-bindings/clock/imx8-clock.h b/include/dt-bindings/clock/imx8-clock.h
index 4236818e3be5..673a8c662340 100644
--- a/include/dt-bindings/clock/imx8-clock.h
+++ b/include/dt-bindings/clock/imx8-clock.h
@@ -283,7 +283,11 @@
 #define IMX_ADMA_LPCG_PWM_IPG_CLK			38
 #define IMX_ADMA_LPCG_LCD_PIX_CLK			39
 #define IMX_ADMA_LPCG_LCD_APB_CLK			40
+#define IMX_ADMA_LPCG_DSP_ADB_CLK			41
+#define IMX_ADMA_LPCG_DSP_IPG_CLK			42
+#define IMX_ADMA_LPCG_DSP_CORE_CLK			43
+#define IMX_ADMA_LPCG_OCRAM_IPG_CLK			44
 
-#define IMX_ADMA_LPCG_CLK_END				41
+#define IMX_ADMA_LPCG_CLK_END				45
 
 #endif /* __DT_BINDINGS_CLOCK_IMX_H */
-- 
cgit v1.2.3


From ee38d94a0ad89890b770f6c876263cf9fcbfde84 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 2 Aug 2019 21:49:02 -0700
Subject: page flags: prioritize kasan bits over last-cpuid

ARM64 randdconfig builds regularly run into a build error, especially
when NUMA_BALANCING and SPARSEMEM are enabled but not SPARSEMEM_VMEMMAP:

  #error "KASAN: not enough bits in page flags for tag"

The last-cpuid bits are already contitional on the available space, so
the result of the calculation is a bit random on whether they were
already left out or not.

Adding the kasan tag bits before last-cpuid makes it much more likely to
end up with a successful build here, and should be reliable for
randconfig at least, as long as that does not randomize NR_CPUS or
NODES_SHIFT but uses the defaults.

In order for the modified check to not trigger in the x86 vdso32 code
where all constants are wrong (building with -m32), enclose all the
definitions with an #ifdef.

[arnd@arndb.de: build fix]
  Link: http://lkml.kernel.org/r/CAK8P3a3Mno1SWTcuAOT0Wa9VS15pdU6EfnkxLbDpyS55yO04+g@mail.gmail.com
Link: http://lkml.kernel.org/r/20190722115520.3743282-1-arnd@arndb.de
Link: https://lore.kernel.org/lkml/20190618095347.3850490-1-arnd@arndb.de/
Fixes: 2813b9c02962 ("kasan, mm, arm64: tag non slab memory allocated via pagealloc")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/vdso/vdso.h             |  1 +
 include/linux/page-flags-layout.h | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/mips/vdso/vdso.h b/arch/mips/vdso/vdso.h
index 14b1931be69c..b65b169778e3 100644
--- a/arch/mips/vdso/vdso.h
+++ b/arch/mips/vdso/vdso.h
@@ -9,6 +9,7 @@
 #if _MIPS_SIM != _MIPS_SIM_ABI64 && defined(CONFIG_64BIT)
 
 /* Building 32-bit VDSO for the 64-bit kernel. Fake a 32-bit Kconfig. */
+#define BUILD_VDSO32_64
 #undef CONFIG_64BIT
 #define CONFIG_32BIT 1
 #ifndef __ASSEMBLY__
diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
index 1dda31825ec4..71283739ffd2 100644
--- a/include/linux/page-flags-layout.h
+++ b/include/linux/page-flags-layout.h
@@ -32,6 +32,7 @@
 
 #endif /* CONFIG_SPARSEMEM */
 
+#ifndef BUILD_VDSO32_64
 /*
  * page->flags layout:
  *
@@ -76,20 +77,22 @@
 #define LAST_CPUPID_SHIFT 0
 #endif
 
-#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS
+#ifdef CONFIG_KASAN_SW_TAGS
+#define KASAN_TAG_WIDTH 8
+#else
+#define KASAN_TAG_WIDTH 0
+#endif
+
+#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \
+	<= BITS_PER_LONG - NR_PAGEFLAGS
 #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT
 #else
 #define LAST_CPUPID_WIDTH 0
 #endif
 
-#ifdef CONFIG_KASAN_SW_TAGS
-#define KASAN_TAG_WIDTH 8
 #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \
 	> BITS_PER_LONG - NR_PAGEFLAGS
-#error "KASAN: not enough bits in page flags for tag"
-#endif
-#else
-#define KASAN_TAG_WIDTH 0
+#error "Not enough bits in page flags"
 #endif
 
 /*
@@ -104,4 +107,5 @@
 #define LAST_CPUPID_NOT_IN_PAGE_FLAGS
 #endif
 
+#endif
 #endif /* _LINUX_PAGE_FLAGS_LAYOUT */
-- 
cgit v1.2.3


From cbedfe11347fe418621bd188d58a206beb676218 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Fri, 2 Aug 2019 21:49:19 -0700
Subject: asm-generic: fix -Wtype-limits compiler warnings

Commit d66acc39c7ce ("bitops: Optimise get_order()") introduced a
compilation warning because "rx_frag_size" is an "ushort" while
PAGE_SHIFT here is 16.

The commit changed the get_order() to be a multi-line macro where
compilers insist to check all statements in the macro even when
__builtin_constant_p(rx_frag_size) will return false as "rx_frag_size"
is a module parameter.

In file included from ./arch/powerpc/include/asm/page_64.h:107,
                 from ./arch/powerpc/include/asm/page.h:242,
                 from ./arch/powerpc/include/asm/mmu.h:132,
                 from ./arch/powerpc/include/asm/lppaca.h:47,
                 from ./arch/powerpc/include/asm/paca.h:17,
                 from ./arch/powerpc/include/asm/current.h:13,
                 from ./include/linux/thread_info.h:21,
                 from ./arch/powerpc/include/asm/processor.h:39,
                 from ./include/linux/prefetch.h:15,
                 from drivers/net/ethernet/emulex/benet/be_main.c:14:
drivers/net/ethernet/emulex/benet/be_main.c: In function 'be_rx_cqs_create':
./include/asm-generic/getorder.h:54:9: warning: comparison is always
true due to limited range of data type [-Wtype-limits]
   (((n) < (1UL << PAGE_SHIFT)) ? 0 :  \
         ^
drivers/net/ethernet/emulex/benet/be_main.c:3138:33: note: in expansion
of macro 'get_order'
  adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
                                 ^~~~~~~~~

Fix it by moving all of this multi-line macro into a proper function,
and killing __get_order() off.

[akpm@linux-foundation.org: remove __get_order() altogether]
[cai@lca.pw: v2]
  Link: http://lkml.kernel.org/r/1564000166-31428-1-git-send-email-cai@lca.pw
Link: http://lkml.kernel.org/r/1563914986-26502-1-git-send-email-cai@lca.pw
Fixes: d66acc39c7ce ("bitops: Optimise get_order()")
Signed-off-by: Qian Cai <cai@lca.pw>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Bill Wendling <morbo@google.com>
Cc: James Y Knight <jyknight@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/getorder.h | 50 +++++++++++++++++-------------------------
 1 file changed, 20 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/getorder.h b/include/asm-generic/getorder.h
index c64bea7a52be..e9f20b813a69 100644
--- a/include/asm-generic/getorder.h
+++ b/include/asm-generic/getorder.h
@@ -7,24 +7,6 @@
 #include <linux/compiler.h>
 #include <linux/log2.h>
 
-/*
- * Runtime evaluation of get_order()
- */
-static inline __attribute_const__
-int __get_order(unsigned long size)
-{
-	int order;
-
-	size--;
-	size >>= PAGE_SHIFT;
-#if BITS_PER_LONG == 32
-	order = fls(size);
-#else
-	order = fls64(size);
-#endif
-	return order;
-}
-
 /**
  * get_order - Determine the allocation order of a memory size
  * @size: The size for which to get the order
@@ -43,19 +25,27 @@ int __get_order(unsigned long size)
  * to hold an object of the specified size.
  *
  * The result is undefined if the size is 0.
- *
- * This function may be used to initialise variables with compile time
- * evaluations of constants.
  */
-#define get_order(n)						\
-(								\
-	__builtin_constant_p(n) ? (				\
-		((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT :	\
-		(((n) < (1UL << PAGE_SHIFT)) ? 0 :		\
-		 ilog2((n) - 1) - PAGE_SHIFT + 1)		\
-	) :							\
-	__get_order(n)						\
-)
+static inline __attribute_const__ int get_order(unsigned long size)
+{
+	if (__builtin_constant_p(size)) {
+		if (!size)
+			return BITS_PER_LONG - PAGE_SHIFT;
+
+		if (size < (1UL << PAGE_SHIFT))
+			return 0;
+
+		return ilog2((size) - 1) - PAGE_SHIFT + 1;
+	}
+
+	size--;
+	size >>= PAGE_SHIFT;
+#if BITS_PER_LONG == 32
+	return fls(size);
+#else
+	return fls64(size);
+#endif
+}
 
 #endif	/* __ASSEMBLY__ */
 
-- 
cgit v1.2.3


From 8c0bb7873815bf8c3c4dfb24e8ebf4fefb4c35d2 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Wed, 10 Jul 2019 12:05:59 +0200
Subject: netfilter: synproxy: rename mss synproxy_options field

After introduce "mss_encode" field in the synproxy_options struct the field
"mss" is a little confusing. It has been renamed to "mss_option".

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_synproxy.h | 2 +-
 net/ipv4/netfilter/ipt_SYNPROXY.c             | 4 ++--
 net/ipv6/netfilter/ip6t_SYNPROXY.c            | 4 ++--
 net/netfilter/nf_synproxy_core.c              | 8 ++++----
 net/netfilter/nft_synproxy.c                  | 4 ++--
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 44513b93bd55..2f0171d24997 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -67,7 +67,7 @@ static inline struct synproxy_net *synproxy_pernet(struct net *net)
 struct synproxy_options {
 	u8				options;
 	u8				wscale;
-	u16				mss;
+	u16				mss_option;
 	u16				mss_encode;
 	u32				tsval;
 	u32				tsecr;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 0e70f3f65f6f..748dc3ce58d3 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
-		opts.mss_encode = opts.mss;
-		opts.mss = info->mss;
+		opts.mss_encode = opts.mss_option;
+		opts.mss_option = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 5cdb4a69d277..fd1f52a21bf1 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 			opts.options |= XT_SYNPROXY_OPT_ECN;
 
 		opts.options &= info->options;
-		opts.mss_encode = opts.mss;
-		opts.mss = info->mss;
+		opts.mss_encode = opts.mss_option;
+		opts.mss_option = info->mss;
 		if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
 			synproxy_init_timestamp_cookie(info, &opts);
 		else
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index c769462a839e..b0930d4aba22 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -56,7 +56,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
 			switch (opcode) {
 			case TCPOPT_MSS:
 				if (opsize == TCPOLEN_MSS) {
-					opts->mss = get_unaligned_be16(ptr);
+					opts->mss_option = get_unaligned_be16(ptr);
 					opts->options |= NF_SYNPROXY_OPT_MSS;
 				}
 				break;
@@ -115,7 +115,7 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts)
 	if (options & NF_SYNPROXY_OPT_MSS)
 		*ptr++ = htonl((TCPOPT_MSS << 24) |
 			       (TCPOLEN_MSS << 16) |
-			       opts->mss);
+			       opts->mss_option);
 
 	if (options & NF_SYNPROXY_OPT_TIMESTAMP) {
 		if (options & NF_SYNPROXY_OPT_SACK_PERM)
@@ -642,7 +642,7 @@ synproxy_recv_client_ack(struct net *net,
 	}
 
 	this_cpu_inc(snet->stats->cookie_valid);
-	opts->mss = mss;
+	opts->mss_option = mss;
 	opts->options |= NF_SYNPROXY_OPT_MSS;
 
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
@@ -1060,7 +1060,7 @@ synproxy_recv_client_ack_ipv6(struct net *net,
 	}
 
 	this_cpu_inc(snet->stats->cookie_valid);
-	opts->mss = mss;
+	opts->mss_option = mss;
 	opts->options |= NF_SYNPROXY_OPT_MSS;
 
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 928e661d1517..db4c23f5dfcb 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -31,8 +31,8 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
 		opts->options |= NF_SYNPROXY_OPT_ECN;
 
 	opts->options &= priv->info.options;
-	opts->mss_encode = opts->mss;
-	opts->mss = info->mss;
+	opts->mss_encode = opts->mss_option;
+	opts->mss_option = info->mss;
 	if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
 		synproxy_init_timestamp_cookie(info, opts);
 	else
-- 
cgit v1.2.3


From 5e5412c365a32e452daa762eac36121cb8a370bb Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Tue, 30 Jul 2019 11:30:33 -0400
Subject: net/socket: fix GCC8+ Wpacked-not-aligned warnings

There are a lot of those warnings with GCC8+ 64-bit,

In file included from ./include/linux/sctp.h:42,
                 from net/core/skbuff.c:47:
./include/uapi/linux/sctp.h:395:1: warning: alignment 4 of 'struct
sctp_paddr_change' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:728:1: warning: alignment 4 of 'struct
sctp_setpeerprim' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:727:26: warning: 'sspp_addr' offset 4 in
'struct sctp_setpeerprim' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage sspp_addr;
                          ^~~~~~~~~
./include/uapi/linux/sctp.h:741:1: warning: alignment 4 of 'struct
sctp_prim' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:740:26: warning: 'ssp_addr' offset 4 in
'struct sctp_prim' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage ssp_addr;
                          ^~~~~~~~
./include/uapi/linux/sctp.h:792:1: warning: alignment 4 of 'struct
sctp_paddrparams' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:784:26: warning: 'spp_address' offset 4 in
'struct sctp_paddrparams' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage spp_address;
                          ^~~~~~~~~~~
./include/uapi/linux/sctp.h:905:1: warning: alignment 4 of 'struct
sctp_paddrinfo' is less than 8 [-Wpacked-not-aligned]
 } __attribute__((packed, aligned(4)));
 ^
./include/uapi/linux/sctp.h:899:26: warning: 'spinfo_address' offset 4
in 'struct sctp_paddrinfo' isn't aligned to 8 [-Wpacked-not-aligned]
  struct sockaddr_storage spinfo_address;
                          ^~~~~~~~~~~~~~

This is because the commit 20c9c825b12f ("[SCTP] Fix SCTP socket options
to work with 32-bit apps on 64-bit kernels.") added "packed, aligned(4)"
GCC attributes to some structures but one of the members, i.e, "struct
sockaddr_storage" in those structures has the attribute,
"aligned(__alignof__ (struct sockaddr *)" which is 8-byte on 64-bit
systems, so the commit overwrites the designed alignments for
"sockaddr_storage".

To fix this, "struct sockaddr_storage" needs to be aligned to 4-byte as
it is only used in those packed sctp structure which is part of UAPI,
and "struct __kernel_sockaddr_storage" is used in some other
places of UAPI that need not to change alignments in order to not
breaking userspace.

Use an implicit alignment for "struct __kernel_sockaddr_storage" so it
can keep the same alignments as a member in both packed and un-packed
structures without breaking UAPI.

Suggested-by: David Laight <David.Laight@ACULAB.COM>
Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/socket.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h
index 8eb96021709c..c3409c8ec0dd 100644
--- a/include/uapi/linux/socket.h
+++ b/include/uapi/linux/socket.h
@@ -6,17 +6,24 @@
  * Desired design of maximum size and alignment (see RFC2553)
  */
 #define _K_SS_MAXSIZE	128	/* Implementation specific max size */
-#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
-				/* Implementation specific desired alignment */
 
 typedef unsigned short __kernel_sa_family_t;
 
+/*
+ * The definition uses anonymous union and struct in order to control the
+ * default alignment.
+ */
 struct __kernel_sockaddr_storage {
-	__kernel_sa_family_t	ss_family;		/* address family */
-	/* Following field(s) are implementation specific */
-	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
+	union {
+		struct {
+			__kernel_sa_family_t	ss_family; /* address family */
+			/* Following field(s) are implementation specific */
+			char __data[_K_SS_MAXSIZE - sizeof(unsigned short)];
 				/* space to achieve desired size, */
 				/* _SS_MAXSIZE value minus size of ss_family */
-} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
+		};
+		void *__align; /* implementation specific desired alignment */
+	};
+};
 
 #endif /* _UAPI_LINUX_SOCKET_H */
-- 
cgit v1.2.3


From ea77388b02270b0af8dc57f668f311235ea068f0 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markz@mellanox.com>
Date: Wed, 31 Jul 2019 14:40:13 +0300
Subject: net/mlx5: Fix mlx5_ifc_query_lag_out_bits

Remove the "reserved_at_40" field to match the device specification.

Fixes: 84df61ebc69b ("net/mlx5: Add HW interfaces used by LAG")
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 30d15e80bcc7..1f9d4a8e6227 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9592,8 +9592,6 @@ struct mlx5_ifc_query_lag_out_bits {
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
-
 	struct mlx5_ifc_lagc_bits ctx;
 };
 
-- 
cgit v1.2.3


From aa306ab703e9452b1e25cc8e8f04b8df523d0bb8 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 24 Jul 2019 11:48:39 +0800
Subject: blk-mq: introduce blk_mq_request_completed()

NVMe needs this function to decide if one request to be aborted has
been completed in normal IO path already.

So introduce it.

Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 6 ++++++
 include/linux/blk-mq.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f78d3287dd82..8bb5854a62f3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -665,6 +665,12 @@ int blk_mq_request_started(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_mq_request_started);
 
+int blk_mq_request_completed(struct request *rq)
+{
+	return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
+}
+EXPORT_SYMBOL_GPL(blk_mq_request_completed);
+
 void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3fa1fa59f9b2..baac2926e54a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -296,6 +296,7 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 
 
 int blk_mq_request_started(struct request *rq);
+int blk_mq_request_completed(struct request *rq);
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, blk_status_t error);
 void __blk_mq_end_request(struct request *rq, blk_status_t error);
-- 
cgit v1.2.3


From f9934a80f91dba8c7029ba7601459e41ea7770aa Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 24 Jul 2019 11:48:40 +0800
Subject: blk-mq: introduce blk_mq_tagset_wait_completed_request()

blk-mq may schedule to call queue's complete function on remote CPU via
IPI, but doesn't provide any way to synchronize the request's complete
fn. The current queue freeze interface can't provide the synchonization
because aborted requests stay at blk-mq queues during EH.

In some driver's EH(such as NVMe), hardware queue's resource may be freed &
re-allocated. If the completed request's complete fn is run finally after the
hardware queue's resource is released, kernel crash will be triggered.

Prepare for fixing this kind of issue by introducing
blk_mq_tagset_wait_completed_request().

Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c     | 32 ++++++++++++++++++++++++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index da19f0bc8876..008388e82b5c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 
 #include <linux/blk-mq.h>
+#include <linux/delay.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
@@ -354,6 +355,37 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
+static bool blk_mq_tagset_count_completed_rqs(struct request *rq,
+		void *data, bool reserved)
+{
+	unsigned *count = data;
+
+	if (blk_mq_request_completed(rq))
+		(*count)++;
+	return true;
+}
+
+/**
+ * blk_mq_tagset_wait_completed_request - wait until all completed req's
+ * complete funtion is run
+ * @tagset:	Tag set to drain completed request
+ *
+ * Note: This function has to be run after all IO queues are shutdown
+ */
+void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset)
+{
+	while (true) {
+		unsigned count = 0;
+
+		blk_mq_tagset_busy_iter(tagset,
+				blk_mq_tagset_count_completed_rqs, &count);
+		if (!count)
+			break;
+		msleep(5);
+	}
+}
+EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
+
 /**
  * blk_mq_queue_tag_busy_iter - iterate over all requests with a driver tag
  * @q:		Request queue to examine.
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index baac2926e54a..ee0719b649b6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -322,6 +322,7 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 		busy_tag_iter_fn *fn, void *priv);
+void blk_mq_tagset_wait_completed_request(struct blk_mq_tag_set *tagset);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_unfreeze_queue(struct request_queue *q);
 void blk_freeze_queue_start(struct request_queue *q);
-- 
cgit v1.2.3


From a87ccce0b5a06ee546931859fa62e10f1bce54f9 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 24 Jul 2019 11:48:43 +0800
Subject: blk-mq: remove blk_mq_complete_request_sync

blk_mq_tagset_wait_completed_request() has been applied for waiting
for completed request's fn, so not necessary to use
blk_mq_complete_request_sync() any more.

Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c           | 7 -------
 drivers/nvme/host/core.c | 2 +-
 include/linux/blk-mq.h   | 1 -
 3 files changed, 1 insertion(+), 9 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8bb5854a62f3..6968de9d7402 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -652,13 +652,6 @@ bool blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-void blk_mq_complete_request_sync(struct request *rq)
-{
-	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
-	rq->q->mq_ops->complete(rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
-
 int blk_mq_request_started(struct request *rq)
 {
 	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bcede8c879d1..4ba374633dc8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -293,7 +293,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
 		return true;
 
 	nvme_req(req)->status = NVME_SC_ABORT_REQ;
-	blk_mq_complete_request_sync(req);
+	blk_mq_complete_request(req);
 	return true;
 }
 EXPORT_SYMBOL_GPL(nvme_cancel_request);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ee0719b649b6..1cdd2788cfa6 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -305,7 +305,6 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 bool blk_mq_complete_request(struct request *rq);
-void blk_mq_complete_request_sync(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 			   struct bio *bio, unsigned int nr_segs);
 bool blk_mq_queue_stopped(struct request_queue *q);
-- 
cgit v1.2.3


From af2c68fe94e8c0a628519b60ba070c5cf6526a99 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 1 Aug 2019 15:50:40 -0700
Subject: block: Declare several function pointer arguments 'const'

Make it clear to the compiler and also to humans that the functions
that query request queue properties do not modify any member of the
request_queue data structure.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c      |  7 ++++---
 include/linux/blkdev.h | 32 ++++++++++++++++----------------
 2 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 57f7990b342d..8344d94f13e0 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -144,7 +144,7 @@ static inline unsigned get_max_io_size(struct request_queue *q,
 	return sectors;
 }
 
-static unsigned get_max_segment_size(struct request_queue *q,
+static unsigned get_max_segment_size(const struct request_queue *q,
 				     unsigned offset)
 {
 	unsigned long mask = queue_segment_boundary(q);
@@ -161,8 +161,9 @@ static unsigned get_max_segment_size(struct request_queue *q,
  * Split the bvec @bv into segments, and update all kinds of
  * variables.
  */
-static bool bvec_split_segs(struct request_queue *q, struct bio_vec *bv,
-		unsigned *nsegs, unsigned *sectors, unsigned max_segs)
+static bool bvec_split_segs(const struct request_queue *q,
+			    const struct bio_vec *bv, unsigned *nsegs,
+			    unsigned *sectors, unsigned max_segs)
 {
 	unsigned len = bv->bv_len;
 	unsigned total_len = 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ef375dafb1c..96a29a72fd4a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1232,42 +1232,42 @@ enum blk_default_limits {
 	BLK_SEG_BOUNDARY_MASK	= 0xFFFFFFFFUL,
 };
 
-static inline unsigned long queue_segment_boundary(struct request_queue *q)
+static inline unsigned long queue_segment_boundary(const struct request_queue *q)
 {
 	return q->limits.seg_boundary_mask;
 }
 
-static inline unsigned long queue_virt_boundary(struct request_queue *q)
+static inline unsigned long queue_virt_boundary(const struct request_queue *q)
 {
 	return q->limits.virt_boundary_mask;
 }
 
-static inline unsigned int queue_max_sectors(struct request_queue *q)
+static inline unsigned int queue_max_sectors(const struct request_queue *q)
 {
 	return q->limits.max_sectors;
 }
 
-static inline unsigned int queue_max_hw_sectors(struct request_queue *q)
+static inline unsigned int queue_max_hw_sectors(const struct request_queue *q)
 {
 	return q->limits.max_hw_sectors;
 }
 
-static inline unsigned short queue_max_segments(struct request_queue *q)
+static inline unsigned short queue_max_segments(const struct request_queue *q)
 {
 	return q->limits.max_segments;
 }
 
-static inline unsigned short queue_max_discard_segments(struct request_queue *q)
+static inline unsigned short queue_max_discard_segments(const struct request_queue *q)
 {
 	return q->limits.max_discard_segments;
 }
 
-static inline unsigned int queue_max_segment_size(struct request_queue *q)
+static inline unsigned int queue_max_segment_size(const struct request_queue *q)
 {
 	return q->limits.max_segment_size;
 }
 
-static inline unsigned short queue_logical_block_size(struct request_queue *q)
+static inline unsigned short queue_logical_block_size(const struct request_queue *q)
 {
 	int retval = 512;
 
@@ -1282,7 +1282,7 @@ static inline unsigned short bdev_logical_block_size(struct block_device *bdev)
 	return queue_logical_block_size(bdev_get_queue(bdev));
 }
 
-static inline unsigned int queue_physical_block_size(struct request_queue *q)
+static inline unsigned int queue_physical_block_size(const struct request_queue *q)
 {
 	return q->limits.physical_block_size;
 }
@@ -1292,7 +1292,7 @@ static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
 	return queue_physical_block_size(bdev_get_queue(bdev));
 }
 
-static inline unsigned int queue_io_min(struct request_queue *q)
+static inline unsigned int queue_io_min(const struct request_queue *q)
 {
 	return q->limits.io_min;
 }
@@ -1302,7 +1302,7 @@ static inline int bdev_io_min(struct block_device *bdev)
 	return queue_io_min(bdev_get_queue(bdev));
 }
 
-static inline unsigned int queue_io_opt(struct request_queue *q)
+static inline unsigned int queue_io_opt(const struct request_queue *q)
 {
 	return q->limits.io_opt;
 }
@@ -1312,7 +1312,7 @@ static inline int bdev_io_opt(struct block_device *bdev)
 	return queue_io_opt(bdev_get_queue(bdev));
 }
 
-static inline int queue_alignment_offset(struct request_queue *q)
+static inline int queue_alignment_offset(const struct request_queue *q)
 {
 	if (q->limits.misaligned)
 		return -1;
@@ -1342,7 +1342,7 @@ static inline int bdev_alignment_offset(struct block_device *bdev)
 	return q->limits.alignment_offset;
 }
 
-static inline int queue_discard_alignment(struct request_queue *q)
+static inline int queue_discard_alignment(const struct request_queue *q)
 {
 	if (q->limits.discard_misaligned)
 		return -1;
@@ -1432,7 +1432,7 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
 	return 0;
 }
 
-static inline int queue_dma_alignment(struct request_queue *q)
+static inline int queue_dma_alignment(const struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
 }
@@ -1543,7 +1543,7 @@ static inline void blk_queue_max_integrity_segments(struct request_queue *q,
 }
 
 static inline unsigned short
-queue_max_integrity_segments(struct request_queue *q)
+queue_max_integrity_segments(const struct request_queue *q)
 {
 	return q->limits.max_integrity_segments;
 }
@@ -1626,7 +1626,7 @@ static inline void blk_queue_max_integrity_segments(struct request_queue *q,
 						    unsigned int segs)
 {
 }
-static inline unsigned short queue_max_integrity_segments(struct request_queue *q)
+static inline unsigned short queue_max_integrity_segments(const struct request_queue *q)
 {
 	return 0;
 }
-- 
cgit v1.2.3


From 012d4a652ca172d93315cb69f2adf7df37ea77e6 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 1 Aug 2019 15:39:07 -0700
Subject: block: Fix spelling in the header above blkg_lookup()

See also commit 8f4236d9008b ("block: remove QUEUE_FLAG_BYPASS and ->bypass") # v5.0.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 12811091fd50..0bb79d858a13 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -375,7 +375,7 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
  * @q: request_queue of interest
  *
  * Lookup blkg for the @blkcg - @q pair.  This function should be called
- * under RCU read loc.
+ * under RCU read lock.
  */
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
 					   struct request_queue *q)
-- 
cgit v1.2.3


From e84e8f0663956f45c747df5629046794cff93893 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Thu, 1 Aug 2019 10:26:35 -0700
Subject: block: add req op to reset all zones and flag

This patch introduces a new request operation REQ_OP_ZONE_RESET_ALL.
This is useful for the applications like mkfs where it needs to reset
all the zones present on the underlying block device. As part for this
patch we also introduce new QUEUE_FLAG_ZONE_RESETALL which indicates the
queue zone reset all capability and corresponding helper macro.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 2 ++
 include/linux/blkdev.h    | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1b1fa1557e68..d6ce7b3ec8b1 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -282,6 +282,8 @@ enum req_opf {
 	REQ_OP_ZONE_RESET	= 6,
 	/* write the same sector many times */
 	REQ_OP_WRITE_SAME	= 7,
+	/* reset all the zone present on the device */
+	REQ_OP_ZONE_RESET_ALL	= 8,
 	/* write the zero filled sector many times */
 	REQ_OP_WRITE_ZEROES	= 9,
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 96a29a72fd4a..167bf879f072 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -611,6 +611,7 @@ struct request_queue {
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 23	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
+#define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP))
@@ -630,6 +631,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
 #define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
+#define blk_queue_zone_resetall(q)	\
+	test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)
 #define blk_queue_secure_erase(q) \
 	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-- 
cgit v1.2.3


From 226b4fc75c78f9c497c5182d939101b260cfb9f3 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 25 Jul 2019 10:04:59 +0800
Subject: blk-mq: add callback of .cleanup_rq

SCSI maintains its own driver private data hooked off of each SCSI
request, and the pridate data won't be freed after scsi_queue_rq()
returns BLK_STS_RESOURCE or BLK_STS_DEV_RESOURCE. An upper layer driver
(e.g. dm-rq) may need to retry these SCSI requests, before SCSI has
fully dispatched them, due to a lower level SCSI driver's resource
limitation identified in scsi_queue_rq(). Currently SCSI's per-request
private data is leaked when the upper layer driver (dm-rq) frees and
then retries these requests in response to BLK_STS_RESOURCE or
BLK_STS_DEV_RESOURCE returns from scsi_queue_rq().

This usecase is so specialized that it doesn't warrant training an
existing blk-mq interface (e.g. blk_mq_free_request) to allow SCSI to
account for freeing its driver private data -- doing so would add an
extra branch for handling a special case that all other consumers of
SCSI (and blk-mq) won't ever need to worry about.

So the most pragmatic way forward is to delegate freeing SCSI driver
private data to the upper layer driver (dm-rq).  Do so by adding
new .cleanup_rq callback and calling a new blk_mq_cleanup_rq() method
from dm-rq.  A following commit will implement the .cleanup_rq() hook
in scsi_mq_ops.

Cc: Ewan D. Milne <emilne@redhat.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: dm-devel@redhat.com
Cc: <stable@vger.kernel.org>
Fixes: 396eaf21ee17 ("blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-rq.c     |  1 +
 include/linux/blk-mq.h | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include')

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index c9e44ac1f9a6..21d5c1784d0c 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -408,6 +408,7 @@ static int map_request(struct dm_rq_target_io *tio)
 		ret = dm_dispatch_clone_request(clone, rq);
 		if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
 			blk_rq_unprep_clone(clone);
+			blk_mq_cleanup_rq(clone);
 			tio->ti->type->release_clone_rq(clone, &tio->info);
 			tio->clone = NULL;
 			return DM_MAPIO_REQUEUE;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1cdd2788cfa6..21cebe901ac0 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -140,6 +140,7 @@ typedef int (poll_fn)(struct blk_mq_hw_ctx *);
 typedef int (map_queues_fn)(struct blk_mq_tag_set *set);
 typedef bool (busy_fn)(struct request_queue *);
 typedef void (complete_fn)(struct request *);
+typedef void (cleanup_rq_fn)(struct request *);
 
 
 struct blk_mq_ops {
@@ -200,6 +201,12 @@ struct blk_mq_ops {
 	/* Called from inside blk_get_request() */
 	void (*initialize_rq_fn)(struct request *rq);
 
+	/*
+	 * Called before freeing one request which isn't completed yet,
+	 * and usually for freeing the driver private data
+	 */
+	cleanup_rq_fn		*cleanup_rq;
+
 	/*
 	 * If set, returns whether or not this queue currently is busy
 	 */
@@ -367,4 +374,10 @@ static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
 			BLK_QC_T_INTERNAL;
 }
 
+static inline void blk_mq_cleanup_rq(struct request *rq)
+{
+	if (rq->q->mq_ops->cleanup_rq)
+		rq->q->mq_ops->cleanup_rq(rq);
+}
+
 #endif
-- 
cgit v1.2.3


From 0d1e16c640ee67dcae65ae8e3c6f58e6d8101047 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 16 Jul 2019 11:43:20 +0200
Subject: PM / Domains: Align in-parameter names for some genpd functions

Align in-parameter names for the declarations of pm_genpd_add|
remove_subdomain() and of_genpd_add_subdomain() according to their
implementations, as to improve consistency.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 91d9bf497071..baf02ff91a31 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -197,9 +197,9 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev)
 int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev);
 int pm_genpd_remove_device(struct device *dev);
 int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
-			   struct generic_pm_domain *new_subdomain);
+			   struct generic_pm_domain *subdomain);
 int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
-			      struct generic_pm_domain *target);
+			      struct generic_pm_domain *subdomain);
 int pm_genpd_init(struct generic_pm_domain *genpd,
 		  struct dev_power_governor *gov, bool is_off);
 int pm_genpd_remove(struct generic_pm_domain *genpd);
@@ -226,12 +226,12 @@ static inline int pm_genpd_remove_device(struct device *dev)
 	return -ENOSYS;
 }
 static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
-					 struct generic_pm_domain *new_sd)
+					 struct generic_pm_domain *subdomain)
 {
 	return -ENOSYS;
 }
 static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd,
-					    struct generic_pm_domain *target)
+					    struct generic_pm_domain *subdomain)
 {
 	return -ENOSYS;
 }
@@ -282,8 +282,8 @@ int of_genpd_add_provider_onecell(struct device_node *np,
 				  struct genpd_onecell_data *data);
 void of_genpd_del_provider(struct device_node *np);
 int of_genpd_add_device(struct of_phandle_args *args, struct device *dev);
-int of_genpd_add_subdomain(struct of_phandle_args *parent,
-			   struct of_phandle_args *new_subdomain);
+int of_genpd_add_subdomain(struct of_phandle_args *parent_spec,
+			   struct of_phandle_args *subdomain_spec);
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
 int of_genpd_parse_idle_states(struct device_node *dn,
 			       struct genpd_power_state **states, int *n);
@@ -316,8 +316,8 @@ static inline int of_genpd_add_device(struct of_phandle_args *args,
 	return -ENODEV;
 }
 
-static inline int of_genpd_add_subdomain(struct of_phandle_args *parent,
-					 struct of_phandle_args *new_subdomain)
+static inline int of_genpd_add_subdomain(struct of_phandle_args *parent_spec,
+					 struct of_phandle_args *subdomain_spec)
 {
 	return -ENODEV;
 }
-- 
cgit v1.2.3


From 3d6ade0abfe1ac833b525349ade156a269979cf0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sat, 20 Jul 2019 20:58:58 +0900
Subject: pinctrl: pinctrl-single: add header include guard

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Link: https://lore.kernel.org/r/20190720115858.7015-1-yamada.masahiro@socionext.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/pinctrl-single.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/pinctrl-single.h b/include/linux/platform_data/pinctrl-single.h
index 1cf36fdf9510..7473d3c4cabf 100644
--- a/include/linux/platform_data/pinctrl-single.h
+++ b/include/linux/platform_data/pinctrl-single.h
@@ -1,4 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PINCTRL_SINGLE_H
+#define _PINCTRL_SINGLE_H
+
 /**
  * irq:		optional wake-up interrupt
  * rearm:	optional soc specific rearm function
@@ -11,3 +15,5 @@ struct pcs_pdata {
 	int irq;
 	void (*rearm)(void);
 };
+
+#endif /* _PINCTRL_SINGLE_H */
-- 
cgit v1.2.3


From 17e433b54393a6269acbcb792da97791fe1592d8 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Mon, 5 Aug 2019 10:03:19 +0800
Subject: KVM: Fix leak vCPU's VMCS value into other pCPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit d73eb57b80b (KVM: Boost vCPUs that are delivering interrupts), a
five years old bug is exposed. Running ebizzy benchmark in three 80 vCPUs VMs
on one 80 pCPUs Skylake server, a lot of rcu_sched stall warning splatting
in the VMs after stress testing:

 INFO: rcu_sched detected stalls on CPUs/tasks: { 4 41 57 62 77} (detected by 15, t=60004 jiffies, g=899, c=898, q=15073)
 Call Trace:
   flush_tlb_mm_range+0x68/0x140
   tlb_flush_mmu.part.75+0x37/0xe0
   tlb_finish_mmu+0x55/0x60
   zap_page_range+0x142/0x190
   SyS_madvise+0x3cd/0x9c0
   system_call_fastpath+0x1c/0x21

swait_active() sustains to be true before finish_swait() is called in
kvm_vcpu_block(), voluntarily preempted vCPUs are taken into account
by kvm_vcpu_on_spin() loop greatly increases the probability condition
kvm_arch_vcpu_runnable(vcpu) is checked and can be true, when APICv
is enabled the yield-candidate vCPU's VMCS RVI field leaks(by
vmx_sync_pir_to_irr()) into spinning-on-a-taken-lock vCPU's current
VMCS.

This patch fixes it by checking conservatively a subset of events.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Marc Zyngier <Marc.Zyngier@arm.com>
Cc: stable@vger.kernel.org
Fixes: 98f4a1467 (KVM: add kvm_arch_vcpu_runnable() test to kvm_vcpu_on_spin() loop)
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/powerpc/kvm/powerpc.c      |  5 +++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              |  6 ++++++
 arch/x86/kvm/vmx/vmx.c          |  6 ++++++
 arch/x86/kvm/x86.c              | 16 ++++++++++++++++
 include/linux/kvm_host.h        |  1 +
 virt/kvm/kvm_main.c             | 25 ++++++++++++++++++++++++-
 7 files changed, 59 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0dba7eb24f92..3e34d5fa6708 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -50,6 +50,11 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 	return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
 }
 
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+	return kvm_arch_vcpu_runnable(vcpu);
+}
+
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
 	return false;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e74f0711eaaf..fc046ca89d32 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1175,6 +1175,7 @@ struct kvm_x86_ops {
 	int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq,
 			      uint32_t guest_irq, bool set);
 	void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
+	bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
 
 	int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
 			    bool *expired);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7eafc6907861..d685491fce4d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5190,6 +5190,11 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
 		kvm_vcpu_wake_up(vcpu);
 }
 
+static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+	return false;
+}
+
 static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
 {
 	unsigned long flags;
@@ -7314,6 +7319,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 
 	.pmu_ops = &amd_pmu_ops,
 	.deliver_posted_interrupt = svm_deliver_avic_intr,
+	.dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt,
 	.update_pi_irte = svm_update_pi_irte,
 	.setup_mce = svm_setup_mce,
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 074385c86c09..42ed3faa6af8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6117,6 +6117,11 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 	return max_irr;
 }
 
+static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
+{
+	return pi_test_on(vcpu_to_pi_desc(vcpu));
+}
+
 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
 {
 	if (!kvm_vcpu_apicv_active(vcpu))
@@ -7726,6 +7731,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
 	.sync_pir_to_irr = vmx_sync_pir_to_irr,
 	.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
+	.dy_apicv_has_pending_interrupt = vmx_dy_apicv_has_pending_interrupt,
 
 	.set_tss_addr = vmx_set_tss_addr,
 	.set_identity_map_addr = vmx_set_identity_map_addr,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c6d951cbd76c..93b0bd45ac73 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9698,6 +9698,22 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
 }
 
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+	if (READ_ONCE(vcpu->arch.pv.pv_unhalted))
+		return true;
+
+	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+		kvm_test_request(KVM_REQ_SMI, vcpu) ||
+		 kvm_test_request(KVM_REQ_EVENT, vcpu))
+		return true;
+
+	if (vcpu->arch.apicv_active && kvm_x86_ops->dy_apicv_has_pending_interrupt(vcpu))
+		return true;
+
+	return false;
+}
+
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.preempted_in_kernel;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5c5b5867024c..9e4c2bb90297 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -872,6 +872,7 @@ int kvm_arch_check_processor_compat(void);
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 /*
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ed061d8a457c..1f05aeb9da27 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2477,6 +2477,29 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 #endif
 }
 
+/*
+ * Unlike kvm_arch_vcpu_runnable, this function is called outside
+ * a vcpu_load/vcpu_put pair.  However, for most architectures
+ * kvm_arch_vcpu_runnable does not require vcpu_load.
+ */
+bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+	return kvm_arch_vcpu_runnable(vcpu);
+}
+
+static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
+{
+	if (kvm_arch_dy_runnable(vcpu))
+		return true;
+
+#ifdef CONFIG_KVM_ASYNC_PF
+	if (!list_empty_careful(&vcpu->async_pf.done))
+		return true;
+#endif
+
+	return false;
+}
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 {
 	struct kvm *kvm = me->kvm;
@@ -2506,7 +2529,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+			if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
 				continue;
 			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
 				!kvm_arch_vcpu_in_kernel(vcpu))
-- 
cgit v1.2.3


From 741cbbae0768b828be2d48331eb371a4f08bbea8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sat, 3 Aug 2019 08:14:25 +0200
Subject: KVM: remove kvm_arch_has_vcpu_debugfs()

There is no need for this function as all arches have to implement
kvm_arch_create_vcpu_debugfs() no matter what.  A #define symbol
let us actually simplify the code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/mips/kvm/mips.c            | 10 ----------
 arch/powerpc/kvm/powerpc.c      | 10 ----------
 arch/s390/kvm/kvm-s390.c        | 10 ----------
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/debugfs.c          |  5 -----
 include/linux/kvm_host.h        |  3 ++-
 virt/kvm/arm/arm.c              |  5 -----
 virt/kvm/kvm_main.c             |  5 ++---
 8 files changed, 6 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 2cfe839f0b3a..1109924560d8 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -150,16 +150,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	return 0;
 }
 
-bool kvm_arch_has_vcpu_debugfs(void)
-{
-	return false;
-}
-
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
 void kvm_mips_free_vcpus(struct kvm *kvm)
 {
 	unsigned int i;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3e34d5fa6708..3e566c2e6066 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -457,16 +457,6 @@ err_out:
 	return -EINVAL;
 }
 
-bool kvm_arch_has_vcpu_debugfs(void)
-{
-	return false;
-}
-
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	unsigned int i;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f520cd837fb..f329dcb3f44c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2516,16 +2516,6 @@ out_err:
 	return rc;
 }
 
-bool kvm_arch_has_vcpu_debugfs(void)
-{
-	return false;
-}
-
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc046ca89d32..e92725b2a46f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -35,6 +35,8 @@
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/hyperv-tlfs.h>
 
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
+
 #define KVM_MAX_VCPUS 288
 #define KVM_SOFT_MAX_VCPUS 240
 #define KVM_MAX_VCPU_ID 1023
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 329361b69d5e..9bd93e0d5f63 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -8,11 +8,6 @@
 #include <linux/debugfs.h>
 #include "lapic.h"
 
-bool kvm_arch_has_vcpu_debugfs(void)
-{
-	return true;
-}
-
 static int vcpu_get_timer_advance_ns(void *data, u64 *val)
 {
 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9e4c2bb90297..8d34db3c8bc6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -861,8 +861,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
-bool kvm_arch_has_vcpu_debugfs(void);
+#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
+#endif
 
 int kvm_arch_hardware_enable(void);
 void kvm_arch_hardware_disable(void);
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index acc43242a310..13f5a1aa6d79 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -144,11 +144,6 @@ out_fail_alloc:
 	return ret;
 }
 
-bool kvm_arch_has_vcpu_debugfs(void)
-{
-	return false;
-}
-
 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 {
 	return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1f05aeb9da27..4afb1a234018 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2617,12 +2617,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 
 static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 {
+#ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 	char dir_name[ITOA_MAX_LEN * 2];
 	int ret;
 
-	if (!kvm_arch_has_vcpu_debugfs())
-		return 0;
-
 	if (!debugfs_initialized())
 		return 0;
 
@@ -2637,6 +2635,7 @@ static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 		debugfs_remove_recursive(vcpu->debugfs_dentry);
 		return ret;
 	}
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3


From 3e7093d045196b1016517631645e874fe903db7e Mon Sep 17 00:00:00 2001
From: Greg KH <gregkh@linuxfoundation.org>
Date: Wed, 31 Jul 2019 20:56:20 +0200
Subject: KVM: no need to check return value of debugfs_create functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

Also, when doing this, change kvm_arch_create_vcpu_debugfs() to return
void instead of an integer, as we should not care at all about if this
function actually does anything or not.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Radim Krčmář" <rkrcmar@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <x86@kernel.org>
Cc: <kvm@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/debugfs.c   | 41 +++++++++++++----------------------------
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 21 +++++----------------
 3 files changed, 19 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 9bd93e0d5f63..018aebce33ff 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -43,37 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
 
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 {
-	struct dentry *ret;
+	debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu,
+			    &vcpu_tsc_offset_fops);
 
-	ret = debugfs_create_file("tsc-offset", 0444,
-							vcpu->debugfs_dentry,
-							vcpu, &vcpu_tsc_offset_fops);
-	if (!ret)
-		return -ENOMEM;
-
-	if (lapic_in_kernel(vcpu)) {
-		ret = debugfs_create_file("lapic_timer_advance_ns", 0444,
-								vcpu->debugfs_dentry,
-								vcpu, &vcpu_timer_advance_ns_fops);
-		if (!ret)
-			return -ENOMEM;
-	}
+	if (lapic_in_kernel(vcpu))
+		debugfs_create_file("lapic_timer_advance_ns", 0444,
+				    vcpu->debugfs_dentry, vcpu,
+				    &vcpu_timer_advance_ns_fops);
 
 	if (kvm_has_tsc_control) {
-		ret = debugfs_create_file("tsc-scaling-ratio", 0444,
-							vcpu->debugfs_dentry,
-							vcpu, &vcpu_tsc_scaling_fops);
-		if (!ret)
-			return -ENOMEM;
-		ret = debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
-							vcpu->debugfs_dentry,
-							vcpu, &vcpu_tsc_scaling_frac_fops);
-		if (!ret)
-			return -ENOMEM;
-
+		debugfs_create_file("tsc-scaling-ratio", 0444,
+				    vcpu->debugfs_dentry, vcpu,
+				    &vcpu_tsc_scaling_fops);
+		debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444,
+				    vcpu->debugfs_dentry, vcpu,
+				    &vcpu_tsc_scaling_frac_fops);
 	}
-
-	return 0;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8d34db3c8bc6..fcb46b3374c6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -862,7 +862,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
-int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu);
 #endif
 
 int kvm_arch_hardware_enable(void);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4afb1a234018..4feceaa03fb1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2615,29 +2615,20 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
 	return anon_inode_getfd(name, &kvm_vcpu_fops, vcpu, O_RDWR | O_CLOEXEC);
 }
 
-static int kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
+static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS
 	char dir_name[ITOA_MAX_LEN * 2];
-	int ret;
 
 	if (!debugfs_initialized())
-		return 0;
+		return;
 
 	snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id);
 	vcpu->debugfs_dentry = debugfs_create_dir(dir_name,
-								vcpu->kvm->debugfs_dentry);
-	if (!vcpu->debugfs_dentry)
-		return -ENOMEM;
+						  vcpu->kvm->debugfs_dentry);
 
-	ret = kvm_arch_create_vcpu_debugfs(vcpu);
-	if (ret < 0) {
-		debugfs_remove_recursive(vcpu->debugfs_dentry);
-		return ret;
-	}
+	kvm_arch_create_vcpu_debugfs(vcpu);
 #endif
-
-	return 0;
 }
 
 /*
@@ -2672,9 +2663,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	if (r)
 		goto vcpu_destroy;
 
-	r = kvm_create_vcpu_debugfs(vcpu);
-	if (r)
-		goto vcpu_destroy;
+	kvm_create_vcpu_debugfs(vcpu);
 
 	mutex_lock(&kvm->lock);
 	if (kvm_get_vcpu_by_id(kvm, id)) {
-- 
cgit v1.2.3


From 5eeaf10eec394b28fad2c58f1f5c3a5da0e87d1c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 2 Aug 2019 10:28:32 +0100
Subject: KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block

Since commit commit 328e56647944 ("KVM: arm/arm64: vgic: Defer
touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or
its GICv2 equivalent) loaded as long as we can, only syncing it
back when we're scheduled out.

There is a small snag with that though: kvm_vgic_vcpu_pending_irq(),
which is indirectly called from kvm_vcpu_check_block(), needs to
evaluate the guest's view of ICC_PMR_EL1. At the point were we
call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever
changes to PMR is not visible in memory until we do a vcpu_put().

Things go really south if the guest does the following:

	mov x0, #0	// or any small value masking interrupts
	msr ICC_PMR_EL1, x0

	[vcpu preempted, then rescheduled, VMCR sampled]

	mov x0, #ff	// allow all interrupts
	msr ICC_PMR_EL1, x0
	wfi		// traps to EL2, so samping of VMCR

	[interrupt arrives just after WFI]

Here, the hypervisor's view of PMR is zero, while the guest has enabled
its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no
interrupts are pending (despite an interrupt being received) and we'll
block for no reason. If the guest doesn't have a periodic interrupt
firing once it has blocked, it will stay there forever.

To avoid this unfortuante situation, let's resync VMCR from
kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block()
will observe the latest value of PMR.

This has been found by booting an arm64 Linux guest with the pseudo NMI
feature, and thus using interrupt priorities to mask interrupts instead
of the usual PSTATE masking.

Cc: stable@vger.kernel.org # 4.12
Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put")
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/kvm/arm_vgic.h      |  1 +
 virt/kvm/arm/arm.c          | 11 +++++++++++
 virt/kvm/arm/vgic/vgic-v2.c |  9 ++++++++-
 virt/kvm/arm/vgic/vgic-v3.c |  7 ++++++-
 virt/kvm/arm/vgic/vgic.c    | 11 +++++++++++
 virt/kvm/arm/vgic/vgic.h    |  2 ++
 6 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 46bbc949c20a..7a30524a80ee 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -350,6 +350,7 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
 
 void kvm_vgic_load(struct kvm_vcpu *vcpu);
 void kvm_vgic_put(struct kvm_vcpu *vcpu);
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	((k)->arch.vgic.initialized)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index c704fa696184..482b20256fa8 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -323,6 +323,17 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * If we're about to block (most likely because we've just hit a
+	 * WFI), we need to sync back the state of the GIC CPU interface
+	 * so that we have the lastest PMR and group enables. This ensures
+	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
+	 * whether we have pending interrupts.
+	 */
+	preempt_disable();
+	kvm_vgic_vmcr_sync(vcpu);
+	preempt_enable();
+
 	kvm_vgic_v4_enable_doorbell(vcpu);
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 6dd5ad706c92..96aab77d0471 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -484,10 +484,17 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
 		       kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
 
-void vgic_v2_put(struct kvm_vcpu *vcpu)
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
 
 	cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
+}
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+	struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+
+	vgic_v2_vmcr_sync(vcpu);
 	cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
 }
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index c2c9ce009f63..0c653a1e5215 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -662,12 +662,17 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
 		__vgic_v3_activate_traps(vcpu);
 }
 
-void vgic_v3_put(struct kvm_vcpu *vcpu)
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
 {
 	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
 	if (likely(cpu_if->vgic_sre))
 		cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
+}
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+	vgic_v3_vmcr_sync(vcpu);
 
 	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
 
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 04786c8ec77e..13d4b38a94ec 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -919,6 +919,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
 		vgic_v3_put(vcpu);
 }
 
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
+{
+	if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+		return;
+
+	if (kvm_vgic_global_state.type == VGIC_V2)
+		vgic_v2_vmcr_sync(vcpu);
+	else
+		vgic_v3_vmcr_sync(vcpu);
+}
+
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 57205beaa981..11adbdac1d56 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -193,6 +193,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
 void vgic_v2_init_lrs(void);
 void vgic_v2_load(struct kvm_vcpu *vcpu);
 void vgic_v2_put(struct kvm_vcpu *vcpu);
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
 
 void vgic_v2_save_state(struct kvm_vcpu *vcpu);
 void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -223,6 +224,7 @@ bool vgic_v3_check_base(struct kvm *kvm);
 
 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
 
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
-- 
cgit v1.2.3


From 4ff1fef10f353b928bcc9d56d31fda53f2c43191 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:49:48 +0900
Subject: ASoC: add soc-component.c

ALSA SoC has many snd_soc_component_xxx(), but these are randomly
located in many files. Because of it, code is difficult to read.
This patch creates new soc-component.c, and moves existing
snd_soc_component_xxx() into it.
But not yet fully. We need more cleanup it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87imrp5roa.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 328 ++++++++++++++++++++++++++++++++++++++++++
 include/sound/soc.h           | 306 +--------------------------------------
 sound/soc/Makefile            |   2 +-
 sound/soc/soc-component.c     | 269 ++++++++++++++++++++++++++++++++++
 sound/soc/soc-core.c          |  44 ------
 sound/soc/soc-jack.c          |  18 ---
 sound/soc/soc-utils.c         | 199 -------------------------
 7 files changed, 599 insertions(+), 567 deletions(-)
 create mode 100644 include/sound/soc-component.h
 create mode 100644 sound/soc/soc-component.c

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
new file mode 100644
index 000000000000..a97d499e5d7a
--- /dev/null
+++ b/include/sound/soc-component.h
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * soc-component.h
+ *
+ * Copyright (c) 2019 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __SOC_COMPONENT_H
+#define __SOC_COMPONENT_H
+
+#include <sound/soc.h>
+
+/*
+ * Component probe and remove ordering levels for components with runtime
+ * dependencies.
+ */
+#define SND_SOC_COMP_ORDER_FIRST	-2
+#define SND_SOC_COMP_ORDER_EARLY	-1
+#define SND_SOC_COMP_ORDER_NORMAL	 0
+#define SND_SOC_COMP_ORDER_LATE		 1
+#define SND_SOC_COMP_ORDER_LAST		 2
+
+#define for_each_comp_order(order)		\
+	for (order  = SND_SOC_COMP_ORDER_FIRST;	\
+	     order <= SND_SOC_COMP_ORDER_LAST;	\
+	     order++)
+
+/* component interface */
+struct snd_soc_component_driver {
+	const char *name;
+
+	/* Default control and setup, added after probe() is run */
+	const struct snd_kcontrol_new *controls;
+	unsigned int num_controls;
+	const struct snd_soc_dapm_widget *dapm_widgets;
+	unsigned int num_dapm_widgets;
+	const struct snd_soc_dapm_route *dapm_routes;
+	unsigned int num_dapm_routes;
+
+	int (*probe)(struct snd_soc_component *component);
+	void (*remove)(struct snd_soc_component *component);
+	int (*suspend)(struct snd_soc_component *component);
+	int (*resume)(struct snd_soc_component *component);
+
+	unsigned int (*read)(struct snd_soc_component *component,
+			     unsigned int reg);
+	int (*write)(struct snd_soc_component *component,
+		     unsigned int reg, unsigned int val);
+
+	/* pcm creation and destruction */
+	int (*pcm_new)(struct snd_soc_pcm_runtime *rtd);
+	void (*pcm_free)(struct snd_pcm *pcm);
+
+	/* component wide operations */
+	int (*set_sysclk)(struct snd_soc_component *component,
+			  int clk_id, int source, unsigned int freq, int dir);
+	int (*set_pll)(struct snd_soc_component *component, int pll_id,
+		       int source, unsigned int freq_in, unsigned int freq_out);
+	int (*set_jack)(struct snd_soc_component *component,
+			struct snd_soc_jack *jack,  void *data);
+
+	/* DT */
+	int (*of_xlate_dai_name)(struct snd_soc_component *component,
+				 struct of_phandle_args *args,
+				 const char **dai_name);
+	int (*of_xlate_dai_id)(struct snd_soc_component *comment,
+			       struct device_node *endpoint);
+	void (*seq_notifier)(struct snd_soc_component *component,
+			     enum snd_soc_dapm_type type, int subseq);
+	int (*stream_event)(struct snd_soc_component *component, int event);
+	int (*set_bias_level)(struct snd_soc_component *component,
+			      enum snd_soc_bias_level level);
+
+	const struct snd_pcm_ops *ops;
+	const struct snd_compr_ops *compr_ops;
+
+	/* probe ordering - for components with runtime dependencies */
+	int probe_order;
+	int remove_order;
+
+	/*
+	 * signal if the module handling the component should not be removed
+	 * if a pcm is open. Setting this would prevent the module
+	 * refcount being incremented in probe() but allow it be incremented
+	 * when a pcm is opened and decremented when it is closed.
+	 */
+	unsigned int module_get_upon_open:1;
+
+	/* bits */
+	unsigned int idle_bias_on:1;
+	unsigned int suspend_bias_off:1;
+	unsigned int use_pmdown_time:1; /* care pmdown_time at stop */
+	unsigned int endianness:1;
+	unsigned int non_legacy_dai_naming:1;
+
+	/* this component uses topology and ignore machine driver FEs */
+	const char *ignore_machine;
+	const char *topology_name_prefix;
+	int (*be_hw_params_fixup)(struct snd_soc_pcm_runtime *rtd,
+				  struct snd_pcm_hw_params *params);
+	bool use_dai_pcm_id;	/* use DAI link PCM ID as PCM device number */
+	int be_pcm_base;	/* base device ID for all BE PCMs */
+};
+
+struct snd_soc_component {
+	const char *name;
+	int id;
+	const char *name_prefix;
+	struct device *dev;
+	struct snd_soc_card *card;
+
+	unsigned int active;
+
+	unsigned int suspended:1; /* is in suspend PM state */
+
+	struct list_head list;
+	struct list_head card_aux_list; /* for auxiliary bound components */
+	struct list_head card_list;
+
+	const struct snd_soc_component_driver *driver;
+
+	struct list_head dai_list;
+	int num_dai;
+
+	struct regmap *regmap;
+	int val_bytes;
+
+	struct mutex io_mutex;
+
+	/* attached dynamic objects */
+	struct list_head dobj_list;
+
+	/*
+	 * DO NOT use any of the fields below in drivers, they are temporary and
+	 * are going to be removed again soon. If you use them in driver code
+	 * the driver will be marked as BROKEN when these fields are removed.
+	 */
+
+	/* Don't use these, use snd_soc_component_get_dapm() */
+	struct snd_soc_dapm_context dapm;
+
+	/* machine specific init */
+	int (*init)(struct snd_soc_component *component);
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_root;
+	const char *debugfs_prefix;
+#endif
+};
+
+#define for_each_component_dais(component, dai)\
+	list_for_each_entry(dai, &(component)->dai_list, list)
+#define for_each_component_dais_safe(component, dai, _dai)\
+	list_for_each_entry_safe(dai, _dai, &(component)->dai_list, list)
+
+/**
+ * snd_soc_dapm_to_component() - Casts a DAPM context to the component it is
+ *  embedded in
+ * @dapm: The DAPM context to cast to the component
+ *
+ * This function must only be used on DAPM contexts that are known to be part of
+ * a component (e.g. in a component driver). Otherwise the behavior is
+ * undefined.
+ */
+static inline struct snd_soc_component *snd_soc_dapm_to_component(
+	struct snd_soc_dapm_context *dapm)
+{
+	return container_of(dapm, struct snd_soc_component, dapm);
+}
+
+/**
+ * snd_soc_component_get_dapm() - Returns the DAPM context associated with a
+ *  component
+ * @component: The component for which to get the DAPM context
+ */
+static inline struct snd_soc_dapm_context *snd_soc_component_get_dapm(
+	struct snd_soc_component *component)
+{
+	return &component->dapm;
+}
+
+/**
+ * snd_soc_component_init_bias_level() - Initialize COMPONENT DAPM bias level
+ * @component: The COMPONENT for which to initialize the DAPM bias level
+ * @level: The DAPM level to initialize to
+ *
+ * Initializes the COMPONENT DAPM bias level. See snd_soc_dapm_init_bias_level()
+ */
+static inline void
+snd_soc_component_init_bias_level(struct snd_soc_component *component,
+				  enum snd_soc_bias_level level)
+{
+	snd_soc_dapm_init_bias_level(
+		snd_soc_component_get_dapm(component), level);
+}
+
+/**
+ * snd_soc_component_get_bias_level() - Get current COMPONENT DAPM bias level
+ * @component: The COMPONENT for which to get the DAPM bias level
+ *
+ * Returns: The current DAPM bias level of the COMPONENT.
+ */
+static inline enum snd_soc_bias_level
+snd_soc_component_get_bias_level(struct snd_soc_component *component)
+{
+	return snd_soc_dapm_get_bias_level(
+		snd_soc_component_get_dapm(component));
+}
+
+/**
+ * snd_soc_component_force_bias_level() - Set the COMPONENT DAPM bias level
+ * @component: The COMPONENT for which to set the level
+ * @level: The level to set to
+ *
+ * Forces the COMPONENT bias level to a specific state. See
+ * snd_soc_dapm_force_bias_level().
+ */
+static inline int
+snd_soc_component_force_bias_level(struct snd_soc_component *component,
+				   enum snd_soc_bias_level level)
+{
+	return snd_soc_dapm_force_bias_level(
+		snd_soc_component_get_dapm(component),
+		level);
+}
+
+/**
+ * snd_soc_dapm_kcontrol_component() - Returns the component associated to a
+ * kcontrol
+ * @kcontrol: The kcontrol
+ *
+ * This function must only be used on DAPM contexts that are known to be part of
+ * a COMPONENT (e.g. in a COMPONENT driver). Otherwise the behavior is undefined
+ */
+static inline struct snd_soc_component *snd_soc_dapm_kcontrol_component(
+	struct snd_kcontrol *kcontrol)
+{
+	return snd_soc_dapm_to_component(snd_soc_dapm_kcontrol_dapm(kcontrol));
+}
+
+/**
+ * snd_soc_component_cache_sync() - Sync the register cache with the hardware
+ * @component: COMPONENT to sync
+ *
+ * Note: This function will call regcache_sync()
+ */
+static inline int snd_soc_component_cache_sync(
+	struct snd_soc_component *component)
+{
+	return regcache_sync(component->regmap);
+}
+
+/* component IO */
+int snd_soc_component_read(struct snd_soc_component *component,
+			   unsigned int reg, unsigned int *val);
+unsigned int snd_soc_component_read32(struct snd_soc_component *component,
+				      unsigned int reg);
+int snd_soc_component_write(struct snd_soc_component *component,
+			    unsigned int reg, unsigned int val);
+int snd_soc_component_update_bits(struct snd_soc_component *component,
+				  unsigned int reg, unsigned int mask,
+				  unsigned int val);
+int snd_soc_component_update_bits_async(struct snd_soc_component *component,
+					unsigned int reg, unsigned int mask,
+					unsigned int val);
+void snd_soc_component_async_complete(struct snd_soc_component *component);
+int snd_soc_component_test_bits(struct snd_soc_component *component,
+				unsigned int reg, unsigned int mask,
+				unsigned int value);
+
+/* component wide operations */
+int snd_soc_component_set_sysclk(struct snd_soc_component *component,
+				 int clk_id, int source,
+				 unsigned int freq, int dir);
+int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id,
+			      int source, unsigned int freq_in,
+			      unsigned int freq_out);
+int snd_soc_component_set_jack(struct snd_soc_component *component,
+			       struct snd_soc_jack *jack, void *data);
+
+#ifdef CONFIG_REGMAP
+void snd_soc_component_init_regmap(struct snd_soc_component *component,
+				   struct regmap *regmap);
+void snd_soc_component_exit_regmap(struct snd_soc_component *component);
+#endif
+
+static inline void snd_soc_component_set_drvdata(struct snd_soc_component *c,
+						 void *data)
+{
+	dev_set_drvdata(c->dev, data);
+}
+
+static inline void *snd_soc_component_get_drvdata(struct snd_soc_component *c)
+{
+	return dev_get_drvdata(c->dev);
+}
+
+static inline bool snd_soc_component_is_active(
+	struct snd_soc_component *component)
+{
+	return component->active != 0;
+}
+
+/* component pin */
+int snd_soc_component_enable_pin(struct snd_soc_component *component,
+				 const char *pin);
+int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component,
+					  const char *pin);
+int snd_soc_component_disable_pin(struct snd_soc_component *component,
+				  const char *pin);
+int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component,
+					   const char *pin);
+int snd_soc_component_nc_pin(struct snd_soc_component *component,
+			     const char *pin);
+int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component,
+				      const char *pin);
+int snd_soc_component_get_pin_status(struct snd_soc_component *component,
+				     const char *pin);
+int snd_soc_component_force_enable_pin(struct snd_soc_component *component,
+				       const char *pin);
+int snd_soc_component_force_enable_pin_unlocked(
+	struct snd_soc_component *component,
+	const char *pin);
+
+#endif /* __SOC_COMPONENT_H */
diff --git a/include/sound/soc.h b/include/sound/soc.h
index f0797792dd8d..6ac6481b4882 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -362,21 +362,6 @@
 #define SOC_ENUM_SINGLE_VIRT_DECL(name, xtexts) \
 	const struct soc_enum name = SOC_ENUM_SINGLE_VIRT(ARRAY_SIZE(xtexts), xtexts)
 
-/*
- * Component probe and remove ordering levels for components with runtime
- * dependencies.
- */
-#define SND_SOC_COMP_ORDER_FIRST		-2
-#define SND_SOC_COMP_ORDER_EARLY		-1
-#define SND_SOC_COMP_ORDER_NORMAL		0
-#define SND_SOC_COMP_ORDER_LATE		1
-#define SND_SOC_COMP_ORDER_LAST		2
-
-#define for_each_comp_order(order)		\
-	for (order  = SND_SOC_COMP_ORDER_FIRST;	\
-	     order <= SND_SOC_COMP_ORDER_LAST;	\
-	     order++)
-
 /*
  * Bias levels
  *
@@ -747,132 +732,6 @@ struct snd_soc_compr_ops {
 	int (*trigger)(struct snd_compr_stream *);
 };
 
-/* component interface */
-struct snd_soc_component_driver {
-	const char *name;
-
-	/* Default control and setup, added after probe() is run */
-	const struct snd_kcontrol_new *controls;
-	unsigned int num_controls;
-	const struct snd_soc_dapm_widget *dapm_widgets;
-	unsigned int num_dapm_widgets;
-	const struct snd_soc_dapm_route *dapm_routes;
-	unsigned int num_dapm_routes;
-
-	int (*probe)(struct snd_soc_component *);
-	void (*remove)(struct snd_soc_component *);
-	int (*suspend)(struct snd_soc_component *);
-	int (*resume)(struct snd_soc_component *);
-
-	unsigned int (*read)(struct snd_soc_component *, unsigned int);
-	int (*write)(struct snd_soc_component *, unsigned int, unsigned int);
-
-	/* pcm creation and destruction */
-	int (*pcm_new)(struct snd_soc_pcm_runtime *);
-	void (*pcm_free)(struct snd_pcm *);
-
-	/* component wide operations */
-	int (*set_sysclk)(struct snd_soc_component *component,
-			  int clk_id, int source, unsigned int freq, int dir);
-	int (*set_pll)(struct snd_soc_component *component, int pll_id,
-		       int source, unsigned int freq_in, unsigned int freq_out);
-	int (*set_jack)(struct snd_soc_component *component,
-			struct snd_soc_jack *jack,  void *data);
-
-	/* DT */
-	int (*of_xlate_dai_name)(struct snd_soc_component *component,
-				 struct of_phandle_args *args,
-				 const char **dai_name);
-	int (*of_xlate_dai_id)(struct snd_soc_component *comment,
-			       struct device_node *endpoint);
-	void (*seq_notifier)(struct snd_soc_component *, enum snd_soc_dapm_type,
-		int subseq);
-	int (*stream_event)(struct snd_soc_component *, int event);
-	int (*set_bias_level)(struct snd_soc_component *component,
-			      enum snd_soc_bias_level level);
-
-	const struct snd_pcm_ops *ops;
-	const struct snd_compr_ops *compr_ops;
-
-	/* probe ordering - for components with runtime dependencies */
-	int probe_order;
-	int remove_order;
-
-	/*
-	 * signal if the module handling the component should not be removed
-	 * if a pcm is open. Setting this would prevent the module
-	 * refcount being incremented in probe() but allow it be incremented
-	 * when a pcm is opened and decremented when it is closed.
-	 */
-	unsigned int module_get_upon_open:1;
-
-	/* bits */
-	unsigned int idle_bias_on:1;
-	unsigned int suspend_bias_off:1;
-	unsigned int use_pmdown_time:1; /* care pmdown_time at stop */
-	unsigned int endianness:1;
-	unsigned int non_legacy_dai_naming:1;
-
-	/* this component uses topology and ignore machine driver FEs */
-	const char *ignore_machine;
-	const char *topology_name_prefix;
-	int (*be_hw_params_fixup)(struct snd_soc_pcm_runtime *rtd,
-				  struct snd_pcm_hw_params *params);
-	bool use_dai_pcm_id;	/* use the DAI link PCM ID as PCM device number */
-	int be_pcm_base;	/* base device ID for all BE PCMs */
-};
-
-struct snd_soc_component {
-	const char *name;
-	int id;
-	const char *name_prefix;
-	struct device *dev;
-	struct snd_soc_card *card;
-
-	unsigned int active;
-
-	unsigned int suspended:1; /* is in suspend PM state */
-
-	struct list_head list;
-	struct list_head card_aux_list; /* for auxiliary bound components */
-	struct list_head card_list;
-
-	const struct snd_soc_component_driver *driver;
-
-	struct list_head dai_list;
-	int num_dai;
-
-	struct regmap *regmap;
-	int val_bytes;
-
-	struct mutex io_mutex;
-
-	/* attached dynamic objects */
-	struct list_head dobj_list;
-
-	/*
-	* DO NOT use any of the fields below in drivers, they are temporary and
-	* are going to be removed again soon. If you use them in driver code the
-	* driver will be marked as BROKEN when these fields are removed.
-	*/
-
-	/* Don't use these, use snd_soc_component_get_dapm() */
-	struct snd_soc_dapm_context dapm;
-
-	/* machine specific init */
-	int (*init)(struct snd_soc_component *component);
-
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_root;
-	const char *debugfs_prefix;
-#endif
-};
-
-#define for_each_component_dais(component, dai)\
-	list_for_each_entry(dai, &(component)->dai_list, list)
-#define for_each_component_dais_safe(component, dai, _dai)\
-	list_for_each_entry_safe(dai, _dai, &(component)->dai_list, list)
-
 struct snd_soc_rtdcom_list {
 	struct snd_soc_component *component;
 	struct list_head list; /* rtd::component_list */
@@ -1337,134 +1196,6 @@ struct soc_enum {
 	struct snd_soc_dobj dobj;
 };
 
-/**
- * snd_soc_dapm_to_component() - Casts a DAPM context to the component it is
- *  embedded in
- * @dapm: The DAPM context to cast to the component
- *
- * This function must only be used on DAPM contexts that are known to be part of
- * a component (e.g. in a component driver). Otherwise the behavior is
- * undefined.
- */
-static inline struct snd_soc_component *snd_soc_dapm_to_component(
-	struct snd_soc_dapm_context *dapm)
-{
-	return container_of(dapm, struct snd_soc_component, dapm);
-}
-
-/**
- * snd_soc_component_get_dapm() - Returns the DAPM context associated with a
- *  component
- * @component: The component for which to get the DAPM context
- */
-static inline struct snd_soc_dapm_context *snd_soc_component_get_dapm(
-	struct snd_soc_component *component)
-{
-	return &component->dapm;
-}
-
-/**
- * snd_soc_component_init_bias_level() - Initialize COMPONENT DAPM bias level
- * @component: The COMPONENT for which to initialize the DAPM bias level
- * @level: The DAPM level to initialize to
- *
- * Initializes the COMPONENT DAPM bias level. See snd_soc_dapm_init_bias_level().
- */
-static inline void
-snd_soc_component_init_bias_level(struct snd_soc_component *component,
-				  enum snd_soc_bias_level level)
-{
-	snd_soc_dapm_init_bias_level(
-		snd_soc_component_get_dapm(component), level);
-}
-
-/**
- * snd_soc_component_get_bias_level() - Get current COMPONENT DAPM bias level
- * @component: The COMPONENT for which to get the DAPM bias level
- *
- * Returns: The current DAPM bias level of the COMPONENT.
- */
-static inline enum snd_soc_bias_level
-snd_soc_component_get_bias_level(struct snd_soc_component *component)
-{
-	return snd_soc_dapm_get_bias_level(
-		snd_soc_component_get_dapm(component));
-}
-
-/**
- * snd_soc_component_force_bias_level() - Set the COMPONENT DAPM bias level
- * @component: The COMPONENT for which to set the level
- * @level: The level to set to
- *
- * Forces the COMPONENT bias level to a specific state. See
- * snd_soc_dapm_force_bias_level().
- */
-static inline int
-snd_soc_component_force_bias_level(struct snd_soc_component *component,
-				   enum snd_soc_bias_level level)
-{
-	return snd_soc_dapm_force_bias_level(
-		snd_soc_component_get_dapm(component),
-		level);
-}
-
-/**
- * snd_soc_dapm_kcontrol_component() - Returns the component associated to a kcontrol
- * @kcontrol: The kcontrol
- *
- * This function must only be used on DAPM contexts that are known to be part of
- * a COMPONENT (e.g. in a COMPONENT driver). Otherwise the behavior is undefined.
- */
-static inline struct snd_soc_component *snd_soc_dapm_kcontrol_component(
-	struct snd_kcontrol *kcontrol)
-{
-	return snd_soc_dapm_to_component(snd_soc_dapm_kcontrol_dapm(kcontrol));
-}
-
-/**
- * snd_soc_component_cache_sync() - Sync the register cache with the hardware
- * @component: COMPONENT to sync
- *
- * Note: This function will call regcache_sync()
- */
-static inline int snd_soc_component_cache_sync(
-	struct snd_soc_component *component)
-{
-	return regcache_sync(component->regmap);
-}
-
-/* component IO */
-int snd_soc_component_read(struct snd_soc_component *component,
-	unsigned int reg, unsigned int *val);
-unsigned int snd_soc_component_read32(struct snd_soc_component *component,
-				      unsigned int reg);
-int snd_soc_component_write(struct snd_soc_component *component,
-	unsigned int reg, unsigned int val);
-int snd_soc_component_update_bits(struct snd_soc_component *component,
-	unsigned int reg, unsigned int mask, unsigned int val);
-int snd_soc_component_update_bits_async(struct snd_soc_component *component,
-	unsigned int reg, unsigned int mask, unsigned int val);
-void snd_soc_component_async_complete(struct snd_soc_component *component);
-int snd_soc_component_test_bits(struct snd_soc_component *component,
-	unsigned int reg, unsigned int mask, unsigned int value);
-
-/* component wide operations */
-int snd_soc_component_set_sysclk(struct snd_soc_component *component,
-			int clk_id, int source, unsigned int freq, int dir);
-int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id,
-			      int source, unsigned int freq_in,
-			      unsigned int freq_out);
-int snd_soc_component_set_jack(struct snd_soc_component *component,
-			       struct snd_soc_jack *jack, void *data);
-
-#ifdef CONFIG_REGMAP
-
-void snd_soc_component_init_regmap(struct snd_soc_component *component,
-	struct regmap *regmap);
-void snd_soc_component_exit_regmap(struct snd_soc_component *component);
-
-#endif
-
 /* device driver data */
 
 static inline void snd_soc_card_set_drvdata(struct snd_soc_card *card,
@@ -1478,17 +1209,6 @@ static inline void *snd_soc_card_get_drvdata(struct snd_soc_card *card)
 	return card->drvdata;
 }
 
-static inline void snd_soc_component_set_drvdata(struct snd_soc_component *c,
-		void *data)
-{
-	dev_set_drvdata(c->dev, data);
-}
-
-static inline void *snd_soc_component_get_drvdata(struct snd_soc_component *c)
-{
-	return dev_get_drvdata(c->dev);
-}
-
 static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card)
 {
 	INIT_LIST_HEAD(&card->widgets);
@@ -1535,12 +1255,6 @@ static inline unsigned int snd_soc_enum_item_to_val(struct soc_enum *e,
 	return e->values[item];
 }
 
-static inline bool snd_soc_component_is_active(
-	struct snd_soc_component *component)
-{
-	return component->active != 0;
-}
-
 /**
  * snd_soc_kcontrol_component() - Returns the component that registered the
  *  control
@@ -1676,24 +1390,6 @@ static inline void snd_soc_dapm_mutex_unlock(struct snd_soc_dapm_context *dapm)
 	mutex_unlock(&dapm->card->dapm_mutex);
 }
 
-int snd_soc_component_enable_pin(struct snd_soc_component *component,
-				 const char *pin);
-int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component,
-					  const char *pin);
-int snd_soc_component_disable_pin(struct snd_soc_component *component,
-				  const char *pin);
-int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component,
-					   const char *pin);
-int snd_soc_component_nc_pin(struct snd_soc_component *component,
-			     const char *pin);
-int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component,
-				      const char *pin);
-int snd_soc_component_get_pin_status(struct snd_soc_component *component,
-				     const char *pin);
-int snd_soc_component_force_enable_pin(struct snd_soc_component *component,
-				       const char *pin);
-int snd_soc_component_force_enable_pin_unlocked(
-					struct snd_soc_component *component,
-					const char *pin);
+#include <sound/soc-component.h>
 
 #endif
diff --git a/sound/soc/Makefile b/sound/soc/Makefile
index 919c3c027c62..250a0dea9294 100644
--- a/sound/soc/Makefile
+++ b/sound/soc/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-snd-soc-core-objs := soc-core.o soc-dapm.o soc-jack.o soc-utils.o soc-dai.o
+snd-soc-core-objs := soc-core.o soc-dapm.o soc-jack.o soc-utils.o soc-dai.o soc-component.o
 snd-soc-core-objs += soc-pcm.o soc-io.o soc-devres.o soc-ops.o
 snd-soc-core-$(CONFIG_SND_SOC_COMPRESS) += soc-compress.o
 
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
new file mode 100644
index 000000000000..e19f78bfb919
--- /dev/null
+++ b/sound/soc/soc-component.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// soc-component.c
+//
+// Copyright (C) 2019 Renesas Electronics Corp.
+// Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+//
+#include <sound/soc.h>
+
+/**
+ * snd_soc_component_set_sysclk - configure COMPONENT system or master clock.
+ * @component: COMPONENT
+ * @clk_id: DAI specific clock ID
+ * @source: Source for the clock
+ * @freq: new clock frequency in Hz
+ * @dir: new clock direction - input/output.
+ *
+ * Configures the CODEC master (MCLK) or system (SYSCLK) clocking.
+ */
+int snd_soc_component_set_sysclk(struct snd_soc_component *component,
+				 int clk_id, int source, unsigned int freq,
+				 int dir)
+{
+	if (component->driver->set_sysclk)
+		return component->driver->set_sysclk(component, clk_id, source,
+						     freq, dir);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_set_sysclk);
+
+/*
+ * snd_soc_component_set_pll - configure component PLL.
+ * @component: COMPONENT
+ * @pll_id: DAI specific PLL ID
+ * @source: DAI specific source for the PLL
+ * @freq_in: PLL input clock frequency in Hz
+ * @freq_out: requested PLL output clock frequency in Hz
+ *
+ * Configures and enables PLL to generate output clock based on input clock.
+ */
+int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id,
+			      int source, unsigned int freq_in,
+			      unsigned int freq_out)
+{
+	if (component->driver->set_pll)
+		return component->driver->set_pll(component, pll_id, source,
+						  freq_in, freq_out);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_set_pll);
+
+int snd_soc_component_enable_pin(struct snd_soc_component *component,
+				 const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_enable_pin(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_enable_pin(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin);
+
+int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component,
+					  const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_enable_pin_unlocked(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_enable_pin_unlocked(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin_unlocked);
+
+int snd_soc_component_disable_pin(struct snd_soc_component *component,
+				  const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_disable_pin(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_disable_pin(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin);
+
+int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component,
+					   const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_disable_pin_unlocked(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_disable_pin_unlocked(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin_unlocked);
+
+int snd_soc_component_nc_pin(struct snd_soc_component *component,
+			     const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_nc_pin(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_nc_pin(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin);
+
+int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component,
+				      const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_nc_pin_unlocked(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_nc_pin_unlocked(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin_unlocked);
+
+int snd_soc_component_get_pin_status(struct snd_soc_component *component,
+				     const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_get_pin_status(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_get_pin_status(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_get_pin_status);
+
+int snd_soc_component_force_enable_pin(struct snd_soc_component *component,
+				       const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_force_enable_pin(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_force_enable_pin(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin);
+
+int snd_soc_component_force_enable_pin_unlocked(
+	struct snd_soc_component *component,
+	const char *pin)
+{
+	struct snd_soc_dapm_context *dapm =
+		snd_soc_component_get_dapm(component);
+	char *full_name;
+	int ret;
+
+	if (!component->name_prefix)
+		return snd_soc_dapm_force_enable_pin_unlocked(dapm, pin);
+
+	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
+	if (!full_name)
+		return -ENOMEM;
+
+	ret = snd_soc_dapm_force_enable_pin_unlocked(dapm, full_name);
+	kfree(full_name);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked);
+
+/**
+ * snd_soc_component_set_jack - configure component jack.
+ * @component: COMPONENTs
+ * @jack: structure to use for the jack
+ * @data: can be used if codec driver need extra data for configuring jack
+ *
+ * Configures and enables jack detection function.
+ */
+int snd_soc_component_set_jack(struct snd_soc_component *component,
+			       struct snd_soc_jack *jack, void *data)
+{
+	if (component->driver->set_jack)
+		return component->driver->set_jack(component, jack, data);
+
+	return -ENOTSUPP;
+}
+EXPORT_SYMBOL_GPL(snd_soc_component_set_jack);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index ed66d2c68d10..dc3e45547da2 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2345,50 +2345,6 @@ int snd_soc_add_dai_controls(struct snd_soc_dai *dai,
 }
 EXPORT_SYMBOL_GPL(snd_soc_add_dai_controls);
 
-/**
- * snd_soc_component_set_sysclk - configure COMPONENT system or master clock.
- * @component: COMPONENT
- * @clk_id: DAI specific clock ID
- * @source: Source for the clock
- * @freq: new clock frequency in Hz
- * @dir: new clock direction - input/output.
- *
- * Configures the CODEC master (MCLK) or system (SYSCLK) clocking.
- */
-int snd_soc_component_set_sysclk(struct snd_soc_component *component,
-				 int clk_id, int source, unsigned int freq,
-				 int dir)
-{
-	if (component->driver->set_sysclk)
-		return component->driver->set_sysclk(component, clk_id, source,
-						 freq, dir);
-
-	return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_set_sysclk);
-
-/*
- * snd_soc_component_set_pll - configure component PLL.
- * @component: COMPONENT
- * @pll_id: DAI specific PLL ID
- * @source: DAI specific source for the PLL
- * @freq_in: PLL input clock frequency in Hz
- * @freq_out: requested PLL output clock frequency in Hz
- *
- * Configures and enables PLL to generate output clock based on input clock.
- */
-int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id,
-			      int source, unsigned int freq_in,
-			      unsigned int freq_out)
-{
-	if (component->driver->set_pll)
-		return component->driver->set_pll(component, pll_id, source,
-						  freq_in, freq_out);
-
-	return -EINVAL;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_set_pll);
-
 static int snd_soc_bind_card(struct snd_soc_card *card)
 {
 	struct snd_soc_pcm_runtime *rtd;
diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c
index c7b990abdbaa..a71d2340eb05 100644
--- a/sound/soc/soc-jack.c
+++ b/sound/soc/soc-jack.c
@@ -23,24 +23,6 @@ struct jack_gpio_tbl {
 	struct snd_soc_jack_gpio *gpios;
 };
 
-/**
- * snd_soc_component_set_jack - configure component jack.
- * @component: COMPONENTs
- * @jack: structure to use for the jack
- * @data: can be used if codec driver need extra data for configuring jack
- *
- * Configures and enables jack detection function.
- */
-int snd_soc_component_set_jack(struct snd_soc_component *component,
-			       struct snd_soc_jack *jack, void *data)
-{
-	if (component->driver->set_jack)
-		return component->driver->set_jack(component, jack, data);
-
-	return -ENOTSUPP;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_set_jack);
-
 /**
  * snd_soc_card_jack_new - Create a new jack
  * @card:  ASoC card
diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c
index e3b9dd634c6d..54dcece52b0c 100644
--- a/sound/soc/soc-utils.c
+++ b/sound/soc/soc-utils.c
@@ -52,205 +52,6 @@ int snd_soc_params_to_bclk(struct snd_pcm_hw_params *params)
 }
 EXPORT_SYMBOL_GPL(snd_soc_params_to_bclk);
 
-int snd_soc_component_enable_pin(struct snd_soc_component *component,
-				 const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_enable_pin(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_enable_pin(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin);
-
-int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component,
-					  const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_enable_pin_unlocked(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_enable_pin_unlocked(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin_unlocked);
-
-int snd_soc_component_disable_pin(struct snd_soc_component *component,
-				  const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_disable_pin(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_disable_pin(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin);
-
-int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component,
-					   const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_disable_pin_unlocked(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_disable_pin_unlocked(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin_unlocked);
-
-int snd_soc_component_nc_pin(struct snd_soc_component *component,
-			     const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_nc_pin(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_nc_pin(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin);
-
-int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component,
-				      const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_nc_pin_unlocked(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_nc_pin_unlocked(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin_unlocked);
-
-int snd_soc_component_get_pin_status(struct snd_soc_component *component,
-				     const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_get_pin_status(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_get_pin_status(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_get_pin_status);
-
-int snd_soc_component_force_enable_pin(struct snd_soc_component *component,
-				       const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_force_enable_pin(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_force_enable_pin(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin);
-
-int snd_soc_component_force_enable_pin_unlocked(
-					struct snd_soc_component *component,
-					const char *pin)
-{
-	struct snd_soc_dapm_context *dapm =
-		snd_soc_component_get_dapm(component);
-	char *full_name;
-	int ret;
-
-	if (!component->name_prefix)
-		return snd_soc_dapm_force_enable_pin_unlocked(dapm, pin);
-
-	full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin);
-	if (!full_name)
-		return -ENOMEM;
-
-	ret = snd_soc_dapm_force_enable_pin_unlocked(dapm, full_name);
-	kfree(full_name);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked);
-
 static const struct snd_pcm_hardware dummy_dma_hardware = {
 	/* Random values to keep userspace happy when checking constraints */
 	.info			= SNDRV_PCM_INFO_INTERLEAVED |
-- 
cgit v1.2.3


From 4a81e8f30d0b422b7f10562952124d719f73b071 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:49:54 +0900
Subject: ASoC: soc-component: add snd_soc_component_get/put()

ALSA SoC is calling try_module_get()/module_put() based on
component->driver->module_get_upon_open.
To keep simple and readable code, we should create its function.
This patch adds new snd_soc_component_get/put().

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87h8795ro4.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 13 +++++++++++++
 sound/soc/soc-component.c     | 18 ++++++++++++++++++
 sound/soc/soc-core.c          |  9 ++++-----
 sound/soc/soc-pcm.c           | 10 ++++------
 4 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index a97d499e5d7a..a76cadf49a16 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -287,6 +287,19 @@ void snd_soc_component_init_regmap(struct snd_soc_component *component,
 void snd_soc_component_exit_regmap(struct snd_soc_component *component);
 #endif
 
+#define snd_soc_component_module_get_when_probe(component)\
+	snd_soc_component_module_get(component, 0)
+#define snd_soc_component_module_get_when_open(component)	\
+	snd_soc_component_module_get(component, 1)
+int snd_soc_component_module_get(struct snd_soc_component *component,
+				 int upon_open);
+#define snd_soc_component_module_put_when_remove(component)	\
+	snd_soc_component_module_put(component, 0)
+#define snd_soc_component_module_put_when_close(component)	\
+	snd_soc_component_module_put(component, 1)
+void snd_soc_component_module_put(struct snd_soc_component *component,
+				  int upon_open);
+
 static inline void snd_soc_component_set_drvdata(struct snd_soc_component *c,
 						 void *data)
 {
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index e19f78bfb919..ac2d7bd5d844 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -5,6 +5,7 @@
 // Copyright (C) 2019 Renesas Electronics Corp.
 // Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
 //
+#include <linux/module.h>
 #include <sound/soc.h>
 
 /**
@@ -267,3 +268,20 @@ int snd_soc_component_set_jack(struct snd_soc_component *component,
 	return -ENOTSUPP;
 }
 EXPORT_SYMBOL_GPL(snd_soc_component_set_jack);
+
+int snd_soc_component_module_get(struct snd_soc_component *component,
+				 int upon_open)
+{
+	if (component->driver->module_get_upon_open == !!upon_open &&
+	    !try_module_get(component->dev->driver->owner))
+		return -ENODEV;
+
+	return 0;
+}
+
+void snd_soc_component_module_put(struct snd_soc_component *component,
+				  int upon_open)
+{
+	if (component->driver->module_get_upon_open == !!upon_open)
+		module_put(component->dev->driver->owner);
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index dc3e45547da2..9ba19efcc56c 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -926,8 +926,7 @@ static void soc_cleanup_component(struct snd_soc_component *component)
 	snd_soc_dapm_free(snd_soc_component_get_dapm(component));
 	soc_cleanup_component_debugfs(component);
 	component->card = NULL;
-	if (!component->driver->module_get_upon_open)
-		module_put(component->dev->driver->owner);
+	snd_soc_component_module_put_when_close(component);
 }
 
 static void soc_remove_component(struct snd_soc_component *component)
@@ -1255,9 +1254,9 @@ static int soc_probe_component(struct snd_soc_card *card,
 		return 0;
 	}
 
-	if (!component->driver->module_get_upon_open &&
-	    !try_module_get(component->dev->driver->owner))
-		return -ENODEV;
+	ret = snd_soc_component_module_get_when_probe(component);
+	if (ret < 0)
+		return ret;
 
 	component->card = card;
 	dapm->card = card;
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 1e6c4e226933..5fef18507286 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -15,7 +15,6 @@
 #include <linux/delay.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/export.h>
@@ -440,12 +439,12 @@ static int soc_pcm_components_open(struct snd_pcm_substream *substream,
 		component = rtdcom->component;
 		*last = component;
 
-		if (component->driver->module_get_upon_open &&
-		    !try_module_get(component->dev->driver->owner)) {
+		ret = snd_soc_component_module_get_when_open(component);
+		if (ret < 0) {
 			dev_err(component->dev,
 				"ASoC: can't get module %s\n",
 				component->name);
-			return -ENODEV;
+			return ret;
 		}
 
 		if (!component->driver->ops ||
@@ -481,8 +480,7 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream,
 		    component->driver->ops->close)
 			component->driver->ops->close(substream);
 
-		if (component->driver->module_get_upon_open)
-			module_put(component->dev->driver->owner);
+		snd_soc_component_module_put_when_close(component);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From ae2f4849286eed48a3aa79a7b73bb5bcd0c9213b Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:01 +0900
Subject: ASoC: soc-component: add snd_soc_component_open()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_open() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87ftmt5rnx.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  4 ++++
 sound/soc/soc-component.c     | 10 ++++++++++
 sound/soc/soc-pcm.c           |  6 +-----
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index a76cadf49a16..156b1a5b6ddd 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -338,4 +338,8 @@ int snd_soc_component_force_enable_pin_unlocked(
 	struct snd_soc_component *component,
 	const char *pin);
 
+/* component driver ops */
+int snd_soc_component_open(struct snd_soc_component *component,
+			   struct snd_pcm_substream *substream);
+
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index ac2d7bd5d844..ada46f9729b2 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -285,3 +285,13 @@ void snd_soc_component_module_put(struct snd_soc_component *component,
 	if (component->driver->module_get_upon_open == !!upon_open)
 		module_put(component->dev->driver->owner);
 }
+
+int snd_soc_component_open(struct snd_soc_component *component,
+			   struct snd_pcm_substream *substream)
+{
+	if (component->driver->ops &&
+	    component->driver->ops->open)
+		return component->driver->ops->open(substream);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 5fef18507286..caf7028cee62 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -447,11 +447,7 @@ static int soc_pcm_components_open(struct snd_pcm_substream *substream,
 			return ret;
 		}
 
-		if (!component->driver->ops ||
-		    !component->driver->ops->open)
-			continue;
-
-		ret = component->driver->ops->open(substream);
+		ret = snd_soc_component_open(component, substream);
 		if (ret < 0) {
 			dev_err(component->dev,
 				"ASoC: can't open component %s: %d\n",
-- 
cgit v1.2.3


From 3672beb8cad6beb7b8c017514aef0f4f507debcf Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:07 +0900
Subject: ASoC: soc-component: add snd_soc_component_close()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_close() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87ef2d5rnr.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 10 ++++++++++
 sound/soc/soc-pcm.c           |  8 +++-----
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 156b1a5b6ddd..8dacac953884 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -341,5 +341,7 @@ int snd_soc_component_force_enable_pin_unlocked(
 /* component driver ops */
 int snd_soc_component_open(struct snd_soc_component *component,
 			   struct snd_pcm_substream *substream);
+int snd_soc_component_close(struct snd_soc_component *component,
+			    struct snd_pcm_substream *substream);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index ada46f9729b2..cee66183470d 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -295,3 +295,13 @@ int snd_soc_component_open(struct snd_soc_component *component,
 
 	return 0;
 }
+
+int snd_soc_component_close(struct snd_soc_component *component,
+			    struct snd_pcm_substream *substream)
+{
+	if (component->driver->ops &&
+	    component->driver->ops->close)
+		return component->driver->ops->close(substream);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index caf7028cee62..4848642ef159 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -465,6 +465,7 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_component *component;
+	int ret = 0;
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -472,14 +473,11 @@ static int soc_pcm_components_close(struct snd_pcm_substream *substream,
 		if (component == last)
 			break;
 
-		if (component->driver->ops &&
-		    component->driver->ops->close)
-			component->driver->ops->close(substream);
-
+		ret |= snd_soc_component_close(component, substream);
 		snd_soc_component_module_put_when_close(component);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3


From 6d53723380ed73a2ced648d6e92774b39e5af1bd Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:13 +0900
Subject: ASoC: soc-component: add snd_soc_component_prepare()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_prepare() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87d0hx5rnm.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 10 ++++++++++
 sound/soc/soc-pcm.c           |  6 +-----
 3 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 8dacac953884..ff8233014444 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -343,5 +343,7 @@ int snd_soc_component_open(struct snd_soc_component *component,
 			   struct snd_pcm_substream *substream);
 int snd_soc_component_close(struct snd_soc_component *component,
 			    struct snd_pcm_substream *substream);
+int snd_soc_component_prepare(struct snd_soc_component *component,
+			      struct snd_pcm_substream *substream);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index cee66183470d..733d7139d875 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -305,3 +305,13 @@ int snd_soc_component_close(struct snd_soc_component *component,
 
 	return 0;
 }
+
+int snd_soc_component_prepare(struct snd_soc_component *component,
+			      struct snd_pcm_substream *substream)
+{
+	if (component->driver->ops &&
+	    component->driver->ops->prepare)
+		return component->driver->ops->prepare(substream);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 4848642ef159..c7fee67bc0dc 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -785,11 +785,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		if (!component->driver->ops ||
-		    !component->driver->ops->prepare)
-			continue;
-
-		ret = component->driver->ops->prepare(substream);
+		ret = snd_soc_component_prepare(component, substream);
 		if (ret < 0) {
 			dev_err(component->dev,
 				"ASoC: platform prepare error: %d\n", ret);
-- 
cgit v1.2.3


From 245c539a1206d74e8508a07550fb7c99d0817b8c Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:19 +0900
Subject: ASoC: soc-component: add snd_soc_component_hw_params()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_hw_params() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87blxh5rnf.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  3 +++
 sound/soc/soc-component.c     | 11 +++++++++++
 sound/soc/soc-pcm.c           |  6 +-----
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index ff8233014444..778a6e7d352d 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -345,5 +345,8 @@ int snd_soc_component_close(struct snd_soc_component *component,
 			    struct snd_pcm_substream *substream);
 int snd_soc_component_prepare(struct snd_soc_component *component,
 			      struct snd_pcm_substream *substream);
+int snd_soc_component_hw_params(struct snd_soc_component *component,
+				struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 733d7139d875..7b6456370da5 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -315,3 +315,14 @@ int snd_soc_component_prepare(struct snd_soc_component *component,
 
 	return 0;
 }
+
+int snd_soc_component_hw_params(struct snd_soc_component *component,
+				struct snd_pcm_substream *substream,
+				struct snd_pcm_hw_params *params)
+{
+	if (component->driver->ops &&
+	    component->driver->ops->hw_params)
+		return component->driver->ops->hw_params(substream, params);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index c7fee67bc0dc..8be1d22dc87a 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -951,11 +951,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		if (!component->driver->ops ||
-		    !component->driver->ops->hw_params)
-			continue;
-
-		ret = component->driver->ops->hw_params(substream, params);
+		ret = snd_soc_component_hw_params(component, substream, params);
 		if (ret < 0) {
 			dev_err(component->dev,
 				"ASoC: %s hw params failed: %d\n",
-- 
cgit v1.2.3


From eae7136aa2083699c69de5890fd6c32c501952b5 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:24 +0900
Subject: ASoC: soc-component: add snd_soc_component_hw_free()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_hw_free() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87a7d15rna.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 10 ++++++++++
 sound/soc/soc-pcm.c           |  9 +++------
 3 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 778a6e7d352d..fbcd911ac25e 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -348,5 +348,7 @@ int snd_soc_component_prepare(struct snd_soc_component *component,
 int snd_soc_component_hw_params(struct snd_soc_component *component,
 				struct snd_pcm_substream *substream,
 				struct snd_pcm_hw_params *params);
+int snd_soc_component_hw_free(struct snd_soc_component *component,
+			      struct snd_pcm_substream *substream);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 7b6456370da5..e2bc34efe547 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -326,3 +326,13 @@ int snd_soc_component_hw_params(struct snd_soc_component *component,
 
 	return 0;
 }
+
+int snd_soc_component_hw_free(struct snd_soc_component *component,
+			       struct snd_pcm_substream *substream)
+{
+	if (component->driver->ops &&
+	    component->driver->ops->hw_free)
+		return component->driver->ops->hw_free(substream);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 8be1d22dc87a..8c5289904f20 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -847,6 +847,7 @@ static int soc_pcm_components_hw_free(struct snd_pcm_substream *substream,
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_component *component;
+	int ret = 0;
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -854,14 +855,10 @@ static int soc_pcm_components_hw_free(struct snd_pcm_substream *substream,
 		if (component == last)
 			break;
 
-		if (!component->driver->ops ||
-		    !component->driver->ops->hw_free)
-			continue;
-
-		component->driver->ops->hw_free(substream);
+		ret |= snd_soc_component_hw_free(component, substream);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3


From 5693d50c830272cb3c4a04d2ce4db502debd1259 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:29 +0900
Subject: ASoC: soc-component: add snd_soc_component_trigger()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_trigger() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/878ssl5rn5.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  3 +++
 sound/soc/soc-component.c     | 11 +++++++++++
 sound/soc/soc-pcm.c           |  6 +-----
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index fbcd911ac25e..302e27a89d47 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -350,5 +350,8 @@ int snd_soc_component_hw_params(struct snd_soc_component *component,
 				struct snd_pcm_hw_params *params);
 int snd_soc_component_hw_free(struct snd_soc_component *component,
 			      struct snd_pcm_substream *substream);
+int snd_soc_component_trigger(struct snd_soc_component *component,
+			      struct snd_pcm_substream *substream,
+			      int cmd);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index e2bc34efe547..cf0d20a877e6 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -336,3 +336,14 @@ int snd_soc_component_hw_free(struct snd_soc_component *component,
 
 	return 0;
 }
+
+int snd_soc_component_trigger(struct snd_soc_component *component,
+			      struct snd_pcm_substream *substream,
+			      int cmd)
+{
+	if (component->driver->ops &&
+	    component->driver->ops->trigger)
+		return component->driver->ops->trigger(substream, cmd);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 8c5289904f20..cd49c2d688c3 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1065,11 +1065,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
 
-		if (!component->driver->ops ||
-		    !component->driver->ops->trigger)
-			continue;
-
-		ret = component->driver->ops->trigger(substream, cmd);
+		ret = snd_soc_component_trigger(component, substream, cmd);
 		if (ret < 0)
 			return ret;
 	}
-- 
cgit v1.2.3


From 66c51573b89d0a5c1089139a2f0dd029a755c37d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:50:34 +0900
Subject: ASoC: soc-component: add snd_soc_component_suspend()

Current ALSA SoC is directly using component->driver->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_suspend() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/877e855rn0.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 1 +
 sound/soc/soc-component.c     | 7 +++++++
 sound/soc/soc-core.c          | 4 +---
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 302e27a89d47..cdb014f41fd1 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -353,5 +353,6 @@ int snd_soc_component_hw_free(struct snd_soc_component *component,
 int snd_soc_component_trigger(struct snd_soc_component *component,
 			      struct snd_pcm_substream *substream,
 			      int cmd);
+void snd_soc_component_suspend(struct snd_soc_component *component);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index cf0d20a877e6..f0e63cd991c8 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -347,3 +347,10 @@ int snd_soc_component_trigger(struct snd_soc_component *component,
 
 	return 0;
 }
+
+void snd_soc_component_suspend(struct snd_soc_component *component)
+{
+	if (component->driver->suspend)
+		component->driver->suspend(component);
+	component->suspended = 1;
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 9ba19efcc56c..855b19abc1d2 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -520,9 +520,7 @@ int snd_soc_suspend(struct device *dev)
 				/* fall through */
 
 			case SND_SOC_BIAS_OFF:
-				if (component->driver->suspend)
-					component->driver->suspend(component);
-				component->suspended = 1;
+				snd_soc_component_suspend(component);
 				if (component->regmap)
 					regcache_mark_dirty(component->regmap);
 				/* deactivate pins to sleep state */
-- 
cgit v1.2.3


From 9a840cbac77a90e8406296aaa132ebf2c84ed9e3 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:08 +0900
Subject: ASoC: soc-component: add snd_soc_component_resume()

Current ALSA SoC is directly using component->driver->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_resume() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/875znp5rm2.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 1 +
 sound/soc/soc-component.c     | 7 +++++++
 sound/soc/soc-core.c          | 4 +---
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index cdb014f41fd1..1e3b70855ba7 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -354,5 +354,6 @@ int snd_soc_component_trigger(struct snd_soc_component *component,
 			      struct snd_pcm_substream *substream,
 			      int cmd);
 void snd_soc_component_suspend(struct snd_soc_component *component);
+void snd_soc_component_resume(struct snd_soc_component *component);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index f0e63cd991c8..cbae7672b72d 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -354,3 +354,10 @@ void snd_soc_component_suspend(struct snd_soc_component *component)
 		component->driver->suspend(component);
 	component->suspended = 1;
 }
+
+void snd_soc_component_resume(struct snd_soc_component *component)
+{
+	if (component->driver->resume)
+		component->driver->resume(component);
+	component->suspended = 0;
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 855b19abc1d2..6cdfe7b2fe06 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -593,9 +593,7 @@ static void soc_resume_deferred(struct work_struct *work)
 
 	for_each_card_components(card, component) {
 		if (component->suspended) {
-			if (component->driver->resume)
-				component->driver->resume(component);
-			component->suspended = 0;
+			snd_soc_component_resume(component);
 		}
 	}
 
-- 
cgit v1.2.3


From e40fadbcef583808c11d2e86b8ac1c652731468e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:13 +0900
Subject: ASoC: soc-component: add snd_soc_component_is_suspended()

Current ALSA SoC is directly using component->xxx,
But, it is not good for encapsulation.
This patch adds new snd_soc_component_is_suspended() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/874l395rlx.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 1 +
 sound/soc/soc-component.c     | 5 +++++
 sound/soc/soc-core.c          | 5 ++---
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 1e3b70855ba7..9600dc4ca6b4 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -355,5 +355,6 @@ int snd_soc_component_trigger(struct snd_soc_component *component,
 			      int cmd);
 void snd_soc_component_suspend(struct snd_soc_component *component);
 void snd_soc_component_resume(struct snd_soc_component *component);
+int snd_soc_component_is_suspended(struct snd_soc_component *component);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index cbae7672b72d..0a9ca84d7ac6 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -361,3 +361,8 @@ void snd_soc_component_resume(struct snd_soc_component *component)
 		component->driver->resume(component);
 	component->suspended = 0;
 }
+
+int snd_soc_component_is_suspended(struct snd_soc_component *component)
+{
+	return component->suspended;
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 6cdfe7b2fe06..ea93edd328a2 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -503,7 +503,7 @@ int snd_soc_suspend(struct device *dev)
 		 * If there are paths active then the COMPONENT will be held
 		 * with bias _ON and should not be suspended.
 		 */
-		if (!component->suspended) {
+		if (!snd_soc_component_is_suspended(component)) {
 			switch (snd_soc_dapm_get_bias_level(dapm)) {
 			case SND_SOC_BIAS_STANDBY:
 				/*
@@ -592,9 +592,8 @@ static void soc_resume_deferred(struct work_struct *work)
 	}
 
 	for_each_card_components(card, component) {
-		if (component->suspended) {
+		if (snd_soc_component_is_suspended(component))
 			snd_soc_component_resume(component);
-		}
 	}
 
 	for_each_card_rtds(card, rtd) {
-- 
cgit v1.2.3


From 08e837dd9e39bd3e25b1fa1a13f6ba44040e3f0d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:17 +0900
Subject: ASoC: soc-component: add snd_soc_component_probe()

Current ALSA SoC is directly using component->driver->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_probe() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/8736it5rlt.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  1 +
 sound/soc/soc-component.c     |  8 ++++++++
 sound/soc/soc-core.c          | 12 +++++-------
 3 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 9600dc4ca6b4..34e774efcf69 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -356,5 +356,6 @@ int snd_soc_component_trigger(struct snd_soc_component *component,
 void snd_soc_component_suspend(struct snd_soc_component *component);
 void snd_soc_component_resume(struct snd_soc_component *component);
 int snd_soc_component_is_suspended(struct snd_soc_component *component);
+int snd_soc_component_probe(struct snd_soc_component *component);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 0a9ca84d7ac6..b2bfc0375193 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -366,3 +366,11 @@ int snd_soc_component_is_suspended(struct snd_soc_component *component)
 {
 	return component->suspended;
 }
+
+int snd_soc_component_probe(struct snd_soc_component *component)
+{
+	if (component->driver->probe)
+		return component->driver->probe(component);
+
+	return 0;
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index ea93edd328a2..6a422ddae130 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1282,13 +1282,11 @@ static int soc_probe_component(struct snd_soc_card *card,
 		}
 	}
 
-	if (component->driver->probe) {
-		ret = component->driver->probe(component);
-		if (ret < 0) {
-			dev_err(component->dev,
-				"ASoC: failed to probe component %d\n", ret);
-			goto err_probe;
-		}
+	ret = snd_soc_component_probe(component);
+	if (ret < 0) {
+		dev_err(component->dev,
+			"ASoC: failed to probe component %d\n", ret);
+		goto err_probe;
 	}
 	WARN(dapm->idle_bias_off &&
 	     dapm->bias_level != SND_SOC_BIAS_OFF,
-- 
cgit v1.2.3


From 03b34dd7d87ce3493cb1837c9e59c3b3aac4724f Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:22 +0900
Subject: ASoC: soc-component: add snd_soc_component_remove()

Current ALSA SoC is directly using component->driver->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_remove() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/871ryd5rlo.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 1 +
 sound/soc/soc-component.c     | 6 ++++++
 sound/soc/soc-core.c          | 3 +--
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 34e774efcf69..b8480d947901 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -357,5 +357,6 @@ void snd_soc_component_suspend(struct snd_soc_component *component);
 void snd_soc_component_resume(struct snd_soc_component *component);
 int snd_soc_component_is_suspended(struct snd_soc_component *component);
 int snd_soc_component_probe(struct snd_soc_component *component);
+void snd_soc_component_remove(struct snd_soc_component *component);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index b2bfc0375193..eba77ea2b62d 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -374,3 +374,9 @@ int snd_soc_component_probe(struct snd_soc_component *component)
 
 	return 0;
 }
+
+void snd_soc_component_remove(struct snd_soc_component *component)
+{
+	if (component->driver->remove)
+		component->driver->remove(component);
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 6a422ddae130..6a6403ddf62d 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -929,8 +929,7 @@ static void soc_remove_component(struct snd_soc_component *component)
 	if (!component->card)
 		return;
 
-	if (component->driver->remove)
-		component->driver->remove(component);
+	snd_soc_component_remove(component);
 
 	soc_cleanup_component(component);
 }
-- 
cgit v1.2.3


From 2c7b1704819435d188c7697c6815f788bf9e6200 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:26 +0900
Subject: ASoC: soc-component: add snd_soc_component_of_xlate_dai_id()

Current ALSA SoC is directly using component->driver->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_of_xlate_dai_id() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87zhl14d14.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h | 2 ++
 sound/soc/soc-component.c     | 9 +++++++++
 sound/soc/soc-core.c          | 5 ++---
 3 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index b8480d947901..3f4acd337c4a 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -358,5 +358,7 @@ void snd_soc_component_resume(struct snd_soc_component *component);
 int snd_soc_component_is_suspended(struct snd_soc_component *component);
 int snd_soc_component_probe(struct snd_soc_component *component);
 void snd_soc_component_remove(struct snd_soc_component *component);
+int snd_soc_component_of_xlate_dai_id(struct snd_soc_component *component,
+				      struct device_node *ep);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index eba77ea2b62d..faf49992f661 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -380,3 +380,12 @@ void snd_soc_component_remove(struct snd_soc_component *component)
 	if (component->driver->remove)
 		component->driver->remove(component);
 }
+
+int snd_soc_component_of_xlate_dai_id(struct snd_soc_component *component,
+				      struct device_node *ep)
+{
+	if (component->driver->of_xlate_dai_id)
+		return component->driver->of_xlate_dai_id(component, ep);
+
+	return -ENOTSUPP;
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 6a6403ddf62d..f63d09dd55f4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3334,9 +3334,8 @@ int snd_soc_get_dai_id(struct device_node *ep)
 	ret = -ENOTSUPP;
 	mutex_lock(&client_mutex);
 	component = soc_find_component(&dlc);
-	if (component &&
-	    component->driver->of_xlate_dai_id)
-		ret = component->driver->of_xlate_dai_id(component, ep);
+	if (component)
+		ret = snd_soc_component_of_xlate_dai_id(component, ep);
 	mutex_unlock(&client_mutex);
 
 	of_node_put(dlc.of_node);
-- 
cgit v1.2.3


From a2a341752558cc67d6fe5c8ada7c16f9c3690f89 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:31 +0900
Subject: ASoC: soc-component: add snd_soc_component_of_xlate_dai_name()

Current ALSA SoC is directly using component->driver->xxx,
thus, it is deep nested, and makes code difficult to read,
and is not good for encapsulation.
This patch adds new snd_soc_component_of_xlate_dai_name() and use it

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87y30l4d0z.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  3 +++
 sound/soc/soc-component.c     | 10 ++++++++++
 sound/soc/soc-core.c          |  7 ++-----
 3 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 3f4acd337c4a..3ed2c39e45c2 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -360,5 +360,8 @@ int snd_soc_component_probe(struct snd_soc_component *component);
 void snd_soc_component_remove(struct snd_soc_component *component);
 int snd_soc_component_of_xlate_dai_id(struct snd_soc_component *component,
 				      struct device_node *ep);
+int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component,
+					struct of_phandle_args *args,
+					const char **dai_name);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index faf49992f661..de1bc5196f67 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -389,3 +389,13 @@ int snd_soc_component_of_xlate_dai_id(struct snd_soc_component *component,
 
 	return -ENOTSUPP;
 }
+
+int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component,
+					struct of_phandle_args *args,
+					const char **dai_name)
+{
+	if (component->driver->of_xlate_dai_name)
+		return component->driver->of_xlate_dai_name(component,
+						     args, dai_name);
+	return -ENOTSUPP;
+}
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index f63d09dd55f4..2f068c239f34 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3358,11 +3358,8 @@ int snd_soc_get_dai_name(struct of_phandle_args *args,
 		if (component_of_node != args->np)
 			continue;
 
-		if (pos->driver->of_xlate_dai_name) {
-			ret = pos->driver->of_xlate_dai_name(pos,
-							     args,
-							     dai_name);
-		} else {
+		ret = snd_soc_component_of_xlate_dai_name(pos, args, dai_name);
+		if (ret == -ENOTSUPP) {
 			struct snd_soc_dai *dai;
 			int id = -1;
 
-- 
cgit v1.2.3


From 9d415fbf773f162a5c274e671741c6fa94b74287 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:35 +0900
Subject: ASoC: soc-component: move snd_soc_component_seq_notifier()

Current soc-dapm / soc-core are using a long way round to call
.seq_notifier.

	if (driver->seq_notifier)
		dapm->seq_notifier = ...;
	...
	if (dapm->seq_notifier)
		ret = dapm->seq_notifier(...);

We can directly call it via driver->seq_notifier.
One note here is that both Card and Component have dapm,
but, Card's dapm doesn't have dapm->component.
We need to check it.

This patch moves snd_soc_component_seq_notifier() to soc-component.c,
and updates parameters.
dapm->seq_notifier is no longer needed

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87wog54d0v.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  3 +++
 include/sound/soc-dapm.h      |  2 --
 sound/soc/soc-component.c     |  7 +++++++
 sound/soc/soc-core.c          | 10 ----------
 sound/soc/soc-dapm.c          | 15 ++++++++-------
 5 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 3ed2c39e45c2..7ac903c1e33f 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -281,6 +281,9 @@ int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id,
 int snd_soc_component_set_jack(struct snd_soc_component *component,
 			       struct snd_soc_jack *jack, void *data);
 
+void snd_soc_component_seq_notifier(struct snd_soc_component *component,
+				    enum snd_soc_dapm_type type, int subseq);
+
 #ifdef CONFIG_REGMAP
 void snd_soc_component_init_regmap(struct snd_soc_component *component,
 				   struct regmap *regmap);
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 6c6694160130..a03db6f8faa8 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -661,8 +661,6 @@ struct snd_soc_dapm_context {
 	unsigned int idle_bias_off:1; /* Use BIAS_OFF instead of STANDBY */
 	/* Go to BIAS_OFF in suspend if the DAPM context is idle */
 	unsigned int suspend_bias_off:1;
-	void (*seq_notifier)(struct snd_soc_dapm_context *,
-			     enum snd_soc_dapm_type, int);
 
 	struct device *dev; /* from parent - for debug */
 	struct snd_soc_component *component; /* parent component */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index de1bc5196f67..ca0b28b1d918 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -52,6 +52,13 @@ int snd_soc_component_set_pll(struct snd_soc_component *component, int pll_id,
 }
 EXPORT_SYMBOL_GPL(snd_soc_component_set_pll);
 
+void snd_soc_component_seq_notifier(struct snd_soc_component *component,
+				    enum snd_soc_dapm_type type, int subseq)
+{
+	if (component->driver->seq_notifier)
+		component->driver->seq_notifier(component, type, subseq);
+}
+
 int snd_soc_component_enable_pin(struct snd_soc_component *component,
 				 const char *pin)
 {
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 2f068c239f34..c618fecc3d45 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2646,14 +2646,6 @@ int snd_soc_register_dai(struct snd_soc_component *component,
 }
 EXPORT_SYMBOL_GPL(snd_soc_register_dai);
 
-static void snd_soc_component_seq_notifier(struct snd_soc_dapm_context *dapm,
-	enum snd_soc_dapm_type type, int subseq)
-{
-	struct snd_soc_component *component = dapm->component;
-
-	component->driver->seq_notifier(component, type, subseq);
-}
-
 static int snd_soc_component_stream_event(struct snd_soc_dapm_context *dapm,
 	int event)
 {
@@ -2690,8 +2682,6 @@ static int snd_soc_component_initialize(struct snd_soc_component *component,
 	dapm->bias_level = SND_SOC_BIAS_OFF;
 	dapm->idle_bias_off = !driver->idle_bias_on;
 	dapm->suspend_bias_off = driver->suspend_bias_off;
-	if (driver->seq_notifier)
-		dapm->seq_notifier = snd_soc_component_seq_notifier;
 	if (driver->stream_event)
 		dapm->stream_event = snd_soc_component_stream_event;
 	if (driver->set_bias_level)
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index d93c1038fab0..0b60f688b433 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1611,12 +1611,12 @@ static void dapm_seq_run(struct snd_soc_card *card,
 			if (!list_empty(&pending))
 				dapm_seq_run_coalesced(card, &pending);
 
-			if (cur_dapm && cur_dapm->seq_notifier) {
+			if (cur_dapm && cur_dapm->component) {
 				for (i = 0; i < ARRAY_SIZE(dapm_up_seq); i++)
 					if (sort[i] == cur_sort)
-						cur_dapm->seq_notifier(cur_dapm,
-								       i,
-								       cur_subseq);
+						snd_soc_component_seq_notifier(
+							cur_dapm->component,
+							i, cur_subseq);
 			}
 
 			if (cur_dapm && w->dapm != cur_dapm)
@@ -1674,11 +1674,12 @@ static void dapm_seq_run(struct snd_soc_card *card,
 	if (!list_empty(&pending))
 		dapm_seq_run_coalesced(card, &pending);
 
-	if (cur_dapm && cur_dapm->seq_notifier) {
+	if (cur_dapm && cur_dapm->component) {
 		for (i = 0; i < ARRAY_SIZE(dapm_up_seq); i++)
 			if (sort[i] == cur_sort)
-				cur_dapm->seq_notifier(cur_dapm,
-						       i, cur_subseq);
+				snd_soc_component_seq_notifier(
+					cur_dapm->component,
+					i, cur_subseq);
 	}
 
 	list_for_each_entry(d, &card->dapm_list, list) {
-- 
cgit v1.2.3


From 8e2a990d76aced95c6f01c2d67d8835c86f0ca67 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:39 +0900
Subject: ASoC: soc-component: move snd_soc_component_stream_event()

Current soc-dapm / soc-core are using a long way round to call
.stream_event.

	if (driver->stream_event)
		dapm->stream_event = ...;
	...
	if (dapm->stream_event)
		ret = dapm->stream_event(...);

We can directly call it via driver->stream_event.
One note here is that both Card and Component have dapm,
but, Card's dapm doesn't have dapm->component.
We need to check it.

This patch moves snd_soc_component_stream_event() to soc-component.c
and updates parameters.
dapm->stream_event is no longer needed

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87v9vp4d0r.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 include/sound/soc-dapm.h      |  1 -
 sound/soc/soc-component.c     |  9 +++++++++
 sound/soc/soc-core.c          | 10 ----------
 sound/soc/soc-dapm.c          |  9 +++++++--
 5 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 7ac903c1e33f..1f84f04e2670 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -283,6 +283,8 @@ int snd_soc_component_set_jack(struct snd_soc_component *component,
 
 void snd_soc_component_seq_notifier(struct snd_soc_component *component,
 				    enum snd_soc_dapm_type type, int subseq);
+int snd_soc_component_stream_event(struct snd_soc_component *component,
+				   int event);
 
 #ifdef CONFIG_REGMAP
 void snd_soc_component_init_regmap(struct snd_soc_component *component,
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index a03db6f8faa8..c2f14a335891 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -670,7 +670,6 @@ struct snd_soc_dapm_context {
 	enum snd_soc_bias_level target_bias_level;
 	struct list_head list;
 
-	int (*stream_event)(struct snd_soc_dapm_context *dapm, int event);
 	int (*set_bias_level)(struct snd_soc_dapm_context *dapm,
 			      enum snd_soc_bias_level level);
 
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index ca0b28b1d918..f33dda8023ec 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -59,6 +59,15 @@ void snd_soc_component_seq_notifier(struct snd_soc_component *component,
 		component->driver->seq_notifier(component, type, subseq);
 }
 
+int snd_soc_component_stream_event(struct snd_soc_component *component,
+				   int event)
+{
+	if (component->driver->stream_event)
+		return component->driver->stream_event(component, event);
+
+	return 0;
+}
+
 int snd_soc_component_enable_pin(struct snd_soc_component *component,
 				 const char *pin)
 {
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index c618fecc3d45..8cfbe5fb5921 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2646,14 +2646,6 @@ int snd_soc_register_dai(struct snd_soc_component *component,
 }
 EXPORT_SYMBOL_GPL(snd_soc_register_dai);
 
-static int snd_soc_component_stream_event(struct snd_soc_dapm_context *dapm,
-	int event)
-{
-	struct snd_soc_component *component = dapm->component;
-
-	return component->driver->stream_event(component, event);
-}
-
 static int snd_soc_component_set_bias_level(struct snd_soc_dapm_context *dapm,
 					enum snd_soc_bias_level level)
 {
@@ -2682,8 +2674,6 @@ static int snd_soc_component_initialize(struct snd_soc_component *component,
 	dapm->bias_level = SND_SOC_BIAS_OFF;
 	dapm->idle_bias_off = !driver->idle_bias_on;
 	dapm->suspend_bias_off = driver->suspend_bias_off;
-	if (driver->stream_event)
-		dapm->stream_event = snd_soc_component_stream_event;
 	if (driver->set_bias_level)
 		dapm->set_bias_level = snd_soc_component_set_bias_level;
 
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 0b60f688b433..9288b2b43f98 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -1913,6 +1913,7 @@ static int dapm_power_widgets(struct snd_soc_card *card, int event)
 	LIST_HEAD(down_list);
 	ASYNC_DOMAIN_EXCLUSIVE(async_domain);
 	enum snd_soc_bias_level bias;
+	int ret;
 
 	lockdep_assert_held(&card->dapm_mutex);
 
@@ -2029,8 +2030,12 @@ static int dapm_power_widgets(struct snd_soc_card *card, int event)
 
 	/* do we need to notify any clients that DAPM event is complete */
 	list_for_each_entry(d, &card->dapm_list, list) {
-		if (d->stream_event)
-			d->stream_event(d, event);
+		if (!d->component)
+			continue;
+
+		ret = snd_soc_component_stream_event(d->component, event);
+		if (ret < 0)
+			return ret;
 	}
 
 	pop_dbg(card->dev, card->pop_time,
-- 
cgit v1.2.3


From 7951b14611851bdae18e9bca18015b1d84731d0d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:43 +0900
Subject: ASoC: soc-component: move snd_soc_component_set_bias_level()

Current soc-dapm / soc-core are using a long way round to call
.set_bias_level.

	if (driver->set_bias_level)
		dapm->set_bias_level = ...;
	...
	if (dapm->set_bias_level)
		ret = dapm->set_bias_level(...);

We can directly call it via driver->set_bias_level.
One note here is that both Card and Component have dapm,
but, Card's dapm doesn't have dapm->component.
We need to check it.

This patch moves snd_soc_component_set_bias_level() to soc-component.c
and updates parameters.
dapm->set_bias_level is no longer needed

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87tvb94d0n.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 include/sound/soc-dapm.h      |  3 ---
 sound/soc/soc-component.c     |  9 +++++++++
 sound/soc/soc-core.c          | 10 ----------
 sound/soc/soc-dapm.c          |  4 ++--
 5 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 1f84f04e2670..2aaf12bbbed0 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -285,6 +285,8 @@ void snd_soc_component_seq_notifier(struct snd_soc_component *component,
 				    enum snd_soc_dapm_type type, int subseq);
 int snd_soc_component_stream_event(struct snd_soc_component *component,
 				   int event);
+int snd_soc_component_set_bias_level(struct snd_soc_component *component,
+				     enum snd_soc_bias_level level);
 
 #ifdef CONFIG_REGMAP
 void snd_soc_component_init_regmap(struct snd_soc_component *component,
diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index c2f14a335891..2aa73d6dd7be 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -670,9 +670,6 @@ struct snd_soc_dapm_context {
 	enum snd_soc_bias_level target_bias_level;
 	struct list_head list;
 
-	int (*set_bias_level)(struct snd_soc_dapm_context *dapm,
-			      enum snd_soc_bias_level level);
-
 	struct snd_soc_dapm_wcache path_sink_cache;
 	struct snd_soc_dapm_wcache path_source_cache;
 
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index f33dda8023ec..cb63df6e46eb 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -68,6 +68,15 @@ int snd_soc_component_stream_event(struct snd_soc_component *component,
 	return 0;
 }
 
+int snd_soc_component_set_bias_level(struct snd_soc_component *component,
+				     enum snd_soc_bias_level level)
+{
+	if (component->driver->set_bias_level)
+		return component->driver->set_bias_level(component, level);
+
+	return 0;
+}
+
 int snd_soc_component_enable_pin(struct snd_soc_component *component,
 				 const char *pin)
 {
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 8cfbe5fb5921..0f75dac4bb26 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2646,14 +2646,6 @@ int snd_soc_register_dai(struct snd_soc_component *component,
 }
 EXPORT_SYMBOL_GPL(snd_soc_register_dai);
 
-static int snd_soc_component_set_bias_level(struct snd_soc_dapm_context *dapm,
-					enum snd_soc_bias_level level)
-{
-	struct snd_soc_component *component = dapm->component;
-
-	return component->driver->set_bias_level(component, level);
-}
-
 static int snd_soc_component_initialize(struct snd_soc_component *component,
 	const struct snd_soc_component_driver *driver, struct device *dev)
 {
@@ -2674,8 +2666,6 @@ static int snd_soc_component_initialize(struct snd_soc_component *component,
 	dapm->bias_level = SND_SOC_BIAS_OFF;
 	dapm->idle_bias_off = !driver->idle_bias_on;
 	dapm->suspend_bias_off = driver->suspend_bias_off;
-	if (driver->set_bias_level)
-		dapm->set_bias_level = snd_soc_component_set_bias_level;
 
 	INIT_LIST_HEAD(&component->dai_list);
 	mutex_init(&component->io_mutex);
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 9288b2b43f98..d09bdca63c62 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -684,8 +684,8 @@ int snd_soc_dapm_force_bias_level(struct snd_soc_dapm_context *dapm,
 {
 	int ret = 0;
 
-	if (dapm->set_bias_level)
-		ret = dapm->set_bias_level(dapm, level);
+	if (dapm->component)
+		ret = snd_soc_component_set_bias_level(dapm->component, level);
 
 	if (ret == 0)
 		dapm->bias_level = level;
-- 
cgit v1.2.3


From 0035e2565b93e0902a06320ba1716bc1ddd753b3 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:47 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_pointer()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_pointer() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87sgqt4d0j.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 18 ++++++++++++++++++
 sound/soc/soc-pcm.c           | 13 +------------
 3 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 2aaf12bbbed0..38b4be1d99f5 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -371,4 +371,6 @@ int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component,
 					struct of_phandle_args *args,
 					const char **dai_name);
 
+int snd_soc_pcm_component_pointer(struct snd_pcm_substream *substream);
+
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index cb63df6e46eb..e2053c8bf1f0 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -424,3 +424,21 @@ int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component,
 						     args, dai_name);
 	return -ENOTSUPP;
 }
+
+int snd_soc_pcm_component_pointer(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_component *component;
+	struct snd_soc_rtdcom_list *rtdcom;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		/* FIXME: use 1st pointer */
+		if (component->driver->ops &&
+		    component->driver->ops->pointer)
+			return component->driver->ops->pointer(substream);
+	}
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index cd49c2d688c3..020e1d275076 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1111,8 +1111,6 @@ static int soc_pcm_bespoke_trigger(struct snd_pcm_substream *substream,
 static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream)
 {
 	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component;
-	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	struct snd_soc_dai *codec_dai;
 	struct snd_pcm_runtime *runtime = substream->runtime;
@@ -1124,17 +1122,8 @@ static snd_pcm_uframes_t soc_pcm_pointer(struct snd_pcm_substream *substream)
 	/* clearing the previous total delay */
 	runtime->delay = 0;
 
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
+	offset = snd_soc_pcm_component_pointer(substream);
 
-		if (!component->driver->ops ||
-		    !component->driver->ops->pointer)
-			continue;
-
-		/* FIXME: use 1st pointer */
-		offset = component->driver->ops->pointer(substream);
-		break;
-	}
 	/* base delay if assigned in pointer callback */
 	delay = runtime->delay;
 
-- 
cgit v1.2.3


From 96a47908d8769479f5217bf3f432ccdc06a29747 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:51 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_ioctrl()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_ioctrl() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87r26d4d0f.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 20 ++++++++++++++++++++
 sound/soc/soc-pcm.c           | 25 ++-----------------------
 3 files changed, 24 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 38b4be1d99f5..5db4e5d028d0 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -372,5 +372,7 @@ int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component,
 					const char **dai_name);
 
 int snd_soc_pcm_component_pointer(struct snd_pcm_substream *substream);
+int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
+				unsigned int cmd, void *arg);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index e2053c8bf1f0..a6c0857a9e90 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -442,3 +442,23 @@ int snd_soc_pcm_component_pointer(struct snd_pcm_substream *substream)
 
 	return 0;
 }
+
+int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
+				unsigned int cmd, void *arg)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_component *component;
+	struct snd_soc_rtdcom_list *rtdcom;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		/* FIXME: use 1st ioctl */
+		if (component->driver->ops &&
+		    component->driver->ops->ioctl)
+			return component->driver->ops->ioctl(substream,
+							     cmd, arg);
+	}
+
+	return snd_pcm_lib_ioctl(substream, cmd, arg);
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 020e1d275076..12377b8d41c2 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2453,27 +2453,6 @@ out:
 	return ret;
 }
 
-static int soc_pcm_ioctl(struct snd_pcm_substream *substream,
-		     unsigned int cmd, void *arg)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_component *component;
-	struct snd_soc_rtdcom_list *rtdcom;
-
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		if (!component->driver->ops ||
-		    !component->driver->ops->ioctl)
-			continue;
-
-		/* FIXME: use 1st ioctl */
-		return component->driver->ops->ioctl(substream, cmd, arg);
-	}
-
-	return snd_pcm_lib_ioctl(substream, cmd, arg);
-}
-
 static int dpcm_run_update_shutdown(struct snd_soc_pcm_runtime *fe, int stream)
 {
 	struct snd_pcm_substream *substream =
@@ -3013,7 +2992,7 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 		rtd->ops.hw_free	= dpcm_fe_dai_hw_free;
 		rtd->ops.close		= dpcm_fe_dai_close;
 		rtd->ops.pointer	= soc_pcm_pointer;
-		rtd->ops.ioctl		= soc_pcm_ioctl;
+		rtd->ops.ioctl		= snd_soc_pcm_component_ioctl;
 	} else {
 		rtd->ops.open		= soc_pcm_open;
 		rtd->ops.hw_params	= soc_pcm_hw_params;
@@ -3022,7 +3001,7 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 		rtd->ops.hw_free	= soc_pcm_hw_free;
 		rtd->ops.close		= soc_pcm_close;
 		rtd->ops.pointer	= soc_pcm_pointer;
-		rtd->ops.ioctl		= soc_pcm_ioctl;
+		rtd->ops.ioctl		= snd_soc_pcm_component_ioctl;
 	}
 
 	for_each_rtdcom(rtd, rtdcom) {
-- 
cgit v1.2.3


From 82d81f5cced36e480b581ae51c595e2deb9f2d56 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:51:56 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_copy_user()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_copy_user() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87pnlx4d0a.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  3 +++
 sound/soc/soc-component.c     | 21 +++++++++++++++++++++
 sound/soc/soc-pcm.c           | 25 +------------------------
 3 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 5db4e5d028d0..6b95d2467053 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -374,5 +374,8 @@ int snd_soc_component_of_xlate_dai_name(struct snd_soc_component *component,
 int snd_soc_pcm_component_pointer(struct snd_pcm_substream *substream);
 int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
 				unsigned int cmd, void *arg);
+int snd_soc_pcm_component_copy_user(struct snd_pcm_substream *substream,
+				    int channel, unsigned long pos,
+				    void __user *buf, unsigned long bytes);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index a6c0857a9e90..20897dce1bec 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -462,3 +462,24 @@ int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
 
 	return snd_pcm_lib_ioctl(substream, cmd, arg);
 }
+
+int snd_soc_pcm_component_copy_user(struct snd_pcm_substream *substream,
+				    int channel, unsigned long pos,
+				    void __user *buf, unsigned long bytes)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_rtdcom_list *rtdcom;
+	struct snd_soc_component *component;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		/* FIXME. it returns 1st copy now */
+		if (component->driver->ops &&
+		    component->driver->ops->copy_user)
+			return component->driver->ops->copy_user(
+				substream, channel, pos, buf, bytes);
+	}
+
+	return -EINVAL;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 12377b8d41c2..b0e6ce89b012 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2818,29 +2818,6 @@ static void soc_pcm_private_free(struct snd_pcm *pcm)
 	}
 }
 
-static int soc_rtdcom_copy_user(struct snd_pcm_substream *substream, int channel,
-				unsigned long pos, void __user *buf,
-				unsigned long bytes)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_rtdcom_list *rtdcom;
-	struct snd_soc_component *component;
-
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		if (!component->driver->ops ||
-		    !component->driver->ops->copy_user)
-			continue;
-
-		/* FIXME. it returns 1st copy now */
-		return component->driver->ops->copy_user(substream, channel,
-							 pos, buf, bytes);
-	}
-
-	return -EINVAL;
-}
-
 static struct page *soc_rtdcom_page(struct snd_pcm_substream *substream,
 				    unsigned long offset)
 {
@@ -3011,7 +2988,7 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 			continue;
 
 		if (ops->copy_user)
-			rtd->ops.copy_user	= soc_rtdcom_copy_user;
+			rtd->ops.copy_user	= snd_soc_pcm_component_copy_user;
 		if (ops->page)
 			rtd->ops.page		= soc_rtdcom_page;
 		if (ops->mmap)
-- 
cgit v1.2.3


From 9c712e4f57229081e837d593fc1e4183b068a41c Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:52:00 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_page()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_page() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87o91h4d06.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 23 +++++++++++++++++++++++
 sound/soc/soc-pcm.c           | 26 +-------------------------
 3 files changed, 26 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 6b95d2467053..4cab257962a6 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -377,5 +377,7 @@ int snd_soc_pcm_component_ioctl(struct snd_pcm_substream *substream,
 int snd_soc_pcm_component_copy_user(struct snd_pcm_substream *substream,
 				    int channel, unsigned long pos,
 				    void __user *buf, unsigned long bytes);
+struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
+					unsigned long offset);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 20897dce1bec..d503bc9b0850 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -483,3 +483,26 @@ int snd_soc_pcm_component_copy_user(struct snd_pcm_substream *substream,
 
 	return -EINVAL;
 }
+
+struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
+					unsigned long offset)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_rtdcom_list *rtdcom;
+	struct snd_soc_component *component;
+	struct page *page;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		/* FIXME. it returns 1st page now */
+		if (component->driver->ops &&
+		    component->driver->ops->page) {
+			page = component->driver->ops->page(substream, offset);
+			if (page)
+				return page;
+		}
+	}
+
+	return NULL;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index b0e6ce89b012..fe34f2e5d75e 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2818,30 +2818,6 @@ static void soc_pcm_private_free(struct snd_pcm *pcm)
 	}
 }
 
-static struct page *soc_rtdcom_page(struct snd_pcm_substream *substream,
-				    unsigned long offset)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_rtdcom_list *rtdcom;
-	struct snd_soc_component *component;
-	struct page *page;
-
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		if (!component->driver->ops ||
-		    !component->driver->ops->page)
-			continue;
-
-		/* FIXME. it returns 1st page now */
-		page = component->driver->ops->page(substream, offset);
-		if (page)
-			return page;
-	}
-
-	return NULL;
-}
-
 static int soc_rtdcom_mmap(struct snd_pcm_substream *substream,
 			   struct vm_area_struct *vma)
 {
@@ -2990,7 +2966,7 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 		if (ops->copy_user)
 			rtd->ops.copy_user	= snd_soc_pcm_component_copy_user;
 		if (ops->page)
-			rtd->ops.page		= soc_rtdcom_page;
+			rtd->ops.page		= snd_soc_pcm_component_page;
 		if (ops->mmap)
 			rtd->ops.mmap		= soc_rtdcom_mmap;
 	}
-- 
cgit v1.2.3


From 205875e1a12ef9c61e939db9ded90fe3f6352e75 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:52:04 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_mmap()

Current ALSA SoC is directly using component->driver->ops->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_mmap() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87muh14d02.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  2 ++
 sound/soc/soc-component.c     | 19 +++++++++++++++++++
 sound/soc/soc-pcm.c           | 23 +----------------------
 3 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 4cab257962a6..dd1ea5d71998 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -379,5 +379,7 @@ int snd_soc_pcm_component_copy_user(struct snd_pcm_substream *substream,
 				    void __user *buf, unsigned long bytes);
 struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
 					unsigned long offset);
+int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
+			       struct vm_area_struct *vma);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index d503bc9b0850..2aff1b087522 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -506,3 +506,22 @@ struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
 
 	return NULL;
 }
+
+int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
+			       struct vm_area_struct *vma)
+{
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_rtdcom_list *rtdcom;
+	struct snd_soc_component *component;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		/* FIXME. it returns 1st mmap now */
+		if (component->driver->ops &&
+		    component->driver->ops->mmap)
+			return component->driver->ops->mmap(substream, vma);
+	}
+
+	return -EINVAL;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index fe34f2e5d75e..7bbee0d71942 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2818,27 +2818,6 @@ static void soc_pcm_private_free(struct snd_pcm *pcm)
 	}
 }
 
-static int soc_rtdcom_mmap(struct snd_pcm_substream *substream,
-			   struct vm_area_struct *vma)
-{
-	struct snd_soc_pcm_runtime *rtd = substream->private_data;
-	struct snd_soc_rtdcom_list *rtdcom;
-	struct snd_soc_component *component;
-
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		if (!component->driver->ops ||
-		    !component->driver->ops->mmap)
-			continue;
-
-		/* FIXME. it returns 1st mmap now */
-		return component->driver->ops->mmap(substream, vma);
-	}
-
-	return -EINVAL;
-}
-
 /* create a new pcm */
 int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 {
@@ -2968,7 +2947,7 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 		if (ops->page)
 			rtd->ops.page		= snd_soc_pcm_component_page;
 		if (ops->mmap)
-			rtd->ops.mmap		= soc_rtdcom_mmap;
+			rtd->ops.mmap		= snd_soc_pcm_component_mmap;
 	}
 
 	if (playback)
-- 
cgit v1.2.3


From 7484291e9b7564af65b2581dcdebeeaf98bc86d0 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:52:08 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_pcm_new()

Current ALSA SoC is directly using component->driver->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_pcm() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87lfwl4czy.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  1 +
 sound/soc/soc-component.c     | 20 ++++++++++++++++++++
 sound/soc/soc-pcm.c           | 18 ++++--------------
 3 files changed, 25 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index dd1ea5d71998..d3048ad06582 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -381,5 +381,6 @@ struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
 					unsigned long offset);
 int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
 			       struct vm_area_struct *vma);
+int snd_soc_pcm_component_new(struct snd_pcm *pcm);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 2aff1b087522..ff13d901bbab 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -525,3 +525,23 @@ int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
 
 	return -EINVAL;
 }
+
+int snd_soc_pcm_component_new(struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
+	struct snd_soc_rtdcom_list *rtdcom;
+	struct snd_soc_component *component;
+	int ret;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		if (component->driver->pcm_new) {
+			ret = component->driver->pcm_new(rtd);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 7bbee0d71942..955c49fd3bda 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2823,7 +2823,6 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 {
 	struct snd_soc_dai *codec_dai;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
-	struct snd_soc_component *component;
 	struct snd_soc_rtdcom_list *rtdcom;
 	struct snd_pcm *pcm;
 	char new_name[64];
@@ -2956,19 +2955,10 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 	if (capture)
 		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &rtd->ops);
 
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		if (!component->driver->pcm_new)
-			continue;
-
-		ret = component->driver->pcm_new(rtd);
-		if (ret < 0) {
-			dev_err(component->dev,
-				"ASoC: pcm constructor failed: %d\n",
-				ret);
-			return ret;
-		}
+	ret = snd_soc_pcm_component_new(pcm);
+	if (ret < 0) {
+		dev_err(rtd->dev, "ASoC: pcm constructor failed: %d\n", ret);
+		return ret;
 	}
 
 	pcm->private_free = soc_pcm_private_free;
-- 
cgit v1.2.3


From 79776da0989733a5bac0a1e635e3a284c3f5c745 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 26 Jul 2019 13:52:12 +0900
Subject: ASoC: soc-component: add snd_soc_pcm_component_pcm_free()

Current ALSA SoC is directly using component->driver->xxx,
thus, the code nested deeply, and it makes code difficult to read,
and is not good for encapsulation.

We want to implement component related function at soc-component.c,
but, some of them need to care whole snd_soc_pcm_runtime (= rtd)
connected component.

Let's call component related function which need to care with
for_each_rtdcom() loop as snd_soc_pcm_component_xxx().
This patch adds new snd_soc_pcm_component_pcm_free() and use it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87k1c54czu.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-component.h |  1 +
 sound/soc/soc-component.c     | 14 ++++++++++++++
 sound/soc/soc-pcm.c           |  9 +--------
 3 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index d3048ad06582..5d80b2eef525 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -382,5 +382,6 @@ struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
 int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
 			       struct vm_area_struct *vma);
 int snd_soc_pcm_component_new(struct snd_pcm *pcm);
+void snd_soc_pcm_component_free(struct snd_pcm *pcm);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index ff13d901bbab..79ffc2820ba9 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -545,3 +545,17 @@ int snd_soc_pcm_component_new(struct snd_pcm *pcm)
 
 	return 0;
 }
+
+void snd_soc_pcm_component_free(struct snd_pcm *pcm)
+{
+	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
+	struct snd_soc_rtdcom_list *rtdcom;
+	struct snd_soc_component *component;
+
+	for_each_rtdcom(rtd, rtdcom) {
+		component = rtdcom->component;
+
+		if (component->driver->pcm_free)
+			component->driver->pcm_free(pcm);
+	}
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 955c49fd3bda..77c986fe08d0 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2805,17 +2805,10 @@ static int dpcm_fe_dai_close(struct snd_pcm_substream *fe_substream)
 static void soc_pcm_private_free(struct snd_pcm *pcm)
 {
 	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	struct snd_soc_rtdcom_list *rtdcom;
-	struct snd_soc_component *component;
 
 	/* need to sync the delayed work before releasing resources */
 	flush_delayed_work(&rtd->delayed_work);
-	for_each_rtdcom(rtd, rtdcom) {
-		component = rtdcom->component;
-
-		if (component->driver->pcm_free)
-			component->driver->pcm_free(pcm);
-	}
+	snd_soc_pcm_component_free(pcm);
 }
 
 /* create a new pcm */
-- 
cgit v1.2.3


From 9cd15d521a3adcb687a0f9a312e32caaa94f44c2 Mon Sep 17 00:00:00 2001
From: Denis Ciocca <denis.ciocca@st.com>
Date: Fri, 2 Aug 2019 10:59:13 -0700
Subject: iio: remove get_irq_data_ready() function pointer and use IRQ number
 directly

Not even sure why it was there since the beginning. Just use IRQ
number in the sensor_data struct.

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c                  |  7 +++----
 drivers/iio/common/st_sensors/st_sensors_i2c.c     |  9 +--------
 drivers/iio/common/st_sensors/st_sensors_spi.c     |  9 +--------
 drivers/iio/common/st_sensors/st_sensors_trigger.c | 21 ++++++++++-----------
 drivers/iio/gyro/st_gyro_core.c                    |  7 +++----
 drivers/iio/magnetometer/st_magn_core.c            |  7 +++----
 drivers/iio/pressure/st_pressure_core.c            |  7 +++----
 include/linux/iio/common/st_sensors.h              |  5 ++---
 8 files changed, 26 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 0b17004cb12e..2e37f8a6d8cf 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1169,7 +1169,6 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 	struct st_sensor_data *adata = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata =
 		(struct st_sensors_platform_data *)adata->dev->platform_data;
-	int irq = adata->get_irq_data_ready(indio_dev);
 	struct iio_chan_spec *channels;
 	size_t channels_size;
 	int err;
@@ -1217,7 +1216,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 	if (err < 0)
 		goto st_accel_power_off;
 
-	if (irq > 0) {
+	if (adata->irq > 0) {
 		err = st_sensors_allocate_trigger(indio_dev,
 						 ST_ACCEL_TRIGGER_OPS);
 		if (err < 0)
@@ -1234,7 +1233,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 	return 0;
 
 st_accel_device_register_error:
-	if (irq > 0)
+	if (adata->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 st_accel_probe_trigger_error:
 	st_accel_deallocate_ring(indio_dev);
@@ -1252,7 +1251,7 @@ void st_accel_common_remove(struct iio_dev *indio_dev)
 	st_sensors_power_disable(indio_dev);
 
 	iio_device_unregister(indio_dev);
-	if (adata->get_irq_data_ready(indio_dev) > 0)
+	if (adata->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 
 	st_accel_deallocate_ring(indio_dev);
diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c
index 9240625534df..aa89d54a7c59 100644
--- a/drivers/iio/common/st_sensors/st_sensors_i2c.c
+++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c
@@ -20,13 +20,6 @@
 
 #define ST_SENSORS_I2C_MULTIREAD	0x80
 
-static unsigned int st_sensors_i2c_get_irq(struct iio_dev *indio_dev)
-{
-	struct st_sensor_data *sdata = iio_priv(indio_dev);
-
-	return to_i2c_client(sdata->dev)->irq;
-}
-
 static const struct regmap_config st_sensors_i2c_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -69,7 +62,7 @@ int st_sensors_i2c_configure(struct iio_dev *indio_dev,
 	indio_dev->name = client->name;
 
 	sdata->dev = &client->dev;
-	sdata->get_irq_data_ready = st_sensors_i2c_get_irq;
+	sdata->irq = client->irq;
 
 	return 0;
 }
diff --git a/drivers/iio/common/st_sensors/st_sensors_spi.c b/drivers/iio/common/st_sensors/st_sensors_spi.c
index 9c0661a283d0..2262f81b07c2 100644
--- a/drivers/iio/common/st_sensors/st_sensors_spi.c
+++ b/drivers/iio/common/st_sensors/st_sensors_spi.c
@@ -18,13 +18,6 @@
 
 #define ST_SENSORS_SPI_MULTIREAD	0xc0
 
-static unsigned int st_sensors_spi_get_irq(struct iio_dev *indio_dev)
-{
-	struct st_sensor_data *sdata = iio_priv(indio_dev);
-
-	return to_spi_device(sdata->dev)->irq;
-}
-
 static const struct regmap_config st_sensors_spi_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -117,7 +110,7 @@ int st_sensors_spi_configure(struct iio_dev *indio_dev,
 	indio_dev->name = spi->modalias;
 
 	sdata->dev = &spi->dev;
-	sdata->get_irq_data_ready = st_sensors_spi_get_irq;
+	sdata->irq = spi->irq;
 
 	return 0;
 }
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index bed7b8682b17..4a2efa00f7f2 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -121,9 +121,9 @@ static irqreturn_t st_sensors_irq_thread(int irq, void *p)
 int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops)
 {
-	int err, irq;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
 	unsigned long irq_trig;
+	int err;
 
 	sdata->trig = iio_trigger_alloc("%s-trigger", indio_dev->name);
 	if (sdata->trig == NULL) {
@@ -135,8 +135,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	sdata->trig->ops = trigger_ops;
 	sdata->trig->dev.parent = sdata->dev;
 
-	irq = sdata->get_irq_data_ready(indio_dev);
-	irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq));
+	irq_trig = irqd_get_trigger_type(irq_get_irq_data(sdata->irq));
 	/*
 	 * If the IRQ is triggered on falling edge, we need to mark the
 	 * interrupt as active low, if the hardware supports this.
@@ -206,12 +205,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	    sdata->sensor_settings->drdy_irq.stat_drdy.addr)
 		irq_trig |= IRQF_SHARED;
 
-	err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
-			st_sensors_irq_handler,
-			st_sensors_irq_thread,
-			irq_trig,
-			sdata->trig->name,
-			sdata->trig);
+	err = request_threaded_irq(sdata->irq,
+				   st_sensors_irq_handler,
+				   st_sensors_irq_thread,
+				   irq_trig,
+				   sdata->trig->name,
+				   sdata->trig);
 	if (err) {
 		dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
 		goto iio_trigger_free;
@@ -227,7 +226,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	return 0;
 
 iio_trigger_register_error:
-	free_irq(sdata->get_irq_data_ready(indio_dev), sdata->trig);
+	free_irq(sdata->irq, sdata->trig);
 iio_trigger_free:
 	iio_trigger_free(sdata->trig);
 	return err;
@@ -239,7 +238,7 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
 
 	iio_trigger_unregister(sdata->trig);
-	free_irq(sdata->get_irq_data_ready(indio_dev), sdata->trig);
+	free_irq(sdata->irq, sdata->trig);
 	iio_trigger_free(sdata->trig);
 }
 EXPORT_SYMBOL(st_sensors_deallocate_trigger);
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 02e42c945181..c0acbb5d2ffb 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -388,7 +388,6 @@ EXPORT_SYMBOL(st_gyro_get_settings);
 int st_gyro_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *gdata = iio_priv(indio_dev);
-	int irq = gdata->get_irq_data_ready(indio_dev);
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -419,7 +418,7 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 	if (err < 0)
 		goto st_gyro_power_off;
 
-	if (irq > 0) {
+	if (gdata->irq > 0) {
 		err = st_sensors_allocate_trigger(indio_dev,
 						  ST_GYRO_TRIGGER_OPS);
 		if (err < 0)
@@ -436,7 +435,7 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 	return 0;
 
 st_gyro_device_register_error:
-	if (irq > 0)
+	if (gdata->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 st_gyro_probe_trigger_error:
 	st_gyro_deallocate_ring(indio_dev);
@@ -454,7 +453,7 @@ void st_gyro_common_remove(struct iio_dev *indio_dev)
 	st_sensors_power_disable(indio_dev);
 
 	iio_device_unregister(indio_dev);
-	if (gdata->get_irq_data_ready(indio_dev) > 0)
+	if (gdata->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 
 	st_gyro_deallocate_ring(indio_dev);
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 804353a483c7..a3a268ee2896 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -490,7 +490,6 @@ EXPORT_SYMBOL(st_magn_get_settings);
 int st_magn_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *mdata = iio_priv(indio_dev);
-	int irq = mdata->get_irq_data_ready(indio_dev);
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -520,7 +519,7 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 	if (err < 0)
 		goto st_magn_power_off;
 
-	if (irq > 0) {
+	if (mdata->irq > 0) {
 		err = st_sensors_allocate_trigger(indio_dev,
 						ST_MAGN_TRIGGER_OPS);
 		if (err < 0)
@@ -537,7 +536,7 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 	return 0;
 
 st_magn_device_register_error:
-	if (irq > 0)
+	if (mdata->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 st_magn_probe_trigger_error:
 	st_magn_deallocate_ring(indio_dev);
@@ -555,7 +554,7 @@ void st_magn_common_remove(struct iio_dev *indio_dev)
 	st_sensors_power_disable(indio_dev);
 
 	iio_device_unregister(indio_dev);
-	if (mdata->get_irq_data_ready(indio_dev) > 0)
+	if (mdata->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 
 	st_magn_deallocate_ring(indio_dev);
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 9ef92a501286..ca6863b32a5f 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -686,7 +686,6 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 	struct st_sensor_data *press_data = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata =
 		(struct st_sensors_platform_data *)press_data->dev->platform_data;
-	int irq = press_data->get_irq_data_ready(indio_dev);
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -729,7 +728,7 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 	if (err < 0)
 		goto st_press_power_off;
 
-	if (irq > 0) {
+	if (press_data->irq > 0) {
 		err = st_sensors_allocate_trigger(indio_dev,
 						  ST_PRESS_TRIGGER_OPS);
 		if (err < 0)
@@ -746,7 +745,7 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 	return err;
 
 st_press_device_register_error:
-	if (irq > 0)
+	if (press_data->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 st_press_probe_trigger_error:
 	st_press_deallocate_ring(indio_dev);
@@ -764,7 +763,7 @@ void st_press_common_remove(struct iio_dev *indio_dev)
 	st_sensors_power_disable(indio_dev);
 
 	iio_device_unregister(indio_dev);
-	if (press_data->get_irq_data_ready(indio_dev) > 0)
+	if (press_data->irq > 0)
 		st_sensors_deallocate_trigger(indio_dev);
 
 	st_press_deallocate_ring(indio_dev);
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 28fc1f9fa7d5..4d0889bf1c6c 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -220,7 +220,7 @@ struct st_sensor_settings {
  * num_data_channels: Number of data channels used in buffer.
  * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2).
  * @int_pin_open_drain: Set the interrupt/DRDY to open drain.
- * @get_irq_data_ready: Function to get the IRQ used for data ready signal.
+ * @irq: the IRQ number.
  * @edge_irq: the IRQ triggers on edges and need special handling.
  * @hw_irq_trigger: if we're using the hardware interrupt on the sensor.
  * @hw_timestamp: Latest timestamp from the interrupt handler, when in use.
@@ -244,8 +244,7 @@ struct st_sensor_data {
 
 	u8 drdy_int_pin;
 	bool int_pin_open_drain;
-
-	unsigned int (*get_irq_data_ready) (struct iio_dev *indio_dev);
+	int irq;
 
 	bool edge_irq;
 	bool hw_irq_trigger;
-- 
cgit v1.2.3


From 20cf4e026730104892fa1268de0371a631cee294 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 29 Jul 2019 13:22:09 -0400
Subject: rdma: Enable ib_alloc_cq to spread work over a device's comp_vectors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Send and Receive completion is handled on a single CPU selected at
the time each Completion Queue is allocated. Typically this is when
an initiator instantiates an RDMA transport, or when a target
accepts an RDMA connection.

Some ULPs cannot open a connection per CPU to spread completion
workload across available CPUs and MSI vectors. For such ULPs,
provide an API that allows the RDMA core to select a completion
vector based on the device's complement of available comp_vecs.

ULPs that invoke ib_alloc_cq() with only comp_vector 0 are converted
to use the new API so that their completion workloads interfere less
with each other.

Suggested-by: Håkon Bugge <haakon.bugge@oracle.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Cc: <linux-cifs@vger.kernel.org>
Cc: <v9fs-developer@lists.sourceforge.net>
Link: https://lore.kernel.org/r/20190729171923.13428.52555.stgit@manet.1015granger.net
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/cq.c             | 28 ++++++++++++++++++++++++++++
 drivers/infiniband/ulp/srpt/ib_srpt.c    |  4 ++--
 fs/cifs/smbdirect.c                      | 10 ++++++----
 include/rdma/ib_verbs.h                  | 19 +++++++++++++++++++
 net/9p/trans_rdma.c                      |  6 +++---
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  8 ++++----
 net/sunrpc/xprtrdma/verbs.c              | 13 ++++++-------
 7 files changed, 68 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index 7c599878ccf7..bbfded6d5d3d 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -252,6 +252,34 @@ out_free_cq:
 }
 EXPORT_SYMBOL(__ib_alloc_cq_user);
 
+/**
+ * __ib_alloc_cq_any - allocate a completion queue
+ * @dev:		device to allocate the CQ for
+ * @private:		driver private data, accessible from cq->cq_context
+ * @nr_cqe:		number of CQEs to allocate
+ * @poll_ctx:		context to poll the CQ from
+ * @caller:		module owner name
+ *
+ * Attempt to spread ULP Completion Queues over each device's interrupt
+ * vectors. A simple best-effort mechanism is used.
+ */
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+				int nr_cqe, enum ib_poll_context poll_ctx,
+				const char *caller)
+{
+	static atomic_t counter;
+	int comp_vector = 0;
+
+	if (dev->num_comp_vectors > 1)
+		comp_vector =
+			atomic_inc_return(&counter) %
+			min_t(int, dev->num_comp_vectors, num_online_cpus());
+
+	return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+				  caller, NULL);
+}
+EXPORT_SYMBOL(__ib_alloc_cq_any);
+
 /**
  * ib_free_cq_user - free a completion queue
  * @cq:		completion queue to free.
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1a039f16d315..e25c70a56be6 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1767,8 +1767,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
 		goto out;
 
 retry:
-	ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + sq_size,
-			0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
+	ch->cq = ib_alloc_cq_any(sdev->device, ch, ch->rq_size + sq_size,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(ch->cq)) {
 		ret = PTR_ERR(ch->cq);
 		pr_err("failed to create CQ cqe= %d ret= %d\n",
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index cd07e5301d42..3c91fa97c9a8 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1654,15 +1654,17 @@ static struct smbd_connection *_smbd_get_connection(
 
 	info->send_cq = NULL;
 	info->recv_cq = NULL;
-	info->send_cq = ib_alloc_cq(info->id->device, info,
-			info->send_credit_target, 0, IB_POLL_SOFTIRQ);
+	info->send_cq =
+		ib_alloc_cq_any(info->id->device, info,
+				info->send_credit_target, IB_POLL_SOFTIRQ);
 	if (IS_ERR(info->send_cq)) {
 		info->send_cq = NULL;
 		goto alloc_cq_failed;
 	}
 
-	info->recv_cq = ib_alloc_cq(info->id->device, info,
-			info->receive_credit_max, 0, IB_POLL_SOFTIRQ);
+	info->recv_cq =
+		ib_alloc_cq_any(info->id->device, info,
+				info->receive_credit_max, IB_POLL_SOFTIRQ);
 	if (IS_ERR(info->recv_cq)) {
 		info->recv_cq = NULL;
 		goto alloc_cq_failed;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c5f8a9f17063..2a1523ccd7ab 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3711,6 +3711,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
 				NULL);
 }
 
+struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private,
+				int nr_cqe, enum ib_poll_context poll_ctx,
+				const char *caller);
+
+/**
+ * ib_alloc_cq_any: Allocate kernel CQ
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @poll_ctx: Context used for polling the CQ
+ */
+static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev,
+					    void *private, int nr_cqe,
+					    enum ib_poll_context poll_ctx)
+{
+	return __ib_alloc_cq_any(dev, private, nr_cqe, poll_ctx,
+				 KBUILD_MODNAME);
+}
+
 /**
  * ib_free_cq_user - Free kernel/user CQ
  * @cq: The CQ to free
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index bac8dad5dd69..b21c3c209815 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -685,9 +685,9 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
 		goto error;
 
 	/* Create the Completion Queue */
-	rdma->cq = ib_alloc_cq(rdma->cm_id->device, client,
-			opts.sq_depth + opts.rq_depth + 1,
-			0, IB_POLL_SOFTIRQ);
+	rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
+				   opts.sq_depth + opts.rq_depth + 1,
+				   IB_POLL_SOFTIRQ);
 	if (IS_ERR(rdma->cq))
 		goto error;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3fe665152d95..4d3db6ee7f09 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -454,14 +454,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		dprintk("svcrdma: error creating PD for connect request\n");
 		goto errout;
 	}
-	newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
-					0, IB_POLL_WORKQUEUE);
+	newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
+					    IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_sq_cq)) {
 		dprintk("svcrdma: error creating SQ CQ for connect request\n");
 		goto errout;
 	}
-	newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
-					0, IB_POLL_WORKQUEUE);
+	newxprt->sc_rq_cq =
+		ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
 	if (IS_ERR(newxprt->sc_rq_cq)) {
 		dprintk("svcrdma: error creating RQ CQ for connect request\n");
 		goto errout;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 805b1f35e1ca..b10aa16557f0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -521,18 +521,17 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 	init_waitqueue_head(&ep->rep_connect_wait);
 	ep->rep_receive_count = 0;
 
-	sendcq = ib_alloc_cq(ia->ri_id->device, NULL,
-			     ep->rep_attr.cap.max_send_wr + 1,
-			     ia->ri_id->device->num_comp_vectors > 1 ? 1 : 0,
-			     IB_POLL_WORKQUEUE);
+	sendcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+				 ep->rep_attr.cap.max_send_wr + 1,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(sendcq)) {
 		rc = PTR_ERR(sendcq);
 		goto out1;
 	}
 
-	recvcq = ib_alloc_cq(ia->ri_id->device, NULL,
-			     ep->rep_attr.cap.max_recv_wr + 1,
-			     0, IB_POLL_WORKQUEUE);
+	recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL,
+				 ep->rep_attr.cap.max_recv_wr + 1,
+				 IB_POLL_WORKQUEUE);
 	if (IS_ERR(recvcq)) {
 		rc = PTR_ERR(recvcq);
 		goto out2;
-- 
cgit v1.2.3


From 69bb18ddfc4331ba1dea9db811caf93e95726408 Mon Sep 17 00:00:00 2001
From: Wu Hao <hao.wu@intel.com>
Date: Sun, 4 Aug 2019 18:20:11 +0800
Subject: fpga: dfl: fme: add DFL_FPGA_FME_PORT_RELEASE/ASSIGN ioctl support.

In order to support virtualization usage via PCIe SRIOV, this patch
adds two ioctls under FPGA Management Engine (FME) to release and
assign back the port device. In order to safely turn Port from PF
into VF and enable PCIe SRIOV, it requires user to invoke this
PORT_RELEASE ioctl to release port firstly to remove userspace
interfaces, and then configure the PF/VF access register in FME.
After disable SRIOV, it requires user to invoke this PORT_ASSIGN
ioctl to attach the port back to PF.

 Ioctl interfaces:
 * DFL_FPGA_FME_PORT_RELEASE
   Release platform device of given port, it deletes port platform
   device to remove related userspace interfaces on PF. After this
   function, then it's safe to configure PF/VF access mode to VF,
   and enable VFs via SRIOV.

 * DFL_FPGA_FME_PORT_ASSIGN
   Assign platform device of given port back to PF. After configure
   PF/VF access mode to PF, this ioctl adds port platform device
   back to re-enable related userspace interfaces on PF.

Signed-off-by: Zhang Yi Z <yi.z.zhang@intel.com>
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
Signed-off-by: Wu Hao <hao.wu@intel.com>
Acked-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Moritz Fischer <mdf@kernel.org>
Link: https://lore.kernel.org/r/1564914022-3710-2-git-send-email-hao.wu@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/fpga/dfl-fme-main.c   |  42 ++++++++++++++++
 drivers/fpga/dfl.c            | 113 +++++++++++++++++++++++++++++++++++++-----
 drivers/fpga/dfl.h            |  10 ++++
 include/uapi/linux/fpga-dfl.h |  18 +++++++
 4 files changed, 171 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/fpga/dfl-fme-main.c b/drivers/fpga/dfl-fme-main.c
index 0be4635583d5..dfea2dee78c3 100644
--- a/drivers/fpga/dfl-fme-main.c
+++ b/drivers/fpga/dfl-fme-main.c
@@ -16,6 +16,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/uaccess.h>
 #include <linux/fpga-dfl.h>
 
 #include "dfl.h"
@@ -104,9 +105,50 @@ static void fme_hdr_uinit(struct platform_device *pdev,
 	device_remove_groups(&pdev->dev, fme_hdr_groups);
 }
 
+static long fme_hdr_ioctl_release_port(struct dfl_feature_platform_data *pdata,
+				       unsigned long arg)
+{
+	struct dfl_fpga_cdev *cdev = pdata->dfl_cdev;
+	int port_id;
+
+	if (get_user(port_id, (int __user *)arg))
+		return -EFAULT;
+
+	return dfl_fpga_cdev_release_port(cdev, port_id);
+}
+
+static long fme_hdr_ioctl_assign_port(struct dfl_feature_platform_data *pdata,
+				      unsigned long arg)
+{
+	struct dfl_fpga_cdev *cdev = pdata->dfl_cdev;
+	int port_id;
+
+	if (get_user(port_id, (int __user *)arg))
+		return -EFAULT;
+
+	return dfl_fpga_cdev_assign_port(cdev, port_id);
+}
+
+static long fme_hdr_ioctl(struct platform_device *pdev,
+			  struct dfl_feature *feature,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+
+	switch (cmd) {
+	case DFL_FPGA_FME_PORT_RELEASE:
+		return fme_hdr_ioctl_release_port(pdata, arg);
+	case DFL_FPGA_FME_PORT_ASSIGN:
+		return fme_hdr_ioctl_assign_port(pdata, arg);
+	}
+
+	return -ENODEV;
+}
+
 static const struct dfl_feature_ops fme_hdr_ops = {
 	.init = fme_hdr_init,
 	.uinit = fme_hdr_uinit,
+	.ioctl = fme_hdr_ioctl,
 };
 
 static struct dfl_feature_driver fme_feature_drvs[] = {
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index 4b66aaa32b5a..70ffe8b4c157 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -231,16 +231,20 @@ EXPORT_SYMBOL_GPL(dfl_fpga_port_ops_del);
  */
 int dfl_fpga_check_port_id(struct platform_device *pdev, void *pport_id)
 {
-	struct dfl_fpga_port_ops *port_ops = dfl_fpga_port_ops_get(pdev);
-	int port_id;
+	struct dfl_feature_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct dfl_fpga_port_ops *port_ops;
+
+	if (pdata->id != FEATURE_DEV_ID_UNUSED)
+		return pdata->id == *(int *)pport_id;
 
+	port_ops = dfl_fpga_port_ops_get(pdev);
 	if (!port_ops || !port_ops->get_id)
 		return 0;
 
-	port_id = port_ops->get_id(pdev);
+	pdata->id = port_ops->get_id(pdev);
 	dfl_fpga_port_ops_put(port_ops);
 
-	return port_id == *(int *)pport_id;
+	return pdata->id == *(int *)pport_id;
 }
 EXPORT_SYMBOL_GPL(dfl_fpga_check_port_id);
 
@@ -474,6 +478,7 @@ static int build_info_commit_dev(struct build_feature_devs_info *binfo)
 	pdata->dev = fdev;
 	pdata->num = binfo->feature_num;
 	pdata->dfl_cdev = binfo->cdev;
+	pdata->id = FEATURE_DEV_ID_UNUSED;
 	mutex_init(&pdata->lock);
 	lockdep_set_class_and_name(&pdata->lock, &dfl_pdata_keys[type],
 				   dfl_pdata_key_strings[type]);
@@ -973,25 +978,27 @@ void dfl_fpga_feature_devs_remove(struct dfl_fpga_cdev *cdev)
 {
 	struct dfl_feature_platform_data *pdata, *ptmp;
 
-	remove_feature_devs(cdev);
-
 	mutex_lock(&cdev->lock);
-	if (cdev->fme_dev) {
-		/* the fme should be unregistered. */
-		WARN_ON(device_is_registered(cdev->fme_dev));
+	if (cdev->fme_dev)
 		put_device(cdev->fme_dev);
-	}
 
 	list_for_each_entry_safe(pdata, ptmp, &cdev->port_dev_list, node) {
 		struct platform_device *port_dev = pdata->dev;
 
-		/* the port should be unregistered. */
-		WARN_ON(device_is_registered(&port_dev->dev));
+		/* remove released ports */
+		if (!device_is_registered(&port_dev->dev)) {
+			dfl_id_free(feature_dev_id_type(port_dev),
+				    port_dev->id);
+			platform_device_put(port_dev);
+		}
+
 		list_del(&pdata->node);
 		put_device(&port_dev->dev);
 	}
 	mutex_unlock(&cdev->lock);
 
+	remove_feature_devs(cdev);
+
 	fpga_region_unregister(cdev->region);
 	devm_kfree(cdev->parent, cdev);
 }
@@ -1042,6 +1049,88 @@ static int __init dfl_fpga_init(void)
 	return ret;
 }
 
+/**
+ * dfl_fpga_cdev_release_port - release a port platform device
+ *
+ * @cdev: parent container device.
+ * @port_id: id of the port platform device.
+ *
+ * This function allows user to release a port platform device. This is a
+ * mandatory step before turn a port from PF into VF for SRIOV support.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int dfl_fpga_cdev_release_port(struct dfl_fpga_cdev *cdev, int port_id)
+{
+	struct platform_device *port_pdev;
+	int ret = -ENODEV;
+
+	mutex_lock(&cdev->lock);
+	port_pdev = __dfl_fpga_cdev_find_port(cdev, &port_id,
+					      dfl_fpga_check_port_id);
+	if (!port_pdev)
+		goto unlock_exit;
+
+	if (!device_is_registered(&port_pdev->dev)) {
+		ret = -EBUSY;
+		goto put_dev_exit;
+	}
+
+	ret = dfl_feature_dev_use_begin(dev_get_platdata(&port_pdev->dev));
+	if (ret)
+		goto put_dev_exit;
+
+	platform_device_del(port_pdev);
+	cdev->released_port_num++;
+put_dev_exit:
+	put_device(&port_pdev->dev);
+unlock_exit:
+	mutex_unlock(&cdev->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dfl_fpga_cdev_release_port);
+
+/**
+ * dfl_fpga_cdev_assign_port - assign a port platform device back
+ *
+ * @cdev: parent container device.
+ * @port_id: id of the port platform device.
+ *
+ * This function allows user to assign a port platform device back. This is
+ * a mandatory step after disable SRIOV support.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int dfl_fpga_cdev_assign_port(struct dfl_fpga_cdev *cdev, int port_id)
+{
+	struct platform_device *port_pdev;
+	int ret = -ENODEV;
+
+	mutex_lock(&cdev->lock);
+	port_pdev = __dfl_fpga_cdev_find_port(cdev, &port_id,
+					      dfl_fpga_check_port_id);
+	if (!port_pdev)
+		goto unlock_exit;
+
+	if (device_is_registered(&port_pdev->dev)) {
+		ret = -EBUSY;
+		goto put_dev_exit;
+	}
+
+	ret = platform_device_add(port_pdev);
+	if (ret)
+		goto put_dev_exit;
+
+	dfl_feature_dev_use_end(dev_get_platdata(&port_pdev->dev));
+	cdev->released_port_num--;
+put_dev_exit:
+	put_device(&port_pdev->dev);
+unlock_exit:
+	mutex_unlock(&cdev->lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dfl_fpga_cdev_assign_port);
+
 static void __exit dfl_fpga_exit(void)
 {
 	dfl_chardev_uinit();
diff --git a/drivers/fpga/dfl.h b/drivers/fpga/dfl.h
index a8b869e9e5b7..6f7855e57869 100644
--- a/drivers/fpga/dfl.h
+++ b/drivers/fpga/dfl.h
@@ -183,6 +183,8 @@ struct dfl_feature {
 
 #define DEV_STATUS_IN_USE	0
 
+#define FEATURE_DEV_ID_UNUSED	(-1)
+
 /**
  * struct dfl_feature_platform_data - platform data for feature devices
  *
@@ -191,6 +193,7 @@ struct dfl_feature {
  * @cdev: cdev of feature dev.
  * @dev: ptr to platform device linked with this platform data.
  * @dfl_cdev: ptr to container device.
+ * @id: id used for this feature device.
  * @disable_count: count for port disable.
  * @num: number for sub features.
  * @dev_status: dev status (e.g. DEV_STATUS_IN_USE).
@@ -203,6 +206,7 @@ struct dfl_feature_platform_data {
 	struct cdev cdev;
 	struct platform_device *dev;
 	struct dfl_fpga_cdev *dfl_cdev;
+	int id;
 	unsigned int disable_count;
 	unsigned long dev_status;
 	void *private;
@@ -373,6 +377,7 @@ void dfl_fpga_enum_info_free(struct dfl_fpga_enum_info *info);
  * @fme_dev: FME feature device under this container device.
  * @lock: mutex lock to protect the port device list.
  * @port_dev_list: list of all port feature devices under this container device.
+ * @released_port_num: released port number under this container device.
  */
 struct dfl_fpga_cdev {
 	struct device *parent;
@@ -380,6 +385,7 @@ struct dfl_fpga_cdev {
 	struct device *fme_dev;
 	struct mutex lock;
 	struct list_head port_dev_list;
+	int released_port_num;
 };
 
 struct dfl_fpga_cdev *
@@ -407,4 +413,8 @@ dfl_fpga_cdev_find_port(struct dfl_fpga_cdev *cdev, void *data,
 
 	return pdev;
 }
+
+int dfl_fpga_cdev_release_port(struct dfl_fpga_cdev *cdev, int port_id);
+int dfl_fpga_cdev_assign_port(struct dfl_fpga_cdev *cdev, int port_id);
+
 #endif /* __FPGA_DFL_H */
diff --git a/include/uapi/linux/fpga-dfl.h b/include/uapi/linux/fpga-dfl.h
index 2e324e515c41..ec70a0746e59 100644
--- a/include/uapi/linux/fpga-dfl.h
+++ b/include/uapi/linux/fpga-dfl.h
@@ -176,4 +176,22 @@ struct dfl_fpga_fme_port_pr {
 
 #define DFL_FPGA_FME_PORT_PR	_IO(DFL_FPGA_MAGIC, DFL_FME_BASE + 0)
 
+/**
+ * DFL_FPGA_FME_PORT_RELEASE - _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 1,
+ *						int port_id)
+ *
+ * Driver releases the port per Port ID provided by caller.
+ * Return: 0 on success, -errno on failure.
+ */
+#define DFL_FPGA_FME_PORT_RELEASE   _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 1, int)
+
+/**
+ * DFL_FPGA_FME_PORT_ASSIGN - _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 2,
+ *						int port_id)
+ *
+ * Driver assigns the port back per Port ID provided by caller.
+ * Return: 0 on success, -errno on failure.
+ */
+#define DFL_FPGA_FME_PORT_ASSIGN     _IOW(DFL_FPGA_MAGIC, DFL_FME_BASE + 2, int)
+
 #endif /* _UAPI_LINUX_FPGA_DFL_H */
-- 
cgit v1.2.3


From 05bb411ada9508b48044ef5d84dd8bc46cece607 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galpress@amazon.com>
Date: Thu, 1 Aug 2019 20:14:46 +0300
Subject: RDMA/core: Introduce ratelimited ibdev printk functions

Add ratelimited helpers to the ibdev_* printk functions.
Implementation inspired by counterpart dev_*_ratelimited functions.

Signed-off-by: Gal Pressman <galpress@amazon.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190801171447.54440-2-galpress@amazon.com
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 include/rdma/ib_verbs.h | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'include')

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 2a1523ccd7ab..0ecda7d15df2 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -107,6 +107,48 @@ static inline
 void ibdev_dbg(const struct ib_device *ibdev, const char *format, ...) {}
 #endif
 
+#define ibdev_level_ratelimited(ibdev_level, ibdev, fmt, ...)           \
+do {                                                                    \
+	static DEFINE_RATELIMIT_STATE(_rs,                              \
+				      DEFAULT_RATELIMIT_INTERVAL,       \
+				      DEFAULT_RATELIMIT_BURST);         \
+	if (__ratelimit(&_rs))                                          \
+		ibdev_level(ibdev, fmt, ##__VA_ARGS__);                 \
+} while (0)
+
+#define ibdev_emerg_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_emerg, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_alert_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_alert, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_crit_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_crit, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_err_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_err, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_warn_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_warn, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_notice_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_notice, ibdev, fmt, ##__VA_ARGS__)
+#define ibdev_info_ratelimited(ibdev, fmt, ...) \
+	ibdev_level_ratelimited(ibdev_info, ibdev, fmt, ##__VA_ARGS__)
+
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/* descriptor check is first to prevent flooding with "callbacks suppressed" */
+#define ibdev_dbg_ratelimited(ibdev, fmt, ...)                          \
+do {                                                                    \
+	static DEFINE_RATELIMIT_STATE(_rs,                              \
+				      DEFAULT_RATELIMIT_INTERVAL,       \
+				      DEFAULT_RATELIMIT_BURST);         \
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);                 \
+	if (DYNAMIC_DEBUG_BRANCH(descriptor) && __ratelimit(&_rs))      \
+		__dynamic_ibdev_dbg(&descriptor, ibdev, fmt,            \
+				    ##__VA_ARGS__);                     \
+} while (0)
+#else
+__printf(2, 3) __cold
+static inline
+void ibdev_dbg_ratelimited(const struct ib_device *ibdev, const char *format, ...) {}
+#endif
+
 union ib_gid {
 	u8	raw[16];
 	struct {
-- 
cgit v1.2.3


From 5d92e631b8be8965a90c144320f06e096081a551 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Thu, 1 Aug 2019 14:36:01 -0700
Subject: net/tls: partially revert fix transition through disconnect with
 close

Looks like we were slightly overzealous with the shutdown()
cleanup. Even though the sock->sk_state can reach CLOSED again,
socket->state will not got back to SS_UNCONNECTED once
connections is ESTABLISHED. Meaning we will see EISCONN if
we try to reconnect, and EINVAL if we try to listen.

Only listen sockets can be shutdown() and reused, but since
ESTABLISHED sockets can never be re-connected() or used for
listen() we don't need to try to clean up the ULP state early.

Fixes: 32857cf57f92 ("net/tls: fix transition through disconnect with close")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tls-offload.rst |  6 ----
 include/net/tls.h                        |  2 --
 net/tls/tls_main.c                       | 55 --------------------------------
 3 files changed, 63 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index 2d9f9ebf4117..b70b70dc4524 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -524,9 +524,3 @@ Redirects leak clear text
 
 In the RX direction, if segment has already been decrypted by the device
 and it gets redirected or mirrored - clear text will be transmitted out.
-
-shutdown() doesn't clear TLS state
-----------------------------------
-
-shutdown() system call allows for a TLS socket to be reused as a different
-connection. Offload doesn't currently handle that.
diff --git a/include/net/tls.h b/include/net/tls.h
index 9e425ac2de45..41b2d41bb1b8 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -290,8 +290,6 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
-
-	struct work_struct gc;
 };
 
 enum tls_offload_ctx_dir {
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index f208f8455ef2..9cbbae606ced 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -261,33 +261,6 @@ void tls_ctx_free(struct tls_context *ctx)
 	kfree(ctx);
 }
 
-static void tls_ctx_free_deferred(struct work_struct *gc)
-{
-	struct tls_context *ctx = container_of(gc, struct tls_context, gc);
-
-	/* Ensure any remaining work items are completed. The sk will
-	 * already have lost its tls_ctx reference by the time we get
-	 * here so no xmit operation will actually be performed.
-	 */
-	if (ctx->tx_conf == TLS_SW) {
-		tls_sw_cancel_work_tx(ctx);
-		tls_sw_free_ctx_tx(ctx);
-	}
-
-	if (ctx->rx_conf == TLS_SW) {
-		tls_sw_strparser_done(ctx);
-		tls_sw_free_ctx_rx(ctx);
-	}
-
-	tls_ctx_free(ctx);
-}
-
-static void tls_ctx_free_wq(struct tls_context *ctx)
-{
-	INIT_WORK(&ctx->gc, tls_ctx_free_deferred);
-	schedule_work(&ctx->gc);
-}
-
 static void tls_sk_proto_cleanup(struct sock *sk,
 				 struct tls_context *ctx, long timeo)
 {
@@ -315,29 +288,6 @@ static void tls_sk_proto_cleanup(struct sock *sk,
 #endif
 }
 
-static void tls_sk_proto_unhash(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	long timeo = sock_sndtimeo(sk, 0);
-	struct tls_context *ctx;
-
-	if (unlikely(!icsk->icsk_ulp_data)) {
-		if (sk->sk_prot->unhash)
-			sk->sk_prot->unhash(sk);
-	}
-
-	ctx = tls_get_ctx(sk);
-	tls_sk_proto_cleanup(sk, ctx, timeo);
-	write_lock_bh(&sk->sk_callback_lock);
-	icsk->icsk_ulp_data = NULL;
-	sk->sk_prot = ctx->sk_proto;
-	write_unlock_bh(&sk->sk_callback_lock);
-
-	if (ctx->sk_proto->unhash)
-		ctx->sk_proto->unhash(sk);
-	tls_ctx_free_wq(ctx);
-}
-
 static void tls_sk_proto_close(struct sock *sk, long timeout)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -786,7 +736,6 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_BASE][TLS_BASE].setsockopt	= tls_setsockopt;
 	prot[TLS_BASE][TLS_BASE].getsockopt	= tls_getsockopt;
 	prot[TLS_BASE][TLS_BASE].close		= tls_sk_proto_close;
-	prot[TLS_BASE][TLS_BASE].unhash		= tls_sk_proto_unhash;
 
 	prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
 	prot[TLS_SW][TLS_BASE].sendmsg		= tls_sw_sendmsg;
@@ -804,20 +753,16 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 
 #ifdef CONFIG_TLS_DEVICE
 	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
-	prot[TLS_HW][TLS_BASE].unhash		= base->unhash;
 	prot[TLS_HW][TLS_BASE].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_BASE].sendpage		= tls_device_sendpage;
 
 	prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
-	prot[TLS_HW][TLS_SW].unhash		= base->unhash;
 	prot[TLS_HW][TLS_SW].sendmsg		= tls_device_sendmsg;
 	prot[TLS_HW][TLS_SW].sendpage		= tls_device_sendpage;
 
 	prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
-	prot[TLS_BASE][TLS_HW].unhash		= base->unhash;
 
 	prot[TLS_SW][TLS_HW] = prot[TLS_SW][TLS_SW];
-	prot[TLS_SW][TLS_HW].unhash		= base->unhash;
 
 	prot[TLS_HW][TLS_HW] = prot[TLS_HW][TLS_SW];
 #endif
-- 
cgit v1.2.3


From 9aebf4de220344e2f03ae6386272bf98f80fd295 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 24 Jul 2019 04:05:11 +0530
Subject: base: soc: Add serial_number attribute to soc

Add new attribute named "serial_number" as a standard interface for
user space to acquire the serial number of the device.

For ST-Ericsson SoCs this is exposed by the cryptically named "soc_id"
attribute, but this provides a human readable standardized name for this
property.

Tested-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Vaishali Thakkar <vaishali.thakkar@linaro.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 Documentation/ABI/testing/sysfs-devices-soc | 7 +++++++
 drivers/base/soc.c                          | 7 +++++++
 include/linux/sys_soc.h                     | 1 +
 3 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-devices-soc b/Documentation/ABI/testing/sysfs-devices-soc
index 6d9cc253f2b2..ba3a3fac0ee1 100644
--- a/Documentation/ABI/testing/sysfs-devices-soc
+++ b/Documentation/ABI/testing/sysfs-devices-soc
@@ -26,6 +26,13 @@ Description:
 		Read-only attribute common to all SoCs. Contains SoC family name
 		(e.g. DB8500).
 
+What:		/sys/devices/socX/serial_number
+Date:		January 2019
+contact:	Bjorn Andersson <bjorn.andersson@linaro.org>
+Description:
+		Read-only attribute supported by most SoCs. Contains the SoC's
+		serial number, if available.
+
 What:		/sys/devices/socX/soc_id
 Date:		January 2012
 contact:	Lee Jones <lee.jones@linaro.org>
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 10b280f30217..b0933b9fe67f 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -33,6 +33,7 @@ static struct bus_type soc_bus_type = {
 
 static DEVICE_ATTR(machine,  S_IRUGO, soc_info_get,  NULL);
 static DEVICE_ATTR(family,   S_IRUGO, soc_info_get,  NULL);
+static DEVICE_ATTR(serial_number, S_IRUGO, soc_info_get,  NULL);
 static DEVICE_ATTR(soc_id,   S_IRUGO, soc_info_get,  NULL);
 static DEVICE_ATTR(revision, S_IRUGO, soc_info_get,  NULL);
 
@@ -57,6 +58,9 @@ static umode_t soc_attribute_mode(struct kobject *kobj,
 	if ((attr == &dev_attr_revision.attr)
 	    && (soc_dev->attr->revision != NULL))
 		return attr->mode;
+	if ((attr == &dev_attr_serial_number.attr)
+	    && (soc_dev->attr->serial_number != NULL))
+		return attr->mode;
 	if ((attr == &dev_attr_soc_id.attr)
 	    && (soc_dev->attr->soc_id != NULL))
 		return attr->mode;
@@ -77,6 +81,8 @@ static ssize_t soc_info_get(struct device *dev,
 		return sprintf(buf, "%s\n", soc_dev->attr->family);
 	if (attr == &dev_attr_revision)
 		return sprintf(buf, "%s\n", soc_dev->attr->revision);
+	if (attr == &dev_attr_serial_number)
+		return sprintf(buf, "%s\n", soc_dev->attr->serial_number);
 	if (attr == &dev_attr_soc_id)
 		return sprintf(buf, "%s\n", soc_dev->attr->soc_id);
 
@@ -87,6 +93,7 @@ static ssize_t soc_info_get(struct device *dev,
 static struct attribute *soc_attr[] = {
 	&dev_attr_machine.attr,
 	&dev_attr_family.attr,
+	&dev_attr_serial_number.attr,
 	&dev_attr_soc_id.attr,
 	&dev_attr_revision.attr,
 	NULL,
diff --git a/include/linux/sys_soc.h b/include/linux/sys_soc.h
index b7c70c3e953f..48ceea867dd6 100644
--- a/include/linux/sys_soc.h
+++ b/include/linux/sys_soc.h
@@ -12,6 +12,7 @@ struct soc_device_attribute {
 	const char *machine;
 	const char *family;
 	const char *revision;
+	const char *serial_number;
 	const char *soc_id;
 	const void *data;
 };
-- 
cgit v1.2.3


From c8424e776b093280d3fdd104d850706b3b229ac8 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Thu, 4 Jul 2019 15:57:34 -0300
Subject: MODSIGN: Export module signature definitions

IMA will use the module_signature format for append signatures, so export
the relevant definitions and factor out the code which verifies that the
appended signature trailer is valid.

Also, create a CONFIG_MODULE_SIG_FORMAT option so that IMA can select it
and be able to use mod_check_sig() without having to depend on either
CONFIG_MODULE_SIG or CONFIG_MODULES.

s390 duplicated the definition of struct module_signature so now they can
use the new <linux/module_signature.h> header instead.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Acked-by: Jessica Yu <jeyu@kernel.org>
Reviewed-by: Philipp Rudo <prudo@linux.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 arch/s390/Kconfig                     |  2 +-
 arch/s390/kernel/machine_kexec_file.c | 24 +--------------
 include/linux/module.h                |  3 --
 include/linux/module_signature.h      | 44 +++++++++++++++++++++++++++
 init/Kconfig                          |  6 +++-
 kernel/Makefile                       |  1 +
 kernel/module.c                       |  1 +
 kernel/module_signature.c             | 46 ++++++++++++++++++++++++++++
 kernel/module_signing.c               | 56 +++++------------------------------
 scripts/Makefile                      |  2 +-
 10 files changed, 108 insertions(+), 77 deletions(-)
 create mode 100644 include/linux/module_signature.h
 create mode 100644 kernel/module_signature.c

(limited to 'include')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index a4ad2733eedf..e0ae0d51f985 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -538,7 +538,7 @@ config ARCH_HAS_KEXEC_PURGATORY
 
 config KEXEC_VERIFY_SIG
 	bool "Verify kernel signature during kexec_file_load() syscall"
-	depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION
+	depends on KEXEC_FILE && MODULE_SIG_FORMAT
 	help
 	  This option makes kernel signature verification mandatory for
 	  the kexec_file_load() syscall.
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fbdd3ea73667..1ac9fbc6e01e 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -10,7 +10,7 @@
 #include <linux/elf.h>
 #include <linux/errno.h>
 #include <linux/kexec.h>
-#include <linux/module.h>
+#include <linux/module_signature.h>
 #include <linux/verification.h>
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
@@ -23,28 +23,6 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
 };
 
 #ifdef CONFIG_KEXEC_VERIFY_SIG
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *	- Signer's name
- *	- Key identifier
- *	- Signature data
- *	- Information block
- */
-struct module_signature {
-	u8	algo;		/* Public-key crypto algorithm [0] */
-	u8	hash;		/* Digest algorithm [0] */
-	u8	id_type;	/* Key identifier type [PKEY_ID_PKCS7] */
-	u8	signer_len;	/* Length of signer's name [0] */
-	u8	key_id_len;	/* Length of key identifier [0] */
-	u8	__pad[3];
-	__be32	sig_len;	/* Length of signature data */
-};
-
-#define PKEY_ID_PKCS7 2
-
 int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 {
 	const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
diff --git a/include/linux/module.h b/include/linux/module.h
index 1455812dd325..f6fc1dae74f4 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -26,9 +26,6 @@
 #include <linux/percpu.h>
 #include <asm/module.h>
 
-/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
-#define MODULE_SIG_STRING "~Module signature appended~\n"
-
 /* Not Yet Implemented */
 #define MODULE_SUPPORTED_DEVICE(name)
 
diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
new file mode 100644
index 000000000000..523617fc5b6a
--- /dev/null
+++ b/include/linux/module_signature.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Module signature handling.
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#ifndef _LINUX_MODULE_SIGNATURE_H
+#define _LINUX_MODULE_SIGNATURE_H
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIG_STRING "~Module signature appended~\n"
+
+enum pkey_id_type {
+	PKEY_ID_PGP,		/* OpenPGP generated key ID */
+	PKEY_ID_X509,		/* X.509 arbitrary subjectKeyIdentifier */
+	PKEY_ID_PKCS7,		/* Signature in PKCS#7 message */
+};
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ *	- Signer's name
+ *	- Key identifier
+ *	- Signature data
+ *	- Information block
+ */
+struct module_signature {
+	u8	algo;		/* Public-key crypto algorithm [0] */
+	u8	hash;		/* Digest algorithm [0] */
+	u8	id_type;	/* Key identifier type [PKEY_ID_PKCS7] */
+	u8	signer_len;	/* Length of signer's name [0] */
+	u8	key_id_len;	/* Length of key identifier [0] */
+	u8	__pad[3];
+	__be32	sig_len;	/* Length of signature data */
+};
+
+int mod_check_sig(const struct module_signature *ms, size_t file_len,
+		  const char *name);
+
+#endif /* _LINUX_MODULE_SIGNATURE_H */
diff --git a/init/Kconfig b/init/Kconfig
index bd7d650d4a99..2dca877c9ed7 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1930,6 +1930,10 @@ config BASE_SMALL
 	default 0 if BASE_FULL
 	default 1 if !BASE_FULL
 
+config MODULE_SIG_FORMAT
+	def_bool n
+	select SYSTEM_DATA_VERIFICATION
+
 menuconfig MODULES
 	bool "Enable loadable module support"
 	option modules
@@ -2007,7 +2011,7 @@ config MODULE_SRCVERSION_ALL
 config MODULE_SIG
 	bool "Module signature verification"
 	depends on MODULES
-	select SYSTEM_DATA_VERIFICATION
+	select MODULE_SIG_FORMAT
 	help
 	  Check modules for valid signatures upon load: the signature
 	  is simply appended to the module. For more information see
diff --git a/kernel/Makefile b/kernel/Makefile
index a8d923b5481b..179b44ab8b46 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -58,6 +58,7 @@ endif
 obj-$(CONFIG_UID16) += uid16.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_MODULE_SIG) += module_signing.o
+obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o
 obj-$(CONFIG_KALLSYMS) += kallsyms.o
 obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
 obj-$(CONFIG_CRASH_CORE) += crash_core.o
diff --git a/kernel/module.c b/kernel/module.c
index 5933395af9a0..5ac22efc3685 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/extable.h>
 #include <linux/moduleloader.h>
+#include <linux/module_signature.h>
 #include <linux/trace_events.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
diff --git a/kernel/module_signature.c b/kernel/module_signature.c
new file mode 100644
index 000000000000..4224a1086b7d
--- /dev/null
+++ b/kernel/module_signature.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Module signature checker
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/errno.h>
+#include <linux/printk.h>
+#include <linux/module_signature.h>
+#include <asm/byteorder.h>
+
+/**
+ * mod_check_sig - check that the given signature is sane
+ *
+ * @ms:		Signature to check.
+ * @file_len:	Size of the file to which @ms is appended.
+ * @name:	What is being checked. Used for error messages.
+ */
+int mod_check_sig(const struct module_signature *ms, size_t file_len,
+		  const char *name)
+{
+	if (be32_to_cpu(ms->sig_len) >= file_len - sizeof(*ms))
+		return -EBADMSG;
+
+	if (ms->id_type != PKEY_ID_PKCS7) {
+		pr_err("%s: Module is not signed with expected PKCS#7 message\n",
+		       name);
+		return -ENOPKG;
+	}
+
+	if (ms->algo != 0 ||
+	    ms->hash != 0 ||
+	    ms->signer_len != 0 ||
+	    ms->key_id_len != 0 ||
+	    ms->__pad[0] != 0 ||
+	    ms->__pad[1] != 0 ||
+	    ms->__pad[2] != 0) {
+		pr_err("%s: PKCS#7 signature info has unexpected non-zero params\n",
+		       name);
+		return -EBADMSG;
+	}
+
+	return 0;
+}
diff --git a/kernel/module_signing.c b/kernel/module_signing.c
index b10fb1986ca9..9d9fc678c91d 100644
--- a/kernel/module_signing.c
+++ b/kernel/module_signing.c
@@ -7,37 +7,13 @@
 
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/module_signature.h>
 #include <linux/string.h>
 #include <linux/verification.h>
 #include <crypto/public_key.h>
 #include "module-internal.h"
 
-enum pkey_id_type {
-	PKEY_ID_PGP,		/* OpenPGP generated key ID */
-	PKEY_ID_X509,		/* X.509 arbitrary subjectKeyIdentifier */
-	PKEY_ID_PKCS7,		/* Signature in PKCS#7 message */
-};
-
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- *	- Signer's name
- *	- Key identifier
- *	- Signature data
- *	- Information block
- */
-struct module_signature {
-	u8	algo;		/* Public-key crypto algorithm [0] */
-	u8	hash;		/* Digest algorithm [0] */
-	u8	id_type;	/* Key identifier type [PKEY_ID_PKCS7] */
-	u8	signer_len;	/* Length of signer's name [0] */
-	u8	key_id_len;	/* Length of key identifier [0] */
-	u8	__pad[3];
-	__be32	sig_len;	/* Length of signature data */
-};
-
 /*
  * Verify the signature on a module.
  */
@@ -45,6 +21,7 @@ int mod_verify_sig(const void *mod, struct load_info *info)
 {
 	struct module_signature ms;
 	size_t sig_len, modlen = info->len;
+	int ret;
 
 	pr_devel("==>%s(,%zu)\n", __func__, modlen);
 
@@ -52,32 +29,15 @@ int mod_verify_sig(const void *mod, struct load_info *info)
 		return -EBADMSG;
 
 	memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
-	modlen -= sizeof(ms);
+
+	ret = mod_check_sig(&ms, modlen, info->name);
+	if (ret)
+		return ret;
 
 	sig_len = be32_to_cpu(ms.sig_len);
-	if (sig_len >= modlen)
-		return -EBADMSG;
-	modlen -= sig_len;
+	modlen -= sig_len + sizeof(ms);
 	info->len = modlen;
 
-	if (ms.id_type != PKEY_ID_PKCS7) {
-		pr_err("%s: Module is not signed with expected PKCS#7 message\n",
-		       info->name);
-		return -ENOPKG;
-	}
-
-	if (ms.algo != 0 ||
-	    ms.hash != 0 ||
-	    ms.signer_len != 0 ||
-	    ms.key_id_len != 0 ||
-	    ms.__pad[0] != 0 ||
-	    ms.__pad[1] != 0 ||
-	    ms.__pad[2] != 0) {
-		pr_err("%s: PKCS#7 signature info has unexpected non-zero params\n",
-		       info->name);
-		return -EBADMSG;
-	}
-
 	return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len,
 				      VERIFY_USE_SECONDARY_KEYRING,
 				      VERIFYING_MODULE_SIGNATURE,
diff --git a/scripts/Makefile b/scripts/Makefile
index 16bcb8087899..532f7e0915c3 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -17,7 +17,7 @@ hostprogs-$(CONFIG_VT)           += conmakehash
 hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
 hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
 hostprogs-$(CONFIG_ASN1)	 += asn1_compiler
-hostprogs-$(CONFIG_MODULE_SIG)	 += sign-file
+hostprogs-$(CONFIG_MODULE_SIG_FORMAT) += sign-file
 hostprogs-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert
 hostprogs-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert
 
-- 
cgit v1.2.3


From 2a7bf671186eb5d1a47ef192aefbdf788f5b38fe Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Thu, 27 Jun 2019 23:19:25 -0300
Subject: PKCS#7: Refactor verify_pkcs7_signature()

IMA will need to verify a PKCS#7 signature which has already been parsed.
For this reason, factor out the code which does that from
verify_pkcs7_signature() into a new function which takes a struct
pkcs7_message instead of a data buffer.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 certs/system_keyring.c       | 61 ++++++++++++++++++++++++++++++++------------
 include/linux/verification.h | 10 ++++++++
 2 files changed, 55 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/certs/system_keyring.c b/certs/system_keyring.c
index 1eba08a1af82..798291177186 100644
--- a/certs/system_keyring.c
+++ b/certs/system_keyring.c
@@ -190,33 +190,27 @@ late_initcall(load_system_certificate_list);
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 /**
- * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * verify_pkcs7_message_sig - Verify a PKCS#7-based signature on system data.
  * @data: The data to be verified (NULL if expecting internal data).
  * @len: Size of @data.
- * @raw_pkcs7: The PKCS#7 message that is the signature.
- * @pkcs7_len: The size of @raw_pkcs7.
+ * @pkcs7: The PKCS#7 message that is the signature.
  * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
  *					(void *)1UL for all trusted keys).
  * @usage: The use to which the key is being put.
  * @view_content: Callback to gain access to content.
  * @ctx: Context for callback.
  */
-int verify_pkcs7_signature(const void *data, size_t len,
-			   const void *raw_pkcs7, size_t pkcs7_len,
-			   struct key *trusted_keys,
-			   enum key_being_used_for usage,
-			   int (*view_content)(void *ctx,
-					       const void *data, size_t len,
-					       size_t asn1hdrlen),
-			   void *ctx)
+int verify_pkcs7_message_sig(const void *data, size_t len,
+			     struct pkcs7_message *pkcs7,
+			     struct key *trusted_keys,
+			     enum key_being_used_for usage,
+			     int (*view_content)(void *ctx,
+						 const void *data, size_t len,
+						 size_t asn1hdrlen),
+			     void *ctx)
 {
-	struct pkcs7_message *pkcs7;
 	int ret;
 
-	pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
-	if (IS_ERR(pkcs7))
-		return PTR_ERR(pkcs7);
-
 	/* The data should be detached - so we need to supply it. */
 	if (data && pkcs7_supply_detached_data(pkcs7, data, len) < 0) {
 		pr_err("PKCS#7 signature with non-detached data\n");
@@ -269,6 +263,41 @@ int verify_pkcs7_signature(const void *data, size_t len,
 	}
 
 error:
+	pr_devel("<==%s() = %d\n", __func__, ret);
+	return ret;
+}
+
+/**
+ * verify_pkcs7_signature - Verify a PKCS#7-based signature on system data.
+ * @data: The data to be verified (NULL if expecting internal data).
+ * @len: Size of @data.
+ * @raw_pkcs7: The PKCS#7 message that is the signature.
+ * @pkcs7_len: The size of @raw_pkcs7.
+ * @trusted_keys: Trusted keys to use (NULL for builtin trusted keys only,
+ *					(void *)1UL for all trusted keys).
+ * @usage: The use to which the key is being put.
+ * @view_content: Callback to gain access to content.
+ * @ctx: Context for callback.
+ */
+int verify_pkcs7_signature(const void *data, size_t len,
+			   const void *raw_pkcs7, size_t pkcs7_len,
+			   struct key *trusted_keys,
+			   enum key_being_used_for usage,
+			   int (*view_content)(void *ctx,
+					       const void *data, size_t len,
+					       size_t asn1hdrlen),
+			   void *ctx)
+{
+	struct pkcs7_message *pkcs7;
+	int ret;
+
+	pkcs7 = pkcs7_parse_message(raw_pkcs7, pkcs7_len);
+	if (IS_ERR(pkcs7))
+		return PTR_ERR(pkcs7);
+
+	ret = verify_pkcs7_message_sig(data, len, pkcs7, trusted_keys, usage,
+				       view_content, ctx);
+
 	pkcs7_free_message(pkcs7);
 	pr_devel("<==%s() = %d\n", __func__, ret);
 	return ret;
diff --git a/include/linux/verification.h b/include/linux/verification.h
index 32d990d163c4..911ab7c2b1ab 100644
--- a/include/linux/verification.h
+++ b/include/linux/verification.h
@@ -32,6 +32,7 @@ extern const char *const key_being_used_for[NR__KEY_BEING_USED_FOR];
 #ifdef CONFIG_SYSTEM_DATA_VERIFICATION
 
 struct key;
+struct pkcs7_message;
 
 extern int verify_pkcs7_signature(const void *data, size_t len,
 				  const void *raw_pkcs7, size_t pkcs7_len,
@@ -41,6 +42,15 @@ extern int verify_pkcs7_signature(const void *data, size_t len,
 						      const void *data, size_t len,
 						      size_t asn1hdrlen),
 				  void *ctx);
+extern int verify_pkcs7_message_sig(const void *data, size_t len,
+				    struct pkcs7_message *pkcs7,
+				    struct key *trusted_keys,
+				    enum key_being_used_for usage,
+				    int (*view_content)(void *ctx,
+							const void *data,
+							size_t len,
+							size_t asn1hdrlen),
+				    void *ctx);
 
 #ifdef CONFIG_SIGNED_PE_FILE_VERIFICATION
 extern int verify_pefile_signature(const void *pebuf, unsigned pelen,
-- 
cgit v1.2.3


From e201af16d1ec76ccd19b90484d767984ff451f18 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Thu, 27 Jun 2019 23:19:26 -0300
Subject: PKCS#7: Introduce pkcs7_get_digest()

IMA will need to access the digest of the PKCS7 message (as calculated by
the kernel) before the signature is verified, so introduce
pkcs7_get_digest() for that purpose.

Also, modify pkcs7_digest() to detect when the digest was already
calculated so that it doesn't have to do redundant work. Verifying that
sinfo->sig->digest isn't NULL is sufficient because both places which
allocate sinfo->sig (pkcs7_parse_message() and pkcs7_note_signed_info())
use kzalloc() so sig->digest is always initialized to zero.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Mimi Zohar <zohar@linux.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 crypto/asymmetric_keys/pkcs7_verify.c | 33 +++++++++++++++++++++++++++++++++
 include/crypto/pkcs7.h                |  4 ++++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/crypto/asymmetric_keys/pkcs7_verify.c b/crypto/asymmetric_keys/pkcs7_verify.c
index 11bee67fa9cc..ce49820caa97 100644
--- a/crypto/asymmetric_keys/pkcs7_verify.c
+++ b/crypto/asymmetric_keys/pkcs7_verify.c
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/asn1.h>
 #include <crypto/hash.h>
+#include <crypto/hash_info.h>
 #include <crypto/public_key.h>
 #include "pkcs7_parser.h"
 
@@ -29,6 +30,10 @@ static int pkcs7_digest(struct pkcs7_message *pkcs7,
 
 	kenter(",%u,%s", sinfo->index, sinfo->sig->hash_algo);
 
+	/* The digest was calculated already. */
+	if (sig->digest)
+		return 0;
+
 	if (!sinfo->sig->hash_algo)
 		return -ENOPKG;
 
@@ -117,6 +122,34 @@ error_no_desc:
 	return ret;
 }
 
+int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf, u32 *len,
+		     enum hash_algo *hash_algo)
+{
+	struct pkcs7_signed_info *sinfo = pkcs7->signed_infos;
+	int i, ret;
+
+	/*
+	 * This function doesn't support messages with more than one signature.
+	 */
+	if (sinfo == NULL || sinfo->next != NULL)
+		return -EBADMSG;
+
+	ret = pkcs7_digest(pkcs7, sinfo);
+	if (ret)
+		return ret;
+
+	*buf = sinfo->sig->digest;
+	*len = sinfo->sig->digest_size;
+
+	for (i = 0; i < HASH_ALGO__LAST; i++)
+		if (!strcmp(hash_algo_name[i], sinfo->sig->hash_algo)) {
+			*hash_algo = i;
+			break;
+		}
+
+	return 0;
+}
+
 /*
  * Find the key (X.509 certificate) to use to verify a PKCS#7 message.  PKCS#7
  * uses the issuer's name and the issuing certificate serial number for
diff --git a/include/crypto/pkcs7.h b/include/crypto/pkcs7.h
index 96071bee03ac..38ec7f5f9041 100644
--- a/include/crypto/pkcs7.h
+++ b/include/crypto/pkcs7.h
@@ -9,6 +9,7 @@
 #define _CRYPTO_PKCS7_H
 
 #include <linux/verification.h>
+#include <linux/hash_info.h>
 #include <crypto/public_key.h>
 
 struct key;
@@ -40,4 +41,7 @@ extern int pkcs7_verify(struct pkcs7_message *pkcs7,
 extern int pkcs7_supply_detached_data(struct pkcs7_message *pkcs7,
 				      const void *data, size_t datalen);
 
+extern int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf,
+			    u32 *len, enum hash_algo *hash_algo);
+
 #endif /* _CRYPTO_PKCS7_H */
-- 
cgit v1.2.3


From fce45cd41101f1a9620267146b21f09b3454d8db Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Sun, 28 Jul 2019 21:47:35 -0700
Subject: locking/rwsem: Check for operations on an uninitialized rwsem

Currently rwsems is the only locking primitive that lacks this
debug feature. Add it under CONFIG_DEBUG_RWSEMS and do the magic
checking in the locking fastpath (trylock) operation such that
we cover all cases. The unlocking part is pretty straightforward.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Waiman Long <longman@redhat.com>
Cc: mingo@kernel.org
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: https://lkml.kernel.org/r/20190729044735.9632-1-dave@stgolabs.net
---
 include/linux/rwsem.h  | 10 ++++++++++
 kernel/locking/rwsem.c | 22 ++++++++++++++++++----
 2 files changed, 28 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 9d9c663987d8..00d6054687dd 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -45,6 +45,9 @@ struct rw_semaphore {
 #endif
 	raw_spinlock_t wait_lock;
 	struct list_head wait_list;
+#ifdef CONFIG_DEBUG_RWSEMS
+	void *magic;
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
@@ -73,6 +76,12 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
+#ifdef CONFIG_DEBUG_RWSEMS
+# define __DEBUG_RWSEM_INITIALIZER(lockname) , .magic = &lockname
+#else
+# define __DEBUG_RWSEM_INITIALIZER(lockname)
+#endif
+
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 #define __RWSEM_OPT_INIT(lockname) , .osq = OSQ_LOCK_UNLOCKED
 #else
@@ -85,6 +94,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 	  .wait_list = LIST_HEAD_INIT((name).wait_list),	\
 	  .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock)	\
 	  __RWSEM_OPT_INIT(name)				\
+	  __DEBUG_RWSEM_INITIALIZER(name)			\
 	  __RWSEM_DEP_MAP_INIT(name) }
 
 #define DECLARE_RWSEM(name) \
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 354238a08b7a..eef04551eae7 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -105,8 +105,9 @@
 #ifdef CONFIG_DEBUG_RWSEMS
 # define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
 	if (!debug_locks_silent &&				\
-	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
+	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
 		#c, atomic_long_read(&(sem)->count),		\
+		(unsigned long) sem->magic,			\
 		atomic_long_read(&(sem)->owner), (long)current,	\
 		list_empty(&(sem)->wait_list) ? "" : "not "))	\
 			debug_locks_off();			\
@@ -329,6 +330,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	 */
 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
 	lockdep_init_map(&sem->dep_map, name, key, 0);
+#endif
+#ifdef CONFIG_DEBUG_RWSEMS
+	sem->magic = sem;
 #endif
 	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
 	raw_spin_lock_init(&sem->wait_lock);
@@ -1358,11 +1362,14 @@ static inline int __down_read_killable(struct rw_semaphore *sem)
 
 static inline int __down_read_trylock(struct rw_semaphore *sem)
 {
+	long tmp;
+
+	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
+
 	/*
 	 * Optimize for the case when the rwsem is not locked at all.
 	 */
-	long tmp = RWSEM_UNLOCKED_VALUE;
-
+	tmp = RWSEM_UNLOCKED_VALUE;
 	do {
 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
 					tmp + RWSEM_READER_BIAS)) {
@@ -1403,8 +1410,11 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
 
 static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
-	long tmp = RWSEM_UNLOCKED_VALUE;
+	long tmp;
 
+	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
+
+	tmp  = RWSEM_UNLOCKED_VALUE;
 	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
 					    RWSEM_WRITER_LOCKED)) {
 		rwsem_set_owner(sem);
@@ -1420,7 +1430,9 @@ inline void __up_read(struct rw_semaphore *sem)
 {
 	long tmp;
 
+	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
 	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
+
 	rwsem_clear_reader_owned(sem);
 	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
 	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
@@ -1438,12 +1450,14 @@ static inline void __up_write(struct rw_semaphore *sem)
 {
 	long tmp;
 
+	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
 	/*
 	 * sem->owner may differ from current if the ownership is transferred
 	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
 	 */
 	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
 			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
+
 	rwsem_clear_owner(sem);
 	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
 	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
-- 
cgit v1.2.3


From 5f35d5a66b3ec62cb5ec4ec2ad9aebe2ac325673 Mon Sep 17 00:00:00 2001
From: Mukesh Ojha <mojha@codeaurora.org>
Date: Wed, 31 Jul 2019 20:35:03 +0530
Subject: locking/mutex: Make __mutex_owner static to mutex.c

__mutex_owner() should only be used by the mutex api's.
So, to put this restiction let's move the __mutex_owner()
function definition from linux/mutex.h to mutex.c file.

There exist functions that uses __mutex_owner() like
mutex_is_locked() and mutex_trylock_recursive(), So
to keep legacy thing intact move them as well and
export them.

Move mutex_waiter structure also to keep it private to the
file.

Signed-off-by: Mukesh Ojha <mojha@codeaurora.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: mingo@redhat.com
Cc: will@kernel.org
Link: https://lkml.kernel.org/r/1564585504-3543-1-git-send-email-mojha@codeaurora.org
---
 include/linux/mutex.h  | 38 +++-----------------------------------
 kernel/locking/mutex.c | 39 +++++++++++++++++++++++++++++++++++++++
 kernel/locking/mutex.h |  2 ++
 3 files changed, 44 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index dcd03fee6e01..eb8c62aba263 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -65,29 +65,6 @@ struct mutex {
 #endif
 };
 
-/*
- * Internal helper function; C doesn't allow us to hide it :/
- *
- * DO NOT USE (outside of mutex code).
- */
-static inline struct task_struct *__mutex_owner(struct mutex *lock)
-{
-	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
-}
-
-/*
- * This is the control structure for tasks blocked on mutex,
- * which resides on the blocked task's kernel stack:
- */
-struct mutex_waiter {
-	struct list_head	list;
-	struct task_struct	*task;
-	struct ww_acquire_ctx	*ww_ctx;
-#ifdef CONFIG_DEBUG_MUTEXES
-	void			*magic;
-#endif
-};
-
 #ifdef CONFIG_DEBUG_MUTEXES
 
 #define __DEBUG_MUTEX_INITIALIZER(lockname)				\
@@ -144,10 +121,7 @@ extern void __mutex_init(struct mutex *lock, const char *name,
  *
  * Returns true if the mutex is locked, false if unlocked.
  */
-static inline bool mutex_is_locked(struct mutex *lock)
-{
-	return __mutex_owner(lock) != NULL;
-}
+extern bool mutex_is_locked(struct mutex *lock);
 
 /*
  * See kernel/locking/mutex.c for detailed documentation of these APIs.
@@ -220,13 +194,7 @@ enum mutex_trylock_recursive_enum {
  *  - MUTEX_TRYLOCK_SUCCESS   - lock acquired,
  *  - MUTEX_TRYLOCK_RECURSIVE - we already owned the lock.
  */
-static inline /* __deprecated */ __must_check enum mutex_trylock_recursive_enum
-mutex_trylock_recursive(struct mutex *lock)
-{
-	if (unlikely(__mutex_owner(lock) == current))
-		return MUTEX_TRYLOCK_RECURSIVE;
-
-	return mutex_trylock(lock);
-}
+extern /* __deprecated */ __must_check enum mutex_trylock_recursive_enum
+mutex_trylock_recursive(struct mutex *lock);
 
 #endif /* __LINUX_MUTEX_H */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 5e069734363c..ac4929f1e085 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -36,6 +36,19 @@
 # include "mutex.h"
 #endif
 
+/*
+ * This is the control structure for tasks blocked on mutex,
+ * which resides on the blocked task's kernel stack:
+ */
+struct mutex_waiter {
+	struct list_head	list;
+	struct task_struct	*task;
+	struct ww_acquire_ctx	*ww_ctx;
+#ifdef CONFIG_DEBUG_MUTEXES
+	void			*magic;
+#endif
+};
+
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
@@ -65,11 +78,37 @@ EXPORT_SYMBOL(__mutex_init);
 
 #define MUTEX_FLAGS		0x07
 
+/*
+ * Internal helper function; C doesn't allow us to hide it :/
+ *
+ * DO NOT USE (outside of mutex code).
+ */
+static inline struct task_struct *__mutex_owner(struct mutex *lock)
+{
+	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
+}
+
 static inline struct task_struct *__owner_task(unsigned long owner)
 {
 	return (struct task_struct *)(owner & ~MUTEX_FLAGS);
 }
 
+bool mutex_is_locked(struct mutex *lock)
+{
+	return __mutex_owner(lock) != NULL;
+}
+EXPORT_SYMBOL(mutex_is_locked);
+
+__must_check enum mutex_trylock_recursive_enum
+mutex_trylock_recursive(struct mutex *lock)
+{
+	if (unlikely(__mutex_owner(lock) == current))
+		return MUTEX_TRYLOCK_RECURSIVE;
+
+	return mutex_trylock(lock);
+}
+EXPORT_SYMBOL(mutex_trylock_recursive);
+
 static inline unsigned long __owner_flags(unsigned long owner)
 {
 	return owner & MUTEX_FLAGS;
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 1c2287d3fa71..7cde5c6d414e 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -19,6 +19,8 @@
 #define debug_mutex_unlock(lock)			do { } while (0)
 #define debug_mutex_init(lock, name, key)		do { } while (0)
 
+struct mutex_waiter;
+
 static inline void
 debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
 {
-- 
cgit v1.2.3


From 98d87f70f4ab84b9e50e16b7848937ae07518cd4 Mon Sep 17 00:00:00 2001
From: Hans Holmberg <hans@owltronix.com>
Date: Wed, 31 Jul 2019 11:41:33 +0200
Subject: lightnvm: remove nvm_submit_io_sync_fn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the redundant sync handling interface and wait for a completion in
the lightnvm core instead.

Reviewed-by: Javier González <javier@javigon.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Hans Holmberg <hans@owltronix.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c      | 35 +++++++++++++++++++++++++++++------
 drivers/nvme/host/lightnvm.c | 29 -----------------------------
 include/linux/lightnvm.h     |  2 --
 3 files changed, 29 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index a600934fdd9c..01d098fb96ac 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -752,12 +752,36 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 }
 EXPORT_SYMBOL(nvm_submit_io);
 
+static void nvm_sync_end_io(struct nvm_rq *rqd)
+{
+	struct completion *waiting = rqd->private;
+
+	complete(waiting);
+}
+
+static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	int ret = 0;
+
+	rqd->end_io = nvm_sync_end_io;
+	rqd->private = &wait;
+
+	ret = dev->ops->submit_io(dev, rqd);
+	if (ret)
+		return ret;
+
+	wait_for_completion_io(&wait);
+
+	return 0;
+}
+
 int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
 	int ret;
 
-	if (!dev->ops->submit_io_sync)
+	if (!dev->ops->submit_io)
 		return -ENODEV;
 
 	nvm_rq_tgt_to_dev(tgt_dev, rqd);
@@ -765,9 +789,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	rqd->dev = tgt_dev;
 	rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
 
-	/* In case of error, fail with right address format */
-	ret = dev->ops->submit_io_sync(dev, rqd);
-	nvm_rq_dev_to_tgt(tgt_dev, rqd);
+	ret = nvm_submit_io_wait(dev, rqd);
 
 	return ret;
 }
@@ -788,12 +810,13 @@ EXPORT_SYMBOL(nvm_end_io);
 
 static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
 {
-	if (!dev->ops->submit_io_sync)
+	if (!dev->ops->submit_io)
 		return -ENODEV;
 
+	rqd->dev = NULL;
 	rqd->flags = nvm_set_flags(&dev->geo, rqd);
 
-	return dev->ops->submit_io_sync(dev, rqd);
+	return nvm_submit_io_wait(dev, rqd);
 }
 
 static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ba009d4c9dfa..d6f121452d5d 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -690,34 +690,6 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	return 0;
 }
 
-static int nvme_nvm_submit_io_sync(struct nvm_dev *dev, struct nvm_rq *rqd)
-{
-	struct request_queue *q = dev->q;
-	struct request *rq;
-	struct nvme_nvm_command cmd;
-	int ret = 0;
-
-	memset(&cmd, 0, sizeof(struct nvme_nvm_command));
-
-	rq = nvme_nvm_alloc_request(q, rqd, &cmd);
-	if (IS_ERR(rq))
-		return PTR_ERR(rq);
-
-	/* I/Os can fail and the error is signaled through rqd. Callers must
-	 * handle the error accordingly.
-	 */
-	blk_execute_rq(q, NULL, rq, 0);
-	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
-		ret = -EINTR;
-
-	rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64);
-	rqd->error = nvme_req(rq)->status;
-
-	blk_mq_free_request(rq);
-
-	return ret;
-}
-
 static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
 					int size)
 {
@@ -754,7 +726,6 @@ static struct nvm_dev_ops nvme_nvm_dev_ops = {
 	.get_chk_meta		= nvme_nvm_get_chk_meta,
 
 	.submit_io		= nvme_nvm_submit_io,
-	.submit_io_sync		= nvme_nvm_submit_io_sync,
 
 	.create_dma_pool	= nvme_nvm_create_dma_pool,
 	.destroy_dma_pool	= nvme_nvm_destroy_dma_pool,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 4d0d5655c7b2..8891647b24b1 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -89,7 +89,6 @@ typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
 typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
 							struct nvm_chk_meta *);
 typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
-typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
 typedef void (nvm_destroy_dma_pool_fn)(void *);
 typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
@@ -104,7 +103,6 @@ struct nvm_dev_ops {
 	nvm_get_chk_meta_fn	*get_chk_meta;
 
 	nvm_submit_io_fn	*submit_io;
-	nvm_submit_io_sync_fn	*submit_io_sync;
 
 	nvm_create_dma_pool_fn	*create_dma_pool;
 	nvm_destroy_dma_pool_fn	*destroy_dma_pool;
-- 
cgit v1.2.3


From 48e5da725581c1f7444e45cccbafc33e11430b48 Mon Sep 17 00:00:00 2001
From: Hans Holmberg <hans@owltronix.com>
Date: Wed, 31 Jul 2019 11:41:34 +0200
Subject: lightnvm: move metadata mapping to lower level driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that blk_rq_map_kern can map both kmem and vmem, move internal
metadata mapping down to the lower level driver.

Reviewed-by: Javier González <javier@javigon.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Hans Holmberg <hans@owltronix.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c          |  16 +++---
 drivers/lightnvm/pblk-core.c     | 113 +++++----------------------------------
 drivers/lightnvm/pblk-read.c     |  22 ++------
 drivers/lightnvm/pblk-recovery.c |  39 ++------------
 drivers/lightnvm/pblk-write.c    |  20 ++-----
 drivers/lightnvm/pblk.h          |   8 +--
 drivers/nvme/host/lightnvm.c     |  20 +++++--
 include/linux/lightnvm.h         |   6 +--
 8 files changed, 54 insertions(+), 190 deletions(-)

(limited to 'include')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 01d098fb96ac..3cd03582a2ed 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -731,7 +731,7 @@ static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
 	return flags;
 }
 
-int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
+int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd, void *buf)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
 	int ret;
@@ -745,7 +745,7 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
 
 	/* In case of error, fail with right address format */
-	ret = dev->ops->submit_io(dev, rqd);
+	ret = dev->ops->submit_io(dev, rqd, buf);
 	if (ret)
 		nvm_rq_dev_to_tgt(tgt_dev, rqd);
 	return ret;
@@ -759,7 +759,8 @@ static void nvm_sync_end_io(struct nvm_rq *rqd)
 	complete(waiting);
 }
 
-static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd)
+static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd,
+			      void *buf)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	int ret = 0;
@@ -767,7 +768,7 @@ static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd)
 	rqd->end_io = nvm_sync_end_io;
 	rqd->private = &wait;
 
-	ret = dev->ops->submit_io(dev, rqd);
+	ret = dev->ops->submit_io(dev, rqd, buf);
 	if (ret)
 		return ret;
 
@@ -776,7 +777,8 @@ static int nvm_submit_io_wait(struct nvm_dev *dev, struct nvm_rq *rqd)
 	return 0;
 }
 
-int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
+int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd,
+		       void *buf)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
 	int ret;
@@ -789,7 +791,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	rqd->dev = tgt_dev;
 	rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
 
-	ret = nvm_submit_io_wait(dev, rqd);
+	ret = nvm_submit_io_wait(dev, rqd, buf);
 
 	return ret;
 }
@@ -816,7 +818,7 @@ static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
 	rqd->dev = NULL;
 	rqd->flags = nvm_set_flags(&dev->geo, rqd);
 
-	return nvm_submit_io_wait(dev, rqd);
+	return nvm_submit_io_wait(dev, rqd, NULL);
 }
 
 static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index f546e6f28b8a..a58d3c84a3f2 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -507,7 +507,7 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
 	pblk->sec_per_write = sec_per_write;
 }
 
-int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
+int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 
@@ -518,7 +518,7 @@ int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
 		return NVM_IO_ERR;
 #endif
 
-	return nvm_submit_io(dev, rqd);
+	return nvm_submit_io(dev, rqd, buf);
 }
 
 void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
@@ -541,7 +541,7 @@ void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd)
 	}
 }
 
-int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
+int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	int ret;
@@ -553,7 +553,7 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
 		return NVM_IO_ERR;
 #endif
 
-	ret = nvm_submit_io_sync(dev, rqd);
+	ret = nvm_submit_io_sync(dev, rqd, buf);
 
 	if (trace_pblk_chunk_state_enabled() && !ret &&
 	    rqd->opcode == NVM_OP_PWRITE)
@@ -562,65 +562,19 @@ int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd)
 	return ret;
 }
 
-int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd)
+static int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd,
+				   void *buf)
 {
 	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	int ret;
 
 	pblk_down_chunk(pblk, ppa_list[0]);
-	ret = pblk_submit_io_sync(pblk, rqd);
+	ret = pblk_submit_io_sync(pblk, rqd, buf);
 	pblk_up_chunk(pblk, ppa_list[0]);
 
 	return ret;
 }
 
-static void pblk_bio_map_addr_endio(struct bio *bio)
-{
-	bio_put(bio);
-}
-
-struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
-			      unsigned int nr_secs, unsigned int len,
-			      int alloc_type, gfp_t gfp_mask)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	void *kaddr = data;
-	struct page *page;
-	struct bio *bio;
-	int i, ret;
-
-	if (alloc_type == PBLK_KMALLOC_META)
-		return bio_map_kern(dev->q, kaddr, len, gfp_mask);
-
-	bio = bio_kmalloc(gfp_mask, nr_secs);
-	if (!bio)
-		return ERR_PTR(-ENOMEM);
-
-	for (i = 0; i < nr_secs; i++) {
-		page = vmalloc_to_page(kaddr);
-		if (!page) {
-			pblk_err(pblk, "could not map vmalloc bio\n");
-			bio_put(bio);
-			bio = ERR_PTR(-ENOMEM);
-			goto out;
-		}
-
-		ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
-		if (ret != PAGE_SIZE) {
-			pblk_err(pblk, "could not add page to bio\n");
-			bio_put(bio);
-			bio = ERR_PTR(-ENOMEM);
-			goto out;
-		}
-
-		kaddr += PAGE_SIZE;
-	}
-
-	bio->bi_end_io = pblk_bio_map_addr_endio;
-out:
-	return bio;
-}
-
 int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
 		   unsigned long secs_to_flush, bool skip_meta)
 {
@@ -722,9 +676,7 @@ u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
 
 int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct bio *bio;
 	struct ppa_addr *ppa_list;
 	struct nvm_rq rqd;
 	u64 paddr = pblk_line_smeta_start(pblk, line);
@@ -736,16 +688,6 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
 	if (ret)
 		return ret;
 
-	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		ret = PTR_ERR(bio);
-		goto clear_rqd;
-	}
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
-	rqd.bio = bio;
 	rqd.opcode = NVM_OP_PREAD;
 	rqd.nr_ppas = lm->smeta_sec;
 	rqd.is_seq = 1;
@@ -754,10 +696,9 @@ int pblk_line_smeta_read(struct pblk *pblk, struct pblk_line *line)
 	for (i = 0; i < lm->smeta_sec; i++, paddr++)
 		ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
 
-	ret = pblk_submit_io_sync(pblk, &rqd);
+	ret = pblk_submit_io_sync(pblk, &rqd, line->smeta);
 	if (ret) {
 		pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
-		bio_put(bio);
 		goto clear_rqd;
 	}
 
@@ -776,9 +717,7 @@ clear_rqd:
 static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
 				 u64 paddr)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct bio *bio;
 	struct ppa_addr *ppa_list;
 	struct nvm_rq rqd;
 	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
@@ -791,16 +730,6 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
 	if (ret)
 		return ret;
 
-	bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		ret = PTR_ERR(bio);
-		goto clear_rqd;
-	}
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
-	rqd.bio = bio;
 	rqd.opcode = NVM_OP_PWRITE;
 	rqd.nr_ppas = lm->smeta_sec;
 	rqd.is_seq = 1;
@@ -814,10 +743,9 @@ static int pblk_line_smeta_write(struct pblk *pblk, struct pblk_line *line,
 		meta->lba = lba_list[paddr] = addr_empty;
 	}
 
-	ret = pblk_submit_io_sync_sem(pblk, &rqd);
+	ret = pblk_submit_io_sync_sem(pblk, &rqd, line->smeta);
 	if (ret) {
 		pblk_err(pblk, "smeta I/O submission failed: %d\n", ret);
-		bio_put(bio);
 		goto clear_rqd;
 	}
 
@@ -838,10 +766,8 @@ int pblk_line_emeta_read(struct pblk *pblk, struct pblk_line *line,
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
 	void *ppa_list_buf, *meta_list;
-	struct bio *bio;
 	struct ppa_addr *ppa_list;
 	struct nvm_rq rqd;
 	u64 paddr = line->emeta_ssec;
@@ -867,17 +793,6 @@ next_rq:
 	rq_ppas = pblk_calc_secs(pblk, left_ppas, 0, false);
 	rq_len = rq_ppas * geo->csecs;
 
-	bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len,
-					l_mg->emeta_alloc_type, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		ret = PTR_ERR(bio);
-		goto free_rqd_dma;
-	}
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
-	rqd.bio = bio;
 	rqd.meta_list = meta_list;
 	rqd.ppa_list = ppa_list_buf;
 	rqd.dma_meta_list = dma_meta_list;
@@ -896,7 +811,6 @@ next_rq:
 		while (test_bit(pos, line->blk_bitmap)) {
 			paddr += min;
 			if (pblk_boundary_paddr_checks(pblk, paddr)) {
-				bio_put(bio);
 				ret = -EINTR;
 				goto free_rqd_dma;
 			}
@@ -906,7 +820,6 @@ next_rq:
 		}
 
 		if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
-			bio_put(bio);
 			ret = -EINTR;
 			goto free_rqd_dma;
 		}
@@ -915,10 +828,9 @@ next_rq:
 			ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line_id);
 	}
 
-	ret = pblk_submit_io_sync(pblk, &rqd);
+	ret = pblk_submit_io_sync(pblk, &rqd, emeta_buf);
 	if (ret) {
 		pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
-		bio_put(bio);
 		goto free_rqd_dma;
 	}
 
@@ -963,7 +875,7 @@ static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
 	/* The write thread schedules erases so that it minimizes disturbances
 	 * with writes. Thus, there is no need to take the LUN semaphore.
 	 */
-	ret = pblk_submit_io_sync(pblk, &rqd);
+	ret = pblk_submit_io_sync(pblk, &rqd, NULL);
 	rqd.private = pblk;
 	__pblk_end_io_erase(pblk, &rqd);
 
@@ -1792,7 +1704,7 @@ int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
 	/* The write thread schedules erases so that it minimizes disturbances
 	 * with writes. Thus, there is no need to take the LUN semaphore.
 	 */
-	err = pblk_submit_io(pblk, rqd);
+	err = pblk_submit_io(pblk, rqd, NULL);
 	if (err) {
 		struct nvm_tgt_dev *dev = pblk->dev;
 		struct nvm_geo *geo = &dev->geo;
@@ -1923,7 +1835,6 @@ void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
 static void pblk_save_lba_list(struct pblk *pblk, struct pblk_line *line)
 {
 	struct pblk_line_meta *lm = &pblk->lm;
-	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	unsigned int lba_list_size = lm->emeta_len[2];
 	struct pblk_w_err_gc *w_err_gc = line->w_err_gc;
 	struct pblk_emeta *emeta = line->emeta;
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index d98ea392fe33..d572d4559e4e 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -342,7 +342,7 @@ split_retry:
 		bio_put(int_bio);
 		int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
 		goto split_retry;
-	} else if (pblk_submit_io(pblk, rqd)) {
+	} else if (pblk_submit_io(pblk, rqd, NULL)) {
 		/* Submitting IO to drive failed, let's report an error */
 		rqd->error = -ENODEV;
 		pblk_end_io_read(rqd);
@@ -419,7 +419,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
-	struct bio *bio;
 	struct nvm_rq rqd;
 	int data_len;
 	int ret = NVM_IO_OK;
@@ -447,25 +446,12 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 		goto out;
 
 	data_len = (gc_rq->secs_to_gc) * geo->csecs;
-	bio = pblk_bio_map_addr(pblk, gc_rq->data, gc_rq->secs_to_gc, data_len,
-						PBLK_VMALLOC_META, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		pblk_err(pblk, "could not allocate GC bio (%lu)\n",
-								PTR_ERR(bio));
-		ret = PTR_ERR(bio);
-		goto err_free_dma;
-	}
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-
 	rqd.opcode = NVM_OP_PREAD;
 	rqd.nr_ppas = gc_rq->secs_to_gc;
-	rqd.bio = bio;
 
-	if (pblk_submit_io_sync(pblk, &rqd)) {
+	if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) {
 		ret = -EIO;
-		goto err_free_bio;
+		goto err_free_dma;
 	}
 
 	pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
@@ -489,8 +475,6 @@ out:
 	pblk_free_rqd_meta(pblk, &rqd);
 	return ret;
 
-err_free_bio:
-	bio_put(bio);
 err_free_dma:
 	pblk_free_rqd_meta(pblk, &rqd);
 	return ret;
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index e6dda04de144..d5e210c3c5b7 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -178,12 +178,11 @@ static int pblk_recov_pad_line(struct pblk *pblk, struct pblk_line *line,
 	void *meta_list;
 	struct pblk_pad_rq *pad_rq;
 	struct nvm_rq *rqd;
-	struct bio *bio;
 	struct ppa_addr *ppa_list;
 	void *data;
 	__le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	u64 w_ptr = line->cur_sec;
-	int left_line_ppas, rq_ppas, rq_len;
+	int left_line_ppas, rq_ppas;
 	int i, j;
 	int ret = 0;
 
@@ -212,28 +211,15 @@ next_pad_rq:
 		goto fail_complete;
 	}
 
-	rq_len = rq_ppas * geo->csecs;
-
-	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
-						PBLK_VMALLOC_META, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		ret = PTR_ERR(bio);
-		goto fail_complete;
-	}
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-
 	rqd = pblk_alloc_rqd(pblk, PBLK_WRITE_INT);
 
 	ret = pblk_alloc_rqd_meta(pblk, rqd);
 	if (ret) {
 		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-		bio_put(bio);
 		goto fail_complete;
 	}
 
-	rqd->bio = bio;
+	rqd->bio = NULL;
 	rqd->opcode = NVM_OP_PWRITE;
 	rqd->is_seq = 1;
 	rqd->nr_ppas = rq_ppas;
@@ -275,13 +261,12 @@ next_pad_rq:
 	kref_get(&pad_rq->ref);
 	pblk_down_chunk(pblk, ppa_list[0]);
 
-	ret = pblk_submit_io(pblk, rqd);
+	ret = pblk_submit_io(pblk, rqd, data);
 	if (ret) {
 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
 		pblk_up_chunk(pblk, ppa_list[0]);
 		kref_put(&pad_rq->ref, pblk_recov_complete);
 		pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
-		bio_put(bio);
 		goto fail_complete;
 	}
 
@@ -375,7 +360,6 @@ static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line,
 	struct ppa_addr *ppa_list;
 	void *meta_list;
 	struct nvm_rq *rqd;
-	struct bio *bio;
 	void *data;
 	dma_addr_t dma_ppa_list, dma_meta_list;
 	__le64 *lba_list;
@@ -407,15 +391,7 @@ next_rq:
 	rq_len = rq_ppas * geo->csecs;
 
 retry_rq:
-	bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL);
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_READ, 0);
-	bio_get(bio);
-
-	rqd->bio = bio;
+	rqd->bio = NULL;
 	rqd->opcode = NVM_OP_PREAD;
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
@@ -445,10 +421,9 @@ retry_rq:
 				addr_to_gen_ppa(pblk, paddr + j, line->id);
 	}
 
-	ret = pblk_submit_io_sync(pblk, rqd);
+	ret = pblk_submit_io_sync(pblk, rqd, data);
 	if (ret) {
 		pblk_err(pblk, "I/O submission failed: %d\n", ret);
-		bio_put(bio);
 		return ret;
 	}
 
@@ -460,24 +435,20 @@ retry_rq:
 
 		if (padded) {
 			pblk_log_read_err(pblk, rqd);
-			bio_put(bio);
 			return -EINTR;
 		}
 
 		pad_distance = pblk_pad_distance(pblk, line);
 		ret = pblk_recov_pad_line(pblk, line, pad_distance);
 		if (ret) {
-			bio_put(bio);
 			return ret;
 		}
 
 		padded = true;
-		bio_put(bio);
 		goto retry_rq;
 	}
 
 	pblk_get_packed_meta(pblk, rqd);
-	bio_put(bio);
 
 	for (i = 0; i < rqd->nr_ppas; i++) {
 		struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 4e63f9b5954c..b9a2aeba95ab 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -373,7 +373,6 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	struct pblk_emeta *emeta = meta_line->emeta;
 	struct ppa_addr *ppa_list;
 	struct pblk_g_ctx *m_ctx;
-	struct bio *bio;
 	struct nvm_rq *rqd;
 	void *data;
 	u64 paddr;
@@ -391,20 +390,9 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	rq_len = rq_ppas * geo->csecs;
 	data = ((void *)emeta->buf) + emeta->mem;
 
-	bio = pblk_bio_map_addr(pblk, data, rq_ppas, rq_len,
-					l_mg->emeta_alloc_type, GFP_KERNEL);
-	if (IS_ERR(bio)) {
-		pblk_err(pblk, "failed to map emeta io");
-		ret = PTR_ERR(bio);
-		goto fail_free_rqd;
-	}
-	bio->bi_iter.bi_sector = 0; /* internal bio */
-	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-	rqd->bio = bio;
-
 	ret = pblk_alloc_w_rq(pblk, rqd, rq_ppas, pblk_end_io_write_meta);
 	if (ret)
-		goto fail_free_bio;
+		goto fail_free_rqd;
 
 	ppa_list = nvm_rq_to_ppa_list(rqd);
 	for (i = 0; i < rqd->nr_ppas; ) {
@@ -423,7 +411,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 
 	pblk_down_chunk(pblk, ppa_list[0]);
 
-	ret = pblk_submit_io(pblk, rqd);
+	ret = pblk_submit_io(pblk, rqd, data);
 	if (ret) {
 		pblk_err(pblk, "emeta I/O submission failed: %d\n", ret);
 		goto fail_rollback;
@@ -437,8 +425,6 @@ fail_rollback:
 	pblk_dealloc_page(pblk, meta_line, rq_ppas);
 	list_add(&meta_line->list, &meta_line->list);
 	spin_unlock(&l_mg->close_lock);
-fail_free_bio:
-	bio_put(bio);
 fail_free_rqd:
 	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
 	return ret;
@@ -523,7 +509,7 @@ static int pblk_submit_io_set(struct pblk *pblk, struct nvm_rq *rqd)
 	meta_line = pblk_should_submit_meta_io(pblk, rqd);
 
 	/* Submit data write for current data line */
-	err = pblk_submit_io(pblk, rqd);
+	err = pblk_submit_io(pblk, rqd, NULL);
 	if (err) {
 		pblk_err(pblk, "data I/O submission failed: %d\n", err);
 		return NVM_IO_ERR;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index a67855387f53..d515d3409a74 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -783,14 +783,10 @@ struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
 					      struct ppa_addr ppa);
 void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd);
 void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd);
-int pblk_submit_io_sync_sem(struct pblk *pblk, struct nvm_rq *rqd);
+int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd, void *buf);
+int pblk_submit_io_sync(struct pblk *pblk, struct nvm_rq *rqd, void *buf);
 int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line);
 void pblk_check_chunk_state_update(struct pblk *pblk, struct nvm_rq *rqd);
-struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
-			      unsigned int nr_secs, unsigned int len,
-			      int alloc_type, gfp_t gfp_mask);
 struct pblk_line *pblk_line_get(struct pblk *pblk);
 struct pblk_line *pblk_line_get_first_data(struct pblk *pblk);
 struct pblk_line *pblk_line_replace_data(struct pblk *pblk);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index d6f121452d5d..ec46693f6b64 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -667,11 +667,14 @@ static struct request *nvme_nvm_alloc_request(struct request_queue *q,
 	return rq;
 }
 
-static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
+static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd,
+			      void *buf)
 {
+	struct nvm_geo *geo = &dev->geo;
 	struct request_queue *q = dev->q;
 	struct nvme_nvm_command *cmd;
 	struct request *rq;
+	int ret;
 
 	cmd = kzalloc(sizeof(struct nvme_nvm_command), GFP_KERNEL);
 	if (!cmd)
@@ -679,8 +682,15 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 
 	rq = nvme_nvm_alloc_request(q, rqd, cmd);
 	if (IS_ERR(rq)) {
-		kfree(cmd);
-		return PTR_ERR(rq);
+		ret = PTR_ERR(rq);
+		goto err_free_cmd;
+	}
+
+	if (buf) {
+		ret = blk_rq_map_kern(q, rq, buf, geo->csecs * rqd->nr_ppas,
+				GFP_KERNEL);
+		if (ret)
+			goto err_free_cmd;
 	}
 
 	rq->end_io_data = rqd;
@@ -688,6 +698,10 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
 	blk_execute_rq_nowait(q, NULL, rq, 0, nvme_nvm_end_io);
 
 	return 0;
+
+err_free_cmd:
+	kfree(cmd);
+	return ret;
 }
 
 static void *nvme_nvm_create_dma_pool(struct nvm_dev *nvmdev, char *name,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 8891647b24b1..ee8ec2e68055 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -88,7 +88,7 @@ typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
 typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
 typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
 							struct nvm_chk_meta *);
-typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
+typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *, void *);
 typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int);
 typedef void (nvm_destroy_dma_pool_fn)(void *);
 typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t,
@@ -680,8 +680,8 @@ extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr,
 			      int, struct nvm_chk_meta *);
 extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *,
 			      int, int);
-extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
-extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
+extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *, void *);
+extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *, void *);
 extern void nvm_end_io(struct nvm_rq *);
 
 #else /* CONFIG_NVM */
-- 
cgit v1.2.3


From 63f0c60379650d82250f22e4cf4137ef3dc4f43d Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 23 Jul 2019 19:58:39 +0200
Subject: arm64: Introduce prctl() options to control the tagged user addresses
 ABI

It is not desirable to relax the ABI to allow tagged user addresses into
the kernel indiscriminately. This patch introduces a prctl() interface
for enabling or disabling the tagged ABI with a global sysctl control
for preventing applications from enabling the relaxed ABI (meant for
testing user-space prctl() return error checking without reconfiguring
the kernel). The ABI properties are inherited by threads of the same
application and fork()'ed children but cleared on execve(). A Kconfig
option allows the overall disabling of the relaxed ABI.

The PR_SET_TAGGED_ADDR_CTRL will be expanded in the future to handle
MTE-specific settings like imprecise vs precise exceptions.

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/Kconfig                   |  9 +++++
 arch/arm64/include/asm/processor.h   |  8 ++++
 arch/arm64/include/asm/thread_info.h |  1 +
 arch/arm64/include/asm/uaccess.h     |  4 +-
 arch/arm64/kernel/process.c          | 73 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/prctl.h           |  5 +++
 kernel/sys.c                         | 12 ++++++
 7 files changed, 111 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3adcec05b1f6..5d254178b9ca 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1110,6 +1110,15 @@ config ARM64_SW_TTBR0_PAN
 	  zeroed area and reserved ASID. The user access routines
 	  restore the valid TTBR0_EL1 temporarily.
 
+config ARM64_TAGGED_ADDR_ABI
+	bool "Enable the tagged user addresses syscall ABI"
+	default y
+	help
+	  When this option is enabled, user applications can opt in to a
+	  relaxed ABI via prctl() allowing tagged addresses to be passed
+	  to system calls as pointer arguments. For details, see
+	  Documentation/arm64/tagged-address-abi.txt.
+
 menuconfig COMPAT
 	bool "Kernel support for 32-bit EL0"
 	depends on ARM64_4K_PAGES || EXPERT
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 844e2964b0f5..28eed40ffc12 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -306,6 +306,14 @@ extern void __init minsigstksz_setup(void);
 /* PR_PAC_RESET_KEYS prctl */
 #define PAC_RESET_KEYS(tsk, arg)	ptrauth_prctl_reset_keys(tsk, arg)
 
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */
+long set_tagged_addr_ctrl(unsigned long arg);
+long get_tagged_addr_ctrl(void);
+#define SET_TAGGED_ADDR_CTRL(arg)	set_tagged_addr_ctrl(arg)
+#define GET_TAGGED_ADDR_CTRL()		get_tagged_addr_ctrl()
+#endif
+
 /*
  * For CONFIG_GCC_PLUGIN_STACKLEAK
  *
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 180b34ec5965..012238d8e58d 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -90,6 +90,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_SVE			23	/* Scalable Vector Extension in use */
 #define TIF_SVE_VL_INHERIT	24	/* Inherit sve_vl_onexec across exec */
 #define TIF_SSBD		25	/* Wants SSB mitigation */
+#define TIF_TAGGED_ADDR		26	/* Allow tagged user addresses */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index a138e3b4f717..097d6bfac0b7 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -62,7 +62,9 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si
 {
 	unsigned long ret, limit = current_thread_info()->addr_limit;
 
-	addr = untagged_addr(addr);
+	if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) &&
+	    test_thread_flag(TIF_TAGGED_ADDR))
+		addr = untagged_addr(addr);
 
 	__chk_user_ptr(addr);
 	asm volatile(
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index f674f28df663..76b7c55026aa 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -19,6 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
+#include <linux/sysctl.h>
 #include <linux/unistd.h>
 #include <linux/user.h>
 #include <linux/delay.h>
@@ -38,6 +39,7 @@
 #include <trace/events/power.h>
 #include <linux/percpu.h>
 #include <linux/thread_info.h>
+#include <linux/prctl.h>
 
 #include <asm/alternative.h>
 #include <asm/arch_gicv3.h>
@@ -307,11 +309,18 @@ static void tls_thread_flush(void)
 	}
 }
 
+static void flush_tagged_addr_state(void)
+{
+	if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI))
+		clear_thread_flag(TIF_TAGGED_ADDR);
+}
+
 void flush_thread(void)
 {
 	fpsimd_flush_thread();
 	tls_thread_flush();
 	flush_ptrace_hw_breakpoint(current);
+	flush_tagged_addr_state();
 }
 
 void release_thread(struct task_struct *dead_task)
@@ -565,3 +574,67 @@ void arch_setup_new_exec(void)
 
 	ptrauth_thread_init_user(current);
 }
+
+#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
+/*
+ * Control the relaxed ABI allowing tagged user addresses into the kernel.
+ */
+static unsigned int tagged_addr_prctl_allowed = 1;
+
+long set_tagged_addr_ctrl(unsigned long arg)
+{
+	if (!tagged_addr_prctl_allowed)
+		return -EINVAL;
+	if (is_compat_task())
+		return -EINVAL;
+	if (arg & ~PR_TAGGED_ADDR_ENABLE)
+		return -EINVAL;
+
+	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
+
+	return 0;
+}
+
+long get_tagged_addr_ctrl(void)
+{
+	if (!tagged_addr_prctl_allowed)
+		return -EINVAL;
+	if (is_compat_task())
+		return -EINVAL;
+
+	if (test_thread_flag(TIF_TAGGED_ADDR))
+		return PR_TAGGED_ADDR_ENABLE;
+
+	return 0;
+}
+
+/*
+ * Global sysctl to disable the tagged user addresses support. This control
+ * only prevents the tagged address ABI enabling via prctl() and does not
+ * disable it for tasks that already opted in to the relaxed ABI.
+ */
+static int zero;
+static int one = 1;
+
+static struct ctl_table tagged_addr_sysctl_table[] = {
+	{
+		.procname	= "tagged_addr",
+		.mode		= 0644,
+		.data		= &tagged_addr_prctl_allowed,
+		.maxlen		= sizeof(int),
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+	{ }
+};
+
+static int __init tagged_addr_init(void)
+{
+	if (!register_sysctl("abi", tagged_addr_sysctl_table))
+		return -EINVAL;
+	return 0;
+}
+
+core_initcall(tagged_addr_init);
+#endif	/* CONFIG_ARM64_TAGGED_ADDR_ABI */
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 094bb03b9cc2..2e927b3e9d6c 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -229,4 +229,9 @@ struct prctl_mm_map {
 # define PR_PAC_APDBKEY			(1UL << 3)
 # define PR_PAC_APGAKEY			(1UL << 4)
 
+/* Tagged user address controls for arm64 */
+#define PR_SET_TAGGED_ADDR_CTRL		55
+#define PR_GET_TAGGED_ADDR_CTRL		56
+# define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 2969304c29fe..c6c4d5358bd3 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -124,6 +124,12 @@
 #ifndef PAC_RESET_KEYS
 # define PAC_RESET_KEYS(a, b)	(-EINVAL)
 #endif
+#ifndef SET_TAGGED_ADDR_CTRL
+# define SET_TAGGED_ADDR_CTRL(a)	(-EINVAL)
+#endif
+#ifndef GET_TAGGED_ADDR_CTRL
+# define GET_TAGGED_ADDR_CTRL()		(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2492,6 +2498,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		error = PAC_RESET_KEYS(me, arg2);
 		break;
+	case PR_SET_TAGGED_ADDR_CTRL:
+		error = SET_TAGGED_ADDR_CTRL(arg2);
+		break;
+	case PR_GET_TAGGED_ADDR_CTRL:
+		error = GET_TAGGED_ADDR_CTRL();
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 70433f67ec3a54710744902d782f8954325e25b8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Tue, 6 Aug 2019 12:15:19 +1000
Subject: MODSIGN: make new include file self contained

Fixes: c8424e776b09 ("MODSIGN: Export module signature definitions")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
---
 include/linux/module_signature.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/module_signature.h b/include/linux/module_signature.h
index 523617fc5b6a..7eb4b00381ac 100644
--- a/include/linux/module_signature.h
+++ b/include/linux/module_signature.h
@@ -9,6 +9,8 @@
 #ifndef _LINUX_MODULE_SIGNATURE_H
 #define _LINUX_MODULE_SIGNATURE_H
 
+#include <linux/types.h>
+
 /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
 #define MODULE_SIG_STRING "~Module signature appended~\n"
 
-- 
cgit v1.2.3


From c4bd48699beb92d6bb99d6139d1e9737cca73480 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Sat, 3 Aug 2019 16:36:18 +0300
Subject: net: sched: police: allow accessing police->params with rtnl

Recently implemented support for police action in flow_offload infra leads
to following rcu usage warning:

[ 1925.881092] =============================
[ 1925.881094] WARNING: suspicious RCU usage
[ 1925.881098] 5.3.0-rc1+ #574 Not tainted
[ 1925.881100] -----------------------------
[ 1925.881104] include/net/tc_act/tc_police.h:57 suspicious rcu_dereference_check() usage!
[ 1925.881106]
               other info that might help us debug this:

[ 1925.881109]
               rcu_scheduler_active = 2, debug_locks = 1
[ 1925.881112] 1 lock held by tc/18591:
[ 1925.881115]  #0: 00000000b03cb918 (rtnl_mutex){+.+.}, at: tc_new_tfilter+0x47c/0x970
[ 1925.881124]
               stack backtrace:
[ 1925.881127] CPU: 2 PID: 18591 Comm: tc Not tainted 5.3.0-rc1+ #574
[ 1925.881130] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 1925.881132] Call Trace:
[ 1925.881138]  dump_stack+0x85/0xc0
[ 1925.881145]  tc_setup_flow_action+0x1771/0x2040
[ 1925.881155]  fl_hw_replace_filter+0x11f/0x2e0 [cls_flower]
[ 1925.881175]  fl_change+0xd24/0x1b30 [cls_flower]
[ 1925.881200]  tc_new_tfilter+0x3e0/0x970
[ 1925.881231]  ? tc_del_tfilter+0x720/0x720
[ 1925.881243]  rtnetlink_rcv_msg+0x389/0x4b0
[ 1925.881250]  ? netlink_deliver_tap+0x95/0x400
[ 1925.881257]  ? rtnl_dellink+0x2d0/0x2d0
[ 1925.881264]  netlink_rcv_skb+0x49/0x110
[ 1925.881275]  netlink_unicast+0x171/0x200
[ 1925.881284]  netlink_sendmsg+0x224/0x3f0
[ 1925.881299]  sock_sendmsg+0x5e/0x60
[ 1925.881305]  ___sys_sendmsg+0x2ae/0x330
[ 1925.881309]  ? task_work_add+0x43/0x50
[ 1925.881314]  ? fput_many+0x45/0x80
[ 1925.881329]  ? __lock_acquire+0x248/0x1930
[ 1925.881342]  ? find_held_lock+0x2b/0x80
[ 1925.881347]  ? task_work_run+0x7b/0xd0
[ 1925.881359]  __sys_sendmsg+0x59/0xa0
[ 1925.881375]  do_syscall_64+0x5c/0xb0
[ 1925.881381]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 1925.881384] RIP: 0033:0x7feb245047b8
[ 1925.881388] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83
 ec 28 89 54
[ 1925.881391] RSP: 002b:00007ffc2d2a5788 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 1925.881395] RAX: ffffffffffffffda RBX: 000000005d4497ed RCX: 00007feb245047b8
[ 1925.881398] RDX: 0000000000000000 RSI: 00007ffc2d2a57f0 RDI: 0000000000000003
[ 1925.881400] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006
[ 1925.881403] R10: 0000000000404ec2 R11: 0000000000000246 R12: 0000000000000001
[ 1925.881406] R13: 0000000000480640 R14: 0000000000000012 R15: 0000000000000001

Change tcf_police_rate_bytes_ps() and tcf_police_tcfp_burst() helpers to
allow using them from both rtnl and rcu protected contexts.

Fixes: 8c8cfc6ed274 ("net/sched: add police action to the hardware intermediate representation")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_police.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index 8b9ef3664262..cfdc7cb82cad 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -54,7 +54,7 @@ static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act)
 	struct tcf_police *police = to_police(act);
 	struct tcf_police_params *params;
 
-	params = rcu_dereference_bh(police->params);
+	params = rcu_dereference_bh_rtnl(police->params);
 	return params->rate.rate_bytes_ps;
 }
 
@@ -63,7 +63,7 @@ static inline s64 tcf_police_tcfp_burst(const struct tc_action *act)
 	struct tcf_police *police = to_police(act);
 	struct tcf_police_params *params;
 
-	params = rcu_dereference_bh(police->params);
+	params = rcu_dereference_bh_rtnl(police->params);
 	return params->tcfp_burst;
 }
 
-- 
cgit v1.2.3


From 67cbf7dedd03a63ca2fbd9df2049eabba7a37edf Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Sat, 3 Aug 2019 16:36:19 +0300
Subject: net: sched: sample: allow accessing psample_group with rtnl

Recently implemented support for sample action in flow_offload infra leads
to following rcu usage warning:

[ 1938.234856] =============================
[ 1938.234858] WARNING: suspicious RCU usage
[ 1938.234863] 5.3.0-rc1+ #574 Not tainted
[ 1938.234866] -----------------------------
[ 1938.234869] include/net/tc_act/tc_sample.h:47 suspicious rcu_dereference_check() usage!
[ 1938.234872]
               other info that might help us debug this:

[ 1938.234875]
               rcu_scheduler_active = 2, debug_locks = 1
[ 1938.234879] 1 lock held by tc/19540:
[ 1938.234881]  #0: 00000000b03cb918 (rtnl_mutex){+.+.}, at: tc_new_tfilter+0x47c/0x970
[ 1938.234900]
               stack backtrace:
[ 1938.234905] CPU: 2 PID: 19540 Comm: tc Not tainted 5.3.0-rc1+ #574
[ 1938.234908] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[ 1938.234911] Call Trace:
[ 1938.234922]  dump_stack+0x85/0xc0
[ 1938.234930]  tc_setup_flow_action+0xed5/0x2040
[ 1938.234944]  fl_hw_replace_filter+0x11f/0x2e0 [cls_flower]
[ 1938.234965]  fl_change+0xd24/0x1b30 [cls_flower]
[ 1938.234990]  tc_new_tfilter+0x3e0/0x970
[ 1938.235021]  ? tc_del_tfilter+0x720/0x720
[ 1938.235028]  rtnetlink_rcv_msg+0x389/0x4b0
[ 1938.235038]  ? netlink_deliver_tap+0x95/0x400
[ 1938.235044]  ? rtnl_dellink+0x2d0/0x2d0
[ 1938.235053]  netlink_rcv_skb+0x49/0x110
[ 1938.235063]  netlink_unicast+0x171/0x200
[ 1938.235073]  netlink_sendmsg+0x224/0x3f0
[ 1938.235091]  sock_sendmsg+0x5e/0x60
[ 1938.235097]  ___sys_sendmsg+0x2ae/0x330
[ 1938.235111]  ? __handle_mm_fault+0x12cd/0x19e0
[ 1938.235125]  ? __handle_mm_fault+0x12cd/0x19e0
[ 1938.235138]  ? find_held_lock+0x2b/0x80
[ 1938.235147]  ? do_user_addr_fault+0x22d/0x490
[ 1938.235160]  __sys_sendmsg+0x59/0xa0
[ 1938.235178]  do_syscall_64+0x5c/0xb0
[ 1938.235187]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 1938.235192] RIP: 0033:0x7ff9a4d597b8
[ 1938.235197] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83
 ec 28 89 54
[ 1938.235200] RSP: 002b:00007ffcfe381c48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[ 1938.235205] RAX: ffffffffffffffda RBX: 000000005d4497f9 RCX: 00007ff9a4d597b8
[ 1938.235208] RDX: 0000000000000000 RSI: 00007ffcfe381cb0 RDI: 0000000000000003
[ 1938.235211] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006
[ 1938.235214] R10: 0000000000404ec2 R11: 0000000000000246 R12: 0000000000000001
[ 1938.235217] R13: 0000000000480640 R14: 0000000000000012 R15: 0000000000000001

Change tcf_sample_psample_group() helper to allow using it from both rtnl
and rcu protected contexts.

Fixes: a7a7be6087b0 ("net/sched: add sample action to the hardware intermediate representation")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_sample.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 0a559d4b6f0f..b4fce0fae645 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -44,7 +44,7 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a)
 static inline struct psample_group *
 tcf_sample_psample_group(const struct tc_action *a)
 {
-	return rcu_dereference(to_sample(a)->psample_group);
+	return rcu_dereference_rtnl(to_sample(a)->psample_group);
 }
 
 #endif /* __NET_TC_SAMPLE_H */
-- 
cgit v1.2.3


From 77feb4eed7560215a724df6e7d4f1beaf98ba49d Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Sun, 4 Aug 2019 16:09:03 +0100
Subject: net: tc_act: add skbedit_ptype helper functions

The tc_act header file contains an inline function that checks if an
action is changing the skb mark of a packet and a further function to
extract the mark.

Add similar functions to check for and get skbedit actions that modify
the packet type of the skb.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_skbedit.h | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 4c04e2985508..b22a1f641f02 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -54,4 +54,31 @@ static inline u32 tcf_skbedit_mark(const struct tc_action *a)
 	return mark;
 }
 
+/* Return true iff action is ptype */
+static inline bool is_tcf_skbedit_ptype(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	u32 flags;
+
+	if (a->ops && a->ops->id == TCA_ID_SKBEDIT) {
+		rcu_read_lock();
+		flags = rcu_dereference(to_skbedit(a)->params)->flags;
+		rcu_read_unlock();
+		return flags == SKBEDIT_F_PTYPE;
+	}
+#endif
+	return false;
+}
+
+static inline u32 tcf_skbedit_ptype(const struct tc_action *a)
+{
+	u16 ptype;
+
+	rcu_read_lock();
+	ptype = rcu_dereference(to_skbedit(a)->params)->ptype;
+	rcu_read_unlock();
+
+	return ptype;
+}
+
 #endif /* __NET_TC_SKBEDIT_H */
-- 
cgit v1.2.3


From fb1b775a247ee8d846152841f780eba6cb71bcfc Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Sun, 4 Aug 2019 16:09:04 +0100
Subject: net: sched: add skbedit of ptype action to hardware IR

TC rules can impliment skbedit actions. Currently actions that modify the
skb mark are passed to offloading drivers via the hardware intermediate
representation in the flow_offload API.

Extend this to include skbedit actions that modify the packet type of the
skb. Such actions may be used to set the ptype to HOST when redirecting a
packet to ingress.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 2 ++
 net/sched/cls_api.c        | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 00b9aab5fdc1..04c29f5bb60a 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -126,6 +126,7 @@ enum flow_action_id {
 	FLOW_ACTION_ADD,
 	FLOW_ACTION_CSUM,
 	FLOW_ACTION_MARK,
+	FLOW_ACTION_PTYPE,
 	FLOW_ACTION_WAKE,
 	FLOW_ACTION_QUEUE,
 	FLOW_ACTION_SAMPLE,
@@ -168,6 +169,7 @@ struct flow_action_entry {
 		const struct ip_tunnel_info *tunnel;	/* FLOW_ACTION_TUNNEL_ENCAP */
 		u32			csum_flags;	/* FLOW_ACTION_CSUM */
 		u32			mark;		/* FLOW_ACTION_MARK */
+		u16                     ptype;          /* FLOW_ACTION_PTYPE */
 		struct {				/* FLOW_ACTION_QUEUE */
 			u32		ctx;
 			u32		index;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3565d9aa09aa..ae73d3705571 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3294,6 +3294,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			default:
 				goto err_out;
 			}
+		} else if (is_tcf_skbedit_ptype(act)) {
+			entry->id = FLOW_ACTION_PTYPE;
+			entry->ptype = tcf_skbedit_ptype(act);
 		} else {
 			goto err_out;
 		}
-- 
cgit v1.2.3


From d7609c96c6da0831e196d970a20dc960bcc4a4d6 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Sun, 4 Aug 2019 16:09:05 +0100
Subject: net: tc_act: add helpers to detect ingress mirred actions

TC mirred actions can send to egress or ingress on a given netdev. Helpers
exist to detect actions that are mirred to egress. Extend the header file
to include helpers to detect ingress mirred actions.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_mirred.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index c757585a05b0..1cace4c69e44 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -32,6 +32,24 @@ static inline bool is_tcf_mirred_egress_mirror(const struct tc_action *a)
 	return false;
 }
 
+static inline bool is_tcf_mirred_ingress_redirect(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->id == TCA_ID_MIRRED)
+		return to_mirred(a)->tcfm_eaction == TCA_INGRESS_REDIR;
+#endif
+	return false;
+}
+
+static inline bool is_tcf_mirred_ingress_mirror(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->id == TCA_ID_MIRRED)
+		return to_mirred(a)->tcfm_eaction == TCA_INGRESS_MIRROR;
+#endif
+	return false;
+}
+
 static inline struct net_device *tcf_mirred_dev(const struct tc_action *a)
 {
 	return rtnl_dereference(to_mirred(a)->tcfm_dev);
-- 
cgit v1.2.3


From 48e584ac583b08a923d4d872596cc7b049e99f12 Mon Sep 17 00:00:00 2001
From: John Hurley <john.hurley@netronome.com>
Date: Sun, 4 Aug 2019 16:09:06 +0100
Subject: net: sched: add ingress mirred action to hardware IR

TC mirred actions (redirect and mirred) can send to egress or ingress of a
device. Currently only egress is used for hw offload rules.

Modify the intermediate representation for hw offload to include mirred
actions that go to ingress. This gives drivers access to such rules and
can decide whether or not to offload them.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 2 ++
 net/sched/cls_api.c        | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 04c29f5bb60a..d3b12bc8a114 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -117,6 +117,8 @@ enum flow_action_id {
 	FLOW_ACTION_GOTO,
 	FLOW_ACTION_REDIRECT,
 	FLOW_ACTION_MIRRED,
+	FLOW_ACTION_REDIRECT_INGRESS,
+	FLOW_ACTION_MIRRED_INGRESS,
 	FLOW_ACTION_VLAN_PUSH,
 	FLOW_ACTION_VLAN_POP,
 	FLOW_ACTION_VLAN_MANGLE,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index ae73d3705571..9d85d3295c7c 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3205,6 +3205,12 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 		} else if (is_tcf_mirred_egress_mirror(act)) {
 			entry->id = FLOW_ACTION_MIRRED;
 			entry->dev = tcf_mirred_dev(act);
+		} else if (is_tcf_mirred_ingress_redirect(act)) {
+			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
+			entry->dev = tcf_mirred_dev(act);
+		} else if (is_tcf_mirred_ingress_mirror(act)) {
+			entry->id = FLOW_ACTION_MIRRED_INGRESS;
+			entry->dev = tcf_mirred_dev(act);
 		} else if (is_tcf_vlan(act)) {
 			switch (tcf_vlan_action(act)) {
 			case TCA_VLAN_ACT_PUSH:
-- 
cgit v1.2.3


From 0cccd42e6193e168cbecc271dae464e4a53fd7b3 Mon Sep 17 00:00:00 2001
From: Jianjun Wang <jianjun.wang@mediatek.com>
Date: Fri, 28 Jun 2019 15:34:25 +0800
Subject: PCI: mediatek: Add controller support for MT7629

MT7629 is an ARM platform SoC which has the same PCIe IP as MT7622.

The HW default value of its PCI host controller Device ID is invalid,
fix it to match the hardware implementation.

Signed-off-by: Jianjun Wang <jianjun.wang@mediatek.com>
[lorenzo.pieralisi@arm.com: commit log/minor spelling update]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Andrew Murray <andrew.murray@arm.com>
Acked-by: Ryder Lee <ryder.lee@mediatek.com>
---
 drivers/pci/controller/pcie-mediatek.c | 18 ++++++++++++++++++
 include/linux/pci_ids.h                |  1 +
 2 files changed, 19 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c
index 80601e1b939e..3eaa7081ab2a 100644
--- a/drivers/pci/controller/pcie-mediatek.c
+++ b/drivers/pci/controller/pcie-mediatek.c
@@ -73,6 +73,7 @@
 #define PCIE_MSI_VECTOR		0x0c0
 
 #define PCIE_CONF_VEND_ID	0x100
+#define PCIE_CONF_DEVICE_ID	0x102
 #define PCIE_CONF_CLASS_ID	0x106
 
 #define PCIE_INT_MASK		0x420
@@ -141,12 +142,16 @@ struct mtk_pcie_port;
 /**
  * struct mtk_pcie_soc - differentiate between host generations
  * @need_fix_class_id: whether this host's class ID needed to be fixed or not
+ * @need_fix_device_id: whether this host's device ID needed to be fixed or not
+ * @device_id: device ID which this host need to be fixed
  * @ops: pointer to configuration access functions
  * @startup: pointer to controller setting functions
  * @setup_irq: pointer to initialize IRQ functions
  */
 struct mtk_pcie_soc {
 	bool need_fix_class_id;
+	bool need_fix_device_id;
+	unsigned int device_id;
 	struct pci_ops *ops;
 	int (*startup)(struct mtk_pcie_port *port);
 	int (*setup_irq)(struct mtk_pcie_port *port, struct device_node *node);
@@ -696,6 +701,9 @@ static int mtk_pcie_startup_port_v2(struct mtk_pcie_port *port)
 		writew(val, port->base + PCIE_CONF_CLASS_ID);
 	}
 
+	if (soc->need_fix_device_id)
+		writew(soc->device_id, port->base + PCIE_CONF_DEVICE_ID);
+
 	/* 100ms timeout value should be enough for Gen1/2 training */
 	err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_V2, val,
 				 !!(val & PCIE_PORT_LINKUP_V2), 20,
@@ -1216,11 +1224,21 @@ static const struct mtk_pcie_soc mtk_pcie_soc_mt7622 = {
 	.setup_irq = mtk_pcie_setup_irq,
 };
 
+static const struct mtk_pcie_soc mtk_pcie_soc_mt7629 = {
+	.need_fix_class_id = true,
+	.need_fix_device_id = true,
+	.device_id = PCI_DEVICE_ID_MEDIATEK_7629,
+	.ops = &mtk_pcie_ops_v2,
+	.startup = mtk_pcie_startup_port_v2,
+	.setup_irq = mtk_pcie_setup_irq,
+};
+
 static const struct of_device_id mtk_pcie_ids[] = {
 	{ .compatible = "mediatek,mt2701-pcie", .data = &mtk_pcie_soc_v1 },
 	{ .compatible = "mediatek,mt7623-pcie", .data = &mtk_pcie_soc_v1 },
 	{ .compatible = "mediatek,mt2712-pcie", .data = &mtk_pcie_soc_mt2712 },
 	{ .compatible = "mediatek,mt7622-pcie", .data = &mtk_pcie_soc_mt7622 },
+	{ .compatible = "mediatek,mt7629-pcie", .data = &mtk_pcie_soc_mt7629 },
 	{},
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c842735a4f45..f59a6f98900c 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2132,6 +2132,7 @@
 #define PCI_VENDOR_ID_MYRICOM		0x14c1
 
 #define PCI_VENDOR_ID_MEDIATEK		0x14c3
+#define PCI_DEVICE_ID_MEDIATEK_7629	0x7629
 
 #define PCI_VENDOR_ID_TITAN		0x14D2
 #define PCI_DEVICE_ID_TITAN_010L	0x8001
-- 
cgit v1.2.3


From b71b283e3d6df40cef03b5ce882b2385971f58a3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 6 Aug 2019 16:44:51 +0200
Subject: USB: add support for dev_groups to struct usb_driver

Now that the driver core supports dev_groups for individual drivers,
expose that pointer to struct usb_driver to make it easier for USB
drivers to also use it.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20190806144502.17792-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/driver.c | 1 +
 include/linux/usb.h       | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index ebcadaad89d1..687fc5df4c17 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -954,6 +954,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner,
 	new_driver->drvwrap.driver.remove = usb_unbind_interface;
 	new_driver->drvwrap.driver.owner = owner;
 	new_driver->drvwrap.driver.mod_name = mod_name;
+	new_driver->drvwrap.driver.dev_groups = new_driver->dev_groups;
 	spin_lock_init(&new_driver->dynids.lock);
 	INIT_LIST_HEAD(&new_driver->dynids.list);
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 83d35d993e8c..af4eb6419ae8 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1151,6 +1151,8 @@ struct usbdrv_wrap {
  * @id_table: USB drivers use ID table to support hotplugging.
  *	Export this with MODULE_DEVICE_TABLE(usb,...).  This must be set
  *	or your driver's probe function will never get called.
+ * @dev_groups: Attributes attached to the device that will be created once it
+ *	is bound to the driver.
  * @dynids: used internally to hold the list of dynamically added device
  *	ids for this driver.
  * @drvwrap: Driver-model core structure wrapper.
@@ -1198,6 +1200,7 @@ struct usb_driver {
 	int (*post_reset)(struct usb_interface *intf);
 
 	const struct usb_device_id *id_table;
+	const struct attribute_group **dev_groups;
 
 	struct usb_dynids dynids;
 	struct usbdrv_wrap drvwrap;
-- 
cgit v1.2.3


From 7d9c1d2f7aca2651b3821947bf928ee131df102a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 6 Aug 2019 16:44:52 +0200
Subject: USB: add support for dev_groups to struct usb_device_driver

Now that the driver core supports dev_groups for individual drivers,
expose that pointer to struct usb_device_driver to make it easier for USB
drivers to also use it.

Yes, users of usb_device_driver are much rare, but there are instances
already that use custom sysfs files, so adding this support will make
things easier for those drivers.  usbip is one example, hubs might be
another one.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20190806144502.17792-3-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/driver.c | 1 +
 include/linux/usb.h       | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 687fc5df4c17..2b27d232d7a7 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -892,6 +892,7 @@ int usb_register_device_driver(struct usb_device_driver *new_udriver,
 	new_udriver->drvwrap.driver.probe = usb_probe_device;
 	new_udriver->drvwrap.driver.remove = usb_unbind_device;
 	new_udriver->drvwrap.driver.owner = owner;
+	new_udriver->drvwrap.driver.dev_groups = new_udriver->dev_groups;
 
 	retval = driver_register(&new_udriver->drvwrap.driver);
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index af4eb6419ae8..57f667cad3ec 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1224,6 +1224,8 @@ struct usb_driver {
  *	module is being unloaded.
  * @suspend: Called when the device is going to be suspended by the system.
  * @resume: Called when the device is being resumed by the system.
+ * @dev_groups: Attributes attached to the device that will be created once it
+ *	is bound to the driver.
  * @drvwrap: Driver-model core structure wrapper.
  * @supports_autosuspend: if set to 0, the USB core will not allow autosuspend
  *	for devices bound to this driver.
@@ -1238,6 +1240,7 @@ struct usb_device_driver {
 
 	int (*suspend) (struct usb_device *udev, pm_message_t message);
 	int (*resume) (struct usb_device *udev, pm_message_t message);
+	const struct attribute_group **dev_groups;
 	struct usbdrv_wrap drvwrap;
 	unsigned int supports_autosuspend:1;
 };
-- 
cgit v1.2.3


From 45880f7b7b19e043ce0aaa4cb7d05369425c82fa Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Tue, 6 Aug 2019 18:00:13 +0800
Subject: error-injection: Consolidate override function definition

The function override_function_with_return() is defined separately for
each architecture and every architecture's definition is almost same
with each other.  E.g. x86 and powerpc both define function in its own
asm/error-injection.h header and override_function_with_return() has
the same definition, the only difference is that x86 defines an extra
function just_return_func() but it is specific for x86 and is only used
by x86's override_function_with_return(), so don't need to export this
function.

This patch consolidates override_function_with_return() definition into
asm-generic/error-injection.h header, thus all architectures can use the
common definition.  As result, the architecture specific headers are
removed; the include/linux/error-injection.h header also changes to
include asm-generic/error-injection.h header rather than architecture
header, furthermore, it includes linux/compiler.h for successful
compilation.

Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/powerpc/include/asm/error-injection.h | 13 -------------
 arch/x86/include/asm/error-injection.h     | 13 -------------
 include/asm-generic/error-injection.h      |  6 ++++++
 include/linux/error-injection.h            |  6 +++---
 4 files changed, 9 insertions(+), 29 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/error-injection.h
 delete mode 100644 arch/x86/include/asm/error-injection.h

(limited to 'include')

diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h
deleted file mode 100644
index 62fd24739852..000000000000
--- a/arch/powerpc/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h
deleted file mode 100644
index 47b7a1296245..000000000000
--- a/arch/x86/include/asm/error-injection.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ERROR_INJECTION_H
-#define _ASM_ERROR_INJECTION_H
-
-#include <linux/compiler.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm-generic/error-injection.h>
-
-asmlinkage void just_return_func(void);
-void override_function_with_return(struct pt_regs *regs);
-
-#endif /* _ASM_ERROR_INJECTION_H */
diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h
index 95a159a4137f..80ca61058dd2 100644
--- a/include/asm-generic/error-injection.h
+++ b/include/asm-generic/error-injection.h
@@ -16,6 +16,8 @@ struct error_injection_entry {
 	int		etype;
 };
 
+struct pt_regs;
+
 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
 /*
  * Whitelist ganerating macro. Specify functions which can be
@@ -28,8 +30,12 @@ static struct error_injection_entry __used				\
 		.addr = (unsigned long)fname,				\
 		.etype = EI_ETYPE_##_etype,				\
 	};
+
+void override_function_with_return(struct pt_regs *regs);
 #else
 #define ALLOW_ERROR_INJECTION(fname, _etype)
+
+static inline void override_function_with_return(struct pt_regs *regs) { }
 #endif
 #endif
 
diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h
index 280c61ecbf20..635a95caf29f 100644
--- a/include/linux/error-injection.h
+++ b/include/linux/error-injection.h
@@ -2,16 +2,16 @@
 #ifndef _LINUX_ERROR_INJECTION_H
 #define _LINUX_ERROR_INJECTION_H
 
-#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+#include <linux/compiler.h>
+#include <asm-generic/error-injection.h>
 
-#include <asm/error-injection.h>
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
 
 extern bool within_error_injection_list(unsigned long addr);
 extern int get_injectable_error_type(unsigned long addr);
 
 #else /* !CONFIG_FUNCTION_ERROR_INJECTION */
 
-#include <asm-generic/error-injection.h>
 static inline bool within_error_injection_list(unsigned long addr)
 {
 	return false;
-- 
cgit v1.2.3


From b977fcf477c176e5f41775f0ea139f935b0f25b7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 31 Jul 2019 15:13:19 +0100
Subject: irqdomain/debugfs: Use PAs to generate fwnode names

Booting a large arm64 server (HiSi D05) leads to the following
shouting at boot time:

[   20.722132] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!
[   20.730851] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!
[   20.739560] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!
[   20.748267] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!
[   20.756975] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!
[   20.765683] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!
[   20.774391] debugfs: File 'irqchip@(____ptrval____)-3' in directory 'domains' already present!

and many more... Evidently, we expect something a bit more informative
than ____ptrval____, and certainly we want all of our domains, not just
the first one.

For that, turn the %p used to generate the fwnode name into something
that won't be repainted (%pa). Given that we've now fixed all users to
pass a pointer to a PA, it will actually do the right thing.

Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqdomain.h | 6 +++---
 kernel/irq/irqdomain.c    | 9 +++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 07ec8b390161..583e7abd07f9 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -220,7 +220,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
 
 #ifdef CONFIG_IRQ_DOMAIN
 struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
-						const char *name, void *data);
+						const char *name, phys_addr_t *pa);
 
 enum {
 	IRQCHIP_FWNODE_REAL,
@@ -241,9 +241,9 @@ struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
 					 NULL);
 }
 
-static inline struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
+static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
 {
-	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, data);
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, pa);
 }
 
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3078d0e48bba..e7bbab149750 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -31,7 +31,7 @@ struct irqchip_fwid {
 	struct fwnode_handle	fwnode;
 	unsigned int		type;
 	char			*name;
-	void *data;
+	phys_addr_t		*pa;
 };
 
 #ifdef CONFIG_GENERIC_IRQ_DEBUGFS
@@ -62,7 +62,8 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
  * domain struct.
  */
 struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
-						const char *name, void *data)
+						const char *name,
+						phys_addr_t *pa)
 {
 	struct irqchip_fwid *fwid;
 	char *n;
@@ -77,7 +78,7 @@ struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
 		n = kasprintf(GFP_KERNEL, "%s-%d", name, id);
 		break;
 	default:
-		n = kasprintf(GFP_KERNEL, "irqchip@%p", data);
+		n = kasprintf(GFP_KERNEL, "irqchip@%pa", pa);
 		break;
 	}
 
@@ -89,7 +90,7 @@ struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
 
 	fwid->type = type;
 	fwid->name = n;
-	fwid->data = data;
+	fwid->pa = pa;
 	fwid->fwnode.ops = &irqchip_fwnode_ops;
 	return &fwid->fwnode;
 }
-- 
cgit v1.2.3


From 4b3e30ed3ec7864e798403a63ff2e96bd0c19ab0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 7 Aug 2019 00:23:07 -0500
Subject: Revert "drm/amdkfd: New IOCTL to allocate queue GWS"

This reverts commit 1a058c3376765ee31d65e28cbbb9d4ff15120056.

This interface is still in too much flux.  Revert until
it's sorted out.

Acked-by: Oak Zeng <Oak.Zeng@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 ----------------------------
 include/uapi/linux/kfd_ioctl.h           | 20 +-------------------
 2 files changed, 1 insertion(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 26b15cc56c31..1d3cd5c50d5f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1567,32 +1567,6 @@ copy_from_user_failed:
 	return err;
 }
 
-static int kfd_ioctl_alloc_queue_gws(struct file *filep,
-		struct kfd_process *p, void *data)
-{
-	int retval;
-	struct kfd_ioctl_alloc_queue_gws_args *args = data;
-	struct kfd_dev *dev;
-
-	if (!hws_gws_support)
-		return -ENODEV;
-
-	dev = kfd_device_by_id(args->gpu_id);
-	if (!dev) {
-		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
-		return -ENODEV;
-	}
-	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
-		return -ENODEV;
-
-	mutex_lock(&p->mutex);
-	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
-	mutex_unlock(&p->mutex);
-
-	args->first_gws = 0;
-	return retval;
-}
-
 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
@@ -1795,8 +1769,6 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
 				kfd_ioctl_import_dmabuf, 0),
 
-	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
-			kfd_ioctl_alloc_queue_gws, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 070d1bc7e725..20917c59f39c 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -410,21 +410,6 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 	__u32 n_success;		/* to/from KFD */
 };
 
-/* Allocate GWS for specific queue
- *
- * @gpu_id:      device identifier
- * @queue_id:    queue's id that GWS is allocated for
- * @num_gws:     how many GWS to allocate
- * @first_gws:   index of the first GWS allocated.
- *               only support contiguous GWS allocation
- */
-struct kfd_ioctl_alloc_queue_gws_args {
-	__u32 gpu_id;		/* to KFD */
-	__u32 queue_id;		/* to KFD */
-	__u32 num_gws;		/* to KFD */
-	__u32 first_gws;	/* from KFD */
-};
-
 struct kfd_ioctl_get_dmabuf_info_args {
 	__u64 size;		/* from KFD */
 	__u64 metadata_ptr;	/* to KFD */
@@ -544,10 +529,7 @@ enum kfd_mmio_remap {
 #define AMDKFD_IOC_IMPORT_DMABUF		\
 		AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
 
-#define AMDKFD_IOC_ALLOC_QUEUE_GWS		\
-		AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args)
-
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x1F
+#define AMDKFD_COMMAND_END		0x1E
 
 #endif
-- 
cgit v1.2.3


From 2cbeb41913e639890bf2c6485d7bdc6a25fdfd56 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 Aug 2019 19:05:43 +0300
Subject: mm/hmm: remove the unused vma argument to hmm_range_dma_unmap

Link: https://lore.kernel.org/r/20190806160554.14046-6-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/hmm.h | 1 -
 mm/hmm.c            | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 82265118d94a..59be0aa2476d 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -422,7 +422,6 @@ long hmm_range_dma_map(struct hmm_range *range,
 		       dma_addr_t *daddrs,
 		       unsigned int flags);
 long hmm_range_dma_unmap(struct hmm_range *range,
-			 struct vm_area_struct *vma,
 			 struct device *device,
 			 dma_addr_t *daddrs,
 			 bool dirty);
diff --git a/mm/hmm.c b/mm/hmm.c
index 9a908902e4cc..160ecb47d04b 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1129,7 +1129,6 @@ EXPORT_SYMBOL(hmm_range_dma_map);
 /**
  * hmm_range_dma_unmap() - unmap range of that was map with hmm_range_dma_map()
  * @range: range being unmapped
- * @vma: the vma against which the range (optional)
  * @device: device against which dma map was done
  * @daddrs: dma address of mapped pages
  * @dirty: dirty page if it had the write flag set
@@ -1141,7 +1140,6 @@ EXPORT_SYMBOL(hmm_range_dma_map);
  * concurrent mmu notifier or sync_cpu_device_pagetables() to make progress.
  */
 long hmm_range_dma_unmap(struct hmm_range *range,
-			 struct vm_area_struct *vma,
 			 struct device *device,
 			 dma_addr_t *daddrs,
 			 bool dirty)
-- 
cgit v1.2.3


From fac555ac93d453a0d2265eef88bf4c249dd63e07 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 Aug 2019 19:05:44 +0300
Subject: mm/hmm: remove superfluous arguments from hmm_range_register

The start, end and page_shift values are all saved in the range structure,
so we might as well use that for argument passing.

Link: https://lore.kernel.org/r/20190806160554.14046-7-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst                |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  7 +++++--
 drivers/gpu/drm/nouveau/nouveau_svm.c   |  5 ++---
 include/linux/hmm.h                     |  6 +-----
 mm/hmm.c                                | 20 +++++---------------
 5 files changed, 14 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index ddcb5ca8b296..e63c11f7e0e0 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -222,7 +222,7 @@ The usage pattern is::
       range.flags = ...;
       range.values = ...;
       range.pfn_shift = ...;
-      hmm_range_register(&range);
+      hmm_range_register(&range, mirror);
 
       /*
        * Just wait for range to be valid, safe to ignore return value as we
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index f0821638bbc6..71d6e7087b0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -818,8 +818,11 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 				0 : range->flags[HMM_PFN_WRITE];
 	range->pfn_flags_mask = 0;
 	range->pfns = pfns;
-	hmm_range_register(range, mirror, start,
-			   start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT);
+	range->page_shift = PAGE_SHIFT;
+	range->start = start;
+	range->end = start + ttm->num_pages * PAGE_SIZE;
+
+	hmm_range_register(range, mirror);
 
 	/*
 	 * Just wait for range to be valid, safe to ignore return value as we
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 98072fd48cf7..41fad4719ac6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -492,9 +492,7 @@ nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
 	range->default_flags = 0;
 	range->pfn_flags_mask = -1UL;
 
-	ret = hmm_range_register(range, &svmm->mirror,
-				 range->start, range->end,
-				 PAGE_SHIFT);
+	ret = hmm_range_register(range, &svmm->mirror);
 	if (ret) {
 		up_read(&svmm->mm->mmap_sem);
 		return (int)ret;
@@ -682,6 +680,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
 			 args.i.p.addr + args.i.p.size, fn - fi);
 
 		/* Have HMM fault pages within the fault window to the GPU. */
+		range.page_shift = PAGE_SHIFT;
 		range.start = args.i.p.addr;
 		range.end = args.i.p.addr + args.i.p.size;
 		range.pfns = args.phys;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 59be0aa2476d..c5b51376b453 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -400,11 +400,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror);
 /*
  * Please see Documentation/vm/hmm.rst for how to use the range API.
  */
-int hmm_range_register(struct hmm_range *range,
-		       struct hmm_mirror *mirror,
-		       unsigned long start,
-		       unsigned long end,
-		       unsigned page_shift);
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror);
 void hmm_range_unregister(struct hmm_range *range);
 
 /*
diff --git a/mm/hmm.c b/mm/hmm.c
index 160ecb47d04b..ab22fdb655c8 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -850,35 +850,25 @@ static void hmm_pfns_clear(struct hmm_range *range,
  * hmm_range_register() - start tracking change to CPU page table over a range
  * @range: range
  * @mm: the mm struct for the range of virtual address
- * @start: start virtual address (inclusive)
- * @end: end virtual address (exclusive)
- * @page_shift: expect page shift for the range
+ *
  * Return: 0 on success, -EFAULT if the address space is no longer valid
  *
  * Track updates to the CPU page table see include/linux/hmm.h
  */
-int hmm_range_register(struct hmm_range *range,
-		       struct hmm_mirror *mirror,
-		       unsigned long start,
-		       unsigned long end,
-		       unsigned page_shift)
+int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
 {
-	unsigned long mask = ((1UL << page_shift) - 1UL);
+	unsigned long mask = ((1UL << range->page_shift) - 1UL);
 	struct hmm *hmm = mirror->hmm;
 	unsigned long flags;
 
 	range->valid = false;
 	range->hmm = NULL;
 
-	if ((start & mask) || (end & mask))
+	if ((range->start & mask) || (range->end & mask))
 		return -EINVAL;
-	if (start >= end)
+	if (range->start >= range->end)
 		return -EINVAL;
 
-	range->page_shift = page_shift;
-	range->start = start;
-	range->end = end;
-
 	/* Prevent hmm_release() from running while the range is valid */
 	if (!mmget_not_zero(hmm->mm))
 		return -EFAULT;
-- 
cgit v1.2.3


From 7f08263d9bc6627382da14f9e81d643d0329d5d1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 Aug 2019 19:05:45 +0300
Subject: mm/hmm: remove the page_shift member from struct hmm_range

All users pass PAGE_SIZE here, and if we wanted to support single entries
for huge pages we should really just add a HMM_FAULT_HUGEPAGE flag instead
that uses the huge page size instead of having the caller calculate that
size once, just for the hmm code to verify it.

Link: https://lore.kernel.org/r/20190806160554.14046-8-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  1 -
 drivers/gpu/drm/nouveau/nouveau_svm.c   |  1 -
 include/linux/hmm.h                     | 22 -----------------
 mm/hmm.c                                | 42 +++++++--------------------------
 4 files changed, 9 insertions(+), 57 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 71d6e7087b0b..8bf79288c4e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -818,7 +818,6 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 				0 : range->flags[HMM_PFN_WRITE];
 	range->pfn_flags_mask = 0;
 	range->pfns = pfns;
-	range->page_shift = PAGE_SHIFT;
 	range->start = start;
 	range->end = start + ttm->num_pages * PAGE_SIZE;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 41fad4719ac6..668d4bd0c118 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -680,7 +680,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
 			 args.i.p.addr + args.i.p.size, fn - fi);
 
 		/* Have HMM fault pages within the fault window to the GPU. */
-		range.page_shift = PAGE_SHIFT;
 		range.start = args.i.p.addr;
 		range.end = args.i.p.addr + args.i.p.size;
 		range.pfns = args.phys;
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index c5b51376b453..51e18fbb8953 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -158,7 +158,6 @@ enum hmm_pfn_value_e {
  * @values: pfn value for some special case (none, special, error, ...)
  * @default_flags: default flags for the range (write, read, ... see hmm doc)
  * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
- * @page_shift: device virtual address shift value (should be >= PAGE_SHIFT)
  * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
  * @valid: pfns array did not change since it has been fill by an HMM function
  */
@@ -172,31 +171,10 @@ struct hmm_range {
 	const uint64_t		*values;
 	uint64_t		default_flags;
 	uint64_t		pfn_flags_mask;
-	uint8_t			page_shift;
 	uint8_t			pfn_shift;
 	bool			valid;
 };
 
-/*
- * hmm_range_page_shift() - return the page shift for the range
- * @range: range being queried
- * Return: page shift (page size = 1 << page shift) for the range
- */
-static inline unsigned hmm_range_page_shift(const struct hmm_range *range)
-{
-	return range->page_shift;
-}
-
-/*
- * hmm_range_page_size() - return the page size for the range
- * @range: range being queried
- * Return: page size for the range in bytes
- */
-static inline unsigned long hmm_range_page_size(const struct hmm_range *range)
-{
-	return 1UL << hmm_range_page_shift(range);
-}
-
 /*
  * hmm_range_wait_until_valid() - wait for range to be valid
  * @range: range affected by invalidation to wait on
diff --git a/mm/hmm.c b/mm/hmm.c
index ab22fdb655c8..9e0052e037fe 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -345,13 +345,12 @@ static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	uint64_t *pfns = range->pfns;
-	unsigned long i, page_size;
+	unsigned long i;
 
 	hmm_vma_walk->last = addr;
-	page_size = hmm_range_page_size(range);
-	i = (addr - range->start) >> range->page_shift;
+	i = (addr - range->start) >> PAGE_SHIFT;
 
-	for (; addr < end; addr += page_size, i++) {
+	for (; addr < end; addr += PAGE_SIZE, i++) {
 		pfns[i] = range->values[HMM_PFN_NONE];
 		if (fault || write_fault) {
 			int ret;
@@ -779,7 +778,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 				      struct mm_walk *walk)
 {
 #ifdef CONFIG_HUGETLB_PAGE
-	unsigned long addr = start, i, pfn, mask, size, pfn_inc;
+	unsigned long addr = start, i, pfn, mask;
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	struct vm_area_struct *vma = walk->vma;
@@ -790,24 +789,12 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	pte_t entry;
 	int ret = 0;
 
-	size = huge_page_size(h);
-	mask = size - 1;
-	if (range->page_shift != PAGE_SHIFT) {
-		/* Make sure we are looking at a full page. */
-		if (start & mask)
-			return -EINVAL;
-		if (end < (start + size))
-			return -EINVAL;
-		pfn_inc = size >> PAGE_SHIFT;
-	} else {
-		pfn_inc = 1;
-		size = PAGE_SIZE;
-	}
+	mask = huge_page_size(h) - 1;
 
 	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
 	entry = huge_ptep_get(pte);
 
-	i = (start - range->start) >> range->page_shift;
+	i = (start - range->start) >> PAGE_SHIFT;
 	orig_pfn = range->pfns[i];
 	range->pfns[i] = range->values[HMM_PFN_NONE];
 	cpu_flags = pte_to_hmm_pfn_flags(range, entry);
@@ -819,8 +806,8 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 		goto unlock;
 	}
 
-	pfn = pte_pfn(entry) + ((start & mask) >> range->page_shift);
-	for (; addr < end; addr += size, i++, pfn += pfn_inc)
+	pfn = pte_pfn(entry) + ((start & mask) >> PAGE_SHIFT);
+	for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
 		range->pfns[i] = hmm_device_entry_from_pfn(range, pfn) |
 				 cpu_flags;
 	hmm_vma_walk->last = end;
@@ -857,14 +844,13 @@ static void hmm_pfns_clear(struct hmm_range *range,
  */
 int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
 {
-	unsigned long mask = ((1UL << range->page_shift) - 1UL);
 	struct hmm *hmm = mirror->hmm;
 	unsigned long flags;
 
 	range->valid = false;
 	range->hmm = NULL;
 
-	if ((range->start & mask) || (range->end & mask))
+	if ((range->start & (PAGE_SIZE - 1)) || (range->end & (PAGE_SIZE - 1)))
 		return -EINVAL;
 	if (range->start >= range->end)
 		return -EINVAL;
@@ -971,16 +957,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		if (vma == NULL || (vma->vm_flags & device_vma))
 			return -EFAULT;
 
-		if (is_vm_hugetlb_page(vma)) {
-			if (huge_page_shift(hstate_vma(vma)) !=
-			    range->page_shift &&
-			    range->page_shift != PAGE_SHIFT)
-				return -EINVAL;
-		} else {
-			if (range->page_shift != PAGE_SHIFT)
-				return -EINVAL;
-		}
-
 		if (!(vma->vm_flags & VM_READ)) {
 			/*
 			 * If vma do not allow read access, then assume that it
-- 
cgit v1.2.3


From 94f3e14e00fd43024b1c4d8e0c1e442db9b4d964 Mon Sep 17 00:00:00 2001
From: Chuhong Yuan <hslester96@gmail.com>
Date: Tue, 6 Aug 2019 09:59:50 +0800
Subject: mlx5: Use refcount_t for refcount

Reference counters are preferred to use refcount_t instead of
atomic_t.
This is because the implementation of refcount_t can prevent
overflows and detect possible use-after-free.
So convert atomic_t ref counters to refcount_t.

Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/srq_cmd.c         | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 6 +++---
 include/linux/mlx5/driver.h                  | 3 ++-
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index b0d0687c7a68..8fc3630a9d4c 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -86,7 +86,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
 	xa_lock(&table->array);
 	srq = xa_load(&table->array, srqn);
 	if (srq)
-		atomic_inc(&srq->common.refcount);
+		refcount_inc(&srq->common.refcount);
 	xa_unlock(&table->array);
 
 	return srq;
@@ -592,7 +592,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
 	if (err)
 		return err;
 
-	atomic_set(&srq->common.refcount, 1);
+	refcount_set(&srq->common.refcount, 1);
 	init_completion(&srq->common.free);
 
 	err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL));
@@ -675,7 +675,7 @@ static int srq_event_notifier(struct notifier_block *nb,
 	xa_lock(&table->array);
 	srq = xa_load(&table->array, srqn);
 	if (srq)
-		atomic_inc(&srq->common.refcount);
+		refcount_inc(&srq->common.refcount);
 	xa_unlock(&table->array);
 
 	if (!srq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index b8ba74de9555..7b44d1e49604 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -53,7 +53,7 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
 
 	common = radix_tree_lookup(&table->tree, rsn);
 	if (common)
-		atomic_inc(&common->refcount);
+		refcount_inc(&common->refcount);
 
 	spin_unlock_irqrestore(&table->lock, flags);
 
@@ -62,7 +62,7 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
 
 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
 {
-	if (atomic_dec_and_test(&common->refcount))
+	if (refcount_dec_and_test(&common->refcount))
 		complete(&common->free);
 }
 
@@ -209,7 +209,7 @@ static int create_resource_common(struct mlx5_core_dev *dev,
 	if (err)
 		return err;
 
-	atomic_set(&qp->common.refcount, 1);
+	refcount_set(&qp->common.refcount, 1);
 	init_completion(&qp->common.free);
 	qp->pid = current->pid;
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 267b2bc0ca4a..0acd28f2e62c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -47,6 +47,7 @@
 #include <linux/interrupt.h>
 #include <linux/idr.h>
 #include <linux/notifier.h>
+#include <linux/refcount.h>
 
 #include <linux/mlx5/device.h>
 #include <linux/mlx5/doorbell.h>
@@ -398,7 +399,7 @@ enum mlx5_res_type {
 
 struct mlx5_core_rsc_common {
 	enum mlx5_res_type	res;
-	atomic_t		refcount;
+	refcount_t		refcount;
 	struct completion	free;
 };
 
-- 
cgit v1.2.3


From e5ee331ebcf33827d1bd64e984c565b23cf53227 Mon Sep 17 00:00:00 2001
From: Deepak Katragadda <dkatraga@codeaurora.org>
Date: Mon, 22 Jul 2019 13:13:47 +0530
Subject: dt-bindings: clock: Document gcc bindings for SM8150

Document the global clock controller found on SM8150.

Signed-off-by: Deepak Katragadda <dkatraga@codeaurora.org>
Signed-off-by: Taniya Das <tdas@codeaurora.org>
[vkoul: port to upstream and add external clocks
	split binding to this patch]]
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Link: https://lkml.kernel.org/r/20190722074348.29582-5-vkoul@kernel.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 .../devicetree/bindings/clock/qcom,gcc.txt         |  21 ++
 include/dt-bindings/clock/qcom,gcc-sm8150.h        | 243 +++++++++++++++++++++
 2 files changed, 264 insertions(+)
 create mode 100644 include/dt-bindings/clock/qcom,gcc-sm8150.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 8661c3cd3ccf..d14362ad4132 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -23,6 +23,7 @@ Required properties :
 			"qcom,gcc-sdm630"
 			"qcom,gcc-sdm660"
 			"qcom,gcc-sdm845"
+			"qcom,gcc-sm8150"
 
 - reg : shall contain base register location and length
 - #clock-cells : shall contain 1
@@ -38,6 +39,13 @@ Documentation/devicetree/bindings/thermal/qcom-tsens.txt
 - protected-clocks : Protected clock specifier list as per common clock
  binding.
 
+For SM8150 only:
+       - clocks: a list of phandles and clock-specifier pairs,
+                 one for each entry in clock-names.
+       - clock-names: "bi_tcxo" (required)
+                      "sleep_clk" (optional)
+                      "aud_ref_clock" (optional)
+
 Example:
 	clock-controller@900000 {
 		compatible = "qcom,gcc-msm8960";
@@ -71,3 +79,16 @@ Example of GCC with protected-clocks properties:
 				   <GCC_LPASS_Q6_AXI_CLK>,
 				   <GCC_LPASS_SWAY_CLK>;
 	};
+
+Example of GCC with clocks
+	gcc: clock-controller@100000 {
+		compatible = "qcom,gcc-sm8150";
+		reg = <0x00100000 0x1f0000>;
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+		#power-domain-cells = <1>;
+		clock-names = "bi_tcxo",
+		              "sleep_clk";
+		clocks = <&rpmcc RPM_SMD_XO_CLK_SRC>,
+			 <&sleep_clk>;
+	};
diff --git a/include/dt-bindings/clock/qcom,gcc-sm8150.h b/include/dt-bindings/clock/qcom,gcc-sm8150.h
new file mode 100644
index 000000000000..90d60ef94c64
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-sm8150.h
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _DT_BINDINGS_CLK_QCOM_GCC_SM8150_H
+#define _DT_BINDINGS_CLK_QCOM_GCC_SM8150_H
+
+/* GCC clocks */
+#define GCC_AGGRE_NOC_PCIE_TBU_CLK				0
+#define GCC_AGGRE_UFS_CARD_AXI_CLK				1
+#define GCC_AGGRE_UFS_CARD_AXI_HW_CTL_CLK			2
+#define GCC_AGGRE_UFS_PHY_AXI_CLK				3
+#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK			4
+#define GCC_AGGRE_USB3_PRIM_AXI_CLK				5
+#define GCC_AGGRE_USB3_SEC_AXI_CLK				6
+#define GCC_BOOT_ROM_AHB_CLK					7
+#define GCC_CAMERA_AHB_CLK					8
+#define GCC_CAMERA_HF_AXI_CLK					9
+#define GCC_CAMERA_SF_AXI_CLK					10
+#define GCC_CAMERA_XO_CLK					11
+#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK				12
+#define GCC_CFG_NOC_USB3_SEC_AXI_CLK				13
+#define GCC_CPUSS_AHB_CLK					14
+#define GCC_CPUSS_AHB_CLK_SRC					15
+#define GCC_CPUSS_DVM_BUS_CLK					16
+#define GCC_CPUSS_GNOC_CLK					17
+#define GCC_CPUSS_RBCPR_CLK					18
+#define GCC_DDRSS_GPU_AXI_CLK					19
+#define GCC_DISP_AHB_CLK					20
+#define GCC_DISP_HF_AXI_CLK					21
+#define GCC_DISP_SF_AXI_CLK					22
+#define GCC_DISP_XO_CLK						23
+#define GCC_EMAC_AXI_CLK					24
+#define GCC_EMAC_PTP_CLK					25
+#define GCC_EMAC_PTP_CLK_SRC					26
+#define GCC_EMAC_RGMII_CLK					27
+#define GCC_EMAC_RGMII_CLK_SRC					28
+#define GCC_EMAC_SLV_AHB_CLK					29
+#define GCC_GP1_CLK						30
+#define GCC_GP1_CLK_SRC						31
+#define GCC_GP2_CLK						32
+#define GCC_GP2_CLK_SRC						33
+#define GCC_GP3_CLK						34
+#define GCC_GP3_CLK_SRC						35
+#define GCC_GPU_CFG_AHB_CLK					36
+#define GCC_GPU_GPLL0_CLK_SRC					37
+#define GCC_GPU_GPLL0_DIV_CLK_SRC				38
+#define GCC_GPU_IREF_CLK					39
+#define GCC_GPU_MEMNOC_GFX_CLK					40
+#define GCC_GPU_SNOC_DVM_GFX_CLK				41
+#define GCC_NPU_AT_CLK						42
+#define GCC_NPU_AXI_CLK						43
+#define GCC_NPU_CFG_AHB_CLK					44
+#define GCC_NPU_GPLL0_CLK_SRC					45
+#define GCC_NPU_GPLL0_DIV_CLK_SRC				46
+#define GCC_NPU_TRIG_CLK					47
+#define GCC_PCIE0_PHY_REFGEN_CLK				48
+#define GCC_PCIE1_PHY_REFGEN_CLK				49
+#define GCC_PCIE_0_AUX_CLK					50
+#define GCC_PCIE_0_AUX_CLK_SRC					51
+#define GCC_PCIE_0_CFG_AHB_CLK					52
+#define GCC_PCIE_0_CLKREF_CLK					53
+#define GCC_PCIE_0_MSTR_AXI_CLK					54
+#define GCC_PCIE_0_PIPE_CLK					55
+#define GCC_PCIE_0_SLV_AXI_CLK					56
+#define GCC_PCIE_0_SLV_Q2A_AXI_CLK				57
+#define GCC_PCIE_1_AUX_CLK					58
+#define GCC_PCIE_1_AUX_CLK_SRC					59
+#define GCC_PCIE_1_CFG_AHB_CLK					60
+#define GCC_PCIE_1_CLKREF_CLK					61
+#define GCC_PCIE_1_MSTR_AXI_CLK					62
+#define GCC_PCIE_1_PIPE_CLK					63
+#define GCC_PCIE_1_SLV_AXI_CLK					64
+#define GCC_PCIE_1_SLV_Q2A_AXI_CLK				65
+#define GCC_PCIE_PHY_AUX_CLK					66
+#define GCC_PCIE_PHY_REFGEN_CLK_SRC				67
+#define GCC_PDM2_CLK						68
+#define GCC_PDM2_CLK_SRC					69
+#define GCC_PDM_AHB_CLK						70
+#define GCC_PDM_XO4_CLK						71
+#define GCC_PRNG_AHB_CLK					72
+#define GCC_QMIP_CAMERA_NRT_AHB_CLK				73
+#define GCC_QMIP_CAMERA_RT_AHB_CLK				74
+#define GCC_QMIP_DISP_AHB_CLK					75
+#define GCC_QMIP_VIDEO_CVP_AHB_CLK				76
+#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK				77
+#define GCC_QSPI_CNOC_PERIPH_AHB_CLK				78
+#define GCC_QSPI_CORE_CLK					79
+#define GCC_QSPI_CORE_CLK_SRC					80
+#define GCC_QUPV3_WRAP0_S0_CLK					81
+#define GCC_QUPV3_WRAP0_S0_CLK_SRC				82
+#define GCC_QUPV3_WRAP0_S1_CLK					83
+#define GCC_QUPV3_WRAP0_S1_CLK_SRC				84
+#define GCC_QUPV3_WRAP0_S2_CLK					85
+#define GCC_QUPV3_WRAP0_S2_CLK_SRC				86
+#define GCC_QUPV3_WRAP0_S3_CLK					87
+#define GCC_QUPV3_WRAP0_S3_CLK_SRC				88
+#define GCC_QUPV3_WRAP0_S4_CLK					89
+#define GCC_QUPV3_WRAP0_S4_CLK_SRC				90
+#define GCC_QUPV3_WRAP0_S5_CLK					91
+#define GCC_QUPV3_WRAP0_S5_CLK_SRC				92
+#define GCC_QUPV3_WRAP0_S6_CLK					93
+#define GCC_QUPV3_WRAP0_S6_CLK_SRC				94
+#define GCC_QUPV3_WRAP0_S7_CLK					95
+#define GCC_QUPV3_WRAP0_S7_CLK_SRC				96
+#define GCC_QUPV3_WRAP1_S0_CLK					97
+#define GCC_QUPV3_WRAP1_S0_CLK_SRC				98
+#define GCC_QUPV3_WRAP1_S1_CLK					99
+#define GCC_QUPV3_WRAP1_S1_CLK_SRC				100
+#define GCC_QUPV3_WRAP1_S2_CLK					101
+#define GCC_QUPV3_WRAP1_S2_CLK_SRC				102
+#define GCC_QUPV3_WRAP1_S3_CLK					103
+#define GCC_QUPV3_WRAP1_S3_CLK_SRC				104
+#define GCC_QUPV3_WRAP1_S4_CLK					105
+#define GCC_QUPV3_WRAP1_S4_CLK_SRC				106
+#define GCC_QUPV3_WRAP1_S5_CLK					107
+#define GCC_QUPV3_WRAP1_S5_CLK_SRC				108
+#define GCC_QUPV3_WRAP2_S0_CLK					109
+#define GCC_QUPV3_WRAP2_S0_CLK_SRC				110
+#define GCC_QUPV3_WRAP2_S1_CLK					111
+#define GCC_QUPV3_WRAP2_S1_CLK_SRC				112
+#define GCC_QUPV3_WRAP2_S2_CLK					113
+#define GCC_QUPV3_WRAP2_S2_CLK_SRC				114
+#define GCC_QUPV3_WRAP2_S3_CLK					115
+#define GCC_QUPV3_WRAP2_S3_CLK_SRC				116
+#define GCC_QUPV3_WRAP2_S4_CLK					117
+#define GCC_QUPV3_WRAP2_S4_CLK_SRC				118
+#define GCC_QUPV3_WRAP2_S5_CLK					119
+#define GCC_QUPV3_WRAP2_S5_CLK_SRC				120
+#define GCC_QUPV3_WRAP_0_M_AHB_CLK				121
+#define GCC_QUPV3_WRAP_0_S_AHB_CLK				122
+#define GCC_QUPV3_WRAP_1_M_AHB_CLK				123
+#define GCC_QUPV3_WRAP_1_S_AHB_CLK				124
+#define GCC_QUPV3_WRAP_2_M_AHB_CLK				125
+#define GCC_QUPV3_WRAP_2_S_AHB_CLK				126
+#define GCC_SDCC2_AHB_CLK					127
+#define GCC_SDCC2_APPS_CLK					128
+#define GCC_SDCC2_APPS_CLK_SRC					129
+#define GCC_SDCC4_AHB_CLK					130
+#define GCC_SDCC4_APPS_CLK					131
+#define GCC_SDCC4_APPS_CLK_SRC					132
+#define GCC_SYS_NOC_CPUSS_AHB_CLK				133
+#define GCC_TSIF_AHB_CLK					134
+#define GCC_TSIF_INACTIVITY_TIMERS_CLK				135
+#define GCC_TSIF_REF_CLK					136
+#define GCC_TSIF_REF_CLK_SRC					137
+#define GCC_UFS_CARD_AHB_CLK					138
+#define GCC_UFS_CARD_AXI_CLK					139
+#define GCC_UFS_CARD_AXI_CLK_SRC				140
+#define GCC_UFS_CARD_AXI_HW_CTL_CLK				141
+#define GCC_UFS_CARD_CLKREF_CLK					142
+#define GCC_UFS_CARD_ICE_CORE_CLK				143
+#define GCC_UFS_CARD_ICE_CORE_CLK_SRC				144
+#define GCC_UFS_CARD_ICE_CORE_HW_CTL_CLK			145
+#define GCC_UFS_CARD_PHY_AUX_CLK				146
+#define GCC_UFS_CARD_PHY_AUX_CLK_SRC				147
+#define GCC_UFS_CARD_PHY_AUX_HW_CTL_CLK				148
+#define GCC_UFS_CARD_RX_SYMBOL_0_CLK				149
+#define GCC_UFS_CARD_RX_SYMBOL_1_CLK				150
+#define GCC_UFS_CARD_TX_SYMBOL_0_CLK				151
+#define GCC_UFS_CARD_UNIPRO_CORE_CLK				152
+#define GCC_UFS_CARD_UNIPRO_CORE_CLK_SRC			153
+#define GCC_UFS_CARD_UNIPRO_CORE_HW_CTL_CLK			154
+#define GCC_UFS_MEM_CLKREF_CLK					155
+#define GCC_UFS_PHY_AHB_CLK					156
+#define GCC_UFS_PHY_AXI_CLK					157
+#define GCC_UFS_PHY_AXI_CLK_SRC					158
+#define GCC_UFS_PHY_AXI_HW_CTL_CLK				159
+#define GCC_UFS_PHY_ICE_CORE_CLK				160
+#define GCC_UFS_PHY_ICE_CORE_CLK_SRC				161
+#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK				162
+#define GCC_UFS_PHY_PHY_AUX_CLK					163
+#define GCC_UFS_PHY_PHY_AUX_CLK_SRC				164
+#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK				165
+#define GCC_UFS_PHY_RX_SYMBOL_0_CLK				166
+#define GCC_UFS_PHY_RX_SYMBOL_1_CLK				167
+#define GCC_UFS_PHY_TX_SYMBOL_0_CLK				168
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK				169
+#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC				170
+#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK			171
+#define GCC_USB30_PRIM_MASTER_CLK				172
+#define GCC_USB30_PRIM_MASTER_CLK_SRC				173
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK				174
+#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC			175
+#define GCC_USB30_PRIM_SLEEP_CLK				176
+#define GCC_USB30_SEC_MASTER_CLK				177
+#define GCC_USB30_SEC_MASTER_CLK_SRC				178
+#define GCC_USB30_SEC_MOCK_UTMI_CLK				179
+#define GCC_USB30_SEC_MOCK_UTMI_CLK_SRC				180
+#define GCC_USB30_SEC_SLEEP_CLK					181
+#define GCC_USB3_PRIM_CLKREF_CLK				182
+#define GCC_USB3_PRIM_PHY_AUX_CLK				183
+#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC				184
+#define GCC_USB3_PRIM_PHY_COM_AUX_CLK				185
+#define GCC_USB3_PRIM_PHY_PIPE_CLK				186
+#define GCC_USB3_SEC_CLKREF_CLK					187
+#define GCC_USB3_SEC_PHY_AUX_CLK				188
+#define GCC_USB3_SEC_PHY_AUX_CLK_SRC				189
+#define GCC_USB3_SEC_PHY_COM_AUX_CLK				190
+#define GCC_USB3_SEC_PHY_PIPE_CLK				191
+#define GCC_VIDEO_AHB_CLK					192
+#define GCC_VIDEO_AXI0_CLK					193
+#define GCC_VIDEO_AXI1_CLK					194
+#define GCC_VIDEO_AXIC_CLK					195
+#define GCC_VIDEO_XO_CLK					196
+#define GPLL0							197
+#define GPLL0_OUT_EVEN						198
+#define GPLL7							199
+#define GPLL9							200
+
+/* Reset clocks */
+#define GCC_EMAC_BCR						0
+#define GCC_GPU_BCR						1
+#define GCC_MMSS_BCR						2
+#define GCC_NPU_BCR						3
+#define GCC_PCIE_0_BCR						4
+#define GCC_PCIE_0_PHY_BCR					5
+#define GCC_PCIE_1_BCR						6
+#define GCC_PCIE_1_PHY_BCR					7
+#define GCC_PCIE_PHY_BCR					8
+#define GCC_PDM_BCR						9
+#define GCC_PRNG_BCR						10
+#define GCC_QSPI_BCR						11
+#define GCC_QUPV3_WRAPPER_0_BCR					12
+#define GCC_QUPV3_WRAPPER_1_BCR					13
+#define GCC_QUPV3_WRAPPER_2_BCR					14
+#define GCC_QUSB2PHY_PRIM_BCR					15
+#define GCC_QUSB2PHY_SEC_BCR					16
+#define GCC_USB3_PHY_PRIM_BCR					17
+#define GCC_USB3_DP_PHY_PRIM_BCR				18
+#define GCC_USB3_PHY_SEC_BCR					19
+#define GCC_USB3PHY_PHY_SEC_BCR					20
+#define GCC_SDCC2_BCR						21
+#define GCC_SDCC4_BCR						22
+#define GCC_TSIF_BCR						23
+#define GCC_UFS_CARD_BCR					24
+#define GCC_UFS_PHY_BCR						25
+#define GCC_USB30_PRIM_BCR					26
+#define GCC_USB30_SEC_BCR					27
+#define GCC_USB_PHY_CFG_AHB2PHY_BCR				28
+
+#endif
-- 
cgit v1.2.3


From dccc96abfb21dc19d69e707c38c8ba439bba7160 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 1 Aug 2019 15:38:14 -0700
Subject: scsi: core: Reduce memory required for SCSI logging

The data structure used for log messages is so large that it can cause a
boot failure. Since allocations from that data structure can fail anyway,
use kmalloc() / kfree() instead of that data structure.

See also https://bugzilla.kernel.org/show_bug.cgi?id=204119.
See also commit ded85c193a39 ("scsi: Implement per-cpu logging buffer") # v4.0.

Reported-by: Jan Palus <jpalus@fastmail.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Jan Palus <jpalus@fastmail.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_logging.c | 48 +++------------------------------------------
 include/scsi/scsi_dbg.h     |  2 --
 2 files changed, 3 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_logging.c b/drivers/scsi/scsi_logging.c
index 39b8cc4574b4..c6ed0b12e807 100644
--- a/drivers/scsi/scsi_logging.c
+++ b/drivers/scsi/scsi_logging.c
@@ -15,57 +15,15 @@
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_dbg.h>
 
-#define SCSI_LOG_SPOOLSIZE 4096
-
-#if (SCSI_LOG_SPOOLSIZE / SCSI_LOG_BUFSIZE) > BITS_PER_LONG
-#warning SCSI logging bitmask too large
-#endif
-
-struct scsi_log_buf {
-	char buffer[SCSI_LOG_SPOOLSIZE];
-	unsigned long map;
-};
-
-static DEFINE_PER_CPU(struct scsi_log_buf, scsi_format_log);
-
 static char *scsi_log_reserve_buffer(size_t *len)
 {
-	struct scsi_log_buf *buf;
-	unsigned long map_bits = sizeof(buf->buffer) / SCSI_LOG_BUFSIZE;
-	unsigned long idx = 0;
-
-	preempt_disable();
-	buf = this_cpu_ptr(&scsi_format_log);
-	idx = find_first_zero_bit(&buf->map, map_bits);
-	if (likely(idx < map_bits)) {
-		while (test_and_set_bit(idx, &buf->map)) {
-			idx = find_next_zero_bit(&buf->map, map_bits, idx);
-			if (idx >= map_bits)
-				break;
-		}
-	}
-	if (WARN_ON(idx >= map_bits)) {
-		preempt_enable();
-		return NULL;
-	}
-	*len = SCSI_LOG_BUFSIZE;
-	return buf->buffer + idx * SCSI_LOG_BUFSIZE;
+	*len = 128;
+	return kmalloc(*len, GFP_ATOMIC);
 }
 
 static void scsi_log_release_buffer(char *bufptr)
 {
-	struct scsi_log_buf *buf;
-	unsigned long idx;
-	int ret;
-
-	buf = this_cpu_ptr(&scsi_format_log);
-	if (bufptr >= buf->buffer &&
-	    bufptr < buf->buffer + SCSI_LOG_SPOOLSIZE) {
-		idx = (bufptr - buf->buffer) / SCSI_LOG_BUFSIZE;
-		ret = test_and_clear_bit(idx, &buf->map);
-		WARN_ON(!ret);
-	}
-	preempt_enable();
+	kfree(bufptr);
 }
 
 static inline const char *scmd_name(const struct scsi_cmnd *scmd)
diff --git a/include/scsi/scsi_dbg.h b/include/scsi/scsi_dbg.h
index e03bd9d41fa8..7b196d234626 100644
--- a/include/scsi/scsi_dbg.h
+++ b/include/scsi/scsi_dbg.h
@@ -6,8 +6,6 @@ struct scsi_cmnd;
 struct scsi_device;
 struct scsi_sense_hdr;
 
-#define SCSI_LOG_BUFSIZE 128
-
 extern void scsi_print_command(struct scsi_cmnd *);
 extern size_t __scsi_format_command(char *, size_t,
 				   const unsigned char *, size_t);
-- 
cgit v1.2.3


From 570aaec7e9436eae5ffe4588d08e5262731e1b75 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Wed, 17 Jul 2019 07:56:51 -0700
Subject: clk: Constify struct clk_bulk_data * where possible

The following functions:

    - clk_bulk_enable()
    - clk_bulk_prepare()
    - clk_bulk_disable()
    - clk_bulk_unprepare()

already expect const clk_bulk_data * as a second parameter, however
their no-op version have mismatching prototypes that don't. Fix that.

While at it, constify the second argument of clk_bulk_prepare_enable()
and clk_bulk_disable_unprepare(), since the functions they are
comprised of already accept const clk_bulk_data *.

Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Chris Healy <cphealy@gmail.com>
Cc: linux-clk@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: https://lkml.kernel.org/r/20190717145651.17250-1-andrew.smirnov@gmail.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk.h | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/clk.h b/include/linux/clk.h
index 3c096c7a51dc..7a795fd6d141 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -239,7 +239,8 @@ static inline int clk_prepare(struct clk *clk)
 	return 0;
 }
 
-static inline int __must_check clk_bulk_prepare(int num_clks, struct clk_bulk_data *clks)
+static inline int __must_check
+clk_bulk_prepare(int num_clks, const struct clk_bulk_data *clks)
 {
 	might_sleep();
 	return 0;
@@ -263,7 +264,8 @@ static inline void clk_unprepare(struct clk *clk)
 {
 	might_sleep();
 }
-static inline void clk_bulk_unprepare(int num_clks, struct clk_bulk_data *clks)
+static inline void clk_bulk_unprepare(int num_clks,
+				      const struct clk_bulk_data *clks)
 {
 	might_sleep();
 }
@@ -819,7 +821,8 @@ static inline int clk_enable(struct clk *clk)
 	return 0;
 }
 
-static inline int __must_check clk_bulk_enable(int num_clks, struct clk_bulk_data *clks)
+static inline int __must_check clk_bulk_enable(int num_clks,
+					       const struct clk_bulk_data *clks)
 {
 	return 0;
 }
@@ -828,7 +831,7 @@ static inline void clk_disable(struct clk *clk) {}
 
 
 static inline void clk_bulk_disable(int num_clks,
-				    struct clk_bulk_data *clks) {}
+				    const struct clk_bulk_data *clks) {}
 
 static inline unsigned long clk_get_rate(struct clk *clk)
 {
@@ -917,8 +920,8 @@ static inline void clk_disable_unprepare(struct clk *clk)
 	clk_unprepare(clk);
 }
 
-static inline int __must_check clk_bulk_prepare_enable(int num_clks,
-					struct clk_bulk_data *clks)
+static inline int __must_check
+clk_bulk_prepare_enable(int num_clks, const struct clk_bulk_data *clks)
 {
 	int ret;
 
@@ -933,7 +936,7 @@ static inline int __must_check clk_bulk_prepare_enable(int num_clks,
 }
 
 static inline void clk_bulk_disable_unprepare(int num_clks,
-					      struct clk_bulk_data *clks)
+					      const struct clk_bulk_data *clks)
 {
 	clk_bulk_disable(num_clks, clks);
 	clk_bulk_unprepare(num_clks, clks);
-- 
cgit v1.2.3


From 71ed79b0e4be0db254640c3beb9a1a0316eb5f61 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 6 Aug 2019 12:15:09 +0200
Subject: USB: Move wusbcore and UWB to staging as it is obsolete

The UWB and wusbcore code is long obsolete, so let us just move the code
out of the real part of the kernel and into the drivers/staging/
location with plans to remove it entirely in a few releases.

Link: https://lore.kernel.org/r/20190806101509.GA11280@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/usb/wusb-cbaf                        |  130 -
 Documentation/usb/wusb-design-overview.rst         |  457 ---
 MAINTAINERS                                        |   15 +-
 drivers/Kconfig                                    |    2 -
 drivers/Makefile                                   |    1 -
 drivers/staging/Kconfig                            |    3 +
 drivers/staging/Makefile                           |    2 +
 drivers/staging/uwb/Kconfig                        |   72 +
 drivers/staging/uwb/Makefile                       |   32 +
 drivers/staging/uwb/TODO                           |    8 +
 drivers/staging/uwb/address.c                      |  352 +++
 drivers/staging/uwb/allocator.c                    |  374 +++
 drivers/staging/uwb/beacon.c                       |  595 ++++
 drivers/staging/uwb/driver.c                       |  143 +
 drivers/staging/uwb/drp-avail.c                    |  278 ++
 drivers/staging/uwb/drp-ie.c                       |  305 ++
 drivers/staging/uwb/drp.c                          |  842 ++++++
 drivers/staging/uwb/est.c                          |  450 +++
 drivers/staging/uwb/hwa-rc.c                       |  929 +++++++
 drivers/staging/uwb/i1480/Makefile                 |    2 +
 drivers/staging/uwb/i1480/dfu/Makefile             |   10 +
 drivers/staging/uwb/i1480/dfu/dfu.c                |  198 ++
 drivers/staging/uwb/i1480/dfu/i1480-dfu.h          |  246 ++
 drivers/staging/uwb/i1480/dfu/mac.c                |  496 ++++
 drivers/staging/uwb/i1480/dfu/phy.c                |  190 ++
 drivers/staging/uwb/i1480/dfu/usb.c                |  448 +++
 drivers/staging/uwb/i1480/i1480-est.c              |   85 +
 drivers/staging/uwb/ie-rcv.c                       |   42 +
 drivers/staging/uwb/ie.c                           |  366 +++
 drivers/staging/uwb/include/debug-cmd.h            |   57 +
 drivers/staging/uwb/include/spec.h                 |  767 +++++
 drivers/staging/uwb/include/umc.h                  |  192 ++
 drivers/staging/uwb/include/whci.h                 |  102 +
 drivers/staging/uwb/lc-dev.c                       |  457 +++
 drivers/staging/uwb/lc-rc.c                        |  569 ++++
 drivers/staging/uwb/neh.c                          |  606 ++++
 drivers/staging/uwb/pal.c                          |  128 +
 drivers/staging/uwb/radio.c                        |  196 ++
 drivers/staging/uwb/reset.c                        |  379 +++
 drivers/staging/uwb/rsv.c                          | 1000 +++++++
 drivers/staging/uwb/scan.c                         |  120 +
 drivers/staging/uwb/umc-bus.c                      |  211 ++
 drivers/staging/uwb/umc-dev.c                      |   94 +
 drivers/staging/uwb/umc-drv.c                      |   31 +
 drivers/staging/uwb/uwb-debug.c                    |  354 +++
 drivers/staging/uwb/uwb-internal.h                 |  366 +++
 drivers/staging/uwb/uwb.h                          |  817 ++++++
 drivers/staging/uwb/uwbd.c                         |  356 +++
 drivers/staging/uwb/whc-rc.c                       |  467 ++++
 drivers/staging/uwb/whci.c                         |  257 ++
 drivers/staging/wusbcore/Documentation/wusb-cbaf   |  130 +
 .../Documentation/wusb-design-overview.rst         |  457 +++
 drivers/staging/wusbcore/Kconfig                   |   39 +
 drivers/staging/wusbcore/Makefile                  |   28 +
 drivers/staging/wusbcore/TODO                      |    8 +
 drivers/staging/wusbcore/cbaf.c                    |  645 +++++
 drivers/staging/wusbcore/crypto.c                  |  441 +++
 drivers/staging/wusbcore/dev-sysfs.c               |  124 +
 drivers/staging/wusbcore/devconnect.c              | 1085 ++++++++
 drivers/staging/wusbcore/host/Kconfig              |   28 +
 drivers/staging/wusbcore/host/Makefile             |    3 +
 drivers/staging/wusbcore/host/hwa-hc.c             |  875 ++++++
 drivers/staging/wusbcore/host/whci/Makefile        |   14 +
 drivers/staging/wusbcore/host/whci/asl.c           |  376 +++
 drivers/staging/wusbcore/host/whci/debug.c         |  153 +
 drivers/staging/wusbcore/host/whci/hcd.c           |  356 +++
 drivers/staging/wusbcore/host/whci/hw.c            |   93 +
 drivers/staging/wusbcore/host/whci/init.c          |  177 ++
 drivers/staging/wusbcore/host/whci/int.c           |   82 +
 drivers/staging/wusbcore/host/whci/pzl.c           |  404 +++
 drivers/staging/wusbcore/host/whci/qset.c          |  831 ++++++
 drivers/staging/wusbcore/host/whci/whcd.h          |  202 ++
 drivers/staging/wusbcore/host/whci/whci-hc.h       |  401 +++
 drivers/staging/wusbcore/host/whci/wusb.c          |  210 ++
 drivers/staging/wusbcore/include/association.h     |  151 +
 drivers/staging/wusbcore/include/wusb-wa.h         |  304 ++
 drivers/staging/wusbcore/include/wusb.h            |  362 +++
 drivers/staging/wusbcore/mmc.c                     |  303 ++
 drivers/staging/wusbcore/pal.c                     |   45 +
 drivers/staging/wusbcore/reservation.c             |  110 +
 drivers/staging/wusbcore/rh.c                      |  426 +++
 drivers/staging/wusbcore/security.c                |  599 ++++
 drivers/staging/wusbcore/wa-hc.c                   |   88 +
 drivers/staging/wusbcore/wa-hc.h                   |  467 ++++
 drivers/staging/wusbcore/wa-nep.c                  |  289 ++
 drivers/staging/wusbcore/wa-rpipe.c                |  539 ++++
 drivers/staging/wusbcore/wa-xfer.c                 | 2927 ++++++++++++++++++++
 drivers/staging/wusbcore/wusbhc.c                  |  490 ++++
 drivers/staging/wusbcore/wusbhc.h                  |  487 ++++
 drivers/usb/Kconfig                                |    2 -
 drivers/usb/Makefile                               |    2 -
 drivers/usb/host/Kconfig                           |   26 -
 drivers/usb/host/Makefile                          |    3 -
 drivers/usb/host/hwa-hc.c                          |  875 ------
 drivers/usb/host/whci/Makefile                     |   14 -
 drivers/usb/host/whci/asl.c                        |  376 ---
 drivers/usb/host/whci/debug.c                      |  153 -
 drivers/usb/host/whci/hcd.c                        |  356 ---
 drivers/usb/host/whci/hw.c                         |   93 -
 drivers/usb/host/whci/init.c                       |  177 --
 drivers/usb/host/whci/int.c                        |   82 -
 drivers/usb/host/whci/pzl.c                        |  404 ---
 drivers/usb/host/whci/qset.c                       |  831 ------
 drivers/usb/host/whci/whcd.h                       |  202 --
 drivers/usb/host/whci/whci-hc.h                    |  401 ---
 drivers/usb/host/whci/wusb.c                       |  210 --
 drivers/usb/wusbcore/Kconfig                       |   38 -
 drivers/usb/wusbcore/Makefile                      |   26 -
 drivers/usb/wusbcore/cbaf.c                        |  645 -----
 drivers/usb/wusbcore/crypto.c                      |  441 ---
 drivers/usb/wusbcore/dev-sysfs.c                   |  124 -
 drivers/usb/wusbcore/devconnect.c                  | 1085 --------
 drivers/usb/wusbcore/mmc.c                         |  303 --
 drivers/usb/wusbcore/pal.c                         |   45 -
 drivers/usb/wusbcore/reservation.c                 |  110 -
 drivers/usb/wusbcore/rh.c                          |  426 ---
 drivers/usb/wusbcore/security.c                    |  599 ----
 drivers/usb/wusbcore/wa-hc.c                       |   88 -
 drivers/usb/wusbcore/wa-hc.h                       |  467 ----
 drivers/usb/wusbcore/wa-nep.c                      |  289 --
 drivers/usb/wusbcore/wa-rpipe.c                    |  539 ----
 drivers/usb/wusbcore/wa-xfer.c                     | 2927 --------------------
 drivers/usb/wusbcore/wusbhc.c                      |  490 ----
 drivers/usb/wusbcore/wusbhc.h                      |  487 ----
 drivers/uwb/Kconfig                                |   72 -
 drivers/uwb/Makefile                               |   32 -
 drivers/uwb/address.c                              |  352 ---
 drivers/uwb/allocator.c                            |  374 ---
 drivers/uwb/beacon.c                               |  595 ----
 drivers/uwb/driver.c                               |  143 -
 drivers/uwb/drp-avail.c                            |  278 --
 drivers/uwb/drp-ie.c                               |  305 --
 drivers/uwb/drp.c                                  |  842 ------
 drivers/uwb/est.c                                  |  450 ---
 drivers/uwb/hwa-rc.c                               |  929 -------
 drivers/uwb/i1480/Makefile                         |    2 -
 drivers/uwb/i1480/dfu/Makefile                     |   10 -
 drivers/uwb/i1480/dfu/dfu.c                        |  198 --
 drivers/uwb/i1480/dfu/i1480-dfu.h                  |  246 --
 drivers/uwb/i1480/dfu/mac.c                        |  496 ----
 drivers/uwb/i1480/dfu/phy.c                        |  190 --
 drivers/uwb/i1480/dfu/usb.c                        |  448 ---
 drivers/uwb/i1480/i1480-est.c                      |   85 -
 drivers/uwb/ie-rcv.c                               |   42 -
 drivers/uwb/ie.c                                   |  366 ---
 drivers/uwb/lc-dev.c                               |  457 ---
 drivers/uwb/lc-rc.c                                |  569 ----
 drivers/uwb/neh.c                                  |  606 ----
 drivers/uwb/pal.c                                  |  128 -
 drivers/uwb/radio.c                                |  196 --
 drivers/uwb/reset.c                                |  379 ---
 drivers/uwb/rsv.c                                  | 1000 -------
 drivers/uwb/scan.c                                 |  120 -
 drivers/uwb/umc-bus.c                              |  211 --
 drivers/uwb/umc-dev.c                              |   94 -
 drivers/uwb/umc-drv.c                              |   31 -
 drivers/uwb/uwb-debug.c                            |  355 ---
 drivers/uwb/uwb-internal.h                         |  366 ---
 drivers/uwb/uwbd.c                                 |  356 ---
 drivers/uwb/whc-rc.c                               |  467 ----
 drivers/uwb/whci.c                                 |  257 --
 include/linux/usb/association.h                    |  151 -
 include/linux/usb/wusb-wa.h                        |  304 --
 include/linux/usb/wusb.h                           |  362 ---
 include/linux/uwb.h                                |  817 ------
 include/linux/uwb/debug-cmd.h                      |   57 -
 include/linux/uwb/spec.h                           |  767 -----
 include/linux/uwb/umc.h                            |  192 --
 include/linux/uwb/whci.h                           |  102 -
 169 files changed, 28747 insertions(+), 28736 deletions(-)
 delete mode 100644 Documentation/usb/wusb-cbaf
 delete mode 100644 Documentation/usb/wusb-design-overview.rst
 create mode 100644 drivers/staging/uwb/Kconfig
 create mode 100644 drivers/staging/uwb/Makefile
 create mode 100644 drivers/staging/uwb/TODO
 create mode 100644 drivers/staging/uwb/address.c
 create mode 100644 drivers/staging/uwb/allocator.c
 create mode 100644 drivers/staging/uwb/beacon.c
 create mode 100644 drivers/staging/uwb/driver.c
 create mode 100644 drivers/staging/uwb/drp-avail.c
 create mode 100644 drivers/staging/uwb/drp-ie.c
 create mode 100644 drivers/staging/uwb/drp.c
 create mode 100644 drivers/staging/uwb/est.c
 create mode 100644 drivers/staging/uwb/hwa-rc.c
 create mode 100644 drivers/staging/uwb/i1480/Makefile
 create mode 100644 drivers/staging/uwb/i1480/dfu/Makefile
 create mode 100644 drivers/staging/uwb/i1480/dfu/dfu.c
 create mode 100644 drivers/staging/uwb/i1480/dfu/i1480-dfu.h
 create mode 100644 drivers/staging/uwb/i1480/dfu/mac.c
 create mode 100644 drivers/staging/uwb/i1480/dfu/phy.c
 create mode 100644 drivers/staging/uwb/i1480/dfu/usb.c
 create mode 100644 drivers/staging/uwb/i1480/i1480-est.c
 create mode 100644 drivers/staging/uwb/ie-rcv.c
 create mode 100644 drivers/staging/uwb/ie.c
 create mode 100644 drivers/staging/uwb/include/debug-cmd.h
 create mode 100644 drivers/staging/uwb/include/spec.h
 create mode 100644 drivers/staging/uwb/include/umc.h
 create mode 100644 drivers/staging/uwb/include/whci.h
 create mode 100644 drivers/staging/uwb/lc-dev.c
 create mode 100644 drivers/staging/uwb/lc-rc.c
 create mode 100644 drivers/staging/uwb/neh.c
 create mode 100644 drivers/staging/uwb/pal.c
 create mode 100644 drivers/staging/uwb/radio.c
 create mode 100644 drivers/staging/uwb/reset.c
 create mode 100644 drivers/staging/uwb/rsv.c
 create mode 100644 drivers/staging/uwb/scan.c
 create mode 100644 drivers/staging/uwb/umc-bus.c
 create mode 100644 drivers/staging/uwb/umc-dev.c
 create mode 100644 drivers/staging/uwb/umc-drv.c
 create mode 100644 drivers/staging/uwb/uwb-debug.c
 create mode 100644 drivers/staging/uwb/uwb-internal.h
 create mode 100644 drivers/staging/uwb/uwb.h
 create mode 100644 drivers/staging/uwb/uwbd.c
 create mode 100644 drivers/staging/uwb/whc-rc.c
 create mode 100644 drivers/staging/uwb/whci.c
 create mode 100644 drivers/staging/wusbcore/Documentation/wusb-cbaf
 create mode 100644 drivers/staging/wusbcore/Documentation/wusb-design-overview.rst
 create mode 100644 drivers/staging/wusbcore/Kconfig
 create mode 100644 drivers/staging/wusbcore/Makefile
 create mode 100644 drivers/staging/wusbcore/TODO
 create mode 100644 drivers/staging/wusbcore/cbaf.c
 create mode 100644 drivers/staging/wusbcore/crypto.c
 create mode 100644 drivers/staging/wusbcore/dev-sysfs.c
 create mode 100644 drivers/staging/wusbcore/devconnect.c
 create mode 100644 drivers/staging/wusbcore/host/Kconfig
 create mode 100644 drivers/staging/wusbcore/host/Makefile
 create mode 100644 drivers/staging/wusbcore/host/hwa-hc.c
 create mode 100644 drivers/staging/wusbcore/host/whci/Makefile
 create mode 100644 drivers/staging/wusbcore/host/whci/asl.c
 create mode 100644 drivers/staging/wusbcore/host/whci/debug.c
 create mode 100644 drivers/staging/wusbcore/host/whci/hcd.c
 create mode 100644 drivers/staging/wusbcore/host/whci/hw.c
 create mode 100644 drivers/staging/wusbcore/host/whci/init.c
 create mode 100644 drivers/staging/wusbcore/host/whci/int.c
 create mode 100644 drivers/staging/wusbcore/host/whci/pzl.c
 create mode 100644 drivers/staging/wusbcore/host/whci/qset.c
 create mode 100644 drivers/staging/wusbcore/host/whci/whcd.h
 create mode 100644 drivers/staging/wusbcore/host/whci/whci-hc.h
 create mode 100644 drivers/staging/wusbcore/host/whci/wusb.c
 create mode 100644 drivers/staging/wusbcore/include/association.h
 create mode 100644 drivers/staging/wusbcore/include/wusb-wa.h
 create mode 100644 drivers/staging/wusbcore/include/wusb.h
 create mode 100644 drivers/staging/wusbcore/mmc.c
 create mode 100644 drivers/staging/wusbcore/pal.c
 create mode 100644 drivers/staging/wusbcore/reservation.c
 create mode 100644 drivers/staging/wusbcore/rh.c
 create mode 100644 drivers/staging/wusbcore/security.c
 create mode 100644 drivers/staging/wusbcore/wa-hc.c
 create mode 100644 drivers/staging/wusbcore/wa-hc.h
 create mode 100644 drivers/staging/wusbcore/wa-nep.c
 create mode 100644 drivers/staging/wusbcore/wa-rpipe.c
 create mode 100644 drivers/staging/wusbcore/wa-xfer.c
 create mode 100644 drivers/staging/wusbcore/wusbhc.c
 create mode 100644 drivers/staging/wusbcore/wusbhc.h
 delete mode 100644 drivers/usb/host/hwa-hc.c
 delete mode 100644 drivers/usb/host/whci/Makefile
 delete mode 100644 drivers/usb/host/whci/asl.c
 delete mode 100644 drivers/usb/host/whci/debug.c
 delete mode 100644 drivers/usb/host/whci/hcd.c
 delete mode 100644 drivers/usb/host/whci/hw.c
 delete mode 100644 drivers/usb/host/whci/init.c
 delete mode 100644 drivers/usb/host/whci/int.c
 delete mode 100644 drivers/usb/host/whci/pzl.c
 delete mode 100644 drivers/usb/host/whci/qset.c
 delete mode 100644 drivers/usb/host/whci/whcd.h
 delete mode 100644 drivers/usb/host/whci/whci-hc.h
 delete mode 100644 drivers/usb/host/whci/wusb.c
 delete mode 100644 drivers/usb/wusbcore/Kconfig
 delete mode 100644 drivers/usb/wusbcore/Makefile
 delete mode 100644 drivers/usb/wusbcore/cbaf.c
 delete mode 100644 drivers/usb/wusbcore/crypto.c
 delete mode 100644 drivers/usb/wusbcore/dev-sysfs.c
 delete mode 100644 drivers/usb/wusbcore/devconnect.c
 delete mode 100644 drivers/usb/wusbcore/mmc.c
 delete mode 100644 drivers/usb/wusbcore/pal.c
 delete mode 100644 drivers/usb/wusbcore/reservation.c
 delete mode 100644 drivers/usb/wusbcore/rh.c
 delete mode 100644 drivers/usb/wusbcore/security.c
 delete mode 100644 drivers/usb/wusbcore/wa-hc.c
 delete mode 100644 drivers/usb/wusbcore/wa-hc.h
 delete mode 100644 drivers/usb/wusbcore/wa-nep.c
 delete mode 100644 drivers/usb/wusbcore/wa-rpipe.c
 delete mode 100644 drivers/usb/wusbcore/wa-xfer.c
 delete mode 100644 drivers/usb/wusbcore/wusbhc.c
 delete mode 100644 drivers/usb/wusbcore/wusbhc.h
 delete mode 100644 drivers/uwb/Kconfig
 delete mode 100644 drivers/uwb/Makefile
 delete mode 100644 drivers/uwb/address.c
 delete mode 100644 drivers/uwb/allocator.c
 delete mode 100644 drivers/uwb/beacon.c
 delete mode 100644 drivers/uwb/driver.c
 delete mode 100644 drivers/uwb/drp-avail.c
 delete mode 100644 drivers/uwb/drp-ie.c
 delete mode 100644 drivers/uwb/drp.c
 delete mode 100644 drivers/uwb/est.c
 delete mode 100644 drivers/uwb/hwa-rc.c
 delete mode 100644 drivers/uwb/i1480/Makefile
 delete mode 100644 drivers/uwb/i1480/dfu/Makefile
 delete mode 100644 drivers/uwb/i1480/dfu/dfu.c
 delete mode 100644 drivers/uwb/i1480/dfu/i1480-dfu.h
 delete mode 100644 drivers/uwb/i1480/dfu/mac.c
 delete mode 100644 drivers/uwb/i1480/dfu/phy.c
 delete mode 100644 drivers/uwb/i1480/dfu/usb.c
 delete mode 100644 drivers/uwb/i1480/i1480-est.c
 delete mode 100644 drivers/uwb/ie-rcv.c
 delete mode 100644 drivers/uwb/ie.c
 delete mode 100644 drivers/uwb/lc-dev.c
 delete mode 100644 drivers/uwb/lc-rc.c
 delete mode 100644 drivers/uwb/neh.c
 delete mode 100644 drivers/uwb/pal.c
 delete mode 100644 drivers/uwb/radio.c
 delete mode 100644 drivers/uwb/reset.c
 delete mode 100644 drivers/uwb/rsv.c
 delete mode 100644 drivers/uwb/scan.c
 delete mode 100644 drivers/uwb/umc-bus.c
 delete mode 100644 drivers/uwb/umc-dev.c
 delete mode 100644 drivers/uwb/umc-drv.c
 delete mode 100644 drivers/uwb/uwb-debug.c
 delete mode 100644 drivers/uwb/uwb-internal.h
 delete mode 100644 drivers/uwb/uwbd.c
 delete mode 100644 drivers/uwb/whc-rc.c
 delete mode 100644 drivers/uwb/whci.c
 delete mode 100644 include/linux/usb/association.h
 delete mode 100644 include/linux/usb/wusb-wa.h
 delete mode 100644 include/linux/usb/wusb.h
 delete mode 100644 include/linux/uwb.h
 delete mode 100644 include/linux/uwb/debug-cmd.h
 delete mode 100644 include/linux/uwb/spec.h
 delete mode 100644 include/linux/uwb/umc.h
 delete mode 100644 include/linux/uwb/whci.h

(limited to 'include')

diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf
deleted file mode 100644
index 8b3d43efce90..000000000000
--- a/Documentation/usb/wusb-cbaf
+++ /dev/null
@@ -1,130 +0,0 @@
-#! /bin/bash
-#
-
-set -e
-
-progname=$(basename $0)
-function help
-{
-    cat <<EOF
-Usage: $progname COMMAND DEVICEs [ARGS]
-
-Command for manipulating the pairing/authentication credentials of a
-Wireless USB device that supports wired-mode Cable-Based-Association.
-
-Works in conjunction with the wusb-cba.ko driver from http://linuxuwb.org.
-
-
-DEVICE
-
- sysfs path to the device to authenticate; for example, both this
- guys are the same:
-
- /sys/devices/pci0000:00/0000:00:1d.7/usb1/1-4/1-4.4/1-4.4:1.1
- /sys/bus/usb/drivers/wusb-cbaf/1-4.4:1.1
-
-COMMAND/ARGS are
-
- start
-
-   Start a WUSB host controller (by setting up a CHID)
-
- set-chid DEVICE HOST-CHID HOST-BANDGROUP HOST-NAME
-
-   Sets host information in the device; after this you can call the
-   get-cdid to see how does this device report itself to us.
-
- get-cdid DEVICE
-
-   Get the device ID associated to the HOST-CHID we sent with
-   'set-chid'. We might not know about it.
-
- set-cc DEVICE
-
-   If we allow the device to connect, set a random new CDID and CK
-   (connection key). Device saves them for the next time it wants to
-   connect wireless. We save them for that next time also so we can
-   authenticate the device (when we see the CDID he uses to id
-   itself) and the CK to crypto talk to it.
-
-CHID is always 16 hex bytes in 'XX YY ZZ...' form
-BANDGROUP is almost always 0001
-
-Examples:
-
-  You can default most arguments to '' to get a sane value:
-
-  $ $progname set-chid '' '' '' "My host name"
-
-  A full sequence:
-
-  $ $progname set-chid '' '' '' "My host name"
-  $ $progname get-cdid ''
-  $ $progname set-cc ''
-
-EOF
-}
-
-
-# Defaults
-# FIXME: CHID should come from a database :), band group from the host
-host_CHID="00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff"
-host_band_group="0001"
-host_name=$(hostname)
-
-devs="$(echo /sys/bus/usb/drivers/wusb-cbaf/[0-9]*)"
-hdevs="$(for h in /sys/class/uwb_rc/*/wusbhc; do readlink -f $h; done)"
-
-result=0
-case $1 in
-    start)
-        for dev in ${2:-$hdevs}
-          do
-          echo $host_CHID > $dev/wusb_chid
-          echo I: started host $(basename $dev) >&2
-        done
-        ;;
-    stop)
-        for dev in ${2:-$hdevs}
-          do
-          echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
-          echo I: stopped host $(basename $dev) >&2
-        done
-        ;;
-    set-chid)
-        shift
-        for dev in ${2:-$devs}; do
-            echo "${4:-$host_name}" > $dev/wusb_host_name
-            echo "${3:-$host_band_group}" > $dev/wusb_host_band_groups
-            echo ${2:-$host_CHID} > $dev/wusb_chid
-        done
-        ;;
-    get-cdid)
-        for dev in ${2:-$devs}
-          do
-          cat $dev/wusb_cdid
-        done
-        ;;
-    set-cc)
-        for dev in ${2:-$devs}; do
-            shift
-            CDID="$(head --bytes=16 /dev/urandom  | od -tx1 -An)"
-            CK="$(head --bytes=16 /dev/urandom  | od -tx1 -An)"
-            echo "$CDID" > $dev/wusb_cdid
-            echo "$CK" > $dev/wusb_ck
-
-            echo I: CC set >&2
-            echo "CHID: $(cat $dev/wusb_chid)"
-            echo "CDID:$CDID"
-            echo "CK:  $CK"
-        done
-        ;;
-    help|h|--help|-h)
-        help
-        ;;
-    *)
-        echo "E: Unknown usage" 1>&2
-        help 1>&2
-        result=1
-esac
-exit $result
diff --git a/Documentation/usb/wusb-design-overview.rst b/Documentation/usb/wusb-design-overview.rst
deleted file mode 100644
index dc5e21609bb5..000000000000
--- a/Documentation/usb/wusb-design-overview.rst
+++ /dev/null
@@ -1,457 +0,0 @@
-================================
-Linux UWB + Wireless USB + WiNET
-================================
-
-   Copyright (C) 2005-2006 Intel Corporation
-
-   Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
-
-   This program is free software; you can redistribute it and/or
-   modify it under the terms of the GNU General Public License version
-   2 as published by the Free Software Foundation.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301, USA.
-
-
-Please visit http://bughost.org/thewiki/Design-overview.txt-1.8 for
-updated content.
-
-    * Design-overview.txt-1.8
-
-This code implements a Ultra Wide Band stack for Linux, as well as
-drivers for the USB based UWB radio controllers defined in the
-Wireless USB 1.0 specification (including Wireless USB host controller
-and an Intel WiNET controller).
-
-.. Contents
-   1. Introduction
-         1. HWA: Host Wire adapters, your Wireless USB dongle
-
-         2. DWA: Device Wired Adaptor, a Wireless USB hub for wired
-            devices
-         3. WHCI: Wireless Host Controller Interface, the PCI WUSB host
-            adapter
-   2. The UWB stack
-         1. Devices and hosts: the basic structure
-
-         2. Host Controller life cycle
-
-         3. On the air: beacons and enumerating the radio neighborhood
-
-         4. Device lists
-         5. Bandwidth allocation
-
-   3. Wireless USB Host Controller drivers
-
-   4. Glossary
-
-
-Introduction
-============
-
-UWB is a wide-band communication protocol that is to serve also as the
-low-level protocol for others (much like TCP sits on IP). Currently
-these others are Wireless USB and TCP/IP, but seems Bluetooth and
-Firewire/1394 are coming along.
-
-UWB uses a band from roughly 3 to 10 GHz, transmitting at a max of
-~-41dB (or 0.074 uW/MHz--geography specific data is still being
-negotiated w/ regulators, so watch for changes). That band is divided in
-a bunch of ~1.5 GHz wide channels (or band groups) composed of three
-subbands/subchannels (528 MHz each). Each channel is independent of each
-other, so you could consider them different "busses". Initially this
-driver considers them all a single one.
-
-Radio time is divided in 65536 us long /superframes/, each one divided
-in 256 256us long /MASs/ (Media Allocation Slots), which are the basic
-time/media allocation units for transferring data. At the beginning of
-each superframe there is a Beacon Period (BP), where every device
-transmit its beacon on a single MAS. The length of the BP depends on how
-many devices are present and the length of their beacons.
-
-Devices have a MAC (fixed, 48 bit address) and a device (changeable, 16
-bit address) and send periodic beacons to advertise themselves and pass
-info on what they are and do. They advertise their capabilities and a
-bunch of other stuff.
-
-The different logical parts of this driver are:
-
-    *
-
-      *UWB*: the Ultra-Wide-Band stack -- manages the radio and
-      associated spectrum to allow for devices sharing it. Allows to
-      control bandwidth assignment, beaconing, scanning, etc
-
-    *
-
-      *WUSB*: the layer that sits on top of UWB to provide Wireless USB.
-      The Wireless USB spec defines means to control a UWB radio and to
-      do the actual WUSB.
-
-
-HWA: Host Wire adapters, your Wireless USB dongle
--------------------------------------------------
-
-WUSB also defines a device called a Host Wire Adaptor (HWA), which in
-mere terms is a USB dongle that enables your PC to have UWB and Wireless
-USB. The Wireless USB Host Controller in a HWA looks to the host like a
-[Wireless] USB controller connected via USB (!)
-
-The HWA itself is broken in two or three main interfaces:
-
-    *
-
-      *RC*: Radio control -- this implements an interface to the
-      Ultra-Wide-Band radio controller. The driver for this implements a
-      USB-based UWB Radio Controller to the UWB stack.
-
-    *
-
-      *HC*: the wireless USB host controller. It looks like a USB host
-      whose root port is the radio and the WUSB devices connect to it.
-      To the system it looks like a separate USB host. The driver (will)
-      implement a USB host controller (similar to UHCI, OHCI or EHCI)
-      for which the root hub is the radio...To reiterate: it is a USB
-      controller that is connected via USB instead of PCI.
-
-    *
-
-      *WINET*: some HW provide a WiNET interface (IP over UWB). This
-      package provides a driver for it (it looks like a network
-      interface, winetX). The driver detects when there is a link up for
-      their type and kick into gear.
-
-
-DWA: Device Wired Adaptor, a Wireless USB hub for wired devices
----------------------------------------------------------------
-
-These are the complement to HWAs. They are a USB host for connecting
-wired devices, but it is connected to your PC connected via Wireless
-USB. To the system it looks like yet another USB host. To the untrained
-eye, it looks like a hub that connects upstream wirelessly.
-
-We still offer no support for this; however, it should share a lot of
-code with the HWA-RC driver; there is a bunch of factorization work that
-has been done to support that in upcoming releases.
-
-
-WHCI: Wireless Host Controller Interface, the PCI WUSB host adapter
--------------------------------------------------------------------
-
-This is your usual PCI device that implements WHCI. Similar in concept
-to EHCI, it allows your wireless USB devices (including DWAs) to connect
-to your host via a PCI interface. As in the case of the HWA, it has a
-Radio Control interface and the WUSB Host Controller interface per se.
-
-There is still no driver support for this, but will be in upcoming
-releases.
-
-
-The UWB stack
-=============
-
-The main mission of the UWB stack is to keep a tally of which devices
-are in radio proximity to allow drivers to connect to them. As well, it
-provides an API for controlling the local radio controllers (RCs from
-now on), such as to start/stop beaconing, scan, allocate bandwidth, etc.
-
-
-Devices and hosts: the basic structure
---------------------------------------
-
-The main building block here is the UWB device (struct uwb_dev). For
-each device that pops up in radio presence (ie: the UWB host receives a
-beacon from it) you get a struct uwb_dev that will show up in
-/sys/bus/uwb/devices.
-
-For each RC that is detected, a new struct uwb_rc and struct uwb_dev are
-created. An entry is also created in /sys/class/uwb_rc for each RC.
-
-Each RC driver is implemented by a separate driver that plugs into the
-interface that the UWB stack provides through a struct uwb_rc_ops. The
-spec creators have been nice enough to make the message format the same
-for HWA and WHCI RCs, so the driver is really a very thin transport that
-moves the requests from the UWB API to the device [/uwb_rc_ops->cmd()/]
-and sends the replies and notifications back to the API
-[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
-is chartered, among other things, to keep the tab of how the UWB radio
-neighborhood looks, creating and destroying devices as they show up or
-disappear.
-
-Command execution is very simple: a command block is sent and a event
-block or reply is expected back. For sending/receiving command/events, a
-handle called /neh/ (Notification/Event Handle) is opened with
-/uwb_rc_neh_open()/.
-
-The HWA-RC (USB dongle) driver (drivers/uwb/hwa-rc.c) does this job for
-the USB connected HWA. Eventually, drivers/whci-rc.c will do the same
-for the PCI connected WHCI controller.
-
-
-Host Controller life cycle
---------------------------
-
-So let's say we connect a dongle to the system: it is detected and
-firmware uploaded if needed [for Intel's i1480
-/drivers/uwb/ptc/usb.c:ptc_usb_probe()/] and then it is reenumerated.
-Now we have a real HWA device connected and
-/drivers/uwb/hwa-rc.c:hwarc_probe()/ picks it up, that will set up the
-Wire-Adaptor environment and then suck it into the UWB stack's vision of
-the world [/drivers/uwb/lc-rc.c:uwb_rc_add()/].
-
-    *
-
-      [*] The stack should put a new RC to scan for devices
-      [/uwb_rc_scan()/] so it finds what's available around and tries to
-      connect to them, but this is policy stuff and should be driven
-      from user space. As of now, the operator is expected to do it
-      manually; see the release notes for documentation on the procedure.
-
-When a dongle is disconnected, /drivers/uwb/hwa-rc.c:hwarc_disconnect()/
-takes time of tearing everything down safely (or not...).
-
-
-On the air: beacons and enumerating the radio neighborhood
-----------------------------------------------------------
-
-So assuming we have devices and we have agreed for a channel to connect
-on (let's say 9), we put the new RC to beacon:
-
-    *
-
-            $ echo 9 0 > /sys/class/uwb_rc/uwb0/beacon
-
-Now it is visible. If there were other devices in the same radio channel
-and beacon group (that's what the zero is for), the dongle's radio
-control interface will send beacon notifications on its
-notification/event endpoint (NEEP). The beacon notifications are part of
-the event stream that is funneled into the API with
-/drivers/uwb/neh.c:uwb_rc_neh_grok()/ and delivered to the UWBD, the UWB
-daemon through a notification list.
-
-UWBD wakes up and scans the event list; finds a beacon and adds it to
-the BEACON CACHE (/uwb_beca/). If he receives a number of beacons from
-the same device, he considers it to be 'onair' and creates a new device
-[/drivers/uwb/lc-dev.c:uwbd_dev_onair()/]. Similarly, when no beacons
-are received in some time, the device is considered gone and wiped out
-[uwbd calls periodically /uwb/beacon.c:uwb_beca_purge()/ that will purge
-the beacon cache of dead devices].
-
-
-Device lists
-------------
-
-All UWB devices are kept in the list of the struct bus_type uwb_bus_type.
-
-
-Bandwidth allocation
---------------------
-
-The UWB stack maintains a local copy of DRP availability through
-processing of incoming *DRP Availability Change* notifications. This
-local copy is currently used to present the current bandwidth
-availability to the user through the sysfs file
-/sys/class/uwb_rc/uwbx/bw_avail. In the future the bandwidth
-availability information will be used by the bandwidth reservation
-routines.
-
-The bandwidth reservation routines are in progress and are thus not
-present in the current release. When completed they will enable a user
-to initiate DRP reservation requests through interaction with sysfs. DRP
-reservation requests from remote UWB devices will also be handled. The
-bandwidth management done by the UWB stack will include callbacks to the
-higher layers will enable the higher layers to use the reservations upon
-completion. [Note: The bandwidth reservation work is in progress and
-subject to change.]
-
-
-Wireless USB Host Controller drivers
-====================================
-
-*WARNING* This section needs a lot of work!
-
-As explained above, there are three different types of HCs in the WUSB
-world: HWA-HC, DWA-HC and WHCI-HC.
-
-HWA-HC and DWA-HC share that they are Wire-Adapters (USB or WUSB
-connected controllers), and their transfer management system is almost
-identical. So is their notification delivery system.
-
-HWA-HC and WHCI-HC share that they are both WUSB host controllers, so
-they have to deal with WUSB device life cycle and maintenance, wireless
-root-hub
-
-HWA exposes a Host Controller interface (HWA-HC 0xe0/02/02). This has
-three endpoints (Notifications, Data Transfer In and Data Transfer
-Out--known as NEP, DTI and DTO in the code).
-
-We reserve UWB bandwidth for our Wireless USB Cluster, create a Cluster
-ID and tell the HC to use all that. Then we start it. This means the HC
-starts sending MMCs.
-
-    *
-
-      The MMCs are blocks of data defined somewhere in the WUSB1.0 spec
-      that define a stream in the UWB channel time allocated for sending
-      WUSB IEs (host to device commands/notifications) and Device
-      Notifications (device initiated to host). Each host defines a
-      unique Wireless USB cluster through MMCs. Devices can connect to a
-      single cluster at the time. The IEs are Information Elements, and
-      among them are the bandwidth allocations that tell each device
-      when can they transmit or receive.
-
-Now it all depends on external stimuli.
-
-New device connection
----------------------
-
-A new device pops up, it scans the radio looking for MMCs that give out
-the existence of Wireless USB channels. Once one (or more) are found,
-selects which one to connect to. Sends a /DN_Connect/ (device
-notification connect) during the DNTS (Device Notification Time
-Slot--announced in the MMCs
-
-HC picks the /DN_Connect/ out (nep module sends to notif.c for delivery
-into /devconnect/). This process starts the authentication process for
-the device. First we allocate a /fake port/ and assign an
-unauthenticated address (128 to 255--what we really do is
-0x80 | fake_port_idx). We fiddle with the fake port status and /hub_wq/
-sees a new connection, so he moves on to enable the fake port with a reset.
-
-So now we are in the reset path -- we know we have a non-yet enumerated
-device with an unauthorized address; we ask user space to authenticate
-(FIXME: not yet done, similar to bluetooth pairing), then we do the key
-exchange (FIXME: not yet done) and issue a /set address 0/ to bring the
-device to the default state. Device is authenticated.
-
-From here, the USB stack takes control through the usb_hcd ops. hub_wq
-has seen the port status changes, as we have been toggling them. It will
-start enumerating and doing transfers through usb_hcd->urb_enqueue() to
-read descriptors and move our data.
-
-Device life cycle and keep alives
----------------------------------
-
-Every time there is a successful transfer to/from a device, we update a
-per-device activity timestamp. If not, every now and then we check and
-if the activity timestamp gets old, we ping the device by sending it a
-Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
-arrives to us as a notification through
-devconnect.c:wusb_handle_dn_alive(). If a device times out, we
-disconnect it from the system (cleaning up internal information and
-toggling the bits in the fake hub port, which kicks hub_wq into removing
-the rest of the stuff).
-
-This is done through devconnect:__wusb_check_devs(), which will scan the
-device list looking for whom needs refreshing.
-
-If the device wants to disconnect, it will either die (ugly) or send a
-/DN_Disconnect/ that will prompt a disconnection from the system.
-
-Sending and receiving data
---------------------------
-
-Data is sent and received through /Remote Pipes/ (rpipes). An rpipe is
-/aimed/ at an endpoint in a WUSB device. This is the same for HWAs and
-DWAs.
-
-Each HC has a number of rpipes and buffers that can be assigned to them;
-when doing a data transfer (xfer), first the rpipe has to be aimed and
-prepared (buffers assigned), then we can start queueing requests for
-data in or out.
-
-Data buffers have to be segmented out before sending--so we send first a
-header (segment request) and then if there is any data, a data buffer
-immediately after to the DTI interface (yep, even the request). If our
-buffer is bigger than the max segment size, then we just do multiple
-requests.
-
-[This sucks, because doing USB scatter gatter in Linux is resource
-intensive, if any...not that the current approach is not. It just has to
-be cleaned up a lot :)].
-
-If reading, we don't send data buffers, just the segment headers saying
-we want to read segments.
-
-When the xfer is executed, we receive a notification that says data is
-ready in the DTI endpoint (handled through
-xfer.c:wa_handle_notif_xfer()). In there we read from the DTI endpoint a
-descriptor that gives us the status of the transfer, its identification
-(given when we issued it) and the segment number. If it was a data read,
-we issue another URB to read into the destination buffer the chunk of
-data coming out of the remote endpoint. Done, wait for the next guy. The
-callbacks for the URBs issued from here are the ones that will declare
-the xfer complete at some point and call its callback.
-
-Seems simple, but the implementation is not trivial.
-
-    *
-
-      *WARNING* Old!!
-
-The main xfer descriptor, wa_xfer (equivalent to a URB) contains an
-array of segments, tallys on segments and buffers and callback
-information. Buried in there is a lot of URBs for executing the segments
-and buffer transfers.
-
-For OUT xfers, there is an array of segments, one URB for each, another
-one of buffer URB. When submitting, we submit URBs for segment request
-1, buffer 1, segment 2, buffer 2...etc. Then we wait on the DTI for xfer
-result data; when all the segments are complete, we call the callback to
-finalize the transfer.
-
-For IN xfers, we only issue URBs for the segments we want to read and
-then wait for the xfer result data.
-
-URB mapping into xfers
-^^^^^^^^^^^^^^^^^^^^^^
-
-This is done by hwahc_op_urb_[en|de]queue(). In enqueue() we aim an
-rpipe to the endpoint where we have to transmit, create a transfer
-context (wa_xfer) and submit it. When the xfer is done, our callback is
-called and we assign the status bits and release the xfer resources.
-
-In dequeue() we are basically cancelling/aborting the transfer. We issue
-a xfer abort request to the HC, cancel all the URBs we had submitted
-and not yet done and when all that is done, the xfer callback will be
-called--this will call the URB callback.
-
-
-Glossary
-========
-
-*DWA* -- Device Wire Adapter
-
-USB host, wired for downstream devices, upstream connects wirelessly
-with Wireless USB.
-
-*EVENT* -- Response to a command on the NEEP
-
-*HWA* -- Host Wire Adapter / USB dongle for UWB and Wireless USB
-
-*NEH* -- Notification/Event Handle
-
-Handle/file descriptor for receiving notifications or events. The WA
-code requires you to get one of this to listen for notifications or
-events on the NEEP.
-
-*NEEP* -- Notification/Event EndPoint
-
-Stuff related to the management of the first endpoint of a HWA USB
-dongle that is used to deliver an stream of events and notifications to
-the host.
-
-*NOTIFICATION* -- Message coming in the NEEP as response to something.
-
-*RC* -- Radio Control
-
-Design-overview.txt-1.8 (last edited 2006-11-04 12:22:24 by
-InakyPerezGonzalez)
diff --git a/MAINTAINERS b/MAINTAINERS
index 6426db5198f0..f4463fb48249 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3800,14 +3800,9 @@ F:	scripts/sign-file.c
 F:	scripts/extract-cert.c
 
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
-L:	linux-usb@vger.kernel.org
+L:	devel@driverdev.osuosl.org
 S:	Orphan
-F:	Documentation/usb/wusb-design-overview.rst
-F:	Documentation/usb/wusb-cbaf
-F:	drivers/usb/host/hwa-hc.c
-F:	drivers/usb/host/whci/
-F:	drivers/usb/wusbcore/
-F:	include/linux/usb/wusb*
+F:	drivers/staging/wbusbcore/
 
 CFAG12864B LCD DRIVER
 M:	Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
@@ -16447,11 +16442,9 @@ F:	drivers/usb/common/ulpi.c
 F:	include/linux/ulpi/
 
 ULTRA-WIDEBAND (UWB) SUBSYSTEM:
-L:	linux-usb@vger.kernel.org
+L:	devel@driverdev.osuosl.org
 S:	Orphan
-F:	drivers/uwb/
-F:	include/linux/uwb.h
-F:	include/linux/uwb/
+F:	drivers/staging/uwb/
 
 UNICODE SUBSYSTEM:
 M:	Gabriel Krisman Bertazi <krisman@collabora.com>
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 61cf4ea2c229..e8852c09184b 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -108,8 +108,6 @@ source "drivers/hid/Kconfig"
 
 source "drivers/usb/Kconfig"
 
-source "drivers/uwb/Kconfig"
-
 source "drivers/mmc/Kconfig"
 
 source "drivers/memstick/Kconfig"
diff --git a/drivers/Makefile b/drivers/Makefile
index 6d37564e783c..cf046e9bd88c 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -100,7 +100,6 @@ obj-$(CONFIG_ZORRO)		+= zorro/
 obj-$(CONFIG_ATA_OVER_ETH)	+= block/aoe/
 obj-$(CONFIG_PARIDE) 		+= block/paride/
 obj-$(CONFIG_TC)		+= tc/
-obj-$(CONFIG_UWB)		+= uwb/
 obj-$(CONFIG_USB_PHY)		+= usb/
 obj-$(CONFIG_USB)		+= usb/
 obj-$(CONFIG_USB_SUPPORT)	+= usb/
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 7c96a01eef6c..cf419d9c942d 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -120,4 +120,7 @@ source "drivers/staging/kpc2000/Kconfig"
 
 source "drivers/staging/isdn/Kconfig"
 
+source "drivers/staging/wusbcore/Kconfig"
+source "drivers/staging/uwb/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index fcaac9693b83..38179bc842a8 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -50,3 +50,5 @@ obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_FIELDBUS_DEV)     += fieldbus/
 obj-$(CONFIG_KPC2000)		+= kpc2000/
 obj-$(CONFIG_ISDN_CAPI)		+= isdn/
+obj-$(CONFIG_UWB)		+= uwb/
+obj-$(CONFIG_USB_WUSB)		+= wusbcore/
diff --git a/drivers/staging/uwb/Kconfig b/drivers/staging/uwb/Kconfig
new file mode 100644
index 000000000000..259e053e1e09
--- /dev/null
+++ b/drivers/staging/uwb/Kconfig
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# UWB device configuration
+#
+
+menuconfig UWB
+	tristate "Ultra Wideband devices"
+	default n
+	select GENERIC_NET_UTILS
+	help
+	  UWB is a high-bandwidth, low-power, point-to-point radio
+	  technology using a wide spectrum (3.1-10.6GHz). It is
+	  optimized for in-room use (480Mbps at 2 meters, 110Mbps at
+	  10m). It serves as the transport layer for other protocols,
+	  such as Wireless USB (WUSB).
+
+	  The topology is peer to peer; however, higher level
+	  protocols (such as WUSB) might impose a master/slave
+	  relationship.
+
+	  Say Y here if your computer has UWB radio controllers (USB or PCI)
+	  based. You will need to enable the radio controllers
+	  below.  It is ok to select all of them, no harm done.
+
+	  For more help check the UWB and WUSB related files in
+	  <file:Documentation/usb/>.
+
+	  To compile the UWB stack as a module, choose M here.
+
+if UWB
+
+config UWB_HWA
+	tristate "UWB Radio Control driver for WUSB-compliant USB dongles (HWA)"
+	depends on USB
+	help
+	  This driver enables the radio controller for HWA USB
+	  devices. HWA stands for Host Wire Adapter, and it is a UWB
+	  Radio Controller connected to your system via USB. Most of
+	  them come with a Wireless USB host controller also.
+
+	  To compile this driver select Y (built in) or M (module). It
+	  is safe to select any even if you do not have the hardware.
+
+config UWB_WHCI
+        tristate "UWB Radio Control driver for WHCI-compliant cards"
+        depends on PCI
+        help
+          This driver enables the radio controller for WHCI cards.
+
+          WHCI is a specification developed by Intel
+          (http://www.intel.com/technology/comms/wusb/whci.htm) much
+          in the spirit of USB's EHCI, but for UWB and Wireless USB
+          radio/host controllers connected via memory mapping (eg:
+          PCI). Most of these cards come also with a Wireless USB host
+          controller.
+
+          To compile this driver select Y (built in) or M (module). It
+          is safe to select any even if you do not have the hardware.
+
+config UWB_I1480U
+        tristate "Support for Intel Wireless UWB Link 1480 HWA"
+        depends on UWB_HWA
+        select FW_LOADER
+        help
+         This driver enables support for the i1480 when connected via
+         USB. It consists of a firmware uploader that will enable it
+         to behave as an HWA device.
+
+         To compile this driver select Y (built in) or M (module). It
+         is safe to select any even if you do not have the hardware.
+
+endif # UWB
diff --git a/drivers/staging/uwb/Makefile b/drivers/staging/uwb/Makefile
new file mode 100644
index 000000000000..32f4de7afbd6
--- /dev/null
+++ b/drivers/staging/uwb/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_UWB)		+= uwb.o
+obj-$(CONFIG_UWB_WHCI)		+= umc.o whci.o whc-rc.o
+obj-$(CONFIG_UWB_HWA)		+= hwa-rc.o
+obj-$(CONFIG_UWB_I1480U)	+= i1480/
+
+uwb-objs :=		\
+	address.o	\
+	allocator.o	\
+	beacon.o	\
+	driver.o	\
+	drp.o		\
+	drp-avail.o	\
+	drp-ie.o	\
+	est.o		\
+	ie.o		\
+	ie-rcv.o	\
+	lc-dev.o	\
+	lc-rc.o		\
+	neh.o		\
+	pal.o		\
+	radio.o		\
+	reset.o		\
+	rsv.o		\
+	scan.o		\
+	uwb-debug.o	\
+	uwbd.o
+
+umc-objs :=		\
+	umc-bus.o	\
+	umc-dev.o	\
+	umc-drv.o
diff --git a/drivers/staging/uwb/TODO b/drivers/staging/uwb/TODO
new file mode 100644
index 000000000000..abae57000534
--- /dev/null
+++ b/drivers/staging/uwb/TODO
@@ -0,0 +1,8 @@
+TODO: Remove in late 2019 unless there are users
+
+There seems to not be any real wireless USB devices anywhere in the wild
+anymore.  It turned out to be a failed technology :(
+
+This will be removed from the tree if no one objects.
+
+Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/staging/uwb/address.c b/drivers/staging/uwb/address.c
new file mode 100644
index 000000000000..857d5cd56a95
--- /dev/null
+++ b/drivers/staging/uwb/address.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Address management
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ */
+
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/random.h>
+#include <linux/etherdevice.h>
+
+#include "uwb-internal.h"
+
+
+/** Device Address Management command */
+struct uwb_rc_cmd_dev_addr_mgmt {
+	struct uwb_rccb rccb;
+	u8 bmOperationType;
+	u8 baAddr[6];
+} __attribute__((packed));
+
+
+/**
+ * Low level command for setting/getting UWB radio's addresses
+ *
+ * @hwarc:	HWA Radio Control interface instance
+ * @bmOperationType:
+ * 		Set/get, MAC/DEV (see WUSB1.0[8.6.2.2])
+ * @baAddr:	address buffer--assumed to have enough data to hold
+ *              the address type requested.
+ * @reply:	Pointer to reply buffer (can be stack allocated)
+ * @returns:	0 if ok, < 0 errno code on error.
+ *
+ * @cmd has to be allocated because USB cannot grok USB or vmalloc
+ * buffers depending on your combination of host architecture.
+ */
+static
+int uwb_rc_dev_addr_mgmt(struct uwb_rc *rc,
+			 u8 bmOperationType, const u8 *baAddr,
+			 struct uwb_rc_evt_dev_addr_mgmt *reply)
+{
+	int result;
+	struct uwb_rc_cmd_dev_addr_mgmt *cmd;
+
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_kzalloc;
+	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
+	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_DEV_ADDR_MGMT);
+	cmd->bmOperationType = bmOperationType;
+	if (baAddr) {
+		size_t size = 0;
+		switch (bmOperationType >> 1) {
+		case 0:	size = 2; break;
+		case 1:	size = 6; break;
+		default: BUG();
+		}
+		memcpy(cmd->baAddr, baAddr, size);
+	}
+	reply->rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply->rceb.wEvent = UWB_RC_CMD_DEV_ADDR_MGMT;
+	result = uwb_rc_cmd(rc, "DEV-ADDR-MGMT",
+			    &cmd->rccb, sizeof(*cmd),
+			    &reply->rceb, sizeof(*reply));
+	if (result < 0)
+		goto error_cmd;
+	if (result < sizeof(*reply)) {
+		dev_err(&rc->uwb_dev.dev,
+			"DEV-ADDR-MGMT: not enough data replied: "
+			"%d vs %zu bytes needed\n", result, sizeof(*reply));
+		result = -ENOMSG;
+	} else if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(&rc->uwb_dev.dev,
+			"DEV-ADDR-MGMT: command execution failed: %s (%d)\n",
+			uwb_rc_strerror(reply->bResultCode),
+			reply->bResultCode);
+		result = -EIO;
+	} else
+		result = 0;
+error_cmd:
+	kfree(cmd);
+error_kzalloc:
+	return result;
+}
+
+
+/**
+ * Set the UWB RC MAC or device address.
+ *
+ * @rc:      UWB Radio Controller
+ * @_addr:   Pointer to address to write [assumed to be either a
+ *           'struct uwb_mac_addr *' or a 'struct uwb_dev_addr *'].
+ * @type:    Type of address to set (UWB_ADDR_DEV or UWB_ADDR_MAC).
+ * @returns: 0 if ok, < 0 errno code on error.
+ *
+ * Some anal retentivity here: even if both 'struct
+ * uwb_{dev,mac}_addr' have the actual byte array in the same offset
+ * and I could just pass _addr to hwarc_cmd_dev_addr_mgmt(), I prefer
+ * to use some syntatic sugar in case someday we decide to change the
+ * format of the structs. The compiler will optimize it out anyway.
+ */
+static int uwb_rc_addr_set(struct uwb_rc *rc,
+		    const void *_addr, enum uwb_addr_type type)
+{
+	int result;
+	u8 bmOperationType = 0x1; 		/* Set address */
+	const struct uwb_dev_addr *dev_addr = _addr;
+	const struct uwb_mac_addr *mac_addr = _addr;
+	struct uwb_rc_evt_dev_addr_mgmt reply;
+	const u8 *baAddr;
+
+	result = -EINVAL;
+	switch (type) {
+	case UWB_ADDR_DEV:
+		baAddr = dev_addr->data;
+		break;
+	case UWB_ADDR_MAC:
+		baAddr = mac_addr->data;
+		bmOperationType |= 0x2;
+		break;
+	default:
+		return result;
+	}
+	return uwb_rc_dev_addr_mgmt(rc, bmOperationType, baAddr, &reply);
+}
+
+
+/**
+ * Get the UWB radio's MAC or device address.
+ *
+ * @rc:      UWB Radio Controller
+ * @_addr:   Where to write the address data [assumed to be either a
+ *           'struct uwb_mac_addr *' or a 'struct uwb_dev_addr *'].
+ * @type:    Type of address to get (UWB_ADDR_DEV or UWB_ADDR_MAC).
+ * @returns: 0 if ok (and *_addr set), < 0 errno code on error.
+ *
+ * See comment in uwb_rc_addr_set() about anal retentivity in the
+ * type handling of the address variables.
+ */
+static int uwb_rc_addr_get(struct uwb_rc *rc,
+		    void *_addr, enum uwb_addr_type type)
+{
+	int result;
+	u8 bmOperationType = 0x0; 		/* Get address */
+	struct uwb_rc_evt_dev_addr_mgmt evt;
+	struct uwb_dev_addr *dev_addr = _addr;
+	struct uwb_mac_addr *mac_addr = _addr;
+	u8 *baAddr;
+
+	result = -EINVAL;
+	switch (type) {
+	case UWB_ADDR_DEV:
+		baAddr = dev_addr->data;
+		break;
+	case UWB_ADDR_MAC:
+		bmOperationType |= 0x2;
+		baAddr = mac_addr->data;
+		break;
+	default:
+		return result;
+	}
+	result = uwb_rc_dev_addr_mgmt(rc, bmOperationType, baAddr, &evt);
+	if (result == 0)
+		switch (type) {
+		case UWB_ADDR_DEV:
+			memcpy(&dev_addr->data, evt.baAddr,
+			       sizeof(dev_addr->data));
+			break;
+		case UWB_ADDR_MAC:
+			memcpy(&mac_addr->data, evt.baAddr,
+			       sizeof(mac_addr->data));
+			break;
+		default:		/* shut gcc up */
+			BUG();
+		}
+	return result;
+}
+
+
+/** Get @rc's MAC address to @addr */
+int uwb_rc_mac_addr_get(struct uwb_rc *rc,
+			struct uwb_mac_addr *addr) {
+	return uwb_rc_addr_get(rc, addr, UWB_ADDR_MAC);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_mac_addr_get);
+
+
+/** Get @rc's device address to @addr */
+int uwb_rc_dev_addr_get(struct uwb_rc *rc,
+			struct uwb_dev_addr *addr) {
+	return uwb_rc_addr_get(rc, addr, UWB_ADDR_DEV);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_dev_addr_get);
+
+
+/** Set @rc's address to @addr */
+int uwb_rc_mac_addr_set(struct uwb_rc *rc,
+			const struct uwb_mac_addr *addr)
+{
+	int result = -EINVAL;
+	mutex_lock(&rc->uwb_dev.mutex);
+	result = uwb_rc_addr_set(rc, addr, UWB_ADDR_MAC);
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return result;
+}
+
+
+/** Set @rc's address to @addr */
+int uwb_rc_dev_addr_set(struct uwb_rc *rc,
+			const struct uwb_dev_addr *addr)
+{
+	int result = -EINVAL;
+	mutex_lock(&rc->uwb_dev.mutex);
+	result = uwb_rc_addr_set(rc, addr, UWB_ADDR_DEV);
+	rc->uwb_dev.dev_addr = *addr;
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return result;
+}
+
+/* Returns !0 if given address is already assigned to device. */
+int __uwb_mac_addr_assigned_check(struct device *dev, void *_addr)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_mac_addr *addr = _addr;
+
+	if (!uwb_mac_addr_cmp(addr, &uwb_dev->mac_addr))
+		return !0;
+	return 0;
+}
+
+/* Returns !0 if given address is already assigned to device. */
+int __uwb_dev_addr_assigned_check(struct device *dev, void *_addr)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_dev_addr *addr = _addr;
+	if (!uwb_dev_addr_cmp(addr, &uwb_dev->dev_addr))
+		return !0;
+	return 0;
+}
+
+/**
+ * uwb_dev_addr_assign - assigned a generated DevAddr to a radio controller
+ * @rc:      the (local) radio controller device requiring a new DevAddr
+ *
+ * A new DevAddr is required when:
+ *    - first setting up a radio controller
+ *    - if the hardware reports a DevAddr conflict
+ *
+ * The DevAddr is randomly generated in the generated DevAddr range
+ * [0x100, 0xfeff]. The number of devices in a beacon group is limited
+ * by mMaxBPLength (96) so this address space will never be exhausted.
+ *
+ * [ECMA-368] 17.1.1, 17.16.
+ */
+int uwb_rc_dev_addr_assign(struct uwb_rc *rc)
+{
+	struct uwb_dev_addr new_addr;
+
+	do {
+		get_random_bytes(new_addr.data, sizeof(new_addr.data));
+	} while (new_addr.data[0] == 0x00 || new_addr.data[0] == 0xff
+		 || __uwb_dev_addr_assigned(rc, &new_addr));
+
+	return uwb_rc_dev_addr_set(rc, &new_addr);
+}
+
+/**
+ * uwbd_evt_handle_rc_dev_addr_conflict - handle a DEV_ADDR_CONFLICT event
+ * @evt: the DEV_ADDR_CONFLICT notification from the radio controller
+ *
+ * A new (non-conflicting) DevAddr is assigned to the radio controller.
+ *
+ * [ECMA-368] 17.1.1.1.
+ */
+int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt)
+{
+	struct uwb_rc *rc = evt->rc;
+
+	return uwb_rc_dev_addr_assign(rc);
+}
+
+/*
+ * Print the 48-bit EUI MAC address of the radio controller when
+ * reading /sys/class/uwb_rc/XX/mac_address
+ */
+static ssize_t uwb_rc_mac_addr_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	struct uwb_mac_addr addr;
+	ssize_t result;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	result = uwb_rc_addr_get(rc, &addr, UWB_ADDR_MAC);
+	mutex_unlock(&rc->uwb_dev.mutex);
+	if (result >= 0) {
+		result = uwb_mac_addr_print(buf, UWB_ADDR_STRSIZE, &addr);
+		buf[result++] = '\n';
+	}
+	return result;
+}
+
+/*
+ * Parse a 48 bit address written to /sys/class/uwb_rc/XX/mac_address
+ * and if correct, set it.
+ */
+static ssize_t uwb_rc_mac_addr_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	struct uwb_mac_addr addr;
+	ssize_t result;
+
+	if (!mac_pton(buf, addr.data))
+		return -EINVAL;
+	if (is_multicast_ether_addr(addr.data)) {
+		dev_err(&rc->uwb_dev.dev, "refusing to set multicast "
+			"MAC address %s\n", buf);
+		return -EINVAL;
+	}
+	result = uwb_rc_mac_addr_set(rc, &addr);
+	if (result == 0)
+		rc->uwb_dev.mac_addr = addr;
+
+	return result < 0 ? result : size;
+}
+DEVICE_ATTR(mac_address, S_IRUGO | S_IWUSR, uwb_rc_mac_addr_show, uwb_rc_mac_addr_store);
+
+/** Print @addr to @buf, @return bytes written */
+size_t __uwb_addr_print(char *buf, size_t buf_size, const unsigned char *addr,
+			int type)
+{
+	size_t result;
+	if (type)
+		result = scnprintf(buf, buf_size, "%pM", addr);
+	else
+		result = scnprintf(buf, buf_size, "%02x:%02x",
+				  addr[1], addr[0]);
+	return result;
+}
+EXPORT_SYMBOL_GPL(__uwb_addr_print);
diff --git a/drivers/staging/uwb/allocator.c b/drivers/staging/uwb/allocator.c
new file mode 100644
index 000000000000..1f429fba20b7
--- /dev/null
+++ b/drivers/staging/uwb/allocator.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB reservation management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include "uwb.h"
+
+#include "uwb-internal.h"
+
+static void uwb_rsv_fill_column_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int col, mas, safe_mas, unsafe_mas;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	unsigned char c;
+
+	for (col = ci->csi.start_col; col < UWB_NUM_ZONES; col += ci->csi.interval) {
+    
+		safe_mas   = ci->csi.safe_mas_per_col;
+		unsafe_mas = ci->csi.unsafe_mas_per_col;
+    
+		for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++ ) {
+			if (bm[col * UWB_MAS_PER_ZONE + mas] == 0) {
+	
+				if (safe_mas > 0) {
+					safe_mas--;
+					c = UWB_RSV_MAS_SAFE;
+				} else if (unsafe_mas > 0) {
+					unsafe_mas--;
+					c = UWB_RSV_MAS_UNSAFE;
+				} else {
+					break;
+				}
+				bm[col * UWB_MAS_PER_ZONE + mas] = c;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int mas, col, rows;
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	unsigned char c;
+
+	rows = 1;
+	c = UWB_RSV_MAS_SAFE;
+	for (mas = UWB_MAS_PER_ZONE - 1; mas >= 0; mas--) {
+		if (ri->avail[mas] == 1) {
+      
+			if (rows > ri->used_rows) {
+				break;
+			} else if (rows > 7) {
+				c = UWB_RSV_MAS_UNSAFE;
+			}
+
+			for (col = 0; col < UWB_NUM_ZONES; col++) {
+				if (bm[col * UWB_NUM_ZONES + mas] != UWB_RSV_MAS_NOT_AVAIL) {
+					bm[col * UWB_NUM_ZONES + mas] = c;
+					if(c == UWB_RSV_MAS_SAFE)
+						ai->safe_allocated_mases++;
+					else
+						ai->unsafe_allocated_mases++;
+				}
+			}
+			rows++;
+		}
+	}
+	ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+}
+
+/*
+ * Find the best column set for a given availability, interval, num safe mas and
+ * num unsafe mas.
+ *
+ * The different sets are tried in order as shown below, depending on the interval.
+ *
+ * interval = 16
+ *	deep = 0
+ *		set 1 ->  {  8 }
+ *	deep = 1
+ *		set 1 ->  {  4 }
+ *		set 2 ->  { 12 }
+ *	deep = 2
+ *		set 1 ->  {  2 }
+ *		set 2 ->  {  6 }
+ *		set 3 ->  { 10 }
+ *		set 4 ->  { 14 }
+ *	deep = 3
+ *		set 1 ->  {  1 }
+ *		set 2 ->  {  3 }
+ *		set 3 ->  {  5 }
+ *		set 4 ->  {  7 }
+ *		set 5 ->  {  9 }
+ *		set 6 ->  { 11 }
+ *		set 7 ->  { 13 }
+ *		set 8 ->  { 15 }
+ *
+ * interval = 8
+ *	deep = 0
+ *		set 1 ->  {  4  12 }
+ *	deep = 1
+ *		set 1 ->  {  2  10 }
+ *		set 2 ->  {  6  14 }
+ *	deep = 2
+ *		set 1 ->  {  1   9 }
+ *		set 2 ->  {  3  11 }
+ *		set 3 ->  {  5  13 }
+ *		set 4 ->  {  7  15 }
+ *
+ * interval = 4
+ *	deep = 0
+ *		set 1 ->  {  2   6  10  14 }
+ *	deep = 1
+ *		set 1 ->  {  1   5   9  13 }
+ *		set 2 ->  {  3   7  11  15 }
+ *
+ * interval = 2
+ *	deep = 0
+ *		set 1 ->  {  1   3   5   7   9  11  13  15 }
+ */
+static int uwb_rsv_find_best_column_set(struct uwb_rsv_alloc_info *ai, int interval, 
+					int num_safe_mas, int num_unsafe_mas)
+{
+	struct uwb_rsv_col_info *ci = ai->ci;
+	struct uwb_rsv_col_set_info *csi = &ci->csi;
+	struct uwb_rsv_col_set_info tmp_csi;
+	int deep, set, col, start_col_deep, col_start_set;
+	int start_col, max_mas_in_set, lowest_max_mas_in_deep;
+	int n_mas;
+	int found = UWB_RSV_ALLOC_NOT_FOUND; 
+
+	tmp_csi.start_col = 0;
+	start_col_deep = interval;
+	n_mas = num_unsafe_mas + num_safe_mas;
+
+	for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) {
+		start_col_deep /= 2;
+		col_start_set = 0;
+		lowest_max_mas_in_deep = UWB_MAS_PER_ZONE;
+
+		for (set = 1; set <= (1 << deep); set++) {
+			max_mas_in_set = 0;
+			start_col = start_col_deep + col_start_set;
+			for (col = start_col; col < UWB_NUM_ZONES; col += interval) {
+                
+				if (ci[col].max_avail_safe >= num_safe_mas &&
+				    ci[col].max_avail_unsafe >= n_mas) {
+					if (ci[col].highest_mas[n_mas] > max_mas_in_set)
+						max_mas_in_set = ci[col].highest_mas[n_mas];
+				} else {
+					max_mas_in_set = 0;
+					break;
+				}
+			}
+			if ((lowest_max_mas_in_deep > max_mas_in_set) && max_mas_in_set) {
+				lowest_max_mas_in_deep = max_mas_in_set;
+
+				tmp_csi.start_col = start_col;
+			}
+			col_start_set += (interval >> deep);
+		}
+
+		if (lowest_max_mas_in_deep < 8) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+			break;
+		} else if ((lowest_max_mas_in_deep > 8) && 
+			   (lowest_max_mas_in_deep != UWB_MAS_PER_ZONE) &&
+			   (found == UWB_RSV_ALLOC_NOT_FOUND)) {
+			csi->start_col = tmp_csi.start_col;
+			found = UWB_RSV_ALLOC_FOUND;
+		}
+	}
+
+	if (found == UWB_RSV_ALLOC_FOUND) {
+		csi->interval = interval;
+		csi->safe_mas_per_col = num_safe_mas;
+		csi->unsafe_mas_per_col = num_unsafe_mas;
+
+		ai->safe_allocated_mases = (UWB_NUM_ZONES / interval) * num_safe_mas;
+		ai->unsafe_allocated_mases = (UWB_NUM_ZONES / interval) * num_unsafe_mas;
+		ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
+		ai->interval = interval;		
+	}
+	return found;
+}
+
+static void get_row_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_row_info *ri = &ai->ri;
+	int col, mas;
+  
+	ri->free_rows = 16;
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		ri->avail[mas] = 1;
+		for (col = 1; col < UWB_NUM_ZONES; col++) {
+			if (bm[col * UWB_NUM_ZONES + mas] == UWB_RSV_MAS_NOT_AVAIL) {
+				ri->free_rows--;
+				ri->avail[mas]=0;
+				break;
+			}
+		}
+	}
+}
+
+static void uwb_rsv_fill_column_info(unsigned char *bm, int column, struct uwb_rsv_col_info *rci)
+{
+	int mas;
+	int block_count = 0, start_block = 0; 
+	int previous_avail = 0;
+	int available = 0;
+	int safe_mas_in_row[UWB_MAS_PER_ZONE] = {
+		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
+	};
+
+	rci->max_avail_safe = 0;
+
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
+		if (!bm[column * UWB_NUM_ZONES + mas]) {
+			available++;
+			rci->max_avail_unsafe = available;
+
+			rci->highest_mas[available] = mas;
+
+			if (previous_avail) {
+				block_count++;
+				if ((block_count > safe_mas_in_row[start_block]) &&
+				    (!rci->max_avail_safe))
+					rci->max_avail_safe = available - 1;
+			} else {
+				previous_avail = 1;
+				start_block = mas;
+				block_count = 1;
+			}
+		} else {
+			previous_avail = 0;
+		}
+	}
+	if (!rci->max_avail_safe)
+		rci->max_avail_safe = rci->max_avail_unsafe;
+}
+
+static void get_column_descriptors(struct uwb_rsv_alloc_info *ai)
+{
+	unsigned char *bm = ai->bm;
+	struct uwb_rsv_col_info *ci = ai->ci;
+	int col;
+
+	for (col = 1; col < UWB_NUM_ZONES; col++) {
+		uwb_rsv_fill_column_info(bm, col, &ci[col]);
+	}
+}
+
+static int uwb_rsv_find_best_row_alloc(struct uwb_rsv_alloc_info *ai)
+{
+	int n_rows;
+	int max_rows = ai->max_mas / UWB_USABLE_MAS_PER_ROW;
+	int min_rows = ai->min_mas / UWB_USABLE_MAS_PER_ROW;
+	if (ai->min_mas % UWB_USABLE_MAS_PER_ROW)
+		min_rows++;
+	for (n_rows = max_rows; n_rows >= min_rows; n_rows--) {
+		if (n_rows <= ai->ri.free_rows) {
+			ai->ri.used_rows = n_rows;
+			ai->interval = 1; /* row reservation */
+			uwb_rsv_fill_row_alloc(ai);
+			return UWB_RSV_ALLOC_FOUND;
+		}
+	}  
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+static int uwb_rsv_find_best_col_alloc(struct uwb_rsv_alloc_info *ai, int interval)
+{
+	int n_safe, n_unsafe, n_mas;  
+	int n_column = UWB_NUM_ZONES / interval;
+	int max_per_zone = ai->max_mas / n_column;
+	int min_per_zone = ai->min_mas / n_column;
+
+	if (ai->min_mas % n_column)
+		min_per_zone++;
+
+	if (min_per_zone > UWB_MAS_PER_ZONE) {
+		return UWB_RSV_ALLOC_NOT_FOUND;
+	}
+    
+	if (max_per_zone > UWB_MAS_PER_ZONE) {
+		max_per_zone = UWB_MAS_PER_ZONE;
+	}
+    
+	for (n_mas = max_per_zone; n_mas >= min_per_zone; n_mas--) {
+		if (uwb_rsv_find_best_column_set(ai, interval, 0, n_mas) == UWB_RSV_ALLOC_NOT_FOUND)
+			continue;
+		for (n_safe = n_mas; n_safe >= 0; n_safe--) {
+			n_unsafe = n_mas - n_safe;
+			if (uwb_rsv_find_best_column_set(ai, interval, n_safe, n_unsafe) == UWB_RSV_ALLOC_FOUND) {
+				uwb_rsv_fill_column_alloc(ai);
+				return UWB_RSV_ALLOC_FOUND;
+			}
+		}
+	}
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
+
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
+				 struct uwb_mas_bm *result)
+{
+	struct uwb_rsv_alloc_info *ai;
+	int interval;
+	int bit_index;
+
+	ai = kzalloc(sizeof(struct uwb_rsv_alloc_info), GFP_KERNEL);
+	if (!ai)
+		return UWB_RSV_ALLOC_NOT_FOUND;
+	ai->min_mas = rsv->min_mas;
+	ai->max_mas = rsv->max_mas;
+	ai->max_interval = rsv->max_interval;
+
+
+	/* fill the not available vector from the available bm */
+	for_each_clear_bit(bit_index, available->bm, UWB_NUM_MAS)
+		ai->bm[bit_index] = UWB_RSV_MAS_NOT_AVAIL;
+
+	if (ai->max_interval == 1) {
+		get_row_descriptors(ai);
+		if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+		else
+			goto alloc_not_found;
+	}
+
+	get_column_descriptors(ai);
+        
+	for (interval = 16; interval >= 2; interval>>=1) {
+		if (interval > ai->max_interval)
+			continue;
+		if (uwb_rsv_find_best_col_alloc(ai, interval) == UWB_RSV_ALLOC_FOUND)
+			goto alloc_found;
+	}
+
+	/* try row reservation if no column is found */
+	get_row_descriptors(ai);
+	if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
+		goto alloc_found;
+	else
+		goto alloc_not_found;
+
+  alloc_found:
+	bitmap_zero(result->bm, UWB_NUM_MAS);
+	bitmap_zero(result->unsafe_bm, UWB_NUM_MAS);
+	/* fill the safe and unsafe bitmaps */
+	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
+		if (ai->bm[bit_index] == UWB_RSV_MAS_SAFE)
+			set_bit(bit_index, result->bm);
+		else if (ai->bm[bit_index] == UWB_RSV_MAS_UNSAFE)
+			set_bit(bit_index, result->unsafe_bm);
+	}
+	bitmap_or(result->bm, result->bm, result->unsafe_bm, UWB_NUM_MAS);
+
+	result->safe   = ai->safe_allocated_mases;
+	result->unsafe = ai->unsafe_allocated_mases;
+	
+	kfree(ai);		
+	return UWB_RSV_ALLOC_FOUND;
+  
+  alloc_not_found:
+	kfree(ai);
+	return UWB_RSV_ALLOC_NOT_FOUND;
+}
diff --git a/drivers/staging/uwb/beacon.c b/drivers/staging/uwb/beacon.c
new file mode 100644
index 000000000000..c483c19c5ef8
--- /dev/null
+++ b/drivers/staging/uwb/beacon.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Beacon management
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/kdev_t.h>
+#include <linux/slab.h>
+
+#include "uwb-internal.h"
+
+/* Start Beaconing command structure */
+struct uwb_rc_cmd_start_beacon {
+	struct uwb_rccb rccb;
+	__le16 wBPSTOffset;
+	u8 bChannelNumber;
+} __attribute__((packed));
+
+
+static int uwb_rc_start_beacon(struct uwb_rc *rc, u16 bpst_offset, u8 channel)
+{
+	int result;
+	struct uwb_rc_cmd_start_beacon *cmd;
+	struct uwb_rc_evt_confirm reply;
+
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
+	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_START_BEACON);
+	cmd->wBPSTOffset = cpu_to_le16(bpst_offset);
+	cmd->bChannelNumber = channel;
+	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply.rceb.wEvent = UWB_RC_CMD_START_BEACON;
+	result = uwb_rc_cmd(rc, "START-BEACON", &cmd->rccb, sizeof(*cmd),
+			    &reply.rceb, sizeof(reply));
+	if (result < 0)
+		goto error_cmd;
+	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(&rc->uwb_dev.dev,
+			"START-BEACON: command execution failed: %s (%d)\n",
+			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
+		result = -EIO;
+	}
+error_cmd:
+	kfree(cmd);
+	return result;
+}
+
+static int uwb_rc_stop_beacon(struct uwb_rc *rc)
+{
+	int result;
+	struct uwb_rccb *cmd;
+	struct uwb_rc_evt_confirm reply;
+
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+	cmd->bCommandType = UWB_RC_CET_GENERAL;
+	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_STOP_BEACON);
+	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply.rceb.wEvent = UWB_RC_CMD_STOP_BEACON;
+	result = uwb_rc_cmd(rc, "STOP-BEACON", cmd, sizeof(*cmd),
+			    &reply.rceb, sizeof(reply));
+	if (result < 0)
+		goto error_cmd;
+	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(&rc->uwb_dev.dev,
+			"STOP-BEACON: command execution failed: %s (%d)\n",
+			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
+		result = -EIO;
+	}
+error_cmd:
+	kfree(cmd);
+	return result;
+}
+
+/*
+ * Start/stop beacons
+ *
+ * @rc:          UWB Radio Controller to operate on
+ * @channel:     UWB channel on which to beacon (WUSB[table
+ *               5-12]). If -1, stop beaconing.
+ * @bpst_offset: Beacon Period Start Time offset; FIXME-do zero
+ *
+ * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
+ * of a SET IE command after the device sent the first beacon that includes
+ * the IEs specified in the SET IE command. So, after we start beaconing we
+ * check if there is anything in the IE cache and call the SET IE command
+ * if needed.
+ */
+int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
+{
+	int result;
+	struct device *dev = &rc->uwb_dev.dev;
+
+	dev_dbg(dev, "%s: channel = %d\n", __func__, channel);
+	if (channel < 0)
+		channel = -1;
+	if (channel == -1)
+		result = uwb_rc_stop_beacon(rc);
+	else {
+		/* channel >= 0...dah */
+		result = uwb_rc_start_beacon(rc, bpst_offset, channel);
+		if (result < 0) {
+			dev_err(dev, "Cannot start beaconing: %d\n", result);
+			return result;
+		}
+		if (le16_to_cpu(rc->ies->wIELength) > 0) {
+			result = uwb_rc_set_ie(rc, rc->ies);
+			if (result < 0) {
+				dev_err(dev, "Cannot set new IE on device: "
+					"%d\n", result);
+				result = uwb_rc_stop_beacon(rc);
+				channel = -1;
+				bpst_offset = 0;
+			}
+		}
+	}
+
+	if (result >= 0)
+		rc->beaconing = channel;
+	return result;
+}
+
+/*
+ * Beacon cache
+ *
+ * The purpose of this is to speed up the lookup of becon information
+ * when a new beacon arrives. The UWB Daemon uses it also to keep a
+ * tab of which devices are in radio distance and which not. When a
+ * device's beacon stays present for more than a certain amount of
+ * time, it is considered a new, usable device. When a beacon ceases
+ * to be received for a certain amount of time, it is considered that
+ * the device is gone.
+ *
+ * FIXME: use an allocator for the entries
+ * FIXME: use something faster for search than a list
+ */
+
+void uwb_bce_kfree(struct kref *_bce)
+{
+	struct uwb_beca_e *bce = container_of(_bce, struct uwb_beca_e, refcnt);
+
+	kfree(bce->be);
+	kfree(bce);
+}
+
+
+/* Find a beacon by dev addr in the cache */
+static
+struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc,
+					 const struct uwb_dev_addr *dev_addr)
+{
+	struct uwb_beca_e *bce, *next;
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
+		if (!memcmp(&bce->dev_addr, dev_addr, sizeof(bce->dev_addr)))
+			goto out;
+	}
+	bce = NULL;
+out:
+	return bce;
+}
+
+/* Find a beacon by dev addr in the cache */
+static
+struct uwb_beca_e *__uwb_beca_find_bymac(struct uwb_rc *rc,
+					 const struct uwb_mac_addr *mac_addr)
+{
+	struct uwb_beca_e *bce, *next;
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
+		if (!memcmp(bce->mac_addr, mac_addr->data,
+			    sizeof(struct uwb_mac_addr)))
+			goto out;
+	}
+	bce = NULL;
+out:
+	return bce;
+}
+
+/**
+ * uwb_dev_get_by_devaddr - get a UWB device with a specific DevAddr
+ * @rc:      the radio controller that saw the device
+ * @devaddr: DevAddr of the UWB device to find
+ *
+ * There may be more than one matching device (in the case of a
+ * DevAddr conflict), but only the first one is returned.
+ */
+struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
+				       const struct uwb_dev_addr *devaddr)
+{
+	struct uwb_dev *found = NULL;
+	struct uwb_beca_e *bce;
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bydev(rc, devaddr);
+	if (bce)
+		found = uwb_dev_try_get(rc, bce->uwb_dev);
+	mutex_unlock(&rc->uwb_beca.mutex);
+
+	return found;
+}
+
+/**
+ * uwb_dev_get_by_macaddr - get a UWB device with a specific EUI-48
+ * @rc:      the radio controller that saw the device
+ * @devaddr: EUI-48 of the UWB device to find
+ */
+struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
+				       const struct uwb_mac_addr *macaddr)
+{
+	struct uwb_dev *found = NULL;
+	struct uwb_beca_e *bce;
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, macaddr);
+	if (bce)
+		found = uwb_dev_try_get(rc, bce->uwb_dev);
+	mutex_unlock(&rc->uwb_beca.mutex);
+
+	return found;
+}
+
+/* Initialize a beacon cache entry */
+static void uwb_beca_e_init(struct uwb_beca_e *bce)
+{
+	mutex_init(&bce->mutex);
+	kref_init(&bce->refcnt);
+	stats_init(&bce->lqe_stats);
+	stats_init(&bce->rssi_stats);
+}
+
+/*
+ * Add a beacon to the cache
+ *
+ * @be:         Beacon event information
+ * @bf:         Beacon frame (part of b, really)
+ * @ts_jiffies: Timestamp (in jiffies) when the beacon was received
+ */
+static
+struct uwb_beca_e *__uwb_beca_add(struct uwb_rc *rc,
+				  struct uwb_rc_evt_beacon *be,
+				  struct uwb_beacon_frame *bf,
+				  unsigned long ts_jiffies)
+{
+	struct uwb_beca_e *bce;
+
+	bce = kzalloc(sizeof(*bce), GFP_KERNEL);
+	if (bce == NULL)
+		return NULL;
+	uwb_beca_e_init(bce);
+	bce->ts_jiffies = ts_jiffies;
+	bce->uwb_dev = NULL;
+	list_add(&bce->node, &rc->uwb_beca.list);
+	return bce;
+}
+
+/*
+ * Wipe out beacon entries that became stale
+ *
+ * Remove associated devicest too.
+ */
+void uwb_beca_purge(struct uwb_rc *rc)
+{
+	struct uwb_beca_e *bce, *next;
+	unsigned long expires;
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
+		expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms);
+		if (time_after(jiffies, expires)) {
+			uwbd_dev_offair(bce);
+		}
+	}
+	mutex_unlock(&rc->uwb_beca.mutex);
+}
+
+/* Clean up the whole beacon cache. Called on shutdown */
+void uwb_beca_release(struct uwb_rc *rc)
+{
+	struct uwb_beca_e *bce, *next;
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
+		list_del(&bce->node);
+		uwb_bce_put(bce);
+	}
+	mutex_unlock(&rc->uwb_beca.mutex);
+}
+
+static void uwb_beacon_print(struct uwb_rc *rc, struct uwb_rc_evt_beacon *be,
+			     struct uwb_beacon_frame *bf)
+{
+	char macbuf[UWB_ADDR_STRSIZE];
+	char devbuf[UWB_ADDR_STRSIZE];
+	char dstbuf[UWB_ADDR_STRSIZE];
+
+	uwb_mac_addr_print(macbuf, sizeof(macbuf), &bf->Device_Identifier);
+	uwb_dev_addr_print(devbuf, sizeof(devbuf), &bf->hdr.SrcAddr);
+	uwb_dev_addr_print(dstbuf, sizeof(dstbuf), &bf->hdr.DestAddr);
+	dev_info(&rc->uwb_dev.dev,
+		 "BEACON from %s to %s (ch%u offset %u slot %u MAC %s)\n",
+		 devbuf, dstbuf, be->bChannelNumber, be->wBPSTOffset,
+		 bf->Beacon_Slot_Number, macbuf);
+}
+
+/*
+ * @bce: beacon cache entry, referenced
+ */
+ssize_t uwb_bce_print_IEs(struct uwb_dev *uwb_dev, struct uwb_beca_e *bce,
+			  char *buf, size_t size)
+{
+	ssize_t result = 0;
+	struct uwb_rc_evt_beacon *be;
+	struct uwb_beacon_frame *bf;
+	int ies_len;
+	struct uwb_ie_hdr *ies;
+
+	mutex_lock(&bce->mutex);
+
+	be = bce->be;
+	if (be) {
+		bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
+		ies_len = be->wBeaconInfoLength - sizeof(struct uwb_beacon_frame);
+		ies = (struct uwb_ie_hdr *)bf->IEData;
+
+		result = uwb_ie_dump_hex(ies, ies_len, buf, size);
+	}
+
+	mutex_unlock(&bce->mutex);
+
+	return result;
+}
+
+/*
+ * Verify that the beacon event, frame and IEs are ok
+ */
+static int uwb_verify_beacon(struct uwb_rc *rc, struct uwb_event *evt,
+			     struct uwb_rc_evt_beacon *be)
+{
+	int result = -EINVAL;
+	struct uwb_beacon_frame *bf;
+	struct device *dev = &rc->uwb_dev.dev;
+
+	/* Is there enough data to decode a beacon frame? */
+	if (evt->notif.size < sizeof(*be) + sizeof(*bf)) {
+		dev_err(dev, "BEACON event: Not enough data to decode "
+			"(%zu vs %zu bytes needed)\n", evt->notif.size,
+			sizeof(*be) + sizeof(*bf));
+		goto error;
+	}
+	/* FIXME: make sure beacon frame IEs are fine and that the whole thing
+	 * is consistent */
+	result = 0;
+error:
+	return result;
+}
+
+/*
+ * Handle UWB_RC_EVT_BEACON events
+ *
+ * We check the beacon cache to see how the received beacon fares. If
+ * is there already we refresh the timestamp. If not we create a new
+ * entry.
+ *
+ * According to the WHCI and WUSB specs, only one beacon frame is
+ * allowed per notification block, so we don't bother about scanning
+ * for more.
+ */
+int uwbd_evt_handle_rc_beacon(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct uwb_rc *rc;
+	struct uwb_rc_evt_beacon *be;
+	struct uwb_beacon_frame *bf;
+	struct uwb_beca_e *bce;
+
+	rc = evt->rc;
+	be = container_of(evt->notif.rceb, struct uwb_rc_evt_beacon, rceb);
+	result = uwb_verify_beacon(rc, evt, be);
+	if (result < 0)
+		return result;
+
+	/* FIXME: handle alien beacons. */
+	if (be->bBeaconType == UWB_RC_BEACON_TYPE_OL_ALIEN ||
+	    be->bBeaconType == UWB_RC_BEACON_TYPE_NOL_ALIEN) {
+		return -ENOSYS;
+	}
+
+	bf = (struct uwb_beacon_frame *) be->BeaconInfo;
+
+	/*
+	 * Drop beacons from devices with a NULL EUI-48 -- they cannot
+	 * be uniquely identified.
+	 *
+	 * It's expected that these will all be WUSB devices and they
+	 * have a WUSB specific connection method so ignoring them
+	 * here shouldn't be a problem.
+	 */
+	if (uwb_mac_addr_bcast(&bf->Device_Identifier))
+		return 0;
+
+	mutex_lock(&rc->uwb_beca.mutex);
+	bce = __uwb_beca_find_bymac(rc, &bf->Device_Identifier);
+	if (bce == NULL) {
+		/* Not in there, a new device is pinging */
+		uwb_beacon_print(evt->rc, be, bf);
+		bce = __uwb_beca_add(rc, be, bf, evt->ts_jiffies);
+		if (bce == NULL) {
+			mutex_unlock(&rc->uwb_beca.mutex);
+			return -ENOMEM;
+		}
+	}
+	mutex_unlock(&rc->uwb_beca.mutex);
+
+	mutex_lock(&bce->mutex);
+	/* purge old beacon data */
+	kfree(bce->be);
+
+	/* Update commonly used fields */
+	bce->ts_jiffies = evt->ts_jiffies;
+	bce->be = be;
+	bce->dev_addr = bf->hdr.SrcAddr;
+	bce->mac_addr = &bf->Device_Identifier;
+	be->wBPSTOffset = le16_to_cpu(be->wBPSTOffset);
+	be->wBeaconInfoLength = le16_to_cpu(be->wBeaconInfoLength);
+	stats_add_sample(&bce->lqe_stats, be->bLQI - 7);
+	stats_add_sample(&bce->rssi_stats, be->bRSSI + 18);
+
+	/*
+	 * This might be a beacon from a new device.
+	 */
+	if (bce->uwb_dev == NULL)
+		uwbd_dev_onair(evt->rc, bce);
+
+	mutex_unlock(&bce->mutex);
+
+	return 1; /* we keep the event data */
+}
+
+/*
+ * Handle UWB_RC_EVT_BEACON_SIZE events
+ *
+ * XXXXX
+ */
+int uwbd_evt_handle_rc_beacon_size(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_beacon_size *bs;
+
+	/* Is there enough data to decode the event? */
+	if (evt->notif.size < sizeof(*bs)) {
+		dev_err(dev, "BEACON SIZE notification: Not enough data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			evt->notif.size, sizeof(*bs));
+		goto error;
+	}
+	bs = container_of(evt->notif.rceb, struct uwb_rc_evt_beacon_size, rceb);
+	if (0)
+		dev_info(dev, "Beacon size changed to %u bytes "
+			"(FIXME: action?)\n", le16_to_cpu(bs->wNewBeaconSize));
+	else {
+		/* temporary hack until we do something with this message... */
+		static unsigned count;
+		if (++count % 1000 == 0)
+			dev_info(dev, "Beacon size changed %u times "
+				"(FIXME: action?)\n", count);
+	}
+	result = 0;
+error:
+	return result;
+}
+
+/**
+ * uwbd_evt_handle_rc_bp_slot_change - handle a BP_SLOT_CHANGE event
+ * @evt: the BP_SLOT_CHANGE notification from the radio controller
+ *
+ * If the event indicates that no beacon period slots were available
+ * then radio controller has transitioned to a non-beaconing state.
+ * Otherwise, simply save the current beacon slot.
+ */
+int uwbd_evt_handle_rc_bp_slot_change(struct uwb_event *evt)
+{
+	struct uwb_rc *rc = evt->rc;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rc_evt_bp_slot_change *bpsc;
+
+	if (evt->notif.size < sizeof(*bpsc)) {
+		dev_err(dev, "BP SLOT CHANGE event: Not enough data\n");
+		return -EINVAL;
+	}
+	bpsc = container_of(evt->notif.rceb, struct uwb_rc_evt_bp_slot_change, rceb);
+
+	if (uwb_rc_evt_bp_slot_change_no_slot(bpsc)) {
+		dev_err(dev, "stopped beaconing: No free slots in BP\n");
+		mutex_lock(&rc->uwb_dev.mutex);
+		rc->beaconing = -1;
+		mutex_unlock(&rc->uwb_dev.mutex);
+	} else
+		rc->uwb_dev.beacon_slot = uwb_rc_evt_bp_slot_change_slot_num(bpsc);
+
+	return 0;
+}
+
+/**
+ * Handle UWB_RC_EVT_BPOIE_CHANGE events
+ *
+ * XXXXX
+ */
+struct uwb_ie_bpo {
+	struct uwb_ie_hdr hdr;
+	u8                bp_length;
+	u8                data[];
+} __attribute__((packed));
+
+int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_bpoie_change *bpoiec;
+	struct uwb_ie_bpo *bpoie;
+	static unsigned count;	/* FIXME: this is a temp hack */
+	size_t iesize;
+
+	/* Is there enough data to decode it? */
+	if (evt->notif.size < sizeof(*bpoiec)) {
+		dev_err(dev, "BPOIEC notification: Not enough data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			evt->notif.size, sizeof(*bpoiec));
+		goto error;
+	}
+	bpoiec = container_of(evt->notif.rceb, struct uwb_rc_evt_bpoie_change, rceb);
+	iesize = le16_to_cpu(bpoiec->wBPOIELength);
+	if (iesize < sizeof(*bpoie)) {
+		dev_err(dev, "BPOIEC notification: Not enough IE data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			iesize, sizeof(*bpoie));
+		goto error;
+	}
+	if (++count % 1000 == 0)	/* Lame placeholder */
+		dev_info(dev, "BPOIE: %u changes received\n", count);
+	/*
+	 * FIXME: At this point we should go over all the IEs in the
+	 *        bpoiec->BPOIE array and act on each.
+	 */
+	result = 0;
+error:
+	return result;
+}
+
+/*
+ * Print beaconing state.
+ */
+static ssize_t uwb_rc_beacon_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	ssize_t result;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	result = sprintf(buf, "%d\n", rc->beaconing);
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return result;
+}
+
+/*
+ * Start beaconing on the specified channel, or stop beaconing.
+ */
+static ssize_t uwb_rc_beacon_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	int channel;
+	ssize_t result = -EINVAL;
+
+	result = sscanf(buf, "%d", &channel);
+	if (result >= 1)
+		result = uwb_radio_force_channel(rc, channel);
+
+	return result < 0 ? result : size;
+}
+DEVICE_ATTR(beacon, S_IRUGO | S_IWUSR, uwb_rc_beacon_show, uwb_rc_beacon_store);
diff --git a/drivers/staging/uwb/driver.c b/drivers/staging/uwb/driver.c
new file mode 100644
index 000000000000..5755c2e49ffc
--- /dev/null
+++ b/drivers/staging/uwb/driver.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Driver initialization, etc
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ *
+ * Life cycle: FIXME: explain
+ *
+ *  UWB radio controller:
+ *
+ *    1. alloc a uwb_rc, zero it
+ *    2. call uwb_rc_init() on it to set it up + ops (won't do any
+ *       kind of allocation)
+ *    3. register (now it is owned by the UWB stack--deregister before
+ *       freeing/destroying).
+ *    4. It lives on it's own now (UWB stack handles)--when it
+ *       disconnects, call unregister()
+ *    5. free it.
+ *
+ *    Make sure you have a reference to the uwb_rc before calling
+ *    any of the UWB API functions.
+ *
+ * TODO:
+ *
+ * 1. Locking and life cycle management is crappy still. All entry
+ *    points to the UWB HCD API assume you have a reference on the
+ *    uwb_rc structure and that it won't go away. They mutex lock it
+ *    before doing anything.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/kdev_t.h>
+#include <linux/random.h>
+
+#include "uwb-internal.h"
+
+
+/* UWB stack attributes (or 'global' constants) */
+
+
+/**
+ * If a beacon disappears for longer than this, then we consider the
+ * device who was represented by that beacon to be gone.
+ *
+ * ECMA-368[17.2.3, last para] establishes that a device must not
+ * consider a device to be its neighbour if he doesn't receive a beacon
+ * for more than mMaxLostBeacons. mMaxLostBeacons is defined in
+ * ECMA-368[17.16] as 3; because we can get only one beacon per
+ * superframe, that'd be 3 * 65ms = 195 ~ 200 ms. Let's give it time
+ * for jitter and stuff and make it 500 ms.
+ */
+unsigned long beacon_timeout_ms = 500;
+
+static
+ssize_t beacon_timeout_ms_show(struct class *class,
+				struct class_attribute *attr,
+				char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%lu\n", beacon_timeout_ms);
+}
+
+static
+ssize_t beacon_timeout_ms_store(struct class *class,
+				struct class_attribute *attr,
+				const char *buf, size_t size)
+{
+	unsigned long bt;
+	ssize_t result;
+	result = sscanf(buf, "%lu", &bt);
+	if (result != 1)
+		return -EINVAL;
+	beacon_timeout_ms = bt;
+	return size;
+}
+static CLASS_ATTR_RW(beacon_timeout_ms);
+
+static struct attribute *uwb_class_attrs[] = {
+	&class_attr_beacon_timeout_ms.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(uwb_class);
+
+/** Device model classes */
+struct class uwb_rc_class = {
+	.name        = "uwb_rc",
+	.class_groups = uwb_class_groups,
+};
+
+
+static int __init uwb_subsys_init(void)
+{
+	int result = 0;
+
+	result = uwb_est_create();
+	if (result < 0) {
+		printk(KERN_ERR "uwb: Can't initialize EST subsystem\n");
+		goto error_est_init;
+	}
+
+	result = class_register(&uwb_rc_class);
+	if (result < 0)
+		goto error_uwb_rc_class_register;
+
+	/* Register the UWB bus */
+	result = bus_register(&uwb_bus_type);
+	if (result) {
+		pr_err("%s - registering bus driver failed\n", __func__);
+		goto exit_bus;
+	}
+
+	uwb_dbg_init();
+	return 0;
+
+exit_bus:
+	class_unregister(&uwb_rc_class);
+error_uwb_rc_class_register:
+	uwb_est_destroy();
+error_est_init:
+	return result;
+}
+module_init(uwb_subsys_init);
+
+static void __exit uwb_subsys_exit(void)
+{
+	uwb_dbg_exit();
+	bus_unregister(&uwb_bus_type);
+	class_unregister(&uwb_rc_class);
+	uwb_est_destroy();
+	return;
+}
+module_exit(uwb_subsys_exit);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Ultra Wide Band core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/uwb/drp-avail.c b/drivers/staging/uwb/drp-avail.c
new file mode 100644
index 000000000000..02392ab82a7d
--- /dev/null
+++ b/drivers/staging/uwb/drp-avail.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * DRP availability management
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Reinette Chatre <reinette.chatre@intel.com>
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * Manage DRP Availability (the MAS available for DRP
+ * reservations). Thus:
+ *
+ * - Handle DRP Availability Change notifications
+ *
+ * - Allow the reservation manager to indicate MAS reserved/released
+ *   by local (owned by/targeted at the radio controller)
+ *   reservations.
+ *
+ * - Based on the two sources above, generate a DRP Availability IE to
+ *   be included in the beacon.
+ *
+ * See also the documentation for struct uwb_drp_avail.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitmap.h>
+#include "uwb-internal.h"
+
+/**
+ * uwb_drp_avail_init - initialize an RC's MAS availability
+ *
+ * All MAS are available initially.  The RC will inform use which
+ * slots are used for the BP (it may change in size).
+ */
+void uwb_drp_avail_init(struct uwb_rc *rc)
+{
+	bitmap_fill(rc->drp_avail.global, UWB_NUM_MAS);
+	bitmap_fill(rc->drp_avail.local, UWB_NUM_MAS);
+	bitmap_fill(rc->drp_avail.pending, UWB_NUM_MAS);
+}
+
+/*
+ * Determine MAS available for new local reservations.
+ *
+ * avail = global & local & pending
+ */
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
+{
+	bitmap_and(avail->bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
+	bitmap_and(avail->bm, avail->bm, rc->drp_avail.pending, UWB_NUM_MAS);
+}
+
+/**
+ * uwb_drp_avail_reserve_pending - reserve MAS for a new reservation
+ * @rc: the radio controller
+ * @mas: the MAS to reserve
+ *
+ * Returns 0 on success, or -EBUSY if the MAS requested aren't available.
+ */
+int uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas)
+{
+	struct uwb_mas_bm avail;
+
+	uwb_drp_available(rc, &avail);
+	if (!bitmap_subset(mas->bm, avail.bm, UWB_NUM_MAS))
+		return -EBUSY;
+
+	bitmap_andnot(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
+	return 0;
+}
+
+/**
+ * uwb_drp_avail_reserve - reserve MAS for an established reservation
+ * @rc: the radio controller
+ * @mas: the MAS to reserve
+ */
+void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas)
+{
+	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
+	bitmap_andnot(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
+	rc->drp_avail.ie_valid = false;
+}
+
+/**
+ * uwb_drp_avail_release - release MAS from a pending or established reservation
+ * @rc: the radio controller
+ * @mas: the MAS to release
+ */
+void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas)
+{
+	bitmap_or(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
+	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
+	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
+}
+
+/**
+ * uwb_drp_avail_ie_update - update the DRP Availability IE
+ * @rc: the radio controller
+ *
+ * avail = global & local
+ */
+void uwb_drp_avail_ie_update(struct uwb_rc *rc)
+{
+	struct uwb_mas_bm avail;
+
+	bitmap_and(avail.bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
+
+	rc->drp_avail.ie.hdr.element_id = UWB_IE_DRP_AVAILABILITY;
+	rc->drp_avail.ie.hdr.length = UWB_NUM_MAS / 8;
+	uwb_mas_bm_copy_le(rc->drp_avail.ie.bmp, &avail);
+	rc->drp_avail.ie_valid = true;
+}
+
+/**
+ * Create an unsigned long from a buffer containing a byte stream.
+ *
+ * @array: pointer to buffer
+ * @itr:   index of buffer from where we start
+ * @len:   the buffer's remaining size may not be exact multiple of
+ *         sizeof(unsigned long), @len is the length of buffer that needs
+ *         to be converted. This will be sizeof(unsigned long) or smaller
+ *         (BUG if not). If it is smaller then we will pad the remaining
+ *         space of the result with zeroes.
+ */
+static
+unsigned long get_val(u8 *array, size_t itr, size_t len)
+{
+	unsigned long val = 0;
+	size_t top = itr + len;
+
+	BUG_ON(len > sizeof(val));
+
+	while (itr < top) {
+		val <<= 8;
+		val |= array[top - 1];
+		top--;
+	}
+	val <<= 8 * (sizeof(val) - len); /* padding */
+	return val;
+}
+
+/**
+ * Initialize bitmap from data buffer.
+ *
+ * The bitmap to be converted could come from a IE, for example a
+ * DRP Availability IE.
+ * From ECMA-368 1.0 [16.8.7]: "
+ * octets: 1            1               N * (0 to 32)
+ *         Element ID   Length (=N)     DRP Availability Bitmap
+ *
+ * The DRP Availability Bitmap field is up to 256 bits long, one
+ * bit for each MAS in the superframe, where the least-significant
+ * bit of the field corresponds to the first MAS in the superframe
+ * and successive bits correspond to successive MASs."
+ *
+ * The DRP Availability bitmap is in octets from 0 to 32, so octet
+ * 32 contains bits for MAS 1-8, etc. If the bitmap is smaller than 32
+ * octets, the bits in octets not included at the end of the bitmap are
+ * treated as zero. In this case (when the bitmap is smaller than 32
+ * octets) the MAS represented range from MAS 1 to MAS (size of bitmap)
+ * with the last octet still containing bits for MAS 1-8, etc.
+ *
+ * For example:
+ * F00F0102 03040506 0708090A 0B0C0D0E 0F010203
+ * ^^^^
+ * ||||
+ * ||||
+ * |||\LSB of byte is MAS 9
+ * ||\MSB of byte is MAS 16
+ * |\LSB of first byte is MAS 1
+ * \ MSB of byte is MAS 8
+ *
+ * An example of this encoding can be found in ECMA-368 Annex-D [Table D.11]
+ *
+ * The resulting bitmap will have the following mapping:
+ *	bit position 0 == MAS 1
+ *	bit position 1 == MAS 2
+ *	...
+ *	bit position (UWB_NUM_MAS - 1) == MAS UWB_NUM_MAS
+ *
+ * @bmp_itr:	pointer to bitmap (can be declared with DECLARE_BITMAP)
+ * @buffer:	pointer to buffer containing bitmap data in big endian
+ *              format (MSB first)
+ * @buffer_size:number of bytes with which bitmap should be initialized
+ */
+static
+void buffer_to_bmp(unsigned long *bmp_itr, void *_buffer,
+		   size_t buffer_size)
+{
+	u8 *buffer = _buffer;
+	size_t itr, len;
+	unsigned long val;
+
+	itr = 0;
+	while (itr < buffer_size) {
+		len = buffer_size - itr >= sizeof(val) ?
+			sizeof(val) : buffer_size - itr;
+		val = get_val(buffer, itr, len);
+		bmp_itr[itr / sizeof(val)] = val;
+		itr += sizeof(val);
+	}
+}
+
+
+/**
+ * Extract DRP Availability bitmap from the notification.
+ *
+ * The notification that comes in contains a bitmap of (UWB_NUM_MAS / 8) bytes
+ * We convert that to our internal representation.
+ */
+static
+int uwbd_evt_get_drp_avail(struct uwb_event *evt, unsigned long *bmp)
+{
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_drp_avail *drp_evt;
+	int result = -EINVAL;
+
+	/* Is there enough data to decode the event? */
+	if (evt->notif.size < sizeof(*drp_evt)) {
+		dev_err(dev, "DRP Availability Change: Not enough "
+			"data to decode event [%zu bytes, %zu "
+			"needed]\n", evt->notif.size, sizeof(*drp_evt));
+		goto error;
+	}
+	drp_evt = container_of(evt->notif.rceb, struct uwb_rc_evt_drp_avail, rceb);
+	buffer_to_bmp(bmp, drp_evt->bmp, UWB_NUM_MAS/8);
+	result = 0;
+error:
+	return result;
+}
+
+
+/**
+ * Process an incoming DRP Availability notification.
+ *
+ * @evt:	Event information (packs the actual event data, which
+ *              radio controller it came to, etc).
+ *
+ * @returns:    0 on success (so uwbd() frees the event buffer), < 0
+ *              on error.
+ *
+ * According to ECMA-368 1.0 [16.8.7], bits set to ONE indicate that
+ * the MAS slot is available, bits set to ZERO indicate that the slot
+ * is busy.
+ *
+ * So we clear available slots, we set used slots :)
+ *
+ * The notification only marks non-availability based on the BP and
+ * received DRP IEs that are not for this radio controller.  A copy of
+ * this bitmap is needed to generate the real availability (which
+ * includes local and pending reservations).
+ *
+ * The DRP Availability IE that this radio controller emits will need
+ * to be updated.
+ */
+int uwbd_evt_handle_rc_drp_avail(struct uwb_event *evt)
+{
+	int result;
+	struct uwb_rc *rc = evt->rc;
+	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
+
+	result = uwbd_evt_get_drp_avail(evt, bmp);
+	if (result < 0)
+		return result;
+
+	mutex_lock(&rc->rsvs_mutex);
+	bitmap_copy(rc->drp_avail.global, bmp, UWB_NUM_MAS);
+	rc->drp_avail.ie_valid = false;
+	uwb_rsv_handle_drp_avail_change(rc);
+	mutex_unlock(&rc->rsvs_mutex);
+
+	uwb_rsv_sched_update(rc);
+
+	return 0;
+}
diff --git a/drivers/staging/uwb/drp-ie.c b/drivers/staging/uwb/drp-ie.c
new file mode 100644
index 000000000000..b2a862cf76de
--- /dev/null
+++ b/drivers/staging/uwb/drp-ie.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB DRP IE management.
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+
+#include "uwb.h"
+#include "uwb-internal.h"
+
+
+/*
+ * Return the reason code for a reservations's DRP IE.
+ */
+static int uwb_rsv_reason_code(struct uwb_rsv *rsv)
+{
+	static const int reason_codes[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_PENDING]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MODIFIED]           = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = UWB_DRP_REASON_MODIFIED,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_ACCEPTED]           = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_CONFLICT]           = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_PENDING]            = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_DENIED]             = UWB_DRP_REASON_DENIED,
+		[UWB_RSV_STATE_T_RESIZED]            = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return reason_codes[rsv->state];
+}
+
+/*
+ * Return the reason code for a reservations's companion DRP IE .
+ */
+static int uwb_rsv_companion_reason_code(struct uwb_rsv *rsv)
+{
+	static const int companion_reason_codes[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
+	};
+
+	return companion_reason_codes[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservations's DRP IE.
+ */
+int uwb_rsv_status(struct uwb_rsv *rsv)
+{
+	static const int statuses[] = {
+		[UWB_RSV_STATE_O_INITIATED]          = 0,
+		[UWB_RSV_STATE_O_PENDING]            = 0,
+		[UWB_RSV_STATE_O_MODIFIED]           = 1,
+		[UWB_RSV_STATE_O_ESTABLISHED]        = 1,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]        = 0,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]     = 1,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]      = 1,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 1,
+		[UWB_RSV_STATE_T_ACCEPTED]           = 1,
+		[UWB_RSV_STATE_T_CONFLICT]           = 0,
+		[UWB_RSV_STATE_T_PENDING]            = 0,
+		[UWB_RSV_STATE_T_DENIED]             = 0,
+		[UWB_RSV_STATE_T_RESIZED]            = 1,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 1,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 1,
+
+	};
+
+	return statuses[rsv->state];
+}
+
+/*
+ * Return the status bit for a reservations's companion DRP IE .
+ */
+int uwb_rsv_companion_status(struct uwb_rsv *rsv)
+{
+	static const int companion_statuses[] = {
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 0,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 0,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 0,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 0,
+	};
+
+	return companion_statuses[rsv->state];
+}
+
+/*
+ * Allocate a DRP IE.
+ *
+ * To save having to free/allocate a DRP IE when its MAS changes,
+ * enough memory is allocated for the maxiumum number of DRP
+ * allocation fields.  This gives an overhead per reservation of up to
+ * (UWB_NUM_ZONES - 1) * 4 = 60 octets.
+ */
+static struct uwb_ie_drp *uwb_drp_ie_alloc(void)
+{
+	struct uwb_ie_drp *drp_ie;
+
+	drp_ie = kzalloc(struct_size(drp_ie, allocs, UWB_NUM_ZONES),
+			 GFP_KERNEL);
+	if (drp_ie)
+		drp_ie->hdr.element_id = UWB_IE_DRP;
+	return drp_ie;
+}
+
+
+/*
+ * Fill a DRP IE's allocation fields from a MAS bitmap.
+ */
+static void uwb_drp_ie_from_bm(struct uwb_ie_drp *drp_ie,
+			       struct uwb_mas_bm *mas)
+{
+	int z, i, num_fields = 0, next = 0;
+	struct uwb_drp_alloc *zones;
+	__le16 current_bmp;
+	DECLARE_BITMAP(tmp_bmp, UWB_NUM_MAS);
+	DECLARE_BITMAP(tmp_mas_bm, UWB_MAS_PER_ZONE);
+
+	zones = drp_ie->allocs;
+
+	bitmap_copy(tmp_bmp, mas->bm, UWB_NUM_MAS);
+
+	/* Determine unique MAS bitmaps in zones from bitmap. */
+	for (z = 0; z < UWB_NUM_ZONES; z++) {
+		bitmap_copy(tmp_mas_bm, tmp_bmp, UWB_MAS_PER_ZONE);
+		if (bitmap_weight(tmp_mas_bm, UWB_MAS_PER_ZONE) > 0) {
+			bool found = false;
+			current_bmp = (__le16) *tmp_mas_bm;
+			for (i = 0; i < next; i++) {
+				if (current_bmp == zones[i].mas_bm) {
+					zones[i].zone_bm |= 1 << z;
+					found = true;
+					break;
+				}
+			}
+			if (!found)  {
+				num_fields++;
+				zones[next].zone_bm = 1 << z;
+				zones[next].mas_bm = current_bmp;
+				next++;
+			}
+		}
+		bitmap_shift_right(tmp_bmp, tmp_bmp, UWB_MAS_PER_ZONE, UWB_NUM_MAS);
+	}
+
+	/* Store in format ready for transmission (le16). */
+	for (i = 0; i < num_fields; i++) {
+		drp_ie->allocs[i].zone_bm = cpu_to_le16(zones[i].zone_bm);
+		drp_ie->allocs[i].mas_bm = cpu_to_le16(zones[i].mas_bm);
+	}
+
+	drp_ie->hdr.length = sizeof(struct uwb_ie_drp) - sizeof(struct uwb_ie_hdr)
+		+ num_fields * sizeof(struct uwb_drp_alloc);
+}
+
+/**
+ * uwb_drp_ie_update - update a reservation's DRP IE
+ * @rsv: the reservation
+ */
+int uwb_drp_ie_update(struct uwb_rsv *rsv)
+{
+	struct uwb_ie_drp *drp_ie;
+	struct uwb_rsv_move *mv;
+	int unsafe;
+
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		kfree(rsv->drp_ie);
+		rsv->drp_ie = NULL;
+		return 0;
+	}
+
+	unsafe = rsv->mas.unsafe ? 1 : 0;
+
+	if (rsv->drp_ie == NULL) {
+		rsv->drp_ie = uwb_drp_ie_alloc();
+		if (rsv->drp_ie == NULL)
+			return -ENOMEM;
+	}
+	drp_ie = rsv->drp_ie;
+
+	uwb_ie_drp_set_unsafe(drp_ie,       unsafe);
+	uwb_ie_drp_set_tiebreaker(drp_ie,   rsv->tiebreaker);
+	uwb_ie_drp_set_owner(drp_ie,        uwb_rsv_is_owner(rsv));
+	uwb_ie_drp_set_status(drp_ie,       uwb_rsv_status(rsv));
+	uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_reason_code(rsv));
+	uwb_ie_drp_set_stream_index(drp_ie, rsv->stream);
+	uwb_ie_drp_set_type(drp_ie,         rsv->type);
+
+	if (uwb_rsv_is_owner(rsv)) {
+		switch (rsv->target.type) {
+		case UWB_RSV_TARGET_DEV:
+			drp_ie->dev_addr = rsv->target.dev->dev_addr;
+			break;
+		case UWB_RSV_TARGET_DEVADDR:
+			drp_ie->dev_addr = rsv->target.devaddr;
+			break;
+		}
+	} else
+		drp_ie->dev_addr = rsv->owner->dev_addr;
+
+	uwb_drp_ie_from_bm(drp_ie, &rsv->mas);
+
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv;
+		if (mv->companion_drp_ie == NULL) {
+			mv->companion_drp_ie = uwb_drp_ie_alloc();
+			if (mv->companion_drp_ie == NULL)
+				return -ENOMEM;
+		}
+		drp_ie = mv->companion_drp_ie;
+
+		/* keep all the same configuration of the main drp_ie */
+		memcpy(drp_ie, rsv->drp_ie, sizeof(struct uwb_ie_drp));
+
+
+		/* FIXME: handle properly the unsafe bit */
+		uwb_ie_drp_set_unsafe(drp_ie,       1);
+		uwb_ie_drp_set_status(drp_ie,       uwb_rsv_companion_status(rsv));
+		uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_companion_reason_code(rsv));
+
+		uwb_drp_ie_from_bm(drp_ie, &mv->companion_mas);
+	}
+
+	rsv->ie_valid = true;
+	return 0;
+}
+
+/*
+ * Set MAS bits from given MAS bitmap in a single zone of large bitmap.
+ *
+ * We are given a zone id and the MAS bitmap of bits that need to be set in
+ * this zone. Note that this zone may already have bits set and this only
+ * adds settings - we cannot simply assign the MAS bitmap contents to the
+ * zone contents. We iterate over the the bits (MAS) in the zone and set the
+ * bits that are set in the given MAS bitmap.
+ */
+static
+void uwb_drp_ie_single_zone_to_bm(struct uwb_mas_bm *bm, u8 zone, u16 mas_bm)
+{
+	int mas;
+	u16 mas_mask;
+
+	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++) {
+		mas_mask = 1 << mas;
+		if (mas_bm & mas_mask)
+			set_bit(zone * UWB_NUM_ZONES + mas, bm->bm);
+	}
+}
+
+/**
+ * uwb_drp_ie_zones_to_bm - convert DRP allocation fields to a bitmap
+ * @mas:    MAS bitmap that will be populated to correspond to the
+ *          allocation fields in the DRP IE
+ * @drp_ie: the DRP IE that contains the allocation fields.
+ *
+ * The input format is an array of MAS allocation fields (16 bit Zone
+ * bitmap, 16 bit MAS bitmap) as described in [ECMA-368] section
+ * 16.8.6. The output is a full 256 bit MAS bitmap.
+ *
+ * We go over all the allocation fields, for each allocation field we
+ * know which zones are impacted. We iterate over all the zones
+ * impacted and call a function that will set the correct MAS bits in
+ * each zone.
+ */
+void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie)
+{
+	int numallocs = (drp_ie->hdr.length - 4) / 4;
+	const struct uwb_drp_alloc *alloc;
+	int cnt;
+	u16 zone_bm, mas_bm;
+	u8 zone;
+	u16 zone_mask;
+
+	bitmap_zero(bm->bm, UWB_NUM_MAS);
+
+	for (cnt = 0; cnt < numallocs; cnt++) {
+		alloc = &drp_ie->allocs[cnt];
+		zone_bm = le16_to_cpu(alloc->zone_bm);
+		mas_bm = le16_to_cpu(alloc->mas_bm);
+		for (zone = 0; zone < UWB_NUM_ZONES; zone++)   {
+			zone_mask = 1 << zone;
+			if (zone_bm & zone_mask)
+				uwb_drp_ie_single_zone_to_bm(bm, zone, mas_bm);
+		}
+	}
+}
+
diff --git a/drivers/staging/uwb/drp.c b/drivers/staging/uwb/drp.c
new file mode 100644
index 000000000000..869987bede7b
--- /dev/null
+++ b/drivers/staging/uwb/drp.c
@@ -0,0 +1,842 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Dynamic Reservation Protocol handling
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include "uwb-internal.h"
+
+
+/* DRP Conflict Actions ([ECMA-368 2nd Edition] 17.4.6) */
+enum uwb_drp_conflict_action {
+	/* Reservation is maintained, no action needed */
+	UWB_DRP_CONFLICT_MANTAIN = 0,
+
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. If the device is the reservation
+	 * target, it shall also set the Reason Code in its DRP IE to
+	 * Conflict in its beacon in the following superframe.
+	 */
+	UWB_DRP_CONFLICT_ACT1,
+
+	/* the device shall not set the Reservation Status bit to ONE
+	 * and shall not transmit frames in conflicting MASs. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */
+	UWB_DRP_CONFLICT_ACT2,
+
+	/* the device shall not transmit frames in conflicting MASs in
+	 * the following superframe. It shall remove the conflicting
+	 * MASs from the reservation or set the Reservation Status to
+	 * ZERO in its beacon in the following superframe. If the
+	 * device is the reservation target, it shall also set the
+	 * Reason Code in its DRP IE to Conflict.
+	 */
+	UWB_DRP_CONFLICT_ACT3,
+};
+
+
+static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg,
+				    struct uwb_rceb *reply, ssize_t reply_size)
+{
+	struct uwb_rc_evt_set_drp_ie *r = (struct uwb_rc_evt_set_drp_ie *)reply;
+	unsigned long flags;
+
+	if (r != NULL) {
+		if (r->bResultCode != UWB_RC_RES_SUCCESS)
+			dev_err(&rc->uwb_dev.dev, "SET-DRP-IE failed: %s (%d)\n",
+				uwb_rc_strerror(r->bResultCode), r->bResultCode);
+	} else
+		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n");
+
+	spin_lock_irqsave(&rc->rsvs_lock, flags);
+	if (rc->set_drp_ie_pending > 1) {
+		rc->set_drp_ie_pending = 0;
+		uwb_rsv_queue_update(rc);
+	} else {
+		rc->set_drp_ie_pending = 0;
+	}
+	spin_unlock_irqrestore(&rc->rsvs_lock, flags);
+}
+
+/**
+ * Construct and send the SET DRP IE
+ *
+ * @rc:         UWB Host controller
+ * @returns:    >= 0 number of bytes still available in the beacon
+ *              < 0 errno code on error.
+ *
+ * See WUSB[8.6.2.7]: The host must set all the DRP IEs that it wants the
+ * device to include in its beacon at the same time. We thus have to
+ * traverse all reservations and include the DRP IEs of all PENDING
+ * and NEGOTIATED reservations in a SET DRP command for transmission.
+ *
+ * A DRP Availability IE is appended.
+ *
+ * rc->rsvs_mutex is held
+ *
+ * FIXME We currently ignore the returned value indicating the remaining space
+ * in beacon. This could be used to deny reservation requests earlier if
+ * determined that they would cause the beacon space to be exceeded.
+ */
+int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
+{
+	int result;
+	struct uwb_rc_cmd_set_drp_ie *cmd;
+	struct uwb_rsv *rsv;
+	struct uwb_rsv_move *mv;
+	int num_bytes = 0;
+	u8 *IEDataptr;
+
+	result = -ENOMEM;
+	/* First traverse all reservations to determine memory needed. */
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->drp_ie != NULL) {
+			num_bytes += rsv->drp_ie->hdr.length + 2;
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				num_bytes +=
+					mv->companion_drp_ie->hdr.length + 2;
+			}
+		}
+	}
+	num_bytes += sizeof(rc->drp_avail.ie);
+	cmd = kzalloc(sizeof(*cmd) + num_bytes, GFP_KERNEL);
+	if (cmd == NULL)
+		goto error;
+	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
+	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_DRP_IE);
+	cmd->wIELength = num_bytes;
+	IEDataptr = (u8 *)&cmd->IEData[0];
+
+	/* FIXME: DRV avail IE is not always needed */
+	/* put DRP avail IE first */
+	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
+	IEDataptr += sizeof(struct uwb_ie_drp_avail);
+
+	/* Next traverse all reservations to place IEs in allocated memory. */
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->drp_ie != NULL) {
+			memcpy(IEDataptr, rsv->drp_ie,
+			       rsv->drp_ie->hdr.length + 2);
+			IEDataptr += rsv->drp_ie->hdr.length + 2;
+
+			if (uwb_rsv_has_two_drp_ies(rsv) &&
+				(rsv->mv.companion_drp_ie != NULL)) {
+				mv = &rsv->mv;
+				memcpy(IEDataptr, mv->companion_drp_ie,
+				       mv->companion_drp_ie->hdr.length + 2);
+				IEDataptr +=
+					mv->companion_drp_ie->hdr.length + 2;
+			}
+		}
+	}
+
+	result = uwb_rc_cmd_async(rc, "SET-DRP-IE",
+				&cmd->rccb, sizeof(*cmd) + num_bytes,
+				UWB_RC_CET_GENERAL, UWB_RC_CMD_SET_DRP_IE,
+				uwb_rc_set_drp_cmd_done, NULL);
+
+	rc->set_drp_ie_pending = 1;
+
+	kfree(cmd);
+error:
+	return result;
+}
+
+/*
+ * Evaluate the action to perform using conflict resolution rules
+ *
+ * Return a uwb_drp_conflict_action.
+ */
+static int evaluate_conflict_action(struct uwb_ie_drp *ext_drp_ie, int ext_beacon_slot,
+				    struct uwb_rsv *rsv, int our_status)
+{
+	int our_tie_breaker = rsv->tiebreaker;
+	int our_type        = rsv->type;
+	int our_beacon_slot = rsv->rc->uwb_dev.beacon_slot;
+
+	int ext_tie_breaker = uwb_ie_drp_tiebreaker(ext_drp_ie);
+	int ext_status      = uwb_ie_drp_status(ext_drp_ie);
+	int ext_type        = uwb_ie_drp_type(ext_drp_ie);
+
+
+	/* [ECMA-368 2nd Edition] 17.4.6 */
+	if (ext_type == UWB_DRP_TYPE_PCA && our_type == UWB_DRP_TYPE_PCA) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-1 */
+	if (our_type == UWB_DRP_TYPE_ALIEN_BP) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-2 */
+	if (ext_type == UWB_DRP_TYPE_ALIEN_BP) {
+		/* here we know our_type != UWB_DRP_TYPE_ALIEN_BP */
+		return UWB_DRP_CONFLICT_ACT1;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-3 */
+	if (our_status == 0 && ext_status == 1) {
+		return UWB_DRP_CONFLICT_ACT2;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-4 */
+	if (our_status == 1 && ext_status == 0) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-5a */
+	if (our_tie_breaker == ext_tie_breaker &&
+	    our_beacon_slot <  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	/* [ECMA-368 2nd Edition] 17.4.6-5b */
+	if (our_tie_breaker != ext_tie_breaker &&
+	    our_beacon_slot >  ext_beacon_slot) {
+		return UWB_DRP_CONFLICT_MANTAIN;
+	}
+
+	if (our_status == 0) {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-6a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
+		} else  {
+			/* [ECMA-368 2nd Edition] 17.4.6-6b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT2;
+			}
+		}
+	} else {
+		if (our_tie_breaker == ext_tie_breaker) {
+			/* [ECMA-368 2nd Edition] 17.4.6-7a */
+			if (our_beacon_slot > ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		} else {
+			/* [ECMA-368 2nd Edition] 17.4.6-7b */
+			if (our_beacon_slot < ext_beacon_slot) {
+				return UWB_DRP_CONFLICT_ACT3;
+			}
+		}
+	}
+	return UWB_DRP_CONFLICT_MANTAIN;
+}
+
+static void handle_conflict_normal(struct uwb_ie_drp *drp_ie,
+				   int ext_beacon_slot,
+				   struct uwb_rsv *rsv,
+				   struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	int action;
+
+	action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, uwb_rsv_status(rsv));
+
+	if (uwb_rsv_is_owner(rsv)) {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+			/* try move */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_TO_BE_MOVED);
+			if (bow->can_reserve_extra_mases == false)
+				uwb_rsv_backoff_win_increment(rc);
+
+			break;
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_backoff_win_increment(rc);
+			/* drop some mases with reason modified */
+			/* put in the companion the mases to be dropped */
+			bitmap_and(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		default:
+			break;
+		}
+	} else {
+		switch(action) {
+		case UWB_DRP_CONFLICT_ACT2:
+		case UWB_DRP_CONFLICT_ACT3:
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+		default:
+			break;
+		}
+
+	}
+
+}
+
+static void handle_conflict_expanding(struct uwb_ie_drp *drp_ie, int ext_beacon_slot,
+				      struct uwb_rsv *rsv, bool companion_only,
+				      struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	int action;
+
+	if (companion_only) {
+		/* status of companion is 0 at this point */
+		action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, 0);
+		if (uwb_rsv_is_owner(rsv)) {
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv,
+						UWB_RSV_STATE_O_ESTABLISHED);
+				rsv->needs_release_companion_mas = false;
+				if (bow->can_reserve_extra_mases == false)
+					uwb_rsv_backoff_win_increment(rc);
+				uwb_drp_avail_release(rsv->rc,
+						&rsv->mv.companion_mas);
+			}
+		} else { /* rsv is target */
+			switch(action) {
+			case UWB_DRP_CONFLICT_ACT2:
+			case UWB_DRP_CONFLICT_ACT3:
+				uwb_rsv_set_state(rsv,
+					UWB_RSV_STATE_T_EXPANDING_CONFLICT);
+                                /* send_drp_avail_ie = true; */
+			}
+		}
+	} else { /* also base part of the reservation is conflicting */
+		if (uwb_rsv_is_owner(rsv)) {
+			uwb_rsv_backoff_win_increment(rc);
+			/* remove companion part */
+			uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+
+			/* drop some mases with reason modified */
+
+			/* put in the companion the mases to be dropped */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm,
+					conflicting_mas->bm, UWB_NUM_MAS);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		} else { /* it is a target rsv */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+                        /* send_drp_avail_ie = true; */
+		}
+	}
+}
+
+static void uwb_drp_handle_conflict_rsv(struct uwb_rc *rc, struct uwb_rsv *rsv,
+					struct uwb_rc_evt_drp *drp_evt,
+					struct uwb_ie_drp *drp_ie,
+					struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv_move *mv;
+
+	/* check if the conflicting reservation has two drp_ies */
+	if (uwb_rsv_has_two_drp_ies(rsv)) {
+		mv = &rsv->mv;
+		if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm,
+								UWB_NUM_MAS)) {
+			handle_conflict_expanding(drp_ie,
+						drp_evt->beacon_slot_number,
+						rsv, false, conflicting_mas);
+		} else {
+			if (bitmap_intersects(mv->companion_mas.bm,
+					conflicting_mas->bm, UWB_NUM_MAS)) {
+				handle_conflict_expanding(
+					drp_ie, drp_evt->beacon_slot_number,
+					rsv, true, conflicting_mas);
+			}
+		}
+	} else if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm,
+							UWB_NUM_MAS)) {
+		handle_conflict_normal(drp_ie, drp_evt->beacon_slot_number,
+					rsv, conflicting_mas);
+	}
+}
+
+static void uwb_drp_handle_all_conflict_rsv(struct uwb_rc *rc,
+					    struct uwb_rc_evt_drp *drp_evt,
+					    struct uwb_ie_drp *drp_ie,
+					    struct uwb_mas_bm *conflicting_mas)
+{
+	struct uwb_rsv *rsv;
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie,
+							conflicting_mas);
+	}
+}
+
+static void uwb_drp_process_target_accepted(struct uwb_rc *rc,
+	struct uwb_rsv *rsv, struct uwb_rc_evt_drp *drp_evt,
+	struct uwb_ie_drp *drp_ie, struct uwb_mas_bm *mas)
+{
+	struct uwb_rsv_move *mv = &rsv->mv;
+	int status;
+
+	status = uwb_ie_drp_status(drp_ie);
+
+	if (rsv->state == UWB_RSV_STATE_T_CONFLICT) {
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
+		return;
+	}
+
+	if (rsv->state == UWB_RSV_STATE_T_EXPANDING_ACCEPTED) {
+		/* drp_ie is companion */
+		if (!bitmap_equal(rsv->mas.bm, mas->bm, UWB_NUM_MAS)) {
+			/* stroke companion */
+			uwb_rsv_set_state(rsv,
+				UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+		}
+	} else {
+		if (!bitmap_equal(rsv->mas.bm, mas->bm, UWB_NUM_MAS)) {
+			if (uwb_drp_avail_reserve_pending(rc, mas) == -EBUSY) {
+				/* FIXME: there is a conflict, find
+				 * the conflicting reservations and
+				 * take a sensible action. Consider
+				 * that in drp_ie there is the
+				 * "neighbour" */
+				uwb_drp_handle_all_conflict_rsv(rc, drp_evt,
+						drp_ie, mas);
+			} else {
+				/* accept the extra reservation */
+				bitmap_copy(mv->companion_mas.bm, mas->bm,
+								UWB_NUM_MAS);
+				uwb_rsv_set_state(rsv,
+					UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+			}
+		} else {
+			if (status) {
+				uwb_rsv_set_state(rsv,
+						UWB_RSV_STATE_T_ACCEPTED);
+			}
+		}
+
+	}
+}
+
+/*
+ * Based on the DRP IE, transition a target reservation to a new
+ * state.
+ */
+static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
+		   struct uwb_ie_drp *drp_ie, struct uwb_rc_evt_drp *drp_evt)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv = &rsv->mv;
+	int status;
+	enum uwb_drp_reason reason_code;
+	struct uwb_mas_bm mas;
+
+	status = uwb_ie_drp_status(drp_ie);
+	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+
+	switch (reason_code) {
+	case UWB_DRP_REASON_ACCEPTED:
+		uwb_drp_process_target_accepted(rc, rsv, drp_evt, drp_ie, &mas);
+		break;
+
+	case UWB_DRP_REASON_MODIFIED:
+		/* check to see if we have already modified the reservation */
+		if (bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+			break;
+		}
+
+		/* find if the owner wants to expand or reduce */
+		if (bitmap_subset(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
+			/* owner is reducing */
+			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mas.bm,
+				UWB_NUM_MAS);
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+		}
+
+		bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_RESIZED);
+		break;
+	default:
+		dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
+			 reason_code, status);
+	}
+}
+
+static void uwb_drp_process_owner_accepted(struct uwb_rsv *rsv,
+						struct uwb_mas_bm *mas)
+{
+	struct uwb_rsv_move *mv = &rsv->mv;
+
+	switch (rsv->state) {
+	case UWB_RSV_STATE_O_PENDING:
+	case UWB_RSV_STATE_O_INITIATED:
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+		break;
+	case UWB_RSV_STATE_O_MODIFIED:
+		if (bitmap_equal(mas->bm, rsv->mas.bm, UWB_NUM_MAS))
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+		else
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
+		break;
+
+	case UWB_RSV_STATE_O_MOVE_REDUCING: /* shouldn' t be a problem */
+		if (bitmap_equal(mas->bm, rsv->mas.bm, UWB_NUM_MAS))
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+		else
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		if (bitmap_equal(mas->bm, mv->companion_mas.bm, UWB_NUM_MAS)) {
+			/* Companion reservation accepted */
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+		} else {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		if (bitmap_equal(mas->bm, rsv->mas.bm, UWB_NUM_MAS))
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+		else
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+		break;
+	default:
+		break;
+	}
+}
+/*
+ * Based on the DRP IE, transition an owner reservation to a new
+ * state.
+ */
+static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
+				  struct uwb_dev *src, struct uwb_ie_drp *drp_ie,
+				  struct uwb_rc_evt_drp *drp_evt)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	int status;
+	enum uwb_drp_reason reason_code;
+	struct uwb_mas_bm mas;
+
+	status = uwb_ie_drp_status(drp_ie);
+	reason_code = uwb_ie_drp_reason_code(drp_ie);
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+
+	if (status) {
+		switch (reason_code) {
+		case UWB_DRP_REASON_ACCEPTED:
+			uwb_drp_process_owner_accepted(rsv, &mas);
+			break;
+		default:
+			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
+				 reason_code, status);
+		}
+	} else {
+		switch (reason_code) {
+		case UWB_DRP_REASON_PENDING:
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_PENDING);
+			break;
+		case UWB_DRP_REASON_DENIED:
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+			break;
+		case UWB_DRP_REASON_CONFLICT:
+			/* resolve the conflict */
+			bitmap_complement(mas.bm, src->last_availability_bm,
+					  UWB_NUM_MAS);
+			uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, &mas);
+			break;
+		default:
+			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
+				 reason_code, status);
+		}
+	}
+}
+
+static void uwb_cnflt_alien_stroke_timer(struct uwb_cnflt_alien *cnflt)
+{
+	unsigned timeout_us = UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US;
+	mod_timer(&cnflt->timer, jiffies + usecs_to_jiffies(timeout_us));
+}
+
+static void uwb_cnflt_update_work(struct work_struct *work)
+{
+	struct uwb_cnflt_alien *cnflt = container_of(work,
+						     struct uwb_cnflt_alien,
+						     cnflt_update_work);
+	struct uwb_cnflt_alien *c;
+	struct uwb_rc *rc = cnflt->rc;
+
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	list_del(&cnflt->rc_node);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_zero(rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+
+	list_for_each_entry(c, &rc->cnflt_alien_list, rc_node) {
+		bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm,
+						c->mas.bm, UWB_NUM_MAS);
+	}
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work,
+					usecs_to_jiffies(delay_us));
+
+	kfree(cnflt);
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static void uwb_cnflt_timer(struct timer_list *t)
+{
+	struct uwb_cnflt_alien *cnflt = from_timer(cnflt, t, timer);
+
+	queue_work(cnflt->rc->rsv_workq, &cnflt->cnflt_update_work);
+}
+
+/*
+ * We have received an DRP_IE of type Alien BP and we need to make
+ * sure we do not transmit in conflicting MASs.
+ */
+static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_mas_bm mas;
+	struct uwb_cnflt_alien *cnflt;
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+
+	list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) {
+		if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) {
+			/* Existing alien BP reservation conflicting
+			 * bitmap, just reset the timer */
+			uwb_cnflt_alien_stroke_timer(cnflt);
+			return;
+		}
+	}
+
+	/* New alien BP reservation conflicting bitmap */
+
+	/* alloc and initialize new uwb_cnflt_alien */
+	cnflt = kzalloc(sizeof(struct uwb_cnflt_alien), GFP_KERNEL);
+	if (!cnflt) {
+		dev_err(dev, "failed to alloc uwb_cnflt_alien struct\n");
+		return;
+	}
+
+	INIT_LIST_HEAD(&cnflt->rc_node);
+	timer_setup(&cnflt->timer, uwb_cnflt_timer, 0);
+
+	cnflt->rc = rc;
+	INIT_WORK(&cnflt->cnflt_update_work, uwb_cnflt_update_work);
+
+	bitmap_copy(cnflt->mas.bm, mas.bm, UWB_NUM_MAS);
+
+	list_add_tail(&cnflt->rc_node, &rc->cnflt_alien_list);
+
+	/* update rc global conflicting alien bitmap */
+	bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, mas.bm, UWB_NUM_MAS);
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
+
+	/* start the timer */
+	uwb_cnflt_alien_stroke_timer(cnflt);
+}
+
+static void uwb_drp_process_not_involved(struct uwb_rc *rc,
+					 struct uwb_rc_evt_drp *drp_evt,
+					 struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_mas_bm mas;
+
+	uwb_drp_ie_to_bm(&mas, drp_ie);
+	uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
+}
+
+static void uwb_drp_process_involved(struct uwb_rc *rc, struct uwb_dev *src,
+				     struct uwb_rc_evt_drp *drp_evt,
+				     struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_rsv *rsv;
+
+	rsv = uwb_rsv_find(rc, src, drp_ie);
+	if (!rsv) {
+		/*
+		 * No reservation? It's either for a recently
+		 * terminated reservation; or the DRP IE couldn't be
+		 * processed (e.g., an invalid IE or out of memory).
+		 */
+		return;
+	}
+
+	/*
+	 * Do nothing with DRP IEs for reservations that have been
+	 * terminated.
+	 */
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+		return;
+	}
+
+	if (uwb_ie_drp_owner(drp_ie))
+		uwb_drp_process_target(rc, rsv, drp_ie, drp_evt);
+	else
+		uwb_drp_process_owner(rc, rsv, src, drp_ie, drp_evt);
+
+}
+
+
+static bool uwb_drp_involves_us(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
+{
+	return uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, &drp_ie->dev_addr) == 0;
+}
+
+/*
+ * Process a received DRP IE.
+ */
+static void uwb_drp_process(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
+			    struct uwb_dev *src, struct uwb_ie_drp *drp_ie)
+{
+	if (uwb_ie_drp_type(drp_ie) == UWB_DRP_TYPE_ALIEN_BP)
+		uwb_drp_handle_alien_drp(rc, drp_ie);
+	else if (uwb_drp_involves_us(rc, drp_ie))
+		uwb_drp_process_involved(rc, src, drp_evt, drp_ie);
+	else
+		uwb_drp_process_not_involved(rc, drp_evt, drp_ie);
+}
+
+/*
+ * Process a received DRP Availability IE
+ */
+static void uwb_drp_availability_process(struct uwb_rc *rc, struct uwb_dev *src,
+					 struct uwb_ie_drp_avail *drp_availability_ie)
+{
+	bitmap_copy(src->last_availability_bm,
+		    drp_availability_ie->bmp, UWB_NUM_MAS);
+}
+
+/*
+ * Process all the DRP IEs (both DRP IEs and the DRP Availability IE)
+ * from a device.
+ */
+static
+void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
+			 size_t ielen, struct uwb_dev *src_dev)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_ie_hdr *ie_hdr;
+	void *ptr;
+
+	ptr = drp_evt->ie_data;
+	for (;;) {
+		ie_hdr = uwb_ie_next(&ptr, &ielen);
+		if (!ie_hdr)
+			break;
+
+		switch (ie_hdr->element_id) {
+		case UWB_IE_DRP_AVAILABILITY:
+			uwb_drp_availability_process(rc, src_dev, (struct uwb_ie_drp_avail *)ie_hdr);
+			break;
+		case UWB_IE_DRP:
+			uwb_drp_process(rc, drp_evt, src_dev, (struct uwb_ie_drp *)ie_hdr);
+			break;
+		default:
+			dev_warn(dev, "unexpected IE in DRP notification\n");
+			break;
+		}
+	}
+
+	if (ielen > 0)
+		dev_warn(dev, "%d octets remaining in DRP notification\n",
+			 (int)ielen);
+}
+
+/**
+ * uwbd_evt_handle_rc_drp - handle a DRP_IE event
+ * @evt: the DRP_IE event from the radio controller
+ *
+ * This processes DRP notifications from the radio controller, either
+ * initiating a new reservation or transitioning an existing
+ * reservation into a different state.
+ *
+ * DRP notifications can occur for three different reasons:
+ *
+ * - UWB_DRP_NOTIF_DRP_IE_RECVD: one or more DRP IEs with the RC as
+ *   the target or source have been received.
+ *
+ *   These DRP IEs could be new or for an existing reservation.
+ *
+ *   If the DRP IE for an existing reservation ceases to be to
+ *   received for at least mMaxLostBeacons, the reservation should be
+ *   considered to be terminated.  Note that the TERMINATE reason (see
+ *   below) may not always be signalled (e.g., the remote device has
+ *   two or more reservations established with the RC).
+ *
+ * - UWB_DRP_NOTIF_CONFLICT: DRP IEs from any device in the beacon
+ *   group conflict with the RC's reservations.
+ *
+ * - UWB_DRP_NOTIF_TERMINATE: DRP IEs are no longer being received
+ *   from a device (i.e., it's terminated all reservations).
+ *
+ * Only the software state of the reservations is changed; the setting
+ * of the radio controller's DRP IEs is done after all the events in
+ * an event buffer are processed.  This saves waiting multiple times
+ * for the SET_DRP_IE command to complete.
+ */
+int uwbd_evt_handle_rc_drp(struct uwb_event *evt)
+{
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc *rc = evt->rc;
+	struct uwb_rc_evt_drp *drp_evt;
+	size_t ielength, bytes_left;
+	struct uwb_dev_addr src_addr;
+	struct uwb_dev *src_dev;
+
+	/* Is there enough data to decode the event (and any IEs in
+	   its payload)? */
+	if (evt->notif.size < sizeof(*drp_evt)) {
+		dev_err(dev, "DRP event: Not enough data to decode event "
+			"[%zu bytes left, %zu needed]\n",
+			evt->notif.size, sizeof(*drp_evt));
+		return 0;
+	}
+	bytes_left = evt->notif.size - sizeof(*drp_evt);
+	drp_evt = container_of(evt->notif.rceb, struct uwb_rc_evt_drp, rceb);
+	ielength = le16_to_cpu(drp_evt->ie_length);
+	if (bytes_left != ielength) {
+		dev_err(dev, "DRP event: Not enough data in payload [%zu"
+			"bytes left, %zu declared in the event]\n",
+			bytes_left, ielength);
+		return 0;
+	}
+
+	memcpy(src_addr.data, &drp_evt->src_addr, sizeof(src_addr));
+	src_dev = uwb_dev_get_by_devaddr(rc, &src_addr);
+	if (!src_dev) {
+		/*
+		 * A DRP notification from an unrecognized device.
+		 *
+		 * This is probably from a WUSB device that doesn't
+		 * have an EUI-48 and therefore doesn't show up in the
+		 * UWB device database.  It's safe to simply ignore
+		 * these.
+		 */
+		return 0;
+	}
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	/* We do not distinguish from the reason */
+	uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
+
+	mutex_unlock(&rc->rsvs_mutex);
+
+	uwb_dev_put(src_dev);
+	return 0;
+}
diff --git a/drivers/staging/uwb/est.c b/drivers/staging/uwb/est.c
new file mode 100644
index 000000000000..d4141ffdd775
--- /dev/null
+++ b/drivers/staging/uwb/est.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band Radio Control
+ * Event Size Tables management
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ *
+ * Infrastructure, code and data tables for guessing the size of
+ * events received on the notification endpoints of UWB radio
+ * controllers.
+ *
+ * You define a table of events and for each, its size and how to get
+ * the extra size.
+ *
+ * ENTRY POINTS:
+ *
+ * uwb_est_{init/destroy}(): To initialize/release the EST subsystem.
+ *
+ * uwb_est_[u]register(): To un/register event size tables
+ *   uwb_est_grow()
+ *
+ * uwb_est_find_size(): Get the size of an event
+ *   uwb_est_get_size()
+ */
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include "uwb-internal.h"
+
+struct uwb_est {
+	u16 type_event_high;
+	u16 vendor, product;
+	u8 entries;
+	const struct uwb_est_entry *entry;
+};
+
+static struct uwb_est *uwb_est;
+static u8 uwb_est_size;
+static u8 uwb_est_used;
+static DEFINE_RWLOCK(uwb_est_lock);
+
+/**
+ * WUSB Standard Event Size Table, HWA-RC interface
+ *
+ * Sizes for events and notifications type 0 (general), high nibble 0.
+ */
+static
+struct uwb_est_entry uwb_est_00_00xx[] = {
+	[UWB_RC_EVT_IE_RCV] = {
+		.size = sizeof(struct uwb_rc_evt_ie_rcv),
+		.offset = 1 + offsetof(struct uwb_rc_evt_ie_rcv, wIELength),
+	},
+	[UWB_RC_EVT_BEACON] = {
+		.size = sizeof(struct uwb_rc_evt_beacon),
+		.offset = 1 + offsetof(struct uwb_rc_evt_beacon, wBeaconInfoLength),
+	},
+	[UWB_RC_EVT_BEACON_SIZE] = {
+		.size = sizeof(struct uwb_rc_evt_beacon_size),
+	},
+	[UWB_RC_EVT_BPOIE_CHANGE] = {
+		.size = sizeof(struct uwb_rc_evt_bpoie_change),
+		.offset = 1 + offsetof(struct uwb_rc_evt_bpoie_change,
+				       wBPOIELength),
+	},
+	[UWB_RC_EVT_BP_SLOT_CHANGE] = {
+		.size = sizeof(struct uwb_rc_evt_bp_slot_change),
+	},
+	[UWB_RC_EVT_BP_SWITCH_IE_RCV] = {
+		.size = sizeof(struct uwb_rc_evt_bp_switch_ie_rcv),
+		.offset = 1 + offsetof(struct uwb_rc_evt_bp_switch_ie_rcv, wIELength),
+	},
+	[UWB_RC_EVT_DEV_ADDR_CONFLICT] = {
+		.size = sizeof(struct uwb_rc_evt_dev_addr_conflict),
+	},
+	[UWB_RC_EVT_DRP_AVAIL] = {
+		.size = sizeof(struct uwb_rc_evt_drp_avail)
+	},
+	[UWB_RC_EVT_DRP] = {
+		.size = sizeof(struct uwb_rc_evt_drp),
+		.offset = 1 + offsetof(struct uwb_rc_evt_drp, ie_length),
+	},
+	[UWB_RC_EVT_BP_SWITCH_STATUS] = {
+		.size = sizeof(struct uwb_rc_evt_bp_switch_status),
+	},
+	[UWB_RC_EVT_CMD_FRAME_RCV] = {
+		.size = sizeof(struct uwb_rc_evt_cmd_frame_rcv),
+		.offset = 1 + offsetof(struct uwb_rc_evt_cmd_frame_rcv, dataLength),
+	},
+	[UWB_RC_EVT_CHANNEL_CHANGE_IE_RCV] = {
+		.size = sizeof(struct uwb_rc_evt_channel_change_ie_rcv),
+		.offset = 1 + offsetof(struct uwb_rc_evt_channel_change_ie_rcv, wIELength),
+	},
+	[UWB_RC_CMD_CHANNEL_CHANGE] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_DEV_ADDR_MGMT] = {
+		.size = sizeof(struct uwb_rc_evt_dev_addr_mgmt) },
+	[UWB_RC_CMD_GET_IE] = {
+		.size = sizeof(struct uwb_rc_evt_get_ie),
+		.offset = 1 + offsetof(struct uwb_rc_evt_get_ie, wIELength),
+	},
+	[UWB_RC_CMD_RESET] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SCAN] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SET_BEACON_FILTER] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SET_DRP_IE] = {
+		.size = sizeof(struct uwb_rc_evt_set_drp_ie),
+	},
+	[UWB_RC_CMD_SET_IE] = {
+		.size = sizeof(struct uwb_rc_evt_set_ie),
+	},
+	[UWB_RC_CMD_SET_NOTIFICATION_FILTER] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SET_TX_POWER] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SLEEP] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_START_BEACON] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_STOP_BEACON] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_BP_MERGE] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SEND_COMMAND_FRAME] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+	[UWB_RC_CMD_SET_ASIE_NOTIF] = {
+		.size = sizeof(struct uwb_rc_evt_confirm),
+	},
+};
+
+static
+struct uwb_est_entry uwb_est_01_00xx[] = {
+	[UWB_RC_DAA_ENERGY_DETECTED] = {
+		.size = sizeof(struct uwb_rc_evt_daa_energy_detected),
+	},
+	[UWB_RC_SET_DAA_ENERGY_MASK] = {
+		.size = sizeof(struct uwb_rc_evt_set_daa_energy_mask),
+	},
+	[UWB_RC_SET_NOTIFICATION_FILTER_EX] = {
+		.size = sizeof(struct uwb_rc_evt_set_notification_filter_ex),
+	},
+};
+
+/**
+ * Initialize the EST subsystem
+ *
+ * Register the standard tables also.
+ *
+ * FIXME: tag init
+ */
+int uwb_est_create(void)
+{
+	int result;
+
+	uwb_est_size = 2;
+	uwb_est_used = 0;
+	uwb_est = kcalloc(uwb_est_size, sizeof(uwb_est[0]), GFP_KERNEL);
+	if (uwb_est == NULL)
+		return -ENOMEM;
+
+	result = uwb_est_register(UWB_RC_CET_GENERAL, 0, 0xffff, 0xffff,
+				  uwb_est_00_00xx, ARRAY_SIZE(uwb_est_00_00xx));
+	if (result < 0)
+		goto out;
+	result = uwb_est_register(UWB_RC_CET_EX_TYPE_1, 0, 0xffff, 0xffff,
+				  uwb_est_01_00xx, ARRAY_SIZE(uwb_est_01_00xx));
+out:
+	return result;
+}
+
+
+/** Clean it up */
+void uwb_est_destroy(void)
+{
+	kfree(uwb_est);
+	uwb_est = NULL;
+	uwb_est_size = uwb_est_used = 0;
+}
+
+
+/**
+ * Double the capacity of the EST table
+ *
+ * @returns 0 if ok, < 0 errno no error.
+ */
+static
+int uwb_est_grow(void)
+{
+	size_t actual_size = uwb_est_size * sizeof(uwb_est[0]);
+	void *new = kmalloc_array(2, actual_size, GFP_ATOMIC);
+	if (new == NULL)
+		return -ENOMEM;
+	memcpy(new, uwb_est, actual_size);
+	memset(new + actual_size, 0, actual_size);
+	kfree(uwb_est);
+	uwb_est = new;
+	uwb_est_size *= 2;
+	return 0;
+}
+
+
+/**
+ * Register an event size table
+ *
+ * Makes room for it if the table is full, and then inserts  it in the
+ * right position (entries are sorted by type, event_high, vendor and
+ * then product).
+ *
+ * @vendor:  vendor code for matching against the device (0x0000 and
+ *           0xffff mean any); use 0x0000 to force all to match without
+ *           checking possible vendor specific ones, 0xfffff to match
+ *           after checking vendor specific ones.
+ *
+ * @product: product code from that vendor; same matching rules, use
+ *           0x0000 for not allowing vendor specific matches, 0xffff
+ *           for allowing.
+ *
+ * This arragement just makes the tables sort differenty. Because the
+ * table is sorted by growing type-event_high-vendor-product, a zero
+ * vendor will match before than a 0x456a vendor, that will match
+ * before a 0xfffff vendor.
+ *
+ * @returns 0 if ok, < 0 errno on error (-ENOENT if not found).
+ */
+/* FIXME: add bus type to vendor/product code */
+int uwb_est_register(u8 type, u8 event_high, u16 vendor, u16 product,
+		     const struct uwb_est_entry *entry, size_t entries)
+{
+	unsigned long flags;
+	unsigned itr;
+	int result = 0;
+
+	write_lock_irqsave(&uwb_est_lock, flags);
+	if (uwb_est_used == uwb_est_size) {
+		result = uwb_est_grow();
+		if (result < 0)
+			goto out;
+	}
+	/* Find the right spot to insert it in */
+	for (itr = 0; itr < uwb_est_used; itr++)
+		if (uwb_est[itr].type_event_high < type
+		    && uwb_est[itr].vendor < vendor
+		    && uwb_est[itr].product < product)
+			break;
+
+	/* Shift others to make room for the new one? */
+	if (itr < uwb_est_used)
+		memmove(&uwb_est[itr+1], &uwb_est[itr], uwb_est_used - itr);
+	uwb_est[itr].type_event_high = type << 8 | event_high;
+	uwb_est[itr].vendor = vendor;
+	uwb_est[itr].product = product;
+	uwb_est[itr].entry = entry;
+	uwb_est[itr].entries = entries;
+	uwb_est_used++;
+out:
+	write_unlock_irqrestore(&uwb_est_lock, flags);
+	return result;
+}
+EXPORT_SYMBOL_GPL(uwb_est_register);
+
+
+/**
+ * Unregister an event size table
+ *
+ * This just removes the specified entry and moves the ones after it
+ * to fill in the gap. This is needed to keep the list sorted; no
+ * reallocation is done to reduce the size of the table.
+ *
+ * We unregister by all the data we used to register instead of by
+ * pointer to the @entry array because we might have used the same
+ * table for a bunch of IDs (for example).
+ *
+ * @returns 0 if ok, < 0 errno on error (-ENOENT if not found).
+ */
+int uwb_est_unregister(u8 type, u8 event_high, u16 vendor, u16 product,
+		       const struct uwb_est_entry *entry, size_t entries)
+{
+	unsigned long flags;
+	unsigned itr;
+	struct uwb_est est_cmp = {
+		.type_event_high = type << 8 | event_high,
+		.vendor = vendor,
+		.product = product,
+		.entry = entry,
+		.entries = entries
+	};
+	write_lock_irqsave(&uwb_est_lock, flags);
+	for (itr = 0; itr < uwb_est_used; itr++)
+		if (!memcmp(&uwb_est[itr], &est_cmp, sizeof(est_cmp)))
+			goto found;
+	write_unlock_irqrestore(&uwb_est_lock, flags);
+	return -ENOENT;
+
+found:
+	if (itr < uwb_est_used - 1)	/* Not last one? move ones above */
+		memmove(&uwb_est[itr], &uwb_est[itr+1], uwb_est_used - itr - 1);
+	uwb_est_used--;
+	write_unlock_irqrestore(&uwb_est_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(uwb_est_unregister);
+
+
+/**
+ * Get the size of an event from a table
+ *
+ * @rceb: pointer to the buffer with the event
+ * @rceb_size: size of the area pointed to by @rceb in bytes.
+ * @returns: > 0      Size of the event
+ *	     -ENOSPC  An area big enough was not provided to look
+ *		      ahead into the event's guts and guess the size.
+ *	     -EINVAL  Unknown event code (wEvent).
+ *
+ * This will look at the received RCEB and guess what is the total
+ * size. For variable sized events, it will look further ahead into
+ * their length field to see how much data should be read.
+ *
+ * Note this size is *not* final--the neh (Notification/Event Handle)
+ * might specificy an extra size to add.
+ */
+static
+ssize_t uwb_est_get_size(struct uwb_rc *uwb_rc, struct uwb_est *est,
+			 u8 event_low, const struct uwb_rceb *rceb,
+			 size_t rceb_size)
+{
+	unsigned offset;
+	ssize_t size;
+	struct device *dev = &uwb_rc->uwb_dev.dev;
+	const struct uwb_est_entry *entry;
+
+	size = -ENOENT;
+	if (event_low >= est->entries) {	/* in range? */
+		dev_err(dev, "EST %p 0x%04x/%04x/%04x[%u]: event %u out of range\n",
+			est, est->type_event_high, est->vendor, est->product,
+			est->entries, event_low);
+		goto out;
+	}
+	size = -ENOENT;
+	entry = &est->entry[event_low];
+	if (entry->size == 0 && entry->offset == 0) {	/* unknown? */
+		dev_err(dev, "EST %p 0x%04x/%04x/%04x[%u]: event %u unknown\n",
+			est, est->type_event_high, est->vendor,	est->product,
+			est->entries, event_low);
+		goto out;
+	}
+	offset = entry->offset;	/* extra fries with that? */
+	if (offset == 0)
+		size = entry->size;
+	else {
+		/* Ops, got an extra size field at 'offset'--read it */
+		const void *ptr = rceb;
+		size_t type_size = 0;
+		offset--;
+		size = -ENOSPC;			/* enough data for more? */
+		switch (entry->type) {
+		case UWB_EST_16:  type_size = sizeof(__le16); break;
+		case UWB_EST_8:   type_size = sizeof(u8);     break;
+		default: 	 BUG();
+		}
+		if (offset + type_size > rceb_size) {
+			dev_err(dev, "EST %p 0x%04x/%04x/%04x[%u]: "
+				"not enough data to read extra size\n",
+				est, est->type_event_high, est->vendor,
+				est->product, est->entries);
+			goto out;
+		}
+		size = entry->size;
+		ptr += offset;
+		switch (entry->type) {
+		case UWB_EST_16:  size += le16_to_cpu(*(__le16 *)ptr); break;
+		case UWB_EST_8:   size += *(u8 *)ptr;                  break;
+		default: 	 BUG();
+		}
+	}
+out:
+	return size;
+}
+
+
+/**
+ * Guesses the size of a WA event
+ *
+ * @rceb: pointer to the buffer with the event
+ * @rceb_size: size of the area pointed to by @rceb in bytes.
+ * @returns: > 0      Size of the event
+ *	     -ENOSPC  An area big enough was not provided to look
+ *		      ahead into the event's guts and guess the size.
+ *	     -EINVAL  Unknown event code (wEvent).
+ *
+ * This will look at the received RCEB and guess what is the total
+ * size by checking all the tables registered with
+ * uwb_est_register(). For variable sized events, it will look further
+ * ahead into their length field to see how much data should be read.
+ *
+ * Note this size is *not* final--the neh (Notification/Event Handle)
+ * might specificy an extra size to add or replace.
+ */
+ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
+			  size_t rceb_size)
+{
+	/* FIXME: add vendor/product data */
+	ssize_t size;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned long flags;
+	unsigned itr;
+	u16 type_event_high, event;
+
+	read_lock_irqsave(&uwb_est_lock, flags);
+	size = -ENOSPC;
+	if (rceb_size < sizeof(*rceb))
+		goto out;
+	event = le16_to_cpu(rceb->wEvent);
+	type_event_high = rceb->bEventType << 8 | (event & 0xff00) >> 8;
+	for (itr = 0; itr < uwb_est_used; itr++) {
+		if (uwb_est[itr].type_event_high != type_event_high)
+			continue;
+		size = uwb_est_get_size(rc, &uwb_est[itr],
+					event & 0x00ff, rceb, rceb_size);
+		/* try more tables that might handle the same type */
+		if (size != -ENOENT)
+			goto out;
+	}
+	dev_dbg(dev,
+		"event 0x%02x/%04x/%02x: no handlers available; RCEB %4ph\n",
+		(unsigned) rceb->bEventType,
+		(unsigned) le16_to_cpu(rceb->wEvent),
+		(unsigned) rceb->bEventContext,
+		rceb);
+	size = -ENOENT;
+out:
+	read_unlock_irqrestore(&uwb_est_lock, flags);
+	return size;
+}
+EXPORT_SYMBOL_GPL(uwb_est_find_size);
diff --git a/drivers/staging/uwb/hwa-rc.c b/drivers/staging/uwb/hwa-rc.c
new file mode 100644
index 000000000000..b6effad749d7
--- /dev/null
+++ b/drivers/staging/uwb/hwa-rc.c
@@ -0,0 +1,929 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * WUSB Host Wire Adapter: Radio Control Interface (WUSB[8.6])
+ * Radio Control command/event transport
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Initialize the Radio Control interface Driver.
+ *
+ * For each device probed, creates an 'struct hwarc' which contains
+ * just the representation of the UWB Radio Controller, and the logic
+ * for reading notifications and passing them to the UWB Core.
+ *
+ * So we initialize all of those, register the UWB Radio Controller
+ * and setup the notification/event handle to pipe the notifications
+ * to the UWB management Daemon.
+ *
+ * Command and event filtering.
+ *
+ * This is the driver for the Radio Control Interface described in WUSB
+ * 1.0. The core UWB module assumes that all drivers are compliant to the
+ * WHCI 0.95 specification. We thus create a filter that parses all
+ * incoming messages from the (WUSB 1.0) device and manipulate them to
+ * conform to the WHCI 0.95 specification. Similarly, outgoing messages
+ * are parsed and manipulated to conform to the WUSB 1.0 compliant messages
+ * that the device expects. Only a few messages are affected:
+ * Affected events:
+ *    UWB_RC_EVT_BEACON
+ *    UWB_RC_EVT_BP_SLOT_CHANGE
+ *    UWB_RC_EVT_DRP_AVAIL
+ *    UWB_RC_EVT_DRP
+ * Affected commands:
+ *    UWB_RC_CMD_SCAN
+ *    UWB_RC_CMD_SET_DRP_IE
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+#include "../wusbcore/include/wusb.h"
+#include "../wusbcore/include/wusb-wa.h"
+#include "uwb.h"
+
+#include "uwb-internal.h"
+
+/* The device uses commands and events from the WHCI specification, although
+ * reporting itself as WUSB compliant. */
+#define WUSB_QUIRK_WHCI_CMD_EVT		0x01
+
+/**
+ * Descriptor for an instance of the UWB Radio Control Driver that
+ * attaches to the RCI interface of the Host Wired Adapter.
+ *
+ * Unless there is a lock specific to the 'data members', all access
+ * is protected by uwb_rc->mutex.
+ *
+ * The NEEP (Notification/Event EndPoint) URB (@neep_urb) writes to
+ * @rd_buffer. Note there is no locking because it is perfectly (heh!)
+ * serialized--probe() submits an URB, callback is called, processes
+ * the data (synchronously), submits another URB, and so on. There is
+ * no concurrent access to the buffer.
+ */
+struct hwarc {
+	struct usb_device *usb_dev;
+	struct usb_interface *usb_iface;
+	struct uwb_rc *uwb_rc;		/* UWB host controller */
+	struct urb *neep_urb;		/* Notification endpoint handling */
+	struct edc neep_edc;
+	void *rd_buffer;		/* NEEP read buffer */
+};
+
+
+/* Beacon received notification (WUSB 1.0 [8.6.3.2]) */
+struct uwb_rc_evt_beacon_WUSB_0100 {
+	struct uwb_rceb rceb;
+	u8	bChannelNumber;
+	__le16	wBPSTOffset;
+	u8	bLQI;
+	u8	bRSSI;
+	__le16	wBeaconInfoLength;
+	u8	BeaconInfo[];
+} __attribute__((packed));
+
+/**
+ * Filter WUSB 1.0 BEACON RCV notification to be WHCI 0.95
+ *
+ * @header: the incoming event
+ * @buf_size: size of buffer containing incoming event
+ * @new_size: size of event after filtering completed
+ *
+ * The WHCI 0.95 spec has a "Beacon Type" field. This value is unknown at
+ * the time we receive the beacon from WUSB so we just set it to
+ * UWB_RC_BEACON_TYPE_NEIGHBOR as a default.
+ * The solution below allocates memory upon receipt of every beacon from a
+ * WUSB device. This will deteriorate performance. What is the right way to
+ * do this?
+ */
+static
+int hwarc_filter_evt_beacon_WUSB_0100(struct uwb_rc *rc,
+				      struct uwb_rceb **header,
+				      const size_t buf_size,
+				      size_t *new_size)
+{
+	struct uwb_rc_evt_beacon_WUSB_0100 *be;
+	struct uwb_rc_evt_beacon *newbe;
+	size_t bytes_left, ielength;
+	struct device *dev = &rc->uwb_dev.dev;
+
+	be = container_of(*header, struct uwb_rc_evt_beacon_WUSB_0100, rceb);
+	bytes_left = buf_size;
+	if (bytes_left < sizeof(*be)) {
+		dev_err(dev, "Beacon Received Notification: Not enough data "
+			"to decode for filtering (%zu vs %zu bytes needed)\n",
+			bytes_left, sizeof(*be));
+		return -EINVAL;
+	}
+	bytes_left -= sizeof(*be);
+	ielength = le16_to_cpu(be->wBeaconInfoLength);
+	if (bytes_left < ielength) {
+		dev_err(dev, "Beacon Received Notification: Not enough data "
+			"to decode IEs (%zu vs %zu bytes needed)\n",
+			bytes_left, ielength);
+		return -EINVAL;
+	}
+	newbe = kzalloc(sizeof(*newbe) + ielength, GFP_ATOMIC);
+	if (newbe == NULL)
+		return -ENOMEM;
+	newbe->rceb = be->rceb;
+	newbe->bChannelNumber = be->bChannelNumber;
+	newbe->bBeaconType = UWB_RC_BEACON_TYPE_NEIGHBOR;
+	newbe->wBPSTOffset = be->wBPSTOffset;
+	newbe->bLQI = be->bLQI;
+	newbe->bRSSI = be->bRSSI;
+	newbe->wBeaconInfoLength = be->wBeaconInfoLength;
+	memcpy(newbe->BeaconInfo, be->BeaconInfo, ielength);
+	*header = &newbe->rceb;
+	*new_size = sizeof(*newbe) + ielength;
+	return 1;  /* calling function will free memory */
+}
+
+
+/* DRP Availability change notification (WUSB 1.0 [8.6.3.8]) */
+struct uwb_rc_evt_drp_avail_WUSB_0100 {
+	struct uwb_rceb rceb;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/**
+ * Filter WUSB 1.0 DRP AVAILABILITY CHANGE notification to be WHCI 0.95
+ *
+ * @header: the incoming event
+ * @buf_size: size of buffer containing incoming event
+ * @new_size: size of event after filtering completed
+ */
+static
+int hwarc_filter_evt_drp_avail_WUSB_0100(struct uwb_rc *rc,
+					 struct uwb_rceb **header,
+					 const size_t buf_size,
+					 size_t *new_size)
+{
+	struct uwb_rc_evt_drp_avail_WUSB_0100 *da;
+	struct uwb_rc_evt_drp_avail *newda;
+	struct uwb_ie_hdr *ie_hdr;
+	size_t bytes_left, ielength;
+	struct device *dev = &rc->uwb_dev.dev;
+
+
+	da = container_of(*header, struct uwb_rc_evt_drp_avail_WUSB_0100, rceb);
+	bytes_left = buf_size;
+	if (bytes_left < sizeof(*da)) {
+		dev_err(dev, "Not enough data to decode DRP Avail "
+			"Notification for filtering. Expected %zu, "
+			"received %zu.\n", (size_t)sizeof(*da), bytes_left);
+		return -EINVAL;
+	}
+	bytes_left -= sizeof(*da);
+	ielength = le16_to_cpu(da->wIELength);
+	if (bytes_left < ielength) {
+		dev_err(dev, "DRP Avail Notification filter: IE length "
+			"[%zu bytes] does not match actual length "
+			"[%zu bytes].\n", ielength, bytes_left);
+		return -EINVAL;
+	}
+	if (ielength < sizeof(*ie_hdr)) {
+		dev_err(dev, "DRP Avail Notification filter: Not enough "
+			"data to decode IE [%zu bytes, %zu needed]\n",
+			ielength, sizeof(*ie_hdr));
+		return -EINVAL;
+	}
+	ie_hdr = (void *) da->IEData;
+	if (ie_hdr->length > 32) {
+		dev_err(dev, "DRP Availability Change event has unexpected "
+			"length for filtering. Expected < 32 bytes, "
+			"got %zu bytes.\n", (size_t)ie_hdr->length);
+		return -EINVAL;
+	}
+	newda = kzalloc(sizeof(*newda), GFP_ATOMIC);
+	if (newda == NULL)
+		return -ENOMEM;
+	newda->rceb = da->rceb;
+	memcpy(newda->bmp, (u8 *) ie_hdr + sizeof(*ie_hdr), ie_hdr->length);
+	*header = &newda->rceb;
+	*new_size = sizeof(*newda);
+	return 1; /* calling function will free memory */
+}
+
+
+/* DRP notification (WUSB 1.0 [8.6.3.9]) */
+struct uwb_rc_evt_drp_WUSB_0100 {
+	struct uwb_rceb rceb;
+	struct uwb_dev_addr wSrcAddr;
+	u8 bExplicit;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/**
+ * Filter WUSB 1.0 DRP Notification to be WHCI 0.95
+ *
+ * @header: the incoming event
+ * @buf_size: size of buffer containing incoming event
+ * @new_size: size of event after filtering completed
+ *
+ * It is hard to manage DRP reservations without having a Reason code.
+ * Unfortunately there is none in the WUSB spec. We just set the default to
+ * DRP IE RECEIVED.
+ * We do not currently use the bBeaconSlotNumber value, so we set this to
+ * zero for now.
+ */
+static
+int hwarc_filter_evt_drp_WUSB_0100(struct uwb_rc *rc,
+				   struct uwb_rceb **header,
+				   const size_t buf_size,
+				   size_t *new_size)
+{
+	struct uwb_rc_evt_drp_WUSB_0100 *drpev;
+	struct uwb_rc_evt_drp *newdrpev;
+	size_t bytes_left, ielength;
+	struct device *dev = &rc->uwb_dev.dev;
+
+	drpev = container_of(*header, struct uwb_rc_evt_drp_WUSB_0100, rceb);
+	bytes_left = buf_size;
+	if (bytes_left < sizeof(*drpev)) {
+		dev_err(dev, "Not enough data to decode DRP Notification "
+			"for filtering. Expected %zu, received %zu.\n",
+			(size_t)sizeof(*drpev), bytes_left);
+		return -EINVAL;
+	}
+	ielength = le16_to_cpu(drpev->wIELength);
+	bytes_left -= sizeof(*drpev);
+	if (bytes_left < ielength) {
+		dev_err(dev, "DRP Notification filter: header length [%zu "
+			"bytes] does not match actual length [%zu "
+			"bytes].\n", ielength, bytes_left);
+		return -EINVAL;
+	}
+	newdrpev = kzalloc(sizeof(*newdrpev) + ielength, GFP_ATOMIC);
+	if (newdrpev == NULL)
+		return -ENOMEM;
+	newdrpev->rceb = drpev->rceb;
+	newdrpev->src_addr = drpev->wSrcAddr;
+	newdrpev->reason = UWB_DRP_NOTIF_DRP_IE_RCVD;
+	newdrpev->beacon_slot_number = 0;
+	newdrpev->ie_length = drpev->wIELength;
+	memcpy(newdrpev->ie_data, drpev->IEData, ielength);
+	*header = &newdrpev->rceb;
+	*new_size = sizeof(*newdrpev) + ielength;
+	return 1; /* calling function will free memory */
+}
+
+
+/* Scan Command (WUSB 1.0 [8.6.2.5]) */
+struct uwb_rc_cmd_scan_WUSB_0100 {
+	struct uwb_rccb rccb;
+	u8 bChannelNumber;
+	u8 bScanState;
+} __attribute__((packed));
+
+/**
+ * Filter WHCI 0.95 SCAN command to be WUSB 1.0 SCAN command
+ *
+ * @header:   command sent to device (compliant to WHCI 0.95)
+ * @size:     size of command sent to device
+ *
+ * We only reduce the size by two bytes because the WUSB 1.0 scan command
+ * does not have the last field (wStarttime). Also, make sure we don't send
+ * the device an unexpected scan type.
+ */
+static
+int hwarc_filter_cmd_scan_WUSB_0100(struct uwb_rc *rc,
+				    struct uwb_rccb **header,
+				    size_t *size)
+{
+	struct uwb_rc_cmd_scan *sc;
+
+	sc = container_of(*header, struct uwb_rc_cmd_scan, rccb);
+
+	if (sc->bScanState == UWB_SCAN_ONLY_STARTTIME)
+		sc->bScanState = UWB_SCAN_ONLY;
+	/* Don't send the last two bytes. */
+	*size -= 2;
+	return 0;
+}
+
+
+/* SET DRP IE command (WUSB 1.0 [8.6.2.7]) */
+struct uwb_rc_cmd_set_drp_ie_WUSB_0100 {
+	struct uwb_rccb rccb;
+	u8 bExplicit;
+	__le16 wIELength;
+	struct uwb_ie_drp IEData[];
+} __attribute__((packed));
+
+/**
+ * Filter WHCI 0.95 SET DRP IE command to be WUSB 1.0 SET DRP IE command
+ *
+ * @header:   command sent to device (compliant to WHCI 0.95)
+ * @size:     size of command sent to device
+ *
+ * WUSB has an extra bExplicit field - we assume always explicit
+ * negotiation so this field is set. The command expected by the device is
+ * thus larger than the one prepared by the driver so we need to
+ * reallocate memory to accommodate this.
+ * We trust the driver to send us the correct data so no checking is done
+ * on incoming data - evn though it is variable length.
+ */
+static
+int hwarc_filter_cmd_set_drp_ie_WUSB_0100(struct uwb_rc *rc,
+					  struct uwb_rccb **header,
+					  size_t *size)
+{
+	struct uwb_rc_cmd_set_drp_ie *orgcmd;
+	struct uwb_rc_cmd_set_drp_ie_WUSB_0100 *cmd;
+	size_t ielength;
+
+	orgcmd = container_of(*header, struct uwb_rc_cmd_set_drp_ie, rccb);
+	ielength = le16_to_cpu(orgcmd->wIELength);
+	cmd = kzalloc(sizeof(*cmd) + ielength, GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+	cmd->rccb = orgcmd->rccb;
+	cmd->bExplicit = 0;
+	cmd->wIELength = orgcmd->wIELength;
+	memcpy(cmd->IEData, orgcmd->IEData, ielength);
+	*header = &cmd->rccb;
+	*size = sizeof(*cmd) + ielength;
+	return 1; /* calling function will free memory */
+}
+
+
+/**
+ * Filter data from WHCI driver to WUSB device
+ *
+ * @header: WHCI 0.95 compliant command from driver
+ * @size:   length of command
+ *
+ * The routine managing commands to the device (uwb_rc_cmd()) will call the
+ * filtering function pointer (if it exists) before it passes any data to
+ * the device. At this time the command has been formatted according to
+ * WHCI 0.95 and is ready to be sent to the device.
+ *
+ * The filter function will be provided with the current command and its
+ * length. The function will manipulate the command if necessary and
+ * potentially reallocate memory for a command that needed more memory that
+ * the given command. If new memory was created the function will return 1
+ * to indicate to the calling function that the memory need to be freed
+ * when not needed any more. The size will contain the new length of the
+ * command.
+ * If memory has not been allocated we rely on the original mechanisms to
+ * free the memory of the command - even when we reduce the value of size.
+ */
+static
+int hwarc_filter_cmd_WUSB_0100(struct uwb_rc *rc, struct uwb_rccb **header,
+			       size_t *size)
+{
+	int result;
+	struct uwb_rccb *rccb = *header;
+	int cmd = le16_to_cpu(rccb->wCommand);
+	switch (cmd) {
+	case UWB_RC_CMD_SCAN:
+		result = hwarc_filter_cmd_scan_WUSB_0100(rc, header, size);
+		break;
+	case UWB_RC_CMD_SET_DRP_IE:
+		result = hwarc_filter_cmd_set_drp_ie_WUSB_0100(rc, header, size);
+		break;
+	default:
+		result = -ENOANO;
+		break;
+	}
+	return result;
+}
+
+
+/**
+ * Filter data from WHCI driver to WUSB device
+ *
+ * @header: WHCI 0.95 compliant command from driver
+ * @size:   length of command
+ *
+ * Filter commands based on which protocol the device supports. The WUSB
+ * errata should be the same as WHCI 0.95 so we do not filter that here -
+ * only WUSB 1.0.
+ */
+static
+int hwarc_filter_cmd(struct uwb_rc *rc, struct uwb_rccb **header,
+		     size_t *size)
+{
+	int result = -ENOANO;
+	if (rc->version == 0x0100)
+		result = hwarc_filter_cmd_WUSB_0100(rc, header, size);
+	return result;
+}
+
+
+/**
+ * Compute return value as sum of incoming value and value at given offset
+ *
+ * @rceb:      event for which we compute the size, it contains a variable
+ *	       length field.
+ * @core_size: size of the "non variable" part of the event
+ * @offset:    place in event where the length of the variable part is stored
+ * @buf_size: total length of buffer in which event arrived - we need to make
+ *	       sure we read the offset in memory that is still part of the event
+ */
+static
+ssize_t hwarc_get_event_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
+			     size_t core_size, size_t offset,
+			     const size_t buf_size)
+{
+	ssize_t size = -ENOSPC;
+	const void *ptr = rceb;
+	size_t type_size = sizeof(__le16);
+	struct device *dev = &rc->uwb_dev.dev;
+
+	if (offset + type_size >= buf_size) {
+		dev_err(dev, "Not enough data to read extra size of event "
+			"0x%02x/%04x/%02x, only got %zu bytes.\n",
+			rceb->bEventType, le16_to_cpu(rceb->wEvent),
+			rceb->bEventContext, buf_size);
+		goto out;
+	}
+	ptr += offset;
+	size = core_size + le16_to_cpu(*(__le16 *)ptr);
+out:
+	return size;
+}
+
+
+/* Beacon slot change notification (WUSB 1.0 [8.6.3.5]) */
+struct uwb_rc_evt_bp_slot_change_WUSB_0100 {
+	struct uwb_rceb rceb;
+	u8 bSlotNumber;
+} __attribute__((packed));
+
+
+/**
+ * Filter data from WUSB device to WHCI driver
+ *
+ * @header:	 incoming event
+ * @buf_size:	 size of buffer in which event arrived
+ * @_event_size: actual size of event in the buffer
+ * @new_size:	 size of event after filtered
+ *
+ * We don't know how the buffer is constructed - there may be more than one
+ * event in it so buffer length does not determine event length. We first
+ * determine the expected size of the incoming event. This value is passed
+ * back only if the actual filtering succeeded (so we know the computed
+ * expected size is correct). This value will be zero if
+ * the event did not need any filtering.
+ *
+ * WHCI interprets the BP Slot Change event's data differently than
+ * WUSB. The event sizes are exactly the same. The data field
+ * indicates the new beacon slot in which a RC is transmitting its
+ * beacon. The maximum value of this is 96 (wMacBPLength ECMA-368
+ * 17.16 (Table 117)). We thus know that the WUSB value will not set
+ * the bit bNoSlot, so we don't really do anything (placeholder).
+ */
+static
+int hwarc_filter_event_WUSB_0100(struct uwb_rc *rc, struct uwb_rceb **header,
+				 const size_t buf_size, size_t *_real_size,
+				 size_t *_new_size)
+{
+	int result = -ENOANO;
+	struct uwb_rceb *rceb = *header;
+	int event = le16_to_cpu(rceb->wEvent);
+	ssize_t event_size;
+	size_t core_size, offset;
+
+	if (rceb->bEventType != UWB_RC_CET_GENERAL)
+		goto out;
+	switch (event) {
+	case UWB_RC_EVT_BEACON:
+		core_size = sizeof(struct uwb_rc_evt_beacon_WUSB_0100);
+		offset = offsetof(struct uwb_rc_evt_beacon_WUSB_0100,
+				  wBeaconInfoLength);
+		event_size = hwarc_get_event_size(rc, rceb, core_size,
+						  offset, buf_size);
+		if (event_size < 0)
+			goto out;
+		*_real_size = event_size;
+		result = hwarc_filter_evt_beacon_WUSB_0100(rc, header,
+							   buf_size, _new_size);
+		break;
+	case UWB_RC_EVT_BP_SLOT_CHANGE:
+		*_new_size = *_real_size =
+			sizeof(struct uwb_rc_evt_bp_slot_change_WUSB_0100);
+		result = 0;
+		break;
+
+	case UWB_RC_EVT_DRP_AVAIL:
+		core_size = sizeof(struct uwb_rc_evt_drp_avail_WUSB_0100);
+		offset = offsetof(struct uwb_rc_evt_drp_avail_WUSB_0100,
+				  wIELength);
+		event_size = hwarc_get_event_size(rc, rceb, core_size,
+						  offset, buf_size);
+		if (event_size < 0)
+			goto out;
+		*_real_size = event_size;
+		result = hwarc_filter_evt_drp_avail_WUSB_0100(
+			rc, header, buf_size, _new_size);
+		break;
+
+	case UWB_RC_EVT_DRP:
+		core_size = sizeof(struct uwb_rc_evt_drp_WUSB_0100);
+		offset = offsetof(struct uwb_rc_evt_drp_WUSB_0100, wIELength);
+		event_size = hwarc_get_event_size(rc, rceb, core_size,
+						  offset, buf_size);
+		if (event_size < 0)
+			goto out;
+		*_real_size = event_size;
+		result = hwarc_filter_evt_drp_WUSB_0100(rc, header,
+							buf_size, _new_size);
+		break;
+
+	default:
+		break;
+	}
+out:
+	return result;
+}
+
+/**
+ * Filter data from WUSB device to WHCI driver
+ *
+ * @header:	 incoming event
+ * @buf_size:	 size of buffer in which event arrived
+ * @_event_size: actual size of event in the buffer
+ * @_new_size:	 size of event after filtered
+ *
+ * Filter events based on which protocol the device supports. The WUSB
+ * errata should be the same as WHCI 0.95 so we do not filter that here -
+ * only WUSB 1.0.
+ *
+ * If we don't handle it, we return -ENOANO (why the weird error code?
+ * well, so if I get it, I can pinpoint in the code that raised
+ * it...after all, not too many places use the higher error codes).
+ */
+static
+int hwarc_filter_event(struct uwb_rc *rc, struct uwb_rceb **header,
+		       const size_t buf_size, size_t *_real_size,
+		       size_t *_new_size)
+{
+	int result = -ENOANO;
+	if (rc->version == 0x0100)
+		result =  hwarc_filter_event_WUSB_0100(
+			rc, header, buf_size, _real_size, _new_size);
+	return result;
+}
+
+
+/**
+ * Execute an UWB RC command on HWA
+ *
+ * @rc:	      Instance of a Radio Controller that is a HWA
+ * @cmd:      Buffer containing the RCCB and payload to execute
+ * @cmd_size: Size of the command buffer.
+ *
+ * NOTE: rc's mutex has to be locked
+ */
+static
+int hwarc_cmd(struct uwb_rc *uwb_rc, const struct uwb_rccb *cmd, size_t cmd_size)
+{
+	struct hwarc *hwarc = uwb_rc->priv;
+	return usb_control_msg(
+		hwarc->usb_dev, usb_sndctrlpipe(hwarc->usb_dev, 0),
+		WA_EXEC_RC_CMD, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+		0, hwarc->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+		(void *) cmd, cmd_size, 100 /* FIXME: this is totally arbitrary */);
+}
+
+static
+int hwarc_reset(struct uwb_rc *uwb_rc)
+{
+	struct hwarc *hwarc = uwb_rc->priv;
+	int result;
+
+	/* device lock must be held when calling usb_reset_device. */
+	result = usb_lock_device_for_reset(hwarc->usb_dev, NULL);
+	if (result >= 0) {
+		result = usb_reset_device(hwarc->usb_dev);
+		usb_unlock_device(hwarc->usb_dev);
+	}
+
+	return result;
+}
+
+/**
+ * Callback for the notification and event endpoint
+ *
+ * Check's that everything is fine and then passes the read data to
+ * the notification/event handling mechanism (neh).
+ */
+static
+void hwarc_neep_cb(struct urb *urb)
+{
+	struct hwarc *hwarc = urb->context;
+	struct usb_interface *usb_iface = hwarc->usb_iface;
+	struct device *dev = &usb_iface->dev;
+	int result;
+
+	switch (result = urb->status) {
+	case 0:
+		uwb_rc_neh_grok(hwarc->uwb_rc, urb->transfer_buffer,
+				urb->actual_length);
+		break;
+	case -ECONNRESET:	/* Not an error, but a controlled situation; */
+	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
+		goto out;
+	case -ESHUTDOWN:	/* going away! */
+		goto out;
+	default:		/* On general errors, retry unless it gets ugly */
+		if (edc_inc(&hwarc->neep_edc, EDC_MAX_ERRORS,
+			    EDC_ERROR_TIMEFRAME))
+			goto error_exceeded;
+		dev_err(dev, "NEEP: URB error %d\n", urb->status);
+	}
+	result = usb_submit_urb(urb, GFP_ATOMIC);
+	if (result < 0 && result != -ENODEV && result != -EPERM) {
+		/* ignoring unrecoverable errors */
+		dev_err(dev, "NEEP: Can't resubmit URB (%d) resetting device\n",
+			result);
+		goto error;
+	}
+out:
+	return;
+
+error_exceeded:
+	dev_err(dev, "NEEP: URB max acceptable errors "
+		"exceeded, resetting device\n");
+error:
+	uwb_rc_neh_error(hwarc->uwb_rc, result);
+	uwb_rc_reset_all(hwarc->uwb_rc);
+	return;
+}
+
+static void hwarc_init(struct hwarc *hwarc)
+{
+	edc_init(&hwarc->neep_edc);
+}
+
+/**
+ * Initialize the notification/event endpoint stuff
+ *
+ * Note this is effectively a parallel thread; it knows that
+ * hwarc->uwb_rc always exists because the existence of a 'hwarc'
+ * means that there is a reverence on the hwarc->uwb_rc (see
+ * _probe()), and thus _neep_cb() can execute safely.
+ */
+static int hwarc_neep_init(struct uwb_rc *rc)
+{
+	struct hwarc *hwarc = rc->priv;
+	struct usb_interface *iface = hwarc->usb_iface;
+	struct usb_device *usb_dev = interface_to_usbdev(iface);
+	struct device *dev = &iface->dev;
+	int result;
+	struct usb_endpoint_descriptor *epd;
+
+	epd = &iface->cur_altsetting->endpoint[0].desc;
+	hwarc->rd_buffer = (void *) __get_free_page(GFP_KERNEL);
+	if (hwarc->rd_buffer == NULL) {
+		dev_err(dev, "Unable to allocate notification's read buffer\n");
+		goto error_rd_buffer;
+	}
+	hwarc->neep_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (hwarc->neep_urb == NULL)
+		goto error_urb_alloc;
+	usb_fill_int_urb(hwarc->neep_urb, usb_dev,
+			 usb_rcvintpipe(usb_dev, epd->bEndpointAddress),
+			 hwarc->rd_buffer, PAGE_SIZE,
+			 hwarc_neep_cb, hwarc, epd->bInterval);
+	result = usb_submit_urb(hwarc->neep_urb, GFP_ATOMIC);
+	if (result < 0) {
+		dev_err(dev, "Cannot submit notification URB: %d\n", result);
+		goto error_neep_submit;
+	}
+	return 0;
+
+error_neep_submit:
+	usb_free_urb(hwarc->neep_urb);
+	hwarc->neep_urb = NULL;
+error_urb_alloc:
+	free_page((unsigned long)hwarc->rd_buffer);
+	hwarc->rd_buffer = NULL;
+error_rd_buffer:
+	return -ENOMEM;
+}
+
+
+/** Clean up all the notification endpoint resources */
+static void hwarc_neep_release(struct uwb_rc *rc)
+{
+	struct hwarc *hwarc = rc->priv;
+
+	usb_kill_urb(hwarc->neep_urb);
+	usb_free_urb(hwarc->neep_urb);
+	hwarc->neep_urb = NULL;
+
+	free_page((unsigned long)hwarc->rd_buffer);
+	hwarc->rd_buffer = NULL;
+}
+
+/**
+ * Get the version from class-specific descriptor
+ *
+ * NOTE: this descriptor comes with the big bundled configuration
+ *	 descriptor that includes the interfaces' and endpoints', so
+ *	 we just look for it in the cached copy kept by the USB stack.
+ *
+ * NOTE2: We convert LE fields to CPU order.
+ */
+static int hwarc_get_version(struct uwb_rc *rc)
+{
+	int result;
+
+	struct hwarc *hwarc = rc->priv;
+	struct uwb_rc_control_intf_class_desc *descr;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct usb_device *usb_dev = hwarc->usb_dev;
+	char *itr;
+	struct usb_descriptor_header *hdr;
+	size_t itr_size, actconfig_idx;
+	u16 version;
+
+	actconfig_idx = (usb_dev->actconfig - usb_dev->config) /
+		sizeof(usb_dev->config[0]);
+	itr = usb_dev->rawdescriptors[actconfig_idx];
+	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
+	while (itr_size >= sizeof(*hdr)) {
+		hdr = (struct usb_descriptor_header *) itr;
+		dev_dbg(dev, "Extra device descriptor: "
+			"type %02x/%u bytes @ %zu (%zu left)\n",
+			hdr->bDescriptorType, hdr->bLength,
+			(itr - usb_dev->rawdescriptors[actconfig_idx]),
+			itr_size);
+		if (hdr->bDescriptorType == USB_DT_CS_RADIO_CONTROL)
+			goto found;
+		itr += hdr->bLength;
+		itr_size -= hdr->bLength;
+	}
+	dev_err(dev, "cannot find Radio Control Interface Class descriptor\n");
+	return -ENODEV;
+
+found:
+	result = -EINVAL;
+	if (hdr->bLength > itr_size) {	/* is it available? */
+		dev_err(dev, "incomplete Radio Control Interface Class "
+			"descriptor (%zu bytes left, %u needed)\n",
+			itr_size, hdr->bLength);
+		goto error;
+	}
+	if (hdr->bLength < sizeof(*descr)) {
+		dev_err(dev, "short Radio Control Interface Class "
+			"descriptor\n");
+		goto error;
+	}
+	descr = (struct uwb_rc_control_intf_class_desc *) hdr;
+	/* Make LE fields CPU order */
+	version = __le16_to_cpu(descr->bcdRCIVersion);
+	if (version != 0x0100) {
+		dev_err(dev, "Device reports protocol version 0x%04x. We "
+			"do not support that. \n", version);
+		result = -EINVAL;
+		goto error;
+	}
+	rc->version = version;
+	dev_dbg(dev, "Device supports WUSB protocol version 0x%04x \n",	rc->version);
+	result = 0;
+error:
+	return result;
+}
+
+/*
+ * By creating a 'uwb_rc', we have a reference on it -- that reference
+ * is the one we drop when we disconnect.
+ *
+ * No need to switch altsettings; according to WUSB1.0[8.6.1.1], there
+ * is only one altsetting allowed.
+ */
+static int hwarc_probe(struct usb_interface *iface,
+		       const struct usb_device_id *id)
+{
+	int result;
+	struct uwb_rc *uwb_rc;
+	struct hwarc *hwarc;
+	struct device *dev = &iface->dev;
+
+	if (iface->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+	if (!usb_endpoint_xfer_int(&iface->cur_altsetting->endpoint[0].desc))
+		return -ENODEV;
+
+	result = -ENOMEM;
+	uwb_rc = uwb_rc_alloc();
+	if (uwb_rc == NULL) {
+		dev_err(dev, "unable to allocate RC instance\n");
+		goto error_rc_alloc;
+	}
+	hwarc = kzalloc(sizeof(*hwarc), GFP_KERNEL);
+	if (hwarc == NULL) {
+		dev_err(dev, "unable to allocate HWA RC instance\n");
+		goto error_alloc;
+	}
+	hwarc_init(hwarc);
+	hwarc->usb_dev = usb_get_dev(interface_to_usbdev(iface));
+	hwarc->usb_iface = usb_get_intf(iface);
+	hwarc->uwb_rc = uwb_rc;
+
+	uwb_rc->owner = THIS_MODULE;
+	uwb_rc->start = hwarc_neep_init;
+	uwb_rc->stop  = hwarc_neep_release;
+	uwb_rc->cmd   = hwarc_cmd;
+	uwb_rc->reset = hwarc_reset;
+	if (id->driver_info & WUSB_QUIRK_WHCI_CMD_EVT) {
+		uwb_rc->filter_cmd   = NULL;
+		uwb_rc->filter_event = NULL;
+	} else {
+		uwb_rc->filter_cmd   = hwarc_filter_cmd;
+		uwb_rc->filter_event = hwarc_filter_event;
+	}
+
+	result = uwb_rc_add(uwb_rc, dev, hwarc);
+	if (result < 0)
+		goto error_rc_add;
+	result = hwarc_get_version(uwb_rc);
+	if (result < 0) {
+		dev_err(dev, "cannot retrieve version of RC \n");
+		goto error_get_version;
+	}
+	usb_set_intfdata(iface, hwarc);
+	return 0;
+
+error_get_version:
+	uwb_rc_rm(uwb_rc);
+error_rc_add:
+	usb_put_intf(iface);
+	usb_put_dev(hwarc->usb_dev);
+	kfree(hwarc);
+error_alloc:
+	uwb_rc_put(uwb_rc);
+error_rc_alloc:
+	return result;
+}
+
+static void hwarc_disconnect(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	usb_set_intfdata(hwarc->usb_iface, NULL);
+	uwb_rc_rm(uwb_rc);
+	usb_put_intf(hwarc->usb_iface);
+	usb_put_dev(hwarc->usb_dev);
+	kfree(hwarc);
+	uwb_rc_put(uwb_rc);	/* when creating the device, refcount = 1 */
+}
+
+static int hwarc_pre_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int hwarc_post_reset(struct usb_interface *iface)
+{
+	struct hwarc *hwarc = usb_get_intfdata(iface);
+	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
+
+	return uwb_rc_post_reset(uwb_rc);
+}
+
+/** USB device ID's that we handle */
+static const struct usb_device_id hwarc_id_table[] = {
+	/* D-Link DUB-1210 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3d02, 0xe0, 0x01, 0x02),
+	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
+	/* Intel i1480 (using firmware 1.3PA2-20070828) */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x8086, 0x0c3b, 0xe0, 0x01, 0x02),
+	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
+	/* Alereon 5310 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5310, 0xe0, 0x01, 0x02),
+	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
+	/* Alereon 5611 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5611, 0xe0, 0x01, 0x02),
+	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
+	/* Generic match for the Radio Control interface */
+	{ USB_INTERFACE_INFO(0xe0, 0x01, 0x02), },
+	{ },
+};
+MODULE_DEVICE_TABLE(usb, hwarc_id_table);
+
+static struct usb_driver hwarc_driver = {
+	.name =		"hwa-rc",
+	.id_table =	hwarc_id_table,
+	.probe =	hwarc_probe,
+	.disconnect =	hwarc_disconnect,
+	.pre_reset =    hwarc_pre_reset,
+	.post_reset =   hwarc_post_reset,
+};
+
+module_usb_driver(hwarc_driver);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Host Wireless Adapter Radio Control Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/uwb/i1480/Makefile b/drivers/staging/uwb/i1480/Makefile
new file mode 100644
index 000000000000..d26fb9b845ae
--- /dev/null
+++ b/drivers/staging/uwb/i1480/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_UWB_I1480U)	+= dfu/ i1480-est.o
diff --git a/drivers/staging/uwb/i1480/dfu/Makefile b/drivers/staging/uwb/i1480/dfu/Makefile
new file mode 100644
index 000000000000..4739fdac5922
--- /dev/null
+++ b/drivers/staging/uwb/i1480/dfu/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_UWB_I1480U)	+= i1480-dfu-usb.o
+
+i1480-dfu-usb-objs := \
+	dfu.o 	\
+	mac.o	\
+	phy.o	\
+	usb.o
+
+
diff --git a/drivers/staging/uwb/i1480/dfu/dfu.c b/drivers/staging/uwb/i1480/dfu/dfu.c
new file mode 100644
index 000000000000..9d51ce8faad1
--- /dev/null
+++ b/drivers/staging/uwb/i1480/dfu/dfu.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless UWB Link 1480
+ * Main driver
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Common code for firmware upload used by the USB and PCI version;
+ * i1480_fw_upload() takes a device descriptor and uses the function
+ * pointers it provides to upload firmware and prepare the PHY.
+ *
+ * As well, provides common functions used by the rest of the code.
+ */
+#include "i1480-dfu.h"
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/random.h>
+#include <linux/export.h>
+#include "../../uwb.h"
+
+/*
+ * i1480_rceb_check - Check RCEB for expected field values
+ * @i1480: pointer to device for which RCEB is being checked
+ * @rceb: RCEB being checked
+ * @cmd: which command the RCEB is related to
+ * @context: expected context
+ * @expected_type: expected event type
+ * @expected_event: expected event
+ *
+ * If @cmd is NULL, do not print error messages, but still return an error
+ * code.
+ *
+ * Return 0 if @rceb matches the expected values, -EINVAL otherwise.
+ */
+int i1480_rceb_check(const struct i1480 *i1480, const struct uwb_rceb *rceb,
+		     const char *cmd, u8 context, u8 expected_type,
+		     unsigned expected_event)
+{
+	int result = 0;
+	struct device *dev = i1480->dev;
+	if (rceb->bEventContext != context) {
+		if (cmd)
+			dev_err(dev, "%s: unexpected context id 0x%02x "
+				"(expected 0x%02x)\n", cmd,
+				rceb->bEventContext, context);
+		result = -EINVAL;
+	}
+	if (rceb->bEventType != expected_type) {
+		if (cmd)
+			dev_err(dev, "%s: unexpected event type 0x%02x "
+				"(expected 0x%02x)\n", cmd,
+				rceb->bEventType, expected_type);
+		result = -EINVAL;
+	}
+	if (le16_to_cpu(rceb->wEvent) != expected_event) {
+		if (cmd)
+			dev_err(dev, "%s: unexpected event 0x%04x "
+				"(expected 0x%04x)\n", cmd,
+				le16_to_cpu(rceb->wEvent), expected_event);
+		result = -EINVAL;
+	}
+	return result;
+}
+EXPORT_SYMBOL_GPL(i1480_rceb_check);
+
+
+/*
+ * Execute a Radio Control Command
+ *
+ * Command data has to be in i1480->cmd_buf.
+ *
+ * @returns size of the reply data filled in i1480->evt_buf or < 0 errno
+ *          code on error.
+ */
+ssize_t i1480_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size,
+		  size_t reply_size)
+{
+	ssize_t result;
+	struct uwb_rceb *reply = i1480->evt_buf;
+	struct uwb_rccb *cmd = i1480->cmd_buf;
+	u16 expected_event = reply->wEvent;
+	u8 expected_type = reply->bEventType;
+	u8 context;
+
+	init_completion(&i1480->evt_complete);
+	i1480->evt_result = -EINPROGRESS;
+	do {
+		get_random_bytes(&context, 1);
+	} while (context == 0x00 || context == 0xff);
+	cmd->bCommandContext = context;
+	result = i1480->cmd(i1480, cmd_name, cmd_size);
+	if (result < 0)
+		goto error;
+	/* wait for the callback to report a event was received */
+	result = wait_for_completion_interruptible_timeout(
+		&i1480->evt_complete, HZ);
+	if (result == 0) {
+		result = -ETIMEDOUT;
+		goto error;
+	}
+	if (result < 0)
+		goto error;
+	result = i1480->evt_result;
+	if (result < 0) {
+		dev_err(i1480->dev, "%s: command reply reception failed: %zd\n",
+			cmd_name, result);
+		goto error;
+	}
+	/*
+	 * Firmware versions >= 1.4.12224 for IOGear GUWA100U generate a
+	 * spurious notification after firmware is downloaded. So check whether
+	 * the receibed RCEB is such notification before assuming that the
+	 * command has failed.
+	 */
+	if (i1480_rceb_check(i1480, i1480->evt_buf, NULL,
+			     0, 0xfd, 0x0022) == 0) {
+		/* Now wait for the actual RCEB for this command. */
+		result = i1480->wait_init_done(i1480);
+		if (result < 0)
+			goto error;
+		result = i1480->evt_result;
+	}
+	if (result != reply_size) {
+		dev_err(i1480->dev, "%s returned only %zu bytes, %zu expected\n",
+			cmd_name, result, reply_size);
+		result = -EINVAL;
+		goto error;
+	}
+	/* Verify we got the right event in response */
+	result = i1480_rceb_check(i1480, i1480->evt_buf, cmd_name, context,
+				  expected_type, expected_event);
+error:
+	return result;
+}
+EXPORT_SYMBOL_GPL(i1480_cmd);
+
+
+static
+int i1480_print_state(struct i1480 *i1480)
+{
+	int result;
+	u32 *buf = (u32 *) i1480->cmd_buf;
+
+	result = i1480->read(i1480, 0x80080000, 2 * sizeof(*buf));
+	if (result < 0) {
+		dev_err(i1480->dev, "cannot read U & L states: %d\n", result);
+		goto error;
+	}
+	dev_info(i1480->dev, "state U 0x%08x, L 0x%08x\n", buf[0], buf[1]);
+error:
+	return result;
+}
+
+
+/*
+ * PCI probe, firmware uploader
+ *
+ * _mac_fw_upload() will call rc_setup(), which needs an rc_release().
+ */
+int i1480_fw_upload(struct i1480 *i1480)
+{
+	int result;
+
+	result = i1480_pre_fw_upload(i1480);	/* PHY pre fw */
+	if (result < 0 && result != -ENOENT) {
+		i1480_print_state(i1480);
+		goto error;
+	}
+	result = i1480_mac_fw_upload(i1480);	/* MAC fw */
+	if (result < 0) {
+		if (result == -ENOENT)
+			dev_err(i1480->dev, "Cannot locate MAC FW file '%s'\n",
+				i1480->mac_fw_name);
+		else
+			i1480_print_state(i1480);
+		goto error;
+	}
+	result = i1480_phy_fw_upload(i1480);	/* PHY fw */
+	if (result < 0 && result != -ENOENT) {
+		i1480_print_state(i1480);
+		goto error_rc_release;
+	}
+	/*
+	 * FIXME: find some reliable way to check whether firmware is running
+	 * properly. Maybe use some standard request that has no side effects?
+	 */
+	dev_info(i1480->dev, "firmware uploaded successfully\n");
+error_rc_release:
+	if (i1480->rc_release)
+		i1480->rc_release(i1480);
+	result = 0;
+error:
+	return result;
+}
+EXPORT_SYMBOL_GPL(i1480_fw_upload);
diff --git a/drivers/staging/uwb/i1480/dfu/i1480-dfu.h b/drivers/staging/uwb/i1480/dfu/i1480-dfu.h
new file mode 100644
index 000000000000..b21d058ecc23
--- /dev/null
+++ b/drivers/staging/uwb/i1480/dfu/i1480-dfu.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * i1480 Device Firmware Upload
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This driver is the firmware uploader for the Intel Wireless UWB
+ * Link 1480 device (both in the USB and PCI incarnations).
+ *
+ * The process is quite simple: we stop the device, write the firmware
+ * to its memory and then restart it. Wait for the device to let us
+ * know it is done booting firmware. Ready.
+ *
+ * We might have to upload before or after a phy firmware (which might
+ * be done in two methods, using a normal firmware image or through
+ * the MPI port).
+ *
+ * Because USB and PCI use common methods, we just make ops out of the
+ * common operations (read, write, wait_init_done and cmd) and
+ * implement them in usb.c and pci.c.
+ *
+ * The flow is (some parts omitted):
+ *
+ * i1480_{usb,pci}_probe()	  On enumerate/discovery
+ *   i1480_fw_upload()
+ *     i1480_pre_fw_upload()
+ *       __mac_fw_upload()
+ *         fw_hdrs_load()
+ *         mac_fw_hdrs_push()
+ *           i1480->write()       [i1480_{usb,pci}_write()]
+ *           i1480_fw_cmp()
+ *             i1480->read()      [i1480_{usb,pci}_read()]
+ *     i1480_mac_fw_upload()
+ *       __mac_fw_upload()
+ *       i1480->setup(()
+ *       i1480->wait_init_done()
+ *       i1480_cmd_reset()
+ *         i1480->cmd()           [i1480_{usb,pci}_cmd()]
+ *         ...
+ *     i1480_phy_fw_upload()
+ *       request_firmware()
+ *       i1480_mpi_write()
+ *         i1480->cmd()           [i1480_{usb,pci}_cmd()]
+ *
+ * Once the probe function enumerates the device and uploads the
+ * firmware, we just exit with -ENODEV, as we don't really want to
+ * attach to the device.
+ */
+#ifndef __i1480_DFU_H__
+#define __i1480_DFU_H__
+
+#include <linux/types.h>
+#include <linux/completion.h>
+#include "../../include/spec.h"
+
+#define i1480_FW_UPLOAD_MODE_MASK (cpu_to_le32(0x00000018))
+
+#if i1480_FW > 0x00000302
+#define i1480_RCEB_EXTENDED
+#endif
+
+struct uwb_rccb;
+struct uwb_rceb;
+
+/*
+ * Common firmware upload handlers
+ *
+ * Normally you embed this struct in another one specific to your hw.
+ *
+ * @write	Write to device's memory from buffer.
+ * @read	Read from device's memory to i1480->evt_buf.
+ * @setup	Setup device after basic firmware is uploaded
+ * @wait_init_done
+ *              Wait for the device to send a notification saying init
+ *              is done.
+ * @cmd         FOP for issuing the command to the hardware. The
+ *              command data is contained in i1480->cmd_buf and the size
+ *              is supplied as an argument. The command replied is put
+ *              in i1480->evt_buf and the size in i1480->evt_result (or if
+ *              an error, a < 0 errno code).
+ *
+ * @cmd_buf	Memory buffer used to send commands to the device.
+ *              Allocated by the upper layers i1480_fw_upload().
+ *              Size has to be @buf_size.
+ * @evt_buf	Memory buffer used to place the async notifications
+ *              received by the hw. Allocated by the upper layers
+ *              i1480_fw_upload().
+ *              Size has to be @buf_size.
+ * @cmd_complete
+ *              Low level driver uses this to notify code waiting afor
+ *              an event that the event has arrived and data is in
+ *              i1480->evt_buf (and size/result in i1480->evt_result).
+ * @hw_rev
+ *              Use this value to activate dfu code to support new revisions
+ *              of hardware.  i1480_init() sets this to a default value.
+ *              It should be updated by the USB and PCI code.
+ */
+struct i1480 {
+	struct device *dev;
+
+	int (*write)(struct i1480 *, u32 addr, const void *, size_t);
+	int (*read)(struct i1480 *, u32 addr, size_t);
+	int (*rc_setup)(struct i1480 *);
+	void (*rc_release)(struct i1480 *);
+	int (*wait_init_done)(struct i1480 *);
+	int (*cmd)(struct i1480 *, const char *cmd_name, size_t cmd_size);
+	const char *pre_fw_name;
+	const char *mac_fw_name;
+	const char *mac_fw_name_deprecate;	/* FIXME: Will go away */
+	const char *phy_fw_name;
+	u8 hw_rev;
+
+	size_t buf_size;	/* size of both evt_buf and cmd_buf */
+	void *evt_buf, *cmd_buf;
+	ssize_t evt_result;
+	struct completion evt_complete;
+};
+
+static inline
+void i1480_init(struct i1480 *i1480)
+{
+	i1480->hw_rev = 1;
+	init_completion(&i1480->evt_complete);
+}
+
+extern int i1480_fw_upload(struct i1480 *);
+extern int i1480_pre_fw_upload(struct i1480 *);
+extern int i1480_mac_fw_upload(struct i1480 *);
+extern int i1480_phy_fw_upload(struct i1480 *);
+extern ssize_t i1480_cmd(struct i1480 *, const char *, size_t, size_t);
+extern int i1480_rceb_check(const struct i1480 *,
+			    const struct uwb_rceb *, const char *, u8,
+			    u8, unsigned);
+
+enum {
+	/* Vendor specific command type */
+	i1480_CET_VS1 = 		0xfd,
+	/* i1480 commands */
+	i1480_CMD_SET_IP_MAS = 		0x000e,
+	i1480_CMD_GET_MAC_PHY_INFO = 	0x0003,
+	i1480_CMD_MPI_WRITE =		0x000f,
+	i1480_CMD_MPI_READ = 		0x0010,
+	/* i1480 events */
+#if i1480_FW > 0x00000302
+	i1480_EVT_CONFIRM = 		0x0002,
+	i1480_EVT_RM_INIT_DONE = 	0x0101,
+	i1480_EVT_DEV_ADD = 		0x0103,
+	i1480_EVT_DEV_RM = 		0x0104,
+	i1480_EVT_DEV_ID_CHANGE = 	0x0105,
+	i1480_EVT_GET_MAC_PHY_INFO =	i1480_CMD_GET_MAC_PHY_INFO,
+#else
+	i1480_EVT_CONFIRM = 		0x0002,
+	i1480_EVT_RM_INIT_DONE = 	0x0101,
+	i1480_EVT_DEV_ADD = 		0x0103,
+	i1480_EVT_DEV_RM = 		0x0104,
+	i1480_EVT_DEV_ID_CHANGE = 	0x0105,
+	i1480_EVT_GET_MAC_PHY_INFO =	i1480_EVT_CONFIRM,
+#endif
+};
+
+
+struct i1480_evt_confirm {
+	struct uwb_rceb rceb;
+#ifdef i1480_RCEB_EXTENDED
+	__le16 wParamLength;
+#endif
+	u8 bResultCode;
+} __attribute__((packed));
+
+
+struct i1480_rceb {
+	struct uwb_rceb rceb;
+#ifdef i1480_RCEB_EXTENDED
+	__le16 wParamLength;
+#endif
+} __attribute__((packed));
+
+
+/**
+ * Get MAC & PHY Information confirm event structure
+ *
+ * Confirm event returned by the command.
+ */
+struct i1480_evt_confirm_GMPI {
+#if i1480_FW > 0x00000302
+	struct uwb_rceb rceb;
+	__le16 wParamLength;
+	__le16 status;
+	u8 mac_addr[6];		/* EUI-64 bit IEEE address [still 8 bytes?] */
+	u8 dev_addr[2];
+	__le16 mac_fw_rev;	/* major = v >> 8; minor = v & 0xff */
+	u8 hw_rev;
+	u8 phy_vendor;
+	u8 phy_rev;		/* major v = >> 8; minor = v & 0xff */
+	__le16 mac_caps;
+	u8 phy_caps[3];
+	u8 key_stores;
+	__le16 mcast_addr_stores;
+	u8 sec_mode_supported;
+#else
+	struct uwb_rceb rceb;
+	u8 status;
+	u8 mac_addr[8];         /* EUI-64 bit IEEE address [still 8 bytes?] */
+	u8 dev_addr[2];
+	__le16 mac_fw_rev;      /* major = v >> 8; minor = v & 0xff */
+	__le16 phy_fw_rev;      /* major v = >> 8; minor = v & 0xff */
+	__le16 mac_caps;
+	u8 phy_caps;
+	u8 key_stores;
+	__le16 mcast_addr_stores;
+	u8 sec_mode_supported;
+#endif
+} __attribute__((packed));
+
+
+struct i1480_cmd_mpi_write {
+	struct uwb_rccb rccb;
+	__le16 size;
+	u8 data[];
+};
+
+
+struct i1480_cmd_mpi_read {
+	struct uwb_rccb rccb;
+	__le16 size;
+	struct {
+		u8 page, offset;
+	} __attribute__((packed)) data[];
+} __attribute__((packed));
+
+
+struct i1480_evt_mpi_read {
+	struct uwb_rceb rceb;
+#ifdef i1480_RCEB_EXTENDED
+	__le16 wParamLength;
+#endif
+	u8 bResultCode;
+	__le16 size;
+	struct {
+		u8 page, offset, value;
+	} __attribute__((packed)) data[];
+} __attribute__((packed));
+
+
+#endif /* #ifndef __i1480_DFU_H__ */
diff --git a/drivers/staging/uwb/i1480/dfu/mac.c b/drivers/staging/uwb/i1480/dfu/mac.c
new file mode 100644
index 000000000000..6e4d6c9cecf5
--- /dev/null
+++ b/drivers/staging/uwb/i1480/dfu/mac.c
@@ -0,0 +1,496 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless UWB Link 1480
+ * MAC Firmware upload implementation
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Implementation of the code for parsing the firmware file (extract
+ * the headers and binary code chunks) in the fw_*() functions. The
+ * code to upload pre and mac firmwares is the same, so it uses a
+ * common entry point in __mac_fw_upload(), which uses the i1480
+ * function pointers to push the firmware to the device.
+ */
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include "../../uwb.h"
+#include "i1480-dfu.h"
+
+/*
+ * Descriptor for a continuous segment of MAC fw data
+ */
+struct fw_hdr {
+	unsigned long address;
+	size_t length;
+	const u32 *bin;
+	struct fw_hdr *next;
+};
+
+
+/* Free a chain of firmware headers */
+static
+void fw_hdrs_free(struct fw_hdr *hdr)
+{
+	struct fw_hdr *next;
+
+	while (hdr) {
+		next = hdr->next;
+		kfree(hdr);
+		hdr = next;
+	}
+}
+
+
+/* Fill a firmware header descriptor from a memory buffer */
+static
+int fw_hdr_load(struct i1480 *i1480, struct fw_hdr *hdr, unsigned hdr_cnt,
+		const char *_data, const u32 *data_itr, const u32 *data_top)
+{
+	size_t hdr_offset =  (const char *) data_itr - _data;
+	size_t remaining_size = (void *) data_top - (void *) data_itr;
+	if (data_itr + 2 > data_top) {
+		dev_err(i1480->dev, "fw hdr #%u/%zu: EOF reached in header at "
+		       "offset %zu, limit %zu\n",
+		       hdr_cnt, hdr_offset,
+		       (const char *) data_itr + 2 - _data,
+		       (const char *) data_top - _data);
+		return -EINVAL;
+	}
+	hdr->next = NULL;
+	hdr->address = le32_to_cpu(*data_itr++);
+	hdr->length = le32_to_cpu(*data_itr++);
+	hdr->bin = data_itr;
+	if (hdr->length > remaining_size) {
+		dev_err(i1480->dev, "fw hdr #%u/%zu: EOF reached in data; "
+		       "chunk too long (%zu bytes), only %zu left\n",
+		       hdr_cnt, hdr_offset, hdr->length, remaining_size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
+/**
+ * Get a buffer where the firmware is supposed to be and create a
+ * chain of headers linking them together.
+ *
+ * @phdr: where to place the pointer to the first header (headers link
+ *        to the next via the @hdr->next ptr); need to free the whole
+ *        chain when done.
+ *
+ * @_data: Pointer to the data buffer.
+ *
+ * @_data_size: Size of the data buffer (bytes); data size has to be a
+ *              multiple of 4. Function will fail if not.
+ *
+ * Goes over the whole binary blob; reads the first chunk and creates
+ * a fw hdr from it (which points to where the data is in @_data and
+ * the length of the chunk); then goes on to the next chunk until
+ * done. Each header is linked to the next.
+ */
+static
+int fw_hdrs_load(struct i1480 *i1480, struct fw_hdr **phdr,
+		 const char *_data, size_t data_size)
+{
+	int result;
+	unsigned hdr_cnt = 0;
+	u32 *data = (u32 *) _data, *data_itr, *data_top;
+	struct fw_hdr *hdr, **prev_hdr = phdr;
+
+	result = -EINVAL;
+	/* Check size is ok and pointer is aligned */
+	if (data_size % sizeof(u32) != 0)
+		goto error;
+	if ((unsigned long) _data % sizeof(u16) != 0)
+		goto error;
+	*phdr = NULL;
+	data_itr = data;
+	data_top = (u32 *) (_data + data_size);
+	while (data_itr < data_top) {
+		result = -ENOMEM;
+		hdr = kmalloc(sizeof(*hdr), GFP_KERNEL);
+		if (hdr == NULL) {
+			dev_err(i1480->dev, "Cannot allocate fw header "
+			       "for chunk #%u\n", hdr_cnt);
+			goto error_alloc;
+		}
+		result = fw_hdr_load(i1480, hdr, hdr_cnt,
+				     _data, data_itr, data_top);
+		if (result < 0)
+			goto error_load;
+		data_itr += 2 + hdr->length;
+		*prev_hdr = hdr;
+		prev_hdr = &hdr->next;
+		hdr_cnt++;
+	};
+	*prev_hdr = NULL;
+	return 0;
+
+error_load:
+	kfree(hdr);
+error_alloc:
+	fw_hdrs_free(*phdr);
+error:
+	return result;
+}
+
+
+/**
+ * Compares a chunk of fw with one in the devices's memory
+ *
+ * @i1480:     Device instance
+ * @hdr:     Pointer to the firmware chunk
+ * @returns: 0 if equal, < 0 errno on error. If > 0, it is the offset
+ *           where the difference was found (plus one).
+ *
+ * Kind of dirty and simplistic, but does the trick in both the PCI
+ * and USB version. We do a quick[er] memcmp(), and if it fails, we do
+ * a byte-by-byte to find the offset.
+ */
+static
+ssize_t i1480_fw_cmp(struct i1480 *i1480, struct fw_hdr *hdr)
+{
+	ssize_t result = 0;
+	u32 src_itr = 0, cnt;
+	size_t size = hdr->length*sizeof(hdr->bin[0]);
+	size_t chunk_size;
+	u8 *bin = (u8 *) hdr->bin;
+
+	while (size > 0) {
+		chunk_size = size < i1480->buf_size ? size : i1480->buf_size;
+		result = i1480->read(i1480, hdr->address + src_itr, chunk_size);
+		if (result < 0) {
+			dev_err(i1480->dev, "error reading for verification: "
+				"%zd\n", result);
+			goto error;
+		}
+		if (memcmp(i1480->cmd_buf, bin + src_itr, result)) {
+			u8 *buf = i1480->cmd_buf;
+			for (cnt = 0; cnt < result; cnt++)
+				if (bin[src_itr + cnt] != buf[cnt]) {
+					dev_err(i1480->dev, "byte failed at "
+						"src_itr %u cnt %u [0x%02x "
+						"vs 0x%02x]\n", src_itr, cnt,
+						bin[src_itr + cnt], buf[cnt]);
+					result = src_itr + cnt + 1;
+					goto cmp_failed;
+				}
+		}
+		src_itr += result;
+		size -= result;
+	}
+	result = 0;
+error:
+cmp_failed:
+	return result;
+}
+
+
+/**
+ * Writes firmware headers to the device.
+ *
+ * @prd:     PRD instance
+ * @hdr:     Processed firmware
+ * @returns: 0 if ok, < 0 errno on error.
+ */
+static
+int mac_fw_hdrs_push(struct i1480 *i1480, struct fw_hdr *hdr,
+		     const char *fw_name, const char *fw_tag)
+{
+	struct device *dev = i1480->dev;
+	ssize_t result = 0;
+	struct fw_hdr *hdr_itr;
+	int verif_retry_count;
+
+	/* Now, header by header, push them to the hw */
+	for (hdr_itr = hdr; hdr_itr != NULL; hdr_itr = hdr_itr->next) {
+		verif_retry_count = 0;
+retry:
+		dev_dbg(dev, "fw chunk (%zu @ 0x%08lx)\n",
+			hdr_itr->length * sizeof(hdr_itr->bin[0]),
+			hdr_itr->address);
+		result = i1480->write(i1480, hdr_itr->address, hdr_itr->bin,
+				    hdr_itr->length*sizeof(hdr_itr->bin[0]));
+		if (result < 0) {
+			dev_err(dev, "%s fw '%s': write failed (%zuB @ 0x%lx):"
+				" %zd\n", fw_tag, fw_name,
+				hdr_itr->length * sizeof(hdr_itr->bin[0]),
+				hdr_itr->address, result);
+			break;
+		}
+		result = i1480_fw_cmp(i1480, hdr_itr);
+		if (result < 0) {
+			dev_err(dev, "%s fw '%s': verification read "
+				"failed (%zuB @ 0x%lx): %zd\n",
+				fw_tag, fw_name,
+				hdr_itr->length * sizeof(hdr_itr->bin[0]),
+				hdr_itr->address, result);
+			break;
+		}
+		if (result > 0) {	/* Offset where it failed + 1 */
+			result--;
+			dev_err(dev, "%s fw '%s': WARNING: verification "
+				"failed at 0x%lx: retrying\n",
+				fw_tag, fw_name, hdr_itr->address + result);
+			if (++verif_retry_count < 3)
+				goto retry;	/* write this block again! */
+			dev_err(dev, "%s fw '%s': verification failed at 0x%lx: "
+				"tried %d times\n", fw_tag, fw_name,
+				hdr_itr->address + result, verif_retry_count);
+			result = -EINVAL;
+			break;
+		}
+	}
+	return result;
+}
+
+
+/** Puts the device in firmware upload mode.*/
+static
+int mac_fw_upload_enable(struct i1480 *i1480)
+{
+	int result;
+	u32 reg = 0x800000c0;
+	u32 *buffer = (u32 *)i1480->cmd_buf;
+
+	if (i1480->hw_rev > 1)
+		reg = 0x8000d0d4;
+	result = i1480->read(i1480, reg, sizeof(u32));
+	if (result < 0)
+		goto error_cmd;
+	*buffer &= ~i1480_FW_UPLOAD_MODE_MASK;
+	result = i1480->write(i1480, reg, buffer, sizeof(u32));
+	if (result < 0)
+		goto error_cmd;
+	return 0;
+error_cmd:
+	dev_err(i1480->dev, "can't enable fw upload mode: %d\n", result);
+	return result;
+}
+
+
+/** Gets the device out of firmware upload mode. */
+static
+int mac_fw_upload_disable(struct i1480 *i1480)
+{
+	int result;
+	u32 reg = 0x800000c0;
+	u32 *buffer = (u32 *)i1480->cmd_buf;
+
+	if (i1480->hw_rev > 1)
+		reg = 0x8000d0d4;
+	result = i1480->read(i1480, reg, sizeof(u32));
+	if (result < 0)
+		goto error_cmd;
+	*buffer |= i1480_FW_UPLOAD_MODE_MASK;
+	result = i1480->write(i1480, reg, buffer, sizeof(u32));
+	if (result < 0)
+		goto error_cmd;
+	return 0;
+error_cmd:
+	dev_err(i1480->dev, "can't disable fw upload mode: %d\n", result);
+	return result;
+}
+
+
+
+/**
+ * Generic function for uploading a MAC firmware.
+ *
+ * @i1480:     Device instance
+ * @fw_name: Name of firmware file to upload.
+ * @fw_tag:  Name of the firmware type (for messages)
+ *           [eg: MAC, PRE]
+ * @do_wait: Wait for device to emit initialization done message (0
+ *           for PRE fws, 1 for MAC fws).
+ * @returns: 0 if ok, < 0 errno on error.
+ */
+static
+int __mac_fw_upload(struct i1480 *i1480, const char *fw_name,
+		    const char *fw_tag)
+{
+	int result;
+	const struct firmware *fw;
+	struct fw_hdr *fw_hdrs;
+
+	result = request_firmware(&fw, fw_name, i1480->dev);
+	if (result < 0)	/* Up to caller to complain on -ENOENT */
+		goto out;
+	result = fw_hdrs_load(i1480, &fw_hdrs, fw->data, fw->size);
+	if (result < 0) {
+		dev_err(i1480->dev, "%s fw '%s': failed to parse firmware "
+			"file: %d\n", fw_tag, fw_name, result);
+		goto out_release;
+	}
+	result = mac_fw_upload_enable(i1480);
+	if (result < 0)
+		goto out_hdrs_release;
+	result = mac_fw_hdrs_push(i1480, fw_hdrs, fw_name, fw_tag);
+	mac_fw_upload_disable(i1480);
+out_hdrs_release:
+	if (result >= 0)
+		dev_info(i1480->dev, "%s fw '%s': uploaded\n", fw_tag, fw_name);
+	else
+		dev_err(i1480->dev, "%s fw '%s': failed to upload (%d), "
+			"power cycle device\n", fw_tag, fw_name, result);
+	fw_hdrs_free(fw_hdrs);
+out_release:
+	release_firmware(fw);
+out:
+	return result;
+}
+
+
+/**
+ * Upload a pre-PHY firmware
+ *
+ */
+int i1480_pre_fw_upload(struct i1480 *i1480)
+{
+	int result;
+	result = __mac_fw_upload(i1480, i1480->pre_fw_name, "PRE");
+	if (result == 0)
+		msleep(400);
+	return result;
+}
+
+
+/**
+ * Reset a the MAC and PHY
+ *
+ * @i1480:     Device's instance
+ * @returns: 0 if ok, < 0 errno code on error
+ *
+ * We put the command on kmalloc'ed memory as some arches cannot do
+ * USB from the stack. The reply event is copied from an stage buffer,
+ * so it can be in the stack. See WUSB1.0[8.6.2.4] for more details.
+ *
+ * We issue the reset to make sure the UWB controller reinits the PHY;
+ * this way we can now if the PHY init went ok.
+ */
+static
+int i1480_cmd_reset(struct i1480 *i1480)
+{
+	int result;
+	struct uwb_rccb *cmd = (void *) i1480->cmd_buf;
+	struct i1480_evt_reset {
+		struct uwb_rceb rceb;
+		u8 bResultCode;
+	} __attribute__((packed)) *reply = (void *) i1480->evt_buf;
+
+	result = -ENOMEM;
+	cmd->bCommandType = UWB_RC_CET_GENERAL;
+	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_RESET);
+	reply->rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply->rceb.wEvent = UWB_RC_CMD_RESET;
+	result = i1480_cmd(i1480, "RESET", sizeof(*cmd), sizeof(*reply));
+	if (result < 0)
+		goto out;
+	if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(i1480->dev, "RESET: command execution failed: %u\n",
+			reply->bResultCode);
+		result = -EIO;
+	}
+out:
+	return result;
+
+}
+
+
+/* Wait for the MAC FW to start running */
+static
+int i1480_fw_is_running_q(struct i1480 *i1480)
+{
+	int cnt = 0;
+	int result;
+	u32 *val = (u32 *) i1480->cmd_buf;
+
+	for (cnt = 0; cnt < 10; cnt++) {
+		msleep(100);
+		result = i1480->read(i1480, 0x80080000, 4);
+		if (result < 0) {
+			dev_err(i1480->dev, "Can't read 0x8008000: %d\n", result);
+			goto out;
+		}
+		if (*val == 0x55555555UL)	/* fw running? cool */
+			goto out;
+	}
+	dev_err(i1480->dev, "Timed out waiting for fw to start\n");
+	result = -ETIMEDOUT;
+out:
+	return result;
+
+}
+
+
+/**
+ * Upload MAC firmware, wait for it to start
+ *
+ * @i1480:     Device instance
+ * @fw_name: Name of the file that contains the firmware
+ *
+ * This has to be called after the pre fw has been uploaded (if
+ * there is any).
+ */
+int i1480_mac_fw_upload(struct i1480 *i1480)
+{
+	int result = 0, deprecated_name = 0;
+	struct i1480_rceb *rcebe = (void *) i1480->evt_buf;
+
+	result = __mac_fw_upload(i1480, i1480->mac_fw_name, "MAC");
+	if (result == -ENOENT) {
+		result = __mac_fw_upload(i1480, i1480->mac_fw_name_deprecate,
+					 "MAC");
+		deprecated_name = 1;
+	}
+	if (result < 0)
+		return result;
+	if (deprecated_name == 1)
+		dev_warn(i1480->dev,
+			 "WARNING: firmware file name %s is deprecated, "
+			 "please rename to %s\n",
+			 i1480->mac_fw_name_deprecate, i1480->mac_fw_name);
+	result = i1480_fw_is_running_q(i1480);
+	if (result < 0)
+		goto error_fw_not_running;
+	result = i1480->rc_setup ? i1480->rc_setup(i1480) : 0;
+	if (result < 0) {
+		dev_err(i1480->dev, "Cannot setup after MAC fw upload: %d\n",
+			result);
+		goto error_setup;
+	}
+	result = i1480->wait_init_done(i1480);	/* wait init'on */
+	if (result < 0) {
+		dev_err(i1480->dev, "MAC fw '%s': Initialization timed out "
+			"(%d)\n", i1480->mac_fw_name, result);
+		goto error_init_timeout;
+	}
+	/* verify we got the right initialization done event */
+	if (i1480->evt_result != sizeof(*rcebe)) {
+		dev_err(i1480->dev, "MAC fw '%s': initialization event returns "
+			"wrong size (%zu bytes vs %zu needed)\n",
+			i1480->mac_fw_name, i1480->evt_result, sizeof(*rcebe));
+		goto error_size;
+	}
+	result = -EIO;
+	if (i1480_rceb_check(i1480, &rcebe->rceb, NULL, 0, i1480_CET_VS1,
+			     i1480_EVT_RM_INIT_DONE) < 0) {
+		dev_err(i1480->dev, "wrong initialization event 0x%02x/%04x/%02x "
+			"received; expected 0x%02x/%04x/00\n",
+			rcebe->rceb.bEventType, le16_to_cpu(rcebe->rceb.wEvent),
+			rcebe->rceb.bEventContext, i1480_CET_VS1,
+			i1480_EVT_RM_INIT_DONE);
+		goto error_init_timeout;
+	}
+	result = i1480_cmd_reset(i1480);
+	if (result < 0)
+		dev_err(i1480->dev, "MAC fw '%s': MBOA reset failed (%d)\n",
+			i1480->mac_fw_name, result);
+error_fw_not_running:
+error_init_timeout:
+error_size:
+error_setup:
+	return result;
+}
diff --git a/drivers/staging/uwb/i1480/dfu/phy.c b/drivers/staging/uwb/i1480/dfu/phy.c
new file mode 100644
index 000000000000..13512c7dda0b
--- /dev/null
+++ b/drivers/staging/uwb/i1480/dfu/phy.c
@@ -0,0 +1,190 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless UWB Link 1480
+ * PHY parameters upload
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Code for uploading the PHY parameters to the PHY through the UWB
+ * Radio Control interface.
+ *
+ * We just send the data through the MPI interface using HWA-like
+ * commands and then reset the PHY to make sure it is ok.
+ */
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include "../../../wusbcore/include/wusb.h"
+#include "i1480-dfu.h"
+
+
+/**
+ * Write a value array to an address of the MPI interface
+ *
+ * @i1480:	Device descriptor
+ * @data:	Data array to write
+ * @size:	Size of the data array
+ * @returns:	0 if ok, < 0 errno code on error.
+ *
+ * The data array is organized into pairs:
+ *
+ * ADDRESS VALUE
+ *
+ * ADDRESS is BE 16 bit unsigned, VALUE 8 bit unsigned. Size thus has
+ * to be a multiple of three.
+ */
+static
+int i1480_mpi_write(struct i1480 *i1480, const void *data, size_t size)
+{
+	int result;
+	struct i1480_cmd_mpi_write *cmd = i1480->cmd_buf;
+	struct i1480_evt_confirm *reply = i1480->evt_buf;
+
+	BUG_ON(size > 480);
+	result = -ENOMEM;
+	cmd->rccb.bCommandType = i1480_CET_VS1;
+	cmd->rccb.wCommand = cpu_to_le16(i1480_CMD_MPI_WRITE);
+	cmd->size = cpu_to_le16(size);
+	memcpy(cmd->data, data, size);
+	reply->rceb.bEventType = i1480_CET_VS1;
+	reply->rceb.wEvent = i1480_CMD_MPI_WRITE;
+	result = i1480_cmd(i1480, "MPI-WRITE", sizeof(*cmd) + size, sizeof(*reply));
+	if (result < 0)
+		goto out;
+	if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(i1480->dev, "MPI-WRITE: command execution failed: %d\n",
+			reply->bResultCode);
+		result = -EIO;
+	}
+out:
+	return result;
+}
+
+
+/**
+ * Read a value array to from an address of the MPI interface
+ *
+ * @i1480:	Device descriptor
+ * @data:	where to place the read array
+ * @srcaddr:	Where to read from
+ * @size:	Size of the data read array
+ * @returns:	0 if ok, < 0 errno code on error.
+ *
+ * The command data array is organized into pairs ADDR0 ADDR1..., and
+ * the returned data in ADDR0 VALUE0 ADDR1 VALUE1...
+ *
+ * We generate the command array to be a sequential read and then
+ * rearrange the result.
+ *
+ * We use the i1480->cmd_buf for the command, i1480->evt_buf for the reply.
+ *
+ * As the reply has to fit in 512 bytes (i1480->evt_buffer), the max amount
+ * of values we can read is (512 - sizeof(*reply)) / 3
+ */
+static
+int i1480_mpi_read(struct i1480 *i1480, u8 *data, u16 srcaddr, size_t size)
+{
+	int result;
+	struct i1480_cmd_mpi_read *cmd = i1480->cmd_buf;
+	struct i1480_evt_mpi_read *reply = i1480->evt_buf;
+	unsigned cnt;
+
+	memset(i1480->cmd_buf, 0x69, 512);
+	memset(i1480->evt_buf, 0x69, 512);
+
+	BUG_ON(size > (i1480->buf_size - sizeof(*reply)) / 3);
+	result = -ENOMEM;
+	cmd->rccb.bCommandType = i1480_CET_VS1;
+	cmd->rccb.wCommand = cpu_to_le16(i1480_CMD_MPI_READ);
+	cmd->size = cpu_to_le16(3*size);
+	for (cnt = 0; cnt < size; cnt++) {
+		cmd->data[cnt].page = (srcaddr + cnt) >> 8;
+		cmd->data[cnt].offset = (srcaddr + cnt) & 0xff;
+	}
+	reply->rceb.bEventType = i1480_CET_VS1;
+	reply->rceb.wEvent = i1480_CMD_MPI_READ;
+	result = i1480_cmd(i1480, "MPI-READ", sizeof(*cmd) + 2*size,
+			sizeof(*reply) + 3*size);
+	if (result < 0)
+		goto out;
+	if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(i1480->dev, "MPI-READ: command execution failed: %d\n",
+			reply->bResultCode);
+		result = -EIO;
+		goto out;
+	}
+	for (cnt = 0; cnt < size; cnt++) {
+		if (reply->data[cnt].page != (srcaddr + cnt) >> 8)
+			dev_err(i1480->dev, "MPI-READ: page inconsistency at "
+				"index %u: expected 0x%02x, got 0x%02x\n", cnt,
+				(srcaddr + cnt) >> 8, reply->data[cnt].page);
+		if (reply->data[cnt].offset != ((srcaddr + cnt) & 0x00ff))
+			dev_err(i1480->dev, "MPI-READ: offset inconsistency at "
+				"index %u: expected 0x%02x, got 0x%02x\n", cnt,
+				(srcaddr + cnt) & 0x00ff,
+				reply->data[cnt].offset);
+		data[cnt] = reply->data[cnt].value;
+	}
+	result = 0;
+out:
+	return result;
+}
+
+
+/**
+ * Upload a PHY firmware, wait for it to start
+ *
+ * @i1480:     Device instance
+ * @fw_name: Name of the file that contains the firmware
+ *
+ * We assume the MAC fw is up and running. This means we can use the
+ * MPI interface to write the PHY firmware. Once done, we issue an
+ * MBOA Reset, which will force the MAC to reset and reinitialize the
+ * PHY. If that works, we are ready to go.
+ *
+ * Max packet size for the MPI write is 512, so the max buffer is 480
+ * (which gives us 160 byte triads of MSB, LSB and VAL for the data).
+ */
+int i1480_phy_fw_upload(struct i1480 *i1480)
+{
+	int result;
+	const struct firmware *fw;
+	const char *data_itr, *data_top;
+	const size_t MAX_BLK_SIZE = 480;	/* 160 triads */
+	size_t data_size;
+	u8 phy_stat;
+
+	result = request_firmware(&fw, i1480->phy_fw_name, i1480->dev);
+	if (result < 0)
+		goto out;
+	/* Loop writing data in chunks as big as possible until done. */
+	for (data_itr = fw->data, data_top = data_itr + fw->size;
+	     data_itr < data_top; data_itr += MAX_BLK_SIZE) {
+		data_size = min(MAX_BLK_SIZE, (size_t) (data_top - data_itr));
+		result = i1480_mpi_write(i1480, data_itr, data_size);
+		if (result < 0)
+			goto error_mpi_write;
+	}
+	/* Read MPI page 0, offset 6; if 0, PHY was initialized correctly. */
+	result = i1480_mpi_read(i1480, &phy_stat, 0x0006, 1);
+	if (result < 0) {
+		dev_err(i1480->dev, "PHY: can't get status: %d\n", result);
+		goto error_mpi_status;
+	}
+	if (phy_stat != 0) {
+		result = -ENODEV;
+		dev_info(i1480->dev, "error, PHY not ready: %u\n", phy_stat);
+		goto error_phy_status;
+	}
+	dev_info(i1480->dev, "PHY fw '%s': uploaded\n", i1480->phy_fw_name);
+error_phy_status:
+error_mpi_status:
+error_mpi_write:
+	release_firmware(fw);
+	if (result < 0)
+		dev_err(i1480->dev, "PHY fw '%s': failed to upload (%d), "
+			"power cycle device\n", i1480->phy_fw_name, result);
+out:
+	return result;
+}
diff --git a/drivers/staging/uwb/i1480/dfu/usb.c b/drivers/staging/uwb/i1480/dfu/usb.c
new file mode 100644
index 000000000000..d41086bdd783
--- /dev/null
+++ b/drivers/staging/uwb/i1480/dfu/usb.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless UWB Link 1480
+ * USB SKU firmware upload implementation
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This driver will prepare the i1480 device to behave as a real
+ * Wireless USB HWA adaptor by uploading the firmware.
+ *
+ * When the device is connected or driver is loaded, i1480_usb_probe()
+ * is called--this will allocate and initialize the device structure,
+ * fill in the pointers to the common functions (read, write,
+ * wait_init_done and cmd for HWA command execution) and once that is
+ * done, call the common firmware uploading routine. Then clean up and
+ * return -ENODEV, as we don't attach to the device.
+ *
+ * The rest are the basic ops we implement that the fw upload code
+ * uses to do its job. All the ops in the common code are i1480->NAME,
+ * the functions are i1480_usb_NAME().
+ */
+#include <linux/module.h>
+#include <linux/usb.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include "../../uwb.h"
+#include "../../../wusbcore/include/wusb.h"
+#include "../../../wusbcore/include/wusb-wa.h"
+#include "i1480-dfu.h"
+
+struct i1480_usb {
+	struct i1480 i1480;
+	struct usb_device *usb_dev;
+	struct usb_interface *usb_iface;
+	struct urb *neep_urb;	/* URB for reading from EP1 */
+};
+
+
+static
+void i1480_usb_init(struct i1480_usb *i1480_usb)
+{
+	i1480_init(&i1480_usb->i1480);
+}
+
+
+static
+int i1480_usb_create(struct i1480_usb *i1480_usb, struct usb_interface *iface)
+{
+	struct usb_device *usb_dev = interface_to_usbdev(iface);
+	int result = -ENOMEM;
+
+	i1480_usb->usb_dev = usb_get_dev(usb_dev);	/* bind the USB device */
+	i1480_usb->usb_iface = usb_get_intf(iface);
+	usb_set_intfdata(iface, i1480_usb);		/* Bind the driver to iface0 */
+	i1480_usb->neep_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (i1480_usb->neep_urb == NULL)
+		goto error;
+	return 0;
+
+error:
+	usb_set_intfdata(iface, NULL);
+	usb_put_intf(iface);
+	usb_put_dev(usb_dev);
+	return result;
+}
+
+
+static
+void i1480_usb_destroy(struct i1480_usb *i1480_usb)
+{
+	usb_kill_urb(i1480_usb->neep_urb);
+	usb_free_urb(i1480_usb->neep_urb);
+	usb_set_intfdata(i1480_usb->usb_iface, NULL);
+	usb_put_intf(i1480_usb->usb_iface);
+	usb_put_dev(i1480_usb->usb_dev);
+}
+
+
+/**
+ * Write a buffer to a memory address in the i1480 device
+ *
+ * @i1480:  i1480 instance
+ * @memory_address:
+ *          Address where to write the data buffer to.
+ * @buffer: Buffer to the data
+ * @size:   Size of the buffer [has to be < 512].
+ * @returns: 0 if ok, < 0 errno code on error.
+ *
+ * Data buffers to USB cannot be on the stack or in vmalloc'ed areas,
+ * so we copy it to the local i1480 buffer before proceeding. In any
+ * case, we have a max size we can send.
+ */
+static
+int i1480_usb_write(struct i1480 *i1480, u32 memory_address,
+		    const void *buffer, size_t size)
+{
+	int result = 0;
+	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
+	size_t buffer_size, itr = 0;
+
+	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
+	while (size > 0) {
+		buffer_size = size < i1480->buf_size ? size : i1480->buf_size;
+		memcpy(i1480->cmd_buf, buffer + itr, buffer_size);
+		result = usb_control_msg(
+			i1480_usb->usb_dev, usb_sndctrlpipe(i1480_usb->usb_dev, 0),
+			0xf0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+			memory_address,	(memory_address >> 16),
+			i1480->cmd_buf, buffer_size, 100 /* FIXME: arbitrary */);
+		if (result < 0)
+			break;
+		itr += result;
+		memory_address += result;
+		size -= result;
+	}
+	return result;
+}
+
+
+/**
+ * Read a block [max size 512] of the device's memory to @i1480's buffer.
+ *
+ * @i1480: i1480 instance
+ * @memory_address:
+ *         Address where to read from.
+ * @size:  Size to read. Smaller than or equal to 512.
+ * @returns: >= 0 number of bytes written if ok, < 0 errno code on error.
+ *
+ * NOTE: if the memory address or block is incorrect, you might get a
+ *       stall or a different memory read. Caller has to verify the
+ *       memory address and size passed back in the @neh structure.
+ */
+static
+int i1480_usb_read(struct i1480 *i1480, u32 addr, size_t size)
+{
+	ssize_t result = 0, bytes = 0;
+	size_t itr, read_size = i1480->buf_size;
+	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
+
+	BUG_ON(size > i1480->buf_size);
+	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
+	BUG_ON(read_size > 512);
+
+	if (addr >= 0x8000d200 && addr < 0x8000d400)	/* Yeah, HW quirk */
+		read_size = 4;
+
+	for (itr = 0; itr < size; itr += read_size) {
+		size_t itr_addr = addr + itr;
+		size_t itr_size = min(read_size, size - itr);
+		result = usb_control_msg(
+			i1480_usb->usb_dev, usb_rcvctrlpipe(i1480_usb->usb_dev, 0),
+			0xf0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+			itr_addr, (itr_addr >> 16),
+			i1480->cmd_buf + itr, itr_size,
+			100 /* FIXME: arbitrary */);
+		if (result < 0) {
+			dev_err(i1480->dev, "%s: USB read error: %zd\n",
+				__func__, result);
+			goto out;
+		}
+		if (result != itr_size) {
+			result = -EIO;
+			dev_err(i1480->dev,
+				"%s: partial read got only %zu bytes vs %zu expected\n",
+				__func__, result, itr_size);
+			goto out;
+		}
+		bytes += result;
+	}
+	result = bytes;
+out:
+	return result;
+}
+
+
+/**
+ * Callback for reads on the notification/event endpoint
+ *
+ * Just enables the completion read handler.
+ */
+static
+void i1480_usb_neep_cb(struct urb *urb)
+{
+	struct i1480 *i1480 = urb->context;
+	struct device *dev = i1480->dev;
+
+	switch (urb->status) {
+	case 0:
+		break;
+	case -ECONNRESET:	/* Not an error, but a controlled situation; */
+	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
+		dev_dbg(dev, "NEEP: reset/noent %d\n", urb->status);
+		break;
+	case -ESHUTDOWN:	/* going away! */
+		dev_dbg(dev, "NEEP: down %d\n", urb->status);
+		break;
+	default:
+		dev_err(dev, "NEEP: unknown status %d\n", urb->status);
+		break;
+	}
+	i1480->evt_result = urb->actual_length;
+	complete(&i1480->evt_complete);
+	return;
+}
+
+
+/**
+ * Wait for the MAC FW to initialize
+ *
+ * MAC FW sends a 0xfd/0101/00 notification to EP1 when done
+ * initializing. Get that notification into i1480->evt_buf; upper layer
+ * will verify it.
+ *
+ * Set i1480->evt_result with the result of getting the event or its
+ * size (if successful).
+ *
+ * Delivers the data directly to i1480->evt_buf
+ */
+static
+int i1480_usb_wait_init_done(struct i1480 *i1480)
+{
+	int result;
+	struct device *dev = i1480->dev;
+	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
+	struct usb_endpoint_descriptor *epd;
+
+	init_completion(&i1480->evt_complete);
+	i1480->evt_result = -EINPROGRESS;
+	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
+	usb_fill_int_urb(i1480_usb->neep_urb, i1480_usb->usb_dev,
+			 usb_rcvintpipe(i1480_usb->usb_dev, epd->bEndpointAddress),
+			 i1480->evt_buf, i1480->buf_size,
+			 i1480_usb_neep_cb, i1480, epd->bInterval);
+	result = usb_submit_urb(i1480_usb->neep_urb, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "init done: cannot submit NEEP read: %d\n",
+			result);
+		goto error_submit;
+	}
+	/* Wait for the USB callback to get the data */
+	result = wait_for_completion_interruptible_timeout(
+		&i1480->evt_complete, HZ);
+	if (result <= 0) {
+		result = result == 0 ? -ETIMEDOUT : result;
+		goto error_wait;
+	}
+	usb_kill_urb(i1480_usb->neep_urb);
+	return 0;
+
+error_wait:
+	usb_kill_urb(i1480_usb->neep_urb);
+error_submit:
+	i1480->evt_result = result;
+	return result;
+}
+
+
+/**
+ * Generic function for issuing commands to the i1480
+ *
+ * @i1480:      i1480 instance
+ * @cmd_name:   Name of the command (for error messages)
+ * @cmd:        Pointer to command buffer
+ * @cmd_size:   Size of the command buffer
+ * @reply:      Buffer for the reply event
+ * @reply_size: Expected size back (including RCEB); the reply buffer
+ *              is assumed to be as big as this.
+ * @returns:    >= 0 size of the returned event data if ok,
+ *              < 0 errno code on error.
+ *
+ * Arms the NE handle, issues the command to the device and checks the
+ * basics of the reply event.
+ */
+static
+int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size)
+{
+	int result;
+	struct device *dev = i1480->dev;
+	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
+	struct usb_endpoint_descriptor *epd;
+	struct uwb_rccb *cmd = i1480->cmd_buf;
+	u8 iface_no;
+
+	/* Post a read on the notification & event endpoint */
+	iface_no = i1480_usb->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
+	usb_fill_int_urb(
+		i1480_usb->neep_urb, i1480_usb->usb_dev,
+		usb_rcvintpipe(i1480_usb->usb_dev, epd->bEndpointAddress),
+		i1480->evt_buf, i1480->buf_size,
+		i1480_usb_neep_cb, i1480, epd->bInterval);
+	result = usb_submit_urb(i1480_usb->neep_urb, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "%s: cannot submit NEEP read: %d\n",
+			cmd_name, result);
+		goto error_submit_ep1;
+	}
+	/* Now post the command on EP0 */
+	result = usb_control_msg(
+		i1480_usb->usb_dev, usb_sndctrlpipe(i1480_usb->usb_dev, 0),
+		WA_EXEC_RC_CMD,
+		USB_DIR_OUT | USB_RECIP_INTERFACE | USB_TYPE_CLASS,
+		0, iface_no,
+		cmd, cmd_size,
+		100 /* FIXME: this is totally arbitrary */);
+	if (result < 0) {
+		dev_err(dev, "%s: control request failed: %d\n",
+			cmd_name, result);
+		goto error_submit_ep0;
+	}
+	return result;
+
+error_submit_ep0:
+	usb_kill_urb(i1480_usb->neep_urb);
+error_submit_ep1:
+	return result;
+}
+
+
+/*
+ * Probe a i1480 device for uploading firmware.
+ *
+ * We attach only to interface #0, which is the radio control interface.
+ */
+static
+int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
+{
+	struct usb_device *udev = interface_to_usbdev(iface);
+	struct i1480_usb *i1480_usb;
+	struct i1480 *i1480;
+	struct device *dev = &iface->dev;
+	int result;
+
+	result = -ENODEV;
+	if (iface->cur_altsetting->desc.bInterfaceNumber != 0) {
+		dev_dbg(dev, "not attaching to iface %d\n",
+			iface->cur_altsetting->desc.bInterfaceNumber);
+		goto error;
+	}
+	if (iface->num_altsetting > 1 &&
+			le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) {
+		/* Need altsetting #1 [HW QUIRK] or EP1 won't work */
+		result = usb_set_interface(interface_to_usbdev(iface), 0, 1);
+		if (result < 0)
+			dev_warn(dev,
+				 "can't set altsetting 1 on iface 0: %d\n",
+				 result);
+	}
+
+	if (iface->cur_altsetting->desc.bNumEndpoints < 1)
+		return -ENODEV;
+
+	result = -ENOMEM;
+	i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL);
+	if (i1480_usb == NULL) {
+		dev_err(dev, "Unable to allocate instance\n");
+		goto error;
+	}
+	i1480_usb_init(i1480_usb);
+
+	i1480 = &i1480_usb->i1480;
+	i1480->buf_size = 512;
+	i1480->cmd_buf = kmalloc_array(2, i1480->buf_size, GFP_KERNEL);
+	if (i1480->cmd_buf == NULL) {
+		dev_err(dev, "Cannot allocate transfer buffers\n");
+		result = -ENOMEM;
+		goto error_buf_alloc;
+	}
+	i1480->evt_buf = i1480->cmd_buf + i1480->buf_size;
+
+	result = i1480_usb_create(i1480_usb, iface);
+	if (result < 0) {
+		dev_err(dev, "Cannot create instance: %d\n", result);
+		goto error_create;
+	}
+
+	/* setup the fops and upload the firmware */
+	i1480->pre_fw_name = "i1480-pre-phy-0.0.bin";
+	i1480->mac_fw_name = "i1480-usb-0.0.bin";
+	i1480->mac_fw_name_deprecate = "ptc-0.0.bin";
+	i1480->phy_fw_name = "i1480-phy-0.0.bin";
+	i1480->dev = &iface->dev;
+	i1480->write = i1480_usb_write;
+	i1480->read = i1480_usb_read;
+	i1480->rc_setup = NULL;
+	i1480->wait_init_done = i1480_usb_wait_init_done;
+	i1480->cmd = i1480_usb_cmd;
+
+	result = i1480_fw_upload(&i1480_usb->i1480);	/* the real thing */
+	if (result >= 0) {
+		usb_reset_device(i1480_usb->usb_dev);
+		result = -ENODEV;	/* we don't want to bind to the iface */
+	}
+	i1480_usb_destroy(i1480_usb);
+error_create:
+	kfree(i1480->cmd_buf);
+error_buf_alloc:
+	kfree(i1480_usb);
+error:
+	return result;
+}
+
+MODULE_FIRMWARE("i1480-pre-phy-0.0.bin");
+MODULE_FIRMWARE("i1480-usb-0.0.bin");
+MODULE_FIRMWARE("i1480-phy-0.0.bin");
+
+#define i1480_USB_DEV(v, p)				\
+{							\
+	.match_flags = USB_DEVICE_ID_MATCH_DEVICE	\
+		 | USB_DEVICE_ID_MATCH_DEV_INFO		\
+		 | USB_DEVICE_ID_MATCH_INT_INFO,	\
+	.idVendor = (v),				\
+	.idProduct = (p),				\
+	.bDeviceClass = 0xff,				\
+	.bDeviceSubClass = 0xff,			\
+	.bDeviceProtocol = 0xff,			\
+	.bInterfaceClass = 0xff,			\
+	.bInterfaceSubClass = 0xff,			\
+	.bInterfaceProtocol = 0xff,			\
+}
+
+
+/** USB device ID's that we handle */
+static const struct usb_device_id i1480_usb_id_table[] = {
+	i1480_USB_DEV(0x8086, 0xdf3b),
+	i1480_USB_DEV(0x15a9, 0x0005),
+	i1480_USB_DEV(0x07d1, 0x3802),
+	i1480_USB_DEV(0x050d, 0x305a),
+	i1480_USB_DEV(0x3495, 0x3007),
+	{},
+};
+MODULE_DEVICE_TABLE(usb, i1480_usb_id_table);
+
+
+static struct usb_driver i1480_dfu_driver = {
+	.name =		"i1480-dfu-usb",
+	.id_table =	i1480_usb_id_table,
+	.probe =	i1480_usb_probe,
+	.disconnect =	NULL,
+};
+
+module_usb_driver(i1480_dfu_driver);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Intel Wireless UWB Link 1480 firmware uploader for USB");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/uwb/i1480/i1480-est.c b/drivers/staging/uwb/i1480/i1480-est.c
new file mode 100644
index 000000000000..106e0a44b138
--- /dev/null
+++ b/drivers/staging/uwb/i1480/i1480-est.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Wireless UWB Link 1480
+ * Event Size tables for Wired Adaptors
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+#include "../uwb.h"
+#include "dfu/i1480-dfu.h"
+
+
+/** Event size table for wEvents 0x00XX */
+static struct uwb_est_entry i1480_est_fd00[] = {
+	/* Anybody expecting this response has to use
+	 * neh->extra_size to specify the real size that will
+	 * come back. */
+	[i1480_EVT_CONFIRM] = { .size = sizeof(struct i1480_evt_confirm) },
+	[i1480_CMD_SET_IP_MAS] = { .size = sizeof(struct i1480_evt_confirm) },
+#ifdef i1480_RCEB_EXTENDED
+	[0x09] = {
+		.size = sizeof(struct i1480_rceb),
+		.offset = 1 + offsetof(struct i1480_rceb, wParamLength),
+	},
+#endif
+};
+
+/** Event size table for wEvents 0x01XX */
+static struct uwb_est_entry i1480_est_fd01[] = {
+	[0xff & i1480_EVT_RM_INIT_DONE] = { .size = sizeof(struct i1480_rceb) },
+	[0xff & i1480_EVT_DEV_ADD] = { .size = sizeof(struct i1480_rceb) + 9 },
+	[0xff & i1480_EVT_DEV_RM] = { .size = sizeof(struct i1480_rceb) + 9 },
+	[0xff & i1480_EVT_DEV_ID_CHANGE] = {
+		.size = sizeof(struct i1480_rceb) + 2 },
+};
+
+static int __init i1480_est_init(void)
+{
+	int result = uwb_est_register(i1480_CET_VS1, 0x00, 0x8086, 0x0c3b,
+				      i1480_est_fd00,
+				      ARRAY_SIZE(i1480_est_fd00));
+	if (result < 0) {
+		printk(KERN_ERR "Can't register EST table fd00: %d\n", result);
+		return result;
+	}
+	result = uwb_est_register(i1480_CET_VS1, 0x01, 0x8086, 0x0c3b,
+				  i1480_est_fd01, ARRAY_SIZE(i1480_est_fd01));
+	if (result < 0) {
+		printk(KERN_ERR "Can't register EST table fd01: %d\n", result);
+		return result;
+	}
+	return 0;
+}
+module_init(i1480_est_init);
+
+static void __exit i1480_est_exit(void)
+{
+	uwb_est_unregister(i1480_CET_VS1, 0x00, 0x8086, 0x0c3b,
+			   i1480_est_fd00, ARRAY_SIZE(i1480_est_fd00));
+	uwb_est_unregister(i1480_CET_VS1, 0x01, 0x8086, 0x0c3b,
+			   i1480_est_fd01, ARRAY_SIZE(i1480_est_fd01));
+}
+module_exit(i1480_est_exit);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("i1480's Vendor Specific Event Size Tables");
+MODULE_LICENSE("GPL");
+
+/**
+ * USB device ID's that we handle
+ *
+ * [so we are loaded when this kind device is connected]
+ */
+static struct usb_device_id __used i1480_est_id_table[] = {
+	{ USB_DEVICE(0x8086, 0xdf3b), },
+	{ USB_DEVICE(0x8086, 0x0c3b), },
+	{ },
+};
+MODULE_DEVICE_TABLE(usb, i1480_est_id_table);
diff --git a/drivers/staging/uwb/ie-rcv.c b/drivers/staging/uwb/ie-rcv.c
new file mode 100644
index 000000000000..51a4706e0dd3
--- /dev/null
+++ b/drivers/staging/uwb/ie-rcv.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * IE Received notification handling.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/bitmap.h>
+#include "uwb-internal.h"
+
+/*
+ * Process an incoming IE Received notification.
+ */
+int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct device *dev = &evt->rc->uwb_dev.dev;
+	struct uwb_rc_evt_ie_rcv *iercv;
+
+	/* Is there enough data to decode it? */
+	if (evt->notif.size < sizeof(*iercv)) {
+		dev_err(dev, "IE Received notification: Not enough data to "
+			"decode (%zu vs %zu bytes needed)\n",
+			evt->notif.size, sizeof(*iercv));
+		goto error;
+	}
+	iercv = container_of(evt->notif.rceb, struct uwb_rc_evt_ie_rcv, rceb);
+
+	dev_dbg(dev, "IE received, element ID=%d\n", iercv->IEData[0]);
+
+	if (iercv->IEData[0] == UWB_RELINQUISH_REQUEST_IE) {
+		dev_warn(dev, "unhandled Relinquish Request IE\n");
+	}
+
+	return 0;
+error:
+	return result;
+}
diff --git a/drivers/staging/uwb/ie.c b/drivers/staging/uwb/ie.c
new file mode 100644
index 000000000000..b11678dd0380
--- /dev/null
+++ b/drivers/staging/uwb/ie.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Information Element Handling
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Reinette Chatre <reinette.chatre@intel.com>
+ *
+ * FIXME: docs
+ */
+
+#include <linux/slab.h>
+#include <linux/export.h>
+#include "uwb-internal.h"
+
+/**
+ * uwb_ie_next - get the next IE in a buffer
+ * @ptr: start of the buffer containing the IE data
+ * @len: length of the buffer
+ *
+ * Both @ptr and @len are updated so subsequent calls to uwb_ie_next()
+ * will get the next IE.
+ *
+ * NULL is returned (and @ptr and @len will not be updated) if there
+ * are no more IEs in the buffer or the buffer is too short.
+ */
+struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len)
+{
+	struct uwb_ie_hdr *hdr;
+	size_t ie_len;
+
+	if (*len < sizeof(struct uwb_ie_hdr))
+		return NULL;
+
+	hdr = *ptr;
+	ie_len = sizeof(struct uwb_ie_hdr) + hdr->length;
+
+	if (*len < ie_len)
+		return NULL;
+
+	*ptr += ie_len;
+	*len -= ie_len;
+
+	return hdr;
+}
+EXPORT_SYMBOL_GPL(uwb_ie_next);
+
+/**
+ * uwb_ie_dump_hex - print IEs to a character buffer
+ * @ies: the IEs to print.
+ * @len: length of all the IEs.
+ * @buf: the destination buffer.
+ * @size: size of @buf.
+ *
+ * Returns the number of characters written.
+ */
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size)
+{
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+	int r = 0;
+	u8 *d;
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &len);
+		if (!ie)
+			break;
+
+		r += scnprintf(buf + r, size - r, "%02x %02x",
+			       (unsigned)ie->element_id,
+			       (unsigned)ie->length);
+		d = (uint8_t *)ie + sizeof(struct uwb_ie_hdr);
+		while (d != ptr && r < size)
+			r += scnprintf(buf + r, size - r, " %02x", (unsigned)*d++);
+		if (r < size)
+			buf[r++] = '\n';
+	};
+
+	return r;
+}
+
+/**
+ * Get the IEs that a radio controller is sending in its beacon
+ *
+ * @uwb_rc:  UWB Radio Controller
+ * @returns: Size read from the system
+ *
+ * We don't need to lock the uwb_rc's mutex because we don't modify
+ * anything. Once done with the iedata buffer, call
+ * uwb_rc_ie_release(iedata). Don't call kfree on it.
+ */
+static
+ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
+{
+	ssize_t result;
+	struct device *dev = &uwb_rc->uwb_dev.dev;
+	struct uwb_rccb *cmd = NULL;
+	struct uwb_rceb *reply = NULL;
+	struct uwb_rc_evt_get_ie *get_ie;
+
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		return -ENOMEM;
+
+	cmd->bCommandType = UWB_RC_CET_GENERAL;
+	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_GET_IE);
+	result = uwb_rc_vcmd(uwb_rc, "GET_IE", cmd, sizeof(*cmd),
+			     UWB_RC_CET_GENERAL, UWB_RC_CMD_GET_IE,
+			     &reply);
+	kfree(cmd);
+	if (result < 0)
+		return result;
+
+	get_ie = container_of(reply, struct uwb_rc_evt_get_ie, rceb);
+	if (result < sizeof(*get_ie)) {
+		dev_err(dev, "not enough data returned for decoding GET IE "
+			"(%zu bytes received vs %zu needed)\n",
+			result, sizeof(*get_ie));
+		return -EINVAL;
+	} else if (result < sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)) {
+		dev_err(dev, "not enough data returned for decoding GET IE "
+			"payload (%zu bytes received vs %zu needed)\n", result,
+			sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength));
+		return -EINVAL;
+	}
+
+	*pget_ie = get_ie;
+	return result;
+}
+
+
+/**
+ * Replace all IEs currently being transmitted by a device
+ *
+ * @cmd:    pointer to the SET-IE command with the IEs to set
+ * @size:   size of @buf
+ */
+int uwb_rc_set_ie(struct uwb_rc *rc, struct uwb_rc_cmd_set_ie *cmd)
+{
+	int result;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rc_evt_set_ie reply;
+
+	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply.rceb.wEvent = UWB_RC_CMD_SET_IE;
+	result = uwb_rc_cmd(rc, "SET-IE", &cmd->rccb,
+			    sizeof(*cmd) + le16_to_cpu(cmd->wIELength),
+			    &reply.rceb, sizeof(reply));
+	if (result < 0)
+		goto error_cmd;
+	else if (result != sizeof(reply)) {
+		dev_err(dev, "SET-IE: not enough data to decode reply "
+			"(%d bytes received vs %zu needed)\n",
+			result, sizeof(reply));
+		result = -EIO;
+	} else if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(dev, "SET-IE: command execution failed: %s (%d)\n",
+			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
+		result = -EIO;
+	} else
+		result = 0;
+error_cmd:
+	return result;
+}
+
+/* Cleanup the whole IE management subsystem */
+void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
+{
+	mutex_init(&uwb_rc->ies_mutex);
+}
+
+
+/**
+ * uwb_rc_ie_setup - setup a radio controller's IE manager
+ * @uwb_rc: the radio controller.
+ *
+ * The current set of IEs are obtained from the hardware with a GET-IE
+ * command (since the radio controller is not yet beaconing this will
+ * be just the hardware's MAC and PHY Capability IEs).
+ *
+ * Returns 0 on success; -ve on an error.
+ */
+int uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
+{
+	struct uwb_rc_evt_get_ie *ie_info = NULL;
+	int capacity;
+
+	capacity = uwb_rc_get_ie(uwb_rc, &ie_info);
+	if (capacity < 0)
+		return capacity;
+
+	mutex_lock(&uwb_rc->ies_mutex);
+
+	uwb_rc->ies = (struct uwb_rc_cmd_set_ie *)ie_info;
+	uwb_rc->ies->rccb.bCommandType = UWB_RC_CET_GENERAL;
+	uwb_rc->ies->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_IE);
+	uwb_rc->ies_capacity = capacity;
+
+	mutex_unlock(&uwb_rc->ies_mutex);
+
+	return 0;
+}
+
+
+/* Cleanup the whole IE management subsystem */
+void uwb_rc_ie_release(struct uwb_rc *uwb_rc)
+{
+	kfree(uwb_rc->ies);
+	uwb_rc->ies = NULL;
+	uwb_rc->ies_capacity = 0;
+}
+
+
+static int uwb_rc_ie_add_one(struct uwb_rc *rc, const struct uwb_ie_hdr *new_ie)
+{
+	struct uwb_rc_cmd_set_ie *new_ies;
+	void *ptr, *prev_ie;
+	struct uwb_ie_hdr *ie;
+	size_t length, new_ie_len, new_capacity, size, prev_size;
+
+	length = le16_to_cpu(rc->ies->wIELength);
+	new_ie_len = sizeof(struct uwb_ie_hdr) + new_ie->length;
+	new_capacity = sizeof(struct uwb_rc_cmd_set_ie) + length + new_ie_len;
+
+	if (new_capacity > rc->ies_capacity) {
+		new_ies = krealloc(rc->ies, new_capacity, GFP_KERNEL);
+		if (!new_ies)
+			return -ENOMEM;
+		rc->ies = new_ies;
+	}
+
+	ptr = rc->ies->IEData;
+	size = length;
+	for (;;) {
+		prev_ie = ptr;
+		prev_size = size;
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie || ie->element_id > new_ie->element_id)
+			break;
+	}
+
+	memmove(prev_ie + new_ie_len, prev_ie, prev_size);
+	memcpy(prev_ie, new_ie, new_ie_len);
+	rc->ies->wIELength = cpu_to_le16(length + new_ie_len);
+
+	return 0;
+}
+
+/**
+ * uwb_rc_ie_add - add new IEs to the radio controller's beacon
+ * @uwb_rc: the radio controller.
+ * @ies: the buffer containing the new IE or IEs to be added to
+ *       the device's beacon.
+ * @size: length of all the IEs.
+ *
+ * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
+ * after the device sent the first beacon that includes the IEs specified
+ * in the SET IE command. We thus cannot send this command if the device is
+ * not beaconing. Instead, a SET IE command will be sent later right after
+ * we start beaconing.
+ *
+ * Setting an IE on the device will overwrite all current IEs in device. So
+ * we take the current IEs being transmitted by the device, insert the
+ * new one, and call SET IE with all the IEs needed.
+ *
+ * Returns 0 on success; or -ENOMEM.
+ */
+int uwb_rc_ie_add(struct uwb_rc *uwb_rc,
+		  const struct uwb_ie_hdr *ies, size_t size)
+{
+	int result = 0;
+	void *ptr;
+	const struct uwb_ie_hdr *ie;
+
+	mutex_lock(&uwb_rc->ies_mutex);
+
+	ptr = (void *)ies;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+
+		result = uwb_rc_ie_add_one(uwb_rc, ie);
+		if (result < 0)
+			break;
+	}
+	if (result >= 0) {
+		if (size == 0) {
+			if (uwb_rc->beaconing != -1)
+				result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
+		} else
+			result = -EINVAL;
+	}
+
+	mutex_unlock(&uwb_rc->ies_mutex);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
+
+
+/*
+ * Remove an IE from internal cache
+ *
+ * We are dealing with our internal IE cache so no need to verify that the
+ * IEs are valid (it has been done already).
+ *
+ * Should be called with ies_mutex held
+ *
+ * We do not break out once an IE is found in the cache. It is currently
+ * possible to have more than one IE with the same ID included in the
+ * beacon. We don't reallocate, we just mark the size smaller.
+ */
+static
+void uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
+{
+	struct uwb_ie_hdr *ie;
+	size_t len = le16_to_cpu(uwb_rc->ies->wIELength);
+	void *ptr;
+	size_t size;
+
+	ptr = uwb_rc->ies->IEData;
+	size = len;
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &size);
+		if (!ie)
+			break;
+		if (ie->element_id == to_remove) {
+			len -= sizeof(struct uwb_ie_hdr) + ie->length;
+			memmove(ie, ptr, size);
+			ptr = ie;
+		}
+	}
+	uwb_rc->ies->wIELength = cpu_to_le16(len);
+}
+
+
+/**
+ * uwb_rc_ie_rm - remove an IE from the radio controller's beacon
+ * @uwb_rc: the radio controller.
+ * @element_id: the element ID of the IE to remove.
+ *
+ * Only IEs previously added with uwb_rc_ie_add() may be removed.
+ *
+ * Returns 0 on success; or -ve the SET-IE command to the radio
+ * controller failed.
+ */
+int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id)
+{
+	int result = 0;
+
+	mutex_lock(&uwb_rc->ies_mutex);
+
+	uwb_rc_ie_cache_rm(uwb_rc, element_id);
+
+	if (uwb_rc->beaconing != -1)
+		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
+
+	mutex_unlock(&uwb_rc->ies_mutex);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_ie_rm);
diff --git a/drivers/staging/uwb/include/debug-cmd.h b/drivers/staging/uwb/include/debug-cmd.h
new file mode 100644
index 000000000000..f97db6c3bcc0
--- /dev/null
+++ b/drivers/staging/uwb/include/debug-cmd.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Ultra Wide Band
+ * Debug interface commands
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#ifndef __LINUX__UWB__DEBUG_CMD_H__
+#define __LINUX__UWB__DEBUG_CMD_H__
+
+#include <linux/types.h>
+
+/*
+ * Debug interface commands
+ *
+ * UWB_DBG_CMD_RSV_ESTABLISH: Establish a new unicast reservation.
+ *
+ * UWB_DBG_CMD_RSV_TERMINATE: Terminate the Nth reservation.
+ */
+
+enum uwb_dbg_cmd_type {
+	UWB_DBG_CMD_RSV_ESTABLISH = 1,
+	UWB_DBG_CMD_RSV_TERMINATE = 2,
+	UWB_DBG_CMD_IE_ADD = 3,
+	UWB_DBG_CMD_IE_RM = 4,
+	UWB_DBG_CMD_RADIO_START = 5,
+	UWB_DBG_CMD_RADIO_STOP = 6,
+};
+
+struct uwb_dbg_cmd_rsv_establish {
+	__u8  target[6];
+	__u8  type;
+	__u16 max_mas;
+	__u16 min_mas;
+	__u8  max_interval;
+};
+
+struct uwb_dbg_cmd_rsv_terminate {
+	int index;
+};
+
+struct uwb_dbg_cmd_ie {
+	__u8 data[128];
+	int len;
+};
+
+struct uwb_dbg_cmd {
+	__u32 type;
+	union {
+		struct uwb_dbg_cmd_rsv_establish rsv_establish;
+		struct uwb_dbg_cmd_rsv_terminate rsv_terminate;
+		struct uwb_dbg_cmd_ie ie_add;
+		struct uwb_dbg_cmd_ie ie_rm;
+	};
+};
+
+#endif /* #ifndef __LINUX__UWB__DEBUG_CMD_H__ */
diff --git a/drivers/staging/uwb/include/spec.h b/drivers/staging/uwb/include/spec.h
new file mode 100644
index 000000000000..5f75caf7b4ba
--- /dev/null
+++ b/drivers/staging/uwb/include/spec.h
@@ -0,0 +1,767 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Ultra Wide Band
+ * UWB Standard definitions
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * All these definitions are based on the ECMA-368 standard.
+ *
+ * Note all definitions are Little Endian in the wire, and we will
+ * convert them to host order before operating on the bitfields (that
+ * yes, we use extensively).
+ */
+
+#ifndef __LINUX__UWB_SPEC_H__
+#define __LINUX__UWB_SPEC_H__
+
+#include <linux/types.h>
+#include <linux/bitmap.h>
+#include <linux/if_ether.h>
+
+#define i1480_FW 0x00000303
+/* #define i1480_FW 0x00000302 */
+
+/**
+ * Number of Medium Access Slots in a superframe.
+ *
+ * UWB divides time in SuperFrames, each one divided in 256 pieces, or
+ * Medium Access Slots. See MBOA MAC[5.4.5] for details. The MAS is the
+ * basic bandwidth allocation unit in UWB.
+ */
+enum { UWB_NUM_MAS = 256 };
+
+/**
+ * Number of Zones in superframe.
+ *
+ * UWB divides the superframe into zones with numbering starting from BPST.
+ * See MBOA MAC[16.8.6]
+ */
+enum { UWB_NUM_ZONES = 16 };
+
+/*
+ * Number of MAS in a zone.
+ */
+#define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES)
+
+/*
+ * Number of MAS required before a row can be considered available.
+ */
+#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1)
+
+/*
+ * Number of streams per DRP reservation between a pair of devices.
+ *
+ * [ECMA-368] section 16.8.6.
+ */
+enum { UWB_NUM_STREAMS = 8 };
+
+/*
+ * mMasLength
+ *
+ * The length of a MAS in microseconds.
+ *
+ * [ECMA-368] section 17.16.
+ */
+enum { UWB_MAS_LENGTH_US = 256 };
+
+/*
+ * mBeaconSlotLength
+ *
+ * The length of the beacon slot in microseconds.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_BEACON_SLOT_LENGTH_US = 85 };
+
+/*
+ * mMaxLostBeacons
+ *
+ * The number beacons missing in consecutive superframes before a
+ * device can be considered as unreachable.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_MAX_LOST_BEACONS = 3 };
+
+/*
+ * mDRPBackOffWinMin
+ *
+ * The minimum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MIN = 2 };
+
+/*
+ * mDRPBackOffWinMax
+ *
+ * The maximum number of superframes to wait before trying to reserve
+ * extra MAS.
+ *
+ * [ECMA-368] section 17.16
+ */
+enum { UWB_DRP_BACKOFF_WIN_MAX = 16 };
+
+/*
+ * Length of a superframe in microseconds.
+ */
+#define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS)
+
+/**
+ * UWB MAC address
+ *
+ * It is *imperative* that this struct is exactly 6 packed bytes (as
+ * it is also used to define headers sent down and up the wire/radio).
+ */
+struct uwb_mac_addr {
+	u8 data[ETH_ALEN];
+} __attribute__((packed));
+
+
+/**
+ * UWB device address
+ *
+ * It is *imperative* that this struct is exactly 6 packed bytes (as
+ * it is also used to define headers sent down and up the wire/radio).
+ */
+struct uwb_dev_addr {
+	u8 data[2];
+} __attribute__((packed));
+
+
+/**
+ * Types of UWB addresses
+ *
+ * Order matters (by size).
+ */
+enum uwb_addr_type {
+	UWB_ADDR_DEV = 0,
+	UWB_ADDR_MAC = 1,
+};
+
+
+/** Size of a char buffer for printing a MAC/device address */
+enum { UWB_ADDR_STRSIZE = 32 };
+
+
+/** UWB WiMedia protocol IDs. */
+enum uwb_prid {
+	UWB_PRID_WLP_RESERVED   = 0x0000,
+	UWB_PRID_WLP		= 0x0001,
+	UWB_PRID_WUSB_BOT	= 0x0010,
+	UWB_PRID_WUSB		= 0x0010,
+	UWB_PRID_WUSB_TOP	= 0x001F,
+};
+
+
+/** PHY Rate (MBOA MAC[7.8.12, Table 61]) */
+enum uwb_phy_rate {
+	UWB_PHY_RATE_53 = 0,
+	UWB_PHY_RATE_80,
+	UWB_PHY_RATE_106,
+	UWB_PHY_RATE_160,
+	UWB_PHY_RATE_200,
+	UWB_PHY_RATE_320,
+	UWB_PHY_RATE_400,
+	UWB_PHY_RATE_480,
+	UWB_PHY_RATE_INVALID
+};
+
+
+/**
+ * Different ways to scan (MBOA MAC[6.2.2, Table 8], WUSB[Table 8-78])
+ */
+enum uwb_scan_type {
+	UWB_SCAN_ONLY = 0,
+	UWB_SCAN_OUTSIDE_BP,
+	UWB_SCAN_WHILE_INACTIVE,
+	UWB_SCAN_DISABLED,
+	UWB_SCAN_ONLY_STARTTIME,
+	UWB_SCAN_TOP
+};
+
+
+/** ACK Policy types (MBOA MAC[7.2.1.3]) */
+enum uwb_ack_pol {
+	UWB_ACK_NO = 0,
+	UWB_ACK_INM = 1,
+	UWB_ACK_B = 2,
+	UWB_ACK_B_REQ = 3,
+};
+
+
+/** DRP reservation types ([ECMA-368 table 106) */
+enum uwb_drp_type {
+	UWB_DRP_TYPE_ALIEN_BP = 0,
+	UWB_DRP_TYPE_HARD,
+	UWB_DRP_TYPE_SOFT,
+	UWB_DRP_TYPE_PRIVATE,
+	UWB_DRP_TYPE_PCA,
+};
+
+
+/** DRP Reason Codes ([ECMA-368] table 107) */
+enum uwb_drp_reason {
+	UWB_DRP_REASON_ACCEPTED = 0,
+	UWB_DRP_REASON_CONFLICT,
+	UWB_DRP_REASON_PENDING,
+	UWB_DRP_REASON_DENIED,
+	UWB_DRP_REASON_MODIFIED,
+};
+
+/** Relinquish Request Reason Codes ([ECMA-368] table 113) */
+enum uwb_relinquish_req_reason {
+	UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0,
+	UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION,
+};
+
+/**
+ *  DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9])
+ */
+enum uwb_drp_notif_reason {
+	UWB_DRP_NOTIF_DRP_IE_RCVD = 0,
+	UWB_DRP_NOTIF_CONFLICT,
+	UWB_DRP_NOTIF_TERMINATE,
+};
+
+
+/** Allocation of MAS slots in a DRP request MBOA MAC[7.8.7] */
+struct uwb_drp_alloc {
+	__le16 zone_bm;
+	__le16 mas_bm;
+} __attribute__((packed));
+
+
+/** General MAC Header format (ECMA-368[16.2]) */
+struct uwb_mac_frame_hdr {
+	__le16 Frame_Control;
+	struct uwb_dev_addr DestAddr;
+	struct uwb_dev_addr SrcAddr;
+	__le16 Sequence_Control;
+	__le16 Access_Information;
+} __attribute__((packed));
+
+
+/**
+ * uwb_beacon_frame - a beacon frame including MAC headers
+ *
+ * [ECMA] section 16.3.
+ */
+struct uwb_beacon_frame {
+	struct uwb_mac_frame_hdr hdr;
+	struct uwb_mac_addr Device_Identifier;	/* may be a NULL EUI-48 */
+	u8 Beacon_Slot_Number;
+	u8 Device_Control;
+	u8 IEData[];
+} __attribute__((packed));
+
+
+/** Information Element codes (MBOA MAC[T54]) */
+enum uwb_ie {
+	UWB_PCA_AVAILABILITY = 2,
+	UWB_IE_DRP_AVAILABILITY = 8,
+	UWB_IE_DRP = 9,
+	UWB_BP_SWITCH_IE = 11,
+	UWB_MAC_CAPABILITIES_IE = 12,
+	UWB_PHY_CAPABILITIES_IE = 13,
+	UWB_APP_SPEC_PROBE_IE = 15,
+	UWB_IDENTIFICATION_IE = 19,
+	UWB_MASTER_KEY_ID_IE = 20,
+	UWB_RELINQUISH_REQUEST_IE = 21,
+	UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */
+	UWB_APP_SPEC_IE = 255,
+};
+
+
+/**
+ * Header common to all Information Elements (IEs)
+ */
+struct uwb_ie_hdr {
+	u8 element_id;	/* enum uwb_ie */
+	u8 length;
+} __attribute__((packed));
+
+
+/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.6]) */
+struct uwb_ie_drp {
+	struct uwb_ie_hdr	hdr;
+	__le16                  drp_control;
+	struct uwb_dev_addr	dev_addr;
+	struct uwb_drp_alloc	allocs[];
+} __attribute__((packed));
+
+static inline int uwb_ie_drp_type(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 0) & 0x7;
+}
+
+static inline int uwb_ie_drp_stream_index(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 3) & 0x7;
+}
+
+static inline int uwb_ie_drp_reason_code(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 6) & 0x7;
+}
+
+static inline int uwb_ie_drp_status(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 9) & 0x1;
+}
+
+static inline int uwb_ie_drp_owner(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 10) & 0x1;
+}
+
+static inline int uwb_ie_drp_tiebreaker(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 11) & 0x1;
+}
+
+static inline int uwb_ie_drp_unsafe(struct uwb_ie_drp *ie)
+{
+	return (le16_to_cpu(ie->drp_control) >> 12) & 0x1;
+}
+
+static inline void uwb_ie_drp_set_type(struct uwb_ie_drp *ie, enum uwb_drp_type type)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (drp_control & ~(0x7 << 0)) | (type << 0);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+static inline void uwb_ie_drp_set_stream_index(struct uwb_ie_drp *ie, int stream_index)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (drp_control & ~(0x7 << 3)) | (stream_index << 3);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+static inline void uwb_ie_drp_set_reason_code(struct uwb_ie_drp *ie,
+				       enum uwb_drp_reason reason_code)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (ie->drp_control & ~(0x7 << 6)) | (reason_code << 6);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+static inline void uwb_ie_drp_set_status(struct uwb_ie_drp *ie, int status)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (drp_control & ~(0x1 << 9)) | (status << 9);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+static inline void uwb_ie_drp_set_owner(struct uwb_ie_drp *ie, int owner)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (drp_control & ~(0x1 << 10)) | (owner << 10);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+static inline void uwb_ie_drp_set_tiebreaker(struct uwb_ie_drp *ie, int tiebreaker)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (drp_control & ~(0x1 << 11)) | (tiebreaker << 11);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+static inline void uwb_ie_drp_set_unsafe(struct uwb_ie_drp *ie, int unsafe)
+{
+	u16 drp_control = le16_to_cpu(ie->drp_control);
+	drp_control = (drp_control & ~(0x1 << 12)) | (unsafe << 12);
+	ie->drp_control = cpu_to_le16(drp_control);
+}
+
+/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.7]) */
+struct uwb_ie_drp_avail {
+	struct uwb_ie_hdr	hdr;
+	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
+} __attribute__((packed));
+
+/* Relinqish Request IE ([ECMA-368] section 16.8.19). */
+struct uwb_relinquish_request_ie {
+        struct uwb_ie_hdr       hdr;
+        __le16                  relinquish_req_control;
+        struct uwb_dev_addr     dev_addr;
+        struct uwb_drp_alloc    allocs[];
+} __attribute__((packed));
+
+static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie)
+{
+	return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf;
+}
+
+static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie,
+							 int reason_code)
+{
+	u16 ctrl = le16_to_cpu(ie->relinquish_req_control);
+	ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0);
+	ie->relinquish_req_control = cpu_to_le16(ctrl);
+}
+
+/**
+ * The Vendor ID is set to an OUI that indicates the vendor of the device.
+ * ECMA-368 [16.8.10]
+ */
+struct uwb_vendor_id {
+	u8 data[3];
+} __attribute__((packed));
+
+/**
+ * The device type ID
+ * FIXME: clarify what this means
+ * ECMA-368 [16.8.10]
+ */
+struct uwb_device_type_id {
+	u8 data[3];
+} __attribute__((packed));
+
+
+/**
+ * UWB device information types
+ * ECMA-368 [16.8.10]
+ */
+enum uwb_dev_info_type {
+	UWB_DEV_INFO_VENDOR_ID = 0,
+	UWB_DEV_INFO_VENDOR_TYPE,
+	UWB_DEV_INFO_NAME,
+};
+
+/**
+ * UWB device information found in Identification IE
+ * ECMA-368 [16.8.10]
+ */
+struct uwb_dev_info {
+	u8 type;	/* enum uwb_dev_info_type */
+	u8 length;
+	u8 data[];
+} __attribute__((packed));
+
+/**
+ * UWB Identification IE
+ * ECMA-368 [16.8.10]
+ */
+struct uwb_identification_ie {
+	struct uwb_ie_hdr hdr;
+	struct uwb_dev_info info[];
+} __attribute__((packed));
+
+/*
+ * UWB Radio Controller
+ *
+ * These definitions are common to the Radio Control layers as
+ * exported by the WUSB1.0 HWA and WHCI interfaces.
+ */
+
+/** Radio Control Command Block (WUSB1.0[Table 8-65] and WHCI 0.95) */
+struct uwb_rccb {
+	u8 bCommandType;		/* enum hwa_cet */
+	__le16 wCommand;		/* Command code */
+	u8 bCommandContext;		/* Context ID */
+} __attribute__((packed));
+
+
+/** Radio Control Event Block (WUSB[table 8-66], WHCI 0.95) */
+struct uwb_rceb {
+	u8 bEventType;			/* enum hwa_cet */
+	__le16 wEvent;			/* Event code */
+	u8 bEventContext;		/* Context ID */
+} __attribute__((packed));
+
+
+enum {
+	UWB_RC_CET_GENERAL = 0,		/* General Command/Event type */
+	UWB_RC_CET_EX_TYPE_1 = 1,	/* Extended Type 1 Command/Event type */
+};
+
+/* Commands to the radio controller */
+enum uwb_rc_cmd {
+	UWB_RC_CMD_CHANNEL_CHANGE = 16,
+	UWB_RC_CMD_DEV_ADDR_MGMT = 17,	/* Device Address Management */
+	UWB_RC_CMD_GET_IE = 18,		/* GET Information Elements */
+	UWB_RC_CMD_RESET = 19,
+	UWB_RC_CMD_SCAN = 20,		/* Scan management  */
+	UWB_RC_CMD_SET_BEACON_FILTER = 21,
+	UWB_RC_CMD_SET_DRP_IE = 22,	/* Dynamic Reservation Protocol IEs */
+	UWB_RC_CMD_SET_IE = 23,		/* Information Element management */
+	UWB_RC_CMD_SET_NOTIFICATION_FILTER = 24,
+	UWB_RC_CMD_SET_TX_POWER = 25,
+	UWB_RC_CMD_SLEEP = 26,
+	UWB_RC_CMD_START_BEACON = 27,
+	UWB_RC_CMD_STOP_BEACON = 28,
+	UWB_RC_CMD_BP_MERGE = 29,
+	UWB_RC_CMD_SEND_COMMAND_FRAME = 30,
+	UWB_RC_CMD_SET_ASIE_NOTIF = 31,
+};
+
+/* Notifications from the radio controller */
+enum uwb_rc_evt {
+	UWB_RC_EVT_IE_RCV = 0,
+	UWB_RC_EVT_BEACON = 1,
+	UWB_RC_EVT_BEACON_SIZE = 2,
+	UWB_RC_EVT_BPOIE_CHANGE = 3,
+	UWB_RC_EVT_BP_SLOT_CHANGE = 4,
+	UWB_RC_EVT_BP_SWITCH_IE_RCV = 5,
+	UWB_RC_EVT_DEV_ADDR_CONFLICT = 6,
+	UWB_RC_EVT_DRP_AVAIL = 7,
+	UWB_RC_EVT_DRP = 8,
+	UWB_RC_EVT_BP_SWITCH_STATUS = 9,
+	UWB_RC_EVT_CMD_FRAME_RCV = 10,
+	UWB_RC_EVT_CHANNEL_CHANGE_IE_RCV = 11,
+	/* Events (command responses) use the same code as the command */
+	UWB_RC_EVT_UNKNOWN_CMD_RCV = 65535,
+};
+
+enum uwb_rc_extended_type_1_cmd {
+	UWB_RC_SET_DAA_ENERGY_MASK = 32,
+	UWB_RC_SET_NOTIFICATION_FILTER_EX = 33,
+};
+
+enum uwb_rc_extended_type_1_evt {
+	UWB_RC_DAA_ENERGY_DETECTED = 0,
+};
+
+/* Radio Control Result Code. [WHCI] table 3-3. */
+enum {
+	UWB_RC_RES_SUCCESS = 0,
+	UWB_RC_RES_FAIL,
+	UWB_RC_RES_FAIL_HARDWARE,
+	UWB_RC_RES_FAIL_NO_SLOTS,
+	UWB_RC_RES_FAIL_BEACON_TOO_LARGE,
+	UWB_RC_RES_FAIL_INVALID_PARAMETER,
+	UWB_RC_RES_FAIL_UNSUPPORTED_PWR_LEVEL,
+	UWB_RC_RES_FAIL_INVALID_IE_DATA,
+	UWB_RC_RES_FAIL_BEACON_SIZE_EXCEEDED,
+	UWB_RC_RES_FAIL_CANCELLED,
+	UWB_RC_RES_FAIL_INVALID_STATE,
+	UWB_RC_RES_FAIL_INVALID_SIZE,
+	UWB_RC_RES_FAIL_ACK_NOT_RECEIVED,
+	UWB_RC_RES_FAIL_NO_MORE_ASIE_NOTIF,
+	UWB_RC_RES_FAIL_TIME_OUT = 255,
+};
+
+/* Confirm event. [WHCI] section 3.1.3.1 etc. */
+struct uwb_rc_evt_confirm {
+	struct uwb_rceb rceb;
+	u8 bResultCode;
+} __attribute__((packed));
+
+/* Device Address Management event. [WHCI] section 3.1.3.2. */
+struct uwb_rc_evt_dev_addr_mgmt {
+	struct uwb_rceb rceb;
+	u8 baAddr[ETH_ALEN];
+	u8 bResultCode;
+} __attribute__((packed));
+
+
+/* Get IE Event. [WHCI] section 3.1.3.3. */
+struct uwb_rc_evt_get_ie {
+	struct uwb_rceb rceb;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/* Set DRP IE Event. [WHCI] section 3.1.3.7. */
+struct uwb_rc_evt_set_drp_ie {
+	struct uwb_rceb rceb;
+	__le16 wRemainingSpace;
+	u8 bResultCode;
+} __attribute__((packed));
+
+/* Set IE Event. [WHCI] section 3.1.3.8. */
+struct uwb_rc_evt_set_ie {
+	struct uwb_rceb rceb;
+	__le16 RemainingSpace;
+	u8 bResultCode;
+} __attribute__((packed));
+
+/* Scan command. [WHCI] 3.1.3.5. */
+struct uwb_rc_cmd_scan {
+	struct uwb_rccb rccb;
+	u8 bChannelNumber;
+	u8 bScanState;
+	__le16 wStartTime;
+} __attribute__((packed));
+
+/* Set DRP IE command. [WHCI] section 3.1.3.7. */
+struct uwb_rc_cmd_set_drp_ie {
+	struct uwb_rccb rccb;
+	__le16 wIELength;
+	struct uwb_ie_drp IEData[];
+} __attribute__((packed));
+
+/* Set IE command. [WHCI] section 3.1.3.8. */
+struct uwb_rc_cmd_set_ie {
+	struct uwb_rccb rccb;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/* Set DAA Energy Mask event. [WHCI 0.96] section 3.1.3.17. */
+struct uwb_rc_evt_set_daa_energy_mask {
+	struct uwb_rceb rceb;
+	__le16 wLength;
+	u8 result;
+} __attribute__((packed));
+
+/* Set Notification Filter Extended event. [WHCI 0.96] section 3.1.3.18. */
+struct uwb_rc_evt_set_notification_filter_ex {
+	struct uwb_rceb rceb;
+	__le16 wLength;
+	u8 result;
+} __attribute__((packed));
+
+/* IE Received notification. [WHCI] section 3.1.4.1. */
+struct uwb_rc_evt_ie_rcv {
+	struct uwb_rceb rceb;
+	struct uwb_dev_addr SrcAddr;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/* Type of the received beacon. [WHCI] section 3.1.4.2. */
+enum uwb_rc_beacon_type {
+	UWB_RC_BEACON_TYPE_SCAN = 0,
+	UWB_RC_BEACON_TYPE_NEIGHBOR,
+	UWB_RC_BEACON_TYPE_OL_ALIEN,
+	UWB_RC_BEACON_TYPE_NOL_ALIEN,
+};
+
+/* Beacon received notification. [WHCI] 3.1.4.2. */
+struct uwb_rc_evt_beacon {
+	struct uwb_rceb rceb;
+	u8	bChannelNumber;
+	u8	bBeaconType;
+	__le16	wBPSTOffset;
+	u8	bLQI;
+	u8	bRSSI;
+	__le16	wBeaconInfoLength;
+	u8	BeaconInfo[];
+} __attribute__((packed));
+
+
+/* Beacon Size Change notification. [WHCI] section 3.1.4.3 */
+struct uwb_rc_evt_beacon_size {
+	struct uwb_rceb rceb;
+	__le16 wNewBeaconSize;
+} __attribute__((packed));
+
+
+/* BPOIE Change notification. [WHCI] section 3.1.4.4. */
+struct uwb_rc_evt_bpoie_change {
+	struct uwb_rceb rceb;
+	__le16 wBPOIELength;
+	u8 BPOIE[];
+} __attribute__((packed));
+
+
+/* Beacon Slot Change notification. [WHCI] section 3.1.4.5. */
+struct uwb_rc_evt_bp_slot_change {
+	struct uwb_rceb rceb;
+	u8 slot_info;
+} __attribute__((packed));
+
+static inline int uwb_rc_evt_bp_slot_change_slot_num(
+	const struct uwb_rc_evt_bp_slot_change *evt)
+{
+	return evt->slot_info & 0x7f;
+}
+
+static inline int uwb_rc_evt_bp_slot_change_no_slot(
+	const struct uwb_rc_evt_bp_slot_change *evt)
+{
+	return (evt->slot_info & 0x80) >> 7;
+}
+
+/* BP Switch IE Received notification. [WHCI] section 3.1.4.6. */
+struct uwb_rc_evt_bp_switch_ie_rcv {
+	struct uwb_rceb rceb;
+	struct uwb_dev_addr wSrcAddr;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/* DevAddr Conflict notification. [WHCI] section 3.1.4.7. */
+struct uwb_rc_evt_dev_addr_conflict {
+	struct uwb_rceb rceb;
+} __attribute__((packed));
+
+/* DRP notification. [WHCI] section 3.1.4.9. */
+struct uwb_rc_evt_drp {
+	struct uwb_rceb           rceb;
+	struct uwb_dev_addr       src_addr;
+	u8                        reason;
+	u8                        beacon_slot_number;
+	__le16                    ie_length;
+	u8                        ie_data[];
+} __attribute__((packed));
+
+static inline enum uwb_drp_notif_reason uwb_rc_evt_drp_reason(struct uwb_rc_evt_drp *evt)
+{
+	return evt->reason & 0x0f;
+}
+
+
+/* DRP Availability Change notification. [WHCI] section 3.1.4.8. */
+struct uwb_rc_evt_drp_avail {
+	struct uwb_rceb rceb;
+	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
+} __attribute__((packed));
+
+/* BP switch status notification. [WHCI] section 3.1.4.10. */
+struct uwb_rc_evt_bp_switch_status {
+	struct uwb_rceb rceb;
+	u8 status;
+	u8 slot_offset;
+	__le16 bpst_offset;
+	u8 move_countdown;
+} __attribute__((packed));
+
+/* Command Frame Received notification. [WHCI] section 3.1.4.11. */
+struct uwb_rc_evt_cmd_frame_rcv {
+	struct uwb_rceb rceb;
+	__le16 receive_time;
+	struct uwb_dev_addr wSrcAddr;
+	struct uwb_dev_addr wDstAddr;
+	__le16 control;
+	__le16 reserved;
+	__le16 dataLength;
+	u8 data[];
+} __attribute__((packed));
+
+/* Channel Change IE Received notification. [WHCI] section 3.1.4.12. */
+struct uwb_rc_evt_channel_change_ie_rcv {
+	struct uwb_rceb rceb;
+	struct uwb_dev_addr wSrcAddr;
+	__le16 wIELength;
+	u8 IEData[];
+} __attribute__((packed));
+
+/* DAA Energy Detected notification. [WHCI 0.96] section 3.1.4.14. */
+struct uwb_rc_evt_daa_energy_detected {
+	struct uwb_rceb rceb;
+	__le16 wLength;
+	u8 bandID;
+	u8 reserved;
+	u8 toneBmp[16];
+} __attribute__((packed));
+
+
+/**
+ * Radio Control Interface Class Descriptor
+ *
+ *  WUSB 1.0 [8.6.1.2]
+ */
+struct uwb_rc_control_intf_class_desc {
+	u8 bLength;
+	u8 bDescriptorType;
+	__le16 bcdRCIVersion;
+} __attribute__((packed));
+
+#endif /* #ifndef __LINUX__UWB_SPEC_H__ */
diff --git a/drivers/staging/uwb/include/umc.h b/drivers/staging/uwb/include/umc.h
new file mode 100644
index 000000000000..ddbceb39ad15
--- /dev/null
+++ b/drivers/staging/uwb/include/umc.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * UWB Multi-interface Controller support.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ *
+ * UMC (UWB Multi-interface Controller) capabilities (e.g., radio
+ * controller, host controller) are presented as devices on the "umc"
+ * bus.
+ *
+ * The radio controller is not strictly a UMC capability but it's
+ * useful to present it as such.
+ *
+ * References:
+ *
+ *   [WHCI] Wireless Host Controller Interface Specification for
+ *          Certified Wireless Universal Serial Bus, revision 0.95.
+ *
+ * How this works is kind of convoluted but simple. The whci.ko driver
+ * loads when WHCI devices are detected. These WHCI devices expose
+ * many devices in the same PCI function (they couldn't have reused
+ * functions, no), so for each PCI function that exposes these many
+ * devices, whci ceates a umc_dev [whci_probe() -> whci_add_cap()]
+ * with umc_device_create() and adds it to the bus with
+ * umc_device_register().
+ *
+ * umc_device_register() calls device_register() which will push the
+ * bus management code to load your UMC driver's somehting_probe()
+ * that you have registered for that capability code.
+ *
+ * Now when the WHCI device is removed, whci_remove() will go over
+ * each umc_dev assigned to each of the PCI function's capabilities
+ * and through whci_del_cap() call umc_device_unregister() each
+ * created umc_dev. Of course, if you are bound to the device, your
+ * driver's something_remove() will be called.
+ */
+
+#ifndef _LINUX_UWB_UMC_H_
+#define _LINUX_UWB_UMC_H_
+
+#include <linux/device.h>
+#include <linux/pci.h>
+
+/*
+ * UMC capability IDs.
+ *
+ * 0x00 is reserved so use it for the radio controller device.
+ *
+ * [WHCI] table 2-8
+ */
+#define UMC_CAP_ID_WHCI_RC      0x00 /* radio controller */
+#define UMC_CAP_ID_WHCI_WUSB_HC 0x01 /* WUSB host controller */
+
+/**
+ * struct umc_dev - UMC capability device
+ *
+ * @version:  version of the specification this capability conforms to.
+ * @cap_id:   capability ID.
+ * @bar:      PCI Bar (64 bit) where the resource lies
+ * @resource: register space resource.
+ * @irq:      interrupt line.
+ */
+struct umc_dev {
+	u16		version;
+	u8		cap_id;
+	u8		bar;
+	struct resource resource;
+	unsigned	irq;
+	struct device	dev;
+};
+
+#define to_umc_dev(d) container_of(d, struct umc_dev, dev)
+
+/**
+ * struct umc_driver - UMC capability driver
+ * @cap_id: supported capability ID.
+ * @match: driver specific capability matching function.
+ * @match_data: driver specific data for match() (e.g., a
+ * table of pci_device_id's if umc_match_pci_id() is used).
+ */
+struct umc_driver {
+	char *name;
+	u8 cap_id;
+	int (*match)(struct umc_driver *, struct umc_dev *);
+	const void *match_data;
+
+	int  (*probe)(struct umc_dev *);
+	void (*remove)(struct umc_dev *);
+	int  (*pre_reset)(struct umc_dev *);
+	int  (*post_reset)(struct umc_dev *);
+
+	struct device_driver driver;
+};
+
+#define to_umc_driver(d) container_of(d, struct umc_driver, driver)
+
+extern struct bus_type umc_bus_type;
+
+struct umc_dev *umc_device_create(struct device *parent, int n);
+int __must_check umc_device_register(struct umc_dev *umc);
+void umc_device_unregister(struct umc_dev *umc);
+
+int __must_check __umc_driver_register(struct umc_driver *umc_drv,
+				       struct module *mod,
+				       const char *mod_name);
+
+/**
+ * umc_driver_register - register a UMC capabiltity driver.
+ * @umc_drv:  pointer to the driver.
+ */
+#define umc_driver_register(umc_drv) \
+	__umc_driver_register(umc_drv, THIS_MODULE, KBUILD_MODNAME)
+
+void umc_driver_unregister(struct umc_driver *umc_drv);
+
+/*
+ * Utility function you can use to match (umc_driver->match) against a
+ * null-terminated array of 'struct pci_device_id' in
+ * umc_driver->match_data.
+ */
+int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc);
+
+/**
+ * umc_parent_pci_dev - return the UMC's parent PCI device or NULL if none
+ * @umc_dev: UMC device whose parent PCI device we are looking for
+ *
+ * DIRTY!!! DON'T RELY ON THIS
+ *
+ * FIXME: This is as dirty as it gets, but we need some way to check
+ * the correct type of umc_dev->parent (so that for example, we can
+ * cast to pci_dev). Casting to pci_dev is necessary because at some
+ * point we need to request resources from the device. Mapping is
+ * easily over come (ioremap and stuff are bus agnostic), but hooking
+ * up to some error handlers (such as pci error handlers) might need
+ * this.
+ *
+ * THIS might (probably will) be removed in the future, so don't count
+ * on it.
+ */
+static inline struct pci_dev *umc_parent_pci_dev(struct umc_dev *umc_dev)
+{
+	struct pci_dev *pci_dev = NULL;
+	if (dev_is_pci(umc_dev->dev.parent))
+		pci_dev = to_pci_dev(umc_dev->dev.parent);
+	return pci_dev;
+}
+
+/**
+ * umc_dev_get() - reference a UMC device.
+ * @umc_dev: Pointer to UMC device.
+ *
+ * NOTE: we are assuming in this whole scheme that the parent device
+ *       is referenced at _probe() time and unreferenced at _remove()
+ *       time by the parent's subsystem.
+ */
+static inline struct umc_dev *umc_dev_get(struct umc_dev *umc_dev)
+{
+	get_device(&umc_dev->dev);
+	return umc_dev;
+}
+
+/**
+ * umc_dev_put() - unreference a UMC device.
+ * @umc_dev: Pointer to UMC device.
+ */
+static inline void umc_dev_put(struct umc_dev *umc_dev)
+{
+	put_device(&umc_dev->dev);
+}
+
+/**
+ * umc_set_drvdata - set UMC device's driver data.
+ * @umc_dev: Pointer to UMC device.
+ * @data:    Data to set.
+ */
+static inline void umc_set_drvdata(struct umc_dev *umc_dev, void *data)
+{
+	dev_set_drvdata(&umc_dev->dev, data);
+}
+
+/**
+ * umc_get_drvdata - recover UMC device's driver data.
+ * @umc_dev: Pointer to UMC device.
+ */
+static inline void *umc_get_drvdata(struct umc_dev *umc_dev)
+{
+	return dev_get_drvdata(&umc_dev->dev);
+}
+
+int umc_controller_reset(struct umc_dev *umc);
+
+#endif /* #ifndef _LINUX_UWB_UMC_H_ */
diff --git a/drivers/staging/uwb/include/whci.h b/drivers/staging/uwb/include/whci.h
new file mode 100644
index 000000000000..1a5c2cc2b008
--- /dev/null
+++ b/drivers/staging/uwb/include/whci.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Wireless Host Controller Interface for Ultra-Wide-Band and Wireless USB
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * References:
+ *   [WHCI] Wireless Host Controller Interface Specification for
+ *          Certified Wireless Universal Serial Bus, revision 0.95.
+ */
+#ifndef _LINUX_UWB_WHCI_H_
+#define _LINUX_UWB_WHCI_H_
+
+#include <linux/pci.h>
+
+/*
+ * UWB interface capability registers (offsets from UWBBASE)
+ *
+ * [WHCI] section 2.2
+ */
+#define UWBCAPINFO	0x00 /* == UWBCAPDATA(0) */
+#  define UWBCAPINFO_TO_N_CAPS(c)	(((c) >> 0)  & 0xFull)
+#define UWBCAPDATA(n)	(8*(n))
+#  define UWBCAPDATA_TO_VERSION(c)	(((c) >> 32) & 0xFFFFull)
+#  define UWBCAPDATA_TO_OFFSET(c)	(((c) >> 18) & 0x3FFFull)
+#  define UWBCAPDATA_TO_BAR(c)		(((c) >> 16) & 0x3ull)
+#  define UWBCAPDATA_TO_SIZE(c)		((((c) >> 8) & 0xFFull) * sizeof(u32))
+#  define UWBCAPDATA_TO_CAP_ID(c)	(((c) >> 0)  & 0xFFull)
+
+/* Size of the WHCI capability data (including the RC capability) for
+   a device with n capabilities. */
+#define UWBCAPDATA_SIZE(n) (8 + 8*(n))
+
+
+/*
+ * URC registers (offsets from URCBASE)
+ *
+ * [WHCI] section 2.3
+ */
+#define URCCMD		0x00
+#  define URCCMD_RESET		(1 << 31)  /* UMC Hardware reset */
+#  define URCCMD_RS		(1 << 30)  /* Run/Stop */
+#  define URCCMD_EARV		(1 << 29)  /* Event Address Register Valid */
+#  define URCCMD_ACTIVE		(1 << 15)  /* Command is active */
+#  define URCCMD_IWR		(1 << 14)  /* Interrupt When Ready */
+#  define URCCMD_SIZE_MASK	0x00000fff /* Command size mask */
+#define URCSTS		0x04
+#  define URCSTS_EPS		(1 << 17)  /* Event Processing Status */
+#  define URCSTS_HALTED		(1 << 16)  /* RC halted */
+#  define URCSTS_HSE		(1 << 10)  /* Host System Error...fried */
+#  define URCSTS_ER		(1 <<  9)  /* Event Ready */
+#  define URCSTS_RCI		(1 <<  8)  /* Ready for Command Interrupt */
+#  define URCSTS_INT_MASK	0x00000700 /* URC interrupt sources */
+#  define URCSTS_ISI		0x000000ff /* Interrupt Source Identification */
+#define URCINTR		0x08
+#  define URCINTR_EN_ALL	0x000007ff /* Enable all interrupt sources */
+#define URCCMDADDR	0x10
+#define URCEVTADDR	0x18
+#  define URCEVTADDR_OFFSET_MASK 0xfff    /* Event pointer offset mask */
+
+
+/** Write 32 bit @value to little endian register at @addr */
+static inline
+void le_writel(u32 value, void __iomem *addr)
+{
+	iowrite32(value, addr);
+}
+
+
+/** Read from 32 bit little endian register at @addr */
+static inline
+u32 le_readl(void __iomem *addr)
+{
+	return ioread32(addr);
+}
+
+
+/** Write 64 bit @value to little endian register at @addr */
+static inline
+void le_writeq(u64 value, void __iomem *addr)
+{
+	iowrite32(value, addr);
+	iowrite32(value >> 32, addr + 4);
+}
+
+
+/** Read from 64 bit little endian register at @addr */
+static inline
+u64 le_readq(void __iomem *addr)
+{
+	u64 value;
+	value  = ioread32(addr);
+	value |= (u64)ioread32(addr + 4) << 32;
+	return value;
+}
+
+extern int whci_wait_for(struct device *dev, u32 __iomem *reg,
+			 u32 mask, u32 result,
+			 unsigned long max_ms,  const char *tag);
+
+#endif /* #ifndef _LINUX_UWB_WHCI_H_ */
diff --git a/drivers/staging/uwb/lc-dev.c b/drivers/staging/uwb/lc-dev.c
new file mode 100644
index 000000000000..3e5c07fd6b10
--- /dev/null
+++ b/drivers/staging/uwb/lc-dev.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Life cycle of devices
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/err.h>
+#include <linux/kdev_t.h>
+#include <linux/random.h>
+#include <linux/stat.h>
+#include "uwb-internal.h"
+
+/* We initialize addresses to 0xff (invalid, as it is bcast) */
+static inline void uwb_dev_addr_init(struct uwb_dev_addr *addr)
+{
+	memset(&addr->data, 0xff, sizeof(addr->data));
+}
+
+static inline void uwb_mac_addr_init(struct uwb_mac_addr *addr)
+{
+	memset(&addr->data, 0xff, sizeof(addr->data));
+}
+
+/*
+ * Add callback @new to be called when an event occurs in @rc.
+ */
+int uwb_notifs_register(struct uwb_rc *rc, struct uwb_notifs_handler *new)
+{
+	if (mutex_lock_interruptible(&rc->notifs_chain.mutex))
+		return -ERESTARTSYS;
+	list_add(&new->list_node, &rc->notifs_chain.list);
+	mutex_unlock(&rc->notifs_chain.mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(uwb_notifs_register);
+
+/*
+ * Remove event handler (callback)
+ */
+int uwb_notifs_deregister(struct uwb_rc *rc, struct uwb_notifs_handler *entry)
+{
+	if (mutex_lock_interruptible(&rc->notifs_chain.mutex))
+		return -ERESTARTSYS;
+	list_del(&entry->list_node);
+	mutex_unlock(&rc->notifs_chain.mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(uwb_notifs_deregister);
+
+/*
+ * Notify all event handlers of a given event on @rc
+ *
+ * We are called with a valid reference to the device, or NULL if the
+ * event is not for a particular event (e.g., a BG join event).
+ */
+void uwb_notify(struct uwb_rc *rc, struct uwb_dev *uwb_dev, enum uwb_notifs event)
+{
+	struct uwb_notifs_handler *handler;
+	if (mutex_lock_interruptible(&rc->notifs_chain.mutex))
+		return;
+	if (!list_empty(&rc->notifs_chain.list)) {
+		list_for_each_entry(handler, &rc->notifs_chain.list, list_node) {
+			handler->cb(handler->data, uwb_dev, event);
+		}
+	}
+	mutex_unlock(&rc->notifs_chain.mutex);
+}
+
+/*
+ * Release the backing device of a uwb_dev that has been dynamically allocated.
+ */
+static void uwb_dev_sys_release(struct device *dev)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+
+	uwb_bce_put(uwb_dev->bce);
+	memset(uwb_dev, 0x69, sizeof(*uwb_dev));
+	kfree(uwb_dev);
+}
+
+/*
+ * Initialize a UWB device instance
+ *
+ * Alloc, zero and call this function.
+ */
+void uwb_dev_init(struct uwb_dev *uwb_dev)
+{
+	mutex_init(&uwb_dev->mutex);
+	device_initialize(&uwb_dev->dev);
+	uwb_dev->dev.release = uwb_dev_sys_release;
+	uwb_dev_addr_init(&uwb_dev->dev_addr);
+	uwb_mac_addr_init(&uwb_dev->mac_addr);
+	bitmap_fill(uwb_dev->streams, UWB_NUM_GLOBAL_STREAMS);
+}
+
+static ssize_t uwb_dev_EUI_48_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	char addr[UWB_ADDR_STRSIZE];
+
+	uwb_mac_addr_print(addr, sizeof(addr), &uwb_dev->mac_addr);
+	return sprintf(buf, "%s\n", addr);
+}
+static DEVICE_ATTR(EUI_48, S_IRUGO, uwb_dev_EUI_48_show, NULL);
+
+static ssize_t uwb_dev_DevAddr_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	char addr[UWB_ADDR_STRSIZE];
+
+	uwb_dev_addr_print(addr, sizeof(addr), &uwb_dev->dev_addr);
+	return sprintf(buf, "%s\n", addr);
+}
+static DEVICE_ATTR(DevAddr, S_IRUGO, uwb_dev_DevAddr_show, NULL);
+
+/*
+ * Show the BPST of this device.
+ *
+ * Calculated from the receive time of the device's beacon and it's
+ * slot number.
+ */
+static ssize_t uwb_dev_BPST_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_beca_e *bce;
+	struct uwb_beacon_frame *bf;
+	u16 bpst;
+
+	bce = uwb_dev->bce;
+	mutex_lock(&bce->mutex);
+	bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
+	bpst = bce->be->wBPSTOffset
+		- (u16)(bf->Beacon_Slot_Number * UWB_BEACON_SLOT_LENGTH_US);
+	mutex_unlock(&bce->mutex);
+
+	return sprintf(buf, "%d\n", bpst);
+}
+static DEVICE_ATTR(BPST, S_IRUGO, uwb_dev_BPST_show, NULL);
+
+/*
+ * Show the IEs a device is beaconing
+ *
+ * We need to access the beacon cache, so we just lock it really
+ * quick, print the IEs and unlock.
+ *
+ * We have a reference on the cache entry, so that should be
+ * quite safe.
+ */
+static ssize_t uwb_dev_IEs_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+
+	return uwb_bce_print_IEs(uwb_dev, uwb_dev->bce, buf, PAGE_SIZE);
+}
+static DEVICE_ATTR(IEs, S_IRUGO | S_IWUSR, uwb_dev_IEs_show, NULL);
+
+static ssize_t uwb_dev_LQE_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_beca_e *bce = uwb_dev->bce;
+	size_t result;
+
+	mutex_lock(&bce->mutex);
+	result = stats_show(&uwb_dev->bce->lqe_stats, buf);
+	mutex_unlock(&bce->mutex);
+	return result;
+}
+
+static ssize_t uwb_dev_LQE_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_beca_e *bce = uwb_dev->bce;
+	ssize_t result;
+
+	mutex_lock(&bce->mutex);
+	result = stats_store(&uwb_dev->bce->lqe_stats, buf, size);
+	mutex_unlock(&bce->mutex);
+	return result;
+}
+static DEVICE_ATTR(LQE, S_IRUGO | S_IWUSR, uwb_dev_LQE_show, uwb_dev_LQE_store);
+
+static ssize_t uwb_dev_RSSI_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_beca_e *bce = uwb_dev->bce;
+	size_t result;
+
+	mutex_lock(&bce->mutex);
+	result = stats_show(&uwb_dev->bce->rssi_stats, buf);
+	mutex_unlock(&bce->mutex);
+	return result;
+}
+
+static ssize_t uwb_dev_RSSI_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_beca_e *bce = uwb_dev->bce;
+	ssize_t result;
+
+	mutex_lock(&bce->mutex);
+	result = stats_store(&uwb_dev->bce->rssi_stats, buf, size);
+	mutex_unlock(&bce->mutex);
+	return result;
+}
+static DEVICE_ATTR(RSSI, S_IRUGO | S_IWUSR, uwb_dev_RSSI_show, uwb_dev_RSSI_store);
+
+
+static struct attribute *uwb_dev_attrs[] = {
+	&dev_attr_EUI_48.attr,
+	&dev_attr_DevAddr.attr,
+	&dev_attr_BPST.attr,
+	&dev_attr_IEs.attr,
+	&dev_attr_LQE.attr,
+	&dev_attr_RSSI.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(uwb_dev);
+
+/* UWB bus type. */
+struct bus_type uwb_bus_type = {
+	.name =		"uwb",
+	.dev_groups =	uwb_dev_groups,
+};
+
+/**
+ * Device SYSFS registration
+ */
+static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev)
+{
+	struct device *dev;
+
+	dev = &uwb_dev->dev;
+	dev->parent = parent_dev;
+	dev_set_drvdata(dev, uwb_dev);
+
+	return device_add(dev);
+}
+
+
+static void __uwb_dev_sys_rm(struct uwb_dev *uwb_dev)
+{
+	dev_set_drvdata(&uwb_dev->dev, NULL);
+	device_del(&uwb_dev->dev);
+}
+
+
+/**
+ * Register and initialize a new UWB device
+ *
+ * Did you call uwb_dev_init() on it?
+ *
+ * @parent_rc: is the parent radio controller who has the link to the
+ *             device. When registering the UWB device that is a UWB
+ *             Radio Controller, we point back to it.
+ *
+ * If registering the device that is part of a radio, caller has set
+ * rc->uwb_dev->dev. Otherwise it is to be left NULL--a new one will
+ * be allocated.
+ */
+int uwb_dev_add(struct uwb_dev *uwb_dev, struct device *parent_dev,
+		struct uwb_rc *parent_rc)
+{
+	int result;
+	struct device *dev;
+
+	BUG_ON(uwb_dev == NULL);
+	BUG_ON(parent_dev == NULL);
+	BUG_ON(parent_rc == NULL);
+
+	mutex_lock(&uwb_dev->mutex);
+	dev = &uwb_dev->dev;
+	uwb_dev->rc = parent_rc;
+	result = __uwb_dev_sys_add(uwb_dev, parent_dev);
+	if (result < 0)
+		printk(KERN_ERR "UWB: unable to register dev %s with sysfs: %d\n",
+		       dev_name(dev), result);
+	mutex_unlock(&uwb_dev->mutex);
+	return result;
+}
+
+
+void uwb_dev_rm(struct uwb_dev *uwb_dev)
+{
+	mutex_lock(&uwb_dev->mutex);
+	__uwb_dev_sys_rm(uwb_dev);
+	mutex_unlock(&uwb_dev->mutex);
+}
+
+
+static
+int __uwb_dev_try_get(struct device *dev, void *__target_uwb_dev)
+{
+	struct uwb_dev *target_uwb_dev = __target_uwb_dev;
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	if (uwb_dev == target_uwb_dev) {
+		uwb_dev_get(uwb_dev);
+		return 1;
+	} else
+		return 0;
+}
+
+
+/**
+ * Given a UWB device descriptor, validate and refcount it
+ *
+ * @returns NULL if the device does not exist or is quiescing; the ptr to
+ *               it otherwise.
+ */
+struct uwb_dev *uwb_dev_try_get(struct uwb_rc *rc, struct uwb_dev *uwb_dev)
+{
+	if (uwb_dev_for_each(rc, __uwb_dev_try_get, uwb_dev))
+		return uwb_dev;
+	else
+		return NULL;
+}
+EXPORT_SYMBOL_GPL(uwb_dev_try_get);
+
+
+/**
+ * Remove a device from the system [grunt for other functions]
+ */
+int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc)
+{
+	struct device *dev = &uwb_dev->dev;
+	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
+
+	uwb_mac_addr_print(macbuf, sizeof(macbuf), &uwb_dev->mac_addr);
+	uwb_dev_addr_print(devbuf, sizeof(devbuf), &uwb_dev->dev_addr);
+	dev_info(dev, "uwb device (mac %s dev %s) disconnected from %s %s\n",
+		 macbuf, devbuf,
+		 uwb_dev->dev.bus->name,
+		 rc ? dev_name(&(rc->uwb_dev.dev)) : "");
+	uwb_dev_rm(uwb_dev);
+	list_del(&uwb_dev->bce->node);
+	uwb_bce_put(uwb_dev->bce);
+	uwb_dev_put(uwb_dev);	/* for the creation in _onair() */
+
+	return 0;
+}
+
+
+/**
+ * A device went off the air, clean up after it!
+ *
+ * This is called by the UWB Daemon (through the beacon purge function
+ * uwb_bcn_cache_purge) when it is detected that a device has been in
+ * radio silence for a while.
+ *
+ * If this device is actually a local radio controller we don't need
+ * to go through the offair process, as it is not registered as that.
+ *
+ * NOTE: uwb_bcn_cache.mutex is held!
+ */
+void uwbd_dev_offair(struct uwb_beca_e *bce)
+{
+	struct uwb_dev *uwb_dev;
+
+	uwb_dev = bce->uwb_dev;
+	if (uwb_dev) {
+		uwb_notify(uwb_dev->rc, uwb_dev, UWB_NOTIF_OFFAIR);
+		__uwb_dev_offair(uwb_dev, uwb_dev->rc);
+	}
+}
+
+
+/**
+ * A device went on the air, start it up!
+ *
+ * This is called by the UWB Daemon when it is detected that a device
+ * has popped up in the radio range of the radio controller.
+ *
+ * It will just create the freaking device, register the beacon and
+ * stuff and yatla, done.
+ *
+ *
+ * NOTE: uwb_beca.mutex is held, bce->mutex is held
+ */
+void uwbd_dev_onair(struct uwb_rc *rc, struct uwb_beca_e *bce)
+{
+	int result;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_dev *uwb_dev;
+	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
+
+	uwb_mac_addr_print(macbuf, sizeof(macbuf), bce->mac_addr);
+	uwb_dev_addr_print(devbuf, sizeof(devbuf), &bce->dev_addr);
+	uwb_dev = kzalloc(sizeof(struct uwb_dev), GFP_KERNEL);
+	if (uwb_dev == NULL) {
+		dev_err(dev, "new device %s: Cannot allocate memory\n",
+			macbuf);
+		return;
+	}
+	uwb_dev_init(uwb_dev);		/* This sets refcnt to one, we own it */
+	uwb_dev->dev.bus = &uwb_bus_type;
+	uwb_dev->mac_addr = *bce->mac_addr;
+	uwb_dev->dev_addr = bce->dev_addr;
+	dev_set_name(&uwb_dev->dev, "%s", macbuf);
+
+	/* plug the beacon cache */
+	bce->uwb_dev = uwb_dev;
+	uwb_dev->bce = bce;
+	uwb_bce_get(bce);		/* released in uwb_dev_sys_release() */
+
+	result = uwb_dev_add(uwb_dev, &rc->uwb_dev.dev, rc);
+	if (result < 0) {
+		dev_err(dev, "new device %s: cannot instantiate device\n",
+			macbuf);
+		goto error_dev_add;
+	}
+
+	dev_info(dev, "uwb device (mac %s dev %s) connected to %s %s\n",
+		 macbuf, devbuf, uwb_dev->dev.bus->name,
+		 dev_name(&(rc->uwb_dev.dev)));
+	uwb_notify(rc, uwb_dev, UWB_NOTIF_ONAIR);
+	return;
+
+error_dev_add:
+	bce->uwb_dev = NULL;
+	uwb_bce_put(bce);
+	kfree(uwb_dev);
+	return;
+}
+
+/**
+ * Iterate over the list of UWB devices, calling a @function on each
+ *
+ * See docs for bus_for_each()....
+ *
+ * @rc:       radio controller for the devices.
+ * @function: function to call.
+ * @priv:     data to pass to @function.
+ * @returns:  0 if no invocation of function() returned a value
+ *            different to zero. That value otherwise.
+ */
+int uwb_dev_for_each(struct uwb_rc *rc, uwb_dev_for_each_f function, void *priv)
+{
+	return device_for_each_child(&rc->uwb_dev.dev, priv, function);
+}
+EXPORT_SYMBOL_GPL(uwb_dev_for_each);
diff --git a/drivers/staging/uwb/lc-rc.c b/drivers/staging/uwb/lc-rc.c
new file mode 100644
index 000000000000..ee31b221cdc2
--- /dev/null
+++ b/drivers/staging/uwb/lc-rc.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Life cycle of radio controllers
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ *
+ * A UWB radio controller is also a UWB device, so it embeds one...
+ *
+ * List of RCs comes from the 'struct class uwb_rc_class'.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/random.h>
+#include <linux/kdev_t.h>
+#include <linux/etherdevice.h>
+#include <linux/usb.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include "uwb-internal.h"
+
+static int uwb_rc_index_match(struct device *dev, const void *data)
+{
+	const int *index = data;
+	struct uwb_rc *rc = dev_get_drvdata(dev);
+
+	if (rc->index == *index)
+		return 1;
+	return 0;
+}
+
+static struct uwb_rc *uwb_rc_find_by_index(int index)
+{
+	struct device *dev;
+	struct uwb_rc *rc = NULL;
+
+	dev = class_find_device(&uwb_rc_class, NULL, &index, uwb_rc_index_match);
+	if (dev) {
+		rc = dev_get_drvdata(dev);
+		put_device(dev);
+	}
+
+	return rc;
+}
+
+static int uwb_rc_new_index(void)
+{
+	int index = 0;
+
+	for (;;) {
+		if (!uwb_rc_find_by_index(index))
+			return index;
+		if (++index < 0)
+			index = 0;
+	}
+}
+
+/**
+ * Release the backing device of a uwb_rc that has been dynamically allocated.
+ */
+static void uwb_rc_sys_release(struct device *dev)
+{
+	struct uwb_dev *uwb_dev = container_of(dev, struct uwb_dev, dev);
+	struct uwb_rc *rc = container_of(uwb_dev, struct uwb_rc, uwb_dev);
+
+	uwb_rc_ie_release(rc);
+	kfree(rc);
+}
+
+
+void uwb_rc_init(struct uwb_rc *rc)
+{
+	struct uwb_dev *uwb_dev = &rc->uwb_dev;
+
+	uwb_dev_init(uwb_dev);
+	rc->uwb_dev.dev.class = &uwb_rc_class;
+	rc->uwb_dev.dev.release = uwb_rc_sys_release;
+	uwb_rc_neh_create(rc);
+	rc->beaconing = -1;
+	rc->scan_type = UWB_SCAN_DISABLED;
+	INIT_LIST_HEAD(&rc->notifs_chain.list);
+	mutex_init(&rc->notifs_chain.mutex);
+	INIT_LIST_HEAD(&rc->uwb_beca.list);
+	mutex_init(&rc->uwb_beca.mutex);
+	uwb_drp_avail_init(rc);
+	uwb_rc_ie_init(rc);
+	uwb_rsv_init(rc);
+	uwb_rc_pal_init(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_init);
+
+
+struct uwb_rc *uwb_rc_alloc(void)
+{
+	struct uwb_rc *rc;
+	rc = kzalloc(sizeof(*rc), GFP_KERNEL);
+	if (rc == NULL)
+		return NULL;
+	uwb_rc_init(rc);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_alloc);
+
+/*
+ * Show the ASIE that is broadcast in the UWB beacon by this uwb_rc device.
+ */
+static ssize_t ASIE_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	struct uwb_ie_hdr *ie;
+	void *ptr;
+	size_t len;
+	int result = 0;
+
+	/* init empty buffer. */
+	result = scnprintf(buf, PAGE_SIZE, "\n");
+	mutex_lock(&rc->ies_mutex);
+	/* walk IEData looking for an ASIE. */
+	ptr = rc->ies->IEData;
+	len = le16_to_cpu(rc->ies->wIELength);
+	for (;;) {
+		ie = uwb_ie_next(&ptr, &len);
+		if (!ie)
+			break;
+		if (ie->element_id == UWB_APP_SPEC_IE) {
+			result = uwb_ie_dump_hex(ie,
+					ie->length + sizeof(struct uwb_ie_hdr),
+					buf, PAGE_SIZE);
+			break;
+		}
+	}
+	mutex_unlock(&rc->ies_mutex);
+
+	return result;
+}
+
+/*
+ * Update the ASIE that is broadcast in the UWB beacon by this uwb_rc device.
+ */
+static ssize_t ASIE_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	char ie_buf[255];
+	int result, ie_len = 0;
+	const char *cur_ptr = buf;
+	struct uwb_ie_hdr *ie;
+
+	/* empty string means clear the ASIE. */
+	if (strlen(buf) <= 1) {
+		uwb_rc_ie_rm(rc, UWB_APP_SPEC_IE);
+		return size;
+	}
+
+	/* if non-empty string, convert string of hex chars to binary. */
+	while (ie_len < sizeof(ie_buf)) {
+		int char_count;
+
+		if (sscanf(cur_ptr, " %02hhX %n",
+				&(ie_buf[ie_len]), &char_count) > 0) {
+			++ie_len;
+			/* skip chars read from cur_ptr. */
+			cur_ptr += char_count;
+		} else {
+			break;
+		}
+	}
+
+	/* validate IE length and type. */
+	if (ie_len < sizeof(struct uwb_ie_hdr)) {
+		dev_err(dev, "%s: Invalid ASIE size %d.\n", __func__, ie_len);
+		return -EINVAL;
+	}
+
+	ie = (struct uwb_ie_hdr *)ie_buf;
+	if (ie->element_id != UWB_APP_SPEC_IE) {
+		dev_err(dev, "%s: Invalid IE element type size = 0x%02X.\n",
+				__func__, ie->element_id);
+		return -EINVAL;
+	}
+
+	/* bounds check length field from user. */
+	if (ie->length > (ie_len - sizeof(struct uwb_ie_hdr)))
+		ie->length = ie_len - sizeof(struct uwb_ie_hdr);
+
+	/*
+	 * Valid ASIE received. Remove current ASIE then add the new one using
+	 * uwb_rc_ie_add.
+	 */
+	uwb_rc_ie_rm(rc, UWB_APP_SPEC_IE);
+
+	result = uwb_rc_ie_add(rc, ie, ie->length + sizeof(struct uwb_ie_hdr));
+
+	return result >= 0 ? size : result;
+}
+static DEVICE_ATTR_RW(ASIE);
+
+static struct attribute *rc_attrs[] = {
+		&dev_attr_mac_address.attr,
+		&dev_attr_scan.attr,
+		&dev_attr_beacon.attr,
+		&dev_attr_ASIE.attr,
+		NULL,
+};
+
+static const struct attribute_group rc_attr_group = {
+	.attrs = rc_attrs,
+};
+
+/*
+ * Registration of sysfs specific stuff
+ */
+static int uwb_rc_sys_add(struct uwb_rc *rc)
+{
+	return sysfs_create_group(&rc->uwb_dev.dev.kobj, &rc_attr_group);
+}
+
+
+static void __uwb_rc_sys_rm(struct uwb_rc *rc)
+{
+	sysfs_remove_group(&rc->uwb_dev.dev.kobj, &rc_attr_group);
+}
+
+/**
+ * uwb_rc_mac_addr_setup - get an RC's EUI-48 address or set it
+ * @rc:  the radio controller.
+ *
+ * If the EUI-48 address is 00:00:00:00:00:00 or FF:FF:FF:FF:FF:FF
+ * then a random locally administered EUI-48 is generated and set on
+ * the device.  The probability of address collisions is sufficiently
+ * unlikely (1/2^40 = 9.1e-13) that they're not checked for.
+ */
+static
+int uwb_rc_mac_addr_setup(struct uwb_rc *rc)
+{
+	int result;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_dev *uwb_dev = &rc->uwb_dev;
+	char devname[UWB_ADDR_STRSIZE];
+	struct uwb_mac_addr addr;
+
+	result = uwb_rc_mac_addr_get(rc, &addr);
+	if (result < 0) {
+		dev_err(dev, "cannot retrieve UWB EUI-48 address: %d\n", result);
+		return result;
+	}
+
+	if (uwb_mac_addr_unset(&addr) || uwb_mac_addr_bcast(&addr)) {
+		addr.data[0] = 0x02; /* locally administered and unicast */
+		get_random_bytes(&addr.data[1], sizeof(addr.data)-1);
+
+		result = uwb_rc_mac_addr_set(rc, &addr);
+		if (result < 0) {
+			uwb_mac_addr_print(devname, sizeof(devname), &addr);
+			dev_err(dev, "cannot set EUI-48 address %s: %d\n",
+				devname, result);
+			return result;
+		}
+	}
+	uwb_dev->mac_addr = addr;
+	return 0;
+}
+
+
+
+static int uwb_rc_setup(struct uwb_rc *rc)
+{
+	int result;
+	struct device *dev = &rc->uwb_dev.dev;
+
+	result = uwb_radio_setup(rc);
+	if (result < 0) {
+		dev_err(dev, "cannot setup UWB radio: %d\n", result);
+		goto error;
+	}
+	result = uwb_rc_mac_addr_setup(rc);
+	if (result < 0) {
+		dev_err(dev, "cannot setup UWB MAC address: %d\n", result);
+		goto error;
+	}
+	result = uwb_rc_dev_addr_assign(rc);
+	if (result < 0) {
+		dev_err(dev, "cannot assign UWB DevAddr: %d\n", result);
+		goto error;
+	}
+	result = uwb_rc_ie_setup(rc);
+	if (result < 0) {
+		dev_err(dev, "cannot setup IE subsystem: %d\n", result);
+		goto error_ie_setup;
+	}
+	result = uwb_rsv_setup(rc);
+	if (result < 0) {
+		dev_err(dev, "cannot setup reservation subsystem: %d\n", result);
+		goto error_rsv_setup;
+	}
+	uwb_dbg_add_rc(rc);
+	return 0;
+
+error_rsv_setup:
+	uwb_rc_ie_release(rc);
+error_ie_setup:
+error:
+	return result;
+}
+
+
+/**
+ * Register a new UWB radio controller
+ *
+ * Did you call uwb_rc_init() on your rc?
+ *
+ * We assume that this is being called with a > 0 refcount on
+ * it [through ops->{get|put}_device(). We'll take our own, though.
+ *
+ * @parent_dev is our real device, the one that provides the actual UWB device
+ */
+int uwb_rc_add(struct uwb_rc *rc, struct device *parent_dev, void *priv)
+{
+	int result;
+	struct device *dev;
+	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
+
+	rc->index = uwb_rc_new_index();
+
+	dev = &rc->uwb_dev.dev;
+	dev_set_name(dev, "uwb%d", rc->index);
+
+	rc->priv = priv;
+
+	init_waitqueue_head(&rc->uwbd.wq);
+	INIT_LIST_HEAD(&rc->uwbd.event_list);
+	spin_lock_init(&rc->uwbd.event_list_lock);
+
+	uwbd_start(rc);
+
+	result = rc->start(rc);
+	if (result < 0)
+		goto error_rc_start;
+
+	result = uwb_rc_setup(rc);
+	if (result < 0) {
+		dev_err(dev, "cannot setup UWB radio controller: %d\n", result);
+		goto error_rc_setup;
+	}
+
+	result = uwb_dev_add(&rc->uwb_dev, parent_dev, rc);
+	if (result < 0 && result != -EADDRNOTAVAIL)
+		goto error_dev_add;
+
+	result = uwb_rc_sys_add(rc);
+	if (result < 0) {
+		dev_err(parent_dev, "cannot register UWB radio controller "
+			"dev attributes: %d\n", result);
+		goto error_sys_add;
+	}
+
+	uwb_mac_addr_print(macbuf, sizeof(macbuf), &rc->uwb_dev.mac_addr);
+	uwb_dev_addr_print(devbuf, sizeof(devbuf), &rc->uwb_dev.dev_addr);
+	dev_info(dev,
+		 "new uwb radio controller (mac %s dev %s) on %s %s\n",
+		 macbuf, devbuf, parent_dev->bus->name, dev_name(parent_dev));
+	rc->ready = 1;
+	return 0;
+
+error_sys_add:
+	uwb_dev_rm(&rc->uwb_dev);
+error_dev_add:
+error_rc_setup:
+	rc->stop(rc);
+error_rc_start:
+	uwbd_stop(rc);
+	return result;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_add);
+
+
+static int uwb_dev_offair_helper(struct device *dev, void *priv)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+
+	return __uwb_dev_offair(uwb_dev, uwb_dev->rc);
+}
+
+/*
+ * Remove a Radio Controller; stop beaconing/scanning, disconnect all children
+ */
+void uwb_rc_rm(struct uwb_rc *rc)
+{
+	rc->ready = 0;
+
+	uwb_dbg_del_rc(rc);
+	uwb_rsv_remove_all(rc);
+	uwb_radio_shutdown(rc);
+
+	rc->stop(rc);
+
+	uwbd_stop(rc);
+	uwb_rc_neh_destroy(rc);
+
+	uwb_dev_lock(&rc->uwb_dev);
+	rc->priv = NULL;
+	rc->cmd = NULL;
+	uwb_dev_unlock(&rc->uwb_dev);
+	mutex_lock(&rc->uwb_beca.mutex);
+	uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL);
+	__uwb_rc_sys_rm(rc);
+	mutex_unlock(&rc->uwb_beca.mutex);
+	uwb_rsv_cleanup(rc);
+ 	uwb_beca_release(rc);
+	uwb_dev_rm(&rc->uwb_dev);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_rm);
+
+static int find_rc_try_get(struct device *dev, const void *data)
+{
+	const struct uwb_rc *target_rc = data;
+	struct uwb_rc *rc = dev_get_drvdata(dev);
+
+	if (rc == NULL) {
+		WARN_ON(1);
+		return 0;
+	}
+	if (rc == target_rc) {
+		if (rc->ready == 0)
+			return 0;
+		else
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * Given a radio controller descriptor, validate and refcount it
+ *
+ * @returns NULL if the rc does not exist or is quiescing; the ptr to
+ *               it otherwise.
+ */
+struct uwb_rc *__uwb_rc_try_get(struct uwb_rc *target_rc)
+{
+	struct device *dev;
+	struct uwb_rc *rc = NULL;
+
+	dev = class_find_device(&uwb_rc_class, NULL, target_rc,
+				find_rc_try_get);
+	if (dev) {
+		rc = dev_get_drvdata(dev);
+		__uwb_rc_get(rc);
+		put_device(dev);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(__uwb_rc_try_get);
+
+/*
+ * RC get for external refcount acquirers...
+ *
+ * Increments the refcount of the device and it's backend modules
+ */
+static inline struct uwb_rc *uwb_rc_get(struct uwb_rc *rc)
+{
+	if (rc->ready == 0)
+		return NULL;
+	uwb_dev_get(&rc->uwb_dev);
+	return rc;
+}
+
+static int find_rc_grandpa(struct device *dev, const void *data)
+{
+	const struct device *grandpa_dev = data;
+	struct uwb_rc *rc = dev_get_drvdata(dev);
+
+	if (rc->uwb_dev.dev.parent->parent == grandpa_dev) {
+		rc = uwb_rc_get(rc);
+		return 1;
+	}
+	return 0;
+}
+
+/**
+ * Locate and refcount a radio controller given a common grand-parent
+ *
+ * @grandpa_dev  Pointer to the 'grandparent' device structure.
+ * @returns NULL If the rc does not exist or is quiescing; the ptr to
+ *               it otherwise, properly referenced.
+ *
+ * The Radio Control interface (or the UWB Radio Controller) is always
+ * an interface of a device. The parent is the interface, the
+ * grandparent is the device that encapsulates the interface.
+ *
+ * There is no need to lock around as the "grandpa" would be
+ * refcounted by the target, and to remove the referemes, the
+ * uwb_rc_class->sem would have to be taken--we hold it, ergo we
+ * should be safe.
+ */
+struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *grandpa_dev)
+{
+	struct device *dev;
+	struct uwb_rc *rc = NULL;
+
+	dev = class_find_device(&uwb_rc_class, NULL, grandpa_dev,
+				find_rc_grandpa);
+	if (dev) {
+		rc = dev_get_drvdata(dev);
+		put_device(dev);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_get_by_grandpa);
+
+/**
+ * Find a radio controller by device address
+ *
+ * @returns the pointer to the radio controller, properly referenced
+ */
+static int find_rc_dev(struct device *dev, const void *data)
+{
+	const struct uwb_dev_addr *addr = data;
+	struct uwb_rc *rc = dev_get_drvdata(dev);
+
+	if (rc == NULL) {
+		WARN_ON(1);
+		return 0;
+	}
+	if (!uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, addr)) {
+		rc = uwb_rc_get(rc);
+		return 1;
+	}
+	return 0;
+}
+
+struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *addr)
+{
+	struct device *dev;
+	struct uwb_rc *rc = NULL;
+
+	dev = class_find_device(&uwb_rc_class, NULL, addr, find_rc_dev);
+	if (dev) {
+		rc = dev_get_drvdata(dev);
+		put_device(dev);
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_get_by_dev);
+
+/**
+ * Drop a reference on a radio controller
+ *
+ * This is the version that should be done by entities external to the
+ * UWB Radio Control stack (ie: clients of the API).
+ */
+void uwb_rc_put(struct uwb_rc *rc)
+{
+	__uwb_rc_put(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_put);
diff --git a/drivers/staging/uwb/neh.c b/drivers/staging/uwb/neh.c
new file mode 100644
index 000000000000..1695584be412
--- /dev/null
+++ b/drivers/staging/uwb/neh.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * WUSB Wire Adapter: Radio Control Interface (WUSB[8])
+ * Notification and Event Handling
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * The RC interface of the Host Wire Adapter (USB dongle) or WHCI PCI
+ * card delivers a stream of notifications and events to the
+ * notification end event endpoint or area. This code takes care of
+ * getting a buffer with that data, breaking it up in separate
+ * notifications and events and then deliver those.
+ *
+ * Events are answers to commands and they carry a context ID that
+ * associates them to the command. Notifications are that,
+ * notifications, they come out of the blue and have a context ID of
+ * zero. Think of the context ID kind of like a handler. The
+ * uwb_rc_neh_* code deals with managing context IDs.
+ *
+ * This is why you require a handle to operate on a UWB host. When you
+ * open a handle a context ID is assigned to you.
+ *
+ * So, as it is done is:
+ *
+ * 1. Add an event handler [uwb_rc_neh_add()] (assigns a ctx id)
+ * 2. Issue command [rc->cmd(rc, ...)]
+ * 3. Arm the timeout timer [uwb_rc_neh_arm()]
+ * 4, Release the reference to the neh [uwb_rc_neh_put()]
+ * 5. Wait for the callback
+ * 6. Command result (RCEB) is passed to the callback
+ *
+ * If (2) fails, you should remove the handle [uwb_rc_neh_rm()]
+ * instead of arming the timer.
+ *
+ * Handles are for using in *serialized* code, single thread.
+ *
+ * When the notification/event comes, the IRQ handler/endpoint
+ * callback passes the data read to uwb_rc_neh_grok() which will break
+ * it up in a discrete series of events, look up who is listening for
+ * them and execute the pertinent callbacks.
+ *
+ * If the reader detects an error while reading the data stream, call
+ * uwb_rc_neh_error().
+ *
+ * CONSTRAINTS/ASSUMPTIONS:
+ *
+ * - Most notifications/events are small (less thank .5k), copying
+ *   around is ok.
+ *
+ * - Notifications/events are ALWAYS smaller than PAGE_SIZE
+ *
+ * - Notifications/events always come in a single piece (ie: a buffer
+ *   will always contain entire notifications/events).
+ *
+ * - we cannot know in advance how long each event is (because they
+ *   lack a length field in their header--smart move by the standards
+ *   body, btw). So we need a facility to get the event size given the
+ *   header. This is what the EST code does (notif/Event Size
+ *   Tables), check nest.c--as well, you can associate the size to
+ *   the handle [w/ neh->extra_size()].
+ *
+ * - Most notifications/events are fixed size; only a few are variable
+ *   size (NEST takes care of that).
+ *
+ * - Listeners of events expect them, so they usually provide a
+ *   buffer, as they know the size. Listeners to notifications don't,
+ *   so we allocate their buffers dynamically.
+ */
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/export.h>
+
+#include "uwb-internal.h"
+
+/*
+ * UWB Radio Controller Notification/Event Handle
+ *
+ * Represents an entity waiting for an event coming from the UWB Radio
+ * Controller with a given context id (context) and type (evt_type and
+ * evt). On reception of the notification/event, the callback (cb) is
+ * called with the event.
+ *
+ * If the timer expires before the event is received, the callback is
+ * called with -ETIMEDOUT as the event size.
+ */
+struct uwb_rc_neh {
+	struct kref kref;
+
+	struct uwb_rc *rc;
+	u8 evt_type;
+	__le16 evt;
+	u8 context;
+	u8 completed;
+	uwb_rc_cmd_cb_f cb;
+	void *arg;
+
+	struct timer_list timer;
+	struct list_head list_node;
+};
+
+static void uwb_rc_neh_timer(struct timer_list *t);
+
+static void uwb_rc_neh_release(struct kref *kref)
+{
+	struct uwb_rc_neh *neh = container_of(kref, struct uwb_rc_neh, kref);
+
+	kfree(neh);
+}
+
+static void uwb_rc_neh_get(struct uwb_rc_neh *neh)
+{
+	kref_get(&neh->kref);
+}
+
+/**
+ * uwb_rc_neh_put - release reference to a neh
+ * @neh: the neh
+ */
+void uwb_rc_neh_put(struct uwb_rc_neh *neh)
+{
+	kref_put(&neh->kref, uwb_rc_neh_release);
+}
+
+
+/**
+ * Assigns @neh a context id from @rc's pool
+ *
+ * @rc:	    UWB Radio Controller descriptor; @rc->neh_lock taken
+ * @neh:    Notification/Event Handle
+ * @returns 0 if context id was assigned ok; < 0 errno on error (if
+ *	    all the context IDs are taken).
+ *
+ * (assumes @wa is locked).
+ *
+ * NOTE: WUSB spec reserves context ids 0x00 for notifications and
+ *	 0xff is invalid, so they must not be used. Initialization
+ *	 fills up those two in the bitmap so they are not allocated.
+ *
+ * We spread the allocation around to reduce the possibility of two
+ * consecutive opened @neh's getting the same context ID assigned (to
+ * avoid surprises with late events that timed out long time ago). So
+ * first we search from where @rc->ctx_roll is, if not found, we
+ * search from zero.
+ */
+static
+int __uwb_rc_ctx_get(struct uwb_rc *rc, struct uwb_rc_neh *neh)
+{
+	int result;
+	result = find_next_zero_bit(rc->ctx_bm, UWB_RC_CTX_MAX,
+				    rc->ctx_roll++);
+	if (result < UWB_RC_CTX_MAX)
+		goto found;
+	result = find_first_zero_bit(rc->ctx_bm, UWB_RC_CTX_MAX);
+	if (result < UWB_RC_CTX_MAX)
+		goto found;
+	return -ENFILE;
+found:
+	set_bit(result, rc->ctx_bm);
+	neh->context = result;
+	return 0;
+}
+
+
+/** Releases @neh's context ID back to @rc (@rc->neh_lock is locked). */
+static
+void __uwb_rc_ctx_put(struct uwb_rc *rc, struct uwb_rc_neh *neh)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	if (neh->context == 0)
+		return;
+	if (test_bit(neh->context, rc->ctx_bm) == 0) {
+		dev_err(dev, "context %u not set in bitmap\n",
+			neh->context);
+		WARN_ON(1);
+	}
+	clear_bit(neh->context, rc->ctx_bm);
+	neh->context = 0;
+}
+
+/**
+ * uwb_rc_neh_add - add a neh for a radio controller command
+ * @rc:             the radio controller
+ * @cmd:            the radio controller command
+ * @expected_type:  the type of the expected response event
+ * @expected_event: the expected event ID
+ * @cb:             callback for when the event is received
+ * @arg:            argument for the callback
+ *
+ * Creates a neh and adds it to the list of those waiting for an
+ * event.  A context ID will be assigned to the command.
+ */
+struct uwb_rc_neh *uwb_rc_neh_add(struct uwb_rc *rc, struct uwb_rccb *cmd,
+				  u8 expected_type, u16 expected_event,
+				  uwb_rc_cmd_cb_f cb, void *arg)
+{
+	int result;
+	unsigned long flags;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rc_neh *neh;
+
+	neh = kzalloc(sizeof(*neh), GFP_KERNEL);
+	if (neh == NULL) {
+		result = -ENOMEM;
+		goto error_kzalloc;
+	}
+
+	kref_init(&neh->kref);
+	INIT_LIST_HEAD(&neh->list_node);
+	timer_setup(&neh->timer, uwb_rc_neh_timer, 0);
+
+	neh->rc = rc;
+	neh->evt_type = expected_type;
+	neh->evt = cpu_to_le16(expected_event);
+	neh->cb = cb;
+	neh->arg = arg;
+
+	spin_lock_irqsave(&rc->neh_lock, flags);
+	result = __uwb_rc_ctx_get(rc, neh);
+	if (result >= 0) {
+		cmd->bCommandContext = neh->context;
+		list_add_tail(&neh->list_node, &rc->neh_list);
+		uwb_rc_neh_get(neh);
+	}
+	spin_unlock_irqrestore(&rc->neh_lock, flags);
+	if (result < 0)
+		goto error_ctx_get;
+
+	return neh;
+
+error_ctx_get:
+	kfree(neh);
+error_kzalloc:
+	dev_err(dev, "cannot open handle to radio controller: %d\n", result);
+	return ERR_PTR(result);
+}
+
+static void __uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
+{
+	__uwb_rc_ctx_put(rc, neh);
+	list_del(&neh->list_node);
+}
+
+/**
+ * uwb_rc_neh_rm - remove a neh.
+ * @rc:  the radio controller
+ * @neh: the neh to remove
+ *
+ * Remove an active neh immediately instead of waiting for the event
+ * (or a time out).
+ */
+void uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rc->neh_lock, flags);
+	__uwb_rc_neh_rm(rc, neh);
+	spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+	del_timer_sync(&neh->timer);
+	uwb_rc_neh_put(neh);
+}
+
+/**
+ * uwb_rc_neh_arm - arm an event handler timeout timer
+ *
+ * @rc:     UWB Radio Controller
+ * @neh:    Notification/event handler for @rc
+ *
+ * The timer is only armed if the neh is active.
+ */
+void uwb_rc_neh_arm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rc->neh_lock, flags);
+	if (neh->context)
+		mod_timer(&neh->timer,
+			  jiffies + msecs_to_jiffies(UWB_RC_CMD_TIMEOUT_MS));
+	spin_unlock_irqrestore(&rc->neh_lock, flags);
+}
+
+static void uwb_rc_neh_cb(struct uwb_rc_neh *neh, struct uwb_rceb *rceb, size_t size)
+{
+	(*neh->cb)(neh->rc, neh->arg, rceb, size);
+	uwb_rc_neh_put(neh);
+}
+
+static bool uwb_rc_neh_match(struct uwb_rc_neh *neh, const struct uwb_rceb *rceb)
+{
+	return neh->evt_type == rceb->bEventType
+		&& neh->evt == rceb->wEvent
+		&& neh->context == rceb->bEventContext;
+}
+
+/**
+ * Find the handle waiting for a RC Radio Control Event
+ *
+ * @rc:         UWB Radio Controller
+ * @rceb:       Pointer to the RCEB buffer
+ * @event_size: Pointer to the size of the RCEB buffer. Might be
+ *              adjusted to take into account the @neh->extra_size
+ *              settings.
+ *
+ * If the listener has no buffer (NULL buffer), one is allocated for
+ * the right size (the amount of data received). @neh->ptr will point
+ * to the event payload, which always starts with a 'struct
+ * uwb_rceb'. kfree() it when done.
+ */
+static
+struct uwb_rc_neh *uwb_rc_neh_lookup(struct uwb_rc *rc,
+				     const struct uwb_rceb *rceb)
+{
+	struct uwb_rc_neh *neh = NULL, *h;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rc->neh_lock, flags);
+
+	list_for_each_entry(h, &rc->neh_list, list_node) {
+		if (uwb_rc_neh_match(h, rceb)) {
+			neh = h;
+			break;
+		}
+	}
+
+	if (neh)
+		__uwb_rc_neh_rm(rc, neh);
+
+	spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+	return neh;
+}
+
+
+/*
+ * Process notifications coming from the radio control interface
+ *
+ * @rc:    UWB Radio Control Interface descriptor
+ * @neh:   Notification/Event Handler @neh->ptr points to
+ *         @uwb_evt->buffer.
+ *
+ * This function is called by the event/notif handling subsystem when
+ * notifications arrive (hwarc_probe() arms a notification/event handle
+ * that calls back this function for every received notification; this
+ * function then will rearm itself).
+ *
+ * Notification data buffers are dynamically allocated by the NEH
+ * handling code in neh.c [uwb_rc_neh_lookup()]. What is actually
+ * allocated is space to contain the notification data.
+ *
+ * Buffers are prefixed with a Radio Control Event Block (RCEB) as
+ * defined by the WUSB Wired-Adapter Radio Control interface. We
+ * just use it for the notification code.
+ *
+ * On each case statement we just transcode endianess of the different
+ * fields. We declare a pointer to a RCI definition of an event, and
+ * then to a UWB definition of the same event (which are the same,
+ * remember). Event if we use different pointers
+ */
+static
+void uwb_rc_notif(struct uwb_rc *rc, struct uwb_rceb *rceb, ssize_t size)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_event *uwb_evt;
+
+	if (size == -ESHUTDOWN)
+		return;
+	if (size < 0) {
+		dev_err(dev, "ignoring event with error code %zu\n",
+			size);
+		return;
+	}
+
+	uwb_evt = kzalloc(sizeof(*uwb_evt), GFP_ATOMIC);
+	if (unlikely(uwb_evt == NULL)) {
+		dev_err(dev, "no memory to queue event 0x%02x/%04x/%02x\n",
+			rceb->bEventType, le16_to_cpu(rceb->wEvent),
+			rceb->bEventContext);
+		return;
+	}
+	uwb_evt->rc = __uwb_rc_get(rc);	/* will be put by uwbd's uwbd_event_handle() */
+	uwb_evt->ts_jiffies = jiffies;
+	uwb_evt->type = UWB_EVT_TYPE_NOTIF;
+	uwb_evt->notif.size = size;
+	uwb_evt->notif.rceb = rceb;
+
+	uwbd_event_queue(uwb_evt);
+}
+
+static void uwb_rc_neh_grok_event(struct uwb_rc *rc, struct uwb_rceb *rceb, size_t size)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rc_neh *neh;
+	struct uwb_rceb *notif;
+	unsigned long flags;
+
+	if (rceb->bEventContext == 0) {
+		notif = kmalloc(size, GFP_ATOMIC);
+		if (notif) {
+			memcpy(notif, rceb, size);
+			uwb_rc_notif(rc, notif, size);
+		} else
+			dev_err(dev, "event 0x%02x/%04x/%02x (%zu bytes): no memory\n",
+				rceb->bEventType, le16_to_cpu(rceb->wEvent),
+				rceb->bEventContext, size);
+	} else {
+		neh = uwb_rc_neh_lookup(rc, rceb);
+		if (neh) {
+			spin_lock_irqsave(&rc->neh_lock, flags);
+			/* to guard against a timeout */
+			neh->completed = 1;
+			del_timer(&neh->timer);
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			uwb_rc_neh_cb(neh, rceb, size);
+		} else
+			dev_warn(dev, "event 0x%02x/%04x/%02x (%zu bytes): nobody cared\n",
+				 rceb->bEventType, le16_to_cpu(rceb->wEvent),
+				 rceb->bEventContext, size);
+	}
+}
+
+/**
+ * Given a buffer with one or more UWB RC events/notifications, break
+ * them up and dispatch them.
+ *
+ * @rc:	      UWB Radio Controller
+ * @buf:      Buffer with the stream of notifications/events
+ * @buf_size: Amount of data in the buffer
+ *
+ * Note each notification/event starts always with a 'struct
+ * uwb_rceb', so the minimum size if 4 bytes.
+ *
+ * The device may pass us events formatted differently than expected.
+ * These are first filtered, potentially creating a new event in a new
+ * memory location. If a new event is created by the filter it is also
+ * freed here.
+ *
+ * For each notif/event, tries to guess the size looking at the EST
+ * tables, then looks for a neh that is waiting for that event and if
+ * found, copies the payload to the neh's buffer and calls it back. If
+ * not, the data is ignored.
+ *
+ * Note that if we can't find a size description in the EST tables, we
+ * still might find a size in the 'neh' handle in uwb_rc_neh_lookup().
+ *
+ * Assumptions:
+ *
+ *   @rc->neh_lock is NOT taken
+ *
+ * We keep track of various sizes here:
+ * size:      contains the size of the buffer that is processed for the
+ *            incoming event. this buffer may contain events that are not
+ *            formatted as WHCI.
+ * real_size: the actual space taken by this event in the buffer.
+ *            We need to keep track of the real size of an event to be able to
+ *            advance the buffer correctly.
+ * event_size: the size of the event as expected by the core layer
+ *            [OR] the size of the event after filtering. if the filtering
+ *            created a new event in a new memory location then this is
+ *            effectively the size of a new event buffer
+ */
+void uwb_rc_neh_grok(struct uwb_rc *rc, void *buf, size_t buf_size)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	void *itr;
+	struct uwb_rceb *rceb;
+	size_t size, real_size, event_size;
+	int needtofree;
+
+	itr = buf;
+	size = buf_size;
+	while (size > 0) {
+		if (size < sizeof(*rceb)) {
+			dev_err(dev, "not enough data in event buffer to "
+				"process incoming events (%zu left, minimum is "
+				"%zu)\n", size, sizeof(*rceb));
+			break;
+		}
+
+		rceb = itr;
+		if (rc->filter_event) {
+			needtofree = rc->filter_event(rc, &rceb, size,
+						      &real_size, &event_size);
+			if (needtofree < 0 && needtofree != -ENOANO) {
+				dev_err(dev, "BUG: Unable to filter event "
+					"(0x%02x/%04x/%02x) from "
+					"device. \n", rceb->bEventType,
+					le16_to_cpu(rceb->wEvent),
+					rceb->bEventContext);
+				break;
+			}
+		} else
+			needtofree = -ENOANO;
+		/* do real processing if there was no filtering or the
+		 * filtering didn't act */
+		if (needtofree == -ENOANO) {
+			ssize_t ret = uwb_est_find_size(rc, rceb, size);
+			if (ret < 0)
+				break;
+			if (ret > size) {
+				dev_err(dev, "BUG: hw sent incomplete event "
+					"0x%02x/%04x/%02x (%zd bytes), only got "
+					"%zu bytes. We don't handle that.\n",
+					rceb->bEventType, le16_to_cpu(rceb->wEvent),
+					rceb->bEventContext, ret, size);
+				break;
+			}
+			real_size = event_size = ret;
+		}
+		uwb_rc_neh_grok_event(rc, rceb, event_size);
+
+		if (needtofree == 1)
+			kfree(rceb);
+
+		itr += real_size;
+		size -= real_size;
+	}
+}
+EXPORT_SYMBOL_GPL(uwb_rc_neh_grok);
+
+
+/**
+ * The entity that reads from the device notification/event channel has
+ * detected an error.
+ *
+ * @rc:    UWB Radio Controller
+ * @error: Errno error code
+ *
+ */
+void uwb_rc_neh_error(struct uwb_rc *rc, int error)
+{
+	struct uwb_rc_neh *neh;
+	unsigned long flags;
+
+	for (;;) {
+		spin_lock_irqsave(&rc->neh_lock, flags);
+		if (list_empty(&rc->neh_list)) {
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			break;
+		}
+		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
+		__uwb_rc_neh_rm(rc, neh);
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+		del_timer_sync(&neh->timer);
+		uwb_rc_neh_cb(neh, NULL, error);
+	}
+}
+EXPORT_SYMBOL_GPL(uwb_rc_neh_error);
+
+
+static void uwb_rc_neh_timer(struct timer_list *t)
+{
+	struct uwb_rc_neh *neh = from_timer(neh, t, timer);
+	struct uwb_rc *rc = neh->rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rc->neh_lock, flags);
+	if (neh->completed) {
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+		return;
+	}
+	if (neh->context)
+		__uwb_rc_neh_rm(rc, neh);
+	else
+		neh = NULL;
+	spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+	if (neh)
+		uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT);
+}
+
+/** Initializes the @rc's neh subsystem
+ */
+void uwb_rc_neh_create(struct uwb_rc *rc)
+{
+	spin_lock_init(&rc->neh_lock);
+	INIT_LIST_HEAD(&rc->neh_list);
+	set_bit(0, rc->ctx_bm);		/* 0 is reserved (see [WUSB] table 8-65) */
+	set_bit(0xff, rc->ctx_bm);	/* and 0xff is invalid */
+	rc->ctx_roll = 1;
+}
+
+
+/** Release's the @rc's neh subsystem */
+void uwb_rc_neh_destroy(struct uwb_rc *rc)
+{
+	unsigned long flags;
+	struct uwb_rc_neh *neh;
+
+	for (;;) {
+		spin_lock_irqsave(&rc->neh_lock, flags);
+		if (list_empty(&rc->neh_list)) {
+			spin_unlock_irqrestore(&rc->neh_lock, flags);
+			break;
+		}
+		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
+		__uwb_rc_neh_rm(rc, neh);
+		spin_unlock_irqrestore(&rc->neh_lock, flags);
+
+		del_timer_sync(&neh->timer);
+		uwb_rc_neh_put(neh);
+	}
+}
diff --git a/drivers/staging/uwb/pal.c b/drivers/staging/uwb/pal.c
new file mode 100644
index 000000000000..a541e646a603
--- /dev/null
+++ b/drivers/staging/uwb/pal.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB PAL support.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+
+#include "uwb.h"
+#include "uwb-internal.h"
+
+/**
+ * uwb_pal_init - initialize a UWB PAL
+ * @pal: the PAL to initialize
+ */
+void uwb_pal_init(struct uwb_pal *pal)
+{
+	INIT_LIST_HEAD(&pal->node);
+}
+EXPORT_SYMBOL_GPL(uwb_pal_init);
+
+/**
+ * uwb_pal_register - register a UWB PAL
+ * @pal: the PAL
+ *
+ * The PAL must be initialized with uwb_pal_init().
+ */
+int uwb_pal_register(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+	int ret;
+
+	if (pal->device) {
+		/* create a link to the uwb_rc in the PAL device's directory. */
+		ret = sysfs_create_link(&pal->device->kobj,
+					&rc->uwb_dev.dev.kobj, "uwb_rc");
+		if (ret < 0)
+			return ret;
+		/* create a link to the PAL in the UWB device's directory. */
+		ret = sysfs_create_link(&rc->uwb_dev.dev.kobj,
+					&pal->device->kobj, pal->name);
+		if (ret < 0) {
+			sysfs_remove_link(&pal->device->kobj, "uwb_rc");
+			return ret;
+		}
+	}
+
+	pal->debugfs_dir = uwb_dbg_create_pal_dir(pal);
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	list_add(&pal->node, &rc->pals);
+	mutex_unlock(&rc->uwb_dev.mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(uwb_pal_register);
+
+static int find_rc(struct device *dev, const void *data)
+{
+	const struct uwb_rc *target_rc = data;
+	struct uwb_rc *rc = dev_get_drvdata(dev);
+
+	if (rc == NULL) {
+		WARN_ON(1);
+		return 0;
+	}
+	if (rc == target_rc) {
+		if (rc->ready == 0)
+			return 0;
+		else
+			return 1;
+	}
+	return 0;
+}
+
+/**
+ * Given a radio controller descriptor see if it is registered.
+ *
+ * @returns false if the rc does not exist or is quiescing; true otherwise.
+ */
+static bool uwb_rc_class_device_exists(struct uwb_rc *target_rc)
+{
+	struct device *dev;
+
+	dev = class_find_device(&uwb_rc_class, NULL, target_rc,	find_rc);
+
+	put_device(dev);
+
+	return (dev != NULL);
+}
+
+/**
+ * uwb_pal_unregister - unregister a UWB PAL
+ * @pal: the PAL
+ */
+void uwb_pal_unregister(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	uwb_radio_stop(pal);
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	list_del(&pal->node);
+	mutex_unlock(&rc->uwb_dev.mutex);
+
+	debugfs_remove(pal->debugfs_dir);
+
+	if (pal->device) {
+		/* remove link to the PAL in the UWB device's directory. */
+		if (uwb_rc_class_device_exists(rc))
+			sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
+
+		/* remove link to uwb_rc in the PAL device's directory. */
+		sysfs_remove_link(&pal->device->kobj, "uwb_rc");
+	}
+}
+EXPORT_SYMBOL_GPL(uwb_pal_unregister);
+
+/**
+ * uwb_rc_pal_init - initialize the PAL related parts of a radio controller
+ * @rc: the radio controller
+ */
+void uwb_rc_pal_init(struct uwb_rc *rc)
+{
+	INIT_LIST_HEAD(&rc->pals);
+}
diff --git a/drivers/staging/uwb/radio.c b/drivers/staging/uwb/radio.c
new file mode 100644
index 000000000000..6afb75ce1b5f
--- /dev/null
+++ b/drivers/staging/uwb/radio.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB radio (channel) management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+
+#include "uwb.h"
+#include "uwb-internal.h"
+
+
+static int uwb_radio_select_channel(struct uwb_rc *rc)
+{
+	/*
+	 * Default to channel 9 (BG1, TFC1) unless the user has
+	 * selected a specific channel or there are no active PALs.
+	 */
+	if (rc->active_pals == 0)
+		return -1;
+	if (rc->beaconing_forced)
+		return rc->beaconing_forced;
+	return 9;
+}
+
+
+/*
+ * Notify all active PALs that the channel has changed.
+ */
+static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel)
+{
+	struct uwb_pal *pal;
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel && channel != pal->channel) {
+			pal->channel = channel;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, pal->channel);
+		}
+	}
+}
+
+/*
+ * Change to a new channel and notify any active PALs of the new
+ * channel.
+ *
+ * When stopping the radio, PALs need to be notified first so they can
+ * terminate any active reservations.
+ */
+static int uwb_radio_change_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+	struct device *dev = &rc->uwb_dev.dev;
+
+	dev_dbg(dev, "%s: channel = %d, rc->beaconing = %d\n", __func__,
+		channel, rc->beaconing);
+
+	if (channel == -1)
+		uwb_radio_channel_changed(rc, channel);
+
+	if (channel != rc->beaconing) {
+		if (rc->beaconing != -1 && channel != -1) {
+			/*
+			 * FIXME: should signal the channel change
+			 * with a Channel Change IE.
+			 */
+			ret = uwb_radio_change_channel(rc, -1);
+			if (ret < 0)
+				return ret;
+		}
+		ret = uwb_rc_beacon(rc, channel, 0);
+	}
+
+	if (channel != -1)
+		uwb_radio_channel_changed(rc, rc->beaconing);
+
+	return ret;
+}
+
+/**
+ * uwb_radio_start - request that the radio be started
+ * @pal: the PAL making the request.
+ *
+ * If the radio is not already active, a suitable channel is selected
+ * and beacons are started.
+ */
+int uwb_radio_start(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (!pal->channel) {
+		pal->channel = -1;
+		rc->active_pals++;
+		ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uwb_radio_start);
+
+/**
+ * uwb_radio_stop - request that the radio be stopped.
+ * @pal: the PAL making the request.
+ *
+ * Stops the radio if no other PAL is making use of it.
+ */
+void uwb_radio_stop(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	if (pal->channel) {
+		rc->active_pals--;
+		uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+		pal->channel = 0;
+	}
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+EXPORT_SYMBOL_GPL(uwb_radio_stop);
+
+/*
+ * uwb_radio_force_channel - force a specific channel to be used
+ * @rc: the radio controller.
+ * @channel: the channel to use; -1 to force the radio to stop; 0 to
+ *   use the default channel selection algorithm.
+ */
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel)
+{
+	int ret = 0;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	rc->beaconing_forced = channel;
+	ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return ret;
+}
+
+/*
+ * uwb_radio_setup - setup the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset to ensure it's in a known state
+ * before it's used.
+ */
+int uwb_radio_setup(struct uwb_rc *rc)
+{
+	return uwb_rc_reset(rc);
+}
+
+/*
+ * uwb_radio_reset_state - reset any radio manager state
+ * @rc: the radio controller.
+ *
+ * All internal radio manager state is reset to values corresponding
+ * to a reset radio controller.
+ */
+void uwb_radio_reset_state(struct uwb_rc *rc)
+{
+	struct uwb_pal *pal;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->channel) {
+			pal->channel = -1;
+			if (pal->channel_changed)
+				pal->channel_changed(pal, -1);
+		}
+	}
+
+	rc->beaconing = -1;
+	rc->scanning = -1;
+
+	mutex_unlock(&rc->uwb_dev.mutex);
+}
+
+/*
+ * uwb_radio_shutdown - shutdown the radio manager
+ * @rc: the radio controller.
+ *
+ * The radio controller is reset.
+ */
+void uwb_radio_shutdown(struct uwb_rc *rc)
+{
+	uwb_radio_reset_state(rc);
+	uwb_rc_reset(rc);
+}
diff --git a/drivers/staging/uwb/reset.c b/drivers/staging/uwb/reset.c
new file mode 100644
index 000000000000..8fc7c14d903e
--- /dev/null
+++ b/drivers/staging/uwb/reset.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * UWB basic command support and radio reset
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME:
+ *
+ *  - docs
+ *
+ *  - Now we are serializing (using the uwb_dev->mutex) the command
+ *    execution; it should be parallelized as much as possible some
+ *    day.
+ */
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include "uwb-internal.h"
+
+/**
+ * Command result codes (WUSB1.0[T8-69])
+ */
+static
+const char *__strerror[] = {
+	"success",
+	"failure",
+	"hardware failure",
+	"no more slots",
+	"beacon is too large",
+	"invalid parameter",
+	"unsupported power level",
+	"time out (wa) or invalid ie data (whci)",
+	"beacon size exceeded",
+	"cancelled",
+	"invalid state",
+	"invalid size",
+	"ack not received",
+	"no more asie notification",
+};
+
+
+/** Return a string matching the given error code */
+const char *uwb_rc_strerror(unsigned code)
+{
+	if (code == 255)
+		return "time out";
+	if (code >= ARRAY_SIZE(__strerror))
+		return "unknown error";
+	return __strerror[code];
+}
+
+int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
+		     struct uwb_rccb *cmd, size_t cmd_size,
+		     u8 expected_type, u16 expected_event,
+		     uwb_rc_cmd_cb_f cb, void *arg)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rc_neh *neh;
+	int needtofree = 0;
+	int result;
+
+	uwb_dev_lock(&rc->uwb_dev);	/* Protect against rc->priv being removed */
+	if (rc->priv == NULL) {
+		uwb_dev_unlock(&rc->uwb_dev);
+		return -ESHUTDOWN;
+	}
+
+	if (rc->filter_cmd) {
+		needtofree = rc->filter_cmd(rc, &cmd, &cmd_size);
+		if (needtofree < 0 && needtofree != -ENOANO) {
+			dev_err(dev, "%s: filter error: %d\n",
+				cmd_name, needtofree);
+			uwb_dev_unlock(&rc->uwb_dev);
+			return needtofree;
+		}
+	}
+
+	neh = uwb_rc_neh_add(rc, cmd, expected_type, expected_event, cb, arg);
+	if (IS_ERR(neh)) {
+		result = PTR_ERR(neh);
+		uwb_dev_unlock(&rc->uwb_dev);
+		goto out;
+	}
+
+	result = rc->cmd(rc, cmd, cmd_size);
+	uwb_dev_unlock(&rc->uwb_dev);
+	if (result < 0)
+		uwb_rc_neh_rm(rc, neh);
+	else
+		uwb_rc_neh_arm(rc, neh);
+	uwb_rc_neh_put(neh);
+out:
+	if (needtofree == 1)
+		kfree(cmd);
+	return result < 0 ? result : 0;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_cmd_async);
+
+struct uwb_rc_cmd_done_params {
+	struct completion completion;
+	struct uwb_rceb *reply;
+	ssize_t reply_size;
+};
+
+static void uwb_rc_cmd_done(struct uwb_rc *rc, void *arg,
+			    struct uwb_rceb *reply, ssize_t reply_size)
+{
+	struct uwb_rc_cmd_done_params *p = (struct uwb_rc_cmd_done_params *)arg;
+
+	if (reply_size > 0) {
+		if (p->reply)
+			reply_size = min(p->reply_size, reply_size);
+		else
+			p->reply = kmalloc(reply_size, GFP_ATOMIC);
+
+		if (p->reply)
+			memcpy(p->reply, reply, reply_size);
+		else
+			reply_size = -ENOMEM;
+	}
+	p->reply_size = reply_size;
+	complete(&p->completion);
+}
+
+
+/**
+ * Generic function for issuing commands to the Radio Control Interface
+ *
+ * @rc:       UWB Radio Control descriptor
+ * @cmd_name: Name of the command being issued (for error messages)
+ * @cmd:      Pointer to rccb structure containing the command;
+ *            normally you embed this structure as the first member of
+ *            the full command structure.
+ * @cmd_size: Size of the whole command buffer pointed to by @cmd.
+ * @reply:    Pointer to where to store the reply
+ * @reply_size: @reply's size
+ * @expected_type: Expected type in the return event
+ * @expected_event: Expected event code in the return event
+ * @preply:   Here a pointer to where the event data is received will
+ *            be stored. Once done with the data, free with kfree().
+ *
+ * This function is generic; it works for commands that return a fixed
+ * and known size or for commands that return a variable amount of data.
+ *
+ * If a buffer is provided, that is used, although it could be chopped
+ * to the maximum size of the buffer. If the buffer is NULL, then one
+ * be allocated in *preply with the whole contents of the reply.
+ *
+ * @rc needs to be referenced
+ */
+static
+ssize_t __uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name,
+		     struct uwb_rccb *cmd, size_t cmd_size,
+		     struct uwb_rceb *reply, size_t reply_size,
+		     u8 expected_type, u16 expected_event,
+		     struct uwb_rceb **preply)
+{
+	ssize_t result = 0;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rc_cmd_done_params params;
+
+	init_completion(&params.completion);
+	params.reply = reply;
+	params.reply_size = reply_size;
+
+	result = uwb_rc_cmd_async(rc, cmd_name, cmd, cmd_size,
+				  expected_type, expected_event,
+				  uwb_rc_cmd_done, &params);
+	if (result)
+		return result;
+
+	wait_for_completion(&params.completion);
+
+	if (preply)
+		*preply = params.reply;
+
+	if (params.reply_size < 0)
+		dev_err(dev, "%s: confirmation event 0x%02x/%04x/%02x "
+			"reception failed: %d\n", cmd_name,
+			expected_type, expected_event, cmd->bCommandContext,
+			(int)params.reply_size);
+	return params.reply_size;
+}
+
+
+/**
+ * Generic function for issuing commands to the Radio Control Interface
+ *
+ * @rc:       UWB Radio Control descriptor
+ * @cmd_name: Name of the command being issued (for error messages)
+ * @cmd:      Pointer to rccb structure containing the command;
+ *            normally you embed this structure as the first member of
+ *            the full command structure.
+ * @cmd_size: Size of the whole command buffer pointed to by @cmd.
+ * @reply:    Pointer to the beginning of the confirmation event
+ *            buffer. Normally bigger than an 'struct hwarc_rceb'.
+ *            You need to fill out reply->bEventType and reply->wEvent (in
+ *            cpu order) as the function will use them to verify the
+ *            confirmation event.
+ * @reply_size: Size of the reply buffer
+ *
+ * The function checks that the length returned in the reply is at
+ * least as big as @reply_size; if not, it will be deemed an error and
+ * -EIO returned.
+ *
+ * @rc needs to be referenced
+ */
+ssize_t uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name,
+		   struct uwb_rccb *cmd, size_t cmd_size,
+		   struct uwb_rceb *reply, size_t reply_size)
+{
+	struct device *dev = &rc->uwb_dev.dev;
+	ssize_t result;
+
+	result = __uwb_rc_cmd(rc, cmd_name,
+			      cmd, cmd_size, reply, reply_size,
+			      reply->bEventType, reply->wEvent, NULL);
+
+	if (result > 0 && result < reply_size) {
+		dev_err(dev, "%s: not enough data returned for decoding reply "
+			"(%zu bytes received vs at least %zu needed)\n",
+			cmd_name, result, reply_size);
+		result = -EIO;
+	}
+	return result;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_cmd);
+
+
+/**
+ * Generic function for issuing commands to the Radio Control
+ * Interface that return an unknown amount of data
+ *
+ * @rc:       UWB Radio Control descriptor
+ * @cmd_name: Name of the command being issued (for error messages)
+ * @cmd:      Pointer to rccb structure containing the command;
+ *            normally you embed this structure as the first member of
+ *            the full command structure.
+ * @cmd_size: Size of the whole command buffer pointed to by @cmd.
+ * @expected_type: Expected type in the return event
+ * @expected_event: Expected event code in the return event
+ * @preply:   Here a pointer to where the event data is received will
+ *            be stored. Once done with the data, free with kfree().
+ *
+ * The function checks that the length returned in the reply is at
+ * least as big as a 'struct uwb_rceb *'; if not, it will be deemed an
+ * error and -EIO returned.
+ *
+ * @rc needs to be referenced
+ */
+ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
+		    struct uwb_rccb *cmd, size_t cmd_size,
+		    u8 expected_type, u16 expected_event,
+		    struct uwb_rceb **preply)
+{
+	return __uwb_rc_cmd(rc, cmd_name, cmd, cmd_size, NULL, 0,
+			    expected_type, expected_event, preply);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_vcmd);
+
+
+/**
+ * Reset a UWB Host Controller (and all radio settings)
+ *
+ * @rc:      Host Controller descriptor
+ * @returns: 0 if ok, < 0 errno code on error
+ *
+ * We put the command on kmalloc'ed memory as some arches cannot do
+ * USB from the stack. The reply event is copied from an stage buffer,
+ * so it can be in the stack. See WUSB1.0[8.6.2.4] for more details.
+ */
+int uwb_rc_reset(struct uwb_rc *rc)
+{
+	int result = -ENOMEM;
+	struct uwb_rc_evt_confirm reply;
+	struct uwb_rccb *cmd;
+	size_t cmd_size = sizeof(*cmd);
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_kzalloc;
+	cmd->bCommandType = UWB_RC_CET_GENERAL;
+	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_RESET);
+	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply.rceb.wEvent = UWB_RC_CMD_RESET;
+	result = uwb_rc_cmd(rc, "RESET", cmd, cmd_size,
+			    &reply.rceb, sizeof(reply));
+	if (result < 0)
+		goto error_cmd;
+	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(&rc->uwb_dev.dev,
+			"RESET: command execution failed: %s (%d)\n",
+			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
+		result = -EIO;
+	}
+error_cmd:
+	kfree(cmd);
+error_kzalloc:
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return result;
+}
+
+int uwbd_msg_handle_reset(struct uwb_event *evt)
+{
+	struct uwb_rc *rc = evt->rc;
+	int ret;
+
+	dev_info(&rc->uwb_dev.dev, "resetting radio controller\n");
+	ret = rc->reset(rc);
+	if (ret < 0) {
+		dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret);
+		goto error;
+	}
+	return 0;
+error:
+	/* Nothing can be done except try the reset again. Wait a bit
+	   to avoid reset loops during probe() or remove(). */
+	msleep(1000);
+	uwb_rc_reset_all(rc);
+	return ret;
+}
+
+/**
+ * uwb_rc_reset_all - request a reset of the radio controller and PALs
+ * @rc: the radio controller of the hardware device to be reset.
+ *
+ * The full hardware reset of the radio controller and all the PALs
+ * will be scheduled.
+ */
+void uwb_rc_reset_all(struct uwb_rc *rc)
+{
+	struct uwb_event *evt;
+
+	evt = kzalloc(sizeof(struct uwb_event), GFP_ATOMIC);
+	if (unlikely(evt == NULL))
+		return;
+
+	evt->rc = __uwb_rc_get(rc);	/* will be put by uwbd's uwbd_event_handle() */
+	evt->ts_jiffies = jiffies;
+	evt->type = UWB_EVT_TYPE_MSG;
+	evt->message = UWB_EVT_MSG_RESET;
+
+	uwbd_event_queue(evt);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_reset_all);
+
+void uwb_rc_pre_reset(struct uwb_rc *rc)
+{
+	rc->stop(rc);
+	uwbd_flush(rc);
+
+	uwb_radio_reset_state(rc);
+	uwb_rsv_remove_all(rc);
+}
+EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
+
+int uwb_rc_post_reset(struct uwb_rc *rc)
+{
+	int ret;
+
+	ret = rc->start(rc);
+	if (ret)
+		goto out;
+	ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr);
+	if (ret)
+		goto out;
+	ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr);
+	if (ret)
+		goto out;
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uwb_rc_post_reset);
diff --git a/drivers/staging/uwb/rsv.c b/drivers/staging/uwb/rsv.c
new file mode 100644
index 000000000000..f45a04ff7275
--- /dev/null
+++ b/drivers/staging/uwb/rsv.c
@@ -0,0 +1,1000 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB reservation management.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/export.h>
+
+#include "uwb.h"
+#include "uwb-internal.h"
+
+static void uwb_rsv_timer(struct timer_list *t);
+
+static const char *rsv_states[] = {
+	[UWB_RSV_STATE_NONE]                 = "none            ",
+	[UWB_RSV_STATE_O_INITIATED]          = "o initiated     ",
+	[UWB_RSV_STATE_O_PENDING]            = "o pending       ",
+	[UWB_RSV_STATE_O_MODIFIED]           = "o modified      ",
+	[UWB_RSV_STATE_O_ESTABLISHED]        = "o established   ",
+	[UWB_RSV_STATE_O_TO_BE_MOVED]        = "o to be moved   ",
+	[UWB_RSV_STATE_O_MOVE_EXPANDING]     = "o move expanding",
+	[UWB_RSV_STATE_O_MOVE_COMBINING]     = "o move combining",
+	[UWB_RSV_STATE_O_MOVE_REDUCING]      = "o move reducing ",
+	[UWB_RSV_STATE_T_ACCEPTED]           = "t accepted      ",
+	[UWB_RSV_STATE_T_CONFLICT]           = "t conflict      ",
+	[UWB_RSV_STATE_T_PENDING]            = "t pending       ",
+	[UWB_RSV_STATE_T_DENIED]             = "t denied        ",
+	[UWB_RSV_STATE_T_RESIZED]            = "t resized       ",
+	[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = "t expanding acc ",
+	[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = "t expanding conf",
+	[UWB_RSV_STATE_T_EXPANDING_PENDING]  = "t expanding pend",
+	[UWB_RSV_STATE_T_EXPANDING_DENIED]   = "t expanding den ",
+};
+
+static const char *rsv_types[] = {
+	[UWB_DRP_TYPE_ALIEN_BP] = "alien-bp",
+	[UWB_DRP_TYPE_HARD]     = "hard",
+	[UWB_DRP_TYPE_SOFT]     = "soft",
+	[UWB_DRP_TYPE_PRIVATE]  = "private",
+	[UWB_DRP_TYPE_PCA]      = "pca",
+};
+
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv)
+{
+	static const bool has_two_drp_ies[] = {
+		[UWB_RSV_STATE_O_INITIATED]               = false,
+		[UWB_RSV_STATE_O_PENDING]                 = false,
+		[UWB_RSV_STATE_O_MODIFIED]                = false,
+		[UWB_RSV_STATE_O_ESTABLISHED]             = false,
+		[UWB_RSV_STATE_O_TO_BE_MOVED]             = false,
+		[UWB_RSV_STATE_O_MOVE_COMBINING]          = false,
+		[UWB_RSV_STATE_O_MOVE_REDUCING]           = false,
+		[UWB_RSV_STATE_O_MOVE_EXPANDING]          = true,
+		[UWB_RSV_STATE_T_ACCEPTED]                = false,
+		[UWB_RSV_STATE_T_CONFLICT]                = false,
+		[UWB_RSV_STATE_T_PENDING]                 = false,
+		[UWB_RSV_STATE_T_DENIED]                  = false,
+		[UWB_RSV_STATE_T_RESIZED]                 = false,
+		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_CONFLICT]      = true,
+		[UWB_RSV_STATE_T_EXPANDING_PENDING]       = true,
+		[UWB_RSV_STATE_T_EXPANDING_DENIED]        = true,
+	};
+
+	return has_two_drp_ies[rsv->state];
+}
+
+/**
+ * uwb_rsv_state_str - return a string for a reservation state
+ * @state: the reservation state.
+ */
+const char *uwb_rsv_state_str(enum uwb_rsv_state state)
+{
+	if (state < UWB_RSV_STATE_NONE || state >= UWB_RSV_STATE_LAST)
+		return "unknown";
+	return rsv_states[state];
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_state_str);
+
+/**
+ * uwb_rsv_type_str - return a string for a reservation type
+ * @type: the reservation type
+ */
+const char *uwb_rsv_type_str(enum uwb_drp_type type)
+{
+	if (type < UWB_DRP_TYPE_ALIEN_BP || type > UWB_DRP_TYPE_PCA)
+		return "invalid";
+	return rsv_types[type];
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_type_str);
+
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv)
+{
+	struct device *dev = &rsv->rc->uwb_dev.dev;
+	struct uwb_dev_addr devaddr;
+	char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
+
+	uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
+	if (rsv->target.type == UWB_RSV_TARGET_DEV)
+		devaddr = rsv->target.dev->dev_addr;
+	else
+		devaddr = rsv->target.devaddr;
+	uwb_dev_addr_print(target, sizeof(target), &devaddr);
+
+	dev_dbg(dev, "rsv %s %s -> %s: %s\n",
+		text, owner, target, uwb_rsv_state_str(rsv->state));
+}
+
+static void uwb_rsv_release(struct kref *kref)
+{
+	struct uwb_rsv *rsv = container_of(kref, struct uwb_rsv, kref);
+
+	kfree(rsv);
+}
+
+void uwb_rsv_get(struct uwb_rsv *rsv)
+{
+	kref_get(&rsv->kref);
+}
+
+void uwb_rsv_put(struct uwb_rsv *rsv)
+{
+	kref_put(&rsv->kref, uwb_rsv_release);
+}
+
+/*
+ * Get a free stream index for a reservation.
+ *
+ * If the target is a DevAddr (e.g., a WUSB cluster reservation) then
+ * the stream is allocated from a pool of per-RC stream indexes,
+ * otherwise a unique stream index for the target is selected.
+ */
+static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned long *streams_bm;
+	int stream;
+
+	switch (rsv->target.type) {
+	case UWB_RSV_TARGET_DEV:
+		streams_bm = rsv->target.dev->streams;
+		break;
+	case UWB_RSV_TARGET_DEVADDR:
+		streams_bm = rc->uwb_dev.streams;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	stream = find_first_zero_bit(streams_bm, UWB_NUM_STREAMS);
+	if (stream >= UWB_NUM_STREAMS) {
+		dev_err(dev, "%s: no available stream found\n", __func__);
+		return -EBUSY;
+	}
+
+	rsv->stream = stream;
+	set_bit(stream, streams_bm);
+
+	dev_dbg(dev, "get stream %d\n", rsv->stream);
+
+	return 0;
+}
+
+static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned long *streams_bm;
+
+	switch (rsv->target.type) {
+	case UWB_RSV_TARGET_DEV:
+		streams_bm = rsv->target.dev->streams;
+		break;
+	case UWB_RSV_TARGET_DEVADDR:
+		streams_bm = rc->uwb_dev.streams;
+		break;
+	default:
+		return;
+	}
+
+	clear_bit(rsv->stream, streams_bm);
+
+	dev_dbg(dev, "put stream %d\n", rsv->stream);
+}
+
+void uwb_rsv_backoff_win_timer(struct timer_list *t)
+{
+	struct uwb_drp_backoff_win *bow = from_timer(bow, t, timer);
+	struct uwb_rc *rc = container_of(bow, struct uwb_rc, bow);
+	struct device *dev = &rc->uwb_dev.dev;
+
+	bow->can_reserve_extra_mases = true;
+	if (bow->total_expired <= 4) {
+		bow->total_expired++;
+	} else {
+		/* after 4 backoff window has expired we can exit from
+		 * the backoff procedure */
+		bow->total_expired = 0;
+		bow->window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
+	}
+	dev_dbg(dev, "backoff_win_timer total_expired=%d, n=%d\n", bow->total_expired, bow->n);
+
+	/* try to relocate all the "to be moved" relocations */
+	uwb_rsv_handle_drp_avail_change(rc);
+}
+
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc)
+{
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	unsigned timeout_us;
+
+	dev_dbg(dev, "backoff_win_increment: window=%d\n", bow->window);
+
+	bow->can_reserve_extra_mases = false;
+
+	if((bow->window << 1) == UWB_DRP_BACKOFF_WIN_MAX)
+		return;
+
+	bow->window <<= 1;
+	bow->n = prandom_u32() & (bow->window - 1);
+	dev_dbg(dev, "new_window=%d, n=%d\n", bow->window, bow->n);
+
+	/* reset the timer associated variables */
+	timeout_us = bow->n * UWB_SUPERFRAME_LENGTH_US;
+	bow->total_expired = 0;
+	mod_timer(&bow->timer, jiffies + usecs_to_jiffies(timeout_us));
+}
+
+static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
+{
+	int sframes = UWB_MAX_LOST_BEACONS;
+
+	/*
+	 * Multicast reservations can become established within 1
+	 * super frame and should not be terminated if no response is
+	 * received.
+	 */
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		sframes = 0;
+	} else if (rsv->is_multicast) {
+		if (rsv->state == UWB_RSV_STATE_O_INITIATED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_EXPANDING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_COMBINING
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING)
+			sframes = 1;
+		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED)
+			sframes = 0;
+
+	}
+
+	if (sframes > 0) {
+		/*
+		 * Add an additional 2 superframes to account for the
+		 * time to send the SET DRP IE command.
+		 */
+		unsigned timeout_us = (sframes + 2) * UWB_SUPERFRAME_LENGTH_US;
+		mod_timer(&rsv->timer, jiffies + usecs_to_jiffies(timeout_us));
+	} else
+		del_timer(&rsv->timer);
+}
+
+/*
+ * Update a reservations state, and schedule an update of the
+ * transmitted DRP IEs.
+ */
+static void uwb_rsv_state_update(struct uwb_rsv *rsv,
+				 enum uwb_rsv_state new_state)
+{
+	rsv->state = new_state;
+	rsv->ie_valid = false;
+
+	uwb_rsv_dump("SU", rsv);
+
+	uwb_rsv_stroke_timer(rsv);
+	uwb_rsv_sched_update(rsv->rc);
+}
+
+static void uwb_rsv_callback(struct uwb_rsv *rsv)
+{
+	if (rsv->callback)
+		rsv->callback(rsv);
+}
+
+void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
+{
+	struct uwb_rsv_move *mv = &rsv->mv;
+
+	if (rsv->state == new_state) {
+		switch (rsv->state) {
+		case UWB_RSV_STATE_O_ESTABLISHED:
+		case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		case UWB_RSV_STATE_O_MOVE_COMBINING:
+		case UWB_RSV_STATE_O_MOVE_REDUCING:
+		case UWB_RSV_STATE_T_ACCEPTED:
+		case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		case UWB_RSV_STATE_T_RESIZED:
+		case UWB_RSV_STATE_NONE:
+			uwb_rsv_stroke_timer(rsv);
+			break;
+		default:
+			/* Expecting a state transition so leave timer
+			   as-is. */
+			break;
+		}
+		return;
+	}
+
+	uwb_rsv_dump("SC", rsv);
+
+	switch (new_state) {
+	case UWB_RSV_STATE_NONE:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE);
+		uwb_rsv_remove(rsv);
+		uwb_rsv_callback(rsv);
+		break;
+	case UWB_RSV_STATE_O_INITIATED:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_INITIATED);
+		break;
+	case UWB_RSV_STATE_O_PENDING:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_PENDING);
+		break;
+	case UWB_RSV_STATE_O_MODIFIED:
+		/* in the companion there are the MASes to drop */
+		bitmap_andnot(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MODIFIED);
+		break;
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->state == UWB_RSV_STATE_O_MODIFIED
+		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) {
+			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
+			rsv->needs_release_companion_mas = false;
+		}
+		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+		uwb_rsv_callback(rsv);
+		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		rsv->needs_release_companion_mas = true;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		rsv->needs_release_companion_mas = false;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		bitmap_or(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
+		rsv->mas.safe   += mv->companion_mas.safe;
+		rsv->mas.unsafe += mv->companion_mas.unsafe;
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		rsv->needs_release_companion_mas = true;
+		rsv->mas.safe   = mv->final_mas.safe;
+		rsv->mas.unsafe = mv->final_mas.unsafe;
+		bitmap_copy(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
+		bitmap_copy(rsv->mas.unsafe_bm, mv->final_mas.unsafe_bm, UWB_NUM_MAS);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+		break;
+	case UWB_RSV_STATE_T_ACCEPTED:
+	case UWB_RSV_STATE_T_RESIZED:
+		rsv->needs_release_companion_mas = false;
+		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_ACCEPTED);
+		uwb_rsv_callback(rsv);
+		break;
+	case UWB_RSV_STATE_T_DENIED:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_DENIED);
+		break;
+	case UWB_RSV_STATE_T_CONFLICT:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_CONFLICT);
+		break;
+	case UWB_RSV_STATE_T_PENDING:
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_PENDING);
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		rsv->needs_release_companion_mas = true;
+		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
+		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
+		break;
+	default:
+		dev_err(&rsv->rc->uwb_dev.dev, "unhandled state: %s (%d)\n",
+			uwb_rsv_state_str(new_state), new_state);
+	}
+}
+
+static void uwb_rsv_handle_timeout_work(struct work_struct *work)
+{
+	struct uwb_rsv *rsv = container_of(work, struct uwb_rsv,
+					   handle_timeout_work);
+	struct uwb_rc *rc = rsv->rc;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	uwb_rsv_dump("TO", rsv);
+
+	switch (rsv->state) {
+	case UWB_RSV_STATE_O_INITIATED:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_EXPANDING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_COMBINING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_MOVE_REDUCING:
+		if (rsv->is_multicast) {
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
+			goto unlock;
+		}
+		break;
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		if (rsv->is_multicast)
+			goto unlock;
+		break;
+	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
+		/*
+		 * The time out could be for the main or of the
+		 * companion DRP, assume it's for the companion and
+		 * drop that first.  A further time out is required to
+		 * drop the main.
+		 */
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+		goto unlock;
+	case UWB_RSV_STATE_NONE:
+		goto unlock;
+	default:
+		break;
+	}
+
+	uwb_rsv_remove(rsv);
+
+unlock:
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv;
+
+	rsv = kzalloc(sizeof(struct uwb_rsv), GFP_KERNEL);
+	if (!rsv)
+		return NULL;
+
+	INIT_LIST_HEAD(&rsv->rc_node);
+	INIT_LIST_HEAD(&rsv->pal_node);
+	kref_init(&rsv->kref);
+	timer_setup(&rsv->timer, uwb_rsv_timer, 0);
+
+	rsv->rc = rc;
+	INIT_WORK(&rsv->handle_timeout_work, uwb_rsv_handle_timeout_work);
+
+	return rsv;
+}
+
+/**
+ * uwb_rsv_create - allocate and initialize a UWB reservation structure
+ * @rc: the radio controller
+ * @cb: callback to use when the reservation completes or terminates
+ * @pal_priv: data private to the PAL to be passed in the callback
+ *
+ * The callback is called when the state of the reservation changes from:
+ *
+ *   - pending to accepted
+ *   - pending to denined
+ *   - accepted to terminated
+ *   - pending to terminated
+ */
+struct uwb_rsv *uwb_rsv_create(struct uwb_rc *rc, uwb_rsv_cb_f cb, void *pal_priv)
+{
+	struct uwb_rsv *rsv;
+
+	rsv = uwb_rsv_alloc(rc);
+	if (!rsv)
+		return NULL;
+
+	rsv->callback = cb;
+	rsv->pal_priv = pal_priv;
+
+	return rsv;
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_create);
+
+void uwb_rsv_remove(struct uwb_rsv *rsv)
+{
+	uwb_rsv_dump("RM", rsv);
+
+	if (rsv->state != UWB_RSV_STATE_NONE)
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+
+	if (rsv->needs_release_companion_mas)
+		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
+	uwb_drp_avail_release(rsv->rc, &rsv->mas);
+
+	if (uwb_rsv_is_owner(rsv))
+		uwb_rsv_put_stream(rsv);
+
+	uwb_dev_put(rsv->owner);
+	if (rsv->target.type == UWB_RSV_TARGET_DEV)
+		uwb_dev_put(rsv->target.dev);
+
+	list_del_init(&rsv->rc_node);
+	uwb_rsv_put(rsv);
+}
+
+/**
+ * uwb_rsv_destroy - free a UWB reservation structure
+ * @rsv: the reservation to free
+ *
+ * The reservation must already be terminated.
+ */
+void uwb_rsv_destroy(struct uwb_rsv *rsv)
+{
+	uwb_rsv_put(rsv);
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
+
+/**
+ * usb_rsv_establish - start a reservation establishment
+ * @rsv: the reservation
+ *
+ * The PAL should fill in @rsv's owner, target, type, max_mas,
+ * min_mas, max_interval and is_multicast fields.  If the target is a
+ * uwb_dev it must be referenced.
+ *
+ * The reservation's callback will be called when the reservation is
+ * accepted, denied or times out.
+ */
+int uwb_rsv_establish(struct uwb_rsv *rsv)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_mas_bm available;
+	struct device *dev = &rc->uwb_dev.dev;
+	int ret;
+
+	mutex_lock(&rc->rsvs_mutex);
+	ret = uwb_rsv_get_stream(rsv);
+	if (ret) {
+		dev_err(dev, "%s: uwb_rsv_get_stream failed: %d\n",
+			__func__, ret);
+		goto out;
+	}
+
+	rsv->tiebreaker = prandom_u32() & 1;
+	/* get available mas bitmap */
+	uwb_drp_available(rc, &available);
+
+	ret = uwb_rsv_find_best_allocation(rsv, &available, &rsv->mas);
+	if (ret == UWB_RSV_ALLOC_NOT_FOUND) {
+		ret = -EBUSY;
+		uwb_rsv_put_stream(rsv);
+		dev_err(dev, "%s: uwb_rsv_find_best_allocation failed: %d\n",
+			__func__, ret);
+		goto out;
+	}
+
+	ret = uwb_drp_avail_reserve_pending(rc, &rsv->mas);
+	if (ret != 0) {
+		uwb_rsv_put_stream(rsv);
+		dev_err(dev, "%s: uwb_drp_avail_reserve_pending failed: %d\n",
+			__func__, ret);
+		goto out;
+	}
+
+	uwb_rsv_get(rsv);
+	list_add_tail(&rsv->rc_node, &rc->reservations);
+	rsv->owner = &rc->uwb_dev;
+	uwb_dev_get(rsv->owner);
+	uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_INITIATED);
+out:
+	mutex_unlock(&rc->rsvs_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_establish);
+
+/**
+ * uwb_rsv_modify - modify an already established reservation
+ * @rsv: the reservation to modify
+ * @max_mas: new maximum MAS to reserve
+ * @min_mas: new minimum MAS to reserve
+ * @max_interval: new max_interval to use
+ *
+ * FIXME: implement this once there are PALs that use it.
+ */
+int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int max_interval)
+{
+	return -ENOSYS;
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_modify);
+
+/*
+ * move an already established reservation (rc->rsvs_mutex must to be
+ * taken when tis function is called)
+ */
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available)
+{
+	struct uwb_rc *rc = rsv->rc;
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct device *dev = &rc->uwb_dev.dev;
+	struct uwb_rsv_move *mv;
+	int ret = 0;
+
+	if (bow->can_reserve_extra_mases == false)
+		return -EBUSY;
+
+	mv = &rsv->mv;
+
+	if (uwb_rsv_find_best_allocation(rsv, available, &mv->final_mas) == UWB_RSV_ALLOC_FOUND) {
+
+		if (!bitmap_equal(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS)) {
+			/* We want to move the reservation */
+			bitmap_andnot(mv->companion_mas.bm, mv->final_mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_drp_avail_reserve_pending(rc, &mv->companion_mas);
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
+		}
+	} else {
+		dev_dbg(dev, "new allocation not found\n");
+	}
+
+	return ret;
+}
+
+/* It will try to move every reservation in state O_ESTABLISHED giving
+ * to the MAS allocator algorithm an availability that is the real one
+ * plus the allocation already established from the reservation. */
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc)
+{
+	struct uwb_drp_backoff_win *bow = &rc->bow;
+	struct uwb_rsv *rsv;
+	struct uwb_mas_bm mas;
+
+	if (bow->can_reserve_extra_mases == false)
+		return;
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED ||
+		    rsv->state == UWB_RSV_STATE_O_TO_BE_MOVED) {
+			uwb_drp_available(rc, &mas);
+			bitmap_or(mas.bm, mas.bm, rsv->mas.bm, UWB_NUM_MAS);
+			uwb_rsv_try_move(rsv, &mas);
+		}
+	}
+
+}
+
+/**
+ * uwb_rsv_terminate - terminate an established reservation
+ * @rsv: the reservation to terminate
+ *
+ * A reservation is terminated by removing the DRP IE from the beacon,
+ * the other end will consider the reservation to be terminated when
+ * it does not see the DRP IE for at least mMaxLostBeacons.
+ *
+ * If applicable, the reference to the target uwb_dev will be released.
+ */
+void uwb_rsv_terminate(struct uwb_rsv *rsv)
+{
+	struct uwb_rc *rc = rsv->rc;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	if (rsv->state != UWB_RSV_STATE_NONE)
+		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+
+	mutex_unlock(&rc->rsvs_mutex);
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_terminate);
+
+/**
+ * uwb_rsv_accept - accept a new reservation from a peer
+ * @rsv:      the reservation
+ * @cb:       call back for reservation changes
+ * @pal_priv: data to be passed in the above call back
+ *
+ * Reservation requests from peers are denied unless a PAL accepts it
+ * by calling this function.
+ *
+ * The PAL call uwb_rsv_destroy() for all accepted reservations before
+ * calling uwb_pal_unregister().
+ */
+void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv)
+{
+	uwb_rsv_get(rsv);
+
+	rsv->callback = cb;
+	rsv->pal_priv = pal_priv;
+	rsv->state    = UWB_RSV_STATE_T_ACCEPTED;
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_accept);
+
+/*
+ * Is a received DRP IE for this reservation?
+ */
+static bool uwb_rsv_match(struct uwb_rsv *rsv, struct uwb_dev *src,
+			  struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_dev_addr *rsv_src;
+	int stream;
+
+	stream = uwb_ie_drp_stream_index(drp_ie);
+
+	if (rsv->stream != stream)
+		return false;
+
+	switch (rsv->target.type) {
+	case UWB_RSV_TARGET_DEVADDR:
+		return rsv->stream == stream;
+	case UWB_RSV_TARGET_DEV:
+		if (uwb_ie_drp_owner(drp_ie))
+			rsv_src = &rsv->owner->dev_addr;
+		else
+			rsv_src = &rsv->target.dev->dev_addr;
+		return uwb_dev_addr_cmp(&src->dev_addr, rsv_src) == 0;
+	}
+	return false;
+}
+
+static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
+					  struct uwb_dev *src,
+					  struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_rsv *rsv;
+	struct uwb_pal *pal;
+	enum uwb_rsv_state state;
+
+	rsv = uwb_rsv_alloc(rc);
+	if (!rsv)
+		return NULL;
+
+	rsv->rc          = rc;
+	rsv->owner       = src;
+	uwb_dev_get(rsv->owner);
+	rsv->target.type = UWB_RSV_TARGET_DEV;
+	rsv->target.dev  = &rc->uwb_dev;
+	uwb_dev_get(&rc->uwb_dev);
+	rsv->type        = uwb_ie_drp_type(drp_ie);
+	rsv->stream      = uwb_ie_drp_stream_index(drp_ie);
+	uwb_drp_ie_to_bm(&rsv->mas, drp_ie);
+
+	/*
+	 * See if any PALs are interested in this reservation. If not,
+	 * deny the request.
+	 */
+	rsv->state = UWB_RSV_STATE_T_DENIED;
+	mutex_lock(&rc->uwb_dev.mutex);
+	list_for_each_entry(pal, &rc->pals, node) {
+		if (pal->new_rsv)
+			pal->new_rsv(pal, rsv);
+		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
+			break;
+	}
+	mutex_unlock(&rc->uwb_dev.mutex);
+
+	list_add_tail(&rsv->rc_node, &rc->reservations);
+	state = rsv->state;
+	rsv->state = UWB_RSV_STATE_NONE;
+
+	/* FIXME: do something sensible here */
+	if (state == UWB_RSV_STATE_T_ACCEPTED
+	    && uwb_drp_avail_reserve_pending(rc, &rsv->mas) == -EBUSY) {
+		/* FIXME: do something sensible here */
+	} else {
+		uwb_rsv_set_state(rsv, state);
+	}
+
+	return rsv;
+}
+
+/**
+ * uwb_rsv_get_usable_mas - get the bitmap of the usable MAS of a reservations
+ * @rsv: the reservation.
+ * @mas: returns the available MAS.
+ *
+ * The usable MAS of a reservation may be less than the negotiated MAS
+ * if alien BPs are present.
+ */
+void uwb_rsv_get_usable_mas(struct uwb_rsv *rsv, struct uwb_mas_bm *mas)
+{
+	bitmap_zero(mas->bm, UWB_NUM_MAS);
+	bitmap_andnot(mas->bm, rsv->mas.bm, rsv->rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
+}
+EXPORT_SYMBOL_GPL(uwb_rsv_get_usable_mas);
+
+/**
+ * uwb_rsv_find - find a reservation for a received DRP IE.
+ * @rc: the radio controller
+ * @src: source of the DRP IE
+ * @drp_ie: the DRP IE
+ *
+ * If the reservation cannot be found and the DRP IE is from a peer
+ * attempting to establish a new reservation, create a new reservation
+ * and add it to the list.
+ */
+struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
+			     struct uwb_ie_drp *drp_ie)
+{
+	struct uwb_rsv *rsv;
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (uwb_rsv_match(rsv, src, drp_ie))
+			return rsv;
+	}
+
+	if (uwb_ie_drp_owner(drp_ie))
+		return uwb_rsv_new_target(rc, src, drp_ie);
+
+	return NULL;
+}
+
+/*
+ * Go through all the reservations and check for timeouts and (if
+ * necessary) update their DRP IEs.
+ *
+ * FIXME: look at building the SET_DRP_IE command here rather than
+ * having to rescan the list in uwb_rc_send_all_drp_ie().
+ */
+static bool uwb_rsv_update_all(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv, *t;
+	bool ie_updated = false;
+
+	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
+		if (!rsv->ie_valid) {
+			uwb_drp_ie_update(rsv);
+			ie_updated = true;
+		}
+	}
+
+	return ie_updated;
+}
+
+void uwb_rsv_queue_update(struct uwb_rc *rc)
+{
+	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
+
+	queue_delayed_work(rc->rsv_workq, &rc->rsv_update_work, usecs_to_jiffies(delay_us));
+}
+
+/**
+ * uwb_rsv_sched_update - schedule an update of the DRP IEs
+ * @rc: the radio controller.
+ *
+ * To improve performance and ensure correctness with [ECMA-368] the
+ * number of SET-DRP-IE commands that are done are limited.
+ *
+ * DRP IEs update come from two sources: DRP events from the hardware
+ * which all occur at the beginning of the superframe ('syncronous'
+ * events) and reservation establishment/termination requests from
+ * PALs or timers ('asynchronous' events).
+ *
+ * A delayed work ensures that all the synchronous events result in
+ * one SET-DRP-IE command.
+ *
+ * Additional logic (the set_drp_ie_pending and rsv_updated_postponed
+ * flags) will prevent an asynchrous event starting a SET-DRP-IE
+ * command if one is currently awaiting a response.
+ *
+ * FIXME: this does leave a window where an asynchrous event can delay
+ * the SET-DRP-IE for a synchronous event by one superframe.
+ */
+void uwb_rsv_sched_update(struct uwb_rc *rc)
+{
+	spin_lock_irq(&rc->rsvs_lock);
+	if (!delayed_work_pending(&rc->rsv_update_work)) {
+		if (rc->set_drp_ie_pending > 0) {
+			rc->set_drp_ie_pending++;
+			goto unlock;
+		}
+		uwb_rsv_queue_update(rc);
+	}
+unlock:
+	spin_unlock_irq(&rc->rsvs_lock);
+}
+
+/*
+ * Update DRP IEs and, if necessary, the DRP Availability IE and send
+ * the updated IEs to the radio controller.
+ */
+static void uwb_rsv_update_work(struct work_struct *work)
+{
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_update_work.work);
+	bool ie_updated;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	ie_updated = uwb_rsv_update_all(rc);
+
+	if (!rc->drp_avail.ie_valid) {
+		uwb_drp_avail_ie_update(rc);
+		ie_updated = true;
+	}
+
+	if (ie_updated && (rc->set_drp_ie_pending == 0))
+		uwb_rc_send_all_drp_ie(rc);
+
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static void uwb_rsv_alien_bp_work(struct work_struct *work)
+{
+	struct uwb_rc *rc = container_of(work, struct uwb_rc,
+					 rsv_alien_bp_work.work);
+	struct uwb_rsv *rsv;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		if (rsv->type != UWB_DRP_TYPE_ALIEN_BP) {
+			uwb_rsv_callback(rsv);
+		}
+	}
+
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+static void uwb_rsv_timer(struct timer_list *t)
+{
+	struct uwb_rsv *rsv = from_timer(rsv, t, timer);
+
+	queue_work(rsv->rc->rsv_workq, &rsv->handle_timeout_work);
+}
+
+/**
+ * uwb_rsv_remove_all - remove all reservations
+ * @rc: the radio controller
+ *
+ * A DRP IE update is not done.
+ */
+void uwb_rsv_remove_all(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv, *t;
+
+	mutex_lock(&rc->rsvs_mutex);
+	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
+		if (rsv->state != UWB_RSV_STATE_NONE)
+			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
+		del_timer_sync(&rsv->timer);
+	}
+	/* Cancel any postponed update. */
+	rc->set_drp_ie_pending = 0;
+	mutex_unlock(&rc->rsvs_mutex);
+
+	cancel_delayed_work_sync(&rc->rsv_update_work);
+	flush_workqueue(rc->rsv_workq);
+
+	mutex_lock(&rc->rsvs_mutex);
+	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
+		uwb_rsv_remove(rsv);
+	}
+	mutex_unlock(&rc->rsvs_mutex);
+}
+
+void uwb_rsv_init(struct uwb_rc *rc)
+{
+	INIT_LIST_HEAD(&rc->reservations);
+	INIT_LIST_HEAD(&rc->cnflt_alien_list);
+	mutex_init(&rc->rsvs_mutex);
+	spin_lock_init(&rc->rsvs_lock);
+	INIT_DELAYED_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
+	INIT_DELAYED_WORK(&rc->rsv_alien_bp_work, uwb_rsv_alien_bp_work);
+	rc->bow.can_reserve_extra_mases = true;
+	rc->bow.total_expired = 0;
+	rc->bow.window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
+	timer_setup(&rc->bow.timer, uwb_rsv_backoff_win_timer, 0);
+
+	bitmap_complement(rc->uwb_dev.streams, rc->uwb_dev.streams, UWB_NUM_STREAMS);
+}
+
+int uwb_rsv_setup(struct uwb_rc *rc)
+{
+	char name[16];
+
+	snprintf(name, sizeof(name), "%s_rsvd", dev_name(&rc->uwb_dev.dev));
+	rc->rsv_workq = create_singlethread_workqueue(name);
+	if (rc->rsv_workq == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void uwb_rsv_cleanup(struct uwb_rc *rc)
+{
+	uwb_rsv_remove_all(rc);
+	destroy_workqueue(rc->rsv_workq);
+}
diff --git a/drivers/staging/uwb/scan.c b/drivers/staging/uwb/scan.c
new file mode 100644
index 000000000000..ffc3f452302d
--- /dev/null
+++ b/drivers/staging/uwb/scan.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Scanning management
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ * FIXME: there are issues here on how BEACON and SCAN on USB RCI deal
+ *        with each other. Currently seems that START_BEACON while
+ *        SCAN_ONLY will cancel the scan, so we need to update the
+ *        state here. Clarification request sent by email on
+ *        10/05/2005.
+ *        10/28/2005 No clear answer heard--maybe we'll hack the API
+ *                   so that when we start beaconing, if the HC is
+ *                   scanning in a mode not compatible with beaconing
+ *                   we just fail.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/stat.h>
+#include "uwb-internal.h"
+
+
+/**
+ * Start/stop scanning in a radio controller
+ *
+ * @rc:      UWB Radio Controller
+ * @channel: Channel to scan; encodings in WUSB1.0[Table 5.12]
+ * @type:    Type of scanning to do.
+ * @bpst_offset: value at which to start scanning (if type ==
+ *                UWB_SCAN_ONLY_STARTTIME)
+ * @returns: 0 if ok, < 0 errno code on error
+ *
+ * We put the command on kmalloc'ed memory as some arches cannot do
+ * USB from the stack. The reply event is copied from an stage buffer,
+ * so it can be in the stack. See WUSB1.0[8.6.2.4] for more details.
+ */
+int uwb_rc_scan(struct uwb_rc *rc,
+		unsigned channel, enum uwb_scan_type type,
+		unsigned bpst_offset)
+{
+	int result;
+	struct uwb_rc_cmd_scan *cmd;
+	struct uwb_rc_evt_confirm reply;
+
+	result = -ENOMEM;
+	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
+	if (cmd == NULL)
+		goto error_kzalloc;
+	mutex_lock(&rc->uwb_dev.mutex);
+	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
+	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SCAN);
+	cmd->bChannelNumber = channel;
+	cmd->bScanState = type;
+	cmd->wStartTime = cpu_to_le16(bpst_offset);
+	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
+	reply.rceb.wEvent = UWB_RC_CMD_SCAN;
+	result = uwb_rc_cmd(rc, "SCAN", &cmd->rccb, sizeof(*cmd),
+			    &reply.rceb, sizeof(reply));
+	if (result < 0)
+		goto error_cmd;
+	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
+		dev_err(&rc->uwb_dev.dev,
+			"SCAN: command execution failed: %s (%d)\n",
+			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
+		result = -EIO;
+		goto error_cmd;
+	}
+	rc->scanning = channel;
+	rc->scan_type = type;
+error_cmd:
+	mutex_unlock(&rc->uwb_dev.mutex);
+	kfree(cmd);
+error_kzalloc:
+	return result;
+}
+
+/*
+ * Print scanning state
+ */
+static ssize_t uwb_rc_scan_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	ssize_t result;
+
+	mutex_lock(&rc->uwb_dev.mutex);
+	result = sprintf(buf, "%d %d\n", rc->scanning, rc->scan_type);
+	mutex_unlock(&rc->uwb_dev.mutex);
+	return result;
+}
+
+/*
+ *
+ */
+static ssize_t uwb_rc_scan_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t size)
+{
+	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
+	struct uwb_rc *rc = uwb_dev->rc;
+	unsigned channel;
+	unsigned type;
+	unsigned bpst_offset = 0;
+	ssize_t result = -EINVAL;
+
+	result = sscanf(buf, "%u %u %u\n", &channel, &type, &bpst_offset);
+	if (result >= 2 && type < UWB_SCAN_TOP)
+		result = uwb_rc_scan(rc, channel, type, bpst_offset);
+
+	return result < 0 ? result : size;
+}
+
+/** Radio Control sysfs interface (declaration) */
+DEVICE_ATTR(scan, S_IRUGO | S_IWUSR, uwb_rc_scan_show, uwb_rc_scan_store);
diff --git a/drivers/staging/uwb/umc-bus.c b/drivers/staging/uwb/umc-bus.c
new file mode 100644
index 000000000000..8b931f66a720
--- /dev/null
+++ b/drivers/staging/uwb/umc-bus.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Bus for UWB Multi-interface Controller capabilities.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/sysfs.h>
+#include <linux/workqueue.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "include/umc.h"
+
+static int umc_bus_pre_reset_helper(struct device *dev, void *data)
+{
+	int ret = 0;
+
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->pre_reset)
+			ret = umc_drv->pre_reset(umc);
+		else
+			device_release_driver(dev);
+	}
+	return ret;
+}
+
+static int umc_bus_post_reset_helper(struct device *dev, void *data)
+{
+	int ret = 0;
+
+	if (dev->driver) {
+		struct umc_dev *umc = to_umc_dev(dev);
+		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
+
+		if (umc_drv->post_reset)
+			ret = umc_drv->post_reset(umc);
+	} else
+		ret = device_attach(dev);
+
+	return ret;
+}
+
+/**
+ * umc_controller_reset - reset the whole UMC controller
+ * @umc: the UMC device for the radio controller.
+ *
+ * Drivers or all capabilities of the controller will have their
+ * pre_reset methods called or be unbound from their device.  Then all
+ * post_reset methods will be called or the drivers will be rebound.
+ *
+ * Radio controllers must provide pre_reset and post_reset methods and
+ * reset the hardware in their start method.
+ *
+ * If this is called while a probe() or remove() is in progress it
+ * will return -EAGAIN and not perform the reset.
+ */
+int umc_controller_reset(struct umc_dev *umc)
+{
+	struct device *parent = umc->dev.parent;
+	int ret = 0;
+
+	if (!device_trylock(parent))
+		return -EAGAIN;
+	ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper);
+	if (ret >= 0)
+		ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper);
+	device_unlock(parent);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(umc_controller_reset);
+
+/**
+ * umc_match_pci_id - match a UMC driver to a UMC device's parent PCI device.
+ * @umc_drv: umc driver with match_data pointing to a zero-terminated
+ * table of pci_device_id's.
+ * @umc: umc device whose parent is to be matched.
+ */
+int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc)
+{
+	const struct pci_device_id *id_table = umc_drv->match_data;
+	struct pci_dev *pci;
+
+	if (!dev_is_pci(umc->dev.parent))
+		return 0;
+
+	pci = to_pci_dev(umc->dev.parent);
+	return pci_match_id(id_table, pci) != NULL;
+}
+EXPORT_SYMBOL_GPL(umc_match_pci_id);
+
+static int umc_bus_rescan_helper(struct device *dev, void *data)
+{
+	int ret = 0;
+
+	if (!dev->driver)
+		ret = device_attach(dev);
+
+	return ret;
+}
+
+static void umc_bus_rescan(struct device *parent)
+{
+	int err;
+
+	/*
+	 * We can't use bus_rescan_devices() here as it deadlocks when
+	 * it tries to retake the dev->parent semaphore.
+	 */
+	err = device_for_each_child(parent, NULL, umc_bus_rescan_helper);
+	if (err < 0)
+		printk(KERN_WARNING "%s: rescan of bus failed: %d\n",
+		       KBUILD_MODNAME, err);
+}
+
+static int umc_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct umc_dev *umc = to_umc_dev(dev);
+	struct umc_driver *umc_driver = to_umc_driver(drv);
+
+	if (umc->cap_id == umc_driver->cap_id) {
+		if (umc_driver->match)
+			return umc_driver->match(umc_driver, umc);
+		else
+			return 1;
+	}
+	return 0;
+}
+
+static int umc_device_probe(struct device *dev)
+{
+	struct umc_dev *umc;
+	struct umc_driver *umc_driver;
+	int err;
+
+	umc_driver = to_umc_driver(dev->driver);
+	umc = to_umc_dev(dev);
+
+	get_device(dev);
+	err = umc_driver->probe(umc);
+	if (err)
+		put_device(dev);
+	else
+		umc_bus_rescan(dev->parent);
+
+	return err;
+}
+
+static int umc_device_remove(struct device *dev)
+{
+	struct umc_dev *umc;
+	struct umc_driver *umc_driver;
+
+	umc_driver = to_umc_driver(dev->driver);
+	umc = to_umc_dev(dev);
+
+	umc_driver->remove(umc);
+	put_device(dev);
+	return 0;
+}
+
+static ssize_t capability_id_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct umc_dev *umc = to_umc_dev(dev);
+
+	return sprintf(buf, "0x%02x\n", umc->cap_id);
+}
+static DEVICE_ATTR_RO(capability_id);
+
+static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct umc_dev *umc = to_umc_dev(dev);
+
+	return sprintf(buf, "0x%04x\n", umc->version);
+}
+static DEVICE_ATTR_RO(version);
+
+static struct attribute *umc_dev_attrs[] = {
+	&dev_attr_capability_id.attr,
+	&dev_attr_version.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(umc_dev);
+
+struct bus_type umc_bus_type = {
+	.name		= "umc",
+	.match		= umc_bus_match,
+	.probe		= umc_device_probe,
+	.remove		= umc_device_remove,
+	.dev_groups	= umc_dev_groups,
+};
+EXPORT_SYMBOL_GPL(umc_bus_type);
+
+static int __init umc_bus_init(void)
+{
+	return bus_register(&umc_bus_type);
+}
+module_init(umc_bus_init);
+
+static void __exit umc_bus_exit(void)
+{
+	bus_unregister(&umc_bus_type);
+}
+module_exit(umc_bus_exit);
+
+MODULE_DESCRIPTION("UWB Multi-interface Controller capability bus");
+MODULE_AUTHOR("Cambridge Silicon Radio Ltd.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/uwb/umc-dev.c b/drivers/staging/uwb/umc-dev.c
new file mode 100644
index 000000000000..0c71caae00be
--- /dev/null
+++ b/drivers/staging/uwb/umc-dev.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB Multi-interface Controller device management.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include "include/umc.h"
+
+static void umc_device_release(struct device *dev)
+{
+	struct umc_dev *umc = to_umc_dev(dev);
+
+	kfree(umc);
+}
+
+/**
+ * umc_device_create - allocate a child UMC device
+ * @parent: parent of the new UMC device.
+ * @n:      index of the new device.
+ *
+ * The new UMC device will have a bus ID of the parent with '-n'
+ * appended.
+ */
+struct umc_dev *umc_device_create(struct device *parent, int n)
+{
+	struct umc_dev *umc;
+
+	umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL);
+	if (umc) {
+		dev_set_name(&umc->dev, "%s-%d", dev_name(parent), n);
+		umc->dev.parent  = parent;
+		umc->dev.bus     = &umc_bus_type;
+		umc->dev.release = umc_device_release;
+
+		umc->dev.dma_mask = parent->dma_mask;
+	}
+	return umc;
+}
+EXPORT_SYMBOL_GPL(umc_device_create);
+
+/**
+ * umc_device_register - register a UMC device
+ * @umc: pointer to the UMC device
+ *
+ * The memory resource for the UMC device is acquired and the device
+ * registered with the system.
+ */
+int umc_device_register(struct umc_dev *umc)
+{
+	int err;
+
+	err = request_resource(umc->resource.parent, &umc->resource);
+	if (err < 0) {
+		dev_err(&umc->dev, "can't allocate resource range %pR: %d\n",
+			&umc->resource, err);
+		goto error_request_resource;
+	}
+
+	err = device_register(&umc->dev);
+	if (err < 0)
+		goto error_device_register;
+	return 0;
+
+error_device_register:
+	put_device(&umc->dev);
+	release_resource(&umc->resource);
+error_request_resource:
+	return err;
+}
+EXPORT_SYMBOL_GPL(umc_device_register);
+
+/**
+ * umc_device_unregister - unregister a UMC device
+ * @umc: pointer to the UMC device
+ *
+ * First we unregister the device, make sure the driver can do it's
+ * resource release thing and then we try to release any left over
+ * resources. We take a ref to the device, to make sure it doesn't
+ * disappear under our feet.
+ */
+void umc_device_unregister(struct umc_dev *umc)
+{
+	struct device *dev;
+	if (!umc)
+		return;
+	dev = get_device(&umc->dev);
+	device_unregister(&umc->dev);
+	release_resource(&umc->resource);
+	put_device(dev);
+}
+EXPORT_SYMBOL_GPL(umc_device_unregister);
diff --git a/drivers/staging/uwb/umc-drv.c b/drivers/staging/uwb/umc-drv.c
new file mode 100644
index 000000000000..ed3bd220e8c2
--- /dev/null
+++ b/drivers/staging/uwb/umc-drv.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * UWB Multi-interface Controller driver management.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include "include/umc.h"
+
+int __umc_driver_register(struct umc_driver *umc_drv, struct module *module,
+			  const char *mod_name)
+{
+	umc_drv->driver.name     = umc_drv->name;
+	umc_drv->driver.owner    = module;
+	umc_drv->driver.mod_name = mod_name;
+	umc_drv->driver.bus      = &umc_bus_type;
+
+	return driver_register(&umc_drv->driver);
+}
+EXPORT_SYMBOL_GPL(__umc_driver_register);
+
+/**
+ * umc_driver_register - unregister a UMC capabiltity driver.
+ * @umc_drv:  pointer to the driver.
+ */
+void umc_driver_unregister(struct umc_driver *umc_drv)
+{
+	driver_unregister(&umc_drv->driver);
+}
+EXPORT_SYMBOL_GPL(umc_driver_unregister);
diff --git a/drivers/staging/uwb/uwb-debug.c b/drivers/staging/uwb/uwb-debug.c
new file mode 100644
index 000000000000..dd14df219ef8
--- /dev/null
+++ b/drivers/staging/uwb/uwb-debug.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Debug support
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * FIXME: doc
+ */
+
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/notifier.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/seq_file.h>
+
+#include "include/debug-cmd.h"
+#include "uwb-internal.h"
+
+/*
+ * Debug interface
+ *
+ * Per radio controller debugfs files (in uwb/uwbN/):
+ *
+ * command: Flexible command interface (see <linux/uwb/debug-cmd.h>).
+ *
+ * reservations: information on reservations.
+ *
+ * accept: Set to true (Y or 1) to accept reservation requests from
+ * peers.
+ *
+ * drp_avail: DRP availability information.
+ */
+
+struct uwb_dbg {
+	struct uwb_pal pal;
+
+	bool accept;
+	struct list_head rsvs;
+
+	struct dentry *root_d;
+	struct dentry *command_f;
+	struct dentry *reservations_f;
+	struct dentry *accept_f;
+	struct dentry *drp_avail_f;
+	spinlock_t list_lock;
+};
+
+static struct dentry *root_dir;
+
+static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
+{
+	struct uwb_dbg *dbg = rsv->pal_priv;
+
+	uwb_rsv_dump("debug", rsv);
+
+	if (rsv->state == UWB_RSV_STATE_NONE) {
+		spin_lock(&dbg->list_lock);
+		list_del(&rsv->pal_node);
+		spin_unlock(&dbg->list_lock);
+		uwb_rsv_destroy(rsv);
+	}
+}
+
+static int cmd_rsv_establish(struct uwb_rc *rc,
+			     struct uwb_dbg_cmd_rsv_establish *cmd)
+{
+	struct uwb_mac_addr macaddr;
+	struct uwb_rsv *rsv;
+	struct uwb_dev *target;
+	int ret;
+
+	memcpy(&macaddr, cmd->target, sizeof(macaddr));
+	target = uwb_dev_get_by_macaddr(rc, &macaddr);
+	if (target == NULL)
+		return -ENODEV;
+
+	rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, rc->dbg);
+	if (rsv == NULL) {
+		uwb_dev_put(target);
+		return -ENOMEM;
+	}
+
+	rsv->target.type  = UWB_RSV_TARGET_DEV;
+	rsv->target.dev   = target;
+	rsv->type         = cmd->type;
+	rsv->max_mas      = cmd->max_mas;
+	rsv->min_mas      = cmd->min_mas;
+	rsv->max_interval = cmd->max_interval;
+
+	ret = uwb_rsv_establish(rsv);
+	if (ret)
+		uwb_rsv_destroy(rsv);
+	else {
+		spin_lock(&(rc->dbg)->list_lock);
+		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
+		spin_unlock(&(rc->dbg)->list_lock);
+	}
+	return ret;
+}
+
+static int cmd_rsv_terminate(struct uwb_rc *rc,
+			     struct uwb_dbg_cmd_rsv_terminate *cmd)
+{
+	struct uwb_rsv *rsv, *found = NULL;
+	int i = 0;
+
+	spin_lock(&(rc->dbg)->list_lock);
+
+	list_for_each_entry(rsv, &rc->dbg->rsvs, pal_node) {
+		if (i == cmd->index) {
+			found = rsv;
+			uwb_rsv_get(found);
+			break;
+		}
+		i++;
+	}
+
+	spin_unlock(&(rc->dbg)->list_lock);
+
+	if (!found)
+		return -EINVAL;
+
+	uwb_rsv_terminate(found);
+	uwb_rsv_put(found);
+
+	return 0;
+}
+
+static int cmd_ie_add(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_add)
+{
+	return uwb_rc_ie_add(rc,
+			     (const struct uwb_ie_hdr *) ie_to_add->data,
+			     ie_to_add->len);
+}
+
+static int cmd_ie_rm(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_rm)
+{
+	return uwb_rc_ie_rm(rc, ie_to_rm->data[0]);
+}
+
+static ssize_t command_write(struct file *file, const char __user *buf,
+			 size_t len, loff_t *off)
+{
+	struct uwb_rc *rc = file->private_data;
+	struct uwb_dbg_cmd cmd;
+	int ret = 0;
+	
+	if (len != sizeof(struct uwb_dbg_cmd))
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, buf, len) != 0)
+		return -EFAULT;
+
+	switch (cmd.type) {
+	case UWB_DBG_CMD_RSV_ESTABLISH:
+		ret = cmd_rsv_establish(rc, &cmd.rsv_establish);
+		break;
+	case UWB_DBG_CMD_RSV_TERMINATE:
+		ret = cmd_rsv_terminate(rc, &cmd.rsv_terminate);
+		break;
+	case UWB_DBG_CMD_IE_ADD:
+		ret = cmd_ie_add(rc, &cmd.ie_add);
+		break;
+	case UWB_DBG_CMD_IE_RM:
+		ret = cmd_ie_rm(rc, &cmd.ie_rm);
+		break;
+	case UWB_DBG_CMD_RADIO_START:
+		ret = uwb_radio_start(&rc->dbg->pal);
+		break;
+	case UWB_DBG_CMD_RADIO_STOP:
+		uwb_radio_stop(&rc->dbg->pal);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return ret < 0 ? ret : len;
+}
+
+static const struct file_operations command_fops = {
+	.open	= simple_open,
+	.write  = command_write,
+	.read   = NULL,
+	.llseek = no_llseek,
+	.owner  = THIS_MODULE,
+};
+
+static int reservations_show(struct seq_file *s, void *p)
+{
+	struct uwb_rc *rc = s->private;
+	struct uwb_rsv *rsv;
+
+	mutex_lock(&rc->rsvs_mutex);
+
+	list_for_each_entry(rsv, &rc->reservations, rc_node) {
+		struct uwb_dev_addr devaddr;
+		char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
+		bool is_owner;
+
+		uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
+		if (rsv->target.type == UWB_RSV_TARGET_DEV) {
+			devaddr = rsv->target.dev->dev_addr;
+			is_owner = &rc->uwb_dev == rsv->owner;
+		} else {
+			devaddr = rsv->target.devaddr;
+			is_owner = true;
+		}
+		uwb_dev_addr_print(target, sizeof(target), &devaddr);
+
+		seq_printf(s, "%c %s -> %s: %s\n",
+			   is_owner ? 'O' : 'T',
+			   owner, target, uwb_rsv_state_str(rsv->state));
+		seq_printf(s, "  stream: %d  type: %s\n",
+			   rsv->stream, uwb_rsv_type_str(rsv->type));
+		seq_printf(s, "  %*pb\n", UWB_NUM_MAS, rsv->mas.bm);
+	}
+
+	mutex_unlock(&rc->rsvs_mutex);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(reservations);
+
+static int drp_avail_show(struct seq_file *s, void *p)
+{
+	struct uwb_rc *rc = s->private;
+
+	seq_printf(s, "global:  %*pb\n", UWB_NUM_MAS, rc->drp_avail.global);
+	seq_printf(s, "local:   %*pb\n", UWB_NUM_MAS, rc->drp_avail.local);
+	seq_printf(s, "pending: %*pb\n", UWB_NUM_MAS, rc->drp_avail.pending);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(drp_avail);
+
+static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct device *dev = &pal->rc->uwb_dev.dev;
+
+	if (channel > 0)
+		dev_info(dev, "debug: channel %d started\n", channel);
+	else
+		dev_info(dev, "debug: channel stopped\n");
+}
+
+static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
+{
+	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
+
+	if (dbg->accept) {
+		spin_lock(&dbg->list_lock);
+		list_add_tail(&rsv->pal_node, &dbg->rsvs);
+		spin_unlock(&dbg->list_lock);
+		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
+	}
+}
+
+/**
+ * uwb_dbg_add_rc - add a debug interface for a radio controller
+ * @rc: the radio controller
+ */
+void uwb_dbg_add_rc(struct uwb_rc *rc)
+{
+	rc->dbg = kzalloc(sizeof(struct uwb_dbg), GFP_KERNEL);
+	if (rc->dbg == NULL)
+		return;
+
+	INIT_LIST_HEAD(&rc->dbg->rsvs);
+	spin_lock_init(&(rc->dbg)->list_lock);
+
+	uwb_pal_init(&rc->dbg->pal);
+	rc->dbg->pal.rc = rc;
+	rc->dbg->pal.channel_changed = uwb_dbg_channel_changed;
+	rc->dbg->pal.new_rsv = uwb_dbg_new_rsv;
+	uwb_pal_register(&rc->dbg->pal);
+
+	if (root_dir) {
+		rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev),
+						     root_dir);
+		rc->dbg->command_f = debugfs_create_file("command", 0200,
+							 rc->dbg->root_d, rc,
+							 &command_fops);
+		rc->dbg->reservations_f = debugfs_create_file("reservations", 0444,
+							      rc->dbg->root_d, rc,
+							      &reservations_fops);
+		rc->dbg->accept_f = debugfs_create_bool("accept", 0644,
+							rc->dbg->root_d,
+							&rc->dbg->accept);
+		rc->dbg->drp_avail_f = debugfs_create_file("drp_avail", 0444,
+							   rc->dbg->root_d, rc,
+							   &drp_avail_fops);
+	}
+}
+
+/**
+ * uwb_dbg_del_rc - remove a radio controller's debug interface
+ * @rc: the radio controller
+ */
+void uwb_dbg_del_rc(struct uwb_rc *rc)
+{
+	struct uwb_rsv *rsv, *t;
+
+	if (rc->dbg == NULL)
+		return;
+
+	list_for_each_entry_safe(rsv, t, &rc->dbg->rsvs, pal_node) {
+		uwb_rsv_terminate(rsv);
+	}
+
+	uwb_pal_unregister(&rc->dbg->pal);
+
+	if (root_dir) {
+		debugfs_remove(rc->dbg->drp_avail_f);
+		debugfs_remove(rc->dbg->accept_f);
+		debugfs_remove(rc->dbg->reservations_f);
+		debugfs_remove(rc->dbg->command_f);
+		debugfs_remove(rc->dbg->root_d);
+	}
+}
+
+/**
+ * uwb_dbg_exit - initialize the debug interface sub-module
+ */
+void uwb_dbg_init(void)
+{
+	root_dir = debugfs_create_dir("uwb", NULL);
+}
+
+/**
+ * uwb_dbg_exit - clean-up the debug interface sub-module
+ */
+void uwb_dbg_exit(void)
+{
+	debugfs_remove(root_dir);
+}
+
+/**
+ * uwb_dbg_create_pal_dir - create a debugfs directory for a PAL
+ * @pal: The PAL.
+ */
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal)
+{
+	struct uwb_rc *rc = pal->rc;
+
+	if (root_dir && rc->dbg && rc->dbg->root_d && pal->name)
+		return debugfs_create_dir(pal->name, rc->dbg->root_d);
+	return NULL;
+}
diff --git a/drivers/staging/uwb/uwb-internal.h b/drivers/staging/uwb/uwb-internal.h
new file mode 100644
index 000000000000..4c2fdac7f610
--- /dev/null
+++ b/drivers/staging/uwb/uwb-internal.h
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Ultra Wide Band
+ * UWB internal API
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This contains most of the internal API for UWB. This is stuff used
+ * across the stack that of course, is of no interest to the rest.
+ *
+ * Some parts might end up going public (like uwb_rc_*())...
+ */
+
+#ifndef __UWB_INTERNAL_H__
+#define __UWB_INTERNAL_H__
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include "uwb.h"
+
+struct uwb_beca_e;
+
+/* General device API */
+extern void uwb_dev_init(struct uwb_dev *uwb_dev);
+extern int __uwb_dev_offair(struct uwb_dev *, struct uwb_rc *);
+extern int uwb_dev_add(struct uwb_dev *uwb_dev, struct device *parent_dev,
+		       struct uwb_rc *parent_rc);
+extern void uwb_dev_rm(struct uwb_dev *uwb_dev);
+extern void uwbd_dev_onair(struct uwb_rc *, struct uwb_beca_e *);
+extern void uwbd_dev_offair(struct uwb_beca_e *);
+void uwb_notify(struct uwb_rc *rc, struct uwb_dev *uwb_dev, enum uwb_notifs event);
+
+/* General UWB Radio Controller Internal API */
+extern struct uwb_rc *__uwb_rc_try_get(struct uwb_rc *);
+static inline struct uwb_rc *__uwb_rc_get(struct uwb_rc *rc)
+{
+	uwb_dev_get(&rc->uwb_dev);
+	return rc;
+}
+
+static inline void __uwb_rc_put(struct uwb_rc *rc)
+{
+	if (rc)
+		uwb_dev_put(&rc->uwb_dev);
+}
+
+extern int uwb_rc_reset(struct uwb_rc *rc);
+extern int uwb_rc_beacon(struct uwb_rc *rc,
+			 int channel, unsigned bpst_offset);
+extern int uwb_rc_scan(struct uwb_rc *rc,
+		       unsigned channel, enum uwb_scan_type type,
+		       unsigned bpst_offset);
+extern int uwb_rc_send_all_drp_ie(struct uwb_rc *rc);
+
+void uwb_rc_ie_init(struct uwb_rc *);
+int uwb_rc_ie_setup(struct uwb_rc *);
+void uwb_rc_ie_release(struct uwb_rc *);
+int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
+		    char *buf, size_t size);
+int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
+
+
+extern const char *uwb_rc_strerror(unsigned code);
+
+/*
+ * Time to wait for a response to an RC command.
+ *
+ * Some commands can take a long time to response. e.g., START_BEACON
+ * may scan for several superframes before joining an existing beacon
+ * group and this can take around 600 ms.
+ */
+#define UWB_RC_CMD_TIMEOUT_MS 1000 /* ms */
+
+/*
+ * Notification/Event Handlers
+ */
+
+struct uwb_rc_neh;
+
+extern int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
+			    struct uwb_rccb *cmd, size_t cmd_size,
+			    u8 expected_type, u16 expected_event,
+			    uwb_rc_cmd_cb_f cb, void *arg);
+
+
+void uwb_rc_neh_create(struct uwb_rc *rc);
+void uwb_rc_neh_destroy(struct uwb_rc *rc);
+
+struct uwb_rc_neh *uwb_rc_neh_add(struct uwb_rc *rc, struct uwb_rccb *cmd,
+				  u8 expected_type, u16 expected_event,
+				  uwb_rc_cmd_cb_f cb, void *arg);
+void uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh);
+void uwb_rc_neh_arm(struct uwb_rc *rc, struct uwb_rc_neh *neh);
+void uwb_rc_neh_put(struct uwb_rc_neh *neh);
+
+/* Event size tables */
+extern int uwb_est_create(void);
+extern void uwb_est_destroy(void);
+
+/*
+ * UWB conflicting alien reservations
+ */
+struct uwb_cnflt_alien {
+	struct uwb_rc *rc;
+	struct list_head rc_node;
+	struct uwb_mas_bm mas;
+	struct timer_list timer;
+	struct work_struct cnflt_update_work;
+};
+
+enum uwb_uwb_rsv_alloc_result {
+	UWB_RSV_ALLOC_FOUND = 0,
+	UWB_RSV_ALLOC_NOT_FOUND,
+};
+
+enum uwb_rsv_mas_status {
+	UWB_RSV_MAS_NOT_AVAIL = 1,
+	UWB_RSV_MAS_SAFE,
+	UWB_RSV_MAS_UNSAFE,
+};
+
+struct uwb_rsv_col_set_info {
+	unsigned char start_col;
+	unsigned char interval;
+	unsigned char safe_mas_per_col;
+	unsigned char unsafe_mas_per_col;
+};
+
+struct uwb_rsv_col_info {
+	unsigned char max_avail_safe;
+	unsigned char max_avail_unsafe;
+	unsigned char highest_mas[UWB_MAS_PER_ZONE];
+	struct uwb_rsv_col_set_info csi;
+};
+
+struct uwb_rsv_row_info {
+	unsigned char avail[UWB_MAS_PER_ZONE];
+	unsigned char free_rows;
+	unsigned char used_rows;
+};
+
+/*
+ * UWB find allocation
+ */
+struct uwb_rsv_alloc_info {
+	unsigned char bm[UWB_MAS_PER_ZONE * UWB_NUM_ZONES];
+	struct uwb_rsv_col_info ci[UWB_NUM_ZONES];
+	struct uwb_rsv_row_info ri;
+	struct uwb_mas_bm *not_available;
+	struct uwb_mas_bm *result;
+	int min_mas;
+	int max_mas;
+	int max_interval;
+	int total_allocated_mases;
+	int safe_allocated_mases;
+	int unsafe_allocated_mases;
+	int interval;
+};
+
+int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv,
+				 struct uwb_mas_bm *available,
+				 struct uwb_mas_bm *result);
+void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc);
+/*
+ * UWB Events & management daemon
+ */
+
+/**
+ * enum uwb_event_type - types of UWB management daemon events
+ *
+ * The UWB management daemon (uwbd) can receive two types of events:
+ *   UWB_EVT_TYPE_NOTIF - notification from the radio controller.
+ *   UWB_EVT_TYPE_MSG   - a simple message.
+ */
+enum uwb_event_type {
+	UWB_EVT_TYPE_NOTIF,
+	UWB_EVT_TYPE_MSG,
+};
+
+/**
+ * struct uwb_event_notif - an event for a radio controller notification
+ * @size: Size of the buffer (ie: Guaranteed to contain at least
+ *        a full 'struct uwb_rceb')
+ * @rceb: Pointer to a kmalloced() event payload
+ */
+struct uwb_event_notif {
+	size_t size;
+	struct uwb_rceb *rceb;
+};
+
+/**
+ * enum uwb_event_message - an event for a message for asynchronous processing
+ *
+ * UWB_EVT_MSG_RESET - reset the radio controller and all PAL hardware.
+ */
+enum uwb_event_message {
+	UWB_EVT_MSG_RESET,
+};
+
+/**
+ * UWB Event
+ * @rc:         Radio controller that emitted the event (referenced)
+ * @ts_jiffies: Timestamp, when was it received
+ * @type:       This event's type.
+ */
+struct uwb_event {
+	struct list_head list_node;
+	struct uwb_rc *rc;
+	unsigned long ts_jiffies;
+	enum uwb_event_type type;
+	union {
+		struct uwb_event_notif notif;
+		enum uwb_event_message message;
+	};
+};
+
+extern void uwbd_start(struct uwb_rc *rc);
+extern void uwbd_stop(struct uwb_rc *rc);
+extern struct uwb_event *uwb_event_alloc(size_t, gfp_t gfp_mask);
+extern void uwbd_event_queue(struct uwb_event *);
+void uwbd_flush(struct uwb_rc *rc);
+
+/* UWB event handlers */
+extern int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *);
+extern int uwbd_evt_handle_rc_beacon(struct uwb_event *);
+extern int uwbd_evt_handle_rc_beacon_size(struct uwb_event *);
+extern int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *);
+extern int uwbd_evt_handle_rc_bp_slot_change(struct uwb_event *);
+extern int uwbd_evt_handle_rc_drp(struct uwb_event *);
+extern int uwbd_evt_handle_rc_drp_avail(struct uwb_event *);
+
+int uwbd_msg_handle_reset(struct uwb_event *evt);
+
+
+/*
+ * Address management
+ */
+int uwb_rc_dev_addr_assign(struct uwb_rc *rc);
+int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt);
+
+/*
+ * UWB Beacon Cache
+ *
+ * Each beacon we received is kept in a cache--when we receive that
+ * beacon consistently, that means there is a new device that we have
+ * to add to the system.
+ */
+
+extern unsigned long beacon_timeout_ms;
+
+/**
+ * Beacon cache entry
+ *
+ * @jiffies_refresh: last time a beacon was  received that refreshed
+ *                   this cache entry.
+ * @uwb_dev: device connected to this beacon. This pointer is not
+ *           safe, you need to get it with uwb_dev_try_get()
+ *
+ * @hits: how many time we have seen this beacon since last time we
+ *        cleared it
+ */
+struct uwb_beca_e {
+	struct mutex mutex;
+	struct kref refcnt;
+	struct list_head node;
+	struct uwb_mac_addr *mac_addr;
+	struct uwb_dev_addr dev_addr;
+	u8 hits;
+	unsigned long ts_jiffies;
+	struct uwb_dev *uwb_dev;
+	struct uwb_rc_evt_beacon *be;
+	struct stats lqe_stats, rssi_stats;	/* radio statistics */
+};
+struct uwb_beacon_frame;
+extern ssize_t uwb_bce_print_IEs(struct uwb_dev *, struct uwb_beca_e *,
+				 char *, size_t);
+
+extern void uwb_bce_kfree(struct kref *_bce);
+static inline void uwb_bce_get(struct uwb_beca_e *bce)
+{
+	kref_get(&bce->refcnt);
+}
+static inline void uwb_bce_put(struct uwb_beca_e *bce)
+{
+	kref_put(&bce->refcnt, uwb_bce_kfree);
+}
+extern void uwb_beca_purge(struct uwb_rc *rc);
+extern void uwb_beca_release(struct uwb_rc *rc);
+
+struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
+				       const struct uwb_dev_addr *devaddr);
+struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
+				       const struct uwb_mac_addr *macaddr);
+
+int uwb_radio_setup(struct uwb_rc *rc);
+void uwb_radio_reset_state(struct uwb_rc *rc);
+void uwb_radio_shutdown(struct uwb_rc *rc);
+int uwb_radio_force_channel(struct uwb_rc *rc, int channel);
+
+/* -- UWB Sysfs representation */
+extern struct class uwb_rc_class;
+extern struct bus_type uwb_bus_type;
+extern struct device_attribute dev_attr_mac_address;
+extern struct device_attribute dev_attr_beacon;
+extern struct device_attribute dev_attr_scan;
+
+/* -- DRP Bandwidth allocator: bandwidth allocations, reservations, DRP */
+void uwb_rsv_init(struct uwb_rc *rc);
+int uwb_rsv_setup(struct uwb_rc *rc);
+void uwb_rsv_cleanup(struct uwb_rc *rc);
+void uwb_rsv_remove_all(struct uwb_rc *rc);
+void uwb_rsv_get(struct uwb_rsv *rsv);
+void uwb_rsv_put(struct uwb_rsv *rsv);
+bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv);
+void uwb_rsv_dump(char *text, struct uwb_rsv *rsv);
+int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available);
+void uwb_rsv_backoff_win_timer(struct timer_list *t);
+void uwb_rsv_backoff_win_increment(struct uwb_rc *rc);
+int uwb_rsv_status(struct uwb_rsv *rsv);
+int uwb_rsv_companion_status(struct uwb_rsv *rsv);
+
+void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
+void uwb_rsv_remove(struct uwb_rsv *rsv);
+struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
+			     struct uwb_ie_drp *drp_ie);
+void uwb_rsv_sched_update(struct uwb_rc *rc);
+void uwb_rsv_queue_update(struct uwb_rc *rc);
+
+int uwb_drp_ie_update(struct uwb_rsv *rsv);
+void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie);
+
+void uwb_drp_avail_init(struct uwb_rc *rc);
+void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail);
+int  uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas);
+void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas);
+void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas);
+void uwb_drp_avail_ie_update(struct uwb_rc *rc);
+
+/* -- PAL support */
+void uwb_rc_pal_init(struct uwb_rc *rc);
+
+/* -- Misc */
+
+extern ssize_t uwb_mac_frame_hdr_print(char *, size_t,
+				       const struct uwb_mac_frame_hdr *);
+
+/* -- Debug interface */
+void uwb_dbg_init(void);
+void uwb_dbg_exit(void);
+void uwb_dbg_add_rc(struct uwb_rc *rc);
+void uwb_dbg_del_rc(struct uwb_rc *rc);
+struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal);
+
+static inline void uwb_dev_lock(struct uwb_dev *uwb_dev)
+{
+	device_lock(&uwb_dev->dev);
+}
+
+static inline void uwb_dev_unlock(struct uwb_dev *uwb_dev)
+{
+	device_unlock(&uwb_dev->dev);
+}
+
+#endif /* #ifndef __UWB_INTERNAL_H__ */
diff --git a/drivers/staging/uwb/uwb.h b/drivers/staging/uwb/uwb.h
new file mode 100644
index 000000000000..6a59706ba3a0
--- /dev/null
+++ b/drivers/staging/uwb/uwb.h
@@ -0,0 +1,817 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Ultra Wide Band
+ * UWB API
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: doc: overview of the API, different parts and pointers
+ */
+
+#ifndef __LINUX__UWB_H__
+#define __LINUX__UWB_H__
+
+#include <linux/limits.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include <asm/page.h>
+#include "include/spec.h"
+
+struct uwb_dev;
+struct uwb_beca_e;
+struct uwb_rc;
+struct uwb_rsv;
+struct uwb_dbg;
+
+/**
+ * struct uwb_dev - a UWB Device
+ * @rc: UWB Radio Controller that discovered the device (kind of its
+ *     parent).
+ * @bce: a beacon cache entry for this device; or NULL if the device
+ *     is a local radio controller.
+ * @mac_addr: the EUI-48 address of this device.
+ * @dev_addr: the current DevAddr used by this device.
+ * @beacon_slot: the slot number the beacon is using.
+ * @streams: bitmap of streams allocated to reservations targeted at
+ *     this device.  For an RC, this is the streams allocated for
+ *     reservations targeted at DevAddrs.
+ *
+ * A UWB device may either by a neighbor or part of a local radio
+ * controller.
+ */
+struct uwb_dev {
+	struct mutex mutex;
+	struct list_head list_node;
+	struct device dev;
+	struct uwb_rc *rc;		/* radio controller */
+	struct uwb_beca_e *bce;		/* Beacon Cache Entry */
+
+	struct uwb_mac_addr mac_addr;
+	struct uwb_dev_addr dev_addr;
+	int beacon_slot;
+	DECLARE_BITMAP(streams, UWB_NUM_STREAMS);
+	DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS);
+};
+#define to_uwb_dev(d) container_of(d, struct uwb_dev, dev)
+
+/**
+ * UWB HWA/WHCI Radio Control {Command|Event} Block context IDs
+ *
+ * RC[CE]Bs have a 'context ID' field that matches the command with
+ * the event received to confirm it.
+ *
+ * Maximum number of context IDs
+ */
+enum { UWB_RC_CTX_MAX = 256 };
+
+
+/** Notification chain head for UWB generated events to listeners */
+struct uwb_notifs_chain {
+	struct list_head list;
+	struct mutex mutex;
+};
+
+/* Beacon cache list */
+struct uwb_beca {
+	struct list_head list;
+	size_t entries;
+	struct mutex mutex;
+};
+
+/* Event handling thread. */
+struct uwbd {
+	int pid;
+	struct task_struct *task;
+	wait_queue_head_t wq;
+	struct list_head event_list;
+	spinlock_t event_list_lock;
+};
+
+/**
+ * struct uwb_mas_bm - a bitmap of all MAS in a superframe
+ * @bm: a bitmap of length #UWB_NUM_MAS
+ */
+struct uwb_mas_bm {
+	DECLARE_BITMAP(bm, UWB_NUM_MAS);
+	DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS);
+	int safe;
+	int unsafe;
+};
+
+/**
+ * uwb_rsv_state - UWB Reservation state.
+ *
+ * NONE - reservation is not active (no DRP IE being transmitted).
+ *
+ * Owner reservation states:
+ *
+ * INITIATED - owner has sent an initial DRP request.
+ * PENDING - target responded with pending Reason Code.
+ * MODIFIED - reservation manager is modifying an established
+ * reservation with a different MAS allocation.
+ * ESTABLISHED - the reservation has been successfully negotiated.
+ *
+ * Target reservation states:
+ *
+ * DENIED - request is denied.
+ * ACCEPTED - request is accepted.
+ * PENDING - PAL has yet to make a decision to whether to accept or
+ * deny.
+ *
+ * FIXME: further target states TBD.
+ */
+enum uwb_rsv_state {
+	UWB_RSV_STATE_NONE = 0,
+	UWB_RSV_STATE_O_INITIATED,
+	UWB_RSV_STATE_O_PENDING,
+	UWB_RSV_STATE_O_MODIFIED,
+	UWB_RSV_STATE_O_ESTABLISHED,
+	UWB_RSV_STATE_O_TO_BE_MOVED,
+	UWB_RSV_STATE_O_MOVE_EXPANDING,
+	UWB_RSV_STATE_O_MOVE_COMBINING,
+	UWB_RSV_STATE_O_MOVE_REDUCING,
+	UWB_RSV_STATE_T_ACCEPTED,
+	UWB_RSV_STATE_T_DENIED,
+	UWB_RSV_STATE_T_CONFLICT,
+	UWB_RSV_STATE_T_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_ACCEPTED,
+	UWB_RSV_STATE_T_EXPANDING_CONFLICT,
+	UWB_RSV_STATE_T_EXPANDING_PENDING,
+	UWB_RSV_STATE_T_EXPANDING_DENIED,
+	UWB_RSV_STATE_T_RESIZED,
+
+	UWB_RSV_STATE_LAST,
+};
+
+enum uwb_rsv_target_type {
+	UWB_RSV_TARGET_DEV,
+	UWB_RSV_TARGET_DEVADDR,
+};
+
+/**
+ * struct uwb_rsv_target - the target of a reservation.
+ *
+ * Reservations unicast and targeted at a single device
+ * (UWB_RSV_TARGET_DEV); or (e.g., in the case of WUSB) targeted at a
+ * specific (private) DevAddr (UWB_RSV_TARGET_DEVADDR).
+ */
+struct uwb_rsv_target {
+	enum uwb_rsv_target_type type;
+	union {
+		struct uwb_dev *dev;
+		struct uwb_dev_addr devaddr;
+	};
+};
+
+struct uwb_rsv_move {
+	struct uwb_mas_bm final_mas;
+	struct uwb_ie_drp *companion_drp_ie;
+	struct uwb_mas_bm companion_mas;
+};
+
+/*
+ * Number of streams reserved for reservations targeted at DevAddrs.
+ */
+#define UWB_NUM_GLOBAL_STREAMS 1
+
+typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv);
+
+/**
+ * struct uwb_rsv - a DRP reservation
+ *
+ * Data structure management:
+ *
+ * @rc:             the radio controller this reservation is for
+ *                  (as target or owner)
+ * @rc_node:        a list node for the RC
+ * @pal_node:       a list node for the PAL
+ *
+ * Owner and target parameters:
+ *
+ * @owner:          the UWB device owning this reservation
+ * @target:         the target UWB device
+ * @type:           reservation type
+ *
+ * Owner parameters:
+ *
+ * @max_mas:        maxiumum number of MAS
+ * @min_mas:        minimum number of MAS
+ * @sparsity:       owner selected sparsity
+ * @is_multicast:   true iff multicast
+ *
+ * @callback:       callback function when the reservation completes
+ * @pal_priv:       private data for the PAL making the reservation
+ *
+ * Reservation status:
+ *
+ * @status:         negotiation status
+ * @stream:         stream index allocated for this reservation
+ * @tiebreaker:     conflict tiebreaker for this reservation
+ * @mas:            reserved MAS
+ * @drp_ie:         the DRP IE
+ * @ie_valid:       true iff the DRP IE matches the reservation parameters
+ *
+ * DRP reservations are uniquely identified by the owner, target and
+ * stream index.  However, when using a DevAddr as a target (e.g., for
+ * a WUSB cluster reservation) the responses may be received from
+ * devices with different DevAddrs.  In this case, reservations are
+ * uniquely identified by just the stream index.  A number of stream
+ * indexes (UWB_NUM_GLOBAL_STREAMS) are reserved for this.
+ */
+struct uwb_rsv {
+	struct uwb_rc *rc;
+	struct list_head rc_node;
+	struct list_head pal_node;
+	struct kref kref;
+
+	struct uwb_dev *owner;
+	struct uwb_rsv_target target;
+	enum uwb_drp_type type;
+	int max_mas;
+	int min_mas;
+	int max_interval;
+	bool is_multicast;
+
+	uwb_rsv_cb_f callback;
+	void *pal_priv;
+
+	enum uwb_rsv_state state;
+	bool needs_release_companion_mas;
+	u8 stream;
+	u8 tiebreaker;
+	struct uwb_mas_bm mas;
+	struct uwb_ie_drp *drp_ie;
+	struct uwb_rsv_move mv;
+	bool ie_valid;
+	struct timer_list timer;
+	struct work_struct handle_timeout_work;
+};
+
+static const
+struct uwb_mas_bm uwb_mas_bm_zero = { .bm = { 0 } };
+
+static inline void uwb_mas_bm_copy_le(void *dst, const struct uwb_mas_bm *mas)
+{
+	bitmap_copy_le(dst, mas->bm, UWB_NUM_MAS);
+}
+
+/**
+ * struct uwb_drp_avail - a radio controller's view of MAS usage
+ * @global:   MAS unused by neighbors (excluding reservations targeted
+ *            or owned by the local radio controller) or the beaon period
+ * @local:    MAS unused by local established reservations
+ * @pending:  MAS unused by local pending reservations
+ * @ie:       DRP Availability IE to be included in the beacon
+ * @ie_valid: true iff @ie is valid and does not need to regenerated from
+ *            @global and @local
+ *
+ * Each radio controller maintains a view of MAS usage or
+ * availability. MAS available for a new reservation are determined
+ * from the intersection of @global, @local, and @pending.
+ *
+ * The radio controller must transmit a DRP Availability IE that's the
+ * intersection of @global and @local.
+ *
+ * A set bit indicates the MAS is unused and available.
+ *
+ * rc->rsvs_mutex should be held before accessing this data structure.
+ *
+ * [ECMA-368] section 17.4.3.
+ */
+struct uwb_drp_avail {
+	DECLARE_BITMAP(global, UWB_NUM_MAS);
+	DECLARE_BITMAP(local, UWB_NUM_MAS);
+	DECLARE_BITMAP(pending, UWB_NUM_MAS);
+	struct uwb_ie_drp_avail ie;
+	bool ie_valid;
+};
+
+struct uwb_drp_backoff_win {
+	u8 window;
+	u8 n;
+	int total_expired;
+	struct timer_list timer;
+	bool can_reserve_extra_mases;
+};
+
+const char *uwb_rsv_state_str(enum uwb_rsv_state state);
+const char *uwb_rsv_type_str(enum uwb_drp_type type);
+
+struct uwb_rsv *uwb_rsv_create(struct uwb_rc *rc, uwb_rsv_cb_f cb,
+			       void *pal_priv);
+void uwb_rsv_destroy(struct uwb_rsv *rsv);
+
+int uwb_rsv_establish(struct uwb_rsv *rsv);
+int uwb_rsv_modify(struct uwb_rsv *rsv,
+		   int max_mas, int min_mas, int sparsity);
+void uwb_rsv_terminate(struct uwb_rsv *rsv);
+
+void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv);
+
+void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas);
+
+/**
+ * Radio Control Interface instance
+ *
+ *
+ * Life cycle rules: those of the UWB Device.
+ *
+ * @index:    an index number for this radio controller, as used in the
+ *            device name.
+ * @version:  version of protocol supported by this device
+ * @priv:     Backend implementation; rw with uwb_dev.dev.sem taken.
+ * @cmd:      Backend implementation to execute commands; rw and call
+ *            only  with uwb_dev.dev.sem taken.
+ * @reset:    Hardware reset of radio controller and any PAL controllers.
+ * @filter:   Backend implementation to manipulate data to and from device
+ *            to be compliant to specification assumed by driver (WHCI
+ *            0.95).
+ *
+ *            uwb_dev.dev.mutex is used to execute commands and update
+ *            the corresponding structures; can't use a spinlock
+ *            because rc->cmd() can sleep.
+ * @ies:         This is a dynamically allocated array cacheing the
+ *               IEs (settable by the host) that the beacon of this
+ *               radio controller is currently sending.
+ *
+ *               In reality, we store here the full command we set to
+ *               the radio controller (which is basically a command
+ *               prefix followed by all the IEs the beacon currently
+ *               contains). This way we don't have to realloc and
+ *               memcpy when setting it.
+ *
+ *               We set this up in uwb_rc_ie_setup(), where we alloc
+ *               this struct, call get_ie() [so we know which IEs are
+ *               currently being sent, if any].
+ *
+ * @ies_capacity:Amount of space (in bytes) allocated in @ies. The
+ *               amount used is given by sizeof(*ies) plus ies->wIELength
+ *               (which is a little endian quantity all the time).
+ * @ies_mutex:   protect the IE cache
+ * @dbg:         information for the debug interface
+ */
+struct uwb_rc {
+	struct uwb_dev uwb_dev;
+	int index;
+	u16 version;
+
+	struct module *owner;
+	void *priv;
+	int (*start)(struct uwb_rc *rc);
+	void (*stop)(struct uwb_rc *rc);
+	int (*cmd)(struct uwb_rc *, const struct uwb_rccb *, size_t);
+	int (*reset)(struct uwb_rc *rc);
+	int (*filter_cmd)(struct uwb_rc *, struct uwb_rccb **, size_t *);
+	int (*filter_event)(struct uwb_rc *, struct uwb_rceb **, const size_t,
+			    size_t *, size_t *);
+
+	spinlock_t neh_lock;		/* protects neh_* and ctx_* */
+	struct list_head neh_list;	/* Open NE handles */
+	unsigned long ctx_bm[UWB_RC_CTX_MAX / 8 / sizeof(unsigned long)];
+	u8 ctx_roll;
+
+	int beaconing;			/* Beaconing state [channel number] */
+	int beaconing_forced;
+	int scanning;
+	enum uwb_scan_type scan_type:3;
+	unsigned ready:1;
+	struct uwb_notifs_chain notifs_chain;
+	struct uwb_beca uwb_beca;
+
+	struct uwbd uwbd;
+
+	struct uwb_drp_backoff_win bow;
+	struct uwb_drp_avail drp_avail;
+	struct list_head reservations;
+	struct list_head cnflt_alien_list;
+	struct uwb_mas_bm cnflt_alien_bitmap;
+	struct mutex rsvs_mutex;
+	spinlock_t rsvs_lock;
+	struct workqueue_struct *rsv_workq;
+
+	struct delayed_work rsv_update_work;
+	struct delayed_work rsv_alien_bp_work;
+	int set_drp_ie_pending;
+	struct mutex ies_mutex;
+	struct uwb_rc_cmd_set_ie *ies;
+	size_t ies_capacity;
+
+	struct list_head pals;
+	int active_pals;
+
+	struct uwb_dbg *dbg;
+};
+
+
+/**
+ * struct uwb_pal - a UWB PAL
+ * @name:    descriptive name for this PAL (wusbhc, wlp, etc.).
+ * @device:  a device for the PAL.  Used to link the PAL and the radio
+ *           controller in sysfs.
+ * @rc:      the radio controller the PAL uses.
+ * @channel_changed: called when the channel used by the radio changes.
+ *           A channel of -1 means the channel has been stopped.
+ * @new_rsv: called when a peer requests a reservation (may be NULL if
+ *           the PAL cannot accept reservation requests).
+ * @channel: channel being used by the PAL; 0 if the PAL isn't using
+ *           the radio; -1 if the PAL wishes to use the radio but
+ *           cannot.
+ * @debugfs_dir: a debugfs directory which the PAL can use for its own
+ *           debugfs files.
+ *
+ * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
+ * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
+ *
+ * The PALs using a radio controller must register themselves to
+ * permit the UWB stack to coordinate usage of the radio between the
+ * various PALs or to allow PALs to response to certain requests from
+ * peers.
+ *
+ * A struct uwb_pal should be embedded in a containing structure
+ * belonging to the PAL and initialized with uwb_pal_init()).  Fields
+ * should be set appropriately by the PAL before registering the PAL
+ * with uwb_pal_register().
+ */
+struct uwb_pal {
+	struct list_head node;
+	const char *name;
+	struct device *device;
+	struct uwb_rc *rc;
+
+	void (*channel_changed)(struct uwb_pal *pal, int channel);
+	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
+
+	int channel;
+	struct dentry *debugfs_dir;
+};
+
+void uwb_pal_init(struct uwb_pal *pal);
+int uwb_pal_register(struct uwb_pal *pal);
+void uwb_pal_unregister(struct uwb_pal *pal);
+
+int uwb_radio_start(struct uwb_pal *pal);
+void uwb_radio_stop(struct uwb_pal *pal);
+
+/*
+ * General public API
+ *
+ * This API can be used by UWB device drivers or by those implementing
+ * UWB Radio Controllers
+ */
+struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
+				       const struct uwb_dev_addr *devaddr);
+struct uwb_dev *uwb_dev_get_by_rc(struct uwb_dev *, struct uwb_rc *);
+static inline void uwb_dev_get(struct uwb_dev *uwb_dev)
+{
+	get_device(&uwb_dev->dev);
+}
+static inline void uwb_dev_put(struct uwb_dev *uwb_dev)
+{
+	put_device(&uwb_dev->dev);
+}
+struct uwb_dev *uwb_dev_try_get(struct uwb_rc *rc, struct uwb_dev *uwb_dev);
+
+/**
+ * Callback function for 'uwb_{dev,rc}_foreach()'.
+ *
+ * @dev:  Linux device instance
+ *        'uwb_dev = container_of(dev, struct uwb_dev, dev)'
+ * @priv: Data passed by the caller to 'uwb_{dev,rc}_foreach()'.
+ *
+ * @returns: 0 to continue the iterations, any other val to stop
+ *           iterating and return the value to the caller of
+ *           _foreach().
+ */
+typedef int (*uwb_dev_for_each_f)(struct device *dev, void *priv);
+int uwb_dev_for_each(struct uwb_rc *rc, uwb_dev_for_each_f func, void *priv);
+
+struct uwb_rc *uwb_rc_alloc(void);
+struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *);
+struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *);
+void uwb_rc_put(struct uwb_rc *rc);
+
+typedef void (*uwb_rc_cmd_cb_f)(struct uwb_rc *rc, void *arg,
+                                struct uwb_rceb *reply, ssize_t reply_size);
+
+int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
+		     struct uwb_rccb *cmd, size_t cmd_size,
+		     u8 expected_type, u16 expected_event,
+		     uwb_rc_cmd_cb_f cb, void *arg);
+ssize_t uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name,
+		   struct uwb_rccb *cmd, size_t cmd_size,
+		   struct uwb_rceb *reply, size_t reply_size);
+ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
+		    struct uwb_rccb *cmd, size_t cmd_size,
+		    u8 expected_type, u16 expected_event,
+		    struct uwb_rceb **preply);
+
+size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
+
+int uwb_rc_dev_addr_set(struct uwb_rc *, const struct uwb_dev_addr *);
+int uwb_rc_dev_addr_get(struct uwb_rc *, struct uwb_dev_addr *);
+int uwb_rc_mac_addr_set(struct uwb_rc *, const struct uwb_mac_addr *);
+int uwb_rc_mac_addr_get(struct uwb_rc *, struct uwb_mac_addr *);
+int __uwb_mac_addr_assigned_check(struct device *, void *);
+int __uwb_dev_addr_assigned_check(struct device *, void *);
+
+/* Print in @buf a pretty repr of @addr */
+static inline size_t uwb_dev_addr_print(char *buf, size_t buf_size,
+					const struct uwb_dev_addr *addr)
+{
+	return __uwb_addr_print(buf, buf_size, addr->data, 0);
+}
+
+/* Print in @buf a pretty repr of @addr */
+static inline size_t uwb_mac_addr_print(char *buf, size_t buf_size,
+					const struct uwb_mac_addr *addr)
+{
+	return __uwb_addr_print(buf, buf_size, addr->data, 1);
+}
+
+/* @returns 0 if device addresses @addr2 and @addr1 are equal */
+static inline int uwb_dev_addr_cmp(const struct uwb_dev_addr *addr1,
+				   const struct uwb_dev_addr *addr2)
+{
+	return memcmp(addr1, addr2, sizeof(*addr1));
+}
+
+/* @returns 0 if MAC addresses @addr2 and @addr1 are equal */
+static inline int uwb_mac_addr_cmp(const struct uwb_mac_addr *addr1,
+				   const struct uwb_mac_addr *addr2)
+{
+	return memcmp(addr1, addr2, sizeof(*addr1));
+}
+
+/* @returns !0 if a MAC @addr is a broadcast address */
+static inline int uwb_mac_addr_bcast(const struct uwb_mac_addr *addr)
+{
+	struct uwb_mac_addr bcast = {
+		.data = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
+	};
+	return !uwb_mac_addr_cmp(addr, &bcast);
+}
+
+/* @returns !0 if a MAC @addr is all zeroes*/
+static inline int uwb_mac_addr_unset(const struct uwb_mac_addr *addr)
+{
+	struct uwb_mac_addr unset = {
+		.data = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+	};
+	return !uwb_mac_addr_cmp(addr, &unset);
+}
+
+/* @returns !0 if the address is in use. */
+static inline unsigned __uwb_dev_addr_assigned(struct uwb_rc *rc,
+					       struct uwb_dev_addr *addr)
+{
+	return uwb_dev_for_each(rc, __uwb_dev_addr_assigned_check, addr);
+}
+
+/*
+ * UWB Radio Controller API
+ *
+ * This API is used (in addition to the general API) to implement UWB
+ * Radio Controllers.
+ */
+void uwb_rc_init(struct uwb_rc *);
+int uwb_rc_add(struct uwb_rc *, struct device *dev, void *rc_priv);
+void uwb_rc_rm(struct uwb_rc *);
+void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t);
+void uwb_rc_neh_error(struct uwb_rc *, int);
+void uwb_rc_reset_all(struct uwb_rc *rc);
+void uwb_rc_pre_reset(struct uwb_rc *rc);
+int uwb_rc_post_reset(struct uwb_rc *rc);
+
+/**
+ * uwb_rsv_is_owner - is the owner of this reservation the RC?
+ * @rsv: the reservation
+ */
+static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv)
+{
+	return rsv->owner == &rsv->rc->uwb_dev;
+}
+
+/**
+ * enum uwb_notifs - UWB events that can be passed to any listeners
+ * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group.
+ * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group.
+ *
+ * Higher layers can register callback functions with the radio
+ * controller using uwb_notifs_register(). The radio controller
+ * maintains a list of all registered handlers and will notify all
+ * nodes when an event occurs.
+ */
+enum uwb_notifs {
+	UWB_NOTIF_ONAIR,
+	UWB_NOTIF_OFFAIR,
+};
+
+/* Callback function registered with UWB */
+struct uwb_notifs_handler {
+	struct list_head list_node;
+	void (*cb)(void *, struct uwb_dev *, enum uwb_notifs);
+	void *data;
+};
+
+int uwb_notifs_register(struct uwb_rc *, struct uwb_notifs_handler *);
+int uwb_notifs_deregister(struct uwb_rc *, struct uwb_notifs_handler *);
+
+
+/**
+ * UWB radio controller Event Size Entry (for creating entry tables)
+ *
+ * WUSB and WHCI define events and notifications, and they might have
+ * fixed or variable size.
+ *
+ * Each event/notification has a size which is not necessarily known
+ * in advance based on the event code. As well, vendor specific
+ * events/notifications will have a size impossible to determine
+ * unless we know about the device's specific details.
+ *
+ * It was way too smart of the spec writers not to think that it would
+ * be impossible for a generic driver to skip over vendor specific
+ * events/notifications if there are no LENGTH fields in the HEADER of
+ * each message...the transaction size cannot be counted on as the
+ * spec does not forbid to pack more than one event in a single
+ * transaction.
+ *
+ * Thus, we guess sizes with tables (or for events, when you know the
+ * size ahead of time you can use uwb_rc_neh_extra_size*()). We
+ * register tables with the known events and their sizes, and then we
+ * traverse those tables. For those with variable length, we provide a
+ * way to lookup the size inside the event/notification's
+ * payload. This allows device-specific event size tables to be
+ * registered.
+ *
+ * @size:   Size of the payload
+ *
+ * @offset: if != 0, at offset @offset-1 starts a field with a length
+ *          that has to be added to @size. The format of the field is
+ *          given by @type.
+ *
+ * @type:   Type and length of the offset field. Most common is LE 16
+ *          bits (that's why that is zero); others are there mostly to
+ *          cover for bugs and weirdos.
+ */
+struct uwb_est_entry {
+	size_t size;
+	unsigned offset;
+	enum { UWB_EST_16 = 0, UWB_EST_8 = 1 } type;
+};
+
+int uwb_est_register(u8 type, u8 code_high, u16 vendor, u16 product,
+		     const struct uwb_est_entry *, size_t entries);
+int uwb_est_unregister(u8 type, u8 code_high, u16 vendor, u16 product,
+		       const struct uwb_est_entry *, size_t entries);
+ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
+			  size_t len);
+
+/* -- Misc */
+
+enum {
+	EDC_MAX_ERRORS = 10,
+	EDC_ERROR_TIMEFRAME = HZ,
+};
+
+/* error density counter */
+struct edc {
+	unsigned long timestart;
+	u16 errorcount;
+};
+
+static inline
+void edc_init(struct edc *edc)
+{
+	edc->timestart = jiffies;
+}
+
+/* Called when an error occurred.
+ * This is way to determine if the number of acceptable errors per time
+ * period has been exceeded. It is not accurate as there are cases in which
+ * this scheme will not work, for example if there are periodic occurrences
+ * of errors that straddle updates to the start time. This scheme is
+ * sufficient for our usage.
+ *
+ * @returns 1 if maximum acceptable errors per timeframe has been exceeded.
+ */
+static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe)
+{
+	unsigned long now;
+
+	now = jiffies;
+	if (now - err_hist->timestart > timeframe) {
+		err_hist->errorcount = 1;
+		err_hist->timestart = now;
+	} else if (++err_hist->errorcount > max_err) {
+			err_hist->errorcount = 0;
+			err_hist->timestart = now;
+			return 1;
+	}
+	return 0;
+}
+
+
+/* Information Element handling */
+
+struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
+int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size);
+int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id);
+
+/*
+ * Transmission statistics
+ *
+ * UWB uses LQI and RSSI (one byte values) for reporting radio signal
+ * strength and line quality indication. We do quick and dirty
+ * averages of those. They are signed values, btw.
+ *
+ * For 8 bit quantities, we keep the min, the max, an accumulator
+ * (@sigma) and a # of samples. When @samples gets to 255, we compute
+ * the average (@sigma / @samples), place it in @sigma and reset
+ * @samples to 1 (so we use it as the first sample).
+ *
+ * Now, statistically speaking, probably I am kicking the kidneys of
+ * some books I have in my shelves collecting dust, but I just want to
+ * get an approx, not the Nobel.
+ *
+ * LOCKING: there is no locking per se, but we try to keep a lockless
+ * schema. Only _add_samples() modifies the values--as long as you
+ * have other locking on top that makes sure that no two calls of
+ * _add_sample() happen at the same time, then we are fine. Now, for
+ * resetting the values we just set @samples to 0 and that makes the
+ * next _add_sample() to start with defaults. Reading the values in
+ * _show() currently can race, so you need to make sure the calls are
+ * under the same lock that protects calls to _add_sample(). FIXME:
+ * currently unlocked (It is not ultraprecise but does the trick. Bite
+ * me).
+ */
+struct stats {
+	s8 min, max;
+	s16 sigma;
+	atomic_t samples;
+};
+
+static inline
+void stats_init(struct stats *stats)
+{
+	atomic_set(&stats->samples, 0);
+	wmb();
+}
+
+static inline
+void stats_add_sample(struct stats *stats, s8 sample)
+{
+	s8 min, max;
+	s16 sigma;
+	unsigned samples = atomic_read(&stats->samples);
+	if (samples == 0) {	/* it was zero before, so we initialize */
+		min = 127;
+		max = -128;
+		sigma = 0;
+	} else {
+		min = stats->min;
+		max = stats->max;
+		sigma = stats->sigma;
+	}
+
+	if (sample < min)	/* compute new values */
+		min = sample;
+	else if (sample > max)
+		max = sample;
+	sigma += sample;
+
+	stats->min = min;	/* commit */
+	stats->max = max;
+	stats->sigma = sigma;
+	if (atomic_add_return(1, &stats->samples) > 255) {
+		/* wrapped around! reset */
+		stats->sigma = sigma / 256;
+		atomic_set(&stats->samples, 1);
+	}
+}
+
+static inline ssize_t stats_show(struct stats *stats, char *buf)
+{
+	int min, max, avg;
+	int samples = atomic_read(&stats->samples);
+	if (samples == 0)
+		min = max = avg = 0;
+	else {
+		min = stats->min;
+		max = stats->max;
+		avg = stats->sigma / samples;
+	}
+	return scnprintf(buf, PAGE_SIZE, "%d %d %d\n", min, max, avg);
+}
+
+static inline ssize_t stats_store(struct stats *stats, const char *buf,
+				  size_t size)
+{
+	stats_init(stats);
+	return size;
+}
+
+#endif /* #ifndef __LINUX__UWB_H__ */
diff --git a/drivers/staging/uwb/uwbd.c b/drivers/staging/uwb/uwbd.c
new file mode 100644
index 000000000000..dc5c743d4f5f
--- /dev/null
+++ b/drivers/staging/uwb/uwbd.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ultra Wide Band
+ * Neighborhood Management Daemon
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This daemon takes care of maintaing information that describes the
+ * UWB neighborhood that the radios in this machine can see. It also
+ * keeps a tab of which devices are visible, makes sure each HC sits
+ * on a different channel to avoid interfering, etc.
+ *
+ * Different drivers (radio controller, device, any API in general)
+ * communicate with this daemon through an event queue. Daemon wakes
+ * up, takes a list of events and handles them one by one; handling
+ * function is extracted from a table based on the event's type and
+ * subtype. Events are freed only if the handling function says so.
+ *
+ *   . Lock protecting the event list has to be an spinlock and locked
+ *     with IRQSAVE because it might be called from an interrupt
+ *     context (ie: when events arrive and the notification drops
+ *     down from the ISR).
+ *
+ *   . UWB radio controller drivers queue events to the daemon using
+ *     uwbd_event_queue(). They just get the event, chew it to make it
+ *     look like UWBD likes it and pass it in a buffer allocated with
+ *     uwb_event_alloc().
+ *
+ * EVENTS
+ *
+ * Events have a type, a subtype, a length, some other stuff and the
+ * data blob, which depends on the event. The header is 'struct
+ * uwb_event'; for payloads, see 'struct uwbd_evt_*'.
+ *
+ * EVENT HANDLER TABLES
+ *
+ * To find a handling function for an event, the type is used to index
+ * a subtype-table in the type-table. The subtype-table is indexed
+ * with the subtype to get the function that handles the event. Start
+ * with the main type-table 'uwbd_evt_type_handler'.
+ *
+ * DEVICES
+ *
+ * Devices are created when a bunch of beacons have been received and
+ * it is stablished that the device has stable radio presence. CREATED
+ * only, not configured. Devices are ONLY configured when an
+ * Application-Specific IE Probe is receieved, in which the device
+ * declares which Protocol ID it groks. Then the device is CONFIGURED
+ * (and the driver->probe() stuff of the device model is invoked).
+ *
+ * Devices are considered disconnected when a certain number of
+ * beacons are not received in an amount of time.
+ *
+ * Handler functions are called normally uwbd_evt_handle_*().
+ */
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/freezer.h>
+
+#include "uwb-internal.h"
+
+/*
+ * UWBD Event handler function signature
+ *
+ * Return !0 if the event needs not to be freed (ie the handler
+ * takes/took care of it). 0 means the daemon code will free the
+ * event.
+ *
+ * @evt->rc is already referenced and guaranteed to exist. See
+ * uwb_evt_handle().
+ */
+typedef int (*uwbd_evt_handler_f)(struct uwb_event *);
+
+/**
+ * Properties of a UWBD event
+ *
+ * @handler:    the function that will handle this event
+ * @name:       text name of event
+ */
+struct uwbd_event {
+	uwbd_evt_handler_f handler;
+	const char *name;
+};
+
+/* Table of handlers for and properties of the UWBD Radio Control Events */
+static struct uwbd_event uwbd_urc_events[] = {
+	[UWB_RC_EVT_IE_RCV] = {
+		.handler = uwbd_evt_handle_rc_ie_rcv,
+		.name = "IE_RECEIVED"
+	},
+	[UWB_RC_EVT_BEACON] = {
+		.handler = uwbd_evt_handle_rc_beacon,
+		.name = "BEACON_RECEIVED"
+	},
+	[UWB_RC_EVT_BEACON_SIZE] = {
+		.handler = uwbd_evt_handle_rc_beacon_size,
+		.name = "BEACON_SIZE_CHANGE"
+	},
+	[UWB_RC_EVT_BPOIE_CHANGE] = {
+		.handler = uwbd_evt_handle_rc_bpoie_change,
+		.name = "BPOIE_CHANGE"
+	},
+	[UWB_RC_EVT_BP_SLOT_CHANGE] = {
+		.handler = uwbd_evt_handle_rc_bp_slot_change,
+		.name = "BP_SLOT_CHANGE"
+	},
+	[UWB_RC_EVT_DRP_AVAIL] = {
+		.handler = uwbd_evt_handle_rc_drp_avail,
+		.name = "DRP_AVAILABILITY_CHANGE"
+	},
+	[UWB_RC_EVT_DRP] = {
+		.handler = uwbd_evt_handle_rc_drp,
+		.name = "DRP"
+	},
+	[UWB_RC_EVT_DEV_ADDR_CONFLICT] = {
+		.handler = uwbd_evt_handle_rc_dev_addr_conflict,
+		.name = "DEV_ADDR_CONFLICT",
+	},
+};
+
+
+
+struct uwbd_evt_type_handler {
+	const char *name;
+	struct uwbd_event *uwbd_events;
+	size_t size;
+};
+
+/* Table of handlers for each UWBD Event type. */
+static struct uwbd_evt_type_handler uwbd_urc_evt_type_handlers[] = {
+	[UWB_RC_CET_GENERAL] = {
+		.name        = "URC",
+		.uwbd_events = uwbd_urc_events,
+		.size        = ARRAY_SIZE(uwbd_urc_events),
+	},
+};
+
+static const struct uwbd_event uwbd_message_handlers[] = {
+	[UWB_EVT_MSG_RESET] = {
+		.handler = uwbd_msg_handle_reset,
+		.name = "reset",
+	},
+};
+
+/*
+ * Handle an URC event passed to the UWB Daemon
+ *
+ * @evt: the event to handle
+ * @returns: 0 if the event can be kfreed, !0 on the contrary
+ *           (somebody else took ownership) [coincidentally, returning
+ *           a <0 errno code will free it :)].
+ *
+ * Looks up the two indirection tables (one for the type, one for the
+ * subtype) to decide which function handles it and then calls the
+ * handler.
+ *
+ * The event structure passed to the event handler has the radio
+ * controller in @evt->rc referenced. The reference will be dropped
+ * once the handler returns, so if it needs it for longer (async),
+ * it'll need to take another one.
+ */
+static
+int uwbd_event_handle_urc(struct uwb_event *evt)
+{
+	int result = -EINVAL;
+	struct uwbd_evt_type_handler *type_table;
+	uwbd_evt_handler_f handler;
+	u8 type, context;
+	u16 event;
+
+	type = evt->notif.rceb->bEventType;
+	event = le16_to_cpu(evt->notif.rceb->wEvent);
+	context = evt->notif.rceb->bEventContext;
+
+	if (type >= ARRAY_SIZE(uwbd_urc_evt_type_handlers))
+		goto out;
+	type_table = &uwbd_urc_evt_type_handlers[type];
+	if (type_table->uwbd_events == NULL)
+		goto out;
+	if (event >= type_table->size)
+		goto out;
+	handler = type_table->uwbd_events[event].handler;
+	if (handler == NULL)
+		goto out;
+
+	result = (*handler)(evt);
+out:
+	if (result < 0)
+		dev_err(&evt->rc->uwb_dev.dev,
+			"UWBD: event 0x%02x/%04x/%02x, handling failed: %d\n",
+			type, event, context, result);
+	return result;
+}
+
+static void uwbd_event_handle_message(struct uwb_event *evt)
+{
+	struct uwb_rc *rc;
+	int result;
+
+	rc = evt->rc;
+
+	if (evt->message < 0 || evt->message >= ARRAY_SIZE(uwbd_message_handlers)) {
+		dev_err(&rc->uwb_dev.dev, "UWBD: invalid message type %d\n", evt->message);
+		return;
+	}
+
+	result = uwbd_message_handlers[evt->message].handler(evt);
+	if (result < 0)
+		dev_err(&rc->uwb_dev.dev, "UWBD: '%s' message failed: %d\n",
+			uwbd_message_handlers[evt->message].name, result);
+}
+
+static void uwbd_event_handle(struct uwb_event *evt)
+{
+	struct uwb_rc *rc;
+	int should_keep;
+
+	rc = evt->rc;
+
+	if (rc->ready) {
+		switch (evt->type) {
+		case UWB_EVT_TYPE_NOTIF:
+			should_keep = uwbd_event_handle_urc(evt);
+			if (should_keep <= 0)
+				kfree(evt->notif.rceb);
+			break;
+		case UWB_EVT_TYPE_MSG:
+			uwbd_event_handle_message(evt);
+			break;
+		default:
+			dev_err(&rc->uwb_dev.dev, "UWBD: invalid event type %d\n", evt->type);
+			break;
+		}
+	}
+
+	__uwb_rc_put(rc);	/* for the __uwb_rc_get() in uwb_rc_notif_cb() */
+}
+
+/**
+ * UWB Daemon
+ *
+ * Listens to all UWB notifications and takes care to track the state
+ * of the UWB neighbourhood for the kernel. When we do a run, we
+ * spinlock, move the list to a private copy and release the
+ * lock. Hold it as little as possible. Not a conflict: it is
+ * guaranteed we own the events in the private list.
+ *
+ * FIXME: should change so we don't have a 1HZ timer all the time, but
+ *        only if there are devices.
+ */
+static int uwbd(void *param)
+{
+	struct uwb_rc *rc = param;
+	unsigned long flags;
+	struct uwb_event *evt;
+	int should_stop = 0;
+
+	while (1) {
+		wait_event_interruptible_timeout(
+			rc->uwbd.wq,
+			!list_empty(&rc->uwbd.event_list)
+			  || (should_stop = kthread_should_stop()),
+			HZ);
+		if (should_stop)
+			break;
+
+		spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+		if (!list_empty(&rc->uwbd.event_list)) {
+			evt = list_first_entry(&rc->uwbd.event_list, struct uwb_event, list_node);
+			list_del(&evt->list_node);
+		} else
+			evt = NULL;
+		spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
+
+		if (evt) {
+			uwbd_event_handle(evt);
+			kfree(evt);
+		}
+
+		uwb_beca_purge(rc);	/* Purge devices that left */
+	}
+	return 0;
+}
+
+
+/** Start the UWB daemon */
+void uwbd_start(struct uwb_rc *rc)
+{
+	struct task_struct *task = kthread_run(uwbd, rc, "uwbd");
+	if (IS_ERR(task)) {
+		rc->uwbd.task = NULL;
+		printk(KERN_ERR "UWB: Cannot start management daemon; "
+		       "UWB won't work\n");
+	} else {
+		rc->uwbd.task = task;
+		rc->uwbd.pid = rc->uwbd.task->pid;
+	}
+}
+
+/* Stop the UWB daemon and free any unprocessed events */
+void uwbd_stop(struct uwb_rc *rc)
+{
+	if (rc->uwbd.task)
+		kthread_stop(rc->uwbd.task);
+	uwbd_flush(rc);
+}
+
+/*
+ * Queue an event for the management daemon
+ *
+ * When some lower layer receives an event, it uses this function to
+ * push it forward to the UWB daemon.
+ *
+ * Once you pass the event, you don't own it any more, but the daemon
+ * does. It will uwb_event_free() it when done, so make sure you
+ * uwb_event_alloc()ed it or bad things will happen.
+ *
+ * If the daemon is not running, we just free the event.
+ */
+void uwbd_event_queue(struct uwb_event *evt)
+{
+	struct uwb_rc *rc = evt->rc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
+	if (rc->uwbd.pid != 0) {
+		list_add(&evt->list_node, &rc->uwbd.event_list);
+		wake_up_all(&rc->uwbd.wq);
+	} else {
+		__uwb_rc_put(evt->rc);
+		if (evt->type == UWB_EVT_TYPE_NOTIF)
+			kfree(evt->notif.rceb);
+		kfree(evt);
+	}
+	spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
+	return;
+}
+
+void uwbd_flush(struct uwb_rc *rc)
+{
+	struct uwb_event *evt, *nxt;
+
+	spin_lock_irq(&rc->uwbd.event_list_lock);
+	list_for_each_entry_safe(evt, nxt, &rc->uwbd.event_list, list_node) {
+		if (evt->rc == rc) {
+			__uwb_rc_put(rc);
+			list_del(&evt->list_node);
+			if (evt->type == UWB_EVT_TYPE_NOTIF)
+				kfree(evt->notif.rceb);
+			kfree(evt);
+		}
+	}
+	spin_unlock_irq(&rc->uwbd.event_list_lock);
+}
diff --git a/drivers/staging/uwb/whc-rc.c b/drivers/staging/uwb/whc-rc.c
new file mode 100644
index 000000000000..34020ed351ab
--- /dev/null
+++ b/drivers/staging/uwb/whc-rc.c
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Wireless Host Controller: Radio Control Interface (WHCI v0.95[2.3])
+ * Radio Control command/event transport to the UWB stack
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Initialize and hook up the Radio Control interface.
+ *
+ * For each device probed, creates an 'struct whcrc' which contains
+ * just the representation of the UWB Radio Controller, and the logic
+ * for reading notifications and passing them to the UWB Core.
+ *
+ * So we initialize all of those, register the UWB Radio Controller
+ * and setup the notification/event handle to pipe the notifications
+ * to the UWB management Daemon.
+ *
+ * Once uwb_rc_add() is called, the UWB stack takes control, resets
+ * the radio and readies the device to take commands the UWB
+ * API/user-space.
+ *
+ * Note this driver is just a transport driver; the commands are
+ * formed at the UWB stack and given to this driver who will deliver
+ * them to the hw and transfer the replies/notifications back to the
+ * UWB stack through the UWB daemon (UWBD).
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include "uwb.h"
+#include "include/whci.h"
+#include "include/umc.h"
+
+#include "uwb-internal.h"
+
+/**
+ * Descriptor for an instance of the UWB Radio Control Driver that
+ * attaches to the URC interface of the WHCI PCI card.
+ *
+ * Unless there is a lock specific to the 'data members', all access
+ * is protected by uwb_rc->mutex.
+ */
+struct whcrc {
+	struct umc_dev *umc_dev;
+	struct uwb_rc *uwb_rc;		/* UWB host controller */
+
+	unsigned long area;
+	void __iomem *rc_base;
+	size_t rc_len;
+	spinlock_t irq_lock;
+
+	void *evt_buf, *cmd_buf;
+	dma_addr_t evt_dma_buf, cmd_dma_buf;
+	wait_queue_head_t cmd_wq;
+	struct work_struct event_work;
+};
+
+/**
+ * Execute an UWB RC command on WHCI/RC
+ *
+ * @rc:       Instance of a Radio Controller that is a whcrc
+ * @cmd:      Buffer containing the RCCB and payload to execute
+ * @cmd_size: Size of the command buffer.
+ *
+ * We copy the command into whcrc->cmd_buf (as it is pretty and
+ * aligned`and physically contiguous) and then press the right keys in
+ * the controller's URCCMD register to get it to read it. We might
+ * have to wait for the cmd_sem to be open to us.
+ *
+ * NOTE: rc's mutex has to be locked
+ */
+static int whcrc_cmd(struct uwb_rc *uwb_rc,
+	      const struct uwb_rccb *cmd, size_t cmd_size)
+{
+	int result = 0;
+	struct whcrc *whcrc = uwb_rc->priv;
+	struct device *dev = &whcrc->umc_dev->dev;
+	u32 urccmd;
+
+	if (cmd_size >= 4096)
+		return -EINVAL;
+
+	/*
+	 * If the URC is halted, then the hardware has reset itself.
+	 * Attempt to recover by restarting the device and then return
+	 * an error as it's likely that the current command isn't
+	 * valid for a newly started RC.
+	 */
+	if (le_readl(whcrc->rc_base + URCSTS) & URCSTS_HALTED) {
+		dev_err(dev, "requesting reset of halted radio controller\n");
+		uwb_rc_reset_all(uwb_rc);
+		return -EIO;
+	}
+
+	result = wait_event_timeout(whcrc->cmd_wq,
+		!(le_readl(whcrc->rc_base + URCCMD) & URCCMD_ACTIVE), HZ/2);
+	if (result == 0) {
+		dev_err(dev, "device is not ready to execute commands\n");
+		return -ETIMEDOUT;
+	}
+
+	memmove(whcrc->cmd_buf, cmd, cmd_size);
+	le_writeq(whcrc->cmd_dma_buf, whcrc->rc_base + URCCMDADDR);
+
+	spin_lock(&whcrc->irq_lock);
+	urccmd = le_readl(whcrc->rc_base + URCCMD);
+	urccmd &= ~(URCCMD_EARV | URCCMD_SIZE_MASK);
+	le_writel(urccmd | URCCMD_ACTIVE | URCCMD_IWR | cmd_size,
+		  whcrc->rc_base + URCCMD);
+	spin_unlock(&whcrc->irq_lock);
+
+	return 0;
+}
+
+static int whcrc_reset(struct uwb_rc *rc)
+{
+	struct whcrc *whcrc = rc->priv;
+
+	return umc_controller_reset(whcrc->umc_dev);
+}
+
+/**
+ * Reset event reception mechanism and tell hw we are ready to get more
+ *
+ * We have read all the events in the event buffer, so we are ready to
+ * reset it to the beginning.
+ *
+ * This is only called during initialization or after an event buffer
+ * has been retired.  This means we can be sure that event processing
+ * is disabled and it's safe to update the URCEVTADDR register.
+ *
+ * There's no need to wait for the event processing to start as the
+ * URC will not clear URCCMD_ACTIVE until (internal) event buffer
+ * space is available.
+ */
+static
+void whcrc_enable_events(struct whcrc *whcrc)
+{
+	u32 urccmd;
+
+	le_writeq(whcrc->evt_dma_buf, whcrc->rc_base + URCEVTADDR);
+
+	spin_lock(&whcrc->irq_lock);
+	urccmd = le_readl(whcrc->rc_base + URCCMD) & ~URCCMD_ACTIVE;
+	le_writel(urccmd | URCCMD_EARV, whcrc->rc_base + URCCMD);
+	spin_unlock(&whcrc->irq_lock);
+}
+
+static void whcrc_event_work(struct work_struct *work)
+{
+	struct whcrc *whcrc = container_of(work, struct whcrc, event_work);
+	size_t size;
+	u64 urcevtaddr;
+
+	urcevtaddr = le_readq(whcrc->rc_base + URCEVTADDR);
+	size = urcevtaddr & URCEVTADDR_OFFSET_MASK;
+
+	uwb_rc_neh_grok(whcrc->uwb_rc, whcrc->evt_buf, size);
+	whcrc_enable_events(whcrc);
+}
+
+/**
+ * Catch interrupts?
+ *
+ * We ack inmediately (and expect the hw to do the right thing and
+ * raise another IRQ if things have changed :)
+ */
+static
+irqreturn_t whcrc_irq_cb(int irq, void *_whcrc)
+{
+	struct whcrc *whcrc = _whcrc;
+	struct device *dev = &whcrc->umc_dev->dev;
+	u32 urcsts;
+
+	urcsts = le_readl(whcrc->rc_base + URCSTS);
+	if (!(urcsts & URCSTS_INT_MASK))
+		return IRQ_NONE;
+	le_writel(urcsts & URCSTS_INT_MASK, whcrc->rc_base + URCSTS);
+
+	if (urcsts & URCSTS_HSE) {
+		dev_err(dev, "host system error -- hardware halted\n");
+		/* FIXME: do something sensible here */
+		goto out;
+	}
+	if (urcsts & URCSTS_ER)
+		schedule_work(&whcrc->event_work);
+	if (urcsts & URCSTS_RCI)
+		wake_up_all(&whcrc->cmd_wq);
+out:
+	return IRQ_HANDLED;
+}
+
+
+/**
+ * Initialize a UMC RC interface: map regions, get (shared) IRQ
+ */
+static
+int whcrc_setup_rc_umc(struct whcrc *whcrc)
+{
+	int result = 0;
+	struct device *dev = &whcrc->umc_dev->dev;
+	struct umc_dev *umc_dev = whcrc->umc_dev;
+
+	whcrc->area = umc_dev->resource.start;
+	whcrc->rc_len = resource_size(&umc_dev->resource);
+	result = -EBUSY;
+	if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME) == NULL) {
+		dev_err(dev, "can't request URC region (%zu bytes @ 0x%lx): %d\n",
+			whcrc->rc_len, whcrc->area, result);
+		goto error_request_region;
+	}
+
+	whcrc->rc_base = ioremap_nocache(whcrc->area, whcrc->rc_len);
+	if (whcrc->rc_base == NULL) {
+		dev_err(dev, "can't ioremap registers (%zu bytes @ 0x%lx): %d\n",
+			whcrc->rc_len, whcrc->area, result);
+		goto error_ioremap_nocache;
+	}
+
+	result = request_irq(umc_dev->irq, whcrc_irq_cb, IRQF_SHARED,
+			     KBUILD_MODNAME, whcrc);
+	if (result < 0) {
+		dev_err(dev, "can't allocate IRQ %d: %d\n",
+			umc_dev->irq, result);
+		goto error_request_irq;
+	}
+
+	result = -ENOMEM;
+	whcrc->cmd_buf = dma_alloc_coherent(&umc_dev->dev, PAGE_SIZE,
+					    &whcrc->cmd_dma_buf, GFP_KERNEL);
+	if (whcrc->cmd_buf == NULL) {
+		dev_err(dev, "Can't allocate cmd transfer buffer\n");
+		goto error_cmd_buffer;
+	}
+
+	whcrc->evt_buf = dma_alloc_coherent(&umc_dev->dev, PAGE_SIZE,
+					    &whcrc->evt_dma_buf, GFP_KERNEL);
+	if (whcrc->evt_buf == NULL) {
+		dev_err(dev, "Can't allocate evt transfer buffer\n");
+		goto error_evt_buffer;
+	}
+	return 0;
+
+error_evt_buffer:
+	dma_free_coherent(&umc_dev->dev, PAGE_SIZE, whcrc->cmd_buf,
+			  whcrc->cmd_dma_buf);
+error_cmd_buffer:
+	free_irq(umc_dev->irq, whcrc);
+error_request_irq:
+	iounmap(whcrc->rc_base);
+error_ioremap_nocache:
+	release_mem_region(whcrc->area, whcrc->rc_len);
+error_request_region:
+	return result;
+}
+
+
+/**
+ * Release RC's UMC resources
+ */
+static
+void whcrc_release_rc_umc(struct whcrc *whcrc)
+{
+	struct umc_dev *umc_dev = whcrc->umc_dev;
+
+	dma_free_coherent(&umc_dev->dev, PAGE_SIZE, whcrc->evt_buf,
+			  whcrc->evt_dma_buf);
+	dma_free_coherent(&umc_dev->dev, PAGE_SIZE, whcrc->cmd_buf,
+			  whcrc->cmd_dma_buf);
+	free_irq(umc_dev->irq, whcrc);
+	iounmap(whcrc->rc_base);
+	release_mem_region(whcrc->area, whcrc->rc_len);
+}
+
+
+/**
+ * whcrc_start_rc - start a WHCI radio controller
+ * @whcrc: the radio controller to start
+ *
+ * Reset the UMC device, start the radio controller, enable events and
+ * finally enable interrupts.
+ */
+static int whcrc_start_rc(struct uwb_rc *rc)
+{
+	struct whcrc *whcrc = rc->priv;
+	struct device *dev = &whcrc->umc_dev->dev;
+
+	/* Reset the thing */
+	le_writel(URCCMD_RESET, whcrc->rc_base + URCCMD);
+	if (whci_wait_for(dev, whcrc->rc_base + URCCMD, URCCMD_RESET, 0,
+			  5000, "hardware reset") < 0)
+		return -EBUSY;
+
+	/* Set the event buffer, start the controller (enable IRQs later) */
+	le_writel(0, whcrc->rc_base + URCINTR);
+	le_writel(URCCMD_RS, whcrc->rc_base + URCCMD);
+	if (whci_wait_for(dev, whcrc->rc_base + URCSTS, URCSTS_HALTED, 0,
+			  5000, "radio controller start") < 0)
+		return -ETIMEDOUT;
+	whcrc_enable_events(whcrc);
+	le_writel(URCINTR_EN_ALL, whcrc->rc_base + URCINTR);
+	return 0;
+}
+
+
+/**
+ * whcrc_stop_rc - stop a WHCI radio controller
+ * @whcrc: the radio controller to stop
+ *
+ * Disable interrupts and cancel any pending event processing work
+ * before clearing the Run/Stop bit.
+ */
+static
+void whcrc_stop_rc(struct uwb_rc *rc)
+{
+	struct whcrc *whcrc = rc->priv;
+	struct umc_dev *umc_dev = whcrc->umc_dev;
+
+	le_writel(0, whcrc->rc_base + URCINTR);
+	cancel_work_sync(&whcrc->event_work);
+
+	le_writel(0, whcrc->rc_base + URCCMD);
+	whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS,
+		      URCSTS_HALTED, URCSTS_HALTED, 100, "radio controller stop");
+}
+
+static void whcrc_init(struct whcrc *whcrc)
+{
+	spin_lock_init(&whcrc->irq_lock);
+	init_waitqueue_head(&whcrc->cmd_wq);
+	INIT_WORK(&whcrc->event_work, whcrc_event_work);
+}
+
+/**
+ * Initialize the radio controller.
+ *
+ * NOTE: we setup whcrc->uwb_rc before calling uwb_rc_add(); in the
+ *       IRQ handler we use that to determine if the hw is ready to
+ *       handle events. Looks like a race condition, but it really is
+ *       not.
+ */
+static
+int whcrc_probe(struct umc_dev *umc_dev)
+{
+	int result;
+	struct uwb_rc *uwb_rc;
+	struct whcrc *whcrc;
+	struct device *dev = &umc_dev->dev;
+
+	result = -ENOMEM;
+	uwb_rc = uwb_rc_alloc();
+	if (uwb_rc == NULL) {
+		dev_err(dev, "unable to allocate RC instance\n");
+		goto error_rc_alloc;
+	}
+	whcrc = kzalloc(sizeof(*whcrc), GFP_KERNEL);
+	if (whcrc == NULL) {
+		dev_err(dev, "unable to allocate WHC-RC instance\n");
+		goto error_alloc;
+	}
+	whcrc_init(whcrc);
+	whcrc->umc_dev = umc_dev;
+
+	result = whcrc_setup_rc_umc(whcrc);
+	if (result < 0) {
+		dev_err(dev, "Can't setup RC UMC interface: %d\n", result);
+		goto error_setup_rc_umc;
+	}
+	whcrc->uwb_rc = uwb_rc;
+
+	uwb_rc->owner = THIS_MODULE;
+	uwb_rc->cmd   = whcrc_cmd;
+	uwb_rc->reset = whcrc_reset;
+	uwb_rc->start = whcrc_start_rc;
+	uwb_rc->stop  = whcrc_stop_rc;
+
+	result = uwb_rc_add(uwb_rc, dev, whcrc);
+	if (result < 0)
+		goto error_rc_add;
+	umc_set_drvdata(umc_dev, whcrc);
+	return 0;
+
+error_rc_add:
+	whcrc_release_rc_umc(whcrc);
+error_setup_rc_umc:
+	kfree(whcrc);
+error_alloc:
+	uwb_rc_put(uwb_rc);
+error_rc_alloc:
+	return result;
+}
+
+/**
+ * Clean up the radio control resources
+ *
+ * When we up the command semaphore, everybody possibly held trying to
+ * execute a command should be granted entry and then they'll see the
+ * host is quiescing and up it (so it will chain to the next waiter).
+ * This should not happen (in any case), as we can only remove when
+ * there are no handles open...
+ */
+static void whcrc_remove(struct umc_dev *umc_dev)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc_dev);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	umc_set_drvdata(umc_dev, NULL);
+	uwb_rc_rm(uwb_rc);
+	whcrc_release_rc_umc(whcrc);
+	kfree(whcrc);
+	uwb_rc_put(uwb_rc);
+}
+
+static int whcrc_pre_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	uwb_rc_pre_reset(uwb_rc);
+	return 0;
+}
+
+static int whcrc_post_reset(struct umc_dev *umc)
+{
+	struct whcrc *whcrc = umc_get_drvdata(umc);
+	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
+
+	return uwb_rc_post_reset(uwb_rc);
+}
+
+/* PCI device ID's that we handle [so it gets loaded] */
+static struct pci_device_id __used whcrc_id_table[] = {
+	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
+	{ /* empty last entry */ }
+};
+MODULE_DEVICE_TABLE(pci, whcrc_id_table);
+
+static struct umc_driver whcrc_driver = {
+	.name       = "whc-rc",
+	.cap_id     = UMC_CAP_ID_WHCI_RC,
+	.probe      = whcrc_probe,
+	.remove     = whcrc_remove,
+	.pre_reset  = whcrc_pre_reset,
+	.post_reset = whcrc_post_reset,
+};
+
+static int __init whcrc_driver_init(void)
+{
+	return umc_driver_register(&whcrc_driver);
+}
+module_init(whcrc_driver_init);
+
+static void __exit whcrc_driver_exit(void)
+{
+	umc_driver_unregister(&whcrc_driver);
+}
+module_exit(whcrc_driver_exit);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Wireless Host Controller Radio Control Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/uwb/whci.c b/drivers/staging/uwb/whci.c
new file mode 100644
index 000000000000..a8832f64d708
--- /dev/null
+++ b/drivers/staging/uwb/whci.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * WHCI UWB Multi-interface Controller enumerator.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include "include/whci.h"
+#include "include/umc.h"
+
+struct whci_card {
+	struct pci_dev *pci;
+	void __iomem *uwbbase;
+	u8 n_caps;
+	struct umc_dev *devs[0];
+};
+
+
+/* Fix faulty HW :( */
+static
+u64 whci_capdata_quirks(struct whci_card *card, u64 capdata)
+{
+	u64 capdata_orig = capdata;
+	struct pci_dev *pci_dev = card->pci;
+	if (pci_dev->vendor == PCI_VENDOR_ID_INTEL
+	    && (pci_dev->device == 0x0c3b || pci_dev->device == 0004)
+	    && pci_dev->class == 0x0d1010) {
+		switch (UWBCAPDATA_TO_CAP_ID(capdata)) {
+			/* WLP capability has 0x100 bytes of aperture */
+		case 0x80:
+			capdata |= 0x40 << 8; break;
+			/* WUSB capability has 0x80 bytes of aperture
+			 * and ID is 1 */
+		case 0x02:
+			capdata &= ~0xffff;
+			capdata |= 0x2001;
+			break;
+		}
+	}
+	if (capdata_orig != capdata)
+		dev_warn(&pci_dev->dev,
+			 "PCI v%04x d%04x c%06x#%02x: "
+			 "corrected capdata from %016Lx to %016Lx\n",
+			 pci_dev->vendor, pci_dev->device, pci_dev->class,
+			 (unsigned)UWBCAPDATA_TO_CAP_ID(capdata),
+			 (unsigned long long)capdata_orig,
+			 (unsigned long long)capdata);
+	return capdata;
+}
+
+
+/**
+ * whci_wait_for - wait for a WHCI register to be set
+ *
+ * Polls (for at most @max_ms ms) until '*@reg & @mask == @result'.
+ */
+int whci_wait_for(struct device *dev, u32 __iomem *reg, u32 mask, u32 result,
+	unsigned long max_ms, const char *tag)
+{
+	unsigned t = 0;
+	u32 val;
+	for (;;) {
+		val = le_readl(reg);
+		if ((val & mask) == result)
+			break;
+		if (t >= max_ms) {
+			dev_err(dev, "%s timed out\n", tag);
+			return -ETIMEDOUT;
+		}
+		msleep(10);
+		t += 10;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(whci_wait_for);
+
+
+/*
+ * NOTE: the capinfo and capdata registers are slightly different
+ *       (size and cap-id fields). So for cap #0, we need to fill
+ *       in. Size comes from the size of the register block
+ *       (statically calculated); cap_id comes from nowhere, we use
+ *       zero, that is reserved, for the radio controller, because
+ *       none was defined at the spec level.
+ */
+static int whci_add_cap(struct whci_card *card, int n)
+{
+	struct umc_dev *umc;
+	u64 capdata;
+	int bar, err;
+
+	umc = umc_device_create(&card->pci->dev, n);
+	if (umc == NULL)
+		return -ENOMEM;
+
+	capdata = le_readq(card->uwbbase + UWBCAPDATA(n));
+
+	bar = UWBCAPDATA_TO_BAR(capdata) << 1;
+
+	capdata = whci_capdata_quirks(card, capdata);
+	/* Capability 0 is the radio controller. It's size is 32
+	 * bytes (WHCI0.95[2.3, T2-9]). */
+	umc->version         = UWBCAPDATA_TO_VERSION(capdata);
+	umc->cap_id          = n == 0 ? 0 : UWBCAPDATA_TO_CAP_ID(capdata);
+	umc->bar	     = bar;
+	umc->resource.start  = pci_resource_start(card->pci, bar)
+		+ UWBCAPDATA_TO_OFFSET(capdata);
+	umc->resource.end    = umc->resource.start
+		+ (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1;
+	umc->resource.name   = dev_name(&umc->dev);
+	umc->resource.flags  = card->pci->resource[bar].flags;
+	umc->resource.parent = &card->pci->resource[bar];
+	umc->irq             = card->pci->irq;
+
+	err = umc_device_register(umc);
+	if (err < 0)
+		goto error;
+	card->devs[n] = umc;
+	return 0;
+
+error:
+	kfree(umc);
+	return err;
+}
+
+static void whci_del_cap(struct whci_card *card, int n)
+{
+	struct umc_dev *umc = card->devs[n];
+
+	umc_device_unregister(umc);
+}
+
+static int whci_n_caps(struct pci_dev *pci)
+{
+	void __iomem *uwbbase;
+	u64 capinfo;
+
+	uwbbase = pci_iomap(pci, 0, 8);
+	if (!uwbbase)
+		return -ENOMEM;
+	capinfo = le_readq(uwbbase + UWBCAPINFO);
+	pci_iounmap(pci, uwbbase);
+
+	return UWBCAPINFO_TO_N_CAPS(capinfo);
+}
+
+static int whci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+{
+	struct whci_card *card;
+	int err, n_caps, n;
+
+	err = pci_enable_device(pci);
+	if (err < 0)
+		goto error;
+	pci_enable_msi(pci);
+	pci_set_master(pci);
+	err = -ENXIO;
+	if (!pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
+		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64));
+	else if (!pci_set_dma_mask(pci, DMA_BIT_MASK(32)))
+		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32));
+	else
+		goto error_dma;
+
+	err = n_caps = whci_n_caps(pci);
+	if (n_caps < 0)
+		goto error_ncaps;
+
+	err = -ENOMEM;
+	card = kzalloc(sizeof(struct whci_card)
+		       + sizeof(struct umc_dev *) * (n_caps + 1),
+		       GFP_KERNEL);
+	if (card == NULL)
+		goto error_kzalloc;
+	card->pci = pci;
+	card->n_caps = n_caps;
+
+	err = -EBUSY;
+	if (!request_mem_region(pci_resource_start(pci, 0),
+				UWBCAPDATA_SIZE(card->n_caps),
+				"whci (capability data)"))
+		goto error_request_memregion;
+	err = -ENOMEM;
+	card->uwbbase = pci_iomap(pci, 0, UWBCAPDATA_SIZE(card->n_caps));
+	if (!card->uwbbase)
+		goto error_iomap;
+
+	/* Add each capability. */
+	for (n = 0; n <= card->n_caps; n++) {
+		err = whci_add_cap(card, n);
+		if (err < 0 && n == 0) {
+			dev_err(&pci->dev, "cannot bind UWB radio controller:"
+				" %d\n", err);
+			goto error_bind;
+		}
+		if (err < 0)
+			dev_warn(&pci->dev, "warning: cannot bind capability "
+				 "#%u: %d\n", n, err);
+	}
+	pci_set_drvdata(pci, card);
+	return 0;
+
+error_bind:
+	pci_iounmap(pci, card->uwbbase);
+error_iomap:
+	release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps));
+error_request_memregion:
+	kfree(card);
+error_kzalloc:
+error_ncaps:
+error_dma:
+	pci_disable_msi(pci);
+	pci_disable_device(pci);
+error:
+	return err;
+}
+
+static void whci_remove(struct pci_dev *pci)
+{
+	struct whci_card *card = pci_get_drvdata(pci);
+	int n;
+
+	pci_set_drvdata(pci, NULL);
+	/* Unregister each capability in reverse (so the master device
+	 * is unregistered last). */
+	for (n = card->n_caps; n >= 0 ; n--)
+		whci_del_cap(card, n);
+	pci_iounmap(pci, card->uwbbase);
+	release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps));
+	kfree(card);
+	pci_disable_msi(pci);
+	pci_disable_device(pci);
+}
+
+static struct pci_device_id whci_id_table[] = {
+	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
+	{ 0 },
+};
+MODULE_DEVICE_TABLE(pci, whci_id_table);
+
+
+static struct pci_driver whci_driver = {
+	.name     = "whci",
+	.id_table = whci_id_table,
+	.probe    = whci_probe,
+	.remove   = whci_remove,
+};
+
+module_pci_driver(whci_driver);
+MODULE_DESCRIPTION("WHCI UWB Multi-interface Controller enumerator");
+MODULE_AUTHOR("Cambridge Silicon Radio Ltd.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wusbcore/Documentation/wusb-cbaf b/drivers/staging/wusbcore/Documentation/wusb-cbaf
new file mode 100644
index 000000000000..8b3d43efce90
--- /dev/null
+++ b/drivers/staging/wusbcore/Documentation/wusb-cbaf
@@ -0,0 +1,130 @@
+#! /bin/bash
+#
+
+set -e
+
+progname=$(basename $0)
+function help
+{
+    cat <<EOF
+Usage: $progname COMMAND DEVICEs [ARGS]
+
+Command for manipulating the pairing/authentication credentials of a
+Wireless USB device that supports wired-mode Cable-Based-Association.
+
+Works in conjunction with the wusb-cba.ko driver from http://linuxuwb.org.
+
+
+DEVICE
+
+ sysfs path to the device to authenticate; for example, both this
+ guys are the same:
+
+ /sys/devices/pci0000:00/0000:00:1d.7/usb1/1-4/1-4.4/1-4.4:1.1
+ /sys/bus/usb/drivers/wusb-cbaf/1-4.4:1.1
+
+COMMAND/ARGS are
+
+ start
+
+   Start a WUSB host controller (by setting up a CHID)
+
+ set-chid DEVICE HOST-CHID HOST-BANDGROUP HOST-NAME
+
+   Sets host information in the device; after this you can call the
+   get-cdid to see how does this device report itself to us.
+
+ get-cdid DEVICE
+
+   Get the device ID associated to the HOST-CHID we sent with
+   'set-chid'. We might not know about it.
+
+ set-cc DEVICE
+
+   If we allow the device to connect, set a random new CDID and CK
+   (connection key). Device saves them for the next time it wants to
+   connect wireless. We save them for that next time also so we can
+   authenticate the device (when we see the CDID he uses to id
+   itself) and the CK to crypto talk to it.
+
+CHID is always 16 hex bytes in 'XX YY ZZ...' form
+BANDGROUP is almost always 0001
+
+Examples:
+
+  You can default most arguments to '' to get a sane value:
+
+  $ $progname set-chid '' '' '' "My host name"
+
+  A full sequence:
+
+  $ $progname set-chid '' '' '' "My host name"
+  $ $progname get-cdid ''
+  $ $progname set-cc ''
+
+EOF
+}
+
+
+# Defaults
+# FIXME: CHID should come from a database :), band group from the host
+host_CHID="00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff"
+host_band_group="0001"
+host_name=$(hostname)
+
+devs="$(echo /sys/bus/usb/drivers/wusb-cbaf/[0-9]*)"
+hdevs="$(for h in /sys/class/uwb_rc/*/wusbhc; do readlink -f $h; done)"
+
+result=0
+case $1 in
+    start)
+        for dev in ${2:-$hdevs}
+          do
+          echo $host_CHID > $dev/wusb_chid
+          echo I: started host $(basename $dev) >&2
+        done
+        ;;
+    stop)
+        for dev in ${2:-$hdevs}
+          do
+          echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
+          echo I: stopped host $(basename $dev) >&2
+        done
+        ;;
+    set-chid)
+        shift
+        for dev in ${2:-$devs}; do
+            echo "${4:-$host_name}" > $dev/wusb_host_name
+            echo "${3:-$host_band_group}" > $dev/wusb_host_band_groups
+            echo ${2:-$host_CHID} > $dev/wusb_chid
+        done
+        ;;
+    get-cdid)
+        for dev in ${2:-$devs}
+          do
+          cat $dev/wusb_cdid
+        done
+        ;;
+    set-cc)
+        for dev in ${2:-$devs}; do
+            shift
+            CDID="$(head --bytes=16 /dev/urandom  | od -tx1 -An)"
+            CK="$(head --bytes=16 /dev/urandom  | od -tx1 -An)"
+            echo "$CDID" > $dev/wusb_cdid
+            echo "$CK" > $dev/wusb_ck
+
+            echo I: CC set >&2
+            echo "CHID: $(cat $dev/wusb_chid)"
+            echo "CDID:$CDID"
+            echo "CK:  $CK"
+        done
+        ;;
+    help|h|--help|-h)
+        help
+        ;;
+    *)
+        echo "E: Unknown usage" 1>&2
+        help 1>&2
+        result=1
+esac
+exit $result
diff --git a/drivers/staging/wusbcore/Documentation/wusb-design-overview.rst b/drivers/staging/wusbcore/Documentation/wusb-design-overview.rst
new file mode 100644
index 000000000000..dc5e21609bb5
--- /dev/null
+++ b/drivers/staging/wusbcore/Documentation/wusb-design-overview.rst
@@ -0,0 +1,457 @@
+================================
+Linux UWB + Wireless USB + WiNET
+================================
+
+   Copyright (C) 2005-2006 Intel Corporation
+
+   Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License version
+   2 as published by the Free Software Foundation.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+
+Please visit http://bughost.org/thewiki/Design-overview.txt-1.8 for
+updated content.
+
+    * Design-overview.txt-1.8
+
+This code implements a Ultra Wide Band stack for Linux, as well as
+drivers for the USB based UWB radio controllers defined in the
+Wireless USB 1.0 specification (including Wireless USB host controller
+and an Intel WiNET controller).
+
+.. Contents
+   1. Introduction
+         1. HWA: Host Wire adapters, your Wireless USB dongle
+
+         2. DWA: Device Wired Adaptor, a Wireless USB hub for wired
+            devices
+         3. WHCI: Wireless Host Controller Interface, the PCI WUSB host
+            adapter
+   2. The UWB stack
+         1. Devices and hosts: the basic structure
+
+         2. Host Controller life cycle
+
+         3. On the air: beacons and enumerating the radio neighborhood
+
+         4. Device lists
+         5. Bandwidth allocation
+
+   3. Wireless USB Host Controller drivers
+
+   4. Glossary
+
+
+Introduction
+============
+
+UWB is a wide-band communication protocol that is to serve also as the
+low-level protocol for others (much like TCP sits on IP). Currently
+these others are Wireless USB and TCP/IP, but seems Bluetooth and
+Firewire/1394 are coming along.
+
+UWB uses a band from roughly 3 to 10 GHz, transmitting at a max of
+~-41dB (or 0.074 uW/MHz--geography specific data is still being
+negotiated w/ regulators, so watch for changes). That band is divided in
+a bunch of ~1.5 GHz wide channels (or band groups) composed of three
+subbands/subchannels (528 MHz each). Each channel is independent of each
+other, so you could consider them different "busses". Initially this
+driver considers them all a single one.
+
+Radio time is divided in 65536 us long /superframes/, each one divided
+in 256 256us long /MASs/ (Media Allocation Slots), which are the basic
+time/media allocation units for transferring data. At the beginning of
+each superframe there is a Beacon Period (BP), where every device
+transmit its beacon on a single MAS. The length of the BP depends on how
+many devices are present and the length of their beacons.
+
+Devices have a MAC (fixed, 48 bit address) and a device (changeable, 16
+bit address) and send periodic beacons to advertise themselves and pass
+info on what they are and do. They advertise their capabilities and a
+bunch of other stuff.
+
+The different logical parts of this driver are:
+
+    *
+
+      *UWB*: the Ultra-Wide-Band stack -- manages the radio and
+      associated spectrum to allow for devices sharing it. Allows to
+      control bandwidth assignment, beaconing, scanning, etc
+
+    *
+
+      *WUSB*: the layer that sits on top of UWB to provide Wireless USB.
+      The Wireless USB spec defines means to control a UWB radio and to
+      do the actual WUSB.
+
+
+HWA: Host Wire adapters, your Wireless USB dongle
+-------------------------------------------------
+
+WUSB also defines a device called a Host Wire Adaptor (HWA), which in
+mere terms is a USB dongle that enables your PC to have UWB and Wireless
+USB. The Wireless USB Host Controller in a HWA looks to the host like a
+[Wireless] USB controller connected via USB (!)
+
+The HWA itself is broken in two or three main interfaces:
+
+    *
+
+      *RC*: Radio control -- this implements an interface to the
+      Ultra-Wide-Band radio controller. The driver for this implements a
+      USB-based UWB Radio Controller to the UWB stack.
+
+    *
+
+      *HC*: the wireless USB host controller. It looks like a USB host
+      whose root port is the radio and the WUSB devices connect to it.
+      To the system it looks like a separate USB host. The driver (will)
+      implement a USB host controller (similar to UHCI, OHCI or EHCI)
+      for which the root hub is the radio...To reiterate: it is a USB
+      controller that is connected via USB instead of PCI.
+
+    *
+
+      *WINET*: some HW provide a WiNET interface (IP over UWB). This
+      package provides a driver for it (it looks like a network
+      interface, winetX). The driver detects when there is a link up for
+      their type and kick into gear.
+
+
+DWA: Device Wired Adaptor, a Wireless USB hub for wired devices
+---------------------------------------------------------------
+
+These are the complement to HWAs. They are a USB host for connecting
+wired devices, but it is connected to your PC connected via Wireless
+USB. To the system it looks like yet another USB host. To the untrained
+eye, it looks like a hub that connects upstream wirelessly.
+
+We still offer no support for this; however, it should share a lot of
+code with the HWA-RC driver; there is a bunch of factorization work that
+has been done to support that in upcoming releases.
+
+
+WHCI: Wireless Host Controller Interface, the PCI WUSB host adapter
+-------------------------------------------------------------------
+
+This is your usual PCI device that implements WHCI. Similar in concept
+to EHCI, it allows your wireless USB devices (including DWAs) to connect
+to your host via a PCI interface. As in the case of the HWA, it has a
+Radio Control interface and the WUSB Host Controller interface per se.
+
+There is still no driver support for this, but will be in upcoming
+releases.
+
+
+The UWB stack
+=============
+
+The main mission of the UWB stack is to keep a tally of which devices
+are in radio proximity to allow drivers to connect to them. As well, it
+provides an API for controlling the local radio controllers (RCs from
+now on), such as to start/stop beaconing, scan, allocate bandwidth, etc.
+
+
+Devices and hosts: the basic structure
+--------------------------------------
+
+The main building block here is the UWB device (struct uwb_dev). For
+each device that pops up in radio presence (ie: the UWB host receives a
+beacon from it) you get a struct uwb_dev that will show up in
+/sys/bus/uwb/devices.
+
+For each RC that is detected, a new struct uwb_rc and struct uwb_dev are
+created. An entry is also created in /sys/class/uwb_rc for each RC.
+
+Each RC driver is implemented by a separate driver that plugs into the
+interface that the UWB stack provides through a struct uwb_rc_ops. The
+spec creators have been nice enough to make the message format the same
+for HWA and WHCI RCs, so the driver is really a very thin transport that
+moves the requests from the UWB API to the device [/uwb_rc_ops->cmd()/]
+and sends the replies and notifications back to the API
+[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
+is chartered, among other things, to keep the tab of how the UWB radio
+neighborhood looks, creating and destroying devices as they show up or
+disappear.
+
+Command execution is very simple: a command block is sent and a event
+block or reply is expected back. For sending/receiving command/events, a
+handle called /neh/ (Notification/Event Handle) is opened with
+/uwb_rc_neh_open()/.
+
+The HWA-RC (USB dongle) driver (drivers/uwb/hwa-rc.c) does this job for
+the USB connected HWA. Eventually, drivers/whci-rc.c will do the same
+for the PCI connected WHCI controller.
+
+
+Host Controller life cycle
+--------------------------
+
+So let's say we connect a dongle to the system: it is detected and
+firmware uploaded if needed [for Intel's i1480
+/drivers/uwb/ptc/usb.c:ptc_usb_probe()/] and then it is reenumerated.
+Now we have a real HWA device connected and
+/drivers/uwb/hwa-rc.c:hwarc_probe()/ picks it up, that will set up the
+Wire-Adaptor environment and then suck it into the UWB stack's vision of
+the world [/drivers/uwb/lc-rc.c:uwb_rc_add()/].
+
+    *
+
+      [*] The stack should put a new RC to scan for devices
+      [/uwb_rc_scan()/] so it finds what's available around and tries to
+      connect to them, but this is policy stuff and should be driven
+      from user space. As of now, the operator is expected to do it
+      manually; see the release notes for documentation on the procedure.
+
+When a dongle is disconnected, /drivers/uwb/hwa-rc.c:hwarc_disconnect()/
+takes time of tearing everything down safely (or not...).
+
+
+On the air: beacons and enumerating the radio neighborhood
+----------------------------------------------------------
+
+So assuming we have devices and we have agreed for a channel to connect
+on (let's say 9), we put the new RC to beacon:
+
+    *
+
+            $ echo 9 0 > /sys/class/uwb_rc/uwb0/beacon
+
+Now it is visible. If there were other devices in the same radio channel
+and beacon group (that's what the zero is for), the dongle's radio
+control interface will send beacon notifications on its
+notification/event endpoint (NEEP). The beacon notifications are part of
+the event stream that is funneled into the API with
+/drivers/uwb/neh.c:uwb_rc_neh_grok()/ and delivered to the UWBD, the UWB
+daemon through a notification list.
+
+UWBD wakes up and scans the event list; finds a beacon and adds it to
+the BEACON CACHE (/uwb_beca/). If he receives a number of beacons from
+the same device, he considers it to be 'onair' and creates a new device
+[/drivers/uwb/lc-dev.c:uwbd_dev_onair()/]. Similarly, when no beacons
+are received in some time, the device is considered gone and wiped out
+[uwbd calls periodically /uwb/beacon.c:uwb_beca_purge()/ that will purge
+the beacon cache of dead devices].
+
+
+Device lists
+------------
+
+All UWB devices are kept in the list of the struct bus_type uwb_bus_type.
+
+
+Bandwidth allocation
+--------------------
+
+The UWB stack maintains a local copy of DRP availability through
+processing of incoming *DRP Availability Change* notifications. This
+local copy is currently used to present the current bandwidth
+availability to the user through the sysfs file
+/sys/class/uwb_rc/uwbx/bw_avail. In the future the bandwidth
+availability information will be used by the bandwidth reservation
+routines.
+
+The bandwidth reservation routines are in progress and are thus not
+present in the current release. When completed they will enable a user
+to initiate DRP reservation requests through interaction with sysfs. DRP
+reservation requests from remote UWB devices will also be handled. The
+bandwidth management done by the UWB stack will include callbacks to the
+higher layers will enable the higher layers to use the reservations upon
+completion. [Note: The bandwidth reservation work is in progress and
+subject to change.]
+
+
+Wireless USB Host Controller drivers
+====================================
+
+*WARNING* This section needs a lot of work!
+
+As explained above, there are three different types of HCs in the WUSB
+world: HWA-HC, DWA-HC and WHCI-HC.
+
+HWA-HC and DWA-HC share that they are Wire-Adapters (USB or WUSB
+connected controllers), and their transfer management system is almost
+identical. So is their notification delivery system.
+
+HWA-HC and WHCI-HC share that they are both WUSB host controllers, so
+they have to deal with WUSB device life cycle and maintenance, wireless
+root-hub
+
+HWA exposes a Host Controller interface (HWA-HC 0xe0/02/02). This has
+three endpoints (Notifications, Data Transfer In and Data Transfer
+Out--known as NEP, DTI and DTO in the code).
+
+We reserve UWB bandwidth for our Wireless USB Cluster, create a Cluster
+ID and tell the HC to use all that. Then we start it. This means the HC
+starts sending MMCs.
+
+    *
+
+      The MMCs are blocks of data defined somewhere in the WUSB1.0 spec
+      that define a stream in the UWB channel time allocated for sending
+      WUSB IEs (host to device commands/notifications) and Device
+      Notifications (device initiated to host). Each host defines a
+      unique Wireless USB cluster through MMCs. Devices can connect to a
+      single cluster at the time. The IEs are Information Elements, and
+      among them are the bandwidth allocations that tell each device
+      when can they transmit or receive.
+
+Now it all depends on external stimuli.
+
+New device connection
+---------------------
+
+A new device pops up, it scans the radio looking for MMCs that give out
+the existence of Wireless USB channels. Once one (or more) are found,
+selects which one to connect to. Sends a /DN_Connect/ (device
+notification connect) during the DNTS (Device Notification Time
+Slot--announced in the MMCs
+
+HC picks the /DN_Connect/ out (nep module sends to notif.c for delivery
+into /devconnect/). This process starts the authentication process for
+the device. First we allocate a /fake port/ and assign an
+unauthenticated address (128 to 255--what we really do is
+0x80 | fake_port_idx). We fiddle with the fake port status and /hub_wq/
+sees a new connection, so he moves on to enable the fake port with a reset.
+
+So now we are in the reset path -- we know we have a non-yet enumerated
+device with an unauthorized address; we ask user space to authenticate
+(FIXME: not yet done, similar to bluetooth pairing), then we do the key
+exchange (FIXME: not yet done) and issue a /set address 0/ to bring the
+device to the default state. Device is authenticated.
+
+From here, the USB stack takes control through the usb_hcd ops. hub_wq
+has seen the port status changes, as we have been toggling them. It will
+start enumerating and doing transfers through usb_hcd->urb_enqueue() to
+read descriptors and move our data.
+
+Device life cycle and keep alives
+---------------------------------
+
+Every time there is a successful transfer to/from a device, we update a
+per-device activity timestamp. If not, every now and then we check and
+if the activity timestamp gets old, we ping the device by sending it a
+Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
+arrives to us as a notification through
+devconnect.c:wusb_handle_dn_alive(). If a device times out, we
+disconnect it from the system (cleaning up internal information and
+toggling the bits in the fake hub port, which kicks hub_wq into removing
+the rest of the stuff).
+
+This is done through devconnect:__wusb_check_devs(), which will scan the
+device list looking for whom needs refreshing.
+
+If the device wants to disconnect, it will either die (ugly) or send a
+/DN_Disconnect/ that will prompt a disconnection from the system.
+
+Sending and receiving data
+--------------------------
+
+Data is sent and received through /Remote Pipes/ (rpipes). An rpipe is
+/aimed/ at an endpoint in a WUSB device. This is the same for HWAs and
+DWAs.
+
+Each HC has a number of rpipes and buffers that can be assigned to them;
+when doing a data transfer (xfer), first the rpipe has to be aimed and
+prepared (buffers assigned), then we can start queueing requests for
+data in or out.
+
+Data buffers have to be segmented out before sending--so we send first a
+header (segment request) and then if there is any data, a data buffer
+immediately after to the DTI interface (yep, even the request). If our
+buffer is bigger than the max segment size, then we just do multiple
+requests.
+
+[This sucks, because doing USB scatter gatter in Linux is resource
+intensive, if any...not that the current approach is not. It just has to
+be cleaned up a lot :)].
+
+If reading, we don't send data buffers, just the segment headers saying
+we want to read segments.
+
+When the xfer is executed, we receive a notification that says data is
+ready in the DTI endpoint (handled through
+xfer.c:wa_handle_notif_xfer()). In there we read from the DTI endpoint a
+descriptor that gives us the status of the transfer, its identification
+(given when we issued it) and the segment number. If it was a data read,
+we issue another URB to read into the destination buffer the chunk of
+data coming out of the remote endpoint. Done, wait for the next guy. The
+callbacks for the URBs issued from here are the ones that will declare
+the xfer complete at some point and call its callback.
+
+Seems simple, but the implementation is not trivial.
+
+    *
+
+      *WARNING* Old!!
+
+The main xfer descriptor, wa_xfer (equivalent to a URB) contains an
+array of segments, tallys on segments and buffers and callback
+information. Buried in there is a lot of URBs for executing the segments
+and buffer transfers.
+
+For OUT xfers, there is an array of segments, one URB for each, another
+one of buffer URB. When submitting, we submit URBs for segment request
+1, buffer 1, segment 2, buffer 2...etc. Then we wait on the DTI for xfer
+result data; when all the segments are complete, we call the callback to
+finalize the transfer.
+
+For IN xfers, we only issue URBs for the segments we want to read and
+then wait for the xfer result data.
+
+URB mapping into xfers
+^^^^^^^^^^^^^^^^^^^^^^
+
+This is done by hwahc_op_urb_[en|de]queue(). In enqueue() we aim an
+rpipe to the endpoint where we have to transmit, create a transfer
+context (wa_xfer) and submit it. When the xfer is done, our callback is
+called and we assign the status bits and release the xfer resources.
+
+In dequeue() we are basically cancelling/aborting the transfer. We issue
+a xfer abort request to the HC, cancel all the URBs we had submitted
+and not yet done and when all that is done, the xfer callback will be
+called--this will call the URB callback.
+
+
+Glossary
+========
+
+*DWA* -- Device Wire Adapter
+
+USB host, wired for downstream devices, upstream connects wirelessly
+with Wireless USB.
+
+*EVENT* -- Response to a command on the NEEP
+
+*HWA* -- Host Wire Adapter / USB dongle for UWB and Wireless USB
+
+*NEH* -- Notification/Event Handle
+
+Handle/file descriptor for receiving notifications or events. The WA
+code requires you to get one of this to listen for notifications or
+events on the NEEP.
+
+*NEEP* -- Notification/Event EndPoint
+
+Stuff related to the management of the first endpoint of a HWA USB
+dongle that is used to deliver an stream of events and notifications to
+the host.
+
+*NOTIFICATION* -- Message coming in the NEEP as response to something.
+
+*RC* -- Radio Control
+
+Design-overview.txt-1.8 (last edited 2006-11-04 12:22:24 by
+InakyPerezGonzalez)
diff --git a/drivers/staging/wusbcore/Kconfig b/drivers/staging/wusbcore/Kconfig
new file mode 100644
index 000000000000..056c60b4d57f
--- /dev/null
+++ b/drivers/staging/wusbcore/Kconfig
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Wireless USB Core configuration
+#
+config USB_WUSB
+	tristate "Enable Wireless USB extensions"
+	depends on UWB
+	select CRYPTO
+	select CRYPTO_AES
+	select CRYPTO_CCM
+	help
+	  Enable the host-side support for Wireless USB.
+
+          To compile this support select Y (built in). It is safe to
+	  select even if you don't have the hardware.
+
+config USB_WUSB_CBAF
+	tristate "Support WUSB Cable Based Association (CBA)"
+	depends on USB
+	help
+	  Some WUSB devices support Cable Based Association. It's used to
+	  enable the secure communication between the host and the
+	  device.
+
+	  Enable this option if your WUSB device must to be connected
+	  via wired USB before establishing a wireless link.
+
+	  It is safe to select even if you don't have a compatible
+	  hardware.
+
+config USB_WUSB_CBAF_DEBUG
+	bool "Enable CBA debug messages"
+	depends on USB_WUSB_CBAF
+	help
+	  Say Y here if you want the CBA to produce a bunch of debug messages
+	  to the system log. Select this if you are having a problem with
+	  CBA support and want to see more of what is going on.
+
+source "drivers/staging/wusbcore/host/Kconfig"
diff --git a/drivers/staging/wusbcore/Makefile b/drivers/staging/wusbcore/Makefile
new file mode 100644
index 000000000000..b47b874268ac
--- /dev/null
+++ b/drivers/staging/wusbcore/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-$(CONFIG_USB_WUSB_CBAF_DEBUG) := -DDEBUG
+
+obj-$(CONFIG_USB_WUSB)		+= wusbcore.o
+obj-$(CONFIG_USB_HWA_HCD)	+= wusb-wa.o
+obj-$(CONFIG_USB_WUSB_CBAF)	+= wusb-cbaf.o
+
+
+wusbcore-y :=		\
+	crypto.o	\
+	devconnect.o	\
+	dev-sysfs.o	\
+	mmc.o		\
+	pal.o		\
+	rh.o		\
+	reservation.o	\
+	security.o	\
+	wusbhc.o
+
+wusb-cbaf-y := cbaf.o
+
+wusb-wa-y :=		\
+	wa-hc.o		\
+	wa-nep.o	\
+	wa-rpipe.o	\
+	wa-xfer.o
+
+obj-y	+= host/
diff --git a/drivers/staging/wusbcore/TODO b/drivers/staging/wusbcore/TODO
new file mode 100644
index 000000000000..abae57000534
--- /dev/null
+++ b/drivers/staging/wusbcore/TODO
@@ -0,0 +1,8 @@
+TODO: Remove in late 2019 unless there are users
+
+There seems to not be any real wireless USB devices anywhere in the wild
+anymore.  It turned out to be a failed technology :(
+
+This will be removed from the tree if no one objects.
+
+Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/staging/wusbcore/cbaf.c b/drivers/staging/wusbcore/cbaf.c
new file mode 100644
index 000000000000..57062eaf7558
--- /dev/null
+++ b/drivers/staging/wusbcore/cbaf.c
@@ -0,0 +1,645 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB - Cable Based Association
+ *
+ *
+ * Copyright (C) 2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ *
+ * WUSB devices have to be paired (associated in WUSB lingo) so
+ * that they can connect to the system.
+ *
+ * One way of pairing is using CBA-Cable Based Association. First
+ * time you plug the device with a cable, association is done between
+ * host and device and subsequent times, you can connect wirelessly
+ * without having to associate again. That's the idea.
+ *
+ * This driver does nothing Earth shattering. It just provides an
+ * interface to chat with the wire-connected device so we can get a
+ * CDID (device ID) that might have been previously associated to a
+ * CHID (host ID) and to set up a new <CHID,CDID,CK> triplet
+ * (connection context), with the CK being the secret, or connection
+ * key. This is the pairing data.
+ *
+ * When a device with the CBA capability connects, the probe routine
+ * just creates a bunch of sysfs files that a user space enumeration
+ * manager uses to allow it to connect wirelessly to the system or not.
+ *
+ * The process goes like this:
+ *
+ * 1. Device plugs, cbaf is loaded, notifications happen.
+ *
+ * 2. The connection manager (CM) sees a device with CBAF capability
+ *    (the wusb_chid etc. files in /sys/devices/blah/OURDEVICE).
+ *
+ * 3. The CM writes the host name, supported band groups, and the CHID
+ *    (host ID) into the wusb_host_name, wusb_host_band_groups and
+ *    wusb_chid files. These get sent to the device and the CDID (if
+ *    any) for this host is requested.
+ *
+ * 4. The CM can verify that the device's supported band groups
+ *    (wusb_device_band_groups) are compatible with the host.
+ *
+ * 5. The CM reads the wusb_cdid file.
+ *
+ * 6. The CM looks up its database
+ *
+ * 6.1 If it has a matching CHID,CDID entry, the device has been
+ *     authorized before (paired) and nothing further needs to be
+ *     done.
+ *
+ * 6.2 If the CDID is zero (or the CM doesn't find a matching CDID in
+ *     its database), the device is assumed to be not known.  The CM
+ *     may associate the host with device by: writing a randomly
+ *     generated CDID to wusb_cdid and then a random CK to wusb_ck
+ *     (this uploads the new CC to the device).
+ *
+ *     CMD may choose to prompt the user before associating with a new
+ *     device.
+ *
+ * 7. Device is unplugged.
+ *
+ * When the device tries to connect wirelessly, it will present its
+ * CDID to the WUSB host controller.  The CM will query the
+ * database. If the CHID/CDID pair found, it will (with a 4-way
+ * handshake) challenge the device to demonstrate it has the CK secret
+ * key (from our database) without actually exchanging it. Once
+ * satisfied, crypto keys are derived from the CK, the device is
+ * connected and all communication is encrypted.
+ *
+ * References:
+ *   [WUSB-AM] Association Models Supplement to the Certified Wireless
+ *             Universal Serial Bus Specification, version 1.0.
+ */
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/usb.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include "../uwb/uwb.h"
+#include "include/wusb.h"
+#include "include/association.h"
+
+#define CBA_NAME_LEN 0x40 /* [WUSB-AM] table 4-7 */
+
+/* An instance of a Cable-Based-Association-Framework device */
+struct cbaf {
+	struct usb_device *usb_dev;
+	struct usb_interface *usb_iface;
+	void *buffer;
+	size_t buffer_size;
+
+	struct wusb_ckhdid chid;
+	char host_name[CBA_NAME_LEN];
+	u16 host_band_groups;
+
+	struct wusb_ckhdid cdid;
+	char device_name[CBA_NAME_LEN];
+	u16 device_band_groups;
+
+	struct wusb_ckhdid ck;
+};
+
+/*
+ * Verify that a CBAF USB-interface has what we need
+ *
+ * According to [WUSB-AM], CBA devices should provide at least two
+ * interfaces:
+ *  - RETRIEVE_HOST_INFO
+ *  - ASSOCIATE
+ *
+ * If the device doesn't provide these interfaces, we do not know how
+ * to deal with it.
+ */
+static int cbaf_check(struct cbaf *cbaf)
+{
+	int result;
+	struct device *dev = &cbaf->usb_iface->dev;
+	struct wusb_cbaf_assoc_info *assoc_info;
+	struct wusb_cbaf_assoc_request *assoc_request;
+	size_t assoc_size;
+	void *itr, *top;
+	int ar_rhi = 0, ar_assoc = 0;
+
+	result = usb_control_msg(
+		cbaf->usb_dev, usb_rcvctrlpipe(cbaf->usb_dev, 0),
+		CBAF_REQ_GET_ASSOCIATION_INFORMATION,
+		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+		0, cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+		cbaf->buffer, cbaf->buffer_size, USB_CTRL_GET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "Cannot get available association types: %d\n",
+			result);
+		return result;
+	}
+
+	assoc_info = cbaf->buffer;
+	if (result < sizeof(*assoc_info)) {
+		dev_err(dev, "Not enough data to decode association info "
+			"header (%zu vs %zu bytes required)\n",
+			(size_t)result, sizeof(*assoc_info));
+		return result;
+	}
+
+	assoc_size = le16_to_cpu(assoc_info->Length);
+	if (result < assoc_size) {
+		dev_err(dev, "Not enough data to decode association info "
+			"(%zu vs %zu bytes required)\n",
+			(size_t)assoc_size, sizeof(*assoc_info));
+		return result;
+	}
+	/*
+	 * From now on, we just verify, but won't error out unless we
+	 * don't find the AR_TYPE_WUSB_{RETRIEVE_HOST_INFO,ASSOCIATE}
+	 * types.
+	 */
+	itr = cbaf->buffer + sizeof(*assoc_info);
+	top = cbaf->buffer + assoc_size;
+	dev_dbg(dev, "Found %u association requests (%zu bytes)\n",
+		 assoc_info->NumAssociationRequests, assoc_size);
+
+	while (itr < top) {
+		u16 ar_type, ar_subtype;
+		u32 ar_size;
+		const char *ar_name;
+
+		assoc_request = itr;
+
+		if (top - itr < sizeof(*assoc_request)) {
+			dev_err(dev, "Not enough data to decode association "
+				"request (%zu vs %zu bytes needed)\n",
+				top - itr, sizeof(*assoc_request));
+			break;
+		}
+
+		ar_type = le16_to_cpu(assoc_request->AssociationTypeId);
+		ar_subtype = le16_to_cpu(assoc_request->AssociationSubTypeId);
+		ar_size = le32_to_cpu(assoc_request->AssociationTypeInfoSize);
+		ar_name = "unknown";
+
+		switch (ar_type) {
+		case AR_TYPE_WUSB:
+			/* Verify we have what is mandated by [WUSB-AM]. */
+			switch (ar_subtype) {
+			case AR_TYPE_WUSB_RETRIEVE_HOST_INFO:
+				ar_name = "RETRIEVE_HOST_INFO";
+				ar_rhi = 1;
+				break;
+			case AR_TYPE_WUSB_ASSOCIATE:
+				/* send assoc data */
+				ar_name = "ASSOCIATE";
+				ar_assoc = 1;
+				break;
+			}
+			break;
+		}
+
+		dev_dbg(dev, "Association request #%02u: 0x%04x/%04x "
+			 "(%zu bytes): %s\n",
+			 assoc_request->AssociationDataIndex, ar_type,
+			 ar_subtype, (size_t)ar_size, ar_name);
+
+		itr += sizeof(*assoc_request);
+	}
+
+	if (!ar_rhi) {
+		dev_err(dev, "Missing RETRIEVE_HOST_INFO association "
+			"request\n");
+		return -EINVAL;
+	}
+	if (!ar_assoc) {
+		dev_err(dev, "Missing ASSOCIATE association request\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct wusb_cbaf_host_info cbaf_host_info_defaults = {
+	.AssociationTypeId_hdr    = WUSB_AR_AssociationTypeId,
+	.AssociationTypeId	  = cpu_to_le16(AR_TYPE_WUSB),
+	.AssociationSubTypeId_hdr = WUSB_AR_AssociationSubTypeId,
+	.AssociationSubTypeId = cpu_to_le16(AR_TYPE_WUSB_RETRIEVE_HOST_INFO),
+	.CHID_hdr                 = WUSB_AR_CHID,
+	.LangID_hdr               = WUSB_AR_LangID,
+	.HostFriendlyName_hdr     = WUSB_AR_HostFriendlyName,
+};
+
+/* Send WUSB host information (CHID and name) to a CBAF device */
+static int cbaf_send_host_info(struct cbaf *cbaf)
+{
+	struct wusb_cbaf_host_info *hi;
+	size_t name_len;
+	size_t hi_size;
+
+	hi = cbaf->buffer;
+	memset(hi, 0, sizeof(*hi));
+	*hi = cbaf_host_info_defaults;
+	hi->CHID = cbaf->chid;
+	hi->LangID = 0;	/* FIXME: I guess... */
+	strlcpy(hi->HostFriendlyName, cbaf->host_name, CBA_NAME_LEN);
+	name_len = strlen(cbaf->host_name);
+	hi->HostFriendlyName_hdr.len = cpu_to_le16(name_len);
+	hi_size = sizeof(*hi) + name_len;
+
+	return usb_control_msg(cbaf->usb_dev,
+			usb_sndctrlpipe(cbaf->usb_dev, 0),
+			CBAF_REQ_SET_ASSOCIATION_RESPONSE,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			0x0101,
+			cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+			hi, hi_size, USB_CTRL_SET_TIMEOUT);
+}
+
+/*
+ * Get device's information (CDID) associated to CHID
+ *
+ * The device will return it's information (CDID, name, bandgroups)
+ * associated to the CHID we have set before, or 0 CDID and default
+ * name and bandgroup if no CHID set or unknown.
+ */
+static int cbaf_cdid_get(struct cbaf *cbaf)
+{
+	int result;
+	struct device *dev = &cbaf->usb_iface->dev;
+	struct wusb_cbaf_device_info *di;
+	size_t needed;
+
+	di = cbaf->buffer;
+	result = usb_control_msg(
+		cbaf->usb_dev, usb_rcvctrlpipe(cbaf->usb_dev, 0),
+		CBAF_REQ_GET_ASSOCIATION_REQUEST,
+		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+		0x0200, cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+		di, cbaf->buffer_size, USB_CTRL_GET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "Cannot request device information: %d\n",
+			result);
+		return result;
+	}
+
+	needed = result < sizeof(*di) ? sizeof(*di) : le32_to_cpu(di->Length);
+	if (result < needed) {
+		dev_err(dev, "Not enough data in DEVICE_INFO reply (%zu vs "
+			"%zu bytes needed)\n", (size_t)result, needed);
+		return -ENOENT;
+	}
+
+	strlcpy(cbaf->device_name, di->DeviceFriendlyName, CBA_NAME_LEN);
+	cbaf->cdid = di->CDID;
+	cbaf->device_band_groups = le16_to_cpu(di->BandGroups);
+
+	return 0;
+}
+
+static ssize_t cbaf_wusb_chid_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	return sprintf(buf, "%16ph\n", cbaf->chid.data);
+}
+
+static ssize_t cbaf_wusb_chid_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t size)
+{
+	ssize_t result;
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	result = sscanf(buf,
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx",
+			&cbaf->chid.data[0] , &cbaf->chid.data[1],
+			&cbaf->chid.data[2] , &cbaf->chid.data[3],
+			&cbaf->chid.data[4] , &cbaf->chid.data[5],
+			&cbaf->chid.data[6] , &cbaf->chid.data[7],
+			&cbaf->chid.data[8] , &cbaf->chid.data[9],
+			&cbaf->chid.data[10], &cbaf->chid.data[11],
+			&cbaf->chid.data[12], &cbaf->chid.data[13],
+			&cbaf->chid.data[14], &cbaf->chid.data[15]);
+
+	if (result != 16)
+		return -EINVAL;
+
+	result = cbaf_send_host_info(cbaf);
+	if (result < 0)
+		return result;
+	result = cbaf_cdid_get(cbaf);
+	if (result < 0)
+		return result;
+	return size;
+}
+static DEVICE_ATTR(wusb_chid, 0600, cbaf_wusb_chid_show, cbaf_wusb_chid_store);
+
+static ssize_t cbaf_wusb_host_name_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", cbaf->host_name);
+}
+
+static ssize_t cbaf_wusb_host_name_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t size)
+{
+	ssize_t result;
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	result = sscanf(buf, "%63s", cbaf->host_name);
+	if (result != 1)
+		return -EINVAL;
+
+	return size;
+}
+static DEVICE_ATTR(wusb_host_name, 0600, cbaf_wusb_host_name_show,
+					 cbaf_wusb_host_name_store);
+
+static ssize_t cbaf_wusb_host_band_groups_show(struct device *dev,
+					       struct device_attribute *attr,
+					       char *buf)
+{
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%04x\n", cbaf->host_band_groups);
+}
+
+static ssize_t cbaf_wusb_host_band_groups_store(struct device *dev,
+						struct device_attribute *attr,
+						const char *buf, size_t size)
+{
+	ssize_t result;
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+	u16 band_groups = 0;
+
+	result = sscanf(buf, "%04hx", &band_groups);
+	if (result != 1)
+		return -EINVAL;
+
+	cbaf->host_band_groups = band_groups;
+
+	return size;
+}
+
+static DEVICE_ATTR(wusb_host_band_groups, 0600,
+		   cbaf_wusb_host_band_groups_show,
+		   cbaf_wusb_host_band_groups_store);
+
+static const struct wusb_cbaf_device_info cbaf_device_info_defaults = {
+	.Length_hdr               = WUSB_AR_Length,
+	.CDID_hdr                 = WUSB_AR_CDID,
+	.BandGroups_hdr           = WUSB_AR_BandGroups,
+	.LangID_hdr               = WUSB_AR_LangID,
+	.DeviceFriendlyName_hdr   = WUSB_AR_DeviceFriendlyName,
+};
+
+static ssize_t cbaf_wusb_cdid_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	return sprintf(buf, "%16ph\n", cbaf->cdid.data);
+}
+
+static ssize_t cbaf_wusb_cdid_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t size)
+{
+	ssize_t result;
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+	struct wusb_ckhdid cdid;
+
+	result = sscanf(buf,
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx",
+			&cdid.data[0] , &cdid.data[1],
+			&cdid.data[2] , &cdid.data[3],
+			&cdid.data[4] , &cdid.data[5],
+			&cdid.data[6] , &cdid.data[7],
+			&cdid.data[8] , &cdid.data[9],
+			&cdid.data[10], &cdid.data[11],
+			&cdid.data[12], &cdid.data[13],
+			&cdid.data[14], &cdid.data[15]);
+	if (result != 16)
+		return -EINVAL;
+
+	cbaf->cdid = cdid;
+
+	return size;
+}
+static DEVICE_ATTR(wusb_cdid, 0600, cbaf_wusb_cdid_show, cbaf_wusb_cdid_store);
+
+static ssize_t cbaf_wusb_device_band_groups_show(struct device *dev,
+						 struct device_attribute *attr,
+						 char *buf)
+{
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%04x\n", cbaf->device_band_groups);
+}
+
+static DEVICE_ATTR(wusb_device_band_groups, 0600,
+		   cbaf_wusb_device_band_groups_show,
+		   NULL);
+
+static ssize_t cbaf_wusb_device_name_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", cbaf->device_name);
+}
+static DEVICE_ATTR(wusb_device_name, 0600, cbaf_wusb_device_name_show, NULL);
+
+static const struct wusb_cbaf_cc_data cbaf_cc_data_defaults = {
+	.AssociationTypeId_hdr    = WUSB_AR_AssociationTypeId,
+	.AssociationTypeId	  = cpu_to_le16(AR_TYPE_WUSB),
+	.AssociationSubTypeId_hdr = WUSB_AR_AssociationSubTypeId,
+	.AssociationSubTypeId     = cpu_to_le16(AR_TYPE_WUSB_ASSOCIATE),
+	.Length_hdr               = WUSB_AR_Length,
+	.Length		= cpu_to_le32(sizeof(struct wusb_cbaf_cc_data)),
+	.ConnectionContext_hdr    = WUSB_AR_ConnectionContext,
+	.BandGroups_hdr           = WUSB_AR_BandGroups,
+};
+
+static const struct wusb_cbaf_cc_data_fail cbaf_cc_data_fail_defaults = {
+	.AssociationTypeId_hdr    = WUSB_AR_AssociationTypeId,
+	.AssociationSubTypeId_hdr = WUSB_AR_AssociationSubTypeId,
+	.Length_hdr               = WUSB_AR_Length,
+	.AssociationStatus_hdr    = WUSB_AR_AssociationStatus,
+};
+
+/*
+ * Send a new CC to the device.
+ */
+static int cbaf_cc_upload(struct cbaf *cbaf)
+{
+	int result;
+	struct device *dev = &cbaf->usb_iface->dev;
+	struct wusb_cbaf_cc_data *ccd;
+
+	ccd =  cbaf->buffer;
+	*ccd = cbaf_cc_data_defaults;
+	ccd->CHID = cbaf->chid;
+	ccd->CDID = cbaf->cdid;
+	ccd->CK = cbaf->ck;
+	ccd->BandGroups = cpu_to_le16(cbaf->host_band_groups);
+
+	dev_dbg(dev, "Trying to upload CC:\n");
+	dev_dbg(dev, "  CHID       %16ph\n", ccd->CHID.data);
+	dev_dbg(dev, "  CDID       %16ph\n", ccd->CDID.data);
+	dev_dbg(dev, "  Bandgroups 0x%04x\n", cbaf->host_band_groups);
+
+	result = usb_control_msg(
+		cbaf->usb_dev, usb_sndctrlpipe(cbaf->usb_dev, 0),
+		CBAF_REQ_SET_ASSOCIATION_RESPONSE,
+		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+		0x0201, cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+		ccd, sizeof(*ccd), USB_CTRL_SET_TIMEOUT);
+
+	return result;
+}
+
+static ssize_t cbaf_wusb_ck_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t size)
+{
+	ssize_t result;
+	struct usb_interface *iface = to_usb_interface(dev);
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+
+	result = sscanf(buf,
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx",
+			&cbaf->ck.data[0] , &cbaf->ck.data[1],
+			&cbaf->ck.data[2] , &cbaf->ck.data[3],
+			&cbaf->ck.data[4] , &cbaf->ck.data[5],
+			&cbaf->ck.data[6] , &cbaf->ck.data[7],
+			&cbaf->ck.data[8] , &cbaf->ck.data[9],
+			&cbaf->ck.data[10], &cbaf->ck.data[11],
+			&cbaf->ck.data[12], &cbaf->ck.data[13],
+			&cbaf->ck.data[14], &cbaf->ck.data[15]);
+	if (result != 16)
+		return -EINVAL;
+
+	result = cbaf_cc_upload(cbaf);
+	if (result < 0)
+		return result;
+
+	return size;
+}
+static DEVICE_ATTR(wusb_ck, 0600, NULL, cbaf_wusb_ck_store);
+
+static struct attribute *cbaf_dev_attrs[] = {
+	&dev_attr_wusb_host_name.attr,
+	&dev_attr_wusb_host_band_groups.attr,
+	&dev_attr_wusb_chid.attr,
+	&dev_attr_wusb_cdid.attr,
+	&dev_attr_wusb_device_name.attr,
+	&dev_attr_wusb_device_band_groups.attr,
+	&dev_attr_wusb_ck.attr,
+	NULL,
+};
+
+static const struct attribute_group cbaf_dev_attr_group = {
+	.name = NULL,	/* we want them in the same directory */
+	.attrs = cbaf_dev_attrs,
+};
+
+static int cbaf_probe(struct usb_interface *iface,
+		      const struct usb_device_id *id)
+{
+	struct cbaf *cbaf;
+	struct device *dev = &iface->dev;
+	int result = -ENOMEM;
+
+	cbaf = kzalloc(sizeof(*cbaf), GFP_KERNEL);
+	if (cbaf == NULL)
+		goto error_kzalloc;
+	cbaf->buffer = kmalloc(512, GFP_KERNEL);
+	if (cbaf->buffer == NULL)
+		goto error_kmalloc_buffer;
+
+	cbaf->buffer_size = 512;
+	cbaf->usb_dev = usb_get_dev(interface_to_usbdev(iface));
+	cbaf->usb_iface = usb_get_intf(iface);
+	result = cbaf_check(cbaf);
+	if (result < 0) {
+		dev_err(dev, "This device is not WUSB-CBAF compliant and is not supported yet.\n");
+		goto error_check;
+	}
+
+	result = sysfs_create_group(&dev->kobj, &cbaf_dev_attr_group);
+	if (result < 0) {
+		dev_err(dev, "Can't register sysfs attr group: %d\n", result);
+		goto error_create_group;
+	}
+	usb_set_intfdata(iface, cbaf);
+	return 0;
+
+error_create_group:
+error_check:
+	usb_put_intf(iface);
+	usb_put_dev(cbaf->usb_dev);
+	kfree(cbaf->buffer);
+error_kmalloc_buffer:
+	kfree(cbaf);
+error_kzalloc:
+	return result;
+}
+
+static void cbaf_disconnect(struct usb_interface *iface)
+{
+	struct cbaf *cbaf = usb_get_intfdata(iface);
+	struct device *dev = &iface->dev;
+	sysfs_remove_group(&dev->kobj, &cbaf_dev_attr_group);
+	usb_set_intfdata(iface, NULL);
+	usb_put_intf(iface);
+	usb_put_dev(cbaf->usb_dev);
+	kfree(cbaf->buffer);
+	/* paranoia: clean up crypto keys */
+	kzfree(cbaf);
+}
+
+static const struct usb_device_id cbaf_id_table[] = {
+	{ USB_INTERFACE_INFO(0xef, 0x03, 0x01), },
+	{ },
+};
+MODULE_DEVICE_TABLE(usb, cbaf_id_table);
+
+static struct usb_driver cbaf_driver = {
+	.name =		"wusb-cbaf",
+	.id_table =	cbaf_id_table,
+	.probe =	cbaf_probe,
+	.disconnect =	cbaf_disconnect,
+};
+
+module_usb_driver(cbaf_driver);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Wireless USB Cable Based Association");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wusbcore/crypto.c b/drivers/staging/wusbcore/crypto.c
new file mode 100644
index 000000000000..d7d55ed19a98
--- /dev/null
+++ b/drivers/staging/wusbcore/crypto.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ultra Wide Band
+ * AES-128 CCM Encryption
+ *
+ * Copyright (C) 2007 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * We don't do any encryption here; we use the Linux Kernel's AES-128
+ * crypto modules to construct keys and payload blocks in a way
+ * defined by WUSB1.0[6]. Check the erratas, as typos are are patched
+ * there.
+ *
+ * Thanks a zillion to John Keys for his help and clarifications over
+ * the designed-by-a-committee text.
+ *
+ * So the idea is that there is this basic Pseudo-Random-Function
+ * defined in WUSB1.0[6.5] which is the core of everything. It works
+ * by tweaking some blocks, AES crypting them and then xoring
+ * something else with them (this seems to be called CBC(AES) -- can
+ * you tell I know jack about crypto?). So we just funnel it into the
+ * Linux Crypto API.
+ *
+ * We leave a crypto test module so we can verify that vectors match,
+ * every now and then.
+ *
+ * Block size: 16 bytes -- AES seems to do things in 'block sizes'. I
+ *             am learning a lot...
+ *
+ *             Conveniently, some data structures that need to be
+ *             funneled through AES are...16 bytes in size!
+ */
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include "../uwb/uwb.h"
+#include "include/wusb.h"
+
+static int debug_crypto_verify;
+
+module_param(debug_crypto_verify, int, 0);
+MODULE_PARM_DESC(debug_crypto_verify, "verify the key generation algorithms");
+
+static void wusb_key_dump(const void *buf, size_t len)
+{
+	print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_OFFSET, 16, 1,
+		       buf, len, 0);
+}
+
+/*
+ * Block of data, as understood by AES-CCM
+ *
+ * The code assumes this structure is nothing but a 16 byte array
+ * (packed in a struct to avoid common mess ups that I usually do with
+ * arrays and enforcing type checking).
+ */
+struct aes_ccm_block {
+	u8 data[16];
+} __attribute__((packed));
+
+/*
+ * Counter-mode Blocks (WUSB1.0[6.4])
+ *
+ * According to CCM (or so it seems), for the purpose of calculating
+ * the MIC, the message is broken in N counter-mode blocks, B0, B1,
+ * ... BN.
+ *
+ * B0 contains flags, the CCM nonce and l(m).
+ *
+ * B1 contains l(a), the MAC header, the encryption offset and padding.
+ *
+ * If EO is nonzero, additional blocks are built from payload bytes
+ * until EO is exhausted (FIXME: padding to 16 bytes, I guess). The
+ * padding is not xmitted.
+ */
+
+/* WUSB1.0[T6.4] */
+struct aes_ccm_b0 {
+	u8 flags;	/* 0x59, per CCM spec */
+	struct aes_ccm_nonce ccm_nonce;
+	__be16 lm;
+} __attribute__((packed));
+
+/* WUSB1.0[T6.5] */
+struct aes_ccm_b1 {
+	__be16 la;
+	u8 mac_header[10];
+	__le16 eo;
+	u8 security_reserved;	/* This is always zero */
+	u8 padding;		/* 0 */
+} __attribute__((packed));
+
+/*
+ * Encryption Blocks (WUSB1.0[6.4.4])
+ *
+ * CCM uses Ax blocks to generate a keystream with which the MIC and
+ * the message's payload are encoded. A0 always encrypts/decrypts the
+ * MIC. Ax (x>0) are used for the successive payload blocks.
+ *
+ * The x is the counter, and is increased for each block.
+ */
+struct aes_ccm_a {
+	u8 flags;	/* 0x01, per CCM spec */
+	struct aes_ccm_nonce ccm_nonce;
+	__be16 counter;	/* Value of x */
+} __attribute__((packed));
+
+/* Scratch space for MAC calculations. */
+struct wusb_mac_scratch {
+	struct aes_ccm_b0 b0;
+	struct aes_ccm_b1 b1;
+	struct aes_ccm_a ax;
+};
+
+/*
+ * CC-MAC function WUSB1.0[6.5]
+ *
+ * Take a data string and produce the encrypted CBC Counter-mode MIC
+ *
+ * Note the names for most function arguments are made to (more or
+ * less) match those used in the pseudo-function definition given in
+ * WUSB1.0[6.5].
+ *
+ * @tfm_cbc: CBC(AES) blkcipher handle (initialized)
+ *
+ * @tfm_aes: AES cipher handle (initialized)
+ *
+ * @mic: buffer for placing the computed MIC (Message Integrity
+ *       Code). This is exactly 8 bytes, and we expect the buffer to
+ *       be at least eight bytes in length.
+ *
+ * @key: 128 bit symmetric key
+ *
+ * @n: CCM nonce
+ *
+ * @a: ASCII string, 14 bytes long (I guess zero padded if needed;
+ *     we use exactly 14 bytes).
+ *
+ * @b: data stream to be processed
+ *
+ * @blen: size of b...
+ *
+ * Still not very clear how this is done, but looks like this: we
+ * create block B0 (as WUSB1.0[6.5] says), then we AES-crypt it with
+ * @key. We bytewise xor B0 with B1 (1) and AES-crypt that. Then we
+ * take the payload and divide it in blocks (16 bytes), xor them with
+ * the previous crypto result (16 bytes) and crypt it, repeat the next
+ * block with the output of the previous one, rinse wash. So we use
+ * the CBC-MAC(AES) shash, that does precisely that. The IV (Initial
+ * Vector) is 16 bytes and is set to zero, so
+ *
+ * (1) Created as 6.5 says, again, using as l(a) 'Blen + 14', and
+ *     using the 14 bytes of @a to fill up
+ *     b1.{mac_header,e0,security_reserved,padding}.
+ *
+ * NOTE: The definition of l(a) in WUSB1.0[6.5] vs the definition of
+ *       l(m) is orthogonal, they bear no relationship, so it is not
+ *       in conflict with the parameter's relation that
+ *       WUSB1.0[6.4.2]) defines.
+ *
+ * NOTE: WUSB1.0[A.1]: Host Nonce is missing a nibble? (1e); fixed in
+ *       first errata released on 2005/07.
+ *
+ * NOTE: we need to clean IV to zero at each invocation to make sure
+ *       we start with a fresh empty Initial Vector, so that the CBC
+ *       works ok.
+ *
+ * NOTE: blen is not aligned to a block size, we'll pad zeros, that's
+ *       what sg[4] is for. Maybe there is a smarter way to do this.
+ */
+static int wusb_ccm_mac(struct crypto_shash *tfm_cbcmac,
+			struct wusb_mac_scratch *scratch,
+			void *mic,
+			const struct aes_ccm_nonce *n,
+			const struct aes_ccm_label *a, const void *b,
+			size_t blen)
+{
+	SHASH_DESC_ON_STACK(desc, tfm_cbcmac);
+	u8 iv[AES_BLOCK_SIZE];
+
+	/*
+	 * These checks should be compile time optimized out
+	 * ensure @a fills b1's mac_header and following fields
+	 */
+	BUILD_BUG_ON(sizeof(*a) != sizeof(scratch->b1) - sizeof(scratch->b1.la));
+	BUILD_BUG_ON(sizeof(scratch->b0) != sizeof(struct aes_ccm_block));
+	BUILD_BUG_ON(sizeof(scratch->b1) != sizeof(struct aes_ccm_block));
+	BUILD_BUG_ON(sizeof(scratch->ax) != sizeof(struct aes_ccm_block));
+
+	/* Setup B0 */
+	scratch->b0.flags = 0x59;	/* Format B0 */
+	scratch->b0.ccm_nonce = *n;
+	scratch->b0.lm = cpu_to_be16(0);	/* WUSB1.0[6.5] sez l(m) is 0 */
+
+	/* Setup B1
+	 *
+	 * The WUSB spec is anything but clear! WUSB1.0[6.5]
+	 * says that to initialize B1 from A with 'l(a) = blen +
+	 * 14'--after clarification, it means to use A's contents
+	 * for MAC Header, EO, sec reserved and padding.
+	 */
+	scratch->b1.la = cpu_to_be16(blen + 14);
+	memcpy(&scratch->b1.mac_header, a, sizeof(*a));
+
+	desc->tfm = tfm_cbcmac;
+	crypto_shash_init(desc);
+	crypto_shash_update(desc, (u8 *)&scratch->b0, sizeof(scratch->b0) +
+						      sizeof(scratch->b1));
+	crypto_shash_finup(desc, b, blen, iv);
+
+	/* Now we crypt the MIC Tag (*iv) with Ax -- values per WUSB1.0[6.5]
+	 * The procedure is to AES crypt the A0 block and XOR the MIC
+	 * Tag against it; we only do the first 8 bytes and place it
+	 * directly in the destination buffer.
+	 */
+	scratch->ax.flags = 0x01;		/* as per WUSB 1.0 spec */
+	scratch->ax.ccm_nonce = *n;
+	scratch->ax.counter = 0;
+
+	/* reuse the CBC-MAC transform to perform the single block encryption */
+	crypto_shash_digest(desc, (u8 *)&scratch->ax, sizeof(scratch->ax),
+			    (u8 *)&scratch->ax);
+
+	crypto_xor_cpy(mic, (u8 *)&scratch->ax, iv, 8);
+
+	return 8;
+}
+
+/*
+ * WUSB Pseudo Random Function (WUSB1.0[6.5])
+ *
+ * @b: buffer to the source data; cannot be a global or const local
+ *     (will confuse the scatterlists)
+ */
+ssize_t wusb_prf(void *out, size_t out_size,
+		 const u8 key[16], const struct aes_ccm_nonce *_n,
+		 const struct aes_ccm_label *a,
+		 const void *b, size_t blen, size_t len)
+{
+	ssize_t result, bytes = 0, bitr;
+	struct aes_ccm_nonce n = *_n;
+	struct crypto_shash *tfm_cbcmac;
+	struct wusb_mac_scratch scratch;
+	u64 sfn = 0;
+	__le64 sfn_le;
+
+	tfm_cbcmac = crypto_alloc_shash("cbcmac(aes)", 0, 0);
+	if (IS_ERR(tfm_cbcmac)) {
+		result = PTR_ERR(tfm_cbcmac);
+		printk(KERN_ERR "E: can't load CBCMAC-AES: %d\n", (int)result);
+		goto error_alloc_cbcmac;
+	}
+
+	result = crypto_shash_setkey(tfm_cbcmac, key, AES_BLOCK_SIZE);
+	if (result < 0) {
+		printk(KERN_ERR "E: can't set CBCMAC-AES key: %d\n", (int)result);
+		goto error_setkey_cbcmac;
+	}
+
+	for (bitr = 0; bitr < (len + 63) / 64; bitr++) {
+		sfn_le = cpu_to_le64(sfn++);
+		memcpy(&n.sfn, &sfn_le, sizeof(n.sfn));	/* n.sfn++... */
+		result = wusb_ccm_mac(tfm_cbcmac, &scratch, out + bytes,
+				      &n, a, b, blen);
+		if (result < 0)
+			goto error_ccm_mac;
+		bytes += result;
+	}
+	result = bytes;
+
+error_ccm_mac:
+error_setkey_cbcmac:
+	crypto_free_shash(tfm_cbcmac);
+error_alloc_cbcmac:
+	return result;
+}
+
+/* WUSB1.0[A.2] test vectors */
+static const u8 stv_hsmic_key[16] = {
+	0x4b, 0x79, 0xa3, 0xcf, 0xe5, 0x53, 0x23, 0x9d,
+	0xd7, 0xc1, 0x6d, 0x1c, 0x2d, 0xab, 0x6d, 0x3f
+};
+
+static const struct aes_ccm_nonce stv_hsmic_n = {
+	.sfn = { 0 },
+	.tkid = { 0x76, 0x98, 0x01,  },
+	.dest_addr = { .data = { 0xbe, 0x00 } },
+		.src_addr = { .data = { 0x76, 0x98 } },
+};
+
+/*
+ * Out-of-band MIC Generation verification code
+ *
+ */
+static int wusb_oob_mic_verify(void)
+{
+	int result;
+	u8 mic[8];
+	/* WUSB1.0[A.2] test vectors */
+	static const struct usb_handshake stv_hsmic_hs = {
+		.bMessageNumber = 2,
+		.bStatus 	= 00,
+		.tTKID 		= { 0x76, 0x98, 0x01 },
+		.bReserved 	= 00,
+		.CDID 		= { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35,
+				    0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
+				    0x3c, 0x3d, 0x3e, 0x3f },
+		.nonce	 	= { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
+				    0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
+				    0x2c, 0x2d, 0x2e, 0x2f },
+		.MIC	 	= { 0x75, 0x6a, 0x97, 0x51, 0x0c, 0x8c,
+				    0x14, 0x7b },
+	};
+	size_t hs_size;
+
+	result = wusb_oob_mic(mic, stv_hsmic_key, &stv_hsmic_n, &stv_hsmic_hs);
+	if (result < 0)
+		printk(KERN_ERR "E: WUSB OOB MIC test: failed: %d\n", result);
+	else if (memcmp(stv_hsmic_hs.MIC, mic, sizeof(mic))) {
+		printk(KERN_ERR "E: OOB MIC test: "
+		       "mismatch between MIC result and WUSB1.0[A2]\n");
+		hs_size = sizeof(stv_hsmic_hs) - sizeof(stv_hsmic_hs.MIC);
+		printk(KERN_ERR "E: Handshake2 in: (%zu bytes)\n", hs_size);
+		wusb_key_dump(&stv_hsmic_hs, hs_size);
+		printk(KERN_ERR "E: CCM Nonce in: (%zu bytes)\n",
+		       sizeof(stv_hsmic_n));
+		wusb_key_dump(&stv_hsmic_n, sizeof(stv_hsmic_n));
+		printk(KERN_ERR "E: MIC out:\n");
+		wusb_key_dump(mic, sizeof(mic));
+		printk(KERN_ERR "E: MIC out (from WUSB1.0[A.2]):\n");
+		wusb_key_dump(stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC));
+		result = -EINVAL;
+	} else
+		result = 0;
+	return result;
+}
+
+/*
+ * Test vectors for Key derivation
+ *
+ * These come from WUSB1.0[6.5.1], the vectors in WUSB1.0[A.1]
+ * (errata corrected in 2005/07).
+ */
+static const u8 stv_key_a1[16] __attribute__ ((__aligned__(4))) = {
+	0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87,
+	0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f
+};
+
+static const struct aes_ccm_nonce stv_keydvt_n_a1 = {
+	.sfn = { 0 },
+	.tkid = { 0x76, 0x98, 0x01,  },
+	.dest_addr = { .data = { 0xbe, 0x00 } },
+	.src_addr = { .data = { 0x76, 0x98 } },
+};
+
+static const struct wusb_keydvt_out stv_keydvt_out_a1 = {
+	.kck = {
+		0x4b, 0x79, 0xa3, 0xcf, 0xe5, 0x53, 0x23, 0x9d,
+		0xd7, 0xc1, 0x6d, 0x1c, 0x2d, 0xab, 0x6d, 0x3f
+	},
+	.ptk = {
+		0xc8, 0x70, 0x62, 0x82, 0xb6, 0x7c, 0xe9, 0x06,
+		0x7b, 0xc5, 0x25, 0x69, 0xf2, 0x36, 0x61, 0x2d
+	}
+};
+
+/*
+ * Performa a test to make sure we match the vectors defined in
+ * WUSB1.0[A.1](Errata2006/12)
+ */
+static int wusb_key_derive_verify(void)
+{
+	int result = 0;
+	struct wusb_keydvt_out keydvt_out;
+	/* These come from WUSB1.0[A.1] + 2006/12 errata */
+	static const struct wusb_keydvt_in stv_keydvt_in_a1 = {
+		.hnonce = {
+			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+			0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+		},
+		.dnonce = {
+			0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+			0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f
+		}
+	};
+
+	result = wusb_key_derive(&keydvt_out, stv_key_a1, &stv_keydvt_n_a1,
+				 &stv_keydvt_in_a1);
+	if (result < 0)
+		printk(KERN_ERR "E: WUSB key derivation test: "
+		       "derivation failed: %d\n", result);
+	if (memcmp(&stv_keydvt_out_a1, &keydvt_out, sizeof(keydvt_out))) {
+		printk(KERN_ERR "E: WUSB key derivation test: "
+		       "mismatch between key derivation result "
+		       "and WUSB1.0[A1] Errata 2006/12\n");
+		printk(KERN_ERR "E: keydvt in: key\n");
+		wusb_key_dump(stv_key_a1, sizeof(stv_key_a1));
+		printk(KERN_ERR "E: keydvt in: nonce\n");
+		wusb_key_dump(&stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1));
+		printk(KERN_ERR "E: keydvt in: hnonce & dnonce\n");
+		wusb_key_dump(&stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1));
+		printk(KERN_ERR "E: keydvt out: KCK\n");
+		wusb_key_dump(&keydvt_out.kck, sizeof(keydvt_out.kck));
+		printk(KERN_ERR "E: keydvt out: PTK\n");
+		wusb_key_dump(&keydvt_out.ptk, sizeof(keydvt_out.ptk));
+		result = -EINVAL;
+	} else
+		result = 0;
+	return result;
+}
+
+/*
+ * Initialize crypto system
+ *
+ * FIXME: we do nothing now, other than verifying. Later on we'll
+ * cache the encryption stuff, so that's why we have a separate init.
+ */
+int wusb_crypto_init(void)
+{
+	int result;
+
+	if (debug_crypto_verify) {
+		result = wusb_key_derive_verify();
+		if (result < 0)
+			return result;
+		return wusb_oob_mic_verify();
+	}
+	return 0;
+}
+
+void wusb_crypto_exit(void)
+{
+	/* FIXME: free cached crypto transforms */
+}
diff --git a/drivers/staging/wusbcore/dev-sysfs.c b/drivers/staging/wusbcore/dev-sysfs.c
new file mode 100644
index 000000000000..67b0a4c412b2
--- /dev/null
+++ b/drivers/staging/wusbcore/dev-sysfs.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB devices
+ * sysfs bindings
+ *
+ * Copyright (C) 2007 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Get them out of the way...
+ */
+
+#include <linux/jiffies.h>
+#include <linux/ctype.h>
+#include <linux/workqueue.h>
+#include "wusbhc.h"
+
+static ssize_t wusb_disconnect_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t size)
+{
+	struct usb_device *usb_dev;
+	struct wusbhc *wusbhc;
+	unsigned command;
+	u8 port_idx;
+
+	if (sscanf(buf, "%u", &command) != 1)
+		return -EINVAL;
+	if (command == 0)
+		return size;
+	usb_dev = to_usb_device(dev);
+	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
+	if (wusbhc == NULL)
+		return -ENODEV;
+
+	mutex_lock(&wusbhc->mutex);
+	port_idx = wusb_port_no_to_idx(usb_dev->portnum);
+	__wusbhc_dev_disable(wusbhc, port_idx);
+	mutex_unlock(&wusbhc->mutex);
+	wusbhc_put(wusbhc);
+	return size;
+}
+static DEVICE_ATTR_WO(wusb_disconnect);
+
+static ssize_t wusb_cdid_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	ssize_t result;
+	struct wusb_dev *wusb_dev;
+
+	wusb_dev = wusb_dev_get_by_usb_dev(to_usb_device(dev));
+	if (wusb_dev == NULL)
+		return -ENODEV;
+	result = sprintf(buf, "%16ph\n", wusb_dev->cdid.data);
+	wusb_dev_put(wusb_dev);
+	return result;
+}
+static DEVICE_ATTR_RO(wusb_cdid);
+
+static ssize_t wusb_ck_store(struct device *dev,
+			     struct device_attribute *attr,
+			     const char *buf, size_t size)
+{
+	int result;
+	struct usb_device *usb_dev;
+	struct wusbhc *wusbhc;
+	struct wusb_ckhdid ck;
+
+	result = sscanf(buf,
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx\n",
+			&ck.data[0] , &ck.data[1],
+			&ck.data[2] , &ck.data[3],
+			&ck.data[4] , &ck.data[5],
+			&ck.data[6] , &ck.data[7],
+			&ck.data[8] , &ck.data[9],
+			&ck.data[10], &ck.data[11],
+			&ck.data[12], &ck.data[13],
+			&ck.data[14], &ck.data[15]);
+	if (result != 16)
+		return -EINVAL;
+
+	usb_dev = to_usb_device(dev);
+	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
+	if (wusbhc == NULL)
+		return -ENODEV;
+	result = wusb_dev_4way_handshake(wusbhc, usb_dev->wusb_dev, &ck);
+	memzero_explicit(&ck, sizeof(ck));
+	wusbhc_put(wusbhc);
+	return result < 0 ? result : size;
+}
+static DEVICE_ATTR_WO(wusb_ck);
+
+static struct attribute *wusb_dev_attrs[] = {
+		&dev_attr_wusb_disconnect.attr,
+		&dev_attr_wusb_cdid.attr,
+		&dev_attr_wusb_ck.attr,
+		NULL,
+};
+
+static const struct attribute_group wusb_dev_attr_group = {
+	.name = NULL,	/* we want them in the same directory */
+	.attrs = wusb_dev_attrs,
+};
+
+int wusb_dev_sysfs_add(struct wusbhc *wusbhc, struct usb_device *usb_dev,
+		       struct wusb_dev *wusb_dev)
+{
+	int result = sysfs_create_group(&usb_dev->dev.kobj,
+					&wusb_dev_attr_group);
+	struct device *dev = &usb_dev->dev;
+	if (result < 0)
+		dev_err(dev, "Cannot register WUSB-dev attributes: %d\n",
+			result);
+	return result;
+}
+
+void wusb_dev_sysfs_rm(struct wusb_dev *wusb_dev)
+{
+	struct usb_device *usb_dev = wusb_dev->usb_dev;
+	if (usb_dev)
+		sysfs_remove_group(&usb_dev->dev.kobj, &wusb_dev_attr_group);
+}
diff --git a/drivers/staging/wusbcore/devconnect.c b/drivers/staging/wusbcore/devconnect.c
new file mode 100644
index 000000000000..1170f8baf608
--- /dev/null
+++ b/drivers/staging/wusbcore/devconnect.c
@@ -0,0 +1,1085 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB Wire Adapter: Control/Data Streaming Interface (WUSB[8])
+ * Device Connect handling
+ *
+ * Copyright (C) 2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ * FIXME: this file needs to be broken up, it's grown too big
+ *
+ *
+ * WUSB1.0[7.1, 7.5.1, ]
+ *
+ * WUSB device connection is kind of messy. Some background:
+ *
+ *     When a device wants to connect it scans the UWB radio channels
+ *     looking for a WUSB Channel; a WUSB channel is defined by MMCs
+ *     (Micro Managed Commands or something like that) [see
+ *     Design-overview for more on this] .
+ *
+ * So, device scans the radio, finds MMCs and thus a host and checks
+ * when the next DNTS is. It sends a Device Notification Connect
+ * (DN_Connect); the host picks it up (through nep.c and notif.c, ends
+ * up in wusb_devconnect_ack(), which creates a wusb_dev structure in
+ * wusbhc->port[port_number].wusb_dev), assigns an unauth address
+ * to the device (this means from 0x80 to 0xfe) and sends, in the MMC
+ * a Connect Ack Information Element (ConnAck IE).
+ *
+ * So now the device now has a WUSB address. From now on, we use
+ * that to talk to it in the RPipes.
+ *
+ * ASSUMPTIONS:
+ *
+ *  - We use the the as device address the port number where it is
+ *    connected (port 0 doesn't exist). For unauth, it is 128 + that.
+ *
+ * ROADMAP:
+ *
+ *   This file contains the logic for doing that--entry points:
+ *
+ *   wusb_devconnect_ack()      Ack a device until _acked() called.
+ *                              Called by notif.c:wusb_handle_dn_connect()
+ *                              when a DN_Connect is received.
+ *
+ *     wusb_devconnect_acked()  Ack done, release resources.
+ *
+ *   wusb_handle_dn_alive()     Called by notif.c:wusb_handle_dn()
+ *                              for processing a DN_Alive pong from a device.
+ *
+ *   wusb_handle_dn_disconnect()Called by notif.c:wusb_handle_dn() to
+ *                              process a disconnect request from a
+ *                              device.
+ *
+ *   __wusb_dev_disable()       Called by rh.c:wusbhc_rh_clear_port_feat() when
+ *                              disabling a port.
+ *
+ *   wusb_devconnect_create()   Called when creating the host by
+ *                              lc.c:wusbhc_create().
+ *
+ *   wusb_devconnect_destroy()  Cleanup called removing the host. Called
+ *                              by lc.c:wusbhc_destroy().
+ *
+ *   Each Wireless USB host maintains a list of DN_Connect requests
+ *   (actually we maintain a list of pending Connect Acks, the
+ *   wusbhc->ca_list).
+ *
+ * LIFE CYCLE OF port->wusb_dev
+ *
+ *   Before the @wusbhc structure put()s the reference it owns for
+ *   port->wusb_dev [and clean the wusb_dev pointer], it needs to
+ *   lock @wusbhc->mutex.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <linux/export.h>
+#include "wusbhc.h"
+
+static void wusbhc_devconnect_acked_work(struct work_struct *work);
+
+static void wusb_dev_free(struct wusb_dev *wusb_dev)
+{
+	kfree(wusb_dev);
+}
+
+static struct wusb_dev *wusb_dev_alloc(struct wusbhc *wusbhc)
+{
+	struct wusb_dev *wusb_dev;
+
+	wusb_dev = kzalloc(sizeof(*wusb_dev), GFP_KERNEL);
+	if (wusb_dev == NULL)
+		goto err;
+
+	wusb_dev->wusbhc = wusbhc;
+
+	INIT_WORK(&wusb_dev->devconnect_acked_work, wusbhc_devconnect_acked_work);
+
+	return wusb_dev;
+err:
+	wusb_dev_free(wusb_dev);
+	return NULL;
+}
+
+
+/*
+ * Using the Connect-Ack list, fill out the @wusbhc Connect-Ack WUSB IE
+ * properly so that it can be added to the MMC.
+ *
+ * We just get the @wusbhc->ca_list and fill out the first four ones or
+ * less (per-spec WUSB1.0[7.5, before T7-38). If the ConnectAck WUSB
+ * IE is not allocated, we alloc it.
+ *
+ * @wusbhc->mutex must be taken
+ */
+static void wusbhc_fill_cack_ie(struct wusbhc *wusbhc)
+{
+	unsigned cnt;
+	struct wusb_dev *dev_itr;
+	struct wuie_connect_ack *cack_ie;
+
+	cack_ie = &wusbhc->cack_ie;
+	cnt = 0;
+	list_for_each_entry(dev_itr, &wusbhc->cack_list, cack_node) {
+		cack_ie->blk[cnt].CDID = dev_itr->cdid;
+		cack_ie->blk[cnt].bDeviceAddress = dev_itr->addr;
+		if (++cnt >= WUIE_ELT_MAX)
+			break;
+	}
+	cack_ie->hdr.bLength = sizeof(cack_ie->hdr)
+		+ cnt * sizeof(cack_ie->blk[0]);
+}
+
+/*
+ * Register a new device that wants to connect
+ *
+ * A new device wants to connect, so we add it to the Connect-Ack
+ * list. We give it an address in the unauthorized range (bit 8 set);
+ * user space will have to drive authorization further on.
+ *
+ * @dev_addr: address to use for the device (which is also the port
+ *            number).
+ *
+ * @wusbhc->mutex must be taken
+ */
+static struct wusb_dev *wusbhc_cack_add(struct wusbhc *wusbhc,
+					struct wusb_dn_connect *dnc,
+					const char *pr_cdid, u8 port_idx)
+{
+	struct device *dev = wusbhc->dev;
+	struct wusb_dev *wusb_dev;
+	int new_connection = wusb_dn_connect_new_connection(dnc);
+	u8 dev_addr;
+	int result;
+
+	/* Is it registered already? */
+	list_for_each_entry(wusb_dev, &wusbhc->cack_list, cack_node)
+		if (!memcmp(&wusb_dev->cdid, &dnc->CDID,
+			    sizeof(wusb_dev->cdid)))
+			return wusb_dev;
+	/* We don't have it, create an entry, register it */
+	wusb_dev = wusb_dev_alloc(wusbhc);
+	if (wusb_dev == NULL)
+		return NULL;
+	wusb_dev_init(wusb_dev);
+	wusb_dev->cdid = dnc->CDID;
+	wusb_dev->port_idx = port_idx;
+
+	/*
+	 * Devices are always available within the cluster reservation
+	 * and since the hardware will take the intersection of the
+	 * per-device availability and the cluster reservation, the
+	 * per-device availability can simply be set to always
+	 * available.
+	 */
+	bitmap_fill(wusb_dev->availability.bm, UWB_NUM_MAS);
+
+	/* FIXME: handle reconnects instead of assuming connects are
+	   always new. */
+	if (1 && new_connection == 0)
+		new_connection = 1;
+	if (new_connection) {
+		dev_addr = (port_idx + 2) | WUSB_DEV_ADDR_UNAUTH;
+
+		dev_info(dev, "Connecting new WUSB device to address %u, "
+			"port %u\n", dev_addr, port_idx);
+
+		result = wusb_set_dev_addr(wusbhc, wusb_dev, dev_addr);
+		if (result < 0)
+			return NULL;
+	}
+	wusb_dev->entry_ts = jiffies;
+	list_add_tail(&wusb_dev->cack_node, &wusbhc->cack_list);
+	wusbhc->cack_count++;
+	wusbhc_fill_cack_ie(wusbhc);
+
+	return wusb_dev;
+}
+
+/*
+ * Remove a Connect-Ack context entry from the HCs view
+ *
+ * @wusbhc->mutex must be taken
+ */
+static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
+{
+	list_del_init(&wusb_dev->cack_node);
+	wusbhc->cack_count--;
+	wusbhc_fill_cack_ie(wusbhc);
+}
+
+/*
+ * @wusbhc->mutex must be taken */
+static
+void wusbhc_devconnect_acked(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
+{
+	wusbhc_cack_rm(wusbhc, wusb_dev);
+	if (wusbhc->cack_count)
+		wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr);
+	else
+		wusbhc_mmcie_rm(wusbhc, &wusbhc->cack_ie.hdr);
+}
+
+static void wusbhc_devconnect_acked_work(struct work_struct *work)
+{
+	struct wusb_dev *wusb_dev = container_of(work, struct wusb_dev,
+						 devconnect_acked_work);
+	struct wusbhc *wusbhc = wusb_dev->wusbhc;
+
+	mutex_lock(&wusbhc->mutex);
+	wusbhc_devconnect_acked(wusbhc, wusb_dev);
+	mutex_unlock(&wusbhc->mutex);
+
+	wusb_dev_put(wusb_dev);
+}
+
+/*
+ * Ack a device for connection
+ *
+ * FIXME: docs
+ *
+ * @pr_cdid:	Printable CDID...hex Use @dnc->cdid for the real deal.
+ *
+ * So we get the connect ack IE (may have been allocated already),
+ * find an empty connect block, an empty virtual port, create an
+ * address with it (see below), make it an unauth addr [bit 7 set] and
+ * set the MMC.
+ *
+ * Addresses: because WUSB hosts have no downstream hubs, we can do a
+ *            1:1 mapping between 'port number' and device
+ *            address. This simplifies many things, as during this
+ *            initial connect phase the USB stack has no knowledge of
+ *            the device and hasn't assigned an address yet--we know
+ *            USB's choose_address() will use the same heuristics we
+ *            use here, so we can assume which address will be assigned.
+ *
+ *            USB stack always assigns address 1 to the root hub, so
+ *            to the port number we add 2 (thus virtual port #0 is
+ *            addr #2).
+ *
+ * @wusbhc shall be referenced
+ */
+static
+void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc,
+			   const char *pr_cdid)
+{
+	int result;
+	struct device *dev = wusbhc->dev;
+	struct wusb_dev *wusb_dev;
+	struct wusb_port *port;
+	unsigned idx;
+
+	mutex_lock(&wusbhc->mutex);
+
+	/* Check we are not handling it already */
+	for (idx = 0; idx < wusbhc->ports_max; idx++) {
+		port = wusb_port_by_idx(wusbhc, idx);
+		if (port->wusb_dev
+		    && memcmp(&dnc->CDID, &port->wusb_dev->cdid, sizeof(dnc->CDID)) == 0)
+			goto error_unlock;
+	}
+	/* Look up those fake ports we have for a free one */
+	for (idx = 0; idx < wusbhc->ports_max; idx++) {
+		port = wusb_port_by_idx(wusbhc, idx);
+		if ((port->status & USB_PORT_STAT_POWER)
+		    && !(port->status & USB_PORT_STAT_CONNECTION))
+			break;
+	}
+	if (idx >= wusbhc->ports_max) {
+		dev_err(dev, "Host controller can't connect more devices "
+			"(%u already connected); device %s rejected\n",
+			wusbhc->ports_max, pr_cdid);
+		/* NOTE: we could send a WUIE_Disconnect here, but we haven't
+		 *       event acked, so the device will eventually timeout the
+		 *       connection, right? */
+		goto error_unlock;
+	}
+
+	/* Make sure we are using no crypto on that "virtual port" */
+	wusbhc->set_ptk(wusbhc, idx, 0, NULL, 0);
+
+	/* Grab a filled in Connect-Ack context, fill out the
+	 * Connect-Ack Wireless USB IE, set the MMC */
+	wusb_dev = wusbhc_cack_add(wusbhc, dnc, pr_cdid, idx);
+	if (wusb_dev == NULL)
+		goto error_unlock;
+	result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr);
+	if (result < 0)
+		goto error_unlock;
+	/* Give the device at least 2ms (WUSB1.0[7.5.1p3]), let's do
+	 * three for a good measure */
+	msleep(3);
+	port->wusb_dev = wusb_dev;
+	port->status |= USB_PORT_STAT_CONNECTION;
+	port->change |= USB_PORT_STAT_C_CONNECTION;
+	/* Now the port status changed to connected; hub_wq will
+	 * pick the change up and try to reset the port to bring it to
+	 * the enabled state--so this process returns up to the stack
+	 * and it calls back into wusbhc_rh_port_reset().
+	 */
+error_unlock:
+	mutex_unlock(&wusbhc->mutex);
+	return;
+
+}
+
+/*
+ * Disconnect a Wireless USB device from its fake port
+ *
+ * Marks the port as disconnected so that hub_wq can pick up the change
+ * and drops our knowledge about the device.
+ *
+ * Assumes there is a device connected
+ *
+ * @port_index: zero based port number
+ *
+ * NOTE: @wusbhc->mutex is locked
+ *
+ * WARNING: From here it is not very safe to access anything hanging off
+ *	    wusb_dev
+ */
+static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
+				    struct wusb_port *port)
+{
+	struct wusb_dev *wusb_dev = port->wusb_dev;
+
+	port->status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE
+			  | USB_PORT_STAT_SUSPEND | USB_PORT_STAT_RESET
+			  | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED);
+	port->change |= USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE;
+	if (wusb_dev) {
+		dev_dbg(wusbhc->dev, "disconnecting device from port %d\n", wusb_dev->port_idx);
+		if (!list_empty(&wusb_dev->cack_node))
+			list_del_init(&wusb_dev->cack_node);
+		/* For the one in cack_add() */
+		wusb_dev_put(wusb_dev);
+	}
+	port->wusb_dev = NULL;
+
+	/* After a device disconnects, change the GTK (see [WUSB]
+	 * section 6.2.11.2). */
+	if (wusbhc->active)
+		wusbhc_gtk_rekey(wusbhc);
+
+	/* The Wireless USB part has forgotten about the device already; now
+	 * hub_wq's timer will pick up the disconnection and remove the USB
+	 * device from the system
+	 */
+}
+
+/*
+ * Refresh the list of keep alives to emit in the MMC
+ *
+ * We only publish the first four devices that have a coming timeout
+ * condition. Then when we are done processing those, we go for the
+ * next ones. We ignore the ones that have timed out already (they'll
+ * be purged).
+ *
+ * This might cause the first devices to timeout the last devices in
+ * the port array...FIXME: come up with a better algorithm?
+ *
+ * Note we can't do much about MMC's ops errors; we hope next refresh
+ * will kind of handle it.
+ *
+ * NOTE: @wusbhc->mutex is locked
+ */
+static void __wusbhc_keep_alive(struct wusbhc *wusbhc)
+{
+	struct device *dev = wusbhc->dev;
+	unsigned cnt;
+	struct wusb_dev *wusb_dev;
+	struct wusb_port *wusb_port;
+	struct wuie_keep_alive *ie = &wusbhc->keep_alive_ie;
+	unsigned keep_alives, old_keep_alives;
+
+	old_keep_alives = ie->hdr.bLength - sizeof(ie->hdr);
+	keep_alives = 0;
+	for (cnt = 0;
+	     keep_alives < WUIE_ELT_MAX && cnt < wusbhc->ports_max;
+	     cnt++) {
+		unsigned tt = msecs_to_jiffies(wusbhc->trust_timeout);
+
+		wusb_port = wusb_port_by_idx(wusbhc, cnt);
+		wusb_dev = wusb_port->wusb_dev;
+
+		if (wusb_dev == NULL)
+			continue;
+		if (wusb_dev->usb_dev == NULL)
+			continue;
+
+		if (time_after(jiffies, wusb_dev->entry_ts + tt)) {
+			dev_err(dev, "KEEPALIVE: device %u timed out\n",
+				wusb_dev->addr);
+			__wusbhc_dev_disconnect(wusbhc, wusb_port);
+		} else if (time_after(jiffies, wusb_dev->entry_ts + tt/3)) {
+			/* Approaching timeout cut off, need to refresh */
+			ie->bDeviceAddress[keep_alives++] = wusb_dev->addr;
+		}
+	}
+	if (keep_alives & 0x1)	/* pad to even number ([WUSB] section 7.5.9) */
+		ie->bDeviceAddress[keep_alives++] = 0x7f;
+	ie->hdr.bLength = sizeof(ie->hdr) +
+		keep_alives*sizeof(ie->bDeviceAddress[0]);
+	if (keep_alives > 0)
+		wusbhc_mmcie_set(wusbhc, 10, 5, &ie->hdr);
+	else if (old_keep_alives != 0)
+		wusbhc_mmcie_rm(wusbhc, &ie->hdr);
+}
+
+/*
+ * Do a run through all devices checking for timeouts
+ */
+static void wusbhc_keep_alive_run(struct work_struct *ws)
+{
+	struct delayed_work *dw = to_delayed_work(ws);
+	struct wusbhc *wusbhc =	container_of(dw, struct wusbhc, keep_alive_timer);
+
+	mutex_lock(&wusbhc->mutex);
+	__wusbhc_keep_alive(wusbhc);
+	mutex_unlock(&wusbhc->mutex);
+
+	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
+			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
+}
+
+/*
+ * Find the wusb_dev from its device address.
+ *
+ * The device can be found directly from the address (see
+ * wusb_cack_add() for where the device address is set to port_idx
+ * +2), except when the address is zero.
+ */
+static struct wusb_dev *wusbhc_find_dev_by_addr(struct wusbhc *wusbhc, u8 addr)
+{
+	int p;
+
+	if (addr == 0xff) /* unconnected */
+		return NULL;
+
+	if (addr > 0) {
+		int port = (addr & ~0x80) - 2;
+		if (port < 0 || port >= wusbhc->ports_max)
+			return NULL;
+		return wusb_port_by_idx(wusbhc, port)->wusb_dev;
+	}
+
+	/* Look for the device with address 0. */
+	for (p = 0; p < wusbhc->ports_max; p++) {
+		struct wusb_dev *wusb_dev = wusb_port_by_idx(wusbhc, p)->wusb_dev;
+		if (wusb_dev && wusb_dev->addr == addr)
+			return wusb_dev;
+	}
+	return NULL;
+}
+
+/*
+ * Handle a DN_Alive notification (WUSB1.0[7.6.1])
+ *
+ * This just updates the device activity timestamp and then refreshes
+ * the keep alive IE.
+ *
+ * @wusbhc shall be referenced and unlocked
+ */
+static void wusbhc_handle_dn_alive(struct wusbhc *wusbhc, u8 srcaddr)
+{
+	struct wusb_dev *wusb_dev;
+
+	mutex_lock(&wusbhc->mutex);
+	wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr);
+	if (wusb_dev == NULL) {
+		dev_dbg(wusbhc->dev, "ignoring DN_Alive from unconnected device %02x\n",
+			srcaddr);
+	} else {
+		wusb_dev->entry_ts = jiffies;
+		__wusbhc_keep_alive(wusbhc);
+	}
+	mutex_unlock(&wusbhc->mutex);
+}
+
+/*
+ * Handle a DN_Connect notification (WUSB1.0[7.6.1])
+ *
+ * @wusbhc
+ * @pkt_hdr
+ * @size:    Size of the buffer where the notification resides; if the
+ *           notification data suggests there should be more data than
+ *           available, an error will be signaled and the whole buffer
+ *           consumed.
+ *
+ * @wusbhc->mutex shall be held
+ */
+static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc,
+				     struct wusb_dn_hdr *dn_hdr,
+				     size_t size)
+{
+	struct device *dev = wusbhc->dev;
+	struct wusb_dn_connect *dnc;
+	char pr_cdid[WUSB_CKHDID_STRSIZE];
+	static const char *beacon_behaviour[] = {
+		"reserved",
+		"self-beacon",
+		"directed-beacon",
+		"no-beacon"
+	};
+
+	if (size < sizeof(*dnc)) {
+		dev_err(dev, "DN CONNECT: short notification (%zu < %zu)\n",
+			size, sizeof(*dnc));
+		return;
+	}
+
+	dnc = container_of(dn_hdr, struct wusb_dn_connect, hdr);
+	sprintf(pr_cdid, "%16ph", dnc->CDID.data);
+	dev_info(dev, "DN CONNECT: device %s @ %x (%s) wants to %s\n",
+		 pr_cdid,
+		 wusb_dn_connect_prev_dev_addr(dnc),
+		 beacon_behaviour[wusb_dn_connect_beacon_behavior(dnc)],
+		 wusb_dn_connect_new_connection(dnc) ? "connect" : "reconnect");
+	/* ACK the connect */
+	wusbhc_devconnect_ack(wusbhc, dnc, pr_cdid);
+}
+
+/*
+ * Handle a DN_Disconnect notification (WUSB1.0[7.6.1])
+ *
+ * Device is going down -- do the disconnect.
+ *
+ * @wusbhc shall be referenced and unlocked
+ */
+static void wusbhc_handle_dn_disconnect(struct wusbhc *wusbhc, u8 srcaddr)
+{
+	struct device *dev = wusbhc->dev;
+	struct wusb_dev *wusb_dev;
+
+	mutex_lock(&wusbhc->mutex);
+	wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr);
+	if (wusb_dev == NULL) {
+		dev_dbg(dev, "ignoring DN DISCONNECT from unconnected device %02x\n",
+			srcaddr);
+	} else {
+		dev_info(dev, "DN DISCONNECT: device 0x%02x going down\n",
+			wusb_dev->addr);
+		__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc,
+			wusb_dev->port_idx));
+	}
+	mutex_unlock(&wusbhc->mutex);
+}
+
+/*
+ * Handle a Device Notification coming a host
+ *
+ * The Device Notification comes from a host (HWA, DWA or WHCI)
+ * wrapped in a set of headers. Somebody else has peeled off those
+ * headers for us and we just get one Device Notifications.
+ *
+ * Invalid DNs (e.g., too short) are discarded.
+ *
+ * @wusbhc shall be referenced
+ *
+ * FIXMES:
+ *  - implement priorities as in WUSB1.0[Table 7-55]?
+ */
+void wusbhc_handle_dn(struct wusbhc *wusbhc, u8 srcaddr,
+		      struct wusb_dn_hdr *dn_hdr, size_t size)
+{
+	struct device *dev = wusbhc->dev;
+
+	if (size < sizeof(struct wusb_dn_hdr)) {
+		dev_err(dev, "DN data shorter than DN header (%d < %d)\n",
+			(int)size, (int)sizeof(struct wusb_dn_hdr));
+		return;
+	}
+	switch (dn_hdr->bType) {
+	case WUSB_DN_CONNECT:
+		wusbhc_handle_dn_connect(wusbhc, dn_hdr, size);
+		break;
+	case WUSB_DN_ALIVE:
+		wusbhc_handle_dn_alive(wusbhc, srcaddr);
+		break;
+	case WUSB_DN_DISCONNECT:
+		wusbhc_handle_dn_disconnect(wusbhc, srcaddr);
+		break;
+	case WUSB_DN_MASAVAILCHANGED:
+	case WUSB_DN_RWAKE:
+	case WUSB_DN_SLEEP:
+		/* FIXME: handle these DNs. */
+		break;
+	case WUSB_DN_EPRDY:
+		/* The hardware handles these. */
+		break;
+	default:
+		dev_warn(dev, "unknown DN %u (%d octets) from %u\n",
+			 dn_hdr->bType, (int)size, srcaddr);
+	}
+}
+EXPORT_SYMBOL_GPL(wusbhc_handle_dn);
+
+/*
+ * Disconnect a WUSB device from a the cluster
+ *
+ * @wusbhc
+ * @port     Fake port where the device is (wusbhc index, not USB port number).
+ *
+ * In Wireless USB, a disconnect is basically telling the device he is
+ * being disconnected and forgetting about him.
+ *
+ * We send the device a Device Disconnect IE (WUSB1.0[7.5.11]) for 100
+ * ms and then keep going.
+ *
+ * We don't do much in case of error; we always pretend we disabled
+ * the port and disconnected the device. If physically the request
+ * didn't get there (many things can fail in the way there), the stack
+ * will reject the device's communication attempts.
+ *
+ * @wusbhc should be refcounted and locked
+ */
+void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port_idx)
+{
+	int result;
+	struct device *dev = wusbhc->dev;
+	struct wusb_dev *wusb_dev;
+	struct wuie_disconnect *ie;
+
+	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
+	if (wusb_dev == NULL) {
+		/* reset no device? ignore */
+		dev_dbg(dev, "DISCONNECT: no device at port %u, ignoring\n",
+			port_idx);
+		return;
+	}
+	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
+
+	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
+	if (ie == NULL)
+		return;
+	ie->hdr.bLength = sizeof(*ie);
+	ie->hdr.bIEIdentifier = WUIE_ID_DEVICE_DISCONNECT;
+	ie->bDeviceAddress = wusb_dev->addr;
+	result = wusbhc_mmcie_set(wusbhc, 0, 0, &ie->hdr);
+	if (result < 0)
+		dev_err(dev, "DISCONNECT: can't set MMC: %d\n", result);
+	else {
+		/* At least 6 MMCs, assuming at least 1 MMC per zone. */
+		msleep(7*4);
+		wusbhc_mmcie_rm(wusbhc, &ie->hdr);
+	}
+	kfree(ie);
+}
+
+/*
+ * Walk over the BOS descriptor, verify and grok it
+ *
+ * @usb_dev: referenced
+ * @wusb_dev: referenced and unlocked
+ *
+ * The BOS descriptor is defined at WUSB1.0[7.4.1], and it defines a
+ * "flexible" way to wrap all kinds of descriptors inside an standard
+ * descriptor (wonder why they didn't use normal descriptors,
+ * btw). Not like they lack code.
+ *
+ * At the end we go to look for the WUSB Device Capabilities
+ * (WUSB1.0[7.4.1.1]) that is wrapped in a device capability descriptor
+ * that is part of the BOS descriptor set. That tells us what does the
+ * device support (dual role, beacon type, UWB PHY rates).
+ */
+static int wusb_dev_bos_grok(struct usb_device *usb_dev,
+			     struct wusb_dev *wusb_dev,
+			     struct usb_bos_descriptor *bos, size_t desc_size)
+{
+	ssize_t result;
+	struct device *dev = &usb_dev->dev;
+	void *itr, *top;
+
+	/* Walk over BOS capabilities, verify them */
+	itr = (void *)bos + sizeof(*bos);
+	top = itr + desc_size - sizeof(*bos);
+	while (itr < top) {
+		struct usb_dev_cap_header *cap_hdr = itr;
+		size_t cap_size;
+		u8 cap_type;
+		if (top - itr < sizeof(*cap_hdr)) {
+			dev_err(dev, "Device BUG? premature end of BOS header "
+				"data [offset 0x%02x]: only %zu bytes left\n",
+				(int)(itr - (void *)bos), top - itr);
+			result = -ENOSPC;
+			goto error_bad_cap;
+		}
+		cap_size = cap_hdr->bLength;
+		cap_type = cap_hdr->bDevCapabilityType;
+		if (cap_size == 0)
+			break;
+		if (cap_size > top - itr) {
+			dev_err(dev, "Device BUG? premature end of BOS data "
+				"[offset 0x%02x cap %02x %zu bytes]: "
+				"only %zu bytes left\n",
+				(int)(itr - (void *)bos),
+				cap_type, cap_size, top - itr);
+			result = -EBADF;
+			goto error_bad_cap;
+		}
+		switch (cap_type) {
+		case USB_CAP_TYPE_WIRELESS_USB:
+			if (cap_size != sizeof(*wusb_dev->wusb_cap_descr))
+				dev_err(dev, "Device BUG? WUSB Capability "
+					"descriptor is %zu bytes vs %zu "
+					"needed\n", cap_size,
+					sizeof(*wusb_dev->wusb_cap_descr));
+			else
+				wusb_dev->wusb_cap_descr = itr;
+			break;
+		default:
+			dev_err(dev, "BUG? Unknown BOS capability 0x%02x "
+				"(%zu bytes) at offset 0x%02x\n", cap_type,
+				cap_size, (int)(itr - (void *)bos));
+		}
+		itr += cap_size;
+	}
+	result = 0;
+error_bad_cap:
+	return result;
+}
+
+/*
+ * Add information from the BOS descriptors to the device
+ *
+ * @usb_dev: referenced
+ * @wusb_dev: referenced and unlocked
+ *
+ * So what we do is we alloc a space for the BOS descriptor of 64
+ * bytes; read the first four bytes which include the wTotalLength
+ * field (WUSB1.0[T7-26]) and if it fits in those 64 bytes, read the
+ * whole thing. If not we realloc to that size.
+ *
+ * Then we call the groking function, that will fill up
+ * wusb_dev->wusb_cap_descr, which is what we'll need later on.
+ */
+static int wusb_dev_bos_add(struct usb_device *usb_dev,
+			    struct wusb_dev *wusb_dev)
+{
+	ssize_t result;
+	struct device *dev = &usb_dev->dev;
+	struct usb_bos_descriptor *bos;
+	size_t alloc_size = 32, desc_size = 4;
+
+	bos = kmalloc(alloc_size, GFP_KERNEL);
+	if (bos == NULL)
+		return -ENOMEM;
+	result = usb_get_descriptor(usb_dev, USB_DT_BOS, 0, bos, desc_size);
+	if (result < 4) {
+		dev_err(dev, "Can't get BOS descriptor or too short: %zd\n",
+			result);
+		goto error_get_descriptor;
+	}
+	desc_size = le16_to_cpu(bos->wTotalLength);
+	if (desc_size >= alloc_size) {
+		kfree(bos);
+		alloc_size = desc_size;
+		bos = kmalloc(alloc_size, GFP_KERNEL);
+		if (bos == NULL)
+			return -ENOMEM;
+	}
+	result = usb_get_descriptor(usb_dev, USB_DT_BOS, 0, bos, desc_size);
+	if (result < 0 || result != desc_size) {
+		dev_err(dev, "Can't get  BOS descriptor or too short (need "
+			"%zu bytes): %zd\n", desc_size, result);
+		goto error_get_descriptor;
+	}
+	if (result < sizeof(*bos)
+	    || le16_to_cpu(bos->wTotalLength) != desc_size) {
+		dev_err(dev, "Can't get  BOS descriptor or too short (need "
+			"%zu bytes): %zd\n", desc_size, result);
+		goto error_get_descriptor;
+	}
+
+	result = wusb_dev_bos_grok(usb_dev, wusb_dev, bos, result);
+	if (result < 0)
+		goto error_bad_bos;
+	wusb_dev->bos = bos;
+	return 0;
+
+error_bad_bos:
+error_get_descriptor:
+	kfree(bos);
+	wusb_dev->wusb_cap_descr = NULL;
+	return result;
+}
+
+static void wusb_dev_bos_rm(struct wusb_dev *wusb_dev)
+{
+	kfree(wusb_dev->bos);
+	wusb_dev->wusb_cap_descr = NULL;
+};
+
+/*
+ * USB stack's device addition Notifier Callback
+ *
+ * Called from drivers/usb/core/hub.c when a new device is added; we
+ * use this hook to perform certain WUSB specific setup work on the
+ * new device. As well, it is the first time we can connect the
+ * wusb_dev and the usb_dev. So we note it down in wusb_dev and take a
+ * reference that we'll drop.
+ *
+ * First we need to determine if the device is a WUSB device (else we
+ * ignore it). For that we use the speed setting (USB_SPEED_WIRELESS)
+ * [FIXME: maybe we'd need something more definitive]. If so, we track
+ * it's usb_busd and from there, the WUSB HC.
+ *
+ * Because all WUSB HCs are contained in a 'struct wusbhc', voila, we
+ * get the wusbhc for the device.
+ *
+ * We have a reference on @usb_dev (as we are called at the end of its
+ * enumeration).
+ *
+ * NOTE: @usb_dev locked
+ */
+static void wusb_dev_add_ncb(struct usb_device *usb_dev)
+{
+	int result = 0;
+	struct wusb_dev *wusb_dev;
+	struct wusbhc *wusbhc;
+	struct device *dev = &usb_dev->dev;
+	u8 port_idx;
+
+	if (usb_dev->wusb == 0 || usb_dev->devnum == 1)
+		return;		/* skip non wusb and wusb RHs */
+
+	usb_set_device_state(usb_dev, USB_STATE_UNAUTHENTICATED);
+
+	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
+	if (wusbhc == NULL)
+		goto error_nodev;
+	mutex_lock(&wusbhc->mutex);
+	wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc, usb_dev);
+	port_idx = wusb_port_no_to_idx(usb_dev->portnum);
+	mutex_unlock(&wusbhc->mutex);
+	if (wusb_dev == NULL)
+		goto error_nodev;
+	wusb_dev->usb_dev = usb_get_dev(usb_dev);
+	usb_dev->wusb_dev = wusb_dev_get(wusb_dev);
+	result = wusb_dev_sec_add(wusbhc, usb_dev, wusb_dev);
+	if (result < 0) {
+		dev_err(dev, "Cannot enable security: %d\n", result);
+		goto error_sec_add;
+	}
+	/* Now query the device for it's BOS and attach it to wusb_dev */
+	result = wusb_dev_bos_add(usb_dev, wusb_dev);
+	if (result < 0) {
+		dev_err(dev, "Cannot get BOS descriptors: %d\n", result);
+		goto error_bos_add;
+	}
+	result = wusb_dev_sysfs_add(wusbhc, usb_dev, wusb_dev);
+	if (result < 0)
+		goto error_add_sysfs;
+out:
+	wusb_dev_put(wusb_dev);
+	wusbhc_put(wusbhc);
+error_nodev:
+	return;
+
+error_add_sysfs:
+	wusb_dev_bos_rm(wusb_dev);
+error_bos_add:
+	wusb_dev_sec_rm(wusb_dev);
+error_sec_add:
+	mutex_lock(&wusbhc->mutex);
+	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
+	mutex_unlock(&wusbhc->mutex);
+	goto out;
+}
+
+/*
+ * Undo all the steps done at connection by the notifier callback
+ *
+ * NOTE: @usb_dev locked
+ */
+static void wusb_dev_rm_ncb(struct usb_device *usb_dev)
+{
+	struct wusb_dev *wusb_dev = usb_dev->wusb_dev;
+
+	if (usb_dev->wusb == 0 || usb_dev->devnum == 1)
+		return;		/* skip non wusb and wusb RHs */
+
+	wusb_dev_sysfs_rm(wusb_dev);
+	wusb_dev_bos_rm(wusb_dev);
+	wusb_dev_sec_rm(wusb_dev);
+	wusb_dev->usb_dev = NULL;
+	usb_dev->wusb_dev = NULL;
+	wusb_dev_put(wusb_dev);
+	usb_put_dev(usb_dev);
+}
+
+/*
+ * Handle notifications from the USB stack (notifier call back)
+ *
+ * This is called when the USB stack does a
+ * usb_{bus,device}_{add,remove}() so we can do WUSB specific
+ * handling. It is called with [for the case of
+ * USB_DEVICE_{ADD,REMOVE} with the usb_dev locked.
+ */
+int wusb_usb_ncb(struct notifier_block *nb, unsigned long val,
+		 void *priv)
+{
+	int result = NOTIFY_OK;
+
+	switch (val) {
+	case USB_DEVICE_ADD:
+		wusb_dev_add_ncb(priv);
+		break;
+	case USB_DEVICE_REMOVE:
+		wusb_dev_rm_ncb(priv);
+		break;
+	case USB_BUS_ADD:
+		/* ignore (for now) */
+	case USB_BUS_REMOVE:
+		break;
+	default:
+		WARN_ON(1);
+		result = NOTIFY_BAD;
+	}
+	return result;
+}
+
+/*
+ * Return a referenced wusb_dev given a @wusbhc and @usb_dev
+ */
+struct wusb_dev *__wusb_dev_get_by_usb_dev(struct wusbhc *wusbhc,
+					   struct usb_device *usb_dev)
+{
+	struct wusb_dev *wusb_dev;
+	u8 port_idx;
+
+	port_idx = wusb_port_no_to_idx(usb_dev->portnum);
+	BUG_ON(port_idx > wusbhc->ports_max);
+	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
+	if (wusb_dev != NULL)		/* ops, device is gone */
+		wusb_dev_get(wusb_dev);
+	return wusb_dev;
+}
+EXPORT_SYMBOL_GPL(__wusb_dev_get_by_usb_dev);
+
+void wusb_dev_destroy(struct kref *_wusb_dev)
+{
+	struct wusb_dev *wusb_dev = container_of(_wusb_dev, struct wusb_dev, refcnt);
+
+	list_del_init(&wusb_dev->cack_node);
+	wusb_dev_free(wusb_dev);
+}
+EXPORT_SYMBOL_GPL(wusb_dev_destroy);
+
+/*
+ * Create all the device connect handling infrastructure
+ *
+ * This is basically the device info array, Connect Acknowledgement
+ * (cack) lists, keep-alive timers (and delayed work thread).
+ */
+int wusbhc_devconnect_create(struct wusbhc *wusbhc)
+{
+	wusbhc->keep_alive_ie.hdr.bIEIdentifier = WUIE_ID_KEEP_ALIVE;
+	wusbhc->keep_alive_ie.hdr.bLength = sizeof(wusbhc->keep_alive_ie.hdr);
+	INIT_DELAYED_WORK(&wusbhc->keep_alive_timer, wusbhc_keep_alive_run);
+
+	wusbhc->cack_ie.hdr.bIEIdentifier = WUIE_ID_CONNECTACK;
+	wusbhc->cack_ie.hdr.bLength = sizeof(wusbhc->cack_ie.hdr);
+	INIT_LIST_HEAD(&wusbhc->cack_list);
+
+	return 0;
+}
+
+/*
+ * Release all resources taken by the devconnect stuff
+ */
+void wusbhc_devconnect_destroy(struct wusbhc *wusbhc)
+{
+	/* no op */
+}
+
+/*
+ * wusbhc_devconnect_start - start accepting device connections
+ * @wusbhc: the WUSB HC
+ *
+ * Sets the Host Info IE to accept all new connections.
+ *
+ * FIXME: This also enables the keep alives but this is not necessary
+ * until there are connected and authenticated devices.
+ */
+int wusbhc_devconnect_start(struct wusbhc *wusbhc)
+{
+	struct device *dev = wusbhc->dev;
+	struct wuie_host_info *hi;
+	int result;
+
+	hi = kzalloc(sizeof(*hi), GFP_KERNEL);
+	if (hi == NULL)
+		return -ENOMEM;
+
+	hi->hdr.bLength       = sizeof(*hi);
+	hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO;
+	hi->attributes        = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL);
+	hi->CHID              = wusbhc->chid;
+	result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr);
+	if (result < 0) {
+		dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result);
+		goto error_mmcie_set;
+	}
+	wusbhc->wuie_host_info = hi;
+
+	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
+			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
+
+	return 0;
+
+error_mmcie_set:
+	kfree(hi);
+	return result;
+}
+
+/*
+ * wusbhc_devconnect_stop - stop managing connected devices
+ * @wusbhc: the WUSB HC
+ *
+ * Disconnects any devices still connected, stops the keep alives and
+ * removes the Host Info IE.
+ */
+void wusbhc_devconnect_stop(struct wusbhc *wusbhc)
+{
+	int i;
+
+	mutex_lock(&wusbhc->mutex);
+	for (i = 0; i < wusbhc->ports_max; i++) {
+		if (wusbhc->port[i].wusb_dev)
+			__wusbhc_dev_disconnect(wusbhc, &wusbhc->port[i]);
+	}
+	mutex_unlock(&wusbhc->mutex);
+
+	cancel_delayed_work_sync(&wusbhc->keep_alive_timer);
+	wusbhc_mmcie_rm(wusbhc, &wusbhc->wuie_host_info->hdr);
+	kfree(wusbhc->wuie_host_info);
+	wusbhc->wuie_host_info = NULL;
+}
+
+/*
+ * wusb_set_dev_addr - set the WUSB device address used by the host
+ * @wusbhc: the WUSB HC the device is connect to
+ * @wusb_dev: the WUSB device
+ * @addr: new device address
+ */
+int wusb_set_dev_addr(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, u8 addr)
+{
+	int result;
+
+	wusb_dev->addr = addr;
+	result = wusbhc->dev_info_set(wusbhc, wusb_dev);
+	if (result < 0)
+		dev_err(wusbhc->dev, "device %d: failed to set device "
+			"address\n", wusb_dev->port_idx);
+	else
+		dev_info(wusbhc->dev, "device %d: %s addr %u\n",
+			 wusb_dev->port_idx,
+			 (addr & WUSB_DEV_ADDR_UNAUTH) ? "unauth" : "auth",
+			 wusb_dev->addr);
+
+	return result;
+}
diff --git a/drivers/staging/wusbcore/host/Kconfig b/drivers/staging/wusbcore/host/Kconfig
new file mode 100644
index 000000000000..9a73f9360a08
--- /dev/null
+++ b/drivers/staging/wusbcore/host/Kconfig
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config USB_WHCI_HCD
+	tristate "Wireless USB Host Controller Interface (WHCI) driver"
+	depends on USB_PCI && USB && UWB
+	select USB_WUSB
+	select UWB_WHCI
+	help
+	  A driver for PCI-based Wireless USB Host Controllers that are
+	  compliant with the WHCI specification.
+
+	  To compile this driver a module, choose M here: the module
+	  will be called "whci-hcd".
+
+config USB_HWA_HCD
+	tristate "Host Wire Adapter (HWA) driver"
+	depends on USB && UWB
+	select USB_WUSB
+	select UWB_HWA
+	help
+	  This driver enables you to connect Wireless USB devices to
+	  your system using a Host Wire Adaptor USB dongle. This is an
+	  UWB Radio Controller and WUSB Host Controller connected to
+	  your machine via USB (specified in WUSB1.0).
+
+	  To compile this driver a module, choose M here: the module
+	  will be called "hwa-hc".
+
diff --git a/drivers/staging/wusbcore/host/Makefile b/drivers/staging/wusbcore/host/Makefile
new file mode 100644
index 000000000000..d65ee8a73e21
--- /dev/null
+++ b/drivers/staging/wusbcore/host/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_USB_WHCI_HCD)	+= whci/
+obj-$(CONFIG_USB_HWA_HCD)	+= hwa-hc.o
diff --git a/drivers/staging/wusbcore/host/hwa-hc.c b/drivers/staging/wusbcore/host/hwa-hc.c
new file mode 100644
index 000000000000..8d959e91fe27
--- /dev/null
+++ b/drivers/staging/wusbcore/host/hwa-hc.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Host Wire Adapter:
+ * Driver glue, HWA-specific functions, bridges to WAHC and WUSBHC
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * The HWA driver is a simple layer that forwards requests to the WAHC
+ * (Wire Adater Host Controller) or WUSBHC (Wireless USB Host
+ * Controller) layers.
+ *
+ * Host Wire Adapter is the 'WUSB 1.0 standard' name for Wireless-USB
+ * Host Controller that is connected to your system via USB (a USB
+ * dongle that implements a USB host...). There is also a Device Wired
+ * Adaptor, DWA (Wireless USB hub) that uses the same mechanism for
+ * transferring data (it is after all a USB host connected via
+ * Wireless USB), we have a common layer called Wire Adapter Host
+ * Controller that does all the hard work. The WUSBHC (Wireless USB
+ * Host Controller) is the part common to WUSB Host Controllers, the
+ * HWA and the PCI-based one, that is implemented following the WHCI
+ * spec. All these layers are implemented in ../wusbcore.
+ *
+ * The main functions are hwahc_op_urb_{en,de}queue(), that pass the
+ * job of converting a URB to a Wire Adapter
+ *
+ * Entry points:
+ *
+ *   hwahc_driver_*()   Driver initialization, registration and
+ *                      teardown.
+ *
+ *   hwahc_probe()	New device came up, create an instance for
+ *                      it [from device enumeration].
+ *
+ *   hwahc_disconnect()	Remove device instance [from device
+ *                      enumeration].
+ *
+ *   [__]hwahc_op_*()   Host-Wire-Adaptor specific functions for
+ *                      starting/stopping/etc (some might be made also
+ *                      DWA).
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/wait.h>
+#include <linux/completion.h>
+#include "../wa-hc.h"
+#include "../wusbhc.h"
+
+struct hwahc {
+	struct wusbhc wusbhc;	/* has to be 1st */
+	struct wahc wa;
+};
+
+/*
+ * FIXME should be wusbhc
+ *
+ * NOTE: we need to cache the Cluster ID because later...there is no
+ *       way to get it :)
+ */
+static int __hwahc_set_cluster_id(struct hwahc *hwahc, u8 cluster_id)
+{
+	int result;
+	struct wusbhc *wusbhc = &hwahc->wusbhc;
+	struct wahc *wa = &hwahc->wa;
+	struct device *dev = &wa->usb_iface->dev;
+
+	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_SET_CLUSTER_ID,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			cluster_id,
+			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (result < 0)
+		dev_err(dev, "Cannot set WUSB Cluster ID to 0x%02x: %d\n",
+			cluster_id, result);
+	else
+		wusbhc->cluster_id = cluster_id;
+	dev_info(dev, "Wireless USB Cluster ID set to 0x%02x\n", cluster_id);
+	return result;
+}
+
+static int __hwahc_op_set_num_dnts(struct wusbhc *wusbhc, u8 interval, u8 slots)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+
+	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_SET_NUM_DNTS,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			interval << 8 | slots,
+			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+}
+
+/*
+ * Reset a WUSB host controller and wait for it to complete doing it.
+ *
+ * @usb_hcd:	Pointer to WUSB Host Controller instance.
+ *
+ */
+static int hwahc_op_reset(struct usb_hcd *usb_hcd)
+{
+	int result;
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct device *dev = &hwahc->wa.usb_iface->dev;
+
+	mutex_lock(&wusbhc->mutex);
+	wa_nep_disarm(&hwahc->wa);
+	result = __wa_set_feature(&hwahc->wa, WA_RESET);
+	if (result < 0) {
+		dev_err(dev, "error commanding HC to reset: %d\n", result);
+		goto error_unlock;
+	}
+	result = __wa_wait_status(&hwahc->wa, WA_STATUS_RESETTING, 0);
+	if (result < 0) {
+		dev_err(dev, "error waiting for HC to reset: %d\n", result);
+		goto error_unlock;
+	}
+error_unlock:
+	mutex_unlock(&wusbhc->mutex);
+	return result;
+}
+
+/*
+ * FIXME: break this function up
+ */
+static int hwahc_op_start(struct usb_hcd *usb_hcd)
+{
+	u8 addr;
+	int result;
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+
+	result = -ENOSPC;
+	mutex_lock(&wusbhc->mutex);
+	addr = wusb_cluster_id_get();
+	if (addr == 0)
+		goto error_cluster_id_get;
+	result = __hwahc_set_cluster_id(hwahc, addr);
+	if (result < 0)
+		goto error_set_cluster_id;
+
+	usb_hcd->uses_new_polling = 1;
+	set_bit(HCD_FLAG_POLL_RH, &usb_hcd->flags);
+	usb_hcd->state = HC_STATE_RUNNING;
+
+	/*
+	 * prevent USB core from suspending the root hub since
+	 * bus_suspend and bus_resume are not yet supported.
+	 */
+	pm_runtime_get_noresume(&usb_hcd->self.root_hub->dev);
+
+	result = 0;
+out:
+	mutex_unlock(&wusbhc->mutex);
+	return result;
+
+error_set_cluster_id:
+	wusb_cluster_id_put(addr);
+error_cluster_id_get:
+	goto out;
+
+}
+
+/*
+ * No need to abort pipes, as when this is called, all the children
+ * has been disconnected and that has done it [through
+ * usb_disable_interface() -> usb_disable_endpoint() ->
+ * hwahc_op_ep_disable() - >rpipe_ep_disable()].
+ */
+static void hwahc_op_stop(struct usb_hcd *usb_hcd)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+
+	mutex_lock(&wusbhc->mutex);
+	wusb_cluster_id_put(wusbhc->cluster_id);
+	mutex_unlock(&wusbhc->mutex);
+}
+
+static int hwahc_op_get_frame_number(struct usb_hcd *usb_hcd)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+
+	/*
+	 * We cannot query the HWA for the WUSB time since that requires sending
+	 * a synchronous URB and this function can be called in_interrupt.
+	 * Instead, query the USB frame number for our parent and use that.
+	 */
+	return usb_get_current_frame_number(wa->usb_dev);
+}
+
+static int hwahc_op_urb_enqueue(struct usb_hcd *usb_hcd, struct urb *urb,
+				gfp_t gfp)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+
+	return wa_urb_enqueue(&hwahc->wa, urb->ep, urb, gfp);
+}
+
+static int hwahc_op_urb_dequeue(struct usb_hcd *usb_hcd, struct urb *urb,
+				int status)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+
+	return wa_urb_dequeue(&hwahc->wa, urb, status);
+}
+
+/*
+ * Release resources allocated for an endpoint
+ *
+ * If there is an associated rpipe to this endpoint, go ahead and put it.
+ */
+static void hwahc_op_endpoint_disable(struct usb_hcd *usb_hcd,
+				      struct usb_host_endpoint *ep)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+
+	rpipe_ep_disable(&hwahc->wa, ep);
+}
+
+static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
+{
+	int result;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct device *dev = &hwahc->wa.usb_iface->dev;
+
+	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error commanding HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
+	if (result < 0) {
+		dev_err(dev, "error waiting for HC to start: %d\n", result);
+		goto error_stop;
+	}
+	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "cannot listen to notifications: %d\n", result);
+		goto error_stop;
+	}
+	/*
+	 * If WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS is set,
+	 *  disable transfer notifications.
+	 */
+	if (hwahc->wa.quirks &
+		WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS) {
+		struct usb_host_interface *cur_altsetting =
+			hwahc->wa.usb_iface->cur_altsetting;
+
+		result = usb_control_msg(hwahc->wa.usb_dev,
+				usb_sndctrlpipe(hwahc->wa.usb_dev, 0),
+				WA_REQ_ALEREON_DISABLE_XFER_NOTIFICATIONS,
+				USB_DIR_OUT | USB_TYPE_VENDOR |
+					USB_RECIP_INTERFACE,
+				WA_REQ_ALEREON_FEATURE_SET,
+				cur_altsetting->desc.bInterfaceNumber,
+				NULL, 0,
+				USB_CTRL_SET_TIMEOUT);
+		/*
+		 * If we successfully sent the control message, start DTI here
+		 * because no transfer notifications will be received which is
+		 * where DTI is normally started.
+		 */
+		if (result == 0)
+			result = wa_dti_start(&hwahc->wa);
+		else
+			result = 0;	/* OK.  Continue normally. */
+
+		if (result < 0) {
+			dev_err(dev, "cannot start DTI: %d\n", result);
+			goto error_dti_start;
+		}
+	}
+
+	return result;
+
+error_dti_start:
+	wa_nep_disarm(&hwahc->wa);
+error_stop:
+	__wa_clear_feature(&hwahc->wa, WA_ENABLE);
+	return result;
+}
+
+static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc, int delay)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	int ret;
+
+	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			      WUSB_REQ_CHAN_STOP,
+			      USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			      delay * 1000,
+			      iface_no,
+			      NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (ret == 0)
+		msleep(delay);
+
+	wa_nep_disarm(&hwahc->wa);
+	__wa_stop(&hwahc->wa);
+}
+
+/*
+ * Set the UWB MAS allocation for the WUSB cluster
+ *
+ * @stream_index: stream to use (-1 for cancelling the allocation)
+ * @mas: mas bitmap to use
+ */
+static int __hwahc_op_bwa_set(struct wusbhc *wusbhc, s8 stream_index,
+			      const struct uwb_mas_bm *mas)
+{
+	int result;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	struct device *dev = &wa->usb_iface->dev;
+	u8 mas_le[UWB_NUM_MAS/8];
+
+	/* Set the stream index */
+	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_SET_STREAM_IDX,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			stream_index,
+			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "Cannot set WUSB stream index: %d\n", result);
+		goto out;
+	}
+	uwb_mas_bm_copy_le(mas_le, mas);
+	/* Set the MAS allocation */
+	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_SET_WUSB_MAS,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			0, wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+			mas_le, 32, USB_CTRL_SET_TIMEOUT);
+	if (result < 0)
+		dev_err(dev, "Cannot set WUSB MAS allocation: %d\n", result);
+out:
+	return result;
+}
+
+/*
+ * Add an IE to the host's MMC
+ *
+ * @interval:    See WUSB1.0[8.5.3.1]
+ * @repeat_cnt:  See WUSB1.0[8.5.3.1]
+ * @handle:      See WUSB1.0[8.5.3.1]
+ * @wuie:        Pointer to the header of the WUSB IE data to add.
+ *               MUST BE allocated in a kmalloc buffer (no stack or
+ *               vmalloc).
+ *
+ * NOTE: the format of the WUSB IEs for MMCs are different to the
+ *       normal MBOA MAC IEs (IE Id + Length in MBOA MAC vs. Length +
+ *       Id in WUSB IEs). Standards...you gotta love'em.
+ */
+static int __hwahc_op_mmcie_add(struct wusbhc *wusbhc, u8 interval,
+				u8 repeat_cnt, u8 handle,
+				struct wuie_hdr *wuie)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+
+	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_ADD_MMC_IE,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			interval << 8 | repeat_cnt,
+			handle << 8 | iface_no,
+			wuie, wuie->bLength, USB_CTRL_SET_TIMEOUT);
+}
+
+/*
+ * Remove an IE to the host's MMC
+ *
+ * @handle:      See WUSB1.0[8.5.3.1]
+ */
+static int __hwahc_op_mmcie_rm(struct wusbhc *wusbhc, u8 handle)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_REMOVE_MMC_IE,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			0, handle << 8 | iface_no,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+}
+
+/*
+ * Update device information for a given fake port
+ *
+ * @port_idx: Fake port to which device is connected (wusbhc index, not
+ *            USB port number).
+ */
+static int __hwahc_op_dev_info_set(struct wusbhc *wusbhc,
+				   struct wusb_dev *wusb_dev)
+{
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	struct hwa_dev_info *dev_info;
+	int ret;
+
+	/* fill out the Device Info buffer and send it */
+	dev_info = kzalloc(sizeof(struct hwa_dev_info), GFP_KERNEL);
+	if (!dev_info)
+		return -ENOMEM;
+	uwb_mas_bm_copy_le(dev_info->bmDeviceAvailability,
+			   &wusb_dev->availability);
+	dev_info->bDeviceAddress = wusb_dev->addr;
+
+	/*
+	 * If the descriptors haven't been read yet, use a default PHY
+	 * rate of 53.3 Mbit/s only.  The correct value will be used
+	 * when this will be called again as part of the
+	 * authentication process (which occurs after the descriptors
+	 * have been read).
+	 */
+	if (wusb_dev->wusb_cap_descr)
+		dev_info->wPHYRates = wusb_dev->wusb_cap_descr->wPHYRates;
+	else
+		dev_info->wPHYRates = cpu_to_le16(USB_WIRELESS_PHY_53);
+
+	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			WUSB_REQ_SET_DEV_INFO,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			0, wusb_dev->port_idx << 8 | iface_no,
+			dev_info, sizeof(struct hwa_dev_info),
+			USB_CTRL_SET_TIMEOUT);
+	kfree(dev_info);
+	return ret;
+}
+
+/*
+ * Set host's idea of which encryption (and key) method to use when
+ * talking to ad evice on a given port.
+ *
+ * If key is NULL, it means disable encryption for that "virtual port"
+ * (used when we disconnect).
+ */
+static int __hwahc_dev_set_key(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
+			       const void *key, size_t key_size,
+			       u8 key_idx)
+{
+	int result = -ENOMEM;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	struct usb_key_descriptor *keyd;
+	size_t keyd_len;
+
+	keyd_len = sizeof(*keyd) + key_size;
+	keyd = kzalloc(keyd_len, GFP_KERNEL);
+	if (keyd == NULL)
+		return -ENOMEM;
+
+	keyd->bLength = keyd_len;
+	keyd->bDescriptorType = USB_DT_KEY;
+	keyd->tTKID[0] = (tkid >>  0) & 0xff;
+	keyd->tTKID[1] = (tkid >>  8) & 0xff;
+	keyd->tTKID[2] = (tkid >> 16) & 0xff;
+	memcpy(keyd->bKeyData, key, key_size);
+
+	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			USB_REQ_SET_DESCRIPTOR,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			USB_DT_KEY << 8 | key_idx,
+			port_idx << 8 | iface_no,
+			keyd, keyd_len, USB_CTRL_SET_TIMEOUT);
+
+	kzfree(keyd); /* clear keys etc. */
+	return result;
+}
+
+/*
+ * Set host's idea of which encryption (and key) method to use when
+ * talking to ad evice on a given port.
+ *
+ * If key is NULL, it means disable encryption for that "virtual port"
+ * (used when we disconnect).
+ */
+static int __hwahc_op_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
+			      const void *key, size_t key_size)
+{
+	int result = -ENOMEM;
+	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	struct wahc *wa = &hwahc->wa;
+	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
+	u8 encryption_value;
+
+	/* Tell the host which key to use to talk to the device */
+	if (key) {
+		u8 key_idx = wusb_key_index(0, WUSB_KEY_INDEX_TYPE_PTK,
+					    WUSB_KEY_INDEX_ORIGINATOR_HOST);
+
+		result = __hwahc_dev_set_key(wusbhc, port_idx, tkid,
+					     key, key_size, key_idx);
+		if (result < 0)
+			goto error_set_key;
+		encryption_value = wusbhc->ccm1_etd->bEncryptionValue;
+	} else {
+		/* FIXME: this should come from wusbhc->etd[UNSECURE].value */
+		encryption_value = 0;
+	}
+
+	/* Set the encryption type for communicating with the device */
+	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			USB_REQ_SET_ENCRYPTION,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			encryption_value, port_idx << 8 | iface_no,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (result < 0)
+		dev_err(wusbhc->dev, "Can't set host's WUSB encryption for "
+			"port index %u to %s (value %d): %d\n", port_idx,
+			wusb_et_name(wusbhc->ccm1_etd->bEncryptionType),
+			wusbhc->ccm1_etd->bEncryptionValue, result);
+error_set_key:
+	return result;
+}
+
+/*
+ * Set host's GTK key
+ */
+static int __hwahc_op_set_gtk(struct wusbhc *wusbhc, u32 tkid,
+			      const void *key, size_t key_size)
+{
+	u8 key_idx = wusb_key_index(0, WUSB_KEY_INDEX_TYPE_GTK,
+				    WUSB_KEY_INDEX_ORIGINATOR_HOST);
+
+	return __hwahc_dev_set_key(wusbhc, 0, tkid, key, key_size, key_idx);
+}
+
+/*
+ * Get the Wire Adapter class-specific descriptor
+ *
+ * NOTE: this descriptor comes with the big bundled configuration
+ *       descriptor that includes the interfaces' and endpoints', so
+ *       we just look for it in the cached copy kept by the USB stack.
+ *
+ * NOTE2: We convert LE fields to CPU order.
+ */
+static int wa_fill_descr(struct wahc *wa)
+{
+	int result;
+	struct device *dev = &wa->usb_iface->dev;
+	char *itr;
+	struct usb_device *usb_dev = wa->usb_dev;
+	struct usb_descriptor_header *hdr;
+	struct usb_wa_descriptor *wa_descr;
+	size_t itr_size, actconfig_idx;
+
+	actconfig_idx = (usb_dev->actconfig - usb_dev->config) /
+			sizeof(usb_dev->config[0]);
+	itr = usb_dev->rawdescriptors[actconfig_idx];
+	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
+	while (itr_size >= sizeof(*hdr)) {
+		hdr = (struct usb_descriptor_header *) itr;
+		dev_dbg(dev, "Extra device descriptor: "
+			"type %02x/%u bytes @ %zu (%zu left)\n",
+			hdr->bDescriptorType, hdr->bLength,
+			(itr - usb_dev->rawdescriptors[actconfig_idx]),
+			itr_size);
+		if (hdr->bDescriptorType == USB_DT_WIRE_ADAPTER)
+			goto found;
+		itr += hdr->bLength;
+		itr_size -= hdr->bLength;
+	}
+	dev_err(dev, "cannot find Wire Adapter Class descriptor\n");
+	return -ENODEV;
+
+found:
+	result = -EINVAL;
+	if (hdr->bLength > itr_size) {	/* is it available? */
+		dev_err(dev, "incomplete Wire Adapter Class descriptor "
+			"(%zu bytes left, %u needed)\n",
+			itr_size, hdr->bLength);
+		goto error;
+	}
+	if (hdr->bLength < sizeof(*wa->wa_descr)) {
+		dev_err(dev, "short Wire Adapter Class descriptor\n");
+		goto error;
+	}
+	wa->wa_descr = wa_descr = (struct usb_wa_descriptor *) hdr;
+	if (le16_to_cpu(wa_descr->bcdWAVersion) > 0x0100)
+		dev_warn(dev, "Wire Adapter v%d.%d newer than groked v1.0\n",
+			 (le16_to_cpu(wa_descr->bcdWAVersion) & 0xff00) >> 8,
+			 le16_to_cpu(wa_descr->bcdWAVersion) & 0x00ff);
+	result = 0;
+error:
+	return result;
+}
+
+static const struct hc_driver hwahc_hc_driver = {
+	.description = "hwa-hcd",
+	.product_desc = "Wireless USB HWA host controller",
+	.hcd_priv_size = sizeof(struct hwahc) - sizeof(struct usb_hcd),
+	.irq = NULL,			/* FIXME */
+	.flags = HCD_USB25,
+	.reset = hwahc_op_reset,
+	.start = hwahc_op_start,
+	.stop = hwahc_op_stop,
+	.get_frame_number = hwahc_op_get_frame_number,
+	.urb_enqueue = hwahc_op_urb_enqueue,
+	.urb_dequeue = hwahc_op_urb_dequeue,
+	.endpoint_disable = hwahc_op_endpoint_disable,
+
+	.hub_status_data = wusbhc_rh_status_data,
+	.hub_control = wusbhc_rh_control,
+	.start_port_reset = wusbhc_rh_start_port_reset,
+};
+
+static int hwahc_security_create(struct hwahc *hwahc)
+{
+	int result;
+	struct wusbhc *wusbhc = &hwahc->wusbhc;
+	struct usb_device *usb_dev = hwahc->wa.usb_dev;
+	struct device *dev = &usb_dev->dev;
+	struct usb_security_descriptor *secd;
+	struct usb_encryption_descriptor *etd;
+	void *itr, *top;
+	size_t itr_size, needed, bytes;
+	u8 index;
+	char buf[64];
+
+	/* Find the host's security descriptors in the config descr bundle */
+	index = (usb_dev->actconfig - usb_dev->config) /
+		sizeof(usb_dev->config[0]);
+	itr = usb_dev->rawdescriptors[index];
+	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
+	top = itr + itr_size;
+	result = __usb_get_extra_descriptor(usb_dev->rawdescriptors[index],
+			le16_to_cpu(usb_dev->actconfig->desc.wTotalLength),
+			USB_DT_SECURITY, (void **) &secd, sizeof(*secd));
+	if (result == -1) {
+		dev_warn(dev, "BUG? WUSB host has no security descriptors\n");
+		return 0;
+	}
+	needed = sizeof(*secd);
+	if (top - (void *)secd < needed) {
+		dev_err(dev, "BUG? Not enough data to process security "
+			"descriptor header (%zu bytes left vs %zu needed)\n",
+			top - (void *) secd, needed);
+		return 0;
+	}
+	needed = le16_to_cpu(secd->wTotalLength);
+	if (top - (void *)secd < needed) {
+		dev_err(dev, "BUG? Not enough data to process security "
+			"descriptors (%zu bytes left vs %zu needed)\n",
+			top - (void *) secd, needed);
+		return 0;
+	}
+	/* Walk over the sec descriptors and store CCM1's on wusbhc */
+	itr = (void *) secd + sizeof(*secd);
+	top = (void *) secd + le16_to_cpu(secd->wTotalLength);
+	index = 0;
+	bytes = 0;
+	while (itr < top) {
+		etd = itr;
+		if (top - itr < sizeof(*etd)) {
+			dev_err(dev, "BUG: bad host security descriptor; "
+				"not enough data (%zu vs %zu left)\n",
+				top - itr, sizeof(*etd));
+			break;
+		}
+		if (etd->bLength < sizeof(*etd)) {
+			dev_err(dev, "BUG: bad host encryption descriptor; "
+				"descriptor is too short "
+				"(%zu vs %zu needed)\n",
+				(size_t)etd->bLength, sizeof(*etd));
+			break;
+		}
+		itr += etd->bLength;
+		bytes += snprintf(buf + bytes, sizeof(buf) - bytes,
+				  "%s (0x%02x) ",
+				  wusb_et_name(etd->bEncryptionType),
+				  etd->bEncryptionValue);
+		wusbhc->ccm1_etd = etd;
+	}
+	dev_info(dev, "supported encryption types: %s\n", buf);
+	if (wusbhc->ccm1_etd == NULL) {
+		dev_err(dev, "E: host doesn't support CCM-1 crypto\n");
+		return 0;
+	}
+	/* Pretty print what we support */
+	return 0;
+}
+
+static void hwahc_security_release(struct hwahc *hwahc)
+{
+	/* nothing to do here so far... */
+}
+
+static int hwahc_create(struct hwahc *hwahc, struct usb_interface *iface,
+	kernel_ulong_t quirks)
+{
+	int result;
+	struct device *dev = &iface->dev;
+	struct wusbhc *wusbhc = &hwahc->wusbhc;
+	struct wahc *wa = &hwahc->wa;
+	struct usb_device *usb_dev = interface_to_usbdev(iface);
+
+	wa->usb_dev = usb_get_dev(usb_dev);	/* bind the USB device */
+	wa->usb_iface = usb_get_intf(iface);
+	wusbhc->dev = dev;
+	/* defer getting the uwb_rc handle until it is needed since it
+	 * may not have been registered by the hwa_rc driver yet. */
+	wusbhc->uwb_rc = NULL;
+	result = wa_fill_descr(wa);	/* Get the device descriptor */
+	if (result < 0)
+		goto error_fill_descriptor;
+	if (wa->wa_descr->bNumPorts > USB_MAXCHILDREN) {
+		dev_err(dev, "FIXME: USB_MAXCHILDREN too low for WUSB "
+			"adapter (%u ports)\n", wa->wa_descr->bNumPorts);
+		wusbhc->ports_max = USB_MAXCHILDREN;
+	} else {
+		wusbhc->ports_max = wa->wa_descr->bNumPorts;
+	}
+	wusbhc->mmcies_max = wa->wa_descr->bNumMMCIEs;
+	wusbhc->start = __hwahc_op_wusbhc_start;
+	wusbhc->stop = __hwahc_op_wusbhc_stop;
+	wusbhc->mmcie_add = __hwahc_op_mmcie_add;
+	wusbhc->mmcie_rm = __hwahc_op_mmcie_rm;
+	wusbhc->dev_info_set = __hwahc_op_dev_info_set;
+	wusbhc->bwa_set = __hwahc_op_bwa_set;
+	wusbhc->set_num_dnts = __hwahc_op_set_num_dnts;
+	wusbhc->set_ptk = __hwahc_op_set_ptk;
+	wusbhc->set_gtk = __hwahc_op_set_gtk;
+	result = hwahc_security_create(hwahc);
+	if (result < 0) {
+		dev_err(dev, "Can't initialize security: %d\n", result);
+		goto error_security_create;
+	}
+	wa->wusb = wusbhc;	/* FIXME: ugly, need to fix */
+	result = wusbhc_create(&hwahc->wusbhc);
+	if (result < 0) {
+		dev_err(dev, "Can't create WUSB HC structures: %d\n", result);
+		goto error_wusbhc_create;
+	}
+	result = wa_create(&hwahc->wa, iface, quirks);
+	if (result < 0)
+		goto error_wa_create;
+	return 0;
+
+error_wa_create:
+	wusbhc_destroy(&hwahc->wusbhc);
+error_wusbhc_create:
+	/* WA Descr fill allocs no resources */
+error_security_create:
+error_fill_descriptor:
+	usb_put_intf(iface);
+	usb_put_dev(usb_dev);
+	return result;
+}
+
+static void hwahc_destroy(struct hwahc *hwahc)
+{
+	struct wusbhc *wusbhc = &hwahc->wusbhc;
+
+	mutex_lock(&wusbhc->mutex);
+	__wa_destroy(&hwahc->wa);
+	wusbhc_destroy(&hwahc->wusbhc);
+	hwahc_security_release(hwahc);
+	hwahc->wusbhc.dev = NULL;
+	uwb_rc_put(wusbhc->uwb_rc);
+	usb_put_intf(hwahc->wa.usb_iface);
+	usb_put_dev(hwahc->wa.usb_dev);
+	mutex_unlock(&wusbhc->mutex);
+}
+
+static void hwahc_init(struct hwahc *hwahc)
+{
+	wa_init(&hwahc->wa);
+}
+
+static int hwahc_probe(struct usb_interface *usb_iface,
+		       const struct usb_device_id *id)
+{
+	int result;
+	struct usb_hcd *usb_hcd;
+	struct wusbhc *wusbhc;
+	struct hwahc *hwahc;
+	struct device *dev = &usb_iface->dev;
+
+	result = -ENOMEM;
+	usb_hcd = usb_create_hcd(&hwahc_hc_driver, &usb_iface->dev, "wusb-hwa");
+	if (usb_hcd == NULL) {
+		dev_err(dev, "unable to allocate instance\n");
+		goto error_alloc;
+	}
+	usb_hcd->wireless = 1;
+	usb_hcd->self.sg_tablesize = ~0;
+	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+	hwahc_init(hwahc);
+	result = hwahc_create(hwahc, usb_iface, id->driver_info);
+	if (result < 0) {
+		dev_err(dev, "Cannot initialize internals: %d\n", result);
+		goto error_hwahc_create;
+	}
+	result = usb_add_hcd(usb_hcd, 0, 0);
+	if (result < 0) {
+		dev_err(dev, "Cannot add HCD: %d\n", result);
+		goto error_add_hcd;
+	}
+	device_wakeup_enable(usb_hcd->self.controller);
+	result = wusbhc_b_create(&hwahc->wusbhc);
+	if (result < 0) {
+		dev_err(dev, "Cannot setup phase B of WUSBHC: %d\n", result);
+		goto error_wusbhc_b_create;
+	}
+	return 0;
+
+error_wusbhc_b_create:
+	usb_remove_hcd(usb_hcd);
+error_add_hcd:
+	hwahc_destroy(hwahc);
+error_hwahc_create:
+	usb_put_hcd(usb_hcd);
+error_alloc:
+	return result;
+}
+
+static void hwahc_disconnect(struct usb_interface *usb_iface)
+{
+	struct usb_hcd *usb_hcd;
+	struct wusbhc *wusbhc;
+	struct hwahc *hwahc;
+
+	usb_hcd = usb_get_intfdata(usb_iface);
+	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
+
+	wusbhc_b_destroy(&hwahc->wusbhc);
+	usb_remove_hcd(usb_hcd);
+	hwahc_destroy(hwahc);
+	usb_put_hcd(usb_hcd);
+}
+
+static const struct usb_device_id hwahc_id_table[] = {
+	/* Alereon 5310 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5310, 0xe0, 0x02, 0x01),
+	  .driver_info = WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC |
+		WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS },
+	/* Alereon 5611 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5611, 0xe0, 0x02, 0x01),
+	  .driver_info = WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC |
+		WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS },
+	/* FIXME: use class labels for this */
+	{ USB_INTERFACE_INFO(0xe0, 0x02, 0x01), },
+	{},
+};
+MODULE_DEVICE_TABLE(usb, hwahc_id_table);
+
+static struct usb_driver hwahc_driver = {
+	.name =		"hwa-hc",
+	.probe =	hwahc_probe,
+	.disconnect =	hwahc_disconnect,
+	.id_table =	hwahc_id_table,
+};
+
+module_usb_driver(hwahc_driver);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Host Wired Adapter USB Host Control Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wusbcore/host/whci/Makefile b/drivers/staging/wusbcore/host/whci/Makefile
new file mode 100644
index 000000000000..859d20079df6
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_USB_WHCI_HCD) += whci-hcd.o
+
+whci-hcd-y := \
+	asl.o	\
+	debug.o \
+	hcd.o	\
+	hw.o	\
+	init.o	\
+	int.o	\
+	pzl.o	\
+	qset.o	\
+	wusb.o
diff --git a/drivers/staging/wusbcore/host/whci/asl.c b/drivers/staging/wusbcore/host/whci/asl.c
new file mode 100644
index 000000000000..a2b9a50cfb80
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/asl.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) asynchronous schedule management.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
+#include <linux/usb.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+static void qset_get_next_prev(struct whc *whc, struct whc_qset *qset,
+			       struct whc_qset **next, struct whc_qset **prev)
+{
+	struct list_head *n, *p;
+
+	BUG_ON(list_empty(&whc->async_list));
+
+	n = qset->list_node.next;
+	if (n == &whc->async_list)
+		n = n->next;
+	p = qset->list_node.prev;
+	if (p == &whc->async_list)
+		p = p->prev;
+
+	*next = container_of(n, struct whc_qset, list_node);
+	*prev = container_of(p, struct whc_qset, list_node);
+
+}
+
+static void asl_qset_insert_begin(struct whc *whc, struct whc_qset *qset)
+{
+	list_move(&qset->list_node, &whc->async_list);
+	qset->in_sw_list = true;
+}
+
+static void asl_qset_insert(struct whc *whc, struct whc_qset *qset)
+{
+	struct whc_qset *next, *prev;
+
+	qset_clear(whc, qset);
+
+	/* Link into ASL. */
+	qset_get_next_prev(whc, qset, &next, &prev);
+	whc_qset_set_link_ptr(&qset->qh.link, next->qset_dma);
+	whc_qset_set_link_ptr(&prev->qh.link, qset->qset_dma);
+	qset->in_hw_list = true;
+}
+
+static void asl_qset_remove(struct whc *whc, struct whc_qset *qset)
+{
+	struct whc_qset *prev, *next;
+
+	qset_get_next_prev(whc, qset, &next, &prev);
+
+	list_move(&qset->list_node, &whc->async_removed_list);
+	qset->in_sw_list = false;
+
+	/*
+	 * No more qsets in the ASL?  The caller must stop the ASL as
+	 * it's no longer valid.
+	 */
+	if (list_empty(&whc->async_list))
+		return;
+
+	/* Remove from ASL. */
+	whc_qset_set_link_ptr(&prev->qh.link, next->qset_dma);
+	qset->in_hw_list = false;
+}
+
+/**
+ * process_qset - process any recently inactivated or halted qTDs in a
+ * qset.
+ *
+ * After inactive qTDs are removed, new qTDs can be added if the
+ * urb queue still contains URBs.
+ *
+ * Returns any additional WUSBCMD bits for the ASL sync command (i.e.,
+ * WUSBCMD_ASYNC_QSET_RM if a halted qset was removed).
+ */
+static uint32_t process_qset(struct whc *whc, struct whc_qset *qset)
+{
+	enum whc_update update = 0;
+	uint32_t status = 0;
+
+	while (qset->ntds) {
+		struct whc_qtd *td;
+
+		td = &qset->qtd[qset->td_start];
+		status = le32_to_cpu(td->status);
+
+		/*
+		 * Nothing to do with a still active qTD.
+		 */
+		if (status & QTD_STS_ACTIVE)
+			break;
+
+		if (status & QTD_STS_HALTED) {
+			/* Ug, an error. */
+			process_halted_qtd(whc, qset, td);
+			/* A halted qTD always triggers an update
+			   because the qset was either removed or
+			   reactivated. */
+			update |= WHC_UPDATE_UPDATED;
+			goto done;
+		}
+
+		/* Mmm, a completed qTD. */
+		process_inactive_qtd(whc, qset, td);
+	}
+
+	if (!qset->remove)
+		update |= qset_add_qtds(whc, qset);
+
+done:
+	/*
+	 * Remove this qset from the ASL if requested, but only if has
+	 * no qTDs.
+	 */
+	if (qset->remove && qset->ntds == 0) {
+		asl_qset_remove(whc, qset);
+		update |= WHC_UPDATE_REMOVED;
+	}
+	return update;
+}
+
+void asl_start(struct whc *whc)
+{
+	struct whc_qset *qset;
+
+	qset = list_first_entry(&whc->async_list, struct whc_qset, list_node);
+
+	le_writeq(qset->qset_dma | QH_LINK_NTDS(8), whc->base + WUSBASYNCLISTADDR);
+
+	whc_write_wusbcmd(whc, WUSBCMD_ASYNC_EN, WUSBCMD_ASYNC_EN);
+	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
+		      WUSBSTS_ASYNC_SCHED, WUSBSTS_ASYNC_SCHED,
+		      1000, "start ASL");
+}
+
+void asl_stop(struct whc *whc)
+{
+	whc_write_wusbcmd(whc, WUSBCMD_ASYNC_EN, 0);
+	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
+		      WUSBSTS_ASYNC_SCHED, 0,
+		      1000, "stop ASL");
+}
+
+/**
+ * asl_update - request an ASL update and wait for the hardware to be synced
+ * @whc: the WHCI HC
+ * @wusbcmd: WUSBCMD value to start the update.
+ *
+ * If the WUSB HC is inactive (i.e., the ASL is stopped) then the
+ * update must be skipped as the hardware may not respond to update
+ * requests.
+ */
+void asl_update(struct whc *whc, uint32_t wusbcmd)
+{
+	struct wusbhc *wusbhc = &whc->wusbhc;
+	long t;
+
+	mutex_lock(&wusbhc->mutex);
+	if (wusbhc->active) {
+		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
+		t = wait_event_timeout(
+			whc->async_list_wq,
+			(le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0,
+			msecs_to_jiffies(1000));
+		if (t == 0)
+			whc_hw_error(whc, "ASL update timeout");
+	}
+	mutex_unlock(&wusbhc->mutex);
+}
+
+/**
+ * scan_async_work - scan the ASL for qsets to process.
+ *
+ * Process each qset in the ASL in turn and then signal the WHC that
+ * the ASL has been updated.
+ *
+ * Then start, stop or update the asynchronous schedule as required.
+ */
+void scan_async_work(struct work_struct *work)
+{
+	struct whc *whc = container_of(work, struct whc, async_work);
+	struct whc_qset *qset, *t;
+	enum whc_update update = 0;
+
+	spin_lock_irq(&whc->lock);
+
+	/*
+	 * Transerve the software list backwards so new qsets can be
+	 * safely inserted into the ASL without making it non-circular.
+	 */
+	list_for_each_entry_safe_reverse(qset, t, &whc->async_list, list_node) {
+		if (!qset->in_hw_list) {
+			asl_qset_insert(whc, qset);
+			update |= WHC_UPDATE_ADDED;
+		}
+
+		update |= process_qset(whc, qset);
+	}
+
+	spin_unlock_irq(&whc->lock);
+
+	if (update) {
+		uint32_t wusbcmd = WUSBCMD_ASYNC_UPDATED | WUSBCMD_ASYNC_SYNCED_DB;
+		if (update & WHC_UPDATE_REMOVED)
+			wusbcmd |= WUSBCMD_ASYNC_QSET_RM;
+		asl_update(whc, wusbcmd);
+	}
+
+	/*
+	 * Now that the ASL is updated, complete the removal of any
+	 * removed qsets.
+	 *
+	 * If the qset was to be reset, do so and reinsert it into the
+	 * ASL if it has pending transfers.
+	 */
+	spin_lock_irq(&whc->lock);
+
+	list_for_each_entry_safe(qset, t, &whc->async_removed_list, list_node) {
+		qset_remove_complete(whc, qset);
+		if (qset->reset) {
+			qset_reset(whc, qset);
+			if (!list_empty(&qset->stds)) {
+				asl_qset_insert_begin(whc, qset);
+				queue_work(whc->workqueue, &whc->async_work);
+			}
+		}
+	}
+
+	spin_unlock_irq(&whc->lock);
+}
+
+/**
+ * asl_urb_enqueue - queue an URB onto the asynchronous list (ASL).
+ * @whc: the WHCI host controller
+ * @urb: the URB to enqueue
+ * @mem_flags: flags for any memory allocations
+ *
+ * The qset for the endpoint is obtained and the urb queued on to it.
+ *
+ * Work is scheduled to update the hardware's view of the ASL.
+ */
+int asl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags)
+{
+	struct whc_qset *qset;
+	int err;
+	unsigned long flags;
+
+	spin_lock_irqsave(&whc->lock, flags);
+
+	err = usb_hcd_link_urb_to_ep(&whc->wusbhc.usb_hcd, urb);
+	if (err < 0) {
+		spin_unlock_irqrestore(&whc->lock, flags);
+		return err;
+	}
+
+	qset = get_qset(whc, urb, GFP_ATOMIC);
+	if (qset == NULL)
+		err = -ENOMEM;
+	else
+		err = qset_add_urb(whc, qset, urb, GFP_ATOMIC);
+	if (!err) {
+		if (!qset->in_sw_list && !qset->remove)
+			asl_qset_insert_begin(whc, qset);
+	} else
+		usb_hcd_unlink_urb_from_ep(&whc->wusbhc.usb_hcd, urb);
+
+	spin_unlock_irqrestore(&whc->lock, flags);
+
+	if (!err)
+		queue_work(whc->workqueue, &whc->async_work);
+
+	return err;
+}
+
+/**
+ * asl_urb_dequeue - remove an URB (qset) from the async list.
+ * @whc: the WHCI host controller
+ * @urb: the URB to dequeue
+ * @status: the current status of the URB
+ *
+ * URBs that do yet have qTDs can simply be removed from the software
+ * queue, otherwise the qset must be removed from the ASL so the qTDs
+ * can be removed.
+ */
+int asl_urb_dequeue(struct whc *whc, struct urb *urb, int status)
+{
+	struct whc_urb *wurb = urb->hcpriv;
+	struct whc_qset *qset = wurb->qset;
+	struct whc_std *std, *t;
+	bool has_qtd = false;
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&whc->lock, flags);
+
+	ret = usb_hcd_check_unlink_urb(&whc->wusbhc.usb_hcd, urb, status);
+	if (ret < 0)
+		goto out;
+
+	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
+		if (std->urb == urb) {
+			if (std->qtd)
+				has_qtd = true;
+			qset_free_std(whc, std);
+		} else
+			std->qtd = NULL; /* so this std is re-added when the qset is */
+	}
+
+	if (has_qtd) {
+		asl_qset_remove(whc, qset);
+		wurb->status = status;
+		wurb->is_async = true;
+		queue_work(whc->workqueue, &wurb->dequeue_work);
+	} else
+		qset_remove_urb(whc, qset, urb, status);
+out:
+	spin_unlock_irqrestore(&whc->lock, flags);
+
+	return ret;
+}
+
+/**
+ * asl_qset_delete - delete a qset from the ASL
+ */
+void asl_qset_delete(struct whc *whc, struct whc_qset *qset)
+{
+	qset->remove = 1;
+	queue_work(whc->workqueue, &whc->async_work);
+	qset_delete(whc, qset);
+}
+
+/**
+ * asl_init - initialize the asynchronous schedule list
+ *
+ * A dummy qset with no qTDs is added to the ASL to simplify removing
+ * qsets (no need to stop the ASL when the last qset is removed).
+ */
+int asl_init(struct whc *whc)
+{
+	struct whc_qset *qset;
+
+	qset = qset_alloc(whc, GFP_KERNEL);
+	if (qset == NULL)
+		return -ENOMEM;
+
+	asl_qset_insert_begin(whc, qset);
+	asl_qset_insert(whc, qset);
+
+	return 0;
+}
+
+/**
+ * asl_clean_up - free ASL resources
+ *
+ * The ASL is stopped and empty except for the dummy qset.
+ */
+void asl_clean_up(struct whc *whc)
+{
+	struct whc_qset *qset;
+
+	if (!list_empty(&whc->async_list)) {
+		qset = list_first_entry(&whc->async_list, struct whc_qset, list_node);
+		list_del(&qset->list_node);
+		qset_free(whc, qset);
+	}
+}
diff --git a/drivers/staging/wusbcore/host/whci/debug.c b/drivers/staging/wusbcore/host/whci/debug.c
new file mode 100644
index 000000000000..443da6719147
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/debug.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) debug.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+struct whc_dbg {
+	struct dentry *di_f;
+	struct dentry *asl_f;
+	struct dentry *pzl_f;
+};
+
+static void qset_print(struct seq_file *s, struct whc_qset *qset)
+{
+	static const char *qh_type[] = {
+		"ctrl", "isoc", "bulk", "intr", "rsvd", "rsvd", "rsvd", "lpintr", };
+	struct whc_std *std;
+	struct urb *urb = NULL;
+	int i;
+
+	seq_printf(s, "qset %08x", (u32)qset->qset_dma);
+	if (&qset->list_node == qset->whc->async_list.prev) {
+		seq_printf(s, " (dummy)\n");
+	} else {
+		seq_printf(s, " ep%d%s-%s maxpkt: %d\n",
+			   qset->qh.info1 & 0x0f,
+			   (qset->qh.info1 >> 4) & 0x1 ? "in" : "out",
+			   qh_type[(qset->qh.info1 >> 5) & 0x7],
+			   (qset->qh.info1 >> 16) & 0xffff);
+	}
+	seq_printf(s, "  -> %08x\n", (u32)qset->qh.link);
+	seq_printf(s, "  info: %08x %08x %08x\n",
+		   qset->qh.info1, qset->qh.info2,  qset->qh.info3);
+	seq_printf(s, "  sts: %04x errs: %d curwin: %08x\n",
+		   qset->qh.status, qset->qh.err_count, qset->qh.cur_window);
+	seq_printf(s, "  TD: sts: %08x opts: %08x\n",
+		   qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
+
+	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
+		seq_printf(s, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
+			i == qset->td_start ? 'S' : ' ',
+			i == qset->td_end ? 'E' : ' ',
+			i, qset->qtd[i].status, qset->qtd[i].options,
+			(u32)qset->qtd[i].page_list_ptr);
+	}
+	seq_printf(s, "  ntds: %d\n", qset->ntds);
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (urb != std->urb) {
+			urb = std->urb;
+			seq_printf(s, "  urb %p transferred: %d bytes\n", urb,
+				urb->actual_length);
+		}
+		if (std->qtd)
+			seq_printf(s, "    sTD[%td]: %zu bytes @ %08x\n",
+				std->qtd - &qset->qtd[0],
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+		else
+			seq_printf(s, "    sTD[-]: %zd bytes @ %08x\n",
+				std->len, std->num_pointers ?
+				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
+	}
+}
+
+static int di_show(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	int d;
+
+	for (d = 0; d < whc->n_devices; d++) {
+		struct di_buf_entry *di = &whc->di_buf[d];
+
+		seq_printf(s, "DI[%d]\n", d);
+		seq_printf(s, "  availability: %*pb\n",
+			   UWB_NUM_MAS, (unsigned long *)di->availability_info);
+		seq_printf(s, "  %c%c key idx: %d dev addr: %d\n",
+			   (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
+			   (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
+			   (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
+			   (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
+	}
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(di);
+
+static int asl_show(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+
+	list_for_each_entry(qset, &whc->async_list, list_node) {
+		qset_print(s, qset);
+	}
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(asl);
+
+static int pzl_show(struct seq_file *s, void *p)
+{
+	struct whc *whc = s->private;
+	struct whc_qset *qset;
+	int period;
+
+	for (period = 0; period < 5; period++) {
+		seq_printf(s, "Period %d\n", period);
+		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
+			qset_print(s, qset);
+		}
+	}
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(pzl);
+
+void whc_dbg_init(struct whc *whc)
+{
+	if (whc->wusbhc.pal.debugfs_dir == NULL)
+		return;
+
+	whc->dbg = kzalloc(sizeof(struct whc_dbg), GFP_KERNEL);
+	if (whc->dbg == NULL)
+		return;
+
+	whc->dbg->di_f = debugfs_create_file("di", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &di_fops);
+	whc->dbg->asl_f = debugfs_create_file("asl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &asl_fops);
+	whc->dbg->pzl_f = debugfs_create_file("pzl", 0444,
+					      whc->wusbhc.pal.debugfs_dir, whc,
+					      &pzl_fops);
+}
+
+void whc_dbg_clean_up(struct whc *whc)
+{
+	if (whc->dbg) {
+		debugfs_remove(whc->dbg->pzl_f);
+		debugfs_remove(whc->dbg->asl_f);
+		debugfs_remove(whc->dbg->di_f);
+		kfree(whc->dbg);
+	}
+}
diff --git a/drivers/staging/wusbcore/host/whci/hcd.c b/drivers/staging/wusbcore/host/whci/hcd.c
new file mode 100644
index 000000000000..bee1ff2d35be
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/hcd.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) driver.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+/*
+ * One time initialization.
+ *
+ * Nothing to do here.
+ */
+static int whc_reset(struct usb_hcd *usb_hcd)
+{
+	return 0;
+}
+
+/*
+ * Start the wireless host controller.
+ *
+ * Start device notification.
+ *
+ * Put hc into run state, set DNTS parameters.
+ */
+static int whc_start(struct usb_hcd *usb_hcd)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u8 bcid;
+	int ret;
+
+	mutex_lock(&wusbhc->mutex);
+
+	le_writel(WUSBINTR_GEN_CMD_DONE
+		  | WUSBINTR_HOST_ERR
+		  | WUSBINTR_ASYNC_SCHED_SYNCED
+		  | WUSBINTR_DNTS_INT
+		  | WUSBINTR_ERR_INT
+		  | WUSBINTR_INT,
+		  whc->base + WUSBINTR);
+
+	/* set cluster ID */
+	bcid = wusb_cluster_id_get();
+	ret = whc_set_cluster_id(whc, bcid);
+	if (ret < 0)
+		goto out;
+	wusbhc->cluster_id = bcid;
+
+	/* start HC */
+	whc_write_wusbcmd(whc, WUSBCMD_RUN, WUSBCMD_RUN);
+
+	usb_hcd->uses_new_polling = 1;
+	set_bit(HCD_FLAG_POLL_RH, &usb_hcd->flags);
+	usb_hcd->state = HC_STATE_RUNNING;
+
+out:
+	mutex_unlock(&wusbhc->mutex);
+	return ret;
+}
+
+
+/*
+ * Stop the wireless host controller.
+ *
+ * Stop device notification.
+ *
+ * Wait for pending transfer to stop? Put hc into stop state?
+ */
+static void whc_stop(struct usb_hcd *usb_hcd)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+
+	mutex_lock(&wusbhc->mutex);
+
+	/* stop HC */
+	le_writel(0, whc->base + WUSBINTR);
+	whc_write_wusbcmd(whc, WUSBCMD_RUN, 0);
+	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
+		      WUSBSTS_HCHALTED, WUSBSTS_HCHALTED,
+		      100, "HC to halt");
+
+	wusb_cluster_id_put(wusbhc->cluster_id);
+
+	mutex_unlock(&wusbhc->mutex);
+}
+
+static int whc_get_frame_number(struct usb_hcd *usb_hcd)
+{
+	/* Frame numbers are not applicable to WUSB. */
+	return -ENOSYS;
+}
+
+
+/*
+ * Queue an URB to the ASL or PZL
+ */
+static int whc_urb_enqueue(struct usb_hcd *usb_hcd, struct urb *urb,
+			   gfp_t mem_flags)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	int ret;
+
+	switch (usb_pipetype(urb->pipe)) {
+	case PIPE_INTERRUPT:
+		ret = pzl_urb_enqueue(whc, urb, mem_flags);
+		break;
+	case PIPE_ISOCHRONOUS:
+		dev_err(&whc->umc->dev, "isochronous transfers unsupported\n");
+		ret = -ENOTSUPP;
+		break;
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+	default:
+		ret = asl_urb_enqueue(whc, urb, mem_flags);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Remove a queued URB from the ASL or PZL.
+ */
+static int whc_urb_dequeue(struct usb_hcd *usb_hcd, struct urb *urb, int status)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	int ret;
+
+	switch (usb_pipetype(urb->pipe)) {
+	case PIPE_INTERRUPT:
+		ret = pzl_urb_dequeue(whc, urb, status);
+		break;
+	case PIPE_ISOCHRONOUS:
+		ret = -ENOTSUPP;
+		break;
+	case PIPE_CONTROL:
+	case PIPE_BULK:
+	default:
+		ret = asl_urb_dequeue(whc, urb, status);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Wait for all URBs to the endpoint to be completed, then delete the
+ * qset.
+ */
+static void whc_endpoint_disable(struct usb_hcd *usb_hcd,
+				 struct usb_host_endpoint *ep)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	struct whc_qset *qset;
+
+	qset = ep->hcpriv;
+	if (qset) {
+		ep->hcpriv = NULL;
+		if (usb_endpoint_xfer_bulk(&ep->desc)
+		    || usb_endpoint_xfer_control(&ep->desc))
+			asl_qset_delete(whc, qset);
+		else
+			pzl_qset_delete(whc, qset);
+	}
+}
+
+static void whc_endpoint_reset(struct usb_hcd *usb_hcd,
+			       struct usb_host_endpoint *ep)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	struct whc_qset *qset;
+	unsigned long flags;
+
+	spin_lock_irqsave(&whc->lock, flags);
+
+	qset = ep->hcpriv;
+	if (qset) {
+		qset->remove = 1;
+		qset->reset = 1;
+
+		if (usb_endpoint_xfer_bulk(&ep->desc)
+		    || usb_endpoint_xfer_control(&ep->desc))
+			queue_work(whc->workqueue, &whc->async_work);
+		else
+			queue_work(whc->workqueue, &whc->periodic_work);
+	}
+
+	spin_unlock_irqrestore(&whc->lock, flags);
+}
+
+
+static const struct hc_driver whc_hc_driver = {
+	.description = "whci-hcd",
+	.product_desc = "Wireless host controller",
+	.hcd_priv_size = sizeof(struct whc) - sizeof(struct usb_hcd),
+	.irq = whc_int_handler,
+	.flags = HCD_USB2,
+
+	.reset = whc_reset,
+	.start = whc_start,
+	.stop = whc_stop,
+	.get_frame_number = whc_get_frame_number,
+	.urb_enqueue = whc_urb_enqueue,
+	.urb_dequeue = whc_urb_dequeue,
+	.endpoint_disable = whc_endpoint_disable,
+	.endpoint_reset = whc_endpoint_reset,
+
+	.hub_status_data = wusbhc_rh_status_data,
+	.hub_control = wusbhc_rh_control,
+	.start_port_reset = wusbhc_rh_start_port_reset,
+};
+
+static int whc_probe(struct umc_dev *umc)
+{
+	int ret;
+	struct usb_hcd *usb_hcd;
+	struct wusbhc *wusbhc;
+	struct whc *whc;
+	struct device *dev = &umc->dev;
+
+	usb_hcd = usb_create_hcd(&whc_hc_driver, dev, "whci");
+	if (usb_hcd == NULL) {
+		dev_err(dev, "unable to create hcd\n");
+		return -ENOMEM;
+	}
+
+	usb_hcd->wireless = 1;
+	usb_hcd->self.sg_tablesize = 2048; /* somewhat arbitrary */
+
+	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	whc = wusbhc_to_whc(wusbhc);
+	whc->umc = umc;
+
+	ret = whc_init(whc);
+	if (ret)
+		goto error_whc_init;
+
+	wusbhc->dev = dev;
+	wusbhc->uwb_rc = uwb_rc_get_by_grandpa(umc->dev.parent);
+	if (!wusbhc->uwb_rc) {
+		ret = -ENODEV;
+		dev_err(dev, "cannot get radio controller\n");
+		goto error_uwb_rc;
+	}
+
+	if (whc->n_devices > USB_MAXCHILDREN) {
+		dev_warn(dev, "USB_MAXCHILDREN too low for WUSB adapter (%u ports)\n",
+			 whc->n_devices);
+		wusbhc->ports_max = USB_MAXCHILDREN;
+	} else
+		wusbhc->ports_max = whc->n_devices;
+	wusbhc->mmcies_max      = whc->n_mmc_ies;
+	wusbhc->start           = whc_wusbhc_start;
+	wusbhc->stop            = whc_wusbhc_stop;
+	wusbhc->mmcie_add       = whc_mmcie_add;
+	wusbhc->mmcie_rm        = whc_mmcie_rm;
+	wusbhc->dev_info_set    = whc_dev_info_set;
+	wusbhc->bwa_set         = whc_bwa_set;
+	wusbhc->set_num_dnts    = whc_set_num_dnts;
+	wusbhc->set_ptk         = whc_set_ptk;
+	wusbhc->set_gtk         = whc_set_gtk;
+
+	ret = wusbhc_create(wusbhc);
+	if (ret)
+		goto error_wusbhc_create;
+
+	ret = usb_add_hcd(usb_hcd, whc->umc->irq, IRQF_SHARED);
+	if (ret) {
+		dev_err(dev, "cannot add HCD: %d\n", ret);
+		goto error_usb_add_hcd;
+	}
+	device_wakeup_enable(usb_hcd->self.controller);
+
+	ret = wusbhc_b_create(wusbhc);
+	if (ret) {
+		dev_err(dev, "WUSBHC phase B setup failed: %d\n", ret);
+		goto error_wusbhc_b_create;
+	}
+
+	whc_dbg_init(whc);
+
+	return 0;
+
+error_wusbhc_b_create:
+	usb_remove_hcd(usb_hcd);
+error_usb_add_hcd:
+	wusbhc_destroy(wusbhc);
+error_wusbhc_create:
+	uwb_rc_put(wusbhc->uwb_rc);
+error_uwb_rc:
+	whc_clean_up(whc);
+error_whc_init:
+	usb_put_hcd(usb_hcd);
+	return ret;
+}
+
+
+static void whc_remove(struct umc_dev *umc)
+{
+	struct usb_hcd *usb_hcd = dev_get_drvdata(&umc->dev);
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+
+	if (usb_hcd) {
+		whc_dbg_clean_up(whc);
+		wusbhc_b_destroy(wusbhc);
+		usb_remove_hcd(usb_hcd);
+		wusbhc_destroy(wusbhc);
+		uwb_rc_put(wusbhc->uwb_rc);
+		whc_clean_up(whc);
+		usb_put_hcd(usb_hcd);
+	}
+}
+
+static struct umc_driver whci_hc_driver = {
+	.name =		"whci-hcd",
+	.cap_id =       UMC_CAP_ID_WHCI_WUSB_HC,
+	.probe =	whc_probe,
+	.remove =	whc_remove,
+};
+
+static int __init whci_hc_driver_init(void)
+{
+	return umc_driver_register(&whci_hc_driver);
+}
+module_init(whci_hc_driver_init);
+
+static void __exit whci_hc_driver_exit(void)
+{
+	umc_driver_unregister(&whci_hc_driver);
+}
+module_exit(whci_hc_driver_exit);
+
+/* PCI device ID's that we handle (so it gets loaded) */
+static struct pci_device_id __used whci_hcd_id_table[] = {
+	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
+	{ /* empty last entry */ }
+};
+MODULE_DEVICE_TABLE(pci, whci_hcd_id_table);
+
+MODULE_DESCRIPTION("WHCI Wireless USB host controller driver");
+MODULE_AUTHOR("Cambridge Silicon Radio Ltd.");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wusbcore/host/whci/hw.c b/drivers/staging/wusbcore/host/whci/hw.c
new file mode 100644
index 000000000000..e4e8914abf42
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/hw.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) hardware access helpers.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+void whc_write_wusbcmd(struct whc *whc, u32 mask, u32 val)
+{
+	unsigned long flags;
+	u32 cmd;
+
+	spin_lock_irqsave(&whc->lock, flags);
+
+	cmd = le_readl(whc->base + WUSBCMD);
+	cmd = (cmd & ~mask) | val;
+	le_writel(cmd, whc->base + WUSBCMD);
+
+	spin_unlock_irqrestore(&whc->lock, flags);
+}
+
+/**
+ * whc_do_gencmd - start a generic command via the WUSBGENCMDSTS register
+ * @whc:    the WHCI HC
+ * @cmd:    command to start.
+ * @params: parameters for the command (the WUSBGENCMDPARAMS register value).
+ * @addr:   pointer to any data for the command (may be NULL).
+ * @len:    length of the data (if any).
+ */
+int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len)
+{
+	unsigned long flags;
+	dma_addr_t dma_addr;
+	int t;
+	int ret = 0;
+
+	mutex_lock(&whc->mutex);
+
+	/* Wait for previous command to complete. */
+	t = wait_event_timeout(whc->cmd_wq,
+			       (le_readl(whc->base + WUSBGENCMDSTS) & WUSBGENCMDSTS_ACTIVE) == 0,
+			       WHC_GENCMD_TIMEOUT_MS);
+	if (t == 0) {
+		dev_err(&whc->umc->dev, "generic command timeout (%04x/%04x)\n",
+			le_readl(whc->base + WUSBGENCMDSTS),
+			le_readl(whc->base + WUSBGENCMDPARAMS));
+		ret = -ETIMEDOUT;
+		goto out;
+	}
+
+	if (addr) {
+		memcpy(whc->gen_cmd_buf, addr, len);
+		dma_addr = whc->gen_cmd_buf_dma;
+	} else
+		dma_addr = 0;
+
+	/* Poke registers to start cmd. */
+	spin_lock_irqsave(&whc->lock, flags);
+
+	le_writel(params, whc->base + WUSBGENCMDPARAMS);
+	le_writeq(dma_addr, whc->base + WUSBGENADDR);
+
+	le_writel(WUSBGENCMDSTS_ACTIVE | WUSBGENCMDSTS_IOC | cmd,
+		  whc->base + WUSBGENCMDSTS);
+
+	spin_unlock_irqrestore(&whc->lock, flags);
+out:
+	mutex_unlock(&whc->mutex);
+
+	return ret;
+}
+
+/**
+ * whc_hw_error - recover from a hardware error
+ * @whc:    the WHCI HC that broke.
+ * @reason: a description of the failure.
+ *
+ * Recover from broken hardware with a full reset.
+ */
+void whc_hw_error(struct whc *whc, const char *reason)
+{
+	struct wusbhc *wusbhc = &whc->wusbhc;
+
+	dev_err(&whc->umc->dev, "hardware error: %s\n", reason);
+	wusbhc_reset_all(wusbhc);
+}
diff --git a/drivers/staging/wusbcore/host/whci/init.c b/drivers/staging/wusbcore/host/whci/init.c
new file mode 100644
index 000000000000..55fd458a8f30
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/init.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) initialization.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+/*
+ * Reset the host controller.
+ */
+static void whc_hw_reset(struct whc *whc)
+{
+	le_writel(WUSBCMD_WHCRESET, whc->base + WUSBCMD);
+	whci_wait_for(&whc->umc->dev, whc->base + WUSBCMD, WUSBCMD_WHCRESET, 0,
+		      100, "reset");
+}
+
+static void whc_hw_init_di_buf(struct whc *whc)
+{
+	int d;
+
+	/* Disable all entries in the Device Information buffer. */
+	for (d = 0; d < whc->n_devices; d++)
+		whc->di_buf[d].addr_sec_info = WHC_DI_DISABLE;
+
+	le_writeq(whc->di_buf_dma, whc->base + WUSBDEVICEINFOADDR);
+}
+
+static void whc_hw_init_dn_buf(struct whc *whc)
+{
+	/* Clear the Device Notification buffer to ensure the V (valid)
+	 * bits are clear.  */
+	memset(whc->dn_buf, 0, 4096);
+
+	le_writeq(whc->dn_buf_dma, whc->base + WUSBDNTSBUFADDR);
+}
+
+int whc_init(struct whc *whc)
+{
+	u32 whcsparams;
+	int ret, i;
+	resource_size_t start, len;
+
+	spin_lock_init(&whc->lock);
+	mutex_init(&whc->mutex);
+	init_waitqueue_head(&whc->cmd_wq);
+	init_waitqueue_head(&whc->async_list_wq);
+	init_waitqueue_head(&whc->periodic_list_wq);
+	whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0);
+	if (whc->workqueue == NULL) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	INIT_WORK(&whc->dn_work, whc_dn_work);
+
+	INIT_WORK(&whc->async_work, scan_async_work);
+	INIT_LIST_HEAD(&whc->async_list);
+	INIT_LIST_HEAD(&whc->async_removed_list);
+
+	INIT_WORK(&whc->periodic_work, scan_periodic_work);
+	for (i = 0; i < 5; i++)
+		INIT_LIST_HEAD(&whc->periodic_list[i]);
+	INIT_LIST_HEAD(&whc->periodic_removed_list);
+
+	/* Map HC registers. */
+	start = whc->umc->resource.start;
+	len   = whc->umc->resource.end - start + 1;
+	if (!request_mem_region(start, len, "whci-hc")) {
+		dev_err(&whc->umc->dev, "can't request HC region\n");
+		ret = -EBUSY;
+		goto error;
+	}
+	whc->base_phys = start;
+	whc->base = ioremap(start, len);
+	if (!whc->base) {
+		dev_err(&whc->umc->dev, "ioremap\n");
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	whc_hw_reset(whc);
+
+	/* Read maximum number of devices, keys and MMC IEs. */
+	whcsparams = le_readl(whc->base + WHCSPARAMS);
+	whc->n_devices = WHCSPARAMS_TO_N_DEVICES(whcsparams);
+	whc->n_keys    = WHCSPARAMS_TO_N_KEYS(whcsparams);
+	whc->n_mmc_ies = WHCSPARAMS_TO_N_MMC_IES(whcsparams);
+
+	dev_dbg(&whc->umc->dev, "N_DEVICES = %d, N_KEYS = %d, N_MMC_IES = %d\n",
+		whc->n_devices, whc->n_keys, whc->n_mmc_ies);
+
+	whc->qset_pool = dma_pool_create("qset", &whc->umc->dev,
+					 sizeof(struct whc_qset), 64, 0);
+	if (whc->qset_pool == NULL) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	ret = asl_init(whc);
+	if (ret < 0)
+		goto error;
+	ret = pzl_init(whc);
+	if (ret < 0)
+		goto error;
+
+	/* Allocate and initialize a buffer for generic commands, the
+	   Device Information buffer, and the Device Notification
+	   buffer. */
+
+	whc->gen_cmd_buf = dma_alloc_coherent(&whc->umc->dev, WHC_GEN_CMD_DATA_LEN,
+					      &whc->gen_cmd_buf_dma, GFP_KERNEL);
+	if (whc->gen_cmd_buf == NULL) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	whc->dn_buf = dma_alloc_coherent(&whc->umc->dev,
+					 sizeof(struct dn_buf_entry) * WHC_N_DN_ENTRIES,
+					 &whc->dn_buf_dma, GFP_KERNEL);
+	if (!whc->dn_buf) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	whc_hw_init_dn_buf(whc);
+
+	whc->di_buf = dma_alloc_coherent(&whc->umc->dev,
+					 sizeof(struct di_buf_entry) * whc->n_devices,
+					 &whc->di_buf_dma, GFP_KERNEL);
+	if (!whc->di_buf) {
+		ret = -ENOMEM;
+		goto error;
+	}
+	whc_hw_init_di_buf(whc);
+
+	return 0;
+
+error:
+	whc_clean_up(whc);
+	return ret;
+}
+
+void whc_clean_up(struct whc *whc)
+{
+	resource_size_t len;
+
+	if (whc->di_buf)
+		dma_free_coherent(&whc->umc->dev, sizeof(struct di_buf_entry) * whc->n_devices,
+				  whc->di_buf, whc->di_buf_dma);
+	if (whc->dn_buf)
+		dma_free_coherent(&whc->umc->dev, sizeof(struct dn_buf_entry) * WHC_N_DN_ENTRIES,
+				  whc->dn_buf, whc->dn_buf_dma);
+	if (whc->gen_cmd_buf)
+		dma_free_coherent(&whc->umc->dev, WHC_GEN_CMD_DATA_LEN,
+				  whc->gen_cmd_buf, whc->gen_cmd_buf_dma);
+
+	pzl_clean_up(whc);
+	asl_clean_up(whc);
+
+	dma_pool_destroy(whc->qset_pool);
+
+	len   = resource_size(&whc->umc->resource);
+	if (whc->base)
+		iounmap(whc->base);
+	if (whc->base_phys)
+		release_mem_region(whc->base_phys, len);
+
+	if (whc->workqueue)
+		destroy_workqueue(whc->workqueue);
+}
diff --git a/drivers/staging/wusbcore/host/whci/int.c b/drivers/staging/wusbcore/host/whci/int.c
new file mode 100644
index 000000000000..bdbe35e9366f
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/int.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) interrupt handling.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+static void transfer_done(struct whc *whc)
+{
+	queue_work(whc->workqueue, &whc->async_work);
+	queue_work(whc->workqueue, &whc->periodic_work);
+}
+
+irqreturn_t whc_int_handler(struct usb_hcd *hcd)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(hcd);
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 sts;
+
+	sts = le_readl(whc->base + WUSBSTS);
+	if (!(sts & WUSBSTS_INT_MASK))
+		return IRQ_NONE;
+	le_writel(sts & WUSBSTS_INT_MASK, whc->base + WUSBSTS);
+
+	if (sts & WUSBSTS_GEN_CMD_DONE)
+		wake_up(&whc->cmd_wq);
+
+	if (sts & WUSBSTS_HOST_ERR)
+		dev_err(&whc->umc->dev, "FIXME: host system error\n");
+
+	if (sts & WUSBSTS_ASYNC_SCHED_SYNCED)
+		wake_up(&whc->async_list_wq);
+
+	if (sts & WUSBSTS_PERIODIC_SCHED_SYNCED)
+		wake_up(&whc->periodic_list_wq);
+
+	if (sts & WUSBSTS_DNTS_INT)
+		queue_work(whc->workqueue, &whc->dn_work);
+
+	/*
+	 * A transfer completed (see [WHCI] section 4.7.1.2 for when
+	 * this occurs).
+	 */
+	if (sts & (WUSBSTS_INT | WUSBSTS_ERR_INT))
+		transfer_done(whc);
+
+	return IRQ_HANDLED;
+}
+
+static int process_dn_buf(struct whc *whc)
+{
+	struct wusbhc *wusbhc = &whc->wusbhc;
+	struct dn_buf_entry *dn;
+	int processed = 0;
+
+	for (dn = whc->dn_buf; dn < whc->dn_buf + WHC_N_DN_ENTRIES; dn++) {
+		if (dn->status & WHC_DN_STATUS_VALID) {
+			wusbhc_handle_dn(wusbhc, dn->src_addr,
+					 (struct wusb_dn_hdr *)dn->dn_data,
+					 dn->msg_size);
+			dn->status &= ~WHC_DN_STATUS_VALID;
+			processed++;
+		}
+	}
+	return processed;
+}
+
+void whc_dn_work(struct work_struct *work)
+{
+	struct whc *whc = container_of(work, struct whc, dn_work);
+	int processed;
+
+	do {
+		processed = process_dn_buf(whc);
+	} while (processed);
+}
diff --git a/drivers/staging/wusbcore/host/whci/pzl.c b/drivers/staging/wusbcore/host/whci/pzl.c
new file mode 100644
index 000000000000..6dfc075f5798
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/pzl.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) periodic schedule management.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/dma-mapping.h>
+#include <linux/usb.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+static void update_pzl_pointers(struct whc *whc, int period, u64 addr)
+{
+	switch (period) {
+	case 0:
+		whc_qset_set_link_ptr(&whc->pz_list[0], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[2], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[4], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[6], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[8], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[10], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[12], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[14], addr);
+		break;
+	case 1:
+		whc_qset_set_link_ptr(&whc->pz_list[1], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[5], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[9], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[13], addr);
+		break;
+	case 2:
+		whc_qset_set_link_ptr(&whc->pz_list[3], addr);
+		whc_qset_set_link_ptr(&whc->pz_list[11], addr);
+		break;
+	case 3:
+		whc_qset_set_link_ptr(&whc->pz_list[7], addr);
+		break;
+	case 4:
+		whc_qset_set_link_ptr(&whc->pz_list[15], addr);
+		break;
+	}
+}
+
+/*
+ * Return the 'period' to use for this qset.  The minimum interval for
+ * the endpoint is used so whatever urbs are submitted the device is
+ * polled often enough.
+ */
+static int qset_get_period(struct whc *whc, struct whc_qset *qset)
+{
+	uint8_t bInterval = qset->ep->desc.bInterval;
+
+	if (bInterval < 6)
+		bInterval = 6;
+	if (bInterval > 10)
+		bInterval = 10;
+	return bInterval - 6;
+}
+
+static void qset_insert_in_sw_list(struct whc *whc, struct whc_qset *qset)
+{
+	int period;
+
+	period = qset_get_period(whc, qset);
+
+	qset_clear(whc, qset);
+	list_move(&qset->list_node, &whc->periodic_list[period]);
+	qset->in_sw_list = true;
+}
+
+static void pzl_qset_remove(struct whc *whc, struct whc_qset *qset)
+{
+	list_move(&qset->list_node, &whc->periodic_removed_list);
+	qset->in_hw_list = false;
+	qset->in_sw_list = false;
+}
+
+/**
+ * pzl_process_qset - process any recently inactivated or halted qTDs
+ * in a qset.
+ *
+ * After inactive qTDs are removed, new qTDs can be added if the
+ * urb queue still contains URBs.
+ *
+ * Returns the schedule updates required.
+ */
+static enum whc_update pzl_process_qset(struct whc *whc, struct whc_qset *qset)
+{
+	enum whc_update update = 0;
+	uint32_t status = 0;
+
+	while (qset->ntds) {
+		struct whc_qtd *td;
+
+		td = &qset->qtd[qset->td_start];
+		status = le32_to_cpu(td->status);
+
+		/*
+		 * Nothing to do with a still active qTD.
+		 */
+		if (status & QTD_STS_ACTIVE)
+			break;
+
+		if (status & QTD_STS_HALTED) {
+			/* Ug, an error. */
+			process_halted_qtd(whc, qset, td);
+			/* A halted qTD always triggers an update
+			   because the qset was either removed or
+			   reactivated. */
+			update |= WHC_UPDATE_UPDATED;
+			goto done;
+		}
+
+		/* Mmm, a completed qTD. */
+		process_inactive_qtd(whc, qset, td);
+	}
+
+	if (!qset->remove)
+		update |= qset_add_qtds(whc, qset);
+
+done:
+	/*
+	 * If there are no qTDs in this qset, remove it from the PZL.
+	 */
+	if (qset->remove && qset->ntds == 0) {
+		pzl_qset_remove(whc, qset);
+		update |= WHC_UPDATE_REMOVED;
+	}
+
+	return update;
+}
+
+/**
+ * pzl_start - start the periodic schedule
+ * @whc: the WHCI host controller
+ *
+ * The PZL must be valid (e.g., all entries in the list should have
+ * the T bit set).
+ */
+void pzl_start(struct whc *whc)
+{
+	le_writeq(whc->pz_list_dma, whc->base + WUSBPERIODICLISTBASE);
+
+	whc_write_wusbcmd(whc, WUSBCMD_PERIODIC_EN, WUSBCMD_PERIODIC_EN);
+	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
+		      WUSBSTS_PERIODIC_SCHED, WUSBSTS_PERIODIC_SCHED,
+		      1000, "start PZL");
+}
+
+/**
+ * pzl_stop - stop the periodic schedule
+ * @whc: the WHCI host controller
+ */
+void pzl_stop(struct whc *whc)
+{
+	whc_write_wusbcmd(whc, WUSBCMD_PERIODIC_EN, 0);
+	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
+		      WUSBSTS_PERIODIC_SCHED, 0,
+		      1000, "stop PZL");
+}
+
+/**
+ * pzl_update - request a PZL update and wait for the hardware to be synced
+ * @whc: the WHCI HC
+ * @wusbcmd: WUSBCMD value to start the update.
+ *
+ * If the WUSB HC is inactive (i.e., the PZL is stopped) then the
+ * update must be skipped as the hardware may not respond to update
+ * requests.
+ */
+void pzl_update(struct whc *whc, uint32_t wusbcmd)
+{
+	struct wusbhc *wusbhc = &whc->wusbhc;
+	long t;
+
+	mutex_lock(&wusbhc->mutex);
+	if (wusbhc->active) {
+		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
+		t = wait_event_timeout(
+			whc->periodic_list_wq,
+			(le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0,
+			msecs_to_jiffies(1000));
+		if (t == 0)
+			whc_hw_error(whc, "PZL update timeout");
+	}
+	mutex_unlock(&wusbhc->mutex);
+}
+
+static void update_pzl_hw_view(struct whc *whc)
+{
+	struct whc_qset *qset, *t;
+	int period;
+	u64 tmp_qh = 0;
+
+	for (period = 0; period < 5; period++) {
+		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
+			whc_qset_set_link_ptr(&qset->qh.link, tmp_qh);
+			tmp_qh = qset->qset_dma;
+			qset->in_hw_list = true;
+		}
+		update_pzl_pointers(whc, period, tmp_qh);
+	}
+}
+
+/**
+ * scan_periodic_work - scan the PZL for qsets to process.
+ *
+ * Process each qset in the PZL in turn and then signal the WHC that
+ * the PZL has been updated.
+ *
+ * Then start, stop or update the periodic schedule as required.
+ */
+void scan_periodic_work(struct work_struct *work)
+{
+	struct whc *whc = container_of(work, struct whc, periodic_work);
+	struct whc_qset *qset, *t;
+	enum whc_update update = 0;
+	int period;
+
+	spin_lock_irq(&whc->lock);
+
+	for (period = 4; period >= 0; period--) {
+		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
+			if (!qset->in_hw_list)
+				update |= WHC_UPDATE_ADDED;
+			update |= pzl_process_qset(whc, qset);
+		}
+	}
+
+	if (update & (WHC_UPDATE_ADDED | WHC_UPDATE_REMOVED))
+		update_pzl_hw_view(whc);
+
+	spin_unlock_irq(&whc->lock);
+
+	if (update) {
+		uint32_t wusbcmd = WUSBCMD_PERIODIC_UPDATED | WUSBCMD_PERIODIC_SYNCED_DB;
+		if (update & WHC_UPDATE_REMOVED)
+			wusbcmd |= WUSBCMD_PERIODIC_QSET_RM;
+		pzl_update(whc, wusbcmd);
+	}
+
+	/*
+	 * Now that the PZL is updated, complete the removal of any
+	 * removed qsets.
+	 *
+	 * If the qset was to be reset, do so and reinsert it into the
+	 * PZL if it has pending transfers.
+	 */
+	spin_lock_irq(&whc->lock);
+
+	list_for_each_entry_safe(qset, t, &whc->periodic_removed_list, list_node) {
+		qset_remove_complete(whc, qset);
+		if (qset->reset) {
+			qset_reset(whc, qset);
+			if (!list_empty(&qset->stds)) {
+				qset_insert_in_sw_list(whc, qset);
+				queue_work(whc->workqueue, &whc->periodic_work);
+			}
+		}
+	}
+
+	spin_unlock_irq(&whc->lock);
+}
+
+/**
+ * pzl_urb_enqueue - queue an URB onto the periodic list (PZL)
+ * @whc: the WHCI host controller
+ * @urb: the URB to enqueue
+ * @mem_flags: flags for any memory allocations
+ *
+ * The qset for the endpoint is obtained and the urb queued on to it.
+ *
+ * Work is scheduled to update the hardware's view of the PZL.
+ */
+int pzl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags)
+{
+	struct whc_qset *qset;
+	int err;
+	unsigned long flags;
+
+	spin_lock_irqsave(&whc->lock, flags);
+
+	err = usb_hcd_link_urb_to_ep(&whc->wusbhc.usb_hcd, urb);
+	if (err < 0) {
+		spin_unlock_irqrestore(&whc->lock, flags);
+		return err;
+	}
+
+	qset = get_qset(whc, urb, GFP_ATOMIC);
+	if (qset == NULL)
+		err = -ENOMEM;
+	else
+		err = qset_add_urb(whc, qset, urb, GFP_ATOMIC);
+	if (!err) {
+		if (!qset->in_sw_list && !qset->remove)
+			qset_insert_in_sw_list(whc, qset);
+	} else
+		usb_hcd_unlink_urb_from_ep(&whc->wusbhc.usb_hcd, urb);
+
+	spin_unlock_irqrestore(&whc->lock, flags);
+
+	if (!err)
+		queue_work(whc->workqueue, &whc->periodic_work);
+
+	return err;
+}
+
+/**
+ * pzl_urb_dequeue - remove an URB (qset) from the periodic list
+ * @whc: the WHCI host controller
+ * @urb: the URB to dequeue
+ * @status: the current status of the URB
+ *
+ * URBs that do yet have qTDs can simply be removed from the software
+ * queue, otherwise the qset must be removed so the qTDs can be safely
+ * removed.
+ */
+int pzl_urb_dequeue(struct whc *whc, struct urb *urb, int status)
+{
+	struct whc_urb *wurb = urb->hcpriv;
+	struct whc_qset *qset = wurb->qset;
+	struct whc_std *std, *t;
+	bool has_qtd = false;
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&whc->lock, flags);
+
+	ret = usb_hcd_check_unlink_urb(&whc->wusbhc.usb_hcd, urb, status);
+	if (ret < 0)
+		goto out;
+
+	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
+		if (std->urb == urb) {
+			if (std->qtd)
+				has_qtd = true;
+			qset_free_std(whc, std);
+		} else
+			std->qtd = NULL; /* so this std is re-added when the qset is */
+	}
+
+	if (has_qtd) {
+		pzl_qset_remove(whc, qset);
+		update_pzl_hw_view(whc);
+		wurb->status = status;
+		wurb->is_async = false;
+		queue_work(whc->workqueue, &wurb->dequeue_work);
+	} else
+		qset_remove_urb(whc, qset, urb, status);
+out:
+	spin_unlock_irqrestore(&whc->lock, flags);
+
+	return ret;
+}
+
+/**
+ * pzl_qset_delete - delete a qset from the PZL
+ */
+void pzl_qset_delete(struct whc *whc, struct whc_qset *qset)
+{
+	qset->remove = 1;
+	queue_work(whc->workqueue, &whc->periodic_work);
+	qset_delete(whc, qset);
+}
+
+/**
+ * pzl_init - initialize the periodic zone list
+ * @whc: the WHCI host controller
+ */
+int pzl_init(struct whc *whc)
+{
+	int i;
+
+	whc->pz_list = dma_alloc_coherent(&whc->umc->dev, sizeof(u64) * 16,
+					  &whc->pz_list_dma, GFP_KERNEL);
+	if (whc->pz_list == NULL)
+		return -ENOMEM;
+
+	/* Set T bit on all elements in PZL. */
+	for (i = 0; i < 16; i++)
+		whc->pz_list[i] = cpu_to_le64(QH_LINK_NTDS(8) | QH_LINK_T);
+
+	le_writeq(whc->pz_list_dma, whc->base + WUSBPERIODICLISTBASE);
+
+	return 0;
+}
+
+/**
+ * pzl_clean_up - free PZL resources
+ * @whc: the WHCI host controller
+ *
+ * The PZL is stopped and empty.
+ */
+void pzl_clean_up(struct whc *whc)
+{
+	if (whc->pz_list)
+		dma_free_coherent(&whc->umc->dev,  sizeof(u64) * 16, whc->pz_list,
+				  whc->pz_list_dma);
+}
diff --git a/drivers/staging/wusbcore/host/whci/qset.c b/drivers/staging/wusbcore/host/whci/qset.c
new file mode 100644
index 000000000000..66459b77dc77
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/qset.c
@@ -0,0 +1,831 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) qset management.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags)
+{
+	struct whc_qset *qset;
+	dma_addr_t dma;
+
+	qset = dma_pool_zalloc(whc->qset_pool, mem_flags, &dma);
+	if (qset == NULL)
+		return NULL;
+
+	qset->qset_dma = dma;
+	qset->whc = whc;
+
+	INIT_LIST_HEAD(&qset->list_node);
+	INIT_LIST_HEAD(&qset->stds);
+
+	return qset;
+}
+
+/**
+ * qset_fill_qh - fill the static endpoint state in a qset's QHead
+ * @qset: the qset whose QH needs initializing with static endpoint
+ *        state
+ * @urb:  an urb for a transfer to this endpoint
+ */
+static void qset_fill_qh(struct whc *whc, struct whc_qset *qset, struct urb *urb)
+{
+	struct usb_device *usb_dev = urb->dev;
+	struct wusb_dev *wusb_dev = usb_dev->wusb_dev;
+	struct usb_wireless_ep_comp_descriptor *epcd;
+	bool is_out;
+	uint8_t phy_rate;
+
+	is_out = usb_pipeout(urb->pipe);
+
+	qset->max_packet = le16_to_cpu(urb->ep->desc.wMaxPacketSize);
+
+	epcd = (struct usb_wireless_ep_comp_descriptor *)qset->ep->extra;
+	if (epcd) {
+		qset->max_seq = epcd->bMaxSequence;
+		qset->max_burst = epcd->bMaxBurst;
+	} else {
+		qset->max_seq = 2;
+		qset->max_burst = 1;
+	}
+
+	/*
+	 * Initial PHY rate is 53.3 Mbit/s for control endpoints or
+	 * the maximum supported by the device for other endpoints
+	 * (unless limited by the user).
+	 */
+	if (usb_pipecontrol(urb->pipe))
+		phy_rate = UWB_PHY_RATE_53;
+	else {
+		uint16_t phy_rates;
+
+		phy_rates = le16_to_cpu(wusb_dev->wusb_cap_descr->wPHYRates);
+		phy_rate = fls(phy_rates) - 1;
+		if (phy_rate > whc->wusbhc.phy_rate)
+			phy_rate = whc->wusbhc.phy_rate;
+	}
+
+	qset->qh.info1 = cpu_to_le32(
+		QH_INFO1_EP(usb_pipeendpoint(urb->pipe))
+		| (is_out ? QH_INFO1_DIR_OUT : QH_INFO1_DIR_IN)
+		| usb_pipe_to_qh_type(urb->pipe)
+		| QH_INFO1_DEV_INFO_IDX(wusb_port_no_to_idx(usb_dev->portnum))
+		| QH_INFO1_MAX_PKT_LEN(qset->max_packet)
+		);
+	qset->qh.info2 = cpu_to_le32(
+		QH_INFO2_BURST(qset->max_burst)
+		| QH_INFO2_DBP(0)
+		| QH_INFO2_MAX_COUNT(3)
+		| QH_INFO2_MAX_RETRY(3)
+		| QH_INFO2_MAX_SEQ(qset->max_seq - 1)
+		);
+	/* FIXME: where can we obtain these Tx parameters from?  Why
+	 * doesn't the chip know what Tx power to use? It knows the Rx
+	 * strength and can presumably guess the Tx power required
+	 * from that? */
+	qset->qh.info3 = cpu_to_le32(
+		QH_INFO3_TX_RATE(phy_rate)
+		| QH_INFO3_TX_PWR(0) /* 0 == max power */
+		);
+
+	qset->qh.cur_window = cpu_to_le32((1 << qset->max_burst) - 1);
+}
+
+/**
+ * qset_clear - clear fields in a qset so it may be reinserted into a
+ * schedule.
+ *
+ * The sequence number and current window are not cleared (see
+ * qset_reset()).
+ */
+void qset_clear(struct whc *whc, struct whc_qset *qset)
+{
+	qset->td_start = qset->td_end = qset->ntds = 0;
+
+	qset->qh.link = cpu_to_le64(QH_LINK_NTDS(8) | QH_LINK_T);
+	qset->qh.status = qset->qh.status & QH_STATUS_SEQ_MASK;
+	qset->qh.err_count = 0;
+	qset->qh.scratch[0] = 0;
+	qset->qh.scratch[1] = 0;
+	qset->qh.scratch[2] = 0;
+
+	memset(&qset->qh.overlay, 0, sizeof(qset->qh.overlay));
+
+	init_completion(&qset->remove_complete);
+}
+
+/**
+ * qset_reset - reset endpoint state in a qset.
+ *
+ * Clears the sequence number and current window.  This qset must not
+ * be in the ASL or PZL.
+ */
+void qset_reset(struct whc *whc, struct whc_qset *qset)
+{
+	qset->reset = 0;
+
+	qset->qh.status &= ~QH_STATUS_SEQ_MASK;
+	qset->qh.cur_window = cpu_to_le32((1 << qset->max_burst) - 1);
+}
+
+/**
+ * get_qset - get the qset for an async endpoint
+ *
+ * A new qset is created if one does not already exist.
+ */
+struct whc_qset *get_qset(struct whc *whc, struct urb *urb,
+				 gfp_t mem_flags)
+{
+	struct whc_qset *qset;
+
+	qset = urb->ep->hcpriv;
+	if (qset == NULL) {
+		qset = qset_alloc(whc, mem_flags);
+		if (qset == NULL)
+			return NULL;
+
+		qset->ep = urb->ep;
+		urb->ep->hcpriv = qset;
+		qset_fill_qh(whc, qset, urb);
+	}
+	return qset;
+}
+
+void qset_remove_complete(struct whc *whc, struct whc_qset *qset)
+{
+	qset->remove = 0;
+	list_del_init(&qset->list_node);
+	complete(&qset->remove_complete);
+}
+
+/**
+ * qset_add_qtds - add qTDs for an URB to a qset
+ *
+ * Returns true if the list (ASL/PZL) must be updated because (for a
+ * WHCI 0.95 controller) an activated qTD was pointed to be iCur.
+ */
+enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset)
+{
+	struct whc_std *std;
+	enum whc_update update = 0;
+
+	list_for_each_entry(std, &qset->stds, list_node) {
+		struct whc_qtd *qtd;
+		uint32_t status;
+
+		if (qset->ntds >= WHCI_QSET_TD_MAX
+		    || (qset->pause_after_urb && std->urb != qset->pause_after_urb))
+			break;
+
+		if (std->qtd)
+			continue; /* already has a qTD */
+
+		qtd = std->qtd = &qset->qtd[qset->td_end];
+
+		/* Fill in setup bytes for control transfers. */
+		if (usb_pipecontrol(std->urb->pipe))
+			memcpy(qtd->setup, std->urb->setup_packet, 8);
+
+		status = QTD_STS_ACTIVE | QTD_STS_LEN(std->len);
+
+		if (whc_std_last(std) && usb_pipeout(std->urb->pipe))
+			status |= QTD_STS_LAST_PKT;
+
+		/*
+		 * For an IN transfer the iAlt field should be set so
+		 * the h/w will automatically advance to the next
+		 * transfer. However, if there are 8 or more TDs
+		 * remaining in this transfer then iAlt cannot be set
+		 * as it could point to somewhere in this transfer.
+		 */
+		if (std->ntds_remaining < WHCI_QSET_TD_MAX) {
+			int ialt;
+			ialt = (qset->td_end + std->ntds_remaining) % WHCI_QSET_TD_MAX;
+			status |= QTD_STS_IALT(ialt);
+		} else if (usb_pipein(std->urb->pipe))
+			qset->pause_after_urb = std->urb;
+
+		if (std->num_pointers)
+			qtd->options = cpu_to_le32(QTD_OPT_IOC);
+		else
+			qtd->options = cpu_to_le32(QTD_OPT_IOC | QTD_OPT_SMALL);
+		qtd->page_list_ptr = cpu_to_le64(std->dma_addr);
+
+		qtd->status = cpu_to_le32(status);
+
+		if (QH_STATUS_TO_ICUR(qset->qh.status) == qset->td_end)
+			update = WHC_UPDATE_UPDATED;
+
+		if (++qset->td_end >= WHCI_QSET_TD_MAX)
+			qset->td_end = 0;
+		qset->ntds++;
+	}
+
+	return update;
+}
+
+/**
+ * qset_remove_qtd - remove the first qTD from a qset.
+ *
+ * The qTD might be still active (if it's part of a IN URB that
+ * resulted in a short read) so ensure it's deactivated.
+ */
+static void qset_remove_qtd(struct whc *whc, struct whc_qset *qset)
+{
+	qset->qtd[qset->td_start].status = 0;
+
+	if (++qset->td_start >= WHCI_QSET_TD_MAX)
+		qset->td_start = 0;
+	qset->ntds--;
+}
+
+static void qset_copy_bounce_to_sg(struct whc *whc, struct whc_std *std)
+{
+	struct scatterlist *sg;
+	void *bounce;
+	size_t remaining, offset;
+
+	bounce = std->bounce_buf;
+	remaining = std->len;
+
+	sg = std->bounce_sg;
+	offset = std->bounce_offset;
+
+	while (remaining) {
+		size_t len;
+
+		len = min(sg->length - offset, remaining);
+		memcpy(sg_virt(sg) + offset, bounce, len);
+
+		bounce += len;
+		remaining -= len;
+
+		offset += len;
+		if (offset >= sg->length) {
+			sg = sg_next(sg);
+			offset = 0;
+		}
+	}
+
+}
+
+/**
+ * qset_free_std - remove an sTD and free it.
+ * @whc: the WHCI host controller
+ * @std: the sTD to remove and free.
+ */
+void qset_free_std(struct whc *whc, struct whc_std *std)
+{
+	list_del(&std->list_node);
+	if (std->bounce_buf) {
+		bool is_out = usb_pipeout(std->urb->pipe);
+		dma_addr_t dma_addr;
+
+		if (std->num_pointers)
+			dma_addr = le64_to_cpu(std->pl_virt[0].buf_ptr);
+		else
+			dma_addr = std->dma_addr;
+
+		dma_unmap_single(whc->wusbhc.dev, dma_addr,
+				 std->len, is_out ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		if (!is_out)
+			qset_copy_bounce_to_sg(whc, std);
+		kfree(std->bounce_buf);
+	}
+	if (std->pl_virt) {
+		if (!dma_mapping_error(whc->wusbhc.dev, std->dma_addr))
+			dma_unmap_single(whc->wusbhc.dev, std->dma_addr,
+					 std->num_pointers * sizeof(struct whc_page_list_entry),
+					 DMA_TO_DEVICE);
+		kfree(std->pl_virt);
+		std->pl_virt = NULL;
+	}
+	kfree(std);
+}
+
+/**
+ * qset_remove_qtds - remove an URB's qTDs (and sTDs).
+ */
+static void qset_remove_qtds(struct whc *whc, struct whc_qset *qset,
+			     struct urb *urb)
+{
+	struct whc_std *std, *t;
+
+	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
+		if (std->urb != urb)
+			break;
+		if (std->qtd != NULL)
+			qset_remove_qtd(whc, qset);
+		qset_free_std(whc, std);
+	}
+}
+
+/**
+ * qset_free_stds - free any remaining sTDs for an URB.
+ */
+static void qset_free_stds(struct whc_qset *qset, struct urb *urb)
+{
+	struct whc_std *std, *t;
+
+	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
+		if (std->urb == urb)
+			qset_free_std(qset->whc, std);
+	}
+}
+
+static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_flags)
+{
+	dma_addr_t dma_addr = std->dma_addr;
+	dma_addr_t sp, ep;
+	size_t pl_len;
+	int p;
+
+	/* Short buffers don't need a page list. */
+	if (std->len <= WHCI_PAGE_SIZE) {
+		std->num_pointers = 0;
+		return 0;
+	}
+
+	sp = dma_addr & ~(WHCI_PAGE_SIZE-1);
+	ep = dma_addr + std->len;
+	std->num_pointers = DIV_ROUND_UP(ep - sp, WHCI_PAGE_SIZE);
+
+	pl_len = std->num_pointers * sizeof(struct whc_page_list_entry);
+	std->pl_virt = kmalloc(pl_len, mem_flags);
+	if (std->pl_virt == NULL)
+		return -ENOMEM;
+	std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE);
+	if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) {
+		kfree(std->pl_virt);
+		return -EFAULT;
+	}
+
+	for (p = 0; p < std->num_pointers; p++) {
+		std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr);
+		dma_addr = (dma_addr + WHCI_PAGE_SIZE) & ~(WHCI_PAGE_SIZE-1);
+	}
+
+	return 0;
+}
+
+/**
+ * urb_dequeue_work - executes asl/pzl update and gives back the urb to the system.
+ */
+static void urb_dequeue_work(struct work_struct *work)
+{
+	struct whc_urb *wurb = container_of(work, struct whc_urb, dequeue_work);
+	struct whc_qset *qset = wurb->qset;
+	struct whc *whc = qset->whc;
+	unsigned long flags;
+
+	if (wurb->is_async)
+		asl_update(whc, WUSBCMD_ASYNC_UPDATED
+			   | WUSBCMD_ASYNC_SYNCED_DB
+			   | WUSBCMD_ASYNC_QSET_RM);
+	else
+		pzl_update(whc, WUSBCMD_PERIODIC_UPDATED
+			   | WUSBCMD_PERIODIC_SYNCED_DB
+			   | WUSBCMD_PERIODIC_QSET_RM);
+
+	spin_lock_irqsave(&whc->lock, flags);
+	qset_remove_urb(whc, qset, wurb->urb, wurb->status);
+	spin_unlock_irqrestore(&whc->lock, flags);
+}
+
+static struct whc_std *qset_new_std(struct whc *whc, struct whc_qset *qset,
+				    struct urb *urb, gfp_t mem_flags)
+{
+	struct whc_std *std;
+
+	std = kzalloc(sizeof(struct whc_std), mem_flags);
+	if (std == NULL)
+		return NULL;
+
+	std->urb = urb;
+	std->qtd = NULL;
+
+	INIT_LIST_HEAD(&std->list_node);
+	list_add_tail(&std->list_node, &qset->stds);
+
+	return std;
+}
+
+static int qset_add_urb_sg(struct whc *whc, struct whc_qset *qset, struct urb *urb,
+			   gfp_t mem_flags)
+{
+	size_t remaining;
+	struct scatterlist *sg;
+	int i;
+	int ntds = 0;
+	struct whc_std *std = NULL;
+	struct whc_page_list_entry *new_pl_virt;
+	dma_addr_t prev_end = 0;
+	size_t pl_len;
+	int p = 0;
+
+	remaining = urb->transfer_buffer_length;
+
+	for_each_sg(urb->sg, sg, urb->num_mapped_sgs, i) {
+		dma_addr_t dma_addr;
+		size_t dma_remaining;
+		dma_addr_t sp, ep;
+		int num_pointers;
+
+		if (remaining == 0) {
+			break;
+		}
+
+		dma_addr = sg_dma_address(sg);
+		dma_remaining = min_t(size_t, sg_dma_len(sg), remaining);
+
+		while (dma_remaining) {
+			size_t dma_len;
+
+			/*
+			 * We can use the previous std (if it exists) provided that:
+			 * - the previous one ended on a page boundary.
+			 * - the current one begins on a page boundary.
+			 * - the previous one isn't full.
+			 *
+			 * If a new std is needed but the previous one
+			 * was not a whole number of packets then this
+			 * sg list cannot be mapped onto multiple
+			 * qTDs.  Return an error and let the caller
+			 * sort it out.
+			 */
+			if (!std
+			    || (prev_end & (WHCI_PAGE_SIZE-1))
+			    || (dma_addr & (WHCI_PAGE_SIZE-1))
+			    || std->len + WHCI_PAGE_SIZE > QTD_MAX_XFER_SIZE) {
+				if (std && std->len % qset->max_packet != 0)
+					return -EINVAL;
+				std = qset_new_std(whc, qset, urb, mem_flags);
+				if (std == NULL) {
+					return -ENOMEM;
+				}
+				ntds++;
+				p = 0;
+			}
+
+			dma_len = dma_remaining;
+
+			/*
+			 * If the remainder of this element doesn't
+			 * fit in a single qTD, limit the qTD to a
+			 * whole number of packets.  This allows the
+			 * remainder to go into the next qTD.
+			 */
+			if (std->len + dma_len > QTD_MAX_XFER_SIZE) {
+				dma_len = (QTD_MAX_XFER_SIZE / qset->max_packet)
+					* qset->max_packet - std->len;
+			}
+
+			std->len += dma_len;
+			std->ntds_remaining = -1; /* filled in later */
+
+			sp = dma_addr & ~(WHCI_PAGE_SIZE-1);
+			ep = dma_addr + dma_len;
+			num_pointers = DIV_ROUND_UP(ep - sp, WHCI_PAGE_SIZE);
+			std->num_pointers += num_pointers;
+
+			pl_len = std->num_pointers * sizeof(struct whc_page_list_entry);
+
+			new_pl_virt = krealloc(std->pl_virt, pl_len, mem_flags);
+			if (new_pl_virt == NULL) {
+				kfree(std->pl_virt);
+				std->pl_virt = NULL;
+				return -ENOMEM;
+			}
+			std->pl_virt = new_pl_virt;
+
+			for (;p < std->num_pointers; p++) {
+				std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr);
+				dma_addr = (dma_addr + WHCI_PAGE_SIZE) & ~(WHCI_PAGE_SIZE-1);
+			}
+
+			prev_end = dma_addr = ep;
+			dma_remaining -= dma_len;
+			remaining -= dma_len;
+		}
+	}
+
+	/* Now the number of stds is know, go back and fill in
+	   std->ntds_remaining. */
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (std->ntds_remaining == -1) {
+			pl_len = std->num_pointers * sizeof(struct whc_page_list_entry);
+			std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt,
+						       pl_len, DMA_TO_DEVICE);
+			if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr))
+				return -EFAULT;
+			std->ntds_remaining = ntds--;
+		}
+	}
+	return 0;
+}
+
+/**
+ * qset_add_urb_sg_linearize - add an urb with sg list, copying the data
+ *
+ * If the URB contains an sg list whose elements cannot be directly
+ * mapped to qTDs then the data must be transferred via bounce
+ * buffers.
+ */
+static int qset_add_urb_sg_linearize(struct whc *whc, struct whc_qset *qset,
+				     struct urb *urb, gfp_t mem_flags)
+{
+	bool is_out = usb_pipeout(urb->pipe);
+	size_t max_std_len;
+	size_t remaining;
+	int ntds = 0;
+	struct whc_std *std = NULL;
+	void *bounce = NULL;
+	struct scatterlist *sg;
+	int i;
+
+	/* limit maximum bounce buffer to 16 * 3.5 KiB ~= 28 k */
+	max_std_len = qset->max_burst * qset->max_packet;
+
+	remaining = urb->transfer_buffer_length;
+
+	for_each_sg(urb->sg, sg, urb->num_mapped_sgs, i) {
+		size_t len;
+		size_t sg_remaining;
+		void *orig;
+
+		if (remaining == 0) {
+			break;
+		}
+
+		sg_remaining = min_t(size_t, remaining, sg->length);
+		orig = sg_virt(sg);
+
+		while (sg_remaining) {
+			if (!std || std->len == max_std_len) {
+				std = qset_new_std(whc, qset, urb, mem_flags);
+				if (std == NULL)
+					return -ENOMEM;
+				std->bounce_buf = kmalloc(max_std_len, mem_flags);
+				if (std->bounce_buf == NULL)
+					return -ENOMEM;
+				std->bounce_sg = sg;
+				std->bounce_offset = orig - sg_virt(sg);
+				bounce = std->bounce_buf;
+				ntds++;
+			}
+
+			len = min(sg_remaining, max_std_len - std->len);
+
+			if (is_out)
+				memcpy(bounce, orig, len);
+
+			std->len += len;
+			std->ntds_remaining = -1; /* filled in later */
+
+			bounce += len;
+			orig += len;
+			sg_remaining -= len;
+			remaining -= len;
+		}
+	}
+
+	/*
+	 * For each of the new sTDs, map the bounce buffers, create
+	 * page lists (if necessary), and fill in std->ntds_remaining.
+	 */
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (std->ntds_remaining != -1)
+			continue;
+
+		std->dma_addr = dma_map_single(&whc->umc->dev, std->bounce_buf, std->len,
+					       is_out ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		if (dma_mapping_error(&whc->umc->dev, std->dma_addr))
+			return -EFAULT;
+
+		if (qset_fill_page_list(whc, std, mem_flags) < 0)
+			return -ENOMEM;
+
+		std->ntds_remaining = ntds--;
+	}
+
+	return 0;
+}
+
+/**
+ * qset_add_urb - add an urb to the qset's queue.
+ *
+ * The URB is chopped into sTDs, one for each qTD that will required.
+ * At least one qTD (and sTD) is required even if the transfer has no
+ * data (e.g., for some control transfers).
+ */
+int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb,
+	gfp_t mem_flags)
+{
+	struct whc_urb *wurb;
+	int remaining = urb->transfer_buffer_length;
+	u64 transfer_dma = urb->transfer_dma;
+	int ntds_remaining;
+	int ret;
+
+	wurb = kzalloc(sizeof(struct whc_urb), mem_flags);
+	if (wurb == NULL)
+		goto err_no_mem;
+	urb->hcpriv = wurb;
+	wurb->qset = qset;
+	wurb->urb = urb;
+	INIT_WORK(&wurb->dequeue_work, urb_dequeue_work);
+
+	if (urb->num_sgs) {
+		ret = qset_add_urb_sg(whc, qset, urb, mem_flags);
+		if (ret == -EINVAL) {
+			qset_free_stds(qset, urb);
+			ret = qset_add_urb_sg_linearize(whc, qset, urb, mem_flags);
+		}
+		if (ret < 0)
+			goto err_no_mem;
+		return 0;
+	}
+
+	ntds_remaining = DIV_ROUND_UP(remaining, QTD_MAX_XFER_SIZE);
+	if (ntds_remaining == 0)
+		ntds_remaining = 1;
+
+	while (ntds_remaining) {
+		struct whc_std *std;
+		size_t std_len;
+
+		std_len = remaining;
+		if (std_len > QTD_MAX_XFER_SIZE)
+			std_len = QTD_MAX_XFER_SIZE;
+
+		std = qset_new_std(whc, qset, urb, mem_flags);
+		if (std == NULL)
+			goto err_no_mem;
+
+		std->dma_addr = transfer_dma;
+		std->len = std_len;
+		std->ntds_remaining = ntds_remaining;
+
+		if (qset_fill_page_list(whc, std, mem_flags) < 0)
+			goto err_no_mem;
+
+		ntds_remaining--;
+		remaining -= std_len;
+		transfer_dma += std_len;
+	}
+
+	return 0;
+
+err_no_mem:
+	qset_free_stds(qset, urb);
+	return -ENOMEM;
+}
+
+/**
+ * qset_remove_urb - remove an URB from the urb queue.
+ *
+ * The URB is returned to the USB subsystem.
+ */
+void qset_remove_urb(struct whc *whc, struct whc_qset *qset,
+			    struct urb *urb, int status)
+{
+	struct wusbhc *wusbhc = &whc->wusbhc;
+	struct whc_urb *wurb = urb->hcpriv;
+
+	usb_hcd_unlink_urb_from_ep(&wusbhc->usb_hcd, urb);
+	/* Drop the lock as urb->complete() may enqueue another urb. */
+	spin_unlock(&whc->lock);
+	wusbhc_giveback_urb(wusbhc, urb, status);
+	spin_lock(&whc->lock);
+
+	kfree(wurb);
+}
+
+/**
+ * get_urb_status_from_qtd - get the completed urb status from qTD status
+ * @urb:    completed urb
+ * @status: qTD status
+ */
+static int get_urb_status_from_qtd(struct urb *urb, u32 status)
+{
+	if (status & QTD_STS_HALTED) {
+		if (status & QTD_STS_DBE)
+			return usb_pipein(urb->pipe) ? -ENOSR : -ECOMM;
+		else if (status & QTD_STS_BABBLE)
+			return -EOVERFLOW;
+		else if (status & QTD_STS_RCE)
+			return -ETIME;
+		return -EPIPE;
+	}
+	if (usb_pipein(urb->pipe)
+	    && (urb->transfer_flags & URB_SHORT_NOT_OK)
+	    && urb->actual_length < urb->transfer_buffer_length)
+		return -EREMOTEIO;
+	return 0;
+}
+
+/**
+ * process_inactive_qtd - process an inactive (but not halted) qTD.
+ *
+ * Update the urb with the transfer bytes from the qTD, if the urb is
+ * completely transferred or (in the case of an IN only) the LPF is
+ * set, then the transfer is complete and the urb should be returned
+ * to the system.
+ */
+void process_inactive_qtd(struct whc *whc, struct whc_qset *qset,
+				 struct whc_qtd *qtd)
+{
+	struct whc_std *std = list_first_entry(&qset->stds, struct whc_std, list_node);
+	struct urb *urb = std->urb;
+	uint32_t status;
+	bool complete;
+
+	status = le32_to_cpu(qtd->status);
+
+	urb->actual_length += std->len - QTD_STS_TO_LEN(status);
+
+	if (usb_pipein(urb->pipe) && (status & QTD_STS_LAST_PKT))
+		complete = true;
+	else
+		complete = whc_std_last(std);
+
+	qset_remove_qtd(whc, qset);
+	qset_free_std(whc, std);
+
+	/*
+	 * Transfers for this URB are complete?  Then return it to the
+	 * USB subsystem.
+	 */
+	if (complete) {
+		qset_remove_qtds(whc, qset, urb);
+		qset_remove_urb(whc, qset, urb, get_urb_status_from_qtd(urb, status));
+
+		/*
+		 * If iAlt isn't valid then the hardware didn't
+		 * advance iCur. Adjust the start and end pointers to
+		 * match iCur.
+		 */
+		if (!(status & QTD_STS_IALT_VALID))
+			qset->td_start = qset->td_end
+				= QH_STATUS_TO_ICUR(le16_to_cpu(qset->qh.status));
+		qset->pause_after_urb = NULL;
+	}
+}
+
+/**
+ * process_halted_qtd - process a qset with a halted qtd
+ *
+ * Remove all the qTDs for the failed URB and return the failed URB to
+ * the USB subsystem.  Then remove all other qTDs so the qset can be
+ * removed.
+ *
+ * FIXME: this is the point where rate adaptation can be done.  If a
+ * transfer failed because it exceeded the maximum number of retries
+ * then it could be reactivated with a slower rate without having to
+ * remove the qset.
+ */
+void process_halted_qtd(struct whc *whc, struct whc_qset *qset,
+			       struct whc_qtd *qtd)
+{
+	struct whc_std *std = list_first_entry(&qset->stds, struct whc_std, list_node);
+	struct urb *urb = std->urb;
+	int urb_status;
+
+	urb_status = get_urb_status_from_qtd(urb, le32_to_cpu(qtd->status));
+
+	qset_remove_qtds(whc, qset, urb);
+	qset_remove_urb(whc, qset, urb, urb_status);
+
+	list_for_each_entry(std, &qset->stds, list_node) {
+		if (qset->ntds == 0)
+			break;
+		qset_remove_qtd(whc, qset);
+		std->qtd = NULL;
+	}
+
+	qset->remove = 1;
+}
+
+void qset_free(struct whc *whc, struct whc_qset *qset)
+{
+	dma_pool_free(whc->qset_pool, qset, qset->qset_dma);
+}
+
+/**
+ * qset_delete - wait for a qset to be unused, then free it.
+ */
+void qset_delete(struct whc *whc, struct whc_qset *qset)
+{
+	wait_for_completion(&qset->remove_complete);
+	qset_free(whc, qset);
+}
diff --git a/drivers/staging/wusbcore/host/whci/whcd.h b/drivers/staging/wusbcore/host/whci/whcd.h
new file mode 100644
index 000000000000..a442a2589e83
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/whcd.h
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) private header.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#ifndef __WHCD_H
+#define __WHCD_H
+
+#include <linux/workqueue.h>
+
+#include "../../../uwb/include/whci.h"
+#include "../../../uwb/include/umc.h"
+#include "whci-hc.h"
+
+/* Generic command timeout. */
+#define WHC_GENCMD_TIMEOUT_MS 100
+
+struct whc_dbg;
+
+struct whc {
+	struct wusbhc wusbhc;
+	struct umc_dev *umc;
+
+	resource_size_t base_phys;
+	void __iomem *base;
+	int irq;
+
+	u8 n_devices;
+	u8 n_keys;
+	u8 n_mmc_ies;
+
+	u64 *pz_list;
+	struct dn_buf_entry *dn_buf;
+	struct di_buf_entry *di_buf;
+	dma_addr_t pz_list_dma;
+	dma_addr_t dn_buf_dma;
+	dma_addr_t di_buf_dma;
+
+	spinlock_t   lock;
+	struct mutex mutex;
+
+	void *            gen_cmd_buf;
+	dma_addr_t        gen_cmd_buf_dma;
+	wait_queue_head_t cmd_wq;
+
+	struct workqueue_struct *workqueue;
+	struct work_struct       dn_work;
+
+	struct dma_pool *qset_pool;
+
+	struct list_head async_list;
+	struct list_head async_removed_list;
+	wait_queue_head_t async_list_wq;
+	struct work_struct async_work;
+
+	struct list_head periodic_list[5];
+	struct list_head periodic_removed_list;
+	wait_queue_head_t periodic_list_wq;
+	struct work_struct periodic_work;
+
+	struct whc_dbg *dbg;
+};
+
+#define wusbhc_to_whc(w) (container_of((w), struct whc, wusbhc))
+
+/**
+ * struct whc_std - a software TD.
+ * @urb: the URB this sTD is for.
+ * @offset: start of the URB's data for this TD.
+ * @len: the length of data in the associated TD.
+ * @ntds_remaining: number of TDs (starting from this one) in this transfer.
+ *
+ * @bounce_buf: a bounce buffer if the std was from an urb with a sg
+ * list that could not be mapped to qTDs directly.
+ * @bounce_sg: the first scatterlist element bounce_buf is for.
+ * @bounce_offset: the offset into bounce_sg for the start of bounce_buf.
+ *
+ * Queued URBs may require more TDs than are available in a qset so we
+ * use a list of these "software TDs" (sTDs) to hold per-TD data.
+ */
+struct whc_std {
+	struct urb *urb;
+	size_t len;
+	int    ntds_remaining;
+	struct whc_qtd *qtd;
+
+	struct list_head list_node;
+	int num_pointers;
+	dma_addr_t dma_addr;
+	struct whc_page_list_entry *pl_virt;
+
+	void *bounce_buf;
+	struct scatterlist *bounce_sg;
+	unsigned bounce_offset;
+};
+
+/**
+ * struct whc_urb - per URB host controller structure.
+ * @urb: the URB this struct is for.
+ * @qset: the qset associated to the URB.
+ * @dequeue_work: the work to remove the URB when dequeued.
+ * @is_async: the URB belongs to async sheduler or not.
+ * @status: the status to be returned when calling wusbhc_giveback_urb.
+ */
+struct whc_urb {
+	struct urb *urb;
+	struct whc_qset *qset;
+	struct work_struct dequeue_work;
+	bool is_async;
+	int status;
+};
+
+/**
+ * whc_std_last - is this sTD the URB's last?
+ * @std: the sTD to check.
+ */
+static inline bool whc_std_last(struct whc_std *std)
+{
+	return std->ntds_remaining <= 1;
+}
+
+enum whc_update {
+	WHC_UPDATE_ADDED   = 0x01,
+	WHC_UPDATE_REMOVED = 0x02,
+	WHC_UPDATE_UPDATED = 0x04,
+};
+
+/* init.c */
+int whc_init(struct whc *whc);
+void whc_clean_up(struct whc *whc);
+
+/* hw.c */
+void whc_write_wusbcmd(struct whc *whc, u32 mask, u32 val);
+int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len);
+void whc_hw_error(struct whc *whc, const char *reason);
+
+/* wusb.c */
+int whc_wusbhc_start(struct wusbhc *wusbhc);
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay);
+int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
+		  u8 handle, struct wuie_hdr *wuie);
+int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle);
+int whc_bwa_set(struct wusbhc *wusbhc, s8 stream_index, const struct uwb_mas_bm *mas_bm);
+int whc_dev_info_set(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev);
+int whc_set_num_dnts(struct wusbhc *wusbhc, u8 interval, u8 slots);
+int whc_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
+		const void *ptk, size_t key_size);
+int whc_set_gtk(struct wusbhc *wusbhc, u32 tkid,
+		const void *gtk, size_t key_size);
+int whc_set_cluster_id(struct whc *whc, u8 bcid);
+
+/* int.c */
+irqreturn_t whc_int_handler(struct usb_hcd *hcd);
+void whc_dn_work(struct work_struct *work);
+
+/* asl.c */
+void asl_start(struct whc *whc);
+void asl_stop(struct whc *whc);
+int  asl_init(struct whc *whc);
+void asl_clean_up(struct whc *whc);
+int  asl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags);
+int  asl_urb_dequeue(struct whc *whc, struct urb *urb, int status);
+void asl_qset_delete(struct whc *whc, struct whc_qset *qset);
+void scan_async_work(struct work_struct *work);
+
+/* pzl.c */
+int  pzl_init(struct whc *whc);
+void pzl_clean_up(struct whc *whc);
+void pzl_start(struct whc *whc);
+void pzl_stop(struct whc *whc);
+int  pzl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags);
+int  pzl_urb_dequeue(struct whc *whc, struct urb *urb, int status);
+void pzl_qset_delete(struct whc *whc, struct whc_qset *qset);
+void scan_periodic_work(struct work_struct *work);
+
+/* qset.c */
+struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags);
+void qset_free(struct whc *whc, struct whc_qset *qset);
+struct whc_qset *get_qset(struct whc *whc, struct urb *urb, gfp_t mem_flags);
+void qset_delete(struct whc *whc, struct whc_qset *qset);
+void qset_clear(struct whc *whc, struct whc_qset *qset);
+void qset_reset(struct whc *whc, struct whc_qset *qset);
+int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb,
+		 gfp_t mem_flags);
+void qset_free_std(struct whc *whc, struct whc_std *std);
+void qset_remove_urb(struct whc *whc, struct whc_qset *qset,
+			    struct urb *urb, int status);
+void process_halted_qtd(struct whc *whc, struct whc_qset *qset,
+			       struct whc_qtd *qtd);
+void process_inactive_qtd(struct whc *whc, struct whc_qset *qset,
+				 struct whc_qtd *qtd);
+enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset);
+void qset_remove_complete(struct whc *whc, struct whc_qset *qset);
+void pzl_update(struct whc *whc, uint32_t wusbcmd);
+void asl_update(struct whc *whc, uint32_t wusbcmd);
+
+/* debug.c */
+void whc_dbg_init(struct whc *whc);
+void whc_dbg_clean_up(struct whc *whc);
+
+#endif /* #ifndef __WHCD_H */
diff --git a/drivers/staging/wusbcore/host/whci/whci-hc.h b/drivers/staging/wusbcore/host/whci/whci-hc.h
new file mode 100644
index 000000000000..5a86a57a80cc
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/whci-hc.h
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) data structures.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#ifndef _WHCI_WHCI_HC_H
+#define _WHCI_WHCI_HC_H
+
+#include <linux/list.h>
+
+/**
+ * WHCI_PAGE_SIZE - page size use by WHCI
+ *
+ * WHCI assumes that host system uses pages of 4096 octets.
+ */
+#define WHCI_PAGE_SIZE 4096
+
+
+/**
+ * QTD_MAX_TXFER_SIZE - max number of bytes to transfer with a single
+ * qtd.
+ *
+ * This is 2^20 - 1.
+ */
+#define QTD_MAX_XFER_SIZE 1048575
+
+
+/**
+ * struct whc_qtd - Queue Element Transfer Descriptors (qTD)
+ *
+ * This describes the data for a bulk, control or interrupt transfer.
+ *
+ * [WHCI] section 3.2.4
+ */
+struct whc_qtd {
+	__le32 status; /*< remaining transfer len and transfer status */
+	__le32 options;
+	__le64 page_list_ptr; /*< physical pointer to data buffer page list*/
+	__u8   setup[8];      /*< setup data for control transfers */
+} __attribute__((packed));
+
+#define QTD_STS_ACTIVE     (1 << 31)  /* enable execution of transaction */
+#define QTD_STS_HALTED     (1 << 30)  /* transfer halted */
+#define QTD_STS_DBE        (1 << 29)  /* data buffer error */
+#define QTD_STS_BABBLE     (1 << 28)  /* babble detected */
+#define QTD_STS_RCE        (1 << 27)  /* retry count exceeded */
+#define QTD_STS_LAST_PKT   (1 << 26)  /* set Last Packet Flag in WUSB header */
+#define QTD_STS_INACTIVE   (1 << 25)  /* queue set is marked inactive */
+#define QTD_STS_IALT_VALID (1 << 23)                          /* iAlt field is valid */
+#define QTD_STS_IALT(i)    (QTD_STS_IALT_VALID | ((i) << 20)) /* iAlt field */
+#define QTD_STS_LEN(l)     ((l) << 0) /* transfer length */
+#define QTD_STS_TO_LEN(s)  ((s) & 0x000fffff)
+
+#define QTD_OPT_IOC      (1 << 1) /* page_list_ptr points to buffer directly */
+#define QTD_OPT_SMALL    (1 << 0) /* interrupt on complete */
+
+/**
+ * struct whc_itd - Isochronous Queue Element Transfer Descriptors (iTD)
+ *
+ * This describes the data and other parameters for an isochronous
+ * transfer.
+ *
+ * [WHCI] section 3.2.5
+ */
+struct whc_itd {
+	__le16 presentation_time;    /*< presentation time for OUT transfers */
+	__u8   num_segments;         /*< number of data segments in segment list */
+	__u8   status;               /*< command execution status */
+	__le32 options;              /*< misc transfer options */
+	__le64 page_list_ptr;        /*< physical pointer to data buffer page list */
+	__le64 seg_list_ptr;         /*< physical pointer to segment list */
+} __attribute__((packed));
+
+#define ITD_STS_ACTIVE   (1 << 7) /* enable execution of transaction */
+#define ITD_STS_DBE      (1 << 5) /* data buffer error */
+#define ITD_STS_BABBLE   (1 << 4) /* babble detected */
+#define ITD_STS_INACTIVE (1 << 1) /* queue set is marked inactive */
+
+#define ITD_OPT_IOC      (1 << 1) /* interrupt on complete */
+#define ITD_OPT_SMALL    (1 << 0) /* page_list_ptr points to buffer directly */
+
+/**
+ * Page list entry.
+ *
+ * A TD's page list must contain sufficient page list entries for the
+ * total data length in the TD.
+ *
+ * [WHCI] section 3.2.4.3
+ */
+struct whc_page_list_entry {
+	__le64 buf_ptr; /*< physical pointer to buffer */
+} __attribute__((packed));
+
+/**
+ * struct whc_seg_list_entry - Segment list entry.
+ *
+ * Describes a portion of the data buffer described in the containing
+ * qTD's page list.
+ *
+ * seg_ptr = qtd->page_list_ptr[qtd->seg_list_ptr[seg].idx].buf_ptr
+ *           + qtd->seg_list_ptr[seg].offset;
+ *
+ * Segments can't cross page boundries.
+ *
+ * [WHCI] section 3.2.5.5
+ */
+struct whc_seg_list_entry {
+	__le16 len;    /*< segment length */
+	__u8   idx;    /*< index into page list */
+	__u8   status; /*< segment status */
+	__le16 offset; /*< 12 bit offset into page */
+} __attribute__((packed));
+
+/**
+ * struct whc_qhead - endpoint and status information for a qset.
+ *
+ * [WHCI] section 3.2.6
+ */
+struct whc_qhead {
+	__le64 link; /*< next qset in list */
+	__le32 info1;
+	__le32 info2;
+	__le32 info3;
+	__le16 status;
+	__le16 err_count;  /*< transaction error count */
+	__le32 cur_window;
+	__le32 scratch[3]; /*< h/w scratch area */
+	union {
+		struct whc_qtd qtd;
+		struct whc_itd itd;
+	} overlay;
+} __attribute__((packed));
+
+#define QH_LINK_PTR_MASK (~0x03Full)
+#define QH_LINK_PTR(ptr) ((ptr) & QH_LINK_PTR_MASK)
+#define QH_LINK_IQS      (1 << 4) /* isochronous queue set */
+#define QH_LINK_NTDS(n)  (((n) - 1) << 1) /* number of TDs in queue set */
+#define QH_LINK_T        (1 << 0) /* last queue set in periodic schedule list */
+
+#define QH_INFO1_EP(e)           ((e) << 0)  /* endpoint number */
+#define QH_INFO1_DIR_IN          (1 << 4)    /* IN transfer */
+#define QH_INFO1_DIR_OUT         (0 << 4)    /* OUT transfer */
+#define QH_INFO1_TR_TYPE_CTRL    (0x0 << 5)  /* control transfer */
+#define QH_INFO1_TR_TYPE_ISOC    (0x1 << 5)  /* isochronous transfer */
+#define QH_INFO1_TR_TYPE_BULK    (0x2 << 5)  /* bulk transfer */
+#define QH_INFO1_TR_TYPE_INT     (0x3 << 5)  /* interrupt */
+#define QH_INFO1_TR_TYPE_LP_INT  (0x7 << 5)  /* low power interrupt */
+#define QH_INFO1_DEV_INFO_IDX(i) ((i) << 8)  /* index into device info buffer */
+#define QH_INFO1_SET_INACTIVE    (1 << 15)   /* set inactive after transfer */
+#define QH_INFO1_MAX_PKT_LEN(l)  ((l) << 16) /* maximum packet length */
+
+#define QH_INFO2_BURST(b)        ((b) << 0)  /* maximum burst length */
+#define QH_INFO2_DBP(p)          ((p) << 5)  /* data burst policy (see [WUSB] table 5-7) */
+#define QH_INFO2_MAX_COUNT(c)    ((c) << 8)  /* max isoc/int pkts per zone */
+#define QH_INFO2_RQS             (1 << 15)   /* reactivate queue set */
+#define QH_INFO2_MAX_RETRY(r)    ((r) << 16) /* maximum transaction retries */
+#define QH_INFO2_MAX_SEQ(s)      ((s) << 20) /* maximum sequence number */
+#define QH_INFO3_MAX_DELAY(d)    ((d) << 0)  /* maximum stream delay in 125 us units (isoc only) */
+#define QH_INFO3_INTERVAL(i)     ((i) << 16) /* segment interval in 125 us units (isoc only) */
+
+#define QH_INFO3_TX_RATE(r)      ((r) << 24) /* PHY rate (see [ECMA-368] section 10.3.1.1) */
+#define QH_INFO3_TX_PWR(p)       ((p) << 29) /* transmit power (see [WUSB] section 5.2.1.2) */
+
+#define QH_STATUS_FLOW_CTRL      (1 << 15)
+#define QH_STATUS_ICUR(i)        ((i) << 5)
+#define QH_STATUS_TO_ICUR(s)     (((s) >> 5) & 0x7)
+#define QH_STATUS_SEQ_MASK       0x1f
+
+/**
+ * usb_pipe_to_qh_type - USB core pipe type to QH transfer type
+ *
+ * Returns the QH type field for a USB core pipe type.
+ */
+static inline unsigned usb_pipe_to_qh_type(unsigned pipe)
+{
+	static const unsigned type[] = {
+		[PIPE_ISOCHRONOUS] = QH_INFO1_TR_TYPE_ISOC,
+		[PIPE_INTERRUPT]   = QH_INFO1_TR_TYPE_INT,
+		[PIPE_CONTROL]     = QH_INFO1_TR_TYPE_CTRL,
+		[PIPE_BULK]        = QH_INFO1_TR_TYPE_BULK,
+	};
+	return type[usb_pipetype(pipe)];
+}
+
+/**
+ * Maxiumum number of TDs in a qset.
+ */
+#define WHCI_QSET_TD_MAX 8
+
+/**
+ * struct whc_qset - WUSB data transfers to a specific endpoint
+ * @qh: the QHead of this qset
+ * @qtd: up to 8 qTDs (for qsets for control, bulk and interrupt
+ * transfers)
+ * @itd: up to 8 iTDs (for qsets for isochronous transfers)
+ * @qset_dma: DMA address for this qset
+ * @whc: WHCI HC this qset is for
+ * @ep: endpoint
+ * @stds: list of sTDs queued to this qset
+ * @ntds: number of qTDs queued (not necessarily the same as nTDs
+ * field in the QH)
+ * @td_start: index of the first qTD in the list
+ * @td_end: index of next free qTD in the list (provided
+ *          ntds < WHCI_QSET_TD_MAX)
+ *
+ * Queue Sets (qsets) are added to the asynchronous schedule list
+ * (ASL) or the periodic zone list (PZL).
+ *
+ * qsets may contain up to 8 TDs (either qTDs or iTDs as appropriate).
+ * Each TD may refer to at most 1 MiB of data. If a single transfer
+ * has > 8MiB of data, TDs can be reused as they are completed since
+ * the TD list is used as a circular buffer.  Similarly, several
+ * (smaller) transfers may be queued in a qset.
+ *
+ * WHCI controllers may cache portions of the qsets in the ASL and
+ * PZL, requiring the WHCD to inform the WHC that the lists have been
+ * updated (fields changed or qsets inserted or removed).  For safe
+ * insertion and removal of qsets from the lists the schedule must be
+ * stopped to avoid races in updating the QH link pointers.
+ *
+ * Since the HC is free to execute qsets in any order, all transfers
+ * to an endpoint should use the same qset to ensure transfers are
+ * executed in the order they're submitted.
+ *
+ * [WHCI] section 3.2.3
+ */
+struct whc_qset {
+	struct whc_qhead qh;
+	union {
+		struct whc_qtd qtd[WHCI_QSET_TD_MAX];
+		struct whc_itd itd[WHCI_QSET_TD_MAX];
+	};
+
+	/* private data for WHCD */
+	dma_addr_t qset_dma;
+	struct whc *whc;
+	struct usb_host_endpoint *ep;
+	struct list_head stds;
+	int ntds;
+	int td_start;
+	int td_end;
+	struct list_head list_node;
+	unsigned in_sw_list:1;
+	unsigned in_hw_list:1;
+	unsigned remove:1;
+	unsigned reset:1;
+	struct urb *pause_after_urb;
+	struct completion remove_complete;
+	uint16_t max_packet;
+	uint8_t max_burst;
+	uint8_t max_seq;
+};
+
+static inline void whc_qset_set_link_ptr(u64 *ptr, u64 target)
+{
+	if (target)
+		*ptr = (*ptr & ~(QH_LINK_PTR_MASK | QH_LINK_T)) | QH_LINK_PTR(target);
+	else
+		*ptr = QH_LINK_T;
+}
+
+/**
+ * struct di_buf_entry - Device Information (DI) buffer entry.
+ *
+ * There's one of these per connected device.
+ */
+struct di_buf_entry {
+	__le32 availability_info[8]; /*< MAS availability information, one MAS per bit */
+	__le32 addr_sec_info;        /*< addressing and security info */
+	__le32 reserved[7];
+} __attribute__((packed));
+
+#define WHC_DI_SECURE           (1 << 31)
+#define WHC_DI_DISABLE          (1 << 30)
+#define WHC_DI_KEY_IDX(k)       ((k) << 8)
+#define WHC_DI_KEY_IDX_MASK     0x0000ff00
+#define WHC_DI_DEV_ADDR(a)      ((a) << 0)
+#define WHC_DI_DEV_ADDR_MASK    0x000000ff
+
+/**
+ * struct dn_buf_entry - Device Notification (DN) buffer entry.
+ *
+ * [WHCI] section 3.2.8
+ */
+struct dn_buf_entry {
+	__u8   msg_size;    /*< number of octets of valid DN data */
+	__u8   reserved1;
+	__u8   src_addr;    /*< source address */
+	__u8   status;      /*< buffer entry status */
+	__le32 tkid;        /*< TKID for source device, valid if secure bit is set */
+	__u8   dn_data[56]; /*< up to 56 octets of DN data */
+} __attribute__((packed));
+
+#define WHC_DN_STATUS_VALID  (1 << 7) /* buffer entry is valid */
+#define WHC_DN_STATUS_SECURE (1 << 6) /* notification received using secure frame */
+
+#define WHC_N_DN_ENTRIES (4096 / sizeof(struct dn_buf_entry))
+
+/* The Add MMC IE WUSB Generic Command may take up to 256 bytes of
+   data. [WHCI] section 2.4.7. */
+#define WHC_GEN_CMD_DATA_LEN 256
+
+/*
+ * HC registers.
+ *
+ * [WHCI] section 2.4
+ */
+
+#define WHCIVERSION          0x00
+
+#define WHCSPARAMS           0x04
+#  define WHCSPARAMS_TO_N_MMC_IES(p) (((p) >> 16) & 0xff)
+#  define WHCSPARAMS_TO_N_KEYS(p)    (((p) >> 8) & 0xff)
+#  define WHCSPARAMS_TO_N_DEVICES(p) (((p) >> 0) & 0x7f)
+
+#define WUSBCMD              0x08
+#  define WUSBCMD_BCID(b)            ((b) << 16)
+#  define WUSBCMD_BCID_MASK          (0xff << 16)
+#  define WUSBCMD_ASYNC_QSET_RM      (1 << 12)
+#  define WUSBCMD_PERIODIC_QSET_RM   (1 << 11)
+#  define WUSBCMD_WUSBSI(s)          ((s) << 8)
+#  define WUSBCMD_WUSBSI_MASK        (0x7 << 8)
+#  define WUSBCMD_ASYNC_SYNCED_DB    (1 << 7)
+#  define WUSBCMD_PERIODIC_SYNCED_DB (1 << 6)
+#  define WUSBCMD_ASYNC_UPDATED      (1 << 5)
+#  define WUSBCMD_PERIODIC_UPDATED   (1 << 4)
+#  define WUSBCMD_ASYNC_EN           (1 << 3)
+#  define WUSBCMD_PERIODIC_EN        (1 << 2)
+#  define WUSBCMD_WHCRESET           (1 << 1)
+#  define WUSBCMD_RUN                (1 << 0)
+
+#define WUSBSTS              0x0c
+#  define WUSBSTS_ASYNC_SCHED             (1 << 15)
+#  define WUSBSTS_PERIODIC_SCHED          (1 << 14)
+#  define WUSBSTS_DNTS_SCHED              (1 << 13)
+#  define WUSBSTS_HCHALTED                (1 << 12)
+#  define WUSBSTS_GEN_CMD_DONE            (1 << 9)
+#  define WUSBSTS_CHAN_TIME_ROLLOVER      (1 << 8)
+#  define WUSBSTS_DNTS_OVERFLOW           (1 << 7)
+#  define WUSBSTS_BPST_ADJUSTMENT_CHANGED (1 << 6)
+#  define WUSBSTS_HOST_ERR                (1 << 5)
+#  define WUSBSTS_ASYNC_SCHED_SYNCED      (1 << 4)
+#  define WUSBSTS_PERIODIC_SCHED_SYNCED   (1 << 3)
+#  define WUSBSTS_DNTS_INT                (1 << 2)
+#  define WUSBSTS_ERR_INT                 (1 << 1)
+#  define WUSBSTS_INT                     (1 << 0)
+#  define WUSBSTS_INT_MASK                0x3ff
+
+#define WUSBINTR             0x10
+#  define WUSBINTR_GEN_CMD_DONE             (1 << 9)
+#  define WUSBINTR_CHAN_TIME_ROLLOVER       (1 << 8)
+#  define WUSBINTR_DNTS_OVERFLOW            (1 << 7)
+#  define WUSBINTR_BPST_ADJUSTMENT_CHANGED  (1 << 6)
+#  define WUSBINTR_HOST_ERR                 (1 << 5)
+#  define WUSBINTR_ASYNC_SCHED_SYNCED       (1 << 4)
+#  define WUSBINTR_PERIODIC_SCHED_SYNCED    (1 << 3)
+#  define WUSBINTR_DNTS_INT                 (1 << 2)
+#  define WUSBINTR_ERR_INT                  (1 << 1)
+#  define WUSBINTR_INT                      (1 << 0)
+#  define WUSBINTR_ALL 0x3ff
+
+#define WUSBGENCMDSTS        0x14
+#  define WUSBGENCMDSTS_ACTIVE (1 << 31)
+#  define WUSBGENCMDSTS_ERROR  (1 << 24)
+#  define WUSBGENCMDSTS_IOC    (1 << 23)
+#  define WUSBGENCMDSTS_MMCIE_ADD 0x01
+#  define WUSBGENCMDSTS_MMCIE_RM  0x02
+#  define WUSBGENCMDSTS_SET_MAS   0x03
+#  define WUSBGENCMDSTS_CHAN_STOP 0x04
+#  define WUSBGENCMDSTS_RWP_EN    0x05
+
+#define WUSBGENCMDPARAMS     0x18
+#define WUSBGENADDR          0x20
+#define WUSBASYNCLISTADDR    0x28
+#define WUSBDNTSBUFADDR      0x30
+#define WUSBDEVICEINFOADDR   0x38
+
+#define WUSBSETSECKEYCMD     0x40
+#  define WUSBSETSECKEYCMD_SET    (1 << 31)
+#  define WUSBSETSECKEYCMD_ERASE  (1 << 30)
+#  define WUSBSETSECKEYCMD_GTK    (1 << 8)
+#  define WUSBSETSECKEYCMD_IDX(i) ((i) << 0)
+
+#define WUSBTKID             0x44
+#define WUSBSECKEY           0x48
+#define WUSBPERIODICLISTBASE 0x58
+#define WUSBMASINDEX         0x60
+
+#define WUSBDNTSCTRL         0x64
+#  define WUSBDNTSCTRL_ACTIVE      (1 << 31)
+#  define WUSBDNTSCTRL_INTERVAL(i) ((i) << 8)
+#  define WUSBDNTSCTRL_SLOTS(s)    ((s) << 0)
+
+#define WUSBTIME             0x68
+#  define WUSBTIME_CHANNEL_TIME_MASK 0x00ffffff
+
+#define WUSBBPST             0x6c
+#define WUSBDIBUPDATED       0x70
+
+#endif /* #ifndef _WHCI_WHCI_HC_H */
diff --git a/drivers/staging/wusbcore/host/whci/wusb.c b/drivers/staging/wusbcore/host/whci/wusb.c
new file mode 100644
index 000000000000..6d0068ab35e4
--- /dev/null
+++ b/drivers/staging/wusbcore/host/whci/wusb.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless Host Controller (WHC) WUSB operations.
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+
+#include "../../../uwb/include/umc.h"
+#include "../../wusbhc.h"
+
+#include "whcd.h"
+
+static int whc_update_di(struct whc *whc, int idx)
+{
+	int offset = idx / 32;
+	u32 bit = 1 << (idx % 32);
+
+	le_writel(bit, whc->base + WUSBDIBUPDATED + offset);
+
+	return whci_wait_for(&whc->umc->dev,
+			     whc->base + WUSBDIBUPDATED + offset, bit, 0,
+			     100, "DI update");
+}
+
+/*
+ * WHCI starts MMCs based on there being a valid GTK so these need
+ * only start/stop the asynchronous and periodic schedules and send a
+ * channel stop command.
+ */
+
+int whc_wusbhc_start(struct wusbhc *wusbhc)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+
+	asl_start(whc);
+	pzl_start(whc);
+
+	return 0;
+}
+
+void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 stop_time, now_time;
+	int ret;
+
+	pzl_stop(whc);
+	asl_stop(whc);
+
+	now_time = le_readl(whc->base + WUSBTIME) & WUSBTIME_CHANNEL_TIME_MASK;
+	stop_time = (now_time + ((delay * 8) << 7)) & 0x00ffffff;
+	ret = whc_do_gencmd(whc, WUSBGENCMDSTS_CHAN_STOP, stop_time, NULL, 0);
+	if (ret == 0)
+		msleep(delay);
+}
+
+int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
+		  u8 handle, struct wuie_hdr *wuie)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 params;
+
+	params = (interval << 24)
+		| (repeat_cnt << 16)
+		| (wuie->bLength << 8)
+		| handle;
+
+	return whc_do_gencmd(whc, WUSBGENCMDSTS_MMCIE_ADD, params, wuie, wuie->bLength);
+}
+
+int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 params;
+
+	params = handle;
+
+	return whc_do_gencmd(whc, WUSBGENCMDSTS_MMCIE_RM, params, NULL, 0);
+}
+
+int whc_bwa_set(struct wusbhc *wusbhc, s8 stream_index, const struct uwb_mas_bm *mas_bm)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+
+	if (stream_index >= 0)
+		whc_write_wusbcmd(whc, WUSBCMD_WUSBSI_MASK, WUSBCMD_WUSBSI(stream_index));
+
+	return whc_do_gencmd(whc, WUSBGENCMDSTS_SET_MAS, 0, (void *)mas_bm, sizeof(*mas_bm));
+}
+
+int whc_dev_info_set(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	int idx = wusb_dev->port_idx;
+	struct di_buf_entry *di = &whc->di_buf[idx];
+	int ret;
+
+	mutex_lock(&whc->mutex);
+
+	uwb_mas_bm_copy_le(di->availability_info, &wusb_dev->availability);
+	di->addr_sec_info &= ~(WHC_DI_DISABLE | WHC_DI_DEV_ADDR_MASK);
+	di->addr_sec_info |= WHC_DI_DEV_ADDR(wusb_dev->addr);
+
+	ret = whc_update_di(whc, idx);
+
+	mutex_unlock(&whc->mutex);
+
+	return ret;
+}
+
+/*
+ * Set the number of Device Notification Time Slots (DNTS) and enable
+ * device notifications.
+ */
+int whc_set_num_dnts(struct wusbhc *wusbhc, u8 interval, u8 slots)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	u32 dntsctrl;
+
+	dntsctrl = WUSBDNTSCTRL_ACTIVE
+		| WUSBDNTSCTRL_INTERVAL(interval)
+		| WUSBDNTSCTRL_SLOTS(slots);
+
+	le_writel(dntsctrl, whc->base + WUSBDNTSCTRL);
+
+	return 0;
+}
+
+static int whc_set_key(struct whc *whc, u8 key_index, uint32_t tkid,
+		       const void *key, size_t key_size, bool is_gtk)
+{
+	uint32_t setkeycmd;
+	uint32_t seckey[4];
+	int i;
+	int ret;
+
+	memcpy(seckey, key, key_size);
+	setkeycmd = WUSBSETSECKEYCMD_SET | WUSBSETSECKEYCMD_IDX(key_index);
+	if (is_gtk)
+		setkeycmd |= WUSBSETSECKEYCMD_GTK;
+
+	le_writel(tkid, whc->base + WUSBTKID);
+	for (i = 0; i < 4; i++)
+		le_writel(seckey[i], whc->base + WUSBSECKEY + 4*i);
+	le_writel(setkeycmd, whc->base + WUSBSETSECKEYCMD);
+
+	ret = whci_wait_for(&whc->umc->dev, whc->base + WUSBSETSECKEYCMD,
+			    WUSBSETSECKEYCMD_SET, 0, 100, "set key");
+
+	return ret;
+}
+
+/**
+ * whc_set_ptk - set the PTK to use for a device.
+ *
+ * The index into the key table for this PTK is the same as the
+ * device's port index.
+ */
+int whc_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
+		const void *ptk, size_t key_size)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	struct di_buf_entry *di = &whc->di_buf[port_idx];
+	int ret;
+
+	mutex_lock(&whc->mutex);
+
+	if (ptk) {
+		ret = whc_set_key(whc, port_idx, tkid, ptk, key_size, false);
+		if (ret)
+			goto out;
+
+		di->addr_sec_info &= ~WHC_DI_KEY_IDX_MASK;
+		di->addr_sec_info |= WHC_DI_SECURE | WHC_DI_KEY_IDX(port_idx);
+	} else
+		di->addr_sec_info &= ~WHC_DI_SECURE;
+
+	ret = whc_update_di(whc, port_idx);
+out:
+	mutex_unlock(&whc->mutex);
+	return ret;
+}
+
+/**
+ * whc_set_gtk - set the GTK for subsequent broadcast packets
+ *
+ * The GTK is stored in the last entry in the key table (the previous
+ * N_DEVICES entries are for the per-device PTKs).
+ */
+int whc_set_gtk(struct wusbhc *wusbhc, u32 tkid,
+		const void *gtk, size_t key_size)
+{
+	struct whc *whc = wusbhc_to_whc(wusbhc);
+	int ret;
+
+	mutex_lock(&whc->mutex);
+
+	ret = whc_set_key(whc, whc->n_devices, tkid, gtk, key_size, true);
+
+	mutex_unlock(&whc->mutex);
+
+	return ret;
+}
+
+int whc_set_cluster_id(struct whc *whc, u8 bcid)
+{
+	whc_write_wusbcmd(whc, WUSBCMD_BCID_MASK, WUSBCMD_BCID(bcid));
+	return 0;
+}
diff --git a/drivers/staging/wusbcore/include/association.h b/drivers/staging/wusbcore/include/association.h
new file mode 100644
index 000000000000..d7f3cb9b9db5
--- /dev/null
+++ b/drivers/staging/wusbcore/include/association.h
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB - Cable Based Association
+ *
+ * Copyright (C) 2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ */
+#ifndef __LINUX_USB_ASSOCIATION_H
+#define __LINUX_USB_ASSOCIATION_H
+
+
+/*
+ * Association attributes
+ *
+ * Association Models Supplement to WUSB 1.0 T[3-1]
+ *
+ * Each field in the structures has it's ID, it's length and then the
+ * value. This is the actual definition of the field's ID and its
+ * length.
+ */
+struct wusb_am_attr {
+	__u8 id;
+	__u8 len;
+};
+
+/* Different fields defined by the spec */
+#define WUSB_AR_AssociationTypeId	{ .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) }
+#define WUSB_AR_AssociationSubTypeId	{ .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) }
+#define WUSB_AR_Length			{ .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) }
+#define WUSB_AR_AssociationStatus	{ .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) }
+#define WUSB_AR_LangID			{ .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) }
+#define WUSB_AR_DeviceFriendlyName	{ .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */
+#define WUSB_AR_HostFriendlyName	{ .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */
+#define WUSB_AR_CHID			{ .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) }
+#define WUSB_AR_CDID			{ .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) }
+#define WUSB_AR_ConnectionContext	{ .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) }
+#define WUSB_AR_BandGroups		{ .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) }
+
+/* CBAF Control Requests (AMS1.0[T4-1] */
+enum {
+	CBAF_REQ_GET_ASSOCIATION_INFORMATION = 0x01,
+	CBAF_REQ_GET_ASSOCIATION_REQUEST,
+	CBAF_REQ_SET_ASSOCIATION_RESPONSE
+};
+
+/*
+ * CBAF USB-interface defitions
+ *
+ * No altsettings, one optional interrupt endpoint.
+ */
+enum {
+	CBAF_IFACECLASS    = 0xef,
+	CBAF_IFACESUBCLASS = 0x03,
+	CBAF_IFACEPROTOCOL = 0x01,
+};
+
+/* Association Information (AMS1.0[T4-3]) */
+struct wusb_cbaf_assoc_info {
+	__le16 Length;
+	__u8 NumAssociationRequests;
+	__le16 Flags;
+	__u8 AssociationRequestsArray[];
+} __attribute__((packed));
+
+/* Association Request (AMS1.0[T4-4]) */
+struct wusb_cbaf_assoc_request {
+	__u8 AssociationDataIndex;
+	__u8 Reserved;
+	__le16 AssociationTypeId;
+	__le16 AssociationSubTypeId;
+	__le32 AssociationTypeInfoSize;
+} __attribute__((packed));
+
+enum {
+	AR_TYPE_WUSB                    = 0x0001,
+	AR_TYPE_WUSB_RETRIEVE_HOST_INFO = 0x0000,
+	AR_TYPE_WUSB_ASSOCIATE          = 0x0001,
+};
+
+/* Association Attribute header (AMS1.0[3.8]) */
+struct wusb_cbaf_attr_hdr {
+	__le16 id;
+	__le16 len;
+} __attribute__((packed));
+
+/* Host Info (AMS1.0[T4-7]) (yeah, more headers and fields...) */
+struct wusb_cbaf_host_info {
+	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
+	__le16 AssociationTypeId;
+	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
+	__le16 AssociationSubTypeId;
+	struct wusb_cbaf_attr_hdr CHID_hdr;
+	struct wusb_ckhdid CHID;
+	struct wusb_cbaf_attr_hdr LangID_hdr;
+	__le16 LangID;
+	struct wusb_cbaf_attr_hdr HostFriendlyName_hdr;
+	__u8 HostFriendlyName[];
+} __attribute__((packed));
+
+/* Device Info (AMS1.0[T4-8])
+ *
+ * I still don't get this tag'n'header stuff for each goddamn
+ * field...
+ */
+struct wusb_cbaf_device_info {
+	struct wusb_cbaf_attr_hdr Length_hdr;
+	__le32 Length;
+	struct wusb_cbaf_attr_hdr CDID_hdr;
+	struct wusb_ckhdid CDID;
+	struct wusb_cbaf_attr_hdr BandGroups_hdr;
+	__le16 BandGroups;
+	struct wusb_cbaf_attr_hdr LangID_hdr;
+	__le16 LangID;
+	struct wusb_cbaf_attr_hdr DeviceFriendlyName_hdr;
+	__u8 DeviceFriendlyName[];
+} __attribute__((packed));
+
+/* Connection Context; CC_DATA - Success case (AMS1.0[T4-9]) */
+struct wusb_cbaf_cc_data {
+	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
+	__le16 AssociationTypeId;
+	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
+	__le16 AssociationSubTypeId;
+	struct wusb_cbaf_attr_hdr Length_hdr;
+	__le32 Length;
+	struct wusb_cbaf_attr_hdr ConnectionContext_hdr;
+	struct wusb_ckhdid CHID;
+	struct wusb_ckhdid CDID;
+	struct wusb_ckhdid CK;
+	struct wusb_cbaf_attr_hdr BandGroups_hdr;
+	__le16 BandGroups;
+} __attribute__((packed));
+
+/* CC_DATA - Failure case (AMS1.0[T4-10]) */
+struct wusb_cbaf_cc_data_fail {
+	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
+	__le16 AssociationTypeId;
+	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
+	__le16 AssociationSubTypeId;
+	struct wusb_cbaf_attr_hdr Length_hdr;
+	__le16 Length;
+	struct wusb_cbaf_attr_hdr AssociationStatus_hdr;
+	__u32 AssociationStatus;
+} __attribute__((packed));
+
+#endif	/* __LINUX_USB_ASSOCIATION_H */
diff --git a/drivers/staging/wusbcore/include/wusb-wa.h b/drivers/staging/wusbcore/include/wusb-wa.h
new file mode 100644
index 000000000000..64a840b5106e
--- /dev/null
+++ b/drivers/staging/wusbcore/include/wusb-wa.h
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Wire Adapter constants and structures.
+ *
+ * Copyright (C) 2005-2006 Intel Corporation.
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * FIXME: docs
+ * FIXME: organize properly, group logically
+ *
+ * All the event structures are defined in uwb/spec.h, as they are
+ * common to the WHCI and WUSB radio control interfaces.
+ *
+ * References:
+ *   [WUSB] Wireless Universal Serial Bus Specification, revision 1.0, ch8
+ */
+#ifndef __LINUX_USB_WUSB_WA_H
+#define __LINUX_USB_WUSB_WA_H
+
+/**
+ * Radio Command Request for the Radio Control Interface
+ *
+ * Radio Control Interface command and event codes are the same as
+ * WHCI, and listed in include/linux/uwb.h:UWB_RC_{CMD,EVT}_*
+ */
+enum {
+	WA_EXEC_RC_CMD = 40,	/* Radio Control command Request */
+};
+
+/* Wireless Adapter Requests ([WUSB] table 8-51) */
+enum {
+	WUSB_REQ_ADD_MMC_IE     = 20,
+	WUSB_REQ_REMOVE_MMC_IE  = 21,
+	WUSB_REQ_SET_NUM_DNTS   = 22,
+	WUSB_REQ_SET_CLUSTER_ID = 23,
+	WUSB_REQ_SET_DEV_INFO   = 24,
+	WUSB_REQ_GET_TIME       = 25,
+	WUSB_REQ_SET_STREAM_IDX = 26,
+	WUSB_REQ_SET_WUSB_MAS   = 27,
+	WUSB_REQ_CHAN_STOP      = 28,
+};
+
+
+/* Wireless Adapter WUSB Channel Time types ([WUSB] table 8-52) */
+enum {
+	WUSB_TIME_ADJ   = 0,
+	WUSB_TIME_BPST  = 1,
+	WUSB_TIME_WUSB  = 2,
+};
+
+enum {
+	WA_ENABLE = 0x01,
+	WA_RESET = 0x02,
+	RPIPE_PAUSE = 0x1,
+	RPIPE_STALL = 0x2,
+};
+
+/* Responses from Get Status request ([WUSB] section 8.3.1.6) */
+enum {
+	WA_STATUS_ENABLED = 0x01,
+	WA_STATUS_RESETTING = 0x02
+};
+
+enum rpipe_crs {
+	RPIPE_CRS_CTL = 0x01,
+	RPIPE_CRS_ISO = 0x02,
+	RPIPE_CRS_BULK = 0x04,
+	RPIPE_CRS_INTR = 0x08
+};
+
+/**
+ * RPipe descriptor ([WUSB] section 8.5.2.11)
+ *
+ * FIXME: explain rpipes
+ */
+struct usb_rpipe_descriptor {
+	u8	bLength;
+	u8	bDescriptorType;
+	__le16  wRPipeIndex;
+	__le16	wRequests;
+	__le16	wBlocks;		/* rw if 0 */
+	__le16	wMaxPacketSize;		/* rw */
+	union {
+		u8	dwa_bHSHubAddress;		/* rw: DWA. */
+		u8	hwa_bMaxBurst;			/* rw: HWA. */
+	};
+	union {
+		u8	dwa_bHSHubPort;		/*  rw: DWA. */
+		u8	hwa_bDeviceInfoIndex;	/*  rw: HWA. */
+	};
+	u8	bSpeed;			/* rw: xfer rate 'enum uwb_phy_rate' */
+	union {
+		u8 dwa_bDeviceAddress;	/* rw: DWA Target device address. */
+		u8 hwa_reserved;		/* rw: HWA. */
+	};
+	u8	bEndpointAddress;	/* rw: Target EP address */
+	u8	bDataSequence;		/* ro: Current Data sequence */
+	__le32	dwCurrentWindow;	/* ro */
+	u8	bMaxDataSequence;	/* ro?: max supported seq */
+	u8	bInterval;		/* rw:  */
+	u8	bOverTheAirInterval;	/* rw:  */
+	u8	bmAttribute;		/* ro?  */
+	u8	bmCharacteristics;	/* ro? enum rpipe_attr, supported xsactions */
+	u8	bmRetryOptions;		/* rw? */
+	__le16	wNumTransactionErrors;	/* rw */
+} __attribute__ ((packed));
+
+/**
+ * Wire Adapter Notification types ([WUSB] sections 8.4.5 & 8.5.4)
+ *
+ * These are the notifications coming on the notification endpoint of
+ * an HWA and a DWA.
+ */
+enum wa_notif_type {
+	DWA_NOTIF_RWAKE = 0x91,
+	DWA_NOTIF_PORTSTATUS = 0x92,
+	WA_NOTIF_TRANSFER = 0x93,
+	HWA_NOTIF_BPST_ADJ = 0x94,
+	HWA_NOTIF_DN = 0x95,
+};
+
+/**
+ * Wire Adapter notification header
+ *
+ * Notifications coming from a wire adapter use a common header
+ * defined in [WUSB] sections 8.4.5 & 8.5.4.
+ */
+struct wa_notif_hdr {
+	u8 bLength;
+	u8 bNotifyType;			/* enum wa_notif_type */
+} __packed;
+
+/**
+ * HWA DN Received notification [(WUSB] section 8.5.4.2)
+ *
+ * The DNData is specified in WUSB1.0[7.6]. For each device
+ * notification we received, we just need to dispatch it.
+ *
+ * @dndata:  this is really an array of notifications, but all start
+ *           with the same header.
+ */
+struct hwa_notif_dn {
+	struct wa_notif_hdr hdr;
+	u8 bSourceDeviceAddr;		/* from errata 2005/07 */
+	u8 bmAttributes;
+	struct wusb_dn_hdr dndata[];
+} __packed;
+
+/* [WUSB] section 8.3.3 */
+enum wa_xfer_type {
+	WA_XFER_TYPE_CTL = 0x80,
+	WA_XFER_TYPE_BI = 0x81,		/* bulk/interrupt */
+	WA_XFER_TYPE_ISO = 0x82,
+	WA_XFER_RESULT = 0x83,
+	WA_XFER_ABORT = 0x84,
+	WA_XFER_ISO_PACKET_INFO = 0xA0,
+	WA_XFER_ISO_PACKET_STATUS = 0xA1,
+};
+
+/* [WUSB] section 8.3.3 */
+struct wa_xfer_hdr {
+	u8 bLength;			/* 0x18 */
+	u8 bRequestType;		/* 0x80 WA_REQUEST_TYPE_CTL */
+	__le16 wRPipe;			/* RPipe index */
+	__le32 dwTransferID;		/* Host-assigned ID */
+	__le32 dwTransferLength;	/* Length of data to xfer */
+	u8 bTransferSegment;
+} __packed;
+
+struct wa_xfer_ctl {
+	struct wa_xfer_hdr hdr;
+	u8 bmAttribute;
+	__le16 wReserved;
+	struct usb_ctrlrequest baSetupData;
+} __packed;
+
+struct wa_xfer_bi {
+	struct wa_xfer_hdr hdr;
+	u8 bReserved;
+	__le16 wReserved;
+} __packed;
+
+/* [WUSB] section 8.5.5 */
+struct wa_xfer_hwaiso {
+	struct wa_xfer_hdr hdr;
+	u8 bReserved;
+	__le16 wPresentationTime;
+	__le32 dwNumOfPackets;
+} __packed;
+
+struct wa_xfer_packet_info_hwaiso {
+	__le16 wLength;
+	u8 bPacketType;
+	u8 bReserved;
+	__le16 PacketLength[0];
+} __packed;
+
+struct wa_xfer_packet_status_len_hwaiso {
+	__le16 PacketLength;
+	__le16 PacketStatus;
+} __packed;
+
+struct wa_xfer_packet_status_hwaiso {
+	__le16 wLength;
+	u8 bPacketType;
+	u8 bReserved;
+	struct wa_xfer_packet_status_len_hwaiso PacketStatus[0];
+} __packed;
+
+/* [WUSB] section 8.3.3.5 */
+struct wa_xfer_abort {
+	u8 bLength;
+	u8 bRequestType;
+	__le16 wRPipe;			/* RPipe index */
+	__le32 dwTransferID;		/* Host-assigned ID */
+} __packed;
+
+/**
+ * WA Transfer Complete notification ([WUSB] section 8.3.3.3)
+ *
+ */
+struct wa_notif_xfer {
+	struct wa_notif_hdr hdr;
+	u8 bEndpoint;
+	u8 Reserved;
+} __packed;
+
+/** Transfer result basic codes [WUSB] table 8-15 */
+enum {
+	WA_XFER_STATUS_SUCCESS,
+	WA_XFER_STATUS_HALTED,
+	WA_XFER_STATUS_DATA_BUFFER_ERROR,
+	WA_XFER_STATUS_BABBLE,
+	WA_XFER_RESERVED,
+	WA_XFER_STATUS_NOT_FOUND,
+	WA_XFER_STATUS_INSUFFICIENT_RESOURCE,
+	WA_XFER_STATUS_TRANSACTION_ERROR,
+	WA_XFER_STATUS_ABORTED,
+	WA_XFER_STATUS_RPIPE_NOT_READY,
+	WA_XFER_INVALID_FORMAT,
+	WA_XFER_UNEXPECTED_SEGMENT_NUMBER,
+	WA_XFER_STATUS_RPIPE_TYPE_MISMATCH,
+};
+
+/** [WUSB] section 8.3.3.4 */
+struct wa_xfer_result {
+	struct wa_notif_hdr hdr;
+	__le32 dwTransferID;
+	__le32 dwTransferLength;
+	u8     bTransferSegment;
+	u8     bTransferStatus;
+	__le32 dwNumOfPackets;
+} __packed;
+
+/**
+ * Wire Adapter Class Descriptor ([WUSB] section 8.5.2.7).
+ *
+ * NOTE: u16 fields are read Little Endian from the hardware.
+ *
+ * @bNumPorts is the original max number of devices that the host can
+ *            connect; we might chop this so the stack can handle
+ *            it. In case you need to access it, use wusbhc->ports_max
+ *            if it is a Wireless USB WA.
+ */
+struct usb_wa_descriptor {
+	u8	bLength;
+	u8	bDescriptorType;
+	__le16	bcdWAVersion;
+	u8	bNumPorts;		/* don't use!! */
+	u8	bmAttributes;		/* Reserved == 0 */
+	__le16	wNumRPipes;
+	__le16	wRPipeMaxBlock;
+	u8	bRPipeBlockSize;
+	u8	bPwrOn2PwrGood;
+	u8	bNumMMCIEs;
+	u8	DeviceRemovable;	/* FIXME: in DWA this is up to 16 bytes */
+} __packed;
+
+/**
+ * HWA Device Information Buffer (WUSB1.0[T8.54])
+ */
+struct hwa_dev_info {
+	u8	bmDeviceAvailability[32];       /* FIXME: ignored for now */
+	u8	bDeviceAddress;
+	__le16	wPHYRates;
+	u8	bmDeviceAttribute;
+} __packed;
+
+#endif /* #ifndef __LINUX_USB_WUSB_WA_H */
diff --git a/drivers/staging/wusbcore/include/wusb.h b/drivers/staging/wusbcore/include/wusb.h
new file mode 100644
index 000000000000..09771d1da7bc
--- /dev/null
+++ b/drivers/staging/wusbcore/include/wusb.h
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Standard Definitions
+ * Event Size Tables
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ *
+ *
+ * FIXME: docs
+ * FIXME: organize properly, group logically
+ *
+ * All the event structures are defined in uwb/spec.h, as they are
+ * common to the WHCI and WUSB radio control interfaces.
+ */
+
+#ifndef __WUSB_H__
+#define __WUSB_H__
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/usb/ch9.h>
+#include <linux/param.h>
+#include "../../uwb/include/spec.h"
+
+/**
+ * WUSB Information Element header
+ *
+ * I don't know why, they decided to make it different to the MBOA MAC
+ * IE Header; beats me.
+ */
+struct wuie_hdr {
+	u8 bLength;
+	u8 bIEIdentifier;
+} __attribute__((packed));
+
+enum {
+	WUIE_ID_WCTA = 0x80,
+	WUIE_ID_CONNECTACK,
+	WUIE_ID_HOST_INFO,
+	WUIE_ID_CHANGE_ANNOUNCE,
+	WUIE_ID_DEVICE_DISCONNECT,
+	WUIE_ID_HOST_DISCONNECT,
+	WUIE_ID_KEEP_ALIVE = 0x89,
+	WUIE_ID_ISOCH_DISCARD,
+	WUIE_ID_RESET_DEVICE,
+};
+
+/**
+ * Maximum number of array elements in a WUSB IE.
+ *
+ * WUSB1.0[7.5 before table 7-38] says that in WUSB IEs that
+ * are "arrays" have to limited to 4 elements. So we define it
+ * like that to ease up and submit only the neeed size.
+ */
+#define WUIE_ELT_MAX 4
+
+/**
+ * Wrapper for the data that defines a CHID, a CDID or a CK
+ *
+ * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of
+ * data. In order to avoid confusion and enforce types, we wrap it.
+ *
+ * Make it packed, as we use it in some hw definitions.
+ */
+struct wusb_ckhdid {
+	u8 data[16];
+} __attribute__((packed));
+
+static const struct wusb_ckhdid wusb_ckhdid_zero = { .data = { 0 } };
+
+#define WUSB_CKHDID_STRSIZE (3 * sizeof(struct wusb_ckhdid) + 1)
+
+/**
+ * WUSB IE: Host Information (WUSB1.0[7.5.2])
+ *
+ * Used to provide information about the host to the Wireless USB
+ * devices in range (CHID can be used as an ASCII string).
+ */
+struct wuie_host_info {
+	struct wuie_hdr hdr;
+	__le16 attributes;
+	struct wusb_ckhdid CHID;
+} __attribute__((packed));
+
+/**
+ * WUSB IE: Connect Ack (WUSB1.0[7.5.1])
+ *
+ * Used to acknowledge device connect requests. See note for
+ * WUIE_ELT_MAX.
+ */
+struct wuie_connect_ack {
+	struct wuie_hdr hdr;
+	struct {
+		struct wusb_ckhdid CDID;
+		u8 bDeviceAddress;	/* 0 means unused */
+		u8 bReserved;
+	} blk[WUIE_ELT_MAX];
+} __attribute__((packed));
+
+/**
+ * WUSB IE Host Information Element, Connect Availability
+ *
+ * WUSB1.0[7.5.2], bmAttributes description
+ */
+enum {
+	WUIE_HI_CAP_RECONNECT = 0,
+	WUIE_HI_CAP_LIMITED,
+	WUIE_HI_CAP_RESERVED,
+	WUIE_HI_CAP_ALL,
+};
+
+/**
+ * WUSB IE: Channel Stop (WUSB1.0[7.5.8])
+ *
+ * Tells devices the host is going to stop sending MMCs and will disappear.
+ */
+struct wuie_channel_stop {
+	struct wuie_hdr hdr;
+	u8 attributes;
+	u8 timestamp[3];
+} __attribute__((packed));
+
+/**
+ * WUSB IE: Keepalive (WUSB1.0[7.5.9])
+ *
+ * Ask device(s) to send keepalives.
+ */
+struct wuie_keep_alive {
+	struct wuie_hdr hdr;
+	u8 bDeviceAddress[WUIE_ELT_MAX];
+} __attribute__((packed));
+
+/**
+ * WUSB IE: Reset device (WUSB1.0[7.5.11])
+ *
+ * Tell device to reset; in all truth, we can fit 4 CDIDs, but we only
+ * use it for one at the time...
+ *
+ * In any case, this request is a wee bit silly: why don't they target
+ * by address??
+ */
+struct wuie_reset {
+	struct wuie_hdr hdr;
+	struct wusb_ckhdid CDID;
+} __attribute__((packed));
+
+/**
+ * WUSB IE: Disconnect device (WUSB1.0[7.5.11])
+ *
+ * Tell device to disconnect; we can fit 4 addresses, but we only use
+ * it for one at the time...
+ */
+struct wuie_disconnect {
+	struct wuie_hdr hdr;
+	u8 bDeviceAddress;
+	u8 padding;
+} __attribute__((packed));
+
+/**
+ * WUSB IE: Host disconnect ([WUSB] section 7.5.5)
+ *
+ * Tells all connected devices to disconnect.
+ */
+struct wuie_host_disconnect {
+	struct wuie_hdr hdr;
+} __attribute__((packed));
+
+/**
+ * WUSB Device Notification header (WUSB1.0[7.6])
+ */
+struct wusb_dn_hdr {
+	u8 bType;
+	u8 notifdata[];
+} __attribute__((packed));
+
+/** Device Notification codes (WUSB1.0[Table 7-54]) */
+enum WUSB_DN {
+	WUSB_DN_CONNECT = 0x01,
+	WUSB_DN_DISCONNECT = 0x02,
+	WUSB_DN_EPRDY = 0x03,
+	WUSB_DN_MASAVAILCHANGED = 0x04,
+	WUSB_DN_RWAKE = 0x05,
+	WUSB_DN_SLEEP = 0x06,
+	WUSB_DN_ALIVE = 0x07,
+};
+
+/** WUSB Device Notification Connect */
+struct wusb_dn_connect {
+	struct wusb_dn_hdr hdr;
+	__le16 attributes;
+	struct wusb_ckhdid CDID;
+} __attribute__((packed));
+
+static inline int wusb_dn_connect_prev_dev_addr(const struct wusb_dn_connect *dn)
+{
+	return le16_to_cpu(dn->attributes) & 0xff;
+}
+
+static inline int wusb_dn_connect_new_connection(const struct wusb_dn_connect *dn)
+{
+	return (le16_to_cpu(dn->attributes) >> 8) & 0x1;
+}
+
+static inline int wusb_dn_connect_beacon_behavior(const struct wusb_dn_connect *dn)
+{
+	return (le16_to_cpu(dn->attributes) >> 9) & 0x03;
+}
+
+/** Device is alive (aka: pong) (WUSB1.0[7.6.7]) */
+struct wusb_dn_alive {
+	struct wusb_dn_hdr hdr;
+} __attribute__((packed));
+
+/** Device is disconnecting (WUSB1.0[7.6.2]) */
+struct wusb_dn_disconnect {
+	struct wusb_dn_hdr hdr;
+} __attribute__((packed));
+
+/* General constants */
+enum {
+	WUSB_TRUST_TIMEOUT_MS = 4000,	/* [WUSB] section 4.15.1 */
+};
+
+/*
+ * WUSB Crypto stuff (WUSB1.0[6])
+ */
+
+extern const char *wusb_et_name(u8);
+
+/**
+ * WUSB key index WUSB1.0[7.3.2.4], for usage when setting keys for
+ * the host or the device.
+ */
+static inline u8 wusb_key_index(int index, int type, int originator)
+{
+	return (originator << 6) | (type << 4) | index;
+}
+
+#define WUSB_KEY_INDEX_TYPE_PTK			0 /* for HWA only */
+#define WUSB_KEY_INDEX_TYPE_ASSOC		1
+#define WUSB_KEY_INDEX_TYPE_GTK			2
+#define WUSB_KEY_INDEX_ORIGINATOR_HOST		0
+#define WUSB_KEY_INDEX_ORIGINATOR_DEVICE	1
+/* bits 0-3 used for the key index. */
+#define WUSB_KEY_INDEX_MAX			15
+
+/* A CCM Nonce, defined in WUSB1.0[6.4.1] */
+struct aes_ccm_nonce {
+	u8 sfn[6];              /* Little Endian */
+	u8 tkid[3];             /* LE */
+	struct uwb_dev_addr dest_addr;
+	struct uwb_dev_addr src_addr;
+} __attribute__((packed));
+
+/* A CCM operation label, defined on WUSB1.0[6.5.x] */
+struct aes_ccm_label {
+	u8 data[14];
+} __attribute__((packed));
+
+/*
+ * Input to the key derivation sequence defined in
+ * WUSB1.0[6.5.1]. Rest of the data is in the CCM Nonce passed to the
+ * PRF function.
+ */
+struct wusb_keydvt_in {
+	u8 hnonce[16];
+	u8 dnonce[16];
+} __attribute__((packed));
+
+/*
+ * Output from the key derivation sequence defined in
+ * WUSB1.0[6.5.1].
+ */
+struct wusb_keydvt_out {
+	u8 kck[16];
+	u8 ptk[16];
+} __attribute__((packed));
+
+/* Pseudo Random Function WUSB1.0[6.5] */
+extern int wusb_crypto_init(void);
+extern void wusb_crypto_exit(void);
+extern ssize_t wusb_prf(void *out, size_t out_size,
+			const u8 key[16], const struct aes_ccm_nonce *_n,
+			const struct aes_ccm_label *a,
+			const void *b, size_t blen, size_t len);
+
+static inline int wusb_prf_64(void *out, size_t out_size, const u8 key[16],
+			      const struct aes_ccm_nonce *n,
+			      const struct aes_ccm_label *a,
+			      const void *b, size_t blen)
+{
+	return wusb_prf(out, out_size, key, n, a, b, blen, 64);
+}
+
+static inline int wusb_prf_128(void *out, size_t out_size, const u8 key[16],
+			       const struct aes_ccm_nonce *n,
+			       const struct aes_ccm_label *a,
+			       const void *b, size_t blen)
+{
+	return wusb_prf(out, out_size, key, n, a, b, blen, 128);
+}
+
+static inline int wusb_prf_256(void *out, size_t out_size, const u8 key[16],
+			       const struct aes_ccm_nonce *n,
+			       const struct aes_ccm_label *a,
+			       const void *b, size_t blen)
+{
+	return wusb_prf(out, out_size, key, n, a, b, blen, 256);
+}
+
+/* Key derivation WUSB1.0[6.5.1] */
+static inline int wusb_key_derive(struct wusb_keydvt_out *keydvt_out,
+				  const u8 key[16],
+				  const struct aes_ccm_nonce *n,
+				  const struct wusb_keydvt_in *keydvt_in)
+{
+	const struct aes_ccm_label a = { .data = "Pair-wise keys" };
+	return wusb_prf_256(keydvt_out, sizeof(*keydvt_out), key, n, &a,
+			    keydvt_in, sizeof(*keydvt_in));
+}
+
+/*
+ * Out-of-band MIC Generation WUSB1.0[6.5.2]
+ *
+ * Compute the MIC over @key, @n and @hs and place it in @mic_out.
+ *
+ * @mic_out:  Where to place the 8 byte MIC tag
+ * @key:      KCK from the derivation process
+ * @n:        CCM nonce, n->sfn == 0, TKID as established in the
+ *            process.
+ * @hs:       Handshake struct for phase 2 of the 4-way.
+ *            hs->bStatus and hs->bReserved are zero.
+ *            hs->bMessageNumber is 2 (WUSB1.0[7.3.2.5.2]
+ *            hs->dest_addr is the device's USB address padded with 0
+ *            hs->src_addr is the hosts's UWB device address
+ *            hs->mic is ignored (as we compute that value).
+ */
+static inline int wusb_oob_mic(u8 mic_out[8], const u8 key[16],
+			       const struct aes_ccm_nonce *n,
+			       const struct usb_handshake *hs)
+{
+	const struct aes_ccm_label a = { .data = "out-of-bandMIC" };
+	return wusb_prf_64(mic_out, 8, key, n, &a,
+			   hs, sizeof(*hs) - sizeof(hs->MIC));
+}
+
+#endif /* #ifndef __WUSB_H__ */
diff --git a/drivers/staging/wusbcore/mmc.c b/drivers/staging/wusbcore/mmc.c
new file mode 100644
index 000000000000..881e1f20d718
--- /dev/null
+++ b/drivers/staging/wusbcore/mmc.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB Wire Adapter: Control/Data Streaming Interface (WUSB[8])
+ * MMC (Microscheduled Management Command) handling
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * WUIEs and MMC IEs...well, they are almost the same at the end. MMC
+ * IEs are Wireless USB IEs that go into the MMC period...[what is
+ * that? look in Design-overview.txt].
+ *
+ *
+ * This is a simple subsystem to keep track of which IEs are being
+ * sent by the host in the MMC period.
+ *
+ * For each WUIE we ask to send, we keep it in an array, so we can
+ * request its removal later, or replace the content. They are tracked
+ * by pointer, so be sure to use the same pointer if you want to
+ * remove it or update the contents.
+ *
+ * FIXME:
+ *  - add timers that autoremove intervalled IEs?
+ */
+#include <linux/slab.h>
+#include <linux/export.h>
+#include "include/wusb.h"
+#include "wusbhc.h"
+
+/* Initialize the MMCIEs handling mechanism */
+int wusbhc_mmcie_create(struct wusbhc *wusbhc)
+{
+	u8 mmcies = wusbhc->mmcies_max;
+	wusbhc->mmcie = kcalloc(mmcies, sizeof(wusbhc->mmcie[0]), GFP_KERNEL);
+	if (wusbhc->mmcie == NULL)
+		return -ENOMEM;
+	mutex_init(&wusbhc->mmcie_mutex);
+	return 0;
+}
+
+/* Release resources used by the MMCIEs handling mechanism */
+void wusbhc_mmcie_destroy(struct wusbhc *wusbhc)
+{
+	kfree(wusbhc->mmcie);
+}
+
+/*
+ * Add or replace an MMC Wireless USB IE.
+ *
+ * @interval:    See WUSB1.0[8.5.3.1]
+ * @repeat_cnt:  See WUSB1.0[8.5.3.1]
+ * @handle:      See WUSB1.0[8.5.3.1]
+ * @wuie:        Pointer to the header of the WUSB IE data to add.
+ *               MUST BE allocated in a kmalloc buffer (no stack or
+ *               vmalloc).
+ *               THE CALLER ALWAYS OWNS THE POINTER (we don't free it
+ *               on remove, we just forget about it).
+ * @returns:     0 if ok, < 0 errno code on error.
+ *
+ * Goes over the *whole* @wusbhc->mmcie array looking for (a) the
+ * first free spot and (b) if @wuie is already in the array (aka:
+ * transmitted in the MMCs) the spot were it is.
+ *
+ * If present, we "overwrite it" (update).
+ *
+ *
+ * NOTE: Need special ordering rules -- see below WUSB1.0 Table 7-38.
+ *       The host uses the handle as the 'sort' index. We
+ *       allocate the last one always for the WUIE_ID_HOST_INFO, and
+ *       the rest, first come first serve in inverse order.
+ *
+ *       Host software must make sure that it adds the other IEs in
+ *       the right order... the host hardware is responsible for
+ *       placing the WCTA IEs in the right place with the other IEs
+ *       set by host software.
+ *
+ * NOTE: we can access wusbhc->wa_descr without locking because it is
+ *       read only.
+ */
+int wusbhc_mmcie_set(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
+		     struct wuie_hdr *wuie)
+{
+	int result = -ENOBUFS;
+	unsigned handle, itr;
+
+	/* Search a handle, taking into account the ordering */
+	mutex_lock(&wusbhc->mmcie_mutex);
+	switch (wuie->bIEIdentifier) {
+	case WUIE_ID_HOST_INFO:
+		/* Always last */
+		handle = wusbhc->mmcies_max - 1;
+		break;
+	case WUIE_ID_ISOCH_DISCARD:
+		dev_err(wusbhc->dev, "Special ordering case for WUIE ID 0x%x "
+			"unimplemented\n", wuie->bIEIdentifier);
+		result = -ENOSYS;
+		goto error_unlock;
+	default:
+		/* search for it or find the last empty slot */
+		handle = ~0;
+		for (itr = 0; itr < wusbhc->mmcies_max - 1; itr++) {
+			if (wusbhc->mmcie[itr] == wuie) {
+				handle = itr;
+				break;
+			}
+			if (wusbhc->mmcie[itr] == NULL)
+				handle = itr;
+		}
+		if (handle == ~0)
+			goto error_unlock;
+	}
+	result = (wusbhc->mmcie_add)(wusbhc, interval, repeat_cnt, handle,
+				     wuie);
+	if (result >= 0)
+		wusbhc->mmcie[handle] = wuie;
+error_unlock:
+	mutex_unlock(&wusbhc->mmcie_mutex);
+	return result;
+}
+EXPORT_SYMBOL_GPL(wusbhc_mmcie_set);
+
+/*
+ * Remove an MMC IE previously added with wusbhc_mmcie_set()
+ *
+ * @wuie	Pointer used to add the WUIE
+ */
+void wusbhc_mmcie_rm(struct wusbhc *wusbhc, struct wuie_hdr *wuie)
+{
+	int result;
+	unsigned handle, itr;
+
+	mutex_lock(&wusbhc->mmcie_mutex);
+	for (itr = 0; itr < wusbhc->mmcies_max; itr++) {
+		if (wusbhc->mmcie[itr] == wuie) {
+			handle = itr;
+			goto found;
+		}
+	}
+	mutex_unlock(&wusbhc->mmcie_mutex);
+	return;
+
+found:
+	result = (wusbhc->mmcie_rm)(wusbhc, handle);
+	if (result == 0)
+		wusbhc->mmcie[itr] = NULL;
+	mutex_unlock(&wusbhc->mmcie_mutex);
+}
+EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm);
+
+static int wusbhc_mmc_start(struct wusbhc *wusbhc)
+{
+	int ret;
+
+	mutex_lock(&wusbhc->mutex);
+	ret = wusbhc->start(wusbhc);
+	if (ret >= 0)
+		wusbhc->active = 1;
+	mutex_unlock(&wusbhc->mutex);
+
+	return ret;
+}
+
+static void wusbhc_mmc_stop(struct wusbhc *wusbhc)
+{
+	mutex_lock(&wusbhc->mutex);
+	wusbhc->active = 0;
+	wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
+	mutex_unlock(&wusbhc->mutex);
+}
+
+/*
+ * wusbhc_start - start transmitting MMCs and accepting connections
+ * @wusbhc: the HC to start
+ *
+ * Establishes a cluster reservation, enables device connections, and
+ * starts MMCs with appropriate DNTS parameters.
+ */
+int wusbhc_start(struct wusbhc *wusbhc)
+{
+	int result;
+	struct device *dev = wusbhc->dev;
+
+	WARN_ON(wusbhc->wuie_host_info != NULL);
+	BUG_ON(wusbhc->uwb_rc == NULL);
+
+	result = wusbhc_rsv_establish(wusbhc);
+	if (result < 0) {
+		dev_err(dev, "cannot establish cluster reservation: %d\n",
+			result);
+		goto error_rsv_establish;
+	}
+
+	result = wusbhc_devconnect_start(wusbhc);
+	if (result < 0) {
+		dev_err(dev, "error enabling device connections: %d\n",
+			result);
+		goto error_devconnect_start;
+	}
+
+	result = wusbhc_sec_start(wusbhc);
+	if (result < 0) {
+		dev_err(dev, "error starting security in the HC: %d\n",
+			result);
+		goto error_sec_start;
+	}
+
+	result = wusbhc->set_num_dnts(wusbhc, wusbhc->dnts_interval,
+		wusbhc->dnts_num_slots);
+	if (result < 0) {
+		dev_err(dev, "Cannot set DNTS parameters: %d\n", result);
+		goto error_set_num_dnts;
+	}
+	result = wusbhc_mmc_start(wusbhc);
+	if (result < 0) {
+		dev_err(dev, "error starting wusbch: %d\n", result);
+		goto error_wusbhc_start;
+	}
+
+	return 0;
+
+error_wusbhc_start:
+	wusbhc_sec_stop(wusbhc);
+error_set_num_dnts:
+error_sec_start:
+	wusbhc_devconnect_stop(wusbhc);
+error_devconnect_start:
+	wusbhc_rsv_terminate(wusbhc);
+error_rsv_establish:
+	return result;
+}
+
+/*
+ * wusbhc_stop - stop transmitting MMCs
+ * @wusbhc: the HC to stop
+ *
+ * Stops the WUSB channel and removes the cluster reservation.
+ */
+void wusbhc_stop(struct wusbhc *wusbhc)
+{
+	wusbhc_mmc_stop(wusbhc);
+	wusbhc_sec_stop(wusbhc);
+	wusbhc_devconnect_stop(wusbhc);
+	wusbhc_rsv_terminate(wusbhc);
+}
+
+/*
+ * Set/reset/update a new CHID
+ *
+ * Depending on the previous state of the MMCs, start, stop or change
+ * the sent MMC. This effectively switches the host controller on and
+ * off (radio wise).
+ */
+int wusbhc_chid_set(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
+{
+	int result = 0;
+
+	if (memcmp(chid, &wusb_ckhdid_zero, sizeof(*chid)) == 0)
+		chid = NULL;
+
+	mutex_lock(&wusbhc->mutex);
+	if (chid) {
+		if (wusbhc->active) {
+			mutex_unlock(&wusbhc->mutex);
+			return -EBUSY;
+		}
+		wusbhc->chid = *chid;
+	}
+
+	/* register with UWB if we haven't already since we are about to start
+	    the radio. */
+	if ((chid) && (wusbhc->uwb_rc == NULL)) {
+		wusbhc->uwb_rc = uwb_rc_get_by_grandpa(wusbhc->dev->parent);
+		if (wusbhc->uwb_rc == NULL) {
+			result = -ENODEV;
+			dev_err(wusbhc->dev,
+				"Cannot get associated UWB Host Controller\n");
+			goto error_rc_get;
+		}
+
+		result = wusbhc_pal_register(wusbhc);
+		if (result < 0) {
+			dev_err(wusbhc->dev, "Cannot register as a UWB PAL\n");
+			goto error_pal_register;
+		}
+	}
+	mutex_unlock(&wusbhc->mutex);
+
+	if (chid)
+		result = uwb_radio_start(&wusbhc->pal);
+	else if (wusbhc->uwb_rc)
+		uwb_radio_stop(&wusbhc->pal);
+
+	return result;
+
+error_pal_register:
+	uwb_rc_put(wusbhc->uwb_rc);
+	wusbhc->uwb_rc = NULL;
+error_rc_get:
+	mutex_unlock(&wusbhc->mutex);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(wusbhc_chid_set);
diff --git a/drivers/staging/wusbcore/pal.c b/drivers/staging/wusbcore/pal.c
new file mode 100644
index 000000000000..30f569131471
--- /dev/null
+++ b/drivers/staging/wusbcore/pal.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Host Controller
+ * UWB Protocol Adaptation Layer (PAL) glue.
+ *
+ * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
+ */
+#include "wusbhc.h"
+
+static void wusbhc_channel_changed(struct uwb_pal *pal, int channel)
+{
+	struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal);
+
+	dev_dbg(wusbhc->dev, "%s: channel = %d\n", __func__, channel);
+	if (channel < 0)
+		wusbhc_stop(wusbhc);
+	else
+		wusbhc_start(wusbhc);
+}
+
+/**
+ * wusbhc_pal_register - register the WUSB HC as a UWB PAL
+ * @wusbhc: the WUSB HC
+ */
+int wusbhc_pal_register(struct wusbhc *wusbhc)
+{
+	uwb_pal_init(&wusbhc->pal);
+
+	wusbhc->pal.name   = "wusbhc";
+	wusbhc->pal.device = wusbhc->usb_hcd.self.controller;
+	wusbhc->pal.rc     = wusbhc->uwb_rc;
+	wusbhc->pal.channel_changed = wusbhc_channel_changed;
+
+	return uwb_pal_register(&wusbhc->pal);
+}
+
+/**
+ * wusbhc_pal_unregister - unregister the WUSB HC as a UWB PAL
+ * @wusbhc: the WUSB HC
+ */
+void wusbhc_pal_unregister(struct wusbhc *wusbhc)
+{
+	if (wusbhc->uwb_rc)
+		uwb_pal_unregister(&wusbhc->pal);
+}
diff --git a/drivers/staging/wusbcore/reservation.c b/drivers/staging/wusbcore/reservation.c
new file mode 100644
index 000000000000..b921faac698b
--- /dev/null
+++ b/drivers/staging/wusbcore/reservation.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB cluster reservation management
+ *
+ * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
+ */
+#include <linux/kernel.h>
+
+#include "../uwb/uwb.h"
+#include "wusbhc.h"
+
+/*
+ * WUSB cluster reservations are multicast reservations with the
+ * broadcast cluster ID (BCID) as the target DevAddr.
+ *
+ * FIXME: consider adjusting the reservation depending on what devices
+ * are attached.
+ */
+
+static int wusbhc_bwa_set(struct wusbhc *wusbhc, u8 stream,
+	const struct uwb_mas_bm *mas)
+{
+	if (mas == NULL)
+		mas = &uwb_mas_bm_zero;
+	return wusbhc->bwa_set(wusbhc, stream, mas);
+}
+
+/**
+ * wusbhc_rsv_complete_cb - WUSB HC reservation complete callback
+ * @rsv:    the reservation
+ *
+ * Either set or clear the HC's view of the reservation.
+ *
+ * FIXME: when a reservation is denied the HC should be stopped.
+ */
+static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv)
+{
+	struct wusbhc *wusbhc = rsv->pal_priv;
+	struct device *dev = wusbhc->dev;
+	struct uwb_mas_bm mas;
+
+	dev_dbg(dev, "%s: state = %d\n", __func__, rsv->state);
+	switch (rsv->state) {
+	case UWB_RSV_STATE_O_ESTABLISHED:
+		uwb_rsv_get_usable_mas(rsv, &mas);
+		dev_dbg(dev, "established reservation: %*pb\n",
+			UWB_NUM_MAS, mas.bm);
+		wusbhc_bwa_set(wusbhc, rsv->stream, &mas);
+		break;
+	case UWB_RSV_STATE_NONE:
+		dev_dbg(dev, "removed reservation\n");
+		wusbhc_bwa_set(wusbhc, 0, NULL);
+		break;
+	default:
+		dev_dbg(dev, "unexpected reservation state: %d\n", rsv->state);
+		break;
+	}
+}
+
+
+/**
+ * wusbhc_rsv_establish - establish a reservation for the cluster
+ * @wusbhc: the WUSB HC requesting a bandwidth reservation
+ */
+int wusbhc_rsv_establish(struct wusbhc *wusbhc)
+{
+	struct uwb_rc *rc = wusbhc->uwb_rc;
+	struct uwb_rsv *rsv;
+	struct uwb_dev_addr bcid;
+	int ret;
+
+	if (rc == NULL)
+		return -ENODEV;
+
+	rsv = uwb_rsv_create(rc, wusbhc_rsv_complete_cb, wusbhc);
+	if (rsv == NULL)
+		return -ENOMEM;
+
+	bcid.data[0] = wusbhc->cluster_id;
+	bcid.data[1] = 0;
+
+	rsv->target.type = UWB_RSV_TARGET_DEVADDR;
+	rsv->target.devaddr = bcid;
+	rsv->type = UWB_DRP_TYPE_PRIVATE;
+	rsv->max_mas = 256; /* try to get as much as possible */
+	rsv->min_mas = 15;  /* one MAS per zone */
+	rsv->max_interval = 1; /* max latency is one zone */
+	rsv->is_multicast = true;
+
+	ret = uwb_rsv_establish(rsv);
+	if (ret == 0)
+		wusbhc->rsv = rsv;
+	else
+		uwb_rsv_destroy(rsv);
+	return ret;
+}
+
+
+/**
+ * wusbhc_rsv_terminate - terminate the cluster reservation
+ * @wusbhc: the WUSB host whose reservation is to be terminated
+ */
+void wusbhc_rsv_terminate(struct wusbhc *wusbhc)
+{
+	if (wusbhc->rsv) {
+		uwb_rsv_terminate(wusbhc->rsv);
+		uwb_rsv_destroy(wusbhc->rsv);
+		wusbhc->rsv = NULL;
+	}
+}
diff --git a/drivers/staging/wusbcore/rh.c b/drivers/staging/wusbcore/rh.c
new file mode 100644
index 000000000000..20c08cd9dcbf
--- /dev/null
+++ b/drivers/staging/wusbcore/rh.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Host Controller
+ * Root Hub operations
+ *
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * We fake a root hub that has fake ports (as many as simultaneous
+ * devices the Wireless USB Host Controller can deal with). For each
+ * port we keep an state in @wusbhc->port[index] identical to the one
+ * specified in the USB2.0[ch11] spec and some extra device
+ * information that complements the one in 'struct usb_device' (as
+ * this lacs a hcpriv pointer).
+ *
+ * Note this is common to WHCI and HWA host controllers.
+ *
+ * Through here we enable most of the state changes that the USB stack
+ * will use to connect or disconnect devices. We need to do some
+ * forced adaptation of Wireless USB device states vs. wired:
+ *
+ *        USB:                 WUSB:
+ *
+ * Port   Powered-off          port slot n/a
+ *        Powered-on           port slot available
+ *        Disconnected         port slot available
+ *        Connected            port slot assigned device
+ *        		       device sent DN_Connect
+ *                             device was authenticated
+ *        Enabled              device is authenticated, transitioned
+ *                             from unauth -> auth -> default address
+ *                             -> enabled
+ *        Reset                disconnect
+ *        Disable              disconnect
+ *
+ * This maps the standard USB port states with the WUSB device states
+ * so we can fake ports without having to modify the USB stack.
+ *
+ * FIXME: this process will change in the future
+ *
+ *
+ * ENTRY POINTS
+ *
+ * Our entry points into here are, as in hcd.c, the USB stack root hub
+ * ops defined in the usb_hcd struct:
+ *
+ * wusbhc_rh_status_data()	Provide hub and port status data bitmap
+ *
+ * wusbhc_rh_control()          Execution of all the major requests
+ *                              you can do to a hub (Set|Clear
+ *                              features, get descriptors, status, etc).
+ *
+ * wusbhc_rh_[suspend|resume]() That
+ *
+ * wusbhc_rh_start_port_reset() ??? unimplemented
+ */
+#include <linux/slab.h>
+#include <linux/export.h>
+#include "wusbhc.h"
+
+/*
+ * Reset a fake port
+ *
+ * Using a Reset Device IE is too heavyweight as it causes the device
+ * to enter the UnConnected state and leave the cluster, this can mean
+ * that when the device reconnects it is connected to a different fake
+ * port.
+ *
+ * Instead, reset authenticated devices with a SetAddress(0), followed
+ * by a SetAddresss(AuthAddr).
+ *
+ * For unauthenticated devices just pretend to reset but do nothing.
+ * If the device initialization continues to fail it will eventually
+ * time out after TrustTimeout and enter the UnConnected state.
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ *
+ * Supposedly we are the only thread accesing @wusbhc->port; in any
+ * case, maybe we should move the mutex locking from
+ * wusbhc_devconnect_auth() to here.
+ *
+ * @port_idx refers to the wusbhc's port index, not the USB port number
+ */
+static int wusbhc_rh_port_reset(struct wusbhc *wusbhc, u8 port_idx)
+{
+	int result = 0;
+	struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx);
+	struct wusb_dev *wusb_dev = port->wusb_dev;
+
+	if (wusb_dev == NULL)
+		return -ENOTCONN;
+
+	port->status |= USB_PORT_STAT_RESET;
+	port->change |= USB_PORT_STAT_C_RESET;
+
+	if (wusb_dev->addr & WUSB_DEV_ADDR_UNAUTH)
+		result = 0;
+	else
+		result = wusb_dev_update_address(wusbhc, wusb_dev);
+
+	port->status &= ~USB_PORT_STAT_RESET;
+	port->status |= USB_PORT_STAT_ENABLE;
+	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;	
+
+	return result;
+}
+
+/*
+ * Return the hub change status bitmap
+ *
+ * The bits in the change status bitmap are cleared when a
+ * ClearPortFeature request is issued (USB2.0[11.12.3,11.12.4].
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ *
+ * WARNING!! This gets called from atomic context; we cannot get the
+ *           mutex--the only race condition we can find is some bit
+ *           changing just after we copy it, which shouldn't be too
+ *           big of a problem [and we can't make it an spinlock
+ *           because other parts need to take it and sleep] .
+ *
+ *           @usb_hcd is refcounted, so it won't disappear under us
+ *           and before killing a host, the polling of the root hub
+ *           would be stopped anyway.
+ */
+int wusbhc_rh_status_data(struct usb_hcd *usb_hcd, char *_buf)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	size_t cnt, size, bits_set = 0;
+
+	/* WE DON'T LOCK, see comment */
+	/* round up to bytes.  Hub bit is bit 0 so add 1. */
+	size = DIV_ROUND_UP(wusbhc->ports_max + 1, 8);
+
+	/* clear the output buffer. */
+	memset(_buf, 0, size);
+	/* set the bit for each changed port. */
+	for (cnt = 0; cnt < wusbhc->ports_max; cnt++) {
+
+		if (wusb_port_by_idx(wusbhc, cnt)->change) {
+			const int bitpos = cnt+1;
+
+			_buf[bitpos/8] |= (1 << (bitpos % 8));
+			bits_set++;
+		}
+	}
+
+	return bits_set ? size : 0;
+}
+EXPORT_SYMBOL_GPL(wusbhc_rh_status_data);
+
+/*
+ * Return the hub's descriptor
+ *
+ * NOTE: almost cut and paste from ehci-hub.c
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked
+ */
+static int wusbhc_rh_get_hub_descr(struct wusbhc *wusbhc, u16 wValue,
+				   u16 wIndex,
+				   struct usb_hub_descriptor *descr,
+				   u16 wLength)
+{
+	u16 temp = 1 + (wusbhc->ports_max / 8);
+	u8 length = 7 + 2 * temp;
+
+	if (wLength < length)
+		return -ENOSPC;
+	descr->bDescLength = 7 + 2 * temp;
+	descr->bDescriptorType = USB_DT_HUB; /* HUB type */
+	descr->bNbrPorts = wusbhc->ports_max;
+	descr->wHubCharacteristics = cpu_to_le16(
+		HUB_CHAR_COMMON_LPSM	/* All ports power at once */
+		| 0x00			/* not part of compound device */
+		| HUB_CHAR_NO_OCPM	/* No overcurrent protection */
+		| 0x00			/* 8 FS think time FIXME ?? */
+		| 0x00);		/* No port indicators */
+	descr->bPwrOn2PwrGood = 0;
+	descr->bHubContrCurrent = 0;
+	/* two bitmaps:  ports removable, and usb 1.0 legacy PortPwrCtrlMask */
+	memset(&descr->u.hs.DeviceRemovable[0], 0, temp);
+	memset(&descr->u.hs.DeviceRemovable[temp], 0xff, temp);
+	return 0;
+}
+
+/*
+ * Clear a hub feature
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ *
+ * Nothing to do, so no locking needed ;)
+ */
+static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature)
+{
+	int result;
+
+	switch (feature) {
+	case C_HUB_LOCAL_POWER:
+		/* FIXME: maybe plug bit 0 to the power input status,
+		 * if any?
+		 * see wusbhc_rh_get_hub_status() */
+	case C_HUB_OVER_CURRENT:
+		result = 0;
+		break;
+	default:
+		result = -EPIPE;
+	}
+	return result;
+}
+
+/*
+ * Return hub status (it is always zero...)
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ *
+ * Nothing to do, so no locking needed ;)
+ */
+static int wusbhc_rh_get_hub_status(struct wusbhc *wusbhc, u32 *buf,
+				    u16 wLength)
+{
+	/* FIXME: maybe plug bit 0 to the power input status (if any)? */
+	*buf = 0;
+	return 0;
+}
+
+/*
+ * Set a port feature
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ */
+static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature,
+				   u8 selector, u8 port_idx)
+{
+	struct device *dev = wusbhc->dev;
+
+	if (port_idx > wusbhc->ports_max)
+		return -EINVAL;
+
+	switch (feature) {
+		/* According to USB2.0[11.24.2.13]p2, these features
+		 * are not required to be implemented. */
+	case USB_PORT_FEAT_C_OVER_CURRENT:
+	case USB_PORT_FEAT_C_ENABLE:
+	case USB_PORT_FEAT_C_SUSPEND:
+	case USB_PORT_FEAT_C_CONNECTION:
+	case USB_PORT_FEAT_C_RESET:
+		return 0;
+	case USB_PORT_FEAT_POWER:
+		/* No such thing, but we fake it works */
+		mutex_lock(&wusbhc->mutex);
+		wusb_port_by_idx(wusbhc, port_idx)->status |= USB_PORT_STAT_POWER;
+		mutex_unlock(&wusbhc->mutex);
+		return 0;
+	case USB_PORT_FEAT_RESET:
+		return wusbhc_rh_port_reset(wusbhc, port_idx);
+	case USB_PORT_FEAT_ENABLE:
+	case USB_PORT_FEAT_SUSPEND:
+		dev_err(dev, "(port_idx %d) set feat %d/%d UNIMPLEMENTED\n",
+			port_idx, feature, selector);
+		return -ENOSYS;
+	default:
+		dev_err(dev, "(port_idx %d) set feat %d/%d UNKNOWN\n",
+			port_idx, feature, selector);
+		return -EPIPE;
+	}
+
+	return 0;
+}
+
+/*
+ * Clear a port feature...
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ */
+static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
+				     u8 selector, u8 port_idx)
+{
+	int result = 0;
+	struct device *dev = wusbhc->dev;
+
+	if (port_idx > wusbhc->ports_max)
+		return -EINVAL;
+
+	mutex_lock(&wusbhc->mutex);
+	switch (feature) {
+	case USB_PORT_FEAT_POWER:	/* fake port always on */
+		/* According to USB2.0[11.24.2.7.1.4], no need to implement? */
+	case USB_PORT_FEAT_C_OVER_CURRENT:
+		break;
+	case USB_PORT_FEAT_C_RESET:
+		wusb_port_by_idx(wusbhc, port_idx)->change &= ~USB_PORT_STAT_C_RESET;
+		break;
+	case USB_PORT_FEAT_C_CONNECTION:
+		wusb_port_by_idx(wusbhc, port_idx)->change &= ~USB_PORT_STAT_C_CONNECTION;
+		break;
+	case USB_PORT_FEAT_ENABLE:
+		__wusbhc_dev_disable(wusbhc, port_idx);
+		break;
+	case USB_PORT_FEAT_C_ENABLE:
+		wusb_port_by_idx(wusbhc, port_idx)->change &= ~USB_PORT_STAT_C_ENABLE;
+		break;
+	case USB_PORT_FEAT_SUSPEND:
+	case USB_PORT_FEAT_C_SUSPEND:
+		dev_err(dev, "(port_idx %d) Clear feat %d/%d UNIMPLEMENTED\n",
+			port_idx, feature, selector);
+		result = -ENOSYS;
+		break;
+	default:
+		dev_err(dev, "(port_idx %d) Clear feat %d/%d UNKNOWN\n",
+			port_idx, feature, selector);
+		result = -EPIPE;
+		break;
+	}
+	mutex_unlock(&wusbhc->mutex);
+
+	return result;
+}
+
+/*
+ * Return the port's status
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ */
+static int wusbhc_rh_get_port_status(struct wusbhc *wusbhc, u16 port_idx,
+				     u32 *_buf, u16 wLength)
+{
+	__le16 *buf = (__le16 *)_buf;
+
+	if (port_idx > wusbhc->ports_max)
+		return -EINVAL;
+
+	mutex_lock(&wusbhc->mutex);
+	buf[0] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->status);
+	buf[1] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->change);
+	mutex_unlock(&wusbhc->mutex);
+
+	return 0;
+}
+
+/*
+ * Entry point for Root Hub operations
+ *
+ * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
+ */
+int wusbhc_rh_control(struct usb_hcd *usb_hcd, u16 reqntype, u16 wValue,
+		      u16 wIndex, char *buf, u16 wLength)
+{
+	int result = -ENOSYS;
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+
+	switch (reqntype) {
+	case GetHubDescriptor:
+		result = wusbhc_rh_get_hub_descr(
+			wusbhc, wValue, wIndex,
+			(struct usb_hub_descriptor *) buf, wLength);
+		break;
+	case ClearHubFeature:
+		result = wusbhc_rh_clear_hub_feat(wusbhc, wValue);
+		break;
+	case GetHubStatus:
+		result = wusbhc_rh_get_hub_status(wusbhc, (u32 *)buf, wLength);
+		break;
+
+	case SetPortFeature:
+		result = wusbhc_rh_set_port_feat(wusbhc, wValue, wIndex >> 8,
+						 (wIndex & 0xff) - 1);
+		break;
+	case ClearPortFeature:
+		result = wusbhc_rh_clear_port_feat(wusbhc, wValue, wIndex >> 8,
+						   (wIndex & 0xff) - 1);
+		break;
+	case GetPortStatus:
+		result = wusbhc_rh_get_port_status(wusbhc, wIndex - 1,
+						   (u32 *)buf, wLength);
+		break;
+
+	case SetHubFeature:
+	default:
+		dev_err(wusbhc->dev, "%s (%p [%p], %x, %x, %x, %p, %x) "
+			"UNIMPLEMENTED\n", __func__, usb_hcd, wusbhc, reqntype,
+			wValue, wIndex, buf, wLength);
+		/* dump_stack(); */
+		result = -ENOSYS;
+	}
+	return result;
+}
+EXPORT_SYMBOL_GPL(wusbhc_rh_control);
+
+int wusbhc_rh_start_port_reset(struct usb_hcd *usb_hcd, unsigned port_idx)
+{
+	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+	dev_err(wusbhc->dev, "%s (%p [%p], port_idx %u) UNIMPLEMENTED\n",
+		__func__, usb_hcd, wusbhc, port_idx);
+	WARN_ON(1);
+	return -ENOSYS;
+}
+EXPORT_SYMBOL_GPL(wusbhc_rh_start_port_reset);
+
+static void wusb_port_init(struct wusb_port *port)
+{
+	port->status |= USB_PORT_STAT_HIGH_SPEED;
+}
+
+/*
+ * Alloc fake port specific fields and status.
+ */
+int wusbhc_rh_create(struct wusbhc *wusbhc)
+{
+	int result = -ENOMEM;
+	size_t port_size, itr;
+	port_size = wusbhc->ports_max * sizeof(wusbhc->port[0]);
+	wusbhc->port = kzalloc(port_size, GFP_KERNEL);
+	if (wusbhc->port == NULL)
+		goto error_port_alloc;
+	for (itr = 0; itr < wusbhc->ports_max; itr++)
+		wusb_port_init(&wusbhc->port[itr]);
+	result = 0;
+error_port_alloc:
+	return result;
+}
+
+void wusbhc_rh_destroy(struct wusbhc *wusbhc)
+{
+	kfree(wusbhc->port);
+}
diff --git a/drivers/staging/wusbcore/security.c b/drivers/staging/wusbcore/security.c
new file mode 100644
index 000000000000..14ac8c98ac9e
--- /dev/null
+++ b/drivers/staging/wusbcore/security.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Host Controller
+ * Security support: encryption enablement, etc
+ *
+ * Copyright (C) 2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ */
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/usb/ch9.h>
+#include <linux/random.h>
+#include <linux/export.h>
+#include "wusbhc.h"
+#include <asm/unaligned.h>
+
+static void wusbhc_gtk_rekey_work(struct work_struct *work);
+
+int wusbhc_sec_create(struct wusbhc *wusbhc)
+{
+	/*
+	 * WQ is singlethread because we need to serialize rekey operations.
+	 * Use a separate workqueue for security operations instead of the
+	 * wusbd workqueue because security operations may need to communicate
+	 * directly with downstream wireless devices using synchronous URBs.
+	 * If a device is not responding, this could block other host
+	 * controller operations.
+	 */
+	wusbhc->wq_security = create_singlethread_workqueue("wusbd_security");
+	if (wusbhc->wq_security == NULL) {
+		pr_err("WUSB-core: Cannot create wusbd_security workqueue\n");
+		return -ENOMEM;
+	}
+
+	wusbhc->gtk.descr.bLength = sizeof(wusbhc->gtk.descr) +
+		sizeof(wusbhc->gtk.data);
+	wusbhc->gtk.descr.bDescriptorType = USB_DT_KEY;
+	wusbhc->gtk.descr.bReserved = 0;
+	wusbhc->gtk_index = 0;
+
+	INIT_WORK(&wusbhc->gtk_rekey_work, wusbhc_gtk_rekey_work);
+
+	return 0;
+}
+
+
+/* Called when the HC is destroyed */
+void wusbhc_sec_destroy(struct wusbhc *wusbhc)
+{
+	destroy_workqueue(wusbhc->wq_security);
+}
+
+
+/**
+ * wusbhc_next_tkid - generate a new, currently unused, TKID
+ * @wusbhc:   the WUSB host controller
+ * @wusb_dev: the device whose PTK the TKID is for
+ *            (or NULL for a TKID for a GTK)
+ *
+ * The generated TKID consists of two parts: the device's authenticated
+ * address (or 0 or a GTK); and an incrementing number.  This ensures
+ * that TKIDs cannot be shared between devices and by the time the
+ * incrementing number wraps around the older TKIDs will no longer be
+ * in use (a maximum of two keys may be active at any one time).
+ */
+static u32 wusbhc_next_tkid(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
+{
+	u32 *tkid;
+	u32 addr;
+
+	if (wusb_dev == NULL) {
+		tkid = &wusbhc->gtk_tkid;
+		addr = 0;
+	} else {
+		tkid = &wusb_port_by_idx(wusbhc, wusb_dev->port_idx)->ptk_tkid;
+		addr = wusb_dev->addr & 0x7f;
+	}
+
+	*tkid = (addr << 8) | ((*tkid + 1) & 0xff);
+
+	return *tkid;
+}
+
+static void wusbhc_generate_gtk(struct wusbhc *wusbhc)
+{
+	const size_t key_size = sizeof(wusbhc->gtk.data);
+	u32 tkid;
+
+	tkid = wusbhc_next_tkid(wusbhc, NULL);
+
+	wusbhc->gtk.descr.tTKID[0] = (tkid >>  0) & 0xff;
+	wusbhc->gtk.descr.tTKID[1] = (tkid >>  8) & 0xff;
+	wusbhc->gtk.descr.tTKID[2] = (tkid >> 16) & 0xff;
+
+	get_random_bytes(wusbhc->gtk.descr.bKeyData, key_size);
+}
+
+/**
+ * wusbhc_sec_start - start the security management process
+ * @wusbhc: the WUSB host controller
+ *
+ * Generate and set an initial GTK on the host controller.
+ *
+ * Called when the HC is started.
+ */
+int wusbhc_sec_start(struct wusbhc *wusbhc)
+{
+	const size_t key_size = sizeof(wusbhc->gtk.data);
+	int result;
+
+	wusbhc_generate_gtk(wusbhc);
+
+	result = wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid,
+				&wusbhc->gtk.descr.bKeyData, key_size);
+	if (result < 0)
+		dev_err(wusbhc->dev, "cannot set GTK for the host: %d\n",
+			result);
+
+	return result;
+}
+
+/**
+ * wusbhc_sec_stop - stop the security management process
+ * @wusbhc: the WUSB host controller
+ *
+ * Wait for any pending GTK rekeys to stop.
+ */
+void wusbhc_sec_stop(struct wusbhc *wusbhc)
+{
+	cancel_work_sync(&wusbhc->gtk_rekey_work);
+}
+
+
+/** @returns encryption type name */
+const char *wusb_et_name(u8 x)
+{
+	switch (x) {
+	case USB_ENC_TYPE_UNSECURE:	return "unsecure";
+	case USB_ENC_TYPE_WIRED:	return "wired";
+	case USB_ENC_TYPE_CCM_1:	return "CCM-1";
+	case USB_ENC_TYPE_RSA_1:	return "RSA-1";
+	default:			return "unknown";
+	}
+}
+EXPORT_SYMBOL_GPL(wusb_et_name);
+
+/*
+ * Set the device encryption method
+ *
+ * We tell the device which encryption method to use; we do this when
+ * setting up the device's security.
+ */
+static int wusb_dev_set_encryption(struct usb_device *usb_dev, int value)
+{
+	int result;
+	struct device *dev = &usb_dev->dev;
+	struct wusb_dev *wusb_dev = usb_dev->wusb_dev;
+
+	if (value) {
+		value = wusb_dev->ccm1_etd.bEncryptionValue;
+	} else {
+		/* FIXME: should be wusb_dev->etd[UNSECURE].bEncryptionValue */
+		value = 0;
+	}
+	/* Set device's */
+	result = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+			USB_REQ_SET_ENCRYPTION,
+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			value, 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (result < 0)
+		dev_err(dev, "Can't set device's WUSB encryption to "
+			"%s (value %d): %d\n",
+			wusb_et_name(wusb_dev->ccm1_etd.bEncryptionType),
+			wusb_dev->ccm1_etd.bEncryptionValue,  result);
+	return result;
+}
+
+/*
+ * Set the GTK to be used by a device.
+ *
+ * The device must be authenticated.
+ */
+static int wusb_dev_set_gtk(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
+{
+	struct usb_device *usb_dev = wusb_dev->usb_dev;
+	u8 key_index = wusb_key_index(wusbhc->gtk_index,
+		WUSB_KEY_INDEX_TYPE_GTK, WUSB_KEY_INDEX_ORIGINATOR_HOST);
+
+	return usb_control_msg(
+		usb_dev, usb_sndctrlpipe(usb_dev, 0),
+		USB_REQ_SET_DESCRIPTOR,
+		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		USB_DT_KEY << 8 | key_index, 0,
+		&wusbhc->gtk.descr, wusbhc->gtk.descr.bLength,
+		USB_CTRL_SET_TIMEOUT);
+}
+
+
+/* FIXME: prototype for adding security */
+int wusb_dev_sec_add(struct wusbhc *wusbhc,
+		     struct usb_device *usb_dev, struct wusb_dev *wusb_dev)
+{
+	int result, bytes, secd_size;
+	struct device *dev = &usb_dev->dev;
+	struct usb_security_descriptor *secd, *new_secd;
+	const struct usb_encryption_descriptor *etd, *ccm1_etd = NULL;
+	const void *itr, *top;
+	char buf[64];
+
+	secd = kmalloc(sizeof(*secd), GFP_KERNEL);
+	if (secd == NULL) {
+		result = -ENOMEM;
+		goto out;
+	}
+
+	result = usb_get_descriptor(usb_dev, USB_DT_SECURITY,
+				    0, secd, sizeof(*secd));
+	if (result < (int)sizeof(*secd)) {
+		dev_err(dev, "Can't read security descriptor or "
+			"not enough data: %d\n", result);
+		goto out;
+	}
+	secd_size = le16_to_cpu(secd->wTotalLength);
+	new_secd = krealloc(secd, secd_size, GFP_KERNEL);
+	if (new_secd == NULL) {
+		dev_err(dev,
+			"Can't allocate space for security descriptors\n");
+		result = -ENOMEM;
+		goto out;
+	}
+	secd = new_secd;
+	result = usb_get_descriptor(usb_dev, USB_DT_SECURITY,
+				    0, secd, secd_size);
+	if (result < secd_size) {
+		dev_err(dev, "Can't read security descriptor or "
+			"not enough data: %d\n", result);
+		goto out;
+	}
+	bytes = 0;
+	itr = &secd[1];
+	top = (void *)secd + result;
+	while (itr < top) {
+		etd = itr;
+		if (top - itr < sizeof(*etd)) {
+			dev_err(dev, "BUG: bad device security descriptor; "
+				"not enough data (%zu vs %zu bytes left)\n",
+				top - itr, sizeof(*etd));
+			break;
+		}
+		if (etd->bLength < sizeof(*etd)) {
+			dev_err(dev, "BUG: bad device encryption descriptor; "
+				"descriptor is too short "
+				"(%u vs %zu needed)\n",
+				etd->bLength, sizeof(*etd));
+			break;
+		}
+		itr += etd->bLength;
+		bytes += snprintf(buf + bytes, sizeof(buf) - bytes,
+				  "%s (0x%02x/%02x) ",
+				  wusb_et_name(etd->bEncryptionType),
+				  etd->bEncryptionValue, etd->bAuthKeyIndex);
+		if (etd->bEncryptionType == USB_ENC_TYPE_CCM_1)
+			ccm1_etd = etd;
+	}
+	/* This code only supports CCM1 as of now. */
+	/* FIXME: user has to choose which sec mode to use?
+	 * In theory we want CCM */
+	if (ccm1_etd == NULL) {
+		dev_err(dev, "WUSB device doesn't support CCM1 encryption, "
+			"can't use!\n");
+		result = -EINVAL;
+		goto out;
+	}
+	wusb_dev->ccm1_etd = *ccm1_etd;
+	dev_dbg(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n",
+		buf, wusb_et_name(ccm1_etd->bEncryptionType),
+		ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex);
+	result = 0;
+out:
+	kfree(secd);
+	return result;
+}
+
+void wusb_dev_sec_rm(struct wusb_dev *wusb_dev)
+{
+	/* Nothing so far */
+}
+
+/**
+ * Update the address of an unauthenticated WUSB device
+ *
+ * Once we have successfully authenticated, we take it to addr0 state
+ * and then to a normal address.
+ *
+ * Before the device's address (as known by it) was usb_dev->devnum |
+ * 0x80 (unauthenticated address). With this we update it to usb_dev->devnum.
+ */
+int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
+{
+	int result = -ENOMEM;
+	struct usb_device *usb_dev = wusb_dev->usb_dev;
+	struct device *dev = &usb_dev->dev;
+	u8 new_address = wusb_dev->addr & 0x7F;
+
+	/* Set address 0 */
+	result = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+			USB_REQ_SET_ADDRESS,
+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "auth failed: can't set address 0: %d\n",
+			result);
+		goto error_addr0;
+	}
+	result = wusb_set_dev_addr(wusbhc, wusb_dev, 0);
+	if (result < 0)
+		goto error_addr0;
+	usb_set_device_state(usb_dev, USB_STATE_DEFAULT);
+	usb_ep0_reinit(usb_dev);
+
+	/* Set new (authenticated) address. */
+	result = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
+			USB_REQ_SET_ADDRESS,
+			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+			new_address, 0, NULL, 0,
+			USB_CTRL_SET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "auth failed: can't set address %u: %d\n",
+			new_address, result);
+		goto error_addr;
+	}
+	result = wusb_set_dev_addr(wusbhc, wusb_dev, new_address);
+	if (result < 0)
+		goto error_addr;
+	usb_set_device_state(usb_dev, USB_STATE_ADDRESS);
+	usb_ep0_reinit(usb_dev);
+	usb_dev->authenticated = 1;
+error_addr:
+error_addr0:
+	return result;
+}
+
+/*
+ *
+ *
+ */
+/* FIXME: split and cleanup */
+int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
+			    struct wusb_ckhdid *ck)
+{
+	int result = -ENOMEM;
+	struct usb_device *usb_dev = wusb_dev->usb_dev;
+	struct device *dev = &usb_dev->dev;
+	u32 tkid;
+	struct usb_handshake *hs;
+	struct aes_ccm_nonce ccm_n;
+	u8 mic[8];
+	struct wusb_keydvt_in keydvt_in;
+	struct wusb_keydvt_out keydvt_out;
+
+	hs = kcalloc(3, sizeof(hs[0]), GFP_KERNEL);
+	if (!hs)
+		goto error_kzalloc;
+
+	/* We need to turn encryption before beginning the 4way
+	 * hshake (WUSB1.0[.3.2.2]) */
+	result = wusb_dev_set_encryption(usb_dev, 1);
+	if (result < 0)
+		goto error_dev_set_encryption;
+
+	tkid = wusbhc_next_tkid(wusbhc, wusb_dev);
+
+	hs[0].bMessageNumber = 1;
+	hs[0].bStatus = 0;
+	put_unaligned_le32(tkid, hs[0].tTKID);
+	hs[0].bReserved = 0;
+	memcpy(hs[0].CDID, &wusb_dev->cdid, sizeof(hs[0].CDID));
+	get_random_bytes(&hs[0].nonce, sizeof(hs[0].nonce));
+	memset(hs[0].MIC, 0, sizeof(hs[0].MIC)); /* Per WUSB1.0[T7-22] */
+
+	result = usb_control_msg(
+		usb_dev, usb_sndctrlpipe(usb_dev, 0),
+		USB_REQ_SET_HANDSHAKE,
+		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		1, 0, &hs[0], sizeof(hs[0]), USB_CTRL_SET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "Handshake1: request failed: %d\n", result);
+		goto error_hs1;
+	}
+
+	/* Handshake 2, from the device -- need to verify fields */
+	result = usb_control_msg(
+		usb_dev, usb_rcvctrlpipe(usb_dev, 0),
+		USB_REQ_GET_HANDSHAKE,
+		USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		2, 0, &hs[1], sizeof(hs[1]), USB_CTRL_GET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "Handshake2: request failed: %d\n", result);
+		goto error_hs2;
+	}
+
+	result = -EINVAL;
+	if (hs[1].bMessageNumber != 2) {
+		dev_err(dev, "Handshake2 failed: bad message number %u\n",
+			hs[1].bMessageNumber);
+		goto error_hs2;
+	}
+	if (hs[1].bStatus != 0) {
+		dev_err(dev, "Handshake2 failed: bad status %u\n",
+			hs[1].bStatus);
+		goto error_hs2;
+	}
+	if (memcmp(hs[0].tTKID, hs[1].tTKID, sizeof(hs[0].tTKID))) {
+		dev_err(dev, "Handshake2 failed: TKID mismatch "
+			"(#1 0x%02x%02x%02x vs #2 0x%02x%02x%02x)\n",
+			hs[0].tTKID[0], hs[0].tTKID[1], hs[0].tTKID[2],
+			hs[1].tTKID[0], hs[1].tTKID[1], hs[1].tTKID[2]);
+		goto error_hs2;
+	}
+	if (memcmp(hs[0].CDID, hs[1].CDID, sizeof(hs[0].CDID))) {
+		dev_err(dev, "Handshake2 failed: CDID mismatch\n");
+		goto error_hs2;
+	}
+
+	/* Setup the CCM nonce */
+	memset(&ccm_n.sfn, 0, sizeof(ccm_n.sfn)); /* Per WUSB1.0[6.5.2] */
+	put_unaligned_le32(tkid, ccm_n.tkid);
+	ccm_n.src_addr = wusbhc->uwb_rc->uwb_dev.dev_addr;
+	ccm_n.dest_addr.data[0] = wusb_dev->addr;
+	ccm_n.dest_addr.data[1] = 0;
+
+	/* Derive the KCK and PTK from CK, the CCM, H and D nonces */
+	memcpy(keydvt_in.hnonce, hs[0].nonce, sizeof(keydvt_in.hnonce));
+	memcpy(keydvt_in.dnonce, hs[1].nonce, sizeof(keydvt_in.dnonce));
+	result = wusb_key_derive(&keydvt_out, ck->data, &ccm_n, &keydvt_in);
+	if (result < 0) {
+		dev_err(dev, "Handshake2 failed: cannot derive keys: %d\n",
+			result);
+		goto error_hs2;
+	}
+
+	/* Compute MIC and verify it */
+	result = wusb_oob_mic(mic, keydvt_out.kck, &ccm_n, &hs[1]);
+	if (result < 0) {
+		dev_err(dev, "Handshake2 failed: cannot compute MIC: %d\n",
+			result);
+		goto error_hs2;
+	}
+
+	if (memcmp(hs[1].MIC, mic, sizeof(hs[1].MIC))) {
+		dev_err(dev, "Handshake2 failed: MIC mismatch\n");
+		goto error_hs2;
+	}
+
+	/* Send Handshake3 */
+	hs[2].bMessageNumber = 3;
+	hs[2].bStatus = 0;
+	put_unaligned_le32(tkid, hs[2].tTKID);
+	hs[2].bReserved = 0;
+	memcpy(hs[2].CDID, &wusb_dev->cdid, sizeof(hs[2].CDID));
+	memcpy(hs[2].nonce, hs[0].nonce, sizeof(hs[2].nonce));
+	result = wusb_oob_mic(hs[2].MIC, keydvt_out.kck, &ccm_n, &hs[2]);
+	if (result < 0) {
+		dev_err(dev, "Handshake3 failed: cannot compute MIC: %d\n",
+			result);
+		goto error_hs2;
+	}
+
+	result = usb_control_msg(
+		usb_dev, usb_sndctrlpipe(usb_dev, 0),
+		USB_REQ_SET_HANDSHAKE,
+		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
+		3, 0, &hs[2], sizeof(hs[2]), USB_CTRL_SET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "Handshake3: request failed: %d\n", result);
+		goto error_hs3;
+	}
+
+	result = wusbhc->set_ptk(wusbhc, wusb_dev->port_idx, tkid,
+				 keydvt_out.ptk, sizeof(keydvt_out.ptk));
+	if (result < 0)
+		goto error_wusbhc_set_ptk;
+
+	result = wusb_dev_set_gtk(wusbhc, wusb_dev);
+	if (result < 0) {
+		dev_err(dev, "Set GTK for device: request failed: %d\n",
+			result);
+		goto error_wusbhc_set_gtk;
+	}
+
+	/* Update the device's address from unauth to auth */
+	if (usb_dev->authenticated == 0) {
+		result = wusb_dev_update_address(wusbhc, wusb_dev);
+		if (result < 0)
+			goto error_dev_update_address;
+	}
+	result = 0;
+	dev_info(dev, "device authenticated\n");
+
+error_dev_update_address:
+error_wusbhc_set_gtk:
+error_wusbhc_set_ptk:
+error_hs3:
+error_hs2:
+error_hs1:
+	memset(hs, 0, 3*sizeof(hs[0]));
+	memzero_explicit(&keydvt_out, sizeof(keydvt_out));
+	memzero_explicit(&keydvt_in, sizeof(keydvt_in));
+	memzero_explicit(&ccm_n, sizeof(ccm_n));
+	memzero_explicit(mic, sizeof(mic));
+	if (result < 0)
+		wusb_dev_set_encryption(usb_dev, 0);
+error_dev_set_encryption:
+	kfree(hs);
+error_kzalloc:
+	return result;
+}
+
+/*
+ * Once all connected and authenticated devices have received the new
+ * GTK, switch the host to using it.
+ */
+static void wusbhc_gtk_rekey_work(struct work_struct *work)
+{
+	struct wusbhc *wusbhc = container_of(work,
+					struct wusbhc, gtk_rekey_work);
+	size_t key_size = sizeof(wusbhc->gtk.data);
+	int port_idx;
+	struct wusb_dev *wusb_dev, *wusb_dev_next;
+	LIST_HEAD(rekey_list);
+
+	mutex_lock(&wusbhc->mutex);
+	/* generate the new key */
+	wusbhc_generate_gtk(wusbhc);
+	/* roll the gtk index. */
+	wusbhc->gtk_index = (wusbhc->gtk_index + 1) % (WUSB_KEY_INDEX_MAX + 1);
+	/*
+	 * Save all connected devices on a list while holding wusbhc->mutex and
+	 * take a reference to each one.  Then submit the set key request to
+	 * them after releasing the lock in order to avoid a deadlock.
+	 */
+	for (port_idx = 0; port_idx < wusbhc->ports_max; port_idx++) {
+		wusb_dev = wusbhc->port[port_idx].wusb_dev;
+		if (!wusb_dev || !wusb_dev->usb_dev
+			|| !wusb_dev->usb_dev->authenticated)
+			continue;
+
+		wusb_dev_get(wusb_dev);
+		list_add_tail(&wusb_dev->rekey_node, &rekey_list);
+	}
+	mutex_unlock(&wusbhc->mutex);
+
+	/* Submit the rekey requests without holding wusbhc->mutex. */
+	list_for_each_entry_safe(wusb_dev, wusb_dev_next, &rekey_list,
+		rekey_node) {
+		list_del_init(&wusb_dev->rekey_node);
+		dev_dbg(&wusb_dev->usb_dev->dev,
+			"%s: rekey device at port %d\n",
+			__func__, wusb_dev->port_idx);
+
+		if (wusb_dev_set_gtk(wusbhc, wusb_dev) < 0) {
+			dev_err(&wusb_dev->usb_dev->dev,
+				"%s: rekey device at port %d failed\n",
+				__func__, wusb_dev->port_idx);
+		}
+		wusb_dev_put(wusb_dev);
+	}
+
+	/* Switch the host controller to use the new GTK. */
+	mutex_lock(&wusbhc->mutex);
+	wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid,
+		&wusbhc->gtk.descr.bKeyData, key_size);
+	mutex_unlock(&wusbhc->mutex);
+}
+
+/**
+ * wusbhc_gtk_rekey - generate and distribute a new GTK
+ * @wusbhc: the WUSB host controller
+ *
+ * Generate a new GTK and distribute it to all connected and
+ * authenticated devices.  When all devices have the new GTK, the host
+ * starts using it.
+ *
+ * This must be called after every device disconnect (see [WUSB]
+ * section 6.2.11.2).
+ */
+void wusbhc_gtk_rekey(struct wusbhc *wusbhc)
+{
+	/*
+	 * We need to submit a URB to the downstream WUSB devices in order to
+	 * change the group key.  This can't be done while holding the
+	 * wusbhc->mutex since that is also taken in the urb_enqueue routine
+	 * and will cause a deadlock.  Instead, queue a work item to do
+	 * it when the lock is not held
+	 */
+	queue_work(wusbhc->wq_security, &wusbhc->gtk_rekey_work);
+}
diff --git a/drivers/staging/wusbcore/wa-hc.c b/drivers/staging/wusbcore/wa-hc.c
new file mode 100644
index 000000000000..6827075fb8a1
--- /dev/null
+++ b/drivers/staging/wusbcore/wa-hc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wire Adapter Host Controller Driver
+ * Common items to HWA and DWA based HCDs
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ */
+#include <linux/slab.h>
+#include <linux/module.h>
+#include "wusbhc.h"
+#include "wa-hc.h"
+
+/**
+ * Assumes
+ *
+ * wa->usb_dev and wa->usb_iface initialized and refcounted,
+ * wa->wa_descr initialized.
+ */
+int wa_create(struct wahc *wa, struct usb_interface *iface,
+	kernel_ulong_t quirks)
+{
+	int result;
+	struct device *dev = &iface->dev;
+
+	if (iface->cur_altsetting->desc.bNumEndpoints < 3)
+		return -ENODEV;
+
+	result = wa_rpipes_create(wa);
+	if (result < 0)
+		goto error_rpipes_create;
+	wa->quirks = quirks;
+	/* Fill up Data Transfer EP pointers */
+	wa->dti_epd = &iface->cur_altsetting->endpoint[1].desc;
+	wa->dto_epd = &iface->cur_altsetting->endpoint[2].desc;
+	wa->dti_buf_size = usb_endpoint_maxp(wa->dti_epd);
+	wa->dti_buf = kmalloc(wa->dti_buf_size, GFP_KERNEL);
+	if (wa->dti_buf == NULL) {
+		result = -ENOMEM;
+		goto error_dti_buf_alloc;
+	}
+	result = wa_nep_create(wa, iface);
+	if (result < 0) {
+		dev_err(dev, "WA-CDS: can't initialize notif endpoint: %d\n",
+			result);
+		goto error_nep_create;
+	}
+	return 0;
+
+error_nep_create:
+	kfree(wa->dti_buf);
+error_dti_buf_alloc:
+	wa_rpipes_destroy(wa);
+error_rpipes_create:
+	return result;
+}
+EXPORT_SYMBOL_GPL(wa_create);
+
+
+void __wa_destroy(struct wahc *wa)
+{
+	if (wa->dti_urb) {
+		usb_kill_urb(wa->dti_urb);
+		usb_put_urb(wa->dti_urb);
+	}
+	kfree(wa->dti_buf);
+	wa_nep_destroy(wa);
+	wa_rpipes_destroy(wa);
+}
+EXPORT_SYMBOL_GPL(__wa_destroy);
+
+/**
+ * wa_reset_all - reset the WA device
+ * @wa: the WA to be reset
+ *
+ * For HWAs the radio controller and all other PALs are also reset.
+ */
+void wa_reset_all(struct wahc *wa)
+{
+	/* FIXME: assuming HWA. */
+	wusbhc_reset_all(wa->wusb);
+}
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Wireless USB Wire Adapter core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wusbcore/wa-hc.h b/drivers/staging/wusbcore/wa-hc.h
new file mode 100644
index 000000000000..5a38465724c2
--- /dev/null
+++ b/drivers/staging/wusbcore/wa-hc.h
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HWA Host Controller Driver
+ * Wire Adapter Control/Data Streaming Iface (WUSB1.0[8])
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This driver implements a USB Host Controller (struct usb_hcd) for a
+ * Wireless USB Host Controller based on the Wireless USB 1.0
+ * Host-Wire-Adapter specification (in layman terms, a USB-dongle that
+ * implements a Wireless USB host).
+ *
+ * Check out the Design-overview.txt file in the source documentation
+ * for other details on the implementation.
+ *
+ * Main blocks:
+ *
+ *  driver     glue with the driver API, workqueue daemon
+ *
+ *  lc         RC instance life cycle management (create, destroy...)
+ *
+ *  hcd        glue with the USB API Host Controller Interface API.
+ *
+ *  nep        Notification EndPoint management: collect notifications
+ *             and queue them with the workqueue daemon.
+ *
+ *             Handle notifications as coming from the NEP. Sends them
+ *             off others to their respective modules (eg: connect,
+ *             disconnect and reset go to devconnect).
+ *
+ *  rpipe      Remote Pipe management; rpipe is what we use to write
+ *             to an endpoint on a WUSB device that is connected to a
+ *             HWA RC.
+ *
+ *  xfer       Transfer management -- this is all the code that gets a
+ *             buffer and pushes it to a device (or viceversa). *
+ *
+ * Some day a lot of this code will be shared between this driver and
+ * the drivers for DWA (xfer, rpipe).
+ *
+ * All starts at driver.c:hwahc_probe(), when one of this guys is
+ * connected. hwahc_disconnect() stops it.
+ *
+ * During operation, the main driver is devices connecting or
+ * disconnecting. They cause the HWA RC to send notifications into
+ * nep.c:hwahc_nep_cb() that will dispatch them to
+ * notif.c:wa_notif_dispatch(). From there they will fan to cause
+ * device connects, disconnects, etc.
+ *
+ * Note much of the activity is difficult to follow. For example a
+ * device connect goes to devconnect, which will cause the "fake" root
+ * hub port to show a connect and stop there. Then hub_wq will notice
+ * and call into the rh.c:hwahc_rc_port_reset() code to authenticate
+ * the device (and this might require user intervention) and enable
+ * the port.
+ *
+ * We also have a timer workqueue going from devconnect.c that
+ * schedules in hwahc_devconnect_create().
+ *
+ * The rest of the traffic is in the usual entry points of a USB HCD,
+ * which are hooked up in driver.c:hwahc_rc_driver, and defined in
+ * hcd.c.
+ */
+
+#ifndef __HWAHC_INTERNAL_H__
+#define __HWAHC_INTERNAL_H__
+
+#include <linux/completion.h>
+#include <linux/usb.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include "../uwb/uwb.h"
+#include "include/wusb.h"
+#include "include/wusb-wa.h"
+
+struct wusbhc;
+struct wahc;
+extern void wa_urb_enqueue_run(struct work_struct *ws);
+extern void wa_process_errored_transfers_run(struct work_struct *ws);
+
+/**
+ * RPipe instance
+ *
+ * @descr's fields are kept in LE, as we need to send it back and
+ * forth.
+ *
+ * @wa is referenced when set
+ *
+ * @segs_available is the number of requests segments that still can
+ *                 be submitted to the controller without overloading
+ *                 it. It is initialized to descr->wRequests when
+ *                 aiming.
+ *
+ * A rpipe supports a max of descr->wRequests at the same time; before
+ * submitting seg_lock has to be taken. If segs_avail > 0, then we can
+ * submit; if not, we have to queue them.
+ */
+struct wa_rpipe {
+	struct kref refcnt;
+	struct usb_rpipe_descriptor descr;
+	struct usb_host_endpoint *ep;
+	struct wahc *wa;
+	spinlock_t seg_lock;
+	struct list_head seg_list;
+	struct list_head list_node;
+	atomic_t segs_available;
+	u8 buffer[1];	/* For reads/writes on USB */
+};
+
+
+enum wa_dti_state {
+	WA_DTI_TRANSFER_RESULT_PENDING,
+	WA_DTI_ISOC_PACKET_STATUS_PENDING,
+	WA_DTI_BUF_IN_DATA_PENDING
+};
+
+enum wa_quirks {
+	/*
+	 * The Alereon HWA expects the data frames in isochronous transfer
+	 * requests to be concatenated and not sent as separate packets.
+	 */
+	WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC	= 0x01,
+	/*
+	 * The Alereon HWA can be instructed to not send transfer notifications
+	 * as an optimization.
+	 */
+	WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS	= 0x02,
+};
+
+enum wa_vendor_specific_requests {
+	WA_REQ_ALEREON_DISABLE_XFER_NOTIFICATIONS = 0x4C,
+	WA_REQ_ALEREON_FEATURE_SET = 0x01,
+	WA_REQ_ALEREON_FEATURE_CLEAR = 0x00,
+};
+
+#define WA_MAX_BUF_IN_URBS	4
+/**
+ * Instance of a HWA Host Controller
+ *
+ * Except where a more specific lock/mutex applies or atomic, all
+ * fields protected by @mutex.
+ *
+ * @wa_descr  Can be accessed without locking because it is in
+ *            the same area where the device descriptors were
+ *            read, so it is guaranteed to exist unmodified while
+ *            the device exists.
+ *
+ *            Endianess has been converted to CPU's.
+ *
+ * @nep_* can be accessed without locking as its processing is
+ *        serialized; we submit a NEP URB and it comes to
+ *        hwahc_nep_cb(), which won't issue another URB until it is
+ *        done processing it.
+ *
+ * @xfer_list:
+ *
+ *   List of active transfers to verify existence from a xfer id
+ *   gotten from the xfer result message. Can't use urb->list because
+ *   it goes by endpoint, and we don't know the endpoint at the time
+ *   when we get the xfer result message. We can't really rely on the
+ *   pointer (will have to change for 64 bits) as the xfer id is 32 bits.
+ *
+ * @xfer_delayed_list:   List of transfers that need to be started
+ *                       (with a workqueue, because they were
+ *                       submitted from an atomic context).
+ *
+ * FIXME: this needs to be layered up: a wusbhc layer (for sharing
+ *        commonalities with WHCI), a wa layer (for sharing
+ *        commonalities with DWA-RC).
+ */
+struct wahc {
+	struct usb_device *usb_dev;
+	struct usb_interface *usb_iface;
+
+	/* HC to deliver notifications */
+	union {
+		struct wusbhc *wusb;
+		struct dwahc *dwa;
+	};
+
+	const struct usb_endpoint_descriptor *dto_epd, *dti_epd;
+	const struct usb_wa_descriptor *wa_descr;
+
+	struct urb *nep_urb;		/* Notification EndPoint [lockless] */
+	struct edc nep_edc;
+	void *nep_buffer;
+	size_t nep_buffer_size;
+
+	atomic_t notifs_queued;
+
+	u16 rpipes;
+	unsigned long *rpipe_bm;	/* rpipe usage bitmap */
+	struct list_head rpipe_delayed_list;	/* delayed RPIPES. */
+	spinlock_t rpipe_lock;	/* protect rpipe_bm and delayed list */
+	struct mutex rpipe_mutex;	/* assigning resources to endpoints */
+
+	/*
+	 * dti_state is used to track the state of the dti_urb. When dti_state
+	 * is WA_DTI_ISOC_PACKET_STATUS_PENDING, dti_isoc_xfer_in_progress and
+	 * dti_isoc_xfer_seg identify which xfer the incoming isoc packet
+	 * status refers to.
+	 */
+	enum wa_dti_state dti_state;
+	u32 dti_isoc_xfer_in_progress;
+	u8  dti_isoc_xfer_seg;
+	struct urb *dti_urb;		/* URB for reading xfer results */
+					/* URBs for reading data in */
+	struct urb buf_in_urbs[WA_MAX_BUF_IN_URBS];
+	int active_buf_in_urbs;		/* number of buf_in_urbs active. */
+	struct edc dti_edc;		/* DTI error density counter */
+	void *dti_buf;
+	size_t dti_buf_size;
+
+	unsigned long dto_in_use;	/* protect dto endoint serialization */
+
+	s32 status;			/* For reading status */
+
+	struct list_head xfer_list;
+	struct list_head xfer_delayed_list;
+	struct list_head xfer_errored_list;
+	/*
+	 * lock for the above xfer lists.  Can be taken while a xfer->lock is
+	 * held but not in the reverse order.
+	 */
+	spinlock_t xfer_list_lock;
+	struct work_struct xfer_enqueue_work;
+	struct work_struct xfer_error_work;
+	atomic_t xfer_id_count;
+
+	kernel_ulong_t	quirks;
+};
+
+
+extern int wa_create(struct wahc *wa, struct usb_interface *iface,
+	kernel_ulong_t);
+extern void __wa_destroy(struct wahc *wa);
+extern int wa_dti_start(struct wahc *wa);
+void wa_reset_all(struct wahc *wa);
+
+
+/* Miscellaneous constants */
+enum {
+	/** Max number of EPROTO errors we tolerate on the NEP in a
+	 * period of time */
+	HWAHC_EPROTO_MAX = 16,
+	/** Period of time for EPROTO errors (in jiffies) */
+	HWAHC_EPROTO_PERIOD = 4 * HZ,
+};
+
+
+/* Notification endpoint handling */
+extern int wa_nep_create(struct wahc *, struct usb_interface *);
+extern void wa_nep_destroy(struct wahc *);
+
+static inline int wa_nep_arm(struct wahc *wa, gfp_t gfp_mask)
+{
+	struct urb *urb = wa->nep_urb;
+	urb->transfer_buffer = wa->nep_buffer;
+	urb->transfer_buffer_length = wa->nep_buffer_size;
+	return usb_submit_urb(urb, gfp_mask);
+}
+
+static inline void wa_nep_disarm(struct wahc *wa)
+{
+	usb_kill_urb(wa->nep_urb);
+}
+
+
+/* RPipes */
+static inline void wa_rpipe_init(struct wahc *wa)
+{
+	INIT_LIST_HEAD(&wa->rpipe_delayed_list);
+	spin_lock_init(&wa->rpipe_lock);
+	mutex_init(&wa->rpipe_mutex);
+}
+
+static inline void wa_init(struct wahc *wa)
+{
+	int index;
+
+	edc_init(&wa->nep_edc);
+	atomic_set(&wa->notifs_queued, 0);
+	wa->dti_state = WA_DTI_TRANSFER_RESULT_PENDING;
+	wa_rpipe_init(wa);
+	edc_init(&wa->dti_edc);
+	INIT_LIST_HEAD(&wa->xfer_list);
+	INIT_LIST_HEAD(&wa->xfer_delayed_list);
+	INIT_LIST_HEAD(&wa->xfer_errored_list);
+	spin_lock_init(&wa->xfer_list_lock);
+	INIT_WORK(&wa->xfer_enqueue_work, wa_urb_enqueue_run);
+	INIT_WORK(&wa->xfer_error_work, wa_process_errored_transfers_run);
+	wa->dto_in_use = 0;
+	atomic_set(&wa->xfer_id_count, 1);
+	/* init the buf in URBs */
+	for (index = 0; index < WA_MAX_BUF_IN_URBS; ++index)
+		usb_init_urb(&(wa->buf_in_urbs[index]));
+	wa->active_buf_in_urbs = 0;
+}
+
+/**
+ * Destroy a pipe (when refcount drops to zero)
+ *
+ * Assumes it has been moved to the "QUIESCING" state.
+ */
+struct wa_xfer;
+extern void rpipe_destroy(struct kref *_rpipe);
+static inline
+void __rpipe_get(struct wa_rpipe *rpipe)
+{
+	kref_get(&rpipe->refcnt);
+}
+extern int rpipe_get_by_ep(struct wahc *, struct usb_host_endpoint *,
+			   struct urb *, gfp_t);
+static inline void rpipe_put(struct wa_rpipe *rpipe)
+{
+	kref_put(&rpipe->refcnt, rpipe_destroy);
+
+}
+extern void rpipe_ep_disable(struct wahc *, struct usb_host_endpoint *);
+extern void rpipe_clear_feature_stalled(struct wahc *,
+			struct usb_host_endpoint *);
+extern int wa_rpipes_create(struct wahc *);
+extern void wa_rpipes_destroy(struct wahc *);
+static inline void rpipe_avail_dec(struct wa_rpipe *rpipe)
+{
+	atomic_dec(&rpipe->segs_available);
+}
+
+/**
+ * Returns true if the rpipe is ready to submit more segments.
+ */
+static inline int rpipe_avail_inc(struct wa_rpipe *rpipe)
+{
+	return atomic_inc_return(&rpipe->segs_available) > 0
+		&& !list_empty(&rpipe->seg_list);
+}
+
+
+/* Transferring data */
+extern int wa_urb_enqueue(struct wahc *, struct usb_host_endpoint *,
+			  struct urb *, gfp_t);
+extern int wa_urb_dequeue(struct wahc *, struct urb *, int);
+extern void wa_handle_notif_xfer(struct wahc *, struct wa_notif_hdr *);
+
+
+/* Misc
+ *
+ * FIXME: Refcounting for the actual @hwahc object is not correct; I
+ *        mean, this should be refcounting on the HCD underneath, but
+ *        it is not. In any case, the semantics for HCD refcounting
+ *        are *weird*...on refcount reaching zero it just frees
+ *        it...no RC specific function is called...unless I miss
+ *        something.
+ *
+ * FIXME: has to go away in favour of a 'struct' hcd based solution
+ */
+static inline struct wahc *wa_get(struct wahc *wa)
+{
+	usb_get_intf(wa->usb_iface);
+	return wa;
+}
+
+static inline void wa_put(struct wahc *wa)
+{
+	usb_put_intf(wa->usb_iface);
+}
+
+
+static inline int __wa_feature(struct wahc *wa, unsigned op, u16 feature)
+{
+	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			op ? USB_REQ_SET_FEATURE : USB_REQ_CLEAR_FEATURE,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+			feature,
+			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+			NULL, 0, USB_CTRL_SET_TIMEOUT);
+}
+
+
+static inline int __wa_set_feature(struct wahc *wa, u16 feature)
+{
+	return  __wa_feature(wa, 1, feature);
+}
+
+
+static inline int __wa_clear_feature(struct wahc *wa, u16 feature)
+{
+	return __wa_feature(wa, 0, feature);
+}
+
+
+/**
+ * Return the status of a Wire Adapter
+ *
+ * @wa:		Wire Adapter instance
+ * @returns     < 0 errno code on error, or status bitmap as described
+ *              in WUSB1.0[8.3.1.6].
+ *
+ * NOTE: need malloc, some arches don't take USB from the stack
+ */
+static inline
+s32 __wa_get_status(struct wahc *wa)
+{
+	s32 result;
+	result = usb_control_msg(
+		wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
+		USB_REQ_GET_STATUS,
+		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
+		0, wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
+		&wa->status, sizeof(wa->status), USB_CTRL_GET_TIMEOUT);
+	if (result >= 0)
+		result = wa->status;
+	return result;
+}
+
+
+/**
+ * Waits until the Wire Adapter's status matches @mask/@value
+ *
+ * @wa:		Wire Adapter instance.
+ * @returns     < 0 errno code on error, otherwise status.
+ *
+ * Loop until the WAs status matches the mask and value (status & mask
+ * == value). Timeout if it doesn't happen.
+ *
+ * FIXME: is there an official specification on how long status
+ *        changes can take?
+ */
+static inline s32 __wa_wait_status(struct wahc *wa, u32 mask, u32 value)
+{
+	s32 result;
+	unsigned loops = 10;
+	do {
+		msleep(50);
+		result = __wa_get_status(wa);
+		if ((result & mask) == value)
+			break;
+		if (loops-- == 0) {
+			result = -ETIMEDOUT;
+			break;
+		}
+	} while (result >= 0);
+	return result;
+}
+
+
+/** Command @hwahc to stop, @returns 0 if ok, < 0 errno code on error */
+static inline int __wa_stop(struct wahc *wa)
+{
+	int result;
+	struct device *dev = &wa->usb_iface->dev;
+
+	result = __wa_clear_feature(wa, WA_ENABLE);
+	if (result < 0 && result != -ENODEV) {
+		dev_err(dev, "error commanding HC to stop: %d\n", result);
+		goto out;
+	}
+	result = __wa_wait_status(wa, WA_ENABLE, 0);
+	if (result < 0 && result != -ENODEV)
+		dev_err(dev, "error waiting for HC to stop: %d\n", result);
+out:
+	return 0;
+}
+
+
+#endif /* #ifndef __HWAHC_INTERNAL_H__ */
diff --git a/drivers/staging/wusbcore/wa-nep.c b/drivers/staging/wusbcore/wa-nep.c
new file mode 100644
index 000000000000..5f0656db5482
--- /dev/null
+++ b/drivers/staging/wusbcore/wa-nep.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB Wire Adapter: Control/Data Streaming Interface (WUSB[8])
+ * Notification EndPoint support
+ *
+ * Copyright (C) 2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This part takes care of getting the notification from the hw
+ * only and dispatching through wusbwad into
+ * wa_notif_dispatch. Handling is done there.
+ *
+ * WA notifications are limited in size; most of them are three or
+ * four bytes long, and the longest is the HWA Device Notification,
+ * which would not exceed 38 bytes (DNs are limited in payload to 32
+ * bytes plus 3 bytes header (WUSB1.0[7.6p2]), plus 3 bytes HWA
+ * header (WUSB1.0[8.5.4.2]).
+ *
+ * It is not clear if more than one Device Notification can be packed
+ * in a HWA Notification, I assume no because of the wording in
+ * WUSB1.0[8.5.4.2]. In any case, the bigger any notification could
+ * get is 256 bytes (as the bLength field is a byte).
+ *
+ * So what we do is we have this buffer and read into it; when a
+ * notification arrives we schedule work to a specific, single thread
+ * workqueue (so notifications are serialized) and copy the
+ * notification data. After scheduling the work, we rearm the read from
+ * the notification endpoint.
+ *
+ * Entry points here are:
+ *
+ * wa_nep_[create|destroy]()   To initialize/release this subsystem
+ *
+ * wa_nep_cb()                 Callback for the notification
+ *                                endpoint; when data is ready, this
+ *                                does the dispatching.
+ */
+#include <linux/workqueue.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+
+#include "wa-hc.h"
+#include "wusbhc.h"
+
+/* Structure for queueing notifications to the workqueue */
+struct wa_notif_work {
+	struct work_struct work;
+	struct wahc *wa;
+	size_t size;
+	u8 data[];
+};
+
+/*
+ * Process incoming notifications from the WA's Notification EndPoint
+ * [the wuswad daemon, basically]
+ *
+ * @_nw:	Pointer to a descriptor which has the pointer to the
+ *		@wa, the size of the buffer and the work queue
+ *		structure (so we can free all when done).
+ * @returns     0 if ok, < 0 errno code on error.
+ *
+ * All notifications follow the same format; they need to start with a
+ * 'struct wa_notif_hdr' header, so it is easy to parse through
+ * them. We just break the buffer in individual notifications (the
+ * standard doesn't say if it can be done or is forbidden, so we are
+ * cautious) and dispatch each.
+ *
+ * So the handling layers are is:
+ *
+ *   WA specific notification (from NEP)
+ *      Device Notification Received -> wa_handle_notif_dn()
+ *        WUSB Device notification generic handling
+ *      BPST Adjustment -> wa_handle_notif_bpst_adj()
+ *      ... -> ...
+ *
+ * @wa has to be referenced
+ */
+static void wa_notif_dispatch(struct work_struct *ws)
+{
+	void *itr;
+	u8 missing = 0;
+	struct wa_notif_work *nw = container_of(ws, struct wa_notif_work,
+						work);
+	struct wahc *wa = nw->wa;
+	struct wa_notif_hdr *notif_hdr;
+	size_t size;
+
+	struct device *dev = &wa->usb_iface->dev;
+
+#if 0
+	/* FIXME: need to check for this??? */
+	if (usb_hcd->state == HC_STATE_QUIESCING)	/* Going down? */
+		goto out;				/* screw it */
+#endif
+	atomic_dec(&wa->notifs_queued);		/* Throttling ctl */
+	size = nw->size;
+	itr = nw->data;
+
+	while (size) {
+		if (size < sizeof(*notif_hdr)) {
+			missing = sizeof(*notif_hdr) - size;
+			goto exhausted_buffer;
+		}
+		notif_hdr = itr;
+		if (size < notif_hdr->bLength)
+			goto exhausted_buffer;
+		itr += notif_hdr->bLength;
+		size -= notif_hdr->bLength;
+		/* Dispatch the notification [don't use itr or size!] */
+		switch (notif_hdr->bNotifyType) {
+		case HWA_NOTIF_DN: {
+			struct hwa_notif_dn *hwa_dn;
+			hwa_dn = container_of(notif_hdr, struct hwa_notif_dn,
+					      hdr);
+			wusbhc_handle_dn(wa->wusb, hwa_dn->bSourceDeviceAddr,
+					 hwa_dn->dndata,
+					 notif_hdr->bLength - sizeof(*hwa_dn));
+			break;
+		}
+		case WA_NOTIF_TRANSFER:
+			wa_handle_notif_xfer(wa, notif_hdr);
+			break;
+		case HWA_NOTIF_BPST_ADJ:
+			break; /* no action needed for BPST ADJ. */
+		case DWA_NOTIF_RWAKE:
+		case DWA_NOTIF_PORTSTATUS:
+			/* FIXME: unimplemented WA NOTIFs */
+			/* fallthru */
+		default:
+			dev_err(dev, "HWA: unknown notification 0x%x, "
+				"%zu bytes; discarding\n",
+				notif_hdr->bNotifyType,
+				(size_t)notif_hdr->bLength);
+			break;
+		}
+	}
+out:
+	wa_put(wa);
+	kfree(nw);
+	return;
+
+	/* THIS SHOULD NOT HAPPEN
+	 *
+	 * Buffer exahusted with partial data remaining; just warn and
+	 * discard the data, as this should not happen.
+	 */
+exhausted_buffer:
+	dev_warn(dev, "HWA: device sent short notification, "
+		 "%d bytes missing; discarding %d bytes.\n",
+		 missing, (int)size);
+	goto out;
+}
+
+/*
+ * Deliver incoming WA notifications to the wusbwa workqueue
+ *
+ * @wa:	Pointer the Wire Adapter Controller Data Streaming
+ *              instance (part of an 'struct usb_hcd').
+ * @size:       Size of the received buffer
+ * @returns     0 if ok, < 0 errno code on error.
+ *
+ * The input buffer is @wa->nep_buffer, with @size bytes
+ * (guaranteed to fit in the allocated space,
+ * @wa->nep_buffer_size).
+ */
+static int wa_nep_queue(struct wahc *wa, size_t size)
+{
+	int result = 0;
+	struct device *dev = &wa->usb_iface->dev;
+	struct wa_notif_work *nw;
+
+	/* dev_fnstart(dev, "(wa %p, size %zu)\n", wa, size); */
+	BUG_ON(size > wa->nep_buffer_size);
+	if (size == 0)
+		goto out;
+	if (atomic_read(&wa->notifs_queued) > 200) {
+		if (printk_ratelimit())
+			dev_err(dev, "Too many notifications queued, "
+				"throttling back\n");
+		goto out;
+	}
+	nw = kzalloc(sizeof(*nw) + size, GFP_ATOMIC);
+	if (nw == NULL) {
+		if (printk_ratelimit())
+			dev_err(dev, "No memory to queue notification\n");
+		result = -ENOMEM;
+		goto out;
+	}
+	INIT_WORK(&nw->work, wa_notif_dispatch);
+	nw->wa = wa_get(wa);
+	nw->size = size;
+	memcpy(nw->data, wa->nep_buffer, size);
+	atomic_inc(&wa->notifs_queued);		/* Throttling ctl */
+	queue_work(wusbd, &nw->work);
+out:
+	/* dev_fnend(dev, "(wa %p, size %zu) = result\n", wa, size, result); */
+	return result;
+}
+
+/*
+ * Callback for the notification event endpoint
+ *
+ * Check's that everything is fine and then passes the data to be
+ * queued to the workqueue.
+ */
+static void wa_nep_cb(struct urb *urb)
+{
+	int result;
+	struct wahc *wa = urb->context;
+	struct device *dev = &wa->usb_iface->dev;
+
+	switch (result = urb->status) {
+	case 0:
+		result = wa_nep_queue(wa, urb->actual_length);
+		if (result < 0)
+			dev_err(dev, "NEP: unable to process notification(s): "
+				"%d\n", result);
+		break;
+	case -ECONNRESET:	/* Not an error, but a controlled situation; */
+	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
+	case -ESHUTDOWN:
+		dev_dbg(dev, "NEP: going down %d\n", urb->status);
+		goto out;
+	default:	/* On general errors, we retry unless it gets ugly */
+		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
+			    EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "NEP: URB max acceptable errors "
+				"exceeded, resetting device\n");
+			wa_reset_all(wa);
+			goto out;
+		}
+		dev_err(dev, "NEP: URB error %d\n", urb->status);
+	}
+	result = wa_nep_arm(wa, GFP_ATOMIC);
+	if (result < 0) {
+		dev_err(dev, "NEP: cannot submit URB: %d\n", result);
+		wa_reset_all(wa);
+	}
+out:
+	return;
+}
+
+/*
+ * Initialize @wa's notification and event's endpoint stuff
+ *
+ * This includes the allocating the read buffer, the context ID
+ * allocation bitmap, the URB and submitting the URB.
+ */
+int wa_nep_create(struct wahc *wa, struct usb_interface *iface)
+{
+	int result;
+	struct usb_endpoint_descriptor *epd;
+	struct usb_device *usb_dev = interface_to_usbdev(iface);
+	struct device *dev = &iface->dev;
+
+	edc_init(&wa->nep_edc);
+	epd = &iface->cur_altsetting->endpoint[0].desc;
+	wa->nep_buffer_size = 1024;
+	wa->nep_buffer = kmalloc(wa->nep_buffer_size, GFP_KERNEL);
+	if (!wa->nep_buffer)
+		goto error_nep_buffer;
+	wa->nep_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (wa->nep_urb == NULL)
+		goto error_urb_alloc;
+	usb_fill_int_urb(wa->nep_urb, usb_dev,
+			 usb_rcvintpipe(usb_dev, epd->bEndpointAddress),
+			 wa->nep_buffer, wa->nep_buffer_size,
+			 wa_nep_cb, wa, epd->bInterval);
+	result = wa_nep_arm(wa, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "Cannot submit notification URB: %d\n", result);
+		goto error_nep_arm;
+	}
+	return 0;
+
+error_nep_arm:
+	usb_free_urb(wa->nep_urb);
+error_urb_alloc:
+	kfree(wa->nep_buffer);
+error_nep_buffer:
+	return -ENOMEM;
+}
+
+void wa_nep_destroy(struct wahc *wa)
+{
+	wa_nep_disarm(wa);
+	usb_free_urb(wa->nep_urb);
+	kfree(wa->nep_buffer);
+}
diff --git a/drivers/staging/wusbcore/wa-rpipe.c b/drivers/staging/wusbcore/wa-rpipe.c
new file mode 100644
index 000000000000..a5734cbcd5ad
--- /dev/null
+++ b/drivers/staging/wusbcore/wa-rpipe.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB Wire Adapter
+ * rpipe management
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * FIXME: docs
+ *
+ * RPIPE
+ *
+ *   Targeted at different downstream endpoints
+ *
+ *   Descriptor: use to config the remote pipe.
+ *
+ *   The number of blocks could be dynamic (wBlocks in descriptor is
+ *   0)--need to schedule them then.
+ *
+ * Each bit in wa->rpipe_bm represents if an rpipe is being used or
+ * not. Rpipes are represented with a 'struct wa_rpipe' that is
+ * attached to the hcpriv member of a 'struct usb_host_endpoint'.
+ *
+ * When you need to xfer data to an endpoint, you get an rpipe for it
+ * with wa_ep_rpipe_get(), which gives you a reference to the rpipe
+ * and keeps a single one (the first one) with the endpoint. When you
+ * are done transferring, you drop that reference. At the end the
+ * rpipe is always allocated and bound to the endpoint. There it might
+ * be recycled when not used.
+ *
+ * Addresses:
+ *
+ *  We use a 1:1 mapping mechanism between port address (0 based
+ *  index, actually) and the address. The USB stack knows about this.
+ *
+ *  USB Stack port number    4 (1 based)
+ *  WUSB code port index     3 (0 based)
+ *  USB Address             5 (2 based -- 0 is for default, 1 for root hub)
+ *
+ *  Now, because we don't use the concept as default address exactly
+ *  like the (wired) USB code does, we need to kind of skip it. So we
+ *  never take addresses from the urb->pipe, but from the
+ *  urb->dev->devnum, to make sure that we always have the right
+ *  destination address.
+ */
+#include <linux/atomic.h>
+#include <linux/bitmap.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include "wusbhc.h"
+#include "wa-hc.h"
+
+static int __rpipe_get_descr(struct wahc *wa,
+			     struct usb_rpipe_descriptor *descr, u16 index)
+{
+	ssize_t result;
+	struct device *dev = &wa->usb_iface->dev;
+
+	/* Get the RPIPE descriptor -- we cannot use the usb_get_descriptor()
+	 * function because the arguments are different.
+	 */
+	result = usb_control_msg(
+		wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
+		USB_REQ_GET_DESCRIPTOR,
+		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_RPIPE,
+		USB_DT_RPIPE<<8, index, descr, sizeof(*descr),
+		USB_CTRL_GET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "rpipe %u: get descriptor failed: %d\n",
+			index, (int)result);
+		goto error;
+	}
+	if (result < sizeof(*descr)) {
+		dev_err(dev, "rpipe %u: got short descriptor "
+			"(%zd vs %zd bytes needed)\n",
+			index, result, sizeof(*descr));
+		result = -EINVAL;
+		goto error;
+	}
+	result = 0;
+
+error:
+	return result;
+}
+
+/*
+ *
+ * The descriptor is assumed to be properly initialized (ie: you got
+ * it through __rpipe_get_descr()).
+ */
+static int __rpipe_set_descr(struct wahc *wa,
+			     struct usb_rpipe_descriptor *descr, u16 index)
+{
+	ssize_t result;
+	struct device *dev = &wa->usb_iface->dev;
+
+	/* we cannot use the usb_get_descriptor() function because the
+	 * arguments are different.
+	 */
+	result = usb_control_msg(
+		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+		USB_REQ_SET_DESCRIPTOR,
+		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
+		USB_DT_RPIPE<<8, index, descr, sizeof(*descr),
+		USB_CTRL_SET_TIMEOUT);
+	if (result < 0) {
+		dev_err(dev, "rpipe %u: set descriptor failed: %d\n",
+			index, (int)result);
+		goto error;
+	}
+	if (result < sizeof(*descr)) {
+		dev_err(dev, "rpipe %u: sent short descriptor "
+			"(%zd vs %zd bytes required)\n",
+			index, result, sizeof(*descr));
+		result = -EINVAL;
+		goto error;
+	}
+	result = 0;
+
+error:
+	return result;
+
+}
+
+static void rpipe_init(struct wa_rpipe *rpipe)
+{
+	kref_init(&rpipe->refcnt);
+	spin_lock_init(&rpipe->seg_lock);
+	INIT_LIST_HEAD(&rpipe->seg_list);
+	INIT_LIST_HEAD(&rpipe->list_node);
+}
+
+static unsigned rpipe_get_idx(struct wahc *wa, unsigned rpipe_idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&wa->rpipe_lock, flags);
+	rpipe_idx = find_next_zero_bit(wa->rpipe_bm, wa->rpipes, rpipe_idx);
+	if (rpipe_idx < wa->rpipes)
+		set_bit(rpipe_idx, wa->rpipe_bm);
+	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
+
+	return rpipe_idx;
+}
+
+static void rpipe_put_idx(struct wahc *wa, unsigned rpipe_idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&wa->rpipe_lock, flags);
+	clear_bit(rpipe_idx, wa->rpipe_bm);
+	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
+}
+
+void rpipe_destroy(struct kref *_rpipe)
+{
+	struct wa_rpipe *rpipe = container_of(_rpipe, struct wa_rpipe, refcnt);
+	u8 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
+
+	if (rpipe->ep)
+		rpipe->ep->hcpriv = NULL;
+	rpipe_put_idx(rpipe->wa, index);
+	wa_put(rpipe->wa);
+	kfree(rpipe);
+}
+EXPORT_SYMBOL_GPL(rpipe_destroy);
+
+/*
+ * Locate an idle rpipe, create an structure for it and return it
+ *
+ * @wa	  is referenced and unlocked
+ * @crs   enum rpipe_attr, required endpoint characteristics
+ *
+ * The rpipe can be used only sequentially (not in parallel).
+ *
+ * The rpipe is moved into the "ready" state.
+ */
+static int rpipe_get_idle(struct wa_rpipe **prpipe, struct wahc *wa, u8 crs,
+			  gfp_t gfp)
+{
+	int result;
+	unsigned rpipe_idx;
+	struct wa_rpipe *rpipe;
+	struct device *dev = &wa->usb_iface->dev;
+
+	rpipe = kzalloc(sizeof(*rpipe), gfp);
+	if (rpipe == NULL)
+		return -ENOMEM;
+	rpipe_init(rpipe);
+
+	/* Look for an idle pipe */
+	for (rpipe_idx = 0; rpipe_idx < wa->rpipes; rpipe_idx++) {
+		rpipe_idx = rpipe_get_idx(wa, rpipe_idx);
+		if (rpipe_idx >= wa->rpipes)	/* no more pipes :( */
+			break;
+		result =  __rpipe_get_descr(wa, &rpipe->descr, rpipe_idx);
+		if (result < 0)
+			dev_err(dev, "Can't get descriptor for rpipe %u: %d\n",
+				rpipe_idx, result);
+		else if ((rpipe->descr.bmCharacteristics & crs) != 0)
+			goto found;
+		rpipe_put_idx(wa, rpipe_idx);
+	}
+	*prpipe = NULL;
+	kfree(rpipe);
+	return -ENXIO;
+
+found:
+	set_bit(rpipe_idx, wa->rpipe_bm);
+	rpipe->wa = wa_get(wa);
+	*prpipe = rpipe;
+	return 0;
+}
+
+static int __rpipe_reset(struct wahc *wa, unsigned index)
+{
+	int result;
+	struct device *dev = &wa->usb_iface->dev;
+
+	result = usb_control_msg(
+		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+		USB_REQ_RPIPE_RESET,
+		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
+		0, index, NULL, 0, USB_CTRL_SET_TIMEOUT);
+	if (result < 0)
+		dev_err(dev, "rpipe %u: reset failed: %d\n",
+			index, result);
+	return result;
+}
+
+/*
+ * Fake companion descriptor for ep0
+ *
+ * See WUSB1.0[7.4.4], most of this is zero for bulk/int/ctl
+ */
+static struct usb_wireless_ep_comp_descriptor epc0 = {
+	.bLength = sizeof(epc0),
+	.bDescriptorType = USB_DT_WIRELESS_ENDPOINT_COMP,
+	.bMaxBurst = 1,
+	.bMaxSequence = 2,
+};
+
+/*
+ * Look for EP companion descriptor
+ *
+ * Get there, look for Inara in the endpoint's extra descriptors
+ */
+static struct usb_wireless_ep_comp_descriptor *rpipe_epc_find(
+		struct device *dev, struct usb_host_endpoint *ep)
+{
+	void *itr;
+	size_t itr_size;
+	struct usb_descriptor_header *hdr;
+	struct usb_wireless_ep_comp_descriptor *epcd;
+
+	if (ep->desc.bEndpointAddress == 0) {
+		epcd = &epc0;
+		goto out;
+	}
+	itr = ep->extra;
+	itr_size = ep->extralen;
+	epcd = NULL;
+	while (itr_size > 0) {
+		if (itr_size < sizeof(*hdr)) {
+			dev_err(dev, "HW Bug? ep 0x%02x: extra descriptors "
+				"at offset %zu: only %zu bytes left\n",
+				ep->desc.bEndpointAddress,
+				itr - (void *) ep->extra, itr_size);
+			break;
+		}
+		hdr = itr;
+		if (hdr->bDescriptorType == USB_DT_WIRELESS_ENDPOINT_COMP) {
+			epcd = itr;
+			break;
+		}
+		if (hdr->bLength > itr_size) {
+			dev_err(dev, "HW Bug? ep 0x%02x: extra descriptor "
+				"at offset %zu (type 0x%02x) "
+				"length %d but only %zu bytes left\n",
+				ep->desc.bEndpointAddress,
+				itr - (void *) ep->extra, hdr->bDescriptorType,
+				hdr->bLength, itr_size);
+			break;
+		}
+		itr += hdr->bLength;
+		itr_size -= hdr->bLength;
+	}
+out:
+	return epcd;
+}
+
+/*
+ * Aim an rpipe to its device & endpoint destination
+ *
+ * Make sure we change the address to unauthenticated if the device
+ * is WUSB and it is not authenticated.
+ */
+static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa,
+		     struct usb_host_endpoint *ep, struct urb *urb, gfp_t gfp)
+{
+	int result = -ENOMSG;	/* better code for lack of companion? */
+	struct device *dev = &wa->usb_iface->dev;
+	struct usb_device *usb_dev = urb->dev;
+	struct usb_wireless_ep_comp_descriptor *epcd;
+	u32 ack_window, epcd_max_sequence;
+	u8 unauth;
+
+	epcd = rpipe_epc_find(dev, ep);
+	if (epcd == NULL) {
+		dev_err(dev, "ep 0x%02x: can't find companion descriptor\n",
+			ep->desc.bEndpointAddress);
+		goto error;
+	}
+	unauth = usb_dev->wusb && !usb_dev->authenticated ? 0x80 : 0;
+	__rpipe_reset(wa, le16_to_cpu(rpipe->descr.wRPipeIndex));
+	atomic_set(&rpipe->segs_available,
+		le16_to_cpu(rpipe->descr.wRequests));
+	/* FIXME: block allocation system; request with queuing and timeout */
+	/* FIXME: compute so seg_size > ep->maxpktsize */
+	rpipe->descr.wBlocks = cpu_to_le16(16);		/* given */
+	/* ep0 maxpktsize is 0x200 (WUSB1.0[4.8.1]) */
+	if (usb_endpoint_xfer_isoc(&ep->desc))
+		rpipe->descr.wMaxPacketSize = epcd->wOverTheAirPacketSize;
+	else
+		rpipe->descr.wMaxPacketSize = ep->desc.wMaxPacketSize;
+
+	rpipe->descr.hwa_bMaxBurst = max(min_t(unsigned int,
+				epcd->bMaxBurst, 16U), 1U);
+	rpipe->descr.hwa_bDeviceInfoIndex =
+			wusb_port_no_to_idx(urb->dev->portnum);
+	/* FIXME: use maximum speed as supported or recommended by device */
+	rpipe->descr.bSpeed = usb_pipeendpoint(urb->pipe) == 0 ?
+		UWB_PHY_RATE_53 : UWB_PHY_RATE_200;
+
+	dev_dbg(dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n",
+		urb->dev->devnum, urb->dev->devnum | unauth,
+		le16_to_cpu(rpipe->descr.wRPipeIndex),
+		usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed);
+
+	rpipe->descr.hwa_reserved = 0;
+
+	rpipe->descr.bEndpointAddress = ep->desc.bEndpointAddress;
+	/* FIXME: bDataSequence */
+	rpipe->descr.bDataSequence = 0;
+
+	/* start with base window of hwa_bMaxBurst bits starting at 0. */
+	ack_window = 0xFFFFFFFF >> (32 - rpipe->descr.hwa_bMaxBurst);
+	rpipe->descr.dwCurrentWindow = cpu_to_le32(ack_window);
+	epcd_max_sequence = max(min_t(unsigned int,
+			epcd->bMaxSequence, 32U), 2U);
+	rpipe->descr.bMaxDataSequence = epcd_max_sequence - 1;
+	rpipe->descr.bInterval = ep->desc.bInterval;
+	if (usb_endpoint_xfer_isoc(&ep->desc))
+		rpipe->descr.bOverTheAirInterval = epcd->bOverTheAirInterval;
+	else
+		rpipe->descr.bOverTheAirInterval = 0;	/* 0 if not isoc */
+	/* FIXME: xmit power & preamble blah blah */
+	rpipe->descr.bmAttribute = (ep->desc.bmAttributes &
+					USB_ENDPOINT_XFERTYPE_MASK);
+	/* rpipe->descr.bmCharacteristics RO */
+	rpipe->descr.bmRetryOptions = (wa->wusb->retry_count & 0xF);
+	/* FIXME: use for assessing link quality? */
+	rpipe->descr.wNumTransactionErrors = 0;
+	result = __rpipe_set_descr(wa, &rpipe->descr,
+				   le16_to_cpu(rpipe->descr.wRPipeIndex));
+	if (result < 0) {
+		dev_err(dev, "Cannot aim rpipe: %d\n", result);
+		goto error;
+	}
+	result = 0;
+error:
+	return result;
+}
+
+/*
+ * Check an aimed rpipe to make sure it points to where we want
+ *
+ * We use bit 19 of the Linux USB pipe bitmap for unauth vs auth
+ * space; when it is like that, we or 0x80 to make an unauth address.
+ */
+static int rpipe_check_aim(const struct wa_rpipe *rpipe, const struct wahc *wa,
+			   const struct usb_host_endpoint *ep,
+			   const struct urb *urb, gfp_t gfp)
+{
+	int result = 0;
+	struct device *dev = &wa->usb_iface->dev;
+	u8 portnum = wusb_port_no_to_idx(urb->dev->portnum);
+
+#define AIM_CHECK(rdf, val, text)					\
+	do {								\
+		if (rpipe->descr.rdf != (val)) {			\
+			dev_err(dev,					\
+				"rpipe aim discrepancy: " #rdf " " text "\n", \
+				rpipe->descr.rdf, (val));		\
+			result = -EINVAL;				\
+			WARN_ON(1);					\
+		}							\
+	} while (0)
+	AIM_CHECK(hwa_bDeviceInfoIndex, portnum, "(%u vs %u)");
+	AIM_CHECK(bSpeed, usb_pipeendpoint(urb->pipe) == 0 ?
+			UWB_PHY_RATE_53 : UWB_PHY_RATE_200,
+		  "(%u vs %u)");
+	AIM_CHECK(bEndpointAddress, ep->desc.bEndpointAddress, "(%u vs %u)");
+	AIM_CHECK(bInterval, ep->desc.bInterval, "(%u vs %u)");
+	AIM_CHECK(bmAttribute, ep->desc.bmAttributes & 0x03, "(%u vs %u)");
+#undef AIM_CHECK
+	return result;
+}
+
+#ifndef CONFIG_BUG
+#define CONFIG_BUG 0
+#endif
+
+/*
+ * Make sure there is an rpipe allocated for an endpoint
+ *
+ * If already allocated, we just refcount it; if not, we get an
+ * idle one, aim it to the right location and take it.
+ *
+ * Attaches to ep->hcpriv and rpipe->ep to ep.
+ */
+int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep,
+		    struct urb *urb, gfp_t gfp)
+{
+	int result = 0;
+	struct device *dev = &wa->usb_iface->dev;
+	struct wa_rpipe *rpipe;
+	u8 eptype;
+
+	mutex_lock(&wa->rpipe_mutex);
+	rpipe = ep->hcpriv;
+	if (rpipe != NULL) {
+		if (CONFIG_BUG == 1) {
+			result = rpipe_check_aim(rpipe, wa, ep, urb, gfp);
+			if (result < 0)
+				goto error;
+		}
+		__rpipe_get(rpipe);
+		dev_dbg(dev, "ep 0x%02x: reusing rpipe %u\n",
+			ep->desc.bEndpointAddress,
+			le16_to_cpu(rpipe->descr.wRPipeIndex));
+	} else {
+		/* hmm, assign idle rpipe, aim it */
+		result = -ENOBUFS;
+		eptype = ep->desc.bmAttributes & 0x03;
+		result = rpipe_get_idle(&rpipe, wa, 1 << eptype, gfp);
+		if (result < 0)
+			goto error;
+		result = rpipe_aim(rpipe, wa, ep, urb, gfp);
+		if (result < 0) {
+			rpipe_put(rpipe);
+			goto error;
+		}
+		ep->hcpriv = rpipe;
+		rpipe->ep = ep;
+		__rpipe_get(rpipe);	/* for caching into ep->hcpriv */
+		dev_dbg(dev, "ep 0x%02x: using rpipe %u\n",
+			ep->desc.bEndpointAddress,
+			le16_to_cpu(rpipe->descr.wRPipeIndex));
+	}
+error:
+	mutex_unlock(&wa->rpipe_mutex);
+	return result;
+}
+
+/*
+ * Allocate the bitmap for each rpipe.
+ */
+int wa_rpipes_create(struct wahc *wa)
+{
+	wa->rpipes = le16_to_cpu(wa->wa_descr->wNumRPipes);
+	wa->rpipe_bm = bitmap_zalloc(wa->rpipes, GFP_KERNEL);
+	if (wa->rpipe_bm == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void wa_rpipes_destroy(struct wahc *wa)
+{
+	struct device *dev = &wa->usb_iface->dev;
+
+	if (!bitmap_empty(wa->rpipe_bm, wa->rpipes)) {
+		WARN_ON(1);
+		dev_err(dev, "BUG: pipes not released on exit: %*pb\n",
+			wa->rpipes, wa->rpipe_bm);
+	}
+	bitmap_free(wa->rpipe_bm);
+}
+
+/*
+ * Release resources allocated for an endpoint
+ *
+ * If there is an associated rpipe to this endpoint, Abort any pending
+ * transfers and put it. If the rpipe ends up being destroyed,
+ * __rpipe_destroy() will cleanup ep->hcpriv.
+ *
+ * This is called before calling hcd->stop(), so you don't need to do
+ * anything else in there.
+ */
+void rpipe_ep_disable(struct wahc *wa, struct usb_host_endpoint *ep)
+{
+	struct wa_rpipe *rpipe;
+
+	mutex_lock(&wa->rpipe_mutex);
+	rpipe = ep->hcpriv;
+	if (rpipe != NULL) {
+		u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
+
+		usb_control_msg(
+			wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			USB_REQ_RPIPE_ABORT,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
+			0, index, NULL, 0, USB_CTRL_SET_TIMEOUT);
+		rpipe_put(rpipe);
+	}
+	mutex_unlock(&wa->rpipe_mutex);
+}
+EXPORT_SYMBOL_GPL(rpipe_ep_disable);
+
+/* Clear the stalled status of an RPIPE. */
+void rpipe_clear_feature_stalled(struct wahc *wa, struct usb_host_endpoint *ep)
+{
+	struct wa_rpipe *rpipe;
+
+	mutex_lock(&wa->rpipe_mutex);
+	rpipe = ep->hcpriv;
+	if (rpipe != NULL) {
+		u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
+
+		usb_control_msg(
+			wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
+			USB_REQ_CLEAR_FEATURE,
+			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
+			RPIPE_STALL, index, NULL, 0, USB_CTRL_SET_TIMEOUT);
+	}
+	mutex_unlock(&wa->rpipe_mutex);
+}
+EXPORT_SYMBOL_GPL(rpipe_clear_feature_stalled);
diff --git a/drivers/staging/wusbcore/wa-xfer.c b/drivers/staging/wusbcore/wa-xfer.c
new file mode 100644
index 000000000000..abf88cea37bb
--- /dev/null
+++ b/drivers/staging/wusbcore/wa-xfer.c
@@ -0,0 +1,2927 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * WUSB Wire Adapter
+ * Data transfer and URB enqueing
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * How transfers work: get a buffer, break it up in segments (segment
+ * size is a multiple of the maxpacket size). For each segment issue a
+ * segment request (struct wa_xfer_*), then send the data buffer if
+ * out or nothing if in (all over the DTO endpoint).
+ *
+ * For each submitted segment request, a notification will come over
+ * the NEP endpoint and a transfer result (struct xfer_result) will
+ * arrive in the DTI URB. Read it, get the xfer ID, see if there is
+ * data coming (inbound transfer), schedule a read and handle it.
+ *
+ * Sounds simple, it is a pain to implement.
+ *
+ *
+ * ENTRY POINTS
+ *
+ *   FIXME
+ *
+ * LIFE CYCLE / STATE DIAGRAM
+ *
+ *   FIXME
+ *
+ * THIS CODE IS DISGUSTING
+ *
+ *   Warned you are; it's my second try and still not happy with it.
+ *
+ * NOTES:
+ *
+ *   - No iso
+ *
+ *   - Supports DMA xfers, control, bulk and maybe interrupt
+ *
+ *   - Does not recycle unused rpipes
+ *
+ *     An rpipe is assigned to an endpoint the first time it is used,
+ *     and then it's there, assigned, until the endpoint is disabled
+ *     (destroyed [{h,d}wahc_op_ep_disable()]. The assignment of the
+ *     rpipe to the endpoint is done under the wa->rpipe_sem semaphore
+ *     (should be a mutex).
+ *
+ *     Two methods it could be done:
+ *
+ *     (a) set up a timer every time an rpipe's use count drops to 1
+ *         (which means unused) or when a transfer ends. Reset the
+ *         timer when a xfer is queued. If the timer expires, release
+ *         the rpipe [see rpipe_ep_disable()].
+ *
+ *     (b) when looking for free rpipes to attach [rpipe_get_by_ep()],
+ *         when none are found go over the list, check their endpoint
+ *         and their activity record (if no last-xfer-done-ts in the
+ *         last x seconds) take it
+ *
+ *     However, due to the fact that we have a set of limited
+ *     resources (max-segments-at-the-same-time per xfer,
+ *     xfers-per-ripe, blocks-per-rpipe, rpipes-per-host), at the end
+ *     we are going to have to rebuild all this based on an scheduler,
+ *     to where we have a list of transactions to do and based on the
+ *     availability of the different required components (blocks,
+ *     rpipes, segment slots, etc), we go scheduling them. Painful.
+ */
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/hash.h>
+#include <linux/ratelimit.h>
+#include <linux/export.h>
+#include <linux/scatterlist.h>
+
+#include "wa-hc.h"
+#include "wusbhc.h"
+
+enum {
+	/* [WUSB] section 8.3.3 allocates 7 bits for the segment index. */
+	WA_SEGS_MAX = 128,
+};
+
+enum wa_seg_status {
+	WA_SEG_NOTREADY,
+	WA_SEG_READY,
+	WA_SEG_DELAYED,
+	WA_SEG_SUBMITTED,
+	WA_SEG_PENDING,
+	WA_SEG_DTI_PENDING,
+	WA_SEG_DONE,
+	WA_SEG_ERROR,
+	WA_SEG_ABORTED,
+};
+
+static void wa_xfer_delayed_run(struct wa_rpipe *);
+static int __wa_xfer_delayed_run(struct wa_rpipe *rpipe, int *dto_waiting);
+
+/*
+ * Life cycle governed by 'struct urb' (the refcount of the struct is
+ * that of the 'struct urb' and usb_free_urb() would free the whole
+ * struct).
+ */
+struct wa_seg {
+	struct urb tr_urb;		/* transfer request urb. */
+	struct urb *isoc_pack_desc_urb;	/* for isoc packet descriptor. */
+	struct urb *dto_urb;		/* for data output. */
+	struct list_head list_node;	/* for rpipe->req_list */
+	struct wa_xfer *xfer;		/* out xfer */
+	u8 index;			/* which segment we are */
+	int isoc_frame_count;	/* number of isoc frames in this segment. */
+	int isoc_frame_offset;	/* starting frame offset in the xfer URB. */
+	/* Isoc frame that the current transfer buffer corresponds to. */
+	int isoc_frame_index;
+	int isoc_size;	/* size of all isoc frames sent by this seg. */
+	enum wa_seg_status status;
+	ssize_t result;			/* bytes xfered or error */
+	struct wa_xfer_hdr xfer_hdr;
+};
+
+static inline void wa_seg_init(struct wa_seg *seg)
+{
+	usb_init_urb(&seg->tr_urb);
+
+	/* set the remaining memory to 0. */
+	memset(((void *)seg) + sizeof(seg->tr_urb), 0,
+		sizeof(*seg) - sizeof(seg->tr_urb));
+}
+
+/*
+ * Protected by xfer->lock
+ *
+ */
+struct wa_xfer {
+	struct kref refcnt;
+	struct list_head list_node;
+	spinlock_t lock;
+	u32 id;
+
+	struct wahc *wa;		/* Wire adapter we are plugged to */
+	struct usb_host_endpoint *ep;
+	struct urb *urb;		/* URB we are transferring for */
+	struct wa_seg **seg;		/* transfer segments */
+	u8 segs, segs_submitted, segs_done;
+	unsigned is_inbound:1;
+	unsigned is_dma:1;
+	size_t seg_size;
+	int result;
+
+	gfp_t gfp;			/* allocation mask */
+
+	struct wusb_dev *wusb_dev;	/* for activity timestamps */
+};
+
+static void __wa_populate_dto_urb_isoc(struct wa_xfer *xfer,
+	struct wa_seg *seg, int curr_iso_frame);
+static void wa_complete_remaining_xfer_segs(struct wa_xfer *xfer,
+		int starting_index, enum wa_seg_status status);
+
+static inline void wa_xfer_init(struct wa_xfer *xfer)
+{
+	kref_init(&xfer->refcnt);
+	INIT_LIST_HEAD(&xfer->list_node);
+	spin_lock_init(&xfer->lock);
+}
+
+/*
+ * Destroy a transfer structure
+ *
+ * Note that freeing xfer->seg[cnt]->tr_urb will free the containing
+ * xfer->seg[cnt] memory that was allocated by __wa_xfer_setup_segs.
+ */
+static void wa_xfer_destroy(struct kref *_xfer)
+{
+	struct wa_xfer *xfer = container_of(_xfer, struct wa_xfer, refcnt);
+	if (xfer->seg) {
+		unsigned cnt;
+		for (cnt = 0; cnt < xfer->segs; cnt++) {
+			struct wa_seg *seg = xfer->seg[cnt];
+			if (seg) {
+				usb_free_urb(seg->isoc_pack_desc_urb);
+				if (seg->dto_urb) {
+					kfree(seg->dto_urb->sg);
+					usb_free_urb(seg->dto_urb);
+				}
+				usb_free_urb(&seg->tr_urb);
+			}
+		}
+		kfree(xfer->seg);
+	}
+	kfree(xfer);
+}
+
+static void wa_xfer_get(struct wa_xfer *xfer)
+{
+	kref_get(&xfer->refcnt);
+}
+
+static void wa_xfer_put(struct wa_xfer *xfer)
+{
+	kref_put(&xfer->refcnt, wa_xfer_destroy);
+}
+
+/*
+ * Try to get exclusive access to the DTO endpoint resource.  Return true
+ * if successful.
+ */
+static inline int __wa_dto_try_get(struct wahc *wa)
+{
+	return (test_and_set_bit(0, &wa->dto_in_use) == 0);
+}
+
+/* Release the DTO endpoint resource. */
+static inline void __wa_dto_put(struct wahc *wa)
+{
+	clear_bit_unlock(0, &wa->dto_in_use);
+}
+
+/* Service RPIPEs that are waiting on the DTO resource. */
+static void wa_check_for_delayed_rpipes(struct wahc *wa)
+{
+	unsigned long flags;
+	int dto_waiting = 0;
+	struct wa_rpipe *rpipe;
+
+	spin_lock_irqsave(&wa->rpipe_lock, flags);
+	while (!list_empty(&wa->rpipe_delayed_list) && !dto_waiting) {
+		rpipe = list_first_entry(&wa->rpipe_delayed_list,
+				struct wa_rpipe, list_node);
+		__wa_xfer_delayed_run(rpipe, &dto_waiting);
+		/* remove this RPIPE from the list if it is not waiting. */
+		if (!dto_waiting) {
+			pr_debug("%s: RPIPE %d serviced and removed from delayed list.\n",
+				__func__,
+				le16_to_cpu(rpipe->descr.wRPipeIndex));
+			list_del_init(&rpipe->list_node);
+		}
+	}
+	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
+}
+
+/* add this RPIPE to the end of the delayed RPIPE list. */
+static void wa_add_delayed_rpipe(struct wahc *wa, struct wa_rpipe *rpipe)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&wa->rpipe_lock, flags);
+	/* add rpipe to the list if it is not already on it. */
+	if (list_empty(&rpipe->list_node)) {
+		pr_debug("%s: adding RPIPE %d to the delayed list.\n",
+			__func__, le16_to_cpu(rpipe->descr.wRPipeIndex));
+		list_add_tail(&rpipe->list_node, &wa->rpipe_delayed_list);
+	}
+	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
+}
+
+/*
+ * xfer is referenced
+ *
+ * xfer->lock has to be unlocked
+ *
+ * We take xfer->lock for setting the result; this is a barrier
+ * against drivers/usb/core/hcd.c:unlink1() being called after we call
+ * usb_hcd_giveback_urb() and wa_urb_dequeue() trying to get a
+ * reference to the transfer.
+ */
+static void wa_xfer_giveback(struct wa_xfer *xfer)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&xfer->wa->xfer_list_lock, flags);
+	list_del_init(&xfer->list_node);
+	usb_hcd_unlink_urb_from_ep(&(xfer->wa->wusb->usb_hcd), xfer->urb);
+	spin_unlock_irqrestore(&xfer->wa->xfer_list_lock, flags);
+	/* FIXME: segmentation broken -- kills DWA */
+	wusbhc_giveback_urb(xfer->wa->wusb, xfer->urb, xfer->result);
+	wa_put(xfer->wa);
+	wa_xfer_put(xfer);
+}
+
+/*
+ * xfer is referenced
+ *
+ * xfer->lock has to be unlocked
+ */
+static void wa_xfer_completion(struct wa_xfer *xfer)
+{
+	if (xfer->wusb_dev)
+		wusb_dev_put(xfer->wusb_dev);
+	rpipe_put(xfer->ep->hcpriv);
+	wa_xfer_giveback(xfer);
+}
+
+/*
+ * Initialize a transfer's ID
+ *
+ * We need to use a sequential number; if we use the pointer or the
+ * hash of the pointer, it can repeat over sequential transfers and
+ * then it will confuse the HWA....wonder why in hell they put a 32
+ * bit handle in there then.
+ */
+static void wa_xfer_id_init(struct wa_xfer *xfer)
+{
+	xfer->id = atomic_add_return(1, &xfer->wa->xfer_id_count);
+}
+
+/* Return the xfer's ID. */
+static inline u32 wa_xfer_id(struct wa_xfer *xfer)
+{
+	return xfer->id;
+}
+
+/* Return the xfer's ID in transport format (little endian). */
+static inline __le32 wa_xfer_id_le32(struct wa_xfer *xfer)
+{
+	return cpu_to_le32(xfer->id);
+}
+
+/*
+ * If transfer is done, wrap it up and return true
+ *
+ * xfer->lock has to be locked
+ */
+static unsigned __wa_xfer_is_done(struct wa_xfer *xfer)
+{
+	struct device *dev = &xfer->wa->usb_iface->dev;
+	unsigned result, cnt;
+	struct wa_seg *seg;
+	struct urb *urb = xfer->urb;
+	unsigned found_short = 0;
+
+	result = xfer->segs_done == xfer->segs_submitted;
+	if (result == 0)
+		goto out;
+	urb->actual_length = 0;
+	for (cnt = 0; cnt < xfer->segs; cnt++) {
+		seg = xfer->seg[cnt];
+		switch (seg->status) {
+		case WA_SEG_DONE:
+			if (found_short && seg->result > 0) {
+				dev_dbg(dev, "xfer %p ID %08X#%u: bad short segments (%zu)\n",
+					xfer, wa_xfer_id(xfer), cnt,
+					seg->result);
+				urb->status = -EINVAL;
+				goto out;
+			}
+			urb->actual_length += seg->result;
+			if (!(usb_pipeisoc(xfer->urb->pipe))
+				&& seg->result < xfer->seg_size
+			    && cnt != xfer->segs-1)
+				found_short = 1;
+			dev_dbg(dev, "xfer %p ID %08X#%u: DONE short %d "
+				"result %zu urb->actual_length %d\n",
+				xfer, wa_xfer_id(xfer), seg->index, found_short,
+				seg->result, urb->actual_length);
+			break;
+		case WA_SEG_ERROR:
+			xfer->result = seg->result;
+			dev_dbg(dev, "xfer %p ID %08X#%u: ERROR result %zi(0x%08zX)\n",
+				xfer, wa_xfer_id(xfer), seg->index, seg->result,
+				seg->result);
+			goto out;
+		case WA_SEG_ABORTED:
+			xfer->result = seg->result;
+			dev_dbg(dev, "xfer %p ID %08X#%u: ABORTED result %zi(0x%08zX)\n",
+				xfer, wa_xfer_id(xfer), seg->index, seg->result,
+				seg->result);
+			goto out;
+		default:
+			dev_warn(dev, "xfer %p ID %08X#%u: is_done bad state %d\n",
+				 xfer, wa_xfer_id(xfer), cnt, seg->status);
+			xfer->result = -EINVAL;
+			goto out;
+		}
+	}
+	xfer->result = 0;
+out:
+	return result;
+}
+
+/*
+ * Mark the given segment as done.  Return true if this completes the xfer.
+ * This should only be called for segs that have been submitted to an RPIPE.
+ * Delayed segs are not marked as submitted so they do not need to be marked
+ * as done when cleaning up.
+ *
+ * xfer->lock has to be locked
+ */
+static unsigned __wa_xfer_mark_seg_as_done(struct wa_xfer *xfer,
+	struct wa_seg *seg, enum wa_seg_status status)
+{
+	seg->status = status;
+	xfer->segs_done++;
+
+	/* check for done. */
+	return __wa_xfer_is_done(xfer);
+}
+
+/*
+ * Search for a transfer list ID on the HCD's URB list
+ *
+ * For 32 bit architectures, we use the pointer itself; for 64 bits, a
+ * 32-bit hash of the pointer.
+ *
+ * @returns NULL if not found.
+ */
+static struct wa_xfer *wa_xfer_get_by_id(struct wahc *wa, u32 id)
+{
+	unsigned long flags;
+	struct wa_xfer *xfer_itr;
+	spin_lock_irqsave(&wa->xfer_list_lock, flags);
+	list_for_each_entry(xfer_itr, &wa->xfer_list, list_node) {
+		if (id == xfer_itr->id) {
+			wa_xfer_get(xfer_itr);
+			goto out;
+		}
+	}
+	xfer_itr = NULL;
+out:
+	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
+	return xfer_itr;
+}
+
+struct wa_xfer_abort_buffer {
+	struct urb urb;
+	struct wahc *wa;
+	struct wa_xfer_abort cmd;
+};
+
+static void __wa_xfer_abort_cb(struct urb *urb)
+{
+	struct wa_xfer_abort_buffer *b = urb->context;
+	struct wahc *wa = b->wa;
+
+	/*
+	 * If the abort request URB failed, then the HWA did not get the abort
+	 * command.  Forcibly clean up the xfer without waiting for a Transfer
+	 * Result from the HWA.
+	 */
+	if (urb->status < 0) {
+		struct wa_xfer *xfer;
+		struct device *dev = &wa->usb_iface->dev;
+
+		xfer = wa_xfer_get_by_id(wa, le32_to_cpu(b->cmd.dwTransferID));
+		dev_err(dev, "%s: Transfer Abort request failed. result: %d\n",
+			__func__, urb->status);
+		if (xfer) {
+			unsigned long flags;
+			int done, seg_index = 0;
+			struct wa_rpipe *rpipe = xfer->ep->hcpriv;
+
+			dev_err(dev, "%s: cleaning up xfer %p ID 0x%08X.\n",
+				__func__, xfer, wa_xfer_id(xfer));
+			spin_lock_irqsave(&xfer->lock, flags);
+			/* skip done segs. */
+			while (seg_index < xfer->segs) {
+				struct wa_seg *seg = xfer->seg[seg_index];
+
+				if ((seg->status == WA_SEG_DONE) ||
+					(seg->status == WA_SEG_ERROR)) {
+					++seg_index;
+				} else {
+					break;
+				}
+			}
+			/* mark remaining segs as aborted. */
+			wa_complete_remaining_xfer_segs(xfer, seg_index,
+				WA_SEG_ABORTED);
+			done = __wa_xfer_is_done(xfer);
+			spin_unlock_irqrestore(&xfer->lock, flags);
+			if (done)
+				wa_xfer_completion(xfer);
+			wa_xfer_delayed_run(rpipe);
+			wa_xfer_put(xfer);
+		} else {
+			dev_err(dev, "%s: xfer ID 0x%08X already gone.\n",
+				 __func__, le32_to_cpu(b->cmd.dwTransferID));
+		}
+	}
+
+	wa_put(wa);	/* taken in __wa_xfer_abort */
+	usb_put_urb(&b->urb);
+}
+
+/*
+ * Aborts an ongoing transaction
+ *
+ * Assumes the transfer is referenced and locked and in a submitted
+ * state (mainly that there is an endpoint/rpipe assigned).
+ *
+ * The callback (see above) does nothing but freeing up the data by
+ * putting the URB. Because the URB is allocated at the head of the
+ * struct, the whole space we allocated is kfreed. *
+ */
+static int __wa_xfer_abort(struct wa_xfer *xfer)
+{
+	int result = -ENOMEM;
+	struct device *dev = &xfer->wa->usb_iface->dev;
+	struct wa_xfer_abort_buffer *b;
+	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
+
+	b = kmalloc(sizeof(*b), GFP_ATOMIC);
+	if (b == NULL)
+		goto error_kmalloc;
+	b->cmd.bLength =  sizeof(b->cmd);
+	b->cmd.bRequestType = WA_XFER_ABORT;
+	b->cmd.wRPipe = rpipe->descr.wRPipeIndex;
+	b->cmd.dwTransferID = wa_xfer_id_le32(xfer);
+	b->wa = wa_get(xfer->wa);
+
+	usb_init_urb(&b->urb);
+	usb_fill_bulk_urb(&b->urb, xfer->wa->usb_dev,
+		usb_sndbulkpipe(xfer->wa->usb_dev,
+				xfer->wa->dto_epd->bEndpointAddress),
+		&b->cmd, sizeof(b->cmd), __wa_xfer_abort_cb, b);
+	result = usb_submit_urb(&b->urb, GFP_ATOMIC);
+	if (result < 0)
+		goto error_submit;
+	return result;				/* callback frees! */
+
+
+error_submit:
+	wa_put(xfer->wa);
+	if (printk_ratelimit())
+		dev_err(dev, "xfer %p: Can't submit abort request: %d\n",
+			xfer, result);
+	kfree(b);
+error_kmalloc:
+	return result;
+
+}
+
+/*
+ * Calculate the number of isoc frames starting from isoc_frame_offset
+ * that will fit a in transfer segment.
+ */
+static int __wa_seg_calculate_isoc_frame_count(struct wa_xfer *xfer,
+	int isoc_frame_offset, int *total_size)
+{
+	int segment_size = 0, frame_count = 0;
+	int index = isoc_frame_offset;
+	struct usb_iso_packet_descriptor *iso_frame_desc =
+		xfer->urb->iso_frame_desc;
+
+	while ((index < xfer->urb->number_of_packets)
+		&& ((segment_size + iso_frame_desc[index].length)
+				<= xfer->seg_size)) {
+		/*
+		 * For Alereon HWA devices, only include an isoc frame in an
+		 * out segment if it is physically contiguous with the previous
+		 * frame.  This is required because those devices expect
+		 * the isoc frames to be sent as a single USB transaction as
+		 * opposed to one transaction per frame with standard HWA.
+		 */
+		if ((xfer->wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC)
+			&& (xfer->is_inbound == 0)
+			&& (index > isoc_frame_offset)
+			&& ((iso_frame_desc[index - 1].offset +
+				iso_frame_desc[index - 1].length) !=
+				iso_frame_desc[index].offset))
+			break;
+
+		/* this frame fits. count it. */
+		++frame_count;
+		segment_size += iso_frame_desc[index].length;
+
+		/* move to the next isoc frame. */
+		++index;
+	}
+
+	*total_size = segment_size;
+	return frame_count;
+}
+
+/*
+ *
+ * @returns < 0 on error, transfer segment request size if ok
+ */
+static ssize_t __wa_xfer_setup_sizes(struct wa_xfer *xfer,
+				     enum wa_xfer_type *pxfer_type)
+{
+	ssize_t result;
+	struct device *dev = &xfer->wa->usb_iface->dev;
+	size_t maxpktsize;
+	struct urb *urb = xfer->urb;
+	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
+
+	switch (rpipe->descr.bmAttribute & 0x3) {
+	case USB_ENDPOINT_XFER_CONTROL:
+		*pxfer_type = WA_XFER_TYPE_CTL;
+		result = sizeof(struct wa_xfer_ctl);
+		break;
+	case USB_ENDPOINT_XFER_INT:
+	case USB_ENDPOINT_XFER_BULK:
+		*pxfer_type = WA_XFER_TYPE_BI;
+		result = sizeof(struct wa_xfer_bi);
+		break;
+	case USB_ENDPOINT_XFER_ISOC:
+		*pxfer_type = WA_XFER_TYPE_ISO;
+		result = sizeof(struct wa_xfer_hwaiso);
+		break;
+	default:
+		/* never happens */
+		BUG();
+		result = -EINVAL;	/* shut gcc up */
+	}
+	xfer->is_inbound = urb->pipe & USB_DIR_IN ? 1 : 0;
+	xfer->is_dma = urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? 1 : 0;
+
+	maxpktsize = le16_to_cpu(rpipe->descr.wMaxPacketSize);
+	xfer->seg_size = le16_to_cpu(rpipe->descr.wBlocks)
+		* 1 << (xfer->wa->wa_descr->bRPipeBlockSize - 1);
+	/* Compute the segment size and make sure it is a multiple of
+	 * the maxpktsize (WUSB1.0[8.3.3.1])...not really too much of
+	 * a check (FIXME) */
+	if (xfer->seg_size < maxpktsize) {
+		dev_err(dev,
+			"HW BUG? seg_size %zu smaller than maxpktsize %zu\n",
+			xfer->seg_size, maxpktsize);
+		result = -EINVAL;
+		goto error;
+	}
+	xfer->seg_size = (xfer->seg_size / maxpktsize) * maxpktsize;
+	if ((rpipe->descr.bmAttribute & 0x3) == USB_ENDPOINT_XFER_ISOC) {
+		int index = 0;
+
+		xfer->segs = 0;
+		/*
+		 * loop over urb->number_of_packets to determine how many
+		 * xfer segments will be needed to send the isoc frames.
+		 */
+		while (index < urb->number_of_packets) {
+			int seg_size; /* don't care. */
+			index += __wa_seg_calculate_isoc_frame_count(xfer,
+					index, &seg_size);
+			++xfer->segs;
+		}
+	} else {
+		xfer->segs = DIV_ROUND_UP(urb->transfer_buffer_length,
+						xfer->seg_size);
+		if (xfer->segs == 0 && *pxfer_type == WA_XFER_TYPE_CTL)
+			xfer->segs = 1;
+	}
+
+	if (xfer->segs > WA_SEGS_MAX) {
+		dev_err(dev, "BUG? oops, number of segments %zu bigger than %d\n",
+			(urb->transfer_buffer_length/xfer->seg_size),
+			WA_SEGS_MAX);
+		result = -EINVAL;
+		goto error;
+	}
+error:
+	return result;
+}
+
+static void __wa_setup_isoc_packet_descr(
+		struct wa_xfer_packet_info_hwaiso *packet_desc,
+		struct wa_xfer *xfer,
+		struct wa_seg *seg) {
+	struct usb_iso_packet_descriptor *iso_frame_desc =
+		xfer->urb->iso_frame_desc;
+	int frame_index;
+
+	/* populate isoc packet descriptor. */
+	packet_desc->bPacketType = WA_XFER_ISO_PACKET_INFO;
+	packet_desc->wLength = cpu_to_le16(struct_size(packet_desc,
+					   PacketLength,
+					   seg->isoc_frame_count));
+	for (frame_index = 0; frame_index < seg->isoc_frame_count;
+		++frame_index) {
+		int offset_index = frame_index + seg->isoc_frame_offset;
+		packet_desc->PacketLength[frame_index] =
+			cpu_to_le16(iso_frame_desc[offset_index].length);
+	}
+}
+
+
+/* Fill in the common request header and xfer-type specific data. */
+static void __wa_xfer_setup_hdr0(struct wa_xfer *xfer,
+				 struct wa_xfer_hdr *xfer_hdr0,
+				 enum wa_xfer_type xfer_type,
+				 size_t xfer_hdr_size)
+{
+	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
+	struct wa_seg *seg = xfer->seg[0];
+
+	xfer_hdr0 = &seg->xfer_hdr;
+	xfer_hdr0->bLength = xfer_hdr_size;
+	xfer_hdr0->bRequestType = xfer_type;
+	xfer_hdr0->wRPipe = rpipe->descr.wRPipeIndex;
+	xfer_hdr0->dwTransferID = wa_xfer_id_le32(xfer);
+	xfer_hdr0->bTransferSegment = 0;
+	switch (xfer_type) {
+	case WA_XFER_TYPE_CTL: {
+		struct wa_xfer_ctl *xfer_ctl =
+			container_of(xfer_hdr0, struct wa_xfer_ctl, hdr);
+		xfer_ctl->bmAttribute = xfer->is_inbound ? 1 : 0;
+		memcpy(&xfer_ctl->baSetupData, xfer->urb->setup_packet,
+		       sizeof(xfer_ctl->baSetupData));
+		break;
+	}
+	case WA_XFER_TYPE_BI:
+		break;
+	case WA_XFER_TYPE_ISO: {
+		struct wa_xfer_hwaiso *xfer_iso =
+			container_of(xfer_hdr0, struct wa_xfer_hwaiso, hdr);
+		struct wa_xfer_packet_info_hwaiso *packet_desc =
+			((void *)xfer_iso) + xfer_hdr_size;
+
+		/* populate the isoc section of the transfer request. */
+		xfer_iso->dwNumOfPackets = cpu_to_le32(seg->isoc_frame_count);
+		/* populate isoc packet descriptor. */
+		__wa_setup_isoc_packet_descr(packet_desc, xfer, seg);
+		break;
+	}
+	default:
+		BUG();
+	};
+}
+
+/*
+ * Callback for the OUT data phase of the segment request
+ *
+ * Check wa_seg_tr_cb(); most comments also apply here because this
+ * function does almost the same thing and they work closely
+ * together.
+ *
+ * If the seg request has failed but this DTO phase has succeeded,
+ * wa_seg_tr_cb() has already failed the segment and moved the
+ * status to WA_SEG_ERROR, so this will go through 'case 0' and
+ * effectively do nothing.
+ */
+static void wa_seg_dto_cb(struct urb *urb)
+{
+	struct wa_seg *seg = urb->context;
+	struct wa_xfer *xfer = seg->xfer;
+	struct wahc *wa;
+	struct device *dev;
+	struct wa_rpipe *rpipe;
+	unsigned long flags;
+	unsigned rpipe_ready = 0;
+	int data_send_done = 1, release_dto = 0, holding_dto = 0;
+	u8 done = 0;
+	int result;
+
+	/* free the sg if it was used. */
+	kfree(urb->sg);
+	urb->sg = NULL;
+
+	spin_lock_irqsave(&xfer->lock, flags);
+	wa = xfer->wa;
+	dev = &wa->usb_iface->dev;
+	if (usb_pipeisoc(xfer->urb->pipe)) {
+		/* Alereon HWA sends all isoc frames in a single transfer. */
+		if (wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC)
+			seg->isoc_frame_index += seg->isoc_frame_count;
+		else
+			seg->isoc_frame_index += 1;
+		if (seg->isoc_frame_index < seg->isoc_frame_count) {
+			data_send_done = 0;
+			holding_dto = 1; /* checked in error cases. */
+			/*
+			 * if this is the last isoc frame of the segment, we
+			 * can release DTO after sending this frame.
+			 */
+			if ((seg->isoc_frame_index + 1) >=
+				seg->isoc_frame_count)
+				release_dto = 1;
+		}
+		dev_dbg(dev, "xfer 0x%08X#%u: isoc frame = %d, holding_dto = %d, release_dto = %d.\n",
+			wa_xfer_id(xfer), seg->index, seg->isoc_frame_index,
+			holding_dto, release_dto);
+	}
+	spin_unlock_irqrestore(&xfer->lock, flags);
+
+	switch (urb->status) {
+	case 0:
+		spin_lock_irqsave(&xfer->lock, flags);
+		seg->result += urb->actual_length;
+		if (data_send_done) {
+			dev_dbg(dev, "xfer 0x%08X#%u: data out done (%zu bytes)\n",
+				wa_xfer_id(xfer), seg->index, seg->result);
+			if (seg->status < WA_SEG_PENDING)
+				seg->status = WA_SEG_PENDING;
+		} else {
+			/* should only hit this for isoc xfers. */
+			/*
+			 * Populate the dto URB with the next isoc frame buffer,
+			 * send the URB and release DTO if we no longer need it.
+			 */
+			 __wa_populate_dto_urb_isoc(xfer, seg,
+				seg->isoc_frame_offset + seg->isoc_frame_index);
+
+			/* resubmit the URB with the next isoc frame. */
+			/* take a ref on resubmit. */
+			wa_xfer_get(xfer);
+			result = usb_submit_urb(seg->dto_urb, GFP_ATOMIC);
+			if (result < 0) {
+				dev_err(dev, "xfer 0x%08X#%u: DTO submit failed: %d\n",
+				       wa_xfer_id(xfer), seg->index, result);
+				spin_unlock_irqrestore(&xfer->lock, flags);
+				goto error_dto_submit;
+			}
+		}
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		if (release_dto) {
+			__wa_dto_put(wa);
+			wa_check_for_delayed_rpipes(wa);
+		}
+		break;
+	case -ECONNRESET:	/* URB unlinked; no need to do anything */
+	case -ENOENT:		/* as it was done by the who unlinked us */
+		if (holding_dto) {
+			__wa_dto_put(wa);
+			wa_check_for_delayed_rpipes(wa);
+		}
+		break;
+	default:		/* Other errors ... */
+		dev_err(dev, "xfer 0x%08X#%u: data out error %d\n",
+			wa_xfer_id(xfer), seg->index, urb->status);
+		goto error_default;
+	}
+
+	/* taken when this URB was submitted. */
+	wa_xfer_put(xfer);
+	return;
+
+error_dto_submit:
+	/* taken on resubmit attempt. */
+	wa_xfer_put(xfer);
+error_default:
+	spin_lock_irqsave(&xfer->lock, flags);
+	rpipe = xfer->ep->hcpriv;
+	if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
+		    EDC_ERROR_TIMEFRAME)){
+		dev_err(dev, "DTO: URB max acceptable errors exceeded, resetting device\n");
+		wa_reset_all(wa);
+	}
+	if (seg->status != WA_SEG_ERROR) {
+		seg->result = urb->status;
+		__wa_xfer_abort(xfer);
+		rpipe_ready = rpipe_avail_inc(rpipe);
+		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_ERROR);
+	}
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	if (holding_dto) {
+		__wa_dto_put(wa);
+		wa_check_for_delayed_rpipes(wa);
+	}
+	if (done)
+		wa_xfer_completion(xfer);
+	if (rpipe_ready)
+		wa_xfer_delayed_run(rpipe);
+	/* taken when this URB was submitted. */
+	wa_xfer_put(xfer);
+}
+
+/*
+ * Callback for the isoc packet descriptor phase of the segment request
+ *
+ * Check wa_seg_tr_cb(); most comments also apply here because this
+ * function does almost the same thing and they work closely
+ * together.
+ *
+ * If the seg request has failed but this phase has succeeded,
+ * wa_seg_tr_cb() has already failed the segment and moved the
+ * status to WA_SEG_ERROR, so this will go through 'case 0' and
+ * effectively do nothing.
+ */
+static void wa_seg_iso_pack_desc_cb(struct urb *urb)
+{
+	struct wa_seg *seg = urb->context;
+	struct wa_xfer *xfer = seg->xfer;
+	struct wahc *wa;
+	struct device *dev;
+	struct wa_rpipe *rpipe;
+	unsigned long flags;
+	unsigned rpipe_ready = 0;
+	u8 done = 0;
+
+	switch (urb->status) {
+	case 0:
+		spin_lock_irqsave(&xfer->lock, flags);
+		wa = xfer->wa;
+		dev = &wa->usb_iface->dev;
+		dev_dbg(dev, "iso xfer %08X#%u: packet descriptor done\n",
+			wa_xfer_id(xfer), seg->index);
+		if (xfer->is_inbound && seg->status < WA_SEG_PENDING)
+			seg->status = WA_SEG_PENDING;
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		break;
+	case -ECONNRESET:	/* URB unlinked; no need to do anything */
+	case -ENOENT:		/* as it was done by the who unlinked us */
+		break;
+	default:		/* Other errors ... */
+		spin_lock_irqsave(&xfer->lock, flags);
+		wa = xfer->wa;
+		dev = &wa->usb_iface->dev;
+		rpipe = xfer->ep->hcpriv;
+		pr_err_ratelimited("iso xfer %08X#%u: packet descriptor error %d\n",
+				wa_xfer_id(xfer), seg->index, urb->status);
+		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
+			    EDC_ERROR_TIMEFRAME)){
+			dev_err(dev, "iso xfer: URB max acceptable errors exceeded, resetting device\n");
+			wa_reset_all(wa);
+		}
+		if (seg->status != WA_SEG_ERROR) {
+			usb_unlink_urb(seg->dto_urb);
+			seg->result = urb->status;
+			__wa_xfer_abort(xfer);
+			rpipe_ready = rpipe_avail_inc(rpipe);
+			done = __wa_xfer_mark_seg_as_done(xfer, seg,
+					WA_SEG_ERROR);
+		}
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		if (done)
+			wa_xfer_completion(xfer);
+		if (rpipe_ready)
+			wa_xfer_delayed_run(rpipe);
+	}
+	/* taken when this URB was submitted. */
+	wa_xfer_put(xfer);
+}
+
+/*
+ * Callback for the segment request
+ *
+ * If successful transition state (unless already transitioned or
+ * outbound transfer); otherwise, take a note of the error, mark this
+ * segment done and try completion.
+ *
+ * Note we don't access until we are sure that the transfer hasn't
+ * been cancelled (ECONNRESET, ENOENT), which could mean that
+ * seg->xfer could be already gone.
+ *
+ * We have to check before setting the status to WA_SEG_PENDING
+ * because sometimes the xfer result callback arrives before this
+ * callback (geeeeeeze), so it might happen that we are already in
+ * another state. As well, we don't set it if the transfer is not inbound,
+ * as in that case, wa_seg_dto_cb will do it when the OUT data phase
+ * finishes.
+ */
+static void wa_seg_tr_cb(struct urb *urb)
+{
+	struct wa_seg *seg = urb->context;
+	struct wa_xfer *xfer = seg->xfer;
+	struct wahc *wa;
+	struct device *dev;
+	struct wa_rpipe *rpipe;
+	unsigned long flags;
+	unsigned rpipe_ready;
+	u8 done = 0;
+
+	switch (urb->status) {
+	case 0:
+		spin_lock_irqsave(&xfer->lock, flags);
+		wa = xfer->wa;
+		dev = &wa->usb_iface->dev;
+		dev_dbg(dev, "xfer %p ID 0x%08X#%u: request done\n",
+			xfer, wa_xfer_id(xfer), seg->index);
+		if (xfer->is_inbound &&
+			seg->status < WA_SEG_PENDING &&
+			!(usb_pipeisoc(xfer->urb->pipe)))
+			seg->status = WA_SEG_PENDING;
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		break;
+	case -ECONNRESET:	/* URB unlinked; no need to do anything */
+	case -ENOENT:		/* as it was done by the who unlinked us */
+		break;
+	default:		/* Other errors ... */
+		spin_lock_irqsave(&xfer->lock, flags);
+		wa = xfer->wa;
+		dev = &wa->usb_iface->dev;
+		rpipe = xfer->ep->hcpriv;
+		if (printk_ratelimit())
+			dev_err(dev, "xfer %p ID 0x%08X#%u: request error %d\n",
+				xfer, wa_xfer_id(xfer), seg->index,
+				urb->status);
+		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
+			    EDC_ERROR_TIMEFRAME)){
+			dev_err(dev, "DTO: URB max acceptable errors "
+				"exceeded, resetting device\n");
+			wa_reset_all(wa);
+		}
+		usb_unlink_urb(seg->isoc_pack_desc_urb);
+		usb_unlink_urb(seg->dto_urb);
+		seg->result = urb->status;
+		__wa_xfer_abort(xfer);
+		rpipe_ready = rpipe_avail_inc(rpipe);
+		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_ERROR);
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		if (done)
+			wa_xfer_completion(xfer);
+		if (rpipe_ready)
+			wa_xfer_delayed_run(rpipe);
+	}
+	/* taken when this URB was submitted. */
+	wa_xfer_put(xfer);
+}
+
+/*
+ * Allocate an SG list to store bytes_to_transfer bytes and copy the
+ * subset of the in_sg that matches the buffer subset
+ * we are about to transfer.
+ */
+static struct scatterlist *wa_xfer_create_subset_sg(struct scatterlist *in_sg,
+	const unsigned int bytes_transferred,
+	const unsigned int bytes_to_transfer, int *out_num_sgs)
+{
+	struct scatterlist *out_sg;
+	unsigned int bytes_processed = 0, offset_into_current_page_data = 0,
+		nents;
+	struct scatterlist *current_xfer_sg = in_sg;
+	struct scatterlist *current_seg_sg, *last_seg_sg;
+
+	/* skip previously transferred pages. */
+	while ((current_xfer_sg) &&
+			(bytes_processed < bytes_transferred)) {
+		bytes_processed += current_xfer_sg->length;
+
+		/* advance the sg if current segment starts on or past the
+			next page. */
+		if (bytes_processed <= bytes_transferred)
+			current_xfer_sg = sg_next(current_xfer_sg);
+	}
+
+	/* the data for the current segment starts in current_xfer_sg.
+		calculate the offset. */
+	if (bytes_processed > bytes_transferred) {
+		offset_into_current_page_data = current_xfer_sg->length -
+			(bytes_processed - bytes_transferred);
+	}
+
+	/* calculate the number of pages needed by this segment. */
+	nents = DIV_ROUND_UP((bytes_to_transfer +
+		offset_into_current_page_data +
+		current_xfer_sg->offset),
+		PAGE_SIZE);
+
+	out_sg = kmalloc((sizeof(struct scatterlist) * nents), GFP_ATOMIC);
+	if (out_sg) {
+		sg_init_table(out_sg, nents);
+
+		/* copy the portion of the incoming SG that correlates to the
+		 * data to be transferred by this segment to the segment SG. */
+		last_seg_sg = current_seg_sg = out_sg;
+		bytes_processed = 0;
+
+		/* reset nents and calculate the actual number of sg entries
+			needed. */
+		nents = 0;
+		while ((bytes_processed < bytes_to_transfer) &&
+				current_seg_sg && current_xfer_sg) {
+			unsigned int page_len = min((current_xfer_sg->length -
+				offset_into_current_page_data),
+				(bytes_to_transfer - bytes_processed));
+
+			sg_set_page(current_seg_sg, sg_page(current_xfer_sg),
+				page_len,
+				current_xfer_sg->offset +
+				offset_into_current_page_data);
+
+			bytes_processed += page_len;
+
+			last_seg_sg = current_seg_sg;
+			current_seg_sg = sg_next(current_seg_sg);
+			current_xfer_sg = sg_next(current_xfer_sg);
+
+			/* only the first page may require additional offset. */
+			offset_into_current_page_data = 0;
+			nents++;
+		}
+
+		/* update num_sgs and terminate the list since we may have
+		 *  concatenated pages. */
+		sg_mark_end(last_seg_sg);
+		*out_num_sgs = nents;
+	}
+
+	return out_sg;
+}
+
+/*
+ * Populate DMA buffer info for the isoc dto urb.
+ */
+static void __wa_populate_dto_urb_isoc(struct wa_xfer *xfer,
+	struct wa_seg *seg, int curr_iso_frame)
+{
+	seg->dto_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	seg->dto_urb->sg = NULL;
+	seg->dto_urb->num_sgs = 0;
+	/* dto urb buffer address pulled from iso_frame_desc. */
+	seg->dto_urb->transfer_dma = xfer->urb->transfer_dma +
+		xfer->urb->iso_frame_desc[curr_iso_frame].offset;
+	/* The Alereon HWA sends a single URB with all isoc segs. */
+	if (xfer->wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC)
+		seg->dto_urb->transfer_buffer_length = seg->isoc_size;
+	else
+		seg->dto_urb->transfer_buffer_length =
+			xfer->urb->iso_frame_desc[curr_iso_frame].length;
+}
+
+/*
+ * Populate buffer ptr and size, DMA buffer or SG list for the dto urb.
+ */
+static int __wa_populate_dto_urb(struct wa_xfer *xfer,
+	struct wa_seg *seg, size_t buf_itr_offset, size_t buf_itr_size)
+{
+	int result = 0;
+
+	if (xfer->is_dma) {
+		seg->dto_urb->transfer_dma =
+			xfer->urb->transfer_dma + buf_itr_offset;
+		seg->dto_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+		seg->dto_urb->sg = NULL;
+		seg->dto_urb->num_sgs = 0;
+	} else {
+		/* do buffer or SG processing. */
+		seg->dto_urb->transfer_flags &=
+			~URB_NO_TRANSFER_DMA_MAP;
+		/* this should always be 0 before a resubmit. */
+		seg->dto_urb->num_mapped_sgs = 0;
+
+		if (xfer->urb->transfer_buffer) {
+			seg->dto_urb->transfer_buffer =
+				xfer->urb->transfer_buffer +
+				buf_itr_offset;
+			seg->dto_urb->sg = NULL;
+			seg->dto_urb->num_sgs = 0;
+		} else {
+			seg->dto_urb->transfer_buffer = NULL;
+
+			/*
+			 * allocate an SG list to store seg_size bytes
+			 * and copy the subset of the xfer->urb->sg that
+			 * matches the buffer subset we are about to
+			 * read.
+			 */
+			seg->dto_urb->sg = wa_xfer_create_subset_sg(
+				xfer->urb->sg,
+				buf_itr_offset, buf_itr_size,
+				&(seg->dto_urb->num_sgs));
+			if (!(seg->dto_urb->sg))
+				result = -ENOMEM;
+		}
+	}
+	seg->dto_urb->transfer_buffer_length = buf_itr_size;
+
+	return result;
+}
+
+/*
+ * Allocate the segs array and initialize each of them
+ *
+ * The segments are freed by wa_xfer_destroy() when the xfer use count
+ * drops to zero; however, because each segment is given the same life
+ * cycle as the USB URB it contains, it is actually freed by
+ * usb_put_urb() on the contained USB URB (twisted, eh?).
+ */
+static int __wa_xfer_setup_segs(struct wa_xfer *xfer, size_t xfer_hdr_size)
+{
+	int result, cnt, isoc_frame_offset = 0;
+	size_t alloc_size = sizeof(*xfer->seg[0])
+		- sizeof(xfer->seg[0]->xfer_hdr) + xfer_hdr_size;
+	struct usb_device *usb_dev = xfer->wa->usb_dev;
+	const struct usb_endpoint_descriptor *dto_epd = xfer->wa->dto_epd;
+	struct wa_seg *seg;
+	size_t buf_itr, buf_size, buf_itr_size;
+
+	result = -ENOMEM;
+	xfer->seg = kcalloc(xfer->segs, sizeof(xfer->seg[0]), GFP_ATOMIC);
+	if (xfer->seg == NULL)
+		goto error_segs_kzalloc;
+	buf_itr = 0;
+	buf_size = xfer->urb->transfer_buffer_length;
+	for (cnt = 0; cnt < xfer->segs; cnt++) {
+		size_t iso_pkt_descr_size = 0;
+		int seg_isoc_frame_count = 0, seg_isoc_size = 0;
+
+		/*
+		 * Adjust the size of the segment object to contain space for
+		 * the isoc packet descriptor buffer.
+		 */
+		if (usb_pipeisoc(xfer->urb->pipe)) {
+			seg_isoc_frame_count =
+				__wa_seg_calculate_isoc_frame_count(xfer,
+					isoc_frame_offset, &seg_isoc_size);
+
+			iso_pkt_descr_size =
+				sizeof(struct wa_xfer_packet_info_hwaiso) +
+				(seg_isoc_frame_count * sizeof(__le16));
+		}
+		result = -ENOMEM;
+		seg = xfer->seg[cnt] = kmalloc(alloc_size + iso_pkt_descr_size,
+						GFP_ATOMIC);
+		if (seg == NULL)
+			goto error_seg_kmalloc;
+		wa_seg_init(seg);
+		seg->xfer = xfer;
+		seg->index = cnt;
+		usb_fill_bulk_urb(&seg->tr_urb, usb_dev,
+				  usb_sndbulkpipe(usb_dev,
+						  dto_epd->bEndpointAddress),
+				  &seg->xfer_hdr, xfer_hdr_size,
+				  wa_seg_tr_cb, seg);
+		buf_itr_size = min(buf_size, xfer->seg_size);
+
+		if (usb_pipeisoc(xfer->urb->pipe)) {
+			seg->isoc_frame_count = seg_isoc_frame_count;
+			seg->isoc_frame_offset = isoc_frame_offset;
+			seg->isoc_size = seg_isoc_size;
+			/* iso packet descriptor. */
+			seg->isoc_pack_desc_urb =
+					usb_alloc_urb(0, GFP_ATOMIC);
+			if (seg->isoc_pack_desc_urb == NULL)
+				goto error_iso_pack_desc_alloc;
+			/*
+			 * The buffer for the isoc packet descriptor starts
+			 * after the transfer request header in the
+			 * segment object memory buffer.
+			 */
+			usb_fill_bulk_urb(
+				seg->isoc_pack_desc_urb, usb_dev,
+				usb_sndbulkpipe(usb_dev,
+					dto_epd->bEndpointAddress),
+				(void *)(&seg->xfer_hdr) +
+					xfer_hdr_size,
+				iso_pkt_descr_size,
+				wa_seg_iso_pack_desc_cb, seg);
+
+			/* adjust starting frame offset for next seg. */
+			isoc_frame_offset += seg_isoc_frame_count;
+		}
+
+		if (xfer->is_inbound == 0 && buf_size > 0) {
+			/* outbound data. */
+			seg->dto_urb = usb_alloc_urb(0, GFP_ATOMIC);
+			if (seg->dto_urb == NULL)
+				goto error_dto_alloc;
+			usb_fill_bulk_urb(
+				seg->dto_urb, usb_dev,
+				usb_sndbulkpipe(usb_dev,
+						dto_epd->bEndpointAddress),
+				NULL, 0, wa_seg_dto_cb, seg);
+
+			if (usb_pipeisoc(xfer->urb->pipe)) {
+				/*
+				 * Fill in the xfer buffer information for the
+				 * first isoc frame.  Subsequent frames in this
+				 * segment will be filled in and sent from the
+				 * DTO completion routine, if needed.
+				 */
+				__wa_populate_dto_urb_isoc(xfer, seg,
+					seg->isoc_frame_offset);
+			} else {
+				/* fill in the xfer buffer information. */
+				result = __wa_populate_dto_urb(xfer, seg,
+							buf_itr, buf_itr_size);
+				if (result < 0)
+					goto error_seg_outbound_populate;
+
+				buf_itr += buf_itr_size;
+				buf_size -= buf_itr_size;
+			}
+		}
+		seg->status = WA_SEG_READY;
+	}
+	return 0;
+
+	/*
+	 * Free the memory for the current segment which failed to init.
+	 * Use the fact that cnt is left at were it failed.  The remaining
+	 * segments will be cleaned up by wa_xfer_destroy.
+	 */
+error_seg_outbound_populate:
+	usb_free_urb(xfer->seg[cnt]->dto_urb);
+error_dto_alloc:
+	usb_free_urb(xfer->seg[cnt]->isoc_pack_desc_urb);
+error_iso_pack_desc_alloc:
+	kfree(xfer->seg[cnt]);
+	xfer->seg[cnt] = NULL;
+error_seg_kmalloc:
+error_segs_kzalloc:
+	return result;
+}
+
+/*
+ * Allocates all the stuff needed to submit a transfer
+ *
+ * Breaks the whole data buffer in a list of segments, each one has a
+ * structure allocated to it and linked in xfer->seg[index]
+ *
+ * FIXME: merge setup_segs() and the last part of this function, no
+ *        need to do two for loops when we could run everything in a
+ *        single one
+ */
+static int __wa_xfer_setup(struct wa_xfer *xfer, struct urb *urb)
+{
+	int result;
+	struct device *dev = &xfer->wa->usb_iface->dev;
+	enum wa_xfer_type xfer_type = 0; /* shut up GCC */
+	size_t xfer_hdr_size, cnt, transfer_size;
+	struct wa_xfer_hdr *xfer_hdr0, *xfer_hdr;
+
+	result = __wa_xfer_setup_sizes(xfer, &xfer_type);
+	if (result < 0)
+		goto error_setup_sizes;
+	xfer_hdr_size = result;
+	result = __wa_xfer_setup_segs(xfer, xfer_hdr_size);
+	if (result < 0) {
+		dev_err(dev, "xfer %p: Failed to allocate %d segments: %d\n",
+			xfer, xfer->segs, result);
+		goto error_setup_segs;
+	}
+	/* Fill the first header */
+	xfer_hdr0 = &xfer->seg[0]->xfer_hdr;
+	wa_xfer_id_init(xfer);
+	__wa_xfer_setup_hdr0(xfer, xfer_hdr0, xfer_type, xfer_hdr_size);
+
+	/* Fill remaining headers */
+	xfer_hdr = xfer_hdr0;
+	if (xfer_type == WA_XFER_TYPE_ISO) {
+		xfer_hdr0->dwTransferLength =
+			cpu_to_le32(xfer->seg[0]->isoc_size);
+		for (cnt = 1; cnt < xfer->segs; cnt++) {
+			struct wa_xfer_packet_info_hwaiso *packet_desc;
+			struct wa_seg *seg = xfer->seg[cnt];
+			struct wa_xfer_hwaiso *xfer_iso;
+
+			xfer_hdr = &seg->xfer_hdr;
+			xfer_iso = container_of(xfer_hdr,
+						struct wa_xfer_hwaiso, hdr);
+			packet_desc = ((void *)xfer_hdr) + xfer_hdr_size;
+			/*
+			 * Copy values from the 0th header. Segment specific
+			 * values are set below.
+			 */
+			memcpy(xfer_hdr, xfer_hdr0, xfer_hdr_size);
+			xfer_hdr->bTransferSegment = cnt;
+			xfer_hdr->dwTransferLength =
+				cpu_to_le32(seg->isoc_size);
+			xfer_iso->dwNumOfPackets =
+					cpu_to_le32(seg->isoc_frame_count);
+			__wa_setup_isoc_packet_descr(packet_desc, xfer, seg);
+			seg->status = WA_SEG_READY;
+		}
+	} else {
+		transfer_size = urb->transfer_buffer_length;
+		xfer_hdr0->dwTransferLength = transfer_size > xfer->seg_size ?
+			cpu_to_le32(xfer->seg_size) :
+			cpu_to_le32(transfer_size);
+		transfer_size -=  xfer->seg_size;
+		for (cnt = 1; cnt < xfer->segs; cnt++) {
+			xfer_hdr = &xfer->seg[cnt]->xfer_hdr;
+			memcpy(xfer_hdr, xfer_hdr0, xfer_hdr_size);
+			xfer_hdr->bTransferSegment = cnt;
+			xfer_hdr->dwTransferLength =
+				transfer_size > xfer->seg_size ?
+					cpu_to_le32(xfer->seg_size)
+					: cpu_to_le32(transfer_size);
+			xfer->seg[cnt]->status = WA_SEG_READY;
+			transfer_size -=  xfer->seg_size;
+		}
+	}
+	xfer_hdr->bTransferSegment |= 0x80;	/* this is the last segment */
+	result = 0;
+error_setup_segs:
+error_setup_sizes:
+	return result;
+}
+
+/*
+ *
+ *
+ * rpipe->seg_lock is held!
+ */
+static int __wa_seg_submit(struct wa_rpipe *rpipe, struct wa_xfer *xfer,
+			   struct wa_seg *seg, int *dto_done)
+{
+	int result;
+
+	/* default to done unless we encounter a multi-frame isoc segment. */
+	*dto_done = 1;
+
+	/*
+	 * Take a ref for each segment urb so the xfer cannot disappear until
+	 * all of the callbacks run.
+	 */
+	wa_xfer_get(xfer);
+	/* submit the transfer request. */
+	seg->status = WA_SEG_SUBMITTED;
+	result = usb_submit_urb(&seg->tr_urb, GFP_ATOMIC);
+	if (result < 0) {
+		pr_err("%s: xfer %p#%u: REQ submit failed: %d\n",
+		       __func__, xfer, seg->index, result);
+		wa_xfer_put(xfer);
+		goto error_tr_submit;
+	}
+	/* submit the isoc packet descriptor if present. */
+	if (seg->isoc_pack_desc_urb) {
+		wa_xfer_get(xfer);
+		result = usb_submit_urb(seg->isoc_pack_desc_urb, GFP_ATOMIC);
+		seg->isoc_frame_index = 0;
+		if (result < 0) {
+			pr_err("%s: xfer %p#%u: ISO packet descriptor submit failed: %d\n",
+			       __func__, xfer, seg->index, result);
+			wa_xfer_put(xfer);
+			goto error_iso_pack_desc_submit;
+		}
+	}
+	/* submit the out data if this is an out request. */
+	if (seg->dto_urb) {
+		struct wahc *wa = xfer->wa;
+		wa_xfer_get(xfer);
+		result = usb_submit_urb(seg->dto_urb, GFP_ATOMIC);
+		if (result < 0) {
+			pr_err("%s: xfer %p#%u: DTO submit failed: %d\n",
+			       __func__, xfer, seg->index, result);
+			wa_xfer_put(xfer);
+			goto error_dto_submit;
+		}
+		/*
+		 * If this segment contains more than one isoc frame, hold
+		 * onto the dto resource until we send all frames.
+		 * Only applies to non-Alereon devices.
+		 */
+		if (((wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC) == 0)
+			&& (seg->isoc_frame_count > 1))
+			*dto_done = 0;
+	}
+	rpipe_avail_dec(rpipe);
+	return 0;
+
+error_dto_submit:
+	usb_unlink_urb(seg->isoc_pack_desc_urb);
+error_iso_pack_desc_submit:
+	usb_unlink_urb(&seg->tr_urb);
+error_tr_submit:
+	seg->status = WA_SEG_ERROR;
+	seg->result = result;
+	*dto_done = 1;
+	return result;
+}
+
+/*
+ * Execute more queued request segments until the maximum concurrent allowed.
+ * Return true if the DTO resource was acquired and released.
+ *
+ * The ugly unlock/lock sequence on the error path is needed as the
+ * xfer->lock normally nests the seg_lock and not viceversa.
+ */
+static int __wa_xfer_delayed_run(struct wa_rpipe *rpipe, int *dto_waiting)
+{
+	int result, dto_acquired = 0, dto_done = 0;
+	struct device *dev = &rpipe->wa->usb_iface->dev;
+	struct wa_seg *seg;
+	struct wa_xfer *xfer;
+	unsigned long flags;
+
+	*dto_waiting = 0;
+
+	spin_lock_irqsave(&rpipe->seg_lock, flags);
+	while (atomic_read(&rpipe->segs_available) > 0
+	      && !list_empty(&rpipe->seg_list)
+	      && (dto_acquired = __wa_dto_try_get(rpipe->wa))) {
+		seg = list_first_entry(&(rpipe->seg_list), struct wa_seg,
+				 list_node);
+		list_del(&seg->list_node);
+		xfer = seg->xfer;
+		/*
+		 * Get a reference to the xfer in case the callbacks for the
+		 * URBs submitted by __wa_seg_submit attempt to complete
+		 * the xfer before this function completes.
+		 */
+		wa_xfer_get(xfer);
+		result = __wa_seg_submit(rpipe, xfer, seg, &dto_done);
+		/* release the dto resource if this RPIPE is done with it. */
+		if (dto_done)
+			__wa_dto_put(rpipe->wa);
+		dev_dbg(dev, "xfer %p ID %08X#%u submitted from delayed [%d segments available] %d\n",
+			xfer, wa_xfer_id(xfer), seg->index,
+			atomic_read(&rpipe->segs_available), result);
+		if (unlikely(result < 0)) {
+			int done;
+
+			spin_unlock_irqrestore(&rpipe->seg_lock, flags);
+			spin_lock_irqsave(&xfer->lock, flags);
+			__wa_xfer_abort(xfer);
+			/*
+			 * This seg was marked as submitted when it was put on
+			 * the RPIPE seg_list.  Mark it done.
+			 */
+			xfer->segs_done++;
+			done = __wa_xfer_is_done(xfer);
+			spin_unlock_irqrestore(&xfer->lock, flags);
+			if (done)
+				wa_xfer_completion(xfer);
+			spin_lock_irqsave(&rpipe->seg_lock, flags);
+		}
+		wa_xfer_put(xfer);
+	}
+	/*
+	 * Mark this RPIPE as waiting if dto was not acquired, there are
+	 * delayed segs and no active transfers to wake us up later.
+	 */
+	if (!dto_acquired && !list_empty(&rpipe->seg_list)
+		&& (atomic_read(&rpipe->segs_available) ==
+			le16_to_cpu(rpipe->descr.wRequests)))
+		*dto_waiting = 1;
+
+	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
+
+	return dto_done;
+}
+
+static void wa_xfer_delayed_run(struct wa_rpipe *rpipe)
+{
+	int dto_waiting;
+	int dto_done = __wa_xfer_delayed_run(rpipe, &dto_waiting);
+
+	/*
+	 * If this RPIPE is waiting on the DTO resource, add it to the tail of
+	 * the waiting list.
+	 * Otherwise, if the WA DTO resource was acquired and released by
+	 *  __wa_xfer_delayed_run, another RPIPE may have attempted to acquire
+	 * DTO and failed during that time.  Check the delayed list and process
+	 * any waiters.  Start searching from the next RPIPE index.
+	 */
+	if (dto_waiting)
+		wa_add_delayed_rpipe(rpipe->wa, rpipe);
+	else if (dto_done)
+		wa_check_for_delayed_rpipes(rpipe->wa);
+}
+
+/*
+ *
+ * xfer->lock is taken
+ *
+ * On failure submitting we just stop submitting and return error;
+ * wa_urb_enqueue_b() will execute the completion path
+ */
+static int __wa_xfer_submit(struct wa_xfer *xfer)
+{
+	int result, dto_acquired = 0, dto_done = 0, dto_waiting = 0;
+	struct wahc *wa = xfer->wa;
+	struct device *dev = &wa->usb_iface->dev;
+	unsigned cnt;
+	struct wa_seg *seg;
+	unsigned long flags;
+	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
+	size_t maxrequests = le16_to_cpu(rpipe->descr.wRequests);
+	u8 available;
+	u8 empty;
+
+	spin_lock_irqsave(&wa->xfer_list_lock, flags);
+	list_add_tail(&xfer->list_node, &wa->xfer_list);
+	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
+
+	BUG_ON(atomic_read(&rpipe->segs_available) > maxrequests);
+	result = 0;
+	spin_lock_irqsave(&rpipe->seg_lock, flags);
+	for (cnt = 0; cnt < xfer->segs; cnt++) {
+		int delay_seg = 1;
+
+		available = atomic_read(&rpipe->segs_available);
+		empty = list_empty(&rpipe->seg_list);
+		seg = xfer->seg[cnt];
+		if (available && empty) {
+			/*
+			 * Only attempt to acquire DTO if we have a segment
+			 * to send.
+			 */
+			dto_acquired = __wa_dto_try_get(rpipe->wa);
+			if (dto_acquired) {
+				delay_seg = 0;
+				result = __wa_seg_submit(rpipe, xfer, seg,
+							&dto_done);
+				dev_dbg(dev, "xfer %p ID 0x%08X#%u: available %u empty %u submitted\n",
+					xfer, wa_xfer_id(xfer), cnt, available,
+					empty);
+				if (dto_done)
+					__wa_dto_put(rpipe->wa);
+
+				if (result < 0) {
+					__wa_xfer_abort(xfer);
+					goto error_seg_submit;
+				}
+			}
+		}
+
+		if (delay_seg) {
+			dev_dbg(dev, "xfer %p ID 0x%08X#%u: available %u empty %u delayed\n",
+				xfer, wa_xfer_id(xfer), cnt, available,  empty);
+			seg->status = WA_SEG_DELAYED;
+			list_add_tail(&seg->list_node, &rpipe->seg_list);
+		}
+		xfer->segs_submitted++;
+	}
+error_seg_submit:
+	/*
+	 * Mark this RPIPE as waiting if dto was not acquired, there are
+	 * delayed segs and no active transfers to wake us up later.
+	 */
+	if (!dto_acquired && !list_empty(&rpipe->seg_list)
+		&& (atomic_read(&rpipe->segs_available) ==
+			le16_to_cpu(rpipe->descr.wRequests)))
+		dto_waiting = 1;
+	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
+
+	if (dto_waiting)
+		wa_add_delayed_rpipe(rpipe->wa, rpipe);
+	else if (dto_done)
+		wa_check_for_delayed_rpipes(rpipe->wa);
+
+	return result;
+}
+
+/*
+ * Second part of a URB/transfer enqueuement
+ *
+ * Assumes this comes from wa_urb_enqueue() [maybe through
+ * wa_urb_enqueue_run()]. At this point:
+ *
+ * xfer->wa	filled and refcounted
+ * xfer->ep	filled with rpipe refcounted if
+ *              delayed == 0
+ * xfer->urb 	filled and refcounted (this is the case when called
+ *              from wa_urb_enqueue() as we come from usb_submit_urb()
+ *              and when called by wa_urb_enqueue_run(), as we took an
+ *              extra ref dropped by _run() after we return).
+ * xfer->gfp	filled
+ *
+ * If we fail at __wa_xfer_submit(), then we just check if we are done
+ * and if so, we run the completion procedure. However, if we are not
+ * yet done, we do nothing and wait for the completion handlers from
+ * the submitted URBs or from the xfer-result path to kick in. If xfer
+ * result never kicks in, the xfer will timeout from the USB code and
+ * dequeue() will be called.
+ */
+static int wa_urb_enqueue_b(struct wa_xfer *xfer)
+{
+	int result;
+	unsigned long flags;
+	struct urb *urb = xfer->urb;
+	struct wahc *wa = xfer->wa;
+	struct wusbhc *wusbhc = wa->wusb;
+	struct wusb_dev *wusb_dev;
+	unsigned done;
+
+	result = rpipe_get_by_ep(wa, xfer->ep, urb, xfer->gfp);
+	if (result < 0) {
+		pr_err("%s: error_rpipe_get\n", __func__);
+		goto error_rpipe_get;
+	}
+	result = -ENODEV;
+	/* FIXME: segmentation broken -- kills DWA */
+	mutex_lock(&wusbhc->mutex);		/* get a WUSB dev */
+	if (urb->dev == NULL) {
+		mutex_unlock(&wusbhc->mutex);
+		pr_err("%s: error usb dev gone\n", __func__);
+		goto error_dev_gone;
+	}
+	wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc, urb->dev);
+	if (wusb_dev == NULL) {
+		mutex_unlock(&wusbhc->mutex);
+		dev_err(&(urb->dev->dev), "%s: error wusb dev gone\n",
+			__func__);
+		goto error_dev_gone;
+	}
+	mutex_unlock(&wusbhc->mutex);
+
+	spin_lock_irqsave(&xfer->lock, flags);
+	xfer->wusb_dev = wusb_dev;
+	result = urb->status;
+	if (urb->status != -EINPROGRESS) {
+		dev_err(&(urb->dev->dev), "%s: error_dequeued\n", __func__);
+		goto error_dequeued;
+	}
+
+	result = __wa_xfer_setup(xfer, urb);
+	if (result < 0) {
+		dev_err(&(urb->dev->dev), "%s: error_xfer_setup\n", __func__);
+		goto error_xfer_setup;
+	}
+	/*
+	 * Get a xfer reference since __wa_xfer_submit starts asynchronous
+	 * operations that may try to complete the xfer before this function
+	 * exits.
+	 */
+	wa_xfer_get(xfer);
+	result = __wa_xfer_submit(xfer);
+	if (result < 0) {
+		dev_err(&(urb->dev->dev), "%s: error_xfer_submit\n", __func__);
+		goto error_xfer_submit;
+	}
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	wa_xfer_put(xfer);
+	return 0;
+
+	/*
+	 * this is basically wa_xfer_completion() broken up wa_xfer_giveback()
+	 * does a wa_xfer_put() that will call wa_xfer_destroy() and undo
+	 * setup().
+	 */
+error_xfer_setup:
+error_dequeued:
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	/* FIXME: segmentation broken, kills DWA */
+	if (wusb_dev)
+		wusb_dev_put(wusb_dev);
+error_dev_gone:
+	rpipe_put(xfer->ep->hcpriv);
+error_rpipe_get:
+	xfer->result = result;
+	return result;
+
+error_xfer_submit:
+	done = __wa_xfer_is_done(xfer);
+	xfer->result = result;
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	if (done)
+		wa_xfer_completion(xfer);
+	wa_xfer_put(xfer);
+	/* return success since the completion routine will run. */
+	return 0;
+}
+
+/*
+ * Execute the delayed transfers in the Wire Adapter @wa
+ *
+ * We need to be careful here, as dequeue() could be called in the
+ * middle.  That's why we do the whole thing under the
+ * wa->xfer_list_lock. If dequeue() jumps in, it first locks xfer->lock
+ * and then checks the list -- so as we would be acquiring in inverse
+ * order, we move the delayed list to a separate list while locked and then
+ * submit them without the list lock held.
+ */
+void wa_urb_enqueue_run(struct work_struct *ws)
+{
+	struct wahc *wa = container_of(ws, struct wahc, xfer_enqueue_work);
+	struct wa_xfer *xfer, *next;
+	struct urb *urb;
+	LIST_HEAD(tmp_list);
+
+	/* Create a copy of the wa->xfer_delayed_list while holding the lock */
+	spin_lock_irq(&wa->xfer_list_lock);
+	list_cut_position(&tmp_list, &wa->xfer_delayed_list,
+			wa->xfer_delayed_list.prev);
+	spin_unlock_irq(&wa->xfer_list_lock);
+
+	/*
+	 * enqueue from temp list without list lock held since wa_urb_enqueue_b
+	 * can take xfer->lock as well as lock mutexes.
+	 */
+	list_for_each_entry_safe(xfer, next, &tmp_list, list_node) {
+		list_del_init(&xfer->list_node);
+
+		urb = xfer->urb;
+		if (wa_urb_enqueue_b(xfer) < 0)
+			wa_xfer_giveback(xfer);
+		usb_put_urb(urb);	/* taken when queuing */
+	}
+}
+EXPORT_SYMBOL_GPL(wa_urb_enqueue_run);
+
+/*
+ * Process the errored transfers on the Wire Adapter outside of interrupt.
+ */
+void wa_process_errored_transfers_run(struct work_struct *ws)
+{
+	struct wahc *wa = container_of(ws, struct wahc, xfer_error_work);
+	struct wa_xfer *xfer, *next;
+	LIST_HEAD(tmp_list);
+
+	pr_info("%s: Run delayed STALL processing.\n", __func__);
+
+	/* Create a copy of the wa->xfer_errored_list while holding the lock */
+	spin_lock_irq(&wa->xfer_list_lock);
+	list_cut_position(&tmp_list, &wa->xfer_errored_list,
+			wa->xfer_errored_list.prev);
+	spin_unlock_irq(&wa->xfer_list_lock);
+
+	/*
+	 * run rpipe_clear_feature_stalled from temp list without list lock
+	 * held.
+	 */
+	list_for_each_entry_safe(xfer, next, &tmp_list, list_node) {
+		struct usb_host_endpoint *ep;
+		unsigned long flags;
+		struct wa_rpipe *rpipe;
+
+		spin_lock_irqsave(&xfer->lock, flags);
+		ep = xfer->ep;
+		rpipe = ep->hcpriv;
+		spin_unlock_irqrestore(&xfer->lock, flags);
+
+		/* clear RPIPE feature stalled without holding a lock. */
+		rpipe_clear_feature_stalled(wa, ep);
+
+		/* complete the xfer. This removes it from the tmp list. */
+		wa_xfer_completion(xfer);
+
+		/* check for work. */
+		wa_xfer_delayed_run(rpipe);
+	}
+}
+EXPORT_SYMBOL_GPL(wa_process_errored_transfers_run);
+
+/*
+ * Submit a transfer to the Wire Adapter in a delayed way
+ *
+ * The process of enqueuing involves possible sleeps() [see
+ * enqueue_b(), for the rpipe_get() and the mutex_lock()]. If we are
+ * in an atomic section, we defer the enqueue_b() call--else we call direct.
+ *
+ * @urb: We own a reference to it done by the HCI Linux USB stack that
+ *       will be given up by calling usb_hcd_giveback_urb() or by
+ *       returning error from this function -> ergo we don't have to
+ *       refcount it.
+ */
+int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep,
+		   struct urb *urb, gfp_t gfp)
+{
+	int result;
+	struct device *dev = &wa->usb_iface->dev;
+	struct wa_xfer *xfer;
+	unsigned long my_flags;
+	unsigned cant_sleep = irqs_disabled() | in_atomic();
+
+	if ((urb->transfer_buffer == NULL)
+	    && (urb->sg == NULL)
+	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)
+	    && urb->transfer_buffer_length != 0) {
+		dev_err(dev, "BUG? urb %p: NULL xfer buffer & NODMA\n", urb);
+		dump_stack();
+	}
+
+	spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
+	result = usb_hcd_link_urb_to_ep(&(wa->wusb->usb_hcd), urb);
+	spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
+	if (result < 0)
+		goto error_link_urb;
+
+	result = -ENOMEM;
+	xfer = kzalloc(sizeof(*xfer), gfp);
+	if (xfer == NULL)
+		goto error_kmalloc;
+
+	result = -ENOENT;
+	if (urb->status != -EINPROGRESS)	/* cancelled */
+		goto error_dequeued;		/* before starting? */
+	wa_xfer_init(xfer);
+	xfer->wa = wa_get(wa);
+	xfer->urb = urb;
+	xfer->gfp = gfp;
+	xfer->ep = ep;
+	urb->hcpriv = xfer;
+
+	dev_dbg(dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n",
+		xfer, urb, urb->pipe, urb->transfer_buffer_length,
+		urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma",
+		urb->pipe & USB_DIR_IN ? "inbound" : "outbound",
+		cant_sleep ? "deferred" : "inline");
+
+	if (cant_sleep) {
+		usb_get_urb(urb);
+		spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
+		list_add_tail(&xfer->list_node, &wa->xfer_delayed_list);
+		spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
+		queue_work(wusbd, &wa->xfer_enqueue_work);
+	} else {
+		result = wa_urb_enqueue_b(xfer);
+		if (result < 0) {
+			/*
+			 * URB submit/enqueue failed.  Clean up, return an
+			 * error and do not run the callback.  This avoids
+			 * an infinite submit/complete loop.
+			 */
+			dev_err(dev, "%s: URB enqueue failed: %d\n",
+			   __func__, result);
+			wa_put(xfer->wa);
+			wa_xfer_put(xfer);
+			spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
+			usb_hcd_unlink_urb_from_ep(&(wa->wusb->usb_hcd), urb);
+			spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
+			return result;
+		}
+	}
+	return 0;
+
+error_dequeued:
+	kfree(xfer);
+error_kmalloc:
+	spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
+	usb_hcd_unlink_urb_from_ep(&(wa->wusb->usb_hcd), urb);
+	spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
+error_link_urb:
+	return result;
+}
+EXPORT_SYMBOL_GPL(wa_urb_enqueue);
+
+/*
+ * Dequeue a URB and make sure uwb_hcd_giveback_urb() [completion
+ * handler] is called.
+ *
+ * Until a transfer goes successfully through wa_urb_enqueue() it
+ * needs to be dequeued with completion calling; when stuck in delayed
+ * or before wa_xfer_setup() is called, we need to do completion.
+ *
+ *  not setup  If there is no hcpriv yet, that means that that enqueue
+ *             still had no time to set the xfer up. Because
+ *             urb->status should be other than -EINPROGRESS,
+ *             enqueue() will catch that and bail out.
+ *
+ * If the transfer has gone through setup, we just need to clean it
+ * up. If it has gone through submit(), we have to abort it [with an
+ * asynch request] and then make sure we cancel each segment.
+ *
+ */
+int wa_urb_dequeue(struct wahc *wa, struct urb *urb, int status)
+{
+	unsigned long flags;
+	struct wa_xfer *xfer;
+	struct wa_seg *seg;
+	struct wa_rpipe *rpipe;
+	unsigned cnt, done = 0, xfer_abort_pending;
+	unsigned rpipe_ready = 0;
+	int result;
+
+	/* check if it is safe to unlink. */
+	spin_lock_irqsave(&wa->xfer_list_lock, flags);
+	result = usb_hcd_check_unlink_urb(&(wa->wusb->usb_hcd), urb, status);
+	if ((result == 0) && urb->hcpriv) {
+		/*
+		 * Get a xfer ref to prevent a race with wa_xfer_giveback
+		 * cleaning up the xfer while we are working with it.
+		 */
+		wa_xfer_get(urb->hcpriv);
+	}
+	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
+	if (result)
+		return result;
+
+	xfer = urb->hcpriv;
+	if (xfer == NULL)
+		return -ENOENT;
+	spin_lock_irqsave(&xfer->lock, flags);
+	pr_debug("%s: DEQUEUE xfer id 0x%08X\n", __func__, wa_xfer_id(xfer));
+	rpipe = xfer->ep->hcpriv;
+	if (rpipe == NULL) {
+		pr_debug("%s: xfer %p id 0x%08X has no RPIPE.  %s",
+			__func__, xfer, wa_xfer_id(xfer),
+			"Probably already aborted.\n" );
+		result = -ENOENT;
+		goto out_unlock;
+	}
+	/*
+	 * Check for done to avoid racing with wa_xfer_giveback and completing
+	 * twice.
+	 */
+	if (__wa_xfer_is_done(xfer)) {
+		pr_debug("%s: xfer %p id 0x%08X already done.\n", __func__,
+			xfer, wa_xfer_id(xfer));
+		result = -ENOENT;
+		goto out_unlock;
+	}
+	/* Check the delayed list -> if there, release and complete */
+	spin_lock(&wa->xfer_list_lock);
+	if (!list_empty(&xfer->list_node) && xfer->seg == NULL)
+		goto dequeue_delayed;
+	spin_unlock(&wa->xfer_list_lock);
+	if (xfer->seg == NULL)  	/* still hasn't reached */
+		goto out_unlock;	/* setup(), enqueue_b() completes */
+	/* Ok, the xfer is in flight already, it's been setup and submitted.*/
+	xfer_abort_pending = __wa_xfer_abort(xfer) >= 0;
+	/*
+	 * grab the rpipe->seg_lock here to prevent racing with
+	 * __wa_xfer_delayed_run.
+	 */
+	spin_lock(&rpipe->seg_lock);
+	for (cnt = 0; cnt < xfer->segs; cnt++) {
+		seg = xfer->seg[cnt];
+		pr_debug("%s: xfer id 0x%08X#%d status = %d\n",
+			__func__, wa_xfer_id(xfer), cnt, seg->status);
+		switch (seg->status) {
+		case WA_SEG_NOTREADY:
+		case WA_SEG_READY:
+			printk(KERN_ERR "xfer %p#%u: dequeue bad state %u\n",
+			       xfer, cnt, seg->status);
+			WARN_ON(1);
+			break;
+		case WA_SEG_DELAYED:
+			/*
+			 * delete from rpipe delayed list.  If no segments on
+			 * this xfer have been submitted, __wa_xfer_is_done will
+			 * trigger a giveback below.  Otherwise, the submitted
+			 * segments will be completed in the DTI interrupt.
+			 */
+			seg->status = WA_SEG_ABORTED;
+			seg->result = -ENOENT;
+			list_del(&seg->list_node);
+			xfer->segs_done++;
+			break;
+		case WA_SEG_DONE:
+		case WA_SEG_ERROR:
+		case WA_SEG_ABORTED:
+			break;
+			/*
+			 * The buf_in data for a segment in the
+			 * WA_SEG_DTI_PENDING state is actively being read.
+			 * Let wa_buf_in_cb handle it since it will be called
+			 * and will increment xfer->segs_done.  Cleaning up
+			 * here could cause wa_buf_in_cb to access the xfer
+			 * after it has been completed/freed.
+			 */
+		case WA_SEG_DTI_PENDING:
+			break;
+			/*
+			 * In the states below, the HWA device already knows
+			 * about the transfer.  If an abort request was sent,
+			 * allow the HWA to process it and wait for the
+			 * results.  Otherwise, the DTI state and seg completed
+			 * counts can get out of sync.
+			 */
+		case WA_SEG_SUBMITTED:
+		case WA_SEG_PENDING:
+			/*
+			 * Check if the abort was successfully sent.  This could
+			 * be false if the HWA has been removed but we haven't
+			 * gotten the disconnect notification yet.
+			 */
+			if (!xfer_abort_pending) {
+				seg->status = WA_SEG_ABORTED;
+				rpipe_ready = rpipe_avail_inc(rpipe);
+				xfer->segs_done++;
+			}
+			break;
+		}
+	}
+	spin_unlock(&rpipe->seg_lock);
+	xfer->result = urb->status;	/* -ENOENT or -ECONNRESET */
+	done = __wa_xfer_is_done(xfer);
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	if (done)
+		wa_xfer_completion(xfer);
+	if (rpipe_ready)
+		wa_xfer_delayed_run(rpipe);
+	wa_xfer_put(xfer);
+	return result;
+
+out_unlock:
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	wa_xfer_put(xfer);
+	return result;
+
+dequeue_delayed:
+	list_del_init(&xfer->list_node);
+	spin_unlock(&wa->xfer_list_lock);
+	xfer->result = urb->status;
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	wa_xfer_giveback(xfer);
+	wa_xfer_put(xfer);
+	usb_put_urb(urb);		/* we got a ref in enqueue() */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wa_urb_dequeue);
+
+/*
+ * Translation from WA status codes (WUSB1.0 Table 8.15) to errno
+ * codes
+ *
+ * Positive errno values are internal inconsistencies and should be
+ * flagged louder. Negative are to be passed up to the user in the
+ * normal way.
+ *
+ * @status: USB WA status code -- high two bits are stripped.
+ */
+static int wa_xfer_status_to_errno(u8 status)
+{
+	int errno;
+	u8 real_status = status;
+	static int xlat[] = {
+		[WA_XFER_STATUS_SUCCESS] = 		0,
+		[WA_XFER_STATUS_HALTED] = 		-EPIPE,
+		[WA_XFER_STATUS_DATA_BUFFER_ERROR] = 	-ENOBUFS,
+		[WA_XFER_STATUS_BABBLE] = 		-EOVERFLOW,
+		[WA_XFER_RESERVED] = 			EINVAL,
+		[WA_XFER_STATUS_NOT_FOUND] =		0,
+		[WA_XFER_STATUS_INSUFFICIENT_RESOURCE] = -ENOMEM,
+		[WA_XFER_STATUS_TRANSACTION_ERROR] = 	-EILSEQ,
+		[WA_XFER_STATUS_ABORTED] =		-ENOENT,
+		[WA_XFER_STATUS_RPIPE_NOT_READY] = 	EINVAL,
+		[WA_XFER_INVALID_FORMAT] = 		EINVAL,
+		[WA_XFER_UNEXPECTED_SEGMENT_NUMBER] = 	EINVAL,
+		[WA_XFER_STATUS_RPIPE_TYPE_MISMATCH] = 	EINVAL,
+	};
+	status &= 0x3f;
+
+	if (status == 0)
+		return 0;
+	if (status >= ARRAY_SIZE(xlat)) {
+		printk_ratelimited(KERN_ERR "%s(): BUG? "
+			       "Unknown WA transfer status 0x%02x\n",
+			       __func__, real_status);
+		return -EINVAL;
+	}
+	errno = xlat[status];
+	if (unlikely(errno > 0)) {
+		printk_ratelimited(KERN_ERR "%s(): BUG? "
+			       "Inconsistent WA status: 0x%02x\n",
+			       __func__, real_status);
+		errno = -errno;
+	}
+	return errno;
+}
+
+/*
+ * If a last segment flag and/or a transfer result error is encountered,
+ * no other segment transfer results will be returned from the device.
+ * Mark the remaining submitted or pending xfers as completed so that
+ * the xfer will complete cleanly.
+ *
+ * xfer->lock must be held
+ *
+ */
+static void wa_complete_remaining_xfer_segs(struct wa_xfer *xfer,
+		int starting_index, enum wa_seg_status status)
+{
+	int index;
+	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
+
+	for (index = starting_index; index < xfer->segs_submitted; index++) {
+		struct wa_seg *current_seg = xfer->seg[index];
+
+		BUG_ON(current_seg == NULL);
+
+		switch (current_seg->status) {
+		case WA_SEG_SUBMITTED:
+		case WA_SEG_PENDING:
+		case WA_SEG_DTI_PENDING:
+			rpipe_avail_inc(rpipe);
+		/*
+		 * do not increment RPIPE avail for the WA_SEG_DELAYED case
+		 * since it has not been submitted to the RPIPE.
+		 */
+		/* fall through */
+		case WA_SEG_DELAYED:
+			xfer->segs_done++;
+			current_seg->status = status;
+			break;
+		case WA_SEG_ABORTED:
+			break;
+		default:
+			WARN(1, "%s: xfer 0x%08X#%d. bad seg status = %d\n",
+				__func__, wa_xfer_id(xfer), index,
+				current_seg->status);
+			break;
+		}
+	}
+}
+
+/* Populate the given urb based on the current isoc transfer state. */
+static int __wa_populate_buf_in_urb_isoc(struct wahc *wa,
+	struct urb *buf_in_urb, struct wa_xfer *xfer, struct wa_seg *seg)
+{
+	int urb_start_frame = seg->isoc_frame_index + seg->isoc_frame_offset;
+	int seg_index, total_len = 0, urb_frame_index = urb_start_frame;
+	struct usb_iso_packet_descriptor *iso_frame_desc =
+						xfer->urb->iso_frame_desc;
+	const int dti_packet_size = usb_endpoint_maxp(wa->dti_epd);
+	int next_frame_contiguous;
+	struct usb_iso_packet_descriptor *iso_frame;
+
+	BUG_ON(buf_in_urb->status == -EINPROGRESS);
+
+	/*
+	 * If the current frame actual_length is contiguous with the next frame
+	 * and actual_length is a multiple of the DTI endpoint max packet size,
+	 * combine the current frame with the next frame in a single URB.  This
+	 * reduces the number of URBs that must be submitted in that case.
+	 */
+	seg_index = seg->isoc_frame_index;
+	do {
+		next_frame_contiguous = 0;
+
+		iso_frame = &iso_frame_desc[urb_frame_index];
+		total_len += iso_frame->actual_length;
+		++urb_frame_index;
+		++seg_index;
+
+		if (seg_index < seg->isoc_frame_count) {
+			struct usb_iso_packet_descriptor *next_iso_frame;
+
+			next_iso_frame = &iso_frame_desc[urb_frame_index];
+
+			if ((iso_frame->offset + iso_frame->actual_length) ==
+				next_iso_frame->offset)
+				next_frame_contiguous = 1;
+		}
+	} while (next_frame_contiguous
+			&& ((iso_frame->actual_length % dti_packet_size) == 0));
+
+	/* this should always be 0 before a resubmit. */
+	buf_in_urb->num_mapped_sgs	= 0;
+	buf_in_urb->transfer_dma = xfer->urb->transfer_dma +
+		iso_frame_desc[urb_start_frame].offset;
+	buf_in_urb->transfer_buffer_length = total_len;
+	buf_in_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	buf_in_urb->transfer_buffer = NULL;
+	buf_in_urb->sg = NULL;
+	buf_in_urb->num_sgs = 0;
+	buf_in_urb->context = seg;
+
+	/* return the number of frames included in this URB. */
+	return seg_index - seg->isoc_frame_index;
+}
+
+/* Populate the given urb based on the current transfer state. */
+static int wa_populate_buf_in_urb(struct urb *buf_in_urb, struct wa_xfer *xfer,
+	unsigned int seg_idx, unsigned int bytes_transferred)
+{
+	int result = 0;
+	struct wa_seg *seg = xfer->seg[seg_idx];
+
+	BUG_ON(buf_in_urb->status == -EINPROGRESS);
+	/* this should always be 0 before a resubmit. */
+	buf_in_urb->num_mapped_sgs	= 0;
+
+	if (xfer->is_dma) {
+		buf_in_urb->transfer_dma = xfer->urb->transfer_dma
+			+ (seg_idx * xfer->seg_size);
+		buf_in_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+		buf_in_urb->transfer_buffer = NULL;
+		buf_in_urb->sg = NULL;
+		buf_in_urb->num_sgs = 0;
+	} else {
+		/* do buffer or SG processing. */
+		buf_in_urb->transfer_flags &= ~URB_NO_TRANSFER_DMA_MAP;
+
+		if (xfer->urb->transfer_buffer) {
+			buf_in_urb->transfer_buffer =
+				xfer->urb->transfer_buffer
+				+ (seg_idx * xfer->seg_size);
+			buf_in_urb->sg = NULL;
+			buf_in_urb->num_sgs = 0;
+		} else {
+			/* allocate an SG list to store seg_size bytes
+				and copy the subset of the xfer->urb->sg
+				that matches the buffer subset we are
+				about to read. */
+			buf_in_urb->sg = wa_xfer_create_subset_sg(
+				xfer->urb->sg,
+				seg_idx * xfer->seg_size,
+				bytes_transferred,
+				&(buf_in_urb->num_sgs));
+
+			if (!(buf_in_urb->sg)) {
+				buf_in_urb->num_sgs	= 0;
+				result = -ENOMEM;
+			}
+			buf_in_urb->transfer_buffer = NULL;
+		}
+	}
+	buf_in_urb->transfer_buffer_length = bytes_transferred;
+	buf_in_urb->context = seg;
+
+	return result;
+}
+
+/*
+ * Process a xfer result completion message
+ *
+ * inbound transfers: need to schedule a buf_in_urb read
+ *
+ * FIXME: this function needs to be broken up in parts
+ */
+static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer,
+		struct wa_xfer_result *xfer_result)
+{
+	int result;
+	struct device *dev = &wa->usb_iface->dev;
+	unsigned long flags;
+	unsigned int seg_idx;
+	struct wa_seg *seg;
+	struct wa_rpipe *rpipe;
+	unsigned done = 0;
+	u8 usb_status;
+	unsigned rpipe_ready = 0;
+	unsigned bytes_transferred = le32_to_cpu(xfer_result->dwTransferLength);
+	struct urb *buf_in_urb = &(wa->buf_in_urbs[0]);
+
+	spin_lock_irqsave(&xfer->lock, flags);
+	seg_idx = xfer_result->bTransferSegment & 0x7f;
+	if (unlikely(seg_idx >= xfer->segs))
+		goto error_bad_seg;
+	seg = xfer->seg[seg_idx];
+	rpipe = xfer->ep->hcpriv;
+	usb_status = xfer_result->bTransferStatus;
+	dev_dbg(dev, "xfer %p ID 0x%08X#%u: bTransferStatus 0x%02x (seg status %u)\n",
+		xfer, wa_xfer_id(xfer), seg_idx, usb_status, seg->status);
+	if (seg->status == WA_SEG_ABORTED
+	    || seg->status == WA_SEG_ERROR)	/* already handled */
+		goto segment_aborted;
+	if (seg->status == WA_SEG_SUBMITTED)	/* ops, got here */
+		seg->status = WA_SEG_PENDING;	/* before wa_seg{_dto}_cb() */
+	if (seg->status != WA_SEG_PENDING) {
+		if (printk_ratelimit())
+			dev_err(dev, "xfer %p#%u: Bad segment state %u\n",
+				xfer, seg_idx, seg->status);
+		seg->status = WA_SEG_PENDING;	/* workaround/"fix" it */
+	}
+	if (usb_status & 0x80) {
+		seg->result = wa_xfer_status_to_errno(usb_status);
+		dev_err(dev, "DTI: xfer %p 0x%08X:#%u failed (0x%02x)\n",
+			xfer, xfer->id, seg->index, usb_status);
+		seg->status = ((usb_status & 0x7F) == WA_XFER_STATUS_ABORTED) ?
+			WA_SEG_ABORTED : WA_SEG_ERROR;
+		goto error_complete;
+	}
+	/* FIXME: we ignore warnings, tally them for stats */
+	if (usb_status & 0x40) 		/* Warning?... */
+		usb_status = 0;		/* ... pass */
+	/*
+	 * If the last segment bit is set, complete the remaining segments.
+	 * When the current segment is completed, either in wa_buf_in_cb for
+	 * transfers with data or below for no data, the xfer will complete.
+	 */
+	if (xfer_result->bTransferSegment & 0x80)
+		wa_complete_remaining_xfer_segs(xfer, seg->index + 1,
+			WA_SEG_DONE);
+	if (usb_pipeisoc(xfer->urb->pipe)
+		&& (le32_to_cpu(xfer_result->dwNumOfPackets) > 0)) {
+		/* set up WA state to read the isoc packet status next. */
+		wa->dti_isoc_xfer_in_progress = wa_xfer_id(xfer);
+		wa->dti_isoc_xfer_seg = seg_idx;
+		wa->dti_state = WA_DTI_ISOC_PACKET_STATUS_PENDING;
+	} else if (xfer->is_inbound && !usb_pipeisoc(xfer->urb->pipe)
+			&& (bytes_transferred > 0)) {
+		/* IN data phase: read to buffer */
+		seg->status = WA_SEG_DTI_PENDING;
+		result = wa_populate_buf_in_urb(buf_in_urb, xfer, seg_idx,
+			bytes_transferred);
+		if (result < 0)
+			goto error_buf_in_populate;
+		++(wa->active_buf_in_urbs);
+		result = usb_submit_urb(buf_in_urb, GFP_ATOMIC);
+		if (result < 0) {
+			--(wa->active_buf_in_urbs);
+			goto error_submit_buf_in;
+		}
+	} else {
+		/* OUT data phase or no data, complete it -- */
+		seg->result = bytes_transferred;
+		rpipe_ready = rpipe_avail_inc(rpipe);
+		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_DONE);
+	}
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	if (done)
+		wa_xfer_completion(xfer);
+	if (rpipe_ready)
+		wa_xfer_delayed_run(rpipe);
+	return;
+
+error_submit_buf_in:
+	if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+		dev_err(dev, "DTI: URB max acceptable errors "
+			"exceeded, resetting device\n");
+		wa_reset_all(wa);
+	}
+	if (printk_ratelimit())
+		dev_err(dev, "xfer %p#%u: can't submit DTI data phase: %d\n",
+			xfer, seg_idx, result);
+	seg->result = result;
+	kfree(buf_in_urb->sg);
+	buf_in_urb->sg = NULL;
+error_buf_in_populate:
+	__wa_xfer_abort(xfer);
+	seg->status = WA_SEG_ERROR;
+error_complete:
+	xfer->segs_done++;
+	rpipe_ready = rpipe_avail_inc(rpipe);
+	wa_complete_remaining_xfer_segs(xfer, seg->index + 1, seg->status);
+	done = __wa_xfer_is_done(xfer);
+	/*
+	 * queue work item to clear STALL for control endpoints.
+	 * Otherwise, let endpoint_reset take care of it.
+	 */
+	if (((usb_status & 0x3f) == WA_XFER_STATUS_HALTED) &&
+		usb_endpoint_xfer_control(&xfer->ep->desc) &&
+		done) {
+
+		dev_info(dev, "Control EP stall.  Queue delayed work.\n");
+		spin_lock(&wa->xfer_list_lock);
+		/* move xfer from xfer_list to xfer_errored_list. */
+		list_move_tail(&xfer->list_node, &wa->xfer_errored_list);
+		spin_unlock(&wa->xfer_list_lock);
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		queue_work(wusbd, &wa->xfer_error_work);
+	} else {
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		if (done)
+			wa_xfer_completion(xfer);
+		if (rpipe_ready)
+			wa_xfer_delayed_run(rpipe);
+	}
+
+	return;
+
+error_bad_seg:
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	wa_urb_dequeue(wa, xfer->urb, -ENOENT);
+	if (printk_ratelimit())
+		dev_err(dev, "xfer %p#%u: bad segment\n", xfer, seg_idx);
+	if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
+		dev_err(dev, "DTI: URB max acceptable errors "
+			"exceeded, resetting device\n");
+		wa_reset_all(wa);
+	}
+	return;
+
+segment_aborted:
+	/* nothing to do, as the aborter did the completion */
+	spin_unlock_irqrestore(&xfer->lock, flags);
+}
+
+/*
+ * Process a isochronous packet status message
+ *
+ * inbound transfers: need to schedule a buf_in_urb read
+ */
+static int wa_process_iso_packet_status(struct wahc *wa, struct urb *urb)
+{
+	struct device *dev = &wa->usb_iface->dev;
+	struct wa_xfer_packet_status_hwaiso *packet_status;
+	struct wa_xfer_packet_status_len_hwaiso *status_array;
+	struct wa_xfer *xfer;
+	unsigned long flags;
+	struct wa_seg *seg;
+	struct wa_rpipe *rpipe;
+	unsigned done = 0, dti_busy = 0, data_frame_count = 0, seg_index;
+	unsigned first_frame_index = 0, rpipe_ready = 0;
+	size_t expected_size;
+
+	/* We have a xfer result buffer; check it */
+	dev_dbg(dev, "DTI: isoc packet status %d bytes at %p\n",
+		urb->actual_length, urb->transfer_buffer);
+	packet_status = (struct wa_xfer_packet_status_hwaiso *)(wa->dti_buf);
+	if (packet_status->bPacketType != WA_XFER_ISO_PACKET_STATUS) {
+		dev_err(dev, "DTI Error: isoc packet status--bad type 0x%02x\n",
+			packet_status->bPacketType);
+		goto error_parse_buffer;
+	}
+	xfer = wa_xfer_get_by_id(wa, wa->dti_isoc_xfer_in_progress);
+	if (xfer == NULL) {
+		dev_err(dev, "DTI Error: isoc packet status--unknown xfer 0x%08x\n",
+			wa->dti_isoc_xfer_in_progress);
+		goto error_parse_buffer;
+	}
+	spin_lock_irqsave(&xfer->lock, flags);
+	if (unlikely(wa->dti_isoc_xfer_seg >= xfer->segs))
+		goto error_bad_seg;
+	seg = xfer->seg[wa->dti_isoc_xfer_seg];
+	rpipe = xfer->ep->hcpriv;
+	expected_size = struct_size(packet_status, PacketStatus,
+				    seg->isoc_frame_count);
+	if (urb->actual_length != expected_size) {
+		dev_err(dev, "DTI Error: isoc packet status--bad urb length (%d bytes vs %zu needed)\n",
+			urb->actual_length, expected_size);
+		goto error_bad_seg;
+	}
+	if (le16_to_cpu(packet_status->wLength) != expected_size) {
+		dev_err(dev, "DTI Error: isoc packet status--bad length %u\n",
+			le16_to_cpu(packet_status->wLength));
+		goto error_bad_seg;
+	}
+	/* write isoc packet status and lengths back to the xfer urb. */
+	status_array = packet_status->PacketStatus;
+	xfer->urb->start_frame =
+		wa->wusb->usb_hcd.driver->get_frame_number(&wa->wusb->usb_hcd);
+	for (seg_index = 0; seg_index < seg->isoc_frame_count; ++seg_index) {
+		struct usb_iso_packet_descriptor *iso_frame_desc =
+			xfer->urb->iso_frame_desc;
+		const int xfer_frame_index =
+			seg->isoc_frame_offset + seg_index;
+
+		iso_frame_desc[xfer_frame_index].status =
+			wa_xfer_status_to_errno(
+			le16_to_cpu(status_array[seg_index].PacketStatus));
+		iso_frame_desc[xfer_frame_index].actual_length =
+			le16_to_cpu(status_array[seg_index].PacketLength);
+		/* track the number of frames successfully transferred. */
+		if (iso_frame_desc[xfer_frame_index].actual_length > 0) {
+			/* save the starting frame index for buf_in_urb. */
+			if (!data_frame_count)
+				first_frame_index = seg_index;
+			++data_frame_count;
+		}
+	}
+
+	if (xfer->is_inbound && data_frame_count) {
+		int result, total_frames_read = 0, urb_index = 0;
+		struct urb *buf_in_urb;
+
+		/* IN data phase: read to buffer */
+		seg->status = WA_SEG_DTI_PENDING;
+
+		/* start with the first frame with data. */
+		seg->isoc_frame_index = first_frame_index;
+		/* submit up to WA_MAX_BUF_IN_URBS read URBs. */
+		do {
+			int urb_frame_index, urb_frame_count;
+			struct usb_iso_packet_descriptor *iso_frame_desc;
+
+			buf_in_urb = &(wa->buf_in_urbs[urb_index]);
+			urb_frame_count = __wa_populate_buf_in_urb_isoc(wa,
+				buf_in_urb, xfer, seg);
+			/* advance frame index to start of next read URB. */
+			seg->isoc_frame_index += urb_frame_count;
+			total_frames_read += urb_frame_count;
+
+			++(wa->active_buf_in_urbs);
+			result = usb_submit_urb(buf_in_urb, GFP_ATOMIC);
+
+			/* skip 0-byte frames. */
+			urb_frame_index =
+				seg->isoc_frame_offset + seg->isoc_frame_index;
+			iso_frame_desc =
+				&(xfer->urb->iso_frame_desc[urb_frame_index]);
+			while ((seg->isoc_frame_index <
+						seg->isoc_frame_count) &&
+				 (iso_frame_desc->actual_length == 0)) {
+				++(seg->isoc_frame_index);
+				++iso_frame_desc;
+			}
+			++urb_index;
+
+		} while ((result == 0) && (urb_index < WA_MAX_BUF_IN_URBS)
+				&& (seg->isoc_frame_index <
+						seg->isoc_frame_count));
+
+		if (result < 0) {
+			--(wa->active_buf_in_urbs);
+			dev_err(dev, "DTI Error: Could not submit buf in URB (%d)",
+				result);
+			wa_reset_all(wa);
+		} else if (data_frame_count > total_frames_read)
+			/* If we need to read more frames, set DTI busy. */
+			dti_busy = 1;
+	} else {
+		/* OUT transfer or no more IN data, complete it -- */
+		rpipe_ready = rpipe_avail_inc(rpipe);
+		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_DONE);
+	}
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	if (dti_busy)
+		wa->dti_state = WA_DTI_BUF_IN_DATA_PENDING;
+	else
+		wa->dti_state = WA_DTI_TRANSFER_RESULT_PENDING;
+	if (done)
+		wa_xfer_completion(xfer);
+	if (rpipe_ready)
+		wa_xfer_delayed_run(rpipe);
+	wa_xfer_put(xfer);
+	return dti_busy;
+
+error_bad_seg:
+	spin_unlock_irqrestore(&xfer->lock, flags);
+	wa_xfer_put(xfer);
+error_parse_buffer:
+	return dti_busy;
+}
+
+/*
+ * Callback for the IN data phase
+ *
+ * If successful transition state; otherwise, take a note of the
+ * error, mark this segment done and try completion.
+ *
+ * Note we don't access until we are sure that the transfer hasn't
+ * been cancelled (ECONNRESET, ENOENT), which could mean that
+ * seg->xfer could be already gone.
+ */
+static void wa_buf_in_cb(struct urb *urb)
+{
+	struct wa_seg *seg = urb->context;
+	struct wa_xfer *xfer = seg->xfer;
+	struct wahc *wa;
+	struct device *dev;
+	struct wa_rpipe *rpipe;
+	unsigned rpipe_ready = 0, isoc_data_frame_count = 0;
+	unsigned long flags;
+	int resubmit_dti = 0, active_buf_in_urbs;
+	u8 done = 0;
+
+	/* free the sg if it was used. */
+	kfree(urb->sg);
+	urb->sg = NULL;
+
+	spin_lock_irqsave(&xfer->lock, flags);
+	wa = xfer->wa;
+	dev = &wa->usb_iface->dev;
+	--(wa->active_buf_in_urbs);
+	active_buf_in_urbs = wa->active_buf_in_urbs;
+	rpipe = xfer->ep->hcpriv;
+
+	if (usb_pipeisoc(xfer->urb->pipe)) {
+		struct usb_iso_packet_descriptor *iso_frame_desc =
+			xfer->urb->iso_frame_desc;
+		int	seg_index;
+
+		/*
+		 * Find the next isoc frame with data and count how many
+		 * frames with data remain.
+		 */
+		seg_index = seg->isoc_frame_index;
+		while (seg_index < seg->isoc_frame_count) {
+			const int urb_frame_index =
+				seg->isoc_frame_offset + seg_index;
+
+			if (iso_frame_desc[urb_frame_index].actual_length > 0) {
+				/* save the index of the next frame with data */
+				if (!isoc_data_frame_count)
+					seg->isoc_frame_index = seg_index;
+				++isoc_data_frame_count;
+			}
+			++seg_index;
+		}
+	}
+	spin_unlock_irqrestore(&xfer->lock, flags);
+
+	switch (urb->status) {
+	case 0:
+		spin_lock_irqsave(&xfer->lock, flags);
+
+		seg->result += urb->actual_length;
+		if (isoc_data_frame_count > 0) {
+			int result, urb_frame_count;
+
+			/* submit a read URB for the next frame with data. */
+			urb_frame_count = __wa_populate_buf_in_urb_isoc(wa, urb,
+				 xfer, seg);
+			/* advance index to start of next read URB. */
+			seg->isoc_frame_index += urb_frame_count;
+			++(wa->active_buf_in_urbs);
+			result = usb_submit_urb(urb, GFP_ATOMIC);
+			if (result < 0) {
+				--(wa->active_buf_in_urbs);
+				dev_err(dev, "DTI Error: Could not submit buf in URB (%d)",
+					result);
+				wa_reset_all(wa);
+			}
+			/*
+			 * If we are in this callback and
+			 * isoc_data_frame_count > 0, it means that the dti_urb
+			 * submission was delayed in wa_dti_cb.  Once
+			 * we submit the last buf_in_urb, we can submit the
+			 * delayed dti_urb.
+			 */
+			  resubmit_dti = (isoc_data_frame_count ==
+							urb_frame_count);
+		} else if (active_buf_in_urbs == 0) {
+			dev_dbg(dev,
+				"xfer %p 0x%08X#%u: data in done (%zu bytes)\n",
+				xfer, wa_xfer_id(xfer), seg->index,
+				seg->result);
+			rpipe_ready = rpipe_avail_inc(rpipe);
+			done = __wa_xfer_mark_seg_as_done(xfer, seg,
+					WA_SEG_DONE);
+		}
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		if (done)
+			wa_xfer_completion(xfer);
+		if (rpipe_ready)
+			wa_xfer_delayed_run(rpipe);
+		break;
+	case -ECONNRESET:	/* URB unlinked; no need to do anything */
+	case -ENOENT:		/* as it was done by the who unlinked us */
+		break;
+	default:		/* Other errors ... */
+		/*
+		 * Error on data buf read.  Only resubmit DTI if it hasn't
+		 * already been done by previously hitting this error or by a
+		 * successful completion of the previous buf_in_urb.
+		 */
+		resubmit_dti = wa->dti_state != WA_DTI_TRANSFER_RESULT_PENDING;
+		spin_lock_irqsave(&xfer->lock, flags);
+		if (printk_ratelimit())
+			dev_err(dev, "xfer %p 0x%08X#%u: data in error %d\n",
+				xfer, wa_xfer_id(xfer), seg->index,
+				urb->status);
+		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
+			    EDC_ERROR_TIMEFRAME)){
+			dev_err(dev, "DTO: URB max acceptable errors "
+				"exceeded, resetting device\n");
+			wa_reset_all(wa);
+		}
+		seg->result = urb->status;
+		rpipe_ready = rpipe_avail_inc(rpipe);
+		if (active_buf_in_urbs == 0)
+			done = __wa_xfer_mark_seg_as_done(xfer, seg,
+				WA_SEG_ERROR);
+		else
+			__wa_xfer_abort(xfer);
+		spin_unlock_irqrestore(&xfer->lock, flags);
+		if (done)
+			wa_xfer_completion(xfer);
+		if (rpipe_ready)
+			wa_xfer_delayed_run(rpipe);
+	}
+
+	if (resubmit_dti) {
+		int result;
+
+		wa->dti_state = WA_DTI_TRANSFER_RESULT_PENDING;
+
+		result = usb_submit_urb(wa->dti_urb, GFP_ATOMIC);
+		if (result < 0) {
+			dev_err(dev, "DTI Error: Could not submit DTI URB (%d)\n",
+				result);
+			wa_reset_all(wa);
+		}
+	}
+}
+
+/*
+ * Handle an incoming transfer result buffer
+ *
+ * Given a transfer result buffer, it completes the transfer (possibly
+ * scheduling and buffer in read) and then resubmits the DTI URB for a
+ * new transfer result read.
+ *
+ *
+ * The xfer_result DTI URB state machine
+ *
+ * States: OFF | RXR (Read-Xfer-Result) | RBI (Read-Buffer-In)
+ *
+ * We start in OFF mode, the first xfer_result notification [through
+ * wa_handle_notif_xfer()] moves us to RXR by posting the DTI-URB to
+ * read.
+ *
+ * We receive a buffer -- if it is not a xfer_result, we complain and
+ * repost the DTI-URB. If it is a xfer_result then do the xfer seg
+ * request accounting. If it is an IN segment, we move to RBI and post
+ * a BUF-IN-URB to the right buffer. The BUF-IN-URB callback will
+ * repost the DTI-URB and move to RXR state. if there was no IN
+ * segment, it will repost the DTI-URB.
+ *
+ * We go back to OFF when we detect a ENOENT or ESHUTDOWN (or too many
+ * errors) in the URBs.
+ */
+static void wa_dti_cb(struct urb *urb)
+{
+	int result, dti_busy = 0;
+	struct wahc *wa = urb->context;
+	struct device *dev = &wa->usb_iface->dev;
+	u32 xfer_id;
+	u8 usb_status;
+
+	BUG_ON(wa->dti_urb != urb);
+	switch (wa->dti_urb->status) {
+	case 0:
+		if (wa->dti_state == WA_DTI_TRANSFER_RESULT_PENDING) {
+			struct wa_xfer_result *xfer_result;
+			struct wa_xfer *xfer;
+
+			/* We have a xfer result buffer; check it */
+			dev_dbg(dev, "DTI: xfer result %d bytes at %p\n",
+				urb->actual_length, urb->transfer_buffer);
+			if (urb->actual_length != sizeof(*xfer_result)) {
+				dev_err(dev, "DTI Error: xfer result--bad size xfer result (%d bytes vs %zu needed)\n",
+					urb->actual_length,
+					sizeof(*xfer_result));
+				break;
+			}
+			xfer_result = (struct wa_xfer_result *)(wa->dti_buf);
+			if (xfer_result->hdr.bLength != sizeof(*xfer_result)) {
+				dev_err(dev, "DTI Error: xfer result--bad header length %u\n",
+					xfer_result->hdr.bLength);
+				break;
+			}
+			if (xfer_result->hdr.bNotifyType != WA_XFER_RESULT) {
+				dev_err(dev, "DTI Error: xfer result--bad header type 0x%02x\n",
+					xfer_result->hdr.bNotifyType);
+				break;
+			}
+			xfer_id = le32_to_cpu(xfer_result->dwTransferID);
+			usb_status = xfer_result->bTransferStatus & 0x3f;
+			if (usb_status == WA_XFER_STATUS_NOT_FOUND) {
+				/* taken care of already */
+				dev_dbg(dev, "%s: xfer 0x%08X#%u not found.\n",
+					__func__, xfer_id,
+					xfer_result->bTransferSegment & 0x7f);
+				break;
+			}
+			xfer = wa_xfer_get_by_id(wa, xfer_id);
+			if (xfer == NULL) {
+				/* FIXME: transaction not found. */
+				dev_err(dev, "DTI Error: xfer result--unknown xfer 0x%08x (status 0x%02x)\n",
+					xfer_id, usb_status);
+				break;
+			}
+			wa_xfer_result_chew(wa, xfer, xfer_result);
+			wa_xfer_put(xfer);
+		} else if (wa->dti_state == WA_DTI_ISOC_PACKET_STATUS_PENDING) {
+			dti_busy = wa_process_iso_packet_status(wa, urb);
+		} else {
+			dev_err(dev, "DTI Error: unexpected EP state = %d\n",
+				wa->dti_state);
+		}
+		break;
+	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
+	case -ESHUTDOWN:	/* going away! */
+		dev_dbg(dev, "DTI: going down! %d\n", urb->status);
+		goto out;
+	default:
+		/* Unknown error */
+		if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS,
+			    EDC_ERROR_TIMEFRAME)) {
+			dev_err(dev, "DTI: URB max acceptable errors "
+				"exceeded, resetting device\n");
+			wa_reset_all(wa);
+			goto out;
+		}
+		if (printk_ratelimit())
+			dev_err(dev, "DTI: URB error %d\n", urb->status);
+		break;
+	}
+
+	/* Resubmit the DTI URB if we are not busy processing isoc in frames. */
+	if (!dti_busy) {
+		result = usb_submit_urb(wa->dti_urb, GFP_ATOMIC);
+		if (result < 0) {
+			dev_err(dev, "DTI Error: Could not submit DTI URB (%d)\n",
+				result);
+			wa_reset_all(wa);
+		}
+	}
+out:
+	return;
+}
+
+/*
+ * Initialize the DTI URB for reading transfer result notifications and also
+ * the buffer-in URB, for reading buffers. Then we just submit the DTI URB.
+ */
+int wa_dti_start(struct wahc *wa)
+{
+	const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd;
+	struct device *dev = &wa->usb_iface->dev;
+	int result = -ENOMEM, index;
+
+	if (wa->dti_urb != NULL)	/* DTI URB already started */
+		goto out;
+
+	wa->dti_urb = usb_alloc_urb(0, GFP_KERNEL);
+	if (wa->dti_urb == NULL)
+		goto error_dti_urb_alloc;
+	usb_fill_bulk_urb(
+		wa->dti_urb, wa->usb_dev,
+		usb_rcvbulkpipe(wa->usb_dev, 0x80 | dti_epd->bEndpointAddress),
+		wa->dti_buf, wa->dti_buf_size,
+		wa_dti_cb, wa);
+
+	/* init the buf in URBs */
+	for (index = 0; index < WA_MAX_BUF_IN_URBS; ++index) {
+		usb_fill_bulk_urb(
+			&(wa->buf_in_urbs[index]), wa->usb_dev,
+			usb_rcvbulkpipe(wa->usb_dev,
+				0x80 | dti_epd->bEndpointAddress),
+			NULL, 0, wa_buf_in_cb, wa);
+	}
+	result = usb_submit_urb(wa->dti_urb, GFP_KERNEL);
+	if (result < 0) {
+		dev_err(dev, "DTI Error: Could not submit DTI URB (%d) resetting\n",
+			result);
+		goto error_dti_urb_submit;
+	}
+out:
+	return 0;
+
+error_dti_urb_submit:
+	usb_put_urb(wa->dti_urb);
+	wa->dti_urb = NULL;
+error_dti_urb_alloc:
+	return result;
+}
+EXPORT_SYMBOL_GPL(wa_dti_start);
+/*
+ * Transfer complete notification
+ *
+ * Called from the notif.c code. We get a notification on EP2 saying
+ * that some endpoint has some transfer result data available. We are
+ * about to read it.
+ *
+ * To speed up things, we always have a URB reading the DTI URB; we
+ * don't really set it up and start it until the first xfer complete
+ * notification arrives, which is what we do here.
+ *
+ * Follow up in wa_dti_cb(), as that's where the whole state
+ * machine starts.
+ *
+ * @wa shall be referenced
+ */
+void wa_handle_notif_xfer(struct wahc *wa, struct wa_notif_hdr *notif_hdr)
+{
+	struct device *dev = &wa->usb_iface->dev;
+	struct wa_notif_xfer *notif_xfer;
+	const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd;
+
+	notif_xfer = container_of(notif_hdr, struct wa_notif_xfer, hdr);
+	BUG_ON(notif_hdr->bNotifyType != WA_NOTIF_TRANSFER);
+
+	if ((0x80 | notif_xfer->bEndpoint) != dti_epd->bEndpointAddress) {
+		/* FIXME: hardcoded limitation, adapt */
+		dev_err(dev, "BUG: DTI ep is %u, not %u (hack me)\n",
+			notif_xfer->bEndpoint, dti_epd->bEndpointAddress);
+		goto error;
+	}
+
+	/* attempt to start the DTI ep processing. */
+	if (wa_dti_start(wa) < 0)
+		goto error;
+
+	return;
+
+error:
+	wa_reset_all(wa);
+}
diff --git a/drivers/staging/wusbcore/wusbhc.c b/drivers/staging/wusbcore/wusbhc.c
new file mode 100644
index 000000000000..d0b404d258e8
--- /dev/null
+++ b/drivers/staging/wusbcore/wusbhc.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Host Controller
+ * sysfs glue, wusbcore module support and life cycle management
+ *
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * Creation/destruction of wusbhc is split in two parts; that that
+ * doesn't require the HCD to be added (wusbhc_{create,destroy}) and
+ * the one that requires (phase B, wusbhc_b_{create,destroy}).
+ *
+ * This is so because usb_add_hcd() will start the HC, and thus, all
+ * the HC specific stuff has to be already initialized (like sysfs
+ * thingies).
+ */
+#include <linux/device.h>
+#include <linux/module.h>
+#include "wusbhc.h"
+
+/**
+ * Extract the wusbhc that corresponds to a USB Host Controller class device
+ *
+ * WARNING! Apply only if @dev is that of a
+ *          wusbhc.usb_hcd.self->class_dev; otherwise, you loose.
+ */
+static struct wusbhc *usbhc_dev_to_wusbhc(struct device *dev)
+{
+	struct usb_bus *usb_bus = dev_get_drvdata(dev);
+	struct usb_hcd *usb_hcd = bus_to_hcd(usb_bus);
+	return usb_hcd_to_wusbhc(usb_hcd);
+}
+
+/*
+ * Show & store the current WUSB trust timeout
+ *
+ * We don't do locking--it is an 'atomic' value.
+ *
+ * The units that we store/show are always MILLISECONDS. However, the
+ * value of trust_timeout is jiffies.
+ */
+static ssize_t wusb_trust_timeout_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n", wusbhc->trust_timeout);
+}
+
+static ssize_t wusb_trust_timeout_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t size)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+	ssize_t result = -ENOSYS;
+	unsigned trust_timeout;
+
+	result = sscanf(buf, "%u", &trust_timeout);
+	if (result != 1) {
+		result = -EINVAL;
+		goto out;
+	}
+	wusbhc->trust_timeout = min_t(unsigned, trust_timeout, 500);
+	cancel_delayed_work(&wusbhc->keep_alive_timer);
+	flush_workqueue(wusbd);
+	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
+			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
+out:
+	return result < 0 ? result : size;
+}
+static DEVICE_ATTR_RW(wusb_trust_timeout);
+
+/*
+ * Show the current WUSB CHID.
+ */
+static ssize_t wusb_chid_show(struct device *dev,
+			      struct device_attribute *attr, char *buf)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+	const struct wusb_ckhdid *chid;
+
+	if (wusbhc->wuie_host_info != NULL)
+		chid = &wusbhc->wuie_host_info->CHID;
+	else
+		chid = &wusb_ckhdid_zero;
+
+	return sprintf(buf, "%16ph\n", chid->data);
+}
+
+/*
+ * Store a new CHID.
+ *
+ * - Write an all zeros CHID and it will stop the controller
+ * - Write a non-zero CHID and it will start it.
+ *
+ * See wusbhc_chid_set() for more info.
+ */
+static ssize_t wusb_chid_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t size)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+	struct wusb_ckhdid chid;
+	ssize_t result;
+
+	result = sscanf(buf,
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx "
+			"%02hhx %02hhx %02hhx %02hhx\n",
+			&chid.data[0] , &chid.data[1] ,
+			&chid.data[2] , &chid.data[3] ,
+			&chid.data[4] , &chid.data[5] ,
+			&chid.data[6] , &chid.data[7] ,
+			&chid.data[8] , &chid.data[9] ,
+			&chid.data[10], &chid.data[11],
+			&chid.data[12], &chid.data[13],
+			&chid.data[14], &chid.data[15]);
+	if (result != 16) {
+		dev_err(dev, "Unrecognized CHID (need 16 8-bit hex digits): "
+			"%d\n", (int)result);
+		return -EINVAL;
+	}
+	result = wusbhc_chid_set(wusbhc, &chid);
+	return result < 0 ? result : size;
+}
+static DEVICE_ATTR_RW(wusb_chid);
+
+
+static ssize_t wusb_phy_rate_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+
+	return sprintf(buf, "%d\n", wusbhc->phy_rate);
+}
+
+static ssize_t wusb_phy_rate_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+	uint8_t phy_rate;
+	ssize_t result;
+
+	result = sscanf(buf, "%hhu", &phy_rate);
+	if (result != 1)
+		return -EINVAL;
+	if (phy_rate >= UWB_PHY_RATE_INVALID)
+		return -EINVAL;
+
+	wusbhc->phy_rate = phy_rate;
+	return size;
+}
+static DEVICE_ATTR_RW(wusb_phy_rate);
+
+static ssize_t wusb_dnts_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+
+	return sprintf(buf, "num slots: %d\ninterval: %dms\n",
+			wusbhc->dnts_num_slots, wusbhc->dnts_interval);
+}
+
+static ssize_t wusb_dnts_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+	uint8_t num_slots, interval;
+	ssize_t result;
+
+	result = sscanf(buf, "%hhu %hhu", &num_slots, &interval);
+
+	if (result != 2)
+		return -EINVAL;
+
+	wusbhc->dnts_num_slots = num_slots;
+	wusbhc->dnts_interval = interval;
+
+	return size;
+}
+static DEVICE_ATTR_RW(wusb_dnts);
+
+static ssize_t wusb_retry_count_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *buf)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+
+	return sprintf(buf, "%d\n", wusbhc->retry_count);
+}
+
+static ssize_t wusb_retry_count_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t size)
+{
+	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
+	uint8_t retry_count;
+	ssize_t result;
+
+	result = sscanf(buf, "%hhu", &retry_count);
+
+	if (result != 1)
+		return -EINVAL;
+
+	wusbhc->retry_count = max_t(uint8_t, retry_count,
+					WUSB_RETRY_COUNT_MAX);
+
+	return size;
+}
+static DEVICE_ATTR_RW(wusb_retry_count);
+
+/* Group all the WUSBHC attributes */
+static struct attribute *wusbhc_attrs[] = {
+		&dev_attr_wusb_trust_timeout.attr,
+		&dev_attr_wusb_chid.attr,
+		&dev_attr_wusb_phy_rate.attr,
+		&dev_attr_wusb_dnts.attr,
+		&dev_attr_wusb_retry_count.attr,
+		NULL,
+};
+
+static const struct attribute_group wusbhc_attr_group = {
+	.name = NULL,	/* we want them in the same directory */
+	.attrs = wusbhc_attrs,
+};
+
+/*
+ * Create a wusbhc instance
+ *
+ * NOTEs:
+ *
+ *  - assumes *wusbhc has been zeroed and wusbhc->usb_hcd has been
+ *    initialized but not added.
+ *
+ *  - fill out ports_max, mmcies_max and mmcie_{add,rm} before calling.
+ *
+ *  - fill out wusbhc->uwb_rc and refcount it before calling
+ *  - fill out the wusbhc->sec_modes array
+ */
+int wusbhc_create(struct wusbhc *wusbhc)
+{
+	int result = 0;
+
+	/* set defaults.  These can be overwritten using sysfs attributes. */
+	wusbhc->trust_timeout = WUSB_TRUST_TIMEOUT_MS;
+	wusbhc->phy_rate = UWB_PHY_RATE_INVALID - 1;
+	wusbhc->dnts_num_slots = 4;
+	wusbhc->dnts_interval = 2;
+	wusbhc->retry_count = WUSB_RETRY_COUNT_INFINITE;
+
+	mutex_init(&wusbhc->mutex);
+	result = wusbhc_mmcie_create(wusbhc);
+	if (result < 0)
+		goto error_mmcie_create;
+	result = wusbhc_devconnect_create(wusbhc);
+	if (result < 0)
+		goto error_devconnect_create;
+	result = wusbhc_rh_create(wusbhc);
+	if (result < 0)
+		goto error_rh_create;
+	result = wusbhc_sec_create(wusbhc);
+	if (result < 0)
+		goto error_sec_create;
+	return 0;
+
+error_sec_create:
+	wusbhc_rh_destroy(wusbhc);
+error_rh_create:
+	wusbhc_devconnect_destroy(wusbhc);
+error_devconnect_create:
+	wusbhc_mmcie_destroy(wusbhc);
+error_mmcie_create:
+	return result;
+}
+EXPORT_SYMBOL_GPL(wusbhc_create);
+
+static inline struct kobject *wusbhc_kobj(struct wusbhc *wusbhc)
+{
+	return &wusbhc->usb_hcd.self.controller->kobj;
+}
+
+/*
+ * Phase B of a wusbhc instance creation
+ *
+ * Creates fields that depend on wusbhc->usb_hcd having been
+ * added. This is where we create the sysfs files in
+ * /sys/class/usb_host/usb_hostX/.
+ *
+ * NOTE: Assumes wusbhc->usb_hcd has been already added by the upper
+ *       layer (hwahc or whci)
+ */
+int wusbhc_b_create(struct wusbhc *wusbhc)
+{
+	int result = 0;
+	struct device *dev = wusbhc->usb_hcd.self.controller;
+
+	result = sysfs_create_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group);
+	if (result < 0) {
+		dev_err(dev, "Cannot register WUSBHC attributes: %d\n",
+			result);
+		goto error_create_attr_group;
+	}
+
+	return 0;
+error_create_attr_group:
+	return result;
+}
+EXPORT_SYMBOL_GPL(wusbhc_b_create);
+
+void wusbhc_b_destroy(struct wusbhc *wusbhc)
+{
+	wusbhc_pal_unregister(wusbhc);
+	sysfs_remove_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group);
+}
+EXPORT_SYMBOL_GPL(wusbhc_b_destroy);
+
+void wusbhc_destroy(struct wusbhc *wusbhc)
+{
+	wusbhc_sec_destroy(wusbhc);
+	wusbhc_rh_destroy(wusbhc);
+	wusbhc_devconnect_destroy(wusbhc);
+	wusbhc_mmcie_destroy(wusbhc);
+}
+EXPORT_SYMBOL_GPL(wusbhc_destroy);
+
+struct workqueue_struct *wusbd;
+EXPORT_SYMBOL_GPL(wusbd);
+
+/*
+ * WUSB Cluster ID allocation map
+ *
+ * Each WUSB bus in a channel is identified with a Cluster Id in the
+ * unauth address pace (WUSB1.0[4.3]). We take the range 0xe0 to 0xff
+ * (that's space for 31 WUSB controllers, as 0xff can't be taken). We
+ * start taking from 0xff, 0xfe, 0xfd... (hence the += or -= 0xff).
+ *
+ * For each one we taken, we pin it in the bitap
+ */
+#define CLUSTER_IDS 32
+static DECLARE_BITMAP(wusb_cluster_id_table, CLUSTER_IDS);
+static DEFINE_SPINLOCK(wusb_cluster_ids_lock);
+
+/*
+ * Get a WUSB Cluster ID
+ *
+ * Need to release with wusb_cluster_id_put() when done w/ it.
+ */
+/* FIXME: coordinate with the choose_addres() from the USB stack */
+/* we want to leave the top of the 128 range for cluster addresses and
+ * the bottom for device addresses (as we map them one on one with
+ * ports). */
+u8 wusb_cluster_id_get(void)
+{
+	u8 id;
+	spin_lock(&wusb_cluster_ids_lock);
+	id = find_first_zero_bit(wusb_cluster_id_table, CLUSTER_IDS);
+	if (id >= CLUSTER_IDS) {
+		id = 0;
+		goto out;
+	}
+	set_bit(id, wusb_cluster_id_table);
+	id = (u8) 0xff - id;
+out:
+	spin_unlock(&wusb_cluster_ids_lock);
+	return id;
+
+}
+EXPORT_SYMBOL_GPL(wusb_cluster_id_get);
+
+/*
+ * Release a WUSB Cluster ID
+ *
+ * Obtained it with wusb_cluster_id_get()
+ */
+void wusb_cluster_id_put(u8 id)
+{
+	id = 0xff - id;
+	BUG_ON(id >= CLUSTER_IDS);
+	spin_lock(&wusb_cluster_ids_lock);
+	WARN_ON(!test_bit(id, wusb_cluster_id_table));
+	clear_bit(id, wusb_cluster_id_table);
+	spin_unlock(&wusb_cluster_ids_lock);
+}
+EXPORT_SYMBOL_GPL(wusb_cluster_id_put);
+
+/**
+ * wusbhc_giveback_urb - return an URB to the USB core
+ * @wusbhc: the host controller the URB is from.
+ * @urb:    the URB.
+ * @status: the URB's status.
+ *
+ * Return an URB to the USB core doing some additional WUSB specific
+ * processing.
+ *
+ *  - After a successful transfer, update the trust timeout timestamp
+ *    for the WUSB device.
+ *
+ *  - [WUSB] sections 4.13 and 7.5.1 specify the stop retransmission
+ *    condition for the WCONNECTACK_IE is that the host has observed
+ *    the associated device responding to a control transfer.
+ */
+void wusbhc_giveback_urb(struct wusbhc *wusbhc, struct urb *urb, int status)
+{
+	struct wusb_dev *wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc,
+					urb->dev);
+
+	if (status == 0 && wusb_dev) {
+		wusb_dev->entry_ts = jiffies;
+
+		/* wusbhc_devconnect_acked() can't be called from
+		   atomic context so defer it to a work queue. */
+		if (!list_empty(&wusb_dev->cack_node))
+			queue_work(wusbd, &wusb_dev->devconnect_acked_work);
+		else
+			wusb_dev_put(wusb_dev);
+	}
+
+	usb_hcd_giveback_urb(&wusbhc->usb_hcd, urb, status);
+}
+EXPORT_SYMBOL_GPL(wusbhc_giveback_urb);
+
+/**
+ * wusbhc_reset_all - reset the HC hardware
+ * @wusbhc: the host controller to reset.
+ *
+ * Request a full hardware reset of the chip.  This will also reset
+ * the radio controller and any other PALs.
+ */
+void wusbhc_reset_all(struct wusbhc *wusbhc)
+{
+	if (wusbhc->uwb_rc)
+		uwb_rc_reset_all(wusbhc->uwb_rc);
+}
+EXPORT_SYMBOL_GPL(wusbhc_reset_all);
+
+static struct notifier_block wusb_usb_notifier = {
+	.notifier_call = wusb_usb_ncb,
+	.priority = INT_MAX	/* Need to be called first of all */
+};
+
+static int __init wusbcore_init(void)
+{
+	int result;
+	result = wusb_crypto_init();
+	if (result < 0)
+		goto error_crypto_init;
+	/* WQ is singlethread because we need to serialize notifications */
+	wusbd = create_singlethread_workqueue("wusbd");
+	if (wusbd == NULL) {
+		result = -ENOMEM;
+		printk(KERN_ERR "WUSB-core: Cannot create wusbd workqueue\n");
+		goto error_wusbd_create;
+	}
+	usb_register_notify(&wusb_usb_notifier);
+	bitmap_zero(wusb_cluster_id_table, CLUSTER_IDS);
+	set_bit(0, wusb_cluster_id_table);	/* reserve Cluster ID 0xff */
+	return 0;
+
+error_wusbd_create:
+	wusb_crypto_exit();
+error_crypto_init:
+	return result;
+
+}
+module_init(wusbcore_init);
+
+static void __exit wusbcore_exit(void)
+{
+	clear_bit(0, wusb_cluster_id_table);
+	if (!bitmap_empty(wusb_cluster_id_table, CLUSTER_IDS)) {
+		printk(KERN_ERR "BUG: WUSB Cluster IDs not released on exit: %*pb\n",
+		       CLUSTER_IDS, wusb_cluster_id_table);
+		WARN_ON(1);
+	}
+	usb_unregister_notify(&wusb_usb_notifier);
+	destroy_workqueue(wusbd);
+	wusb_crypto_exit();
+}
+module_exit(wusbcore_exit);
+
+MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
+MODULE_DESCRIPTION("Wireless USB core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/wusbcore/wusbhc.h b/drivers/staging/wusbcore/wusbhc.h
new file mode 100644
index 000000000000..716244a2ec44
--- /dev/null
+++ b/drivers/staging/wusbcore/wusbhc.h
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wireless USB Host Controller
+ * Common infrastructure for WHCI and HWA WUSB-HC drivers
+ *
+ *
+ * Copyright (C) 2005-2006 Intel Corporation
+ * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+ *
+ * This driver implements parts common to all Wireless USB Host
+ * Controllers (struct wusbhc, embedding a struct usb_hcd) and is used
+ * by:
+ *
+ *   - hwahc: HWA, USB-dongle that implements a Wireless USB host
+ *     controller, (Wireless USB 1.0 Host-Wire-Adapter specification).
+ *
+ *   - whci: WHCI, a PCI card with a wireless host controller
+ *     (Wireless Host Controller Interface 1.0 specification).
+ *
+ * Check out the Design-overview.txt file in the source documentation
+ * for other details on the implementation.
+ *
+ * Main blocks:
+ *
+ *  rh         Root Hub emulation (part of the HCD glue)
+ *
+ *  devconnect Handle all the issues related to device connection,
+ *             authentication, disconnection, timeout, reseting,
+ *             keepalives, etc.
+ *
+ *  mmc        MMC IE broadcasting handling
+ *
+ * A host controller driver just initializes its stuff and as part of
+ * that, creates a 'struct wusbhc' instance that handles all the
+ * common WUSB mechanisms. Links in the function ops that are specific
+ * to it and then registers the host controller. Ready to run.
+ */
+
+#ifndef __WUSBHC_H__
+#define __WUSBHC_H__
+
+#include <linux/usb.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <linux/usb/hcd.h>
+#include "../uwb/uwb.h"
+#include "include/wusb.h"
+
+/*
+ * Time from a WUSB channel stop request to the last transmitted MMC.
+ *
+ * This needs to be > 4.096 ms in case no MMCs can be transmitted in
+ * zone 0.
+ */
+#define WUSB_CHANNEL_STOP_DELAY_MS 8
+#define WUSB_RETRY_COUNT_MAX 15
+#define WUSB_RETRY_COUNT_INFINITE 0
+
+/**
+ * Wireless USB device
+ *
+ * Describe a WUSB device connected to the cluster. This struct
+ * belongs to the 'struct wusb_port' it is attached to and it is
+ * responsible for putting and clearing the pointer to it.
+ *
+ * Note this "complements" the 'struct usb_device' that the usb_hcd
+ * keeps for each connected USB device. However, it extends some
+ * information that is not available (there is no hcpriv ptr in it!)
+ * *and* most importantly, it's life cycle is different. It is created
+ * as soon as we get a DN_Connect (connect request notification) from
+ * the device through the WUSB host controller; the USB stack doesn't
+ * create the device until we authenticate it. FIXME: this will
+ * change.
+ *
+ * @bos:    This is allocated when the BOS descriptors are read from
+ *          the device and freed upon the wusb_dev struct dying.
+ * @wusb_cap_descr: points into @bos, and has been verified to be size
+ *                  safe.
+ */
+struct wusb_dev {
+	struct kref refcnt;
+	struct wusbhc *wusbhc;
+	struct list_head cack_node;	/* Connect-Ack list */
+	struct list_head rekey_node;	/* GTK rekey list */
+	u8 port_idx;
+	u8 addr;
+	u8 beacon_type:4;
+	struct usb_encryption_descriptor ccm1_etd;
+	struct wusb_ckhdid cdid;
+	unsigned long entry_ts;
+	struct usb_bos_descriptor *bos;
+	struct usb_wireless_cap_descriptor *wusb_cap_descr;
+	struct uwb_mas_bm availability;
+	struct work_struct devconnect_acked_work;
+	struct usb_device *usb_dev;
+};
+
+#define WUSB_DEV_ADDR_UNAUTH 0x80
+
+static inline void wusb_dev_init(struct wusb_dev *wusb_dev)
+{
+	kref_init(&wusb_dev->refcnt);
+	/* no need to init the cack_node */
+}
+
+extern void wusb_dev_destroy(struct kref *_wusb_dev);
+
+static inline struct wusb_dev *wusb_dev_get(struct wusb_dev *wusb_dev)
+{
+	kref_get(&wusb_dev->refcnt);
+	return wusb_dev;
+}
+
+static inline void wusb_dev_put(struct wusb_dev *wusb_dev)
+{
+	kref_put(&wusb_dev->refcnt, wusb_dev_destroy);
+}
+
+/**
+ * Wireless USB Host Controller root hub "fake" ports
+ * (state and device information)
+ *
+ * Wireless USB is wireless, so there are no ports; but we
+ * fake'em. Each RC can connect a max of devices at the same time
+ * (given in the Wireless Adapter descriptor, bNumPorts or WHCI's
+ * caps), referred to in wusbhc->ports_max.
+ *
+ * See rh.c for more information.
+ *
+ * The @status and @change use the same bits as in USB2.0[11.24.2.7],
+ * so we don't have to do much when getting the port's status.
+ *
+ * WUSB1.0[7.1], USB2.0[11.24.2.7.1,fig 11-10],
+ * include/linux/usb_ch9.h (#define USB_PORT_STAT_*)
+ */
+struct wusb_port {
+	u16 status;
+	u16 change;
+	struct wusb_dev *wusb_dev;	/* connected device's info */
+	u32 ptk_tkid;
+};
+
+/**
+ * WUSB Host Controller specifics
+ *
+ * All fields that are common to all Wireless USB controller types
+ * (HWA and WHCI) are grouped here. Host Controller
+ * functions/operations that only deal with general Wireless USB HC
+ * issues use this data type to refer to the host.
+ *
+ * @usb_hcd	   Instantiation of a USB host controller
+ *                 (initialized by upper layer [HWA=HC or WHCI].
+ *
+ * @dev		   Device that implements this; initialized by the
+ *                 upper layer (HWA-HC, WHCI...); this device should
+ *                 have a refcount.
+ *
+ * @trust_timeout  After this time without hearing for device
+ *                 activity, we consider the device gone and we have to
+ *                 re-authenticate.
+ *
+ *                 Can be accessed w/o locking--however, read to a
+ *                 local variable then use.
+ *
+ * @chid           WUSB Cluster Host ID: this is supposed to be a
+ *                 unique value that doesn't change across reboots (so
+ *                 that your devices do not require re-association).
+ *
+ *                 Read/Write protected by @mutex
+ *
+ * @dev_info       This array has ports_max elements. It is used to
+ *                 give the HC information about the WUSB devices (see
+ *                 'struct wusb_dev_info').
+ *
+ *	           For HWA we need to allocate it in heap; for WHCI it
+ *                 needs to be permanently mapped, so we keep it for
+ *                 both and make it easy. Call wusbhc->dev_info_set()
+ *                 to update an entry.
+ *
+ * @ports_max	   Number of simultaneous device connections (fake
+ *                 ports) this HC will take. Read-only.
+ *
+ * @port	   Array of port status for each fake root port. Guaranteed to
+ *                 always be the same length during device existence
+ *                 [this allows for some unlocked but referenced reading].
+ *
+ * @mmcies_max	   Max number of Information Elements this HC can send
+ *                 in its MMC. Read-only.
+ *
+ * @start          Start the WUSB channel.
+ *
+ * @stop           Stop the WUSB channel after the specified number of
+ *                 milliseconds.  Channel Stop IEs should be transmitted
+ *                 as required by [WUSB] 4.16.2.1.
+ *
+ * @mmcie_add	   HC specific operation (WHCI or HWA) for adding an
+ *                 MMCIE.
+ *
+ * @mmcie_rm	   HC specific operation (WHCI or HWA) for removing an
+ *                 MMCIE.
+ *
+ * @set_ptk:       Set the PTK and enable encryption for a device. Or, if
+ *                 the supplied key is NULL, disable encryption for that
+ *                 device.
+ *
+ * @set_gtk:       Set the GTK to be used for all future broadcast packets
+ *                 (i.e., MMCs).  With some hardware, setting the GTK may start
+ *                 MMC transmission.
+ *
+ * NOTE:
+ *
+ *  - If wusb_dev->usb_dev is not NULL, then usb_dev is valid
+ *    (wusb_dev has a refcount on it). Likewise, if usb_dev->wusb_dev
+ *    is not NULL, usb_dev->wusb_dev is valid (usb_dev keeps a
+ *    refcount on it).
+ *
+ *    Most of the times when you need to use it, it will be non-NULL,
+ *    so there is no real need to check for it (wusb_dev will
+ *    disappear before usb_dev).
+ *
+ *  - The following fields need to be filled out before calling
+ *    wusbhc_create(): ports_max, mmcies_max, mmcie_{add,rm}.
+ *
+ *  - there is no wusbhc_init() method, we do everything in
+ *    wusbhc_create().
+ *
+ *  - Creation is done in two phases, wusbhc_create() and
+ *    wusbhc_create_b(); b are the parts that need to be called after
+ *    calling usb_hcd_add(&wusbhc->usb_hcd).
+ */
+struct wusbhc {
+	struct usb_hcd usb_hcd;		/* HAS TO BE 1st */
+	struct device *dev;
+	struct uwb_rc *uwb_rc;
+	struct uwb_pal pal;
+
+	unsigned trust_timeout;			/* in jiffies */
+	struct wusb_ckhdid chid;
+	uint8_t phy_rate;
+	uint8_t dnts_num_slots;
+	uint8_t dnts_interval;
+	uint8_t retry_count;
+	struct wuie_host_info *wuie_host_info;
+
+	struct mutex mutex;			/* locks everything else */
+	u16 cluster_id;				/* Wireless USB Cluster ID */
+	struct wusb_port *port;			/* Fake port status handling */
+	struct wusb_dev_info *dev_info;		/* for Set Device Info mgmt */
+	u8 ports_max;
+	unsigned active:1;			/* currently xmit'ing MMCs */
+	struct wuie_keep_alive keep_alive_ie;	/* protected by mutex */
+	struct delayed_work keep_alive_timer;
+	struct list_head cack_list;		/* Connect acknowledging */
+	size_t cack_count;			/* protected by 'mutex' */
+	struct wuie_connect_ack cack_ie;
+	struct uwb_rsv *rsv;		/* cluster bandwidth reservation */
+
+	struct mutex mmcie_mutex;		/* MMC WUIE handling */
+	struct wuie_hdr **mmcie;		/* WUIE array */
+	u8 mmcies_max;
+	/* FIXME: make wusbhc_ops? */
+	int (*start)(struct wusbhc *wusbhc);
+	void (*stop)(struct wusbhc *wusbhc, int delay);
+	int (*mmcie_add)(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
+			 u8 handle, struct wuie_hdr *wuie);
+	int (*mmcie_rm)(struct wusbhc *wusbhc, u8 handle);
+	int (*dev_info_set)(struct wusbhc *, struct wusb_dev *wusb_dev);
+	int (*bwa_set)(struct wusbhc *wusbhc, s8 stream_index,
+		       const struct uwb_mas_bm *);
+	int (*set_ptk)(struct wusbhc *wusbhc, u8 port_idx,
+		       u32 tkid, const void *key, size_t key_size);
+	int (*set_gtk)(struct wusbhc *wusbhc,
+		       u32 tkid, const void *key, size_t key_size);
+	int (*set_num_dnts)(struct wusbhc *wusbhc, u8 interval, u8 slots);
+
+	struct {
+		struct usb_key_descriptor descr;
+		u8 data[16];				/* GTK key data */
+	} __attribute__((packed)) gtk;
+	u8 gtk_index;
+	u32 gtk_tkid;
+
+	/* workqueue for WUSB security related tasks. */
+	struct workqueue_struct *wq_security;
+	struct work_struct gtk_rekey_work;
+
+	struct usb_encryption_descriptor *ccm1_etd;
+};
+
+#define usb_hcd_to_wusbhc(u) container_of((u), struct wusbhc, usb_hcd)
+
+
+extern int wusbhc_create(struct wusbhc *);
+extern int wusbhc_b_create(struct wusbhc *);
+extern void wusbhc_b_destroy(struct wusbhc *);
+extern void wusbhc_destroy(struct wusbhc *);
+extern int wusb_dev_sysfs_add(struct wusbhc *, struct usb_device *,
+			      struct wusb_dev *);
+extern void wusb_dev_sysfs_rm(struct wusb_dev *);
+extern int wusbhc_sec_create(struct wusbhc *);
+extern int wusbhc_sec_start(struct wusbhc *);
+extern void wusbhc_sec_stop(struct wusbhc *);
+extern void wusbhc_sec_destroy(struct wusbhc *);
+extern void wusbhc_giveback_urb(struct wusbhc *wusbhc, struct urb *urb,
+				int status);
+void wusbhc_reset_all(struct wusbhc *wusbhc);
+
+int wusbhc_pal_register(struct wusbhc *wusbhc);
+void wusbhc_pal_unregister(struct wusbhc *wusbhc);
+
+/*
+ * Return @usb_dev's @usb_hcd (properly referenced) or NULL if gone
+ *
+ * @usb_dev: USB device, UNLOCKED and referenced (or otherwise, safe ptr)
+ *
+ * This is a safe assumption as @usb_dev->bus is referenced all the
+ * time during the @usb_dev life cycle.
+ */
+static inline
+struct usb_hcd *usb_hcd_get_by_usb_dev(struct usb_device *usb_dev)
+{
+	struct usb_hcd *usb_hcd;
+	usb_hcd = bus_to_hcd(usb_dev->bus);
+	return usb_get_hcd(usb_hcd);
+}
+
+/*
+ * Increment the reference count on a wusbhc.
+ *
+ * @wusbhc's life cycle is identical to that of the underlying usb_hcd.
+ */
+static inline struct wusbhc *wusbhc_get(struct wusbhc *wusbhc)
+{
+	return usb_get_hcd(&wusbhc->usb_hcd) ? wusbhc : NULL;
+}
+
+/*
+ * Return the wusbhc associated to a @usb_dev
+ *
+ * @usb_dev: USB device, UNLOCKED and referenced (or otherwise, safe ptr)
+ *
+ * @returns: wusbhc for @usb_dev; NULL if the @usb_dev is being torn down.
+ *           WARNING: referenced at the usb_hcd level, unlocked
+ *
+ * FIXME: move offline
+ */
+static inline struct wusbhc *wusbhc_get_by_usb_dev(struct usb_device *usb_dev)
+{
+	struct wusbhc *wusbhc = NULL;
+	struct usb_hcd *usb_hcd;
+	if (usb_dev->devnum > 1 && !usb_dev->wusb) {
+		/* but root hubs */
+		dev_err(&usb_dev->dev, "devnum %d wusb %d\n", usb_dev->devnum,
+			usb_dev->wusb);
+		BUG_ON(usb_dev->devnum > 1 && !usb_dev->wusb);
+	}
+	usb_hcd = usb_hcd_get_by_usb_dev(usb_dev);
+	if (usb_hcd == NULL)
+		return NULL;
+	BUG_ON(usb_hcd->wireless == 0);
+	return wusbhc = usb_hcd_to_wusbhc(usb_hcd);
+}
+
+
+static inline void wusbhc_put(struct wusbhc *wusbhc)
+{
+	usb_put_hcd(&wusbhc->usb_hcd);
+}
+
+int wusbhc_start(struct wusbhc *wusbhc);
+void wusbhc_stop(struct wusbhc *wusbhc);
+extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *);
+
+/* Device connect handling */
+extern int wusbhc_devconnect_create(struct wusbhc *);
+extern void wusbhc_devconnect_destroy(struct wusbhc *);
+extern int wusbhc_devconnect_start(struct wusbhc *wusbhc);
+extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc);
+extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr,
+			     struct wusb_dn_hdr *dn_hdr, size_t size);
+extern void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port);
+extern int wusb_usb_ncb(struct notifier_block *nb, unsigned long val,
+			void *priv);
+extern int wusb_set_dev_addr(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
+			     u8 addr);
+
+/* Wireless USB fake Root Hub methods */
+extern int wusbhc_rh_create(struct wusbhc *);
+extern void wusbhc_rh_destroy(struct wusbhc *);
+
+extern int wusbhc_rh_status_data(struct usb_hcd *, char *);
+extern int wusbhc_rh_control(struct usb_hcd *, u16, u16, u16, char *, u16);
+extern int wusbhc_rh_start_port_reset(struct usb_hcd *, unsigned);
+
+/* MMC handling */
+extern int wusbhc_mmcie_create(struct wusbhc *);
+extern void wusbhc_mmcie_destroy(struct wusbhc *);
+extern int wusbhc_mmcie_set(struct wusbhc *, u8 interval, u8 repeat_cnt,
+			    struct wuie_hdr *);
+extern void wusbhc_mmcie_rm(struct wusbhc *, struct wuie_hdr *);
+
+/* Bandwidth reservation */
+int wusbhc_rsv_establish(struct wusbhc *wusbhc);
+void wusbhc_rsv_terminate(struct wusbhc *wusbhc);
+
+/*
+ * I've always said
+ * I wanted a wedding in a church...
+ *
+ * but lately I've been thinking about
+ * the Botanical Gardens.
+ *
+ * We could do it by the tulips.
+ * It'll be beautiful
+ *
+ * --Security!
+ */
+extern int wusb_dev_sec_add(struct wusbhc *, struct usb_device *,
+				struct wusb_dev *);
+extern void wusb_dev_sec_rm(struct wusb_dev *) ;
+extern int wusb_dev_4way_handshake(struct wusbhc *, struct wusb_dev *,
+				   struct wusb_ckhdid *ck);
+void wusbhc_gtk_rekey(struct wusbhc *wusbhc);
+int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev);
+
+
+/* WUSB Cluster ID handling */
+extern u8 wusb_cluster_id_get(void);
+extern void wusb_cluster_id_put(u8);
+
+/*
+ * wusb_port_by_idx - return the port associated to a zero-based port index
+ *
+ * NOTE: valid without locking as long as wusbhc is referenced (as the
+ *       number of ports doesn't change). The data pointed to has to
+ *       be verified though :)
+ */
+static inline struct wusb_port *wusb_port_by_idx(struct wusbhc *wusbhc,
+						 u8 port_idx)
+{
+	return &wusbhc->port[port_idx];
+}
+
+/*
+ * wusb_port_no_to_idx - Convert port number (per usb_dev->portnum) to
+ * a port_idx.
+ *
+ * USB stack USB ports are 1 based!!
+ *
+ * NOTE: only valid for WUSB devices!!!
+ */
+static inline u8 wusb_port_no_to_idx(u8 port_no)
+{
+	return port_no - 1;
+}
+
+extern struct wusb_dev *__wusb_dev_get_by_usb_dev(struct wusbhc *,
+						  struct usb_device *);
+
+/*
+ * Return a referenced wusb_dev given a @usb_dev
+ *
+ * Returns NULL if the usb_dev is being torn down.
+ *
+ * FIXME: move offline
+ */
+static inline
+struct wusb_dev *wusb_dev_get_by_usb_dev(struct usb_device *usb_dev)
+{
+	struct wusbhc *wusbhc;
+	struct wusb_dev *wusb_dev;
+	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
+	if (wusbhc == NULL)
+		return NULL;
+	mutex_lock(&wusbhc->mutex);
+	wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc, usb_dev);
+	mutex_unlock(&wusbhc->mutex);
+	wusbhc_put(wusbhc);
+	return wusb_dev;
+}
+
+/* Misc */
+
+extern struct workqueue_struct *wusbd;
+#endif /* #ifndef __WUSBHC_H__ */
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index 6e59d370ef81..9987c399819f 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -98,8 +98,6 @@ source "drivers/usb/core/Kconfig"
 
 source "drivers/usb/mon/Kconfig"
 
-source "drivers/usb/wusbcore/Kconfig"
-
 source "drivers/usb/host/Kconfig"
 
 source "drivers/usb/renesas_usbhs/Kconfig"
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index ecc2de1ffaae..db064dd59e08 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -35,8 +35,6 @@ obj-$(CONFIG_USB_MAX3421_HCD)	+= host/
 
 obj-$(CONFIG_USB_C67X00_HCD)	+= c67x00/
 
-obj-$(CONFIG_USB_WUSB)		+= wusbcore/
-
 obj-$(CONFIG_USB_ACM)		+= class/
 obj-$(CONFIG_USB_PRINTER)	+= class/
 obj-$(CONFIG_USB_WDM)		+= class/
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 40b5de597112..d040408f5baa 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -717,32 +717,6 @@ config USB_RENESAS_USBHS_HCD
 	  To compile this driver as a module, choose M here: the
 	  module will be called renesas-usbhs.
 
-config USB_WHCI_HCD
-	tristate "Wireless USB Host Controller Interface (WHCI) driver"
-	depends on USB_PCI && USB && UWB
-	select USB_WUSB
-	select UWB_WHCI
-	help
-	  A driver for PCI-based Wireless USB Host Controllers that are
-	  compliant with the WHCI specification.
-
-	  To compile this driver a module, choose M here: the module
-	  will be called "whci-hcd".
-
-config USB_HWA_HCD
-	tristate "Host Wire Adapter (HWA) driver"
-	depends on USB && UWB
-	select USB_WUSB
-	select UWB_HWA
-	help
-	  This driver enables you to connect Wireless USB devices to
-	  your system using a Host Wire Adaptor USB dongle. This is an
-	  UWB Radio Controller and WUSB Host Controller connected to
-	  your machine via USB (specified in WUSB1.0).
-
-	  To compile this driver a module, choose M here: the module
-	  will be called "hwa-hc".
-
 config USB_IMX21_HCD
        tristate "i.MX21 HCD support"
        depends on ARM && ARCH_MXC
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 84514f71ae44..59b39e6b350b 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -35,8 +35,6 @@ ifneq ($(CONFIG_DEBUG_FS),)
 	xhci-hcd-y		+= xhci-debugfs.o
 endif
 
-obj-$(CONFIG_USB_WHCI_HCD)	+= whci/
-
 obj-$(CONFIG_USB_PCI)	+= pci-quirks.o
 
 obj-$(CONFIG_USB_EHCI_HCD)	+= ehci-hcd.o
@@ -82,7 +80,6 @@ obj-$(CONFIG_USB_SL811_HCD)	+= sl811-hcd.o
 obj-$(CONFIG_USB_SL811_CS)	+= sl811_cs.o
 obj-$(CONFIG_USB_U132_HCD)	+= u132-hcd.o
 obj-$(CONFIG_USB_R8A66597_HCD)	+= r8a66597-hcd.o
-obj-$(CONFIG_USB_HWA_HCD)	+= hwa-hc.o
 obj-$(CONFIG_USB_IMX21_HCD)	+= imx21-hcd.o
 obj-$(CONFIG_USB_FSL_USB2)	+= fsl-mph-dr-of.o
 obj-$(CONFIG_USB_EHCI_FSL)	+= fsl-mph-dr-of.o
diff --git a/drivers/usb/host/hwa-hc.c b/drivers/usb/host/hwa-hc.c
deleted file mode 100644
index 6968b9f2b76b..000000000000
--- a/drivers/usb/host/hwa-hc.c
+++ /dev/null
@@ -1,875 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Host Wire Adapter:
- * Driver glue, HWA-specific functions, bridges to WAHC and WUSBHC
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * The HWA driver is a simple layer that forwards requests to the WAHC
- * (Wire Adater Host Controller) or WUSBHC (Wireless USB Host
- * Controller) layers.
- *
- * Host Wire Adapter is the 'WUSB 1.0 standard' name for Wireless-USB
- * Host Controller that is connected to your system via USB (a USB
- * dongle that implements a USB host...). There is also a Device Wired
- * Adaptor, DWA (Wireless USB hub) that uses the same mechanism for
- * transferring data (it is after all a USB host connected via
- * Wireless USB), we have a common layer called Wire Adapter Host
- * Controller that does all the hard work. The WUSBHC (Wireless USB
- * Host Controller) is the part common to WUSB Host Controllers, the
- * HWA and the PCI-based one, that is implemented following the WHCI
- * spec. All these layers are implemented in ../wusbcore.
- *
- * The main functions are hwahc_op_urb_{en,de}queue(), that pass the
- * job of converting a URB to a Wire Adapter
- *
- * Entry points:
- *
- *   hwahc_driver_*()   Driver initialization, registration and
- *                      teardown.
- *
- *   hwahc_probe()	New device came up, create an instance for
- *                      it [from device enumeration].
- *
- *   hwahc_disconnect()	Remove device instance [from device
- *                      enumeration].
- *
- *   [__]hwahc_op_*()   Host-Wire-Adaptor specific functions for
- *                      starting/stopping/etc (some might be made also
- *                      DWA).
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/workqueue.h>
-#include <linux/wait.h>
-#include <linux/completion.h>
-#include "../wusbcore/wa-hc.h"
-#include "../wusbcore/wusbhc.h"
-
-struct hwahc {
-	struct wusbhc wusbhc;	/* has to be 1st */
-	struct wahc wa;
-};
-
-/*
- * FIXME should be wusbhc
- *
- * NOTE: we need to cache the Cluster ID because later...there is no
- *       way to get it :)
- */
-static int __hwahc_set_cluster_id(struct hwahc *hwahc, u8 cluster_id)
-{
-	int result;
-	struct wusbhc *wusbhc = &hwahc->wusbhc;
-	struct wahc *wa = &hwahc->wa;
-	struct device *dev = &wa->usb_iface->dev;
-
-	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_SET_CLUSTER_ID,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			cluster_id,
-			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-			NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (result < 0)
-		dev_err(dev, "Cannot set WUSB Cluster ID to 0x%02x: %d\n",
-			cluster_id, result);
-	else
-		wusbhc->cluster_id = cluster_id;
-	dev_info(dev, "Wireless USB Cluster ID set to 0x%02x\n", cluster_id);
-	return result;
-}
-
-static int __hwahc_op_set_num_dnts(struct wusbhc *wusbhc, u8 interval, u8 slots)
-{
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-
-	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_SET_NUM_DNTS,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			interval << 8 | slots,
-			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-			NULL, 0, USB_CTRL_SET_TIMEOUT);
-}
-
-/*
- * Reset a WUSB host controller and wait for it to complete doing it.
- *
- * @usb_hcd:	Pointer to WUSB Host Controller instance.
- *
- */
-static int hwahc_op_reset(struct usb_hcd *usb_hcd)
-{
-	int result;
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	mutex_lock(&wusbhc->mutex);
-	wa_nep_disarm(&hwahc->wa);
-	result = __wa_set_feature(&hwahc->wa, WA_RESET);
-	if (result < 0) {
-		dev_err(dev, "error commanding HC to reset: %d\n", result);
-		goto error_unlock;
-	}
-	result = __wa_wait_status(&hwahc->wa, WA_STATUS_RESETTING, 0);
-	if (result < 0) {
-		dev_err(dev, "error waiting for HC to reset: %d\n", result);
-		goto error_unlock;
-	}
-error_unlock:
-	mutex_unlock(&wusbhc->mutex);
-	return result;
-}
-
-/*
- * FIXME: break this function up
- */
-static int hwahc_op_start(struct usb_hcd *usb_hcd)
-{
-	u8 addr;
-	int result;
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-
-	result = -ENOSPC;
-	mutex_lock(&wusbhc->mutex);
-	addr = wusb_cluster_id_get();
-	if (addr == 0)
-		goto error_cluster_id_get;
-	result = __hwahc_set_cluster_id(hwahc, addr);
-	if (result < 0)
-		goto error_set_cluster_id;
-
-	usb_hcd->uses_new_polling = 1;
-	set_bit(HCD_FLAG_POLL_RH, &usb_hcd->flags);
-	usb_hcd->state = HC_STATE_RUNNING;
-
-	/*
-	 * prevent USB core from suspending the root hub since
-	 * bus_suspend and bus_resume are not yet supported.
-	 */
-	pm_runtime_get_noresume(&usb_hcd->self.root_hub->dev);
-
-	result = 0;
-out:
-	mutex_unlock(&wusbhc->mutex);
-	return result;
-
-error_set_cluster_id:
-	wusb_cluster_id_put(addr);
-error_cluster_id_get:
-	goto out;
-
-}
-
-/*
- * No need to abort pipes, as when this is called, all the children
- * has been disconnected and that has done it [through
- * usb_disable_interface() -> usb_disable_endpoint() ->
- * hwahc_op_ep_disable() - >rpipe_ep_disable()].
- */
-static void hwahc_op_stop(struct usb_hcd *usb_hcd)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-
-	mutex_lock(&wusbhc->mutex);
-	wusb_cluster_id_put(wusbhc->cluster_id);
-	mutex_unlock(&wusbhc->mutex);
-}
-
-static int hwahc_op_get_frame_number(struct usb_hcd *usb_hcd)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-
-	/*
-	 * We cannot query the HWA for the WUSB time since that requires sending
-	 * a synchronous URB and this function can be called in_interrupt.
-	 * Instead, query the USB frame number for our parent and use that.
-	 */
-	return usb_get_current_frame_number(wa->usb_dev);
-}
-
-static int hwahc_op_urb_enqueue(struct usb_hcd *usb_hcd, struct urb *urb,
-				gfp_t gfp)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-
-	return wa_urb_enqueue(&hwahc->wa, urb->ep, urb, gfp);
-}
-
-static int hwahc_op_urb_dequeue(struct usb_hcd *usb_hcd, struct urb *urb,
-				int status)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-
-	return wa_urb_dequeue(&hwahc->wa, urb, status);
-}
-
-/*
- * Release resources allocated for an endpoint
- *
- * If there is an associated rpipe to this endpoint, go ahead and put it.
- */
-static void hwahc_op_endpoint_disable(struct usb_hcd *usb_hcd,
-				      struct usb_host_endpoint *ep)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-
-	rpipe_ep_disable(&hwahc->wa, ep);
-}
-
-static int __hwahc_op_wusbhc_start(struct wusbhc *wusbhc)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct device *dev = &hwahc->wa.usb_iface->dev;
-
-	result = __wa_set_feature(&hwahc->wa, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error commanding HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = __wa_wait_status(&hwahc->wa, WA_ENABLE, WA_ENABLE);
-	if (result < 0) {
-		dev_err(dev, "error waiting for HC to start: %d\n", result);
-		goto error_stop;
-	}
-	result = wa_nep_arm(&hwahc->wa, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "cannot listen to notifications: %d\n", result);
-		goto error_stop;
-	}
-	/*
-	 * If WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS is set,
-	 *  disable transfer notifications.
-	 */
-	if (hwahc->wa.quirks &
-		WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS) {
-		struct usb_host_interface *cur_altsetting =
-			hwahc->wa.usb_iface->cur_altsetting;
-
-		result = usb_control_msg(hwahc->wa.usb_dev,
-				usb_sndctrlpipe(hwahc->wa.usb_dev, 0),
-				WA_REQ_ALEREON_DISABLE_XFER_NOTIFICATIONS,
-				USB_DIR_OUT | USB_TYPE_VENDOR |
-					USB_RECIP_INTERFACE,
-				WA_REQ_ALEREON_FEATURE_SET,
-				cur_altsetting->desc.bInterfaceNumber,
-				NULL, 0,
-				USB_CTRL_SET_TIMEOUT);
-		/*
-		 * If we successfully sent the control message, start DTI here
-		 * because no transfer notifications will be received which is
-		 * where DTI is normally started.
-		 */
-		if (result == 0)
-			result = wa_dti_start(&hwahc->wa);
-		else
-			result = 0;	/* OK.  Continue normally. */
-
-		if (result < 0) {
-			dev_err(dev, "cannot start DTI: %d\n", result);
-			goto error_dti_start;
-		}
-	}
-
-	return result;
-
-error_dti_start:
-	wa_nep_disarm(&hwahc->wa);
-error_stop:
-	__wa_clear_feature(&hwahc->wa, WA_ENABLE);
-	return result;
-}
-
-static void __hwahc_op_wusbhc_stop(struct wusbhc *wusbhc, int delay)
-{
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-	int ret;
-
-	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			      WUSB_REQ_CHAN_STOP,
-			      USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			      delay * 1000,
-			      iface_no,
-			      NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (ret == 0)
-		msleep(delay);
-
-	wa_nep_disarm(&hwahc->wa);
-	__wa_stop(&hwahc->wa);
-}
-
-/*
- * Set the UWB MAS allocation for the WUSB cluster
- *
- * @stream_index: stream to use (-1 for cancelling the allocation)
- * @mas: mas bitmap to use
- */
-static int __hwahc_op_bwa_set(struct wusbhc *wusbhc, s8 stream_index,
-			      const struct uwb_mas_bm *mas)
-{
-	int result;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	struct device *dev = &wa->usb_iface->dev;
-	u8 mas_le[UWB_NUM_MAS/8];
-
-	/* Set the stream index */
-	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_SET_STREAM_IDX,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			stream_index,
-			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-			NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "Cannot set WUSB stream index: %d\n", result);
-		goto out;
-	}
-	uwb_mas_bm_copy_le(mas_le, mas);
-	/* Set the MAS allocation */
-	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_SET_WUSB_MAS,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			0, wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-			mas_le, 32, USB_CTRL_SET_TIMEOUT);
-	if (result < 0)
-		dev_err(dev, "Cannot set WUSB MAS allocation: %d\n", result);
-out:
-	return result;
-}
-
-/*
- * Add an IE to the host's MMC
- *
- * @interval:    See WUSB1.0[8.5.3.1]
- * @repeat_cnt:  See WUSB1.0[8.5.3.1]
- * @handle:      See WUSB1.0[8.5.3.1]
- * @wuie:        Pointer to the header of the WUSB IE data to add.
- *               MUST BE allocated in a kmalloc buffer (no stack or
- *               vmalloc).
- *
- * NOTE: the format of the WUSB IEs for MMCs are different to the
- *       normal MBOA MAC IEs (IE Id + Length in MBOA MAC vs. Length +
- *       Id in WUSB IEs). Standards...you gotta love'em.
- */
-static int __hwahc_op_mmcie_add(struct wusbhc *wusbhc, u8 interval,
-				u8 repeat_cnt, u8 handle,
-				struct wuie_hdr *wuie)
-{
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-
-	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_ADD_MMC_IE,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			interval << 8 | repeat_cnt,
-			handle << 8 | iface_no,
-			wuie, wuie->bLength, USB_CTRL_SET_TIMEOUT);
-}
-
-/*
- * Remove an IE to the host's MMC
- *
- * @handle:      See WUSB1.0[8.5.3.1]
- */
-static int __hwahc_op_mmcie_rm(struct wusbhc *wusbhc, u8 handle)
-{
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_REMOVE_MMC_IE,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			0, handle << 8 | iface_no,
-			NULL, 0, USB_CTRL_SET_TIMEOUT);
-}
-
-/*
- * Update device information for a given fake port
- *
- * @port_idx: Fake port to which device is connected (wusbhc index, not
- *            USB port number).
- */
-static int __hwahc_op_dev_info_set(struct wusbhc *wusbhc,
-				   struct wusb_dev *wusb_dev)
-{
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-	struct hwa_dev_info *dev_info;
-	int ret;
-
-	/* fill out the Device Info buffer and send it */
-	dev_info = kzalloc(sizeof(struct hwa_dev_info), GFP_KERNEL);
-	if (!dev_info)
-		return -ENOMEM;
-	uwb_mas_bm_copy_le(dev_info->bmDeviceAvailability,
-			   &wusb_dev->availability);
-	dev_info->bDeviceAddress = wusb_dev->addr;
-
-	/*
-	 * If the descriptors haven't been read yet, use a default PHY
-	 * rate of 53.3 Mbit/s only.  The correct value will be used
-	 * when this will be called again as part of the
-	 * authentication process (which occurs after the descriptors
-	 * have been read).
-	 */
-	if (wusb_dev->wusb_cap_descr)
-		dev_info->wPHYRates = wusb_dev->wusb_cap_descr->wPHYRates;
-	else
-		dev_info->wPHYRates = cpu_to_le16(USB_WIRELESS_PHY_53);
-
-	ret = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			WUSB_REQ_SET_DEV_INFO,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			0, wusb_dev->port_idx << 8 | iface_no,
-			dev_info, sizeof(struct hwa_dev_info),
-			USB_CTRL_SET_TIMEOUT);
-	kfree(dev_info);
-	return ret;
-}
-
-/*
- * Set host's idea of which encryption (and key) method to use when
- * talking to ad evice on a given port.
- *
- * If key is NULL, it means disable encryption for that "virtual port"
- * (used when we disconnect).
- */
-static int __hwahc_dev_set_key(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
-			       const void *key, size_t key_size,
-			       u8 key_idx)
-{
-	int result = -ENOMEM;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-	struct usb_key_descriptor *keyd;
-	size_t keyd_len;
-
-	keyd_len = sizeof(*keyd) + key_size;
-	keyd = kzalloc(keyd_len, GFP_KERNEL);
-	if (keyd == NULL)
-		return -ENOMEM;
-
-	keyd->bLength = keyd_len;
-	keyd->bDescriptorType = USB_DT_KEY;
-	keyd->tTKID[0] = (tkid >>  0) & 0xff;
-	keyd->tTKID[1] = (tkid >>  8) & 0xff;
-	keyd->tTKID[2] = (tkid >> 16) & 0xff;
-	memcpy(keyd->bKeyData, key, key_size);
-
-	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			USB_REQ_SET_DESCRIPTOR,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			USB_DT_KEY << 8 | key_idx,
-			port_idx << 8 | iface_no,
-			keyd, keyd_len, USB_CTRL_SET_TIMEOUT);
-
-	kzfree(keyd); /* clear keys etc. */
-	return result;
-}
-
-/*
- * Set host's idea of which encryption (and key) method to use when
- * talking to ad evice on a given port.
- *
- * If key is NULL, it means disable encryption for that "virtual port"
- * (used when we disconnect).
- */
-static int __hwahc_op_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
-			      const void *key, size_t key_size)
-{
-	int result = -ENOMEM;
-	struct hwahc *hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	struct wahc *wa = &hwahc->wa;
-	u8 iface_no = wa->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-	u8 encryption_value;
-
-	/* Tell the host which key to use to talk to the device */
-	if (key) {
-		u8 key_idx = wusb_key_index(0, WUSB_KEY_INDEX_TYPE_PTK,
-					    WUSB_KEY_INDEX_ORIGINATOR_HOST);
-
-		result = __hwahc_dev_set_key(wusbhc, port_idx, tkid,
-					     key, key_size, key_idx);
-		if (result < 0)
-			goto error_set_key;
-		encryption_value = wusbhc->ccm1_etd->bEncryptionValue;
-	} else {
-		/* FIXME: this should come from wusbhc->etd[UNSECURE].value */
-		encryption_value = 0;
-	}
-
-	/* Set the encryption type for communicating with the device */
-	result = usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			USB_REQ_SET_ENCRYPTION,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			encryption_value, port_idx << 8 | iface_no,
-			NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (result < 0)
-		dev_err(wusbhc->dev, "Can't set host's WUSB encryption for "
-			"port index %u to %s (value %d): %d\n", port_idx,
-			wusb_et_name(wusbhc->ccm1_etd->bEncryptionType),
-			wusbhc->ccm1_etd->bEncryptionValue, result);
-error_set_key:
-	return result;
-}
-
-/*
- * Set host's GTK key
- */
-static int __hwahc_op_set_gtk(struct wusbhc *wusbhc, u32 tkid,
-			      const void *key, size_t key_size)
-{
-	u8 key_idx = wusb_key_index(0, WUSB_KEY_INDEX_TYPE_GTK,
-				    WUSB_KEY_INDEX_ORIGINATOR_HOST);
-
-	return __hwahc_dev_set_key(wusbhc, 0, tkid, key, key_size, key_idx);
-}
-
-/*
- * Get the Wire Adapter class-specific descriptor
- *
- * NOTE: this descriptor comes with the big bundled configuration
- *       descriptor that includes the interfaces' and endpoints', so
- *       we just look for it in the cached copy kept by the USB stack.
- *
- * NOTE2: We convert LE fields to CPU order.
- */
-static int wa_fill_descr(struct wahc *wa)
-{
-	int result;
-	struct device *dev = &wa->usb_iface->dev;
-	char *itr;
-	struct usb_device *usb_dev = wa->usb_dev;
-	struct usb_descriptor_header *hdr;
-	struct usb_wa_descriptor *wa_descr;
-	size_t itr_size, actconfig_idx;
-
-	actconfig_idx = (usb_dev->actconfig - usb_dev->config) /
-			sizeof(usb_dev->config[0]);
-	itr = usb_dev->rawdescriptors[actconfig_idx];
-	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
-	while (itr_size >= sizeof(*hdr)) {
-		hdr = (struct usb_descriptor_header *) itr;
-		dev_dbg(dev, "Extra device descriptor: "
-			"type %02x/%u bytes @ %zu (%zu left)\n",
-			hdr->bDescriptorType, hdr->bLength,
-			(itr - usb_dev->rawdescriptors[actconfig_idx]),
-			itr_size);
-		if (hdr->bDescriptorType == USB_DT_WIRE_ADAPTER)
-			goto found;
-		itr += hdr->bLength;
-		itr_size -= hdr->bLength;
-	}
-	dev_err(dev, "cannot find Wire Adapter Class descriptor\n");
-	return -ENODEV;
-
-found:
-	result = -EINVAL;
-	if (hdr->bLength > itr_size) {	/* is it available? */
-		dev_err(dev, "incomplete Wire Adapter Class descriptor "
-			"(%zu bytes left, %u needed)\n",
-			itr_size, hdr->bLength);
-		goto error;
-	}
-	if (hdr->bLength < sizeof(*wa->wa_descr)) {
-		dev_err(dev, "short Wire Adapter Class descriptor\n");
-		goto error;
-	}
-	wa->wa_descr = wa_descr = (struct usb_wa_descriptor *) hdr;
-	if (le16_to_cpu(wa_descr->bcdWAVersion) > 0x0100)
-		dev_warn(dev, "Wire Adapter v%d.%d newer than groked v1.0\n",
-			 (le16_to_cpu(wa_descr->bcdWAVersion) & 0xff00) >> 8,
-			 le16_to_cpu(wa_descr->bcdWAVersion) & 0x00ff);
-	result = 0;
-error:
-	return result;
-}
-
-static const struct hc_driver hwahc_hc_driver = {
-	.description = "hwa-hcd",
-	.product_desc = "Wireless USB HWA host controller",
-	.hcd_priv_size = sizeof(struct hwahc) - sizeof(struct usb_hcd),
-	.irq = NULL,			/* FIXME */
-	.flags = HCD_USB25,
-	.reset = hwahc_op_reset,
-	.start = hwahc_op_start,
-	.stop = hwahc_op_stop,
-	.get_frame_number = hwahc_op_get_frame_number,
-	.urb_enqueue = hwahc_op_urb_enqueue,
-	.urb_dequeue = hwahc_op_urb_dequeue,
-	.endpoint_disable = hwahc_op_endpoint_disable,
-
-	.hub_status_data = wusbhc_rh_status_data,
-	.hub_control = wusbhc_rh_control,
-	.start_port_reset = wusbhc_rh_start_port_reset,
-};
-
-static int hwahc_security_create(struct hwahc *hwahc)
-{
-	int result;
-	struct wusbhc *wusbhc = &hwahc->wusbhc;
-	struct usb_device *usb_dev = hwahc->wa.usb_dev;
-	struct device *dev = &usb_dev->dev;
-	struct usb_security_descriptor *secd;
-	struct usb_encryption_descriptor *etd;
-	void *itr, *top;
-	size_t itr_size, needed, bytes;
-	u8 index;
-	char buf[64];
-
-	/* Find the host's security descriptors in the config descr bundle */
-	index = (usb_dev->actconfig - usb_dev->config) /
-		sizeof(usb_dev->config[0]);
-	itr = usb_dev->rawdescriptors[index];
-	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
-	top = itr + itr_size;
-	result = __usb_get_extra_descriptor(usb_dev->rawdescriptors[index],
-			le16_to_cpu(usb_dev->actconfig->desc.wTotalLength),
-			USB_DT_SECURITY, (void **) &secd, sizeof(*secd));
-	if (result == -1) {
-		dev_warn(dev, "BUG? WUSB host has no security descriptors\n");
-		return 0;
-	}
-	needed = sizeof(*secd);
-	if (top - (void *)secd < needed) {
-		dev_err(dev, "BUG? Not enough data to process security "
-			"descriptor header (%zu bytes left vs %zu needed)\n",
-			top - (void *) secd, needed);
-		return 0;
-	}
-	needed = le16_to_cpu(secd->wTotalLength);
-	if (top - (void *)secd < needed) {
-		dev_err(dev, "BUG? Not enough data to process security "
-			"descriptors (%zu bytes left vs %zu needed)\n",
-			top - (void *) secd, needed);
-		return 0;
-	}
-	/* Walk over the sec descriptors and store CCM1's on wusbhc */
-	itr = (void *) secd + sizeof(*secd);
-	top = (void *) secd + le16_to_cpu(secd->wTotalLength);
-	index = 0;
-	bytes = 0;
-	while (itr < top) {
-		etd = itr;
-		if (top - itr < sizeof(*etd)) {
-			dev_err(dev, "BUG: bad host security descriptor; "
-				"not enough data (%zu vs %zu left)\n",
-				top - itr, sizeof(*etd));
-			break;
-		}
-		if (etd->bLength < sizeof(*etd)) {
-			dev_err(dev, "BUG: bad host encryption descriptor; "
-				"descriptor is too short "
-				"(%zu vs %zu needed)\n",
-				(size_t)etd->bLength, sizeof(*etd));
-			break;
-		}
-		itr += etd->bLength;
-		bytes += snprintf(buf + bytes, sizeof(buf) - bytes,
-				  "%s (0x%02x) ",
-				  wusb_et_name(etd->bEncryptionType),
-				  etd->bEncryptionValue);
-		wusbhc->ccm1_etd = etd;
-	}
-	dev_info(dev, "supported encryption types: %s\n", buf);
-	if (wusbhc->ccm1_etd == NULL) {
-		dev_err(dev, "E: host doesn't support CCM-1 crypto\n");
-		return 0;
-	}
-	/* Pretty print what we support */
-	return 0;
-}
-
-static void hwahc_security_release(struct hwahc *hwahc)
-{
-	/* nothing to do here so far... */
-}
-
-static int hwahc_create(struct hwahc *hwahc, struct usb_interface *iface,
-	kernel_ulong_t quirks)
-{
-	int result;
-	struct device *dev = &iface->dev;
-	struct wusbhc *wusbhc = &hwahc->wusbhc;
-	struct wahc *wa = &hwahc->wa;
-	struct usb_device *usb_dev = interface_to_usbdev(iface);
-
-	wa->usb_dev = usb_get_dev(usb_dev);	/* bind the USB device */
-	wa->usb_iface = usb_get_intf(iface);
-	wusbhc->dev = dev;
-	/* defer getting the uwb_rc handle until it is needed since it
-	 * may not have been registered by the hwa_rc driver yet. */
-	wusbhc->uwb_rc = NULL;
-	result = wa_fill_descr(wa);	/* Get the device descriptor */
-	if (result < 0)
-		goto error_fill_descriptor;
-	if (wa->wa_descr->bNumPorts > USB_MAXCHILDREN) {
-		dev_err(dev, "FIXME: USB_MAXCHILDREN too low for WUSB "
-			"adapter (%u ports)\n", wa->wa_descr->bNumPorts);
-		wusbhc->ports_max = USB_MAXCHILDREN;
-	} else {
-		wusbhc->ports_max = wa->wa_descr->bNumPorts;
-	}
-	wusbhc->mmcies_max = wa->wa_descr->bNumMMCIEs;
-	wusbhc->start = __hwahc_op_wusbhc_start;
-	wusbhc->stop = __hwahc_op_wusbhc_stop;
-	wusbhc->mmcie_add = __hwahc_op_mmcie_add;
-	wusbhc->mmcie_rm = __hwahc_op_mmcie_rm;
-	wusbhc->dev_info_set = __hwahc_op_dev_info_set;
-	wusbhc->bwa_set = __hwahc_op_bwa_set;
-	wusbhc->set_num_dnts = __hwahc_op_set_num_dnts;
-	wusbhc->set_ptk = __hwahc_op_set_ptk;
-	wusbhc->set_gtk = __hwahc_op_set_gtk;
-	result = hwahc_security_create(hwahc);
-	if (result < 0) {
-		dev_err(dev, "Can't initialize security: %d\n", result);
-		goto error_security_create;
-	}
-	wa->wusb = wusbhc;	/* FIXME: ugly, need to fix */
-	result = wusbhc_create(&hwahc->wusbhc);
-	if (result < 0) {
-		dev_err(dev, "Can't create WUSB HC structures: %d\n", result);
-		goto error_wusbhc_create;
-	}
-	result = wa_create(&hwahc->wa, iface, quirks);
-	if (result < 0)
-		goto error_wa_create;
-	return 0;
-
-error_wa_create:
-	wusbhc_destroy(&hwahc->wusbhc);
-error_wusbhc_create:
-	/* WA Descr fill allocs no resources */
-error_security_create:
-error_fill_descriptor:
-	usb_put_intf(iface);
-	usb_put_dev(usb_dev);
-	return result;
-}
-
-static void hwahc_destroy(struct hwahc *hwahc)
-{
-	struct wusbhc *wusbhc = &hwahc->wusbhc;
-
-	mutex_lock(&wusbhc->mutex);
-	__wa_destroy(&hwahc->wa);
-	wusbhc_destroy(&hwahc->wusbhc);
-	hwahc_security_release(hwahc);
-	hwahc->wusbhc.dev = NULL;
-	uwb_rc_put(wusbhc->uwb_rc);
-	usb_put_intf(hwahc->wa.usb_iface);
-	usb_put_dev(hwahc->wa.usb_dev);
-	mutex_unlock(&wusbhc->mutex);
-}
-
-static void hwahc_init(struct hwahc *hwahc)
-{
-	wa_init(&hwahc->wa);
-}
-
-static int hwahc_probe(struct usb_interface *usb_iface,
-		       const struct usb_device_id *id)
-{
-	int result;
-	struct usb_hcd *usb_hcd;
-	struct wusbhc *wusbhc;
-	struct hwahc *hwahc;
-	struct device *dev = &usb_iface->dev;
-
-	result = -ENOMEM;
-	usb_hcd = usb_create_hcd(&hwahc_hc_driver, &usb_iface->dev, "wusb-hwa");
-	if (usb_hcd == NULL) {
-		dev_err(dev, "unable to allocate instance\n");
-		goto error_alloc;
-	}
-	usb_hcd->wireless = 1;
-	usb_hcd->self.sg_tablesize = ~0;
-	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-	hwahc_init(hwahc);
-	result = hwahc_create(hwahc, usb_iface, id->driver_info);
-	if (result < 0) {
-		dev_err(dev, "Cannot initialize internals: %d\n", result);
-		goto error_hwahc_create;
-	}
-	result = usb_add_hcd(usb_hcd, 0, 0);
-	if (result < 0) {
-		dev_err(dev, "Cannot add HCD: %d\n", result);
-		goto error_add_hcd;
-	}
-	device_wakeup_enable(usb_hcd->self.controller);
-	result = wusbhc_b_create(&hwahc->wusbhc);
-	if (result < 0) {
-		dev_err(dev, "Cannot setup phase B of WUSBHC: %d\n", result);
-		goto error_wusbhc_b_create;
-	}
-	return 0;
-
-error_wusbhc_b_create:
-	usb_remove_hcd(usb_hcd);
-error_add_hcd:
-	hwahc_destroy(hwahc);
-error_hwahc_create:
-	usb_put_hcd(usb_hcd);
-error_alloc:
-	return result;
-}
-
-static void hwahc_disconnect(struct usb_interface *usb_iface)
-{
-	struct usb_hcd *usb_hcd;
-	struct wusbhc *wusbhc;
-	struct hwahc *hwahc;
-
-	usb_hcd = usb_get_intfdata(usb_iface);
-	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	hwahc = container_of(wusbhc, struct hwahc, wusbhc);
-
-	wusbhc_b_destroy(&hwahc->wusbhc);
-	usb_remove_hcd(usb_hcd);
-	hwahc_destroy(hwahc);
-	usb_put_hcd(usb_hcd);
-}
-
-static const struct usb_device_id hwahc_id_table[] = {
-	/* Alereon 5310 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5310, 0xe0, 0x02, 0x01),
-	  .driver_info = WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC |
-		WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS },
-	/* Alereon 5611 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5611, 0xe0, 0x02, 0x01),
-	  .driver_info = WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC |
-		WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS },
-	/* FIXME: use class labels for this */
-	{ USB_INTERFACE_INFO(0xe0, 0x02, 0x01), },
-	{},
-};
-MODULE_DEVICE_TABLE(usb, hwahc_id_table);
-
-static struct usb_driver hwahc_driver = {
-	.name =		"hwa-hc",
-	.probe =	hwahc_probe,
-	.disconnect =	hwahc_disconnect,
-	.id_table =	hwahc_id_table,
-};
-
-module_usb_driver(hwahc_driver);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Host Wired Adapter USB Host Control Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/usb/host/whci/Makefile b/drivers/usb/host/whci/Makefile
deleted file mode 100644
index 859d20079df6..000000000000
--- a/drivers/usb/host/whci/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_USB_WHCI_HCD) += whci-hcd.o
-
-whci-hcd-y := \
-	asl.o	\
-	debug.o \
-	hcd.o	\
-	hw.o	\
-	init.o	\
-	int.o	\
-	pzl.o	\
-	qset.o	\
-	wusb.o
diff --git a/drivers/usb/host/whci/asl.c b/drivers/usb/host/whci/asl.c
deleted file mode 100644
index 276fb34c8efd..000000000000
--- a/drivers/usb/host/whci/asl.c
+++ /dev/null
@@ -1,376 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) asynchronous schedule management.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/dma-mapping.h>
-#include <linux/uwb/umc.h>
-#include <linux/usb.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-static void qset_get_next_prev(struct whc *whc, struct whc_qset *qset,
-			       struct whc_qset **next, struct whc_qset **prev)
-{
-	struct list_head *n, *p;
-
-	BUG_ON(list_empty(&whc->async_list));
-
-	n = qset->list_node.next;
-	if (n == &whc->async_list)
-		n = n->next;
-	p = qset->list_node.prev;
-	if (p == &whc->async_list)
-		p = p->prev;
-
-	*next = container_of(n, struct whc_qset, list_node);
-	*prev = container_of(p, struct whc_qset, list_node);
-
-}
-
-static void asl_qset_insert_begin(struct whc *whc, struct whc_qset *qset)
-{
-	list_move(&qset->list_node, &whc->async_list);
-	qset->in_sw_list = true;
-}
-
-static void asl_qset_insert(struct whc *whc, struct whc_qset *qset)
-{
-	struct whc_qset *next, *prev;
-
-	qset_clear(whc, qset);
-
-	/* Link into ASL. */
-	qset_get_next_prev(whc, qset, &next, &prev);
-	whc_qset_set_link_ptr(&qset->qh.link, next->qset_dma);
-	whc_qset_set_link_ptr(&prev->qh.link, qset->qset_dma);
-	qset->in_hw_list = true;
-}
-
-static void asl_qset_remove(struct whc *whc, struct whc_qset *qset)
-{
-	struct whc_qset *prev, *next;
-
-	qset_get_next_prev(whc, qset, &next, &prev);
-
-	list_move(&qset->list_node, &whc->async_removed_list);
-	qset->in_sw_list = false;
-
-	/*
-	 * No more qsets in the ASL?  The caller must stop the ASL as
-	 * it's no longer valid.
-	 */
-	if (list_empty(&whc->async_list))
-		return;
-
-	/* Remove from ASL. */
-	whc_qset_set_link_ptr(&prev->qh.link, next->qset_dma);
-	qset->in_hw_list = false;
-}
-
-/**
- * process_qset - process any recently inactivated or halted qTDs in a
- * qset.
- *
- * After inactive qTDs are removed, new qTDs can be added if the
- * urb queue still contains URBs.
- *
- * Returns any additional WUSBCMD bits for the ASL sync command (i.e.,
- * WUSBCMD_ASYNC_QSET_RM if a halted qset was removed).
- */
-static uint32_t process_qset(struct whc *whc, struct whc_qset *qset)
-{
-	enum whc_update update = 0;
-	uint32_t status = 0;
-
-	while (qset->ntds) {
-		struct whc_qtd *td;
-
-		td = &qset->qtd[qset->td_start];
-		status = le32_to_cpu(td->status);
-
-		/*
-		 * Nothing to do with a still active qTD.
-		 */
-		if (status & QTD_STS_ACTIVE)
-			break;
-
-		if (status & QTD_STS_HALTED) {
-			/* Ug, an error. */
-			process_halted_qtd(whc, qset, td);
-			/* A halted qTD always triggers an update
-			   because the qset was either removed or
-			   reactivated. */
-			update |= WHC_UPDATE_UPDATED;
-			goto done;
-		}
-
-		/* Mmm, a completed qTD. */
-		process_inactive_qtd(whc, qset, td);
-	}
-
-	if (!qset->remove)
-		update |= qset_add_qtds(whc, qset);
-
-done:
-	/*
-	 * Remove this qset from the ASL if requested, but only if has
-	 * no qTDs.
-	 */
-	if (qset->remove && qset->ntds == 0) {
-		asl_qset_remove(whc, qset);
-		update |= WHC_UPDATE_REMOVED;
-	}
-	return update;
-}
-
-void asl_start(struct whc *whc)
-{
-	struct whc_qset *qset;
-
-	qset = list_first_entry(&whc->async_list, struct whc_qset, list_node);
-
-	le_writeq(qset->qset_dma | QH_LINK_NTDS(8), whc->base + WUSBASYNCLISTADDR);
-
-	whc_write_wusbcmd(whc, WUSBCMD_ASYNC_EN, WUSBCMD_ASYNC_EN);
-	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
-		      WUSBSTS_ASYNC_SCHED, WUSBSTS_ASYNC_SCHED,
-		      1000, "start ASL");
-}
-
-void asl_stop(struct whc *whc)
-{
-	whc_write_wusbcmd(whc, WUSBCMD_ASYNC_EN, 0);
-	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
-		      WUSBSTS_ASYNC_SCHED, 0,
-		      1000, "stop ASL");
-}
-
-/**
- * asl_update - request an ASL update and wait for the hardware to be synced
- * @whc: the WHCI HC
- * @wusbcmd: WUSBCMD value to start the update.
- *
- * If the WUSB HC is inactive (i.e., the ASL is stopped) then the
- * update must be skipped as the hardware may not respond to update
- * requests.
- */
-void asl_update(struct whc *whc, uint32_t wusbcmd)
-{
-	struct wusbhc *wusbhc = &whc->wusbhc;
-	long t;
-
-	mutex_lock(&wusbhc->mutex);
-	if (wusbhc->active) {
-		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
-		t = wait_event_timeout(
-			whc->async_list_wq,
-			(le_readl(whc->base + WUSBCMD) & WUSBCMD_ASYNC_UPDATED) == 0,
-			msecs_to_jiffies(1000));
-		if (t == 0)
-			whc_hw_error(whc, "ASL update timeout");
-	}
-	mutex_unlock(&wusbhc->mutex);
-}
-
-/**
- * scan_async_work - scan the ASL for qsets to process.
- *
- * Process each qset in the ASL in turn and then signal the WHC that
- * the ASL has been updated.
- *
- * Then start, stop or update the asynchronous schedule as required.
- */
-void scan_async_work(struct work_struct *work)
-{
-	struct whc *whc = container_of(work, struct whc, async_work);
-	struct whc_qset *qset, *t;
-	enum whc_update update = 0;
-
-	spin_lock_irq(&whc->lock);
-
-	/*
-	 * Transerve the software list backwards so new qsets can be
-	 * safely inserted into the ASL without making it non-circular.
-	 */
-	list_for_each_entry_safe_reverse(qset, t, &whc->async_list, list_node) {
-		if (!qset->in_hw_list) {
-			asl_qset_insert(whc, qset);
-			update |= WHC_UPDATE_ADDED;
-		}
-
-		update |= process_qset(whc, qset);
-	}
-
-	spin_unlock_irq(&whc->lock);
-
-	if (update) {
-		uint32_t wusbcmd = WUSBCMD_ASYNC_UPDATED | WUSBCMD_ASYNC_SYNCED_DB;
-		if (update & WHC_UPDATE_REMOVED)
-			wusbcmd |= WUSBCMD_ASYNC_QSET_RM;
-		asl_update(whc, wusbcmd);
-	}
-
-	/*
-	 * Now that the ASL is updated, complete the removal of any
-	 * removed qsets.
-	 *
-	 * If the qset was to be reset, do so and reinsert it into the
-	 * ASL if it has pending transfers.
-	 */
-	spin_lock_irq(&whc->lock);
-
-	list_for_each_entry_safe(qset, t, &whc->async_removed_list, list_node) {
-		qset_remove_complete(whc, qset);
-		if (qset->reset) {
-			qset_reset(whc, qset);
-			if (!list_empty(&qset->stds)) {
-				asl_qset_insert_begin(whc, qset);
-				queue_work(whc->workqueue, &whc->async_work);
-			}
-		}
-	}
-
-	spin_unlock_irq(&whc->lock);
-}
-
-/**
- * asl_urb_enqueue - queue an URB onto the asynchronous list (ASL).
- * @whc: the WHCI host controller
- * @urb: the URB to enqueue
- * @mem_flags: flags for any memory allocations
- *
- * The qset for the endpoint is obtained and the urb queued on to it.
- *
- * Work is scheduled to update the hardware's view of the ASL.
- */
-int asl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags)
-{
-	struct whc_qset *qset;
-	int err;
-	unsigned long flags;
-
-	spin_lock_irqsave(&whc->lock, flags);
-
-	err = usb_hcd_link_urb_to_ep(&whc->wusbhc.usb_hcd, urb);
-	if (err < 0) {
-		spin_unlock_irqrestore(&whc->lock, flags);
-		return err;
-	}
-
-	qset = get_qset(whc, urb, GFP_ATOMIC);
-	if (qset == NULL)
-		err = -ENOMEM;
-	else
-		err = qset_add_urb(whc, qset, urb, GFP_ATOMIC);
-	if (!err) {
-		if (!qset->in_sw_list && !qset->remove)
-			asl_qset_insert_begin(whc, qset);
-	} else
-		usb_hcd_unlink_urb_from_ep(&whc->wusbhc.usb_hcd, urb);
-
-	spin_unlock_irqrestore(&whc->lock, flags);
-
-	if (!err)
-		queue_work(whc->workqueue, &whc->async_work);
-
-	return err;
-}
-
-/**
- * asl_urb_dequeue - remove an URB (qset) from the async list.
- * @whc: the WHCI host controller
- * @urb: the URB to dequeue
- * @status: the current status of the URB
- *
- * URBs that do yet have qTDs can simply be removed from the software
- * queue, otherwise the qset must be removed from the ASL so the qTDs
- * can be removed.
- */
-int asl_urb_dequeue(struct whc *whc, struct urb *urb, int status)
-{
-	struct whc_urb *wurb = urb->hcpriv;
-	struct whc_qset *qset = wurb->qset;
-	struct whc_std *std, *t;
-	bool has_qtd = false;
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&whc->lock, flags);
-
-	ret = usb_hcd_check_unlink_urb(&whc->wusbhc.usb_hcd, urb, status);
-	if (ret < 0)
-		goto out;
-
-	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
-		if (std->urb == urb) {
-			if (std->qtd)
-				has_qtd = true;
-			qset_free_std(whc, std);
-		} else
-			std->qtd = NULL; /* so this std is re-added when the qset is */
-	}
-
-	if (has_qtd) {
-		asl_qset_remove(whc, qset);
-		wurb->status = status;
-		wurb->is_async = true;
-		queue_work(whc->workqueue, &wurb->dequeue_work);
-	} else
-		qset_remove_urb(whc, qset, urb, status);
-out:
-	spin_unlock_irqrestore(&whc->lock, flags);
-
-	return ret;
-}
-
-/**
- * asl_qset_delete - delete a qset from the ASL
- */
-void asl_qset_delete(struct whc *whc, struct whc_qset *qset)
-{
-	qset->remove = 1;
-	queue_work(whc->workqueue, &whc->async_work);
-	qset_delete(whc, qset);
-}
-
-/**
- * asl_init - initialize the asynchronous schedule list
- *
- * A dummy qset with no qTDs is added to the ASL to simplify removing
- * qsets (no need to stop the ASL when the last qset is removed).
- */
-int asl_init(struct whc *whc)
-{
-	struct whc_qset *qset;
-
-	qset = qset_alloc(whc, GFP_KERNEL);
-	if (qset == NULL)
-		return -ENOMEM;
-
-	asl_qset_insert_begin(whc, qset);
-	asl_qset_insert(whc, qset);
-
-	return 0;
-}
-
-/**
- * asl_clean_up - free ASL resources
- *
- * The ASL is stopped and empty except for the dummy qset.
- */
-void asl_clean_up(struct whc *whc)
-{
-	struct whc_qset *qset;
-
-	if (!list_empty(&whc->async_list)) {
-		qset = list_first_entry(&whc->async_list, struct whc_qset, list_node);
-		list_del(&qset->list_node);
-		qset_free(whc, qset);
-	}
-}
diff --git a/drivers/usb/host/whci/debug.c b/drivers/usb/host/whci/debug.c
deleted file mode 100644
index 8ddfe3f1f693..000000000000
--- a/drivers/usb/host/whci/debug.c
+++ /dev/null
@@ -1,153 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) debug.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/slab.h>
-#include <linux/kernel.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/export.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-struct whc_dbg {
-	struct dentry *di_f;
-	struct dentry *asl_f;
-	struct dentry *pzl_f;
-};
-
-static void qset_print(struct seq_file *s, struct whc_qset *qset)
-{
-	static const char *qh_type[] = {
-		"ctrl", "isoc", "bulk", "intr", "rsvd", "rsvd", "rsvd", "lpintr", };
-	struct whc_std *std;
-	struct urb *urb = NULL;
-	int i;
-
-	seq_printf(s, "qset %08x", (u32)qset->qset_dma);
-	if (&qset->list_node == qset->whc->async_list.prev) {
-		seq_printf(s, " (dummy)\n");
-	} else {
-		seq_printf(s, " ep%d%s-%s maxpkt: %d\n",
-			   qset->qh.info1 & 0x0f,
-			   (qset->qh.info1 >> 4) & 0x1 ? "in" : "out",
-			   qh_type[(qset->qh.info1 >> 5) & 0x7],
-			   (qset->qh.info1 >> 16) & 0xffff);
-	}
-	seq_printf(s, "  -> %08x\n", (u32)qset->qh.link);
-	seq_printf(s, "  info: %08x %08x %08x\n",
-		   qset->qh.info1, qset->qh.info2,  qset->qh.info3);
-	seq_printf(s, "  sts: %04x errs: %d curwin: %08x\n",
-		   qset->qh.status, qset->qh.err_count, qset->qh.cur_window);
-	seq_printf(s, "  TD: sts: %08x opts: %08x\n",
-		   qset->qh.overlay.qtd.status, qset->qh.overlay.qtd.options);
-
-	for (i = 0; i < WHCI_QSET_TD_MAX; i++) {
-		seq_printf(s, "  %c%c TD[%d]: sts: %08x opts: %08x ptr: %08x\n",
-			i == qset->td_start ? 'S' : ' ',
-			i == qset->td_end ? 'E' : ' ',
-			i, qset->qtd[i].status, qset->qtd[i].options,
-			(u32)qset->qtd[i].page_list_ptr);
-	}
-	seq_printf(s, "  ntds: %d\n", qset->ntds);
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (urb != std->urb) {
-			urb = std->urb;
-			seq_printf(s, "  urb %p transferred: %d bytes\n", urb,
-				urb->actual_length);
-		}
-		if (std->qtd)
-			seq_printf(s, "    sTD[%td]: %zu bytes @ %08x\n",
-				std->qtd - &qset->qtd[0],
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-		else
-			seq_printf(s, "    sTD[-]: %zd bytes @ %08x\n",
-				std->len, std->num_pointers ?
-				(u32)(std->pl_virt[0].buf_ptr) : (u32)std->dma_addr);
-	}
-}
-
-static int di_show(struct seq_file *s, void *p)
-{
-	struct whc *whc = s->private;
-	int d;
-
-	for (d = 0; d < whc->n_devices; d++) {
-		struct di_buf_entry *di = &whc->di_buf[d];
-
-		seq_printf(s, "DI[%d]\n", d);
-		seq_printf(s, "  availability: %*pb\n",
-			   UWB_NUM_MAS, (unsigned long *)di->availability_info);
-		seq_printf(s, "  %c%c key idx: %d dev addr: %d\n",
-			   (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ',
-			   (di->addr_sec_info & WHC_DI_DISABLE) ? 'D' : ' ',
-			   (di->addr_sec_info & WHC_DI_KEY_IDX_MASK) >> 8,
-			   (di->addr_sec_info & WHC_DI_DEV_ADDR_MASK));
-	}
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(di);
-
-static int asl_show(struct seq_file *s, void *p)
-{
-	struct whc *whc = s->private;
-	struct whc_qset *qset;
-
-	list_for_each_entry(qset, &whc->async_list, list_node) {
-		qset_print(s, qset);
-	}
-
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(asl);
-
-static int pzl_show(struct seq_file *s, void *p)
-{
-	struct whc *whc = s->private;
-	struct whc_qset *qset;
-	int period;
-
-	for (period = 0; period < 5; period++) {
-		seq_printf(s, "Period %d\n", period);
-		list_for_each_entry(qset, &whc->periodic_list[period], list_node) {
-			qset_print(s, qset);
-		}
-	}
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(pzl);
-
-void whc_dbg_init(struct whc *whc)
-{
-	if (whc->wusbhc.pal.debugfs_dir == NULL)
-		return;
-
-	whc->dbg = kzalloc(sizeof(struct whc_dbg), GFP_KERNEL);
-	if (whc->dbg == NULL)
-		return;
-
-	whc->dbg->di_f = debugfs_create_file("di", 0444,
-					      whc->wusbhc.pal.debugfs_dir, whc,
-					      &di_fops);
-	whc->dbg->asl_f = debugfs_create_file("asl", 0444,
-					      whc->wusbhc.pal.debugfs_dir, whc,
-					      &asl_fops);
-	whc->dbg->pzl_f = debugfs_create_file("pzl", 0444,
-					      whc->wusbhc.pal.debugfs_dir, whc,
-					      &pzl_fops);
-}
-
-void whc_dbg_clean_up(struct whc *whc)
-{
-	if (whc->dbg) {
-		debugfs_remove(whc->dbg->pzl_f);
-		debugfs_remove(whc->dbg->asl_f);
-		debugfs_remove(whc->dbg->di_f);
-		kfree(whc->dbg);
-	}
-}
diff --git a/drivers/usb/host/whci/hcd.c b/drivers/usb/host/whci/hcd.c
deleted file mode 100644
index 8af9dcfea127..000000000000
--- a/drivers/usb/host/whci/hcd.c
+++ /dev/null
@@ -1,356 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) driver.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/uwb/umc.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-/*
- * One time initialization.
- *
- * Nothing to do here.
- */
-static int whc_reset(struct usb_hcd *usb_hcd)
-{
-	return 0;
-}
-
-/*
- * Start the wireless host controller.
- *
- * Start device notification.
- *
- * Put hc into run state, set DNTS parameters.
- */
-static int whc_start(struct usb_hcd *usb_hcd)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	u8 bcid;
-	int ret;
-
-	mutex_lock(&wusbhc->mutex);
-
-	le_writel(WUSBINTR_GEN_CMD_DONE
-		  | WUSBINTR_HOST_ERR
-		  | WUSBINTR_ASYNC_SCHED_SYNCED
-		  | WUSBINTR_DNTS_INT
-		  | WUSBINTR_ERR_INT
-		  | WUSBINTR_INT,
-		  whc->base + WUSBINTR);
-
-	/* set cluster ID */
-	bcid = wusb_cluster_id_get();
-	ret = whc_set_cluster_id(whc, bcid);
-	if (ret < 0)
-		goto out;
-	wusbhc->cluster_id = bcid;
-
-	/* start HC */
-	whc_write_wusbcmd(whc, WUSBCMD_RUN, WUSBCMD_RUN);
-
-	usb_hcd->uses_new_polling = 1;
-	set_bit(HCD_FLAG_POLL_RH, &usb_hcd->flags);
-	usb_hcd->state = HC_STATE_RUNNING;
-
-out:
-	mutex_unlock(&wusbhc->mutex);
-	return ret;
-}
-
-
-/*
- * Stop the wireless host controller.
- *
- * Stop device notification.
- *
- * Wait for pending transfer to stop? Put hc into stop state?
- */
-static void whc_stop(struct usb_hcd *usb_hcd)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-
-	mutex_lock(&wusbhc->mutex);
-
-	/* stop HC */
-	le_writel(0, whc->base + WUSBINTR);
-	whc_write_wusbcmd(whc, WUSBCMD_RUN, 0);
-	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
-		      WUSBSTS_HCHALTED, WUSBSTS_HCHALTED,
-		      100, "HC to halt");
-
-	wusb_cluster_id_put(wusbhc->cluster_id);
-
-	mutex_unlock(&wusbhc->mutex);
-}
-
-static int whc_get_frame_number(struct usb_hcd *usb_hcd)
-{
-	/* Frame numbers are not applicable to WUSB. */
-	return -ENOSYS;
-}
-
-
-/*
- * Queue an URB to the ASL or PZL
- */
-static int whc_urb_enqueue(struct usb_hcd *usb_hcd, struct urb *urb,
-			   gfp_t mem_flags)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	int ret;
-
-	switch (usb_pipetype(urb->pipe)) {
-	case PIPE_INTERRUPT:
-		ret = pzl_urb_enqueue(whc, urb, mem_flags);
-		break;
-	case PIPE_ISOCHRONOUS:
-		dev_err(&whc->umc->dev, "isochronous transfers unsupported\n");
-		ret = -ENOTSUPP;
-		break;
-	case PIPE_CONTROL:
-	case PIPE_BULK:
-	default:
-		ret = asl_urb_enqueue(whc, urb, mem_flags);
-		break;
-	}
-
-	return ret;
-}
-
-/*
- * Remove a queued URB from the ASL or PZL.
- */
-static int whc_urb_dequeue(struct usb_hcd *usb_hcd, struct urb *urb, int status)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	int ret;
-
-	switch (usb_pipetype(urb->pipe)) {
-	case PIPE_INTERRUPT:
-		ret = pzl_urb_dequeue(whc, urb, status);
-		break;
-	case PIPE_ISOCHRONOUS:
-		ret = -ENOTSUPP;
-		break;
-	case PIPE_CONTROL:
-	case PIPE_BULK:
-	default:
-		ret = asl_urb_dequeue(whc, urb, status);
-		break;
-	}
-
-	return ret;
-}
-
-/*
- * Wait for all URBs to the endpoint to be completed, then delete the
- * qset.
- */
-static void whc_endpoint_disable(struct usb_hcd *usb_hcd,
-				 struct usb_host_endpoint *ep)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	struct whc_qset *qset;
-
-	qset = ep->hcpriv;
-	if (qset) {
-		ep->hcpriv = NULL;
-		if (usb_endpoint_xfer_bulk(&ep->desc)
-		    || usb_endpoint_xfer_control(&ep->desc))
-			asl_qset_delete(whc, qset);
-		else
-			pzl_qset_delete(whc, qset);
-	}
-}
-
-static void whc_endpoint_reset(struct usb_hcd *usb_hcd,
-			       struct usb_host_endpoint *ep)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	struct whc_qset *qset;
-	unsigned long flags;
-
-	spin_lock_irqsave(&whc->lock, flags);
-
-	qset = ep->hcpriv;
-	if (qset) {
-		qset->remove = 1;
-		qset->reset = 1;
-
-		if (usb_endpoint_xfer_bulk(&ep->desc)
-		    || usb_endpoint_xfer_control(&ep->desc))
-			queue_work(whc->workqueue, &whc->async_work);
-		else
-			queue_work(whc->workqueue, &whc->periodic_work);
-	}
-
-	spin_unlock_irqrestore(&whc->lock, flags);
-}
-
-
-static const struct hc_driver whc_hc_driver = {
-	.description = "whci-hcd",
-	.product_desc = "Wireless host controller",
-	.hcd_priv_size = sizeof(struct whc) - sizeof(struct usb_hcd),
-	.irq = whc_int_handler,
-	.flags = HCD_USB2,
-
-	.reset = whc_reset,
-	.start = whc_start,
-	.stop = whc_stop,
-	.get_frame_number = whc_get_frame_number,
-	.urb_enqueue = whc_urb_enqueue,
-	.urb_dequeue = whc_urb_dequeue,
-	.endpoint_disable = whc_endpoint_disable,
-	.endpoint_reset = whc_endpoint_reset,
-
-	.hub_status_data = wusbhc_rh_status_data,
-	.hub_control = wusbhc_rh_control,
-	.start_port_reset = wusbhc_rh_start_port_reset,
-};
-
-static int whc_probe(struct umc_dev *umc)
-{
-	int ret;
-	struct usb_hcd *usb_hcd;
-	struct wusbhc *wusbhc;
-	struct whc *whc;
-	struct device *dev = &umc->dev;
-
-	usb_hcd = usb_create_hcd(&whc_hc_driver, dev, "whci");
-	if (usb_hcd == NULL) {
-		dev_err(dev, "unable to create hcd\n");
-		return -ENOMEM;
-	}
-
-	usb_hcd->wireless = 1;
-	usb_hcd->self.sg_tablesize = 2048; /* somewhat arbitrary */
-
-	wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	whc = wusbhc_to_whc(wusbhc);
-	whc->umc = umc;
-
-	ret = whc_init(whc);
-	if (ret)
-		goto error_whc_init;
-
-	wusbhc->dev = dev;
-	wusbhc->uwb_rc = uwb_rc_get_by_grandpa(umc->dev.parent);
-	if (!wusbhc->uwb_rc) {
-		ret = -ENODEV;
-		dev_err(dev, "cannot get radio controller\n");
-		goto error_uwb_rc;
-	}
-
-	if (whc->n_devices > USB_MAXCHILDREN) {
-		dev_warn(dev, "USB_MAXCHILDREN too low for WUSB adapter (%u ports)\n",
-			 whc->n_devices);
-		wusbhc->ports_max = USB_MAXCHILDREN;
-	} else
-		wusbhc->ports_max = whc->n_devices;
-	wusbhc->mmcies_max      = whc->n_mmc_ies;
-	wusbhc->start           = whc_wusbhc_start;
-	wusbhc->stop            = whc_wusbhc_stop;
-	wusbhc->mmcie_add       = whc_mmcie_add;
-	wusbhc->mmcie_rm        = whc_mmcie_rm;
-	wusbhc->dev_info_set    = whc_dev_info_set;
-	wusbhc->bwa_set         = whc_bwa_set;
-	wusbhc->set_num_dnts    = whc_set_num_dnts;
-	wusbhc->set_ptk         = whc_set_ptk;
-	wusbhc->set_gtk         = whc_set_gtk;
-
-	ret = wusbhc_create(wusbhc);
-	if (ret)
-		goto error_wusbhc_create;
-
-	ret = usb_add_hcd(usb_hcd, whc->umc->irq, IRQF_SHARED);
-	if (ret) {
-		dev_err(dev, "cannot add HCD: %d\n", ret);
-		goto error_usb_add_hcd;
-	}
-	device_wakeup_enable(usb_hcd->self.controller);
-
-	ret = wusbhc_b_create(wusbhc);
-	if (ret) {
-		dev_err(dev, "WUSBHC phase B setup failed: %d\n", ret);
-		goto error_wusbhc_b_create;
-	}
-
-	whc_dbg_init(whc);
-
-	return 0;
-
-error_wusbhc_b_create:
-	usb_remove_hcd(usb_hcd);
-error_usb_add_hcd:
-	wusbhc_destroy(wusbhc);
-error_wusbhc_create:
-	uwb_rc_put(wusbhc->uwb_rc);
-error_uwb_rc:
-	whc_clean_up(whc);
-error_whc_init:
-	usb_put_hcd(usb_hcd);
-	return ret;
-}
-
-
-static void whc_remove(struct umc_dev *umc)
-{
-	struct usb_hcd *usb_hcd = dev_get_drvdata(&umc->dev);
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-
-	if (usb_hcd) {
-		whc_dbg_clean_up(whc);
-		wusbhc_b_destroy(wusbhc);
-		usb_remove_hcd(usb_hcd);
-		wusbhc_destroy(wusbhc);
-		uwb_rc_put(wusbhc->uwb_rc);
-		whc_clean_up(whc);
-		usb_put_hcd(usb_hcd);
-	}
-}
-
-static struct umc_driver whci_hc_driver = {
-	.name =		"whci-hcd",
-	.cap_id =       UMC_CAP_ID_WHCI_WUSB_HC,
-	.probe =	whc_probe,
-	.remove =	whc_remove,
-};
-
-static int __init whci_hc_driver_init(void)
-{
-	return umc_driver_register(&whci_hc_driver);
-}
-module_init(whci_hc_driver_init);
-
-static void __exit whci_hc_driver_exit(void)
-{
-	umc_driver_unregister(&whci_hc_driver);
-}
-module_exit(whci_hc_driver_exit);
-
-/* PCI device ID's that we handle (so it gets loaded) */
-static struct pci_device_id __used whci_hcd_id_table[] = {
-	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
-	{ /* empty last entry */ }
-};
-MODULE_DEVICE_TABLE(pci, whci_hcd_id_table);
-
-MODULE_DESCRIPTION("WHCI Wireless USB host controller driver");
-MODULE_AUTHOR("Cambridge Silicon Radio Ltd.");
-MODULE_LICENSE("GPL");
diff --git a/drivers/usb/host/whci/hw.c b/drivers/usb/host/whci/hw.c
deleted file mode 100644
index 22b3b7f7419d..000000000000
--- a/drivers/usb/host/whci/hw.c
+++ /dev/null
@@ -1,93 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) hardware access helpers.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include <linux/uwb/umc.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-void whc_write_wusbcmd(struct whc *whc, u32 mask, u32 val)
-{
-	unsigned long flags;
-	u32 cmd;
-
-	spin_lock_irqsave(&whc->lock, flags);
-
-	cmd = le_readl(whc->base + WUSBCMD);
-	cmd = (cmd & ~mask) | val;
-	le_writel(cmd, whc->base + WUSBCMD);
-
-	spin_unlock_irqrestore(&whc->lock, flags);
-}
-
-/**
- * whc_do_gencmd - start a generic command via the WUSBGENCMDSTS register
- * @whc:    the WHCI HC
- * @cmd:    command to start.
- * @params: parameters for the command (the WUSBGENCMDPARAMS register value).
- * @addr:   pointer to any data for the command (may be NULL).
- * @len:    length of the data (if any).
- */
-int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len)
-{
-	unsigned long flags;
-	dma_addr_t dma_addr;
-	int t;
-	int ret = 0;
-
-	mutex_lock(&whc->mutex);
-
-	/* Wait for previous command to complete. */
-	t = wait_event_timeout(whc->cmd_wq,
-			       (le_readl(whc->base + WUSBGENCMDSTS) & WUSBGENCMDSTS_ACTIVE) == 0,
-			       WHC_GENCMD_TIMEOUT_MS);
-	if (t == 0) {
-		dev_err(&whc->umc->dev, "generic command timeout (%04x/%04x)\n",
-			le_readl(whc->base + WUSBGENCMDSTS),
-			le_readl(whc->base + WUSBGENCMDPARAMS));
-		ret = -ETIMEDOUT;
-		goto out;
-	}
-
-	if (addr) {
-		memcpy(whc->gen_cmd_buf, addr, len);
-		dma_addr = whc->gen_cmd_buf_dma;
-	} else
-		dma_addr = 0;
-
-	/* Poke registers to start cmd. */
-	spin_lock_irqsave(&whc->lock, flags);
-
-	le_writel(params, whc->base + WUSBGENCMDPARAMS);
-	le_writeq(dma_addr, whc->base + WUSBGENADDR);
-
-	le_writel(WUSBGENCMDSTS_ACTIVE | WUSBGENCMDSTS_IOC | cmd,
-		  whc->base + WUSBGENCMDSTS);
-
-	spin_unlock_irqrestore(&whc->lock, flags);
-out:
-	mutex_unlock(&whc->mutex);
-
-	return ret;
-}
-
-/**
- * whc_hw_error - recover from a hardware error
- * @whc:    the WHCI HC that broke.
- * @reason: a description of the failure.
- *
- * Recover from broken hardware with a full reset.
- */
-void whc_hw_error(struct whc *whc, const char *reason)
-{
-	struct wusbhc *wusbhc = &whc->wusbhc;
-
-	dev_err(&whc->umc->dev, "hardware error: %s\n", reason);
-	wusbhc_reset_all(wusbhc);
-}
diff --git a/drivers/usb/host/whci/init.c b/drivers/usb/host/whci/init.c
deleted file mode 100644
index 82416973f773..000000000000
--- a/drivers/usb/host/whci/init.c
+++ /dev/null
@@ -1,177 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) initialization.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/dma-mapping.h>
-#include <linux/uwb/umc.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-/*
- * Reset the host controller.
- */
-static void whc_hw_reset(struct whc *whc)
-{
-	le_writel(WUSBCMD_WHCRESET, whc->base + WUSBCMD);
-	whci_wait_for(&whc->umc->dev, whc->base + WUSBCMD, WUSBCMD_WHCRESET, 0,
-		      100, "reset");
-}
-
-static void whc_hw_init_di_buf(struct whc *whc)
-{
-	int d;
-
-	/* Disable all entries in the Device Information buffer. */
-	for (d = 0; d < whc->n_devices; d++)
-		whc->di_buf[d].addr_sec_info = WHC_DI_DISABLE;
-
-	le_writeq(whc->di_buf_dma, whc->base + WUSBDEVICEINFOADDR);
-}
-
-static void whc_hw_init_dn_buf(struct whc *whc)
-{
-	/* Clear the Device Notification buffer to ensure the V (valid)
-	 * bits are clear.  */
-	memset(whc->dn_buf, 0, 4096);
-
-	le_writeq(whc->dn_buf_dma, whc->base + WUSBDNTSBUFADDR);
-}
-
-int whc_init(struct whc *whc)
-{
-	u32 whcsparams;
-	int ret, i;
-	resource_size_t start, len;
-
-	spin_lock_init(&whc->lock);
-	mutex_init(&whc->mutex);
-	init_waitqueue_head(&whc->cmd_wq);
-	init_waitqueue_head(&whc->async_list_wq);
-	init_waitqueue_head(&whc->periodic_list_wq);
-	whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0);
-	if (whc->workqueue == NULL) {
-		ret = -ENOMEM;
-		goto error;
-	}
-	INIT_WORK(&whc->dn_work, whc_dn_work);
-
-	INIT_WORK(&whc->async_work, scan_async_work);
-	INIT_LIST_HEAD(&whc->async_list);
-	INIT_LIST_HEAD(&whc->async_removed_list);
-
-	INIT_WORK(&whc->periodic_work, scan_periodic_work);
-	for (i = 0; i < 5; i++)
-		INIT_LIST_HEAD(&whc->periodic_list[i]);
-	INIT_LIST_HEAD(&whc->periodic_removed_list);
-
-	/* Map HC registers. */
-	start = whc->umc->resource.start;
-	len   = whc->umc->resource.end - start + 1;
-	if (!request_mem_region(start, len, "whci-hc")) {
-		dev_err(&whc->umc->dev, "can't request HC region\n");
-		ret = -EBUSY;
-		goto error;
-	}
-	whc->base_phys = start;
-	whc->base = ioremap(start, len);
-	if (!whc->base) {
-		dev_err(&whc->umc->dev, "ioremap\n");
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	whc_hw_reset(whc);
-
-	/* Read maximum number of devices, keys and MMC IEs. */
-	whcsparams = le_readl(whc->base + WHCSPARAMS);
-	whc->n_devices = WHCSPARAMS_TO_N_DEVICES(whcsparams);
-	whc->n_keys    = WHCSPARAMS_TO_N_KEYS(whcsparams);
-	whc->n_mmc_ies = WHCSPARAMS_TO_N_MMC_IES(whcsparams);
-
-	dev_dbg(&whc->umc->dev, "N_DEVICES = %d, N_KEYS = %d, N_MMC_IES = %d\n",
-		whc->n_devices, whc->n_keys, whc->n_mmc_ies);
-
-	whc->qset_pool = dma_pool_create("qset", &whc->umc->dev,
-					 sizeof(struct whc_qset), 64, 0);
-	if (whc->qset_pool == NULL) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	ret = asl_init(whc);
-	if (ret < 0)
-		goto error;
-	ret = pzl_init(whc);
-	if (ret < 0)
-		goto error;
-
-	/* Allocate and initialize a buffer for generic commands, the
-	   Device Information buffer, and the Device Notification
-	   buffer. */
-
-	whc->gen_cmd_buf = dma_alloc_coherent(&whc->umc->dev, WHC_GEN_CMD_DATA_LEN,
-					      &whc->gen_cmd_buf_dma, GFP_KERNEL);
-	if (whc->gen_cmd_buf == NULL) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	whc->dn_buf = dma_alloc_coherent(&whc->umc->dev,
-					 sizeof(struct dn_buf_entry) * WHC_N_DN_ENTRIES,
-					 &whc->dn_buf_dma, GFP_KERNEL);
-	if (!whc->dn_buf) {
-		ret = -ENOMEM;
-		goto error;
-	}
-	whc_hw_init_dn_buf(whc);
-
-	whc->di_buf = dma_alloc_coherent(&whc->umc->dev,
-					 sizeof(struct di_buf_entry) * whc->n_devices,
-					 &whc->di_buf_dma, GFP_KERNEL);
-	if (!whc->di_buf) {
-		ret = -ENOMEM;
-		goto error;
-	}
-	whc_hw_init_di_buf(whc);
-
-	return 0;
-
-error:
-	whc_clean_up(whc);
-	return ret;
-}
-
-void whc_clean_up(struct whc *whc)
-{
-	resource_size_t len;
-
-	if (whc->di_buf)
-		dma_free_coherent(&whc->umc->dev, sizeof(struct di_buf_entry) * whc->n_devices,
-				  whc->di_buf, whc->di_buf_dma);
-	if (whc->dn_buf)
-		dma_free_coherent(&whc->umc->dev, sizeof(struct dn_buf_entry) * WHC_N_DN_ENTRIES,
-				  whc->dn_buf, whc->dn_buf_dma);
-	if (whc->gen_cmd_buf)
-		dma_free_coherent(&whc->umc->dev, WHC_GEN_CMD_DATA_LEN,
-				  whc->gen_cmd_buf, whc->gen_cmd_buf_dma);
-
-	pzl_clean_up(whc);
-	asl_clean_up(whc);
-
-	dma_pool_destroy(whc->qset_pool);
-
-	len   = resource_size(&whc->umc->resource);
-	if (whc->base)
-		iounmap(whc->base);
-	if (whc->base_phys)
-		release_mem_region(whc->base_phys, len);
-
-	if (whc->workqueue)
-		destroy_workqueue(whc->workqueue);
-}
diff --git a/drivers/usb/host/whci/int.c b/drivers/usb/host/whci/int.c
deleted file mode 100644
index 7e4ad1b8f3e3..000000000000
--- a/drivers/usb/host/whci/int.c
+++ /dev/null
@@ -1,82 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) interrupt handling.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/uwb/umc.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-static void transfer_done(struct whc *whc)
-{
-	queue_work(whc->workqueue, &whc->async_work);
-	queue_work(whc->workqueue, &whc->periodic_work);
-}
-
-irqreturn_t whc_int_handler(struct usb_hcd *hcd)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(hcd);
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	u32 sts;
-
-	sts = le_readl(whc->base + WUSBSTS);
-	if (!(sts & WUSBSTS_INT_MASK))
-		return IRQ_NONE;
-	le_writel(sts & WUSBSTS_INT_MASK, whc->base + WUSBSTS);
-
-	if (sts & WUSBSTS_GEN_CMD_DONE)
-		wake_up(&whc->cmd_wq);
-
-	if (sts & WUSBSTS_HOST_ERR)
-		dev_err(&whc->umc->dev, "FIXME: host system error\n");
-
-	if (sts & WUSBSTS_ASYNC_SCHED_SYNCED)
-		wake_up(&whc->async_list_wq);
-
-	if (sts & WUSBSTS_PERIODIC_SCHED_SYNCED)
-		wake_up(&whc->periodic_list_wq);
-
-	if (sts & WUSBSTS_DNTS_INT)
-		queue_work(whc->workqueue, &whc->dn_work);
-
-	/*
-	 * A transfer completed (see [WHCI] section 4.7.1.2 for when
-	 * this occurs).
-	 */
-	if (sts & (WUSBSTS_INT | WUSBSTS_ERR_INT))
-		transfer_done(whc);
-
-	return IRQ_HANDLED;
-}
-
-static int process_dn_buf(struct whc *whc)
-{
-	struct wusbhc *wusbhc = &whc->wusbhc;
-	struct dn_buf_entry *dn;
-	int processed = 0;
-
-	for (dn = whc->dn_buf; dn < whc->dn_buf + WHC_N_DN_ENTRIES; dn++) {
-		if (dn->status & WHC_DN_STATUS_VALID) {
-			wusbhc_handle_dn(wusbhc, dn->src_addr,
-					 (struct wusb_dn_hdr *)dn->dn_data,
-					 dn->msg_size);
-			dn->status &= ~WHC_DN_STATUS_VALID;
-			processed++;
-		}
-	}
-	return processed;
-}
-
-void whc_dn_work(struct work_struct *work)
-{
-	struct whc *whc = container_of(work, struct whc, dn_work);
-	int processed;
-
-	do {
-		processed = process_dn_buf(whc);
-	} while (processed);
-}
diff --git a/drivers/usb/host/whci/pzl.c b/drivers/usb/host/whci/pzl.c
deleted file mode 100644
index ef52aeb02fde..000000000000
--- a/drivers/usb/host/whci/pzl.c
+++ /dev/null
@@ -1,404 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) periodic schedule management.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/dma-mapping.h>
-#include <linux/uwb/umc.h>
-#include <linux/usb.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-static void update_pzl_pointers(struct whc *whc, int period, u64 addr)
-{
-	switch (period) {
-	case 0:
-		whc_qset_set_link_ptr(&whc->pz_list[0], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[2], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[4], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[6], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[8], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[10], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[12], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[14], addr);
-		break;
-	case 1:
-		whc_qset_set_link_ptr(&whc->pz_list[1], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[5], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[9], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[13], addr);
-		break;
-	case 2:
-		whc_qset_set_link_ptr(&whc->pz_list[3], addr);
-		whc_qset_set_link_ptr(&whc->pz_list[11], addr);
-		break;
-	case 3:
-		whc_qset_set_link_ptr(&whc->pz_list[7], addr);
-		break;
-	case 4:
-		whc_qset_set_link_ptr(&whc->pz_list[15], addr);
-		break;
-	}
-}
-
-/*
- * Return the 'period' to use for this qset.  The minimum interval for
- * the endpoint is used so whatever urbs are submitted the device is
- * polled often enough.
- */
-static int qset_get_period(struct whc *whc, struct whc_qset *qset)
-{
-	uint8_t bInterval = qset->ep->desc.bInterval;
-
-	if (bInterval < 6)
-		bInterval = 6;
-	if (bInterval > 10)
-		bInterval = 10;
-	return bInterval - 6;
-}
-
-static void qset_insert_in_sw_list(struct whc *whc, struct whc_qset *qset)
-{
-	int period;
-
-	period = qset_get_period(whc, qset);
-
-	qset_clear(whc, qset);
-	list_move(&qset->list_node, &whc->periodic_list[period]);
-	qset->in_sw_list = true;
-}
-
-static void pzl_qset_remove(struct whc *whc, struct whc_qset *qset)
-{
-	list_move(&qset->list_node, &whc->periodic_removed_list);
-	qset->in_hw_list = false;
-	qset->in_sw_list = false;
-}
-
-/**
- * pzl_process_qset - process any recently inactivated or halted qTDs
- * in a qset.
- *
- * After inactive qTDs are removed, new qTDs can be added if the
- * urb queue still contains URBs.
- *
- * Returns the schedule updates required.
- */
-static enum whc_update pzl_process_qset(struct whc *whc, struct whc_qset *qset)
-{
-	enum whc_update update = 0;
-	uint32_t status = 0;
-
-	while (qset->ntds) {
-		struct whc_qtd *td;
-
-		td = &qset->qtd[qset->td_start];
-		status = le32_to_cpu(td->status);
-
-		/*
-		 * Nothing to do with a still active qTD.
-		 */
-		if (status & QTD_STS_ACTIVE)
-			break;
-
-		if (status & QTD_STS_HALTED) {
-			/* Ug, an error. */
-			process_halted_qtd(whc, qset, td);
-			/* A halted qTD always triggers an update
-			   because the qset was either removed or
-			   reactivated. */
-			update |= WHC_UPDATE_UPDATED;
-			goto done;
-		}
-
-		/* Mmm, a completed qTD. */
-		process_inactive_qtd(whc, qset, td);
-	}
-
-	if (!qset->remove)
-		update |= qset_add_qtds(whc, qset);
-
-done:
-	/*
-	 * If there are no qTDs in this qset, remove it from the PZL.
-	 */
-	if (qset->remove && qset->ntds == 0) {
-		pzl_qset_remove(whc, qset);
-		update |= WHC_UPDATE_REMOVED;
-	}
-
-	return update;
-}
-
-/**
- * pzl_start - start the periodic schedule
- * @whc: the WHCI host controller
- *
- * The PZL must be valid (e.g., all entries in the list should have
- * the T bit set).
- */
-void pzl_start(struct whc *whc)
-{
-	le_writeq(whc->pz_list_dma, whc->base + WUSBPERIODICLISTBASE);
-
-	whc_write_wusbcmd(whc, WUSBCMD_PERIODIC_EN, WUSBCMD_PERIODIC_EN);
-	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
-		      WUSBSTS_PERIODIC_SCHED, WUSBSTS_PERIODIC_SCHED,
-		      1000, "start PZL");
-}
-
-/**
- * pzl_stop - stop the periodic schedule
- * @whc: the WHCI host controller
- */
-void pzl_stop(struct whc *whc)
-{
-	whc_write_wusbcmd(whc, WUSBCMD_PERIODIC_EN, 0);
-	whci_wait_for(&whc->umc->dev, whc->base + WUSBSTS,
-		      WUSBSTS_PERIODIC_SCHED, 0,
-		      1000, "stop PZL");
-}
-
-/**
- * pzl_update - request a PZL update and wait for the hardware to be synced
- * @whc: the WHCI HC
- * @wusbcmd: WUSBCMD value to start the update.
- *
- * If the WUSB HC is inactive (i.e., the PZL is stopped) then the
- * update must be skipped as the hardware may not respond to update
- * requests.
- */
-void pzl_update(struct whc *whc, uint32_t wusbcmd)
-{
-	struct wusbhc *wusbhc = &whc->wusbhc;
-	long t;
-
-	mutex_lock(&wusbhc->mutex);
-	if (wusbhc->active) {
-		whc_write_wusbcmd(whc, wusbcmd, wusbcmd);
-		t = wait_event_timeout(
-			whc->periodic_list_wq,
-			(le_readl(whc->base + WUSBCMD) & WUSBCMD_PERIODIC_UPDATED) == 0,
-			msecs_to_jiffies(1000));
-		if (t == 0)
-			whc_hw_error(whc, "PZL update timeout");
-	}
-	mutex_unlock(&wusbhc->mutex);
-}
-
-static void update_pzl_hw_view(struct whc *whc)
-{
-	struct whc_qset *qset, *t;
-	int period;
-	u64 tmp_qh = 0;
-
-	for (period = 0; period < 5; period++) {
-		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
-			whc_qset_set_link_ptr(&qset->qh.link, tmp_qh);
-			tmp_qh = qset->qset_dma;
-			qset->in_hw_list = true;
-		}
-		update_pzl_pointers(whc, period, tmp_qh);
-	}
-}
-
-/**
- * scan_periodic_work - scan the PZL for qsets to process.
- *
- * Process each qset in the PZL in turn and then signal the WHC that
- * the PZL has been updated.
- *
- * Then start, stop or update the periodic schedule as required.
- */
-void scan_periodic_work(struct work_struct *work)
-{
-	struct whc *whc = container_of(work, struct whc, periodic_work);
-	struct whc_qset *qset, *t;
-	enum whc_update update = 0;
-	int period;
-
-	spin_lock_irq(&whc->lock);
-
-	for (period = 4; period >= 0; period--) {
-		list_for_each_entry_safe(qset, t, &whc->periodic_list[period], list_node) {
-			if (!qset->in_hw_list)
-				update |= WHC_UPDATE_ADDED;
-			update |= pzl_process_qset(whc, qset);
-		}
-	}
-
-	if (update & (WHC_UPDATE_ADDED | WHC_UPDATE_REMOVED))
-		update_pzl_hw_view(whc);
-
-	spin_unlock_irq(&whc->lock);
-
-	if (update) {
-		uint32_t wusbcmd = WUSBCMD_PERIODIC_UPDATED | WUSBCMD_PERIODIC_SYNCED_DB;
-		if (update & WHC_UPDATE_REMOVED)
-			wusbcmd |= WUSBCMD_PERIODIC_QSET_RM;
-		pzl_update(whc, wusbcmd);
-	}
-
-	/*
-	 * Now that the PZL is updated, complete the removal of any
-	 * removed qsets.
-	 *
-	 * If the qset was to be reset, do so and reinsert it into the
-	 * PZL if it has pending transfers.
-	 */
-	spin_lock_irq(&whc->lock);
-
-	list_for_each_entry_safe(qset, t, &whc->periodic_removed_list, list_node) {
-		qset_remove_complete(whc, qset);
-		if (qset->reset) {
-			qset_reset(whc, qset);
-			if (!list_empty(&qset->stds)) {
-				qset_insert_in_sw_list(whc, qset);
-				queue_work(whc->workqueue, &whc->periodic_work);
-			}
-		}
-	}
-
-	spin_unlock_irq(&whc->lock);
-}
-
-/**
- * pzl_urb_enqueue - queue an URB onto the periodic list (PZL)
- * @whc: the WHCI host controller
- * @urb: the URB to enqueue
- * @mem_flags: flags for any memory allocations
- *
- * The qset for the endpoint is obtained and the urb queued on to it.
- *
- * Work is scheduled to update the hardware's view of the PZL.
- */
-int pzl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags)
-{
-	struct whc_qset *qset;
-	int err;
-	unsigned long flags;
-
-	spin_lock_irqsave(&whc->lock, flags);
-
-	err = usb_hcd_link_urb_to_ep(&whc->wusbhc.usb_hcd, urb);
-	if (err < 0) {
-		spin_unlock_irqrestore(&whc->lock, flags);
-		return err;
-	}
-
-	qset = get_qset(whc, urb, GFP_ATOMIC);
-	if (qset == NULL)
-		err = -ENOMEM;
-	else
-		err = qset_add_urb(whc, qset, urb, GFP_ATOMIC);
-	if (!err) {
-		if (!qset->in_sw_list && !qset->remove)
-			qset_insert_in_sw_list(whc, qset);
-	} else
-		usb_hcd_unlink_urb_from_ep(&whc->wusbhc.usb_hcd, urb);
-
-	spin_unlock_irqrestore(&whc->lock, flags);
-
-	if (!err)
-		queue_work(whc->workqueue, &whc->periodic_work);
-
-	return err;
-}
-
-/**
- * pzl_urb_dequeue - remove an URB (qset) from the periodic list
- * @whc: the WHCI host controller
- * @urb: the URB to dequeue
- * @status: the current status of the URB
- *
- * URBs that do yet have qTDs can simply be removed from the software
- * queue, otherwise the qset must be removed so the qTDs can be safely
- * removed.
- */
-int pzl_urb_dequeue(struct whc *whc, struct urb *urb, int status)
-{
-	struct whc_urb *wurb = urb->hcpriv;
-	struct whc_qset *qset = wurb->qset;
-	struct whc_std *std, *t;
-	bool has_qtd = false;
-	int ret;
-	unsigned long flags;
-
-	spin_lock_irqsave(&whc->lock, flags);
-
-	ret = usb_hcd_check_unlink_urb(&whc->wusbhc.usb_hcd, urb, status);
-	if (ret < 0)
-		goto out;
-
-	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
-		if (std->urb == urb) {
-			if (std->qtd)
-				has_qtd = true;
-			qset_free_std(whc, std);
-		} else
-			std->qtd = NULL; /* so this std is re-added when the qset is */
-	}
-
-	if (has_qtd) {
-		pzl_qset_remove(whc, qset);
-		update_pzl_hw_view(whc);
-		wurb->status = status;
-		wurb->is_async = false;
-		queue_work(whc->workqueue, &wurb->dequeue_work);
-	} else
-		qset_remove_urb(whc, qset, urb, status);
-out:
-	spin_unlock_irqrestore(&whc->lock, flags);
-
-	return ret;
-}
-
-/**
- * pzl_qset_delete - delete a qset from the PZL
- */
-void pzl_qset_delete(struct whc *whc, struct whc_qset *qset)
-{
-	qset->remove = 1;
-	queue_work(whc->workqueue, &whc->periodic_work);
-	qset_delete(whc, qset);
-}
-
-/**
- * pzl_init - initialize the periodic zone list
- * @whc: the WHCI host controller
- */
-int pzl_init(struct whc *whc)
-{
-	int i;
-
-	whc->pz_list = dma_alloc_coherent(&whc->umc->dev, sizeof(u64) * 16,
-					  &whc->pz_list_dma, GFP_KERNEL);
-	if (whc->pz_list == NULL)
-		return -ENOMEM;
-
-	/* Set T bit on all elements in PZL. */
-	for (i = 0; i < 16; i++)
-		whc->pz_list[i] = cpu_to_le64(QH_LINK_NTDS(8) | QH_LINK_T);
-
-	le_writeq(whc->pz_list_dma, whc->base + WUSBPERIODICLISTBASE);
-
-	return 0;
-}
-
-/**
- * pzl_clean_up - free PZL resources
- * @whc: the WHCI host controller
- *
- * The PZL is stopped and empty.
- */
-void pzl_clean_up(struct whc *whc)
-{
-	if (whc->pz_list)
-		dma_free_coherent(&whc->umc->dev,  sizeof(u64) * 16, whc->pz_list,
-				  whc->pz_list_dma);
-}
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
deleted file mode 100644
index 925166a207aa..000000000000
--- a/drivers/usb/host/whci/qset.c
+++ /dev/null
@@ -1,831 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) qset management.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/uwb/umc.h>
-#include <linux/usb.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags)
-{
-	struct whc_qset *qset;
-	dma_addr_t dma;
-
-	qset = dma_pool_zalloc(whc->qset_pool, mem_flags, &dma);
-	if (qset == NULL)
-		return NULL;
-
-	qset->qset_dma = dma;
-	qset->whc = whc;
-
-	INIT_LIST_HEAD(&qset->list_node);
-	INIT_LIST_HEAD(&qset->stds);
-
-	return qset;
-}
-
-/**
- * qset_fill_qh - fill the static endpoint state in a qset's QHead
- * @qset: the qset whose QH needs initializing with static endpoint
- *        state
- * @urb:  an urb for a transfer to this endpoint
- */
-static void qset_fill_qh(struct whc *whc, struct whc_qset *qset, struct urb *urb)
-{
-	struct usb_device *usb_dev = urb->dev;
-	struct wusb_dev *wusb_dev = usb_dev->wusb_dev;
-	struct usb_wireless_ep_comp_descriptor *epcd;
-	bool is_out;
-	uint8_t phy_rate;
-
-	is_out = usb_pipeout(urb->pipe);
-
-	qset->max_packet = le16_to_cpu(urb->ep->desc.wMaxPacketSize);
-
-	epcd = (struct usb_wireless_ep_comp_descriptor *)qset->ep->extra;
-	if (epcd) {
-		qset->max_seq = epcd->bMaxSequence;
-		qset->max_burst = epcd->bMaxBurst;
-	} else {
-		qset->max_seq = 2;
-		qset->max_burst = 1;
-	}
-
-	/*
-	 * Initial PHY rate is 53.3 Mbit/s for control endpoints or
-	 * the maximum supported by the device for other endpoints
-	 * (unless limited by the user).
-	 */
-	if (usb_pipecontrol(urb->pipe))
-		phy_rate = UWB_PHY_RATE_53;
-	else {
-		uint16_t phy_rates;
-
-		phy_rates = le16_to_cpu(wusb_dev->wusb_cap_descr->wPHYRates);
-		phy_rate = fls(phy_rates) - 1;
-		if (phy_rate > whc->wusbhc.phy_rate)
-			phy_rate = whc->wusbhc.phy_rate;
-	}
-
-	qset->qh.info1 = cpu_to_le32(
-		QH_INFO1_EP(usb_pipeendpoint(urb->pipe))
-		| (is_out ? QH_INFO1_DIR_OUT : QH_INFO1_DIR_IN)
-		| usb_pipe_to_qh_type(urb->pipe)
-		| QH_INFO1_DEV_INFO_IDX(wusb_port_no_to_idx(usb_dev->portnum))
-		| QH_INFO1_MAX_PKT_LEN(qset->max_packet)
-		);
-	qset->qh.info2 = cpu_to_le32(
-		QH_INFO2_BURST(qset->max_burst)
-		| QH_INFO2_DBP(0)
-		| QH_INFO2_MAX_COUNT(3)
-		| QH_INFO2_MAX_RETRY(3)
-		| QH_INFO2_MAX_SEQ(qset->max_seq - 1)
-		);
-	/* FIXME: where can we obtain these Tx parameters from?  Why
-	 * doesn't the chip know what Tx power to use? It knows the Rx
-	 * strength and can presumably guess the Tx power required
-	 * from that? */
-	qset->qh.info3 = cpu_to_le32(
-		QH_INFO3_TX_RATE(phy_rate)
-		| QH_INFO3_TX_PWR(0) /* 0 == max power */
-		);
-
-	qset->qh.cur_window = cpu_to_le32((1 << qset->max_burst) - 1);
-}
-
-/**
- * qset_clear - clear fields in a qset so it may be reinserted into a
- * schedule.
- *
- * The sequence number and current window are not cleared (see
- * qset_reset()).
- */
-void qset_clear(struct whc *whc, struct whc_qset *qset)
-{
-	qset->td_start = qset->td_end = qset->ntds = 0;
-
-	qset->qh.link = cpu_to_le64(QH_LINK_NTDS(8) | QH_LINK_T);
-	qset->qh.status = qset->qh.status & QH_STATUS_SEQ_MASK;
-	qset->qh.err_count = 0;
-	qset->qh.scratch[0] = 0;
-	qset->qh.scratch[1] = 0;
-	qset->qh.scratch[2] = 0;
-
-	memset(&qset->qh.overlay, 0, sizeof(qset->qh.overlay));
-
-	init_completion(&qset->remove_complete);
-}
-
-/**
- * qset_reset - reset endpoint state in a qset.
- *
- * Clears the sequence number and current window.  This qset must not
- * be in the ASL or PZL.
- */
-void qset_reset(struct whc *whc, struct whc_qset *qset)
-{
-	qset->reset = 0;
-
-	qset->qh.status &= ~QH_STATUS_SEQ_MASK;
-	qset->qh.cur_window = cpu_to_le32((1 << qset->max_burst) - 1);
-}
-
-/**
- * get_qset - get the qset for an async endpoint
- *
- * A new qset is created if one does not already exist.
- */
-struct whc_qset *get_qset(struct whc *whc, struct urb *urb,
-				 gfp_t mem_flags)
-{
-	struct whc_qset *qset;
-
-	qset = urb->ep->hcpriv;
-	if (qset == NULL) {
-		qset = qset_alloc(whc, mem_flags);
-		if (qset == NULL)
-			return NULL;
-
-		qset->ep = urb->ep;
-		urb->ep->hcpriv = qset;
-		qset_fill_qh(whc, qset, urb);
-	}
-	return qset;
-}
-
-void qset_remove_complete(struct whc *whc, struct whc_qset *qset)
-{
-	qset->remove = 0;
-	list_del_init(&qset->list_node);
-	complete(&qset->remove_complete);
-}
-
-/**
- * qset_add_qtds - add qTDs for an URB to a qset
- *
- * Returns true if the list (ASL/PZL) must be updated because (for a
- * WHCI 0.95 controller) an activated qTD was pointed to be iCur.
- */
-enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset)
-{
-	struct whc_std *std;
-	enum whc_update update = 0;
-
-	list_for_each_entry(std, &qset->stds, list_node) {
-		struct whc_qtd *qtd;
-		uint32_t status;
-
-		if (qset->ntds >= WHCI_QSET_TD_MAX
-		    || (qset->pause_after_urb && std->urb != qset->pause_after_urb))
-			break;
-
-		if (std->qtd)
-			continue; /* already has a qTD */
-
-		qtd = std->qtd = &qset->qtd[qset->td_end];
-
-		/* Fill in setup bytes for control transfers. */
-		if (usb_pipecontrol(std->urb->pipe))
-			memcpy(qtd->setup, std->urb->setup_packet, 8);
-
-		status = QTD_STS_ACTIVE | QTD_STS_LEN(std->len);
-
-		if (whc_std_last(std) && usb_pipeout(std->urb->pipe))
-			status |= QTD_STS_LAST_PKT;
-
-		/*
-		 * For an IN transfer the iAlt field should be set so
-		 * the h/w will automatically advance to the next
-		 * transfer. However, if there are 8 or more TDs
-		 * remaining in this transfer then iAlt cannot be set
-		 * as it could point to somewhere in this transfer.
-		 */
-		if (std->ntds_remaining < WHCI_QSET_TD_MAX) {
-			int ialt;
-			ialt = (qset->td_end + std->ntds_remaining) % WHCI_QSET_TD_MAX;
-			status |= QTD_STS_IALT(ialt);
-		} else if (usb_pipein(std->urb->pipe))
-			qset->pause_after_urb = std->urb;
-
-		if (std->num_pointers)
-			qtd->options = cpu_to_le32(QTD_OPT_IOC);
-		else
-			qtd->options = cpu_to_le32(QTD_OPT_IOC | QTD_OPT_SMALL);
-		qtd->page_list_ptr = cpu_to_le64(std->dma_addr);
-
-		qtd->status = cpu_to_le32(status);
-
-		if (QH_STATUS_TO_ICUR(qset->qh.status) == qset->td_end)
-			update = WHC_UPDATE_UPDATED;
-
-		if (++qset->td_end >= WHCI_QSET_TD_MAX)
-			qset->td_end = 0;
-		qset->ntds++;
-	}
-
-	return update;
-}
-
-/**
- * qset_remove_qtd - remove the first qTD from a qset.
- *
- * The qTD might be still active (if it's part of a IN URB that
- * resulted in a short read) so ensure it's deactivated.
- */
-static void qset_remove_qtd(struct whc *whc, struct whc_qset *qset)
-{
-	qset->qtd[qset->td_start].status = 0;
-
-	if (++qset->td_start >= WHCI_QSET_TD_MAX)
-		qset->td_start = 0;
-	qset->ntds--;
-}
-
-static void qset_copy_bounce_to_sg(struct whc *whc, struct whc_std *std)
-{
-	struct scatterlist *sg;
-	void *bounce;
-	size_t remaining, offset;
-
-	bounce = std->bounce_buf;
-	remaining = std->len;
-
-	sg = std->bounce_sg;
-	offset = std->bounce_offset;
-
-	while (remaining) {
-		size_t len;
-
-		len = min(sg->length - offset, remaining);
-		memcpy(sg_virt(sg) + offset, bounce, len);
-
-		bounce += len;
-		remaining -= len;
-
-		offset += len;
-		if (offset >= sg->length) {
-			sg = sg_next(sg);
-			offset = 0;
-		}
-	}
-
-}
-
-/**
- * qset_free_std - remove an sTD and free it.
- * @whc: the WHCI host controller
- * @std: the sTD to remove and free.
- */
-void qset_free_std(struct whc *whc, struct whc_std *std)
-{
-	list_del(&std->list_node);
-	if (std->bounce_buf) {
-		bool is_out = usb_pipeout(std->urb->pipe);
-		dma_addr_t dma_addr;
-
-		if (std->num_pointers)
-			dma_addr = le64_to_cpu(std->pl_virt[0].buf_ptr);
-		else
-			dma_addr = std->dma_addr;
-
-		dma_unmap_single(whc->wusbhc.dev, dma_addr,
-				 std->len, is_out ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		if (!is_out)
-			qset_copy_bounce_to_sg(whc, std);
-		kfree(std->bounce_buf);
-	}
-	if (std->pl_virt) {
-		if (!dma_mapping_error(whc->wusbhc.dev, std->dma_addr))
-			dma_unmap_single(whc->wusbhc.dev, std->dma_addr,
-					 std->num_pointers * sizeof(struct whc_page_list_entry),
-					 DMA_TO_DEVICE);
-		kfree(std->pl_virt);
-		std->pl_virt = NULL;
-	}
-	kfree(std);
-}
-
-/**
- * qset_remove_qtds - remove an URB's qTDs (and sTDs).
- */
-static void qset_remove_qtds(struct whc *whc, struct whc_qset *qset,
-			     struct urb *urb)
-{
-	struct whc_std *std, *t;
-
-	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
-		if (std->urb != urb)
-			break;
-		if (std->qtd != NULL)
-			qset_remove_qtd(whc, qset);
-		qset_free_std(whc, std);
-	}
-}
-
-/**
- * qset_free_stds - free any remaining sTDs for an URB.
- */
-static void qset_free_stds(struct whc_qset *qset, struct urb *urb)
-{
-	struct whc_std *std, *t;
-
-	list_for_each_entry_safe(std, t, &qset->stds, list_node) {
-		if (std->urb == urb)
-			qset_free_std(qset->whc, std);
-	}
-}
-
-static int qset_fill_page_list(struct whc *whc, struct whc_std *std, gfp_t mem_flags)
-{
-	dma_addr_t dma_addr = std->dma_addr;
-	dma_addr_t sp, ep;
-	size_t pl_len;
-	int p;
-
-	/* Short buffers don't need a page list. */
-	if (std->len <= WHCI_PAGE_SIZE) {
-		std->num_pointers = 0;
-		return 0;
-	}
-
-	sp = dma_addr & ~(WHCI_PAGE_SIZE-1);
-	ep = dma_addr + std->len;
-	std->num_pointers = DIV_ROUND_UP(ep - sp, WHCI_PAGE_SIZE);
-
-	pl_len = std->num_pointers * sizeof(struct whc_page_list_entry);
-	std->pl_virt = kmalloc(pl_len, mem_flags);
-	if (std->pl_virt == NULL)
-		return -ENOMEM;
-	std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt, pl_len, DMA_TO_DEVICE);
-	if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr)) {
-		kfree(std->pl_virt);
-		return -EFAULT;
-	}
-
-	for (p = 0; p < std->num_pointers; p++) {
-		std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr);
-		dma_addr = (dma_addr + WHCI_PAGE_SIZE) & ~(WHCI_PAGE_SIZE-1);
-	}
-
-	return 0;
-}
-
-/**
- * urb_dequeue_work - executes asl/pzl update and gives back the urb to the system.
- */
-static void urb_dequeue_work(struct work_struct *work)
-{
-	struct whc_urb *wurb = container_of(work, struct whc_urb, dequeue_work);
-	struct whc_qset *qset = wurb->qset;
-	struct whc *whc = qset->whc;
-	unsigned long flags;
-
-	if (wurb->is_async)
-		asl_update(whc, WUSBCMD_ASYNC_UPDATED
-			   | WUSBCMD_ASYNC_SYNCED_DB
-			   | WUSBCMD_ASYNC_QSET_RM);
-	else
-		pzl_update(whc, WUSBCMD_PERIODIC_UPDATED
-			   | WUSBCMD_PERIODIC_SYNCED_DB
-			   | WUSBCMD_PERIODIC_QSET_RM);
-
-	spin_lock_irqsave(&whc->lock, flags);
-	qset_remove_urb(whc, qset, wurb->urb, wurb->status);
-	spin_unlock_irqrestore(&whc->lock, flags);
-}
-
-static struct whc_std *qset_new_std(struct whc *whc, struct whc_qset *qset,
-				    struct urb *urb, gfp_t mem_flags)
-{
-	struct whc_std *std;
-
-	std = kzalloc(sizeof(struct whc_std), mem_flags);
-	if (std == NULL)
-		return NULL;
-
-	std->urb = urb;
-	std->qtd = NULL;
-
-	INIT_LIST_HEAD(&std->list_node);
-	list_add_tail(&std->list_node, &qset->stds);
-
-	return std;
-}
-
-static int qset_add_urb_sg(struct whc *whc, struct whc_qset *qset, struct urb *urb,
-			   gfp_t mem_flags)
-{
-	size_t remaining;
-	struct scatterlist *sg;
-	int i;
-	int ntds = 0;
-	struct whc_std *std = NULL;
-	struct whc_page_list_entry *new_pl_virt;
-	dma_addr_t prev_end = 0;
-	size_t pl_len;
-	int p = 0;
-
-	remaining = urb->transfer_buffer_length;
-
-	for_each_sg(urb->sg, sg, urb->num_mapped_sgs, i) {
-		dma_addr_t dma_addr;
-		size_t dma_remaining;
-		dma_addr_t sp, ep;
-		int num_pointers;
-
-		if (remaining == 0) {
-			break;
-		}
-
-		dma_addr = sg_dma_address(sg);
-		dma_remaining = min_t(size_t, sg_dma_len(sg), remaining);
-
-		while (dma_remaining) {
-			size_t dma_len;
-
-			/*
-			 * We can use the previous std (if it exists) provided that:
-			 * - the previous one ended on a page boundary.
-			 * - the current one begins on a page boundary.
-			 * - the previous one isn't full.
-			 *
-			 * If a new std is needed but the previous one
-			 * was not a whole number of packets then this
-			 * sg list cannot be mapped onto multiple
-			 * qTDs.  Return an error and let the caller
-			 * sort it out.
-			 */
-			if (!std
-			    || (prev_end & (WHCI_PAGE_SIZE-1))
-			    || (dma_addr & (WHCI_PAGE_SIZE-1))
-			    || std->len + WHCI_PAGE_SIZE > QTD_MAX_XFER_SIZE) {
-				if (std && std->len % qset->max_packet != 0)
-					return -EINVAL;
-				std = qset_new_std(whc, qset, urb, mem_flags);
-				if (std == NULL) {
-					return -ENOMEM;
-				}
-				ntds++;
-				p = 0;
-			}
-
-			dma_len = dma_remaining;
-
-			/*
-			 * If the remainder of this element doesn't
-			 * fit in a single qTD, limit the qTD to a
-			 * whole number of packets.  This allows the
-			 * remainder to go into the next qTD.
-			 */
-			if (std->len + dma_len > QTD_MAX_XFER_SIZE) {
-				dma_len = (QTD_MAX_XFER_SIZE / qset->max_packet)
-					* qset->max_packet - std->len;
-			}
-
-			std->len += dma_len;
-			std->ntds_remaining = -1; /* filled in later */
-
-			sp = dma_addr & ~(WHCI_PAGE_SIZE-1);
-			ep = dma_addr + dma_len;
-			num_pointers = DIV_ROUND_UP(ep - sp, WHCI_PAGE_SIZE);
-			std->num_pointers += num_pointers;
-
-			pl_len = std->num_pointers * sizeof(struct whc_page_list_entry);
-
-			new_pl_virt = krealloc(std->pl_virt, pl_len, mem_flags);
-			if (new_pl_virt == NULL) {
-				kfree(std->pl_virt);
-				std->pl_virt = NULL;
-				return -ENOMEM;
-			}
-			std->pl_virt = new_pl_virt;
-
-			for (;p < std->num_pointers; p++) {
-				std->pl_virt[p].buf_ptr = cpu_to_le64(dma_addr);
-				dma_addr = (dma_addr + WHCI_PAGE_SIZE) & ~(WHCI_PAGE_SIZE-1);
-			}
-
-			prev_end = dma_addr = ep;
-			dma_remaining -= dma_len;
-			remaining -= dma_len;
-		}
-	}
-
-	/* Now the number of stds is know, go back and fill in
-	   std->ntds_remaining. */
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (std->ntds_remaining == -1) {
-			pl_len = std->num_pointers * sizeof(struct whc_page_list_entry);
-			std->dma_addr = dma_map_single(whc->wusbhc.dev, std->pl_virt,
-						       pl_len, DMA_TO_DEVICE);
-			if (dma_mapping_error(whc->wusbhc.dev, std->dma_addr))
-				return -EFAULT;
-			std->ntds_remaining = ntds--;
-		}
-	}
-	return 0;
-}
-
-/**
- * qset_add_urb_sg_linearize - add an urb with sg list, copying the data
- *
- * If the URB contains an sg list whose elements cannot be directly
- * mapped to qTDs then the data must be transferred via bounce
- * buffers.
- */
-static int qset_add_urb_sg_linearize(struct whc *whc, struct whc_qset *qset,
-				     struct urb *urb, gfp_t mem_flags)
-{
-	bool is_out = usb_pipeout(urb->pipe);
-	size_t max_std_len;
-	size_t remaining;
-	int ntds = 0;
-	struct whc_std *std = NULL;
-	void *bounce = NULL;
-	struct scatterlist *sg;
-	int i;
-
-	/* limit maximum bounce buffer to 16 * 3.5 KiB ~= 28 k */
-	max_std_len = qset->max_burst * qset->max_packet;
-
-	remaining = urb->transfer_buffer_length;
-
-	for_each_sg(urb->sg, sg, urb->num_mapped_sgs, i) {
-		size_t len;
-		size_t sg_remaining;
-		void *orig;
-
-		if (remaining == 0) {
-			break;
-		}
-
-		sg_remaining = min_t(size_t, remaining, sg->length);
-		orig = sg_virt(sg);
-
-		while (sg_remaining) {
-			if (!std || std->len == max_std_len) {
-				std = qset_new_std(whc, qset, urb, mem_flags);
-				if (std == NULL)
-					return -ENOMEM;
-				std->bounce_buf = kmalloc(max_std_len, mem_flags);
-				if (std->bounce_buf == NULL)
-					return -ENOMEM;
-				std->bounce_sg = sg;
-				std->bounce_offset = orig - sg_virt(sg);
-				bounce = std->bounce_buf;
-				ntds++;
-			}
-
-			len = min(sg_remaining, max_std_len - std->len);
-
-			if (is_out)
-				memcpy(bounce, orig, len);
-
-			std->len += len;
-			std->ntds_remaining = -1; /* filled in later */
-
-			bounce += len;
-			orig += len;
-			sg_remaining -= len;
-			remaining -= len;
-		}
-	}
-
-	/*
-	 * For each of the new sTDs, map the bounce buffers, create
-	 * page lists (if necessary), and fill in std->ntds_remaining.
-	 */
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (std->ntds_remaining != -1)
-			continue;
-
-		std->dma_addr = dma_map_single(&whc->umc->dev, std->bounce_buf, std->len,
-					       is_out ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		if (dma_mapping_error(&whc->umc->dev, std->dma_addr))
-			return -EFAULT;
-
-		if (qset_fill_page_list(whc, std, mem_flags) < 0)
-			return -ENOMEM;
-
-		std->ntds_remaining = ntds--;
-	}
-
-	return 0;
-}
-
-/**
- * qset_add_urb - add an urb to the qset's queue.
- *
- * The URB is chopped into sTDs, one for each qTD that will required.
- * At least one qTD (and sTD) is required even if the transfer has no
- * data (e.g., for some control transfers).
- */
-int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb,
-	gfp_t mem_flags)
-{
-	struct whc_urb *wurb;
-	int remaining = urb->transfer_buffer_length;
-	u64 transfer_dma = urb->transfer_dma;
-	int ntds_remaining;
-	int ret;
-
-	wurb = kzalloc(sizeof(struct whc_urb), mem_flags);
-	if (wurb == NULL)
-		goto err_no_mem;
-	urb->hcpriv = wurb;
-	wurb->qset = qset;
-	wurb->urb = urb;
-	INIT_WORK(&wurb->dequeue_work, urb_dequeue_work);
-
-	if (urb->num_sgs) {
-		ret = qset_add_urb_sg(whc, qset, urb, mem_flags);
-		if (ret == -EINVAL) {
-			qset_free_stds(qset, urb);
-			ret = qset_add_urb_sg_linearize(whc, qset, urb, mem_flags);
-		}
-		if (ret < 0)
-			goto err_no_mem;
-		return 0;
-	}
-
-	ntds_remaining = DIV_ROUND_UP(remaining, QTD_MAX_XFER_SIZE);
-	if (ntds_remaining == 0)
-		ntds_remaining = 1;
-
-	while (ntds_remaining) {
-		struct whc_std *std;
-		size_t std_len;
-
-		std_len = remaining;
-		if (std_len > QTD_MAX_XFER_SIZE)
-			std_len = QTD_MAX_XFER_SIZE;
-
-		std = qset_new_std(whc, qset, urb, mem_flags);
-		if (std == NULL)
-			goto err_no_mem;
-
-		std->dma_addr = transfer_dma;
-		std->len = std_len;
-		std->ntds_remaining = ntds_remaining;
-
-		if (qset_fill_page_list(whc, std, mem_flags) < 0)
-			goto err_no_mem;
-
-		ntds_remaining--;
-		remaining -= std_len;
-		transfer_dma += std_len;
-	}
-
-	return 0;
-
-err_no_mem:
-	qset_free_stds(qset, urb);
-	return -ENOMEM;
-}
-
-/**
- * qset_remove_urb - remove an URB from the urb queue.
- *
- * The URB is returned to the USB subsystem.
- */
-void qset_remove_urb(struct whc *whc, struct whc_qset *qset,
-			    struct urb *urb, int status)
-{
-	struct wusbhc *wusbhc = &whc->wusbhc;
-	struct whc_urb *wurb = urb->hcpriv;
-
-	usb_hcd_unlink_urb_from_ep(&wusbhc->usb_hcd, urb);
-	/* Drop the lock as urb->complete() may enqueue another urb. */
-	spin_unlock(&whc->lock);
-	wusbhc_giveback_urb(wusbhc, urb, status);
-	spin_lock(&whc->lock);
-
-	kfree(wurb);
-}
-
-/**
- * get_urb_status_from_qtd - get the completed urb status from qTD status
- * @urb:    completed urb
- * @status: qTD status
- */
-static int get_urb_status_from_qtd(struct urb *urb, u32 status)
-{
-	if (status & QTD_STS_HALTED) {
-		if (status & QTD_STS_DBE)
-			return usb_pipein(urb->pipe) ? -ENOSR : -ECOMM;
-		else if (status & QTD_STS_BABBLE)
-			return -EOVERFLOW;
-		else if (status & QTD_STS_RCE)
-			return -ETIME;
-		return -EPIPE;
-	}
-	if (usb_pipein(urb->pipe)
-	    && (urb->transfer_flags & URB_SHORT_NOT_OK)
-	    && urb->actual_length < urb->transfer_buffer_length)
-		return -EREMOTEIO;
-	return 0;
-}
-
-/**
- * process_inactive_qtd - process an inactive (but not halted) qTD.
- *
- * Update the urb with the transfer bytes from the qTD, if the urb is
- * completely transferred or (in the case of an IN only) the LPF is
- * set, then the transfer is complete and the urb should be returned
- * to the system.
- */
-void process_inactive_qtd(struct whc *whc, struct whc_qset *qset,
-				 struct whc_qtd *qtd)
-{
-	struct whc_std *std = list_first_entry(&qset->stds, struct whc_std, list_node);
-	struct urb *urb = std->urb;
-	uint32_t status;
-	bool complete;
-
-	status = le32_to_cpu(qtd->status);
-
-	urb->actual_length += std->len - QTD_STS_TO_LEN(status);
-
-	if (usb_pipein(urb->pipe) && (status & QTD_STS_LAST_PKT))
-		complete = true;
-	else
-		complete = whc_std_last(std);
-
-	qset_remove_qtd(whc, qset);
-	qset_free_std(whc, std);
-
-	/*
-	 * Transfers for this URB are complete?  Then return it to the
-	 * USB subsystem.
-	 */
-	if (complete) {
-		qset_remove_qtds(whc, qset, urb);
-		qset_remove_urb(whc, qset, urb, get_urb_status_from_qtd(urb, status));
-
-		/*
-		 * If iAlt isn't valid then the hardware didn't
-		 * advance iCur. Adjust the start and end pointers to
-		 * match iCur.
-		 */
-		if (!(status & QTD_STS_IALT_VALID))
-			qset->td_start = qset->td_end
-				= QH_STATUS_TO_ICUR(le16_to_cpu(qset->qh.status));
-		qset->pause_after_urb = NULL;
-	}
-}
-
-/**
- * process_halted_qtd - process a qset with a halted qtd
- *
- * Remove all the qTDs for the failed URB and return the failed URB to
- * the USB subsystem.  Then remove all other qTDs so the qset can be
- * removed.
- *
- * FIXME: this is the point where rate adaptation can be done.  If a
- * transfer failed because it exceeded the maximum number of retries
- * then it could be reactivated with a slower rate without having to
- * remove the qset.
- */
-void process_halted_qtd(struct whc *whc, struct whc_qset *qset,
-			       struct whc_qtd *qtd)
-{
-	struct whc_std *std = list_first_entry(&qset->stds, struct whc_std, list_node);
-	struct urb *urb = std->urb;
-	int urb_status;
-
-	urb_status = get_urb_status_from_qtd(urb, le32_to_cpu(qtd->status));
-
-	qset_remove_qtds(whc, qset, urb);
-	qset_remove_urb(whc, qset, urb, urb_status);
-
-	list_for_each_entry(std, &qset->stds, list_node) {
-		if (qset->ntds == 0)
-			break;
-		qset_remove_qtd(whc, qset);
-		std->qtd = NULL;
-	}
-
-	qset->remove = 1;
-}
-
-void qset_free(struct whc *whc, struct whc_qset *qset)
-{
-	dma_pool_free(whc->qset_pool, qset, qset->qset_dma);
-}
-
-/**
- * qset_delete - wait for a qset to be unused, then free it.
- */
-void qset_delete(struct whc *whc, struct whc_qset *qset)
-{
-	wait_for_completion(&qset->remove_complete);
-	qset_free(whc, qset);
-}
diff --git a/drivers/usb/host/whci/whcd.h b/drivers/usb/host/whci/whcd.h
deleted file mode 100644
index 139476997e7c..000000000000
--- a/drivers/usb/host/whci/whcd.h
+++ /dev/null
@@ -1,202 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) private header.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#ifndef __WHCD_H
-#define __WHCD_H
-
-#include <linux/uwb/whci.h>
-#include <linux/uwb/umc.h>
-#include <linux/workqueue.h>
-
-#include "whci-hc.h"
-
-/* Generic command timeout. */
-#define WHC_GENCMD_TIMEOUT_MS 100
-
-struct whc_dbg;
-
-struct whc {
-	struct wusbhc wusbhc;
-	struct umc_dev *umc;
-
-	resource_size_t base_phys;
-	void __iomem *base;
-	int irq;
-
-	u8 n_devices;
-	u8 n_keys;
-	u8 n_mmc_ies;
-
-	u64 *pz_list;
-	struct dn_buf_entry *dn_buf;
-	struct di_buf_entry *di_buf;
-	dma_addr_t pz_list_dma;
-	dma_addr_t dn_buf_dma;
-	dma_addr_t di_buf_dma;
-
-	spinlock_t   lock;
-	struct mutex mutex;
-
-	void *            gen_cmd_buf;
-	dma_addr_t        gen_cmd_buf_dma;
-	wait_queue_head_t cmd_wq;
-
-	struct workqueue_struct *workqueue;
-	struct work_struct       dn_work;
-
-	struct dma_pool *qset_pool;
-
-	struct list_head async_list;
-	struct list_head async_removed_list;
-	wait_queue_head_t async_list_wq;
-	struct work_struct async_work;
-
-	struct list_head periodic_list[5];
-	struct list_head periodic_removed_list;
-	wait_queue_head_t periodic_list_wq;
-	struct work_struct periodic_work;
-
-	struct whc_dbg *dbg;
-};
-
-#define wusbhc_to_whc(w) (container_of((w), struct whc, wusbhc))
-
-/**
- * struct whc_std - a software TD.
- * @urb: the URB this sTD is for.
- * @offset: start of the URB's data for this TD.
- * @len: the length of data in the associated TD.
- * @ntds_remaining: number of TDs (starting from this one) in this transfer.
- *
- * @bounce_buf: a bounce buffer if the std was from an urb with a sg
- * list that could not be mapped to qTDs directly.
- * @bounce_sg: the first scatterlist element bounce_buf is for.
- * @bounce_offset: the offset into bounce_sg for the start of bounce_buf.
- *
- * Queued URBs may require more TDs than are available in a qset so we
- * use a list of these "software TDs" (sTDs) to hold per-TD data.
- */
-struct whc_std {
-	struct urb *urb;
-	size_t len;
-	int    ntds_remaining;
-	struct whc_qtd *qtd;
-
-	struct list_head list_node;
-	int num_pointers;
-	dma_addr_t dma_addr;
-	struct whc_page_list_entry *pl_virt;
-
-	void *bounce_buf;
-	struct scatterlist *bounce_sg;
-	unsigned bounce_offset;
-};
-
-/**
- * struct whc_urb - per URB host controller structure.
- * @urb: the URB this struct is for.
- * @qset: the qset associated to the URB.
- * @dequeue_work: the work to remove the URB when dequeued.
- * @is_async: the URB belongs to async sheduler or not.
- * @status: the status to be returned when calling wusbhc_giveback_urb.
- */
-struct whc_urb {
-	struct urb *urb;
-	struct whc_qset *qset;
-	struct work_struct dequeue_work;
-	bool is_async;
-	int status;
-};
-
-/**
- * whc_std_last - is this sTD the URB's last?
- * @std: the sTD to check.
- */
-static inline bool whc_std_last(struct whc_std *std)
-{
-	return std->ntds_remaining <= 1;
-}
-
-enum whc_update {
-	WHC_UPDATE_ADDED   = 0x01,
-	WHC_UPDATE_REMOVED = 0x02,
-	WHC_UPDATE_UPDATED = 0x04,
-};
-
-/* init.c */
-int whc_init(struct whc *whc);
-void whc_clean_up(struct whc *whc);
-
-/* hw.c */
-void whc_write_wusbcmd(struct whc *whc, u32 mask, u32 val);
-int whc_do_gencmd(struct whc *whc, u32 cmd, u32 params, void *addr, size_t len);
-void whc_hw_error(struct whc *whc, const char *reason);
-
-/* wusb.c */
-int whc_wusbhc_start(struct wusbhc *wusbhc);
-void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay);
-int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
-		  u8 handle, struct wuie_hdr *wuie);
-int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle);
-int whc_bwa_set(struct wusbhc *wusbhc, s8 stream_index, const struct uwb_mas_bm *mas_bm);
-int whc_dev_info_set(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev);
-int whc_set_num_dnts(struct wusbhc *wusbhc, u8 interval, u8 slots);
-int whc_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
-		const void *ptk, size_t key_size);
-int whc_set_gtk(struct wusbhc *wusbhc, u32 tkid,
-		const void *gtk, size_t key_size);
-int whc_set_cluster_id(struct whc *whc, u8 bcid);
-
-/* int.c */
-irqreturn_t whc_int_handler(struct usb_hcd *hcd);
-void whc_dn_work(struct work_struct *work);
-
-/* asl.c */
-void asl_start(struct whc *whc);
-void asl_stop(struct whc *whc);
-int  asl_init(struct whc *whc);
-void asl_clean_up(struct whc *whc);
-int  asl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags);
-int  asl_urb_dequeue(struct whc *whc, struct urb *urb, int status);
-void asl_qset_delete(struct whc *whc, struct whc_qset *qset);
-void scan_async_work(struct work_struct *work);
-
-/* pzl.c */
-int  pzl_init(struct whc *whc);
-void pzl_clean_up(struct whc *whc);
-void pzl_start(struct whc *whc);
-void pzl_stop(struct whc *whc);
-int  pzl_urb_enqueue(struct whc *whc, struct urb *urb, gfp_t mem_flags);
-int  pzl_urb_dequeue(struct whc *whc, struct urb *urb, int status);
-void pzl_qset_delete(struct whc *whc, struct whc_qset *qset);
-void scan_periodic_work(struct work_struct *work);
-
-/* qset.c */
-struct whc_qset *qset_alloc(struct whc *whc, gfp_t mem_flags);
-void qset_free(struct whc *whc, struct whc_qset *qset);
-struct whc_qset *get_qset(struct whc *whc, struct urb *urb, gfp_t mem_flags);
-void qset_delete(struct whc *whc, struct whc_qset *qset);
-void qset_clear(struct whc *whc, struct whc_qset *qset);
-void qset_reset(struct whc *whc, struct whc_qset *qset);
-int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb,
-		 gfp_t mem_flags);
-void qset_free_std(struct whc *whc, struct whc_std *std);
-void qset_remove_urb(struct whc *whc, struct whc_qset *qset,
-			    struct urb *urb, int status);
-void process_halted_qtd(struct whc *whc, struct whc_qset *qset,
-			       struct whc_qtd *qtd);
-void process_inactive_qtd(struct whc *whc, struct whc_qset *qset,
-				 struct whc_qtd *qtd);
-enum whc_update qset_add_qtds(struct whc *whc, struct whc_qset *qset);
-void qset_remove_complete(struct whc *whc, struct whc_qset *qset);
-void pzl_update(struct whc *whc, uint32_t wusbcmd);
-void asl_update(struct whc *whc, uint32_t wusbcmd);
-
-/* debug.c */
-void whc_dbg_init(struct whc *whc);
-void whc_dbg_clean_up(struct whc *whc);
-
-#endif /* #ifndef __WHCD_H */
diff --git a/drivers/usb/host/whci/whci-hc.h b/drivers/usb/host/whci/whci-hc.h
deleted file mode 100644
index 5a86a57a80cc..000000000000
--- a/drivers/usb/host/whci/whci-hc.h
+++ /dev/null
@@ -1,401 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) data structures.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#ifndef _WHCI_WHCI_HC_H
-#define _WHCI_WHCI_HC_H
-
-#include <linux/list.h>
-
-/**
- * WHCI_PAGE_SIZE - page size use by WHCI
- *
- * WHCI assumes that host system uses pages of 4096 octets.
- */
-#define WHCI_PAGE_SIZE 4096
-
-
-/**
- * QTD_MAX_TXFER_SIZE - max number of bytes to transfer with a single
- * qtd.
- *
- * This is 2^20 - 1.
- */
-#define QTD_MAX_XFER_SIZE 1048575
-
-
-/**
- * struct whc_qtd - Queue Element Transfer Descriptors (qTD)
- *
- * This describes the data for a bulk, control or interrupt transfer.
- *
- * [WHCI] section 3.2.4
- */
-struct whc_qtd {
-	__le32 status; /*< remaining transfer len and transfer status */
-	__le32 options;
-	__le64 page_list_ptr; /*< physical pointer to data buffer page list*/
-	__u8   setup[8];      /*< setup data for control transfers */
-} __attribute__((packed));
-
-#define QTD_STS_ACTIVE     (1 << 31)  /* enable execution of transaction */
-#define QTD_STS_HALTED     (1 << 30)  /* transfer halted */
-#define QTD_STS_DBE        (1 << 29)  /* data buffer error */
-#define QTD_STS_BABBLE     (1 << 28)  /* babble detected */
-#define QTD_STS_RCE        (1 << 27)  /* retry count exceeded */
-#define QTD_STS_LAST_PKT   (1 << 26)  /* set Last Packet Flag in WUSB header */
-#define QTD_STS_INACTIVE   (1 << 25)  /* queue set is marked inactive */
-#define QTD_STS_IALT_VALID (1 << 23)                          /* iAlt field is valid */
-#define QTD_STS_IALT(i)    (QTD_STS_IALT_VALID | ((i) << 20)) /* iAlt field */
-#define QTD_STS_LEN(l)     ((l) << 0) /* transfer length */
-#define QTD_STS_TO_LEN(s)  ((s) & 0x000fffff)
-
-#define QTD_OPT_IOC      (1 << 1) /* page_list_ptr points to buffer directly */
-#define QTD_OPT_SMALL    (1 << 0) /* interrupt on complete */
-
-/**
- * struct whc_itd - Isochronous Queue Element Transfer Descriptors (iTD)
- *
- * This describes the data and other parameters for an isochronous
- * transfer.
- *
- * [WHCI] section 3.2.5
- */
-struct whc_itd {
-	__le16 presentation_time;    /*< presentation time for OUT transfers */
-	__u8   num_segments;         /*< number of data segments in segment list */
-	__u8   status;               /*< command execution status */
-	__le32 options;              /*< misc transfer options */
-	__le64 page_list_ptr;        /*< physical pointer to data buffer page list */
-	__le64 seg_list_ptr;         /*< physical pointer to segment list */
-} __attribute__((packed));
-
-#define ITD_STS_ACTIVE   (1 << 7) /* enable execution of transaction */
-#define ITD_STS_DBE      (1 << 5) /* data buffer error */
-#define ITD_STS_BABBLE   (1 << 4) /* babble detected */
-#define ITD_STS_INACTIVE (1 << 1) /* queue set is marked inactive */
-
-#define ITD_OPT_IOC      (1 << 1) /* interrupt on complete */
-#define ITD_OPT_SMALL    (1 << 0) /* page_list_ptr points to buffer directly */
-
-/**
- * Page list entry.
- *
- * A TD's page list must contain sufficient page list entries for the
- * total data length in the TD.
- *
- * [WHCI] section 3.2.4.3
- */
-struct whc_page_list_entry {
-	__le64 buf_ptr; /*< physical pointer to buffer */
-} __attribute__((packed));
-
-/**
- * struct whc_seg_list_entry - Segment list entry.
- *
- * Describes a portion of the data buffer described in the containing
- * qTD's page list.
- *
- * seg_ptr = qtd->page_list_ptr[qtd->seg_list_ptr[seg].idx].buf_ptr
- *           + qtd->seg_list_ptr[seg].offset;
- *
- * Segments can't cross page boundries.
- *
- * [WHCI] section 3.2.5.5
- */
-struct whc_seg_list_entry {
-	__le16 len;    /*< segment length */
-	__u8   idx;    /*< index into page list */
-	__u8   status; /*< segment status */
-	__le16 offset; /*< 12 bit offset into page */
-} __attribute__((packed));
-
-/**
- * struct whc_qhead - endpoint and status information for a qset.
- *
- * [WHCI] section 3.2.6
- */
-struct whc_qhead {
-	__le64 link; /*< next qset in list */
-	__le32 info1;
-	__le32 info2;
-	__le32 info3;
-	__le16 status;
-	__le16 err_count;  /*< transaction error count */
-	__le32 cur_window;
-	__le32 scratch[3]; /*< h/w scratch area */
-	union {
-		struct whc_qtd qtd;
-		struct whc_itd itd;
-	} overlay;
-} __attribute__((packed));
-
-#define QH_LINK_PTR_MASK (~0x03Full)
-#define QH_LINK_PTR(ptr) ((ptr) & QH_LINK_PTR_MASK)
-#define QH_LINK_IQS      (1 << 4) /* isochronous queue set */
-#define QH_LINK_NTDS(n)  (((n) - 1) << 1) /* number of TDs in queue set */
-#define QH_LINK_T        (1 << 0) /* last queue set in periodic schedule list */
-
-#define QH_INFO1_EP(e)           ((e) << 0)  /* endpoint number */
-#define QH_INFO1_DIR_IN          (1 << 4)    /* IN transfer */
-#define QH_INFO1_DIR_OUT         (0 << 4)    /* OUT transfer */
-#define QH_INFO1_TR_TYPE_CTRL    (0x0 << 5)  /* control transfer */
-#define QH_INFO1_TR_TYPE_ISOC    (0x1 << 5)  /* isochronous transfer */
-#define QH_INFO1_TR_TYPE_BULK    (0x2 << 5)  /* bulk transfer */
-#define QH_INFO1_TR_TYPE_INT     (0x3 << 5)  /* interrupt */
-#define QH_INFO1_TR_TYPE_LP_INT  (0x7 << 5)  /* low power interrupt */
-#define QH_INFO1_DEV_INFO_IDX(i) ((i) << 8)  /* index into device info buffer */
-#define QH_INFO1_SET_INACTIVE    (1 << 15)   /* set inactive after transfer */
-#define QH_INFO1_MAX_PKT_LEN(l)  ((l) << 16) /* maximum packet length */
-
-#define QH_INFO2_BURST(b)        ((b) << 0)  /* maximum burst length */
-#define QH_INFO2_DBP(p)          ((p) << 5)  /* data burst policy (see [WUSB] table 5-7) */
-#define QH_INFO2_MAX_COUNT(c)    ((c) << 8)  /* max isoc/int pkts per zone */
-#define QH_INFO2_RQS             (1 << 15)   /* reactivate queue set */
-#define QH_INFO2_MAX_RETRY(r)    ((r) << 16) /* maximum transaction retries */
-#define QH_INFO2_MAX_SEQ(s)      ((s) << 20) /* maximum sequence number */
-#define QH_INFO3_MAX_DELAY(d)    ((d) << 0)  /* maximum stream delay in 125 us units (isoc only) */
-#define QH_INFO3_INTERVAL(i)     ((i) << 16) /* segment interval in 125 us units (isoc only) */
-
-#define QH_INFO3_TX_RATE(r)      ((r) << 24) /* PHY rate (see [ECMA-368] section 10.3.1.1) */
-#define QH_INFO3_TX_PWR(p)       ((p) << 29) /* transmit power (see [WUSB] section 5.2.1.2) */
-
-#define QH_STATUS_FLOW_CTRL      (1 << 15)
-#define QH_STATUS_ICUR(i)        ((i) << 5)
-#define QH_STATUS_TO_ICUR(s)     (((s) >> 5) & 0x7)
-#define QH_STATUS_SEQ_MASK       0x1f
-
-/**
- * usb_pipe_to_qh_type - USB core pipe type to QH transfer type
- *
- * Returns the QH type field for a USB core pipe type.
- */
-static inline unsigned usb_pipe_to_qh_type(unsigned pipe)
-{
-	static const unsigned type[] = {
-		[PIPE_ISOCHRONOUS] = QH_INFO1_TR_TYPE_ISOC,
-		[PIPE_INTERRUPT]   = QH_INFO1_TR_TYPE_INT,
-		[PIPE_CONTROL]     = QH_INFO1_TR_TYPE_CTRL,
-		[PIPE_BULK]        = QH_INFO1_TR_TYPE_BULK,
-	};
-	return type[usb_pipetype(pipe)];
-}
-
-/**
- * Maxiumum number of TDs in a qset.
- */
-#define WHCI_QSET_TD_MAX 8
-
-/**
- * struct whc_qset - WUSB data transfers to a specific endpoint
- * @qh: the QHead of this qset
- * @qtd: up to 8 qTDs (for qsets for control, bulk and interrupt
- * transfers)
- * @itd: up to 8 iTDs (for qsets for isochronous transfers)
- * @qset_dma: DMA address for this qset
- * @whc: WHCI HC this qset is for
- * @ep: endpoint
- * @stds: list of sTDs queued to this qset
- * @ntds: number of qTDs queued (not necessarily the same as nTDs
- * field in the QH)
- * @td_start: index of the first qTD in the list
- * @td_end: index of next free qTD in the list (provided
- *          ntds < WHCI_QSET_TD_MAX)
- *
- * Queue Sets (qsets) are added to the asynchronous schedule list
- * (ASL) or the periodic zone list (PZL).
- *
- * qsets may contain up to 8 TDs (either qTDs or iTDs as appropriate).
- * Each TD may refer to at most 1 MiB of data. If a single transfer
- * has > 8MiB of data, TDs can be reused as they are completed since
- * the TD list is used as a circular buffer.  Similarly, several
- * (smaller) transfers may be queued in a qset.
- *
- * WHCI controllers may cache portions of the qsets in the ASL and
- * PZL, requiring the WHCD to inform the WHC that the lists have been
- * updated (fields changed or qsets inserted or removed).  For safe
- * insertion and removal of qsets from the lists the schedule must be
- * stopped to avoid races in updating the QH link pointers.
- *
- * Since the HC is free to execute qsets in any order, all transfers
- * to an endpoint should use the same qset to ensure transfers are
- * executed in the order they're submitted.
- *
- * [WHCI] section 3.2.3
- */
-struct whc_qset {
-	struct whc_qhead qh;
-	union {
-		struct whc_qtd qtd[WHCI_QSET_TD_MAX];
-		struct whc_itd itd[WHCI_QSET_TD_MAX];
-	};
-
-	/* private data for WHCD */
-	dma_addr_t qset_dma;
-	struct whc *whc;
-	struct usb_host_endpoint *ep;
-	struct list_head stds;
-	int ntds;
-	int td_start;
-	int td_end;
-	struct list_head list_node;
-	unsigned in_sw_list:1;
-	unsigned in_hw_list:1;
-	unsigned remove:1;
-	unsigned reset:1;
-	struct urb *pause_after_urb;
-	struct completion remove_complete;
-	uint16_t max_packet;
-	uint8_t max_burst;
-	uint8_t max_seq;
-};
-
-static inline void whc_qset_set_link_ptr(u64 *ptr, u64 target)
-{
-	if (target)
-		*ptr = (*ptr & ~(QH_LINK_PTR_MASK | QH_LINK_T)) | QH_LINK_PTR(target);
-	else
-		*ptr = QH_LINK_T;
-}
-
-/**
- * struct di_buf_entry - Device Information (DI) buffer entry.
- *
- * There's one of these per connected device.
- */
-struct di_buf_entry {
-	__le32 availability_info[8]; /*< MAS availability information, one MAS per bit */
-	__le32 addr_sec_info;        /*< addressing and security info */
-	__le32 reserved[7];
-} __attribute__((packed));
-
-#define WHC_DI_SECURE           (1 << 31)
-#define WHC_DI_DISABLE          (1 << 30)
-#define WHC_DI_KEY_IDX(k)       ((k) << 8)
-#define WHC_DI_KEY_IDX_MASK     0x0000ff00
-#define WHC_DI_DEV_ADDR(a)      ((a) << 0)
-#define WHC_DI_DEV_ADDR_MASK    0x000000ff
-
-/**
- * struct dn_buf_entry - Device Notification (DN) buffer entry.
- *
- * [WHCI] section 3.2.8
- */
-struct dn_buf_entry {
-	__u8   msg_size;    /*< number of octets of valid DN data */
-	__u8   reserved1;
-	__u8   src_addr;    /*< source address */
-	__u8   status;      /*< buffer entry status */
-	__le32 tkid;        /*< TKID for source device, valid if secure bit is set */
-	__u8   dn_data[56]; /*< up to 56 octets of DN data */
-} __attribute__((packed));
-
-#define WHC_DN_STATUS_VALID  (1 << 7) /* buffer entry is valid */
-#define WHC_DN_STATUS_SECURE (1 << 6) /* notification received using secure frame */
-
-#define WHC_N_DN_ENTRIES (4096 / sizeof(struct dn_buf_entry))
-
-/* The Add MMC IE WUSB Generic Command may take up to 256 bytes of
-   data. [WHCI] section 2.4.7. */
-#define WHC_GEN_CMD_DATA_LEN 256
-
-/*
- * HC registers.
- *
- * [WHCI] section 2.4
- */
-
-#define WHCIVERSION          0x00
-
-#define WHCSPARAMS           0x04
-#  define WHCSPARAMS_TO_N_MMC_IES(p) (((p) >> 16) & 0xff)
-#  define WHCSPARAMS_TO_N_KEYS(p)    (((p) >> 8) & 0xff)
-#  define WHCSPARAMS_TO_N_DEVICES(p) (((p) >> 0) & 0x7f)
-
-#define WUSBCMD              0x08
-#  define WUSBCMD_BCID(b)            ((b) << 16)
-#  define WUSBCMD_BCID_MASK          (0xff << 16)
-#  define WUSBCMD_ASYNC_QSET_RM      (1 << 12)
-#  define WUSBCMD_PERIODIC_QSET_RM   (1 << 11)
-#  define WUSBCMD_WUSBSI(s)          ((s) << 8)
-#  define WUSBCMD_WUSBSI_MASK        (0x7 << 8)
-#  define WUSBCMD_ASYNC_SYNCED_DB    (1 << 7)
-#  define WUSBCMD_PERIODIC_SYNCED_DB (1 << 6)
-#  define WUSBCMD_ASYNC_UPDATED      (1 << 5)
-#  define WUSBCMD_PERIODIC_UPDATED   (1 << 4)
-#  define WUSBCMD_ASYNC_EN           (1 << 3)
-#  define WUSBCMD_PERIODIC_EN        (1 << 2)
-#  define WUSBCMD_WHCRESET           (1 << 1)
-#  define WUSBCMD_RUN                (1 << 0)
-
-#define WUSBSTS              0x0c
-#  define WUSBSTS_ASYNC_SCHED             (1 << 15)
-#  define WUSBSTS_PERIODIC_SCHED          (1 << 14)
-#  define WUSBSTS_DNTS_SCHED              (1 << 13)
-#  define WUSBSTS_HCHALTED                (1 << 12)
-#  define WUSBSTS_GEN_CMD_DONE            (1 << 9)
-#  define WUSBSTS_CHAN_TIME_ROLLOVER      (1 << 8)
-#  define WUSBSTS_DNTS_OVERFLOW           (1 << 7)
-#  define WUSBSTS_BPST_ADJUSTMENT_CHANGED (1 << 6)
-#  define WUSBSTS_HOST_ERR                (1 << 5)
-#  define WUSBSTS_ASYNC_SCHED_SYNCED      (1 << 4)
-#  define WUSBSTS_PERIODIC_SCHED_SYNCED   (1 << 3)
-#  define WUSBSTS_DNTS_INT                (1 << 2)
-#  define WUSBSTS_ERR_INT                 (1 << 1)
-#  define WUSBSTS_INT                     (1 << 0)
-#  define WUSBSTS_INT_MASK                0x3ff
-
-#define WUSBINTR             0x10
-#  define WUSBINTR_GEN_CMD_DONE             (1 << 9)
-#  define WUSBINTR_CHAN_TIME_ROLLOVER       (1 << 8)
-#  define WUSBINTR_DNTS_OVERFLOW            (1 << 7)
-#  define WUSBINTR_BPST_ADJUSTMENT_CHANGED  (1 << 6)
-#  define WUSBINTR_HOST_ERR                 (1 << 5)
-#  define WUSBINTR_ASYNC_SCHED_SYNCED       (1 << 4)
-#  define WUSBINTR_PERIODIC_SCHED_SYNCED    (1 << 3)
-#  define WUSBINTR_DNTS_INT                 (1 << 2)
-#  define WUSBINTR_ERR_INT                  (1 << 1)
-#  define WUSBINTR_INT                      (1 << 0)
-#  define WUSBINTR_ALL 0x3ff
-
-#define WUSBGENCMDSTS        0x14
-#  define WUSBGENCMDSTS_ACTIVE (1 << 31)
-#  define WUSBGENCMDSTS_ERROR  (1 << 24)
-#  define WUSBGENCMDSTS_IOC    (1 << 23)
-#  define WUSBGENCMDSTS_MMCIE_ADD 0x01
-#  define WUSBGENCMDSTS_MMCIE_RM  0x02
-#  define WUSBGENCMDSTS_SET_MAS   0x03
-#  define WUSBGENCMDSTS_CHAN_STOP 0x04
-#  define WUSBGENCMDSTS_RWP_EN    0x05
-
-#define WUSBGENCMDPARAMS     0x18
-#define WUSBGENADDR          0x20
-#define WUSBASYNCLISTADDR    0x28
-#define WUSBDNTSBUFADDR      0x30
-#define WUSBDEVICEINFOADDR   0x38
-
-#define WUSBSETSECKEYCMD     0x40
-#  define WUSBSETSECKEYCMD_SET    (1 << 31)
-#  define WUSBSETSECKEYCMD_ERASE  (1 << 30)
-#  define WUSBSETSECKEYCMD_GTK    (1 << 8)
-#  define WUSBSETSECKEYCMD_IDX(i) ((i) << 0)
-
-#define WUSBTKID             0x44
-#define WUSBSECKEY           0x48
-#define WUSBPERIODICLISTBASE 0x58
-#define WUSBMASINDEX         0x60
-
-#define WUSBDNTSCTRL         0x64
-#  define WUSBDNTSCTRL_ACTIVE      (1 << 31)
-#  define WUSBDNTSCTRL_INTERVAL(i) ((i) << 8)
-#  define WUSBDNTSCTRL_SLOTS(s)    ((s) << 0)
-
-#define WUSBTIME             0x68
-#  define WUSBTIME_CHANNEL_TIME_MASK 0x00ffffff
-
-#define WUSBBPST             0x6c
-#define WUSBDIBUPDATED       0x70
-
-#endif /* #ifndef _WHCI_WHCI_HC_H */
diff --git a/drivers/usb/host/whci/wusb.c b/drivers/usb/host/whci/wusb.c
deleted file mode 100644
index 8a4d805ff63a..000000000000
--- a/drivers/usb/host/whci/wusb.c
+++ /dev/null
@@ -1,210 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless Host Controller (WHC) WUSB operations.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/uwb/umc.h>
-
-#include "../../wusbcore/wusbhc.h"
-
-#include "whcd.h"
-
-static int whc_update_di(struct whc *whc, int idx)
-{
-	int offset = idx / 32;
-	u32 bit = 1 << (idx % 32);
-
-	le_writel(bit, whc->base + WUSBDIBUPDATED + offset);
-
-	return whci_wait_for(&whc->umc->dev,
-			     whc->base + WUSBDIBUPDATED + offset, bit, 0,
-			     100, "DI update");
-}
-
-/*
- * WHCI starts MMCs based on there being a valid GTK so these need
- * only start/stop the asynchronous and periodic schedules and send a
- * channel stop command.
- */
-
-int whc_wusbhc_start(struct wusbhc *wusbhc)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-
-	asl_start(whc);
-	pzl_start(whc);
-
-	return 0;
-}
-
-void whc_wusbhc_stop(struct wusbhc *wusbhc, int delay)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	u32 stop_time, now_time;
-	int ret;
-
-	pzl_stop(whc);
-	asl_stop(whc);
-
-	now_time = le_readl(whc->base + WUSBTIME) & WUSBTIME_CHANNEL_TIME_MASK;
-	stop_time = (now_time + ((delay * 8) << 7)) & 0x00ffffff;
-	ret = whc_do_gencmd(whc, WUSBGENCMDSTS_CHAN_STOP, stop_time, NULL, 0);
-	if (ret == 0)
-		msleep(delay);
-}
-
-int whc_mmcie_add(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
-		  u8 handle, struct wuie_hdr *wuie)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	u32 params;
-
-	params = (interval << 24)
-		| (repeat_cnt << 16)
-		| (wuie->bLength << 8)
-		| handle;
-
-	return whc_do_gencmd(whc, WUSBGENCMDSTS_MMCIE_ADD, params, wuie, wuie->bLength);
-}
-
-int whc_mmcie_rm(struct wusbhc *wusbhc, u8 handle)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	u32 params;
-
-	params = handle;
-
-	return whc_do_gencmd(whc, WUSBGENCMDSTS_MMCIE_RM, params, NULL, 0);
-}
-
-int whc_bwa_set(struct wusbhc *wusbhc, s8 stream_index, const struct uwb_mas_bm *mas_bm)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-
-	if (stream_index >= 0)
-		whc_write_wusbcmd(whc, WUSBCMD_WUSBSI_MASK, WUSBCMD_WUSBSI(stream_index));
-
-	return whc_do_gencmd(whc, WUSBGENCMDSTS_SET_MAS, 0, (void *)mas_bm, sizeof(*mas_bm));
-}
-
-int whc_dev_info_set(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	int idx = wusb_dev->port_idx;
-	struct di_buf_entry *di = &whc->di_buf[idx];
-	int ret;
-
-	mutex_lock(&whc->mutex);
-
-	uwb_mas_bm_copy_le(di->availability_info, &wusb_dev->availability);
-	di->addr_sec_info &= ~(WHC_DI_DISABLE | WHC_DI_DEV_ADDR_MASK);
-	di->addr_sec_info |= WHC_DI_DEV_ADDR(wusb_dev->addr);
-
-	ret = whc_update_di(whc, idx);
-
-	mutex_unlock(&whc->mutex);
-
-	return ret;
-}
-
-/*
- * Set the number of Device Notification Time Slots (DNTS) and enable
- * device notifications.
- */
-int whc_set_num_dnts(struct wusbhc *wusbhc, u8 interval, u8 slots)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	u32 dntsctrl;
-
-	dntsctrl = WUSBDNTSCTRL_ACTIVE
-		| WUSBDNTSCTRL_INTERVAL(interval)
-		| WUSBDNTSCTRL_SLOTS(slots);
-
-	le_writel(dntsctrl, whc->base + WUSBDNTSCTRL);
-
-	return 0;
-}
-
-static int whc_set_key(struct whc *whc, u8 key_index, uint32_t tkid,
-		       const void *key, size_t key_size, bool is_gtk)
-{
-	uint32_t setkeycmd;
-	uint32_t seckey[4];
-	int i;
-	int ret;
-
-	memcpy(seckey, key, key_size);
-	setkeycmd = WUSBSETSECKEYCMD_SET | WUSBSETSECKEYCMD_IDX(key_index);
-	if (is_gtk)
-		setkeycmd |= WUSBSETSECKEYCMD_GTK;
-
-	le_writel(tkid, whc->base + WUSBTKID);
-	for (i = 0; i < 4; i++)
-		le_writel(seckey[i], whc->base + WUSBSECKEY + 4*i);
-	le_writel(setkeycmd, whc->base + WUSBSETSECKEYCMD);
-
-	ret = whci_wait_for(&whc->umc->dev, whc->base + WUSBSETSECKEYCMD,
-			    WUSBSETSECKEYCMD_SET, 0, 100, "set key");
-
-	return ret;
-}
-
-/**
- * whc_set_ptk - set the PTK to use for a device.
- *
- * The index into the key table for this PTK is the same as the
- * device's port index.
- */
-int whc_set_ptk(struct wusbhc *wusbhc, u8 port_idx, u32 tkid,
-		const void *ptk, size_t key_size)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	struct di_buf_entry *di = &whc->di_buf[port_idx];
-	int ret;
-
-	mutex_lock(&whc->mutex);
-
-	if (ptk) {
-		ret = whc_set_key(whc, port_idx, tkid, ptk, key_size, false);
-		if (ret)
-			goto out;
-
-		di->addr_sec_info &= ~WHC_DI_KEY_IDX_MASK;
-		di->addr_sec_info |= WHC_DI_SECURE | WHC_DI_KEY_IDX(port_idx);
-	} else
-		di->addr_sec_info &= ~WHC_DI_SECURE;
-
-	ret = whc_update_di(whc, port_idx);
-out:
-	mutex_unlock(&whc->mutex);
-	return ret;
-}
-
-/**
- * whc_set_gtk - set the GTK for subsequent broadcast packets
- *
- * The GTK is stored in the last entry in the key table (the previous
- * N_DEVICES entries are for the per-device PTKs).
- */
-int whc_set_gtk(struct wusbhc *wusbhc, u32 tkid,
-		const void *gtk, size_t key_size)
-{
-	struct whc *whc = wusbhc_to_whc(wusbhc);
-	int ret;
-
-	mutex_lock(&whc->mutex);
-
-	ret = whc_set_key(whc, whc->n_devices, tkid, gtk, key_size, true);
-
-	mutex_unlock(&whc->mutex);
-
-	return ret;
-}
-
-int whc_set_cluster_id(struct whc *whc, u8 bcid)
-{
-	whc_write_wusbcmd(whc, WUSBCMD_BCID_MASK, WUSBCMD_BCID(bcid));
-	return 0;
-}
diff --git a/drivers/usb/wusbcore/Kconfig b/drivers/usb/wusbcore/Kconfig
deleted file mode 100644
index abc0f361021f..000000000000
--- a/drivers/usb/wusbcore/Kconfig
+++ /dev/null
@@ -1,38 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Wireless USB Core configuration
-#
-config USB_WUSB
-	tristate "Enable Wireless USB extensions"
-	depends on UWB
-	select CRYPTO
-	select CRYPTO_AES
-	select CRYPTO_CCM
-	help
-	  Enable the host-side support for Wireless USB.
-
-          To compile this support select Y (built in). It is safe to
-	  select even if you don't have the hardware.
-
-config USB_WUSB_CBAF
-	tristate "Support WUSB Cable Based Association (CBA)"
-	depends on USB
-	help
-	  Some WUSB devices support Cable Based Association. It's used to
-	  enable the secure communication between the host and the
-	  device.
-
-	  Enable this option if your WUSB device must to be connected
-	  via wired USB before establishing a wireless link.
-
-	  It is safe to select even if you don't have a compatible
-	  hardware.
-
-config USB_WUSB_CBAF_DEBUG
-	bool "Enable CBA debug messages"
-	depends on USB_WUSB_CBAF
-	help
-	  Say Y here if you want the CBA to produce a bunch of debug messages
-	  to the system log. Select this if you are having a problem with
-	  CBA support and want to see more of what is going on.
-
diff --git a/drivers/usb/wusbcore/Makefile b/drivers/usb/wusbcore/Makefile
deleted file mode 100644
index d604ccdd916f..000000000000
--- a/drivers/usb/wusbcore/Makefile
+++ /dev/null
@@ -1,26 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-ccflags-$(CONFIG_USB_WUSB_CBAF_DEBUG) := -DDEBUG
-
-obj-$(CONFIG_USB_WUSB)		+= wusbcore.o
-obj-$(CONFIG_USB_HWA_HCD)	+= wusb-wa.o
-obj-$(CONFIG_USB_WUSB_CBAF)	+= wusb-cbaf.o
-
-
-wusbcore-y :=		\
-	crypto.o	\
-	devconnect.o	\
-	dev-sysfs.o	\
-	mmc.o		\
-	pal.o		\
-	rh.o		\
-	reservation.o	\
-	security.o	\
-	wusbhc.o
-
-wusb-cbaf-y := cbaf.o
-
-wusb-wa-y :=		\
-	wa-hc.o		\
-	wa-nep.o	\
-	wa-rpipe.o	\
-	wa-xfer.o
diff --git a/drivers/usb/wusbcore/cbaf.c b/drivers/usb/wusbcore/cbaf.c
deleted file mode 100644
index af77064c7456..000000000000
--- a/drivers/usb/wusbcore/cbaf.c
+++ /dev/null
@@ -1,645 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB - Cable Based Association
- *
- *
- * Copyright (C) 2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- *
- * WUSB devices have to be paired (associated in WUSB lingo) so
- * that they can connect to the system.
- *
- * One way of pairing is using CBA-Cable Based Association. First
- * time you plug the device with a cable, association is done between
- * host and device and subsequent times, you can connect wirelessly
- * without having to associate again. That's the idea.
- *
- * This driver does nothing Earth shattering. It just provides an
- * interface to chat with the wire-connected device so we can get a
- * CDID (device ID) that might have been previously associated to a
- * CHID (host ID) and to set up a new <CHID,CDID,CK> triplet
- * (connection context), with the CK being the secret, or connection
- * key. This is the pairing data.
- *
- * When a device with the CBA capability connects, the probe routine
- * just creates a bunch of sysfs files that a user space enumeration
- * manager uses to allow it to connect wirelessly to the system or not.
- *
- * The process goes like this:
- *
- * 1. Device plugs, cbaf is loaded, notifications happen.
- *
- * 2. The connection manager (CM) sees a device with CBAF capability
- *    (the wusb_chid etc. files in /sys/devices/blah/OURDEVICE).
- *
- * 3. The CM writes the host name, supported band groups, and the CHID
- *    (host ID) into the wusb_host_name, wusb_host_band_groups and
- *    wusb_chid files. These get sent to the device and the CDID (if
- *    any) for this host is requested.
- *
- * 4. The CM can verify that the device's supported band groups
- *    (wusb_device_band_groups) are compatible with the host.
- *
- * 5. The CM reads the wusb_cdid file.
- *
- * 6. The CM looks up its database
- *
- * 6.1 If it has a matching CHID,CDID entry, the device has been
- *     authorized before (paired) and nothing further needs to be
- *     done.
- *
- * 6.2 If the CDID is zero (or the CM doesn't find a matching CDID in
- *     its database), the device is assumed to be not known.  The CM
- *     may associate the host with device by: writing a randomly
- *     generated CDID to wusb_cdid and then a random CK to wusb_ck
- *     (this uploads the new CC to the device).
- *
- *     CMD may choose to prompt the user before associating with a new
- *     device.
- *
- * 7. Device is unplugged.
- *
- * When the device tries to connect wirelessly, it will present its
- * CDID to the WUSB host controller.  The CM will query the
- * database. If the CHID/CDID pair found, it will (with a 4-way
- * handshake) challenge the device to demonstrate it has the CK secret
- * key (from our database) without actually exchanging it. Once
- * satisfied, crypto keys are derived from the CK, the device is
- * connected and all communication is encrypted.
- *
- * References:
- *   [WUSB-AM] Association Models Supplement to the Certified Wireless
- *             Universal Serial Bus Specification, version 1.0.
- */
-#include <linux/module.h>
-#include <linux/ctype.h>
-#include <linux/usb.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/uwb.h>
-#include <linux/usb/wusb.h>
-#include <linux/usb/association.h>
-
-#define CBA_NAME_LEN 0x40 /* [WUSB-AM] table 4-7 */
-
-/* An instance of a Cable-Based-Association-Framework device */
-struct cbaf {
-	struct usb_device *usb_dev;
-	struct usb_interface *usb_iface;
-	void *buffer;
-	size_t buffer_size;
-
-	struct wusb_ckhdid chid;
-	char host_name[CBA_NAME_LEN];
-	u16 host_band_groups;
-
-	struct wusb_ckhdid cdid;
-	char device_name[CBA_NAME_LEN];
-	u16 device_band_groups;
-
-	struct wusb_ckhdid ck;
-};
-
-/*
- * Verify that a CBAF USB-interface has what we need
- *
- * According to [WUSB-AM], CBA devices should provide at least two
- * interfaces:
- *  - RETRIEVE_HOST_INFO
- *  - ASSOCIATE
- *
- * If the device doesn't provide these interfaces, we do not know how
- * to deal with it.
- */
-static int cbaf_check(struct cbaf *cbaf)
-{
-	int result;
-	struct device *dev = &cbaf->usb_iface->dev;
-	struct wusb_cbaf_assoc_info *assoc_info;
-	struct wusb_cbaf_assoc_request *assoc_request;
-	size_t assoc_size;
-	void *itr, *top;
-	int ar_rhi = 0, ar_assoc = 0;
-
-	result = usb_control_msg(
-		cbaf->usb_dev, usb_rcvctrlpipe(cbaf->usb_dev, 0),
-		CBAF_REQ_GET_ASSOCIATION_INFORMATION,
-		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-		0, cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-		cbaf->buffer, cbaf->buffer_size, USB_CTRL_GET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "Cannot get available association types: %d\n",
-			result);
-		return result;
-	}
-
-	assoc_info = cbaf->buffer;
-	if (result < sizeof(*assoc_info)) {
-		dev_err(dev, "Not enough data to decode association info "
-			"header (%zu vs %zu bytes required)\n",
-			(size_t)result, sizeof(*assoc_info));
-		return result;
-	}
-
-	assoc_size = le16_to_cpu(assoc_info->Length);
-	if (result < assoc_size) {
-		dev_err(dev, "Not enough data to decode association info "
-			"(%zu vs %zu bytes required)\n",
-			(size_t)assoc_size, sizeof(*assoc_info));
-		return result;
-	}
-	/*
-	 * From now on, we just verify, but won't error out unless we
-	 * don't find the AR_TYPE_WUSB_{RETRIEVE_HOST_INFO,ASSOCIATE}
-	 * types.
-	 */
-	itr = cbaf->buffer + sizeof(*assoc_info);
-	top = cbaf->buffer + assoc_size;
-	dev_dbg(dev, "Found %u association requests (%zu bytes)\n",
-		 assoc_info->NumAssociationRequests, assoc_size);
-
-	while (itr < top) {
-		u16 ar_type, ar_subtype;
-		u32 ar_size;
-		const char *ar_name;
-
-		assoc_request = itr;
-
-		if (top - itr < sizeof(*assoc_request)) {
-			dev_err(dev, "Not enough data to decode association "
-				"request (%zu vs %zu bytes needed)\n",
-				top - itr, sizeof(*assoc_request));
-			break;
-		}
-
-		ar_type = le16_to_cpu(assoc_request->AssociationTypeId);
-		ar_subtype = le16_to_cpu(assoc_request->AssociationSubTypeId);
-		ar_size = le32_to_cpu(assoc_request->AssociationTypeInfoSize);
-		ar_name = "unknown";
-
-		switch (ar_type) {
-		case AR_TYPE_WUSB:
-			/* Verify we have what is mandated by [WUSB-AM]. */
-			switch (ar_subtype) {
-			case AR_TYPE_WUSB_RETRIEVE_HOST_INFO:
-				ar_name = "RETRIEVE_HOST_INFO";
-				ar_rhi = 1;
-				break;
-			case AR_TYPE_WUSB_ASSOCIATE:
-				/* send assoc data */
-				ar_name = "ASSOCIATE";
-				ar_assoc = 1;
-				break;
-			}
-			break;
-		}
-
-		dev_dbg(dev, "Association request #%02u: 0x%04x/%04x "
-			 "(%zu bytes): %s\n",
-			 assoc_request->AssociationDataIndex, ar_type,
-			 ar_subtype, (size_t)ar_size, ar_name);
-
-		itr += sizeof(*assoc_request);
-	}
-
-	if (!ar_rhi) {
-		dev_err(dev, "Missing RETRIEVE_HOST_INFO association "
-			"request\n");
-		return -EINVAL;
-	}
-	if (!ar_assoc) {
-		dev_err(dev, "Missing ASSOCIATE association request\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static const struct wusb_cbaf_host_info cbaf_host_info_defaults = {
-	.AssociationTypeId_hdr    = WUSB_AR_AssociationTypeId,
-	.AssociationTypeId	  = cpu_to_le16(AR_TYPE_WUSB),
-	.AssociationSubTypeId_hdr = WUSB_AR_AssociationSubTypeId,
-	.AssociationSubTypeId = cpu_to_le16(AR_TYPE_WUSB_RETRIEVE_HOST_INFO),
-	.CHID_hdr                 = WUSB_AR_CHID,
-	.LangID_hdr               = WUSB_AR_LangID,
-	.HostFriendlyName_hdr     = WUSB_AR_HostFriendlyName,
-};
-
-/* Send WUSB host information (CHID and name) to a CBAF device */
-static int cbaf_send_host_info(struct cbaf *cbaf)
-{
-	struct wusb_cbaf_host_info *hi;
-	size_t name_len;
-	size_t hi_size;
-
-	hi = cbaf->buffer;
-	memset(hi, 0, sizeof(*hi));
-	*hi = cbaf_host_info_defaults;
-	hi->CHID = cbaf->chid;
-	hi->LangID = 0;	/* FIXME: I guess... */
-	strlcpy(hi->HostFriendlyName, cbaf->host_name, CBA_NAME_LEN);
-	name_len = strlen(cbaf->host_name);
-	hi->HostFriendlyName_hdr.len = cpu_to_le16(name_len);
-	hi_size = sizeof(*hi) + name_len;
-
-	return usb_control_msg(cbaf->usb_dev,
-			usb_sndctrlpipe(cbaf->usb_dev, 0),
-			CBAF_REQ_SET_ASSOCIATION_RESPONSE,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			0x0101,
-			cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-			hi, hi_size, USB_CTRL_SET_TIMEOUT);
-}
-
-/*
- * Get device's information (CDID) associated to CHID
- *
- * The device will return it's information (CDID, name, bandgroups)
- * associated to the CHID we have set before, or 0 CDID and default
- * name and bandgroup if no CHID set or unknown.
- */
-static int cbaf_cdid_get(struct cbaf *cbaf)
-{
-	int result;
-	struct device *dev = &cbaf->usb_iface->dev;
-	struct wusb_cbaf_device_info *di;
-	size_t needed;
-
-	di = cbaf->buffer;
-	result = usb_control_msg(
-		cbaf->usb_dev, usb_rcvctrlpipe(cbaf->usb_dev, 0),
-		CBAF_REQ_GET_ASSOCIATION_REQUEST,
-		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-		0x0200, cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-		di, cbaf->buffer_size, USB_CTRL_GET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "Cannot request device information: %d\n",
-			result);
-		return result;
-	}
-
-	needed = result < sizeof(*di) ? sizeof(*di) : le32_to_cpu(di->Length);
-	if (result < needed) {
-		dev_err(dev, "Not enough data in DEVICE_INFO reply (%zu vs "
-			"%zu bytes needed)\n", (size_t)result, needed);
-		return -ENOENT;
-	}
-
-	strlcpy(cbaf->device_name, di->DeviceFriendlyName, CBA_NAME_LEN);
-	cbaf->cdid = di->CDID;
-	cbaf->device_band_groups = le16_to_cpu(di->BandGroups);
-
-	return 0;
-}
-
-static ssize_t cbaf_wusb_chid_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	return sprintf(buf, "%16ph\n", cbaf->chid.data);
-}
-
-static ssize_t cbaf_wusb_chid_store(struct device *dev,
-					 struct device_attribute *attr,
-					 const char *buf, size_t size)
-{
-	ssize_t result;
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	result = sscanf(buf,
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx",
-			&cbaf->chid.data[0] , &cbaf->chid.data[1],
-			&cbaf->chid.data[2] , &cbaf->chid.data[3],
-			&cbaf->chid.data[4] , &cbaf->chid.data[5],
-			&cbaf->chid.data[6] , &cbaf->chid.data[7],
-			&cbaf->chid.data[8] , &cbaf->chid.data[9],
-			&cbaf->chid.data[10], &cbaf->chid.data[11],
-			&cbaf->chid.data[12], &cbaf->chid.data[13],
-			&cbaf->chid.data[14], &cbaf->chid.data[15]);
-
-	if (result != 16)
-		return -EINVAL;
-
-	result = cbaf_send_host_info(cbaf);
-	if (result < 0)
-		return result;
-	result = cbaf_cdid_get(cbaf);
-	if (result < 0)
-		return result;
-	return size;
-}
-static DEVICE_ATTR(wusb_chid, 0600, cbaf_wusb_chid_show, cbaf_wusb_chid_store);
-
-static ssize_t cbaf_wusb_host_name_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n", cbaf->host_name);
-}
-
-static ssize_t cbaf_wusb_host_name_store(struct device *dev,
-					 struct device_attribute *attr,
-					 const char *buf, size_t size)
-{
-	ssize_t result;
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	result = sscanf(buf, "%63s", cbaf->host_name);
-	if (result != 1)
-		return -EINVAL;
-
-	return size;
-}
-static DEVICE_ATTR(wusb_host_name, 0600, cbaf_wusb_host_name_show,
-					 cbaf_wusb_host_name_store);
-
-static ssize_t cbaf_wusb_host_band_groups_show(struct device *dev,
-					       struct device_attribute *attr,
-					       char *buf)
-{
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%04x\n", cbaf->host_band_groups);
-}
-
-static ssize_t cbaf_wusb_host_band_groups_store(struct device *dev,
-						struct device_attribute *attr,
-						const char *buf, size_t size)
-{
-	ssize_t result;
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-	u16 band_groups = 0;
-
-	result = sscanf(buf, "%04hx", &band_groups);
-	if (result != 1)
-		return -EINVAL;
-
-	cbaf->host_band_groups = band_groups;
-
-	return size;
-}
-
-static DEVICE_ATTR(wusb_host_band_groups, 0600,
-		   cbaf_wusb_host_band_groups_show,
-		   cbaf_wusb_host_band_groups_store);
-
-static const struct wusb_cbaf_device_info cbaf_device_info_defaults = {
-	.Length_hdr               = WUSB_AR_Length,
-	.CDID_hdr                 = WUSB_AR_CDID,
-	.BandGroups_hdr           = WUSB_AR_BandGroups,
-	.LangID_hdr               = WUSB_AR_LangID,
-	.DeviceFriendlyName_hdr   = WUSB_AR_DeviceFriendlyName,
-};
-
-static ssize_t cbaf_wusb_cdid_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	return sprintf(buf, "%16ph\n", cbaf->cdid.data);
-}
-
-static ssize_t cbaf_wusb_cdid_store(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t size)
-{
-	ssize_t result;
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-	struct wusb_ckhdid cdid;
-
-	result = sscanf(buf,
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx",
-			&cdid.data[0] , &cdid.data[1],
-			&cdid.data[2] , &cdid.data[3],
-			&cdid.data[4] , &cdid.data[5],
-			&cdid.data[6] , &cdid.data[7],
-			&cdid.data[8] , &cdid.data[9],
-			&cdid.data[10], &cdid.data[11],
-			&cdid.data[12], &cdid.data[13],
-			&cdid.data[14], &cdid.data[15]);
-	if (result != 16)
-		return -EINVAL;
-
-	cbaf->cdid = cdid;
-
-	return size;
-}
-static DEVICE_ATTR(wusb_cdid, 0600, cbaf_wusb_cdid_show, cbaf_wusb_cdid_store);
-
-static ssize_t cbaf_wusb_device_band_groups_show(struct device *dev,
-						 struct device_attribute *attr,
-						 char *buf)
-{
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	return scnprintf(buf, PAGE_SIZE, "0x%04x\n", cbaf->device_band_groups);
-}
-
-static DEVICE_ATTR(wusb_device_band_groups, 0600,
-		   cbaf_wusb_device_band_groups_show,
-		   NULL);
-
-static ssize_t cbaf_wusb_device_name_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	return scnprintf(buf, PAGE_SIZE, "%s\n", cbaf->device_name);
-}
-static DEVICE_ATTR(wusb_device_name, 0600, cbaf_wusb_device_name_show, NULL);
-
-static const struct wusb_cbaf_cc_data cbaf_cc_data_defaults = {
-	.AssociationTypeId_hdr    = WUSB_AR_AssociationTypeId,
-	.AssociationTypeId	  = cpu_to_le16(AR_TYPE_WUSB),
-	.AssociationSubTypeId_hdr = WUSB_AR_AssociationSubTypeId,
-	.AssociationSubTypeId     = cpu_to_le16(AR_TYPE_WUSB_ASSOCIATE),
-	.Length_hdr               = WUSB_AR_Length,
-	.Length		= cpu_to_le32(sizeof(struct wusb_cbaf_cc_data)),
-	.ConnectionContext_hdr    = WUSB_AR_ConnectionContext,
-	.BandGroups_hdr           = WUSB_AR_BandGroups,
-};
-
-static const struct wusb_cbaf_cc_data_fail cbaf_cc_data_fail_defaults = {
-	.AssociationTypeId_hdr    = WUSB_AR_AssociationTypeId,
-	.AssociationSubTypeId_hdr = WUSB_AR_AssociationSubTypeId,
-	.Length_hdr               = WUSB_AR_Length,
-	.AssociationStatus_hdr    = WUSB_AR_AssociationStatus,
-};
-
-/*
- * Send a new CC to the device.
- */
-static int cbaf_cc_upload(struct cbaf *cbaf)
-{
-	int result;
-	struct device *dev = &cbaf->usb_iface->dev;
-	struct wusb_cbaf_cc_data *ccd;
-
-	ccd =  cbaf->buffer;
-	*ccd = cbaf_cc_data_defaults;
-	ccd->CHID = cbaf->chid;
-	ccd->CDID = cbaf->cdid;
-	ccd->CK = cbaf->ck;
-	ccd->BandGroups = cpu_to_le16(cbaf->host_band_groups);
-
-	dev_dbg(dev, "Trying to upload CC:\n");
-	dev_dbg(dev, "  CHID       %16ph\n", ccd->CHID.data);
-	dev_dbg(dev, "  CDID       %16ph\n", ccd->CDID.data);
-	dev_dbg(dev, "  Bandgroups 0x%04x\n", cbaf->host_band_groups);
-
-	result = usb_control_msg(
-		cbaf->usb_dev, usb_sndctrlpipe(cbaf->usb_dev, 0),
-		CBAF_REQ_SET_ASSOCIATION_RESPONSE,
-		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-		0x0201, cbaf->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-		ccd, sizeof(*ccd), USB_CTRL_SET_TIMEOUT);
-
-	return result;
-}
-
-static ssize_t cbaf_wusb_ck_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t size)
-{
-	ssize_t result;
-	struct usb_interface *iface = to_usb_interface(dev);
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-
-	result = sscanf(buf,
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx",
-			&cbaf->ck.data[0] , &cbaf->ck.data[1],
-			&cbaf->ck.data[2] , &cbaf->ck.data[3],
-			&cbaf->ck.data[4] , &cbaf->ck.data[5],
-			&cbaf->ck.data[6] , &cbaf->ck.data[7],
-			&cbaf->ck.data[8] , &cbaf->ck.data[9],
-			&cbaf->ck.data[10], &cbaf->ck.data[11],
-			&cbaf->ck.data[12], &cbaf->ck.data[13],
-			&cbaf->ck.data[14], &cbaf->ck.data[15]);
-	if (result != 16)
-		return -EINVAL;
-
-	result = cbaf_cc_upload(cbaf);
-	if (result < 0)
-		return result;
-
-	return size;
-}
-static DEVICE_ATTR(wusb_ck, 0600, NULL, cbaf_wusb_ck_store);
-
-static struct attribute *cbaf_dev_attrs[] = {
-	&dev_attr_wusb_host_name.attr,
-	&dev_attr_wusb_host_band_groups.attr,
-	&dev_attr_wusb_chid.attr,
-	&dev_attr_wusb_cdid.attr,
-	&dev_attr_wusb_device_name.attr,
-	&dev_attr_wusb_device_band_groups.attr,
-	&dev_attr_wusb_ck.attr,
-	NULL,
-};
-
-static const struct attribute_group cbaf_dev_attr_group = {
-	.name = NULL,	/* we want them in the same directory */
-	.attrs = cbaf_dev_attrs,
-};
-
-static int cbaf_probe(struct usb_interface *iface,
-		      const struct usb_device_id *id)
-{
-	struct cbaf *cbaf;
-	struct device *dev = &iface->dev;
-	int result = -ENOMEM;
-
-	cbaf = kzalloc(sizeof(*cbaf), GFP_KERNEL);
-	if (cbaf == NULL)
-		goto error_kzalloc;
-	cbaf->buffer = kmalloc(512, GFP_KERNEL);
-	if (cbaf->buffer == NULL)
-		goto error_kmalloc_buffer;
-
-	cbaf->buffer_size = 512;
-	cbaf->usb_dev = usb_get_dev(interface_to_usbdev(iface));
-	cbaf->usb_iface = usb_get_intf(iface);
-	result = cbaf_check(cbaf);
-	if (result < 0) {
-		dev_err(dev, "This device is not WUSB-CBAF compliant and is not supported yet.\n");
-		goto error_check;
-	}
-
-	result = sysfs_create_group(&dev->kobj, &cbaf_dev_attr_group);
-	if (result < 0) {
-		dev_err(dev, "Can't register sysfs attr group: %d\n", result);
-		goto error_create_group;
-	}
-	usb_set_intfdata(iface, cbaf);
-	return 0;
-
-error_create_group:
-error_check:
-	usb_put_intf(iface);
-	usb_put_dev(cbaf->usb_dev);
-	kfree(cbaf->buffer);
-error_kmalloc_buffer:
-	kfree(cbaf);
-error_kzalloc:
-	return result;
-}
-
-static void cbaf_disconnect(struct usb_interface *iface)
-{
-	struct cbaf *cbaf = usb_get_intfdata(iface);
-	struct device *dev = &iface->dev;
-	sysfs_remove_group(&dev->kobj, &cbaf_dev_attr_group);
-	usb_set_intfdata(iface, NULL);
-	usb_put_intf(iface);
-	usb_put_dev(cbaf->usb_dev);
-	kfree(cbaf->buffer);
-	/* paranoia: clean up crypto keys */
-	kzfree(cbaf);
-}
-
-static const struct usb_device_id cbaf_id_table[] = {
-	{ USB_INTERFACE_INFO(0xef, 0x03, 0x01), },
-	{ },
-};
-MODULE_DEVICE_TABLE(usb, cbaf_id_table);
-
-static struct usb_driver cbaf_driver = {
-	.name =		"wusb-cbaf",
-	.id_table =	cbaf_id_table,
-	.probe =	cbaf_probe,
-	.disconnect =	cbaf_disconnect,
-};
-
-module_usb_driver(cbaf_driver);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Wireless USB Cable Based Association");
-MODULE_LICENSE("GPL");
diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
deleted file mode 100644
index 9ee66483ee54..000000000000
--- a/drivers/usb/wusbcore/crypto.c
+++ /dev/null
@@ -1,441 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Ultra Wide Band
- * AES-128 CCM Encryption
- *
- * Copyright (C) 2007 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * We don't do any encryption here; we use the Linux Kernel's AES-128
- * crypto modules to construct keys and payload blocks in a way
- * defined by WUSB1.0[6]. Check the erratas, as typos are are patched
- * there.
- *
- * Thanks a zillion to John Keys for his help and clarifications over
- * the designed-by-a-committee text.
- *
- * So the idea is that there is this basic Pseudo-Random-Function
- * defined in WUSB1.0[6.5] which is the core of everything. It works
- * by tweaking some blocks, AES crypting them and then xoring
- * something else with them (this seems to be called CBC(AES) -- can
- * you tell I know jack about crypto?). So we just funnel it into the
- * Linux Crypto API.
- *
- * We leave a crypto test module so we can verify that vectors match,
- * every now and then.
- *
- * Block size: 16 bytes -- AES seems to do things in 'block sizes'. I
- *             am learning a lot...
- *
- *             Conveniently, some data structures that need to be
- *             funneled through AES are...16 bytes in size!
- */
-
-#include <crypto/aes.h>
-#include <crypto/algapi.h>
-#include <crypto/hash.h>
-#include <crypto/skcipher.h>
-#include <linux/crypto.h>
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/uwb.h>
-#include <linux/slab.h>
-#include <linux/usb/wusb.h>
-#include <linux/scatterlist.h>
-
-static int debug_crypto_verify;
-
-module_param(debug_crypto_verify, int, 0);
-MODULE_PARM_DESC(debug_crypto_verify, "verify the key generation algorithms");
-
-static void wusb_key_dump(const void *buf, size_t len)
-{
-	print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_OFFSET, 16, 1,
-		       buf, len, 0);
-}
-
-/*
- * Block of data, as understood by AES-CCM
- *
- * The code assumes this structure is nothing but a 16 byte array
- * (packed in a struct to avoid common mess ups that I usually do with
- * arrays and enforcing type checking).
- */
-struct aes_ccm_block {
-	u8 data[16];
-} __attribute__((packed));
-
-/*
- * Counter-mode Blocks (WUSB1.0[6.4])
- *
- * According to CCM (or so it seems), for the purpose of calculating
- * the MIC, the message is broken in N counter-mode blocks, B0, B1,
- * ... BN.
- *
- * B0 contains flags, the CCM nonce and l(m).
- *
- * B1 contains l(a), the MAC header, the encryption offset and padding.
- *
- * If EO is nonzero, additional blocks are built from payload bytes
- * until EO is exhausted (FIXME: padding to 16 bytes, I guess). The
- * padding is not xmitted.
- */
-
-/* WUSB1.0[T6.4] */
-struct aes_ccm_b0 {
-	u8 flags;	/* 0x59, per CCM spec */
-	struct aes_ccm_nonce ccm_nonce;
-	__be16 lm;
-} __attribute__((packed));
-
-/* WUSB1.0[T6.5] */
-struct aes_ccm_b1 {
-	__be16 la;
-	u8 mac_header[10];
-	__le16 eo;
-	u8 security_reserved;	/* This is always zero */
-	u8 padding;		/* 0 */
-} __attribute__((packed));
-
-/*
- * Encryption Blocks (WUSB1.0[6.4.4])
- *
- * CCM uses Ax blocks to generate a keystream with which the MIC and
- * the message's payload are encoded. A0 always encrypts/decrypts the
- * MIC. Ax (x>0) are used for the successive payload blocks.
- *
- * The x is the counter, and is increased for each block.
- */
-struct aes_ccm_a {
-	u8 flags;	/* 0x01, per CCM spec */
-	struct aes_ccm_nonce ccm_nonce;
-	__be16 counter;	/* Value of x */
-} __attribute__((packed));
-
-/* Scratch space for MAC calculations. */
-struct wusb_mac_scratch {
-	struct aes_ccm_b0 b0;
-	struct aes_ccm_b1 b1;
-	struct aes_ccm_a ax;
-};
-
-/*
- * CC-MAC function WUSB1.0[6.5]
- *
- * Take a data string and produce the encrypted CBC Counter-mode MIC
- *
- * Note the names for most function arguments are made to (more or
- * less) match those used in the pseudo-function definition given in
- * WUSB1.0[6.5].
- *
- * @tfm_cbc: CBC(AES) blkcipher handle (initialized)
- *
- * @tfm_aes: AES cipher handle (initialized)
- *
- * @mic: buffer for placing the computed MIC (Message Integrity
- *       Code). This is exactly 8 bytes, and we expect the buffer to
- *       be at least eight bytes in length.
- *
- * @key: 128 bit symmetric key
- *
- * @n: CCM nonce
- *
- * @a: ASCII string, 14 bytes long (I guess zero padded if needed;
- *     we use exactly 14 bytes).
- *
- * @b: data stream to be processed
- *
- * @blen: size of b...
- *
- * Still not very clear how this is done, but looks like this: we
- * create block B0 (as WUSB1.0[6.5] says), then we AES-crypt it with
- * @key. We bytewise xor B0 with B1 (1) and AES-crypt that. Then we
- * take the payload and divide it in blocks (16 bytes), xor them with
- * the previous crypto result (16 bytes) and crypt it, repeat the next
- * block with the output of the previous one, rinse wash. So we use
- * the CBC-MAC(AES) shash, that does precisely that. The IV (Initial
- * Vector) is 16 bytes and is set to zero, so
- *
- * (1) Created as 6.5 says, again, using as l(a) 'Blen + 14', and
- *     using the 14 bytes of @a to fill up
- *     b1.{mac_header,e0,security_reserved,padding}.
- *
- * NOTE: The definition of l(a) in WUSB1.0[6.5] vs the definition of
- *       l(m) is orthogonal, they bear no relationship, so it is not
- *       in conflict with the parameter's relation that
- *       WUSB1.0[6.4.2]) defines.
- *
- * NOTE: WUSB1.0[A.1]: Host Nonce is missing a nibble? (1e); fixed in
- *       first errata released on 2005/07.
- *
- * NOTE: we need to clean IV to zero at each invocation to make sure
- *       we start with a fresh empty Initial Vector, so that the CBC
- *       works ok.
- *
- * NOTE: blen is not aligned to a block size, we'll pad zeros, that's
- *       what sg[4] is for. Maybe there is a smarter way to do this.
- */
-static int wusb_ccm_mac(struct crypto_shash *tfm_cbcmac,
-			struct wusb_mac_scratch *scratch,
-			void *mic,
-			const struct aes_ccm_nonce *n,
-			const struct aes_ccm_label *a, const void *b,
-			size_t blen)
-{
-	SHASH_DESC_ON_STACK(desc, tfm_cbcmac);
-	u8 iv[AES_BLOCK_SIZE];
-
-	/*
-	 * These checks should be compile time optimized out
-	 * ensure @a fills b1's mac_header and following fields
-	 */
-	BUILD_BUG_ON(sizeof(*a) != sizeof(scratch->b1) - sizeof(scratch->b1.la));
-	BUILD_BUG_ON(sizeof(scratch->b0) != sizeof(struct aes_ccm_block));
-	BUILD_BUG_ON(sizeof(scratch->b1) != sizeof(struct aes_ccm_block));
-	BUILD_BUG_ON(sizeof(scratch->ax) != sizeof(struct aes_ccm_block));
-
-	/* Setup B0 */
-	scratch->b0.flags = 0x59;	/* Format B0 */
-	scratch->b0.ccm_nonce = *n;
-	scratch->b0.lm = cpu_to_be16(0);	/* WUSB1.0[6.5] sez l(m) is 0 */
-
-	/* Setup B1
-	 *
-	 * The WUSB spec is anything but clear! WUSB1.0[6.5]
-	 * says that to initialize B1 from A with 'l(a) = blen +
-	 * 14'--after clarification, it means to use A's contents
-	 * for MAC Header, EO, sec reserved and padding.
-	 */
-	scratch->b1.la = cpu_to_be16(blen + 14);
-	memcpy(&scratch->b1.mac_header, a, sizeof(*a));
-
-	desc->tfm = tfm_cbcmac;
-	crypto_shash_init(desc);
-	crypto_shash_update(desc, (u8 *)&scratch->b0, sizeof(scratch->b0) +
-						      sizeof(scratch->b1));
-	crypto_shash_finup(desc, b, blen, iv);
-
-	/* Now we crypt the MIC Tag (*iv) with Ax -- values per WUSB1.0[6.5]
-	 * The procedure is to AES crypt the A0 block and XOR the MIC
-	 * Tag against it; we only do the first 8 bytes and place it
-	 * directly in the destination buffer.
-	 */
-	scratch->ax.flags = 0x01;		/* as per WUSB 1.0 spec */
-	scratch->ax.ccm_nonce = *n;
-	scratch->ax.counter = 0;
-
-	/* reuse the CBC-MAC transform to perform the single block encryption */
-	crypto_shash_digest(desc, (u8 *)&scratch->ax, sizeof(scratch->ax),
-			    (u8 *)&scratch->ax);
-
-	crypto_xor_cpy(mic, (u8 *)&scratch->ax, iv, 8);
-
-	return 8;
-}
-
-/*
- * WUSB Pseudo Random Function (WUSB1.0[6.5])
- *
- * @b: buffer to the source data; cannot be a global or const local
- *     (will confuse the scatterlists)
- */
-ssize_t wusb_prf(void *out, size_t out_size,
-		 const u8 key[16], const struct aes_ccm_nonce *_n,
-		 const struct aes_ccm_label *a,
-		 const void *b, size_t blen, size_t len)
-{
-	ssize_t result, bytes = 0, bitr;
-	struct aes_ccm_nonce n = *_n;
-	struct crypto_shash *tfm_cbcmac;
-	struct wusb_mac_scratch scratch;
-	u64 sfn = 0;
-	__le64 sfn_le;
-
-	tfm_cbcmac = crypto_alloc_shash("cbcmac(aes)", 0, 0);
-	if (IS_ERR(tfm_cbcmac)) {
-		result = PTR_ERR(tfm_cbcmac);
-		printk(KERN_ERR "E: can't load CBCMAC-AES: %d\n", (int)result);
-		goto error_alloc_cbcmac;
-	}
-
-	result = crypto_shash_setkey(tfm_cbcmac, key, AES_BLOCK_SIZE);
-	if (result < 0) {
-		printk(KERN_ERR "E: can't set CBCMAC-AES key: %d\n", (int)result);
-		goto error_setkey_cbcmac;
-	}
-
-	for (bitr = 0; bitr < (len + 63) / 64; bitr++) {
-		sfn_le = cpu_to_le64(sfn++);
-		memcpy(&n.sfn, &sfn_le, sizeof(n.sfn));	/* n.sfn++... */
-		result = wusb_ccm_mac(tfm_cbcmac, &scratch, out + bytes,
-				      &n, a, b, blen);
-		if (result < 0)
-			goto error_ccm_mac;
-		bytes += result;
-	}
-	result = bytes;
-
-error_ccm_mac:
-error_setkey_cbcmac:
-	crypto_free_shash(tfm_cbcmac);
-error_alloc_cbcmac:
-	return result;
-}
-
-/* WUSB1.0[A.2] test vectors */
-static const u8 stv_hsmic_key[16] = {
-	0x4b, 0x79, 0xa3, 0xcf, 0xe5, 0x53, 0x23, 0x9d,
-	0xd7, 0xc1, 0x6d, 0x1c, 0x2d, 0xab, 0x6d, 0x3f
-};
-
-static const struct aes_ccm_nonce stv_hsmic_n = {
-	.sfn = { 0 },
-	.tkid = { 0x76, 0x98, 0x01,  },
-	.dest_addr = { .data = { 0xbe, 0x00 } },
-		.src_addr = { .data = { 0x76, 0x98 } },
-};
-
-/*
- * Out-of-band MIC Generation verification code
- *
- */
-static int wusb_oob_mic_verify(void)
-{
-	int result;
-	u8 mic[8];
-	/* WUSB1.0[A.2] test vectors */
-	static const struct usb_handshake stv_hsmic_hs = {
-		.bMessageNumber = 2,
-		.bStatus 	= 00,
-		.tTKID 		= { 0x76, 0x98, 0x01 },
-		.bReserved 	= 00,
-		.CDID 		= { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35,
-				    0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
-				    0x3c, 0x3d, 0x3e, 0x3f },
-		.nonce	 	= { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25,
-				    0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
-				    0x2c, 0x2d, 0x2e, 0x2f },
-		.MIC	 	= { 0x75, 0x6a, 0x97, 0x51, 0x0c, 0x8c,
-				    0x14, 0x7b },
-	};
-	size_t hs_size;
-
-	result = wusb_oob_mic(mic, stv_hsmic_key, &stv_hsmic_n, &stv_hsmic_hs);
-	if (result < 0)
-		printk(KERN_ERR "E: WUSB OOB MIC test: failed: %d\n", result);
-	else if (memcmp(stv_hsmic_hs.MIC, mic, sizeof(mic))) {
-		printk(KERN_ERR "E: OOB MIC test: "
-		       "mismatch between MIC result and WUSB1.0[A2]\n");
-		hs_size = sizeof(stv_hsmic_hs) - sizeof(stv_hsmic_hs.MIC);
-		printk(KERN_ERR "E: Handshake2 in: (%zu bytes)\n", hs_size);
-		wusb_key_dump(&stv_hsmic_hs, hs_size);
-		printk(KERN_ERR "E: CCM Nonce in: (%zu bytes)\n",
-		       sizeof(stv_hsmic_n));
-		wusb_key_dump(&stv_hsmic_n, sizeof(stv_hsmic_n));
-		printk(KERN_ERR "E: MIC out:\n");
-		wusb_key_dump(mic, sizeof(mic));
-		printk(KERN_ERR "E: MIC out (from WUSB1.0[A.2]):\n");
-		wusb_key_dump(stv_hsmic_hs.MIC, sizeof(stv_hsmic_hs.MIC));
-		result = -EINVAL;
-	} else
-		result = 0;
-	return result;
-}
-
-/*
- * Test vectors for Key derivation
- *
- * These come from WUSB1.0[6.5.1], the vectors in WUSB1.0[A.1]
- * (errata corrected in 2005/07).
- */
-static const u8 stv_key_a1[16] __attribute__ ((__aligned__(4))) = {
-	0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87,
-	0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f
-};
-
-static const struct aes_ccm_nonce stv_keydvt_n_a1 = {
-	.sfn = { 0 },
-	.tkid = { 0x76, 0x98, 0x01,  },
-	.dest_addr = { .data = { 0xbe, 0x00 } },
-	.src_addr = { .data = { 0x76, 0x98 } },
-};
-
-static const struct wusb_keydvt_out stv_keydvt_out_a1 = {
-	.kck = {
-		0x4b, 0x79, 0xa3, 0xcf, 0xe5, 0x53, 0x23, 0x9d,
-		0xd7, 0xc1, 0x6d, 0x1c, 0x2d, 0xab, 0x6d, 0x3f
-	},
-	.ptk = {
-		0xc8, 0x70, 0x62, 0x82, 0xb6, 0x7c, 0xe9, 0x06,
-		0x7b, 0xc5, 0x25, 0x69, 0xf2, 0x36, 0x61, 0x2d
-	}
-};
-
-/*
- * Performa a test to make sure we match the vectors defined in
- * WUSB1.0[A.1](Errata2006/12)
- */
-static int wusb_key_derive_verify(void)
-{
-	int result = 0;
-	struct wusb_keydvt_out keydvt_out;
-	/* These come from WUSB1.0[A.1] + 2006/12 errata */
-	static const struct wusb_keydvt_in stv_keydvt_in_a1 = {
-		.hnonce = {
-			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-			0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-		},
-		.dnonce = {
-			0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
-			0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f
-		}
-	};
-
-	result = wusb_key_derive(&keydvt_out, stv_key_a1, &stv_keydvt_n_a1,
-				 &stv_keydvt_in_a1);
-	if (result < 0)
-		printk(KERN_ERR "E: WUSB key derivation test: "
-		       "derivation failed: %d\n", result);
-	if (memcmp(&stv_keydvt_out_a1, &keydvt_out, sizeof(keydvt_out))) {
-		printk(KERN_ERR "E: WUSB key derivation test: "
-		       "mismatch between key derivation result "
-		       "and WUSB1.0[A1] Errata 2006/12\n");
-		printk(KERN_ERR "E: keydvt in: key\n");
-		wusb_key_dump(stv_key_a1, sizeof(stv_key_a1));
-		printk(KERN_ERR "E: keydvt in: nonce\n");
-		wusb_key_dump(&stv_keydvt_n_a1, sizeof(stv_keydvt_n_a1));
-		printk(KERN_ERR "E: keydvt in: hnonce & dnonce\n");
-		wusb_key_dump(&stv_keydvt_in_a1, sizeof(stv_keydvt_in_a1));
-		printk(KERN_ERR "E: keydvt out: KCK\n");
-		wusb_key_dump(&keydvt_out.kck, sizeof(keydvt_out.kck));
-		printk(KERN_ERR "E: keydvt out: PTK\n");
-		wusb_key_dump(&keydvt_out.ptk, sizeof(keydvt_out.ptk));
-		result = -EINVAL;
-	} else
-		result = 0;
-	return result;
-}
-
-/*
- * Initialize crypto system
- *
- * FIXME: we do nothing now, other than verifying. Later on we'll
- * cache the encryption stuff, so that's why we have a separate init.
- */
-int wusb_crypto_init(void)
-{
-	int result;
-
-	if (debug_crypto_verify) {
-		result = wusb_key_derive_verify();
-		if (result < 0)
-			return result;
-		return wusb_oob_mic_verify();
-	}
-	return 0;
-}
-
-void wusb_crypto_exit(void)
-{
-	/* FIXME: free cached crypto transforms */
-}
diff --git a/drivers/usb/wusbcore/dev-sysfs.c b/drivers/usb/wusbcore/dev-sysfs.c
deleted file mode 100644
index 67b0a4c412b2..000000000000
--- a/drivers/usb/wusbcore/dev-sysfs.c
+++ /dev/null
@@ -1,124 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB devices
- * sysfs bindings
- *
- * Copyright (C) 2007 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Get them out of the way...
- */
-
-#include <linux/jiffies.h>
-#include <linux/ctype.h>
-#include <linux/workqueue.h>
-#include "wusbhc.h"
-
-static ssize_t wusb_disconnect_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t size)
-{
-	struct usb_device *usb_dev;
-	struct wusbhc *wusbhc;
-	unsigned command;
-	u8 port_idx;
-
-	if (sscanf(buf, "%u", &command) != 1)
-		return -EINVAL;
-	if (command == 0)
-		return size;
-	usb_dev = to_usb_device(dev);
-	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
-	if (wusbhc == NULL)
-		return -ENODEV;
-
-	mutex_lock(&wusbhc->mutex);
-	port_idx = wusb_port_no_to_idx(usb_dev->portnum);
-	__wusbhc_dev_disable(wusbhc, port_idx);
-	mutex_unlock(&wusbhc->mutex);
-	wusbhc_put(wusbhc);
-	return size;
-}
-static DEVICE_ATTR_WO(wusb_disconnect);
-
-static ssize_t wusb_cdid_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	ssize_t result;
-	struct wusb_dev *wusb_dev;
-
-	wusb_dev = wusb_dev_get_by_usb_dev(to_usb_device(dev));
-	if (wusb_dev == NULL)
-		return -ENODEV;
-	result = sprintf(buf, "%16ph\n", wusb_dev->cdid.data);
-	wusb_dev_put(wusb_dev);
-	return result;
-}
-static DEVICE_ATTR_RO(wusb_cdid);
-
-static ssize_t wusb_ck_store(struct device *dev,
-			     struct device_attribute *attr,
-			     const char *buf, size_t size)
-{
-	int result;
-	struct usb_device *usb_dev;
-	struct wusbhc *wusbhc;
-	struct wusb_ckhdid ck;
-
-	result = sscanf(buf,
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx\n",
-			&ck.data[0] , &ck.data[1],
-			&ck.data[2] , &ck.data[3],
-			&ck.data[4] , &ck.data[5],
-			&ck.data[6] , &ck.data[7],
-			&ck.data[8] , &ck.data[9],
-			&ck.data[10], &ck.data[11],
-			&ck.data[12], &ck.data[13],
-			&ck.data[14], &ck.data[15]);
-	if (result != 16)
-		return -EINVAL;
-
-	usb_dev = to_usb_device(dev);
-	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
-	if (wusbhc == NULL)
-		return -ENODEV;
-	result = wusb_dev_4way_handshake(wusbhc, usb_dev->wusb_dev, &ck);
-	memzero_explicit(&ck, sizeof(ck));
-	wusbhc_put(wusbhc);
-	return result < 0 ? result : size;
-}
-static DEVICE_ATTR_WO(wusb_ck);
-
-static struct attribute *wusb_dev_attrs[] = {
-		&dev_attr_wusb_disconnect.attr,
-		&dev_attr_wusb_cdid.attr,
-		&dev_attr_wusb_ck.attr,
-		NULL,
-};
-
-static const struct attribute_group wusb_dev_attr_group = {
-	.name = NULL,	/* we want them in the same directory */
-	.attrs = wusb_dev_attrs,
-};
-
-int wusb_dev_sysfs_add(struct wusbhc *wusbhc, struct usb_device *usb_dev,
-		       struct wusb_dev *wusb_dev)
-{
-	int result = sysfs_create_group(&usb_dev->dev.kobj,
-					&wusb_dev_attr_group);
-	struct device *dev = &usb_dev->dev;
-	if (result < 0)
-		dev_err(dev, "Cannot register WUSB-dev attributes: %d\n",
-			result);
-	return result;
-}
-
-void wusb_dev_sysfs_rm(struct wusb_dev *wusb_dev)
-{
-	struct usb_device *usb_dev = wusb_dev->usb_dev;
-	if (usb_dev)
-		sysfs_remove_group(&usb_dev->dev.kobj, &wusb_dev_attr_group);
-}
diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c
deleted file mode 100644
index 1170f8baf608..000000000000
--- a/drivers/usb/wusbcore/devconnect.c
+++ /dev/null
@@ -1,1085 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB Wire Adapter: Control/Data Streaming Interface (WUSB[8])
- * Device Connect handling
- *
- * Copyright (C) 2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- * FIXME: this file needs to be broken up, it's grown too big
- *
- *
- * WUSB1.0[7.1, 7.5.1, ]
- *
- * WUSB device connection is kind of messy. Some background:
- *
- *     When a device wants to connect it scans the UWB radio channels
- *     looking for a WUSB Channel; a WUSB channel is defined by MMCs
- *     (Micro Managed Commands or something like that) [see
- *     Design-overview for more on this] .
- *
- * So, device scans the radio, finds MMCs and thus a host and checks
- * when the next DNTS is. It sends a Device Notification Connect
- * (DN_Connect); the host picks it up (through nep.c and notif.c, ends
- * up in wusb_devconnect_ack(), which creates a wusb_dev structure in
- * wusbhc->port[port_number].wusb_dev), assigns an unauth address
- * to the device (this means from 0x80 to 0xfe) and sends, in the MMC
- * a Connect Ack Information Element (ConnAck IE).
- *
- * So now the device now has a WUSB address. From now on, we use
- * that to talk to it in the RPipes.
- *
- * ASSUMPTIONS:
- *
- *  - We use the the as device address the port number where it is
- *    connected (port 0 doesn't exist). For unauth, it is 128 + that.
- *
- * ROADMAP:
- *
- *   This file contains the logic for doing that--entry points:
- *
- *   wusb_devconnect_ack()      Ack a device until _acked() called.
- *                              Called by notif.c:wusb_handle_dn_connect()
- *                              when a DN_Connect is received.
- *
- *     wusb_devconnect_acked()  Ack done, release resources.
- *
- *   wusb_handle_dn_alive()     Called by notif.c:wusb_handle_dn()
- *                              for processing a DN_Alive pong from a device.
- *
- *   wusb_handle_dn_disconnect()Called by notif.c:wusb_handle_dn() to
- *                              process a disconnect request from a
- *                              device.
- *
- *   __wusb_dev_disable()       Called by rh.c:wusbhc_rh_clear_port_feat() when
- *                              disabling a port.
- *
- *   wusb_devconnect_create()   Called when creating the host by
- *                              lc.c:wusbhc_create().
- *
- *   wusb_devconnect_destroy()  Cleanup called removing the host. Called
- *                              by lc.c:wusbhc_destroy().
- *
- *   Each Wireless USB host maintains a list of DN_Connect requests
- *   (actually we maintain a list of pending Connect Acks, the
- *   wusbhc->ca_list).
- *
- * LIFE CYCLE OF port->wusb_dev
- *
- *   Before the @wusbhc structure put()s the reference it owns for
- *   port->wusb_dev [and clean the wusb_dev pointer], it needs to
- *   lock @wusbhc->mutex.
- */
-
-#include <linux/jiffies.h>
-#include <linux/ctype.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/export.h>
-#include "wusbhc.h"
-
-static void wusbhc_devconnect_acked_work(struct work_struct *work);
-
-static void wusb_dev_free(struct wusb_dev *wusb_dev)
-{
-	kfree(wusb_dev);
-}
-
-static struct wusb_dev *wusb_dev_alloc(struct wusbhc *wusbhc)
-{
-	struct wusb_dev *wusb_dev;
-
-	wusb_dev = kzalloc(sizeof(*wusb_dev), GFP_KERNEL);
-	if (wusb_dev == NULL)
-		goto err;
-
-	wusb_dev->wusbhc = wusbhc;
-
-	INIT_WORK(&wusb_dev->devconnect_acked_work, wusbhc_devconnect_acked_work);
-
-	return wusb_dev;
-err:
-	wusb_dev_free(wusb_dev);
-	return NULL;
-}
-
-
-/*
- * Using the Connect-Ack list, fill out the @wusbhc Connect-Ack WUSB IE
- * properly so that it can be added to the MMC.
- *
- * We just get the @wusbhc->ca_list and fill out the first four ones or
- * less (per-spec WUSB1.0[7.5, before T7-38). If the ConnectAck WUSB
- * IE is not allocated, we alloc it.
- *
- * @wusbhc->mutex must be taken
- */
-static void wusbhc_fill_cack_ie(struct wusbhc *wusbhc)
-{
-	unsigned cnt;
-	struct wusb_dev *dev_itr;
-	struct wuie_connect_ack *cack_ie;
-
-	cack_ie = &wusbhc->cack_ie;
-	cnt = 0;
-	list_for_each_entry(dev_itr, &wusbhc->cack_list, cack_node) {
-		cack_ie->blk[cnt].CDID = dev_itr->cdid;
-		cack_ie->blk[cnt].bDeviceAddress = dev_itr->addr;
-		if (++cnt >= WUIE_ELT_MAX)
-			break;
-	}
-	cack_ie->hdr.bLength = sizeof(cack_ie->hdr)
-		+ cnt * sizeof(cack_ie->blk[0]);
-}
-
-/*
- * Register a new device that wants to connect
- *
- * A new device wants to connect, so we add it to the Connect-Ack
- * list. We give it an address in the unauthorized range (bit 8 set);
- * user space will have to drive authorization further on.
- *
- * @dev_addr: address to use for the device (which is also the port
- *            number).
- *
- * @wusbhc->mutex must be taken
- */
-static struct wusb_dev *wusbhc_cack_add(struct wusbhc *wusbhc,
-					struct wusb_dn_connect *dnc,
-					const char *pr_cdid, u8 port_idx)
-{
-	struct device *dev = wusbhc->dev;
-	struct wusb_dev *wusb_dev;
-	int new_connection = wusb_dn_connect_new_connection(dnc);
-	u8 dev_addr;
-	int result;
-
-	/* Is it registered already? */
-	list_for_each_entry(wusb_dev, &wusbhc->cack_list, cack_node)
-		if (!memcmp(&wusb_dev->cdid, &dnc->CDID,
-			    sizeof(wusb_dev->cdid)))
-			return wusb_dev;
-	/* We don't have it, create an entry, register it */
-	wusb_dev = wusb_dev_alloc(wusbhc);
-	if (wusb_dev == NULL)
-		return NULL;
-	wusb_dev_init(wusb_dev);
-	wusb_dev->cdid = dnc->CDID;
-	wusb_dev->port_idx = port_idx;
-
-	/*
-	 * Devices are always available within the cluster reservation
-	 * and since the hardware will take the intersection of the
-	 * per-device availability and the cluster reservation, the
-	 * per-device availability can simply be set to always
-	 * available.
-	 */
-	bitmap_fill(wusb_dev->availability.bm, UWB_NUM_MAS);
-
-	/* FIXME: handle reconnects instead of assuming connects are
-	   always new. */
-	if (1 && new_connection == 0)
-		new_connection = 1;
-	if (new_connection) {
-		dev_addr = (port_idx + 2) | WUSB_DEV_ADDR_UNAUTH;
-
-		dev_info(dev, "Connecting new WUSB device to address %u, "
-			"port %u\n", dev_addr, port_idx);
-
-		result = wusb_set_dev_addr(wusbhc, wusb_dev, dev_addr);
-		if (result < 0)
-			return NULL;
-	}
-	wusb_dev->entry_ts = jiffies;
-	list_add_tail(&wusb_dev->cack_node, &wusbhc->cack_list);
-	wusbhc->cack_count++;
-	wusbhc_fill_cack_ie(wusbhc);
-
-	return wusb_dev;
-}
-
-/*
- * Remove a Connect-Ack context entry from the HCs view
- *
- * @wusbhc->mutex must be taken
- */
-static void wusbhc_cack_rm(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
-{
-	list_del_init(&wusb_dev->cack_node);
-	wusbhc->cack_count--;
-	wusbhc_fill_cack_ie(wusbhc);
-}
-
-/*
- * @wusbhc->mutex must be taken */
-static
-void wusbhc_devconnect_acked(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
-{
-	wusbhc_cack_rm(wusbhc, wusb_dev);
-	if (wusbhc->cack_count)
-		wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr);
-	else
-		wusbhc_mmcie_rm(wusbhc, &wusbhc->cack_ie.hdr);
-}
-
-static void wusbhc_devconnect_acked_work(struct work_struct *work)
-{
-	struct wusb_dev *wusb_dev = container_of(work, struct wusb_dev,
-						 devconnect_acked_work);
-	struct wusbhc *wusbhc = wusb_dev->wusbhc;
-
-	mutex_lock(&wusbhc->mutex);
-	wusbhc_devconnect_acked(wusbhc, wusb_dev);
-	mutex_unlock(&wusbhc->mutex);
-
-	wusb_dev_put(wusb_dev);
-}
-
-/*
- * Ack a device for connection
- *
- * FIXME: docs
- *
- * @pr_cdid:	Printable CDID...hex Use @dnc->cdid for the real deal.
- *
- * So we get the connect ack IE (may have been allocated already),
- * find an empty connect block, an empty virtual port, create an
- * address with it (see below), make it an unauth addr [bit 7 set] and
- * set the MMC.
- *
- * Addresses: because WUSB hosts have no downstream hubs, we can do a
- *            1:1 mapping between 'port number' and device
- *            address. This simplifies many things, as during this
- *            initial connect phase the USB stack has no knowledge of
- *            the device and hasn't assigned an address yet--we know
- *            USB's choose_address() will use the same heuristics we
- *            use here, so we can assume which address will be assigned.
- *
- *            USB stack always assigns address 1 to the root hub, so
- *            to the port number we add 2 (thus virtual port #0 is
- *            addr #2).
- *
- * @wusbhc shall be referenced
- */
-static
-void wusbhc_devconnect_ack(struct wusbhc *wusbhc, struct wusb_dn_connect *dnc,
-			   const char *pr_cdid)
-{
-	int result;
-	struct device *dev = wusbhc->dev;
-	struct wusb_dev *wusb_dev;
-	struct wusb_port *port;
-	unsigned idx;
-
-	mutex_lock(&wusbhc->mutex);
-
-	/* Check we are not handling it already */
-	for (idx = 0; idx < wusbhc->ports_max; idx++) {
-		port = wusb_port_by_idx(wusbhc, idx);
-		if (port->wusb_dev
-		    && memcmp(&dnc->CDID, &port->wusb_dev->cdid, sizeof(dnc->CDID)) == 0)
-			goto error_unlock;
-	}
-	/* Look up those fake ports we have for a free one */
-	for (idx = 0; idx < wusbhc->ports_max; idx++) {
-		port = wusb_port_by_idx(wusbhc, idx);
-		if ((port->status & USB_PORT_STAT_POWER)
-		    && !(port->status & USB_PORT_STAT_CONNECTION))
-			break;
-	}
-	if (idx >= wusbhc->ports_max) {
-		dev_err(dev, "Host controller can't connect more devices "
-			"(%u already connected); device %s rejected\n",
-			wusbhc->ports_max, pr_cdid);
-		/* NOTE: we could send a WUIE_Disconnect here, but we haven't
-		 *       event acked, so the device will eventually timeout the
-		 *       connection, right? */
-		goto error_unlock;
-	}
-
-	/* Make sure we are using no crypto on that "virtual port" */
-	wusbhc->set_ptk(wusbhc, idx, 0, NULL, 0);
-
-	/* Grab a filled in Connect-Ack context, fill out the
-	 * Connect-Ack Wireless USB IE, set the MMC */
-	wusb_dev = wusbhc_cack_add(wusbhc, dnc, pr_cdid, idx);
-	if (wusb_dev == NULL)
-		goto error_unlock;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &wusbhc->cack_ie.hdr);
-	if (result < 0)
-		goto error_unlock;
-	/* Give the device at least 2ms (WUSB1.0[7.5.1p3]), let's do
-	 * three for a good measure */
-	msleep(3);
-	port->wusb_dev = wusb_dev;
-	port->status |= USB_PORT_STAT_CONNECTION;
-	port->change |= USB_PORT_STAT_C_CONNECTION;
-	/* Now the port status changed to connected; hub_wq will
-	 * pick the change up and try to reset the port to bring it to
-	 * the enabled state--so this process returns up to the stack
-	 * and it calls back into wusbhc_rh_port_reset().
-	 */
-error_unlock:
-	mutex_unlock(&wusbhc->mutex);
-	return;
-
-}
-
-/*
- * Disconnect a Wireless USB device from its fake port
- *
- * Marks the port as disconnected so that hub_wq can pick up the change
- * and drops our knowledge about the device.
- *
- * Assumes there is a device connected
- *
- * @port_index: zero based port number
- *
- * NOTE: @wusbhc->mutex is locked
- *
- * WARNING: From here it is not very safe to access anything hanging off
- *	    wusb_dev
- */
-static void __wusbhc_dev_disconnect(struct wusbhc *wusbhc,
-				    struct wusb_port *port)
-{
-	struct wusb_dev *wusb_dev = port->wusb_dev;
-
-	port->status &= ~(USB_PORT_STAT_CONNECTION | USB_PORT_STAT_ENABLE
-			  | USB_PORT_STAT_SUSPEND | USB_PORT_STAT_RESET
-			  | USB_PORT_STAT_LOW_SPEED | USB_PORT_STAT_HIGH_SPEED);
-	port->change |= USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE;
-	if (wusb_dev) {
-		dev_dbg(wusbhc->dev, "disconnecting device from port %d\n", wusb_dev->port_idx);
-		if (!list_empty(&wusb_dev->cack_node))
-			list_del_init(&wusb_dev->cack_node);
-		/* For the one in cack_add() */
-		wusb_dev_put(wusb_dev);
-	}
-	port->wusb_dev = NULL;
-
-	/* After a device disconnects, change the GTK (see [WUSB]
-	 * section 6.2.11.2). */
-	if (wusbhc->active)
-		wusbhc_gtk_rekey(wusbhc);
-
-	/* The Wireless USB part has forgotten about the device already; now
-	 * hub_wq's timer will pick up the disconnection and remove the USB
-	 * device from the system
-	 */
-}
-
-/*
- * Refresh the list of keep alives to emit in the MMC
- *
- * We only publish the first four devices that have a coming timeout
- * condition. Then when we are done processing those, we go for the
- * next ones. We ignore the ones that have timed out already (they'll
- * be purged).
- *
- * This might cause the first devices to timeout the last devices in
- * the port array...FIXME: come up with a better algorithm?
- *
- * Note we can't do much about MMC's ops errors; we hope next refresh
- * will kind of handle it.
- *
- * NOTE: @wusbhc->mutex is locked
- */
-static void __wusbhc_keep_alive(struct wusbhc *wusbhc)
-{
-	struct device *dev = wusbhc->dev;
-	unsigned cnt;
-	struct wusb_dev *wusb_dev;
-	struct wusb_port *wusb_port;
-	struct wuie_keep_alive *ie = &wusbhc->keep_alive_ie;
-	unsigned keep_alives, old_keep_alives;
-
-	old_keep_alives = ie->hdr.bLength - sizeof(ie->hdr);
-	keep_alives = 0;
-	for (cnt = 0;
-	     keep_alives < WUIE_ELT_MAX && cnt < wusbhc->ports_max;
-	     cnt++) {
-		unsigned tt = msecs_to_jiffies(wusbhc->trust_timeout);
-
-		wusb_port = wusb_port_by_idx(wusbhc, cnt);
-		wusb_dev = wusb_port->wusb_dev;
-
-		if (wusb_dev == NULL)
-			continue;
-		if (wusb_dev->usb_dev == NULL)
-			continue;
-
-		if (time_after(jiffies, wusb_dev->entry_ts + tt)) {
-			dev_err(dev, "KEEPALIVE: device %u timed out\n",
-				wusb_dev->addr);
-			__wusbhc_dev_disconnect(wusbhc, wusb_port);
-		} else if (time_after(jiffies, wusb_dev->entry_ts + tt/3)) {
-			/* Approaching timeout cut off, need to refresh */
-			ie->bDeviceAddress[keep_alives++] = wusb_dev->addr;
-		}
-	}
-	if (keep_alives & 0x1)	/* pad to even number ([WUSB] section 7.5.9) */
-		ie->bDeviceAddress[keep_alives++] = 0x7f;
-	ie->hdr.bLength = sizeof(ie->hdr) +
-		keep_alives*sizeof(ie->bDeviceAddress[0]);
-	if (keep_alives > 0)
-		wusbhc_mmcie_set(wusbhc, 10, 5, &ie->hdr);
-	else if (old_keep_alives != 0)
-		wusbhc_mmcie_rm(wusbhc, &ie->hdr);
-}
-
-/*
- * Do a run through all devices checking for timeouts
- */
-static void wusbhc_keep_alive_run(struct work_struct *ws)
-{
-	struct delayed_work *dw = to_delayed_work(ws);
-	struct wusbhc *wusbhc =	container_of(dw, struct wusbhc, keep_alive_timer);
-
-	mutex_lock(&wusbhc->mutex);
-	__wusbhc_keep_alive(wusbhc);
-	mutex_unlock(&wusbhc->mutex);
-
-	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
-			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
-}
-
-/*
- * Find the wusb_dev from its device address.
- *
- * The device can be found directly from the address (see
- * wusb_cack_add() for where the device address is set to port_idx
- * +2), except when the address is zero.
- */
-static struct wusb_dev *wusbhc_find_dev_by_addr(struct wusbhc *wusbhc, u8 addr)
-{
-	int p;
-
-	if (addr == 0xff) /* unconnected */
-		return NULL;
-
-	if (addr > 0) {
-		int port = (addr & ~0x80) - 2;
-		if (port < 0 || port >= wusbhc->ports_max)
-			return NULL;
-		return wusb_port_by_idx(wusbhc, port)->wusb_dev;
-	}
-
-	/* Look for the device with address 0. */
-	for (p = 0; p < wusbhc->ports_max; p++) {
-		struct wusb_dev *wusb_dev = wusb_port_by_idx(wusbhc, p)->wusb_dev;
-		if (wusb_dev && wusb_dev->addr == addr)
-			return wusb_dev;
-	}
-	return NULL;
-}
-
-/*
- * Handle a DN_Alive notification (WUSB1.0[7.6.1])
- *
- * This just updates the device activity timestamp and then refreshes
- * the keep alive IE.
- *
- * @wusbhc shall be referenced and unlocked
- */
-static void wusbhc_handle_dn_alive(struct wusbhc *wusbhc, u8 srcaddr)
-{
-	struct wusb_dev *wusb_dev;
-
-	mutex_lock(&wusbhc->mutex);
-	wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr);
-	if (wusb_dev == NULL) {
-		dev_dbg(wusbhc->dev, "ignoring DN_Alive from unconnected device %02x\n",
-			srcaddr);
-	} else {
-		wusb_dev->entry_ts = jiffies;
-		__wusbhc_keep_alive(wusbhc);
-	}
-	mutex_unlock(&wusbhc->mutex);
-}
-
-/*
- * Handle a DN_Connect notification (WUSB1.0[7.6.1])
- *
- * @wusbhc
- * @pkt_hdr
- * @size:    Size of the buffer where the notification resides; if the
- *           notification data suggests there should be more data than
- *           available, an error will be signaled and the whole buffer
- *           consumed.
- *
- * @wusbhc->mutex shall be held
- */
-static void wusbhc_handle_dn_connect(struct wusbhc *wusbhc,
-				     struct wusb_dn_hdr *dn_hdr,
-				     size_t size)
-{
-	struct device *dev = wusbhc->dev;
-	struct wusb_dn_connect *dnc;
-	char pr_cdid[WUSB_CKHDID_STRSIZE];
-	static const char *beacon_behaviour[] = {
-		"reserved",
-		"self-beacon",
-		"directed-beacon",
-		"no-beacon"
-	};
-
-	if (size < sizeof(*dnc)) {
-		dev_err(dev, "DN CONNECT: short notification (%zu < %zu)\n",
-			size, sizeof(*dnc));
-		return;
-	}
-
-	dnc = container_of(dn_hdr, struct wusb_dn_connect, hdr);
-	sprintf(pr_cdid, "%16ph", dnc->CDID.data);
-	dev_info(dev, "DN CONNECT: device %s @ %x (%s) wants to %s\n",
-		 pr_cdid,
-		 wusb_dn_connect_prev_dev_addr(dnc),
-		 beacon_behaviour[wusb_dn_connect_beacon_behavior(dnc)],
-		 wusb_dn_connect_new_connection(dnc) ? "connect" : "reconnect");
-	/* ACK the connect */
-	wusbhc_devconnect_ack(wusbhc, dnc, pr_cdid);
-}
-
-/*
- * Handle a DN_Disconnect notification (WUSB1.0[7.6.1])
- *
- * Device is going down -- do the disconnect.
- *
- * @wusbhc shall be referenced and unlocked
- */
-static void wusbhc_handle_dn_disconnect(struct wusbhc *wusbhc, u8 srcaddr)
-{
-	struct device *dev = wusbhc->dev;
-	struct wusb_dev *wusb_dev;
-
-	mutex_lock(&wusbhc->mutex);
-	wusb_dev = wusbhc_find_dev_by_addr(wusbhc, srcaddr);
-	if (wusb_dev == NULL) {
-		dev_dbg(dev, "ignoring DN DISCONNECT from unconnected device %02x\n",
-			srcaddr);
-	} else {
-		dev_info(dev, "DN DISCONNECT: device 0x%02x going down\n",
-			wusb_dev->addr);
-		__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc,
-			wusb_dev->port_idx));
-	}
-	mutex_unlock(&wusbhc->mutex);
-}
-
-/*
- * Handle a Device Notification coming a host
- *
- * The Device Notification comes from a host (HWA, DWA or WHCI)
- * wrapped in a set of headers. Somebody else has peeled off those
- * headers for us and we just get one Device Notifications.
- *
- * Invalid DNs (e.g., too short) are discarded.
- *
- * @wusbhc shall be referenced
- *
- * FIXMES:
- *  - implement priorities as in WUSB1.0[Table 7-55]?
- */
-void wusbhc_handle_dn(struct wusbhc *wusbhc, u8 srcaddr,
-		      struct wusb_dn_hdr *dn_hdr, size_t size)
-{
-	struct device *dev = wusbhc->dev;
-
-	if (size < sizeof(struct wusb_dn_hdr)) {
-		dev_err(dev, "DN data shorter than DN header (%d < %d)\n",
-			(int)size, (int)sizeof(struct wusb_dn_hdr));
-		return;
-	}
-	switch (dn_hdr->bType) {
-	case WUSB_DN_CONNECT:
-		wusbhc_handle_dn_connect(wusbhc, dn_hdr, size);
-		break;
-	case WUSB_DN_ALIVE:
-		wusbhc_handle_dn_alive(wusbhc, srcaddr);
-		break;
-	case WUSB_DN_DISCONNECT:
-		wusbhc_handle_dn_disconnect(wusbhc, srcaddr);
-		break;
-	case WUSB_DN_MASAVAILCHANGED:
-	case WUSB_DN_RWAKE:
-	case WUSB_DN_SLEEP:
-		/* FIXME: handle these DNs. */
-		break;
-	case WUSB_DN_EPRDY:
-		/* The hardware handles these. */
-		break;
-	default:
-		dev_warn(dev, "unknown DN %u (%d octets) from %u\n",
-			 dn_hdr->bType, (int)size, srcaddr);
-	}
-}
-EXPORT_SYMBOL_GPL(wusbhc_handle_dn);
-
-/*
- * Disconnect a WUSB device from a the cluster
- *
- * @wusbhc
- * @port     Fake port where the device is (wusbhc index, not USB port number).
- *
- * In Wireless USB, a disconnect is basically telling the device he is
- * being disconnected and forgetting about him.
- *
- * We send the device a Device Disconnect IE (WUSB1.0[7.5.11]) for 100
- * ms and then keep going.
- *
- * We don't do much in case of error; we always pretend we disabled
- * the port and disconnected the device. If physically the request
- * didn't get there (many things can fail in the way there), the stack
- * will reject the device's communication attempts.
- *
- * @wusbhc should be refcounted and locked
- */
-void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port_idx)
-{
-	int result;
-	struct device *dev = wusbhc->dev;
-	struct wusb_dev *wusb_dev;
-	struct wuie_disconnect *ie;
-
-	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
-	if (wusb_dev == NULL) {
-		/* reset no device? ignore */
-		dev_dbg(dev, "DISCONNECT: no device at port %u, ignoring\n",
-			port_idx);
-		return;
-	}
-	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
-
-	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
-	if (ie == NULL)
-		return;
-	ie->hdr.bLength = sizeof(*ie);
-	ie->hdr.bIEIdentifier = WUIE_ID_DEVICE_DISCONNECT;
-	ie->bDeviceAddress = wusb_dev->addr;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &ie->hdr);
-	if (result < 0)
-		dev_err(dev, "DISCONNECT: can't set MMC: %d\n", result);
-	else {
-		/* At least 6 MMCs, assuming at least 1 MMC per zone. */
-		msleep(7*4);
-		wusbhc_mmcie_rm(wusbhc, &ie->hdr);
-	}
-	kfree(ie);
-}
-
-/*
- * Walk over the BOS descriptor, verify and grok it
- *
- * @usb_dev: referenced
- * @wusb_dev: referenced and unlocked
- *
- * The BOS descriptor is defined at WUSB1.0[7.4.1], and it defines a
- * "flexible" way to wrap all kinds of descriptors inside an standard
- * descriptor (wonder why they didn't use normal descriptors,
- * btw). Not like they lack code.
- *
- * At the end we go to look for the WUSB Device Capabilities
- * (WUSB1.0[7.4.1.1]) that is wrapped in a device capability descriptor
- * that is part of the BOS descriptor set. That tells us what does the
- * device support (dual role, beacon type, UWB PHY rates).
- */
-static int wusb_dev_bos_grok(struct usb_device *usb_dev,
-			     struct wusb_dev *wusb_dev,
-			     struct usb_bos_descriptor *bos, size_t desc_size)
-{
-	ssize_t result;
-	struct device *dev = &usb_dev->dev;
-	void *itr, *top;
-
-	/* Walk over BOS capabilities, verify them */
-	itr = (void *)bos + sizeof(*bos);
-	top = itr + desc_size - sizeof(*bos);
-	while (itr < top) {
-		struct usb_dev_cap_header *cap_hdr = itr;
-		size_t cap_size;
-		u8 cap_type;
-		if (top - itr < sizeof(*cap_hdr)) {
-			dev_err(dev, "Device BUG? premature end of BOS header "
-				"data [offset 0x%02x]: only %zu bytes left\n",
-				(int)(itr - (void *)bos), top - itr);
-			result = -ENOSPC;
-			goto error_bad_cap;
-		}
-		cap_size = cap_hdr->bLength;
-		cap_type = cap_hdr->bDevCapabilityType;
-		if (cap_size == 0)
-			break;
-		if (cap_size > top - itr) {
-			dev_err(dev, "Device BUG? premature end of BOS data "
-				"[offset 0x%02x cap %02x %zu bytes]: "
-				"only %zu bytes left\n",
-				(int)(itr - (void *)bos),
-				cap_type, cap_size, top - itr);
-			result = -EBADF;
-			goto error_bad_cap;
-		}
-		switch (cap_type) {
-		case USB_CAP_TYPE_WIRELESS_USB:
-			if (cap_size != sizeof(*wusb_dev->wusb_cap_descr))
-				dev_err(dev, "Device BUG? WUSB Capability "
-					"descriptor is %zu bytes vs %zu "
-					"needed\n", cap_size,
-					sizeof(*wusb_dev->wusb_cap_descr));
-			else
-				wusb_dev->wusb_cap_descr = itr;
-			break;
-		default:
-			dev_err(dev, "BUG? Unknown BOS capability 0x%02x "
-				"(%zu bytes) at offset 0x%02x\n", cap_type,
-				cap_size, (int)(itr - (void *)bos));
-		}
-		itr += cap_size;
-	}
-	result = 0;
-error_bad_cap:
-	return result;
-}
-
-/*
- * Add information from the BOS descriptors to the device
- *
- * @usb_dev: referenced
- * @wusb_dev: referenced and unlocked
- *
- * So what we do is we alloc a space for the BOS descriptor of 64
- * bytes; read the first four bytes which include the wTotalLength
- * field (WUSB1.0[T7-26]) and if it fits in those 64 bytes, read the
- * whole thing. If not we realloc to that size.
- *
- * Then we call the groking function, that will fill up
- * wusb_dev->wusb_cap_descr, which is what we'll need later on.
- */
-static int wusb_dev_bos_add(struct usb_device *usb_dev,
-			    struct wusb_dev *wusb_dev)
-{
-	ssize_t result;
-	struct device *dev = &usb_dev->dev;
-	struct usb_bos_descriptor *bos;
-	size_t alloc_size = 32, desc_size = 4;
-
-	bos = kmalloc(alloc_size, GFP_KERNEL);
-	if (bos == NULL)
-		return -ENOMEM;
-	result = usb_get_descriptor(usb_dev, USB_DT_BOS, 0, bos, desc_size);
-	if (result < 4) {
-		dev_err(dev, "Can't get BOS descriptor or too short: %zd\n",
-			result);
-		goto error_get_descriptor;
-	}
-	desc_size = le16_to_cpu(bos->wTotalLength);
-	if (desc_size >= alloc_size) {
-		kfree(bos);
-		alloc_size = desc_size;
-		bos = kmalloc(alloc_size, GFP_KERNEL);
-		if (bos == NULL)
-			return -ENOMEM;
-	}
-	result = usb_get_descriptor(usb_dev, USB_DT_BOS, 0, bos, desc_size);
-	if (result < 0 || result != desc_size) {
-		dev_err(dev, "Can't get  BOS descriptor or too short (need "
-			"%zu bytes): %zd\n", desc_size, result);
-		goto error_get_descriptor;
-	}
-	if (result < sizeof(*bos)
-	    || le16_to_cpu(bos->wTotalLength) != desc_size) {
-		dev_err(dev, "Can't get  BOS descriptor or too short (need "
-			"%zu bytes): %zd\n", desc_size, result);
-		goto error_get_descriptor;
-	}
-
-	result = wusb_dev_bos_grok(usb_dev, wusb_dev, bos, result);
-	if (result < 0)
-		goto error_bad_bos;
-	wusb_dev->bos = bos;
-	return 0;
-
-error_bad_bos:
-error_get_descriptor:
-	kfree(bos);
-	wusb_dev->wusb_cap_descr = NULL;
-	return result;
-}
-
-static void wusb_dev_bos_rm(struct wusb_dev *wusb_dev)
-{
-	kfree(wusb_dev->bos);
-	wusb_dev->wusb_cap_descr = NULL;
-};
-
-/*
- * USB stack's device addition Notifier Callback
- *
- * Called from drivers/usb/core/hub.c when a new device is added; we
- * use this hook to perform certain WUSB specific setup work on the
- * new device. As well, it is the first time we can connect the
- * wusb_dev and the usb_dev. So we note it down in wusb_dev and take a
- * reference that we'll drop.
- *
- * First we need to determine if the device is a WUSB device (else we
- * ignore it). For that we use the speed setting (USB_SPEED_WIRELESS)
- * [FIXME: maybe we'd need something more definitive]. If so, we track
- * it's usb_busd and from there, the WUSB HC.
- *
- * Because all WUSB HCs are contained in a 'struct wusbhc', voila, we
- * get the wusbhc for the device.
- *
- * We have a reference on @usb_dev (as we are called at the end of its
- * enumeration).
- *
- * NOTE: @usb_dev locked
- */
-static void wusb_dev_add_ncb(struct usb_device *usb_dev)
-{
-	int result = 0;
-	struct wusb_dev *wusb_dev;
-	struct wusbhc *wusbhc;
-	struct device *dev = &usb_dev->dev;
-	u8 port_idx;
-
-	if (usb_dev->wusb == 0 || usb_dev->devnum == 1)
-		return;		/* skip non wusb and wusb RHs */
-
-	usb_set_device_state(usb_dev, USB_STATE_UNAUTHENTICATED);
-
-	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
-	if (wusbhc == NULL)
-		goto error_nodev;
-	mutex_lock(&wusbhc->mutex);
-	wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc, usb_dev);
-	port_idx = wusb_port_no_to_idx(usb_dev->portnum);
-	mutex_unlock(&wusbhc->mutex);
-	if (wusb_dev == NULL)
-		goto error_nodev;
-	wusb_dev->usb_dev = usb_get_dev(usb_dev);
-	usb_dev->wusb_dev = wusb_dev_get(wusb_dev);
-	result = wusb_dev_sec_add(wusbhc, usb_dev, wusb_dev);
-	if (result < 0) {
-		dev_err(dev, "Cannot enable security: %d\n", result);
-		goto error_sec_add;
-	}
-	/* Now query the device for it's BOS and attach it to wusb_dev */
-	result = wusb_dev_bos_add(usb_dev, wusb_dev);
-	if (result < 0) {
-		dev_err(dev, "Cannot get BOS descriptors: %d\n", result);
-		goto error_bos_add;
-	}
-	result = wusb_dev_sysfs_add(wusbhc, usb_dev, wusb_dev);
-	if (result < 0)
-		goto error_add_sysfs;
-out:
-	wusb_dev_put(wusb_dev);
-	wusbhc_put(wusbhc);
-error_nodev:
-	return;
-
-error_add_sysfs:
-	wusb_dev_bos_rm(wusb_dev);
-error_bos_add:
-	wusb_dev_sec_rm(wusb_dev);
-error_sec_add:
-	mutex_lock(&wusbhc->mutex);
-	__wusbhc_dev_disconnect(wusbhc, wusb_port_by_idx(wusbhc, port_idx));
-	mutex_unlock(&wusbhc->mutex);
-	goto out;
-}
-
-/*
- * Undo all the steps done at connection by the notifier callback
- *
- * NOTE: @usb_dev locked
- */
-static void wusb_dev_rm_ncb(struct usb_device *usb_dev)
-{
-	struct wusb_dev *wusb_dev = usb_dev->wusb_dev;
-
-	if (usb_dev->wusb == 0 || usb_dev->devnum == 1)
-		return;		/* skip non wusb and wusb RHs */
-
-	wusb_dev_sysfs_rm(wusb_dev);
-	wusb_dev_bos_rm(wusb_dev);
-	wusb_dev_sec_rm(wusb_dev);
-	wusb_dev->usb_dev = NULL;
-	usb_dev->wusb_dev = NULL;
-	wusb_dev_put(wusb_dev);
-	usb_put_dev(usb_dev);
-}
-
-/*
- * Handle notifications from the USB stack (notifier call back)
- *
- * This is called when the USB stack does a
- * usb_{bus,device}_{add,remove}() so we can do WUSB specific
- * handling. It is called with [for the case of
- * USB_DEVICE_{ADD,REMOVE} with the usb_dev locked.
- */
-int wusb_usb_ncb(struct notifier_block *nb, unsigned long val,
-		 void *priv)
-{
-	int result = NOTIFY_OK;
-
-	switch (val) {
-	case USB_DEVICE_ADD:
-		wusb_dev_add_ncb(priv);
-		break;
-	case USB_DEVICE_REMOVE:
-		wusb_dev_rm_ncb(priv);
-		break;
-	case USB_BUS_ADD:
-		/* ignore (for now) */
-	case USB_BUS_REMOVE:
-		break;
-	default:
-		WARN_ON(1);
-		result = NOTIFY_BAD;
-	}
-	return result;
-}
-
-/*
- * Return a referenced wusb_dev given a @wusbhc and @usb_dev
- */
-struct wusb_dev *__wusb_dev_get_by_usb_dev(struct wusbhc *wusbhc,
-					   struct usb_device *usb_dev)
-{
-	struct wusb_dev *wusb_dev;
-	u8 port_idx;
-
-	port_idx = wusb_port_no_to_idx(usb_dev->portnum);
-	BUG_ON(port_idx > wusbhc->ports_max);
-	wusb_dev = wusb_port_by_idx(wusbhc, port_idx)->wusb_dev;
-	if (wusb_dev != NULL)		/* ops, device is gone */
-		wusb_dev_get(wusb_dev);
-	return wusb_dev;
-}
-EXPORT_SYMBOL_GPL(__wusb_dev_get_by_usb_dev);
-
-void wusb_dev_destroy(struct kref *_wusb_dev)
-{
-	struct wusb_dev *wusb_dev = container_of(_wusb_dev, struct wusb_dev, refcnt);
-
-	list_del_init(&wusb_dev->cack_node);
-	wusb_dev_free(wusb_dev);
-}
-EXPORT_SYMBOL_GPL(wusb_dev_destroy);
-
-/*
- * Create all the device connect handling infrastructure
- *
- * This is basically the device info array, Connect Acknowledgement
- * (cack) lists, keep-alive timers (and delayed work thread).
- */
-int wusbhc_devconnect_create(struct wusbhc *wusbhc)
-{
-	wusbhc->keep_alive_ie.hdr.bIEIdentifier = WUIE_ID_KEEP_ALIVE;
-	wusbhc->keep_alive_ie.hdr.bLength = sizeof(wusbhc->keep_alive_ie.hdr);
-	INIT_DELAYED_WORK(&wusbhc->keep_alive_timer, wusbhc_keep_alive_run);
-
-	wusbhc->cack_ie.hdr.bIEIdentifier = WUIE_ID_CONNECTACK;
-	wusbhc->cack_ie.hdr.bLength = sizeof(wusbhc->cack_ie.hdr);
-	INIT_LIST_HEAD(&wusbhc->cack_list);
-
-	return 0;
-}
-
-/*
- * Release all resources taken by the devconnect stuff
- */
-void wusbhc_devconnect_destroy(struct wusbhc *wusbhc)
-{
-	/* no op */
-}
-
-/*
- * wusbhc_devconnect_start - start accepting device connections
- * @wusbhc: the WUSB HC
- *
- * Sets the Host Info IE to accept all new connections.
- *
- * FIXME: This also enables the keep alives but this is not necessary
- * until there are connected and authenticated devices.
- */
-int wusbhc_devconnect_start(struct wusbhc *wusbhc)
-{
-	struct device *dev = wusbhc->dev;
-	struct wuie_host_info *hi;
-	int result;
-
-	hi = kzalloc(sizeof(*hi), GFP_KERNEL);
-	if (hi == NULL)
-		return -ENOMEM;
-
-	hi->hdr.bLength       = sizeof(*hi);
-	hi->hdr.bIEIdentifier = WUIE_ID_HOST_INFO;
-	hi->attributes        = cpu_to_le16((wusbhc->rsv->stream << 3) | WUIE_HI_CAP_ALL);
-	hi->CHID              = wusbhc->chid;
-	result = wusbhc_mmcie_set(wusbhc, 0, 0, &hi->hdr);
-	if (result < 0) {
-		dev_err(dev, "Cannot add Host Info MMCIE: %d\n", result);
-		goto error_mmcie_set;
-	}
-	wusbhc->wuie_host_info = hi;
-
-	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
-			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
-
-	return 0;
-
-error_mmcie_set:
-	kfree(hi);
-	return result;
-}
-
-/*
- * wusbhc_devconnect_stop - stop managing connected devices
- * @wusbhc: the WUSB HC
- *
- * Disconnects any devices still connected, stops the keep alives and
- * removes the Host Info IE.
- */
-void wusbhc_devconnect_stop(struct wusbhc *wusbhc)
-{
-	int i;
-
-	mutex_lock(&wusbhc->mutex);
-	for (i = 0; i < wusbhc->ports_max; i++) {
-		if (wusbhc->port[i].wusb_dev)
-			__wusbhc_dev_disconnect(wusbhc, &wusbhc->port[i]);
-	}
-	mutex_unlock(&wusbhc->mutex);
-
-	cancel_delayed_work_sync(&wusbhc->keep_alive_timer);
-	wusbhc_mmcie_rm(wusbhc, &wusbhc->wuie_host_info->hdr);
-	kfree(wusbhc->wuie_host_info);
-	wusbhc->wuie_host_info = NULL;
-}
-
-/*
- * wusb_set_dev_addr - set the WUSB device address used by the host
- * @wusbhc: the WUSB HC the device is connect to
- * @wusb_dev: the WUSB device
- * @addr: new device address
- */
-int wusb_set_dev_addr(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev, u8 addr)
-{
-	int result;
-
-	wusb_dev->addr = addr;
-	result = wusbhc->dev_info_set(wusbhc, wusb_dev);
-	if (result < 0)
-		dev_err(wusbhc->dev, "device %d: failed to set device "
-			"address\n", wusb_dev->port_idx);
-	else
-		dev_info(wusbhc->dev, "device %d: %s addr %u\n",
-			 wusb_dev->port_idx,
-			 (addr & WUSB_DEV_ADDR_UNAUTH) ? "unauth" : "auth",
-			 wusb_dev->addr);
-
-	return result;
-}
diff --git a/drivers/usb/wusbcore/mmc.c b/drivers/usb/wusbcore/mmc.c
deleted file mode 100644
index acce0d551eb2..000000000000
--- a/drivers/usb/wusbcore/mmc.c
+++ /dev/null
@@ -1,303 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB Wire Adapter: Control/Data Streaming Interface (WUSB[8])
- * MMC (Microscheduled Management Command) handling
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * WUIEs and MMC IEs...well, they are almost the same at the end. MMC
- * IEs are Wireless USB IEs that go into the MMC period...[what is
- * that? look in Design-overview.txt].
- *
- *
- * This is a simple subsystem to keep track of which IEs are being
- * sent by the host in the MMC period.
- *
- * For each WUIE we ask to send, we keep it in an array, so we can
- * request its removal later, or replace the content. They are tracked
- * by pointer, so be sure to use the same pointer if you want to
- * remove it or update the contents.
- *
- * FIXME:
- *  - add timers that autoremove intervalled IEs?
- */
-#include <linux/usb/wusb.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include "wusbhc.h"
-
-/* Initialize the MMCIEs handling mechanism */
-int wusbhc_mmcie_create(struct wusbhc *wusbhc)
-{
-	u8 mmcies = wusbhc->mmcies_max;
-	wusbhc->mmcie = kcalloc(mmcies, sizeof(wusbhc->mmcie[0]), GFP_KERNEL);
-	if (wusbhc->mmcie == NULL)
-		return -ENOMEM;
-	mutex_init(&wusbhc->mmcie_mutex);
-	return 0;
-}
-
-/* Release resources used by the MMCIEs handling mechanism */
-void wusbhc_mmcie_destroy(struct wusbhc *wusbhc)
-{
-	kfree(wusbhc->mmcie);
-}
-
-/*
- * Add or replace an MMC Wireless USB IE.
- *
- * @interval:    See WUSB1.0[8.5.3.1]
- * @repeat_cnt:  See WUSB1.0[8.5.3.1]
- * @handle:      See WUSB1.0[8.5.3.1]
- * @wuie:        Pointer to the header of the WUSB IE data to add.
- *               MUST BE allocated in a kmalloc buffer (no stack or
- *               vmalloc).
- *               THE CALLER ALWAYS OWNS THE POINTER (we don't free it
- *               on remove, we just forget about it).
- * @returns:     0 if ok, < 0 errno code on error.
- *
- * Goes over the *whole* @wusbhc->mmcie array looking for (a) the
- * first free spot and (b) if @wuie is already in the array (aka:
- * transmitted in the MMCs) the spot were it is.
- *
- * If present, we "overwrite it" (update).
- *
- *
- * NOTE: Need special ordering rules -- see below WUSB1.0 Table 7-38.
- *       The host uses the handle as the 'sort' index. We
- *       allocate the last one always for the WUIE_ID_HOST_INFO, and
- *       the rest, first come first serve in inverse order.
- *
- *       Host software must make sure that it adds the other IEs in
- *       the right order... the host hardware is responsible for
- *       placing the WCTA IEs in the right place with the other IEs
- *       set by host software.
- *
- * NOTE: we can access wusbhc->wa_descr without locking because it is
- *       read only.
- */
-int wusbhc_mmcie_set(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
-		     struct wuie_hdr *wuie)
-{
-	int result = -ENOBUFS;
-	unsigned handle, itr;
-
-	/* Search a handle, taking into account the ordering */
-	mutex_lock(&wusbhc->mmcie_mutex);
-	switch (wuie->bIEIdentifier) {
-	case WUIE_ID_HOST_INFO:
-		/* Always last */
-		handle = wusbhc->mmcies_max - 1;
-		break;
-	case WUIE_ID_ISOCH_DISCARD:
-		dev_err(wusbhc->dev, "Special ordering case for WUIE ID 0x%x "
-			"unimplemented\n", wuie->bIEIdentifier);
-		result = -ENOSYS;
-		goto error_unlock;
-	default:
-		/* search for it or find the last empty slot */
-		handle = ~0;
-		for (itr = 0; itr < wusbhc->mmcies_max - 1; itr++) {
-			if (wusbhc->mmcie[itr] == wuie) {
-				handle = itr;
-				break;
-			}
-			if (wusbhc->mmcie[itr] == NULL)
-				handle = itr;
-		}
-		if (handle == ~0)
-			goto error_unlock;
-	}
-	result = (wusbhc->mmcie_add)(wusbhc, interval, repeat_cnt, handle,
-				     wuie);
-	if (result >= 0)
-		wusbhc->mmcie[handle] = wuie;
-error_unlock:
-	mutex_unlock(&wusbhc->mmcie_mutex);
-	return result;
-}
-EXPORT_SYMBOL_GPL(wusbhc_mmcie_set);
-
-/*
- * Remove an MMC IE previously added with wusbhc_mmcie_set()
- *
- * @wuie	Pointer used to add the WUIE
- */
-void wusbhc_mmcie_rm(struct wusbhc *wusbhc, struct wuie_hdr *wuie)
-{
-	int result;
-	unsigned handle, itr;
-
-	mutex_lock(&wusbhc->mmcie_mutex);
-	for (itr = 0; itr < wusbhc->mmcies_max; itr++) {
-		if (wusbhc->mmcie[itr] == wuie) {
-			handle = itr;
-			goto found;
-		}
-	}
-	mutex_unlock(&wusbhc->mmcie_mutex);
-	return;
-
-found:
-	result = (wusbhc->mmcie_rm)(wusbhc, handle);
-	if (result == 0)
-		wusbhc->mmcie[itr] = NULL;
-	mutex_unlock(&wusbhc->mmcie_mutex);
-}
-EXPORT_SYMBOL_GPL(wusbhc_mmcie_rm);
-
-static int wusbhc_mmc_start(struct wusbhc *wusbhc)
-{
-	int ret;
-
-	mutex_lock(&wusbhc->mutex);
-	ret = wusbhc->start(wusbhc);
-	if (ret >= 0)
-		wusbhc->active = 1;
-	mutex_unlock(&wusbhc->mutex);
-
-	return ret;
-}
-
-static void wusbhc_mmc_stop(struct wusbhc *wusbhc)
-{
-	mutex_lock(&wusbhc->mutex);
-	wusbhc->active = 0;
-	wusbhc->stop(wusbhc, WUSB_CHANNEL_STOP_DELAY_MS);
-	mutex_unlock(&wusbhc->mutex);
-}
-
-/*
- * wusbhc_start - start transmitting MMCs and accepting connections
- * @wusbhc: the HC to start
- *
- * Establishes a cluster reservation, enables device connections, and
- * starts MMCs with appropriate DNTS parameters.
- */
-int wusbhc_start(struct wusbhc *wusbhc)
-{
-	int result;
-	struct device *dev = wusbhc->dev;
-
-	WARN_ON(wusbhc->wuie_host_info != NULL);
-	BUG_ON(wusbhc->uwb_rc == NULL);
-
-	result = wusbhc_rsv_establish(wusbhc);
-	if (result < 0) {
-		dev_err(dev, "cannot establish cluster reservation: %d\n",
-			result);
-		goto error_rsv_establish;
-	}
-
-	result = wusbhc_devconnect_start(wusbhc);
-	if (result < 0) {
-		dev_err(dev, "error enabling device connections: %d\n",
-			result);
-		goto error_devconnect_start;
-	}
-
-	result = wusbhc_sec_start(wusbhc);
-	if (result < 0) {
-		dev_err(dev, "error starting security in the HC: %d\n",
-			result);
-		goto error_sec_start;
-	}
-
-	result = wusbhc->set_num_dnts(wusbhc, wusbhc->dnts_interval,
-		wusbhc->dnts_num_slots);
-	if (result < 0) {
-		dev_err(dev, "Cannot set DNTS parameters: %d\n", result);
-		goto error_set_num_dnts;
-	}
-	result = wusbhc_mmc_start(wusbhc);
-	if (result < 0) {
-		dev_err(dev, "error starting wusbch: %d\n", result);
-		goto error_wusbhc_start;
-	}
-
-	return 0;
-
-error_wusbhc_start:
-	wusbhc_sec_stop(wusbhc);
-error_set_num_dnts:
-error_sec_start:
-	wusbhc_devconnect_stop(wusbhc);
-error_devconnect_start:
-	wusbhc_rsv_terminate(wusbhc);
-error_rsv_establish:
-	return result;
-}
-
-/*
- * wusbhc_stop - stop transmitting MMCs
- * @wusbhc: the HC to stop
- *
- * Stops the WUSB channel and removes the cluster reservation.
- */
-void wusbhc_stop(struct wusbhc *wusbhc)
-{
-	wusbhc_mmc_stop(wusbhc);
-	wusbhc_sec_stop(wusbhc);
-	wusbhc_devconnect_stop(wusbhc);
-	wusbhc_rsv_terminate(wusbhc);
-}
-
-/*
- * Set/reset/update a new CHID
- *
- * Depending on the previous state of the MMCs, start, stop or change
- * the sent MMC. This effectively switches the host controller on and
- * off (radio wise).
- */
-int wusbhc_chid_set(struct wusbhc *wusbhc, const struct wusb_ckhdid *chid)
-{
-	int result = 0;
-
-	if (memcmp(chid, &wusb_ckhdid_zero, sizeof(*chid)) == 0)
-		chid = NULL;
-
-	mutex_lock(&wusbhc->mutex);
-	if (chid) {
-		if (wusbhc->active) {
-			mutex_unlock(&wusbhc->mutex);
-			return -EBUSY;
-		}
-		wusbhc->chid = *chid;
-	}
-
-	/* register with UWB if we haven't already since we are about to start
-	    the radio. */
-	if ((chid) && (wusbhc->uwb_rc == NULL)) {
-		wusbhc->uwb_rc = uwb_rc_get_by_grandpa(wusbhc->dev->parent);
-		if (wusbhc->uwb_rc == NULL) {
-			result = -ENODEV;
-			dev_err(wusbhc->dev,
-				"Cannot get associated UWB Host Controller\n");
-			goto error_rc_get;
-		}
-
-		result = wusbhc_pal_register(wusbhc);
-		if (result < 0) {
-			dev_err(wusbhc->dev, "Cannot register as a UWB PAL\n");
-			goto error_pal_register;
-		}
-	}
-	mutex_unlock(&wusbhc->mutex);
-
-	if (chid)
-		result = uwb_radio_start(&wusbhc->pal);
-	else if (wusbhc->uwb_rc)
-		uwb_radio_stop(&wusbhc->pal);
-
-	return result;
-
-error_pal_register:
-	uwb_rc_put(wusbhc->uwb_rc);
-	wusbhc->uwb_rc = NULL;
-error_rc_get:
-	mutex_unlock(&wusbhc->mutex);
-
-	return result;
-}
-EXPORT_SYMBOL_GPL(wusbhc_chid_set);
diff --git a/drivers/usb/wusbcore/pal.c b/drivers/usb/wusbcore/pal.c
deleted file mode 100644
index 30f569131471..000000000000
--- a/drivers/usb/wusbcore/pal.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Host Controller
- * UWB Protocol Adaptation Layer (PAL) glue.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include "wusbhc.h"
-
-static void wusbhc_channel_changed(struct uwb_pal *pal, int channel)
-{
-	struct wusbhc *wusbhc = container_of(pal, struct wusbhc, pal);
-
-	dev_dbg(wusbhc->dev, "%s: channel = %d\n", __func__, channel);
-	if (channel < 0)
-		wusbhc_stop(wusbhc);
-	else
-		wusbhc_start(wusbhc);
-}
-
-/**
- * wusbhc_pal_register - register the WUSB HC as a UWB PAL
- * @wusbhc: the WUSB HC
- */
-int wusbhc_pal_register(struct wusbhc *wusbhc)
-{
-	uwb_pal_init(&wusbhc->pal);
-
-	wusbhc->pal.name   = "wusbhc";
-	wusbhc->pal.device = wusbhc->usb_hcd.self.controller;
-	wusbhc->pal.rc     = wusbhc->uwb_rc;
-	wusbhc->pal.channel_changed = wusbhc_channel_changed;
-
-	return uwb_pal_register(&wusbhc->pal);
-}
-
-/**
- * wusbhc_pal_unregister - unregister the WUSB HC as a UWB PAL
- * @wusbhc: the WUSB HC
- */
-void wusbhc_pal_unregister(struct wusbhc *wusbhc)
-{
-	if (wusbhc->uwb_rc)
-		uwb_pal_unregister(&wusbhc->pal);
-}
diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c
deleted file mode 100644
index 6dcfc6825f55..000000000000
--- a/drivers/usb/wusbcore/reservation.c
+++ /dev/null
@@ -1,110 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB cluster reservation management
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/uwb.h>
-
-#include "wusbhc.h"
-
-/*
- * WUSB cluster reservations are multicast reservations with the
- * broadcast cluster ID (BCID) as the target DevAddr.
- *
- * FIXME: consider adjusting the reservation depending on what devices
- * are attached.
- */
-
-static int wusbhc_bwa_set(struct wusbhc *wusbhc, u8 stream,
-	const struct uwb_mas_bm *mas)
-{
-	if (mas == NULL)
-		mas = &uwb_mas_bm_zero;
-	return wusbhc->bwa_set(wusbhc, stream, mas);
-}
-
-/**
- * wusbhc_rsv_complete_cb - WUSB HC reservation complete callback
- * @rsv:    the reservation
- *
- * Either set or clear the HC's view of the reservation.
- *
- * FIXME: when a reservation is denied the HC should be stopped.
- */
-static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv)
-{
-	struct wusbhc *wusbhc = rsv->pal_priv;
-	struct device *dev = wusbhc->dev;
-	struct uwb_mas_bm mas;
-
-	dev_dbg(dev, "%s: state = %d\n", __func__, rsv->state);
-	switch (rsv->state) {
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		uwb_rsv_get_usable_mas(rsv, &mas);
-		dev_dbg(dev, "established reservation: %*pb\n",
-			UWB_NUM_MAS, mas.bm);
-		wusbhc_bwa_set(wusbhc, rsv->stream, &mas);
-		break;
-	case UWB_RSV_STATE_NONE:
-		dev_dbg(dev, "removed reservation\n");
-		wusbhc_bwa_set(wusbhc, 0, NULL);
-		break;
-	default:
-		dev_dbg(dev, "unexpected reservation state: %d\n", rsv->state);
-		break;
-	}
-}
-
-
-/**
- * wusbhc_rsv_establish - establish a reservation for the cluster
- * @wusbhc: the WUSB HC requesting a bandwidth reservation
- */
-int wusbhc_rsv_establish(struct wusbhc *wusbhc)
-{
-	struct uwb_rc *rc = wusbhc->uwb_rc;
-	struct uwb_rsv *rsv;
-	struct uwb_dev_addr bcid;
-	int ret;
-
-	if (rc == NULL)
-		return -ENODEV;
-
-	rsv = uwb_rsv_create(rc, wusbhc_rsv_complete_cb, wusbhc);
-	if (rsv == NULL)
-		return -ENOMEM;
-
-	bcid.data[0] = wusbhc->cluster_id;
-	bcid.data[1] = 0;
-
-	rsv->target.type = UWB_RSV_TARGET_DEVADDR;
-	rsv->target.devaddr = bcid;
-	rsv->type = UWB_DRP_TYPE_PRIVATE;
-	rsv->max_mas = 256; /* try to get as much as possible */
-	rsv->min_mas = 15;  /* one MAS per zone */
-	rsv->max_interval = 1; /* max latency is one zone */
-	rsv->is_multicast = true;
-
-	ret = uwb_rsv_establish(rsv);
-	if (ret == 0)
-		wusbhc->rsv = rsv;
-	else
-		uwb_rsv_destroy(rsv);
-	return ret;
-}
-
-
-/**
- * wusbhc_rsv_terminate - terminate the cluster reservation
- * @wusbhc: the WUSB host whose reservation is to be terminated
- */
-void wusbhc_rsv_terminate(struct wusbhc *wusbhc)
-{
-	if (wusbhc->rsv) {
-		uwb_rsv_terminate(wusbhc->rsv);
-		uwb_rsv_destroy(wusbhc->rsv);
-		wusbhc->rsv = NULL;
-	}
-}
diff --git a/drivers/usb/wusbcore/rh.c b/drivers/usb/wusbcore/rh.c
deleted file mode 100644
index 20c08cd9dcbf..000000000000
--- a/drivers/usb/wusbcore/rh.c
+++ /dev/null
@@ -1,426 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Host Controller
- * Root Hub operations
- *
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * We fake a root hub that has fake ports (as many as simultaneous
- * devices the Wireless USB Host Controller can deal with). For each
- * port we keep an state in @wusbhc->port[index] identical to the one
- * specified in the USB2.0[ch11] spec and some extra device
- * information that complements the one in 'struct usb_device' (as
- * this lacs a hcpriv pointer).
- *
- * Note this is common to WHCI and HWA host controllers.
- *
- * Through here we enable most of the state changes that the USB stack
- * will use to connect or disconnect devices. We need to do some
- * forced adaptation of Wireless USB device states vs. wired:
- *
- *        USB:                 WUSB:
- *
- * Port   Powered-off          port slot n/a
- *        Powered-on           port slot available
- *        Disconnected         port slot available
- *        Connected            port slot assigned device
- *        		       device sent DN_Connect
- *                             device was authenticated
- *        Enabled              device is authenticated, transitioned
- *                             from unauth -> auth -> default address
- *                             -> enabled
- *        Reset                disconnect
- *        Disable              disconnect
- *
- * This maps the standard USB port states with the WUSB device states
- * so we can fake ports without having to modify the USB stack.
- *
- * FIXME: this process will change in the future
- *
- *
- * ENTRY POINTS
- *
- * Our entry points into here are, as in hcd.c, the USB stack root hub
- * ops defined in the usb_hcd struct:
- *
- * wusbhc_rh_status_data()	Provide hub and port status data bitmap
- *
- * wusbhc_rh_control()          Execution of all the major requests
- *                              you can do to a hub (Set|Clear
- *                              features, get descriptors, status, etc).
- *
- * wusbhc_rh_[suspend|resume]() That
- *
- * wusbhc_rh_start_port_reset() ??? unimplemented
- */
-#include <linux/slab.h>
-#include <linux/export.h>
-#include "wusbhc.h"
-
-/*
- * Reset a fake port
- *
- * Using a Reset Device IE is too heavyweight as it causes the device
- * to enter the UnConnected state and leave the cluster, this can mean
- * that when the device reconnects it is connected to a different fake
- * port.
- *
- * Instead, reset authenticated devices with a SetAddress(0), followed
- * by a SetAddresss(AuthAddr).
- *
- * For unauthenticated devices just pretend to reset but do nothing.
- * If the device initialization continues to fail it will eventually
- * time out after TrustTimeout and enter the UnConnected state.
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- *
- * Supposedly we are the only thread accesing @wusbhc->port; in any
- * case, maybe we should move the mutex locking from
- * wusbhc_devconnect_auth() to here.
- *
- * @port_idx refers to the wusbhc's port index, not the USB port number
- */
-static int wusbhc_rh_port_reset(struct wusbhc *wusbhc, u8 port_idx)
-{
-	int result = 0;
-	struct wusb_port *port = wusb_port_by_idx(wusbhc, port_idx);
-	struct wusb_dev *wusb_dev = port->wusb_dev;
-
-	if (wusb_dev == NULL)
-		return -ENOTCONN;
-
-	port->status |= USB_PORT_STAT_RESET;
-	port->change |= USB_PORT_STAT_C_RESET;
-
-	if (wusb_dev->addr & WUSB_DEV_ADDR_UNAUTH)
-		result = 0;
-	else
-		result = wusb_dev_update_address(wusbhc, wusb_dev);
-
-	port->status &= ~USB_PORT_STAT_RESET;
-	port->status |= USB_PORT_STAT_ENABLE;
-	port->change |= USB_PORT_STAT_C_RESET | USB_PORT_STAT_C_ENABLE;	
-
-	return result;
-}
-
-/*
- * Return the hub change status bitmap
- *
- * The bits in the change status bitmap are cleared when a
- * ClearPortFeature request is issued (USB2.0[11.12.3,11.12.4].
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- *
- * WARNING!! This gets called from atomic context; we cannot get the
- *           mutex--the only race condition we can find is some bit
- *           changing just after we copy it, which shouldn't be too
- *           big of a problem [and we can't make it an spinlock
- *           because other parts need to take it and sleep] .
- *
- *           @usb_hcd is refcounted, so it won't disappear under us
- *           and before killing a host, the polling of the root hub
- *           would be stopped anyway.
- */
-int wusbhc_rh_status_data(struct usb_hcd *usb_hcd, char *_buf)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	size_t cnt, size, bits_set = 0;
-
-	/* WE DON'T LOCK, see comment */
-	/* round up to bytes.  Hub bit is bit 0 so add 1. */
-	size = DIV_ROUND_UP(wusbhc->ports_max + 1, 8);
-
-	/* clear the output buffer. */
-	memset(_buf, 0, size);
-	/* set the bit for each changed port. */
-	for (cnt = 0; cnt < wusbhc->ports_max; cnt++) {
-
-		if (wusb_port_by_idx(wusbhc, cnt)->change) {
-			const int bitpos = cnt+1;
-
-			_buf[bitpos/8] |= (1 << (bitpos % 8));
-			bits_set++;
-		}
-	}
-
-	return bits_set ? size : 0;
-}
-EXPORT_SYMBOL_GPL(wusbhc_rh_status_data);
-
-/*
- * Return the hub's descriptor
- *
- * NOTE: almost cut and paste from ehci-hub.c
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked
- */
-static int wusbhc_rh_get_hub_descr(struct wusbhc *wusbhc, u16 wValue,
-				   u16 wIndex,
-				   struct usb_hub_descriptor *descr,
-				   u16 wLength)
-{
-	u16 temp = 1 + (wusbhc->ports_max / 8);
-	u8 length = 7 + 2 * temp;
-
-	if (wLength < length)
-		return -ENOSPC;
-	descr->bDescLength = 7 + 2 * temp;
-	descr->bDescriptorType = USB_DT_HUB; /* HUB type */
-	descr->bNbrPorts = wusbhc->ports_max;
-	descr->wHubCharacteristics = cpu_to_le16(
-		HUB_CHAR_COMMON_LPSM	/* All ports power at once */
-		| 0x00			/* not part of compound device */
-		| HUB_CHAR_NO_OCPM	/* No overcurrent protection */
-		| 0x00			/* 8 FS think time FIXME ?? */
-		| 0x00);		/* No port indicators */
-	descr->bPwrOn2PwrGood = 0;
-	descr->bHubContrCurrent = 0;
-	/* two bitmaps:  ports removable, and usb 1.0 legacy PortPwrCtrlMask */
-	memset(&descr->u.hs.DeviceRemovable[0], 0, temp);
-	memset(&descr->u.hs.DeviceRemovable[temp], 0xff, temp);
-	return 0;
-}
-
-/*
- * Clear a hub feature
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- *
- * Nothing to do, so no locking needed ;)
- */
-static int wusbhc_rh_clear_hub_feat(struct wusbhc *wusbhc, u16 feature)
-{
-	int result;
-
-	switch (feature) {
-	case C_HUB_LOCAL_POWER:
-		/* FIXME: maybe plug bit 0 to the power input status,
-		 * if any?
-		 * see wusbhc_rh_get_hub_status() */
-	case C_HUB_OVER_CURRENT:
-		result = 0;
-		break;
-	default:
-		result = -EPIPE;
-	}
-	return result;
-}
-
-/*
- * Return hub status (it is always zero...)
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- *
- * Nothing to do, so no locking needed ;)
- */
-static int wusbhc_rh_get_hub_status(struct wusbhc *wusbhc, u32 *buf,
-				    u16 wLength)
-{
-	/* FIXME: maybe plug bit 0 to the power input status (if any)? */
-	*buf = 0;
-	return 0;
-}
-
-/*
- * Set a port feature
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- */
-static int wusbhc_rh_set_port_feat(struct wusbhc *wusbhc, u16 feature,
-				   u8 selector, u8 port_idx)
-{
-	struct device *dev = wusbhc->dev;
-
-	if (port_idx > wusbhc->ports_max)
-		return -EINVAL;
-
-	switch (feature) {
-		/* According to USB2.0[11.24.2.13]p2, these features
-		 * are not required to be implemented. */
-	case USB_PORT_FEAT_C_OVER_CURRENT:
-	case USB_PORT_FEAT_C_ENABLE:
-	case USB_PORT_FEAT_C_SUSPEND:
-	case USB_PORT_FEAT_C_CONNECTION:
-	case USB_PORT_FEAT_C_RESET:
-		return 0;
-	case USB_PORT_FEAT_POWER:
-		/* No such thing, but we fake it works */
-		mutex_lock(&wusbhc->mutex);
-		wusb_port_by_idx(wusbhc, port_idx)->status |= USB_PORT_STAT_POWER;
-		mutex_unlock(&wusbhc->mutex);
-		return 0;
-	case USB_PORT_FEAT_RESET:
-		return wusbhc_rh_port_reset(wusbhc, port_idx);
-	case USB_PORT_FEAT_ENABLE:
-	case USB_PORT_FEAT_SUSPEND:
-		dev_err(dev, "(port_idx %d) set feat %d/%d UNIMPLEMENTED\n",
-			port_idx, feature, selector);
-		return -ENOSYS;
-	default:
-		dev_err(dev, "(port_idx %d) set feat %d/%d UNKNOWN\n",
-			port_idx, feature, selector);
-		return -EPIPE;
-	}
-
-	return 0;
-}
-
-/*
- * Clear a port feature...
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- */
-static int wusbhc_rh_clear_port_feat(struct wusbhc *wusbhc, u16 feature,
-				     u8 selector, u8 port_idx)
-{
-	int result = 0;
-	struct device *dev = wusbhc->dev;
-
-	if (port_idx > wusbhc->ports_max)
-		return -EINVAL;
-
-	mutex_lock(&wusbhc->mutex);
-	switch (feature) {
-	case USB_PORT_FEAT_POWER:	/* fake port always on */
-		/* According to USB2.0[11.24.2.7.1.4], no need to implement? */
-	case USB_PORT_FEAT_C_OVER_CURRENT:
-		break;
-	case USB_PORT_FEAT_C_RESET:
-		wusb_port_by_idx(wusbhc, port_idx)->change &= ~USB_PORT_STAT_C_RESET;
-		break;
-	case USB_PORT_FEAT_C_CONNECTION:
-		wusb_port_by_idx(wusbhc, port_idx)->change &= ~USB_PORT_STAT_C_CONNECTION;
-		break;
-	case USB_PORT_FEAT_ENABLE:
-		__wusbhc_dev_disable(wusbhc, port_idx);
-		break;
-	case USB_PORT_FEAT_C_ENABLE:
-		wusb_port_by_idx(wusbhc, port_idx)->change &= ~USB_PORT_STAT_C_ENABLE;
-		break;
-	case USB_PORT_FEAT_SUSPEND:
-	case USB_PORT_FEAT_C_SUSPEND:
-		dev_err(dev, "(port_idx %d) Clear feat %d/%d UNIMPLEMENTED\n",
-			port_idx, feature, selector);
-		result = -ENOSYS;
-		break;
-	default:
-		dev_err(dev, "(port_idx %d) Clear feat %d/%d UNKNOWN\n",
-			port_idx, feature, selector);
-		result = -EPIPE;
-		break;
-	}
-	mutex_unlock(&wusbhc->mutex);
-
-	return result;
-}
-
-/*
- * Return the port's status
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- */
-static int wusbhc_rh_get_port_status(struct wusbhc *wusbhc, u16 port_idx,
-				     u32 *_buf, u16 wLength)
-{
-	__le16 *buf = (__le16 *)_buf;
-
-	if (port_idx > wusbhc->ports_max)
-		return -EINVAL;
-
-	mutex_lock(&wusbhc->mutex);
-	buf[0] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->status);
-	buf[1] = cpu_to_le16(wusb_port_by_idx(wusbhc, port_idx)->change);
-	mutex_unlock(&wusbhc->mutex);
-
-	return 0;
-}
-
-/*
- * Entry point for Root Hub operations
- *
- * @wusbhc is assumed referenced and @wusbhc->mutex unlocked.
- */
-int wusbhc_rh_control(struct usb_hcd *usb_hcd, u16 reqntype, u16 wValue,
-		      u16 wIndex, char *buf, u16 wLength)
-{
-	int result = -ENOSYS;
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-
-	switch (reqntype) {
-	case GetHubDescriptor:
-		result = wusbhc_rh_get_hub_descr(
-			wusbhc, wValue, wIndex,
-			(struct usb_hub_descriptor *) buf, wLength);
-		break;
-	case ClearHubFeature:
-		result = wusbhc_rh_clear_hub_feat(wusbhc, wValue);
-		break;
-	case GetHubStatus:
-		result = wusbhc_rh_get_hub_status(wusbhc, (u32 *)buf, wLength);
-		break;
-
-	case SetPortFeature:
-		result = wusbhc_rh_set_port_feat(wusbhc, wValue, wIndex >> 8,
-						 (wIndex & 0xff) - 1);
-		break;
-	case ClearPortFeature:
-		result = wusbhc_rh_clear_port_feat(wusbhc, wValue, wIndex >> 8,
-						   (wIndex & 0xff) - 1);
-		break;
-	case GetPortStatus:
-		result = wusbhc_rh_get_port_status(wusbhc, wIndex - 1,
-						   (u32 *)buf, wLength);
-		break;
-
-	case SetHubFeature:
-	default:
-		dev_err(wusbhc->dev, "%s (%p [%p], %x, %x, %x, %p, %x) "
-			"UNIMPLEMENTED\n", __func__, usb_hcd, wusbhc, reqntype,
-			wValue, wIndex, buf, wLength);
-		/* dump_stack(); */
-		result = -ENOSYS;
-	}
-	return result;
-}
-EXPORT_SYMBOL_GPL(wusbhc_rh_control);
-
-int wusbhc_rh_start_port_reset(struct usb_hcd *usb_hcd, unsigned port_idx)
-{
-	struct wusbhc *wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-	dev_err(wusbhc->dev, "%s (%p [%p], port_idx %u) UNIMPLEMENTED\n",
-		__func__, usb_hcd, wusbhc, port_idx);
-	WARN_ON(1);
-	return -ENOSYS;
-}
-EXPORT_SYMBOL_GPL(wusbhc_rh_start_port_reset);
-
-static void wusb_port_init(struct wusb_port *port)
-{
-	port->status |= USB_PORT_STAT_HIGH_SPEED;
-}
-
-/*
- * Alloc fake port specific fields and status.
- */
-int wusbhc_rh_create(struct wusbhc *wusbhc)
-{
-	int result = -ENOMEM;
-	size_t port_size, itr;
-	port_size = wusbhc->ports_max * sizeof(wusbhc->port[0]);
-	wusbhc->port = kzalloc(port_size, GFP_KERNEL);
-	if (wusbhc->port == NULL)
-		goto error_port_alloc;
-	for (itr = 0; itr < wusbhc->ports_max; itr++)
-		wusb_port_init(&wusbhc->port[itr]);
-	result = 0;
-error_port_alloc:
-	return result;
-}
-
-void wusbhc_rh_destroy(struct wusbhc *wusbhc)
-{
-	kfree(wusbhc->port);
-}
diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c
deleted file mode 100644
index 14ac8c98ac9e..000000000000
--- a/drivers/usb/wusbcore/security.c
+++ /dev/null
@@ -1,599 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Host Controller
- * Security support: encryption enablement, etc
- *
- * Copyright (C) 2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- */
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/usb/ch9.h>
-#include <linux/random.h>
-#include <linux/export.h>
-#include "wusbhc.h"
-#include <asm/unaligned.h>
-
-static void wusbhc_gtk_rekey_work(struct work_struct *work);
-
-int wusbhc_sec_create(struct wusbhc *wusbhc)
-{
-	/*
-	 * WQ is singlethread because we need to serialize rekey operations.
-	 * Use a separate workqueue for security operations instead of the
-	 * wusbd workqueue because security operations may need to communicate
-	 * directly with downstream wireless devices using synchronous URBs.
-	 * If a device is not responding, this could block other host
-	 * controller operations.
-	 */
-	wusbhc->wq_security = create_singlethread_workqueue("wusbd_security");
-	if (wusbhc->wq_security == NULL) {
-		pr_err("WUSB-core: Cannot create wusbd_security workqueue\n");
-		return -ENOMEM;
-	}
-
-	wusbhc->gtk.descr.bLength = sizeof(wusbhc->gtk.descr) +
-		sizeof(wusbhc->gtk.data);
-	wusbhc->gtk.descr.bDescriptorType = USB_DT_KEY;
-	wusbhc->gtk.descr.bReserved = 0;
-	wusbhc->gtk_index = 0;
-
-	INIT_WORK(&wusbhc->gtk_rekey_work, wusbhc_gtk_rekey_work);
-
-	return 0;
-}
-
-
-/* Called when the HC is destroyed */
-void wusbhc_sec_destroy(struct wusbhc *wusbhc)
-{
-	destroy_workqueue(wusbhc->wq_security);
-}
-
-
-/**
- * wusbhc_next_tkid - generate a new, currently unused, TKID
- * @wusbhc:   the WUSB host controller
- * @wusb_dev: the device whose PTK the TKID is for
- *            (or NULL for a TKID for a GTK)
- *
- * The generated TKID consists of two parts: the device's authenticated
- * address (or 0 or a GTK); and an incrementing number.  This ensures
- * that TKIDs cannot be shared between devices and by the time the
- * incrementing number wraps around the older TKIDs will no longer be
- * in use (a maximum of two keys may be active at any one time).
- */
-static u32 wusbhc_next_tkid(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
-{
-	u32 *tkid;
-	u32 addr;
-
-	if (wusb_dev == NULL) {
-		tkid = &wusbhc->gtk_tkid;
-		addr = 0;
-	} else {
-		tkid = &wusb_port_by_idx(wusbhc, wusb_dev->port_idx)->ptk_tkid;
-		addr = wusb_dev->addr & 0x7f;
-	}
-
-	*tkid = (addr << 8) | ((*tkid + 1) & 0xff);
-
-	return *tkid;
-}
-
-static void wusbhc_generate_gtk(struct wusbhc *wusbhc)
-{
-	const size_t key_size = sizeof(wusbhc->gtk.data);
-	u32 tkid;
-
-	tkid = wusbhc_next_tkid(wusbhc, NULL);
-
-	wusbhc->gtk.descr.tTKID[0] = (tkid >>  0) & 0xff;
-	wusbhc->gtk.descr.tTKID[1] = (tkid >>  8) & 0xff;
-	wusbhc->gtk.descr.tTKID[2] = (tkid >> 16) & 0xff;
-
-	get_random_bytes(wusbhc->gtk.descr.bKeyData, key_size);
-}
-
-/**
- * wusbhc_sec_start - start the security management process
- * @wusbhc: the WUSB host controller
- *
- * Generate and set an initial GTK on the host controller.
- *
- * Called when the HC is started.
- */
-int wusbhc_sec_start(struct wusbhc *wusbhc)
-{
-	const size_t key_size = sizeof(wusbhc->gtk.data);
-	int result;
-
-	wusbhc_generate_gtk(wusbhc);
-
-	result = wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid,
-				&wusbhc->gtk.descr.bKeyData, key_size);
-	if (result < 0)
-		dev_err(wusbhc->dev, "cannot set GTK for the host: %d\n",
-			result);
-
-	return result;
-}
-
-/**
- * wusbhc_sec_stop - stop the security management process
- * @wusbhc: the WUSB host controller
- *
- * Wait for any pending GTK rekeys to stop.
- */
-void wusbhc_sec_stop(struct wusbhc *wusbhc)
-{
-	cancel_work_sync(&wusbhc->gtk_rekey_work);
-}
-
-
-/** @returns encryption type name */
-const char *wusb_et_name(u8 x)
-{
-	switch (x) {
-	case USB_ENC_TYPE_UNSECURE:	return "unsecure";
-	case USB_ENC_TYPE_WIRED:	return "wired";
-	case USB_ENC_TYPE_CCM_1:	return "CCM-1";
-	case USB_ENC_TYPE_RSA_1:	return "RSA-1";
-	default:			return "unknown";
-	}
-}
-EXPORT_SYMBOL_GPL(wusb_et_name);
-
-/*
- * Set the device encryption method
- *
- * We tell the device which encryption method to use; we do this when
- * setting up the device's security.
- */
-static int wusb_dev_set_encryption(struct usb_device *usb_dev, int value)
-{
-	int result;
-	struct device *dev = &usb_dev->dev;
-	struct wusb_dev *wusb_dev = usb_dev->wusb_dev;
-
-	if (value) {
-		value = wusb_dev->ccm1_etd.bEncryptionValue;
-	} else {
-		/* FIXME: should be wusb_dev->etd[UNSECURE].bEncryptionValue */
-		value = 0;
-	}
-	/* Set device's */
-	result = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
-			USB_REQ_SET_ENCRYPTION,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			value, 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (result < 0)
-		dev_err(dev, "Can't set device's WUSB encryption to "
-			"%s (value %d): %d\n",
-			wusb_et_name(wusb_dev->ccm1_etd.bEncryptionType),
-			wusb_dev->ccm1_etd.bEncryptionValue,  result);
-	return result;
-}
-
-/*
- * Set the GTK to be used by a device.
- *
- * The device must be authenticated.
- */
-static int wusb_dev_set_gtk(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
-{
-	struct usb_device *usb_dev = wusb_dev->usb_dev;
-	u8 key_index = wusb_key_index(wusbhc->gtk_index,
-		WUSB_KEY_INDEX_TYPE_GTK, WUSB_KEY_INDEX_ORIGINATOR_HOST);
-
-	return usb_control_msg(
-		usb_dev, usb_sndctrlpipe(usb_dev, 0),
-		USB_REQ_SET_DESCRIPTOR,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		USB_DT_KEY << 8 | key_index, 0,
-		&wusbhc->gtk.descr, wusbhc->gtk.descr.bLength,
-		USB_CTRL_SET_TIMEOUT);
-}
-
-
-/* FIXME: prototype for adding security */
-int wusb_dev_sec_add(struct wusbhc *wusbhc,
-		     struct usb_device *usb_dev, struct wusb_dev *wusb_dev)
-{
-	int result, bytes, secd_size;
-	struct device *dev = &usb_dev->dev;
-	struct usb_security_descriptor *secd, *new_secd;
-	const struct usb_encryption_descriptor *etd, *ccm1_etd = NULL;
-	const void *itr, *top;
-	char buf[64];
-
-	secd = kmalloc(sizeof(*secd), GFP_KERNEL);
-	if (secd == NULL) {
-		result = -ENOMEM;
-		goto out;
-	}
-
-	result = usb_get_descriptor(usb_dev, USB_DT_SECURITY,
-				    0, secd, sizeof(*secd));
-	if (result < (int)sizeof(*secd)) {
-		dev_err(dev, "Can't read security descriptor or "
-			"not enough data: %d\n", result);
-		goto out;
-	}
-	secd_size = le16_to_cpu(secd->wTotalLength);
-	new_secd = krealloc(secd, secd_size, GFP_KERNEL);
-	if (new_secd == NULL) {
-		dev_err(dev,
-			"Can't allocate space for security descriptors\n");
-		result = -ENOMEM;
-		goto out;
-	}
-	secd = new_secd;
-	result = usb_get_descriptor(usb_dev, USB_DT_SECURITY,
-				    0, secd, secd_size);
-	if (result < secd_size) {
-		dev_err(dev, "Can't read security descriptor or "
-			"not enough data: %d\n", result);
-		goto out;
-	}
-	bytes = 0;
-	itr = &secd[1];
-	top = (void *)secd + result;
-	while (itr < top) {
-		etd = itr;
-		if (top - itr < sizeof(*etd)) {
-			dev_err(dev, "BUG: bad device security descriptor; "
-				"not enough data (%zu vs %zu bytes left)\n",
-				top - itr, sizeof(*etd));
-			break;
-		}
-		if (etd->bLength < sizeof(*etd)) {
-			dev_err(dev, "BUG: bad device encryption descriptor; "
-				"descriptor is too short "
-				"(%u vs %zu needed)\n",
-				etd->bLength, sizeof(*etd));
-			break;
-		}
-		itr += etd->bLength;
-		bytes += snprintf(buf + bytes, sizeof(buf) - bytes,
-				  "%s (0x%02x/%02x) ",
-				  wusb_et_name(etd->bEncryptionType),
-				  etd->bEncryptionValue, etd->bAuthKeyIndex);
-		if (etd->bEncryptionType == USB_ENC_TYPE_CCM_1)
-			ccm1_etd = etd;
-	}
-	/* This code only supports CCM1 as of now. */
-	/* FIXME: user has to choose which sec mode to use?
-	 * In theory we want CCM */
-	if (ccm1_etd == NULL) {
-		dev_err(dev, "WUSB device doesn't support CCM1 encryption, "
-			"can't use!\n");
-		result = -EINVAL;
-		goto out;
-	}
-	wusb_dev->ccm1_etd = *ccm1_etd;
-	dev_dbg(dev, "supported encryption: %s; using %s (0x%02x/%02x)\n",
-		buf, wusb_et_name(ccm1_etd->bEncryptionType),
-		ccm1_etd->bEncryptionValue, ccm1_etd->bAuthKeyIndex);
-	result = 0;
-out:
-	kfree(secd);
-	return result;
-}
-
-void wusb_dev_sec_rm(struct wusb_dev *wusb_dev)
-{
-	/* Nothing so far */
-}
-
-/**
- * Update the address of an unauthenticated WUSB device
- *
- * Once we have successfully authenticated, we take it to addr0 state
- * and then to a normal address.
- *
- * Before the device's address (as known by it) was usb_dev->devnum |
- * 0x80 (unauthenticated address). With this we update it to usb_dev->devnum.
- */
-int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev)
-{
-	int result = -ENOMEM;
-	struct usb_device *usb_dev = wusb_dev->usb_dev;
-	struct device *dev = &usb_dev->dev;
-	u8 new_address = wusb_dev->addr & 0x7F;
-
-	/* Set address 0 */
-	result = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
-			USB_REQ_SET_ADDRESS,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "auth failed: can't set address 0: %d\n",
-			result);
-		goto error_addr0;
-	}
-	result = wusb_set_dev_addr(wusbhc, wusb_dev, 0);
-	if (result < 0)
-		goto error_addr0;
-	usb_set_device_state(usb_dev, USB_STATE_DEFAULT);
-	usb_ep0_reinit(usb_dev);
-
-	/* Set new (authenticated) address. */
-	result = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0),
-			USB_REQ_SET_ADDRESS,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			new_address, 0, NULL, 0,
-			USB_CTRL_SET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "auth failed: can't set address %u: %d\n",
-			new_address, result);
-		goto error_addr;
-	}
-	result = wusb_set_dev_addr(wusbhc, wusb_dev, new_address);
-	if (result < 0)
-		goto error_addr;
-	usb_set_device_state(usb_dev, USB_STATE_ADDRESS);
-	usb_ep0_reinit(usb_dev);
-	usb_dev->authenticated = 1;
-error_addr:
-error_addr0:
-	return result;
-}
-
-/*
- *
- *
- */
-/* FIXME: split and cleanup */
-int wusb_dev_4way_handshake(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
-			    struct wusb_ckhdid *ck)
-{
-	int result = -ENOMEM;
-	struct usb_device *usb_dev = wusb_dev->usb_dev;
-	struct device *dev = &usb_dev->dev;
-	u32 tkid;
-	struct usb_handshake *hs;
-	struct aes_ccm_nonce ccm_n;
-	u8 mic[8];
-	struct wusb_keydvt_in keydvt_in;
-	struct wusb_keydvt_out keydvt_out;
-
-	hs = kcalloc(3, sizeof(hs[0]), GFP_KERNEL);
-	if (!hs)
-		goto error_kzalloc;
-
-	/* We need to turn encryption before beginning the 4way
-	 * hshake (WUSB1.0[.3.2.2]) */
-	result = wusb_dev_set_encryption(usb_dev, 1);
-	if (result < 0)
-		goto error_dev_set_encryption;
-
-	tkid = wusbhc_next_tkid(wusbhc, wusb_dev);
-
-	hs[0].bMessageNumber = 1;
-	hs[0].bStatus = 0;
-	put_unaligned_le32(tkid, hs[0].tTKID);
-	hs[0].bReserved = 0;
-	memcpy(hs[0].CDID, &wusb_dev->cdid, sizeof(hs[0].CDID));
-	get_random_bytes(&hs[0].nonce, sizeof(hs[0].nonce));
-	memset(hs[0].MIC, 0, sizeof(hs[0].MIC)); /* Per WUSB1.0[T7-22] */
-
-	result = usb_control_msg(
-		usb_dev, usb_sndctrlpipe(usb_dev, 0),
-		USB_REQ_SET_HANDSHAKE,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		1, 0, &hs[0], sizeof(hs[0]), USB_CTRL_SET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "Handshake1: request failed: %d\n", result);
-		goto error_hs1;
-	}
-
-	/* Handshake 2, from the device -- need to verify fields */
-	result = usb_control_msg(
-		usb_dev, usb_rcvctrlpipe(usb_dev, 0),
-		USB_REQ_GET_HANDSHAKE,
-		USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		2, 0, &hs[1], sizeof(hs[1]), USB_CTRL_GET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "Handshake2: request failed: %d\n", result);
-		goto error_hs2;
-	}
-
-	result = -EINVAL;
-	if (hs[1].bMessageNumber != 2) {
-		dev_err(dev, "Handshake2 failed: bad message number %u\n",
-			hs[1].bMessageNumber);
-		goto error_hs2;
-	}
-	if (hs[1].bStatus != 0) {
-		dev_err(dev, "Handshake2 failed: bad status %u\n",
-			hs[1].bStatus);
-		goto error_hs2;
-	}
-	if (memcmp(hs[0].tTKID, hs[1].tTKID, sizeof(hs[0].tTKID))) {
-		dev_err(dev, "Handshake2 failed: TKID mismatch "
-			"(#1 0x%02x%02x%02x vs #2 0x%02x%02x%02x)\n",
-			hs[0].tTKID[0], hs[0].tTKID[1], hs[0].tTKID[2],
-			hs[1].tTKID[0], hs[1].tTKID[1], hs[1].tTKID[2]);
-		goto error_hs2;
-	}
-	if (memcmp(hs[0].CDID, hs[1].CDID, sizeof(hs[0].CDID))) {
-		dev_err(dev, "Handshake2 failed: CDID mismatch\n");
-		goto error_hs2;
-	}
-
-	/* Setup the CCM nonce */
-	memset(&ccm_n.sfn, 0, sizeof(ccm_n.sfn)); /* Per WUSB1.0[6.5.2] */
-	put_unaligned_le32(tkid, ccm_n.tkid);
-	ccm_n.src_addr = wusbhc->uwb_rc->uwb_dev.dev_addr;
-	ccm_n.dest_addr.data[0] = wusb_dev->addr;
-	ccm_n.dest_addr.data[1] = 0;
-
-	/* Derive the KCK and PTK from CK, the CCM, H and D nonces */
-	memcpy(keydvt_in.hnonce, hs[0].nonce, sizeof(keydvt_in.hnonce));
-	memcpy(keydvt_in.dnonce, hs[1].nonce, sizeof(keydvt_in.dnonce));
-	result = wusb_key_derive(&keydvt_out, ck->data, &ccm_n, &keydvt_in);
-	if (result < 0) {
-		dev_err(dev, "Handshake2 failed: cannot derive keys: %d\n",
-			result);
-		goto error_hs2;
-	}
-
-	/* Compute MIC and verify it */
-	result = wusb_oob_mic(mic, keydvt_out.kck, &ccm_n, &hs[1]);
-	if (result < 0) {
-		dev_err(dev, "Handshake2 failed: cannot compute MIC: %d\n",
-			result);
-		goto error_hs2;
-	}
-
-	if (memcmp(hs[1].MIC, mic, sizeof(hs[1].MIC))) {
-		dev_err(dev, "Handshake2 failed: MIC mismatch\n");
-		goto error_hs2;
-	}
-
-	/* Send Handshake3 */
-	hs[2].bMessageNumber = 3;
-	hs[2].bStatus = 0;
-	put_unaligned_le32(tkid, hs[2].tTKID);
-	hs[2].bReserved = 0;
-	memcpy(hs[2].CDID, &wusb_dev->cdid, sizeof(hs[2].CDID));
-	memcpy(hs[2].nonce, hs[0].nonce, sizeof(hs[2].nonce));
-	result = wusb_oob_mic(hs[2].MIC, keydvt_out.kck, &ccm_n, &hs[2]);
-	if (result < 0) {
-		dev_err(dev, "Handshake3 failed: cannot compute MIC: %d\n",
-			result);
-		goto error_hs2;
-	}
-
-	result = usb_control_msg(
-		usb_dev, usb_sndctrlpipe(usb_dev, 0),
-		USB_REQ_SET_HANDSHAKE,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		3, 0, &hs[2], sizeof(hs[2]), USB_CTRL_SET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "Handshake3: request failed: %d\n", result);
-		goto error_hs3;
-	}
-
-	result = wusbhc->set_ptk(wusbhc, wusb_dev->port_idx, tkid,
-				 keydvt_out.ptk, sizeof(keydvt_out.ptk));
-	if (result < 0)
-		goto error_wusbhc_set_ptk;
-
-	result = wusb_dev_set_gtk(wusbhc, wusb_dev);
-	if (result < 0) {
-		dev_err(dev, "Set GTK for device: request failed: %d\n",
-			result);
-		goto error_wusbhc_set_gtk;
-	}
-
-	/* Update the device's address from unauth to auth */
-	if (usb_dev->authenticated == 0) {
-		result = wusb_dev_update_address(wusbhc, wusb_dev);
-		if (result < 0)
-			goto error_dev_update_address;
-	}
-	result = 0;
-	dev_info(dev, "device authenticated\n");
-
-error_dev_update_address:
-error_wusbhc_set_gtk:
-error_wusbhc_set_ptk:
-error_hs3:
-error_hs2:
-error_hs1:
-	memset(hs, 0, 3*sizeof(hs[0]));
-	memzero_explicit(&keydvt_out, sizeof(keydvt_out));
-	memzero_explicit(&keydvt_in, sizeof(keydvt_in));
-	memzero_explicit(&ccm_n, sizeof(ccm_n));
-	memzero_explicit(mic, sizeof(mic));
-	if (result < 0)
-		wusb_dev_set_encryption(usb_dev, 0);
-error_dev_set_encryption:
-	kfree(hs);
-error_kzalloc:
-	return result;
-}
-
-/*
- * Once all connected and authenticated devices have received the new
- * GTK, switch the host to using it.
- */
-static void wusbhc_gtk_rekey_work(struct work_struct *work)
-{
-	struct wusbhc *wusbhc = container_of(work,
-					struct wusbhc, gtk_rekey_work);
-	size_t key_size = sizeof(wusbhc->gtk.data);
-	int port_idx;
-	struct wusb_dev *wusb_dev, *wusb_dev_next;
-	LIST_HEAD(rekey_list);
-
-	mutex_lock(&wusbhc->mutex);
-	/* generate the new key */
-	wusbhc_generate_gtk(wusbhc);
-	/* roll the gtk index. */
-	wusbhc->gtk_index = (wusbhc->gtk_index + 1) % (WUSB_KEY_INDEX_MAX + 1);
-	/*
-	 * Save all connected devices on a list while holding wusbhc->mutex and
-	 * take a reference to each one.  Then submit the set key request to
-	 * them after releasing the lock in order to avoid a deadlock.
-	 */
-	for (port_idx = 0; port_idx < wusbhc->ports_max; port_idx++) {
-		wusb_dev = wusbhc->port[port_idx].wusb_dev;
-		if (!wusb_dev || !wusb_dev->usb_dev
-			|| !wusb_dev->usb_dev->authenticated)
-			continue;
-
-		wusb_dev_get(wusb_dev);
-		list_add_tail(&wusb_dev->rekey_node, &rekey_list);
-	}
-	mutex_unlock(&wusbhc->mutex);
-
-	/* Submit the rekey requests without holding wusbhc->mutex. */
-	list_for_each_entry_safe(wusb_dev, wusb_dev_next, &rekey_list,
-		rekey_node) {
-		list_del_init(&wusb_dev->rekey_node);
-		dev_dbg(&wusb_dev->usb_dev->dev,
-			"%s: rekey device at port %d\n",
-			__func__, wusb_dev->port_idx);
-
-		if (wusb_dev_set_gtk(wusbhc, wusb_dev) < 0) {
-			dev_err(&wusb_dev->usb_dev->dev,
-				"%s: rekey device at port %d failed\n",
-				__func__, wusb_dev->port_idx);
-		}
-		wusb_dev_put(wusb_dev);
-	}
-
-	/* Switch the host controller to use the new GTK. */
-	mutex_lock(&wusbhc->mutex);
-	wusbhc->set_gtk(wusbhc, wusbhc->gtk_tkid,
-		&wusbhc->gtk.descr.bKeyData, key_size);
-	mutex_unlock(&wusbhc->mutex);
-}
-
-/**
- * wusbhc_gtk_rekey - generate and distribute a new GTK
- * @wusbhc: the WUSB host controller
- *
- * Generate a new GTK and distribute it to all connected and
- * authenticated devices.  When all devices have the new GTK, the host
- * starts using it.
- *
- * This must be called after every device disconnect (see [WUSB]
- * section 6.2.11.2).
- */
-void wusbhc_gtk_rekey(struct wusbhc *wusbhc)
-{
-	/*
-	 * We need to submit a URB to the downstream WUSB devices in order to
-	 * change the group key.  This can't be done while holding the
-	 * wusbhc->mutex since that is also taken in the urb_enqueue routine
-	 * and will cause a deadlock.  Instead, queue a work item to do
-	 * it when the lock is not held
-	 */
-	queue_work(wusbhc->wq_security, &wusbhc->gtk_rekey_work);
-}
diff --git a/drivers/usb/wusbcore/wa-hc.c b/drivers/usb/wusbcore/wa-hc.c
deleted file mode 100644
index 6827075fb8a1..000000000000
--- a/drivers/usb/wusbcore/wa-hc.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wire Adapter Host Controller Driver
- * Common items to HWA and DWA based HCDs
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- */
-#include <linux/slab.h>
-#include <linux/module.h>
-#include "wusbhc.h"
-#include "wa-hc.h"
-
-/**
- * Assumes
- *
- * wa->usb_dev and wa->usb_iface initialized and refcounted,
- * wa->wa_descr initialized.
- */
-int wa_create(struct wahc *wa, struct usb_interface *iface,
-	kernel_ulong_t quirks)
-{
-	int result;
-	struct device *dev = &iface->dev;
-
-	if (iface->cur_altsetting->desc.bNumEndpoints < 3)
-		return -ENODEV;
-
-	result = wa_rpipes_create(wa);
-	if (result < 0)
-		goto error_rpipes_create;
-	wa->quirks = quirks;
-	/* Fill up Data Transfer EP pointers */
-	wa->dti_epd = &iface->cur_altsetting->endpoint[1].desc;
-	wa->dto_epd = &iface->cur_altsetting->endpoint[2].desc;
-	wa->dti_buf_size = usb_endpoint_maxp(wa->dti_epd);
-	wa->dti_buf = kmalloc(wa->dti_buf_size, GFP_KERNEL);
-	if (wa->dti_buf == NULL) {
-		result = -ENOMEM;
-		goto error_dti_buf_alloc;
-	}
-	result = wa_nep_create(wa, iface);
-	if (result < 0) {
-		dev_err(dev, "WA-CDS: can't initialize notif endpoint: %d\n",
-			result);
-		goto error_nep_create;
-	}
-	return 0;
-
-error_nep_create:
-	kfree(wa->dti_buf);
-error_dti_buf_alloc:
-	wa_rpipes_destroy(wa);
-error_rpipes_create:
-	return result;
-}
-EXPORT_SYMBOL_GPL(wa_create);
-
-
-void __wa_destroy(struct wahc *wa)
-{
-	if (wa->dti_urb) {
-		usb_kill_urb(wa->dti_urb);
-		usb_put_urb(wa->dti_urb);
-	}
-	kfree(wa->dti_buf);
-	wa_nep_destroy(wa);
-	wa_rpipes_destroy(wa);
-}
-EXPORT_SYMBOL_GPL(__wa_destroy);
-
-/**
- * wa_reset_all - reset the WA device
- * @wa: the WA to be reset
- *
- * For HWAs the radio controller and all other PALs are also reset.
- */
-void wa_reset_all(struct wahc *wa)
-{
-	/* FIXME: assuming HWA. */
-	wusbhc_reset_all(wa->wusb);
-}
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Wireless USB Wire Adapter core");
-MODULE_LICENSE("GPL");
diff --git a/drivers/usb/wusbcore/wa-hc.h b/drivers/usb/wusbcore/wa-hc.h
deleted file mode 100644
index ec90fff21deb..000000000000
--- a/drivers/usb/wusbcore/wa-hc.h
+++ /dev/null
@@ -1,467 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * HWA Host Controller Driver
- * Wire Adapter Control/Data Streaming Iface (WUSB1.0[8])
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This driver implements a USB Host Controller (struct usb_hcd) for a
- * Wireless USB Host Controller based on the Wireless USB 1.0
- * Host-Wire-Adapter specification (in layman terms, a USB-dongle that
- * implements a Wireless USB host).
- *
- * Check out the Design-overview.txt file in the source documentation
- * for other details on the implementation.
- *
- * Main blocks:
- *
- *  driver     glue with the driver API, workqueue daemon
- *
- *  lc         RC instance life cycle management (create, destroy...)
- *
- *  hcd        glue with the USB API Host Controller Interface API.
- *
- *  nep        Notification EndPoint management: collect notifications
- *             and queue them with the workqueue daemon.
- *
- *             Handle notifications as coming from the NEP. Sends them
- *             off others to their respective modules (eg: connect,
- *             disconnect and reset go to devconnect).
- *
- *  rpipe      Remote Pipe management; rpipe is what we use to write
- *             to an endpoint on a WUSB device that is connected to a
- *             HWA RC.
- *
- *  xfer       Transfer management -- this is all the code that gets a
- *             buffer and pushes it to a device (or viceversa). *
- *
- * Some day a lot of this code will be shared between this driver and
- * the drivers for DWA (xfer, rpipe).
- *
- * All starts at driver.c:hwahc_probe(), when one of this guys is
- * connected. hwahc_disconnect() stops it.
- *
- * During operation, the main driver is devices connecting or
- * disconnecting. They cause the HWA RC to send notifications into
- * nep.c:hwahc_nep_cb() that will dispatch them to
- * notif.c:wa_notif_dispatch(). From there they will fan to cause
- * device connects, disconnects, etc.
- *
- * Note much of the activity is difficult to follow. For example a
- * device connect goes to devconnect, which will cause the "fake" root
- * hub port to show a connect and stop there. Then hub_wq will notice
- * and call into the rh.c:hwahc_rc_port_reset() code to authenticate
- * the device (and this might require user intervention) and enable
- * the port.
- *
- * We also have a timer workqueue going from devconnect.c that
- * schedules in hwahc_devconnect_create().
- *
- * The rest of the traffic is in the usual entry points of a USB HCD,
- * which are hooked up in driver.c:hwahc_rc_driver, and defined in
- * hcd.c.
- */
-
-#ifndef __HWAHC_INTERNAL_H__
-#define __HWAHC_INTERNAL_H__
-
-#include <linux/completion.h>
-#include <linux/usb.h>
-#include <linux/mutex.h>
-#include <linux/spinlock.h>
-#include <linux/uwb.h>
-#include <linux/usb/wusb.h>
-#include <linux/usb/wusb-wa.h>
-
-struct wusbhc;
-struct wahc;
-extern void wa_urb_enqueue_run(struct work_struct *ws);
-extern void wa_process_errored_transfers_run(struct work_struct *ws);
-
-/**
- * RPipe instance
- *
- * @descr's fields are kept in LE, as we need to send it back and
- * forth.
- *
- * @wa is referenced when set
- *
- * @segs_available is the number of requests segments that still can
- *                 be submitted to the controller without overloading
- *                 it. It is initialized to descr->wRequests when
- *                 aiming.
- *
- * A rpipe supports a max of descr->wRequests at the same time; before
- * submitting seg_lock has to be taken. If segs_avail > 0, then we can
- * submit; if not, we have to queue them.
- */
-struct wa_rpipe {
-	struct kref refcnt;
-	struct usb_rpipe_descriptor descr;
-	struct usb_host_endpoint *ep;
-	struct wahc *wa;
-	spinlock_t seg_lock;
-	struct list_head seg_list;
-	struct list_head list_node;
-	atomic_t segs_available;
-	u8 buffer[1];	/* For reads/writes on USB */
-};
-
-
-enum wa_dti_state {
-	WA_DTI_TRANSFER_RESULT_PENDING,
-	WA_DTI_ISOC_PACKET_STATUS_PENDING,
-	WA_DTI_BUF_IN_DATA_PENDING
-};
-
-enum wa_quirks {
-	/*
-	 * The Alereon HWA expects the data frames in isochronous transfer
-	 * requests to be concatenated and not sent as separate packets.
-	 */
-	WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC	= 0x01,
-	/*
-	 * The Alereon HWA can be instructed to not send transfer notifications
-	 * as an optimization.
-	 */
-	WUSB_QUIRK_ALEREON_HWA_DISABLE_XFER_NOTIFICATIONS	= 0x02,
-};
-
-enum wa_vendor_specific_requests {
-	WA_REQ_ALEREON_DISABLE_XFER_NOTIFICATIONS = 0x4C,
-	WA_REQ_ALEREON_FEATURE_SET = 0x01,
-	WA_REQ_ALEREON_FEATURE_CLEAR = 0x00,
-};
-
-#define WA_MAX_BUF_IN_URBS	4
-/**
- * Instance of a HWA Host Controller
- *
- * Except where a more specific lock/mutex applies or atomic, all
- * fields protected by @mutex.
- *
- * @wa_descr  Can be accessed without locking because it is in
- *            the same area where the device descriptors were
- *            read, so it is guaranteed to exist unmodified while
- *            the device exists.
- *
- *            Endianess has been converted to CPU's.
- *
- * @nep_* can be accessed without locking as its processing is
- *        serialized; we submit a NEP URB and it comes to
- *        hwahc_nep_cb(), which won't issue another URB until it is
- *        done processing it.
- *
- * @xfer_list:
- *
- *   List of active transfers to verify existence from a xfer id
- *   gotten from the xfer result message. Can't use urb->list because
- *   it goes by endpoint, and we don't know the endpoint at the time
- *   when we get the xfer result message. We can't really rely on the
- *   pointer (will have to change for 64 bits) as the xfer id is 32 bits.
- *
- * @xfer_delayed_list:   List of transfers that need to be started
- *                       (with a workqueue, because they were
- *                       submitted from an atomic context).
- *
- * FIXME: this needs to be layered up: a wusbhc layer (for sharing
- *        commonalities with WHCI), a wa layer (for sharing
- *        commonalities with DWA-RC).
- */
-struct wahc {
-	struct usb_device *usb_dev;
-	struct usb_interface *usb_iface;
-
-	/* HC to deliver notifications */
-	union {
-		struct wusbhc *wusb;
-		struct dwahc *dwa;
-	};
-
-	const struct usb_endpoint_descriptor *dto_epd, *dti_epd;
-	const struct usb_wa_descriptor *wa_descr;
-
-	struct urb *nep_urb;		/* Notification EndPoint [lockless] */
-	struct edc nep_edc;
-	void *nep_buffer;
-	size_t nep_buffer_size;
-
-	atomic_t notifs_queued;
-
-	u16 rpipes;
-	unsigned long *rpipe_bm;	/* rpipe usage bitmap */
-	struct list_head rpipe_delayed_list;	/* delayed RPIPES. */
-	spinlock_t rpipe_lock;	/* protect rpipe_bm and delayed list */
-	struct mutex rpipe_mutex;	/* assigning resources to endpoints */
-
-	/*
-	 * dti_state is used to track the state of the dti_urb. When dti_state
-	 * is WA_DTI_ISOC_PACKET_STATUS_PENDING, dti_isoc_xfer_in_progress and
-	 * dti_isoc_xfer_seg identify which xfer the incoming isoc packet
-	 * status refers to.
-	 */
-	enum wa_dti_state dti_state;
-	u32 dti_isoc_xfer_in_progress;
-	u8  dti_isoc_xfer_seg;
-	struct urb *dti_urb;		/* URB for reading xfer results */
-					/* URBs for reading data in */
-	struct urb buf_in_urbs[WA_MAX_BUF_IN_URBS];
-	int active_buf_in_urbs;		/* number of buf_in_urbs active. */
-	struct edc dti_edc;		/* DTI error density counter */
-	void *dti_buf;
-	size_t dti_buf_size;
-
-	unsigned long dto_in_use;	/* protect dto endoint serialization */
-
-	s32 status;			/* For reading status */
-
-	struct list_head xfer_list;
-	struct list_head xfer_delayed_list;
-	struct list_head xfer_errored_list;
-	/*
-	 * lock for the above xfer lists.  Can be taken while a xfer->lock is
-	 * held but not in the reverse order.
-	 */
-	spinlock_t xfer_list_lock;
-	struct work_struct xfer_enqueue_work;
-	struct work_struct xfer_error_work;
-	atomic_t xfer_id_count;
-
-	kernel_ulong_t	quirks;
-};
-
-
-extern int wa_create(struct wahc *wa, struct usb_interface *iface,
-	kernel_ulong_t);
-extern void __wa_destroy(struct wahc *wa);
-extern int wa_dti_start(struct wahc *wa);
-void wa_reset_all(struct wahc *wa);
-
-
-/* Miscellaneous constants */
-enum {
-	/** Max number of EPROTO errors we tolerate on the NEP in a
-	 * period of time */
-	HWAHC_EPROTO_MAX = 16,
-	/** Period of time for EPROTO errors (in jiffies) */
-	HWAHC_EPROTO_PERIOD = 4 * HZ,
-};
-
-
-/* Notification endpoint handling */
-extern int wa_nep_create(struct wahc *, struct usb_interface *);
-extern void wa_nep_destroy(struct wahc *);
-
-static inline int wa_nep_arm(struct wahc *wa, gfp_t gfp_mask)
-{
-	struct urb *urb = wa->nep_urb;
-	urb->transfer_buffer = wa->nep_buffer;
-	urb->transfer_buffer_length = wa->nep_buffer_size;
-	return usb_submit_urb(urb, gfp_mask);
-}
-
-static inline void wa_nep_disarm(struct wahc *wa)
-{
-	usb_kill_urb(wa->nep_urb);
-}
-
-
-/* RPipes */
-static inline void wa_rpipe_init(struct wahc *wa)
-{
-	INIT_LIST_HEAD(&wa->rpipe_delayed_list);
-	spin_lock_init(&wa->rpipe_lock);
-	mutex_init(&wa->rpipe_mutex);
-}
-
-static inline void wa_init(struct wahc *wa)
-{
-	int index;
-
-	edc_init(&wa->nep_edc);
-	atomic_set(&wa->notifs_queued, 0);
-	wa->dti_state = WA_DTI_TRANSFER_RESULT_PENDING;
-	wa_rpipe_init(wa);
-	edc_init(&wa->dti_edc);
-	INIT_LIST_HEAD(&wa->xfer_list);
-	INIT_LIST_HEAD(&wa->xfer_delayed_list);
-	INIT_LIST_HEAD(&wa->xfer_errored_list);
-	spin_lock_init(&wa->xfer_list_lock);
-	INIT_WORK(&wa->xfer_enqueue_work, wa_urb_enqueue_run);
-	INIT_WORK(&wa->xfer_error_work, wa_process_errored_transfers_run);
-	wa->dto_in_use = 0;
-	atomic_set(&wa->xfer_id_count, 1);
-	/* init the buf in URBs */
-	for (index = 0; index < WA_MAX_BUF_IN_URBS; ++index)
-		usb_init_urb(&(wa->buf_in_urbs[index]));
-	wa->active_buf_in_urbs = 0;
-}
-
-/**
- * Destroy a pipe (when refcount drops to zero)
- *
- * Assumes it has been moved to the "QUIESCING" state.
- */
-struct wa_xfer;
-extern void rpipe_destroy(struct kref *_rpipe);
-static inline
-void __rpipe_get(struct wa_rpipe *rpipe)
-{
-	kref_get(&rpipe->refcnt);
-}
-extern int rpipe_get_by_ep(struct wahc *, struct usb_host_endpoint *,
-			   struct urb *, gfp_t);
-static inline void rpipe_put(struct wa_rpipe *rpipe)
-{
-	kref_put(&rpipe->refcnt, rpipe_destroy);
-
-}
-extern void rpipe_ep_disable(struct wahc *, struct usb_host_endpoint *);
-extern void rpipe_clear_feature_stalled(struct wahc *,
-			struct usb_host_endpoint *);
-extern int wa_rpipes_create(struct wahc *);
-extern void wa_rpipes_destroy(struct wahc *);
-static inline void rpipe_avail_dec(struct wa_rpipe *rpipe)
-{
-	atomic_dec(&rpipe->segs_available);
-}
-
-/**
- * Returns true if the rpipe is ready to submit more segments.
- */
-static inline int rpipe_avail_inc(struct wa_rpipe *rpipe)
-{
-	return atomic_inc_return(&rpipe->segs_available) > 0
-		&& !list_empty(&rpipe->seg_list);
-}
-
-
-/* Transferring data */
-extern int wa_urb_enqueue(struct wahc *, struct usb_host_endpoint *,
-			  struct urb *, gfp_t);
-extern int wa_urb_dequeue(struct wahc *, struct urb *, int);
-extern void wa_handle_notif_xfer(struct wahc *, struct wa_notif_hdr *);
-
-
-/* Misc
- *
- * FIXME: Refcounting for the actual @hwahc object is not correct; I
- *        mean, this should be refcounting on the HCD underneath, but
- *        it is not. In any case, the semantics for HCD refcounting
- *        are *weird*...on refcount reaching zero it just frees
- *        it...no RC specific function is called...unless I miss
- *        something.
- *
- * FIXME: has to go away in favour of a 'struct' hcd based solution
- */
-static inline struct wahc *wa_get(struct wahc *wa)
-{
-	usb_get_intf(wa->usb_iface);
-	return wa;
-}
-
-static inline void wa_put(struct wahc *wa)
-{
-	usb_put_intf(wa->usb_iface);
-}
-
-
-static inline int __wa_feature(struct wahc *wa, unsigned op, u16 feature)
-{
-	return usb_control_msg(wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			op ? USB_REQ_SET_FEATURE : USB_REQ_CLEAR_FEATURE,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-			feature,
-			wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-			NULL, 0, USB_CTRL_SET_TIMEOUT);
-}
-
-
-static inline int __wa_set_feature(struct wahc *wa, u16 feature)
-{
-	return  __wa_feature(wa, 1, feature);
-}
-
-
-static inline int __wa_clear_feature(struct wahc *wa, u16 feature)
-{
-	return __wa_feature(wa, 0, feature);
-}
-
-
-/**
- * Return the status of a Wire Adapter
- *
- * @wa:		Wire Adapter instance
- * @returns     < 0 errno code on error, or status bitmap as described
- *              in WUSB1.0[8.3.1.6].
- *
- * NOTE: need malloc, some arches don't take USB from the stack
- */
-static inline
-s32 __wa_get_status(struct wahc *wa)
-{
-	s32 result;
-	result = usb_control_msg(
-		wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
-		USB_REQ_GET_STATUS,
-		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-		0, wa->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-		&wa->status, sizeof(wa->status), USB_CTRL_GET_TIMEOUT);
-	if (result >= 0)
-		result = wa->status;
-	return result;
-}
-
-
-/**
- * Waits until the Wire Adapter's status matches @mask/@value
- *
- * @wa:		Wire Adapter instance.
- * @returns     < 0 errno code on error, otherwise status.
- *
- * Loop until the WAs status matches the mask and value (status & mask
- * == value). Timeout if it doesn't happen.
- *
- * FIXME: is there an official specification on how long status
- *        changes can take?
- */
-static inline s32 __wa_wait_status(struct wahc *wa, u32 mask, u32 value)
-{
-	s32 result;
-	unsigned loops = 10;
-	do {
-		msleep(50);
-		result = __wa_get_status(wa);
-		if ((result & mask) == value)
-			break;
-		if (loops-- == 0) {
-			result = -ETIMEDOUT;
-			break;
-		}
-	} while (result >= 0);
-	return result;
-}
-
-
-/** Command @hwahc to stop, @returns 0 if ok, < 0 errno code on error */
-static inline int __wa_stop(struct wahc *wa)
-{
-	int result;
-	struct device *dev = &wa->usb_iface->dev;
-
-	result = __wa_clear_feature(wa, WA_ENABLE);
-	if (result < 0 && result != -ENODEV) {
-		dev_err(dev, "error commanding HC to stop: %d\n", result);
-		goto out;
-	}
-	result = __wa_wait_status(wa, WA_ENABLE, 0);
-	if (result < 0 && result != -ENODEV)
-		dev_err(dev, "error waiting for HC to stop: %d\n", result);
-out:
-	return 0;
-}
-
-
-#endif /* #ifndef __HWAHC_INTERNAL_H__ */
diff --git a/drivers/usb/wusbcore/wa-nep.c b/drivers/usb/wusbcore/wa-nep.c
deleted file mode 100644
index 5f0656db5482..000000000000
--- a/drivers/usb/wusbcore/wa-nep.c
+++ /dev/null
@@ -1,289 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB Wire Adapter: Control/Data Streaming Interface (WUSB[8])
- * Notification EndPoint support
- *
- * Copyright (C) 2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This part takes care of getting the notification from the hw
- * only and dispatching through wusbwad into
- * wa_notif_dispatch. Handling is done there.
- *
- * WA notifications are limited in size; most of them are three or
- * four bytes long, and the longest is the HWA Device Notification,
- * which would not exceed 38 bytes (DNs are limited in payload to 32
- * bytes plus 3 bytes header (WUSB1.0[7.6p2]), plus 3 bytes HWA
- * header (WUSB1.0[8.5.4.2]).
- *
- * It is not clear if more than one Device Notification can be packed
- * in a HWA Notification, I assume no because of the wording in
- * WUSB1.0[8.5.4.2]. In any case, the bigger any notification could
- * get is 256 bytes (as the bLength field is a byte).
- *
- * So what we do is we have this buffer and read into it; when a
- * notification arrives we schedule work to a specific, single thread
- * workqueue (so notifications are serialized) and copy the
- * notification data. After scheduling the work, we rearm the read from
- * the notification endpoint.
- *
- * Entry points here are:
- *
- * wa_nep_[create|destroy]()   To initialize/release this subsystem
- *
- * wa_nep_cb()                 Callback for the notification
- *                                endpoint; when data is ready, this
- *                                does the dispatching.
- */
-#include <linux/workqueue.h>
-#include <linux/ctype.h>
-#include <linux/slab.h>
-
-#include "wa-hc.h"
-#include "wusbhc.h"
-
-/* Structure for queueing notifications to the workqueue */
-struct wa_notif_work {
-	struct work_struct work;
-	struct wahc *wa;
-	size_t size;
-	u8 data[];
-};
-
-/*
- * Process incoming notifications from the WA's Notification EndPoint
- * [the wuswad daemon, basically]
- *
- * @_nw:	Pointer to a descriptor which has the pointer to the
- *		@wa, the size of the buffer and the work queue
- *		structure (so we can free all when done).
- * @returns     0 if ok, < 0 errno code on error.
- *
- * All notifications follow the same format; they need to start with a
- * 'struct wa_notif_hdr' header, so it is easy to parse through
- * them. We just break the buffer in individual notifications (the
- * standard doesn't say if it can be done or is forbidden, so we are
- * cautious) and dispatch each.
- *
- * So the handling layers are is:
- *
- *   WA specific notification (from NEP)
- *      Device Notification Received -> wa_handle_notif_dn()
- *        WUSB Device notification generic handling
- *      BPST Adjustment -> wa_handle_notif_bpst_adj()
- *      ... -> ...
- *
- * @wa has to be referenced
- */
-static void wa_notif_dispatch(struct work_struct *ws)
-{
-	void *itr;
-	u8 missing = 0;
-	struct wa_notif_work *nw = container_of(ws, struct wa_notif_work,
-						work);
-	struct wahc *wa = nw->wa;
-	struct wa_notif_hdr *notif_hdr;
-	size_t size;
-
-	struct device *dev = &wa->usb_iface->dev;
-
-#if 0
-	/* FIXME: need to check for this??? */
-	if (usb_hcd->state == HC_STATE_QUIESCING)	/* Going down? */
-		goto out;				/* screw it */
-#endif
-	atomic_dec(&wa->notifs_queued);		/* Throttling ctl */
-	size = nw->size;
-	itr = nw->data;
-
-	while (size) {
-		if (size < sizeof(*notif_hdr)) {
-			missing = sizeof(*notif_hdr) - size;
-			goto exhausted_buffer;
-		}
-		notif_hdr = itr;
-		if (size < notif_hdr->bLength)
-			goto exhausted_buffer;
-		itr += notif_hdr->bLength;
-		size -= notif_hdr->bLength;
-		/* Dispatch the notification [don't use itr or size!] */
-		switch (notif_hdr->bNotifyType) {
-		case HWA_NOTIF_DN: {
-			struct hwa_notif_dn *hwa_dn;
-			hwa_dn = container_of(notif_hdr, struct hwa_notif_dn,
-					      hdr);
-			wusbhc_handle_dn(wa->wusb, hwa_dn->bSourceDeviceAddr,
-					 hwa_dn->dndata,
-					 notif_hdr->bLength - sizeof(*hwa_dn));
-			break;
-		}
-		case WA_NOTIF_TRANSFER:
-			wa_handle_notif_xfer(wa, notif_hdr);
-			break;
-		case HWA_NOTIF_BPST_ADJ:
-			break; /* no action needed for BPST ADJ. */
-		case DWA_NOTIF_RWAKE:
-		case DWA_NOTIF_PORTSTATUS:
-			/* FIXME: unimplemented WA NOTIFs */
-			/* fallthru */
-		default:
-			dev_err(dev, "HWA: unknown notification 0x%x, "
-				"%zu bytes; discarding\n",
-				notif_hdr->bNotifyType,
-				(size_t)notif_hdr->bLength);
-			break;
-		}
-	}
-out:
-	wa_put(wa);
-	kfree(nw);
-	return;
-
-	/* THIS SHOULD NOT HAPPEN
-	 *
-	 * Buffer exahusted with partial data remaining; just warn and
-	 * discard the data, as this should not happen.
-	 */
-exhausted_buffer:
-	dev_warn(dev, "HWA: device sent short notification, "
-		 "%d bytes missing; discarding %d bytes.\n",
-		 missing, (int)size);
-	goto out;
-}
-
-/*
- * Deliver incoming WA notifications to the wusbwa workqueue
- *
- * @wa:	Pointer the Wire Adapter Controller Data Streaming
- *              instance (part of an 'struct usb_hcd').
- * @size:       Size of the received buffer
- * @returns     0 if ok, < 0 errno code on error.
- *
- * The input buffer is @wa->nep_buffer, with @size bytes
- * (guaranteed to fit in the allocated space,
- * @wa->nep_buffer_size).
- */
-static int wa_nep_queue(struct wahc *wa, size_t size)
-{
-	int result = 0;
-	struct device *dev = &wa->usb_iface->dev;
-	struct wa_notif_work *nw;
-
-	/* dev_fnstart(dev, "(wa %p, size %zu)\n", wa, size); */
-	BUG_ON(size > wa->nep_buffer_size);
-	if (size == 0)
-		goto out;
-	if (atomic_read(&wa->notifs_queued) > 200) {
-		if (printk_ratelimit())
-			dev_err(dev, "Too many notifications queued, "
-				"throttling back\n");
-		goto out;
-	}
-	nw = kzalloc(sizeof(*nw) + size, GFP_ATOMIC);
-	if (nw == NULL) {
-		if (printk_ratelimit())
-			dev_err(dev, "No memory to queue notification\n");
-		result = -ENOMEM;
-		goto out;
-	}
-	INIT_WORK(&nw->work, wa_notif_dispatch);
-	nw->wa = wa_get(wa);
-	nw->size = size;
-	memcpy(nw->data, wa->nep_buffer, size);
-	atomic_inc(&wa->notifs_queued);		/* Throttling ctl */
-	queue_work(wusbd, &nw->work);
-out:
-	/* dev_fnend(dev, "(wa %p, size %zu) = result\n", wa, size, result); */
-	return result;
-}
-
-/*
- * Callback for the notification event endpoint
- *
- * Check's that everything is fine and then passes the data to be
- * queued to the workqueue.
- */
-static void wa_nep_cb(struct urb *urb)
-{
-	int result;
-	struct wahc *wa = urb->context;
-	struct device *dev = &wa->usb_iface->dev;
-
-	switch (result = urb->status) {
-	case 0:
-		result = wa_nep_queue(wa, urb->actual_length);
-		if (result < 0)
-			dev_err(dev, "NEP: unable to process notification(s): "
-				"%d\n", result);
-		break;
-	case -ECONNRESET:	/* Not an error, but a controlled situation; */
-	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
-	case -ESHUTDOWN:
-		dev_dbg(dev, "NEP: going down %d\n", urb->status);
-		goto out;
-	default:	/* On general errors, we retry unless it gets ugly */
-		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
-			    EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "NEP: URB max acceptable errors "
-				"exceeded, resetting device\n");
-			wa_reset_all(wa);
-			goto out;
-		}
-		dev_err(dev, "NEP: URB error %d\n", urb->status);
-	}
-	result = wa_nep_arm(wa, GFP_ATOMIC);
-	if (result < 0) {
-		dev_err(dev, "NEP: cannot submit URB: %d\n", result);
-		wa_reset_all(wa);
-	}
-out:
-	return;
-}
-
-/*
- * Initialize @wa's notification and event's endpoint stuff
- *
- * This includes the allocating the read buffer, the context ID
- * allocation bitmap, the URB and submitting the URB.
- */
-int wa_nep_create(struct wahc *wa, struct usb_interface *iface)
-{
-	int result;
-	struct usb_endpoint_descriptor *epd;
-	struct usb_device *usb_dev = interface_to_usbdev(iface);
-	struct device *dev = &iface->dev;
-
-	edc_init(&wa->nep_edc);
-	epd = &iface->cur_altsetting->endpoint[0].desc;
-	wa->nep_buffer_size = 1024;
-	wa->nep_buffer = kmalloc(wa->nep_buffer_size, GFP_KERNEL);
-	if (!wa->nep_buffer)
-		goto error_nep_buffer;
-	wa->nep_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (wa->nep_urb == NULL)
-		goto error_urb_alloc;
-	usb_fill_int_urb(wa->nep_urb, usb_dev,
-			 usb_rcvintpipe(usb_dev, epd->bEndpointAddress),
-			 wa->nep_buffer, wa->nep_buffer_size,
-			 wa_nep_cb, wa, epd->bInterval);
-	result = wa_nep_arm(wa, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "Cannot submit notification URB: %d\n", result);
-		goto error_nep_arm;
-	}
-	return 0;
-
-error_nep_arm:
-	usb_free_urb(wa->nep_urb);
-error_urb_alloc:
-	kfree(wa->nep_buffer);
-error_nep_buffer:
-	return -ENOMEM;
-}
-
-void wa_nep_destroy(struct wahc *wa)
-{
-	wa_nep_disarm(wa);
-	usb_free_urb(wa->nep_urb);
-	kfree(wa->nep_buffer);
-}
diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c
deleted file mode 100644
index a5734cbcd5ad..000000000000
--- a/drivers/usb/wusbcore/wa-rpipe.c
+++ /dev/null
@@ -1,539 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB Wire Adapter
- * rpipe management
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- *
- * RPIPE
- *
- *   Targeted at different downstream endpoints
- *
- *   Descriptor: use to config the remote pipe.
- *
- *   The number of blocks could be dynamic (wBlocks in descriptor is
- *   0)--need to schedule them then.
- *
- * Each bit in wa->rpipe_bm represents if an rpipe is being used or
- * not. Rpipes are represented with a 'struct wa_rpipe' that is
- * attached to the hcpriv member of a 'struct usb_host_endpoint'.
- *
- * When you need to xfer data to an endpoint, you get an rpipe for it
- * with wa_ep_rpipe_get(), which gives you a reference to the rpipe
- * and keeps a single one (the first one) with the endpoint. When you
- * are done transferring, you drop that reference. At the end the
- * rpipe is always allocated and bound to the endpoint. There it might
- * be recycled when not used.
- *
- * Addresses:
- *
- *  We use a 1:1 mapping mechanism between port address (0 based
- *  index, actually) and the address. The USB stack knows about this.
- *
- *  USB Stack port number    4 (1 based)
- *  WUSB code port index     3 (0 based)
- *  USB Address             5 (2 based -- 0 is for default, 1 for root hub)
- *
- *  Now, because we don't use the concept as default address exactly
- *  like the (wired) USB code does, we need to kind of skip it. So we
- *  never take addresses from the urb->pipe, but from the
- *  urb->dev->devnum, to make sure that we always have the right
- *  destination address.
- */
-#include <linux/atomic.h>
-#include <linux/bitmap.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include "wusbhc.h"
-#include "wa-hc.h"
-
-static int __rpipe_get_descr(struct wahc *wa,
-			     struct usb_rpipe_descriptor *descr, u16 index)
-{
-	ssize_t result;
-	struct device *dev = &wa->usb_iface->dev;
-
-	/* Get the RPIPE descriptor -- we cannot use the usb_get_descriptor()
-	 * function because the arguments are different.
-	 */
-	result = usb_control_msg(
-		wa->usb_dev, usb_rcvctrlpipe(wa->usb_dev, 0),
-		USB_REQ_GET_DESCRIPTOR,
-		USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_RPIPE,
-		USB_DT_RPIPE<<8, index, descr, sizeof(*descr),
-		USB_CTRL_GET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "rpipe %u: get descriptor failed: %d\n",
-			index, (int)result);
-		goto error;
-	}
-	if (result < sizeof(*descr)) {
-		dev_err(dev, "rpipe %u: got short descriptor "
-			"(%zd vs %zd bytes needed)\n",
-			index, result, sizeof(*descr));
-		result = -EINVAL;
-		goto error;
-	}
-	result = 0;
-
-error:
-	return result;
-}
-
-/*
- *
- * The descriptor is assumed to be properly initialized (ie: you got
- * it through __rpipe_get_descr()).
- */
-static int __rpipe_set_descr(struct wahc *wa,
-			     struct usb_rpipe_descriptor *descr, u16 index)
-{
-	ssize_t result;
-	struct device *dev = &wa->usb_iface->dev;
-
-	/* we cannot use the usb_get_descriptor() function because the
-	 * arguments are different.
-	 */
-	result = usb_control_msg(
-		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-		USB_REQ_SET_DESCRIPTOR,
-		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
-		USB_DT_RPIPE<<8, index, descr, sizeof(*descr),
-		USB_CTRL_SET_TIMEOUT);
-	if (result < 0) {
-		dev_err(dev, "rpipe %u: set descriptor failed: %d\n",
-			index, (int)result);
-		goto error;
-	}
-	if (result < sizeof(*descr)) {
-		dev_err(dev, "rpipe %u: sent short descriptor "
-			"(%zd vs %zd bytes required)\n",
-			index, result, sizeof(*descr));
-		result = -EINVAL;
-		goto error;
-	}
-	result = 0;
-
-error:
-	return result;
-
-}
-
-static void rpipe_init(struct wa_rpipe *rpipe)
-{
-	kref_init(&rpipe->refcnt);
-	spin_lock_init(&rpipe->seg_lock);
-	INIT_LIST_HEAD(&rpipe->seg_list);
-	INIT_LIST_HEAD(&rpipe->list_node);
-}
-
-static unsigned rpipe_get_idx(struct wahc *wa, unsigned rpipe_idx)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&wa->rpipe_lock, flags);
-	rpipe_idx = find_next_zero_bit(wa->rpipe_bm, wa->rpipes, rpipe_idx);
-	if (rpipe_idx < wa->rpipes)
-		set_bit(rpipe_idx, wa->rpipe_bm);
-	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
-
-	return rpipe_idx;
-}
-
-static void rpipe_put_idx(struct wahc *wa, unsigned rpipe_idx)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&wa->rpipe_lock, flags);
-	clear_bit(rpipe_idx, wa->rpipe_bm);
-	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
-}
-
-void rpipe_destroy(struct kref *_rpipe)
-{
-	struct wa_rpipe *rpipe = container_of(_rpipe, struct wa_rpipe, refcnt);
-	u8 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
-
-	if (rpipe->ep)
-		rpipe->ep->hcpriv = NULL;
-	rpipe_put_idx(rpipe->wa, index);
-	wa_put(rpipe->wa);
-	kfree(rpipe);
-}
-EXPORT_SYMBOL_GPL(rpipe_destroy);
-
-/*
- * Locate an idle rpipe, create an structure for it and return it
- *
- * @wa	  is referenced and unlocked
- * @crs   enum rpipe_attr, required endpoint characteristics
- *
- * The rpipe can be used only sequentially (not in parallel).
- *
- * The rpipe is moved into the "ready" state.
- */
-static int rpipe_get_idle(struct wa_rpipe **prpipe, struct wahc *wa, u8 crs,
-			  gfp_t gfp)
-{
-	int result;
-	unsigned rpipe_idx;
-	struct wa_rpipe *rpipe;
-	struct device *dev = &wa->usb_iface->dev;
-
-	rpipe = kzalloc(sizeof(*rpipe), gfp);
-	if (rpipe == NULL)
-		return -ENOMEM;
-	rpipe_init(rpipe);
-
-	/* Look for an idle pipe */
-	for (rpipe_idx = 0; rpipe_idx < wa->rpipes; rpipe_idx++) {
-		rpipe_idx = rpipe_get_idx(wa, rpipe_idx);
-		if (rpipe_idx >= wa->rpipes)	/* no more pipes :( */
-			break;
-		result =  __rpipe_get_descr(wa, &rpipe->descr, rpipe_idx);
-		if (result < 0)
-			dev_err(dev, "Can't get descriptor for rpipe %u: %d\n",
-				rpipe_idx, result);
-		else if ((rpipe->descr.bmCharacteristics & crs) != 0)
-			goto found;
-		rpipe_put_idx(wa, rpipe_idx);
-	}
-	*prpipe = NULL;
-	kfree(rpipe);
-	return -ENXIO;
-
-found:
-	set_bit(rpipe_idx, wa->rpipe_bm);
-	rpipe->wa = wa_get(wa);
-	*prpipe = rpipe;
-	return 0;
-}
-
-static int __rpipe_reset(struct wahc *wa, unsigned index)
-{
-	int result;
-	struct device *dev = &wa->usb_iface->dev;
-
-	result = usb_control_msg(
-		wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-		USB_REQ_RPIPE_RESET,
-		USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
-		0, index, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	if (result < 0)
-		dev_err(dev, "rpipe %u: reset failed: %d\n",
-			index, result);
-	return result;
-}
-
-/*
- * Fake companion descriptor for ep0
- *
- * See WUSB1.0[7.4.4], most of this is zero for bulk/int/ctl
- */
-static struct usb_wireless_ep_comp_descriptor epc0 = {
-	.bLength = sizeof(epc0),
-	.bDescriptorType = USB_DT_WIRELESS_ENDPOINT_COMP,
-	.bMaxBurst = 1,
-	.bMaxSequence = 2,
-};
-
-/*
- * Look for EP companion descriptor
- *
- * Get there, look for Inara in the endpoint's extra descriptors
- */
-static struct usb_wireless_ep_comp_descriptor *rpipe_epc_find(
-		struct device *dev, struct usb_host_endpoint *ep)
-{
-	void *itr;
-	size_t itr_size;
-	struct usb_descriptor_header *hdr;
-	struct usb_wireless_ep_comp_descriptor *epcd;
-
-	if (ep->desc.bEndpointAddress == 0) {
-		epcd = &epc0;
-		goto out;
-	}
-	itr = ep->extra;
-	itr_size = ep->extralen;
-	epcd = NULL;
-	while (itr_size > 0) {
-		if (itr_size < sizeof(*hdr)) {
-			dev_err(dev, "HW Bug? ep 0x%02x: extra descriptors "
-				"at offset %zu: only %zu bytes left\n",
-				ep->desc.bEndpointAddress,
-				itr - (void *) ep->extra, itr_size);
-			break;
-		}
-		hdr = itr;
-		if (hdr->bDescriptorType == USB_DT_WIRELESS_ENDPOINT_COMP) {
-			epcd = itr;
-			break;
-		}
-		if (hdr->bLength > itr_size) {
-			dev_err(dev, "HW Bug? ep 0x%02x: extra descriptor "
-				"at offset %zu (type 0x%02x) "
-				"length %d but only %zu bytes left\n",
-				ep->desc.bEndpointAddress,
-				itr - (void *) ep->extra, hdr->bDescriptorType,
-				hdr->bLength, itr_size);
-			break;
-		}
-		itr += hdr->bLength;
-		itr_size -= hdr->bLength;
-	}
-out:
-	return epcd;
-}
-
-/*
- * Aim an rpipe to its device & endpoint destination
- *
- * Make sure we change the address to unauthenticated if the device
- * is WUSB and it is not authenticated.
- */
-static int rpipe_aim(struct wa_rpipe *rpipe, struct wahc *wa,
-		     struct usb_host_endpoint *ep, struct urb *urb, gfp_t gfp)
-{
-	int result = -ENOMSG;	/* better code for lack of companion? */
-	struct device *dev = &wa->usb_iface->dev;
-	struct usb_device *usb_dev = urb->dev;
-	struct usb_wireless_ep_comp_descriptor *epcd;
-	u32 ack_window, epcd_max_sequence;
-	u8 unauth;
-
-	epcd = rpipe_epc_find(dev, ep);
-	if (epcd == NULL) {
-		dev_err(dev, "ep 0x%02x: can't find companion descriptor\n",
-			ep->desc.bEndpointAddress);
-		goto error;
-	}
-	unauth = usb_dev->wusb && !usb_dev->authenticated ? 0x80 : 0;
-	__rpipe_reset(wa, le16_to_cpu(rpipe->descr.wRPipeIndex));
-	atomic_set(&rpipe->segs_available,
-		le16_to_cpu(rpipe->descr.wRequests));
-	/* FIXME: block allocation system; request with queuing and timeout */
-	/* FIXME: compute so seg_size > ep->maxpktsize */
-	rpipe->descr.wBlocks = cpu_to_le16(16);		/* given */
-	/* ep0 maxpktsize is 0x200 (WUSB1.0[4.8.1]) */
-	if (usb_endpoint_xfer_isoc(&ep->desc))
-		rpipe->descr.wMaxPacketSize = epcd->wOverTheAirPacketSize;
-	else
-		rpipe->descr.wMaxPacketSize = ep->desc.wMaxPacketSize;
-
-	rpipe->descr.hwa_bMaxBurst = max(min_t(unsigned int,
-				epcd->bMaxBurst, 16U), 1U);
-	rpipe->descr.hwa_bDeviceInfoIndex =
-			wusb_port_no_to_idx(urb->dev->portnum);
-	/* FIXME: use maximum speed as supported or recommended by device */
-	rpipe->descr.bSpeed = usb_pipeendpoint(urb->pipe) == 0 ?
-		UWB_PHY_RATE_53 : UWB_PHY_RATE_200;
-
-	dev_dbg(dev, "addr %u (0x%02x) rpipe #%u ep# %u speed %d\n",
-		urb->dev->devnum, urb->dev->devnum | unauth,
-		le16_to_cpu(rpipe->descr.wRPipeIndex),
-		usb_pipeendpoint(urb->pipe), rpipe->descr.bSpeed);
-
-	rpipe->descr.hwa_reserved = 0;
-
-	rpipe->descr.bEndpointAddress = ep->desc.bEndpointAddress;
-	/* FIXME: bDataSequence */
-	rpipe->descr.bDataSequence = 0;
-
-	/* start with base window of hwa_bMaxBurst bits starting at 0. */
-	ack_window = 0xFFFFFFFF >> (32 - rpipe->descr.hwa_bMaxBurst);
-	rpipe->descr.dwCurrentWindow = cpu_to_le32(ack_window);
-	epcd_max_sequence = max(min_t(unsigned int,
-			epcd->bMaxSequence, 32U), 2U);
-	rpipe->descr.bMaxDataSequence = epcd_max_sequence - 1;
-	rpipe->descr.bInterval = ep->desc.bInterval;
-	if (usb_endpoint_xfer_isoc(&ep->desc))
-		rpipe->descr.bOverTheAirInterval = epcd->bOverTheAirInterval;
-	else
-		rpipe->descr.bOverTheAirInterval = 0;	/* 0 if not isoc */
-	/* FIXME: xmit power & preamble blah blah */
-	rpipe->descr.bmAttribute = (ep->desc.bmAttributes &
-					USB_ENDPOINT_XFERTYPE_MASK);
-	/* rpipe->descr.bmCharacteristics RO */
-	rpipe->descr.bmRetryOptions = (wa->wusb->retry_count & 0xF);
-	/* FIXME: use for assessing link quality? */
-	rpipe->descr.wNumTransactionErrors = 0;
-	result = __rpipe_set_descr(wa, &rpipe->descr,
-				   le16_to_cpu(rpipe->descr.wRPipeIndex));
-	if (result < 0) {
-		dev_err(dev, "Cannot aim rpipe: %d\n", result);
-		goto error;
-	}
-	result = 0;
-error:
-	return result;
-}
-
-/*
- * Check an aimed rpipe to make sure it points to where we want
- *
- * We use bit 19 of the Linux USB pipe bitmap for unauth vs auth
- * space; when it is like that, we or 0x80 to make an unauth address.
- */
-static int rpipe_check_aim(const struct wa_rpipe *rpipe, const struct wahc *wa,
-			   const struct usb_host_endpoint *ep,
-			   const struct urb *urb, gfp_t gfp)
-{
-	int result = 0;
-	struct device *dev = &wa->usb_iface->dev;
-	u8 portnum = wusb_port_no_to_idx(urb->dev->portnum);
-
-#define AIM_CHECK(rdf, val, text)					\
-	do {								\
-		if (rpipe->descr.rdf != (val)) {			\
-			dev_err(dev,					\
-				"rpipe aim discrepancy: " #rdf " " text "\n", \
-				rpipe->descr.rdf, (val));		\
-			result = -EINVAL;				\
-			WARN_ON(1);					\
-		}							\
-	} while (0)
-	AIM_CHECK(hwa_bDeviceInfoIndex, portnum, "(%u vs %u)");
-	AIM_CHECK(bSpeed, usb_pipeendpoint(urb->pipe) == 0 ?
-			UWB_PHY_RATE_53 : UWB_PHY_RATE_200,
-		  "(%u vs %u)");
-	AIM_CHECK(bEndpointAddress, ep->desc.bEndpointAddress, "(%u vs %u)");
-	AIM_CHECK(bInterval, ep->desc.bInterval, "(%u vs %u)");
-	AIM_CHECK(bmAttribute, ep->desc.bmAttributes & 0x03, "(%u vs %u)");
-#undef AIM_CHECK
-	return result;
-}
-
-#ifndef CONFIG_BUG
-#define CONFIG_BUG 0
-#endif
-
-/*
- * Make sure there is an rpipe allocated for an endpoint
- *
- * If already allocated, we just refcount it; if not, we get an
- * idle one, aim it to the right location and take it.
- *
- * Attaches to ep->hcpriv and rpipe->ep to ep.
- */
-int rpipe_get_by_ep(struct wahc *wa, struct usb_host_endpoint *ep,
-		    struct urb *urb, gfp_t gfp)
-{
-	int result = 0;
-	struct device *dev = &wa->usb_iface->dev;
-	struct wa_rpipe *rpipe;
-	u8 eptype;
-
-	mutex_lock(&wa->rpipe_mutex);
-	rpipe = ep->hcpriv;
-	if (rpipe != NULL) {
-		if (CONFIG_BUG == 1) {
-			result = rpipe_check_aim(rpipe, wa, ep, urb, gfp);
-			if (result < 0)
-				goto error;
-		}
-		__rpipe_get(rpipe);
-		dev_dbg(dev, "ep 0x%02x: reusing rpipe %u\n",
-			ep->desc.bEndpointAddress,
-			le16_to_cpu(rpipe->descr.wRPipeIndex));
-	} else {
-		/* hmm, assign idle rpipe, aim it */
-		result = -ENOBUFS;
-		eptype = ep->desc.bmAttributes & 0x03;
-		result = rpipe_get_idle(&rpipe, wa, 1 << eptype, gfp);
-		if (result < 0)
-			goto error;
-		result = rpipe_aim(rpipe, wa, ep, urb, gfp);
-		if (result < 0) {
-			rpipe_put(rpipe);
-			goto error;
-		}
-		ep->hcpriv = rpipe;
-		rpipe->ep = ep;
-		__rpipe_get(rpipe);	/* for caching into ep->hcpriv */
-		dev_dbg(dev, "ep 0x%02x: using rpipe %u\n",
-			ep->desc.bEndpointAddress,
-			le16_to_cpu(rpipe->descr.wRPipeIndex));
-	}
-error:
-	mutex_unlock(&wa->rpipe_mutex);
-	return result;
-}
-
-/*
- * Allocate the bitmap for each rpipe.
- */
-int wa_rpipes_create(struct wahc *wa)
-{
-	wa->rpipes = le16_to_cpu(wa->wa_descr->wNumRPipes);
-	wa->rpipe_bm = bitmap_zalloc(wa->rpipes, GFP_KERNEL);
-	if (wa->rpipe_bm == NULL)
-		return -ENOMEM;
-	return 0;
-}
-
-void wa_rpipes_destroy(struct wahc *wa)
-{
-	struct device *dev = &wa->usb_iface->dev;
-
-	if (!bitmap_empty(wa->rpipe_bm, wa->rpipes)) {
-		WARN_ON(1);
-		dev_err(dev, "BUG: pipes not released on exit: %*pb\n",
-			wa->rpipes, wa->rpipe_bm);
-	}
-	bitmap_free(wa->rpipe_bm);
-}
-
-/*
- * Release resources allocated for an endpoint
- *
- * If there is an associated rpipe to this endpoint, Abort any pending
- * transfers and put it. If the rpipe ends up being destroyed,
- * __rpipe_destroy() will cleanup ep->hcpriv.
- *
- * This is called before calling hcd->stop(), so you don't need to do
- * anything else in there.
- */
-void rpipe_ep_disable(struct wahc *wa, struct usb_host_endpoint *ep)
-{
-	struct wa_rpipe *rpipe;
-
-	mutex_lock(&wa->rpipe_mutex);
-	rpipe = ep->hcpriv;
-	if (rpipe != NULL) {
-		u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
-
-		usb_control_msg(
-			wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			USB_REQ_RPIPE_ABORT,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
-			0, index, NULL, 0, USB_CTRL_SET_TIMEOUT);
-		rpipe_put(rpipe);
-	}
-	mutex_unlock(&wa->rpipe_mutex);
-}
-EXPORT_SYMBOL_GPL(rpipe_ep_disable);
-
-/* Clear the stalled status of an RPIPE. */
-void rpipe_clear_feature_stalled(struct wahc *wa, struct usb_host_endpoint *ep)
-{
-	struct wa_rpipe *rpipe;
-
-	mutex_lock(&wa->rpipe_mutex);
-	rpipe = ep->hcpriv;
-	if (rpipe != NULL) {
-		u16 index = le16_to_cpu(rpipe->descr.wRPipeIndex);
-
-		usb_control_msg(
-			wa->usb_dev, usb_sndctrlpipe(wa->usb_dev, 0),
-			USB_REQ_CLEAR_FEATURE,
-			USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_RPIPE,
-			RPIPE_STALL, index, NULL, 0, USB_CTRL_SET_TIMEOUT);
-	}
-	mutex_unlock(&wa->rpipe_mutex);
-}
-EXPORT_SYMBOL_GPL(rpipe_clear_feature_stalled);
diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c
deleted file mode 100644
index abf88cea37bb..000000000000
--- a/drivers/usb/wusbcore/wa-xfer.c
+++ /dev/null
@@ -1,2927 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * WUSB Wire Adapter
- * Data transfer and URB enqueing
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * How transfers work: get a buffer, break it up in segments (segment
- * size is a multiple of the maxpacket size). For each segment issue a
- * segment request (struct wa_xfer_*), then send the data buffer if
- * out or nothing if in (all over the DTO endpoint).
- *
- * For each submitted segment request, a notification will come over
- * the NEP endpoint and a transfer result (struct xfer_result) will
- * arrive in the DTI URB. Read it, get the xfer ID, see if there is
- * data coming (inbound transfer), schedule a read and handle it.
- *
- * Sounds simple, it is a pain to implement.
- *
- *
- * ENTRY POINTS
- *
- *   FIXME
- *
- * LIFE CYCLE / STATE DIAGRAM
- *
- *   FIXME
- *
- * THIS CODE IS DISGUSTING
- *
- *   Warned you are; it's my second try and still not happy with it.
- *
- * NOTES:
- *
- *   - No iso
- *
- *   - Supports DMA xfers, control, bulk and maybe interrupt
- *
- *   - Does not recycle unused rpipes
- *
- *     An rpipe is assigned to an endpoint the first time it is used,
- *     and then it's there, assigned, until the endpoint is disabled
- *     (destroyed [{h,d}wahc_op_ep_disable()]. The assignment of the
- *     rpipe to the endpoint is done under the wa->rpipe_sem semaphore
- *     (should be a mutex).
- *
- *     Two methods it could be done:
- *
- *     (a) set up a timer every time an rpipe's use count drops to 1
- *         (which means unused) or when a transfer ends. Reset the
- *         timer when a xfer is queued. If the timer expires, release
- *         the rpipe [see rpipe_ep_disable()].
- *
- *     (b) when looking for free rpipes to attach [rpipe_get_by_ep()],
- *         when none are found go over the list, check their endpoint
- *         and their activity record (if no last-xfer-done-ts in the
- *         last x seconds) take it
- *
- *     However, due to the fact that we have a set of limited
- *     resources (max-segments-at-the-same-time per xfer,
- *     xfers-per-ripe, blocks-per-rpipe, rpipes-per-host), at the end
- *     we are going to have to rebuild all this based on an scheduler,
- *     to where we have a list of transactions to do and based on the
- *     availability of the different required components (blocks,
- *     rpipes, segment slots, etc), we go scheduling them. Painful.
- */
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/hash.h>
-#include <linux/ratelimit.h>
-#include <linux/export.h>
-#include <linux/scatterlist.h>
-
-#include "wa-hc.h"
-#include "wusbhc.h"
-
-enum {
-	/* [WUSB] section 8.3.3 allocates 7 bits for the segment index. */
-	WA_SEGS_MAX = 128,
-};
-
-enum wa_seg_status {
-	WA_SEG_NOTREADY,
-	WA_SEG_READY,
-	WA_SEG_DELAYED,
-	WA_SEG_SUBMITTED,
-	WA_SEG_PENDING,
-	WA_SEG_DTI_PENDING,
-	WA_SEG_DONE,
-	WA_SEG_ERROR,
-	WA_SEG_ABORTED,
-};
-
-static void wa_xfer_delayed_run(struct wa_rpipe *);
-static int __wa_xfer_delayed_run(struct wa_rpipe *rpipe, int *dto_waiting);
-
-/*
- * Life cycle governed by 'struct urb' (the refcount of the struct is
- * that of the 'struct urb' and usb_free_urb() would free the whole
- * struct).
- */
-struct wa_seg {
-	struct urb tr_urb;		/* transfer request urb. */
-	struct urb *isoc_pack_desc_urb;	/* for isoc packet descriptor. */
-	struct urb *dto_urb;		/* for data output. */
-	struct list_head list_node;	/* for rpipe->req_list */
-	struct wa_xfer *xfer;		/* out xfer */
-	u8 index;			/* which segment we are */
-	int isoc_frame_count;	/* number of isoc frames in this segment. */
-	int isoc_frame_offset;	/* starting frame offset in the xfer URB. */
-	/* Isoc frame that the current transfer buffer corresponds to. */
-	int isoc_frame_index;
-	int isoc_size;	/* size of all isoc frames sent by this seg. */
-	enum wa_seg_status status;
-	ssize_t result;			/* bytes xfered or error */
-	struct wa_xfer_hdr xfer_hdr;
-};
-
-static inline void wa_seg_init(struct wa_seg *seg)
-{
-	usb_init_urb(&seg->tr_urb);
-
-	/* set the remaining memory to 0. */
-	memset(((void *)seg) + sizeof(seg->tr_urb), 0,
-		sizeof(*seg) - sizeof(seg->tr_urb));
-}
-
-/*
- * Protected by xfer->lock
- *
- */
-struct wa_xfer {
-	struct kref refcnt;
-	struct list_head list_node;
-	spinlock_t lock;
-	u32 id;
-
-	struct wahc *wa;		/* Wire adapter we are plugged to */
-	struct usb_host_endpoint *ep;
-	struct urb *urb;		/* URB we are transferring for */
-	struct wa_seg **seg;		/* transfer segments */
-	u8 segs, segs_submitted, segs_done;
-	unsigned is_inbound:1;
-	unsigned is_dma:1;
-	size_t seg_size;
-	int result;
-
-	gfp_t gfp;			/* allocation mask */
-
-	struct wusb_dev *wusb_dev;	/* for activity timestamps */
-};
-
-static void __wa_populate_dto_urb_isoc(struct wa_xfer *xfer,
-	struct wa_seg *seg, int curr_iso_frame);
-static void wa_complete_remaining_xfer_segs(struct wa_xfer *xfer,
-		int starting_index, enum wa_seg_status status);
-
-static inline void wa_xfer_init(struct wa_xfer *xfer)
-{
-	kref_init(&xfer->refcnt);
-	INIT_LIST_HEAD(&xfer->list_node);
-	spin_lock_init(&xfer->lock);
-}
-
-/*
- * Destroy a transfer structure
- *
- * Note that freeing xfer->seg[cnt]->tr_urb will free the containing
- * xfer->seg[cnt] memory that was allocated by __wa_xfer_setup_segs.
- */
-static void wa_xfer_destroy(struct kref *_xfer)
-{
-	struct wa_xfer *xfer = container_of(_xfer, struct wa_xfer, refcnt);
-	if (xfer->seg) {
-		unsigned cnt;
-		for (cnt = 0; cnt < xfer->segs; cnt++) {
-			struct wa_seg *seg = xfer->seg[cnt];
-			if (seg) {
-				usb_free_urb(seg->isoc_pack_desc_urb);
-				if (seg->dto_urb) {
-					kfree(seg->dto_urb->sg);
-					usb_free_urb(seg->dto_urb);
-				}
-				usb_free_urb(&seg->tr_urb);
-			}
-		}
-		kfree(xfer->seg);
-	}
-	kfree(xfer);
-}
-
-static void wa_xfer_get(struct wa_xfer *xfer)
-{
-	kref_get(&xfer->refcnt);
-}
-
-static void wa_xfer_put(struct wa_xfer *xfer)
-{
-	kref_put(&xfer->refcnt, wa_xfer_destroy);
-}
-
-/*
- * Try to get exclusive access to the DTO endpoint resource.  Return true
- * if successful.
- */
-static inline int __wa_dto_try_get(struct wahc *wa)
-{
-	return (test_and_set_bit(0, &wa->dto_in_use) == 0);
-}
-
-/* Release the DTO endpoint resource. */
-static inline void __wa_dto_put(struct wahc *wa)
-{
-	clear_bit_unlock(0, &wa->dto_in_use);
-}
-
-/* Service RPIPEs that are waiting on the DTO resource. */
-static void wa_check_for_delayed_rpipes(struct wahc *wa)
-{
-	unsigned long flags;
-	int dto_waiting = 0;
-	struct wa_rpipe *rpipe;
-
-	spin_lock_irqsave(&wa->rpipe_lock, flags);
-	while (!list_empty(&wa->rpipe_delayed_list) && !dto_waiting) {
-		rpipe = list_first_entry(&wa->rpipe_delayed_list,
-				struct wa_rpipe, list_node);
-		__wa_xfer_delayed_run(rpipe, &dto_waiting);
-		/* remove this RPIPE from the list if it is not waiting. */
-		if (!dto_waiting) {
-			pr_debug("%s: RPIPE %d serviced and removed from delayed list.\n",
-				__func__,
-				le16_to_cpu(rpipe->descr.wRPipeIndex));
-			list_del_init(&rpipe->list_node);
-		}
-	}
-	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
-}
-
-/* add this RPIPE to the end of the delayed RPIPE list. */
-static void wa_add_delayed_rpipe(struct wahc *wa, struct wa_rpipe *rpipe)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&wa->rpipe_lock, flags);
-	/* add rpipe to the list if it is not already on it. */
-	if (list_empty(&rpipe->list_node)) {
-		pr_debug("%s: adding RPIPE %d to the delayed list.\n",
-			__func__, le16_to_cpu(rpipe->descr.wRPipeIndex));
-		list_add_tail(&rpipe->list_node, &wa->rpipe_delayed_list);
-	}
-	spin_unlock_irqrestore(&wa->rpipe_lock, flags);
-}
-
-/*
- * xfer is referenced
- *
- * xfer->lock has to be unlocked
- *
- * We take xfer->lock for setting the result; this is a barrier
- * against drivers/usb/core/hcd.c:unlink1() being called after we call
- * usb_hcd_giveback_urb() and wa_urb_dequeue() trying to get a
- * reference to the transfer.
- */
-static void wa_xfer_giveback(struct wa_xfer *xfer)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&xfer->wa->xfer_list_lock, flags);
-	list_del_init(&xfer->list_node);
-	usb_hcd_unlink_urb_from_ep(&(xfer->wa->wusb->usb_hcd), xfer->urb);
-	spin_unlock_irqrestore(&xfer->wa->xfer_list_lock, flags);
-	/* FIXME: segmentation broken -- kills DWA */
-	wusbhc_giveback_urb(xfer->wa->wusb, xfer->urb, xfer->result);
-	wa_put(xfer->wa);
-	wa_xfer_put(xfer);
-}
-
-/*
- * xfer is referenced
- *
- * xfer->lock has to be unlocked
- */
-static void wa_xfer_completion(struct wa_xfer *xfer)
-{
-	if (xfer->wusb_dev)
-		wusb_dev_put(xfer->wusb_dev);
-	rpipe_put(xfer->ep->hcpriv);
-	wa_xfer_giveback(xfer);
-}
-
-/*
- * Initialize a transfer's ID
- *
- * We need to use a sequential number; if we use the pointer or the
- * hash of the pointer, it can repeat over sequential transfers and
- * then it will confuse the HWA....wonder why in hell they put a 32
- * bit handle in there then.
- */
-static void wa_xfer_id_init(struct wa_xfer *xfer)
-{
-	xfer->id = atomic_add_return(1, &xfer->wa->xfer_id_count);
-}
-
-/* Return the xfer's ID. */
-static inline u32 wa_xfer_id(struct wa_xfer *xfer)
-{
-	return xfer->id;
-}
-
-/* Return the xfer's ID in transport format (little endian). */
-static inline __le32 wa_xfer_id_le32(struct wa_xfer *xfer)
-{
-	return cpu_to_le32(xfer->id);
-}
-
-/*
- * If transfer is done, wrap it up and return true
- *
- * xfer->lock has to be locked
- */
-static unsigned __wa_xfer_is_done(struct wa_xfer *xfer)
-{
-	struct device *dev = &xfer->wa->usb_iface->dev;
-	unsigned result, cnt;
-	struct wa_seg *seg;
-	struct urb *urb = xfer->urb;
-	unsigned found_short = 0;
-
-	result = xfer->segs_done == xfer->segs_submitted;
-	if (result == 0)
-		goto out;
-	urb->actual_length = 0;
-	for (cnt = 0; cnt < xfer->segs; cnt++) {
-		seg = xfer->seg[cnt];
-		switch (seg->status) {
-		case WA_SEG_DONE:
-			if (found_short && seg->result > 0) {
-				dev_dbg(dev, "xfer %p ID %08X#%u: bad short segments (%zu)\n",
-					xfer, wa_xfer_id(xfer), cnt,
-					seg->result);
-				urb->status = -EINVAL;
-				goto out;
-			}
-			urb->actual_length += seg->result;
-			if (!(usb_pipeisoc(xfer->urb->pipe))
-				&& seg->result < xfer->seg_size
-			    && cnt != xfer->segs-1)
-				found_short = 1;
-			dev_dbg(dev, "xfer %p ID %08X#%u: DONE short %d "
-				"result %zu urb->actual_length %d\n",
-				xfer, wa_xfer_id(xfer), seg->index, found_short,
-				seg->result, urb->actual_length);
-			break;
-		case WA_SEG_ERROR:
-			xfer->result = seg->result;
-			dev_dbg(dev, "xfer %p ID %08X#%u: ERROR result %zi(0x%08zX)\n",
-				xfer, wa_xfer_id(xfer), seg->index, seg->result,
-				seg->result);
-			goto out;
-		case WA_SEG_ABORTED:
-			xfer->result = seg->result;
-			dev_dbg(dev, "xfer %p ID %08X#%u: ABORTED result %zi(0x%08zX)\n",
-				xfer, wa_xfer_id(xfer), seg->index, seg->result,
-				seg->result);
-			goto out;
-		default:
-			dev_warn(dev, "xfer %p ID %08X#%u: is_done bad state %d\n",
-				 xfer, wa_xfer_id(xfer), cnt, seg->status);
-			xfer->result = -EINVAL;
-			goto out;
-		}
-	}
-	xfer->result = 0;
-out:
-	return result;
-}
-
-/*
- * Mark the given segment as done.  Return true if this completes the xfer.
- * This should only be called for segs that have been submitted to an RPIPE.
- * Delayed segs are not marked as submitted so they do not need to be marked
- * as done when cleaning up.
- *
- * xfer->lock has to be locked
- */
-static unsigned __wa_xfer_mark_seg_as_done(struct wa_xfer *xfer,
-	struct wa_seg *seg, enum wa_seg_status status)
-{
-	seg->status = status;
-	xfer->segs_done++;
-
-	/* check for done. */
-	return __wa_xfer_is_done(xfer);
-}
-
-/*
- * Search for a transfer list ID on the HCD's URB list
- *
- * For 32 bit architectures, we use the pointer itself; for 64 bits, a
- * 32-bit hash of the pointer.
- *
- * @returns NULL if not found.
- */
-static struct wa_xfer *wa_xfer_get_by_id(struct wahc *wa, u32 id)
-{
-	unsigned long flags;
-	struct wa_xfer *xfer_itr;
-	spin_lock_irqsave(&wa->xfer_list_lock, flags);
-	list_for_each_entry(xfer_itr, &wa->xfer_list, list_node) {
-		if (id == xfer_itr->id) {
-			wa_xfer_get(xfer_itr);
-			goto out;
-		}
-	}
-	xfer_itr = NULL;
-out:
-	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
-	return xfer_itr;
-}
-
-struct wa_xfer_abort_buffer {
-	struct urb urb;
-	struct wahc *wa;
-	struct wa_xfer_abort cmd;
-};
-
-static void __wa_xfer_abort_cb(struct urb *urb)
-{
-	struct wa_xfer_abort_buffer *b = urb->context;
-	struct wahc *wa = b->wa;
-
-	/*
-	 * If the abort request URB failed, then the HWA did not get the abort
-	 * command.  Forcibly clean up the xfer without waiting for a Transfer
-	 * Result from the HWA.
-	 */
-	if (urb->status < 0) {
-		struct wa_xfer *xfer;
-		struct device *dev = &wa->usb_iface->dev;
-
-		xfer = wa_xfer_get_by_id(wa, le32_to_cpu(b->cmd.dwTransferID));
-		dev_err(dev, "%s: Transfer Abort request failed. result: %d\n",
-			__func__, urb->status);
-		if (xfer) {
-			unsigned long flags;
-			int done, seg_index = 0;
-			struct wa_rpipe *rpipe = xfer->ep->hcpriv;
-
-			dev_err(dev, "%s: cleaning up xfer %p ID 0x%08X.\n",
-				__func__, xfer, wa_xfer_id(xfer));
-			spin_lock_irqsave(&xfer->lock, flags);
-			/* skip done segs. */
-			while (seg_index < xfer->segs) {
-				struct wa_seg *seg = xfer->seg[seg_index];
-
-				if ((seg->status == WA_SEG_DONE) ||
-					(seg->status == WA_SEG_ERROR)) {
-					++seg_index;
-				} else {
-					break;
-				}
-			}
-			/* mark remaining segs as aborted. */
-			wa_complete_remaining_xfer_segs(xfer, seg_index,
-				WA_SEG_ABORTED);
-			done = __wa_xfer_is_done(xfer);
-			spin_unlock_irqrestore(&xfer->lock, flags);
-			if (done)
-				wa_xfer_completion(xfer);
-			wa_xfer_delayed_run(rpipe);
-			wa_xfer_put(xfer);
-		} else {
-			dev_err(dev, "%s: xfer ID 0x%08X already gone.\n",
-				 __func__, le32_to_cpu(b->cmd.dwTransferID));
-		}
-	}
-
-	wa_put(wa);	/* taken in __wa_xfer_abort */
-	usb_put_urb(&b->urb);
-}
-
-/*
- * Aborts an ongoing transaction
- *
- * Assumes the transfer is referenced and locked and in a submitted
- * state (mainly that there is an endpoint/rpipe assigned).
- *
- * The callback (see above) does nothing but freeing up the data by
- * putting the URB. Because the URB is allocated at the head of the
- * struct, the whole space we allocated is kfreed. *
- */
-static int __wa_xfer_abort(struct wa_xfer *xfer)
-{
-	int result = -ENOMEM;
-	struct device *dev = &xfer->wa->usb_iface->dev;
-	struct wa_xfer_abort_buffer *b;
-	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
-
-	b = kmalloc(sizeof(*b), GFP_ATOMIC);
-	if (b == NULL)
-		goto error_kmalloc;
-	b->cmd.bLength =  sizeof(b->cmd);
-	b->cmd.bRequestType = WA_XFER_ABORT;
-	b->cmd.wRPipe = rpipe->descr.wRPipeIndex;
-	b->cmd.dwTransferID = wa_xfer_id_le32(xfer);
-	b->wa = wa_get(xfer->wa);
-
-	usb_init_urb(&b->urb);
-	usb_fill_bulk_urb(&b->urb, xfer->wa->usb_dev,
-		usb_sndbulkpipe(xfer->wa->usb_dev,
-				xfer->wa->dto_epd->bEndpointAddress),
-		&b->cmd, sizeof(b->cmd), __wa_xfer_abort_cb, b);
-	result = usb_submit_urb(&b->urb, GFP_ATOMIC);
-	if (result < 0)
-		goto error_submit;
-	return result;				/* callback frees! */
-
-
-error_submit:
-	wa_put(xfer->wa);
-	if (printk_ratelimit())
-		dev_err(dev, "xfer %p: Can't submit abort request: %d\n",
-			xfer, result);
-	kfree(b);
-error_kmalloc:
-	return result;
-
-}
-
-/*
- * Calculate the number of isoc frames starting from isoc_frame_offset
- * that will fit a in transfer segment.
- */
-static int __wa_seg_calculate_isoc_frame_count(struct wa_xfer *xfer,
-	int isoc_frame_offset, int *total_size)
-{
-	int segment_size = 0, frame_count = 0;
-	int index = isoc_frame_offset;
-	struct usb_iso_packet_descriptor *iso_frame_desc =
-		xfer->urb->iso_frame_desc;
-
-	while ((index < xfer->urb->number_of_packets)
-		&& ((segment_size + iso_frame_desc[index].length)
-				<= xfer->seg_size)) {
-		/*
-		 * For Alereon HWA devices, only include an isoc frame in an
-		 * out segment if it is physically contiguous with the previous
-		 * frame.  This is required because those devices expect
-		 * the isoc frames to be sent as a single USB transaction as
-		 * opposed to one transaction per frame with standard HWA.
-		 */
-		if ((xfer->wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC)
-			&& (xfer->is_inbound == 0)
-			&& (index > isoc_frame_offset)
-			&& ((iso_frame_desc[index - 1].offset +
-				iso_frame_desc[index - 1].length) !=
-				iso_frame_desc[index].offset))
-			break;
-
-		/* this frame fits. count it. */
-		++frame_count;
-		segment_size += iso_frame_desc[index].length;
-
-		/* move to the next isoc frame. */
-		++index;
-	}
-
-	*total_size = segment_size;
-	return frame_count;
-}
-
-/*
- *
- * @returns < 0 on error, transfer segment request size if ok
- */
-static ssize_t __wa_xfer_setup_sizes(struct wa_xfer *xfer,
-				     enum wa_xfer_type *pxfer_type)
-{
-	ssize_t result;
-	struct device *dev = &xfer->wa->usb_iface->dev;
-	size_t maxpktsize;
-	struct urb *urb = xfer->urb;
-	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
-
-	switch (rpipe->descr.bmAttribute & 0x3) {
-	case USB_ENDPOINT_XFER_CONTROL:
-		*pxfer_type = WA_XFER_TYPE_CTL;
-		result = sizeof(struct wa_xfer_ctl);
-		break;
-	case USB_ENDPOINT_XFER_INT:
-	case USB_ENDPOINT_XFER_BULK:
-		*pxfer_type = WA_XFER_TYPE_BI;
-		result = sizeof(struct wa_xfer_bi);
-		break;
-	case USB_ENDPOINT_XFER_ISOC:
-		*pxfer_type = WA_XFER_TYPE_ISO;
-		result = sizeof(struct wa_xfer_hwaiso);
-		break;
-	default:
-		/* never happens */
-		BUG();
-		result = -EINVAL;	/* shut gcc up */
-	}
-	xfer->is_inbound = urb->pipe & USB_DIR_IN ? 1 : 0;
-	xfer->is_dma = urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? 1 : 0;
-
-	maxpktsize = le16_to_cpu(rpipe->descr.wMaxPacketSize);
-	xfer->seg_size = le16_to_cpu(rpipe->descr.wBlocks)
-		* 1 << (xfer->wa->wa_descr->bRPipeBlockSize - 1);
-	/* Compute the segment size and make sure it is a multiple of
-	 * the maxpktsize (WUSB1.0[8.3.3.1])...not really too much of
-	 * a check (FIXME) */
-	if (xfer->seg_size < maxpktsize) {
-		dev_err(dev,
-			"HW BUG? seg_size %zu smaller than maxpktsize %zu\n",
-			xfer->seg_size, maxpktsize);
-		result = -EINVAL;
-		goto error;
-	}
-	xfer->seg_size = (xfer->seg_size / maxpktsize) * maxpktsize;
-	if ((rpipe->descr.bmAttribute & 0x3) == USB_ENDPOINT_XFER_ISOC) {
-		int index = 0;
-
-		xfer->segs = 0;
-		/*
-		 * loop over urb->number_of_packets to determine how many
-		 * xfer segments will be needed to send the isoc frames.
-		 */
-		while (index < urb->number_of_packets) {
-			int seg_size; /* don't care. */
-			index += __wa_seg_calculate_isoc_frame_count(xfer,
-					index, &seg_size);
-			++xfer->segs;
-		}
-	} else {
-		xfer->segs = DIV_ROUND_UP(urb->transfer_buffer_length,
-						xfer->seg_size);
-		if (xfer->segs == 0 && *pxfer_type == WA_XFER_TYPE_CTL)
-			xfer->segs = 1;
-	}
-
-	if (xfer->segs > WA_SEGS_MAX) {
-		dev_err(dev, "BUG? oops, number of segments %zu bigger than %d\n",
-			(urb->transfer_buffer_length/xfer->seg_size),
-			WA_SEGS_MAX);
-		result = -EINVAL;
-		goto error;
-	}
-error:
-	return result;
-}
-
-static void __wa_setup_isoc_packet_descr(
-		struct wa_xfer_packet_info_hwaiso *packet_desc,
-		struct wa_xfer *xfer,
-		struct wa_seg *seg) {
-	struct usb_iso_packet_descriptor *iso_frame_desc =
-		xfer->urb->iso_frame_desc;
-	int frame_index;
-
-	/* populate isoc packet descriptor. */
-	packet_desc->bPacketType = WA_XFER_ISO_PACKET_INFO;
-	packet_desc->wLength = cpu_to_le16(struct_size(packet_desc,
-					   PacketLength,
-					   seg->isoc_frame_count));
-	for (frame_index = 0; frame_index < seg->isoc_frame_count;
-		++frame_index) {
-		int offset_index = frame_index + seg->isoc_frame_offset;
-		packet_desc->PacketLength[frame_index] =
-			cpu_to_le16(iso_frame_desc[offset_index].length);
-	}
-}
-
-
-/* Fill in the common request header and xfer-type specific data. */
-static void __wa_xfer_setup_hdr0(struct wa_xfer *xfer,
-				 struct wa_xfer_hdr *xfer_hdr0,
-				 enum wa_xfer_type xfer_type,
-				 size_t xfer_hdr_size)
-{
-	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
-	struct wa_seg *seg = xfer->seg[0];
-
-	xfer_hdr0 = &seg->xfer_hdr;
-	xfer_hdr0->bLength = xfer_hdr_size;
-	xfer_hdr0->bRequestType = xfer_type;
-	xfer_hdr0->wRPipe = rpipe->descr.wRPipeIndex;
-	xfer_hdr0->dwTransferID = wa_xfer_id_le32(xfer);
-	xfer_hdr0->bTransferSegment = 0;
-	switch (xfer_type) {
-	case WA_XFER_TYPE_CTL: {
-		struct wa_xfer_ctl *xfer_ctl =
-			container_of(xfer_hdr0, struct wa_xfer_ctl, hdr);
-		xfer_ctl->bmAttribute = xfer->is_inbound ? 1 : 0;
-		memcpy(&xfer_ctl->baSetupData, xfer->urb->setup_packet,
-		       sizeof(xfer_ctl->baSetupData));
-		break;
-	}
-	case WA_XFER_TYPE_BI:
-		break;
-	case WA_XFER_TYPE_ISO: {
-		struct wa_xfer_hwaiso *xfer_iso =
-			container_of(xfer_hdr0, struct wa_xfer_hwaiso, hdr);
-		struct wa_xfer_packet_info_hwaiso *packet_desc =
-			((void *)xfer_iso) + xfer_hdr_size;
-
-		/* populate the isoc section of the transfer request. */
-		xfer_iso->dwNumOfPackets = cpu_to_le32(seg->isoc_frame_count);
-		/* populate isoc packet descriptor. */
-		__wa_setup_isoc_packet_descr(packet_desc, xfer, seg);
-		break;
-	}
-	default:
-		BUG();
-	};
-}
-
-/*
- * Callback for the OUT data phase of the segment request
- *
- * Check wa_seg_tr_cb(); most comments also apply here because this
- * function does almost the same thing and they work closely
- * together.
- *
- * If the seg request has failed but this DTO phase has succeeded,
- * wa_seg_tr_cb() has already failed the segment and moved the
- * status to WA_SEG_ERROR, so this will go through 'case 0' and
- * effectively do nothing.
- */
-static void wa_seg_dto_cb(struct urb *urb)
-{
-	struct wa_seg *seg = urb->context;
-	struct wa_xfer *xfer = seg->xfer;
-	struct wahc *wa;
-	struct device *dev;
-	struct wa_rpipe *rpipe;
-	unsigned long flags;
-	unsigned rpipe_ready = 0;
-	int data_send_done = 1, release_dto = 0, holding_dto = 0;
-	u8 done = 0;
-	int result;
-
-	/* free the sg if it was used. */
-	kfree(urb->sg);
-	urb->sg = NULL;
-
-	spin_lock_irqsave(&xfer->lock, flags);
-	wa = xfer->wa;
-	dev = &wa->usb_iface->dev;
-	if (usb_pipeisoc(xfer->urb->pipe)) {
-		/* Alereon HWA sends all isoc frames in a single transfer. */
-		if (wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC)
-			seg->isoc_frame_index += seg->isoc_frame_count;
-		else
-			seg->isoc_frame_index += 1;
-		if (seg->isoc_frame_index < seg->isoc_frame_count) {
-			data_send_done = 0;
-			holding_dto = 1; /* checked in error cases. */
-			/*
-			 * if this is the last isoc frame of the segment, we
-			 * can release DTO after sending this frame.
-			 */
-			if ((seg->isoc_frame_index + 1) >=
-				seg->isoc_frame_count)
-				release_dto = 1;
-		}
-		dev_dbg(dev, "xfer 0x%08X#%u: isoc frame = %d, holding_dto = %d, release_dto = %d.\n",
-			wa_xfer_id(xfer), seg->index, seg->isoc_frame_index,
-			holding_dto, release_dto);
-	}
-	spin_unlock_irqrestore(&xfer->lock, flags);
-
-	switch (urb->status) {
-	case 0:
-		spin_lock_irqsave(&xfer->lock, flags);
-		seg->result += urb->actual_length;
-		if (data_send_done) {
-			dev_dbg(dev, "xfer 0x%08X#%u: data out done (%zu bytes)\n",
-				wa_xfer_id(xfer), seg->index, seg->result);
-			if (seg->status < WA_SEG_PENDING)
-				seg->status = WA_SEG_PENDING;
-		} else {
-			/* should only hit this for isoc xfers. */
-			/*
-			 * Populate the dto URB with the next isoc frame buffer,
-			 * send the URB and release DTO if we no longer need it.
-			 */
-			 __wa_populate_dto_urb_isoc(xfer, seg,
-				seg->isoc_frame_offset + seg->isoc_frame_index);
-
-			/* resubmit the URB with the next isoc frame. */
-			/* take a ref on resubmit. */
-			wa_xfer_get(xfer);
-			result = usb_submit_urb(seg->dto_urb, GFP_ATOMIC);
-			if (result < 0) {
-				dev_err(dev, "xfer 0x%08X#%u: DTO submit failed: %d\n",
-				       wa_xfer_id(xfer), seg->index, result);
-				spin_unlock_irqrestore(&xfer->lock, flags);
-				goto error_dto_submit;
-			}
-		}
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		if (release_dto) {
-			__wa_dto_put(wa);
-			wa_check_for_delayed_rpipes(wa);
-		}
-		break;
-	case -ECONNRESET:	/* URB unlinked; no need to do anything */
-	case -ENOENT:		/* as it was done by the who unlinked us */
-		if (holding_dto) {
-			__wa_dto_put(wa);
-			wa_check_for_delayed_rpipes(wa);
-		}
-		break;
-	default:		/* Other errors ... */
-		dev_err(dev, "xfer 0x%08X#%u: data out error %d\n",
-			wa_xfer_id(xfer), seg->index, urb->status);
-		goto error_default;
-	}
-
-	/* taken when this URB was submitted. */
-	wa_xfer_put(xfer);
-	return;
-
-error_dto_submit:
-	/* taken on resubmit attempt. */
-	wa_xfer_put(xfer);
-error_default:
-	spin_lock_irqsave(&xfer->lock, flags);
-	rpipe = xfer->ep->hcpriv;
-	if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
-		    EDC_ERROR_TIMEFRAME)){
-		dev_err(dev, "DTO: URB max acceptable errors exceeded, resetting device\n");
-		wa_reset_all(wa);
-	}
-	if (seg->status != WA_SEG_ERROR) {
-		seg->result = urb->status;
-		__wa_xfer_abort(xfer);
-		rpipe_ready = rpipe_avail_inc(rpipe);
-		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_ERROR);
-	}
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	if (holding_dto) {
-		__wa_dto_put(wa);
-		wa_check_for_delayed_rpipes(wa);
-	}
-	if (done)
-		wa_xfer_completion(xfer);
-	if (rpipe_ready)
-		wa_xfer_delayed_run(rpipe);
-	/* taken when this URB was submitted. */
-	wa_xfer_put(xfer);
-}
-
-/*
- * Callback for the isoc packet descriptor phase of the segment request
- *
- * Check wa_seg_tr_cb(); most comments also apply here because this
- * function does almost the same thing and they work closely
- * together.
- *
- * If the seg request has failed but this phase has succeeded,
- * wa_seg_tr_cb() has already failed the segment and moved the
- * status to WA_SEG_ERROR, so this will go through 'case 0' and
- * effectively do nothing.
- */
-static void wa_seg_iso_pack_desc_cb(struct urb *urb)
-{
-	struct wa_seg *seg = urb->context;
-	struct wa_xfer *xfer = seg->xfer;
-	struct wahc *wa;
-	struct device *dev;
-	struct wa_rpipe *rpipe;
-	unsigned long flags;
-	unsigned rpipe_ready = 0;
-	u8 done = 0;
-
-	switch (urb->status) {
-	case 0:
-		spin_lock_irqsave(&xfer->lock, flags);
-		wa = xfer->wa;
-		dev = &wa->usb_iface->dev;
-		dev_dbg(dev, "iso xfer %08X#%u: packet descriptor done\n",
-			wa_xfer_id(xfer), seg->index);
-		if (xfer->is_inbound && seg->status < WA_SEG_PENDING)
-			seg->status = WA_SEG_PENDING;
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		break;
-	case -ECONNRESET:	/* URB unlinked; no need to do anything */
-	case -ENOENT:		/* as it was done by the who unlinked us */
-		break;
-	default:		/* Other errors ... */
-		spin_lock_irqsave(&xfer->lock, flags);
-		wa = xfer->wa;
-		dev = &wa->usb_iface->dev;
-		rpipe = xfer->ep->hcpriv;
-		pr_err_ratelimited("iso xfer %08X#%u: packet descriptor error %d\n",
-				wa_xfer_id(xfer), seg->index, urb->status);
-		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
-			    EDC_ERROR_TIMEFRAME)){
-			dev_err(dev, "iso xfer: URB max acceptable errors exceeded, resetting device\n");
-			wa_reset_all(wa);
-		}
-		if (seg->status != WA_SEG_ERROR) {
-			usb_unlink_urb(seg->dto_urb);
-			seg->result = urb->status;
-			__wa_xfer_abort(xfer);
-			rpipe_ready = rpipe_avail_inc(rpipe);
-			done = __wa_xfer_mark_seg_as_done(xfer, seg,
-					WA_SEG_ERROR);
-		}
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		if (done)
-			wa_xfer_completion(xfer);
-		if (rpipe_ready)
-			wa_xfer_delayed_run(rpipe);
-	}
-	/* taken when this URB was submitted. */
-	wa_xfer_put(xfer);
-}
-
-/*
- * Callback for the segment request
- *
- * If successful transition state (unless already transitioned or
- * outbound transfer); otherwise, take a note of the error, mark this
- * segment done and try completion.
- *
- * Note we don't access until we are sure that the transfer hasn't
- * been cancelled (ECONNRESET, ENOENT), which could mean that
- * seg->xfer could be already gone.
- *
- * We have to check before setting the status to WA_SEG_PENDING
- * because sometimes the xfer result callback arrives before this
- * callback (geeeeeeze), so it might happen that we are already in
- * another state. As well, we don't set it if the transfer is not inbound,
- * as in that case, wa_seg_dto_cb will do it when the OUT data phase
- * finishes.
- */
-static void wa_seg_tr_cb(struct urb *urb)
-{
-	struct wa_seg *seg = urb->context;
-	struct wa_xfer *xfer = seg->xfer;
-	struct wahc *wa;
-	struct device *dev;
-	struct wa_rpipe *rpipe;
-	unsigned long flags;
-	unsigned rpipe_ready;
-	u8 done = 0;
-
-	switch (urb->status) {
-	case 0:
-		spin_lock_irqsave(&xfer->lock, flags);
-		wa = xfer->wa;
-		dev = &wa->usb_iface->dev;
-		dev_dbg(dev, "xfer %p ID 0x%08X#%u: request done\n",
-			xfer, wa_xfer_id(xfer), seg->index);
-		if (xfer->is_inbound &&
-			seg->status < WA_SEG_PENDING &&
-			!(usb_pipeisoc(xfer->urb->pipe)))
-			seg->status = WA_SEG_PENDING;
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		break;
-	case -ECONNRESET:	/* URB unlinked; no need to do anything */
-	case -ENOENT:		/* as it was done by the who unlinked us */
-		break;
-	default:		/* Other errors ... */
-		spin_lock_irqsave(&xfer->lock, flags);
-		wa = xfer->wa;
-		dev = &wa->usb_iface->dev;
-		rpipe = xfer->ep->hcpriv;
-		if (printk_ratelimit())
-			dev_err(dev, "xfer %p ID 0x%08X#%u: request error %d\n",
-				xfer, wa_xfer_id(xfer), seg->index,
-				urb->status);
-		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
-			    EDC_ERROR_TIMEFRAME)){
-			dev_err(dev, "DTO: URB max acceptable errors "
-				"exceeded, resetting device\n");
-			wa_reset_all(wa);
-		}
-		usb_unlink_urb(seg->isoc_pack_desc_urb);
-		usb_unlink_urb(seg->dto_urb);
-		seg->result = urb->status;
-		__wa_xfer_abort(xfer);
-		rpipe_ready = rpipe_avail_inc(rpipe);
-		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_ERROR);
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		if (done)
-			wa_xfer_completion(xfer);
-		if (rpipe_ready)
-			wa_xfer_delayed_run(rpipe);
-	}
-	/* taken when this URB was submitted. */
-	wa_xfer_put(xfer);
-}
-
-/*
- * Allocate an SG list to store bytes_to_transfer bytes and copy the
- * subset of the in_sg that matches the buffer subset
- * we are about to transfer.
- */
-static struct scatterlist *wa_xfer_create_subset_sg(struct scatterlist *in_sg,
-	const unsigned int bytes_transferred,
-	const unsigned int bytes_to_transfer, int *out_num_sgs)
-{
-	struct scatterlist *out_sg;
-	unsigned int bytes_processed = 0, offset_into_current_page_data = 0,
-		nents;
-	struct scatterlist *current_xfer_sg = in_sg;
-	struct scatterlist *current_seg_sg, *last_seg_sg;
-
-	/* skip previously transferred pages. */
-	while ((current_xfer_sg) &&
-			(bytes_processed < bytes_transferred)) {
-		bytes_processed += current_xfer_sg->length;
-
-		/* advance the sg if current segment starts on or past the
-			next page. */
-		if (bytes_processed <= bytes_transferred)
-			current_xfer_sg = sg_next(current_xfer_sg);
-	}
-
-	/* the data for the current segment starts in current_xfer_sg.
-		calculate the offset. */
-	if (bytes_processed > bytes_transferred) {
-		offset_into_current_page_data = current_xfer_sg->length -
-			(bytes_processed - bytes_transferred);
-	}
-
-	/* calculate the number of pages needed by this segment. */
-	nents = DIV_ROUND_UP((bytes_to_transfer +
-		offset_into_current_page_data +
-		current_xfer_sg->offset),
-		PAGE_SIZE);
-
-	out_sg = kmalloc((sizeof(struct scatterlist) * nents), GFP_ATOMIC);
-	if (out_sg) {
-		sg_init_table(out_sg, nents);
-
-		/* copy the portion of the incoming SG that correlates to the
-		 * data to be transferred by this segment to the segment SG. */
-		last_seg_sg = current_seg_sg = out_sg;
-		bytes_processed = 0;
-
-		/* reset nents and calculate the actual number of sg entries
-			needed. */
-		nents = 0;
-		while ((bytes_processed < bytes_to_transfer) &&
-				current_seg_sg && current_xfer_sg) {
-			unsigned int page_len = min((current_xfer_sg->length -
-				offset_into_current_page_data),
-				(bytes_to_transfer - bytes_processed));
-
-			sg_set_page(current_seg_sg, sg_page(current_xfer_sg),
-				page_len,
-				current_xfer_sg->offset +
-				offset_into_current_page_data);
-
-			bytes_processed += page_len;
-
-			last_seg_sg = current_seg_sg;
-			current_seg_sg = sg_next(current_seg_sg);
-			current_xfer_sg = sg_next(current_xfer_sg);
-
-			/* only the first page may require additional offset. */
-			offset_into_current_page_data = 0;
-			nents++;
-		}
-
-		/* update num_sgs and terminate the list since we may have
-		 *  concatenated pages. */
-		sg_mark_end(last_seg_sg);
-		*out_num_sgs = nents;
-	}
-
-	return out_sg;
-}
-
-/*
- * Populate DMA buffer info for the isoc dto urb.
- */
-static void __wa_populate_dto_urb_isoc(struct wa_xfer *xfer,
-	struct wa_seg *seg, int curr_iso_frame)
-{
-	seg->dto_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-	seg->dto_urb->sg = NULL;
-	seg->dto_urb->num_sgs = 0;
-	/* dto urb buffer address pulled from iso_frame_desc. */
-	seg->dto_urb->transfer_dma = xfer->urb->transfer_dma +
-		xfer->urb->iso_frame_desc[curr_iso_frame].offset;
-	/* The Alereon HWA sends a single URB with all isoc segs. */
-	if (xfer->wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC)
-		seg->dto_urb->transfer_buffer_length = seg->isoc_size;
-	else
-		seg->dto_urb->transfer_buffer_length =
-			xfer->urb->iso_frame_desc[curr_iso_frame].length;
-}
-
-/*
- * Populate buffer ptr and size, DMA buffer or SG list for the dto urb.
- */
-static int __wa_populate_dto_urb(struct wa_xfer *xfer,
-	struct wa_seg *seg, size_t buf_itr_offset, size_t buf_itr_size)
-{
-	int result = 0;
-
-	if (xfer->is_dma) {
-		seg->dto_urb->transfer_dma =
-			xfer->urb->transfer_dma + buf_itr_offset;
-		seg->dto_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-		seg->dto_urb->sg = NULL;
-		seg->dto_urb->num_sgs = 0;
-	} else {
-		/* do buffer or SG processing. */
-		seg->dto_urb->transfer_flags &=
-			~URB_NO_TRANSFER_DMA_MAP;
-		/* this should always be 0 before a resubmit. */
-		seg->dto_urb->num_mapped_sgs = 0;
-
-		if (xfer->urb->transfer_buffer) {
-			seg->dto_urb->transfer_buffer =
-				xfer->urb->transfer_buffer +
-				buf_itr_offset;
-			seg->dto_urb->sg = NULL;
-			seg->dto_urb->num_sgs = 0;
-		} else {
-			seg->dto_urb->transfer_buffer = NULL;
-
-			/*
-			 * allocate an SG list to store seg_size bytes
-			 * and copy the subset of the xfer->urb->sg that
-			 * matches the buffer subset we are about to
-			 * read.
-			 */
-			seg->dto_urb->sg = wa_xfer_create_subset_sg(
-				xfer->urb->sg,
-				buf_itr_offset, buf_itr_size,
-				&(seg->dto_urb->num_sgs));
-			if (!(seg->dto_urb->sg))
-				result = -ENOMEM;
-		}
-	}
-	seg->dto_urb->transfer_buffer_length = buf_itr_size;
-
-	return result;
-}
-
-/*
- * Allocate the segs array and initialize each of them
- *
- * The segments are freed by wa_xfer_destroy() when the xfer use count
- * drops to zero; however, because each segment is given the same life
- * cycle as the USB URB it contains, it is actually freed by
- * usb_put_urb() on the contained USB URB (twisted, eh?).
- */
-static int __wa_xfer_setup_segs(struct wa_xfer *xfer, size_t xfer_hdr_size)
-{
-	int result, cnt, isoc_frame_offset = 0;
-	size_t alloc_size = sizeof(*xfer->seg[0])
-		- sizeof(xfer->seg[0]->xfer_hdr) + xfer_hdr_size;
-	struct usb_device *usb_dev = xfer->wa->usb_dev;
-	const struct usb_endpoint_descriptor *dto_epd = xfer->wa->dto_epd;
-	struct wa_seg *seg;
-	size_t buf_itr, buf_size, buf_itr_size;
-
-	result = -ENOMEM;
-	xfer->seg = kcalloc(xfer->segs, sizeof(xfer->seg[0]), GFP_ATOMIC);
-	if (xfer->seg == NULL)
-		goto error_segs_kzalloc;
-	buf_itr = 0;
-	buf_size = xfer->urb->transfer_buffer_length;
-	for (cnt = 0; cnt < xfer->segs; cnt++) {
-		size_t iso_pkt_descr_size = 0;
-		int seg_isoc_frame_count = 0, seg_isoc_size = 0;
-
-		/*
-		 * Adjust the size of the segment object to contain space for
-		 * the isoc packet descriptor buffer.
-		 */
-		if (usb_pipeisoc(xfer->urb->pipe)) {
-			seg_isoc_frame_count =
-				__wa_seg_calculate_isoc_frame_count(xfer,
-					isoc_frame_offset, &seg_isoc_size);
-
-			iso_pkt_descr_size =
-				sizeof(struct wa_xfer_packet_info_hwaiso) +
-				(seg_isoc_frame_count * sizeof(__le16));
-		}
-		result = -ENOMEM;
-		seg = xfer->seg[cnt] = kmalloc(alloc_size + iso_pkt_descr_size,
-						GFP_ATOMIC);
-		if (seg == NULL)
-			goto error_seg_kmalloc;
-		wa_seg_init(seg);
-		seg->xfer = xfer;
-		seg->index = cnt;
-		usb_fill_bulk_urb(&seg->tr_urb, usb_dev,
-				  usb_sndbulkpipe(usb_dev,
-						  dto_epd->bEndpointAddress),
-				  &seg->xfer_hdr, xfer_hdr_size,
-				  wa_seg_tr_cb, seg);
-		buf_itr_size = min(buf_size, xfer->seg_size);
-
-		if (usb_pipeisoc(xfer->urb->pipe)) {
-			seg->isoc_frame_count = seg_isoc_frame_count;
-			seg->isoc_frame_offset = isoc_frame_offset;
-			seg->isoc_size = seg_isoc_size;
-			/* iso packet descriptor. */
-			seg->isoc_pack_desc_urb =
-					usb_alloc_urb(0, GFP_ATOMIC);
-			if (seg->isoc_pack_desc_urb == NULL)
-				goto error_iso_pack_desc_alloc;
-			/*
-			 * The buffer for the isoc packet descriptor starts
-			 * after the transfer request header in the
-			 * segment object memory buffer.
-			 */
-			usb_fill_bulk_urb(
-				seg->isoc_pack_desc_urb, usb_dev,
-				usb_sndbulkpipe(usb_dev,
-					dto_epd->bEndpointAddress),
-				(void *)(&seg->xfer_hdr) +
-					xfer_hdr_size,
-				iso_pkt_descr_size,
-				wa_seg_iso_pack_desc_cb, seg);
-
-			/* adjust starting frame offset for next seg. */
-			isoc_frame_offset += seg_isoc_frame_count;
-		}
-
-		if (xfer->is_inbound == 0 && buf_size > 0) {
-			/* outbound data. */
-			seg->dto_urb = usb_alloc_urb(0, GFP_ATOMIC);
-			if (seg->dto_urb == NULL)
-				goto error_dto_alloc;
-			usb_fill_bulk_urb(
-				seg->dto_urb, usb_dev,
-				usb_sndbulkpipe(usb_dev,
-						dto_epd->bEndpointAddress),
-				NULL, 0, wa_seg_dto_cb, seg);
-
-			if (usb_pipeisoc(xfer->urb->pipe)) {
-				/*
-				 * Fill in the xfer buffer information for the
-				 * first isoc frame.  Subsequent frames in this
-				 * segment will be filled in and sent from the
-				 * DTO completion routine, if needed.
-				 */
-				__wa_populate_dto_urb_isoc(xfer, seg,
-					seg->isoc_frame_offset);
-			} else {
-				/* fill in the xfer buffer information. */
-				result = __wa_populate_dto_urb(xfer, seg,
-							buf_itr, buf_itr_size);
-				if (result < 0)
-					goto error_seg_outbound_populate;
-
-				buf_itr += buf_itr_size;
-				buf_size -= buf_itr_size;
-			}
-		}
-		seg->status = WA_SEG_READY;
-	}
-	return 0;
-
-	/*
-	 * Free the memory for the current segment which failed to init.
-	 * Use the fact that cnt is left at were it failed.  The remaining
-	 * segments will be cleaned up by wa_xfer_destroy.
-	 */
-error_seg_outbound_populate:
-	usb_free_urb(xfer->seg[cnt]->dto_urb);
-error_dto_alloc:
-	usb_free_urb(xfer->seg[cnt]->isoc_pack_desc_urb);
-error_iso_pack_desc_alloc:
-	kfree(xfer->seg[cnt]);
-	xfer->seg[cnt] = NULL;
-error_seg_kmalloc:
-error_segs_kzalloc:
-	return result;
-}
-
-/*
- * Allocates all the stuff needed to submit a transfer
- *
- * Breaks the whole data buffer in a list of segments, each one has a
- * structure allocated to it and linked in xfer->seg[index]
- *
- * FIXME: merge setup_segs() and the last part of this function, no
- *        need to do two for loops when we could run everything in a
- *        single one
- */
-static int __wa_xfer_setup(struct wa_xfer *xfer, struct urb *urb)
-{
-	int result;
-	struct device *dev = &xfer->wa->usb_iface->dev;
-	enum wa_xfer_type xfer_type = 0; /* shut up GCC */
-	size_t xfer_hdr_size, cnt, transfer_size;
-	struct wa_xfer_hdr *xfer_hdr0, *xfer_hdr;
-
-	result = __wa_xfer_setup_sizes(xfer, &xfer_type);
-	if (result < 0)
-		goto error_setup_sizes;
-	xfer_hdr_size = result;
-	result = __wa_xfer_setup_segs(xfer, xfer_hdr_size);
-	if (result < 0) {
-		dev_err(dev, "xfer %p: Failed to allocate %d segments: %d\n",
-			xfer, xfer->segs, result);
-		goto error_setup_segs;
-	}
-	/* Fill the first header */
-	xfer_hdr0 = &xfer->seg[0]->xfer_hdr;
-	wa_xfer_id_init(xfer);
-	__wa_xfer_setup_hdr0(xfer, xfer_hdr0, xfer_type, xfer_hdr_size);
-
-	/* Fill remaining headers */
-	xfer_hdr = xfer_hdr0;
-	if (xfer_type == WA_XFER_TYPE_ISO) {
-		xfer_hdr0->dwTransferLength =
-			cpu_to_le32(xfer->seg[0]->isoc_size);
-		for (cnt = 1; cnt < xfer->segs; cnt++) {
-			struct wa_xfer_packet_info_hwaiso *packet_desc;
-			struct wa_seg *seg = xfer->seg[cnt];
-			struct wa_xfer_hwaiso *xfer_iso;
-
-			xfer_hdr = &seg->xfer_hdr;
-			xfer_iso = container_of(xfer_hdr,
-						struct wa_xfer_hwaiso, hdr);
-			packet_desc = ((void *)xfer_hdr) + xfer_hdr_size;
-			/*
-			 * Copy values from the 0th header. Segment specific
-			 * values are set below.
-			 */
-			memcpy(xfer_hdr, xfer_hdr0, xfer_hdr_size);
-			xfer_hdr->bTransferSegment = cnt;
-			xfer_hdr->dwTransferLength =
-				cpu_to_le32(seg->isoc_size);
-			xfer_iso->dwNumOfPackets =
-					cpu_to_le32(seg->isoc_frame_count);
-			__wa_setup_isoc_packet_descr(packet_desc, xfer, seg);
-			seg->status = WA_SEG_READY;
-		}
-	} else {
-		transfer_size = urb->transfer_buffer_length;
-		xfer_hdr0->dwTransferLength = transfer_size > xfer->seg_size ?
-			cpu_to_le32(xfer->seg_size) :
-			cpu_to_le32(transfer_size);
-		transfer_size -=  xfer->seg_size;
-		for (cnt = 1; cnt < xfer->segs; cnt++) {
-			xfer_hdr = &xfer->seg[cnt]->xfer_hdr;
-			memcpy(xfer_hdr, xfer_hdr0, xfer_hdr_size);
-			xfer_hdr->bTransferSegment = cnt;
-			xfer_hdr->dwTransferLength =
-				transfer_size > xfer->seg_size ?
-					cpu_to_le32(xfer->seg_size)
-					: cpu_to_le32(transfer_size);
-			xfer->seg[cnt]->status = WA_SEG_READY;
-			transfer_size -=  xfer->seg_size;
-		}
-	}
-	xfer_hdr->bTransferSegment |= 0x80;	/* this is the last segment */
-	result = 0;
-error_setup_segs:
-error_setup_sizes:
-	return result;
-}
-
-/*
- *
- *
- * rpipe->seg_lock is held!
- */
-static int __wa_seg_submit(struct wa_rpipe *rpipe, struct wa_xfer *xfer,
-			   struct wa_seg *seg, int *dto_done)
-{
-	int result;
-
-	/* default to done unless we encounter a multi-frame isoc segment. */
-	*dto_done = 1;
-
-	/*
-	 * Take a ref for each segment urb so the xfer cannot disappear until
-	 * all of the callbacks run.
-	 */
-	wa_xfer_get(xfer);
-	/* submit the transfer request. */
-	seg->status = WA_SEG_SUBMITTED;
-	result = usb_submit_urb(&seg->tr_urb, GFP_ATOMIC);
-	if (result < 0) {
-		pr_err("%s: xfer %p#%u: REQ submit failed: %d\n",
-		       __func__, xfer, seg->index, result);
-		wa_xfer_put(xfer);
-		goto error_tr_submit;
-	}
-	/* submit the isoc packet descriptor if present. */
-	if (seg->isoc_pack_desc_urb) {
-		wa_xfer_get(xfer);
-		result = usb_submit_urb(seg->isoc_pack_desc_urb, GFP_ATOMIC);
-		seg->isoc_frame_index = 0;
-		if (result < 0) {
-			pr_err("%s: xfer %p#%u: ISO packet descriptor submit failed: %d\n",
-			       __func__, xfer, seg->index, result);
-			wa_xfer_put(xfer);
-			goto error_iso_pack_desc_submit;
-		}
-	}
-	/* submit the out data if this is an out request. */
-	if (seg->dto_urb) {
-		struct wahc *wa = xfer->wa;
-		wa_xfer_get(xfer);
-		result = usb_submit_urb(seg->dto_urb, GFP_ATOMIC);
-		if (result < 0) {
-			pr_err("%s: xfer %p#%u: DTO submit failed: %d\n",
-			       __func__, xfer, seg->index, result);
-			wa_xfer_put(xfer);
-			goto error_dto_submit;
-		}
-		/*
-		 * If this segment contains more than one isoc frame, hold
-		 * onto the dto resource until we send all frames.
-		 * Only applies to non-Alereon devices.
-		 */
-		if (((wa->quirks & WUSB_QUIRK_ALEREON_HWA_CONCAT_ISOC) == 0)
-			&& (seg->isoc_frame_count > 1))
-			*dto_done = 0;
-	}
-	rpipe_avail_dec(rpipe);
-	return 0;
-
-error_dto_submit:
-	usb_unlink_urb(seg->isoc_pack_desc_urb);
-error_iso_pack_desc_submit:
-	usb_unlink_urb(&seg->tr_urb);
-error_tr_submit:
-	seg->status = WA_SEG_ERROR;
-	seg->result = result;
-	*dto_done = 1;
-	return result;
-}
-
-/*
- * Execute more queued request segments until the maximum concurrent allowed.
- * Return true if the DTO resource was acquired and released.
- *
- * The ugly unlock/lock sequence on the error path is needed as the
- * xfer->lock normally nests the seg_lock and not viceversa.
- */
-static int __wa_xfer_delayed_run(struct wa_rpipe *rpipe, int *dto_waiting)
-{
-	int result, dto_acquired = 0, dto_done = 0;
-	struct device *dev = &rpipe->wa->usb_iface->dev;
-	struct wa_seg *seg;
-	struct wa_xfer *xfer;
-	unsigned long flags;
-
-	*dto_waiting = 0;
-
-	spin_lock_irqsave(&rpipe->seg_lock, flags);
-	while (atomic_read(&rpipe->segs_available) > 0
-	      && !list_empty(&rpipe->seg_list)
-	      && (dto_acquired = __wa_dto_try_get(rpipe->wa))) {
-		seg = list_first_entry(&(rpipe->seg_list), struct wa_seg,
-				 list_node);
-		list_del(&seg->list_node);
-		xfer = seg->xfer;
-		/*
-		 * Get a reference to the xfer in case the callbacks for the
-		 * URBs submitted by __wa_seg_submit attempt to complete
-		 * the xfer before this function completes.
-		 */
-		wa_xfer_get(xfer);
-		result = __wa_seg_submit(rpipe, xfer, seg, &dto_done);
-		/* release the dto resource if this RPIPE is done with it. */
-		if (dto_done)
-			__wa_dto_put(rpipe->wa);
-		dev_dbg(dev, "xfer %p ID %08X#%u submitted from delayed [%d segments available] %d\n",
-			xfer, wa_xfer_id(xfer), seg->index,
-			atomic_read(&rpipe->segs_available), result);
-		if (unlikely(result < 0)) {
-			int done;
-
-			spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-			spin_lock_irqsave(&xfer->lock, flags);
-			__wa_xfer_abort(xfer);
-			/*
-			 * This seg was marked as submitted when it was put on
-			 * the RPIPE seg_list.  Mark it done.
-			 */
-			xfer->segs_done++;
-			done = __wa_xfer_is_done(xfer);
-			spin_unlock_irqrestore(&xfer->lock, flags);
-			if (done)
-				wa_xfer_completion(xfer);
-			spin_lock_irqsave(&rpipe->seg_lock, flags);
-		}
-		wa_xfer_put(xfer);
-	}
-	/*
-	 * Mark this RPIPE as waiting if dto was not acquired, there are
-	 * delayed segs and no active transfers to wake us up later.
-	 */
-	if (!dto_acquired && !list_empty(&rpipe->seg_list)
-		&& (atomic_read(&rpipe->segs_available) ==
-			le16_to_cpu(rpipe->descr.wRequests)))
-		*dto_waiting = 1;
-
-	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-
-	return dto_done;
-}
-
-static void wa_xfer_delayed_run(struct wa_rpipe *rpipe)
-{
-	int dto_waiting;
-	int dto_done = __wa_xfer_delayed_run(rpipe, &dto_waiting);
-
-	/*
-	 * If this RPIPE is waiting on the DTO resource, add it to the tail of
-	 * the waiting list.
-	 * Otherwise, if the WA DTO resource was acquired and released by
-	 *  __wa_xfer_delayed_run, another RPIPE may have attempted to acquire
-	 * DTO and failed during that time.  Check the delayed list and process
-	 * any waiters.  Start searching from the next RPIPE index.
-	 */
-	if (dto_waiting)
-		wa_add_delayed_rpipe(rpipe->wa, rpipe);
-	else if (dto_done)
-		wa_check_for_delayed_rpipes(rpipe->wa);
-}
-
-/*
- *
- * xfer->lock is taken
- *
- * On failure submitting we just stop submitting and return error;
- * wa_urb_enqueue_b() will execute the completion path
- */
-static int __wa_xfer_submit(struct wa_xfer *xfer)
-{
-	int result, dto_acquired = 0, dto_done = 0, dto_waiting = 0;
-	struct wahc *wa = xfer->wa;
-	struct device *dev = &wa->usb_iface->dev;
-	unsigned cnt;
-	struct wa_seg *seg;
-	unsigned long flags;
-	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
-	size_t maxrequests = le16_to_cpu(rpipe->descr.wRequests);
-	u8 available;
-	u8 empty;
-
-	spin_lock_irqsave(&wa->xfer_list_lock, flags);
-	list_add_tail(&xfer->list_node, &wa->xfer_list);
-	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
-
-	BUG_ON(atomic_read(&rpipe->segs_available) > maxrequests);
-	result = 0;
-	spin_lock_irqsave(&rpipe->seg_lock, flags);
-	for (cnt = 0; cnt < xfer->segs; cnt++) {
-		int delay_seg = 1;
-
-		available = atomic_read(&rpipe->segs_available);
-		empty = list_empty(&rpipe->seg_list);
-		seg = xfer->seg[cnt];
-		if (available && empty) {
-			/*
-			 * Only attempt to acquire DTO if we have a segment
-			 * to send.
-			 */
-			dto_acquired = __wa_dto_try_get(rpipe->wa);
-			if (dto_acquired) {
-				delay_seg = 0;
-				result = __wa_seg_submit(rpipe, xfer, seg,
-							&dto_done);
-				dev_dbg(dev, "xfer %p ID 0x%08X#%u: available %u empty %u submitted\n",
-					xfer, wa_xfer_id(xfer), cnt, available,
-					empty);
-				if (dto_done)
-					__wa_dto_put(rpipe->wa);
-
-				if (result < 0) {
-					__wa_xfer_abort(xfer);
-					goto error_seg_submit;
-				}
-			}
-		}
-
-		if (delay_seg) {
-			dev_dbg(dev, "xfer %p ID 0x%08X#%u: available %u empty %u delayed\n",
-				xfer, wa_xfer_id(xfer), cnt, available,  empty);
-			seg->status = WA_SEG_DELAYED;
-			list_add_tail(&seg->list_node, &rpipe->seg_list);
-		}
-		xfer->segs_submitted++;
-	}
-error_seg_submit:
-	/*
-	 * Mark this RPIPE as waiting if dto was not acquired, there are
-	 * delayed segs and no active transfers to wake us up later.
-	 */
-	if (!dto_acquired && !list_empty(&rpipe->seg_list)
-		&& (atomic_read(&rpipe->segs_available) ==
-			le16_to_cpu(rpipe->descr.wRequests)))
-		dto_waiting = 1;
-	spin_unlock_irqrestore(&rpipe->seg_lock, flags);
-
-	if (dto_waiting)
-		wa_add_delayed_rpipe(rpipe->wa, rpipe);
-	else if (dto_done)
-		wa_check_for_delayed_rpipes(rpipe->wa);
-
-	return result;
-}
-
-/*
- * Second part of a URB/transfer enqueuement
- *
- * Assumes this comes from wa_urb_enqueue() [maybe through
- * wa_urb_enqueue_run()]. At this point:
- *
- * xfer->wa	filled and refcounted
- * xfer->ep	filled with rpipe refcounted if
- *              delayed == 0
- * xfer->urb 	filled and refcounted (this is the case when called
- *              from wa_urb_enqueue() as we come from usb_submit_urb()
- *              and when called by wa_urb_enqueue_run(), as we took an
- *              extra ref dropped by _run() after we return).
- * xfer->gfp	filled
- *
- * If we fail at __wa_xfer_submit(), then we just check if we are done
- * and if so, we run the completion procedure. However, if we are not
- * yet done, we do nothing and wait for the completion handlers from
- * the submitted URBs or from the xfer-result path to kick in. If xfer
- * result never kicks in, the xfer will timeout from the USB code and
- * dequeue() will be called.
- */
-static int wa_urb_enqueue_b(struct wa_xfer *xfer)
-{
-	int result;
-	unsigned long flags;
-	struct urb *urb = xfer->urb;
-	struct wahc *wa = xfer->wa;
-	struct wusbhc *wusbhc = wa->wusb;
-	struct wusb_dev *wusb_dev;
-	unsigned done;
-
-	result = rpipe_get_by_ep(wa, xfer->ep, urb, xfer->gfp);
-	if (result < 0) {
-		pr_err("%s: error_rpipe_get\n", __func__);
-		goto error_rpipe_get;
-	}
-	result = -ENODEV;
-	/* FIXME: segmentation broken -- kills DWA */
-	mutex_lock(&wusbhc->mutex);		/* get a WUSB dev */
-	if (urb->dev == NULL) {
-		mutex_unlock(&wusbhc->mutex);
-		pr_err("%s: error usb dev gone\n", __func__);
-		goto error_dev_gone;
-	}
-	wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc, urb->dev);
-	if (wusb_dev == NULL) {
-		mutex_unlock(&wusbhc->mutex);
-		dev_err(&(urb->dev->dev), "%s: error wusb dev gone\n",
-			__func__);
-		goto error_dev_gone;
-	}
-	mutex_unlock(&wusbhc->mutex);
-
-	spin_lock_irqsave(&xfer->lock, flags);
-	xfer->wusb_dev = wusb_dev;
-	result = urb->status;
-	if (urb->status != -EINPROGRESS) {
-		dev_err(&(urb->dev->dev), "%s: error_dequeued\n", __func__);
-		goto error_dequeued;
-	}
-
-	result = __wa_xfer_setup(xfer, urb);
-	if (result < 0) {
-		dev_err(&(urb->dev->dev), "%s: error_xfer_setup\n", __func__);
-		goto error_xfer_setup;
-	}
-	/*
-	 * Get a xfer reference since __wa_xfer_submit starts asynchronous
-	 * operations that may try to complete the xfer before this function
-	 * exits.
-	 */
-	wa_xfer_get(xfer);
-	result = __wa_xfer_submit(xfer);
-	if (result < 0) {
-		dev_err(&(urb->dev->dev), "%s: error_xfer_submit\n", __func__);
-		goto error_xfer_submit;
-	}
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	wa_xfer_put(xfer);
-	return 0;
-
-	/*
-	 * this is basically wa_xfer_completion() broken up wa_xfer_giveback()
-	 * does a wa_xfer_put() that will call wa_xfer_destroy() and undo
-	 * setup().
-	 */
-error_xfer_setup:
-error_dequeued:
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	/* FIXME: segmentation broken, kills DWA */
-	if (wusb_dev)
-		wusb_dev_put(wusb_dev);
-error_dev_gone:
-	rpipe_put(xfer->ep->hcpriv);
-error_rpipe_get:
-	xfer->result = result;
-	return result;
-
-error_xfer_submit:
-	done = __wa_xfer_is_done(xfer);
-	xfer->result = result;
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	if (done)
-		wa_xfer_completion(xfer);
-	wa_xfer_put(xfer);
-	/* return success since the completion routine will run. */
-	return 0;
-}
-
-/*
- * Execute the delayed transfers in the Wire Adapter @wa
- *
- * We need to be careful here, as dequeue() could be called in the
- * middle.  That's why we do the whole thing under the
- * wa->xfer_list_lock. If dequeue() jumps in, it first locks xfer->lock
- * and then checks the list -- so as we would be acquiring in inverse
- * order, we move the delayed list to a separate list while locked and then
- * submit them without the list lock held.
- */
-void wa_urb_enqueue_run(struct work_struct *ws)
-{
-	struct wahc *wa = container_of(ws, struct wahc, xfer_enqueue_work);
-	struct wa_xfer *xfer, *next;
-	struct urb *urb;
-	LIST_HEAD(tmp_list);
-
-	/* Create a copy of the wa->xfer_delayed_list while holding the lock */
-	spin_lock_irq(&wa->xfer_list_lock);
-	list_cut_position(&tmp_list, &wa->xfer_delayed_list,
-			wa->xfer_delayed_list.prev);
-	spin_unlock_irq(&wa->xfer_list_lock);
-
-	/*
-	 * enqueue from temp list without list lock held since wa_urb_enqueue_b
-	 * can take xfer->lock as well as lock mutexes.
-	 */
-	list_for_each_entry_safe(xfer, next, &tmp_list, list_node) {
-		list_del_init(&xfer->list_node);
-
-		urb = xfer->urb;
-		if (wa_urb_enqueue_b(xfer) < 0)
-			wa_xfer_giveback(xfer);
-		usb_put_urb(urb);	/* taken when queuing */
-	}
-}
-EXPORT_SYMBOL_GPL(wa_urb_enqueue_run);
-
-/*
- * Process the errored transfers on the Wire Adapter outside of interrupt.
- */
-void wa_process_errored_transfers_run(struct work_struct *ws)
-{
-	struct wahc *wa = container_of(ws, struct wahc, xfer_error_work);
-	struct wa_xfer *xfer, *next;
-	LIST_HEAD(tmp_list);
-
-	pr_info("%s: Run delayed STALL processing.\n", __func__);
-
-	/* Create a copy of the wa->xfer_errored_list while holding the lock */
-	spin_lock_irq(&wa->xfer_list_lock);
-	list_cut_position(&tmp_list, &wa->xfer_errored_list,
-			wa->xfer_errored_list.prev);
-	spin_unlock_irq(&wa->xfer_list_lock);
-
-	/*
-	 * run rpipe_clear_feature_stalled from temp list without list lock
-	 * held.
-	 */
-	list_for_each_entry_safe(xfer, next, &tmp_list, list_node) {
-		struct usb_host_endpoint *ep;
-		unsigned long flags;
-		struct wa_rpipe *rpipe;
-
-		spin_lock_irqsave(&xfer->lock, flags);
-		ep = xfer->ep;
-		rpipe = ep->hcpriv;
-		spin_unlock_irqrestore(&xfer->lock, flags);
-
-		/* clear RPIPE feature stalled without holding a lock. */
-		rpipe_clear_feature_stalled(wa, ep);
-
-		/* complete the xfer. This removes it from the tmp list. */
-		wa_xfer_completion(xfer);
-
-		/* check for work. */
-		wa_xfer_delayed_run(rpipe);
-	}
-}
-EXPORT_SYMBOL_GPL(wa_process_errored_transfers_run);
-
-/*
- * Submit a transfer to the Wire Adapter in a delayed way
- *
- * The process of enqueuing involves possible sleeps() [see
- * enqueue_b(), for the rpipe_get() and the mutex_lock()]. If we are
- * in an atomic section, we defer the enqueue_b() call--else we call direct.
- *
- * @urb: We own a reference to it done by the HCI Linux USB stack that
- *       will be given up by calling usb_hcd_giveback_urb() or by
- *       returning error from this function -> ergo we don't have to
- *       refcount it.
- */
-int wa_urb_enqueue(struct wahc *wa, struct usb_host_endpoint *ep,
-		   struct urb *urb, gfp_t gfp)
-{
-	int result;
-	struct device *dev = &wa->usb_iface->dev;
-	struct wa_xfer *xfer;
-	unsigned long my_flags;
-	unsigned cant_sleep = irqs_disabled() | in_atomic();
-
-	if ((urb->transfer_buffer == NULL)
-	    && (urb->sg == NULL)
-	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)
-	    && urb->transfer_buffer_length != 0) {
-		dev_err(dev, "BUG? urb %p: NULL xfer buffer & NODMA\n", urb);
-		dump_stack();
-	}
-
-	spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
-	result = usb_hcd_link_urb_to_ep(&(wa->wusb->usb_hcd), urb);
-	spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
-	if (result < 0)
-		goto error_link_urb;
-
-	result = -ENOMEM;
-	xfer = kzalloc(sizeof(*xfer), gfp);
-	if (xfer == NULL)
-		goto error_kmalloc;
-
-	result = -ENOENT;
-	if (urb->status != -EINPROGRESS)	/* cancelled */
-		goto error_dequeued;		/* before starting? */
-	wa_xfer_init(xfer);
-	xfer->wa = wa_get(wa);
-	xfer->urb = urb;
-	xfer->gfp = gfp;
-	xfer->ep = ep;
-	urb->hcpriv = xfer;
-
-	dev_dbg(dev, "xfer %p urb %p pipe 0x%02x [%d bytes] %s %s %s\n",
-		xfer, urb, urb->pipe, urb->transfer_buffer_length,
-		urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP ? "dma" : "nodma",
-		urb->pipe & USB_DIR_IN ? "inbound" : "outbound",
-		cant_sleep ? "deferred" : "inline");
-
-	if (cant_sleep) {
-		usb_get_urb(urb);
-		spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
-		list_add_tail(&xfer->list_node, &wa->xfer_delayed_list);
-		spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
-		queue_work(wusbd, &wa->xfer_enqueue_work);
-	} else {
-		result = wa_urb_enqueue_b(xfer);
-		if (result < 0) {
-			/*
-			 * URB submit/enqueue failed.  Clean up, return an
-			 * error and do not run the callback.  This avoids
-			 * an infinite submit/complete loop.
-			 */
-			dev_err(dev, "%s: URB enqueue failed: %d\n",
-			   __func__, result);
-			wa_put(xfer->wa);
-			wa_xfer_put(xfer);
-			spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
-			usb_hcd_unlink_urb_from_ep(&(wa->wusb->usb_hcd), urb);
-			spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
-			return result;
-		}
-	}
-	return 0;
-
-error_dequeued:
-	kfree(xfer);
-error_kmalloc:
-	spin_lock_irqsave(&wa->xfer_list_lock, my_flags);
-	usb_hcd_unlink_urb_from_ep(&(wa->wusb->usb_hcd), urb);
-	spin_unlock_irqrestore(&wa->xfer_list_lock, my_flags);
-error_link_urb:
-	return result;
-}
-EXPORT_SYMBOL_GPL(wa_urb_enqueue);
-
-/*
- * Dequeue a URB and make sure uwb_hcd_giveback_urb() [completion
- * handler] is called.
- *
- * Until a transfer goes successfully through wa_urb_enqueue() it
- * needs to be dequeued with completion calling; when stuck in delayed
- * or before wa_xfer_setup() is called, we need to do completion.
- *
- *  not setup  If there is no hcpriv yet, that means that that enqueue
- *             still had no time to set the xfer up. Because
- *             urb->status should be other than -EINPROGRESS,
- *             enqueue() will catch that and bail out.
- *
- * If the transfer has gone through setup, we just need to clean it
- * up. If it has gone through submit(), we have to abort it [with an
- * asynch request] and then make sure we cancel each segment.
- *
- */
-int wa_urb_dequeue(struct wahc *wa, struct urb *urb, int status)
-{
-	unsigned long flags;
-	struct wa_xfer *xfer;
-	struct wa_seg *seg;
-	struct wa_rpipe *rpipe;
-	unsigned cnt, done = 0, xfer_abort_pending;
-	unsigned rpipe_ready = 0;
-	int result;
-
-	/* check if it is safe to unlink. */
-	spin_lock_irqsave(&wa->xfer_list_lock, flags);
-	result = usb_hcd_check_unlink_urb(&(wa->wusb->usb_hcd), urb, status);
-	if ((result == 0) && urb->hcpriv) {
-		/*
-		 * Get a xfer ref to prevent a race with wa_xfer_giveback
-		 * cleaning up the xfer while we are working with it.
-		 */
-		wa_xfer_get(urb->hcpriv);
-	}
-	spin_unlock_irqrestore(&wa->xfer_list_lock, flags);
-	if (result)
-		return result;
-
-	xfer = urb->hcpriv;
-	if (xfer == NULL)
-		return -ENOENT;
-	spin_lock_irqsave(&xfer->lock, flags);
-	pr_debug("%s: DEQUEUE xfer id 0x%08X\n", __func__, wa_xfer_id(xfer));
-	rpipe = xfer->ep->hcpriv;
-	if (rpipe == NULL) {
-		pr_debug("%s: xfer %p id 0x%08X has no RPIPE.  %s",
-			__func__, xfer, wa_xfer_id(xfer),
-			"Probably already aborted.\n" );
-		result = -ENOENT;
-		goto out_unlock;
-	}
-	/*
-	 * Check for done to avoid racing with wa_xfer_giveback and completing
-	 * twice.
-	 */
-	if (__wa_xfer_is_done(xfer)) {
-		pr_debug("%s: xfer %p id 0x%08X already done.\n", __func__,
-			xfer, wa_xfer_id(xfer));
-		result = -ENOENT;
-		goto out_unlock;
-	}
-	/* Check the delayed list -> if there, release and complete */
-	spin_lock(&wa->xfer_list_lock);
-	if (!list_empty(&xfer->list_node) && xfer->seg == NULL)
-		goto dequeue_delayed;
-	spin_unlock(&wa->xfer_list_lock);
-	if (xfer->seg == NULL)  	/* still hasn't reached */
-		goto out_unlock;	/* setup(), enqueue_b() completes */
-	/* Ok, the xfer is in flight already, it's been setup and submitted.*/
-	xfer_abort_pending = __wa_xfer_abort(xfer) >= 0;
-	/*
-	 * grab the rpipe->seg_lock here to prevent racing with
-	 * __wa_xfer_delayed_run.
-	 */
-	spin_lock(&rpipe->seg_lock);
-	for (cnt = 0; cnt < xfer->segs; cnt++) {
-		seg = xfer->seg[cnt];
-		pr_debug("%s: xfer id 0x%08X#%d status = %d\n",
-			__func__, wa_xfer_id(xfer), cnt, seg->status);
-		switch (seg->status) {
-		case WA_SEG_NOTREADY:
-		case WA_SEG_READY:
-			printk(KERN_ERR "xfer %p#%u: dequeue bad state %u\n",
-			       xfer, cnt, seg->status);
-			WARN_ON(1);
-			break;
-		case WA_SEG_DELAYED:
-			/*
-			 * delete from rpipe delayed list.  If no segments on
-			 * this xfer have been submitted, __wa_xfer_is_done will
-			 * trigger a giveback below.  Otherwise, the submitted
-			 * segments will be completed in the DTI interrupt.
-			 */
-			seg->status = WA_SEG_ABORTED;
-			seg->result = -ENOENT;
-			list_del(&seg->list_node);
-			xfer->segs_done++;
-			break;
-		case WA_SEG_DONE:
-		case WA_SEG_ERROR:
-		case WA_SEG_ABORTED:
-			break;
-			/*
-			 * The buf_in data for a segment in the
-			 * WA_SEG_DTI_PENDING state is actively being read.
-			 * Let wa_buf_in_cb handle it since it will be called
-			 * and will increment xfer->segs_done.  Cleaning up
-			 * here could cause wa_buf_in_cb to access the xfer
-			 * after it has been completed/freed.
-			 */
-		case WA_SEG_DTI_PENDING:
-			break;
-			/*
-			 * In the states below, the HWA device already knows
-			 * about the transfer.  If an abort request was sent,
-			 * allow the HWA to process it and wait for the
-			 * results.  Otherwise, the DTI state and seg completed
-			 * counts can get out of sync.
-			 */
-		case WA_SEG_SUBMITTED:
-		case WA_SEG_PENDING:
-			/*
-			 * Check if the abort was successfully sent.  This could
-			 * be false if the HWA has been removed but we haven't
-			 * gotten the disconnect notification yet.
-			 */
-			if (!xfer_abort_pending) {
-				seg->status = WA_SEG_ABORTED;
-				rpipe_ready = rpipe_avail_inc(rpipe);
-				xfer->segs_done++;
-			}
-			break;
-		}
-	}
-	spin_unlock(&rpipe->seg_lock);
-	xfer->result = urb->status;	/* -ENOENT or -ECONNRESET */
-	done = __wa_xfer_is_done(xfer);
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	if (done)
-		wa_xfer_completion(xfer);
-	if (rpipe_ready)
-		wa_xfer_delayed_run(rpipe);
-	wa_xfer_put(xfer);
-	return result;
-
-out_unlock:
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	wa_xfer_put(xfer);
-	return result;
-
-dequeue_delayed:
-	list_del_init(&xfer->list_node);
-	spin_unlock(&wa->xfer_list_lock);
-	xfer->result = urb->status;
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	wa_xfer_giveback(xfer);
-	wa_xfer_put(xfer);
-	usb_put_urb(urb);		/* we got a ref in enqueue() */
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wa_urb_dequeue);
-
-/*
- * Translation from WA status codes (WUSB1.0 Table 8.15) to errno
- * codes
- *
- * Positive errno values are internal inconsistencies and should be
- * flagged louder. Negative are to be passed up to the user in the
- * normal way.
- *
- * @status: USB WA status code -- high two bits are stripped.
- */
-static int wa_xfer_status_to_errno(u8 status)
-{
-	int errno;
-	u8 real_status = status;
-	static int xlat[] = {
-		[WA_XFER_STATUS_SUCCESS] = 		0,
-		[WA_XFER_STATUS_HALTED] = 		-EPIPE,
-		[WA_XFER_STATUS_DATA_BUFFER_ERROR] = 	-ENOBUFS,
-		[WA_XFER_STATUS_BABBLE] = 		-EOVERFLOW,
-		[WA_XFER_RESERVED] = 			EINVAL,
-		[WA_XFER_STATUS_NOT_FOUND] =		0,
-		[WA_XFER_STATUS_INSUFFICIENT_RESOURCE] = -ENOMEM,
-		[WA_XFER_STATUS_TRANSACTION_ERROR] = 	-EILSEQ,
-		[WA_XFER_STATUS_ABORTED] =		-ENOENT,
-		[WA_XFER_STATUS_RPIPE_NOT_READY] = 	EINVAL,
-		[WA_XFER_INVALID_FORMAT] = 		EINVAL,
-		[WA_XFER_UNEXPECTED_SEGMENT_NUMBER] = 	EINVAL,
-		[WA_XFER_STATUS_RPIPE_TYPE_MISMATCH] = 	EINVAL,
-	};
-	status &= 0x3f;
-
-	if (status == 0)
-		return 0;
-	if (status >= ARRAY_SIZE(xlat)) {
-		printk_ratelimited(KERN_ERR "%s(): BUG? "
-			       "Unknown WA transfer status 0x%02x\n",
-			       __func__, real_status);
-		return -EINVAL;
-	}
-	errno = xlat[status];
-	if (unlikely(errno > 0)) {
-		printk_ratelimited(KERN_ERR "%s(): BUG? "
-			       "Inconsistent WA status: 0x%02x\n",
-			       __func__, real_status);
-		errno = -errno;
-	}
-	return errno;
-}
-
-/*
- * If a last segment flag and/or a transfer result error is encountered,
- * no other segment transfer results will be returned from the device.
- * Mark the remaining submitted or pending xfers as completed so that
- * the xfer will complete cleanly.
- *
- * xfer->lock must be held
- *
- */
-static void wa_complete_remaining_xfer_segs(struct wa_xfer *xfer,
-		int starting_index, enum wa_seg_status status)
-{
-	int index;
-	struct wa_rpipe *rpipe = xfer->ep->hcpriv;
-
-	for (index = starting_index; index < xfer->segs_submitted; index++) {
-		struct wa_seg *current_seg = xfer->seg[index];
-
-		BUG_ON(current_seg == NULL);
-
-		switch (current_seg->status) {
-		case WA_SEG_SUBMITTED:
-		case WA_SEG_PENDING:
-		case WA_SEG_DTI_PENDING:
-			rpipe_avail_inc(rpipe);
-		/*
-		 * do not increment RPIPE avail for the WA_SEG_DELAYED case
-		 * since it has not been submitted to the RPIPE.
-		 */
-		/* fall through */
-		case WA_SEG_DELAYED:
-			xfer->segs_done++;
-			current_seg->status = status;
-			break;
-		case WA_SEG_ABORTED:
-			break;
-		default:
-			WARN(1, "%s: xfer 0x%08X#%d. bad seg status = %d\n",
-				__func__, wa_xfer_id(xfer), index,
-				current_seg->status);
-			break;
-		}
-	}
-}
-
-/* Populate the given urb based on the current isoc transfer state. */
-static int __wa_populate_buf_in_urb_isoc(struct wahc *wa,
-	struct urb *buf_in_urb, struct wa_xfer *xfer, struct wa_seg *seg)
-{
-	int urb_start_frame = seg->isoc_frame_index + seg->isoc_frame_offset;
-	int seg_index, total_len = 0, urb_frame_index = urb_start_frame;
-	struct usb_iso_packet_descriptor *iso_frame_desc =
-						xfer->urb->iso_frame_desc;
-	const int dti_packet_size = usb_endpoint_maxp(wa->dti_epd);
-	int next_frame_contiguous;
-	struct usb_iso_packet_descriptor *iso_frame;
-
-	BUG_ON(buf_in_urb->status == -EINPROGRESS);
-
-	/*
-	 * If the current frame actual_length is contiguous with the next frame
-	 * and actual_length is a multiple of the DTI endpoint max packet size,
-	 * combine the current frame with the next frame in a single URB.  This
-	 * reduces the number of URBs that must be submitted in that case.
-	 */
-	seg_index = seg->isoc_frame_index;
-	do {
-		next_frame_contiguous = 0;
-
-		iso_frame = &iso_frame_desc[urb_frame_index];
-		total_len += iso_frame->actual_length;
-		++urb_frame_index;
-		++seg_index;
-
-		if (seg_index < seg->isoc_frame_count) {
-			struct usb_iso_packet_descriptor *next_iso_frame;
-
-			next_iso_frame = &iso_frame_desc[urb_frame_index];
-
-			if ((iso_frame->offset + iso_frame->actual_length) ==
-				next_iso_frame->offset)
-				next_frame_contiguous = 1;
-		}
-	} while (next_frame_contiguous
-			&& ((iso_frame->actual_length % dti_packet_size) == 0));
-
-	/* this should always be 0 before a resubmit. */
-	buf_in_urb->num_mapped_sgs	= 0;
-	buf_in_urb->transfer_dma = xfer->urb->transfer_dma +
-		iso_frame_desc[urb_start_frame].offset;
-	buf_in_urb->transfer_buffer_length = total_len;
-	buf_in_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-	buf_in_urb->transfer_buffer = NULL;
-	buf_in_urb->sg = NULL;
-	buf_in_urb->num_sgs = 0;
-	buf_in_urb->context = seg;
-
-	/* return the number of frames included in this URB. */
-	return seg_index - seg->isoc_frame_index;
-}
-
-/* Populate the given urb based on the current transfer state. */
-static int wa_populate_buf_in_urb(struct urb *buf_in_urb, struct wa_xfer *xfer,
-	unsigned int seg_idx, unsigned int bytes_transferred)
-{
-	int result = 0;
-	struct wa_seg *seg = xfer->seg[seg_idx];
-
-	BUG_ON(buf_in_urb->status == -EINPROGRESS);
-	/* this should always be 0 before a resubmit. */
-	buf_in_urb->num_mapped_sgs	= 0;
-
-	if (xfer->is_dma) {
-		buf_in_urb->transfer_dma = xfer->urb->transfer_dma
-			+ (seg_idx * xfer->seg_size);
-		buf_in_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
-		buf_in_urb->transfer_buffer = NULL;
-		buf_in_urb->sg = NULL;
-		buf_in_urb->num_sgs = 0;
-	} else {
-		/* do buffer or SG processing. */
-		buf_in_urb->transfer_flags &= ~URB_NO_TRANSFER_DMA_MAP;
-
-		if (xfer->urb->transfer_buffer) {
-			buf_in_urb->transfer_buffer =
-				xfer->urb->transfer_buffer
-				+ (seg_idx * xfer->seg_size);
-			buf_in_urb->sg = NULL;
-			buf_in_urb->num_sgs = 0;
-		} else {
-			/* allocate an SG list to store seg_size bytes
-				and copy the subset of the xfer->urb->sg
-				that matches the buffer subset we are
-				about to read. */
-			buf_in_urb->sg = wa_xfer_create_subset_sg(
-				xfer->urb->sg,
-				seg_idx * xfer->seg_size,
-				bytes_transferred,
-				&(buf_in_urb->num_sgs));
-
-			if (!(buf_in_urb->sg)) {
-				buf_in_urb->num_sgs	= 0;
-				result = -ENOMEM;
-			}
-			buf_in_urb->transfer_buffer = NULL;
-		}
-	}
-	buf_in_urb->transfer_buffer_length = bytes_transferred;
-	buf_in_urb->context = seg;
-
-	return result;
-}
-
-/*
- * Process a xfer result completion message
- *
- * inbound transfers: need to schedule a buf_in_urb read
- *
- * FIXME: this function needs to be broken up in parts
- */
-static void wa_xfer_result_chew(struct wahc *wa, struct wa_xfer *xfer,
-		struct wa_xfer_result *xfer_result)
-{
-	int result;
-	struct device *dev = &wa->usb_iface->dev;
-	unsigned long flags;
-	unsigned int seg_idx;
-	struct wa_seg *seg;
-	struct wa_rpipe *rpipe;
-	unsigned done = 0;
-	u8 usb_status;
-	unsigned rpipe_ready = 0;
-	unsigned bytes_transferred = le32_to_cpu(xfer_result->dwTransferLength);
-	struct urb *buf_in_urb = &(wa->buf_in_urbs[0]);
-
-	spin_lock_irqsave(&xfer->lock, flags);
-	seg_idx = xfer_result->bTransferSegment & 0x7f;
-	if (unlikely(seg_idx >= xfer->segs))
-		goto error_bad_seg;
-	seg = xfer->seg[seg_idx];
-	rpipe = xfer->ep->hcpriv;
-	usb_status = xfer_result->bTransferStatus;
-	dev_dbg(dev, "xfer %p ID 0x%08X#%u: bTransferStatus 0x%02x (seg status %u)\n",
-		xfer, wa_xfer_id(xfer), seg_idx, usb_status, seg->status);
-	if (seg->status == WA_SEG_ABORTED
-	    || seg->status == WA_SEG_ERROR)	/* already handled */
-		goto segment_aborted;
-	if (seg->status == WA_SEG_SUBMITTED)	/* ops, got here */
-		seg->status = WA_SEG_PENDING;	/* before wa_seg{_dto}_cb() */
-	if (seg->status != WA_SEG_PENDING) {
-		if (printk_ratelimit())
-			dev_err(dev, "xfer %p#%u: Bad segment state %u\n",
-				xfer, seg_idx, seg->status);
-		seg->status = WA_SEG_PENDING;	/* workaround/"fix" it */
-	}
-	if (usb_status & 0x80) {
-		seg->result = wa_xfer_status_to_errno(usb_status);
-		dev_err(dev, "DTI: xfer %p 0x%08X:#%u failed (0x%02x)\n",
-			xfer, xfer->id, seg->index, usb_status);
-		seg->status = ((usb_status & 0x7F) == WA_XFER_STATUS_ABORTED) ?
-			WA_SEG_ABORTED : WA_SEG_ERROR;
-		goto error_complete;
-	}
-	/* FIXME: we ignore warnings, tally them for stats */
-	if (usb_status & 0x40) 		/* Warning?... */
-		usb_status = 0;		/* ... pass */
-	/*
-	 * If the last segment bit is set, complete the remaining segments.
-	 * When the current segment is completed, either in wa_buf_in_cb for
-	 * transfers with data or below for no data, the xfer will complete.
-	 */
-	if (xfer_result->bTransferSegment & 0x80)
-		wa_complete_remaining_xfer_segs(xfer, seg->index + 1,
-			WA_SEG_DONE);
-	if (usb_pipeisoc(xfer->urb->pipe)
-		&& (le32_to_cpu(xfer_result->dwNumOfPackets) > 0)) {
-		/* set up WA state to read the isoc packet status next. */
-		wa->dti_isoc_xfer_in_progress = wa_xfer_id(xfer);
-		wa->dti_isoc_xfer_seg = seg_idx;
-		wa->dti_state = WA_DTI_ISOC_PACKET_STATUS_PENDING;
-	} else if (xfer->is_inbound && !usb_pipeisoc(xfer->urb->pipe)
-			&& (bytes_transferred > 0)) {
-		/* IN data phase: read to buffer */
-		seg->status = WA_SEG_DTI_PENDING;
-		result = wa_populate_buf_in_urb(buf_in_urb, xfer, seg_idx,
-			bytes_transferred);
-		if (result < 0)
-			goto error_buf_in_populate;
-		++(wa->active_buf_in_urbs);
-		result = usb_submit_urb(buf_in_urb, GFP_ATOMIC);
-		if (result < 0) {
-			--(wa->active_buf_in_urbs);
-			goto error_submit_buf_in;
-		}
-	} else {
-		/* OUT data phase or no data, complete it -- */
-		seg->result = bytes_transferred;
-		rpipe_ready = rpipe_avail_inc(rpipe);
-		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_DONE);
-	}
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	if (done)
-		wa_xfer_completion(xfer);
-	if (rpipe_ready)
-		wa_xfer_delayed_run(rpipe);
-	return;
-
-error_submit_buf_in:
-	if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-		dev_err(dev, "DTI: URB max acceptable errors "
-			"exceeded, resetting device\n");
-		wa_reset_all(wa);
-	}
-	if (printk_ratelimit())
-		dev_err(dev, "xfer %p#%u: can't submit DTI data phase: %d\n",
-			xfer, seg_idx, result);
-	seg->result = result;
-	kfree(buf_in_urb->sg);
-	buf_in_urb->sg = NULL;
-error_buf_in_populate:
-	__wa_xfer_abort(xfer);
-	seg->status = WA_SEG_ERROR;
-error_complete:
-	xfer->segs_done++;
-	rpipe_ready = rpipe_avail_inc(rpipe);
-	wa_complete_remaining_xfer_segs(xfer, seg->index + 1, seg->status);
-	done = __wa_xfer_is_done(xfer);
-	/*
-	 * queue work item to clear STALL for control endpoints.
-	 * Otherwise, let endpoint_reset take care of it.
-	 */
-	if (((usb_status & 0x3f) == WA_XFER_STATUS_HALTED) &&
-		usb_endpoint_xfer_control(&xfer->ep->desc) &&
-		done) {
-
-		dev_info(dev, "Control EP stall.  Queue delayed work.\n");
-		spin_lock(&wa->xfer_list_lock);
-		/* move xfer from xfer_list to xfer_errored_list. */
-		list_move_tail(&xfer->list_node, &wa->xfer_errored_list);
-		spin_unlock(&wa->xfer_list_lock);
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		queue_work(wusbd, &wa->xfer_error_work);
-	} else {
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		if (done)
-			wa_xfer_completion(xfer);
-		if (rpipe_ready)
-			wa_xfer_delayed_run(rpipe);
-	}
-
-	return;
-
-error_bad_seg:
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	wa_urb_dequeue(wa, xfer->urb, -ENOENT);
-	if (printk_ratelimit())
-		dev_err(dev, "xfer %p#%u: bad segment\n", xfer, seg_idx);
-	if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS, EDC_ERROR_TIMEFRAME)) {
-		dev_err(dev, "DTI: URB max acceptable errors "
-			"exceeded, resetting device\n");
-		wa_reset_all(wa);
-	}
-	return;
-
-segment_aborted:
-	/* nothing to do, as the aborter did the completion */
-	spin_unlock_irqrestore(&xfer->lock, flags);
-}
-
-/*
- * Process a isochronous packet status message
- *
- * inbound transfers: need to schedule a buf_in_urb read
- */
-static int wa_process_iso_packet_status(struct wahc *wa, struct urb *urb)
-{
-	struct device *dev = &wa->usb_iface->dev;
-	struct wa_xfer_packet_status_hwaiso *packet_status;
-	struct wa_xfer_packet_status_len_hwaiso *status_array;
-	struct wa_xfer *xfer;
-	unsigned long flags;
-	struct wa_seg *seg;
-	struct wa_rpipe *rpipe;
-	unsigned done = 0, dti_busy = 0, data_frame_count = 0, seg_index;
-	unsigned first_frame_index = 0, rpipe_ready = 0;
-	size_t expected_size;
-
-	/* We have a xfer result buffer; check it */
-	dev_dbg(dev, "DTI: isoc packet status %d bytes at %p\n",
-		urb->actual_length, urb->transfer_buffer);
-	packet_status = (struct wa_xfer_packet_status_hwaiso *)(wa->dti_buf);
-	if (packet_status->bPacketType != WA_XFER_ISO_PACKET_STATUS) {
-		dev_err(dev, "DTI Error: isoc packet status--bad type 0x%02x\n",
-			packet_status->bPacketType);
-		goto error_parse_buffer;
-	}
-	xfer = wa_xfer_get_by_id(wa, wa->dti_isoc_xfer_in_progress);
-	if (xfer == NULL) {
-		dev_err(dev, "DTI Error: isoc packet status--unknown xfer 0x%08x\n",
-			wa->dti_isoc_xfer_in_progress);
-		goto error_parse_buffer;
-	}
-	spin_lock_irqsave(&xfer->lock, flags);
-	if (unlikely(wa->dti_isoc_xfer_seg >= xfer->segs))
-		goto error_bad_seg;
-	seg = xfer->seg[wa->dti_isoc_xfer_seg];
-	rpipe = xfer->ep->hcpriv;
-	expected_size = struct_size(packet_status, PacketStatus,
-				    seg->isoc_frame_count);
-	if (urb->actual_length != expected_size) {
-		dev_err(dev, "DTI Error: isoc packet status--bad urb length (%d bytes vs %zu needed)\n",
-			urb->actual_length, expected_size);
-		goto error_bad_seg;
-	}
-	if (le16_to_cpu(packet_status->wLength) != expected_size) {
-		dev_err(dev, "DTI Error: isoc packet status--bad length %u\n",
-			le16_to_cpu(packet_status->wLength));
-		goto error_bad_seg;
-	}
-	/* write isoc packet status and lengths back to the xfer urb. */
-	status_array = packet_status->PacketStatus;
-	xfer->urb->start_frame =
-		wa->wusb->usb_hcd.driver->get_frame_number(&wa->wusb->usb_hcd);
-	for (seg_index = 0; seg_index < seg->isoc_frame_count; ++seg_index) {
-		struct usb_iso_packet_descriptor *iso_frame_desc =
-			xfer->urb->iso_frame_desc;
-		const int xfer_frame_index =
-			seg->isoc_frame_offset + seg_index;
-
-		iso_frame_desc[xfer_frame_index].status =
-			wa_xfer_status_to_errno(
-			le16_to_cpu(status_array[seg_index].PacketStatus));
-		iso_frame_desc[xfer_frame_index].actual_length =
-			le16_to_cpu(status_array[seg_index].PacketLength);
-		/* track the number of frames successfully transferred. */
-		if (iso_frame_desc[xfer_frame_index].actual_length > 0) {
-			/* save the starting frame index for buf_in_urb. */
-			if (!data_frame_count)
-				first_frame_index = seg_index;
-			++data_frame_count;
-		}
-	}
-
-	if (xfer->is_inbound && data_frame_count) {
-		int result, total_frames_read = 0, urb_index = 0;
-		struct urb *buf_in_urb;
-
-		/* IN data phase: read to buffer */
-		seg->status = WA_SEG_DTI_PENDING;
-
-		/* start with the first frame with data. */
-		seg->isoc_frame_index = first_frame_index;
-		/* submit up to WA_MAX_BUF_IN_URBS read URBs. */
-		do {
-			int urb_frame_index, urb_frame_count;
-			struct usb_iso_packet_descriptor *iso_frame_desc;
-
-			buf_in_urb = &(wa->buf_in_urbs[urb_index]);
-			urb_frame_count = __wa_populate_buf_in_urb_isoc(wa,
-				buf_in_urb, xfer, seg);
-			/* advance frame index to start of next read URB. */
-			seg->isoc_frame_index += urb_frame_count;
-			total_frames_read += urb_frame_count;
-
-			++(wa->active_buf_in_urbs);
-			result = usb_submit_urb(buf_in_urb, GFP_ATOMIC);
-
-			/* skip 0-byte frames. */
-			urb_frame_index =
-				seg->isoc_frame_offset + seg->isoc_frame_index;
-			iso_frame_desc =
-				&(xfer->urb->iso_frame_desc[urb_frame_index]);
-			while ((seg->isoc_frame_index <
-						seg->isoc_frame_count) &&
-				 (iso_frame_desc->actual_length == 0)) {
-				++(seg->isoc_frame_index);
-				++iso_frame_desc;
-			}
-			++urb_index;
-
-		} while ((result == 0) && (urb_index < WA_MAX_BUF_IN_URBS)
-				&& (seg->isoc_frame_index <
-						seg->isoc_frame_count));
-
-		if (result < 0) {
-			--(wa->active_buf_in_urbs);
-			dev_err(dev, "DTI Error: Could not submit buf in URB (%d)",
-				result);
-			wa_reset_all(wa);
-		} else if (data_frame_count > total_frames_read)
-			/* If we need to read more frames, set DTI busy. */
-			dti_busy = 1;
-	} else {
-		/* OUT transfer or no more IN data, complete it -- */
-		rpipe_ready = rpipe_avail_inc(rpipe);
-		done = __wa_xfer_mark_seg_as_done(xfer, seg, WA_SEG_DONE);
-	}
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	if (dti_busy)
-		wa->dti_state = WA_DTI_BUF_IN_DATA_PENDING;
-	else
-		wa->dti_state = WA_DTI_TRANSFER_RESULT_PENDING;
-	if (done)
-		wa_xfer_completion(xfer);
-	if (rpipe_ready)
-		wa_xfer_delayed_run(rpipe);
-	wa_xfer_put(xfer);
-	return dti_busy;
-
-error_bad_seg:
-	spin_unlock_irqrestore(&xfer->lock, flags);
-	wa_xfer_put(xfer);
-error_parse_buffer:
-	return dti_busy;
-}
-
-/*
- * Callback for the IN data phase
- *
- * If successful transition state; otherwise, take a note of the
- * error, mark this segment done and try completion.
- *
- * Note we don't access until we are sure that the transfer hasn't
- * been cancelled (ECONNRESET, ENOENT), which could mean that
- * seg->xfer could be already gone.
- */
-static void wa_buf_in_cb(struct urb *urb)
-{
-	struct wa_seg *seg = urb->context;
-	struct wa_xfer *xfer = seg->xfer;
-	struct wahc *wa;
-	struct device *dev;
-	struct wa_rpipe *rpipe;
-	unsigned rpipe_ready = 0, isoc_data_frame_count = 0;
-	unsigned long flags;
-	int resubmit_dti = 0, active_buf_in_urbs;
-	u8 done = 0;
-
-	/* free the sg if it was used. */
-	kfree(urb->sg);
-	urb->sg = NULL;
-
-	spin_lock_irqsave(&xfer->lock, flags);
-	wa = xfer->wa;
-	dev = &wa->usb_iface->dev;
-	--(wa->active_buf_in_urbs);
-	active_buf_in_urbs = wa->active_buf_in_urbs;
-	rpipe = xfer->ep->hcpriv;
-
-	if (usb_pipeisoc(xfer->urb->pipe)) {
-		struct usb_iso_packet_descriptor *iso_frame_desc =
-			xfer->urb->iso_frame_desc;
-		int	seg_index;
-
-		/*
-		 * Find the next isoc frame with data and count how many
-		 * frames with data remain.
-		 */
-		seg_index = seg->isoc_frame_index;
-		while (seg_index < seg->isoc_frame_count) {
-			const int urb_frame_index =
-				seg->isoc_frame_offset + seg_index;
-
-			if (iso_frame_desc[urb_frame_index].actual_length > 0) {
-				/* save the index of the next frame with data */
-				if (!isoc_data_frame_count)
-					seg->isoc_frame_index = seg_index;
-				++isoc_data_frame_count;
-			}
-			++seg_index;
-		}
-	}
-	spin_unlock_irqrestore(&xfer->lock, flags);
-
-	switch (urb->status) {
-	case 0:
-		spin_lock_irqsave(&xfer->lock, flags);
-
-		seg->result += urb->actual_length;
-		if (isoc_data_frame_count > 0) {
-			int result, urb_frame_count;
-
-			/* submit a read URB for the next frame with data. */
-			urb_frame_count = __wa_populate_buf_in_urb_isoc(wa, urb,
-				 xfer, seg);
-			/* advance index to start of next read URB. */
-			seg->isoc_frame_index += urb_frame_count;
-			++(wa->active_buf_in_urbs);
-			result = usb_submit_urb(urb, GFP_ATOMIC);
-			if (result < 0) {
-				--(wa->active_buf_in_urbs);
-				dev_err(dev, "DTI Error: Could not submit buf in URB (%d)",
-					result);
-				wa_reset_all(wa);
-			}
-			/*
-			 * If we are in this callback and
-			 * isoc_data_frame_count > 0, it means that the dti_urb
-			 * submission was delayed in wa_dti_cb.  Once
-			 * we submit the last buf_in_urb, we can submit the
-			 * delayed dti_urb.
-			 */
-			  resubmit_dti = (isoc_data_frame_count ==
-							urb_frame_count);
-		} else if (active_buf_in_urbs == 0) {
-			dev_dbg(dev,
-				"xfer %p 0x%08X#%u: data in done (%zu bytes)\n",
-				xfer, wa_xfer_id(xfer), seg->index,
-				seg->result);
-			rpipe_ready = rpipe_avail_inc(rpipe);
-			done = __wa_xfer_mark_seg_as_done(xfer, seg,
-					WA_SEG_DONE);
-		}
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		if (done)
-			wa_xfer_completion(xfer);
-		if (rpipe_ready)
-			wa_xfer_delayed_run(rpipe);
-		break;
-	case -ECONNRESET:	/* URB unlinked; no need to do anything */
-	case -ENOENT:		/* as it was done by the who unlinked us */
-		break;
-	default:		/* Other errors ... */
-		/*
-		 * Error on data buf read.  Only resubmit DTI if it hasn't
-		 * already been done by previously hitting this error or by a
-		 * successful completion of the previous buf_in_urb.
-		 */
-		resubmit_dti = wa->dti_state != WA_DTI_TRANSFER_RESULT_PENDING;
-		spin_lock_irqsave(&xfer->lock, flags);
-		if (printk_ratelimit())
-			dev_err(dev, "xfer %p 0x%08X#%u: data in error %d\n",
-				xfer, wa_xfer_id(xfer), seg->index,
-				urb->status);
-		if (edc_inc(&wa->nep_edc, EDC_MAX_ERRORS,
-			    EDC_ERROR_TIMEFRAME)){
-			dev_err(dev, "DTO: URB max acceptable errors "
-				"exceeded, resetting device\n");
-			wa_reset_all(wa);
-		}
-		seg->result = urb->status;
-		rpipe_ready = rpipe_avail_inc(rpipe);
-		if (active_buf_in_urbs == 0)
-			done = __wa_xfer_mark_seg_as_done(xfer, seg,
-				WA_SEG_ERROR);
-		else
-			__wa_xfer_abort(xfer);
-		spin_unlock_irqrestore(&xfer->lock, flags);
-		if (done)
-			wa_xfer_completion(xfer);
-		if (rpipe_ready)
-			wa_xfer_delayed_run(rpipe);
-	}
-
-	if (resubmit_dti) {
-		int result;
-
-		wa->dti_state = WA_DTI_TRANSFER_RESULT_PENDING;
-
-		result = usb_submit_urb(wa->dti_urb, GFP_ATOMIC);
-		if (result < 0) {
-			dev_err(dev, "DTI Error: Could not submit DTI URB (%d)\n",
-				result);
-			wa_reset_all(wa);
-		}
-	}
-}
-
-/*
- * Handle an incoming transfer result buffer
- *
- * Given a transfer result buffer, it completes the transfer (possibly
- * scheduling and buffer in read) and then resubmits the DTI URB for a
- * new transfer result read.
- *
- *
- * The xfer_result DTI URB state machine
- *
- * States: OFF | RXR (Read-Xfer-Result) | RBI (Read-Buffer-In)
- *
- * We start in OFF mode, the first xfer_result notification [through
- * wa_handle_notif_xfer()] moves us to RXR by posting the DTI-URB to
- * read.
- *
- * We receive a buffer -- if it is not a xfer_result, we complain and
- * repost the DTI-URB. If it is a xfer_result then do the xfer seg
- * request accounting. If it is an IN segment, we move to RBI and post
- * a BUF-IN-URB to the right buffer. The BUF-IN-URB callback will
- * repost the DTI-URB and move to RXR state. if there was no IN
- * segment, it will repost the DTI-URB.
- *
- * We go back to OFF when we detect a ENOENT or ESHUTDOWN (or too many
- * errors) in the URBs.
- */
-static void wa_dti_cb(struct urb *urb)
-{
-	int result, dti_busy = 0;
-	struct wahc *wa = urb->context;
-	struct device *dev = &wa->usb_iface->dev;
-	u32 xfer_id;
-	u8 usb_status;
-
-	BUG_ON(wa->dti_urb != urb);
-	switch (wa->dti_urb->status) {
-	case 0:
-		if (wa->dti_state == WA_DTI_TRANSFER_RESULT_PENDING) {
-			struct wa_xfer_result *xfer_result;
-			struct wa_xfer *xfer;
-
-			/* We have a xfer result buffer; check it */
-			dev_dbg(dev, "DTI: xfer result %d bytes at %p\n",
-				urb->actual_length, urb->transfer_buffer);
-			if (urb->actual_length != sizeof(*xfer_result)) {
-				dev_err(dev, "DTI Error: xfer result--bad size xfer result (%d bytes vs %zu needed)\n",
-					urb->actual_length,
-					sizeof(*xfer_result));
-				break;
-			}
-			xfer_result = (struct wa_xfer_result *)(wa->dti_buf);
-			if (xfer_result->hdr.bLength != sizeof(*xfer_result)) {
-				dev_err(dev, "DTI Error: xfer result--bad header length %u\n",
-					xfer_result->hdr.bLength);
-				break;
-			}
-			if (xfer_result->hdr.bNotifyType != WA_XFER_RESULT) {
-				dev_err(dev, "DTI Error: xfer result--bad header type 0x%02x\n",
-					xfer_result->hdr.bNotifyType);
-				break;
-			}
-			xfer_id = le32_to_cpu(xfer_result->dwTransferID);
-			usb_status = xfer_result->bTransferStatus & 0x3f;
-			if (usb_status == WA_XFER_STATUS_NOT_FOUND) {
-				/* taken care of already */
-				dev_dbg(dev, "%s: xfer 0x%08X#%u not found.\n",
-					__func__, xfer_id,
-					xfer_result->bTransferSegment & 0x7f);
-				break;
-			}
-			xfer = wa_xfer_get_by_id(wa, xfer_id);
-			if (xfer == NULL) {
-				/* FIXME: transaction not found. */
-				dev_err(dev, "DTI Error: xfer result--unknown xfer 0x%08x (status 0x%02x)\n",
-					xfer_id, usb_status);
-				break;
-			}
-			wa_xfer_result_chew(wa, xfer, xfer_result);
-			wa_xfer_put(xfer);
-		} else if (wa->dti_state == WA_DTI_ISOC_PACKET_STATUS_PENDING) {
-			dti_busy = wa_process_iso_packet_status(wa, urb);
-		} else {
-			dev_err(dev, "DTI Error: unexpected EP state = %d\n",
-				wa->dti_state);
-		}
-		break;
-	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
-	case -ESHUTDOWN:	/* going away! */
-		dev_dbg(dev, "DTI: going down! %d\n", urb->status);
-		goto out;
-	default:
-		/* Unknown error */
-		if (edc_inc(&wa->dti_edc, EDC_MAX_ERRORS,
-			    EDC_ERROR_TIMEFRAME)) {
-			dev_err(dev, "DTI: URB max acceptable errors "
-				"exceeded, resetting device\n");
-			wa_reset_all(wa);
-			goto out;
-		}
-		if (printk_ratelimit())
-			dev_err(dev, "DTI: URB error %d\n", urb->status);
-		break;
-	}
-
-	/* Resubmit the DTI URB if we are not busy processing isoc in frames. */
-	if (!dti_busy) {
-		result = usb_submit_urb(wa->dti_urb, GFP_ATOMIC);
-		if (result < 0) {
-			dev_err(dev, "DTI Error: Could not submit DTI URB (%d)\n",
-				result);
-			wa_reset_all(wa);
-		}
-	}
-out:
-	return;
-}
-
-/*
- * Initialize the DTI URB for reading transfer result notifications and also
- * the buffer-in URB, for reading buffers. Then we just submit the DTI URB.
- */
-int wa_dti_start(struct wahc *wa)
-{
-	const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd;
-	struct device *dev = &wa->usb_iface->dev;
-	int result = -ENOMEM, index;
-
-	if (wa->dti_urb != NULL)	/* DTI URB already started */
-		goto out;
-
-	wa->dti_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (wa->dti_urb == NULL)
-		goto error_dti_urb_alloc;
-	usb_fill_bulk_urb(
-		wa->dti_urb, wa->usb_dev,
-		usb_rcvbulkpipe(wa->usb_dev, 0x80 | dti_epd->bEndpointAddress),
-		wa->dti_buf, wa->dti_buf_size,
-		wa_dti_cb, wa);
-
-	/* init the buf in URBs */
-	for (index = 0; index < WA_MAX_BUF_IN_URBS; ++index) {
-		usb_fill_bulk_urb(
-			&(wa->buf_in_urbs[index]), wa->usb_dev,
-			usb_rcvbulkpipe(wa->usb_dev,
-				0x80 | dti_epd->bEndpointAddress),
-			NULL, 0, wa_buf_in_cb, wa);
-	}
-	result = usb_submit_urb(wa->dti_urb, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "DTI Error: Could not submit DTI URB (%d) resetting\n",
-			result);
-		goto error_dti_urb_submit;
-	}
-out:
-	return 0;
-
-error_dti_urb_submit:
-	usb_put_urb(wa->dti_urb);
-	wa->dti_urb = NULL;
-error_dti_urb_alloc:
-	return result;
-}
-EXPORT_SYMBOL_GPL(wa_dti_start);
-/*
- * Transfer complete notification
- *
- * Called from the notif.c code. We get a notification on EP2 saying
- * that some endpoint has some transfer result data available. We are
- * about to read it.
- *
- * To speed up things, we always have a URB reading the DTI URB; we
- * don't really set it up and start it until the first xfer complete
- * notification arrives, which is what we do here.
- *
- * Follow up in wa_dti_cb(), as that's where the whole state
- * machine starts.
- *
- * @wa shall be referenced
- */
-void wa_handle_notif_xfer(struct wahc *wa, struct wa_notif_hdr *notif_hdr)
-{
-	struct device *dev = &wa->usb_iface->dev;
-	struct wa_notif_xfer *notif_xfer;
-	const struct usb_endpoint_descriptor *dti_epd = wa->dti_epd;
-
-	notif_xfer = container_of(notif_hdr, struct wa_notif_xfer, hdr);
-	BUG_ON(notif_hdr->bNotifyType != WA_NOTIF_TRANSFER);
-
-	if ((0x80 | notif_xfer->bEndpoint) != dti_epd->bEndpointAddress) {
-		/* FIXME: hardcoded limitation, adapt */
-		dev_err(dev, "BUG: DTI ep is %u, not %u (hack me)\n",
-			notif_xfer->bEndpoint, dti_epd->bEndpointAddress);
-		goto error;
-	}
-
-	/* attempt to start the DTI ep processing. */
-	if (wa_dti_start(wa) < 0)
-		goto error;
-
-	return;
-
-error:
-	wa_reset_all(wa);
-}
diff --git a/drivers/usb/wusbcore/wusbhc.c b/drivers/usb/wusbcore/wusbhc.c
deleted file mode 100644
index d0b404d258e8..000000000000
--- a/drivers/usb/wusbcore/wusbhc.c
+++ /dev/null
@@ -1,490 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Host Controller
- * sysfs glue, wusbcore module support and life cycle management
- *
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Creation/destruction of wusbhc is split in two parts; that that
- * doesn't require the HCD to be added (wusbhc_{create,destroy}) and
- * the one that requires (phase B, wusbhc_b_{create,destroy}).
- *
- * This is so because usb_add_hcd() will start the HC, and thus, all
- * the HC specific stuff has to be already initialized (like sysfs
- * thingies).
- */
-#include <linux/device.h>
-#include <linux/module.h>
-#include "wusbhc.h"
-
-/**
- * Extract the wusbhc that corresponds to a USB Host Controller class device
- *
- * WARNING! Apply only if @dev is that of a
- *          wusbhc.usb_hcd.self->class_dev; otherwise, you loose.
- */
-static struct wusbhc *usbhc_dev_to_wusbhc(struct device *dev)
-{
-	struct usb_bus *usb_bus = dev_get_drvdata(dev);
-	struct usb_hcd *usb_hcd = bus_to_hcd(usb_bus);
-	return usb_hcd_to_wusbhc(usb_hcd);
-}
-
-/*
- * Show & store the current WUSB trust timeout
- *
- * We don't do locking--it is an 'atomic' value.
- *
- * The units that we store/show are always MILLISECONDS. However, the
- * value of trust_timeout is jiffies.
- */
-static ssize_t wusb_trust_timeout_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-
-	return scnprintf(buf, PAGE_SIZE, "%u\n", wusbhc->trust_timeout);
-}
-
-static ssize_t wusb_trust_timeout_store(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t size)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-	ssize_t result = -ENOSYS;
-	unsigned trust_timeout;
-
-	result = sscanf(buf, "%u", &trust_timeout);
-	if (result != 1) {
-		result = -EINVAL;
-		goto out;
-	}
-	wusbhc->trust_timeout = min_t(unsigned, trust_timeout, 500);
-	cancel_delayed_work(&wusbhc->keep_alive_timer);
-	flush_workqueue(wusbd);
-	queue_delayed_work(wusbd, &wusbhc->keep_alive_timer,
-			   msecs_to_jiffies(wusbhc->trust_timeout / 2));
-out:
-	return result < 0 ? result : size;
-}
-static DEVICE_ATTR_RW(wusb_trust_timeout);
-
-/*
- * Show the current WUSB CHID.
- */
-static ssize_t wusb_chid_show(struct device *dev,
-			      struct device_attribute *attr, char *buf)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-	const struct wusb_ckhdid *chid;
-
-	if (wusbhc->wuie_host_info != NULL)
-		chid = &wusbhc->wuie_host_info->CHID;
-	else
-		chid = &wusb_ckhdid_zero;
-
-	return sprintf(buf, "%16ph\n", chid->data);
-}
-
-/*
- * Store a new CHID.
- *
- * - Write an all zeros CHID and it will stop the controller
- * - Write a non-zero CHID and it will start it.
- *
- * See wusbhc_chid_set() for more info.
- */
-static ssize_t wusb_chid_store(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t size)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-	struct wusb_ckhdid chid;
-	ssize_t result;
-
-	result = sscanf(buf,
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx "
-			"%02hhx %02hhx %02hhx %02hhx\n",
-			&chid.data[0] , &chid.data[1] ,
-			&chid.data[2] , &chid.data[3] ,
-			&chid.data[4] , &chid.data[5] ,
-			&chid.data[6] , &chid.data[7] ,
-			&chid.data[8] , &chid.data[9] ,
-			&chid.data[10], &chid.data[11],
-			&chid.data[12], &chid.data[13],
-			&chid.data[14], &chid.data[15]);
-	if (result != 16) {
-		dev_err(dev, "Unrecognized CHID (need 16 8-bit hex digits): "
-			"%d\n", (int)result);
-		return -EINVAL;
-	}
-	result = wusbhc_chid_set(wusbhc, &chid);
-	return result < 0 ? result : size;
-}
-static DEVICE_ATTR_RW(wusb_chid);
-
-
-static ssize_t wusb_phy_rate_show(struct device *dev,
-				  struct device_attribute *attr,
-				  char *buf)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-
-	return sprintf(buf, "%d\n", wusbhc->phy_rate);
-}
-
-static ssize_t wusb_phy_rate_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t size)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-	uint8_t phy_rate;
-	ssize_t result;
-
-	result = sscanf(buf, "%hhu", &phy_rate);
-	if (result != 1)
-		return -EINVAL;
-	if (phy_rate >= UWB_PHY_RATE_INVALID)
-		return -EINVAL;
-
-	wusbhc->phy_rate = phy_rate;
-	return size;
-}
-static DEVICE_ATTR_RW(wusb_phy_rate);
-
-static ssize_t wusb_dnts_show(struct device *dev,
-				  struct device_attribute *attr,
-				  char *buf)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-
-	return sprintf(buf, "num slots: %d\ninterval: %dms\n",
-			wusbhc->dnts_num_slots, wusbhc->dnts_interval);
-}
-
-static ssize_t wusb_dnts_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t size)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-	uint8_t num_slots, interval;
-	ssize_t result;
-
-	result = sscanf(buf, "%hhu %hhu", &num_slots, &interval);
-
-	if (result != 2)
-		return -EINVAL;
-
-	wusbhc->dnts_num_slots = num_slots;
-	wusbhc->dnts_interval = interval;
-
-	return size;
-}
-static DEVICE_ATTR_RW(wusb_dnts);
-
-static ssize_t wusb_retry_count_show(struct device *dev,
-				  struct device_attribute *attr,
-				  char *buf)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-
-	return sprintf(buf, "%d\n", wusbhc->retry_count);
-}
-
-static ssize_t wusb_retry_count_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t size)
-{
-	struct wusbhc *wusbhc = usbhc_dev_to_wusbhc(dev);
-	uint8_t retry_count;
-	ssize_t result;
-
-	result = sscanf(buf, "%hhu", &retry_count);
-
-	if (result != 1)
-		return -EINVAL;
-
-	wusbhc->retry_count = max_t(uint8_t, retry_count,
-					WUSB_RETRY_COUNT_MAX);
-
-	return size;
-}
-static DEVICE_ATTR_RW(wusb_retry_count);
-
-/* Group all the WUSBHC attributes */
-static struct attribute *wusbhc_attrs[] = {
-		&dev_attr_wusb_trust_timeout.attr,
-		&dev_attr_wusb_chid.attr,
-		&dev_attr_wusb_phy_rate.attr,
-		&dev_attr_wusb_dnts.attr,
-		&dev_attr_wusb_retry_count.attr,
-		NULL,
-};
-
-static const struct attribute_group wusbhc_attr_group = {
-	.name = NULL,	/* we want them in the same directory */
-	.attrs = wusbhc_attrs,
-};
-
-/*
- * Create a wusbhc instance
- *
- * NOTEs:
- *
- *  - assumes *wusbhc has been zeroed and wusbhc->usb_hcd has been
- *    initialized but not added.
- *
- *  - fill out ports_max, mmcies_max and mmcie_{add,rm} before calling.
- *
- *  - fill out wusbhc->uwb_rc and refcount it before calling
- *  - fill out the wusbhc->sec_modes array
- */
-int wusbhc_create(struct wusbhc *wusbhc)
-{
-	int result = 0;
-
-	/* set defaults.  These can be overwritten using sysfs attributes. */
-	wusbhc->trust_timeout = WUSB_TRUST_TIMEOUT_MS;
-	wusbhc->phy_rate = UWB_PHY_RATE_INVALID - 1;
-	wusbhc->dnts_num_slots = 4;
-	wusbhc->dnts_interval = 2;
-	wusbhc->retry_count = WUSB_RETRY_COUNT_INFINITE;
-
-	mutex_init(&wusbhc->mutex);
-	result = wusbhc_mmcie_create(wusbhc);
-	if (result < 0)
-		goto error_mmcie_create;
-	result = wusbhc_devconnect_create(wusbhc);
-	if (result < 0)
-		goto error_devconnect_create;
-	result = wusbhc_rh_create(wusbhc);
-	if (result < 0)
-		goto error_rh_create;
-	result = wusbhc_sec_create(wusbhc);
-	if (result < 0)
-		goto error_sec_create;
-	return 0;
-
-error_sec_create:
-	wusbhc_rh_destroy(wusbhc);
-error_rh_create:
-	wusbhc_devconnect_destroy(wusbhc);
-error_devconnect_create:
-	wusbhc_mmcie_destroy(wusbhc);
-error_mmcie_create:
-	return result;
-}
-EXPORT_SYMBOL_GPL(wusbhc_create);
-
-static inline struct kobject *wusbhc_kobj(struct wusbhc *wusbhc)
-{
-	return &wusbhc->usb_hcd.self.controller->kobj;
-}
-
-/*
- * Phase B of a wusbhc instance creation
- *
- * Creates fields that depend on wusbhc->usb_hcd having been
- * added. This is where we create the sysfs files in
- * /sys/class/usb_host/usb_hostX/.
- *
- * NOTE: Assumes wusbhc->usb_hcd has been already added by the upper
- *       layer (hwahc or whci)
- */
-int wusbhc_b_create(struct wusbhc *wusbhc)
-{
-	int result = 0;
-	struct device *dev = wusbhc->usb_hcd.self.controller;
-
-	result = sysfs_create_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group);
-	if (result < 0) {
-		dev_err(dev, "Cannot register WUSBHC attributes: %d\n",
-			result);
-		goto error_create_attr_group;
-	}
-
-	return 0;
-error_create_attr_group:
-	return result;
-}
-EXPORT_SYMBOL_GPL(wusbhc_b_create);
-
-void wusbhc_b_destroy(struct wusbhc *wusbhc)
-{
-	wusbhc_pal_unregister(wusbhc);
-	sysfs_remove_group(wusbhc_kobj(wusbhc), &wusbhc_attr_group);
-}
-EXPORT_SYMBOL_GPL(wusbhc_b_destroy);
-
-void wusbhc_destroy(struct wusbhc *wusbhc)
-{
-	wusbhc_sec_destroy(wusbhc);
-	wusbhc_rh_destroy(wusbhc);
-	wusbhc_devconnect_destroy(wusbhc);
-	wusbhc_mmcie_destroy(wusbhc);
-}
-EXPORT_SYMBOL_GPL(wusbhc_destroy);
-
-struct workqueue_struct *wusbd;
-EXPORT_SYMBOL_GPL(wusbd);
-
-/*
- * WUSB Cluster ID allocation map
- *
- * Each WUSB bus in a channel is identified with a Cluster Id in the
- * unauth address pace (WUSB1.0[4.3]). We take the range 0xe0 to 0xff
- * (that's space for 31 WUSB controllers, as 0xff can't be taken). We
- * start taking from 0xff, 0xfe, 0xfd... (hence the += or -= 0xff).
- *
- * For each one we taken, we pin it in the bitap
- */
-#define CLUSTER_IDS 32
-static DECLARE_BITMAP(wusb_cluster_id_table, CLUSTER_IDS);
-static DEFINE_SPINLOCK(wusb_cluster_ids_lock);
-
-/*
- * Get a WUSB Cluster ID
- *
- * Need to release with wusb_cluster_id_put() when done w/ it.
- */
-/* FIXME: coordinate with the choose_addres() from the USB stack */
-/* we want to leave the top of the 128 range for cluster addresses and
- * the bottom for device addresses (as we map them one on one with
- * ports). */
-u8 wusb_cluster_id_get(void)
-{
-	u8 id;
-	spin_lock(&wusb_cluster_ids_lock);
-	id = find_first_zero_bit(wusb_cluster_id_table, CLUSTER_IDS);
-	if (id >= CLUSTER_IDS) {
-		id = 0;
-		goto out;
-	}
-	set_bit(id, wusb_cluster_id_table);
-	id = (u8) 0xff - id;
-out:
-	spin_unlock(&wusb_cluster_ids_lock);
-	return id;
-
-}
-EXPORT_SYMBOL_GPL(wusb_cluster_id_get);
-
-/*
- * Release a WUSB Cluster ID
- *
- * Obtained it with wusb_cluster_id_get()
- */
-void wusb_cluster_id_put(u8 id)
-{
-	id = 0xff - id;
-	BUG_ON(id >= CLUSTER_IDS);
-	spin_lock(&wusb_cluster_ids_lock);
-	WARN_ON(!test_bit(id, wusb_cluster_id_table));
-	clear_bit(id, wusb_cluster_id_table);
-	spin_unlock(&wusb_cluster_ids_lock);
-}
-EXPORT_SYMBOL_GPL(wusb_cluster_id_put);
-
-/**
- * wusbhc_giveback_urb - return an URB to the USB core
- * @wusbhc: the host controller the URB is from.
- * @urb:    the URB.
- * @status: the URB's status.
- *
- * Return an URB to the USB core doing some additional WUSB specific
- * processing.
- *
- *  - After a successful transfer, update the trust timeout timestamp
- *    for the WUSB device.
- *
- *  - [WUSB] sections 4.13 and 7.5.1 specify the stop retransmission
- *    condition for the WCONNECTACK_IE is that the host has observed
- *    the associated device responding to a control transfer.
- */
-void wusbhc_giveback_urb(struct wusbhc *wusbhc, struct urb *urb, int status)
-{
-	struct wusb_dev *wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc,
-					urb->dev);
-
-	if (status == 0 && wusb_dev) {
-		wusb_dev->entry_ts = jiffies;
-
-		/* wusbhc_devconnect_acked() can't be called from
-		   atomic context so defer it to a work queue. */
-		if (!list_empty(&wusb_dev->cack_node))
-			queue_work(wusbd, &wusb_dev->devconnect_acked_work);
-		else
-			wusb_dev_put(wusb_dev);
-	}
-
-	usb_hcd_giveback_urb(&wusbhc->usb_hcd, urb, status);
-}
-EXPORT_SYMBOL_GPL(wusbhc_giveback_urb);
-
-/**
- * wusbhc_reset_all - reset the HC hardware
- * @wusbhc: the host controller to reset.
- *
- * Request a full hardware reset of the chip.  This will also reset
- * the radio controller and any other PALs.
- */
-void wusbhc_reset_all(struct wusbhc *wusbhc)
-{
-	if (wusbhc->uwb_rc)
-		uwb_rc_reset_all(wusbhc->uwb_rc);
-}
-EXPORT_SYMBOL_GPL(wusbhc_reset_all);
-
-static struct notifier_block wusb_usb_notifier = {
-	.notifier_call = wusb_usb_ncb,
-	.priority = INT_MAX	/* Need to be called first of all */
-};
-
-static int __init wusbcore_init(void)
-{
-	int result;
-	result = wusb_crypto_init();
-	if (result < 0)
-		goto error_crypto_init;
-	/* WQ is singlethread because we need to serialize notifications */
-	wusbd = create_singlethread_workqueue("wusbd");
-	if (wusbd == NULL) {
-		result = -ENOMEM;
-		printk(KERN_ERR "WUSB-core: Cannot create wusbd workqueue\n");
-		goto error_wusbd_create;
-	}
-	usb_register_notify(&wusb_usb_notifier);
-	bitmap_zero(wusb_cluster_id_table, CLUSTER_IDS);
-	set_bit(0, wusb_cluster_id_table);	/* reserve Cluster ID 0xff */
-	return 0;
-
-error_wusbd_create:
-	wusb_crypto_exit();
-error_crypto_init:
-	return result;
-
-}
-module_init(wusbcore_init);
-
-static void __exit wusbcore_exit(void)
-{
-	clear_bit(0, wusb_cluster_id_table);
-	if (!bitmap_empty(wusb_cluster_id_table, CLUSTER_IDS)) {
-		printk(KERN_ERR "BUG: WUSB Cluster IDs not released on exit: %*pb\n",
-		       CLUSTER_IDS, wusb_cluster_id_table);
-		WARN_ON(1);
-	}
-	usb_unregister_notify(&wusb_usb_notifier);
-	destroy_workqueue(wusbd);
-	wusb_crypto_exit();
-}
-module_exit(wusbcore_exit);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Wireless USB core");
-MODULE_LICENSE("GPL");
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
deleted file mode 100644
index 7681d796ca5b..000000000000
--- a/drivers/usb/wusbcore/wusbhc.h
+++ /dev/null
@@ -1,487 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Host Controller
- * Common infrastructure for WHCI and HWA WUSB-HC drivers
- *
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This driver implements parts common to all Wireless USB Host
- * Controllers (struct wusbhc, embedding a struct usb_hcd) and is used
- * by:
- *
- *   - hwahc: HWA, USB-dongle that implements a Wireless USB host
- *     controller, (Wireless USB 1.0 Host-Wire-Adapter specification).
- *
- *   - whci: WHCI, a PCI card with a wireless host controller
- *     (Wireless Host Controller Interface 1.0 specification).
- *
- * Check out the Design-overview.txt file in the source documentation
- * for other details on the implementation.
- *
- * Main blocks:
- *
- *  rh         Root Hub emulation (part of the HCD glue)
- *
- *  devconnect Handle all the issues related to device connection,
- *             authentication, disconnection, timeout, reseting,
- *             keepalives, etc.
- *
- *  mmc        MMC IE broadcasting handling
- *
- * A host controller driver just initializes its stuff and as part of
- * that, creates a 'struct wusbhc' instance that handles all the
- * common WUSB mechanisms. Links in the function ops that are specific
- * to it and then registers the host controller. Ready to run.
- */
-
-#ifndef __WUSBHC_H__
-#define __WUSBHC_H__
-
-#include <linux/usb.h>
-#include <linux/list.h>
-#include <linux/mutex.h>
-#include <linux/kref.h>
-#include <linux/workqueue.h>
-#include <linux/usb/hcd.h>
-#include <linux/uwb.h>
-#include <linux/usb/wusb.h>
-
-/*
- * Time from a WUSB channel stop request to the last transmitted MMC.
- *
- * This needs to be > 4.096 ms in case no MMCs can be transmitted in
- * zone 0.
- */
-#define WUSB_CHANNEL_STOP_DELAY_MS 8
-#define WUSB_RETRY_COUNT_MAX 15
-#define WUSB_RETRY_COUNT_INFINITE 0
-
-/**
- * Wireless USB device
- *
- * Describe a WUSB device connected to the cluster. This struct
- * belongs to the 'struct wusb_port' it is attached to and it is
- * responsible for putting and clearing the pointer to it.
- *
- * Note this "complements" the 'struct usb_device' that the usb_hcd
- * keeps for each connected USB device. However, it extends some
- * information that is not available (there is no hcpriv ptr in it!)
- * *and* most importantly, it's life cycle is different. It is created
- * as soon as we get a DN_Connect (connect request notification) from
- * the device through the WUSB host controller; the USB stack doesn't
- * create the device until we authenticate it. FIXME: this will
- * change.
- *
- * @bos:    This is allocated when the BOS descriptors are read from
- *          the device and freed upon the wusb_dev struct dying.
- * @wusb_cap_descr: points into @bos, and has been verified to be size
- *                  safe.
- */
-struct wusb_dev {
-	struct kref refcnt;
-	struct wusbhc *wusbhc;
-	struct list_head cack_node;	/* Connect-Ack list */
-	struct list_head rekey_node;	/* GTK rekey list */
-	u8 port_idx;
-	u8 addr;
-	u8 beacon_type:4;
-	struct usb_encryption_descriptor ccm1_etd;
-	struct wusb_ckhdid cdid;
-	unsigned long entry_ts;
-	struct usb_bos_descriptor *bos;
-	struct usb_wireless_cap_descriptor *wusb_cap_descr;
-	struct uwb_mas_bm availability;
-	struct work_struct devconnect_acked_work;
-	struct usb_device *usb_dev;
-};
-
-#define WUSB_DEV_ADDR_UNAUTH 0x80
-
-static inline void wusb_dev_init(struct wusb_dev *wusb_dev)
-{
-	kref_init(&wusb_dev->refcnt);
-	/* no need to init the cack_node */
-}
-
-extern void wusb_dev_destroy(struct kref *_wusb_dev);
-
-static inline struct wusb_dev *wusb_dev_get(struct wusb_dev *wusb_dev)
-{
-	kref_get(&wusb_dev->refcnt);
-	return wusb_dev;
-}
-
-static inline void wusb_dev_put(struct wusb_dev *wusb_dev)
-{
-	kref_put(&wusb_dev->refcnt, wusb_dev_destroy);
-}
-
-/**
- * Wireless USB Host Controller root hub "fake" ports
- * (state and device information)
- *
- * Wireless USB is wireless, so there are no ports; but we
- * fake'em. Each RC can connect a max of devices at the same time
- * (given in the Wireless Adapter descriptor, bNumPorts or WHCI's
- * caps), referred to in wusbhc->ports_max.
- *
- * See rh.c for more information.
- *
- * The @status and @change use the same bits as in USB2.0[11.24.2.7],
- * so we don't have to do much when getting the port's status.
- *
- * WUSB1.0[7.1], USB2.0[11.24.2.7.1,fig 11-10],
- * include/linux/usb_ch9.h (#define USB_PORT_STAT_*)
- */
-struct wusb_port {
-	u16 status;
-	u16 change;
-	struct wusb_dev *wusb_dev;	/* connected device's info */
-	u32 ptk_tkid;
-};
-
-/**
- * WUSB Host Controller specifics
- *
- * All fields that are common to all Wireless USB controller types
- * (HWA and WHCI) are grouped here. Host Controller
- * functions/operations that only deal with general Wireless USB HC
- * issues use this data type to refer to the host.
- *
- * @usb_hcd	   Instantiation of a USB host controller
- *                 (initialized by upper layer [HWA=HC or WHCI].
- *
- * @dev		   Device that implements this; initialized by the
- *                 upper layer (HWA-HC, WHCI...); this device should
- *                 have a refcount.
- *
- * @trust_timeout  After this time without hearing for device
- *                 activity, we consider the device gone and we have to
- *                 re-authenticate.
- *
- *                 Can be accessed w/o locking--however, read to a
- *                 local variable then use.
- *
- * @chid           WUSB Cluster Host ID: this is supposed to be a
- *                 unique value that doesn't change across reboots (so
- *                 that your devices do not require re-association).
- *
- *                 Read/Write protected by @mutex
- *
- * @dev_info       This array has ports_max elements. It is used to
- *                 give the HC information about the WUSB devices (see
- *                 'struct wusb_dev_info').
- *
- *	           For HWA we need to allocate it in heap; for WHCI it
- *                 needs to be permanently mapped, so we keep it for
- *                 both and make it easy. Call wusbhc->dev_info_set()
- *                 to update an entry.
- *
- * @ports_max	   Number of simultaneous device connections (fake
- *                 ports) this HC will take. Read-only.
- *
- * @port	   Array of port status for each fake root port. Guaranteed to
- *                 always be the same length during device existence
- *                 [this allows for some unlocked but referenced reading].
- *
- * @mmcies_max	   Max number of Information Elements this HC can send
- *                 in its MMC. Read-only.
- *
- * @start          Start the WUSB channel.
- *
- * @stop           Stop the WUSB channel after the specified number of
- *                 milliseconds.  Channel Stop IEs should be transmitted
- *                 as required by [WUSB] 4.16.2.1.
- *
- * @mmcie_add	   HC specific operation (WHCI or HWA) for adding an
- *                 MMCIE.
- *
- * @mmcie_rm	   HC specific operation (WHCI or HWA) for removing an
- *                 MMCIE.
- *
- * @set_ptk:       Set the PTK and enable encryption for a device. Or, if
- *                 the supplied key is NULL, disable encryption for that
- *                 device.
- *
- * @set_gtk:       Set the GTK to be used for all future broadcast packets
- *                 (i.e., MMCs).  With some hardware, setting the GTK may start
- *                 MMC transmission.
- *
- * NOTE:
- *
- *  - If wusb_dev->usb_dev is not NULL, then usb_dev is valid
- *    (wusb_dev has a refcount on it). Likewise, if usb_dev->wusb_dev
- *    is not NULL, usb_dev->wusb_dev is valid (usb_dev keeps a
- *    refcount on it).
- *
- *    Most of the times when you need to use it, it will be non-NULL,
- *    so there is no real need to check for it (wusb_dev will
- *    disappear before usb_dev).
- *
- *  - The following fields need to be filled out before calling
- *    wusbhc_create(): ports_max, mmcies_max, mmcie_{add,rm}.
- *
- *  - there is no wusbhc_init() method, we do everything in
- *    wusbhc_create().
- *
- *  - Creation is done in two phases, wusbhc_create() and
- *    wusbhc_create_b(); b are the parts that need to be called after
- *    calling usb_hcd_add(&wusbhc->usb_hcd).
- */
-struct wusbhc {
-	struct usb_hcd usb_hcd;		/* HAS TO BE 1st */
-	struct device *dev;
-	struct uwb_rc *uwb_rc;
-	struct uwb_pal pal;
-
-	unsigned trust_timeout;			/* in jiffies */
-	struct wusb_ckhdid chid;
-	uint8_t phy_rate;
-	uint8_t dnts_num_slots;
-	uint8_t dnts_interval;
-	uint8_t retry_count;
-	struct wuie_host_info *wuie_host_info;
-
-	struct mutex mutex;			/* locks everything else */
-	u16 cluster_id;				/* Wireless USB Cluster ID */
-	struct wusb_port *port;			/* Fake port status handling */
-	struct wusb_dev_info *dev_info;		/* for Set Device Info mgmt */
-	u8 ports_max;
-	unsigned active:1;			/* currently xmit'ing MMCs */
-	struct wuie_keep_alive keep_alive_ie;	/* protected by mutex */
-	struct delayed_work keep_alive_timer;
-	struct list_head cack_list;		/* Connect acknowledging */
-	size_t cack_count;			/* protected by 'mutex' */
-	struct wuie_connect_ack cack_ie;
-	struct uwb_rsv *rsv;		/* cluster bandwidth reservation */
-
-	struct mutex mmcie_mutex;		/* MMC WUIE handling */
-	struct wuie_hdr **mmcie;		/* WUIE array */
-	u8 mmcies_max;
-	/* FIXME: make wusbhc_ops? */
-	int (*start)(struct wusbhc *wusbhc);
-	void (*stop)(struct wusbhc *wusbhc, int delay);
-	int (*mmcie_add)(struct wusbhc *wusbhc, u8 interval, u8 repeat_cnt,
-			 u8 handle, struct wuie_hdr *wuie);
-	int (*mmcie_rm)(struct wusbhc *wusbhc, u8 handle);
-	int (*dev_info_set)(struct wusbhc *, struct wusb_dev *wusb_dev);
-	int (*bwa_set)(struct wusbhc *wusbhc, s8 stream_index,
-		       const struct uwb_mas_bm *);
-	int (*set_ptk)(struct wusbhc *wusbhc, u8 port_idx,
-		       u32 tkid, const void *key, size_t key_size);
-	int (*set_gtk)(struct wusbhc *wusbhc,
-		       u32 tkid, const void *key, size_t key_size);
-	int (*set_num_dnts)(struct wusbhc *wusbhc, u8 interval, u8 slots);
-
-	struct {
-		struct usb_key_descriptor descr;
-		u8 data[16];				/* GTK key data */
-	} __attribute__((packed)) gtk;
-	u8 gtk_index;
-	u32 gtk_tkid;
-
-	/* workqueue for WUSB security related tasks. */
-	struct workqueue_struct *wq_security;
-	struct work_struct gtk_rekey_work;
-
-	struct usb_encryption_descriptor *ccm1_etd;
-};
-
-#define usb_hcd_to_wusbhc(u) container_of((u), struct wusbhc, usb_hcd)
-
-
-extern int wusbhc_create(struct wusbhc *);
-extern int wusbhc_b_create(struct wusbhc *);
-extern void wusbhc_b_destroy(struct wusbhc *);
-extern void wusbhc_destroy(struct wusbhc *);
-extern int wusb_dev_sysfs_add(struct wusbhc *, struct usb_device *,
-			      struct wusb_dev *);
-extern void wusb_dev_sysfs_rm(struct wusb_dev *);
-extern int wusbhc_sec_create(struct wusbhc *);
-extern int wusbhc_sec_start(struct wusbhc *);
-extern void wusbhc_sec_stop(struct wusbhc *);
-extern void wusbhc_sec_destroy(struct wusbhc *);
-extern void wusbhc_giveback_urb(struct wusbhc *wusbhc, struct urb *urb,
-				int status);
-void wusbhc_reset_all(struct wusbhc *wusbhc);
-
-int wusbhc_pal_register(struct wusbhc *wusbhc);
-void wusbhc_pal_unregister(struct wusbhc *wusbhc);
-
-/*
- * Return @usb_dev's @usb_hcd (properly referenced) or NULL if gone
- *
- * @usb_dev: USB device, UNLOCKED and referenced (or otherwise, safe ptr)
- *
- * This is a safe assumption as @usb_dev->bus is referenced all the
- * time during the @usb_dev life cycle.
- */
-static inline
-struct usb_hcd *usb_hcd_get_by_usb_dev(struct usb_device *usb_dev)
-{
-	struct usb_hcd *usb_hcd;
-	usb_hcd = bus_to_hcd(usb_dev->bus);
-	return usb_get_hcd(usb_hcd);
-}
-
-/*
- * Increment the reference count on a wusbhc.
- *
- * @wusbhc's life cycle is identical to that of the underlying usb_hcd.
- */
-static inline struct wusbhc *wusbhc_get(struct wusbhc *wusbhc)
-{
-	return usb_get_hcd(&wusbhc->usb_hcd) ? wusbhc : NULL;
-}
-
-/*
- * Return the wusbhc associated to a @usb_dev
- *
- * @usb_dev: USB device, UNLOCKED and referenced (or otherwise, safe ptr)
- *
- * @returns: wusbhc for @usb_dev; NULL if the @usb_dev is being torn down.
- *           WARNING: referenced at the usb_hcd level, unlocked
- *
- * FIXME: move offline
- */
-static inline struct wusbhc *wusbhc_get_by_usb_dev(struct usb_device *usb_dev)
-{
-	struct wusbhc *wusbhc = NULL;
-	struct usb_hcd *usb_hcd;
-	if (usb_dev->devnum > 1 && !usb_dev->wusb) {
-		/* but root hubs */
-		dev_err(&usb_dev->dev, "devnum %d wusb %d\n", usb_dev->devnum,
-			usb_dev->wusb);
-		BUG_ON(usb_dev->devnum > 1 && !usb_dev->wusb);
-	}
-	usb_hcd = usb_hcd_get_by_usb_dev(usb_dev);
-	if (usb_hcd == NULL)
-		return NULL;
-	BUG_ON(usb_hcd->wireless == 0);
-	return wusbhc = usb_hcd_to_wusbhc(usb_hcd);
-}
-
-
-static inline void wusbhc_put(struct wusbhc *wusbhc)
-{
-	usb_put_hcd(&wusbhc->usb_hcd);
-}
-
-int wusbhc_start(struct wusbhc *wusbhc);
-void wusbhc_stop(struct wusbhc *wusbhc);
-extern int wusbhc_chid_set(struct wusbhc *, const struct wusb_ckhdid *);
-
-/* Device connect handling */
-extern int wusbhc_devconnect_create(struct wusbhc *);
-extern void wusbhc_devconnect_destroy(struct wusbhc *);
-extern int wusbhc_devconnect_start(struct wusbhc *wusbhc);
-extern void wusbhc_devconnect_stop(struct wusbhc *wusbhc);
-extern void wusbhc_handle_dn(struct wusbhc *, u8 srcaddr,
-			     struct wusb_dn_hdr *dn_hdr, size_t size);
-extern void __wusbhc_dev_disable(struct wusbhc *wusbhc, u8 port);
-extern int wusb_usb_ncb(struct notifier_block *nb, unsigned long val,
-			void *priv);
-extern int wusb_set_dev_addr(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev,
-			     u8 addr);
-
-/* Wireless USB fake Root Hub methods */
-extern int wusbhc_rh_create(struct wusbhc *);
-extern void wusbhc_rh_destroy(struct wusbhc *);
-
-extern int wusbhc_rh_status_data(struct usb_hcd *, char *);
-extern int wusbhc_rh_control(struct usb_hcd *, u16, u16, u16, char *, u16);
-extern int wusbhc_rh_start_port_reset(struct usb_hcd *, unsigned);
-
-/* MMC handling */
-extern int wusbhc_mmcie_create(struct wusbhc *);
-extern void wusbhc_mmcie_destroy(struct wusbhc *);
-extern int wusbhc_mmcie_set(struct wusbhc *, u8 interval, u8 repeat_cnt,
-			    struct wuie_hdr *);
-extern void wusbhc_mmcie_rm(struct wusbhc *, struct wuie_hdr *);
-
-/* Bandwidth reservation */
-int wusbhc_rsv_establish(struct wusbhc *wusbhc);
-void wusbhc_rsv_terminate(struct wusbhc *wusbhc);
-
-/*
- * I've always said
- * I wanted a wedding in a church...
- *
- * but lately I've been thinking about
- * the Botanical Gardens.
- *
- * We could do it by the tulips.
- * It'll be beautiful
- *
- * --Security!
- */
-extern int wusb_dev_sec_add(struct wusbhc *, struct usb_device *,
-				struct wusb_dev *);
-extern void wusb_dev_sec_rm(struct wusb_dev *) ;
-extern int wusb_dev_4way_handshake(struct wusbhc *, struct wusb_dev *,
-				   struct wusb_ckhdid *ck);
-void wusbhc_gtk_rekey(struct wusbhc *wusbhc);
-int wusb_dev_update_address(struct wusbhc *wusbhc, struct wusb_dev *wusb_dev);
-
-
-/* WUSB Cluster ID handling */
-extern u8 wusb_cluster_id_get(void);
-extern void wusb_cluster_id_put(u8);
-
-/*
- * wusb_port_by_idx - return the port associated to a zero-based port index
- *
- * NOTE: valid without locking as long as wusbhc is referenced (as the
- *       number of ports doesn't change). The data pointed to has to
- *       be verified though :)
- */
-static inline struct wusb_port *wusb_port_by_idx(struct wusbhc *wusbhc,
-						 u8 port_idx)
-{
-	return &wusbhc->port[port_idx];
-}
-
-/*
- * wusb_port_no_to_idx - Convert port number (per usb_dev->portnum) to
- * a port_idx.
- *
- * USB stack USB ports are 1 based!!
- *
- * NOTE: only valid for WUSB devices!!!
- */
-static inline u8 wusb_port_no_to_idx(u8 port_no)
-{
-	return port_no - 1;
-}
-
-extern struct wusb_dev *__wusb_dev_get_by_usb_dev(struct wusbhc *,
-						  struct usb_device *);
-
-/*
- * Return a referenced wusb_dev given a @usb_dev
- *
- * Returns NULL if the usb_dev is being torn down.
- *
- * FIXME: move offline
- */
-static inline
-struct wusb_dev *wusb_dev_get_by_usb_dev(struct usb_device *usb_dev)
-{
-	struct wusbhc *wusbhc;
-	struct wusb_dev *wusb_dev;
-	wusbhc = wusbhc_get_by_usb_dev(usb_dev);
-	if (wusbhc == NULL)
-		return NULL;
-	mutex_lock(&wusbhc->mutex);
-	wusb_dev = __wusb_dev_get_by_usb_dev(wusbhc, usb_dev);
-	mutex_unlock(&wusbhc->mutex);
-	wusbhc_put(wusbhc);
-	return wusb_dev;
-}
-
-/* Misc */
-
-extern struct workqueue_struct *wusbd;
-#endif /* #ifndef __WUSBHC_H__ */
diff --git a/drivers/uwb/Kconfig b/drivers/uwb/Kconfig
deleted file mode 100644
index 259e053e1e09..000000000000
--- a/drivers/uwb/Kconfig
+++ /dev/null
@@ -1,72 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# UWB device configuration
-#
-
-menuconfig UWB
-	tristate "Ultra Wideband devices"
-	default n
-	select GENERIC_NET_UTILS
-	help
-	  UWB is a high-bandwidth, low-power, point-to-point radio
-	  technology using a wide spectrum (3.1-10.6GHz). It is
-	  optimized for in-room use (480Mbps at 2 meters, 110Mbps at
-	  10m). It serves as the transport layer for other protocols,
-	  such as Wireless USB (WUSB).
-
-	  The topology is peer to peer; however, higher level
-	  protocols (such as WUSB) might impose a master/slave
-	  relationship.
-
-	  Say Y here if your computer has UWB radio controllers (USB or PCI)
-	  based. You will need to enable the radio controllers
-	  below.  It is ok to select all of them, no harm done.
-
-	  For more help check the UWB and WUSB related files in
-	  <file:Documentation/usb/>.
-
-	  To compile the UWB stack as a module, choose M here.
-
-if UWB
-
-config UWB_HWA
-	tristate "UWB Radio Control driver for WUSB-compliant USB dongles (HWA)"
-	depends on USB
-	help
-	  This driver enables the radio controller for HWA USB
-	  devices. HWA stands for Host Wire Adapter, and it is a UWB
-	  Radio Controller connected to your system via USB. Most of
-	  them come with a Wireless USB host controller also.
-
-	  To compile this driver select Y (built in) or M (module). It
-	  is safe to select any even if you do not have the hardware.
-
-config UWB_WHCI
-        tristate "UWB Radio Control driver for WHCI-compliant cards"
-        depends on PCI
-        help
-          This driver enables the radio controller for WHCI cards.
-
-          WHCI is a specification developed by Intel
-          (http://www.intel.com/technology/comms/wusb/whci.htm) much
-          in the spirit of USB's EHCI, but for UWB and Wireless USB
-          radio/host controllers connected via memory mapping (eg:
-          PCI). Most of these cards come also with a Wireless USB host
-          controller.
-
-          To compile this driver select Y (built in) or M (module). It
-          is safe to select any even if you do not have the hardware.
-
-config UWB_I1480U
-        tristate "Support for Intel Wireless UWB Link 1480 HWA"
-        depends on UWB_HWA
-        select FW_LOADER
-        help
-         This driver enables support for the i1480 when connected via
-         USB. It consists of a firmware uploader that will enable it
-         to behave as an HWA device.
-
-         To compile this driver select Y (built in) or M (module). It
-         is safe to select any even if you do not have the hardware.
-
-endif # UWB
diff --git a/drivers/uwb/Makefile b/drivers/uwb/Makefile
deleted file mode 100644
index 32f4de7afbd6..000000000000
--- a/drivers/uwb/Makefile
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_UWB)		+= uwb.o
-obj-$(CONFIG_UWB_WHCI)		+= umc.o whci.o whc-rc.o
-obj-$(CONFIG_UWB_HWA)		+= hwa-rc.o
-obj-$(CONFIG_UWB_I1480U)	+= i1480/
-
-uwb-objs :=		\
-	address.o	\
-	allocator.o	\
-	beacon.o	\
-	driver.o	\
-	drp.o		\
-	drp-avail.o	\
-	drp-ie.o	\
-	est.o		\
-	ie.o		\
-	ie-rcv.o	\
-	lc-dev.o	\
-	lc-rc.o		\
-	neh.o		\
-	pal.o		\
-	radio.o		\
-	reset.o		\
-	rsv.o		\
-	scan.o		\
-	uwb-debug.o	\
-	uwbd.o
-
-umc-objs :=		\
-	umc-bus.o	\
-	umc-dev.o	\
-	umc-drv.o
diff --git a/drivers/uwb/address.c b/drivers/uwb/address.c
deleted file mode 100644
index 857d5cd56a95..000000000000
--- a/drivers/uwb/address.c
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Address management
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- */
-
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/random.h>
-#include <linux/etherdevice.h>
-
-#include "uwb-internal.h"
-
-
-/** Device Address Management command */
-struct uwb_rc_cmd_dev_addr_mgmt {
-	struct uwb_rccb rccb;
-	u8 bmOperationType;
-	u8 baAddr[6];
-} __attribute__((packed));
-
-
-/**
- * Low level command for setting/getting UWB radio's addresses
- *
- * @hwarc:	HWA Radio Control interface instance
- * @bmOperationType:
- * 		Set/get, MAC/DEV (see WUSB1.0[8.6.2.2])
- * @baAddr:	address buffer--assumed to have enough data to hold
- *              the address type requested.
- * @reply:	Pointer to reply buffer (can be stack allocated)
- * @returns:	0 if ok, < 0 errno code on error.
- *
- * @cmd has to be allocated because USB cannot grok USB or vmalloc
- * buffers depending on your combination of host architecture.
- */
-static
-int uwb_rc_dev_addr_mgmt(struct uwb_rc *rc,
-			 u8 bmOperationType, const u8 *baAddr,
-			 struct uwb_rc_evt_dev_addr_mgmt *reply)
-{
-	int result;
-	struct uwb_rc_cmd_dev_addr_mgmt *cmd;
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_kzalloc;
-	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
-	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_DEV_ADDR_MGMT);
-	cmd->bmOperationType = bmOperationType;
-	if (baAddr) {
-		size_t size = 0;
-		switch (bmOperationType >> 1) {
-		case 0:	size = 2; break;
-		case 1:	size = 6; break;
-		default: BUG();
-		}
-		memcpy(cmd->baAddr, baAddr, size);
-	}
-	reply->rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply->rceb.wEvent = UWB_RC_CMD_DEV_ADDR_MGMT;
-	result = uwb_rc_cmd(rc, "DEV-ADDR-MGMT",
-			    &cmd->rccb, sizeof(*cmd),
-			    &reply->rceb, sizeof(*reply));
-	if (result < 0)
-		goto error_cmd;
-	if (result < sizeof(*reply)) {
-		dev_err(&rc->uwb_dev.dev,
-			"DEV-ADDR-MGMT: not enough data replied: "
-			"%d vs %zu bytes needed\n", result, sizeof(*reply));
-		result = -ENOMSG;
-	} else if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev,
-			"DEV-ADDR-MGMT: command execution failed: %s (%d)\n",
-			uwb_rc_strerror(reply->bResultCode),
-			reply->bResultCode);
-		result = -EIO;
-	} else
-		result = 0;
-error_cmd:
-	kfree(cmd);
-error_kzalloc:
-	return result;
-}
-
-
-/**
- * Set the UWB RC MAC or device address.
- *
- * @rc:      UWB Radio Controller
- * @_addr:   Pointer to address to write [assumed to be either a
- *           'struct uwb_mac_addr *' or a 'struct uwb_dev_addr *'].
- * @type:    Type of address to set (UWB_ADDR_DEV or UWB_ADDR_MAC).
- * @returns: 0 if ok, < 0 errno code on error.
- *
- * Some anal retentivity here: even if both 'struct
- * uwb_{dev,mac}_addr' have the actual byte array in the same offset
- * and I could just pass _addr to hwarc_cmd_dev_addr_mgmt(), I prefer
- * to use some syntatic sugar in case someday we decide to change the
- * format of the structs. The compiler will optimize it out anyway.
- */
-static int uwb_rc_addr_set(struct uwb_rc *rc,
-		    const void *_addr, enum uwb_addr_type type)
-{
-	int result;
-	u8 bmOperationType = 0x1; 		/* Set address */
-	const struct uwb_dev_addr *dev_addr = _addr;
-	const struct uwb_mac_addr *mac_addr = _addr;
-	struct uwb_rc_evt_dev_addr_mgmt reply;
-	const u8 *baAddr;
-
-	result = -EINVAL;
-	switch (type) {
-	case UWB_ADDR_DEV:
-		baAddr = dev_addr->data;
-		break;
-	case UWB_ADDR_MAC:
-		baAddr = mac_addr->data;
-		bmOperationType |= 0x2;
-		break;
-	default:
-		return result;
-	}
-	return uwb_rc_dev_addr_mgmt(rc, bmOperationType, baAddr, &reply);
-}
-
-
-/**
- * Get the UWB radio's MAC or device address.
- *
- * @rc:      UWB Radio Controller
- * @_addr:   Where to write the address data [assumed to be either a
- *           'struct uwb_mac_addr *' or a 'struct uwb_dev_addr *'].
- * @type:    Type of address to get (UWB_ADDR_DEV or UWB_ADDR_MAC).
- * @returns: 0 if ok (and *_addr set), < 0 errno code on error.
- *
- * See comment in uwb_rc_addr_set() about anal retentivity in the
- * type handling of the address variables.
- */
-static int uwb_rc_addr_get(struct uwb_rc *rc,
-		    void *_addr, enum uwb_addr_type type)
-{
-	int result;
-	u8 bmOperationType = 0x0; 		/* Get address */
-	struct uwb_rc_evt_dev_addr_mgmt evt;
-	struct uwb_dev_addr *dev_addr = _addr;
-	struct uwb_mac_addr *mac_addr = _addr;
-	u8 *baAddr;
-
-	result = -EINVAL;
-	switch (type) {
-	case UWB_ADDR_DEV:
-		baAddr = dev_addr->data;
-		break;
-	case UWB_ADDR_MAC:
-		bmOperationType |= 0x2;
-		baAddr = mac_addr->data;
-		break;
-	default:
-		return result;
-	}
-	result = uwb_rc_dev_addr_mgmt(rc, bmOperationType, baAddr, &evt);
-	if (result == 0)
-		switch (type) {
-		case UWB_ADDR_DEV:
-			memcpy(&dev_addr->data, evt.baAddr,
-			       sizeof(dev_addr->data));
-			break;
-		case UWB_ADDR_MAC:
-			memcpy(&mac_addr->data, evt.baAddr,
-			       sizeof(mac_addr->data));
-			break;
-		default:		/* shut gcc up */
-			BUG();
-		}
-	return result;
-}
-
-
-/** Get @rc's MAC address to @addr */
-int uwb_rc_mac_addr_get(struct uwb_rc *rc,
-			struct uwb_mac_addr *addr) {
-	return uwb_rc_addr_get(rc, addr, UWB_ADDR_MAC);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_mac_addr_get);
-
-
-/** Get @rc's device address to @addr */
-int uwb_rc_dev_addr_get(struct uwb_rc *rc,
-			struct uwb_dev_addr *addr) {
-	return uwb_rc_addr_get(rc, addr, UWB_ADDR_DEV);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_dev_addr_get);
-
-
-/** Set @rc's address to @addr */
-int uwb_rc_mac_addr_set(struct uwb_rc *rc,
-			const struct uwb_mac_addr *addr)
-{
-	int result = -EINVAL;
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = uwb_rc_addr_set(rc, addr, UWB_ADDR_MAC);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
-}
-
-
-/** Set @rc's address to @addr */
-int uwb_rc_dev_addr_set(struct uwb_rc *rc,
-			const struct uwb_dev_addr *addr)
-{
-	int result = -EINVAL;
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = uwb_rc_addr_set(rc, addr, UWB_ADDR_DEV);
-	rc->uwb_dev.dev_addr = *addr;
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
-}
-
-/* Returns !0 if given address is already assigned to device. */
-int __uwb_mac_addr_assigned_check(struct device *dev, void *_addr)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_mac_addr *addr = _addr;
-
-	if (!uwb_mac_addr_cmp(addr, &uwb_dev->mac_addr))
-		return !0;
-	return 0;
-}
-
-/* Returns !0 if given address is already assigned to device. */
-int __uwb_dev_addr_assigned_check(struct device *dev, void *_addr)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_dev_addr *addr = _addr;
-	if (!uwb_dev_addr_cmp(addr, &uwb_dev->dev_addr))
-		return !0;
-	return 0;
-}
-
-/**
- * uwb_dev_addr_assign - assigned a generated DevAddr to a radio controller
- * @rc:      the (local) radio controller device requiring a new DevAddr
- *
- * A new DevAddr is required when:
- *    - first setting up a radio controller
- *    - if the hardware reports a DevAddr conflict
- *
- * The DevAddr is randomly generated in the generated DevAddr range
- * [0x100, 0xfeff]. The number of devices in a beacon group is limited
- * by mMaxBPLength (96) so this address space will never be exhausted.
- *
- * [ECMA-368] 17.1.1, 17.16.
- */
-int uwb_rc_dev_addr_assign(struct uwb_rc *rc)
-{
-	struct uwb_dev_addr new_addr;
-
-	do {
-		get_random_bytes(new_addr.data, sizeof(new_addr.data));
-	} while (new_addr.data[0] == 0x00 || new_addr.data[0] == 0xff
-		 || __uwb_dev_addr_assigned(rc, &new_addr));
-
-	return uwb_rc_dev_addr_set(rc, &new_addr);
-}
-
-/**
- * uwbd_evt_handle_rc_dev_addr_conflict - handle a DEV_ADDR_CONFLICT event
- * @evt: the DEV_ADDR_CONFLICT notification from the radio controller
- *
- * A new (non-conflicting) DevAddr is assigned to the radio controller.
- *
- * [ECMA-368] 17.1.1.1.
- */
-int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt)
-{
-	struct uwb_rc *rc = evt->rc;
-
-	return uwb_rc_dev_addr_assign(rc);
-}
-
-/*
- * Print the 48-bit EUI MAC address of the radio controller when
- * reading /sys/class/uwb_rc/XX/mac_address
- */
-static ssize_t uwb_rc_mac_addr_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	struct uwb_mac_addr addr;
-	ssize_t result;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = uwb_rc_addr_get(rc, &addr, UWB_ADDR_MAC);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	if (result >= 0) {
-		result = uwb_mac_addr_print(buf, UWB_ADDR_STRSIZE, &addr);
-		buf[result++] = '\n';
-	}
-	return result;
-}
-
-/*
- * Parse a 48 bit address written to /sys/class/uwb_rc/XX/mac_address
- * and if correct, set it.
- */
-static ssize_t uwb_rc_mac_addr_store(struct device *dev,
-				     struct device_attribute *attr,
-				     const char *buf, size_t size)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	struct uwb_mac_addr addr;
-	ssize_t result;
-
-	if (!mac_pton(buf, addr.data))
-		return -EINVAL;
-	if (is_multicast_ether_addr(addr.data)) {
-		dev_err(&rc->uwb_dev.dev, "refusing to set multicast "
-			"MAC address %s\n", buf);
-		return -EINVAL;
-	}
-	result = uwb_rc_mac_addr_set(rc, &addr);
-	if (result == 0)
-		rc->uwb_dev.mac_addr = addr;
-
-	return result < 0 ? result : size;
-}
-DEVICE_ATTR(mac_address, S_IRUGO | S_IWUSR, uwb_rc_mac_addr_show, uwb_rc_mac_addr_store);
-
-/** Print @addr to @buf, @return bytes written */
-size_t __uwb_addr_print(char *buf, size_t buf_size, const unsigned char *addr,
-			int type)
-{
-	size_t result;
-	if (type)
-		result = scnprintf(buf, buf_size, "%pM", addr);
-	else
-		result = scnprintf(buf, buf_size, "%02x:%02x",
-				  addr[1], addr[0]);
-	return result;
-}
-EXPORT_SYMBOL_GPL(__uwb_addr_print);
diff --git a/drivers/uwb/allocator.c b/drivers/uwb/allocator.c
deleted file mode 100644
index 2e1590124d5f..000000000000
--- a/drivers/uwb/allocator.c
+++ /dev/null
@@ -1,374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB reservation management.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/uwb.h>
-
-#include "uwb-internal.h"
-
-static void uwb_rsv_fill_column_alloc(struct uwb_rsv_alloc_info *ai)
-{
-	int col, mas, safe_mas, unsafe_mas;
-	unsigned char *bm = ai->bm;
-	struct uwb_rsv_col_info *ci = ai->ci;
-	unsigned char c;
-
-	for (col = ci->csi.start_col; col < UWB_NUM_ZONES; col += ci->csi.interval) {
-    
-		safe_mas   = ci->csi.safe_mas_per_col;
-		unsafe_mas = ci->csi.unsafe_mas_per_col;
-    
-		for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++ ) {
-			if (bm[col * UWB_MAS_PER_ZONE + mas] == 0) {
-	
-				if (safe_mas > 0) {
-					safe_mas--;
-					c = UWB_RSV_MAS_SAFE;
-				} else if (unsafe_mas > 0) {
-					unsafe_mas--;
-					c = UWB_RSV_MAS_UNSAFE;
-				} else {
-					break;
-				}
-				bm[col * UWB_MAS_PER_ZONE + mas] = c;
-			}
-		}
-	}
-}
-
-static void uwb_rsv_fill_row_alloc(struct uwb_rsv_alloc_info *ai)
-{
-	int mas, col, rows;
-	unsigned char *bm = ai->bm;
-	struct uwb_rsv_row_info *ri = &ai->ri;
-	unsigned char c;
-
-	rows = 1;
-	c = UWB_RSV_MAS_SAFE;
-	for (mas = UWB_MAS_PER_ZONE - 1; mas >= 0; mas--) {
-		if (ri->avail[mas] == 1) {
-      
-			if (rows > ri->used_rows) {
-				break;
-			} else if (rows > 7) {
-				c = UWB_RSV_MAS_UNSAFE;
-			}
-
-			for (col = 0; col < UWB_NUM_ZONES; col++) {
-				if (bm[col * UWB_NUM_ZONES + mas] != UWB_RSV_MAS_NOT_AVAIL) {
-					bm[col * UWB_NUM_ZONES + mas] = c;
-					if(c == UWB_RSV_MAS_SAFE)
-						ai->safe_allocated_mases++;
-					else
-						ai->unsafe_allocated_mases++;
-				}
-			}
-			rows++;
-		}
-	}
-	ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
-}
-
-/*
- * Find the best column set for a given availability, interval, num safe mas and
- * num unsafe mas.
- *
- * The different sets are tried in order as shown below, depending on the interval.
- *
- * interval = 16
- *	deep = 0
- *		set 1 ->  {  8 }
- *	deep = 1
- *		set 1 ->  {  4 }
- *		set 2 ->  { 12 }
- *	deep = 2
- *		set 1 ->  {  2 }
- *		set 2 ->  {  6 }
- *		set 3 ->  { 10 }
- *		set 4 ->  { 14 }
- *	deep = 3
- *		set 1 ->  {  1 }
- *		set 2 ->  {  3 }
- *		set 3 ->  {  5 }
- *		set 4 ->  {  7 }
- *		set 5 ->  {  9 }
- *		set 6 ->  { 11 }
- *		set 7 ->  { 13 }
- *		set 8 ->  { 15 }
- *
- * interval = 8
- *	deep = 0
- *		set 1 ->  {  4  12 }
- *	deep = 1
- *		set 1 ->  {  2  10 }
- *		set 2 ->  {  6  14 }
- *	deep = 2
- *		set 1 ->  {  1   9 }
- *		set 2 ->  {  3  11 }
- *		set 3 ->  {  5  13 }
- *		set 4 ->  {  7  15 }
- *
- * interval = 4
- *	deep = 0
- *		set 1 ->  {  2   6  10  14 }
- *	deep = 1
- *		set 1 ->  {  1   5   9  13 }
- *		set 2 ->  {  3   7  11  15 }
- *
- * interval = 2
- *	deep = 0
- *		set 1 ->  {  1   3   5   7   9  11  13  15 }
- */
-static int uwb_rsv_find_best_column_set(struct uwb_rsv_alloc_info *ai, int interval, 
-					int num_safe_mas, int num_unsafe_mas)
-{
-	struct uwb_rsv_col_info *ci = ai->ci;
-	struct uwb_rsv_col_set_info *csi = &ci->csi;
-	struct uwb_rsv_col_set_info tmp_csi;
-	int deep, set, col, start_col_deep, col_start_set;
-	int start_col, max_mas_in_set, lowest_max_mas_in_deep;
-	int n_mas;
-	int found = UWB_RSV_ALLOC_NOT_FOUND; 
-
-	tmp_csi.start_col = 0;
-	start_col_deep = interval;
-	n_mas = num_unsafe_mas + num_safe_mas;
-
-	for (deep = 0; ((interval >> deep) & 0x1) == 0; deep++) {
-		start_col_deep /= 2;
-		col_start_set = 0;
-		lowest_max_mas_in_deep = UWB_MAS_PER_ZONE;
-
-		for (set = 1; set <= (1 << deep); set++) {
-			max_mas_in_set = 0;
-			start_col = start_col_deep + col_start_set;
-			for (col = start_col; col < UWB_NUM_ZONES; col += interval) {
-                
-				if (ci[col].max_avail_safe >= num_safe_mas &&
-				    ci[col].max_avail_unsafe >= n_mas) {
-					if (ci[col].highest_mas[n_mas] > max_mas_in_set)
-						max_mas_in_set = ci[col].highest_mas[n_mas];
-				} else {
-					max_mas_in_set = 0;
-					break;
-				}
-			}
-			if ((lowest_max_mas_in_deep > max_mas_in_set) && max_mas_in_set) {
-				lowest_max_mas_in_deep = max_mas_in_set;
-
-				tmp_csi.start_col = start_col;
-			}
-			col_start_set += (interval >> deep);
-		}
-
-		if (lowest_max_mas_in_deep < 8) {
-			csi->start_col = tmp_csi.start_col;
-			found = UWB_RSV_ALLOC_FOUND;
-			break;
-		} else if ((lowest_max_mas_in_deep > 8) && 
-			   (lowest_max_mas_in_deep != UWB_MAS_PER_ZONE) &&
-			   (found == UWB_RSV_ALLOC_NOT_FOUND)) {
-			csi->start_col = tmp_csi.start_col;
-			found = UWB_RSV_ALLOC_FOUND;
-		}
-	}
-
-	if (found == UWB_RSV_ALLOC_FOUND) {
-		csi->interval = interval;
-		csi->safe_mas_per_col = num_safe_mas;
-		csi->unsafe_mas_per_col = num_unsafe_mas;
-
-		ai->safe_allocated_mases = (UWB_NUM_ZONES / interval) * num_safe_mas;
-		ai->unsafe_allocated_mases = (UWB_NUM_ZONES / interval) * num_unsafe_mas;
-		ai->total_allocated_mases = ai->safe_allocated_mases + ai->unsafe_allocated_mases;
-		ai->interval = interval;		
-	}
-	return found;
-}
-
-static void get_row_descriptors(struct uwb_rsv_alloc_info *ai)
-{
-	unsigned char *bm = ai->bm;
-	struct uwb_rsv_row_info *ri = &ai->ri;
-	int col, mas;
-  
-	ri->free_rows = 16;
-	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
-		ri->avail[mas] = 1;
-		for (col = 1; col < UWB_NUM_ZONES; col++) {
-			if (bm[col * UWB_NUM_ZONES + mas] == UWB_RSV_MAS_NOT_AVAIL) {
-				ri->free_rows--;
-				ri->avail[mas]=0;
-				break;
-			}
-		}
-	}
-}
-
-static void uwb_rsv_fill_column_info(unsigned char *bm, int column, struct uwb_rsv_col_info *rci)
-{
-	int mas;
-	int block_count = 0, start_block = 0; 
-	int previous_avail = 0;
-	int available = 0;
-	int safe_mas_in_row[UWB_MAS_PER_ZONE] = {
-		8, 7, 6, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 1,
-	};
-
-	rci->max_avail_safe = 0;
-
-	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas ++) {
-		if (!bm[column * UWB_NUM_ZONES + mas]) {
-			available++;
-			rci->max_avail_unsafe = available;
-
-			rci->highest_mas[available] = mas;
-
-			if (previous_avail) {
-				block_count++;
-				if ((block_count > safe_mas_in_row[start_block]) &&
-				    (!rci->max_avail_safe))
-					rci->max_avail_safe = available - 1;
-			} else {
-				previous_avail = 1;
-				start_block = mas;
-				block_count = 1;
-			}
-		} else {
-			previous_avail = 0;
-		}
-	}
-	if (!rci->max_avail_safe)
-		rci->max_avail_safe = rci->max_avail_unsafe;
-}
-
-static void get_column_descriptors(struct uwb_rsv_alloc_info *ai)
-{
-	unsigned char *bm = ai->bm;
-	struct uwb_rsv_col_info *ci = ai->ci;
-	int col;
-
-	for (col = 1; col < UWB_NUM_ZONES; col++) {
-		uwb_rsv_fill_column_info(bm, col, &ci[col]);
-	}
-}
-
-static int uwb_rsv_find_best_row_alloc(struct uwb_rsv_alloc_info *ai)
-{
-	int n_rows;
-	int max_rows = ai->max_mas / UWB_USABLE_MAS_PER_ROW;
-	int min_rows = ai->min_mas / UWB_USABLE_MAS_PER_ROW;
-	if (ai->min_mas % UWB_USABLE_MAS_PER_ROW)
-		min_rows++;
-	for (n_rows = max_rows; n_rows >= min_rows; n_rows--) {
-		if (n_rows <= ai->ri.free_rows) {
-			ai->ri.used_rows = n_rows;
-			ai->interval = 1; /* row reservation */
-			uwb_rsv_fill_row_alloc(ai);
-			return UWB_RSV_ALLOC_FOUND;
-		}
-	}  
-	return UWB_RSV_ALLOC_NOT_FOUND;
-}
-
-static int uwb_rsv_find_best_col_alloc(struct uwb_rsv_alloc_info *ai, int interval)
-{
-	int n_safe, n_unsafe, n_mas;  
-	int n_column = UWB_NUM_ZONES / interval;
-	int max_per_zone = ai->max_mas / n_column;
-	int min_per_zone = ai->min_mas / n_column;
-
-	if (ai->min_mas % n_column)
-		min_per_zone++;
-
-	if (min_per_zone > UWB_MAS_PER_ZONE) {
-		return UWB_RSV_ALLOC_NOT_FOUND;
-	}
-    
-	if (max_per_zone > UWB_MAS_PER_ZONE) {
-		max_per_zone = UWB_MAS_PER_ZONE;
-	}
-    
-	for (n_mas = max_per_zone; n_mas >= min_per_zone; n_mas--) {
-		if (uwb_rsv_find_best_column_set(ai, interval, 0, n_mas) == UWB_RSV_ALLOC_NOT_FOUND)
-			continue;
-		for (n_safe = n_mas; n_safe >= 0; n_safe--) {
-			n_unsafe = n_mas - n_safe;
-			if (uwb_rsv_find_best_column_set(ai, interval, n_safe, n_unsafe) == UWB_RSV_ALLOC_FOUND) {
-				uwb_rsv_fill_column_alloc(ai);
-				return UWB_RSV_ALLOC_FOUND;
-			}
-		}
-	}
-	return UWB_RSV_ALLOC_NOT_FOUND;
-}
-
-int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv, struct uwb_mas_bm *available, 
-				 struct uwb_mas_bm *result)
-{
-	struct uwb_rsv_alloc_info *ai;
-	int interval;
-	int bit_index;
-
-	ai = kzalloc(sizeof(struct uwb_rsv_alloc_info), GFP_KERNEL);
-	if (!ai)
-		return UWB_RSV_ALLOC_NOT_FOUND;
-	ai->min_mas = rsv->min_mas;
-	ai->max_mas = rsv->max_mas;
-	ai->max_interval = rsv->max_interval;
-
-
-	/* fill the not available vector from the available bm */
-	for_each_clear_bit(bit_index, available->bm, UWB_NUM_MAS)
-		ai->bm[bit_index] = UWB_RSV_MAS_NOT_AVAIL;
-
-	if (ai->max_interval == 1) {
-		get_row_descriptors(ai);
-		if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
-			goto alloc_found;
-		else
-			goto alloc_not_found;
-	}
-
-	get_column_descriptors(ai);
-        
-	for (interval = 16; interval >= 2; interval>>=1) {
-		if (interval > ai->max_interval)
-			continue;
-		if (uwb_rsv_find_best_col_alloc(ai, interval) == UWB_RSV_ALLOC_FOUND)
-			goto alloc_found;
-	}
-
-	/* try row reservation if no column is found */
-	get_row_descriptors(ai);
-	if (uwb_rsv_find_best_row_alloc(ai) == UWB_RSV_ALLOC_FOUND)
-		goto alloc_found;
-	else
-		goto alloc_not_found;
-
-  alloc_found:
-	bitmap_zero(result->bm, UWB_NUM_MAS);
-	bitmap_zero(result->unsafe_bm, UWB_NUM_MAS);
-	/* fill the safe and unsafe bitmaps */
-	for (bit_index = 0; bit_index < UWB_NUM_MAS; bit_index++) {
-		if (ai->bm[bit_index] == UWB_RSV_MAS_SAFE)
-			set_bit(bit_index, result->bm);
-		else if (ai->bm[bit_index] == UWB_RSV_MAS_UNSAFE)
-			set_bit(bit_index, result->unsafe_bm);
-	}
-	bitmap_or(result->bm, result->bm, result->unsafe_bm, UWB_NUM_MAS);
-
-	result->safe   = ai->safe_allocated_mases;
-	result->unsafe = ai->unsafe_allocated_mases;
-	
-	kfree(ai);		
-	return UWB_RSV_ALLOC_FOUND;
-  
-  alloc_not_found:
-	kfree(ai);
-	return UWB_RSV_ALLOC_NOT_FOUND;
-}
diff --git a/drivers/uwb/beacon.c b/drivers/uwb/beacon.c
deleted file mode 100644
index c483c19c5ef8..000000000000
--- a/drivers/uwb/beacon.c
+++ /dev/null
@@ -1,595 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Beacon management
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/kdev_t.h>
-#include <linux/slab.h>
-
-#include "uwb-internal.h"
-
-/* Start Beaconing command structure */
-struct uwb_rc_cmd_start_beacon {
-	struct uwb_rccb rccb;
-	__le16 wBPSTOffset;
-	u8 bChannelNumber;
-} __attribute__((packed));
-
-
-static int uwb_rc_start_beacon(struct uwb_rc *rc, u16 bpst_offset, u8 channel)
-{
-	int result;
-	struct uwb_rc_cmd_start_beacon *cmd;
-	struct uwb_rc_evt_confirm reply;
-
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		return -ENOMEM;
-	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
-	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_START_BEACON);
-	cmd->wBPSTOffset = cpu_to_le16(bpst_offset);
-	cmd->bChannelNumber = channel;
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_START_BEACON;
-	result = uwb_rc_cmd(rc, "START-BEACON", &cmd->rccb, sizeof(*cmd),
-			    &reply.rceb, sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev,
-			"START-BEACON: command execution failed: %s (%d)\n",
-			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
-		result = -EIO;
-	}
-error_cmd:
-	kfree(cmd);
-	return result;
-}
-
-static int uwb_rc_stop_beacon(struct uwb_rc *rc)
-{
-	int result;
-	struct uwb_rccb *cmd;
-	struct uwb_rc_evt_confirm reply;
-
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		return -ENOMEM;
-	cmd->bCommandType = UWB_RC_CET_GENERAL;
-	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_STOP_BEACON);
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_STOP_BEACON;
-	result = uwb_rc_cmd(rc, "STOP-BEACON", cmd, sizeof(*cmd),
-			    &reply.rceb, sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev,
-			"STOP-BEACON: command execution failed: %s (%d)\n",
-			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
-		result = -EIO;
-	}
-error_cmd:
-	kfree(cmd);
-	return result;
-}
-
-/*
- * Start/stop beacons
- *
- * @rc:          UWB Radio Controller to operate on
- * @channel:     UWB channel on which to beacon (WUSB[table
- *               5-12]). If -1, stop beaconing.
- * @bpst_offset: Beacon Period Start Time offset; FIXME-do zero
- *
- * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
- * of a SET IE command after the device sent the first beacon that includes
- * the IEs specified in the SET IE command. So, after we start beaconing we
- * check if there is anything in the IE cache and call the SET IE command
- * if needed.
- */
-int uwb_rc_beacon(struct uwb_rc *rc, int channel, unsigned bpst_offset)
-{
-	int result;
-	struct device *dev = &rc->uwb_dev.dev;
-
-	dev_dbg(dev, "%s: channel = %d\n", __func__, channel);
-	if (channel < 0)
-		channel = -1;
-	if (channel == -1)
-		result = uwb_rc_stop_beacon(rc);
-	else {
-		/* channel >= 0...dah */
-		result = uwb_rc_start_beacon(rc, bpst_offset, channel);
-		if (result < 0) {
-			dev_err(dev, "Cannot start beaconing: %d\n", result);
-			return result;
-		}
-		if (le16_to_cpu(rc->ies->wIELength) > 0) {
-			result = uwb_rc_set_ie(rc, rc->ies);
-			if (result < 0) {
-				dev_err(dev, "Cannot set new IE on device: "
-					"%d\n", result);
-				result = uwb_rc_stop_beacon(rc);
-				channel = -1;
-				bpst_offset = 0;
-			}
-		}
-	}
-
-	if (result >= 0)
-		rc->beaconing = channel;
-	return result;
-}
-
-/*
- * Beacon cache
- *
- * The purpose of this is to speed up the lookup of becon information
- * when a new beacon arrives. The UWB Daemon uses it also to keep a
- * tab of which devices are in radio distance and which not. When a
- * device's beacon stays present for more than a certain amount of
- * time, it is considered a new, usable device. When a beacon ceases
- * to be received for a certain amount of time, it is considered that
- * the device is gone.
- *
- * FIXME: use an allocator for the entries
- * FIXME: use something faster for search than a list
- */
-
-void uwb_bce_kfree(struct kref *_bce)
-{
-	struct uwb_beca_e *bce = container_of(_bce, struct uwb_beca_e, refcnt);
-
-	kfree(bce->be);
-	kfree(bce);
-}
-
-
-/* Find a beacon by dev addr in the cache */
-static
-struct uwb_beca_e *__uwb_beca_find_bydev(struct uwb_rc *rc,
-					 const struct uwb_dev_addr *dev_addr)
-{
-	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
-		if (!memcmp(&bce->dev_addr, dev_addr, sizeof(bce->dev_addr)))
-			goto out;
-	}
-	bce = NULL;
-out:
-	return bce;
-}
-
-/* Find a beacon by dev addr in the cache */
-static
-struct uwb_beca_e *__uwb_beca_find_bymac(struct uwb_rc *rc,
-					 const struct uwb_mac_addr *mac_addr)
-{
-	struct uwb_beca_e *bce, *next;
-	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
-		if (!memcmp(bce->mac_addr, mac_addr->data,
-			    sizeof(struct uwb_mac_addr)))
-			goto out;
-	}
-	bce = NULL;
-out:
-	return bce;
-}
-
-/**
- * uwb_dev_get_by_devaddr - get a UWB device with a specific DevAddr
- * @rc:      the radio controller that saw the device
- * @devaddr: DevAddr of the UWB device to find
- *
- * There may be more than one matching device (in the case of a
- * DevAddr conflict), but only the first one is returned.
- */
-struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
-				       const struct uwb_dev_addr *devaddr)
-{
-	struct uwb_dev *found = NULL;
-	struct uwb_beca_e *bce;
-
-	mutex_lock(&rc->uwb_beca.mutex);
-	bce = __uwb_beca_find_bydev(rc, devaddr);
-	if (bce)
-		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&rc->uwb_beca.mutex);
-
-	return found;
-}
-
-/**
- * uwb_dev_get_by_macaddr - get a UWB device with a specific EUI-48
- * @rc:      the radio controller that saw the device
- * @devaddr: EUI-48 of the UWB device to find
- */
-struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
-				       const struct uwb_mac_addr *macaddr)
-{
-	struct uwb_dev *found = NULL;
-	struct uwb_beca_e *bce;
-
-	mutex_lock(&rc->uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(rc, macaddr);
-	if (bce)
-		found = uwb_dev_try_get(rc, bce->uwb_dev);
-	mutex_unlock(&rc->uwb_beca.mutex);
-
-	return found;
-}
-
-/* Initialize a beacon cache entry */
-static void uwb_beca_e_init(struct uwb_beca_e *bce)
-{
-	mutex_init(&bce->mutex);
-	kref_init(&bce->refcnt);
-	stats_init(&bce->lqe_stats);
-	stats_init(&bce->rssi_stats);
-}
-
-/*
- * Add a beacon to the cache
- *
- * @be:         Beacon event information
- * @bf:         Beacon frame (part of b, really)
- * @ts_jiffies: Timestamp (in jiffies) when the beacon was received
- */
-static
-struct uwb_beca_e *__uwb_beca_add(struct uwb_rc *rc,
-				  struct uwb_rc_evt_beacon *be,
-				  struct uwb_beacon_frame *bf,
-				  unsigned long ts_jiffies)
-{
-	struct uwb_beca_e *bce;
-
-	bce = kzalloc(sizeof(*bce), GFP_KERNEL);
-	if (bce == NULL)
-		return NULL;
-	uwb_beca_e_init(bce);
-	bce->ts_jiffies = ts_jiffies;
-	bce->uwb_dev = NULL;
-	list_add(&bce->node, &rc->uwb_beca.list);
-	return bce;
-}
-
-/*
- * Wipe out beacon entries that became stale
- *
- * Remove associated devicest too.
- */
-void uwb_beca_purge(struct uwb_rc *rc)
-{
-	struct uwb_beca_e *bce, *next;
-	unsigned long expires;
-
-	mutex_lock(&rc->uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
-		expires = bce->ts_jiffies + msecs_to_jiffies(beacon_timeout_ms);
-		if (time_after(jiffies, expires)) {
-			uwbd_dev_offair(bce);
-		}
-	}
-	mutex_unlock(&rc->uwb_beca.mutex);
-}
-
-/* Clean up the whole beacon cache. Called on shutdown */
-void uwb_beca_release(struct uwb_rc *rc)
-{
-	struct uwb_beca_e *bce, *next;
-
-	mutex_lock(&rc->uwb_beca.mutex);
-	list_for_each_entry_safe(bce, next, &rc->uwb_beca.list, node) {
-		list_del(&bce->node);
-		uwb_bce_put(bce);
-	}
-	mutex_unlock(&rc->uwb_beca.mutex);
-}
-
-static void uwb_beacon_print(struct uwb_rc *rc, struct uwb_rc_evt_beacon *be,
-			     struct uwb_beacon_frame *bf)
-{
-	char macbuf[UWB_ADDR_STRSIZE];
-	char devbuf[UWB_ADDR_STRSIZE];
-	char dstbuf[UWB_ADDR_STRSIZE];
-
-	uwb_mac_addr_print(macbuf, sizeof(macbuf), &bf->Device_Identifier);
-	uwb_dev_addr_print(devbuf, sizeof(devbuf), &bf->hdr.SrcAddr);
-	uwb_dev_addr_print(dstbuf, sizeof(dstbuf), &bf->hdr.DestAddr);
-	dev_info(&rc->uwb_dev.dev,
-		 "BEACON from %s to %s (ch%u offset %u slot %u MAC %s)\n",
-		 devbuf, dstbuf, be->bChannelNumber, be->wBPSTOffset,
-		 bf->Beacon_Slot_Number, macbuf);
-}
-
-/*
- * @bce: beacon cache entry, referenced
- */
-ssize_t uwb_bce_print_IEs(struct uwb_dev *uwb_dev, struct uwb_beca_e *bce,
-			  char *buf, size_t size)
-{
-	ssize_t result = 0;
-	struct uwb_rc_evt_beacon *be;
-	struct uwb_beacon_frame *bf;
-	int ies_len;
-	struct uwb_ie_hdr *ies;
-
-	mutex_lock(&bce->mutex);
-
-	be = bce->be;
-	if (be) {
-		bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
-		ies_len = be->wBeaconInfoLength - sizeof(struct uwb_beacon_frame);
-		ies = (struct uwb_ie_hdr *)bf->IEData;
-
-		result = uwb_ie_dump_hex(ies, ies_len, buf, size);
-	}
-
-	mutex_unlock(&bce->mutex);
-
-	return result;
-}
-
-/*
- * Verify that the beacon event, frame and IEs are ok
- */
-static int uwb_verify_beacon(struct uwb_rc *rc, struct uwb_event *evt,
-			     struct uwb_rc_evt_beacon *be)
-{
-	int result = -EINVAL;
-	struct uwb_beacon_frame *bf;
-	struct device *dev = &rc->uwb_dev.dev;
-
-	/* Is there enough data to decode a beacon frame? */
-	if (evt->notif.size < sizeof(*be) + sizeof(*bf)) {
-		dev_err(dev, "BEACON event: Not enough data to decode "
-			"(%zu vs %zu bytes needed)\n", evt->notif.size,
-			sizeof(*be) + sizeof(*bf));
-		goto error;
-	}
-	/* FIXME: make sure beacon frame IEs are fine and that the whole thing
-	 * is consistent */
-	result = 0;
-error:
-	return result;
-}
-
-/*
- * Handle UWB_RC_EVT_BEACON events
- *
- * We check the beacon cache to see how the received beacon fares. If
- * is there already we refresh the timestamp. If not we create a new
- * entry.
- *
- * According to the WHCI and WUSB specs, only one beacon frame is
- * allowed per notification block, so we don't bother about scanning
- * for more.
- */
-int uwbd_evt_handle_rc_beacon(struct uwb_event *evt)
-{
-	int result = -EINVAL;
-	struct uwb_rc *rc;
-	struct uwb_rc_evt_beacon *be;
-	struct uwb_beacon_frame *bf;
-	struct uwb_beca_e *bce;
-
-	rc = evt->rc;
-	be = container_of(evt->notif.rceb, struct uwb_rc_evt_beacon, rceb);
-	result = uwb_verify_beacon(rc, evt, be);
-	if (result < 0)
-		return result;
-
-	/* FIXME: handle alien beacons. */
-	if (be->bBeaconType == UWB_RC_BEACON_TYPE_OL_ALIEN ||
-	    be->bBeaconType == UWB_RC_BEACON_TYPE_NOL_ALIEN) {
-		return -ENOSYS;
-	}
-
-	bf = (struct uwb_beacon_frame *) be->BeaconInfo;
-
-	/*
-	 * Drop beacons from devices with a NULL EUI-48 -- they cannot
-	 * be uniquely identified.
-	 *
-	 * It's expected that these will all be WUSB devices and they
-	 * have a WUSB specific connection method so ignoring them
-	 * here shouldn't be a problem.
-	 */
-	if (uwb_mac_addr_bcast(&bf->Device_Identifier))
-		return 0;
-
-	mutex_lock(&rc->uwb_beca.mutex);
-	bce = __uwb_beca_find_bymac(rc, &bf->Device_Identifier);
-	if (bce == NULL) {
-		/* Not in there, a new device is pinging */
-		uwb_beacon_print(evt->rc, be, bf);
-		bce = __uwb_beca_add(rc, be, bf, evt->ts_jiffies);
-		if (bce == NULL) {
-			mutex_unlock(&rc->uwb_beca.mutex);
-			return -ENOMEM;
-		}
-	}
-	mutex_unlock(&rc->uwb_beca.mutex);
-
-	mutex_lock(&bce->mutex);
-	/* purge old beacon data */
-	kfree(bce->be);
-
-	/* Update commonly used fields */
-	bce->ts_jiffies = evt->ts_jiffies;
-	bce->be = be;
-	bce->dev_addr = bf->hdr.SrcAddr;
-	bce->mac_addr = &bf->Device_Identifier;
-	be->wBPSTOffset = le16_to_cpu(be->wBPSTOffset);
-	be->wBeaconInfoLength = le16_to_cpu(be->wBeaconInfoLength);
-	stats_add_sample(&bce->lqe_stats, be->bLQI - 7);
-	stats_add_sample(&bce->rssi_stats, be->bRSSI + 18);
-
-	/*
-	 * This might be a beacon from a new device.
-	 */
-	if (bce->uwb_dev == NULL)
-		uwbd_dev_onair(evt->rc, bce);
-
-	mutex_unlock(&bce->mutex);
-
-	return 1; /* we keep the event data */
-}
-
-/*
- * Handle UWB_RC_EVT_BEACON_SIZE events
- *
- * XXXXX
- */
-int uwbd_evt_handle_rc_beacon_size(struct uwb_event *evt)
-{
-	int result = -EINVAL;
-	struct device *dev = &evt->rc->uwb_dev.dev;
-	struct uwb_rc_evt_beacon_size *bs;
-
-	/* Is there enough data to decode the event? */
-	if (evt->notif.size < sizeof(*bs)) {
-		dev_err(dev, "BEACON SIZE notification: Not enough data to "
-			"decode (%zu vs %zu bytes needed)\n",
-			evt->notif.size, sizeof(*bs));
-		goto error;
-	}
-	bs = container_of(evt->notif.rceb, struct uwb_rc_evt_beacon_size, rceb);
-	if (0)
-		dev_info(dev, "Beacon size changed to %u bytes "
-			"(FIXME: action?)\n", le16_to_cpu(bs->wNewBeaconSize));
-	else {
-		/* temporary hack until we do something with this message... */
-		static unsigned count;
-		if (++count % 1000 == 0)
-			dev_info(dev, "Beacon size changed %u times "
-				"(FIXME: action?)\n", count);
-	}
-	result = 0;
-error:
-	return result;
-}
-
-/**
- * uwbd_evt_handle_rc_bp_slot_change - handle a BP_SLOT_CHANGE event
- * @evt: the BP_SLOT_CHANGE notification from the radio controller
- *
- * If the event indicates that no beacon period slots were available
- * then radio controller has transitioned to a non-beaconing state.
- * Otherwise, simply save the current beacon slot.
- */
-int uwbd_evt_handle_rc_bp_slot_change(struct uwb_event *evt)
-{
-	struct uwb_rc *rc = evt->rc;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rc_evt_bp_slot_change *bpsc;
-
-	if (evt->notif.size < sizeof(*bpsc)) {
-		dev_err(dev, "BP SLOT CHANGE event: Not enough data\n");
-		return -EINVAL;
-	}
-	bpsc = container_of(evt->notif.rceb, struct uwb_rc_evt_bp_slot_change, rceb);
-
-	if (uwb_rc_evt_bp_slot_change_no_slot(bpsc)) {
-		dev_err(dev, "stopped beaconing: No free slots in BP\n");
-		mutex_lock(&rc->uwb_dev.mutex);
-		rc->beaconing = -1;
-		mutex_unlock(&rc->uwb_dev.mutex);
-	} else
-		rc->uwb_dev.beacon_slot = uwb_rc_evt_bp_slot_change_slot_num(bpsc);
-
-	return 0;
-}
-
-/**
- * Handle UWB_RC_EVT_BPOIE_CHANGE events
- *
- * XXXXX
- */
-struct uwb_ie_bpo {
-	struct uwb_ie_hdr hdr;
-	u8                bp_length;
-	u8                data[];
-} __attribute__((packed));
-
-int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *evt)
-{
-	int result = -EINVAL;
-	struct device *dev = &evt->rc->uwb_dev.dev;
-	struct uwb_rc_evt_bpoie_change *bpoiec;
-	struct uwb_ie_bpo *bpoie;
-	static unsigned count;	/* FIXME: this is a temp hack */
-	size_t iesize;
-
-	/* Is there enough data to decode it? */
-	if (evt->notif.size < sizeof(*bpoiec)) {
-		dev_err(dev, "BPOIEC notification: Not enough data to "
-			"decode (%zu vs %zu bytes needed)\n",
-			evt->notif.size, sizeof(*bpoiec));
-		goto error;
-	}
-	bpoiec = container_of(evt->notif.rceb, struct uwb_rc_evt_bpoie_change, rceb);
-	iesize = le16_to_cpu(bpoiec->wBPOIELength);
-	if (iesize < sizeof(*bpoie)) {
-		dev_err(dev, "BPOIEC notification: Not enough IE data to "
-			"decode (%zu vs %zu bytes needed)\n",
-			iesize, sizeof(*bpoie));
-		goto error;
-	}
-	if (++count % 1000 == 0)	/* Lame placeholder */
-		dev_info(dev, "BPOIE: %u changes received\n", count);
-	/*
-	 * FIXME: At this point we should go over all the IEs in the
-	 *        bpoiec->BPOIE array and act on each.
-	 */
-	result = 0;
-error:
-	return result;
-}
-
-/*
- * Print beaconing state.
- */
-static ssize_t uwb_rc_beacon_show(struct device *dev,
-				  struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	ssize_t result;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = sprintf(buf, "%d\n", rc->beaconing);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
-}
-
-/*
- * Start beaconing on the specified channel, or stop beaconing.
- */
-static ssize_t uwb_rc_beacon_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t size)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	int channel;
-	ssize_t result = -EINVAL;
-
-	result = sscanf(buf, "%d", &channel);
-	if (result >= 1)
-		result = uwb_radio_force_channel(rc, channel);
-
-	return result < 0 ? result : size;
-}
-DEVICE_ATTR(beacon, S_IRUGO | S_IWUSR, uwb_rc_beacon_show, uwb_rc_beacon_store);
diff --git a/drivers/uwb/driver.c b/drivers/uwb/driver.c
deleted file mode 100644
index 5755c2e49ffc..000000000000
--- a/drivers/uwb/driver.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Driver initialization, etc
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- *
- * Life cycle: FIXME: explain
- *
- *  UWB radio controller:
- *
- *    1. alloc a uwb_rc, zero it
- *    2. call uwb_rc_init() on it to set it up + ops (won't do any
- *       kind of allocation)
- *    3. register (now it is owned by the UWB stack--deregister before
- *       freeing/destroying).
- *    4. It lives on it's own now (UWB stack handles)--when it
- *       disconnects, call unregister()
- *    5. free it.
- *
- *    Make sure you have a reference to the uwb_rc before calling
- *    any of the UWB API functions.
- *
- * TODO:
- *
- * 1. Locking and life cycle management is crappy still. All entry
- *    points to the UWB HCD API assume you have a reference on the
- *    uwb_rc structure and that it won't go away. They mutex lock it
- *    before doing anything.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/kdev_t.h>
-#include <linux/random.h>
-
-#include "uwb-internal.h"
-
-
-/* UWB stack attributes (or 'global' constants) */
-
-
-/**
- * If a beacon disappears for longer than this, then we consider the
- * device who was represented by that beacon to be gone.
- *
- * ECMA-368[17.2.3, last para] establishes that a device must not
- * consider a device to be its neighbour if he doesn't receive a beacon
- * for more than mMaxLostBeacons. mMaxLostBeacons is defined in
- * ECMA-368[17.16] as 3; because we can get only one beacon per
- * superframe, that'd be 3 * 65ms = 195 ~ 200 ms. Let's give it time
- * for jitter and stuff and make it 500 ms.
- */
-unsigned long beacon_timeout_ms = 500;
-
-static
-ssize_t beacon_timeout_ms_show(struct class *class,
-				struct class_attribute *attr,
-				char *buf)
-{
-	return scnprintf(buf, PAGE_SIZE, "%lu\n", beacon_timeout_ms);
-}
-
-static
-ssize_t beacon_timeout_ms_store(struct class *class,
-				struct class_attribute *attr,
-				const char *buf, size_t size)
-{
-	unsigned long bt;
-	ssize_t result;
-	result = sscanf(buf, "%lu", &bt);
-	if (result != 1)
-		return -EINVAL;
-	beacon_timeout_ms = bt;
-	return size;
-}
-static CLASS_ATTR_RW(beacon_timeout_ms);
-
-static struct attribute *uwb_class_attrs[] = {
-	&class_attr_beacon_timeout_ms.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(uwb_class);
-
-/** Device model classes */
-struct class uwb_rc_class = {
-	.name        = "uwb_rc",
-	.class_groups = uwb_class_groups,
-};
-
-
-static int __init uwb_subsys_init(void)
-{
-	int result = 0;
-
-	result = uwb_est_create();
-	if (result < 0) {
-		printk(KERN_ERR "uwb: Can't initialize EST subsystem\n");
-		goto error_est_init;
-	}
-
-	result = class_register(&uwb_rc_class);
-	if (result < 0)
-		goto error_uwb_rc_class_register;
-
-	/* Register the UWB bus */
-	result = bus_register(&uwb_bus_type);
-	if (result) {
-		pr_err("%s - registering bus driver failed\n", __func__);
-		goto exit_bus;
-	}
-
-	uwb_dbg_init();
-	return 0;
-
-exit_bus:
-	class_unregister(&uwb_rc_class);
-error_uwb_rc_class_register:
-	uwb_est_destroy();
-error_est_init:
-	return result;
-}
-module_init(uwb_subsys_init);
-
-static void __exit uwb_subsys_exit(void)
-{
-	uwb_dbg_exit();
-	bus_unregister(&uwb_bus_type);
-	class_unregister(&uwb_rc_class);
-	uwb_est_destroy();
-	return;
-}
-module_exit(uwb_subsys_exit);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Ultra Wide Band core");
-MODULE_LICENSE("GPL");
diff --git a/drivers/uwb/drp-avail.c b/drivers/uwb/drp-avail.c
deleted file mode 100644
index 02392ab82a7d..000000000000
--- a/drivers/uwb/drp-avail.c
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * DRP availability management
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Reinette Chatre <reinette.chatre@intel.com>
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- *
- * Manage DRP Availability (the MAS available for DRP
- * reservations). Thus:
- *
- * - Handle DRP Availability Change notifications
- *
- * - Allow the reservation manager to indicate MAS reserved/released
- *   by local (owned by/targeted at the radio controller)
- *   reservations.
- *
- * - Based on the two sources above, generate a DRP Availability IE to
- *   be included in the beacon.
- *
- * See also the documentation for struct uwb_drp_avail.
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/bitmap.h>
-#include "uwb-internal.h"
-
-/**
- * uwb_drp_avail_init - initialize an RC's MAS availability
- *
- * All MAS are available initially.  The RC will inform use which
- * slots are used for the BP (it may change in size).
- */
-void uwb_drp_avail_init(struct uwb_rc *rc)
-{
-	bitmap_fill(rc->drp_avail.global, UWB_NUM_MAS);
-	bitmap_fill(rc->drp_avail.local, UWB_NUM_MAS);
-	bitmap_fill(rc->drp_avail.pending, UWB_NUM_MAS);
-}
-
-/*
- * Determine MAS available for new local reservations.
- *
- * avail = global & local & pending
- */
-void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail)
-{
-	bitmap_and(avail->bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
-	bitmap_and(avail->bm, avail->bm, rc->drp_avail.pending, UWB_NUM_MAS);
-}
-
-/**
- * uwb_drp_avail_reserve_pending - reserve MAS for a new reservation
- * @rc: the radio controller
- * @mas: the MAS to reserve
- *
- * Returns 0 on success, or -EBUSY if the MAS requested aren't available.
- */
-int uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas)
-{
-	struct uwb_mas_bm avail;
-
-	uwb_drp_available(rc, &avail);
-	if (!bitmap_subset(mas->bm, avail.bm, UWB_NUM_MAS))
-		return -EBUSY;
-
-	bitmap_andnot(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
-	return 0;
-}
-
-/**
- * uwb_drp_avail_reserve - reserve MAS for an established reservation
- * @rc: the radio controller
- * @mas: the MAS to reserve
- */
-void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas)
-{
-	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
-	bitmap_andnot(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
-	rc->drp_avail.ie_valid = false;
-}
-
-/**
- * uwb_drp_avail_release - release MAS from a pending or established reservation
- * @rc: the radio controller
- * @mas: the MAS to release
- */
-void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas)
-{
-	bitmap_or(rc->drp_avail.local, rc->drp_avail.local, mas->bm, UWB_NUM_MAS);
-	bitmap_or(rc->drp_avail.pending, rc->drp_avail.pending, mas->bm, UWB_NUM_MAS);
-	rc->drp_avail.ie_valid = false;
-	uwb_rsv_handle_drp_avail_change(rc);
-}
-
-/**
- * uwb_drp_avail_ie_update - update the DRP Availability IE
- * @rc: the radio controller
- *
- * avail = global & local
- */
-void uwb_drp_avail_ie_update(struct uwb_rc *rc)
-{
-	struct uwb_mas_bm avail;
-
-	bitmap_and(avail.bm, rc->drp_avail.global, rc->drp_avail.local, UWB_NUM_MAS);
-
-	rc->drp_avail.ie.hdr.element_id = UWB_IE_DRP_AVAILABILITY;
-	rc->drp_avail.ie.hdr.length = UWB_NUM_MAS / 8;
-	uwb_mas_bm_copy_le(rc->drp_avail.ie.bmp, &avail);
-	rc->drp_avail.ie_valid = true;
-}
-
-/**
- * Create an unsigned long from a buffer containing a byte stream.
- *
- * @array: pointer to buffer
- * @itr:   index of buffer from where we start
- * @len:   the buffer's remaining size may not be exact multiple of
- *         sizeof(unsigned long), @len is the length of buffer that needs
- *         to be converted. This will be sizeof(unsigned long) or smaller
- *         (BUG if not). If it is smaller then we will pad the remaining
- *         space of the result with zeroes.
- */
-static
-unsigned long get_val(u8 *array, size_t itr, size_t len)
-{
-	unsigned long val = 0;
-	size_t top = itr + len;
-
-	BUG_ON(len > sizeof(val));
-
-	while (itr < top) {
-		val <<= 8;
-		val |= array[top - 1];
-		top--;
-	}
-	val <<= 8 * (sizeof(val) - len); /* padding */
-	return val;
-}
-
-/**
- * Initialize bitmap from data buffer.
- *
- * The bitmap to be converted could come from a IE, for example a
- * DRP Availability IE.
- * From ECMA-368 1.0 [16.8.7]: "
- * octets: 1            1               N * (0 to 32)
- *         Element ID   Length (=N)     DRP Availability Bitmap
- *
- * The DRP Availability Bitmap field is up to 256 bits long, one
- * bit for each MAS in the superframe, where the least-significant
- * bit of the field corresponds to the first MAS in the superframe
- * and successive bits correspond to successive MASs."
- *
- * The DRP Availability bitmap is in octets from 0 to 32, so octet
- * 32 contains bits for MAS 1-8, etc. If the bitmap is smaller than 32
- * octets, the bits in octets not included at the end of the bitmap are
- * treated as zero. In this case (when the bitmap is smaller than 32
- * octets) the MAS represented range from MAS 1 to MAS (size of bitmap)
- * with the last octet still containing bits for MAS 1-8, etc.
- *
- * For example:
- * F00F0102 03040506 0708090A 0B0C0D0E 0F010203
- * ^^^^
- * ||||
- * ||||
- * |||\LSB of byte is MAS 9
- * ||\MSB of byte is MAS 16
- * |\LSB of first byte is MAS 1
- * \ MSB of byte is MAS 8
- *
- * An example of this encoding can be found in ECMA-368 Annex-D [Table D.11]
- *
- * The resulting bitmap will have the following mapping:
- *	bit position 0 == MAS 1
- *	bit position 1 == MAS 2
- *	...
- *	bit position (UWB_NUM_MAS - 1) == MAS UWB_NUM_MAS
- *
- * @bmp_itr:	pointer to bitmap (can be declared with DECLARE_BITMAP)
- * @buffer:	pointer to buffer containing bitmap data in big endian
- *              format (MSB first)
- * @buffer_size:number of bytes with which bitmap should be initialized
- */
-static
-void buffer_to_bmp(unsigned long *bmp_itr, void *_buffer,
-		   size_t buffer_size)
-{
-	u8 *buffer = _buffer;
-	size_t itr, len;
-	unsigned long val;
-
-	itr = 0;
-	while (itr < buffer_size) {
-		len = buffer_size - itr >= sizeof(val) ?
-			sizeof(val) : buffer_size - itr;
-		val = get_val(buffer, itr, len);
-		bmp_itr[itr / sizeof(val)] = val;
-		itr += sizeof(val);
-	}
-}
-
-
-/**
- * Extract DRP Availability bitmap from the notification.
- *
- * The notification that comes in contains a bitmap of (UWB_NUM_MAS / 8) bytes
- * We convert that to our internal representation.
- */
-static
-int uwbd_evt_get_drp_avail(struct uwb_event *evt, unsigned long *bmp)
-{
-	struct device *dev = &evt->rc->uwb_dev.dev;
-	struct uwb_rc_evt_drp_avail *drp_evt;
-	int result = -EINVAL;
-
-	/* Is there enough data to decode the event? */
-	if (evt->notif.size < sizeof(*drp_evt)) {
-		dev_err(dev, "DRP Availability Change: Not enough "
-			"data to decode event [%zu bytes, %zu "
-			"needed]\n", evt->notif.size, sizeof(*drp_evt));
-		goto error;
-	}
-	drp_evt = container_of(evt->notif.rceb, struct uwb_rc_evt_drp_avail, rceb);
-	buffer_to_bmp(bmp, drp_evt->bmp, UWB_NUM_MAS/8);
-	result = 0;
-error:
-	return result;
-}
-
-
-/**
- * Process an incoming DRP Availability notification.
- *
- * @evt:	Event information (packs the actual event data, which
- *              radio controller it came to, etc).
- *
- * @returns:    0 on success (so uwbd() frees the event buffer), < 0
- *              on error.
- *
- * According to ECMA-368 1.0 [16.8.7], bits set to ONE indicate that
- * the MAS slot is available, bits set to ZERO indicate that the slot
- * is busy.
- *
- * So we clear available slots, we set used slots :)
- *
- * The notification only marks non-availability based on the BP and
- * received DRP IEs that are not for this radio controller.  A copy of
- * this bitmap is needed to generate the real availability (which
- * includes local and pending reservations).
- *
- * The DRP Availability IE that this radio controller emits will need
- * to be updated.
- */
-int uwbd_evt_handle_rc_drp_avail(struct uwb_event *evt)
-{
-	int result;
-	struct uwb_rc *rc = evt->rc;
-	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
-
-	result = uwbd_evt_get_drp_avail(evt, bmp);
-	if (result < 0)
-		return result;
-
-	mutex_lock(&rc->rsvs_mutex);
-	bitmap_copy(rc->drp_avail.global, bmp, UWB_NUM_MAS);
-	rc->drp_avail.ie_valid = false;
-	uwb_rsv_handle_drp_avail_change(rc);
-	mutex_unlock(&rc->rsvs_mutex);
-
-	uwb_rsv_sched_update(rc);
-
-	return 0;
-}
diff --git a/drivers/uwb/drp-ie.c b/drivers/uwb/drp-ie.c
deleted file mode 100644
index 4b545b41161c..000000000000
--- a/drivers/uwb/drp-ie.c
+++ /dev/null
@@ -1,305 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB DRP IE management.
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/uwb.h>
-
-#include "uwb-internal.h"
-
-
-/*
- * Return the reason code for a reservations's DRP IE.
- */
-static int uwb_rsv_reason_code(struct uwb_rsv *rsv)
-{
-	static const int reason_codes[] = {
-		[UWB_RSV_STATE_O_INITIATED]          = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_O_PENDING]            = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_O_MODIFIED]           = UWB_DRP_REASON_MODIFIED,
-		[UWB_RSV_STATE_O_ESTABLISHED]        = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_O_TO_BE_MOVED]        = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_O_MOVE_COMBINING]     = UWB_DRP_REASON_MODIFIED,
-		[UWB_RSV_STATE_O_MOVE_REDUCING]      = UWB_DRP_REASON_MODIFIED,
-		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_T_ACCEPTED]           = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_T_CONFLICT]           = UWB_DRP_REASON_CONFLICT,
-		[UWB_RSV_STATE_T_PENDING]            = UWB_DRP_REASON_PENDING,
-		[UWB_RSV_STATE_T_DENIED]             = UWB_DRP_REASON_DENIED,
-		[UWB_RSV_STATE_T_RESIZED]            = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
-		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
-		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
-	};
-
-	return reason_codes[rsv->state];
-}
-
-/*
- * Return the reason code for a reservations's companion DRP IE .
- */
-static int uwb_rsv_companion_reason_code(struct uwb_rsv *rsv)
-{
-	static const int companion_reason_codes[] = {
-		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = UWB_DRP_REASON_ACCEPTED,
-		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = UWB_DRP_REASON_CONFLICT,
-		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = UWB_DRP_REASON_PENDING,
-		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = UWB_DRP_REASON_DENIED,
-	};
-
-	return companion_reason_codes[rsv->state];
-}
-
-/*
- * Return the status bit for a reservations's DRP IE.
- */
-int uwb_rsv_status(struct uwb_rsv *rsv)
-{
-	static const int statuses[] = {
-		[UWB_RSV_STATE_O_INITIATED]          = 0,
-		[UWB_RSV_STATE_O_PENDING]            = 0,
-		[UWB_RSV_STATE_O_MODIFIED]           = 1,
-		[UWB_RSV_STATE_O_ESTABLISHED]        = 1,
-		[UWB_RSV_STATE_O_TO_BE_MOVED]        = 0,
-		[UWB_RSV_STATE_O_MOVE_COMBINING]     = 1,
-		[UWB_RSV_STATE_O_MOVE_REDUCING]      = 1,
-		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 1,
-		[UWB_RSV_STATE_T_ACCEPTED]           = 1,
-		[UWB_RSV_STATE_T_CONFLICT]           = 0,
-		[UWB_RSV_STATE_T_PENDING]            = 0,
-		[UWB_RSV_STATE_T_DENIED]             = 0,
-		[UWB_RSV_STATE_T_RESIZED]            = 1,
-		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
-		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 1,
-		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 1,
-		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 1,
-
-	};
-
-	return statuses[rsv->state];
-}
-
-/*
- * Return the status bit for a reservations's companion DRP IE .
- */
-int uwb_rsv_companion_status(struct uwb_rsv *rsv)
-{
-	static const int companion_statuses[] = {
-		[UWB_RSV_STATE_O_MOVE_EXPANDING]     = 0,
-		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = 1,
-		[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = 0,
-		[UWB_RSV_STATE_T_EXPANDING_PENDING]  = 0,
-		[UWB_RSV_STATE_T_EXPANDING_DENIED]   = 0,
-	};
-
-	return companion_statuses[rsv->state];
-}
-
-/*
- * Allocate a DRP IE.
- *
- * To save having to free/allocate a DRP IE when its MAS changes,
- * enough memory is allocated for the maxiumum number of DRP
- * allocation fields.  This gives an overhead per reservation of up to
- * (UWB_NUM_ZONES - 1) * 4 = 60 octets.
- */
-static struct uwb_ie_drp *uwb_drp_ie_alloc(void)
-{
-	struct uwb_ie_drp *drp_ie;
-
-	drp_ie = kzalloc(struct_size(drp_ie, allocs, UWB_NUM_ZONES),
-			 GFP_KERNEL);
-	if (drp_ie)
-		drp_ie->hdr.element_id = UWB_IE_DRP;
-	return drp_ie;
-}
-
-
-/*
- * Fill a DRP IE's allocation fields from a MAS bitmap.
- */
-static void uwb_drp_ie_from_bm(struct uwb_ie_drp *drp_ie,
-			       struct uwb_mas_bm *mas)
-{
-	int z, i, num_fields = 0, next = 0;
-	struct uwb_drp_alloc *zones;
-	__le16 current_bmp;
-	DECLARE_BITMAP(tmp_bmp, UWB_NUM_MAS);
-	DECLARE_BITMAP(tmp_mas_bm, UWB_MAS_PER_ZONE);
-
-	zones = drp_ie->allocs;
-
-	bitmap_copy(tmp_bmp, mas->bm, UWB_NUM_MAS);
-
-	/* Determine unique MAS bitmaps in zones from bitmap. */
-	for (z = 0; z < UWB_NUM_ZONES; z++) {
-		bitmap_copy(tmp_mas_bm, tmp_bmp, UWB_MAS_PER_ZONE);
-		if (bitmap_weight(tmp_mas_bm, UWB_MAS_PER_ZONE) > 0) {
-			bool found = false;
-			current_bmp = (__le16) *tmp_mas_bm;
-			for (i = 0; i < next; i++) {
-				if (current_bmp == zones[i].mas_bm) {
-					zones[i].zone_bm |= 1 << z;
-					found = true;
-					break;
-				}
-			}
-			if (!found)  {
-				num_fields++;
-				zones[next].zone_bm = 1 << z;
-				zones[next].mas_bm = current_bmp;
-				next++;
-			}
-		}
-		bitmap_shift_right(tmp_bmp, tmp_bmp, UWB_MAS_PER_ZONE, UWB_NUM_MAS);
-	}
-
-	/* Store in format ready for transmission (le16). */
-	for (i = 0; i < num_fields; i++) {
-		drp_ie->allocs[i].zone_bm = cpu_to_le16(zones[i].zone_bm);
-		drp_ie->allocs[i].mas_bm = cpu_to_le16(zones[i].mas_bm);
-	}
-
-	drp_ie->hdr.length = sizeof(struct uwb_ie_drp) - sizeof(struct uwb_ie_hdr)
-		+ num_fields * sizeof(struct uwb_drp_alloc);
-}
-
-/**
- * uwb_drp_ie_update - update a reservation's DRP IE
- * @rsv: the reservation
- */
-int uwb_drp_ie_update(struct uwb_rsv *rsv)
-{
-	struct uwb_ie_drp *drp_ie;
-	struct uwb_rsv_move *mv;
-	int unsafe;
-
-	if (rsv->state == UWB_RSV_STATE_NONE) {
-		kfree(rsv->drp_ie);
-		rsv->drp_ie = NULL;
-		return 0;
-	}
-
-	unsafe = rsv->mas.unsafe ? 1 : 0;
-
-	if (rsv->drp_ie == NULL) {
-		rsv->drp_ie = uwb_drp_ie_alloc();
-		if (rsv->drp_ie == NULL)
-			return -ENOMEM;
-	}
-	drp_ie = rsv->drp_ie;
-
-	uwb_ie_drp_set_unsafe(drp_ie,       unsafe);
-	uwb_ie_drp_set_tiebreaker(drp_ie,   rsv->tiebreaker);
-	uwb_ie_drp_set_owner(drp_ie,        uwb_rsv_is_owner(rsv));
-	uwb_ie_drp_set_status(drp_ie,       uwb_rsv_status(rsv));
-	uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_reason_code(rsv));
-	uwb_ie_drp_set_stream_index(drp_ie, rsv->stream);
-	uwb_ie_drp_set_type(drp_ie,         rsv->type);
-
-	if (uwb_rsv_is_owner(rsv)) {
-		switch (rsv->target.type) {
-		case UWB_RSV_TARGET_DEV:
-			drp_ie->dev_addr = rsv->target.dev->dev_addr;
-			break;
-		case UWB_RSV_TARGET_DEVADDR:
-			drp_ie->dev_addr = rsv->target.devaddr;
-			break;
-		}
-	} else
-		drp_ie->dev_addr = rsv->owner->dev_addr;
-
-	uwb_drp_ie_from_bm(drp_ie, &rsv->mas);
-
-	if (uwb_rsv_has_two_drp_ies(rsv)) {
-		mv = &rsv->mv;
-		if (mv->companion_drp_ie == NULL) {
-			mv->companion_drp_ie = uwb_drp_ie_alloc();
-			if (mv->companion_drp_ie == NULL)
-				return -ENOMEM;
-		}
-		drp_ie = mv->companion_drp_ie;
-
-		/* keep all the same configuration of the main drp_ie */
-		memcpy(drp_ie, rsv->drp_ie, sizeof(struct uwb_ie_drp));
-
-
-		/* FIXME: handle properly the unsafe bit */
-		uwb_ie_drp_set_unsafe(drp_ie,       1);
-		uwb_ie_drp_set_status(drp_ie,       uwb_rsv_companion_status(rsv));
-		uwb_ie_drp_set_reason_code(drp_ie,  uwb_rsv_companion_reason_code(rsv));
-
-		uwb_drp_ie_from_bm(drp_ie, &mv->companion_mas);
-	}
-
-	rsv->ie_valid = true;
-	return 0;
-}
-
-/*
- * Set MAS bits from given MAS bitmap in a single zone of large bitmap.
- *
- * We are given a zone id and the MAS bitmap of bits that need to be set in
- * this zone. Note that this zone may already have bits set and this only
- * adds settings - we cannot simply assign the MAS bitmap contents to the
- * zone contents. We iterate over the the bits (MAS) in the zone and set the
- * bits that are set in the given MAS bitmap.
- */
-static
-void uwb_drp_ie_single_zone_to_bm(struct uwb_mas_bm *bm, u8 zone, u16 mas_bm)
-{
-	int mas;
-	u16 mas_mask;
-
-	for (mas = 0; mas < UWB_MAS_PER_ZONE; mas++) {
-		mas_mask = 1 << mas;
-		if (mas_bm & mas_mask)
-			set_bit(zone * UWB_NUM_ZONES + mas, bm->bm);
-	}
-}
-
-/**
- * uwb_drp_ie_zones_to_bm - convert DRP allocation fields to a bitmap
- * @mas:    MAS bitmap that will be populated to correspond to the
- *          allocation fields in the DRP IE
- * @drp_ie: the DRP IE that contains the allocation fields.
- *
- * The input format is an array of MAS allocation fields (16 bit Zone
- * bitmap, 16 bit MAS bitmap) as described in [ECMA-368] section
- * 16.8.6. The output is a full 256 bit MAS bitmap.
- *
- * We go over all the allocation fields, for each allocation field we
- * know which zones are impacted. We iterate over all the zones
- * impacted and call a function that will set the correct MAS bits in
- * each zone.
- */
-void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie)
-{
-	int numallocs = (drp_ie->hdr.length - 4) / 4;
-	const struct uwb_drp_alloc *alloc;
-	int cnt;
-	u16 zone_bm, mas_bm;
-	u8 zone;
-	u16 zone_mask;
-
-	bitmap_zero(bm->bm, UWB_NUM_MAS);
-
-	for (cnt = 0; cnt < numallocs; cnt++) {
-		alloc = &drp_ie->allocs[cnt];
-		zone_bm = le16_to_cpu(alloc->zone_bm);
-		mas_bm = le16_to_cpu(alloc->mas_bm);
-		for (zone = 0; zone < UWB_NUM_ZONES; zone++)   {
-			zone_mask = 1 << zone;
-			if (zone_bm & zone_mask)
-				uwb_drp_ie_single_zone_to_bm(bm, zone, mas_bm);
-		}
-	}
-}
-
diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c
deleted file mode 100644
index 869987bede7b..000000000000
--- a/drivers/uwb/drp.c
+++ /dev/null
@@ -1,842 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Dynamic Reservation Protocol handling
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include "uwb-internal.h"
-
-
-/* DRP Conflict Actions ([ECMA-368 2nd Edition] 17.4.6) */
-enum uwb_drp_conflict_action {
-	/* Reservation is maintained, no action needed */
-	UWB_DRP_CONFLICT_MANTAIN = 0,
-
-	/* the device shall not transmit frames in conflicting MASs in
-	 * the following superframe. If the device is the reservation
-	 * target, it shall also set the Reason Code in its DRP IE to
-	 * Conflict in its beacon in the following superframe.
-	 */
-	UWB_DRP_CONFLICT_ACT1,
-
-	/* the device shall not set the Reservation Status bit to ONE
-	 * and shall not transmit frames in conflicting MASs. If the
-	 * device is the reservation target, it shall also set the
-	 * Reason Code in its DRP IE to Conflict.
-	 */
-	UWB_DRP_CONFLICT_ACT2,
-
-	/* the device shall not transmit frames in conflicting MASs in
-	 * the following superframe. It shall remove the conflicting
-	 * MASs from the reservation or set the Reservation Status to
-	 * ZERO in its beacon in the following superframe. If the
-	 * device is the reservation target, it shall also set the
-	 * Reason Code in its DRP IE to Conflict.
-	 */
-	UWB_DRP_CONFLICT_ACT3,
-};
-
-
-static void uwb_rc_set_drp_cmd_done(struct uwb_rc *rc, void *arg,
-				    struct uwb_rceb *reply, ssize_t reply_size)
-{
-	struct uwb_rc_evt_set_drp_ie *r = (struct uwb_rc_evt_set_drp_ie *)reply;
-	unsigned long flags;
-
-	if (r != NULL) {
-		if (r->bResultCode != UWB_RC_RES_SUCCESS)
-			dev_err(&rc->uwb_dev.dev, "SET-DRP-IE failed: %s (%d)\n",
-				uwb_rc_strerror(r->bResultCode), r->bResultCode);
-	} else
-		dev_err(&rc->uwb_dev.dev, "SET-DRP-IE: timeout\n");
-
-	spin_lock_irqsave(&rc->rsvs_lock, flags);
-	if (rc->set_drp_ie_pending > 1) {
-		rc->set_drp_ie_pending = 0;
-		uwb_rsv_queue_update(rc);
-	} else {
-		rc->set_drp_ie_pending = 0;
-	}
-	spin_unlock_irqrestore(&rc->rsvs_lock, flags);
-}
-
-/**
- * Construct and send the SET DRP IE
- *
- * @rc:         UWB Host controller
- * @returns:    >= 0 number of bytes still available in the beacon
- *              < 0 errno code on error.
- *
- * See WUSB[8.6.2.7]: The host must set all the DRP IEs that it wants the
- * device to include in its beacon at the same time. We thus have to
- * traverse all reservations and include the DRP IEs of all PENDING
- * and NEGOTIATED reservations in a SET DRP command for transmission.
- *
- * A DRP Availability IE is appended.
- *
- * rc->rsvs_mutex is held
- *
- * FIXME We currently ignore the returned value indicating the remaining space
- * in beacon. This could be used to deny reservation requests earlier if
- * determined that they would cause the beacon space to be exceeded.
- */
-int uwb_rc_send_all_drp_ie(struct uwb_rc *rc)
-{
-	int result;
-	struct uwb_rc_cmd_set_drp_ie *cmd;
-	struct uwb_rsv *rsv;
-	struct uwb_rsv_move *mv;
-	int num_bytes = 0;
-	u8 *IEDataptr;
-
-	result = -ENOMEM;
-	/* First traverse all reservations to determine memory needed. */
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->drp_ie != NULL) {
-			num_bytes += rsv->drp_ie->hdr.length + 2;
-			if (uwb_rsv_has_two_drp_ies(rsv) &&
-				(rsv->mv.companion_drp_ie != NULL)) {
-				mv = &rsv->mv;
-				num_bytes +=
-					mv->companion_drp_ie->hdr.length + 2;
-			}
-		}
-	}
-	num_bytes += sizeof(rc->drp_avail.ie);
-	cmd = kzalloc(sizeof(*cmd) + num_bytes, GFP_KERNEL);
-	if (cmd == NULL)
-		goto error;
-	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
-	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_DRP_IE);
-	cmd->wIELength = num_bytes;
-	IEDataptr = (u8 *)&cmd->IEData[0];
-
-	/* FIXME: DRV avail IE is not always needed */
-	/* put DRP avail IE first */
-	memcpy(IEDataptr, &rc->drp_avail.ie, sizeof(rc->drp_avail.ie));
-	IEDataptr += sizeof(struct uwb_ie_drp_avail);
-
-	/* Next traverse all reservations to place IEs in allocated memory. */
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->drp_ie != NULL) {
-			memcpy(IEDataptr, rsv->drp_ie,
-			       rsv->drp_ie->hdr.length + 2);
-			IEDataptr += rsv->drp_ie->hdr.length + 2;
-
-			if (uwb_rsv_has_two_drp_ies(rsv) &&
-				(rsv->mv.companion_drp_ie != NULL)) {
-				mv = &rsv->mv;
-				memcpy(IEDataptr, mv->companion_drp_ie,
-				       mv->companion_drp_ie->hdr.length + 2);
-				IEDataptr +=
-					mv->companion_drp_ie->hdr.length + 2;
-			}
-		}
-	}
-
-	result = uwb_rc_cmd_async(rc, "SET-DRP-IE",
-				&cmd->rccb, sizeof(*cmd) + num_bytes,
-				UWB_RC_CET_GENERAL, UWB_RC_CMD_SET_DRP_IE,
-				uwb_rc_set_drp_cmd_done, NULL);
-
-	rc->set_drp_ie_pending = 1;
-
-	kfree(cmd);
-error:
-	return result;
-}
-
-/*
- * Evaluate the action to perform using conflict resolution rules
- *
- * Return a uwb_drp_conflict_action.
- */
-static int evaluate_conflict_action(struct uwb_ie_drp *ext_drp_ie, int ext_beacon_slot,
-				    struct uwb_rsv *rsv, int our_status)
-{
-	int our_tie_breaker = rsv->tiebreaker;
-	int our_type        = rsv->type;
-	int our_beacon_slot = rsv->rc->uwb_dev.beacon_slot;
-
-	int ext_tie_breaker = uwb_ie_drp_tiebreaker(ext_drp_ie);
-	int ext_status      = uwb_ie_drp_status(ext_drp_ie);
-	int ext_type        = uwb_ie_drp_type(ext_drp_ie);
-
-
-	/* [ECMA-368 2nd Edition] 17.4.6 */
-	if (ext_type == UWB_DRP_TYPE_PCA && our_type == UWB_DRP_TYPE_PCA) {
-		return UWB_DRP_CONFLICT_MANTAIN;
-	}
-
-	/* [ECMA-368 2nd Edition] 17.4.6-1 */
-	if (our_type == UWB_DRP_TYPE_ALIEN_BP) {
-		return UWB_DRP_CONFLICT_MANTAIN;
-	}
-
-	/* [ECMA-368 2nd Edition] 17.4.6-2 */
-	if (ext_type == UWB_DRP_TYPE_ALIEN_BP) {
-		/* here we know our_type != UWB_DRP_TYPE_ALIEN_BP */
-		return UWB_DRP_CONFLICT_ACT1;
-	}
-
-	/* [ECMA-368 2nd Edition] 17.4.6-3 */
-	if (our_status == 0 && ext_status == 1) {
-		return UWB_DRP_CONFLICT_ACT2;
-	}
-
-	/* [ECMA-368 2nd Edition] 17.4.6-4 */
-	if (our_status == 1 && ext_status == 0) {
-		return UWB_DRP_CONFLICT_MANTAIN;
-	}
-
-	/* [ECMA-368 2nd Edition] 17.4.6-5a */
-	if (our_tie_breaker == ext_tie_breaker &&
-	    our_beacon_slot <  ext_beacon_slot) {
-		return UWB_DRP_CONFLICT_MANTAIN;
-	}
-
-	/* [ECMA-368 2nd Edition] 17.4.6-5b */
-	if (our_tie_breaker != ext_tie_breaker &&
-	    our_beacon_slot >  ext_beacon_slot) {
-		return UWB_DRP_CONFLICT_MANTAIN;
-	}
-
-	if (our_status == 0) {
-		if (our_tie_breaker == ext_tie_breaker) {
-			/* [ECMA-368 2nd Edition] 17.4.6-6a */
-			if (our_beacon_slot > ext_beacon_slot) {
-				return UWB_DRP_CONFLICT_ACT2;
-			}
-		} else  {
-			/* [ECMA-368 2nd Edition] 17.4.6-6b */
-			if (our_beacon_slot < ext_beacon_slot) {
-				return UWB_DRP_CONFLICT_ACT2;
-			}
-		}
-	} else {
-		if (our_tie_breaker == ext_tie_breaker) {
-			/* [ECMA-368 2nd Edition] 17.4.6-7a */
-			if (our_beacon_slot > ext_beacon_slot) {
-				return UWB_DRP_CONFLICT_ACT3;
-			}
-		} else {
-			/* [ECMA-368 2nd Edition] 17.4.6-7b */
-			if (our_beacon_slot < ext_beacon_slot) {
-				return UWB_DRP_CONFLICT_ACT3;
-			}
-		}
-	}
-	return UWB_DRP_CONFLICT_MANTAIN;
-}
-
-static void handle_conflict_normal(struct uwb_ie_drp *drp_ie,
-				   int ext_beacon_slot,
-				   struct uwb_rsv *rsv,
-				   struct uwb_mas_bm *conflicting_mas)
-{
-	struct uwb_rc *rc = rsv->rc;
-	struct uwb_rsv_move *mv = &rsv->mv;
-	struct uwb_drp_backoff_win *bow = &rc->bow;
-	int action;
-
-	action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, uwb_rsv_status(rsv));
-
-	if (uwb_rsv_is_owner(rsv)) {
-		switch(action) {
-		case UWB_DRP_CONFLICT_ACT2:
-			/* try move */
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_TO_BE_MOVED);
-			if (bow->can_reserve_extra_mases == false)
-				uwb_rsv_backoff_win_increment(rc);
-
-			break;
-		case UWB_DRP_CONFLICT_ACT3:
-			uwb_rsv_backoff_win_increment(rc);
-			/* drop some mases with reason modified */
-			/* put in the companion the mases to be dropped */
-			bitmap_and(mv->companion_mas.bm, rsv->mas.bm, conflicting_mas->bm, UWB_NUM_MAS);
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
-		default:
-			break;
-		}
-	} else {
-		switch(action) {
-		case UWB_DRP_CONFLICT_ACT2:
-		case UWB_DRP_CONFLICT_ACT3:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
-		default:
-			break;
-		}
-
-	}
-
-}
-
-static void handle_conflict_expanding(struct uwb_ie_drp *drp_ie, int ext_beacon_slot,
-				      struct uwb_rsv *rsv, bool companion_only,
-				      struct uwb_mas_bm *conflicting_mas)
-{
-	struct uwb_rc *rc = rsv->rc;
-	struct uwb_drp_backoff_win *bow = &rc->bow;
-	struct uwb_rsv_move *mv = &rsv->mv;
-	int action;
-
-	if (companion_only) {
-		/* status of companion is 0 at this point */
-		action = evaluate_conflict_action(drp_ie, ext_beacon_slot, rsv, 0);
-		if (uwb_rsv_is_owner(rsv)) {
-			switch(action) {
-			case UWB_DRP_CONFLICT_ACT2:
-			case UWB_DRP_CONFLICT_ACT3:
-				uwb_rsv_set_state(rsv,
-						UWB_RSV_STATE_O_ESTABLISHED);
-				rsv->needs_release_companion_mas = false;
-				if (bow->can_reserve_extra_mases == false)
-					uwb_rsv_backoff_win_increment(rc);
-				uwb_drp_avail_release(rsv->rc,
-						&rsv->mv.companion_mas);
-			}
-		} else { /* rsv is target */
-			switch(action) {
-			case UWB_DRP_CONFLICT_ACT2:
-			case UWB_DRP_CONFLICT_ACT3:
-				uwb_rsv_set_state(rsv,
-					UWB_RSV_STATE_T_EXPANDING_CONFLICT);
-                                /* send_drp_avail_ie = true; */
-			}
-		}
-	} else { /* also base part of the reservation is conflicting */
-		if (uwb_rsv_is_owner(rsv)) {
-			uwb_rsv_backoff_win_increment(rc);
-			/* remove companion part */
-			uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
-
-			/* drop some mases with reason modified */
-
-			/* put in the companion the mases to be dropped */
-			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm,
-					conflicting_mas->bm, UWB_NUM_MAS);
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
-		} else { /* it is a target rsv */
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
-                        /* send_drp_avail_ie = true; */
-		}
-	}
-}
-
-static void uwb_drp_handle_conflict_rsv(struct uwb_rc *rc, struct uwb_rsv *rsv,
-					struct uwb_rc_evt_drp *drp_evt,
-					struct uwb_ie_drp *drp_ie,
-					struct uwb_mas_bm *conflicting_mas)
-{
-	struct uwb_rsv_move *mv;
-
-	/* check if the conflicting reservation has two drp_ies */
-	if (uwb_rsv_has_two_drp_ies(rsv)) {
-		mv = &rsv->mv;
-		if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm,
-								UWB_NUM_MAS)) {
-			handle_conflict_expanding(drp_ie,
-						drp_evt->beacon_slot_number,
-						rsv, false, conflicting_mas);
-		} else {
-			if (bitmap_intersects(mv->companion_mas.bm,
-					conflicting_mas->bm, UWB_NUM_MAS)) {
-				handle_conflict_expanding(
-					drp_ie, drp_evt->beacon_slot_number,
-					rsv, true, conflicting_mas);
-			}
-		}
-	} else if (bitmap_intersects(rsv->mas.bm, conflicting_mas->bm,
-							UWB_NUM_MAS)) {
-		handle_conflict_normal(drp_ie, drp_evt->beacon_slot_number,
-					rsv, conflicting_mas);
-	}
-}
-
-static void uwb_drp_handle_all_conflict_rsv(struct uwb_rc *rc,
-					    struct uwb_rc_evt_drp *drp_evt,
-					    struct uwb_ie_drp *drp_ie,
-					    struct uwb_mas_bm *conflicting_mas)
-{
-	struct uwb_rsv *rsv;
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie,
-							conflicting_mas);
-	}
-}
-
-static void uwb_drp_process_target_accepted(struct uwb_rc *rc,
-	struct uwb_rsv *rsv, struct uwb_rc_evt_drp *drp_evt,
-	struct uwb_ie_drp *drp_ie, struct uwb_mas_bm *mas)
-{
-	struct uwb_rsv_move *mv = &rsv->mv;
-	int status;
-
-	status = uwb_ie_drp_status(drp_ie);
-
-	if (rsv->state == UWB_RSV_STATE_T_CONFLICT) {
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_CONFLICT);
-		return;
-	}
-
-	if (rsv->state == UWB_RSV_STATE_T_EXPANDING_ACCEPTED) {
-		/* drp_ie is companion */
-		if (!bitmap_equal(rsv->mas.bm, mas->bm, UWB_NUM_MAS)) {
-			/* stroke companion */
-			uwb_rsv_set_state(rsv,
-				UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
-		}
-	} else {
-		if (!bitmap_equal(rsv->mas.bm, mas->bm, UWB_NUM_MAS)) {
-			if (uwb_drp_avail_reserve_pending(rc, mas) == -EBUSY) {
-				/* FIXME: there is a conflict, find
-				 * the conflicting reservations and
-				 * take a sensible action. Consider
-				 * that in drp_ie there is the
-				 * "neighbour" */
-				uwb_drp_handle_all_conflict_rsv(rc, drp_evt,
-						drp_ie, mas);
-			} else {
-				/* accept the extra reservation */
-				bitmap_copy(mv->companion_mas.bm, mas->bm,
-								UWB_NUM_MAS);
-				uwb_rsv_set_state(rsv,
-					UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
-			}
-		} else {
-			if (status) {
-				uwb_rsv_set_state(rsv,
-						UWB_RSV_STATE_T_ACCEPTED);
-			}
-		}
-
-	}
-}
-
-/*
- * Based on the DRP IE, transition a target reservation to a new
- * state.
- */
-static void uwb_drp_process_target(struct uwb_rc *rc, struct uwb_rsv *rsv,
-		   struct uwb_ie_drp *drp_ie, struct uwb_rc_evt_drp *drp_evt)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rsv_move *mv = &rsv->mv;
-	int status;
-	enum uwb_drp_reason reason_code;
-	struct uwb_mas_bm mas;
-
-	status = uwb_ie_drp_status(drp_ie);
-	reason_code = uwb_ie_drp_reason_code(drp_ie);
-	uwb_drp_ie_to_bm(&mas, drp_ie);
-
-	switch (reason_code) {
-	case UWB_DRP_REASON_ACCEPTED:
-		uwb_drp_process_target_accepted(rc, rsv, drp_evt, drp_ie, &mas);
-		break;
-
-	case UWB_DRP_REASON_MODIFIED:
-		/* check to see if we have already modified the reservation */
-		if (bitmap_equal(rsv->mas.bm, mas.bm, UWB_NUM_MAS)) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
-			break;
-		}
-
-		/* find if the owner wants to expand or reduce */
-		if (bitmap_subset(mas.bm, rsv->mas.bm, UWB_NUM_MAS)) {
-			/* owner is reducing */
-			bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mas.bm,
-				UWB_NUM_MAS);
-			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
-		}
-
-		bitmap_copy(rsv->mas.bm, mas.bm, UWB_NUM_MAS);
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_RESIZED);
-		break;
-	default:
-		dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-			 reason_code, status);
-	}
-}
-
-static void uwb_drp_process_owner_accepted(struct uwb_rsv *rsv,
-						struct uwb_mas_bm *mas)
-{
-	struct uwb_rsv_move *mv = &rsv->mv;
-
-	switch (rsv->state) {
-	case UWB_RSV_STATE_O_PENDING:
-	case UWB_RSV_STATE_O_INITIATED:
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-		break;
-	case UWB_RSV_STATE_O_MODIFIED:
-		if (bitmap_equal(mas->bm, rsv->mas.bm, UWB_NUM_MAS))
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-		else
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MODIFIED);
-		break;
-
-	case UWB_RSV_STATE_O_MOVE_REDUCING: /* shouldn' t be a problem */
-		if (bitmap_equal(mas->bm, rsv->mas.bm, UWB_NUM_MAS))
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-		else
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
-		break;
-	case UWB_RSV_STATE_O_MOVE_EXPANDING:
-		if (bitmap_equal(mas->bm, mv->companion_mas.bm, UWB_NUM_MAS)) {
-			/* Companion reservation accepted */
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
-		} else {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
-		}
-		break;
-	case UWB_RSV_STATE_O_MOVE_COMBINING:
-		if (bitmap_equal(mas->bm, rsv->mas.bm, UWB_NUM_MAS))
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
-		else
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
-		break;
-	default:
-		break;
-	}
-}
-/*
- * Based on the DRP IE, transition an owner reservation to a new
- * state.
- */
-static void uwb_drp_process_owner(struct uwb_rc *rc, struct uwb_rsv *rsv,
-				  struct uwb_dev *src, struct uwb_ie_drp *drp_ie,
-				  struct uwb_rc_evt_drp *drp_evt)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	int status;
-	enum uwb_drp_reason reason_code;
-	struct uwb_mas_bm mas;
-
-	status = uwb_ie_drp_status(drp_ie);
-	reason_code = uwb_ie_drp_reason_code(drp_ie);
-	uwb_drp_ie_to_bm(&mas, drp_ie);
-
-	if (status) {
-		switch (reason_code) {
-		case UWB_DRP_REASON_ACCEPTED:
-			uwb_drp_process_owner_accepted(rsv, &mas);
-			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
-		}
-	} else {
-		switch (reason_code) {
-		case UWB_DRP_REASON_PENDING:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_PENDING);
-			break;
-		case UWB_DRP_REASON_DENIED:
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-			break;
-		case UWB_DRP_REASON_CONFLICT:
-			/* resolve the conflict */
-			bitmap_complement(mas.bm, src->last_availability_bm,
-					  UWB_NUM_MAS);
-			uwb_drp_handle_conflict_rsv(rc, rsv, drp_evt, drp_ie, &mas);
-			break;
-		default:
-			dev_warn(dev, "ignoring invalid DRP IE state (%d/%d)\n",
-				 reason_code, status);
-		}
-	}
-}
-
-static void uwb_cnflt_alien_stroke_timer(struct uwb_cnflt_alien *cnflt)
-{
-	unsigned timeout_us = UWB_MAX_LOST_BEACONS * UWB_SUPERFRAME_LENGTH_US;
-	mod_timer(&cnflt->timer, jiffies + usecs_to_jiffies(timeout_us));
-}
-
-static void uwb_cnflt_update_work(struct work_struct *work)
-{
-	struct uwb_cnflt_alien *cnflt = container_of(work,
-						     struct uwb_cnflt_alien,
-						     cnflt_update_work);
-	struct uwb_cnflt_alien *c;
-	struct uwb_rc *rc = cnflt->rc;
-
-	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	list_del(&cnflt->rc_node);
-
-	/* update rc global conflicting alien bitmap */
-	bitmap_zero(rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
-
-	list_for_each_entry(c, &rc->cnflt_alien_list, rc_node) {
-		bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm,
-						c->mas.bm, UWB_NUM_MAS);
-	}
-
-	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work,
-					usecs_to_jiffies(delay_us));
-
-	kfree(cnflt);
-	mutex_unlock(&rc->rsvs_mutex);
-}
-
-static void uwb_cnflt_timer(struct timer_list *t)
-{
-	struct uwb_cnflt_alien *cnflt = from_timer(cnflt, t, timer);
-
-	queue_work(cnflt->rc->rsv_workq, &cnflt->cnflt_update_work);
-}
-
-/*
- * We have received an DRP_IE of type Alien BP and we need to make
- * sure we do not transmit in conflicting MASs.
- */
-static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_mas_bm mas;
-	struct uwb_cnflt_alien *cnflt;
-	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
-
-	uwb_drp_ie_to_bm(&mas, drp_ie);
-
-	list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) {
-		if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) {
-			/* Existing alien BP reservation conflicting
-			 * bitmap, just reset the timer */
-			uwb_cnflt_alien_stroke_timer(cnflt);
-			return;
-		}
-	}
-
-	/* New alien BP reservation conflicting bitmap */
-
-	/* alloc and initialize new uwb_cnflt_alien */
-	cnflt = kzalloc(sizeof(struct uwb_cnflt_alien), GFP_KERNEL);
-	if (!cnflt) {
-		dev_err(dev, "failed to alloc uwb_cnflt_alien struct\n");
-		return;
-	}
-
-	INIT_LIST_HEAD(&cnflt->rc_node);
-	timer_setup(&cnflt->timer, uwb_cnflt_timer, 0);
-
-	cnflt->rc = rc;
-	INIT_WORK(&cnflt->cnflt_update_work, uwb_cnflt_update_work);
-
-	bitmap_copy(cnflt->mas.bm, mas.bm, UWB_NUM_MAS);
-
-	list_add_tail(&cnflt->rc_node, &rc->cnflt_alien_list);
-
-	/* update rc global conflicting alien bitmap */
-	bitmap_or(rc->cnflt_alien_bitmap.bm, rc->cnflt_alien_bitmap.bm, mas.bm, UWB_NUM_MAS);
-
-	queue_delayed_work(rc->rsv_workq, &rc->rsv_alien_bp_work, usecs_to_jiffies(delay_us));
-
-	/* start the timer */
-	uwb_cnflt_alien_stroke_timer(cnflt);
-}
-
-static void uwb_drp_process_not_involved(struct uwb_rc *rc,
-					 struct uwb_rc_evt_drp *drp_evt,
-					 struct uwb_ie_drp *drp_ie)
-{
-	struct uwb_mas_bm mas;
-
-	uwb_drp_ie_to_bm(&mas, drp_ie);
-	uwb_drp_handle_all_conflict_rsv(rc, drp_evt, drp_ie, &mas);
-}
-
-static void uwb_drp_process_involved(struct uwb_rc *rc, struct uwb_dev *src,
-				     struct uwb_rc_evt_drp *drp_evt,
-				     struct uwb_ie_drp *drp_ie)
-{
-	struct uwb_rsv *rsv;
-
-	rsv = uwb_rsv_find(rc, src, drp_ie);
-	if (!rsv) {
-		/*
-		 * No reservation? It's either for a recently
-		 * terminated reservation; or the DRP IE couldn't be
-		 * processed (e.g., an invalid IE or out of memory).
-		 */
-		return;
-	}
-
-	/*
-	 * Do nothing with DRP IEs for reservations that have been
-	 * terminated.
-	 */
-	if (rsv->state == UWB_RSV_STATE_NONE) {
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-		return;
-	}
-
-	if (uwb_ie_drp_owner(drp_ie))
-		uwb_drp_process_target(rc, rsv, drp_ie, drp_evt);
-	else
-		uwb_drp_process_owner(rc, rsv, src, drp_ie, drp_evt);
-
-}
-
-
-static bool uwb_drp_involves_us(struct uwb_rc *rc, struct uwb_ie_drp *drp_ie)
-{
-	return uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, &drp_ie->dev_addr) == 0;
-}
-
-/*
- * Process a received DRP IE.
- */
-static void uwb_drp_process(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
-			    struct uwb_dev *src, struct uwb_ie_drp *drp_ie)
-{
-	if (uwb_ie_drp_type(drp_ie) == UWB_DRP_TYPE_ALIEN_BP)
-		uwb_drp_handle_alien_drp(rc, drp_ie);
-	else if (uwb_drp_involves_us(rc, drp_ie))
-		uwb_drp_process_involved(rc, src, drp_evt, drp_ie);
-	else
-		uwb_drp_process_not_involved(rc, drp_evt, drp_ie);
-}
-
-/*
- * Process a received DRP Availability IE
- */
-static void uwb_drp_availability_process(struct uwb_rc *rc, struct uwb_dev *src,
-					 struct uwb_ie_drp_avail *drp_availability_ie)
-{
-	bitmap_copy(src->last_availability_bm,
-		    drp_availability_ie->bmp, UWB_NUM_MAS);
-}
-
-/*
- * Process all the DRP IEs (both DRP IEs and the DRP Availability IE)
- * from a device.
- */
-static
-void uwb_drp_process_all(struct uwb_rc *rc, struct uwb_rc_evt_drp *drp_evt,
-			 size_t ielen, struct uwb_dev *src_dev)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_ie_hdr *ie_hdr;
-	void *ptr;
-
-	ptr = drp_evt->ie_data;
-	for (;;) {
-		ie_hdr = uwb_ie_next(&ptr, &ielen);
-		if (!ie_hdr)
-			break;
-
-		switch (ie_hdr->element_id) {
-		case UWB_IE_DRP_AVAILABILITY:
-			uwb_drp_availability_process(rc, src_dev, (struct uwb_ie_drp_avail *)ie_hdr);
-			break;
-		case UWB_IE_DRP:
-			uwb_drp_process(rc, drp_evt, src_dev, (struct uwb_ie_drp *)ie_hdr);
-			break;
-		default:
-			dev_warn(dev, "unexpected IE in DRP notification\n");
-			break;
-		}
-	}
-
-	if (ielen > 0)
-		dev_warn(dev, "%d octets remaining in DRP notification\n",
-			 (int)ielen);
-}
-
-/**
- * uwbd_evt_handle_rc_drp - handle a DRP_IE event
- * @evt: the DRP_IE event from the radio controller
- *
- * This processes DRP notifications from the radio controller, either
- * initiating a new reservation or transitioning an existing
- * reservation into a different state.
- *
- * DRP notifications can occur for three different reasons:
- *
- * - UWB_DRP_NOTIF_DRP_IE_RECVD: one or more DRP IEs with the RC as
- *   the target or source have been received.
- *
- *   These DRP IEs could be new or for an existing reservation.
- *
- *   If the DRP IE for an existing reservation ceases to be to
- *   received for at least mMaxLostBeacons, the reservation should be
- *   considered to be terminated.  Note that the TERMINATE reason (see
- *   below) may not always be signalled (e.g., the remote device has
- *   two or more reservations established with the RC).
- *
- * - UWB_DRP_NOTIF_CONFLICT: DRP IEs from any device in the beacon
- *   group conflict with the RC's reservations.
- *
- * - UWB_DRP_NOTIF_TERMINATE: DRP IEs are no longer being received
- *   from a device (i.e., it's terminated all reservations).
- *
- * Only the software state of the reservations is changed; the setting
- * of the radio controller's DRP IEs is done after all the events in
- * an event buffer are processed.  This saves waiting multiple times
- * for the SET_DRP_IE command to complete.
- */
-int uwbd_evt_handle_rc_drp(struct uwb_event *evt)
-{
-	struct device *dev = &evt->rc->uwb_dev.dev;
-	struct uwb_rc *rc = evt->rc;
-	struct uwb_rc_evt_drp *drp_evt;
-	size_t ielength, bytes_left;
-	struct uwb_dev_addr src_addr;
-	struct uwb_dev *src_dev;
-
-	/* Is there enough data to decode the event (and any IEs in
-	   its payload)? */
-	if (evt->notif.size < sizeof(*drp_evt)) {
-		dev_err(dev, "DRP event: Not enough data to decode event "
-			"[%zu bytes left, %zu needed]\n",
-			evt->notif.size, sizeof(*drp_evt));
-		return 0;
-	}
-	bytes_left = evt->notif.size - sizeof(*drp_evt);
-	drp_evt = container_of(evt->notif.rceb, struct uwb_rc_evt_drp, rceb);
-	ielength = le16_to_cpu(drp_evt->ie_length);
-	if (bytes_left != ielength) {
-		dev_err(dev, "DRP event: Not enough data in payload [%zu"
-			"bytes left, %zu declared in the event]\n",
-			bytes_left, ielength);
-		return 0;
-	}
-
-	memcpy(src_addr.data, &drp_evt->src_addr, sizeof(src_addr));
-	src_dev = uwb_dev_get_by_devaddr(rc, &src_addr);
-	if (!src_dev) {
-		/*
-		 * A DRP notification from an unrecognized device.
-		 *
-		 * This is probably from a WUSB device that doesn't
-		 * have an EUI-48 and therefore doesn't show up in the
-		 * UWB device database.  It's safe to simply ignore
-		 * these.
-		 */
-		return 0;
-	}
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	/* We do not distinguish from the reason */
-	uwb_drp_process_all(rc, drp_evt, ielength, src_dev);
-
-	mutex_unlock(&rc->rsvs_mutex);
-
-	uwb_dev_put(src_dev);
-	return 0;
-}
diff --git a/drivers/uwb/est.c b/drivers/uwb/est.c
deleted file mode 100644
index d4141ffdd775..000000000000
--- a/drivers/uwb/est.c
+++ /dev/null
@@ -1,450 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band Radio Control
- * Event Size Tables management
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- *
- * Infrastructure, code and data tables for guessing the size of
- * events received on the notification endpoints of UWB radio
- * controllers.
- *
- * You define a table of events and for each, its size and how to get
- * the extra size.
- *
- * ENTRY POINTS:
- *
- * uwb_est_{init/destroy}(): To initialize/release the EST subsystem.
- *
- * uwb_est_[u]register(): To un/register event size tables
- *   uwb_est_grow()
- *
- * uwb_est_find_size(): Get the size of an event
- *   uwb_est_get_size()
- */
-#include <linux/spinlock.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-struct uwb_est {
-	u16 type_event_high;
-	u16 vendor, product;
-	u8 entries;
-	const struct uwb_est_entry *entry;
-};
-
-static struct uwb_est *uwb_est;
-static u8 uwb_est_size;
-static u8 uwb_est_used;
-static DEFINE_RWLOCK(uwb_est_lock);
-
-/**
- * WUSB Standard Event Size Table, HWA-RC interface
- *
- * Sizes for events and notifications type 0 (general), high nibble 0.
- */
-static
-struct uwb_est_entry uwb_est_00_00xx[] = {
-	[UWB_RC_EVT_IE_RCV] = {
-		.size = sizeof(struct uwb_rc_evt_ie_rcv),
-		.offset = 1 + offsetof(struct uwb_rc_evt_ie_rcv, wIELength),
-	},
-	[UWB_RC_EVT_BEACON] = {
-		.size = sizeof(struct uwb_rc_evt_beacon),
-		.offset = 1 + offsetof(struct uwb_rc_evt_beacon, wBeaconInfoLength),
-	},
-	[UWB_RC_EVT_BEACON_SIZE] = {
-		.size = sizeof(struct uwb_rc_evt_beacon_size),
-	},
-	[UWB_RC_EVT_BPOIE_CHANGE] = {
-		.size = sizeof(struct uwb_rc_evt_bpoie_change),
-		.offset = 1 + offsetof(struct uwb_rc_evt_bpoie_change,
-				       wBPOIELength),
-	},
-	[UWB_RC_EVT_BP_SLOT_CHANGE] = {
-		.size = sizeof(struct uwb_rc_evt_bp_slot_change),
-	},
-	[UWB_RC_EVT_BP_SWITCH_IE_RCV] = {
-		.size = sizeof(struct uwb_rc_evt_bp_switch_ie_rcv),
-		.offset = 1 + offsetof(struct uwb_rc_evt_bp_switch_ie_rcv, wIELength),
-	},
-	[UWB_RC_EVT_DEV_ADDR_CONFLICT] = {
-		.size = sizeof(struct uwb_rc_evt_dev_addr_conflict),
-	},
-	[UWB_RC_EVT_DRP_AVAIL] = {
-		.size = sizeof(struct uwb_rc_evt_drp_avail)
-	},
-	[UWB_RC_EVT_DRP] = {
-		.size = sizeof(struct uwb_rc_evt_drp),
-		.offset = 1 + offsetof(struct uwb_rc_evt_drp, ie_length),
-	},
-	[UWB_RC_EVT_BP_SWITCH_STATUS] = {
-		.size = sizeof(struct uwb_rc_evt_bp_switch_status),
-	},
-	[UWB_RC_EVT_CMD_FRAME_RCV] = {
-		.size = sizeof(struct uwb_rc_evt_cmd_frame_rcv),
-		.offset = 1 + offsetof(struct uwb_rc_evt_cmd_frame_rcv, dataLength),
-	},
-	[UWB_RC_EVT_CHANNEL_CHANGE_IE_RCV] = {
-		.size = sizeof(struct uwb_rc_evt_channel_change_ie_rcv),
-		.offset = 1 + offsetof(struct uwb_rc_evt_channel_change_ie_rcv, wIELength),
-	},
-	[UWB_RC_CMD_CHANNEL_CHANGE] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_DEV_ADDR_MGMT] = {
-		.size = sizeof(struct uwb_rc_evt_dev_addr_mgmt) },
-	[UWB_RC_CMD_GET_IE] = {
-		.size = sizeof(struct uwb_rc_evt_get_ie),
-		.offset = 1 + offsetof(struct uwb_rc_evt_get_ie, wIELength),
-	},
-	[UWB_RC_CMD_RESET] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SCAN] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SET_BEACON_FILTER] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SET_DRP_IE] = {
-		.size = sizeof(struct uwb_rc_evt_set_drp_ie),
-	},
-	[UWB_RC_CMD_SET_IE] = {
-		.size = sizeof(struct uwb_rc_evt_set_ie),
-	},
-	[UWB_RC_CMD_SET_NOTIFICATION_FILTER] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SET_TX_POWER] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SLEEP] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_START_BEACON] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_STOP_BEACON] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_BP_MERGE] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SEND_COMMAND_FRAME] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-	[UWB_RC_CMD_SET_ASIE_NOTIF] = {
-		.size = sizeof(struct uwb_rc_evt_confirm),
-	},
-};
-
-static
-struct uwb_est_entry uwb_est_01_00xx[] = {
-	[UWB_RC_DAA_ENERGY_DETECTED] = {
-		.size = sizeof(struct uwb_rc_evt_daa_energy_detected),
-	},
-	[UWB_RC_SET_DAA_ENERGY_MASK] = {
-		.size = sizeof(struct uwb_rc_evt_set_daa_energy_mask),
-	},
-	[UWB_RC_SET_NOTIFICATION_FILTER_EX] = {
-		.size = sizeof(struct uwb_rc_evt_set_notification_filter_ex),
-	},
-};
-
-/**
- * Initialize the EST subsystem
- *
- * Register the standard tables also.
- *
- * FIXME: tag init
- */
-int uwb_est_create(void)
-{
-	int result;
-
-	uwb_est_size = 2;
-	uwb_est_used = 0;
-	uwb_est = kcalloc(uwb_est_size, sizeof(uwb_est[0]), GFP_KERNEL);
-	if (uwb_est == NULL)
-		return -ENOMEM;
-
-	result = uwb_est_register(UWB_RC_CET_GENERAL, 0, 0xffff, 0xffff,
-				  uwb_est_00_00xx, ARRAY_SIZE(uwb_est_00_00xx));
-	if (result < 0)
-		goto out;
-	result = uwb_est_register(UWB_RC_CET_EX_TYPE_1, 0, 0xffff, 0xffff,
-				  uwb_est_01_00xx, ARRAY_SIZE(uwb_est_01_00xx));
-out:
-	return result;
-}
-
-
-/** Clean it up */
-void uwb_est_destroy(void)
-{
-	kfree(uwb_est);
-	uwb_est = NULL;
-	uwb_est_size = uwb_est_used = 0;
-}
-
-
-/**
- * Double the capacity of the EST table
- *
- * @returns 0 if ok, < 0 errno no error.
- */
-static
-int uwb_est_grow(void)
-{
-	size_t actual_size = uwb_est_size * sizeof(uwb_est[0]);
-	void *new = kmalloc_array(2, actual_size, GFP_ATOMIC);
-	if (new == NULL)
-		return -ENOMEM;
-	memcpy(new, uwb_est, actual_size);
-	memset(new + actual_size, 0, actual_size);
-	kfree(uwb_est);
-	uwb_est = new;
-	uwb_est_size *= 2;
-	return 0;
-}
-
-
-/**
- * Register an event size table
- *
- * Makes room for it if the table is full, and then inserts  it in the
- * right position (entries are sorted by type, event_high, vendor and
- * then product).
- *
- * @vendor:  vendor code for matching against the device (0x0000 and
- *           0xffff mean any); use 0x0000 to force all to match without
- *           checking possible vendor specific ones, 0xfffff to match
- *           after checking vendor specific ones.
- *
- * @product: product code from that vendor; same matching rules, use
- *           0x0000 for not allowing vendor specific matches, 0xffff
- *           for allowing.
- *
- * This arragement just makes the tables sort differenty. Because the
- * table is sorted by growing type-event_high-vendor-product, a zero
- * vendor will match before than a 0x456a vendor, that will match
- * before a 0xfffff vendor.
- *
- * @returns 0 if ok, < 0 errno on error (-ENOENT if not found).
- */
-/* FIXME: add bus type to vendor/product code */
-int uwb_est_register(u8 type, u8 event_high, u16 vendor, u16 product,
-		     const struct uwb_est_entry *entry, size_t entries)
-{
-	unsigned long flags;
-	unsigned itr;
-	int result = 0;
-
-	write_lock_irqsave(&uwb_est_lock, flags);
-	if (uwb_est_used == uwb_est_size) {
-		result = uwb_est_grow();
-		if (result < 0)
-			goto out;
-	}
-	/* Find the right spot to insert it in */
-	for (itr = 0; itr < uwb_est_used; itr++)
-		if (uwb_est[itr].type_event_high < type
-		    && uwb_est[itr].vendor < vendor
-		    && uwb_est[itr].product < product)
-			break;
-
-	/* Shift others to make room for the new one? */
-	if (itr < uwb_est_used)
-		memmove(&uwb_est[itr+1], &uwb_est[itr], uwb_est_used - itr);
-	uwb_est[itr].type_event_high = type << 8 | event_high;
-	uwb_est[itr].vendor = vendor;
-	uwb_est[itr].product = product;
-	uwb_est[itr].entry = entry;
-	uwb_est[itr].entries = entries;
-	uwb_est_used++;
-out:
-	write_unlock_irqrestore(&uwb_est_lock, flags);
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_est_register);
-
-
-/**
- * Unregister an event size table
- *
- * This just removes the specified entry and moves the ones after it
- * to fill in the gap. This is needed to keep the list sorted; no
- * reallocation is done to reduce the size of the table.
- *
- * We unregister by all the data we used to register instead of by
- * pointer to the @entry array because we might have used the same
- * table for a bunch of IDs (for example).
- *
- * @returns 0 if ok, < 0 errno on error (-ENOENT if not found).
- */
-int uwb_est_unregister(u8 type, u8 event_high, u16 vendor, u16 product,
-		       const struct uwb_est_entry *entry, size_t entries)
-{
-	unsigned long flags;
-	unsigned itr;
-	struct uwb_est est_cmp = {
-		.type_event_high = type << 8 | event_high,
-		.vendor = vendor,
-		.product = product,
-		.entry = entry,
-		.entries = entries
-	};
-	write_lock_irqsave(&uwb_est_lock, flags);
-	for (itr = 0; itr < uwb_est_used; itr++)
-		if (!memcmp(&uwb_est[itr], &est_cmp, sizeof(est_cmp)))
-			goto found;
-	write_unlock_irqrestore(&uwb_est_lock, flags);
-	return -ENOENT;
-
-found:
-	if (itr < uwb_est_used - 1)	/* Not last one? move ones above */
-		memmove(&uwb_est[itr], &uwb_est[itr+1], uwb_est_used - itr - 1);
-	uwb_est_used--;
-	write_unlock_irqrestore(&uwb_est_lock, flags);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_est_unregister);
-
-
-/**
- * Get the size of an event from a table
- *
- * @rceb: pointer to the buffer with the event
- * @rceb_size: size of the area pointed to by @rceb in bytes.
- * @returns: > 0      Size of the event
- *	     -ENOSPC  An area big enough was not provided to look
- *		      ahead into the event's guts and guess the size.
- *	     -EINVAL  Unknown event code (wEvent).
- *
- * This will look at the received RCEB and guess what is the total
- * size. For variable sized events, it will look further ahead into
- * their length field to see how much data should be read.
- *
- * Note this size is *not* final--the neh (Notification/Event Handle)
- * might specificy an extra size to add.
- */
-static
-ssize_t uwb_est_get_size(struct uwb_rc *uwb_rc, struct uwb_est *est,
-			 u8 event_low, const struct uwb_rceb *rceb,
-			 size_t rceb_size)
-{
-	unsigned offset;
-	ssize_t size;
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	const struct uwb_est_entry *entry;
-
-	size = -ENOENT;
-	if (event_low >= est->entries) {	/* in range? */
-		dev_err(dev, "EST %p 0x%04x/%04x/%04x[%u]: event %u out of range\n",
-			est, est->type_event_high, est->vendor, est->product,
-			est->entries, event_low);
-		goto out;
-	}
-	size = -ENOENT;
-	entry = &est->entry[event_low];
-	if (entry->size == 0 && entry->offset == 0) {	/* unknown? */
-		dev_err(dev, "EST %p 0x%04x/%04x/%04x[%u]: event %u unknown\n",
-			est, est->type_event_high, est->vendor,	est->product,
-			est->entries, event_low);
-		goto out;
-	}
-	offset = entry->offset;	/* extra fries with that? */
-	if (offset == 0)
-		size = entry->size;
-	else {
-		/* Ops, got an extra size field at 'offset'--read it */
-		const void *ptr = rceb;
-		size_t type_size = 0;
-		offset--;
-		size = -ENOSPC;			/* enough data for more? */
-		switch (entry->type) {
-		case UWB_EST_16:  type_size = sizeof(__le16); break;
-		case UWB_EST_8:   type_size = sizeof(u8);     break;
-		default: 	 BUG();
-		}
-		if (offset + type_size > rceb_size) {
-			dev_err(dev, "EST %p 0x%04x/%04x/%04x[%u]: "
-				"not enough data to read extra size\n",
-				est, est->type_event_high, est->vendor,
-				est->product, est->entries);
-			goto out;
-		}
-		size = entry->size;
-		ptr += offset;
-		switch (entry->type) {
-		case UWB_EST_16:  size += le16_to_cpu(*(__le16 *)ptr); break;
-		case UWB_EST_8:   size += *(u8 *)ptr;                  break;
-		default: 	 BUG();
-		}
-	}
-out:
-	return size;
-}
-
-
-/**
- * Guesses the size of a WA event
- *
- * @rceb: pointer to the buffer with the event
- * @rceb_size: size of the area pointed to by @rceb in bytes.
- * @returns: > 0      Size of the event
- *	     -ENOSPC  An area big enough was not provided to look
- *		      ahead into the event's guts and guess the size.
- *	     -EINVAL  Unknown event code (wEvent).
- *
- * This will look at the received RCEB and guess what is the total
- * size by checking all the tables registered with
- * uwb_est_register(). For variable sized events, it will look further
- * ahead into their length field to see how much data should be read.
- *
- * Note this size is *not* final--the neh (Notification/Event Handle)
- * might specificy an extra size to add or replace.
- */
-ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
-			  size_t rceb_size)
-{
-	/* FIXME: add vendor/product data */
-	ssize_t size;
-	struct device *dev = &rc->uwb_dev.dev;
-	unsigned long flags;
-	unsigned itr;
-	u16 type_event_high, event;
-
-	read_lock_irqsave(&uwb_est_lock, flags);
-	size = -ENOSPC;
-	if (rceb_size < sizeof(*rceb))
-		goto out;
-	event = le16_to_cpu(rceb->wEvent);
-	type_event_high = rceb->bEventType << 8 | (event & 0xff00) >> 8;
-	for (itr = 0; itr < uwb_est_used; itr++) {
-		if (uwb_est[itr].type_event_high != type_event_high)
-			continue;
-		size = uwb_est_get_size(rc, &uwb_est[itr],
-					event & 0x00ff, rceb, rceb_size);
-		/* try more tables that might handle the same type */
-		if (size != -ENOENT)
-			goto out;
-	}
-	dev_dbg(dev,
-		"event 0x%02x/%04x/%02x: no handlers available; RCEB %4ph\n",
-		(unsigned) rceb->bEventType,
-		(unsigned) le16_to_cpu(rceb->wEvent),
-		(unsigned) rceb->bEventContext,
-		rceb);
-	size = -ENOENT;
-out:
-	read_unlock_irqrestore(&uwb_est_lock, flags);
-	return size;
-}
-EXPORT_SYMBOL_GPL(uwb_est_find_size);
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
deleted file mode 100644
index cd03b7f827c1..000000000000
--- a/drivers/uwb/hwa-rc.c
+++ /dev/null
@@ -1,929 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * WUSB Host Wire Adapter: Radio Control Interface (WUSB[8.6])
- * Radio Control command/event transport
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Initialize the Radio Control interface Driver.
- *
- * For each device probed, creates an 'struct hwarc' which contains
- * just the representation of the UWB Radio Controller, and the logic
- * for reading notifications and passing them to the UWB Core.
- *
- * So we initialize all of those, register the UWB Radio Controller
- * and setup the notification/event handle to pipe the notifications
- * to the UWB management Daemon.
- *
- * Command and event filtering.
- *
- * This is the driver for the Radio Control Interface described in WUSB
- * 1.0. The core UWB module assumes that all drivers are compliant to the
- * WHCI 0.95 specification. We thus create a filter that parses all
- * incoming messages from the (WUSB 1.0) device and manipulate them to
- * conform to the WHCI 0.95 specification. Similarly, outgoing messages
- * are parsed and manipulated to conform to the WUSB 1.0 compliant messages
- * that the device expects. Only a few messages are affected:
- * Affected events:
- *    UWB_RC_EVT_BEACON
- *    UWB_RC_EVT_BP_SLOT_CHANGE
- *    UWB_RC_EVT_DRP_AVAIL
- *    UWB_RC_EVT_DRP
- * Affected commands:
- *    UWB_RC_CMD_SCAN
- *    UWB_RC_CMD_SET_DRP_IE
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/usb.h>
-#include <linux/usb/wusb.h>
-#include <linux/usb/wusb-wa.h>
-#include <linux/uwb.h>
-
-#include "uwb-internal.h"
-
-/* The device uses commands and events from the WHCI specification, although
- * reporting itself as WUSB compliant. */
-#define WUSB_QUIRK_WHCI_CMD_EVT		0x01
-
-/**
- * Descriptor for an instance of the UWB Radio Control Driver that
- * attaches to the RCI interface of the Host Wired Adapter.
- *
- * Unless there is a lock specific to the 'data members', all access
- * is protected by uwb_rc->mutex.
- *
- * The NEEP (Notification/Event EndPoint) URB (@neep_urb) writes to
- * @rd_buffer. Note there is no locking because it is perfectly (heh!)
- * serialized--probe() submits an URB, callback is called, processes
- * the data (synchronously), submits another URB, and so on. There is
- * no concurrent access to the buffer.
- */
-struct hwarc {
-	struct usb_device *usb_dev;
-	struct usb_interface *usb_iface;
-	struct uwb_rc *uwb_rc;		/* UWB host controller */
-	struct urb *neep_urb;		/* Notification endpoint handling */
-	struct edc neep_edc;
-	void *rd_buffer;		/* NEEP read buffer */
-};
-
-
-/* Beacon received notification (WUSB 1.0 [8.6.3.2]) */
-struct uwb_rc_evt_beacon_WUSB_0100 {
-	struct uwb_rceb rceb;
-	u8	bChannelNumber;
-	__le16	wBPSTOffset;
-	u8	bLQI;
-	u8	bRSSI;
-	__le16	wBeaconInfoLength;
-	u8	BeaconInfo[];
-} __attribute__((packed));
-
-/**
- * Filter WUSB 1.0 BEACON RCV notification to be WHCI 0.95
- *
- * @header: the incoming event
- * @buf_size: size of buffer containing incoming event
- * @new_size: size of event after filtering completed
- *
- * The WHCI 0.95 spec has a "Beacon Type" field. This value is unknown at
- * the time we receive the beacon from WUSB so we just set it to
- * UWB_RC_BEACON_TYPE_NEIGHBOR as a default.
- * The solution below allocates memory upon receipt of every beacon from a
- * WUSB device. This will deteriorate performance. What is the right way to
- * do this?
- */
-static
-int hwarc_filter_evt_beacon_WUSB_0100(struct uwb_rc *rc,
-				      struct uwb_rceb **header,
-				      const size_t buf_size,
-				      size_t *new_size)
-{
-	struct uwb_rc_evt_beacon_WUSB_0100 *be;
-	struct uwb_rc_evt_beacon *newbe;
-	size_t bytes_left, ielength;
-	struct device *dev = &rc->uwb_dev.dev;
-
-	be = container_of(*header, struct uwb_rc_evt_beacon_WUSB_0100, rceb);
-	bytes_left = buf_size;
-	if (bytes_left < sizeof(*be)) {
-		dev_err(dev, "Beacon Received Notification: Not enough data "
-			"to decode for filtering (%zu vs %zu bytes needed)\n",
-			bytes_left, sizeof(*be));
-		return -EINVAL;
-	}
-	bytes_left -= sizeof(*be);
-	ielength = le16_to_cpu(be->wBeaconInfoLength);
-	if (bytes_left < ielength) {
-		dev_err(dev, "Beacon Received Notification: Not enough data "
-			"to decode IEs (%zu vs %zu bytes needed)\n",
-			bytes_left, ielength);
-		return -EINVAL;
-	}
-	newbe = kzalloc(sizeof(*newbe) + ielength, GFP_ATOMIC);
-	if (newbe == NULL)
-		return -ENOMEM;
-	newbe->rceb = be->rceb;
-	newbe->bChannelNumber = be->bChannelNumber;
-	newbe->bBeaconType = UWB_RC_BEACON_TYPE_NEIGHBOR;
-	newbe->wBPSTOffset = be->wBPSTOffset;
-	newbe->bLQI = be->bLQI;
-	newbe->bRSSI = be->bRSSI;
-	newbe->wBeaconInfoLength = be->wBeaconInfoLength;
-	memcpy(newbe->BeaconInfo, be->BeaconInfo, ielength);
-	*header = &newbe->rceb;
-	*new_size = sizeof(*newbe) + ielength;
-	return 1;  /* calling function will free memory */
-}
-
-
-/* DRP Availability change notification (WUSB 1.0 [8.6.3.8]) */
-struct uwb_rc_evt_drp_avail_WUSB_0100 {
-	struct uwb_rceb rceb;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/**
- * Filter WUSB 1.0 DRP AVAILABILITY CHANGE notification to be WHCI 0.95
- *
- * @header: the incoming event
- * @buf_size: size of buffer containing incoming event
- * @new_size: size of event after filtering completed
- */
-static
-int hwarc_filter_evt_drp_avail_WUSB_0100(struct uwb_rc *rc,
-					 struct uwb_rceb **header,
-					 const size_t buf_size,
-					 size_t *new_size)
-{
-	struct uwb_rc_evt_drp_avail_WUSB_0100 *da;
-	struct uwb_rc_evt_drp_avail *newda;
-	struct uwb_ie_hdr *ie_hdr;
-	size_t bytes_left, ielength;
-	struct device *dev = &rc->uwb_dev.dev;
-
-
-	da = container_of(*header, struct uwb_rc_evt_drp_avail_WUSB_0100, rceb);
-	bytes_left = buf_size;
-	if (bytes_left < sizeof(*da)) {
-		dev_err(dev, "Not enough data to decode DRP Avail "
-			"Notification for filtering. Expected %zu, "
-			"received %zu.\n", (size_t)sizeof(*da), bytes_left);
-		return -EINVAL;
-	}
-	bytes_left -= sizeof(*da);
-	ielength = le16_to_cpu(da->wIELength);
-	if (bytes_left < ielength) {
-		dev_err(dev, "DRP Avail Notification filter: IE length "
-			"[%zu bytes] does not match actual length "
-			"[%zu bytes].\n", ielength, bytes_left);
-		return -EINVAL;
-	}
-	if (ielength < sizeof(*ie_hdr)) {
-		dev_err(dev, "DRP Avail Notification filter: Not enough "
-			"data to decode IE [%zu bytes, %zu needed]\n",
-			ielength, sizeof(*ie_hdr));
-		return -EINVAL;
-	}
-	ie_hdr = (void *) da->IEData;
-	if (ie_hdr->length > 32) {
-		dev_err(dev, "DRP Availability Change event has unexpected "
-			"length for filtering. Expected < 32 bytes, "
-			"got %zu bytes.\n", (size_t)ie_hdr->length);
-		return -EINVAL;
-	}
-	newda = kzalloc(sizeof(*newda), GFP_ATOMIC);
-	if (newda == NULL)
-		return -ENOMEM;
-	newda->rceb = da->rceb;
-	memcpy(newda->bmp, (u8 *) ie_hdr + sizeof(*ie_hdr), ie_hdr->length);
-	*header = &newda->rceb;
-	*new_size = sizeof(*newda);
-	return 1; /* calling function will free memory */
-}
-
-
-/* DRP notification (WUSB 1.0 [8.6.3.9]) */
-struct uwb_rc_evt_drp_WUSB_0100 {
-	struct uwb_rceb rceb;
-	struct uwb_dev_addr wSrcAddr;
-	u8 bExplicit;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/**
- * Filter WUSB 1.0 DRP Notification to be WHCI 0.95
- *
- * @header: the incoming event
- * @buf_size: size of buffer containing incoming event
- * @new_size: size of event after filtering completed
- *
- * It is hard to manage DRP reservations without having a Reason code.
- * Unfortunately there is none in the WUSB spec. We just set the default to
- * DRP IE RECEIVED.
- * We do not currently use the bBeaconSlotNumber value, so we set this to
- * zero for now.
- */
-static
-int hwarc_filter_evt_drp_WUSB_0100(struct uwb_rc *rc,
-				   struct uwb_rceb **header,
-				   const size_t buf_size,
-				   size_t *new_size)
-{
-	struct uwb_rc_evt_drp_WUSB_0100 *drpev;
-	struct uwb_rc_evt_drp *newdrpev;
-	size_t bytes_left, ielength;
-	struct device *dev = &rc->uwb_dev.dev;
-
-	drpev = container_of(*header, struct uwb_rc_evt_drp_WUSB_0100, rceb);
-	bytes_left = buf_size;
-	if (bytes_left < sizeof(*drpev)) {
-		dev_err(dev, "Not enough data to decode DRP Notification "
-			"for filtering. Expected %zu, received %zu.\n",
-			(size_t)sizeof(*drpev), bytes_left);
-		return -EINVAL;
-	}
-	ielength = le16_to_cpu(drpev->wIELength);
-	bytes_left -= sizeof(*drpev);
-	if (bytes_left < ielength) {
-		dev_err(dev, "DRP Notification filter: header length [%zu "
-			"bytes] does not match actual length [%zu "
-			"bytes].\n", ielength, bytes_left);
-		return -EINVAL;
-	}
-	newdrpev = kzalloc(sizeof(*newdrpev) + ielength, GFP_ATOMIC);
-	if (newdrpev == NULL)
-		return -ENOMEM;
-	newdrpev->rceb = drpev->rceb;
-	newdrpev->src_addr = drpev->wSrcAddr;
-	newdrpev->reason = UWB_DRP_NOTIF_DRP_IE_RCVD;
-	newdrpev->beacon_slot_number = 0;
-	newdrpev->ie_length = drpev->wIELength;
-	memcpy(newdrpev->ie_data, drpev->IEData, ielength);
-	*header = &newdrpev->rceb;
-	*new_size = sizeof(*newdrpev) + ielength;
-	return 1; /* calling function will free memory */
-}
-
-
-/* Scan Command (WUSB 1.0 [8.6.2.5]) */
-struct uwb_rc_cmd_scan_WUSB_0100 {
-	struct uwb_rccb rccb;
-	u8 bChannelNumber;
-	u8 bScanState;
-} __attribute__((packed));
-
-/**
- * Filter WHCI 0.95 SCAN command to be WUSB 1.0 SCAN command
- *
- * @header:   command sent to device (compliant to WHCI 0.95)
- * @size:     size of command sent to device
- *
- * We only reduce the size by two bytes because the WUSB 1.0 scan command
- * does not have the last field (wStarttime). Also, make sure we don't send
- * the device an unexpected scan type.
- */
-static
-int hwarc_filter_cmd_scan_WUSB_0100(struct uwb_rc *rc,
-				    struct uwb_rccb **header,
-				    size_t *size)
-{
-	struct uwb_rc_cmd_scan *sc;
-
-	sc = container_of(*header, struct uwb_rc_cmd_scan, rccb);
-
-	if (sc->bScanState == UWB_SCAN_ONLY_STARTTIME)
-		sc->bScanState = UWB_SCAN_ONLY;
-	/* Don't send the last two bytes. */
-	*size -= 2;
-	return 0;
-}
-
-
-/* SET DRP IE command (WUSB 1.0 [8.6.2.7]) */
-struct uwb_rc_cmd_set_drp_ie_WUSB_0100 {
-	struct uwb_rccb rccb;
-	u8 bExplicit;
-	__le16 wIELength;
-	struct uwb_ie_drp IEData[];
-} __attribute__((packed));
-
-/**
- * Filter WHCI 0.95 SET DRP IE command to be WUSB 1.0 SET DRP IE command
- *
- * @header:   command sent to device (compliant to WHCI 0.95)
- * @size:     size of command sent to device
- *
- * WUSB has an extra bExplicit field - we assume always explicit
- * negotiation so this field is set. The command expected by the device is
- * thus larger than the one prepared by the driver so we need to
- * reallocate memory to accommodate this.
- * We trust the driver to send us the correct data so no checking is done
- * on incoming data - evn though it is variable length.
- */
-static
-int hwarc_filter_cmd_set_drp_ie_WUSB_0100(struct uwb_rc *rc,
-					  struct uwb_rccb **header,
-					  size_t *size)
-{
-	struct uwb_rc_cmd_set_drp_ie *orgcmd;
-	struct uwb_rc_cmd_set_drp_ie_WUSB_0100 *cmd;
-	size_t ielength;
-
-	orgcmd = container_of(*header, struct uwb_rc_cmd_set_drp_ie, rccb);
-	ielength = le16_to_cpu(orgcmd->wIELength);
-	cmd = kzalloc(sizeof(*cmd) + ielength, GFP_KERNEL);
-	if (cmd == NULL)
-		return -ENOMEM;
-	cmd->rccb = orgcmd->rccb;
-	cmd->bExplicit = 0;
-	cmd->wIELength = orgcmd->wIELength;
-	memcpy(cmd->IEData, orgcmd->IEData, ielength);
-	*header = &cmd->rccb;
-	*size = sizeof(*cmd) + ielength;
-	return 1; /* calling function will free memory */
-}
-
-
-/**
- * Filter data from WHCI driver to WUSB device
- *
- * @header: WHCI 0.95 compliant command from driver
- * @size:   length of command
- *
- * The routine managing commands to the device (uwb_rc_cmd()) will call the
- * filtering function pointer (if it exists) before it passes any data to
- * the device. At this time the command has been formatted according to
- * WHCI 0.95 and is ready to be sent to the device.
- *
- * The filter function will be provided with the current command and its
- * length. The function will manipulate the command if necessary and
- * potentially reallocate memory for a command that needed more memory that
- * the given command. If new memory was created the function will return 1
- * to indicate to the calling function that the memory need to be freed
- * when not needed any more. The size will contain the new length of the
- * command.
- * If memory has not been allocated we rely on the original mechanisms to
- * free the memory of the command - even when we reduce the value of size.
- */
-static
-int hwarc_filter_cmd_WUSB_0100(struct uwb_rc *rc, struct uwb_rccb **header,
-			       size_t *size)
-{
-	int result;
-	struct uwb_rccb *rccb = *header;
-	int cmd = le16_to_cpu(rccb->wCommand);
-	switch (cmd) {
-	case UWB_RC_CMD_SCAN:
-		result = hwarc_filter_cmd_scan_WUSB_0100(rc, header, size);
-		break;
-	case UWB_RC_CMD_SET_DRP_IE:
-		result = hwarc_filter_cmd_set_drp_ie_WUSB_0100(rc, header, size);
-		break;
-	default:
-		result = -ENOANO;
-		break;
-	}
-	return result;
-}
-
-
-/**
- * Filter data from WHCI driver to WUSB device
- *
- * @header: WHCI 0.95 compliant command from driver
- * @size:   length of command
- *
- * Filter commands based on which protocol the device supports. The WUSB
- * errata should be the same as WHCI 0.95 so we do not filter that here -
- * only WUSB 1.0.
- */
-static
-int hwarc_filter_cmd(struct uwb_rc *rc, struct uwb_rccb **header,
-		     size_t *size)
-{
-	int result = -ENOANO;
-	if (rc->version == 0x0100)
-		result = hwarc_filter_cmd_WUSB_0100(rc, header, size);
-	return result;
-}
-
-
-/**
- * Compute return value as sum of incoming value and value at given offset
- *
- * @rceb:      event for which we compute the size, it contains a variable
- *	       length field.
- * @core_size: size of the "non variable" part of the event
- * @offset:    place in event where the length of the variable part is stored
- * @buf_size: total length of buffer in which event arrived - we need to make
- *	       sure we read the offset in memory that is still part of the event
- */
-static
-ssize_t hwarc_get_event_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
-			     size_t core_size, size_t offset,
-			     const size_t buf_size)
-{
-	ssize_t size = -ENOSPC;
-	const void *ptr = rceb;
-	size_t type_size = sizeof(__le16);
-	struct device *dev = &rc->uwb_dev.dev;
-
-	if (offset + type_size >= buf_size) {
-		dev_err(dev, "Not enough data to read extra size of event "
-			"0x%02x/%04x/%02x, only got %zu bytes.\n",
-			rceb->bEventType, le16_to_cpu(rceb->wEvent),
-			rceb->bEventContext, buf_size);
-		goto out;
-	}
-	ptr += offset;
-	size = core_size + le16_to_cpu(*(__le16 *)ptr);
-out:
-	return size;
-}
-
-
-/* Beacon slot change notification (WUSB 1.0 [8.6.3.5]) */
-struct uwb_rc_evt_bp_slot_change_WUSB_0100 {
-	struct uwb_rceb rceb;
-	u8 bSlotNumber;
-} __attribute__((packed));
-
-
-/**
- * Filter data from WUSB device to WHCI driver
- *
- * @header:	 incoming event
- * @buf_size:	 size of buffer in which event arrived
- * @_event_size: actual size of event in the buffer
- * @new_size:	 size of event after filtered
- *
- * We don't know how the buffer is constructed - there may be more than one
- * event in it so buffer length does not determine event length. We first
- * determine the expected size of the incoming event. This value is passed
- * back only if the actual filtering succeeded (so we know the computed
- * expected size is correct). This value will be zero if
- * the event did not need any filtering.
- *
- * WHCI interprets the BP Slot Change event's data differently than
- * WUSB. The event sizes are exactly the same. The data field
- * indicates the new beacon slot in which a RC is transmitting its
- * beacon. The maximum value of this is 96 (wMacBPLength ECMA-368
- * 17.16 (Table 117)). We thus know that the WUSB value will not set
- * the bit bNoSlot, so we don't really do anything (placeholder).
- */
-static
-int hwarc_filter_event_WUSB_0100(struct uwb_rc *rc, struct uwb_rceb **header,
-				 const size_t buf_size, size_t *_real_size,
-				 size_t *_new_size)
-{
-	int result = -ENOANO;
-	struct uwb_rceb *rceb = *header;
-	int event = le16_to_cpu(rceb->wEvent);
-	ssize_t event_size;
-	size_t core_size, offset;
-
-	if (rceb->bEventType != UWB_RC_CET_GENERAL)
-		goto out;
-	switch (event) {
-	case UWB_RC_EVT_BEACON:
-		core_size = sizeof(struct uwb_rc_evt_beacon_WUSB_0100);
-		offset = offsetof(struct uwb_rc_evt_beacon_WUSB_0100,
-				  wBeaconInfoLength);
-		event_size = hwarc_get_event_size(rc, rceb, core_size,
-						  offset, buf_size);
-		if (event_size < 0)
-			goto out;
-		*_real_size = event_size;
-		result = hwarc_filter_evt_beacon_WUSB_0100(rc, header,
-							   buf_size, _new_size);
-		break;
-	case UWB_RC_EVT_BP_SLOT_CHANGE:
-		*_new_size = *_real_size =
-			sizeof(struct uwb_rc_evt_bp_slot_change_WUSB_0100);
-		result = 0;
-		break;
-
-	case UWB_RC_EVT_DRP_AVAIL:
-		core_size = sizeof(struct uwb_rc_evt_drp_avail_WUSB_0100);
-		offset = offsetof(struct uwb_rc_evt_drp_avail_WUSB_0100,
-				  wIELength);
-		event_size = hwarc_get_event_size(rc, rceb, core_size,
-						  offset, buf_size);
-		if (event_size < 0)
-			goto out;
-		*_real_size = event_size;
-		result = hwarc_filter_evt_drp_avail_WUSB_0100(
-			rc, header, buf_size, _new_size);
-		break;
-
-	case UWB_RC_EVT_DRP:
-		core_size = sizeof(struct uwb_rc_evt_drp_WUSB_0100);
-		offset = offsetof(struct uwb_rc_evt_drp_WUSB_0100, wIELength);
-		event_size = hwarc_get_event_size(rc, rceb, core_size,
-						  offset, buf_size);
-		if (event_size < 0)
-			goto out;
-		*_real_size = event_size;
-		result = hwarc_filter_evt_drp_WUSB_0100(rc, header,
-							buf_size, _new_size);
-		break;
-
-	default:
-		break;
-	}
-out:
-	return result;
-}
-
-/**
- * Filter data from WUSB device to WHCI driver
- *
- * @header:	 incoming event
- * @buf_size:	 size of buffer in which event arrived
- * @_event_size: actual size of event in the buffer
- * @_new_size:	 size of event after filtered
- *
- * Filter events based on which protocol the device supports. The WUSB
- * errata should be the same as WHCI 0.95 so we do not filter that here -
- * only WUSB 1.0.
- *
- * If we don't handle it, we return -ENOANO (why the weird error code?
- * well, so if I get it, I can pinpoint in the code that raised
- * it...after all, not too many places use the higher error codes).
- */
-static
-int hwarc_filter_event(struct uwb_rc *rc, struct uwb_rceb **header,
-		       const size_t buf_size, size_t *_real_size,
-		       size_t *_new_size)
-{
-	int result = -ENOANO;
-	if (rc->version == 0x0100)
-		result =  hwarc_filter_event_WUSB_0100(
-			rc, header, buf_size, _real_size, _new_size);
-	return result;
-}
-
-
-/**
- * Execute an UWB RC command on HWA
- *
- * @rc:	      Instance of a Radio Controller that is a HWA
- * @cmd:      Buffer containing the RCCB and payload to execute
- * @cmd_size: Size of the command buffer.
- *
- * NOTE: rc's mutex has to be locked
- */
-static
-int hwarc_cmd(struct uwb_rc *uwb_rc, const struct uwb_rccb *cmd, size_t cmd_size)
-{
-	struct hwarc *hwarc = uwb_rc->priv;
-	return usb_control_msg(
-		hwarc->usb_dev, usb_sndctrlpipe(hwarc->usb_dev, 0),
-		WA_EXEC_RC_CMD, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE,
-		0, hwarc->usb_iface->cur_altsetting->desc.bInterfaceNumber,
-		(void *) cmd, cmd_size, 100 /* FIXME: this is totally arbitrary */);
-}
-
-static
-int hwarc_reset(struct uwb_rc *uwb_rc)
-{
-	struct hwarc *hwarc = uwb_rc->priv;
-	int result;
-
-	/* device lock must be held when calling usb_reset_device. */
-	result = usb_lock_device_for_reset(hwarc->usb_dev, NULL);
-	if (result >= 0) {
-		result = usb_reset_device(hwarc->usb_dev);
-		usb_unlock_device(hwarc->usb_dev);
-	}
-
-	return result;
-}
-
-/**
- * Callback for the notification and event endpoint
- *
- * Check's that everything is fine and then passes the read data to
- * the notification/event handling mechanism (neh).
- */
-static
-void hwarc_neep_cb(struct urb *urb)
-{
-	struct hwarc *hwarc = urb->context;
-	struct usb_interface *usb_iface = hwarc->usb_iface;
-	struct device *dev = &usb_iface->dev;
-	int result;
-
-	switch (result = urb->status) {
-	case 0:
-		uwb_rc_neh_grok(hwarc->uwb_rc, urb->transfer_buffer,
-				urb->actual_length);
-		break;
-	case -ECONNRESET:	/* Not an error, but a controlled situation; */
-	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
-		goto out;
-	case -ESHUTDOWN:	/* going away! */
-		goto out;
-	default:		/* On general errors, retry unless it gets ugly */
-		if (edc_inc(&hwarc->neep_edc, EDC_MAX_ERRORS,
-			    EDC_ERROR_TIMEFRAME))
-			goto error_exceeded;
-		dev_err(dev, "NEEP: URB error %d\n", urb->status);
-	}
-	result = usb_submit_urb(urb, GFP_ATOMIC);
-	if (result < 0 && result != -ENODEV && result != -EPERM) {
-		/* ignoring unrecoverable errors */
-		dev_err(dev, "NEEP: Can't resubmit URB (%d) resetting device\n",
-			result);
-		goto error;
-	}
-out:
-	return;
-
-error_exceeded:
-	dev_err(dev, "NEEP: URB max acceptable errors "
-		"exceeded, resetting device\n");
-error:
-	uwb_rc_neh_error(hwarc->uwb_rc, result);
-	uwb_rc_reset_all(hwarc->uwb_rc);
-	return;
-}
-
-static void hwarc_init(struct hwarc *hwarc)
-{
-	edc_init(&hwarc->neep_edc);
-}
-
-/**
- * Initialize the notification/event endpoint stuff
- *
- * Note this is effectively a parallel thread; it knows that
- * hwarc->uwb_rc always exists because the existence of a 'hwarc'
- * means that there is a reverence on the hwarc->uwb_rc (see
- * _probe()), and thus _neep_cb() can execute safely.
- */
-static int hwarc_neep_init(struct uwb_rc *rc)
-{
-	struct hwarc *hwarc = rc->priv;
-	struct usb_interface *iface = hwarc->usb_iface;
-	struct usb_device *usb_dev = interface_to_usbdev(iface);
-	struct device *dev = &iface->dev;
-	int result;
-	struct usb_endpoint_descriptor *epd;
-
-	epd = &iface->cur_altsetting->endpoint[0].desc;
-	hwarc->rd_buffer = (void *) __get_free_page(GFP_KERNEL);
-	if (hwarc->rd_buffer == NULL) {
-		dev_err(dev, "Unable to allocate notification's read buffer\n");
-		goto error_rd_buffer;
-	}
-	hwarc->neep_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (hwarc->neep_urb == NULL)
-		goto error_urb_alloc;
-	usb_fill_int_urb(hwarc->neep_urb, usb_dev,
-			 usb_rcvintpipe(usb_dev, epd->bEndpointAddress),
-			 hwarc->rd_buffer, PAGE_SIZE,
-			 hwarc_neep_cb, hwarc, epd->bInterval);
-	result = usb_submit_urb(hwarc->neep_urb, GFP_ATOMIC);
-	if (result < 0) {
-		dev_err(dev, "Cannot submit notification URB: %d\n", result);
-		goto error_neep_submit;
-	}
-	return 0;
-
-error_neep_submit:
-	usb_free_urb(hwarc->neep_urb);
-	hwarc->neep_urb = NULL;
-error_urb_alloc:
-	free_page((unsigned long)hwarc->rd_buffer);
-	hwarc->rd_buffer = NULL;
-error_rd_buffer:
-	return -ENOMEM;
-}
-
-
-/** Clean up all the notification endpoint resources */
-static void hwarc_neep_release(struct uwb_rc *rc)
-{
-	struct hwarc *hwarc = rc->priv;
-
-	usb_kill_urb(hwarc->neep_urb);
-	usb_free_urb(hwarc->neep_urb);
-	hwarc->neep_urb = NULL;
-
-	free_page((unsigned long)hwarc->rd_buffer);
-	hwarc->rd_buffer = NULL;
-}
-
-/**
- * Get the version from class-specific descriptor
- *
- * NOTE: this descriptor comes with the big bundled configuration
- *	 descriptor that includes the interfaces' and endpoints', so
- *	 we just look for it in the cached copy kept by the USB stack.
- *
- * NOTE2: We convert LE fields to CPU order.
- */
-static int hwarc_get_version(struct uwb_rc *rc)
-{
-	int result;
-
-	struct hwarc *hwarc = rc->priv;
-	struct uwb_rc_control_intf_class_desc *descr;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct usb_device *usb_dev = hwarc->usb_dev;
-	char *itr;
-	struct usb_descriptor_header *hdr;
-	size_t itr_size, actconfig_idx;
-	u16 version;
-
-	actconfig_idx = (usb_dev->actconfig - usb_dev->config) /
-		sizeof(usb_dev->config[0]);
-	itr = usb_dev->rawdescriptors[actconfig_idx];
-	itr_size = le16_to_cpu(usb_dev->actconfig->desc.wTotalLength);
-	while (itr_size >= sizeof(*hdr)) {
-		hdr = (struct usb_descriptor_header *) itr;
-		dev_dbg(dev, "Extra device descriptor: "
-			"type %02x/%u bytes @ %zu (%zu left)\n",
-			hdr->bDescriptorType, hdr->bLength,
-			(itr - usb_dev->rawdescriptors[actconfig_idx]),
-			itr_size);
-		if (hdr->bDescriptorType == USB_DT_CS_RADIO_CONTROL)
-			goto found;
-		itr += hdr->bLength;
-		itr_size -= hdr->bLength;
-	}
-	dev_err(dev, "cannot find Radio Control Interface Class descriptor\n");
-	return -ENODEV;
-
-found:
-	result = -EINVAL;
-	if (hdr->bLength > itr_size) {	/* is it available? */
-		dev_err(dev, "incomplete Radio Control Interface Class "
-			"descriptor (%zu bytes left, %u needed)\n",
-			itr_size, hdr->bLength);
-		goto error;
-	}
-	if (hdr->bLength < sizeof(*descr)) {
-		dev_err(dev, "short Radio Control Interface Class "
-			"descriptor\n");
-		goto error;
-	}
-	descr = (struct uwb_rc_control_intf_class_desc *) hdr;
-	/* Make LE fields CPU order */
-	version = __le16_to_cpu(descr->bcdRCIVersion);
-	if (version != 0x0100) {
-		dev_err(dev, "Device reports protocol version 0x%04x. We "
-			"do not support that. \n", version);
-		result = -EINVAL;
-		goto error;
-	}
-	rc->version = version;
-	dev_dbg(dev, "Device supports WUSB protocol version 0x%04x \n",	rc->version);
-	result = 0;
-error:
-	return result;
-}
-
-/*
- * By creating a 'uwb_rc', we have a reference on it -- that reference
- * is the one we drop when we disconnect.
- *
- * No need to switch altsettings; according to WUSB1.0[8.6.1.1], there
- * is only one altsetting allowed.
- */
-static int hwarc_probe(struct usb_interface *iface,
-		       const struct usb_device_id *id)
-{
-	int result;
-	struct uwb_rc *uwb_rc;
-	struct hwarc *hwarc;
-	struct device *dev = &iface->dev;
-
-	if (iface->cur_altsetting->desc.bNumEndpoints < 1)
-		return -ENODEV;
-	if (!usb_endpoint_xfer_int(&iface->cur_altsetting->endpoint[0].desc))
-		return -ENODEV;
-
-	result = -ENOMEM;
-	uwb_rc = uwb_rc_alloc();
-	if (uwb_rc == NULL) {
-		dev_err(dev, "unable to allocate RC instance\n");
-		goto error_rc_alloc;
-	}
-	hwarc = kzalloc(sizeof(*hwarc), GFP_KERNEL);
-	if (hwarc == NULL) {
-		dev_err(dev, "unable to allocate HWA RC instance\n");
-		goto error_alloc;
-	}
-	hwarc_init(hwarc);
-	hwarc->usb_dev = usb_get_dev(interface_to_usbdev(iface));
-	hwarc->usb_iface = usb_get_intf(iface);
-	hwarc->uwb_rc = uwb_rc;
-
-	uwb_rc->owner = THIS_MODULE;
-	uwb_rc->start = hwarc_neep_init;
-	uwb_rc->stop  = hwarc_neep_release;
-	uwb_rc->cmd   = hwarc_cmd;
-	uwb_rc->reset = hwarc_reset;
-	if (id->driver_info & WUSB_QUIRK_WHCI_CMD_EVT) {
-		uwb_rc->filter_cmd   = NULL;
-		uwb_rc->filter_event = NULL;
-	} else {
-		uwb_rc->filter_cmd   = hwarc_filter_cmd;
-		uwb_rc->filter_event = hwarc_filter_event;
-	}
-
-	result = uwb_rc_add(uwb_rc, dev, hwarc);
-	if (result < 0)
-		goto error_rc_add;
-	result = hwarc_get_version(uwb_rc);
-	if (result < 0) {
-		dev_err(dev, "cannot retrieve version of RC \n");
-		goto error_get_version;
-	}
-	usb_set_intfdata(iface, hwarc);
-	return 0;
-
-error_get_version:
-	uwb_rc_rm(uwb_rc);
-error_rc_add:
-	usb_put_intf(iface);
-	usb_put_dev(hwarc->usb_dev);
-	kfree(hwarc);
-error_alloc:
-	uwb_rc_put(uwb_rc);
-error_rc_alloc:
-	return result;
-}
-
-static void hwarc_disconnect(struct usb_interface *iface)
-{
-	struct hwarc *hwarc = usb_get_intfdata(iface);
-	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
-
-	usb_set_intfdata(hwarc->usb_iface, NULL);
-	uwb_rc_rm(uwb_rc);
-	usb_put_intf(hwarc->usb_iface);
-	usb_put_dev(hwarc->usb_dev);
-	kfree(hwarc);
-	uwb_rc_put(uwb_rc);	/* when creating the device, refcount = 1 */
-}
-
-static int hwarc_pre_reset(struct usb_interface *iface)
-{
-	struct hwarc *hwarc = usb_get_intfdata(iface);
-	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
-
-	uwb_rc_pre_reset(uwb_rc);
-	return 0;
-}
-
-static int hwarc_post_reset(struct usb_interface *iface)
-{
-	struct hwarc *hwarc = usb_get_intfdata(iface);
-	struct uwb_rc *uwb_rc = hwarc->uwb_rc;
-
-	return uwb_rc_post_reset(uwb_rc);
-}
-
-/** USB device ID's that we handle */
-static const struct usb_device_id hwarc_id_table[] = {
-	/* D-Link DUB-1210 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3d02, 0xe0, 0x01, 0x02),
-	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
-	/* Intel i1480 (using firmware 1.3PA2-20070828) */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x8086, 0x0c3b, 0xe0, 0x01, 0x02),
-	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
-	/* Alereon 5310 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5310, 0xe0, 0x01, 0x02),
-	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
-	/* Alereon 5611 */
-	{ USB_DEVICE_AND_INTERFACE_INFO(0x13dc, 0x5611, 0xe0, 0x01, 0x02),
-	  .driver_info = WUSB_QUIRK_WHCI_CMD_EVT },
-	/* Generic match for the Radio Control interface */
-	{ USB_INTERFACE_INFO(0xe0, 0x01, 0x02), },
-	{ },
-};
-MODULE_DEVICE_TABLE(usb, hwarc_id_table);
-
-static struct usb_driver hwarc_driver = {
-	.name =		"hwa-rc",
-	.id_table =	hwarc_id_table,
-	.probe =	hwarc_probe,
-	.disconnect =	hwarc_disconnect,
-	.pre_reset =    hwarc_pre_reset,
-	.post_reset =   hwarc_post_reset,
-};
-
-module_usb_driver(hwarc_driver);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Host Wireless Adapter Radio Control Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/uwb/i1480/Makefile b/drivers/uwb/i1480/Makefile
deleted file mode 100644
index d26fb9b845ae..000000000000
--- a/drivers/uwb/i1480/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_UWB_I1480U)	+= dfu/ i1480-est.o
diff --git a/drivers/uwb/i1480/dfu/Makefile b/drivers/uwb/i1480/dfu/Makefile
deleted file mode 100644
index 4739fdac5922..000000000000
--- a/drivers/uwb/i1480/dfu/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_UWB_I1480U)	+= i1480-dfu-usb.o
-
-i1480-dfu-usb-objs := \
-	dfu.o 	\
-	mac.o	\
-	phy.o	\
-	usb.o
-
-
diff --git a/drivers/uwb/i1480/dfu/dfu.c b/drivers/uwb/i1480/dfu/dfu.c
deleted file mode 100644
index ec1af858ead9..000000000000
--- a/drivers/uwb/i1480/dfu/dfu.c
+++ /dev/null
@@ -1,198 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless UWB Link 1480
- * Main driver
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Common code for firmware upload used by the USB and PCI version;
- * i1480_fw_upload() takes a device descriptor and uses the function
- * pointers it provides to upload firmware and prepare the PHY.
- *
- * As well, provides common functions used by the rest of the code.
- */
-#include "i1480-dfu.h"
-#include <linux/errno.h>
-#include <linux/delay.h>
-#include <linux/pci.h>
-#include <linux/device.h>
-#include <linux/uwb.h>
-#include <linux/random.h>
-#include <linux/export.h>
-
-/*
- * i1480_rceb_check - Check RCEB for expected field values
- * @i1480: pointer to device for which RCEB is being checked
- * @rceb: RCEB being checked
- * @cmd: which command the RCEB is related to
- * @context: expected context
- * @expected_type: expected event type
- * @expected_event: expected event
- *
- * If @cmd is NULL, do not print error messages, but still return an error
- * code.
- *
- * Return 0 if @rceb matches the expected values, -EINVAL otherwise.
- */
-int i1480_rceb_check(const struct i1480 *i1480, const struct uwb_rceb *rceb,
-		     const char *cmd, u8 context, u8 expected_type,
-		     unsigned expected_event)
-{
-	int result = 0;
-	struct device *dev = i1480->dev;
-	if (rceb->bEventContext != context) {
-		if (cmd)
-			dev_err(dev, "%s: unexpected context id 0x%02x "
-				"(expected 0x%02x)\n", cmd,
-				rceb->bEventContext, context);
-		result = -EINVAL;
-	}
-	if (rceb->bEventType != expected_type) {
-		if (cmd)
-			dev_err(dev, "%s: unexpected event type 0x%02x "
-				"(expected 0x%02x)\n", cmd,
-				rceb->bEventType, expected_type);
-		result = -EINVAL;
-	}
-	if (le16_to_cpu(rceb->wEvent) != expected_event) {
-		if (cmd)
-			dev_err(dev, "%s: unexpected event 0x%04x "
-				"(expected 0x%04x)\n", cmd,
-				le16_to_cpu(rceb->wEvent), expected_event);
-		result = -EINVAL;
-	}
-	return result;
-}
-EXPORT_SYMBOL_GPL(i1480_rceb_check);
-
-
-/*
- * Execute a Radio Control Command
- *
- * Command data has to be in i1480->cmd_buf.
- *
- * @returns size of the reply data filled in i1480->evt_buf or < 0 errno
- *          code on error.
- */
-ssize_t i1480_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size,
-		  size_t reply_size)
-{
-	ssize_t result;
-	struct uwb_rceb *reply = i1480->evt_buf;
-	struct uwb_rccb *cmd = i1480->cmd_buf;
-	u16 expected_event = reply->wEvent;
-	u8 expected_type = reply->bEventType;
-	u8 context;
-
-	init_completion(&i1480->evt_complete);
-	i1480->evt_result = -EINPROGRESS;
-	do {
-		get_random_bytes(&context, 1);
-	} while (context == 0x00 || context == 0xff);
-	cmd->bCommandContext = context;
-	result = i1480->cmd(i1480, cmd_name, cmd_size);
-	if (result < 0)
-		goto error;
-	/* wait for the callback to report a event was received */
-	result = wait_for_completion_interruptible_timeout(
-		&i1480->evt_complete, HZ);
-	if (result == 0) {
-		result = -ETIMEDOUT;
-		goto error;
-	}
-	if (result < 0)
-		goto error;
-	result = i1480->evt_result;
-	if (result < 0) {
-		dev_err(i1480->dev, "%s: command reply reception failed: %zd\n",
-			cmd_name, result);
-		goto error;
-	}
-	/*
-	 * Firmware versions >= 1.4.12224 for IOGear GUWA100U generate a
-	 * spurious notification after firmware is downloaded. So check whether
-	 * the receibed RCEB is such notification before assuming that the
-	 * command has failed.
-	 */
-	if (i1480_rceb_check(i1480, i1480->evt_buf, NULL,
-			     0, 0xfd, 0x0022) == 0) {
-		/* Now wait for the actual RCEB for this command. */
-		result = i1480->wait_init_done(i1480);
-		if (result < 0)
-			goto error;
-		result = i1480->evt_result;
-	}
-	if (result != reply_size) {
-		dev_err(i1480->dev, "%s returned only %zu bytes, %zu expected\n",
-			cmd_name, result, reply_size);
-		result = -EINVAL;
-		goto error;
-	}
-	/* Verify we got the right event in response */
-	result = i1480_rceb_check(i1480, i1480->evt_buf, cmd_name, context,
-				  expected_type, expected_event);
-error:
-	return result;
-}
-EXPORT_SYMBOL_GPL(i1480_cmd);
-
-
-static
-int i1480_print_state(struct i1480 *i1480)
-{
-	int result;
-	u32 *buf = (u32 *) i1480->cmd_buf;
-
-	result = i1480->read(i1480, 0x80080000, 2 * sizeof(*buf));
-	if (result < 0) {
-		dev_err(i1480->dev, "cannot read U & L states: %d\n", result);
-		goto error;
-	}
-	dev_info(i1480->dev, "state U 0x%08x, L 0x%08x\n", buf[0], buf[1]);
-error:
-	return result;
-}
-
-
-/*
- * PCI probe, firmware uploader
- *
- * _mac_fw_upload() will call rc_setup(), which needs an rc_release().
- */
-int i1480_fw_upload(struct i1480 *i1480)
-{
-	int result;
-
-	result = i1480_pre_fw_upload(i1480);	/* PHY pre fw */
-	if (result < 0 && result != -ENOENT) {
-		i1480_print_state(i1480);
-		goto error;
-	}
-	result = i1480_mac_fw_upload(i1480);	/* MAC fw */
-	if (result < 0) {
-		if (result == -ENOENT)
-			dev_err(i1480->dev, "Cannot locate MAC FW file '%s'\n",
-				i1480->mac_fw_name);
-		else
-			i1480_print_state(i1480);
-		goto error;
-	}
-	result = i1480_phy_fw_upload(i1480);	/* PHY fw */
-	if (result < 0 && result != -ENOENT) {
-		i1480_print_state(i1480);
-		goto error_rc_release;
-	}
-	/*
-	 * FIXME: find some reliable way to check whether firmware is running
-	 * properly. Maybe use some standard request that has no side effects?
-	 */
-	dev_info(i1480->dev, "firmware uploaded successfully\n");
-error_rc_release:
-	if (i1480->rc_release)
-		i1480->rc_release(i1480);
-	result = 0;
-error:
-	return result;
-}
-EXPORT_SYMBOL_GPL(i1480_fw_upload);
diff --git a/drivers/uwb/i1480/dfu/i1480-dfu.h b/drivers/uwb/i1480/dfu/i1480-dfu.h
deleted file mode 100644
index 9dd567d174b3..000000000000
--- a/drivers/uwb/i1480/dfu/i1480-dfu.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * i1480 Device Firmware Upload
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This driver is the firmware uploader for the Intel Wireless UWB
- * Link 1480 device (both in the USB and PCI incarnations).
- *
- * The process is quite simple: we stop the device, write the firmware
- * to its memory and then restart it. Wait for the device to let us
- * know it is done booting firmware. Ready.
- *
- * We might have to upload before or after a phy firmware (which might
- * be done in two methods, using a normal firmware image or through
- * the MPI port).
- *
- * Because USB and PCI use common methods, we just make ops out of the
- * common operations (read, write, wait_init_done and cmd) and
- * implement them in usb.c and pci.c.
- *
- * The flow is (some parts omitted):
- *
- * i1480_{usb,pci}_probe()	  On enumerate/discovery
- *   i1480_fw_upload()
- *     i1480_pre_fw_upload()
- *       __mac_fw_upload()
- *         fw_hdrs_load()
- *         mac_fw_hdrs_push()
- *           i1480->write()       [i1480_{usb,pci}_write()]
- *           i1480_fw_cmp()
- *             i1480->read()      [i1480_{usb,pci}_read()]
- *     i1480_mac_fw_upload()
- *       __mac_fw_upload()
- *       i1480->setup(()
- *       i1480->wait_init_done()
- *       i1480_cmd_reset()
- *         i1480->cmd()           [i1480_{usb,pci}_cmd()]
- *         ...
- *     i1480_phy_fw_upload()
- *       request_firmware()
- *       i1480_mpi_write()
- *         i1480->cmd()           [i1480_{usb,pci}_cmd()]
- *
- * Once the probe function enumerates the device and uploads the
- * firmware, we just exit with -ENODEV, as we don't really want to
- * attach to the device.
- */
-#ifndef __i1480_DFU_H__
-#define __i1480_DFU_H__
-
-#include <linux/uwb/spec.h>
-#include <linux/types.h>
-#include <linux/completion.h>
-
-#define i1480_FW_UPLOAD_MODE_MASK (cpu_to_le32(0x00000018))
-
-#if i1480_FW > 0x00000302
-#define i1480_RCEB_EXTENDED
-#endif
-
-struct uwb_rccb;
-struct uwb_rceb;
-
-/*
- * Common firmware upload handlers
- *
- * Normally you embed this struct in another one specific to your hw.
- *
- * @write	Write to device's memory from buffer.
- * @read	Read from device's memory to i1480->evt_buf.
- * @setup	Setup device after basic firmware is uploaded
- * @wait_init_done
- *              Wait for the device to send a notification saying init
- *              is done.
- * @cmd         FOP for issuing the command to the hardware. The
- *              command data is contained in i1480->cmd_buf and the size
- *              is supplied as an argument. The command replied is put
- *              in i1480->evt_buf and the size in i1480->evt_result (or if
- *              an error, a < 0 errno code).
- *
- * @cmd_buf	Memory buffer used to send commands to the device.
- *              Allocated by the upper layers i1480_fw_upload().
- *              Size has to be @buf_size.
- * @evt_buf	Memory buffer used to place the async notifications
- *              received by the hw. Allocated by the upper layers
- *              i1480_fw_upload().
- *              Size has to be @buf_size.
- * @cmd_complete
- *              Low level driver uses this to notify code waiting afor
- *              an event that the event has arrived and data is in
- *              i1480->evt_buf (and size/result in i1480->evt_result).
- * @hw_rev
- *              Use this value to activate dfu code to support new revisions
- *              of hardware.  i1480_init() sets this to a default value.
- *              It should be updated by the USB and PCI code.
- */
-struct i1480 {
-	struct device *dev;
-
-	int (*write)(struct i1480 *, u32 addr, const void *, size_t);
-	int (*read)(struct i1480 *, u32 addr, size_t);
-	int (*rc_setup)(struct i1480 *);
-	void (*rc_release)(struct i1480 *);
-	int (*wait_init_done)(struct i1480 *);
-	int (*cmd)(struct i1480 *, const char *cmd_name, size_t cmd_size);
-	const char *pre_fw_name;
-	const char *mac_fw_name;
-	const char *mac_fw_name_deprecate;	/* FIXME: Will go away */
-	const char *phy_fw_name;
-	u8 hw_rev;
-
-	size_t buf_size;	/* size of both evt_buf and cmd_buf */
-	void *evt_buf, *cmd_buf;
-	ssize_t evt_result;
-	struct completion evt_complete;
-};
-
-static inline
-void i1480_init(struct i1480 *i1480)
-{
-	i1480->hw_rev = 1;
-	init_completion(&i1480->evt_complete);
-}
-
-extern int i1480_fw_upload(struct i1480 *);
-extern int i1480_pre_fw_upload(struct i1480 *);
-extern int i1480_mac_fw_upload(struct i1480 *);
-extern int i1480_phy_fw_upload(struct i1480 *);
-extern ssize_t i1480_cmd(struct i1480 *, const char *, size_t, size_t);
-extern int i1480_rceb_check(const struct i1480 *,
-			    const struct uwb_rceb *, const char *, u8,
-			    u8, unsigned);
-
-enum {
-	/* Vendor specific command type */
-	i1480_CET_VS1 = 		0xfd,
-	/* i1480 commands */
-	i1480_CMD_SET_IP_MAS = 		0x000e,
-	i1480_CMD_GET_MAC_PHY_INFO = 	0x0003,
-	i1480_CMD_MPI_WRITE =		0x000f,
-	i1480_CMD_MPI_READ = 		0x0010,
-	/* i1480 events */
-#if i1480_FW > 0x00000302
-	i1480_EVT_CONFIRM = 		0x0002,
-	i1480_EVT_RM_INIT_DONE = 	0x0101,
-	i1480_EVT_DEV_ADD = 		0x0103,
-	i1480_EVT_DEV_RM = 		0x0104,
-	i1480_EVT_DEV_ID_CHANGE = 	0x0105,
-	i1480_EVT_GET_MAC_PHY_INFO =	i1480_CMD_GET_MAC_PHY_INFO,
-#else
-	i1480_EVT_CONFIRM = 		0x0002,
-	i1480_EVT_RM_INIT_DONE = 	0x0101,
-	i1480_EVT_DEV_ADD = 		0x0103,
-	i1480_EVT_DEV_RM = 		0x0104,
-	i1480_EVT_DEV_ID_CHANGE = 	0x0105,
-	i1480_EVT_GET_MAC_PHY_INFO =	i1480_EVT_CONFIRM,
-#endif
-};
-
-
-struct i1480_evt_confirm {
-	struct uwb_rceb rceb;
-#ifdef i1480_RCEB_EXTENDED
-	__le16 wParamLength;
-#endif
-	u8 bResultCode;
-} __attribute__((packed));
-
-
-struct i1480_rceb {
-	struct uwb_rceb rceb;
-#ifdef i1480_RCEB_EXTENDED
-	__le16 wParamLength;
-#endif
-} __attribute__((packed));
-
-
-/**
- * Get MAC & PHY Information confirm event structure
- *
- * Confirm event returned by the command.
- */
-struct i1480_evt_confirm_GMPI {
-#if i1480_FW > 0x00000302
-	struct uwb_rceb rceb;
-	__le16 wParamLength;
-	__le16 status;
-	u8 mac_addr[6];		/* EUI-64 bit IEEE address [still 8 bytes?] */
-	u8 dev_addr[2];
-	__le16 mac_fw_rev;	/* major = v >> 8; minor = v & 0xff */
-	u8 hw_rev;
-	u8 phy_vendor;
-	u8 phy_rev;		/* major v = >> 8; minor = v & 0xff */
-	__le16 mac_caps;
-	u8 phy_caps[3];
-	u8 key_stores;
-	__le16 mcast_addr_stores;
-	u8 sec_mode_supported;
-#else
-	struct uwb_rceb rceb;
-	u8 status;
-	u8 mac_addr[8];         /* EUI-64 bit IEEE address [still 8 bytes?] */
-	u8 dev_addr[2];
-	__le16 mac_fw_rev;      /* major = v >> 8; minor = v & 0xff */
-	__le16 phy_fw_rev;      /* major v = >> 8; minor = v & 0xff */
-	__le16 mac_caps;
-	u8 phy_caps;
-	u8 key_stores;
-	__le16 mcast_addr_stores;
-	u8 sec_mode_supported;
-#endif
-} __attribute__((packed));
-
-
-struct i1480_cmd_mpi_write {
-	struct uwb_rccb rccb;
-	__le16 size;
-	u8 data[];
-};
-
-
-struct i1480_cmd_mpi_read {
-	struct uwb_rccb rccb;
-	__le16 size;
-	struct {
-		u8 page, offset;
-	} __attribute__((packed)) data[];
-} __attribute__((packed));
-
-
-struct i1480_evt_mpi_read {
-	struct uwb_rceb rceb;
-#ifdef i1480_RCEB_EXTENDED
-	__le16 wParamLength;
-#endif
-	u8 bResultCode;
-	__le16 size;
-	struct {
-		u8 page, offset, value;
-	} __attribute__((packed)) data[];
-} __attribute__((packed));
-
-
-#endif /* #ifndef __i1480_DFU_H__ */
diff --git a/drivers/uwb/i1480/dfu/mac.c b/drivers/uwb/i1480/dfu/mac.c
deleted file mode 100644
index ddc224f01a7f..000000000000
--- a/drivers/uwb/i1480/dfu/mac.c
+++ /dev/null
@@ -1,496 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless UWB Link 1480
- * MAC Firmware upload implementation
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Implementation of the code for parsing the firmware file (extract
- * the headers and binary code chunks) in the fw_*() functions. The
- * code to upload pre and mac firmwares is the same, so it uses a
- * common entry point in __mac_fw_upload(), which uses the i1480
- * function pointers to push the firmware to the device.
- */
-#include <linux/delay.h>
-#include <linux/firmware.h>
-#include <linux/slab.h>
-#include <linux/uwb.h>
-#include "i1480-dfu.h"
-
-/*
- * Descriptor for a continuous segment of MAC fw data
- */
-struct fw_hdr {
-	unsigned long address;
-	size_t length;
-	const u32 *bin;
-	struct fw_hdr *next;
-};
-
-
-/* Free a chain of firmware headers */
-static
-void fw_hdrs_free(struct fw_hdr *hdr)
-{
-	struct fw_hdr *next;
-
-	while (hdr) {
-		next = hdr->next;
-		kfree(hdr);
-		hdr = next;
-	}
-}
-
-
-/* Fill a firmware header descriptor from a memory buffer */
-static
-int fw_hdr_load(struct i1480 *i1480, struct fw_hdr *hdr, unsigned hdr_cnt,
-		const char *_data, const u32 *data_itr, const u32 *data_top)
-{
-	size_t hdr_offset =  (const char *) data_itr - _data;
-	size_t remaining_size = (void *) data_top - (void *) data_itr;
-	if (data_itr + 2 > data_top) {
-		dev_err(i1480->dev, "fw hdr #%u/%zu: EOF reached in header at "
-		       "offset %zu, limit %zu\n",
-		       hdr_cnt, hdr_offset,
-		       (const char *) data_itr + 2 - _data,
-		       (const char *) data_top - _data);
-		return -EINVAL;
-	}
-	hdr->next = NULL;
-	hdr->address = le32_to_cpu(*data_itr++);
-	hdr->length = le32_to_cpu(*data_itr++);
-	hdr->bin = data_itr;
-	if (hdr->length > remaining_size) {
-		dev_err(i1480->dev, "fw hdr #%u/%zu: EOF reached in data; "
-		       "chunk too long (%zu bytes), only %zu left\n",
-		       hdr_cnt, hdr_offset, hdr->length, remaining_size);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-
-/**
- * Get a buffer where the firmware is supposed to be and create a
- * chain of headers linking them together.
- *
- * @phdr: where to place the pointer to the first header (headers link
- *        to the next via the @hdr->next ptr); need to free the whole
- *        chain when done.
- *
- * @_data: Pointer to the data buffer.
- *
- * @_data_size: Size of the data buffer (bytes); data size has to be a
- *              multiple of 4. Function will fail if not.
- *
- * Goes over the whole binary blob; reads the first chunk and creates
- * a fw hdr from it (which points to where the data is in @_data and
- * the length of the chunk); then goes on to the next chunk until
- * done. Each header is linked to the next.
- */
-static
-int fw_hdrs_load(struct i1480 *i1480, struct fw_hdr **phdr,
-		 const char *_data, size_t data_size)
-{
-	int result;
-	unsigned hdr_cnt = 0;
-	u32 *data = (u32 *) _data, *data_itr, *data_top;
-	struct fw_hdr *hdr, **prev_hdr = phdr;
-
-	result = -EINVAL;
-	/* Check size is ok and pointer is aligned */
-	if (data_size % sizeof(u32) != 0)
-		goto error;
-	if ((unsigned long) _data % sizeof(u16) != 0)
-		goto error;
-	*phdr = NULL;
-	data_itr = data;
-	data_top = (u32 *) (_data + data_size);
-	while (data_itr < data_top) {
-		result = -ENOMEM;
-		hdr = kmalloc(sizeof(*hdr), GFP_KERNEL);
-		if (hdr == NULL) {
-			dev_err(i1480->dev, "Cannot allocate fw header "
-			       "for chunk #%u\n", hdr_cnt);
-			goto error_alloc;
-		}
-		result = fw_hdr_load(i1480, hdr, hdr_cnt,
-				     _data, data_itr, data_top);
-		if (result < 0)
-			goto error_load;
-		data_itr += 2 + hdr->length;
-		*prev_hdr = hdr;
-		prev_hdr = &hdr->next;
-		hdr_cnt++;
-	};
-	*prev_hdr = NULL;
-	return 0;
-
-error_load:
-	kfree(hdr);
-error_alloc:
-	fw_hdrs_free(*phdr);
-error:
-	return result;
-}
-
-
-/**
- * Compares a chunk of fw with one in the devices's memory
- *
- * @i1480:     Device instance
- * @hdr:     Pointer to the firmware chunk
- * @returns: 0 if equal, < 0 errno on error. If > 0, it is the offset
- *           where the difference was found (plus one).
- *
- * Kind of dirty and simplistic, but does the trick in both the PCI
- * and USB version. We do a quick[er] memcmp(), and if it fails, we do
- * a byte-by-byte to find the offset.
- */
-static
-ssize_t i1480_fw_cmp(struct i1480 *i1480, struct fw_hdr *hdr)
-{
-	ssize_t result = 0;
-	u32 src_itr = 0, cnt;
-	size_t size = hdr->length*sizeof(hdr->bin[0]);
-	size_t chunk_size;
-	u8 *bin = (u8 *) hdr->bin;
-
-	while (size > 0) {
-		chunk_size = size < i1480->buf_size ? size : i1480->buf_size;
-		result = i1480->read(i1480, hdr->address + src_itr, chunk_size);
-		if (result < 0) {
-			dev_err(i1480->dev, "error reading for verification: "
-				"%zd\n", result);
-			goto error;
-		}
-		if (memcmp(i1480->cmd_buf, bin + src_itr, result)) {
-			u8 *buf = i1480->cmd_buf;
-			for (cnt = 0; cnt < result; cnt++)
-				if (bin[src_itr + cnt] != buf[cnt]) {
-					dev_err(i1480->dev, "byte failed at "
-						"src_itr %u cnt %u [0x%02x "
-						"vs 0x%02x]\n", src_itr, cnt,
-						bin[src_itr + cnt], buf[cnt]);
-					result = src_itr + cnt + 1;
-					goto cmp_failed;
-				}
-		}
-		src_itr += result;
-		size -= result;
-	}
-	result = 0;
-error:
-cmp_failed:
-	return result;
-}
-
-
-/**
- * Writes firmware headers to the device.
- *
- * @prd:     PRD instance
- * @hdr:     Processed firmware
- * @returns: 0 if ok, < 0 errno on error.
- */
-static
-int mac_fw_hdrs_push(struct i1480 *i1480, struct fw_hdr *hdr,
-		     const char *fw_name, const char *fw_tag)
-{
-	struct device *dev = i1480->dev;
-	ssize_t result = 0;
-	struct fw_hdr *hdr_itr;
-	int verif_retry_count;
-
-	/* Now, header by header, push them to the hw */
-	for (hdr_itr = hdr; hdr_itr != NULL; hdr_itr = hdr_itr->next) {
-		verif_retry_count = 0;
-retry:
-		dev_dbg(dev, "fw chunk (%zu @ 0x%08lx)\n",
-			hdr_itr->length * sizeof(hdr_itr->bin[0]),
-			hdr_itr->address);
-		result = i1480->write(i1480, hdr_itr->address, hdr_itr->bin,
-				    hdr_itr->length*sizeof(hdr_itr->bin[0]));
-		if (result < 0) {
-			dev_err(dev, "%s fw '%s': write failed (%zuB @ 0x%lx):"
-				" %zd\n", fw_tag, fw_name,
-				hdr_itr->length * sizeof(hdr_itr->bin[0]),
-				hdr_itr->address, result);
-			break;
-		}
-		result = i1480_fw_cmp(i1480, hdr_itr);
-		if (result < 0) {
-			dev_err(dev, "%s fw '%s': verification read "
-				"failed (%zuB @ 0x%lx): %zd\n",
-				fw_tag, fw_name,
-				hdr_itr->length * sizeof(hdr_itr->bin[0]),
-				hdr_itr->address, result);
-			break;
-		}
-		if (result > 0) {	/* Offset where it failed + 1 */
-			result--;
-			dev_err(dev, "%s fw '%s': WARNING: verification "
-				"failed at 0x%lx: retrying\n",
-				fw_tag, fw_name, hdr_itr->address + result);
-			if (++verif_retry_count < 3)
-				goto retry;	/* write this block again! */
-			dev_err(dev, "%s fw '%s': verification failed at 0x%lx: "
-				"tried %d times\n", fw_tag, fw_name,
-				hdr_itr->address + result, verif_retry_count);
-			result = -EINVAL;
-			break;
-		}
-	}
-	return result;
-}
-
-
-/** Puts the device in firmware upload mode.*/
-static
-int mac_fw_upload_enable(struct i1480 *i1480)
-{
-	int result;
-	u32 reg = 0x800000c0;
-	u32 *buffer = (u32 *)i1480->cmd_buf;
-
-	if (i1480->hw_rev > 1)
-		reg = 0x8000d0d4;
-	result = i1480->read(i1480, reg, sizeof(u32));
-	if (result < 0)
-		goto error_cmd;
-	*buffer &= ~i1480_FW_UPLOAD_MODE_MASK;
-	result = i1480->write(i1480, reg, buffer, sizeof(u32));
-	if (result < 0)
-		goto error_cmd;
-	return 0;
-error_cmd:
-	dev_err(i1480->dev, "can't enable fw upload mode: %d\n", result);
-	return result;
-}
-
-
-/** Gets the device out of firmware upload mode. */
-static
-int mac_fw_upload_disable(struct i1480 *i1480)
-{
-	int result;
-	u32 reg = 0x800000c0;
-	u32 *buffer = (u32 *)i1480->cmd_buf;
-
-	if (i1480->hw_rev > 1)
-		reg = 0x8000d0d4;
-	result = i1480->read(i1480, reg, sizeof(u32));
-	if (result < 0)
-		goto error_cmd;
-	*buffer |= i1480_FW_UPLOAD_MODE_MASK;
-	result = i1480->write(i1480, reg, buffer, sizeof(u32));
-	if (result < 0)
-		goto error_cmd;
-	return 0;
-error_cmd:
-	dev_err(i1480->dev, "can't disable fw upload mode: %d\n", result);
-	return result;
-}
-
-
-
-/**
- * Generic function for uploading a MAC firmware.
- *
- * @i1480:     Device instance
- * @fw_name: Name of firmware file to upload.
- * @fw_tag:  Name of the firmware type (for messages)
- *           [eg: MAC, PRE]
- * @do_wait: Wait for device to emit initialization done message (0
- *           for PRE fws, 1 for MAC fws).
- * @returns: 0 if ok, < 0 errno on error.
- */
-static
-int __mac_fw_upload(struct i1480 *i1480, const char *fw_name,
-		    const char *fw_tag)
-{
-	int result;
-	const struct firmware *fw;
-	struct fw_hdr *fw_hdrs;
-
-	result = request_firmware(&fw, fw_name, i1480->dev);
-	if (result < 0)	/* Up to caller to complain on -ENOENT */
-		goto out;
-	result = fw_hdrs_load(i1480, &fw_hdrs, fw->data, fw->size);
-	if (result < 0) {
-		dev_err(i1480->dev, "%s fw '%s': failed to parse firmware "
-			"file: %d\n", fw_tag, fw_name, result);
-		goto out_release;
-	}
-	result = mac_fw_upload_enable(i1480);
-	if (result < 0)
-		goto out_hdrs_release;
-	result = mac_fw_hdrs_push(i1480, fw_hdrs, fw_name, fw_tag);
-	mac_fw_upload_disable(i1480);
-out_hdrs_release:
-	if (result >= 0)
-		dev_info(i1480->dev, "%s fw '%s': uploaded\n", fw_tag, fw_name);
-	else
-		dev_err(i1480->dev, "%s fw '%s': failed to upload (%d), "
-			"power cycle device\n", fw_tag, fw_name, result);
-	fw_hdrs_free(fw_hdrs);
-out_release:
-	release_firmware(fw);
-out:
-	return result;
-}
-
-
-/**
- * Upload a pre-PHY firmware
- *
- */
-int i1480_pre_fw_upload(struct i1480 *i1480)
-{
-	int result;
-	result = __mac_fw_upload(i1480, i1480->pre_fw_name, "PRE");
-	if (result == 0)
-		msleep(400);
-	return result;
-}
-
-
-/**
- * Reset a the MAC and PHY
- *
- * @i1480:     Device's instance
- * @returns: 0 if ok, < 0 errno code on error
- *
- * We put the command on kmalloc'ed memory as some arches cannot do
- * USB from the stack. The reply event is copied from an stage buffer,
- * so it can be in the stack. See WUSB1.0[8.6.2.4] for more details.
- *
- * We issue the reset to make sure the UWB controller reinits the PHY;
- * this way we can now if the PHY init went ok.
- */
-static
-int i1480_cmd_reset(struct i1480 *i1480)
-{
-	int result;
-	struct uwb_rccb *cmd = (void *) i1480->cmd_buf;
-	struct i1480_evt_reset {
-		struct uwb_rceb rceb;
-		u8 bResultCode;
-	} __attribute__((packed)) *reply = (void *) i1480->evt_buf;
-
-	result = -ENOMEM;
-	cmd->bCommandType = UWB_RC_CET_GENERAL;
-	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_RESET);
-	reply->rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply->rceb.wEvent = UWB_RC_CMD_RESET;
-	result = i1480_cmd(i1480, "RESET", sizeof(*cmd), sizeof(*reply));
-	if (result < 0)
-		goto out;
-	if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(i1480->dev, "RESET: command execution failed: %u\n",
-			reply->bResultCode);
-		result = -EIO;
-	}
-out:
-	return result;
-
-}
-
-
-/* Wait for the MAC FW to start running */
-static
-int i1480_fw_is_running_q(struct i1480 *i1480)
-{
-	int cnt = 0;
-	int result;
-	u32 *val = (u32 *) i1480->cmd_buf;
-
-	for (cnt = 0; cnt < 10; cnt++) {
-		msleep(100);
-		result = i1480->read(i1480, 0x80080000, 4);
-		if (result < 0) {
-			dev_err(i1480->dev, "Can't read 0x8008000: %d\n", result);
-			goto out;
-		}
-		if (*val == 0x55555555UL)	/* fw running? cool */
-			goto out;
-	}
-	dev_err(i1480->dev, "Timed out waiting for fw to start\n");
-	result = -ETIMEDOUT;
-out:
-	return result;
-
-}
-
-
-/**
- * Upload MAC firmware, wait for it to start
- *
- * @i1480:     Device instance
- * @fw_name: Name of the file that contains the firmware
- *
- * This has to be called after the pre fw has been uploaded (if
- * there is any).
- */
-int i1480_mac_fw_upload(struct i1480 *i1480)
-{
-	int result = 0, deprecated_name = 0;
-	struct i1480_rceb *rcebe = (void *) i1480->evt_buf;
-
-	result = __mac_fw_upload(i1480, i1480->mac_fw_name, "MAC");
-	if (result == -ENOENT) {
-		result = __mac_fw_upload(i1480, i1480->mac_fw_name_deprecate,
-					 "MAC");
-		deprecated_name = 1;
-	}
-	if (result < 0)
-		return result;
-	if (deprecated_name == 1)
-		dev_warn(i1480->dev,
-			 "WARNING: firmware file name %s is deprecated, "
-			 "please rename to %s\n",
-			 i1480->mac_fw_name_deprecate, i1480->mac_fw_name);
-	result = i1480_fw_is_running_q(i1480);
-	if (result < 0)
-		goto error_fw_not_running;
-	result = i1480->rc_setup ? i1480->rc_setup(i1480) : 0;
-	if (result < 0) {
-		dev_err(i1480->dev, "Cannot setup after MAC fw upload: %d\n",
-			result);
-		goto error_setup;
-	}
-	result = i1480->wait_init_done(i1480);	/* wait init'on */
-	if (result < 0) {
-		dev_err(i1480->dev, "MAC fw '%s': Initialization timed out "
-			"(%d)\n", i1480->mac_fw_name, result);
-		goto error_init_timeout;
-	}
-	/* verify we got the right initialization done event */
-	if (i1480->evt_result != sizeof(*rcebe)) {
-		dev_err(i1480->dev, "MAC fw '%s': initialization event returns "
-			"wrong size (%zu bytes vs %zu needed)\n",
-			i1480->mac_fw_name, i1480->evt_result, sizeof(*rcebe));
-		goto error_size;
-	}
-	result = -EIO;
-	if (i1480_rceb_check(i1480, &rcebe->rceb, NULL, 0, i1480_CET_VS1,
-			     i1480_EVT_RM_INIT_DONE) < 0) {
-		dev_err(i1480->dev, "wrong initialization event 0x%02x/%04x/%02x "
-			"received; expected 0x%02x/%04x/00\n",
-			rcebe->rceb.bEventType, le16_to_cpu(rcebe->rceb.wEvent),
-			rcebe->rceb.bEventContext, i1480_CET_VS1,
-			i1480_EVT_RM_INIT_DONE);
-		goto error_init_timeout;
-	}
-	result = i1480_cmd_reset(i1480);
-	if (result < 0)
-		dev_err(i1480->dev, "MAC fw '%s': MBOA reset failed (%d)\n",
-			i1480->mac_fw_name, result);
-error_fw_not_running:
-error_init_timeout:
-error_size:
-error_setup:
-	return result;
-}
diff --git a/drivers/uwb/i1480/dfu/phy.c b/drivers/uwb/i1480/dfu/phy.c
deleted file mode 100644
index 50da4527c113..000000000000
--- a/drivers/uwb/i1480/dfu/phy.c
+++ /dev/null
@@ -1,190 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless UWB Link 1480
- * PHY parameters upload
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Code for uploading the PHY parameters to the PHY through the UWB
- * Radio Control interface.
- *
- * We just send the data through the MPI interface using HWA-like
- * commands and then reset the PHY to make sure it is ok.
- */
-#include <linux/delay.h>
-#include <linux/device.h>
-#include <linux/firmware.h>
-#include <linux/usb/wusb.h>
-#include "i1480-dfu.h"
-
-
-/**
- * Write a value array to an address of the MPI interface
- *
- * @i1480:	Device descriptor
- * @data:	Data array to write
- * @size:	Size of the data array
- * @returns:	0 if ok, < 0 errno code on error.
- *
- * The data array is organized into pairs:
- *
- * ADDRESS VALUE
- *
- * ADDRESS is BE 16 bit unsigned, VALUE 8 bit unsigned. Size thus has
- * to be a multiple of three.
- */
-static
-int i1480_mpi_write(struct i1480 *i1480, const void *data, size_t size)
-{
-	int result;
-	struct i1480_cmd_mpi_write *cmd = i1480->cmd_buf;
-	struct i1480_evt_confirm *reply = i1480->evt_buf;
-
-	BUG_ON(size > 480);
-	result = -ENOMEM;
-	cmd->rccb.bCommandType = i1480_CET_VS1;
-	cmd->rccb.wCommand = cpu_to_le16(i1480_CMD_MPI_WRITE);
-	cmd->size = cpu_to_le16(size);
-	memcpy(cmd->data, data, size);
-	reply->rceb.bEventType = i1480_CET_VS1;
-	reply->rceb.wEvent = i1480_CMD_MPI_WRITE;
-	result = i1480_cmd(i1480, "MPI-WRITE", sizeof(*cmd) + size, sizeof(*reply));
-	if (result < 0)
-		goto out;
-	if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(i1480->dev, "MPI-WRITE: command execution failed: %d\n",
-			reply->bResultCode);
-		result = -EIO;
-	}
-out:
-	return result;
-}
-
-
-/**
- * Read a value array to from an address of the MPI interface
- *
- * @i1480:	Device descriptor
- * @data:	where to place the read array
- * @srcaddr:	Where to read from
- * @size:	Size of the data read array
- * @returns:	0 if ok, < 0 errno code on error.
- *
- * The command data array is organized into pairs ADDR0 ADDR1..., and
- * the returned data in ADDR0 VALUE0 ADDR1 VALUE1...
- *
- * We generate the command array to be a sequential read and then
- * rearrange the result.
- *
- * We use the i1480->cmd_buf for the command, i1480->evt_buf for the reply.
- *
- * As the reply has to fit in 512 bytes (i1480->evt_buffer), the max amount
- * of values we can read is (512 - sizeof(*reply)) / 3
- */
-static
-int i1480_mpi_read(struct i1480 *i1480, u8 *data, u16 srcaddr, size_t size)
-{
-	int result;
-	struct i1480_cmd_mpi_read *cmd = i1480->cmd_buf;
-	struct i1480_evt_mpi_read *reply = i1480->evt_buf;
-	unsigned cnt;
-
-	memset(i1480->cmd_buf, 0x69, 512);
-	memset(i1480->evt_buf, 0x69, 512);
-
-	BUG_ON(size > (i1480->buf_size - sizeof(*reply)) / 3);
-	result = -ENOMEM;
-	cmd->rccb.bCommandType = i1480_CET_VS1;
-	cmd->rccb.wCommand = cpu_to_le16(i1480_CMD_MPI_READ);
-	cmd->size = cpu_to_le16(3*size);
-	for (cnt = 0; cnt < size; cnt++) {
-		cmd->data[cnt].page = (srcaddr + cnt) >> 8;
-		cmd->data[cnt].offset = (srcaddr + cnt) & 0xff;
-	}
-	reply->rceb.bEventType = i1480_CET_VS1;
-	reply->rceb.wEvent = i1480_CMD_MPI_READ;
-	result = i1480_cmd(i1480, "MPI-READ", sizeof(*cmd) + 2*size,
-			sizeof(*reply) + 3*size);
-	if (result < 0)
-		goto out;
-	if (reply->bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(i1480->dev, "MPI-READ: command execution failed: %d\n",
-			reply->bResultCode);
-		result = -EIO;
-		goto out;
-	}
-	for (cnt = 0; cnt < size; cnt++) {
-		if (reply->data[cnt].page != (srcaddr + cnt) >> 8)
-			dev_err(i1480->dev, "MPI-READ: page inconsistency at "
-				"index %u: expected 0x%02x, got 0x%02x\n", cnt,
-				(srcaddr + cnt) >> 8, reply->data[cnt].page);
-		if (reply->data[cnt].offset != ((srcaddr + cnt) & 0x00ff))
-			dev_err(i1480->dev, "MPI-READ: offset inconsistency at "
-				"index %u: expected 0x%02x, got 0x%02x\n", cnt,
-				(srcaddr + cnt) & 0x00ff,
-				reply->data[cnt].offset);
-		data[cnt] = reply->data[cnt].value;
-	}
-	result = 0;
-out:
-	return result;
-}
-
-
-/**
- * Upload a PHY firmware, wait for it to start
- *
- * @i1480:     Device instance
- * @fw_name: Name of the file that contains the firmware
- *
- * We assume the MAC fw is up and running. This means we can use the
- * MPI interface to write the PHY firmware. Once done, we issue an
- * MBOA Reset, which will force the MAC to reset and reinitialize the
- * PHY. If that works, we are ready to go.
- *
- * Max packet size for the MPI write is 512, so the max buffer is 480
- * (which gives us 160 byte triads of MSB, LSB and VAL for the data).
- */
-int i1480_phy_fw_upload(struct i1480 *i1480)
-{
-	int result;
-	const struct firmware *fw;
-	const char *data_itr, *data_top;
-	const size_t MAX_BLK_SIZE = 480;	/* 160 triads */
-	size_t data_size;
-	u8 phy_stat;
-
-	result = request_firmware(&fw, i1480->phy_fw_name, i1480->dev);
-	if (result < 0)
-		goto out;
-	/* Loop writing data in chunks as big as possible until done. */
-	for (data_itr = fw->data, data_top = data_itr + fw->size;
-	     data_itr < data_top; data_itr += MAX_BLK_SIZE) {
-		data_size = min(MAX_BLK_SIZE, (size_t) (data_top - data_itr));
-		result = i1480_mpi_write(i1480, data_itr, data_size);
-		if (result < 0)
-			goto error_mpi_write;
-	}
-	/* Read MPI page 0, offset 6; if 0, PHY was initialized correctly. */
-	result = i1480_mpi_read(i1480, &phy_stat, 0x0006, 1);
-	if (result < 0) {
-		dev_err(i1480->dev, "PHY: can't get status: %d\n", result);
-		goto error_mpi_status;
-	}
-	if (phy_stat != 0) {
-		result = -ENODEV;
-		dev_info(i1480->dev, "error, PHY not ready: %u\n", phy_stat);
-		goto error_phy_status;
-	}
-	dev_info(i1480->dev, "PHY fw '%s': uploaded\n", i1480->phy_fw_name);
-error_phy_status:
-error_mpi_status:
-error_mpi_write:
-	release_firmware(fw);
-	if (result < 0)
-		dev_err(i1480->dev, "PHY fw '%s': failed to upload (%d), "
-			"power cycle device\n", i1480->phy_fw_name, result);
-out:
-	return result;
-}
diff --git a/drivers/uwb/i1480/dfu/usb.c b/drivers/uwb/i1480/dfu/usb.c
deleted file mode 100644
index 6129a8f4b5f2..000000000000
--- a/drivers/uwb/i1480/dfu/usb.c
+++ /dev/null
@@ -1,448 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless UWB Link 1480
- * USB SKU firmware upload implementation
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This driver will prepare the i1480 device to behave as a real
- * Wireless USB HWA adaptor by uploading the firmware.
- *
- * When the device is connected or driver is loaded, i1480_usb_probe()
- * is called--this will allocate and initialize the device structure,
- * fill in the pointers to the common functions (read, write,
- * wait_init_done and cmd for HWA command execution) and once that is
- * done, call the common firmware uploading routine. Then clean up and
- * return -ENODEV, as we don't attach to the device.
- *
- * The rest are the basic ops we implement that the fw upload code
- * uses to do its job. All the ops in the common code are i1480->NAME,
- * the functions are i1480_usb_NAME().
- */
-#include <linux/module.h>
-#include <linux/usb.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/uwb.h>
-#include <linux/usb/wusb.h>
-#include <linux/usb/wusb-wa.h>
-#include "i1480-dfu.h"
-
-struct i1480_usb {
-	struct i1480 i1480;
-	struct usb_device *usb_dev;
-	struct usb_interface *usb_iface;
-	struct urb *neep_urb;	/* URB for reading from EP1 */
-};
-
-
-static
-void i1480_usb_init(struct i1480_usb *i1480_usb)
-{
-	i1480_init(&i1480_usb->i1480);
-}
-
-
-static
-int i1480_usb_create(struct i1480_usb *i1480_usb, struct usb_interface *iface)
-{
-	struct usb_device *usb_dev = interface_to_usbdev(iface);
-	int result = -ENOMEM;
-
-	i1480_usb->usb_dev = usb_get_dev(usb_dev);	/* bind the USB device */
-	i1480_usb->usb_iface = usb_get_intf(iface);
-	usb_set_intfdata(iface, i1480_usb);		/* Bind the driver to iface0 */
-	i1480_usb->neep_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (i1480_usb->neep_urb == NULL)
-		goto error;
-	return 0;
-
-error:
-	usb_set_intfdata(iface, NULL);
-	usb_put_intf(iface);
-	usb_put_dev(usb_dev);
-	return result;
-}
-
-
-static
-void i1480_usb_destroy(struct i1480_usb *i1480_usb)
-{
-	usb_kill_urb(i1480_usb->neep_urb);
-	usb_free_urb(i1480_usb->neep_urb);
-	usb_set_intfdata(i1480_usb->usb_iface, NULL);
-	usb_put_intf(i1480_usb->usb_iface);
-	usb_put_dev(i1480_usb->usb_dev);
-}
-
-
-/**
- * Write a buffer to a memory address in the i1480 device
- *
- * @i1480:  i1480 instance
- * @memory_address:
- *          Address where to write the data buffer to.
- * @buffer: Buffer to the data
- * @size:   Size of the buffer [has to be < 512].
- * @returns: 0 if ok, < 0 errno code on error.
- *
- * Data buffers to USB cannot be on the stack or in vmalloc'ed areas,
- * so we copy it to the local i1480 buffer before proceeding. In any
- * case, we have a max size we can send.
- */
-static
-int i1480_usb_write(struct i1480 *i1480, u32 memory_address,
-		    const void *buffer, size_t size)
-{
-	int result = 0;
-	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
-	size_t buffer_size, itr = 0;
-
-	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
-	while (size > 0) {
-		buffer_size = size < i1480->buf_size ? size : i1480->buf_size;
-		memcpy(i1480->cmd_buf, buffer + itr, buffer_size);
-		result = usb_control_msg(
-			i1480_usb->usb_dev, usb_sndctrlpipe(i1480_usb->usb_dev, 0),
-			0xf0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-			memory_address,	(memory_address >> 16),
-			i1480->cmd_buf, buffer_size, 100 /* FIXME: arbitrary */);
-		if (result < 0)
-			break;
-		itr += result;
-		memory_address += result;
-		size -= result;
-	}
-	return result;
-}
-
-
-/**
- * Read a block [max size 512] of the device's memory to @i1480's buffer.
- *
- * @i1480: i1480 instance
- * @memory_address:
- *         Address where to read from.
- * @size:  Size to read. Smaller than or equal to 512.
- * @returns: >= 0 number of bytes written if ok, < 0 errno code on error.
- *
- * NOTE: if the memory address or block is incorrect, you might get a
- *       stall or a different memory read. Caller has to verify the
- *       memory address and size passed back in the @neh structure.
- */
-static
-int i1480_usb_read(struct i1480 *i1480, u32 addr, size_t size)
-{
-	ssize_t result = 0, bytes = 0;
-	size_t itr, read_size = i1480->buf_size;
-	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
-
-	BUG_ON(size > i1480->buf_size);
-	BUG_ON(size & 0x3); /* Needs to be a multiple of 4 */
-	BUG_ON(read_size > 512);
-
-	if (addr >= 0x8000d200 && addr < 0x8000d400)	/* Yeah, HW quirk */
-		read_size = 4;
-
-	for (itr = 0; itr < size; itr += read_size) {
-		size_t itr_addr = addr + itr;
-		size_t itr_size = min(read_size, size - itr);
-		result = usb_control_msg(
-			i1480_usb->usb_dev, usb_rcvctrlpipe(i1480_usb->usb_dev, 0),
-			0xf0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-			itr_addr, (itr_addr >> 16),
-			i1480->cmd_buf + itr, itr_size,
-			100 /* FIXME: arbitrary */);
-		if (result < 0) {
-			dev_err(i1480->dev, "%s: USB read error: %zd\n",
-				__func__, result);
-			goto out;
-		}
-		if (result != itr_size) {
-			result = -EIO;
-			dev_err(i1480->dev,
-				"%s: partial read got only %zu bytes vs %zu expected\n",
-				__func__, result, itr_size);
-			goto out;
-		}
-		bytes += result;
-	}
-	result = bytes;
-out:
-	return result;
-}
-
-
-/**
- * Callback for reads on the notification/event endpoint
- *
- * Just enables the completion read handler.
- */
-static
-void i1480_usb_neep_cb(struct urb *urb)
-{
-	struct i1480 *i1480 = urb->context;
-	struct device *dev = i1480->dev;
-
-	switch (urb->status) {
-	case 0:
-		break;
-	case -ECONNRESET:	/* Not an error, but a controlled situation; */
-	case -ENOENT:		/* (we killed the URB)...so, no broadcast */
-		dev_dbg(dev, "NEEP: reset/noent %d\n", urb->status);
-		break;
-	case -ESHUTDOWN:	/* going away! */
-		dev_dbg(dev, "NEEP: down %d\n", urb->status);
-		break;
-	default:
-		dev_err(dev, "NEEP: unknown status %d\n", urb->status);
-		break;
-	}
-	i1480->evt_result = urb->actual_length;
-	complete(&i1480->evt_complete);
-	return;
-}
-
-
-/**
- * Wait for the MAC FW to initialize
- *
- * MAC FW sends a 0xfd/0101/00 notification to EP1 when done
- * initializing. Get that notification into i1480->evt_buf; upper layer
- * will verify it.
- *
- * Set i1480->evt_result with the result of getting the event or its
- * size (if successful).
- *
- * Delivers the data directly to i1480->evt_buf
- */
-static
-int i1480_usb_wait_init_done(struct i1480 *i1480)
-{
-	int result;
-	struct device *dev = i1480->dev;
-	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
-	struct usb_endpoint_descriptor *epd;
-
-	init_completion(&i1480->evt_complete);
-	i1480->evt_result = -EINPROGRESS;
-	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
-	usb_fill_int_urb(i1480_usb->neep_urb, i1480_usb->usb_dev,
-			 usb_rcvintpipe(i1480_usb->usb_dev, epd->bEndpointAddress),
-			 i1480->evt_buf, i1480->buf_size,
-			 i1480_usb_neep_cb, i1480, epd->bInterval);
-	result = usb_submit_urb(i1480_usb->neep_urb, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "init done: cannot submit NEEP read: %d\n",
-			result);
-		goto error_submit;
-	}
-	/* Wait for the USB callback to get the data */
-	result = wait_for_completion_interruptible_timeout(
-		&i1480->evt_complete, HZ);
-	if (result <= 0) {
-		result = result == 0 ? -ETIMEDOUT : result;
-		goto error_wait;
-	}
-	usb_kill_urb(i1480_usb->neep_urb);
-	return 0;
-
-error_wait:
-	usb_kill_urb(i1480_usb->neep_urb);
-error_submit:
-	i1480->evt_result = result;
-	return result;
-}
-
-
-/**
- * Generic function for issuing commands to the i1480
- *
- * @i1480:      i1480 instance
- * @cmd_name:   Name of the command (for error messages)
- * @cmd:        Pointer to command buffer
- * @cmd_size:   Size of the command buffer
- * @reply:      Buffer for the reply event
- * @reply_size: Expected size back (including RCEB); the reply buffer
- *              is assumed to be as big as this.
- * @returns:    >= 0 size of the returned event data if ok,
- *              < 0 errno code on error.
- *
- * Arms the NE handle, issues the command to the device and checks the
- * basics of the reply event.
- */
-static
-int i1480_usb_cmd(struct i1480 *i1480, const char *cmd_name, size_t cmd_size)
-{
-	int result;
-	struct device *dev = i1480->dev;
-	struct i1480_usb *i1480_usb = container_of(i1480, struct i1480_usb, i1480);
-	struct usb_endpoint_descriptor *epd;
-	struct uwb_rccb *cmd = i1480->cmd_buf;
-	u8 iface_no;
-
-	/* Post a read on the notification & event endpoint */
-	iface_no = i1480_usb->usb_iface->cur_altsetting->desc.bInterfaceNumber;
-	epd = &i1480_usb->usb_iface->cur_altsetting->endpoint[0].desc;
-	usb_fill_int_urb(
-		i1480_usb->neep_urb, i1480_usb->usb_dev,
-		usb_rcvintpipe(i1480_usb->usb_dev, epd->bEndpointAddress),
-		i1480->evt_buf, i1480->buf_size,
-		i1480_usb_neep_cb, i1480, epd->bInterval);
-	result = usb_submit_urb(i1480_usb->neep_urb, GFP_KERNEL);
-	if (result < 0) {
-		dev_err(dev, "%s: cannot submit NEEP read: %d\n",
-			cmd_name, result);
-		goto error_submit_ep1;
-	}
-	/* Now post the command on EP0 */
-	result = usb_control_msg(
-		i1480_usb->usb_dev, usb_sndctrlpipe(i1480_usb->usb_dev, 0),
-		WA_EXEC_RC_CMD,
-		USB_DIR_OUT | USB_RECIP_INTERFACE | USB_TYPE_CLASS,
-		0, iface_no,
-		cmd, cmd_size,
-		100 /* FIXME: this is totally arbitrary */);
-	if (result < 0) {
-		dev_err(dev, "%s: control request failed: %d\n",
-			cmd_name, result);
-		goto error_submit_ep0;
-	}
-	return result;
-
-error_submit_ep0:
-	usb_kill_urb(i1480_usb->neep_urb);
-error_submit_ep1:
-	return result;
-}
-
-
-/*
- * Probe a i1480 device for uploading firmware.
- *
- * We attach only to interface #0, which is the radio control interface.
- */
-static
-int i1480_usb_probe(struct usb_interface *iface, const struct usb_device_id *id)
-{
-	struct usb_device *udev = interface_to_usbdev(iface);
-	struct i1480_usb *i1480_usb;
-	struct i1480 *i1480;
-	struct device *dev = &iface->dev;
-	int result;
-
-	result = -ENODEV;
-	if (iface->cur_altsetting->desc.bInterfaceNumber != 0) {
-		dev_dbg(dev, "not attaching to iface %d\n",
-			iface->cur_altsetting->desc.bInterfaceNumber);
-		goto error;
-	}
-	if (iface->num_altsetting > 1 &&
-			le16_to_cpu(udev->descriptor.idProduct) == 0xbabe) {
-		/* Need altsetting #1 [HW QUIRK] or EP1 won't work */
-		result = usb_set_interface(interface_to_usbdev(iface), 0, 1);
-		if (result < 0)
-			dev_warn(dev,
-				 "can't set altsetting 1 on iface 0: %d\n",
-				 result);
-	}
-
-	if (iface->cur_altsetting->desc.bNumEndpoints < 1)
-		return -ENODEV;
-
-	result = -ENOMEM;
-	i1480_usb = kzalloc(sizeof(*i1480_usb), GFP_KERNEL);
-	if (i1480_usb == NULL) {
-		dev_err(dev, "Unable to allocate instance\n");
-		goto error;
-	}
-	i1480_usb_init(i1480_usb);
-
-	i1480 = &i1480_usb->i1480;
-	i1480->buf_size = 512;
-	i1480->cmd_buf = kmalloc_array(2, i1480->buf_size, GFP_KERNEL);
-	if (i1480->cmd_buf == NULL) {
-		dev_err(dev, "Cannot allocate transfer buffers\n");
-		result = -ENOMEM;
-		goto error_buf_alloc;
-	}
-	i1480->evt_buf = i1480->cmd_buf + i1480->buf_size;
-
-	result = i1480_usb_create(i1480_usb, iface);
-	if (result < 0) {
-		dev_err(dev, "Cannot create instance: %d\n", result);
-		goto error_create;
-	}
-
-	/* setup the fops and upload the firmware */
-	i1480->pre_fw_name = "i1480-pre-phy-0.0.bin";
-	i1480->mac_fw_name = "i1480-usb-0.0.bin";
-	i1480->mac_fw_name_deprecate = "ptc-0.0.bin";
-	i1480->phy_fw_name = "i1480-phy-0.0.bin";
-	i1480->dev = &iface->dev;
-	i1480->write = i1480_usb_write;
-	i1480->read = i1480_usb_read;
-	i1480->rc_setup = NULL;
-	i1480->wait_init_done = i1480_usb_wait_init_done;
-	i1480->cmd = i1480_usb_cmd;
-
-	result = i1480_fw_upload(&i1480_usb->i1480);	/* the real thing */
-	if (result >= 0) {
-		usb_reset_device(i1480_usb->usb_dev);
-		result = -ENODEV;	/* we don't want to bind to the iface */
-	}
-	i1480_usb_destroy(i1480_usb);
-error_create:
-	kfree(i1480->cmd_buf);
-error_buf_alloc:
-	kfree(i1480_usb);
-error:
-	return result;
-}
-
-MODULE_FIRMWARE("i1480-pre-phy-0.0.bin");
-MODULE_FIRMWARE("i1480-usb-0.0.bin");
-MODULE_FIRMWARE("i1480-phy-0.0.bin");
-
-#define i1480_USB_DEV(v, p)				\
-{							\
-	.match_flags = USB_DEVICE_ID_MATCH_DEVICE	\
-		 | USB_DEVICE_ID_MATCH_DEV_INFO		\
-		 | USB_DEVICE_ID_MATCH_INT_INFO,	\
-	.idVendor = (v),				\
-	.idProduct = (p),				\
-	.bDeviceClass = 0xff,				\
-	.bDeviceSubClass = 0xff,			\
-	.bDeviceProtocol = 0xff,			\
-	.bInterfaceClass = 0xff,			\
-	.bInterfaceSubClass = 0xff,			\
-	.bInterfaceProtocol = 0xff,			\
-}
-
-
-/** USB device ID's that we handle */
-static const struct usb_device_id i1480_usb_id_table[] = {
-	i1480_USB_DEV(0x8086, 0xdf3b),
-	i1480_USB_DEV(0x15a9, 0x0005),
-	i1480_USB_DEV(0x07d1, 0x3802),
-	i1480_USB_DEV(0x050d, 0x305a),
-	i1480_USB_DEV(0x3495, 0x3007),
-	{},
-};
-MODULE_DEVICE_TABLE(usb, i1480_usb_id_table);
-
-
-static struct usb_driver i1480_dfu_driver = {
-	.name =		"i1480-dfu-usb",
-	.id_table =	i1480_usb_id_table,
-	.probe =	i1480_usb_probe,
-	.disconnect =	NULL,
-};
-
-module_usb_driver(i1480_dfu_driver);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Intel Wireless UWB Link 1480 firmware uploader for USB");
-MODULE_LICENSE("GPL");
diff --git a/drivers/uwb/i1480/i1480-est.c b/drivers/uwb/i1480/i1480-est.c
deleted file mode 100644
index 1346c409d10e..000000000000
--- a/drivers/uwb/i1480/i1480-est.c
+++ /dev/null
@@ -1,85 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Wireless UWB Link 1480
- * Event Size tables for Wired Adaptors
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/usb.h>
-#include <linux/uwb.h>
-#include "dfu/i1480-dfu.h"
-
-
-/** Event size table for wEvents 0x00XX */
-static struct uwb_est_entry i1480_est_fd00[] = {
-	/* Anybody expecting this response has to use
-	 * neh->extra_size to specify the real size that will
-	 * come back. */
-	[i1480_EVT_CONFIRM] = { .size = sizeof(struct i1480_evt_confirm) },
-	[i1480_CMD_SET_IP_MAS] = { .size = sizeof(struct i1480_evt_confirm) },
-#ifdef i1480_RCEB_EXTENDED
-	[0x09] = {
-		.size = sizeof(struct i1480_rceb),
-		.offset = 1 + offsetof(struct i1480_rceb, wParamLength),
-	},
-#endif
-};
-
-/** Event size table for wEvents 0x01XX */
-static struct uwb_est_entry i1480_est_fd01[] = {
-	[0xff & i1480_EVT_RM_INIT_DONE] = { .size = sizeof(struct i1480_rceb) },
-	[0xff & i1480_EVT_DEV_ADD] = { .size = sizeof(struct i1480_rceb) + 9 },
-	[0xff & i1480_EVT_DEV_RM] = { .size = sizeof(struct i1480_rceb) + 9 },
-	[0xff & i1480_EVT_DEV_ID_CHANGE] = {
-		.size = sizeof(struct i1480_rceb) + 2 },
-};
-
-static int __init i1480_est_init(void)
-{
-	int result = uwb_est_register(i1480_CET_VS1, 0x00, 0x8086, 0x0c3b,
-				      i1480_est_fd00,
-				      ARRAY_SIZE(i1480_est_fd00));
-	if (result < 0) {
-		printk(KERN_ERR "Can't register EST table fd00: %d\n", result);
-		return result;
-	}
-	result = uwb_est_register(i1480_CET_VS1, 0x01, 0x8086, 0x0c3b,
-				  i1480_est_fd01, ARRAY_SIZE(i1480_est_fd01));
-	if (result < 0) {
-		printk(KERN_ERR "Can't register EST table fd01: %d\n", result);
-		return result;
-	}
-	return 0;
-}
-module_init(i1480_est_init);
-
-static void __exit i1480_est_exit(void)
-{
-	uwb_est_unregister(i1480_CET_VS1, 0x00, 0x8086, 0x0c3b,
-			   i1480_est_fd00, ARRAY_SIZE(i1480_est_fd00));
-	uwb_est_unregister(i1480_CET_VS1, 0x01, 0x8086, 0x0c3b,
-			   i1480_est_fd01, ARRAY_SIZE(i1480_est_fd01));
-}
-module_exit(i1480_est_exit);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("i1480's Vendor Specific Event Size Tables");
-MODULE_LICENSE("GPL");
-
-/**
- * USB device ID's that we handle
- *
- * [so we are loaded when this kind device is connected]
- */
-static struct usb_device_id __used i1480_est_id_table[] = {
-	{ USB_DEVICE(0x8086, 0xdf3b), },
-	{ USB_DEVICE(0x8086, 0x0c3b), },
-	{ },
-};
-MODULE_DEVICE_TABLE(usb, i1480_est_id_table);
diff --git a/drivers/uwb/ie-rcv.c b/drivers/uwb/ie-rcv.c
deleted file mode 100644
index 51a4706e0dd3..000000000000
--- a/drivers/uwb/ie-rcv.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * IE Received notification handling.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/bitmap.h>
-#include "uwb-internal.h"
-
-/*
- * Process an incoming IE Received notification.
- */
-int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *evt)
-{
-	int result = -EINVAL;
-	struct device *dev = &evt->rc->uwb_dev.dev;
-	struct uwb_rc_evt_ie_rcv *iercv;
-
-	/* Is there enough data to decode it? */
-	if (evt->notif.size < sizeof(*iercv)) {
-		dev_err(dev, "IE Received notification: Not enough data to "
-			"decode (%zu vs %zu bytes needed)\n",
-			evt->notif.size, sizeof(*iercv));
-		goto error;
-	}
-	iercv = container_of(evt->notif.rceb, struct uwb_rc_evt_ie_rcv, rceb);
-
-	dev_dbg(dev, "IE received, element ID=%d\n", iercv->IEData[0]);
-
-	if (iercv->IEData[0] == UWB_RELINQUISH_REQUEST_IE) {
-		dev_warn(dev, "unhandled Relinquish Request IE\n");
-	}
-
-	return 0;
-error:
-	return result;
-}
diff --git a/drivers/uwb/ie.c b/drivers/uwb/ie.c
deleted file mode 100644
index b11678dd0380..000000000000
--- a/drivers/uwb/ie.c
+++ /dev/null
@@ -1,366 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Information Element Handling
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Reinette Chatre <reinette.chatre@intel.com>
- *
- * FIXME: docs
- */
-
-#include <linux/slab.h>
-#include <linux/export.h>
-#include "uwb-internal.h"
-
-/**
- * uwb_ie_next - get the next IE in a buffer
- * @ptr: start of the buffer containing the IE data
- * @len: length of the buffer
- *
- * Both @ptr and @len are updated so subsequent calls to uwb_ie_next()
- * will get the next IE.
- *
- * NULL is returned (and @ptr and @len will not be updated) if there
- * are no more IEs in the buffer or the buffer is too short.
- */
-struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len)
-{
-	struct uwb_ie_hdr *hdr;
-	size_t ie_len;
-
-	if (*len < sizeof(struct uwb_ie_hdr))
-		return NULL;
-
-	hdr = *ptr;
-	ie_len = sizeof(struct uwb_ie_hdr) + hdr->length;
-
-	if (*len < ie_len)
-		return NULL;
-
-	*ptr += ie_len;
-	*len -= ie_len;
-
-	return hdr;
-}
-EXPORT_SYMBOL_GPL(uwb_ie_next);
-
-/**
- * uwb_ie_dump_hex - print IEs to a character buffer
- * @ies: the IEs to print.
- * @len: length of all the IEs.
- * @buf: the destination buffer.
- * @size: size of @buf.
- *
- * Returns the number of characters written.
- */
-int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
-		    char *buf, size_t size)
-{
-	void *ptr;
-	const struct uwb_ie_hdr *ie;
-	int r = 0;
-	u8 *d;
-
-	ptr = (void *)ies;
-	for (;;) {
-		ie = uwb_ie_next(&ptr, &len);
-		if (!ie)
-			break;
-
-		r += scnprintf(buf + r, size - r, "%02x %02x",
-			       (unsigned)ie->element_id,
-			       (unsigned)ie->length);
-		d = (uint8_t *)ie + sizeof(struct uwb_ie_hdr);
-		while (d != ptr && r < size)
-			r += scnprintf(buf + r, size - r, " %02x", (unsigned)*d++);
-		if (r < size)
-			buf[r++] = '\n';
-	};
-
-	return r;
-}
-
-/**
- * Get the IEs that a radio controller is sending in its beacon
- *
- * @uwb_rc:  UWB Radio Controller
- * @returns: Size read from the system
- *
- * We don't need to lock the uwb_rc's mutex because we don't modify
- * anything. Once done with the iedata buffer, call
- * uwb_rc_ie_release(iedata). Don't call kfree on it.
- */
-static
-ssize_t uwb_rc_get_ie(struct uwb_rc *uwb_rc, struct uwb_rc_evt_get_ie **pget_ie)
-{
-	ssize_t result;
-	struct device *dev = &uwb_rc->uwb_dev.dev;
-	struct uwb_rccb *cmd = NULL;
-	struct uwb_rceb *reply = NULL;
-	struct uwb_rc_evt_get_ie *get_ie;
-
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		return -ENOMEM;
-
-	cmd->bCommandType = UWB_RC_CET_GENERAL;
-	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_GET_IE);
-	result = uwb_rc_vcmd(uwb_rc, "GET_IE", cmd, sizeof(*cmd),
-			     UWB_RC_CET_GENERAL, UWB_RC_CMD_GET_IE,
-			     &reply);
-	kfree(cmd);
-	if (result < 0)
-		return result;
-
-	get_ie = container_of(reply, struct uwb_rc_evt_get_ie, rceb);
-	if (result < sizeof(*get_ie)) {
-		dev_err(dev, "not enough data returned for decoding GET IE "
-			"(%zu bytes received vs %zu needed)\n",
-			result, sizeof(*get_ie));
-		return -EINVAL;
-	} else if (result < sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength)) {
-		dev_err(dev, "not enough data returned for decoding GET IE "
-			"payload (%zu bytes received vs %zu needed)\n", result,
-			sizeof(*get_ie) + le16_to_cpu(get_ie->wIELength));
-		return -EINVAL;
-	}
-
-	*pget_ie = get_ie;
-	return result;
-}
-
-
-/**
- * Replace all IEs currently being transmitted by a device
- *
- * @cmd:    pointer to the SET-IE command with the IEs to set
- * @size:   size of @buf
- */
-int uwb_rc_set_ie(struct uwb_rc *rc, struct uwb_rc_cmd_set_ie *cmd)
-{
-	int result;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rc_evt_set_ie reply;
-
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_SET_IE;
-	result = uwb_rc_cmd(rc, "SET-IE", &cmd->rccb,
-			    sizeof(*cmd) + le16_to_cpu(cmd->wIELength),
-			    &reply.rceb, sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	else if (result != sizeof(reply)) {
-		dev_err(dev, "SET-IE: not enough data to decode reply "
-			"(%d bytes received vs %zu needed)\n",
-			result, sizeof(reply));
-		result = -EIO;
-	} else if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(dev, "SET-IE: command execution failed: %s (%d)\n",
-			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
-		result = -EIO;
-	} else
-		result = 0;
-error_cmd:
-	return result;
-}
-
-/* Cleanup the whole IE management subsystem */
-void uwb_rc_ie_init(struct uwb_rc *uwb_rc)
-{
-	mutex_init(&uwb_rc->ies_mutex);
-}
-
-
-/**
- * uwb_rc_ie_setup - setup a radio controller's IE manager
- * @uwb_rc: the radio controller.
- *
- * The current set of IEs are obtained from the hardware with a GET-IE
- * command (since the radio controller is not yet beaconing this will
- * be just the hardware's MAC and PHY Capability IEs).
- *
- * Returns 0 on success; -ve on an error.
- */
-int uwb_rc_ie_setup(struct uwb_rc *uwb_rc)
-{
-	struct uwb_rc_evt_get_ie *ie_info = NULL;
-	int capacity;
-
-	capacity = uwb_rc_get_ie(uwb_rc, &ie_info);
-	if (capacity < 0)
-		return capacity;
-
-	mutex_lock(&uwb_rc->ies_mutex);
-
-	uwb_rc->ies = (struct uwb_rc_cmd_set_ie *)ie_info;
-	uwb_rc->ies->rccb.bCommandType = UWB_RC_CET_GENERAL;
-	uwb_rc->ies->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SET_IE);
-	uwb_rc->ies_capacity = capacity;
-
-	mutex_unlock(&uwb_rc->ies_mutex);
-
-	return 0;
-}
-
-
-/* Cleanup the whole IE management subsystem */
-void uwb_rc_ie_release(struct uwb_rc *uwb_rc)
-{
-	kfree(uwb_rc->ies);
-	uwb_rc->ies = NULL;
-	uwb_rc->ies_capacity = 0;
-}
-
-
-static int uwb_rc_ie_add_one(struct uwb_rc *rc, const struct uwb_ie_hdr *new_ie)
-{
-	struct uwb_rc_cmd_set_ie *new_ies;
-	void *ptr, *prev_ie;
-	struct uwb_ie_hdr *ie;
-	size_t length, new_ie_len, new_capacity, size, prev_size;
-
-	length = le16_to_cpu(rc->ies->wIELength);
-	new_ie_len = sizeof(struct uwb_ie_hdr) + new_ie->length;
-	new_capacity = sizeof(struct uwb_rc_cmd_set_ie) + length + new_ie_len;
-
-	if (new_capacity > rc->ies_capacity) {
-		new_ies = krealloc(rc->ies, new_capacity, GFP_KERNEL);
-		if (!new_ies)
-			return -ENOMEM;
-		rc->ies = new_ies;
-	}
-
-	ptr = rc->ies->IEData;
-	size = length;
-	for (;;) {
-		prev_ie = ptr;
-		prev_size = size;
-		ie = uwb_ie_next(&ptr, &size);
-		if (!ie || ie->element_id > new_ie->element_id)
-			break;
-	}
-
-	memmove(prev_ie + new_ie_len, prev_ie, prev_size);
-	memcpy(prev_ie, new_ie, new_ie_len);
-	rc->ies->wIELength = cpu_to_le16(length + new_ie_len);
-
-	return 0;
-}
-
-/**
- * uwb_rc_ie_add - add new IEs to the radio controller's beacon
- * @uwb_rc: the radio controller.
- * @ies: the buffer containing the new IE or IEs to be added to
- *       the device's beacon.
- * @size: length of all the IEs.
- *
- * According to WHCI 0.95 [4.13.6] the driver will only receive the RCEB
- * after the device sent the first beacon that includes the IEs specified
- * in the SET IE command. We thus cannot send this command if the device is
- * not beaconing. Instead, a SET IE command will be sent later right after
- * we start beaconing.
- *
- * Setting an IE on the device will overwrite all current IEs in device. So
- * we take the current IEs being transmitted by the device, insert the
- * new one, and call SET IE with all the IEs needed.
- *
- * Returns 0 on success; or -ENOMEM.
- */
-int uwb_rc_ie_add(struct uwb_rc *uwb_rc,
-		  const struct uwb_ie_hdr *ies, size_t size)
-{
-	int result = 0;
-	void *ptr;
-	const struct uwb_ie_hdr *ie;
-
-	mutex_lock(&uwb_rc->ies_mutex);
-
-	ptr = (void *)ies;
-	for (;;) {
-		ie = uwb_ie_next(&ptr, &size);
-		if (!ie)
-			break;
-
-		result = uwb_rc_ie_add_one(uwb_rc, ie);
-		if (result < 0)
-			break;
-	}
-	if (result >= 0) {
-		if (size == 0) {
-			if (uwb_rc->beaconing != -1)
-				result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-		} else
-			result = -EINVAL;
-	}
-
-	mutex_unlock(&uwb_rc->ies_mutex);
-
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_ie_add);
-
-
-/*
- * Remove an IE from internal cache
- *
- * We are dealing with our internal IE cache so no need to verify that the
- * IEs are valid (it has been done already).
- *
- * Should be called with ies_mutex held
- *
- * We do not break out once an IE is found in the cache. It is currently
- * possible to have more than one IE with the same ID included in the
- * beacon. We don't reallocate, we just mark the size smaller.
- */
-static
-void uwb_rc_ie_cache_rm(struct uwb_rc *uwb_rc, enum uwb_ie to_remove)
-{
-	struct uwb_ie_hdr *ie;
-	size_t len = le16_to_cpu(uwb_rc->ies->wIELength);
-	void *ptr;
-	size_t size;
-
-	ptr = uwb_rc->ies->IEData;
-	size = len;
-	for (;;) {
-		ie = uwb_ie_next(&ptr, &size);
-		if (!ie)
-			break;
-		if (ie->element_id == to_remove) {
-			len -= sizeof(struct uwb_ie_hdr) + ie->length;
-			memmove(ie, ptr, size);
-			ptr = ie;
-		}
-	}
-	uwb_rc->ies->wIELength = cpu_to_le16(len);
-}
-
-
-/**
- * uwb_rc_ie_rm - remove an IE from the radio controller's beacon
- * @uwb_rc: the radio controller.
- * @element_id: the element ID of the IE to remove.
- *
- * Only IEs previously added with uwb_rc_ie_add() may be removed.
- *
- * Returns 0 on success; or -ve the SET-IE command to the radio
- * controller failed.
- */
-int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id)
-{
-	int result = 0;
-
-	mutex_lock(&uwb_rc->ies_mutex);
-
-	uwb_rc_ie_cache_rm(uwb_rc, element_id);
-
-	if (uwb_rc->beaconing != -1)
-		result = uwb_rc_set_ie(uwb_rc, uwb_rc->ies);
-
-	mutex_unlock(&uwb_rc->ies_mutex);
-
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_ie_rm);
diff --git a/drivers/uwb/lc-dev.c b/drivers/uwb/lc-dev.c
deleted file mode 100644
index 3e5c07fd6b10..000000000000
--- a/drivers/uwb/lc-dev.c
+++ /dev/null
@@ -1,457 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Life cycle of devices
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/export.h>
-#include <linux/err.h>
-#include <linux/kdev_t.h>
-#include <linux/random.h>
-#include <linux/stat.h>
-#include "uwb-internal.h"
-
-/* We initialize addresses to 0xff (invalid, as it is bcast) */
-static inline void uwb_dev_addr_init(struct uwb_dev_addr *addr)
-{
-	memset(&addr->data, 0xff, sizeof(addr->data));
-}
-
-static inline void uwb_mac_addr_init(struct uwb_mac_addr *addr)
-{
-	memset(&addr->data, 0xff, sizeof(addr->data));
-}
-
-/*
- * Add callback @new to be called when an event occurs in @rc.
- */
-int uwb_notifs_register(struct uwb_rc *rc, struct uwb_notifs_handler *new)
-{
-	if (mutex_lock_interruptible(&rc->notifs_chain.mutex))
-		return -ERESTARTSYS;
-	list_add(&new->list_node, &rc->notifs_chain.list);
-	mutex_unlock(&rc->notifs_chain.mutex);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_notifs_register);
-
-/*
- * Remove event handler (callback)
- */
-int uwb_notifs_deregister(struct uwb_rc *rc, struct uwb_notifs_handler *entry)
-{
-	if (mutex_lock_interruptible(&rc->notifs_chain.mutex))
-		return -ERESTARTSYS;
-	list_del(&entry->list_node);
-	mutex_unlock(&rc->notifs_chain.mutex);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_notifs_deregister);
-
-/*
- * Notify all event handlers of a given event on @rc
- *
- * We are called with a valid reference to the device, or NULL if the
- * event is not for a particular event (e.g., a BG join event).
- */
-void uwb_notify(struct uwb_rc *rc, struct uwb_dev *uwb_dev, enum uwb_notifs event)
-{
-	struct uwb_notifs_handler *handler;
-	if (mutex_lock_interruptible(&rc->notifs_chain.mutex))
-		return;
-	if (!list_empty(&rc->notifs_chain.list)) {
-		list_for_each_entry(handler, &rc->notifs_chain.list, list_node) {
-			handler->cb(handler->data, uwb_dev, event);
-		}
-	}
-	mutex_unlock(&rc->notifs_chain.mutex);
-}
-
-/*
- * Release the backing device of a uwb_dev that has been dynamically allocated.
- */
-static void uwb_dev_sys_release(struct device *dev)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-
-	uwb_bce_put(uwb_dev->bce);
-	memset(uwb_dev, 0x69, sizeof(*uwb_dev));
-	kfree(uwb_dev);
-}
-
-/*
- * Initialize a UWB device instance
- *
- * Alloc, zero and call this function.
- */
-void uwb_dev_init(struct uwb_dev *uwb_dev)
-{
-	mutex_init(&uwb_dev->mutex);
-	device_initialize(&uwb_dev->dev);
-	uwb_dev->dev.release = uwb_dev_sys_release;
-	uwb_dev_addr_init(&uwb_dev->dev_addr);
-	uwb_mac_addr_init(&uwb_dev->mac_addr);
-	bitmap_fill(uwb_dev->streams, UWB_NUM_GLOBAL_STREAMS);
-}
-
-static ssize_t uwb_dev_EUI_48_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	char addr[UWB_ADDR_STRSIZE];
-
-	uwb_mac_addr_print(addr, sizeof(addr), &uwb_dev->mac_addr);
-	return sprintf(buf, "%s\n", addr);
-}
-static DEVICE_ATTR(EUI_48, S_IRUGO, uwb_dev_EUI_48_show, NULL);
-
-static ssize_t uwb_dev_DevAddr_show(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	char addr[UWB_ADDR_STRSIZE];
-
-	uwb_dev_addr_print(addr, sizeof(addr), &uwb_dev->dev_addr);
-	return sprintf(buf, "%s\n", addr);
-}
-static DEVICE_ATTR(DevAddr, S_IRUGO, uwb_dev_DevAddr_show, NULL);
-
-/*
- * Show the BPST of this device.
- *
- * Calculated from the receive time of the device's beacon and it's
- * slot number.
- */
-static ssize_t uwb_dev_BPST_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_beca_e *bce;
-	struct uwb_beacon_frame *bf;
-	u16 bpst;
-
-	bce = uwb_dev->bce;
-	mutex_lock(&bce->mutex);
-	bf = (struct uwb_beacon_frame *)bce->be->BeaconInfo;
-	bpst = bce->be->wBPSTOffset
-		- (u16)(bf->Beacon_Slot_Number * UWB_BEACON_SLOT_LENGTH_US);
-	mutex_unlock(&bce->mutex);
-
-	return sprintf(buf, "%d\n", bpst);
-}
-static DEVICE_ATTR(BPST, S_IRUGO, uwb_dev_BPST_show, NULL);
-
-/*
- * Show the IEs a device is beaconing
- *
- * We need to access the beacon cache, so we just lock it really
- * quick, print the IEs and unlock.
- *
- * We have a reference on the cache entry, so that should be
- * quite safe.
- */
-static ssize_t uwb_dev_IEs_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-
-	return uwb_bce_print_IEs(uwb_dev, uwb_dev->bce, buf, PAGE_SIZE);
-}
-static DEVICE_ATTR(IEs, S_IRUGO | S_IWUSR, uwb_dev_IEs_show, NULL);
-
-static ssize_t uwb_dev_LQE_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_beca_e *bce = uwb_dev->bce;
-	size_t result;
-
-	mutex_lock(&bce->mutex);
-	result = stats_show(&uwb_dev->bce->lqe_stats, buf);
-	mutex_unlock(&bce->mutex);
-	return result;
-}
-
-static ssize_t uwb_dev_LQE_store(struct device *dev,
-				 struct device_attribute *attr,
-				 const char *buf, size_t size)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_beca_e *bce = uwb_dev->bce;
-	ssize_t result;
-
-	mutex_lock(&bce->mutex);
-	result = stats_store(&uwb_dev->bce->lqe_stats, buf, size);
-	mutex_unlock(&bce->mutex);
-	return result;
-}
-static DEVICE_ATTR(LQE, S_IRUGO | S_IWUSR, uwb_dev_LQE_show, uwb_dev_LQE_store);
-
-static ssize_t uwb_dev_RSSI_show(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_beca_e *bce = uwb_dev->bce;
-	size_t result;
-
-	mutex_lock(&bce->mutex);
-	result = stats_show(&uwb_dev->bce->rssi_stats, buf);
-	mutex_unlock(&bce->mutex);
-	return result;
-}
-
-static ssize_t uwb_dev_RSSI_store(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t size)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_beca_e *bce = uwb_dev->bce;
-	ssize_t result;
-
-	mutex_lock(&bce->mutex);
-	result = stats_store(&uwb_dev->bce->rssi_stats, buf, size);
-	mutex_unlock(&bce->mutex);
-	return result;
-}
-static DEVICE_ATTR(RSSI, S_IRUGO | S_IWUSR, uwb_dev_RSSI_show, uwb_dev_RSSI_store);
-
-
-static struct attribute *uwb_dev_attrs[] = {
-	&dev_attr_EUI_48.attr,
-	&dev_attr_DevAddr.attr,
-	&dev_attr_BPST.attr,
-	&dev_attr_IEs.attr,
-	&dev_attr_LQE.attr,
-	&dev_attr_RSSI.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(uwb_dev);
-
-/* UWB bus type. */
-struct bus_type uwb_bus_type = {
-	.name =		"uwb",
-	.dev_groups =	uwb_dev_groups,
-};
-
-/**
- * Device SYSFS registration
- */
-static int __uwb_dev_sys_add(struct uwb_dev *uwb_dev, struct device *parent_dev)
-{
-	struct device *dev;
-
-	dev = &uwb_dev->dev;
-	dev->parent = parent_dev;
-	dev_set_drvdata(dev, uwb_dev);
-
-	return device_add(dev);
-}
-
-
-static void __uwb_dev_sys_rm(struct uwb_dev *uwb_dev)
-{
-	dev_set_drvdata(&uwb_dev->dev, NULL);
-	device_del(&uwb_dev->dev);
-}
-
-
-/**
- * Register and initialize a new UWB device
- *
- * Did you call uwb_dev_init() on it?
- *
- * @parent_rc: is the parent radio controller who has the link to the
- *             device. When registering the UWB device that is a UWB
- *             Radio Controller, we point back to it.
- *
- * If registering the device that is part of a radio, caller has set
- * rc->uwb_dev->dev. Otherwise it is to be left NULL--a new one will
- * be allocated.
- */
-int uwb_dev_add(struct uwb_dev *uwb_dev, struct device *parent_dev,
-		struct uwb_rc *parent_rc)
-{
-	int result;
-	struct device *dev;
-
-	BUG_ON(uwb_dev == NULL);
-	BUG_ON(parent_dev == NULL);
-	BUG_ON(parent_rc == NULL);
-
-	mutex_lock(&uwb_dev->mutex);
-	dev = &uwb_dev->dev;
-	uwb_dev->rc = parent_rc;
-	result = __uwb_dev_sys_add(uwb_dev, parent_dev);
-	if (result < 0)
-		printk(KERN_ERR "UWB: unable to register dev %s with sysfs: %d\n",
-		       dev_name(dev), result);
-	mutex_unlock(&uwb_dev->mutex);
-	return result;
-}
-
-
-void uwb_dev_rm(struct uwb_dev *uwb_dev)
-{
-	mutex_lock(&uwb_dev->mutex);
-	__uwb_dev_sys_rm(uwb_dev);
-	mutex_unlock(&uwb_dev->mutex);
-}
-
-
-static
-int __uwb_dev_try_get(struct device *dev, void *__target_uwb_dev)
-{
-	struct uwb_dev *target_uwb_dev = __target_uwb_dev;
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	if (uwb_dev == target_uwb_dev) {
-		uwb_dev_get(uwb_dev);
-		return 1;
-	} else
-		return 0;
-}
-
-
-/**
- * Given a UWB device descriptor, validate and refcount it
- *
- * @returns NULL if the device does not exist or is quiescing; the ptr to
- *               it otherwise.
- */
-struct uwb_dev *uwb_dev_try_get(struct uwb_rc *rc, struct uwb_dev *uwb_dev)
-{
-	if (uwb_dev_for_each(rc, __uwb_dev_try_get, uwb_dev))
-		return uwb_dev;
-	else
-		return NULL;
-}
-EXPORT_SYMBOL_GPL(uwb_dev_try_get);
-
-
-/**
- * Remove a device from the system [grunt for other functions]
- */
-int __uwb_dev_offair(struct uwb_dev *uwb_dev, struct uwb_rc *rc)
-{
-	struct device *dev = &uwb_dev->dev;
-	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
-
-	uwb_mac_addr_print(macbuf, sizeof(macbuf), &uwb_dev->mac_addr);
-	uwb_dev_addr_print(devbuf, sizeof(devbuf), &uwb_dev->dev_addr);
-	dev_info(dev, "uwb device (mac %s dev %s) disconnected from %s %s\n",
-		 macbuf, devbuf,
-		 uwb_dev->dev.bus->name,
-		 rc ? dev_name(&(rc->uwb_dev.dev)) : "");
-	uwb_dev_rm(uwb_dev);
-	list_del(&uwb_dev->bce->node);
-	uwb_bce_put(uwb_dev->bce);
-	uwb_dev_put(uwb_dev);	/* for the creation in _onair() */
-
-	return 0;
-}
-
-
-/**
- * A device went off the air, clean up after it!
- *
- * This is called by the UWB Daemon (through the beacon purge function
- * uwb_bcn_cache_purge) when it is detected that a device has been in
- * radio silence for a while.
- *
- * If this device is actually a local radio controller we don't need
- * to go through the offair process, as it is not registered as that.
- *
- * NOTE: uwb_bcn_cache.mutex is held!
- */
-void uwbd_dev_offair(struct uwb_beca_e *bce)
-{
-	struct uwb_dev *uwb_dev;
-
-	uwb_dev = bce->uwb_dev;
-	if (uwb_dev) {
-		uwb_notify(uwb_dev->rc, uwb_dev, UWB_NOTIF_OFFAIR);
-		__uwb_dev_offair(uwb_dev, uwb_dev->rc);
-	}
-}
-
-
-/**
- * A device went on the air, start it up!
- *
- * This is called by the UWB Daemon when it is detected that a device
- * has popped up in the radio range of the radio controller.
- *
- * It will just create the freaking device, register the beacon and
- * stuff and yatla, done.
- *
- *
- * NOTE: uwb_beca.mutex is held, bce->mutex is held
- */
-void uwbd_dev_onair(struct uwb_rc *rc, struct uwb_beca_e *bce)
-{
-	int result;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_dev *uwb_dev;
-	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
-
-	uwb_mac_addr_print(macbuf, sizeof(macbuf), bce->mac_addr);
-	uwb_dev_addr_print(devbuf, sizeof(devbuf), &bce->dev_addr);
-	uwb_dev = kzalloc(sizeof(struct uwb_dev), GFP_KERNEL);
-	if (uwb_dev == NULL) {
-		dev_err(dev, "new device %s: Cannot allocate memory\n",
-			macbuf);
-		return;
-	}
-	uwb_dev_init(uwb_dev);		/* This sets refcnt to one, we own it */
-	uwb_dev->dev.bus = &uwb_bus_type;
-	uwb_dev->mac_addr = *bce->mac_addr;
-	uwb_dev->dev_addr = bce->dev_addr;
-	dev_set_name(&uwb_dev->dev, "%s", macbuf);
-
-	/* plug the beacon cache */
-	bce->uwb_dev = uwb_dev;
-	uwb_dev->bce = bce;
-	uwb_bce_get(bce);		/* released in uwb_dev_sys_release() */
-
-	result = uwb_dev_add(uwb_dev, &rc->uwb_dev.dev, rc);
-	if (result < 0) {
-		dev_err(dev, "new device %s: cannot instantiate device\n",
-			macbuf);
-		goto error_dev_add;
-	}
-
-	dev_info(dev, "uwb device (mac %s dev %s) connected to %s %s\n",
-		 macbuf, devbuf, uwb_dev->dev.bus->name,
-		 dev_name(&(rc->uwb_dev.dev)));
-	uwb_notify(rc, uwb_dev, UWB_NOTIF_ONAIR);
-	return;
-
-error_dev_add:
-	bce->uwb_dev = NULL;
-	uwb_bce_put(bce);
-	kfree(uwb_dev);
-	return;
-}
-
-/**
- * Iterate over the list of UWB devices, calling a @function on each
- *
- * See docs for bus_for_each()....
- *
- * @rc:       radio controller for the devices.
- * @function: function to call.
- * @priv:     data to pass to @function.
- * @returns:  0 if no invocation of function() returned a value
- *            different to zero. That value otherwise.
- */
-int uwb_dev_for_each(struct uwb_rc *rc, uwb_dev_for_each_f function, void *priv)
-{
-	return device_for_each_child(&rc->uwb_dev.dev, priv, function);
-}
-EXPORT_SYMBOL_GPL(uwb_dev_for_each);
diff --git a/drivers/uwb/lc-rc.c b/drivers/uwb/lc-rc.c
deleted file mode 100644
index ee31b221cdc2..000000000000
--- a/drivers/uwb/lc-rc.c
+++ /dev/null
@@ -1,569 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Life cycle of radio controllers
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- *
- * A UWB radio controller is also a UWB device, so it embeds one...
- *
- * List of RCs comes from the 'struct class uwb_rc_class'.
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/random.h>
-#include <linux/kdev_t.h>
-#include <linux/etherdevice.h>
-#include <linux/usb.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-static int uwb_rc_index_match(struct device *dev, const void *data)
-{
-	const int *index = data;
-	struct uwb_rc *rc = dev_get_drvdata(dev);
-
-	if (rc->index == *index)
-		return 1;
-	return 0;
-}
-
-static struct uwb_rc *uwb_rc_find_by_index(int index)
-{
-	struct device *dev;
-	struct uwb_rc *rc = NULL;
-
-	dev = class_find_device(&uwb_rc_class, NULL, &index, uwb_rc_index_match);
-	if (dev) {
-		rc = dev_get_drvdata(dev);
-		put_device(dev);
-	}
-
-	return rc;
-}
-
-static int uwb_rc_new_index(void)
-{
-	int index = 0;
-
-	for (;;) {
-		if (!uwb_rc_find_by_index(index))
-			return index;
-		if (++index < 0)
-			index = 0;
-	}
-}
-
-/**
- * Release the backing device of a uwb_rc that has been dynamically allocated.
- */
-static void uwb_rc_sys_release(struct device *dev)
-{
-	struct uwb_dev *uwb_dev = container_of(dev, struct uwb_dev, dev);
-	struct uwb_rc *rc = container_of(uwb_dev, struct uwb_rc, uwb_dev);
-
-	uwb_rc_ie_release(rc);
-	kfree(rc);
-}
-
-
-void uwb_rc_init(struct uwb_rc *rc)
-{
-	struct uwb_dev *uwb_dev = &rc->uwb_dev;
-
-	uwb_dev_init(uwb_dev);
-	rc->uwb_dev.dev.class = &uwb_rc_class;
-	rc->uwb_dev.dev.release = uwb_rc_sys_release;
-	uwb_rc_neh_create(rc);
-	rc->beaconing = -1;
-	rc->scan_type = UWB_SCAN_DISABLED;
-	INIT_LIST_HEAD(&rc->notifs_chain.list);
-	mutex_init(&rc->notifs_chain.mutex);
-	INIT_LIST_HEAD(&rc->uwb_beca.list);
-	mutex_init(&rc->uwb_beca.mutex);
-	uwb_drp_avail_init(rc);
-	uwb_rc_ie_init(rc);
-	uwb_rsv_init(rc);
-	uwb_rc_pal_init(rc);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_init);
-
-
-struct uwb_rc *uwb_rc_alloc(void)
-{
-	struct uwb_rc *rc;
-	rc = kzalloc(sizeof(*rc), GFP_KERNEL);
-	if (rc == NULL)
-		return NULL;
-	uwb_rc_init(rc);
-	return rc;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_alloc);
-
-/*
- * Show the ASIE that is broadcast in the UWB beacon by this uwb_rc device.
- */
-static ssize_t ASIE_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	struct uwb_ie_hdr *ie;
-	void *ptr;
-	size_t len;
-	int result = 0;
-
-	/* init empty buffer. */
-	result = scnprintf(buf, PAGE_SIZE, "\n");
-	mutex_lock(&rc->ies_mutex);
-	/* walk IEData looking for an ASIE. */
-	ptr = rc->ies->IEData;
-	len = le16_to_cpu(rc->ies->wIELength);
-	for (;;) {
-		ie = uwb_ie_next(&ptr, &len);
-		if (!ie)
-			break;
-		if (ie->element_id == UWB_APP_SPEC_IE) {
-			result = uwb_ie_dump_hex(ie,
-					ie->length + sizeof(struct uwb_ie_hdr),
-					buf, PAGE_SIZE);
-			break;
-		}
-	}
-	mutex_unlock(&rc->ies_mutex);
-
-	return result;
-}
-
-/*
- * Update the ASIE that is broadcast in the UWB beacon by this uwb_rc device.
- */
-static ssize_t ASIE_store(struct device *dev,
-				 struct device_attribute *attr,
-				 const char *buf, size_t size)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	char ie_buf[255];
-	int result, ie_len = 0;
-	const char *cur_ptr = buf;
-	struct uwb_ie_hdr *ie;
-
-	/* empty string means clear the ASIE. */
-	if (strlen(buf) <= 1) {
-		uwb_rc_ie_rm(rc, UWB_APP_SPEC_IE);
-		return size;
-	}
-
-	/* if non-empty string, convert string of hex chars to binary. */
-	while (ie_len < sizeof(ie_buf)) {
-		int char_count;
-
-		if (sscanf(cur_ptr, " %02hhX %n",
-				&(ie_buf[ie_len]), &char_count) > 0) {
-			++ie_len;
-			/* skip chars read from cur_ptr. */
-			cur_ptr += char_count;
-		} else {
-			break;
-		}
-	}
-
-	/* validate IE length and type. */
-	if (ie_len < sizeof(struct uwb_ie_hdr)) {
-		dev_err(dev, "%s: Invalid ASIE size %d.\n", __func__, ie_len);
-		return -EINVAL;
-	}
-
-	ie = (struct uwb_ie_hdr *)ie_buf;
-	if (ie->element_id != UWB_APP_SPEC_IE) {
-		dev_err(dev, "%s: Invalid IE element type size = 0x%02X.\n",
-				__func__, ie->element_id);
-		return -EINVAL;
-	}
-
-	/* bounds check length field from user. */
-	if (ie->length > (ie_len - sizeof(struct uwb_ie_hdr)))
-		ie->length = ie_len - sizeof(struct uwb_ie_hdr);
-
-	/*
-	 * Valid ASIE received. Remove current ASIE then add the new one using
-	 * uwb_rc_ie_add.
-	 */
-	uwb_rc_ie_rm(rc, UWB_APP_SPEC_IE);
-
-	result = uwb_rc_ie_add(rc, ie, ie->length + sizeof(struct uwb_ie_hdr));
-
-	return result >= 0 ? size : result;
-}
-static DEVICE_ATTR_RW(ASIE);
-
-static struct attribute *rc_attrs[] = {
-		&dev_attr_mac_address.attr,
-		&dev_attr_scan.attr,
-		&dev_attr_beacon.attr,
-		&dev_attr_ASIE.attr,
-		NULL,
-};
-
-static const struct attribute_group rc_attr_group = {
-	.attrs = rc_attrs,
-};
-
-/*
- * Registration of sysfs specific stuff
- */
-static int uwb_rc_sys_add(struct uwb_rc *rc)
-{
-	return sysfs_create_group(&rc->uwb_dev.dev.kobj, &rc_attr_group);
-}
-
-
-static void __uwb_rc_sys_rm(struct uwb_rc *rc)
-{
-	sysfs_remove_group(&rc->uwb_dev.dev.kobj, &rc_attr_group);
-}
-
-/**
- * uwb_rc_mac_addr_setup - get an RC's EUI-48 address or set it
- * @rc:  the radio controller.
- *
- * If the EUI-48 address is 00:00:00:00:00:00 or FF:FF:FF:FF:FF:FF
- * then a random locally administered EUI-48 is generated and set on
- * the device.  The probability of address collisions is sufficiently
- * unlikely (1/2^40 = 9.1e-13) that they're not checked for.
- */
-static
-int uwb_rc_mac_addr_setup(struct uwb_rc *rc)
-{
-	int result;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_dev *uwb_dev = &rc->uwb_dev;
-	char devname[UWB_ADDR_STRSIZE];
-	struct uwb_mac_addr addr;
-
-	result = uwb_rc_mac_addr_get(rc, &addr);
-	if (result < 0) {
-		dev_err(dev, "cannot retrieve UWB EUI-48 address: %d\n", result);
-		return result;
-	}
-
-	if (uwb_mac_addr_unset(&addr) || uwb_mac_addr_bcast(&addr)) {
-		addr.data[0] = 0x02; /* locally administered and unicast */
-		get_random_bytes(&addr.data[1], sizeof(addr.data)-1);
-
-		result = uwb_rc_mac_addr_set(rc, &addr);
-		if (result < 0) {
-			uwb_mac_addr_print(devname, sizeof(devname), &addr);
-			dev_err(dev, "cannot set EUI-48 address %s: %d\n",
-				devname, result);
-			return result;
-		}
-	}
-	uwb_dev->mac_addr = addr;
-	return 0;
-}
-
-
-
-static int uwb_rc_setup(struct uwb_rc *rc)
-{
-	int result;
-	struct device *dev = &rc->uwb_dev.dev;
-
-	result = uwb_radio_setup(rc);
-	if (result < 0) {
-		dev_err(dev, "cannot setup UWB radio: %d\n", result);
-		goto error;
-	}
-	result = uwb_rc_mac_addr_setup(rc);
-	if (result < 0) {
-		dev_err(dev, "cannot setup UWB MAC address: %d\n", result);
-		goto error;
-	}
-	result = uwb_rc_dev_addr_assign(rc);
-	if (result < 0) {
-		dev_err(dev, "cannot assign UWB DevAddr: %d\n", result);
-		goto error;
-	}
-	result = uwb_rc_ie_setup(rc);
-	if (result < 0) {
-		dev_err(dev, "cannot setup IE subsystem: %d\n", result);
-		goto error_ie_setup;
-	}
-	result = uwb_rsv_setup(rc);
-	if (result < 0) {
-		dev_err(dev, "cannot setup reservation subsystem: %d\n", result);
-		goto error_rsv_setup;
-	}
-	uwb_dbg_add_rc(rc);
-	return 0;
-
-error_rsv_setup:
-	uwb_rc_ie_release(rc);
-error_ie_setup:
-error:
-	return result;
-}
-
-
-/**
- * Register a new UWB radio controller
- *
- * Did you call uwb_rc_init() on your rc?
- *
- * We assume that this is being called with a > 0 refcount on
- * it [through ops->{get|put}_device(). We'll take our own, though.
- *
- * @parent_dev is our real device, the one that provides the actual UWB device
- */
-int uwb_rc_add(struct uwb_rc *rc, struct device *parent_dev, void *priv)
-{
-	int result;
-	struct device *dev;
-	char macbuf[UWB_ADDR_STRSIZE], devbuf[UWB_ADDR_STRSIZE];
-
-	rc->index = uwb_rc_new_index();
-
-	dev = &rc->uwb_dev.dev;
-	dev_set_name(dev, "uwb%d", rc->index);
-
-	rc->priv = priv;
-
-	init_waitqueue_head(&rc->uwbd.wq);
-	INIT_LIST_HEAD(&rc->uwbd.event_list);
-	spin_lock_init(&rc->uwbd.event_list_lock);
-
-	uwbd_start(rc);
-
-	result = rc->start(rc);
-	if (result < 0)
-		goto error_rc_start;
-
-	result = uwb_rc_setup(rc);
-	if (result < 0) {
-		dev_err(dev, "cannot setup UWB radio controller: %d\n", result);
-		goto error_rc_setup;
-	}
-
-	result = uwb_dev_add(&rc->uwb_dev, parent_dev, rc);
-	if (result < 0 && result != -EADDRNOTAVAIL)
-		goto error_dev_add;
-
-	result = uwb_rc_sys_add(rc);
-	if (result < 0) {
-		dev_err(parent_dev, "cannot register UWB radio controller "
-			"dev attributes: %d\n", result);
-		goto error_sys_add;
-	}
-
-	uwb_mac_addr_print(macbuf, sizeof(macbuf), &rc->uwb_dev.mac_addr);
-	uwb_dev_addr_print(devbuf, sizeof(devbuf), &rc->uwb_dev.dev_addr);
-	dev_info(dev,
-		 "new uwb radio controller (mac %s dev %s) on %s %s\n",
-		 macbuf, devbuf, parent_dev->bus->name, dev_name(parent_dev));
-	rc->ready = 1;
-	return 0;
-
-error_sys_add:
-	uwb_dev_rm(&rc->uwb_dev);
-error_dev_add:
-error_rc_setup:
-	rc->stop(rc);
-error_rc_start:
-	uwbd_stop(rc);
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_add);
-
-
-static int uwb_dev_offair_helper(struct device *dev, void *priv)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-
-	return __uwb_dev_offair(uwb_dev, uwb_dev->rc);
-}
-
-/*
- * Remove a Radio Controller; stop beaconing/scanning, disconnect all children
- */
-void uwb_rc_rm(struct uwb_rc *rc)
-{
-	rc->ready = 0;
-
-	uwb_dbg_del_rc(rc);
-	uwb_rsv_remove_all(rc);
-	uwb_radio_shutdown(rc);
-
-	rc->stop(rc);
-
-	uwbd_stop(rc);
-	uwb_rc_neh_destroy(rc);
-
-	uwb_dev_lock(&rc->uwb_dev);
-	rc->priv = NULL;
-	rc->cmd = NULL;
-	uwb_dev_unlock(&rc->uwb_dev);
-	mutex_lock(&rc->uwb_beca.mutex);
-	uwb_dev_for_each(rc, uwb_dev_offair_helper, NULL);
-	__uwb_rc_sys_rm(rc);
-	mutex_unlock(&rc->uwb_beca.mutex);
-	uwb_rsv_cleanup(rc);
- 	uwb_beca_release(rc);
-	uwb_dev_rm(&rc->uwb_dev);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_rm);
-
-static int find_rc_try_get(struct device *dev, const void *data)
-{
-	const struct uwb_rc *target_rc = data;
-	struct uwb_rc *rc = dev_get_drvdata(dev);
-
-	if (rc == NULL) {
-		WARN_ON(1);
-		return 0;
-	}
-	if (rc == target_rc) {
-		if (rc->ready == 0)
-			return 0;
-		else
-			return 1;
-	}
-	return 0;
-}
-
-/**
- * Given a radio controller descriptor, validate and refcount it
- *
- * @returns NULL if the rc does not exist or is quiescing; the ptr to
- *               it otherwise.
- */
-struct uwb_rc *__uwb_rc_try_get(struct uwb_rc *target_rc)
-{
-	struct device *dev;
-	struct uwb_rc *rc = NULL;
-
-	dev = class_find_device(&uwb_rc_class, NULL, target_rc,
-				find_rc_try_get);
-	if (dev) {
-		rc = dev_get_drvdata(dev);
-		__uwb_rc_get(rc);
-		put_device(dev);
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(__uwb_rc_try_get);
-
-/*
- * RC get for external refcount acquirers...
- *
- * Increments the refcount of the device and it's backend modules
- */
-static inline struct uwb_rc *uwb_rc_get(struct uwb_rc *rc)
-{
-	if (rc->ready == 0)
-		return NULL;
-	uwb_dev_get(&rc->uwb_dev);
-	return rc;
-}
-
-static int find_rc_grandpa(struct device *dev, const void *data)
-{
-	const struct device *grandpa_dev = data;
-	struct uwb_rc *rc = dev_get_drvdata(dev);
-
-	if (rc->uwb_dev.dev.parent->parent == grandpa_dev) {
-		rc = uwb_rc_get(rc);
-		return 1;
-	}
-	return 0;
-}
-
-/**
- * Locate and refcount a radio controller given a common grand-parent
- *
- * @grandpa_dev  Pointer to the 'grandparent' device structure.
- * @returns NULL If the rc does not exist or is quiescing; the ptr to
- *               it otherwise, properly referenced.
- *
- * The Radio Control interface (or the UWB Radio Controller) is always
- * an interface of a device. The parent is the interface, the
- * grandparent is the device that encapsulates the interface.
- *
- * There is no need to lock around as the "grandpa" would be
- * refcounted by the target, and to remove the referemes, the
- * uwb_rc_class->sem would have to be taken--we hold it, ergo we
- * should be safe.
- */
-struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *grandpa_dev)
-{
-	struct device *dev;
-	struct uwb_rc *rc = NULL;
-
-	dev = class_find_device(&uwb_rc_class, NULL, grandpa_dev,
-				find_rc_grandpa);
-	if (dev) {
-		rc = dev_get_drvdata(dev);
-		put_device(dev);
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_get_by_grandpa);
-
-/**
- * Find a radio controller by device address
- *
- * @returns the pointer to the radio controller, properly referenced
- */
-static int find_rc_dev(struct device *dev, const void *data)
-{
-	const struct uwb_dev_addr *addr = data;
-	struct uwb_rc *rc = dev_get_drvdata(dev);
-
-	if (rc == NULL) {
-		WARN_ON(1);
-		return 0;
-	}
-	if (!uwb_dev_addr_cmp(&rc->uwb_dev.dev_addr, addr)) {
-		rc = uwb_rc_get(rc);
-		return 1;
-	}
-	return 0;
-}
-
-struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *addr)
-{
-	struct device *dev;
-	struct uwb_rc *rc = NULL;
-
-	dev = class_find_device(&uwb_rc_class, NULL, addr, find_rc_dev);
-	if (dev) {
-		rc = dev_get_drvdata(dev);
-		put_device(dev);
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_get_by_dev);
-
-/**
- * Drop a reference on a radio controller
- *
- * This is the version that should be done by entities external to the
- * UWB Radio Control stack (ie: clients of the API).
- */
-void uwb_rc_put(struct uwb_rc *rc)
-{
-	__uwb_rc_put(rc);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_put);
diff --git a/drivers/uwb/neh.c b/drivers/uwb/neh.c
deleted file mode 100644
index 1695584be412..000000000000
--- a/drivers/uwb/neh.c
+++ /dev/null
@@ -1,606 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * WUSB Wire Adapter: Radio Control Interface (WUSB[8])
- * Notification and Event Handling
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * The RC interface of the Host Wire Adapter (USB dongle) or WHCI PCI
- * card delivers a stream of notifications and events to the
- * notification end event endpoint or area. This code takes care of
- * getting a buffer with that data, breaking it up in separate
- * notifications and events and then deliver those.
- *
- * Events are answers to commands and they carry a context ID that
- * associates them to the command. Notifications are that,
- * notifications, they come out of the blue and have a context ID of
- * zero. Think of the context ID kind of like a handler. The
- * uwb_rc_neh_* code deals with managing context IDs.
- *
- * This is why you require a handle to operate on a UWB host. When you
- * open a handle a context ID is assigned to you.
- *
- * So, as it is done is:
- *
- * 1. Add an event handler [uwb_rc_neh_add()] (assigns a ctx id)
- * 2. Issue command [rc->cmd(rc, ...)]
- * 3. Arm the timeout timer [uwb_rc_neh_arm()]
- * 4, Release the reference to the neh [uwb_rc_neh_put()]
- * 5. Wait for the callback
- * 6. Command result (RCEB) is passed to the callback
- *
- * If (2) fails, you should remove the handle [uwb_rc_neh_rm()]
- * instead of arming the timer.
- *
- * Handles are for using in *serialized* code, single thread.
- *
- * When the notification/event comes, the IRQ handler/endpoint
- * callback passes the data read to uwb_rc_neh_grok() which will break
- * it up in a discrete series of events, look up who is listening for
- * them and execute the pertinent callbacks.
- *
- * If the reader detects an error while reading the data stream, call
- * uwb_rc_neh_error().
- *
- * CONSTRAINTS/ASSUMPTIONS:
- *
- * - Most notifications/events are small (less thank .5k), copying
- *   around is ok.
- *
- * - Notifications/events are ALWAYS smaller than PAGE_SIZE
- *
- * - Notifications/events always come in a single piece (ie: a buffer
- *   will always contain entire notifications/events).
- *
- * - we cannot know in advance how long each event is (because they
- *   lack a length field in their header--smart move by the standards
- *   body, btw). So we need a facility to get the event size given the
- *   header. This is what the EST code does (notif/Event Size
- *   Tables), check nest.c--as well, you can associate the size to
- *   the handle [w/ neh->extra_size()].
- *
- * - Most notifications/events are fixed size; only a few are variable
- *   size (NEST takes care of that).
- *
- * - Listeners of events expect them, so they usually provide a
- *   buffer, as they know the size. Listeners to notifications don't,
- *   so we allocate their buffers dynamically.
- */
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-/*
- * UWB Radio Controller Notification/Event Handle
- *
- * Represents an entity waiting for an event coming from the UWB Radio
- * Controller with a given context id (context) and type (evt_type and
- * evt). On reception of the notification/event, the callback (cb) is
- * called with the event.
- *
- * If the timer expires before the event is received, the callback is
- * called with -ETIMEDOUT as the event size.
- */
-struct uwb_rc_neh {
-	struct kref kref;
-
-	struct uwb_rc *rc;
-	u8 evt_type;
-	__le16 evt;
-	u8 context;
-	u8 completed;
-	uwb_rc_cmd_cb_f cb;
-	void *arg;
-
-	struct timer_list timer;
-	struct list_head list_node;
-};
-
-static void uwb_rc_neh_timer(struct timer_list *t);
-
-static void uwb_rc_neh_release(struct kref *kref)
-{
-	struct uwb_rc_neh *neh = container_of(kref, struct uwb_rc_neh, kref);
-
-	kfree(neh);
-}
-
-static void uwb_rc_neh_get(struct uwb_rc_neh *neh)
-{
-	kref_get(&neh->kref);
-}
-
-/**
- * uwb_rc_neh_put - release reference to a neh
- * @neh: the neh
- */
-void uwb_rc_neh_put(struct uwb_rc_neh *neh)
-{
-	kref_put(&neh->kref, uwb_rc_neh_release);
-}
-
-
-/**
- * Assigns @neh a context id from @rc's pool
- *
- * @rc:	    UWB Radio Controller descriptor; @rc->neh_lock taken
- * @neh:    Notification/Event Handle
- * @returns 0 if context id was assigned ok; < 0 errno on error (if
- *	    all the context IDs are taken).
- *
- * (assumes @wa is locked).
- *
- * NOTE: WUSB spec reserves context ids 0x00 for notifications and
- *	 0xff is invalid, so they must not be used. Initialization
- *	 fills up those two in the bitmap so they are not allocated.
- *
- * We spread the allocation around to reduce the possibility of two
- * consecutive opened @neh's getting the same context ID assigned (to
- * avoid surprises with late events that timed out long time ago). So
- * first we search from where @rc->ctx_roll is, if not found, we
- * search from zero.
- */
-static
-int __uwb_rc_ctx_get(struct uwb_rc *rc, struct uwb_rc_neh *neh)
-{
-	int result;
-	result = find_next_zero_bit(rc->ctx_bm, UWB_RC_CTX_MAX,
-				    rc->ctx_roll++);
-	if (result < UWB_RC_CTX_MAX)
-		goto found;
-	result = find_first_zero_bit(rc->ctx_bm, UWB_RC_CTX_MAX);
-	if (result < UWB_RC_CTX_MAX)
-		goto found;
-	return -ENFILE;
-found:
-	set_bit(result, rc->ctx_bm);
-	neh->context = result;
-	return 0;
-}
-
-
-/** Releases @neh's context ID back to @rc (@rc->neh_lock is locked). */
-static
-void __uwb_rc_ctx_put(struct uwb_rc *rc, struct uwb_rc_neh *neh)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	if (neh->context == 0)
-		return;
-	if (test_bit(neh->context, rc->ctx_bm) == 0) {
-		dev_err(dev, "context %u not set in bitmap\n",
-			neh->context);
-		WARN_ON(1);
-	}
-	clear_bit(neh->context, rc->ctx_bm);
-	neh->context = 0;
-}
-
-/**
- * uwb_rc_neh_add - add a neh for a radio controller command
- * @rc:             the radio controller
- * @cmd:            the radio controller command
- * @expected_type:  the type of the expected response event
- * @expected_event: the expected event ID
- * @cb:             callback for when the event is received
- * @arg:            argument for the callback
- *
- * Creates a neh and adds it to the list of those waiting for an
- * event.  A context ID will be assigned to the command.
- */
-struct uwb_rc_neh *uwb_rc_neh_add(struct uwb_rc *rc, struct uwb_rccb *cmd,
-				  u8 expected_type, u16 expected_event,
-				  uwb_rc_cmd_cb_f cb, void *arg)
-{
-	int result;
-	unsigned long flags;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rc_neh *neh;
-
-	neh = kzalloc(sizeof(*neh), GFP_KERNEL);
-	if (neh == NULL) {
-		result = -ENOMEM;
-		goto error_kzalloc;
-	}
-
-	kref_init(&neh->kref);
-	INIT_LIST_HEAD(&neh->list_node);
-	timer_setup(&neh->timer, uwb_rc_neh_timer, 0);
-
-	neh->rc = rc;
-	neh->evt_type = expected_type;
-	neh->evt = cpu_to_le16(expected_event);
-	neh->cb = cb;
-	neh->arg = arg;
-
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	result = __uwb_rc_ctx_get(rc, neh);
-	if (result >= 0) {
-		cmd->bCommandContext = neh->context;
-		list_add_tail(&neh->list_node, &rc->neh_list);
-		uwb_rc_neh_get(neh);
-	}
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
-	if (result < 0)
-		goto error_ctx_get;
-
-	return neh;
-
-error_ctx_get:
-	kfree(neh);
-error_kzalloc:
-	dev_err(dev, "cannot open handle to radio controller: %d\n", result);
-	return ERR_PTR(result);
-}
-
-static void __uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
-{
-	__uwb_rc_ctx_put(rc, neh);
-	list_del(&neh->list_node);
-}
-
-/**
- * uwb_rc_neh_rm - remove a neh.
- * @rc:  the radio controller
- * @neh: the neh to remove
- *
- * Remove an active neh immediately instead of waiting for the event
- * (or a time out).
- */
-void uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	__uwb_rc_neh_rm(rc, neh);
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
-
-	del_timer_sync(&neh->timer);
-	uwb_rc_neh_put(neh);
-}
-
-/**
- * uwb_rc_neh_arm - arm an event handler timeout timer
- *
- * @rc:     UWB Radio Controller
- * @neh:    Notification/event handler for @rc
- *
- * The timer is only armed if the neh is active.
- */
-void uwb_rc_neh_arm(struct uwb_rc *rc, struct uwb_rc_neh *neh)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	if (neh->context)
-		mod_timer(&neh->timer,
-			  jiffies + msecs_to_jiffies(UWB_RC_CMD_TIMEOUT_MS));
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
-}
-
-static void uwb_rc_neh_cb(struct uwb_rc_neh *neh, struct uwb_rceb *rceb, size_t size)
-{
-	(*neh->cb)(neh->rc, neh->arg, rceb, size);
-	uwb_rc_neh_put(neh);
-}
-
-static bool uwb_rc_neh_match(struct uwb_rc_neh *neh, const struct uwb_rceb *rceb)
-{
-	return neh->evt_type == rceb->bEventType
-		&& neh->evt == rceb->wEvent
-		&& neh->context == rceb->bEventContext;
-}
-
-/**
- * Find the handle waiting for a RC Radio Control Event
- *
- * @rc:         UWB Radio Controller
- * @rceb:       Pointer to the RCEB buffer
- * @event_size: Pointer to the size of the RCEB buffer. Might be
- *              adjusted to take into account the @neh->extra_size
- *              settings.
- *
- * If the listener has no buffer (NULL buffer), one is allocated for
- * the right size (the amount of data received). @neh->ptr will point
- * to the event payload, which always starts with a 'struct
- * uwb_rceb'. kfree() it when done.
- */
-static
-struct uwb_rc_neh *uwb_rc_neh_lookup(struct uwb_rc *rc,
-				     const struct uwb_rceb *rceb)
-{
-	struct uwb_rc_neh *neh = NULL, *h;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rc->neh_lock, flags);
-
-	list_for_each_entry(h, &rc->neh_list, list_node) {
-		if (uwb_rc_neh_match(h, rceb)) {
-			neh = h;
-			break;
-		}
-	}
-
-	if (neh)
-		__uwb_rc_neh_rm(rc, neh);
-
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
-
-	return neh;
-}
-
-
-/*
- * Process notifications coming from the radio control interface
- *
- * @rc:    UWB Radio Control Interface descriptor
- * @neh:   Notification/Event Handler @neh->ptr points to
- *         @uwb_evt->buffer.
- *
- * This function is called by the event/notif handling subsystem when
- * notifications arrive (hwarc_probe() arms a notification/event handle
- * that calls back this function for every received notification; this
- * function then will rearm itself).
- *
- * Notification data buffers are dynamically allocated by the NEH
- * handling code in neh.c [uwb_rc_neh_lookup()]. What is actually
- * allocated is space to contain the notification data.
- *
- * Buffers are prefixed with a Radio Control Event Block (RCEB) as
- * defined by the WUSB Wired-Adapter Radio Control interface. We
- * just use it for the notification code.
- *
- * On each case statement we just transcode endianess of the different
- * fields. We declare a pointer to a RCI definition of an event, and
- * then to a UWB definition of the same event (which are the same,
- * remember). Event if we use different pointers
- */
-static
-void uwb_rc_notif(struct uwb_rc *rc, struct uwb_rceb *rceb, ssize_t size)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_event *uwb_evt;
-
-	if (size == -ESHUTDOWN)
-		return;
-	if (size < 0) {
-		dev_err(dev, "ignoring event with error code %zu\n",
-			size);
-		return;
-	}
-
-	uwb_evt = kzalloc(sizeof(*uwb_evt), GFP_ATOMIC);
-	if (unlikely(uwb_evt == NULL)) {
-		dev_err(dev, "no memory to queue event 0x%02x/%04x/%02x\n",
-			rceb->bEventType, le16_to_cpu(rceb->wEvent),
-			rceb->bEventContext);
-		return;
-	}
-	uwb_evt->rc = __uwb_rc_get(rc);	/* will be put by uwbd's uwbd_event_handle() */
-	uwb_evt->ts_jiffies = jiffies;
-	uwb_evt->type = UWB_EVT_TYPE_NOTIF;
-	uwb_evt->notif.size = size;
-	uwb_evt->notif.rceb = rceb;
-
-	uwbd_event_queue(uwb_evt);
-}
-
-static void uwb_rc_neh_grok_event(struct uwb_rc *rc, struct uwb_rceb *rceb, size_t size)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rc_neh *neh;
-	struct uwb_rceb *notif;
-	unsigned long flags;
-
-	if (rceb->bEventContext == 0) {
-		notif = kmalloc(size, GFP_ATOMIC);
-		if (notif) {
-			memcpy(notif, rceb, size);
-			uwb_rc_notif(rc, notif, size);
-		} else
-			dev_err(dev, "event 0x%02x/%04x/%02x (%zu bytes): no memory\n",
-				rceb->bEventType, le16_to_cpu(rceb->wEvent),
-				rceb->bEventContext, size);
-	} else {
-		neh = uwb_rc_neh_lookup(rc, rceb);
-		if (neh) {
-			spin_lock_irqsave(&rc->neh_lock, flags);
-			/* to guard against a timeout */
-			neh->completed = 1;
-			del_timer(&neh->timer);
-			spin_unlock_irqrestore(&rc->neh_lock, flags);
-			uwb_rc_neh_cb(neh, rceb, size);
-		} else
-			dev_warn(dev, "event 0x%02x/%04x/%02x (%zu bytes): nobody cared\n",
-				 rceb->bEventType, le16_to_cpu(rceb->wEvent),
-				 rceb->bEventContext, size);
-	}
-}
-
-/**
- * Given a buffer with one or more UWB RC events/notifications, break
- * them up and dispatch them.
- *
- * @rc:	      UWB Radio Controller
- * @buf:      Buffer with the stream of notifications/events
- * @buf_size: Amount of data in the buffer
- *
- * Note each notification/event starts always with a 'struct
- * uwb_rceb', so the minimum size if 4 bytes.
- *
- * The device may pass us events formatted differently than expected.
- * These are first filtered, potentially creating a new event in a new
- * memory location. If a new event is created by the filter it is also
- * freed here.
- *
- * For each notif/event, tries to guess the size looking at the EST
- * tables, then looks for a neh that is waiting for that event and if
- * found, copies the payload to the neh's buffer and calls it back. If
- * not, the data is ignored.
- *
- * Note that if we can't find a size description in the EST tables, we
- * still might find a size in the 'neh' handle in uwb_rc_neh_lookup().
- *
- * Assumptions:
- *
- *   @rc->neh_lock is NOT taken
- *
- * We keep track of various sizes here:
- * size:      contains the size of the buffer that is processed for the
- *            incoming event. this buffer may contain events that are not
- *            formatted as WHCI.
- * real_size: the actual space taken by this event in the buffer.
- *            We need to keep track of the real size of an event to be able to
- *            advance the buffer correctly.
- * event_size: the size of the event as expected by the core layer
- *            [OR] the size of the event after filtering. if the filtering
- *            created a new event in a new memory location then this is
- *            effectively the size of a new event buffer
- */
-void uwb_rc_neh_grok(struct uwb_rc *rc, void *buf, size_t buf_size)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	void *itr;
-	struct uwb_rceb *rceb;
-	size_t size, real_size, event_size;
-	int needtofree;
-
-	itr = buf;
-	size = buf_size;
-	while (size > 0) {
-		if (size < sizeof(*rceb)) {
-			dev_err(dev, "not enough data in event buffer to "
-				"process incoming events (%zu left, minimum is "
-				"%zu)\n", size, sizeof(*rceb));
-			break;
-		}
-
-		rceb = itr;
-		if (rc->filter_event) {
-			needtofree = rc->filter_event(rc, &rceb, size,
-						      &real_size, &event_size);
-			if (needtofree < 0 && needtofree != -ENOANO) {
-				dev_err(dev, "BUG: Unable to filter event "
-					"(0x%02x/%04x/%02x) from "
-					"device. \n", rceb->bEventType,
-					le16_to_cpu(rceb->wEvent),
-					rceb->bEventContext);
-				break;
-			}
-		} else
-			needtofree = -ENOANO;
-		/* do real processing if there was no filtering or the
-		 * filtering didn't act */
-		if (needtofree == -ENOANO) {
-			ssize_t ret = uwb_est_find_size(rc, rceb, size);
-			if (ret < 0)
-				break;
-			if (ret > size) {
-				dev_err(dev, "BUG: hw sent incomplete event "
-					"0x%02x/%04x/%02x (%zd bytes), only got "
-					"%zu bytes. We don't handle that.\n",
-					rceb->bEventType, le16_to_cpu(rceb->wEvent),
-					rceb->bEventContext, ret, size);
-				break;
-			}
-			real_size = event_size = ret;
-		}
-		uwb_rc_neh_grok_event(rc, rceb, event_size);
-
-		if (needtofree == 1)
-			kfree(rceb);
-
-		itr += real_size;
-		size -= real_size;
-	}
-}
-EXPORT_SYMBOL_GPL(uwb_rc_neh_grok);
-
-
-/**
- * The entity that reads from the device notification/event channel has
- * detected an error.
- *
- * @rc:    UWB Radio Controller
- * @error: Errno error code
- *
- */
-void uwb_rc_neh_error(struct uwb_rc *rc, int error)
-{
-	struct uwb_rc_neh *neh;
-	unsigned long flags;
-
-	for (;;) {
-		spin_lock_irqsave(&rc->neh_lock, flags);
-		if (list_empty(&rc->neh_list)) {
-			spin_unlock_irqrestore(&rc->neh_lock, flags);
-			break;
-		}
-		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
-		__uwb_rc_neh_rm(rc, neh);
-		spin_unlock_irqrestore(&rc->neh_lock, flags);
-
-		del_timer_sync(&neh->timer);
-		uwb_rc_neh_cb(neh, NULL, error);
-	}
-}
-EXPORT_SYMBOL_GPL(uwb_rc_neh_error);
-
-
-static void uwb_rc_neh_timer(struct timer_list *t)
-{
-	struct uwb_rc_neh *neh = from_timer(neh, t, timer);
-	struct uwb_rc *rc = neh->rc;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rc->neh_lock, flags);
-	if (neh->completed) {
-		spin_unlock_irqrestore(&rc->neh_lock, flags);
-		return;
-	}
-	if (neh->context)
-		__uwb_rc_neh_rm(rc, neh);
-	else
-		neh = NULL;
-	spin_unlock_irqrestore(&rc->neh_lock, flags);
-
-	if (neh)
-		uwb_rc_neh_cb(neh, NULL, -ETIMEDOUT);
-}
-
-/** Initializes the @rc's neh subsystem
- */
-void uwb_rc_neh_create(struct uwb_rc *rc)
-{
-	spin_lock_init(&rc->neh_lock);
-	INIT_LIST_HEAD(&rc->neh_list);
-	set_bit(0, rc->ctx_bm);		/* 0 is reserved (see [WUSB] table 8-65) */
-	set_bit(0xff, rc->ctx_bm);	/* and 0xff is invalid */
-	rc->ctx_roll = 1;
-}
-
-
-/** Release's the @rc's neh subsystem */
-void uwb_rc_neh_destroy(struct uwb_rc *rc)
-{
-	unsigned long flags;
-	struct uwb_rc_neh *neh;
-
-	for (;;) {
-		spin_lock_irqsave(&rc->neh_lock, flags);
-		if (list_empty(&rc->neh_list)) {
-			spin_unlock_irqrestore(&rc->neh_lock, flags);
-			break;
-		}
-		neh = list_first_entry(&rc->neh_list, struct uwb_rc_neh, list_node);
-		__uwb_rc_neh_rm(rc, neh);
-		spin_unlock_irqrestore(&rc->neh_lock, flags);
-
-		del_timer_sync(&neh->timer);
-		uwb_rc_neh_put(neh);
-	}
-}
diff --git a/drivers/uwb/pal.c b/drivers/uwb/pal.c
deleted file mode 100644
index 765fd426dbd1..000000000000
--- a/drivers/uwb/pal.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB PAL support.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/debugfs.h>
-#include <linux/uwb.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-/**
- * uwb_pal_init - initialize a UWB PAL
- * @pal: the PAL to initialize
- */
-void uwb_pal_init(struct uwb_pal *pal)
-{
-	INIT_LIST_HEAD(&pal->node);
-}
-EXPORT_SYMBOL_GPL(uwb_pal_init);
-
-/**
- * uwb_pal_register - register a UWB PAL
- * @pal: the PAL
- *
- * The PAL must be initialized with uwb_pal_init().
- */
-int uwb_pal_register(struct uwb_pal *pal)
-{
-	struct uwb_rc *rc = pal->rc;
-	int ret;
-
-	if (pal->device) {
-		/* create a link to the uwb_rc in the PAL device's directory. */
-		ret = sysfs_create_link(&pal->device->kobj,
-					&rc->uwb_dev.dev.kobj, "uwb_rc");
-		if (ret < 0)
-			return ret;
-		/* create a link to the PAL in the UWB device's directory. */
-		ret = sysfs_create_link(&rc->uwb_dev.dev.kobj,
-					&pal->device->kobj, pal->name);
-		if (ret < 0) {
-			sysfs_remove_link(&pal->device->kobj, "uwb_rc");
-			return ret;
-		}
-	}
-
-	pal->debugfs_dir = uwb_dbg_create_pal_dir(pal);
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	list_add(&pal->node, &rc->pals);
-	mutex_unlock(&rc->uwb_dev.mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(uwb_pal_register);
-
-static int find_rc(struct device *dev, const void *data)
-{
-	const struct uwb_rc *target_rc = data;
-	struct uwb_rc *rc = dev_get_drvdata(dev);
-
-	if (rc == NULL) {
-		WARN_ON(1);
-		return 0;
-	}
-	if (rc == target_rc) {
-		if (rc->ready == 0)
-			return 0;
-		else
-			return 1;
-	}
-	return 0;
-}
-
-/**
- * Given a radio controller descriptor see if it is registered.
- *
- * @returns false if the rc does not exist or is quiescing; true otherwise.
- */
-static bool uwb_rc_class_device_exists(struct uwb_rc *target_rc)
-{
-	struct device *dev;
-
-	dev = class_find_device(&uwb_rc_class, NULL, target_rc,	find_rc);
-
-	put_device(dev);
-
-	return (dev != NULL);
-}
-
-/**
- * uwb_pal_unregister - unregister a UWB PAL
- * @pal: the PAL
- */
-void uwb_pal_unregister(struct uwb_pal *pal)
-{
-	struct uwb_rc *rc = pal->rc;
-
-	uwb_radio_stop(pal);
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	list_del(&pal->node);
-	mutex_unlock(&rc->uwb_dev.mutex);
-
-	debugfs_remove(pal->debugfs_dir);
-
-	if (pal->device) {
-		/* remove link to the PAL in the UWB device's directory. */
-		if (uwb_rc_class_device_exists(rc))
-			sysfs_remove_link(&rc->uwb_dev.dev.kobj, pal->name);
-
-		/* remove link to uwb_rc in the PAL device's directory. */
-		sysfs_remove_link(&pal->device->kobj, "uwb_rc");
-	}
-}
-EXPORT_SYMBOL_GPL(uwb_pal_unregister);
-
-/**
- * uwb_rc_pal_init - initialize the PAL related parts of a radio controller
- * @rc: the radio controller
- */
-void uwb_rc_pal_init(struct uwb_rc *rc)
-{
-	INIT_LIST_HEAD(&rc->pals);
-}
diff --git a/drivers/uwb/radio.c b/drivers/uwb/radio.c
deleted file mode 100644
index 240dd755927e..000000000000
--- a/drivers/uwb/radio.c
+++ /dev/null
@@ -1,196 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB radio (channel) management.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/uwb.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-
-static int uwb_radio_select_channel(struct uwb_rc *rc)
-{
-	/*
-	 * Default to channel 9 (BG1, TFC1) unless the user has
-	 * selected a specific channel or there are no active PALs.
-	 */
-	if (rc->active_pals == 0)
-		return -1;
-	if (rc->beaconing_forced)
-		return rc->beaconing_forced;
-	return 9;
-}
-
-
-/*
- * Notify all active PALs that the channel has changed.
- */
-static void uwb_radio_channel_changed(struct uwb_rc *rc, int channel)
-{
-	struct uwb_pal *pal;
-
-	list_for_each_entry(pal, &rc->pals, node) {
-		if (pal->channel && channel != pal->channel) {
-			pal->channel = channel;
-			if (pal->channel_changed)
-				pal->channel_changed(pal, pal->channel);
-		}
-	}
-}
-
-/*
- * Change to a new channel and notify any active PALs of the new
- * channel.
- *
- * When stopping the radio, PALs need to be notified first so they can
- * terminate any active reservations.
- */
-static int uwb_radio_change_channel(struct uwb_rc *rc, int channel)
-{
-	int ret = 0;
-	struct device *dev = &rc->uwb_dev.dev;
-
-	dev_dbg(dev, "%s: channel = %d, rc->beaconing = %d\n", __func__,
-		channel, rc->beaconing);
-
-	if (channel == -1)
-		uwb_radio_channel_changed(rc, channel);
-
-	if (channel != rc->beaconing) {
-		if (rc->beaconing != -1 && channel != -1) {
-			/*
-			 * FIXME: should signal the channel change
-			 * with a Channel Change IE.
-			 */
-			ret = uwb_radio_change_channel(rc, -1);
-			if (ret < 0)
-				return ret;
-		}
-		ret = uwb_rc_beacon(rc, channel, 0);
-	}
-
-	if (channel != -1)
-		uwb_radio_channel_changed(rc, rc->beaconing);
-
-	return ret;
-}
-
-/**
- * uwb_radio_start - request that the radio be started
- * @pal: the PAL making the request.
- *
- * If the radio is not already active, a suitable channel is selected
- * and beacons are started.
- */
-int uwb_radio_start(struct uwb_pal *pal)
-{
-	struct uwb_rc *rc = pal->rc;
-	int ret = 0;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-
-	if (!pal->channel) {
-		pal->channel = -1;
-		rc->active_pals++;
-		ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
-	}
-
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(uwb_radio_start);
-
-/**
- * uwb_radio_stop - request that the radio be stopped.
- * @pal: the PAL making the request.
- *
- * Stops the radio if no other PAL is making use of it.
- */
-void uwb_radio_stop(struct uwb_pal *pal)
-{
-	struct uwb_rc *rc = pal->rc;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-
-	if (pal->channel) {
-		rc->active_pals--;
-		uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
-		pal->channel = 0;
-	}
-
-	mutex_unlock(&rc->uwb_dev.mutex);
-}
-EXPORT_SYMBOL_GPL(uwb_radio_stop);
-
-/*
- * uwb_radio_force_channel - force a specific channel to be used
- * @rc: the radio controller.
- * @channel: the channel to use; -1 to force the radio to stop; 0 to
- *   use the default channel selection algorithm.
- */
-int uwb_radio_force_channel(struct uwb_rc *rc, int channel)
-{
-	int ret = 0;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-
-	rc->beaconing_forced = channel;
-	ret = uwb_radio_change_channel(rc, uwb_radio_select_channel(rc));
-
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return ret;
-}
-
-/*
- * uwb_radio_setup - setup the radio manager
- * @rc: the radio controller.
- *
- * The radio controller is reset to ensure it's in a known state
- * before it's used.
- */
-int uwb_radio_setup(struct uwb_rc *rc)
-{
-	return uwb_rc_reset(rc);
-}
-
-/*
- * uwb_radio_reset_state - reset any radio manager state
- * @rc: the radio controller.
- *
- * All internal radio manager state is reset to values corresponding
- * to a reset radio controller.
- */
-void uwb_radio_reset_state(struct uwb_rc *rc)
-{
-	struct uwb_pal *pal;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-
-	list_for_each_entry(pal, &rc->pals, node) {
-		if (pal->channel) {
-			pal->channel = -1;
-			if (pal->channel_changed)
-				pal->channel_changed(pal, -1);
-		}
-	}
-
-	rc->beaconing = -1;
-	rc->scanning = -1;
-
-	mutex_unlock(&rc->uwb_dev.mutex);
-}
-
-/*
- * uwb_radio_shutdown - shutdown the radio manager
- * @rc: the radio controller.
- *
- * The radio controller is reset.
- */
-void uwb_radio_shutdown(struct uwb_rc *rc)
-{
-	uwb_radio_reset_state(rc);
-	uwb_rc_reset(rc);
-}
diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c
deleted file mode 100644
index 8fc7c14d903e..000000000000
--- a/drivers/uwb/reset.c
+++ /dev/null
@@ -1,379 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * UWB basic command support and radio reset
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME:
- *
- *  - docs
- *
- *  - Now we are serializing (using the uwb_dev->mutex) the command
- *    execution; it should be parallelized as much as possible some
- *    day.
- */
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-/**
- * Command result codes (WUSB1.0[T8-69])
- */
-static
-const char *__strerror[] = {
-	"success",
-	"failure",
-	"hardware failure",
-	"no more slots",
-	"beacon is too large",
-	"invalid parameter",
-	"unsupported power level",
-	"time out (wa) or invalid ie data (whci)",
-	"beacon size exceeded",
-	"cancelled",
-	"invalid state",
-	"invalid size",
-	"ack not received",
-	"no more asie notification",
-};
-
-
-/** Return a string matching the given error code */
-const char *uwb_rc_strerror(unsigned code)
-{
-	if (code == 255)
-		return "time out";
-	if (code >= ARRAY_SIZE(__strerror))
-		return "unknown error";
-	return __strerror[code];
-}
-
-int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
-		     struct uwb_rccb *cmd, size_t cmd_size,
-		     u8 expected_type, u16 expected_event,
-		     uwb_rc_cmd_cb_f cb, void *arg)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rc_neh *neh;
-	int needtofree = 0;
-	int result;
-
-	uwb_dev_lock(&rc->uwb_dev);	/* Protect against rc->priv being removed */
-	if (rc->priv == NULL) {
-		uwb_dev_unlock(&rc->uwb_dev);
-		return -ESHUTDOWN;
-	}
-
-	if (rc->filter_cmd) {
-		needtofree = rc->filter_cmd(rc, &cmd, &cmd_size);
-		if (needtofree < 0 && needtofree != -ENOANO) {
-			dev_err(dev, "%s: filter error: %d\n",
-				cmd_name, needtofree);
-			uwb_dev_unlock(&rc->uwb_dev);
-			return needtofree;
-		}
-	}
-
-	neh = uwb_rc_neh_add(rc, cmd, expected_type, expected_event, cb, arg);
-	if (IS_ERR(neh)) {
-		result = PTR_ERR(neh);
-		uwb_dev_unlock(&rc->uwb_dev);
-		goto out;
-	}
-
-	result = rc->cmd(rc, cmd, cmd_size);
-	uwb_dev_unlock(&rc->uwb_dev);
-	if (result < 0)
-		uwb_rc_neh_rm(rc, neh);
-	else
-		uwb_rc_neh_arm(rc, neh);
-	uwb_rc_neh_put(neh);
-out:
-	if (needtofree == 1)
-		kfree(cmd);
-	return result < 0 ? result : 0;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_cmd_async);
-
-struct uwb_rc_cmd_done_params {
-	struct completion completion;
-	struct uwb_rceb *reply;
-	ssize_t reply_size;
-};
-
-static void uwb_rc_cmd_done(struct uwb_rc *rc, void *arg,
-			    struct uwb_rceb *reply, ssize_t reply_size)
-{
-	struct uwb_rc_cmd_done_params *p = (struct uwb_rc_cmd_done_params *)arg;
-
-	if (reply_size > 0) {
-		if (p->reply)
-			reply_size = min(p->reply_size, reply_size);
-		else
-			p->reply = kmalloc(reply_size, GFP_ATOMIC);
-
-		if (p->reply)
-			memcpy(p->reply, reply, reply_size);
-		else
-			reply_size = -ENOMEM;
-	}
-	p->reply_size = reply_size;
-	complete(&p->completion);
-}
-
-
-/**
- * Generic function for issuing commands to the Radio Control Interface
- *
- * @rc:       UWB Radio Control descriptor
- * @cmd_name: Name of the command being issued (for error messages)
- * @cmd:      Pointer to rccb structure containing the command;
- *            normally you embed this structure as the first member of
- *            the full command structure.
- * @cmd_size: Size of the whole command buffer pointed to by @cmd.
- * @reply:    Pointer to where to store the reply
- * @reply_size: @reply's size
- * @expected_type: Expected type in the return event
- * @expected_event: Expected event code in the return event
- * @preply:   Here a pointer to where the event data is received will
- *            be stored. Once done with the data, free with kfree().
- *
- * This function is generic; it works for commands that return a fixed
- * and known size or for commands that return a variable amount of data.
- *
- * If a buffer is provided, that is used, although it could be chopped
- * to the maximum size of the buffer. If the buffer is NULL, then one
- * be allocated in *preply with the whole contents of the reply.
- *
- * @rc needs to be referenced
- */
-static
-ssize_t __uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name,
-		     struct uwb_rccb *cmd, size_t cmd_size,
-		     struct uwb_rceb *reply, size_t reply_size,
-		     u8 expected_type, u16 expected_event,
-		     struct uwb_rceb **preply)
-{
-	ssize_t result = 0;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rc_cmd_done_params params;
-
-	init_completion(&params.completion);
-	params.reply = reply;
-	params.reply_size = reply_size;
-
-	result = uwb_rc_cmd_async(rc, cmd_name, cmd, cmd_size,
-				  expected_type, expected_event,
-				  uwb_rc_cmd_done, &params);
-	if (result)
-		return result;
-
-	wait_for_completion(&params.completion);
-
-	if (preply)
-		*preply = params.reply;
-
-	if (params.reply_size < 0)
-		dev_err(dev, "%s: confirmation event 0x%02x/%04x/%02x "
-			"reception failed: %d\n", cmd_name,
-			expected_type, expected_event, cmd->bCommandContext,
-			(int)params.reply_size);
-	return params.reply_size;
-}
-
-
-/**
- * Generic function for issuing commands to the Radio Control Interface
- *
- * @rc:       UWB Radio Control descriptor
- * @cmd_name: Name of the command being issued (for error messages)
- * @cmd:      Pointer to rccb structure containing the command;
- *            normally you embed this structure as the first member of
- *            the full command structure.
- * @cmd_size: Size of the whole command buffer pointed to by @cmd.
- * @reply:    Pointer to the beginning of the confirmation event
- *            buffer. Normally bigger than an 'struct hwarc_rceb'.
- *            You need to fill out reply->bEventType and reply->wEvent (in
- *            cpu order) as the function will use them to verify the
- *            confirmation event.
- * @reply_size: Size of the reply buffer
- *
- * The function checks that the length returned in the reply is at
- * least as big as @reply_size; if not, it will be deemed an error and
- * -EIO returned.
- *
- * @rc needs to be referenced
- */
-ssize_t uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name,
-		   struct uwb_rccb *cmd, size_t cmd_size,
-		   struct uwb_rceb *reply, size_t reply_size)
-{
-	struct device *dev = &rc->uwb_dev.dev;
-	ssize_t result;
-
-	result = __uwb_rc_cmd(rc, cmd_name,
-			      cmd, cmd_size, reply, reply_size,
-			      reply->bEventType, reply->wEvent, NULL);
-
-	if (result > 0 && result < reply_size) {
-		dev_err(dev, "%s: not enough data returned for decoding reply "
-			"(%zu bytes received vs at least %zu needed)\n",
-			cmd_name, result, reply_size);
-		result = -EIO;
-	}
-	return result;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_cmd);
-
-
-/**
- * Generic function for issuing commands to the Radio Control
- * Interface that return an unknown amount of data
- *
- * @rc:       UWB Radio Control descriptor
- * @cmd_name: Name of the command being issued (for error messages)
- * @cmd:      Pointer to rccb structure containing the command;
- *            normally you embed this structure as the first member of
- *            the full command structure.
- * @cmd_size: Size of the whole command buffer pointed to by @cmd.
- * @expected_type: Expected type in the return event
- * @expected_event: Expected event code in the return event
- * @preply:   Here a pointer to where the event data is received will
- *            be stored. Once done with the data, free with kfree().
- *
- * The function checks that the length returned in the reply is at
- * least as big as a 'struct uwb_rceb *'; if not, it will be deemed an
- * error and -EIO returned.
- *
- * @rc needs to be referenced
- */
-ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
-		    struct uwb_rccb *cmd, size_t cmd_size,
-		    u8 expected_type, u16 expected_event,
-		    struct uwb_rceb **preply)
-{
-	return __uwb_rc_cmd(rc, cmd_name, cmd, cmd_size, NULL, 0,
-			    expected_type, expected_event, preply);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_vcmd);
-
-
-/**
- * Reset a UWB Host Controller (and all radio settings)
- *
- * @rc:      Host Controller descriptor
- * @returns: 0 if ok, < 0 errno code on error
- *
- * We put the command on kmalloc'ed memory as some arches cannot do
- * USB from the stack. The reply event is copied from an stage buffer,
- * so it can be in the stack. See WUSB1.0[8.6.2.4] for more details.
- */
-int uwb_rc_reset(struct uwb_rc *rc)
-{
-	int result = -ENOMEM;
-	struct uwb_rc_evt_confirm reply;
-	struct uwb_rccb *cmd;
-	size_t cmd_size = sizeof(*cmd);
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_kzalloc;
-	cmd->bCommandType = UWB_RC_CET_GENERAL;
-	cmd->wCommand = cpu_to_le16(UWB_RC_CMD_RESET);
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_RESET;
-	result = uwb_rc_cmd(rc, "RESET", cmd, cmd_size,
-			    &reply.rceb, sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev,
-			"RESET: command execution failed: %s (%d)\n",
-			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
-		result = -EIO;
-	}
-error_cmd:
-	kfree(cmd);
-error_kzalloc:
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
-}
-
-int uwbd_msg_handle_reset(struct uwb_event *evt)
-{
-	struct uwb_rc *rc = evt->rc;
-	int ret;
-
-	dev_info(&rc->uwb_dev.dev, "resetting radio controller\n");
-	ret = rc->reset(rc);
-	if (ret < 0) {
-		dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret);
-		goto error;
-	}
-	return 0;
-error:
-	/* Nothing can be done except try the reset again. Wait a bit
-	   to avoid reset loops during probe() or remove(). */
-	msleep(1000);
-	uwb_rc_reset_all(rc);
-	return ret;
-}
-
-/**
- * uwb_rc_reset_all - request a reset of the radio controller and PALs
- * @rc: the radio controller of the hardware device to be reset.
- *
- * The full hardware reset of the radio controller and all the PALs
- * will be scheduled.
- */
-void uwb_rc_reset_all(struct uwb_rc *rc)
-{
-	struct uwb_event *evt;
-
-	evt = kzalloc(sizeof(struct uwb_event), GFP_ATOMIC);
-	if (unlikely(evt == NULL))
-		return;
-
-	evt->rc = __uwb_rc_get(rc);	/* will be put by uwbd's uwbd_event_handle() */
-	evt->ts_jiffies = jiffies;
-	evt->type = UWB_EVT_TYPE_MSG;
-	evt->message = UWB_EVT_MSG_RESET;
-
-	uwbd_event_queue(evt);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_reset_all);
-
-void uwb_rc_pre_reset(struct uwb_rc *rc)
-{
-	rc->stop(rc);
-	uwbd_flush(rc);
-
-	uwb_radio_reset_state(rc);
-	uwb_rsv_remove_all(rc);
-}
-EXPORT_SYMBOL_GPL(uwb_rc_pre_reset);
-
-int uwb_rc_post_reset(struct uwb_rc *rc)
-{
-	int ret;
-
-	ret = rc->start(rc);
-	if (ret)
-		goto out;
-	ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr);
-	if (ret)
-		goto out;
-	ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr);
-	if (ret)
-		goto out;
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(uwb_rc_post_reset);
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
deleted file mode 100644
index ec924deb0a32..000000000000
--- a/drivers/uwb/rsv.c
+++ /dev/null
@@ -1,1000 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB reservation management.
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/uwb.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/export.h>
-
-#include "uwb-internal.h"
-
-static void uwb_rsv_timer(struct timer_list *t);
-
-static const char *rsv_states[] = {
-	[UWB_RSV_STATE_NONE]                 = "none            ",
-	[UWB_RSV_STATE_O_INITIATED]          = "o initiated     ",
-	[UWB_RSV_STATE_O_PENDING]            = "o pending       ",
-	[UWB_RSV_STATE_O_MODIFIED]           = "o modified      ",
-	[UWB_RSV_STATE_O_ESTABLISHED]        = "o established   ",
-	[UWB_RSV_STATE_O_TO_BE_MOVED]        = "o to be moved   ",
-	[UWB_RSV_STATE_O_MOVE_EXPANDING]     = "o move expanding",
-	[UWB_RSV_STATE_O_MOVE_COMBINING]     = "o move combining",
-	[UWB_RSV_STATE_O_MOVE_REDUCING]      = "o move reducing ",
-	[UWB_RSV_STATE_T_ACCEPTED]           = "t accepted      ",
-	[UWB_RSV_STATE_T_CONFLICT]           = "t conflict      ",
-	[UWB_RSV_STATE_T_PENDING]            = "t pending       ",
-	[UWB_RSV_STATE_T_DENIED]             = "t denied        ",
-	[UWB_RSV_STATE_T_RESIZED]            = "t resized       ",
-	[UWB_RSV_STATE_T_EXPANDING_ACCEPTED] = "t expanding acc ",
-	[UWB_RSV_STATE_T_EXPANDING_CONFLICT] = "t expanding conf",
-	[UWB_RSV_STATE_T_EXPANDING_PENDING]  = "t expanding pend",
-	[UWB_RSV_STATE_T_EXPANDING_DENIED]   = "t expanding den ",
-};
-
-static const char *rsv_types[] = {
-	[UWB_DRP_TYPE_ALIEN_BP] = "alien-bp",
-	[UWB_DRP_TYPE_HARD]     = "hard",
-	[UWB_DRP_TYPE_SOFT]     = "soft",
-	[UWB_DRP_TYPE_PRIVATE]  = "private",
-	[UWB_DRP_TYPE_PCA]      = "pca",
-};
-
-bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv)
-{
-	static const bool has_two_drp_ies[] = {
-		[UWB_RSV_STATE_O_INITIATED]               = false,
-		[UWB_RSV_STATE_O_PENDING]                 = false,
-		[UWB_RSV_STATE_O_MODIFIED]                = false,
-		[UWB_RSV_STATE_O_ESTABLISHED]             = false,
-		[UWB_RSV_STATE_O_TO_BE_MOVED]             = false,
-		[UWB_RSV_STATE_O_MOVE_COMBINING]          = false,
-		[UWB_RSV_STATE_O_MOVE_REDUCING]           = false,
-		[UWB_RSV_STATE_O_MOVE_EXPANDING]          = true,
-		[UWB_RSV_STATE_T_ACCEPTED]                = false,
-		[UWB_RSV_STATE_T_CONFLICT]                = false,
-		[UWB_RSV_STATE_T_PENDING]                 = false,
-		[UWB_RSV_STATE_T_DENIED]                  = false,
-		[UWB_RSV_STATE_T_RESIZED]                 = false,
-		[UWB_RSV_STATE_T_EXPANDING_ACCEPTED]      = true,
-		[UWB_RSV_STATE_T_EXPANDING_CONFLICT]      = true,
-		[UWB_RSV_STATE_T_EXPANDING_PENDING]       = true,
-		[UWB_RSV_STATE_T_EXPANDING_DENIED]        = true,
-	};
-
-	return has_two_drp_ies[rsv->state];
-}
-
-/**
- * uwb_rsv_state_str - return a string for a reservation state
- * @state: the reservation state.
- */
-const char *uwb_rsv_state_str(enum uwb_rsv_state state)
-{
-	if (state < UWB_RSV_STATE_NONE || state >= UWB_RSV_STATE_LAST)
-		return "unknown";
-	return rsv_states[state];
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_state_str);
-
-/**
- * uwb_rsv_type_str - return a string for a reservation type
- * @type: the reservation type
- */
-const char *uwb_rsv_type_str(enum uwb_drp_type type)
-{
-	if (type < UWB_DRP_TYPE_ALIEN_BP || type > UWB_DRP_TYPE_PCA)
-		return "invalid";
-	return rsv_types[type];
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_type_str);
-
-void uwb_rsv_dump(char *text, struct uwb_rsv *rsv)
-{
-	struct device *dev = &rsv->rc->uwb_dev.dev;
-	struct uwb_dev_addr devaddr;
-	char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
-
-	uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		devaddr = rsv->target.dev->dev_addr;
-	else
-		devaddr = rsv->target.devaddr;
-	uwb_dev_addr_print(target, sizeof(target), &devaddr);
-
-	dev_dbg(dev, "rsv %s %s -> %s: %s\n",
-		text, owner, target, uwb_rsv_state_str(rsv->state));
-}
-
-static void uwb_rsv_release(struct kref *kref)
-{
-	struct uwb_rsv *rsv = container_of(kref, struct uwb_rsv, kref);
-
-	kfree(rsv);
-}
-
-void uwb_rsv_get(struct uwb_rsv *rsv)
-{
-	kref_get(&rsv->kref);
-}
-
-void uwb_rsv_put(struct uwb_rsv *rsv)
-{
-	kref_put(&rsv->kref, uwb_rsv_release);
-}
-
-/*
- * Get a free stream index for a reservation.
- *
- * If the target is a DevAddr (e.g., a WUSB cluster reservation) then
- * the stream is allocated from a pool of per-RC stream indexes,
- * otherwise a unique stream index for the target is selected.
- */
-static int uwb_rsv_get_stream(struct uwb_rsv *rsv)
-{
-	struct uwb_rc *rc = rsv->rc;
-	struct device *dev = &rc->uwb_dev.dev;
-	unsigned long *streams_bm;
-	int stream;
-
-	switch (rsv->target.type) {
-	case UWB_RSV_TARGET_DEV:
-		streams_bm = rsv->target.dev->streams;
-		break;
-	case UWB_RSV_TARGET_DEVADDR:
-		streams_bm = rc->uwb_dev.streams;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	stream = find_first_zero_bit(streams_bm, UWB_NUM_STREAMS);
-	if (stream >= UWB_NUM_STREAMS) {
-		dev_err(dev, "%s: no available stream found\n", __func__);
-		return -EBUSY;
-	}
-
-	rsv->stream = stream;
-	set_bit(stream, streams_bm);
-
-	dev_dbg(dev, "get stream %d\n", rsv->stream);
-
-	return 0;
-}
-
-static void uwb_rsv_put_stream(struct uwb_rsv *rsv)
-{
-	struct uwb_rc *rc = rsv->rc;
-	struct device *dev = &rc->uwb_dev.dev;
-	unsigned long *streams_bm;
-
-	switch (rsv->target.type) {
-	case UWB_RSV_TARGET_DEV:
-		streams_bm = rsv->target.dev->streams;
-		break;
-	case UWB_RSV_TARGET_DEVADDR:
-		streams_bm = rc->uwb_dev.streams;
-		break;
-	default:
-		return;
-	}
-
-	clear_bit(rsv->stream, streams_bm);
-
-	dev_dbg(dev, "put stream %d\n", rsv->stream);
-}
-
-void uwb_rsv_backoff_win_timer(struct timer_list *t)
-{
-	struct uwb_drp_backoff_win *bow = from_timer(bow, t, timer);
-	struct uwb_rc *rc = container_of(bow, struct uwb_rc, bow);
-	struct device *dev = &rc->uwb_dev.dev;
-
-	bow->can_reserve_extra_mases = true;
-	if (bow->total_expired <= 4) {
-		bow->total_expired++;
-	} else {
-		/* after 4 backoff window has expired we can exit from
-		 * the backoff procedure */
-		bow->total_expired = 0;
-		bow->window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
-	}
-	dev_dbg(dev, "backoff_win_timer total_expired=%d, n=%d\n", bow->total_expired, bow->n);
-
-	/* try to relocate all the "to be moved" relocations */
-	uwb_rsv_handle_drp_avail_change(rc);
-}
-
-void uwb_rsv_backoff_win_increment(struct uwb_rc *rc)
-{
-	struct uwb_drp_backoff_win *bow = &rc->bow;
-	struct device *dev = &rc->uwb_dev.dev;
-	unsigned timeout_us;
-
-	dev_dbg(dev, "backoff_win_increment: window=%d\n", bow->window);
-
-	bow->can_reserve_extra_mases = false;
-
-	if((bow->window << 1) == UWB_DRP_BACKOFF_WIN_MAX)
-		return;
-
-	bow->window <<= 1;
-	bow->n = prandom_u32() & (bow->window - 1);
-	dev_dbg(dev, "new_window=%d, n=%d\n", bow->window, bow->n);
-
-	/* reset the timer associated variables */
-	timeout_us = bow->n * UWB_SUPERFRAME_LENGTH_US;
-	bow->total_expired = 0;
-	mod_timer(&bow->timer, jiffies + usecs_to_jiffies(timeout_us));
-}
-
-static void uwb_rsv_stroke_timer(struct uwb_rsv *rsv)
-{
-	int sframes = UWB_MAX_LOST_BEACONS;
-
-	/*
-	 * Multicast reservations can become established within 1
-	 * super frame and should not be terminated if no response is
-	 * received.
-	 */
-	if (rsv->state == UWB_RSV_STATE_NONE) {
-		sframes = 0;
-	} else if (rsv->is_multicast) {
-		if (rsv->state == UWB_RSV_STATE_O_INITIATED
-		    || rsv->state == UWB_RSV_STATE_O_MOVE_EXPANDING
-		    || rsv->state == UWB_RSV_STATE_O_MOVE_COMBINING
-		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING)
-			sframes = 1;
-		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED)
-			sframes = 0;
-
-	}
-
-	if (sframes > 0) {
-		/*
-		 * Add an additional 2 superframes to account for the
-		 * time to send the SET DRP IE command.
-		 */
-		unsigned timeout_us = (sframes + 2) * UWB_SUPERFRAME_LENGTH_US;
-		mod_timer(&rsv->timer, jiffies + usecs_to_jiffies(timeout_us));
-	} else
-		del_timer(&rsv->timer);
-}
-
-/*
- * Update a reservations state, and schedule an update of the
- * transmitted DRP IEs.
- */
-static void uwb_rsv_state_update(struct uwb_rsv *rsv,
-				 enum uwb_rsv_state new_state)
-{
-	rsv->state = new_state;
-	rsv->ie_valid = false;
-
-	uwb_rsv_dump("SU", rsv);
-
-	uwb_rsv_stroke_timer(rsv);
-	uwb_rsv_sched_update(rsv->rc);
-}
-
-static void uwb_rsv_callback(struct uwb_rsv *rsv)
-{
-	if (rsv->callback)
-		rsv->callback(rsv);
-}
-
-void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state)
-{
-	struct uwb_rsv_move *mv = &rsv->mv;
-
-	if (rsv->state == new_state) {
-		switch (rsv->state) {
-		case UWB_RSV_STATE_O_ESTABLISHED:
-		case UWB_RSV_STATE_O_MOVE_EXPANDING:
-		case UWB_RSV_STATE_O_MOVE_COMBINING:
-		case UWB_RSV_STATE_O_MOVE_REDUCING:
-		case UWB_RSV_STATE_T_ACCEPTED:
-		case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
-		case UWB_RSV_STATE_T_RESIZED:
-		case UWB_RSV_STATE_NONE:
-			uwb_rsv_stroke_timer(rsv);
-			break;
-		default:
-			/* Expecting a state transition so leave timer
-			   as-is. */
-			break;
-		}
-		return;
-	}
-
-	uwb_rsv_dump("SC", rsv);
-
-	switch (new_state) {
-	case UWB_RSV_STATE_NONE:
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_NONE);
-		uwb_rsv_remove(rsv);
-		uwb_rsv_callback(rsv);
-		break;
-	case UWB_RSV_STATE_O_INITIATED:
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_INITIATED);
-		break;
-	case UWB_RSV_STATE_O_PENDING:
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_PENDING);
-		break;
-	case UWB_RSV_STATE_O_MODIFIED:
-		/* in the companion there are the MASes to drop */
-		bitmap_andnot(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MODIFIED);
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		if (rsv->state == UWB_RSV_STATE_O_MODIFIED
-		    || rsv->state == UWB_RSV_STATE_O_MOVE_REDUCING) {
-			uwb_drp_avail_release(rsv->rc, &mv->companion_mas);
-			rsv->needs_release_companion_mas = false;
-		}
-		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-		uwb_rsv_callback(rsv);
-		break;
-	case UWB_RSV_STATE_O_MOVE_EXPANDING:
-		rsv->needs_release_companion_mas = true;
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
-		break;
-	case UWB_RSV_STATE_O_MOVE_COMBINING:
-		rsv->needs_release_companion_mas = false;
-		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
-		bitmap_or(rsv->mas.bm, rsv->mas.bm, mv->companion_mas.bm, UWB_NUM_MAS);
-		rsv->mas.safe   += mv->companion_mas.safe;
-		rsv->mas.unsafe += mv->companion_mas.unsafe;
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
-		break;
-	case UWB_RSV_STATE_O_MOVE_REDUCING:
-		bitmap_andnot(mv->companion_mas.bm, rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
-		rsv->needs_release_companion_mas = true;
-		rsv->mas.safe   = mv->final_mas.safe;
-		rsv->mas.unsafe = mv->final_mas.unsafe;
-		bitmap_copy(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS);
-		bitmap_copy(rsv->mas.unsafe_bm, mv->final_mas.unsafe_bm, UWB_NUM_MAS);
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
-		break;
-	case UWB_RSV_STATE_T_ACCEPTED:
-	case UWB_RSV_STATE_T_RESIZED:
-		rsv->needs_release_companion_mas = false;
-		uwb_drp_avail_reserve(rsv->rc, &rsv->mas);
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_ACCEPTED);
-		uwb_rsv_callback(rsv);
-		break;
-	case UWB_RSV_STATE_T_DENIED:
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_DENIED);
-		break;
-	case UWB_RSV_STATE_T_CONFLICT:
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_CONFLICT);
-		break;
-	case UWB_RSV_STATE_T_PENDING:
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_PENDING);
-		break;
-	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
-		rsv->needs_release_companion_mas = true;
-		uwb_drp_avail_reserve(rsv->rc, &mv->companion_mas);
-		uwb_rsv_state_update(rsv, UWB_RSV_STATE_T_EXPANDING_ACCEPTED);
-		break;
-	default:
-		dev_err(&rsv->rc->uwb_dev.dev, "unhandled state: %s (%d)\n",
-			uwb_rsv_state_str(new_state), new_state);
-	}
-}
-
-static void uwb_rsv_handle_timeout_work(struct work_struct *work)
-{
-	struct uwb_rsv *rsv = container_of(work, struct uwb_rsv,
-					   handle_timeout_work);
-	struct uwb_rc *rc = rsv->rc;
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	uwb_rsv_dump("TO", rsv);
-
-	switch (rsv->state) {
-	case UWB_RSV_STATE_O_INITIATED:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			goto unlock;
-		}
-		break;
-	case UWB_RSV_STATE_O_MOVE_EXPANDING:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_COMBINING);
-			goto unlock;
-		}
-		break;
-	case UWB_RSV_STATE_O_MOVE_COMBINING:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_REDUCING);
-			goto unlock;
-		}
-		break;
-	case UWB_RSV_STATE_O_MOVE_REDUCING:
-		if (rsv->is_multicast) {
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_ESTABLISHED);
-			goto unlock;
-		}
-		break;
-	case UWB_RSV_STATE_O_ESTABLISHED:
-		if (rsv->is_multicast)
-			goto unlock;
-		break;
-	case UWB_RSV_STATE_T_EXPANDING_ACCEPTED:
-		/*
-		 * The time out could be for the main or of the
-		 * companion DRP, assume it's for the companion and
-		 * drop that first.  A further time out is required to
-		 * drop the main.
-		 */
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_T_ACCEPTED);
-		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
-		goto unlock;
-	case UWB_RSV_STATE_NONE:
-		goto unlock;
-	default:
-		break;
-	}
-
-	uwb_rsv_remove(rsv);
-
-unlock:
-	mutex_unlock(&rc->rsvs_mutex);
-}
-
-static struct uwb_rsv *uwb_rsv_alloc(struct uwb_rc *rc)
-{
-	struct uwb_rsv *rsv;
-
-	rsv = kzalloc(sizeof(struct uwb_rsv), GFP_KERNEL);
-	if (!rsv)
-		return NULL;
-
-	INIT_LIST_HEAD(&rsv->rc_node);
-	INIT_LIST_HEAD(&rsv->pal_node);
-	kref_init(&rsv->kref);
-	timer_setup(&rsv->timer, uwb_rsv_timer, 0);
-
-	rsv->rc = rc;
-	INIT_WORK(&rsv->handle_timeout_work, uwb_rsv_handle_timeout_work);
-
-	return rsv;
-}
-
-/**
- * uwb_rsv_create - allocate and initialize a UWB reservation structure
- * @rc: the radio controller
- * @cb: callback to use when the reservation completes or terminates
- * @pal_priv: data private to the PAL to be passed in the callback
- *
- * The callback is called when the state of the reservation changes from:
- *
- *   - pending to accepted
- *   - pending to denined
- *   - accepted to terminated
- *   - pending to terminated
- */
-struct uwb_rsv *uwb_rsv_create(struct uwb_rc *rc, uwb_rsv_cb_f cb, void *pal_priv)
-{
-	struct uwb_rsv *rsv;
-
-	rsv = uwb_rsv_alloc(rc);
-	if (!rsv)
-		return NULL;
-
-	rsv->callback = cb;
-	rsv->pal_priv = pal_priv;
-
-	return rsv;
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_create);
-
-void uwb_rsv_remove(struct uwb_rsv *rsv)
-{
-	uwb_rsv_dump("RM", rsv);
-
-	if (rsv->state != UWB_RSV_STATE_NONE)
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-
-	if (rsv->needs_release_companion_mas)
-		uwb_drp_avail_release(rsv->rc, &rsv->mv.companion_mas);
-	uwb_drp_avail_release(rsv->rc, &rsv->mas);
-
-	if (uwb_rsv_is_owner(rsv))
-		uwb_rsv_put_stream(rsv);
-
-	uwb_dev_put(rsv->owner);
-	if (rsv->target.type == UWB_RSV_TARGET_DEV)
-		uwb_dev_put(rsv->target.dev);
-
-	list_del_init(&rsv->rc_node);
-	uwb_rsv_put(rsv);
-}
-
-/**
- * uwb_rsv_destroy - free a UWB reservation structure
- * @rsv: the reservation to free
- *
- * The reservation must already be terminated.
- */
-void uwb_rsv_destroy(struct uwb_rsv *rsv)
-{
-	uwb_rsv_put(rsv);
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_destroy);
-
-/**
- * usb_rsv_establish - start a reservation establishment
- * @rsv: the reservation
- *
- * The PAL should fill in @rsv's owner, target, type, max_mas,
- * min_mas, max_interval and is_multicast fields.  If the target is a
- * uwb_dev it must be referenced.
- *
- * The reservation's callback will be called when the reservation is
- * accepted, denied or times out.
- */
-int uwb_rsv_establish(struct uwb_rsv *rsv)
-{
-	struct uwb_rc *rc = rsv->rc;
-	struct uwb_mas_bm available;
-	struct device *dev = &rc->uwb_dev.dev;
-	int ret;
-
-	mutex_lock(&rc->rsvs_mutex);
-	ret = uwb_rsv_get_stream(rsv);
-	if (ret) {
-		dev_err(dev, "%s: uwb_rsv_get_stream failed: %d\n",
-			__func__, ret);
-		goto out;
-	}
-
-	rsv->tiebreaker = prandom_u32() & 1;
-	/* get available mas bitmap */
-	uwb_drp_available(rc, &available);
-
-	ret = uwb_rsv_find_best_allocation(rsv, &available, &rsv->mas);
-	if (ret == UWB_RSV_ALLOC_NOT_FOUND) {
-		ret = -EBUSY;
-		uwb_rsv_put_stream(rsv);
-		dev_err(dev, "%s: uwb_rsv_find_best_allocation failed: %d\n",
-			__func__, ret);
-		goto out;
-	}
-
-	ret = uwb_drp_avail_reserve_pending(rc, &rsv->mas);
-	if (ret != 0) {
-		uwb_rsv_put_stream(rsv);
-		dev_err(dev, "%s: uwb_drp_avail_reserve_pending failed: %d\n",
-			__func__, ret);
-		goto out;
-	}
-
-	uwb_rsv_get(rsv);
-	list_add_tail(&rsv->rc_node, &rc->reservations);
-	rsv->owner = &rc->uwb_dev;
-	uwb_dev_get(rsv->owner);
-	uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_INITIATED);
-out:
-	mutex_unlock(&rc->rsvs_mutex);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_establish);
-
-/**
- * uwb_rsv_modify - modify an already established reservation
- * @rsv: the reservation to modify
- * @max_mas: new maximum MAS to reserve
- * @min_mas: new minimum MAS to reserve
- * @max_interval: new max_interval to use
- *
- * FIXME: implement this once there are PALs that use it.
- */
-int uwb_rsv_modify(struct uwb_rsv *rsv, int max_mas, int min_mas, int max_interval)
-{
-	return -ENOSYS;
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_modify);
-
-/*
- * move an already established reservation (rc->rsvs_mutex must to be
- * taken when tis function is called)
- */
-int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available)
-{
-	struct uwb_rc *rc = rsv->rc;
-	struct uwb_drp_backoff_win *bow = &rc->bow;
-	struct device *dev = &rc->uwb_dev.dev;
-	struct uwb_rsv_move *mv;
-	int ret = 0;
-
-	if (bow->can_reserve_extra_mases == false)
-		return -EBUSY;
-
-	mv = &rsv->mv;
-
-	if (uwb_rsv_find_best_allocation(rsv, available, &mv->final_mas) == UWB_RSV_ALLOC_FOUND) {
-
-		if (!bitmap_equal(rsv->mas.bm, mv->final_mas.bm, UWB_NUM_MAS)) {
-			/* We want to move the reservation */
-			bitmap_andnot(mv->companion_mas.bm, mv->final_mas.bm, rsv->mas.bm, UWB_NUM_MAS);
-			uwb_drp_avail_reserve_pending(rc, &mv->companion_mas);
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_O_MOVE_EXPANDING);
-		}
-	} else {
-		dev_dbg(dev, "new allocation not found\n");
-	}
-
-	return ret;
-}
-
-/* It will try to move every reservation in state O_ESTABLISHED giving
- * to the MAS allocator algorithm an availability that is the real one
- * plus the allocation already established from the reservation. */
-void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc)
-{
-	struct uwb_drp_backoff_win *bow = &rc->bow;
-	struct uwb_rsv *rsv;
-	struct uwb_mas_bm mas;
-
-	if (bow->can_reserve_extra_mases == false)
-		return;
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->state == UWB_RSV_STATE_O_ESTABLISHED ||
-		    rsv->state == UWB_RSV_STATE_O_TO_BE_MOVED) {
-			uwb_drp_available(rc, &mas);
-			bitmap_or(mas.bm, mas.bm, rsv->mas.bm, UWB_NUM_MAS);
-			uwb_rsv_try_move(rsv, &mas);
-		}
-	}
-
-}
-
-/**
- * uwb_rsv_terminate - terminate an established reservation
- * @rsv: the reservation to terminate
- *
- * A reservation is terminated by removing the DRP IE from the beacon,
- * the other end will consider the reservation to be terminated when
- * it does not see the DRP IE for at least mMaxLostBeacons.
- *
- * If applicable, the reference to the target uwb_dev will be released.
- */
-void uwb_rsv_terminate(struct uwb_rsv *rsv)
-{
-	struct uwb_rc *rc = rsv->rc;
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	if (rsv->state != UWB_RSV_STATE_NONE)
-		uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-
-	mutex_unlock(&rc->rsvs_mutex);
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_terminate);
-
-/**
- * uwb_rsv_accept - accept a new reservation from a peer
- * @rsv:      the reservation
- * @cb:       call back for reservation changes
- * @pal_priv: data to be passed in the above call back
- *
- * Reservation requests from peers are denied unless a PAL accepts it
- * by calling this function.
- *
- * The PAL call uwb_rsv_destroy() for all accepted reservations before
- * calling uwb_pal_unregister().
- */
-void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv)
-{
-	uwb_rsv_get(rsv);
-
-	rsv->callback = cb;
-	rsv->pal_priv = pal_priv;
-	rsv->state    = UWB_RSV_STATE_T_ACCEPTED;
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_accept);
-
-/*
- * Is a received DRP IE for this reservation?
- */
-static bool uwb_rsv_match(struct uwb_rsv *rsv, struct uwb_dev *src,
-			  struct uwb_ie_drp *drp_ie)
-{
-	struct uwb_dev_addr *rsv_src;
-	int stream;
-
-	stream = uwb_ie_drp_stream_index(drp_ie);
-
-	if (rsv->stream != stream)
-		return false;
-
-	switch (rsv->target.type) {
-	case UWB_RSV_TARGET_DEVADDR:
-		return rsv->stream == stream;
-	case UWB_RSV_TARGET_DEV:
-		if (uwb_ie_drp_owner(drp_ie))
-			rsv_src = &rsv->owner->dev_addr;
-		else
-			rsv_src = &rsv->target.dev->dev_addr;
-		return uwb_dev_addr_cmp(&src->dev_addr, rsv_src) == 0;
-	}
-	return false;
-}
-
-static struct uwb_rsv *uwb_rsv_new_target(struct uwb_rc *rc,
-					  struct uwb_dev *src,
-					  struct uwb_ie_drp *drp_ie)
-{
-	struct uwb_rsv *rsv;
-	struct uwb_pal *pal;
-	enum uwb_rsv_state state;
-
-	rsv = uwb_rsv_alloc(rc);
-	if (!rsv)
-		return NULL;
-
-	rsv->rc          = rc;
-	rsv->owner       = src;
-	uwb_dev_get(rsv->owner);
-	rsv->target.type = UWB_RSV_TARGET_DEV;
-	rsv->target.dev  = &rc->uwb_dev;
-	uwb_dev_get(&rc->uwb_dev);
-	rsv->type        = uwb_ie_drp_type(drp_ie);
-	rsv->stream      = uwb_ie_drp_stream_index(drp_ie);
-	uwb_drp_ie_to_bm(&rsv->mas, drp_ie);
-
-	/*
-	 * See if any PALs are interested in this reservation. If not,
-	 * deny the request.
-	 */
-	rsv->state = UWB_RSV_STATE_T_DENIED;
-	mutex_lock(&rc->uwb_dev.mutex);
-	list_for_each_entry(pal, &rc->pals, node) {
-		if (pal->new_rsv)
-			pal->new_rsv(pal, rsv);
-		if (rsv->state == UWB_RSV_STATE_T_ACCEPTED)
-			break;
-	}
-	mutex_unlock(&rc->uwb_dev.mutex);
-
-	list_add_tail(&rsv->rc_node, &rc->reservations);
-	state = rsv->state;
-	rsv->state = UWB_RSV_STATE_NONE;
-
-	/* FIXME: do something sensible here */
-	if (state == UWB_RSV_STATE_T_ACCEPTED
-	    && uwb_drp_avail_reserve_pending(rc, &rsv->mas) == -EBUSY) {
-		/* FIXME: do something sensible here */
-	} else {
-		uwb_rsv_set_state(rsv, state);
-	}
-
-	return rsv;
-}
-
-/**
- * uwb_rsv_get_usable_mas - get the bitmap of the usable MAS of a reservations
- * @rsv: the reservation.
- * @mas: returns the available MAS.
- *
- * The usable MAS of a reservation may be less than the negotiated MAS
- * if alien BPs are present.
- */
-void uwb_rsv_get_usable_mas(struct uwb_rsv *rsv, struct uwb_mas_bm *mas)
-{
-	bitmap_zero(mas->bm, UWB_NUM_MAS);
-	bitmap_andnot(mas->bm, rsv->mas.bm, rsv->rc->cnflt_alien_bitmap.bm, UWB_NUM_MAS);
-}
-EXPORT_SYMBOL_GPL(uwb_rsv_get_usable_mas);
-
-/**
- * uwb_rsv_find - find a reservation for a received DRP IE.
- * @rc: the radio controller
- * @src: source of the DRP IE
- * @drp_ie: the DRP IE
- *
- * If the reservation cannot be found and the DRP IE is from a peer
- * attempting to establish a new reservation, create a new reservation
- * and add it to the list.
- */
-struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
-			     struct uwb_ie_drp *drp_ie)
-{
-	struct uwb_rsv *rsv;
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (uwb_rsv_match(rsv, src, drp_ie))
-			return rsv;
-	}
-
-	if (uwb_ie_drp_owner(drp_ie))
-		return uwb_rsv_new_target(rc, src, drp_ie);
-
-	return NULL;
-}
-
-/*
- * Go through all the reservations and check for timeouts and (if
- * necessary) update their DRP IEs.
- *
- * FIXME: look at building the SET_DRP_IE command here rather than
- * having to rescan the list in uwb_rc_send_all_drp_ie().
- */
-static bool uwb_rsv_update_all(struct uwb_rc *rc)
-{
-	struct uwb_rsv *rsv, *t;
-	bool ie_updated = false;
-
-	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		if (!rsv->ie_valid) {
-			uwb_drp_ie_update(rsv);
-			ie_updated = true;
-		}
-	}
-
-	return ie_updated;
-}
-
-void uwb_rsv_queue_update(struct uwb_rc *rc)
-{
-	unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE;
-
-	queue_delayed_work(rc->rsv_workq, &rc->rsv_update_work, usecs_to_jiffies(delay_us));
-}
-
-/**
- * uwb_rsv_sched_update - schedule an update of the DRP IEs
- * @rc: the radio controller.
- *
- * To improve performance and ensure correctness with [ECMA-368] the
- * number of SET-DRP-IE commands that are done are limited.
- *
- * DRP IEs update come from two sources: DRP events from the hardware
- * which all occur at the beginning of the superframe ('syncronous'
- * events) and reservation establishment/termination requests from
- * PALs or timers ('asynchronous' events).
- *
- * A delayed work ensures that all the synchronous events result in
- * one SET-DRP-IE command.
- *
- * Additional logic (the set_drp_ie_pending and rsv_updated_postponed
- * flags) will prevent an asynchrous event starting a SET-DRP-IE
- * command if one is currently awaiting a response.
- *
- * FIXME: this does leave a window where an asynchrous event can delay
- * the SET-DRP-IE for a synchronous event by one superframe.
- */
-void uwb_rsv_sched_update(struct uwb_rc *rc)
-{
-	spin_lock_irq(&rc->rsvs_lock);
-	if (!delayed_work_pending(&rc->rsv_update_work)) {
-		if (rc->set_drp_ie_pending > 0) {
-			rc->set_drp_ie_pending++;
-			goto unlock;
-		}
-		uwb_rsv_queue_update(rc);
-	}
-unlock:
-	spin_unlock_irq(&rc->rsvs_lock);
-}
-
-/*
- * Update DRP IEs and, if necessary, the DRP Availability IE and send
- * the updated IEs to the radio controller.
- */
-static void uwb_rsv_update_work(struct work_struct *work)
-{
-	struct uwb_rc *rc = container_of(work, struct uwb_rc,
-					 rsv_update_work.work);
-	bool ie_updated;
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	ie_updated = uwb_rsv_update_all(rc);
-
-	if (!rc->drp_avail.ie_valid) {
-		uwb_drp_avail_ie_update(rc);
-		ie_updated = true;
-	}
-
-	if (ie_updated && (rc->set_drp_ie_pending == 0))
-		uwb_rc_send_all_drp_ie(rc);
-
-	mutex_unlock(&rc->rsvs_mutex);
-}
-
-static void uwb_rsv_alien_bp_work(struct work_struct *work)
-{
-	struct uwb_rc *rc = container_of(work, struct uwb_rc,
-					 rsv_alien_bp_work.work);
-	struct uwb_rsv *rsv;
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		if (rsv->type != UWB_DRP_TYPE_ALIEN_BP) {
-			uwb_rsv_callback(rsv);
-		}
-	}
-
-	mutex_unlock(&rc->rsvs_mutex);
-}
-
-static void uwb_rsv_timer(struct timer_list *t)
-{
-	struct uwb_rsv *rsv = from_timer(rsv, t, timer);
-
-	queue_work(rsv->rc->rsv_workq, &rsv->handle_timeout_work);
-}
-
-/**
- * uwb_rsv_remove_all - remove all reservations
- * @rc: the radio controller
- *
- * A DRP IE update is not done.
- */
-void uwb_rsv_remove_all(struct uwb_rc *rc)
-{
-	struct uwb_rsv *rsv, *t;
-
-	mutex_lock(&rc->rsvs_mutex);
-	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		if (rsv->state != UWB_RSV_STATE_NONE)
-			uwb_rsv_set_state(rsv, UWB_RSV_STATE_NONE);
-		del_timer_sync(&rsv->timer);
-	}
-	/* Cancel any postponed update. */
-	rc->set_drp_ie_pending = 0;
-	mutex_unlock(&rc->rsvs_mutex);
-
-	cancel_delayed_work_sync(&rc->rsv_update_work);
-	flush_workqueue(rc->rsv_workq);
-
-	mutex_lock(&rc->rsvs_mutex);
-	list_for_each_entry_safe(rsv, t, &rc->reservations, rc_node) {
-		uwb_rsv_remove(rsv);
-	}
-	mutex_unlock(&rc->rsvs_mutex);
-}
-
-void uwb_rsv_init(struct uwb_rc *rc)
-{
-	INIT_LIST_HEAD(&rc->reservations);
-	INIT_LIST_HEAD(&rc->cnflt_alien_list);
-	mutex_init(&rc->rsvs_mutex);
-	spin_lock_init(&rc->rsvs_lock);
-	INIT_DELAYED_WORK(&rc->rsv_update_work, uwb_rsv_update_work);
-	INIT_DELAYED_WORK(&rc->rsv_alien_bp_work, uwb_rsv_alien_bp_work);
-	rc->bow.can_reserve_extra_mases = true;
-	rc->bow.total_expired = 0;
-	rc->bow.window = UWB_DRP_BACKOFF_WIN_MIN >> 1;
-	timer_setup(&rc->bow.timer, uwb_rsv_backoff_win_timer, 0);
-
-	bitmap_complement(rc->uwb_dev.streams, rc->uwb_dev.streams, UWB_NUM_STREAMS);
-}
-
-int uwb_rsv_setup(struct uwb_rc *rc)
-{
-	char name[16];
-
-	snprintf(name, sizeof(name), "%s_rsvd", dev_name(&rc->uwb_dev.dev));
-	rc->rsv_workq = create_singlethread_workqueue(name);
-	if (rc->rsv_workq == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void uwb_rsv_cleanup(struct uwb_rc *rc)
-{
-	uwb_rsv_remove_all(rc);
-	destroy_workqueue(rc->rsv_workq);
-}
diff --git a/drivers/uwb/scan.c b/drivers/uwb/scan.c
deleted file mode 100644
index ffc3f452302d..000000000000
--- a/drivers/uwb/scan.c
+++ /dev/null
@@ -1,120 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Scanning management
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: docs
- * FIXME: there are issues here on how BEACON and SCAN on USB RCI deal
- *        with each other. Currently seems that START_BEACON while
- *        SCAN_ONLY will cancel the scan, so we need to update the
- *        state here. Clarification request sent by email on
- *        10/05/2005.
- *        10/28/2005 No clear answer heard--maybe we'll hack the API
- *                   so that when we start beaconing, if the HC is
- *                   scanning in a mode not compatible with beaconing
- *                   we just fail.
- */
-
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include "uwb-internal.h"
-
-
-/**
- * Start/stop scanning in a radio controller
- *
- * @rc:      UWB Radio Controller
- * @channel: Channel to scan; encodings in WUSB1.0[Table 5.12]
- * @type:    Type of scanning to do.
- * @bpst_offset: value at which to start scanning (if type ==
- *                UWB_SCAN_ONLY_STARTTIME)
- * @returns: 0 if ok, < 0 errno code on error
- *
- * We put the command on kmalloc'ed memory as some arches cannot do
- * USB from the stack. The reply event is copied from an stage buffer,
- * so it can be in the stack. See WUSB1.0[8.6.2.4] for more details.
- */
-int uwb_rc_scan(struct uwb_rc *rc,
-		unsigned channel, enum uwb_scan_type type,
-		unsigned bpst_offset)
-{
-	int result;
-	struct uwb_rc_cmd_scan *cmd;
-	struct uwb_rc_evt_confirm reply;
-
-	result = -ENOMEM;
-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (cmd == NULL)
-		goto error_kzalloc;
-	mutex_lock(&rc->uwb_dev.mutex);
-	cmd->rccb.bCommandType = UWB_RC_CET_GENERAL;
-	cmd->rccb.wCommand = cpu_to_le16(UWB_RC_CMD_SCAN);
-	cmd->bChannelNumber = channel;
-	cmd->bScanState = type;
-	cmd->wStartTime = cpu_to_le16(bpst_offset);
-	reply.rceb.bEventType = UWB_RC_CET_GENERAL;
-	reply.rceb.wEvent = UWB_RC_CMD_SCAN;
-	result = uwb_rc_cmd(rc, "SCAN", &cmd->rccb, sizeof(*cmd),
-			    &reply.rceb, sizeof(reply));
-	if (result < 0)
-		goto error_cmd;
-	if (reply.bResultCode != UWB_RC_RES_SUCCESS) {
-		dev_err(&rc->uwb_dev.dev,
-			"SCAN: command execution failed: %s (%d)\n",
-			uwb_rc_strerror(reply.bResultCode), reply.bResultCode);
-		result = -EIO;
-		goto error_cmd;
-	}
-	rc->scanning = channel;
-	rc->scan_type = type;
-error_cmd:
-	mutex_unlock(&rc->uwb_dev.mutex);
-	kfree(cmd);
-error_kzalloc:
-	return result;
-}
-
-/*
- * Print scanning state
- */
-static ssize_t uwb_rc_scan_show(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	ssize_t result;
-
-	mutex_lock(&rc->uwb_dev.mutex);
-	result = sprintf(buf, "%d %d\n", rc->scanning, rc->scan_type);
-	mutex_unlock(&rc->uwb_dev.mutex);
-	return result;
-}
-
-/*
- *
- */
-static ssize_t uwb_rc_scan_store(struct device *dev,
-				 struct device_attribute *attr,
-				 const char *buf, size_t size)
-{
-	struct uwb_dev *uwb_dev = to_uwb_dev(dev);
-	struct uwb_rc *rc = uwb_dev->rc;
-	unsigned channel;
-	unsigned type;
-	unsigned bpst_offset = 0;
-	ssize_t result = -EINVAL;
-
-	result = sscanf(buf, "%u %u %u\n", &channel, &type, &bpst_offset);
-	if (result >= 2 && type < UWB_SCAN_TOP)
-		result = uwb_rc_scan(rc, channel, type, bpst_offset);
-
-	return result < 0 ? result : size;
-}
-
-/** Radio Control sysfs interface (declaration) */
-DEVICE_ATTR(scan, S_IRUGO | S_IWUSR, uwb_rc_scan_show, uwb_rc_scan_store);
diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c
deleted file mode 100644
index 0fdc38078eee..000000000000
--- a/drivers/uwb/umc-bus.c
+++ /dev/null
@@ -1,211 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Bus for UWB Multi-interface Controller capabilities.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/sysfs.h>
-#include <linux/workqueue.h>
-#include <linux/module.h>
-#include <linux/uwb/umc.h>
-#include <linux/pci.h>
-
-static int umc_bus_pre_reset_helper(struct device *dev, void *data)
-{
-	int ret = 0;
-
-	if (dev->driver) {
-		struct umc_dev *umc = to_umc_dev(dev);
-		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
-
-		if (umc_drv->pre_reset)
-			ret = umc_drv->pre_reset(umc);
-		else
-			device_release_driver(dev);
-	}
-	return ret;
-}
-
-static int umc_bus_post_reset_helper(struct device *dev, void *data)
-{
-	int ret = 0;
-
-	if (dev->driver) {
-		struct umc_dev *umc = to_umc_dev(dev);
-		struct umc_driver *umc_drv = to_umc_driver(dev->driver);
-
-		if (umc_drv->post_reset)
-			ret = umc_drv->post_reset(umc);
-	} else
-		ret = device_attach(dev);
-
-	return ret;
-}
-
-/**
- * umc_controller_reset - reset the whole UMC controller
- * @umc: the UMC device for the radio controller.
- *
- * Drivers or all capabilities of the controller will have their
- * pre_reset methods called or be unbound from their device.  Then all
- * post_reset methods will be called or the drivers will be rebound.
- *
- * Radio controllers must provide pre_reset and post_reset methods and
- * reset the hardware in their start method.
- *
- * If this is called while a probe() or remove() is in progress it
- * will return -EAGAIN and not perform the reset.
- */
-int umc_controller_reset(struct umc_dev *umc)
-{
-	struct device *parent = umc->dev.parent;
-	int ret = 0;
-
-	if (!device_trylock(parent))
-		return -EAGAIN;
-	ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper);
-	if (ret >= 0)
-		ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper);
-	device_unlock(parent);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(umc_controller_reset);
-
-/**
- * umc_match_pci_id - match a UMC driver to a UMC device's parent PCI device.
- * @umc_drv: umc driver with match_data pointing to a zero-terminated
- * table of pci_device_id's.
- * @umc: umc device whose parent is to be matched.
- */
-int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc)
-{
-	const struct pci_device_id *id_table = umc_drv->match_data;
-	struct pci_dev *pci;
-
-	if (!dev_is_pci(umc->dev.parent))
-		return 0;
-
-	pci = to_pci_dev(umc->dev.parent);
-	return pci_match_id(id_table, pci) != NULL;
-}
-EXPORT_SYMBOL_GPL(umc_match_pci_id);
-
-static int umc_bus_rescan_helper(struct device *dev, void *data)
-{
-	int ret = 0;
-
-	if (!dev->driver)
-		ret = device_attach(dev);
-
-	return ret;
-}
-
-static void umc_bus_rescan(struct device *parent)
-{
-	int err;
-
-	/*
-	 * We can't use bus_rescan_devices() here as it deadlocks when
-	 * it tries to retake the dev->parent semaphore.
-	 */
-	err = device_for_each_child(parent, NULL, umc_bus_rescan_helper);
-	if (err < 0)
-		printk(KERN_WARNING "%s: rescan of bus failed: %d\n",
-		       KBUILD_MODNAME, err);
-}
-
-static int umc_bus_match(struct device *dev, struct device_driver *drv)
-{
-	struct umc_dev *umc = to_umc_dev(dev);
-	struct umc_driver *umc_driver = to_umc_driver(drv);
-
-	if (umc->cap_id == umc_driver->cap_id) {
-		if (umc_driver->match)
-			return umc_driver->match(umc_driver, umc);
-		else
-			return 1;
-	}
-	return 0;
-}
-
-static int umc_device_probe(struct device *dev)
-{
-	struct umc_dev *umc;
-	struct umc_driver *umc_driver;
-	int err;
-
-	umc_driver = to_umc_driver(dev->driver);
-	umc = to_umc_dev(dev);
-
-	get_device(dev);
-	err = umc_driver->probe(umc);
-	if (err)
-		put_device(dev);
-	else
-		umc_bus_rescan(dev->parent);
-
-	return err;
-}
-
-static int umc_device_remove(struct device *dev)
-{
-	struct umc_dev *umc;
-	struct umc_driver *umc_driver;
-
-	umc_driver = to_umc_driver(dev->driver);
-	umc = to_umc_dev(dev);
-
-	umc_driver->remove(umc);
-	put_device(dev);
-	return 0;
-}
-
-static ssize_t capability_id_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct umc_dev *umc = to_umc_dev(dev);
-
-	return sprintf(buf, "0x%02x\n", umc->cap_id);
-}
-static DEVICE_ATTR_RO(capability_id);
-
-static ssize_t version_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-	struct umc_dev *umc = to_umc_dev(dev);
-
-	return sprintf(buf, "0x%04x\n", umc->version);
-}
-static DEVICE_ATTR_RO(version);
-
-static struct attribute *umc_dev_attrs[] = {
-	&dev_attr_capability_id.attr,
-	&dev_attr_version.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(umc_dev);
-
-struct bus_type umc_bus_type = {
-	.name		= "umc",
-	.match		= umc_bus_match,
-	.probe		= umc_device_probe,
-	.remove		= umc_device_remove,
-	.dev_groups	= umc_dev_groups,
-};
-EXPORT_SYMBOL_GPL(umc_bus_type);
-
-static int __init umc_bus_init(void)
-{
-	return bus_register(&umc_bus_type);
-}
-module_init(umc_bus_init);
-
-static void __exit umc_bus_exit(void)
-{
-	bus_unregister(&umc_bus_type);
-}
-module_exit(umc_bus_exit);
-
-MODULE_DESCRIPTION("UWB Multi-interface Controller capability bus");
-MODULE_AUTHOR("Cambridge Silicon Radio Ltd.");
-MODULE_LICENSE("GPL");
diff --git a/drivers/uwb/umc-dev.c b/drivers/uwb/umc-dev.c
deleted file mode 100644
index c845ca414bb2..000000000000
--- a/drivers/uwb/umc-dev.c
+++ /dev/null
@@ -1,94 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB Multi-interface Controller device management.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <linux/uwb/umc.h>
-
-static void umc_device_release(struct device *dev)
-{
-	struct umc_dev *umc = to_umc_dev(dev);
-
-	kfree(umc);
-}
-
-/**
- * umc_device_create - allocate a child UMC device
- * @parent: parent of the new UMC device.
- * @n:      index of the new device.
- *
- * The new UMC device will have a bus ID of the parent with '-n'
- * appended.
- */
-struct umc_dev *umc_device_create(struct device *parent, int n)
-{
-	struct umc_dev *umc;
-
-	umc = kzalloc(sizeof(struct umc_dev), GFP_KERNEL);
-	if (umc) {
-		dev_set_name(&umc->dev, "%s-%d", dev_name(parent), n);
-		umc->dev.parent  = parent;
-		umc->dev.bus     = &umc_bus_type;
-		umc->dev.release = umc_device_release;
-
-		umc->dev.dma_mask = parent->dma_mask;
-	}
-	return umc;
-}
-EXPORT_SYMBOL_GPL(umc_device_create);
-
-/**
- * umc_device_register - register a UMC device
- * @umc: pointer to the UMC device
- *
- * The memory resource for the UMC device is acquired and the device
- * registered with the system.
- */
-int umc_device_register(struct umc_dev *umc)
-{
-	int err;
-
-	err = request_resource(umc->resource.parent, &umc->resource);
-	if (err < 0) {
-		dev_err(&umc->dev, "can't allocate resource range %pR: %d\n",
-			&umc->resource, err);
-		goto error_request_resource;
-	}
-
-	err = device_register(&umc->dev);
-	if (err < 0)
-		goto error_device_register;
-	return 0;
-
-error_device_register:
-	put_device(&umc->dev);
-	release_resource(&umc->resource);
-error_request_resource:
-	return err;
-}
-EXPORT_SYMBOL_GPL(umc_device_register);
-
-/**
- * umc_device_unregister - unregister a UMC device
- * @umc: pointer to the UMC device
- *
- * First we unregister the device, make sure the driver can do it's
- * resource release thing and then we try to release any left over
- * resources. We take a ref to the device, to make sure it doesn't
- * disappear under our feet.
- */
-void umc_device_unregister(struct umc_dev *umc)
-{
-	struct device *dev;
-	if (!umc)
-		return;
-	dev = get_device(&umc->dev);
-	device_unregister(&umc->dev);
-	release_resource(&umc->resource);
-	put_device(dev);
-}
-EXPORT_SYMBOL_GPL(umc_device_unregister);
diff --git a/drivers/uwb/umc-drv.c b/drivers/uwb/umc-drv.c
deleted file mode 100644
index b141d520efbf..000000000000
--- a/drivers/uwb/umc-drv.c
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * UWB Multi-interface Controller driver management.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/uwb/umc.h>
-
-int __umc_driver_register(struct umc_driver *umc_drv, struct module *module,
-			  const char *mod_name)
-{
-	umc_drv->driver.name     = umc_drv->name;
-	umc_drv->driver.owner    = module;
-	umc_drv->driver.mod_name = mod_name;
-	umc_drv->driver.bus      = &umc_bus_type;
-
-	return driver_register(&umc_drv->driver);
-}
-EXPORT_SYMBOL_GPL(__umc_driver_register);
-
-/**
- * umc_driver_register - unregister a UMC capabiltity driver.
- * @umc_drv:  pointer to the driver.
- */
-void umc_driver_unregister(struct umc_driver *umc_drv)
-{
-	driver_unregister(&umc_drv->driver);
-}
-EXPORT_SYMBOL_GPL(umc_driver_unregister);
diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c
deleted file mode 100644
index 5457b6d42387..000000000000
--- a/drivers/uwb/uwb-debug.c
+++ /dev/null
@@ -1,355 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Debug support
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- *
- * FIXME: doc
- */
-
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/notifier.h>
-#include <linux/device.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/seq_file.h>
-
-#include <linux/uwb/debug-cmd.h>
-
-#include "uwb-internal.h"
-
-/*
- * Debug interface
- *
- * Per radio controller debugfs files (in uwb/uwbN/):
- *
- * command: Flexible command interface (see <linux/uwb/debug-cmd.h>).
- *
- * reservations: information on reservations.
- *
- * accept: Set to true (Y or 1) to accept reservation requests from
- * peers.
- *
- * drp_avail: DRP availability information.
- */
-
-struct uwb_dbg {
-	struct uwb_pal pal;
-
-	bool accept;
-	struct list_head rsvs;
-
-	struct dentry *root_d;
-	struct dentry *command_f;
-	struct dentry *reservations_f;
-	struct dentry *accept_f;
-	struct dentry *drp_avail_f;
-	spinlock_t list_lock;
-};
-
-static struct dentry *root_dir;
-
-static void uwb_dbg_rsv_cb(struct uwb_rsv *rsv)
-{
-	struct uwb_dbg *dbg = rsv->pal_priv;
-
-	uwb_rsv_dump("debug", rsv);
-
-	if (rsv->state == UWB_RSV_STATE_NONE) {
-		spin_lock(&dbg->list_lock);
-		list_del(&rsv->pal_node);
-		spin_unlock(&dbg->list_lock);
-		uwb_rsv_destroy(rsv);
-	}
-}
-
-static int cmd_rsv_establish(struct uwb_rc *rc,
-			     struct uwb_dbg_cmd_rsv_establish *cmd)
-{
-	struct uwb_mac_addr macaddr;
-	struct uwb_rsv *rsv;
-	struct uwb_dev *target;
-	int ret;
-
-	memcpy(&macaddr, cmd->target, sizeof(macaddr));
-	target = uwb_dev_get_by_macaddr(rc, &macaddr);
-	if (target == NULL)
-		return -ENODEV;
-
-	rsv = uwb_rsv_create(rc, uwb_dbg_rsv_cb, rc->dbg);
-	if (rsv == NULL) {
-		uwb_dev_put(target);
-		return -ENOMEM;
-	}
-
-	rsv->target.type  = UWB_RSV_TARGET_DEV;
-	rsv->target.dev   = target;
-	rsv->type         = cmd->type;
-	rsv->max_mas      = cmd->max_mas;
-	rsv->min_mas      = cmd->min_mas;
-	rsv->max_interval = cmd->max_interval;
-
-	ret = uwb_rsv_establish(rsv);
-	if (ret)
-		uwb_rsv_destroy(rsv);
-	else {
-		spin_lock(&(rc->dbg)->list_lock);
-		list_add_tail(&rsv->pal_node, &rc->dbg->rsvs);
-		spin_unlock(&(rc->dbg)->list_lock);
-	}
-	return ret;
-}
-
-static int cmd_rsv_terminate(struct uwb_rc *rc,
-			     struct uwb_dbg_cmd_rsv_terminate *cmd)
-{
-	struct uwb_rsv *rsv, *found = NULL;
-	int i = 0;
-
-	spin_lock(&(rc->dbg)->list_lock);
-
-	list_for_each_entry(rsv, &rc->dbg->rsvs, pal_node) {
-		if (i == cmd->index) {
-			found = rsv;
-			uwb_rsv_get(found);
-			break;
-		}
-		i++;
-	}
-
-	spin_unlock(&(rc->dbg)->list_lock);
-
-	if (!found)
-		return -EINVAL;
-
-	uwb_rsv_terminate(found);
-	uwb_rsv_put(found);
-
-	return 0;
-}
-
-static int cmd_ie_add(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_add)
-{
-	return uwb_rc_ie_add(rc,
-			     (const struct uwb_ie_hdr *) ie_to_add->data,
-			     ie_to_add->len);
-}
-
-static int cmd_ie_rm(struct uwb_rc *rc, struct uwb_dbg_cmd_ie *ie_to_rm)
-{
-	return uwb_rc_ie_rm(rc, ie_to_rm->data[0]);
-}
-
-static ssize_t command_write(struct file *file, const char __user *buf,
-			 size_t len, loff_t *off)
-{
-	struct uwb_rc *rc = file->private_data;
-	struct uwb_dbg_cmd cmd;
-	int ret = 0;
-	
-	if (len != sizeof(struct uwb_dbg_cmd))
-		return -EINVAL;
-
-	if (copy_from_user(&cmd, buf, len) != 0)
-		return -EFAULT;
-
-	switch (cmd.type) {
-	case UWB_DBG_CMD_RSV_ESTABLISH:
-		ret = cmd_rsv_establish(rc, &cmd.rsv_establish);
-		break;
-	case UWB_DBG_CMD_RSV_TERMINATE:
-		ret = cmd_rsv_terminate(rc, &cmd.rsv_terminate);
-		break;
-	case UWB_DBG_CMD_IE_ADD:
-		ret = cmd_ie_add(rc, &cmd.ie_add);
-		break;
-	case UWB_DBG_CMD_IE_RM:
-		ret = cmd_ie_rm(rc, &cmd.ie_rm);
-		break;
-	case UWB_DBG_CMD_RADIO_START:
-		ret = uwb_radio_start(&rc->dbg->pal);
-		break;
-	case UWB_DBG_CMD_RADIO_STOP:
-		uwb_radio_stop(&rc->dbg->pal);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return ret < 0 ? ret : len;
-}
-
-static const struct file_operations command_fops = {
-	.open	= simple_open,
-	.write  = command_write,
-	.read   = NULL,
-	.llseek = no_llseek,
-	.owner  = THIS_MODULE,
-};
-
-static int reservations_show(struct seq_file *s, void *p)
-{
-	struct uwb_rc *rc = s->private;
-	struct uwb_rsv *rsv;
-
-	mutex_lock(&rc->rsvs_mutex);
-
-	list_for_each_entry(rsv, &rc->reservations, rc_node) {
-		struct uwb_dev_addr devaddr;
-		char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE];
-		bool is_owner;
-
-		uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr);
-		if (rsv->target.type == UWB_RSV_TARGET_DEV) {
-			devaddr = rsv->target.dev->dev_addr;
-			is_owner = &rc->uwb_dev == rsv->owner;
-		} else {
-			devaddr = rsv->target.devaddr;
-			is_owner = true;
-		}
-		uwb_dev_addr_print(target, sizeof(target), &devaddr);
-
-		seq_printf(s, "%c %s -> %s: %s\n",
-			   is_owner ? 'O' : 'T',
-			   owner, target, uwb_rsv_state_str(rsv->state));
-		seq_printf(s, "  stream: %d  type: %s\n",
-			   rsv->stream, uwb_rsv_type_str(rsv->type));
-		seq_printf(s, "  %*pb\n", UWB_NUM_MAS, rsv->mas.bm);
-	}
-
-	mutex_unlock(&rc->rsvs_mutex);
-
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(reservations);
-
-static int drp_avail_show(struct seq_file *s, void *p)
-{
-	struct uwb_rc *rc = s->private;
-
-	seq_printf(s, "global:  %*pb\n", UWB_NUM_MAS, rc->drp_avail.global);
-	seq_printf(s, "local:   %*pb\n", UWB_NUM_MAS, rc->drp_avail.local);
-	seq_printf(s, "pending: %*pb\n", UWB_NUM_MAS, rc->drp_avail.pending);
-
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(drp_avail);
-
-static void uwb_dbg_channel_changed(struct uwb_pal *pal, int channel)
-{
-	struct device *dev = &pal->rc->uwb_dev.dev;
-
-	if (channel > 0)
-		dev_info(dev, "debug: channel %d started\n", channel);
-	else
-		dev_info(dev, "debug: channel stopped\n");
-}
-
-static void uwb_dbg_new_rsv(struct uwb_pal *pal, struct uwb_rsv *rsv)
-{
-	struct uwb_dbg *dbg = container_of(pal, struct uwb_dbg, pal);
-
-	if (dbg->accept) {
-		spin_lock(&dbg->list_lock);
-		list_add_tail(&rsv->pal_node, &dbg->rsvs);
-		spin_unlock(&dbg->list_lock);
-		uwb_rsv_accept(rsv, uwb_dbg_rsv_cb, dbg);
-	}
-}
-
-/**
- * uwb_dbg_add_rc - add a debug interface for a radio controller
- * @rc: the radio controller
- */
-void uwb_dbg_add_rc(struct uwb_rc *rc)
-{
-	rc->dbg = kzalloc(sizeof(struct uwb_dbg), GFP_KERNEL);
-	if (rc->dbg == NULL)
-		return;
-
-	INIT_LIST_HEAD(&rc->dbg->rsvs);
-	spin_lock_init(&(rc->dbg)->list_lock);
-
-	uwb_pal_init(&rc->dbg->pal);
-	rc->dbg->pal.rc = rc;
-	rc->dbg->pal.channel_changed = uwb_dbg_channel_changed;
-	rc->dbg->pal.new_rsv = uwb_dbg_new_rsv;
-	uwb_pal_register(&rc->dbg->pal);
-
-	if (root_dir) {
-		rc->dbg->root_d = debugfs_create_dir(dev_name(&rc->uwb_dev.dev),
-						     root_dir);
-		rc->dbg->command_f = debugfs_create_file("command", 0200,
-							 rc->dbg->root_d, rc,
-							 &command_fops);
-		rc->dbg->reservations_f = debugfs_create_file("reservations", 0444,
-							      rc->dbg->root_d, rc,
-							      &reservations_fops);
-		rc->dbg->accept_f = debugfs_create_bool("accept", 0644,
-							rc->dbg->root_d,
-							&rc->dbg->accept);
-		rc->dbg->drp_avail_f = debugfs_create_file("drp_avail", 0444,
-							   rc->dbg->root_d, rc,
-							   &drp_avail_fops);
-	}
-}
-
-/**
- * uwb_dbg_del_rc - remove a radio controller's debug interface
- * @rc: the radio controller
- */
-void uwb_dbg_del_rc(struct uwb_rc *rc)
-{
-	struct uwb_rsv *rsv, *t;
-
-	if (rc->dbg == NULL)
-		return;
-
-	list_for_each_entry_safe(rsv, t, &rc->dbg->rsvs, pal_node) {
-		uwb_rsv_terminate(rsv);
-	}
-
-	uwb_pal_unregister(&rc->dbg->pal);
-
-	if (root_dir) {
-		debugfs_remove(rc->dbg->drp_avail_f);
-		debugfs_remove(rc->dbg->accept_f);
-		debugfs_remove(rc->dbg->reservations_f);
-		debugfs_remove(rc->dbg->command_f);
-		debugfs_remove(rc->dbg->root_d);
-	}
-}
-
-/**
- * uwb_dbg_exit - initialize the debug interface sub-module
- */
-void uwb_dbg_init(void)
-{
-	root_dir = debugfs_create_dir("uwb", NULL);
-}
-
-/**
- * uwb_dbg_exit - clean-up the debug interface sub-module
- */
-void uwb_dbg_exit(void)
-{
-	debugfs_remove(root_dir);
-}
-
-/**
- * uwb_dbg_create_pal_dir - create a debugfs directory for a PAL
- * @pal: The PAL.
- */
-struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal)
-{
-	struct uwb_rc *rc = pal->rc;
-
-	if (root_dir && rc->dbg && rc->dbg->root_d && pal->name)
-		return debugfs_create_dir(pal->name, rc->dbg->root_d);
-	return NULL;
-}
diff --git a/drivers/uwb/uwb-internal.h b/drivers/uwb/uwb-internal.h
deleted file mode 100644
index 00de0a5333d2..000000000000
--- a/drivers/uwb/uwb-internal.h
+++ /dev/null
@@ -1,366 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Ultra Wide Band
- * UWB internal API
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This contains most of the internal API for UWB. This is stuff used
- * across the stack that of course, is of no interest to the rest.
- *
- * Some parts might end up going public (like uwb_rc_*())...
- */
-
-#ifndef __UWB_INTERNAL_H__
-#define __UWB_INTERNAL_H__
-
-#include <linux/kernel.h>
-#include <linux/device.h>
-#include <linux/uwb.h>
-#include <linux/mutex.h>
-
-struct uwb_beca_e;
-
-/* General device API */
-extern void uwb_dev_init(struct uwb_dev *uwb_dev);
-extern int __uwb_dev_offair(struct uwb_dev *, struct uwb_rc *);
-extern int uwb_dev_add(struct uwb_dev *uwb_dev, struct device *parent_dev,
-		       struct uwb_rc *parent_rc);
-extern void uwb_dev_rm(struct uwb_dev *uwb_dev);
-extern void uwbd_dev_onair(struct uwb_rc *, struct uwb_beca_e *);
-extern void uwbd_dev_offair(struct uwb_beca_e *);
-void uwb_notify(struct uwb_rc *rc, struct uwb_dev *uwb_dev, enum uwb_notifs event);
-
-/* General UWB Radio Controller Internal API */
-extern struct uwb_rc *__uwb_rc_try_get(struct uwb_rc *);
-static inline struct uwb_rc *__uwb_rc_get(struct uwb_rc *rc)
-{
-	uwb_dev_get(&rc->uwb_dev);
-	return rc;
-}
-
-static inline void __uwb_rc_put(struct uwb_rc *rc)
-{
-	if (rc)
-		uwb_dev_put(&rc->uwb_dev);
-}
-
-extern int uwb_rc_reset(struct uwb_rc *rc);
-extern int uwb_rc_beacon(struct uwb_rc *rc,
-			 int channel, unsigned bpst_offset);
-extern int uwb_rc_scan(struct uwb_rc *rc,
-		       unsigned channel, enum uwb_scan_type type,
-		       unsigned bpst_offset);
-extern int uwb_rc_send_all_drp_ie(struct uwb_rc *rc);
-
-void uwb_rc_ie_init(struct uwb_rc *);
-int uwb_rc_ie_setup(struct uwb_rc *);
-void uwb_rc_ie_release(struct uwb_rc *);
-int uwb_ie_dump_hex(const struct uwb_ie_hdr *ies, size_t len,
-		    char *buf, size_t size);
-int uwb_rc_set_ie(struct uwb_rc *, struct uwb_rc_cmd_set_ie *);
-
-
-extern const char *uwb_rc_strerror(unsigned code);
-
-/*
- * Time to wait for a response to an RC command.
- *
- * Some commands can take a long time to response. e.g., START_BEACON
- * may scan for several superframes before joining an existing beacon
- * group and this can take around 600 ms.
- */
-#define UWB_RC_CMD_TIMEOUT_MS 1000 /* ms */
-
-/*
- * Notification/Event Handlers
- */
-
-struct uwb_rc_neh;
-
-extern int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
-			    struct uwb_rccb *cmd, size_t cmd_size,
-			    u8 expected_type, u16 expected_event,
-			    uwb_rc_cmd_cb_f cb, void *arg);
-
-
-void uwb_rc_neh_create(struct uwb_rc *rc);
-void uwb_rc_neh_destroy(struct uwb_rc *rc);
-
-struct uwb_rc_neh *uwb_rc_neh_add(struct uwb_rc *rc, struct uwb_rccb *cmd,
-				  u8 expected_type, u16 expected_event,
-				  uwb_rc_cmd_cb_f cb, void *arg);
-void uwb_rc_neh_rm(struct uwb_rc *rc, struct uwb_rc_neh *neh);
-void uwb_rc_neh_arm(struct uwb_rc *rc, struct uwb_rc_neh *neh);
-void uwb_rc_neh_put(struct uwb_rc_neh *neh);
-
-/* Event size tables */
-extern int uwb_est_create(void);
-extern void uwb_est_destroy(void);
-
-/*
- * UWB conflicting alien reservations
- */
-struct uwb_cnflt_alien {
-	struct uwb_rc *rc;
-	struct list_head rc_node;
-	struct uwb_mas_bm mas;
-	struct timer_list timer;
-	struct work_struct cnflt_update_work;
-};
-
-enum uwb_uwb_rsv_alloc_result {
-	UWB_RSV_ALLOC_FOUND = 0,
-	UWB_RSV_ALLOC_NOT_FOUND,
-};
-
-enum uwb_rsv_mas_status {
-	UWB_RSV_MAS_NOT_AVAIL = 1,
-	UWB_RSV_MAS_SAFE,
-	UWB_RSV_MAS_UNSAFE,
-};
-
-struct uwb_rsv_col_set_info {
-	unsigned char start_col;
-	unsigned char interval;
-	unsigned char safe_mas_per_col;
-	unsigned char unsafe_mas_per_col;
-};
-
-struct uwb_rsv_col_info {
-	unsigned char max_avail_safe;
-	unsigned char max_avail_unsafe;
-	unsigned char highest_mas[UWB_MAS_PER_ZONE];
-	struct uwb_rsv_col_set_info csi;
-};
-
-struct uwb_rsv_row_info {
-	unsigned char avail[UWB_MAS_PER_ZONE];
-	unsigned char free_rows;
-	unsigned char used_rows;
-};
-
-/*
- * UWB find allocation
- */
-struct uwb_rsv_alloc_info {
-	unsigned char bm[UWB_MAS_PER_ZONE * UWB_NUM_ZONES];
-	struct uwb_rsv_col_info ci[UWB_NUM_ZONES];
-	struct uwb_rsv_row_info ri;
-	struct uwb_mas_bm *not_available;
-	struct uwb_mas_bm *result;
-	int min_mas;
-	int max_mas;
-	int max_interval;
-	int total_allocated_mases;
-	int safe_allocated_mases;
-	int unsafe_allocated_mases;
-	int interval;
-};
-
-int uwb_rsv_find_best_allocation(struct uwb_rsv *rsv,
-				 struct uwb_mas_bm *available,
-				 struct uwb_mas_bm *result);
-void uwb_rsv_handle_drp_avail_change(struct uwb_rc *rc);
-/*
- * UWB Events & management daemon
- */
-
-/**
- * enum uwb_event_type - types of UWB management daemon events
- *
- * The UWB management daemon (uwbd) can receive two types of events:
- *   UWB_EVT_TYPE_NOTIF - notification from the radio controller.
- *   UWB_EVT_TYPE_MSG   - a simple message.
- */
-enum uwb_event_type {
-	UWB_EVT_TYPE_NOTIF,
-	UWB_EVT_TYPE_MSG,
-};
-
-/**
- * struct uwb_event_notif - an event for a radio controller notification
- * @size: Size of the buffer (ie: Guaranteed to contain at least
- *        a full 'struct uwb_rceb')
- * @rceb: Pointer to a kmalloced() event payload
- */
-struct uwb_event_notif {
-	size_t size;
-	struct uwb_rceb *rceb;
-};
-
-/**
- * enum uwb_event_message - an event for a message for asynchronous processing
- *
- * UWB_EVT_MSG_RESET - reset the radio controller and all PAL hardware.
- */
-enum uwb_event_message {
-	UWB_EVT_MSG_RESET,
-};
-
-/**
- * UWB Event
- * @rc:         Radio controller that emitted the event (referenced)
- * @ts_jiffies: Timestamp, when was it received
- * @type:       This event's type.
- */
-struct uwb_event {
-	struct list_head list_node;
-	struct uwb_rc *rc;
-	unsigned long ts_jiffies;
-	enum uwb_event_type type;
-	union {
-		struct uwb_event_notif notif;
-		enum uwb_event_message message;
-	};
-};
-
-extern void uwbd_start(struct uwb_rc *rc);
-extern void uwbd_stop(struct uwb_rc *rc);
-extern struct uwb_event *uwb_event_alloc(size_t, gfp_t gfp_mask);
-extern void uwbd_event_queue(struct uwb_event *);
-void uwbd_flush(struct uwb_rc *rc);
-
-/* UWB event handlers */
-extern int uwbd_evt_handle_rc_ie_rcv(struct uwb_event *);
-extern int uwbd_evt_handle_rc_beacon(struct uwb_event *);
-extern int uwbd_evt_handle_rc_beacon_size(struct uwb_event *);
-extern int uwbd_evt_handle_rc_bpoie_change(struct uwb_event *);
-extern int uwbd_evt_handle_rc_bp_slot_change(struct uwb_event *);
-extern int uwbd_evt_handle_rc_drp(struct uwb_event *);
-extern int uwbd_evt_handle_rc_drp_avail(struct uwb_event *);
-
-int uwbd_msg_handle_reset(struct uwb_event *evt);
-
-
-/*
- * Address management
- */
-int uwb_rc_dev_addr_assign(struct uwb_rc *rc);
-int uwbd_evt_handle_rc_dev_addr_conflict(struct uwb_event *evt);
-
-/*
- * UWB Beacon Cache
- *
- * Each beacon we received is kept in a cache--when we receive that
- * beacon consistently, that means there is a new device that we have
- * to add to the system.
- */
-
-extern unsigned long beacon_timeout_ms;
-
-/**
- * Beacon cache entry
- *
- * @jiffies_refresh: last time a beacon was  received that refreshed
- *                   this cache entry.
- * @uwb_dev: device connected to this beacon. This pointer is not
- *           safe, you need to get it with uwb_dev_try_get()
- *
- * @hits: how many time we have seen this beacon since last time we
- *        cleared it
- */
-struct uwb_beca_e {
-	struct mutex mutex;
-	struct kref refcnt;
-	struct list_head node;
-	struct uwb_mac_addr *mac_addr;
-	struct uwb_dev_addr dev_addr;
-	u8 hits;
-	unsigned long ts_jiffies;
-	struct uwb_dev *uwb_dev;
-	struct uwb_rc_evt_beacon *be;
-	struct stats lqe_stats, rssi_stats;	/* radio statistics */
-};
-struct uwb_beacon_frame;
-extern ssize_t uwb_bce_print_IEs(struct uwb_dev *, struct uwb_beca_e *,
-				 char *, size_t);
-
-extern void uwb_bce_kfree(struct kref *_bce);
-static inline void uwb_bce_get(struct uwb_beca_e *bce)
-{
-	kref_get(&bce->refcnt);
-}
-static inline void uwb_bce_put(struct uwb_beca_e *bce)
-{
-	kref_put(&bce->refcnt, uwb_bce_kfree);
-}
-extern void uwb_beca_purge(struct uwb_rc *rc);
-extern void uwb_beca_release(struct uwb_rc *rc);
-
-struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
-				       const struct uwb_dev_addr *devaddr);
-struct uwb_dev *uwb_dev_get_by_macaddr(struct uwb_rc *rc,
-				       const struct uwb_mac_addr *macaddr);
-
-int uwb_radio_setup(struct uwb_rc *rc);
-void uwb_radio_reset_state(struct uwb_rc *rc);
-void uwb_radio_shutdown(struct uwb_rc *rc);
-int uwb_radio_force_channel(struct uwb_rc *rc, int channel);
-
-/* -- UWB Sysfs representation */
-extern struct class uwb_rc_class;
-extern struct bus_type uwb_bus_type;
-extern struct device_attribute dev_attr_mac_address;
-extern struct device_attribute dev_attr_beacon;
-extern struct device_attribute dev_attr_scan;
-
-/* -- DRP Bandwidth allocator: bandwidth allocations, reservations, DRP */
-void uwb_rsv_init(struct uwb_rc *rc);
-int uwb_rsv_setup(struct uwb_rc *rc);
-void uwb_rsv_cleanup(struct uwb_rc *rc);
-void uwb_rsv_remove_all(struct uwb_rc *rc);
-void uwb_rsv_get(struct uwb_rsv *rsv);
-void uwb_rsv_put(struct uwb_rsv *rsv);
-bool uwb_rsv_has_two_drp_ies(struct uwb_rsv *rsv);
-void uwb_rsv_dump(char *text, struct uwb_rsv *rsv);
-int uwb_rsv_try_move(struct uwb_rsv *rsv, struct uwb_mas_bm *available);
-void uwb_rsv_backoff_win_timer(struct timer_list *t);
-void uwb_rsv_backoff_win_increment(struct uwb_rc *rc);
-int uwb_rsv_status(struct uwb_rsv *rsv);
-int uwb_rsv_companion_status(struct uwb_rsv *rsv);
-
-void uwb_rsv_set_state(struct uwb_rsv *rsv, enum uwb_rsv_state new_state);
-void uwb_rsv_remove(struct uwb_rsv *rsv);
-struct uwb_rsv *uwb_rsv_find(struct uwb_rc *rc, struct uwb_dev *src,
-			     struct uwb_ie_drp *drp_ie);
-void uwb_rsv_sched_update(struct uwb_rc *rc);
-void uwb_rsv_queue_update(struct uwb_rc *rc);
-
-int uwb_drp_ie_update(struct uwb_rsv *rsv);
-void uwb_drp_ie_to_bm(struct uwb_mas_bm *bm, const struct uwb_ie_drp *drp_ie);
-
-void uwb_drp_avail_init(struct uwb_rc *rc);
-void uwb_drp_available(struct uwb_rc *rc, struct uwb_mas_bm *avail);
-int  uwb_drp_avail_reserve_pending(struct uwb_rc *rc, struct uwb_mas_bm *mas);
-void uwb_drp_avail_reserve(struct uwb_rc *rc, struct uwb_mas_bm *mas);
-void uwb_drp_avail_release(struct uwb_rc *rc, struct uwb_mas_bm *mas);
-void uwb_drp_avail_ie_update(struct uwb_rc *rc);
-
-/* -- PAL support */
-void uwb_rc_pal_init(struct uwb_rc *rc);
-
-/* -- Misc */
-
-extern ssize_t uwb_mac_frame_hdr_print(char *, size_t,
-				       const struct uwb_mac_frame_hdr *);
-
-/* -- Debug interface */
-void uwb_dbg_init(void);
-void uwb_dbg_exit(void);
-void uwb_dbg_add_rc(struct uwb_rc *rc);
-void uwb_dbg_del_rc(struct uwb_rc *rc);
-struct dentry *uwb_dbg_create_pal_dir(struct uwb_pal *pal);
-
-static inline void uwb_dev_lock(struct uwb_dev *uwb_dev)
-{
-	device_lock(&uwb_dev->dev);
-}
-
-static inline void uwb_dev_unlock(struct uwb_dev *uwb_dev)
-{
-	device_unlock(&uwb_dev->dev);
-}
-
-#endif /* #ifndef __UWB_INTERNAL_H__ */
diff --git a/drivers/uwb/uwbd.c b/drivers/uwb/uwbd.c
deleted file mode 100644
index dc5c743d4f5f..000000000000
--- a/drivers/uwb/uwbd.c
+++ /dev/null
@@ -1,356 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Ultra Wide Band
- * Neighborhood Management Daemon
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This daemon takes care of maintaing information that describes the
- * UWB neighborhood that the radios in this machine can see. It also
- * keeps a tab of which devices are visible, makes sure each HC sits
- * on a different channel to avoid interfering, etc.
- *
- * Different drivers (radio controller, device, any API in general)
- * communicate with this daemon through an event queue. Daemon wakes
- * up, takes a list of events and handles them one by one; handling
- * function is extracted from a table based on the event's type and
- * subtype. Events are freed only if the handling function says so.
- *
- *   . Lock protecting the event list has to be an spinlock and locked
- *     with IRQSAVE because it might be called from an interrupt
- *     context (ie: when events arrive and the notification drops
- *     down from the ISR).
- *
- *   . UWB radio controller drivers queue events to the daemon using
- *     uwbd_event_queue(). They just get the event, chew it to make it
- *     look like UWBD likes it and pass it in a buffer allocated with
- *     uwb_event_alloc().
- *
- * EVENTS
- *
- * Events have a type, a subtype, a length, some other stuff and the
- * data blob, which depends on the event. The header is 'struct
- * uwb_event'; for payloads, see 'struct uwbd_evt_*'.
- *
- * EVENT HANDLER TABLES
- *
- * To find a handling function for an event, the type is used to index
- * a subtype-table in the type-table. The subtype-table is indexed
- * with the subtype to get the function that handles the event. Start
- * with the main type-table 'uwbd_evt_type_handler'.
- *
- * DEVICES
- *
- * Devices are created when a bunch of beacons have been received and
- * it is stablished that the device has stable radio presence. CREATED
- * only, not configured. Devices are ONLY configured when an
- * Application-Specific IE Probe is receieved, in which the device
- * declares which Protocol ID it groks. Then the device is CONFIGURED
- * (and the driver->probe() stuff of the device model is invoked).
- *
- * Devices are considered disconnected when a certain number of
- * beacons are not received in an amount of time.
- *
- * Handler functions are called normally uwbd_evt_handle_*().
- */
-#include <linux/kthread.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/freezer.h>
-
-#include "uwb-internal.h"
-
-/*
- * UWBD Event handler function signature
- *
- * Return !0 if the event needs not to be freed (ie the handler
- * takes/took care of it). 0 means the daemon code will free the
- * event.
- *
- * @evt->rc is already referenced and guaranteed to exist. See
- * uwb_evt_handle().
- */
-typedef int (*uwbd_evt_handler_f)(struct uwb_event *);
-
-/**
- * Properties of a UWBD event
- *
- * @handler:    the function that will handle this event
- * @name:       text name of event
- */
-struct uwbd_event {
-	uwbd_evt_handler_f handler;
-	const char *name;
-};
-
-/* Table of handlers for and properties of the UWBD Radio Control Events */
-static struct uwbd_event uwbd_urc_events[] = {
-	[UWB_RC_EVT_IE_RCV] = {
-		.handler = uwbd_evt_handle_rc_ie_rcv,
-		.name = "IE_RECEIVED"
-	},
-	[UWB_RC_EVT_BEACON] = {
-		.handler = uwbd_evt_handle_rc_beacon,
-		.name = "BEACON_RECEIVED"
-	},
-	[UWB_RC_EVT_BEACON_SIZE] = {
-		.handler = uwbd_evt_handle_rc_beacon_size,
-		.name = "BEACON_SIZE_CHANGE"
-	},
-	[UWB_RC_EVT_BPOIE_CHANGE] = {
-		.handler = uwbd_evt_handle_rc_bpoie_change,
-		.name = "BPOIE_CHANGE"
-	},
-	[UWB_RC_EVT_BP_SLOT_CHANGE] = {
-		.handler = uwbd_evt_handle_rc_bp_slot_change,
-		.name = "BP_SLOT_CHANGE"
-	},
-	[UWB_RC_EVT_DRP_AVAIL] = {
-		.handler = uwbd_evt_handle_rc_drp_avail,
-		.name = "DRP_AVAILABILITY_CHANGE"
-	},
-	[UWB_RC_EVT_DRP] = {
-		.handler = uwbd_evt_handle_rc_drp,
-		.name = "DRP"
-	},
-	[UWB_RC_EVT_DEV_ADDR_CONFLICT] = {
-		.handler = uwbd_evt_handle_rc_dev_addr_conflict,
-		.name = "DEV_ADDR_CONFLICT",
-	},
-};
-
-
-
-struct uwbd_evt_type_handler {
-	const char *name;
-	struct uwbd_event *uwbd_events;
-	size_t size;
-};
-
-/* Table of handlers for each UWBD Event type. */
-static struct uwbd_evt_type_handler uwbd_urc_evt_type_handlers[] = {
-	[UWB_RC_CET_GENERAL] = {
-		.name        = "URC",
-		.uwbd_events = uwbd_urc_events,
-		.size        = ARRAY_SIZE(uwbd_urc_events),
-	},
-};
-
-static const struct uwbd_event uwbd_message_handlers[] = {
-	[UWB_EVT_MSG_RESET] = {
-		.handler = uwbd_msg_handle_reset,
-		.name = "reset",
-	},
-};
-
-/*
- * Handle an URC event passed to the UWB Daemon
- *
- * @evt: the event to handle
- * @returns: 0 if the event can be kfreed, !0 on the contrary
- *           (somebody else took ownership) [coincidentally, returning
- *           a <0 errno code will free it :)].
- *
- * Looks up the two indirection tables (one for the type, one for the
- * subtype) to decide which function handles it and then calls the
- * handler.
- *
- * The event structure passed to the event handler has the radio
- * controller in @evt->rc referenced. The reference will be dropped
- * once the handler returns, so if it needs it for longer (async),
- * it'll need to take another one.
- */
-static
-int uwbd_event_handle_urc(struct uwb_event *evt)
-{
-	int result = -EINVAL;
-	struct uwbd_evt_type_handler *type_table;
-	uwbd_evt_handler_f handler;
-	u8 type, context;
-	u16 event;
-
-	type = evt->notif.rceb->bEventType;
-	event = le16_to_cpu(evt->notif.rceb->wEvent);
-	context = evt->notif.rceb->bEventContext;
-
-	if (type >= ARRAY_SIZE(uwbd_urc_evt_type_handlers))
-		goto out;
-	type_table = &uwbd_urc_evt_type_handlers[type];
-	if (type_table->uwbd_events == NULL)
-		goto out;
-	if (event >= type_table->size)
-		goto out;
-	handler = type_table->uwbd_events[event].handler;
-	if (handler == NULL)
-		goto out;
-
-	result = (*handler)(evt);
-out:
-	if (result < 0)
-		dev_err(&evt->rc->uwb_dev.dev,
-			"UWBD: event 0x%02x/%04x/%02x, handling failed: %d\n",
-			type, event, context, result);
-	return result;
-}
-
-static void uwbd_event_handle_message(struct uwb_event *evt)
-{
-	struct uwb_rc *rc;
-	int result;
-
-	rc = evt->rc;
-
-	if (evt->message < 0 || evt->message >= ARRAY_SIZE(uwbd_message_handlers)) {
-		dev_err(&rc->uwb_dev.dev, "UWBD: invalid message type %d\n", evt->message);
-		return;
-	}
-
-	result = uwbd_message_handlers[evt->message].handler(evt);
-	if (result < 0)
-		dev_err(&rc->uwb_dev.dev, "UWBD: '%s' message failed: %d\n",
-			uwbd_message_handlers[evt->message].name, result);
-}
-
-static void uwbd_event_handle(struct uwb_event *evt)
-{
-	struct uwb_rc *rc;
-	int should_keep;
-
-	rc = evt->rc;
-
-	if (rc->ready) {
-		switch (evt->type) {
-		case UWB_EVT_TYPE_NOTIF:
-			should_keep = uwbd_event_handle_urc(evt);
-			if (should_keep <= 0)
-				kfree(evt->notif.rceb);
-			break;
-		case UWB_EVT_TYPE_MSG:
-			uwbd_event_handle_message(evt);
-			break;
-		default:
-			dev_err(&rc->uwb_dev.dev, "UWBD: invalid event type %d\n", evt->type);
-			break;
-		}
-	}
-
-	__uwb_rc_put(rc);	/* for the __uwb_rc_get() in uwb_rc_notif_cb() */
-}
-
-/**
- * UWB Daemon
- *
- * Listens to all UWB notifications and takes care to track the state
- * of the UWB neighbourhood for the kernel. When we do a run, we
- * spinlock, move the list to a private copy and release the
- * lock. Hold it as little as possible. Not a conflict: it is
- * guaranteed we own the events in the private list.
- *
- * FIXME: should change so we don't have a 1HZ timer all the time, but
- *        only if there are devices.
- */
-static int uwbd(void *param)
-{
-	struct uwb_rc *rc = param;
-	unsigned long flags;
-	struct uwb_event *evt;
-	int should_stop = 0;
-
-	while (1) {
-		wait_event_interruptible_timeout(
-			rc->uwbd.wq,
-			!list_empty(&rc->uwbd.event_list)
-			  || (should_stop = kthread_should_stop()),
-			HZ);
-		if (should_stop)
-			break;
-
-		spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
-		if (!list_empty(&rc->uwbd.event_list)) {
-			evt = list_first_entry(&rc->uwbd.event_list, struct uwb_event, list_node);
-			list_del(&evt->list_node);
-		} else
-			evt = NULL;
-		spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
-
-		if (evt) {
-			uwbd_event_handle(evt);
-			kfree(evt);
-		}
-
-		uwb_beca_purge(rc);	/* Purge devices that left */
-	}
-	return 0;
-}
-
-
-/** Start the UWB daemon */
-void uwbd_start(struct uwb_rc *rc)
-{
-	struct task_struct *task = kthread_run(uwbd, rc, "uwbd");
-	if (IS_ERR(task)) {
-		rc->uwbd.task = NULL;
-		printk(KERN_ERR "UWB: Cannot start management daemon; "
-		       "UWB won't work\n");
-	} else {
-		rc->uwbd.task = task;
-		rc->uwbd.pid = rc->uwbd.task->pid;
-	}
-}
-
-/* Stop the UWB daemon and free any unprocessed events */
-void uwbd_stop(struct uwb_rc *rc)
-{
-	if (rc->uwbd.task)
-		kthread_stop(rc->uwbd.task);
-	uwbd_flush(rc);
-}
-
-/*
- * Queue an event for the management daemon
- *
- * When some lower layer receives an event, it uses this function to
- * push it forward to the UWB daemon.
- *
- * Once you pass the event, you don't own it any more, but the daemon
- * does. It will uwb_event_free() it when done, so make sure you
- * uwb_event_alloc()ed it or bad things will happen.
- *
- * If the daemon is not running, we just free the event.
- */
-void uwbd_event_queue(struct uwb_event *evt)
-{
-	struct uwb_rc *rc = evt->rc;
-	unsigned long flags;
-
-	spin_lock_irqsave(&rc->uwbd.event_list_lock, flags);
-	if (rc->uwbd.pid != 0) {
-		list_add(&evt->list_node, &rc->uwbd.event_list);
-		wake_up_all(&rc->uwbd.wq);
-	} else {
-		__uwb_rc_put(evt->rc);
-		if (evt->type == UWB_EVT_TYPE_NOTIF)
-			kfree(evt->notif.rceb);
-		kfree(evt);
-	}
-	spin_unlock_irqrestore(&rc->uwbd.event_list_lock, flags);
-	return;
-}
-
-void uwbd_flush(struct uwb_rc *rc)
-{
-	struct uwb_event *evt, *nxt;
-
-	spin_lock_irq(&rc->uwbd.event_list_lock);
-	list_for_each_entry_safe(evt, nxt, &rc->uwbd.event_list, list_node) {
-		if (evt->rc == rc) {
-			__uwb_rc_put(rc);
-			list_del(&evt->list_node);
-			if (evt->type == UWB_EVT_TYPE_NOTIF)
-				kfree(evt->notif.rceb);
-			kfree(evt);
-		}
-	}
-	spin_unlock_irq(&rc->uwbd.event_list_lock);
-}
diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c
deleted file mode 100644
index 22397f70dee2..000000000000
--- a/drivers/uwb/whc-rc.c
+++ /dev/null
@@ -1,467 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Wireless Host Controller: Radio Control Interface (WHCI v0.95[2.3])
- * Radio Control command/event transport to the UWB stack
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * Initialize and hook up the Radio Control interface.
- *
- * For each device probed, creates an 'struct whcrc' which contains
- * just the representation of the UWB Radio Controller, and the logic
- * for reading notifications and passing them to the UWB Core.
- *
- * So we initialize all of those, register the UWB Radio Controller
- * and setup the notification/event handle to pipe the notifications
- * to the UWB management Daemon.
- *
- * Once uwb_rc_add() is called, the UWB stack takes control, resets
- * the radio and readies the device to take commands the UWB
- * API/user-space.
- *
- * Note this driver is just a transport driver; the commands are
- * formed at the UWB stack and given to this driver who will deliver
- * them to the hw and transfer the replies/notifications back to the
- * UWB stack through the UWB daemon (UWBD).
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/sched.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/workqueue.h>
-#include <linux/uwb.h>
-#include <linux/uwb/whci.h>
-#include <linux/uwb/umc.h>
-
-#include "uwb-internal.h"
-
-/**
- * Descriptor for an instance of the UWB Radio Control Driver that
- * attaches to the URC interface of the WHCI PCI card.
- *
- * Unless there is a lock specific to the 'data members', all access
- * is protected by uwb_rc->mutex.
- */
-struct whcrc {
-	struct umc_dev *umc_dev;
-	struct uwb_rc *uwb_rc;		/* UWB host controller */
-
-	unsigned long area;
-	void __iomem *rc_base;
-	size_t rc_len;
-	spinlock_t irq_lock;
-
-	void *evt_buf, *cmd_buf;
-	dma_addr_t evt_dma_buf, cmd_dma_buf;
-	wait_queue_head_t cmd_wq;
-	struct work_struct event_work;
-};
-
-/**
- * Execute an UWB RC command on WHCI/RC
- *
- * @rc:       Instance of a Radio Controller that is a whcrc
- * @cmd:      Buffer containing the RCCB and payload to execute
- * @cmd_size: Size of the command buffer.
- *
- * We copy the command into whcrc->cmd_buf (as it is pretty and
- * aligned`and physically contiguous) and then press the right keys in
- * the controller's URCCMD register to get it to read it. We might
- * have to wait for the cmd_sem to be open to us.
- *
- * NOTE: rc's mutex has to be locked
- */
-static int whcrc_cmd(struct uwb_rc *uwb_rc,
-	      const struct uwb_rccb *cmd, size_t cmd_size)
-{
-	int result = 0;
-	struct whcrc *whcrc = uwb_rc->priv;
-	struct device *dev = &whcrc->umc_dev->dev;
-	u32 urccmd;
-
-	if (cmd_size >= 4096)
-		return -EINVAL;
-
-	/*
-	 * If the URC is halted, then the hardware has reset itself.
-	 * Attempt to recover by restarting the device and then return
-	 * an error as it's likely that the current command isn't
-	 * valid for a newly started RC.
-	 */
-	if (le_readl(whcrc->rc_base + URCSTS) & URCSTS_HALTED) {
-		dev_err(dev, "requesting reset of halted radio controller\n");
-		uwb_rc_reset_all(uwb_rc);
-		return -EIO;
-	}
-
-	result = wait_event_timeout(whcrc->cmd_wq,
-		!(le_readl(whcrc->rc_base + URCCMD) & URCCMD_ACTIVE), HZ/2);
-	if (result == 0) {
-		dev_err(dev, "device is not ready to execute commands\n");
-		return -ETIMEDOUT;
-	}
-
-	memmove(whcrc->cmd_buf, cmd, cmd_size);
-	le_writeq(whcrc->cmd_dma_buf, whcrc->rc_base + URCCMDADDR);
-
-	spin_lock(&whcrc->irq_lock);
-	urccmd = le_readl(whcrc->rc_base + URCCMD);
-	urccmd &= ~(URCCMD_EARV | URCCMD_SIZE_MASK);
-	le_writel(urccmd | URCCMD_ACTIVE | URCCMD_IWR | cmd_size,
-		  whcrc->rc_base + URCCMD);
-	spin_unlock(&whcrc->irq_lock);
-
-	return 0;
-}
-
-static int whcrc_reset(struct uwb_rc *rc)
-{
-	struct whcrc *whcrc = rc->priv;
-
-	return umc_controller_reset(whcrc->umc_dev);
-}
-
-/**
- * Reset event reception mechanism and tell hw we are ready to get more
- *
- * We have read all the events in the event buffer, so we are ready to
- * reset it to the beginning.
- *
- * This is only called during initialization or after an event buffer
- * has been retired.  This means we can be sure that event processing
- * is disabled and it's safe to update the URCEVTADDR register.
- *
- * There's no need to wait for the event processing to start as the
- * URC will not clear URCCMD_ACTIVE until (internal) event buffer
- * space is available.
- */
-static
-void whcrc_enable_events(struct whcrc *whcrc)
-{
-	u32 urccmd;
-
-	le_writeq(whcrc->evt_dma_buf, whcrc->rc_base + URCEVTADDR);
-
-	spin_lock(&whcrc->irq_lock);
-	urccmd = le_readl(whcrc->rc_base + URCCMD) & ~URCCMD_ACTIVE;
-	le_writel(urccmd | URCCMD_EARV, whcrc->rc_base + URCCMD);
-	spin_unlock(&whcrc->irq_lock);
-}
-
-static void whcrc_event_work(struct work_struct *work)
-{
-	struct whcrc *whcrc = container_of(work, struct whcrc, event_work);
-	size_t size;
-	u64 urcevtaddr;
-
-	urcevtaddr = le_readq(whcrc->rc_base + URCEVTADDR);
-	size = urcevtaddr & URCEVTADDR_OFFSET_MASK;
-
-	uwb_rc_neh_grok(whcrc->uwb_rc, whcrc->evt_buf, size);
-	whcrc_enable_events(whcrc);
-}
-
-/**
- * Catch interrupts?
- *
- * We ack inmediately (and expect the hw to do the right thing and
- * raise another IRQ if things have changed :)
- */
-static
-irqreturn_t whcrc_irq_cb(int irq, void *_whcrc)
-{
-	struct whcrc *whcrc = _whcrc;
-	struct device *dev = &whcrc->umc_dev->dev;
-	u32 urcsts;
-
-	urcsts = le_readl(whcrc->rc_base + URCSTS);
-	if (!(urcsts & URCSTS_INT_MASK))
-		return IRQ_NONE;
-	le_writel(urcsts & URCSTS_INT_MASK, whcrc->rc_base + URCSTS);
-
-	if (urcsts & URCSTS_HSE) {
-		dev_err(dev, "host system error -- hardware halted\n");
-		/* FIXME: do something sensible here */
-		goto out;
-	}
-	if (urcsts & URCSTS_ER)
-		schedule_work(&whcrc->event_work);
-	if (urcsts & URCSTS_RCI)
-		wake_up_all(&whcrc->cmd_wq);
-out:
-	return IRQ_HANDLED;
-}
-
-
-/**
- * Initialize a UMC RC interface: map regions, get (shared) IRQ
- */
-static
-int whcrc_setup_rc_umc(struct whcrc *whcrc)
-{
-	int result = 0;
-	struct device *dev = &whcrc->umc_dev->dev;
-	struct umc_dev *umc_dev = whcrc->umc_dev;
-
-	whcrc->area = umc_dev->resource.start;
-	whcrc->rc_len = resource_size(&umc_dev->resource);
-	result = -EBUSY;
-	if (request_mem_region(whcrc->area, whcrc->rc_len, KBUILD_MODNAME) == NULL) {
-		dev_err(dev, "can't request URC region (%zu bytes @ 0x%lx): %d\n",
-			whcrc->rc_len, whcrc->area, result);
-		goto error_request_region;
-	}
-
-	whcrc->rc_base = ioremap_nocache(whcrc->area, whcrc->rc_len);
-	if (whcrc->rc_base == NULL) {
-		dev_err(dev, "can't ioremap registers (%zu bytes @ 0x%lx): %d\n",
-			whcrc->rc_len, whcrc->area, result);
-		goto error_ioremap_nocache;
-	}
-
-	result = request_irq(umc_dev->irq, whcrc_irq_cb, IRQF_SHARED,
-			     KBUILD_MODNAME, whcrc);
-	if (result < 0) {
-		dev_err(dev, "can't allocate IRQ %d: %d\n",
-			umc_dev->irq, result);
-		goto error_request_irq;
-	}
-
-	result = -ENOMEM;
-	whcrc->cmd_buf = dma_alloc_coherent(&umc_dev->dev, PAGE_SIZE,
-					    &whcrc->cmd_dma_buf, GFP_KERNEL);
-	if (whcrc->cmd_buf == NULL) {
-		dev_err(dev, "Can't allocate cmd transfer buffer\n");
-		goto error_cmd_buffer;
-	}
-
-	whcrc->evt_buf = dma_alloc_coherent(&umc_dev->dev, PAGE_SIZE,
-					    &whcrc->evt_dma_buf, GFP_KERNEL);
-	if (whcrc->evt_buf == NULL) {
-		dev_err(dev, "Can't allocate evt transfer buffer\n");
-		goto error_evt_buffer;
-	}
-	return 0;
-
-error_evt_buffer:
-	dma_free_coherent(&umc_dev->dev, PAGE_SIZE, whcrc->cmd_buf,
-			  whcrc->cmd_dma_buf);
-error_cmd_buffer:
-	free_irq(umc_dev->irq, whcrc);
-error_request_irq:
-	iounmap(whcrc->rc_base);
-error_ioremap_nocache:
-	release_mem_region(whcrc->area, whcrc->rc_len);
-error_request_region:
-	return result;
-}
-
-
-/**
- * Release RC's UMC resources
- */
-static
-void whcrc_release_rc_umc(struct whcrc *whcrc)
-{
-	struct umc_dev *umc_dev = whcrc->umc_dev;
-
-	dma_free_coherent(&umc_dev->dev, PAGE_SIZE, whcrc->evt_buf,
-			  whcrc->evt_dma_buf);
-	dma_free_coherent(&umc_dev->dev, PAGE_SIZE, whcrc->cmd_buf,
-			  whcrc->cmd_dma_buf);
-	free_irq(umc_dev->irq, whcrc);
-	iounmap(whcrc->rc_base);
-	release_mem_region(whcrc->area, whcrc->rc_len);
-}
-
-
-/**
- * whcrc_start_rc - start a WHCI radio controller
- * @whcrc: the radio controller to start
- *
- * Reset the UMC device, start the radio controller, enable events and
- * finally enable interrupts.
- */
-static int whcrc_start_rc(struct uwb_rc *rc)
-{
-	struct whcrc *whcrc = rc->priv;
-	struct device *dev = &whcrc->umc_dev->dev;
-
-	/* Reset the thing */
-	le_writel(URCCMD_RESET, whcrc->rc_base + URCCMD);
-	if (whci_wait_for(dev, whcrc->rc_base + URCCMD, URCCMD_RESET, 0,
-			  5000, "hardware reset") < 0)
-		return -EBUSY;
-
-	/* Set the event buffer, start the controller (enable IRQs later) */
-	le_writel(0, whcrc->rc_base + URCINTR);
-	le_writel(URCCMD_RS, whcrc->rc_base + URCCMD);
-	if (whci_wait_for(dev, whcrc->rc_base + URCSTS, URCSTS_HALTED, 0,
-			  5000, "radio controller start") < 0)
-		return -ETIMEDOUT;
-	whcrc_enable_events(whcrc);
-	le_writel(URCINTR_EN_ALL, whcrc->rc_base + URCINTR);
-	return 0;
-}
-
-
-/**
- * whcrc_stop_rc - stop a WHCI radio controller
- * @whcrc: the radio controller to stop
- *
- * Disable interrupts and cancel any pending event processing work
- * before clearing the Run/Stop bit.
- */
-static
-void whcrc_stop_rc(struct uwb_rc *rc)
-{
-	struct whcrc *whcrc = rc->priv;
-	struct umc_dev *umc_dev = whcrc->umc_dev;
-
-	le_writel(0, whcrc->rc_base + URCINTR);
-	cancel_work_sync(&whcrc->event_work);
-
-	le_writel(0, whcrc->rc_base + URCCMD);
-	whci_wait_for(&umc_dev->dev, whcrc->rc_base + URCSTS,
-		      URCSTS_HALTED, URCSTS_HALTED, 100, "radio controller stop");
-}
-
-static void whcrc_init(struct whcrc *whcrc)
-{
-	spin_lock_init(&whcrc->irq_lock);
-	init_waitqueue_head(&whcrc->cmd_wq);
-	INIT_WORK(&whcrc->event_work, whcrc_event_work);
-}
-
-/**
- * Initialize the radio controller.
- *
- * NOTE: we setup whcrc->uwb_rc before calling uwb_rc_add(); in the
- *       IRQ handler we use that to determine if the hw is ready to
- *       handle events. Looks like a race condition, but it really is
- *       not.
- */
-static
-int whcrc_probe(struct umc_dev *umc_dev)
-{
-	int result;
-	struct uwb_rc *uwb_rc;
-	struct whcrc *whcrc;
-	struct device *dev = &umc_dev->dev;
-
-	result = -ENOMEM;
-	uwb_rc = uwb_rc_alloc();
-	if (uwb_rc == NULL) {
-		dev_err(dev, "unable to allocate RC instance\n");
-		goto error_rc_alloc;
-	}
-	whcrc = kzalloc(sizeof(*whcrc), GFP_KERNEL);
-	if (whcrc == NULL) {
-		dev_err(dev, "unable to allocate WHC-RC instance\n");
-		goto error_alloc;
-	}
-	whcrc_init(whcrc);
-	whcrc->umc_dev = umc_dev;
-
-	result = whcrc_setup_rc_umc(whcrc);
-	if (result < 0) {
-		dev_err(dev, "Can't setup RC UMC interface: %d\n", result);
-		goto error_setup_rc_umc;
-	}
-	whcrc->uwb_rc = uwb_rc;
-
-	uwb_rc->owner = THIS_MODULE;
-	uwb_rc->cmd   = whcrc_cmd;
-	uwb_rc->reset = whcrc_reset;
-	uwb_rc->start = whcrc_start_rc;
-	uwb_rc->stop  = whcrc_stop_rc;
-
-	result = uwb_rc_add(uwb_rc, dev, whcrc);
-	if (result < 0)
-		goto error_rc_add;
-	umc_set_drvdata(umc_dev, whcrc);
-	return 0;
-
-error_rc_add:
-	whcrc_release_rc_umc(whcrc);
-error_setup_rc_umc:
-	kfree(whcrc);
-error_alloc:
-	uwb_rc_put(uwb_rc);
-error_rc_alloc:
-	return result;
-}
-
-/**
- * Clean up the radio control resources
- *
- * When we up the command semaphore, everybody possibly held trying to
- * execute a command should be granted entry and then they'll see the
- * host is quiescing and up it (so it will chain to the next waiter).
- * This should not happen (in any case), as we can only remove when
- * there are no handles open...
- */
-static void whcrc_remove(struct umc_dev *umc_dev)
-{
-	struct whcrc *whcrc = umc_get_drvdata(umc_dev);
-	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
-
-	umc_set_drvdata(umc_dev, NULL);
-	uwb_rc_rm(uwb_rc);
-	whcrc_release_rc_umc(whcrc);
-	kfree(whcrc);
-	uwb_rc_put(uwb_rc);
-}
-
-static int whcrc_pre_reset(struct umc_dev *umc)
-{
-	struct whcrc *whcrc = umc_get_drvdata(umc);
-	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
-
-	uwb_rc_pre_reset(uwb_rc);
-	return 0;
-}
-
-static int whcrc_post_reset(struct umc_dev *umc)
-{
-	struct whcrc *whcrc = umc_get_drvdata(umc);
-	struct uwb_rc *uwb_rc = whcrc->uwb_rc;
-
-	return uwb_rc_post_reset(uwb_rc);
-}
-
-/* PCI device ID's that we handle [so it gets loaded] */
-static struct pci_device_id __used whcrc_id_table[] = {
-	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
-	{ /* empty last entry */ }
-};
-MODULE_DEVICE_TABLE(pci, whcrc_id_table);
-
-static struct umc_driver whcrc_driver = {
-	.name       = "whc-rc",
-	.cap_id     = UMC_CAP_ID_WHCI_RC,
-	.probe      = whcrc_probe,
-	.remove     = whcrc_remove,
-	.pre_reset  = whcrc_pre_reset,
-	.post_reset = whcrc_post_reset,
-};
-
-static int __init whcrc_driver_init(void)
-{
-	return umc_driver_register(&whcrc_driver);
-}
-module_init(whcrc_driver_init);
-
-static void __exit whcrc_driver_exit(void)
-{
-	umc_driver_unregister(&whcrc_driver);
-}
-module_exit(whcrc_driver_exit);
-
-MODULE_AUTHOR("Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>");
-MODULE_DESCRIPTION("Wireless Host Controller Radio Control Driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/uwb/whci.c b/drivers/uwb/whci.c
deleted file mode 100644
index be8a8b8e857b..000000000000
--- a/drivers/uwb/whci.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * WHCI UWB Multi-interface Controller enumerator.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- */
-#include <linux/delay.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/slab.h>
-#include <linux/uwb/whci.h>
-#include <linux/uwb/umc.h>
-
-struct whci_card {
-	struct pci_dev *pci;
-	void __iomem *uwbbase;
-	u8 n_caps;
-	struct umc_dev *devs[0];
-};
-
-
-/* Fix faulty HW :( */
-static
-u64 whci_capdata_quirks(struct whci_card *card, u64 capdata)
-{
-	u64 capdata_orig = capdata;
-	struct pci_dev *pci_dev = card->pci;
-	if (pci_dev->vendor == PCI_VENDOR_ID_INTEL
-	    && (pci_dev->device == 0x0c3b || pci_dev->device == 0004)
-	    && pci_dev->class == 0x0d1010) {
-		switch (UWBCAPDATA_TO_CAP_ID(capdata)) {
-			/* WLP capability has 0x100 bytes of aperture */
-		case 0x80:
-			capdata |= 0x40 << 8; break;
-			/* WUSB capability has 0x80 bytes of aperture
-			 * and ID is 1 */
-		case 0x02:
-			capdata &= ~0xffff;
-			capdata |= 0x2001;
-			break;
-		}
-	}
-	if (capdata_orig != capdata)
-		dev_warn(&pci_dev->dev,
-			 "PCI v%04x d%04x c%06x#%02x: "
-			 "corrected capdata from %016Lx to %016Lx\n",
-			 pci_dev->vendor, pci_dev->device, pci_dev->class,
-			 (unsigned)UWBCAPDATA_TO_CAP_ID(capdata),
-			 (unsigned long long)capdata_orig,
-			 (unsigned long long)capdata);
-	return capdata;
-}
-
-
-/**
- * whci_wait_for - wait for a WHCI register to be set
- *
- * Polls (for at most @max_ms ms) until '*@reg & @mask == @result'.
- */
-int whci_wait_for(struct device *dev, u32 __iomem *reg, u32 mask, u32 result,
-	unsigned long max_ms, const char *tag)
-{
-	unsigned t = 0;
-	u32 val;
-	for (;;) {
-		val = le_readl(reg);
-		if ((val & mask) == result)
-			break;
-		if (t >= max_ms) {
-			dev_err(dev, "%s timed out\n", tag);
-			return -ETIMEDOUT;
-		}
-		msleep(10);
-		t += 10;
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(whci_wait_for);
-
-
-/*
- * NOTE: the capinfo and capdata registers are slightly different
- *       (size and cap-id fields). So for cap #0, we need to fill
- *       in. Size comes from the size of the register block
- *       (statically calculated); cap_id comes from nowhere, we use
- *       zero, that is reserved, for the radio controller, because
- *       none was defined at the spec level.
- */
-static int whci_add_cap(struct whci_card *card, int n)
-{
-	struct umc_dev *umc;
-	u64 capdata;
-	int bar, err;
-
-	umc = umc_device_create(&card->pci->dev, n);
-	if (umc == NULL)
-		return -ENOMEM;
-
-	capdata = le_readq(card->uwbbase + UWBCAPDATA(n));
-
-	bar = UWBCAPDATA_TO_BAR(capdata) << 1;
-
-	capdata = whci_capdata_quirks(card, capdata);
-	/* Capability 0 is the radio controller. It's size is 32
-	 * bytes (WHCI0.95[2.3, T2-9]). */
-	umc->version         = UWBCAPDATA_TO_VERSION(capdata);
-	umc->cap_id          = n == 0 ? 0 : UWBCAPDATA_TO_CAP_ID(capdata);
-	umc->bar	     = bar;
-	umc->resource.start  = pci_resource_start(card->pci, bar)
-		+ UWBCAPDATA_TO_OFFSET(capdata);
-	umc->resource.end    = umc->resource.start
-		+ (n == 0 ? 0x20 : UWBCAPDATA_TO_SIZE(capdata)) - 1;
-	umc->resource.name   = dev_name(&umc->dev);
-	umc->resource.flags  = card->pci->resource[bar].flags;
-	umc->resource.parent = &card->pci->resource[bar];
-	umc->irq             = card->pci->irq;
-
-	err = umc_device_register(umc);
-	if (err < 0)
-		goto error;
-	card->devs[n] = umc;
-	return 0;
-
-error:
-	kfree(umc);
-	return err;
-}
-
-static void whci_del_cap(struct whci_card *card, int n)
-{
-	struct umc_dev *umc = card->devs[n];
-
-	umc_device_unregister(umc);
-}
-
-static int whci_n_caps(struct pci_dev *pci)
-{
-	void __iomem *uwbbase;
-	u64 capinfo;
-
-	uwbbase = pci_iomap(pci, 0, 8);
-	if (!uwbbase)
-		return -ENOMEM;
-	capinfo = le_readq(uwbbase + UWBCAPINFO);
-	pci_iounmap(pci, uwbbase);
-
-	return UWBCAPINFO_TO_N_CAPS(capinfo);
-}
-
-static int whci_probe(struct pci_dev *pci, const struct pci_device_id *id)
-{
-	struct whci_card *card;
-	int err, n_caps, n;
-
-	err = pci_enable_device(pci);
-	if (err < 0)
-		goto error;
-	pci_enable_msi(pci);
-	pci_set_master(pci);
-	err = -ENXIO;
-	if (!pci_set_dma_mask(pci, DMA_BIT_MASK(64)))
-		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64));
-	else if (!pci_set_dma_mask(pci, DMA_BIT_MASK(32)))
-		pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32));
-	else
-		goto error_dma;
-
-	err = n_caps = whci_n_caps(pci);
-	if (n_caps < 0)
-		goto error_ncaps;
-
-	err = -ENOMEM;
-	card = kzalloc(sizeof(struct whci_card)
-		       + sizeof(struct umc_dev *) * (n_caps + 1),
-		       GFP_KERNEL);
-	if (card == NULL)
-		goto error_kzalloc;
-	card->pci = pci;
-	card->n_caps = n_caps;
-
-	err = -EBUSY;
-	if (!request_mem_region(pci_resource_start(pci, 0),
-				UWBCAPDATA_SIZE(card->n_caps),
-				"whci (capability data)"))
-		goto error_request_memregion;
-	err = -ENOMEM;
-	card->uwbbase = pci_iomap(pci, 0, UWBCAPDATA_SIZE(card->n_caps));
-	if (!card->uwbbase)
-		goto error_iomap;
-
-	/* Add each capability. */
-	for (n = 0; n <= card->n_caps; n++) {
-		err = whci_add_cap(card, n);
-		if (err < 0 && n == 0) {
-			dev_err(&pci->dev, "cannot bind UWB radio controller:"
-				" %d\n", err);
-			goto error_bind;
-		}
-		if (err < 0)
-			dev_warn(&pci->dev, "warning: cannot bind capability "
-				 "#%u: %d\n", n, err);
-	}
-	pci_set_drvdata(pci, card);
-	return 0;
-
-error_bind:
-	pci_iounmap(pci, card->uwbbase);
-error_iomap:
-	release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps));
-error_request_memregion:
-	kfree(card);
-error_kzalloc:
-error_ncaps:
-error_dma:
-	pci_disable_msi(pci);
-	pci_disable_device(pci);
-error:
-	return err;
-}
-
-static void whci_remove(struct pci_dev *pci)
-{
-	struct whci_card *card = pci_get_drvdata(pci);
-	int n;
-
-	pci_set_drvdata(pci, NULL);
-	/* Unregister each capability in reverse (so the master device
-	 * is unregistered last). */
-	for (n = card->n_caps; n >= 0 ; n--)
-		whci_del_cap(card, n);
-	pci_iounmap(pci, card->uwbbase);
-	release_mem_region(pci_resource_start(pci, 0), UWBCAPDATA_SIZE(card->n_caps));
-	kfree(card);
-	pci_disable_msi(pci);
-	pci_disable_device(pci);
-}
-
-static struct pci_device_id whci_id_table[] = {
-	{ PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) },
-	{ 0 },
-};
-MODULE_DEVICE_TABLE(pci, whci_id_table);
-
-
-static struct pci_driver whci_driver = {
-	.name     = "whci",
-	.id_table = whci_id_table,
-	.probe    = whci_probe,
-	.remove   = whci_remove,
-};
-
-module_pci_driver(whci_driver);
-MODULE_DESCRIPTION("WHCI UWB Multi-interface Controller enumerator");
-MODULE_AUTHOR("Cambridge Silicon Radio Ltd.");
-MODULE_LICENSE("GPL");
diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h
deleted file mode 100644
index d7f3cb9b9db5..000000000000
--- a/include/linux/usb/association.h
+++ /dev/null
@@ -1,151 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB - Cable Based Association
- *
- * Copyright (C) 2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- */
-#ifndef __LINUX_USB_ASSOCIATION_H
-#define __LINUX_USB_ASSOCIATION_H
-
-
-/*
- * Association attributes
- *
- * Association Models Supplement to WUSB 1.0 T[3-1]
- *
- * Each field in the structures has it's ID, it's length and then the
- * value. This is the actual definition of the field's ID and its
- * length.
- */
-struct wusb_am_attr {
-	__u8 id;
-	__u8 len;
-};
-
-/* Different fields defined by the spec */
-#define WUSB_AR_AssociationTypeId	{ .id = cpu_to_le16(0x0000), .len = cpu_to_le16(2) }
-#define WUSB_AR_AssociationSubTypeId	{ .id = cpu_to_le16(0x0001), .len = cpu_to_le16(2) }
-#define WUSB_AR_Length			{ .id = cpu_to_le16(0x0002), .len = cpu_to_le16(4) }
-#define WUSB_AR_AssociationStatus	{ .id = cpu_to_le16(0x0004), .len = cpu_to_le16(4) }
-#define WUSB_AR_LangID			{ .id = cpu_to_le16(0x0008), .len = cpu_to_le16(2) }
-#define WUSB_AR_DeviceFriendlyName	{ .id = cpu_to_le16(0x000b), .len = cpu_to_le16(64) } /* max */
-#define WUSB_AR_HostFriendlyName	{ .id = cpu_to_le16(0x000c), .len = cpu_to_le16(64) } /* max */
-#define WUSB_AR_CHID			{ .id = cpu_to_le16(0x1000), .len = cpu_to_le16(16) }
-#define WUSB_AR_CDID			{ .id = cpu_to_le16(0x1001), .len = cpu_to_le16(16) }
-#define WUSB_AR_ConnectionContext	{ .id = cpu_to_le16(0x1002), .len = cpu_to_le16(48) }
-#define WUSB_AR_BandGroups		{ .id = cpu_to_le16(0x1004), .len = cpu_to_le16(2) }
-
-/* CBAF Control Requests (AMS1.0[T4-1] */
-enum {
-	CBAF_REQ_GET_ASSOCIATION_INFORMATION = 0x01,
-	CBAF_REQ_GET_ASSOCIATION_REQUEST,
-	CBAF_REQ_SET_ASSOCIATION_RESPONSE
-};
-
-/*
- * CBAF USB-interface defitions
- *
- * No altsettings, one optional interrupt endpoint.
- */
-enum {
-	CBAF_IFACECLASS    = 0xef,
-	CBAF_IFACESUBCLASS = 0x03,
-	CBAF_IFACEPROTOCOL = 0x01,
-};
-
-/* Association Information (AMS1.0[T4-3]) */
-struct wusb_cbaf_assoc_info {
-	__le16 Length;
-	__u8 NumAssociationRequests;
-	__le16 Flags;
-	__u8 AssociationRequestsArray[];
-} __attribute__((packed));
-
-/* Association Request (AMS1.0[T4-4]) */
-struct wusb_cbaf_assoc_request {
-	__u8 AssociationDataIndex;
-	__u8 Reserved;
-	__le16 AssociationTypeId;
-	__le16 AssociationSubTypeId;
-	__le32 AssociationTypeInfoSize;
-} __attribute__((packed));
-
-enum {
-	AR_TYPE_WUSB                    = 0x0001,
-	AR_TYPE_WUSB_RETRIEVE_HOST_INFO = 0x0000,
-	AR_TYPE_WUSB_ASSOCIATE          = 0x0001,
-};
-
-/* Association Attribute header (AMS1.0[3.8]) */
-struct wusb_cbaf_attr_hdr {
-	__le16 id;
-	__le16 len;
-} __attribute__((packed));
-
-/* Host Info (AMS1.0[T4-7]) (yeah, more headers and fields...) */
-struct wusb_cbaf_host_info {
-	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
-	__le16 AssociationTypeId;
-	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
-	__le16 AssociationSubTypeId;
-	struct wusb_cbaf_attr_hdr CHID_hdr;
-	struct wusb_ckhdid CHID;
-	struct wusb_cbaf_attr_hdr LangID_hdr;
-	__le16 LangID;
-	struct wusb_cbaf_attr_hdr HostFriendlyName_hdr;
-	__u8 HostFriendlyName[];
-} __attribute__((packed));
-
-/* Device Info (AMS1.0[T4-8])
- *
- * I still don't get this tag'n'header stuff for each goddamn
- * field...
- */
-struct wusb_cbaf_device_info {
-	struct wusb_cbaf_attr_hdr Length_hdr;
-	__le32 Length;
-	struct wusb_cbaf_attr_hdr CDID_hdr;
-	struct wusb_ckhdid CDID;
-	struct wusb_cbaf_attr_hdr BandGroups_hdr;
-	__le16 BandGroups;
-	struct wusb_cbaf_attr_hdr LangID_hdr;
-	__le16 LangID;
-	struct wusb_cbaf_attr_hdr DeviceFriendlyName_hdr;
-	__u8 DeviceFriendlyName[];
-} __attribute__((packed));
-
-/* Connection Context; CC_DATA - Success case (AMS1.0[T4-9]) */
-struct wusb_cbaf_cc_data {
-	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
-	__le16 AssociationTypeId;
-	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
-	__le16 AssociationSubTypeId;
-	struct wusb_cbaf_attr_hdr Length_hdr;
-	__le32 Length;
-	struct wusb_cbaf_attr_hdr ConnectionContext_hdr;
-	struct wusb_ckhdid CHID;
-	struct wusb_ckhdid CDID;
-	struct wusb_ckhdid CK;
-	struct wusb_cbaf_attr_hdr BandGroups_hdr;
-	__le16 BandGroups;
-} __attribute__((packed));
-
-/* CC_DATA - Failure case (AMS1.0[T4-10]) */
-struct wusb_cbaf_cc_data_fail {
-	struct wusb_cbaf_attr_hdr AssociationTypeId_hdr;
-	__le16 AssociationTypeId;
-	struct wusb_cbaf_attr_hdr AssociationSubTypeId_hdr;
-	__le16 AssociationSubTypeId;
-	struct wusb_cbaf_attr_hdr Length_hdr;
-	__le16 Length;
-	struct wusb_cbaf_attr_hdr AssociationStatus_hdr;
-	__u32 AssociationStatus;
-} __attribute__((packed));
-
-#endif	/* __LINUX_USB_ASSOCIATION_H */
diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
deleted file mode 100644
index 64a840b5106e..000000000000
--- a/include/linux/usb/wusb-wa.h
+++ /dev/null
@@ -1,304 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Wire Adapter constants and structures.
- *
- * Copyright (C) 2005-2006 Intel Corporation.
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
- * FIXME: docs
- * FIXME: organize properly, group logically
- *
- * All the event structures are defined in uwb/spec.h, as they are
- * common to the WHCI and WUSB radio control interfaces.
- *
- * References:
- *   [WUSB] Wireless Universal Serial Bus Specification, revision 1.0, ch8
- */
-#ifndef __LINUX_USB_WUSB_WA_H
-#define __LINUX_USB_WUSB_WA_H
-
-/**
- * Radio Command Request for the Radio Control Interface
- *
- * Radio Control Interface command and event codes are the same as
- * WHCI, and listed in include/linux/uwb.h:UWB_RC_{CMD,EVT}_*
- */
-enum {
-	WA_EXEC_RC_CMD = 40,	/* Radio Control command Request */
-};
-
-/* Wireless Adapter Requests ([WUSB] table 8-51) */
-enum {
-	WUSB_REQ_ADD_MMC_IE     = 20,
-	WUSB_REQ_REMOVE_MMC_IE  = 21,
-	WUSB_REQ_SET_NUM_DNTS   = 22,
-	WUSB_REQ_SET_CLUSTER_ID = 23,
-	WUSB_REQ_SET_DEV_INFO   = 24,
-	WUSB_REQ_GET_TIME       = 25,
-	WUSB_REQ_SET_STREAM_IDX = 26,
-	WUSB_REQ_SET_WUSB_MAS   = 27,
-	WUSB_REQ_CHAN_STOP      = 28,
-};
-
-
-/* Wireless Adapter WUSB Channel Time types ([WUSB] table 8-52) */
-enum {
-	WUSB_TIME_ADJ   = 0,
-	WUSB_TIME_BPST  = 1,
-	WUSB_TIME_WUSB  = 2,
-};
-
-enum {
-	WA_ENABLE = 0x01,
-	WA_RESET = 0x02,
-	RPIPE_PAUSE = 0x1,
-	RPIPE_STALL = 0x2,
-};
-
-/* Responses from Get Status request ([WUSB] section 8.3.1.6) */
-enum {
-	WA_STATUS_ENABLED = 0x01,
-	WA_STATUS_RESETTING = 0x02
-};
-
-enum rpipe_crs {
-	RPIPE_CRS_CTL = 0x01,
-	RPIPE_CRS_ISO = 0x02,
-	RPIPE_CRS_BULK = 0x04,
-	RPIPE_CRS_INTR = 0x08
-};
-
-/**
- * RPipe descriptor ([WUSB] section 8.5.2.11)
- *
- * FIXME: explain rpipes
- */
-struct usb_rpipe_descriptor {
-	u8	bLength;
-	u8	bDescriptorType;
-	__le16  wRPipeIndex;
-	__le16	wRequests;
-	__le16	wBlocks;		/* rw if 0 */
-	__le16	wMaxPacketSize;		/* rw */
-	union {
-		u8	dwa_bHSHubAddress;		/* rw: DWA. */
-		u8	hwa_bMaxBurst;			/* rw: HWA. */
-	};
-	union {
-		u8	dwa_bHSHubPort;		/*  rw: DWA. */
-		u8	hwa_bDeviceInfoIndex;	/*  rw: HWA. */
-	};
-	u8	bSpeed;			/* rw: xfer rate 'enum uwb_phy_rate' */
-	union {
-		u8 dwa_bDeviceAddress;	/* rw: DWA Target device address. */
-		u8 hwa_reserved;		/* rw: HWA. */
-	};
-	u8	bEndpointAddress;	/* rw: Target EP address */
-	u8	bDataSequence;		/* ro: Current Data sequence */
-	__le32	dwCurrentWindow;	/* ro */
-	u8	bMaxDataSequence;	/* ro?: max supported seq */
-	u8	bInterval;		/* rw:  */
-	u8	bOverTheAirInterval;	/* rw:  */
-	u8	bmAttribute;		/* ro?  */
-	u8	bmCharacteristics;	/* ro? enum rpipe_attr, supported xsactions */
-	u8	bmRetryOptions;		/* rw? */
-	__le16	wNumTransactionErrors;	/* rw */
-} __attribute__ ((packed));
-
-/**
- * Wire Adapter Notification types ([WUSB] sections 8.4.5 & 8.5.4)
- *
- * These are the notifications coming on the notification endpoint of
- * an HWA and a DWA.
- */
-enum wa_notif_type {
-	DWA_NOTIF_RWAKE = 0x91,
-	DWA_NOTIF_PORTSTATUS = 0x92,
-	WA_NOTIF_TRANSFER = 0x93,
-	HWA_NOTIF_BPST_ADJ = 0x94,
-	HWA_NOTIF_DN = 0x95,
-};
-
-/**
- * Wire Adapter notification header
- *
- * Notifications coming from a wire adapter use a common header
- * defined in [WUSB] sections 8.4.5 & 8.5.4.
- */
-struct wa_notif_hdr {
-	u8 bLength;
-	u8 bNotifyType;			/* enum wa_notif_type */
-} __packed;
-
-/**
- * HWA DN Received notification [(WUSB] section 8.5.4.2)
- *
- * The DNData is specified in WUSB1.0[7.6]. For each device
- * notification we received, we just need to dispatch it.
- *
- * @dndata:  this is really an array of notifications, but all start
- *           with the same header.
- */
-struct hwa_notif_dn {
-	struct wa_notif_hdr hdr;
-	u8 bSourceDeviceAddr;		/* from errata 2005/07 */
-	u8 bmAttributes;
-	struct wusb_dn_hdr dndata[];
-} __packed;
-
-/* [WUSB] section 8.3.3 */
-enum wa_xfer_type {
-	WA_XFER_TYPE_CTL = 0x80,
-	WA_XFER_TYPE_BI = 0x81,		/* bulk/interrupt */
-	WA_XFER_TYPE_ISO = 0x82,
-	WA_XFER_RESULT = 0x83,
-	WA_XFER_ABORT = 0x84,
-	WA_XFER_ISO_PACKET_INFO = 0xA0,
-	WA_XFER_ISO_PACKET_STATUS = 0xA1,
-};
-
-/* [WUSB] section 8.3.3 */
-struct wa_xfer_hdr {
-	u8 bLength;			/* 0x18 */
-	u8 bRequestType;		/* 0x80 WA_REQUEST_TYPE_CTL */
-	__le16 wRPipe;			/* RPipe index */
-	__le32 dwTransferID;		/* Host-assigned ID */
-	__le32 dwTransferLength;	/* Length of data to xfer */
-	u8 bTransferSegment;
-} __packed;
-
-struct wa_xfer_ctl {
-	struct wa_xfer_hdr hdr;
-	u8 bmAttribute;
-	__le16 wReserved;
-	struct usb_ctrlrequest baSetupData;
-} __packed;
-
-struct wa_xfer_bi {
-	struct wa_xfer_hdr hdr;
-	u8 bReserved;
-	__le16 wReserved;
-} __packed;
-
-/* [WUSB] section 8.5.5 */
-struct wa_xfer_hwaiso {
-	struct wa_xfer_hdr hdr;
-	u8 bReserved;
-	__le16 wPresentationTime;
-	__le32 dwNumOfPackets;
-} __packed;
-
-struct wa_xfer_packet_info_hwaiso {
-	__le16 wLength;
-	u8 bPacketType;
-	u8 bReserved;
-	__le16 PacketLength[0];
-} __packed;
-
-struct wa_xfer_packet_status_len_hwaiso {
-	__le16 PacketLength;
-	__le16 PacketStatus;
-} __packed;
-
-struct wa_xfer_packet_status_hwaiso {
-	__le16 wLength;
-	u8 bPacketType;
-	u8 bReserved;
-	struct wa_xfer_packet_status_len_hwaiso PacketStatus[0];
-} __packed;
-
-/* [WUSB] section 8.3.3.5 */
-struct wa_xfer_abort {
-	u8 bLength;
-	u8 bRequestType;
-	__le16 wRPipe;			/* RPipe index */
-	__le32 dwTransferID;		/* Host-assigned ID */
-} __packed;
-
-/**
- * WA Transfer Complete notification ([WUSB] section 8.3.3.3)
- *
- */
-struct wa_notif_xfer {
-	struct wa_notif_hdr hdr;
-	u8 bEndpoint;
-	u8 Reserved;
-} __packed;
-
-/** Transfer result basic codes [WUSB] table 8-15 */
-enum {
-	WA_XFER_STATUS_SUCCESS,
-	WA_XFER_STATUS_HALTED,
-	WA_XFER_STATUS_DATA_BUFFER_ERROR,
-	WA_XFER_STATUS_BABBLE,
-	WA_XFER_RESERVED,
-	WA_XFER_STATUS_NOT_FOUND,
-	WA_XFER_STATUS_INSUFFICIENT_RESOURCE,
-	WA_XFER_STATUS_TRANSACTION_ERROR,
-	WA_XFER_STATUS_ABORTED,
-	WA_XFER_STATUS_RPIPE_NOT_READY,
-	WA_XFER_INVALID_FORMAT,
-	WA_XFER_UNEXPECTED_SEGMENT_NUMBER,
-	WA_XFER_STATUS_RPIPE_TYPE_MISMATCH,
-};
-
-/** [WUSB] section 8.3.3.4 */
-struct wa_xfer_result {
-	struct wa_notif_hdr hdr;
-	__le32 dwTransferID;
-	__le32 dwTransferLength;
-	u8     bTransferSegment;
-	u8     bTransferStatus;
-	__le32 dwNumOfPackets;
-} __packed;
-
-/**
- * Wire Adapter Class Descriptor ([WUSB] section 8.5.2.7).
- *
- * NOTE: u16 fields are read Little Endian from the hardware.
- *
- * @bNumPorts is the original max number of devices that the host can
- *            connect; we might chop this so the stack can handle
- *            it. In case you need to access it, use wusbhc->ports_max
- *            if it is a Wireless USB WA.
- */
-struct usb_wa_descriptor {
-	u8	bLength;
-	u8	bDescriptorType;
-	__le16	bcdWAVersion;
-	u8	bNumPorts;		/* don't use!! */
-	u8	bmAttributes;		/* Reserved == 0 */
-	__le16	wNumRPipes;
-	__le16	wRPipeMaxBlock;
-	u8	bRPipeBlockSize;
-	u8	bPwrOn2PwrGood;
-	u8	bNumMMCIEs;
-	u8	DeviceRemovable;	/* FIXME: in DWA this is up to 16 bytes */
-} __packed;
-
-/**
- * HWA Device Information Buffer (WUSB1.0[T8.54])
- */
-struct hwa_dev_info {
-	u8	bmDeviceAvailability[32];       /* FIXME: ignored for now */
-	u8	bDeviceAddress;
-	__le16	wPHYRates;
-	u8	bmDeviceAttribute;
-} __packed;
-
-#endif /* #ifndef __LINUX_USB_WUSB_WA_H */
diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h
deleted file mode 100644
index 65adee629106..000000000000
--- a/include/linux/usb/wusb.h
+++ /dev/null
@@ -1,362 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Wireless USB Standard Definitions
- * Event Size Tables
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- *
- *
- * FIXME: docs
- * FIXME: organize properly, group logically
- *
- * All the event structures are defined in uwb/spec.h, as they are
- * common to the WHCI and WUSB radio control interfaces.
- */
-
-#ifndef __WUSB_H__
-#define __WUSB_H__
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/uwb/spec.h>
-#include <linux/usb/ch9.h>
-#include <linux/param.h>
-
-/**
- * WUSB Information Element header
- *
- * I don't know why, they decided to make it different to the MBOA MAC
- * IE Header; beats me.
- */
-struct wuie_hdr {
-	u8 bLength;
-	u8 bIEIdentifier;
-} __attribute__((packed));
-
-enum {
-	WUIE_ID_WCTA = 0x80,
-	WUIE_ID_CONNECTACK,
-	WUIE_ID_HOST_INFO,
-	WUIE_ID_CHANGE_ANNOUNCE,
-	WUIE_ID_DEVICE_DISCONNECT,
-	WUIE_ID_HOST_DISCONNECT,
-	WUIE_ID_KEEP_ALIVE = 0x89,
-	WUIE_ID_ISOCH_DISCARD,
-	WUIE_ID_RESET_DEVICE,
-};
-
-/**
- * Maximum number of array elements in a WUSB IE.
- *
- * WUSB1.0[7.5 before table 7-38] says that in WUSB IEs that
- * are "arrays" have to limited to 4 elements. So we define it
- * like that to ease up and submit only the neeed size.
- */
-#define WUIE_ELT_MAX 4
-
-/**
- * Wrapper for the data that defines a CHID, a CDID or a CK
- *
- * WUSB defines that CHIDs, CDIDs and CKs are a 16 byte string of
- * data. In order to avoid confusion and enforce types, we wrap it.
- *
- * Make it packed, as we use it in some hw definitions.
- */
-struct wusb_ckhdid {
-	u8 data[16];
-} __attribute__((packed));
-
-static const struct wusb_ckhdid wusb_ckhdid_zero = { .data = { 0 } };
-
-#define WUSB_CKHDID_STRSIZE (3 * sizeof(struct wusb_ckhdid) + 1)
-
-/**
- * WUSB IE: Host Information (WUSB1.0[7.5.2])
- *
- * Used to provide information about the host to the Wireless USB
- * devices in range (CHID can be used as an ASCII string).
- */
-struct wuie_host_info {
-	struct wuie_hdr hdr;
-	__le16 attributes;
-	struct wusb_ckhdid CHID;
-} __attribute__((packed));
-
-/**
- * WUSB IE: Connect Ack (WUSB1.0[7.5.1])
- *
- * Used to acknowledge device connect requests. See note for
- * WUIE_ELT_MAX.
- */
-struct wuie_connect_ack {
-	struct wuie_hdr hdr;
-	struct {
-		struct wusb_ckhdid CDID;
-		u8 bDeviceAddress;	/* 0 means unused */
-		u8 bReserved;
-	} blk[WUIE_ELT_MAX];
-} __attribute__((packed));
-
-/**
- * WUSB IE Host Information Element, Connect Availability
- *
- * WUSB1.0[7.5.2], bmAttributes description
- */
-enum {
-	WUIE_HI_CAP_RECONNECT = 0,
-	WUIE_HI_CAP_LIMITED,
-	WUIE_HI_CAP_RESERVED,
-	WUIE_HI_CAP_ALL,
-};
-
-/**
- * WUSB IE: Channel Stop (WUSB1.0[7.5.8])
- *
- * Tells devices the host is going to stop sending MMCs and will disappear.
- */
-struct wuie_channel_stop {
-	struct wuie_hdr hdr;
-	u8 attributes;
-	u8 timestamp[3];
-} __attribute__((packed));
-
-/**
- * WUSB IE: Keepalive (WUSB1.0[7.5.9])
- *
- * Ask device(s) to send keepalives.
- */
-struct wuie_keep_alive {
-	struct wuie_hdr hdr;
-	u8 bDeviceAddress[WUIE_ELT_MAX];
-} __attribute__((packed));
-
-/**
- * WUSB IE: Reset device (WUSB1.0[7.5.11])
- *
- * Tell device to reset; in all truth, we can fit 4 CDIDs, but we only
- * use it for one at the time...
- *
- * In any case, this request is a wee bit silly: why don't they target
- * by address??
- */
-struct wuie_reset {
-	struct wuie_hdr hdr;
-	struct wusb_ckhdid CDID;
-} __attribute__((packed));
-
-/**
- * WUSB IE: Disconnect device (WUSB1.0[7.5.11])
- *
- * Tell device to disconnect; we can fit 4 addresses, but we only use
- * it for one at the time...
- */
-struct wuie_disconnect {
-	struct wuie_hdr hdr;
-	u8 bDeviceAddress;
-	u8 padding;
-} __attribute__((packed));
-
-/**
- * WUSB IE: Host disconnect ([WUSB] section 7.5.5)
- *
- * Tells all connected devices to disconnect.
- */
-struct wuie_host_disconnect {
-	struct wuie_hdr hdr;
-} __attribute__((packed));
-
-/**
- * WUSB Device Notification header (WUSB1.0[7.6])
- */
-struct wusb_dn_hdr {
-	u8 bType;
-	u8 notifdata[];
-} __attribute__((packed));
-
-/** Device Notification codes (WUSB1.0[Table 7-54]) */
-enum WUSB_DN {
-	WUSB_DN_CONNECT = 0x01,
-	WUSB_DN_DISCONNECT = 0x02,
-	WUSB_DN_EPRDY = 0x03,
-	WUSB_DN_MASAVAILCHANGED = 0x04,
-	WUSB_DN_RWAKE = 0x05,
-	WUSB_DN_SLEEP = 0x06,
-	WUSB_DN_ALIVE = 0x07,
-};
-
-/** WUSB Device Notification Connect */
-struct wusb_dn_connect {
-	struct wusb_dn_hdr hdr;
-	__le16 attributes;
-	struct wusb_ckhdid CDID;
-} __attribute__((packed));
-
-static inline int wusb_dn_connect_prev_dev_addr(const struct wusb_dn_connect *dn)
-{
-	return le16_to_cpu(dn->attributes) & 0xff;
-}
-
-static inline int wusb_dn_connect_new_connection(const struct wusb_dn_connect *dn)
-{
-	return (le16_to_cpu(dn->attributes) >> 8) & 0x1;
-}
-
-static inline int wusb_dn_connect_beacon_behavior(const struct wusb_dn_connect *dn)
-{
-	return (le16_to_cpu(dn->attributes) >> 9) & 0x03;
-}
-
-/** Device is alive (aka: pong) (WUSB1.0[7.6.7]) */
-struct wusb_dn_alive {
-	struct wusb_dn_hdr hdr;
-} __attribute__((packed));
-
-/** Device is disconnecting (WUSB1.0[7.6.2]) */
-struct wusb_dn_disconnect {
-	struct wusb_dn_hdr hdr;
-} __attribute__((packed));
-
-/* General constants */
-enum {
-	WUSB_TRUST_TIMEOUT_MS = 4000,	/* [WUSB] section 4.15.1 */
-};
-
-/*
- * WUSB Crypto stuff (WUSB1.0[6])
- */
-
-extern const char *wusb_et_name(u8);
-
-/**
- * WUSB key index WUSB1.0[7.3.2.4], for usage when setting keys for
- * the host or the device.
- */
-static inline u8 wusb_key_index(int index, int type, int originator)
-{
-	return (originator << 6) | (type << 4) | index;
-}
-
-#define WUSB_KEY_INDEX_TYPE_PTK			0 /* for HWA only */
-#define WUSB_KEY_INDEX_TYPE_ASSOC		1
-#define WUSB_KEY_INDEX_TYPE_GTK			2
-#define WUSB_KEY_INDEX_ORIGINATOR_HOST		0
-#define WUSB_KEY_INDEX_ORIGINATOR_DEVICE	1
-/* bits 0-3 used for the key index. */
-#define WUSB_KEY_INDEX_MAX			15
-
-/* A CCM Nonce, defined in WUSB1.0[6.4.1] */
-struct aes_ccm_nonce {
-	u8 sfn[6];              /* Little Endian */
-	u8 tkid[3];             /* LE */
-	struct uwb_dev_addr dest_addr;
-	struct uwb_dev_addr src_addr;
-} __attribute__((packed));
-
-/* A CCM operation label, defined on WUSB1.0[6.5.x] */
-struct aes_ccm_label {
-	u8 data[14];
-} __attribute__((packed));
-
-/*
- * Input to the key derivation sequence defined in
- * WUSB1.0[6.5.1]. Rest of the data is in the CCM Nonce passed to the
- * PRF function.
- */
-struct wusb_keydvt_in {
-	u8 hnonce[16];
-	u8 dnonce[16];
-} __attribute__((packed));
-
-/*
- * Output from the key derivation sequence defined in
- * WUSB1.0[6.5.1].
- */
-struct wusb_keydvt_out {
-	u8 kck[16];
-	u8 ptk[16];
-} __attribute__((packed));
-
-/* Pseudo Random Function WUSB1.0[6.5] */
-extern int wusb_crypto_init(void);
-extern void wusb_crypto_exit(void);
-extern ssize_t wusb_prf(void *out, size_t out_size,
-			const u8 key[16], const struct aes_ccm_nonce *_n,
-			const struct aes_ccm_label *a,
-			const void *b, size_t blen, size_t len);
-
-static inline int wusb_prf_64(void *out, size_t out_size, const u8 key[16],
-			      const struct aes_ccm_nonce *n,
-			      const struct aes_ccm_label *a,
-			      const void *b, size_t blen)
-{
-	return wusb_prf(out, out_size, key, n, a, b, blen, 64);
-}
-
-static inline int wusb_prf_128(void *out, size_t out_size, const u8 key[16],
-			       const struct aes_ccm_nonce *n,
-			       const struct aes_ccm_label *a,
-			       const void *b, size_t blen)
-{
-	return wusb_prf(out, out_size, key, n, a, b, blen, 128);
-}
-
-static inline int wusb_prf_256(void *out, size_t out_size, const u8 key[16],
-			       const struct aes_ccm_nonce *n,
-			       const struct aes_ccm_label *a,
-			       const void *b, size_t blen)
-{
-	return wusb_prf(out, out_size, key, n, a, b, blen, 256);
-}
-
-/* Key derivation WUSB1.0[6.5.1] */
-static inline int wusb_key_derive(struct wusb_keydvt_out *keydvt_out,
-				  const u8 key[16],
-				  const struct aes_ccm_nonce *n,
-				  const struct wusb_keydvt_in *keydvt_in)
-{
-	const struct aes_ccm_label a = { .data = "Pair-wise keys" };
-	return wusb_prf_256(keydvt_out, sizeof(*keydvt_out), key, n, &a,
-			    keydvt_in, sizeof(*keydvt_in));
-}
-
-/*
- * Out-of-band MIC Generation WUSB1.0[6.5.2]
- *
- * Compute the MIC over @key, @n and @hs and place it in @mic_out.
- *
- * @mic_out:  Where to place the 8 byte MIC tag
- * @key:      KCK from the derivation process
- * @n:        CCM nonce, n->sfn == 0, TKID as established in the
- *            process.
- * @hs:       Handshake struct for phase 2 of the 4-way.
- *            hs->bStatus and hs->bReserved are zero.
- *            hs->bMessageNumber is 2 (WUSB1.0[7.3.2.5.2]
- *            hs->dest_addr is the device's USB address padded with 0
- *            hs->src_addr is the hosts's UWB device address
- *            hs->mic is ignored (as we compute that value).
- */
-static inline int wusb_oob_mic(u8 mic_out[8], const u8 key[16],
-			       const struct aes_ccm_nonce *n,
-			       const struct usb_handshake *hs)
-{
-	const struct aes_ccm_label a = { .data = "out-of-bandMIC" };
-	return wusb_prf_64(mic_out, 8, key, n, &a,
-			   hs, sizeof(*hs) - sizeof(hs->MIC));
-}
-
-#endif /* #ifndef __WUSB_H__ */
diff --git a/include/linux/uwb.h b/include/linux/uwb.h
deleted file mode 100644
index 6918a61e1ac1..000000000000
--- a/include/linux/uwb.h
+++ /dev/null
@@ -1,817 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Ultra Wide Band
- * UWB API
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * FIXME: doc: overview of the API, different parts and pointers
- */
-
-#ifndef __LINUX__UWB_H__
-#define __LINUX__UWB_H__
-
-#include <linux/limits.h>
-#include <linux/device.h>
-#include <linux/mutex.h>
-#include <linux/timer.h>
-#include <linux/wait.h>
-#include <linux/workqueue.h>
-#include <linux/uwb/spec.h>
-#include <asm/page.h>
-
-struct uwb_dev;
-struct uwb_beca_e;
-struct uwb_rc;
-struct uwb_rsv;
-struct uwb_dbg;
-
-/**
- * struct uwb_dev - a UWB Device
- * @rc: UWB Radio Controller that discovered the device (kind of its
- *     parent).
- * @bce: a beacon cache entry for this device; or NULL if the device
- *     is a local radio controller.
- * @mac_addr: the EUI-48 address of this device.
- * @dev_addr: the current DevAddr used by this device.
- * @beacon_slot: the slot number the beacon is using.
- * @streams: bitmap of streams allocated to reservations targeted at
- *     this device.  For an RC, this is the streams allocated for
- *     reservations targeted at DevAddrs.
- *
- * A UWB device may either by a neighbor or part of a local radio
- * controller.
- */
-struct uwb_dev {
-	struct mutex mutex;
-	struct list_head list_node;
-	struct device dev;
-	struct uwb_rc *rc;		/* radio controller */
-	struct uwb_beca_e *bce;		/* Beacon Cache Entry */
-
-	struct uwb_mac_addr mac_addr;
-	struct uwb_dev_addr dev_addr;
-	int beacon_slot;
-	DECLARE_BITMAP(streams, UWB_NUM_STREAMS);
-	DECLARE_BITMAP(last_availability_bm, UWB_NUM_MAS);
-};
-#define to_uwb_dev(d) container_of(d, struct uwb_dev, dev)
-
-/**
- * UWB HWA/WHCI Radio Control {Command|Event} Block context IDs
- *
- * RC[CE]Bs have a 'context ID' field that matches the command with
- * the event received to confirm it.
- *
- * Maximum number of context IDs
- */
-enum { UWB_RC_CTX_MAX = 256 };
-
-
-/** Notification chain head for UWB generated events to listeners */
-struct uwb_notifs_chain {
-	struct list_head list;
-	struct mutex mutex;
-};
-
-/* Beacon cache list */
-struct uwb_beca {
-	struct list_head list;
-	size_t entries;
-	struct mutex mutex;
-};
-
-/* Event handling thread. */
-struct uwbd {
-	int pid;
-	struct task_struct *task;
-	wait_queue_head_t wq;
-	struct list_head event_list;
-	spinlock_t event_list_lock;
-};
-
-/**
- * struct uwb_mas_bm - a bitmap of all MAS in a superframe
- * @bm: a bitmap of length #UWB_NUM_MAS
- */
-struct uwb_mas_bm {
-	DECLARE_BITMAP(bm, UWB_NUM_MAS);
-	DECLARE_BITMAP(unsafe_bm, UWB_NUM_MAS);
-	int safe;
-	int unsafe;
-};
-
-/**
- * uwb_rsv_state - UWB Reservation state.
- *
- * NONE - reservation is not active (no DRP IE being transmitted).
- *
- * Owner reservation states:
- *
- * INITIATED - owner has sent an initial DRP request.
- * PENDING - target responded with pending Reason Code.
- * MODIFIED - reservation manager is modifying an established
- * reservation with a different MAS allocation.
- * ESTABLISHED - the reservation has been successfully negotiated.
- *
- * Target reservation states:
- *
- * DENIED - request is denied.
- * ACCEPTED - request is accepted.
- * PENDING - PAL has yet to make a decision to whether to accept or
- * deny.
- *
- * FIXME: further target states TBD.
- */
-enum uwb_rsv_state {
-	UWB_RSV_STATE_NONE = 0,
-	UWB_RSV_STATE_O_INITIATED,
-	UWB_RSV_STATE_O_PENDING,
-	UWB_RSV_STATE_O_MODIFIED,
-	UWB_RSV_STATE_O_ESTABLISHED,
-	UWB_RSV_STATE_O_TO_BE_MOVED,
-	UWB_RSV_STATE_O_MOVE_EXPANDING,
-	UWB_RSV_STATE_O_MOVE_COMBINING,
-	UWB_RSV_STATE_O_MOVE_REDUCING,
-	UWB_RSV_STATE_T_ACCEPTED,
-	UWB_RSV_STATE_T_DENIED,
-	UWB_RSV_STATE_T_CONFLICT,
-	UWB_RSV_STATE_T_PENDING,
-	UWB_RSV_STATE_T_EXPANDING_ACCEPTED,
-	UWB_RSV_STATE_T_EXPANDING_CONFLICT,
-	UWB_RSV_STATE_T_EXPANDING_PENDING,
-	UWB_RSV_STATE_T_EXPANDING_DENIED,
-	UWB_RSV_STATE_T_RESIZED,
-
-	UWB_RSV_STATE_LAST,
-};
-
-enum uwb_rsv_target_type {
-	UWB_RSV_TARGET_DEV,
-	UWB_RSV_TARGET_DEVADDR,
-};
-
-/**
- * struct uwb_rsv_target - the target of a reservation.
- *
- * Reservations unicast and targeted at a single device
- * (UWB_RSV_TARGET_DEV); or (e.g., in the case of WUSB) targeted at a
- * specific (private) DevAddr (UWB_RSV_TARGET_DEVADDR).
- */
-struct uwb_rsv_target {
-	enum uwb_rsv_target_type type;
-	union {
-		struct uwb_dev *dev;
-		struct uwb_dev_addr devaddr;
-	};
-};
-
-struct uwb_rsv_move {
-	struct uwb_mas_bm final_mas;
-	struct uwb_ie_drp *companion_drp_ie;
-	struct uwb_mas_bm companion_mas;
-};
-
-/*
- * Number of streams reserved for reservations targeted at DevAddrs.
- */
-#define UWB_NUM_GLOBAL_STREAMS 1
-
-typedef void (*uwb_rsv_cb_f)(struct uwb_rsv *rsv);
-
-/**
- * struct uwb_rsv - a DRP reservation
- *
- * Data structure management:
- *
- * @rc:             the radio controller this reservation is for
- *                  (as target or owner)
- * @rc_node:        a list node for the RC
- * @pal_node:       a list node for the PAL
- *
- * Owner and target parameters:
- *
- * @owner:          the UWB device owning this reservation
- * @target:         the target UWB device
- * @type:           reservation type
- *
- * Owner parameters:
- *
- * @max_mas:        maxiumum number of MAS
- * @min_mas:        minimum number of MAS
- * @sparsity:       owner selected sparsity
- * @is_multicast:   true iff multicast
- *
- * @callback:       callback function when the reservation completes
- * @pal_priv:       private data for the PAL making the reservation
- *
- * Reservation status:
- *
- * @status:         negotiation status
- * @stream:         stream index allocated for this reservation
- * @tiebreaker:     conflict tiebreaker for this reservation
- * @mas:            reserved MAS
- * @drp_ie:         the DRP IE
- * @ie_valid:       true iff the DRP IE matches the reservation parameters
- *
- * DRP reservations are uniquely identified by the owner, target and
- * stream index.  However, when using a DevAddr as a target (e.g., for
- * a WUSB cluster reservation) the responses may be received from
- * devices with different DevAddrs.  In this case, reservations are
- * uniquely identified by just the stream index.  A number of stream
- * indexes (UWB_NUM_GLOBAL_STREAMS) are reserved for this.
- */
-struct uwb_rsv {
-	struct uwb_rc *rc;
-	struct list_head rc_node;
-	struct list_head pal_node;
-	struct kref kref;
-
-	struct uwb_dev *owner;
-	struct uwb_rsv_target target;
-	enum uwb_drp_type type;
-	int max_mas;
-	int min_mas;
-	int max_interval;
-	bool is_multicast;
-
-	uwb_rsv_cb_f callback;
-	void *pal_priv;
-
-	enum uwb_rsv_state state;
-	bool needs_release_companion_mas;
-	u8 stream;
-	u8 tiebreaker;
-	struct uwb_mas_bm mas;
-	struct uwb_ie_drp *drp_ie;
-	struct uwb_rsv_move mv;
-	bool ie_valid;
-	struct timer_list timer;
-	struct work_struct handle_timeout_work;
-};
-
-static const
-struct uwb_mas_bm uwb_mas_bm_zero = { .bm = { 0 } };
-
-static inline void uwb_mas_bm_copy_le(void *dst, const struct uwb_mas_bm *mas)
-{
-	bitmap_copy_le(dst, mas->bm, UWB_NUM_MAS);
-}
-
-/**
- * struct uwb_drp_avail - a radio controller's view of MAS usage
- * @global:   MAS unused by neighbors (excluding reservations targeted
- *            or owned by the local radio controller) or the beaon period
- * @local:    MAS unused by local established reservations
- * @pending:  MAS unused by local pending reservations
- * @ie:       DRP Availability IE to be included in the beacon
- * @ie_valid: true iff @ie is valid and does not need to regenerated from
- *            @global and @local
- *
- * Each radio controller maintains a view of MAS usage or
- * availability. MAS available for a new reservation are determined
- * from the intersection of @global, @local, and @pending.
- *
- * The radio controller must transmit a DRP Availability IE that's the
- * intersection of @global and @local.
- *
- * A set bit indicates the MAS is unused and available.
- *
- * rc->rsvs_mutex should be held before accessing this data structure.
- *
- * [ECMA-368] section 17.4.3.
- */
-struct uwb_drp_avail {
-	DECLARE_BITMAP(global, UWB_NUM_MAS);
-	DECLARE_BITMAP(local, UWB_NUM_MAS);
-	DECLARE_BITMAP(pending, UWB_NUM_MAS);
-	struct uwb_ie_drp_avail ie;
-	bool ie_valid;
-};
-
-struct uwb_drp_backoff_win {
-	u8 window;
-	u8 n;
-	int total_expired;
-	struct timer_list timer;
-	bool can_reserve_extra_mases;
-};
-
-const char *uwb_rsv_state_str(enum uwb_rsv_state state);
-const char *uwb_rsv_type_str(enum uwb_drp_type type);
-
-struct uwb_rsv *uwb_rsv_create(struct uwb_rc *rc, uwb_rsv_cb_f cb,
-			       void *pal_priv);
-void uwb_rsv_destroy(struct uwb_rsv *rsv);
-
-int uwb_rsv_establish(struct uwb_rsv *rsv);
-int uwb_rsv_modify(struct uwb_rsv *rsv,
-		   int max_mas, int min_mas, int sparsity);
-void uwb_rsv_terminate(struct uwb_rsv *rsv);
-
-void uwb_rsv_accept(struct uwb_rsv *rsv, uwb_rsv_cb_f cb, void *pal_priv);
-
-void uwb_rsv_get_usable_mas(struct uwb_rsv *orig_rsv, struct uwb_mas_bm *mas);
-
-/**
- * Radio Control Interface instance
- *
- *
- * Life cycle rules: those of the UWB Device.
- *
- * @index:    an index number for this radio controller, as used in the
- *            device name.
- * @version:  version of protocol supported by this device
- * @priv:     Backend implementation; rw with uwb_dev.dev.sem taken.
- * @cmd:      Backend implementation to execute commands; rw and call
- *            only  with uwb_dev.dev.sem taken.
- * @reset:    Hardware reset of radio controller and any PAL controllers.
- * @filter:   Backend implementation to manipulate data to and from device
- *            to be compliant to specification assumed by driver (WHCI
- *            0.95).
- *
- *            uwb_dev.dev.mutex is used to execute commands and update
- *            the corresponding structures; can't use a spinlock
- *            because rc->cmd() can sleep.
- * @ies:         This is a dynamically allocated array cacheing the
- *               IEs (settable by the host) that the beacon of this
- *               radio controller is currently sending.
- *
- *               In reality, we store here the full command we set to
- *               the radio controller (which is basically a command
- *               prefix followed by all the IEs the beacon currently
- *               contains). This way we don't have to realloc and
- *               memcpy when setting it.
- *
- *               We set this up in uwb_rc_ie_setup(), where we alloc
- *               this struct, call get_ie() [so we know which IEs are
- *               currently being sent, if any].
- *
- * @ies_capacity:Amount of space (in bytes) allocated in @ies. The
- *               amount used is given by sizeof(*ies) plus ies->wIELength
- *               (which is a little endian quantity all the time).
- * @ies_mutex:   protect the IE cache
- * @dbg:         information for the debug interface
- */
-struct uwb_rc {
-	struct uwb_dev uwb_dev;
-	int index;
-	u16 version;
-
-	struct module *owner;
-	void *priv;
-	int (*start)(struct uwb_rc *rc);
-	void (*stop)(struct uwb_rc *rc);
-	int (*cmd)(struct uwb_rc *, const struct uwb_rccb *, size_t);
-	int (*reset)(struct uwb_rc *rc);
-	int (*filter_cmd)(struct uwb_rc *, struct uwb_rccb **, size_t *);
-	int (*filter_event)(struct uwb_rc *, struct uwb_rceb **, const size_t,
-			    size_t *, size_t *);
-
-	spinlock_t neh_lock;		/* protects neh_* and ctx_* */
-	struct list_head neh_list;	/* Open NE handles */
-	unsigned long ctx_bm[UWB_RC_CTX_MAX / 8 / sizeof(unsigned long)];
-	u8 ctx_roll;
-
-	int beaconing;			/* Beaconing state [channel number] */
-	int beaconing_forced;
-	int scanning;
-	enum uwb_scan_type scan_type:3;
-	unsigned ready:1;
-	struct uwb_notifs_chain notifs_chain;
-	struct uwb_beca uwb_beca;
-
-	struct uwbd uwbd;
-
-	struct uwb_drp_backoff_win bow;
-	struct uwb_drp_avail drp_avail;
-	struct list_head reservations;
-	struct list_head cnflt_alien_list;
-	struct uwb_mas_bm cnflt_alien_bitmap;
-	struct mutex rsvs_mutex;
-	spinlock_t rsvs_lock;
-	struct workqueue_struct *rsv_workq;
-
-	struct delayed_work rsv_update_work;
-	struct delayed_work rsv_alien_bp_work;
-	int set_drp_ie_pending;
-	struct mutex ies_mutex;
-	struct uwb_rc_cmd_set_ie *ies;
-	size_t ies_capacity;
-
-	struct list_head pals;
-	int active_pals;
-
-	struct uwb_dbg *dbg;
-};
-
-
-/**
- * struct uwb_pal - a UWB PAL
- * @name:    descriptive name for this PAL (wusbhc, wlp, etc.).
- * @device:  a device for the PAL.  Used to link the PAL and the radio
- *           controller in sysfs.
- * @rc:      the radio controller the PAL uses.
- * @channel_changed: called when the channel used by the radio changes.
- *           A channel of -1 means the channel has been stopped.
- * @new_rsv: called when a peer requests a reservation (may be NULL if
- *           the PAL cannot accept reservation requests).
- * @channel: channel being used by the PAL; 0 if the PAL isn't using
- *           the radio; -1 if the PAL wishes to use the radio but
- *           cannot.
- * @debugfs_dir: a debugfs directory which the PAL can use for its own
- *           debugfs files.
- *
- * A Protocol Adaptation Layer (PAL) is a user of the WiMedia UWB
- * radio platform (e.g., WUSB, WLP or Bluetooth UWB AMP).
- *
- * The PALs using a radio controller must register themselves to
- * permit the UWB stack to coordinate usage of the radio between the
- * various PALs or to allow PALs to response to certain requests from
- * peers.
- *
- * A struct uwb_pal should be embedded in a containing structure
- * belonging to the PAL and initialized with uwb_pal_init()).  Fields
- * should be set appropriately by the PAL before registering the PAL
- * with uwb_pal_register().
- */
-struct uwb_pal {
-	struct list_head node;
-	const char *name;
-	struct device *device;
-	struct uwb_rc *rc;
-
-	void (*channel_changed)(struct uwb_pal *pal, int channel);
-	void (*new_rsv)(struct uwb_pal *pal, struct uwb_rsv *rsv);
-
-	int channel;
-	struct dentry *debugfs_dir;
-};
-
-void uwb_pal_init(struct uwb_pal *pal);
-int uwb_pal_register(struct uwb_pal *pal);
-void uwb_pal_unregister(struct uwb_pal *pal);
-
-int uwb_radio_start(struct uwb_pal *pal);
-void uwb_radio_stop(struct uwb_pal *pal);
-
-/*
- * General public API
- *
- * This API can be used by UWB device drivers or by those implementing
- * UWB Radio Controllers
- */
-struct uwb_dev *uwb_dev_get_by_devaddr(struct uwb_rc *rc,
-				       const struct uwb_dev_addr *devaddr);
-struct uwb_dev *uwb_dev_get_by_rc(struct uwb_dev *, struct uwb_rc *);
-static inline void uwb_dev_get(struct uwb_dev *uwb_dev)
-{
-	get_device(&uwb_dev->dev);
-}
-static inline void uwb_dev_put(struct uwb_dev *uwb_dev)
-{
-	put_device(&uwb_dev->dev);
-}
-struct uwb_dev *uwb_dev_try_get(struct uwb_rc *rc, struct uwb_dev *uwb_dev);
-
-/**
- * Callback function for 'uwb_{dev,rc}_foreach()'.
- *
- * @dev:  Linux device instance
- *        'uwb_dev = container_of(dev, struct uwb_dev, dev)'
- * @priv: Data passed by the caller to 'uwb_{dev,rc}_foreach()'.
- *
- * @returns: 0 to continue the iterations, any other val to stop
- *           iterating and return the value to the caller of
- *           _foreach().
- */
-typedef int (*uwb_dev_for_each_f)(struct device *dev, void *priv);
-int uwb_dev_for_each(struct uwb_rc *rc, uwb_dev_for_each_f func, void *priv);
-
-struct uwb_rc *uwb_rc_alloc(void);
-struct uwb_rc *uwb_rc_get_by_dev(const struct uwb_dev_addr *);
-struct uwb_rc *uwb_rc_get_by_grandpa(const struct device *);
-void uwb_rc_put(struct uwb_rc *rc);
-
-typedef void (*uwb_rc_cmd_cb_f)(struct uwb_rc *rc, void *arg,
-                                struct uwb_rceb *reply, ssize_t reply_size);
-
-int uwb_rc_cmd_async(struct uwb_rc *rc, const char *cmd_name,
-		     struct uwb_rccb *cmd, size_t cmd_size,
-		     u8 expected_type, u16 expected_event,
-		     uwb_rc_cmd_cb_f cb, void *arg);
-ssize_t uwb_rc_cmd(struct uwb_rc *rc, const char *cmd_name,
-		   struct uwb_rccb *cmd, size_t cmd_size,
-		   struct uwb_rceb *reply, size_t reply_size);
-ssize_t uwb_rc_vcmd(struct uwb_rc *rc, const char *cmd_name,
-		    struct uwb_rccb *cmd, size_t cmd_size,
-		    u8 expected_type, u16 expected_event,
-		    struct uwb_rceb **preply);
-
-size_t __uwb_addr_print(char *, size_t, const unsigned char *, int);
-
-int uwb_rc_dev_addr_set(struct uwb_rc *, const struct uwb_dev_addr *);
-int uwb_rc_dev_addr_get(struct uwb_rc *, struct uwb_dev_addr *);
-int uwb_rc_mac_addr_set(struct uwb_rc *, const struct uwb_mac_addr *);
-int uwb_rc_mac_addr_get(struct uwb_rc *, struct uwb_mac_addr *);
-int __uwb_mac_addr_assigned_check(struct device *, void *);
-int __uwb_dev_addr_assigned_check(struct device *, void *);
-
-/* Print in @buf a pretty repr of @addr */
-static inline size_t uwb_dev_addr_print(char *buf, size_t buf_size,
-					const struct uwb_dev_addr *addr)
-{
-	return __uwb_addr_print(buf, buf_size, addr->data, 0);
-}
-
-/* Print in @buf a pretty repr of @addr */
-static inline size_t uwb_mac_addr_print(char *buf, size_t buf_size,
-					const struct uwb_mac_addr *addr)
-{
-	return __uwb_addr_print(buf, buf_size, addr->data, 1);
-}
-
-/* @returns 0 if device addresses @addr2 and @addr1 are equal */
-static inline int uwb_dev_addr_cmp(const struct uwb_dev_addr *addr1,
-				   const struct uwb_dev_addr *addr2)
-{
-	return memcmp(addr1, addr2, sizeof(*addr1));
-}
-
-/* @returns 0 if MAC addresses @addr2 and @addr1 are equal */
-static inline int uwb_mac_addr_cmp(const struct uwb_mac_addr *addr1,
-				   const struct uwb_mac_addr *addr2)
-{
-	return memcmp(addr1, addr2, sizeof(*addr1));
-}
-
-/* @returns !0 if a MAC @addr is a broadcast address */
-static inline int uwb_mac_addr_bcast(const struct uwb_mac_addr *addr)
-{
-	struct uwb_mac_addr bcast = {
-		.data = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }
-	};
-	return !uwb_mac_addr_cmp(addr, &bcast);
-}
-
-/* @returns !0 if a MAC @addr is all zeroes*/
-static inline int uwb_mac_addr_unset(const struct uwb_mac_addr *addr)
-{
-	struct uwb_mac_addr unset = {
-		.data = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
-	};
-	return !uwb_mac_addr_cmp(addr, &unset);
-}
-
-/* @returns !0 if the address is in use. */
-static inline unsigned __uwb_dev_addr_assigned(struct uwb_rc *rc,
-					       struct uwb_dev_addr *addr)
-{
-	return uwb_dev_for_each(rc, __uwb_dev_addr_assigned_check, addr);
-}
-
-/*
- * UWB Radio Controller API
- *
- * This API is used (in addition to the general API) to implement UWB
- * Radio Controllers.
- */
-void uwb_rc_init(struct uwb_rc *);
-int uwb_rc_add(struct uwb_rc *, struct device *dev, void *rc_priv);
-void uwb_rc_rm(struct uwb_rc *);
-void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t);
-void uwb_rc_neh_error(struct uwb_rc *, int);
-void uwb_rc_reset_all(struct uwb_rc *rc);
-void uwb_rc_pre_reset(struct uwb_rc *rc);
-int uwb_rc_post_reset(struct uwb_rc *rc);
-
-/**
- * uwb_rsv_is_owner - is the owner of this reservation the RC?
- * @rsv: the reservation
- */
-static inline bool uwb_rsv_is_owner(struct uwb_rsv *rsv)
-{
-	return rsv->owner == &rsv->rc->uwb_dev;
-}
-
-/**
- * enum uwb_notifs - UWB events that can be passed to any listeners
- * @UWB_NOTIF_ONAIR: a new neighbour has joined the beacon group.
- * @UWB_NOTIF_OFFAIR: a neighbour has left the beacon group.
- *
- * Higher layers can register callback functions with the radio
- * controller using uwb_notifs_register(). The radio controller
- * maintains a list of all registered handlers and will notify all
- * nodes when an event occurs.
- */
-enum uwb_notifs {
-	UWB_NOTIF_ONAIR,
-	UWB_NOTIF_OFFAIR,
-};
-
-/* Callback function registered with UWB */
-struct uwb_notifs_handler {
-	struct list_head list_node;
-	void (*cb)(void *, struct uwb_dev *, enum uwb_notifs);
-	void *data;
-};
-
-int uwb_notifs_register(struct uwb_rc *, struct uwb_notifs_handler *);
-int uwb_notifs_deregister(struct uwb_rc *, struct uwb_notifs_handler *);
-
-
-/**
- * UWB radio controller Event Size Entry (for creating entry tables)
- *
- * WUSB and WHCI define events and notifications, and they might have
- * fixed or variable size.
- *
- * Each event/notification has a size which is not necessarily known
- * in advance based on the event code. As well, vendor specific
- * events/notifications will have a size impossible to determine
- * unless we know about the device's specific details.
- *
- * It was way too smart of the spec writers not to think that it would
- * be impossible for a generic driver to skip over vendor specific
- * events/notifications if there are no LENGTH fields in the HEADER of
- * each message...the transaction size cannot be counted on as the
- * spec does not forbid to pack more than one event in a single
- * transaction.
- *
- * Thus, we guess sizes with tables (or for events, when you know the
- * size ahead of time you can use uwb_rc_neh_extra_size*()). We
- * register tables with the known events and their sizes, and then we
- * traverse those tables. For those with variable length, we provide a
- * way to lookup the size inside the event/notification's
- * payload. This allows device-specific event size tables to be
- * registered.
- *
- * @size:   Size of the payload
- *
- * @offset: if != 0, at offset @offset-1 starts a field with a length
- *          that has to be added to @size. The format of the field is
- *          given by @type.
- *
- * @type:   Type and length of the offset field. Most common is LE 16
- *          bits (that's why that is zero); others are there mostly to
- *          cover for bugs and weirdos.
- */
-struct uwb_est_entry {
-	size_t size;
-	unsigned offset;
-	enum { UWB_EST_16 = 0, UWB_EST_8 = 1 } type;
-};
-
-int uwb_est_register(u8 type, u8 code_high, u16 vendor, u16 product,
-		     const struct uwb_est_entry *, size_t entries);
-int uwb_est_unregister(u8 type, u8 code_high, u16 vendor, u16 product,
-		       const struct uwb_est_entry *, size_t entries);
-ssize_t uwb_est_find_size(struct uwb_rc *rc, const struct uwb_rceb *rceb,
-			  size_t len);
-
-/* -- Misc */
-
-enum {
-	EDC_MAX_ERRORS = 10,
-	EDC_ERROR_TIMEFRAME = HZ,
-};
-
-/* error density counter */
-struct edc {
-	unsigned long timestart;
-	u16 errorcount;
-};
-
-static inline
-void edc_init(struct edc *edc)
-{
-	edc->timestart = jiffies;
-}
-
-/* Called when an error occurred.
- * This is way to determine if the number of acceptable errors per time
- * period has been exceeded. It is not accurate as there are cases in which
- * this scheme will not work, for example if there are periodic occurrences
- * of errors that straddle updates to the start time. This scheme is
- * sufficient for our usage.
- *
- * @returns 1 if maximum acceptable errors per timeframe has been exceeded.
- */
-static inline int edc_inc(struct edc *err_hist, u16 max_err, u16 timeframe)
-{
-	unsigned long now;
-
-	now = jiffies;
-	if (now - err_hist->timestart > timeframe) {
-		err_hist->errorcount = 1;
-		err_hist->timestart = now;
-	} else if (++err_hist->errorcount > max_err) {
-			err_hist->errorcount = 0;
-			err_hist->timestart = now;
-			return 1;
-	}
-	return 0;
-}
-
-
-/* Information Element handling */
-
-struct uwb_ie_hdr *uwb_ie_next(void **ptr, size_t *len);
-int uwb_rc_ie_add(struct uwb_rc *uwb_rc, const struct uwb_ie_hdr *ies, size_t size);
-int uwb_rc_ie_rm(struct uwb_rc *uwb_rc, enum uwb_ie element_id);
-
-/*
- * Transmission statistics
- *
- * UWB uses LQI and RSSI (one byte values) for reporting radio signal
- * strength and line quality indication. We do quick and dirty
- * averages of those. They are signed values, btw.
- *
- * For 8 bit quantities, we keep the min, the max, an accumulator
- * (@sigma) and a # of samples. When @samples gets to 255, we compute
- * the average (@sigma / @samples), place it in @sigma and reset
- * @samples to 1 (so we use it as the first sample).
- *
- * Now, statistically speaking, probably I am kicking the kidneys of
- * some books I have in my shelves collecting dust, but I just want to
- * get an approx, not the Nobel.
- *
- * LOCKING: there is no locking per se, but we try to keep a lockless
- * schema. Only _add_samples() modifies the values--as long as you
- * have other locking on top that makes sure that no two calls of
- * _add_sample() happen at the same time, then we are fine. Now, for
- * resetting the values we just set @samples to 0 and that makes the
- * next _add_sample() to start with defaults. Reading the values in
- * _show() currently can race, so you need to make sure the calls are
- * under the same lock that protects calls to _add_sample(). FIXME:
- * currently unlocked (It is not ultraprecise but does the trick. Bite
- * me).
- */
-struct stats {
-	s8 min, max;
-	s16 sigma;
-	atomic_t samples;
-};
-
-static inline
-void stats_init(struct stats *stats)
-{
-	atomic_set(&stats->samples, 0);
-	wmb();
-}
-
-static inline
-void stats_add_sample(struct stats *stats, s8 sample)
-{
-	s8 min, max;
-	s16 sigma;
-	unsigned samples = atomic_read(&stats->samples);
-	if (samples == 0) {	/* it was zero before, so we initialize */
-		min = 127;
-		max = -128;
-		sigma = 0;
-	} else {
-		min = stats->min;
-		max = stats->max;
-		sigma = stats->sigma;
-	}
-
-	if (sample < min)	/* compute new values */
-		min = sample;
-	else if (sample > max)
-		max = sample;
-	sigma += sample;
-
-	stats->min = min;	/* commit */
-	stats->max = max;
-	stats->sigma = sigma;
-	if (atomic_add_return(1, &stats->samples) > 255) {
-		/* wrapped around! reset */
-		stats->sigma = sigma / 256;
-		atomic_set(&stats->samples, 1);
-	}
-}
-
-static inline ssize_t stats_show(struct stats *stats, char *buf)
-{
-	int min, max, avg;
-	int samples = atomic_read(&stats->samples);
-	if (samples == 0)
-		min = max = avg = 0;
-	else {
-		min = stats->min;
-		max = stats->max;
-		avg = stats->sigma / samples;
-	}
-	return scnprintf(buf, PAGE_SIZE, "%d %d %d\n", min, max, avg);
-}
-
-static inline ssize_t stats_store(struct stats *stats, const char *buf,
-				  size_t size)
-{
-	stats_init(stats);
-	return size;
-}
-
-#endif /* #ifndef __LINUX__UWB_H__ */
diff --git a/include/linux/uwb/debug-cmd.h b/include/linux/uwb/debug-cmd.h
deleted file mode 100644
index f97db6c3bcc0..000000000000
--- a/include/linux/uwb/debug-cmd.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Ultra Wide Band
- * Debug interface commands
- *
- * Copyright (C) 2008 Cambridge Silicon Radio Ltd.
- */
-#ifndef __LINUX__UWB__DEBUG_CMD_H__
-#define __LINUX__UWB__DEBUG_CMD_H__
-
-#include <linux/types.h>
-
-/*
- * Debug interface commands
- *
- * UWB_DBG_CMD_RSV_ESTABLISH: Establish a new unicast reservation.
- *
- * UWB_DBG_CMD_RSV_TERMINATE: Terminate the Nth reservation.
- */
-
-enum uwb_dbg_cmd_type {
-	UWB_DBG_CMD_RSV_ESTABLISH = 1,
-	UWB_DBG_CMD_RSV_TERMINATE = 2,
-	UWB_DBG_CMD_IE_ADD = 3,
-	UWB_DBG_CMD_IE_RM = 4,
-	UWB_DBG_CMD_RADIO_START = 5,
-	UWB_DBG_CMD_RADIO_STOP = 6,
-};
-
-struct uwb_dbg_cmd_rsv_establish {
-	__u8  target[6];
-	__u8  type;
-	__u16 max_mas;
-	__u16 min_mas;
-	__u8  max_interval;
-};
-
-struct uwb_dbg_cmd_rsv_terminate {
-	int index;
-};
-
-struct uwb_dbg_cmd_ie {
-	__u8 data[128];
-	int len;
-};
-
-struct uwb_dbg_cmd {
-	__u32 type;
-	union {
-		struct uwb_dbg_cmd_rsv_establish rsv_establish;
-		struct uwb_dbg_cmd_rsv_terminate rsv_terminate;
-		struct uwb_dbg_cmd_ie ie_add;
-		struct uwb_dbg_cmd_ie ie_rm;
-	};
-};
-
-#endif /* #ifndef __LINUX__UWB__DEBUG_CMD_H__ */
diff --git a/include/linux/uwb/spec.h b/include/linux/uwb/spec.h
deleted file mode 100644
index 5f75caf7b4ba..000000000000
--- a/include/linux/uwb/spec.h
+++ /dev/null
@@ -1,767 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Ultra Wide Band
- * UWB Standard definitions
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * All these definitions are based on the ECMA-368 standard.
- *
- * Note all definitions are Little Endian in the wire, and we will
- * convert them to host order before operating on the bitfields (that
- * yes, we use extensively).
- */
-
-#ifndef __LINUX__UWB_SPEC_H__
-#define __LINUX__UWB_SPEC_H__
-
-#include <linux/types.h>
-#include <linux/bitmap.h>
-#include <linux/if_ether.h>
-
-#define i1480_FW 0x00000303
-/* #define i1480_FW 0x00000302 */
-
-/**
- * Number of Medium Access Slots in a superframe.
- *
- * UWB divides time in SuperFrames, each one divided in 256 pieces, or
- * Medium Access Slots. See MBOA MAC[5.4.5] for details. The MAS is the
- * basic bandwidth allocation unit in UWB.
- */
-enum { UWB_NUM_MAS = 256 };
-
-/**
- * Number of Zones in superframe.
- *
- * UWB divides the superframe into zones with numbering starting from BPST.
- * See MBOA MAC[16.8.6]
- */
-enum { UWB_NUM_ZONES = 16 };
-
-/*
- * Number of MAS in a zone.
- */
-#define UWB_MAS_PER_ZONE (UWB_NUM_MAS / UWB_NUM_ZONES)
-
-/*
- * Number of MAS required before a row can be considered available.
- */
-#define UWB_USABLE_MAS_PER_ROW (UWB_NUM_ZONES - 1)
-
-/*
- * Number of streams per DRP reservation between a pair of devices.
- *
- * [ECMA-368] section 16.8.6.
- */
-enum { UWB_NUM_STREAMS = 8 };
-
-/*
- * mMasLength
- *
- * The length of a MAS in microseconds.
- *
- * [ECMA-368] section 17.16.
- */
-enum { UWB_MAS_LENGTH_US = 256 };
-
-/*
- * mBeaconSlotLength
- *
- * The length of the beacon slot in microseconds.
- *
- * [ECMA-368] section 17.16
- */
-enum { UWB_BEACON_SLOT_LENGTH_US = 85 };
-
-/*
- * mMaxLostBeacons
- *
- * The number beacons missing in consecutive superframes before a
- * device can be considered as unreachable.
- *
- * [ECMA-368] section 17.16
- */
-enum { UWB_MAX_LOST_BEACONS = 3 };
-
-/*
- * mDRPBackOffWinMin
- *
- * The minimum number of superframes to wait before trying to reserve
- * extra MAS.
- *
- * [ECMA-368] section 17.16
- */
-enum { UWB_DRP_BACKOFF_WIN_MIN = 2 };
-
-/*
- * mDRPBackOffWinMax
- *
- * The maximum number of superframes to wait before trying to reserve
- * extra MAS.
- *
- * [ECMA-368] section 17.16
- */
-enum { UWB_DRP_BACKOFF_WIN_MAX = 16 };
-
-/*
- * Length of a superframe in microseconds.
- */
-#define UWB_SUPERFRAME_LENGTH_US (UWB_MAS_LENGTH_US * UWB_NUM_MAS)
-
-/**
- * UWB MAC address
- *
- * It is *imperative* that this struct is exactly 6 packed bytes (as
- * it is also used to define headers sent down and up the wire/radio).
- */
-struct uwb_mac_addr {
-	u8 data[ETH_ALEN];
-} __attribute__((packed));
-
-
-/**
- * UWB device address
- *
- * It is *imperative* that this struct is exactly 6 packed bytes (as
- * it is also used to define headers sent down and up the wire/radio).
- */
-struct uwb_dev_addr {
-	u8 data[2];
-} __attribute__((packed));
-
-
-/**
- * Types of UWB addresses
- *
- * Order matters (by size).
- */
-enum uwb_addr_type {
-	UWB_ADDR_DEV = 0,
-	UWB_ADDR_MAC = 1,
-};
-
-
-/** Size of a char buffer for printing a MAC/device address */
-enum { UWB_ADDR_STRSIZE = 32 };
-
-
-/** UWB WiMedia protocol IDs. */
-enum uwb_prid {
-	UWB_PRID_WLP_RESERVED   = 0x0000,
-	UWB_PRID_WLP		= 0x0001,
-	UWB_PRID_WUSB_BOT	= 0x0010,
-	UWB_PRID_WUSB		= 0x0010,
-	UWB_PRID_WUSB_TOP	= 0x001F,
-};
-
-
-/** PHY Rate (MBOA MAC[7.8.12, Table 61]) */
-enum uwb_phy_rate {
-	UWB_PHY_RATE_53 = 0,
-	UWB_PHY_RATE_80,
-	UWB_PHY_RATE_106,
-	UWB_PHY_RATE_160,
-	UWB_PHY_RATE_200,
-	UWB_PHY_RATE_320,
-	UWB_PHY_RATE_400,
-	UWB_PHY_RATE_480,
-	UWB_PHY_RATE_INVALID
-};
-
-
-/**
- * Different ways to scan (MBOA MAC[6.2.2, Table 8], WUSB[Table 8-78])
- */
-enum uwb_scan_type {
-	UWB_SCAN_ONLY = 0,
-	UWB_SCAN_OUTSIDE_BP,
-	UWB_SCAN_WHILE_INACTIVE,
-	UWB_SCAN_DISABLED,
-	UWB_SCAN_ONLY_STARTTIME,
-	UWB_SCAN_TOP
-};
-
-
-/** ACK Policy types (MBOA MAC[7.2.1.3]) */
-enum uwb_ack_pol {
-	UWB_ACK_NO = 0,
-	UWB_ACK_INM = 1,
-	UWB_ACK_B = 2,
-	UWB_ACK_B_REQ = 3,
-};
-
-
-/** DRP reservation types ([ECMA-368 table 106) */
-enum uwb_drp_type {
-	UWB_DRP_TYPE_ALIEN_BP = 0,
-	UWB_DRP_TYPE_HARD,
-	UWB_DRP_TYPE_SOFT,
-	UWB_DRP_TYPE_PRIVATE,
-	UWB_DRP_TYPE_PCA,
-};
-
-
-/** DRP Reason Codes ([ECMA-368] table 107) */
-enum uwb_drp_reason {
-	UWB_DRP_REASON_ACCEPTED = 0,
-	UWB_DRP_REASON_CONFLICT,
-	UWB_DRP_REASON_PENDING,
-	UWB_DRP_REASON_DENIED,
-	UWB_DRP_REASON_MODIFIED,
-};
-
-/** Relinquish Request Reason Codes ([ECMA-368] table 113) */
-enum uwb_relinquish_req_reason {
-	UWB_RELINQUISH_REQ_REASON_NON_SPECIFIC = 0,
-	UWB_RELINQUISH_REQ_REASON_OVER_ALLOCATION,
-};
-
-/**
- *  DRP Notification Reason Codes (WHCI 0.95 [3.1.4.9])
- */
-enum uwb_drp_notif_reason {
-	UWB_DRP_NOTIF_DRP_IE_RCVD = 0,
-	UWB_DRP_NOTIF_CONFLICT,
-	UWB_DRP_NOTIF_TERMINATE,
-};
-
-
-/** Allocation of MAS slots in a DRP request MBOA MAC[7.8.7] */
-struct uwb_drp_alloc {
-	__le16 zone_bm;
-	__le16 mas_bm;
-} __attribute__((packed));
-
-
-/** General MAC Header format (ECMA-368[16.2]) */
-struct uwb_mac_frame_hdr {
-	__le16 Frame_Control;
-	struct uwb_dev_addr DestAddr;
-	struct uwb_dev_addr SrcAddr;
-	__le16 Sequence_Control;
-	__le16 Access_Information;
-} __attribute__((packed));
-
-
-/**
- * uwb_beacon_frame - a beacon frame including MAC headers
- *
- * [ECMA] section 16.3.
- */
-struct uwb_beacon_frame {
-	struct uwb_mac_frame_hdr hdr;
-	struct uwb_mac_addr Device_Identifier;	/* may be a NULL EUI-48 */
-	u8 Beacon_Slot_Number;
-	u8 Device_Control;
-	u8 IEData[];
-} __attribute__((packed));
-
-
-/** Information Element codes (MBOA MAC[T54]) */
-enum uwb_ie {
-	UWB_PCA_AVAILABILITY = 2,
-	UWB_IE_DRP_AVAILABILITY = 8,
-	UWB_IE_DRP = 9,
-	UWB_BP_SWITCH_IE = 11,
-	UWB_MAC_CAPABILITIES_IE = 12,
-	UWB_PHY_CAPABILITIES_IE = 13,
-	UWB_APP_SPEC_PROBE_IE = 15,
-	UWB_IDENTIFICATION_IE = 19,
-	UWB_MASTER_KEY_ID_IE = 20,
-	UWB_RELINQUISH_REQUEST_IE = 21,
-	UWB_IE_WLP = 250, /* WiMedia Logical Link Control Protocol WLP 0.99 */
-	UWB_APP_SPEC_IE = 255,
-};
-
-
-/**
- * Header common to all Information Elements (IEs)
- */
-struct uwb_ie_hdr {
-	u8 element_id;	/* enum uwb_ie */
-	u8 length;
-} __attribute__((packed));
-
-
-/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.6]) */
-struct uwb_ie_drp {
-	struct uwb_ie_hdr	hdr;
-	__le16                  drp_control;
-	struct uwb_dev_addr	dev_addr;
-	struct uwb_drp_alloc	allocs[];
-} __attribute__((packed));
-
-static inline int uwb_ie_drp_type(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 0) & 0x7;
-}
-
-static inline int uwb_ie_drp_stream_index(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 3) & 0x7;
-}
-
-static inline int uwb_ie_drp_reason_code(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 6) & 0x7;
-}
-
-static inline int uwb_ie_drp_status(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 9) & 0x1;
-}
-
-static inline int uwb_ie_drp_owner(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 10) & 0x1;
-}
-
-static inline int uwb_ie_drp_tiebreaker(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 11) & 0x1;
-}
-
-static inline int uwb_ie_drp_unsafe(struct uwb_ie_drp *ie)
-{
-	return (le16_to_cpu(ie->drp_control) >> 12) & 0x1;
-}
-
-static inline void uwb_ie_drp_set_type(struct uwb_ie_drp *ie, enum uwb_drp_type type)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (drp_control & ~(0x7 << 0)) | (type << 0);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-static inline void uwb_ie_drp_set_stream_index(struct uwb_ie_drp *ie, int stream_index)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (drp_control & ~(0x7 << 3)) | (stream_index << 3);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-static inline void uwb_ie_drp_set_reason_code(struct uwb_ie_drp *ie,
-				       enum uwb_drp_reason reason_code)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (ie->drp_control & ~(0x7 << 6)) | (reason_code << 6);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-static inline void uwb_ie_drp_set_status(struct uwb_ie_drp *ie, int status)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (drp_control & ~(0x1 << 9)) | (status << 9);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-static inline void uwb_ie_drp_set_owner(struct uwb_ie_drp *ie, int owner)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (drp_control & ~(0x1 << 10)) | (owner << 10);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-static inline void uwb_ie_drp_set_tiebreaker(struct uwb_ie_drp *ie, int tiebreaker)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (drp_control & ~(0x1 << 11)) | (tiebreaker << 11);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-static inline void uwb_ie_drp_set_unsafe(struct uwb_ie_drp *ie, int unsafe)
-{
-	u16 drp_control = le16_to_cpu(ie->drp_control);
-	drp_control = (drp_control & ~(0x1 << 12)) | (unsafe << 12);
-	ie->drp_control = cpu_to_le16(drp_control);
-}
-
-/** Dynamic Reservation Protocol IE (MBOA MAC[7.8.7]) */
-struct uwb_ie_drp_avail {
-	struct uwb_ie_hdr	hdr;
-	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
-} __attribute__((packed));
-
-/* Relinqish Request IE ([ECMA-368] section 16.8.19). */
-struct uwb_relinquish_request_ie {
-        struct uwb_ie_hdr       hdr;
-        __le16                  relinquish_req_control;
-        struct uwb_dev_addr     dev_addr;
-        struct uwb_drp_alloc    allocs[];
-} __attribute__((packed));
-
-static inline int uwb_ie_relinquish_req_reason_code(struct uwb_relinquish_request_ie *ie)
-{
-	return (le16_to_cpu(ie->relinquish_req_control) >> 0) & 0xf;
-}
-
-static inline void uwb_ie_relinquish_req_set_reason_code(struct uwb_relinquish_request_ie *ie,
-							 int reason_code)
-{
-	u16 ctrl = le16_to_cpu(ie->relinquish_req_control);
-	ctrl = (ctrl & ~(0xf << 0)) | (reason_code << 0);
-	ie->relinquish_req_control = cpu_to_le16(ctrl);
-}
-
-/**
- * The Vendor ID is set to an OUI that indicates the vendor of the device.
- * ECMA-368 [16.8.10]
- */
-struct uwb_vendor_id {
-	u8 data[3];
-} __attribute__((packed));
-
-/**
- * The device type ID
- * FIXME: clarify what this means
- * ECMA-368 [16.8.10]
- */
-struct uwb_device_type_id {
-	u8 data[3];
-} __attribute__((packed));
-
-
-/**
- * UWB device information types
- * ECMA-368 [16.8.10]
- */
-enum uwb_dev_info_type {
-	UWB_DEV_INFO_VENDOR_ID = 0,
-	UWB_DEV_INFO_VENDOR_TYPE,
-	UWB_DEV_INFO_NAME,
-};
-
-/**
- * UWB device information found in Identification IE
- * ECMA-368 [16.8.10]
- */
-struct uwb_dev_info {
-	u8 type;	/* enum uwb_dev_info_type */
-	u8 length;
-	u8 data[];
-} __attribute__((packed));
-
-/**
- * UWB Identification IE
- * ECMA-368 [16.8.10]
- */
-struct uwb_identification_ie {
-	struct uwb_ie_hdr hdr;
-	struct uwb_dev_info info[];
-} __attribute__((packed));
-
-/*
- * UWB Radio Controller
- *
- * These definitions are common to the Radio Control layers as
- * exported by the WUSB1.0 HWA and WHCI interfaces.
- */
-
-/** Radio Control Command Block (WUSB1.0[Table 8-65] and WHCI 0.95) */
-struct uwb_rccb {
-	u8 bCommandType;		/* enum hwa_cet */
-	__le16 wCommand;		/* Command code */
-	u8 bCommandContext;		/* Context ID */
-} __attribute__((packed));
-
-
-/** Radio Control Event Block (WUSB[table 8-66], WHCI 0.95) */
-struct uwb_rceb {
-	u8 bEventType;			/* enum hwa_cet */
-	__le16 wEvent;			/* Event code */
-	u8 bEventContext;		/* Context ID */
-} __attribute__((packed));
-
-
-enum {
-	UWB_RC_CET_GENERAL = 0,		/* General Command/Event type */
-	UWB_RC_CET_EX_TYPE_1 = 1,	/* Extended Type 1 Command/Event type */
-};
-
-/* Commands to the radio controller */
-enum uwb_rc_cmd {
-	UWB_RC_CMD_CHANNEL_CHANGE = 16,
-	UWB_RC_CMD_DEV_ADDR_MGMT = 17,	/* Device Address Management */
-	UWB_RC_CMD_GET_IE = 18,		/* GET Information Elements */
-	UWB_RC_CMD_RESET = 19,
-	UWB_RC_CMD_SCAN = 20,		/* Scan management  */
-	UWB_RC_CMD_SET_BEACON_FILTER = 21,
-	UWB_RC_CMD_SET_DRP_IE = 22,	/* Dynamic Reservation Protocol IEs */
-	UWB_RC_CMD_SET_IE = 23,		/* Information Element management */
-	UWB_RC_CMD_SET_NOTIFICATION_FILTER = 24,
-	UWB_RC_CMD_SET_TX_POWER = 25,
-	UWB_RC_CMD_SLEEP = 26,
-	UWB_RC_CMD_START_BEACON = 27,
-	UWB_RC_CMD_STOP_BEACON = 28,
-	UWB_RC_CMD_BP_MERGE = 29,
-	UWB_RC_CMD_SEND_COMMAND_FRAME = 30,
-	UWB_RC_CMD_SET_ASIE_NOTIF = 31,
-};
-
-/* Notifications from the radio controller */
-enum uwb_rc_evt {
-	UWB_RC_EVT_IE_RCV = 0,
-	UWB_RC_EVT_BEACON = 1,
-	UWB_RC_EVT_BEACON_SIZE = 2,
-	UWB_RC_EVT_BPOIE_CHANGE = 3,
-	UWB_RC_EVT_BP_SLOT_CHANGE = 4,
-	UWB_RC_EVT_BP_SWITCH_IE_RCV = 5,
-	UWB_RC_EVT_DEV_ADDR_CONFLICT = 6,
-	UWB_RC_EVT_DRP_AVAIL = 7,
-	UWB_RC_EVT_DRP = 8,
-	UWB_RC_EVT_BP_SWITCH_STATUS = 9,
-	UWB_RC_EVT_CMD_FRAME_RCV = 10,
-	UWB_RC_EVT_CHANNEL_CHANGE_IE_RCV = 11,
-	/* Events (command responses) use the same code as the command */
-	UWB_RC_EVT_UNKNOWN_CMD_RCV = 65535,
-};
-
-enum uwb_rc_extended_type_1_cmd {
-	UWB_RC_SET_DAA_ENERGY_MASK = 32,
-	UWB_RC_SET_NOTIFICATION_FILTER_EX = 33,
-};
-
-enum uwb_rc_extended_type_1_evt {
-	UWB_RC_DAA_ENERGY_DETECTED = 0,
-};
-
-/* Radio Control Result Code. [WHCI] table 3-3. */
-enum {
-	UWB_RC_RES_SUCCESS = 0,
-	UWB_RC_RES_FAIL,
-	UWB_RC_RES_FAIL_HARDWARE,
-	UWB_RC_RES_FAIL_NO_SLOTS,
-	UWB_RC_RES_FAIL_BEACON_TOO_LARGE,
-	UWB_RC_RES_FAIL_INVALID_PARAMETER,
-	UWB_RC_RES_FAIL_UNSUPPORTED_PWR_LEVEL,
-	UWB_RC_RES_FAIL_INVALID_IE_DATA,
-	UWB_RC_RES_FAIL_BEACON_SIZE_EXCEEDED,
-	UWB_RC_RES_FAIL_CANCELLED,
-	UWB_RC_RES_FAIL_INVALID_STATE,
-	UWB_RC_RES_FAIL_INVALID_SIZE,
-	UWB_RC_RES_FAIL_ACK_NOT_RECEIVED,
-	UWB_RC_RES_FAIL_NO_MORE_ASIE_NOTIF,
-	UWB_RC_RES_FAIL_TIME_OUT = 255,
-};
-
-/* Confirm event. [WHCI] section 3.1.3.1 etc. */
-struct uwb_rc_evt_confirm {
-	struct uwb_rceb rceb;
-	u8 bResultCode;
-} __attribute__((packed));
-
-/* Device Address Management event. [WHCI] section 3.1.3.2. */
-struct uwb_rc_evt_dev_addr_mgmt {
-	struct uwb_rceb rceb;
-	u8 baAddr[ETH_ALEN];
-	u8 bResultCode;
-} __attribute__((packed));
-
-
-/* Get IE Event. [WHCI] section 3.1.3.3. */
-struct uwb_rc_evt_get_ie {
-	struct uwb_rceb rceb;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/* Set DRP IE Event. [WHCI] section 3.1.3.7. */
-struct uwb_rc_evt_set_drp_ie {
-	struct uwb_rceb rceb;
-	__le16 wRemainingSpace;
-	u8 bResultCode;
-} __attribute__((packed));
-
-/* Set IE Event. [WHCI] section 3.1.3.8. */
-struct uwb_rc_evt_set_ie {
-	struct uwb_rceb rceb;
-	__le16 RemainingSpace;
-	u8 bResultCode;
-} __attribute__((packed));
-
-/* Scan command. [WHCI] 3.1.3.5. */
-struct uwb_rc_cmd_scan {
-	struct uwb_rccb rccb;
-	u8 bChannelNumber;
-	u8 bScanState;
-	__le16 wStartTime;
-} __attribute__((packed));
-
-/* Set DRP IE command. [WHCI] section 3.1.3.7. */
-struct uwb_rc_cmd_set_drp_ie {
-	struct uwb_rccb rccb;
-	__le16 wIELength;
-	struct uwb_ie_drp IEData[];
-} __attribute__((packed));
-
-/* Set IE command. [WHCI] section 3.1.3.8. */
-struct uwb_rc_cmd_set_ie {
-	struct uwb_rccb rccb;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/* Set DAA Energy Mask event. [WHCI 0.96] section 3.1.3.17. */
-struct uwb_rc_evt_set_daa_energy_mask {
-	struct uwb_rceb rceb;
-	__le16 wLength;
-	u8 result;
-} __attribute__((packed));
-
-/* Set Notification Filter Extended event. [WHCI 0.96] section 3.1.3.18. */
-struct uwb_rc_evt_set_notification_filter_ex {
-	struct uwb_rceb rceb;
-	__le16 wLength;
-	u8 result;
-} __attribute__((packed));
-
-/* IE Received notification. [WHCI] section 3.1.4.1. */
-struct uwb_rc_evt_ie_rcv {
-	struct uwb_rceb rceb;
-	struct uwb_dev_addr SrcAddr;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/* Type of the received beacon. [WHCI] section 3.1.4.2. */
-enum uwb_rc_beacon_type {
-	UWB_RC_BEACON_TYPE_SCAN = 0,
-	UWB_RC_BEACON_TYPE_NEIGHBOR,
-	UWB_RC_BEACON_TYPE_OL_ALIEN,
-	UWB_RC_BEACON_TYPE_NOL_ALIEN,
-};
-
-/* Beacon received notification. [WHCI] 3.1.4.2. */
-struct uwb_rc_evt_beacon {
-	struct uwb_rceb rceb;
-	u8	bChannelNumber;
-	u8	bBeaconType;
-	__le16	wBPSTOffset;
-	u8	bLQI;
-	u8	bRSSI;
-	__le16	wBeaconInfoLength;
-	u8	BeaconInfo[];
-} __attribute__((packed));
-
-
-/* Beacon Size Change notification. [WHCI] section 3.1.4.3 */
-struct uwb_rc_evt_beacon_size {
-	struct uwb_rceb rceb;
-	__le16 wNewBeaconSize;
-} __attribute__((packed));
-
-
-/* BPOIE Change notification. [WHCI] section 3.1.4.4. */
-struct uwb_rc_evt_bpoie_change {
-	struct uwb_rceb rceb;
-	__le16 wBPOIELength;
-	u8 BPOIE[];
-} __attribute__((packed));
-
-
-/* Beacon Slot Change notification. [WHCI] section 3.1.4.5. */
-struct uwb_rc_evt_bp_slot_change {
-	struct uwb_rceb rceb;
-	u8 slot_info;
-} __attribute__((packed));
-
-static inline int uwb_rc_evt_bp_slot_change_slot_num(
-	const struct uwb_rc_evt_bp_slot_change *evt)
-{
-	return evt->slot_info & 0x7f;
-}
-
-static inline int uwb_rc_evt_bp_slot_change_no_slot(
-	const struct uwb_rc_evt_bp_slot_change *evt)
-{
-	return (evt->slot_info & 0x80) >> 7;
-}
-
-/* BP Switch IE Received notification. [WHCI] section 3.1.4.6. */
-struct uwb_rc_evt_bp_switch_ie_rcv {
-	struct uwb_rceb rceb;
-	struct uwb_dev_addr wSrcAddr;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/* DevAddr Conflict notification. [WHCI] section 3.1.4.7. */
-struct uwb_rc_evt_dev_addr_conflict {
-	struct uwb_rceb rceb;
-} __attribute__((packed));
-
-/* DRP notification. [WHCI] section 3.1.4.9. */
-struct uwb_rc_evt_drp {
-	struct uwb_rceb           rceb;
-	struct uwb_dev_addr       src_addr;
-	u8                        reason;
-	u8                        beacon_slot_number;
-	__le16                    ie_length;
-	u8                        ie_data[];
-} __attribute__((packed));
-
-static inline enum uwb_drp_notif_reason uwb_rc_evt_drp_reason(struct uwb_rc_evt_drp *evt)
-{
-	return evt->reason & 0x0f;
-}
-
-
-/* DRP Availability Change notification. [WHCI] section 3.1.4.8. */
-struct uwb_rc_evt_drp_avail {
-	struct uwb_rceb rceb;
-	DECLARE_BITMAP(bmp, UWB_NUM_MAS);
-} __attribute__((packed));
-
-/* BP switch status notification. [WHCI] section 3.1.4.10. */
-struct uwb_rc_evt_bp_switch_status {
-	struct uwb_rceb rceb;
-	u8 status;
-	u8 slot_offset;
-	__le16 bpst_offset;
-	u8 move_countdown;
-} __attribute__((packed));
-
-/* Command Frame Received notification. [WHCI] section 3.1.4.11. */
-struct uwb_rc_evt_cmd_frame_rcv {
-	struct uwb_rceb rceb;
-	__le16 receive_time;
-	struct uwb_dev_addr wSrcAddr;
-	struct uwb_dev_addr wDstAddr;
-	__le16 control;
-	__le16 reserved;
-	__le16 dataLength;
-	u8 data[];
-} __attribute__((packed));
-
-/* Channel Change IE Received notification. [WHCI] section 3.1.4.12. */
-struct uwb_rc_evt_channel_change_ie_rcv {
-	struct uwb_rceb rceb;
-	struct uwb_dev_addr wSrcAddr;
-	__le16 wIELength;
-	u8 IEData[];
-} __attribute__((packed));
-
-/* DAA Energy Detected notification. [WHCI 0.96] section 3.1.4.14. */
-struct uwb_rc_evt_daa_energy_detected {
-	struct uwb_rceb rceb;
-	__le16 wLength;
-	u8 bandID;
-	u8 reserved;
-	u8 toneBmp[16];
-} __attribute__((packed));
-
-
-/**
- * Radio Control Interface Class Descriptor
- *
- *  WUSB 1.0 [8.6.1.2]
- */
-struct uwb_rc_control_intf_class_desc {
-	u8 bLength;
-	u8 bDescriptorType;
-	__le16 bcdRCIVersion;
-} __attribute__((packed));
-
-#endif /* #ifndef __LINUX__UWB_SPEC_H__ */
diff --git a/include/linux/uwb/umc.h b/include/linux/uwb/umc.h
deleted file mode 100644
index ddbceb39ad15..000000000000
--- a/include/linux/uwb/umc.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * UWB Multi-interface Controller support.
- *
- * Copyright (C) 2007 Cambridge Silicon Radio Ltd.
- *
- * UMC (UWB Multi-interface Controller) capabilities (e.g., radio
- * controller, host controller) are presented as devices on the "umc"
- * bus.
- *
- * The radio controller is not strictly a UMC capability but it's
- * useful to present it as such.
- *
- * References:
- *
- *   [WHCI] Wireless Host Controller Interface Specification for
- *          Certified Wireless Universal Serial Bus, revision 0.95.
- *
- * How this works is kind of convoluted but simple. The whci.ko driver
- * loads when WHCI devices are detected. These WHCI devices expose
- * many devices in the same PCI function (they couldn't have reused
- * functions, no), so for each PCI function that exposes these many
- * devices, whci ceates a umc_dev [whci_probe() -> whci_add_cap()]
- * with umc_device_create() and adds it to the bus with
- * umc_device_register().
- *
- * umc_device_register() calls device_register() which will push the
- * bus management code to load your UMC driver's somehting_probe()
- * that you have registered for that capability code.
- *
- * Now when the WHCI device is removed, whci_remove() will go over
- * each umc_dev assigned to each of the PCI function's capabilities
- * and through whci_del_cap() call umc_device_unregister() each
- * created umc_dev. Of course, if you are bound to the device, your
- * driver's something_remove() will be called.
- */
-
-#ifndef _LINUX_UWB_UMC_H_
-#define _LINUX_UWB_UMC_H_
-
-#include <linux/device.h>
-#include <linux/pci.h>
-
-/*
- * UMC capability IDs.
- *
- * 0x00 is reserved so use it for the radio controller device.
- *
- * [WHCI] table 2-8
- */
-#define UMC_CAP_ID_WHCI_RC      0x00 /* radio controller */
-#define UMC_CAP_ID_WHCI_WUSB_HC 0x01 /* WUSB host controller */
-
-/**
- * struct umc_dev - UMC capability device
- *
- * @version:  version of the specification this capability conforms to.
- * @cap_id:   capability ID.
- * @bar:      PCI Bar (64 bit) where the resource lies
- * @resource: register space resource.
- * @irq:      interrupt line.
- */
-struct umc_dev {
-	u16		version;
-	u8		cap_id;
-	u8		bar;
-	struct resource resource;
-	unsigned	irq;
-	struct device	dev;
-};
-
-#define to_umc_dev(d) container_of(d, struct umc_dev, dev)
-
-/**
- * struct umc_driver - UMC capability driver
- * @cap_id: supported capability ID.
- * @match: driver specific capability matching function.
- * @match_data: driver specific data for match() (e.g., a
- * table of pci_device_id's if umc_match_pci_id() is used).
- */
-struct umc_driver {
-	char *name;
-	u8 cap_id;
-	int (*match)(struct umc_driver *, struct umc_dev *);
-	const void *match_data;
-
-	int  (*probe)(struct umc_dev *);
-	void (*remove)(struct umc_dev *);
-	int  (*pre_reset)(struct umc_dev *);
-	int  (*post_reset)(struct umc_dev *);
-
-	struct device_driver driver;
-};
-
-#define to_umc_driver(d) container_of(d, struct umc_driver, driver)
-
-extern struct bus_type umc_bus_type;
-
-struct umc_dev *umc_device_create(struct device *parent, int n);
-int __must_check umc_device_register(struct umc_dev *umc);
-void umc_device_unregister(struct umc_dev *umc);
-
-int __must_check __umc_driver_register(struct umc_driver *umc_drv,
-				       struct module *mod,
-				       const char *mod_name);
-
-/**
- * umc_driver_register - register a UMC capabiltity driver.
- * @umc_drv:  pointer to the driver.
- */
-#define umc_driver_register(umc_drv) \
-	__umc_driver_register(umc_drv, THIS_MODULE, KBUILD_MODNAME)
-
-void umc_driver_unregister(struct umc_driver *umc_drv);
-
-/*
- * Utility function you can use to match (umc_driver->match) against a
- * null-terminated array of 'struct pci_device_id' in
- * umc_driver->match_data.
- */
-int umc_match_pci_id(struct umc_driver *umc_drv, struct umc_dev *umc);
-
-/**
- * umc_parent_pci_dev - return the UMC's parent PCI device or NULL if none
- * @umc_dev: UMC device whose parent PCI device we are looking for
- *
- * DIRTY!!! DON'T RELY ON THIS
- *
- * FIXME: This is as dirty as it gets, but we need some way to check
- * the correct type of umc_dev->parent (so that for example, we can
- * cast to pci_dev). Casting to pci_dev is necessary because at some
- * point we need to request resources from the device. Mapping is
- * easily over come (ioremap and stuff are bus agnostic), but hooking
- * up to some error handlers (such as pci error handlers) might need
- * this.
- *
- * THIS might (probably will) be removed in the future, so don't count
- * on it.
- */
-static inline struct pci_dev *umc_parent_pci_dev(struct umc_dev *umc_dev)
-{
-	struct pci_dev *pci_dev = NULL;
-	if (dev_is_pci(umc_dev->dev.parent))
-		pci_dev = to_pci_dev(umc_dev->dev.parent);
-	return pci_dev;
-}
-
-/**
- * umc_dev_get() - reference a UMC device.
- * @umc_dev: Pointer to UMC device.
- *
- * NOTE: we are assuming in this whole scheme that the parent device
- *       is referenced at _probe() time and unreferenced at _remove()
- *       time by the parent's subsystem.
- */
-static inline struct umc_dev *umc_dev_get(struct umc_dev *umc_dev)
-{
-	get_device(&umc_dev->dev);
-	return umc_dev;
-}
-
-/**
- * umc_dev_put() - unreference a UMC device.
- * @umc_dev: Pointer to UMC device.
- */
-static inline void umc_dev_put(struct umc_dev *umc_dev)
-{
-	put_device(&umc_dev->dev);
-}
-
-/**
- * umc_set_drvdata - set UMC device's driver data.
- * @umc_dev: Pointer to UMC device.
- * @data:    Data to set.
- */
-static inline void umc_set_drvdata(struct umc_dev *umc_dev, void *data)
-{
-	dev_set_drvdata(&umc_dev->dev, data);
-}
-
-/**
- * umc_get_drvdata - recover UMC device's driver data.
- * @umc_dev: Pointer to UMC device.
- */
-static inline void *umc_get_drvdata(struct umc_dev *umc_dev)
-{
-	return dev_get_drvdata(&umc_dev->dev);
-}
-
-int umc_controller_reset(struct umc_dev *umc);
-
-#endif /* #ifndef _LINUX_UWB_UMC_H_ */
diff --git a/include/linux/uwb/whci.h b/include/linux/uwb/whci.h
deleted file mode 100644
index 1a5c2cc2b008..000000000000
--- a/include/linux/uwb/whci.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Wireless Host Controller Interface for Ultra-Wide-Band and Wireless USB
- *
- * Copyright (C) 2005-2006 Intel Corporation
- * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
- *
- * References:
- *   [WHCI] Wireless Host Controller Interface Specification for
- *          Certified Wireless Universal Serial Bus, revision 0.95.
- */
-#ifndef _LINUX_UWB_WHCI_H_
-#define _LINUX_UWB_WHCI_H_
-
-#include <linux/pci.h>
-
-/*
- * UWB interface capability registers (offsets from UWBBASE)
- *
- * [WHCI] section 2.2
- */
-#define UWBCAPINFO	0x00 /* == UWBCAPDATA(0) */
-#  define UWBCAPINFO_TO_N_CAPS(c)	(((c) >> 0)  & 0xFull)
-#define UWBCAPDATA(n)	(8*(n))
-#  define UWBCAPDATA_TO_VERSION(c)	(((c) >> 32) & 0xFFFFull)
-#  define UWBCAPDATA_TO_OFFSET(c)	(((c) >> 18) & 0x3FFFull)
-#  define UWBCAPDATA_TO_BAR(c)		(((c) >> 16) & 0x3ull)
-#  define UWBCAPDATA_TO_SIZE(c)		((((c) >> 8) & 0xFFull) * sizeof(u32))
-#  define UWBCAPDATA_TO_CAP_ID(c)	(((c) >> 0)  & 0xFFull)
-
-/* Size of the WHCI capability data (including the RC capability) for
-   a device with n capabilities. */
-#define UWBCAPDATA_SIZE(n) (8 + 8*(n))
-
-
-/*
- * URC registers (offsets from URCBASE)
- *
- * [WHCI] section 2.3
- */
-#define URCCMD		0x00
-#  define URCCMD_RESET		(1 << 31)  /* UMC Hardware reset */
-#  define URCCMD_RS		(1 << 30)  /* Run/Stop */
-#  define URCCMD_EARV		(1 << 29)  /* Event Address Register Valid */
-#  define URCCMD_ACTIVE		(1 << 15)  /* Command is active */
-#  define URCCMD_IWR		(1 << 14)  /* Interrupt When Ready */
-#  define URCCMD_SIZE_MASK	0x00000fff /* Command size mask */
-#define URCSTS		0x04
-#  define URCSTS_EPS		(1 << 17)  /* Event Processing Status */
-#  define URCSTS_HALTED		(1 << 16)  /* RC halted */
-#  define URCSTS_HSE		(1 << 10)  /* Host System Error...fried */
-#  define URCSTS_ER		(1 <<  9)  /* Event Ready */
-#  define URCSTS_RCI		(1 <<  8)  /* Ready for Command Interrupt */
-#  define URCSTS_INT_MASK	0x00000700 /* URC interrupt sources */
-#  define URCSTS_ISI		0x000000ff /* Interrupt Source Identification */
-#define URCINTR		0x08
-#  define URCINTR_EN_ALL	0x000007ff /* Enable all interrupt sources */
-#define URCCMDADDR	0x10
-#define URCEVTADDR	0x18
-#  define URCEVTADDR_OFFSET_MASK 0xfff    /* Event pointer offset mask */
-
-
-/** Write 32 bit @value to little endian register at @addr */
-static inline
-void le_writel(u32 value, void __iomem *addr)
-{
-	iowrite32(value, addr);
-}
-
-
-/** Read from 32 bit little endian register at @addr */
-static inline
-u32 le_readl(void __iomem *addr)
-{
-	return ioread32(addr);
-}
-
-
-/** Write 64 bit @value to little endian register at @addr */
-static inline
-void le_writeq(u64 value, void __iomem *addr)
-{
-	iowrite32(value, addr);
-	iowrite32(value >> 32, addr + 4);
-}
-
-
-/** Read from 64 bit little endian register at @addr */
-static inline
-u64 le_readq(void __iomem *addr)
-{
-	u64 value;
-	value  = ioread32(addr);
-	value |= (u64)ioread32(addr + 4) << 32;
-	return value;
-}
-
-extern int whci_wait_for(struct device *dev, u32 __iomem *reg,
-			 u32 mask, u32 result,
-			 unsigned long max_ms,  const char *tag);
-
-#endif /* #ifndef _LINUX_UWB_WHCI_H_ */
-- 
cgit v1.2.3


From e57d143091f1c0b1a98140a4d2e63e113afb62c0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 8 Aug 2019 08:47:14 +0200
Subject: mutex: Fix up mutex_waiter usage

The patch moving bits into mutex.c was a little too much; by also
moving struct mutex_waiter a few less common CONFIGs would no longer
build.

Fixes: 5f35d5a66b3e ("locking/mutex: Make __mutex_owner static to mutex.c")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/mutex.h  | 13 +++++++++++++
 kernel/locking/mutex.c | 13 -------------
 kernel/locking/mutex.h |  2 --
 3 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index eb8c62aba263..aca8f36dfac9 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -65,6 +65,19 @@ struct mutex {
 #endif
 };
 
+/*
+ * This is the control structure for tasks blocked on mutex,
+ * which resides on the blocked task's kernel stack:
+ */
+struct mutex_waiter {
+	struct list_head	list;
+	struct task_struct	*task;
+	struct ww_acquire_ctx	*ww_ctx;
+#ifdef CONFIG_DEBUG_MUTEXES
+	void			*magic;
+#endif
+};
+
 #ifdef CONFIG_DEBUG_MUTEXES
 
 #define __DEBUG_MUTEX_INITIALIZER(lockname)				\
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index b4bcb0236d7a..468a9b8422e3 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -36,19 +36,6 @@
 # include "mutex.h"
 #endif
 
-/*
- * This is the control structure for tasks blocked on mutex,
- * which resides on the blocked task's kernel stack:
- */
-struct mutex_waiter {
-	struct list_head	list;
-	struct task_struct	*task;
-	struct ww_acquire_ctx	*ww_ctx;
-#ifdef CONFIG_DEBUG_MUTEXES
-	void			*magic;
-#endif
-};
-
 void
 __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
 {
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 7cde5c6d414e..1c2287d3fa71 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -19,8 +19,6 @@
 #define debug_mutex_unlock(lock)			do { } while (0)
 #define debug_mutex_init(lock, name, key)		do { } while (0)
 
-struct mutex_waiter;
-
 static inline void
 debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
 {
-- 
cgit v1.2.3


From e55f31a599478fb06a5a5d95e019e963322535cb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 25 Jun 2019 15:36:45 +0200
Subject: efi: x86: move efi_is_table_address() into arch/x86

The function efi_is_table_address() and the associated array of table
pointers is specific to x86. Since we will be adding some more x86
specific tables, let's move this code out of the generic code first.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/x86/include/asm/efi.h  |  5 +++++
 arch/x86/mm/ioremap.c       |  1 +
 arch/x86/platform/efi/efi.c | 33 +++++++++++++++++++++++++++++++++
 drivers/firmware/efi/efi.c  | 33 ---------------------------------
 include/linux/efi.h         |  7 -------
 5 files changed, 39 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 606a4b6a9812..43a82e59c59d 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -242,6 +242,7 @@ static inline bool efi_is_64bit(void)
 		__efi_early()->runtime_services), __VA_ARGS__)
 
 extern bool efi_reboot_required(void);
+extern bool efi_is_table_address(unsigned long phys_addr);
 
 #else
 static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
@@ -249,6 +250,10 @@ static inline bool efi_reboot_required(void)
 {
 	return false;
 }
+static inline  bool efi_is_table_address(unsigned long phys_addr)
+{
+	return false;
+}
 #endif /* CONFIG_EFI */
 
 #endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 63e99f15d7cf..a39dcdb5ae34 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -19,6 +19,7 @@
 
 #include <asm/set_memory.h>
 #include <asm/e820/api.h>
+#include <asm/efi.h>
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index a7189a3b4d70..8d9be97a5607 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -64,6 +64,25 @@ static efi_config_table_type_t arch_tables[] __initdata = {
 	{NULL_GUID, NULL, NULL},
 };
 
+static const unsigned long * const efi_tables[] = {
+	&efi.mps,
+	&efi.acpi,
+	&efi.acpi20,
+	&efi.smbios,
+	&efi.smbios3,
+	&efi.sal_systab,
+	&efi.boot_info,
+	&efi.hcdp,
+	&efi.uga,
+	&efi.uv_systab,
+	&efi.fw_vendor,
+	&efi.runtime,
+	&efi.config_table,
+	&efi.esrt,
+	&efi.properties_table,
+	&efi.mem_attr_table,
+};
+
 u64 efi_setup;		/* efi setup_data physical address */
 
 static int add_efi_memmap __initdata;
@@ -1049,3 +1068,17 @@ static int __init arch_parse_efi_cmdline(char *str)
 	return 0;
 }
 early_param("efi", arch_parse_efi_cmdline);
+
+bool efi_is_table_address(unsigned long phys_addr)
+{
+	unsigned int i;
+
+	if (phys_addr == EFI_INVALID_TABLE_ADDR)
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
+		if (*(efi_tables[i]) == phys_addr)
+			return true;
+
+	return false;
+}
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index ad3b1f4866b3..cbdbdbc8f9eb 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -57,25 +57,6 @@ struct efi __read_mostly efi = {
 };
 EXPORT_SYMBOL(efi);
 
-static unsigned long *efi_tables[] = {
-	&efi.mps,
-	&efi.acpi,
-	&efi.acpi20,
-	&efi.smbios,
-	&efi.smbios3,
-	&efi.sal_systab,
-	&efi.boot_info,
-	&efi.hcdp,
-	&efi.uga,
-	&efi.uv_systab,
-	&efi.fw_vendor,
-	&efi.runtime,
-	&efi.config_table,
-	&efi.esrt,
-	&efi.properties_table,
-	&efi.mem_attr_table,
-};
-
 struct mm_struct efi_mm = {
 	.mm_rb			= RB_ROOT,
 	.mm_users		= ATOMIC_INIT(2),
@@ -964,20 +945,6 @@ int efi_status_to_err(efi_status_t status)
 	return err;
 }
 
-bool efi_is_table_address(unsigned long phys_addr)
-{
-	unsigned int i;
-
-	if (phys_addr == EFI_INVALID_TABLE_ADDR)
-		return false;
-
-	for (i = 0; i < ARRAY_SIZE(efi_tables); i++)
-		if (*(efi_tables[i]) == phys_addr)
-			return true;
-
-	return false;
-}
-
 static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
 static struct linux_efi_memreserve *efi_memreserve_root __ro_after_init;
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index f87fabea4a85..60a6242765d8 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1211,8 +1211,6 @@ static inline bool efi_enabled(int feature)
 	return test_bit(feature, &efi.flags) != 0;
 }
 extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused);
-
-extern bool efi_is_table_address(unsigned long phys_addr);
 #else
 static inline bool efi_enabled(int feature)
 {
@@ -1226,11 +1224,6 @@ efi_capsule_pending(int *reset_type)
 {
 	return false;
 }
-
-static inline bool efi_is_table_address(unsigned long phys_addr)
-{
-	return false;
-}
 #endif
 
 extern int efi_status_to_err(efi_status_t status);
-- 
cgit v1.2.3


From ec7e1605d79d1d469b25e396f2056e42386f512f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 25 Jun 2019 15:48:35 +0200
Subject: efi/x86: move UV_SYSTAB handling into arch/x86

The SGI UV UEFI machines are tightly coupled to the x86 architecture
so there is no need to keep any awareness of its existence in the
generic EFI layer, especially since we already have the infrastructure
to handle arch-specific configuration tables, and were even already
using it to some extent.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/x86/include/asm/uv/uv.h   |  4 +++-
 arch/x86/platform/efi/efi.c    |  6 ++++--
 arch/x86/platform/uv/bios_uv.c | 10 ++++++----
 drivers/firmware/efi/efi.c     |  1 -
 include/linux/efi.h            |  1 -
 5 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index e60c45fd3679..6bc6d89d8e2a 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,10 +12,12 @@ struct mm_struct;
 #ifdef CONFIG_X86_UV
 #include <linux/efi.h>
 
+extern unsigned long uv_systab_phys;
+
 extern enum uv_system_type get_uv_system_type(void);
 static inline bool is_early_uv_system(void)
 {
-	return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab);
+	return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
 }
 extern int is_uv_system(void);
 extern int is_uv_hubless(void);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 8d9be97a5607..9866a3584765 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -59,7 +59,7 @@ static efi_system_table_t efi_systab __initdata;
 
 static efi_config_table_type_t arch_tables[] __initdata = {
 #ifdef CONFIG_X86_UV
-	{UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
+	{UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys},
 #endif
 	{NULL_GUID, NULL, NULL},
 };
@@ -74,7 +74,9 @@ static const unsigned long * const efi_tables[] = {
 	&efi.boot_info,
 	&efi.hcdp,
 	&efi.uga,
-	&efi.uv_systab,
+#ifdef CONFIG_X86_UV
+	&uv_systab_phys,
+#endif
 	&efi.fw_vendor,
 	&efi.runtime,
 	&efi.config_table,
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 7c69652ffeea..c2ee31953372 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -14,6 +14,8 @@
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>
 
+unsigned long uv_systab_phys __ro_after_init = EFI_INVALID_TABLE_ADDR;
+
 struct uv_systab *uv_systab;
 
 static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3,
@@ -185,13 +187,13 @@ EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
 void uv_bios_init(void)
 {
 	uv_systab = NULL;
-	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
-	    !efi.uv_systab || efi_runtime_disabled()) {
+	if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
+	    !uv_systab_phys || efi_runtime_disabled()) {
 		pr_crit("UV: UVsystab: missing\n");
 		return;
 	}
 
-	uv_systab = ioremap(efi.uv_systab, sizeof(struct uv_systab));
+	uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
 	if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
 		pr_err("UV: UVsystab: bad signature!\n");
 		iounmap(uv_systab);
@@ -203,7 +205,7 @@ void uv_bios_init(void)
 		int size = uv_systab->size;
 
 		iounmap(uv_systab);
-		uv_systab = ioremap(efi.uv_systab, size);
+		uv_systab = ioremap(uv_systab_phys, size);
 		if (!uv_systab) {
 			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
 			return;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index cbdbdbc8f9eb..4dfd873373bd 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -43,7 +43,6 @@ struct efi __read_mostly efi = {
 	.boot_info		= EFI_INVALID_TABLE_ADDR,
 	.hcdp			= EFI_INVALID_TABLE_ADDR,
 	.uga			= EFI_INVALID_TABLE_ADDR,
-	.uv_systab		= EFI_INVALID_TABLE_ADDR,
 	.fw_vendor		= EFI_INVALID_TABLE_ADDR,
 	.runtime		= EFI_INVALID_TABLE_ADDR,
 	.config_table		= EFI_INVALID_TABLE_ADDR,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 60a6242765d8..171bb1005a10 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -988,7 +988,6 @@ extern struct efi {
 	unsigned long boot_info;	/* boot info table */
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
-	unsigned long uv_systab;	/* UV system table */
 	unsigned long fw_vendor;	/* fw_vendor */
 	unsigned long runtime;		/* runtime table */
 	unsigned long config_table;	/* config tables */
-- 
cgit v1.2.3


From 5828efb95bc43ad6a59f05458d3aed9649dd5a63 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 25 Jun 2019 16:28:53 +0200
Subject: efi: ia64: move SAL systab handling out of generic EFI code

The SAL systab is an Itanium specific EFI configuration table, so
move its handling into arch/ia64 where it belongs.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/ia64/include/asm/sal.h       | 1 +
 arch/ia64/include/asm/sn/sn_sal.h | 2 +-
 arch/ia64/kernel/efi.c            | 3 +++
 arch/ia64/kernel/setup.c          | 2 +-
 arch/x86/platform/efi/efi.c       | 1 -
 drivers/firmware/efi/efi.c        | 2 --
 include/linux/efi.h               | 1 -
 7 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h
index 588f33156da6..08f5b6aaed73 100644
--- a/arch/ia64/include/asm/sal.h
+++ b/arch/ia64/include/asm/sal.h
@@ -43,6 +43,7 @@
 #include <asm/pal.h>
 #include <asm/fpu.h>
 
+extern unsigned long sal_systab_phys;
 extern spinlock_t sal_lock;
 
 /* SAL spec _requires_ eight args for each call. */
diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h
index 1f5ff470a5a1..5142c444652d 100644
--- a/arch/ia64/include/asm/sn/sn_sal.h
+++ b/arch/ia64/include/asm/sn/sn_sal.h
@@ -167,7 +167,7 @@
 static inline u32
 sn_sal_rev(void)
 {
-	struct ia64_sal_systab *systab = __va(efi.sal_systab);
+	struct ia64_sal_systab *systab = __va(sal_systab_phys);
 
 	return (u32)(systab->sal_b_rev_major << 8 | systab->sal_b_rev_minor);
 }
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 3795d18276c4..0a34dcc435c6 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -47,8 +47,11 @@
 
 static __initdata unsigned long palo_phys;
 
+unsigned long sal_systab_phys = EFI_INVALID_TABLE_ADDR;
+
 static __initdata efi_config_table_type_t arch_tables[] = {
 	{PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys},
+	{SAL_SYSTEM_TABLE_GUID, "SALsystab", &sal_systab_phys},
 	{NULL_GUID, NULL, 0},
 };
 
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index c9cfa760cd57..0e1b4eb149b4 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -572,7 +572,7 @@ setup_arch (char **cmdline_p)
 	find_memory();
 
 	/* process SAL system table: */
-	ia64_sal_init(__va(efi.sal_systab));
+	ia64_sal_init(__va(sal_systab_phys));
 
 #ifdef CONFIG_ITANIUM
 	ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 9866a3584765..6697c109c449 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -70,7 +70,6 @@ static const unsigned long * const efi_tables[] = {
 	&efi.acpi20,
 	&efi.smbios,
 	&efi.smbios3,
-	&efi.sal_systab,
 	&efi.boot_info,
 	&efi.hcdp,
 	&efi.uga,
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 4dfd873373bd..801925c5bcfb 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -39,7 +39,6 @@ struct efi __read_mostly efi = {
 	.acpi20			= EFI_INVALID_TABLE_ADDR,
 	.smbios			= EFI_INVALID_TABLE_ADDR,
 	.smbios3		= EFI_INVALID_TABLE_ADDR,
-	.sal_systab		= EFI_INVALID_TABLE_ADDR,
 	.boot_info		= EFI_INVALID_TABLE_ADDR,
 	.hcdp			= EFI_INVALID_TABLE_ADDR,
 	.uga			= EFI_INVALID_TABLE_ADDR,
@@ -456,7 +455,6 @@ static __initdata efi_config_table_type_t common_tables[] = {
 	{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
 	{HCDP_TABLE_GUID, "HCDP", &efi.hcdp},
 	{MPS_TABLE_GUID, "MPS", &efi.mps},
-	{SAL_SYSTEM_TABLE_GUID, "SALsystab", &efi.sal_systab},
 	{SMBIOS_TABLE_GUID, "SMBIOS", &efi.smbios},
 	{SMBIOS3_TABLE_GUID, "SMBIOS 3.0", &efi.smbios3},
 	{UGA_IO_PROTOCOL_GUID, "UGA", &efi.uga},
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 171bb1005a10..f88318b85fb0 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -984,7 +984,6 @@ extern struct efi {
 	unsigned long acpi20;		/* ACPI table  (ACPI 2.0) */
 	unsigned long smbios;		/* SMBIOS table (32 bit entry point) */
 	unsigned long smbios3;		/* SMBIOS table (64 bit entry point) */
-	unsigned long sal_systab;	/* SAL system table */
 	unsigned long boot_info;	/* boot info table */
 	unsigned long hcdp;		/* HCDP table */
 	unsigned long uga;		/* UGA table */
-- 
cgit v1.2.3


From 1c5fecb61255aa12a16c4c06335ab68979865914 Mon Sep 17 00:00:00 2001
From: Narendra K <Narendra.K@dell.com>
Date: Wed, 10 Jul 2019 18:59:15 +0000
Subject: efi: Export Runtime Configuration Interface table to sysfs

System firmware advertises the address of the 'Runtime
Configuration Interface table version 2 (RCI2)' via
an EFI Configuration Table entry. This code retrieves the RCI2
table from the address and exports it to sysfs as a binary
attribute 'rci2' under /sys/firmware/efi/tables directory.
The approach adopted is similar to the attribute 'DMI' under
/sys/firmware/dmi/tables.

RCI2 table contains BIOS HII in XML format and is used to populate
BIOS setup page in Dell EMC OpenManage Server Administrator tool.
The BIOS setup page contains BIOS tokens which can be configured.

Signed-off-by: Narendra K <Narendra.K@dell.com>
Reviewed-by: Mario Limonciello <mario.limonciello@dell.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 Documentation/ABI/testing/sysfs-firmware-efi |   8 ++
 arch/x86/platform/efi/efi.c                  |   3 +
 drivers/firmware/efi/Kconfig                 |  13 +++
 drivers/firmware/efi/Makefile                |   1 +
 drivers/firmware/efi/efi.c                   |   3 +
 drivers/firmware/efi/rci2-table.c            | 147 +++++++++++++++++++++++++++
 include/linux/efi.h                          |   5 +
 7 files changed, 180 insertions(+)
 create mode 100644 drivers/firmware/efi/rci2-table.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi
index e794eac32a90..5e4d0b27cdfe 100644
--- a/Documentation/ABI/testing/sysfs-firmware-efi
+++ b/Documentation/ABI/testing/sysfs-firmware-efi
@@ -28,3 +28,11 @@ Description:	Displays the physical addresses of all EFI Configuration
 		versions are always printed first, i.e. ACPI20 comes
 		before ACPI.
 Users:		dmidecode
+
+What:		/sys/firmware/efi/tables/rci2
+Date:		July 2019
+Contact:	Narendra K <Narendra.K@dell.com>, linux-bugs@dell.com
+Description:	Displays the content of the Runtime Configuration Interface
+		Table version 2 on Dell EMC PowerEdge systems in binary format
+Users:		It is used by Dell EMC OpenManage Server Administrator tool to
+		populate BIOS setup page.
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 6697c109c449..c202e1b07e29 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -82,6 +82,9 @@ static const unsigned long * const efi_tables[] = {
 	&efi.esrt,
 	&efi.properties_table,
 	&efi.mem_attr_table,
+#ifdef CONFIG_EFI_RCI2_TABLE
+	&rci2_table_phys,
+#endif
 };
 
 u64 efi_setup;		/* efi setup_data physical address */
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index d4ea929e8b34..178ee8106828 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -180,6 +180,19 @@ config RESET_ATTACK_MITIGATION
 	  have been evicted, since otherwise it will trigger even on clean
 	  reboots.
 
+config EFI_RCI2_TABLE
+	bool "EFI Runtime Configuration Interface Table Version 2 Support"
+	help
+	  Displays the content of the Runtime Configuration Interface
+	  Table version 2 on Dell EMC PowerEdge systems as a binary
+	  attribute 'rci2' under /sys/firmware/efi/tables directory.
+
+	  RCI2 table contains BIOS HII in XML format and is used to populate
+	  BIOS setup page in Dell EMC OpenManage Server Administrator tool.
+	  The BIOS setup page contains BIOS tokens which can be configured.
+
+	  Say Y here for Dell EMC PowerEdge systems.
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index d2d0d2030620..4ac2de4dfa72 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
 obj-$(CONFIG_EFI_TEST)			+= test/
 obj-$(CONFIG_EFI_DEV_PATH_PARSER)	+= dev-path-parser.o
 obj-$(CONFIG_APPLE_PROPERTIES)		+= apple-properties.o
+obj-$(CONFIG_EFI_RCI2_TABLE)		+= rci2-table.o
 
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 801925c5bcfb..8f1ab04f6743 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -465,6 +465,9 @@ static __initdata efi_config_table_type_t common_tables[] = {
 	{LINUX_EFI_TPM_EVENT_LOG_GUID, "TPMEventLog", &efi.tpm_log},
 	{LINUX_EFI_TPM_FINAL_LOG_GUID, "TPMFinalLog", &efi.tpm_final_log},
 	{LINUX_EFI_MEMRESERVE_TABLE_GUID, "MEMRESERVE", &efi.mem_reserve},
+#ifdef CONFIG_EFI_RCI2_TABLE
+	{DELLEMC_EFI_RCI2_TABLE_GUID, NULL, &rci2_table_phys},
+#endif
 	{NULL_GUID, NULL, NULL},
 };
 
diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c
new file mode 100644
index 000000000000..3e290f96620a
--- /dev/null
+++ b/drivers/firmware/efi/rci2-table.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Export Runtime Configuration Interface Table Version 2 (RCI2)
+ * to sysfs
+ *
+ * Copyright (C) 2019 Dell Inc
+ * by Narendra K <Narendra.K@dell.com>
+ *
+ * System firmware advertises the address of the RCI2 Table via
+ * an EFI Configuration Table entry. This code retrieves the RCI2
+ * table from the address and exports it to sysfs as a binary
+ * attribute 'rci2' under /sys/firmware/efi/tables directory.
+ */
+
+#include <linux/kobject.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/efi.h>
+#include <linux/types.h>
+#include <linux/io.h>
+
+#define RCI_SIGNATURE	"_RC_"
+
+struct rci2_table_global_hdr {
+	u16 type;
+	u16 resvd0;
+	u16 hdr_len;
+	u8 rci2_sig[4];
+	u16 resvd1;
+	u32 resvd2;
+	u32 resvd3;
+	u8 major_rev;
+	u8 minor_rev;
+	u16 num_of_structs;
+	u32 rci2_len;
+	u16 rci2_chksum;
+} __packed;
+
+static u8 *rci2_base;
+static u32 rci2_table_len;
+unsigned long rci2_table_phys __ro_after_init = EFI_INVALID_TABLE_ADDR;
+
+static ssize_t raw_table_read(struct file *file, struct kobject *kobj,
+			      struct bin_attribute *attr, char *buf,
+			      loff_t pos, size_t count)
+{
+	memcpy(buf, attr->private + pos, count);
+	return count;
+}
+
+static BIN_ATTR(rci2, S_IRUSR, raw_table_read, NULL, 0);
+
+static u16 checksum(void)
+{
+	u8 len_is_odd = rci2_table_len % 2;
+	u32 chksum_len = rci2_table_len;
+	u16 *base = (u16 *)rci2_base;
+	u8 buf[2] = {0};
+	u32 offset = 0;
+	u16 chksum = 0;
+
+	if (len_is_odd)
+		chksum_len -= 1;
+
+	while (offset < chksum_len) {
+		chksum += *base;
+		offset += 2;
+		base++;
+	}
+
+	if (len_is_odd) {
+		buf[0] = *(u8 *)base;
+		chksum += *(u16 *)(buf);
+	}
+
+	return chksum;
+}
+
+int __init efi_rci2_sysfs_init(void)
+{
+	struct kobject *tables_kobj;
+	int ret = -ENOMEM;
+
+	rci2_base = memremap(rci2_table_phys,
+			     sizeof(struct rci2_table_global_hdr),
+			     MEMREMAP_WB);
+	if (!rci2_base) {
+		pr_debug("RCI2 table init failed - could not map RCI2 table\n");
+		goto err;
+	}
+
+	if (strncmp(rci2_base +
+		    offsetof(struct rci2_table_global_hdr, rci2_sig),
+		    RCI_SIGNATURE, 4)) {
+		pr_debug("RCI2 table init failed - incorrect signature\n");
+		ret = -ENODEV;
+		goto err_unmap;
+	}
+
+	rci2_table_len = *(u32 *)(rci2_base +
+				  offsetof(struct rci2_table_global_hdr,
+				  rci2_len));
+
+	memunmap(rci2_base);
+
+	if (!rci2_table_len) {
+		pr_debug("RCI2 table init failed - incorrect table length\n");
+		goto err;
+	}
+
+	rci2_base = memremap(rci2_table_phys, rci2_table_len, MEMREMAP_WB);
+	if (!rci2_base) {
+		pr_debug("RCI2 table - could not map RCI2 table\n");
+		goto err;
+	}
+
+	if (checksum() != 0) {
+		pr_debug("RCI2 table - incorrect checksum\n");
+		ret = -ENODEV;
+		goto err_unmap;
+	}
+
+	tables_kobj = kobject_create_and_add("tables", efi_kobj);
+	if (!tables_kobj) {
+		pr_debug("RCI2 table - tables_kobj creation failed\n");
+		goto err_unmap;
+	}
+
+	bin_attr_rci2.size = rci2_table_len;
+	bin_attr_rci2.private = rci2_base;
+	ret = sysfs_create_bin_file(tables_kobj, &bin_attr_rci2);
+	if (ret != 0) {
+		pr_debug("RCI2 table - rci2 sysfs bin file creation failed\n");
+		kobject_del(tables_kobj);
+		kobject_put(tables_kobj);
+		goto err_unmap;
+	}
+
+	return 0;
+
+ err_unmap:
+	memunmap(rci2_base);
+ err:
+	pr_debug("RCI2 table - sysfs initialization failed\n");
+	return ret;
+}
+late_initcall(efi_rci2_sysfs_init);
diff --git a/include/linux/efi.h b/include/linux/efi.h
index f88318b85fb0..bd3837022307 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -692,6 +692,9 @@ void efi_native_runtime_setup(void);
 #define LINUX_EFI_TPM_FINAL_LOG_GUID		EFI_GUID(0x1e2ed096, 0x30e2, 0x4254,  0xbd, 0x89, 0x86, 0x3b, 0xbe, 0xf8, 0x23, 0x25)
 #define LINUX_EFI_MEMRESERVE_TABLE_GUID		EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5,  0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
 
+/* OEM GUIDs */
+#define DELLEMC_EFI_RCI2_TABLE_GUID		EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;
@@ -1713,6 +1716,8 @@ struct efi_tcg2_final_events_table {
 };
 extern int efi_tpm_final_log_size;
 
+extern unsigned long rci2_table_phys;
+
 /*
  * efi_runtime_service() function identifiers.
  * "NONE" is used by efi_recover_from_page_fault() to check if the page
-- 
cgit v1.2.3


From ac9eafbe930abb589e9289842a99cc575cadb854 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 1 Aug 2019 19:31:10 +0200
Subject: ACPI: PM: s2idle: Execute LPS0 _DSM functions with suspended devices

According to Section 3.5 of the "Intel Low Power S0 Idle" document [1],
Function 5 of the LPS0 _DSM is expected to be invoked when the system
configuration matches the criteria for entering the target low-power
state of the platform.  In particular, this means that all devices
should be suspended and in low-power states already when that function
is invoked.

This is not the case currently, however, because Function 5 of the
LPS0 _DSM is invoked by it before the "noirq" phase of device suspend,
which means that some devices may not have been put into low-power
states yet at that point.  That is a consequence of the previous
design of the suspend-to-idle flow that allowed the "noirq" phase of
device suspend and the "noirq" phase of device resume to be carried
out for multiple times while "suspended" (if any spurious wakeup
events were detected) and the point of the LPS0 _DSM Function 5
invocation was chosen so as to call it (and LPS0 _DSM Function 6
analogously) once per suspend-resume cycle (regardless of how many
times the "noirq" phases of device suspend and resume were carried
out while "suspended").

Now that the suspend-to-idle flow has been redesigned to carry out
the "noirq" phases of device suspend and resume once in each cycle,
the code can be reordered to follow the specification that it is
based on more closely.

For this purpose, add ->prepare_late and ->restore_early platform
callbacks for suspend-to-idle, to be executed, respectively, after
the "noirq" phase of suspending devices and before the "noirq"
phase of resuming them and make ACPI use them for the invocation
of LPS0 _DSM functions as appropriate.

While at it, move the LPS0 entry requirements check to be made
before invoking Functions 3 and 5 of the LPS0 _DSM (also once
per cycle) as follows from the specification [1].

Link: https://uefi.org/sites/default/files/resources/Intel_ACPI_Low_Power_S0_Idle.pdf # [1]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
 drivers/acpi/sleep.c    | 36 ++++++++++++++++++++++++------------
 include/linux/suspend.h |  2 ++
 kernel/power/suspend.c  | 12 +++++++++---
 3 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 864bb18d3a5d..8f7e95f97e1f 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -954,11 +954,6 @@ static int acpi_s2idle_begin(void)
 
 static int acpi_s2idle_prepare(void)
 {
-	if (lps0_device_handle && !sleep_no_lps0) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
-	}
-
 	if (acpi_sci_irq_valid())
 		enable_irq_wake(acpi_sci_irq);
 
@@ -972,11 +967,22 @@ static int acpi_s2idle_prepare(void)
 	return 0;
 }
 
-static void acpi_s2idle_wake(void)
+static int acpi_s2idle_prepare_late(void)
 {
-	if (lps0_device_handle && !sleep_no_lps0 && pm_debug_messages_on)
+	if (!lps0_device_handle || sleep_no_lps0)
+		return 0;
+
+	if (pm_debug_messages_on)
 		lpi_check_constraints();
 
+	acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF);
+	acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY);
+
+	return 0;
+}
+
+static void acpi_s2idle_wake(void)
+{
 	/*
 	 * If IRQD_WAKEUP_ARMED is set for the SCI at this point, the SCI has
 	 * not triggered while suspended, so bail out.
@@ -1011,6 +1017,15 @@ static void acpi_s2idle_wake(void)
 	rearm_wake_irq(acpi_sci_irq);
 }
 
+static void acpi_s2idle_restore_early(void)
+{
+	if (!lps0_device_handle || sleep_no_lps0)
+		return;
+
+	acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
+	acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
+}
+
 static void acpi_s2idle_restore(void)
 {
 	s2idle_wakeup = false;
@@ -1021,11 +1036,6 @@ static void acpi_s2idle_restore(void)
 
 	if (acpi_sci_irq_valid())
 		disable_irq_wake(acpi_sci_irq);
-
-	if (lps0_device_handle && !sleep_no_lps0) {
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT);
-		acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON);
-	}
 }
 
 static void acpi_s2idle_end(void)
@@ -1036,7 +1046,9 @@ static void acpi_s2idle_end(void)
 static const struct platform_s2idle_ops acpi_s2idle_ops = {
 	.begin = acpi_s2idle_begin,
 	.prepare = acpi_s2idle_prepare,
+	.prepare_late = acpi_s2idle_prepare_late,
 	.wake = acpi_s2idle_wake,
+	.restore_early = acpi_s2idle_restore_early,
 	.restore = acpi_s2idle_restore,
 	.end = acpi_s2idle_end,
 };
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f0c4a8445140..6fc8843f1c9e 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -190,7 +190,9 @@ struct platform_suspend_ops {
 struct platform_s2idle_ops {
 	int (*begin)(void);
 	int (*prepare)(void);
+	int (*prepare_late)(void);
 	void (*wake)(void);
+	void (*restore_early)(void);
 	void (*restore)(void);
 	void (*end)(void);
 };
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 2b6057853b33..ed9ddef12b13 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -253,13 +253,19 @@ static int platform_suspend_prepare_late(suspend_state_t state)
 
 static int platform_suspend_prepare_noirq(suspend_state_t state)
 {
-	return state != PM_SUSPEND_TO_IDLE && suspend_ops->prepare_late ?
-		suspend_ops->prepare_late() : 0;
+	if (state == PM_SUSPEND_TO_IDLE) {
+		if (s2idle_ops && s2idle_ops->prepare_late)
+			return s2idle_ops->prepare_late();
+	}
+	return suspend_ops->prepare_late ? suspend_ops->prepare_late() : 0;
 }
 
 static void platform_resume_noirq(suspend_state_t state)
 {
-	if (state != PM_SUSPEND_TO_IDLE && suspend_ops->wake)
+	if (state == PM_SUSPEND_TO_IDLE) {
+		if (s2idle_ops && s2idle_ops->restore_early)
+			s2idle_ops->restore_early();
+	} else if (suspend_ops->wake)
 		suspend_ops->wake();
 }
 
-- 
cgit v1.2.3


From 619a1f195f93276dc8c6e33fe057e007adc9c288 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 7 Aug 2019 20:02:31 +0200
Subject: ALSA: hda: Remove page allocation redirection

The HD-audio core allocates and releases pages via driver's specific
dma_alloc_pages and dma_free_pages ops defined in bus->io_ops.  This
was because some platforms require the uncached pages and the handling
of page flags had to be done locally in the driver code.

Since the recent change in ALSA core memory allocator, we can simply
pass SNDRV_DMA_TYPE_DEV_UC for the uncached pages, and the only
difference became about this type to be passed to the core allocator.
That is, it's good time for cleaning up the mess.

This patch changes the allocation code in HD-audio core to call the
core allocator directly so that we get rid of dma_alloc_pages and
dma_free_pages io_ops.  If a driver needs the uncached pages, it has
to set bus->dma_type right after the bus initialization.

This is merely a code refactoring and shouldn't bring any behavior
changes.

Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h                |  6 +-----
 sound/hda/ext/hdac_ext_bus.c           | 13 -------------
 sound/hda/hdac_bus.c                   |  1 +
 sound/hda/hdac_controller.c            | 18 +++++++++---------
 sound/hda/hdac_stream.c                |  8 ++++----
 sound/pci/hda/hda_intel.c              | 24 ++++--------------------
 sound/pci/hda/hda_tegra.c              | 16 ----------------
 sound/soc/intel/skylake/skl-messages.c | 15 ++-------------
 sound/soc/sof/intel/hda-bus.c          | 14 --------------
 9 files changed, 21 insertions(+), 94 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 612a17e375d0..20549def0a27 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -264,11 +264,6 @@ struct hdac_io_ops {
 	u16 (*reg_readw)(u16 __iomem *addr);
 	void (*reg_writeb)(u8 value, u8 __iomem *addr);
 	u8 (*reg_readb)(u8 __iomem *addr);
-	/* Allocation ops */
-	int (*dma_alloc_pages)(struct hdac_bus *bus, int type, size_t size,
-			       struct snd_dma_buffer *buf);
-	void (*dma_free_pages)(struct hdac_bus *bus,
-			       struct snd_dma_buffer *buf);
 };
 
 #define HDA_UNSOL_QUEUE_SIZE	64
@@ -344,6 +339,7 @@ struct hdac_bus {
 	/* CORB/RIRB and position buffers */
 	struct snd_dma_buffer rb;
 	struct snd_dma_buffer posbuf;
+	int dma_type;			/* SNDRV_DMA_TYPE_XXX for CORB/RIRB */
 
 	/* hdac_stream linked list */
 	struct list_head stream_list;
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 4f9f1d2a2ec5..7825b74068f4 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -47,17 +47,6 @@ static u8 hdac_ext_readb(u8 __iomem *addr)
 	return readb(addr);
 }
 
-static int hdac_ext_dma_alloc_pages(struct hdac_bus *bus, int type,
-			   size_t size, struct snd_dma_buffer *buf)
-{
-	return snd_dma_alloc_pages(type, bus->dev, size, buf);
-}
-
-static void hdac_ext_dma_free_pages(struct hdac_bus *bus, struct snd_dma_buffer *buf)
-{
-	snd_dma_free_pages(buf);
-}
-
 static const struct hdac_io_ops hdac_ext_default_io = {
 	.reg_writel = hdac_ext_writel,
 	.reg_readl = hdac_ext_readl,
@@ -65,8 +54,6 @@ static const struct hdac_io_ops hdac_ext_default_io = {
 	.reg_readw = hdac_ext_readw,
 	.reg_writeb = hdac_ext_writeb,
 	.reg_readb = hdac_ext_readb,
-	.dma_alloc_pages = hdac_ext_dma_alloc_pages,
-	.dma_free_pages = hdac_ext_dma_free_pages,
 };
 
 /**
diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c
index 14e57ffd5bc1..00ea12e67dc8 100644
--- a/sound/hda/hdac_bus.c
+++ b/sound/hda/hdac_bus.c
@@ -34,6 +34,7 @@ int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev,
 	else
 		bus->ops = &default_ops;
 	bus->io_ops = io_ops;
+	bus->dma_type = SNDRV_DMA_TYPE_DEV;
 	INIT_LIST_HEAD(&bus->stream_list);
 	INIT_LIST_HEAD(&bus->codec_list);
 	INIT_WORK(&bus->unsol_work, snd_hdac_bus_process_unsol_events);
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index 3b0110545070..7e7be8e4dcf9 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -575,12 +575,13 @@ int snd_hdac_bus_alloc_stream_pages(struct hdac_bus *bus)
 {
 	struct hdac_stream *s;
 	int num_streams = 0;
+	int dma_type = bus->dma_type ? bus->dma_type : SNDRV_DMA_TYPE_DEV;
 	int err;
 
 	list_for_each_entry(s, &bus->stream_list, list) {
 		/* allocate memory for the BDL for each stream */
-		err = bus->io_ops->dma_alloc_pages(bus, SNDRV_DMA_TYPE_DEV,
-						   BDL_SIZE, &s->bdl);
+		err = snd_dma_alloc_pages(dma_type, bus->dev,
+					  BDL_SIZE, &s->bdl);
 		num_streams++;
 		if (err < 0)
 			return -ENOMEM;
@@ -589,16 +590,15 @@ int snd_hdac_bus_alloc_stream_pages(struct hdac_bus *bus)
 	if (WARN_ON(!num_streams))
 		return -EINVAL;
 	/* allocate memory for the position buffer */
-	err = bus->io_ops->dma_alloc_pages(bus, SNDRV_DMA_TYPE_DEV,
-					   num_streams * 8, &bus->posbuf);
+	err = snd_dma_alloc_pages(dma_type, bus->dev,
+				  num_streams * 8, &bus->posbuf);
 	if (err < 0)
 		return -ENOMEM;
 	list_for_each_entry(s, &bus->stream_list, list)
 		s->posbuf = (__le32 *)(bus->posbuf.area + s->index * 8);
 
 	/* single page (at least 4096 bytes) must suffice for both ringbuffes */
-	return bus->io_ops->dma_alloc_pages(bus, SNDRV_DMA_TYPE_DEV,
-					    PAGE_SIZE, &bus->rb);
+	return snd_dma_alloc_pages(dma_type, bus->dev, PAGE_SIZE, &bus->rb);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_alloc_stream_pages);
 
@@ -612,12 +612,12 @@ void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus)
 
 	list_for_each_entry(s, &bus->stream_list, list) {
 		if (s->bdl.area)
-			bus->io_ops->dma_free_pages(bus, &s->bdl);
+			snd_dma_free_pages(&s->bdl);
 	}
 
 	if (bus->rb.area)
-		bus->io_ops->dma_free_pages(bus, &bus->rb);
+		snd_dma_free_pages(&bus->rb);
 	if (bus->posbuf.area)
-		bus->io_ops->dma_free_pages(bus, &bus->posbuf);
+		snd_dma_free_pages(&bus->posbuf);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_free_stream_pages);
diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c
index 55d53b89ac21..fc68d4ce0a37 100644
--- a/sound/hda/hdac_stream.c
+++ b/sound/hda/hdac_stream.c
@@ -680,8 +680,8 @@ int snd_hdac_dsp_prepare(struct hdac_stream *azx_dev, unsigned int format,
 	azx_dev->locked = true;
 	spin_unlock_irq(&bus->reg_lock);
 
-	err = bus->io_ops->dma_alloc_pages(bus, SNDRV_DMA_TYPE_DEV_SG,
-					   byte_size, bufp);
+	err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, bus->dev,
+				  byte_size, bufp);
 	if (err < 0)
 		goto err_alloc;
 
@@ -707,7 +707,7 @@ int snd_hdac_dsp_prepare(struct hdac_stream *azx_dev, unsigned int format,
 	return azx_dev->stream_tag;
 
  error:
-	bus->io_ops->dma_free_pages(bus, bufp);
+	snd_dma_free_pages(bufp);
  err_alloc:
 	spin_lock_irq(&bus->reg_lock);
 	azx_dev->locked = false;
@@ -754,7 +754,7 @@ void snd_hdac_dsp_cleanup(struct hdac_stream *azx_dev,
 	azx_dev->period_bytes = 0;
 	azx_dev->format_val = 0;
 
-	bus->io_ops->dma_free_pages(bus, dmab);
+	snd_dma_free_pages(dmab);
 	dmab->area = NULL;
 
 	spin_lock_irq(&bus->reg_lock);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index cb8b0945547c..3bb4c26f2799 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1694,6 +1694,10 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci,
 		return err;
 	}
 
+	/* use the non-cached pages in non-snoop mode */
+	if (!azx_snoop(chip))
+		azx_bus(chip)->dma_type = SNDRV_DMA_TYPE_DEV_UC;
+
 	/* Workaround for a communication error on CFL (bko#199007) and CNL */
 	if (IS_CFL(pci) || IS_CNL(pci))
 		azx_bus(chip)->polling_mode = 1;
@@ -1979,24 +1983,6 @@ static int disable_msi_reset_irq(struct azx *chip)
 	return 0;
 }
 
-/* DMA page allocation helpers.  */
-static int dma_alloc_pages(struct hdac_bus *bus,
-			   int type,
-			   size_t size,
-			   struct snd_dma_buffer *buf)
-{
-	struct azx *chip = bus_to_azx(bus);
-
-	if (!azx_snoop(chip) && type == SNDRV_DMA_TYPE_DEV)
-		type = SNDRV_DMA_TYPE_DEV_UC;
-	return snd_dma_alloc_pages(type, bus->dev, size, buf);
-}
-
-static void dma_free_pages(struct hdac_bus *bus, struct snd_dma_buffer *buf)
-{
-	snd_dma_free_pages(buf);
-}
-
 static void pcm_mmap_prepare(struct snd_pcm_substream *substream,
 			     struct vm_area_struct *area)
 {
@@ -2015,8 +2001,6 @@ static const struct hdac_io_ops pci_hda_io_ops = {
 	.reg_readw = pci_azx_readw,
 	.reg_writeb = pci_azx_writeb,
 	.reg_readb = pci_azx_readb,
-	.dma_alloc_pages = dma_alloc_pages,
-	.dma_free_pages = dma_free_pages,
 };
 
 static const struct hda_controller_ops pci_hda_ops = {
diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index 7dbe9f39fc79..ba414cc639f1 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -75,20 +75,6 @@ MODULE_PARM_DESC(power_save,
 #define power_save	0
 #endif
 
-/*
- * DMA page allocation ops.
- */
-static int dma_alloc_pages(struct hdac_bus *bus, int type, size_t size,
-			   struct snd_dma_buffer *buf)
-{
-	return snd_dma_alloc_pages(type, bus->dev, size, buf);
-}
-
-static void dma_free_pages(struct hdac_bus *bus, struct snd_dma_buffer *buf)
-{
-	snd_dma_free_pages(buf);
-}
-
 /*
  * Register access ops. Tegra HDA register access is DWORD only.
  */
@@ -153,8 +139,6 @@ static const struct hdac_io_ops hda_tegra_io_ops = {
 	.reg_readw = hda_tegra_readw,
 	.reg_writeb = hda_tegra_writeb,
 	.reg_readb = hda_tegra_readb,
-	.dma_alloc_pages = dma_alloc_pages,
-	.dma_free_pages = dma_free_pages,
 };
 
 static const struct hda_controller_ops hda_tegra_ops; /* nothing special */
diff --git a/sound/soc/intel/skylake/skl-messages.c b/sound/soc/intel/skylake/skl-messages.c
index febc070839e0..c6f9e05c929e 100644
--- a/sound/soc/intel/skylake/skl-messages.c
+++ b/sound/soc/intel/skylake/skl-messages.c
@@ -25,23 +25,12 @@
 static int skl_alloc_dma_buf(struct device *dev,
 		struct snd_dma_buffer *dmab, size_t size)
 {
-	struct hdac_bus *bus = dev_get_drvdata(dev);
-
-	if (!bus)
-		return -ENODEV;
-
-	return  bus->io_ops->dma_alloc_pages(bus, SNDRV_DMA_TYPE_DEV, size, dmab);
+	return snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, dev, size, dmab);
 }
 
 static int skl_free_dma_buf(struct device *dev, struct snd_dma_buffer *dmab)
 {
-	struct hdac_bus *bus = dev_get_drvdata(dev);
-
-	if (!bus)
-		return -ENODEV;
-
-	bus->io_ops->dma_free_pages(bus, dmab);
-
+	snd_dma_free_pages(dmab);
 	return 0;
 }
 
diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c
index a7e6d8227df6..0bc93fa06b5b 100644
--- a/sound/soc/sof/intel/hda-bus.c
+++ b/sound/soc/sof/intel/hda-bus.c
@@ -51,18 +51,6 @@ static u8 sof_hda_readb(u8 __iomem *addr)
 	return readb(addr);
 }
 
-static int sof_hda_dma_alloc_pages(struct hdac_bus *bus, int type,
-				   size_t size, struct snd_dma_buffer *buf)
-{
-	return snd_dma_alloc_pages(type, bus->dev, size, buf);
-}
-
-static void sof_hda_dma_free_pages(struct hdac_bus *bus,
-				   struct snd_dma_buffer *buf)
-{
-	snd_dma_free_pages(buf);
-}
-
 static const struct hdac_io_ops io_ops = {
 	.reg_writel = sof_hda_writel,
 	.reg_readl = sof_hda_readl,
@@ -70,8 +58,6 @@ static const struct hdac_io_ops io_ops = {
 	.reg_readw = sof_hda_readw,
 	.reg_writeb = sof_hda_writeb,
 	.reg_readb = sof_hda_readb,
-	.dma_alloc_pages = sof_hda_dma_alloc_pages,
-	.dma_free_pages = sof_hda_dma_free_pages,
 };
 
 /*
-- 
cgit v1.2.3


From 19abfefd4c7604993d1c31e098a3f48bdafe334d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 7 Aug 2019 20:32:08 +0200
Subject: ALSA: hda: Direct MMIO accesses

HD-audio drivers access to the mmio registers indirectly via the
corresponding bus->io_ops callbacks.  This is because some platform
(notably Tegra SoC) requires the word-aligned access.  But it's rather
a rare case, and other platforms suffer from the penalties by indirect
calls unnecessarily.

This patch is an attempt to optimize and cleanup for this situation.
Now the special aligned access is used only when a new kconfig
CONFIG_SND_HDA_ALIGNED_MMIO is set.  And the HD-audio core itself
provides the aligned MMIO access helpers instead of the driver side.
If Kconfig isn't set (as default), the standard helpers like readl()
or writel() are used directly.

A couple of places in ASoC Intel drivers have the access via io_ops
reg_writel(), and they are replaced with the direct writel() calls.

And now with this patch, the whole bus->io_ops becomes empty, so it's
dropped completely.  The bus initialization functions are changed
accordingly as well to drop the whole bus->io_ops.

Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h        | 63 +++++++++++++++++++-------------------
 include/sound/hdaudio_ext.h    |  1 -
 sound/hda/Kconfig              |  3 ++
 sound/hda/ext/hdac_ext_bus.c   | 47 +----------------------------
 sound/hda/hdac_bus.c           | 35 +++++++++++++++++++---
 sound/pci/hda/Kconfig          |  1 +
 sound/pci/hda/hda_controller.c |  6 ++--
 sound/pci/hda/hda_controller.h |  3 +-
 sound/pci/hda/hda_intel.c      | 47 +----------------------------
 sound/pci/hda/hda_tegra.c      | 68 +-----------------------------------------
 sound/soc/intel/skylake/skl.c  |  7 ++---
 sound/soc/sof/intel/hda-bus.c  | 40 -------------------------
 sound/soc/sof/intel/hda-dsp.c  |  2 +-
 13 files changed, 75 insertions(+), 248 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 20549def0a27..4af4af55e854 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -253,19 +253,6 @@ struct hdac_ext_bus_ops {
 	int (*hdev_detach)(struct hdac_device *hdev);
 };
 
-/*
- * Lowlevel I/O operators
- */
-struct hdac_io_ops {
-	/* mapped register accesses */
-	void (*reg_writel)(u32 value, u32 __iomem *addr);
-	u32 (*reg_readl)(u32 __iomem *addr);
-	void (*reg_writew)(u16 value, u16 __iomem *addr);
-	u16 (*reg_readw)(u16 __iomem *addr);
-	void (*reg_writeb)(u8 value, u8 __iomem *addr);
-	u8 (*reg_readb)(u8 __iomem *addr);
-};
-
 #define HDA_UNSOL_QUEUE_SIZE	64
 #define HDA_MAX_CODECS		8	/* limit by controller side */
 
@@ -299,7 +286,6 @@ struct hdac_rb {
 struct hdac_bus {
 	struct device *dev;
 	const struct hdac_bus_ops *ops;
-	const struct hdac_io_ops *io_ops;
 	const struct hdac_ext_bus_ops *ext_ops;
 
 	/* h/w resources */
@@ -380,8 +366,7 @@ struct hdac_bus {
 };
 
 int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev,
-		      const struct hdac_bus_ops *ops,
-		      const struct hdac_io_ops *io_ops);
+		      const struct hdac_bus_ops *ops);
 void snd_hdac_bus_exit(struct hdac_bus *bus);
 int snd_hdac_bus_exec_verb(struct hdac_bus *bus, unsigned int addr,
 			   unsigned int cmd, unsigned int *res);
@@ -425,21 +410,38 @@ int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
 int snd_hdac_bus_alloc_stream_pages(struct hdac_bus *bus);
 void snd_hdac_bus_free_stream_pages(struct hdac_bus *bus);
 
+#ifdef CONFIG_SND_HDA_ALIGNED_MMIO
+unsigned int snd_hdac_aligned_read(void __iomem *addr, unsigned int mask);
+void snd_hdac_aligned_write(unsigned int val, void __iomem *addr,
+			    unsigned int mask);
+#define snd_hdac_reg_writeb(v, addr)	snd_hdac_aligned_write(v, addr, 0xff)
+#define snd_hdac_reg_writew(v, addr)	snd_hdac_aligned_write(v, addr, 0xffff)
+#define snd_hdac_reg_readb(addr)	snd_hdac_aligned_read(addr, 0xff)
+#define snd_hdac_reg_readw(addr)	snd_hdac_aligned_read(addr, 0xffff)
+#else /* CONFIG_SND_HDA_ALIGNED_MMIO */
+#define snd_hdac_reg_writeb(val, addr)	writeb(val, addr)
+#define snd_hdac_reg_writew(val, addr)	writew(val, addr)
+#define snd_hdac_reg_readb(addr)	readb(addr)
+#define snd_hdac_reg_readw(addr)	readw(addr)
+#endif /* CONFIG_SND_HDA_ALIGNED_MMIO */
+#define snd_hdac_reg_writel(val, addr)	writel(val, addr)
+#define snd_hdac_reg_readl(addr)	readl(addr)
+
 /*
  * macros for easy use
  */
 #define _snd_hdac_chip_writeb(chip, reg, value) \
-	((chip)->io_ops->reg_writeb(value, (chip)->remap_addr + (reg)))
+	snd_hdac_reg_writeb(value, (chip)->remap_addr + (reg))
 #define _snd_hdac_chip_readb(chip, reg) \
-	((chip)->io_ops->reg_readb((chip)->remap_addr + (reg)))
+	snd_hdac_reg_readb((chip)->remap_addr + (reg))
 #define _snd_hdac_chip_writew(chip, reg, value) \
-	((chip)->io_ops->reg_writew(value, (chip)->remap_addr + (reg)))
+	snd_hdac_reg_writew(value, (chip)->remap_addr + (reg))
 #define _snd_hdac_chip_readw(chip, reg) \
-	((chip)->io_ops->reg_readw((chip)->remap_addr + (reg)))
+	snd_hdac_reg_readw((chip)->remap_addr + (reg))
 #define _snd_hdac_chip_writel(chip, reg, value) \
-	((chip)->io_ops->reg_writel(value, (chip)->remap_addr + (reg)))
+	snd_hdac_reg_writel(value, (chip)->remap_addr + (reg))
 #define _snd_hdac_chip_readl(chip, reg) \
-	((chip)->io_ops->reg_readl((chip)->remap_addr + (reg)))
+	snd_hdac_reg_readl((chip)->remap_addr + (reg))
 
 /* read/write a register, pass without AZX_REG_ prefix */
 #define snd_hdac_chip_writel(chip, reg, value) \
@@ -544,24 +546,19 @@ int snd_hdac_get_stream_stripe_ctl(struct hdac_bus *bus,
 /*
  * macros for easy use
  */
-#define _snd_hdac_stream_write(type, dev, reg, value)			\
-	((dev)->bus->io_ops->reg_write ## type(value, (dev)->sd_addr + (reg)))
-#define _snd_hdac_stream_read(type, dev, reg)				\
-	((dev)->bus->io_ops->reg_read ## type((dev)->sd_addr + (reg)))
-
 /* read/write a register, pass without AZX_REG_ prefix */
 #define snd_hdac_stream_writel(dev, reg, value) \
-	_snd_hdac_stream_write(l, dev, AZX_REG_ ## reg, value)
+	snd_hdac_reg_writel(value, (dev)->sd_addr + AZX_REG_ ## reg)
 #define snd_hdac_stream_writew(dev, reg, value) \
-	_snd_hdac_stream_write(w, dev, AZX_REG_ ## reg, value)
+	snd_hdac_reg_writew(value, (dev)->sd_addr + AZX_REG_ ## reg)
 #define snd_hdac_stream_writeb(dev, reg, value) \
-	_snd_hdac_stream_write(b, dev, AZX_REG_ ## reg, value)
+	snd_hdac_reg_writeb(value, (dev)->sd_addr + AZX_REG_ ## reg)
 #define snd_hdac_stream_readl(dev, reg) \
-	_snd_hdac_stream_read(l, dev, AZX_REG_ ## reg)
+	snd_hdac_reg_readl((dev)->sd_addr + AZX_REG_ ## reg)
 #define snd_hdac_stream_readw(dev, reg) \
-	_snd_hdac_stream_read(w, dev, AZX_REG_ ## reg)
+	snd_hdac_reg_readw((dev)->sd_addr + AZX_REG_ ## reg)
 #define snd_hdac_stream_readb(dev, reg) \
-	_snd_hdac_stream_read(b, dev, AZX_REG_ ## reg)
+	snd_hdac_reg_readb((dev)->sd_addr + AZX_REG_ ## reg)
 
 /* update a register, pass without AZX_REG_ prefix */
 #define snd_hdac_stream_updatel(dev, reg, mask, val) \
diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h
index f34aced69ca8..ef88b20c7b0a 100644
--- a/include/sound/hdaudio_ext.h
+++ b/include/sound/hdaudio_ext.h
@@ -6,7 +6,6 @@
 
 int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 		      const struct hdac_bus_ops *ops,
-		      const struct hdac_io_ops *io_ops,
 		      const struct hdac_ext_bus_ops *ext_ops);
 
 void snd_hdac_ext_bus_exit(struct hdac_bus *bus);
diff --git a/sound/hda/Kconfig b/sound/hda/Kconfig
index f6feced15f17..1d475cf3f754 100644
--- a/sound/hda/Kconfig
+++ b/sound/hda/Kconfig
@@ -6,6 +6,9 @@ config SND_HDA_CORE
 config SND_HDA_DSP_LOADER
 	bool
 
+config SND_HDA_ALIGNED_MMIO
+	bool
+
 config SND_HDA_COMPONENT
 	bool
 
diff --git a/sound/hda/ext/hdac_ext_bus.c b/sound/hda/ext/hdac_ext_bus.c
index 7825b74068f4..242306d820ec 100644
--- a/sound/hda/ext/hdac_ext_bus.c
+++ b/sound/hda/ext/hdac_ext_bus.c
@@ -17,67 +17,22 @@
 MODULE_DESCRIPTION("HDA extended core");
 MODULE_LICENSE("GPL v2");
 
-static void hdac_ext_writel(u32 value, u32 __iomem *addr)
-{
-	writel(value, addr);
-}
-
-static u32 hdac_ext_readl(u32 __iomem *addr)
-{
-	return readl(addr);
-}
-
-static void hdac_ext_writew(u16 value, u16 __iomem *addr)
-{
-	writew(value, addr);
-}
-
-static u16 hdac_ext_readw(u16 __iomem *addr)
-{
-	return readw(addr);
-}
-
-static void hdac_ext_writeb(u8 value, u8 __iomem *addr)
-{
-	writeb(value, addr);
-}
-
-static u8 hdac_ext_readb(u8 __iomem *addr)
-{
-	return readb(addr);
-}
-
-static const struct hdac_io_ops hdac_ext_default_io = {
-	.reg_writel = hdac_ext_writel,
-	.reg_readl = hdac_ext_readl,
-	.reg_writew = hdac_ext_writew,
-	.reg_readw = hdac_ext_readw,
-	.reg_writeb = hdac_ext_writeb,
-	.reg_readb = hdac_ext_readb,
-};
-
 /**
  * snd_hdac_ext_bus_init - initialize a HD-audio extended bus
  * @ebus: the pointer to extended bus object
  * @dev: device pointer
  * @ops: bus verb operators
- * @io_ops: lowlevel I/O operators, can be NULL. If NULL core will use
  * default ops
  *
  * Returns 0 if successful, or a negative error code.
  */
 int snd_hdac_ext_bus_init(struct hdac_bus *bus, struct device *dev,
 			const struct hdac_bus_ops *ops,
-			const struct hdac_io_ops *io_ops,
 			const struct hdac_ext_bus_ops *ext_ops)
 {
 	int ret;
 
-	/* check if io ops are provided, if not load the defaults */
-	if (io_ops == NULL)
-		io_ops = &hdac_ext_default_io;
-
-	ret = snd_hdac_bus_init(bus, dev, ops, io_ops);
+	ret = snd_hdac_bus_init(bus, dev, ops);
 	if (ret < 0)
 		return ret;
 
diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c
index 00ea12e67dc8..dc2523ef7d98 100644
--- a/sound/hda/hdac_bus.c
+++ b/sound/hda/hdac_bus.c
@@ -19,13 +19,11 @@ static const struct hdac_bus_ops default_ops = {
  * snd_hdac_bus_init - initialize a HD-audio bas bus
  * @bus: the pointer to bus object
  * @ops: bus verb operators
- * @io_ops: lowlevel I/O operators
  *
  * Returns 0 if successful, or a negative error code.
  */
 int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev,
-		      const struct hdac_bus_ops *ops,
-		      const struct hdac_io_ops *io_ops)
+		      const struct hdac_bus_ops *ops)
 {
 	memset(bus, 0, sizeof(*bus));
 	bus->dev = dev;
@@ -33,7 +31,6 @@ int snd_hdac_bus_init(struct hdac_bus *bus, struct device *dev,
 		bus->ops = ops;
 	else
 		bus->ops = &default_ops;
-	bus->io_ops = io_ops;
 	bus->dma_type = SNDRV_DMA_TYPE_DEV;
 	INIT_LIST_HEAD(&bus->stream_list);
 	INIT_LIST_HEAD(&bus->codec_list);
@@ -218,3 +215,33 @@ void snd_hdac_bus_remove_device(struct hdac_bus *bus,
 	flush_work(&bus->unsol_work);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_bus_remove_device);
+
+#ifdef CONFIG_SND_HDA_ALIGNED_MMIO
+/* Helpers for aligned read/write of mmio space, for Tegra */
+unsigned int snd_hdac_aligned_read(void __iomem *addr, unsigned int mask)
+{
+	void __iomem *aligned_addr =
+		(void __iomem *)((unsigned long)(addr) & ~0x3);
+	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
+	unsigned int v;
+
+	v = readl(aligned_addr);
+	return (v >> shift) & mask;
+}
+EXPORT_SYMBOL_GPL(snd_hdac_aligned_read);
+
+void snd_hdac_aligned_write(unsigned int val, void __iomem *addr,
+			    unsigned int mask)
+{
+	void __iomem *aligned_addr =
+		(void __iomem *)((unsigned long)(addr) & ~0x3);
+	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
+	unsigned int v;
+
+	v = readl(aligned_addr);
+	v &= ~(mask << shift);
+	v |= val << shift;
+	writel(v, aligned_addr);
+}
+EXPORT_SYMBOL_GPL(snd_hdac_aligned_write);
+#endif /* CONFIG_SND_HDA_ALIGNED_MMIO */
diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig
index 35d934309cb2..82198ea8f7f8 100644
--- a/sound/pci/hda/Kconfig
+++ b/sound/pci/hda/Kconfig
@@ -26,6 +26,7 @@ config SND_HDA_TEGRA
 	tristate "NVIDIA Tegra HD Audio"
 	depends on ARCH_TEGRA
 	select SND_HDA
+	select SND_HDA_ALIGNED_MMIO
 	help
 	  Say Y here to support the HDA controller present in NVIDIA
 	  Tegra SoCs
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index c8d1b4316245..ee5504e2441f 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -1202,14 +1202,12 @@ void snd_hda_bus_reset(struct hda_bus *bus)
 }
 
 /* HD-audio bus initialization */
-int azx_bus_init(struct azx *chip, const char *model,
-		 const struct hdac_io_ops *io_ops)
+int azx_bus_init(struct azx *chip, const char *model)
 {
 	struct hda_bus *bus = &chip->bus;
 	int err;
 
-	err = snd_hdac_bus_init(&bus->core, chip->card->dev, &bus_core_ops,
-				io_ops);
+	err = snd_hdac_bus_init(&bus->core, chip->card->dev, &bus_core_ops);
 	if (err < 0)
 		return err;
 
diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h
index baa15374fbcb..146a71e0d594 100644
--- a/sound/pci/hda/hda_controller.h
+++ b/sound/pci/hda/hda_controller.h
@@ -206,8 +206,7 @@ void azx_stop_chip(struct azx *chip);
 irqreturn_t azx_interrupt(int irq, void *dev_id);
 
 /* Codec interface */
-int azx_bus_init(struct azx *chip, const char *model,
-		 const struct hdac_io_ops *io_ops);
+int azx_bus_init(struct azx *chip, const char *model);
 int azx_probe_codecs(struct azx *chip, unsigned int max_slots);
 int azx_codec_configure(struct azx *chip);
 int azx_init_streams(struct azx *chip);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 3bb4c26f2799..963a92943a6d 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1627,7 +1627,6 @@ static int default_bdl_pos_adj(struct azx *chip)
 /*
  * constructor
  */
-static const struct hdac_io_ops pci_hda_io_ops;
 static const struct hda_controller_ops pci_hda_ops;
 
 static int azx_create(struct snd_card *card, struct pci_dev *pci,
@@ -1687,7 +1686,7 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci,
 	else
 		chip->bdl_pos_adj = bdl_pos_adj[dev];
 
-	err = azx_bus_init(chip, model[dev], &pci_hda_io_ops);
+	err = azx_bus_init(chip, model[dev]);
 	if (err < 0) {
 		kfree(hda);
 		pci_disable_device(pci);
@@ -1932,41 +1931,6 @@ static void azx_firmware_cb(const struct firmware *fw, void *context)
 }
 #endif
 
-/*
- * HDA controller ops.
- */
-
-/* PCI register access. */
-static void pci_azx_writel(u32 value, u32 __iomem *addr)
-{
-	writel(value, addr);
-}
-
-static u32 pci_azx_readl(u32 __iomem *addr)
-{
-	return readl(addr);
-}
-
-static void pci_azx_writew(u16 value, u16 __iomem *addr)
-{
-	writew(value, addr);
-}
-
-static u16 pci_azx_readw(u16 __iomem *addr)
-{
-	return readw(addr);
-}
-
-static void pci_azx_writeb(u8 value, u8 __iomem *addr)
-{
-	writeb(value, addr);
-}
-
-static u8 pci_azx_readb(u8 __iomem *addr)
-{
-	return readb(addr);
-}
-
 static int disable_msi_reset_irq(struct azx *chip)
 {
 	struct hdac_bus *bus = azx_bus(chip);
@@ -1994,15 +1958,6 @@ static void pcm_mmap_prepare(struct snd_pcm_substream *substream,
 #endif
 }
 
-static const struct hdac_io_ops pci_hda_io_ops = {
-	.reg_writel = pci_azx_writel,
-	.reg_readl = pci_azx_readl,
-	.reg_writew = pci_azx_writew,
-	.reg_readw = pci_azx_readw,
-	.reg_writeb = pci_azx_writeb,
-	.reg_readb = pci_azx_readb,
-};
-
 static const struct hda_controller_ops pci_hda_ops = {
 	.disable_msi_reset_irq = disable_msi_reset_irq,
 	.pcm_mmap_prepare = pcm_mmap_prepare,
diff --git a/sound/pci/hda/hda_tegra.c b/sound/pci/hda/hda_tegra.c
index ba414cc639f1..8350954b7986 100644
--- a/sound/pci/hda/hda_tegra.c
+++ b/sound/pci/hda/hda_tegra.c
@@ -75,72 +75,6 @@ MODULE_PARM_DESC(power_save,
 #define power_save	0
 #endif
 
-/*
- * Register access ops. Tegra HDA register access is DWORD only.
- */
-static void hda_tegra_writel(u32 value, u32 __iomem *addr)
-{
-	writel(value, addr);
-}
-
-static u32 hda_tegra_readl(u32 __iomem *addr)
-{
-	return readl(addr);
-}
-
-static void hda_tegra_writew(u16 value, u16 __iomem  *addr)
-{
-	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
-	u32 v;
-
-	v = readl(dword_addr);
-	v &= ~(0xffff << shift);
-	v |= value << shift;
-	writel(v, dword_addr);
-}
-
-static u16 hda_tegra_readw(u16 __iomem *addr)
-{
-	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
-	u32 v;
-
-	v = readl(dword_addr);
-	return (v >> shift) & 0xffff;
-}
-
-static void hda_tegra_writeb(u8 value, u8 __iomem *addr)
-{
-	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
-	u32 v;
-
-	v = readl(dword_addr);
-	v &= ~(0xff << shift);
-	v |= value << shift;
-	writel(v, dword_addr);
-}
-
-static u8 hda_tegra_readb(u8 __iomem *addr)
-{
-	unsigned int shift = ((unsigned long)(addr) & 0x3) << 3;
-	void __iomem *dword_addr = (void __iomem *)((unsigned long)(addr) & ~0x3);
-	u32 v;
-
-	v = readl(dword_addr);
-	return (v >> shift) & 0xff;
-}
-
-static const struct hdac_io_ops hda_tegra_io_ops = {
-	.reg_writel = hda_tegra_writel,
-	.reg_readl = hda_tegra_readl,
-	.reg_writew = hda_tegra_writew,
-	.reg_readw = hda_tegra_readw,
-	.reg_writeb = hda_tegra_writeb,
-	.reg_readb = hda_tegra_readb,
-};
-
 static const struct hda_controller_ops hda_tegra_ops; /* nothing special */
 
 static void hda_tegra_init(struct hda_tegra *hda)
@@ -459,7 +393,7 @@ static int hda_tegra_create(struct snd_card *card,
 
 	INIT_WORK(&hda->probe_work, hda_tegra_probe_work);
 
-	err = azx_bus_init(chip, NULL, &hda_tegra_io_ops);
+	err = azx_bus_init(chip, NULL);
 	if (err < 0)
 		return err;
 
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index 3362e71b4563..c6d8076dc2fd 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -132,7 +132,7 @@ static int skl_init_chip(struct hdac_bus *bus, bool full_reset)
 
 	/* Reset stream-to-link mapping */
 	list_for_each_entry(hlink, &bus->hlink_list, list)
-		bus->io_ops->reg_writel(0, hlink->ml_addr + AZX_REG_ML_LOSIDV);
+		writel(0, hlink->ml_addr + AZX_REG_ML_LOSIDV);
 
 	skl_enable_miscbdcge(bus->dev, true);
 
@@ -854,7 +854,6 @@ out_err:
  * constructor
  */
 static int skl_create(struct pci_dev *pci,
-		      const struct hdac_io_ops *io_ops,
 		      struct skl **rskl)
 {
 	struct hdac_ext_bus_ops *ext_ops = NULL;
@@ -884,7 +883,7 @@ static int skl_create(struct pci_dev *pci,
 #if IS_ENABLED(CONFIG_SND_SOC_INTEL_SKYLAKE_HDAUDIO_CODEC)
 	ext_ops = snd_soc_hdac_hda_get_ops();
 #endif
-	snd_hdac_ext_bus_init(bus, &pci->dev, &bus_core_ops, io_ops, ext_ops);
+	snd_hdac_ext_bus_init(bus, &pci->dev, &bus_core_ops, ext_ops);
 	bus->use_posbuf = 1;
 	skl->pci = pci;
 	INIT_WORK(&skl->probe_work, skl_probe_work);
@@ -1013,7 +1012,7 @@ static int skl_probe(struct pci_dev *pci,
 	}
 
 	/* we use ext core ops, so provide NULL for ops here */
-	err = skl_create(pci, NULL, &skl);
+	err = skl_create(pci, &skl);
 	if (err < 0)
 		return err;
 
diff --git a/sound/soc/sof/intel/hda-bus.c b/sound/soc/sof/intel/hda-bus.c
index 0bc93fa06b5b..438121c70f99 100644
--- a/sound/soc/sof/intel/hda-bus.c
+++ b/sound/soc/sof/intel/hda-bus.c
@@ -21,45 +21,6 @@ static const struct hdac_bus_ops bus_ops = {
 
 #endif
 
-static void sof_hda_writel(u32 value, u32 __iomem *addr)
-{
-	writel(value, addr);
-}
-
-static u32 sof_hda_readl(u32 __iomem *addr)
-{
-	return readl(addr);
-}
-
-static void sof_hda_writew(u16 value, u16 __iomem *addr)
-{
-	writew(value, addr);
-}
-
-static u16 sof_hda_readw(u16 __iomem *addr)
-{
-	return readw(addr);
-}
-
-static void sof_hda_writeb(u8 value, u8 __iomem *addr)
-{
-	writeb(value, addr);
-}
-
-static u8 sof_hda_readb(u8 __iomem *addr)
-{
-	return readb(addr);
-}
-
-static const struct hdac_io_ops io_ops = {
-	.reg_writel = sof_hda_writel,
-	.reg_readl = sof_hda_readl,
-	.reg_writew = sof_hda_writew,
-	.reg_readw = sof_hda_readw,
-	.reg_writeb = sof_hda_writeb,
-	.reg_readb = sof_hda_readb,
-};
-
 /*
  * This can be used for both with/without hda link support.
  */
@@ -69,7 +30,6 @@ void sof_hda_bus_init(struct hdac_bus *bus, struct device *dev,
 	memset(bus, 0, sizeof(*bus));
 	bus->dev = dev;
 
-	bus->io_ops = &io_ops;
 	INIT_LIST_HEAD(&bus->stream_list);
 
 	bus->irq = -1;
diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
index 91de4785b6a3..8d4ce5b4febd 100644
--- a/sound/soc/sof/intel/hda-dsp.c
+++ b/sound/soc/sof/intel/hda-dsp.c
@@ -356,7 +356,7 @@ static int hda_resume(struct snd_sof_dev *sdev)
 
 	/* Reset stream-to-link mapping */
 	list_for_each_entry(hlink, &bus->hlink_list, list)
-		bus->io_ops->reg_writel(0, hlink->ml_addr + AZX_REG_ML_LOSIDV);
+		writel(0, hlink->ml_addr + AZX_REG_ML_LOSIDV);
 
 	hda_dsp_ctrl_misc_clock_gating(sdev, true);
 #else
-- 
cgit v1.2.3


From 7d0c76bdf227774154e191636be739d7dc0247ba Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Fri, 26 Jul 2019 14:53:26 +0530
Subject: clk: qcom: Add WCSS gcc clock control for QCS404

Add support for the WCSS QDSP gcc clock control used on qcs404
based devices. This would allow wcss remoteproc driver to control
the required gcc clocks to bring the subsystem out of reset.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/gcc-qcs404.c               | 30 +++++++++++++++++++++++++++++
 include/dt-bindings/clock/qcom,gcc-qcs404.h |  3 +++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/qcom/gcc-qcs404.c b/drivers/clk/qcom/gcc-qcs404.c
index 29cf464dd2c8..e12c04c09a6a 100644
--- a/drivers/clk/qcom/gcc-qcs404.c
+++ b/drivers/clk/qcom/gcc-qcs404.c
@@ -2604,6 +2604,32 @@ static struct clk_branch gcc_usb_hs_system_clk = {
 	},
 };
 
+static struct clk_branch gcc_wdsp_q6ss_ahbs_clk = {
+	.halt_reg = 0x1e004,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1e004,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wdsp_q6ss_ahbs_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
+static struct clk_branch gcc_wdsp_q6ss_axim_clk = {
+	.halt_reg = 0x1e008,
+	.halt_check = BRANCH_HALT,
+	.clkr = {
+		.enable_reg = 0x1e008,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.name = "gcc_wdsp_q6ss_axim_clk",
+			.ops = &clk_branch2_ops,
+		},
+	},
+};
+
 static struct clk_hw *gcc_qcs404_hws[] = {
 	&cxo.hw,
 };
@@ -2749,6 +2775,9 @@ static struct clk_regmap *gcc_qcs404_clocks[] = {
 	[GCC_QDSS_DAP_CLK] = &gcc_qdss_dap_clk.clkr,
 	[GCC_DCC_CLK] = &gcc_dcc_clk.clkr,
 	[GCC_DCC_XO_CLK] = &gcc_dcc_xo_clk.clkr,
+	[GCC_WCSS_Q6_AHB_CLK] = &gcc_wdsp_q6ss_ahbs_clk.clkr,
+	[GCC_WCSS_Q6_AXIM_CLK] =  &gcc_wdsp_q6ss_axim_clk.clkr,
+
 };
 
 static const struct qcom_reset_map gcc_qcs404_resets[] = {
@@ -2774,6 +2803,7 @@ static const struct qcom_reset_map gcc_qcs404_resets[] = {
 	[GCC_PCIE_0_SLEEP_ARES] = { 0x3e040, 1 },
 	[GCC_PCIE_0_PIPE_ARES] = { 0x3e040, 0 },
 	[GCC_EMAC_BCR] = { 0x4e000 },
+	[GCC_WDSP_RESTART] = {0x19000},
 };
 
 static const struct regmap_config gcc_qcs404_regmap_config = {
diff --git a/include/dt-bindings/clock/qcom,gcc-qcs404.h b/include/dt-bindings/clock/qcom,gcc-qcs404.h
index 2cd62c98561f..bc3051543347 100644
--- a/include/dt-bindings/clock/qcom,gcc-qcs404.h
+++ b/include/dt-bindings/clock/qcom,gcc-qcs404.h
@@ -146,6 +146,8 @@
 #define GCC_MDP_TBU_CLK					138
 #define GCC_QDSS_DAP_CLK				139
 #define GCC_DCC_XO_CLK					140
+#define GCC_WCSS_Q6_AHB_CLK				141
+#define GCC_WCSS_Q6_AXIM_CLK				142
 #define GCC_CDSP_CFG_AHB_CLK				143
 #define GCC_BIMC_CDSP_CLK				144
 #define GCC_CDSP_TBU_CLK				145
@@ -173,5 +175,6 @@
 #define GCC_PCIE_0_CORE_STICKY_ARES			19
 #define GCC_PCIE_0_SLEEP_ARES				20
 #define GCC_PCIE_0_PIPE_ARES				21
+#define GCC_WDSP_RESTART				22
 
 #endif
-- 
cgit v1.2.3


From 64ebb57a3df6c7126532f8acd22b0612867ad3e0 Mon Sep 17 00:00:00 2001
From: "yong.liang" <yong.liang@mediatek.com>
Date: Fri, 26 Jul 2019 15:01:35 +0800
Subject: clk: reset: Modify reset-controller driver

Set reset signal by a register and
clear reset signal by another register for 8183.

Signed-off-by: yong.liang <yong.liang@mediatek.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/mediatek/clk-mt8183.c                  | 16 ++++-
 drivers/clk/mediatek/clk-mtk.h                     |  3 +
 drivers/clk/mediatek/reset.c                       | 56 ++++++++++++++-
 .../dt-bindings/reset-controller/mt8183-resets.h   | 81 ++++++++++++++++++++++
 4 files changed, 152 insertions(+), 4 deletions(-)
 create mode 100644 include/dt-bindings/reset-controller/mt8183-resets.h

(limited to 'include')

diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c
index 1aa5f4059251..94bbadc0d259 100644
--- a/drivers/clk/mediatek/clk-mt8183.c
+++ b/drivers/clk/mediatek/clk-mt8183.c
@@ -17,6 +17,9 @@
 
 #include <dt-bindings/clock/mt8183-clk.h>
 
+/* Infra global controller reset set register */
+#define INFRA_RST0_SET_OFFSET		0x120
+
 static DEFINE_SPINLOCK(mt8183_clk_lock);
 
 static const struct mtk_fixed_clk top_fixed_clks[] = {
@@ -1185,13 +1188,24 @@ static int clk_mt8183_infra_probe(struct platform_device *pdev)
 {
 	struct clk_onecell_data *clk_data;
 	struct device_node *node = pdev->dev.of_node;
+	int r;
 
 	clk_data = mtk_alloc_clk_data(CLK_INFRA_NR_CLK);
 
 	mtk_clk_register_gates(node, infra_clks, ARRAY_SIZE(infra_clks),
 		clk_data);
 
-	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+	if (r) {
+		dev_err(&pdev->dev,
+			"%s(): could not register clock provider: %d\n",
+			__func__, r);
+		return r;
+	}
+
+	mtk_register_reset_controller_set_clr(node, 4, INFRA_RST0_SET_OFFSET);
+
+	return r;
 }
 
 static int clk_mt8183_mcu_probe(struct platform_device *pdev)
diff --git a/drivers/clk/mediatek/clk-mtk.h b/drivers/clk/mediatek/clk-mtk.h
index 733a11d1de94..2e9e26084798 100644
--- a/drivers/clk/mediatek/clk-mtk.h
+++ b/drivers/clk/mediatek/clk-mtk.h
@@ -240,4 +240,7 @@ struct clk *mtk_clk_register_ref2usb_tx(const char *name,
 void mtk_register_reset_controller(struct device_node *np,
 			unsigned int num_regs, int regofs);
 
+void mtk_register_reset_controller_set_clr(struct device_node *np,
+	unsigned int num_regs, int regofs);
+
 #endif /* __DRV_CLK_MTK_H */
diff --git a/drivers/clk/mediatek/reset.c b/drivers/clk/mediatek/reset.c
index d8376b92349e..cb939c071b0c 100644
--- a/drivers/clk/mediatek/reset.c
+++ b/drivers/clk/mediatek/reset.c
@@ -19,6 +19,24 @@ struct mtk_reset {
 	struct reset_controller_dev rcdev;
 };
 
+static int mtk_reset_assert_set_clr(struct reset_controller_dev *rcdev,
+	unsigned long id)
+{
+	struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
+	unsigned int reg = data->regofs + ((id / 32) << 4);
+
+	return regmap_write(data->regmap, reg, 1);
+}
+
+static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev,
+	unsigned long id)
+{
+	struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
+	unsigned int reg = data->regofs + ((id / 32) << 4) + 0x4;
+
+	return regmap_write(data->regmap, reg, 1);
+}
+
 static int mtk_reset_assert(struct reset_controller_dev *rcdev,
 			      unsigned long id)
 {
@@ -49,14 +67,32 @@ static int mtk_reset(struct reset_controller_dev *rcdev,
 	return mtk_reset_deassert(rcdev, id);
 }
 
+static int mtk_reset_set_clr(struct reset_controller_dev *rcdev,
+	unsigned long id)
+{
+	int ret;
+
+	ret = mtk_reset_assert_set_clr(rcdev, id);
+	if (ret)
+		return ret;
+	return mtk_reset_deassert_set_clr(rcdev, id);
+}
+
 static const struct reset_control_ops mtk_reset_ops = {
 	.assert = mtk_reset_assert,
 	.deassert = mtk_reset_deassert,
 	.reset = mtk_reset,
 };
 
-void mtk_register_reset_controller(struct device_node *np,
-			unsigned int num_regs, int regofs)
+static const struct reset_control_ops mtk_reset_ops_set_clr = {
+	.assert = mtk_reset_assert_set_clr,
+	.deassert = mtk_reset_deassert_set_clr,
+	.reset = mtk_reset_set_clr,
+};
+
+static void mtk_register_reset_controller_common(struct device_node *np,
+			unsigned int num_regs, int regofs,
+			const struct reset_control_ops *reset_ops)
 {
 	struct mtk_reset *data;
 	int ret;
@@ -77,7 +113,7 @@ void mtk_register_reset_controller(struct device_node *np,
 	data->regofs = regofs;
 	data->rcdev.owner = THIS_MODULE;
 	data->rcdev.nr_resets = num_regs * 32;
-	data->rcdev.ops = &mtk_reset_ops;
+	data->rcdev.ops = reset_ops;
 	data->rcdev.of_node = np;
 
 	ret = reset_controller_register(&data->rcdev);
@@ -87,3 +123,17 @@ void mtk_register_reset_controller(struct device_node *np,
 		return;
 	}
 }
+
+void mtk_register_reset_controller(struct device_node *np,
+	unsigned int num_regs, int regofs)
+{
+	mtk_register_reset_controller_common(np, num_regs, regofs,
+		&mtk_reset_ops);
+}
+
+void mtk_register_reset_controller_set_clr(struct device_node *np,
+	unsigned int num_regs, int regofs)
+{
+	mtk_register_reset_controller_common(np, num_regs, regofs,
+		&mtk_reset_ops_set_clr);
+}
diff --git a/include/dt-bindings/reset-controller/mt8183-resets.h b/include/dt-bindings/reset-controller/mt8183-resets.h
new file mode 100644
index 000000000000..8804e34ebdd4
--- /dev/null
+++ b/include/dt-bindings/reset-controller/mt8183-resets.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Yong Liang <yong.liang@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8183
+#define _DT_BINDINGS_RESET_CONTROLLER_MT8183
+
+/* INFRACFG AO resets */
+#define MT8183_INFRACFG_AO_THERM_SW_RST				0
+#define MT8183_INFRACFG_AO_USB_TOP_SW_RST			1
+#define MT8183_INFRACFG_AO_MM_IOMMU_SW_RST			3
+#define MT8183_INFRACFG_AO_MSDC3_SW_RST				4
+#define MT8183_INFRACFG_AO_MSDC2_SW_RST				5
+#define MT8183_INFRACFG_AO_MSDC1_SW_RST				6
+#define MT8183_INFRACFG_AO_MSDC0_SW_RST				7
+#define MT8183_INFRACFG_AO_APDMA_SW_RST				9
+#define MT8183_INFRACFG_AO_MIMP_D_SW_RST			10
+#define MT8183_INFRACFG_AO_BTIF_SW_RST				12
+#define MT8183_INFRACFG_AO_DISP_PWM_SW_RST			14
+#define MT8183_INFRACFG_AO_AUXADC_SW_RST			15
+
+#define MT8183_INFRACFG_AO_IRTX_SW_RST				32
+#define MT8183_INFRACFG_AO_SPI0_SW_RST				33
+#define MT8183_INFRACFG_AO_I2C0_SW_RST				34
+#define MT8183_INFRACFG_AO_I2C1_SW_RST				35
+#define MT8183_INFRACFG_AO_I2C2_SW_RST				36
+#define MT8183_INFRACFG_AO_I2C3_SW_RST				37
+#define MT8183_INFRACFG_AO_UART0_SW_RST				38
+#define MT8183_INFRACFG_AO_UART1_SW_RST				39
+#define MT8183_INFRACFG_AO_UART2_SW_RST				40
+#define MT8183_INFRACFG_AO_PWM_SW_RST				41
+#define MT8183_INFRACFG_AO_SPI1_SW_RST				42
+#define MT8183_INFRACFG_AO_I2C4_SW_RST				43
+#define MT8183_INFRACFG_AO_DVFSP_SW_RST				44
+#define MT8183_INFRACFG_AO_SPI2_SW_RST				45
+#define MT8183_INFRACFG_AO_SPI3_SW_RST				46
+#define MT8183_INFRACFG_AO_UFSHCI_SW_RST			47
+
+#define MT8183_INFRACFG_AO_PMIC_WRAP_SW_RST			64
+#define MT8183_INFRACFG_AO_SPM_SW_RST				65
+#define MT8183_INFRACFG_AO_USBSIF_SW_RST			66
+#define MT8183_INFRACFG_AO_KP_SW_RST				68
+#define MT8183_INFRACFG_AO_APXGPT_SW_RST			69
+#define MT8183_INFRACFG_AO_CLDMA_AO_SW_RST			70
+#define MT8183_INFRACFG_AO_UNIPRO_UFS_SW_RST			71
+#define MT8183_INFRACFG_AO_DX_CC_SW_RST				72
+#define MT8183_INFRACFG_AO_UFSPHY_SW_RST			73
+
+#define MT8183_INFRACFG_AO_DX_CC_SEC_SW_RST			96
+#define MT8183_INFRACFG_AO_GCE_SW_RST				97
+#define MT8183_INFRACFG_AO_CLDMA_SW_RST				98
+#define MT8183_INFRACFG_AO_TRNG_SW_RST				99
+#define MT8183_INFRACFG_AO_AP_MD_CCIF_1_SW_RST			103
+#define MT8183_INFRACFG_AO_AP_MD_CCIF_SW_RST			104
+#define MT8183_INFRACFG_AO_I2C1_IMM_SW_RST			105
+#define MT8183_INFRACFG_AO_I2C1_ARB_SW_RST			106
+#define MT8183_INFRACFG_AO_I2C2_IMM_SW_RST			107
+#define MT8183_INFRACFG_AO_I2C2_ARB_SW_RST			108
+#define MT8183_INFRACFG_AO_I2C5_SW_RST				109
+#define MT8183_INFRACFG_AO_I2C5_IMM_SW_RST			110
+#define MT8183_INFRACFG_AO_I2C5_ARB_SW_RST			111
+#define MT8183_INFRACFG_AO_SPI4_SW_RST				112
+#define MT8183_INFRACFG_AO_SPI5_SW_RST				113
+#define MT8183_INFRACFG_AO_INFRA2MFGAXI_CBIP_CLAS_SW_RST	114
+#define MT8183_INFRACFG_AO_MFGAXI2INFRA_M0_CBIP_GLAS_OUT_SW_RST	115
+#define MT8183_INFRACFG_AO_MFGAXI2INFRA_M1_CBIP_GLAS_OUT_SW_RST	116
+#define MT8183_INFRACFG_AO_UFS_AES_SW_RST			117
+#define MT8183_INFRACFG_AO_CCU_I2C_IRQ_SW_RST			118
+#define MT8183_INFRACFG_AO_CCU_I2C_DMA_SW_RST			119
+#define MT8183_INFRACFG_AO_I2C6_SW_RST				120
+#define MT8183_INFRACFG_AO_CCU_GALS_SW_RST			121
+#define MT8183_INFRACFG_AO_IPU_GALS_SW_RST			122
+#define MT8183_INFRACFG_AO_CONN2AP_GALS_SW_RST			123
+#define MT8183_INFRACFG_AO_AP_MD_CCIF2_SW_RST			124
+#define MT8183_INFRACFG_AO_AP_MD_CCIF3_SW_RST			125
+#define MT8183_INFRACFG_AO_I2C7_SW_RST				126
+#define MT8183_INFRACFG_AO_I2C8_SW_RST				127
+
+#endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT8183 */
-- 
cgit v1.2.3


From 75354284cc3aa58f7e54d479d9bee69bd2ca828f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Aug 2019 16:14:44 +0900
Subject: auxdisplay: charlcd: move charlcd.h to drivers/auxdisplay

This header is included in drivers/auxdisplay/. Make it a local header.

Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/charlcd.c |  2 +-
 drivers/auxdisplay/charlcd.h | 39 +++++++++++++++++++++++++++++++++++++++
 drivers/auxdisplay/hd44780.c |  3 +--
 drivers/auxdisplay/panel.c   |  2 +-
 include/misc/charlcd.h       | 39 ---------------------------------------
 5 files changed, 42 insertions(+), 43 deletions(-)
 create mode 100644 drivers/auxdisplay/charlcd.h
 delete mode 100644 include/misc/charlcd.h

(limited to 'include')

diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 92745efefb54..bef6b85778b6 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -20,7 +20,7 @@
 
 #include <generated/utsrelease.h>
 
-#include <misc/charlcd.h>
+#include "charlcd.h"
 
 #define LCD_MINOR		156
 
diff --git a/drivers/auxdisplay/charlcd.h b/drivers/auxdisplay/charlcd.h
new file mode 100644
index 000000000000..8cf6c18b0adb
--- /dev/null
+++ b/drivers/auxdisplay/charlcd.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Character LCD driver for Linux
+ *
+ * Copyright (C) 2000-2008, Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2016-2017 Glider bvba
+ */
+
+struct charlcd {
+	const struct charlcd_ops *ops;
+	const unsigned char *char_conv;	/* Optional */
+
+	int ifwidth;			/* 4-bit or 8-bit (default) */
+	int height;
+	int width;
+	int bwidth;			/* Default set by charlcd_alloc() */
+	int hwidth;			/* Default set by charlcd_alloc() */
+
+	void *drvdata;			/* Set by charlcd_alloc() */
+};
+
+struct charlcd_ops {
+	/* Required */
+	void (*write_cmd)(struct charlcd *lcd, int cmd);
+	void (*write_data)(struct charlcd *lcd, int data);
+
+	/* Optional */
+	void (*write_cmd_raw4)(struct charlcd *lcd, int cmd);	/* 4-bit only */
+	void (*clear_fast)(struct charlcd *lcd);
+	void (*backlight)(struct charlcd *lcd, int on);
+};
+
+struct charlcd *charlcd_alloc(unsigned int drvdata_size);
+void charlcd_free(struct charlcd *lcd);
+
+int charlcd_register(struct charlcd *lcd);
+int charlcd_unregister(struct charlcd *lcd);
+
+void charlcd_poke(struct charlcd *lcd);
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index ab15b64707ad..bcbe13092327 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -14,8 +14,7 @@
 #include <linux/property.h>
 #include <linux/slab.h>
 
-#include <misc/charlcd.h>
-
+#include "charlcd.h"
 
 enum hd44780_pin {
 	/* Order does matter due to writing to GPIO array subsets! */
diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index e6bd727da503..85965953683e 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -55,7 +55,7 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 
-#include <misc/charlcd.h>
+#include "charlcd.h"
 
 #define KEYPAD_MINOR		185
 
diff --git a/include/misc/charlcd.h b/include/misc/charlcd.h
deleted file mode 100644
index 8cf6c18b0adb..000000000000
--- a/include/misc/charlcd.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Character LCD driver for Linux
- *
- * Copyright (C) 2000-2008, Willy Tarreau <w@1wt.eu>
- * Copyright (C) 2016-2017 Glider bvba
- */
-
-struct charlcd {
-	const struct charlcd_ops *ops;
-	const unsigned char *char_conv;	/* Optional */
-
-	int ifwidth;			/* 4-bit or 8-bit (default) */
-	int height;
-	int width;
-	int bwidth;			/* Default set by charlcd_alloc() */
-	int hwidth;			/* Default set by charlcd_alloc() */
-
-	void *drvdata;			/* Set by charlcd_alloc() */
-};
-
-struct charlcd_ops {
-	/* Required */
-	void (*write_cmd)(struct charlcd *lcd, int cmd);
-	void (*write_data)(struct charlcd *lcd, int data);
-
-	/* Optional */
-	void (*write_cmd_raw4)(struct charlcd *lcd, int cmd);	/* 4-bit only */
-	void (*clear_fast)(struct charlcd *lcd);
-	void (*backlight)(struct charlcd *lcd, int on);
-};
-
-struct charlcd *charlcd_alloc(unsigned int drvdata_size);
-void charlcd_free(struct charlcd *lcd);
-
-int charlcd_register(struct charlcd *lcd);
-int charlcd_unregister(struct charlcd *lcd);
-
-void charlcd_poke(struct charlcd *lcd);
-- 
cgit v1.2.3


From b3da42519c3e6ad977af844d61c31d0e5f874f1c Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 7 Aug 2019 10:31:24 +0900
Subject: ASoC: soc-core: tidyup for card->deferred_resume_work

card->deferred_resume_work is used if CONFIG_PM_SLEEP was defined.
but
	1) It is defined even though CONFIG_PM_SLEEP was not defined
	2) random ifdef code is difficult to read.
This patch tidyup these issues.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/877e7paho1.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  5 +++--
 sound/soc/soc-core.c | 14 ++++++++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 6ac6481b4882..85ad971e9432 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1058,8 +1058,6 @@ struct snd_soc_card {
 	int num_of_dapm_routes;
 	bool fully_routed;
 
-	struct work_struct deferred_resume_work;
-
 	/* lists of probed devices belonging to this card */
 	struct list_head component_dev_list;
 	struct list_head list;
@@ -1079,6 +1077,9 @@ struct snd_soc_card {
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs_card_root;
+#endif
+#ifdef CONFIG_PM_SLEEP
+	struct work_struct deferred_resume_work;
 #endif
 	u32 pop_time;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 6b0042835233..25b26caea4e0 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -701,9 +701,18 @@ int snd_soc_resume(struct device *dev)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snd_soc_resume);
+
+static void soc_resume_init(struct snd_soc_card *card)
+{
+	/* deferred resume work */
+	INIT_WORK(&card->deferred_resume_work, soc_resume_deferred);
+}
 #else
 #define snd_soc_suspend NULL
 #define snd_soc_resume NULL
+static inline void soc_resume_init(struct snd_soc_card *card)
+{
+}
 #endif
 
 static const struct snd_soc_dai_ops null_dai_ops = {
@@ -1984,10 +1993,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 
 	soc_init_card_debugfs(card);
 
-#ifdef CONFIG_PM_SLEEP
-	/* deferred resume work */
-	INIT_WORK(&card->deferred_resume_work, soc_resume_deferred);
-#endif
+	soc_resume_init(card);
 
 	ret = snd_soc_dapm_new_controls(&card->dapm, card->dapm_widgets,
 					card->num_dapm_widgets);
-- 
cgit v1.2.3


From ee5b3f11416d1ba69e919b2fe86aae0b46f9a83e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 7 Aug 2019 10:31:31 +0900
Subject: ASoC: soc-core: define soc_dpcm_debugfs_add() for non CONFIG_DEBUG_FS

soc_dpcm_debugfs_add() is implemented at soc-pcm.c under CONFIG_DEBUG_FS.
Thus, soc-core.c which is only user of it need to use CONFIG_DEBUG_FS, too.

This patch defines soc_dpcm_debugfs_add() for non CONFIG_DEBUG_FS case.
Then, we can remove #ifdef CONFIG_DEBUG_FS from soc-core.c

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/875zn9ahnv.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dpcm.h | 9 ++++++++-
 sound/soc/soc-core.c     | 2 --
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dpcm.h b/include/sound/soc-dpcm.h
index 4be3a2b7c106..e55aeb00ce2d 100644
--- a/include/sound/soc-dpcm.h
+++ b/include/sound/soc-dpcm.h
@@ -142,9 +142,16 @@ void snd_soc_dpcm_be_set_state(struct snd_soc_pcm_runtime *be, int stream,
 
 /* internal use only */
 int soc_dpcm_be_digital_mute(struct snd_soc_pcm_runtime *fe, int mute);
-void soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd);
 int soc_dpcm_runtime_update(struct snd_soc_card *);
 
+#ifdef CONFIG_DEBUG_FS
+void soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd);
+#else
+static inline void soc_dpcm_debugfs_add(struct snd_soc_pcm_runtime *rtd)
+{
+}
+#endif
+
 int dpcm_path_get(struct snd_soc_pcm_runtime *fe,
 	int stream, struct snd_soc_dapm_widget_list **list_);
 int dpcm_process_paths(struct snd_soc_pcm_runtime *fe,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 25b26caea4e0..2a75fba31aa4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1487,11 +1487,9 @@ static int soc_probe_link_dais(struct snd_soc_card *card,
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_DEBUG_FS
 	/* add DPCM sysfs entries */
 	if (dai_link->dynamic)
 		soc_dpcm_debugfs_add(rtd);
-#endif
 
 	num = rtd->num;
 
-- 
cgit v1.2.3


From 26149e3e1f44d27897d0af9ca4bcd723674bad44 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 21 Jul 2019 14:18:42 +0300
Subject: net/mlx5: kTLS, Fix wrong TIS opmod constants

Fix the used constants for TLS TIS opmods, per the HW specification.

Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ce9839c8bc1a..c2f056b5766d 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -446,11 +446,11 @@ enum {
 };
 
 enum {
-	MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x20,
+	MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x1,
 };
 
 enum {
-	MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x20,
+	MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x1,
 };
 
 enum {
-- 
cgit v1.2.3


From a9bc3390327317345dd4683b70970c83ab400ea3 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 30 Jul 2019 11:55:25 +0300
Subject: net/mlx5e: kTLS, Fix progress params context WQE layout

The TLS progress params context WQE should not include an
Eth segment, drop it.
In addition, align the tls_progress_params layout with the
HW specification document:
- fix the tisn field name.
- remove the valid bit.

Fixes: a12ff35e0fb7 ("net/mlx5: Introduce TLS TX offload hardware bits and structures")
Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h               | 9 +++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h    | 6 ++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 4 ++--
 include/linux/mlx5/mlx5_ifc.h                              | 5 ++---
 4 files changed, 15 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ce1be2a84231..f6b64a03cd06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -184,8 +184,13 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
-	struct mlx5_wqe_eth_seg  eth;
-	struct mlx5_wqe_data_seg data[0];
+	union {
+		struct {
+			struct mlx5_wqe_eth_seg  eth;
+			struct mlx5_wqe_data_seg data[0];
+		};
+		u8 tls_progress_params_ctx[0];
+	};
 };
 
 struct mlx5e_rx_wqe_ll {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 407da83474ef..b7298f9ee3d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -11,12 +11,14 @@
 #include "accel/tls.h"
 
 #define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
-	(sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params))
+	(offsetof(struct mlx5e_umr_wqe, tls_static_params_ctx) + \
+	 MLX5_ST_SZ_BYTES(tls_static_params))
 #define MLX5E_KTLS_STATIC_WQEBBS \
 	(DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
 
 #define MLX5E_KTLS_PROGRESS_WQE_SZ \
-	(sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params))
+	(offsetof(struct mlx5e_tx_wqe, tls_progress_params_ctx) + \
+	 MLX5_ST_SZ_BYTES(tls_progress_params))
 #define MLX5E_KTLS_PROGRESS_WQEBBS \
 	(DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
 #define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 3766545ce259..9f67bfb559f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -80,7 +80,7 @@ build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn,
 static void
 fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
 {
-	MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn);
+	MLX5_SET(tls_progress_params, ctx, tisn, priv_tx->tisn);
 	MLX5_SET(tls_progress_params, ctx, record_tracker_state,
 		 MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START);
 	MLX5_SET(tls_progress_params, ctx, auth_state,
@@ -104,7 +104,7 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
 					     PROGRESS_PARAMS_DS_CNT);
 	cseg->fm_ce_se         = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0;
 
-	fill_progress_params_ctx(wqe->data, priv_tx);
+	fill_progress_params_ctx(wqe->tls_progress_params_ctx, priv_tx);
 }
 
 static void tx_fill_wi(struct mlx5e_txqsq *sq,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ec571fd7fcf8..b8b570c30b5e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10054,9 +10054,8 @@ struct mlx5_ifc_tls_static_params_bits {
 };
 
 struct mlx5_ifc_tls_progress_params_bits {
-	u8         valid[0x1];
-	u8         reserved_at_1[0x7];
-	u8         pd[0x18];
+	u8         reserved_at_0[0x8];
+	u8         tisn[0x18];
 
 	u8         next_record_tcp_sn[0x20];
 
-- 
cgit v1.2.3


From b8074aa2460b535915e8f65bf83c4bcb4220f804 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Mon, 29 Jul 2019 13:13:57 +0300
Subject: PCI: Convert pci_resource_to_user() to a weak function

Convert pci_resource_to_user() to a weak function so the existing
architecture-specific implementations will automatically override the
generic one.  This allows us to remove HAVE_ARCH_PCI_RESOURCE_TO_USER
definitions and avoid the conditional compilation for this single function.

Link: https://lore.kernel.org/r/20190729101401.28068-1-efremov@linux.com
Link: https://lore.kernel.org/r/20190729101401.28068-2-efremov@linux.com
Link: https://lore.kernel.org/r/20190729101401.28068-3-efremov@linux.com
Link: https://lore.kernel.org/r/20190729101401.28068-4-efremov@linux.com
Link: https://lore.kernel.org/r/20190729101401.28068-5-efremov@linux.com
Link: https://lore.kernel.org/r/20190729101401.28068-6-efremov@linux.com
Signed-off-by: Denis Efremov <efremov@linux.com>
[bhelgaas: squash into one commit]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Paul Burton <paul.burton@mips.com>	# MIPS
---
 arch/microblaze/include/asm/pci.h |  2 --
 arch/mips/include/asm/pci.h       |  1 -
 arch/powerpc/include/asm/pci.h    |  2 --
 arch/sparc/include/asm/pci.h      |  2 --
 drivers/pci/pci.c                 | 12 ++++++++++++
 include/linux/pci.h               | 16 ----------------
 6 files changed, 12 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 21ddba9188b2..7c4dc5d85f53 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -66,8 +66,6 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 					 unsigned long size,
 					 pgprot_t prot);
 
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
 /* This part of code was originally in xilinx-pci.h */
 #ifdef CONFIG_PCI_XILINX
 extern void __init xilinx_pci_init(void);
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 436099883022..6f48649201c5 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -108,7 +108,6 @@ extern unsigned long PCIBIOS_MIN_MEM;
 
 #define HAVE_PCI_MMAP
 #define ARCH_GENERIC_PCI_MMAP_RESOURCE
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
 
 /*
  * Dynamic DMA mapping stuff.
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 2372d35533ad..327567b8f7d6 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -112,8 +112,6 @@ extern pgprot_t	pci_phys_mem_access_prot(struct file *file,
 					 unsigned long size,
 					 pgprot_t prot);
 
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-
 extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
 extern void pcibios_setup_bus_devices(struct pci_bus *bus);
 extern void pcibios_setup_bus_self(struct pci_bus *bus);
diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h
index cfec79bb1831..4deddf430e5d 100644
--- a/arch/sparc/include/asm/pci.h
+++ b/arch/sparc/include/asm/pci.h
@@ -38,8 +38,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 #define arch_can_pci_mmap_io()	1
 #define HAVE_ARCH_PCI_GET_UNMAPPED_AREA
 #define get_pci_unmapped_area get_fb_unmapped_area
-
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
 #endif /* CONFIG_SPARC64 */
 
 #if defined(CONFIG_SPARC64) || defined(CONFIG_LEON_PCI)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 29ed5ec1ac27..da3241bb4479 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5932,6 +5932,18 @@ resource_size_t __weak pcibios_default_alignment(void)
 	return 0;
 }
 
+/*
+ * Arches that don't want to expose struct resource to userland as-is in
+ * sysfs and /proc can implement their own pci_resource_to_user().
+ */
+void __weak pci_resource_to_user(const struct pci_dev *dev, int bar,
+				 const struct resource *rsrc,
+				 resource_size_t *start, resource_size_t *end)
+{
+	*start = rsrc->start;
+	*end = rsrc->end;
+}
+
 #define RESOURCE_ALIGNMENT_PARAM_SIZE COMMAND_LINE_SIZE
 static char resource_alignment_param[RESOURCE_ALIGNMENT_PARAM_SIZE] = {0};
 static DEFINE_SPINLOCK(resource_alignment_lock);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..dcc5dd310c14 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1870,25 +1870,9 @@ static inline const char *pci_name(const struct pci_dev *pdev)
 	return dev_name(&pdev->dev);
 }
 
-
-/*
- * Some archs don't want to expose struct resource to userland as-is
- * in sysfs and /proc
- */
-#ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER
 void pci_resource_to_user(const struct pci_dev *dev, int bar,
 			  const struct resource *rsrc,
 			  resource_size_t *start, resource_size_t *end);
-#else
-static inline void pci_resource_to_user(const struct pci_dev *dev, int bar,
-		const struct resource *rsrc, resource_size_t *start,
-		resource_size_t *end)
-{
-	*start = rsrc->start;
-	*end = rsrc->end;
-}
-#endif /* HAVE_ARCH_PCI_RESOURCE_TO_USER */
-
 
 /*
  * The world is not perfect and supplies us with broken PCI devices.
-- 
cgit v1.2.3


From 4bc3c420246e70088c1d09f7bc30ab5583fbd5f9 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 24 Jul 2019 13:16:03 -0400
Subject: dt-bindings: ingenic: Add DT bindings for TCU clocks

This header provides clock numbers for the ingenic,tcu
DT binding.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Tested-by: Mathieu Malaterre <malat@debian.org>
Tested-by: Artur Rojek <contact@artur-rojek.eu>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: od@zcrc.me
---
 include/dt-bindings/clock/ingenic,tcu.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 include/dt-bindings/clock/ingenic,tcu.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/ingenic,tcu.h b/include/dt-bindings/clock/ingenic,tcu.h
new file mode 100644
index 000000000000..d569650a7945
--- /dev/null
+++ b/include/dt-bindings/clock/ingenic,tcu.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides clock numbers for the ingenic,tcu DT binding.
+ */
+
+#ifndef __DT_BINDINGS_CLOCK_INGENIC_TCU_H__
+#define __DT_BINDINGS_CLOCK_INGENIC_TCU_H__
+
+#define TCU_CLK_TIMER0	0
+#define TCU_CLK_TIMER1	1
+#define TCU_CLK_TIMER2	2
+#define TCU_CLK_TIMER3	3
+#define TCU_CLK_TIMER4	4
+#define TCU_CLK_TIMER5	5
+#define TCU_CLK_TIMER6	6
+#define TCU_CLK_TIMER7	7
+#define TCU_CLK_WDT	8
+#define TCU_CLK_OST	9
+
+#endif /* __DT_BINDINGS_CLOCK_INGENIC_TCU_H__ */
-- 
cgit v1.2.3


From 39233b7c611248c0d05209b4854bc63e26485655 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 24 Jul 2019 13:16:06 -0400
Subject: mfd/syscon: Add device_node_to_regmap()

device_node_to_regmap() is exactly like syscon_node_to_regmap(), but it
does not check that the node is compatible with "syscon", and won't
attach the first clock it finds to the regmap.

The rationale behind this, is that one device node with a standard
compatible string "foo,bar" can be covered by multiple drivers sharing a
regmap, or by a single driver doing all the job without a regmap, but
these are implementation details which shouldn't reflect on the
devicetree.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: od@zcrc.me
Cc: Mathieu Malaterre <malat@debian.org>
---
 drivers/mfd/syscon.c       | 46 ++++++++++++++++++++++++++++++----------------
 include/linux/mfd/syscon.h |  6 ++++++
 2 files changed, 36 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index b65e585fc8c6..660723276481 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -40,7 +40,7 @@ static const struct regmap_config syscon_regmap_config = {
 	.reg_stride = 4,
 };
 
-static struct syscon *of_syscon_register(struct device_node *np)
+static struct syscon *of_syscon_register(struct device_node *np, bool check_clk)
 {
 	struct clk *clk;
 	struct syscon *syscon;
@@ -51,9 +51,6 @@ static struct syscon *of_syscon_register(struct device_node *np)
 	struct regmap_config syscon_config = syscon_regmap_config;
 	struct resource res;
 
-	if (!of_device_is_compatible(np, "syscon"))
-		return ERR_PTR(-EINVAL);
-
 	syscon = kzalloc(sizeof(*syscon), GFP_KERNEL);
 	if (!syscon)
 		return ERR_PTR(-ENOMEM);
@@ -117,16 +114,18 @@ static struct syscon *of_syscon_register(struct device_node *np)
 		goto err_regmap;
 	}
 
-	clk = of_clk_get(np, 0);
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		/* clock is optional */
-		if (ret != -ENOENT)
-			goto err_clk;
-	} else {
-		ret = regmap_mmio_attach_clk(regmap, clk);
-		if (ret)
-			goto err_attach;
+	if (check_clk) {
+		clk = of_clk_get(np, 0);
+		if (IS_ERR(clk)) {
+			ret = PTR_ERR(clk);
+			/* clock is optional */
+			if (ret != -ENOENT)
+				goto err_clk;
+		} else {
+			ret = regmap_mmio_attach_clk(regmap, clk);
+			if (ret)
+				goto err_attach;
+		}
 	}
 
 	syscon->regmap = regmap;
@@ -150,7 +149,8 @@ err_map:
 	return ERR_PTR(ret);
 }
 
-struct regmap *syscon_node_to_regmap(struct device_node *np)
+static struct regmap *device_node_get_regmap(struct device_node *np,
+					     bool check_clk)
 {
 	struct syscon *entry, *syscon = NULL;
 
@@ -165,13 +165,27 @@ struct regmap *syscon_node_to_regmap(struct device_node *np)
 	spin_unlock(&syscon_list_slock);
 
 	if (!syscon)
-		syscon = of_syscon_register(np);
+		syscon = of_syscon_register(np, check_clk);
 
 	if (IS_ERR(syscon))
 		return ERR_CAST(syscon);
 
 	return syscon->regmap;
 }
+
+struct regmap *device_node_to_regmap(struct device_node *np)
+{
+	return device_node_get_regmap(np, false);
+}
+EXPORT_SYMBOL_GPL(device_node_to_regmap);
+
+struct regmap *syscon_node_to_regmap(struct device_node *np)
+{
+	if (!of_device_is_compatible(np, "syscon"))
+		return ERR_PTR(-EINVAL);
+
+	return device_node_get_regmap(np, true);
+}
 EXPORT_SYMBOL_GPL(syscon_node_to_regmap);
 
 struct regmap *syscon_regmap_lookup_by_compatible(const char *s)
diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index 8cfda0554381..112dc66262cc 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -17,12 +17,18 @@
 struct device_node;
 
 #ifdef CONFIG_MFD_SYSCON
+extern struct regmap *device_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_node_to_regmap(struct device_node *np);
 extern struct regmap *syscon_regmap_lookup_by_compatible(const char *s);
 extern struct regmap *syscon_regmap_lookup_by_phandle(
 					struct device_node *np,
 					const char *property);
 #else
+static inline struct regmap *device_node_to_regmap(struct device_node *np)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
 static inline struct regmap *syscon_node_to_regmap(struct device_node *np)
 {
 	return ERR_PTR(-ENOTSUPP);
-- 
cgit v1.2.3


From 73dd11dc1a883d4c994d729dc9984f4890001157 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 24 Jul 2019 13:16:10 -0400
Subject: clk: jz4740: Add TCU clock

Add the missing TCU clock to the list of clocks supplied by the CGU for
the JZ4740 SoC.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Tested-by: Mathieu Malaterre <malat@debian.org>
Tested-by: Artur Rojek <contact@artur-rojek.eu>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: James Hogan <jhogan@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: linux-clk@vger.kernel.org
Cc: od@zcrc.me
---
 drivers/clk/ingenic/jz4740-cgu.c       | 6 ++++++
 include/dt-bindings/clock/jz4740-cgu.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/ingenic/jz4740-cgu.c b/drivers/clk/ingenic/jz4740-cgu.c
index 4c0a20949c2c..67f8a0e14284 100644
--- a/drivers/clk/ingenic/jz4740-cgu.c
+++ b/drivers/clk/ingenic/jz4740-cgu.c
@@ -222,6 +222,12 @@ static const struct ingenic_cgu_clk_info jz4740_cgu_clocks[] = {
 		.parents = { JZ4740_CLK_EXT, -1, -1, -1 },
 		.gate = { CGU_REG_CLKGR, 5 },
 	},
+
+	[JZ4740_CLK_TCU] = {
+		"tcu", CGU_CLK_GATE,
+		.parents = { JZ4740_CLK_EXT, -1, -1, -1 },
+		.gate = { CGU_REG_CLKGR, 1 },
+	},
 };
 
 static void __init jz4740_cgu_init(struct device_node *np)
diff --git a/include/dt-bindings/clock/jz4740-cgu.h b/include/dt-bindings/clock/jz4740-cgu.h
index 6ed83f926ae7..e82d77028581 100644
--- a/include/dt-bindings/clock/jz4740-cgu.h
+++ b/include/dt-bindings/clock/jz4740-cgu.h
@@ -34,5 +34,6 @@
 #define JZ4740_CLK_ADC		19
 #define JZ4740_CLK_I2C		20
 #define JZ4740_CLK_AIC		21
+#define JZ4740_CLK_TCU		22
 
 #endif /* __DT_BINDINGS_CLOCK_JZ4740_CGU_H__ */
-- 
cgit v1.2.3


From 891584f48a9084ba462f10da4c6bb28b6181b543 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 2 Aug 2019 17:15:03 +0200
Subject: inet: frags: re-introduce skb coalescing for local delivery

Before commit d4289fcc9b16 ("net: IP6 defrag: use rbtrees for IPv6
defrag"), a netperf UDP_STREAM test[0] using big IPv6 datagrams (thus
generating many fragments) and running over an IPsec tunnel, reported
more than 6Gbps throughput. After that patch, the same test gets only
9Mbps when receiving on a be2net nic (driver can make a big difference
here, for example, ixgbe doesn't seem to be affected).

By reusing the IPv4 defragmentation code, IPv6 lost fragment coalescing
(IPv4 fragment coalescing was dropped by commit 14fe22e33462 ("Revert
"ipv4: use skb coalescing in defragmentation"")).

Without fragment coalescing, be2net runs out of Rx ring entries and
starts to drop frames (ethtool reports rx_drops_no_frags errors). Since
the netperf traffic is only composed of UDP fragments, any lost packet
prevents reassembly of the full datagram. Therefore, fragments which
have no possibility to ever get reassembled pile up in the reassembly
queue, until the memory accounting exeeds the threshold. At that point
no fragment is accepted anymore, which effectively discards all
netperf traffic.

When reassembly timeout expires, some stale fragments are removed from
the reassembly queue, so a few packets can be received, reassembled
and delivered to the netperf receiver. But the nic still drops frames
and soon the reassembly queue gets filled again with stale fragments.
These long time frames where no datagram can be received explain why
the performance drop is so significant.

Re-introducing fragment coalescing is enough to get the initial
performances again (6.6Gbps with be2net): driver doesn't drop frames
anymore (no more rx_drops_no_frags errors) and the reassembly engine
works at full speed.

This patch is quite conservative and only coalesces skbs for local
IPv4 and IPv6 delivery (in order to avoid changing skb geometry when
forwarding). Coalescing could be extended in the future if need be, as
more scenarios would probably benefit from it.

[0]: Test configuration
Sender:
ip xfrm policy flush
ip xfrm state flush
ip xfrm state add src fc00:1::1 dst fc00:2::1 proto esp spi 0x1000 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:1::1 dst fc00:2::1
ip xfrm policy add src fc00:1::1 dst fc00:2::1 dir in tmpl src fc00:1::1 dst fc00:2::1 proto esp mode transport action allow
ip xfrm state add src fc00:2::1 dst fc00:1::1 proto esp spi 0x1001 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:2::1 dst fc00:1::1
ip xfrm policy add src fc00:2::1 dst fc00:1::1 dir out tmpl src fc00:2::1 dst fc00:1::1 proto esp mode transport action allow
netserver -D -L fc00:2::1

Receiver:
ip xfrm policy flush
ip xfrm state flush
ip xfrm state add src fc00:2::1 dst fc00:1::1 proto esp spi 0x1001 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:2::1 dst fc00:1::1
ip xfrm policy add src fc00:2::1 dst fc00:1::1 dir in tmpl src fc00:2::1 dst fc00:1::1 proto esp mode transport action allow
ip xfrm state add src fc00:1::1 dst fc00:2::1 proto esp spi 0x1000 aead 'rfc4106(gcm(aes))' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b 96 mode transport sel src fc00:1::1 dst fc00:2::1
ip xfrm policy add src fc00:1::1 dst fc00:2::1 dir out tmpl src fc00:1::1 dst fc00:2::1 proto esp mode transport action allow
netperf -H fc00:2::1 -f k -P 0 -L fc00:1::1 -l 60 -t UDP_STREAM -I 99,5 -i 5,5 -T5,5 -6

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_frag.h                 |  2 +-
 net/ieee802154/6lowpan/reassembly.c     |  2 +-
 net/ipv4/inet_fragment.c                | 39 +++++++++++++++++++++++----------
 net/ipv4/ip_fragment.c                  |  8 ++++++-
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/reassembly.c                   |  2 +-
 6 files changed, 39 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 010f26b31c89..bac79e817776 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -171,7 +171,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
 void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
 			      struct sk_buff *parent);
 void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
-			    void *reasm_data);
+			    void *reasm_data, bool try_coalesce);
 struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q);
 
 #endif
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index e4aba5d485be..bbe9b3b2d395 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -170,7 +170,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb,
 	reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
 	if (!reasm_data)
 		goto out_oom;
-	inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);
 
 	skb->dev = ldev;
 	skb->tstamp = fq->q.stamp;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index a999451345f9..10d31733297d 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -475,11 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
 EXPORT_SYMBOL(inet_frag_reasm_prepare);
 
 void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
-			    void *reasm_data)
+			    void *reasm_data, bool try_coalesce)
 {
 	struct sk_buff **nextp = (struct sk_buff **)reasm_data;
 	struct rb_node *rbn;
 	struct sk_buff *fp;
+	int sum_truesize;
 
 	skb_push(head, head->data - skb_network_header(head));
 
@@ -487,25 +488,41 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
 	fp = FRAG_CB(head)->next_frag;
 	rbn = rb_next(&head->rbnode);
 	rb_erase(&head->rbnode, &q->rb_fragments);
+
+	sum_truesize = head->truesize;
 	while (rbn || fp) {
 		/* fp points to the next sk_buff in the current run;
 		 * rbn points to the next run.
 		 */
 		/* Go through the current run. */
 		while (fp) {
-			*nextp = fp;
-			nextp = &fp->next;
-			fp->prev = NULL;
-			memset(&fp->rbnode, 0, sizeof(fp->rbnode));
-			fp->sk = NULL;
-			head->data_len += fp->len;
-			head->len += fp->len;
+			struct sk_buff *next_frag = FRAG_CB(fp)->next_frag;
+			bool stolen;
+			int delta;
+
+			sum_truesize += fp->truesize;
 			if (head->ip_summed != fp->ip_summed)
 				head->ip_summed = CHECKSUM_NONE;
 			else if (head->ip_summed == CHECKSUM_COMPLETE)
 				head->csum = csum_add(head->csum, fp->csum);
-			head->truesize += fp->truesize;
-			fp = FRAG_CB(fp)->next_frag;
+
+			if (try_coalesce && skb_try_coalesce(head, fp, &stolen,
+							     &delta)) {
+				kfree_skb_partial(fp, stolen);
+			} else {
+				fp->prev = NULL;
+				memset(&fp->rbnode, 0, sizeof(fp->rbnode));
+				fp->sk = NULL;
+
+				head->data_len += fp->len;
+				head->len += fp->len;
+				head->truesize += fp->truesize;
+
+				*nextp = fp;
+				nextp = &fp->next;
+			}
+
+			fp = next_frag;
 		}
 		/* Move to the next run. */
 		if (rbn) {
@@ -516,7 +533,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
 			rbn = rbnext;
 		}
 	}
-	sub_frag_mem_limit(q->fqdir, head->truesize);
+	sub_frag_mem_limit(q->fqdir, sum_truesize);
 
 	*nextp = NULL;
 	skb_mark_not_on_list(head);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 4385eb9e781f..cfeb8890f94e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -393,6 +393,11 @@ err:
 	return err;
 }
 
+static bool ip_frag_coalesce_ok(const struct ipq *qp)
+{
+	return qp->q.key.v4.user == IP_DEFRAG_LOCAL_DELIVER;
+}
+
 /* Build a new IP datagram from all its fragments. */
 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 			 struct sk_buff *prev_tail, struct net_device *dev)
@@ -421,7 +426,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	if (len > 65535)
 		goto out_oversize;
 
-	inet_frag_reasm_finish(&qp->q, skb, reasm_data);
+	inet_frag_reasm_finish(&qp->q, skb, reasm_data,
+			       ip_frag_coalesce_ok(qp));
 
 	skb->dev = dev;
 	IPCB(skb)->frag_max_size = max(qp->max_df_size, qp->q.max_size);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 0f82c150543b..fed9666a2f7d 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -348,7 +348,7 @@ static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
 
 	skb_reset_transport_header(skb);
 
-	inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data, false);
 
 	skb->ignore_df = 1;
 	skb->dev = dev;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index ca05b16f1bb9..1f5d4d196dcc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -282,7 +282,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 
 	skb_reset_transport_header(skb);
 
-	inet_frag_reasm_finish(&fq->q, skb, reasm_data);
+	inet_frag_reasm_finish(&fq->q, skb, reasm_data, true);
 
 	skb->dev = dev;
 	ipv6_hdr(skb)->payload_len = htons(payload_len);
-- 
cgit v1.2.3


From 5e6d9fc76190aa70db9cbfb18a6f44f4ee83b7f5 Mon Sep 17 00:00:00 2001
From: Rahul Verma <rahulv@marvell.com>
Date: Mon, 5 Aug 2019 23:59:50 -0700
Subject: qed: Add new ethtool supported port types based on media.

Supported ports in ethtool <eth1> are displayed based on media type.
For media type fibre and twinaxial, port type is "FIBRE". Media type
Base-T is "TP" and media KR is "Backplane".

V1->V2:
Corrected the subject.

Signed-off-by: Rahul Verma <rahulv@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c      | 6 +++++-
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 3 ++-
 include/linux/qed/qed_if.h                      | 2 +-
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 829dd60ab937..e5ac8bd4fd14 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1688,6 +1688,7 @@ static void qed_fill_link_capability(struct qed_hwfn *hwfn,
 
 	switch (media_type) {
 	case MEDIA_DA_TWINAX:
+		*if_capability |= QED_LM_FIBRE_BIT;
 		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
 			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
 		/* For DAC media multiple speed capabilities are supported*/
@@ -1707,6 +1708,7 @@ static void qed_fill_link_capability(struct qed_hwfn *hwfn,
 			*if_capability |= QED_LM_100000baseCR4_Full_BIT;
 		break;
 	case MEDIA_BASE_T:
+		*if_capability |= QED_LM_TP_BIT;
 		if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_EXT_PHY) {
 			if (capability &
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
@@ -1718,6 +1720,7 @@ static void qed_fill_link_capability(struct qed_hwfn *hwfn,
 			}
 		}
 		if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_MODULE) {
+			*if_capability |= QED_LM_FIBRE_BIT;
 			if (tcvr_type == ETH_TRANSCEIVER_TYPE_1000BASET)
 				*if_capability |= QED_LM_1000baseT_Full_BIT;
 			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_BASET)
@@ -1728,6 +1731,7 @@ static void qed_fill_link_capability(struct qed_hwfn *hwfn,
 	case MEDIA_SFPP_10G_FIBER:
 	case MEDIA_XFP_FIBER:
 	case MEDIA_MODULE_FIBER:
+		*if_capability |= QED_LM_FIBRE_BIT;
 		if (capability &
 		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
 			if ((tcvr_type == ETH_TRANSCEIVER_TYPE_1G_LX) ||
@@ -1770,6 +1774,7 @@ static void qed_fill_link_capability(struct qed_hwfn *hwfn,
 
 		break;
 	case MEDIA_KR:
+		*if_capability |= QED_LM_Backplane_BIT;
 		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
 			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
 		if (capability &
@@ -1821,7 +1826,6 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 		if_link->link_up = true;
 
 	/* TODO - at the moment assume supported and advertised speed equal */
-	if_link->supported_caps = QED_LM_FIBRE_BIT;
 	if (link_caps.default_speed_autoneg)
 		if_link->supported_caps |= QED_LM_Autoneg_BIT;
 	if (params.pause.autoneg ||
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index e85f9fef930c..abcee474909a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -424,12 +424,13 @@ struct qede_link_mode_mapping {
 };
 
 static const struct qede_link_mode_mapping qed_lm_map[] = {
+	{QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
 	{QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
 	{QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
 	{QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
 	{QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
 	{QED_LM_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT},
-	{QED_LM_2500baseX_Full_BIT, ETHTOOL_LINK_MODE_2500baseX_Full_BIT},
+	{QED_LM_TP_BIT, ETHTOOL_LINK_MODE_TP_BIT},
 	{QED_LM_Backplane_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
 	{QED_LM_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT},
 	{QED_LM_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT},
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index eef02e64b422..23021366a4e5 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -689,7 +689,7 @@ enum qed_link_mode_bits {
 	QED_LM_40000baseLR4_Full_BIT = BIT(9),
 	QED_LM_50000baseKR2_Full_BIT = BIT(10),
 	QED_LM_100000baseKR4_Full_BIT = BIT(11),
-	QED_LM_2500baseX_Full_BIT = BIT(12),
+	QED_LM_TP_BIT = BIT(12),
 	QED_LM_Backplane_BIT = BIT(13),
 	QED_LM_1000baseKX_Full_BIT = BIT(14),
 	QED_LM_10000baseKX4_Full_BIT = BIT(15),
-- 
cgit v1.2.3


From 323ebb61e32b478e2432c5a3cbf9e2ca678a9609 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 6 Aug 2019 14:53:55 +0100
Subject: net: use listified RX for handling GRO_NORMAL skbs

When GRO decides not to coalesce a packet, in napi_frags_finish(), instead
 of passing it to the stack immediately, place it on a list in the napi
 struct.  Then, at flush time (napi_complete_done(), napi_poll(), or
 napi_busy_loop()), call netif_receive_skb_list_internal() on the list.
We'd like to do that in napi_gro_flush(), but it's not called if
 !napi->gro_bitmask, so we have to do it in the callers instead.  (There are
 a handful of drivers that call napi_gro_flush() themselves, but it's not
 clear why, or whether this will affect them.)
Because a full 64 packets is an inefficiently large batch, also consume the
 list whenever it exceeds gro_normal_batch, a new net/core sysctl that
 defaults to 8.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  3 +++
 net/core/dev.c             | 44 +++++++++++++++++++++++++++++++++++++++++---
 net/core/sysctl_net_core.c |  8 ++++++++
 3 files changed, 52 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 88292953aa6f..55ac223553f8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -332,6 +332,8 @@ struct napi_struct {
 	struct net_device	*dev;
 	struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
+	struct list_head	rx_list; /* Pending GRO_NORMAL skbs */
+	int			rx_count; /* length of rx_list */
 	struct hrtimer		timer;
 	struct list_head	dev_list;
 	struct hlist_node	napi_hash_node;
@@ -4239,6 +4241,7 @@ extern int		dev_weight_rx_bias;
 extern int		dev_weight_tx_bias;
 extern int		dev_rx_weight;
 extern int		dev_tx_weight;
+extern int		gro_normal_batch;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index af071b0ce88e..49589ed2018d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3963,6 +3963,8 @@ int dev_weight_rx_bias __read_mostly = 1;  /* bias for backlog weight */
 int dev_weight_tx_bias __read_mostly = 1;  /* bias for output_queue quota */
 int dev_rx_weight __read_mostly = 64;
 int dev_tx_weight __read_mostly = 64;
+/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
+int gro_normal_batch __read_mostly = 8;
 
 /* Called with irq disabled */
 static inline void ____napi_schedule(struct softnet_data *sd,
@@ -5747,6 +5749,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_get_frags);
 
+/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
+static void gro_normal_list(struct napi_struct *napi)
+{
+	if (!napi->rx_count)
+		return;
+	netif_receive_skb_list_internal(&napi->rx_list);
+	INIT_LIST_HEAD(&napi->rx_list);
+	napi->rx_count = 0;
+}
+
+/* Queue one GRO_NORMAL SKB up for list processing.  If batch size exceeded,
+ * pass the whole batch up to the stack.
+ */
+static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
+{
+	list_add_tail(&skb->list, &napi->rx_list);
+	if (++napi->rx_count >= gro_normal_batch)
+		gro_normal_list(napi);
+}
+
 static gro_result_t napi_frags_finish(struct napi_struct *napi,
 				      struct sk_buff *skb,
 				      gro_result_t ret)
@@ -5756,8 +5778,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
 	case GRO_HELD:
 		__skb_push(skb, ETH_HLEN);
 		skb->protocol = eth_type_trans(skb, skb->dev);
-		if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
-			ret = GRO_DROP;
+		if (ret == GRO_NORMAL)
+			gro_normal_one(napi, skb);
 		break;
 
 	case GRO_DROP:
@@ -6034,6 +6056,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
+	gro_normal_list(n);
+
 	if (n->gro_bitmask) {
 		unsigned long timeout = 0;
 
@@ -6119,10 +6143,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
 	 * Ideally, a new ndo_busy_poll_stop() could avoid another round.
 	 */
 	rc = napi->poll(napi, BUSY_POLL_BUDGET);
+	/* We can't gro_normal_list() here, because napi->poll() might have
+	 * rearmed the napi (napi_complete_done()) in which case it could
+	 * already be running on another CPU.
+	 */
 	trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
 	netpoll_poll_unlock(have_poll_lock);
-	if (rc == BUSY_POLL_BUDGET)
+	if (rc == BUSY_POLL_BUDGET) {
+		/* As the whole budget was spent, we still own the napi so can
+		 * safely handle the rx_list.
+		 */
+		gro_normal_list(napi);
 		__napi_schedule(napi);
+	}
 	local_bh_enable();
 }
 
@@ -6167,6 +6200,7 @@ restart:
 		}
 		work = napi_poll(napi, BUSY_POLL_BUDGET);
 		trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
+		gro_normal_list(napi);
 count:
 		if (work > 0)
 			__NET_ADD_STATS(dev_net(napi->dev),
@@ -6272,6 +6306,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 	napi->timer.function = napi_watchdog;
 	init_gro_hash(napi);
 	napi->skb = NULL;
+	INIT_LIST_HEAD(&napi->rx_list);
+	napi->rx_count = 0;
 	napi->poll = poll;
 	if (weight > NAPI_POLL_WEIGHT)
 		netdev_err_once(dev, "%s() called with weight %d\n", __func__,
@@ -6368,6 +6404,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 		goto out_unlock;
 	}
 
+	gro_normal_list(n);
+
 	if (n->gro_bitmask) {
 		/* flush too old packets
 		 * If HZ < 1000, flush all packets.
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 8da5b3a54dac..eb29e5adc84d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_do_static_key,
 	},
+	{
+		.procname	= "gro_normal_batch",
+		.data		= &gro_normal_batch,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
 	{ }
 };
 
-- 
cgit v1.2.3


From 4e481908c51bf02457aecdedc2d80e1be22e0146 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Wed, 7 Aug 2019 09:13:52 +0800
Subject: flow_offload: move tc indirect block to flow offload

move tc indirect block to flow_offload and rename
it to flow indirect block.The nf_tables can use the
indr block architecture.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  10 +-
 .../net/ethernet/netronome/nfp/flower/offload.c    |  11 +-
 include/net/flow_offload.h                         |  29 +++
 include/net/pkt_cls.h                              |  35 ---
 include/net/sch_generic.h                          |   3 -
 net/core/flow_offload.c                            | 215 ++++++++++++++++++
 net/sched/cls_api.c                                | 240 +++------------------
 7 files changed, 280 insertions(+), 263 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index fbb9de633578..b7f113e996e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -781,9 +781,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
 {
 	int err;
 
-	err = __tc_indr_block_cb_register(netdev, rpriv,
-					  mlx5e_rep_indr_setup_tc_cb,
-					  rpriv);
+	err = __flow_indr_block_cb_register(netdev, rpriv,
+					    mlx5e_rep_indr_setup_tc_cb,
+					    rpriv);
 	if (err) {
 		struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 
@@ -796,8 +796,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
 static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
 					    struct net_device *netdev)
 {
-	__tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
-				      rpriv);
+	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb,
+					rpriv);
 }
 
 static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index ff8a9f1a38f8..3a4f4f042ae7 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1649,16 +1649,17 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
 		return NOTIFY_OK;
 
 	if (event == NETDEV_REGISTER) {
-		err = __tc_indr_block_cb_register(netdev, app,
-						  nfp_flower_indr_setup_tc_cb,
-						  app);
+		err = __flow_indr_block_cb_register(netdev, app,
+						    nfp_flower_indr_setup_tc_cb,
+						    app);
 		if (err)
 			nfp_flower_cmsg_warn(app,
 					     "Indirect block reg failed - %s\n",
 					     netdev->name);
 	} else if (event == NETDEV_UNREGISTER) {
-		__tc_indr_block_cb_unregister(netdev,
-					      nfp_flower_indr_setup_tc_cb, app);
+		__flow_indr_block_cb_unregister(netdev,
+						nfp_flower_indr_setup_tc_cb,
+						app);
 	}
 
 	return NOTIFY_OK;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index d3b12bc8a114..46b8777ad05d 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <net/flow_dissector.h>
+#include <linux/rhashtable.h>
 
 struct flow_match {
 	struct flow_dissector	*dissector;
@@ -370,4 +371,32 @@ static inline void flow_block_init(struct flow_block *flow_block)
 	INIT_LIST_HEAD(&flow_block->cb_list);
 }
 
+typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
+				      enum tc_setup_type type, void *type_data);
+
+typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
+					flow_indr_block_bind_cb_t *cb,
+					void *cb_priv,
+					enum flow_block_command command);
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_ident);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb,
+				     void *cb_ident);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb, void *cb_ident);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_ident);
+
+void flow_indr_block_call(struct net_device *dev,
+			  flow_indr_block_ing_cmd_t *cb,
+			  struct flow_block_offload *bo,
+			  enum flow_block_command command);
+
 #endif /* _NET_FLOW_OFFLOAD_H */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e429809ca90d..0790a4ed909c 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -70,15 +70,6 @@ static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
 	return block->q;
 }
 
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident);
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident);
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident);
-
 int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		 struct tcf_result *res, bool compat_mode);
 
@@ -137,32 +128,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
 {
 }
 
-static inline
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	return 0;
-}
-
-static inline
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	return 0;
-}
-
-static inline
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
-static inline
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-}
-
 static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 			       struct tcf_result *res, bool compat_mode)
 {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6b6b01234dd9..d9f359af0b93 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -23,9 +23,6 @@ struct tcf_walker;
 struct module;
 struct bpf_flow_keys;
 
-typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
-				    enum tc_setup_type type, void *type_data);
-
 struct qdisc_rate_table {
 	struct tc_ratespec rate;
 	u32		data[256];
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index d63b970784dc..4cc18e462d50 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -2,6 +2,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <net/flow_offload.h>
+#include <linux/rtnetlink.h>
 
 struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
@@ -280,3 +281,217 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 	}
 }
 EXPORT_SYMBOL(flow_block_cb_setup_simple);
+
+static struct rhashtable indr_setup_block_ht;
+
+struct flow_indr_block_cb {
+	struct list_head list;
+	void *cb_priv;
+	flow_indr_block_bind_cb_t *cb;
+	void *cb_ident;
+};
+
+struct flow_indr_block_dev {
+	struct rhash_head ht_node;
+	struct net_device *dev;
+	unsigned int refcnt;
+	flow_indr_block_ing_cmd_t  *block_ing_cmd_cb;
+	struct list_head cb_list;
+};
+
+static const struct rhashtable_params flow_indr_setup_block_ht_params = {
+	.key_offset	= offsetof(struct flow_indr_block_dev, dev),
+	.head_offset	= offsetof(struct flow_indr_block_dev, ht_node),
+	.key_len	= sizeof(struct net_device *),
+};
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_lookup(struct net_device *dev)
+{
+	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
+				      flow_indr_setup_block_ht_params);
+}
+
+static struct flow_indr_block_dev *
+flow_indr_block_dev_get(struct net_device *dev)
+{
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (indr_dev)
+		goto inc_ref;
+
+	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
+	if (!indr_dev)
+		return NULL;
+
+	INIT_LIST_HEAD(&indr_dev->cb_list);
+	indr_dev->dev = dev;
+	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+				   flow_indr_setup_block_ht_params)) {
+		kfree(indr_dev);
+		return NULL;
+	}
+
+inc_ref:
+	indr_dev->refcnt++;
+	return indr_dev;
+}
+
+static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
+{
+	if (--indr_dev->refcnt)
+		return;
+
+	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
+			       flow_indr_setup_block_ht_params);
+	kfree(indr_dev);
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
+			  flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		if (indr_block_cb->cb == cb &&
+		    indr_block_cb->cb_ident == cb_ident)
+			return indr_block_cb;
+	return NULL;
+}
+
+static struct flow_indr_block_cb *
+flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
+		       flow_indr_block_bind_cb_t *cb, void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (indr_block_cb)
+		return ERR_PTR(-EEXIST);
+
+	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
+	if (!indr_block_cb)
+		return ERR_PTR(-ENOMEM);
+
+	indr_block_cb->cb_priv = cb_priv;
+	indr_block_cb->cb = cb;
+	indr_block_cb->cb_ident = cb_ident;
+	list_add(&indr_block_cb->list, &indr_dev->cb_list);
+
+	return indr_block_cb;
+}
+
+static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
+{
+	list_del(&indr_block_cb->list);
+	kfree(indr_block_cb);
+}
+
+int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				  flow_indr_block_bind_cb_t *cb,
+				  void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+	int err;
+
+	indr_dev = flow_indr_block_dev_get(dev);
+	if (!indr_dev)
+		return -ENOMEM;
+
+	indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
+	err = PTR_ERR_OR_ZERO(indr_block_cb);
+	if (err)
+		goto err_dev_put;
+
+	if (indr_dev->block_ing_cmd_cb)
+		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
+					   indr_block_cb->cb_priv,
+					   FLOW_BLOCK_BIND);
+
+	return 0;
+
+err_dev_put:
+	flow_indr_block_dev_put(indr_dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
+
+int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
+				flow_indr_block_bind_cb_t *cb,
+				void *cb_ident)
+{
+	int err;
+
+	rtnl_lock();
+	err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
+	rtnl_unlock();
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
+
+void __flow_indr_block_cb_unregister(struct net_device *dev,
+				     flow_indr_block_bind_cb_t *cb,
+				     void *cb_ident)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
+	if (!indr_block_cb)
+		return;
+
+	if (indr_dev->block_ing_cmd_cb)
+		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
+					   indr_block_cb->cb_priv,
+					   FLOW_BLOCK_UNBIND);
+
+	flow_indr_block_cb_del(indr_block_cb);
+	flow_indr_block_dev_put(indr_dev);
+}
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
+
+void flow_indr_block_cb_unregister(struct net_device *dev,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_ident)
+{
+	rtnl_lock();
+	__flow_indr_block_cb_unregister(dev, cb, cb_ident);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
+
+void flow_indr_block_call(struct net_device *dev,
+			  flow_indr_block_ing_cmd_t cb,
+			  struct flow_block_offload *bo,
+			  enum flow_block_command command)
+{
+	struct flow_indr_block_cb *indr_block_cb;
+	struct flow_indr_block_dev *indr_dev;
+
+	indr_dev = flow_indr_block_dev_lookup(dev);
+	if (!indr_dev)
+		return;
+
+	indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND
+				     ? cb : NULL;
+
+	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
+		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
+				  bo);
+}
+EXPORT_SYMBOL_GPL(flow_indr_block_call);
+
+static int __init init_flow_indr_rhashtable(void)
+{
+	return rhashtable_init(&indr_setup_block_ht,
+			       &flow_indr_setup_block_ht_params);
+}
+subsys_initcall(init_flow_indr_rhashtable);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 7c34fc6851c3..0b0dde26783d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -37,6 +37,7 @@
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_ct.h>
 #include <net/tc_act/tc_mpls.h>
+#include <net/flow_offload.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
 
@@ -545,139 +546,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 	}
 }
 
-static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
-{
-	const struct Qdisc_class_ops *cops;
-	struct Qdisc *qdisc;
-
-	if (!dev_ingress_queue(dev))
-		return NULL;
-
-	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
-	if (!qdisc)
-		return NULL;
-
-	cops = qdisc->ops->cl_ops;
-	if (!cops)
-		return NULL;
-
-	if (!cops->tcf_block)
-		return NULL;
-
-	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
-}
-
-static struct rhashtable indr_setup_block_ht;
-
-struct tc_indr_block_dev {
-	struct rhash_head ht_node;
-	struct net_device *dev;
-	unsigned int refcnt;
-	struct list_head cb_list;
-};
-
-struct tc_indr_block_cb {
-	struct list_head list;
-	void *cb_priv;
-	tc_indr_block_bind_cb_t *cb;
-	void *cb_ident;
-};
-
-static const struct rhashtable_params tc_indr_setup_block_ht_params = {
-	.key_offset	= offsetof(struct tc_indr_block_dev, dev),
-	.head_offset	= offsetof(struct tc_indr_block_dev, ht_node),
-	.key_len	= sizeof(struct net_device *),
-};
-
-static struct tc_indr_block_dev *
-tc_indr_block_dev_lookup(struct net_device *dev)
-{
-	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
-				      tc_indr_setup_block_ht_params);
-}
-
-static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
-{
-	struct tc_indr_block_dev *indr_dev;
-
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (indr_dev)
-		goto inc_ref;
-
-	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
-	if (!indr_dev)
-		return NULL;
-
-	INIT_LIST_HEAD(&indr_dev->cb_list);
-	indr_dev->dev = dev;
-	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-				   tc_indr_setup_block_ht_params)) {
-		kfree(indr_dev);
-		return NULL;
-	}
-
-inc_ref:
-	indr_dev->refcnt++;
-	return indr_dev;
-}
-
-static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
-{
-	if (--indr_dev->refcnt)
-		return;
-
-	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-			       tc_indr_setup_block_ht_params);
-	kfree(indr_dev);
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
-			tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		if (indr_block_cb->cb == cb &&
-		    indr_block_cb->cb_ident == cb_ident)
-			return indr_block_cb;
-	return NULL;
-}
-
-static struct tc_indr_block_cb *
-tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
-		     tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-
-	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (indr_block_cb)
-		return ERR_PTR(-EEXIST);
-
-	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
-	if (!indr_block_cb)
-		return ERR_PTR(-ENOMEM);
-
-	indr_block_cb->cb_priv = cb_priv;
-	indr_block_cb->cb = cb;
-	indr_block_cb->cb_ident = cb_ident;
-	list_add(&indr_block_cb->list, &indr_dev->cb_list);
-
-	return indr_block_cb;
-}
-
-static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
-{
-	list_del(&indr_block_cb->list);
-	kfree(indr_block_cb);
-}
-
 static int tcf_block_setup(struct tcf_block *block,
 			   struct flow_block_offload *bo);
 
 static void tc_indr_block_ing_cmd(struct net_device *dev,
 				  struct tcf_block *block,
-				  tc_indr_block_bind_cb_t *cb,
+				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_priv,
 				  enum flow_block_command command)
 {
@@ -699,97 +573,40 @@ static void tc_indr_block_ing_cmd(struct net_device *dev,
 	tcf_block_setup(block, &bo);
 }
 
-int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-	struct tcf_block *block;
-	int err;
-
-	indr_dev = tc_indr_block_dev_get(dev);
-	if (!indr_dev)
-		return -ENOMEM;
-
-	indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
-	err = PTR_ERR_OR_ZERO(indr_block_cb);
-	if (err)
-		goto err_dev_put;
-
-	block = tc_dev_ingress_block(dev);
-	tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
-			      indr_block_cb->cb_priv, FLOW_BLOCK_BIND);
-	return 0;
-
-err_dev_put:
-	tc_indr_block_dev_put(indr_dev);
-	return err;
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
-
-int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-			      tc_indr_block_bind_cb_t *cb, void *cb_ident)
+static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
 {
-	int err;
-
-	rtnl_lock();
-	err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
-	rtnl_unlock();
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
+	const struct Qdisc_class_ops *cops;
+	struct Qdisc *qdisc;
 
-void __tc_indr_block_cb_unregister(struct net_device *dev,
-				   tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-	struct tcf_block *block;
+	if (!dev_ingress_queue(dev))
+		return NULL;
 
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
+	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
+	if (!qdisc)
+		return NULL;
 
-	indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (!indr_block_cb)
-		return;
+	cops = qdisc->ops->cl_ops;
+	if (!cops)
+		return NULL;
 
-	/* Send unbind message if required to free any block cbs. */
-	block = tc_dev_ingress_block(dev);
-	tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb,
-			      indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND);
-	tc_indr_block_cb_del(indr_block_cb);
-	tc_indr_block_dev_put(indr_dev);
-}
-EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
+	if (!cops->tcf_block)
+		return NULL;
 
-void tc_indr_block_cb_unregister(struct net_device *dev,
-				 tc_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	rtnl_lock();
-	__tc_indr_block_cb_unregister(dev, cb, cb_ident);
-	rtnl_unlock();
+	return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
 }
-EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
 
-static void flow_indr_block_call(struct net_device *dev,
-				 struct flow_block_offload *bo,
-				 enum flow_block_command command)
+static void tc_indr_block_get_and_ing_cmd(struct net_device *dev,
+					  flow_indr_block_bind_cb_t *cb,
+					  void *cb_priv,
+					  enum flow_block_command command)
 {
-	struct tc_indr_block_cb *indr_block_cb;
-	struct tc_indr_block_dev *indr_dev;
-
-	indr_dev = tc_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
+	struct tcf_block *block = tc_dev_ingress_block(dev);
 
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
-				  bo);
+	tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command);
 }
 
-static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
+static void tc_indr_block_call(struct tcf_block *block,
+			       struct net_device *dev,
 			       struct tcf_block_ext_info *ei,
 			       enum flow_block_command command,
 			       struct netlink_ext_ack *extack)
@@ -804,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	flow_indr_block_call(dev, &bo, command);
+	flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command);
 	tcf_block_setup(block, &bo);
 }
 
@@ -3378,11 +3195,6 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
-	err = rhashtable_init(&indr_setup_block_ht,
-			      &tc_indr_setup_block_ht_params);
-	if (err)
-		goto err_rhash_setup_block_ht;
-
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
@@ -3396,8 +3208,6 @@ static int __init tc_filter_init(void)
 
 	return 0;
 
-err_rhash_setup_block_ht:
-	unregister_pernet_subsys(&tcf_net_ops);
 err_register_pernet_subsys:
 	destroy_workqueue(tc_filter_wq);
 	return err;
-- 
cgit v1.2.3


From 1150ab0f1b333ca310431dac65d8fa403b8471da Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Wed, 7 Aug 2019 09:13:53 +0800
Subject: flow_offload: support get multi-subsystem block

It provide a callback list to find the blocks of tc
and nft subsystems

Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h | 10 ++++++++-
 net/core/flow_offload.c    | 51 ++++++++++++++++++++++++++++++++++------------
 net/sched/cls_api.c        |  9 +++++++-
 3 files changed, 55 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 46b8777ad05d..e8069b6c474c 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -379,6 +379,15 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev,
 					void *cb_priv,
 					enum flow_block_command command);
 
+struct flow_indr_block_ing_entry {
+	flow_indr_block_ing_cmd_t *cb;
+	struct list_head	list;
+};
+
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry);
+
 int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_ident);
@@ -395,7 +404,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
 				   void *cb_ident);
 
 void flow_indr_block_call(struct net_device *dev,
-			  flow_indr_block_ing_cmd_t *cb,
 			  struct flow_block_offload *bo,
 			  enum flow_block_command command);
 
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 4cc18e462d50..64c3d4d72b9c 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -3,6 +3,7 @@
 #include <linux/slab.h>
 #include <net/flow_offload.h>
 #include <linux/rtnetlink.h>
+#include <linux/mutex.h>
 
 struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
@@ -282,6 +283,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 }
 EXPORT_SYMBOL(flow_block_cb_setup_simple);
 
+static LIST_HEAD(block_ing_cb_list);
+
 static struct rhashtable indr_setup_block_ht;
 
 struct flow_indr_block_cb {
@@ -295,7 +298,6 @@ struct flow_indr_block_dev {
 	struct rhash_head ht_node;
 	struct net_device *dev;
 	unsigned int refcnt;
-	flow_indr_block_ing_cmd_t  *block_ing_cmd_cb;
 	struct list_head cb_list;
 };
 
@@ -389,6 +391,20 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
 	kfree(indr_block_cb);
 }
 
+static void flow_block_ing_cmd(struct net_device *dev,
+			       flow_indr_block_bind_cb_t *cb,
+			       void *cb_priv,
+			       enum flow_block_command command)
+{
+	struct flow_indr_block_ing_entry *entry;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(entry, &block_ing_cb_list, list) {
+		entry->cb(dev, cb, cb_priv, command);
+	}
+	rcu_read_unlock();
+}
+
 int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_ident)
@@ -406,10 +422,8 @@ int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 	if (err)
 		goto err_dev_put;
 
-	if (indr_dev->block_ing_cmd_cb)
-		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
-					   indr_block_cb->cb_priv,
-					   FLOW_BLOCK_BIND);
+	flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+			   FLOW_BLOCK_BIND);
 
 	return 0;
 
@@ -448,10 +462,8 @@ void __flow_indr_block_cb_unregister(struct net_device *dev,
 	if (!indr_block_cb)
 		return;
 
-	if (indr_dev->block_ing_cmd_cb)
-		indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb,
-					   indr_block_cb->cb_priv,
-					   FLOW_BLOCK_UNBIND);
+	flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
+			   FLOW_BLOCK_UNBIND);
 
 	flow_indr_block_cb_del(indr_block_cb);
 	flow_indr_block_dev_put(indr_dev);
@@ -469,7 +481,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev,
 EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
 
 void flow_indr_block_call(struct net_device *dev,
-			  flow_indr_block_ing_cmd_t cb,
 			  struct flow_block_offload *bo,
 			  enum flow_block_command command)
 {
@@ -480,15 +491,29 @@ void flow_indr_block_call(struct net_device *dev,
 	if (!indr_dev)
 		return;
 
-	indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND
-				     ? cb : NULL;
-
 	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
 		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 				  bo);
 }
 EXPORT_SYMBOL_GPL(flow_indr_block_call);
 
+static DEFINE_MUTEX(flow_indr_block_ing_cb_lock);
+void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+	mutex_lock(&flow_indr_block_ing_cb_lock);
+	list_add_tail_rcu(&entry->list, &block_ing_cb_list);
+	mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb);
+
+void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry)
+{
+	mutex_lock(&flow_indr_block_ing_cb_lock);
+	list_del_rcu(&entry->list);
+	mutex_unlock(&flow_indr_block_ing_cb_lock);
+}
+EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb);
+
 static int __init init_flow_indr_rhashtable(void)
 {
 	return rhashtable_init(&indr_setup_block_ht,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b0dde26783d..e0d8b456e9f5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -621,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block,
 	};
 	INIT_LIST_HEAD(&bo.cb_list);
 
-	flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command);
+	flow_indr_block_call(dev, &bo, command);
 	tcf_block_setup(block, &bo);
 }
 
@@ -3183,6 +3183,11 @@ static struct pernet_operations tcf_net_ops = {
 	.size = sizeof(struct tcf_net),
 };
 
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb = tc_indr_block_get_and_ing_cmd,
+	.list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
 static int __init tc_filter_init(void)
 {
 	int err;
@@ -3195,6 +3200,8 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
+	flow_indr_add_block_ing_cb(&block_ing_entry);
+
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
-- 
cgit v1.2.3


From 9a32669fecfb484a1f78fe48d0e42a5efccb0675 Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Wed, 7 Aug 2019 09:13:54 +0800
Subject: netfilter: nf_tables_offload: support indr block call

nftable support indr-block call. It makes nftable an offload vlan
and tunnel device.

nft add table netdev firewall
nft add chain netdev firewall aclout { type filter hook ingress offload device mlx_pf0vf0 priority - 300 \; }
nft add rule netdev firewall aclout ip daddr 10.0.0.1 fwd to vlan0
nft add chain netdev firewall aclin { type filter hook ingress device vlan0 priority - 300 \; }
nft add rule netdev firewall aclin ip daddr 10.0.0.7 fwd to mlx_pf0vf0

Signed-off-by: wenxu <wenxu@ucloud.cn>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_tables_offload.h |   4 +
 net/netfilter/nf_tables_api.c             |   7 ++
 net/netfilter/nf_tables_offload.c         | 148 +++++++++++++++++++++++++-----
 3 files changed, 135 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663a10e3..bffd51a90343 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -63,6 +63,10 @@ struct nft_rule;
 struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
 int nft_flow_rule_offload_commit(struct net *net);
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+				    flow_indr_block_bind_cb_t *cb,
+				    void *cb_priv,
+				    enum flow_block_command command);
 
 #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
 	(__reg)->base_offset	=					\
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 605a7cfe7ca7..fe3b7b0c6c66 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7593,6 +7593,11 @@ static struct pernet_operations nf_tables_net_ops = {
 	.exit	= nf_tables_exit_net,
 };
 
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb = nft_indr_block_get_and_ing_cmd,
+	.list = LIST_HEAD_INIT(block_ing_entry.list),
+};
+
 static int __init nf_tables_module_init(void)
 {
 	int err;
@@ -7624,6 +7629,7 @@ static int __init nf_tables_module_init(void)
 		goto err5;
 
 	nft_chain_route_init();
+	flow_indr_add_block_ing_cb(&block_ing_entry);
 	return err;
 err5:
 	rhltable_destroy(&nft_objname_ht);
@@ -7640,6 +7646,7 @@ err1:
 
 static void __exit nf_tables_module_exit(void)
 {
+	flow_indr_del_block_ing_cb(&block_ing_entry);
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 	nft_chain_filter_fini();
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 64f5fd5f240e..d3c4c9c88bc8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -171,24 +171,110 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
 	return 0;
 }
 
+static int nft_block_setup(struct nft_base_chain *basechain,
+			   struct flow_block_offload *bo,
+			   enum flow_block_command cmd)
+{
+	int err;
+
+	switch (cmd) {
+	case FLOW_BLOCK_BIND:
+		err = nft_flow_offload_bind(bo, basechain);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = nft_flow_offload_unbind(bo, basechain);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int nft_block_offload_cmd(struct nft_base_chain *chain,
+				 struct net_device *dev,
+				 enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+	int err;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0)
+		return err;
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
+static void nft_indr_block_ing_cmd(struct net_device *dev,
+				   struct nft_base_chain *chain,
+				   flow_indr_block_bind_cb_t *cb,
+				   void *cb_priv,
+				   enum flow_block_command cmd)
+{
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo = {};
+
+	if (!chain)
+		return;
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
+
+	nft_block_setup(chain, &bo, cmd);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+				      struct net_device *dev,
+				      enum flow_block_command cmd)
+{
+	struct flow_block_offload bo = {};
+	struct netlink_ext_ack extack = {};
+
+	bo.net = dev_net(dev);
+	bo.block = &chain->flow_block;
+	bo.command = cmd;
+	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+	bo.extack = &extack;
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	flow_indr_block_call(dev, &bo, cmd);
+
+	if (list_empty(&bo.cb_list))
+		return -EOPNOTSUPP;
+
+	return nft_block_setup(chain, &bo, cmd);
+}
+
 #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
 
 static int nft_flow_offload_chain(struct nft_trans *trans,
 				  enum flow_block_command cmd)
 {
 	struct nft_chain *chain = trans->ctx.chain;
-	struct netlink_ext_ack extack = {};
-	struct flow_block_offload bo = {};
 	struct nft_base_chain *basechain;
 	struct net_device *dev;
-	int err;
 
 	if (!nft_is_base_chain(chain))
 		return -EOPNOTSUPP;
 
 	basechain = nft_base_chain(chain);
 	dev = basechain->ops.dev;
-	if (!dev || !dev->netdev_ops->ndo_setup_tc)
+	if (!dev)
 		return -EOPNOTSUPP;
 
 	/* Only default policy to accept is supported for now. */
@@ -197,26 +283,10 @@ static int nft_flow_offload_chain(struct nft_trans *trans,
 	    nft_trans_chain_policy(trans) != NF_ACCEPT)
 		return -EOPNOTSUPP;
 
-	bo.command = cmd;
-	bo.block = &basechain->flow_block;
-	bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
-	bo.extack = &extack;
-	INIT_LIST_HEAD(&bo.cb_list);
-
-	err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo);
-	if (err < 0)
-		return err;
-
-	switch (cmd) {
-	case FLOW_BLOCK_BIND:
-		err = nft_flow_offload_bind(&bo, basechain);
-		break;
-	case FLOW_BLOCK_UNBIND:
-		err = nft_flow_offload_unbind(&bo, basechain);
-		break;
-	}
-
-	return err;
+	if (dev->netdev_ops->ndo_setup_tc)
+		return nft_block_offload_cmd(basechain, dev, cmd);
+	else
+		return nft_indr_block_offload_cmd(basechain, dev, cmd);
 }
 
 int nft_flow_rule_offload_commit(struct net *net)
@@ -266,3 +336,33 @@ int nft_flow_rule_offload_commit(struct net *net)
 
 	return err;
 }
+
+void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
+				    flow_indr_block_bind_cb_t *cb,
+				    void *cb_priv,
+				    enum flow_block_command command)
+{
+	struct net *net = dev_net(dev);
+	const struct nft_table *table;
+	const struct nft_chain *chain;
+
+	list_for_each_entry_rcu(table, &net->nft.tables, list) {
+		if (table->family != NFPROTO_NETDEV)
+			continue;
+
+		list_for_each_entry_rcu(chain, &table->chains, list) {
+			if (nft_is_base_chain(chain)) {
+				struct nft_base_chain *basechain;
+
+				basechain = nft_base_chain(chain);
+				if (!strncmp(basechain->dev_name, dev->name,
+					     IFNAMSIZ)) {
+					nft_indr_block_ing_cmd(dev, basechain,
+							       cb, cb_priv,
+							       command);
+					return;
+				}
+			}
+		}
+	}
+}
-- 
cgit v1.2.3


From 65526f638efc6435b9658d45a3ddd07e6fdbbb7d Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Wed, 31 Jul 2019 16:05:54 +0300
Subject: crypto: gcm - helper functions for assoclen/authsize check

Added inline helper functions to check authsize and assoclen for
gcm, rfc4106 and rfc4543.
These are used in the generic implementation of gcm, rfc4106 and
rfc4543.

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/gcm.c         | 41 ++++++++++++++-------------------------
 include/crypto/gcm.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/crypto/gcm.c b/crypto/gcm.c
index f254e2d4c206..2f3b50f8f3e0 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -152,20 +152,7 @@ out:
 static int crypto_gcm_setauthsize(struct crypto_aead *tfm,
 				  unsigned int authsize)
 {
-	switch (authsize) {
-	case 4:
-	case 8:
-	case 12:
-	case 13:
-	case 14:
-	case 15:
-	case 16:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
+	return crypto_gcm_check_authsize(authsize);
 }
 
 static void crypto_gcm_init_common(struct aead_request *req)
@@ -762,15 +749,11 @@ static int crypto_rfc4106_setauthsize(struct crypto_aead *parent,
 				      unsigned int authsize)
 {
 	struct crypto_rfc4106_ctx *ctx = crypto_aead_ctx(parent);
+	int err;
 
-	switch (authsize) {
-	case 8:
-	case 12:
-	case 16:
-		break;
-	default:
-		return -EINVAL;
-	}
+	err = crypto_rfc4106_check_authsize(authsize);
+	if (err)
+		return err;
 
 	return crypto_aead_setauthsize(ctx->child, authsize);
 }
@@ -818,8 +801,11 @@ static struct aead_request *crypto_rfc4106_crypt(struct aead_request *req)
 
 static int crypto_rfc4106_encrypt(struct aead_request *req)
 {
-	if (req->assoclen != 16 && req->assoclen != 20)
-		return -EINVAL;
+	int err;
+
+	err = crypto_ipsec_check_assoclen(req->assoclen);
+	if (err)
+		return err;
 
 	req = crypto_rfc4106_crypt(req);
 
@@ -828,8 +814,11 @@ static int crypto_rfc4106_encrypt(struct aead_request *req)
 
 static int crypto_rfc4106_decrypt(struct aead_request *req)
 {
-	if (req->assoclen != 16 && req->assoclen != 20)
-		return -EINVAL;
+	int err;
+
+	err = crypto_ipsec_check_assoclen(req->assoclen);
+	if (err)
+		return err;
 
 	req = crypto_rfc4106_crypt(req);
 
diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h
index c50e057ea17e..9d7eff04f224 100644
--- a/include/crypto/gcm.h
+++ b/include/crypto/gcm.h
@@ -1,8 +1,63 @@
 #ifndef _CRYPTO_GCM_H
 #define _CRYPTO_GCM_H
 
+#include <linux/errno.h>
+
 #define GCM_AES_IV_SIZE 12
 #define GCM_RFC4106_IV_SIZE 8
 #define GCM_RFC4543_IV_SIZE 8
 
+/*
+ * validate authentication tag for GCM
+ */
+static inline int crypto_gcm_check_authsize(unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * validate authentication tag for RFC4106
+ */
+static inline int crypto_rfc4106_check_authsize(unsigned int authsize)
+{
+	switch (authsize) {
+	case 8:
+	case 12:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * validate assoclen for RFC4106/RFC4543
+ */
+static inline int crypto_ipsec_check_assoclen(unsigned int assoclen)
+{
+	switch (assoclen) {
+	case 16:
+	case 20:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
 #endif
-- 
cgit v1.2.3


From bc67d04e75260942fb534fb91673103dcad7ca96 Mon Sep 17 00:00:00 2001
From: Iuliana Prodan <iuliana.prodan@nxp.com>
Date: Wed, 31 Jul 2019 16:05:55 +0300
Subject: crypto: aes - helper function to validate key length for AES
 algorithms

Add inline helper function to check key length for AES algorithms.
The key can be 128, 192 or 256 bits size.
This function is used in the generic aes implementation.

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/aes.h | 17 +++++++++++++++++
 lib/crypto/aes.c     |  8 ++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 8e0f4cf948e5..2090729701ab 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -31,6 +31,23 @@ struct crypto_aes_ctx {
 extern const u32 crypto_ft_tab[4][256] ____cacheline_aligned;
 extern const u32 crypto_it_tab[4][256] ____cacheline_aligned;
 
+/*
+ * validate key length for AES algorithms
+ */
+static inline int aes_check_keylen(unsigned int keylen)
+{
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+	case AES_KEYSIZE_192:
+	case AES_KEYSIZE_256:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);
 
diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
index 4e100af38c51..827fe89922ff 100644
--- a/lib/crypto/aes.c
+++ b/lib/crypto/aes.c
@@ -187,11 +187,11 @@ int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
 {
 	u32 kwords = key_len / sizeof(u32);
 	u32 rc, i, j;
+	int err;
 
-	if (key_len != AES_KEYSIZE_128 &&
-	    key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256)
-		return -EINVAL;
+	err = aes_check_keylen(key_len);
+	if (err)
+		return err;
 
 	ctx->key_length = key_len;
 
-- 
cgit v1.2.3


From 76067459c686c4fc6352613e5a6a54e4ffef2861 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Wed, 7 Aug 2019 10:03:12 +0200
Subject: net: stmmac: Implement RSS and enable it in XGMAC core

Implement the RSS functionality and add the corresponding callbacks in
XGMAC core.

Changes from v1:
	- Do not use magic constants (Jakub)
	- Use ethtool_rxfh_indir_default() (Jakub)

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |  5 ++
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h     | 22 ++++++-
 .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c    | 52 +++++++++++++++
 .../net/ethernet/stmicro/stmmac/dwxgmac2_descs.c   | 29 +++++++++
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c |  1 +
 drivers/net/ethernet/stmicro/stmmac/hwif.h         | 11 ++++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |  9 +++
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   | 75 ++++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  | 41 +++++++++++-
 include/linux/stmmac.h                             |  1 +
 10 files changed, 242 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index ed872eed1cab..45a997fe571c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -354,6 +354,7 @@ struct dma_features {
 	unsigned int frpbs;
 	unsigned int frpes;
 	unsigned int addr64;
+	unsigned int rssen;
 };
 
 /* GMAC TX FIFO is 8K, Rx FIFO is 16K */
@@ -381,6 +382,10 @@ struct dma_features {
 
 #define JUMBO_LEN		9000
 
+/* Receive Side Scaling */
+#define STMMAC_RSS_HASH_KEY_SIZE	40
+#define STMMAC_RSS_MAX_TABLE_SIZE	256
+
 extern const struct stmmac_desc_ops enh_desc_ops;
 extern const struct stmmac_desc_ops ndesc_ops;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index b77091161765..ed3a85f73a72 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -89,6 +89,7 @@
 #define XGMAC_HWFEAT_RWKSEL		BIT(6)
 #define XGMAC_HWFEAT_GMIISEL		BIT(1)
 #define XGMAC_HW_FEATURE1		0x00000120
+#define XGMAC_HWFEAT_RSSEN		BIT(20)
 #define XGMAC_HWFEAT_TSOEN		BIT(18)
 #define XGMAC_HWFEAT_ADDR64		GENMASK(15, 14)
 #define XGMAC_HWFEAT_TXFIFOSIZE		GENMASK(10, 6)
@@ -109,6 +110,17 @@
 #define XGMAC_DCS_SHIFT			16
 #define XGMAC_ADDRx_LOW(x)		(0x00000304 + (x) * 0x8)
 #define XGMAC_ARP_ADDR			0x00000c10
+#define XGMAC_RSS_CTRL			0x00000c80
+#define XGMAC_UDP4TE			BIT(3)
+#define XGMAC_TCP4TE			BIT(2)
+#define XGMAC_IP2TE			BIT(1)
+#define XGMAC_RSSE			BIT(0)
+#define XGMAC_RSS_ADDR			0x00000c88
+#define XGMAC_RSSIA_SHIFT		8
+#define XGMAC_ADDRT			BIT(2)
+#define XGMAC_CT			BIT(1)
+#define XGMAC_OB			BIT(0)
+#define XGMAC_RSS_DATA			0x00000c8c
 #define XGMAC_TIMESTAMP_STATUS		0x00000d20
 #define XGMAC_TXTSC			BIT(15)
 #define XGMAC_TXTIMESTAMP_NSEC		0x00000d30
@@ -125,8 +137,9 @@
 #define XGMAC_MTL_INT_STATUS		0x00001020
 #define XGMAC_MTL_RXQ_DMA_MAP0		0x00001030
 #define XGMAC_MTL_RXQ_DMA_MAP1		0x00001034
-#define XGMAC_QxMDMACH(x)		GENMASK((x) * 8 + 3, (x) * 8)
+#define XGMAC_QxMDMACH(x)		GENMASK((x) * 8 + 7, (x) * 8)
 #define XGMAC_QxMDMACH_SHIFT(x)		((x) * 8)
+#define XGMAC_QDDMACH			BIT(7)
 #define XGMAC_TC_PRTY_MAP0		0x00001040
 #define XGMAC_TC_PRTY_MAP1		0x00001044
 #define XGMAC_PSTC(x)			GENMASK((x) * 8 + 7, (x) * 8)
@@ -261,6 +274,13 @@
 #define XGMAC_RDES3_IOC			BIT(30)
 #define XGMAC_RDES3_LD			BIT(28)
 #define XGMAC_RDES3_CDA			BIT(27)
+#define XGMAC_RDES3_RSV			BIT(26)
+#define XGMAC_RDES3_L34T		GENMASK(23, 20)
+#define XGMAC_RDES3_L34T_SHIFT		20
+#define XGMAC_L34T_IP4TCP		0x1
+#define XGMAC_L34T_IP4UDP		0x2
+#define XGMAC_L34T_IP6TCP		0x9
+#define XGMAC_L34T_IP6UDP		0xA
 #define XGMAC_RDES3_ES			BIT(15)
 #define XGMAC_RDES3_PL			GENMASK(13, 0)
 #define XGMAC_RDES3_TSD			BIT(6)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index bfbd5ae11540..04eec85acc59 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -6,6 +6,7 @@
 
 #include <linux/bitrev.h>
 #include <linux/crc32.h>
+#include <linux/iopoll.h>
 #include "stmmac.h"
 #include "dwxgmac2.h"
 
@@ -439,6 +440,56 @@ static void dwxgmac2_set_mac_loopback(void __iomem *ioaddr, bool enable)
 	writel(value, ioaddr + XGMAC_RX_CONFIG);
 }
 
+static int dwxgmac2_rss_write_reg(void __iomem *ioaddr, bool is_key, int idx,
+				  u32 val)
+{
+	u32 ctrl = 0;
+
+	writel(val, ioaddr + XGMAC_RSS_DATA);
+	ctrl |= idx << XGMAC_RSSIA_SHIFT;
+	ctrl |= is_key ? XGMAC_ADDRT : 0x0;
+	ctrl |= XGMAC_OB;
+	writel(ctrl, ioaddr + XGMAC_RSS_ADDR);
+
+	return readl_poll_timeout(ioaddr + XGMAC_RSS_ADDR, ctrl,
+				  !(ctrl & XGMAC_OB), 100, 10000);
+}
+
+static int dwxgmac2_rss_configure(struct mac_device_info *hw,
+				  struct stmmac_rss *cfg, u32 num_rxq)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 *key = (u32 *)cfg->key;
+	int i, ret;
+	u32 value;
+
+	value = readl(ioaddr + XGMAC_RSS_CTRL);
+	if (!cfg->enable) {
+		value &= ~XGMAC_RSSE;
+		writel(value, ioaddr + XGMAC_RSS_CTRL);
+		return 0;
+	}
+
+	for (i = 0; i < (sizeof(cfg->key) / sizeof(u32)); i++) {
+		ret = dwxgmac2_rss_write_reg(ioaddr, true, i, *key++);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(cfg->table); i++) {
+		ret = dwxgmac2_rss_write_reg(ioaddr, false, i, cfg->table[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < num_rxq; i++)
+		dwxgmac2_map_mtl_to_dma(hw, i, XGMAC_QDDMACH);
+
+	value |= XGMAC_UDP4TE | XGMAC_TCP4TE | XGMAC_IP2TE | XGMAC_RSSE;
+	writel(value, ioaddr + XGMAC_RSS_CTRL);
+	return 0;
+}
+
 const struct stmmac_ops dwxgmac210_ops = {
 	.core_init = dwxgmac2_core_init,
 	.set_mac = dwxgmac2_set_mac,
@@ -469,6 +520,7 @@ const struct stmmac_ops dwxgmac210_ops = {
 	.debug = NULL,
 	.set_filter = dwxgmac2_set_filter,
 	.set_mac_loopback = dwxgmac2_set_mac_loopback,
+	.rss_configure = dwxgmac2_rss_configure,
 };
 
 int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index c4c45402b8f8..8c5dd6a36157 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -254,6 +254,34 @@ static void dwxgmac2_clear(struct dma_desc *p)
 	p->des3 = 0;
 }
 
+static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash,
+				enum pkt_hash_types *type)
+{
+	unsigned int rdes3 = le32_to_cpu(p->des3);
+	u32 ptype;
+
+	if (rdes3 & XGMAC_RDES3_RSV) {
+		ptype = (rdes3 & XGMAC_RDES3_L34T) >> XGMAC_RDES3_L34T_SHIFT;
+
+		switch (ptype) {
+		case XGMAC_L34T_IP4TCP:
+		case XGMAC_L34T_IP4UDP:
+		case XGMAC_L34T_IP6TCP:
+		case XGMAC_L34T_IP6UDP:
+			*type = PKT_HASH_TYPE_L4;
+			break;
+		default:
+			*type = PKT_HASH_TYPE_L3;
+			break;
+		}
+
+		*hash = le32_to_cpu(p->des1);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
 const struct stmmac_desc_ops dwxgmac210_desc_ops = {
 	.tx_status = dwxgmac2_get_tx_status,
 	.rx_status = dwxgmac2_get_rx_status,
@@ -277,4 +305,5 @@ const struct stmmac_desc_ops dwxgmac210_desc_ops = {
 	.get_addr = dwxgmac2_get_addr,
 	.set_addr = dwxgmac2_set_addr,
 	.clear = dwxgmac2_clear,
+	.get_rx_hash = dwxgmac2_get_rx_hash,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 0f1c772e892a..45a6634ee397 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -363,6 +363,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
 
 	/* MAC HW feature 1 */
 	hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1);
+	dma_cap->rssen = (hw_cap & XGMAC_HWFEAT_RSSEN) >> 20;
 	dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18;
 
 	dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 00539a09d1db..bfe7efee9481 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -86,6 +86,9 @@ struct stmmac_desc_ops {
 	void (*set_addr)(struct dma_desc *p, dma_addr_t addr);
 	/* clear descriptor */
 	void (*clear)(struct dma_desc *p);
+	/* RSS */
+	int (*get_rx_hash)(struct dma_desc *p, u32 *hash,
+			   enum pkt_hash_types *type);
 };
 
 #define stmmac_init_rx_desc(__priv, __args...) \
@@ -136,6 +139,8 @@ struct stmmac_desc_ops {
 	stmmac_do_void_callback(__priv, desc, set_addr, __args)
 #define stmmac_clear_desc(__priv, __args...) \
 	stmmac_do_void_callback(__priv, desc, clear, __args)
+#define stmmac_get_rx_hash(__priv, __args...) \
+	stmmac_do_callback(__priv, desc, get_rx_hash, __args)
 
 struct stmmac_dma_cfg;
 struct dma_features;
@@ -249,6 +254,7 @@ struct rgmii_adv;
 struct stmmac_safety_stats;
 struct stmmac_tc_entry;
 struct stmmac_pps_cfg;
+struct stmmac_rss;
 
 /* Helpers to program the MAC core */
 struct stmmac_ops {
@@ -327,6 +333,9 @@ struct stmmac_ops {
 			       u32 sub_second_inc, u32 systime_flags);
 	/* Loopback for selftests */
 	void (*set_mac_loopback)(void __iomem *ioaddr, bool enable);
+	/* RSS */
+	int (*rss_configure)(struct mac_device_info *hw,
+			     struct stmmac_rss *cfg, u32 num_rxq);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -397,6 +406,8 @@ struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, flex_pps_config, __args)
 #define stmmac_set_mac_loopback(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args)
+#define stmmac_rss_configure(__priv, __args...) \
+	stmmac_do_callback(__priv, mac, rss_configure, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 5cd966c154f3..d2f6f56ae29c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -113,6 +113,12 @@ struct stmmac_pps_cfg {
 	struct timespec64 period;
 };
 
+struct stmmac_rss {
+	int enable;
+	u8 key[STMMAC_RSS_HASH_KEY_SIZE];
+	u32 table[STMMAC_RSS_MAX_TABLE_SIZE];
+};
+
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
 	u32 tx_coal_frames;
@@ -203,6 +209,9 @@ struct stmmac_priv {
 
 	/* Pulse Per Second output */
 	struct stmmac_pps_cfg pps[STMMAC_PPS_MAX];
+
+	/* Receive Side Scaling */
+	struct stmmac_rss rss;
 };
 
 enum stmmac_state {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index d294590cba27..2423160ab582 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -764,6 +764,76 @@ static int stmmac_set_coalesce(struct net_device *dev,
 	return 0;
 }
 
+static int stmmac_get_rxnfc(struct net_device *dev,
+			    struct ethtool_rxnfc *rxnfc, u32 *rule_locs)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	switch (rxnfc->cmd) {
+	case ETHTOOL_GRXRINGS:
+		rxnfc->data = priv->plat->rx_queues_to_use;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static u32 stmmac_get_rxfh_key_size(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	return sizeof(priv->rss.key);
+}
+
+static u32 stmmac_get_rxfh_indir_size(struct net_device *dev)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+
+	return ARRAY_SIZE(priv->rss.table);
+}
+
+static int stmmac_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+			   u8 *hfunc)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int i;
+
+	if (indir) {
+		for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+			indir[i] = priv->rss.table[i];
+	}
+
+	if (key)
+		memcpy(key, priv->rss.key, sizeof(priv->rss.key));
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	return 0;
+}
+
+static int stmmac_set_rxfh(struct net_device *dev, const u32 *indir,
+			   const u8 *key, const u8 hfunc)
+{
+	struct stmmac_priv *priv = netdev_priv(dev);
+	int i;
+
+	if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && (hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+
+	if (indir) {
+		for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+			priv->rss.table[i] = indir[i];
+	}
+
+	if (key)
+		memcpy(priv->rss.key, key, sizeof(priv->rss.key));
+
+	return stmmac_rss_configure(priv, priv->hw, &priv->rss,
+				    priv->plat->rx_queues_to_use);
+}
+
 static int stmmac_get_ts_info(struct net_device *dev,
 			      struct ethtool_ts_info *info)
 {
@@ -855,6 +925,11 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 	.get_eee = stmmac_ethtool_op_get_eee,
 	.set_eee = stmmac_ethtool_op_set_eee,
 	.get_sset_count	= stmmac_get_sset_count,
+	.get_rxnfc = stmmac_get_rxnfc,
+	.get_rxfh_key_size = stmmac_get_rxfh_key_size,
+	.get_rxfh_indir_size = stmmac_get_rxfh_indir_size,
+	.get_rxfh = stmmac_get_rxfh,
+	.set_rxfh = stmmac_set_rxfh,
 	.get_ts_info = stmmac_get_ts_info,
 	.get_coalesce = stmmac_get_coalesce,
 	.set_coalesce = stmmac_set_coalesce,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index fd54c7c87485..404a0548f213 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2417,6 +2417,22 @@ static void stmmac_mac_config_rx_queues_routing(struct stmmac_priv *priv)
 	}
 }
 
+static void stmmac_mac_config_rss(struct stmmac_priv *priv)
+{
+	if (!priv->dma_cap.rssen || !priv->plat->rss_en) {
+		priv->rss.enable = false;
+		return;
+	}
+
+	if (priv->dev->features & NETIF_F_RXHASH)
+		priv->rss.enable = true;
+	else
+		priv->rss.enable = false;
+
+	stmmac_rss_configure(priv, priv->hw, &priv->rss,
+			     priv->plat->rx_queues_to_use);
+}
+
 /**
  *  stmmac_mtl_configuration - Configure MTL
  *  @priv: driver private structure
@@ -2461,6 +2477,10 @@ static void stmmac_mtl_configuration(struct stmmac_priv *priv)
 	/* Set RX routing */
 	if (rx_queues_count > 1)
 		stmmac_mac_config_rx_queues_routing(priv);
+
+	/* Receive Side Scaling */
+	if (rx_queues_count > 1)
+		stmmac_mac_config_rss(priv);
 }
 
 static void stmmac_safety_feat_configuration(struct stmmac_priv *priv)
@@ -3385,9 +3405,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			priv->dev->stats.rx_errors++;
 			buf->page = NULL;
 		} else {
+			enum pkt_hash_types hash_type;
 			struct sk_buff *skb;
-			int frame_len;
 			unsigned int des;
+			int frame_len;
+			u32 hash;
 
 			stmmac_get_desc_addr(priv, p, &des);
 			frame_len = stmmac_get_rx_frame_len(priv, p, coe);
@@ -3452,6 +3474,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			else
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+			if (!stmmac_get_rx_hash(priv, p, &hash, &hash_type))
+				skb_set_hash(skb, hash, hash_type);
+
+			skb_record_rx_queue(skb, queue);
 			napi_gro_receive(&ch->rx_napi, skb);
 
 			/* Data payload copied into SKB, page ready for recycle */
@@ -4175,8 +4201,8 @@ int stmmac_dvr_probe(struct device *device,
 {
 	struct net_device *ndev = NULL;
 	struct stmmac_priv *priv;
-	u32 queue, maxq;
-	int ret = 0;
+	u32 queue, rxq, maxq;
+	int i, ret = 0;
 
 	ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv),
 				       MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES);
@@ -4284,6 +4310,15 @@ int stmmac_dvr_probe(struct device *device,
 #endif
 	priv->msg_enable = netif_msg_init(debug, default_msg_level);
 
+	/* Initialize RSS */
+	rxq = priv->plat->rx_queues_to_use;
+	netdev_rss_key_fill(priv->rss.key, sizeof(priv->rss.key));
+	for (i = 0; i < ARRAY_SIZE(priv->rss.table); i++)
+		priv->rss.table[i] = ethtool_rxfh_indir_default(i, rxq);
+
+	if (priv->dma_cap.rssen && priv->plat->rss_en)
+		ndev->features |= NETIF_F_RXHASH;
+
 	/* MTU range: 46 - hw-specific max */
 	ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
 	if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 7b3e354bcd3c..5cc6b6faf359 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -173,6 +173,7 @@ struct plat_stmmacenet_data {
 	int has_gmac4;
 	bool has_sun8i;
 	bool tso_en;
+	int rss_en;
 	int mac_port_sel_speed;
 	bool en_tx_lpi_clockgating;
 	int has_xgmac;
-- 
cgit v1.2.3


From b2a3974253d32374af556541141d7fdad8fe2ce0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 21 Jul 2019 16:36:49 +0900
Subject: usb: dwc3: omap: squash include/linux/platform_data/dwc3-omap.h

This enum is only used in drivers/usb/dwc3/dwc3-omap3.c

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/dwc3/dwc3-omap.c            |  7 +++++-
 include/linux/platform_data/dwc3-omap.h | 43 ---------------------------------
 2 files changed, 6 insertions(+), 44 deletions(-)
 delete mode 100644 include/linux/platform_data/dwc3-omap.h

(limited to 'include')

diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
index 84c459c15222..c2940dbfb683 100644
--- a/drivers/usb/dwc3/dwc3-omap.c
+++ b/drivers/usb/dwc3/dwc3-omap.c
@@ -14,7 +14,6 @@
 #include <linux/irq.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/dwc3-omap.h>
 #include <linux/pm_runtime.h>
 #include <linux/dma-mapping.h>
 #include <linux/ioport.h>
@@ -106,6 +105,12 @@
 #define USBOTGSS_UTMI_OTG_CTRL_SESSVALID	BIT(2)
 #define USBOTGSS_UTMI_OTG_CTRL_VBUSVALID	BIT(1)
 
+enum dwc3_omap_utmi_mode {
+	DWC3_OMAP_UTMI_MODE_UNKNOWN = 0,
+	DWC3_OMAP_UTMI_MODE_HW,
+	DWC3_OMAP_UTMI_MODE_SW,
+};
+
 struct dwc3_omap {
 	struct device		*dev;
 
diff --git a/include/linux/platform_data/dwc3-omap.h b/include/linux/platform_data/dwc3-omap.h
deleted file mode 100644
index 1d36ca874cc8..000000000000
--- a/include/linux/platform_data/dwc3-omap.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * dwc3-omap.h - OMAP Specific Glue layer, header.
- *
- * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com
- * All rights reserved.
- *
- * Author: Felipe Balbi <balbi@ti.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions, and the following disclaimer,
- *    without modification.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The names of the above-listed copyright holders may not be used
- *    to endorse or promote products derived from this software without
- *    specific prior written permission.
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2, as published by the Free
- * Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-enum dwc3_omap_utmi_mode {
-	DWC3_OMAP_UTMI_MODE_UNKNOWN = 0,
-	DWC3_OMAP_UTMI_MODE_HW,
-	DWC3_OMAP_UTMI_MODE_SW,
-};
-- 
cgit v1.2.3


From 414776621d1006e57e80e6db7fdc3837897aaa64 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 7 Aug 2019 17:03:59 -0700
Subject: net/tls: prevent skb_orphan() from leaking TLS plain text with
 offload

sk_validate_xmit_skb() and drivers depend on the sk member of
struct sk_buff to identify segments requiring encryption.
Any operation which removes or does not preserve the original TLS
socket such as skb_orphan() or skb_clone() will cause clear text
leaks.

Make the TCP socket underlying an offloaded TLS connection
mark all skbs as decrypted, if TLS TX is in offload mode.
Then in sk_validate_xmit_skb() catch skbs which have no socket
(or a socket with no validation) and decrypted flag set.

Note that CONFIG_SOCK_VALIDATE_XMIT, CONFIG_TLS_DEVICE and
sk->sk_validate_xmit_skb are slightly interchangeable right now,
they all imply TLS offload. The new checks are guarded by
CONFIG_TLS_DEVICE because that's the option guarding the
sk_buff->decrypted member.

Second, smaller issue with orphaning is that it breaks
the guarantee that packets will be delivered to device
queues in-order. All TLS offload drivers depend on that
scheduling property. This means skb_orphan_partial()'s
trick of preserving partial socket references will cause
issues in the drivers. We need a full orphan, and as a
result netem delay/throttling will cause all TLS offload
skbs to be dropped.

Reusing the sk_buff->decrypted flag also protects from
leaking clear text when incoming, decrypted skb is redirected
(e.g. by TC).

See commit 0608c69c9a80 ("bpf: sk_msg, sock{map|hash} redirect
through ULP") for justification why the internal flag is safe.
The only location which could leak the flag in is tcp_bpf_sendmsg(),
which is taken care of by clearing the previously unused bit.

v2:
 - remove superfluous decrypted mark copy (Willem);
 - remove the stale doc entry (Boris);
 - rely entirely on EOR marking to prevent coalescing (Boris);
 - use an internal sendpages flag instead of marking the socket
   (Boris).
v3 (Willem):
 - reorganize the can_skb_orphan_partial() condition;
 - fix the flag leak-in through tcp_bpf_sendmsg.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tls-offload.rst | 18 ------------------
 include/linux/skbuff.h                   |  8 ++++++++
 include/linux/socket.h                   |  3 +++
 include/net/sock.h                       | 10 +++++++++-
 net/core/sock.c                          | 19 ++++++++++++++-----
 net/ipv4/tcp.c                           |  3 +++
 net/ipv4/tcp_bpf.c                       |  6 +++++-
 net/ipv4/tcp_output.c                    |  3 +++
 net/tls/tls_device.c                     |  9 +++++++--
 9 files changed, 52 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index b70b70dc4524..0dd3f748239f 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -506,21 +506,3 @@ Drivers should ignore the changes to TLS the device feature flags.
 These flags will be acted upon accordingly by the core ``ktls`` code.
 TLS device feature flags only control adding of new TLS connection
 offloads, old connections will remain active after flags are cleared.
-
-Known bugs
-==========
-
-skb_orphan() leaks clear text
------------------------------
-
-Currently drivers depend on the :c:member:`sk` member of
-:c:type:`struct sk_buff <sk_buff>` to identify segments requiring
-encryption. Any operation which removes or does not preserve the socket
-association such as :c:func:`skb_orphan` or :c:func:`skb_clone`
-will cause the driver to miss the packets and lead to clear text leaks.
-
-Redirects leak clear text
--------------------------
-
-In the RX direction, if segment has already been decrypted by the device
-and it gets redirected or mirrored - clear text will be transmitted out.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8af86d995d6..ba5583522d24 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1374,6 +1374,14 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
 	to->l4_hash = from->l4_hash;
 };
 
+static inline void skb_copy_decrypted(struct sk_buff *to,
+				      const struct sk_buff *from)
+{
+#ifdef CONFIG_TLS_DEVICE
+	to->decrypted = from->decrypted;
+#endif
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 97523818cb14..fc0bed59fc84 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -292,6 +292,9 @@ struct ucred {
 #define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
 #define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
+#define MSG_SENDPAGE_DECRYPTED	0x100000 /* sendpage() internal : page may carry
+					  * plain text and require encryption
+					  */
 
 #define MSG_ZEROCOPY	0x4000000	/* Use user data in kernel path */
 #define MSG_FASTOPEN	0x20000000	/* Send data in TCP SYN */
diff --git a/include/net/sock.h b/include/net/sock.h
index 228db3998e46..2c53f1a1d905 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2482,6 +2482,7 @@ static inline bool sk_fullsock(const struct sock *sk)
 
 /* Checks if this SKB belongs to an HW offloaded socket
  * and whether any SW fallbacks are required based on dev.
+ * Check decrypted mark in case skb_orphan() cleared socket.
  */
 static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
 						   struct net_device *dev)
@@ -2489,8 +2490,15 @@ static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb,
 #ifdef CONFIG_SOCK_VALIDATE_XMIT
 	struct sock *sk = skb->sk;
 
-	if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb)
+	if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) {
 		skb = sk->sk_validate_xmit_skb(sk, dev, skb);
+#ifdef CONFIG_TLS_DEVICE
+	} else if (unlikely(skb->decrypted)) {
+		pr_warn_ratelimited("unencrypted skb with no associated socket - dropping\n");
+		kfree_skb(skb);
+		skb = NULL;
+#endif
+	}
 #endif
 
 	return skb;
diff --git a/net/core/sock.c b/net/core/sock.c
index d57b0cc995a0..6d08553f885c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+static bool can_skb_orphan_partial(const struct sk_buff *skb)
+{
+#ifdef CONFIG_TLS_DEVICE
+	/* Drivers depend on in-order delivery for crypto offload,
+	 * partial orphan breaks out-of-order-OK logic.
+	 */
+	if (skb->decrypted)
+		return false;
+#endif
+	return (skb->destructor == sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
+}
+
 /* This helper is used by netem, as it can hold packets in its
  * delay queue. We want to allow the owner socket to send more
  * packets, as if they were already TX completed by a typical driver.
@@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb)
 	if (skb_is_tcp_pure_ack(skb))
 		return;
 
-	if (skb->destructor == sock_wfree
-#ifdef CONFIG_INET
-	    || skb->destructor == tcp_wfree
-#endif
-		) {
+	if (can_skb_orphan_partial(skb)) {
 		struct sock *sk = skb->sk;
 
 		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 776905899ac0..77b485d60b9d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -984,6 +984,9 @@ new_segment:
 			if (!skb)
 				goto wait_for_memory;
 
+#ifdef CONFIG_TLS_DEVICE
+			skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
+#endif
 			skb_entail(sk, skb);
 			copy = size_goal;
 		}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 3d1e15401384..8a56e09cfb0e 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -398,10 +398,14 @@ more_data:
 static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct sk_msg tmp, *msg_tx = NULL;
-	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
 	int copied = 0, err = 0;
 	struct sk_psock *psock;
 	long timeo;
+	int flags;
+
+	/* Don't let internal do_tcp_sendpages() flags through */
+	flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
+	flags |= MSG_NO_SHARED_FRAGS;
 
 	psock = sk_psock_get(sk);
 	if (unlikely(!psock))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6e4afc48d7bb..979520e46e33 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1320,6 +1320,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 	buff = sk_stream_alloc_skb(sk, nsize, gfp, true);
 	if (!buff)
 		return -ENOMEM; /* We'll just try again later. */
+	skb_copy_decrypted(buff, skb);
 
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
@@ -1874,6 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	buff = sk_stream_alloc_skb(sk, 0, gfp, true);
 	if (unlikely(!buff))
 		return -ENOMEM;
+	skb_copy_decrypted(buff, skb);
 
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
@@ -2143,6 +2145,7 @@ static int tcp_mtu_probe(struct sock *sk)
 	sk_mem_charge(sk, nskb->truesize);
 
 	skb = tcp_send_head(sk);
+	skb_copy_decrypted(nskb, skb);
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 7c0b2b778703..43922d86e510 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -373,9 +373,9 @@ static int tls_push_data(struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_prot_info *prot = &tls_ctx->prot_info;
 	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
-	int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
 	int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
 	struct tls_record_info *record = ctx->open_record;
+	int tls_push_record_flags;
 	struct page_frag *pfrag;
 	size_t orig_size = size;
 	u32 max_open_record_len;
@@ -390,6 +390,9 @@ static int tls_push_data(struct sock *sk,
 	if (sk->sk_err)
 		return -sk->sk_err;
 
+	flags |= MSG_SENDPAGE_DECRYPTED;
+	tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
+
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 	if (tls_is_partially_sent_record(tls_ctx)) {
 		rc = tls_push_partial_record(sk, tls_ctx, flags);
@@ -576,7 +579,9 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
 		gfp_t sk_allocation = sk->sk_allocation;
 
 		sk->sk_allocation = GFP_ATOMIC;
-		tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL);
+		tls_push_partial_record(sk, ctx,
+					MSG_DONTWAIT | MSG_NOSIGNAL |
+					MSG_SENDPAGE_DECRYPTED);
 		sk->sk_allocation = sk_allocation;
 	}
 }
-- 
cgit v1.2.3


From 7794f486ed0b1fa8022dd0a27b9babf86a46d1cf Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 7 Aug 2019 10:29:50 -0400
Subject: usbfs: Add ioctls for runtime power management

It has been requested that usbfs should implement runtime power
management, instead of forcing the device to remain at full power as
long as the device file is open.  This patch introduces that new
feature.

It does so by adding three new usbfs ioctls:

	USBDEVFS_FORBID_SUSPEND: Prevents the device from going into
	runtime suspend (and causes a resume if the device is already
	suspended).

	USBDEVFS_ALLOW_SUSPEND: Allows the device to go into runtime
	suspend.  Some time may elapse before the device actually is
	suspended, depending on things like the autosuspend delay.

	USBDEVFS_WAIT_FOR_RESUME: Blocks until the call is interrupted
	by a signal or at least one runtime resume has occurred since
	the most recent ALLOW_SUSPEND ioctl call (which may mean
	immediately, even if the device is currently suspended).  In
	the latter case, the device is prevented from suspending again
	just as if FORBID_SUSPEND was called before the ioctl returns.

For backward compatibility, when the device file is first opened
runtime suspends are forbidden.  The userspace program can then allow
suspends whenever it wants, and either resume the device directly (by
forbidding suspends again) or wait for a resume from some other source
(such as a remote wakeup).  URBs submitted to a suspended device will
fail or will complete with an appropriate error code.

This combination of ioctls is sufficient for user programs to have
nearly the same degree of control over a device's runtime power
behavior as kernel drivers do.

Still lacking is documentation for the new ioctls.  I intend to add it
later, after the existing documentation for the usbfs userspace API is
straightened out into a reasonable form.

Suggested-by: Mayuresh Kulkarni <mkulkarni@opensource.cirrus.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1908071013220.1514-100000@iolanthe.rowland.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/devio.c          | 96 +++++++++++++++++++++++++++++++++++++--
 drivers/usb/core/generic.c        |  5 ++
 drivers/usb/core/usb.h            |  3 ++
 include/uapi/linux/usbdevice_fs.h |  3 ++
 4 files changed, 102 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index b265ab5405f9..cdd34dcb2395 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -48,6 +48,9 @@
 #define USB_DEVICE_MAX			(USB_MAXBUS * 128)
 #define USB_SG_SIZE			16384 /* split-size for large txs */
 
+/* Mutual exclusion for ps->list in resume vs. release and remove */
+static DEFINE_MUTEX(usbfs_mutex);
+
 struct usb_dev_state {
 	struct list_head list;      /* state list */
 	struct usb_device *dev;
@@ -57,14 +60,17 @@ struct usb_dev_state {
 	struct list_head async_completed;
 	struct list_head memory_list;
 	wait_queue_head_t wait;     /* wake up if a request completed */
+	wait_queue_head_t wait_for_resume;   /* wake up upon runtime resume */
 	unsigned int discsignr;
 	struct pid *disc_pid;
 	const struct cred *cred;
 	sigval_t disccontext;
 	unsigned long ifclaimed;
 	u32 disabled_bulk_eps;
-	bool privileges_dropped;
 	unsigned long interface_allowed_mask;
+	int not_yet_resumed;
+	bool suspend_allowed;
+	bool privileges_dropped;
 };
 
 struct usb_memory {
@@ -694,9 +700,7 @@ static void driver_disconnect(struct usb_interface *intf)
 	destroy_async_on_interface(ps, ifnum);
 }
 
-/* The following routines are merely placeholders.  There is no way
- * to inform a user task about suspend or resumes.
- */
+/* We don't care about suspend/resume of claimed interfaces */
 static int driver_suspend(struct usb_interface *intf, pm_message_t msg)
 {
 	return 0;
@@ -707,12 +711,32 @@ static int driver_resume(struct usb_interface *intf)
 	return 0;
 }
 
+/* The following routines apply to the entire device, not interfaces */
+void usbfs_notify_suspend(struct usb_device *udev)
+{
+	/* We don't need to handle this */
+}
+
+void usbfs_notify_resume(struct usb_device *udev)
+{
+	struct usb_dev_state *ps;
+
+	/* Protect against simultaneous remove or release */
+	mutex_lock(&usbfs_mutex);
+	list_for_each_entry(ps, &udev->filelist, list) {
+		WRITE_ONCE(ps->not_yet_resumed, 0);
+		wake_up_all(&ps->wait_for_resume);
+	}
+	mutex_unlock(&usbfs_mutex);
+}
+
 struct usb_driver usbfs_driver = {
 	.name =		"usbfs",
 	.probe =	driver_probe,
 	.disconnect =	driver_disconnect,
 	.suspend =	driver_suspend,
 	.resume =	driver_resume,
+	.supports_autosuspend = 1,
 };
 
 static int claimintf(struct usb_dev_state *ps, unsigned int ifnum)
@@ -997,9 +1021,12 @@ static int usbdev_open(struct inode *inode, struct file *file)
 	INIT_LIST_HEAD(&ps->async_completed);
 	INIT_LIST_HEAD(&ps->memory_list);
 	init_waitqueue_head(&ps->wait);
+	init_waitqueue_head(&ps->wait_for_resume);
 	ps->disc_pid = get_pid(task_pid(current));
 	ps->cred = get_current_cred();
 	smp_wmb();
+
+	/* Can't race with resume; the device is already active */
 	list_add_tail(&ps->list, &dev->filelist);
 	file->private_data = ps;
 	usb_unlock_device(dev);
@@ -1025,7 +1052,10 @@ static int usbdev_release(struct inode *inode, struct file *file)
 	usb_lock_device(dev);
 	usb_hub_release_all_ports(dev, ps);
 
+	/* Protect against simultaneous resume */
+	mutex_lock(&usbfs_mutex);
 	list_del_init(&ps->list);
+	mutex_unlock(&usbfs_mutex);
 
 	for (ifnum = 0; ps->ifclaimed && ifnum < 8*sizeof(ps->ifclaimed);
 			ifnum++) {
@@ -1033,7 +1063,8 @@ static int usbdev_release(struct inode *inode, struct file *file)
 			releaseintf(ps, ifnum);
 	}
 	destroy_all_async(ps);
-	usb_autosuspend_device(dev);
+	if (!ps->suspend_allowed)
+		usb_autosuspend_device(dev);
 	usb_unlock_device(dev);
 	usb_put_dev(dev);
 	put_pid(ps->disc_pid);
@@ -2384,6 +2415,47 @@ static int proc_drop_privileges(struct usb_dev_state *ps, void __user *arg)
 	return 0;
 }
 
+static int proc_forbid_suspend(struct usb_dev_state *ps)
+{
+	int ret = 0;
+
+	if (ps->suspend_allowed) {
+		ret = usb_autoresume_device(ps->dev);
+		if (ret == 0)
+			ps->suspend_allowed = false;
+		else if (ret != -ENODEV)
+			ret = -EIO;
+	}
+	return ret;
+}
+
+static int proc_allow_suspend(struct usb_dev_state *ps)
+{
+	if (!connected(ps))
+		return -ENODEV;
+
+	WRITE_ONCE(ps->not_yet_resumed, 1);
+	if (!ps->suspend_allowed) {
+		usb_autosuspend_device(ps->dev);
+		ps->suspend_allowed = true;
+	}
+	return 0;
+}
+
+static int proc_wait_for_resume(struct usb_dev_state *ps)
+{
+	int ret;
+
+	usb_unlock_device(ps->dev);
+	ret = wait_event_interruptible(ps->wait_for_resume,
+			READ_ONCE(ps->not_yet_resumed) == 0);
+	usb_lock_device(ps->dev);
+
+	if (ret != 0)
+		return -EINTR;
+	return proc_forbid_suspend(ps);
+}
+
 /*
  * NOTE:  All requests here that have interface numbers as parameters
  * are assuming that somehow the configuration has been prevented from
@@ -2578,6 +2650,15 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
 	case USBDEVFS_GET_SPEED:
 		ret = ps->dev->speed;
 		break;
+	case USBDEVFS_FORBID_SUSPEND:
+		ret = proc_forbid_suspend(ps);
+		break;
+	case USBDEVFS_ALLOW_SUSPEND:
+		ret = proc_allow_suspend(ps);
+		break;
+	case USBDEVFS_WAIT_FOR_RESUME:
+		ret = proc_wait_for_resume(ps);
+		break;
 	}
 
 	/* Handle variable-length commands */
@@ -2651,15 +2732,20 @@ static void usbdev_remove(struct usb_device *udev)
 {
 	struct usb_dev_state *ps;
 
+	/* Protect against simultaneous resume */
+	mutex_lock(&usbfs_mutex);
 	while (!list_empty(&udev->filelist)) {
 		ps = list_entry(udev->filelist.next, struct usb_dev_state, list);
 		destroy_all_async(ps);
 		wake_up_all(&ps->wait);
+		WRITE_ONCE(ps->not_yet_resumed, 0);
+		wake_up_all(&ps->wait_for_resume);
 		list_del_init(&ps->list);
 		if (ps->discsignr)
 			kill_pid_usb_asyncio(ps->discsignr, EPIPE, ps->disccontext,
 					     ps->disc_pid, ps->cred);
 	}
+	mutex_unlock(&usbfs_mutex);
 }
 
 static int usbdev_notify(struct notifier_block *self,
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index 1ac9c1e5f773..38f8b3e31762 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -257,6 +257,8 @@ static int generic_suspend(struct usb_device *udev, pm_message_t msg)
 	else
 		rc = usb_port_suspend(udev, msg);
 
+	if (rc == 0)
+		usbfs_notify_suspend(udev);
 	return rc;
 }
 
@@ -273,6 +275,9 @@ static int generic_resume(struct usb_device *udev, pm_message_t msg)
 		rc = hcd_bus_resume(udev, msg);
 	else
 		rc = usb_port_resume(udev, msg);
+
+	if (rc == 0)
+		usbfs_notify_resume(udev);
 	return rc;
 }
 
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index bd8d01f85a13..2932d1ec5def 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -95,6 +95,9 @@ extern int usb_runtime_idle(struct device *dev);
 extern int usb_enable_usb2_hardware_lpm(struct usb_device *udev);
 extern int usb_disable_usb2_hardware_lpm(struct usb_device *udev);
 
+extern void usbfs_notify_suspend(struct usb_device *udev);
+extern void usbfs_notify_resume(struct usb_device *udev);
+
 #else
 
 static inline int usb_port_suspend(struct usb_device *udev, pm_message_t msg)
diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h
index 78efe870c2b7..d24bbb6d3ca1 100644
--- a/include/uapi/linux/usbdevice_fs.h
+++ b/include/uapi/linux/usbdevice_fs.h
@@ -223,5 +223,8 @@ struct usbdevfs_streams {
  * extending size of the data returned.
  */
 #define USBDEVFS_CONNINFO_EX(len)  _IOC(_IOC_READ, 'U', 32, len)
+#define USBDEVFS_FORBID_SUSPEND    _IO('U', 33)
+#define USBDEVFS_ALLOW_SUSPEND     _IO('U', 34)
+#define USBDEVFS_WAIT_FOR_RESUME   _IO('U', 35)
 
 #endif /* _UAPI_LINUX_USBDEVICE_FS_H */
-- 
cgit v1.2.3


From 3567894b6914813299300019e028874927210880 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 31 Jul 2019 10:40:16 +0200
Subject: clk: core: introduce clk_hw_set_parent()

Introduce the clk_hw_set_parent() provider call to change parent of
a clock by using the clk_hw pointers.

This eases the clock reparenting from clock rate notifiers and
implementing DVFS with simpler code avoiding the boilerplates
functions as __clk_lookup(clk_hw_get_name()) then clk_set_parent().

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/clk.c            | 6 ++++++
 include/linux/clk-provider.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index c0990703ce54..c11b1781d24a 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -2487,6 +2487,12 @@ runtime_put:
 	return ret;
 }
 
+int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *parent)
+{
+	return clk_core_set_parent_nolock(hw->core, parent->core);
+}
+EXPORT_SYMBOL_GPL(clk_hw_set_parent);
+
 /**
  * clk_set_parent - switch the parent of a mux clk
  * @clk: the mux clk whose input we are switching
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 2ae7604783dd..dce5521a9bf6 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -817,6 +817,7 @@ unsigned int clk_hw_get_num_parents(const struct clk_hw *hw);
 struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw);
 struct clk_hw *clk_hw_get_parent_by_index(const struct clk_hw *hw,
 					  unsigned int index);
+int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *new_parent);
 unsigned int __clk_get_enable_count(struct clk *clk);
 unsigned long clk_hw_get_rate(const struct clk_hw *hw);
 unsigned long __clk_get_flags(struct clk *clk);
-- 
cgit v1.2.3


From 85ab9d954698961960240622de4fad85c7d8a61e Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Wed, 31 Jul 2019 10:40:19 +0200
Subject: clk: meson: g12a: expose CPUB clock ID for G12B

Expose the CPUB clock id to add DVFS to the second CPU cluster of
the Amlogic G12B SoC.

Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 drivers/clk/meson/g12a.h              | 1 -
 include/dt-bindings/clock/g12a-clkc.h | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/meson/g12a.h b/drivers/clk/meson/g12a.h
index c8aed31fbe17..559a34cfdfeb 100644
--- a/drivers/clk/meson/g12a.h
+++ b/drivers/clk/meson/g12a.h
@@ -216,7 +216,6 @@
 #define CLKID_CPUB_CLK_DYN1_DIV			221
 #define CLKID_CPUB_CLK_DYN1			222
 #define CLKID_CPUB_CLK_DYN			223
-#define CLKID_CPUB_CLK				224
 #define CLKID_CPUB_CLK_DIV16_EN			225
 #define CLKID_CPUB_CLK_DIV16			226
 #define CLKID_CPUB_CLK_DIV2			227
diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h
index b6b127e45634..8ccc29ac7a72 100644
--- a/include/dt-bindings/clock/g12a-clkc.h
+++ b/include/dt-bindings/clock/g12a-clkc.h
@@ -137,5 +137,6 @@
 #define CLKID_VDEC_HEVC				207
 #define CLKID_VDEC_HEVCF			210
 #define CLKID_TS				212
+#define CLKID_CPUB_CLK				224
 
 #endif /* __G12A_CLKC_H */
-- 
cgit v1.2.3


From 6a0a8d10a3661a036b55af695542a714c429ab7c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 9 Aug 2019 11:01:27 +0200
Subject: netfilter: nf_tables: use-after-free in failing rule with bound set

If a rule that has already a bound anonymous set fails to be added, the
preparation phase releases the rule and the bound set. However, the
transaction object from the abort path still has a reference to the set
object that is stale, leading to a use-after-free when checking for the
set->bound field. Add a new field to the transaction that specifies if
the set is bound, so the abort path can skip releasing it since the rule
command owns it and it takes care of releasing it. After this update,
the set->bound field is removed.

[   24.649883] Unable to handle kernel paging request at virtual address 0000000000040434
[   24.657858] Mem abort info:
[   24.660686]   ESR = 0x96000004
[   24.663769]   Exception class = DABT (current EL), IL = 32 bits
[   24.669725]   SET = 0, FnV = 0
[   24.672804]   EA = 0, S1PTW = 0
[   24.675975] Data abort info:
[   24.678880]   ISV = 0, ISS = 0x00000004
[   24.682743]   CM = 0, WnR = 0
[   24.685723] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000428952000
[   24.692207] [0000000000040434] pgd=0000000000000000
[   24.697119] Internal error: Oops: 96000004 [#1] SMP
[...]
[   24.889414] Call trace:
[   24.891870]  __nf_tables_abort+0x3f0/0x7a0
[   24.895984]  nf_tables_abort+0x20/0x40
[   24.899750]  nfnetlink_rcv_batch+0x17c/0x588
[   24.904037]  nfnetlink_rcv+0x13c/0x190
[   24.907803]  netlink_unicast+0x18c/0x208
[   24.911742]  netlink_sendmsg+0x1b0/0x350
[   24.915682]  sock_sendmsg+0x4c/0x68
[   24.919185]  ___sys_sendmsg+0x288/0x2c8
[   24.923037]  __sys_sendmsg+0x7c/0xd0
[   24.926628]  __arm64_sys_sendmsg+0x2c/0x38
[   24.930744]  el0_svc_common.constprop.0+0x94/0x158
[   24.935556]  el0_svc_handler+0x34/0x90
[   24.939322]  el0_svc+0x8/0xc
[   24.942216] Code: 37280300 f9404023 91014262 aa1703e0 (f9401863)
[   24.948336] ---[ end trace cebbb9dcbed3b56f ]---

Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  9 +++++++--
 net/netfilter/nf_tables_api.c     | 15 ++++++++++-----
 2 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9b624566b82d..475d6f28ca67 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -421,8 +421,7 @@ struct nft_set {
 	unsigned char			*udata;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
-	u16				flags:13,
-					bound:1,
+	u16				flags:14,
 					genmask:2;
 	u8				klen;
 	u8				dlen;
@@ -1348,12 +1347,15 @@ struct nft_trans_rule {
 struct nft_trans_set {
 	struct nft_set			*set;
 	u32				set_id;
+	bool				bound;
 };
 
 #define nft_trans_set(trans)	\
 	(((struct nft_trans_set *)trans->data)->set)
 #define nft_trans_set_id(trans)	\
 	(((struct nft_trans_set *)trans->data)->set_id)
+#define nft_trans_set_bound(trans)	\
+	(((struct nft_trans_set *)trans->data)->bound)
 
 struct nft_trans_chain {
 	bool				update;
@@ -1384,12 +1386,15 @@ struct nft_trans_table {
 struct nft_trans_elem {
 	struct nft_set			*set;
 	struct nft_set_elem		elem;
+	bool				bound;
 };
 
 #define nft_trans_elem_set(trans)	\
 	(((struct nft_trans_elem *)trans->data)->set)
 #define nft_trans_elem(trans)	\
 	(((struct nft_trans_elem *)trans->data)->elem)
+#define nft_trans_elem_set_bound(trans)	\
+	(((struct nft_trans_elem *)trans->data)->bound)
 
 struct nft_trans_obj {
 	struct nft_object		*obj;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 605a7cfe7ca7..88abbddf8967 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -138,9 +138,14 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
 		return;
 
 	list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
-		if (trans->msg_type == NFT_MSG_NEWSET &&
-		    nft_trans_set(trans) == set) {
-			set->bound = true;
+		switch (trans->msg_type) {
+		case NFT_MSG_NEWSET:
+			if (nft_trans_set(trans) == set)
+				nft_trans_set_bound(trans) = true;
+			break;
+		case NFT_MSG_NEWSETELEM:
+			if (nft_trans_elem_set(trans) == set)
+				nft_trans_elem_set_bound(trans) = true;
 			break;
 		}
 	}
@@ -6906,7 +6911,7 @@ static int __nf_tables_abort(struct net *net)
 			break;
 		case NFT_MSG_NEWSET:
 			trans->ctx.table->use--;
-			if (nft_trans_set(trans)->bound) {
+			if (nft_trans_set_bound(trans)) {
 				nft_trans_destroy(trans);
 				break;
 			}
@@ -6918,7 +6923,7 @@ static int __nf_tables_abort(struct net *net)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSETELEM:
-			if (nft_trans_elem_set(trans)->bound) {
+			if (nft_trans_elem_set_bound(trans)) {
 				nft_trans_destroy(trans);
 				break;
 			}
-- 
cgit v1.2.3


From 284e21fab2cfcf90dacce565e0b12f29e5df00c1 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Tue, 6 Aug 2019 01:49:17 -0300
Subject: x86, s390/mm: Move sme_active() and sme_me_mask to x86-specific
 header

Now that generic code doesn't reference them, move sme_active() and
sme_me_mask to x86's <asm/mem_encrypt.h>.

Also remove the export for sme_active() since it's only used in files that
won't be built as modules. sme_me_mask on the other hand is used in
arch/x86/kvm/svm.c (via __sme_set() and __psp_pa()) which can be built as a
module so its export needs to stay.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190806044919.10622-5-bauerman@linux.ibm.com
---
 arch/s390/include/asm/mem_encrypt.h |  4 +---
 arch/x86/include/asm/mem_encrypt.h  | 10 ++++++++++
 arch/x86/mm/mem_encrypt.c           |  1 -
 include/linux/mem_encrypt.h         | 14 +-------------
 4 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index 3eb018508190..ff813a56bc30 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -4,9 +4,7 @@
 
 #ifndef __ASSEMBLY__
 
-#define sme_me_mask	0ULL
-
-static inline bool sme_active(void) { return false; }
+static inline bool mem_encrypt_active(void) { return false; }
 extern bool sev_active(void);
 
 int set_memory_encrypted(unsigned long addr, int numpages);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 0c196c47d621..848ce43b9040 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -92,6 +92,16 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
 
 extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
 
+static inline bool mem_encrypt_active(void)
+{
+	return sme_me_mask;
+}
+
+static inline u64 sme_get_me_mask(void)
+{
+	return sme_me_mask;
+}
+
 #endif	/* __ASSEMBLY__ */
 
 #endif	/* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index fece30ca8b0c..94da5a88abe6 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -344,7 +344,6 @@ bool sme_active(void)
 {
 	return sme_me_mask && !sev_enabled;
 }
-EXPORT_SYMBOL(sme_active);
 
 bool sev_active(void)
 {
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 470bd53a89df..0c5b0ff9eb29 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -18,23 +18,11 @@
 
 #else	/* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
-#define sme_me_mask	0ULL
-
-static inline bool sme_active(void) { return false; }
+static inline bool mem_encrypt_active(void) { return false; }
 static inline bool sev_active(void) { return false; }
 
 #endif	/* CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
-static inline bool mem_encrypt_active(void)
-{
-	return sme_me_mask;
-}
-
-static inline u64 sme_get_me_mask(void)
-{
-	return sme_me_mask;
-}
-
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 /*
  * The __sme_set() and __sme_clr() macros are useful for adding or removing
-- 
cgit v1.2.3


From ae7eb82a92fae4d255a0caa9f7c0f99e3babfec1 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Date: Tue, 6 Aug 2019 01:49:18 -0300
Subject: fs/core/vmcore: Move sev_active() reference to x86 arch code

Secure Encrypted Virtualization is an x86-specific feature, so it shouldn't
appear in generic kernel code because it forces non-x86 architectures to
define the sev_active() function, which doesn't make a lot of sense.

To solve this problem, add an x86 elfcorehdr_read() function to override
the generic weak implementation. To do that, it's necessary to make
read_from_oldmem() public so that it can be used outside of vmcore.c.

Also, remove the export for sev_active() since it's only used in files that
won't be built as modules.

Signed-off-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190806044919.10622-6-bauerman@linux.ibm.com
---
 arch/x86/kernel/crash_dump_64.c |  5 +++++
 arch/x86/mm/mem_encrypt.c       |  1 -
 fs/proc/vmcore.c                |  8 ++++----
 include/linux/crash_dump.h      | 14 ++++++++++++++
 include/linux/mem_encrypt.h     |  1 -
 5 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 22369dd5de3b..045e82e8945b 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -70,3 +70,8 @@ ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
 {
 	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, true);
 }
+
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+	return read_from_oldmem(buf, count, ppos, 0, sev_active());
+}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 94da5a88abe6..9268c12458c8 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -349,7 +349,6 @@ bool sev_active(void)
 {
 	return sme_me_mask && sev_enabled;
 }
-EXPORT_SYMBOL(sev_active);
 
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
 bool force_dma_unencrypted(struct device *dev)
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 7bcc92add72c..7b13988796e1 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -104,9 +104,9 @@ static int pfn_is_ram(unsigned long pfn)
 }
 
 /* Reads a page from the oldmem device from given offset. */
-static ssize_t read_from_oldmem(char *buf, size_t count,
-				u64 *ppos, int userbuf,
-				bool encrypted)
+ssize_t read_from_oldmem(char *buf, size_t count,
+			 u64 *ppos, int userbuf,
+			 bool encrypted)
 {
 	unsigned long pfn, offset;
 	size_t nr_bytes;
@@ -170,7 +170,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
  */
 ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
 {
-	return read_from_oldmem(buf, count, ppos, 0, sev_active());
+	return read_from_oldmem(buf, count, ppos, 0, false);
 }
 
 /*
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index f774c5eb9e3c..4664fc1871de 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -115,4 +115,18 @@ static inline int vmcore_add_device_dump(struct vmcoredd_data *data)
 	return -EOPNOTSUPP;
 }
 #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
+
+#ifdef CONFIG_PROC_VMCORE
+ssize_t read_from_oldmem(char *buf, size_t count,
+			 u64 *ppos, int userbuf,
+			 bool encrypted);
+#else
+static inline ssize_t read_from_oldmem(char *buf, size_t count,
+				       u64 *ppos, int userbuf,
+				       bool encrypted)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_PROC_VMCORE */
+
 #endif /* LINUX_CRASHDUMP_H */
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 0c5b0ff9eb29..5c4a18a91f89 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -19,7 +19,6 @@
 #else	/* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
 static inline bool mem_encrypt_active(void) { return false; }
-static inline bool sev_active(void) { return false; }
 
 #endif	/* CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
-- 
cgit v1.2.3


From 1f573cce48a2ebb35953034062ef01056b7d6a58 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Fri, 9 Aug 2019 11:20:33 +0100
Subject: device.h: Fix warnings for mismatched parameter names in comments

Fix the warnings for parameter named as "driver" instead of the actual "drv"
in the comments as reported by the kbuild robot.

Reported-by:  kbuild test robot <lkp@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20190809102033.28463-1-suzuki.poulose@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index 23efaff5f10c..e32038f4ef56 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -479,7 +479,7 @@ struct device *driver_find_device(struct device_driver *drv,
 /**
  * driver_find_device_by_name - device iterator for locating a particular device
  * of a specific name.
- * @driver: the driver we're iterating
+ * @drv: the driver we're iterating
  * @name: name of the device to match
  */
 static inline struct device *driver_find_device_by_name(struct device_driver *drv,
@@ -491,7 +491,7 @@ static inline struct device *driver_find_device_by_name(struct device_driver *dr
 /**
  * driver_find_device_by_of_node- device iterator for locating a particular device
  * by of_node pointer.
- * @driver: the driver we're iterating
+ * @drv: the driver we're iterating
  * @np: of_node pointer to match.
  */
 static inline struct device *
@@ -504,7 +504,7 @@ driver_find_device_by_of_node(struct device_driver *drv,
 /**
  * driver_find_device_by_fwnode- device iterator for locating a particular device
  * by fwnode pointer.
- * @driver: the driver we're iterating
+ * @drv: the driver we're iterating
  * @fwnode: fwnode pointer to match.
  */
 static inline struct device *
@@ -536,7 +536,7 @@ static inline struct device *driver_find_next_device(struct device_driver *drv,
 /**
  * driver_find_device_by_acpi_dev : device iterator for locating a particular
  * device matching the ACPI_COMPANION device.
- * @driver: the driver we're iterating
+ * @drv: the driver we're iterating
  * @adev: ACPI_COMPANION device to match.
  */
 static inline struct device *
-- 
cgit v1.2.3


From b9c6ff94e43a0ee053e0c1d983fba1ac4953b762 Mon Sep 17 00:00:00 2001
From: "Suthikulpanit, Suravee" <Suravee.Suthikulpanit@amd.com>
Date: Tue, 23 Jul 2019 19:00:37 +0000
Subject: iommu/amd: Re-factor guest virtual APIC (de-)activation code

Re-factore the logic for activate/deactivate guest virtual APIC mode (GAM)
into helper functions, and export them for other drivers (e.g. SVM).
to support run-time activate/deactivate of SVM AVIC.

Cc: Joerg Roedel <joro@8bytes.org>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu.c       | 85 +++++++++++++++++++++++++++++------------
 drivers/iommu/amd_iommu_types.h |  9 +++++
 include/linux/amd-iommu.h       | 12 ++++++
 3 files changed, 82 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b607a92791d3..008da21a2592 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4313,13 +4313,62 @@ static const struct irq_domain_ops amd_ir_domain_ops = {
 	.deactivate = irq_remapping_deactivate,
 };
 
+int amd_iommu_activate_guest_mode(void *data)
+{
+	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+
+	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+	    !entry || entry->lo.fields_vapic.guest_mode)
+		return 0;
+
+	entry->lo.val = 0;
+	entry->hi.val = 0;
+
+	entry->lo.fields_vapic.guest_mode  = 1;
+	entry->lo.fields_vapic.ga_log_intr = 1;
+	entry->hi.fields.ga_root_ptr       = ir_data->ga_root_ptr;
+	entry->hi.fields.vector            = ir_data->ga_vector;
+	entry->lo.fields_vapic.ga_tag      = ir_data->ga_tag;
+
+	return modify_irte_ga(ir_data->irq_2_irte.devid,
+			      ir_data->irq_2_irte.index, entry, NULL);
+}
+EXPORT_SYMBOL(amd_iommu_activate_guest_mode);
+
+int amd_iommu_deactivate_guest_mode(void *data)
+{
+	struct amd_ir_data *ir_data = (struct amd_ir_data *)data;
+	struct irte_ga *entry = (struct irte_ga *) ir_data->entry;
+	struct irq_cfg *cfg = ir_data->cfg;
+
+	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) ||
+	    !entry || !entry->lo.fields_vapic.guest_mode)
+		return 0;
+
+	entry->lo.val = 0;
+	entry->hi.val = 0;
+
+	entry->lo.fields_remap.dm          = apic->irq_dest_mode;
+	entry->lo.fields_remap.int_type    = apic->irq_delivery_mode;
+	entry->hi.fields.vector            = cfg->vector;
+	entry->lo.fields_remap.destination =
+				APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
+	entry->hi.fields.destination =
+				APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
+
+	return modify_irte_ga(ir_data->irq_2_irte.devid,
+			      ir_data->irq_2_irte.index, entry, NULL);
+}
+EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode);
+
 static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 {
+	int ret;
 	struct amd_iommu *iommu;
 	struct amd_iommu_pi_data *pi_data = vcpu_info;
 	struct vcpu_data *vcpu_pi_info = pi_data->vcpu_data;
 	struct amd_ir_data *ir_data = data->chip_data;
-	struct irte_ga *irte = (struct irte_ga *) ir_data->entry;
 	struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
 	struct iommu_dev_data *dev_data = search_dev_data(irte_info->devid);
 
@@ -4330,6 +4379,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 	if (!dev_data || !dev_data->use_vapic)
 		return 0;
 
+	ir_data->cfg = irqd_cfg(data);
 	pi_data->ir_data = ir_data;
 
 	/* Note:
@@ -4348,37 +4398,24 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 
 	pi_data->prev_ga_tag = ir_data->cached_ga_tag;
 	if (pi_data->is_guest_mode) {
-		/* Setting */
-		irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
-		irte->hi.fields.vector = vcpu_pi_info->vector;
-		irte->lo.fields_vapic.ga_log_intr = 1;
-		irte->lo.fields_vapic.guest_mode = 1;
-		irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
-
-		ir_data->cached_ga_tag = pi_data->ga_tag;
+		ir_data->ga_root_ptr = (pi_data->base >> 12);
+		ir_data->ga_vector = vcpu_pi_info->vector;
+		ir_data->ga_tag = pi_data->ga_tag;
+		ret = amd_iommu_activate_guest_mode(ir_data);
+		if (!ret)
+			ir_data->cached_ga_tag = pi_data->ga_tag;
 	} else {
-		/* Un-Setting */
-		struct irq_cfg *cfg = irqd_cfg(data);
-
-		irte->hi.val = 0;
-		irte->lo.val = 0;
-		irte->hi.fields.vector = cfg->vector;
-		irte->lo.fields_remap.guest_mode = 0;
-		irte->lo.fields_remap.destination =
-				APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
-		irte->hi.fields.destination =
-				APICID_TO_IRTE_DEST_HI(cfg->dest_apicid);
-		irte->lo.fields_remap.int_type = apic->irq_delivery_mode;
-		irte->lo.fields_remap.dm = apic->irq_dest_mode;
+		ret = amd_iommu_deactivate_guest_mode(ir_data);
 
 		/*
 		 * This communicates the ga_tag back to the caller
 		 * so that it can do all the necessary clean up.
 		 */
-		ir_data->cached_ga_tag = 0;
+		if (!ret)
+			ir_data->cached_ga_tag = 0;
 	}
 
-	return modify_irte_ga(irte_info->devid, irte_info->index, irte, ir_data);
+	return ret;
 }
 
 
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 64edd5a9694c..9ac229e92b07 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -873,6 +873,15 @@ struct amd_ir_data {
 	struct msi_msg msi_entry;
 	void *entry;    /* Pointer to union irte or struct irte_ga */
 	void *ref;      /* Pointer to the actual irte */
+
+	/**
+	 * Store information for activate/de-activate
+	 * Guest virtual APIC mode during runtime.
+	 */
+	struct irq_cfg *cfg;
+	int ga_vector;
+	int ga_root_ptr;
+	int ga_tag;
 };
 
 struct amd_irte_ops {
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 4a4d00646040..21e950e4ab62 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -184,6 +184,9 @@ extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32));
 extern int
 amd_iommu_update_ga(int cpu, bool is_run, void *data);
 
+extern int amd_iommu_activate_guest_mode(void *data);
+extern int amd_iommu_deactivate_guest_mode(void *data);
+
 #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
 
 static inline int
@@ -198,6 +201,15 @@ amd_iommu_update_ga(int cpu, bool is_run, void *data)
 	return 0;
 }
 
+static inline int amd_iommu_activate_guest_mode(void *data)
+{
+	return 0;
+}
+
+static inline int amd_iommu_deactivate_guest_mode(void *data)
+{
+	return 0;
+}
 #endif /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */
 
 #endif /* _ASM_X86_AMD_IOMMU_H */
-- 
cgit v1.2.3


From 3846a3b9511c5166082a93536d919a9c42abcd91 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 7 Aug 2019 11:26:45 +0300
Subject: iommu/omap: fix boot issue on remoteprocs with AMMU/Unicache

Support has been added to the OMAP IOMMU driver to fix a boot hang
issue on OMAP remoteprocs with AMMU/Unicache, caused by an improper
AMMU/Unicache state upon initial deassertion of the processor reset.
The issue is described in detail in the next three paragraphs.

All the Cortex M3/M4 IPU processor subsystems in OMAP SoCs have a
AMMU/Unicache IP that dictates the memory attributes for addresses
seen by the processor cores. The AMMU/Unicache is configured/enabled
by the SCACHE_CONFIG.BYPASS bit - a value of 1 enables the cache and
mandates all addresses accessed by M3/M4 be defined in the AMMU. This
bit is not programmable from the host processor. The M3/M4 boot
sequence starts out with the AMMU/Unicache in disabled state, and
SYS/BIOS programs the AMMU regions and enables the Unicache during
one of its initial boot steps. This SCACHE_CONFIG.BYPASS bit is
however enabled by default whenever a RET reset is applied to the IP,
irrespective of whether it was previously enabled or not. The AMMU
registers lose their context whenever this reset is applied. The reset
is effective as long as the MMU portion of the subsystem is enabled
and clocked. This behavior is common to all the IPU and DSP subsystems
that have an AMMU/Unicache.

The IPU boot sequence involves enabling and programming the MMU, and
loading the processor and releasing the reset(s) for the processor.
The PM setup code currently sets the target state for most of the
power domains to RET. The L2 MMU can be enabled, programmed and
accessed properly just fine with the domain in hardware supervised
mode, while the power domain goes through a RET->ON->RET transition
during the programming sequence. However, the ON->RET transition
asserts a RET reset, and the SCACHE_CONFIG.BYPASS bit gets auto-set.
An AMMU fault is thrown immediately when the M3/M4 core's reset is
released since the first instruction address itself will not be
defined in any valid AMMU regions. The ON->RET transition happens
automatically on the power domain after enabling the iommu due to
the hardware supervised mode.

This patch adds and invokes the .set_pwrdm_constraint pdata ops, if
present, during the OMAP IOMMU enable and disable functions to resolve
the above boot hang issue. The ops will allow to invoke a mach-omap2
layer API pwrdm_set_next_pwrst() in a multi-arch kernel environment.
The ops also returns the current power domain state while enforcing
the constraint so that the driver can store it and use it to set back
the power domain state while releasing the constraint. The pdata ops
implementation restricts the target power domain to ON during enable,
and back to the original power domain state during disable, and thereby
eliminating the conditions for the boot issue. The implementation is
effective only when the original power domain state is either RET or
OFF, and is a no-op when it is ON or INACTIVE.

The .set_pwrdm_constraint ops need to be plugged in pdata-quirks
for the affected remote processors to be able to boot properly.

Note that the current issue is seen only on kernels with the affected
power domains programmed to enter RET. For eg., IPU1 on DRA7xx is in a
separate domain and is susceptible to this bug, while the IPU2 subsystem
is within CORE power domain, and CORE RET is not supported on this SoC.
IPUs on OMAP4 and OMAP5 are also susceptible since they are in CORE power
domain, and CORE RET is a valid power target on these SoCs.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/arm/mach-omap2/Makefile             |  2 ++
 arch/arm/mach-omap2/omap-iommu.c         | 43 ++++++++++++++++++++++++++++++++
 drivers/iommu/omap-iommu.c               | 17 +++++++++++++
 drivers/iommu/omap-iommu.h               |  2 ++
 include/linux/platform_data/iommu-omap.h |  2 ++
 5 files changed, 66 insertions(+)
 create mode 100644 arch/arm/mach-omap2/omap-iommu.c

(limited to 'include')

diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 600650551621..d4f11c5070ae 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -229,3 +229,5 @@ include/generated/ti-pm-asm-offsets.h: arch/arm/mach-omap2/pm-asm-offsets.s FORC
 $(obj)/sleep33xx.o $(obj)/sleep43xx.o: include/generated/ti-pm-asm-offsets.h
 
 targets += pm-asm-offsets.s
+
+obj-$(CONFIG_OMAP_IOMMU)		+= omap-iommu.o
diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c
new file mode 100644
index 000000000000..f1a6ece8108e
--- /dev/null
+++ b/arch/arm/mach-omap2/omap-iommu.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OMAP IOMMU quirks for various TI SoCs
+ *
+ * Copyright (C) 2015-2019 Texas Instruments Incorporated - http://www.ti.com/
+ *      Suman Anna <s-anna@ti.com>
+ */
+
+#include <linux/platform_device.h>
+#include <linux/err.h>
+
+#include "omap_hwmod.h"
+#include "omap_device.h"
+#include "powerdomain.h"
+
+int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, bool request,
+				    u8 *pwrst)
+{
+	struct powerdomain *pwrdm;
+	struct omap_device *od;
+	u8 next_pwrst;
+
+	od = to_omap_device(pdev);
+	if (!od)
+		return -ENODEV;
+
+	if (od->hwmods_cnt != 1)
+		return -EINVAL;
+
+	pwrdm = omap_hwmod_get_pwrdm(od->hwmods[0]);
+	if (!pwrdm)
+		return -EINVAL;
+
+	if (request)
+		*pwrst = pwrdm_read_next_pwrst(pwrdm);
+
+	if (*pwrst > PWRDM_POWER_RET)
+		return 0;
+
+	next_pwrst = request ? PWRDM_POWER_ON : *pwrst;
+
+	return pwrdm_set_next_pwrst(pwrdm, next_pwrst);
+}
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index dfb961d8c21b..84b99d5841ae 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -190,6 +190,14 @@ static int iommu_enable(struct omap_iommu *obj)
 	struct platform_device *pdev = to_platform_device(obj->dev);
 	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
+	if (pdata && pdata->set_pwrdm_constraint) {
+		err = pdata->set_pwrdm_constraint(pdev, true, &obj->pwrst);
+		if (err) {
+			dev_warn(obj->dev, "pwrdm_constraint failed to be set, status = %d\n",
+				 err);
+		}
+	}
+
 	if (pdata && pdata->deassert_reset) {
 		err = pdata->deassert_reset(pdev, pdata->reset_name);
 		if (err) {
@@ -209,6 +217,7 @@ static void iommu_disable(struct omap_iommu *obj)
 {
 	struct platform_device *pdev = to_platform_device(obj->dev);
 	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	int ret;
 
 	omap2_iommu_disable(obj);
 
@@ -216,6 +225,14 @@ static void iommu_disable(struct omap_iommu *obj)
 
 	if (pdata && pdata->assert_reset)
 		pdata->assert_reset(pdev, pdata->reset_name);
+
+	if (pdata && pdata->set_pwrdm_constraint) {
+		ret = pdata->set_pwrdm_constraint(pdev, false, &obj->pwrst);
+		if (ret) {
+			dev_warn(obj->dev, "pwrdm_constraint failed to be reset, status = %d\n",
+				 ret);
+		}
+	}
 }
 
 /*
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 09968a02d291..aac1ca65ef9d 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -78,6 +78,8 @@ struct omap_iommu {
 
 	struct iommu_device iommu;
 	struct iommu_group *group;
+
+	u8 pwrst;
 };
 
 /**
diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index 44d913a7580c..1ed60265a20e 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -13,4 +13,6 @@ struct iommu_platform_data {
 	const char *reset_name;
 	int (*assert_reset)(struct platform_device *pdev, const char *name);
 	int (*deassert_reset)(struct platform_device *pdev, const char *name);
+	int (*set_pwrdm_constraint)(struct platform_device *pdev, bool request,
+				    u8 *pwrst);
 };
-- 
cgit v1.2.3


From 74c116df66d25a9fb48d44ce545a505edc5fbbba Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 7 Aug 2019 11:26:46 +0300
Subject: iommu/omap: add pdata ops for omap_device_enable/idle

Add two new platform data ops to allow the OMAP iommu driver to
be able to invoke the omap_device_enable and omap_device_idle
from within the driver. These are being added to streamline the
sequence between managing the hard reset lines and the clocks
during the suspend path, as the default device pm_domain callback
sequences in omap_device layer are not conducive for the OMAP
IOMMU driver.

This could have been done by expanding the existing pdata ops
for reset management (like in the OMAP remoteproc driver), but
this was chosen to avoid adding additional code in the separate
file in the mach-omap2 layer.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/platform_data/iommu-omap.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h
index 1ed60265a20e..8474a0208b34 100644
--- a/include/linux/platform_data/iommu-omap.h
+++ b/include/linux/platform_data/iommu-omap.h
@@ -13,6 +13,8 @@ struct iommu_platform_data {
 	const char *reset_name;
 	int (*assert_reset)(struct platform_device *pdev, const char *name);
 	int (*deassert_reset)(struct platform_device *pdev, const char *name);
+	int (*device_enable)(struct platform_device *pdev);
+	int (*device_idle)(struct platform_device *pdev);
 	int (*set_pwrdm_constraint)(struct platform_device *pdev, bool request,
 				    u8 *pwrst);
 };
-- 
cgit v1.2.3


From d9c4d8a6cc0f852adf3829fbe40e2e3f6213b0c6 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Wed, 7 Aug 2019 11:26:50 +0300
Subject: iommu/omap: introduce new API for runtime suspend/resume control

This patch adds the support for the OMAP IOMMUs to be suspended
during the auto suspend/resume of the OMAP remoteproc devices. The
remote processors are auto suspended after a certain time of idle
or inactivity period. This is done by introducing two new API,
omap_iommu_domain_deactivate() and omap_iommu_domain_activate()
to allow the client users/master devices of the IOMMU devices to
deactivate & activate the IOMMU devices from their runtime
suspend/resume operations. There is no API exposed by the IOMMU
layer at present, and so these new API are added directly in the
OMAP IOMMU driver to minimize framework changes.

The API simply decrements and increments the runtime usage count
of the IOMMU devices and let the context be saved/restored using
the existing runtime pm callbacks.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/omap-iommu.h |  8 +++++++
 2 files changed, 66 insertions(+)

(limited to 'include')

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index ef62ac9057bb..ecf14001beed 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -938,6 +938,64 @@ static void omap_iommu_restore_tlb_entries(struct omap_iommu *obj)
 	iotlb_lock_set(obj, &l);
 }
 
+/**
+ * omap_iommu_domain_deactivate - deactivate attached iommu devices
+ * @domain: iommu domain attached to the target iommu device
+ *
+ * This API allows the client devices of IOMMU devices to suspend
+ * the IOMMUs they control at runtime, after they are idled and
+ * suspended all activity. System Suspend will leverage the PM
+ * driver late callbacks.
+ **/
+int omap_iommu_domain_deactivate(struct iommu_domain *domain)
+{
+	struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
+	struct omap_iommu_device *iommu;
+	struct omap_iommu *oiommu;
+	int i;
+
+	if (!omap_domain->dev)
+		return 0;
+
+	iommu = omap_domain->iommus;
+	iommu += (omap_domain->num_iommus - 1);
+	for (i = 0; i < omap_domain->num_iommus; i++, iommu--) {
+		oiommu = iommu->iommu_dev;
+		pm_runtime_put_sync(oiommu->dev);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_domain_deactivate);
+
+/**
+ * omap_iommu_domain_activate - activate attached iommu devices
+ * @domain: iommu domain attached to the target iommu device
+ *
+ * This API allows the client devices of IOMMU devices to resume the
+ * IOMMUs they control at runtime, before they can resume operations.
+ * System Resume will leverage the PM driver late callbacks.
+ **/
+int omap_iommu_domain_activate(struct iommu_domain *domain)
+{
+	struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
+	struct omap_iommu_device *iommu;
+	struct omap_iommu *oiommu;
+	int i;
+
+	if (!omap_domain->dev)
+		return 0;
+
+	iommu = omap_domain->iommus;
+	for (i = 0; i < omap_domain->num_iommus; i++, iommu++) {
+		oiommu = iommu->iommu_dev;
+		pm_runtime_get_sync(oiommu->dev);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(omap_iommu_domain_activate);
+
 /**
  * omap_iommu_runtime_suspend - disable an iommu device
  * @dev:	iommu device
diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h
index 153bf25b4df3..36b645726813 100644
--- a/include/linux/omap-iommu.h
+++ b/include/linux/omap-iommu.h
@@ -10,12 +10,20 @@
 #ifndef _OMAP_IOMMU_H_
 #define _OMAP_IOMMU_H_
 
+struct iommu_domain;
+
 #ifdef CONFIG_OMAP_IOMMU
 extern void omap_iommu_save_ctx(struct device *dev);
 extern void omap_iommu_restore_ctx(struct device *dev);
+
+int omap_iommu_domain_deactivate(struct iommu_domain *domain);
+int omap_iommu_domain_activate(struct iommu_domain *domain);
 #else
 static inline void omap_iommu_save_ctx(struct device *dev) {}
 static inline void omap_iommu_restore_ctx(struct device *dev) {}
+
+static inline int omap_iommu_domain_deactivate(struct iommu_domain *domain) {}
+static inline int omap_iommu_domain_activate(struct iommu_domain *domain) {}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 9ffeb6d08c3a4bbd7b1e33711b241f511e2ded79 Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Fri, 9 Aug 2019 12:03:12 +0100
Subject: PSCI: cpuidle: Refactor CPU suspend power_state parameter handling

Current PSCI code handles idle state entry through the
psci_cpu_suspend_enter() API, that takes an idle state index as a
parameter and convert the index into a previously initialized
power_state parameter before calling the PSCI.CPU_SUSPEND() with it.

This is unwieldly, since it forces the PSCI firmware layer to keep track
of power_state parameter for every idle state so that the
index->power_state conversion can be made in the PSCI firmware layer
instead of the CPUidle driver implementations.

Move the power_state handling out of drivers/firmware/psci
into the respective ACPI/DT PSCI CPUidle backends and convert
the psci_cpu_suspend_enter() API to get the power_state
parameter as input, which makes it closer to its firmware
interface PSCI.CPU_SUSPEND() API.

A notable side effect is that the PSCI ACPI/DT CPUidle backends
now can directly handle (and if needed update) power_state
parameters before handing them over to the PSCI firmware
interface to trigger PSCI.CPU_SUSPEND() calls.

Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/cpuidle.c    |  49 +++++++++++--
 drivers/cpuidle/cpuidle-psci.c |  87 ++++++++++++++++++++++-
 drivers/firmware/psci/psci.c   | 157 +++--------------------------------------
 include/linux/cpuidle.h        |  17 +++--
 include/linux/psci.h           |   4 +-
 5 files changed, 154 insertions(+), 160 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index 619e0ebb8399..e4d6af2fdec7 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -47,17 +47,58 @@ int arm_cpuidle_suspend(int index)
 
 #define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
 
+static int psci_acpi_cpu_init_idle(unsigned int cpu)
+{
+	int i, count;
+	struct acpi_lpi_state *lpi;
+	struct acpi_processor *pr = per_cpu(processors, cpu);
+
+	/*
+	 * If the PSCI cpu_suspend function hook has not been initialized
+	 * idle states must not be enabled, so bail out
+	 */
+	if (!psci_ops.cpu_suspend)
+		return -EOPNOTSUPP;
+
+	if (unlikely(!pr || !pr->flags.has_lpi))
+		return -EINVAL;
+
+	count = pr->power.count - 1;
+	if (count <= 0)
+		return -ENODEV;
+
+	for (i = 0; i < count; i++) {
+		u32 state;
+
+		lpi = &pr->power.lpi_states[i + 1];
+		/*
+		 * Only bits[31:0] represent a PSCI power_state while
+		 * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
+		 */
+		state = lpi->address;
+		if (!psci_power_state_is_valid(state)) {
+			pr_warn("Invalid PSCI power state %#x\n", state);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 int acpi_processor_ffh_lpi_probe(unsigned int cpu)
 {
-	return psci_cpu_init_idle(cpu);
+	return psci_acpi_cpu_init_idle(cpu);
 }
 
 int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
 {
+	u32 state = lpi->address;
+
 	if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
-		return CPU_PM_CPU_IDLE_ENTER_RETENTION(psci_cpu_suspend_enter,
-						lpi->index);
+		return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(psci_cpu_suspend_enter,
+						lpi->index, state);
 	else
-		return CPU_PM_CPU_IDLE_ENTER(psci_cpu_suspend_enter, lpi->index);
+		return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter,
+					     lpi->index, state);
 }
 #endif
diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index ab1dea918ea3..f3c1a2396f98 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -22,10 +22,15 @@
 
 #include "dt_idle_states.h"
 
+static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
+
 static int psci_enter_idle_state(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv, int idx)
 {
-	return CPU_PM_CPU_IDLE_ENTER(psci_cpu_suspend_enter, idx);
+	u32 *state = __this_cpu_read(psci_power_state);
+
+	return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter,
+					   idx, state[idx - 1]);
 }
 
 static struct cpuidle_driver psci_idle_driver __initdata = {
@@ -51,6 +56,86 @@ static const struct of_device_id psci_idle_state_match[] __initconst = {
 	{ },
 };
 
+static int __init psci_dt_parse_state_node(struct device_node *np, u32 *state)
+{
+	int err = of_property_read_u32(np, "arm,psci-suspend-param", state);
+
+	if (err) {
+		pr_warn("%pOF missing arm,psci-suspend-param property\n", np);
+		return err;
+	}
+
+	if (!psci_power_state_is_valid(*state)) {
+		pr_warn("Invalid PSCI power state %#x\n", *state);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu)
+{
+	int i, ret = 0, count = 0;
+	u32 *psci_states;
+	struct device_node *state_node;
+
+	/* Count idle states */
+	while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+					      count))) {
+		count++;
+		of_node_put(state_node);
+	}
+
+	if (!count)
+		return -ENODEV;
+
+	psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+	if (!psci_states)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+		ret = psci_dt_parse_state_node(state_node, &psci_states[i]);
+		of_node_put(state_node);
+
+		if (ret)
+			goto free_mem;
+
+		pr_debug("psci-power-state %#x index %d\n", psci_states[i], i);
+	}
+
+	/* Idle states parsed correctly, initialize per-cpu pointer */
+	per_cpu(psci_power_state, cpu) = psci_states;
+	return 0;
+
+free_mem:
+	kfree(psci_states);
+	return ret;
+}
+
+static __init int psci_cpu_init_idle(unsigned int cpu)
+{
+	struct device_node *cpu_node;
+	int ret;
+
+	/*
+	 * If the PSCI cpu_suspend function hook has not been initialized
+	 * idle states must not be enabled, so bail out
+	 */
+	if (!psci_ops.cpu_suspend)
+		return -EOPNOTSUPP;
+
+	cpu_node = of_cpu_device_node_get(cpu);
+	if (!cpu_node)
+		return -ENODEV;
+
+	ret = psci_dt_cpu_init_idle(cpu_node, cpu);
+
+	of_node_put(cpu_node);
+
+	return ret;
+}
+
 static int __init psci_idle_init_cpu(int cpu)
 {
 	struct cpuidle_driver *drv;
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index b343f8a34c6a..84f4ff351c62 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -103,7 +103,7 @@ static inline bool psci_power_state_loses_context(u32 state)
 	return state & mask;
 }
 
-static inline bool psci_power_state_is_valid(u32 state)
+bool psci_power_state_is_valid(u32 state)
 {
 	const u32 valid_mask = psci_has_ext_power_state() ?
 			       PSCI_1_0_EXT_POWER_STATE_MASK :
@@ -277,162 +277,21 @@ static int __init psci_features(u32 psci_func_id)
 }
 
 #ifdef CONFIG_CPU_IDLE
-static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
-
-static int psci_dt_parse_state_node(struct device_node *np, u32 *state)
-{
-	int err = of_property_read_u32(np, "arm,psci-suspend-param", state);
-
-	if (err) {
-		pr_warn("%pOF missing arm,psci-suspend-param property\n", np);
-		return err;
-	}
-
-	if (!psci_power_state_is_valid(*state)) {
-		pr_warn("Invalid PSCI power state %#x\n", *state);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu)
-{
-	int i, ret = 0, count = 0;
-	u32 *psci_states;
-	struct device_node *state_node;
-
-	/* Count idle states */
-	while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
-					      count))) {
-		count++;
-		of_node_put(state_node);
-	}
-
-	if (!count)
-		return -ENODEV;
-
-	psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
-	if (!psci_states)
-		return -ENOMEM;
-
-	for (i = 0; i < count; i++) {
-		state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
-		ret = psci_dt_parse_state_node(state_node, &psci_states[i]);
-		of_node_put(state_node);
-
-		if (ret)
-			goto free_mem;
-
-		pr_debug("psci-power-state %#x index %d\n", psci_states[i], i);
-	}
-
-	/* Idle states parsed correctly, initialize per-cpu pointer */
-	per_cpu(psci_power_state, cpu) = psci_states;
-	return 0;
-
-free_mem:
-	kfree(psci_states);
-	return ret;
-}
-
-#ifdef CONFIG_ACPI
-#include <acpi/processor.h>
-
-static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu)
-{
-	int i, count;
-	u32 *psci_states;
-	struct acpi_lpi_state *lpi;
-	struct acpi_processor *pr = per_cpu(processors, cpu);
-
-	if (unlikely(!pr || !pr->flags.has_lpi))
-		return -EINVAL;
-
-	count = pr->power.count - 1;
-	if (count <= 0)
-		return -ENODEV;
-
-	psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
-	if (!psci_states)
-		return -ENOMEM;
-
-	for (i = 0; i < count; i++) {
-		u32 state;
-
-		lpi = &pr->power.lpi_states[i + 1];
-		/*
-		 * Only bits[31:0] represent a PSCI power_state while
-		 * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification
-		 */
-		state = lpi->address;
-		if (!psci_power_state_is_valid(state)) {
-			pr_warn("Invalid PSCI power state %#x\n", state);
-			kfree(psci_states);
-			return -EINVAL;
-		}
-		psci_states[i] = state;
-	}
-	/* Idle states parsed correctly, initialize per-cpu pointer */
-	per_cpu(psci_power_state, cpu) = psci_states;
-	return 0;
-}
-#else
-static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu)
-{
-	return -EINVAL;
-}
-#endif
-
-int psci_cpu_init_idle(unsigned int cpu)
-{
-	struct device_node *cpu_node;
-	int ret;
-
-	/*
-	 * If the PSCI cpu_suspend function hook has not been initialized
-	 * idle states must not be enabled, so bail out
-	 */
-	if (!psci_ops.cpu_suspend)
-		return -EOPNOTSUPP;
-
-	if (!acpi_disabled)
-		return psci_acpi_cpu_init_idle(cpu);
-
-	cpu_node = of_get_cpu_node(cpu, NULL);
-	if (!cpu_node)
-		return -ENODEV;
-
-	ret = psci_dt_cpu_init_idle(cpu_node, cpu);
-
-	of_node_put(cpu_node);
-
-	return ret;
-}
-
-static int psci_suspend_finisher(unsigned long index)
+static int psci_suspend_finisher(unsigned long state)
 {
-	u32 *state = __this_cpu_read(psci_power_state);
+	u32 power_state = state;
 
-	return psci_ops.cpu_suspend(state[index - 1],
-				    __pa_symbol(cpu_resume));
+	return psci_ops.cpu_suspend(power_state, __pa_symbol(cpu_resume));
 }
 
-int psci_cpu_suspend_enter(unsigned long index)
+int psci_cpu_suspend_enter(u32 state)
 {
 	int ret;
-	u32 *state = __this_cpu_read(psci_power_state);
-	/*
-	 * idle state index 0 corresponds to wfi, should never be called
-	 * from the cpu_suspend operations
-	 */
-	if (WARN_ON_ONCE(!index))
-		return -EINVAL;
 
-	if (!psci_power_state_loses_context(state[index - 1]))
-		ret = psci_ops.cpu_suspend(state[index - 1], 0);
+	if (!psci_power_state_loses_context(state))
+		ret = psci_ops.cpu_suspend(state, 0);
 	else
-		ret = cpu_suspend(index, psci_suspend_finisher);
+		ret = cpu_suspend(state, psci_suspend_finisher);
 
 	return ret;
 }
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index bb9a0db89f1a..12ae4b87494e 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -256,7 +256,10 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 {return 0;}
 #endif
 
-#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, is_retention) \
+#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter,			\
+				idx,					\
+				state,					\
+				is_retention)				\
 ({									\
 	int __ret = 0;							\
 									\
@@ -268,7 +271,7 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 	if (!is_retention)						\
 		__ret =  cpu_pm_enter();				\
 	if (!__ret) {							\
-		__ret = low_level_idle_enter(idx);			\
+		__ret = low_level_idle_enter(state);			\
 		if (!is_retention)					\
 			cpu_pm_exit();					\
 	}								\
@@ -277,9 +280,15 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
 })
 
 #define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx)	\
-	__CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 0)
+	__CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0)
 
 #define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx)	\
-	__CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 1)
+	__CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1)
+
+#define CPU_PM_CPU_IDLE_ENTER_PARAM(low_level_idle_enter, idx, state)	\
+	__CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0)
+
+#define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(low_level_idle_enter, idx, state)	\
+	__CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1)
 
 #endif /* _LINUX_CPUIDLE_H */
diff --git a/include/linux/psci.h b/include/linux/psci.h
index a8a15613c157..e2bacc6fd2f2 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -15,8 +15,8 @@
 
 bool psci_tos_resident_on(int cpu);
 
-int psci_cpu_init_idle(unsigned int cpu);
-int psci_cpu_suspend_enter(unsigned long index);
+int psci_cpu_suspend_enter(u32 state);
+bool psci_power_state_is_valid(u32 state);
 
 enum psci_conduit {
 	PSCI_CONDUIT_NONE,
-- 
cgit v1.2.3


From 3dc29b8b2062075602c7aff1514a120b4ed0187f Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 8 Aug 2019 14:52:33 +0900
Subject: ASoC: soc-core: support snd_soc_dai_link_component for aux_dev

To find aux_dev, ASoC is using .name, codec_name, codec_of_node.
Here, .name is used to fallback in case of no codec.

But, we already have this kind of component finding method by
snd_soc_dai_link_component and soc_find_component().
We shouldn't have duplicated implementation to do same things.
This patch adds snd_soc_dai_link_component support to finding aux_dev.

Now, no driver is using only .name.
All drivers are using codec_name and/or codec_of_node.
This means no driver is finding component from .name so far.
(Actually almost all drivers are using .name as just "device name",
 not for finding component...)

This patch
1) add snd_soc_dai_link_component support for aux_dev. legacy style will
   be removed if all drivers are switched to new style.
2) try to find component via snd_soc_dai_link_component.
   Then, it doesn't try to find via .name, because no driver is using
   it so far.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87y3046wcf.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  |  7 +++++++
 sound/soc/soc-core.c | 36 ++++++++++--------------------------
 2 files changed, 17 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 85ad971e9432..fd6ecea48fc0 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -941,6 +941,7 @@ struct snd_soc_dai_link {
 #define COMP_CPU(_dai)			{ .dai_name = _dai, }
 #define COMP_CODEC(_name, _dai)		{ .name = _name, .dai_name = _dai, }
 #define COMP_PLATFORM(_name)		{ .name = _name }
+#define COMP_AUX(_name)			{ .name = _name }
 #define COMP_DUMMY()			{ .name = "snd-soc-dummy", .dai_name = "snd-soc-dummy-dai", }
 
 extern struct snd_soc_dai_link_component null_dailink_component[0];
@@ -971,6 +972,12 @@ struct snd_soc_aux_dev {
 	const char *codec_name;
 	struct device_node *codec_of_node;
 
+	/*
+	 * name, codec_name, codec_of_node will be replaced
+	 * into dlc. don't use both in the same time
+	 */
+	struct snd_soc_dai_link_component dlc;
+
 	/* codec/machine specific init - e.g. add machine controls */
 	int (*init)(struct snd_soc_component *component);
 };
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index bf45e60eb34f..56b99e340dda 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1537,38 +1537,22 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num)
 {
 	struct snd_soc_aux_dev *aux_dev = &card->aux_dev[num];
 	struct snd_soc_component *component;
-	struct snd_soc_dai_link_component dlc;
 
-	if (aux_dev->codec_of_node || aux_dev->codec_name) {
-		/* codecs, usually analog devices */
-		dlc.name = aux_dev->codec_name;
-		dlc.of_node = aux_dev->codec_of_node;
-		component = soc_find_component(&dlc);
-		if (!component) {
-			if (dlc.of_node)
-				dlc.name = of_node_full_name(dlc.of_node);
-			goto err_defer;
-		}
-	} else if (aux_dev->name) {
-		/* generic components */
-		dlc.name = aux_dev->name;
-		dlc.of_node = NULL;
-		component = soc_find_component(&dlc);
-		if (!component)
-			goto err_defer;
-	} else {
-		dev_err(card->dev, "ASoC: Invalid auxiliary device\n");
-		return -EINVAL;
-	}
+	/* remove me */
+	if (aux_dev->codec_name)
+		aux_dev->dlc.name = aux_dev->codec_name;
+	if (aux_dev->codec_of_node)
+		aux_dev->dlc.of_node = aux_dev->codec_of_node;
+
+	/* codecs, usually analog devices */
+	component = soc_find_component(&aux_dev->dlc);
+	if (!component)
+		return -EPROBE_DEFER;
 
 	component->init = aux_dev->init;
 	list_add(&component->card_aux_list, &card->aux_comp_list);
 
 	return 0;
-
-err_defer:
-	dev_err(card->dev, "ASoC: %s not registered\n", dlc.name);
-	return -EPROBE_DEFER;
 }
 
 static int soc_probe_aux_devices(struct snd_soc_card *card)
-- 
cgit v1.2.3


From a48b561d873d1d9fda55782d275eff94ec647863 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 8 Aug 2019 14:54:39 +0900
Subject: ASoC: soc-core: remove legacy style of aux_dev

Now all drivers are using snd_soc_dai_link_component for aux_dev.
Let's remove legacy style

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87h86s6w8x.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  | 9 ---------
 sound/soc/soc-core.c | 6 ------
 2 files changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index fd6ecea48fc0..2fc56e5963f3 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -963,19 +963,10 @@ struct snd_soc_codec_conf {
 };
 
 struct snd_soc_aux_dev {
-	const char *name;		/* Codec name */
-
 	/*
 	 * specify multi-codec either by device name, or by
 	 * DT/OF node, but not both.
 	 */
-	const char *codec_name;
-	struct device_node *codec_of_node;
-
-	/*
-	 * name, codec_name, codec_of_node will be replaced
-	 * into dlc. don't use both in the same time
-	 */
 	struct snd_soc_dai_link_component dlc;
 
 	/* codec/machine specific init - e.g. add machine controls */
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 56b99e340dda..4af382d52675 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1538,12 +1538,6 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num)
 	struct snd_soc_aux_dev *aux_dev = &card->aux_dev[num];
 	struct snd_soc_component *component;
 
-	/* remove me */
-	if (aux_dev->codec_name)
-		aux_dev->dlc.name = aux_dev->codec_name;
-	if (aux_dev->codec_of_node)
-		aux_dev->dlc.of_node = aux_dev->codec_of_node;
-
 	/* codecs, usually analog devices */
 	component = soc_find_component(&aux_dev->dlc);
 	if (!component)
-- 
cgit v1.2.3


From c2b71c71037bea7765aa6ff37824520d19108769 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 8 Aug 2019 14:54:44 +0900
Subject: ASoC: soc-core: add for_each_xxx macro for aux_dev

To be more readable code, this patch adds
new for_each_xxx() macro for aux_dev.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87ftmc6w8s.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h        | 10 ++++++++++
 sound/soc/meson/axg-card.c |  7 ++++---
 sound/soc/soc-core.c       | 15 ++++++++-------
 3 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 2fc56e5963f3..b1fe5ebea257 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1087,6 +1087,10 @@ struct snd_soc_card {
 	for ((i) = 0;							\
 	     ((i) < (card)->num_links) && ((link) = &(card)->dai_link[i]); \
 	     (i)++)
+#define for_each_card_pre_auxs(card, i, aux)				\
+	for ((i) = 0;							\
+	     ((i) < (card)->num_aux_devs) && ((aux) = &(card)->aux_dev[i]); \
+	     (i)++)
 
 #define for_each_card_links(card, link)				\
 	list_for_each_entry(link, &(card)->dai_link_list, list)
@@ -1098,6 +1102,12 @@ struct snd_soc_card {
 #define for_each_card_rtds_safe(card, rtd, _rtd)	\
 	list_for_each_entry_safe(rtd, _rtd, &(card)->rtd_list, list)
 
+#define for_each_card_auxs(card, component)			\
+	list_for_each_entry(component, &card->aux_comp_list, card_aux_list)
+#define for_each_card_auxs_safe(card, component, _comp)	\
+	list_for_each_entry_safe(component, _comp,	\
+				 &card->aux_comp_list, card_aux_list)
+
 #define for_each_card_components(card, component)			\
 	list_for_each_entry(component, &(card)->component_dev_list, card_list)
 
diff --git a/sound/soc/meson/axg-card.c b/sound/soc/meson/axg-card.c
index 6283e5025548..1f698adde506 100644
--- a/sound/soc/meson/axg-card.c
+++ b/sound/soc/meson/axg-card.c
@@ -111,6 +111,7 @@ static void axg_card_clean_references(struct axg_card *priv)
 	struct snd_soc_card *card = &priv->card;
 	struct snd_soc_dai_link *link;
 	struct snd_soc_dai_link_component *codec;
+	struct snd_soc_aux_dev *aux;
 	int i, j;
 
 	if (card->dai_link) {
@@ -123,8 +124,8 @@ static void axg_card_clean_references(struct axg_card *priv)
 	}
 
 	if (card->aux_dev) {
-		for (i = 0; i < card->num_aux_devs; i++)
-			of_node_put(card->aux_dev[i].dlc.of_node);
+		for_each_card_pre_auxs(card, i, aux)
+			of_node_put(aux->dlc.of_node);
 	}
 
 	kfree(card->dai_link);
@@ -157,7 +158,7 @@ static int axg_card_add_aux_devices(struct snd_soc_card *card)
 	card->aux_dev = aux;
 	card->num_aux_devs = num;
 
-	for (i = 0; i < card->num_aux_devs; i++, aux++) {
+	for_each_card_pre_auxs(card, i, aux) {
 		aux->dlc.of_node =
 			of_parse_phandle(node, "audio-aux-devs", i);
 		if (!aux->dlc.of_node)
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 4af382d52675..e9f44505cc3e 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1533,9 +1533,9 @@ static int soc_probe_link_dais(struct snd_soc_card *card,
 	return ret;
 }
 
-static int soc_bind_aux_dev(struct snd_soc_card *card, int num)
+static int soc_bind_aux_dev(struct snd_soc_card *card,
+			    struct snd_soc_aux_dev *aux_dev)
 {
-	struct snd_soc_aux_dev *aux_dev = &card->aux_dev[num];
 	struct snd_soc_component *component;
 
 	/* codecs, usually analog devices */
@@ -1544,6 +1544,7 @@ static int soc_bind_aux_dev(struct snd_soc_card *card, int num)
 		return -EPROBE_DEFER;
 
 	component->init = aux_dev->init;
+	/* see for_each_card_auxs */
 	list_add(&component->card_aux_list, &card->aux_comp_list);
 
 	return 0;
@@ -1556,7 +1557,7 @@ static int soc_probe_aux_devices(struct snd_soc_card *card)
 	int ret;
 
 	for_each_comp_order(order) {
-		list_for_each_entry(comp, &card->aux_comp_list, card_aux_list) {
+		for_each_card_auxs(card, comp) {
 			if (comp->driver->probe_order == order) {
 				ret = soc_probe_component(card,	comp);
 				if (ret < 0) {
@@ -1578,8 +1579,7 @@ static void soc_remove_aux_devices(struct snd_soc_card *card)
 	int order;
 
 	for_each_comp_order(order) {
-		list_for_each_entry_safe(comp, _comp,
-			&card->aux_comp_list, card_aux_list) {
+		for_each_card_auxs_safe(card, comp, _comp) {
 
 			if (comp->driver->remove_order == order) {
 				soc_remove_component(comp);
@@ -1913,6 +1913,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 {
 	struct snd_soc_pcm_runtime *rtd;
 	struct snd_soc_dai_link *dai_link;
+	struct snd_soc_aux_dev *aux;
 	int ret, i, order;
 
 	mutex_lock(&client_mutex);
@@ -1943,8 +1944,8 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 	}
 
 	/* bind aux_devs too */
-	for (i = 0; i < card->num_aux_devs; i++) {
-		ret = soc_bind_aux_dev(card, i);
+	for_each_card_pre_auxs(card, i, aux) {
+		ret = soc_bind_aux_dev(card, aux);
 		if (ret != 0)
 			goto probe_end;
 	}
-- 
cgit v1.2.3


From 73499ad21d595638213f2a5f8b9b58259fa0cae2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Fri, 9 Aug 2019 18:09:13 +0200
Subject: iommu/omap: Fix compilation warnings

A recent patch introduced a new compiler warning because
two functions with non-void return type have no return
statement in omap-iommu.h for CONFIG_OMAP_IOMMU=n.

Fix this by adding return statements to these functions.

Fixes: d9c4d8a6cc0f8 ('iommu/omap: introduce new API for runtime suspend/resume control')
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/omap-iommu.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/omap-iommu.h b/include/linux/omap-iommu.h
index 36b645726813..2c32ca09df02 100644
--- a/include/linux/omap-iommu.h
+++ b/include/linux/omap-iommu.h
@@ -22,8 +22,15 @@ int omap_iommu_domain_activate(struct iommu_domain *domain);
 static inline void omap_iommu_save_ctx(struct device *dev) {}
 static inline void omap_iommu_restore_ctx(struct device *dev) {}
 
-static inline int omap_iommu_domain_deactivate(struct iommu_domain *domain) {}
-static inline int omap_iommu_domain_activate(struct iommu_domain *domain) {}
+static inline int omap_iommu_domain_deactivate(struct iommu_domain *domain)
+{
+	return -ENODEV;
+}
+
+static inline int omap_iommu_domain_activate(struct iommu_domain *domain)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif
-- 
cgit v1.2.3


From 28875945ba98d1b47a8a706812b6494d165bb0a0 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Tue, 16 Jul 2019 18:12:22 -0400
Subject: rcu: Add support for consolidated-RCU reader checking

This commit adds RCU-reader checks to list_for_each_entry_rcu() and
hlist_for_each_entry_rcu().  These checks are optional, and are indicated
by a lockdep expression passed to a new optional argument to these two
macros.  If this optional lockdep expression is omitted, these two macros
act as before, checking for an RCU read-side critical section.

Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
[ paulmck: Update to eliminate return within macro and update comment. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rculist.h  | 32 +++++++++++++---
 include/linux/rcupdate.h |  7 ++++
 kernel/rcu/Kconfig.debug | 11 ++++++
 kernel/rcu/update.c      | 96 +++++++++++++++++++++++++++++++-----------------
 4 files changed, 108 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 932296144131..4158b7212936 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -40,6 +40,24 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list)
  */
 #define list_next_rcu(list)	(*((struct list_head __rcu **)(&(list)->next)))
 
+/*
+ * Check during list traversal that we are within an RCU reader
+ */
+
+#define check_arg_count_one(dummy)
+
+#ifdef CONFIG_PROVE_RCU_LIST
+#define __list_check_rcu(dummy, cond, extra...)				\
+	({								\
+	check_arg_count_one(extra);					\
+	RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(),		\
+			 "RCU-list traversed in non-reader section!");	\
+	 })
+#else
+#define __list_check_rcu(dummy, cond, extra...)				\
+	({ check_arg_count_one(extra); })
+#endif
+
 /*
  * Insert a new entry between two known consecutive entries.
  *
@@ -343,14 +361,16 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the list_head within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as list_add_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define list_for_each_entry_rcu(pos, head, member) \
-	for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
-		&pos->member != (head); \
+#define list_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = list_entry_rcu((head)->next, typeof(*pos), member);	\
+		&pos->member != (head);					\
 		pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 /**
@@ -616,13 +636,15 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n,
  * @pos:	the type * to use as a loop cursor.
  * @head:	the head for your list.
  * @member:	the name of the hlist_node within the struct.
+ * @cond:	optional lockdep expression if called from non-RCU protection.
  *
  * This list-traversal primitive may safely run concurrently with
  * the _rcu list-mutation primitives such as hlist_add_head_rcu()
  * as long as the traversal is guarded by rcu_read_lock().
  */
-#define hlist_for_each_entry_rcu(pos, head, member)			\
-	for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
+#define hlist_for_each_entry_rcu(pos, head, member, cond...)		\
+	for (__list_check_rcu(dummy, ## cond, 0),			\
+	     pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
 			typeof(*(pos)), member);			\
 		pos;							\
 		pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index bfcafbc1e301..80d6056f5855 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -221,6 +221,7 @@ int debug_lockdep_rcu_enabled(void);
 int rcu_read_lock_held(void);
 int rcu_read_lock_bh_held(void);
 int rcu_read_lock_sched_held(void);
+int rcu_read_lock_any_held(void);
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -241,6 +242,12 @@ static inline int rcu_read_lock_sched_held(void)
 {
 	return !preemptible();
 }
+
+static inline int rcu_read_lock_any_held(void)
+{
+	return !preemptible();
+}
+
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 5ec3ea4028e2..4aa02eee8f6c 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -8,6 +8,17 @@ menu "RCU Debugging"
 config PROVE_RCU
 	def_bool PROVE_LOCKING
 
+config PROVE_RCU_LIST
+	bool "RCU list lockdep debugging"
+	depends on PROVE_RCU && RCU_EXPERT
+	default n
+	help
+	  Enable RCU lockdep checking for list usages. By default it is
+	  turned off since there are several list RCU users that still
+	  need to be converted to pass a lockdep expression. To prevent
+	  false-positive splats, we keep it default disabled but once all
+	  users are converted, we can remove this config option.
+
 config TORTURE_TEST
 	tristate
 	default n
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 9dd5aeef6e70..38cbd616b381 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -61,9 +61,15 @@ module_param(rcu_normal_after_boot, int, 0);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 /**
- * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
+ * rcu_read_lock_held_common() - might we be in RCU-sched read-side critical section?
+ * @ret:	Best guess answer if lockdep cannot be relied on
  *
- * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
+ * Returns true if lockdep must be ignored, in which case *ret contains
+ * the best guess described below.  Otherwise returns false, in which
+ * case *ret tells the caller nothing and the caller should instead
+ * consult lockdep.
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, set *ret to nonzero iff in an
  * RCU-sched read-side critical section.  In absence of
  * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
  * critical section unless it can prove otherwise.  Note that disabling
@@ -75,30 +81,44 @@ module_param(rcu_normal_after_boot, int, 0);
  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
  * and while lockdep is disabled.
  *
- * Note that if the CPU is in the idle loop from an RCU point of
- * view (ie: that we are in the section between rcu_idle_enter() and
- * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU
- * did an rcu_read_lock().  The reason for this is that RCU ignores CPUs
- * that are in such a section, considering these as in extended quiescent
- * state, so such a CPU is effectively never in an RCU read-side critical
- * section regardless of what RCU primitives it invokes.  This state of
- * affairs is required --- we need to keep an RCU-free window in idle
- * where the CPU may possibly enter into low power mode. This way we can
- * notice an extended quiescent state to other CPUs that started a grace
- * period. Otherwise we would delay any grace period as long as we run in
- * the idle task.
+ * Note that if the CPU is in the idle loop from an RCU point of view (ie:
+ * that we are in the section between rcu_idle_enter() and rcu_idle_exit())
+ * then rcu_read_lock_held() sets *ret to false even if the CPU did an
+ * rcu_read_lock().  The reason for this is that RCU ignores CPUs that are
+ * in such a section, considering these as in extended quiescent state,
+ * so such a CPU is effectively never in an RCU read-side critical section
+ * regardless of what RCU primitives it invokes.  This state of affairs is
+ * required --- we need to keep an RCU-free window in idle where the CPU may
+ * possibly enter into low power mode. This way we can notice an extended
+ * quiescent state to other CPUs that started a grace period. Otherwise
+ * we would delay any grace period as long as we run in the idle task.
  *
- * Similarly, we avoid claiming an SRCU read lock held if the current
+ * Similarly, we avoid claiming an RCU read lock held if the current
  * CPU is offline.
  */
+static bool rcu_read_lock_held_common(bool *ret)
+{
+	if (!debug_lockdep_rcu_enabled()) {
+		*ret = 1;
+		return true;
+	}
+	if (!rcu_is_watching()) {
+		*ret = 0;
+		return true;
+	}
+	if (!rcu_lockdep_current_cpu_online()) {
+		*ret = 0;
+		return true;
+	}
+	return false;
+}
+
 int rcu_read_lock_sched_held(void)
 {
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
 	return lock_is_held(&rcu_sched_lock_map) || !preemptible();
 }
 EXPORT_SYMBOL(rcu_read_lock_sched_held);
@@ -257,12 +277,10 @@ NOKPROBE_SYMBOL(debug_lockdep_rcu_enabled);
  */
 int rcu_read_lock_held(void)
 {
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
 	return lock_is_held(&rcu_lock_map);
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_held);
@@ -284,16 +302,28 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  */
 int rcu_read_lock_bh_held(void)
 {
-	if (!debug_lockdep_rcu_enabled())
-		return 1;
-	if (!rcu_is_watching())
-		return 0;
-	if (!rcu_lockdep_current_cpu_online())
-		return 0;
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
 	return in_softirq() || irqs_disabled();
 }
 EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
+int rcu_read_lock_any_held(void)
+{
+	bool ret;
+
+	if (rcu_read_lock_held_common(&ret))
+		return ret;
+	if (lock_is_held(&rcu_lock_map) ||
+	    lock_is_held(&rcu_bh_lock_map) ||
+	    lock_is_held(&rcu_sched_lock_map))
+		return 1;
+	return !preemptible();
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_any_held);
+
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 /**
-- 
cgit v1.2.3


From fbab8d6735e2643365040bd9e1057addc0d9b4cf Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)" <joel@joelfernandes.org>
Date: Tue, 16 Jul 2019 18:12:23 -0400
Subject: rcu/sync: Remove custom check for RCU readers

The rcu/sync code currently does a special check for being in an RCU
read-side critical section.  With RCU consolidating flavors and the
generic helper added earlier in this series, this check is no longer need.
This commit switches to the generic helper, saving a couple of lines
of code.

Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcu_sync.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index 9b83865d24f9..0027d4c8087c 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -31,9 +31,7 @@ struct rcu_sync {
  */
 static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
 {
-	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
-			 !rcu_read_lock_bh_held() &&
-			 !rcu_read_lock_sched_held(),
+	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
 			 "suspicious rcu_sync_is_idle() usage");
 	return !READ_ONCE(rsp->gp_state); /* GP_IDLE */
 }
-- 
cgit v1.2.3


From 3a5e523479c49b082b8ac291a1a9fbd035c06df5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 9 Aug 2019 15:27:15 +0200
Subject: devlink: remove pointless data_len arg from region snapshot create

The size of the snapshot has to be the same as the size of the region,
therefore no need to pass it again during snapshot creation. Remove the
arg and use region->size instead.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/crdump.c | 7 ++-----
 include/net/devlink.h                       | 2 +-
 net/core/devlink.c                          | 9 +++------
 3 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index 88316c743820..eaf08f7ad128 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -99,8 +99,7 @@ static void mlx4_crdump_collect_crspace(struct mlx4_dev *dev,
 					readl(cr_space + offset);
 
 		err = devlink_region_snapshot_create(crdump->region_crspace,
-						     cr_res_size, crspace_data,
-						     id, &kvfree);
+						     crspace_data, id, &kvfree);
 		if (err) {
 			kvfree(crspace_data);
 			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
@@ -139,9 +138,7 @@ static void mlx4_crdump_collect_fw_health(struct mlx4_dev *dev,
 					readl(health_buf_start + offset);
 
 		err = devlink_region_snapshot_create(crdump->region_fw_health,
-						     HEALTH_BUFFER_SIZE,
-						     health_data,
-						     id, &kvfree);
+						     health_data, id, &kvfree);
 		if (err) {
 			kvfree(health_data);
 			mlx4_warn(dev, "crdump: devlink create %s snapshot id %d err %d\n",
diff --git a/include/net/devlink.h b/include/net/devlink.h
index bc36f942a7d5..451268f64880 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -702,7 +702,7 @@ struct devlink_region *devlink_region_create(struct devlink *devlink,
 					     u64 region_size);
 void devlink_region_destroy(struct devlink_region *region);
 u32 devlink_region_shapshot_id_get(struct devlink *devlink);
-int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+int devlink_region_snapshot_create(struct devlink_region *region,
 				   u8 *data, u32 snapshot_id,
 				   devlink_snapshot_data_dest_t *data_destructor);
 int devlink_info_serial_number_put(struct devlink_info_req *req,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4f40aeace902..e8f0b891f000 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -342,7 +342,6 @@ struct devlink_snapshot {
 	struct list_head list;
 	struct devlink_region *region;
 	devlink_snapshot_data_dest_t *data_destructor;
-	u64 data_len;
 	u8 *data;
 	u32 id;
 };
@@ -3748,8 +3747,8 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
 	if (!snapshot)
 		return -EINVAL;
 
-	if (end_offset > snapshot->data_len || dump)
-		end_offset = snapshot->data_len;
+	if (end_offset > region->size || dump)
+		end_offset = region->size;
 
 	while (curr_offset < end_offset) {
 		u32 data_size;
@@ -6784,12 +6783,11 @@ EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get);
  *	The @snapshot_id should be obtained using the getter function.
  *
  *	@region: devlink region of the snapshot
- *	@data_len: size of snapshot data
  *	@data: snapshot data
  *	@snapshot_id: snapshot id to be created
  *	@data_destructor: pointer to destructor function to free data
  */
-int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
+int devlink_region_snapshot_create(struct devlink_region *region,
 				   u8 *data, u32 snapshot_id,
 				   devlink_snapshot_data_dest_t *data_destructor)
 {
@@ -6819,7 +6817,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len,
 	snapshot->id = snapshot_id;
 	snapshot->region = region;
 	snapshot->data = data;
-	snapshot->data_len = data_len;
 	snapshot->data_destructor = data_destructor;
 
 	list_add_tail(&snapshot->list, &region->snapshot_list);
-- 
cgit v1.2.3


From c04b79b6cfd714144f6a2cf359603d82ee631e62 Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Wed, 7 Aug 2019 19:52:29 -0400
Subject: tcp: add new tcp_mtu_probe_floor sysctl

The current implementation of TCP MTU probing can considerably
underestimate the MTU on lossy connections allowing the MSS to get down to
48. We have found that in almost all of these cases on our networks these
paths can handle much larger MTUs meaning the connections are being
artificially limited. Even though TCP MTU probing can raise the MSS back up
we have seen this not to be the case causing connections to be "stuck" with
an MSS of 48 when heavy loss is present.

Prior to pushing out this change we could not keep TCP MTU probing enabled
b/c of the above reasons. Now with a reasonble floor set we've had it
enabled for the past 6 months.

The new sysctl will still default to TCP_MIN_SND_MSS (48), but gives
administrators the ability to control the floor of MSS probing.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 6 ++++++
 include/net/netns/ipv4.h               | 1 +
 net/ipv4/sysctl_net_ipv4.c             | 9 +++++++++
 net/ipv4/tcp_ipv4.c                    | 1 +
 net/ipv4/tcp_timer.c                   | 2 +-
 5 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index df33674799b5..49e95f438ed7 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -256,6 +256,12 @@ tcp_base_mss - INTEGER
 	Path MTU discovery (MTU probing).  If MTU probing is enabled,
 	this is the initial MSS used by the connection.
 
+tcp_mtu_probe_floor - INTEGER
+	If MTU probing is enabled this caps the minimum MSS used for search_low
+	for the connection.
+
+	Default : 48
+
 tcp_min_snd_mss - INTEGER
 	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
 	as described in RFC 1122 and RFC 6691.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index bc24a8ec1ce5..c0c0791b1912 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -116,6 +116,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_l3mdev_accept;
 #endif
 	int sysctl_tcp_mtu_probing;
+	int sysctl_tcp_mtu_probe_floor;
 	int sysctl_tcp_base_mss;
 	int sysctl_tcp_min_snd_mss;
 	int sysctl_tcp_probe_threshold;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0b980e841927..59ded25acd04 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -819,6 +819,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.extra1		= &tcp_min_snd_mss_min,
 		.extra2		= &tcp_min_snd_mss_max,
 	},
+	{
+		.procname	= "tcp_mtu_probe_floor",
+		.data		= &init_net.ipv4.sysctl_tcp_mtu_probe_floor,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_min_snd_mss_min,
+		.extra2		= &tcp_min_snd_mss_max,
+	},
 	{
 		.procname	= "tcp_probe_threshold",
 		.data		= &init_net.ipv4.sysctl_tcp_probe_threshold,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index d57641cb3477..e0a372676329 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2637,6 +2637,7 @@ static int __net_init tcp_sk_init(struct net *net)
 	net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS;
 	net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
 	net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+	net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS;
 
 	net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
 	net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c801cd37cc2a..dbd9d2d0ee63 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 	} else {
 		mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
 		mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
-		mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len);
+		mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
 		mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
 		icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
 	}
-- 
cgit v1.2.3


From 1555e6fdf062e2ae2514c67f46d475c7ebcce10e Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Wed, 7 Aug 2019 19:52:30 -0400
Subject: tcp: Update TCP_BASE_MSS comment

TCP_BASE_MSS is used as the default initial MSS value when MTU probing is
enabled. Update the comment to reflect this.

Suggested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Josh Hunt <johunt@akamai.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 81e8ade1e6e4..9e9fbfaf052b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -64,7 +64,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
 #define TCP_MIN_MSS		88U
 
-/* The least MTU to use for probing */
+/* The initial MTU to use for probing */
 #define TCP_BASE_MSS		1024
 
 /* probing interval, default to 10 minutes as per RFC4821 */
-- 
cgit v1.2.3


From cd48bdda4fb82c2fe569d97af4217c530168c99c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 8 Aug 2019 13:57:25 +0200
Subject: sock: make cookie generation global instead of per netns

Generating and retrieving socket cookies are a useful feature that is
exposed to BPF for various program types through bpf_get_socket_cookie()
helper.

The fact that the cookie counter is per netns is quite a limitation
for BPF in practice in particular for programs in host namespace that
use socket cookies as part of a map lookup key since they will be
causing socket cookie collisions e.g. when attached to BPF cgroup hooks
or cls_bpf on tc egress in host namespace handling container traffic
from veth or ipvlan devices with peer in different netns. Change the
counter to be global instead.

Socket cookie consumers must assume the value as opqaue in any case.
Not every socket must have a cookie generated and knowledge of the
counter value itself does not provide much value either way hence
conversion to global is fine.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Martynas Pumputis <m@lambda.lt>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 1 -
 include/uapi/linux/bpf.h    | 4 ++--
 net/core/sock_diag.c        | 3 ++-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 4a9da951a794..cb668bc2692d 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -61,7 +61,6 @@ struct net {
 	spinlock_t		rules_mod_lock;
 
 	u32			hash_mix;
-	atomic64_t		cookie_gen;
 
 	struct list_head	list;		/* list of network namespaces */
 	struct list_head	exit_list;	/* To linked to call pernet exit
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index fa1c753dcdbc..a5aa7d3ac6a1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1466,8 +1466,8 @@ union bpf_attr {
  * 		If no cookie has been set yet, generate a new cookie. Once
  * 		generated, the socket cookie remains stable for the life of the
  * 		socket. This helper can be useful for monitoring per socket
- * 		networking traffic statistics as it provides a unique socket
- * 		identifier per namespace.
+ * 		networking traffic statistics as it provides a global socket
+ * 		identifier that can be assumed unique.
  * 	Return
  * 		A 8-byte long non-decreasing number on success, or 0 if the
  * 		socket field is missing inside *skb*.
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 3312a5849a97..c13ffbd33d8d 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 static struct workqueue_struct *broadcast_wq;
+static atomic64_t cookie_gen;
 
 u64 sock_gen_cookie(struct sock *sk)
 {
@@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk)
 
 		if (res)
 			return res;
-		res = atomic64_inc_return(&sock_net(sk)->cookie_gen);
+		res = atomic64_inc_return(&cookie_gen);
 		atomic64_cmpxchg(&sk->sk_cookie, 0, res);
 	}
 }
-- 
cgit v1.2.3


From dd58edc328cec1a0d837f3f2f41e9955ec623e3e Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 1 Jun 2018 21:47:43 +0300
Subject: net/mlx5e: Extend mod header entry with reference counter

List of flows attached to mod header entry is used as implicit reference
counter (mod header entry is deallocated when list becomes free) and as a
mechanism to obtain mod header entry that flow is attached to (through list
head). This is not safe when concurrent modification of list of flows
attached to mod header entry is possible. Proper atomic reference counter
is required to support concurrent access.

As a preparation for extending mod header with reference counting, extract
code that lookups and deletes mod header entry into standalone put/get
helpers. In order to remove this dependency on external locking, extend mod
header entry with reference counter to manage its lifetime and extend flow
structure with direct pointer to mod header entry that flow is attached to.

To remove code duplication between legacy and switchdev mode
implementations that both support mod_hdr functionality, store mod_hdr
table in dedicated structure used by both fdb and kernel namespaces. New
table structure is extended with table lock by one of the following patches
in this series. Implement helper function to get correct mod_hdr table
depending on flow namespace.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Jianbo Liu <jianbol@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 94 +++++++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  2 +-
 include/linux/mlx5/fs.h                           |  4 +
 5 files changed, 61 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 100506a3dd58..ca2161b42c7f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -16,7 +16,7 @@ struct mlx5e_tc_table {
 
 	struct rhashtable               ht;
 
-	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+	struct mod_hdr_tbl mod_hdr;
 	struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
 	DECLARE_HASHTABLE(hairpin_tbl, 8);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index b6a91e3054c0..fe1b04aa910a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -119,6 +119,7 @@ struct mlx5e_tc_flow {
 	 */
 	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
 	struct mlx5e_tc_flow    *peer_flow;
+	struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
 	struct list_head	mod_hdr; /* flows sharing the same mod hdr ID */
 	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
 	struct list_head	hairpin; /* flows sharing the same hairpin */
@@ -194,6 +195,8 @@ struct mlx5e_mod_hdr_entry {
 	struct mod_hdr_key key;
 
 	u32 mod_hdr_id;
+
+	refcount_t refcnt;
 };
 
 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
@@ -284,14 +287,51 @@ static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
 	return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
 }
 
+static struct mod_hdr_tbl *
+get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
+		&priv->fs.tc.mod_hdr;
+}
+
+static struct mlx5e_mod_hdr_entry *
+mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
+{
+	struct mlx5e_mod_hdr_entry *mh, *found = NULL;
+
+	hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
+		if (!cmp_mod_hdr_info(&mh->key, key)) {
+			refcount_inc(&mh->refcnt);
+			found = mh;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
+			      struct mlx5e_mod_hdr_entry *mh)
+{
+	if (!refcount_dec_and_test(&mh->refcnt))
+		return;
+
+	WARN_ON(!list_empty(&mh->flows));
+	mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
+	hash_del(&mh->mod_hdr_hlist);
+	kfree(mh);
+}
+
 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 				struct mlx5e_tc_flow *flow,
 				struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	bool is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
 	int num_actions, actions_size, namespace, err;
-	bool found = false, is_eswitch_flow;
 	struct mlx5e_mod_hdr_entry *mh;
+	struct mod_hdr_tbl *tbl;
 	struct mod_hdr_key key;
 	u32 hash_key;
 
@@ -303,28 +343,12 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 
 	hash_key = hash_mod_hdr_info(&key);
 
-	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
-	if (is_eswitch_flow) {
-		namespace = MLX5_FLOW_NAMESPACE_FDB;
-		hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
-				       mod_hdr_hlist, hash_key) {
-			if (!cmp_mod_hdr_info(&mh->key, &key)) {
-				found = true;
-				break;
-			}
-		}
-	} else {
-		namespace = MLX5_FLOW_NAMESPACE_KERNEL;
-		hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
-				       mod_hdr_hlist, hash_key) {
-			if (!cmp_mod_hdr_info(&mh->key, &key)) {
-				found = true;
-				break;
-			}
-		}
-	}
+	namespace = is_eswitch_flow ?
+		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+	tbl = get_mod_hdr_table(priv, namespace);
 
-	if (found)
+	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
+	if (mh)
 		goto attach_flow;
 
 	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
@@ -335,6 +359,7 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 	memcpy(mh->key.actions, key.actions, actions_size);
 	mh->key.num_actions = num_actions;
 	INIT_LIST_HEAD(&mh->flows);
+	refcount_set(&mh->refcnt, 1);
 
 	err = mlx5_modify_header_alloc(priv->mdev, namespace,
 				       mh->key.num_actions,
@@ -343,12 +368,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 	if (err)
 		goto out_err;
 
-	if (is_eswitch_flow)
-		hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
-	else
-		hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
+	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
 
 attach_flow:
+	flow->mh = mh;
 	list_add(&flow->mod_hdr, &mh->flows);
 	if (is_eswitch_flow)
 		flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
@@ -365,23 +388,14 @@ out_err:
 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 				 struct mlx5e_tc_flow *flow)
 {
-	struct list_head *next = flow->mod_hdr.next;
-
 	/* flow wasn't fully initialized */
-	if (list_empty(&flow->mod_hdr))
+	if (!flow->mh)
 		return;
 
 	list_del(&flow->mod_hdr);
 
-	if (list_empty(next)) {
-		struct mlx5e_mod_hdr_entry *mh;
-
-		mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
-
-		mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
-		hash_del(&mh->mod_hdr_hlist);
-		kfree(mh);
-	}
+	mlx5e_mod_hdr_put(priv, flow->mh);
+	flow->mh = NULL;
 }
 
 static
@@ -3844,7 +3858,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 	int err;
 
 	mutex_init(&tc->t_lock);
-	hash_init(tc->mod_hdr_tbl);
+	hash_init(tc->mod_hdr.hlist);
 	mutex_init(&tc->hairpin_tbl_lock);
 	hash_init(tc->hairpin_tbl);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5fbebee7254d..5ce3c81e3083 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2000,7 +2000,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		goto abort;
 
 	hash_init(esw->offloads.encap_tbl);
-	hash_init(esw->offloads.mod_hdr_tbl);
+	hash_init(esw->offloads.mod_hdr.hlist);
 	atomic64_set(&esw->offloads.num_flows, 0);
 	mutex_init(&esw->state_lock);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 804912e38dee..fd63ba4ed0da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -182,7 +182,7 @@ struct mlx5_esw_offload {
 	struct list_head peer_flows;
 	struct mutex peer_mutex;
 	DECLARE_HASHTABLE(encap_tbl, 8);
-	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
+	struct mod_hdr_tbl mod_hdr;
 	DECLARE_HASHTABLE(termtbl_tbl, 8);
 	struct mutex termtbl_mutex; /* protects termtbl hash */
 	const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES];
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index f049af3f3cd8..96650a33aa91 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -126,6 +126,10 @@ struct mlx5_flow_destination {
 	};
 };
 
+struct mod_hdr_tbl {
+	DECLARE_HASHTABLE(hlist, 8);
+};
+
 struct mlx5_flow_namespace *
 mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n);
 struct mlx5_flow_namespace *
-- 
cgit v1.2.3


From d2faae25c3050a87c8ff965a7939e999e3154b62 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 9 Aug 2019 13:20:48 +0300
Subject: net/mlx5e: Protect mod_hdr hash table with mutex

To remove dependency on rtnl lock, protect mod_hdr hash table from
concurrent modifications with new mutex.

Implement helper function to get flow namespace to prevent code
duplication.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 35 ++++++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |  2 ++
 include/linux/mlx5/fs.h                           |  1 +
 3 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 09d5cc700297..0600b7878600 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -315,22 +315,31 @@ mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key
 }
 
 static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
-			      struct mlx5e_mod_hdr_entry *mh)
+			      struct mlx5e_mod_hdr_entry *mh,
+			      int namespace)
 {
-	if (!refcount_dec_and_test(&mh->refcnt))
+	struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);
+
+	if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
 		return;
+	hash_del(&mh->mod_hdr_hlist);
+	mutex_unlock(&tbl->lock);
 
 	WARN_ON(!list_empty(&mh->flows));
 	mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
-	hash_del(&mh->mod_hdr_hlist);
+
 	kfree(mh);
 }
 
+static int get_flow_name_space(struct mlx5e_tc_flow *flow)
+{
+	return mlx5e_is_eswitch_flow(flow) ?
+		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+}
 static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 				struct mlx5e_tc_flow *flow,
 				struct mlx5e_tc_flow_parse_attr *parse_attr)
 {
-	bool is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
 	int num_actions, actions_size, namespace, err;
 	struct mlx5e_mod_hdr_entry *mh;
 	struct mod_hdr_tbl *tbl;
@@ -345,17 +354,19 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 
 	hash_key = hash_mod_hdr_info(&key);
 
-	namespace = is_eswitch_flow ?
-		MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
+	namespace = get_flow_name_space(flow);
 	tbl = get_mod_hdr_table(priv, namespace);
 
+	mutex_lock(&tbl->lock);
 	mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
 	if (mh)
 		goto attach_flow;
 
 	mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
-	if (!mh)
-		return -ENOMEM;
+	if (!mh) {
+		err = -ENOMEM;
+		goto out_err;
+	}
 
 	mh->key.actions = (void *)mh + sizeof(*mh);
 	memcpy(mh->key.actions, key.actions, actions_size);
@@ -374,11 +385,12 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
 
 attach_flow:
+	mutex_unlock(&tbl->lock);
 	flow->mh = mh;
 	spin_lock(&mh->flows_lock);
 	list_add(&flow->mod_hdr, &mh->flows);
 	spin_unlock(&mh->flows_lock);
-	if (is_eswitch_flow)
+	if (mlx5e_is_eswitch_flow(flow))
 		flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
 	else
 		flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
@@ -386,6 +398,7 @@ attach_flow:
 	return 0;
 
 out_err:
+	mutex_unlock(&tbl->lock);
 	kfree(mh);
 	return err;
 }
@@ -401,7 +414,7 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 	list_del(&flow->mod_hdr);
 	spin_unlock(&flow->mh->flows_lock);
 
-	mlx5e_mod_hdr_put(priv, flow->mh);
+	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
 	flow->mh = NULL;
 }
 
@@ -3865,6 +3878,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 	int err;
 
 	mutex_init(&tc->t_lock);
+	mutex_init(&tc->mod_hdr.lock);
 	hash_init(tc->mod_hdr.hlist);
 	mutex_init(&tc->hairpin_tbl_lock);
 	hash_init(tc->hairpin_tbl);
@@ -3898,6 +3912,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 	if (tc->netdevice_nb.notifier_call)
 		unregister_netdevice_notifier(&tc->netdevice_nb);
 
+	mutex_destroy(&tc->mod_hdr.lock);
 	mutex_destroy(&tc->hairpin_tbl_lock);
 
 	rhashtable_destroy(&tc->ht);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 5ce3c81e3083..2d734ecae719 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2000,6 +2000,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		goto abort;
 
 	hash_init(esw->offloads.encap_tbl);
+	mutex_init(&esw->offloads.mod_hdr.lock);
 	hash_init(esw->offloads.mod_hdr.hlist);
 	atomic64_set(&esw->offloads.num_flows, 0);
 	mutex_init(&esw->state_lock);
@@ -2037,6 +2038,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	esw_offloads_cleanup_reps(esw);
+	mutex_destroy(&esw->offloads.mod_hdr.lock);
 	kfree(esw->vports);
 	kfree(esw);
 }
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 96650a33aa91..1cb1045ce313 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -127,6 +127,7 @@ struct mlx5_flow_destination {
 };
 
 struct mod_hdr_tbl {
+	struct mutex lock; /* protects hlist */
 	DECLARE_HASHTABLE(hlist, 8);
 };
 
-- 
cgit v1.2.3


From ef2e4094e076858343ea1202046443f642a245cd Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Fri, 26 Jul 2019 08:26:52 -0500
Subject: net/mlx5: E-switch, Removed unused hwid

Currently mlx5_eswitch_rep stores same hw ID for all representors.
However it is never used from this structure.
It is always used from mlx5_vport.

Hence, remove unused field.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Vu Pham <vuhuong@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 +-----
 include/linux/mlx5/eswitch.h                               | 1 -
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8fe5dddf18d0..42cc5001255b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1393,10 +1393,9 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw)
 int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 {
 	int total_vports = esw->total_vports;
-	struct mlx5_core_dev *dev = esw->dev;
 	struct mlx5_eswitch_rep *rep;
-	u8 hw_id[ETH_ALEN], rep_type;
 	int vport_index;
+	u8 rep_type;
 
 	esw->offloads.vport_reps = kcalloc(total_vports,
 					   sizeof(struct mlx5_eswitch_rep),
@@ -1404,12 +1403,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw)
 	if (!esw->offloads.vport_reps)
 		return -ENOMEM;
 
-	mlx5_query_mac_address(dev, hw_id);
-
 	mlx5_esw_for_all_reps(esw, vport_index, rep) {
 		rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index);
 		rep->vport_index = vport_index;
-		ether_addr_copy(rep->hw_id, hw_id);
 
 		for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++)
 			atomic_set(&rep->rep_data[rep_type].state,
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 46b5ba029802..38a70d16d8d5 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -44,7 +44,6 @@ struct mlx5_eswitch_rep_data {
 struct mlx5_eswitch_rep {
 	struct mlx5_eswitch_rep_data rep_data[NUM_REP_TYPES];
 	u16		       vport;
-	u8		       hw_id[ETH_ALEN];
 	u16		       vlan;
 	/* Only IB rep is using vport_index */
 	u16		       vport_index;
-- 
cgit v1.2.3


From 060157e1dbc133075a2e20786d6ff6d4b41909f9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 14:55:43 -0700
Subject: Input: remove w90x900 keyboard driver

The ARM w90x900 platform is getting removed, so this driver is obsolete.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/Kconfig               |  11 --
 drivers/input/keyboard/Makefile              |   1 -
 drivers/input/keyboard/w90p910_keypad.c      | 264 ---------------------------
 include/linux/platform_data/keypad-w90p910.h |  16 --
 4 files changed, 292 deletions(-)
 delete mode 100644 drivers/input/keyboard/w90p910_keypad.c
 delete mode 100644 include/linux/platform_data/keypad-w90p910.h

(limited to 'include')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 8e9c3ea9d5e7..c1da129a4eb5 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -731,17 +731,6 @@ config KEYBOARD_XTKBD
 	  To compile this driver as a module, choose M here: the
 	  module will be called xtkbd.
 
-config KEYBOARD_W90P910
-	tristate "W90P910 Matrix Keypad support"
-	depends on ARCH_W90X900
-	select INPUT_MATRIXKMAP
-	help
-	  Say Y here to enable the matrix keypad on evaluation board
-	  based on W90P910.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called w90p910_keypad.
-
 config KEYBOARD_CROS_EC
 	tristate "ChromeOS EC keyboard"
 	select INPUT_MATRIXKMAP
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 06a0af6efeae..9510325c0c5d 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -68,4 +68,3 @@ obj-$(CONFIG_KEYBOARD_TEGRA)		+= tegra-kbc.o
 obj-$(CONFIG_KEYBOARD_TM2_TOUCHKEY)	+= tm2-touchkey.o
 obj-$(CONFIG_KEYBOARD_TWL4030)		+= twl4030_keypad.o
 obj-$(CONFIG_KEYBOARD_XTKBD)		+= xtkbd.o
-obj-$(CONFIG_KEYBOARD_W90P910)		+= w90p910_keypad.o
diff --git a/drivers/input/keyboard/w90p910_keypad.c b/drivers/input/keyboard/w90p910_keypad.c
deleted file mode 100644
index c88d05d6108a..000000000000
--- a/drivers/input/keyboard/w90p910_keypad.c
+++ /dev/null
@@ -1,264 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2008-2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/input.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-
-#include <linux/platform_data/keypad-w90p910.h>
-
-/* Keypad Interface Control Registers */
-#define KPI_CONF		0x00
-#define KPI_3KCONF		0x04
-#define KPI_LPCONF		0x08
-#define KPI_STATUS		0x0C
-
-#define IS1KEY			(0x01 << 16)
-#define INTTR			(0x01 << 21)
-#define KEY0R			(0x0f << 3)
-#define KEY0C			0x07
-#define DEBOUNCE_BIT		0x08
-#define KSIZE0			(0x01 << 16)
-#define KSIZE1			(0x01 << 17)
-#define KPSEL			(0x01 << 19)
-#define ENKP			(0x01 << 18)
-
-#define KGET_RAW(n)		(((n) & KEY0R) >> 3)
-#define KGET_COLUMN(n)		((n) & KEY0C)
-
-#define W90P910_NUM_ROWS	8
-#define W90P910_NUM_COLS	8
-#define W90P910_ROW_SHIFT	3
-
-struct w90p910_keypad {
-	const struct w90p910_keypad_platform_data *pdata;
-	struct clk *clk;
-	struct input_dev *input_dev;
-	void __iomem *mmio_base;
-	int irq;
-	unsigned short keymap[W90P910_NUM_ROWS * W90P910_NUM_COLS];
-};
-
-static void w90p910_keypad_scan_matrix(struct w90p910_keypad *keypad,
-							unsigned int status)
-{
-	struct input_dev *input_dev = keypad->input_dev;
-	unsigned int row = KGET_RAW(status);
-	unsigned int col = KGET_COLUMN(status);
-	unsigned int code = MATRIX_SCAN_CODE(row, col, W90P910_ROW_SHIFT);
-	unsigned int key = keypad->keymap[code];
-
-	input_event(input_dev, EV_MSC, MSC_SCAN, code);
-	input_report_key(input_dev, key, 1);
-	input_sync(input_dev);
-
-	input_event(input_dev, EV_MSC, MSC_SCAN, code);
-	input_report_key(input_dev, key, 0);
-	input_sync(input_dev);
-}
-
-static irqreturn_t w90p910_keypad_irq_handler(int irq, void *dev_id)
-{
-	struct w90p910_keypad *keypad = dev_id;
-	unsigned int  kstatus, val;
-
-	kstatus = __raw_readl(keypad->mmio_base + KPI_STATUS);
-
-	val = INTTR | IS1KEY;
-
-	if (kstatus & val)
-		w90p910_keypad_scan_matrix(keypad, kstatus);
-
-	return IRQ_HANDLED;
-}
-
-static int w90p910_keypad_open(struct input_dev *dev)
-{
-	struct w90p910_keypad *keypad = input_get_drvdata(dev);
-	const struct w90p910_keypad_platform_data *pdata = keypad->pdata;
-	unsigned int val, config;
-
-	/* Enable unit clock */
-	clk_enable(keypad->clk);
-
-	val = __raw_readl(keypad->mmio_base + KPI_CONF);
-	val |= (KPSEL | ENKP);
-	val &= ~(KSIZE0 | KSIZE1);
-
-	config = pdata->prescale | (pdata->debounce << DEBOUNCE_BIT);
-
-	val |= config;
-
-	__raw_writel(val, keypad->mmio_base + KPI_CONF);
-
-	return 0;
-}
-
-static void w90p910_keypad_close(struct input_dev *dev)
-{
-	struct w90p910_keypad *keypad = input_get_drvdata(dev);
-
-	/* Disable clock unit */
-	clk_disable(keypad->clk);
-}
-
-static int w90p910_keypad_probe(struct platform_device *pdev)
-{
-	const struct w90p910_keypad_platform_data *pdata =
-						dev_get_platdata(&pdev->dev);
-	const struct matrix_keymap_data *keymap_data;
-	struct w90p910_keypad *keypad;
-	struct input_dev *input_dev;
-	struct resource *res;
-	int irq;
-	int error;
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "no platform data defined\n");
-		return -EINVAL;
-	}
-
-	keymap_data = pdata->keymap_data;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to get keypad irq\n");
-		return -ENXIO;
-	}
-
-	keypad = kzalloc(sizeof(struct w90p910_keypad), GFP_KERNEL);
-	input_dev = input_allocate_device();
-	if (!keypad || !input_dev) {
-		dev_err(&pdev->dev, "failed to allocate driver data\n");
-		error = -ENOMEM;
-		goto failed_free;
-	}
-
-	keypad->pdata = pdata;
-	keypad->input_dev = input_dev;
-	keypad->irq = irq;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "failed to get I/O memory\n");
-		error = -ENXIO;
-		goto failed_free;
-	}
-
-	res = request_mem_region(res->start, resource_size(res), pdev->name);
-	if (res == NULL) {
-		dev_err(&pdev->dev, "failed to request I/O memory\n");
-		error = -EBUSY;
-		goto failed_free;
-	}
-
-	keypad->mmio_base = ioremap(res->start, resource_size(res));
-	if (keypad->mmio_base == NULL) {
-		dev_err(&pdev->dev, "failed to remap I/O memory\n");
-		error = -ENXIO;
-		goto failed_free_res;
-	}
-
-	keypad->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(keypad->clk)) {
-		dev_err(&pdev->dev, "failed to get keypad clock\n");
-		error = PTR_ERR(keypad->clk);
-		goto failed_free_io;
-	}
-
-	/* set multi-function pin for w90p910 kpi. */
-	mfp_set_groupi(&pdev->dev);
-
-	input_dev->name = pdev->name;
-	input_dev->id.bustype = BUS_HOST;
-	input_dev->open = w90p910_keypad_open;
-	input_dev->close = w90p910_keypad_close;
-	input_dev->dev.parent = &pdev->dev;
-
-	error = matrix_keypad_build_keymap(keymap_data, NULL,
-					   W90P910_NUM_ROWS, W90P910_NUM_COLS,
-					   keypad->keymap, input_dev);
-	if (error) {
-		dev_err(&pdev->dev, "failed to build keymap\n");
-		goto failed_put_clk;
-	}
-
-	error = request_irq(keypad->irq, w90p910_keypad_irq_handler,
-			    0, pdev->name, keypad);
-	if (error) {
-		dev_err(&pdev->dev, "failed to request IRQ\n");
-		goto failed_put_clk;
-	}
-
-	__set_bit(EV_REP, input_dev->evbit);
-	input_set_capability(input_dev, EV_MSC, MSC_SCAN);
-	input_set_drvdata(input_dev, keypad);
-
-	/* Register the input device */
-	error = input_register_device(input_dev);
-	if (error) {
-		dev_err(&pdev->dev, "failed to register input device\n");
-		goto failed_free_irq;
-	}
-
-	platform_set_drvdata(pdev, keypad);
-	return 0;
-
-failed_free_irq:
-	free_irq(irq, keypad);
-failed_put_clk:
-	clk_put(keypad->clk);
-failed_free_io:
-	iounmap(keypad->mmio_base);
-failed_free_res:
-	release_mem_region(res->start, resource_size(res));
-failed_free:
-	input_free_device(input_dev);
-	kfree(keypad);
-	return error;
-}
-
-static int w90p910_keypad_remove(struct platform_device *pdev)
-{
-	struct w90p910_keypad *keypad = platform_get_drvdata(pdev);
-	struct resource *res;
-
-	free_irq(keypad->irq, keypad);
-
-	clk_put(keypad->clk);
-
-	input_unregister_device(keypad->input_dev);
-
-	iounmap(keypad->mmio_base);
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(res->start, resource_size(res));
-
-	kfree(keypad);
-
-	return 0;
-}
-
-static struct platform_driver w90p910_keypad_driver = {
-	.probe		= w90p910_keypad_probe,
-	.remove		= w90p910_keypad_remove,
-	.driver		= {
-		.name	= "nuc900-kpi",
-	},
-};
-module_platform_driver(w90p910_keypad_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("w90p910 keypad driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-keypad");
diff --git a/include/linux/platform_data/keypad-w90p910.h b/include/linux/platform_data/keypad-w90p910.h
deleted file mode 100644
index 206ca4ecd93f..000000000000
--- a/include/linux/platform_data/keypad-w90p910.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_ARCH_W90P910_KEYPAD_H
-#define __ASM_ARCH_W90P910_KEYPAD_H
-
-#include <linux/input/matrix_keypad.h>
-
-extern void mfp_set_groupi(struct device *dev);
-
-struct w90p910_keypad_platform_data {
-	const struct matrix_keymap_data *keymap_data;
-
-	unsigned int	prescale;
-	unsigned int	debounce;
-};
-
-#endif /* __ASM_ARCH_W90P910_KEYPAD_H */
-- 
cgit v1.2.3


From 6a1490367c44f94614e39e8b98ff7114ff8a6449 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 23 Jul 2019 11:44:01 +0530
Subject: cpufreq: Add policy create/remove notifiers back

This effectively reverts some changes made by commit f9f41e3ef99
("cpufreq: Remove policy create/remove notifiers").

We have a new use case for policy create/remove notifiers (for
allocating/freeing QoS requests per policy), so add them back.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 15 ++++++++++++++-
 include/linux/cpufreq.h   |  2 ++
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8dda62367816..c13dcb59b30c 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1266,7 +1266,17 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
 				   DEV_PM_QOS_MAX_FREQUENCY);
 	dev_pm_qos_remove_notifier(dev, &policy->nb_min,
 				   DEV_PM_QOS_MIN_FREQUENCY);
-	dev_pm_qos_remove_request(policy->max_freq_req);
+
+	if (policy->max_freq_req) {
+		/*
+		 * CPUFREQ_CREATE_POLICY notification is sent only after
+		 * successfully adding max_freq_req request.
+		 */
+		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+					     CPUFREQ_REMOVE_POLICY, policy);
+		dev_pm_qos_remove_request(policy->max_freq_req);
+	}
+
 	dev_pm_qos_remove_request(policy->min_freq_req);
 	kfree(policy->min_freq_req);
 
@@ -1391,6 +1401,9 @@ static int cpufreq_online(unsigned int cpu)
 				ret);
 			goto out_destroy_policy;
 		}
+
+		blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+				CPUFREQ_CREATE_POLICY, policy);
 	}
 
 	if (cpufreq_driver->get && has_target()) {
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 536a049d7ecc..afc10384a681 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -458,6 +458,8 @@ static inline void cpufreq_resume(void) {}
 /* Policy Notifiers  */
 #define CPUFREQ_ADJUST			(0)
 #define CPUFREQ_NOTIFY			(1)
+#define CPUFREQ_CREATE_POLICY		(2)
+#define CPUFREQ_REMOVE_POLICY		(3)
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);
-- 
cgit v1.2.3


From 33dcb37cef741294b481f4d889a465b8091f11bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Jul 2019 09:26:40 +0200
Subject: dma-mapping: fix page attributes for dma_mmap_*

All the way back to introducing dma_common_mmap we've defaulted to mark
the pages as uncached.  But this is wrong for DMA coherent devices.
Later on DMA_ATTR_WRITE_COMBINE also got incorrect treatment as that
flag is only treated special on the alloc side for non-coherent devices.

Introduce a new dma_pgprot helper that deals with the check for coherent
devices so that only the remapping cases ever reach arch_dma_mmap_pgprot
and we thus ensure no aliasing of page attributes happens, which makes
the powerpc version of arch_dma_mmap_pgprot obsolete and simplifies the
remaining ones.

Note that this means arch_dma_mmap_pgprot is a bit misnamed now, but
we'll phase it out soon.

Fixes: 64ccc9c033c6 ("common: dma-mapping: add support for generic dma_mmap_* calls")
Reported-by: Shawn Anastasio <shawn@anastas.io>
Reported-by: Gavin Li <git@thegavinli.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
---
 arch/arm/mm/dma-mapping.c        |  4 +---
 arch/arm64/mm/dma-mapping.c      |  4 +---
 arch/powerpc/Kconfig             |  1 -
 arch/powerpc/kernel/Makefile     |  3 +--
 arch/powerpc/kernel/dma-common.c | 17 -----------------
 drivers/iommu/dma-iommu.c        |  6 +++---
 include/linux/dma-noncoherent.h  | 13 +++++++++----
 kernel/dma/mapping.c             | 19 ++++++++++++++++++-
 kernel/dma/remap.c               |  2 +-
 9 files changed, 34 insertions(+), 35 deletions(-)
 delete mode 100644 arch/powerpc/kernel/dma-common.c

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 6774b03aa405..d42557ee69c2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2405,9 +2405,7 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs)
 {
-	if (!dev_is_dma_coherent(dev))
-		return __get_dma_pgprot(attrs, prot);
-	return prot;
+	return __get_dma_pgprot(attrs, prot);
 }
 
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 1d3f0b5a9940..bd2b039f43a6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -14,9 +14,7 @@
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs)
 {
-	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
-		return pgprot_writecombine(prot);
-	return prot;
+	return pgprot_writecombine(prot);
 }
 
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..d8dcd8820369 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -121,7 +121,6 @@ config PPC
 	select ARCH_32BIT_OFF_T if PPC32
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ea0c69236789..56dfa7a2a6f2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,8 +49,7 @@ obj-y				:= cputable.o ptrace.o syscalls.o \
 				   signal.o sysfs.o cacheinfo.o time.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
-				   of_platform.o prom_parse.o \
-				   dma-common.o
+				   of_platform.o prom_parse.o
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c
deleted file mode 100644
index dc7ef6b17b69..000000000000
--- a/arch/powerpc/kernel/dma-common.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Contains common dma routines for all powerpc platforms.
- *
- * Copyright (C) 2019 Shawn Anastasio.
- */
-
-#include <linux/mm.h>
-#include <linux/dma-noncoherent.h>
-
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	if (!dev_is_dma_coherent(dev))
-		return pgprot_noncached(prot);
-	return prot;
-}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a7f9c3edbcb2..0015fe610b23 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -574,7 +574,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 	struct iova_domain *iovad = &cookie->iovad;
 	bool coherent = dev_is_dma_coherent(dev);
 	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
-	pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
+	pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 	struct page **pages;
 	struct sg_table sgt;
@@ -975,7 +975,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
 		return NULL;
 
 	if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) {
-		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
+		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
 
 		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
 				VM_USERMAP, prot, __builtin_return_address(0));
@@ -1035,7 +1035,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn, off = vma->vm_pgoff;
 	int ret;
 
-	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
+	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 3813211a9aad..0bff3d7fac92 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -42,13 +42,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
 long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 		dma_addr_t dma_addr);
-
-#ifdef CONFIG_ARCH_HAS_DMA_MMAP_PGPROT
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs);
+
+#ifdef CONFIG_MMU
+pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs);
 #else
-# define arch_dma_mmap_pgprot(dev, prot, attrs)	pgprot_noncached(prot)
-#endif
+static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
+		unsigned long attrs)
+{
+	return prot;	/* no protection bits supported without page tables */
+}
+#endif /* CONFIG_MMU */
 
 #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC
 void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b945239621d8..b0038ca3aa92 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -150,6 +150,23 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
 }
 EXPORT_SYMBOL(dma_get_sgtable_attrs);
 
+#ifdef CONFIG_MMU
+/*
+ * Return the page attributes used for mapping dma_alloc_* memory, either in
+ * kernel space if remapping is needed, or to userspace through dma_mmap_*.
+ */
+pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
+{
+	if (dev_is_dma_coherent(dev) ||
+	    (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
+             (attrs & DMA_ATTR_NON_CONSISTENT)))
+		return prot;
+	if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT))
+		return arch_dma_mmap_pgprot(dev, prot, attrs);
+	return pgprot_noncached(prot);
+}
+#endif /* CONFIG_MMU */
+
 /*
  * Create userspace mapping for the DMA-coherent memory.
  */
@@ -164,7 +181,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn;
 	int ret = -ENXIO;
 
-	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
+	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index a594aec07882..ffe78f0b2fe4 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -218,7 +218,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	/* create a coherent mapping */
 	ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
-			arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
+			dma_pgprot(dev, PAGE_KERNEL, attrs),
 			__builtin_return_address(0));
 	if (!ret) {
 		__dma_direct_free_pages(dev, size, page);
-- 
cgit v1.2.3


From a62052ba2aecb9269a32efeb3e22f96b83a13304 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 10 Aug 2019 12:17:16 +0200
Subject: wimax: no need to check return value of debugfs_create functions

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

This cleans up a lot of unneeded code and logic around the debugfs wimax
files, making all of this much simpler and easier to understand.

Cc: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Cc: linux-wimax@intel.com
Cc: netdev@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wimax/i2400m/debugfs.c | 150 ++++++-------------------------------
 drivers/net/wimax/i2400m/driver.c  |   7 +-
 drivers/net/wimax/i2400m/i2400m.h  |   7 +-
 drivers/net/wimax/i2400m/usb.c     |  64 +++-------------
 include/linux/wimax/debug.h        |  20 +----
 net/wimax/debugfs.c                |  42 ++---------
 net/wimax/stack.c                  |  11 +--
 net/wimax/wimax-internal.h         |   7 +-
 8 files changed, 51 insertions(+), 257 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wimax/i2400m/debugfs.c b/drivers/net/wimax/i2400m/debugfs.c
index 6544ac9df047..73f5892ce6c1 100644
--- a/drivers/net/wimax/i2400m/debugfs.c
+++ b/drivers/net/wimax/i2400m/debugfs.c
@@ -30,15 +30,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_netdev_queue_stopped,
 			debugfs_netdev_queue_stopped_get,
 			NULL, "%llu\n");
 
-
-static
-struct dentry *debugfs_create_netdev_queue_stopped(
-	const char *name, struct dentry *parent, struct i2400m *i2400m)
-{
-	return debugfs_create_file(name, 0400, parent, i2400m,
-				   &fops_netdev_queue_stopped);
-}
-
 /*
  * We don't allow partial reads of this file, as then the reader would
  * get weirdly confused data as it is updated.
@@ -167,15 +158,6 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_i2400m_suspend,
 			NULL, debugfs_i2400m_suspend_set,
 			"%llu\n");
 
-static
-struct dentry *debugfs_create_i2400m_suspend(
-	const char *name, struct dentry *parent, struct i2400m *i2400m)
-{
-	return debugfs_create_file(name, 0200, parent, i2400m,
-				   &fops_i2400m_suspend);
-}
-
-
 /*
  * Reset the device
  *
@@ -205,73 +187,25 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_i2400m_reset,
 			NULL, debugfs_i2400m_reset_set,
 			"%llu\n");
 
-static
-struct dentry *debugfs_create_i2400m_reset(
-	const char *name, struct dentry *parent, struct i2400m *i2400m)
+void i2400m_debugfs_add(struct i2400m *i2400m)
 {
-	return debugfs_create_file(name, 0200, parent, i2400m,
-				   &fops_i2400m_reset);
-}
-
-
-#define __debugfs_register(prefix, name, parent)			\
-do {									\
-	result = d_level_register_debugfs(prefix, name, parent);	\
-	if (result < 0)							\
-		goto error;						\
-} while (0)
-
-
-int i2400m_debugfs_add(struct i2400m *i2400m)
-{
-	int result;
-	struct device *dev = i2400m_dev(i2400m);
 	struct dentry *dentry = i2400m->wimax_dev.debugfs_dentry;
-	struct dentry *fd;
 
 	dentry = debugfs_create_dir("i2400m", dentry);
-	result = PTR_ERR(dentry);
-	if (IS_ERR(dentry)) {
-		if (result == -ENODEV)
-			result = 0;	/* No debugfs support */
-		goto error;
-	}
 	i2400m->debugfs_dentry = dentry;
-	__debugfs_register("dl_", control, dentry);
-	__debugfs_register("dl_", driver, dentry);
-	__debugfs_register("dl_", debugfs, dentry);
-	__debugfs_register("dl_", fw, dentry);
-	__debugfs_register("dl_", netdev, dentry);
-	__debugfs_register("dl_", rfkill, dentry);
-	__debugfs_register("dl_", rx, dentry);
-	__debugfs_register("dl_", tx, dentry);
-
-	fd = debugfs_create_size_t("tx_in", 0400, dentry,
-				   &i2400m->tx_in);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"tx_in: %d\n", result);
-		goto error;
-	}
 
-	fd = debugfs_create_size_t("tx_out", 0400, dentry,
-				   &i2400m->tx_out);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"tx_out: %d\n", result);
-		goto error;
-	}
+	d_level_register_debugfs("dl_", control, dentry);
+	d_level_register_debugfs("dl_", driver, dentry);
+	d_level_register_debugfs("dl_", debugfs, dentry);
+	d_level_register_debugfs("dl_", fw, dentry);
+	d_level_register_debugfs("dl_", netdev, dentry);
+	d_level_register_debugfs("dl_", rfkill, dentry);
+	d_level_register_debugfs("dl_", rx, dentry);
+	d_level_register_debugfs("dl_", tx, dentry);
 
-	fd = debugfs_create_u32("state", 0600, dentry,
-				&i2400m->state);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"state: %d\n", result);
-		goto error;
-	}
+	debugfs_create_size_t("tx_in", 0400, dentry, &i2400m->tx_in);
+	debugfs_create_size_t("tx_out", 0400, dentry, &i2400m->tx_out);
+	debugfs_create_u32("state", 0600, dentry, &i2400m->state);
 
 	/*
 	 * Trace received messages from user space
@@ -295,60 +229,22 @@ int i2400m_debugfs_add(struct i2400m *i2400m)
 	 * It is not really very atomic, but it is also not too
 	 * critical.
 	 */
-	fd = debugfs_create_u8("trace_msg_from_user", 0600, dentry,
-			       &i2400m->trace_msg_from_user);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"trace_msg_from_user: %d\n", result);
-		goto error;
-	}
+	debugfs_create_u8("trace_msg_from_user", 0600, dentry,
+			  &i2400m->trace_msg_from_user);
 
-	fd = debugfs_create_netdev_queue_stopped("netdev_queue_stopped",
-						 dentry, i2400m);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"netdev_queue_stopped: %d\n", result);
-		goto error;
-	}
+	debugfs_create_file("netdev_queue_stopped", 0400, dentry, i2400m,
+			    &fops_netdev_queue_stopped);
 
-	fd = debugfs_create_file("rx_stats", 0600, dentry, i2400m,
-				 &i2400m_rx_stats_fops);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"rx_stats: %d\n", result);
-		goto error;
-	}
+	debugfs_create_file("rx_stats", 0600, dentry, i2400m,
+			    &i2400m_rx_stats_fops);
 
-	fd = debugfs_create_file("tx_stats", 0600, dentry, i2400m,
-				 &i2400m_tx_stats_fops);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"tx_stats: %d\n", result);
-		goto error;
-	}
+	debugfs_create_file("tx_stats", 0600, dentry, i2400m,
+			    &i2400m_tx_stats_fops);
 
-	fd = debugfs_create_i2400m_suspend("suspend", dentry, i2400m);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry suspend: %d\n",
-			result);
-		goto error;
-	}
+	debugfs_create_file("suspend", 0200, dentry, i2400m,
+			    &fops_i2400m_suspend);
 
-	fd = debugfs_create_i2400m_reset("reset", dentry, i2400m);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry reset: %d\n", result);
-		goto error;
-	}
-
-	result = 0;
-error:
-	return result;
+	debugfs_create_file("reset", 0200, dentry, i2400m, &fops_i2400m_reset);
 }
 
 void i2400m_debugfs_rm(struct i2400m *i2400m)
diff --git a/drivers/net/wimax/i2400m/driver.c b/drivers/net/wimax/i2400m/driver.c
index 0a29222a1bf9..f66c0f8f6f4a 100644
--- a/drivers/net/wimax/i2400m/driver.c
+++ b/drivers/net/wimax/i2400m/driver.c
@@ -905,11 +905,7 @@ int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
 		goto error_sysfs_setup;
 	}
 
-	result = i2400m_debugfs_add(i2400m);
-	if (result < 0) {
-		dev_err(dev, "cannot setup i2400m's debugfs: %d\n", result);
-		goto error_debugfs_setup;
-	}
+	i2400m_debugfs_add(i2400m);
 
 	result = i2400m_dev_start(i2400m, bm_flags);
 	if (result < 0)
@@ -919,7 +915,6 @@ int i2400m_setup(struct i2400m *i2400m, enum i2400m_bri bm_flags)
 
 error_dev_start:
 	i2400m_debugfs_rm(i2400m);
-error_debugfs_setup:
 	sysfs_remove_group(&i2400m->wimax_dev.net_dev->dev.kobj,
 			   &i2400m_dev_attr_group);
 error_sysfs_setup:
diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h
index 5a34e72bab9a..a3733a6d14f5 100644
--- a/drivers/net/wimax/i2400m/i2400m.h
+++ b/drivers/net/wimax/i2400m/i2400m.h
@@ -812,13 +812,10 @@ enum i2400m_pt;
 int i2400m_tx(struct i2400m *, const void *, size_t, enum i2400m_pt);
 
 #ifdef CONFIG_DEBUG_FS
-int i2400m_debugfs_add(struct i2400m *);
+void i2400m_debugfs_add(struct i2400m *);
 void i2400m_debugfs_rm(struct i2400m *);
 #else
-static inline int i2400m_debugfs_add(struct i2400m *i2400m)
-{
-	return 0;
-}
+static inline void i2400m_debugfs_add(struct i2400m *i2400m) {}
 static inline void i2400m_debugfs_rm(struct i2400m *i2400m) {}
 #endif
 
diff --git a/drivers/net/wimax/i2400m/usb.c b/drivers/net/wimax/i2400m/usb.c
index 2075e7b1fff6..6953f904232f 100644
--- a/drivers/net/wimax/i2400m/usb.c
+++ b/drivers/net/wimax/i2400m/usb.c
@@ -366,61 +366,25 @@ struct d_level D_LEVEL[] = {
 };
 size_t D_LEVEL_SIZE = ARRAY_SIZE(D_LEVEL);
 
-
-#define __debugfs_register(prefix, name, parent)			\
-do {									\
-	result = d_level_register_debugfs(prefix, name, parent);	\
-	if (result < 0)							\
-		goto error;						\
-} while (0)
-
-
 static
-int i2400mu_debugfs_add(struct i2400mu *i2400mu)
+void i2400mu_debugfs_add(struct i2400mu *i2400mu)
 {
-	int result;
-	struct device *dev = &i2400mu->usb_iface->dev;
 	struct dentry *dentry = i2400mu->i2400m.wimax_dev.debugfs_dentry;
-	struct dentry *fd;
 
 	dentry = debugfs_create_dir("i2400m-usb", dentry);
-	result = PTR_ERR(dentry);
-	if (IS_ERR(dentry)) {
-		if (result == -ENODEV)
-			result = 0;	/* No debugfs support */
-		goto error;
-	}
 	i2400mu->debugfs_dentry = dentry;
-	__debugfs_register("dl_", usb, dentry);
-	__debugfs_register("dl_", fw, dentry);
-	__debugfs_register("dl_", notif, dentry);
-	__debugfs_register("dl_", rx, dentry);
-	__debugfs_register("dl_", tx, dentry);
 
-	/* Don't touch these if you don't know what you are doing */
-	fd = debugfs_create_u8("rx_size_auto_shrink", 0600, dentry,
-			       &i2400mu->rx_size_auto_shrink);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"rx_size_auto_shrink: %d\n", result);
-		goto error;
-	}
+	d_level_register_debugfs("dl_", usb, dentry);
+	d_level_register_debugfs("dl_", fw, dentry);
+	d_level_register_debugfs("dl_", notif, dentry);
+	d_level_register_debugfs("dl_", rx, dentry);
+	d_level_register_debugfs("dl_", tx, dentry);
 
-	fd = debugfs_create_size_t("rx_size", 0600, dentry,
-				   &i2400mu->rx_size);
-	result = PTR_ERR(fd);
-	if (IS_ERR(fd) && result != -ENODEV) {
-		dev_err(dev, "Can't create debugfs entry "
-			"rx_size: %d\n", result);
-		goto error;
-	}
-
-	return 0;
+	/* Don't touch these if you don't know what you are doing */
+	debugfs_create_u8("rx_size_auto_shrink", 0600, dentry,
+			  &i2400mu->rx_size_auto_shrink);
 
-error:
-	debugfs_remove_recursive(i2400mu->debugfs_dentry);
-	return result;
+	debugfs_create_size_t("rx_size", 0600, dentry, &i2400mu->rx_size);
 }
 
 
@@ -534,15 +498,9 @@ int i2400mu_probe(struct usb_interface *iface,
 		dev_err(dev, "cannot setup device: %d\n", result);
 		goto error_setup;
 	}
-	result = i2400mu_debugfs_add(i2400mu);
-	if (result < 0) {
-		dev_err(dev, "Can't register i2400mu's debugfs: %d\n", result);
-		goto error_debugfs_add;
-	}
+	i2400mu_debugfs_add(i2400mu);
 	return 0;
 
-error_debugfs_add:
-	i2400m_release(i2400m);
 error_setup:
 	usb_set_intfdata(iface, NULL);
 	usb_put_dev(i2400mu->usb_dev);
diff --git a/include/linux/wimax/debug.h b/include/linux/wimax/debug.h
index 7cb63e4ec0ae..4dd2c1cea6a9 100644
--- a/include/linux/wimax/debug.h
+++ b/include/linux/wimax/debug.h
@@ -98,9 +98,7 @@
  * To manipulate from user space the levels, create a debugfs dentry
  * and then register each submodule with:
  *
- *     result = d_level_register_debugfs("PREFIX_", submodule_X, parent);
- *     if (result < 0)
- *            goto error;
+ *     d_level_register_debugfs("PREFIX_", submodule_X, parent);
  *
  * Where PREFIX_ is a name of your chosing. This will create debugfs
  * file with a single numeric value that can be use to tweak it. To
@@ -408,25 +406,13 @@ do {							\
  * @submodule: name of submodule (not a string, just the name)
  * @dentry: debugfs parent dentry
  *
- * Returns: 0 if ok, < 0 errno on error.
- *
  * For removing, just use debugfs_remove_recursive() on the parent.
  */
 #define d_level_register_debugfs(prefix, name, parent)			\
 ({									\
-	int rc;								\
-	struct dentry *fd;						\
-	struct dentry *verify_parent_type = parent;			\
-	fd = debugfs_create_u8(						\
-		prefix #name, 0600, verify_parent_type,			\
+	debugfs_create_u8(						\
+		prefix #name, 0600, parent,				\
 		&(D_LEVEL[__D_SUBMODULE_ ## name].level));		\
-	rc = PTR_ERR(fd);						\
-	if (IS_ERR(fd) && rc != -ENODEV)				\
-		printk(KERN_ERR "%s: Can't create debugfs entry %s: "	\
-		       "%d\n", __func__, prefix #name, rc);		\
-	else								\
-		rc = 0;							\
-	rc;								\
 })
 
 
diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c
index 1af56df30276..3c54bb6b925a 100644
--- a/net/wimax/debugfs.c
+++ b/net/wimax/debugfs.c
@@ -13,49 +13,23 @@
 #define D_SUBMODULE debugfs
 #include "debug-levels.h"
 
-
-#define __debugfs_register(prefix, name, parent)			\
-do {									\
-	result = d_level_register_debugfs(prefix, name, parent);	\
-	if (result < 0)							\
-		goto error;						\
-} while (0)
-
-
-int wimax_debugfs_add(struct wimax_dev *wimax_dev)
+void wimax_debugfs_add(struct wimax_dev *wimax_dev)
 {
-	int result;
 	struct net_device *net_dev = wimax_dev->net_dev;
-	struct device *dev = net_dev->dev.parent;
 	struct dentry *dentry;
 	char buf[128];
 
 	snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name);
 	dentry = debugfs_create_dir(buf, NULL);
-	result = PTR_ERR(dentry);
-	if (IS_ERR(dentry)) {
-		if (result == -ENODEV)
-			result = 0;	/* No debugfs support */
-		else
-			dev_err(dev, "Can't create debugfs dentry: %d\n",
-				result);
-		goto out;
-	}
 	wimax_dev->debugfs_dentry = dentry;
-	__debugfs_register("wimax_dl_", debugfs, dentry);
-	__debugfs_register("wimax_dl_", id_table, dentry);
-	__debugfs_register("wimax_dl_", op_msg, dentry);
-	__debugfs_register("wimax_dl_", op_reset, dentry);
-	__debugfs_register("wimax_dl_", op_rfkill, dentry);
-	__debugfs_register("wimax_dl_", op_state_get, dentry);
-	__debugfs_register("wimax_dl_", stack, dentry);
-	result = 0;
-out:
-	return result;
 
-error:
-	debugfs_remove_recursive(wimax_dev->debugfs_dentry);
-	return result;
+	d_level_register_debugfs("wimax_dl_", debugfs, dentry);
+	d_level_register_debugfs("wimax_dl_", id_table, dentry);
+	d_level_register_debugfs("wimax_dl_", op_msg, dentry);
+	d_level_register_debugfs("wimax_dl_", op_reset, dentry);
+	d_level_register_debugfs("wimax_dl_", op_rfkill, dentry);
+	d_level_register_debugfs("wimax_dl_", op_state_get, dentry);
+	d_level_register_debugfs("wimax_dl_", stack, dentry);
 }
 
 void wimax_debugfs_rm(struct wimax_dev *wimax_dev)
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 1ba99d65feca..4b9b1c5e8f3a 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -481,12 +481,7 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
 	/* Set up user-space interaction */
 	mutex_lock(&wimax_dev->mutex);
 	wimax_id_table_add(wimax_dev);
-	result = wimax_debugfs_add(wimax_dev);
-	if (result < 0) {
-		dev_err(dev, "cannot initialize debugfs: %d\n",
-			result);
-		goto error_debugfs_add;
-	}
+	wimax_debugfs_add(wimax_dev);
 
 	__wimax_state_set(wimax_dev, WIMAX_ST_DOWN);
 	mutex_unlock(&wimax_dev->mutex);
@@ -498,10 +493,6 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev)
 	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev);
 	return 0;
 
-error_debugfs_add:
-	wimax_id_table_rm(wimax_dev);
-	mutex_unlock(&wimax_dev->mutex);
-	wimax_rfkill_rm(wimax_dev);
 error_rfkill_add:
 	d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n",
 		wimax_dev, net_dev, result);
diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h
index e819a09337ee..40751207296c 100644
--- a/net/wimax/wimax-internal.h
+++ b/net/wimax/wimax-internal.h
@@ -57,13 +57,10 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state)
 void __wimax_state_change(struct wimax_dev *, enum wimax_st);
 
 #ifdef CONFIG_DEBUG_FS
-int wimax_debugfs_add(struct wimax_dev *);
+void wimax_debugfs_add(struct wimax_dev *);
 void wimax_debugfs_rm(struct wimax_dev *);
 #else
-static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev)
-{
-	return 0;
-}
+static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {}
 static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {}
 #endif
 
-- 
cgit v1.2.3


From 9f818c8a7388ad1a5c60ace50be6f658c058a5f2 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 10 Aug 2019 12:17:18 +0200
Subject: mlx5: no need to check return value of debugfs_create functions

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

This cleans up a lot of unneeded code and logic around the debugfs
files, making all of this much simpler and easier to understand as we
don't need to keep the dentries saved anymore.

Cc: Saeed Mahameed <saeedm@mellanox.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: netdev@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  51 ++---------
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c  | 102 ++-------------------
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  11 +--
 drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |   2 +-
 include/linux/mlx5/driver.h                        |  12 +--
 7 files changed, 24 insertions(+), 163 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 8cdd7e66f8df..973f90888b1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1368,49 +1368,19 @@ static void clean_debug_files(struct mlx5_core_dev *dev)
 	debugfs_remove_recursive(dbg->dbg_root);
 }
 
-static int create_debugfs_files(struct mlx5_core_dev *dev)
+static void create_debugfs_files(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
-	int err = -ENOMEM;
-
-	if (!mlx5_debugfs_root)
-		return 0;
 
 	dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root);
-	if (!dbg->dbg_root)
-		return err;
-
-	dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root,
-					  dev, &dfops);
-	if (!dbg->dbg_in)
-		goto err_dbg;
 
-	dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root,
-					   dev, &dfops);
-	if (!dbg->dbg_out)
-		goto err_dbg;
-
-	dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root,
-					      dev, &olfops);
-	if (!dbg->dbg_outlen)
-		goto err_dbg;
-
-	dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root,
-					    &dbg->status);
-	if (!dbg->dbg_status)
-		goto err_dbg;
-
-	dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
-	if (!dbg->dbg_run)
-		goto err_dbg;
+	debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops);
+	debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops);
+	debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops);
+	debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status);
+	debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
 
 	mlx5_cmdif_debugfs_init(dev);
-
-	return 0;
-
-err_dbg:
-	clean_debug_files(dev);
-	return err;
 }
 
 static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode)
@@ -2007,17 +1977,10 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		goto err_cache;
 	}
 
-	err = create_debugfs_files(dev);
-	if (err) {
-		err = -ENOMEM;
-		goto err_wq;
-	}
+	create_debugfs_files(dev);
 
 	return 0;
 
-err_wq:
-	destroy_workqueue(cmd->wq);
-
 err_cache:
 	destroy_msg_cache(dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index a11e22d0b0cc..04854e5fbcd7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -92,8 +92,6 @@ EXPORT_SYMBOL(mlx5_debugfs_root);
 void mlx5_register_debugfs(void)
 {
 	mlx5_debugfs_root = debugfs_create_dir("mlx5", NULL);
-	if (IS_ERR_OR_NULL(mlx5_debugfs_root))
-		mlx5_debugfs_root = NULL;
 }
 
 void mlx5_unregister_debugfs(void)
@@ -101,45 +99,25 @@ void mlx5_unregister_debugfs(void)
 	debugfs_remove(mlx5_debugfs_root);
 }
 
-int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	atomic_set(&dev->num_qps, 0);
 
 	dev->priv.qp_debugfs = debugfs_create_dir("QPs",  dev->priv.dbg_root);
-	if (!dev->priv.qp_debugfs)
-		return -ENOMEM;
-
-	return 0;
 }
 
 void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.qp_debugfs);
 }
 
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	dev->priv.eq_debugfs = debugfs_create_dir("EQs",  dev->priv.dbg_root);
-	if (!dev->priv.eq_debugfs)
-		return -ENOMEM;
-
-	return 0;
 }
 
 void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.eq_debugfs);
 }
 
@@ -183,85 +161,41 @@ static const struct file_operations stats_fops = {
 	.write	= average_write,
 };
 
-int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_stats *stats;
 	struct dentry **cmd;
 	const char *namep;
-	int err;
 	int i;
 
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	cmd = &dev->priv.cmdif_debugfs;
 	*cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
-	if (!*cmd)
-		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
 		stats = &dev->cmd.stats[i];
 		namep = mlx5_command_str(i);
 		if (strcmp(namep, "unknown command opcode")) {
 			stats->root = debugfs_create_dir(namep, *cmd);
-			if (!stats->root) {
-				mlx5_core_warn(dev, "failed adding command %d\n",
-					       i);
-				err = -ENOMEM;
-				goto out;
-			}
-
-			stats->avg = debugfs_create_file("average", 0400,
-							 stats->root, stats,
-							 &stats_fops);
-			if (!stats->avg) {
-				mlx5_core_warn(dev, "failed creating debugfs file\n");
-				err = -ENOMEM;
-				goto out;
-			}
-
-			stats->count = debugfs_create_u64("n", 0400,
-							  stats->root,
-							  &stats->n);
-			if (!stats->count) {
-				mlx5_core_warn(dev, "failed creating debugfs file\n");
-				err = -ENOMEM;
-				goto out;
-			}
+
+			debugfs_create_file("average", 0400, stats->root, stats,
+					    &stats_fops);
+			debugfs_create_u64("n", 0400, stats->root, &stats->n);
 		}
 	}
-
-	return 0;
-out:
-	debugfs_remove_recursive(dev->priv.cmdif_debugfs);
-	return err;
 }
 
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.cmdif_debugfs);
 }
 
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return 0;
-
 	dev->priv.cq_debugfs = debugfs_create_dir("CQs",  dev->priv.dbg_root);
-	if (!dev->priv.cq_debugfs)
-		return -ENOMEM;
-
-	return 0;
 }
 
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
-	if (!mlx5_debugfs_root)
-		return;
-
 	debugfs_remove_recursive(dev->priv.cq_debugfs);
 }
 
@@ -484,7 +418,6 @@ static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
 {
 	struct mlx5_rsc_debug *d;
 	char resn[32];
-	int err;
 	int i;
 
 	d = kzalloc(struct_size(d, fields, nfile), GFP_KERNEL);
@@ -496,30 +429,15 @@ static int add_res_tree(struct mlx5_core_dev *dev, enum dbg_rsc_type type,
 	d->type = type;
 	sprintf(resn, "0x%x", rsn);
 	d->root = debugfs_create_dir(resn,  root);
-	if (!d->root) {
-		err = -ENOMEM;
-		goto out_free;
-	}
 
 	for (i = 0; i < nfile; i++) {
 		d->fields[i].i = i;
-		d->fields[i].dent = debugfs_create_file(field[i], 0400,
-							d->root, &d->fields[i],
-							&fops);
-		if (!d->fields[i].dent) {
-			err = -ENOMEM;
-			goto out_rem;
-		}
+		debugfs_create_file(field[i], 0400, d->root, &d->fields[i],
+				    &fops);
 	}
 	*dbg = d;
 
 	return 0;
-out_rem:
-	debugfs_remove_recursive(d->root);
-
-out_free:
-	kfree(d);
-	return err;
 }
 
 static void rem_res_tree(struct mlx5_rsc_debug *d)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 2df9aaa421c6..09d4c64b6e73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -411,7 +411,7 @@ void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
 int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *eq_table;
-	int i, err;
+	int i;
 
 	eq_table = kvzalloc(sizeof(*eq_table), GFP_KERNEL);
 	if (!eq_table)
@@ -419,9 +419,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 
 	dev->priv.eq_table = eq_table;
 
-	err = mlx5_eq_debugfs_init(dev);
-	if (err)
-		goto kvfree_eq_table;
+	mlx5_eq_debugfs_init(dev);
 
 	mutex_init(&eq_table->lock);
 	for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++)
@@ -429,11 +427,6 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 
 	eq_table->irq_table = dev->priv.irq_table;
 	return 0;
-
-kvfree_eq_table:
-	kvfree(eq_table);
-	dev->priv.eq_table = NULL;
-	return err;
 }
 
 void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 3dfab91ae5f2..4be4d2d36218 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -87,7 +87,7 @@ void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
 
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
-int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fa0e991f1983..0b70b1d6338d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -826,11 +826,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 		goto err_eq_cleanup;
 	}
 
-	err = mlx5_cq_debugfs_init(dev);
-	if (err) {
-		mlx5_core_err(dev, "failed to initialize cq debugfs\n");
-		goto err_events_cleanup;
-	}
+	mlx5_cq_debugfs_init(dev);
 
 	mlx5_init_qp_table(dev);
 
@@ -891,7 +887,6 @@ err_tables_cleanup:
 	mlx5_cleanup_mkey_table(dev);
 	mlx5_cleanup_qp_table(dev);
 	mlx5_cq_debugfs_cleanup(dev);
-err_events_cleanup:
 	mlx5_events_cleanup(dev);
 err_eq_cleanup:
 	mlx5_eq_table_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 471bbc48bc1f..87b75b2207c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -146,7 +146,7 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
 
 void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
 void mlx5_cmd_flush(struct mlx5_core_dev *dev);
-int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev);
 
 int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d8f348ef9c33..df23f17eed64 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -189,7 +189,6 @@ enum mlx5_coredev_type {
 };
 
 struct mlx5_field_desc {
-	struct dentry	       *dent;
 	int			i;
 };
 
@@ -242,11 +241,6 @@ struct mlx5_cmd_msg {
 
 struct mlx5_cmd_debug {
 	struct dentry	       *dbg_root;
-	struct dentry	       *dbg_in;
-	struct dentry	       *dbg_out;
-	struct dentry	       *dbg_outlen;
-	struct dentry	       *dbg_status;
-	struct dentry	       *dbg_run;
 	void		       *in_msg;
 	void		       *out_msg;
 	u8			status;
@@ -271,8 +265,6 @@ struct mlx5_cmd_stats {
 	u64		sum;
 	u64		n;
 	struct dentry  *root;
-	struct dentry  *avg;
-	struct dentry  *count;
 	/* protect command average calculations */
 	spinlock_t	lock;
 };
@@ -972,7 +964,7 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
 
-int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 			 int size_in, void *data_out, int size_out,
@@ -984,7 +976,7 @@ int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db,
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
 
 const char *mlx5_command_str(int command);
-int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
+void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index);
-- 
cgit v1.2.3


From e031d5f558f1535968df1ecae8d734b84c17f78d Mon Sep 17 00:00:00 2001
From: Denis Ciocca <denis.ciocca@st.com>
Date: Mon, 5 Aug 2019 11:57:11 -0700
Subject: iio:st_sensors: remove buffer allocation at each buffer enable

This patch is removing the buffer allocation at each buffer enable.
We just allocate enough memory in the main structure during probe
to cover maximum size needed (that anyway is pretty small) [16bytes].

Signed-off-by: Denis Ciocca <denis.ciocca@st.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_buffer.c       | 12 +-----------
 drivers/iio/gyro/st_gyro_buffer.c         | 12 +-----------
 drivers/iio/magnetometer/st_magn_buffer.c | 12 +-----------
 drivers/iio/pressure/st_pressure_buffer.c | 12 +-----------
 include/linux/iio/common/st_sensors.h     | 14 +++++++++-----
 5 files changed, 13 insertions(+), 49 deletions(-)

(limited to 'include')

diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c
index 59dcef02ec19..9f2b40474b8e 100644
--- a/drivers/iio/accel/st_accel_buffer.c
+++ b/drivers/iio/accel/st_accel_buffer.c
@@ -31,17 +31,11 @@ int st_accel_trig_set_state(struct iio_trigger *trig, bool state)
 
 static int st_accel_buffer_postenable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *adata = iio_priv(indio_dev);
 	int err;
 
-	adata->buffer_data = kmalloc(indio_dev->scan_bytes,
-				     GFP_DMA | GFP_KERNEL);
-	if (!adata->buffer_data)
-		return -ENOMEM;
-
 	err = iio_triggered_buffer_postenable(indio_dev);
 	if (err < 0)
-		goto st_accel_free_buffer;
+		return err;
 
 	err = st_sensors_set_axis_enable(indio_dev,
 					 (u8)indio_dev->active_scan_mask[0]);
@@ -58,14 +52,11 @@ st_accel_buffer_enable_all_axis:
 	st_sensors_set_axis_enable(indio_dev, ST_SENSORS_ENABLE_ALL_AXIS);
 st_accel_buffer_predisable:
 	iio_triggered_buffer_predisable(indio_dev);
-st_accel_free_buffer:
-	kfree(adata->buffer_data);
 	return err;
 }
 
 static int st_accel_buffer_predisable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *adata = iio_priv(indio_dev);
 	int err, err2;
 
 	err = st_sensors_set_enable(indio_dev, false);
@@ -79,7 +70,6 @@ st_accel_buffer_predisable:
 	if (!err)
 		err = err2;
 
-	kfree(adata->buffer_data);
 	return err;
 }
 
diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c
index c6ddfecc1fc3..7465ad62391c 100644
--- a/drivers/iio/gyro/st_gyro_buffer.c
+++ b/drivers/iio/gyro/st_gyro_buffer.c
@@ -31,17 +31,11 @@ int st_gyro_trig_set_state(struct iio_trigger *trig, bool state)
 
 static int st_gyro_buffer_postenable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *gdata = iio_priv(indio_dev);
 	int err;
 
-	gdata->buffer_data = kmalloc(indio_dev->scan_bytes,
-				     GFP_DMA | GFP_KERNEL);
-	if (!gdata->buffer_data)
-		return -ENOMEM;
-
 	err = iio_triggered_buffer_postenable(indio_dev);
 	if (err < 0)
-		goto st_gyro_free_buffer;
+		return err;
 
 	err = st_sensors_set_axis_enable(indio_dev,
 					 (u8)indio_dev->active_scan_mask[0]);
@@ -58,15 +52,12 @@ st_gyro_buffer_enable_all_axis:
 	st_sensors_set_axis_enable(indio_dev, ST_SENSORS_ENABLE_ALL_AXIS);
 st_gyro_buffer_predisable:
 	iio_triggered_buffer_predisable(indio_dev);
-st_gyro_free_buffer:
-	kfree(gdata->buffer_data);
 	return err;
 }
 
 static int st_gyro_buffer_predisable(struct iio_dev *indio_dev)
 {
 	int err, err2;
-	struct st_sensor_data *gdata = iio_priv(indio_dev);
 
 	err = st_sensors_set_enable(indio_dev, false);
 	if (err < 0)
@@ -79,7 +70,6 @@ st_gyro_buffer_predisable:
 	if (!err)
 		err = err2;
 
-	kfree(gdata->buffer_data);
 	return err;
 }
 
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index 658d627dad8e..bb425c167a96 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -31,17 +31,11 @@ int st_magn_trig_set_state(struct iio_trigger *trig, bool state)
 
 static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *mdata = iio_priv(indio_dev);
 	int err;
 
-	mdata->buffer_data = kmalloc(indio_dev->scan_bytes,
-				     GFP_DMA | GFP_KERNEL);
-	if (!mdata->buffer_data)
-		return -ENOMEM;
-
 	err = iio_triggered_buffer_postenable(indio_dev);
 	if (err < 0)
-		goto st_magn_free_buffer;
+		return err;
 
 	err = st_sensors_set_enable(indio_dev, true);
 	if (err < 0)
@@ -51,14 +45,11 @@ static int st_magn_buffer_postenable(struct iio_dev *indio_dev)
 
 st_magn_buffer_predisable:
 	iio_triggered_buffer_predisable(indio_dev);
-st_magn_free_buffer:
-	kfree(mdata->buffer_data);
 	return err;
 }
 
 static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *mdata = iio_priv(indio_dev);
 	int err, err2;
 
 	err = st_sensors_set_enable(indio_dev, false);
@@ -67,7 +58,6 @@ static int st_magn_buffer_predisable(struct iio_dev *indio_dev)
 	if (!err)
 		err = err2;
 
-	kfree(mdata->buffer_data);
 	return err;
 }
 
diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c
index 77cb2d862f19..418dbf9e6e1e 100644
--- a/drivers/iio/pressure/st_pressure_buffer.c
+++ b/drivers/iio/pressure/st_pressure_buffer.c
@@ -31,17 +31,11 @@ int st_press_trig_set_state(struct iio_trigger *trig, bool state)
 
 static int st_press_buffer_postenable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *press_data = iio_priv(indio_dev);
 	int err;
 
-	press_data->buffer_data = kmalloc(indio_dev->scan_bytes,
-					  GFP_DMA | GFP_KERNEL);
-	if (!press_data->buffer_data)
-		return -ENOMEM;
-
 	err = iio_triggered_buffer_postenable(indio_dev);
 	if (err < 0)
-		goto st_press_free_buffer;
+		return err;
 
 	err = st_sensors_set_enable(indio_dev, true);
 	if (err < 0)
@@ -51,14 +45,11 @@ static int st_press_buffer_postenable(struct iio_dev *indio_dev)
 
 st_press_buffer_predisable:
 	iio_triggered_buffer_predisable(indio_dev);
-st_press_free_buffer:
-	kfree(press_data->buffer_data);
 	return err;
 }
 
 static int st_press_buffer_predisable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *press_data = iio_priv(indio_dev);
 	int err, err2;
 
 	err = st_sensors_set_enable(indio_dev, false);
@@ -67,7 +58,6 @@ static int st_press_buffer_predisable(struct iio_dev *indio_dev)
 	if (!err)
 		err = err2;
 
-	kfree(press_data->buffer_data);
 	return err;
 }
 
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 4d0889bf1c6c..686be532f4cb 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -20,8 +20,12 @@
 
 #include <linux/platform_data/st_sensors_pdata.h>
 
-#define ST_SENSORS_TX_MAX_LENGTH		2
-#define ST_SENSORS_RX_MAX_LENGTH		6
+/*
+ * Buffer size max case: 2bytes per channel, 3 channels in total +
+ *			 8bytes timestamp channel (s64)
+ */
+#define ST_SENSORS_MAX_BUFFER_SIZE		(ALIGN(2 * 3, sizeof(s64)) + \
+						 sizeof(s64))
 
 #define ST_SENSORS_ODR_LIST_MAX			10
 #define ST_SENSORS_FULLSCALE_AVL_MAX		10
@@ -215,7 +219,6 @@ struct st_sensor_settings {
  * @vdd_io: Pointer to sensor's Vdd-IO power supply
  * @regmap: Pointer to specific sensor regmap configuration.
  * @enabled: Status of the sensor (false->off, true->on).
- * @buffer_data: Data used by buffer part.
  * @odr: Output data rate of the sensor [Hz].
  * num_data_channels: Number of data channels used in buffer.
  * @drdy_int_pin: Redirect DRDY on pin 1 (1) or pin 2 (2).
@@ -224,6 +227,7 @@ struct st_sensor_settings {
  * @edge_irq: the IRQ triggers on edges and need special handling.
  * @hw_irq_trigger: if we're using the hardware interrupt on the sensor.
  * @hw_timestamp: Latest timestamp from the interrupt handler, when in use.
+ * @buffer_data: Data used by buffer part.
  */
 struct st_sensor_data {
 	struct device *dev;
@@ -237,8 +241,6 @@ struct st_sensor_data {
 
 	bool enabled;
 
-	char *buffer_data;
-
 	unsigned int odr;
 	unsigned int num_data_channels;
 
@@ -249,6 +251,8 @@ struct st_sensor_data {
 	bool edge_irq;
 	bool hw_irq_trigger;
 	s64 hw_timestamp;
+
+	char buffer_data[ST_SENSORS_MAX_BUFFER_SIZE] ____cacheline_aligned;
 };
 
 #ifdef CONFIG_IIO_BUFFER
-- 
cgit v1.2.3


From b1ac3a4b9aa2f68d498824f1235788e67b51b486 Mon Sep 17 00:00:00 2001
From: Przemyslaw Gaj <pgaj@cadence.com>
Date: Sat, 22 Jun 2019 21:54:59 +0100
Subject: i3c: add addr and lvr to i2c_dev_desc structure

I need to store address and lvr value for I2C devices without static definition
in DT. This allows secondary master to transmit DEFSLVS command properly.

Main changes between v4 and v5:
- Change in defslvs to use addr and lvr from i2c_dev_desc structure
- Change in CDNS and DW drivers to use addr and lvr from i2c_dev_desc structure

Signed-off-by: Przemyslaw Gaj <pgaj@cadence.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/i3c/master.c                 | 10 ++++++----
 drivers/i3c/master/dw-i3c-master.c   |  4 ++--
 drivers/i3c/master/i3c-master-cdns.c |  4 ++--
 include/linux/i3c/master.h           |  5 +++++
 4 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 87d0f349dd37..a38fdf325d30 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -600,6 +600,8 @@ i3c_master_alloc_i2c_dev(struct i3c_master_controller *master,
 
 	dev->common.master = master;
 	dev->boardinfo = boardinfo;
+	dev->addr = boardinfo->base.addr;
+	dev->lvr = boardinfo->lvr;
 
 	return dev;
 }
@@ -918,8 +920,8 @@ int i3c_master_defslvs_locked(struct i3c_master_controller *master)
 
 	desc = defslvs->slaves;
 	i3c_bus_for_each_i2cdev(bus, i2cdev) {
-		desc->lvr = i2cdev->boardinfo->lvr;
-		desc->static_addr = i2cdev->boardinfo->base.addr << 1;
+		desc->lvr = i2cdev->lvr;
+		desc->static_addr = i2cdev->addr << 1;
 		desc++;
 	}
 
@@ -1586,8 +1588,8 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master)
 				 common.node) {
 		i3c_master_detach_i2c_dev(i2cdev);
 		i3c_bus_set_addr_slot_status(&master->bus,
-					i2cdev->boardinfo->base.addr,
-					I3C_ADDR_SLOT_FREE);
+					     i2cdev->addr,
+					     I3C_ADDR_SLOT_FREE);
 		i3c_master_free_i2c_dev(i2cdev);
 	}
 }
diff --git a/drivers/i3c/master/dw-i3c-master.c b/drivers/i3c/master/dw-i3c-master.c
index 09912d75c6d5..b0ff0e12d84c 100644
--- a/drivers/i3c/master/dw-i3c-master.c
+++ b/drivers/i3c/master/dw-i3c-master.c
@@ -1033,12 +1033,12 @@ static int dw_i3c_master_attach_i2c_dev(struct i2c_dev_desc *dev)
 		return -ENOMEM;
 
 	data->index = pos;
-	master->addrs[pos] = dev->boardinfo->base.addr;
+	master->addrs[pos] = dev->addr;
 	master->free_pos &= ~BIT(pos);
 	i2c_dev_set_master_data(dev, data);
 
 	writel(DEV_ADDR_TABLE_LEGACY_I2C_DEV |
-	       DEV_ADDR_TABLE_STATIC_ADDR(dev->boardinfo->base.addr),
+	       DEV_ADDR_TABLE_STATIC_ADDR(dev->addr),
 	       master->regs +
 	       DEV_ADDR_TABLE_LOC(master->datstartaddr, data->index));
 
diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c
index c8adf1eb7e1e..10db0bf0655a 100644
--- a/drivers/i3c/master/i3c-master-cdns.c
+++ b/drivers/i3c/master/i3c-master-cdns.c
@@ -1003,9 +1003,9 @@ static int cdns_i3c_master_attach_i2c_dev(struct i2c_dev_desc *dev)
 	master->free_rr_slots &= ~BIT(slot);
 	i2c_dev_set_master_data(dev, data);
 
-	writel(prepare_rr0_dev_address(dev->boardinfo->base.addr),
+	writel(prepare_rr0_dev_address(dev->addr),
 	       master->regs + DEV_ID_RR0(data->id));
-	writel(dev->boardinfo->lvr, master->regs + DEV_ID_RR2(data->id));
+	writel(dev->lvr, master->regs + DEV_ID_RR2(data->id));
 	writel(readl(master->regs + DEVS_CTRL) |
 	       DEVS_CTRL_DEV_ACTIVE(data->id),
 	       master->regs + DEVS_CTRL);
diff --git a/include/linux/i3c/master.h b/include/linux/i3c/master.h
index 1f08fa8d69d2..9cb39d901cd5 100644
--- a/include/linux/i3c/master.h
+++ b/include/linux/i3c/master.h
@@ -71,6 +71,9 @@ struct i2c_dev_boardinfo {
  * @common: common part of the I2C device descriptor
  * @boardinfo: pointer to the boardinfo attached to this I2C device
  * @dev: I2C device object registered to the I2C framework
+ * @addr: I2C device address
+ * @lvr: LVR (Legacy Virtual Register) needed by the I3C core to know about
+ *	 the I2C device limitations
  *
  * Each I2C device connected on the bus will have an i2c_dev_desc.
  * This object is created by the core and later attached to the controller
@@ -84,6 +87,8 @@ struct i2c_dev_desc {
 	struct i3c_i2c_dev_desc common;
 	const struct i2c_dev_boardinfo *boardinfo;
 	struct i2c_client *dev;
+	u16 addr;
+	u8 lvr;
 };
 
 /**
-- 
cgit v1.2.3


From 28315f7999870bb56da236f6b4ffce63efcc7897 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Aug 2019 10:35:50 +0300
Subject: drop_monitor: Add alert mode operations

The next patch is going to add another alert mode in which the dropped
packet is notified to user space, instead of only a summary of recent
drops.

Abstract the differences between the modes by adding alert mode
operations. The operations are selected based on the currently
configured mode and associated with the probes and the work item just
before tracing starts.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  9 +++++++++
 net/core/drop_monitor.c          | 38 ++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 5edbd0a675fd..0fecdedeb6ca 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -62,4 +62,13 @@ enum {
  * Our group identifiers
  */
 #define NET_DM_GRP_ALERT 1
+
+/**
+ * enum net_dm_alert_mode - Alert mode.
+ * @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
+ */
+enum net_dm_alert_mode {
+	NET_DM_ALERT_MODE_SUMMARY,
+};
+
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cd2f3069f34e..9cd2f662cb9e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -75,6 +75,16 @@ static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
+static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+
+struct net_dm_alert_ops {
+	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
+				void *location);
+	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
+				int work, int budget);
+	void (*work_item_func)(struct work_struct *work);
+};
+
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -241,10 +251,23 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
+	.kfree_skb_probe	= trace_kfree_skb_hit,
+	.napi_poll_probe	= trace_napi_poll_hit,
+	.work_item_func		= send_dm_alert,
+};
+
+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
+	[NET_DM_ALERT_MODE_SUMMARY]	= &net_dm_alert_summary_ops,
+};
+
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 {
+	const struct net_dm_alert_ops *ops;
 	int cpu, rc;
 
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
 	if (!try_module_get(THIS_MODULE)) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
 		return -ENODEV;
@@ -254,7 +277,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
 		struct sk_buff *skb;
 
-		INIT_WORK(&data->dm_alert_work, send_dm_alert);
+		INIT_WORK(&data->dm_alert_work, ops->work_item_func);
 		timer_setup(&data->send_timer, sched_send_work, 0);
 		/* Allocate a new per-CPU skb for the summary alert message and
 		 * free the old one which might contain stale data from
@@ -264,13 +287,13 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 		consume_skb(skb);
 	}
 
-	rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 	if (rc) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
 		goto err_module_put;
 	}
 
-	rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
+	rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
 	if (rc) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
 		goto err_unregister_trace;
@@ -279,7 +302,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 	return 0;
 
 err_unregister_trace:
-	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 err_module_put:
 	module_put(THIS_MODULE);
 	return rc;
@@ -288,10 +311,13 @@ err_module_put:
 static void net_dm_trace_off_set(void)
 {
 	struct dm_hw_stat_delta *new_stat, *temp;
+	const struct net_dm_alert_ops *ops;
 	int cpu;
 
-	unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
-	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
+	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 
 	tracepoint_synchronize_unregister();
 
-- 
cgit v1.2.3


From ca30707dee2bc8bc81cfd8b4277fe90f7ca6df1f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Aug 2019 10:35:51 +0300
Subject: drop_monitor: Add packet alert mode

So far drop monitor supported only one alert mode in which a summary of
locations in which packets were recently dropped was sent to user space.

This alert mode is sufficient in order to understand that packets were
dropped, but lacks information to perform a more detailed analysis.

Add a new alert mode in which the dropped packet itself is passed to
user space along with metadata: The drop location (as program counter
and resolved symbol), ingress netdevice and drop timestamp. More
metadata can be added in the future.

To avoid performing expensive operations in the context in which
kfree_skb() is invoked (can be hard IRQ), the dropped skb is cloned and
queued on per-CPU skb drop list. Then, in process context the netlink
message is allocated, prepared and finally sent to user space.

The per-CPU skb drop list is limited to 1000 skbs to prevent exhausting
the system's memory. Subsequent patches will make this limit
configurable and also add a counter that indicates how many skbs were
tail dropped.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  27 ++++
 net/core/drop_monitor.c          | 280 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 305 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 0fecdedeb6ca..cfaaf75371b8 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -53,6 +53,7 @@ enum {
 	NET_DM_CMD_CONFIG,
 	NET_DM_CMD_START,
 	NET_DM_CMD_STOP,
+	NET_DM_CMD_PACKET_ALERT,
 	_NET_DM_CMD_MAX,
 };
 
@@ -63,12 +64,38 @@ enum {
  */
 #define NET_DM_GRP_ALERT 1
 
+enum net_dm_attr {
+	NET_DM_ATTR_UNSPEC,
+
+	NET_DM_ATTR_ALERT_MODE,			/* u8 */
+	NET_DM_ATTR_PC,				/* u64 */
+	NET_DM_ATTR_SYMBOL,			/* string */
+	NET_DM_ATTR_IN_PORT,			/* nested */
+	NET_DM_ATTR_TIMESTAMP,			/* struct timespec */
+	NET_DM_ATTR_PROTO,			/* u16 */
+	NET_DM_ATTR_PAYLOAD,			/* binary */
+	NET_DM_ATTR_PAD,
+
+	__NET_DM_ATTR_MAX,
+	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
+};
+
 /**
  * enum net_dm_alert_mode - Alert mode.
  * @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
+ * @NET_DM_ALERT_MODE_PACKET: Each dropped packet is sent to user space along
+ *                            with metadata.
  */
 enum net_dm_alert_mode {
 	NET_DM_ALERT_MODE_SUMMARY,
+	NET_DM_ALERT_MODE_PACKET,
+};
+
+enum {
+	NET_DM_ATTR_PORT_NETDEV_IFINDEX,	/* u32 */
+
+	__NET_DM_ATTR_PORT_MAX,
+	NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
 };
 
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9cd2f662cb9e..ba765832413b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -54,6 +54,7 @@ static DEFINE_MUTEX(net_dm_mutex);
 struct per_cpu_dm_data {
 	spinlock_t		lock;	/* Protects 'skb' and 'send_timer' */
 	struct sk_buff		*skb;
+	struct sk_buff_head	drop_queue;
 	struct work_struct	dm_alert_work;
 	struct timer_list	send_timer;
 };
@@ -85,6 +86,14 @@ struct net_dm_alert_ops {
 	void (*work_item_func)(struct work_struct *work);
 };
 
+struct net_dm_skb_cb {
+	void *pc;
+};
+
+#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
+
+#define NET_DM_QUEUE_LEN 1000
+
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -257,8 +266,214 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
 	.work_item_func		= send_dm_alert,
 };
 
+static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
+					      struct sk_buff *skb,
+					      void *location)
+{
+	ktime_t tstamp = ktime_get_real();
+	struct per_cpu_dm_data *data;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	NET_DM_SKB_CB(nskb)->pc = location;
+	/* Override the timestamp because we care about the time when the
+	 * packet was dropped.
+	 */
+	nskb->tstamp = tstamp;
+
+	data = this_cpu_ptr(&dm_cpu_data);
+
+	spin_lock_irqsave(&data->drop_queue.lock, flags);
+	if (skb_queue_len(&data->drop_queue) < NET_DM_QUEUE_LEN)
+		__skb_queue_tail(&data->drop_queue, nskb);
+	else
+		goto unlock_free;
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+	schedule_work(&data->dm_alert_work);
+
+	return;
+
+unlock_free:
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+	consume_skb(nskb);
+}
+
+static void net_dm_packet_trace_napi_poll_hit(void *ignore,
+					      struct napi_struct *napi,
+					      int work, int budget)
+{
+}
+
+static size_t net_dm_in_port_size(void)
+{
+	       /* NET_DM_ATTR_IN_PORT nest */
+	return nla_total_size(0) +
+	       /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
+	       nla_total_size(sizeof(u32));
+}
+
+#define NET_DM_MAX_SYMBOL_LEN 40
+
+static size_t net_dm_packet_report_size(size_t payload_len)
+{
+	size_t size;
+
+	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_PC */
+	       nla_total_size(sizeof(u64)) +
+	       /* NET_DM_ATTR_SYMBOL */
+	       nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
+	       /* NET_DM_ATTR_IN_PORT */
+	       net_dm_in_port_size() +
+	       /* NET_DM_ATTR_TIMESTAMP */
+	       nla_total_size(sizeof(struct timespec)) +
+	       /* NET_DM_ATTR_PROTO */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PAYLOAD */
+	       nla_total_size(payload_len);
+}
+
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (ifindex &&
+	    nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
+				     size_t payload_len)
+{
+	u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+	char buf[NET_DM_MAX_SYMBOL_LEN];
+	struct nlattr *attr;
+	struct timespec ts;
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_PACKET_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+	if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
+		goto nla_put_failure;
+
+	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+	if (rc)
+		goto nla_put_failure;
+
+	if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
+	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+		goto nla_put_failure;
+
+	if (!payload_len)
+		goto out;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+		goto nla_put_failure;
+
+	attr = skb_put(msg, nla_total_size(payload_len));
+	attr->nla_type = NET_DM_ATTR_PAYLOAD;
+	attr->nla_len = nla_attr_size(payload_len);
+	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
+
+static void net_dm_packet_report(struct sk_buff *skb)
+{
+	struct sk_buff *msg;
+	size_t payload_len;
+	int rc;
+
+	/* Make sure we start copying the packet from the MAC header */
+	if (skb->data > skb_mac_header(skb))
+		skb_push(skb, skb->data - skb_mac_header(skb));
+	else
+		skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+	/* Ensure packet fits inside a single netlink attribute */
+	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+
+	msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_packet_report_fill(msg, skb, payload_len);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	consume_skb(skb);
+}
+
+static void net_dm_packet_work(struct work_struct *work)
+{
+	struct per_cpu_dm_data *data;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_irqsave(&data->drop_queue.lock, flags);
+	skb_queue_splice_tail_init(&data->drop_queue, &list);
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+	while ((skb = __skb_dequeue(&list)))
+		net_dm_packet_report(skb);
+}
+
+static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
+	.kfree_skb_probe	= net_dm_packet_trace_kfree_skb_hit,
+	.napi_poll_probe	= net_dm_packet_trace_napi_poll_hit,
+	.work_item_func		= net_dm_packet_work,
+};
+
 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
 	[NET_DM_ALERT_MODE_SUMMARY]	= &net_dm_alert_summary_ops,
+	[NET_DM_ALERT_MODE_PACKET]	= &net_dm_alert_packet_ops,
 };
 
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
@@ -326,9 +541,12 @@ static void net_dm_trace_off_set(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct sk_buff *skb;
 
 		del_timer_sync(&data->send_timer);
 		cancel_work_sync(&data->dm_alert_work);
+		while ((skb = __skb_dequeue(&data->drop_queue)))
+			consume_skb(skb);
 	}
 
 	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
@@ -370,12 +588,61 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 	return rc;
 }
 
+static int net_dm_alert_mode_get_from_info(struct genl_info *info,
+					   enum net_dm_alert_mode *p_alert_mode)
+{
+	u8 val;
+
+	val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
+
+	switch (val) {
+	case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
+	case NET_DM_ALERT_MODE_PACKET:
+		*p_alert_mode = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int net_dm_alert_mode_set(struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	enum net_dm_alert_mode alert_mode;
+	int rc;
+
+	if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
+		return 0;
+
+	rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
+		return -EINVAL;
+	}
+
+	net_dm_alert_mode = alert_mode;
+
+	return 0;
+}
+
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
-	NL_SET_ERR_MSG_MOD(info->extack, "Command not supported");
+	struct netlink_ext_ack *extack = info->extack;
+	int rc;
 
-	return -EOPNOTSUPP;
+	if (trace_state == TRACE_ON) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
+		return -EBUSY;
+	}
+
+	rc = net_dm_alert_mode_set(info);
+	if (rc)
+		return rc;
+
+	return 0;
 }
 
 static int net_dm_cmd_trace(struct sk_buff *skb,
@@ -430,6 +697,11 @@ out:
 	return NOTIFY_DONE;
 }
 
+static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
+	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
+	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+};
+
 static const struct genl_ops dropmon_ops[] = {
 	{
 		.cmd = NET_DM_CMD_CONFIG,
@@ -467,6 +739,8 @@ static struct genl_family net_drop_monitor_family __ro_after_init = {
 	.hdrsize        = 0,
 	.name           = "NET_DM",
 	.version        = 2,
+	.maxattr	= NET_DM_ATTR_MAX,
+	.policy		= net_dm_nl_policy,
 	.pre_doit	= net_dm_nl_pre_doit,
 	.post_doit	= net_dm_nl_post_doit,
 	.module		= THIS_MODULE,
@@ -510,6 +784,7 @@ static int __init init_net_drop_monitor(void)
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
 		spin_lock_init(&data->lock);
+		skb_queue_head_init(&data->drop_queue);
 	}
 
 	goto out;
@@ -539,6 +814,7 @@ static void exit_net_drop_monitor(void)
 		 * to this struct and can free the skb inside it
 		 */
 		kfree_skb(data->skb);
+		WARN_ON(!skb_queue_empty(&data->drop_queue));
 	}
 
 	BUG_ON(genl_unregister_family(&net_drop_monitor_family));
-- 
cgit v1.2.3


From 57986617a736aec2980c1c78a9dd8dcdf477ee6e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Aug 2019 10:35:52 +0300
Subject: drop_monitor: Allow truncation of dropped packets

When sending dropped packets to user space it is not always necessary to
copy the entire packet as usually only the headers are of interest.

Allow user to specify the truncation length and add the original length
of the packet as additional metadata to the netlink message.

By default no truncation is performed.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  2 ++
 net/core/drop_monitor.c          | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index cfaaf75371b8..5cd7eb1f66ba 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -75,6 +75,8 @@ enum net_dm_attr {
 	NET_DM_ATTR_PROTO,			/* u16 */
 	NET_DM_ATTR_PAYLOAD,			/* binary */
 	NET_DM_ATTR_PAD,
+	NET_DM_ATTR_TRUNC_LEN,			/* u32 */
+	NET_DM_ATTR_ORIG_LEN,			/* u32 */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ba765832413b..9f884adaa85f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -77,6 +77,7 @@ static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+static u32 net_dm_trunc_len;
 
 struct net_dm_alert_ops {
 	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
@@ -334,6 +335,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	       net_dm_in_port_size() +
 	       /* NET_DM_ATTR_TIMESTAMP */
 	       nla_total_size(sizeof(struct timespec)) +
+	       /* NET_DM_ATTR_ORIG_LEN */
+	       nla_total_size(sizeof(u32)) +
 	       /* NET_DM_ATTR_PROTO */
 	       nla_total_size(sizeof(u16)) +
 	       /* NET_DM_ATTR_PAYLOAD */
@@ -391,6 +394,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
 		goto nla_put_failure;
 
+	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+		goto nla_put_failure;
+
 	if (!payload_len)
 		goto out;
 
@@ -429,6 +435,8 @@ static void net_dm_packet_report(struct sk_buff *skb)
 
 	/* Ensure packet fits inside a single netlink attribute */
 	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+	if (net_dm_trunc_len)
+		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
 
 	msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
 	if (!msg)
@@ -627,6 +635,14 @@ static int net_dm_alert_mode_set(struct genl_info *info)
 	return 0;
 }
 
+static void net_dm_trunc_len_set(struct genl_info *info)
+{
+	if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
+		return;
+
+	net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
+}
+
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
@@ -642,6 +658,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 	if (rc)
 		return rc;
 
+	net_dm_trunc_len_set(info);
+
 	return 0;
 }
 
@@ -700,6 +718,7 @@ out:
 static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
 	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
 	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops dropmon_ops[] = {
-- 
cgit v1.2.3


From 444be061d012f1a8ebf95292a648a4e0e2afa83f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Aug 2019 10:35:53 +0300
Subject: drop_monitor: Add a command to query current configuration

Users should be able to query the current configuration of drop monitor
before they start using it. Add a command to query the existing
configuration which currently consists of alert mode and packet
truncation length.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  2 ++
 net/core/drop_monitor.c          | 48 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 5cd7eb1f66ba..3b765a8428b5 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -54,6 +54,8 @@ enum {
 	NET_DM_CMD_START,
 	NET_DM_CMD_STOP,
 	NET_DM_CMD_PACKET_ALERT,
+	NET_DM_CMD_CONFIG_GET,
+	NET_DM_CMD_CONFIG_NEW,
 	_NET_DM_CMD_MAX,
 };
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9f884adaa85f..135638474ab8 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -676,6 +676,50 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
 	return -EOPNOTSUPP;
 }
 
+static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	int rc;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = net_dm_config_fill(msg, info);
+	if (rc)
+		goto free_msg;
+
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return rc;
+}
+
 static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
 {
@@ -738,6 +782,10 @@ static const struct genl_ops dropmon_ops[] = {
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_trace,
 	},
+	{
+		.cmd = NET_DM_CMD_CONFIG_GET,
+		.doit = net_dm_cmd_config_get,
+	},
 };
 
 static int net_dm_nl_pre_doit(const struct genl_ops *ops,
-- 
cgit v1.2.3


From 30328d46af593dcf24582f2a431d84ea0cf4bdef Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Aug 2019 10:35:54 +0300
Subject: drop_monitor: Make drop queue length configurable

In packet alert mode, each CPU holds a list of dropped skbs that need to
be processed in process context and sent to user space. To avoid
exhausting the system's memory the maximum length of this queue is
currently set to 1000.

Allow users to tune the length of this queue according to their needs.
The configured length is reported to user space when drop monitor
configuration is queried.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  1 +
 net/core/drop_monitor.c          | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 3b765a8428b5..1d0bdb1ba954 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -79,6 +79,7 @@ enum net_dm_attr {
 	NET_DM_ATTR_PAD,
 	NET_DM_ATTR_TRUNC_LEN,			/* u32 */
 	NET_DM_ATTR_ORIG_LEN,			/* u32 */
+	NET_DM_ATTR_QUEUE_LEN,			/* u32 */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 135638474ab8..eb3c34d69ea9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -78,6 +78,7 @@ static LIST_HEAD(hw_stats_list);
 
 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
 static u32 net_dm_trunc_len;
+static u32 net_dm_queue_len = 1000;
 
 struct net_dm_alert_ops {
 	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
@@ -93,8 +94,6 @@ struct net_dm_skb_cb {
 
 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
 
-#define NET_DM_QUEUE_LEN 1000
-
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -289,7 +288,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
 	data = this_cpu_ptr(&dm_cpu_data);
 
 	spin_lock_irqsave(&data->drop_queue.lock, flags);
-	if (skb_queue_len(&data->drop_queue) < NET_DM_QUEUE_LEN)
+	if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
 		__skb_queue_tail(&data->drop_queue, nskb);
 	else
 		goto unlock_free;
@@ -643,6 +642,14 @@ static void net_dm_trunc_len_set(struct genl_info *info)
 	net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
 }
 
+static void net_dm_queue_len_set(struct genl_info *info)
+{
+	if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
+		return;
+
+	net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
+}
+
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
@@ -660,6 +667,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 
 	net_dm_trunc_len_set(info);
 
+	net_dm_queue_len_set(info);
+
 	return 0;
 }
 
@@ -691,6 +700,9 @@ static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
 	if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
 		goto nla_put_failure;
 
+	if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 
 	return 0;
@@ -763,6 +775,7 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
 	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
 	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
 	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
+	[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops dropmon_ops[] = {
-- 
cgit v1.2.3


From e9feb58020f952f7d9de785ede9a7d54ab1eda5c Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 11 Aug 2019 10:35:55 +0300
Subject: drop_monitor: Expose tail drop counter

Previous patch made the length of the per-CPU skb drop list
configurable. Expose a counter that shows how many packets could not be
enqueued to this list.

This allows users determine the desired queue length.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  10 ++++
 net/core/drop_monitor.c          | 101 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 1d0bdb1ba954..405b31cbf723 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -56,6 +56,8 @@ enum {
 	NET_DM_CMD_PACKET_ALERT,
 	NET_DM_CMD_CONFIG_GET,
 	NET_DM_CMD_CONFIG_NEW,
+	NET_DM_CMD_STATS_GET,
+	NET_DM_CMD_STATS_NEW,
 	_NET_DM_CMD_MAX,
 };
 
@@ -80,6 +82,7 @@ enum net_dm_attr {
 	NET_DM_ATTR_TRUNC_LEN,			/* u32 */
 	NET_DM_ATTR_ORIG_LEN,			/* u32 */
 	NET_DM_ATTR_QUEUE_LEN,			/* u32 */
+	NET_DM_ATTR_STATS,			/* nested */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
@@ -103,4 +106,11 @@ enum {
 	NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
 };
 
+enum {
+	NET_DM_ATTR_STATS_DROPPED,		/* u64 */
+
+	__NET_DM_ATTR_STATS_MAX,
+	NET_DM_ATTR_STATS_MAX = __NET_DM_ATTR_STATS_MAX - 1
+};
+
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index eb3c34d69ea9..39e094907391 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -51,12 +51,18 @@ static int trace_state = TRACE_OFF;
  */
 static DEFINE_MUTEX(net_dm_mutex);
 
+struct net_dm_stats {
+	u64 dropped;
+	struct u64_stats_sync syncp;
+};
+
 struct per_cpu_dm_data {
 	spinlock_t		lock;	/* Protects 'skb' and 'send_timer' */
 	struct sk_buff		*skb;
 	struct sk_buff_head	drop_queue;
 	struct work_struct	dm_alert_work;
 	struct timer_list	send_timer;
+	struct net_dm_stats	stats;
 };
 
 struct dm_hw_stat_delta {
@@ -300,6 +306,9 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
 
 unlock_free:
 	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+	u64_stats_update_begin(&data->stats.syncp);
+	data->stats.dropped++;
+	u64_stats_update_end(&data->stats.syncp);
 	consume_skb(nskb);
 }
 
@@ -732,6 +741,93 @@ free_msg:
 	return rc;
 }
 
+static void net_dm_stats_read(struct net_dm_stats *stats)
+{
+	int cpu;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct net_dm_stats *cpu_stats = &data->stats;
+		unsigned int start;
+		u64 dropped;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			dropped = cpu_stats->dropped;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->dropped += dropped;
+	}
+}
+
+static int net_dm_stats_put(struct sk_buff *msg)
+{
+	struct net_dm_stats stats;
+	struct nlattr *attr;
+
+	net_dm_stats_read(&stats);
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+			      stats.dropped, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
+{
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	rc = net_dm_stats_put(msg);
+	if (rc)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	int rc;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = net_dm_stats_fill(msg, info);
+	if (rc)
+		goto free_msg;
+
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return rc;
+}
+
 static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
 {
@@ -799,6 +895,10 @@ static const struct genl_ops dropmon_ops[] = {
 		.cmd = NET_DM_CMD_CONFIG_GET,
 		.doit = net_dm_cmd_config_get,
 	},
+	{
+		.cmd = NET_DM_CMD_STATS_GET,
+		.doit = net_dm_cmd_stats_get,
+	},
 };
 
 static int net_dm_nl_pre_doit(const struct genl_ops *ops,
@@ -865,6 +965,7 @@ static int __init init_net_drop_monitor(void)
 		data = &per_cpu(dm_cpu_data, cpu);
 		spin_lock_init(&data->lock);
 		skb_queue_head_init(&data->drop_queue);
+		u64_stats_init(&data->stats.syncp);
 	}
 
 	goto out;
-- 
cgit v1.2.3


From f4069cd7fa6583e7094001c6fce6f426d17a4c76 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 9 Aug 2019 20:43:50 +0200
Subject: net: phy: prepare phylib to deal with PHY's extending Clause 22

The integrated PHY in 2.5Gbps chip RTL8125 is the first (known to me)
PHY that uses standard Clause 22 for all modes up to 1Gbps and adds
2.5Gbps control using vendor-specific registers. To use phylib for
the standard part little extensions are needed:
- Move most of genphy_config_aneg to a new function
  __genphy_config_aneg that takes a parameter whether restarting
  auto-negotiation is needed (depending on whether content of
  vendor-specific advertisement register changed).
- Don't clear phydev->lp_advertising in genphy_read_status so that
  we can set non-C22 mode flags before.

Basically both changes mimic the behavior of the equivalent Clause 45
functions.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 27 ++++++++++++---------------
 include/linux/phy.h          |  8 +++++++-
 2 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index a70a98dc9879..b039632de73a 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1671,18 +1671,20 @@ int genphy_restart_aneg(struct phy_device *phydev)
 EXPORT_SYMBOL(genphy_restart_aneg);
 
 /**
- * genphy_config_aneg - restart auto-negotiation or write BMCR
+ * __genphy_config_aneg - restart auto-negotiation or write BMCR
  * @phydev: target phy_device struct
+ * @changed: whether autoneg is requested
  *
  * Description: If auto-negotiation is enabled, we configure the
  *   advertising, and then restart auto-negotiation.  If it is not
  *   enabled, then we write the BMCR.
  */
-int genphy_config_aneg(struct phy_device *phydev)
+int __genphy_config_aneg(struct phy_device *phydev, bool changed)
 {
-	int err, changed;
+	int err;
 
-	changed = genphy_config_eee_advert(phydev);
+	if (genphy_config_eee_advert(phydev))
+		changed = true;
 
 	if (AUTONEG_ENABLE != phydev->autoneg)
 		return genphy_setup_forced(phydev);
@@ -1690,10 +1692,10 @@ int genphy_config_aneg(struct phy_device *phydev)
 	err = genphy_config_advert(phydev);
 	if (err < 0) /* error */
 		return err;
+	else if (err)
+		changed = true;
 
-	changed |= err;
-
-	if (changed == 0) {
+	if (!changed) {
 		/* Advertisement hasn't changed, but maybe aneg was never on to
 		 * begin with?  Or maybe phy was isolated?
 		 */
@@ -1703,18 +1705,15 @@ int genphy_config_aneg(struct phy_device *phydev)
 			return ctl;
 
 		if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
-			changed = 1; /* do restart aneg */
+			changed = true; /* do restart aneg */
 	}
 
 	/* Only restart aneg if we are advertising something different
 	 * than we were before.
 	 */
-	if (changed > 0)
-		return genphy_restart_aneg(phydev);
-
-	return 0;
+	return changed ? genphy_restart_aneg(phydev) : 0;
 }
-EXPORT_SYMBOL(genphy_config_aneg);
+EXPORT_SYMBOL(__genphy_config_aneg);
 
 /**
  * genphy_aneg_done - return auto-negotiation status
@@ -1801,8 +1800,6 @@ int genphy_read_status(struct phy_device *phydev)
 	phydev->pause = 0;
 	phydev->asym_pause = 0;
 
-	linkmode_zero(phydev->lp_advertising);
-
 	if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
 		if (phydev->is_gigabit_capable) {
 			lpagb = phy_read(phydev, MII_STAT1000);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 462b90b73f93..7117825ee57a 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1069,7 +1069,7 @@ int genphy_read_abilities(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_eee_advert(struct phy_device *phydev);
-int genphy_config_aneg(struct phy_device *phydev);
+int __genphy_config_aneg(struct phy_device *phydev, bool changed);
 int genphy_aneg_done(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
 int genphy_read_status(struct phy_device *phydev);
@@ -1077,6 +1077,12 @@ int genphy_suspend(struct phy_device *phydev);
 int genphy_resume(struct phy_device *phydev);
 int genphy_loopback(struct phy_device *phydev, bool enable);
 int genphy_soft_reset(struct phy_device *phydev);
+
+static inline int genphy_config_aneg(struct phy_device *phydev)
+{
+	return __genphy_config_aneg(phydev, false);
+}
+
 static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
 	return 0;
-- 
cgit v1.2.3


From bf22b343ca800aac076ccf986e762b28545aa6bb Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 9 Aug 2019 20:44:22 +0200
Subject: net: phy: add phy_modify_paged_changed

Add helper function phy_modify_paged_changed, behavios is the same
as for phy_modify_changed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-core.c | 29 ++++++++++++++++++++++++-----
 include/linux/phy.h        |  2 ++
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 16667fbac8bf..9ae3abb2daca 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -783,24 +783,43 @@ int phy_write_paged(struct phy_device *phydev, int page, u32 regnum, u16 val)
 EXPORT_SYMBOL(phy_write_paged);
 
 /**
- * phy_modify_paged() - Convenience function for modifying a paged register
+ * phy_modify_paged_changed() - Function for modifying a paged register
  * @phydev: a pointer to a &struct phy_device
  * @page: the page for the phy
  * @regnum: register number
  * @mask: bit mask of bits to clear
  * @set: bit mask of bits to set
  *
- * Same rules as for phy_read() and phy_write().
+ * Returns negative errno, 0 if there was no change, and 1 in case of change
  */
-int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
-		     u16 mask, u16 set)
+int phy_modify_paged_changed(struct phy_device *phydev, int page, u32 regnum,
+			     u16 mask, u16 set)
 {
 	int ret = 0, oldpage;
 
 	oldpage = phy_select_page(phydev, page);
 	if (oldpage >= 0)
-		ret = __phy_modify(phydev, regnum, mask, set);
+		ret = __phy_modify_changed(phydev, regnum, mask, set);
 
 	return phy_restore_page(phydev, oldpage, ret);
 }
+EXPORT_SYMBOL(phy_modify_paged_changed);
+
+/**
+ * phy_modify_paged() - Convenience function for modifying a paged register
+ * @phydev: a pointer to a &struct phy_device
+ * @page: the page for the phy
+ * @regnum: register number
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ *
+ * Same rules as for phy_read() and phy_write().
+ */
+int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
+		     u16 mask, u16 set)
+{
+	int ret = phy_modify_paged_changed(phydev, page, regnum, mask, set);
+
+	return ret < 0 ? ret : 0;
+}
 EXPORT_SYMBOL(phy_modify_paged);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 7117825ee57a..781f4810ceba 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -984,6 +984,8 @@ int phy_select_page(struct phy_device *phydev, int page);
 int phy_restore_page(struct phy_device *phydev, int oldpage, int ret);
 int phy_read_paged(struct phy_device *phydev, int page, u32 regnum);
 int phy_write_paged(struct phy_device *phydev, int page, u32 regnum, u16 val);
+int phy_modify_paged_changed(struct phy_device *phydev, int page, u32 regnum,
+			     u16 mask, u16 set);
 int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
 		     u16 mask, u16 set);
 
-- 
cgit v1.2.3


From 22a5d3ce0d032d843e36d431e78e6a7dd0efa193 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 26 Jul 2019 15:05:38 +1000
Subject: usb: Add definitions for the USB2.0 hub TT requests

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/linux/usb/hcd.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index bab27ccc8ff5..367f9b39ac56 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -591,6 +591,10 @@ extern void usb_ep0_reinit(struct usb_device *);
 #define GetPortStatus		HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, USB_REQ_GET_STATUS)
 #define SetHubFeature		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_HUB, USB_REQ_SET_FEATURE)
 #define SetPortFeature		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, USB_REQ_SET_FEATURE)
+#define ClearTTBuffer		HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_CLEAR_TT_BUFFER)
+#define ResetTT			HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_RESET_TT)
+#define GetTTState		HUB_CLASS_REQ(USB_DIR_IN, USB_RT_PORT, HUB_GET_TT_STATE)
+#define StopTT			HUB_CLASS_REQ(USB_DIR_OUT, USB_RT_PORT, HUB_STOP_TT)
 
 
 /*-------------------------------------------------------------------------*/
-- 
cgit v1.2.3


From cbfe612d471fc3eda048a6a70c5c8f5ee34026a4 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 6 Jul 2019 18:47:22 +0200
Subject: mfd: aat2870: No need to check return value of debugfs_create
 functions

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/aat2870-core.c  | 13 ++-----------
 include/linux/mfd/aat2870.h |  1 -
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/aat2870-core.c b/drivers/mfd/aat2870-core.c
index 9f58cb2d3789..78ee4b28fca2 100644
--- a/drivers/mfd/aat2870-core.c
+++ b/drivers/mfd/aat2870-core.c
@@ -321,18 +321,9 @@ static const struct file_operations aat2870_reg_fops = {
 static void aat2870_init_debugfs(struct aat2870_data *aat2870)
 {
 	aat2870->dentry_root = debugfs_create_dir("aat2870", NULL);
-	if (!aat2870->dentry_root) {
-		dev_warn(aat2870->dev,
-			 "Failed to create debugfs root directory\n");
-		return;
-	}
 
-	aat2870->dentry_reg = debugfs_create_file("regs", 0644,
-						  aat2870->dentry_root,
-						  aat2870, &aat2870_reg_fops);
-	if (!aat2870->dentry_reg)
-		dev_warn(aat2870->dev,
-			 "Failed to create debugfs register file\n");
+	debugfs_create_file("regs", 0644, aat2870->dentry_root, aat2870,
+			    &aat2870_reg_fops);
 }
 
 #else
diff --git a/include/linux/mfd/aat2870.h b/include/linux/mfd/aat2870.h
index af7267c480ee..2445842d482d 100644
--- a/include/linux/mfd/aat2870.h
+++ b/include/linux/mfd/aat2870.h
@@ -136,7 +136,6 @@ struct aat2870_data {
 
 	/* for debugfs */
 	struct dentry *dentry_root;
-	struct dentry *dentry_reg;
 };
 
 struct aat2870_subdev_info {
-- 
cgit v1.2.3


From 624e3fceb533b33927535aaec713bd91ec80e99b Mon Sep 17 00:00:00 2001
From: Yicheng Li <yichengli@chromium.org>
Date: Mon, 8 Jul 2019 11:15:37 -0700
Subject: mfd: cros_ec: Update cros_ec_commands.h

Update cros_ec_commands.h to match the fingerprint MCU section in
the current ec_commands.h

Signed-off-by: Yicheng Li <yichengli@chromium.org>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/cros_ec_commands.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 7ccb8757b79d..98415686cbfa 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -5513,6 +5513,18 @@ struct ec_params_fp_seed {
 	uint8_t seed[FP_CONTEXT_TPM_BYTES];
 } __ec_align4;
 
+#define EC_CMD_FP_ENC_STATUS 0x0409
+
+/* FP TPM seed has been set or not */
+#define FP_ENC_STATUS_SEED_SET BIT(0)
+
+struct ec_response_fp_encryption_status {
+	/* Used bits in encryption engine status */
+	uint32_t valid_flags;
+	/* Encryption engine status */
+	uint32_t status;
+} __ec_align4;
+
 /*****************************************************************************/
 /* Touchpad MCU commands: range 0x0500-0x05FF */
 
-- 
cgit v1.2.3


From e406b832d89d63b9eb525fa8fea18eb7a1598c60 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Mon, 22 Jul 2019 20:26:28 +0200
Subject: mfd: da9063: Remove now unused platform_data

All preparational patches have been applied, we can now remove the
include file for platform_data. Yiha!

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/Kbuild                   |  1 -
 include/linux/mfd/da9063/pdata.h | 60 ----------------------------------------
 2 files changed, 61 deletions(-)
 delete mode 100644 include/linux/mfd/da9063/pdata.h

(limited to 'include')

diff --git a/include/Kbuild b/include/Kbuild
index c38f0d46b267..68aa094fe86e 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -313,7 +313,6 @@ header-test-			+= linux/mfd/as3722.h
 header-test-			+= linux/mfd/cros_ec_commands.h
 header-test-			+= linux/mfd/da903x.h
 header-test-			+= linux/mfd/da9055/pdata.h
-header-test-			+= linux/mfd/da9063/pdata.h
 header-test-			+= linux/mfd/db8500-prcmu.h
 header-test-			+= linux/mfd/dbx500-prcmu.h
 header-test-			+= linux/mfd/dln2.h
diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
deleted file mode 100644
index 085edbf7601b..000000000000
--- a/include/linux/mfd/da9063/pdata.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Platform configuration options for DA9063
- *
- * Copyright 2012 Dialog Semiconductor Ltd.
- *
- * Author: Michal Hajduk, Dialog Semiconductor
- * Author: Krystian Garbaciak, Dialog Semiconductor
- */
-
-#ifndef __MFD_DA9063_PDATA_H__
-#define __MFD_DA9063_PDATA_H__
-
-/*
- * RGB LED configuration
- */
-/* LED IDs for flags in struct led_info. */
-enum {
-	DA9063_GPIO11_LED,
-	DA9063_GPIO14_LED,
-	DA9063_GPIO15_LED,
-
-	DA9063_LED_NUM
-};
-#define DA9063_LED_ID_MASK		0x3
-
-/* LED polarity for flags in struct led_info. */
-#define DA9063_LED_HIGH_LEVEL_ACTIVE	0x0
-#define DA9063_LED_LOW_LEVEL_ACTIVE	0x4
-
-
-/*
- * General PMIC configuration
- */
-/* HWMON ADC channels configuration */
-#define DA9063_FLG_FORCE_IN0_MANUAL_MODE	0x0010
-#define DA9063_FLG_FORCE_IN0_AUTO_MODE		0x0020
-#define DA9063_FLG_FORCE_IN1_MANUAL_MODE	0x0040
-#define DA9063_FLG_FORCE_IN1_AUTO_MODE		0x0080
-#define DA9063_FLG_FORCE_IN2_MANUAL_MODE	0x0100
-#define DA9063_FLG_FORCE_IN2_AUTO_MODE		0x0200
-#define DA9063_FLG_FORCE_IN3_MANUAL_MODE	0x0400
-#define DA9063_FLG_FORCE_IN3_AUTO_MODE		0x0800
-
-/* Disable register caching. */
-#define DA9063_FLG_NO_CACHE			0x0008
-
-struct da9063;
-
-/* DA9063 platform data */
-struct da9063_pdata {
-	int				(*init)(struct da9063 *da9063);
-	int				irq_base;
-	bool				key_power;
-	unsigned			flags;
-	struct da9063_regulators_pdata	*regulators_pdata;
-	struct led_platform_data	*leds_pdata;
-};
-
-#endif	/* __MFD_DA9063_PDATA_H__ */
-- 
cgit v1.2.3


From f173f26a4d543fa57e6ed9505bbbbf9bec61f544 Mon Sep 17 00:00:00 2001
From: Vignesh Raghavendra <vigneshr@ti.com>
Date: Tue, 6 Aug 2019 10:40:39 +0530
Subject: mtd: spi-nor: always use bounce buffer for register read/writes

spi-mem layer expects all buffers passed to it to be DMA'able. But
spi-nor layer mostly allocates buffers on stack for reading/writing to
registers and therefore are not DMA'able. Introduce bounce buffer to be
used to read/write to registers. This ensures that buffer passed to
spi-mem layer during register read/writes is DMA'able. With this change
nor->cmd-buf is no longer used, so drop it.

Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 80 ++++++++++++++++++++++++-------------------
 include/linux/mtd/spi-nor.h   |  7 ++--
 2 files changed, 49 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 03cc788511d5..7f0831be90a0 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -296,15 +296,14 @@ struct flash_info {
 static int read_sr(struct spi_nor *nor)
 {
 	int ret;
-	u8 val;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDSR, nor->bouncebuf, 1);
 	if (ret < 0) {
 		pr_err("error %d reading SR\n", (int) ret);
 		return ret;
 	}
 
-	return val;
+	return nor->bouncebuf[0];
 }
 
 /*
@@ -315,15 +314,14 @@ static int read_sr(struct spi_nor *nor)
 static int read_fsr(struct spi_nor *nor)
 {
 	int ret;
-	u8 val;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDFSR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDFSR, nor->bouncebuf, 1);
 	if (ret < 0) {
 		pr_err("error %d reading FSR\n", ret);
 		return ret;
 	}
 
-	return val;
+	return nor->bouncebuf[0];
 }
 
 /*
@@ -334,15 +332,14 @@ static int read_fsr(struct spi_nor *nor)
 static int read_cr(struct spi_nor *nor)
 {
 	int ret;
-	u8 val;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDCR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDCR, nor->bouncebuf, 1);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading CR\n", ret);
 		return ret;
 	}
 
-	return val;
+	return nor->bouncebuf[0];
 }
 
 /*
@@ -351,8 +348,8 @@ static int read_cr(struct spi_nor *nor)
  */
 static int write_sr(struct spi_nor *nor, u8 val)
 {
-	nor->cmd_buf[0] = val;
-	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1);
+	nor->bouncebuf[0] = val;
+	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->bouncebuf, 1);
 }
 
 /*
@@ -500,31 +497,31 @@ static int set_4byte(struct spi_nor *nor, bool enable)
 			 * We must clear the register to enable normal behavior.
 			 */
 			write_enable(nor);
-			nor->cmd_buf[0] = 0;
-			nor->write_reg(nor, SPINOR_OP_WREAR, nor->cmd_buf, 1);
+			nor->bouncebuf[0] = 0;
+			nor->write_reg(nor, SPINOR_OP_WREAR,
+				       nor->bouncebuf, 1);
 			write_disable(nor);
 		}
 
 		return status;
 	default:
 		/* Spansion style */
-		nor->cmd_buf[0] = enable << 7;
-		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1);
+		nor->bouncebuf[0] = enable << 7;
+		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->bouncebuf, 1);
 	}
 }
 
 static int s3an_sr_ready(struct spi_nor *nor)
 {
 	int ret;
-	u8 val;
 
-	ret = nor->read_reg(nor, SPINOR_OP_XRDSR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_XRDSR, nor->bouncebuf, 1);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
 		return ret;
 	}
 
-	return !!(val & XSR_RDY);
+	return !!(nor->bouncebuf[0] & XSR_RDY);
 }
 
 static int spi_nor_sr_ready(struct spi_nor *nor)
@@ -683,7 +680,6 @@ static loff_t spi_nor_s3an_addr_convert(struct spi_nor *nor, unsigned int addr)
  */
 static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 {
-	u8 buf[SPI_NOR_MAX_ADDR_WIDTH];
 	int i;
 
 	if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
@@ -697,11 +693,12 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 	 * control
 	 */
 	for (i = nor->addr_width - 1; i >= 0; i--) {
-		buf[i] = addr & 0xff;
+		nor->bouncebuf[i] = addr & 0xff;
 		addr >>= 8;
 	}
 
-	return nor->write_reg(nor, nor->erase_opcode, buf, nor->addr_width);
+	return nor->write_reg(nor, nor->erase_opcode, nor->bouncebuf,
+			      nor->addr_width);
 }
 
 /**
@@ -1485,9 +1482,11 @@ static int macronix_quad_enable(struct spi_nor *nor)
  */
 static int spansion_quad_enable(struct spi_nor *nor)
 {
-	u8 sr_cr[2] = {0, CR_QUAD_EN_SPAN};
+	u8 *sr_cr = nor->bouncebuf;
 	int ret;
 
+	sr_cr[0] = 0;
+	sr_cr[1] = CR_QUAD_EN_SPAN;
 	ret = write_sr_cr(nor, sr_cr);
 	if (ret)
 		return ret;
@@ -1517,7 +1516,7 @@ static int spansion_quad_enable(struct spi_nor *nor)
  */
 static int spansion_no_read_cr_quad_enable(struct spi_nor *nor)
 {
-	u8 sr_cr[2];
+	u8 *sr_cr = nor->bouncebuf;
 	int ret;
 
 	/* Keep the current value of the Status Register. */
@@ -1548,7 +1547,7 @@ static int spansion_no_read_cr_quad_enable(struct spi_nor *nor)
 static int spansion_read_cr_quad_enable(struct spi_nor *nor)
 {
 	struct device *dev = nor->dev;
-	u8 sr_cr[2];
+	u8 *sr_cr = nor->bouncebuf;
 	int ret;
 
 	/* Check current Quad Enable bit value. */
@@ -1599,22 +1598,22 @@ static int spansion_read_cr_quad_enable(struct spi_nor *nor)
  */
 static int sr2_bit7_quad_enable(struct spi_nor *nor)
 {
-	u8 sr2;
+	u8 *sr2 = nor->bouncebuf;
 	int ret;
 
 	/* Check current Quad Enable bit value. */
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR2, &sr2, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
 	if (ret)
 		return ret;
-	if (sr2 & SR2_QUAD_EN_BIT7)
+	if (*sr2 & SR2_QUAD_EN_BIT7)
 		return 0;
 
 	/* Update the Quad Enable bit. */
-	sr2 |= SR2_QUAD_EN_BIT7;
+	*sr2 |= SR2_QUAD_EN_BIT7;
 
 	write_enable(nor);
 
-	ret = nor->write_reg(nor, SPINOR_OP_WRSR2, &sr2, 1);
+	ret = nor->write_reg(nor, SPINOR_OP_WRSR2, sr2, 1);
 	if (ret < 0) {
 		dev_err(nor->dev, "error while writing status register 2\n");
 		return -EINVAL;
@@ -1627,8 +1626,8 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
 	}
 
 	/* Read back and check it. */
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR2, &sr2, 1);
-	if (!(ret > 0 && (sr2 & SR2_QUAD_EN_BIT7))) {
+	ret = nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
+	if (!(ret > 0 && (*sr2 & SR2_QUAD_EN_BIT7))) {
 		dev_err(nor->dev, "SR2 Quad bit not set\n");
 		return -EINVAL;
 	}
@@ -1687,7 +1686,7 @@ static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor)
 {
 	int ret;
 	u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
-	u8 sr_cr[2] = {0};
+	u8 *sr_cr =  nor->bouncebuf;
 
 	/* Check current Quad Enable bit value. */
 	ret = read_cr(nor);
@@ -2177,7 +2176,7 @@ static const struct flash_info spi_nor_ids[] = {
 static const struct flash_info *spi_nor_read_id(struct spi_nor *nor)
 {
 	int			tmp;
-	u8			id[SPI_NOR_MAX_ID_LEN];
+	u8			*id = nor->bouncebuf;
 	const struct flash_info	*info;
 
 	tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, SPI_NOR_MAX_ID_LEN);
@@ -2393,9 +2392,8 @@ static int spi_nor_check(struct spi_nor *nor)
 static int s3an_nor_scan(struct spi_nor *nor)
 {
 	int ret;
-	u8 val;
 
-	ret = nor->read_reg(nor, SPINOR_OP_XRDSR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_XRDSR, nor->bouncebuf, 1);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
 		return ret;
@@ -2417,7 +2415,7 @@ static int s3an_nor_scan(struct spi_nor *nor)
 	 * The current addressing mode can be read from the XRDSR register
 	 * and should not be changed, because is a destructive operation.
 	 */
-	if (val & XSR_PAGESIZE) {
+	if (nor->bouncebuf[0] & XSR_PAGESIZE) {
 		/* Flash in Power of 2 mode */
 		nor->page_size = (nor->page_size == 264) ? 256 : 512;
 		nor->mtd.writebufsize = nor->page_size;
@@ -4121,6 +4119,16 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 	nor->read_proto = SNOR_PROTO_1_1_1;
 	nor->write_proto = SNOR_PROTO_1_1_1;
 
+	/*
+	 * We need the bounce buffer early to read/write registers when going
+	 * through the spi-mem layer (buffers have to be DMA-able).
+	 */
+	nor->bouncebuf_size = PAGE_SIZE;
+	nor->bouncebuf = devm_kmalloc(dev, nor->bouncebuf_size,
+				      GFP_KERNEL);
+	if (!nor->bouncebuf)
+		return -ENOMEM;
+
 	if (name)
 		info = spi_nor_match_id(name);
 	/* Try to auto-detect if chip name wasn't specified or not found */
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 9f57cdfcc93d..6b5956a7a65a 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -344,6 +344,9 @@ struct flash_info;
  * @mtd:		point to a mtd_info structure
  * @lock:		the lock for the read/write/erase/lock/unlock operations
  * @dev:		point to a spi device, or a spi nor controller device.
+ * @bouncebuf:		bounce buffer used when the buffer passed by the MTD
+ *                      layer is not DMA-able
+ * @bouncebuf_size:	size of the bounce buffer
  * @info:		spi-nor part JDEC MFR id and other info
  * @page_size:		the page size of the SPI NOR
  * @addr_width:		number of address bytes
@@ -356,7 +359,6 @@ struct flash_info;
  * @read_proto:		the SPI protocol for read operations
  * @write_proto:	the SPI protocol for write operations
  * @reg_proto		the SPI protocol for read_reg/write_reg/erase operations
- * @cmd_buf:		used by the write_reg
  * @erase_map:		the erase map of the SPI NOR
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
@@ -382,6 +384,8 @@ struct spi_nor {
 	struct mtd_info		mtd;
 	struct mutex		lock;
 	struct device		*dev;
+	u8			*bouncebuf;
+	size_t			bouncebuf_size;
 	const struct flash_info	*info;
 	u32			page_size;
 	u8			addr_width;
@@ -394,7 +398,6 @@ struct spi_nor {
 	enum spi_nor_protocol	reg_proto;
 	bool			sst_write_second;
 	u32			flags;
-	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
 	struct spi_nor_erase_map	erase_map;
 
 	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
-- 
cgit v1.2.3


From b35b9a10362d203451d920d01ab8d6cd55cbaf2a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 6 Aug 2019 10:40:40 +0530
Subject: mtd: spi-nor: Move m25p80 code in spi-nor.c

The m25p80 driver is actually a generic wrapper around the spi-mem
layer. Not only the driver name is misleading, but we'd expect such a
common logic to be directly available in the core. Another reason for
moving this code is that SPI NOR controller drivers should
progressively be replaced by SPI controller drivers implementing the
spi_mem_ops interface, and when the conversion is done, we should have
a single spi-nor driver directly interfacing with the spi-mem layer.

While moving the code we also fix a longstanding issue when
non-DMA-able buffers are passed by the MTD layer.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 drivers/mtd/devices/Kconfig   |  18 --
 drivers/mtd/devices/Makefile  |   1 -
 drivers/mtd/devices/m25p80.c  | 347 -----------------------
 drivers/mtd/spi-nor/Kconfig   |   2 +
 drivers/mtd/spi-nor/spi-nor.c | 628 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/mtd/spi-nor.h   |   3 +
 6 files changed, 605 insertions(+), 394 deletions(-)
 delete mode 100644 drivers/mtd/devices/m25p80.c

(limited to 'include')

diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig
index 49abbc52457d..f96287c4b789 100644
--- a/drivers/mtd/devices/Kconfig
+++ b/drivers/mtd/devices/Kconfig
@@ -79,24 +79,6 @@ config MTD_DATAFLASH_OTP
 	  other key product data.  The second half is programmed with a
 	  unique-to-each-chip bit pattern at the factory.
 
-config MTD_M25P80
-	tristate "Support most SPI Flash chips (AT26DF, M25P, W25X, ...)"
-	depends on SPI_MASTER && MTD_SPI_NOR
-	select SPI_MEM
-	help
-	  This enables access to most modern SPI flash chips, used for
-	  program and data storage.   Series supported include Atmel AT26DF,
-	  Spansion S25SL, SST 25VF, ST M25P, and Winbond W25X.  Other chips
-	  are supported as well.  See the driver source for the current list,
-	  or to add other chips.
-
-	  Note that the original DataFlash chips (AT45 series, not AT26DF),
-	  need an entirely different driver.
-
-	  Set up your spi devices with the right board-specific platform data,
-	  if you want to specify device partitioning or to use a device which
-	  doesn't support the JEDEC ID instruction.
-
 config MTD_MCHP23K256
 	tristate "Microchip 23K256 SRAM"
 	depends on SPI_MASTER
diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile
index 94895eab3066..991c8d12c016 100644
--- a/drivers/mtd/devices/Makefile
+++ b/drivers/mtd/devices/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_MTD_MTDRAM)	+= mtdram.o
 obj-$(CONFIG_MTD_LART)		+= lart.o
 obj-$(CONFIG_MTD_BLOCK2MTD)	+= block2mtd.o
 obj-$(CONFIG_MTD_DATAFLASH)	+= mtd_dataflash.o
-obj-$(CONFIG_MTD_M25P80)	+= m25p80.o
 obj-$(CONFIG_MTD_MCHP23K256)	+= mchp23k256.o
 obj-$(CONFIG_MTD_SPEAR_SMI)	+= spear_smi.o
 obj-$(CONFIG_MTD_SST25L)	+= sst25l.o
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
deleted file mode 100644
index c50888670250..000000000000
--- a/drivers/mtd/devices/m25p80.c
+++ /dev/null
@@ -1,347 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * MTD SPI driver for ST M25Pxx (and similar) serial flash chips
- *
- * Author: Mike Lavender, mike@steroidmicros.com
- *
- * Copyright (c) 2005, Intec Automation Inc.
- *
- * Some parts are based on lart.c by Abraham Van Der Merwe
- *
- * Cleaned up and generalized based on mtd_dataflash.c
- */
-
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/device.h>
-
-#include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
-
-#include <linux/spi/spi.h>
-#include <linux/spi/spi-mem.h>
-#include <linux/spi/flash.h>
-#include <linux/mtd/spi-nor.h>
-
-struct m25p {
-	struct spi_mem		*spimem;
-	struct spi_nor		spi_nor;
-};
-
-static int m25p80_read_reg(struct spi_nor *nor, u8 code, u8 *val, int len)
-{
-	struct m25p *flash = nor->priv;
-	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(code, 1),
-					  SPI_MEM_OP_NO_ADDR,
-					  SPI_MEM_OP_NO_DUMMY,
-					  SPI_MEM_OP_DATA_IN(len, NULL, 1));
-	void *scratchbuf;
-	int ret;
-
-	scratchbuf = kmalloc(len, GFP_KERNEL);
-	if (!scratchbuf)
-		return -ENOMEM;
-
-	op.data.buf.in = scratchbuf;
-	ret = spi_mem_exec_op(flash->spimem, &op);
-	if (ret < 0)
-		dev_err(&flash->spimem->spi->dev, "error %d reading %x\n", ret,
-			code);
-	else
-		memcpy(val, scratchbuf, len);
-
-	kfree(scratchbuf);
-
-	return ret;
-}
-
-static int m25p80_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
-{
-	struct m25p *flash = nor->priv;
-	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(opcode, 1),
-					  SPI_MEM_OP_NO_ADDR,
-					  SPI_MEM_OP_NO_DUMMY,
-					  SPI_MEM_OP_DATA_OUT(len, NULL, 1));
-	void *scratchbuf;
-	int ret;
-
-	scratchbuf = kmemdup(buf, len, GFP_KERNEL);
-	if (!scratchbuf)
-		return -ENOMEM;
-
-	op.data.buf.out = scratchbuf;
-	ret = spi_mem_exec_op(flash->spimem, &op);
-	kfree(scratchbuf);
-
-	return ret;
-}
-
-static ssize_t m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
-			    const u_char *buf)
-{
-	struct m25p *flash = nor->priv;
-	struct spi_mem_op op =
-			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
-				   SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
-				   SPI_MEM_OP_NO_DUMMY,
-				   SPI_MEM_OP_DATA_OUT(len, buf, 1));
-	int ret;
-
-	/* get transfer protocols. */
-	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
-	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
-	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
-
-	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
-		op.addr.nbytes = 0;
-
-	ret = spi_mem_adjust_op_size(flash->spimem, &op);
-	if (ret)
-		return ret;
-	op.data.nbytes = len < op.data.nbytes ? len : op.data.nbytes;
-
-	ret = spi_mem_exec_op(flash->spimem, &op);
-	if (ret)
-		return ret;
-
-	return op.data.nbytes;
-}
-
-/*
- * Read an address range from the nor chip.  The address range
- * may be any size provided it is within the physical boundaries.
- */
-static ssize_t m25p80_read(struct spi_nor *nor, loff_t from, size_t len,
-			   u_char *buf)
-{
-	struct m25p *flash = nor->priv;
-	struct spi_mem_op op =
-			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
-				   SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
-				   SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
-				   SPI_MEM_OP_DATA_IN(len, buf, 1));
-	size_t remaining = len;
-	int ret;
-
-	/* get transfer protocols. */
-	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
-	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
-	op.dummy.buswidth = op.addr.buswidth;
-	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
-
-	/* convert the dummy cycles to the number of bytes */
-	op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
-
-	while (remaining) {
-		op.data.nbytes = remaining < UINT_MAX ? remaining : UINT_MAX;
-		ret = spi_mem_adjust_op_size(flash->spimem, &op);
-		if (ret)
-			return ret;
-
-		ret = spi_mem_exec_op(flash->spimem, &op);
-		if (ret)
-			return ret;
-
-		op.addr.val += op.data.nbytes;
-		remaining -= op.data.nbytes;
-		op.data.buf.in += op.data.nbytes;
-	}
-
-	return len;
-}
-
-/*
- * board specific setup should have ensured the SPI clock used here
- * matches what the READ command supports, at least until this driver
- * understands FAST_READ (for clocks over 25 MHz).
- */
-static int m25p_probe(struct spi_mem *spimem)
-{
-	struct spi_device *spi = spimem->spi;
-	struct flash_platform_data	*data;
-	struct m25p *flash;
-	struct spi_nor *nor;
-	struct spi_nor_hwcaps hwcaps = {
-		.mask = SNOR_HWCAPS_READ |
-			SNOR_HWCAPS_READ_FAST |
-			SNOR_HWCAPS_PP,
-	};
-	char *flash_name;
-	int ret;
-
-	data = dev_get_platdata(&spimem->spi->dev);
-
-	flash = devm_kzalloc(&spimem->spi->dev, sizeof(*flash), GFP_KERNEL);
-	if (!flash)
-		return -ENOMEM;
-
-	nor = &flash->spi_nor;
-
-	/* install the hooks */
-	nor->read = m25p80_read;
-	nor->write = m25p80_write;
-	nor->write_reg = m25p80_write_reg;
-	nor->read_reg = m25p80_read_reg;
-
-	nor->dev = &spimem->spi->dev;
-	spi_nor_set_flash_node(nor, spi->dev.of_node);
-	nor->priv = flash;
-
-	spi_mem_set_drvdata(spimem, flash);
-	flash->spimem = spimem;
-
-	if (spi->mode & SPI_RX_OCTAL) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
-
-		if (spi->mode & SPI_TX_OCTAL)
-			hwcaps.mask |= (SNOR_HWCAPS_READ_1_8_8 |
-					SNOR_HWCAPS_PP_1_1_8 |
-					SNOR_HWCAPS_PP_1_8_8);
-	} else if (spi->mode & SPI_RX_QUAD) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
-
-		if (spi->mode & SPI_TX_QUAD)
-			hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
-					SNOR_HWCAPS_PP_1_1_4 |
-					SNOR_HWCAPS_PP_1_4_4);
-	} else if (spi->mode & SPI_RX_DUAL) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
-
-		if (spi->mode & SPI_TX_DUAL)
-			hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
-	}
-
-	if (data && data->name)
-		nor->mtd.name = data->name;
-
-	if (!nor->mtd.name)
-		nor->mtd.name = spi_mem_get_name(spimem);
-
-	/* For some (historical?) reason many platforms provide two different
-	 * names in flash_platform_data: "name" and "type". Quite often name is
-	 * set to "m25p80" and then "type" provides a real chip name.
-	 * If that's the case, respect "type" and ignore a "name".
-	 */
-	if (data && data->type)
-		flash_name = data->type;
-	else if (!strcmp(spi->modalias, "spi-nor"))
-		flash_name = NULL; /* auto-detect */
-	else
-		flash_name = spi->modalias;
-
-	ret = spi_nor_scan(nor, flash_name, &hwcaps);
-	if (ret)
-		return ret;
-
-	return mtd_device_register(&nor->mtd, data ? data->parts : NULL,
-				   data ? data->nr_parts : 0);
-}
-
-
-static int m25p_remove(struct spi_mem *spimem)
-{
-	struct m25p	*flash = spi_mem_get_drvdata(spimem);
-
-	spi_nor_restore(&flash->spi_nor);
-
-	/* Clean up MTD stuff. */
-	return mtd_device_unregister(&flash->spi_nor.mtd);
-}
-
-static void m25p_shutdown(struct spi_mem *spimem)
-{
-	struct m25p *flash = spi_mem_get_drvdata(spimem);
-
-	spi_nor_restore(&flash->spi_nor);
-}
-/*
- * Do NOT add to this array without reading the following:
- *
- * Historically, many flash devices are bound to this driver by their name. But
- * since most of these flash are compatible to some extent, and their
- * differences can often be differentiated by the JEDEC read-ID command, we
- * encourage new users to add support to the spi-nor library, and simply bind
- * against a generic string here (e.g., "jedec,spi-nor").
- *
- * Many flash names are kept here in this list (as well as in spi-nor.c) to
- * keep them available as module aliases for existing platforms.
- */
-static const struct spi_device_id m25p_ids[] = {
-	/*
-	 * Allow non-DT platform devices to bind to the "spi-nor" modalias, and
-	 * hack around the fact that the SPI core does not provide uevent
-	 * matching for .of_match_table
-	 */
-	{"spi-nor"},
-
-	/*
-	 * Entries not used in DTs that should be safe to drop after replacing
-	 * them with "spi-nor" in platform data.
-	 */
-	{"s25sl064a"},	{"w25x16"},	{"m25p10"},	{"m25px64"},
-
-	/*
-	 * Entries that were used in DTs without "jedec,spi-nor" fallback and
-	 * should be kept for backward compatibility.
-	 */
-	{"at25df321a"},	{"at25df641"},	{"at26df081a"},
-	{"mx25l4005a"},	{"mx25l1606e"},	{"mx25l6405d"},	{"mx25l12805d"},
-	{"mx25l25635e"},{"mx66l51235l"},
-	{"n25q064"},	{"n25q128a11"},	{"n25q128a13"},	{"n25q512a"},
-	{"s25fl256s1"},	{"s25fl512s"},	{"s25sl12801"},	{"s25fl008k"},
-	{"s25fl064k"},
-	{"sst25vf040b"},{"sst25vf016b"},{"sst25vf032b"},{"sst25wf040"},
-	{"m25p40"},	{"m25p80"},	{"m25p16"},	{"m25p32"},
-	{"m25p64"},	{"m25p128"},
-	{"w25x80"},	{"w25x32"},	{"w25q32"},	{"w25q32dw"},
-	{"w25q80bl"},	{"w25q128"},	{"w25q256"},
-
-	/* Flashes that can't be detected using JEDEC */
-	{"m25p05-nonjedec"},	{"m25p10-nonjedec"},	{"m25p20-nonjedec"},
-	{"m25p40-nonjedec"},	{"m25p80-nonjedec"},	{"m25p16-nonjedec"},
-	{"m25p32-nonjedec"},	{"m25p64-nonjedec"},	{"m25p128-nonjedec"},
-
-	/* Everspin MRAMs (non-JEDEC) */
-	{ "mr25h128" }, /* 128 Kib, 40 MHz */
-	{ "mr25h256" }, /* 256 Kib, 40 MHz */
-	{ "mr25h10" },  /*   1 Mib, 40 MHz */
-	{ "mr25h40" },  /*   4 Mib, 40 MHz */
-
-	{ },
-};
-MODULE_DEVICE_TABLE(spi, m25p_ids);
-
-static const struct of_device_id m25p_of_table[] = {
-	/*
-	 * Generic compatibility for SPI NOR that can be identified by the
-	 * JEDEC READ ID opcode (0x9F). Use this, if possible.
-	 */
-	{ .compatible = "jedec,spi-nor" },
-	{}
-};
-MODULE_DEVICE_TABLE(of, m25p_of_table);
-
-static struct spi_mem_driver m25p80_driver = {
-	.spidrv = {
-		.driver = {
-			.name	= "m25p80",
-			.of_match_table = m25p_of_table,
-		},
-		.id_table	= m25p_ids,
-	},
-	.probe	= m25p_probe,
-	.remove	= m25p_remove,
-	.shutdown	= m25p_shutdown,
-
-	/* REVISIT: many of these chips have deep power-down modes, which
-	 * should clearly be entered on suspend() to minimize power use.
-	 * And also when they're otherwise idle...
-	 */
-};
-
-module_spi_mem_driver(m25p80_driver);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mike Lavender");
-MODULE_DESCRIPTION("MTD SPI driver for ST M25Pxx flash chips");
diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
index 6de83277ce8b..f237fcdf7f86 100644
--- a/drivers/mtd/spi-nor/Kconfig
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -2,6 +2,8 @@
 menuconfig MTD_SPI_NOR
 	tristate "SPI-NOR device support"
 	depends on MTD
+	depends on MTD && SPI_MASTER
+	select SPI_MEM
 	help
 	  This is the framework for the SPI NOR which can be used by the SPI
 	  device drivers and the SPI-NOR device driver.
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 7f0831be90a0..de906ab907c7 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -19,6 +19,7 @@
 
 #include <linux/mtd/mtd.h>
 #include <linux/of_platform.h>
+#include <linux/sched/task_stack.h>
 #include <linux/spi/flash.h>
 #include <linux/mtd/spi-nor.h>
 
@@ -288,6 +289,154 @@ struct flash_info {
 
 #define JEDEC_MFR(info)	((info)->id[0])
 
+/**
+ * spi_nor_spimem_xfer_data() - helper function to read/write data to
+ *                              flash's memory region
+ * @nor:        pointer to 'struct spi_nor'
+ * @op:         pointer to 'struct spi_mem_op' template for transfer
+ *
+ * Return: number of bytes transferred on success, -errno otherwise
+ */
+static ssize_t spi_nor_spimem_xfer_data(struct spi_nor *nor,
+					struct spi_mem_op *op)
+{
+	bool usebouncebuf = false;
+	void *rdbuf = NULL;
+	const void *buf;
+	int ret;
+
+	if (op->data.dir == SPI_MEM_DATA_IN)
+		buf = op->data.buf.in;
+	else
+		buf = op->data.buf.out;
+
+	if (object_is_on_stack(buf) || !virt_addr_valid(buf))
+		usebouncebuf = true;
+
+	if (usebouncebuf) {
+		if (op->data.nbytes > nor->bouncebuf_size)
+			op->data.nbytes = nor->bouncebuf_size;
+
+		if (op->data.dir == SPI_MEM_DATA_IN) {
+			rdbuf = op->data.buf.in;
+			op->data.buf.in = nor->bouncebuf;
+		} else {
+			op->data.buf.out = nor->bouncebuf;
+			memcpy(nor->bouncebuf, buf,
+			       op->data.nbytes);
+		}
+	}
+
+	ret = spi_mem_adjust_op_size(nor->spimem, op);
+	if (ret)
+		return ret;
+
+	ret = spi_mem_exec_op(nor->spimem, op);
+	if (ret)
+		return ret;
+
+	if (usebouncebuf && op->data.dir == SPI_MEM_DATA_IN)
+		memcpy(rdbuf, nor->bouncebuf, op->data.nbytes);
+
+	return op->data.nbytes;
+}
+
+/**
+ * spi_nor_spimem_read_data() - read data from flash's memory region via
+ *                              spi-mem
+ * @nor:        pointer to 'struct spi_nor'
+ * @from:       offset to read from
+ * @len:        number of bytes to read
+ * @buf:        pointer to dst buffer
+ *
+ * Return: number of bytes read successfully, -errno otherwise
+ */
+static ssize_t spi_nor_spimem_read_data(struct spi_nor *nor, loff_t from,
+					size_t len, u8 *buf)
+{
+	struct spi_mem_op op =
+		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->read_opcode, 1),
+			   SPI_MEM_OP_ADDR(nor->addr_width, from, 1),
+			   SPI_MEM_OP_DUMMY(nor->read_dummy, 1),
+			   SPI_MEM_OP_DATA_IN(len, buf, 1));
+
+	/* get transfer protocols. */
+	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->read_proto);
+	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->read_proto);
+	op.dummy.buswidth = op.addr.buswidth;
+	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->read_proto);
+
+	/* convert the dummy cycles to the number of bytes */
+	op.dummy.nbytes = (nor->read_dummy * op.dummy.buswidth) / 8;
+
+	return spi_nor_spimem_xfer_data(nor, &op);
+}
+
+/**
+ * spi_nor_read_data() - read data from flash memory
+ * @nor:        pointer to 'struct spi_nor'
+ * @from:       offset to read from
+ * @len:        number of bytes to read
+ * @buf:        pointer to dst buffer
+ *
+ * Return: number of bytes read successfully, -errno otherwise
+ */
+static ssize_t spi_nor_read_data(struct spi_nor *nor, loff_t from, size_t len,
+				 u8 *buf)
+{
+	if (nor->spimem)
+		return spi_nor_spimem_read_data(nor, from, len, buf);
+
+	return nor->read(nor, from, len, buf);
+}
+
+/**
+ * spi_nor_spimem_write_data() - write data to flash memory via
+ *                               spi-mem
+ * @nor:        pointer to 'struct spi_nor'
+ * @to:         offset to write to
+ * @len:        number of bytes to write
+ * @buf:        pointer to src buffer
+ *
+ * Return: number of bytes written successfully, -errno otherwise
+ */
+static ssize_t spi_nor_spimem_write_data(struct spi_nor *nor, loff_t to,
+					 size_t len, const u8 *buf)
+{
+	struct spi_mem_op op =
+		SPI_MEM_OP(SPI_MEM_OP_CMD(nor->program_opcode, 1),
+			   SPI_MEM_OP_ADDR(nor->addr_width, to, 1),
+			   SPI_MEM_OP_NO_DUMMY,
+			   SPI_MEM_OP_DATA_OUT(len, buf, 1));
+
+	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(nor->write_proto);
+	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(nor->write_proto);
+	op.data.buswidth = spi_nor_get_protocol_data_nbits(nor->write_proto);
+
+	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
+		op.addr.nbytes = 0;
+
+	return spi_nor_spimem_xfer_data(nor, &op);
+}
+
+/**
+ * spi_nor_write_data() - write data to flash memory
+ * @nor:        pointer to 'struct spi_nor'
+ * @to:         offset to write to
+ * @len:        number of bytes to write
+ * @buf:        pointer to src buffer
+ *
+ * Return: number of bytes written successfully, -errno otherwise
+ */
+static ssize_t spi_nor_write_data(struct spi_nor *nor, loff_t to, size_t len,
+				  const u8 *buf)
+{
+	if (nor->spimem)
+		return spi_nor_spimem_write_data(nor, to, len, buf);
+
+	return nor->write(nor, to, len, buf);
+}
+
 /*
  * Read the status register, returning its value in the location
  * Return the status register value.
@@ -297,7 +446,18 @@ static int read_sr(struct spi_nor *nor)
 {
 	int ret;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR, nor->bouncebuf, 1);
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+		ret = spi_mem_exec_op(nor->spimem, &op);
+	} else {
+		ret = nor->read_reg(nor, SPINOR_OP_RDSR, nor->bouncebuf, 1);
+	}
+
 	if (ret < 0) {
 		pr_err("error %d reading SR\n", (int) ret);
 		return ret;
@@ -315,7 +475,18 @@ static int read_fsr(struct spi_nor *nor)
 {
 	int ret;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDFSR, nor->bouncebuf, 1);
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+		ret = spi_mem_exec_op(nor->spimem, &op);
+	} else {
+		ret = nor->read_reg(nor, SPINOR_OP_RDFSR, nor->bouncebuf, 1);
+	}
+
 	if (ret < 0) {
 		pr_err("error %d reading FSR\n", ret);
 		return ret;
@@ -333,7 +504,18 @@ static int read_cr(struct spi_nor *nor)
 {
 	int ret;
 
-	ret = nor->read_reg(nor, SPINOR_OP_RDCR, nor->bouncebuf, 1);
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDCR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+		ret = spi_mem_exec_op(nor->spimem, &op);
+	} else {
+		ret = nor->read_reg(nor, SPINOR_OP_RDCR, nor->bouncebuf, 1);
+	}
+
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading CR\n", ret);
 		return ret;
@@ -349,6 +531,16 @@ static int read_cr(struct spi_nor *nor)
 static int write_sr(struct spi_nor *nor, u8 val)
 {
 	nor->bouncebuf[0] = val;
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
 	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->bouncebuf, 1);
 }
 
@@ -358,6 +550,16 @@ static int write_sr(struct spi_nor *nor, u8 val)
  */
 static int write_enable(struct spi_nor *nor)
 {
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
 	return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0);
 }
 
@@ -366,6 +568,16 @@ static int write_enable(struct spi_nor *nor)
  */
 static int write_disable(struct spi_nor *nor)
 {
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
 	return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0);
 }
 
@@ -465,12 +677,64 @@ static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
 	}
 }
 
+static int macronix_set_4byte(struct spi_nor *nor, bool enable)
+{
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(enable ?
+						  SPINOR_OP_EN4B :
+						  SPINOR_OP_EX4B,
+						  1),
+				  SPI_MEM_OP_NO_ADDR,
+				  SPI_MEM_OP_NO_DUMMY,
+				  SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->write_reg(nor, enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B,
+			      NULL, 0);
+}
+
+static int spansion_set_4byte(struct spi_nor *nor, bool enable)
+{
+	nor->bouncebuf[0] = enable << 7;
+
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_BRWR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->write_reg(nor, SPINOR_OP_BRWR, nor->bouncebuf, 1);
+}
+
+static int spi_nor_write_ear(struct spi_nor *nor, u8 ear)
+{
+	nor->bouncebuf[0] = ear;
+
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREAR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->write_reg(nor, SPINOR_OP_WREAR, nor->bouncebuf, 1);
+}
+
 /* Enable/disable 4-byte addressing mode. */
 static int set_4byte(struct spi_nor *nor, bool enable)
 {
 	int status;
 	bool need_wren = false;
-	u8 cmd;
 
 	switch (JEDEC_MFR(nor->info)) {
 	case SNOR_MFR_ST:
@@ -483,8 +747,7 @@ static int set_4byte(struct spi_nor *nor, bool enable)
 		if (need_wren)
 			write_enable(nor);
 
-		cmd = enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B;
-		status = nor->write_reg(nor, cmd, NULL, 0);
+		status = macronix_set_4byte(nor, enable);
 		if (need_wren)
 			write_disable(nor);
 
@@ -497,25 +760,37 @@ static int set_4byte(struct spi_nor *nor, bool enable)
 			 * We must clear the register to enable normal behavior.
 			 */
 			write_enable(nor);
-			nor->bouncebuf[0] = 0;
-			nor->write_reg(nor, SPINOR_OP_WREAR,
-				       nor->bouncebuf, 1);
+			spi_nor_write_ear(nor, 0);
 			write_disable(nor);
 		}
 
 		return status;
 	default:
 		/* Spansion style */
-		nor->bouncebuf[0] = enable << 7;
-		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->bouncebuf, 1);
+		return spansion_set_4byte(nor, enable);
 	}
 }
 
+static int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr)
+{
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_XRDSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(1, sr, 1));
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->read_reg(nor, SPINOR_OP_XRDSR, sr, 1);
+}
+
 static int s3an_sr_ready(struct spi_nor *nor)
 {
 	int ret;
 
-	ret = nor->read_reg(nor, SPINOR_OP_XRDSR, nor->bouncebuf, 1);
+	ret = spi_nor_xread_sr(nor, nor->bouncebuf);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
 		return ret;
@@ -524,6 +799,21 @@ static int s3an_sr_ready(struct spi_nor *nor)
 	return !!(nor->bouncebuf[0] & XSR_RDY);
 }
 
+static int spi_nor_clear_sr(struct spi_nor *nor)
+{
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->write_reg(nor, SPINOR_OP_CLSR, NULL, 0);
+}
+
 static int spi_nor_sr_ready(struct spi_nor *nor)
 {
 	int sr = read_sr(nor);
@@ -536,13 +826,28 @@ static int spi_nor_sr_ready(struct spi_nor *nor)
 		else
 			dev_err(nor->dev, "Programming Error occurred\n");
 
-		nor->write_reg(nor, SPINOR_OP_CLSR, NULL, 0);
+		spi_nor_clear_sr(nor);
 		return -EIO;
 	}
 
 	return !(sr & SR_WIP);
 }
 
+static int spi_nor_clear_fsr(struct spi_nor *nor)
+{
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLFSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->write_reg(nor, SPINOR_OP_CLFSR, NULL, 0);
+}
+
 static int spi_nor_fsr_ready(struct spi_nor *nor)
 {
 	int fsr = read_fsr(nor);
@@ -559,7 +864,7 @@ static int spi_nor_fsr_ready(struct spi_nor *nor)
 			dev_err(nor->dev,
 			"Attempted to modify a protected sector.\n");
 
-		nor->write_reg(nor, SPINOR_OP_CLFSR, NULL, 0);
+		spi_nor_clear_fsr(nor);
 		return -EIO;
 	}
 
@@ -627,6 +932,16 @@ static int erase_chip(struct spi_nor *nor)
 {
 	dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd.size >> 10));
 
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CHIP_ERASE, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
 	return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0);
 }
 
@@ -688,6 +1003,16 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 	if (nor->erase)
 		return nor->erase(nor, addr);
 
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 1),
+				   SPI_MEM_OP_ADDR(nor->addr_width, addr, 1),
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_NO_DATA);
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
 	/*
 	 * Default implementation, if driver doesn't have a specialized HW
 	 * control
@@ -1403,7 +1728,18 @@ static int write_sr_cr(struct spi_nor *nor, u8 *sr_cr)
 
 	write_enable(nor);
 
-	ret = nor->write_reg(nor, SPINOR_OP_WRSR, sr_cr, 2);
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_OUT(2, sr_cr, 1));
+
+		ret = spi_mem_exec_op(nor->spimem, &op);
+	} else {
+		ret = nor->write_reg(nor, SPINOR_OP_WRSR, sr_cr, 2);
+	}
+
 	if (ret < 0) {
 		dev_err(nor->dev,
 			"error while writing configuration register\n");
@@ -1584,6 +1920,36 @@ static int spansion_read_cr_quad_enable(struct spi_nor *nor)
 	return 0;
 }
 
+static int spi_nor_write_sr2(struct spi_nor *nor, u8 *sr2)
+{
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR2, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_OUT(1, sr2, 1));
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->write_reg(nor, SPINOR_OP_WRSR2, sr2, 1);
+}
+
+static int spi_nor_read_sr2(struct spi_nor *nor, u8 *sr2)
+{
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR2, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(1, sr2, 1));
+
+		return spi_mem_exec_op(nor->spimem, &op);
+	}
+
+	return nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
+}
+
 /**
  * sr2_bit7_quad_enable() - set QE bit in Status Register 2.
  * @nor:	pointer to a 'struct spi_nor'
@@ -1602,7 +1968,7 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
 	int ret;
 
 	/* Check current Quad Enable bit value. */
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
+	ret = spi_nor_read_sr2(nor, sr2);
 	if (ret)
 		return ret;
 	if (*sr2 & SR2_QUAD_EN_BIT7)
@@ -1613,7 +1979,7 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
 
 	write_enable(nor);
 
-	ret = nor->write_reg(nor, SPINOR_OP_WRSR2, sr2, 1);
+	ret = spi_nor_write_sr2(nor, sr2);
 	if (ret < 0) {
 		dev_err(nor->dev, "error while writing status register 2\n");
 		return -EINVAL;
@@ -1626,7 +1992,7 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
 	}
 
 	/* Read back and check it. */
-	ret = nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
+	ret = spi_nor_read_sr2(nor, sr2);
 	if (!(ret > 0 && (*sr2 & SR2_QUAD_EN_BIT7))) {
 		dev_err(nor->dev, "SR2 Quad bit not set\n");
 		return -EINVAL;
@@ -2179,7 +2545,18 @@ static const struct flash_info *spi_nor_read_id(struct spi_nor *nor)
 	u8			*id = nor->bouncebuf;
 	const struct flash_info	*info;
 
-	tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, SPI_NOR_MAX_ID_LEN);
+	if (nor->spimem) {
+		struct spi_mem_op op =
+			SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDID, 1),
+				   SPI_MEM_OP_NO_ADDR,
+				   SPI_MEM_OP_NO_DUMMY,
+				   SPI_MEM_OP_DATA_IN(SPI_NOR_MAX_ID_LEN, id, 1));
+
+		tmp = spi_mem_exec_op(nor->spimem, &op);
+	} else {
+		tmp = nor->read_reg(nor, SPINOR_OP_RDID, id,
+				    SPI_NOR_MAX_ID_LEN);
+	}
 	if (tmp < 0) {
 		dev_err(nor->dev, "error %d reading JEDEC ID\n", tmp);
 		return ERR_PTR(tmp);
@@ -2215,7 +2592,7 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
 		if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
 			addr = spi_nor_s3an_addr_convert(nor, addr);
 
-		ret = nor->read(nor, addr, len, buf);
+		ret = spi_nor_read_data(nor, addr, len, buf);
 		if (ret == 0) {
 			/* We shouldn't see 0-length reads */
 			ret = -EIO;
@@ -2260,7 +2637,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 		nor->program_opcode = SPINOR_OP_BP;
 
 		/* write one byte. */
-		ret = nor->write(nor, to, 1, buf);
+		ret = spi_nor_write_data(nor, to, 1, buf);
 		if (ret < 0)
 			goto sst_write_err;
 		WARN(ret != 1, "While writing 1 byte written %i bytes\n",
@@ -2276,7 +2653,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 		nor->program_opcode = SPINOR_OP_AAI_WP;
 
 		/* write two bytes. */
-		ret = nor->write(nor, to, 2, buf + actual);
+		ret = spi_nor_write_data(nor, to, 2, buf + actual);
 		if (ret < 0)
 			goto sst_write_err;
 		WARN(ret != 2, "While writing 2 bytes written %i bytes\n",
@@ -2299,7 +2676,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 		write_enable(nor);
 
 		nor->program_opcode = SPINOR_OP_BP;
-		ret = nor->write(nor, to, 1, buf + actual);
+		ret = spi_nor_write_data(nor, to, 1, buf + actual);
 		if (ret < 0)
 			goto sst_write_err;
 		WARN(ret != 1, "While writing 1 byte written %i bytes\n",
@@ -2361,7 +2738,7 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 			addr = spi_nor_s3an_addr_convert(nor, addr);
 
 		write_enable(nor);
-		ret = nor->write(nor, addr, page_remain, buf + i);
+		ret = spi_nor_write_data(nor, addr, page_remain, buf + i);
 		if (ret < 0)
 			goto write_err;
 		written = ret;
@@ -2380,8 +2757,10 @@ write_err:
 
 static int spi_nor_check(struct spi_nor *nor)
 {
-	if (!nor->dev || !nor->read || !nor->write ||
-		!nor->read_reg || !nor->write_reg) {
+	if (!nor->dev ||
+	    (!nor->spimem &&
+	    (!nor->read || !nor->write || !nor->read_reg ||
+	      !nor->write_reg))) {
 		pr_err("spi-nor: please fill all the necessary fields!\n");
 		return -EINVAL;
 	}
@@ -2393,7 +2772,7 @@ static int s3an_nor_scan(struct spi_nor *nor)
 {
 	int ret;
 
-	ret = nor->read_reg(nor, SPINOR_OP_XRDSR, nor->bouncebuf, 1);
+	ret = spi_nor_xread_sr(nor, nor->bouncebuf);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
 		return ret;
@@ -2523,7 +2902,7 @@ static int spi_nor_read_raw(struct spi_nor *nor, u32 addr, size_t len, u8 *buf)
 	int ret;
 
 	while (len) {
-		ret = nor->read(nor, addr, len, buf);
+		ret = spi_nor_read_data(nor, addr, len, buf);
 		if (!ret || ret > len)
 			return -EIO;
 		if (ret < 0)
@@ -4122,6 +4501,10 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 	/*
 	 * We need the bounce buffer early to read/write registers when going
 	 * through the spi-mem layer (buffers have to be DMA-able).
+	 * For spi-mem drivers, we'll reallocate a new buffer if
+	 * nor->page_size turns out to be greater than PAGE_SIZE (which
+	 * shouldn't happen before long since NOR pages are usually less
+	 * than 1KB) after spi_nor_scan() returns.
 	 */
 	nor->bouncebuf_size = PAGE_SIZE;
 	nor->bouncebuf = devm_kmalloc(dev, nor->bouncebuf_size,
@@ -4324,6 +4707,195 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 }
 EXPORT_SYMBOL_GPL(spi_nor_scan);
 
+static int spi_nor_probe(struct spi_mem *spimem)
+{
+	struct spi_device *spi = spimem->spi;
+	struct flash_platform_data *data = dev_get_platdata(&spi->dev);
+	struct spi_nor *nor;
+	struct spi_nor_hwcaps hwcaps = {
+		.mask = SNOR_HWCAPS_READ |
+			SNOR_HWCAPS_READ_FAST |
+			SNOR_HWCAPS_PP,
+	};
+	char *flash_name;
+	int ret;
+
+	nor = devm_kzalloc(&spi->dev, sizeof(*nor), GFP_KERNEL);
+	if (!nor)
+		return -ENOMEM;
+
+	nor->spimem = spimem;
+	nor->dev = &spi->dev;
+	spi_nor_set_flash_node(nor, spi->dev.of_node);
+
+	spi_mem_set_drvdata(spimem, nor);
+
+	if (spi->mode & SPI_RX_OCTAL) {
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
+
+		if (spi->mode & SPI_TX_OCTAL)
+			hwcaps.mask |= (SNOR_HWCAPS_READ_1_8_8 |
+					SNOR_HWCAPS_PP_1_1_8 |
+					SNOR_HWCAPS_PP_1_8_8);
+	} else if (spi->mode & SPI_RX_QUAD) {
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
+
+		if (spi->mode & SPI_TX_QUAD)
+			hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
+					SNOR_HWCAPS_PP_1_1_4 |
+					SNOR_HWCAPS_PP_1_4_4);
+	} else if (spi->mode & SPI_RX_DUAL) {
+		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
+
+		if (spi->mode & SPI_TX_DUAL)
+			hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
+	}
+
+	if (data && data->name)
+		nor->mtd.name = data->name;
+
+	if (!nor->mtd.name)
+		nor->mtd.name = spi_mem_get_name(spimem);
+
+	/*
+	 * For some (historical?) reason many platforms provide two different
+	 * names in flash_platform_data: "name" and "type". Quite often name is
+	 * set to "m25p80" and then "type" provides a real chip name.
+	 * If that's the case, respect "type" and ignore a "name".
+	 */
+	if (data && data->type)
+		flash_name = data->type;
+	else if (!strcmp(spi->modalias, "spi-nor"))
+		flash_name = NULL; /* auto-detect */
+	else
+		flash_name = spi->modalias;
+
+	ret = spi_nor_scan(nor, flash_name, &hwcaps);
+	if (ret)
+		return ret;
+
+	/*
+	 * None of the existing parts have > 512B pages, but let's play safe
+	 * and add this logic so that if anyone ever adds support for such
+	 * a NOR we don't end up with buffer overflows.
+	 */
+	if (nor->page_size > PAGE_SIZE) {
+		nor->bouncebuf_size = nor->page_size;
+		devm_kfree(nor->dev, nor->bouncebuf);
+		nor->bouncebuf = devm_kmalloc(nor->dev,
+					      nor->bouncebuf_size,
+					      GFP_KERNEL);
+		if (!nor->bouncebuf)
+			return -ENOMEM;
+	}
+
+	return mtd_device_register(&nor->mtd, data ? data->parts : NULL,
+				   data ? data->nr_parts : 0);
+}
+
+static int spi_nor_remove(struct spi_mem *spimem)
+{
+	struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+	spi_nor_restore(nor);
+
+	/* Clean up MTD stuff. */
+	return mtd_device_unregister(&nor->mtd);
+}
+
+static void spi_nor_shutdown(struct spi_mem *spimem)
+{
+	struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+	spi_nor_restore(nor);
+}
+
+/*
+ * Do NOT add to this array without reading the following:
+ *
+ * Historically, many flash devices are bound to this driver by their name. But
+ * since most of these flash are compatible to some extent, and their
+ * differences can often be differentiated by the JEDEC read-ID command, we
+ * encourage new users to add support to the spi-nor library, and simply bind
+ * against a generic string here (e.g., "jedec,spi-nor").
+ *
+ * Many flash names are kept here in this list (as well as in spi-nor.c) to
+ * keep them available as module aliases for existing platforms.
+ */
+static const struct spi_device_id spi_nor_dev_ids[] = {
+	/*
+	 * Allow non-DT platform devices to bind to the "spi-nor" modalias, and
+	 * hack around the fact that the SPI core does not provide uevent
+	 * matching for .of_match_table
+	 */
+	{"spi-nor"},
+
+	/*
+	 * Entries not used in DTs that should be safe to drop after replacing
+	 * them with "spi-nor" in platform data.
+	 */
+	{"s25sl064a"},	{"w25x16"},	{"m25p10"},	{"m25px64"},
+
+	/*
+	 * Entries that were used in DTs without "jedec,spi-nor" fallback and
+	 * should be kept for backward compatibility.
+	 */
+	{"at25df321a"},	{"at25df641"},	{"at26df081a"},
+	{"mx25l4005a"},	{"mx25l1606e"},	{"mx25l6405d"},	{"mx25l12805d"},
+	{"mx25l25635e"},{"mx66l51235l"},
+	{"n25q064"},	{"n25q128a11"},	{"n25q128a13"},	{"n25q512a"},
+	{"s25fl256s1"},	{"s25fl512s"},	{"s25sl12801"},	{"s25fl008k"},
+	{"s25fl064k"},
+	{"sst25vf040b"},{"sst25vf016b"},{"sst25vf032b"},{"sst25wf040"},
+	{"m25p40"},	{"m25p80"},	{"m25p16"},	{"m25p32"},
+	{"m25p64"},	{"m25p128"},
+	{"w25x80"},	{"w25x32"},	{"w25q32"},	{"w25q32dw"},
+	{"w25q80bl"},	{"w25q128"},	{"w25q256"},
+
+	/* Flashes that can't be detected using JEDEC */
+	{"m25p05-nonjedec"},	{"m25p10-nonjedec"},	{"m25p20-nonjedec"},
+	{"m25p40-nonjedec"},	{"m25p80-nonjedec"},	{"m25p16-nonjedec"},
+	{"m25p32-nonjedec"},	{"m25p64-nonjedec"},	{"m25p128-nonjedec"},
+
+	/* Everspin MRAMs (non-JEDEC) */
+	{ "mr25h128" }, /* 128 Kib, 40 MHz */
+	{ "mr25h256" }, /* 256 Kib, 40 MHz */
+	{ "mr25h10" },  /*   1 Mib, 40 MHz */
+	{ "mr25h40" },  /*   4 Mib, 40 MHz */
+
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, spi_nor_dev_ids);
+
+static const struct of_device_id spi_nor_of_table[] = {
+	/*
+	 * Generic compatibility for SPI NOR that can be identified by the
+	 * JEDEC READ ID opcode (0x9F). Use this, if possible.
+	 */
+	{ .compatible = "jedec,spi-nor" },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, spi_nor_of_table);
+
+/*
+ * REVISIT: many of these chips have deep power-down modes, which
+ * should clearly be entered on suspend() to minimize power use.
+ * And also when they're otherwise idle...
+ */
+static struct spi_mem_driver spi_nor_driver = {
+	.spidrv = {
+		.driver = {
+			.name = "spi-nor",
+			.of_match_table = spi_nor_of_table,
+		},
+		.id_table = spi_nor_dev_ids,
+	},
+	.probe = spi_nor_probe,
+	.remove = spi_nor_remove,
+	.shutdown = spi_nor_shutdown,
+};
+module_spi_mem_driver(spi_nor_driver);
+
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
 MODULE_AUTHOR("Mike Lavender");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 6b5956a7a65a..4f35b1877889 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -9,6 +9,7 @@
 #include <linux/bitops.h>
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/mtd.h>
+#include <linux/spi/spi-mem.h>
 
 /*
  * Manufacturer IDs
@@ -344,6 +345,7 @@ struct flash_info;
  * @mtd:		point to a mtd_info structure
  * @lock:		the lock for the read/write/erase/lock/unlock operations
  * @dev:		point to a spi device, or a spi nor controller device.
+ * @spimem:		point to the spi mem device
  * @bouncebuf:		bounce buffer used when the buffer passed by the MTD
  *                      layer is not DMA-able
  * @bouncebuf_size:	size of the bounce buffer
@@ -384,6 +386,7 @@ struct spi_nor {
 	struct mtd_info		mtd;
 	struct mutex		lock;
 	struct device		*dev;
+	struct spi_mem		*spimem;
 	u8			*bouncebuf;
 	size_t			bouncebuf_size;
 	const struct flash_info	*info;
-- 
cgit v1.2.3


From c76f5089796a948e8daaeda348040a3f98b18748 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 6 Aug 2019 10:40:41 +0530
Subject: mtd: spi-nor: Rework hwcaps selection for the spi-mem case

The spi-mem layer provides a spi_mem_supports_op() function to check
whether a specific operation is supported by the controller or not.
This is much more accurate than the hwcaps selection logic based on
SPI_{RX,TX}_ flags.

Rework the hwcaps selection logic to use spi_mem_supports_op() when
nor->spimem != NULL.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 183 +++++++++++++++++++++++++++++++++---------
 include/linux/mtd/spi-nor.h   |  14 ++++
 2 files changed, 161 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index de906ab907c7..63af87609bac 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2951,6 +2951,129 @@ static int spi_nor_read_sfdp(struct spi_nor *nor, u32 addr,
 	return ret;
 }
 
+/**
+ * spi_nor_spimem_check_op - check if the operation is supported
+ *                           by controller
+ *@nor:        pointer to a 'struct spi_nor'
+ *@op:         pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_spimem_check_op(struct spi_nor *nor,
+				   struct spi_mem_op *op)
+{
+	/*
+	 * First test with 4 address bytes. The opcode itself might
+	 * be a 3B addressing opcode but we don't care, because
+	 * SPI controller implementation should not check the opcode,
+	 * but just the sequence.
+	 */
+	op->addr.nbytes = 4;
+	if (!spi_mem_supports_op(nor->spimem, op)) {
+		if (nor->mtd.size > SZ_16M)
+			return -ENOTSUPP;
+
+		/* If flash size <= 16MB, 3 address bytes are sufficient */
+		op->addr.nbytes = 3;
+		if (!spi_mem_supports_op(nor->spimem, op))
+			return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * spi_nor_spimem_check_readop - check if the read op is supported
+ *                               by controller
+ *@nor:         pointer to a 'struct spi_nor'
+ *@read:        pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_spimem_check_readop(struct spi_nor *nor,
+				       const struct spi_nor_read_command *read)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(read->opcode, 1),
+					  SPI_MEM_OP_ADDR(3, 0, 1),
+					  SPI_MEM_OP_DUMMY(0, 1),
+					  SPI_MEM_OP_DATA_IN(0, NULL, 1));
+
+	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(read->proto);
+	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(read->proto);
+	op.data.buswidth = spi_nor_get_protocol_data_nbits(read->proto);
+	op.dummy.buswidth = op.addr.buswidth;
+	op.dummy.nbytes = (read->num_mode_clocks + read->num_wait_states) *
+			  op.dummy.buswidth / 8;
+
+	return spi_nor_spimem_check_op(nor, &op);
+}
+
+/**
+ * spi_nor_spimem_check_pp - check if the page program op is supported
+ *                           by controller
+ *@nor:         pointer to a 'struct spi_nor'
+ *@pp:          pointer to op template to be checked
+ *
+ * Returns 0 if operation is supported, -ENOTSUPP otherwise.
+ */
+static int spi_nor_spimem_check_pp(struct spi_nor *nor,
+				   const struct spi_nor_pp_command *pp)
+{
+	struct spi_mem_op op = SPI_MEM_OP(SPI_MEM_OP_CMD(pp->opcode, 1),
+					  SPI_MEM_OP_ADDR(3, 0, 1),
+					  SPI_MEM_OP_NO_DUMMY,
+					  SPI_MEM_OP_DATA_OUT(0, NULL, 1));
+
+	op.cmd.buswidth = spi_nor_get_protocol_inst_nbits(pp->proto);
+	op.addr.buswidth = spi_nor_get_protocol_addr_nbits(pp->proto);
+	op.data.buswidth = spi_nor_get_protocol_data_nbits(pp->proto);
+
+	return spi_nor_spimem_check_op(nor, &op);
+}
+
+/**
+ * spi_nor_spimem_adjust_hwcaps - Find optimal Read/Write protocol
+ *                                based on SPI controller capabilities
+ * @nor:        pointer to a 'struct spi_nor'
+ * @params:     pointer to the 'struct spi_nor_flash_parameter'
+ *              representing SPI NOR flash capabilities
+ * @hwcaps:     pointer to resulting capabilities after adjusting
+ *              according to controller and flash's capability
+ */
+static void
+spi_nor_spimem_adjust_hwcaps(struct spi_nor *nor,
+			     const struct spi_nor_flash_parameter *params,
+			     u32 *hwcaps)
+{
+	unsigned int cap;
+
+	/* DTR modes are not supported yet, mask them all. */
+	*hwcaps &= ~SNOR_HWCAPS_DTR;
+
+	/* X-X-X modes are not supported yet, mask them all. */
+	*hwcaps &= ~SNOR_HWCAPS_X_X_X;
+
+	for (cap = 0; cap < sizeof(*hwcaps) * BITS_PER_BYTE; cap++) {
+		int rdidx, ppidx;
+
+		if (!(*hwcaps & BIT(cap)))
+			continue;
+
+		rdidx = spi_nor_hwcaps_read2cmd(BIT(cap));
+		if (rdidx >= 0 &&
+		    spi_nor_spimem_check_readop(nor, &params->reads[rdidx]))
+			*hwcaps &= ~BIT(cap);
+
+		ppidx = spi_nor_hwcaps_pp2cmd(BIT(cap));
+		if (ppidx < 0)
+			continue;
+
+		if (spi_nor_spimem_check_pp(nor,
+					    &params->page_programs[ppidx]))
+			*hwcaps &= ~BIT(cap);
+	}
+}
+
 /**
  * spi_nor_read_sfdp_dma_unsafe() - read Serial Flash Discoverable Parameters.
  * @nor:	pointer to a 'struct spi_nor'
@@ -4360,16 +4483,25 @@ static int spi_nor_setup(struct spi_nor *nor,
 	 */
 	shared_mask = hwcaps->mask & params->hwcaps.mask;
 
-	/* SPI n-n-n protocols are not supported yet. */
-	ignored_mask = (SNOR_HWCAPS_READ_2_2_2 |
-			SNOR_HWCAPS_READ_4_4_4 |
-			SNOR_HWCAPS_READ_8_8_8 |
-			SNOR_HWCAPS_PP_4_4_4 |
-			SNOR_HWCAPS_PP_8_8_8);
-	if (shared_mask & ignored_mask) {
-		dev_dbg(nor->dev,
-			"SPI n-n-n protocols are not supported yet.\n");
-		shared_mask &= ~ignored_mask;
+	if (nor->spimem) {
+		/*
+		 * When called from spi_nor_probe(), all caps are set and we
+		 * need to discard some of them based on what the SPI
+		 * controller actually supports (using spi_mem_supports_op()).
+		 */
+		spi_nor_spimem_adjust_hwcaps(nor, params, &shared_mask);
+	} else {
+		/*
+		 * SPI n-n-n protocols are not supported when the SPI
+		 * controller directly implements the spi_nor interface.
+		 * Yet another reason to switch to spi-mem.
+		 */
+		ignored_mask = SNOR_HWCAPS_X_X_X;
+		if (shared_mask & ignored_mask) {
+			dev_dbg(nor->dev,
+				"SPI n-n-n protocols are not supported.\n");
+			shared_mask &= ~ignored_mask;
+		}
 	}
 
 	/* Select the (Fast) Read command. */
@@ -4712,11 +4844,11 @@ static int spi_nor_probe(struct spi_mem *spimem)
 	struct spi_device *spi = spimem->spi;
 	struct flash_platform_data *data = dev_get_platdata(&spi->dev);
 	struct spi_nor *nor;
-	struct spi_nor_hwcaps hwcaps = {
-		.mask = SNOR_HWCAPS_READ |
-			SNOR_HWCAPS_READ_FAST |
-			SNOR_HWCAPS_PP,
-	};
+	/*
+	 * Enable all caps by default. The core will mask them after
+	 * checking what's really supported using spi_mem_supports_op().
+	 */
+	const struct spi_nor_hwcaps hwcaps = { .mask = SNOR_HWCAPS_ALL };
 	char *flash_name;
 	int ret;
 
@@ -4730,27 +4862,6 @@ static int spi_nor_probe(struct spi_mem *spimem)
 
 	spi_mem_set_drvdata(spimem, nor);
 
-	if (spi->mode & SPI_RX_OCTAL) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_8;
-
-		if (spi->mode & SPI_TX_OCTAL)
-			hwcaps.mask |= (SNOR_HWCAPS_READ_1_8_8 |
-					SNOR_HWCAPS_PP_1_1_8 |
-					SNOR_HWCAPS_PP_1_8_8);
-	} else if (spi->mode & SPI_RX_QUAD) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_4;
-
-		if (spi->mode & SPI_TX_QUAD)
-			hwcaps.mask |= (SNOR_HWCAPS_READ_1_4_4 |
-					SNOR_HWCAPS_PP_1_1_4 |
-					SNOR_HWCAPS_PP_1_4_4);
-	} else if (spi->mode & SPI_RX_DUAL) {
-		hwcaps.mask |= SNOR_HWCAPS_READ_1_1_2;
-
-		if (spi->mode & SPI_TX_DUAL)
-			hwcaps.mask |= SNOR_HWCAPS_READ_1_2_2;
-	}
-
 	if (data && data->name)
 		nor->mtd.name = data->name;
 
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 4f35b1877889..5f1acb1867dd 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -524,6 +524,20 @@ struct spi_nor_hwcaps {
 #define SNOR_HWCAPS_PP_1_8_8	BIT(21)
 #define SNOR_HWCAPS_PP_8_8_8	BIT(22)
 
+#define SNOR_HWCAPS_X_X_X	(SNOR_HWCAPS_READ_2_2_2 |	\
+				 SNOR_HWCAPS_READ_4_4_4 |	\
+				 SNOR_HWCAPS_READ_8_8_8 |	\
+				 SNOR_HWCAPS_PP_4_4_4 |		\
+				 SNOR_HWCAPS_PP_8_8_8)
+
+#define SNOR_HWCAPS_DTR		(SNOR_HWCAPS_READ_1_1_1_DTR |	\
+				 SNOR_HWCAPS_READ_1_2_2_DTR |	\
+				 SNOR_HWCAPS_READ_1_4_4_DTR |	\
+				 SNOR_HWCAPS_READ_1_8_8_DTR)
+
+#define SNOR_HWCAPS_ALL		(SNOR_HWCAPS_READ_MASK |	\
+				 SNOR_HWCAPS_PP_MASK)
+
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
-- 
cgit v1.2.3


From 0ed4c252bf80b35fe768ec6506b2e58986f99687 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Sun, 28 Jul 2019 11:12:24 +0800
Subject: clk: sunxi-ng: v3s: add Allwinner V3 support

Allwinner V3 has the same main die with V3s, but with more pins wired.
There's a I2S bus on V3 that is not available on V3s.

Add the V3-only peripheral's clocks and reset to the V3s CCU driver,
bound to a new V3 compatible string. The driver name is not changed
because it's part of the device tree binding (the header file name).

Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 drivers/clk/sunxi-ng/ccu-sun8i-v3s.c      | 228 +++++++++++++++++++++++++++++-
 drivers/clk/sunxi-ng/ccu-sun8i-v3s.h      |   2 +-
 include/dt-bindings/clock/sun8i-v3s-ccu.h |   4 +
 include/dt-bindings/reset/sun8i-v3s-ccu.h |   3 +
 4 files changed, 234 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
index f79170e145df..5c779eec454b 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c
@@ -235,6 +235,8 @@ static SUNXI_CCU_GATE(bus_codec_clk,	"bus-codec",	"apb1",
 		      0x068, BIT(0), 0);
 static SUNXI_CCU_GATE(bus_pio_clk,	"bus-pio",	"apb1",
 		      0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_i2s0_clk,	"bus-i2s0",	"apb1",
+		      0x068, BIT(12), 0);
 
 static SUNXI_CCU_GATE(bus_i2c0_clk,	"bus-i2c0",	"apb2",
 		      0x06c, BIT(0), 0);
@@ -306,6 +308,11 @@ static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
 				  BIT(31),	/* gate */
 				  0);
 
+static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x",
+					    "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents,
+			       0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+
 static SUNXI_CCU_GATE(usb_phy0_clk,	"usb-phy0",	"osc24M",
 		      0x0cc, BIT(8), 0);
 static SUNXI_CCU_GATE(usb_ohci0_clk,	"usb-ohci0",	"osc24M",
@@ -443,6 +450,80 @@ static const struct clk_hw *clk_parent_pll_audio[] = {
 	&pll_audio_base_clk.common.hw
 };
 
+static struct ccu_common *sun8i_v3_ccu_clks[] = {
+	&pll_cpu_clk.common,
+	&pll_audio_base_clk.common,
+	&pll_video_clk.common,
+	&pll_ve_clk.common,
+	&pll_ddr0_clk.common,
+	&pll_periph0_clk.common,
+	&pll_isp_clk.common,
+	&pll_periph1_clk.common,
+	&pll_ddr1_clk.common,
+	&cpu_clk.common,
+	&axi_clk.common,
+	&ahb1_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&ahb2_clk.common,
+	&bus_ce_clk.common,
+	&bus_dma_clk.common,
+	&bus_mmc0_clk.common,
+	&bus_mmc1_clk.common,
+	&bus_mmc2_clk.common,
+	&bus_dram_clk.common,
+	&bus_emac_clk.common,
+	&bus_hstimer_clk.common,
+	&bus_spi0_clk.common,
+	&bus_otg_clk.common,
+	&bus_ehci0_clk.common,
+	&bus_ohci0_clk.common,
+	&bus_ve_clk.common,
+	&bus_tcon0_clk.common,
+	&bus_csi_clk.common,
+	&bus_de_clk.common,
+	&bus_codec_clk.common,
+	&bus_pio_clk.common,
+	&bus_i2s0_clk.common,
+	&bus_i2c0_clk.common,
+	&bus_i2c1_clk.common,
+	&bus_uart0_clk.common,
+	&bus_uart1_clk.common,
+	&bus_uart2_clk.common,
+	&bus_ephy_clk.common,
+	&bus_dbg_clk.common,
+	&mmc0_clk.common,
+	&mmc0_sample_clk.common,
+	&mmc0_output_clk.common,
+	&mmc1_clk.common,
+	&mmc1_sample_clk.common,
+	&mmc1_output_clk.common,
+	&mmc2_clk.common,
+	&mmc2_sample_clk.common,
+	&mmc2_output_clk.common,
+	&ce_clk.common,
+	&spi0_clk.common,
+	&i2s0_clk.common,
+	&usb_phy0_clk.common,
+	&usb_ohci0_clk.common,
+	&dram_clk.common,
+	&dram_ve_clk.common,
+	&dram_csi_clk.common,
+	&dram_ohci_clk.common,
+	&dram_ehci_clk.common,
+	&de_clk.common,
+	&tcon_clk.common,
+	&csi_misc_clk.common,
+	&csi0_mclk_clk.common,
+	&csi1_sclk_clk.common,
+	&csi1_mclk_clk.common,
+	&ve_clk.common,
+	&ac_dig_clk.common,
+	&avs_clk.common,
+	&mbus_clk.common,
+	&mipi_csi_clk.common,
+};
+
 /* We hardcode the divider to 4 for now */
 static CLK_FIXED_FACTOR_HWS(pll_audio_clk, "pll-audio",
 			    clk_parent_pll_audio,
@@ -540,6 +621,88 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = {
 	.num	= CLK_NUMBER,
 };
 
+static struct clk_hw_onecell_data sun8i_v3_hw_clks = {
+	.hws	= {
+		[CLK_PLL_CPU]		= &pll_cpu_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_PLL_AUDIO_8X]	= &pll_audio_8x_clk.hw,
+		[CLK_PLL_VIDEO]		= &pll_video_clk.common.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DDR0]		= &pll_ddr0_clk.common.hw,
+		[CLK_PLL_PERIPH0]	= &pll_periph0_clk.common.hw,
+		[CLK_PLL_PERIPH0_2X]	= &pll_periph0_2x_clk.hw,
+		[CLK_PLL_ISP]		= &pll_isp_clk.common.hw,
+		[CLK_PLL_PERIPH1]	= &pll_periph1_clk.common.hw,
+		[CLK_PLL_DDR1]		= &pll_ddr1_clk.common.hw,
+		[CLK_CPU]		= &cpu_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_AHB1]		= &ahb1_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_AHB2]		= &ahb2_clk.common.hw,
+		[CLK_BUS_CE]		= &bus_ce_clk.common.hw,
+		[CLK_BUS_DMA]		= &bus_dma_clk.common.hw,
+		[CLK_BUS_MMC0]		= &bus_mmc0_clk.common.hw,
+		[CLK_BUS_MMC1]		= &bus_mmc1_clk.common.hw,
+		[CLK_BUS_MMC2]		= &bus_mmc2_clk.common.hw,
+		[CLK_BUS_DRAM]		= &bus_dram_clk.common.hw,
+		[CLK_BUS_EMAC]		= &bus_emac_clk.common.hw,
+		[CLK_BUS_HSTIMER]	= &bus_hstimer_clk.common.hw,
+		[CLK_BUS_SPI0]		= &bus_spi0_clk.common.hw,
+		[CLK_BUS_OTG]		= &bus_otg_clk.common.hw,
+		[CLK_BUS_EHCI0]		= &bus_ehci0_clk.common.hw,
+		[CLK_BUS_OHCI0]		= &bus_ohci0_clk.common.hw,
+		[CLK_BUS_VE]		= &bus_ve_clk.common.hw,
+		[CLK_BUS_TCON0]		= &bus_tcon0_clk.common.hw,
+		[CLK_BUS_CSI]		= &bus_csi_clk.common.hw,
+		[CLK_BUS_DE]		= &bus_de_clk.common.hw,
+		[CLK_BUS_CODEC]		= &bus_codec_clk.common.hw,
+		[CLK_BUS_PIO]		= &bus_pio_clk.common.hw,
+		[CLK_BUS_I2S0]		= &bus_i2s0_clk.common.hw,
+		[CLK_BUS_I2C0]		= &bus_i2c0_clk.common.hw,
+		[CLK_BUS_I2C1]		= &bus_i2c1_clk.common.hw,
+		[CLK_BUS_UART0]		= &bus_uart0_clk.common.hw,
+		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
+		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
+		[CLK_BUS_EPHY]		= &bus_ephy_clk.common.hw,
+		[CLK_BUS_DBG]		= &bus_dbg_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC0_SAMPLE]	= &mmc0_sample_clk.common.hw,
+		[CLK_MMC0_OUTPUT]	= &mmc0_output_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC1_SAMPLE]	= &mmc1_sample_clk.common.hw,
+		[CLK_MMC1_OUTPUT]	= &mmc1_output_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_MMC2_SAMPLE]	= &mmc2_sample_clk.common.hw,
+		[CLK_MMC2_OUTPUT]	= &mmc2_output_clk.common.hw,
+		[CLK_CE]		= &ce_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_I2S0]		= &i2s0_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_OHCI0]		= &usb_ohci0_clk.common.hw,
+		[CLK_DRAM]		= &dram_clk.common.hw,
+		[CLK_DRAM_VE]		= &dram_ve_clk.common.hw,
+		[CLK_DRAM_CSI]		= &dram_csi_clk.common.hw,
+		[CLK_DRAM_EHCI]		= &dram_ehci_clk.common.hw,
+		[CLK_DRAM_OHCI]		= &dram_ohci_clk.common.hw,
+		[CLK_DE]		= &de_clk.common.hw,
+		[CLK_TCON0]		= &tcon_clk.common.hw,
+		[CLK_CSI_MISC]		= &csi_misc_clk.common.hw,
+		[CLK_CSI0_MCLK]		= &csi0_mclk_clk.common.hw,
+		[CLK_CSI1_SCLK]		= &csi1_sclk_clk.common.hw,
+		[CLK_CSI1_MCLK]		= &csi1_mclk_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_AC_DIG]		= &ac_dig_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_MBUS]		= &mbus_clk.common.hw,
+		[CLK_MIPI_CSI]		= &mipi_csi_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
 static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
 	[RST_USB_PHY0]		=  { 0x0cc, BIT(0) },
 
@@ -575,6 +738,42 @@ static struct ccu_reset_map sun8i_v3s_ccu_resets[] = {
 	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
 };
 
+static struct ccu_reset_map sun8i_v3_ccu_resets[] = {
+	[RST_USB_PHY0]		=  { 0x0cc, BIT(0) },
+
+	[RST_MBUS]		=  { 0x0fc, BIT(31) },
+
+	[RST_BUS_CE]		=  { 0x2c0, BIT(5) },
+	[RST_BUS_DMA]		=  { 0x2c0, BIT(6) },
+	[RST_BUS_MMC0]		=  { 0x2c0, BIT(8) },
+	[RST_BUS_MMC1]		=  { 0x2c0, BIT(9) },
+	[RST_BUS_MMC2]		=  { 0x2c0, BIT(10) },
+	[RST_BUS_DRAM]		=  { 0x2c0, BIT(14) },
+	[RST_BUS_EMAC]		=  { 0x2c0, BIT(17) },
+	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
+	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(24) },
+	[RST_BUS_EHCI0]		=  { 0x2c0, BIT(26) },
+	[RST_BUS_OHCI0]		=  { 0x2c0, BIT(29) },
+
+	[RST_BUS_VE]		=  { 0x2c4, BIT(0) },
+	[RST_BUS_TCON0]		=  { 0x2c4, BIT(4) },
+	[RST_BUS_CSI]		=  { 0x2c4, BIT(8) },
+	[RST_BUS_DE]		=  { 0x2c4, BIT(12) },
+	[RST_BUS_DBG]		=  { 0x2c4, BIT(31) },
+
+	[RST_BUS_EPHY]		=  { 0x2c8, BIT(2) },
+
+	[RST_BUS_CODEC]		=  { 0x2d0, BIT(0) },
+	[RST_BUS_I2S0]		=  { 0x2d0, BIT(12) },
+
+	[RST_BUS_I2C0]		=  { 0x2d8, BIT(0) },
+	[RST_BUS_I2C1]		=  { 0x2d8, BIT(1) },
+	[RST_BUS_UART0]		=  { 0x2d8, BIT(16) },
+	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
+	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
+};
+
 static const struct sunxi_ccu_desc sun8i_v3s_ccu_desc = {
 	.ccu_clks	= sun8i_v3s_ccu_clks,
 	.num_ccu_clks	= ARRAY_SIZE(sun8i_v3s_ccu_clks),
@@ -585,7 +784,18 @@ static const struct sunxi_ccu_desc sun8i_v3s_ccu_desc = {
 	.num_resets	= ARRAY_SIZE(sun8i_v3s_ccu_resets),
 };
 
-static void __init sun8i_v3s_ccu_setup(struct device_node *node)
+static const struct sunxi_ccu_desc sun8i_v3_ccu_desc = {
+	.ccu_clks	= sun8i_v3_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun8i_v3_ccu_clks),
+
+	.hw_clks	= &sun8i_v3_hw_clks,
+
+	.resets		= sun8i_v3_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun8i_v3_ccu_resets),
+};
+
+static void __init sun8i_v3_v3s_ccu_init(struct device_node *node,
+					 const struct sunxi_ccu_desc *ccu_desc)
 {
 	void __iomem *reg;
 	u32 val;
@@ -601,7 +811,21 @@ static void __init sun8i_v3s_ccu_setup(struct device_node *node)
 	val &= ~GENMASK(19, 16);
 	writel(val | (3 << 16), reg + SUN8I_V3S_PLL_AUDIO_REG);
 
-	sunxi_ccu_probe(node, reg, &sun8i_v3s_ccu_desc);
+	sunxi_ccu_probe(node, reg, ccu_desc);
+}
+
+static void __init sun8i_v3s_ccu_setup(struct device_node *node)
+{
+	sun8i_v3_v3s_ccu_init(node, &sun8i_v3s_ccu_desc);
+}
+
+static void __init sun8i_v3_ccu_setup(struct device_node *node)
+{
+	sun8i_v3_v3s_ccu_init(node, &sun8i_v3_ccu_desc);
 }
+
 CLK_OF_DECLARE(sun8i_v3s_ccu, "allwinner,sun8i-v3s-ccu",
 	       sun8i_v3s_ccu_setup);
+
+CLK_OF_DECLARE(sun8i_v3_ccu, "allwinner,sun8i-v3-ccu",
+	       sun8i_v3_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
index 10af324bd6b1..b0160d305a67 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h
@@ -51,6 +51,6 @@
 
 #define CLK_PLL_DDR1		74
 
-#define CLK_NUMBER		(CLK_PLL_DDR1 + 1)
+#define CLK_NUMBER		(CLK_I2S0 + 1)
 
 #endif /* _CCU_SUN8I_H3_H_ */
diff --git a/include/dt-bindings/clock/sun8i-v3s-ccu.h b/include/dt-bindings/clock/sun8i-v3s-ccu.h
index c0d5d5599c87..014ac6123d17 100644
--- a/include/dt-bindings/clock/sun8i-v3s-ccu.h
+++ b/include/dt-bindings/clock/sun8i-v3s-ccu.h
@@ -104,4 +104,8 @@
 
 #define CLK_MIPI_CSI		73
 
+/* Clocks not available on V3s */
+#define CLK_BUS_I2S0		75
+#define CLK_I2S0		76
+
 #endif /* _DT_BINDINGS_CLK_SUN8I_V3S_H_ */
diff --git a/include/dt-bindings/reset/sun8i-v3s-ccu.h b/include/dt-bindings/reset/sun8i-v3s-ccu.h
index b58ef21a2e18..b6790173afd6 100644
--- a/include/dt-bindings/reset/sun8i-v3s-ccu.h
+++ b/include/dt-bindings/reset/sun8i-v3s-ccu.h
@@ -75,4 +75,7 @@
 #define RST_BUS_UART1		50
 #define RST_BUS_UART2		51
 
+/* Reset lines not available on V3s */
+#define RST_BUS_I2S0		52
+
 #endif /* _DT_BINDINGS_RST_SUN8I_H3_H_ */
-- 
cgit v1.2.3


From accd2dd72c8f087441d725dd916688171519e4e6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 9 Aug 2019 10:23:57 +0200
Subject: PCI/ASPM: Add pcie_aspm_enabled()

Add a function checking whether or not PCIe ASPM has been enabled for
a given device.

It will be used by the NVMe driver to decide how to handle the
device during system suspend.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/aspm.c | 20 ++++++++++++++++++++
 include/linux/pci.h     |  2 ++
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index e44af7f4d37f..464f8f92653f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1170,6 +1170,26 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
 module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
 	NULL, 0644);
 
+/**
+ * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device.
+ * @pdev: Target device.
+ */
+bool pcie_aspm_enabled(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
+	bool ret;
+
+	if (!bridge)
+		return false;
+
+	mutex_lock(&aspm_lock);
+	ret = bridge->link_state ? !!bridge->link_state->aspm_enabled : false;
+	mutex_unlock(&aspm_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pcie_aspm_enabled);
+
 #ifdef CONFIG_PCIEASPM_DEBUG
 static ssize_t link_state_show(struct device *dev,
 		struct device_attribute *attr,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..82e4cd1b7ac3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1567,8 +1567,10 @@ extern bool pcie_ports_native;
 
 #ifdef CONFIG_PCIEASPM
 bool pcie_aspm_support_enabled(void);
+bool pcie_aspm_enabled(struct pci_dev *pdev);
 #else
 static inline bool pcie_aspm_support_enabled(void) { return false; }
+static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
 #endif
 
 #ifdef CONFIG_PCIEAER
-- 
cgit v1.2.3


From 5fbdac150d4813765fd515ffad6c7f5e63be14aa Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 17 Jul 2019 08:48:06 +0000
Subject: mtd: spi-nor: fix description for int (*flash_is_locked)()

The description was interleaved.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 include/linux/mtd/spi-nor.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 5f1acb1867dd..63560b375168 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -376,10 +376,10 @@ struct flash_info;
  * @flash_lock:		[FLASH-SPECIFIC] lock a region of the SPI NOR
  * @flash_unlock:	[FLASH-SPECIFIC] unlock a region of the SPI NOR
  * @flash_is_locked:	[FLASH-SPECIFIC] check if a region of the SPI NOR is
+ *			completely locked
  * @quad_enable:	[FLASH-SPECIFIC] enables SPI NOR quad mode
  * @clear_sr_bp:	[FLASH-SPECIFIC] clears the Block Protection Bits from
  *			the SPI NOR Status Register.
- *			completely locked
  * @priv:		the private data
  */
 struct spi_nor {
-- 
cgit v1.2.3


From 4752544a447b52b9949eb648a3b7719357853f91 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Tue, 23 Jul 2019 14:00:09 +0100
Subject: firmware: arm_scmi: Use the correct style for SPDX License Identifier

Fix to correct the SPDX License Identifier style in header file related
to firmware frivers for ARM SCMI message protocol.

For C header files Documentation/process/license-rules.rst mandates
C-like comments(opposed to C source files where C++ style should be
used).

While at it, change GPL-2.0 to GPL-2.0-only similar to the ones in
psci.h and scpi_protocol.h

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 include/linux/scmi_protocol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index 9ff2e9357e9a..aa1e791779b4 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * SCMI Message Protocol driver header
  *
-- 
cgit v1.2.3


From 9eefa43a1a03960c7458e1463132f893702741be Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 8 Jul 2019 09:40:33 +0100
Subject: firmware: arm_scmi: Align few names in sensors protocol with SCMI
 specification

Looks like more code developed during the draft versions of the
specification slipped through and they don't match the final
released version. This seem to have happened only with sensor
protocol.

Renaming few command and function names here to match exactly with
the released version of SCMI specification for ease of maintenance.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/sensors.c | 28 +++++++++++++++-------------
 include/linux/scmi_protocol.h       | 12 ++++++------
 2 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c
index 0e94ab56f679..17dbabd8a94a 100644
--- a/drivers/firmware/arm_scmi/sensors.c
+++ b/drivers/firmware/arm_scmi/sensors.c
@@ -9,8 +9,8 @@
 
 enum scmi_sensor_protocol_cmd {
 	SENSOR_DESCRIPTION_GET = 0x3,
-	SENSOR_CONFIG_SET = 0x4,
-	SENSOR_TRIP_POINT_SET = 0x5,
+	SENSOR_TRIP_POINT_NOTIFY = 0x4,
+	SENSOR_TRIP_POINT_CONFIG = 0x5,
 	SENSOR_READING_GET = 0x6,
 };
 
@@ -42,9 +42,10 @@ struct scmi_msg_resp_sensor_description {
 	} desc[0];
 };
 
-struct scmi_msg_set_sensor_config {
+struct scmi_msg_sensor_trip_point_notify {
 	__le32 id;
 	__le32 event_control;
+#define SENSOR_TP_NOTIFY_ALL	BIT(0)
 };
 
 struct scmi_msg_set_sensor_trip_point {
@@ -160,15 +161,15 @@ static int scmi_sensor_description_get(const struct scmi_handle *handle,
 	return ret;
 }
 
-static int
-scmi_sensor_configuration_set(const struct scmi_handle *handle, u32 sensor_id)
+static int scmi_sensor_trip_point_notify(const struct scmi_handle *handle,
+					 u32 sensor_id, bool enable)
 {
 	int ret;
-	u32 evt_cntl = BIT(0);
+	u32 evt_cntl = enable ? SENSOR_TP_NOTIFY_ALL : 0;
 	struct scmi_xfer *t;
-	struct scmi_msg_set_sensor_config *cfg;
+	struct scmi_msg_sensor_trip_point_notify *cfg;
 
-	ret = scmi_xfer_get_init(handle, SENSOR_CONFIG_SET,
+	ret = scmi_xfer_get_init(handle, SENSOR_TRIP_POINT_NOTIFY,
 				 SCMI_PROTOCOL_SENSOR, sizeof(*cfg), 0, &t);
 	if (ret)
 		return ret;
@@ -183,15 +184,16 @@ scmi_sensor_configuration_set(const struct scmi_handle *handle, u32 sensor_id)
 	return ret;
 }
 
-static int scmi_sensor_trip_point_set(const struct scmi_handle *handle,
-				      u32 sensor_id, u8 trip_id, u64 trip_value)
+static int
+scmi_sensor_trip_point_config(const struct scmi_handle *handle, u32 sensor_id,
+			      u8 trip_id, u64 trip_value)
 {
 	int ret;
 	u32 evt_cntl = SENSOR_TP_BOTH;
 	struct scmi_xfer *t;
 	struct scmi_msg_set_sensor_trip_point *trip;
 
-	ret = scmi_xfer_get_init(handle, SENSOR_TRIP_POINT_SET,
+	ret = scmi_xfer_get_init(handle, SENSOR_TRIP_POINT_CONFIG,
 				 SCMI_PROTOCOL_SENSOR, sizeof(*trip), 0, &t);
 	if (ret)
 		return ret;
@@ -255,8 +257,8 @@ static int scmi_sensor_count_get(const struct scmi_handle *handle)
 static struct scmi_sensor_ops sensor_ops = {
 	.count_get = scmi_sensor_count_get,
 	.info_get = scmi_sensor_info_get,
-	.configuration_set = scmi_sensor_configuration_set,
-	.trip_point_set = scmi_sensor_trip_point_set,
+	.trip_point_notify = scmi_sensor_trip_point_notify,
+	.trip_point_config = scmi_sensor_trip_point_config,
 	.reading_get = scmi_sensor_reading_get,
 };
 
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index aa1e791779b4..1383d47e6435 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -167,9 +167,9 @@ enum scmi_sensor_class {
  *
  * @count_get: get the count of sensors provided by SCMI
  * @info_get: get the information of the specified sensor
- * @configuration_set: control notifications on cross-over events for
+ * @trip_point_notify: control notifications on cross-over events for
  *	the trip-points
- * @trip_point_set: selects and configures a trip-point of interest
+ * @trip_point_config: selects and configures a trip-point of interest
  * @reading_get: gets the current value of the sensor
  */
 struct scmi_sensor_ops {
@@ -177,10 +177,10 @@ struct scmi_sensor_ops {
 
 	const struct scmi_sensor_info *(*info_get)
 		(const struct scmi_handle *handle, u32 sensor_id);
-	int (*configuration_set)(const struct scmi_handle *handle,
-				 u32 sensor_id);
-	int (*trip_point_set)(const struct scmi_handle *handle, u32 sensor_id,
-			      u8 trip_id, u64 trip_value);
+	int (*trip_point_notify)(const struct scmi_handle *handle,
+				 u32 sensor_id, bool enable);
+	int (*trip_point_config)(const struct scmi_handle *handle,
+				 u32 sensor_id, u8 trip_id, u64 trip_value);
 	int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id,
 			   bool async, u64 *value);
 };
-- 
cgit v1.2.3


From 6a55331c87d86a7406d8126ae75bdd07244a91b1 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 8 Jul 2019 09:40:57 +0100
Subject: firmware: arm_scmi: Drop async flag in sensor_ops->reading_get

SENSOR_DESCRIPTION_GET provides attributes to indicate if the sensor
supports asynchronous read. Ideally we should be able to read that flag
and use asynchronous reads for any sensors with that attribute set.

In order to add that support, let's drop the async flag passed to
sensor_ops->reading_get and dynamically switch between sync and async
flags based on the attributes as provided by the firmware.

Cc: linux-hwmon@vger.kernel.org
Acked-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/sensors.c | 4 ++--
 drivers/hwmon/scmi-hwmon.c          | 2 +-
 include/linux/scmi_protocol.h       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c
index 17dbabd8a94a..1b5757c77a35 100644
--- a/drivers/firmware/arm_scmi/sensors.c
+++ b/drivers/firmware/arm_scmi/sensors.c
@@ -211,7 +211,7 @@ scmi_sensor_trip_point_config(const struct scmi_handle *handle, u32 sensor_id,
 }
 
 static int scmi_sensor_reading_get(const struct scmi_handle *handle,
-				   u32 sensor_id, bool async, u64 *value)
+				   u32 sensor_id, u64 *value)
 {
 	int ret;
 	struct scmi_xfer *t;
@@ -225,7 +225,7 @@ static int scmi_sensor_reading_get(const struct scmi_handle *handle,
 
 	sensor = t->tx.buf;
 	sensor->id = cpu_to_le32(sensor_id);
-	sensor->flags = cpu_to_le32(async ? SENSOR_READ_ASYNC : 0);
+	sensor->flags = cpu_to_le32(0);
 
 	ret = scmi_do_xfer(handle, t);
 	if (!ret) {
diff --git a/drivers/hwmon/scmi-hwmon.c b/drivers/hwmon/scmi-hwmon.c
index 0c93fc5ca762..8a7732c0bef3 100644
--- a/drivers/hwmon/scmi-hwmon.c
+++ b/drivers/hwmon/scmi-hwmon.c
@@ -72,7 +72,7 @@ static int scmi_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
 	const struct scmi_handle *h = scmi_sensors->handle;
 
 	sensor = *(scmi_sensors->info[type] + channel);
-	ret = h->sensor_ops->reading_get(h, sensor->id, false, &value);
+	ret = h->sensor_ops->reading_get(h, sensor->id, &value);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index 1383d47e6435..2ace5af210ad 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -182,7 +182,7 @@ struct scmi_sensor_ops {
 	int (*trip_point_config)(const struct scmi_handle *handle,
 				 u32 sensor_id, u8 trip_id, u64 trip_value);
 	int (*reading_get)(const struct scmi_handle *handle, u32 sensor_id,
-			   bool async, u64 *value);
+			   u64 *value);
 };
 
 /**
-- 
cgit v1.2.3


From d09aac0eb17c6ce2b66095e1e324f60ec9dd8988 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 8 Jul 2019 09:41:01 +0100
Subject: firmware: arm_scmi: Add asynchronous sensor read if it supports

SENSOR_DESCRIPTION_GET provides attributes to indicate if the sensor
supports asynchronous read. We can read that flag and use asynchronous
reads for any sensors with that attribute set.

Let's use the new scmi_do_xfer_with_response to support asynchronous
sensor reads.

Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/sensors.c | 30 ++++++++++++++++++++++--------
 include/linux/scmi_protocol.h       |  2 ++
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c
index 1b5757c77a35..7570308a16a0 100644
--- a/drivers/firmware/arm_scmi/sensors.c
+++ b/drivers/firmware/arm_scmi/sensors.c
@@ -136,9 +136,10 @@ static int scmi_sensor_description_get(const struct scmi_handle *handle,
 		}
 
 		for (cnt = 0; cnt < num_returned; cnt++) {
-			u32 attrh;
+			u32 attrh, attrl;
 			struct scmi_sensor_info *s;
 
+			attrl = le32_to_cpu(buf->desc[cnt].attributes_low);
 			attrh = le32_to_cpu(buf->desc[cnt].attributes_high);
 			s = &si->sensors[desc_index + cnt];
 			s->id = le32_to_cpu(buf->desc[cnt].id);
@@ -147,6 +148,8 @@ static int scmi_sensor_description_get(const struct scmi_handle *handle,
 			/* Sign extend to a full s8 */
 			if (s->scale & SENSOR_SCALE_SIGN)
 				s->scale |= SENSOR_SCALE_EXTEND;
+			s->async = SUPPORTS_ASYNC_READ(attrl);
+			s->num_trip_points = NUM_TRIP_POINTS(attrl);
 			strlcpy(s->name, buf->desc[cnt].name, SCMI_MAX_STR_SIZE);
 		}
 
@@ -214,8 +217,11 @@ static int scmi_sensor_reading_get(const struct scmi_handle *handle,
 				   u32 sensor_id, u64 *value)
 {
 	int ret;
+	__le32 *pval;
 	struct scmi_xfer *t;
 	struct scmi_msg_sensor_reading_get *sensor;
+	struct sensors_info *si = handle->sensor_priv;
+	struct scmi_sensor_info *s = si->sensors + sensor_id;
 
 	ret = scmi_xfer_get_init(handle, SENSOR_READING_GET,
 				 SCMI_PROTOCOL_SENSOR, sizeof(*sensor),
@@ -223,16 +229,24 @@ static int scmi_sensor_reading_get(const struct scmi_handle *handle,
 	if (ret)
 		return ret;
 
+	pval = t->rx.buf;
 	sensor = t->tx.buf;
 	sensor->id = cpu_to_le32(sensor_id);
-	sensor->flags = cpu_to_le32(0);
-
-	ret = scmi_do_xfer(handle, t);
-	if (!ret) {
-		__le32 *pval = t->rx.buf;
 
-		*value = le32_to_cpu(*pval);
-		*value |= (u64)le32_to_cpu(*(pval + 1)) << 32;
+	if (s->async) {
+		sensor->flags = cpu_to_le32(SENSOR_READ_ASYNC);
+		ret = scmi_do_xfer_with_response(handle, t);
+		if (!ret) {
+			*value = le32_to_cpu(*(pval + 1));
+			*value |= (u64)le32_to_cpu(*(pval + 2)) << 32;
+		}
+	} else {
+		sensor->flags = cpu_to_le32(0);
+		ret = scmi_do_xfer(handle, t);
+		if (!ret) {
+			*value = le32_to_cpu(*pval);
+			*value |= (u64)le32_to_cpu(*(pval + 1)) << 32;
+		}
 	}
 
 	scmi_xfer_put(handle, t);
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index 2ace5af210ad..ae7381413f1f 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -145,6 +145,8 @@ struct scmi_sensor_info {
 	u32 id;
 	u8 type;
 	s8 scale;
+	u8 num_trip_points;
+	bool async;
 	char name[SCMI_MAX_STR_SIZE];
 };
 
-- 
cgit v1.2.3


From d0aba11614552d43a61c3acdf36876b00060286e Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 8 Jul 2019 09:42:22 +0100
Subject: firmware: arm_scmi: Drop config flag in clk_ops->rate_set

CLOCK_PROTOCOL_ATTRIBUTES provides attributes to indicate the maximum
number of pending asynchronous clock rate changes supported by the
platform. If it's non-zero, then we should be able to use asynchronous
clock rate set for any clocks until the maximum limit is reached.

In order to add that support, let's drop the config flag passed to
clk_ops->rate_set and handle the asynchronous requests dynamically.

Cc: Stephen Boyd <sboyd@kernel.org>
Cc: linux-clk@vger.kernel.org
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/clk/clk-scmi.c            | 2 +-
 drivers/firmware/arm_scmi/clock.c | 4 ++--
 include/linux/scmi_protocol.h     | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/clk-scmi.c b/drivers/clk/clk-scmi.c
index a2287c770d5c..886f7c5df51a 100644
--- a/drivers/clk/clk-scmi.c
+++ b/drivers/clk/clk-scmi.c
@@ -69,7 +69,7 @@ static int scmi_clk_set_rate(struct clk_hw *hw, unsigned long rate,
 {
 	struct scmi_clk *clk = to_scmi_clk(hw);
 
-	return clk->handle->clk_ops->rate_set(clk->handle, clk->id, 0, rate);
+	return clk->handle->clk_ops->rate_set(clk->handle, clk->id, rate);
 }
 
 static int scmi_clk_enable(struct clk_hw *hw)
diff --git a/drivers/firmware/arm_scmi/clock.c b/drivers/firmware/arm_scmi/clock.c
index 0a194af92438..dd215bd11a58 100644
--- a/drivers/firmware/arm_scmi/clock.c
+++ b/drivers/firmware/arm_scmi/clock.c
@@ -218,7 +218,7 @@ scmi_clock_rate_get(const struct scmi_handle *handle, u32 clk_id, u64 *value)
 }
 
 static int scmi_clock_rate_set(const struct scmi_handle *handle, u32 clk_id,
-			       u32 config, u64 rate)
+			       u64 rate)
 {
 	int ret;
 	struct scmi_xfer *t;
@@ -230,7 +230,7 @@ static int scmi_clock_rate_set(const struct scmi_handle *handle, u32 clk_id,
 		return ret;
 
 	cfg = t->tx.buf;
-	cfg->flags = cpu_to_le32(config);
+	cfg->flags = cpu_to_le32(0);
 	cfg->id = cpu_to_le32(clk_id);
 	cfg->value_low = cpu_to_le32(rate & 0xffffffff);
 	cfg->value_high = cpu_to_le32(rate >> 32);
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index ae7381413f1f..f0f2b53a1dac 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -71,7 +71,7 @@ struct scmi_clk_ops {
 	int (*rate_get)(const struct scmi_handle *handle, u32 clk_id,
 			u64 *rate);
 	int (*rate_set)(const struct scmi_handle *handle, u32 clk_id,
-			u32 config, u64 rate);
+			u64 rate);
 	int (*enable)(const struct scmi_handle *handle, u32 clk_id);
 	int (*disable)(const struct scmi_handle *handle, u32 clk_id);
 };
-- 
cgit v1.2.3


From 95a15d80aa0de938299acfcbc6aa6f2b16f5d7e5 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 8 Jul 2019 09:41:06 +0100
Subject: firmware: arm_scmi: Add RESET protocol in SCMI v2.0

SCMIv2.0 adds a new Reset Management Protocol to manage various reset
states a given device or domain can enter. Device(s) that can be
collectively reset through a common reset signal constitute a reset
domain for the firmware.

A reset domain can be reset autonomously or explicitly through assertion
and de-assertion of the signal. When autonomous reset is chosen, the
firmware is responsible for taking the necessary steps to reset the
domain and to subsequently bring it out of reset. When explicit reset is
chosen, the caller has to specifically assert and then de-assert the
reset signal by issuing two separate RESET commands.

Add the basic SCMI reset infrastructure that can be used by Linux
reset controller driver.

Reviewed-by: Peng Fan <peng.fan@nxp.com>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/Makefile |   2 +-
 drivers/firmware/arm_scmi/reset.c  | 231 +++++++++++++++++++++++++++++++++++++
 include/linux/scmi_protocol.h      |  26 +++++
 3 files changed, 258 insertions(+), 1 deletion(-)
 create mode 100644 drivers/firmware/arm_scmi/reset.c

(limited to 'include')

diff --git a/drivers/firmware/arm_scmi/Makefile b/drivers/firmware/arm_scmi/Makefile
index c47d28d556b6..5f298f00a82e 100644
--- a/drivers/firmware/arm_scmi/Makefile
+++ b/drivers/firmware/arm_scmi/Makefile
@@ -2,5 +2,5 @@
 obj-y	= scmi-bus.o scmi-driver.o scmi-protocols.o
 scmi-bus-y = bus.o
 scmi-driver-y = driver.o
-scmi-protocols-y = base.o clock.o perf.o power.o sensors.o
+scmi-protocols-y = base.o clock.o perf.o power.o reset.o sensors.o
 obj-$(CONFIG_ARM_SCMI_POWER_DOMAIN) += scmi_pm_domain.o
diff --git a/drivers/firmware/arm_scmi/reset.c b/drivers/firmware/arm_scmi/reset.c
new file mode 100644
index 000000000000..64cc81915581
--- /dev/null
+++ b/drivers/firmware/arm_scmi/reset.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * System Control and Management Interface (SCMI) Reset Protocol
+ *
+ * Copyright (C) 2019 ARM Ltd.
+ */
+
+#include "common.h"
+
+enum scmi_reset_protocol_cmd {
+	RESET_DOMAIN_ATTRIBUTES = 0x3,
+	RESET = 0x4,
+	RESET_NOTIFY = 0x5,
+};
+
+enum scmi_reset_protocol_notify {
+	RESET_ISSUED = 0x0,
+};
+
+#define NUM_RESET_DOMAIN_MASK	0xffff
+#define RESET_NOTIFY_ENABLE	BIT(0)
+
+struct scmi_msg_resp_reset_domain_attributes {
+	__le32 attributes;
+#define SUPPORTS_ASYNC_RESET(x)		((x) & BIT(31))
+#define SUPPORTS_NOTIFY_RESET(x)	((x) & BIT(30))
+	__le32 latency;
+	    u8 name[SCMI_MAX_STR_SIZE];
+};
+
+struct scmi_msg_reset_domain_reset {
+	__le32 domain_id;
+	__le32 flags;
+#define AUTONOMOUS_RESET	BIT(0)
+#define EXPLICIT_RESET_ASSERT	BIT(1)
+#define ASYNCHRONOUS_RESET	BIT(2)
+	__le32 reset_state;
+#define ARCH_RESET_TYPE		BIT(31)
+#define COLD_RESET_STATE	BIT(0)
+#define ARCH_COLD_RESET		(ARCH_RESET_TYPE | COLD_RESET_STATE)
+};
+
+struct reset_dom_info {
+	bool async_reset;
+	bool reset_notify;
+	u32 latency_us;
+	char name[SCMI_MAX_STR_SIZE];
+};
+
+struct scmi_reset_info {
+	int num_domains;
+	struct reset_dom_info *dom_info;
+};
+
+static int scmi_reset_attributes_get(const struct scmi_handle *handle,
+				     struct scmi_reset_info *pi)
+{
+	int ret;
+	struct scmi_xfer *t;
+	u32 attr;
+
+	ret = scmi_xfer_get_init(handle, PROTOCOL_ATTRIBUTES,
+				 SCMI_PROTOCOL_RESET, 0, sizeof(attr), &t);
+	if (ret)
+		return ret;
+
+	ret = scmi_do_xfer(handle, t);
+	if (!ret) {
+		attr = get_unaligned_le32(t->rx.buf);
+		pi->num_domains = attr & NUM_RESET_DOMAIN_MASK;
+	}
+
+	scmi_xfer_put(handle, t);
+	return ret;
+}
+
+static int
+scmi_reset_domain_attributes_get(const struct scmi_handle *handle, u32 domain,
+				 struct reset_dom_info *dom_info)
+{
+	int ret;
+	struct scmi_xfer *t;
+	struct scmi_msg_resp_reset_domain_attributes *attr;
+
+	ret = scmi_xfer_get_init(handle, RESET_DOMAIN_ATTRIBUTES,
+				 SCMI_PROTOCOL_RESET, sizeof(domain),
+				 sizeof(*attr), &t);
+	if (ret)
+		return ret;
+
+	put_unaligned_le32(domain, t->tx.buf);
+	attr = t->rx.buf;
+
+	ret = scmi_do_xfer(handle, t);
+	if (!ret) {
+		u32 attributes = le32_to_cpu(attr->attributes);
+
+		dom_info->async_reset = SUPPORTS_ASYNC_RESET(attributes);
+		dom_info->reset_notify = SUPPORTS_NOTIFY_RESET(attributes);
+		dom_info->latency_us = le32_to_cpu(attr->latency);
+		if (dom_info->latency_us == U32_MAX)
+			dom_info->latency_us = 0;
+		strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE);
+	}
+
+	scmi_xfer_put(handle, t);
+	return ret;
+}
+
+static int scmi_reset_num_domains_get(const struct scmi_handle *handle)
+{
+	struct scmi_reset_info *pi = handle->reset_priv;
+
+	return pi->num_domains;
+}
+
+static char *scmi_reset_name_get(const struct scmi_handle *handle, u32 domain)
+{
+	struct scmi_reset_info *pi = handle->reset_priv;
+	struct reset_dom_info *dom = pi->dom_info + domain;
+
+	return dom->name;
+}
+
+static int scmi_reset_latency_get(const struct scmi_handle *handle, u32 domain)
+{
+	struct scmi_reset_info *pi = handle->reset_priv;
+	struct reset_dom_info *dom = pi->dom_info + domain;
+
+	return dom->latency_us;
+}
+
+static int scmi_domain_reset(const struct scmi_handle *handle, u32 domain,
+			     u32 flags, u32 state)
+{
+	int ret;
+	struct scmi_xfer *t;
+	struct scmi_msg_reset_domain_reset *dom;
+	struct scmi_reset_info *pi = handle->reset_priv;
+	struct reset_dom_info *rdom = pi->dom_info + domain;
+
+	if (rdom->async_reset)
+		flags |= ASYNCHRONOUS_RESET;
+
+	ret = scmi_xfer_get_init(handle, RESET, SCMI_PROTOCOL_RESET,
+				 sizeof(*dom), 0, &t);
+	if (ret)
+		return ret;
+
+	dom = t->tx.buf;
+	dom->domain_id = cpu_to_le32(domain);
+	dom->flags = cpu_to_le32(flags);
+	dom->domain_id = cpu_to_le32(state);
+
+	if (rdom->async_reset)
+		ret = scmi_do_xfer_with_response(handle, t);
+	else
+		ret = scmi_do_xfer(handle, t);
+
+	scmi_xfer_put(handle, t);
+	return ret;
+}
+
+static int scmi_reset_domain_reset(const struct scmi_handle *handle, u32 domain)
+{
+	return scmi_domain_reset(handle, domain, AUTONOMOUS_RESET,
+				 ARCH_COLD_RESET);
+}
+
+static int
+scmi_reset_domain_assert(const struct scmi_handle *handle, u32 domain)
+{
+	return scmi_domain_reset(handle, domain, EXPLICIT_RESET_ASSERT,
+				 ARCH_COLD_RESET);
+}
+
+static int
+scmi_reset_domain_deassert(const struct scmi_handle *handle, u32 domain)
+{
+	return scmi_domain_reset(handle, domain, 0, ARCH_COLD_RESET);
+}
+
+static struct scmi_reset_ops reset_ops = {
+	.num_domains_get = scmi_reset_num_domains_get,
+	.name_get = scmi_reset_name_get,
+	.latency_get = scmi_reset_latency_get,
+	.reset = scmi_reset_domain_reset,
+	.assert = scmi_reset_domain_assert,
+	.deassert = scmi_reset_domain_deassert,
+};
+
+static int scmi_reset_protocol_init(struct scmi_handle *handle)
+{
+	int domain;
+	u32 version;
+	struct scmi_reset_info *pinfo;
+
+	scmi_version_get(handle, SCMI_PROTOCOL_RESET, &version);
+
+	dev_dbg(handle->dev, "Reset Version %d.%d\n",
+		PROTOCOL_REV_MAJOR(version), PROTOCOL_REV_MINOR(version));
+
+	pinfo = devm_kzalloc(handle->dev, sizeof(*pinfo), GFP_KERNEL);
+	if (!pinfo)
+		return -ENOMEM;
+
+	scmi_reset_attributes_get(handle, pinfo);
+
+	pinfo->dom_info = devm_kcalloc(handle->dev, pinfo->num_domains,
+				       sizeof(*pinfo->dom_info), GFP_KERNEL);
+	if (!pinfo->dom_info)
+		return -ENOMEM;
+
+	for (domain = 0; domain < pinfo->num_domains; domain++) {
+		struct reset_dom_info *dom = pinfo->dom_info + domain;
+
+		scmi_reset_domain_attributes_get(handle, domain, dom);
+	}
+
+	handle->reset_ops = &reset_ops;
+	handle->reset_priv = pinfo;
+
+	return 0;
+}
+
+static int __init scmi_reset_init(void)
+{
+	return scmi_protocol_register(SCMI_PROTOCOL_RESET,
+				      &scmi_reset_protocol_init);
+}
+subsys_initcall(scmi_reset_init);
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index f0f2b53a1dac..881fea47c83d 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -187,6 +187,26 @@ struct scmi_sensor_ops {
 			   u64 *value);
 };
 
+/**
+ * struct scmi_reset_ops - represents the various operations provided
+ *	by SCMI Reset Protocol
+ *
+ * @num_domains_get: get the count of reset domains provided by SCMI
+ * @name_get: gets the name of a reset domain
+ * @latency_get: gets the reset latency for the specified reset domain
+ * @reset: resets the specified reset domain
+ * @assert: explicitly assert reset signal of the specified reset domain
+ * @deassert: explicitly deassert reset signal of the specified reset domain
+ */
+struct scmi_reset_ops {
+	int (*num_domains_get)(const struct scmi_handle *handle);
+	char *(*name_get)(const struct scmi_handle *handle, u32 domain);
+	int (*latency_get)(const struct scmi_handle *handle, u32 domain);
+	int (*reset)(const struct scmi_handle *handle, u32 domain);
+	int (*assert)(const struct scmi_handle *handle, u32 domain);
+	int (*deassert)(const struct scmi_handle *handle, u32 domain);
+};
+
 /**
  * struct scmi_handle - Handle returned to ARM SCMI clients for usage.
  *
@@ -196,6 +216,7 @@ struct scmi_sensor_ops {
  * @perf_ops: pointer to set of performance protocol operations
  * @clk_ops: pointer to set of clock protocol operations
  * @sensor_ops: pointer to set of sensor protocol operations
+ * @reset_ops: pointer to set of reset protocol operations
  * @perf_priv: pointer to private data structure specific to performance
  *	protocol(for internal use only)
  * @clk_priv: pointer to private data structure specific to clock
@@ -204,6 +225,8 @@ struct scmi_sensor_ops {
  *	protocol(for internal use only)
  * @sensor_priv: pointer to private data structure specific to sensors
  *	protocol(for internal use only)
+ * @reset_priv: pointer to private data structure specific to reset
+ *	protocol(for internal use only)
  */
 struct scmi_handle {
 	struct device *dev;
@@ -212,11 +235,13 @@ struct scmi_handle {
 	struct scmi_clk_ops *clk_ops;
 	struct scmi_power_ops *power_ops;
 	struct scmi_sensor_ops *sensor_ops;
+	struct scmi_reset_ops *reset_ops;
 	/* for protocol internal use */
 	void *perf_priv;
 	void *clk_priv;
 	void *power_priv;
 	void *sensor_priv;
+	void *reset_priv;
 };
 
 enum scmi_std_protocol {
@@ -226,6 +251,7 @@ enum scmi_std_protocol {
 	SCMI_PROTOCOL_PERF = 0x13,
 	SCMI_PROTOCOL_CLOCK = 0x14,
 	SCMI_PROTOCOL_SENSOR = 0x15,
+	SCMI_PROTOCOL_RESET = 0x16,
 };
 
 struct scmi_device {
-- 
cgit v1.2.3


From a4872e80ce7d2a1844328176dbf279d0a2b89bdb Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Mon, 5 Aug 2019 13:21:50 +0800
Subject: mfd: mt6397: Extract IRQ related code from core driver

In order to support different types of irq design, we decide to add
separate irq drivers for different design and keep mt6397 mfd core
simple and reusable to all generations of PMICs so far.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Makefile            |   3 +-
 drivers/mfd/mt6397-core.c       | 146 --------------------------------
 drivers/mfd/mt6397-irq.c        | 181 ++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/mt6397/core.h |   9 ++
 4 files changed, 192 insertions(+), 147 deletions(-)
 create mode 100644 drivers/mfd/mt6397-irq.c

(limited to 'include')

diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 446d5df7cacb..7b6a6aa4fe42 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -240,7 +240,8 @@ obj-$(CONFIG_INTEL_SOC_PMIC)	+= intel-soc-pmic.o
 obj-$(CONFIG_INTEL_SOC_PMIC_BXTWC)	+= intel_soc_pmic_bxtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTWC)	+= intel_soc_pmic_chtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)	+= intel_soc_pmic_chtdc_ti.o
-obj-$(CONFIG_MFD_MT6397)	+= mt6397-core.o
+mt6397-objs	:= mt6397-core.o mt6397-irq.o
+obj-$(CONFIG_MFD_MT6397)	+= mt6397.o
 
 obj-$(CONFIG_MFD_ALTERA_A10SR)	+= altera-a10sr.o
 obj-$(CONFIG_MFD_ALTERA_SYSMGR) += altera-sysmgr.o
diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c
index c0708622f41b..93c888153b77 100644
--- a/drivers/mfd/mt6397-core.c
+++ b/drivers/mfd/mt6397-core.c
@@ -18,10 +18,6 @@
 #define MT6397_RTC_BASE		0xe000
 #define MT6397_RTC_SIZE		0x3e
 
-#define MT6323_CHIP_ID		0x23
-#define MT6391_CHIP_ID		0x91
-#define MT6397_CHIP_ID		0x97
-
 static const struct resource mt6397_rtc_resources[] = {
 	{
 		.start = MT6397_RTC_BASE,
@@ -86,148 +82,6 @@ static const struct mfd_cell mt6397_devs[] = {
 	}
 };
 
-static void mt6397_irq_lock(struct irq_data *data)
-{
-	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-
-	mutex_lock(&mt6397->irqlock);
-}
-
-static void mt6397_irq_sync_unlock(struct irq_data *data)
-{
-	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-
-	regmap_write(mt6397->regmap, mt6397->int_con[0],
-		     mt6397->irq_masks_cur[0]);
-	regmap_write(mt6397->regmap, mt6397->int_con[1],
-		     mt6397->irq_masks_cur[1]);
-
-	mutex_unlock(&mt6397->irqlock);
-}
-
-static void mt6397_irq_disable(struct irq_data *data)
-{
-	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-	int shift = data->hwirq & 0xf;
-	int reg = data->hwirq >> 4;
-
-	mt6397->irq_masks_cur[reg] &= ~BIT(shift);
-}
-
-static void mt6397_irq_enable(struct irq_data *data)
-{
-	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
-	int shift = data->hwirq & 0xf;
-	int reg = data->hwirq >> 4;
-
-	mt6397->irq_masks_cur[reg] |= BIT(shift);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int mt6397_irq_set_wake(struct irq_data *irq_data, unsigned int on)
-{
-	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(irq_data);
-	int shift = irq_data->hwirq & 0xf;
-	int reg = irq_data->hwirq >> 4;
-
-	if (on)
-		mt6397->wake_mask[reg] |= BIT(shift);
-	else
-		mt6397->wake_mask[reg] &= ~BIT(shift);
-
-	return 0;
-}
-#else
-#define mt6397_irq_set_wake NULL
-#endif
-
-static struct irq_chip mt6397_irq_chip = {
-	.name = "mt6397-irq",
-	.irq_bus_lock = mt6397_irq_lock,
-	.irq_bus_sync_unlock = mt6397_irq_sync_unlock,
-	.irq_enable = mt6397_irq_enable,
-	.irq_disable = mt6397_irq_disable,
-	.irq_set_wake = mt6397_irq_set_wake,
-};
-
-static void mt6397_irq_handle_reg(struct mt6397_chip *mt6397, int reg,
-		int irqbase)
-{
-	unsigned int status;
-	int i, irq, ret;
-
-	ret = regmap_read(mt6397->regmap, reg, &status);
-	if (ret) {
-		dev_err(mt6397->dev, "Failed to read irq status: %d\n", ret);
-		return;
-	}
-
-	for (i = 0; i < 16; i++) {
-		if (status & BIT(i)) {
-			irq = irq_find_mapping(mt6397->irq_domain, irqbase + i);
-			if (irq)
-				handle_nested_irq(irq);
-		}
-	}
-
-	regmap_write(mt6397->regmap, reg, status);
-}
-
-static irqreturn_t mt6397_irq_thread(int irq, void *data)
-{
-	struct mt6397_chip *mt6397 = data;
-
-	mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0);
-	mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16);
-
-	return IRQ_HANDLED;
-}
-
-static int mt6397_irq_domain_map(struct irq_domain *d, unsigned int irq,
-					irq_hw_number_t hw)
-{
-	struct mt6397_chip *mt6397 = d->host_data;
-
-	irq_set_chip_data(irq, mt6397);
-	irq_set_chip_and_handler(irq, &mt6397_irq_chip, handle_level_irq);
-	irq_set_nested_thread(irq, 1);
-	irq_set_noprobe(irq);
-
-	return 0;
-}
-
-static const struct irq_domain_ops mt6397_irq_domain_ops = {
-	.map = mt6397_irq_domain_map,
-};
-
-static int mt6397_irq_init(struct mt6397_chip *mt6397)
-{
-	int ret;
-
-	mutex_init(&mt6397->irqlock);
-
-	/* Mask all interrupt sources */
-	regmap_write(mt6397->regmap, mt6397->int_con[0], 0x0);
-	regmap_write(mt6397->regmap, mt6397->int_con[1], 0x0);
-
-	mt6397->irq_domain = irq_domain_add_linear(mt6397->dev->of_node,
-		MT6397_IRQ_NR, &mt6397_irq_domain_ops, mt6397);
-	if (!mt6397->irq_domain) {
-		dev_err(mt6397->dev, "could not create irq domain\n");
-		return -ENOMEM;
-	}
-
-	ret = devm_request_threaded_irq(mt6397->dev, mt6397->irq, NULL,
-		mt6397_irq_thread, IRQF_ONESHOT, "mt6397-pmic", mt6397);
-	if (ret) {
-		dev_err(mt6397->dev, "failed to register irq=%d; err: %d\n",
-			mt6397->irq, ret);
-		return ret;
-	}
-
-	return 0;
-}
-
 #ifdef CONFIG_PM_SLEEP
 static int mt6397_irq_suspend(struct device *dev)
 {
diff --git a/drivers/mfd/mt6397-irq.c b/drivers/mfd/mt6397-irq.c
new file mode 100644
index 000000000000..b2d3ce1f3115
--- /dev/null
+++ b/drivers/mfd/mt6397-irq.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2019 MediaTek Inc.
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/mt6323/core.h>
+#include <linux/mfd/mt6323/registers.h>
+#include <linux/mfd/mt6397/core.h>
+#include <linux/mfd/mt6397/registers.h>
+
+static void mt6397_irq_lock(struct irq_data *data)
+{
+	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+
+	mutex_lock(&mt6397->irqlock);
+}
+
+static void mt6397_irq_sync_unlock(struct irq_data *data)
+{
+	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+
+	regmap_write(mt6397->regmap, mt6397->int_con[0],
+		     mt6397->irq_masks_cur[0]);
+	regmap_write(mt6397->regmap, mt6397->int_con[1],
+		     mt6397->irq_masks_cur[1]);
+
+	mutex_unlock(&mt6397->irqlock);
+}
+
+static void mt6397_irq_disable(struct irq_data *data)
+{
+	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+	int shift = data->hwirq & 0xf;
+	int reg = data->hwirq >> 4;
+
+	mt6397->irq_masks_cur[reg] &= ~BIT(shift);
+}
+
+static void mt6397_irq_enable(struct irq_data *data)
+{
+	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(data);
+	int shift = data->hwirq & 0xf;
+	int reg = data->hwirq >> 4;
+
+	mt6397->irq_masks_cur[reg] |= BIT(shift);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int mt6397_irq_set_wake(struct irq_data *irq_data, unsigned int on)
+{
+	struct mt6397_chip *mt6397 = irq_data_get_irq_chip_data(irq_data);
+	int shift = irq_data->hwirq & 0xf;
+	int reg = irq_data->hwirq >> 4;
+
+	if (on)
+		mt6397->wake_mask[reg] |= BIT(shift);
+	else
+		mt6397->wake_mask[reg] &= ~BIT(shift);
+
+	return 0;
+}
+#else
+#define mt6397_irq_set_wake NULL
+#endif
+
+static struct irq_chip mt6397_irq_chip = {
+	.name = "mt6397-irq",
+	.irq_bus_lock = mt6397_irq_lock,
+	.irq_bus_sync_unlock = mt6397_irq_sync_unlock,
+	.irq_enable = mt6397_irq_enable,
+	.irq_disable = mt6397_irq_disable,
+	.irq_set_wake = mt6397_irq_set_wake,
+};
+
+static void mt6397_irq_handle_reg(struct mt6397_chip *mt6397, int reg,
+				  int irqbase)
+{
+	unsigned int status;
+	int i, irq, ret;
+
+	ret = regmap_read(mt6397->regmap, reg, &status);
+	if (ret) {
+		dev_err(mt6397->dev, "Failed to read irq status: %d\n", ret);
+		return;
+	}
+
+	for (i = 0; i < 16; i++) {
+		if (status & BIT(i)) {
+			irq = irq_find_mapping(mt6397->irq_domain, irqbase + i);
+			if (irq)
+				handle_nested_irq(irq);
+		}
+	}
+
+	regmap_write(mt6397->regmap, reg, status);
+}
+
+static irqreturn_t mt6397_irq_thread(int irq, void *data)
+{
+	struct mt6397_chip *mt6397 = data;
+
+	mt6397_irq_handle_reg(mt6397, mt6397->int_status[0], 0);
+	mt6397_irq_handle_reg(mt6397, mt6397->int_status[1], 16);
+
+	return IRQ_HANDLED;
+}
+
+static int mt6397_irq_domain_map(struct irq_domain *d, unsigned int irq,
+				 irq_hw_number_t hw)
+{
+	struct mt6397_chip *mt6397 = d->host_data;
+
+	irq_set_chip_data(irq, mt6397);
+	irq_set_chip_and_handler(irq, &mt6397_irq_chip, handle_level_irq);
+	irq_set_nested_thread(irq, 1);
+	irq_set_noprobe(irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mt6397_irq_domain_ops = {
+	.map = mt6397_irq_domain_map,
+};
+
+int mt6397_irq_init(struct mt6397_chip *chip)
+{
+	int ret;
+
+	mutex_init(&chip->irqlock);
+
+	switch (chip->chip_id) {
+	case MT6323_CHIP_ID:
+		chip->int_con[0] = MT6323_INT_CON0;
+		chip->int_con[1] = MT6323_INT_CON1;
+		chip->int_status[0] = MT6323_INT_STATUS0;
+		chip->int_status[1] = MT6323_INT_STATUS1;
+		break;
+
+	case MT6391_CHIP_ID:
+	case MT6397_CHIP_ID:
+		chip->int_con[0] = MT6397_INT_CON0;
+		chip->int_con[1] = MT6397_INT_CON1;
+		chip->int_status[0] = MT6397_INT_STATUS0;
+		chip->int_status[1] = MT6397_INT_STATUS1;
+		break;
+
+	default:
+		dev_err(chip->dev, "unsupported chip: 0x%x\n", chip->chip_id);
+		return -ENODEV;
+	}
+
+	/* Mask all interrupt sources */
+	regmap_write(chip->regmap, chip->int_con[0], 0x0);
+	regmap_write(chip->regmap, chip->int_con[1], 0x0);
+
+	chip->irq_domain = irq_domain_add_linear(chip->dev->of_node,
+						 MT6397_IRQ_NR,
+						 &mt6397_irq_domain_ops,
+						 chip);
+	if (!chip->irq_domain) {
+		dev_err(chip->dev, "could not create irq domain\n");
+		return -ENOMEM;
+	}
+
+	ret = devm_request_threaded_irq(chip->dev, chip->irq, NULL,
+					mt6397_irq_thread, IRQF_ONESHOT,
+					"mt6397-pmic", chip);
+	if (ret) {
+		dev_err(chip->dev, "failed to register irq=%d; err: %d\n",
+			chip->irq, ret);
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h
index 25a95e72179b..9320c2a1e3e6 100644
--- a/include/linux/mfd/mt6397/core.h
+++ b/include/linux/mfd/mt6397/core.h
@@ -7,6 +7,12 @@
 #ifndef __MFD_MT6397_CORE_H__
 #define __MFD_MT6397_CORE_H__
 
+enum chip_id {
+	MT6323_CHIP_ID = 0x23,
+	MT6391_CHIP_ID = 0x91,
+	MT6397_CHIP_ID = 0x97,
+};
+
 enum mt6397_irq_numbers {
 	MT6397_IRQ_SPKL_AB = 0,
 	MT6397_IRQ_SPKR_AB,
@@ -54,6 +60,9 @@ struct mt6397_chip {
 	u16 irq_masks_cache[2];
 	u16 int_con[2];
 	u16 int_status[2];
+	u16 chip_id;
 };
 
+int mt6397_irq_init(struct mt6397_chip *chip);
+
 #endif /* __MFD_MT6397_CORE_H__ */
-- 
cgit v1.2.3


From bbd1b70639f785a970d998f35155c713f975e3ac Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Thu, 8 Aug 2019 15:40:06 -0500
Subject: ACPI/PPTT: Add support for ACPI 6.3 thread flag

ACPI 6.3 adds a flag to the CPU node to indicate whether
the given PE is a thread. Add a function to return that
information for a given linux logical CPU.

Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Robert Richter <rrichter@marvell.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/acpi/pptt.c  | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/acpi.h |  5 +++++
 2 files changed, 57 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 1e7ac0bd0d3a..f31544d3656e 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -540,6 +540,44 @@ static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag)
 	return retval;
 }
 
+/**
+ * check_acpi_cpu_flag() - Determine if CPU node has a flag set
+ * @cpu: Kernel logical CPU number
+ * @rev: The minimum PPTT revision defining the flag
+ * @flag: The flag itself
+ *
+ * Check the node representing a CPU for a given flag.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, the CPU cannot be found or
+ *	   the table revision isn't new enough.
+ *	   1, any passed flag set
+ *	   0, flag unset
+ */
+static int check_acpi_cpu_flag(unsigned int cpu, int rev, u32 flag)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct acpi_pptt_processor *cpu_node = NULL;
+	int ret = -ENOENT;
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		acpi_pptt_warn_missing();
+		return ret;
+	}
+
+	if (table->revision >= rev)
+		cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
+
+	if (cpu_node)
+		ret = (cpu_node->flags & flag) != 0;
+
+	acpi_put_table(table);
+
+	return ret;
+}
+
 /**
  * acpi_find_last_cache_level() - Determines the number of cache levels for a PE
  * @cpu: Kernel logical CPU number
@@ -604,6 +642,20 @@ int cache_setup_acpi(unsigned int cpu)
 	return status;
 }
 
+/**
+ * acpi_pptt_cpu_is_thread() - Determine if CPU is a thread
+ * @cpu: Kernel logical CPU number
+ *
+ * Return: 1, a thread
+ *         0, not a thread
+ *         -ENOENT ,if the PPTT doesn't exist, the CPU cannot be found or
+ *         the table revision isn't new enough.
+ */
+int acpi_pptt_cpu_is_thread(unsigned int cpu)
+{
+	return check_acpi_cpu_flag(cpu, 2, ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD);
+}
+
 /**
  * find_acpi_cpu_topology() - Determine a unique topology value for a given CPU
  * @cpu: Kernel logical CPU number
@@ -664,7 +716,6 @@ int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
 	return ret;
 }
 
-
 /**
  * find_acpi_cpu_topology_package() - Determine a unique CPU package value
  * @cpu: Kernel logical CPU number
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 9426b9aaed86..9d0e20a2ac83 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1302,11 +1302,16 @@ static inline int lpit_read_residency_count_address(u64 *address)
 #endif
 
 #ifdef CONFIG_ACPI_PPTT
+int acpi_pptt_cpu_is_thread(unsigned int cpu);
 int find_acpi_cpu_topology(unsigned int cpu, int level);
 int find_acpi_cpu_topology_package(unsigned int cpu);
 int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
 int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
 #else
+static inline int acpi_pptt_cpu_is_thread(unsigned int cpu)
+{
+	return -EINVAL;
+}
 static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
 {
 	return -EINVAL;
-- 
cgit v1.2.3


From ffbf23d50353915dc2622a3b7b4ddc678165f92d Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@nxp.com>
Date: Thu, 1 Aug 2019 12:56:36 +0300
Subject: firmware: imx: Add DSP IPC protocol interface

Some of i.MX8 processors (e.g i.MX8QM, i.MX8QXP) contain
the Tensilica HiFi4 DSP for advanced pre- and post-audio
processing.

The communication between Host CPU and DSP firmware is
taking place using a shared memory area for message passing
and a dedicated Messaging Unit for notifications.

DSP IPC protocol offers a doorbell interface using
imx-mailbox API.

We use 4 MU channels (2 x TXDB, 2 x RXDB) to implement a
request-reply protocol.

Connection 0 (txdb0, rxdb0):
        - Host writes messasge to shared memory [SHMEM]
	- Host sends a request [MU]
	- DSP handles request [SHMEM]
	- DSP sends reply [MU]

Connection 1 (txdb1, rxdb1):
	- DSP writes a message to shared memory [SHMEM]
	- DSP sends a request [MU]
	- Host handles request [SHMEM]
	- Host sends reply [MU]

The protocol interface will be used by a Host client to
communicate with the DSP. First client will be the i.MX8
part from Sound Open Firmware infrastructure.

The protocol offers the following interface:

On Tx:
   - imx_dsp_ring_doorbell, will be called to notify the DSP
   that it needs to handle a request.

On Rx:
   - clients need to provide two callbacks:
	.handle_reply
	.handle_request
  - the callbacks will be used by the protocol on
    notification arrival from DSP.

Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Reviewed-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/firmware/imx/Kconfig     |  11 +++
 drivers/firmware/imx/Makefile    |   1 +
 drivers/firmware/imx/imx-dsp.c   | 155 +++++++++++++++++++++++++++++++++++++++
 include/linux/firmware/imx/dsp.h |  67 +++++++++++++++++
 4 files changed, 234 insertions(+)
 create mode 100644 drivers/firmware/imx/imx-dsp.c
 create mode 100644 include/linux/firmware/imx/dsp.h

(limited to 'include')

diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig
index 42b566f8903f..0dbee32da4c6 100644
--- a/drivers/firmware/imx/Kconfig
+++ b/drivers/firmware/imx/Kconfig
@@ -1,4 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0-only
+config IMX_DSP
+	bool "IMX DSP Protocol driver"
+	depends on IMX_MBOX
+	help
+	  This enables DSP IPC protocol between host AP (Linux)
+	  and the firmware running on DSP.
+	  DSP exists on some i.MX8 processors (e.g i.MX8QM, i.MX8QXP).
+
+	  It acts like a doorbell. Client might use shared memory to
+	  exchange information with DSP side.
+
 config IMX_SCU
 	bool "IMX SCU Protocol driver"
 	depends on IMX_MBOX
diff --git a/drivers/firmware/imx/Makefile b/drivers/firmware/imx/Makefile
index 802c4ad8e8f9..08bc9ddfbdfb 100644
--- a/drivers/firmware/imx/Makefile
+++ b/drivers/firmware/imx/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_IMX_DSP)		+= imx-dsp.o
 obj-$(CONFIG_IMX_SCU)		+= imx-scu.o misc.o imx-scu-irq.o
 obj-$(CONFIG_IMX_SCU_PD)	+= scu-pd.o
diff --git a/drivers/firmware/imx/imx-dsp.c b/drivers/firmware/imx/imx-dsp.c
new file mode 100644
index 000000000000..a43d2db5cbdb
--- /dev/null
+++ b/drivers/firmware/imx/imx-dsp.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2019 NXP
+ *  Author: Daniel Baluta <daniel.baluta@nxp.com>
+ *
+ * Implementation of the DSP IPC interface (host side)
+ */
+
+#include <linux/firmware/imx/dsp.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/*
+ * imx_dsp_ring_doorbell - triggers an interrupt on the other side (DSP)
+ *
+ * @dsp: DSP IPC handle
+ * @chan_idx: index of the channel where to trigger the interrupt
+ *
+ * Returns non-negative value for success, negative value for error
+ */
+int imx_dsp_ring_doorbell(struct imx_dsp_ipc *ipc, unsigned int idx)
+{
+	int ret;
+	struct imx_dsp_chan *dsp_chan;
+
+	if (idx >= DSP_MU_CHAN_NUM)
+		return -EINVAL;
+
+	dsp_chan = &ipc->chans[idx];
+	ret = mbox_send_message(dsp_chan->ch, NULL);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL(imx_dsp_ring_doorbell);
+
+/*
+ * imx_dsp_handle_rx - rx callback used by imx mailbox
+ *
+ * @c: mbox client
+ * @msg: message received
+ *
+ * Users of DSP IPC will need to privde handle_reply and handle_request
+ * callbacks.
+ */
+static void imx_dsp_handle_rx(struct mbox_client *c, void *msg)
+{
+	struct imx_dsp_chan *chan = container_of(c, struct imx_dsp_chan, cl);
+
+	if (chan->idx == 0) {
+		chan->ipc->ops->handle_reply(chan->ipc);
+	} else {
+		chan->ipc->ops->handle_request(chan->ipc);
+		imx_dsp_ring_doorbell(chan->ipc, 1);
+	}
+}
+
+static int imx_dsp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct imx_dsp_ipc *dsp_ipc;
+	struct imx_dsp_chan *dsp_chan;
+	struct mbox_client *cl;
+	char *chan_name;
+	int ret;
+	int i, j;
+
+	device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
+
+	dsp_ipc = devm_kzalloc(dev, sizeof(*dsp_ipc), GFP_KERNEL);
+	if (!dsp_ipc)
+		return -ENOMEM;
+
+	for (i = 0; i < DSP_MU_CHAN_NUM; i++) {
+		if (i < 2)
+			chan_name = kasprintf(GFP_KERNEL, "txdb%d", i);
+		else
+			chan_name = kasprintf(GFP_KERNEL, "rxdb%d", i - 2);
+
+		if (!chan_name)
+			return -ENOMEM;
+
+		dsp_chan = &dsp_ipc->chans[i];
+		cl = &dsp_chan->cl;
+		cl->dev = dev;
+		cl->tx_block = false;
+		cl->knows_txdone = true;
+		cl->rx_callback = imx_dsp_handle_rx;
+
+		dsp_chan->ipc = dsp_ipc;
+		dsp_chan->idx = i % 2;
+		dsp_chan->ch = mbox_request_channel_byname(cl, chan_name);
+		if (IS_ERR(dsp_chan->ch)) {
+			ret = PTR_ERR(dsp_chan->ch);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "Failed to request mbox chan %s ret %d\n",
+					chan_name, ret);
+			goto out;
+		}
+
+		dev_dbg(dev, "request mbox chan %s\n", chan_name);
+		/* chan_name is not used anymore by framework */
+		kfree(chan_name);
+	}
+
+	dsp_ipc->dev = dev;
+
+	dev_set_drvdata(dev, dsp_ipc);
+
+	dev_info(dev, "NXP i.MX DSP IPC initialized\n");
+
+	return devm_of_platform_populate(dev);
+out:
+	kfree(chan_name);
+	for (j = 0; j < i; j++) {
+		dsp_chan = &dsp_ipc->chans[j];
+		mbox_free_channel(dsp_chan->ch);
+	}
+
+	return ret;
+}
+
+static int imx_dsp_remove(struct platform_device *pdev)
+{
+	struct imx_dsp_chan *dsp_chan;
+	struct imx_dsp_ipc *dsp_ipc;
+	int i;
+
+	dsp_ipc = dev_get_drvdata(&pdev->dev);
+
+	for (i = 0; i < DSP_MU_CHAN_NUM; i++) {
+		dsp_chan = &dsp_ipc->chans[i];
+		mbox_free_channel(dsp_chan->ch);
+	}
+
+	return 0;
+}
+
+static struct platform_driver imx_dsp_driver = {
+	.driver = {
+		.name = "imx-dsp",
+	},
+	.probe = imx_dsp_probe,
+	.remove = imx_dsp_remove,
+};
+builtin_platform_driver(imx_dsp_driver);
+
+MODULE_AUTHOR("Daniel Baluta <daniel.baluta@nxp.com>");
+MODULE_DESCRIPTION("IMX DSP IPC protocol driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/firmware/imx/dsp.h b/include/linux/firmware/imx/dsp.h
new file mode 100644
index 000000000000..7562099c9e46
--- /dev/null
+++ b/include/linux/firmware/imx/dsp.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2019 NXP
+ *
+ * Header file for the DSP IPC implementation
+ */
+
+#ifndef _IMX_DSP_IPC_H
+#define _IMX_DSP_IPC_H
+
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/mailbox_client.h>
+
+#define DSP_MU_CHAN_NUM		4
+
+struct imx_dsp_chan {
+	struct imx_dsp_ipc *ipc;
+	struct mbox_client cl;
+	struct mbox_chan *ch;
+	char *name;
+	int idx;
+};
+
+struct imx_dsp_ops {
+	void (*handle_reply)(struct imx_dsp_ipc *ipc);
+	void (*handle_request)(struct imx_dsp_ipc *ipc);
+};
+
+struct imx_dsp_ipc {
+	/* Host <-> DSP communication uses 2 txdb and 2 rxdb channels */
+	struct imx_dsp_chan chans[DSP_MU_CHAN_NUM];
+	struct device *dev;
+	struct imx_dsp_ops *ops;
+	void *private_data;
+};
+
+static inline void imx_dsp_set_data(struct imx_dsp_ipc *ipc, void *data)
+{
+	if (!ipc)
+		return;
+
+	ipc->private_data = data;
+}
+
+static inline void *imx_dsp_get_data(struct imx_dsp_ipc *ipc)
+{
+	if (!ipc)
+		return NULL;
+
+	return ipc->private_data;
+}
+
+#if IS_ENABLED(CONFIG_IMX_DSP)
+
+int imx_dsp_ring_doorbell(struct imx_dsp_ipc *dsp, unsigned int chan_idx);
+
+#else
+
+static inline int imx_dsp_ring_doorbell(struct imx_dsp_ipc *ipc,
+					unsigned int chan_idx)
+{
+	return -ENOTSUPP;
+}
+
+#endif
+#endif /* _IMX_DSP_IPC_H */
-- 
cgit v1.2.3


From 72a7720fca37fec0daf295923f17ac5d88a613e1 Mon Sep 17 00:00:00 2001
From: Kamal Heib <kamalheib1@gmail.com>
Date: Wed, 7 Aug 2019 13:31:35 +0300
Subject: RDMA: Introduce ib_port_phys_state enum

In order to improve readability, add ib_port_phys_state enum to replace
the use of magic numbers.

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Reviewed-by: Andrew Boyer <aboyer@tobark.org>
Acked-by: Michal Kalderon <michal.kalderon@marvell.com>
Acked-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20190807103138.17219-2-kamalheib1@gmail.com
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/sysfs.c              | 30 ++++++++++++++++++----------
 drivers/infiniband/hw/bnxt_re/ib_verbs.c     |  4 ++--
 drivers/infiniband/hw/efa/efa_verbs.c        |  2 +-
 drivers/infiniband/hw/hns/hns_roce_device.h  | 10 ----------
 drivers/infiniband/hw/hns/hns_roce_main.c    |  3 ++-
 drivers/infiniband/hw/mlx4/main.c            |  3 ++-
 drivers/infiniband/hw/mlx5/main.c            |  4 ++--
 drivers/infiniband/hw/ocrdma/ocrdma_verbs.c  |  4 ++--
 drivers/infiniband/hw/qedr/verbs.c           |  4 ++--
 drivers/infiniband/hw/usnic/usnic_ib_verbs.c |  7 ++++---
 drivers/infiniband/sw/rxe/rxe.h              |  4 ----
 drivers/infiniband/sw/rxe/rxe_param.h        |  2 +-
 drivers/infiniband/sw/rxe/rxe_verbs.c        |  6 +++---
 drivers/infiniband/sw/siw/siw_verbs.c        |  3 ++-
 include/rdma/ib_verbs.h                      | 10 ++++++++++
 15 files changed, 53 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index b477295a96c2..7a50cedcef1f 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -289,6 +289,24 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
 		       ib_width_enum_to_int(attr.active_width), speed);
 }
 
+static const char *phys_state_to_str(enum ib_port_phys_state phys_state)
+{
+	static const char * phys_state_str[] = {
+		"<unknown>",
+		"Sleep",
+		"Polling",
+		"Disabled",
+		"PortConfigurationTraining",
+		"LinkUp",
+		"LinkErrorRecovery",
+		"Phy Test",
+	};
+
+	if (phys_state < ARRAY_SIZE(phys_state_str))
+		return phys_state_str[phys_state];
+	return "<unknown>";
+}
+
 static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
 			       char *buf)
 {
@@ -300,16 +318,8 @@ static ssize_t phys_state_show(struct ib_port *p, struct port_attribute *unused,
 	if (ret)
 		return ret;
 
-	switch (attr.phys_state) {
-	case 1:  return sprintf(buf, "1: Sleep\n");
-	case 2:  return sprintf(buf, "2: Polling\n");
-	case 3:  return sprintf(buf, "3: Disabled\n");
-	case 4:  return sprintf(buf, "4: PortConfigurationTraining\n");
-	case 5:  return sprintf(buf, "5: LinkUp\n");
-	case 6:  return sprintf(buf, "6: LinkErrorRecovery\n");
-	case 7:  return sprintf(buf, "7: Phy Test\n");
-	default: return sprintf(buf, "%d: <unknown>\n", attr.phys_state);
-	}
+	return sprintf(buf, "%d: %s\n", attr.phys_state,
+		       phys_state_to_str(attr.phys_state));
 }
 
 static ssize_t link_layer_show(struct ib_port *p, struct port_attribute *unused,
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 098ab883733e..f9e97d0cc459 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -220,10 +220,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 
 	if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
 		port_attr->state = IB_PORT_ACTIVE;
-		port_attr->phys_state = 5;
+		port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	} else {
 		port_attr->state = IB_PORT_DOWN;
-		port_attr->phys_state = 3;
+		port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 	}
 	port_attr->max_mtu = IB_MTU_4096;
 	port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 32d3b3deabce..70851bd7f801 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -333,7 +333,7 @@ int efa_query_port(struct ib_device *ibdev, u8 port,
 	props->lmc = 1;
 
 	props->state = IB_PORT_ACTIVE;
-	props->phys_state = 5;
+	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	props->gid_tbl_len = 1;
 	props->pkey_tbl_len = 1;
 	props->active_speed = IB_SPEED_EDR;
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index b39497a13b61..12a2b8565771 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -989,16 +989,6 @@ struct hns_roce_hw {
 	const struct ib_device_ops *hns_roce_dev_srq_ops;
 };
 
-enum hns_phy_state {
-	HNS_ROCE_PHY_SLEEP		= 1,
-	HNS_ROCE_PHY_POLLING		= 2,
-	HNS_ROCE_PHY_DISABLED		= 3,
-	HNS_ROCE_PHY_TRAINING		= 4,
-	HNS_ROCE_PHY_LINKUP		= 5,
-	HNS_ROCE_PHY_LINKERR		= 6,
-	HNS_ROCE_PHY_TEST		= 7
-};
-
 struct hns_roce_dev {
 	struct ib_device	ib_dev;
 	struct platform_device  *pdev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1e4ba48f5613..1b757cc924c3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -262,7 +262,8 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
 	props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ?
 			IB_PORT_ACTIVE : IB_PORT_DOWN;
 	props->phys_state = (props->state == IB_PORT_ACTIVE) ?
-			     HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED;
+			     IB_PORT_PHYS_STATE_LINK_UP :
+			     IB_PORT_PHYS_STATE_DISABLED;
 
 	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 8790101facb7..8d2f1e38b891 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -734,7 +734,8 @@ out:
 
 static u8 state_to_phys_state(enum ib_port_state state)
 {
-	return state == IB_PORT_ACTIVE ? 5 : 3;
+	return state == IB_PORT_ACTIVE ?
+		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
 }
 
 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 4a3d700cd783..af1986b4aa7d 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -535,7 +535,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
 	props->pkey_tbl_len     = 1;
 	props->state            = IB_PORT_DOWN;
-	props->phys_state       = 3;
+	props->phys_state       = IB_PORT_PHYS_STATE_DISABLED;
 
 	mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
 	props->qkey_viol_cntr = qkey_viol_cntr;
@@ -561,7 +561,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 
 	if (netif_running(ndev) && netif_carrier_ok(ndev)) {
 		props->state      = IB_PORT_ACTIVE;
-		props->phys_state = 5;
+		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	}
 
 	ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index bccc11378109..e8267e590772 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -163,10 +163,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
 	netdev = dev->nic_info.netdev;
 	if (netif_running(netdev) && netif_oper_up(netdev)) {
 		port_state = IB_PORT_ACTIVE;
-		props->phys_state = 5;
+		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	} else {
 		port_state = IB_PORT_DOWN;
-		props->phys_state = 3;
+		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 	}
 	props->max_mtu = IB_MTU_4096;
 	props->active_mtu = iboe_get_mtu(netdev->mtu);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 0c6a4bc848f5..6f3ce86019b7 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -221,10 +221,10 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
 	/* *attr being zeroed by the caller, avoid zeroing it here */
 	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
 		attr->state = IB_PORT_ACTIVE;
-		attr->phys_state = 5;
+		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	} else {
 		attr->state = IB_PORT_DOWN;
-		attr->phys_state = 3;
+		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 	}
 	attr->max_mtu = IB_MTU_4096;
 	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index a354c7c86547..556b8e44a51c 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -356,13 +356,14 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
 
 	if (!us_ibdev->ufdev->link_up) {
 		props->state = IB_PORT_DOWN;
-		props->phys_state = 3;
+		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 	} else if (!us_ibdev->ufdev->inaddr) {
 		props->state = IB_PORT_INIT;
-		props->phys_state = 4;
+		props->phys_state =
+			IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
 	} else {
 		props->state = IB_PORT_ACTIVE;
-		props->phys_state = 5;
+		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	}
 
 	props->port_cap_flags = 0;
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index ecf6e659c0da..fb07eed9e402 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -65,10 +65,6 @@
  */
 #define RXE_UVERBS_ABI_VERSION		2
 
-#define RDMA_LINK_PHYS_STATE_LINK_UP	(5)
-#define RDMA_LINK_PHYS_STATE_DISABLED	(3)
-#define RDMA_LINK_PHYS_STATE_POLLING	(2)
-
 #define RXE_ROCE_V2_SPORT		(0xc000)
 
 static inline u32 rxe_crc32(struct rxe_dev *rxe,
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 1abed47ca221..fe5207386700 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -154,7 +154,7 @@ enum rxe_port_param {
 	RXE_PORT_ACTIVE_WIDTH		= IB_WIDTH_1X,
 	RXE_PORT_ACTIVE_SPEED		= 1,
 	RXE_PORT_PKEY_TBL_LEN		= 64,
-	RXE_PORT_PHYS_STATE		= 2,
+	RXE_PORT_PHYS_STATE		= IB_PORT_PHYS_STATE_POLLING,
 	RXE_PORT_SUBNET_PREFIX		= 0xfe80000000000000ULL,
 };
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 4ebdfcf4d33e..623129f27f5a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -69,11 +69,11 @@ static int rxe_query_port(struct ib_device *dev,
 			      &attr->active_width);
 
 	if (attr->state == IB_PORT_ACTIVE)
-		attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
+		attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
 	else if (dev_get_flags(rxe->ndev) & IFF_UP)
-		attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
+		attr->phys_state = IB_PORT_PHYS_STATE_POLLING;
 	else
-		attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;
+		attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 
 	mutex_unlock(&rxe->usdev_lock);
 
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 32dc79d0e898..404e7ca4b30c 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -206,7 +206,8 @@ int siw_query_port(struct ib_device *base_dev, u8 port,
 	attr->gid_tbl_len = 1;
 	attr->max_msg_sz = -1;
 	attr->max_mtu = ib_mtu_int_to_enum(sdev->netdev->mtu);
-	attr->phys_state = sdev->state == IB_PORT_ACTIVE ? 5 : 3;
+	attr->phys_state = sdev->state == IB_PORT_ACTIVE ?
+		IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
 	attr->pkey_tbl_len = 1;
 	attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
 	attr->state = sdev->state;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 0ecda7d15df2..391499008a22 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -493,6 +493,16 @@ enum ib_port_state {
 	IB_PORT_ACTIVE_DEFER	= 5
 };
 
+enum ib_port_phys_state {
+	IB_PORT_PHYS_STATE_SLEEP = 1,
+	IB_PORT_PHYS_STATE_POLLING = 2,
+	IB_PORT_PHYS_STATE_DISABLED = 3,
+	IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING = 4,
+	IB_PORT_PHYS_STATE_LINK_UP = 5,
+	IB_PORT_PHYS_STATE_LINK_ERROR_RECOVERY = 6,
+	IB_PORT_PHYS_STATE_PHY_TEST = 7,
+};
+
 enum ib_port_width {
 	IB_WIDTH_1X	= 1,
 	IB_WIDTH_2X	= 16,
-- 
cgit v1.2.3


From 1eee4228a583b20082367e5cdce403bbdad2d7f1 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Wed, 24 Jul 2019 13:36:09 -0500
Subject: drm/amdgpu: add renoir asic_type enum

This patch adds renoir to amd_asic_type enum and amdgpu_asic_name[].

Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 include/drm/amd_asic_type.h                | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 682833f90fdd..dbe86e355ead 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -102,6 +102,7 @@ static const char *amdgpu_asic_name[] = {
 	"VEGA20",
 	"RAVEN",
 	"ARCTURUS",
+	"RENOIR",
 	"NAVI10",
 	"NAVI14",
 	"NAVI12",
diff --git a/include/drm/amd_asic_type.h b/include/drm/amd_asic_type.h
index 737a1e374f0c..296aab724677 100644
--- a/include/drm/amd_asic_type.h
+++ b/include/drm/amd_asic_type.h
@@ -50,6 +50,7 @@ enum amd_asic_type {
 	CHIP_VEGA20,
 	CHIP_RAVEN,
 	CHIP_ARCTURUS,
+	CHIP_RENOIR,
 	CHIP_NAVI10,
 	CHIP_NAVI14,
 	CHIP_NAVI12,
-- 
cgit v1.2.3


From ac5656d8a4cdd93cd2c74355ed12e5617817e0e7 Mon Sep 17 00:00:00 2001
From: Aaron Goidel <acgoide@tycho.nsa.gov>
Date: Mon, 12 Aug 2019 11:20:00 -0400
Subject: fanotify, inotify, dnotify, security: add security hook for fs
 notifications

As of now, setting watches on filesystem objects has, at most, applied a
check for read access to the inode, and in the case of fanotify, requires
CAP_SYS_ADMIN. No specific security hook or permission check has been
provided to control the setting of watches. Using any of inotify, dnotify,
or fanotify, it is possible to observe, not only write-like operations, but
even read access to a file. Modeling the watch as being merely a read from
the file is insufficient for the needs of SELinux. This is due to the fact
that read access should not necessarily imply access to information about
when another process reads from a file. Furthermore, fanotify watches grant
more power to an application in the form of permission events. While
notification events are solely, unidirectional (i.e. they only pass
information to the receiving application), permission events are blocking.
Permission events make a request to the receiving application which will
then reply with a decision as to whether or not that action may be
completed. This causes the issue of the watching application having the
ability to exercise control over the triggering process. Without drawing a
distinction within the permission check, the ability to read would imply
the greater ability to control an application. Additionally, mount and
superblock watches apply to all files within the same mount or superblock.
Read access to one file should not necessarily imply the ability to watch
all files accessed within a given mount or superblock.

In order to solve these issues, a new LSM hook is implemented and has been
placed within the system calls for marking filesystem objects with inotify,
fanotify, and dnotify watches. These calls to the hook are placed at the
point at which the target path has been resolved and are provided with the
path struct, the mask of requested notification events, and the type of
object on which the mark is being set (inode, superblock, or mount). The
mask and obj_type have already been translated into common FS_* values
shared by the entirety of the fs notification infrastructure. The path
struct is passed rather than just the inode so that the mount is available,
particularly for mount watches. This also allows for use of the hook by
pathname-based security modules. However, since the hook is intended for
use even by inode based security modules, it is not placed under the
CONFIG_SECURITY_PATH conditional. Otherwise, the inode-based security
modules would need to enable all of the path hooks, even though they do not
use any of them.

This only provides a hook at the point of setting a watch, and presumes
that permission to set a particular watch implies the ability to receive
all notification about that object which match the mask. This is all that
is required for SELinux. If other security modules require additional hooks
or infrastructure to control delivery of notification, these can be added
by them. It does not make sense for us to propose hooks for which we have
no implementation. The understanding that all notifications received by the
requesting application are all strictly of a type for which the application
has been granted permission shows that this implementation is sufficient in
its coverage.

Security modules wishing to provide complete control over fanotify must
also implement a security_file_open hook that validates that the access
requested by the watching application is authorized. Fanotify has the issue
that it returns a file descriptor with the file mode specified during
fanotify_init() to the watching process on event. This is already covered
by the LSM security_file_open hook if the security module implements
checking of the requested file mode there. Otherwise, a watching process
can obtain escalated access to a file for which it has not been authorized.

The selinux_path_notify hook implementation works by adding five new file
permissions: watch, watch_mount, watch_sb, watch_reads, and watch_with_perm
(descriptions about which will follow), and one new filesystem permission:
watch (which is applied to superblock checks). The hook then decides which
subset of these permissions must be held by the requesting application
based on the contents of the provided mask and the obj_type. The
selinux_file_open hook already checks the requested file mode and therefore
ensures that a watching process cannot escalate its access through
fanotify.

The watch, watch_mount, and watch_sb permissions are the baseline
permissions for setting a watch on an object and each are a requirement for
any watch to be set on a file, mount, or superblock respectively. It should
be noted that having either of the other two permissions (watch_reads and
watch_with_perm) does not imply the watch, watch_mount, or watch_sb
permission. Superblock watches further require the filesystem watch
permission to the superblock. As there is no labeled object in view for
mounts, there is no specific check for mount watches beyond watch_mount to
the inode. Such a check could be added in the future, if a suitable labeled
object existed representing the mount.

The watch_reads permission is required to receive notifications from
read-exclusive events on filesystem objects. These events include accessing
a file for the purpose of reading and closing a file which has been opened
read-only. This distinction has been drawn in order to provide a direct
indication in the policy for this otherwise not obvious capability. Read
access to a file should not necessarily imply the ability to observe read
events on a file.

Finally, watch_with_perm only applies to fanotify masks since it is the
only way to set a mask which allows for the blocking, permission event.
This permission is needed for any watch which is of this type. Though
fanotify requires CAP_SYS_ADMIN, this is insufficient as it gives implicit
trust to root, which we do not do, and does not support least privilege.

Signed-off-by: Aaron Goidel <acgoide@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/notify/dnotify/dnotify.c         | 15 +++++++++---
 fs/notify/fanotify/fanotify_user.c  | 19 +++++++++++++--
 fs/notify/inotify/inotify_user.c    | 14 +++++++++--
 include/linux/lsm_hooks.h           |  9 ++++++-
 include/linux/security.h            | 10 ++++++--
 security/security.c                 |  6 +++++
 security/selinux/hooks.c            | 47 +++++++++++++++++++++++++++++++++++++
 security/selinux/include/classmap.h |  5 ++--
 8 files changed, 113 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 250369d6901d..87a7f61bc91c 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -22,6 +22,7 @@
 #include <linux/sched/signal.h>
 #include <linux/dnotify.h>
 #include <linux/init.h>
+#include <linux/security.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
@@ -288,6 +289,17 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 		goto out_err;
 	}
 
+	/*
+	 * convert the userspace DN_* "arg" to the internal FS_*
+	 * defined in fsnotify
+	 */
+	mask = convert_arg(arg);
+
+	error = security_path_notify(&filp->f_path, mask,
+			FSNOTIFY_OBJ_TYPE_INODE);
+	if (error)
+		goto out_err;
+
 	/* expect most fcntl to add new rather than augment old */
 	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
 	if (!dn) {
@@ -302,9 +314,6 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 		goto out_err;
 	}
 
-	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
-	mask = convert_arg(arg);
-
 	/* set up the new_fsn_mark and new_dn_mark */
 	new_fsn_mark = &new_dn_mark->fsn_mark;
 	fsnotify_init_mark(new_fsn_mark, dnotify_group);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index a90bb19dcfa2..8b4e2ad6d208 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -528,7 +528,8 @@ static const struct file_operations fanotify_fops = {
 };
 
 static int fanotify_find_path(int dfd, const char __user *filename,
-			      struct path *path, unsigned int flags)
+			      struct path *path, unsigned int flags, __u64 mask,
+			      unsigned int obj_type)
 {
 	int ret;
 
@@ -567,8 +568,15 @@ static int fanotify_find_path(int dfd, const char __user *filename,
 
 	/* you can only watch an inode if you have read permissions on it */
 	ret = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (ret) {
+		path_put(path);
+		goto out;
+	}
+
+	ret = security_path_notify(path, mask, obj_type);
 	if (ret)
 		path_put(path);
+
 out:
 	return ret;
 }
@@ -931,6 +939,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	__kernel_fsid_t __fsid, *fsid = NULL;
 	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+	unsigned int obj_type;
 	int ret;
 
 	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -945,8 +954,13 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 
 	switch (mark_type) {
 	case FAN_MARK_INODE:
+		obj_type = FSNOTIFY_OBJ_TYPE_INODE;
+		break;
 	case FAN_MARK_MOUNT:
+		obj_type = FSNOTIFY_OBJ_TYPE_VFSMOUNT;
+		break;
 	case FAN_MARK_FILESYSTEM:
+		obj_type = FSNOTIFY_OBJ_TYPE_SB;
 		break;
 	default:
 		return -EINVAL;
@@ -1014,7 +1028,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		goto fput_and_out;
 	}
 
-	ret = fanotify_find_path(dfd, pathname, &path, flags);
+	ret = fanotify_find_path(dfd, pathname, &path, flags,
+			(mask & ALL_FSNOTIFY_EVENTS), obj_type);
 	if (ret)
 		goto fput_and_out;
 
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 7b53598c8804..408e9917ed42 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -39,6 +39,7 @@
 #include <linux/poll.h>
 #include <linux/wait.h>
 #include <linux/memcontrol.h>
+#include <linux/security.h>
 
 #include "inotify.h"
 #include "../fdinfo.h"
@@ -342,7 +343,8 @@ static const struct file_operations inotify_fops = {
 /*
  * find_inode - resolve a user-given path to a specific inode
  */
-static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
+static int inotify_find_inode(const char __user *dirname, struct path *path,
+						unsigned int flags, __u64 mask)
 {
 	int error;
 
@@ -351,8 +353,15 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
 	error = inode_permission(path->dentry->d_inode, MAY_READ);
+	if (error) {
+		path_put(path);
+		return error;
+	}
+	error = security_path_notify(path, mask,
+				FSNOTIFY_OBJ_TYPE_INODE);
 	if (error)
 		path_put(path);
+
 	return error;
 }
 
@@ -744,7 +753,8 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
 	if (mask & IN_ONLYDIR)
 		flags |= LOOKUP_DIRECTORY;
 
-	ret = inotify_find_inode(pathname, &path, flags);
+	ret = inotify_find_inode(pathname, &path, flags,
+			(mask & IN_ALL_EVENTS));
 	if (ret)
 		goto fput_and_out;
 
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 47f58cfb6a19..ead98af9c602 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -339,6 +339,9 @@
  *	Check for permission to change root directory.
  *	@path contains the path structure.
  *	Return 0 if permission is granted.
+ * @path_notify:
+ *	Check permissions before setting a watch on events as defined by @mask,
+ *	on an object at @path, whose type is defined by @obj_type.
  * @inode_readlink:
  *	Check the permission to read the symbolic link.
  *	@dentry contains the dentry structure for the file link.
@@ -1535,7 +1538,9 @@ union security_list_options {
 	int (*path_chown)(const struct path *path, kuid_t uid, kgid_t gid);
 	int (*path_chroot)(const struct path *path);
 #endif
-
+	/* Needed for inode based security check */
+	int (*path_notify)(const struct path *path, u64 mask,
+				unsigned int obj_type);
 	int (*inode_alloc_security)(struct inode *inode);
 	void (*inode_free_security)(struct inode *inode);
 	int (*inode_init_security)(struct inode *inode, struct inode *dir,
@@ -1860,6 +1865,8 @@ struct security_hook_heads {
 	struct hlist_head path_chown;
 	struct hlist_head path_chroot;
 #endif
+	/* Needed for inode based modules as well */
+	struct hlist_head path_notify;
 	struct hlist_head inode_alloc_security;
 	struct hlist_head inode_free_security;
 	struct hlist_head inode_init_security;
diff --git a/include/linux/security.h b/include/linux/security.h
index 659071c2e57c..7d9c1da1f659 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -259,7 +259,8 @@ int security_dentry_create_files_as(struct dentry *dentry, int mode,
 					struct qstr *name,
 					const struct cred *old,
 					struct cred *new);
-
+int security_path_notify(const struct path *path, u64 mask,
+					unsigned int obj_type);
 int security_inode_alloc(struct inode *inode);
 void security_inode_free(struct inode *inode);
 int security_inode_init_security(struct inode *inode, struct inode *dir,
@@ -387,7 +388,6 @@ int security_ismaclabel(const char *name);
 int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
 int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
-
 void security_inode_invalidate_secctx(struct inode *inode);
 int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
 int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
@@ -621,6 +621,12 @@ static inline int security_move_mount(const struct path *from_path,
 	return 0;
 }
 
+static inline int security_path_notify(const struct path *path, u64 mask,
+				unsigned int obj_type)
+{
+	return 0;
+}
+
 static inline int security_inode_alloc(struct inode *inode)
 {
 	return 0;
diff --git a/security/security.c b/security/security.c
index 613a5c00e602..30687e1366b7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -871,6 +871,12 @@ int security_move_mount(const struct path *from_path, const struct path *to_path
 	return call_int_hook(move_mount, 0, from_path, to_path);
 }
 
+int security_path_notify(const struct path *path, u64 mask,
+				unsigned int obj_type)
+{
+	return call_int_hook(path_notify, 0, path, mask, obj_type);
+}
+
 int security_inode_alloc(struct inode *inode)
 {
 	int rc = lsm_inode_alloc(inode);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f77b314d0575..d55571c585ff 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -92,6 +92,8 @@
 #include <linux/kernfs.h>
 #include <linux/stringhash.h>	/* for hashlen_string() */
 #include <uapi/linux/mount.h>
+#include <linux/fsnotify.h>
+#include <linux/fanotify.h>
 
 #include "avc.h"
 #include "objsec.h"
@@ -3261,6 +3263,50 @@ static int selinux_inode_removexattr(struct dentry *dentry, const char *name)
 	return -EACCES;
 }
 
+static int selinux_path_notify(const struct path *path, u64 mask,
+						unsigned int obj_type)
+{
+	int ret;
+	u32 perm;
+
+	struct common_audit_data ad;
+
+	ad.type = LSM_AUDIT_DATA_PATH;
+	ad.u.path = *path;
+
+	/*
+	 * Set permission needed based on the type of mark being set.
+	 * Performs an additional check for sb watches.
+	 */
+	switch (obj_type) {
+	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
+		perm = FILE__WATCH_MOUNT;
+		break;
+	case FSNOTIFY_OBJ_TYPE_SB:
+		perm = FILE__WATCH_SB;
+		ret = superblock_has_perm(current_cred(), path->dentry->d_sb,
+						FILESYSTEM__WATCH, &ad);
+		if (ret)
+			return ret;
+		break;
+	case FSNOTIFY_OBJ_TYPE_INODE:
+		perm = FILE__WATCH;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* blocking watches require the file:watch_with_perm permission */
+	if (mask & (ALL_FSNOTIFY_PERM_EVENTS))
+		perm |= FILE__WATCH_WITH_PERM;
+
+	/* watches on read-like events need the file:watch_reads permission */
+	if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_CLOSE_NOWRITE))
+		perm |= FILE__WATCH_READS;
+
+	return path_has_perm(current_cred(), path, perm);
+}
+
 /*
  * Copy the inode security context value to the user.
  *
@@ -6798,6 +6844,7 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(inode_getsecid, selinux_inode_getsecid),
 	LSM_HOOK_INIT(inode_copy_up, selinux_inode_copy_up),
 	LSM_HOOK_INIT(inode_copy_up_xattr, selinux_inode_copy_up_xattr),
+	LSM_HOOK_INIT(path_notify, selinux_path_notify),
 
 	LSM_HOOK_INIT(kernfs_init_security, selinux_kernfs_init_security),
 
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 201f7e588a29..32e9b03be3dd 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -7,7 +7,8 @@
 
 #define COMMON_FILE_PERMS COMMON_FILE_SOCK_PERMS, "unlink", "link", \
     "rename", "execute", "quotaon", "mounton", "audit_access", \
-    "open", "execmod"
+	"open", "execmod", "watch", "watch_mount", "watch_sb", \
+	"watch_with_perm", "watch_reads"
 
 #define COMMON_SOCK_PERMS COMMON_FILE_SOCK_PERMS, "bind", "connect", \
     "listen", "accept", "getopt", "setopt", "shutdown", "recvfrom",  \
@@ -60,7 +61,7 @@ struct security_class_mapping secclass_map[] = {
 	{ "filesystem",
 	  { "mount", "remount", "unmount", "getattr",
 	    "relabelfrom", "relabelto", "associate", "quotamod",
-	    "quotaget", NULL } },
+	    "quotaget", "watch", NULL } },
 	{ "file",
 	  { COMMON_FILE_PERMS,
 	    "execute_no_trans", "entrypoint", NULL } },
-- 
cgit v1.2.3


From 3cb5f3aeaf07215ba86f0ed45f084a9056968dd5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 8 Aug 2019 20:02:08 -0700
Subject: scsi: qla2xxx: Modify NVMe include directives

Since struct sg_table is used in nvme-fc-driver.h, include
<linux/scatterlist.h> from that header file.

Since no definitions or declarations from <linux/blk-mq.h> are used in the
qla_nvme.h header file, do not include <linux/blk-mq.h> from that header
file.

Cc: Himanshu Madhani <hmadhani@marvell.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Himanshu Madhani <hmadhani@marvell.com>
Reviewed-by: Himanshu Madhani <hmadhani@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_nvme.h | 1 -
 include/linux/nvme-fc-driver.h  | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/scsi/qla2xxx/qla_nvme.h b/drivers/scsi/qla2xxx/qla_nvme.h
index 25a2b82d5095..ef912902d4e5 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.h
+++ b/drivers/scsi/qla2xxx/qla_nvme.h
@@ -7,7 +7,6 @@
 #ifndef __QLA_NVME_H
 #define __QLA_NVME_H
 
-#include <linux/blk-mq.h>
 #include <uapi/scsi/fc/fc_fs.h>
 #include <uapi/scsi/fc/fc_els.h>
 #include <linux/nvme-fc-driver.h>
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 98d904961b33..10f81629b9ce 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -6,6 +6,8 @@
 #ifndef _NVME_FC_DRIVER_H
 #define _NVME_FC_DRIVER_H 1
 
+#include <linux/scatterlist.h>
+
 
 /*
  * **********************  LLDD FC-NVME Host API ********************
-- 
cgit v1.2.3


From 7af0ab0d3aab951518b0d520f95e9f6b1995ec69 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:43 -0700
Subject: fs, fscrypt: move uapi definitions to new header <linux/fscrypt.h>

More fscrypt definitions are being added, and we shouldn't use a
disproportionate amount of space in <linux/fs.h> for fscrypt stuff.
So move the fscrypt definitions to a new header <linux/fscrypt.h>.

For source compatibility with existing userspace programs, <linux/fs.h>
still includes the new header.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 MAINTAINERS                  |  1 +
 include/linux/fscrypt.h      |  1 +
 include/uapi/linux/fs.h      | 54 +++------------------------------------
 include/uapi/linux/fscrypt.h | 61 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 51 deletions(-)
 create mode 100644 include/uapi/linux/fscrypt.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index a2c343ee3b2c..714ffdaaa920 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6603,6 +6603,7 @@ T:	git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git
 S:	Supported
 F:	fs/crypto/
 F:	include/linux/fscrypt*.h
+F:	include/uapi/linux/fscrypt.h
 F:	Documentation/filesystems/fscrypt.rst
 
 FSI SUBSYSTEM
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index bd8f207a2fb6..81c0c754f8b2 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <uapi/linux/fscrypt.h>
 
 #define FS_CRYPTO_BLOCK_SIZE		16
 
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 59c71fa8c553..41bd84d25a98 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -13,6 +13,9 @@
 #include <linux/limits.h>
 #include <linux/ioctl.h>
 #include <linux/types.h>
+#ifndef __KERNEL__
+#include <linux/fscrypt.h>
+#endif
 
 /* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
 #if !defined(__KERNEL__)
@@ -212,57 +215,6 @@ struct fsxattr {
 #define FS_IOC_GETFSLABEL		_IOR(0x94, 49, char[FSLABEL_MAX])
 #define FS_IOC_SETFSLABEL		_IOW(0x94, 50, char[FSLABEL_MAX])
 
-/*
- * File system encryption support
- */
-/* Policy provided via an ioctl on the topmost directory */
-#define FS_KEY_DESCRIPTOR_SIZE	8
-
-#define FS_POLICY_FLAGS_PAD_4		0x00
-#define FS_POLICY_FLAGS_PAD_8		0x01
-#define FS_POLICY_FLAGS_PAD_16		0x02
-#define FS_POLICY_FLAGS_PAD_32		0x03
-#define FS_POLICY_FLAGS_PAD_MASK	0x03
-#define FS_POLICY_FLAG_DIRECT_KEY	0x04	/* use master key directly */
-#define FS_POLICY_FLAGS_VALID		0x07
-
-/* Encryption algorithms */
-#define FS_ENCRYPTION_MODE_INVALID		0
-#define FS_ENCRYPTION_MODE_AES_256_XTS		1
-#define FS_ENCRYPTION_MODE_AES_256_GCM		2
-#define FS_ENCRYPTION_MODE_AES_256_CBC		3
-#define FS_ENCRYPTION_MODE_AES_256_CTS		4
-#define FS_ENCRYPTION_MODE_AES_128_CBC		5
-#define FS_ENCRYPTION_MODE_AES_128_CTS		6
-#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_ADIANTUM		9
-
-struct fscrypt_policy {
-	__u8 version;
-	__u8 contents_encryption_mode;
-	__u8 filenames_encryption_mode;
-	__u8 flags;
-	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
-};
-
-#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
-#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
-
-/* Parameters for passing an encryption key into the kernel keyring */
-#define FS_KEY_DESC_PREFIX		"fscrypt:"
-#define FS_KEY_DESC_PREFIX_SIZE		8
-
-/* Structure that userspace passes to the kernel keyring */
-#define FS_MAX_KEY_SIZE			64
-
-struct fscrypt_key {
-	__u32 mode;
-	__u8 raw[FS_MAX_KEY_SIZE];
-	__u32 size;
-};
-
 /*
  * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
  *
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
new file mode 100644
index 000000000000..26f6d2c19afd
--- /dev/null
+++ b/include/uapi/linux/fscrypt.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * fscrypt user API
+ *
+ * These ioctls can be used on filesystems that support fscrypt.  See the
+ * "User API" section of Documentation/filesystems/fscrypt.rst.
+ */
+#ifndef _UAPI_LINUX_FSCRYPT_H
+#define _UAPI_LINUX_FSCRYPT_H
+
+#include <linux/types.h>
+
+#define FS_KEY_DESCRIPTOR_SIZE	8
+
+/* Encryption policy flags */
+#define FS_POLICY_FLAGS_PAD_4		0x00
+#define FS_POLICY_FLAGS_PAD_8		0x01
+#define FS_POLICY_FLAGS_PAD_16		0x02
+#define FS_POLICY_FLAGS_PAD_32		0x03
+#define FS_POLICY_FLAGS_PAD_MASK	0x03
+#define FS_POLICY_FLAG_DIRECT_KEY	0x04	/* use master key directly */
+#define FS_POLICY_FLAGS_VALID		0x07
+
+/* Encryption algorithms */
+#define FS_ENCRYPTION_MODE_INVALID		0
+#define FS_ENCRYPTION_MODE_AES_256_XTS		1
+#define FS_ENCRYPTION_MODE_AES_256_GCM		2
+#define FS_ENCRYPTION_MODE_AES_256_CBC		3
+#define FS_ENCRYPTION_MODE_AES_256_CTS		4
+#define FS_ENCRYPTION_MODE_AES_128_CBC		5
+#define FS_ENCRYPTION_MODE_AES_128_CTS		6
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
+#define FS_ENCRYPTION_MODE_ADIANTUM		9
+
+struct fscrypt_policy {
+	__u8 version;
+	__u8 contents_encryption_mode;
+	__u8 filenames_encryption_mode;
+	__u8 flags;
+	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
+
+/* Parameters for passing an encryption key into the kernel keyring */
+#define FS_KEY_DESC_PREFIX		"fscrypt:"
+#define FS_KEY_DESC_PREFIX_SIZE		8
+
+/* Structure that userspace passes to the kernel keyring */
+#define FS_MAX_KEY_SIZE			64
+
+struct fscrypt_key {
+	__u32 mode;
+	__u8 raw[FS_MAX_KEY_SIZE];
+	__u32 size;
+};
+
+#endif /* _UAPI_LINUX_FSCRYPT_H */
-- 
cgit v1.2.3


From 2336d0deb2d4680349de59d6fbdfc338437be191 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:44 -0700
Subject: fscrypt: use FSCRYPT_ prefix for uapi constants

Prefix all filesystem encryption UAPI constants except the ioctl numbers
with "FSCRYPT_" rather than with "FS_".  This namespaces the constants
more appropriately and makes it clear that they are related specifically
to the filesystem encryption feature, and to the 'fscrypt_*' structures.
With some of the old names like "FS_POLICY_FLAGS_VALID", it was not
immediately clear that the constant had anything to do with encryption.

This is also useful because we'll be adding more encryption-related
constants, e.g. for the policy version, and we'd otherwise have to
choose whether to use unclear names like FS_POLICY_V1 or inconsistent
names like FS_ENCRYPTION_POLICY_V1.

For source compatibility with existing userspace programs, keep the old
names defined as aliases to the new names.

Finally, as long as new names are being defined anyway, I skipped
defining new names for the fscrypt mode numbers that aren't actually
used: INVALID (0), AES_256_GCM (2), AES_256_CBC (3), SPECK128_256_XTS
(7), and SPECK128_256_CTS (8).

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 Documentation/filesystems/fscrypt.rst | 40 ++++++++++-----------
 include/uapi/linux/fscrypt.h          | 65 ++++++++++++++++++++++-------------
 2 files changed, 62 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 82efa41b0e6c..d60b885c4024 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -225,9 +225,10 @@ a little endian number, except that:
   is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
   of the file's data encryption key.
 
-- In the "direct key" configuration (FS_POLICY_FLAG_DIRECT_KEY set in
-  the fscrypt_policy), the file's nonce is also appended to the IV.
-  Currently this is only allowed with the Adiantum encryption mode.
+- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY
+  set in the fscrypt_policy), the file's nonce is also appended to the
+  IV.  Currently this is only allowed with the Adiantum encryption
+  mode.
 
 Filenames encryption
 --------------------
@@ -274,14 +275,14 @@ empty directory or verifies that a directory or regular file already
 has the specified encryption policy.  It takes in a pointer to a
 :c:type:`struct fscrypt_policy`, defined as follows::
 
-    #define FS_KEY_DESCRIPTOR_SIZE  8
+    #define FSCRYPT_KEY_DESCRIPTOR_SIZE  8
 
     struct fscrypt_policy {
             __u8 version;
             __u8 contents_encryption_mode;
             __u8 filenames_encryption_mode;
             __u8 flags;
-            __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+            __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
     };
 
 This structure must be initialized as follows:
@@ -289,19 +290,18 @@ This structure must be initialized as follows:
 - ``version`` must be 0.
 
 - ``contents_encryption_mode`` and ``filenames_encryption_mode`` must
-  be set to constants from ``<linux/fs.h>`` which identify the
-  encryption modes to use.  If unsure, use
-  FS_ENCRYPTION_MODE_AES_256_XTS (1) for ``contents_encryption_mode``
-  and FS_ENCRYPTION_MODE_AES_256_CTS (4) for
-  ``filenames_encryption_mode``.
+  be set to constants from ``<linux/fscrypt.h>`` which identify the
+  encryption modes to use.  If unsure, use FSCRYPT_MODE_AES_256_XTS
+  (1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
+  (4) for ``filenames_encryption_mode``.
 
-- ``flags`` must contain a value from ``<linux/fs.h>`` which
+- ``flags`` must contain a value from ``<linux/fscrypt.h>`` which
   identifies the amount of NUL-padding to use when encrypting
-  filenames.  If unsure, use FS_POLICY_FLAGS_PAD_32 (0x3).
-  In addition, if the chosen encryption modes are both
-  FS_ENCRYPTION_MODE_ADIANTUM, this can contain
-  FS_POLICY_FLAG_DIRECT_KEY to specify that the master key should be
-  used directly, without key derivation.
+  filenames.  If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3).  In
+  addition, if the chosen encryption modes are both
+  FSCRYPT_MODE_ADIANTUM, this can contain
+  FSCRYPT_POLICY_FLAG_DIRECT_KEY to specify that the master key should
+  be used directly, without key derivation.
 
 - ``master_key_descriptor`` specifies how to find the master key in
   the keyring; see `Adding keys`_.  It is up to userspace to choose a
@@ -401,11 +401,11 @@ followed by the 16-character lower case hex representation of the
 ``master_key_descriptor`` that was set in the encryption policy.  The
 key payload must conform to the following structure::
 
-    #define FS_MAX_KEY_SIZE 64
+    #define FSCRYPT_MAX_KEY_SIZE 64
 
     struct fscrypt_key {
             u32 mode;
-            u8 raw[FS_MAX_KEY_SIZE];
+            u8 raw[FSCRYPT_MAX_KEY_SIZE];
             u32 size;
     };
 
@@ -574,7 +574,7 @@ much confusion if an encryption policy were to be added to or removed
 from anything other than an empty directory.)  The struct is defined
 as follows::
 
-    #define FS_KEY_DESCRIPTOR_SIZE  8
+    #define FSCRYPT_KEY_DESCRIPTOR_SIZE  8
     #define FS_KEY_DERIVATION_NONCE_SIZE 16
 
     struct fscrypt_context {
@@ -582,7 +582,7 @@ as follows::
             u8 contents_encryption_mode;
             u8 filenames_encryption_mode;
             u8 flags;
-            u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+            u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
             u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
     };
 
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 26f6d2c19afd..674b0452ef57 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -10,35 +10,30 @@
 
 #include <linux/types.h>
 
-#define FS_KEY_DESCRIPTOR_SIZE	8
+#define FSCRYPT_KEY_DESCRIPTOR_SIZE	8
 
 /* Encryption policy flags */
-#define FS_POLICY_FLAGS_PAD_4		0x00
-#define FS_POLICY_FLAGS_PAD_8		0x01
-#define FS_POLICY_FLAGS_PAD_16		0x02
-#define FS_POLICY_FLAGS_PAD_32		0x03
-#define FS_POLICY_FLAGS_PAD_MASK	0x03
-#define FS_POLICY_FLAG_DIRECT_KEY	0x04	/* use master key directly */
-#define FS_POLICY_FLAGS_VALID		0x07
+#define FSCRYPT_POLICY_FLAGS_PAD_4		0x00
+#define FSCRYPT_POLICY_FLAGS_PAD_8		0x01
+#define FSCRYPT_POLICY_FLAGS_PAD_16		0x02
+#define FSCRYPT_POLICY_FLAGS_PAD_32		0x03
+#define FSCRYPT_POLICY_FLAGS_PAD_MASK		0x03
+#define FSCRYPT_POLICY_FLAG_DIRECT_KEY		0x04	/* use master key directly */
+#define FSCRYPT_POLICY_FLAGS_VALID		0x07
 
 /* Encryption algorithms */
-#define FS_ENCRYPTION_MODE_INVALID		0
-#define FS_ENCRYPTION_MODE_AES_256_XTS		1
-#define FS_ENCRYPTION_MODE_AES_256_GCM		2
-#define FS_ENCRYPTION_MODE_AES_256_CBC		3
-#define FS_ENCRYPTION_MODE_AES_256_CTS		4
-#define FS_ENCRYPTION_MODE_AES_128_CBC		5
-#define FS_ENCRYPTION_MODE_AES_128_CTS		6
-#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_ADIANTUM		9
+#define FSCRYPT_MODE_AES_256_XTS		1
+#define FSCRYPT_MODE_AES_256_CTS		4
+#define FSCRYPT_MODE_AES_128_CBC		5
+#define FSCRYPT_MODE_AES_128_CTS		6
+#define FSCRYPT_MODE_ADIANTUM			9
 
 struct fscrypt_policy {
 	__u8 version;
 	__u8 contents_encryption_mode;
 	__u8 filenames_encryption_mode;
 	__u8 flags;
-	__u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+	__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 };
 
 #define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
@@ -46,16 +41,40 @@ struct fscrypt_policy {
 #define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
 
 /* Parameters for passing an encryption key into the kernel keyring */
-#define FS_KEY_DESC_PREFIX		"fscrypt:"
-#define FS_KEY_DESC_PREFIX_SIZE		8
+#define FSCRYPT_KEY_DESC_PREFIX		"fscrypt:"
+#define FSCRYPT_KEY_DESC_PREFIX_SIZE		8
 
 /* Structure that userspace passes to the kernel keyring */
-#define FS_MAX_KEY_SIZE			64
+#define FSCRYPT_MAX_KEY_SIZE			64
 
 struct fscrypt_key {
 	__u32 mode;
-	__u8 raw[FS_MAX_KEY_SIZE];
+	__u8 raw[FSCRYPT_MAX_KEY_SIZE];
 	__u32 size;
 };
+/**********************************************************************/
+
+/* old names; don't add anything new here! */
+#define FS_KEY_DESCRIPTOR_SIZE		FSCRYPT_KEY_DESCRIPTOR_SIZE
+#define FS_POLICY_FLAGS_PAD_4		FSCRYPT_POLICY_FLAGS_PAD_4
+#define FS_POLICY_FLAGS_PAD_8		FSCRYPT_POLICY_FLAGS_PAD_8
+#define FS_POLICY_FLAGS_PAD_16		FSCRYPT_POLICY_FLAGS_PAD_16
+#define FS_POLICY_FLAGS_PAD_32		FSCRYPT_POLICY_FLAGS_PAD_32
+#define FS_POLICY_FLAGS_PAD_MASK	FSCRYPT_POLICY_FLAGS_PAD_MASK
+#define FS_POLICY_FLAG_DIRECT_KEY	FSCRYPT_POLICY_FLAG_DIRECT_KEY
+#define FS_POLICY_FLAGS_VALID		FSCRYPT_POLICY_FLAGS_VALID
+#define FS_ENCRYPTION_MODE_INVALID	0	/* never used */
+#define FS_ENCRYPTION_MODE_AES_256_XTS	FSCRYPT_MODE_AES_256_XTS
+#define FS_ENCRYPTION_MODE_AES_256_GCM	2	/* never used */
+#define FS_ENCRYPTION_MODE_AES_256_CBC	3	/* never used */
+#define FS_ENCRYPTION_MODE_AES_256_CTS	FSCRYPT_MODE_AES_256_CTS
+#define FS_ENCRYPTION_MODE_AES_128_CBC	FSCRYPT_MODE_AES_128_CBC
+#define FS_ENCRYPTION_MODE_AES_128_CTS	FSCRYPT_MODE_AES_128_CTS
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS	7	/* removed */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS	8	/* removed */
+#define FS_ENCRYPTION_MODE_ADIANTUM	FSCRYPT_MODE_ADIANTUM
+#define FS_KEY_DESC_PREFIX		FSCRYPT_KEY_DESC_PREFIX
+#define FS_KEY_DESC_PREFIX_SIZE		FSCRYPT_KEY_DESC_PREFIX_SIZE
+#define FS_MAX_KEY_SIZE			FSCRYPT_MAX_KEY_SIZE
 
 #endif /* _UAPI_LINUX_FSCRYPT_H */
-- 
cgit v1.2.3


From 3b6df59bc4d242ac5847592de55d1ff327cd4549 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:44 -0700
Subject: fscrypt: use FSCRYPT_* definitions, not FS_*

Update fs/crypto/ to use the new names for the UAPI constants rather
than the old names, then make the old definitions conditional on
!__KERNEL__.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/crypto.c           |  2 +-
 fs/crypto/fname.c            |  2 +-
 fs/crypto/fscrypt_private.h  | 16 ++++++-------
 fs/crypto/keyinfo.c          | 53 ++++++++++++++++++++++----------------------
 fs/crypto/policy.c           | 14 ++++++------
 include/uapi/linux/fscrypt.h |  2 ++
 6 files changed, 46 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 3e4624cfe4b5..7502c1f0ede9 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -141,7 +141,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
 	memset(iv, 0, ci->ci_mode->ivsize);
 	iv->lblk_num = cpu_to_le64(lblk_num);
 
-	if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY)
+	if (ci->ci_flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY)
 		memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
 
 	if (ci->ci_essiv_tfm != NULL)
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 5cab3bb2d1fc..f4977d44d69b 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -182,7 +182,7 @@ bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
 				  u32 max_len, u32 *encrypted_len_ret)
 {
 	int padding = 4 << (inode->i_crypt_info->ci_flags &
-			    FS_POLICY_FLAGS_PAD_MASK);
+			    FSCRYPT_POLICY_FLAGS_PAD_MASK);
 	u32 encrypted_len;
 
 	if (orig_len > max_len)
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 4d715708c6e1..fae411b2f78d 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -34,7 +34,7 @@ struct fscrypt_context {
 	u8 contents_encryption_mode;
 	u8 filenames_encryption_mode;
 	u8 flags;
-	u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+	u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 	u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
 } __packed;
 
@@ -84,7 +84,7 @@ struct fscrypt_info {
 	u8 ci_data_mode;
 	u8 ci_filename_mode;
 	u8 ci_flags;
-	u8 ci_master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
+	u8 ci_master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 	u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
 };
 
@@ -98,16 +98,16 @@ typedef enum {
 static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
 					   u32 filenames_mode)
 {
-	if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC &&
-	    filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS)
+	if (contents_mode == FSCRYPT_MODE_AES_128_CBC &&
+	    filenames_mode == FSCRYPT_MODE_AES_128_CTS)
 		return true;
 
-	if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS &&
-	    filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS)
+	if (contents_mode == FSCRYPT_MODE_AES_256_XTS &&
+	    filenames_mode == FSCRYPT_MODE_AES_256_CTS)
 		return true;
 
-	if (contents_mode == FS_ENCRYPTION_MODE_ADIANTUM &&
-	    filenames_mode == FS_ENCRYPTION_MODE_ADIANTUM)
+	if (contents_mode == FSCRYPT_MODE_ADIANTUM &&
+	    filenames_mode == FSCRYPT_MODE_ADIANTUM)
 		return true;
 
 	return false;
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
index 212994300233..22345ddede11 100644
--- a/fs/crypto/keyinfo.c
+++ b/fs/crypto/keyinfo.c
@@ -20,7 +20,7 @@
 
 static struct crypto_shash *essiv_hash_tfm;
 
-/* Table of keys referenced by FS_POLICY_FLAG_DIRECT_KEY policies */
+/* Table of keys referenced by DIRECT_KEY policies */
 static DEFINE_HASHTABLE(fscrypt_master_keys, 6); /* 6 bits = 64 buckets */
 static DEFINE_SPINLOCK(fscrypt_master_keys_lock);
 
@@ -77,7 +77,7 @@ out:
  */
 static struct key *
 find_and_lock_process_key(const char *prefix,
-			  const u8 descriptor[FS_KEY_DESCRIPTOR_SIZE],
+			  const u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE],
 			  unsigned int min_keysize,
 			  const struct fscrypt_key **payload_ret)
 {
@@ -87,7 +87,7 @@ find_and_lock_process_key(const char *prefix,
 	const struct fscrypt_key *payload;
 
 	description = kasprintf(GFP_NOFS, "%s%*phN", prefix,
-				FS_KEY_DESCRIPTOR_SIZE, descriptor);
+				FSCRYPT_KEY_DESCRIPTOR_SIZE, descriptor);
 	if (!description)
 		return ERR_PTR(-ENOMEM);
 
@@ -105,7 +105,7 @@ find_and_lock_process_key(const char *prefix,
 	payload = (const struct fscrypt_key *)ukp->data;
 
 	if (ukp->datalen != sizeof(struct fscrypt_key) ||
-	    payload->size < 1 || payload->size > FS_MAX_KEY_SIZE) {
+	    payload->size < 1 || payload->size > FSCRYPT_MAX_KEY_SIZE) {
 		fscrypt_warn(NULL,
 			     "key with description '%s' has invalid payload",
 			     key->description);
@@ -129,32 +129,32 @@ invalid:
 }
 
 static struct fscrypt_mode available_modes[] = {
-	[FS_ENCRYPTION_MODE_AES_256_XTS] = {
+	[FSCRYPT_MODE_AES_256_XTS] = {
 		.friendly_name = "AES-256-XTS",
 		.cipher_str = "xts(aes)",
 		.keysize = 64,
 		.ivsize = 16,
 	},
-	[FS_ENCRYPTION_MODE_AES_256_CTS] = {
+	[FSCRYPT_MODE_AES_256_CTS] = {
 		.friendly_name = "AES-256-CTS-CBC",
 		.cipher_str = "cts(cbc(aes))",
 		.keysize = 32,
 		.ivsize = 16,
 	},
-	[FS_ENCRYPTION_MODE_AES_128_CBC] = {
+	[FSCRYPT_MODE_AES_128_CBC] = {
 		.friendly_name = "AES-128-CBC",
 		.cipher_str = "cbc(aes)",
 		.keysize = 16,
 		.ivsize = 16,
 		.needs_essiv = true,
 	},
-	[FS_ENCRYPTION_MODE_AES_128_CTS] = {
+	[FSCRYPT_MODE_AES_128_CTS] = {
 		.friendly_name = "AES-128-CTS-CBC",
 		.cipher_str = "cts(cbc(aes))",
 		.keysize = 16,
 		.ivsize = 16,
 	},
-	[FS_ENCRYPTION_MODE_ADIANTUM] = {
+	[FSCRYPT_MODE_ADIANTUM] = {
 		.friendly_name = "Adiantum",
 		.cipher_str = "adiantum(xchacha12,aes)",
 		.keysize = 32,
@@ -192,7 +192,7 @@ static int find_and_derive_key(const struct inode *inode,
 	const struct fscrypt_key *payload;
 	int err;
 
-	key = find_and_lock_process_key(FS_KEY_DESC_PREFIX,
+	key = find_and_lock_process_key(FSCRYPT_KEY_DESC_PREFIX,
 					ctx->master_key_descriptor,
 					mode->keysize, &payload);
 	if (key == ERR_PTR(-ENOKEY) && inode->i_sb->s_cop->key_prefix) {
@@ -203,7 +203,7 @@ static int find_and_derive_key(const struct inode *inode,
 	if (IS_ERR(key))
 		return PTR_ERR(key);
 
-	if (ctx->flags & FS_POLICY_FLAG_DIRECT_KEY) {
+	if (ctx->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
 		if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) {
 			fscrypt_warn(inode,
 				     "Direct key mode not allowed with %s",
@@ -272,14 +272,14 @@ err_free_tfm:
 	return ERR_PTR(err);
 }
 
-/* Master key referenced by FS_POLICY_FLAG_DIRECT_KEY policy */
+/* Master key referenced by DIRECT_KEY policy */
 struct fscrypt_master_key {
 	struct hlist_node mk_node;
 	refcount_t mk_refcount;
 	const struct fscrypt_mode *mk_mode;
 	struct crypto_skcipher *mk_ctfm;
-	u8 mk_descriptor[FS_KEY_DESCRIPTOR_SIZE];
-	u8 mk_raw[FS_MAX_KEY_SIZE];
+	u8 mk_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+	u8 mk_raw[FSCRYPT_MAX_KEY_SIZE];
 };
 
 static void free_master_key(struct fscrypt_master_key *mk)
@@ -320,13 +320,13 @@ find_or_insert_master_key(struct fscrypt_master_key *to_insert,
 	 * raw key, and use crypto_memneq() when comparing raw keys.
 	 */
 
-	BUILD_BUG_ON(sizeof(hash_key) > FS_KEY_DESCRIPTOR_SIZE);
+	BUILD_BUG_ON(sizeof(hash_key) > FSCRYPT_KEY_DESCRIPTOR_SIZE);
 	memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key));
 
 	spin_lock(&fscrypt_master_keys_lock);
 	hash_for_each_possible(fscrypt_master_keys, mk, mk_node, hash_key) {
 		if (memcmp(ci->ci_master_key_descriptor, mk->mk_descriptor,
-			   FS_KEY_DESCRIPTOR_SIZE) != 0)
+			   FSCRYPT_KEY_DESCRIPTOR_SIZE) != 0)
 			continue;
 		if (mode != mk->mk_mode)
 			continue;
@@ -370,7 +370,7 @@ fscrypt_get_master_key(const struct fscrypt_info *ci, struct fscrypt_mode *mode,
 		goto err_free_mk;
 	}
 	memcpy(mk->mk_descriptor, ci->ci_master_key_descriptor,
-	       FS_KEY_DESCRIPTOR_SIZE);
+	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
 	memcpy(mk->mk_raw, raw_key, mode->keysize);
 
 	return find_or_insert_master_key(mk, raw_key, mode, ci);
@@ -448,8 +448,8 @@ out:
 
 /*
  * Given the encryption mode and key (normally the derived key, but for
- * FS_POLICY_FLAG_DIRECT_KEY mode it's the master key), set up the inode's
- * symmetric cipher transform object(s).
+ * DIRECT_KEY mode it's the master key), set up the inode's symmetric cipher
+ * transform object(s).
  */
 static int setup_crypto_transform(struct fscrypt_info *ci,
 				  struct fscrypt_mode *mode,
@@ -459,7 +459,7 @@ static int setup_crypto_transform(struct fscrypt_info *ci,
 	struct crypto_skcipher *ctfm;
 	int err;
 
-	if (ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY) {
+	if (ci->ci_flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
 		mk = fscrypt_get_master_key(ci, mode, raw_key, inode);
 		if (IS_ERR(mk))
 			return PTR_ERR(mk);
@@ -476,7 +476,7 @@ static int setup_crypto_transform(struct fscrypt_info *ci,
 	if (mode->needs_essiv) {
 		/* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
 		WARN_ON(mode->ivsize != AES_BLOCK_SIZE);
-		WARN_ON(ci->ci_flags & FS_POLICY_FLAG_DIRECT_KEY);
+		WARN_ON(ci->ci_flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY);
 
 		err = init_essiv_generator(ci, raw_key, mode->keysize);
 		if (err) {
@@ -530,9 +530,10 @@ int fscrypt_get_encryption_info(struct inode *inode)
 		/* Fake up a context for an unencrypted directory */
 		memset(&ctx, 0, sizeof(ctx));
 		ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
-		ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
-		ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
-		memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
+		ctx.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
+		ctx.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
+		memset(ctx.master_key_descriptor, 0x42,
+		       FSCRYPT_KEY_DESCRIPTOR_SIZE);
 	} else if (res != sizeof(ctx)) {
 		fscrypt_warn(inode,
 			     "Unknown encryption context size (%d bytes)", res);
@@ -545,7 +546,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
 		return -EINVAL;
 	}
 
-	if (ctx.flags & ~FS_POLICY_FLAGS_VALID) {
+	if (ctx.flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
 		fscrypt_warn(inode, "Unknown encryption context flags (0x%02x)",
 			     ctx.flags);
 		return -EINVAL;
@@ -559,7 +560,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
 	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
 	memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
-	       FS_KEY_DESCRIPTOR_SIZE);
+	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
 	memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
 
 	mode = select_encryption_mode(crypt_info, inode);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 4941fe8471ce..da7ae9c8b4ad 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -22,7 +22,7 @@ static bool is_encryption_context_consistent_with_policy(
 				const struct fscrypt_policy *policy)
 {
 	return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor,
-		      FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+		      FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
 		(ctx->flags == policy->flags) &&
 		(ctx->contents_encryption_mode ==
 		 policy->contents_encryption_mode) &&
@@ -37,13 +37,13 @@ static int create_encryption_context_from_policy(struct inode *inode,
 
 	ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
 	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
-					FS_KEY_DESCRIPTOR_SIZE);
+					FSCRYPT_KEY_DESCRIPTOR_SIZE);
 
 	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
 				     policy->filenames_encryption_mode))
 		return -EINVAL;
 
-	if (policy->flags & ~FS_POLICY_FLAGS_VALID)
+	if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID)
 		return -EINVAL;
 
 	ctx.contents_encryption_mode = policy->contents_encryption_mode;
@@ -128,7 +128,7 @@ int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
 	policy.filenames_encryption_mode = ctx.filenames_encryption_mode;
 	policy.flags = ctx.flags;
 	memcpy(policy.master_key_descriptor, ctx.master_key_descriptor,
-				FS_KEY_DESCRIPTOR_SIZE);
+				FSCRYPT_KEY_DESCRIPTOR_SIZE);
 
 	if (copy_to_user(arg, &policy, sizeof(policy)))
 		return -EFAULT;
@@ -202,7 +202,7 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 	if (parent_ci && child_ci) {
 		return memcmp(parent_ci->ci_master_key_descriptor,
 			      child_ci->ci_master_key_descriptor,
-			      FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+			      FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
 			(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
 			(parent_ci->ci_filename_mode ==
 			 child_ci->ci_filename_mode) &&
@@ -219,7 +219,7 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 
 	return memcmp(parent_ctx.master_key_descriptor,
 		      child_ctx.master_key_descriptor,
-		      FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+		      FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
 		(parent_ctx.contents_encryption_mode ==
 		 child_ctx.contents_encryption_mode) &&
 		(parent_ctx.filenames_encryption_mode ==
@@ -257,7 +257,7 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
 	ctx.filenames_encryption_mode = ci->ci_filename_mode;
 	ctx.flags = ci->ci_flags;
 	memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor,
-	       FS_KEY_DESCRIPTOR_SIZE);
+	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
 	get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
 	BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
 	res = parent->i_sb->s_cop->set_context(child, &ctx,
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 674b0452ef57..29a945d165de 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -55,6 +55,7 @@ struct fscrypt_key {
 /**********************************************************************/
 
 /* old names; don't add anything new here! */
+#ifndef __KERNEL__
 #define FS_KEY_DESCRIPTOR_SIZE		FSCRYPT_KEY_DESCRIPTOR_SIZE
 #define FS_POLICY_FLAGS_PAD_4		FSCRYPT_POLICY_FLAGS_PAD_4
 #define FS_POLICY_FLAGS_PAD_8		FSCRYPT_POLICY_FLAGS_PAD_8
@@ -76,5 +77,6 @@ struct fscrypt_key {
 #define FS_KEY_DESC_PREFIX		FSCRYPT_KEY_DESC_PREFIX
 #define FS_KEY_DESC_PREFIX_SIZE		FSCRYPT_KEY_DESC_PREFIX_SIZE
 #define FS_MAX_KEY_SIZE			FSCRYPT_MAX_KEY_SIZE
+#endif /* !__KERNEL__ */
 
 #endif /* _UAPI_LINUX_FSCRYPT_H */
-- 
cgit v1.2.3


From feed825861919ac3fac4b2ab83673dd5225f7f0d Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:45 -0700
Subject: fscrypt: rename keyinfo.c to keysetup.c

Rename keyinfo.c to keysetup.c since this better describes what the file
does (sets up the key), and it matches the new file keysetup_v1.c.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/Makefile          |   2 +-
 fs/crypto/fscrypt_private.h |   2 +-
 fs/crypto/keyinfo.c         | 345 --------------------------------------------
 fs/crypto/keysetup.c        | 345 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fscrypt.h     |   4 +-
 5 files changed, 349 insertions(+), 349 deletions(-)
 delete mode 100644 fs/crypto/keyinfo.c
 create mode 100644 fs/crypto/keysetup.c

(limited to 'include')

diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile
index 1fba255c34ca..ad14d4c29784 100644
--- a/fs/crypto/Makefile
+++ b/fs/crypto/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_FS_ENCRYPTION)	+= fscrypto.o
 fscrypto-y := crypto.o \
 	      fname.o \
 	      hooks.o \
-	      keyinfo.o \
+	      keysetup.o \
 	      keysetup_v1.o \
 	      policy.o
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 387b44b255f6..794dcba25ca8 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -156,7 +156,7 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
 					 u32 orig_len, u32 max_len,
 					 u32 *encrypted_len_ret);
 
-/* keyinfo.c */
+/* keysetup.c */
 
 struct fscrypt_mode {
 	const char *friendly_name;
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
deleted file mode 100644
index f4a47448e9ef..000000000000
--- a/fs/crypto/keyinfo.c
+++ /dev/null
@@ -1,345 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Key setup facility for FS encryption support.
- *
- * Copyright (C) 2015, Google, Inc.
- *
- * Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar.
- * Heavily modified since then.
- */
-
-#include <crypto/aes.h>
-#include <crypto/sha.h>
-#include <crypto/skcipher.h>
-#include <linux/key.h>
-
-#include "fscrypt_private.h"
-
-static struct crypto_shash *essiv_hash_tfm;
-
-static struct fscrypt_mode available_modes[] = {
-	[FSCRYPT_MODE_AES_256_XTS] = {
-		.friendly_name = "AES-256-XTS",
-		.cipher_str = "xts(aes)",
-		.keysize = 64,
-		.ivsize = 16,
-	},
-	[FSCRYPT_MODE_AES_256_CTS] = {
-		.friendly_name = "AES-256-CTS-CBC",
-		.cipher_str = "cts(cbc(aes))",
-		.keysize = 32,
-		.ivsize = 16,
-	},
-	[FSCRYPT_MODE_AES_128_CBC] = {
-		.friendly_name = "AES-128-CBC",
-		.cipher_str = "cbc(aes)",
-		.keysize = 16,
-		.ivsize = 16,
-		.needs_essiv = true,
-	},
-	[FSCRYPT_MODE_AES_128_CTS] = {
-		.friendly_name = "AES-128-CTS-CBC",
-		.cipher_str = "cts(cbc(aes))",
-		.keysize = 16,
-		.ivsize = 16,
-	},
-	[FSCRYPT_MODE_ADIANTUM] = {
-		.friendly_name = "Adiantum",
-		.cipher_str = "adiantum(xchacha12,aes)",
-		.keysize = 32,
-		.ivsize = 32,
-	},
-};
-
-static struct fscrypt_mode *
-select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
-{
-	if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
-		fscrypt_warn(inode,
-			     "Unsupported encryption modes (contents mode %d, filenames mode %d)",
-			     ci->ci_data_mode, ci->ci_filename_mode);
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (S_ISREG(inode->i_mode))
-		return &available_modes[ci->ci_data_mode];
-
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		return &available_modes[ci->ci_filename_mode];
-
-	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
-		  inode->i_ino, (inode->i_mode & S_IFMT));
-	return ERR_PTR(-EINVAL);
-}
-
-/* Create a symmetric cipher object for the given encryption mode and key */
-struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
-						  const u8 *raw_key,
-						  const struct inode *inode)
-{
-	struct crypto_skcipher *tfm;
-	int err;
-
-	tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
-	if (IS_ERR(tfm)) {
-		if (PTR_ERR(tfm) == -ENOENT) {
-			fscrypt_warn(inode,
-				     "Missing crypto API support for %s (API name: \"%s\")",
-				     mode->friendly_name, mode->cipher_str);
-			return ERR_PTR(-ENOPKG);
-		}
-		fscrypt_err(inode, "Error allocating '%s' transform: %ld",
-			    mode->cipher_str, PTR_ERR(tfm));
-		return tfm;
-	}
-	if (unlikely(!mode->logged_impl_name)) {
-		/*
-		 * fscrypt performance can vary greatly depending on which
-		 * crypto algorithm implementation is used.  Help people debug
-		 * performance problems by logging the ->cra_driver_name the
-		 * first time a mode is used.  Note that multiple threads can
-		 * race here, but it doesn't really matter.
-		 */
-		mode->logged_impl_name = true;
-		pr_info("fscrypt: %s using implementation \"%s\"\n",
-			mode->friendly_name,
-			crypto_skcipher_alg(tfm)->base.cra_driver_name);
-	}
-	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
-	err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize);
-	if (err)
-		goto err_free_tfm;
-
-	return tfm;
-
-err_free_tfm:
-	crypto_free_skcipher(tfm);
-	return ERR_PTR(err);
-}
-
-static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
-{
-	struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
-
-	/* init hash transform on demand */
-	if (unlikely(!tfm)) {
-		struct crypto_shash *prev_tfm;
-
-		tfm = crypto_alloc_shash("sha256", 0, 0);
-		if (IS_ERR(tfm)) {
-			if (PTR_ERR(tfm) == -ENOENT) {
-				fscrypt_warn(NULL,
-					     "Missing crypto API support for SHA-256");
-				return -ENOPKG;
-			}
-			fscrypt_err(NULL,
-				    "Error allocating SHA-256 transform: %ld",
-				    PTR_ERR(tfm));
-			return PTR_ERR(tfm);
-		}
-		prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
-		if (prev_tfm) {
-			crypto_free_shash(tfm);
-			tfm = prev_tfm;
-		}
-	}
-
-	{
-		SHASH_DESC_ON_STACK(desc, tfm);
-		desc->tfm = tfm;
-
-		return crypto_shash_digest(desc, key, keysize, salt);
-	}
-}
-
-static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
-				int keysize)
-{
-	int err;
-	struct crypto_cipher *essiv_tfm;
-	u8 salt[SHA256_DIGEST_SIZE];
-
-	if (WARN_ON(ci->ci_mode->ivsize != AES_BLOCK_SIZE))
-		return -EINVAL;
-
-	essiv_tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(essiv_tfm))
-		return PTR_ERR(essiv_tfm);
-
-	ci->ci_essiv_tfm = essiv_tfm;
-
-	err = derive_essiv_salt(raw_key, keysize, salt);
-	if (err)
-		goto out;
-
-	/*
-	 * Using SHA256 to derive the salt/key will result in AES-256 being
-	 * used for IV generation. File contents encryption will still use the
-	 * configured keysize (AES-128) nevertheless.
-	 */
-	err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt));
-	if (err)
-		goto out;
-
-out:
-	memzero_explicit(salt, sizeof(salt));
-	return err;
-}
-
-/* Given the per-file key, set up the file's crypto transform object(s) */
-int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
-{
-	struct fscrypt_mode *mode = ci->ci_mode;
-	struct crypto_skcipher *ctfm;
-	int err;
-
-	ctfm = fscrypt_allocate_skcipher(mode, derived_key, ci->ci_inode);
-	if (IS_ERR(ctfm))
-		return PTR_ERR(ctfm);
-
-	ci->ci_ctfm = ctfm;
-
-	if (mode->needs_essiv) {
-		err = init_essiv_generator(ci, derived_key, mode->keysize);
-		if (err) {
-			fscrypt_warn(ci->ci_inode,
-				     "Error initializing ESSIV generator: %d",
-				     err);
-			return err;
-		}
-	}
-	return 0;
-}
-
-/*
- * Find the master key, then set up the inode's actual encryption key.
- */
-static int setup_file_encryption_key(struct fscrypt_info *ci)
-{
-	return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
-}
-
-static void put_crypt_info(struct fscrypt_info *ci)
-{
-	if (!ci)
-		return;
-
-	if (ci->ci_direct_key) {
-		fscrypt_put_direct_key(ci->ci_direct_key);
-	} else {
-		crypto_free_skcipher(ci->ci_ctfm);
-		crypto_free_cipher(ci->ci_essiv_tfm);
-	}
-	kmem_cache_free(fscrypt_info_cachep, ci);
-}
-
-int fscrypt_get_encryption_info(struct inode *inode)
-{
-	struct fscrypt_info *crypt_info;
-	struct fscrypt_context ctx;
-	struct fscrypt_mode *mode;
-	int res;
-
-	if (fscrypt_has_encryption_key(inode))
-		return 0;
-
-	res = fscrypt_initialize(inode->i_sb->s_cop->flags);
-	if (res)
-		return res;
-
-	res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
-	if (res < 0) {
-		if (!fscrypt_dummy_context_enabled(inode) ||
-		    IS_ENCRYPTED(inode)) {
-			fscrypt_warn(inode,
-				     "Error %d getting encryption context",
-				     res);
-			return res;
-		}
-		/* Fake up a context for an unencrypted directory */
-		memset(&ctx, 0, sizeof(ctx));
-		ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
-		ctx.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
-		ctx.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
-		memset(ctx.master_key_descriptor, 0x42,
-		       FSCRYPT_KEY_DESCRIPTOR_SIZE);
-	} else if (res != sizeof(ctx)) {
-		fscrypt_warn(inode,
-			     "Unknown encryption context size (%d bytes)", res);
-		return -EINVAL;
-	}
-
-	if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1) {
-		fscrypt_warn(inode, "Unknown encryption context version (%d)",
-			     ctx.format);
-		return -EINVAL;
-	}
-
-	if (ctx.flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
-		fscrypt_warn(inode, "Unknown encryption context flags (0x%02x)",
-			     ctx.flags);
-		return -EINVAL;
-	}
-
-	crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
-	if (!crypt_info)
-		return -ENOMEM;
-
-	crypt_info->ci_inode = inode;
-
-	crypt_info->ci_flags = ctx.flags;
-	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
-	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
-	memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
-	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
-	memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
-
-	mode = select_encryption_mode(crypt_info, inode);
-	if (IS_ERR(mode)) {
-		res = PTR_ERR(mode);
-		goto out;
-	}
-	WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
-	crypt_info->ci_mode = mode;
-
-	res = setup_file_encryption_key(crypt_info);
-	if (res)
-		goto out;
-
-	if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL)
-		crypt_info = NULL;
-out:
-	if (res == -ENOKEY)
-		res = 0;
-	put_crypt_info(crypt_info);
-	return res;
-}
-EXPORT_SYMBOL(fscrypt_get_encryption_info);
-
-/**
- * fscrypt_put_encryption_info - free most of an inode's fscrypt data
- *
- * Free the inode's fscrypt_info.  Filesystems must call this when the inode is
- * being evicted.  An RCU grace period need not have elapsed yet.
- */
-void fscrypt_put_encryption_info(struct inode *inode)
-{
-	put_crypt_info(inode->i_crypt_info);
-	inode->i_crypt_info = NULL;
-}
-EXPORT_SYMBOL(fscrypt_put_encryption_info);
-
-/**
- * fscrypt_free_inode - free an inode's fscrypt data requiring RCU delay
- *
- * Free the inode's cached decrypted symlink target, if any.  Filesystems must
- * call this after an RCU grace period, just before they free the inode.
- */
-void fscrypt_free_inode(struct inode *inode)
-{
-	if (IS_ENCRYPTED(inode) && S_ISLNK(inode->i_mode)) {
-		kfree(inode->i_link);
-		inode->i_link = NULL;
-	}
-}
-EXPORT_SYMBOL(fscrypt_free_inode);
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
new file mode 100644
index 000000000000..f4a47448e9ef
--- /dev/null
+++ b/fs/crypto/keysetup.c
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Key setup facility for FS encryption support.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar.
+ * Heavily modified since then.
+ */
+
+#include <crypto/aes.h>
+#include <crypto/sha.h>
+#include <crypto/skcipher.h>
+#include <linux/key.h>
+
+#include "fscrypt_private.h"
+
+static struct crypto_shash *essiv_hash_tfm;
+
+static struct fscrypt_mode available_modes[] = {
+	[FSCRYPT_MODE_AES_256_XTS] = {
+		.friendly_name = "AES-256-XTS",
+		.cipher_str = "xts(aes)",
+		.keysize = 64,
+		.ivsize = 16,
+	},
+	[FSCRYPT_MODE_AES_256_CTS] = {
+		.friendly_name = "AES-256-CTS-CBC",
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = 32,
+		.ivsize = 16,
+	},
+	[FSCRYPT_MODE_AES_128_CBC] = {
+		.friendly_name = "AES-128-CBC",
+		.cipher_str = "cbc(aes)",
+		.keysize = 16,
+		.ivsize = 16,
+		.needs_essiv = true,
+	},
+	[FSCRYPT_MODE_AES_128_CTS] = {
+		.friendly_name = "AES-128-CTS-CBC",
+		.cipher_str = "cts(cbc(aes))",
+		.keysize = 16,
+		.ivsize = 16,
+	},
+	[FSCRYPT_MODE_ADIANTUM] = {
+		.friendly_name = "Adiantum",
+		.cipher_str = "adiantum(xchacha12,aes)",
+		.keysize = 32,
+		.ivsize = 32,
+	},
+};
+
+static struct fscrypt_mode *
+select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
+{
+	if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
+		fscrypt_warn(inode,
+			     "Unsupported encryption modes (contents mode %d, filenames mode %d)",
+			     ci->ci_data_mode, ci->ci_filename_mode);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (S_ISREG(inode->i_mode))
+		return &available_modes[ci->ci_data_mode];
+
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		return &available_modes[ci->ci_filename_mode];
+
+	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
+		  inode->i_ino, (inode->i_mode & S_IFMT));
+	return ERR_PTR(-EINVAL);
+}
+
+/* Create a symmetric cipher object for the given encryption mode and key */
+struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
+						  const u8 *raw_key,
+						  const struct inode *inode)
+{
+	struct crypto_skcipher *tfm;
+	int err;
+
+	tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0);
+	if (IS_ERR(tfm)) {
+		if (PTR_ERR(tfm) == -ENOENT) {
+			fscrypt_warn(inode,
+				     "Missing crypto API support for %s (API name: \"%s\")",
+				     mode->friendly_name, mode->cipher_str);
+			return ERR_PTR(-ENOPKG);
+		}
+		fscrypt_err(inode, "Error allocating '%s' transform: %ld",
+			    mode->cipher_str, PTR_ERR(tfm));
+		return tfm;
+	}
+	if (unlikely(!mode->logged_impl_name)) {
+		/*
+		 * fscrypt performance can vary greatly depending on which
+		 * crypto algorithm implementation is used.  Help people debug
+		 * performance problems by logging the ->cra_driver_name the
+		 * first time a mode is used.  Note that multiple threads can
+		 * race here, but it doesn't really matter.
+		 */
+		mode->logged_impl_name = true;
+		pr_info("fscrypt: %s using implementation \"%s\"\n",
+			mode->friendly_name,
+			crypto_skcipher_alg(tfm)->base.cra_driver_name);
+	}
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
+	err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize);
+	if (err)
+		goto err_free_tfm;
+
+	return tfm;
+
+err_free_tfm:
+	crypto_free_skcipher(tfm);
+	return ERR_PTR(err);
+}
+
+static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
+{
+	struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
+
+	/* init hash transform on demand */
+	if (unlikely(!tfm)) {
+		struct crypto_shash *prev_tfm;
+
+		tfm = crypto_alloc_shash("sha256", 0, 0);
+		if (IS_ERR(tfm)) {
+			if (PTR_ERR(tfm) == -ENOENT) {
+				fscrypt_warn(NULL,
+					     "Missing crypto API support for SHA-256");
+				return -ENOPKG;
+			}
+			fscrypt_err(NULL,
+				    "Error allocating SHA-256 transform: %ld",
+				    PTR_ERR(tfm));
+			return PTR_ERR(tfm);
+		}
+		prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
+		if (prev_tfm) {
+			crypto_free_shash(tfm);
+			tfm = prev_tfm;
+		}
+	}
+
+	{
+		SHASH_DESC_ON_STACK(desc, tfm);
+		desc->tfm = tfm;
+
+		return crypto_shash_digest(desc, key, keysize, salt);
+	}
+}
+
+static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
+				int keysize)
+{
+	int err;
+	struct crypto_cipher *essiv_tfm;
+	u8 salt[SHA256_DIGEST_SIZE];
+
+	if (WARN_ON(ci->ci_mode->ivsize != AES_BLOCK_SIZE))
+		return -EINVAL;
+
+	essiv_tfm = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(essiv_tfm))
+		return PTR_ERR(essiv_tfm);
+
+	ci->ci_essiv_tfm = essiv_tfm;
+
+	err = derive_essiv_salt(raw_key, keysize, salt);
+	if (err)
+		goto out;
+
+	/*
+	 * Using SHA256 to derive the salt/key will result in AES-256 being
+	 * used for IV generation. File contents encryption will still use the
+	 * configured keysize (AES-128) nevertheless.
+	 */
+	err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt));
+	if (err)
+		goto out;
+
+out:
+	memzero_explicit(salt, sizeof(salt));
+	return err;
+}
+
+/* Given the per-file key, set up the file's crypto transform object(s) */
+int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
+{
+	struct fscrypt_mode *mode = ci->ci_mode;
+	struct crypto_skcipher *ctfm;
+	int err;
+
+	ctfm = fscrypt_allocate_skcipher(mode, derived_key, ci->ci_inode);
+	if (IS_ERR(ctfm))
+		return PTR_ERR(ctfm);
+
+	ci->ci_ctfm = ctfm;
+
+	if (mode->needs_essiv) {
+		err = init_essiv_generator(ci, derived_key, mode->keysize);
+		if (err) {
+			fscrypt_warn(ci->ci_inode,
+				     "Error initializing ESSIV generator: %d",
+				     err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Find the master key, then set up the inode's actual encryption key.
+ */
+static int setup_file_encryption_key(struct fscrypt_info *ci)
+{
+	return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
+}
+
+static void put_crypt_info(struct fscrypt_info *ci)
+{
+	if (!ci)
+		return;
+
+	if (ci->ci_direct_key) {
+		fscrypt_put_direct_key(ci->ci_direct_key);
+	} else {
+		crypto_free_skcipher(ci->ci_ctfm);
+		crypto_free_cipher(ci->ci_essiv_tfm);
+	}
+	kmem_cache_free(fscrypt_info_cachep, ci);
+}
+
+int fscrypt_get_encryption_info(struct inode *inode)
+{
+	struct fscrypt_info *crypt_info;
+	struct fscrypt_context ctx;
+	struct fscrypt_mode *mode;
+	int res;
+
+	if (fscrypt_has_encryption_key(inode))
+		return 0;
+
+	res = fscrypt_initialize(inode->i_sb->s_cop->flags);
+	if (res)
+		return res;
+
+	res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+	if (res < 0) {
+		if (!fscrypt_dummy_context_enabled(inode) ||
+		    IS_ENCRYPTED(inode)) {
+			fscrypt_warn(inode,
+				     "Error %d getting encryption context",
+				     res);
+			return res;
+		}
+		/* Fake up a context for an unencrypted directory */
+		memset(&ctx, 0, sizeof(ctx));
+		ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
+		ctx.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
+		ctx.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
+		memset(ctx.master_key_descriptor, 0x42,
+		       FSCRYPT_KEY_DESCRIPTOR_SIZE);
+	} else if (res != sizeof(ctx)) {
+		fscrypt_warn(inode,
+			     "Unknown encryption context size (%d bytes)", res);
+		return -EINVAL;
+	}
+
+	if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1) {
+		fscrypt_warn(inode, "Unknown encryption context version (%d)",
+			     ctx.format);
+		return -EINVAL;
+	}
+
+	if (ctx.flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
+		fscrypt_warn(inode, "Unknown encryption context flags (0x%02x)",
+			     ctx.flags);
+		return -EINVAL;
+	}
+
+	crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
+	if (!crypt_info)
+		return -ENOMEM;
+
+	crypt_info->ci_inode = inode;
+
+	crypt_info->ci_flags = ctx.flags;
+	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
+	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
+	memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
+	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
+	memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+
+	mode = select_encryption_mode(crypt_info, inode);
+	if (IS_ERR(mode)) {
+		res = PTR_ERR(mode);
+		goto out;
+	}
+	WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
+	crypt_info->ci_mode = mode;
+
+	res = setup_file_encryption_key(crypt_info);
+	if (res)
+		goto out;
+
+	if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL)
+		crypt_info = NULL;
+out:
+	if (res == -ENOKEY)
+		res = 0;
+	put_crypt_info(crypt_info);
+	return res;
+}
+EXPORT_SYMBOL(fscrypt_get_encryption_info);
+
+/**
+ * fscrypt_put_encryption_info - free most of an inode's fscrypt data
+ *
+ * Free the inode's fscrypt_info.  Filesystems must call this when the inode is
+ * being evicted.  An RCU grace period need not have elapsed yet.
+ */
+void fscrypt_put_encryption_info(struct inode *inode)
+{
+	put_crypt_info(inode->i_crypt_info);
+	inode->i_crypt_info = NULL;
+}
+EXPORT_SYMBOL(fscrypt_put_encryption_info);
+
+/**
+ * fscrypt_free_inode - free an inode's fscrypt data requiring RCU delay
+ *
+ * Free the inode's cached decrypted symlink target, if any.  Filesystems must
+ * call this after an RCU grace period, just before they free the inode.
+ */
+void fscrypt_free_inode(struct inode *inode)
+{
+	if (IS_ENCRYPTED(inode) && S_ISLNK(inode->i_mode)) {
+		kfree(inode->i_link);
+		inode->i_link = NULL;
+	}
+}
+EXPORT_SYMBOL(fscrypt_free_inode);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 81c0c754f8b2..583802cb2e35 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -138,7 +138,7 @@ extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
 extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
 					void *, bool);
-/* keyinfo.c */
+/* keysetup.c */
 extern int fscrypt_get_encryption_info(struct inode *);
 extern void fscrypt_put_encryption_info(struct inode *);
 extern void fscrypt_free_inode(struct inode *);
@@ -367,7 +367,7 @@ static inline int fscrypt_inherit_context(struct inode *parent,
 	return -EOPNOTSUPP;
 }
 
-/* keyinfo.c */
+/* keysetup.c */
 static inline int fscrypt_get_encryption_info(struct inode *inode)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 22d94f493bfb408fdd764f7b1d0363af2122fba5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:46 -0700
Subject: fscrypt: add FS_IOC_ADD_ENCRYPTION_KEY ioctl

Add a new fscrypt ioctl, FS_IOC_ADD_ENCRYPTION_KEY.  This ioctl adds an
encryption key to the filesystem's fscrypt keyring ->s_master_keys,
making any files encrypted with that key appear "unlocked".

Why we need this
~~~~~~~~~~~~~~~~

The main problem is that the "locked/unlocked" (ciphertext/plaintext)
status of encrypted files is global, but the fscrypt keys are not.
fscrypt only looks for keys in the keyring(s) the process accessing the
filesystem is subscribed to: the thread keyring, process keyring, and
session keyring, where the session keyring may contain the user keyring.

Therefore, userspace has to put fscrypt keys in the keyrings for
individual users or sessions.  But this means that when a process with a
different keyring tries to access encrypted files, whether they appear
"unlocked" or not is nondeterministic.  This is because it depends on
whether the files are currently present in the inode cache.

Fixing this by consistently providing each process its own view of the
filesystem depending on whether it has the key or not isn't feasible due
to how the VFS caches work.  Furthermore, while sometimes users expect
this behavior, it is misguided for two reasons.  First, it would be an
OS-level access control mechanism largely redundant with existing access
control mechanisms such as UNIX file permissions, ACLs, LSMs, etc.
Encryption is actually for protecting the data at rest.

Second, almost all users of fscrypt actually do need the keys to be
global.  The largest users of fscrypt, Android and Chromium OS, achieve
this by having PID 1 create a "session keyring" that is inherited by
every process.  This works, but it isn't scalable because it prevents
session keyrings from being used for any other purpose.

On general-purpose Linux distros, the 'fscrypt' userspace tool [1] can't
similarly abuse the session keyring, so to make 'sudo' work on all
systems it has to link all the user keyrings into root's user keyring
[2].  This is ugly and raises security concerns.  Moreover it can't make
the keys available to system services, such as sshd trying to access the
user's '~/.ssh' directory (see [3], [4]) or NetworkManager trying to
read certificates from the user's home directory (see [5]); or to Docker
containers (see [6], [7]).

By having an API to add a key to the *filesystem* we'll be able to fix
the above bugs, remove userspace workarounds, and clearly express the
intended semantics: the locked/unlocked status of an encrypted directory
is global, and encryption is orthogonal to OS-level access control.

Why not use the add_key() syscall
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

We use an ioctl for this API rather than the existing add_key() system
call because the ioctl gives us the flexibility needed to implement
fscrypt-specific semantics that will be introduced in later patches:

- Supporting key removal with the semantics such that the secret is
  removed immediately and any unused inodes using the key are evicted;
  also, the eviction of any in-use inodes can be retried.

- Calculating a key-dependent cryptographic identifier and returning it
  to userspace.

- Allowing keys to be added and removed by non-root users, but only keys
  for v2 encryption policies; and to prevent denial-of-service attacks,
  users can only remove keys they themselves have added, and a key is
  only really removed after all users who added it have removed it.

Trying to shoehorn these semantics into the keyrings syscalls would be
very difficult, whereas the ioctls make things much easier.

However, to reuse code the implementation still uses the keyrings
service internally.  Thus we get lockless RCU-mode key lookups without
having to re-implement it, and the keys automatically show up in
/proc/keys for debugging purposes.

References:

    [1] https://github.com/google/fscrypt
    [2] https://goo.gl/55cCrI#heading=h.vf09isp98isb
    [3] https://github.com/google/fscrypt/issues/111#issuecomment-444347939
    [4] https://github.com/google/fscrypt/issues/116
    [5] https://bugs.launchpad.net/ubuntu/+source/fscrypt/+bug/1770715
    [6] https://github.com/google/fscrypt/issues/128
    [7] https://askubuntu.com/questions/1130306/cannot-run-docker-on-an-encrypted-filesystem

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/Makefile           |   1 +
 fs/crypto/crypto.c           |  10 +-
 fs/crypto/fscrypt_private.h  |  62 +++++++++-
 fs/crypto/keyring.c          | 286 +++++++++++++++++++++++++++++++++++++++++++
 fs/crypto/keysetup.c         |  35 +++++-
 fs/super.c                   |   2 +
 include/linux/fs.h           |   1 +
 include/linux/fscrypt.h      |  14 +++
 include/uapi/linux/fscrypt.h |  49 ++++++--
 9 files changed, 447 insertions(+), 13 deletions(-)
 create mode 100644 fs/crypto/keyring.c

(limited to 'include')

diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile
index ad14d4c29784..6b2485b41393 100644
--- a/fs/crypto/Makefile
+++ b/fs/crypto/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_FS_ENCRYPTION)	+= fscrypto.o
 fscrypto-y := crypto.o \
 	      fname.o \
 	      hooks.o \
+	      keyring.o \
 	      keysetup.o \
 	      keysetup_v1.o \
 	      policy.o
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 7502c1f0ede9..65ca077e8d58 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -478,6 +478,8 @@ void fscrypt_msg(const struct inode *inode, const char *level,
  */
 static int __init fscrypt_init(void)
 {
+	int err = -ENOMEM;
+
 	/*
 	 * Use an unbound workqueue to allow bios to be decrypted in parallel
 	 * even when they happen to complete on the same CPU.  This sacrifices
@@ -500,13 +502,19 @@ static int __init fscrypt_init(void)
 	if (!fscrypt_info_cachep)
 		goto fail_free_ctx;
 
+	err = fscrypt_init_keyring();
+	if (err)
+		goto fail_free_info;
+
 	return 0;
 
+fail_free_info:
+	kmem_cache_destroy(fscrypt_info_cachep);
 fail_free_ctx:
 	kmem_cache_destroy(fscrypt_ctx_cachep);
 fail_free_queue:
 	destroy_workqueue(fscrypt_read_workqueue);
 fail:
-	return -ENOMEM;
+	return err;
 }
 late_initcall(fscrypt_init)
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 794dcba25ca8..0d9ebfd3bf3a 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -14,9 +14,12 @@
 #include <linux/fscrypt.h>
 #include <crypto/hash.h>
 
-/* Encryption parameters */
+#define CONST_STRLEN(str)	(sizeof(str) - 1)
+
 #define FS_KEY_DERIVATION_NONCE_SIZE	16
 
+#define FSCRYPT_MIN_KEY_SIZE		16
+
 /**
  * Encryption context for inode
  *
@@ -156,6 +159,63 @@ extern bool fscrypt_fname_encrypted_size(const struct inode *inode,
 					 u32 orig_len, u32 max_len,
 					 u32 *encrypted_len_ret);
 
+/* keyring.c */
+
+/*
+ * fscrypt_master_key_secret - secret key material of an in-use master key
+ */
+struct fscrypt_master_key_secret {
+
+	/* Size of the raw key in bytes */
+	u32			size;
+
+	/* The raw key */
+	u8			raw[FSCRYPT_MAX_KEY_SIZE];
+
+} __randomize_layout;
+
+/*
+ * fscrypt_master_key - an in-use master key
+ *
+ * This represents a master encryption key which has been added to the
+ * filesystem and can be used to "unlock" the encrypted files which were
+ * encrypted with it.
+ */
+struct fscrypt_master_key {
+
+	/* The secret key material */
+	struct fscrypt_master_key_secret	mk_secret;
+
+	/* Arbitrary key descriptor which was assigned by userspace */
+	struct fscrypt_key_specifier		mk_spec;
+
+} __randomize_layout;
+
+static inline const char *master_key_spec_type(
+				const struct fscrypt_key_specifier *spec)
+{
+	switch (spec->type) {
+	case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
+		return "descriptor";
+	}
+	return "[unknown]";
+}
+
+static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec)
+{
+	switch (spec->type) {
+	case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
+		return FSCRYPT_KEY_DESCRIPTOR_SIZE;
+	}
+	return 0;
+}
+
+extern struct key *
+fscrypt_find_master_key(struct super_block *sb,
+			const struct fscrypt_key_specifier *mk_spec);
+
+extern int __init fscrypt_init_keyring(void);
+
 /* keysetup.c */
 
 struct fscrypt_mode {
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
new file mode 100644
index 000000000000..bcd7d2836e1e
--- /dev/null
+++ b/fs/crypto/keyring.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Filesystem-level keyring for fscrypt
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * This file implements management of fscrypt master keys in the
+ * filesystem-level keyring, including the ioctls:
+ *
+ * - FS_IOC_ADD_ENCRYPTION_KEY
+ *
+ * See the "User API" section of Documentation/filesystems/fscrypt.rst for more
+ * information about these ioctls.
+ */
+
+#include <linux/key-type.h>
+#include <linux/seq_file.h>
+
+#include "fscrypt_private.h"
+
+static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret)
+{
+	memzero_explicit(secret, sizeof(*secret));
+}
+
+static void move_master_key_secret(struct fscrypt_master_key_secret *dst,
+				   struct fscrypt_master_key_secret *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+	memzero_explicit(src, sizeof(*src));
+}
+
+static void free_master_key(struct fscrypt_master_key *mk)
+{
+	wipe_master_key_secret(&mk->mk_secret);
+	kzfree(mk);
+}
+
+static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec)
+{
+	if (spec->__reserved)
+		return false;
+	return master_key_spec_len(spec) != 0;
+}
+
+static int fscrypt_key_instantiate(struct key *key,
+				   struct key_preparsed_payload *prep)
+{
+	key->payload.data[0] = (struct fscrypt_master_key *)prep->data;
+	return 0;
+}
+
+static void fscrypt_key_destroy(struct key *key)
+{
+	free_master_key(key->payload.data[0]);
+}
+
+static void fscrypt_key_describe(const struct key *key, struct seq_file *m)
+{
+	seq_puts(m, key->description);
+}
+
+/*
+ * Type of key in ->s_master_keys.  Each key of this type represents a master
+ * key which has been added to the filesystem.  Its payload is a
+ * 'struct fscrypt_master_key'.  The "." prefix in the key type name prevents
+ * users from adding keys of this type via the keyrings syscalls rather than via
+ * the intended method of FS_IOC_ADD_ENCRYPTION_KEY.
+ */
+static struct key_type key_type_fscrypt = {
+	.name			= "._fscrypt",
+	.instantiate		= fscrypt_key_instantiate,
+	.destroy		= fscrypt_key_destroy,
+	.describe		= fscrypt_key_describe,
+};
+
+/* Search ->s_master_keys */
+static struct key *search_fscrypt_keyring(struct key *keyring,
+					  struct key_type *type,
+					  const char *description)
+{
+	/*
+	 * We need to mark the keyring reference as "possessed" so that we
+	 * acquire permission to search it, via the KEY_POS_SEARCH permission.
+	 */
+	key_ref_t keyref = make_key_ref(keyring, true /* possessed */);
+
+	keyref = keyring_search(keyref, type, description, false);
+	if (IS_ERR(keyref)) {
+		if (PTR_ERR(keyref) == -EAGAIN || /* not found */
+		    PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */
+			keyref = ERR_PTR(-ENOKEY);
+		return ERR_CAST(keyref);
+	}
+	return key_ref_to_ptr(keyref);
+}
+
+#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE	\
+	(CONST_STRLEN("fscrypt-") + FIELD_SIZEOF(struct super_block, s_id))
+
+#define FSCRYPT_MK_DESCRIPTION_SIZE	(2 * FSCRYPT_KEY_DESCRIPTOR_SIZE + 1)
+
+static void format_fs_keyring_description(
+			char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE],
+			const struct super_block *sb)
+{
+	sprintf(description, "fscrypt-%s", sb->s_id);
+}
+
+static void format_mk_description(
+			char description[FSCRYPT_MK_DESCRIPTION_SIZE],
+			const struct fscrypt_key_specifier *mk_spec)
+{
+	sprintf(description, "%*phN",
+		master_key_spec_len(mk_spec), (u8 *)&mk_spec->u);
+}
+
+/* Create ->s_master_keys if needed.  Synchronized by fscrypt_add_key_mutex. */
+static int allocate_filesystem_keyring(struct super_block *sb)
+{
+	char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE];
+	struct key *keyring;
+
+	if (sb->s_master_keys)
+		return 0;
+
+	format_fs_keyring_description(description, sb);
+	keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				current_cred(), KEY_POS_SEARCH |
+				  KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW,
+				KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	if (IS_ERR(keyring))
+		return PTR_ERR(keyring);
+
+	/* Pairs with READ_ONCE() in fscrypt_find_master_key() */
+	smp_store_release(&sb->s_master_keys, keyring);
+	return 0;
+}
+
+void fscrypt_sb_free(struct super_block *sb)
+{
+	key_put(sb->s_master_keys);
+	sb->s_master_keys = NULL;
+}
+
+/*
+ * Find the specified master key in ->s_master_keys.
+ * Returns ERR_PTR(-ENOKEY) if not found.
+ */
+struct key *fscrypt_find_master_key(struct super_block *sb,
+				    const struct fscrypt_key_specifier *mk_spec)
+{
+	struct key *keyring;
+	char description[FSCRYPT_MK_DESCRIPTION_SIZE];
+
+	/* pairs with smp_store_release() in allocate_filesystem_keyring() */
+	keyring = READ_ONCE(sb->s_master_keys);
+	if (keyring == NULL)
+		return ERR_PTR(-ENOKEY); /* No keyring yet, so no keys yet. */
+
+	format_mk_description(description, mk_spec);
+	return search_fscrypt_keyring(keyring, &key_type_fscrypt, description);
+}
+
+/*
+ * Allocate a new fscrypt_master_key which contains the given secret, set it as
+ * the payload of a new 'struct key' of type fscrypt, and link the 'struct key'
+ * into the given keyring.  Synchronized by fscrypt_add_key_mutex.
+ */
+static int add_new_master_key(struct fscrypt_master_key_secret *secret,
+			      const struct fscrypt_key_specifier *mk_spec,
+			      struct key *keyring)
+{
+	struct fscrypt_master_key *mk;
+	char description[FSCRYPT_MK_DESCRIPTION_SIZE];
+	struct key *key;
+	int err;
+
+	mk = kzalloc(sizeof(*mk), GFP_KERNEL);
+	if (!mk)
+		return -ENOMEM;
+
+	mk->mk_spec = *mk_spec;
+
+	move_master_key_secret(&mk->mk_secret, secret);
+
+	format_mk_description(description, mk_spec);
+	key = key_alloc(&key_type_fscrypt, description,
+			GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+			KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_VIEW,
+			KEY_ALLOC_NOT_IN_QUOTA, NULL);
+	if (IS_ERR(key)) {
+		err = PTR_ERR(key);
+		goto out_free_mk;
+	}
+	err = key_instantiate_and_link(key, mk, sizeof(*mk), keyring, NULL);
+	key_put(key);
+	if (err)
+		goto out_free_mk;
+
+	return 0;
+
+out_free_mk:
+	free_master_key(mk);
+	return err;
+}
+
+static int add_master_key(struct super_block *sb,
+			  struct fscrypt_master_key_secret *secret,
+			  const struct fscrypt_key_specifier *mk_spec)
+{
+	static DEFINE_MUTEX(fscrypt_add_key_mutex);
+	struct key *key;
+	int err;
+
+	mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */
+	key = fscrypt_find_master_key(sb, mk_spec);
+	if (IS_ERR(key)) {
+		err = PTR_ERR(key);
+		if (err != -ENOKEY)
+			goto out_unlock;
+		/* Didn't find the key in ->s_master_keys.  Add it. */
+		err = allocate_filesystem_keyring(sb);
+		if (err)
+			goto out_unlock;
+		err = add_new_master_key(secret, mk_spec, sb->s_master_keys);
+	} else {
+		key_put(key);
+		err = 0;
+	}
+out_unlock:
+	mutex_unlock(&fscrypt_add_key_mutex);
+	return err;
+}
+
+/*
+ * Add a master encryption key to the filesystem, causing all files which were
+ * encrypted with it to appear "unlocked" (decrypted) when accessed.
+ *
+ * For more details, see the "FS_IOC_ADD_ENCRYPTION_KEY" section of
+ * Documentation/filesystems/fscrypt.rst.
+ */
+int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
+{
+	struct super_block *sb = file_inode(filp)->i_sb;
+	struct fscrypt_add_key_arg __user *uarg = _uarg;
+	struct fscrypt_add_key_arg arg;
+	struct fscrypt_master_key_secret secret;
+	int err;
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (!valid_key_spec(&arg.key_spec))
+		return -EINVAL;
+
+	if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE ||
+	    arg.raw_size > FSCRYPT_MAX_KEY_SIZE)
+		return -EINVAL;
+
+	if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
+		return -EINVAL;
+
+	memset(&secret, 0, sizeof(secret));
+	secret.size = arg.raw_size;
+	err = -EFAULT;
+	if (copy_from_user(secret.raw, uarg->raw, secret.size))
+		goto out_wipe_secret;
+
+	err = -EACCES;
+	if (!capable(CAP_SYS_ADMIN))
+		goto out_wipe_secret;
+
+	err = add_master_key(sb, &secret, &arg.key_spec);
+out_wipe_secret:
+	wipe_master_key_secret(&secret);
+	return err;
+}
+EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key);
+
+int __init fscrypt_init_keyring(void)
+{
+	return register_key_type(&key_type_fscrypt);
+}
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index f4a47448e9ef..1c6d18bcdc7b 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -216,7 +216,40 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
  */
 static int setup_file_encryption_key(struct fscrypt_info *ci)
 {
-	return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
+	struct key *key;
+	struct fscrypt_master_key *mk = NULL;
+	struct fscrypt_key_specifier mk_spec;
+	int err;
+
+	mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
+	memcpy(mk_spec.u.descriptor, ci->ci_master_key_descriptor,
+	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
+
+	key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
+	if (IS_ERR(key)) {
+		if (key != ERR_PTR(-ENOKEY))
+			return PTR_ERR(key);
+
+		return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
+	}
+
+	mk = key->payload.data[0];
+
+	if (mk->mk_secret.size < ci->ci_mode->keysize) {
+		fscrypt_warn(NULL,
+			     "key with %s %*phN is too short (got %u bytes, need %u+ bytes)",
+			     master_key_spec_type(&mk_spec),
+			     master_key_spec_len(&mk_spec), (u8 *)&mk_spec.u,
+			     mk->mk_secret.size, ci->ci_mode->keysize);
+		err = -ENOKEY;
+		goto out_release_key;
+	}
+
+	err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw);
+
+out_release_key:
+	key_put(key);
+	return err;
 }
 
 static void put_crypt_info(struct fscrypt_info *ci)
diff --git a/fs/super.c b/fs/super.c
index 5960578a4076..e486a442a61f 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -32,6 +32,7 @@
 #include <linux/backing-dev.h>
 #include <linux/rculist_bl.h>
 #include <linux/cleancache.h>
+#include <linux/fscrypt.h>
 #include <linux/fsnotify.h>
 #include <linux/lockdep.h>
 #include <linux/user_namespace.h>
@@ -290,6 +291,7 @@ static void __put_super(struct super_block *s)
 		WARN_ON(s->s_inode_lru.node);
 		WARN_ON(!list_empty(&s->s_mounts));
 		security_sb_free(s);
+		fscrypt_sb_free(s);
 		put_user_ns(s->s_user_ns);
 		kfree(s->s_subtype);
 		call_rcu(&s->rcu, destroy_super_rcu);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 997a530ff4e9..5dff77326cec 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1427,6 +1427,7 @@ struct super_block {
 	const struct xattr_handler **s_xattr;
 #ifdef CONFIG_FS_ENCRYPTION
 	const struct fscrypt_operations	*s_cop;
+	struct key		*s_master_keys; /* master crypto keys in use */
 #endif
 	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
 	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 583802cb2e35..46bf66cf76ef 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -138,6 +138,10 @@ extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
 extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
 					void *, bool);
+/* keyring.c */
+extern void fscrypt_sb_free(struct super_block *sb);
+extern int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
+
 /* keysetup.c */
 extern int fscrypt_get_encryption_info(struct inode *);
 extern void fscrypt_put_encryption_info(struct inode *);
@@ -367,6 +371,16 @@ static inline int fscrypt_inherit_context(struct inode *parent,
 	return -EOPNOTSUPP;
 }
 
+/* keyring.c */
+static inline void fscrypt_sb_free(struct super_block *sb)
+{
+}
+
+static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 /* keysetup.c */
 static inline int fscrypt_get_encryption_info(struct inode *inode)
 {
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 29a945d165de..6aeca3cb0a2d 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -36,22 +36,51 @@ struct fscrypt_policy {
 	__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 };
 
-#define FS_IOC_SET_ENCRYPTION_POLICY	_IOR('f', 19, struct fscrypt_policy)
-#define FS_IOC_GET_ENCRYPTION_PWSALT	_IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY	_IOW('f', 21, struct fscrypt_policy)
-
-/* Parameters for passing an encryption key into the kernel keyring */
+/*
+ * Process-subscribed "logon" key description prefix and payload format.
+ * Deprecated; prefer FS_IOC_ADD_ENCRYPTION_KEY instead.
+ */
 #define FSCRYPT_KEY_DESC_PREFIX		"fscrypt:"
-#define FSCRYPT_KEY_DESC_PREFIX_SIZE		8
-
-/* Structure that userspace passes to the kernel keyring */
-#define FSCRYPT_MAX_KEY_SIZE			64
-
+#define FSCRYPT_KEY_DESC_PREFIX_SIZE	8
+#define FSCRYPT_MAX_KEY_SIZE		64
 struct fscrypt_key {
 	__u32 mode;
 	__u8 raw[FSCRYPT_MAX_KEY_SIZE];
 	__u32 size;
 };
+
+/*
+ * Keys are specified by an arbitrary 8-byte key "descriptor",
+ * matching fscrypt_policy::master_key_descriptor.
+ */
+#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR	1
+
+/*
+ * Specifies a key.  This doesn't contain the actual key itself; this is just
+ * the "name" of the key.
+ */
+struct fscrypt_key_specifier {
+	__u32 type;	/* one of FSCRYPT_KEY_SPEC_TYPE_* */
+	__u32 __reserved;
+	union {
+		__u8 __reserved[32]; /* reserve some extra space */
+		__u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+	} u;
+};
+
+/* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */
+struct fscrypt_add_key_arg {
+	struct fscrypt_key_specifier key_spec;
+	__u32 raw_size;
+	__u32 __reserved[9];
+	__u8 raw[];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT		_IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
+
 /**********************************************************************/
 
 /* old names; don't add anything new here! */
-- 
cgit v1.2.3


From b1c0ec3599f42ad372063b0235a3c33f65eb1e30 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:46 -0700
Subject: fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY ioctl

Add a new fscrypt ioctl, FS_IOC_REMOVE_ENCRYPTION_KEY.  This ioctl
removes an encryption key that was added by FS_IOC_ADD_ENCRYPTION_KEY.
It wipes the secret key itself, then "locks" the encrypted files and
directories that had been unlocked using that key -- implemented by
evicting the relevant dentries and inodes from the VFS caches.

The problem this solves is that many fscrypt users want the ability to
remove encryption keys, causing the corresponding encrypted directories
to appear "locked" (presented in ciphertext form) again.  Moreover,
users want removing an encryption key to *really* remove it, in the
sense that the removed keys cannot be recovered even if kernel memory is
compromised, e.g. by the exploit of a kernel security vulnerability or
by a physical attack.  This is desirable after a user logs out of the
system, for example.  In many cases users even already assume this to be
the case and are surprised to hear when it's not.

It is not sufficient to simply unlink the master key from the keyring
(or to revoke or invalidate it), since the actual encryption transform
objects are still pinned in memory by their inodes.  Therefore, to
really remove a key we must also evict the relevant inodes.

Currently one workaround is to run 'sync && echo 2 >
/proc/sys/vm/drop_caches'.  But, that evicts all unused inodes in the
system rather than just the inodes associated with the key being
removed, causing severe performance problems.  Moreover, it requires
root privileges, so regular users can't "lock" their encrypted files.

Another workaround, used in Chromium OS kernels, is to add a new
VFS-level ioctl FS_IOC_DROP_CACHE which is a more restricted version of
drop_caches that operates on a single super_block.  It does:

        shrink_dcache_sb(sb);
        invalidate_inodes(sb, false);

But it's still a hack.  Yet, the major users of filesystem encryption
want this feature badly enough that they are actually using these hacks.

To properly solve the problem, start maintaining a list of the inodes
which have been "unlocked" using each master key.  Originally this
wasn't possible because the kernel didn't keep track of in-use master
keys at all.  But, with the ->s_master_keys keyring it is now possible.

Then, add an ioctl FS_IOC_REMOVE_ENCRYPTION_KEY.  It finds the specified
master key in ->s_master_keys, then wipes the secret key itself, which
prevents any additional inodes from being unlocked with the key.  Then,
it syncs the filesystem and evicts the inodes in the key's list.  The
normal inode eviction code will free and wipe the per-file keys (in
->i_crypt_info).  Note that freeing ->i_crypt_info without evicting the
inodes was also considered, but would have been racy.

Some inodes may still be in use when a master key is removed, and we
can't simply revoke random file descriptors, mmap's, etc.  Thus, the
ioctl simply skips in-use inodes, and returns -EBUSY to indicate that
some inodes weren't evicted.  The master key *secret* is still removed,
but the fscrypt_master_key struct remains to keep track of the remaining
inodes.  Userspace can then retry the ioctl to evict the remaining
inodes.  Alternatively, if userspace adds the key again, the refreshed
secret will be associated with the existing list of inodes so they
remain correctly tracked for future key removals.

The ioctl doesn't wipe pagecache pages.  Thus, we tolerate that after a
kernel compromise some portions of plaintext file contents may still be
recoverable from memory.  This can be solved by enabling page poisoning
system-wide, which security conscious users may choose to do.  But it's
very difficult to solve otherwise, e.g. note that plaintext file
contents may have been read in other places than pagecache pages.

Like FS_IOC_ADD_ENCRYPTION_KEY, FS_IOC_REMOVE_ENCRYPTION_KEY is
initially restricted to privileged users only.  This is sufficient for
some use cases, but not all.  A later patch will relax this restriction,
but it will require introducing key hashes, among other changes.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fscrypt_private.h  |  53 ++++++++-
 fs/crypto/keyring.c          | 260 ++++++++++++++++++++++++++++++++++++++++++-
 fs/crypto/keysetup.c         | 103 ++++++++++++++++-
 include/linux/fscrypt.h      |  12 ++
 include/uapi/linux/fscrypt.h |   9 ++
 5 files changed, 432 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 0d9ebfd3bf3a..fc804f4a03fc 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -78,6 +78,19 @@ struct fscrypt_info {
 	/* Back-pointer to the inode */
 	struct inode *ci_inode;
 
+	/*
+	 * The master key with which this inode was unlocked (decrypted).  This
+	 * will be NULL if the master key was found in a process-subscribed
+	 * keyring rather than in the filesystem-level keyring.
+	 */
+	struct key *ci_master_key;
+
+	/*
+	 * Link in list of inodes that were unlocked with the master key.
+	 * Only used when ->ci_master_key is set.
+	 */
+	struct list_head ci_master_key_link;
+
 	/*
 	 * If non-NULL, then encryption is done using the master key directly
 	 * and ci_ctfm will equal ci_direct_key->dk_ctfm.
@@ -183,14 +196,52 @@ struct fscrypt_master_key_secret {
  */
 struct fscrypt_master_key {
 
-	/* The secret key material */
+	/*
+	 * The secret key material.  After FS_IOC_REMOVE_ENCRYPTION_KEY is
+	 * executed, this is wiped and no new inodes can be unlocked with this
+	 * key; however, there may still be inodes in ->mk_decrypted_inodes
+	 * which could not be evicted.  As long as some inodes still remain,
+	 * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or
+	 * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again.
+	 *
+	 * Locking: protected by key->sem.
+	 */
 	struct fscrypt_master_key_secret	mk_secret;
 
 	/* Arbitrary key descriptor which was assigned by userspace */
 	struct fscrypt_key_specifier		mk_spec;
 
+	/*
+	 * Length of ->mk_decrypted_inodes, plus one if mk_secret is present.
+	 * Once this goes to 0, the master key is removed from ->s_master_keys.
+	 * The 'struct fscrypt_master_key' will continue to live as long as the
+	 * 'struct key' whose payload it is, but we won't let this reference
+	 * count rise again.
+	 */
+	refcount_t		mk_refcount;
+
+	/*
+	 * List of inodes that were unlocked using this key.  This allows the
+	 * inodes to be evicted efficiently if the key is removed.
+	 */
+	struct list_head	mk_decrypted_inodes;
+	spinlock_t		mk_decrypted_inodes_lock;
+
 } __randomize_layout;
 
+static inline bool
+is_master_key_secret_present(const struct fscrypt_master_key_secret *secret)
+{
+	/*
+	 * The READ_ONCE() is only necessary for fscrypt_drop_inode() and
+	 * fscrypt_key_describe().  These run in atomic context, so they can't
+	 * take key->sem and thus 'secret' can change concurrently which would
+	 * be a data race.  But they only need to know whether the secret *was*
+	 * present at the time of check, so READ_ONCE() suffices.
+	 */
+	return READ_ONCE(secret->size) != 0;
+}
+
 static inline const char *master_key_spec_type(
 				const struct fscrypt_key_specifier *spec)
 {
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index bcd7d2836e1e..c3423f0edc70 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -10,6 +10,7 @@
  * filesystem-level keyring, including the ioctls:
  *
  * - FS_IOC_ADD_ENCRYPTION_KEY
+ * - FS_IOC_REMOVE_ENCRYPTION_KEY
  *
  * See the "User API" section of Documentation/filesystems/fscrypt.rst for more
  * information about these ioctls.
@@ -60,6 +61,13 @@ static void fscrypt_key_destroy(struct key *key)
 static void fscrypt_key_describe(const struct key *key, struct seq_file *m)
 {
 	seq_puts(m, key->description);
+
+	if (key_is_positive(key)) {
+		const struct fscrypt_master_key *mk = key->payload.data[0];
+
+		if (!is_master_key_secret_present(&mk->mk_secret))
+			seq_puts(m, ": secret removed");
+	}
 }
 
 /*
@@ -186,6 +194,10 @@ static int add_new_master_key(struct fscrypt_master_key_secret *secret,
 
 	move_master_key_secret(&mk->mk_secret, secret);
 
+	refcount_set(&mk->mk_refcount, 1); /* secret is present */
+	INIT_LIST_HEAD(&mk->mk_decrypted_inodes);
+	spin_lock_init(&mk->mk_decrypted_inodes_lock);
+
 	format_mk_description(description, mk_spec);
 	key = key_alloc(&key_type_fscrypt, description,
 			GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
@@ -207,6 +219,21 @@ out_free_mk:
 	return err;
 }
 
+#define KEY_DEAD	1
+
+static int add_existing_master_key(struct fscrypt_master_key *mk,
+				   struct fscrypt_master_key_secret *secret)
+{
+	if (is_master_key_secret_present(&mk->mk_secret))
+		return 0;
+
+	if (!refcount_inc_not_zero(&mk->mk_refcount))
+		return KEY_DEAD;
+
+	move_master_key_secret(&mk->mk_secret, secret);
+	return 0;
+}
+
 static int add_master_key(struct super_block *sb,
 			  struct fscrypt_master_key_secret *secret,
 			  const struct fscrypt_key_specifier *mk_spec)
@@ -216,6 +243,7 @@ static int add_master_key(struct super_block *sb,
 	int err;
 
 	mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */
+retry:
 	key = fscrypt_find_master_key(sb, mk_spec);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
@@ -227,8 +255,20 @@ static int add_master_key(struct super_block *sb,
 			goto out_unlock;
 		err = add_new_master_key(secret, mk_spec, sb->s_master_keys);
 	} else {
+		/*
+		 * Found the key in ->s_master_keys.  Re-add the secret if
+		 * needed.
+		 */
+		down_write(&key->sem);
+		err = add_existing_master_key(key->payload.data[0], secret);
+		up_write(&key->sem);
+		if (err == KEY_DEAD) {
+			/* Key being removed or needs to be removed */
+			key_invalidate(key);
+			key_put(key);
+			goto retry;
+		}
 		key_put(key);
-		err = 0;
 	}
 out_unlock:
 	mutex_unlock(&fscrypt_add_key_mutex);
@@ -280,6 +320,224 @@ out_wipe_secret:
 }
 EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key);
 
+/*
+ * Try to evict the inode's dentries from the dentry cache.  If the inode is a
+ * directory, then it can have at most one dentry; however, that dentry may be
+ * pinned by child dentries, so first try to evict the children too.
+ */
+static void shrink_dcache_inode(struct inode *inode)
+{
+	struct dentry *dentry;
+
+	if (S_ISDIR(inode->i_mode)) {
+		dentry = d_find_any_alias(inode);
+		if (dentry) {
+			shrink_dcache_parent(dentry);
+			dput(dentry);
+		}
+	}
+	d_prune_aliases(inode);
+}
+
+static void evict_dentries_for_decrypted_inodes(struct fscrypt_master_key *mk)
+{
+	struct fscrypt_info *ci;
+	struct inode *inode;
+	struct inode *toput_inode = NULL;
+
+	spin_lock(&mk->mk_decrypted_inodes_lock);
+
+	list_for_each_entry(ci, &mk->mk_decrypted_inodes, ci_master_key_link) {
+		inode = ci->ci_inode;
+		spin_lock(&inode->i_lock);
+		if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+		__iget(inode);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&mk->mk_decrypted_inodes_lock);
+
+		shrink_dcache_inode(inode);
+		iput(toput_inode);
+		toput_inode = inode;
+
+		spin_lock(&mk->mk_decrypted_inodes_lock);
+	}
+
+	spin_unlock(&mk->mk_decrypted_inodes_lock);
+	iput(toput_inode);
+}
+
+static int check_for_busy_inodes(struct super_block *sb,
+				 struct fscrypt_master_key *mk)
+{
+	struct list_head *pos;
+	size_t busy_count = 0;
+	unsigned long ino;
+	struct dentry *dentry;
+	char _path[256];
+	char *path = NULL;
+
+	spin_lock(&mk->mk_decrypted_inodes_lock);
+
+	list_for_each(pos, &mk->mk_decrypted_inodes)
+		busy_count++;
+
+	if (busy_count == 0) {
+		spin_unlock(&mk->mk_decrypted_inodes_lock);
+		return 0;
+	}
+
+	{
+		/* select an example file to show for debugging purposes */
+		struct inode *inode =
+			list_first_entry(&mk->mk_decrypted_inodes,
+					 struct fscrypt_info,
+					 ci_master_key_link)->ci_inode;
+		ino = inode->i_ino;
+		dentry = d_find_alias(inode);
+	}
+	spin_unlock(&mk->mk_decrypted_inodes_lock);
+
+	if (dentry) {
+		path = dentry_path(dentry, _path, sizeof(_path));
+		dput(dentry);
+	}
+	if (IS_ERR_OR_NULL(path))
+		path = "(unknown)";
+
+	fscrypt_warn(NULL,
+		     "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)",
+		     sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec),
+		     master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u,
+		     ino, path);
+	return -EBUSY;
+}
+
+static int try_to_lock_encrypted_files(struct super_block *sb,
+				       struct fscrypt_master_key *mk)
+{
+	int err1;
+	int err2;
+
+	/*
+	 * An inode can't be evicted while it is dirty or has dirty pages.
+	 * Thus, we first have to clean the inodes in ->mk_decrypted_inodes.
+	 *
+	 * Just do it the easy way: call sync_filesystem().  It's overkill, but
+	 * it works, and it's more important to minimize the amount of caches we
+	 * drop than the amount of data we sync.  Also, unprivileged users can
+	 * already call sync_filesystem() via sys_syncfs() or sys_sync().
+	 */
+	down_read(&sb->s_umount);
+	err1 = sync_filesystem(sb);
+	up_read(&sb->s_umount);
+	/* If a sync error occurs, still try to evict as much as possible. */
+
+	/*
+	 * Inodes are pinned by their dentries, so we have to evict their
+	 * dentries.  shrink_dcache_sb() would suffice, but would be overkill
+	 * and inappropriate for use by unprivileged users.  So instead go
+	 * through the inodes' alias lists and try to evict each dentry.
+	 */
+	evict_dentries_for_decrypted_inodes(mk);
+
+	/*
+	 * evict_dentries_for_decrypted_inodes() already iput() each inode in
+	 * the list; any inodes for which that dropped the last reference will
+	 * have been evicted due to fscrypt_drop_inode() detecting the key
+	 * removal and telling the VFS to evict the inode.  So to finish, we
+	 * just need to check whether any inodes couldn't be evicted.
+	 */
+	err2 = check_for_busy_inodes(sb, mk);
+
+	return err1 ?: err2;
+}
+
+/*
+ * Try to remove an fscrypt master encryption key.
+ *
+ * First we wipe the actual master key secret, so that no more inodes can be
+ * unlocked with it.  Then we try to evict all cached inodes that had been
+ * unlocked with the key.
+ *
+ * If all inodes were evicted, then we unlink the fscrypt_master_key from the
+ * keyring.  Otherwise it remains in the keyring in the "incompletely removed"
+ * state (without the actual secret key) where it tracks the list of remaining
+ * inodes.  Userspace can execute the ioctl again later to retry eviction, or
+ * alternatively can re-add the secret key again.
+ *
+ * For more details, see the "Removing keys" section of
+ * Documentation/filesystems/fscrypt.rst.
+ */
+int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
+{
+	struct super_block *sb = file_inode(filp)->i_sb;
+	struct fscrypt_remove_key_arg __user *uarg = _uarg;
+	struct fscrypt_remove_key_arg arg;
+	struct key *key;
+	struct fscrypt_master_key *mk;
+	u32 status_flags = 0;
+	int err;
+	bool dead;
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (!valid_key_spec(&arg.key_spec))
+		return -EINVAL;
+
+	if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+
+	/* Find the key being removed. */
+	key = fscrypt_find_master_key(sb, &arg.key_spec);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+	mk = key->payload.data[0];
+
+	down_write(&key->sem);
+
+	/* Wipe the secret. */
+	dead = false;
+	if (is_master_key_secret_present(&mk->mk_secret)) {
+		wipe_master_key_secret(&mk->mk_secret);
+		dead = refcount_dec_and_test(&mk->mk_refcount);
+	}
+	up_write(&key->sem);
+	if (dead) {
+		/*
+		 * No inodes reference the key, and we wiped the secret, so the
+		 * key object is free to be removed from the keyring.
+		 */
+		key_invalidate(key);
+		err = 0;
+	} else {
+		/* Some inodes still reference this key; try to evict them. */
+		err = try_to_lock_encrypted_files(sb, mk);
+		if (err == -EBUSY) {
+			status_flags |=
+				FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY;
+			err = 0;
+		}
+	}
+	/*
+	 * We return 0 if we successfully did something: wiped the secret, or
+	 * tried locking the files again.  Users need to check the informational
+	 * status flags if they care whether the key has been fully removed
+	 * including all files locked.
+	 */
+	key_put(key);
+	if (err == 0)
+		err = put_user(status_flags, &uarg->removal_status_flags);
+	return err;
+}
+EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key);
+
 int __init fscrypt_init_keyring(void)
 {
 	return register_key_type(&key_type_fscrypt);
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 1c6d18bcdc7b..7b60a47fc73c 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -213,8 +213,16 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
 
 /*
  * Find the master key, then set up the inode's actual encryption key.
+ *
+ * If the master key is found in the filesystem-level keyring, then the
+ * corresponding 'struct key' is returned in *master_key_ret with
+ * ->sem read-locked.  This is needed to ensure that only one task links the
+ * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create
+ * an fscrypt_info for the same inode), and to synchronize the master key being
+ * removed with a new inode starting to use it.
  */
-static int setup_file_encryption_key(struct fscrypt_info *ci)
+static int setup_file_encryption_key(struct fscrypt_info *ci,
+				     struct key **master_key_ret)
 {
 	struct key *key;
 	struct fscrypt_master_key *mk = NULL;
@@ -234,6 +242,13 @@ static int setup_file_encryption_key(struct fscrypt_info *ci)
 	}
 
 	mk = key->payload.data[0];
+	down_read(&key->sem);
+
+	/* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */
+	if (!is_master_key_secret_present(&mk->mk_secret)) {
+		err = -ENOKEY;
+		goto out_release_key;
+	}
 
 	if (mk->mk_secret.size < ci->ci_mode->keysize) {
 		fscrypt_warn(NULL,
@@ -246,14 +261,22 @@ static int setup_file_encryption_key(struct fscrypt_info *ci)
 	}
 
 	err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw);
+	if (err)
+		goto out_release_key;
+
+	*master_key_ret = key;
+	return 0;
 
 out_release_key:
+	up_read(&key->sem);
 	key_put(key);
 	return err;
 }
 
 static void put_crypt_info(struct fscrypt_info *ci)
 {
+	struct key *key;
+
 	if (!ci)
 		return;
 
@@ -263,6 +286,26 @@ static void put_crypt_info(struct fscrypt_info *ci)
 		crypto_free_skcipher(ci->ci_ctfm);
 		crypto_free_cipher(ci->ci_essiv_tfm);
 	}
+
+	key = ci->ci_master_key;
+	if (key) {
+		struct fscrypt_master_key *mk = key->payload.data[0];
+
+		/*
+		 * Remove this inode from the list of inodes that were unlocked
+		 * with the master key.
+		 *
+		 * In addition, if we're removing the last inode from a key that
+		 * already had its secret removed, invalidate the key so that it
+		 * gets removed from ->s_master_keys.
+		 */
+		spin_lock(&mk->mk_decrypted_inodes_lock);
+		list_del(&ci->ci_master_key_link);
+		spin_unlock(&mk->mk_decrypted_inodes_lock);
+		if (refcount_dec_and_test(&mk->mk_refcount))
+			key_invalidate(key);
+		key_put(key);
+	}
 	kmem_cache_free(fscrypt_info_cachep, ci);
 }
 
@@ -271,6 +314,7 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	struct fscrypt_info *crypt_info;
 	struct fscrypt_context ctx;
 	struct fscrypt_mode *mode;
+	struct key *master_key = NULL;
 	int res;
 
 	if (fscrypt_has_encryption_key(inode))
@@ -335,13 +379,30 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	WARN_ON(mode->ivsize > FSCRYPT_MAX_IV_SIZE);
 	crypt_info->ci_mode = mode;
 
-	res = setup_file_encryption_key(crypt_info);
+	res = setup_file_encryption_key(crypt_info, &master_key);
 	if (res)
 		goto out;
 
-	if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL)
+	if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) {
+		if (master_key) {
+			struct fscrypt_master_key *mk =
+				master_key->payload.data[0];
+
+			refcount_inc(&mk->mk_refcount);
+			crypt_info->ci_master_key = key_get(master_key);
+			spin_lock(&mk->mk_decrypted_inodes_lock);
+			list_add(&crypt_info->ci_master_key_link,
+				 &mk->mk_decrypted_inodes);
+			spin_unlock(&mk->mk_decrypted_inodes_lock);
+		}
 		crypt_info = NULL;
+	}
+	res = 0;
 out:
+	if (master_key) {
+		up_read(&master_key->sem);
+		key_put(master_key);
+	}
 	if (res == -ENOKEY)
 		res = 0;
 	put_crypt_info(crypt_info);
@@ -376,3 +437,39 @@ void fscrypt_free_inode(struct inode *inode)
 	}
 }
 EXPORT_SYMBOL(fscrypt_free_inode);
+
+/**
+ * fscrypt_drop_inode - check whether the inode's master key has been removed
+ *
+ * Filesystems supporting fscrypt must call this from their ->drop_inode()
+ * method so that encrypted inodes are evicted as soon as they're no longer in
+ * use and their master key has been removed.
+ *
+ * Return: 1 if fscrypt wants the inode to be evicted now, otherwise 0
+ */
+int fscrypt_drop_inode(struct inode *inode)
+{
+	const struct fscrypt_info *ci = READ_ONCE(inode->i_crypt_info);
+	const struct fscrypt_master_key *mk;
+
+	/*
+	 * If ci is NULL, then the inode doesn't have an encryption key set up
+	 * so it's irrelevant.  If ci_master_key is NULL, then the master key
+	 * was provided via the legacy mechanism of the process-subscribed
+	 * keyrings, so we don't know whether it's been removed or not.
+	 */
+	if (!ci || !ci->ci_master_key)
+		return 0;
+	mk = ci->ci_master_key->payload.data[0];
+
+	/*
+	 * Note: since we aren't holding key->sem, the result here can
+	 * immediately become outdated.  But there's no correctness problem with
+	 * unnecessarily evicting.  Nor is there a correctness problem with not
+	 * evicting while iput() is racing with the key being removed, since
+	 * then the thread removing the key will either evict the inode itself
+	 * or will correctly detect that it wasn't evicted due to the race.
+	 */
+	return !is_master_key_secret_present(&mk->mk_secret);
+}
+EXPORT_SYMBOL_GPL(fscrypt_drop_inode);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 46bf66cf76ef..b494c5f9c01f 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -141,11 +141,13 @@ extern int fscrypt_inherit_context(struct inode *, struct inode *,
 /* keyring.c */
 extern void fscrypt_sb_free(struct super_block *sb);
 extern int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
+extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
 
 /* keysetup.c */
 extern int fscrypt_get_encryption_info(struct inode *);
 extern void fscrypt_put_encryption_info(struct inode *);
 extern void fscrypt_free_inode(struct inode *);
+extern int fscrypt_drop_inode(struct inode *inode);
 
 /* fname.c */
 extern int fscrypt_setup_filename(struct inode *, const struct qstr *,
@@ -381,6 +383,11 @@ static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg)
 	return -EOPNOTSUPP;
 }
 
+static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 /* keysetup.c */
 static inline int fscrypt_get_encryption_info(struct inode *inode)
 {
@@ -396,6 +403,11 @@ static inline void fscrypt_free_inode(struct inode *inode)
 {
 }
 
+static inline int fscrypt_drop_inode(struct inode *inode)
+{
+	return 0;
+}
+
  /* fname.c */
 static inline int fscrypt_setup_filename(struct inode *dir,
 					 const struct qstr *iname,
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 6aeca3cb0a2d..07f37a27a944 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -76,10 +76,19 @@ struct fscrypt_add_key_arg {
 	__u8 raw[];
 };
 
+/* Struct passed to FS_IOC_REMOVE_ENCRYPTION_KEY */
+struct fscrypt_remove_key_arg {
+	struct fscrypt_key_specifier key_spec;
+#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY	0x00000001
+	__u32 removal_status_flags;	/* output */
+	__u32 __reserved[5];
+};
+
 #define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
 #define FS_IOC_GET_ENCRYPTION_PWSALT		_IOW('f', 20, __u8[16])
 #define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy)
 #define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
+#define FS_IOC_REMOVE_ENCRYPTION_KEY		_IOWR('f', 24, struct fscrypt_remove_key_arg)
 
 /**********************************************************************/
 
-- 
cgit v1.2.3


From 5a7e29924dac34f0690b06906aad9d70d3c022a5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:46 -0700
Subject: fscrypt: add FS_IOC_GET_ENCRYPTION_KEY_STATUS ioctl

Add a new fscrypt ioctl, FS_IOC_GET_ENCRYPTION_KEY_STATUS.  Given a key
specified by 'struct fscrypt_key_specifier' (the same way a key is
specified for the other fscrypt key management ioctls), it returns
status information in a 'struct fscrypt_get_key_status_arg'.

The main motivation for this is that applications need to be able to
check whether an encrypted directory is "unlocked" or not, so that they
can add the key if it is not, and avoid adding the key (which may
involve prompting the user for a passphrase) if it already is.

It's possible to use some workarounds such as checking whether opening a
regular file fails with ENOKEY, or checking whether the filenames "look
like gibberish" or not.  However, no workaround is usable in all cases.

Like the other key management ioctls, the keyrings syscalls may seem at
first to be a good fit for this.  Unfortunately, they are not.  Even if
we exposed the keyring ID of the ->s_master_keys keyring and gave
everyone Search permission on it (note: currently the keyrings
permission system would also allow everyone to "invalidate" the keyring
too), the fscrypt keys have an additional state that doesn't map cleanly
to the keyrings API: the secret can be removed, but we can be still
tracking the files that were using the key, and the removal can be
re-attempted or the secret added again.

After later patches, some applications will also need a way to determine
whether a key was added by the current user vs. by some other user.
Reserved fields are included in fscrypt_get_key_status_arg for this and
other future extensions.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/keyring.c          | 63 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fscrypt.h      |  7 +++++
 include/uapi/linux/fscrypt.h | 15 +++++++++++
 3 files changed, 85 insertions(+)

(limited to 'include')

diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index c3423f0edc70..8c94c2fa0c13 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -11,6 +11,7 @@
  *
  * - FS_IOC_ADD_ENCRYPTION_KEY
  * - FS_IOC_REMOVE_ENCRYPTION_KEY
+ * - FS_IOC_GET_ENCRYPTION_KEY_STATUS
  *
  * See the "User API" section of Documentation/filesystems/fscrypt.rst for more
  * information about these ioctls.
@@ -538,6 +539,68 @@ int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
 }
 EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key);
 
+/*
+ * Retrieve the status of an fscrypt master encryption key.
+ *
+ * We set ->status to indicate whether the key is absent, present, or
+ * incompletely removed.  "Incompletely removed" means that the master key
+ * secret has been removed, but some files which had been unlocked with it are
+ * still in use.  This field allows applications to easily determine the state
+ * of an encrypted directory without using a hack such as trying to open a
+ * regular file in it (which can confuse the "incompletely removed" state with
+ * absent or present).
+ *
+ * For more details, see the "FS_IOC_GET_ENCRYPTION_KEY_STATUS" section of
+ * Documentation/filesystems/fscrypt.rst.
+ */
+int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
+{
+	struct super_block *sb = file_inode(filp)->i_sb;
+	struct fscrypt_get_key_status_arg arg;
+	struct key *key;
+	struct fscrypt_master_key *mk;
+	int err;
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (!valid_key_spec(&arg.key_spec))
+		return -EINVAL;
+
+	if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
+		return -EINVAL;
+
+	memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved));
+
+	key = fscrypt_find_master_key(sb, &arg.key_spec);
+	if (IS_ERR(key)) {
+		if (key != ERR_PTR(-ENOKEY))
+			return PTR_ERR(key);
+		arg.status = FSCRYPT_KEY_STATUS_ABSENT;
+		err = 0;
+		goto out;
+	}
+	mk = key->payload.data[0];
+	down_read(&key->sem);
+
+	if (!is_master_key_secret_present(&mk->mk_secret)) {
+		arg.status = FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED;
+		err = 0;
+		goto out_release_key;
+	}
+
+	arg.status = FSCRYPT_KEY_STATUS_PRESENT;
+	err = 0;
+out_release_key:
+	up_read(&key->sem);
+	key_put(key);
+out:
+	if (!err && copy_to_user(uarg, &arg, sizeof(arg)))
+		err = -EFAULT;
+	return err;
+}
+EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_key_status);
+
 int __init fscrypt_init_keyring(void)
 {
 	return register_key_type(&key_type_fscrypt);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index b494c5f9c01f..6628d09585bd 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -142,6 +142,7 @@ extern int fscrypt_inherit_context(struct inode *, struct inode *,
 extern void fscrypt_sb_free(struct super_block *sb);
 extern int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
 extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
+extern int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
 
 /* keysetup.c */
 extern int fscrypt_get_encryption_info(struct inode *);
@@ -388,6 +389,12 @@ static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg)
 	return -EOPNOTSUPP;
 }
 
+static inline int fscrypt_ioctl_get_key_status(struct file *filp,
+					       void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 /* keysetup.c */
 static inline int fscrypt_get_encryption_info(struct inode *inode)
 {
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 07f37a27a944..ed5995b15016 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -84,11 +84,26 @@ struct fscrypt_remove_key_arg {
 	__u32 __reserved[5];
 };
 
+/* Struct passed to FS_IOC_GET_ENCRYPTION_KEY_STATUS */
+struct fscrypt_get_key_status_arg {
+	/* input */
+	struct fscrypt_key_specifier key_spec;
+	__u32 __reserved[6];
+
+	/* output */
+#define FSCRYPT_KEY_STATUS_ABSENT		1
+#define FSCRYPT_KEY_STATUS_PRESENT		2
+#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED	3
+	__u32 status;
+	__u32 __out_reserved[15];
+};
+
 #define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
 #define FS_IOC_GET_ENCRYPTION_PWSALT		_IOW('f', 20, __u8[16])
 #define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy)
 #define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
 #define FS_IOC_REMOVE_ENCRYPTION_KEY		_IOWR('f', 24, struct fscrypt_remove_key_arg)
+#define FS_IOC_GET_ENCRYPTION_KEY_STATUS	_IOWR('f', 26, struct fscrypt_get_key_status_arg)
 
 /**********************************************************************/
 
-- 
cgit v1.2.3


From 5dae460c2292dbbdac3a7a982cd566f470d957a2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:47 -0700
Subject: fscrypt: v2 encryption policy support

Add a new fscrypt policy version, "v2".  It has the following changes
from the original policy version, which we call "v1" (*):

- Master keys (the user-provided encryption keys) are only ever used as
  input to HKDF-SHA512.  This is more flexible and less error-prone, and
  it avoids the quirks and limitations of the AES-128-ECB based KDF.
  Three classes of cryptographically isolated subkeys are defined:

    - Per-file keys, like used in v1 policies except for the new KDF.

    - Per-mode keys.  These implement the semantics of the DIRECT_KEY
      flag, which for v1 policies made the master key be used directly.
      These are also planned to be used for inline encryption when
      support for it is added.

    - Key identifiers (see below).

- Each master key is identified by a 16-byte master_key_identifier,
  which is derived from the key itself using HKDF-SHA512.  This prevents
  users from associating the wrong key with an encrypted file or
  directory.  This was easily possible with v1 policies, which
  identified the key by an arbitrary 8-byte master_key_descriptor.

- The key must be provided in the filesystem-level keyring, not in a
  process-subscribed keyring.

The following UAPI additions are made:

- The existing ioctl FS_IOC_SET_ENCRYPTION_POLICY can now be passed a
  fscrypt_policy_v2 to set a v2 encryption policy.  It's disambiguated
  from fscrypt_policy/fscrypt_policy_v1 by the version code prefix.

- A new ioctl FS_IOC_GET_ENCRYPTION_POLICY_EX is added.  It allows
  getting the v1 or v2 encryption policy of an encrypted file or
  directory.  The existing FS_IOC_GET_ENCRYPTION_POLICY ioctl could not
  be used because it did not have a way for userspace to indicate which
  policy structure is expected.  The new ioctl includes a size field, so
  it is extensible to future fscrypt policy versions.

- The ioctls FS_IOC_ADD_ENCRYPTION_KEY, FS_IOC_REMOVE_ENCRYPTION_KEY,
  and FS_IOC_GET_ENCRYPTION_KEY_STATUS now support managing keys for v2
  encryption policies.  Such keys are kept logically separate from keys
  for v1 encryption policies, and are identified by 'identifier' rather
  than by 'descriptor'.  The 'identifier' need not be provided when
  adding a key, since the kernel will calculate it anyway.

This patch temporarily keeps adding/removing v2 policy keys behind the
same permission check done for adding/removing v1 policy keys:
capable(CAP_SYS_ADMIN).  However, the next patch will carefully take
advantage of the cryptographically secure master_key_identifier to allow
non-root users to add/remove v2 policy keys, thus providing a full
replacement for v1 policies.

(*) Actually, in the API fscrypt_policy::version is 0 while on-disk
    fscrypt_context::format is 1.  But I believe it makes the most sense
    to advance both to '2' to have them be in sync, and to consider the
    numbering to start at 1 except for the API quirk.

Reviewed-by: Paul Crowley <paulcrowley@google.com>
Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/crypto.c           |   2 +-
 fs/crypto/fname.c            |   3 +-
 fs/crypto/fscrypt_private.h  | 187 ++++++++++++++++---
 fs/crypto/keyring.c          |  35 +++-
 fs/crypto/keysetup.c         | 202 ++++++++++++++++-----
 fs/crypto/keysetup_v1.c      |  18 +-
 fs/crypto/policy.c           | 422 ++++++++++++++++++++++++++++++++-----------
 include/linux/fscrypt.h      |   9 +-
 include/uapi/linux/fscrypt.h |  57 +++++-
 9 files changed, 741 insertions(+), 194 deletions(-)

(limited to 'include')

diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 65ca077e8d58..32a7ad0098cc 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -141,7 +141,7 @@ void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
 	memset(iv, 0, ci->ci_mode->ivsize);
 	iv->lblk_num = cpu_to_le64(lblk_num);
 
-	if (ci->ci_flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY)
+	if (fscrypt_is_direct_key_policy(&ci->ci_policy))
 		memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
 
 	if (ci->ci_essiv_tfm != NULL)
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index f4977d44d69b..3da3707c10e3 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -181,7 +181,8 @@ static int base64_decode(const char *src, int len, u8 *dst)
 bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len,
 				  u32 max_len, u32 *encrypted_len_ret)
 {
-	int padding = 4 << (inode->i_crypt_info->ci_flags &
+	const struct fscrypt_info *ci = inode->i_crypt_info;
+	int padding = 4 << (fscrypt_policy_flags(&ci->ci_policy) &
 			    FSCRYPT_POLICY_FLAGS_PAD_MASK);
 	u32 encrypted_len;
 
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 9556e9499dc5..c89e37d38e42 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -20,27 +20,127 @@
 
 #define FSCRYPT_MIN_KEY_SIZE		16
 
-/**
- * Encryption context for inode
- *
- * Protector format:
- *  1 byte: Protector format (1 = this version)
- *  1 byte: File contents encryption mode
- *  1 byte: File names encryption mode
- *  1 byte: Flags
- *  8 bytes: Master Key descriptor
- *  16 bytes: Encryption Key derivation nonce
- */
-struct fscrypt_context {
-	u8 format;
+#define FSCRYPT_CONTEXT_V1	1
+#define FSCRYPT_CONTEXT_V2	2
+
+struct fscrypt_context_v1 {
+	u8 version; /* FSCRYPT_CONTEXT_V1 */
 	u8 contents_encryption_mode;
 	u8 filenames_encryption_mode;
 	u8 flags;
 	u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 	u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
-} __packed;
+};
 
-#define FS_ENCRYPTION_CONTEXT_FORMAT_V1		1
+struct fscrypt_context_v2 {
+	u8 version; /* FSCRYPT_CONTEXT_V2 */
+	u8 contents_encryption_mode;
+	u8 filenames_encryption_mode;
+	u8 flags;
+	u8 __reserved[4];
+	u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
+	u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE];
+};
+
+/**
+ * fscrypt_context - the encryption context of an inode
+ *
+ * This is the on-disk equivalent of an fscrypt_policy, stored alongside each
+ * encrypted file usually in a hidden extended attribute.  It contains the
+ * fields from the fscrypt_policy, in order to identify the encryption algorithm
+ * and key with which the file is encrypted.  It also contains a nonce that was
+ * randomly generated by fscrypt itself; this is used as KDF input or as a tweak
+ * to cause different files to be encrypted differently.
+ */
+union fscrypt_context {
+	u8 version;
+	struct fscrypt_context_v1 v1;
+	struct fscrypt_context_v2 v2;
+};
+
+/*
+ * Return the size expected for the given fscrypt_context based on its version
+ * number, or 0 if the context version is unrecognized.
+ */
+static inline int fscrypt_context_size(const union fscrypt_context *ctx)
+{
+	switch (ctx->version) {
+	case FSCRYPT_CONTEXT_V1:
+		BUILD_BUG_ON(sizeof(ctx->v1) != 28);
+		return sizeof(ctx->v1);
+	case FSCRYPT_CONTEXT_V2:
+		BUILD_BUG_ON(sizeof(ctx->v2) != 40);
+		return sizeof(ctx->v2);
+	}
+	return 0;
+}
+
+#undef fscrypt_policy
+union fscrypt_policy {
+	u8 version;
+	struct fscrypt_policy_v1 v1;
+	struct fscrypt_policy_v2 v2;
+};
+
+/*
+ * Return the size expected for the given fscrypt_policy based on its version
+ * number, or 0 if the policy version is unrecognized.
+ */
+static inline int fscrypt_policy_size(const union fscrypt_policy *policy)
+{
+	switch (policy->version) {
+	case FSCRYPT_POLICY_V1:
+		return sizeof(policy->v1);
+	case FSCRYPT_POLICY_V2:
+		return sizeof(policy->v2);
+	}
+	return 0;
+}
+
+/* Return the contents encryption mode of a valid encryption policy */
+static inline u8
+fscrypt_policy_contents_mode(const union fscrypt_policy *policy)
+{
+	switch (policy->version) {
+	case FSCRYPT_POLICY_V1:
+		return policy->v1.contents_encryption_mode;
+	case FSCRYPT_POLICY_V2:
+		return policy->v2.contents_encryption_mode;
+	}
+	BUG();
+}
+
+/* Return the filenames encryption mode of a valid encryption policy */
+static inline u8
+fscrypt_policy_fnames_mode(const union fscrypt_policy *policy)
+{
+	switch (policy->version) {
+	case FSCRYPT_POLICY_V1:
+		return policy->v1.filenames_encryption_mode;
+	case FSCRYPT_POLICY_V2:
+		return policy->v2.filenames_encryption_mode;
+	}
+	BUG();
+}
+
+/* Return the flags (FSCRYPT_POLICY_FLAG*) of a valid encryption policy */
+static inline u8
+fscrypt_policy_flags(const union fscrypt_policy *policy)
+{
+	switch (policy->version) {
+	case FSCRYPT_POLICY_V1:
+		return policy->v1.flags;
+	case FSCRYPT_POLICY_V2:
+		return policy->v2.flags;
+	}
+	BUG();
+}
+
+static inline bool
+fscrypt_is_direct_key_policy(const union fscrypt_policy *policy)
+{
+	return fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAG_DIRECT_KEY;
+}
 
 /**
  * For encrypted symlinks, the ciphertext length is stored at the beginning
@@ -70,8 +170,8 @@ struct fscrypt_info {
 	struct crypto_cipher *ci_essiv_tfm;
 
 	/*
-	 * Encryption mode used for this inode.  It corresponds to either
-	 * ci_data_mode or ci_filename_mode, depending on the inode type.
+	 * Encryption mode used for this inode.  It corresponds to either the
+	 * contents or filenames encryption mode, depending on the inode type.
 	 */
 	struct fscrypt_mode *ci_mode;
 
@@ -97,11 +197,10 @@ struct fscrypt_info {
 	 */
 	struct fscrypt_direct_key *ci_direct_key;
 
-	/* fields from the fscrypt_context */
-	u8 ci_data_mode;
-	u8 ci_filename_mode;
-	u8 ci_flags;
-	u8 ci_master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+	/* The encryption policy used by this inode */
+	union fscrypt_policy ci_policy;
+
+	/* This inode's nonce, copied from the fscrypt_context */
 	u8 ci_nonce[FS_KEY_DERIVATION_NONCE_SIZE];
 };
 
@@ -181,6 +280,17 @@ struct fscrypt_hkdf {
 extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
 			     unsigned int master_key_size);
 
+/*
+ * The list of contexts in which fscrypt uses HKDF.  These values are used as
+ * the first byte of the HKDF application-specific info string to guarantee that
+ * info strings are never repeated between contexts.  This ensures that all HKDF
+ * outputs are unique and cryptographically isolated, i.e. knowledge of one
+ * output doesn't reveal another.
+ */
+#define HKDF_CONTEXT_KEY_IDENTIFIER	1
+#define HKDF_CONTEXT_PER_FILE_KEY	2
+#define HKDF_CONTEXT_PER_MODE_KEY	3
+
 extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context,
 			       const u8 *info, unsigned int infolen,
 			       u8 *okm, unsigned int okmlen);
@@ -194,10 +304,16 @@ extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf);
  */
 struct fscrypt_master_key_secret {
 
-	/* Size of the raw key in bytes */
+	/*
+	 * For v2 policy keys: HKDF context keyed by this master key.
+	 * For v1 policy keys: not set (hkdf.hmac_tfm == NULL).
+	 */
+	struct fscrypt_hkdf	hkdf;
+
+	/* Size of the raw key in bytes.  Set even if ->raw isn't set. */
 	u32			size;
 
-	/* The raw key */
+	/* For v1 policy keys: the raw key.  Wiped for v2 policy keys. */
 	u8			raw[FSCRYPT_MAX_KEY_SIZE];
 
 } __randomize_layout;
@@ -223,7 +339,12 @@ struct fscrypt_master_key {
 	 */
 	struct fscrypt_master_key_secret	mk_secret;
 
-	/* Arbitrary key descriptor which was assigned by userspace */
+	/*
+	 * For v1 policy keys: an arbitrary key descriptor which was assigned by
+	 * userspace (->descriptor).
+	 *
+	 * For v2 policy keys: a cryptographic hash of this key (->identifier).
+	 */
 	struct fscrypt_key_specifier		mk_spec;
 
 	/*
@@ -242,6 +363,9 @@ struct fscrypt_master_key {
 	struct list_head	mk_decrypted_inodes;
 	spinlock_t		mk_decrypted_inodes_lock;
 
+	/* Per-mode tfms for DIRECT_KEY policies, allocated on-demand */
+	struct crypto_skcipher	*mk_mode_keys[__FSCRYPT_MODE_MAX + 1];
+
 } __randomize_layout;
 
 static inline bool
@@ -263,6 +387,8 @@ static inline const char *master_key_spec_type(
 	switch (spec->type) {
 	case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
 		return "descriptor";
+	case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
+		return "identifier";
 	}
 	return "[unknown]";
 }
@@ -272,6 +398,8 @@ static inline int master_key_spec_len(const struct fscrypt_key_specifier *spec)
 	switch (spec->type) {
 	case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
 		return FSCRYPT_KEY_DESCRIPTOR_SIZE;
+	case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
+		return FSCRYPT_KEY_IDENTIFIER_SIZE;
 	}
 	return 0;
 }
@@ -315,5 +443,14 @@ extern int fscrypt_setup_v1_file_key(struct fscrypt_info *ci,
 
 extern int fscrypt_setup_v1_file_key_via_subscribed_keyrings(
 					struct fscrypt_info *ci);
+/* policy.c */
+
+extern bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
+				   const union fscrypt_policy *policy2);
+extern bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
+				     const struct inode *inode);
+extern int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
+				       const union fscrypt_context *ctx_u,
+				       int ctx_size);
 
 #endif /* _FSCRYPT_PRIVATE_H */
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 8c94c2fa0c13..fca3bdf01e7c 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -17,6 +17,7 @@
  * information about these ioctls.
  */
 
+#include <crypto/skcipher.h>
 #include <linux/key-type.h>
 #include <linux/seq_file.h>
 
@@ -24,6 +25,7 @@
 
 static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret)
 {
+	fscrypt_destroy_hkdf(&secret->hkdf);
 	memzero_explicit(secret, sizeof(*secret));
 }
 
@@ -36,7 +38,13 @@ static void move_master_key_secret(struct fscrypt_master_key_secret *dst,
 
 static void free_master_key(struct fscrypt_master_key *mk)
 {
+	size_t i;
+
 	wipe_master_key_secret(&mk->mk_secret);
+
+	for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++)
+		crypto_free_skcipher(mk->mk_mode_keys[i]);
+
 	kzfree(mk);
 }
 
@@ -109,7 +117,7 @@ static struct key *search_fscrypt_keyring(struct key *keyring,
 #define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE	\
 	(CONST_STRLEN("fscrypt-") + FIELD_SIZEOF(struct super_block, s_id))
 
-#define FSCRYPT_MK_DESCRIPTION_SIZE	(2 * FSCRYPT_KEY_DESCRIPTOR_SIZE + 1)
+#define FSCRYPT_MK_DESCRIPTION_SIZE	(2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
 
 static void format_fs_keyring_description(
 			char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE],
@@ -314,6 +322,31 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
 	if (!capable(CAP_SYS_ADMIN))
 		goto out_wipe_secret;
 
+	if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
+		err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size);
+		if (err)
+			goto out_wipe_secret;
+
+		/*
+		 * Now that the HKDF context is initialized, the raw key is no
+		 * longer needed.
+		 */
+		memzero_explicit(secret.raw, secret.size);
+
+		/* Calculate the key identifier and return it to userspace. */
+		err = fscrypt_hkdf_expand(&secret.hkdf,
+					  HKDF_CONTEXT_KEY_IDENTIFIER,
+					  NULL, 0, arg.key_spec.u.identifier,
+					  FSCRYPT_KEY_IDENTIFIER_SIZE);
+		if (err)
+			goto out_wipe_secret;
+		err = -EFAULT;
+		if (copy_to_user(uarg->key_spec.u.identifier,
+				 arg.key_spec.u.identifier,
+				 FSCRYPT_KEY_IDENTIFIER_SIZE))
+			goto out_wipe_secret;
+	}
+
 	err = add_master_key(sb, &secret, &arg.key_spec);
 out_wipe_secret:
 	wipe_master_key_secret(&secret);
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index 7b60a47fc73c..f423d48264db 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -52,20 +52,14 @@ static struct fscrypt_mode available_modes[] = {
 };
 
 static struct fscrypt_mode *
-select_encryption_mode(const struct fscrypt_info *ci, const struct inode *inode)
+select_encryption_mode(const union fscrypt_policy *policy,
+		       const struct inode *inode)
 {
-	if (!fscrypt_valid_enc_modes(ci->ci_data_mode, ci->ci_filename_mode)) {
-		fscrypt_warn(inode,
-			     "Unsupported encryption modes (contents mode %d, filenames mode %d)",
-			     ci->ci_data_mode, ci->ci_filename_mode);
-		return ERR_PTR(-EINVAL);
-	}
-
 	if (S_ISREG(inode->i_mode))
-		return &available_modes[ci->ci_data_mode];
+		return &available_modes[fscrypt_policy_contents_mode(policy)];
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		return &available_modes[ci->ci_filename_mode];
+		return &available_modes[fscrypt_policy_fnames_mode(policy)];
 
 	WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n",
 		  inode->i_ino, (inode->i_mode & S_IFMT));
@@ -211,6 +205,82 @@ int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
 	return 0;
 }
 
+static int setup_per_mode_key(struct fscrypt_info *ci,
+			      struct fscrypt_master_key *mk)
+{
+	struct fscrypt_mode *mode = ci->ci_mode;
+	u8 mode_num = mode - available_modes;
+	struct crypto_skcipher *tfm, *prev_tfm;
+	u8 mode_key[FSCRYPT_MAX_KEY_SIZE];
+	int err;
+
+	if (WARN_ON(mode_num >= ARRAY_SIZE(mk->mk_mode_keys)))
+		return -EINVAL;
+
+	/* pairs with cmpxchg() below */
+	tfm = READ_ONCE(mk->mk_mode_keys[mode_num]);
+	if (likely(tfm != NULL))
+		goto done;
+
+	BUILD_BUG_ON(sizeof(mode_num) != 1);
+	err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
+				  HKDF_CONTEXT_PER_MODE_KEY,
+				  &mode_num, sizeof(mode_num),
+				  mode_key, mode->keysize);
+	if (err)
+		return err;
+	tfm = fscrypt_allocate_skcipher(mode, mode_key, ci->ci_inode);
+	memzero_explicit(mode_key, mode->keysize);
+	if (IS_ERR(tfm))
+		return PTR_ERR(tfm);
+
+	/* pairs with READ_ONCE() above */
+	prev_tfm = cmpxchg(&mk->mk_mode_keys[mode_num], NULL, tfm);
+	if (prev_tfm != NULL) {
+		crypto_free_skcipher(tfm);
+		tfm = prev_tfm;
+	}
+done:
+	ci->ci_ctfm = tfm;
+	return 0;
+}
+
+static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
+				     struct fscrypt_master_key *mk)
+{
+	u8 derived_key[FSCRYPT_MAX_KEY_SIZE];
+	int err;
+
+	if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
+		/*
+		 * DIRECT_KEY: instead of deriving per-file keys, the per-file
+		 * nonce will be included in all the IVs.  But unlike v1
+		 * policies, for v2 policies in this case we don't encrypt with
+		 * the master key directly but rather derive a per-mode key.
+		 * This ensures that the master key is consistently used only
+		 * for HKDF, avoiding key reuse issues.
+		 */
+		if (!fscrypt_mode_supports_direct_key(ci->ci_mode)) {
+			fscrypt_warn(ci->ci_inode,
+				     "Direct key flag not allowed with %s",
+				     ci->ci_mode->friendly_name);
+			return -EINVAL;
+		}
+		return setup_per_mode_key(ci, mk);
+	}
+
+	err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
+				  HKDF_CONTEXT_PER_FILE_KEY,
+				  ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE,
+				  derived_key, ci->ci_mode->keysize);
+	if (err)
+		return err;
+
+	err = fscrypt_set_derived_key(ci, derived_key);
+	memzero_explicit(derived_key, ci->ci_mode->keysize);
+	return err;
+}
+
 /*
  * Find the master key, then set up the inode's actual encryption key.
  *
@@ -229,15 +299,36 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
 	struct fscrypt_key_specifier mk_spec;
 	int err;
 
-	mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
-	memcpy(mk_spec.u.descriptor, ci->ci_master_key_descriptor,
-	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
+	switch (ci->ci_policy.version) {
+	case FSCRYPT_POLICY_V1:
+		mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR;
+		memcpy(mk_spec.u.descriptor,
+		       ci->ci_policy.v1.master_key_descriptor,
+		       FSCRYPT_KEY_DESCRIPTOR_SIZE);
+		break;
+	case FSCRYPT_POLICY_V2:
+		mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER;
+		memcpy(mk_spec.u.identifier,
+		       ci->ci_policy.v2.master_key_identifier,
+		       FSCRYPT_KEY_IDENTIFIER_SIZE);
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
 
 	key = fscrypt_find_master_key(ci->ci_inode->i_sb, &mk_spec);
 	if (IS_ERR(key)) {
-		if (key != ERR_PTR(-ENOKEY))
+		if (key != ERR_PTR(-ENOKEY) ||
+		    ci->ci_policy.version != FSCRYPT_POLICY_V1)
 			return PTR_ERR(key);
 
+		/*
+		 * As a legacy fallback for v1 policies, search for the key in
+		 * the current task's subscribed keyrings too.  Don't move this
+		 * to before the search of ->s_master_keys, since users
+		 * shouldn't be able to override filesystem-level keys.
+		 */
 		return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci);
 	}
 
@@ -250,6 +341,12 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
 		goto out_release_key;
 	}
 
+	/*
+	 * Require that the master key be at least as long as the derived key.
+	 * Otherwise, the derived key cannot possibly contain as much entropy as
+	 * that required by the encryption mode it will be used for.  For v1
+	 * policies it's also required for the KDF to work at all.
+	 */
 	if (mk->mk_secret.size < ci->ci_mode->keysize) {
 		fscrypt_warn(NULL,
 			     "key with %s %*phN is too short (got %u bytes, need %u+ bytes)",
@@ -260,7 +357,18 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
 		goto out_release_key;
 	}
 
-	err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw);
+	switch (ci->ci_policy.version) {
+	case FSCRYPT_POLICY_V1:
+		err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw);
+		break;
+	case FSCRYPT_POLICY_V2:
+		err = fscrypt_setup_v2_file_key(ci, mk);
+		break;
+	default:
+		WARN_ON(1);
+		err = -EINVAL;
+		break;
+	}
 	if (err)
 		goto out_release_key;
 
@@ -282,7 +390,8 @@ static void put_crypt_info(struct fscrypt_info *ci)
 
 	if (ci->ci_direct_key) {
 		fscrypt_put_direct_key(ci->ci_direct_key);
-	} else {
+	} else if ((ci->ci_ctfm != NULL || ci->ci_essiv_tfm != NULL) &&
+		   !fscrypt_is_direct_key_policy(&ci->ci_policy)) {
 		crypto_free_skcipher(ci->ci_ctfm);
 		crypto_free_cipher(ci->ci_essiv_tfm);
 	}
@@ -312,7 +421,7 @@ static void put_crypt_info(struct fscrypt_info *ci)
 int fscrypt_get_encryption_info(struct inode *inode)
 {
 	struct fscrypt_info *crypt_info;
-	struct fscrypt_context ctx;
+	union fscrypt_context ctx;
 	struct fscrypt_mode *mode;
 	struct key *master_key = NULL;
 	int res;
@@ -335,27 +444,12 @@ int fscrypt_get_encryption_info(struct inode *inode)
 		}
 		/* Fake up a context for an unencrypted directory */
 		memset(&ctx, 0, sizeof(ctx));
-		ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
-		ctx.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
-		ctx.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
-		memset(ctx.master_key_descriptor, 0x42,
+		ctx.version = FSCRYPT_CONTEXT_V1;
+		ctx.v1.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
+		ctx.v1.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
+		memset(ctx.v1.master_key_descriptor, 0x42,
 		       FSCRYPT_KEY_DESCRIPTOR_SIZE);
-	} else if (res != sizeof(ctx)) {
-		fscrypt_warn(inode,
-			     "Unknown encryption context size (%d bytes)", res);
-		return -EINVAL;
-	}
-
-	if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1) {
-		fscrypt_warn(inode, "Unknown encryption context version (%d)",
-			     ctx.format);
-		return -EINVAL;
-	}
-
-	if (ctx.flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
-		fscrypt_warn(inode, "Unknown encryption context flags (0x%02x)",
-			     ctx.flags);
-		return -EINVAL;
+		res = sizeof(ctx.v1);
 	}
 
 	crypt_info = kmem_cache_zalloc(fscrypt_info_cachep, GFP_NOFS);
@@ -364,14 +458,34 @@ int fscrypt_get_encryption_info(struct inode *inode)
 
 	crypt_info->ci_inode = inode;
 
-	crypt_info->ci_flags = ctx.flags;
-	crypt_info->ci_data_mode = ctx.contents_encryption_mode;
-	crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
-	memcpy(crypt_info->ci_master_key_descriptor, ctx.master_key_descriptor,
-	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
-	memcpy(crypt_info->ci_nonce, ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+	res = fscrypt_policy_from_context(&crypt_info->ci_policy, &ctx, res);
+	if (res) {
+		fscrypt_warn(inode,
+			     "Unrecognized or corrupt encryption context");
+		goto out;
+	}
+
+	switch (ctx.version) {
+	case FSCRYPT_CONTEXT_V1:
+		memcpy(crypt_info->ci_nonce, ctx.v1.nonce,
+		       FS_KEY_DERIVATION_NONCE_SIZE);
+		break;
+	case FSCRYPT_CONTEXT_V2:
+		memcpy(crypt_info->ci_nonce, ctx.v2.nonce,
+		       FS_KEY_DERIVATION_NONCE_SIZE);
+		break;
+	default:
+		WARN_ON(1);
+		res = -EINVAL;
+		goto out;
+	}
+
+	if (!fscrypt_supported_policy(&crypt_info->ci_policy, inode)) {
+		res = -EINVAL;
+		goto out;
+	}
 
-	mode = select_encryption_mode(crypt_info, inode);
+	mode = select_encryption_mode(&crypt_info->ci_policy, inode);
 	if (IS_ERR(mode)) {
 		res = PTR_ERR(mode);
 		goto out;
diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c
index 631690bb6ed5..ad1a36c370c3 100644
--- a/fs/crypto/keysetup_v1.c
+++ b/fs/crypto/keysetup_v1.c
@@ -189,12 +189,13 @@ find_or_insert_direct_key(struct fscrypt_direct_key *to_insert,
 	 */
 
 	BUILD_BUG_ON(sizeof(hash_key) > FSCRYPT_KEY_DESCRIPTOR_SIZE);
-	memcpy(&hash_key, ci->ci_master_key_descriptor, sizeof(hash_key));
+	memcpy(&hash_key, ci->ci_policy.v1.master_key_descriptor,
+	       sizeof(hash_key));
 
 	spin_lock(&fscrypt_direct_keys_lock);
 	hash_for_each_possible(fscrypt_direct_keys, dk, dk_node, hash_key) {
-		if (memcmp(ci->ci_master_key_descriptor, dk->dk_descriptor,
-			   FSCRYPT_KEY_DESCRIPTOR_SIZE) != 0)
+		if (memcmp(ci->ci_policy.v1.master_key_descriptor,
+			   dk->dk_descriptor, FSCRYPT_KEY_DESCRIPTOR_SIZE) != 0)
 			continue;
 		if (ci->ci_mode != dk->dk_mode)
 			continue;
@@ -237,7 +238,7 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key)
 		dk->dk_ctfm = NULL;
 		goto err_free_dk;
 	}
-	memcpy(dk->dk_descriptor, ci->ci_master_key_descriptor,
+	memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor,
 	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
 	memcpy(dk->dk_raw, raw_key, ci->ci_mode->keysize);
 
@@ -262,7 +263,8 @@ static int setup_v1_file_key_direct(struct fscrypt_info *ci,
 		return -EINVAL;
 	}
 
-	if (ci->ci_data_mode != ci->ci_filename_mode) {
+	if (ci->ci_policy.v1.contents_encryption_mode !=
+	    ci->ci_policy.v1.filenames_encryption_mode) {
 		fscrypt_warn(ci->ci_inode,
 			     "Direct key mode not allowed with different contents and filenames modes");
 		return -EINVAL;
@@ -308,7 +310,7 @@ out:
 
 int fscrypt_setup_v1_file_key(struct fscrypt_info *ci, const u8 *raw_master_key)
 {
-	if (ci->ci_flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY)
+	if (ci->ci_policy.v1.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY)
 		return setup_v1_file_key_direct(ci, raw_master_key);
 	else
 		return setup_v1_file_key_derived(ci, raw_master_key);
@@ -321,11 +323,11 @@ int fscrypt_setup_v1_file_key_via_subscribed_keyrings(struct fscrypt_info *ci)
 	int err;
 
 	key = find_and_lock_process_key(FSCRYPT_KEY_DESC_PREFIX,
-					ci->ci_master_key_descriptor,
+					ci->ci_policy.v1.master_key_descriptor,
 					ci->ci_mode->keysize, &payload);
 	if (key == ERR_PTR(-ENOKEY) && ci->ci_inode->i_sb->s_cop->key_prefix) {
 		key = find_and_lock_process_key(ci->ci_inode->i_sb->s_cop->key_prefix,
-						ci->ci_master_key_descriptor,
+						ci->ci_policy.v1.master_key_descriptor,
 						ci->ci_mode->keysize, &payload);
 	}
 	if (IS_ERR(key))
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index da7ae9c8b4ad..0141d338c1fd 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -5,8 +5,9 @@
  * Copyright (C) 2015, Google, Inc.
  * Copyright (C) 2015, Motorola Mobility.
  *
- * Written by Michael Halcrow, 2015.
+ * Originally written by Michael Halcrow, 2015.
  * Modified by Jaegeuk Kim, 2015.
+ * Modified by Eric Biggers, 2019 for v2 policy support.
  */
 
 #include <linux/random.h>
@@ -14,70 +15,291 @@
 #include <linux/mount.h>
 #include "fscrypt_private.h"
 
-/*
- * check whether an encryption policy is consistent with an encryption context
+/**
+ * fscrypt_policies_equal - check whether two encryption policies are the same
+ *
+ * Return: %true if equal, else %false
  */
-static bool is_encryption_context_consistent_with_policy(
-				const struct fscrypt_context *ctx,
-				const struct fscrypt_policy *policy)
+bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
+			    const union fscrypt_policy *policy2)
 {
-	return memcmp(ctx->master_key_descriptor, policy->master_key_descriptor,
-		      FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
-		(ctx->flags == policy->flags) &&
-		(ctx->contents_encryption_mode ==
-		 policy->contents_encryption_mode) &&
-		(ctx->filenames_encryption_mode ==
-		 policy->filenames_encryption_mode);
+	if (policy1->version != policy2->version)
+		return false;
+
+	return !memcmp(policy1, policy2, fscrypt_policy_size(policy1));
 }
 
-static int create_encryption_context_from_policy(struct inode *inode,
-				const struct fscrypt_policy *policy)
+/**
+ * fscrypt_supported_policy - check whether an encryption policy is supported
+ *
+ * Given an encryption policy, check whether all its encryption modes and other
+ * settings are supported by this kernel.  (But we don't currently don't check
+ * for crypto API support here, so attempting to use an algorithm not configured
+ * into the crypto API will still fail later.)
+ *
+ * Return: %true if supported, else %false
+ */
+bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
+			      const struct inode *inode)
 {
-	struct fscrypt_context ctx;
+	switch (policy_u->version) {
+	case FSCRYPT_POLICY_V1: {
+		const struct fscrypt_policy_v1 *policy = &policy_u->v1;
+
+		if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
+					     policy->filenames_encryption_mode)) {
+			fscrypt_warn(inode,
+				     "Unsupported encryption modes (contents %d, filenames %d)",
+				     policy->contents_encryption_mode,
+				     policy->filenames_encryption_mode);
+			return false;
+		}
+
+		if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
+			fscrypt_warn(inode,
+				     "Unsupported encryption flags (0x%02x)",
+				     policy->flags);
+			return false;
+		}
+
+		return true;
+	}
+	case FSCRYPT_POLICY_V2: {
+		const struct fscrypt_policy_v2 *policy = &policy_u->v2;
+
+		if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
+					     policy->filenames_encryption_mode)) {
+			fscrypt_warn(inode,
+				     "Unsupported encryption modes (contents %d, filenames %d)",
+				     policy->contents_encryption_mode,
+				     policy->filenames_encryption_mode);
+			return false;
+		}
+
+		if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
+			fscrypt_warn(inode,
+				     "Unsupported encryption flags (0x%02x)",
+				     policy->flags);
+			return false;
+		}
+
+		if (memchr_inv(policy->__reserved, 0,
+			       sizeof(policy->__reserved))) {
+			fscrypt_warn(inode,
+				     "Reserved bits set in encryption policy");
+			return false;
+		}
+
+		return true;
+	}
+	}
+	return false;
+}
+
+/**
+ * fscrypt_new_context_from_policy - create a new fscrypt_context from a policy
+ *
+ * Create an fscrypt_context for an inode that is being assigned the given
+ * encryption policy.  A new nonce is randomly generated.
+ *
+ * Return: the size of the new context in bytes.
+ */
+static int fscrypt_new_context_from_policy(union fscrypt_context *ctx_u,
+					   const union fscrypt_policy *policy_u)
+{
+	memset(ctx_u, 0, sizeof(*ctx_u));
+
+	switch (policy_u->version) {
+	case FSCRYPT_POLICY_V1: {
+		const struct fscrypt_policy_v1 *policy = &policy_u->v1;
+		struct fscrypt_context_v1 *ctx = &ctx_u->v1;
+
+		ctx->version = FSCRYPT_CONTEXT_V1;
+		ctx->contents_encryption_mode =
+			policy->contents_encryption_mode;
+		ctx->filenames_encryption_mode =
+			policy->filenames_encryption_mode;
+		ctx->flags = policy->flags;
+		memcpy(ctx->master_key_descriptor,
+		       policy->master_key_descriptor,
+		       sizeof(ctx->master_key_descriptor));
+		get_random_bytes(ctx->nonce, sizeof(ctx->nonce));
+		return sizeof(*ctx);
+	}
+	case FSCRYPT_POLICY_V2: {
+		const struct fscrypt_policy_v2 *policy = &policy_u->v2;
+		struct fscrypt_context_v2 *ctx = &ctx_u->v2;
+
+		ctx->version = FSCRYPT_CONTEXT_V2;
+		ctx->contents_encryption_mode =
+			policy->contents_encryption_mode;
+		ctx->filenames_encryption_mode =
+			policy->filenames_encryption_mode;
+		ctx->flags = policy->flags;
+		memcpy(ctx->master_key_identifier,
+		       policy->master_key_identifier,
+		       sizeof(ctx->master_key_identifier));
+		get_random_bytes(ctx->nonce, sizeof(ctx->nonce));
+		return sizeof(*ctx);
+	}
+	}
+	BUG();
+}
 
-	ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
-	memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
-					FSCRYPT_KEY_DESCRIPTOR_SIZE);
+/**
+ * fscrypt_policy_from_context - convert an fscrypt_context to an fscrypt_policy
+ *
+ * Given an fscrypt_context, build the corresponding fscrypt_policy.
+ *
+ * Return: 0 on success, or -EINVAL if the fscrypt_context has an unrecognized
+ * version number or size.
+ *
+ * This does *not* validate the settings within the policy itself, e.g. the
+ * modes, flags, and reserved bits.  Use fscrypt_supported_policy() for that.
+ */
+int fscrypt_policy_from_context(union fscrypt_policy *policy_u,
+				const union fscrypt_context *ctx_u,
+				int ctx_size)
+{
+	memset(policy_u, 0, sizeof(*policy_u));
 
-	if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode,
-				     policy->filenames_encryption_mode))
+	if (ctx_size <= 0 || ctx_size != fscrypt_context_size(ctx_u))
 		return -EINVAL;
 
-	if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID)
+	switch (ctx_u->version) {
+	case FSCRYPT_CONTEXT_V1: {
+		const struct fscrypt_context_v1 *ctx = &ctx_u->v1;
+		struct fscrypt_policy_v1 *policy = &policy_u->v1;
+
+		policy->version = FSCRYPT_POLICY_V1;
+		policy->contents_encryption_mode =
+			ctx->contents_encryption_mode;
+		policy->filenames_encryption_mode =
+			ctx->filenames_encryption_mode;
+		policy->flags = ctx->flags;
+		memcpy(policy->master_key_descriptor,
+		       ctx->master_key_descriptor,
+		       sizeof(policy->master_key_descriptor));
+		return 0;
+	}
+	case FSCRYPT_CONTEXT_V2: {
+		const struct fscrypt_context_v2 *ctx = &ctx_u->v2;
+		struct fscrypt_policy_v2 *policy = &policy_u->v2;
+
+		policy->version = FSCRYPT_POLICY_V2;
+		policy->contents_encryption_mode =
+			ctx->contents_encryption_mode;
+		policy->filenames_encryption_mode =
+			ctx->filenames_encryption_mode;
+		policy->flags = ctx->flags;
+		memcpy(policy->__reserved, ctx->__reserved,
+		       sizeof(policy->__reserved));
+		memcpy(policy->master_key_identifier,
+		       ctx->master_key_identifier,
+		       sizeof(policy->master_key_identifier));
+		return 0;
+	}
+	}
+	/* unreachable */
+	return -EINVAL;
+}
+
+/* Retrieve an inode's encryption policy */
+static int fscrypt_get_policy(struct inode *inode, union fscrypt_policy *policy)
+{
+	const struct fscrypt_info *ci;
+	union fscrypt_context ctx;
+	int ret;
+
+	ci = READ_ONCE(inode->i_crypt_info);
+	if (ci) {
+		/* key available, use the cached policy */
+		*policy = ci->ci_policy;
+		return 0;
+	}
+
+	if (!IS_ENCRYPTED(inode))
+		return -ENODATA;
+
+	ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+	if (ret < 0)
+		return (ret == -ERANGE) ? -EINVAL : ret;
+
+	return fscrypt_policy_from_context(policy, &ctx, ret);
+}
+
+static int set_encryption_policy(struct inode *inode,
+				 const union fscrypt_policy *policy)
+{
+	union fscrypt_context ctx;
+	int ctxsize;
+
+	if (!fscrypt_supported_policy(policy, inode))
 		return -EINVAL;
 
-	ctx.contents_encryption_mode = policy->contents_encryption_mode;
-	ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
-	ctx.flags = policy->flags;
-	BUILD_BUG_ON(sizeof(ctx.nonce) != FS_KEY_DERIVATION_NONCE_SIZE);
-	get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+	if (policy->version == FSCRYPT_POLICY_V1) {
+		/*
+		 * The original encryption policy version provided no way of
+		 * verifying that the correct master key was supplied, which was
+		 * insecure in scenarios where multiple users have access to the
+		 * same encrypted files (even just read-only access).  The new
+		 * encryption policy version fixes this and also implies use of
+		 * an improved key derivation function and allows non-root users
+		 * to securely remove keys.  So as long as compatibility with
+		 * old kernels isn't required, it is recommended to use the new
+		 * policy version for all new encrypted directories.
+		 */
+		pr_warn_once("%s (pid %d) is setting deprecated v1 encryption policy; recommend upgrading to v2.\n",
+			     current->comm, current->pid);
+	}
 
-	return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
+	ctxsize = fscrypt_new_context_from_policy(&ctx, policy);
+
+	return inode->i_sb->s_cop->set_context(inode, &ctx, ctxsize, NULL);
 }
 
 int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
 {
-	struct fscrypt_policy policy;
+	union fscrypt_policy policy;
+	union fscrypt_policy existing_policy;
 	struct inode *inode = file_inode(filp);
+	u8 version;
+	int size;
 	int ret;
-	struct fscrypt_context ctx;
 
-	if (copy_from_user(&policy, arg, sizeof(policy)))
+	if (get_user(policy.version, (const u8 __user *)arg))
 		return -EFAULT;
 
+	size = fscrypt_policy_size(&policy);
+	if (size <= 0)
+		return -EINVAL;
+
+	/*
+	 * We should just copy the remaining 'size - 1' bytes here, but a
+	 * bizarre bug in gcc 7 and earlier (fixed by gcc r255731) causes gcc to
+	 * think that size can be 0 here (despite the check above!) *and* that
+	 * it's a compile-time constant.  Thus it would think copy_from_user()
+	 * is passed compile-time constant ULONG_MAX, causing the compile-time
+	 * buffer overflow check to fail, breaking the build. This only occurred
+	 * when building an i386 kernel with -Os and branch profiling enabled.
+	 *
+	 * Work around it by just copying the first byte again...
+	 */
+	version = policy.version;
+	if (copy_from_user(&policy, arg, size))
+		return -EFAULT;
+	policy.version = version;
+
 	if (!inode_owner_or_capable(inode))
 		return -EACCES;
 
-	if (policy.version != 0)
-		return -EINVAL;
-
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
 
 	inode_lock(inode);
 
-	ret = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+	ret = fscrypt_get_policy(inode, &existing_policy);
 	if (ret == -ENODATA) {
 		if (!S_ISDIR(inode->i_mode))
 			ret = -ENOTDIR;
@@ -86,14 +308,10 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
 		else if (!inode->i_sb->s_cop->empty_dir(inode))
 			ret = -ENOTEMPTY;
 		else
-			ret = create_encryption_context_from_policy(inode,
-								    &policy);
-	} else if (ret == sizeof(ctx) &&
-		   is_encryption_context_consistent_with_policy(&ctx,
-								&policy)) {
-		/* The file already uses the same encryption policy. */
-		ret = 0;
-	} else if (ret >= 0 || ret == -ERANGE) {
+			ret = set_encryption_policy(inode, &policy);
+	} else if (ret == -EINVAL ||
+		   (ret == 0 && !fscrypt_policies_equal(&policy,
+							&existing_policy))) {
 		/* The file already uses a different encryption policy. */
 		ret = -EEXIST;
 	}
@@ -105,37 +323,57 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
 }
 EXPORT_SYMBOL(fscrypt_ioctl_set_policy);
 
+/* Original ioctl version; can only get the original policy version */
 int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
 {
-	struct inode *inode = file_inode(filp);
-	struct fscrypt_context ctx;
-	struct fscrypt_policy policy;
-	int res;
+	union fscrypt_policy policy;
+	int err;
 
-	if (!IS_ENCRYPTED(inode))
-		return -ENODATA;
+	err = fscrypt_get_policy(file_inode(filp), &policy);
+	if (err)
+		return err;
 
-	res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
-	if (res < 0 && res != -ERANGE)
-		return res;
-	if (res != sizeof(ctx))
-		return -EINVAL;
-	if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
+	if (policy.version != FSCRYPT_POLICY_V1)
 		return -EINVAL;
 
-	policy.version = 0;
-	policy.contents_encryption_mode = ctx.contents_encryption_mode;
-	policy.filenames_encryption_mode = ctx.filenames_encryption_mode;
-	policy.flags = ctx.flags;
-	memcpy(policy.master_key_descriptor, ctx.master_key_descriptor,
-				FSCRYPT_KEY_DESCRIPTOR_SIZE);
-
-	if (copy_to_user(arg, &policy, sizeof(policy)))
+	if (copy_to_user(arg, &policy, sizeof(policy.v1)))
 		return -EFAULT;
 	return 0;
 }
 EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
 
+/* Extended ioctl version; can get policies of any version */
+int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *uarg)
+{
+	struct fscrypt_get_policy_ex_arg arg;
+	union fscrypt_policy *policy = (union fscrypt_policy *)&arg.policy;
+	size_t policy_size;
+	int err;
+
+	/* arg is policy_size, then policy */
+	BUILD_BUG_ON(offsetof(typeof(arg), policy_size) != 0);
+	BUILD_BUG_ON(offsetofend(typeof(arg), policy_size) !=
+		     offsetof(typeof(arg), policy));
+	BUILD_BUG_ON(sizeof(arg.policy) != sizeof(*policy));
+
+	err = fscrypt_get_policy(file_inode(filp), policy);
+	if (err)
+		return err;
+	policy_size = fscrypt_policy_size(policy);
+
+	if (copy_from_user(&arg, uarg, sizeof(arg.policy_size)))
+		return -EFAULT;
+
+	if (policy_size > arg.policy_size)
+		return -EOVERFLOW;
+	arg.policy_size = policy_size;
+
+	if (copy_to_user(uarg, &arg, sizeof(arg.policy_size) + policy_size))
+		return -EFAULT;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_policy_ex);
+
 /**
  * fscrypt_has_permitted_context() - is a file's encryption policy permitted
  *				     within its directory?
@@ -157,10 +395,8 @@ EXPORT_SYMBOL(fscrypt_ioctl_get_policy);
  */
 int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 {
-	const struct fscrypt_operations *cops = parent->i_sb->s_cop;
-	const struct fscrypt_info *parent_ci, *child_ci;
-	struct fscrypt_context parent_ctx, child_ctx;
-	int res;
+	union fscrypt_policy parent_policy, child_policy;
+	int err;
 
 	/* No restrictions on file types which are never encrypted */
 	if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
@@ -190,41 +426,22 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
 	 * In any case, if an unexpected error occurs, fall back to "forbidden".
 	 */
 
-	res = fscrypt_get_encryption_info(parent);
-	if (res)
+	err = fscrypt_get_encryption_info(parent);
+	if (err)
 		return 0;
-	res = fscrypt_get_encryption_info(child);
-	if (res)
+	err = fscrypt_get_encryption_info(child);
+	if (err)
 		return 0;
-	parent_ci = READ_ONCE(parent->i_crypt_info);
-	child_ci = READ_ONCE(child->i_crypt_info);
-
-	if (parent_ci && child_ci) {
-		return memcmp(parent_ci->ci_master_key_descriptor,
-			      child_ci->ci_master_key_descriptor,
-			      FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
-			(parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
-			(parent_ci->ci_filename_mode ==
-			 child_ci->ci_filename_mode) &&
-			(parent_ci->ci_flags == child_ci->ci_flags);
-	}
 
-	res = cops->get_context(parent, &parent_ctx, sizeof(parent_ctx));
-	if (res != sizeof(parent_ctx))
+	err = fscrypt_get_policy(parent, &parent_policy);
+	if (err)
 		return 0;
 
-	res = cops->get_context(child, &child_ctx, sizeof(child_ctx));
-	if (res != sizeof(child_ctx))
+	err = fscrypt_get_policy(child, &child_policy);
+	if (err)
 		return 0;
 
-	return memcmp(parent_ctx.master_key_descriptor,
-		      child_ctx.master_key_descriptor,
-		      FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 &&
-		(parent_ctx.contents_encryption_mode ==
-		 child_ctx.contents_encryption_mode) &&
-		(parent_ctx.filenames_encryption_mode ==
-		 child_ctx.filenames_encryption_mode) &&
-		(parent_ctx.flags == child_ctx.flags);
+	return fscrypt_policies_equal(&parent_policy, &child_policy);
 }
 EXPORT_SYMBOL(fscrypt_has_permitted_context);
 
@@ -240,7 +457,8 @@ EXPORT_SYMBOL(fscrypt_has_permitted_context);
 int fscrypt_inherit_context(struct inode *parent, struct inode *child,
 						void *fs_data, bool preload)
 {
-	struct fscrypt_context ctx;
+	union fscrypt_context ctx;
+	int ctxsize;
 	struct fscrypt_info *ci;
 	int res;
 
@@ -252,16 +470,10 @@ int fscrypt_inherit_context(struct inode *parent, struct inode *child,
 	if (ci == NULL)
 		return -ENOKEY;
 
-	ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
-	ctx.contents_encryption_mode = ci->ci_data_mode;
-	ctx.filenames_encryption_mode = ci->ci_filename_mode;
-	ctx.flags = ci->ci_flags;
-	memcpy(ctx.master_key_descriptor, ci->ci_master_key_descriptor,
-	       FSCRYPT_KEY_DESCRIPTOR_SIZE);
-	get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+	ctxsize = fscrypt_new_context_from_policy(&ctx, &ci->ci_policy);
+
 	BUILD_BUG_ON(sizeof(ctx) != FSCRYPT_SET_CONTEXT_MAX_SIZE);
-	res = parent->i_sb->s_cop->set_context(child, &ctx,
-						sizeof(ctx), fs_data);
+	res = parent->i_sb->s_cop->set_context(child, &ctx, ctxsize, fs_data);
 	if (res)
 		return res;
 	return preload ? fscrypt_get_encryption_info(child): 0;
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 6628d09585bd..8b8ff0484042 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -43,7 +43,7 @@ struct fscrypt_name {
 #define fname_len(p)		((p)->disk_name.len)
 
 /* Maximum value for the third parameter of fscrypt_operations.set_context(). */
-#define FSCRYPT_SET_CONTEXT_MAX_SIZE	28
+#define FSCRYPT_SET_CONTEXT_MAX_SIZE	40
 
 #ifdef CONFIG_FS_ENCRYPTION
 /*
@@ -135,6 +135,7 @@ extern void fscrypt_free_bounce_page(struct page *bounce_page);
 /* policy.c */
 extern int fscrypt_ioctl_set_policy(struct file *, const void __user *);
 extern int fscrypt_ioctl_get_policy(struct file *, void __user *);
+extern int fscrypt_ioctl_get_policy_ex(struct file *, void __user *);
 extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
 					void *, bool);
@@ -361,6 +362,12 @@ static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg)
 	return -EOPNOTSUPP;
 }
 
+static inline int fscrypt_ioctl_get_policy_ex(struct file *filp,
+					      void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int fscrypt_has_permitted_context(struct inode *parent,
 						struct inode *child)
 {
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index ed5995b15016..f961ebf83e98 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -10,15 +10,13 @@
 
 #include <linux/types.h>
 
-#define FSCRYPT_KEY_DESCRIPTOR_SIZE	8
-
 /* Encryption policy flags */
 #define FSCRYPT_POLICY_FLAGS_PAD_4		0x00
 #define FSCRYPT_POLICY_FLAGS_PAD_8		0x01
 #define FSCRYPT_POLICY_FLAGS_PAD_16		0x02
 #define FSCRYPT_POLICY_FLAGS_PAD_32		0x03
 #define FSCRYPT_POLICY_FLAGS_PAD_MASK		0x03
-#define FSCRYPT_POLICY_FLAG_DIRECT_KEY		0x04	/* use master key directly */
+#define FSCRYPT_POLICY_FLAG_DIRECT_KEY		0x04
 #define FSCRYPT_POLICY_FLAGS_VALID		0x07
 
 /* Encryption algorithms */
@@ -27,14 +25,24 @@
 #define FSCRYPT_MODE_AES_128_CBC		5
 #define FSCRYPT_MODE_AES_128_CTS		6
 #define FSCRYPT_MODE_ADIANTUM			9
+#define __FSCRYPT_MODE_MAX			9
 
-struct fscrypt_policy {
+/*
+ * Legacy policy version; ad-hoc KDF and no key verification.
+ * For new encrypted directories, use fscrypt_policy_v2 instead.
+ *
+ * Careful: the .version field for this is actually 0, not 1.
+ */
+#define FSCRYPT_POLICY_V1		0
+#define FSCRYPT_KEY_DESCRIPTOR_SIZE	8
+struct fscrypt_policy_v1 {
 	__u8 version;
 	__u8 contents_encryption_mode;
 	__u8 filenames_encryption_mode;
 	__u8 flags;
 	__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
 };
+#define fscrypt_policy	fscrypt_policy_v1
 
 /*
  * Process-subscribed "logon" key description prefix and payload format.
@@ -50,14 +58,45 @@ struct fscrypt_key {
 };
 
 /*
- * Keys are specified by an arbitrary 8-byte key "descriptor",
- * matching fscrypt_policy::master_key_descriptor.
+ * New policy version with HKDF and key verification (recommended).
+ */
+#define FSCRYPT_POLICY_V2		2
+#define FSCRYPT_KEY_IDENTIFIER_SIZE	16
+struct fscrypt_policy_v2 {
+	__u8 version;
+	__u8 contents_encryption_mode;
+	__u8 filenames_encryption_mode;
+	__u8 flags;
+	__u8 __reserved[4];
+	__u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
+};
+
+/* Struct passed to FS_IOC_GET_ENCRYPTION_POLICY_EX */
+struct fscrypt_get_policy_ex_arg {
+	__u64 policy_size; /* input/output */
+	union {
+		__u8 version;
+		struct fscrypt_policy_v1 v1;
+		struct fscrypt_policy_v2 v2;
+	} policy; /* output */
+};
+
+/*
+ * v1 policy keys are specified by an arbitrary 8-byte key "descriptor",
+ * matching fscrypt_policy_v1::master_key_descriptor.
  */
 #define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR	1
 
 /*
- * Specifies a key.  This doesn't contain the actual key itself; this is just
- * the "name" of the key.
+ * v2 policy keys are specified by a 16-byte key "identifier" which the kernel
+ * calculates as a cryptographic hash of the key itself,
+ * matching fscrypt_policy_v2::master_key_identifier.
+ */
+#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER	2
+
+/*
+ * Specifies a key, either for v1 or v2 policies.  This doesn't contain the
+ * actual key itself; this is just the "name" of the key.
  */
 struct fscrypt_key_specifier {
 	__u32 type;	/* one of FSCRYPT_KEY_SPEC_TYPE_* */
@@ -65,6 +104,7 @@ struct fscrypt_key_specifier {
 	union {
 		__u8 __reserved[32]; /* reserve some extra space */
 		__u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+		__u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
 	} u;
 };
 
@@ -101,6 +141,7 @@ struct fscrypt_get_key_status_arg {
 #define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
 #define FS_IOC_GET_ENCRYPTION_PWSALT		_IOW('f', 20, __u8[16])
 #define FS_IOC_GET_ENCRYPTION_POLICY		_IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_POLICY_EX		_IOWR('f', 22, __u8[9]) /* size + version */
 #define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
 #define FS_IOC_REMOVE_ENCRYPTION_KEY		_IOWR('f', 24, struct fscrypt_remove_key_arg)
 #define FS_IOC_GET_ENCRYPTION_KEY_STATUS	_IOWR('f', 26, struct fscrypt_get_key_status_arg)
-- 
cgit v1.2.3


From 23c688b54016eed15d39f4387ca9da241e165922 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:47 -0700
Subject: fscrypt: allow unprivileged users to add/remove keys for v2 policies

Allow the FS_IOC_ADD_ENCRYPTION_KEY and FS_IOC_REMOVE_ENCRYPTION_KEY
ioctls to be used by non-root users to add and remove encryption keys
from the filesystem-level crypto keyrings, subject to limitations.

Motivation: while privileged fscrypt key management is sufficient for
some users (e.g. Android and Chromium OS, where a privileged process
manages all keys), the old API by design also allows non-root users to
set up and use encrypted directories, and we don't want to regress on
that.  Especially, we don't want to force users to continue using the
old API, running into the visibility mismatch between files and keyrings
and being unable to "lock" encrypted directories.

Intuitively, the ioctls have to be privileged since they manipulate
filesystem-level state.  However, it's actually safe to make them
unprivileged if we very carefully enforce some specific limitations.

First, each key must be identified by a cryptographic hash so that a
user can't add the wrong key for another user's files.  For v2
encryption policies, we use the key_identifier for this.  v1 policies
don't have this, so managing keys for them remains privileged.

Second, each key a user adds is charged to their quota for the keyrings
service.  Thus, a user can't exhaust memory by adding a huge number of
keys.  By default each non-root user is allowed up to 200 keys; this can
be changed using the existing sysctl 'kernel.keys.maxkeys'.

Third, if multiple users add the same key, we keep track of those users
of the key (of which there remains a single copy), and won't really
remove the key, i.e. "lock" the encrypted files, until all those users
have removed it.  This prevents denial of service attacks that would be
possible under simpler schemes, such allowing the first user who added a
key to remove it -- since that could be a malicious user who has
compromised the key.  Of course, encryption keys should be kept secret,
but the idea is that using encryption should never be *less* secure than
not using encryption, even if your key was compromised.

We tolerate that a user will be unable to really remove a key, i.e.
unable to "lock" their encrypted files, if another user has added the
same key.  But in a sense, this is actually a good thing because it will
avoid providing a false notion of security where a key appears to have
been removed when actually it's still in memory, available to any
attacker who compromises the operating system kernel.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fscrypt_private.h  |  31 ++++-
 fs/crypto/keyring.c          | 320 ++++++++++++++++++++++++++++++++++++++++---
 fs/crypto/keysetup.c         |  18 +--
 include/uapi/linux/fscrypt.h |   6 +-
 4 files changed, 341 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index c89e37d38e42..d0e238234234 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -335,9 +335,16 @@ struct fscrypt_master_key {
 	 * FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or
 	 * FS_IOC_ADD_ENCRYPTION_KEY can add the secret again.
 	 *
-	 * Locking: protected by key->sem.
+	 * Locking: protected by key->sem (outer) and mk_secret_sem (inner).
+	 * The reason for two locks is that key->sem also protects modifying
+	 * mk_users, which ranks it above the semaphore for the keyring key
+	 * type, which is in turn above page faults (via keyring_read).  But
+	 * sometimes filesystems call fscrypt_get_encryption_info() from within
+	 * a transaction, which ranks it below page faults.  So we need a
+	 * separate lock which protects mk_secret but not also mk_users.
 	 */
 	struct fscrypt_master_key_secret	mk_secret;
+	struct rw_semaphore			mk_secret_sem;
 
 	/*
 	 * For v1 policy keys: an arbitrary key descriptor which was assigned by
@@ -347,6 +354,22 @@ struct fscrypt_master_key {
 	 */
 	struct fscrypt_key_specifier		mk_spec;
 
+	/*
+	 * Keyring which contains a key of type 'key_type_fscrypt_user' for each
+	 * user who has added this key.  Normally each key will be added by just
+	 * one user, but it's possible that multiple users share a key, and in
+	 * that case we need to keep track of those users so that one user can't
+	 * remove the key before the others want it removed too.
+	 *
+	 * This is NULL for v1 policy keys; those can only be added by root.
+	 *
+	 * Locking: in addition to this keyrings own semaphore, this is
+	 * protected by the master key's key->sem, so we can do atomic
+	 * search+insert.  It can also be searched without taking any locks, but
+	 * in that case the returned key may have already been removed.
+	 */
+	struct key		*mk_users;
+
 	/*
 	 * Length of ->mk_decrypted_inodes, plus one if mk_secret is present.
 	 * Once this goes to 0, the master key is removed from ->s_master_keys.
@@ -374,9 +397,9 @@ is_master_key_secret_present(const struct fscrypt_master_key_secret *secret)
 	/*
 	 * The READ_ONCE() is only necessary for fscrypt_drop_inode() and
 	 * fscrypt_key_describe().  These run in atomic context, so they can't
-	 * take key->sem and thus 'secret' can change concurrently which would
-	 * be a data race.  But they only need to know whether the secret *was*
-	 * present at the time of check, so READ_ONCE() suffices.
+	 * take ->mk_secret_sem and thus 'secret' can change concurrently which
+	 * would be a data race.  But they only need to know whether the secret
+	 * *was* present at the time of check, so READ_ONCE() suffices.
 	 */
 	return READ_ONCE(secret->size) != 0;
 }
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index fca3bdf01e7c..c654effb83f5 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -45,6 +45,7 @@ static void free_master_key(struct fscrypt_master_key *mk)
 	for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++)
 		crypto_free_skcipher(mk->mk_mode_keys[i]);
 
+	key_put(mk->mk_users);
 	kzfree(mk);
 }
 
@@ -93,7 +94,39 @@ static struct key_type key_type_fscrypt = {
 	.describe		= fscrypt_key_describe,
 };
 
-/* Search ->s_master_keys */
+static int fscrypt_user_key_instantiate(struct key *key,
+					struct key_preparsed_payload *prep)
+{
+	/*
+	 * We just charge FSCRYPT_MAX_KEY_SIZE bytes to the user's key quota for
+	 * each key, regardless of the exact key size.  The amount of memory
+	 * actually used is greater than the size of the raw key anyway.
+	 */
+	return key_payload_reserve(key, FSCRYPT_MAX_KEY_SIZE);
+}
+
+static void fscrypt_user_key_describe(const struct key *key, struct seq_file *m)
+{
+	seq_puts(m, key->description);
+}
+
+/*
+ * Type of key in ->mk_users.  Each key of this type represents a particular
+ * user who has added a particular master key.
+ *
+ * Note that the name of this key type really should be something like
+ * ".fscrypt-user" instead of simply ".fscrypt".  But the shorter name is chosen
+ * mainly for simplicity of presentation in /proc/keys when read by a non-root
+ * user.  And it is expected to be rare that a key is actually added by multiple
+ * users, since users should keep their encryption keys confidential.
+ */
+static struct key_type key_type_fscrypt_user = {
+	.name			= ".fscrypt",
+	.instantiate		= fscrypt_user_key_instantiate,
+	.describe		= fscrypt_user_key_describe,
+};
+
+/* Search ->s_master_keys or ->mk_users */
 static struct key *search_fscrypt_keyring(struct key *keyring,
 					  struct key_type *type,
 					  const char *description)
@@ -119,6 +152,13 @@ static struct key *search_fscrypt_keyring(struct key *keyring,
 
 #define FSCRYPT_MK_DESCRIPTION_SIZE	(2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
 
+#define FSCRYPT_MK_USERS_DESCRIPTION_SIZE	\
+	(CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \
+	 CONST_STRLEN("-users") + 1)
+
+#define FSCRYPT_MK_USER_DESCRIPTION_SIZE	\
+	(2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1)
+
 static void format_fs_keyring_description(
 			char description[FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE],
 			const struct super_block *sb)
@@ -134,6 +174,23 @@ static void format_mk_description(
 		master_key_spec_len(mk_spec), (u8 *)&mk_spec->u);
 }
 
+static void format_mk_users_keyring_description(
+			char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE],
+			const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
+{
+	sprintf(description, "fscrypt-%*phN-users",
+		FSCRYPT_KEY_IDENTIFIER_SIZE, mk_identifier);
+}
+
+static void format_mk_user_description(
+			char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE],
+			const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE])
+{
+
+	sprintf(description, "%*phN.uid.%u", FSCRYPT_KEY_IDENTIFIER_SIZE,
+		mk_identifier, __kuid_val(current_fsuid()));
+}
+
 /* Create ->s_master_keys if needed.  Synchronized by fscrypt_add_key_mutex. */
 static int allocate_filesystem_keyring(struct super_block *sb)
 {
@@ -181,6 +238,80 @@ struct key *fscrypt_find_master_key(struct super_block *sb,
 	return search_fscrypt_keyring(keyring, &key_type_fscrypt, description);
 }
 
+static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk)
+{
+	char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE];
+	struct key *keyring;
+
+	format_mk_users_keyring_description(description,
+					    mk->mk_spec.u.identifier);
+	keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
+				current_cred(), KEY_POS_SEARCH |
+				  KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW,
+				KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	if (IS_ERR(keyring))
+		return PTR_ERR(keyring);
+
+	mk->mk_users = keyring;
+	return 0;
+}
+
+/*
+ * Find the current user's "key" in the master key's ->mk_users.
+ * Returns ERR_PTR(-ENOKEY) if not found.
+ */
+static struct key *find_master_key_user(struct fscrypt_master_key *mk)
+{
+	char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE];
+
+	format_mk_user_description(description, mk->mk_spec.u.identifier);
+	return search_fscrypt_keyring(mk->mk_users, &key_type_fscrypt_user,
+				      description);
+}
+
+/*
+ * Give the current user a "key" in ->mk_users.  This charges the user's quota
+ * and marks the master key as added by the current user, so that it cannot be
+ * removed by another user with the key.  Either the master key's key->sem must
+ * be held for write, or the master key must be still undergoing initialization.
+ */
+static int add_master_key_user(struct fscrypt_master_key *mk)
+{
+	char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE];
+	struct key *mk_user;
+	int err;
+
+	format_mk_user_description(description, mk->mk_spec.u.identifier);
+	mk_user = key_alloc(&key_type_fscrypt_user, description,
+			    current_fsuid(), current_gid(), current_cred(),
+			    KEY_POS_SEARCH | KEY_USR_VIEW, 0, NULL);
+	if (IS_ERR(mk_user))
+		return PTR_ERR(mk_user);
+
+	err = key_instantiate_and_link(mk_user, NULL, 0, mk->mk_users, NULL);
+	key_put(mk_user);
+	return err;
+}
+
+/*
+ * Remove the current user's "key" from ->mk_users.
+ * The master key's key->sem must be held for write.
+ *
+ * Returns 0 if removed, -ENOKEY if not found, or another -errno code.
+ */
+static int remove_master_key_user(struct fscrypt_master_key *mk)
+{
+	struct key *mk_user;
+	int err;
+
+	mk_user = find_master_key_user(mk);
+	if (IS_ERR(mk_user))
+		return PTR_ERR(mk_user);
+	err = key_unlink(mk->mk_users, mk_user);
+	key_put(mk_user);
+	return err;
+}
+
 /*
  * Allocate a new fscrypt_master_key which contains the given secret, set it as
  * the payload of a new 'struct key' of type fscrypt, and link the 'struct key'
@@ -202,11 +333,26 @@ static int add_new_master_key(struct fscrypt_master_key_secret *secret,
 	mk->mk_spec = *mk_spec;
 
 	move_master_key_secret(&mk->mk_secret, secret);
+	init_rwsem(&mk->mk_secret_sem);
 
 	refcount_set(&mk->mk_refcount, 1); /* secret is present */
 	INIT_LIST_HEAD(&mk->mk_decrypted_inodes);
 	spin_lock_init(&mk->mk_decrypted_inodes_lock);
 
+	if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
+		err = allocate_master_key_users_keyring(mk);
+		if (err)
+			goto out_free_mk;
+		err = add_master_key_user(mk);
+		if (err)
+			goto out_free_mk;
+	}
+
+	/*
+	 * Note that we don't charge this key to anyone's quota, since when
+	 * ->mk_users is in use those keys are charged instead, and otherwise
+	 * (when ->mk_users isn't in use) only root can add these keys.
+	 */
 	format_mk_description(description, mk_spec);
 	key = key_alloc(&key_type_fscrypt, description,
 			GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
@@ -233,13 +379,45 @@ out_free_mk:
 static int add_existing_master_key(struct fscrypt_master_key *mk,
 				   struct fscrypt_master_key_secret *secret)
 {
-	if (is_master_key_secret_present(&mk->mk_secret))
-		return 0;
+	struct key *mk_user;
+	bool rekey;
+	int err;
 
-	if (!refcount_inc_not_zero(&mk->mk_refcount))
+	/*
+	 * If the current user is already in ->mk_users, then there's nothing to
+	 * do.  (Not applicable for v1 policy keys, which have NULL ->mk_users.)
+	 */
+	if (mk->mk_users) {
+		mk_user = find_master_key_user(mk);
+		if (mk_user != ERR_PTR(-ENOKEY)) {
+			if (IS_ERR(mk_user))
+				return PTR_ERR(mk_user);
+			key_put(mk_user);
+			return 0;
+		}
+	}
+
+	/* If we'll be re-adding ->mk_secret, try to take the reference. */
+	rekey = !is_master_key_secret_present(&mk->mk_secret);
+	if (rekey && !refcount_inc_not_zero(&mk->mk_refcount))
 		return KEY_DEAD;
 
-	move_master_key_secret(&mk->mk_secret, secret);
+	/* Add the current user to ->mk_users, if applicable. */
+	if (mk->mk_users) {
+		err = add_master_key_user(mk);
+		if (err) {
+			if (rekey && refcount_dec_and_test(&mk->mk_refcount))
+				return KEY_DEAD;
+			return err;
+		}
+	}
+
+	/* Re-add the secret if needed. */
+	if (rekey) {
+		down_write(&mk->mk_secret_sem);
+		move_master_key_secret(&mk->mk_secret, secret);
+		up_write(&mk->mk_secret_sem);
+	}
 	return 0;
 }
 
@@ -266,7 +444,7 @@ retry:
 	} else {
 		/*
 		 * Found the key in ->s_master_keys.  Re-add the secret if
-		 * needed.
+		 * needed, and add the user to ->mk_users if needed.
 		 */
 		down_write(&key->sem);
 		err = add_existing_master_key(key->payload.data[0], secret);
@@ -288,6 +466,23 @@ out_unlock:
  * Add a master encryption key to the filesystem, causing all files which were
  * encrypted with it to appear "unlocked" (decrypted) when accessed.
  *
+ * When adding a key for use by v1 encryption policies, this ioctl is
+ * privileged, and userspace must provide the 'key_descriptor'.
+ *
+ * When adding a key for use by v2+ encryption policies, this ioctl is
+ * unprivileged.  This is needed, in general, to allow non-root users to use
+ * encryption without encountering the visibility problems of process-subscribed
+ * keyrings and the inability to properly remove keys.  This works by having
+ * each key identified by its cryptographically secure hash --- the
+ * 'key_identifier'.  The cryptographic hash ensures that a malicious user
+ * cannot add the wrong key for a given identifier.  Furthermore, each added key
+ * is charged to the appropriate user's quota for the keyrings service, which
+ * prevents a malicious user from adding too many keys.  Finally, we forbid a
+ * user from removing a key while other users have added it too, which prevents
+ * a user who knows another user's key from causing a denial-of-service by
+ * removing it at an inopportune time.  (We tolerate that a user who knows a key
+ * can prevent other users from removing it.)
+ *
  * For more details, see the "FS_IOC_ADD_ENCRYPTION_KEY" section of
  * Documentation/filesystems/fscrypt.rst.
  */
@@ -318,11 +513,18 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
 	if (copy_from_user(secret.raw, uarg->raw, secret.size))
 		goto out_wipe_secret;
 
-	err = -EACCES;
-	if (!capable(CAP_SYS_ADMIN))
-		goto out_wipe_secret;
-
-	if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) {
+	switch (arg.key_spec.type) {
+	case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR:
+		/*
+		 * Only root can add keys that are identified by an arbitrary
+		 * descriptor rather than by a cryptographic hash --- since
+		 * otherwise a malicious user could add the wrong key.
+		 */
+		err = -EACCES;
+		if (!capable(CAP_SYS_ADMIN))
+			goto out_wipe_secret;
+		break;
+	case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER:
 		err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size);
 		if (err)
 			goto out_wipe_secret;
@@ -345,6 +547,11 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg)
 				 arg.key_spec.u.identifier,
 				 FSCRYPT_KEY_IDENTIFIER_SIZE))
 			goto out_wipe_secret;
+		break;
+	default:
+		WARN_ON(1);
+		err = -EINVAL;
+		goto out_wipe_secret;
 	}
 
 	err = add_master_key(sb, &secret, &arg.key_spec);
@@ -492,9 +699,12 @@ static int try_to_lock_encrypted_files(struct super_block *sb,
 /*
  * Try to remove an fscrypt master encryption key.
  *
- * First we wipe the actual master key secret, so that no more inodes can be
- * unlocked with it.  Then we try to evict all cached inodes that had been
- * unlocked with the key.
+ * This removes the current user's claim to the key, then removes the key itself
+ * if no other users have claims.
+ *
+ * To "remove the key itself", first we wipe the actual master key secret, so
+ * that no more inodes can be unlocked with it.  Then we try to evict all cached
+ * inodes that had been unlocked with the key.
  *
  * If all inodes were evicted, then we unlink the fscrypt_master_key from the
  * keyring.  Otherwise it remains in the keyring in the "incompletely removed"
@@ -525,7 +735,12 @@ int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
 	if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	/*
+	 * Only root can add and remove keys that are identified by an arbitrary
+	 * descriptor rather than by a cryptographic hash.
+	 */
+	if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR &&
+	    !capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
 	/* Find the key being removed. */
@@ -536,11 +751,34 @@ int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
 
 	down_write(&key->sem);
 
-	/* Wipe the secret. */
+	/* If relevant, remove current user's claim to the key */
+	if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) {
+		err = remove_master_key_user(mk);
+		if (err) {
+			up_write(&key->sem);
+			goto out_put_key;
+		}
+		if (mk->mk_users->keys.nr_leaves_on_tree != 0) {
+			/*
+			 * Other users have still added the key too.  We removed
+			 * the current user's claim to the key, but we still
+			 * can't remove the key itself.
+			 */
+			status_flags |=
+				FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS;
+			err = 0;
+			up_write(&key->sem);
+			goto out_put_key;
+		}
+	}
+
+	/* No user claims remaining.  Go ahead and wipe the secret. */
 	dead = false;
 	if (is_master_key_secret_present(&mk->mk_secret)) {
+		down_write(&mk->mk_secret_sem);
 		wipe_master_key_secret(&mk->mk_secret);
 		dead = refcount_dec_and_test(&mk->mk_refcount);
+		up_write(&mk->mk_secret_sem);
 	}
 	up_write(&key->sem);
 	if (dead) {
@@ -560,11 +798,12 @@ int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
 		}
 	}
 	/*
-	 * We return 0 if we successfully did something: wiped the secret, or
-	 * tried locking the files again.  Users need to check the informational
-	 * status flags if they care whether the key has been fully removed
-	 * including all files locked.
+	 * We return 0 if we successfully did something: removed a claim to the
+	 * key, wiped the secret, or tried locking the files again.  Users need
+	 * to check the informational status flags if they care whether the key
+	 * has been fully removed including all files locked.
 	 */
+out_put_key:
 	key_put(key);
 	if (err == 0)
 		err = put_user(status_flags, &uarg->removal_status_flags);
@@ -583,6 +822,15 @@ EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key);
  * regular file in it (which can confuse the "incompletely removed" state with
  * absent or present).
  *
+ * In addition, for v2 policy keys we allow applications to determine, via
+ * ->status_flags and ->user_count, whether the key has been added by the
+ * current user, by other users, or by both.  Most applications should not need
+ * this, since ordinarily only one user should know a given key.  However, if a
+ * secret key is shared by multiple users, applications may wish to add an
+ * already-present key to prevent other users from removing it.  This ioctl can
+ * be used to check whether that really is the case before the work is done to
+ * add the key --- which might e.g. require prompting the user for a passphrase.
+ *
  * For more details, see the "FS_IOC_GET_ENCRYPTION_KEY_STATUS" section of
  * Documentation/filesystems/fscrypt.rst.
  */
@@ -603,6 +851,8 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
 	if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved)))
 		return -EINVAL;
 
+	arg.status_flags = 0;
+	arg.user_count = 0;
 	memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved));
 
 	key = fscrypt_find_master_key(sb, &arg.key_spec);
@@ -623,6 +873,20 @@ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg)
 	}
 
 	arg.status = FSCRYPT_KEY_STATUS_PRESENT;
+	if (mk->mk_users) {
+		struct key *mk_user;
+
+		arg.user_count = mk->mk_users->keys.nr_leaves_on_tree;
+		mk_user = find_master_key_user(mk);
+		if (!IS_ERR(mk_user)) {
+			arg.status_flags |=
+				FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF;
+			key_put(mk_user);
+		} else if (mk_user != ERR_PTR(-ENOKEY)) {
+			err = PTR_ERR(mk_user);
+			goto out_release_key;
+		}
+	}
 	err = 0;
 out_release_key:
 	up_read(&key->sem);
@@ -636,5 +900,19 @@ EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_key_status);
 
 int __init fscrypt_init_keyring(void)
 {
-	return register_key_type(&key_type_fscrypt);
+	int err;
+
+	err = register_key_type(&key_type_fscrypt);
+	if (err)
+		return err;
+
+	err = register_key_type(&key_type_fscrypt_user);
+	if (err)
+		goto err_unregister_fscrypt;
+
+	return 0;
+
+err_unregister_fscrypt:
+	unregister_key_type(&key_type_fscrypt);
+	return err;
 }
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index f423d48264db..d71c2d6dd162 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -286,10 +286,10 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
  *
  * If the master key is found in the filesystem-level keyring, then the
  * corresponding 'struct key' is returned in *master_key_ret with
- * ->sem read-locked.  This is needed to ensure that only one task links the
- * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create
- * an fscrypt_info for the same inode), and to synchronize the master key being
- * removed with a new inode starting to use it.
+ * ->mk_secret_sem read-locked.  This is needed to ensure that only one task
+ * links the fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race
+ * to create an fscrypt_info for the same inode), and to synchronize the master
+ * key being removed with a new inode starting to use it.
  */
 static int setup_file_encryption_key(struct fscrypt_info *ci,
 				     struct key **master_key_ret)
@@ -333,7 +333,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
 	}
 
 	mk = key->payload.data[0];
-	down_read(&key->sem);
+	down_read(&mk->mk_secret_sem);
 
 	/* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */
 	if (!is_master_key_secret_present(&mk->mk_secret)) {
@@ -376,7 +376,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
 	return 0;
 
 out_release_key:
-	up_read(&key->sem);
+	up_read(&mk->mk_secret_sem);
 	key_put(key);
 	return err;
 }
@@ -514,7 +514,9 @@ int fscrypt_get_encryption_info(struct inode *inode)
 	res = 0;
 out:
 	if (master_key) {
-		up_read(&master_key->sem);
+		struct fscrypt_master_key *mk = master_key->payload.data[0];
+
+		up_read(&mk->mk_secret_sem);
 		key_put(master_key);
 	}
 	if (res == -ENOKEY)
@@ -577,7 +579,7 @@ int fscrypt_drop_inode(struct inode *inode)
 	mk = ci->ci_master_key->payload.data[0];
 
 	/*
-	 * Note: since we aren't holding key->sem, the result here can
+	 * Note: since we aren't holding ->mk_secret_sem, the result here can
 	 * immediately become outdated.  But there's no correctness problem with
 	 * unnecessarily evicting.  Nor is there a correctness problem with not
 	 * evicting while iput() is racing with the key being removed, since
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index f961ebf83e98..b9fb775e3db8 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -120,6 +120,7 @@ struct fscrypt_add_key_arg {
 struct fscrypt_remove_key_arg {
 	struct fscrypt_key_specifier key_spec;
 #define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY	0x00000001
+#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS	0x00000002
 	__u32 removal_status_flags;	/* output */
 	__u32 __reserved[5];
 };
@@ -135,7 +136,10 @@ struct fscrypt_get_key_status_arg {
 #define FSCRYPT_KEY_STATUS_PRESENT		2
 #define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED	3
 	__u32 status;
-	__u32 __out_reserved[15];
+#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF   0x00000001
+	__u32 status_flags;
+	__u32 user_count;
+	__u32 __out_reserved[13];
 };
 
 #define FS_IOC_SET_ENCRYPTION_POLICY		_IOR('f', 19, struct fscrypt_policy)
-- 
cgit v1.2.3


From 78a1b96bcf7a0721c7852bb1475218c3cbef884a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 4 Aug 2019 19:35:47 -0700
Subject: fscrypt: add FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS ioctl

Add a root-only variant of the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl which
removes all users' claims of the key, not just the current user's claim.
I.e., it always removes the key itself, no matter how many users have
added it.

This is useful for forcing a directory to be locked, without having to
figure out which user ID(s) the key was added under.  This is planned to
be used by a command like 'sudo fscrypt lock DIR --all-users' in the
fscrypt userspace tool (http://github.com/google/fscrypt).

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/keyring.c          | 29 ++++++++++++++++++++++++-----
 include/linux/fscrypt.h      |  8 ++++++++
 include/uapi/linux/fscrypt.h |  1 +
 3 files changed, 33 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index c654effb83f5..8c600ead0e2e 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -11,6 +11,7 @@
  *
  * - FS_IOC_ADD_ENCRYPTION_KEY
  * - FS_IOC_REMOVE_ENCRYPTION_KEY
+ * - FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS
  * - FS_IOC_GET_ENCRYPTION_KEY_STATUS
  *
  * See the "User API" section of Documentation/filesystems/fscrypt.rst for more
@@ -699,8 +700,10 @@ static int try_to_lock_encrypted_files(struct super_block *sb,
 /*
  * Try to remove an fscrypt master encryption key.
  *
- * This removes the current user's claim to the key, then removes the key itself
- * if no other users have claims.
+ * FS_IOC_REMOVE_ENCRYPTION_KEY (all_users=false) removes the current user's
+ * claim to the key, then removes the key itself if no other users have claims.
+ * FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS (all_users=true) always removes the
+ * key itself.
  *
  * To "remove the key itself", first we wipe the actual master key secret, so
  * that no more inodes can be unlocked with it.  Then we try to evict all cached
@@ -715,7 +718,7 @@ static int try_to_lock_encrypted_files(struct super_block *sb,
  * For more details, see the "Removing keys" section of
  * Documentation/filesystems/fscrypt.rst.
  */
-int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
+static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
 {
 	struct super_block *sb = file_inode(filp)->i_sb;
 	struct fscrypt_remove_key_arg __user *uarg = _uarg;
@@ -751,9 +754,12 @@ int fscrypt_ioctl_remove_key(struct file *filp, void __user *_uarg)
 
 	down_write(&key->sem);
 
-	/* If relevant, remove current user's claim to the key */
+	/* If relevant, remove current user's (or all users) claim to the key */
 	if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) {
-		err = remove_master_key_user(mk);
+		if (all_users)
+			err = keyring_clear(mk->mk_users);
+		else
+			err = remove_master_key_user(mk);
 		if (err) {
 			up_write(&key->sem);
 			goto out_put_key;
@@ -809,8 +815,21 @@ out_put_key:
 		err = put_user(status_flags, &uarg->removal_status_flags);
 	return err;
 }
+
+int fscrypt_ioctl_remove_key(struct file *filp, void __user *uarg)
+{
+	return do_remove_key(filp, uarg, false);
+}
 EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key);
 
+int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *uarg)
+{
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	return do_remove_key(filp, uarg, true);
+}
+EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key_all_users);
+
 /*
  * Retrieve the status of an fscrypt master encryption key.
  *
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 8b8ff0484042..f622f7460ed8 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -143,6 +143,8 @@ extern int fscrypt_inherit_context(struct inode *, struct inode *,
 extern void fscrypt_sb_free(struct super_block *sb);
 extern int fscrypt_ioctl_add_key(struct file *filp, void __user *arg);
 extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg);
+extern int fscrypt_ioctl_remove_key_all_users(struct file *filp,
+					      void __user *arg);
 extern int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
 
 /* keysetup.c */
@@ -396,6 +398,12 @@ static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg)
 	return -EOPNOTSUPP;
 }
 
+static inline int fscrypt_ioctl_remove_key_all_users(struct file *filp,
+						     void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int fscrypt_ioctl_get_key_status(struct file *filp,
 					       void __user *arg)
 {
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index b9fb775e3db8..39ccfe9311c3 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -148,6 +148,7 @@ struct fscrypt_get_key_status_arg {
 #define FS_IOC_GET_ENCRYPTION_POLICY_EX		_IOWR('f', 22, __u8[9]) /* size + version */
 #define FS_IOC_ADD_ENCRYPTION_KEY		_IOWR('f', 23, struct fscrypt_add_key_arg)
 #define FS_IOC_REMOVE_ENCRYPTION_KEY		_IOWR('f', 24, struct fscrypt_remove_key_arg)
+#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS	_IOWR('f', 25, struct fscrypt_remove_key_arg)
 #define FS_IOC_GET_ENCRYPTION_KEY_STATUS	_IOWR('f', 26, struct fscrypt_get_key_status_arg)
 
 /**********************************************************************/
-- 
cgit v1.2.3


From 075c2b6bf654bdba977795e95bba130113156a35 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 21 Jul 2019 23:25:02 +0900
Subject: scsi: use __u{8,16,32,64} instead of uint{8,16,32,64}_t in uapi
 headers

When CONFIG_UAPI_HEADER_TEST=y, exported headers are compile-tested to make
sure they can be included from user-space.

Currently, scsi_bsg_fc.h, scsi_netlink.h, and scsi_netlink_fc.h are
excluded from the test coverage. To make them join the compile-test, we
need to fix the build errors attached below.

For a case like this, we decided to use __u{8,16,32,64} variable types in
this discussion:

  https://lkml.org/lkml/2019/6/5/18

Build log:

  CC      usr/include/scsi/scsi_netlink_fc.h.s
  CC      usr/include/scsi/scsi_netlink.h.s
  CC      usr/include/scsi/scsi_bsg_fc.h.s
In file included from ./usr/include/scsi/scsi_netlink_fc.h:10:0,
                 from <command-line>:32:
./usr/include/scsi/scsi_netlink.h:29:2: error: unknown type name  uint8_t
  uint8_t version;
  ^~~~~~~
./usr/include/scsi/scsi_netlink.h:30:2: error: unknown type name  uint8_t
  uint8_t transport;
  ^~~~~~~
./usr/include/scsi/scsi_netlink.h:31:2: error: unknown type name  uint16_t
  uint16_t magic;
  ^~~~~~~~
./usr/include/scsi/scsi_netlink.h:32:2: error: unknown type name  uint16_t
  uint16_t msgtype;
  ^~~~~~~~
  CC      usr/include/rdma/vmw_pvrdma-abi.h.s
./usr/include/scsi/scsi_netlink.h:33:2: error: unknown type name  uint16_t
  uint16_t msglen;
  ^~~~~~~~
./usr/include/scsi/scsi_netlink.h:34:33: error:  uint64_t  undeclared here (not in a function); did you mean  __uint128_t ?
 } __attribute__((aligned(sizeof(uint64_t))));
                                 ^~~~~~~~
                                 __uint128_t
./usr/include/scsi/scsi_netlink.h:78:2: error: expected specifier-qualifier-list before  uint64_t
  uint64_t vendor_id;
  ^~~~~~~~
In file included from <command-line>:32:0:
./usr/include/scsi/scsi_netlink_fc.h:46:2: error: expected specifier-qualifier-list before  uint64_t
  uint64_t seconds;
  ^~~~~~~~
make[2]: *** [scripts/Makefile.build;302: usr/include/scsi/scsi_netlink_fc.h.s] Error 1
make[2]: *** Waiting for unfinished jobs....
In file included from <command-line>:32:0:
./usr/include/scsi/scsi_netlink.h:29:2: error: unknown type name  uint8_t
  uint8_t version;
  ^~~~~~~
./usr/include/scsi/scsi_netlink.h:30:2: error: unknown type name  uint8_t
  uint8_t transport;
  ^~~~~~~
./usr/include/scsi/scsi_netlink.h:31:2: error: unknown type name  uint16_t
  uint16_t magic;
  ^~~~~~~~
./usr/include/scsi/scsi_netlink.h:32:2: error: unknown type name  uint16_t
  uint16_t msgtype;
  ^~~~~~~~
./usr/include/scsi/scsi_netlink.h:33:2: error: unknown type name  uint16_t
  uint16_t msglen;
  ^~~~~~~~
./usr/include/scsi/scsi_netlink.h:34:33: error:  uint64_t  undeclared here (not in a function); did you mean  __uint128_t ?
 } __attribute__((aligned(sizeof(uint64_t))));
                                 ^~~~~~~~
                                 __uint128_t
./usr/include/scsi/scsi_netlink.h:78:2: error: expected specifier-qualifier-list before  uint64_t
  uint64_t vendor_id;
  ^~~~~~~~
make[2]: *** [scripts/Makefile.build;302: usr/include/scsi/scsi_netlink.h.s] Error 1
In file included from <command-line>:32:0:
./usr/include/scsi/scsi_bsg_fc.h:69:2: error: unknown type name  uint8_t
  uint8_t  reserved;
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:72:2: error: unknown type name  uint8_t
  uint8_t  port_id[3];
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:90:2: error: unknown type name  uint8_t
  uint8_t  reserved;
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:93:2: error: unknown type name  uint8_t
  uint8_t  port_id[3];
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:114:2: error: unknown type name  uint8_t
  uint8_t  command_code;
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:117:2: error: unknown type name  uint8_t
  uint8_t  port_id[3];
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:154:2: error: unknown type name  uint32_t
  uint32_t status;  /* See FC_CTELS_STATUS_xxx */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:158:3: error: unknown type name  uint8_t
   uint8_t action;  /* fragment_id for CT REJECT */
   ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:159:3: error: unknown type name  uint8_t
   uint8_t reason_code;
   ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:160:3: error: unknown type name  uint8_t
   uint8_t reason_explanation;
   ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:161:3: error: unknown type name  uint8_t
   uint8_t vendor_unique;
   ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:177:2: error: unknown type name  uint8_t
  uint8_t  reserved;
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:180:2: error: unknown type name  uint8_t
  uint8_t  port_id[3];
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:185:2: error: unknown type name  uint32_t
  uint32_t preamble_word0; /* revision & IN_ID */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:186:2: error: unknown type name  uint32_t
  uint32_t preamble_word1; /* GS_Type, GS_SubType, Options, Rsvd */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:187:2: error: unknown type name  uint32_t
  uint32_t preamble_word2; /* Cmd Code, Max Size */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:207:2: error: unknown type name  uint64_t
  uint64_t vendor_id;
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:210:2: error: unknown type name  uint32_t
  uint32_t vendor_cmd[0];
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:217:2: error: unknown type name  uint32_t
  uint32_t vendor_rsp[0];
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:236:2: error: unknown type name  uint8_t
  uint8_t els_code;
  ^~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:254:2: error: unknown type name  uint32_t
  uint32_t preamble_word0; /* revision & IN_ID */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:255:2: error: unknown type name  uint32_t
  uint32_t preamble_word1; /* GS_Type, GS_SubType, Options, Rsvd */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:256:2: error: unknown type name  uint32_t
  uint32_t preamble_word2; /* Cmd Code, Max Size */
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:268:2: error: unknown type name  uint32_t
  uint32_t msgcode;
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:292:2: error: unknown type name  uint32_t
  uint32_t result;
  ^~~~~~~~
./usr/include/scsi/scsi_bsg_fc.h:295:2: error: unknown type name  uint32_t
  uint32_t reply_payload_rcv_len;
  ^~~~~~~~

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/uapi/scsi/scsi_bsg_fc.h     | 54 +++++++++++++++++++------------------
 include/uapi/scsi/scsi_netlink.h    | 20 +++++++-------
 include/uapi/scsi/scsi_netlink_fc.h | 17 ++++++------
 3 files changed, 47 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/uapi/scsi/scsi_bsg_fc.h b/include/uapi/scsi/scsi_bsg_fc.h
index 52f32a60d056..3ae65e93235c 100644
--- a/include/uapi/scsi/scsi_bsg_fc.h
+++ b/include/uapi/scsi/scsi_bsg_fc.h
@@ -8,6 +8,8 @@
 #ifndef SCSI_BSG_FC_H
 #define SCSI_BSG_FC_H
 
+#include <linux/types.h>
+
 /*
  * This file intended to be included by both kernel and user space
  */
@@ -66,10 +68,10 @@
  * with the transport upon completion of the login.
  */
 struct fc_bsg_host_add_rport {
-	uint8_t		reserved;
+	__u8	reserved;
 
 	/* FC Address Identier of the remote port to login to */
-	uint8_t		port_id[3];
+	__u8	port_id[3];
 };
 
 /* Response:
@@ -87,10 +89,10 @@ struct fc_bsg_host_add_rport {
  * remain logged in with the remote port.
  */
 struct fc_bsg_host_del_rport {
-	uint8_t		reserved;
+	__u8	reserved;
 
 	/* FC Address Identier of the remote port to logout of */
-	uint8_t		port_id[3];
+	__u8	port_id[3];
 };
 
 /* Response:
@@ -111,10 +113,10 @@ struct fc_bsg_host_els {
 	 * ELS Command Code being sent (must be the same as byte 0
 	 * of the payload)
 	 */
-	uint8_t 	command_code;
+	__u8	command_code;
 
 	/* FC Address Identier of the remote port to send the ELS to */
-	uint8_t		port_id[3];
+	__u8	port_id[3];
 };
 
 /* Response:
@@ -151,14 +153,14 @@ struct fc_bsg_ctels_reply {
 	 * Note: x_RJT/BSY status will indicae that the rjt_data field
 	 *   is valid and contains the reason/explanation values.
 	 */
-	uint32_t	status;		/* See FC_CTELS_STATUS_xxx */
+	__u32	status;		/* See FC_CTELS_STATUS_xxx */
 
 	/* valid if status is not FC_CTELS_STATUS_OK */
 	struct	{
-		uint8_t	action;		/* fragment_id for CT REJECT */
-		uint8_t	reason_code;
-		uint8_t	reason_explanation;
-		uint8_t	vendor_unique;
+		__u8	action;		/* fragment_id for CT REJECT */
+		__u8	reason_code;
+		__u8	reason_explanation;
+		__u8	vendor_unique;
 	} rjt_data;
 };
 
@@ -174,17 +176,17 @@ struct fc_bsg_ctels_reply {
  * and whether to tear it down after the request.
  */
 struct fc_bsg_host_ct {
-	uint8_t		reserved;
+	__u8	reserved;
 
 	/* FC Address Identier of the remote port to send the ELS to */
-	uint8_t		port_id[3];
+	__u8	port_id[3];
 
 	/*
 	 * We need words 0-2 of the generic preamble for the LLD's
 	 */
-	uint32_t	preamble_word0;	/* revision & IN_ID */
-	uint32_t	preamble_word1;	/* GS_Type, GS_SubType, Options, Rsvd */
-	uint32_t	preamble_word2;	/* Cmd Code, Max Size */
+	__u32	preamble_word0;	/* revision & IN_ID */
+	__u32	preamble_word1;	/* GS_Type, GS_SubType, Options, Rsvd */
+	__u32	preamble_word2;	/* Cmd Code, Max Size */
 
 };
 /* Response:
@@ -204,17 +206,17 @@ struct fc_bsg_host_vendor {
 	 * Identifies the vendor that the message is formatted for. This
 	 * should be the recipient of the message.
 	 */
-	uint64_t vendor_id;
+	__u64 vendor_id;
 
 	/* start of vendor command area */
-	uint32_t vendor_cmd[0];
+	__u32 vendor_cmd[0];
 };
 
 /* Response:
  */
 struct fc_bsg_host_vendor_reply {
 	/* start of vendor response area */
-	uint32_t vendor_rsp[0];
+	__u32 vendor_rsp[0];
 };
 
 
@@ -233,7 +235,7 @@ struct fc_bsg_rport_els {
 	 * ELS Command Code being sent (must be the same as
 	 * byte 0 of the payload)
 	 */
-	uint8_t els_code;
+	__u8 els_code;
 };
 
 /* Response:
@@ -251,9 +253,9 @@ struct fc_bsg_rport_ct {
 	/*
 	 * We need words 0-2 of the generic preamble for the LLD's
 	 */
-	uint32_t	preamble_word0;	/* revision & IN_ID */
-	uint32_t	preamble_word1;	/* GS_Type, GS_SubType, Options, Rsvd */
-	uint32_t	preamble_word2;	/* Cmd Code, Max Size */
+	__u32	preamble_word0;	/* revision & IN_ID */
+	__u32	preamble_word1;	/* GS_Type, GS_SubType, Options, Rsvd */
+	__u32	preamble_word2;	/* Cmd Code, Max Size */
 };
 /* Response:
  *
@@ -265,7 +267,7 @@ struct fc_bsg_rport_ct {
 
 /* request (CDB) structure of the sg_io_v4 */
 struct fc_bsg_request {
-	uint32_t msgcode;
+	__u32 msgcode;
 	union {
 		struct fc_bsg_host_add_rport	h_addrport;
 		struct fc_bsg_host_del_rport	h_delrport;
@@ -289,10 +291,10 @@ struct fc_bsg_reply {
 	 *    msg and status fields. The per-msgcode reply structure
 	 *    will contain valid data.
 	 */
-	uint32_t result;
+	__u32 result;
 
 	/* If there was reply_payload, how much was recevied ? */
-	uint32_t reply_payload_rcv_len;
+	__u32 reply_payload_rcv_len;
 
 	union {
 		struct fc_bsg_host_vendor_reply		vendor_reply;
diff --git a/include/uapi/scsi/scsi_netlink.h b/include/uapi/scsi/scsi_netlink.h
index 5dd382054e45..1b1737c3c9d8 100644
--- a/include/uapi/scsi/scsi_netlink.h
+++ b/include/uapi/scsi/scsi_netlink.h
@@ -26,12 +26,12 @@
 
 /* SCSI_TRANSPORT_MSG event message header */
 struct scsi_nl_hdr {
-	uint8_t version;
-	uint8_t transport;
-	uint16_t magic;
-	uint16_t msgtype;
-	uint16_t msglen;
-} __attribute__((aligned(sizeof(uint64_t))));
+	__u8 version;
+	__u8 transport;
+	__u16 magic;
+	__u16 msgtype;
+	__u16 msglen;
+} __attribute__((aligned(sizeof(__u64))));
 
 /* scsi_nl_hdr->version value */
 #define SCSI_NL_VERSION				1
@@ -75,10 +75,10 @@ struct scsi_nl_hdr {
  */
 struct scsi_nl_host_vendor_msg {
 	struct scsi_nl_hdr snlh;		/* must be 1st element ! */
-	uint64_t vendor_id;
-	uint16_t host_no;
-	uint16_t vmsg_datalen;
-} __attribute__((aligned(sizeof(uint64_t))));
+	__u64 vendor_id;
+	__u16 host_no;
+	__u16 vmsg_datalen;
+} __attribute__((aligned(sizeof(__u64))));
 
 
 /*
diff --git a/include/uapi/scsi/scsi_netlink_fc.h b/include/uapi/scsi/scsi_netlink_fc.h
index a39023579051..7535253f1a96 100644
--- a/include/uapi/scsi/scsi_netlink_fc.h
+++ b/include/uapi/scsi/scsi_netlink_fc.h
@@ -7,6 +7,7 @@
 #ifndef SCSI_NETLINK_FC_H
 #define SCSI_NETLINK_FC_H
 
+#include <linux/types.h>
 #include <scsi/scsi_netlink.h>
 
 /*
@@ -43,14 +44,14 @@
  */
 struct fc_nl_event {
 	struct scsi_nl_hdr snlh;		/* must be 1st element ! */
-	uint64_t seconds;
-	uint64_t vendor_id;
-	uint16_t host_no;
-	uint16_t event_datalen;
-	uint32_t event_num;
-	uint32_t event_code;
-	uint32_t event_data;
-} __attribute__((aligned(sizeof(uint64_t))));
+	__u64 seconds;
+	__u64 vendor_id;
+	__u16 host_no;
+	__u16 event_datalen;
+	__u32 event_num;
+	__u32 event_code;
+	__u32 event_data;
+} __attribute__((aligned(sizeof(__u64))));
 
 
 #endif /* SCSI_NETLINK_FC_H */
-- 
cgit v1.2.3


From 3fda4c617e84c21c8a03b7cc2b3dbfe6c6461592 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:22 -0700
Subject: fs-verity: implement FS_IOC_ENABLE_VERITY ioctl

Add a function for filesystems to call to implement the
FS_IOC_ENABLE_VERITY ioctl.  This ioctl enables fs-verity on a file.

See the "FS_IOC_ENABLE_VERITY" section of
Documentation/filesystems/fsverity.rst for the documentation.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/Makefile       |   3 +-
 fs/verity/enable.c       | 363 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsverity.h |  66 +++++++++
 3 files changed, 431 insertions(+), 1 deletion(-)
 create mode 100644 fs/verity/enable.c

(limited to 'include')

diff --git a/fs/verity/Makefile b/fs/verity/Makefile
index 7fa628cd5eba..04b37475fd28 100644
--- a/fs/verity/Makefile
+++ b/fs/verity/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-$(CONFIG_FS_VERITY) += hash_algs.o \
+obj-$(CONFIG_FS_VERITY) += enable.o \
+			   hash_algs.o \
 			   init.o \
 			   open.o \
 			   verify.o
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
new file mode 100644
index 000000000000..df5dab03f0c2
--- /dev/null
+++ b/fs/verity/enable.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/enable.c: ioctl to enable verity on a file
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <crypto/hash.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+static int build_merkle_tree_level(struct inode *inode, unsigned int level,
+				   u64 num_blocks_to_hash,
+				   const struct merkle_tree_params *params,
+				   u8 *pending_hashes,
+				   struct ahash_request *req)
+{
+	const struct fsverity_operations *vops = inode->i_sb->s_vop;
+	unsigned int pending_size = 0;
+	u64 dst_block_num;
+	u64 i;
+	int err;
+
+	if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
+		return -EINVAL;
+
+	if (level < params->num_levels) {
+		dst_block_num = params->level_start[level];
+	} else {
+		if (WARN_ON(num_blocks_to_hash != 1))
+			return -EINVAL;
+		dst_block_num = 0; /* unused */
+	}
+
+	for (i = 0; i < num_blocks_to_hash; i++) {
+		struct page *src_page;
+
+		if ((pgoff_t)i % 10000 == 0 || i + 1 == num_blocks_to_hash)
+			pr_debug("Hashing block %llu of %llu for level %u\n",
+				 i + 1, num_blocks_to_hash, level);
+
+		if (level == 0) {
+			/* Leaf: hashing a data block */
+			src_page = read_mapping_page(inode->i_mapping, i, NULL);
+			if (IS_ERR(src_page)) {
+				err = PTR_ERR(src_page);
+				fsverity_err(inode,
+					     "Error %d reading data page %llu",
+					     err, i);
+				return err;
+			}
+		} else {
+			/* Non-leaf: hashing hash block from level below */
+			src_page = vops->read_merkle_tree_page(inode,
+					params->level_start[level - 1] + i);
+			if (IS_ERR(src_page)) {
+				err = PTR_ERR(src_page);
+				fsverity_err(inode,
+					     "Error %d reading Merkle tree page %llu",
+					     err, params->level_start[level - 1] + i);
+				return err;
+			}
+		}
+
+		err = fsverity_hash_page(params, inode, req, src_page,
+					 &pending_hashes[pending_size]);
+		put_page(src_page);
+		if (err)
+			return err;
+		pending_size += params->digest_size;
+
+		if (level == params->num_levels) /* Root hash? */
+			return 0;
+
+		if (pending_size + params->digest_size > params->block_size ||
+		    i + 1 == num_blocks_to_hash) {
+			/* Flush the pending hash block */
+			memset(&pending_hashes[pending_size], 0,
+			       params->block_size - pending_size);
+			err = vops->write_merkle_tree_block(inode,
+					pending_hashes,
+					dst_block_num,
+					params->log_blocksize);
+			if (err) {
+				fsverity_err(inode,
+					     "Error %d writing Merkle tree block %llu",
+					     err, dst_block_num);
+				return err;
+			}
+			dst_block_num++;
+			pending_size = 0;
+		}
+
+		if (fatal_signal_pending(current))
+			return -EINTR;
+		cond_resched();
+	}
+	return 0;
+}
+
+/*
+ * Build the Merkle tree for the given inode using the given parameters, and
+ * return the root hash in @root_hash.
+ *
+ * The tree is written to a filesystem-specific location as determined by the
+ * ->write_merkle_tree_block() method.  However, the blocks that comprise the
+ * tree are the same for all filesystems.
+ */
+static int build_merkle_tree(struct inode *inode,
+			     const struct merkle_tree_params *params,
+			     u8 *root_hash)
+{
+	u8 *pending_hashes;
+	struct ahash_request *req;
+	u64 blocks;
+	unsigned int level;
+	int err = -ENOMEM;
+
+	if (inode->i_size == 0) {
+		/* Empty file is a special case; root hash is all 0's */
+		memset(root_hash, 0, params->digest_size);
+		return 0;
+	}
+
+	pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
+	req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL);
+	if (!pending_hashes || !req)
+		goto out;
+
+	/*
+	 * Build each level of the Merkle tree, starting at the leaf level
+	 * (level 0) and ascending to the root node (level 'num_levels - 1').
+	 * Then at the end (level 'num_levels'), calculate the root hash.
+	 */
+	blocks = (inode->i_size + params->block_size - 1) >>
+		 params->log_blocksize;
+	for (level = 0; level <= params->num_levels; level++) {
+		err = build_merkle_tree_level(inode, level, blocks, params,
+					      pending_hashes, req);
+		if (err)
+			goto out;
+		blocks = (blocks + params->hashes_per_block - 1) >>
+			 params->log_arity;
+	}
+	memcpy(root_hash, pending_hashes, params->digest_size);
+	err = 0;
+out:
+	kfree(pending_hashes);
+	ahash_request_free(req);
+	return err;
+}
+
+static int enable_verity(struct file *filp,
+			 const struct fsverity_enable_arg *arg)
+{
+	struct inode *inode = file_inode(filp);
+	const struct fsverity_operations *vops = inode->i_sb->s_vop;
+	struct merkle_tree_params params = { };
+	struct fsverity_descriptor *desc;
+	size_t desc_size = sizeof(*desc);
+	struct fsverity_info *vi;
+	int err;
+
+	/* Start initializing the fsverity_descriptor */
+	desc = kzalloc(desc_size, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	desc->version = 1;
+	desc->hash_algorithm = arg->hash_algorithm;
+	desc->log_blocksize = ilog2(arg->block_size);
+
+	/* Get the salt if the user provided one */
+	if (arg->salt_size &&
+	    copy_from_user(desc->salt,
+			   (const u8 __user *)(uintptr_t)arg->salt_ptr,
+			   arg->salt_size)) {
+		err = -EFAULT;
+		goto out;
+	}
+	desc->salt_size = arg->salt_size;
+
+	desc->data_size = cpu_to_le64(inode->i_size);
+
+	/* Prepare the Merkle tree parameters */
+	err = fsverity_init_merkle_tree_params(&params, inode,
+					       arg->hash_algorithm,
+					       desc->log_blocksize,
+					       desc->salt, desc->salt_size);
+	if (err)
+		goto out;
+
+	/*
+	 * Start enabling verity on this file, serialized by the inode lock.
+	 * Fail if verity is already enabled or is already being enabled.
+	 */
+	inode_lock(inode);
+	if (IS_VERITY(inode))
+		err = -EEXIST;
+	else
+		err = vops->begin_enable_verity(filp);
+	inode_unlock(inode);
+	if (err)
+		goto out;
+
+	/*
+	 * Build the Merkle tree.  Don't hold the inode lock during this, since
+	 * on huge files this may take a very long time and we don't want to
+	 * force unrelated syscalls like chown() to block forever.  We don't
+	 * need the inode lock here because deny_write_access() already prevents
+	 * the file from being written to or truncated, and we still serialize
+	 * ->begin_enable_verity() and ->end_enable_verity() using the inode
+	 * lock and only allow one process to be here at a time on a given file.
+	 */
+	pr_debug("Building Merkle tree...\n");
+	BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
+	err = build_merkle_tree(inode, &params, desc->root_hash);
+	if (err) {
+		fsverity_err(inode, "Error %d building Merkle tree", err);
+		goto rollback;
+	}
+	pr_debug("Done building Merkle tree.  Root hash is %s:%*phN\n",
+		 params.hash_alg->name, params.digest_size, desc->root_hash);
+
+	/*
+	 * Create the fsverity_info.  Don't bother trying to save work by
+	 * reusing the merkle_tree_params from above.  Instead, just create the
+	 * fsverity_info from the fsverity_descriptor as if it were just loaded
+	 * from disk.  This is simpler, and it serves as an extra check that the
+	 * metadata we're writing is valid before actually enabling verity.
+	 */
+	vi = fsverity_create_info(inode, desc, desc_size);
+	if (IS_ERR(vi)) {
+		err = PTR_ERR(vi);
+		goto rollback;
+	}
+
+	/*
+	 * Tell the filesystem to finish enabling verity on the file.
+	 * Serialized with ->begin_enable_verity() by the inode lock.
+	 */
+	inode_lock(inode);
+	err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
+	inode_unlock(inode);
+	if (err) {
+		fsverity_err(inode, "%ps() failed with err %d",
+			     vops->end_enable_verity, err);
+		fsverity_free_info(vi);
+	} else if (WARN_ON(!IS_VERITY(inode))) {
+		err = -EINVAL;
+		fsverity_free_info(vi);
+	} else {
+		/* Successfully enabled verity */
+
+		/*
+		 * Readers can start using ->i_verity_info immediately, so it
+		 * can't be rolled back once set.  So don't set it until just
+		 * after the filesystem has successfully enabled verity.
+		 */
+		fsverity_set_info(inode, vi);
+	}
+out:
+	kfree(params.hashstate);
+	kfree(desc);
+	return err;
+
+rollback:
+	inode_lock(inode);
+	(void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
+	inode_unlock(inode);
+	goto out;
+}
+
+/**
+ * fsverity_ioctl_enable() - enable verity on a file
+ *
+ * Enable fs-verity on a file.  See the "FS_IOC_ENABLE_VERITY" section of
+ * Documentation/filesystems/fsverity.rst for the documentation.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
+{
+	struct inode *inode = file_inode(filp);
+	struct fsverity_enable_arg arg;
+	int err;
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (arg.version != 1)
+		return -EINVAL;
+
+	if (arg.__reserved1 ||
+	    memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
+		return -EINVAL;
+
+	if (arg.block_size != PAGE_SIZE)
+		return -EINVAL;
+
+	if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt))
+		return -EMSGSIZE;
+
+	if (arg.sig_size)
+		return -EINVAL;
+
+	/*
+	 * Require a regular file with write access.  But the actual fd must
+	 * still be readonly so that we can lock out all writers.  This is
+	 * needed to guarantee that no writable fds exist to the file once it
+	 * has verity enabled, and to stabilize the data being hashed.
+	 */
+
+	err = inode_permission(inode, MAY_WRITE);
+	if (err)
+		return err;
+
+	if (IS_APPEND(inode))
+		return -EPERM;
+
+	if (S_ISDIR(inode->i_mode))
+		return -EISDIR;
+
+	if (!S_ISREG(inode->i_mode))
+		return -EINVAL;
+
+	err = mnt_want_write_file(filp);
+	if (err) /* -EROFS */
+		return err;
+
+	err = deny_write_access(filp);
+	if (err) /* -ETXTBSY */
+		goto out_drop_write;
+
+	err = enable_verity(filp, &arg);
+	if (err)
+		goto out_allow_write_access;
+
+	/*
+	 * Some pages of the file may have been evicted from pagecache after
+	 * being used in the Merkle tree construction, then read into pagecache
+	 * again by another process reading from the file concurrently.  Since
+	 * these pages didn't undergo verification against the file measurement
+	 * which fs-verity now claims to be enforcing, we have to wipe the
+	 * pagecache to ensure that all future reads are verified.
+	 */
+	filemap_write_and_wait(inode->i_mapping);
+	invalidate_inode_pages2(inode->i_mapping);
+
+	/*
+	 * allow_write_access() is needed to pair with deny_write_access().
+	 * Regardless, the filesystem won't allow writing to verity files.
+	 */
+out_allow_write_access:
+	allow_write_access(filp);
+out_drop_write:
+	mnt_drop_write_file(filp);
+	return err;
+}
+EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index 95c257cd7ff0..d1a5dbf450c4 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -17,6 +17,44 @@
 /* Verity operations for filesystems */
 struct fsverity_operations {
 
+	/**
+	 * Begin enabling verity on the given file.
+	 *
+	 * @filp: a readonly file descriptor for the file
+	 *
+	 * The filesystem must do any needed filesystem-specific preparations
+	 * for enabling verity, e.g. evicting inline data.  It also must return
+	 * -EBUSY if verity is already being enabled on the given file.
+	 *
+	 * i_rwsem is held for write.
+	 *
+	 * Return: 0 on success, -errno on failure
+	 */
+	int (*begin_enable_verity)(struct file *filp);
+
+	/**
+	 * End enabling verity on the given file.
+	 *
+	 * @filp: a readonly file descriptor for the file
+	 * @desc: the verity descriptor to write, or NULL on failure
+	 * @desc_size: size of verity descriptor, or 0 on failure
+	 * @merkle_tree_size: total bytes the Merkle tree took up
+	 *
+	 * If desc == NULL, then enabling verity failed and the filesystem only
+	 * must do any necessary cleanups.  Else, it must also store the given
+	 * verity descriptor to a fs-specific location associated with the inode
+	 * and do any fs-specific actions needed to mark the inode as a verity
+	 * inode, e.g. setting a bit in the on-disk inode.  The filesystem is
+	 * also responsible for setting the S_VERITY flag in the VFS inode.
+	 *
+	 * i_rwsem is held for write, but it may have been dropped between
+	 * ->begin_enable_verity() and ->end_enable_verity().
+	 *
+	 * Return: 0 on success, -errno on failure
+	 */
+	int (*end_enable_verity)(struct file *filp, const void *desc,
+				 size_t desc_size, u64 merkle_tree_size);
+
 	/**
 	 * Get the verity descriptor of the given inode.
 	 *
@@ -50,6 +88,22 @@ struct fsverity_operations {
 	 */
 	struct page *(*read_merkle_tree_page)(struct inode *inode,
 					      pgoff_t index);
+
+	/**
+	 * Write a Merkle tree block to the given inode.
+	 *
+	 * @inode: the inode for which the Merkle tree is being built
+	 * @buf: block to write
+	 * @index: 0-based index of the block within the Merkle tree
+	 * @log_blocksize: log base 2 of the Merkle tree block size
+	 *
+	 * This is only called between ->begin_enable_verity() and
+	 * ->end_enable_verity().
+	 *
+	 * Return: 0 on success, -errno on failure
+	 */
+	int (*write_merkle_tree_block)(struct inode *inode, const void *buf,
+				       u64 index, int log_blocksize);
 };
 
 #ifdef CONFIG_FS_VERITY
@@ -60,6 +114,10 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
 	return READ_ONCE(inode->i_verity_info);
 }
 
+/* enable.c */
+
+extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
+
 /* open.c */
 
 extern int fsverity_file_open(struct inode *inode, struct file *filp);
@@ -79,6 +137,14 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
 	return NULL;
 }
 
+/* enable.c */
+
+static inline int fsverity_ioctl_enable(struct file *filp,
+					const void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 /* open.c */
 
 static inline int fsverity_file_open(struct inode *inode, struct file *filp)
-- 
cgit v1.2.3


From 4dd893d832cf4da5409e1fecea9c4f6452a93f2b Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:23 -0700
Subject: fs-verity: implement FS_IOC_MEASURE_VERITY ioctl

Add a function for filesystems to call to implement the
FS_IOC_MEASURE_VERITY ioctl.  This ioctl retrieves the file measurement
that fs-verity calculated for the given file and is enforcing for reads;
i.e., reads that don't match this hash will fail.  This ioctl can be
used for authentication or logging of file measurements in userspace.

See the "FS_IOC_MEASURE_VERITY" section of
Documentation/filesystems/fsverity.rst for the documentation.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/Makefile       |  1 +
 fs/verity/measure.c      | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fsverity.h | 11 ++++++++++
 3 files changed, 69 insertions(+)
 create mode 100644 fs/verity/measure.c

(limited to 'include')

diff --git a/fs/verity/Makefile b/fs/verity/Makefile
index 04b37475fd28..6f7675ae0a31 100644
--- a/fs/verity/Makefile
+++ b/fs/verity/Makefile
@@ -3,5 +3,6 @@
 obj-$(CONFIG_FS_VERITY) += enable.o \
 			   hash_algs.o \
 			   init.o \
+			   measure.o \
 			   open.o \
 			   verify.o
diff --git a/fs/verity/measure.c b/fs/verity/measure.c
new file mode 100644
index 000000000000..05049b68c745
--- /dev/null
+++ b/fs/verity/measure.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fs/verity/measure.c: ioctl to get a verity file's measurement
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/uaccess.h>
+
+/**
+ * fsverity_ioctl_measure() - get a verity file's measurement
+ *
+ * Retrieve the file measurement that the kernel is enforcing for reads from a
+ * verity file.  See the "FS_IOC_MEASURE_VERITY" section of
+ * Documentation/filesystems/fsverity.rst for the documentation.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_ioctl_measure(struct file *filp, void __user *_uarg)
+{
+	const struct inode *inode = file_inode(filp);
+	struct fsverity_digest __user *uarg = _uarg;
+	const struct fsverity_info *vi;
+	const struct fsverity_hash_alg *hash_alg;
+	struct fsverity_digest arg;
+
+	vi = fsverity_get_info(inode);
+	if (!vi)
+		return -ENODATA; /* not a verity file */
+	hash_alg = vi->tree_params.hash_alg;
+
+	/*
+	 * The user specifies the digest_size their buffer has space for; we can
+	 * return the digest if it fits in the available space.  We write back
+	 * the actual size, which may be shorter than the user-specified size.
+	 */
+
+	if (get_user(arg.digest_size, &uarg->digest_size))
+		return -EFAULT;
+	if (arg.digest_size < hash_alg->digest_size)
+		return -EOVERFLOW;
+
+	memset(&arg, 0, sizeof(arg));
+	arg.digest_algorithm = hash_alg - fsverity_hash_algs;
+	arg.digest_size = hash_alg->digest_size;
+
+	if (copy_to_user(uarg, &arg, sizeof(arg)))
+		return -EFAULT;
+
+	if (copy_to_user(uarg->digest, vi->measurement, hash_alg->digest_size))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsverity_ioctl_measure);
diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h
index d1a5dbf450c4..3b6b8ccebe7d 100644
--- a/include/linux/fsverity.h
+++ b/include/linux/fsverity.h
@@ -118,6 +118,10 @@ static inline struct fsverity_info *fsverity_get_info(const struct inode *inode)
 
 extern int fsverity_ioctl_enable(struct file *filp, const void __user *arg);
 
+/* measure.c */
+
+extern int fsverity_ioctl_measure(struct file *filp, void __user *arg);
+
 /* open.c */
 
 extern int fsverity_file_open(struct inode *inode, struct file *filp);
@@ -145,6 +149,13 @@ static inline int fsverity_ioctl_enable(struct file *filp,
 	return -EOPNOTSUPP;
 }
 
+/* measure.c */
+
+static inline int fsverity_ioctl_measure(struct file *filp, void __user *arg)
+{
+	return -EOPNOTSUPP;
+}
+
 /* open.c */
 
 static inline int fsverity_file_open(struct inode *inode, struct file *filp)
-- 
cgit v1.2.3


From add890c9f9d2d1d79184ded72f23b37b164fc673 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 22 Jul 2019 09:26:23 -0700
Subject: fs-verity: add SHA-512 support

Add SHA-512 support to fs-verity.  This is primarily a demonstration of
the trivial changes needed to support a new hash algorithm in fs-verity;
most users will still use SHA-256, due to the smaller space required to
store the hashes.  But some users may prefer SHA-512.

Reviewed-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/verity/fsverity_private.h  | 2 +-
 fs/verity/hash_algs.c         | 5 +++++
 include/uapi/linux/fsverity.h | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index eaa2b3b93bbf..02a547f0667c 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -29,7 +29,7 @@ struct ahash_request;
  * Largest digest size among all hash algorithms supported by fs-verity.
  * Currently assumed to be <= size of fsverity_descriptor::root_hash.
  */
-#define FS_VERITY_MAX_DIGEST_SIZE	SHA256_DIGEST_SIZE
+#define FS_VERITY_MAX_DIGEST_SIZE	SHA512_DIGEST_SIZE
 
 /* A hash algorithm supported by fs-verity */
 struct fsverity_hash_alg {
diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
index 7df1d67742b8..31e6d7d2389a 100644
--- a/fs/verity/hash_algs.c
+++ b/fs/verity/hash_algs.c
@@ -17,6 +17,11 @@ struct fsverity_hash_alg fsverity_hash_algs[] = {
 		.digest_size = SHA256_DIGEST_SIZE,
 		.block_size = SHA256_BLOCK_SIZE,
 	},
+	[FS_VERITY_HASH_ALG_SHA512] = {
+		.name = "sha512",
+		.digest_size = SHA512_DIGEST_SIZE,
+		.block_size = SHA512_BLOCK_SIZE,
+	},
 };
 
 /**
diff --git a/include/uapi/linux/fsverity.h b/include/uapi/linux/fsverity.h
index 57d1d7fc0c34..da0daf6c193b 100644
--- a/include/uapi/linux/fsverity.h
+++ b/include/uapi/linux/fsverity.h
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 
 #define FS_VERITY_HASH_ALG_SHA256	1
+#define FS_VERITY_HASH_ALG_SHA512	2
 
 struct fsverity_enable_arg {
 	__u32 version;
-- 
cgit v1.2.3


From b884e2de2afc68ce30f7093747378ef972dde253 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 30 Jul 2019 21:29:54 +0800
Subject: lib: logic_pio: Add logic_pio_unregister_range()

Add a function to unregister a logical PIO range.

Logical PIO space can still be leaked when unregistering certain
LOGIC_PIO_CPU_MMIO regions, but this acceptable for now since there are no
callers to unregister LOGIC_PIO_CPU_MMIO regions, and the logical PIO
region allocation scheme would need significant work to improve this.

Cc: stable@vger.kernel.org
Signed-off-by: John Garry <john.garry@huawei.com>
Signed-off-by: Wei Xu <xuwei5@hisilicon.com>
---
 include/linux/logic_pio.h |  1 +
 lib/logic_pio.c           | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/include/linux/logic_pio.h b/include/linux/logic_pio.h
index cbd9d8495690..88e1e6304a71 100644
--- a/include/linux/logic_pio.h
+++ b/include/linux/logic_pio.h
@@ -117,6 +117,7 @@ struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode);
 unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode,
 			resource_size_t hw_addr, resource_size_t size);
 int logic_pio_register_range(struct logic_pio_hwaddr *newrange);
+void logic_pio_unregister_range(struct logic_pio_hwaddr *range);
 resource_size_t logic_pio_to_hwaddr(unsigned long pio);
 unsigned long logic_pio_trans_cpuaddr(resource_size_t hw_addr);
 
diff --git a/lib/logic_pio.c b/lib/logic_pio.c
index d0165c88f705..905027574e5d 100644
--- a/lib/logic_pio.c
+++ b/lib/logic_pio.c
@@ -98,6 +98,20 @@ end_register:
 	return ret;
 }
 
+/**
+ * logic_pio_unregister_range - unregister a logical PIO range for a host
+ * @range: pointer to the IO range which has been already registered.
+ *
+ * Unregister a previously-registered IO range node.
+ */
+void logic_pio_unregister_range(struct logic_pio_hwaddr *range)
+{
+	mutex_lock(&io_range_mutex);
+	list_del_rcu(&range->list);
+	mutex_unlock(&io_range_mutex);
+	synchronize_rcu();
+}
+
 /**
  * find_io_range_by_fwnode - find logical PIO range for given FW node
  * @fwnode: FW node handle associated with logical PIO range
-- 
cgit v1.2.3


From b1635ee6120cbeb3de6ab270432b2a2b839c9c56 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 8 Aug 2019 11:43:56 +0300
Subject: net/mlx5: Add XRQ legacy commands opcodes

Add XRQ legacy commands opcodes, will be used via the DEVX interface.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 4 ++++
 include/linux/mlx5/mlx5_ifc.h                 | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 8cdd7e66f8df..53d09620e215 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -446,6 +446,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_CREATE_UMEM:
 	case MLX5_CMD_OP_DESTROY_UMEM:
 	case MLX5_CMD_OP_ALLOC_MEMIC:
+	case MLX5_CMD_OP_MODIFY_XRQ:
+	case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -637,6 +639,8 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(DESTROY_UCTX);
 	MLX5_COMMAND_STR_CASE(CREATE_UMEM);
 	MLX5_COMMAND_STR_CASE(DESTROY_UMEM);
+	MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR);
+	MLX5_COMMAND_STR_CASE(MODIFY_XRQ);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1f9d4a8e6227..ab6ae723aae6 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -172,6 +172,8 @@ enum {
 	MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY     = 0x725,
 	MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY       = 0x726,
 	MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS        = 0x727,
+	MLX5_CMD_OP_RELEASE_XRQ_ERROR             = 0x729,
+	MLX5_CMD_OP_MODIFY_XRQ                    = 0x72a,
 	MLX5_CMD_OP_QUERY_ESW_FUNCTIONS           = 0x740,
 	MLX5_CMD_OP_QUERY_VPORT_STATE             = 0x750,
 	MLX5_CMD_OP_MODIFY_VPORT_STATE            = 0x751,
-- 
cgit v1.2.3


From 43dd16efc7f235f153804500a4363769bd2276fc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 1 Aug 2019 14:09:26 +0200
Subject: netfilter: nf_tables: store data in offload context registers

Store immediate data into offload context register. This allows follow
up instructions to take it from the corresponding source register.

This patch is required to support for payload mangling, although other
instructions that take data from source register will benefit from this
too.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_offload.h |  1 +
 net/netfilter/nft_immediate.c             | 24 +++++++++++++++++-------
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663a10e3..4977fbe7ed08 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -9,6 +9,7 @@ struct nft_offload_reg {
 	u32		len;
 	u32		base_offset;
 	u32		offset;
+	struct nft_data data;
 	struct nft_data	mask;
 };
 
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index ca2ae4b95a8d..c7f0ef73d939 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,17 +125,13 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
 	return 0;
 }
 
-static int nft_immediate_offload(struct nft_offload_ctx *ctx,
-				 struct nft_flow_rule *flow,
-				 const struct nft_expr *expr)
+static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx,
+					 struct nft_flow_rule *flow,
+					 const struct nft_immediate_expr *priv)
 {
-	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 	struct flow_action_entry *entry;
 	const struct nft_data *data;
 
-	if (priv->dreg != NFT_REG_VERDICT)
-		return -EOPNOTSUPP;
-
 	entry = &flow->rule->action.entries[ctx->num_actions++];
 
 	data = &priv->data;
@@ -153,6 +149,20 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx,
 	return 0;
 }
 
+static int nft_immediate_offload(struct nft_offload_ctx *ctx,
+				 struct nft_flow_rule *flow,
+				 const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	if (priv->dreg == NFT_REG_VERDICT)
+		return nft_immediate_offload_verdict(ctx, flow, priv);
+
+	memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data));
+
+	return 0;
+}
+
 static const struct nft_expr_ops nft_imm_ops = {
 	.type		= &nft_imm_type,
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
-- 
cgit v1.2.3


From bd96b4c75675e616eefef6d85d163530eef9c5e5 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:16:58 +0100
Subject: netfilter: inline four headers files into another one.

linux/netfilter/ipset/ip_set.h included four other header files:

  include/linux/netfilter/ipset/ip_set_comment.h
  include/linux/netfilter/ipset/ip_set_counter.h
  include/linux/netfilter/ipset/ip_set_skbinfo.h
  include/linux/netfilter/ipset/ip_set_timeout.h

Of these the first three were not included anywhere else.  The last,
ip_set_timeout.h, was included in a couple of other places, but defined
inline functions which call other inline functions defined in ip_set.h,
so ip_set.h had to be included before it.

Inlined all four into ip_set.h, and updated the other files that
included ip_set_timeout.h.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Acked-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h         | 238 ++++++++++++++++++++++++-
 include/linux/netfilter/ipset/ip_set_comment.h |  73 --------
 include/linux/netfilter/ipset/ip_set_counter.h |  84 ---------
 include/linux/netfilter/ipset/ip_set_skbinfo.h |  42 -----
 include/linux/netfilter/ipset/ip_set_timeout.h |  77 --------
 net/netfilter/ipset/ip_set_hash_gen.h          |   2 +-
 net/netfilter/xt_set.c                         |   1 -
 7 files changed, 235 insertions(+), 282 deletions(-)
 delete mode 100644 include/linux/netfilter/ipset/ip_set_comment.h
 delete mode 100644 include/linux/netfilter/ipset/ip_set_counter.h
 delete mode 100644 include/linux/netfilter/ipset/ip_set_skbinfo.h
 delete mode 100644 include/linux/netfilter/ipset/ip_set_timeout.h

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 12ad9b1853b4..9bc255a8461b 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -452,10 +452,240 @@ bitmap_bytes(u32 a, u32 b)
 	return 4 * ((((b - a + 8) / 8) + 3) / 4);
 }
 
-#include <linux/netfilter/ipset/ip_set_timeout.h>
-#include <linux/netfilter/ipset/ip_set_comment.h>
-#include <linux/netfilter/ipset/ip_set_counter.h>
-#include <linux/netfilter/ipset/ip_set_skbinfo.h>
+/* How often should the gc be run by default */
+#define IPSET_GC_TIME			(3 * 60)
+
+/* Timeout period depending on the timeout value of the given set */
+#define IPSET_GC_PERIOD(timeout) \
+	((timeout/3) ? min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1)
+
+/* Entry is set with no timeout value */
+#define IPSET_ELEM_PERMANENT	0
+
+/* Set is defined with timeout support: timeout value may be 0 */
+#define IPSET_NO_TIMEOUT	UINT_MAX
+
+/* Max timeout value, see msecs_to_jiffies() in jiffies.h */
+#define IPSET_MAX_TIMEOUT	(UINT_MAX >> 1)/MSEC_PER_SEC
+
+#define ip_set_adt_opt_timeout(opt, set)	\
+((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout)
+
+static inline unsigned int
+ip_set_timeout_uget(struct nlattr *tb)
+{
+	unsigned int timeout = ip_set_get_h32(tb);
+
+	/* Normalize to fit into jiffies */
+	if (timeout > IPSET_MAX_TIMEOUT)
+		timeout = IPSET_MAX_TIMEOUT;
+
+	return timeout;
+}
+
+static inline bool
+ip_set_timeout_expired(const unsigned long *t)
+{
+	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
+}
+
+static inline void
+ip_set_timeout_set(unsigned long *timeout, u32 value)
+{
+	unsigned long t;
+
+	if (!value) {
+		*timeout = IPSET_ELEM_PERMANENT;
+		return;
+	}
+
+	t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
+	if (t == IPSET_ELEM_PERMANENT)
+		/* Bingo! :-) */
+		t--;
+	*timeout = t;
+}
+
+static inline u32
+ip_set_timeout_get(const unsigned long *timeout)
+{
+	u32 t;
+
+	if (*timeout == IPSET_ELEM_PERMANENT)
+		return 0;
+
+	t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
+	/* Zero value in userspace means no timeout */
+	return t == 0 ? 1 : t;
+}
+
+static inline char*
+ip_set_comment_uget(struct nlattr *tb)
+{
+	return nla_data(tb);
+}
+
+/* Called from uadd only, protected by the set spinlock.
+ * The kadt functions don't use the comment extensions in any way.
+ */
+static inline void
+ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
+		    const struct ip_set_ext *ext)
+{
+	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
+	size_t len = ext->comment ? strlen(ext->comment) : 0;
+
+	if (unlikely(c)) {
+		set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+		kfree_rcu(c, rcu);
+		rcu_assign_pointer(comment->c, NULL);
+	}
+	if (!len)
+		return;
+	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
+		len = IPSET_MAX_COMMENT_SIZE;
+	c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
+	if (unlikely(!c))
+		return;
+	strlcpy(c->str, ext->comment, len + 1);
+	set->ext_size += sizeof(*c) + strlen(c->str) + 1;
+	rcu_assign_pointer(comment->c, c);
+}
+
+/* Used only when dumping a set, protected by rcu_read_lock() */
+static inline int
+ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
+{
+	struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
+
+	if (!c)
+		return 0;
+	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
+}
+
+/* Called from uadd/udel, flush or the garbage collectors protected
+ * by the set spinlock.
+ * Called when the set is destroyed and when there can't be any user
+ * of the set data anymore.
+ */
+static inline void
+ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
+{
+	struct ip_set_comment_rcu *c;
+
+	c = rcu_dereference_protected(comment->c, 1);
+	if (unlikely(!c))
+		return;
+	set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
+	kfree_rcu(c, rcu);
+	rcu_assign_pointer(comment->c, NULL);
+}
+
+static inline void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+	atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static inline void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+	atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static inline u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+	return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static inline u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+	return (u64)atomic64_read(&(counter)->packets);
+}
+
+static inline bool
+ip_set_match_counter(u64 counter, u64 match, u8 op)
+{
+	switch (op) {
+	case IPSET_COUNTER_NONE:
+		return true;
+	case IPSET_COUNTER_EQ:
+		return counter == match;
+	case IPSET_COUNTER_NE:
+		return counter != match;
+	case IPSET_COUNTER_LT:
+		return counter < match;
+	case IPSET_COUNTER_GT:
+		return counter > match;
+	}
+	return false;
+}
+
+static inline void
+ip_set_update_counter(struct ip_set_counter *counter,
+		      const struct ip_set_ext *ext, u32 flags)
+{
+	if (ext->packets != ULLONG_MAX &&
+	    !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
+		ip_set_add_bytes(ext->bytes, counter);
+		ip_set_add_packets(ext->packets, counter);
+	}
+}
+
+static inline bool
+ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
+{
+	return nla_put_net64(skb, IPSET_ATTR_BYTES,
+			     cpu_to_be64(ip_set_get_bytes(counter)),
+			     IPSET_ATTR_PAD) ||
+	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
+			     cpu_to_be64(ip_set_get_packets(counter)),
+			     IPSET_ATTR_PAD);
+}
+
+static inline void
+ip_set_init_counter(struct ip_set_counter *counter,
+		    const struct ip_set_ext *ext)
+{
+	if (ext->bytes != ULLONG_MAX)
+		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
+	if (ext->packets != ULLONG_MAX)
+		atomic64_set(&(counter)->packets, (long long)(ext->packets));
+}
+
+static inline void
+ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
+		   const struct ip_set_ext *ext,
+		   struct ip_set_ext *mext, u32 flags)
+{
+	mext->skbinfo = *skbinfo;
+}
+
+static inline bool
+ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
+{
+	/* Send nonzero parameters only */
+	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
+		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
+			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
+					  skbinfo->skbmarkmask),
+			      IPSET_ATTR_PAD)) ||
+	       (skbinfo->skbprio &&
+		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+			      cpu_to_be32(skbinfo->skbprio))) ||
+	       (skbinfo->skbqueue &&
+		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+			      cpu_to_be16(skbinfo->skbqueue)));
+}
+
+static inline void
+ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
+		    const struct ip_set_ext *ext)
+{
+	*skbinfo = ext->skbinfo;
+}
 
 #define IP_SET_INIT_KEXT(skb, opt, set)			\
 	{ .bytes = (skb)->len, .packets = 1,		\
diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
deleted file mode 100644
index 0b894d81bbf2..000000000000
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _IP_SET_COMMENT_H
-#define _IP_SET_COMMENT_H
-
-/* Copyright (C) 2013 Oliver Smith <oliver@8.c.9.b.0.7.4.0.1.0.0.2.ip6.arpa>
- */
-
-#ifdef __KERNEL__
-
-static inline char*
-ip_set_comment_uget(struct nlattr *tb)
-{
-	return nla_data(tb);
-}
-
-/* Called from uadd only, protected by the set spinlock.
- * The kadt functions don't use the comment extensions in any way.
- */
-static inline void
-ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
-		    const struct ip_set_ext *ext)
-{
-	struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
-	size_t len = ext->comment ? strlen(ext->comment) : 0;
-
-	if (unlikely(c)) {
-		set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
-		kfree_rcu(c, rcu);
-		rcu_assign_pointer(comment->c, NULL);
-	}
-	if (!len)
-		return;
-	if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
-		len = IPSET_MAX_COMMENT_SIZE;
-	c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
-	if (unlikely(!c))
-		return;
-	strlcpy(c->str, ext->comment, len + 1);
-	set->ext_size += sizeof(*c) + strlen(c->str) + 1;
-	rcu_assign_pointer(comment->c, c);
-}
-
-/* Used only when dumping a set, protected by rcu_read_lock() */
-static inline int
-ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
-{
-	struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
-
-	if (!c)
-		return 0;
-	return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
-}
-
-/* Called from uadd/udel, flush or the garbage collectors protected
- * by the set spinlock.
- * Called when the set is destroyed and when there can't be any user
- * of the set data anymore.
- */
-static inline void
-ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
-{
-	struct ip_set_comment_rcu *c;
-
-	c = rcu_dereference_protected(comment->c, 1);
-	if (unlikely(!c))
-		return;
-	set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
-	kfree_rcu(c, rcu);
-	rcu_assign_pointer(comment->c, NULL);
-}
-
-#endif
-#endif
diff --git a/include/linux/netfilter/ipset/ip_set_counter.h b/include/linux/netfilter/ipset/ip_set_counter.h
deleted file mode 100644
index 3400958c07be..000000000000
--- a/include/linux/netfilter/ipset/ip_set_counter.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _IP_SET_COUNTER_H
-#define _IP_SET_COUNTER_H
-
-/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@netfilter.org> */
-
-#ifdef __KERNEL__
-
-static inline void
-ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
-{
-	atomic64_add((long long)bytes, &(counter)->bytes);
-}
-
-static inline void
-ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
-{
-	atomic64_add((long long)packets, &(counter)->packets);
-}
-
-static inline u64
-ip_set_get_bytes(const struct ip_set_counter *counter)
-{
-	return (u64)atomic64_read(&(counter)->bytes);
-}
-
-static inline u64
-ip_set_get_packets(const struct ip_set_counter *counter)
-{
-	return (u64)atomic64_read(&(counter)->packets);
-}
-
-static inline bool
-ip_set_match_counter(u64 counter, u64 match, u8 op)
-{
-	switch (op) {
-	case IPSET_COUNTER_NONE:
-		return true;
-	case IPSET_COUNTER_EQ:
-		return counter == match;
-	case IPSET_COUNTER_NE:
-		return counter != match;
-	case IPSET_COUNTER_LT:
-		return counter < match;
-	case IPSET_COUNTER_GT:
-		return counter > match;
-	}
-	return false;
-}
-
-static inline void
-ip_set_update_counter(struct ip_set_counter *counter,
-		      const struct ip_set_ext *ext, u32 flags)
-{
-	if (ext->packets != ULLONG_MAX &&
-	    !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
-		ip_set_add_bytes(ext->bytes, counter);
-		ip_set_add_packets(ext->packets, counter);
-	}
-}
-
-static inline bool
-ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
-{
-	return nla_put_net64(skb, IPSET_ATTR_BYTES,
-			     cpu_to_be64(ip_set_get_bytes(counter)),
-			     IPSET_ATTR_PAD) ||
-	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
-			     cpu_to_be64(ip_set_get_packets(counter)),
-			     IPSET_ATTR_PAD);
-}
-
-static inline void
-ip_set_init_counter(struct ip_set_counter *counter,
-		    const struct ip_set_ext *ext)
-{
-	if (ext->bytes != ULLONG_MAX)
-		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
-	if (ext->packets != ULLONG_MAX)
-		atomic64_set(&(counter)->packets, (long long)(ext->packets));
-}
-
-#endif /* __KERNEL__ */
-#endif /* _IP_SET_COUNTER_H */
diff --git a/include/linux/netfilter/ipset/ip_set_skbinfo.h b/include/linux/netfilter/ipset/ip_set_skbinfo.h
deleted file mode 100644
index 3a2df02dbd55..000000000000
--- a/include/linux/netfilter/ipset/ip_set_skbinfo.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _IP_SET_SKBINFO_H
-#define _IP_SET_SKBINFO_H
-
-/* Copyright (C) 2015 Jozsef Kadlecsik <kadlec@netfilter.org> */
-
-#ifdef __KERNEL__
-
-static inline void
-ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
-		   const struct ip_set_ext *ext,
-		   struct ip_set_ext *mext, u32 flags)
-{
-	mext->skbinfo = *skbinfo;
-}
-
-static inline bool
-ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
-{
-	/* Send nonzero parameters only */
-	return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
-		nla_put_net64(skb, IPSET_ATTR_SKBMARK,
-			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
-					  skbinfo->skbmarkmask),
-			      IPSET_ATTR_PAD)) ||
-	       (skbinfo->skbprio &&
-		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
-			      cpu_to_be32(skbinfo->skbprio))) ||
-	       (skbinfo->skbqueue &&
-		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
-			      cpu_to_be16(skbinfo->skbqueue)));
-}
-
-static inline void
-ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
-		    const struct ip_set_ext *ext)
-{
-	*skbinfo = ext->skbinfo;
-}
-
-#endif /* __KERNEL__ */
-#endif /* _IP_SET_SKBINFO_H */
diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h
deleted file mode 100644
index 2be60e379ecf..000000000000
--- a/include/linux/netfilter/ipset/ip_set_timeout.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _IP_SET_TIMEOUT_H
-#define _IP_SET_TIMEOUT_H
-
-/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */
-
-#ifdef __KERNEL__
-
-/* How often should the gc be run by default */
-#define IPSET_GC_TIME			(3 * 60)
-
-/* Timeout period depending on the timeout value of the given set */
-#define IPSET_GC_PERIOD(timeout) \
-	((timeout/3) ? min_t(u32, (timeout)/3, IPSET_GC_TIME) : 1)
-
-/* Entry is set with no timeout value */
-#define IPSET_ELEM_PERMANENT	0
-
-/* Set is defined with timeout support: timeout value may be 0 */
-#define IPSET_NO_TIMEOUT	UINT_MAX
-
-/* Max timeout value, see msecs_to_jiffies() in jiffies.h */
-#define IPSET_MAX_TIMEOUT	(UINT_MAX >> 1)/MSEC_PER_SEC
-
-#define ip_set_adt_opt_timeout(opt, set)	\
-((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout)
-
-static inline unsigned int
-ip_set_timeout_uget(struct nlattr *tb)
-{
-	unsigned int timeout = ip_set_get_h32(tb);
-
-	/* Normalize to fit into jiffies */
-	if (timeout > IPSET_MAX_TIMEOUT)
-		timeout = IPSET_MAX_TIMEOUT;
-
-	return timeout;
-}
-
-static inline bool
-ip_set_timeout_expired(const unsigned long *t)
-{
-	return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
-}
-
-static inline void
-ip_set_timeout_set(unsigned long *timeout, u32 value)
-{
-	unsigned long t;
-
-	if (!value) {
-		*timeout = IPSET_ELEM_PERMANENT;
-		return;
-	}
-
-	t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
-	if (t == IPSET_ELEM_PERMANENT)
-		/* Bingo! :-) */
-		t--;
-	*timeout = t;
-}
-
-static inline u32
-ip_set_timeout_get(const unsigned long *timeout)
-{
-	u32 t;
-
-	if (*timeout == IPSET_ELEM_PERMANENT)
-		return 0;
-
-	t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
-	/* Zero value in userspace means no timeout */
-	return t == 0 ? 1 : t;
-}
-
-#endif	/* __KERNEL__ */
-#endif /* _IP_SET_TIMEOUT_H */
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 3fced06ab752..d098d87bc331 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -7,7 +7,7 @@
 #include <linux/rcupdate.h>
 #include <linux/jhash.h>
 #include <linux/types.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
+#include <linux/netfilter/ipset/ip_set.h>
 
 #define __ipset_dereference_protected(p, c)	rcu_dereference_protected(p, c)
 #define ipset_dereference_protected(p, set) \
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index ecbfa291fb70..731bc2cafae4 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -14,7 +14,6 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/ipset/ip_set.h>
-#include <linux/netfilter/ipset/ip_set_timeout.h>
 #include <uapi/linux/netfilter/xt_set.h>
 
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From a1b2f04ea527397fcacacd09e0d690927feef429 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:16:59 +0100
Subject: netfilter: add missing includes to a number of header-files.

A number of netfilter header-files used declarations and definitions
from other headers without including them.  Added include directives to
make those declarations and definitions available.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set_getport.h   | 4 ++++
 include/linux/netfilter/nf_conntrack_amanda.h    | 4 ++++
 include/linux/netfilter/nf_conntrack_ftp.h       | 8 +++++---
 include/linux/netfilter/nf_conntrack_h323.h      | 7 +++++--
 include/linux/netfilter/nf_conntrack_h323_asn1.h | 2 ++
 include/linux/netfilter/nf_conntrack_irc.h       | 4 ++++
 include/linux/netfilter/nf_conntrack_pptp.h      | 9 +++++----
 include/linux/netfilter/nf_conntrack_sip.h       | 4 ++--
 include/linux/netfilter/nf_conntrack_snmp.h      | 3 +++
 include/linux/netfilter/nf_conntrack_tftp.h      | 5 +++++
 include/net/netfilter/br_netfilter.h             | 2 ++
 include/net/netfilter/ipv4/nf_dup_ipv4.h         | 3 +++
 include/net/netfilter/ipv6/nf_defrag_ipv6.h      | 4 +++-
 include/net/netfilter/ipv6/nf_dup_ipv6.h         | 2 ++
 include/net/netfilter/nf_conntrack_bridge.h      | 4 ++++
 include/net/netfilter/nf_conntrack_count.h       | 3 +++
 include/net/netfilter/nf_dup_netdev.h            | 2 ++
 include/net/netfilter/nf_flow_table.h            | 1 +
 include/net/netfilter/nf_nat_helper.h            | 4 ++--
 include/net/netfilter/nf_nat_redirect.h          | 3 +++
 include/net/netfilter/nf_queue.h                 | 2 ++
 include/net/netfilter/nf_reject.h                | 3 +++
 include/net/netfilter/nf_tables_ipv6.h           | 1 +
 include/net/netfilter/nft_fib.h                  | 2 ++
 include/net/netfilter/nft_meta.h                 | 2 ++
 include/net/netfilter/nft_reject.h               | 5 +++++
 include/uapi/linux/netfilter/xt_policy.h         | 1 +
 27 files changed, 80 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h
index ac6a11d38a19..a906df06948b 100644
--- a/include/linux/netfilter/ipset/ip_set_getport.h
+++ b/include/linux/netfilter/ipset/ip_set_getport.h
@@ -2,6 +2,10 @@
 #ifndef _IP_SET_GETPORT_H
 #define _IP_SET_GETPORT_H
 
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <uapi/linux/in.h>
+
 extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
 				__be16 *port, u8 *proto);
 
diff --git a/include/linux/netfilter/nf_conntrack_amanda.h b/include/linux/netfilter/nf_conntrack_amanda.h
index 34345e543ba2..6f0ac896fcc9 100644
--- a/include/linux/netfilter/nf_conntrack_amanda.h
+++ b/include/linux/netfilter/nf_conntrack_amanda.h
@@ -3,6 +3,10 @@
 #define _NF_CONNTRACK_AMANDA_H
 /* AMANDA tracking. */
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
 extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb,
 					  enum ip_conntrack_info ctinfo,
 					  unsigned int protoff,
diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h
index 73a296dfd019..0e38302820b9 100644
--- a/include/linux/netfilter/nf_conntrack_ftp.h
+++ b/include/linux/netfilter/nf_conntrack_ftp.h
@@ -2,8 +2,12 @@
 #ifndef _NF_CONNTRACK_FTP_H
 #define _NF_CONNTRACK_FTP_H
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/netfilter/nf_conntrack_expect.h>
 #include <uapi/linux/netfilter/nf_conntrack_ftp.h>
-
+#include <uapi/linux/netfilter/nf_conntrack_tuple_common.h>
 
 #define FTP_PORT	21
 
@@ -20,8 +24,6 @@ struct nf_ct_ftp_master {
 	u_int16_t flags[IP_CT_DIR_MAX];
 };
 
-struct nf_conntrack_expect;
-
 /* For NAT to hook in when we find a packet which describes what other
  * connection we should expect. */
 extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb,
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index f76ed373a2a5..96dfa886f8c0 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -4,7 +4,12 @@
 
 #ifdef __KERNEL__
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
 #include <linux/netfilter/nf_conntrack_h323_asn1.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <uapi/linux/netfilter/nf_conntrack_tuple_common.h>
 
 #define RAS_PORT 1719
 #define Q931_PORT 1720
@@ -28,8 +33,6 @@ struct nf_ct_h323_master {
 	};
 };
 
-struct nf_conn;
-
 int get_h225_addr(struct nf_conn *ct, unsigned char *data,
 		  TransportAddress *taddr, union nf_inet_addr *addr,
 		  __be16 *port);
diff --git a/include/linux/netfilter/nf_conntrack_h323_asn1.h b/include/linux/netfilter/nf_conntrack_h323_asn1.h
index 19df78341fb3..bd6797f823b2 100644
--- a/include/linux/netfilter/nf_conntrack_h323_asn1.h
+++ b/include/linux/netfilter/nf_conntrack_h323_asn1.h
@@ -37,6 +37,8 @@
 /*****************************************************************************
  * H.323 Types
  ****************************************************************************/
+
+#include <linux/types.h>
 #include <linux/netfilter/nf_conntrack_h323_types.h>
 
 typedef struct {
diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h
index 00c2b74206e1..f75e005db969 100644
--- a/include/linux/netfilter/nf_conntrack_irc.h
+++ b/include/linux/netfilter/nf_conntrack_irc.h
@@ -4,6 +4,10 @@
 
 #ifdef __KERNEL__
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
 #define IRC_PORT	6667
 
 extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index 833a5b2255ea..3f10e806f0dc 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -3,7 +3,12 @@
 #ifndef _NF_CONNTRACK_PPTP_H
 #define _NF_CONNTRACK_PPTP_H
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
 #include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <uapi/linux/netfilter/nf_conntrack_tuple_common.h>
 
 extern const char *const pptp_msg_name[];
 
@@ -297,10 +302,6 @@ union pptp_ctrl_union {
 	struct PptpSetLinkInfo		setlink;
 };
 
-/* crap needed for nf_conntrack_compat.h */
-struct nf_conn;
-struct nf_conntrack_expect;
-
 extern int
 (*nf_nat_pptp_hook_outbound)(struct sk_buff *skb,
 			     struct nf_conn *ct, enum ip_conntrack_info ctinfo,
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index c7fc38807a33..f6437f7841af 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -3,9 +3,9 @@
 #define __NF_CONNTRACK_SIP_H__
 #ifdef __KERNEL__
 
-#include <net/netfilter/nf_conntrack_expect.h>
-
+#include <linux/skbuff.h>
 #include <linux/types.h>
+#include <net/netfilter/nf_conntrack_expect.h>
 
 #define SIP_PORT	5060
 #define SIP_TIMEOUT	3600
diff --git a/include/linux/netfilter/nf_conntrack_snmp.h b/include/linux/netfilter/nf_conntrack_snmp.h
index 818088c47475..87e4f33eb55f 100644
--- a/include/linux/netfilter/nf_conntrack_snmp.h
+++ b/include/linux/netfilter/nf_conntrack_snmp.h
@@ -2,6 +2,9 @@
 #ifndef _NF_CONNTRACK_SNMP_H
 #define _NF_CONNTRACK_SNMP_H
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+
 extern int (*nf_nat_snmp_hook)(struct sk_buff *skb,
 				unsigned int protoff,
 				struct nf_conn *ct,
diff --git a/include/linux/netfilter/nf_conntrack_tftp.h b/include/linux/netfilter/nf_conntrack_tftp.h
index 5769e12dd0a2..dc4c1b9beac0 100644
--- a/include/linux/netfilter/nf_conntrack_tftp.h
+++ b/include/linux/netfilter/nf_conntrack_tftp.h
@@ -4,6 +4,11 @@
 
 #define TFTP_PORT 69
 
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+
 struct tftphdr {
 	__be16 opcode;
 };
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 302fcd3aade2..ca121ed906df 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -2,6 +2,8 @@
 #ifndef _BR_NETFILTER_H_
 #define _BR_NETFILTER_H_
 
+#include <linux/netfilter.h>
+
 #include "../../../net/bridge/br_private.h"
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
diff --git a/include/net/netfilter/ipv4/nf_dup_ipv4.h b/include/net/netfilter/ipv4/nf_dup_ipv4.h
index c962e0be3549..a2bc16cdbcd3 100644
--- a/include/net/netfilter/ipv4/nf_dup_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_dup_ipv4.h
@@ -2,6 +2,9 @@
 #ifndef _NF_DUP_IPV4_H_
 #define _NF_DUP_IPV4_H_
 
+#include <linux/skbuff.h>
+#include <uapi/linux/in.h>
+
 void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 		 const struct in_addr *gw, int oif);
 
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 9d7e28736da9..6d31cd041143 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -2,7 +2,9 @@
 #ifndef _NF_DEFRAG_IPV6_H
 #define _NF_DEFRAG_IPV6_H
 
-struct net;
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
 int nf_defrag_ipv6_enable(struct net *);
 
 int nf_ct_frag6_init(void);
diff --git a/include/net/netfilter/ipv6/nf_dup_ipv6.h b/include/net/netfilter/ipv6/nf_dup_ipv6.h
index caf0c2dd8ee7..f6312bb04a13 100644
--- a/include/net/netfilter/ipv6/nf_dup_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_dup_ipv6.h
@@ -2,6 +2,8 @@
 #ifndef _NF_DUP_IPV6_H_
 #define _NF_DUP_IPV6_H_
 
+#include <linux/skbuff.h>
+
 void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
 		 const struct in6_addr *gw, int oif);
 
diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h
index 9a5514d5bc51..8f2e5b2ab523 100644
--- a/include/net/netfilter/nf_conntrack_bridge.h
+++ b/include/net/netfilter/nf_conntrack_bridge.h
@@ -1,6 +1,10 @@
 #ifndef NF_CONNTRACK_BRIDGE_
 #define NF_CONNTRACK_BRIDGE_
 
+#include <linux/module.h>
+#include <linux/types.h>
+#include <uapi/linux/if_ether.h>
+
 struct nf_ct_bridge_info {
 	struct nf_hook_ops	*ops;
 	unsigned int		ops_size;
diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h
index f32fc8289473..9645b47fa7e4 100644
--- a/include/net/netfilter/nf_conntrack_count.h
+++ b/include/net/netfilter/nf_conntrack_count.h
@@ -2,6 +2,9 @@
 #define _NF_CONNTRACK_COUNT_H
 
 #include <linux/list.h>
+#include <linux/spinlock.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
 
 struct nf_conncount_data;
 
diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h
index 2a6f6dcad3d9..181672672160 100644
--- a/include/net/netfilter/nf_dup_netdev.h
+++ b/include/net/netfilter/nf_dup_netdev.h
@@ -2,6 +2,8 @@
 #ifndef _NF_DUP_NETDEV_H_
 #define _NF_DUP_NETDEV_H_
 
+#include <net/netfilter/nf_tables.h>
+
 void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif);
 void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif);
 
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d8c187936bec..7249e331bd0b 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -6,6 +6,7 @@
 #include <linux/netdevice.h>
 #include <linux/rhashtable-types.h>
 #include <linux/rcupdate.h>
+#include <linux/netfilter.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/dst.h>
 
diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h
index 97d7033e93a4..efae84646353 100644
--- a/include/net/netfilter/nf_nat_helper.h
+++ b/include/net/netfilter/nf_nat_helper.h
@@ -3,9 +3,9 @@
 #define _NF_NAT_HELPER_H
 /* NAT protocol helper routines. */
 
+#include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack.h>
-
-struct sk_buff;
+#include <net/netfilter/nf_conntrack_expect.h>
 
 /* These return true or false. */
 bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb, struct nf_conn *ct,
diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h
index c129aacc8ae8..2418653a66db 100644
--- a/include/net/netfilter/nf_nat_redirect.h
+++ b/include/net/netfilter/nf_nat_redirect.h
@@ -2,6 +2,9 @@
 #ifndef _NF_NAT_REDIRECT_H_
 #define _NF_NAT_REDIRECT_H_
 
+#include <linux/skbuff.h>
+#include <uapi/linux/netfilter/nf_nat.h>
+
 unsigned int
 nf_nat_redirect_ipv4(struct sk_buff *skb,
 		     const struct nf_nat_ipv4_multi_range_compat *mr,
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 3cb6dcf53a4e..359b80b43169 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -5,6 +5,8 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/jhash.h>
+#include <linux/netfilter.h>
+#include <linux/skbuff.h>
 
 /* Each queued (to userspace) skbuff has one of these. */
 struct nf_queue_entry {
diff --git a/include/net/netfilter/nf_reject.h b/include/net/netfilter/nf_reject.h
index 221f877f29d1..9051c3a0c8e7 100644
--- a/include/net/netfilter/nf_reject.h
+++ b/include/net/netfilter/nf_reject.h
@@ -2,6 +2,9 @@
 #ifndef _NF_REJECT_H
 #define _NF_REJECT_H
 
+#include <linux/types.h>
+#include <uapi/linux/in.h>
+
 static inline bool nf_reject_verify_csum(__u8 proto)
 {
 	/* Skip protocols that don't use 16-bit one's complement checksum
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index dabe6fdb553a..d0f1c537b017 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -4,6 +4,7 @@
 
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/ipv6.h>
+#include <net/netfilter/nf_tables.h>
 
 static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 					struct sk_buff *skb)
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index e4c4d8eaca8c..628b6fa579cd 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -2,6 +2,8 @@
 #ifndef _NFT_FIB_H_
 #define _NFT_FIB_H_
 
+#include <net/netfilter/nf_tables.h>
+
 struct nft_fib {
 	enum nft_registers	dreg:8;
 	u8			result;
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index 5c69e9b09388..07e2fd507963 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -2,6 +2,8 @@
 #ifndef _NFT_META_H_
 #define _NFT_META_H_
 
+#include <net/netfilter/nf_tables.h>
+
 struct nft_meta {
 	enum nft_meta_keys	key:8;
 	union {
diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index de80c50761f0..56b123a42220 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -2,6 +2,11 @@
 #ifndef _NFT_REJECT_H_
 #define _NFT_REJECT_H_
 
+#include <linux/types.h>
+#include <net/netlink.h>
+#include <net/netfilter/nf_tables.h>
+#include <uapi/linux/netfilter/nf_tables.h>
+
 struct nft_reject {
 	enum nft_reject_types	type:8;
 	u8			icmp_code;
diff --git a/include/uapi/linux/netfilter/xt_policy.h b/include/uapi/linux/netfilter/xt_policy.h
index 323bfa3074c5..4cf2ce2a8a44 100644
--- a/include/uapi/linux/netfilter/xt_policy.h
+++ b/include/uapi/linux/netfilter/xt_policy.h
@@ -2,6 +2,7 @@
 #ifndef _XT_POLICY_H
 #define _XT_POLICY_H
 
+#include <linux/netfilter.h>
 #include <linux/types.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-- 
cgit v1.2.3


From 9211bfbff80a7604273086fec5685efcdc10be2b Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:17:00 +0100
Subject: netfilter: add missing IS_ENABLED(CONFIG_BRIDGE_NETFILTER) checks to
 header-file.

br_netfilter.h defines inline functions that use an enum constant and
struct member that are only defined if CONFIG_BRIDGE_NETFILTER is
enabled.  Added preprocessor checks to ensure br_netfilter.h will
compile if CONFIG_BRIDGE_NETFILTER is disabled.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index ca121ed906df..33533ea852a1 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -8,12 +8,16 @@
 
 static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
 {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct nf_bridge_info *b = skb_ext_add(skb, SKB_EXT_BRIDGE_NF);
 
 	if (b)
 		memset(b, 0, sizeof(*b));
 
 	return b;
+#else
+	return NULL;
+#endif
 }
 
 void nf_bridge_update_protocol(struct sk_buff *skb);
@@ -38,10 +42,14 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 	struct net_bridge_port *port;
 
 	port = br_port_get_rcu(dev);
 	return port ? &port->br->fake_rtable : NULL;
+#else
+	return NULL;
+#endif
 }
 
 struct net_device *setup_pre_routing(struct sk_buff *skb,
-- 
cgit v1.2.3


From 47e640af2e492cc28778dd6f894d50313f7fba75 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:17:01 +0100
Subject: netfilter: add missing IS_ENABLED(CONFIG_NF_TABLES) check to
 header-file.

nf_tables.h defines an API comprising several inline functions and
macros that depend on the nft member of struct net.  However, this is
only defined is CONFIG_NF_TABLES is enabled.  Added preprocessor checks
to ensure that nf_tables.h will compile if CONFIG_NF_TABLES is disabled.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 9b624566b82d..66edf76301d3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1207,6 +1207,8 @@ void nft_trace_notify(struct nft_traceinfo *info);
 #define MODULE_ALIAS_NFT_OBJ(type) \
 	MODULE_ALIAS("nft-obj-" __stringify(type))
 
+#if IS_ENABLED(CONFIG_NF_TABLES)
+
 /*
  * The gencursor defines two generations, the currently active and the
  * next one. Objects contain a bitmask of 2 bits specifying the generations
@@ -1280,6 +1282,8 @@ static inline void nft_set_elem_change_active(const struct net *net,
 	ext->genmask ^= nft_genmask_next(net);
 }
 
+#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
+
 /*
  * We use a free bit in the genmask field to indicate the element
  * is busy, meaning it is currently being processed either by
-- 
cgit v1.2.3


From 0abc8bf4f2842e409926096f0fa009b468cbd855 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:17:02 +0100
Subject: netfilter: add missing IS_ENABLED(CONFIG_NF_CONNTRACK) checks to some
 header-files.

struct nf_conn contains a "struct nf_conntrack ct_general" member and
struct net contains a "struct netns_ct ct" member which are both only
defined in CONFIG_NF_CONNTRACK is enabled.  These members are used in a
number of inline functions defined in other header-files.  Added
preprocessor checks to make sure the headers will compile if
CONFIG_NF_CONNTRACK is disabled.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack.h           | 10 ++++++++++
 include/net/netfilter/nf_conntrack_acct.h      | 13 +++++++++++++
 include/net/netfilter/nf_conntrack_l4proto.h   |  2 ++
 include/net/netfilter/nf_conntrack_timestamp.h |  6 ++++++
 4 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index c86657d99630..2cc304efe7f9 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -59,6 +59,7 @@ struct nf_conntrack_net {
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
 struct nf_conn {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	/* Usage count in here is 1 for hash table, 1 per skb,
 	 * plus 1 for any connection(s) we are `master' for
 	 *
@@ -68,6 +69,7 @@ struct nf_conn {
 	 * beware nf_ct_get() is different and don't inc refcnt.
 	 */
 	struct nf_conntrack ct_general;
+#endif
 
 	spinlock_t	lock;
 	/* jiffies32 when this ct is considered dead */
@@ -148,6 +150,8 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 			     const struct nf_conn *ignored_conntrack);
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+
 #define NFCT_INFOMASK	7UL
 #define NFCT_PTRMASK	~(NFCT_INFOMASK)
 
@@ -167,6 +171,8 @@ static inline void nf_ct_put(struct nf_conn *ct)
 	nf_conntrack_put(&ct->ct_general);
 }
 
+#endif
+
 /* Protocol module loading */
 int nf_ct_l3proto_try_module_get(unsigned short l3proto);
 void nf_ct_l3proto_module_put(unsigned short l3proto);
@@ -318,12 +324,16 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 u32 nf_ct_get_id(const struct nf_conn *ct);
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+
 static inline void
 nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
 {
 	skb->_nfct = (unsigned long)ct | info;
 }
 
+#endif
+
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 1fee733c18a7..ad9f2172dee1 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -29,6 +29,7 @@ struct nf_conn_acct *nf_conn_acct_find(const struct nf_conn *ct)
 static inline
 struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	struct net *net = nf_ct_net(ct);
 	struct nf_conn_acct *acct;
 
@@ -41,22 +42,34 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 
 
 	return acct;
+#else
+	return NULL;
+#endif
 };
 
 /* Check if connection tracking accounting is enabled */
 static inline bool nf_ct_acct_enabled(struct net *net)
 {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	return net->ct.sysctl_acct != 0;
+#else
+	return false;
+#endif
 }
 
 /* Enable/disable connection tracking accounting */
 static inline void nf_ct_set_acct(struct net *net, bool enable)
 {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	net->ct.sysctl_acct = enable;
+#endif
 }
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 void nf_conntrack_acct_pernet_init(struct net *net);
 
 int nf_conntrack_acct_init(void);
 void nf_conntrack_acct_fini(void);
+#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
+
 #endif /* _NF_CONNTRACK_ACCT_H */
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index a49edfdf47e8..1990d54bf8f2 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -176,6 +176,7 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 			       const char *fmt, ...) { }
 #endif /* CONFIG_SYSCTL */
 
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 static inline struct nf_generic_net *nf_generic_pernet(struct net *net)
 {
        return &net->ct.nf_ct_proto.generic;
@@ -200,6 +201,7 @@ static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net)
 {
        return &net->ct.nf_ct_proto.icmpv6;
 }
+#endif
 
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net)
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
index 0ed617bf0a3d..2b8aeba649aa 100644
--- a/include/net/netfilter/nf_conntrack_timestamp.h
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -40,12 +40,18 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
 
 static inline bool nf_ct_tstamp_enabled(struct net *net)
 {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	return net->ct.sysctl_tstamp != 0;
+#else
+	return false;
+#endif
 }
 
 static inline void nf_ct_set_tstamp(struct net *net, bool enable)
 {
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	net->ct.sysctl_tstamp = enable;
+#endif
 }
 
 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
-- 
cgit v1.2.3


From 78458e3e08cda2aacaec9fde8c295dfc2f88618a Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:17:03 +0100
Subject: netfilter: add missing IS_ENABLED(CONFIG_NETFILTER) checks to some
 header-files.

linux/netfilter.h defines a number of struct and inline function
definitions which are only available is CONFIG_NETFILTER is enabled.
These structs and functions are used in declarations and definitions in
other header-files.  Added preprocessor checks to make sure these
headers will compile if CONFIG_NETFILTER is disabled.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h           | 6 ++++++
 include/linux/netfilter_arp/arp_tables.h     | 2 ++
 include/linux/netfilter_bridge/ebtables.h    | 2 ++
 include/linux/netfilter_ipv4/ip_tables.h     | 4 ++++
 include/linux/netfilter_ipv6/ip6_tables.h    | 2 ++
 include/net/netfilter/br_netfilter.h         | 2 ++
 include/net/netfilter/nf_conntrack_bridge.h  | 2 ++
 include/net/netfilter/nf_conntrack_core.h    | 3 +++
 include/net/netfilter/nf_conntrack_l4proto.h | 2 ++
 include/net/netfilter/nf_conntrack_tuple.h   | 2 ++
 include/net/netfilter/nf_flow_table.h        | 4 ++++
 include/net/netfilter/nf_nat.h               | 4 ++++
 include/net/netfilter/nf_queue.h             | 5 +++++
 include/net/netfilter/nf_synproxy.h          | 4 ++++
 include/net/netfilter/nf_tables.h            | 8 ++++++++
 15 files changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 1f852ef7b098..ae62bf1c6824 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -35,12 +35,15 @@ struct xt_action_param {
 	union {
 		const void *matchinfo, *targinfo;
 	};
+#if IS_ENABLED(CONFIG_NETFILTER)
 	const struct nf_hook_state *state;
+#endif
 	int fragoff;
 	unsigned int thoff;
 	bool hotdrop;
 };
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 static inline struct net *xt_net(const struct xt_action_param *par)
 {
 	return par->state->net;
@@ -75,6 +78,7 @@ static inline u_int8_t xt_family(const struct xt_action_param *par)
 {
 	return par->state->pf;
 }
+#endif
 
 /**
  * struct xt_mtchk_param - parameters for match extensions'
@@ -446,7 +450,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 	return cnt;
 }
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
+#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index e98028f00e47..1b7b35bb9c27 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -49,6 +49,7 @@ struct arpt_error {
 }
 
 extern void *arpt_alloc_initial_table(const struct xt_table *);
+#if IS_ENABLED(CONFIG_NETFILTER)
 int arpt_register_table(struct net *net, const struct xt_table *table,
 			const struct arpt_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
@@ -57,6 +58,7 @@ void arpt_unregister_table(struct net *net, struct xt_table *table,
 extern unsigned int arpt_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
+#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index c6935be7c6ca..b5b2d371f0ef 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -105,6 +105,7 @@ struct ebt_table {
 
 #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \
 		     ~(__alignof__(struct _xt_align)-1))
+#if IS_ENABLED(CONFIG_NETFILTER)
 extern int ebt_register_table(struct net *net,
 			      const struct ebt_table *table,
 			      const struct nf_hook_ops *ops,
@@ -114,6 +115,7 @@ extern void ebt_unregister_table(struct net *net, struct ebt_table *table,
 extern unsigned int ebt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct ebt_table *table);
+#endif
 
 /* True if the hook mask denotes that the rule is in a base chain,
  * used in the check() functions */
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index d026e63a5aa4..f40a65481df4 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -25,11 +25,13 @@
 
 extern void ipt_init(void) __init;
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 int ipt_register_table(struct net *net, const struct xt_table *table,
 		       const struct ipt_replace *repl,
 		       const struct nf_hook_ops *ops, struct xt_table **res);
 void ipt_unregister_table(struct net *net, struct xt_table *table,
 			  const struct nf_hook_ops *ops);
+#endif
 
 /* Standard entry. */
 struct ipt_standard {
@@ -65,9 +67,11 @@ struct ipt_error {
 }
 
 extern void *ipt_alloc_initial_table(const struct xt_table *);
+#if IS_ENABLED(CONFIG_NETFILTER)
 extern unsigned int ipt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct xt_table *table);
+#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 99cbfd3add40..53b7309613bf 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -26,6 +26,7 @@
 extern void ip6t_init(void) __init;
 
 extern void *ip6t_alloc_initial_table(const struct xt_table *);
+#if IS_ENABLED(CONFIG_NETFILTER)
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
@@ -34,6 +35,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table,
 extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
+#endif
 
 /* Check for an extension */
 static inline int
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 33533ea852a1..2a613c84d49f 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -55,6 +55,7 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 struct net_device *setup_pre_routing(struct sk_buff *skb,
 				     const struct net *net);
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 #if IS_ENABLED(CONFIG_IPV6)
 int br_validate_ipv6(struct net *net, struct sk_buff *skb);
 unsigned int br_nf_pre_routing_ipv6(void *priv,
@@ -73,5 +74,6 @@ br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 #endif
+#endif
 
 #endif /* _BR_NETFILTER_H_ */
diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h
index 8f2e5b2ab523..34c28f248b18 100644
--- a/include/net/netfilter/nf_conntrack_bridge.h
+++ b/include/net/netfilter/nf_conntrack_bridge.h
@@ -6,7 +6,9 @@
 #include <uapi/linux/if_ether.h>
 
 struct nf_ct_bridge_info {
+#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_ops	*ops;
+#endif
 	unsigned int		ops_size;
 	struct module		*me;
 };
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index de10faf2ce91..71a2d9cb64ea 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -20,7 +20,10 @@
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
    of connection tracking. */
+
+#if IS_ENABLED(CONFIG_NETFILTER)
 unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state);
+#endif
 
 int nf_conntrack_init_net(struct net *net);
 void nf_conntrack_cleanup_net(struct net *net);
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 1990d54bf8f2..c200b95d27ae 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -75,6 +75,7 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
 bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
 				      const struct nf_conntrack_tuple *orig);
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
 			    unsigned int dataoff,
 			    const struct nf_hook_state *state,
@@ -131,6 +132,7 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
 			    unsigned int dataoff,
 			    enum ip_conntrack_info ctinfo,
 			    const struct nf_hook_state *state);
+#endif
 
 void nf_conntrack_generic_init_net(struct net *net);
 void nf_conntrack_tcp_init_net(struct net *net);
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index bf0444e111a6..480c87b44a96 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -121,6 +121,7 @@ struct nf_conntrack_tuple_hash {
 	struct nf_conntrack_tuple tuple;
 };
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1,
 					   const struct nf_conntrack_tuple *t2)
 { 
@@ -183,5 +184,6 @@ nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t,
 	return nf_ct_tuple_src_mask_cmp(t, tuple, mask) &&
 	       __nf_ct_tuple_dst_equal(t, tuple);
 }
+#endif
 
 #endif /* _NF_CONNTRACK_TUPLE_H */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 7249e331bd0b..609df33b1209 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -17,7 +17,9 @@ struct nf_flowtable_type {
 	int				family;
 	int				(*init)(struct nf_flowtable *ft);
 	void				(*free)(struct nf_flowtable *ft);
+#if IS_ENABLED(CONFIG_NETFILTER)
 	nf_hookfn			*hook;
+#endif
 	struct module			*owner;
 };
 
@@ -115,10 +117,12 @@ struct flow_ports {
 	__be16 source, dest;
 };
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 				     const struct nf_hook_state *state);
 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 				       const struct nf_hook_state *state);
+#endif
 
 #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
 	MODULE_ALIAS("nf-flowtable-" __stringify(family))
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 423cda2c6542..eec208fb9c23 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -69,10 +69,12 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 #endif
 }
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
 		       const struct nf_hook_ops *nat_ops, unsigned int ops_count);
 void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
 			  unsigned int ops_count);
+#endif
 
 unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			   unsigned int hooknum, struct sk_buff *skb);
@@ -92,6 +94,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
 				    unsigned int hooknum, unsigned int hdrlen);
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops);
 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
 
@@ -104,6 +107,7 @@ void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
 unsigned int
 nf_nat_inet_fn(void *priv, struct sk_buff *skb,
 	       const struct nf_hook_state *state);
+#endif
 
 int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family);
 
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 359b80b43169..80edb46a1bbc 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -15,7 +15,9 @@ struct nf_queue_entry {
 	unsigned int		id;
 	unsigned int		hook_index;	/* index in hook_entries->hook[] */
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_state	state;
+#endif
 	u16			size; /* sizeof(entry) + saved route keys */
 
 	/* extra space to store route keys */
@@ -121,6 +123,9 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
 	return queue;
 }
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	     unsigned int index, unsigned int verdict);
+#endif
+
 #endif /* _NF_QUEUE_H */
diff --git a/include/net/netfilter/nf_synproxy.h b/include/net/netfilter/nf_synproxy.h
index 87d73fb5279d..dc420b47e3aa 100644
--- a/include/net/netfilter/nf_synproxy.h
+++ b/include/net/netfilter/nf_synproxy.h
@@ -20,8 +20,10 @@ bool synproxy_recv_client_ack(struct net *net,
 			      const struct tcphdr *th,
 			      struct synproxy_options *opts, u32 recv_seq);
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
 				const struct nf_hook_state *nhs);
+#endif
 int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net);
 void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net);
 
@@ -35,8 +37,10 @@ bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb,
 				   const struct tcphdr *th,
 				   struct synproxy_options *opts, u32 recv_seq);
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
 				const struct nf_hook_state *nhs);
+#endif
 int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net);
 void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net);
 #else
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 66edf76301d3..dc301e3d6739 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -25,6 +25,7 @@ struct nft_pktinfo {
 	struct xt_action_param		xt;
 };
 
+#if IS_ENABLED(CONFIG_NETFILTER)
 static inline struct net *nft_net(const struct nft_pktinfo *pkt)
 {
 	return pkt->xt.state->net;
@@ -57,6 +58,7 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
 	pkt->skb = skb;
 	pkt->xt.state = state;
 }
+#endif
 
 static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
 					  struct sk_buff *skb)
@@ -927,9 +929,11 @@ struct nft_chain_type {
 	int				family;
 	struct module			*owner;
 	unsigned int			hook_mask;
+#if IS_ENABLED(CONFIG_NETFILTER)
 	nf_hookfn			*hooks[NF_MAX_HOOKS];
 	int				(*ops_register)(struct net *net, const struct nf_hook_ops *ops);
 	void				(*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
+#endif
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -955,7 +959,9 @@ struct nft_stats {
  *	@flow_block: flow block (for hardware offload)
  */
 struct nft_base_chain {
+#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_ops		ops;
+#endif
 	const struct nft_chain_type	*type;
 	u8				policy;
 	u8				flags;
@@ -1152,7 +1158,9 @@ struct nft_flowtable {
 					use:30;
 	u64				handle;
 	/* runtime data below here */
+#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_ops		*ops ____cacheline_aligned;
+#endif
 	struct nf_flowtable		data;
 };
 
-- 
cgit v1.2.3


From 20a9379d9a03ee0712d225035308973ecc5f6783 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:17:04 +0100
Subject: netfilter: remove "#ifdef __KERNEL__" guards from some headers.

A number of non-UAPI Netfilter header-files contained superfluous
"#ifdef __KERNEL__" guards.  Removed them.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_dccp.h      | 3 ---
 include/linux/netfilter/nf_conntrack_h323.h      | 4 ----
 include/linux/netfilter/nf_conntrack_irc.h       | 3 ---
 include/linux/netfilter/nf_conntrack_pptp.h      | 3 ---
 include/linux/netfilter/nf_conntrack_proto_gre.h | 2 --
 include/linux/netfilter/nf_conntrack_sane.h      | 4 ----
 include/linux/netfilter/nf_conntrack_sip.h       | 2 --
 7 files changed, 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h
index ace0f952d50f..c509ed76e714 100644
--- a/include/linux/netfilter/nf_conntrack_dccp.h
+++ b/include/linux/netfilter/nf_conntrack_dccp.h
@@ -25,7 +25,6 @@ enum ct_dccp_roles {
 };
 #define CT_DCCP_ROLE_MAX	(__CT_DCCP_ROLE_MAX - 1)
 
-#ifdef __KERNEL__
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 
 struct nf_ct_dccp {
@@ -36,6 +35,4 @@ struct nf_ct_dccp {
 	u_int64_t	handshake_seq;
 };
 
-#endif /* __KERNEL__ */
-
 #endif /* _NF_CONNTRACK_DCCP_H */
diff --git a/include/linux/netfilter/nf_conntrack_h323.h b/include/linux/netfilter/nf_conntrack_h323.h
index 96dfa886f8c0..4561ec0fcea4 100644
--- a/include/linux/netfilter/nf_conntrack_h323.h
+++ b/include/linux/netfilter/nf_conntrack_h323.h
@@ -2,8 +2,6 @@
 #ifndef _NF_CONNTRACK_H323_H
 #define _NF_CONNTRACK_H323_H
 
-#ifdef __KERNEL__
-
 #include <linux/netfilter.h>
 #include <linux/skbuff.h>
 #include <linux/types.h>
@@ -97,5 +95,3 @@ extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct,
 			     struct nf_conntrack_expect *exp);
 
 #endif
-
-#endif
diff --git a/include/linux/netfilter/nf_conntrack_irc.h b/include/linux/netfilter/nf_conntrack_irc.h
index f75e005db969..d02255f721e1 100644
--- a/include/linux/netfilter/nf_conntrack_irc.h
+++ b/include/linux/netfilter/nf_conntrack_irc.h
@@ -2,8 +2,6 @@
 #ifndef _NF_CONNTRACK_IRC_H
 #define _NF_CONNTRACK_IRC_H
 
-#ifdef __KERNEL__
-
 #include <linux/netfilter.h>
 #include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack_expect.h>
@@ -17,5 +15,4 @@ extern unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb,
 				       unsigned int matchlen,
 				       struct nf_conntrack_expect *exp);
 
-#endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_IRC_H */
diff --git a/include/linux/netfilter/nf_conntrack_pptp.h b/include/linux/netfilter/nf_conntrack_pptp.h
index 3f10e806f0dc..fcc409de31a4 100644
--- a/include/linux/netfilter/nf_conntrack_pptp.h
+++ b/include/linux/netfilter/nf_conntrack_pptp.h
@@ -50,8 +50,6 @@ struct nf_nat_pptp {
 	__be16 pac_call_id;			/* NAT'ed PAC call id */
 };
 
-#ifdef __KERNEL__
-
 #define PPTP_CONTROL_PORT	1723
 
 #define PPTP_PACKET_CONTROL	1
@@ -324,5 +322,4 @@ extern void
 (*nf_nat_pptp_hook_expectfn)(struct nf_conn *ct,
 			     struct nf_conntrack_expect *exp);
 
-#endif /* __KERNEL__ */
 #endif /* _NF_CONNTRACK_PPTP_H */
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 25f9a770fb84..f33aa6021364 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -10,7 +10,6 @@ struct nf_ct_gre {
 	unsigned int timeout;
 };
 
-#ifdef __KERNEL__
 #include <net/netfilter/nf_conntrack_tuple.h>
 
 struct nf_conn;
@@ -32,5 +31,4 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
 
 bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 		      struct net *net, struct nf_conntrack_tuple *tuple);
-#endif /* __KERNEL__ */
 #endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/linux/netfilter/nf_conntrack_sane.h b/include/linux/netfilter/nf_conntrack_sane.h
index 7d2de44edce3..46c7acd1b4a7 100644
--- a/include/linux/netfilter/nf_conntrack_sane.h
+++ b/include/linux/netfilter/nf_conntrack_sane.h
@@ -3,8 +3,6 @@
 #define _NF_CONNTRACK_SANE_H
 /* SANE tracking. */
 
-#ifdef __KERNEL__
-
 #define SANE_PORT	6566
 
 enum sane_state {
@@ -17,6 +15,4 @@ struct nf_ct_sane_master {
 	enum sane_state state;
 };
 
-#endif /* __KERNEL__ */
-
 #endif /* _NF_CONNTRACK_SANE_H */
diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
index f6437f7841af..c620521c42bc 100644
--- a/include/linux/netfilter/nf_conntrack_sip.h
+++ b/include/linux/netfilter/nf_conntrack_sip.h
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __NF_CONNTRACK_SIP_H__
 #define __NF_CONNTRACK_SIP_H__
-#ifdef __KERNEL__
 
 #include <linux/skbuff.h>
 #include <linux/types.h>
@@ -196,5 +195,4 @@ int ct_sip_get_sdp_header(const struct nf_conn *ct, const char *dptr,
 			  enum sdp_header_types term,
 			  unsigned int *matchoff, unsigned int *matchlen);
 
-#endif /* __KERNEL__ */
 #endif /* __NF_CONNTRACK_SIP_H__ */
-- 
cgit v1.2.3


From 2a475c409fe81a76fb26a6b023509d648237bbe6 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 7 Aug 2019 15:17:05 +0100
Subject: kbuild: remove all netfilter headers from header-test blacklist.

All the blacklisted NF headers can now be compiled stand-alone, so
removed them from the blacklist.

Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/Kbuild | 74 ----------------------------------------------------------
 1 file changed, 74 deletions(-)

(limited to 'include')

diff --git a/include/Kbuild b/include/Kbuild
index c38f0d46b267..af498acb7cd2 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -386,31 +386,6 @@ header-test-			+= linux/mvebu-pmsu.h
 header-test-			+= linux/mxm-wmi.h
 header-test-			+= linux/n_r3964.h
 header-test-			+= linux/ndctl.h
-header-test-			+= linux/netfilter/ipset/ip_set.h
-header-test-			+= linux/netfilter/ipset/ip_set_bitmap.h
-header-test-			+= linux/netfilter/ipset/ip_set_comment.h
-header-test-			+= linux/netfilter/ipset/ip_set_counter.h
-header-test-			+= linux/netfilter/ipset/ip_set_getport.h
-header-test-			+= linux/netfilter/ipset/ip_set_hash.h
-header-test-			+= linux/netfilter/ipset/ip_set_list.h
-header-test-			+= linux/netfilter/ipset/ip_set_skbinfo.h
-header-test-			+= linux/netfilter/ipset/ip_set_timeout.h
-header-test-			+= linux/netfilter/nf_conntrack_amanda.h
-header-test-			+= linux/netfilter/nf_conntrack_ftp.h
-header-test-			+= linux/netfilter/nf_conntrack_h323.h
-header-test-			+= linux/netfilter/nf_conntrack_h323_asn1.h
-header-test-			+= linux/netfilter/nf_conntrack_irc.h
-header-test-			+= linux/netfilter/nf_conntrack_pptp.h
-header-test-			+= linux/netfilter/nf_conntrack_proto_gre.h
-header-test-			+= linux/netfilter/nf_conntrack_sip.h
-header-test-			+= linux/netfilter/nf_conntrack_snmp.h
-header-test-			+= linux/netfilter/nf_conntrack_tftp.h
-header-test-			+= linux/netfilter/x_tables.h
-header-test-			+= linux/netfilter_arp/arp_tables.h
-header-test-			+= linux/netfilter_bridge/ebtables.h
-header-test-			+= linux/netfilter_ipv4/ip4_tables.h
-header-test-			+= linux/netfilter_ipv4/ip_tables.h
-header-test-			+= linux/netfilter_ipv6/ip6_tables.h
 header-test-			+= linux/nfs.h
 header-test-			+= linux/nfs_fs_i.h
 header-test-			+= linux/nfs_fs_sb.h
@@ -874,43 +849,6 @@ header-test-			+= net/mpls_iptunnel.h
 header-test-			+= net/mrp.h
 header-test-			+= net/ncsi.h
 header-test-			+= net/netevent.h
-header-test-			+= net/netfilter/br_netfilter.h
-header-test-			+= net/netfilter/ipv4/nf_dup_ipv4.h
-header-test-			+= net/netfilter/ipv6/nf_defrag_ipv6.h
-header-test-			+= net/netfilter/ipv6/nf_dup_ipv6.h
-header-test-			+= net/netfilter/nf_conntrack.h
-header-test-			+= net/netfilter/nf_conntrack_acct.h
-header-test-			+= net/netfilter/nf_conntrack_bridge.h
-header-test-			+= net/netfilter/nf_conntrack_core.h
-header-test-			+= net/netfilter/nf_conntrack_count.h
-header-test-			+= net/netfilter/nf_conntrack_ecache.h
-header-test-			+= net/netfilter/nf_conntrack_expect.h
-header-test-			+= net/netfilter/nf_conntrack_extend.h
-header-test-			+= net/netfilter/nf_conntrack_helper.h
-header-test-			+= net/netfilter/nf_conntrack_l4proto.h
-header-test-			+= net/netfilter/nf_conntrack_labels.h
-header-test-			+= net/netfilter/nf_conntrack_seqadj.h
-header-test-			+= net/netfilter/nf_conntrack_synproxy.h
-header-test-			+= net/netfilter/nf_conntrack_timeout.h
-header-test-			+= net/netfilter/nf_conntrack_timestamp.h
-header-test-			+= net/netfilter/nf_conntrack_tuple.h
-header-test-			+= net/netfilter/nf_dup_netdev.h
-header-test-			+= net/netfilter/nf_flow_table.h
-header-test-			+= net/netfilter/nf_nat.h
-header-test-			+= net/netfilter/nf_nat_helper.h
-header-test-			+= net/netfilter/nf_nat_masquerade.h
-header-test-			+= net/netfilter/nf_nat_redirect.h
-header-test-			+= net/netfilter/nf_queue.h
-header-test-			+= net/netfilter/nf_reject.h
-header-test-			+= net/netfilter/nf_synproxy.h
-header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables.h
-header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables_core.h
-header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables_ipv4.h
-header-test-			+= net/netfilter/nf_tables_ipv6.h
-header-test-$(CONFIG_NF_TABLES)	+= net/netfilter/nf_tables_offload.h
-header-test-			+= net/netfilter/nft_fib.h
-header-test-			+= net/netfilter/nft_meta.h
-header-test-			+= net/netfilter/nft_reject.h
 header-test-			+= net/netns/can.h
 header-test-			+= net/netns/generic.h
 header-test-			+= net/netns/ieee802154_6lowpan.h
@@ -1140,18 +1078,6 @@ header-test-			+= uapi/linux/kvm_para.h
 header-test-			+= uapi/linux/lightnvm.h
 header-test-			+= uapi/linux/mic_common.h
 header-test-			+= uapi/linux/mman.h
-header-test-			+= uapi/linux/netfilter/ipset/ip_set_bitmap.h
-header-test-			+= uapi/linux/netfilter/ipset/ip_set_hash.h
-header-test-			+= uapi/linux/netfilter/ipset/ip_set_list.h
-header-test-			+= uapi/linux/netfilter/nf_synproxy.h
-header-test-			+= uapi/linux/netfilter/xt_policy.h
-header-test-			+= uapi/linux/netfilter/xt_set.h
-header-test-			+= uapi/linux/netfilter_arp/arp_tables.h
-header-test-			+= uapi/linux/netfilter_arp/arpt_mangle.h
-header-test-			+= uapi/linux/netfilter_ipv4/ip_tables.h
-header-test-			+= uapi/linux/netfilter_ipv4/ipt_LOG.h
-header-test-			+= uapi/linux/netfilter_ipv6/ip6_tables.h
-header-test-			+= uapi/linux/netfilter_ipv6/ip6t_LOG.h
 header-test-			+= uapi/linux/nilfs2_ondisk.h
 header-test-			+= uapi/linux/patchkey.h
 header-test-			+= uapi/linux/ptrace.h
-- 
cgit v1.2.3


From d220540795ec2682a145c896461a4a742478ac52 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Tue, 13 Aug 2019 04:13:32 -0700
Subject: dt-bindings: ti-sysc: Add SPDX license identifier

Add the appropriate SPDX license identifier to the common
TI sysc bindings header file.

Signed-off-by: Suman Anna <s-anna@ti.com>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 include/dt-bindings/bus/ti-sysc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/dt-bindings/bus/ti-sysc.h b/include/dt-bindings/bus/ti-sysc.h
index 7138384e2ef9..babd08a1d226 100644
--- a/include/dt-bindings/bus/ti-sysc.h
+++ b/include/dt-bindings/bus/ti-sysc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /* TI sysc interconnect target module defines */
 
 /* Generic sysc found on omap2 and later, also known as type1 */
-- 
cgit v1.2.3


From 54d662227caebaabd25823c6d955ba92dfa361a8 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Tue, 13 Aug 2019 04:13:32 -0700
Subject: bus: ti-sysc: Switch to SPDX license identifier

Use the appropriate SPDX license identifier in the TI sysc
interconnect target driver source files and drop the previous
boilerplate license text. Also, add the the SPDX license
identifier in the associated ti-sysc header files.

Signed-off-by: Suman Anna <s-anna@ti.com>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 10 +---------
 include/linux/platform_data/ti-sysc.h |  2 ++
 2 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 2db474ab4c6b..45e08528fdff 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * ti-sysc.c - Texas Instruments sysc interconnect target driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/io.h>
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 0c587d4fc718..7d009dc08a54 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
 #ifndef __TI_SYSC_DATA_H__
 #define __TI_SYSC_DATA_H__
 
-- 
cgit v1.2.3


From b58056da2ec28e2c1b66096cd5109997f04d3fd1 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Tue, 13 Aug 2019 04:13:32 -0700
Subject: bus: ti-sysc: Add missing kerneldoc comments

A few fields in various structures is missing the corresponding
kerneldoc comments. Add them. Also, fixed the comment for sidlemodes.

Signed-off-by: Suman Anna <s-anna@ti.com>
Acked-by: Roger Quadros <rogerq@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 7 +++++++
 include/linux/platform_data/ti-sysc.h | 5 +++--
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 45e08528fdff..35997a2b2dc4 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -54,15 +54,22 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = {
  * @module_size: size of the interconnect target module
  * @module_va: virtual address of the interconnect target module
  * @offsets: register offsets from module base
+ * @mdata: ti-sysc to hwmod translation data for a module
  * @clocks: clocks used by the interconnect target module
  * @clock_roles: clock role names for the found clocks
  * @nr_clocks: number of clocks used by the interconnect target module
+ * @rsts: resets used by the interconnect target module
  * @legacy_mode: configured for legacy mode if set
  * @cap: interconnect target module capabilities
  * @cfg: interconnect target module configuration
+ * @cookie: data used by legacy platform callbacks
  * @name: name if available
  * @revision: interconnect target module revision
+ * @enabled: sysc runtime enabled status
  * @needs_resume: runtime resume needed on resume from suspend
+ * @child_needs_resume: runtime resume needed for child on resume from suspend
+ * @disable_on_idle: status flag used for disabling modules with resets
+ * @idle_work: work structure used to perform delayed idle on a module
  * @clk_enable_quirk: module specific clock enable quirk
  * @clk_disable_quirk: module specific clock disable quirk
  * @reset_done_quirk: module specific reset done quirk
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 7d009dc08a54..1a0905435b32 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -72,7 +72,7 @@ struct sysc_regbits {
 
 /**
  * struct sysc_capabilities - capabilities for an interconnect target module
- *
+ * @type: sysc type identifier for the module
  * @sysc_mask: bitmask of supported SYSCONFIG register bits
  * @regbits: bitmask of SYSCONFIG register bits
  * @mod_quirks: bitmask of module specific quirks
@@ -87,8 +87,9 @@ struct sysc_capabilities {
 /**
  * struct sysc_config - configuration for an interconnect target module
  * @sysc_val: configured value for sysc register
+ * @syss_mask: configured mask value for SYSSTATUS register
  * @midlemodes: bitmask of supported master idle modes
- * @sidlemodes: bitmask of supported master idle modes
+ * @sidlemodes: bitmask of supported slave idle modes
  * @srst_udelay: optional delay needed after OCP soft reset
  * @quirks: bitmask of enabled quirks
  */
-- 
cgit v1.2.3


From 448d5a55759a2e60208b75ceed4bd88cd4f5a963 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Tue, 13 Aug 2019 17:06:15 +0530
Subject: PCI: Add #defines for some of PCIe spec r4.0 features

Add #defines only for the Data Link Feature and Physical Layer 16.0 GT/s
features as defined in PCIe spec r4.0, sec 7.7.4 for Data Link Feature and
sec 7.7.5 for Physical Layer 16.0 GT/s.

Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f28e562d7ca8..d28d0319d932 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -713,7 +713,9 @@
 #define PCI_EXT_CAP_ID_DPC	0x1D	/* Downstream Port Containment */
 #define PCI_EXT_CAP_ID_L1SS	0x1E	/* L1 PM Substates */
 #define PCI_EXT_CAP_ID_PTM	0x1F	/* Precision Time Measurement */
-#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PTM
+#define PCI_EXT_CAP_ID_DLF	0x25	/* Data Link Feature */
+#define PCI_EXT_CAP_ID_PL_16GT	0x26	/* Physical Layer 16.0 GT/s */
+#define PCI_EXT_CAP_ID_MAX	PCI_EXT_CAP_ID_PL_16GT
 
 #define PCI_EXT_CAP_DSN_SIZEOF	12
 #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40
@@ -1053,4 +1055,14 @@
 #define  PCI_L1SS_CTL1_LTR_L12_TH_SCALE	0xe0000000  /* LTR_L1.2_THRESHOLD_Scale */
 #define PCI_L1SS_CTL2		0x0c	/* Control 2 Register */
 
+/* Data Link Feature */
+#define PCI_DLF_CAP		0x04	/* Capabilities Register */
+#define  PCI_DLF_EXCHANGE_ENABLE	0x80000000  /* Data Link Feature Exchange Enable */
+
+/* Physical Layer 16.0 GT/s */
+#define PCI_PL_16GT_LE_CTRL	0x20	/* Lane Equalization Control Register */
+#define  PCI_PL_16GT_LE_CTRL_DSP_TX_PRESET_MASK		0x0000000F
+#define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_MASK		0x000000F0
+#define  PCI_PL_16GT_LE_CTRL_USP_TX_PRESET_SHIFT	4
+
 #endif /* LINUX_PCI_REGS_H */
-- 
cgit v1.2.3


From e9dc7c60507c822992e793bd3845f0556ae0ff98 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Sat, 10 Aug 2019 21:18:09 +0200
Subject: can: gw: use struct canfd_frame as internal data structure

To prepare the CAN FD support this patch implements the first adaptions in
data structures for CAN FD without changing the current functionality.

Additionally some code at the end of this patch is moved or indented to
simplify the review of the next implementation step.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/gw.h |   5 +-
 net/can/gw.c                | 222 +++++++++++++++++++++++---------------------
 2 files changed, 120 insertions(+), 107 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index 7bee7a0b9800..ed811bc463b5 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -93,10 +93,11 @@ enum {
 
 /* CAN frame elements that are affected by curr. 3 CAN frame modifications */
 #define CGW_MOD_ID	0x01
-#define CGW_MOD_DLC	0x02
+#define CGW_MOD_DLC	0x02		/* contains the data length in bytes */
+#define CGW_MOD_LEN	CGW_MOD_DLC	/* CAN FD length representation */
 #define CGW_MOD_DATA	0x04
 
-#define CGW_FRAME_MODS 3 /* ID DLC DATA */
+#define CGW_FRAME_MODS 3 /* ID DLC/LEN DATA */
 
 #define MAX_MODFUNCTIONS (CGW_MOD_FUNCS * CGW_FRAME_MODS)
 
diff --git a/net/can/gw.c b/net/can/gw.c
index 4c6ad0054a4c..3a1ad206fbef 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -85,10 +85,10 @@ static struct kmem_cache *cgw_cache __read_mostly;
 /* structure that contains the (on-the-fly) CAN frame modifications */
 struct cf_mod {
 	struct {
-		struct can_frame and;
-		struct can_frame or;
-		struct can_frame xor;
-		struct can_frame set;
+		struct canfd_frame and;
+		struct canfd_frame or;
+		struct canfd_frame xor;
+		struct canfd_frame set;
 	} modframe;
 	struct {
 		u8 and;
@@ -96,7 +96,7 @@ struct cf_mod {
 		u8 xor;
 		u8 set;
 	} modtype;
-	void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf,
+	void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf,
 					  struct cf_mod *mod);
 
 	/* CAN frame checksum calculation after CAN frame modifications */
@@ -105,8 +105,10 @@ struct cf_mod {
 		struct cgw_csum_crc8 crc8;
 	} csum;
 	struct {
-		void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
-		void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
+		void (*xor)(struct canfd_frame *cf,
+			    struct cgw_csum_xor *xor);
+		void (*crc8)(struct canfd_frame *cf,
+			     struct cgw_csum_crc8 *crc8);
 	} csumfunc;
 	u32 uid;
 };
@@ -149,23 +151,23 @@ struct cgw_job {
 
 /* modification functions that are invoked in the hot path in can_can_gw_rcv */
 
-#define MODFUNC(func, op) static void func(struct can_frame *cf, \
+#define MODFUNC(func, op) static void func(struct canfd_frame *cf, \
 					   struct cf_mod *mod) { op ; }
 
 MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
-MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc)
+MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len)
 MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
 MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
-MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc)
+MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len)
 MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
 MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
-MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc)
+MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len)
 MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
 MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
-MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc)
+MODFUNC(mod_set_len, cf->len = mod->modframe.set.len)
 MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)
 
-static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
+static void canframecpy(struct canfd_frame *dst, struct can_frame *src)
 {
 	/* Copy the struct members separately to ensure that no uninitialized
 	 * data are copied in the 3 bytes hole of the struct. This is needed
@@ -173,12 +175,14 @@ static inline void canframecpy(struct can_frame *dst, struct can_frame *src)
 	 */
 
 	dst->can_id = src->can_id;
-	dst->can_dlc = src->can_dlc;
+	dst->len = src->can_dlc;
 	*(u64 *)dst->data = *(u64 *)src->data;
 }
 
 static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
 {
+	s8 dlen = CAN_MAX_DLEN;
+
 	/* absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
 	 * relative to received dlc -1 .. -8 :
 	 * e.g. for received dlc = 8
@@ -187,27 +191,27 @@ static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
 	 * -8 => index = 0 (data[0])
 	 */
 
-	if (fr > -9 && fr < 8 &&
-	    to > -9 && to < 8 &&
-	    re > -9 && re < 8)
+	if (fr >= -dlen && fr < dlen &&
+	    to >= -dlen && to < dlen &&
+	    re >= -dlen && re < dlen)
 		return 0;
 	else
 		return -EINVAL;
 }
 
-static inline int calc_idx(int idx, int rx_dlc)
+static inline int calc_idx(int idx, int rx_len)
 {
 	if (idx < 0)
-		return rx_dlc + idx;
+		return rx_len + idx;
 	else
 		return idx;
 }
 
-static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor)
 {
-	int from = calc_idx(xor->from_idx, cf->can_dlc);
-	int to = calc_idx(xor->to_idx, cf->can_dlc);
-	int res = calc_idx(xor->result_idx, cf->can_dlc);
+	int from = calc_idx(xor->from_idx, cf->len);
+	int to = calc_idx(xor->to_idx, cf->len);
+	int res = calc_idx(xor->result_idx, cf->len);
 	u8 val = xor->init_xor_val;
 	int i;
 
@@ -225,7 +229,7 @@ static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor)
 	cf->data[res] = val;
 }
 
-static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor)
 {
 	u8 val = xor->init_xor_val;
 	int i;
@@ -236,7 +240,7 @@ static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor)
 	cf->data[xor->result_idx] = val;
 }
 
-static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
+static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor)
 {
 	u8 val = xor->init_xor_val;
 	int i;
@@ -247,11 +251,12 @@ static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor)
 	cf->data[xor->result_idx] = val;
 }
 
-static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_rel(struct canfd_frame *cf,
+			      struct cgw_csum_crc8 *crc8)
 {
-	int from = calc_idx(crc8->from_idx, cf->can_dlc);
-	int to = calc_idx(crc8->to_idx, cf->can_dlc);
-	int res = calc_idx(crc8->result_idx, cf->can_dlc);
+	int from = calc_idx(crc8->from_idx, cf->len);
+	int to = calc_idx(crc8->to_idx, cf->len);
+	int res = calc_idx(crc8->result_idx, cf->len);
 	u8 crc = crc8->init_crc_val;
 	int i;
 
@@ -284,7 +289,8 @@ static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
 	cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
 }
 
-static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_pos(struct canfd_frame *cf,
+			      struct cgw_csum_crc8 *crc8)
 {
 	u8 crc = crc8->init_crc_val;
 	int i;
@@ -310,7 +316,8 @@ static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
 	cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val;
 }
 
-static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
+static void cgw_csum_crc8_neg(struct canfd_frame *cf,
+			      struct cgw_csum_crc8 *crc8)
 {
 	u8 crc = crc8->init_crc_val;
 	int i;
@@ -340,7 +347,7 @@ static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8)
 static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 {
 	struct cgw_job *gwj = (struct cgw_job *)data;
-	struct can_frame *cf;
+	struct canfd_frame *cf;
 	struct sk_buff *nskb;
 	int modidx = 0;
 
@@ -400,7 +407,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 	nskb->dev = gwj->dst.dev;
 
 	/* pointer to modifiable CAN frame */
-	cf = (struct can_frame *)nskb->data;
+	cf = (struct canfd_frame *)nskb->data;
 
 	/* perform preprocessed modification functions if there are any */
 	while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx])
@@ -409,22 +416,22 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 	/* Has the CAN frame been modified? */
 	if (modidx) {
 		/* get available space for the processed CAN frame type */
-		int max_len = nskb->len - offsetof(struct can_frame, data);
+		int max_len = nskb->len - offsetof(struct canfd_frame, data);
 
 		/* dlc may have changed, make sure it fits to the CAN frame */
-		if (cf->can_dlc > max_len)
+		if (cf->len > max_len)
 			goto out_delete;
 
 		/* check for checksum updates in classic CAN length only */
 		if (gwj->mod.csumfunc.crc8) {
-			if (cf->can_dlc > 8)
+			if (cf->len > 8)
 				goto out_delete;
 
 			(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
 		}
 
 		if (gwj->mod.csumfunc.xor) {
-			if (cf->can_dlc > 8)
+			if (cf->len > 8)
 				goto out_delete;
 
 			(*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
@@ -492,7 +499,6 @@ static int cgw_notifier(struct notifier_block *nb,
 static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 		       u32 pid, u32 seq, int flags)
 {
-	struct cgw_frame_mod mb;
 	struct rtcanmsg *rtcan;
 	struct nlmsghdr *nlh;
 
@@ -529,32 +535,36 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 			goto cancel;
 	}
 
-	if (gwj->mod.modtype.and) {
-		memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.and;
-		if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
-			goto cancel;
-	}
+	if (1) {
+		struct cgw_frame_mod mb;
 
-	if (gwj->mod.modtype.or) {
-		memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.or;
-		if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
-			goto cancel;
-	}
+		if (gwj->mod.modtype.and) {
+			memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.and;
+			if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
 
-	if (gwj->mod.modtype.xor) {
-		memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.xor;
-		if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
-			goto cancel;
-	}
+		if (gwj->mod.modtype.or) {
+			memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.or;
+			if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
 
-	if (gwj->mod.modtype.set) {
-		memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
-		mb.modtype = gwj->mod.modtype.set;
-		if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
-			goto cancel;
+		if (gwj->mod.modtype.xor) {
+			memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.xor;
+			if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.set) {
+			memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.set;
+			if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
 	}
 
 	if (gwj->mod.uid) {
@@ -642,7 +652,6 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			  u8 gwtype, void *gwtypeattr, u8 *limhops)
 {
 	struct nlattr *tb[CGW_MAX + 1];
-	struct cgw_frame_mod mb;
 	int modidx = 0;
 	int err = 0;
 
@@ -662,69 +671,72 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 	}
 
 	/* check for AND/OR/XOR/SET modifications */
+	if (1) {
+		struct cgw_frame_mod mb;
 
-	if (tb[CGW_MOD_AND]) {
-		nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
+		if (tb[CGW_MOD_AND]) {
+			nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN);
 
-		canframecpy(&mod->modframe.and, &mb.cf);
-		mod->modtype.and = mb.modtype;
+			canframecpy(&mod->modframe.and, &mb.cf);
+			mod->modtype.and = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_and_id;
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_and_id;
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_and_dlc;
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_and_len;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_and_data;
-	}
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_and_data;
+		}
 
-	if (tb[CGW_MOD_OR]) {
-		nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
+		if (tb[CGW_MOD_OR]) {
+			nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN);
 
-		canframecpy(&mod->modframe.or, &mb.cf);
-		mod->modtype.or = mb.modtype;
+			canframecpy(&mod->modframe.or, &mb.cf);
+			mod->modtype.or = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_or_id;
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_or_id;
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_or_dlc;
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_or_len;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_or_data;
-	}
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_or_data;
+		}
 
-	if (tb[CGW_MOD_XOR]) {
-		nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
+		if (tb[CGW_MOD_XOR]) {
+			nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN);
 
-		canframecpy(&mod->modframe.xor, &mb.cf);
-		mod->modtype.xor = mb.modtype;
+			canframecpy(&mod->modframe.xor, &mb.cf);
+			mod->modtype.xor = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_xor_id;
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_xor_id;
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_xor_dlc;
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_xor_len;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_xor_data;
-	}
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_xor_data;
+		}
 
-	if (tb[CGW_MOD_SET]) {
-		nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
+		if (tb[CGW_MOD_SET]) {
+			nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN);
 
-		canframecpy(&mod->modframe.set, &mb.cf);
-		mod->modtype.set = mb.modtype;
+			canframecpy(&mod->modframe.set, &mb.cf);
+			mod->modtype.set = mb.modtype;
 
-		if (mb.modtype & CGW_MOD_ID)
-			mod->modfunc[modidx++] = mod_set_id;
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_set_id;
 
-		if (mb.modtype & CGW_MOD_DLC)
-			mod->modfunc[modidx++] = mod_set_dlc;
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_set_len;
 
-		if (mb.modtype & CGW_MOD_DATA)
-			mod->modfunc[modidx++] = mod_set_data;
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_set_data;
+		}
 	}
 
 	/* check for checksum operations after CAN frame modifications */
-- 
cgit v1.2.3


From 456a8a646b2563438c16a9b27decf9aa717f1ebb Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Sat, 10 Aug 2019 21:18:10 +0200
Subject: can: gw: add support for CAN FD frames

Introduce CAN FD support which needs an extension of the netlink API to
pass CAN FD type content to the kernel which has a different size to
Classic CAN. Additionally the struct canfd_frame has a new 'flags' element
that can now be modified with can-gw.

The new CGW_FLAGS_CAN_FD option flag defines whether the routing job
handles Classic CAN or CAN FD frames. This setting is very strict at
reception time and enables the new possibilities, e.g. CGW_FDMOD_* and
modifying the flags element of struct canfd_frame, only when
CGW_FLAGS_CAN_FD is set.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/gw.h |  14 ++-
 net/can/gw.c                | 214 ++++++++++++++++++++++++++++++++++++++------
 2 files changed, 200 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index ed811bc463b5..3aea5388c8e4 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -80,6 +80,10 @@ enum {
 	CGW_DELETED,	/* number of deleted CAN frames (see max_hops param) */
 	CGW_LIM_HOPS,	/* limit the number of hops of this specific rule */
 	CGW_MOD_UID,	/* user defined identifier for modification updates */
+	CGW_FDMOD_AND,	/* CAN FD frame modification binary AND */
+	CGW_FDMOD_OR,	/* CAN FD frame modification binary OR */
+	CGW_FDMOD_XOR,	/* CAN FD frame modification binary XOR */
+	CGW_FDMOD_SET,	/* CAN FD frame modification set alternate values */
 	__CGW_MAX
 };
 
@@ -88,6 +92,7 @@ enum {
 #define CGW_FLAGS_CAN_ECHO 0x01
 #define CGW_FLAGS_CAN_SRC_TSTAMP 0x02
 #define CGW_FLAGS_CAN_IIF_TX_OK 0x04
+#define CGW_FLAGS_CAN_FD 0x08
 
 #define CGW_MOD_FUNCS 4 /* AND OR XOR SET */
 
@@ -96,8 +101,9 @@ enum {
 #define CGW_MOD_DLC	0x02		/* contains the data length in bytes */
 #define CGW_MOD_LEN	CGW_MOD_DLC	/* CAN FD length representation */
 #define CGW_MOD_DATA	0x04
+#define CGW_MOD_FLAGS	0x08		/* CAN FD flags */
 
-#define CGW_FRAME_MODS 3 /* ID DLC/LEN DATA */
+#define CGW_FRAME_MODS 4 /* ID DLC/LEN DATA FLAGS */
 
 #define MAX_MODFUNCTIONS (CGW_MOD_FUNCS * CGW_FRAME_MODS)
 
@@ -106,7 +112,13 @@ struct cgw_frame_mod {
 	__u8 modtype;
 } __attribute__((packed));
 
+struct cgw_fdframe_mod {
+	struct canfd_frame cf;
+	__u8 modtype;
+} __attribute__((packed));
+
 #define CGW_MODATTR_LEN sizeof(struct cgw_frame_mod)
+#define CGW_FDMODATTR_LEN sizeof(struct cgw_fdframe_mod)
 
 struct cgw_csum_xor {
 	__s8 from_idx;
diff --git a/net/can/gw.c b/net/can/gw.c
index 3a1ad206fbef..65d60c93af29 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /* gw.c - CAN frame Gateway/Router/Bridge with netlink interface
  *
- * Copyright (c) 2017 Volkswagen Group Electronic Research
+ * Copyright (c) 2019 Volkswagen Group Electronic Research
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -59,7 +59,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
-#define CAN_GW_VERSION "20170425"
+#define CAN_GW_VERSION "20190810"
 #define CAN_GW_NAME "can-gw"
 
 MODULE_DESCRIPTION("PF_CAN netlink gateway");
@@ -156,17 +156,50 @@ struct cgw_job {
 
 MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id)
 MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len)
+MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags)
 MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data)
 MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id)
 MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len)
+MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags)
 MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data)
 MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id)
 MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len)
+MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags)
 MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data)
 MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id)
 MODFUNC(mod_set_len, cf->len = mod->modframe.set.len)
+MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags)
 MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data)
 
+static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	int i;
+
+	for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+		*(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i);
+}
+
+static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	int i;
+
+	for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+		*(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i);
+}
+
+static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	int i;
+
+	for (i = 0; i < CANFD_MAX_DLEN; i += 8)
+		*(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i);
+}
+
+static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod)
+{
+	memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN);
+}
+
 static void canframecpy(struct canfd_frame *dst, struct can_frame *src)
 {
 	/* Copy the struct members separately to ensure that no uninitialized
@@ -179,10 +212,26 @@ static void canframecpy(struct canfd_frame *dst, struct can_frame *src)
 	*(u64 *)dst->data = *(u64 *)src->data;
 }
 
-static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re)
+static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src)
+{
+	/* Copy the struct members separately to ensure that no uninitialized
+	 * data are copied in the 2 bytes hole of the struct. This is needed
+	 * to make easy compares of the data in the struct cf_mod.
+	 */
+
+	dst->can_id = src->can_id;
+	dst->flags = src->flags;
+	dst->len = src->len;
+	memcpy(dst->data, src->data, CANFD_MAX_DLEN);
+}
+
+static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r)
 {
 	s8 dlen = CAN_MAX_DLEN;
 
+	if (r->flags & CGW_FLAGS_CAN_FD)
+		dlen = CANFD_MAX_DLEN;
+
 	/* absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0]
 	 * relative to received dlc -1 .. -8 :
 	 * e.g. for received dlc = 8
@@ -351,6 +400,15 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 	struct sk_buff *nskb;
 	int modidx = 0;
 
+	/* process strictly Classic CAN or CAN FD frames */
+	if (gwj->flags & CGW_FLAGS_CAN_FD) {
+		if (skb->len != CANFD_MTU)
+			return;
+	} else {
+		if (skb->len != CAN_MTU)
+			return;
+	}
+
 	/* Do not handle CAN frames routed more than 'max_hops' times.
 	 * In general we should never catch this delimiter which is intended
 	 * to cover a misconfiguration protection (e.g. circular CAN routes).
@@ -419,23 +477,19 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 		int max_len = nskb->len - offsetof(struct canfd_frame, data);
 
 		/* dlc may have changed, make sure it fits to the CAN frame */
-		if (cf->len > max_len)
-			goto out_delete;
-
-		/* check for checksum updates in classic CAN length only */
-		if (gwj->mod.csumfunc.crc8) {
-			if (cf->len > 8)
-				goto out_delete;
-
-			(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
+		if (cf->len > max_len) {
+			/* delete frame due to misconfiguration */
+			gwj->deleted_frames++;
+			kfree_skb(nskb);
+			return;
 		}
 
-		if (gwj->mod.csumfunc.xor) {
-			if (cf->len > 8)
-				goto out_delete;
+		/* check for checksum updates */
+		if (gwj->mod.csumfunc.crc8)
+			(*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8);
 
+		if (gwj->mod.csumfunc.xor)
 			(*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor);
-		}
 	}
 
 	/* clear the skb timestamp if not configured the other way */
@@ -447,13 +501,6 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data)
 		gwj->dropped_frames++;
 	else
 		gwj->handled_frames++;
-
-	return;
-
- out_delete:
-	/* delete frame due to misconfiguration */
-	gwj->deleted_frames++;
-	kfree_skb(nskb);
 }
 
 static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj)
@@ -535,7 +582,37 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 			goto cancel;
 	}
 
-	if (1) {
+	if (gwj->flags & CGW_FLAGS_CAN_FD) {
+		struct cgw_fdframe_mod mb;
+
+		if (gwj->mod.modtype.and) {
+			memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.and;
+			if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.or) {
+			memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.or;
+			if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.xor) {
+			memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.xor;
+			if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+
+		if (gwj->mod.modtype.set) {
+			memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf));
+			mb.modtype = gwj->mod.modtype.set;
+			if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0)
+				goto cancel;
+		}
+	} else {
 		struct cgw_frame_mod mb;
 
 		if (gwj->mod.modtype.and) {
@@ -645,6 +722,10 @@ static const struct nla_policy cgw_policy[CGW_MAX + 1] = {
 	[CGW_FILTER]	= { .len = sizeof(struct can_filter) },
 	[CGW_LIM_HOPS]	= { .type = NLA_U8 },
 	[CGW_MOD_UID]	= { .type = NLA_U32 },
+	[CGW_FDMOD_AND]	= { .len = sizeof(struct cgw_fdframe_mod) },
+	[CGW_FDMOD_OR]	= { .len = sizeof(struct cgw_fdframe_mod) },
+	[CGW_FDMOD_XOR]	= { .len = sizeof(struct cgw_fdframe_mod) },
+	[CGW_FDMOD_SET]	= { .len = sizeof(struct cgw_fdframe_mod) },
 };
 
 /* check for common and gwtype specific attributes */
@@ -652,6 +733,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			  u8 gwtype, void *gwtypeattr, u8 *limhops)
 {
 	struct nlattr *tb[CGW_MAX + 1];
+	struct rtcanmsg *r = nlmsg_data(nlh);
 	int modidx = 0;
 	int err = 0;
 
@@ -671,7 +753,85 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 	}
 
 	/* check for AND/OR/XOR/SET modifications */
-	if (1) {
+	if (r->flags & CGW_FLAGS_CAN_FD) {
+		struct cgw_fdframe_mod mb;
+
+		if (tb[CGW_FDMOD_AND]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN);
+
+			canfdframecpy(&mod->modframe.and, &mb.cf);
+			mod->modtype.and = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_and_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_and_len;
+
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_and_flags;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_and_fddata;
+		}
+
+		if (tb[CGW_FDMOD_OR]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN);
+
+			canfdframecpy(&mod->modframe.or, &mb.cf);
+			mod->modtype.or = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_or_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_or_len;
+
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_or_flags;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_or_fddata;
+		}
+
+		if (tb[CGW_FDMOD_XOR]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN);
+
+			canfdframecpy(&mod->modframe.xor, &mb.cf);
+			mod->modtype.xor = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_xor_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_xor_len;
+
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_xor_flags;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_xor_fddata;
+		}
+
+		if (tb[CGW_FDMOD_SET]) {
+			nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN);
+
+			canfdframecpy(&mod->modframe.set, &mb.cf);
+			mod->modtype.set = mb.modtype;
+
+			if (mb.modtype & CGW_MOD_ID)
+				mod->modfunc[modidx++] = mod_set_id;
+
+			if (mb.modtype & CGW_MOD_LEN)
+				mod->modfunc[modidx++] = mod_set_len;
+
+			if (mb.modtype & CGW_MOD_FLAGS)
+				mod->modfunc[modidx++] = mod_set_flags;
+
+			if (mb.modtype & CGW_MOD_DATA)
+				mod->modfunc[modidx++] = mod_set_fddata;
+		}
+	} else {
 		struct cgw_frame_mod mb;
 
 		if (tb[CGW_MOD_AND]) {
@@ -745,7 +905,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]);
 
 			err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
-						 c->result_idx);
+						 c->result_idx, r);
 			if (err)
 				return err;
 
@@ -768,7 +928,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]);
 
 			err = cgw_chk_csum_parms(c->from_idx, c->to_idx,
-						 c->result_idx);
+						 c->result_idx, r);
 			if (err)
 				return err;
 
-- 
cgit v1.2.3


From 3ca3c4aad2efa2931b663acc4ece7a38b31071d1 Mon Sep 17 00:00:00 2001
From: Andre Hartmann <aha_1980@gmx.de>
Date: Sat, 23 Mar 2019 16:04:19 +0100
Subject: can: netlink: fix documentation typos

This patch fixes some documentation typos in struct can_bittiming_const.

Signed-off-by: Andre Hartmann <aha_1980@gmx.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/netlink.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 9f56fad4785b..1bc70d3a4d39 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -40,15 +40,15 @@ struct can_bittiming {
 };
 
 /*
- * CAN harware-dependent bit-timing constant
+ * CAN hardware-dependent bit-timing constant
  *
  * Used for calculating and checking bit-timing parameters
  */
 struct can_bittiming_const {
 	char name[16];		/* Name of the CAN controller hardware */
-	__u32 tseg1_min;	/* Time segement 1 = prop_seg + phase_seg1 */
+	__u32 tseg1_min;	/* Time segment 1 = prop_seg + phase_seg1 */
 	__u32 tseg1_max;
-	__u32 tseg2_min;	/* Time segement 2 = phase_seg2 */
+	__u32 tseg2_min;	/* Time segment 2 = phase_seg2 */
 	__u32 tseg2_max;
 	__u32 sjw_max;		/* Synchronisation jump width */
 	__u32 brp_min;		/* Bit-rate prescaler */
-- 
cgit v1.2.3


From 4b950bb9ac0c7246dcf75060040577c3de60c166 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 28 Jul 2019 20:27:41 +0200
Subject: Kbuild: Handle PREEMPT_RT for version string and magic

Update the build scripts and the version magic to reflect when
CONFIG_PREEMPT_RT is enabled in the same way as CONFIG_PREEMPT is treated.

The resulting version strings:

  Linux m 5.3.0-rc1+ #100 SMP Fri Jul 26 ...
  Linux m 5.3.0-rc1+ #101 SMP PREEMPT Fri Jul 26 ...
  Linux m 5.3.0-rc1+ #102 SMP PREEMPT_RT Fri Jul 26 ...

The module vermagic:

  5.3.0-rc1+ SMP mod_unload modversions
  5.3.0-rc1+ SMP preempt mod_unload modversions
  5.3.0-rc1+ SMP preempt_rt mod_unload modversions

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 include/linux/vermagic.h | 2 ++
 init/Makefile            | 5 +++--
 scripts/Makefile.modpost | 2 +-
 scripts/mkcompile_h      | 4 +++-
 4 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index bae807eb2933..9aced11e9000 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -9,6 +9,8 @@
 #endif
 #ifdef CONFIG_PREEMPT
 #define MODULE_VERMAGIC_PREEMPT "preempt "
+#elif defined(CONFIG_PREEMPT_RT)
+#define MODULE_VERMAGIC_PREEMPT "preempt_rt "
 #else
 #define MODULE_VERMAGIC_PREEMPT ""
 #endif
diff --git a/init/Makefile b/init/Makefile
index a3e5ce2bcf08..6246a06364d0 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -33,5 +33,6 @@ $(obj)/version.o: include/generated/compile.h
 silent_chk_compile.h = :
 include/generated/compile.h: FORCE
 	@$($(quiet)chk_compile.h)
-	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@ \
-	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)" "$(CC) $(KBUILD_CFLAGS)"
+	$(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@	\
+	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)"	\
+	"$(CONFIG_PREEMPT_RT)" "$(CC) $(KBUILD_CFLAGS)"
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 26e6574ecd08..e003350bc473 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -23,7 +23,7 @@
 #   Version magic (see include/linux/vermagic.h for full details)
 #     - Kernel release
 #     - SMP is CONFIG_SMP
-#     - PREEMPT is CONFIG_PREEMPT
+#     - PREEMPT is CONFIG_PREEMPT[_RT]
 #     - GCC Version
 #   Module info
 #     - Module version (MODULE_VERSION)
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index 2339f86126cb..d1d757c6edf4 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -5,7 +5,8 @@ TARGET=$1
 ARCH=$2
 SMP=$3
 PREEMPT=$4
-CC=$5
+PREEMPT_RT=$5
+CC=$6
 
 vecho() { [ "${quiet}" = "silent_" ] || echo "$@" ; }
 
@@ -53,6 +54,7 @@ UTS_VERSION="#$VERSION"
 CONFIG_FLAGS=""
 if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
 if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
+if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi
 UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
 
 # Truncate to maximum length
-- 
cgit v1.2.3


From 2c8ccb37b08fe364f02a9914daca474d43151453 Mon Sep 17 00:00:00 2001
From: Bernard Metzler <bmt@zurich.ibm.com>
Date: Fri, 9 Aug 2019 17:18:16 +0200
Subject: RDMA/siw: Change CQ flags from 64->32 bits

This patch changes the driver/user shared (mmapped) CQ notification
flags field from unsigned 64-bits size to unsigned 32-bits size. This
enables building siw on 32-bit architectures.

This patch changes the siw-abi, but as siw was only just merged in
this merge window cycle, there are no released kernels with the prior
abi.  We are making no attempt to be binary compatible with siw user
space libraries prior to the merge of siw into the upstream kernel,
only moving forward with upstream kernels and upstream rdma-core
provided siw libraries are we guaranteeing compatibility.

Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20190809151816.13018-1-bmt@zurich.ibm.com
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/siw/Kconfig     |  2 +-
 drivers/infiniband/sw/siw/siw.h       |  2 +-
 drivers/infiniband/sw/siw/siw_qp.c    | 14 ++++++++++----
 drivers/infiniband/sw/siw/siw_verbs.c | 16 +++++++++++-----
 include/uapi/rdma/siw-abi.h           |  3 ++-
 5 files changed, 25 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index dace276aea14..b622fc62f2cd 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -1,6 +1,6 @@
 config RDMA_SIW
 	tristate "Software RDMA over TCP/IP (iWARP) driver"
-	depends on INET && INFINIBAND && LIBCRC32C && 64BIT
+	depends on INET && INFINIBAND && LIBCRC32C
 	select DMA_VIRT_OPS
 	help
 	This driver implements the iWARP RDMA transport over
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 03fd7b2f595f..77b1aabf6ff3 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -214,7 +214,7 @@ struct siw_wqe {
 struct siw_cq {
 	struct ib_cq base_cq;
 	spinlock_t lock;
-	u64 *notify;
+	struct siw_cq_ctrl *notify;
 	struct siw_cqe *queue;
 	u32 cq_put;
 	u32 cq_get;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index e27bd5b35b96..0990307c5d2c 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1013,18 +1013,24 @@ out:
  */
 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
 {
-	u64 cq_notify;
+	u32 cq_notify;
 
 	if (!cq->base_cq.comp_handler)
 		return false;
 
-	cq_notify = READ_ONCE(*cq->notify);
+	/* Read application shared notification state */
+	cq_notify = READ_ONCE(cq->notify->flags);
 
 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
 	     (flags & SIW_WQE_SOLICITED))) {
-		/* dis-arm CQ */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+		/*
+		 * CQ notification is one-shot: Since the
+		 * current CQE causes user notification,
+		 * the CQ gets dis-aremd and must be re-aremd
+		 * by the user for a new notification.
+		 */
+		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
 
 		return true;
 	}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 32dc79d0e898..e7f3a2379d9d 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 
 	spin_lock_init(&cq->lock);
 
-	cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
 
 	if (udata) {
 		struct siw_uresp_create_cq uresp = {};
@@ -1141,11 +1141,17 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
 	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
 
 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		/* CQ event for next solicited completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+		/*
+		 * Enable CQ event for next solicited completion.
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
 	else
-		/* CQ event for any signalled completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+		/*
+		 * Enable CQ event for any signalled completion.
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);
 
 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 		return cq->cq_put - cq->cq_get;
diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h
index 7de68f1dc707..af735f55b291 100644
--- a/include/uapi/rdma/siw-abi.h
+++ b/include/uapi/rdma/siw-abi.h
@@ -180,6 +180,7 @@ struct siw_cqe {
  * to control CQ arming.
  */
 struct siw_cq_ctrl {
-	__aligned_u64 notify;
+	__u32 flags;
+	__u32 pad;
 };
 #endif
-- 
cgit v1.2.3


From 972d7560ee37d48a9121ca1b7796d107a16784e7 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 8 Aug 2019 11:43:57 +0300
Subject: IB/mlx5: Add legacy events to DEVX list

Add two events that were defined in the device specification but were
not exposed in the driver list.

Post this patch those events can be read over the DEVX events interface
once be reported by the firmware.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Edward Srouji <edwards@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190808084358.29517-4-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/devx.c | 8 ++++++++
 include/linux/mlx5/device.h       | 9 +++++++++
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index fd577ffd7864..3dbdfe0eb5e4 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -233,6 +233,8 @@ static bool is_legacy_obj_event_num(u16 event_num)
 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
 	case MLX5_EVENT_TYPE_DCT_DRAINED:
 	case MLX5_EVENT_TYPE_COMP:
+	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
+	case MLX5_EVENT_TYPE_XRQ_ERROR:
 		return true;
 	default:
 		return false;
@@ -315,8 +317,10 @@ static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe)
 	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
 		return eqe->data.qp_srq.type;
 	case MLX5_EVENT_TYPE_CQ_ERROR:
+	case MLX5_EVENT_TYPE_XRQ_ERROR:
 		return 0;
 	case MLX5_EVENT_TYPE_DCT_DRAINED:
+	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
 		return MLX5_EVENT_QUEUE_TYPE_DCT;
 	default:
 		return MLX5_GET(affiliated_event_header, &eqe->data, obj_type);
@@ -2300,7 +2304,11 @@ static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data)
 	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
 		obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
 		break;
+	case MLX5_EVENT_TYPE_XRQ_ERROR:
+		obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff;
+		break;
 	case MLX5_EVENT_TYPE_DCT_DRAINED:
+	case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION:
 		obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
 		break;
 	case MLX5_EVENT_TYPE_CQ_ERROR:
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ce9839c8bc1a..e427af260ebe 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -328,6 +328,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_GPIO_EVENT	   = 0x15,
 	MLX5_EVENT_TYPE_PORT_MODULE_EVENT  = 0x16,
 	MLX5_EVENT_TYPE_TEMP_WARN_EVENT    = 0x17,
+	MLX5_EVENT_TYPE_XRQ_ERROR	   = 0x18,
 	MLX5_EVENT_TYPE_REMOTE_CONFIG	   = 0x19,
 	MLX5_EVENT_TYPE_GENERAL_EVENT	   = 0x22,
 	MLX5_EVENT_TYPE_MONITOR_COUNTER    = 0x24,
@@ -345,6 +346,7 @@ enum mlx5_event {
 	MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED = 0xe,
 
 	MLX5_EVENT_TYPE_DCT_DRAINED        = 0x1c,
+	MLX5_EVENT_TYPE_DCT_KEY_VIOLATION  = 0x1d,
 
 	MLX5_EVENT_TYPE_FPGA_ERROR         = 0x20,
 	MLX5_EVENT_TYPE_FPGA_QP_ERROR      = 0x21,
@@ -584,6 +586,12 @@ struct mlx5_eqe_cq_err {
 	u8	syndrome;
 };
 
+struct mlx5_eqe_xrq_err {
+	__be32	reserved1[5];
+	__be32	type_xrqn;
+	__be32	reserved2;
+};
+
 struct mlx5_eqe_port_state {
 	u8	reserved0[8];
 	u8	port;
@@ -698,6 +706,7 @@ union ev_data {
 	struct mlx5_eqe_pps		pps;
 	struct mlx5_eqe_dct             dct;
 	struct mlx5_eqe_temp_warning	temp_warning;
+	struct mlx5_eqe_xrq_err		xrq_err;
 } __packed;
 
 struct mlx5_eqe {
-- 
cgit v1.2.3


From 127ab2cc5f19692efe422935267b9db0845b2b04 Mon Sep 17 00:00:00 2001
From: Georgi Djakov <georgi.djakov@linaro.org>
Date: Fri, 9 Aug 2019 15:13:23 +0300
Subject: interconnect: Add support for path tags

Consumers may have use cases with different bandwidth requirements based
on the system or driver state. The consumer driver can append a specific
tag to the path and pass this information to the interconnect platform
driver to do the aggregation based on this state.

Introduce icc_set_tag() function that will allow the consumers to append
an optional tag to each path. The aggregation of these tagged paths is
platform specific.

Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/interconnect/core.c           | 24 +++++++++++++++++++++++-
 drivers/interconnect/qcom/sdm845.c    |  2 +-
 include/linux/interconnect-provider.h |  4 ++--
 include/linux/interconnect.h          |  5 +++++
 4 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 871eb4bc4efc..251354bb7fdc 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -29,6 +29,7 @@ static struct dentry *icc_debugfs_dir;
  * @req_node: entry in list of requests for the particular @node
  * @node: the interconnect node to which this constraint applies
  * @dev: reference to the device that sets the constraints
+ * @tag: path tag (optional)
  * @avg_bw: an integer describing the average bandwidth in kBps
  * @peak_bw: an integer describing the peak bandwidth in kBps
  */
@@ -36,6 +37,7 @@ struct icc_req {
 	struct hlist_node req_node;
 	struct icc_node *node;
 	struct device *dev;
+	u32 tag;
 	u32 avg_bw;
 	u32 peak_bw;
 };
@@ -204,7 +206,7 @@ static int aggregate_requests(struct icc_node *node)
 	node->peak_bw = 0;
 
 	hlist_for_each_entry(r, &node->req_list, req_node)
-		p->aggregate(node, r->avg_bw, r->peak_bw,
+		p->aggregate(node, r->tag, r->avg_bw, r->peak_bw,
 			     &node->avg_bw, &node->peak_bw);
 
 	return 0;
@@ -385,6 +387,26 @@ struct icc_path *of_icc_get(struct device *dev, const char *name)
 }
 EXPORT_SYMBOL_GPL(of_icc_get);
 
+/**
+ * icc_set_tag() - set an optional tag on a path
+ * @path: the path we want to tag
+ * @tag: the tag value
+ *
+ * This function allows consumers to append a tag to the requests associated
+ * with a path, so that a different aggregation could be done based on this tag.
+ */
+void icc_set_tag(struct icc_path *path, u32 tag)
+{
+	int i;
+
+	if (!path)
+		return;
+
+	for (i = 0; i < path->num_nodes; i++)
+		path->reqs[i].tag = tag;
+}
+EXPORT_SYMBOL_GPL(icc_set_tag);
+
 /**
  * icc_set_bw() - set bandwidth constraints on an interconnect path
  * @path: reference to the path returned by icc_get()
diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
index 4915b78da673..fb526004c82e 100644
--- a/drivers/interconnect/qcom/sdm845.c
+++ b/drivers/interconnect/qcom/sdm845.c
@@ -626,7 +626,7 @@ static void bcm_aggregate(struct qcom_icc_bcm *bcm)
 	bcm->dirty = false;
 }
 
-static int qcom_icc_aggregate(struct icc_node *node, u32 avg_bw,
+static int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
 			      u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
 {
 	size_t i;
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index 63caccadc2db..4ee19fd41568 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -45,8 +45,8 @@ struct icc_provider {
 	struct list_head	provider_list;
 	struct list_head	nodes;
 	int (*set)(struct icc_node *src, struct icc_node *dst);
-	int (*aggregate)(struct icc_node *node, u32 avg_bw, u32 peak_bw,
-			 u32 *agg_avg, u32 *agg_peak);
+	int (*aggregate)(struct icc_node *node, u32 tag, u32 avg_bw,
+			 u32 peak_bw, u32 *agg_avg, u32 *agg_peak);
 	struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data);
 	struct device		*dev;
 	int			users;
diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h
index dc25864755ba..d70a914cba11 100644
--- a/include/linux/interconnect.h
+++ b/include/linux/interconnect.h
@@ -30,6 +30,7 @@ struct icc_path *icc_get(struct device *dev, const int src_id,
 struct icc_path *of_icc_get(struct device *dev, const char *name);
 void icc_put(struct icc_path *path);
 int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw);
+void icc_set_tag(struct icc_path *path, u32 tag);
 
 #else
 
@@ -54,6 +55,10 @@ static inline int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw)
 	return 0;
 }
 
+static inline void icc_set_tag(struct icc_path *path, u32 tag)
+{
+}
+
 #endif /* CONFIG_INTERCONNECT */
 
 #endif /* __LINUX_INTERCONNECT_H */
-- 
cgit v1.2.3


From cbd5a9c28bb5d2560be34fc6a2b6e60197628433 Mon Sep 17 00:00:00 2001
From: Georgi Djakov <georgi.djakov@linaro.org>
Date: Fri, 9 Aug 2019 15:13:24 +0300
Subject: interconnect: Add pre_aggregate() callback

Introduce an optional callback in interconnect provider drivers. It can be
used for implementing actions, that need to be executed before the actual
aggregation of the bandwidth requests has started.

The benefit of this for now is that it will significantly simplify the code
in provider drivers.

Suggested-by: Evan Green <evgreen@chromium.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/interconnect/core.c           | 3 +++
 include/linux/interconnect-provider.h | 3 +++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 251354bb7fdc..7b971228df38 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -205,6 +205,9 @@ static int aggregate_requests(struct icc_node *node)
 	node->avg_bw = 0;
 	node->peak_bw = 0;
 
+	if (p->pre_aggregate)
+		p->pre_aggregate(node);
+
 	hlist_for_each_entry(r, &node->req_list, req_node)
 		p->aggregate(node, r->tag, r->avg_bw, r->peak_bw,
 			     &node->avg_bw, &node->peak_bw);
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index 4ee19fd41568..b16f9effa555 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -36,6 +36,8 @@ struct icc_node *of_icc_xlate_onecell(struct of_phandle_args *spec,
  * @nodes: internal list of the interconnect provider nodes
  * @set: pointer to device specific set operation function
  * @aggregate: pointer to device specific aggregate operation function
+ * @pre_aggregate: pointer to device specific function that is called
+ *		   before the aggregation begins (optional)
  * @xlate: provider-specific callback for mapping nodes from phandle arguments
  * @dev: the device this interconnect provider belongs to
  * @users: count of active users
@@ -47,6 +49,7 @@ struct icc_provider {
 	int (*set)(struct icc_node *src, struct icc_node *dst);
 	int (*aggregate)(struct icc_node *node, u32 tag, u32 avg_bw,
 			 u32 peak_bw, u32 *agg_avg, u32 *agg_peak);
+	void (*pre_aggregate)(struct icc_node *node);
 	struct icc_node* (*xlate)(struct of_phandle_args *spec, void *data);
 	struct device		*dev;
 	int			users;
-- 
cgit v1.2.3


From 24f516ebbab8a212a9aa8c3d69f185371f5e200b Mon Sep 17 00:00:00 2001
From: Georgi Djakov <georgi.djakov@linaro.org>
Date: Tue, 23 Jul 2019 17:23:35 +0300
Subject: dt-bindings: interconnect: Add Qualcomm QCS404 DT bindings

The Qualcomm QCS404 platform has several buses that could be controlled
and tuned according to the bandwidth demand.

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 .../bindings/interconnect/qcom,qcs404.txt          | 45 +++++++++++
 include/dt-bindings/interconnect/qcom,qcs404.h     | 88 ++++++++++++++++++++++
 2 files changed, 133 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt
 create mode 100644 include/dt-bindings/interconnect/qcom,qcs404.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt b/Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt
new file mode 100644
index 000000000000..c07d89812b73
--- /dev/null
+++ b/Documentation/devicetree/bindings/interconnect/qcom,qcs404.txt
@@ -0,0 +1,45 @@
+Qualcomm QCS404 Network-On-Chip interconnect driver binding
+-----------------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+			"qcom,qcs404-bimc"
+			"qcom,qcs404-pcnoc"
+			"qcom,qcs404-snoc"
+- #interconnect-cells : should contain 1
+
+reg : specifies the physical base address and size of registers
+clocks : list of phandles and specifiers to all interconnect bus clocks
+clock-names : clock names should include both "bus" and "bus_a"
+
+Example:
+
+soc {
+	...
+	bimc: interconnect@400000 {
+		reg = <0x00400000 0x80000>;
+		compatible = "qcom,qcs404-bimc";
+		#interconnect-cells = <1>;
+		clock-names = "bus", "bus_a";
+		clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
+			<&rpmcc RPM_SMD_BIMC_A_CLK>;
+	};
+
+	pnoc: interconnect@500000 {
+		reg = <0x00500000 0x15080>;
+		compatible = "qcom,qcs404-pcnoc";
+		#interconnect-cells = <1>;
+		clock-names = "bus", "bus_a";
+		clocks = <&rpmcc RPM_SMD_PNOC_CLK>,
+			<&rpmcc RPM_SMD_PNOC_A_CLK>;
+	};
+
+	snoc: interconnect@580000 {
+		reg = <0x00580000 0x23080>;
+		compatible = "qcom,qcs404-snoc";
+		#interconnect-cells = <1>;
+		clock-names = "bus", "bus_a";
+		clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
+			<&rpmcc RPM_SMD_SNOC_A_CLK>;
+	};
+};
diff --git a/include/dt-bindings/interconnect/qcom,qcs404.h b/include/dt-bindings/interconnect/qcom,qcs404.h
new file mode 100644
index 000000000000..960f6e39c5f2
--- /dev/null
+++ b/include/dt-bindings/interconnect/qcom,qcs404.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Qualcomm interconnect IDs
+ *
+ * Copyright (c) 2019, Linaro Ltd.
+ * Author: Georgi Djakov <georgi.djakov@linaro.org>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_QCS404_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_QCS404_H
+
+#define MASTER_AMPSS_M0			0
+#define MASTER_OXILI			1
+#define MASTER_MDP_PORT0		2
+#define MASTER_SNOC_BIMC_1		3
+#define MASTER_TCU_0			4
+#define SLAVE_EBI_CH0			5
+#define SLAVE_BIMC_SNOC			6
+
+#define MASTER_SPDM			0
+#define MASTER_BLSP_1			1
+#define MASTER_BLSP_2			2
+#define MASTER_XI_USB_HS1		3
+#define MASTER_CRYPT0			4
+#define MASTER_SDCC_1			5
+#define MASTER_SDCC_2			6
+#define MASTER_SNOC_PCNOC		7
+#define MASTER_QPIC			8
+#define PCNOC_INT_0			9
+#define PCNOC_INT_2			10
+#define PCNOC_INT_3			11
+#define PCNOC_S_0			12
+#define PCNOC_S_1			13
+#define PCNOC_S_2			14
+#define PCNOC_S_3			15
+#define PCNOC_S_4			16
+#define PCNOC_S_6			17
+#define PCNOC_S_7			18
+#define PCNOC_S_8			19
+#define PCNOC_S_9			20
+#define PCNOC_S_10			21
+#define PCNOC_S_11			22
+#define SLAVE_SPDM			23
+#define SLAVE_PDM			24
+#define SLAVE_PRNG			25
+#define SLAVE_TCSR			26
+#define SLAVE_SNOC_CFG			27
+#define SLAVE_MESSAGE_RAM		28
+#define SLAVE_DISP_SS_CFG		29
+#define SLAVE_GPU_CFG			30
+#define SLAVE_BLSP_1			31
+#define SLAVE_BLSP_2			32
+#define SLAVE_TLMM_NORTH		33
+#define SLAVE_PCIE			34
+#define SLAVE_ETHERNET			35
+#define SLAVE_TLMM_EAST			36
+#define SLAVE_TCU			37
+#define SLAVE_PMIC_ARB			38
+#define SLAVE_SDCC_1			39
+#define SLAVE_SDCC_2			40
+#define SLAVE_TLMM_SOUTH		41
+#define SLAVE_USB_HS			42
+#define SLAVE_USB3			43
+#define SLAVE_CRYPTO_0_CFG		44
+#define SLAVE_PCNOC_SNOC		45
+
+#define MASTER_QDSS_BAM			0
+#define MASTER_BIMC_SNOC		1
+#define MASTER_PCNOC_SNOC		2
+#define MASTER_QDSS_ETR			3
+#define MASTER_EMAC			4
+#define MASTER_PCIE			5
+#define MASTER_USB3			6
+#define QDSS_INT			7
+#define SNOC_INT_0			8
+#define SNOC_INT_1			9
+#define SNOC_INT_2			10
+#define SLAVE_KPSS_AHB			11
+#define SLAVE_WCSS			12
+#define SLAVE_SNOC_BIMC_1		13
+#define SLAVE_IMEM			14
+#define SLAVE_SNOC_PCNOC		15
+#define SLAVE_QDSS_STM			16
+#define SLAVE_CATS_0			17
+#define SLAVE_CATS_1			18
+#define SLAVE_LPASS			19
+
+#endif
-- 
cgit v1.2.3


From f7c9a9b664fb32a127e8e9a987b52023b92c3a0b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Mon, 1 Apr 2019 09:57:01 -0700
Subject: rcu/nocb: Rename and document no-CB CB kthread sleep trace event

The nocb_cb_wait() function traces a "FollowerSleep" trace_rcu_nocb_wake()
event, which never was documented and is now misleading.  This commit
therefore changes "FollowerSleep" to "CBSleep", documents this, and
updates the documentation for "Sleep" as well.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/trace/events/rcu.h | 3 ++-
 kernel/rcu/tree_plugin.h   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 02a3f78f7cd8..313324d1b135 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -267,7 +267,8 @@ TRACE_EVENT_RCU(rcu_exp_funnel_lock,
  *	"WakeNotPoll": Don't wake rcuo kthread because it is polling.
  *	"DeferredWake": Carried out the "IsDeferred" wakeup.
  *	"Poll": Start of new polling cycle for rcu_nocb_poll.
- *	"Sleep": Sleep waiting for CBs for !rcu_nocb_poll.
+ *	"Sleep": Sleep waiting for GP for !rcu_nocb_poll.
+ *	"CBSleep": Sleep waiting for CBs for !rcu_nocb_poll.
  *	"WokeEmpty": rcuo kthread woke to find empty list.
  *	"WokeNonEmpty": rcuo kthread woke to find non-empty list.
  *	"WaitQueue": Enqueue partially done, timed wait for it to complete.
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0af36e98e70f..be065aacd63b 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1891,7 +1891,7 @@ static int rcu_nocb_gp_kthread(void *arg)
  */
 static bool nocb_cb_wait(struct rcu_data *rdp)
 {
-	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FollowerSleep"));
+	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
 	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
 				 READ_ONCE(rdp->nocb_cb_head));
 	if (smp_load_acquire(&rdp->nocb_cb_head)) { /* VVV */
-- 
cgit v1.2.3


From 1bb5f9b95afe5d9d6b586389ce5e8f461a5b671c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Fri, 12 Apr 2019 12:34:41 -0700
Subject: rcu/nocb: Use separate flag to indicate disabled ->cblist

NULLing the RCU_NEXT_TAIL pointer was a clever way to save a byte, but
forward-progress considerations would require that this pointer be both
NULL and non-NULL, which, absent a quantum-computer port of the Linux
kernel, simply won't happen.  This commit therefore creates as separate
->enabled flag to replace the current NULL checks.

[ paulmck: Add include files per 0day test robot and -next. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcu_segcblist.h | 4 ++++
 kernel/rcu/rcu_segcblist.c    | 3 ++-
 kernel/rcu/rcu_segcblist.h    | 2 +-
 kernel/rcu/tree_plugin.h      | 2 +-
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 87404cb015f1..ed2cfd3c0743 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -14,6 +14,9 @@
 #ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H
 #define __INCLUDE_LINUX_RCU_SEGCBLIST_H
 
+#include <linux/types.h>
+#include <linux/atomic.h>
+
 /* Simple unsegmented callback lists. */
 struct rcu_cblist {
 	struct rcu_head *head;
@@ -67,6 +70,7 @@ struct rcu_segcblist {
 	unsigned long gp_seq[RCU_CBLIST_NSEGS];
 	long len;
 	long len_lazy;
+	u8 enabled;
 };
 
 #define RCU_SEGCBLIST_INITIALIZER(n) \
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 9bd5f6023c21..b305dcac34c9 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -58,6 +58,7 @@ void rcu_segcblist_init(struct rcu_segcblist *rsclp)
 		rsclp->tails[i] = &rsclp->head;
 	rsclp->len = 0;
 	rsclp->len_lazy = 0;
+	rsclp->enabled = 1;
 }
 
 /*
@@ -69,7 +70,7 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
 	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
 	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
 	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
-	rsclp->tails[RCU_NEXT_TAIL] = NULL;
+	rsclp->enabled = 0;
 }
 
 /*
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 822a39da0533..b2de7b32da29 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -63,7 +63,7 @@ static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
  */
 static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
 {
-	return !!rsclp->tails[RCU_NEXT_TAIL];
+	return rsclp->enabled;
 }
 
 /*
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0a3f8680b450..b8a43cf9bb4e 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2189,8 +2189,8 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 				rcu_segcblist_n_cbs(&rdp->cblist));
 		atomic_long_set(&rdp->nocb_q_count_lazy,
 				rcu_segcblist_n_lazy_cbs(&rdp->cblist));
-		rcu_segcblist_init(&rdp->cblist);
 	}
+	rcu_segcblist_init(&rdp->cblist);
 	rcu_segcblist_disable(&rdp->cblist);
 	return true;
 }
-- 
cgit v1.2.3


From ce5215c1342c6c89b3c3c45fea82cddf0b013787 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Fri, 12 Apr 2019 15:58:34 -0700
Subject: rcu/nocb: Use separate flag to indicate offloaded ->cblist

RCU callback processing currently uses rcu_is_nocb_cpu() to determine
whether or not the current CPU's callbacks are to be offloaded.
This works, but it is not so good for cache locality.  Plus use of
->cblist for offloaded callbacks will greatly increase the frequency
of these checks.  This commit therefore adds a ->offloaded flag to the
rcu_segcblist structure to provide a more flexible and cache-friendly
means of checking for callback offloading.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcu_segcblist.h |  1 +
 kernel/rcu/rcu_segcblist.c    | 12 ++++++++++++
 kernel/rcu/rcu_segcblist.h    |  7 +++++++
 kernel/rcu/tree.c             | 10 ++++++----
 kernel/rcu/tree_plugin.h      | 11 +++++++----
 5 files changed, 33 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index ed2cfd3c0743..8b684888f71d 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -71,6 +71,7 @@ struct rcu_segcblist {
 	long len;
 	long len_lazy;
 	u8 enabled;
+	u8 offloaded;
 };
 
 #define RCU_SEGCBLIST_INITIALIZER(n) \
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index b305dcac34c9..700779f4c0cb 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -73,6 +73,18 @@ void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
 	rsclp->enabled = 0;
 }
 
+/*
+ * Mark the specified rcu_segcblist structure as offloaded.  This
+ * structure must be empty.
+ */
+void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
+{
+	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
+	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp));
+	rsclp->offloaded = 1;
+}
+
 /*
  * Does the specified rcu_segcblist structure contain callbacks that
  * are ready to be invoked?
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index b2de7b32da29..8f3783391075 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -66,6 +66,12 @@ static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp)
 	return rsclp->enabled;
 }
 
+/* Is the specified rcu_segcblist offloaded?  */
+static inline bool rcu_segcblist_is_offloaded(struct rcu_segcblist *rsclp)
+{
+	return rsclp->offloaded;
+}
+
 /*
  * Are all segments following the specified segment of the specified
  * rcu_segcblist structure empty of callbacks?  (The specified
@@ -78,6 +84,7 @@ static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg)
 
 void rcu_segcblist_init(struct rcu_segcblist *rsclp);
 void rcu_segcblist_disable(struct rcu_segcblist *rsclp);
+void rcu_segcblist_offload(struct rcu_segcblist *rsclp);
 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a14e5fbbea46..6f5c96c4f9a3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2858,10 +2858,11 @@ void rcu_barrier(void)
 	 * corresponding CPU's preceding callbacks have been invoked.
 	 */
 	for_each_possible_cpu(cpu) {
-		if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
-			continue;
 		rdp = per_cpu_ptr(&rcu_data, cpu);
-		if (rcu_is_nocb_cpu(cpu)) {
+		if (!cpu_online(cpu) &&
+		    !rcu_segcblist_is_offloaded(&rdp->cblist))
+			continue;
+		if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
 			if (!rcu_nocb_cpu_needs_barrier(cpu)) {
 				rcu_barrier_trace(TPS("OfflineNoCB"), cpu,
 						   rcu_state.barrier_sequence);
@@ -3155,7 +3156,8 @@ void rcutree_migrate_callbacks(int cpu)
 	struct rcu_node *rnp_root = rcu_get_root();
 	bool needwake;
 
-	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
+	if (rcu_segcblist_is_offloaded(&rdp->cblist) ||
+	    rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
 
 	local_irq_save(flags);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b8a43cf9bb4e..fc6133eed50a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1382,7 +1382,7 @@ static void rcu_prepare_for_idle(void)
 	int tne;
 
 	lockdep_assert_irqs_disabled();
-	if (rcu_is_nocb_cpu(smp_processor_id()))
+	if (rcu_segcblist_is_offloaded(&rdp->cblist))
 		return;
 
 	/* Handle nohz enablement switches conservatively. */
@@ -1431,8 +1431,10 @@ static void rcu_prepare_for_idle(void)
  */
 static void rcu_cleanup_after_idle(void)
 {
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
 	lockdep_assert_irqs_disabled();
-	if (rcu_is_nocb_cpu(smp_processor_id()))
+	if (rcu_segcblist_is_offloaded(&rdp->cblist))
 		return;
 	if (rcu_try_advance_all_cbs())
 		invoke_rcu_core();
@@ -1694,7 +1696,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy, unsigned long flags)
 {
 
-	if (!rcu_is_nocb_cpu(rdp->cpu))
+	if (!rcu_segcblist_is_offloaded(&rdp->cblist))
 		return false;
 	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
 	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
@@ -1729,7 +1731,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
 						     unsigned long flags)
 {
 	lockdep_assert_irqs_disabled();
-	if (!rcu_is_nocb_cpu(smp_processor_id()))
+	if (!rcu_segcblist_is_offloaded(&my_rdp->cblist))
 		return false; /* Not NOCBs CPU, caller must migrate CBs. */
 	__call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),
 				rcu_segcblist_tail(&rdp->cblist),
@@ -2192,6 +2194,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 	}
 	rcu_segcblist_init(&rdp->cblist);
 	rcu_segcblist_disable(&rdp->cblist);
+	rcu_segcblist_offload(&rdp->cblist);
 	return true;
 }
 
-- 
cgit v1.2.3


From 5d6742b37727e111f4755155e59c5319cf5caa7b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Wed, 15 May 2019 09:56:40 -0700
Subject: rcu/nocb: Use rcu_segcblist for no-CBs CPUs

Currently the RCU callbacks for no-CBs CPUs are queued on a series of
ad-hoc linked lists, which means that these callbacks cannot benefit
from "drive-by" grace periods, thus suffering needless delays prior
to invocation.  In addition, the no-CBs grace-period kthreads first
wait for callbacks to appear and later wait for a new grace period,
which means that callbacks appearing during a grace-period wait can
be delayed.  These delays increase memory footprint, and could even
result in an out-of-memory condition.

This commit therefore enqueues RCU callbacks from no-CBs CPUs on the
rcu_segcblist structure that is already used by non-no-CBs CPUs.  It also
restructures the no-CBs grace-period kthread to be checking for incoming
callbacks while waiting for grace periods.  Also, instead of waiting
for a new grace period, it waits for the closest grace period that will
cause some of the callbacks to be safe to invoke.  All of these changes
reduce callback latency and thus the number of outstanding callbacks,
in turn reducing the probability of an out-of-memory condition.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/trace/events/rcu.h |   1 -
 kernel/rcu/rcu_segcblist.c |  12 ++
 kernel/rcu/rcu_segcblist.h |   1 +
 kernel/rcu/tree.c          | 116 ++++++-----
 kernel/rcu/tree.h          |  14 +-
 kernel/rcu/tree_plugin.h   | 510 +++++++++++++++++----------------------------
 6 files changed, 270 insertions(+), 384 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index 313324d1b135..694bd040cf51 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -100,7 +100,6 @@ TRACE_EVENT_RCU(rcu_grace_period,
  * "Startedroot": Requested a nocb grace period based on root-node data.
  * "NoGPkthread": The RCU grace-period kthread has not yet started.
  * "StartWait": Start waiting for the requested grace period.
- * "ResumeWait": Resume waiting after signal.
  * "EndWait": Complete wait.
  * "Cleanup": Clean up rcu_node structure after previous GP.
  * "CleanupMore": Clean up, and another GP is needed.
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 9ac28f175627..92968b856593 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -127,6 +127,18 @@ struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
 	return NULL;
 }
 
+/*
+ * Return false if there are no CBs awaiting grace periods, otherwise,
+ * return true and store the nearest waited-upon grace period into *lp.
+ */
+bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp)
+{
+	if (!rcu_segcblist_pend_cbs(rsclp))
+		return false;
+	*lp = rsclp->gp_seq[RCU_WAIT_TAIL];
+	return true;
+}
+
 /*
  * Enqueue the specified callback onto the specified rcu_segcblist
  * structure, updating accounting as needed.  Note that the ->len
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index ed3fcece39a9..db38f0a512c4 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -89,6 +89,7 @@ bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp);
 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp);
 struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp);
+bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp);
 void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
 			   struct rcu_head *rhp, bool lazy);
 bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2917ce379b23..054418d2d960 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1343,8 +1343,10 @@ static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
  */
 static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 {
-	bool ret;
+	bool ret = false;
 	bool need_gp;
+	const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+			       rcu_segcblist_is_offloaded(&rdp->cblist);
 
 	raw_lockdep_assert_held_rcu_node(rnp);
 
@@ -1354,10 +1356,12 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 	/* Handle the ends of any preceding grace periods first. */
 	if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
 	    unlikely(READ_ONCE(rdp->gpwrap))) {
-		ret = rcu_advance_cbs(rnp, rdp); /* Advance callbacks. */
+		if (!offloaded)
+			ret = rcu_advance_cbs(rnp, rdp); /* Advance CBs. */
 		trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));
 	} else {
-		ret = rcu_accelerate_cbs(rnp, rdp); /* Recent callbacks. */
+		if (!offloaded)
+			ret = rcu_accelerate_cbs(rnp, rdp); /* Recent CBs. */
 	}
 
 	/* Now handle the beginnings of any new-to-this-CPU grace periods. */
@@ -1658,6 +1662,7 @@ static void rcu_gp_cleanup(void)
 	unsigned long gp_duration;
 	bool needgp = false;
 	unsigned long new_gp_seq;
+	bool offloaded;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp = rcu_get_root();
 	struct swait_queue_head *sq;
@@ -1723,7 +1728,9 @@ static void rcu_gp_cleanup(void)
 		needgp = true;
 	}
 	/* Advance CBs to reduce false positives below. */
-	if (!rcu_accelerate_cbs(rnp, rdp) && needgp) {
+	offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+		    rcu_segcblist_is_offloaded(&rdp->cblist);
+	if ((offloaded || !rcu_accelerate_cbs(rnp, rdp)) && needgp) {
 		WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
 		rcu_state.gp_req_activity = jiffies;
 		trace_rcu_grace_period(rcu_state.name,
@@ -1917,7 +1924,9 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
-	bool needwake;
+	bool needwake = false;
+	const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+			       rcu_segcblist_is_offloaded(&rdp->cblist);
 	struct rcu_node *rnp;
 
 	rnp = rdp->mynode;
@@ -1944,7 +1953,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		 * This GP can't end until cpu checks in, so all of our
 		 * callbacks can be processed during the next GP.
 		 */
-		needwake = rcu_accelerate_cbs(rnp, rdp);
+		if (!offloaded)
+			needwake = rcu_accelerate_cbs(rnp, rdp);
 
 		rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
 		/* ^^^ Released rnp->lock */
@@ -2082,7 +2092,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
 	long bl, count;
 
-	WARN_ON_ONCE(rdp->cblist.offloaded);
 	/* If no callbacks are ready, just return. */
 	if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
 		trace_rcu_batch_start(rcu_state.name,
@@ -2101,13 +2110,14 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	 * callback counts, as rcu_barrier() needs to be conservative.
 	 */
 	local_irq_save(flags);
+	rcu_nocb_lock(rdp);
 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	bl = rdp->blimit;
 	trace_rcu_batch_start(rcu_state.name,
 			      rcu_segcblist_n_lazy_cbs(&rdp->cblist),
 			      rcu_segcblist_n_cbs(&rdp->cblist), bl);
 	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
-	local_irq_restore(flags);
+	rcu_nocb_unlock_irqrestore(rdp, flags);
 
 	/* Invoke callbacks. */
 	rhp = rcu_cblist_dequeue(&rcl);
@@ -2120,12 +2130,22 @@ static void rcu_do_batch(struct rcu_data *rdp)
 		 * Note: The rcl structure counts down from zero.
 		 */
 		if (-rcl.len >= bl &&
+		    !rcu_segcblist_is_offloaded(&rdp->cblist) &&
 		    (need_resched() ||
 		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
+		if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
+			WARN_ON_ONCE(in_serving_softirq());
+			local_bh_enable();
+			lockdep_assert_irqs_enabled();
+			cond_resched_tasks_rcu_qs();
+			lockdep_assert_irqs_enabled();
+			local_bh_disable();
+		}
 	}
 
 	local_irq_save(flags);
+	rcu_nocb_lock(rdp);
 	count = -rcl.len;
 	trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
 			    is_idle_task(current), rcu_is_callbacks_kthread());
@@ -2153,10 +2173,11 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	 */
 	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) != (count == 0));
 
-	local_irq_restore(flags);
+	rcu_nocb_unlock_irqrestore(rdp, flags);
 
 	/* Re-invoke RCU core processing if there are callbacks remaining. */
-	if (rcu_segcblist_ready_cbs(&rdp->cblist))
+	if (!rcu_segcblist_is_offloaded(&rdp->cblist) &&
+	    rcu_segcblist_ready_cbs(&rdp->cblist))
 		invoke_rcu_core();
 }
 
@@ -2312,7 +2333,8 @@ static __latent_entropy void rcu_core(void)
 	rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check());
 
 	/* If there are callbacks ready, invoke them. */
-	if (rcu_segcblist_ready_cbs(&rdp->cblist) &&
+	if (!rcu_segcblist_is_offloaded(&rdp->cblist) &&
+	    rcu_segcblist_ready_cbs(&rdp->cblist) &&
 	    likely(READ_ONCE(rcu_scheduler_fully_active)))
 		rcu_do_batch(rdp);
 
@@ -2492,10 +2514,11 @@ static void rcu_leak_callback(struct rcu_head *rhp)
  * is expected to specify a CPU.
  */
 static void
-__call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy)
+__call_rcu(struct rcu_head *head, rcu_callback_t func, bool lazy)
 {
 	unsigned long flags;
 	struct rcu_data *rdp;
+	bool was_alldone;
 
 	/* Misaligned rcu_head! */
 	WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
@@ -2517,29 +2540,17 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy)
 	rdp = this_cpu_ptr(&rcu_data);
 
 	/* Add the callback to our list. */
-	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) ||
-	    rcu_segcblist_is_offloaded(&rdp->cblist) || cpu != -1) {
-		int offline;
-
-		if (cpu != -1)
-			rdp = per_cpu_ptr(&rcu_data, cpu);
-		if (likely(rdp->mynode)) {
-			/* Post-boot, so this should be for a no-CBs CPU. */
-			offline = !__call_rcu_nocb(rdp, head, lazy, flags);
-			WARN_ON_ONCE(offline);
-			/* Offline CPU, _call_rcu() illegal, leak callback.  */
-			local_irq_restore(flags);
-			return;
-		}
-		/*
-		 * Very early boot, before rcu_init().  Initialize if needed
-		 * and then drop through to queue the callback.
-		 */
-		WARN_ON_ONCE(cpu != -1);
+	if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist))) {
+		// This can trigger due to call_rcu() from offline CPU:
+		WARN_ON_ONCE(rcu_scheduler_active != RCU_SCHEDULER_INACTIVE);
 		WARN_ON_ONCE(!rcu_is_watching());
+		// Very early boot, before rcu_init().  Initialize if needed
+		// and then drop through to queue the callback.
 		if (rcu_segcblist_empty(&rdp->cblist))
 			rcu_segcblist_init(&rdp->cblist);
 	}
+	rcu_nocb_lock(rdp);
+	was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist);
 	rcu_segcblist_enqueue(&rdp->cblist, head, lazy);
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rcu_state.name, head,
@@ -2552,8 +2563,13 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy)
 				   rcu_segcblist_n_cbs(&rdp->cblist));
 
 	/* Go handle any RCU core processing required. */
-	__call_rcu_core(rdp, head, flags);
-	local_irq_restore(flags);
+	if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
+	    unlikely(rcu_segcblist_is_offloaded(&rdp->cblist))) {
+		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */
+	} else {
+		__call_rcu_core(rdp, head, flags);
+		local_irq_restore(flags);
+	}
 }
 
 /**
@@ -2593,7 +2609,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy)
  */
 void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
-	__call_rcu(head, func, -1, 0);
+	__call_rcu(head, func, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
@@ -2606,7 +2622,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
  */
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
-	__call_rcu(head, func, -1, 1);
+	__call_rcu(head, func, 1);
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
@@ -2806,6 +2822,7 @@ static void rcu_barrier_func(void *unused)
 	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
+	rcu_nocb_lock(rdp);
 	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
 		atomic_inc(&rcu_state.barrier_cpu_count);
 	} else {
@@ -2813,6 +2830,7 @@ static void rcu_barrier_func(void *unused)
 		rcu_barrier_trace(TPS("IRQNQ"), -1,
 				   rcu_state.barrier_sequence);
 	}
+	rcu_nocb_unlock(rdp);
 }
 
 /**
@@ -2867,19 +2885,7 @@ void rcu_barrier(void)
 		if (!cpu_online(cpu) &&
 		    !rcu_segcblist_is_offloaded(&rdp->cblist))
 			continue;
-		if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
-			if (!rcu_nocb_cpu_needs_barrier(cpu)) {
-				rcu_barrier_trace(TPS("OfflineNoCB"), cpu,
-						   rcu_state.barrier_sequence);
-			} else {
-				rcu_barrier_trace(TPS("OnlineNoCB"), cpu,
-						   rcu_state.barrier_sequence);
-				smp_mb__before_atomic();
-				atomic_inc(&rcu_state.barrier_cpu_count);
-				__call_rcu(&rdp->barrier_head,
-					   rcu_barrier_callback, cpu, 0);
-			}
-		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
+		if (rcu_segcblist_n_cbs(&rdp->cblist)) {
 			rcu_barrier_trace(TPS("OnlineQ"), cpu,
 					   rcu_state.barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, NULL, 1);
@@ -3169,10 +3175,7 @@ void rcutree_migrate_callbacks(int cpu)
 	local_irq_save(flags);
 	my_rdp = this_cpu_ptr(&rcu_data);
 	my_rnp = my_rdp->mynode;
-	if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
-		local_irq_restore(flags);
-		return;
-	}
+	rcu_nocb_lock(my_rdp); /* irqs already disabled. */
 	raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
 	/* Leverage recent GPs and set GP for new callbacks. */
 	needwake = rcu_advance_cbs(my_rnp, rdp) ||
@@ -3180,9 +3183,16 @@ void rcutree_migrate_callbacks(int cpu)
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
 	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
 		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
-	raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags);
+	if (rcu_segcblist_is_offloaded(&my_rdp->cblist)) {
+		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
+		__call_rcu_nocb_wake(my_rdp, true, flags);
+	} else {
+		rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */
+		raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags);
+	}
 	if (needwake)
 		rcu_gp_kthread_wake();
+	lockdep_assert_irqs_enabled();
 	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
 		  !rcu_segcblist_empty(&rdp->cblist),
 		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8d9cfcac6757..529eec2aa74d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -211,7 +211,9 @@ struct rcu_data {
 					/* CBs waiting for GP. */
 	struct rcu_head **nocb_gp_tail;
 	bool nocb_gp_sleep;		/* Is the nocb GP thread asleep? */
+	bool nocb_gp_forced;		/* Forced nocb GP thread wakeup? */
 	struct swait_queue_head nocb_gp_wq; /* For nocb kthreads to sleep on. */
+	bool nocb_cb_sleep;		/* Is the nocb CB thread asleep? */
 	struct task_struct *nocb_cb_kthread;
 	struct rcu_data *nocb_next_cb_rdp;
 					/* Next rcu_data in wakeup chain. */
@@ -421,20 +423,20 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
 static void rcu_preempt_deferred_qs(struct task_struct *t);
 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
-static bool rcu_nocb_cpu_needs_barrier(int cpu);
 static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
 static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy, unsigned long flags);
-static bool rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
-				      struct rcu_data *rdp,
-				      unsigned long flags);
+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+				 unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
 static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_cpu_nocb_kthread(int cpu);
 static void __init rcu_spawn_nocb_kthreads(void);
+static void rcu_nocb_lock(struct rcu_data *rdp);
+static void rcu_nocb_unlock(struct rcu_data *rdp);
+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
+				       unsigned long flags);
 #ifdef CONFIG_RCU_NOCB_CPU
 static void __init rcu_organize_nocb_kthreads(void);
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 2d37fd3fa0d4..feffc46cccb0 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1494,6 +1494,45 @@ static int __init parse_rcu_nocb_poll(char *arg)
 }
 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
 
+/*
+ * Acquire the specified rcu_data structure's ->nocb_lock, but only
+ * if it corresponds to a no-CBs CPU.
+ */
+static void rcu_nocb_lock(struct rcu_data *rdp)
+{
+	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
+		lockdep_assert_irqs_disabled();
+		raw_spin_lock(&rdp->nocb_lock);
+	}
+}
+
+/*
+ * Release the specified rcu_data structure's ->nocb_lock, but only
+ * if it corresponds to a no-CBs CPU.
+ */
+static void rcu_nocb_unlock(struct rcu_data *rdp)
+{
+	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
+		lockdep_assert_irqs_disabled();
+		raw_spin_unlock(&rdp->nocb_lock);
+	}
+}
+
+/*
+ * Release the specified rcu_data structure's ->nocb_lock and restore
+ * interrupts, but only if it corresponds to a no-CBs CPU.
+ */
+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
+				       unsigned long flags)
+{
+	if (rcu_segcblist_is_offloaded(&rdp->cblist)) {
+		lockdep_assert_irqs_disabled();
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+	} else {
+		local_irq_restore(flags);
+	}
+}
+
 /*
  * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
  * grace period.
@@ -1526,7 +1565,7 @@ bool rcu_is_nocb_cpu(int cpu)
  * Kick the GP kthread for this NOCB group.  Caller holds ->nocb_lock
  * and this function releases it.
  */
-static void __wake_nocb_gp(struct rcu_data *rdp, bool force,
+static void wake_nocb_gp(struct rcu_data *rdp, bool force,
 			   unsigned long flags)
 	__releases(rdp->nocb_lock)
 {
@@ -1537,30 +1576,19 @@ static void __wake_nocb_gp(struct rcu_data *rdp, bool force,
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 		return;
 	}
-	if (rdp_gp->nocb_gp_sleep || force) {
-		/* Prior smp_mb__after_atomic() orders against prior enqueue. */
-		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
+	if (READ_ONCE(rdp_gp->nocb_gp_sleep) || force) {
 		del_timer(&rdp->nocb_timer);
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-		smp_mb(); /* ->nocb_gp_sleep before swake_up_one(). */
-		swake_up_one(&rdp_gp->nocb_gp_wq);
+		smp_mb(); /* enqueue before ->nocb_gp_sleep. */
+		raw_spin_lock_irqsave(&rdp_gp->nocb_lock, flags);
+		WRITE_ONCE(rdp_gp->nocb_gp_sleep, false);
+		raw_spin_unlock_irqrestore(&rdp_gp->nocb_lock, flags);
+		wake_up_process(rdp_gp->nocb_gp_kthread);
 	} else {
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 	}
 }
 
-/*
- * Kick the GP kthread for this NOCB group, but caller has not
- * acquired locks.
- */
-static void wake_nocb_gp(struct rcu_data *rdp, bool force)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
-	__wake_nocb_gp(rdp, force, flags);
-}
-
 /*
  * Arrange to wake the GP kthread for this NOCB group at some future
  * time when it is safe to do so.
@@ -1568,295 +1596,148 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force)
 static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype,
 			       const char *reason)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
 	if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
 		mod_timer(&rdp->nocb_timer, jiffies + 1);
 	WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
-	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-}
-
-/* Does rcu_barrier need to queue an RCU callback on the specified CPU?  */
-static bool rcu_nocb_cpu_needs_barrier(int cpu)
-{
-	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
-	unsigned long ret;
-#ifdef CONFIG_PROVE_RCU
-	struct rcu_head *rhp;
-#endif /* #ifdef CONFIG_PROVE_RCU */
-
-	/*
-	 * Check count of all no-CBs callbacks awaiting invocation.
-	 * There needs to be a barrier before this function is called,
-	 * but associated with a prior determination that no more
-	 * callbacks would be posted.  In the worst case, the first
-	 * barrier in rcu_barrier() suffices (but the caller cannot
-	 * necessarily rely on this, not a substitute for the caller
-	 * getting the concurrency design right!).  There must also be a
-	 * barrier between the following load and posting of a callback
-	 * (if a callback is in fact needed).  This is associated with an
-	 * atomic_inc() in the caller.
-	 */
-	ret = rcu_get_n_cbs_nocb_cpu(rdp);
-
-#ifdef CONFIG_PROVE_RCU
-	rhp = READ_ONCE(rdp->nocb_head);
-	if (!rhp)
-		rhp = READ_ONCE(rdp->nocb_gp_head);
-	if (!rhp)
-		rhp = READ_ONCE(rdp->nocb_cb_head);
-
-	/* Having no rcuo kthread but CBs after scheduler starts is bad! */
-	if (!READ_ONCE(rdp->nocb_cb_kthread) && rhp &&
-	    rcu_scheduler_fully_active) {
-		/* RCU callback enqueued before CPU first came online??? */
-		pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
-		       cpu, rhp->func);
-		WARN_ON_ONCE(1);
-	}
-#endif /* #ifdef CONFIG_PROVE_RCU */
-
-	return !!ret;
 }
 
 /*
- * Enqueue the specified string of rcu_head structures onto the specified
- * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
- * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
- * counts are supplied by rhcount and rhcount_lazy.
+ * Awaken the no-CBs grace-period kthead if needed, either due to it
+ * legitimately being asleep or due to overload conditions.
  *
  * If warranted, also wake up the kthread servicing this CPUs queues.
  */
-static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
-				    struct rcu_head *rhp,
-				    struct rcu_head **rhtp,
-				    int rhcount, int rhcount_lazy,
-				    unsigned long flags)
+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone,
+				 unsigned long flags)
+				 __releases(rdp->nocb_lock)
 {
 	int len;
-	struct rcu_head **old_rhpp;
 	struct task_struct *t;
 
-	/* Enqueue the callback on the nocb list and update counts. */
-	atomic_long_add(rhcount, &rdp->nocb_q_count);
-	/* rcu_barrier() relies on ->nocb_q_count add before xchg. */
-	old_rhpp = xchg(&rdp->nocb_tail, rhtp);
-	WRITE_ONCE(*old_rhpp, rhp);
-	atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
-	smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
-
-	/* If we are not being polled and there is a kthread, awaken it ... */
+	// If we are being polled or there is no kthread, just leave.
 	t = READ_ONCE(rdp->nocb_gp_kthread);
 	if (rcu_nocb_poll || !t) {
 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
 				    TPS("WakeNotPoll"));
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 		return;
 	}
-	len = rcu_get_n_cbs_nocb_cpu(rdp);
-	if (old_rhpp == &rdp->nocb_head) {
+	// Need to actually to a wakeup.
+	len = rcu_segcblist_n_cbs(&rdp->cblist);
+	if (was_alldone) {
 		if (!irqs_disabled_flags(flags)) {
 			/* ... if queue was empty ... */
-			wake_nocb_gp(rdp, false);
+			wake_nocb_gp(rdp, false, flags);
 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
 					    TPS("WakeEmpty"));
 		} else {
 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE,
 					   TPS("WakeEmptyIsDeferred"));
+			rcu_nocb_unlock_irqrestore(rdp, flags);
 		}
 		rdp->qlen_last_fqs_check = 0;
 	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
 		/* ... or if many callbacks queued. */
 		if (!irqs_disabled_flags(flags)) {
-			wake_nocb_gp(rdp, true);
+			wake_nocb_gp(rdp, true, flags);
 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
 					    TPS("WakeOvf"));
 		} else {
 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE,
 					   TPS("WakeOvfIsDeferred"));
+			rcu_nocb_unlock_irqrestore(rdp, flags);
 		}
 		rdp->qlen_last_fqs_check = LONG_MAX / 2;
 	} else {
 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
+		rcu_nocb_unlock_irqrestore(rdp, flags);
 	}
+	if (!irqs_disabled_flags(flags))
+		lockdep_assert_irqs_enabled();
 	return;
 }
 
 /*
- * This is a helper for __call_rcu(), which invokes this when the normal
- * callback queue is inoperable.  If this is not a no-CBs CPU, this
- * function returns failure back to __call_rcu(), which can complain
- * appropriately.
- *
- * Otherwise, this function queues the callback where the corresponding
- * "rcuo" kthread can find it.
- */
-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy, unsigned long flags)
-{
-
-	if (!rcu_segcblist_is_offloaded(&rdp->cblist))
-		return false;
-	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
-	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
-		trace_rcu_kfree_callback(rcu_state.name, rhp,
-					 (unsigned long)rhp->func,
-					 -atomic_long_read(&rdp->nocb_q_count_lazy),
-					 -rcu_get_n_cbs_nocb_cpu(rdp));
-	else
-		trace_rcu_callback(rcu_state.name, rhp,
-				   -atomic_long_read(&rdp->nocb_q_count_lazy),
-				   -rcu_get_n_cbs_nocb_cpu(rdp));
-
-	return true;
-}
-
-/*
- * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
- * not a no-CBs CPU.
- */
-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
-						     struct rcu_data *rdp,
-						     unsigned long flags)
-{
-	lockdep_assert_irqs_disabled();
-	if (!rcu_segcblist_is_offloaded(&my_rdp->cblist))
-		return false; /* Not NOCBs CPU, caller must migrate CBs. */
-	__call_rcu_nocb_enqueue(my_rdp, rcu_segcblist_head(&rdp->cblist),
-				rcu_segcblist_tail(&rdp->cblist),
-				rcu_segcblist_n_cbs(&rdp->cblist),
-				rcu_segcblist_n_lazy_cbs(&rdp->cblist), flags);
-	rcu_segcblist_init(&rdp->cblist);
-	rcu_segcblist_disable(&rdp->cblist);
-	return true;
-}
-
-/*
- * If necessary, kick off a new grace period, and either way wait
- * for a subsequent grace period to complete.
- */
-static void rcu_nocb_wait_gp(struct rcu_data *rdp)
-{
-	unsigned long c;
-	bool d;
-	unsigned long flags;
-	bool needwake;
-	struct rcu_node *rnp = rdp->mynode;
-
-	local_irq_save(flags);
-	c = rcu_seq_snap(&rcu_state.gp_seq);
-	if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
-		local_irq_restore(flags);
-	} else {
-		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
-		needwake = rcu_start_this_gp(rnp, rdp, c);
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-		if (needwake)
-			rcu_gp_kthread_wake();
-	}
-
-	/*
-	 * Wait for the grace period.  Do so interruptibly to avoid messing
-	 * up the load average.
-	 */
-	trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
-	for (;;) {
-		swait_event_interruptible_exclusive(
-			rnp->nocb_gp_wq[rcu_seq_ctr(c) & 0x1],
-			(d = rcu_seq_done(&rnp->gp_seq, c)));
-		if (likely(d))
-			break;
-		WARN_ON(signal_pending(current));
-		trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
-	}
-	trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
-	smp_mb(); /* Ensure that CB invocation happens after GP end. */
-}
-
-/*
- * No-CBs GP kthreads come here to wait for additional callbacks to show up.
- * This function does not return until callbacks appear.
+ * No-CBs GP kthreads come here to wait for additional callbacks to show up
+ * or for grace periods to end.
  */
 static void nocb_gp_wait(struct rcu_data *my_rdp)
 {
-	bool firsttime = true;
+	int __maybe_unused cpu = my_rdp->cpu;
+	unsigned long cur_gp_seq;
 	unsigned long flags;
 	bool gotcbs;
+	bool needwait_gp = false;
+	bool needwake;
+	bool needwake_gp;
 	struct rcu_data *rdp;
-	struct rcu_head **tail;
-
-	/* Wait for callbacks to appear. */
-	if (!rcu_nocb_poll) {
-		trace_rcu_nocb_wake(rcu_state.name, my_rdp->cpu, TPS("Sleep"));
-		swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
-				!READ_ONCE(my_rdp->nocb_gp_sleep));
-		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
-		my_rdp->nocb_gp_sleep = true;
-		WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
-		del_timer(&my_rdp->nocb_timer);
-		raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
-	} else if (firsttime) {
-		firsttime = false; /* Don't drown trace log with "Poll"! */
-		trace_rcu_nocb_wake(rcu_state.name, my_rdp->cpu, TPS("Poll"));
-	}
+	struct rcu_node *rnp;
+	unsigned long wait_gp_seq;
 
 	/*
-	 * Each pass through the following loop checks for CBs.
-	 * We are our own first CB kthread.  Any CBs found are moved to
-	 * nocb_gp_head, where they await a grace period.
+	 * Each pass through the following loop checks for CBs and for the
+	 * nearest grace period (if any) to wait for next.  The CB kthreads
+	 * and the global grace-period kthread are awakened if needed.
 	 */
-	gotcbs = false;
-	smp_mb(); /* wakeup and _sleep before ->nocb_head reads. */
 	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
-		rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
-		if (!rdp->nocb_gp_head)
-			continue;  /* No CBs here, try next. */
-
-		/* Move callbacks to wait-for-GP list, which is empty. */
-		WRITE_ONCE(rdp->nocb_head, NULL);
-		rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
-		gotcbs = true;
-	}
-
-	/* No callbacks?  Sleep a bit if polling, and go retry.  */
-	if (unlikely(!gotcbs)) {
-		WARN_ON(signal_pending(current));
-		if (rcu_nocb_poll) {
-			schedule_timeout_interruptible(1);
-		} else {
-			trace_rcu_nocb_wake(rcu_state.name, my_rdp->cpu,
-					    TPS("WokeEmpty"));
+		if (rcu_segcblist_empty(&rdp->cblist))
+			continue; /* No callbacks here, try next. */
+		rnp = rdp->mynode;
+		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+		WRITE_ONCE(my_rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
+		del_timer(&my_rdp->nocb_timer);
+		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
+		needwake_gp = rcu_advance_cbs(rnp, rdp);
+		raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
+		// Need to wait on some grace period?
+		if (rcu_segcblist_nextgp(&rdp->cblist, &cur_gp_seq)) {
+			if (!needwait_gp ||
+			    ULONG_CMP_LT(cur_gp_seq, wait_gp_seq))
+				wait_gp_seq = cur_gp_seq;
+			needwait_gp = true;
 		}
-		return;
-	}
-
-	/* Wait for one grace period. */
-	rcu_nocb_wait_gp(my_rdp);
-
-	/* Each pass through this loop wakes a CB kthread, if needed. */
-	for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) {
-		if (!rcu_nocb_poll &&
-		    READ_ONCE(rdp->nocb_head) &&
-		    READ_ONCE(my_rdp->nocb_gp_sleep)) {
-			raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
-			my_rdp->nocb_gp_sleep = false;/* No need to sleep.*/
-			raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
+		if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
+			needwake = rdp->nocb_cb_sleep;
+			WRITE_ONCE(rdp->nocb_cb_sleep, false);
+			smp_mb(); /* CB invocation -after- GP end. */
+		} else {
+			needwake = false;
 		}
-		if (!rdp->nocb_gp_head)
-			continue; /* No CBs, so no need to wake kthread. */
-
-		/* Append callbacks to CB kthread's "done" list. */
-		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
-		tail = rdp->nocb_cb_tail;
-		rdp->nocb_cb_tail = rdp->nocb_gp_tail;
-		*tail = rdp->nocb_gp_head;
 		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-		if (tail == &rdp->nocb_cb_head) {
-			/* List was empty, so wake up the kthread.  */
+		if (needwake) {
 			swake_up_one(&rdp->nocb_cb_wq);
+			gotcbs = true;
 		}
+		if (needwake_gp)
+			rcu_gp_kthread_wake();
+	}
+
+	if (rcu_nocb_poll) {
+		/* Polling, so trace if first poll in the series. */
+		if (gotcbs)
+			trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Poll"));
+		schedule_timeout_interruptible(1);
+	} else if (!needwait_gp) {
+		/* Wait for callbacks to appear. */
+		trace_rcu_nocb_wake(rcu_state.name, cpu, TPS("Sleep"));
+		swait_event_interruptible_exclusive(my_rdp->nocb_gp_wq,
+				!READ_ONCE(my_rdp->nocb_gp_sleep));
+	} else {
+		rnp = my_rdp->mynode;
+		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("StartWait"));
+		swait_event_interruptible_exclusive(
+			rnp->nocb_gp_wq[rcu_seq_ctr(wait_gp_seq) & 0x1],
+			rcu_seq_done(&rnp->gp_seq, wait_gp_seq) ||
+			!READ_ONCE(my_rdp->nocb_gp_sleep));
+		trace_rcu_this_gp(rnp, my_rdp, wait_gp_seq, TPS("EndWait"));
+	}
+	if (!rcu_nocb_poll) {
+		raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
+		WRITE_ONCE(my_rdp->nocb_gp_sleep, true);
+		raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
 	}
+	WARN_ON(signal_pending(current));
 }
 
 /*
@@ -1871,92 +1752,69 @@ static int rcu_nocb_gp_kthread(void *arg)
 {
 	struct rcu_data *rdp = arg;
 
-	for (;;)
+	for (;;) {
 		nocb_gp_wait(rdp);
+		cond_resched_tasks_rcu_qs();
+	}
 	return 0;
 }
 
 /*
- * No-CBs CB kthreads come here to wait for additional callbacks to show up.
- * This function returns true ("keep waiting") until callbacks appear and
- * then false ("stop waiting") when callbacks finally do appear.
+ * Invoke any ready callbacks from the corresponding no-CBs CPU,
+ * then, if there are no more, wait for more to appear.
  */
-static bool nocb_cb_wait(struct rcu_data *rdp)
+static void nocb_cb_wait(struct rcu_data *rdp)
 {
+	unsigned long flags;
+	bool needwake_gp = false;
+	struct rcu_node *rnp = rdp->mynode;
+
+	local_irq_save(flags);
+	rcu_momentary_dyntick_idle();
+	local_irq_restore(flags);
+	local_bh_disable();
+	rcu_do_batch(rdp);
+	local_bh_enable();
+	lockdep_assert_irqs_enabled();
+	raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
+	raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
+	needwake_gp = rcu_advance_cbs(rdp->mynode, rdp);
+	raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
+	if (rcu_segcblist_ready_cbs(&rdp->cblist)) {
+		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+		if (needwake_gp)
+			rcu_gp_kthread_wake();
+		return;
+	}
+
 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
+	WRITE_ONCE(rdp->nocb_cb_sleep, true);
+	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
+	if (needwake_gp)
+		rcu_gp_kthread_wake();
 	swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
-				 READ_ONCE(rdp->nocb_cb_head));
-	if (smp_load_acquire(&rdp->nocb_cb_head)) { /* VVV */
-		/* ^^^ Ensure CB invocation follows _head test. */
-		return false;
+				 !READ_ONCE(rdp->nocb_cb_sleep));
+	if (!smp_load_acquire(&rdp->nocb_cb_sleep)) { /* VVV */
+		/* ^^^ Ensure CB invocation follows _sleep test. */
+		return;
 	}
 	WARN_ON(signal_pending(current));
 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
-	return true;
 }
 
 /*
- * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes
- * callbacks queued by the corresponding no-CBs CPU, however, there is an
- * optional GP-CB relationship so that the grace-period kthreads don't
- * have to do quite so many wakeups (as in they only need to wake the
- * no-CBs GP kthreads, not the CB kthreads).
+ * Per-rcu_data kthread, but only for no-CBs CPUs.  Repeatedly invoke
+ * nocb_cb_wait() to do the dirty work.
  */
 static int rcu_nocb_cb_kthread(void *arg)
 {
-	int c, cl;
-	unsigned long flags;
-	struct rcu_head *list;
-	struct rcu_head *next;
-	struct rcu_head **tail;
 	struct rcu_data *rdp = arg;
 
-	/* Each pass through this loop invokes one batch of callbacks */
+	// Each pass through this loop does one callback batch, and,
+	// if there are no more ready callbacks, waits for them.
 	for (;;) {
-		/* Wait for callbacks. */
-		while (nocb_cb_wait(rdp))
-			continue;
-
-		/* Pull the ready-to-invoke callbacks onto local list. */
-		raw_spin_lock_irqsave(&rdp->nocb_lock, flags);
-		list = rdp->nocb_cb_head;
-		rdp->nocb_cb_head = NULL;
-		tail = rdp->nocb_cb_tail;
-		rdp->nocb_cb_tail = &rdp->nocb_cb_head;
-		raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
-		if (WARN_ON_ONCE(!list))
-			continue;
-		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty"));
-
-		/* Each pass through the following loop invokes a callback. */
-		trace_rcu_batch_start(rcu_state.name,
-				      atomic_long_read(&rdp->nocb_q_count_lazy),
-				      rcu_get_n_cbs_nocb_cpu(rdp), -1);
-		c = cl = 0;
-		while (list) {
-			next = list->next;
-			/* Wait for enqueuing to complete, if needed. */
-			while (next == NULL && &list->next != tail) {
-				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
-						    TPS("WaitQueue"));
-				schedule_timeout_interruptible(1);
-				trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
-						    TPS("WokeQueue"));
-				next = list->next;
-			}
-			debug_rcu_head_unqueue(list);
-			local_bh_disable();
-			if (__rcu_reclaim(rcu_state.name, list))
-				cl++;
-			c++;
-			local_bh_enable();
-			cond_resched_tasks_rcu_qs();
-			list = next;
-		}
-		trace_rcu_batch_end(rcu_state.name, c, !!list, 0, 0, 1);
-		smp_mb__before_atomic();  /* _add after CB invocation. */
-		atomic_long_add(-c, &rdp->nocb_q_count);
-		atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
+		nocb_cb_wait(rdp);
+		cond_resched_tasks_rcu_qs();
 	}
 	return 0;
 }
@@ -1980,7 +1838,7 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
 	}
 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
 	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
-	__wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+	wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
 }
 
@@ -2194,10 +2052,21 @@ static unsigned long rcu_get_n_cbs_nocb_cpu(struct rcu_data *rdp)
 
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 
-static bool rcu_nocb_cpu_needs_barrier(int cpu)
+/* No ->nocb_lock to acquire.  */
+static void rcu_nocb_lock(struct rcu_data *rdp)
 {
-	WARN_ON_ONCE(1); /* Should be dead code. */
-	return false;
+}
+
+/* No ->nocb_lock to release.  */
+static void rcu_nocb_unlock(struct rcu_data *rdp)
+{
+}
+
+/* No ->nocb_lock to release.  */
+static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp,
+				       unsigned long flags)
+{
+	local_irq_restore(flags);
 }
 
 static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
@@ -2213,17 +2082,10 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
 }
 
-static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy, unsigned long flags)
+static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
+				 unsigned long flags)
 {
-	return false;
-}
-
-static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_data *my_rdp,
-						     struct rcu_data *rdp,
-						     unsigned long flags)
-{
-	return false;
+	WARN_ON_ONCE(1);  /* Should be dead code! */
 }
 
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
-- 
cgit v1.2.3


From eda669a6a2c517fd6db41d0fe3c95c1b749c60bd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.ibm.com>
Date: Mon, 1 Jul 2019 17:36:53 -0700
Subject: rcu/nocb: Atomic ->len field in rcu_segcblist structure

Upcoming ->nocb_lock contention-reduction work requires that the
rcu_segcblist structure's ->len field be concurrently manipulated,
but only if there are no-CBs CPUs in the kernel.  This commit
therefore makes this ->len field be an atomic_long_t, but only
in CONFIG_RCU_NOCB_CPU=y kernels.

Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcu_segcblist.h |  4 ++
 kernel/rcu/rcu_segcblist.c    | 86 +++++++++++++++++++++++++++++++++++++++----
 kernel/rcu/rcu_segcblist.h    | 12 +++++-
 3 files changed, 94 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index 8b684888f71d..646759042333 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -68,7 +68,11 @@ struct rcu_segcblist {
 	struct rcu_head *head;
 	struct rcu_head **tails[RCU_CBLIST_NSEGS];
 	unsigned long gp_seq[RCU_CBLIST_NSEGS];
+#ifdef CONFIG_RCU_NOCB_CPU
+	atomic_long_t len;
+#else
 	long len;
+#endif
 	long len_lazy;
 	u8 enabled;
 	u8 offloaded;
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 92968b856593..ff431cc83037 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -23,6 +23,19 @@ void rcu_cblist_init(struct rcu_cblist *rclp)
 	rclp->len_lazy = 0;
 }
 
+/*
+ * Enqueue an rcu_head structure onto the specified callback list.
+ * This function assumes that the callback is non-lazy because it
+ * is intended for use by no-CBs CPUs, which do not distinguish
+ * between lazy and non-lazy RCU callbacks.
+ */
+void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp)
+{
+	*rclp->tail = rhp;
+	rclp->tail = &rhp->next;
+	WRITE_ONCE(rclp->len, rclp->len + 1);
+}
+
 /*
  * Dequeue the oldest rcu_head structure from the specified callback
  * list.  This function assumes that the callback is non-lazy, but
@@ -44,6 +57,67 @@ struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
 	return rhp;
 }
 
+/* Set the length of an rcu_segcblist structure. */
+void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
+{
+#ifdef CONFIG_RCU_NOCB_CPU
+	atomic_long_set(&rsclp->len, v);
+#else
+	WRITE_ONCE(rsclp->len, v);
+#endif
+}
+
+/*
+ * Increase the numeric length of an rcu_segcblist structure by the
+ * specified amount, which can be negative.  This can cause the ->len
+ * field to disagree with the actual number of callbacks on the structure.
+ * This increase is fully ordered with respect to the callers accesses
+ * both before and after.
+ */
+void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
+{
+#ifdef CONFIG_RCU_NOCB_CPU
+	smp_mb__before_atomic(); /* Up to the caller! */
+	atomic_long_add(v, &rsclp->len);
+	smp_mb__after_atomic(); /* Up to the caller! */
+#else
+	smp_mb(); /* Up to the caller! */
+	WRITE_ONCE(rsclp->len, rsclp->len + v);
+	smp_mb(); /* Up to the caller! */
+#endif
+}
+
+/*
+ * Increase the numeric length of an rcu_segcblist structure by one.
+ * This can cause the ->len field to disagree with the actual number of
+ * callbacks on the structure.  This increase is fully ordered with respect
+ * to the callers accesses both before and after.
+ */
+void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
+{
+	rcu_segcblist_add_len(rsclp, 1);
+}
+
+/*
+ * Exchange the numeric length of the specified rcu_segcblist structure
+ * with the specified value.  This can cause the ->len field to disagree
+ * with the actual number of callbacks on the structure.  This exchange is
+ * fully ordered with respect to the callers accesses both before and after.
+ */
+long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
+{
+#ifdef CONFIG_RCU_NOCB_CPU
+	return atomic_long_xchg(&rsclp->len, v);
+#else
+	long ret = rsclp->len;
+
+	smp_mb(); /* Up to the caller! */
+	WRITE_ONCE(rsclp->len, v);
+	smp_mb(); /* Up to the caller! */
+	return ret;
+#endif
+}
+
 /*
  * Initialize an rcu_segcblist structure.
  */
@@ -56,7 +130,7 @@ void rcu_segcblist_init(struct rcu_segcblist *rsclp)
 	rsclp->head = NULL;
 	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
 		rsclp->tails[i] = &rsclp->head;
-	rsclp->len = 0;
+	rcu_segcblist_set_len(rsclp, 0);
 	rsclp->len_lazy = 0;
 	rsclp->enabled = 1;
 }
@@ -151,7 +225,7 @@ bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp)
 void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
 			   struct rcu_head *rhp, bool lazy)
 {
-	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */
+	rcu_segcblist_inc_len(rsclp);
 	if (lazy)
 		rsclp->len_lazy++;
 	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
@@ -177,7 +251,7 @@ bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
 
 	if (rcu_segcblist_n_cbs(rsclp) == 0)
 		return false;
-	WRITE_ONCE(rsclp->len, rsclp->len + 1);
+	rcu_segcblist_inc_len(rsclp);
 	if (lazy)
 		rsclp->len_lazy++;
 	smp_mb(); /* Ensure counts are updated before callback is entrained. */
@@ -204,9 +278,8 @@ void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
 					       struct rcu_cblist *rclp)
 {
 	rclp->len_lazy += rsclp->len_lazy;
-	rclp->len += rsclp->len;
 	rsclp->len_lazy = 0;
-	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */
+	rclp->len = rcu_segcblist_xchg_len(rsclp, 0);
 }
 
 /*
@@ -259,8 +332,7 @@ void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
 				struct rcu_cblist *rclp)
 {
 	rsclp->len_lazy += rclp->len_lazy;
-	/* ->len sampled locklessly. */
-	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len);
+	rcu_segcblist_add_len(rsclp, rclp->len);
 	rclp->len_lazy = 0;
 	rclp->len = 0;
 }
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index db38f0a512c4..1ff996647d3c 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -9,6 +9,12 @@
 
 #include <linux/rcu_segcblist.h>
 
+/* Return number of callbacks in the specified callback list. */
+static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp)
+{
+	return READ_ONCE(rclp->len);
+}
+
 /*
  * Account for the fact that a previously dequeued callback turned out
  * to be marked as lazy.
@@ -42,7 +48,11 @@ static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp)
 /* Return number of callbacks in segmented callback list. */
 static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp)
 {
+#ifdef CONFIG_RCU_NOCB_CPU
+	return atomic_long_read(&rsclp->len);
+#else
 	return READ_ONCE(rsclp->len);
+#endif
 }
 
 /* Return number of lazy callbacks in segmented callback list. */
@@ -54,7 +64,7 @@ static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp)
 /* Return number of lazy callbacks in segmented callback list. */
 static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp)
 {
-	return rsclp->len - rsclp->len_lazy;
+	return rcu_segcblist_n_cbs(rsclp) - rsclp->len_lazy;
 }
 
 /*
-- 
cgit v1.2.3


From 76470ccd62f18bfa0954bec10f2329339f793914 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Tue, 13 Aug 2019 15:37:04 -0700
Subject: mm: document zone device struct page field usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm/hmm: fixes for device private page migration", v3.

Testing the latest linux git tree turned up a few bugs with page
migration to and from ZONE_DEVICE private and anonymous pages.
Hopefully it clarifies how ZONE_DEVICE private struct page uses the same
mapping and index fields from the source anonymous page mapping.

This patch (of 3):

Struct page for ZONE_DEVICE private pages uses the page->mapping and and
page->index fields while the source anonymous pages are migrated to
device private memory.  This is so rmap_walk() can find the page when
migrating the ZONE_DEVICE private page back to system memory.
ZONE_DEVICE pmem backed fsdax pages also use the page->mapping and
page->index fields when files are mapped into a process address space.

Add comments to struct page and remove the unused "_zd_pad_1" field to
make this more clear.

Link: http://lkml.kernel.org/r/20190724232700.23327-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3a37a89eb7a7..6a7a1083b6fb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,7 +159,16 @@ struct page {
 			/** @pgmap: Points to the hosting device page map. */
 			struct dev_pagemap *pgmap;
 			void *zone_device_data;
-			unsigned long _zd_pad_1;	/* uses mapping */
+			/*
+			 * ZONE_DEVICE private pages are counted as being
+			 * mapped so the next 3 words hold the mapping, index,
+			 * and private fields from the source anonymous or
+			 * page cache page while the page is migrated to device
+			 * private memory.
+			 * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
+			 * use the mapping, index, and private fields when
+			 * pmem backed DAX files are mapped.
+			 */
 		};
 
 		/** @rcu_head: You can use this to free a page by RCU. */
-- 
cgit v1.2.3


From ec9f02384f6053f2a5417e82b65078edc5364a8d Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 13 Aug 2019 15:37:41 -0700
Subject: mm: workingset: fix vmstat counters for shadow nodes

Memcg counters for shadow nodes are broken because the memcg pointer is
obtained in a wrong way. The following approach is used:
        virt_to_page(xa_node)->mem_cgroup

Since commit 4d96ba353075 ("mm: memcg/slab: stop setting
page->mem_cgroup pointer for slab pages") page->mem_cgroup pointer isn't
set for slab pages, so memcg_from_slab_page() should be used instead.

Also I doubt that it ever worked correctly: virt_to_head_page() should
be used instead of virt_to_page().  Otherwise objects residing on tail
pages are not accounted, because only the head page contains a valid
mem_cgroup pointer.  That was a case since the introduction of these
counters by the commit 68d48e6a2df5 ("mm: workingset: add vmstat counter
for shadow nodes").

Link: http://lkml.kernel.org/r/20190801233532.138743-1-guro@fb.com
Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages")
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 19 +++++++++++++++++++
 mm/memcontrol.c            | 20 ++++++++++++++++++++
 mm/workingset.c            | 10 ++++------
 3 files changed, 43 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44c41462be33..2cd4359cb38c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -668,6 +668,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			int val);
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
 
 static inline void mod_lruvec_state(struct lruvec *lruvec,
 				    enum node_stat_item idx, int val)
@@ -1072,6 +1073,14 @@ static inline void mod_lruvec_page_state(struct page *page,
 	mod_node_page_state(page_pgdat(page), idx, val);
 }
 
+static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+					   int val)
+{
+	struct page *page = virt_to_head_page(p);
+
+	__mod_node_page_state(page_pgdat(page), idx, val);
+}
+
 static inline
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					    gfp_t gfp_mask,
@@ -1159,6 +1168,16 @@ static inline void __dec_lruvec_page_state(struct page *page,
 	__mod_lruvec_page_state(page, idx, -1);
 }
 
+static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx)
+{
+	__mod_lruvec_slab_state(p, idx, 1);
+}
+
+static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx)
+{
+	__mod_lruvec_slab_state(p, idx, -1);
+}
+
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void inc_memcg_state(struct mem_cgroup *memcg,
 				   int idx)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2e405e058eda..6f5c0c517c49 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -768,6 +768,26 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
+{
+	struct page *page = virt_to_head_page(p);
+	pg_data_t *pgdat = page_pgdat(page);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+	memcg = memcg_from_slab_page(page);
+
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!memcg || memcg == root_mem_cgroup) {
+		__mod_node_page_state(pgdat, idx, val);
+	} else {
+		lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		__mod_lruvec_state(lruvec, idx, val);
+	}
+	rcu_read_unlock();
+}
+
 /**
  * __count_memcg_events - account VM events in a cgroup
  * @memcg: the memory cgroup
diff --git a/mm/workingset.c b/mm/workingset.c
index e0b4edcb88c8..c963831d354f 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -380,14 +380,12 @@ void workingset_update_node(struct xa_node *node)
 	if (node->count && node->count == node->nr_values) {
 		if (list_empty(&node->private_list)) {
 			list_lru_add(&shadow_nodes, &node->private_list);
-			__inc_lruvec_page_state(virt_to_page(node),
-						WORKINGSET_NODES);
+			__inc_lruvec_slab_state(node, WORKINGSET_NODES);
 		}
 	} else {
 		if (!list_empty(&node->private_list)) {
 			list_lru_del(&shadow_nodes, &node->private_list);
-			__dec_lruvec_page_state(virt_to_page(node),
-						WORKINGSET_NODES);
+			__dec_lruvec_slab_state(node, WORKINGSET_NODES);
 		}
 	}
 }
@@ -480,7 +478,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	}
 
 	list_lru_isolate(lru, item);
-	__dec_lruvec_page_state(virt_to_page(node), WORKINGSET_NODES);
+	__dec_lruvec_slab_state(node, WORKINGSET_NODES);
 
 	spin_unlock(lru_lock);
 
@@ -503,7 +501,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * shadow entries we were tracking ...
 	 */
 	xas_store(&xas, NULL);
-	__inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
+	__inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM);
 
 out_invalid:
 	xa_unlock_irq(&mapping->i_pages);
-- 
cgit v1.2.3


From 0cfaee2af3a04c0be5f056cebe5f804dedc59a43 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Tue, 13 Aug 2019 15:37:47 -0700
Subject: include/asm-generic/5level-fixup.h: fix variable 'p4d' set but not
 used

A compiler throws a warning on an arm64 system since commit 9849a5697d3d
("arch, mm: convert all architectures to use 5level-fixup.h"),

  mm/kasan/init.c: In function 'kasan_free_p4d':
  mm/kasan/init.c:344:9: warning: variable 'p4d' set but not used [-Wunused-but-set-variable]
   p4d_t *p4d;
          ^~~

because p4d_none() in "5level-fixup.h" is compiled away while it is a
static inline function in "pgtable-nopud.h".

However, if converted p4d_none() to a static inline there, powerpc would
be unhappy as it reads those in assembler language in
"arch/powerpc/include/asm/book3s/64/pgtable.h", so it needs to skip
assembly include for the static inline C function.

While at it, converted a few similar functions to be consistent with the
ones in "pgtable-nopud.h".

Link: http://lkml.kernel.org/r/20190806232917.881-1-cai@lca.pw
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/5level-fixup.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index bb6cb347018c..f6947da70d71 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -19,9 +19,24 @@
 
 #define p4d_alloc(mm, pgd, address)	(pgd)
 #define p4d_offset(pgd, start)		(pgd)
-#define p4d_none(p4d)			0
-#define p4d_bad(p4d)			0
-#define p4d_present(p4d)		1
+
+#ifndef __ASSEMBLY__
+static inline int p4d_none(p4d_t p4d)
+{
+	return 0;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+	return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+	return 1;
+}
+#endif
+
 #define p4d_ERROR(p4d)			do { } while (0)
 #define p4d_clear(p4d)			pgd_clear(p4d)
 #define p4d_val(p4d)			pgd_val(p4d)
-- 
cgit v1.2.3


From 92717d429b38e4f9f934eed7e605cc42858f1839 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 13 Aug 2019 15:37:50 -0700
Subject: Revert "Revert "mm, thp: consolidate THP gfp handling into
 alloc_hugepage_direct_gfpmask""

Patch series "reapply: relax __GFP_THISNODE for MADV_HUGEPAGE mappings".

The fixes for what was originally reported as "pathological THP
behavior" we rightfully reverted to be sure not to introduced
regressions at end of a merge window after a severe regression report
from the kernel bot.  We can safely re-apply them now that we had time
to analyze the problem.

The mm process worked fine, because the good fixes were eventually
committed upstream without excessive delay.

The regression reported by the kernel bot however forced us to revert
the good fixes to be sure not to introduce regressions and to give us
the time to analyze the issue further.  The silver lining is that this
extra time allowed to think more at this issue and also plan for a
future direction to improve things further in terms of THP NUMA
locality.

This patch (of 2):

This reverts commit 356ff8a9a78fb35d ("Revert "mm, thp: consolidate THP
gfp handling into alloc_hugepage_direct_gfpmask").  So it reapplies
89c83fb539f954 ("mm, thp: consolidate THP gfp handling into
alloc_hugepage_direct_gfpmask").

Consolidation of the THP allocation flags at the same place was meant to
be a clean up to easier handle otherwise scattered code which is
imposing a maintenance burden.  There were no real problems observed
with the gfp mask consolidation but the reversion was rushed through
without a larger consensus regardless.

This patch brings the consolidation back because this should make the
long term maintainability easier as well as it should allow future
changes to be less error prone.

[mhocko@kernel.org: changelog additions]
Link: http://lkml.kernel.org/r/20190503223146.2312-2-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 12 ++++--------
 mm/huge_memory.c    | 27 ++++++++++++++-------------
 mm/mempolicy.c      | 32 +++-----------------------------
 mm/shmem.c          |  2 +-
 4 files changed, 22 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fb07b503dc45..f33881688f42 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+			int node);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-	alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1334ede667a8..f7e388b8662d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -644,30 +644,30 @@ release:
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | __GFP_THISNODE |
+		       (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+		return gfp_mask | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT |
-			(vma_madvised ? __GFP_DIRECT_RECLAIM :
-					__GFP_KSWAPD_RECLAIM);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+						  __GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT |
-		       (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return GFP_TRANSHUGE_LIGHT;
+	return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
@@ -739,8 +739,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1347,8 +1347,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (__transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+				haddr, numa_node_id());
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 547cd403ed02..9c9877a43d58 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2083,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
- *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2094,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2112,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 626d8c74b973..2bed4761f279 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
-- 
cgit v1.2.3


From a8282608c88e08b1782141026eab61204c1e533f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 13 Aug 2019 15:37:53 -0700
Subject: Revert "mm, thp: restore node-local hugepage allocations"

This reverts commit 2f0799a0ffc033b ("mm, thp: restore node-local
hugepage allocations").

commit 2f0799a0ffc033b was rightfully applied to avoid the risk of a
severe regression that was reported by the kernel test robot at the end
of the merge window.  Now we understood the regression was a false
positive and was caused by a significant increase in fairness during a
swap trashing benchmark.  So it's safe to re-apply the fix and continue
improving the code from there.  The benchmark that reported the
regression is very useful, but it provides a meaningful result only when
there is no significant alteration in fairness during the workload.  The
removal of __GFP_THISNODE increased fairness.

__GFP_THISNODE cannot be used in the generic page faults path for new
memory allocations under the MPOL_DEFAULT mempolicy, or the allocation
behavior significantly deviates from what the MPOL_DEFAULT semantics are
supposed to be for THP and 4k allocations alike.

Setting THP defrag to "always" or using MADV_HUGEPAGE (with THP defrag
set to "madvise") has never meant to provide an implicit MPOL_BIND on
the "current" node the task is running on, causing swap storms and
providing a much more aggressive behavior than even zone_reclaim_node =
3.

Any workload who could have benefited from __GFP_THISNODE has now to
enable zone_reclaim_mode=1||2||3.  __GFP_THISNODE implicitly provided
the zone_reclaim_mode behavior, but it only did so if THP was enabled:
if THP was disabled, there would have been no chance to get any 4k page
from the current node if the current node was full of pagecache, which
further shows how this __GFP_THISNODE was misplaced in MADV_HUGEPAGE.
MADV_HUGEPAGE has never been intended to provide any zone_reclaim_mode
semantics, in fact the two are orthogonal, zone_reclaim_mode = 1|2|3
must work exactly the same with MADV_HUGEPAGE set or not.

The performance characteristic of memory depends on the hardware
details.  The numbers below are obtained on Naples/EPYC architecture and
the N/A projection extends them to show what we should aim for in the
future as a good THP NUMA locality default.  The benchmark used
exercises random memory seeks (note: the cost of the page faults is not
part of the measurement).

  D0 THP | D0 4k | D1 THP | D1 4k | D2 THP | D2 4k | D3 THP | D3 4k | ...
  0%     | +43%  | +45%   | +106% | +131%  | +224% | N/A    | N/A

D0 means distance zero (i.e.  local memory), D1 means distance one (i.e.
intra socket memory), D2 means distance two (i.e.  inter socket memory),
etc...

For the guest physical memory allocated by qemu and for guest mode
kernel the performance characteristic of RAM is more complex and an
ideal default could be:

  D0 THP | D1 THP | D0 4k | D2 THP | D1 4k | D3 THP | D2 4k | D3 4k | ...
  0%     | +58%   | +101% | N/A    | +222% | N/A    | N/A   | N/A

NOTE: the N/A are projections and haven't been measured yet, the
measurement in this case is done on a 1950x with only two NUMA nodes.
The THP case here means THP was used both in the host and in the guest.

After applying this commit the THP NUMA locality order that we'll get
out of MADV_HUGEPAGE is this:

  D0 THP | D1 THP | D2 THP | D3 THP | ... | D0 4k | D1 4k | D2 4k | D3 4k | ...

Before this commit it was:

  D0 THP | D0 4k | D1 4k | D2 4k | D3 4k | ...

Even if we ignore the breakage of large workloads that can't fit in a
single node that the __GFP_THISNODE implicit "current node" mbind
caused, the THP NUMA locality order provided by __GFP_THISNODE was still
not the one we shall aim for in the long term (i.e.  the first one at
the top).

After this commit is applied, we can introduce a new allocator multi
order API and to replace those two alloc_pages_vmas calls in the page
fault path, with a single multi order call:

        unsigned int order = (1 << HPAGE_PMD_ORDER) | (1 << 0);
        page = alloc_pages_multi_order(..., &order);
        if (!page)
        	goto out;
        if (!(order & (1 << 0))) {
        	VM_WARN_ON(order != 1 << HPAGE_PMD_ORDER);
        	/* THP fault */
        } else {
        	VM_WARN_ON(order != 1 << 0);
        	/* 4k fallback */
        }

The page allocator logic has to be altered so that when it fails on any
zone with order 9, it has to try again with a order 0 before falling
back to the next zone in the zonelist.

After that we need to do more measurements and evaluate if adding an
opt-in feature for guest mode is worth it, to swap "DN 4k | DN+1 THP"
with "DN+1 THP | DN 4k" at every NUMA distance crossing.

Link: http://lkml.kernel.org/r/20190503223146.2312-3-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 ++
 mm/huge_memory.c          | 42 ++++++++++++++++++++++++++----------------
 mm/mempolicy.c            |  2 +-
 3 files changed, 29 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af416..bac395f1d00a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7e388b8662d..738065f765ab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -647,27 +647,37 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
+	gfp_t this_node = 0;
 
-	/* Always do synchronous compaction */
-	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+#ifdef CONFIG_NUMA
+	struct mempolicy *pol;
+	/*
+	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+	 * specified, to express a general desire to stay on the current
+	 * node for optimistic allocation attempts. If the defrag mode
+	 * and/or madvise hint requires the direct reclaim then we prefer
+	 * to fallback to other node rather than node reclaim because that
+	 * can lead to excessive reclaim even though there is free memory
+	 * on other nodes. We expect that NUMA preferences are specified
+	 * by memory policies.
+	 */
+	pol = get_vma_policy(vma, addr);
+	if (pol->mode != MPOL_BIND)
+		this_node = __GFP_THISNODE;
+	mpol_cond_put(pol);
+#endif
 
-	/* Kick kcompactd and fail quickly */
+	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
-
-	/* Synchronous compaction if madvised, otherwise kick kcompactd */
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
-
-	/* Only do synchronous compaction if madvised */
+		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+							     __GFP_KSWAPD_RECLAIM | this_node);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
-
-	return gfp_mask;
+		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+							     this_node);
+	return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9c9877a43d58..65e0874fce17 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1734,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
-- 
cgit v1.2.3


From 331c56ac73846fa267c04ee6aa9a00bb5fed9440 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 12 Aug 2019 23:51:27 +0200
Subject: net: phy: add phy_speed_down_core and phy_resolve_min_speed

phy_speed_down_core provides most of the functionality for
phy_speed_down. It makes use of new helper phy_resolve_min_speed that is
based on the sorting of the settings[] array. In certain cases it may be
helpful to be able to exclude legacy half duplex modes, therefore
prepare phy_resolve_min_speed() for it.

v2:
- rename __phy_speed_down to phy_speed_down_core

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/phy/phy-core.c | 28 ++++++++++++++++++++++++++++
 include/linux/phy.h        |  1 +
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 95f1e85d0d87..369903d9b6ec 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -315,6 +315,34 @@ void phy_resolve_aneg_linkmode(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(phy_resolve_aneg_linkmode);
 
+static int phy_resolve_min_speed(struct phy_device *phydev, bool fdx_only)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(common);
+	int i = ARRAY_SIZE(settings);
+
+	linkmode_and(common, phydev->lp_advertising, phydev->advertising);
+
+	while (--i >= 0) {
+		if (test_bit(settings[i].bit, common)) {
+			if (fdx_only && settings[i].duplex != DUPLEX_FULL)
+				continue;
+			return settings[i].speed;
+		}
+	}
+
+	return SPEED_UNKNOWN;
+}
+
+int phy_speed_down_core(struct phy_device *phydev)
+{
+	int min_common_speed = phy_resolve_min_speed(phydev, true);
+
+	if (min_common_speed == SPEED_UNKNOWN)
+		return -EINVAL;
+
+	return __set_linkmode_max_speed(min_common_speed, phydev->advertising);
+}
+
 static void mmd_phy_indirect(struct mii_bus *bus, int phy_addr, int devad,
 			     u16 regnum)
 {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 781f4810ceba..62fdc7ff2b24 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -665,6 +665,7 @@ size_t phy_speeds(unsigned int *speeds, size_t size,
 		  unsigned long *mask);
 void of_set_phy_supported(struct phy_device *phydev);
 void of_set_phy_eee_broken(struct phy_device *phydev);
+int phy_speed_down_core(struct phy_device *phydev);
 
 /**
  * phy_is_started - Convenience function to check whether PHY is started
-- 
cgit v1.2.3


From 65b27995a4ab8fc51b4adc6b4dcdca20f7a595bb Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 12 Aug 2019 23:52:19 +0200
Subject: net: phy: let phy_speed_down/up support speeds >1Gbps

So far phy_speed_down/up can be used up to 1Gbps only. Remove this
restriction by using new helper __phy_speed_down. New member adv_old
in struct phy_device is used by phy_speed_up to restore the advertised
modes before calling phy_speed_down. Don't simply advertise what is
supported because a user may have intentionally removed modes from
advertisement.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/phy/phy.c | 60 ++++++++++++++-------------------------------------
 include/linux/phy.h   |  2 ++
 2 files changed, 18 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index ef7aa738e0dc..f3adea9ef400 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -608,38 +608,21 @@ static int phy_poll_aneg_done(struct phy_device *phydev)
  */
 int phy_speed_down(struct phy_device *phydev, bool sync)
 {
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old);
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(adv);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_tmp);
 	int ret;
 
 	if (phydev->autoneg != AUTONEG_ENABLE)
 		return 0;
 
-	linkmode_copy(adv_old, phydev->advertising);
-	linkmode_copy(adv, phydev->lp_advertising);
-	linkmode_and(adv, adv, phydev->supported);
-
-	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, adv) ||
-	    linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, adv)) {
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-				   phydev->advertising);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
-				   phydev->advertising);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
-				   phydev->advertising);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-				   phydev->advertising);
-	} else if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-				     adv) ||
-		   linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
-				     adv)) {
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
-				   phydev->advertising);
-		linkmode_clear_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-				   phydev->advertising);
-	}
+	linkmode_copy(adv_tmp, phydev->advertising);
+
+	ret = phy_speed_down_core(phydev);
+	if (ret)
+		return ret;
 
-	if (linkmode_equal(phydev->advertising, adv_old))
+	linkmode_copy(phydev->adv_old, adv_tmp);
+
+	if (linkmode_equal(phydev->advertising, adv_tmp))
 		return 0;
 
 	ret = phy_config_aneg(phydev);
@@ -658,30 +641,19 @@ EXPORT_SYMBOL_GPL(phy_speed_down);
  */
 int phy_speed_up(struct phy_device *phydev)
 {
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(all_speeds) = { 0, };
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(not_speeds);
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old);
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(speeds);
-
-	linkmode_copy(adv_old, phydev->advertising);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_tmp);
 
 	if (phydev->autoneg != AUTONEG_ENABLE)
 		return 0;
 
-	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, all_speeds);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, all_speeds);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, all_speeds);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, all_speeds);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, all_speeds);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, all_speeds);
+	if (linkmode_empty(phydev->adv_old))
+		return 0;
 
-	linkmode_andnot(not_speeds, adv_old, all_speeds);
-	linkmode_copy(supported, phydev->supported);
-	linkmode_and(speeds, supported, all_speeds);
-	linkmode_or(phydev->advertising, not_speeds, speeds);
+	linkmode_copy(adv_tmp, phydev->advertising);
+	linkmode_copy(phydev->advertising, phydev->adv_old);
+	linkmode_zero(phydev->adv_old);
 
-	if (linkmode_equal(phydev->advertising, adv_old))
+	if (linkmode_equal(phydev->advertising, adv_tmp))
 		return 0;
 
 	return phy_config_aneg(phydev);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 62fdc7ff2b24..5ac7d21375ac 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -403,6 +403,8 @@ struct phy_device {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(advertising);
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
+	/* used with phy_speed_down */
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(adv_old);
 
 	/* Energy efficient ethernet modes which should be prohibited */
 	u32 eee_broken_modes;
-- 
cgit v1.2.3


From d00ee64e1dcf09b3afefd1340f3e9eb637272714 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 12 Aug 2019 13:07:07 -0700
Subject: netlink: Fix nlmsg_parse as a wrapper for strict message parsing

Eric reported a syzbot warning:

BUG: KMSAN: uninit-value in nh_valid_get_del_req+0x6f1/0x8c0 net/ipv4/nexthop.c:1510
CPU: 0 PID: 11812 Comm: syz-executor444 Not tainted 5.3.0-rc3+ #17
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x191/0x1f0 lib/dump_stack.c:113
 kmsan_report+0x162/0x2d0 mm/kmsan/kmsan_report.c:109
 __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:294
 nh_valid_get_del_req+0x6f1/0x8c0 net/ipv4/nexthop.c:1510
 rtm_del_nexthop+0x1b1/0x610 net/ipv4/nexthop.c:1543
 rtnetlink_rcv_msg+0x115a/0x1580 net/core/rtnetlink.c:5223
 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2477
 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5241
 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline]
 netlink_unicast+0xf6c/0x1050 net/netlink/af_netlink.c:1328
 netlink_sendmsg+0x110f/0x1330 net/netlink/af_netlink.c:1917
 sock_sendmsg_nosec net/socket.c:637 [inline]
 sock_sendmsg net/socket.c:657 [inline]
 ___sys_sendmsg+0x14ff/0x1590 net/socket.c:2311
 __sys_sendmmsg+0x53a/0xae0 net/socket.c:2413
 __do_sys_sendmmsg net/socket.c:2442 [inline]
 __se_sys_sendmmsg+0xbd/0xe0 net/socket.c:2439
 __x64_sys_sendmmsg+0x56/0x70 net/socket.c:2439
 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:297
 entry_SYSCALL_64_after_hwframe+0x63/0xe7

The root cause is nlmsg_parse calling __nla_parse which means the
header struct size is not checked.

nlmsg_parse should be a wrapper around __nlmsg_parse with
NL_VALIDATE_STRICT for the validate argument very much like
nlmsg_parse_deprecated is for NL_VALIDATE_LIBERAL.

Fixes: 3de6440354465 ("netlink: re-add parse/validate functions in strict mode")
Reported-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 include/net/netlink.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index e4650e5b64a1..b140c8f1be22 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -684,9 +684,8 @@ static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen,
 			      const struct nla_policy *policy,
 			      struct netlink_ext_ack *extack)
 {
-	return __nla_parse(tb, maxtype, nlmsg_attrdata(nlh, hdrlen),
-			   nlmsg_attrlen(nlh, hdrlen), policy,
-			   NL_VALIDATE_STRICT, extack);
+	return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy,
+			     NL_VALIDATE_STRICT, extack);
 }
 
 /**
-- 
cgit v1.2.3


From 5532c6282787124a7fd778d779d19148ae87479e Mon Sep 17 00:00:00 2001
From: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Date: Thu, 4 Jul 2019 17:58:12 -0300
Subject: media: dvb_frontend.h: Fix shifting signed 32-bit value problem

Fix DVBFE_ALGO_RECOVERY and DVBFE_ALGO_SEARCH_ERROR use BIT macro which
fixes undefined behavior error by certain compilers.

Also changed all other bit shifted definitions to use macro for better
readability.

Signed-off-by: Luke Nowakowski-Krijger <lnowakow@eng.ucsd.edu>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/dvb_frontend.h | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/media/dvb_frontend.h b/include/media/dvb_frontend.h
index f05cd7b94a2c..0d76fa4551b3 100644
--- a/include/media/dvb_frontend.h
+++ b/include/media/dvb_frontend.h
@@ -41,6 +41,7 @@
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/bitops.h>
 
 #include <linux/dvb/frontend.h>
 
@@ -141,10 +142,10 @@ struct analog_parameters {
  *	These devices have AUTO recovery capabilities from LOCK failure
  */
 enum dvbfe_algo {
-	DVBFE_ALGO_HW			= (1 <<  0),
-	DVBFE_ALGO_SW			= (1 <<  1),
-	DVBFE_ALGO_CUSTOM		= (1 <<  2),
-	DVBFE_ALGO_RECOVERY		= (1 << 31)
+	DVBFE_ALGO_HW			= BIT(0),
+	DVBFE_ALGO_SW			= BIT(1),
+	DVBFE_ALGO_CUSTOM		= BIT(2),
+	DVBFE_ALGO_RECOVERY		= BIT(31),
 };
 
 /**
@@ -170,12 +171,12 @@ enum dvbfe_algo {
  *	The frontend search algorithm was requested to search again
  */
 enum dvbfe_search {
-	DVBFE_ALGO_SEARCH_SUCCESS	= (1 <<  0),
-	DVBFE_ALGO_SEARCH_ASLEEP	= (1 <<  1),
-	DVBFE_ALGO_SEARCH_FAILED	= (1 <<  2),
-	DVBFE_ALGO_SEARCH_INVALID	= (1 <<  3),
-	DVBFE_ALGO_SEARCH_AGAIN		= (1 <<  4),
-	DVBFE_ALGO_SEARCH_ERROR		= (1 << 31),
+	DVBFE_ALGO_SEARCH_SUCCESS	= BIT(0),
+	DVBFE_ALGO_SEARCH_ASLEEP	= BIT(1),
+	DVBFE_ALGO_SEARCH_FAILED	= BIT(2),
+	DVBFE_ALGO_SEARCH_INVALID	= BIT(3),
+	DVBFE_ALGO_SEARCH_AGAIN		= BIT(4),
+	DVBFE_ALGO_SEARCH_ERROR		= BIT(31),
 };
 
 /**
-- 
cgit v1.2.3


From 92ffdb61f65d5d27bc750b6249c381c89767aaba Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Aug 2019 04:07:38 -0300
Subject: media: rc: add include guard to rc-map.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/media/rc-map.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 3a7f8728f6ec..32e49dcd1348 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -5,6 +5,9 @@
  * Copyright (c) 2010 by Mauro Carvalho Chehab
  */
 
+#ifndef _MEDIA_RC_MAP_H
+#define _MEDIA_RC_MAP_H
+
 #include <linux/input.h>
 #include <uapi/linux/lirc.h>
 
@@ -274,3 +277,5 @@ struct rc_map *rc_map_get(const char *name);
  * Please, do not just append newer Remote Controller names at the end.
  * The names should be ordered in alphabetical order
  */
+
+#endif /* _MEDIA_RC_MAP_H */
-- 
cgit v1.2.3


From 68e03b85474a51ec1921b4d13204782594ef7223 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 31 Jul 2019 20:38:14 +0800
Subject: gpio: Fix build error of function redefinition

when do randbuilding, I got this error:

In file included from drivers/hwmon/pmbus/ucd9000.c:19:0:
./include/linux/gpio/driver.h:576:1: error: redefinition of gpiochip_add_pin_range
 gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 ^~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/hwmon/pmbus/ucd9000.c:18:0:
./include/linux/gpio.h:245:1: note: previous definition of gpiochip_add_pin_range was here
 gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
 ^~~~~~~~~~~~~~~~~~~~~~

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: 964cb341882f ("gpio: move pincontrol calls to <linux/gpio/driver.h>")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20190731123814.46624-1-yuehaibing@huawei.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 40915b461f18..f757a58191a6 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -241,30 +241,6 @@ static inline int irq_to_gpio(unsigned irq)
 	return -EINVAL;
 }
 
-static inline int
-gpiochip_add_pin_range(struct gpio_chip *chip, const char *pinctl_name,
-		       unsigned int gpio_offset, unsigned int pin_offset,
-		       unsigned int npins)
-{
-	WARN_ON(1);
-	return -EINVAL;
-}
-
-static inline int
-gpiochip_add_pingroup_range(struct gpio_chip *chip,
-			struct pinctrl_dev *pctldev,
-			unsigned int gpio_offset, const char *pin_group)
-{
-	WARN_ON(1);
-	return -EINVAL;
-}
-
-static inline void
-gpiochip_remove_pin_ranges(struct gpio_chip *chip)
-{
-	WARN_ON(1);
-}
-
 static inline int devm_gpio_request(struct device *dev, unsigned gpio,
 				    const char *label)
 {
-- 
cgit v1.2.3


From 06d9532fa6b34f12a6d75711162d47c17c1add72 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 13 Aug 2019 22:26:36 +0100
Subject: rxrpc: Fix read-after-free in rxrpc_queue_local()

rxrpc_queue_local() attempts to queue the local endpoint it is given and
then, if successful, prints a trace line.  The trace line includes the
current usage count - but we're not allowed to look at the local endpoint
at this point as we passed our ref on it to the workqueue.

Fix this by reading the usage count before queuing the work item.

Also fix the reading of local->debug_id for trace lines, which must be done
with the same consideration as reading the usage count.

Fixes: 09d2bf595db4 ("rxrpc: Add a tracepoint to track rxrpc_local refcounting")
Reported-by: syzbot+78e71c5bab4f76a6a719@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h |  6 +++---
 net/rxrpc/local_object.c     | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index cc1d060cbf13..fa06b528c73c 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -498,10 +498,10 @@ rxrpc_tx_points;
 #define E_(a, b)	{ a, b }
 
 TRACE_EVENT(rxrpc_local,
-	    TP_PROTO(struct rxrpc_local *local, enum rxrpc_local_trace op,
+	    TP_PROTO(unsigned int local_debug_id, enum rxrpc_local_trace op,
 		     int usage, const void *where),
 
-	    TP_ARGS(local, op, usage, where),
+	    TP_ARGS(local_debug_id, op, usage, where),
 
 	    TP_STRUCT__entry(
 		    __field(unsigned int,	local		)
@@ -511,7 +511,7 @@ TRACE_EVENT(rxrpc_local,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->local = local->debug_id;
+		    __entry->local = local_debug_id;
 		    __entry->op = op;
 		    __entry->usage = usage;
 		    __entry->where = where;
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index c45765b7263e..72a6e12a9304 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -93,7 +93,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
 		local->debug_id = atomic_inc_return(&rxrpc_debug_id);
 		memcpy(&local->srx, srx, sizeof(*srx));
 		local->srx.srx_service = 0;
-		trace_rxrpc_local(local, rxrpc_local_new, 1, NULL);
+		trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL);
 	}
 
 	_leave(" = %p", local);
@@ -321,7 +321,7 @@ struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local)
 	int n;
 
 	n = atomic_inc_return(&local->usage);
-	trace_rxrpc_local(local, rxrpc_local_got, n, here);
+	trace_rxrpc_local(local->debug_id, rxrpc_local_got, n, here);
 	return local;
 }
 
@@ -335,7 +335,8 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
 	if (local) {
 		int n = atomic_fetch_add_unless(&local->usage, 1, 0);
 		if (n > 0)
-			trace_rxrpc_local(local, rxrpc_local_got, n + 1, here);
+			trace_rxrpc_local(local->debug_id, rxrpc_local_got,
+					  n + 1, here);
 		else
 			local = NULL;
 	}
@@ -343,16 +344,16 @@ struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local)
 }
 
 /*
- * Queue a local endpoint unless it has become unreferenced and pass the
- * caller's reference to the work item.
+ * Queue a local endpoint and pass the caller's reference to the work item.
  */
 void rxrpc_queue_local(struct rxrpc_local *local)
 {
 	const void *here = __builtin_return_address(0);
+	unsigned int debug_id = local->debug_id;
+	int n = atomic_read(&local->usage);
 
 	if (rxrpc_queue_work(&local->processor))
-		trace_rxrpc_local(local, rxrpc_local_queued,
-				  atomic_read(&local->usage), here);
+		trace_rxrpc_local(debug_id, rxrpc_local_queued, n, here);
 	else
 		rxrpc_put_local(local);
 }
@@ -367,7 +368,7 @@ void rxrpc_put_local(struct rxrpc_local *local)
 
 	if (local) {
 		n = atomic_dec_return(&local->usage);
-		trace_rxrpc_local(local, rxrpc_local_put, n, here);
+		trace_rxrpc_local(local->debug_id, rxrpc_local_put, n, here);
 
 		if (n == 0)
 			call_rcu(&local->rcu, rxrpc_local_rcu);
@@ -456,7 +457,7 @@ static void rxrpc_local_processor(struct work_struct *work)
 		container_of(work, struct rxrpc_local, processor);
 	bool again;
 
-	trace_rxrpc_local(local, rxrpc_local_processing,
+	trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
 			  atomic_read(&local->usage), NULL);
 
 	do {
-- 
cgit v1.2.3


From af80559b4d9cd481c63505edbe425ff6bad4ab8d Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Fri, 9 Aug 2019 17:40:47 +0200
Subject: i2c: replace i2c_new_secondary_device with an ERR_PTR variant

In the general move to have i2c_new_*_device functions which return
ERR_PTR instead of NULL, this patch converts i2c_new_secondary_device().

There are only few users, so this patch converts the I2C core and all
users in one go. The function gets renamed to i2c_new_ancillary_device()
so out-of-tree users will get a build failure to understand they need to
adapt their error checking code.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com> # adv748x
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> # adv7511 + adv7604
Reviewed-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> # adv7604
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 18 +++++++++---------
 drivers/i2c/i2c-core-base.c                  | 10 +++++-----
 drivers/media/i2c/adv748x/adv748x-core.c     |  6 +++---
 drivers/media/i2c/adv7604.c                  | 22 ++++++++++++----------
 include/linux/i2c.h                          |  2 +-
 5 files changed, 30 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index f6d2681f6927..9e13e466e72c 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -981,10 +981,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv)
 {
 	int ret;
 
-	adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec",
+	adv->i2c_cec = i2c_new_ancillary_device(adv->i2c_main, "cec",
 						ADV7511_CEC_I2C_ADDR_DEFAULT);
-	if (!adv->i2c_cec)
-		return -EINVAL;
+	if (IS_ERR(adv->i2c_cec))
+		return PTR_ERR(adv->i2c_cec);
 	i2c_set_clientdata(adv->i2c_cec, adv);
 
 	adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
@@ -1165,20 +1165,20 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 
 	adv7511_packet_disable(adv7511, 0xffff);
 
-	adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid",
+	adv7511->i2c_edid = i2c_new_ancillary_device(i2c, "edid",
 					ADV7511_EDID_I2C_ADDR_DEFAULT);
-	if (!adv7511->i2c_edid) {
-		ret = -EINVAL;
+	if (IS_ERR(adv7511->i2c_edid)) {
+		ret = PTR_ERR(adv7511->i2c_edid);
 		goto uninit_regulators;
 	}
 
 	regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR,
 		     adv7511->i2c_edid->addr << 1);
 
-	adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet",
+	adv7511->i2c_packet = i2c_new_ancillary_device(i2c, "packet",
 					ADV7511_PACKET_I2C_ADDR_DEFAULT);
-	if (!adv7511->i2c_packet) {
-		ret = -EINVAL;
+	if (IS_ERR(adv7511->i2c_packet)) {
+		ret = PTR_ERR(adv7511->i2c_packet);
 		goto err_i2c_unregister_edid;
 	}
 
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index b4d84bd475da..ca10525e6d94 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -964,7 +964,7 @@ struct i2c_client *devm_i2c_new_dummy_device(struct device *dev,
 EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
 
 /**
- * i2c_new_secondary_device - Helper to get the instantiated secondary address
+ * i2c_new_ancillary_device - Helper to get the instantiated secondary address
  * and create the associated device
  * @client: Handle to the primary client
  * @name: Handle to specify which secondary address to get
@@ -983,9 +983,9 @@ EXPORT_SYMBOL_GPL(devm_i2c_new_dummy_device);
  * cell whose "reg-names" value matches the slave name.
  *
  * This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
+ * i2c_unregister_device(); or an ERR_PTR to describe the error.
  */
-struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
+struct i2c_client *i2c_new_ancillary_device(struct i2c_client *client,
 						const char *name,
 						u16 default_addr)
 {
@@ -1000,9 +1000,9 @@ struct i2c_client *i2c_new_secondary_device(struct i2c_client *client,
 	}
 
 	dev_dbg(&client->adapter->dev, "Address for %s : 0x%x\n", name, addr);
-	return i2c_new_dummy(client->adapter, addr);
+	return i2c_new_dummy_device(client->adapter, addr);
 }
-EXPORT_SYMBOL_GPL(i2c_new_secondary_device);
+EXPORT_SYMBOL_GPL(i2c_new_ancillary_device);
 
 /* ------------------------------------------------------------------------- */
 
diff --git a/drivers/media/i2c/adv748x/adv748x-core.c b/drivers/media/i2c/adv748x/adv748x-core.c
index f57cd77a32fa..2567de2b0037 100644
--- a/drivers/media/i2c/adv748x/adv748x-core.c
+++ b/drivers/media/i2c/adv748x/adv748x-core.c
@@ -183,14 +183,14 @@ static int adv748x_initialise_clients(struct adv748x_state *state)
 	int ret;
 
 	for (i = ADV748X_PAGE_DPLL; i < ADV748X_PAGE_MAX; ++i) {
-		state->i2c_clients[i] = i2c_new_secondary_device(
+		state->i2c_clients[i] = i2c_new_ancillary_device(
 				state->client,
 				adv748x_default_addresses[i].name,
 				adv748x_default_addresses[i].default_addr);
 
-		if (state->i2c_clients[i] == NULL) {
+		if (IS_ERR(state->i2c_clients[i])) {
 			adv_err(state, "failed to create i2c client %u\n", i);
-			return -ENOMEM;
+			return PTR_ERR(state->i2c_clients[i]);
 		}
 
 		ret = adv748x_configure_regmap(state, i);
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index 28a84bf9f8a9..2dedd6ebb236 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -2862,10 +2862,8 @@ static void adv76xx_unregister_clients(struct adv76xx_state *state)
 {
 	unsigned int i;
 
-	for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i) {
-		if (state->i2c_clients[i])
-			i2c_unregister_device(state->i2c_clients[i]);
-	}
+	for (i = 1; i < ARRAY_SIZE(state->i2c_clients); ++i)
+		i2c_unregister_device(state->i2c_clients[i]);
 }
 
 static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
@@ -2878,14 +2876,14 @@ static struct i2c_client *adv76xx_dummy_client(struct v4l2_subdev *sd,
 	struct i2c_client *new_client;
 
 	if (pdata && pdata->i2c_addresses[page])
-		new_client = i2c_new_dummy(client->adapter,
+		new_client = i2c_new_dummy_device(client->adapter,
 					   pdata->i2c_addresses[page]);
 	else
-		new_client = i2c_new_secondary_device(client,
+		new_client = i2c_new_ancillary_device(client,
 				adv76xx_default_addresses[page].name,
 				adv76xx_default_addresses[page].default_addr);
 
-	if (new_client)
+	if (!IS_ERR(new_client))
 		io_write(sd, io_reg, new_client->addr << 1);
 
 	return new_client;
@@ -3516,15 +3514,19 @@ static int adv76xx_probe(struct i2c_client *client,
 	}
 
 	for (i = 1; i < ADV76XX_PAGE_MAX; ++i) {
+		struct i2c_client *dummy_client;
+
 		if (!(BIT(i) & state->info->page_mask))
 			continue;
 
-		state->i2c_clients[i] = adv76xx_dummy_client(sd, i);
-		if (!state->i2c_clients[i]) {
-			err = -EINVAL;
+		dummy_client = adv76xx_dummy_client(sd, i);
+		if (IS_ERR(dummy_client)) {
+			err = PTR_ERR(dummy_client);
 			v4l2_err(sd, "failed to create i2c client %u\n", i);
 			goto err_i2c;
 		}
+
+		state->i2c_clients[i] = dummy_client;
 	}
 
 	INIT_DELAYED_WORK(&state->delayed_work_enable_hotplug,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index fa5552c2307b..ebbe024dd9e0 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -473,7 +473,7 @@ extern struct i2c_client *
 devm_i2c_new_dummy_device(struct device *dev, struct i2c_adapter *adap, u16 address);
 
 extern struct i2c_client *
-i2c_new_secondary_device(struct i2c_client *client,
+i2c_new_ancillary_device(struct i2c_client *client,
 				const char *name,
 				u16 default_addr);
 
-- 
cgit v1.2.3


From 5bc7f990cd98f3f7977d62bfe75c50b4b33d13f6 Mon Sep 17 00:00:00 2001
From: Marek Behún <marek.behun@nic.cz>
Date: Mon, 12 Aug 2019 18:11:14 +0200
Subject: bus: Add support for Moxtet bus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On the Turris Mox router different modules can be connected to the main
CPU board: currently a module with a SFP cage, a module with MiniPCIe
connector, a PCIe pass-through MiniPCIe connector module, a 4-port
switch module, an 8-port switch module, and a 4-port USB3 module.

For example:
  [CPU]-[PCIe-pass-through]-[PCIe]-[8-port switch]-[8-port switch]-[SFP]

Each of this modules has an input and output shift register, and these
are connected via SPI to the CPU board.

Via SPI we are able to discover which modules are connected, in which
order, and we can also read some information about the modules (eg.
their interrupt status), and configure them.
From each module 8 bits can be read (of which low 4 bits identify the
module) and 8 bits can be written.

For example from the module with a SFP cage we can read the LOS,
TX-FAULT and MOD-DEF0 signals, while we can write TX-DISABLE and
RATE-SELECT signals.

This driver creates a new bus type, called "moxtet". For each Mox module
it finds via SPI, it creates a new device on the moxtet bus so that
drivers can be written for them.

It also implements a virtual interrupt controller for the modules which
send their interrupt status over the SPI shift register. These modules
do this in addition to sending their interrupt status via the shared
interrupt line. When the shared interrupt is triggered, we read from the
shift register and handle IRQs for all devices which are in interrupt.

The topology of how Mox modules are connected can then be read by
listing /sys/bus/moxtet/devices.

Link: https://lore.kernel.org/r/20190812161118.21476-2-marek.behun@nic.cz
Signed-off-by: Marek Behún <marek.behun@nic.cz>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 MAINTAINERS                      |   7 +
 drivers/bus/Kconfig              |  10 +
 drivers/bus/Makefile             |   1 +
 drivers/bus/moxtet.c             | 886 +++++++++++++++++++++++++++++++++++++++
 include/dt-bindings/bus/moxtet.h |  16 +
 include/linux/moxtet.h           | 109 +++++
 6 files changed, 1029 insertions(+)
 create mode 100644 drivers/bus/moxtet.c
 create mode 100644 include/dt-bindings/bus/moxtet.h
 create mode 100644 include/linux/moxtet.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index 783569e3c4b4..01309a228593 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1626,6 +1626,13 @@ F:	drivers/clocksource/timer-atlas7.c
 N:	[^a-z]sirf
 X:	drivers/gnss
 
+ARM/CZ.NIC TURRIS MOX SUPPORT
+M:	Marek Behun <marek.behun@nic.cz>
+W:	http://mox.turris.cz
+S:	Maintained
+F:	include/linux/moxtet.h
+F:	drivers/bus/moxtet.c
+
 ARM/EBSA110 MACHINE SUPPORT
 M:	Russell King <linux@armlinux.org.uk>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 1851112ccc29..6b331061d34b 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -29,6 +29,16 @@ config BRCMSTB_GISB_ARB
 	  arbiter. This driver provides timeout and target abort error handling
 	  and internal bus master decoding.
 
+config MOXTET
+	tristate "CZ.NIC Turris Mox module configuration bus"
+	depends on SPI_MASTER && OF
+	help
+	  Say yes here to add support for the module configuration bus found
+	  on CZ.NIC's Turris Mox. This is needed for the ability to discover
+	  the order in which the modules are connected and to get/set some of
+	  their settings. For example the GPIOs on Mox SFP module are
+	  configured through this bus.
+
 config HISILICON_LPC
 	bool "Support for ISA I/O space on HiSilicon Hip06/7"
 	depends on ARM64 && (ARCH_HISI || COMPILE_TEST)
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index ca300b1914ce..16b43d3468c6 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_ARM_CCI)		+= arm-cci.o
 
 obj-$(CONFIG_HISILICON_LPC)	+= hisi_lpc.o
 obj-$(CONFIG_BRCMSTB_GISB_ARB)	+= brcmstb_gisb.o
+obj-$(CONFIG_MOXTET)		+= moxtet.o
 
 # DPAA2 fsl-mc bus
 obj-$(CONFIG_FSL_MC_BUS)	+= fsl-mc/
diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c
new file mode 100644
index 000000000000..1ee4570e7e17
--- /dev/null
+++ b/drivers/bus/moxtet.c
@@ -0,0 +1,886 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Turris Mox module configuration bus driver
+ *
+ * Copyright (C) 2019 Marek Behun <marek.behun@nic.cz>
+ */
+
+#include <dt-bindings/bus/moxtet.h>
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moxtet.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/spi/spi.h>
+
+/*
+ * @name:	module name for sysfs
+ * @hwirq_base:	base index for IRQ for this module (-1 if no IRQs)
+ * @nirqs:	how many interrupts does the shift register provide
+ * @desc:	module description for kernel log
+ */
+static const struct {
+	const char *name;
+	int hwirq_base;
+	int nirqs;
+	const char *desc;
+} mox_module_table[] = {
+	/* do not change order of this array! */
+	{ NULL,		 0,			0, NULL },
+	{ "sfp",	-1,			0, "MOX D (SFP cage)" },
+	{ "pci",	MOXTET_IRQ_PCI,		1, "MOX B (Mini-PCIe)" },
+	{ "topaz",	MOXTET_IRQ_TOPAZ,	1, "MOX C (4 port switch)" },
+	{ "peridot",	MOXTET_IRQ_PERIDOT(0),	1, "MOX E (8 port switch)" },
+	{ "usb3",	MOXTET_IRQ_USB3,	2, "MOX F (USB 3.0)" },
+	{ "pci-bridge",	-1,			0, "MOX G (Mini-PCIe bridge)" },
+};
+
+static inline bool mox_module_known(unsigned int id)
+{
+	return id >= TURRIS_MOX_MODULE_FIRST && id <= TURRIS_MOX_MODULE_LAST;
+}
+
+static inline const char *mox_module_name(unsigned int id)
+{
+	if (mox_module_known(id))
+		return mox_module_table[id].name;
+	else
+		return "unknown";
+}
+
+#define DEF_MODULE_ATTR(name, fmt, ...)					\
+static ssize_t								\
+module_##name##_show(struct device *dev, struct device_attribute *a,	\
+		     char *buf)						\
+{									\
+	struct moxtet_device *mdev = to_moxtet_device(dev);		\
+	return sprintf(buf, (fmt), __VA_ARGS__);			\
+}									\
+static DEVICE_ATTR_RO(module_##name)
+
+DEF_MODULE_ATTR(id, "0x%x\n", mdev->id);
+DEF_MODULE_ATTR(name, "%s\n", mox_module_name(mdev->id));
+DEF_MODULE_ATTR(description, "%s\n",
+		mox_module_known(mdev->id) ? mox_module_table[mdev->id].desc
+					   : "");
+
+static struct attribute *moxtet_dev_attrs[] = {
+	&dev_attr_module_id.attr,
+	&dev_attr_module_name.attr,
+	&dev_attr_module_description.attr,
+	NULL,
+};
+
+static const struct attribute_group moxtet_dev_group = {
+	.attrs = moxtet_dev_attrs,
+};
+
+static const struct attribute_group *moxtet_dev_groups[] = {
+	&moxtet_dev_group,
+	NULL,
+};
+
+static int moxtet_match(struct device *dev, struct device_driver *drv)
+{
+	struct moxtet_device *mdev = to_moxtet_device(dev);
+	struct moxtet_driver *tdrv = to_moxtet_driver(drv);
+	const enum turris_mox_module_id *t;
+
+	if (of_driver_match_device(dev, drv))
+		return 1;
+
+	if (!tdrv->id_table)
+		return 0;
+
+	for (t = tdrv->id_table; *t; ++t)
+		if (*t == mdev->id)
+			return 1;
+
+	return 0;
+}
+
+struct bus_type moxtet_bus_type = {
+	.name		= "moxtet",
+	.dev_groups	= moxtet_dev_groups,
+	.match		= moxtet_match,
+};
+EXPORT_SYMBOL_GPL(moxtet_bus_type);
+
+int __moxtet_register_driver(struct module *owner,
+			     struct moxtet_driver *mdrv)
+{
+	mdrv->driver.owner = owner;
+	mdrv->driver.bus = &moxtet_bus_type;
+	return driver_register(&mdrv->driver);
+}
+EXPORT_SYMBOL_GPL(__moxtet_register_driver);
+
+static int moxtet_dev_check(struct device *dev, void *data)
+{
+	struct moxtet_device *mdev = to_moxtet_device(dev);
+	struct moxtet_device *new_dev = data;
+
+	if (mdev->moxtet == new_dev->moxtet && mdev->id == new_dev->id &&
+	    mdev->idx == new_dev->idx)
+		return -EBUSY;
+	return 0;
+}
+
+static void moxtet_dev_release(struct device *dev)
+{
+	struct moxtet_device *mdev = to_moxtet_device(dev);
+
+	put_device(mdev->moxtet->dev);
+	kfree(mdev);
+}
+
+static struct moxtet_device *
+moxtet_alloc_device(struct moxtet *moxtet)
+{
+	struct moxtet_device *dev;
+
+	if (!get_device(moxtet->dev))
+		return NULL;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		put_device(moxtet->dev);
+		return NULL;
+	}
+
+	dev->moxtet = moxtet;
+	dev->dev.parent = moxtet->dev;
+	dev->dev.bus = &moxtet_bus_type;
+	dev->dev.release = moxtet_dev_release;
+
+	device_initialize(&dev->dev);
+
+	return dev;
+}
+
+static int moxtet_add_device(struct moxtet_device *dev)
+{
+	static DEFINE_MUTEX(add_mutex);
+	int ret;
+
+	if (dev->idx >= TURRIS_MOX_MAX_MODULES || dev->id > 0xf)
+		return -EINVAL;
+
+	dev_set_name(&dev->dev, "moxtet-%s.%u", mox_module_name(dev->id),
+		     dev->idx);
+
+	mutex_lock(&add_mutex);
+
+	ret = bus_for_each_dev(&moxtet_bus_type, NULL, dev,
+			       moxtet_dev_check);
+	if (ret)
+		goto done;
+
+	ret = device_add(&dev->dev);
+	if (ret < 0)
+		dev_err(dev->moxtet->dev, "can't add %s, status %d\n",
+			dev_name(dev->moxtet->dev), ret);
+
+done:
+	mutex_unlock(&add_mutex);
+	return ret;
+}
+
+static int __unregister(struct device *dev, void *null)
+{
+	if (dev->of_node) {
+		of_node_clear_flag(dev->of_node, OF_POPULATED);
+		of_node_put(dev->of_node);
+	}
+
+	device_unregister(dev);
+
+	return 0;
+}
+
+static struct moxtet_device *
+of_register_moxtet_device(struct moxtet *moxtet, struct device_node *nc)
+{
+	struct moxtet_device *dev;
+	u32 val;
+	int ret;
+
+	dev = moxtet_alloc_device(moxtet);
+	if (!dev) {
+		dev_err(moxtet->dev,
+			"Moxtet device alloc error for %pOF\n", nc);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = of_property_read_u32(nc, "reg", &val);
+	if (ret) {
+		dev_err(moxtet->dev, "%pOF has no valid 'reg' property (%d)\n",
+			nc, ret);
+		goto err_put;
+	}
+
+	dev->idx = val;
+
+	if (dev->idx >= TURRIS_MOX_MAX_MODULES) {
+		dev_err(moxtet->dev, "%pOF Moxtet address 0x%x out of range\n",
+			nc, dev->idx);
+		ret = -EINVAL;
+		goto err_put;
+	}
+
+	dev->id = moxtet->modules[dev->idx];
+
+	if (!dev->id) {
+		dev_err(moxtet->dev, "%pOF Moxtet address 0x%x is empty\n", nc,
+			dev->idx);
+		ret = -ENODEV;
+		goto err_put;
+	}
+
+	of_node_get(nc);
+	dev->dev.of_node = nc;
+
+	ret = moxtet_add_device(dev);
+	if (ret) {
+		dev_err(moxtet->dev,
+			"Moxtet device register error for %pOF\n", nc);
+		of_node_put(nc);
+		goto err_put;
+	}
+
+	return dev;
+
+err_put:
+	put_device(&dev->dev);
+	return ERR_PTR(ret);
+}
+
+static void of_register_moxtet_devices(struct moxtet *moxtet)
+{
+	struct moxtet_device *dev;
+	struct device_node *nc;
+
+	if (!moxtet->dev->of_node)
+		return;
+
+	for_each_available_child_of_node(moxtet->dev->of_node, nc) {
+		if (of_node_test_and_set_flag(nc, OF_POPULATED))
+			continue;
+		dev = of_register_moxtet_device(moxtet, nc);
+		if (IS_ERR(dev)) {
+			dev_warn(moxtet->dev,
+				 "Failed to create Moxtet device for %pOF\n",
+				 nc);
+			of_node_clear_flag(nc, OF_POPULATED);
+		}
+	}
+}
+
+static void
+moxtet_register_devices_from_topology(struct moxtet *moxtet)
+{
+	struct moxtet_device *dev;
+	int i, ret;
+
+	for (i = 0; i < moxtet->count; ++i) {
+		dev = moxtet_alloc_device(moxtet);
+		if (!dev) {
+			dev_err(moxtet->dev, "Moxtet device %u alloc error\n",
+				i);
+			continue;
+		}
+
+		dev->idx = i;
+		dev->id = moxtet->modules[i];
+
+		ret = moxtet_add_device(dev);
+		if (ret && ret != -EBUSY) {
+			put_device(&dev->dev);
+			dev_err(moxtet->dev,
+				"Moxtet device %u register error: %i\n", i,
+				ret);
+		}
+	}
+}
+
+/*
+ * @nsame:	how many modules with same id are already in moxtet->modules
+ */
+static int moxtet_set_irq(struct moxtet *moxtet, int idx, int id, int nsame)
+{
+	int i, first;
+	struct moxtet_irqpos *pos;
+
+	first = mox_module_table[id].hwirq_base +
+		nsame * mox_module_table[id].nirqs;
+
+	if (first + mox_module_table[id].nirqs > MOXTET_NIRQS)
+		return -EINVAL;
+
+	for (i = 0; i < mox_module_table[id].nirqs; ++i) {
+		pos = &moxtet->irq.position[first + i];
+		pos->idx = idx;
+		pos->bit = i;
+		moxtet->irq.exists |= BIT(first + i);
+	}
+
+	return 0;
+}
+
+static int moxtet_find_topology(struct moxtet *moxtet)
+{
+	u8 buf[TURRIS_MOX_MAX_MODULES];
+	int cnts[TURRIS_MOX_MODULE_LAST];
+	int i, ret;
+
+	memset(cnts, 0, sizeof(cnts));
+
+	ret = spi_read(to_spi_device(moxtet->dev), buf, TURRIS_MOX_MAX_MODULES);
+	if (ret < 0)
+		return ret;
+
+	if (buf[0] == TURRIS_MOX_CPU_ID_EMMC) {
+		dev_info(moxtet->dev, "Found MOX A (eMMC CPU) module\n");
+	} else if (buf[0] == TURRIS_MOX_CPU_ID_SD) {
+		dev_info(moxtet->dev, "Found MOX A (CPU) module\n");
+	} else {
+		dev_err(moxtet->dev, "Invalid Turris MOX A CPU module 0x%02x\n",
+			buf[0]);
+		return -ENODEV;
+	}
+
+	moxtet->count = 0;
+
+	for (i = 1; i < TURRIS_MOX_MAX_MODULES; ++i) {
+		int id;
+
+		if (buf[i] == 0xff)
+			break;
+
+		id = buf[i] & 0xf;
+
+		moxtet->modules[i-1] = id;
+		++moxtet->count;
+
+		if (mox_module_known(id)) {
+			dev_info(moxtet->dev, "Found %s module\n",
+				 mox_module_table[id].desc);
+
+			if (moxtet_set_irq(moxtet, i-1, id, cnts[id]++) < 0)
+				dev_err(moxtet->dev,
+					"  Cannot set IRQ for module %s\n",
+					mox_module_table[id].desc);
+		} else {
+			dev_warn(moxtet->dev,
+				 "Unknown Moxtet module found (ID 0x%02x)\n",
+				 id);
+		}
+	}
+
+	return 0;
+}
+
+static int moxtet_spi_read(struct moxtet *moxtet, u8 *buf)
+{
+	struct spi_transfer xfer = {
+		.rx_buf = buf,
+		.tx_buf = moxtet->tx,
+		.len = moxtet->count + 1
+	};
+	int ret;
+
+	mutex_lock(&moxtet->lock);
+
+	ret = spi_sync_transfer(to_spi_device(moxtet->dev), &xfer, 1);
+
+	mutex_unlock(&moxtet->lock);
+
+	return ret;
+}
+
+int moxtet_device_read(struct device *dev)
+{
+	struct moxtet_device *mdev = to_moxtet_device(dev);
+	struct moxtet *moxtet = mdev->moxtet;
+	u8 buf[TURRIS_MOX_MAX_MODULES];
+	int ret;
+
+	if (mdev->idx >= moxtet->count)
+		return -EINVAL;
+
+	ret = moxtet_spi_read(moxtet, buf);
+	if (ret < 0)
+		return ret;
+
+	return buf[mdev->idx + 1] >> 4;
+}
+EXPORT_SYMBOL_GPL(moxtet_device_read);
+
+int moxtet_device_write(struct device *dev, u8 val)
+{
+	struct moxtet_device *mdev = to_moxtet_device(dev);
+	struct moxtet *moxtet = mdev->moxtet;
+	int ret;
+
+	if (mdev->idx >= moxtet->count)
+		return -EINVAL;
+
+	mutex_lock(&moxtet->lock);
+
+	moxtet->tx[moxtet->count - mdev->idx] = val;
+
+	ret = spi_write(to_spi_device(moxtet->dev), moxtet->tx,
+			moxtet->count + 1);
+
+	mutex_unlock(&moxtet->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(moxtet_device_write);
+
+int moxtet_device_written(struct device *dev)
+{
+	struct moxtet_device *mdev = to_moxtet_device(dev);
+	struct moxtet *moxtet = mdev->moxtet;
+
+	if (mdev->idx >= moxtet->count)
+		return -EINVAL;
+
+	return moxtet->tx[moxtet->count - mdev->idx];
+}
+EXPORT_SYMBOL_GPL(moxtet_device_written);
+
+#ifdef CONFIG_DEBUG_FS
+static int moxtet_debug_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+
+	return nonseekable_open(inode, file);
+}
+
+static ssize_t input_read(struct file *file, char __user *buf, size_t len,
+			  loff_t *ppos)
+{
+	struct moxtet *moxtet = file->private_data;
+	u8 bin[TURRIS_MOX_MAX_MODULES];
+	u8 hex[sizeof(buf) * 2 + 1];
+	int ret, n;
+
+	ret = moxtet_spi_read(moxtet, bin);
+	if (ret < 0)
+		return ret;
+
+	n = moxtet->count + 1;
+	bin2hex(hex, bin, n);
+
+	hex[2*n] = '\n';
+
+	return simple_read_from_buffer(buf, len, ppos, hex, 2*n + 1);
+}
+
+static const struct file_operations input_fops = {
+	.owner	= THIS_MODULE,
+	.open	= moxtet_debug_open,
+	.read	= input_read,
+	.llseek	= no_llseek,
+};
+
+static ssize_t output_read(struct file *file, char __user *buf, size_t len,
+			   loff_t *ppos)
+{
+	struct moxtet *moxtet = file->private_data;
+	u8 hex[TURRIS_MOX_MAX_MODULES * 2 + 1];
+	u8 *p = hex;
+	int i;
+
+	mutex_lock(&moxtet->lock);
+
+	for (i = 0; i < moxtet->count; ++i)
+		p = hex_byte_pack(p, moxtet->tx[moxtet->count - i]);
+
+	mutex_unlock(&moxtet->lock);
+
+	*p++ = '\n';
+
+	return simple_read_from_buffer(buf, len, ppos, hex, p - hex);
+}
+
+static ssize_t output_write(struct file *file, const char __user *buf,
+			    size_t len, loff_t *ppos)
+{
+	struct moxtet *moxtet = file->private_data;
+	u8 bin[TURRIS_MOX_MAX_MODULES];
+	u8 hex[sizeof(bin) * 2 + 1];
+	size_t res;
+	loff_t dummy = 0;
+	int err, i;
+
+	if (len > 2 * moxtet->count + 1 || len < 2 * moxtet->count)
+		return -EINVAL;
+
+	res = simple_write_to_buffer(hex, sizeof(hex), &dummy, buf, len);
+	if (res < 0)
+		return res;
+
+	if (len % 2 == 1 && hex[len - 1] != '\n')
+		return -EINVAL;
+
+	err = hex2bin(bin, hex, moxtet->count);
+	if (err < 0)
+		return -EINVAL;
+
+	mutex_lock(&moxtet->lock);
+
+	for (i = 0; i < moxtet->count; ++i)
+		moxtet->tx[moxtet->count - i] = bin[i];
+
+	err = spi_write(to_spi_device(moxtet->dev), moxtet->tx,
+			moxtet->count + 1);
+
+	mutex_unlock(&moxtet->lock);
+
+	return err < 0 ? err : len;
+}
+
+static const struct file_operations output_fops = {
+	.owner	= THIS_MODULE,
+	.open	= moxtet_debug_open,
+	.read	= output_read,
+	.write	= output_write,
+	.llseek	= no_llseek,
+};
+
+static int moxtet_register_debugfs(struct moxtet *moxtet)
+{
+	struct dentry *root, *entry;
+
+	root = debugfs_create_dir("moxtet", NULL);
+
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	entry = debugfs_create_file_unsafe("input", 0444, root, moxtet,
+					   &input_fops);
+	if (IS_ERR(entry))
+		goto err_remove;
+
+	entry = debugfs_create_file_unsafe("output", 0644, root, moxtet,
+					   &output_fops);
+	if (IS_ERR(entry))
+		goto err_remove;
+
+	moxtet->debugfs_root = root;
+
+	return 0;
+err_remove:
+	debugfs_remove_recursive(root);
+	return PTR_ERR(entry);
+}
+
+static void moxtet_unregister_debugfs(struct moxtet *moxtet)
+{
+	debugfs_remove_recursive(moxtet->debugfs_root);
+}
+#else
+static inline int moxtet_register_debugfs(struct moxtet *moxtet)
+{
+	return 0;
+}
+
+static inline void moxtet_unregister_debugfs(struct moxtet *moxtet)
+{
+}
+#endif
+
+static int moxtet_irq_domain_map(struct irq_domain *d, unsigned int irq,
+				 irq_hw_number_t hw)
+{
+	struct moxtet *moxtet = d->host_data;
+
+	if (hw >= MOXTET_NIRQS || !(moxtet->irq.exists & BIT(hw))) {
+		dev_err(moxtet->dev, "Invalid hw irq number\n");
+		return -EINVAL;
+	}
+
+	irq_set_chip_data(irq, d->host_data);
+	irq_set_chip_and_handler(irq, &moxtet->irq.chip, handle_level_irq);
+
+	return 0;
+}
+
+static int moxtet_irq_domain_xlate(struct irq_domain *d,
+				   struct device_node *ctrlr,
+				   const u32 *intspec, unsigned int intsize,
+				   unsigned long *out_hwirq,
+				   unsigned int *out_type)
+{
+	struct moxtet *moxtet = d->host_data;
+	int irq;
+
+	if (WARN_ON(intsize < 1))
+		return -EINVAL;
+
+	irq = intspec[0];
+
+	if (irq >= MOXTET_NIRQS || !(moxtet->irq.exists & BIT(irq)))
+		return -EINVAL;
+
+	*out_hwirq = irq;
+	*out_type = IRQ_TYPE_NONE;
+	return 0;
+}
+
+static const struct irq_domain_ops moxtet_irq_domain = {
+	.map = moxtet_irq_domain_map,
+	.xlate = moxtet_irq_domain_xlate,
+};
+
+static void moxtet_irq_mask(struct irq_data *d)
+{
+	struct moxtet *moxtet = irq_data_get_irq_chip_data(d);
+
+	moxtet->irq.masked |= BIT(d->hwirq);
+}
+
+static void moxtet_irq_unmask(struct irq_data *d)
+{
+	struct moxtet *moxtet = irq_data_get_irq_chip_data(d);
+
+	moxtet->irq.masked &= ~BIT(d->hwirq);
+}
+
+static void moxtet_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct moxtet *moxtet = irq_data_get_irq_chip_data(d);
+	struct moxtet_irqpos *pos = &moxtet->irq.position[d->hwirq];
+	int id;
+
+	id = moxtet->modules[pos->idx];
+
+	seq_printf(p, " moxtet-%s.%i#%i", mox_module_name(id), pos->idx,
+		   pos->bit);
+}
+
+static const struct irq_chip moxtet_irq_chip = {
+	.name			= "moxtet",
+	.irq_mask		= moxtet_irq_mask,
+	.irq_unmask		= moxtet_irq_unmask,
+	.irq_print_chip		= moxtet_irq_print_chip,
+};
+
+static int moxtet_irq_read(struct moxtet *moxtet, unsigned long *map)
+{
+	struct moxtet_irqpos *pos = moxtet->irq.position;
+	u8 buf[TURRIS_MOX_MAX_MODULES];
+	int i, ret;
+
+	ret = moxtet_spi_read(moxtet, buf);
+	if (ret < 0)
+		return ret;
+
+	*map = 0;
+
+	for_each_set_bit(i, &moxtet->irq.exists, MOXTET_NIRQS) {
+		if (!(buf[pos[i].idx + 1] & BIT(4 + pos[i].bit)))
+			set_bit(i, map);
+	}
+
+	return 0;
+}
+
+static irqreturn_t moxtet_irq_thread_fn(int irq, void *data)
+{
+	struct moxtet *moxtet = data;
+	unsigned long set;
+	int nhandled = 0, i, sub_irq, ret;
+
+	ret = moxtet_irq_read(moxtet, &set);
+	if (ret < 0)
+		goto out;
+
+	set &= ~moxtet->irq.masked;
+
+	do {
+		for_each_set_bit(i, &set, MOXTET_NIRQS) {
+			sub_irq = irq_find_mapping(moxtet->irq.domain, i);
+			handle_nested_irq(sub_irq);
+			dev_dbg(moxtet->dev, "%i irq\n", i);
+			++nhandled;
+		}
+
+		ret = moxtet_irq_read(moxtet, &set);
+		if (ret < 0)
+			goto out;
+
+		set &= ~moxtet->irq.masked;
+	} while (set);
+
+out:
+	return (nhandled > 0 ? IRQ_HANDLED : IRQ_NONE);
+}
+
+static void moxtet_irq_free(struct moxtet *moxtet)
+{
+	int i, irq;
+
+	for (i = 0; i < MOXTET_NIRQS; ++i) {
+		if (moxtet->irq.exists & BIT(i)) {
+			irq = irq_find_mapping(moxtet->irq.domain, i);
+			irq_dispose_mapping(irq);
+		}
+	}
+
+	irq_domain_remove(moxtet->irq.domain);
+}
+
+static int moxtet_irq_setup(struct moxtet *moxtet)
+{
+	int i, ret;
+
+	moxtet->irq.domain = irq_domain_add_simple(moxtet->dev->of_node,
+						   MOXTET_NIRQS, 0,
+						   &moxtet_irq_domain, moxtet);
+	if (moxtet->irq.domain == NULL) {
+		dev_err(moxtet->dev, "Could not add IRQ domain\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < MOXTET_NIRQS; ++i)
+		if (moxtet->irq.exists & BIT(i))
+			irq_create_mapping(moxtet->irq.domain, i);
+
+	moxtet->irq.chip = moxtet_irq_chip;
+	moxtet->irq.masked = ~0;
+
+	ret = request_threaded_irq(moxtet->dev_irq, NULL, moxtet_irq_thread_fn,
+				   IRQF_ONESHOT, "moxtet", moxtet);
+	if (ret < 0)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	moxtet_irq_free(moxtet);
+	return ret;
+}
+
+static int moxtet_probe(struct spi_device *spi)
+{
+	struct moxtet *moxtet;
+	int ret;
+
+	ret = spi_setup(spi);
+	if (ret < 0)
+		return ret;
+
+	moxtet = devm_kzalloc(&spi->dev, sizeof(struct moxtet),
+			      GFP_KERNEL);
+	if (!moxtet)
+		return -ENOMEM;
+
+	moxtet->dev = &spi->dev;
+	spi_set_drvdata(spi, moxtet);
+
+	mutex_init(&moxtet->lock);
+
+	moxtet->dev_irq = of_irq_get(moxtet->dev->of_node, 0);
+	if (moxtet->dev_irq == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
+	if (moxtet->dev_irq <= 0) {
+		dev_err(moxtet->dev, "No IRQ resource found\n");
+		return -ENXIO;
+	}
+
+	ret = moxtet_find_topology(moxtet);
+	if (ret < 0)
+		return ret;
+
+	if (moxtet->irq.exists) {
+		ret = moxtet_irq_setup(moxtet);
+		if (ret < 0)
+			return ret;
+	}
+
+	of_register_moxtet_devices(moxtet);
+	moxtet_register_devices_from_topology(moxtet);
+
+	ret = moxtet_register_debugfs(moxtet);
+	if (ret < 0)
+		dev_warn(moxtet->dev, "Failed creating debugfs entries: %i\n",
+			 ret);
+
+	return 0;
+}
+
+static int moxtet_remove(struct spi_device *spi)
+{
+	struct moxtet *moxtet = spi_get_drvdata(spi);
+	int dummy;
+
+	free_irq(moxtet->dev_irq, moxtet);
+
+	moxtet_irq_free(moxtet);
+
+	moxtet_unregister_debugfs(moxtet);
+
+	dummy = device_for_each_child(moxtet->dev, NULL, __unregister);
+
+	mutex_destroy(&moxtet->lock);
+
+	return 0;
+}
+
+static const struct of_device_id moxtet_dt_ids[] = {
+	{ .compatible = "cznic,moxtet" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, moxtet_dt_ids);
+
+static struct spi_driver moxtet_spi_driver = {
+	.driver = {
+		.name		= "moxtet",
+		.of_match_table = moxtet_dt_ids,
+	},
+	.probe		= moxtet_probe,
+	.remove		= moxtet_remove,
+};
+
+static int __init moxtet_init(void)
+{
+	int ret;
+
+	ret = bus_register(&moxtet_bus_type);
+	if (ret < 0) {
+		pr_err("moxtet bus registration failed: %d\n", ret);
+		goto error;
+	}
+
+	ret = spi_register_driver(&moxtet_spi_driver);
+	if (ret < 0) {
+		pr_err("moxtet spi driver registration failed: %d\n", ret);
+		goto error_bus;
+	}
+
+	return 0;
+
+error_bus:
+	bus_unregister(&moxtet_bus_type);
+error:
+	return ret;
+}
+postcore_initcall_sync(moxtet_init);
+
+static void __exit moxtet_exit(void)
+{
+	spi_unregister_driver(&moxtet_spi_driver);
+	bus_unregister(&moxtet_bus_type);
+}
+module_exit(moxtet_exit);
+
+MODULE_AUTHOR("Marek Behun <marek.behun@nic.cz>");
+MODULE_DESCRIPTION("CZ.NIC's Turris Mox module configuration bus");
+MODULE_LICENSE("GPL v2");
diff --git a/include/dt-bindings/bus/moxtet.h b/include/dt-bindings/bus/moxtet.h
new file mode 100644
index 000000000000..dc9345440ebe
--- /dev/null
+++ b/include/dt-bindings/bus/moxtet.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Constant for device tree bindings for Turris Mox module configuration bus
+ *
+ * Copyright (C) 2019 Marek Behun <marek.behun@nic.cz>
+ */
+
+#ifndef _DT_BINDINGS_BUS_MOXTET_H
+#define _DT_BINDINGS_BUS_MOXTET_H
+
+#define MOXTET_IRQ_PCI		0
+#define MOXTET_IRQ_USB3		4
+#define MOXTET_IRQ_PERIDOT(n)	(8 + (n))
+#define MOXTET_IRQ_TOPAZ	12
+
+#endif /* _DT_BINDINGS_BUS_MOXTET_H */
diff --git a/include/linux/moxtet.h b/include/linux/moxtet.h
new file mode 100644
index 000000000000..490db6886dcc
--- /dev/null
+++ b/include/linux/moxtet.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Turris Mox module configuration bus driver
+ *
+ * Copyright (C) 2019 Marek Behun <marek.behun@nic.cz>
+ */
+
+#ifndef __LINUX_MOXTET_H
+#define __LINUX_MOXTET_H
+
+#include <linux/device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/mutex.h>
+
+#define TURRIS_MOX_MAX_MODULES	10
+
+enum turris_mox_cpu_module_id {
+	TURRIS_MOX_CPU_ID_EMMC	= 0x00,
+	TURRIS_MOX_CPU_ID_SD	= 0x10,
+};
+
+enum turris_mox_module_id {
+	TURRIS_MOX_MODULE_FIRST		= 0x01,
+
+	TURRIS_MOX_MODULE_SFP		= 0x01,
+	TURRIS_MOX_MODULE_PCI		= 0x02,
+	TURRIS_MOX_MODULE_TOPAZ		= 0x03,
+	TURRIS_MOX_MODULE_PERIDOT	= 0x04,
+	TURRIS_MOX_MODULE_USB3		= 0x05,
+	TURRIS_MOX_MODULE_PCI_BRIDGE	= 0x06,
+
+	TURRIS_MOX_MODULE_LAST		= 0x06,
+};
+
+#define MOXTET_NIRQS	16
+
+extern struct bus_type moxtet_type;
+
+struct moxtet {
+	struct device			*dev;
+	struct mutex			lock;
+	u8				modules[TURRIS_MOX_MAX_MODULES];
+	int				count;
+	u8				tx[TURRIS_MOX_MAX_MODULES];
+	int				dev_irq;
+	struct {
+		struct irq_domain	*domain;
+		struct irq_chip		chip;
+		unsigned long		masked, exists;
+		struct moxtet_irqpos {
+					u8 idx;
+					u8 bit;
+		} position[MOXTET_NIRQS];
+	} irq;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry			*debugfs_root;
+#endif
+};
+
+struct moxtet_driver {
+	const enum turris_mox_module_id	*id_table;
+	struct device_driver		driver;
+};
+
+static inline struct moxtet_driver *
+to_moxtet_driver(struct device_driver *drv)
+{
+	if (!drv)
+		return NULL;
+	return container_of(drv, struct moxtet_driver, driver);
+}
+
+extern int __moxtet_register_driver(struct module *owner,
+				    struct moxtet_driver *mdrv);
+
+static inline void moxtet_unregister_driver(struct moxtet_driver *mdrv)
+{
+	if (mdrv)
+		driver_unregister(&mdrv->driver);
+}
+
+#define moxtet_register_driver(driver) \
+	__moxtet_register_driver(THIS_MODULE, driver)
+
+#define module_moxtet_driver(__moxtet_driver) \
+	module_driver(__moxtet_driver, moxtet_register_driver, \
+			moxtet_unregister_driver)
+
+struct moxtet_device {
+	struct device			dev;
+	struct moxtet			*moxtet;
+	enum turris_mox_module_id	id;
+	unsigned int			idx;
+};
+
+extern int moxtet_device_read(struct device *dev);
+extern int moxtet_device_write(struct device *dev, u8 val);
+extern int moxtet_device_written(struct device *dev);
+
+static inline struct moxtet_device *
+to_moxtet_device(struct device *dev)
+{
+	if (!dev)
+		return NULL;
+	return container_of(dev, struct moxtet_device, dev);
+}
+
+#endif /* __LINUX_MOXTET_H */
-- 
cgit v1.2.3


From aad7ad2a01e774f5cd0dde8f0bc6e253ae3f0d35 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 18:33:18 +0200
Subject: dma: iop-adma: allow building without platform headers

Now that iop3xx and iop13xx are gone, the iop-adma driver no
longer needs to deal with incompatible register layout defined
in machine specific header files.

Move the iop32x specific definitions into drivers/dma/iop-adma.h
and the platform_data into include/linux/platform_data/dma-iop32x.h,
and change the machine code to no longer reference those.

The DMA0_ID/DMA1_ID/AAU_ID macros are required as part of the
platform data interface and still need to be visible, so move
those from one header to the other.

Link: https://lore.kernel.org/r/20190809163334.489360-4-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/include/asm/hardware/iop3xx-adma.h | 919 ----------------------------
 arch/arm/include/asm/hardware/iop_adma.h    | 106 ----
 arch/arm/mach-iop32x/include/mach/adma.h    |   6 -
 arch/arm/plat-iop/adma.c                    |   3 +-
 drivers/dma/Kconfig                         |   4 +-
 drivers/dma/iop-adma.c                      |   3 +-
 drivers/dma/iop-adma.h                      | 914 +++++++++++++++++++++++++++
 include/linux/platform_data/dma-iop32x.h    | 110 ++++
 8 files changed, 1028 insertions(+), 1037 deletions(-)
 delete mode 100644 arch/arm/include/asm/hardware/iop3xx-adma.h
 delete mode 100644 arch/arm/include/asm/hardware/iop_adma.h
 delete mode 100644 arch/arm/mach-iop32x/include/mach/adma.h
 create mode 100644 drivers/dma/iop-adma.h
 create mode 100644 include/linux/platform_data/dma-iop32x.h

(limited to 'include')

diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h
deleted file mode 100644
index 6d998df17efd..000000000000
--- a/arch/arm/include/asm/hardware/iop3xx-adma.h
+++ /dev/null
@@ -1,919 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright © 2006, Intel Corporation.
- */
-#ifndef _ADMA_H
-#define _ADMA_H
-#include <linux/types.h>
-#include <linux/io.h>
-#include <mach/hardware.h>
-#include <asm/hardware/iop_adma.h>
-
-/* Memory copy units */
-#define DMA_CCR(chan)		(chan->mmr_base + 0x0)
-#define DMA_CSR(chan)		(chan->mmr_base + 0x4)
-#define DMA_DAR(chan)		(chan->mmr_base + 0xc)
-#define DMA_NDAR(chan)		(chan->mmr_base + 0x10)
-#define DMA_PADR(chan)		(chan->mmr_base + 0x14)
-#define DMA_PUADR(chan)	(chan->mmr_base + 0x18)
-#define DMA_LADR(chan)		(chan->mmr_base + 0x1c)
-#define DMA_BCR(chan)		(chan->mmr_base + 0x20)
-#define DMA_DCR(chan)		(chan->mmr_base + 0x24)
-
-/* Application accelerator unit  */
-#define AAU_ACR(chan)		(chan->mmr_base + 0x0)
-#define AAU_ASR(chan)		(chan->mmr_base + 0x4)
-#define AAU_ADAR(chan)		(chan->mmr_base + 0x8)
-#define AAU_ANDAR(chan)	(chan->mmr_base + 0xc)
-#define AAU_SAR(src, chan)	(chan->mmr_base + (0x10 + ((src) << 2)))
-#define AAU_DAR(chan)		(chan->mmr_base + 0x20)
-#define AAU_ABCR(chan)		(chan->mmr_base + 0x24)
-#define AAU_ADCR(chan)		(chan->mmr_base + 0x28)
-#define AAU_SAR_EDCR(src_edc)	(chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
-#define AAU_EDCR0_IDX	8
-#define AAU_EDCR1_IDX	17
-#define AAU_EDCR2_IDX	26
-
-#define DMA0_ID 0
-#define DMA1_ID 1
-#define AAU_ID 2
-
-struct iop3xx_aau_desc_ctrl {
-	unsigned int int_en:1;
-	unsigned int blk1_cmd_ctrl:3;
-	unsigned int blk2_cmd_ctrl:3;
-	unsigned int blk3_cmd_ctrl:3;
-	unsigned int blk4_cmd_ctrl:3;
-	unsigned int blk5_cmd_ctrl:3;
-	unsigned int blk6_cmd_ctrl:3;
-	unsigned int blk7_cmd_ctrl:3;
-	unsigned int blk8_cmd_ctrl:3;
-	unsigned int blk_ctrl:2;
-	unsigned int dual_xor_en:1;
-	unsigned int tx_complete:1;
-	unsigned int zero_result_err:1;
-	unsigned int zero_result_en:1;
-	unsigned int dest_write_en:1;
-};
-
-struct iop3xx_aau_e_desc_ctrl {
-	unsigned int reserved:1;
-	unsigned int blk1_cmd_ctrl:3;
-	unsigned int blk2_cmd_ctrl:3;
-	unsigned int blk3_cmd_ctrl:3;
-	unsigned int blk4_cmd_ctrl:3;
-	unsigned int blk5_cmd_ctrl:3;
-	unsigned int blk6_cmd_ctrl:3;
-	unsigned int blk7_cmd_ctrl:3;
-	unsigned int blk8_cmd_ctrl:3;
-	unsigned int reserved2:7;
-};
-
-struct iop3xx_dma_desc_ctrl {
-	unsigned int pci_transaction:4;
-	unsigned int int_en:1;
-	unsigned int dac_cycle_en:1;
-	unsigned int mem_to_mem_en:1;
-	unsigned int crc_data_tx_en:1;
-	unsigned int crc_gen_en:1;
-	unsigned int crc_seed_dis:1;
-	unsigned int reserved:21;
-	unsigned int crc_tx_complete:1;
-};
-
-struct iop3xx_desc_dma {
-	u32 next_desc;
-	union {
-		u32 pci_src_addr;
-		u32 pci_dest_addr;
-		u32 src_addr;
-	};
-	union {
-		u32 upper_pci_src_addr;
-		u32 upper_pci_dest_addr;
-	};
-	union {
-		u32 local_pci_src_addr;
-		u32 local_pci_dest_addr;
-		u32 dest_addr;
-	};
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_dma_desc_ctrl desc_ctrl_field;
-	};
-	u32 crc_addr;
-};
-
-struct iop3xx_desc_aau {
-	u32 next_desc;
-	u32 src[4];
-	u32 dest_addr;
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
-	};
-	union {
-		u32 src_addr;
-		u32 e_desc_ctrl;
-		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
-	} src_edc[31];
-};
-
-struct iop3xx_aau_gfmr {
-	unsigned int gfmr1:8;
-	unsigned int gfmr2:8;
-	unsigned int gfmr3:8;
-	unsigned int gfmr4:8;
-};
-
-struct iop3xx_desc_pq_xor {
-	u32 next_desc;
-	u32 src[3];
-	union {
-		u32 data_mult1;
-		struct iop3xx_aau_gfmr data_mult1_field;
-	};
-	u32 dest_addr;
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
-	};
-	union {
-		u32 src_addr;
-		u32 e_desc_ctrl;
-		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
-		u32 data_multiplier;
-		struct iop3xx_aau_gfmr data_mult_field;
-		u32 reserved;
-	} src_edc_gfmr[19];
-};
-
-struct iop3xx_desc_dual_xor {
-	u32 next_desc;
-	u32 src0_addr;
-	u32 src1_addr;
-	u32 h_src_addr;
-	u32 d_src_addr;
-	u32 h_dest_addr;
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
-	};
-	u32 d_dest_addr;
-};
-
-union iop3xx_desc {
-	struct iop3xx_desc_aau *aau;
-	struct iop3xx_desc_dma *dma;
-	struct iop3xx_desc_pq_xor *pq_xor;
-	struct iop3xx_desc_dual_xor *dual_xor;
-	void *ptr;
-};
-
-/* No support for p+q operations */
-static inline int
-iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
-		  unsigned long flags)
-{
-	BUG();
-}
-
-static inline void
-iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
-{
-	BUG();
-}
-
-static inline void
-iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
-			 dma_addr_t addr, unsigned char coef)
-{
-	BUG();
-}
-
-static inline int
-iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
-			  unsigned long flags)
-{
-	BUG();
-}
-
-static inline void
-iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
-{
-	BUG();
-}
-
-#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
-
-static inline void
-iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
-			      dma_addr_t *src)
-{
-	BUG();
-}
-
-static inline int iop_adma_get_max_xor(void)
-{
-	return 32;
-}
-
-static inline int iop_adma_get_max_pq(void)
-{
-	BUG();
-	return 0;
-}
-
-static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
-{
-	int id = chan->device->id;
-
-	switch (id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return __raw_readl(DMA_DAR(chan));
-	case AAU_ID:
-		return __raw_readl(AAU_ADAR(chan));
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
-						u32 next_desc_addr)
-{
-	int id = chan->device->id;
-
-	switch (id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		__raw_writel(next_desc_addr, DMA_NDAR(chan));
-		break;
-	case AAU_ID:
-		__raw_writel(next_desc_addr, AAU_ANDAR(chan));
-		break;
-	}
-
-}
-
-#define IOP_ADMA_STATUS_BUSY (1 << 10)
-#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
-#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
-#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
-
-static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-	return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
-}
-
-static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
-					int num_slots)
-{
-	/* num_slots will only ever be 1, 2, 4, or 8 */
-	return (desc->idx & (num_slots - 1)) ? 0 : 1;
-}
-
-/* to do: support large (i.e. > hw max) buffer sizes */
-static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
-{
-	*slots_per_op = 1;
-	return 1;
-}
-
-/* to do: support large (i.e. > hw max) buffer sizes */
-static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
-{
-	*slots_per_op = 1;
-	return 1;
-}
-
-static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
-					int *slots_per_op)
-{
-	static const char slot_count_table[] = {
-						1, 1, 1, 1, /* 01 - 04 */
-						2, 2, 2, 2, /* 05 - 08 */
-						4, 4, 4, 4, /* 09 - 12 */
-						4, 4, 4, 4, /* 13 - 16 */
-						8, 8, 8, 8, /* 17 - 20 */
-						8, 8, 8, 8, /* 21 - 24 */
-						8, 8, 8, 8, /* 25 - 28 */
-						8, 8, 8, 8, /* 29 - 32 */
-					      };
-	*slots_per_op = slot_count_table[src_cnt - 1];
-	return *slots_per_op;
-}
-
-static inline int
-iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return iop_chan_memcpy_slot_count(0, slots_per_op);
-	case AAU_ID:
-		return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
-						int *slots_per_op)
-{
-	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
-
-	if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
-		return slot_cnt;
-
-	len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
-	while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
-		len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
-		slot_cnt += *slots_per_op;
-	}
-
-	slot_cnt += *slots_per_op;
-
-	return slot_cnt;
-}
-
-/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
- * descriptors
- */
-static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
-						int *slots_per_op)
-{
-	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
-
-	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
-		return slot_cnt;
-
-	len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-	while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
-		len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-		slot_cnt += *slots_per_op;
-	}
-
-	slot_cnt += *slots_per_op;
-
-	return slot_cnt;
-}
-
-static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return hw_desc.dma->byte_count;
-	case AAU_ID:
-		return hw_desc.aau->byte_count;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/* translate the src_idx to a descriptor word index */
-static inline int __desc_idx(int src_idx)
-{
-	static const int desc_idx_table[] = { 0, 0, 0, 0,
-					      0, 1, 2, 3,
-					      5, 6, 7, 8,
-					      9, 10, 11, 12,
-					      14, 15, 16, 17,
-					      18, 19, 20, 21,
-					      23, 24, 25, 26,
-					      27, 28, 29, 30,
-					    };
-
-	return desc_idx_table[src_idx];
-}
-
-static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan,
-					int src_idx)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return hw_desc.dma->src_addr;
-	case AAU_ID:
-		break;
-	default:
-		BUG();
-	}
-
-	if (src_idx < 4)
-		return hw_desc.aau->src[src_idx];
-	else
-		return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
-}
-
-static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
-					int src_idx, dma_addr_t addr)
-{
-	if (src_idx < 4)
-		hw_desc->src[src_idx] = addr;
-	else
-		hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
-}
-
-static inline void
-iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
-{
-	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop3xx_dma_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	u_desc_ctrl.field.mem_to_mem_en = 1;
-	u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-	hw_desc->upper_pci_src_addr = 0;
-	hw_desc->crc_addr = 0;
-}
-
-static inline void
-iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
-	u_desc_ctrl.field.dest_write_en = 1;
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-}
-
-static inline u32
-iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt,
-		     unsigned long flags)
-{
-	int i, shift;
-	u32 edcr;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	switch (src_cnt) {
-	case 25 ... 32:
-		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		edcr = 0;
-		shift = 1;
-		for (i = 24; i < src_cnt; i++) {
-			edcr |= (1 << shift);
-			shift += 3;
-		}
-		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
-		src_cnt = 24;
-		/* fall through */
-	case 17 ... 24:
-		if (!u_desc_ctrl.field.blk_ctrl) {
-			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
-			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		}
-		edcr = 0;
-		shift = 1;
-		for (i = 16; i < src_cnt; i++) {
-			edcr |= (1 << shift);
-			shift += 3;
-		}
-		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
-		src_cnt = 16;
-		/* fall through */
-	case 9 ... 16:
-		if (!u_desc_ctrl.field.blk_ctrl)
-			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
-		edcr = 0;
-		shift = 1;
-		for (i = 8; i < src_cnt; i++) {
-			edcr |= (1 << shift);
-			shift += 3;
-		}
-		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
-		src_cnt = 8;
-		/* fall through */
-	case 2 ... 8:
-		shift = 1;
-		for (i = 0; i < src_cnt; i++) {
-			u_desc_ctrl.value |= (1 << shift);
-			shift += 3;
-		}
-
-		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
-			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
-	}
-
-	u_desc_ctrl.field.dest_write_en = 1;
-	u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-
-	return u_desc_ctrl.value;
-}
-
-static inline void
-iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
-		  unsigned long flags)
-{
-	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags);
-}
-
-/* return the number of operations */
-static inline int
-iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
-		       unsigned long flags)
-{
-	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
-	struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-	int i, j;
-
-	hw_desc = desc->hw_desc;
-
-	for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
-		i += slots_per_op, j++) {
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags);
-		u_desc_ctrl.field.dest_write_en = 0;
-		u_desc_ctrl.field.zero_result_en = 1;
-		u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-		iter->desc_ctrl = u_desc_ctrl.value;
-
-		/* for the subsequent descriptors preserve the store queue
-		 * and chain them together
-		 */
-		if (i) {
-			prev_hw_desc =
-				iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
-			prev_hw_desc->next_desc =
-				(u32) (desc->async_tx.phys + (i << 5));
-		}
-	}
-
-	return j;
-}
-
-static inline void
-iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt,
-		       unsigned long flags)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	switch (src_cnt) {
-	case 25 ... 32:
-		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
-		/* fall through */
-	case 17 ... 24:
-		if (!u_desc_ctrl.field.blk_ctrl) {
-			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
-			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		}
-		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
-		/* fall through */
-	case 9 ... 16:
-		if (!u_desc_ctrl.field.blk_ctrl)
-			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
-		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
-		/* fall through */
-	case 1 ... 8:
-		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
-			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
-	}
-
-	u_desc_ctrl.field.dest_write_en = 0;
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-}
-
-static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan,
-					u32 byte_count)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		hw_desc.dma->byte_count = byte_count;
-		break;
-	case AAU_ID:
-		hw_desc.aau->byte_count = byte_count;
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline void
-iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
-			struct iop_adma_chan *chan)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		iop_desc_init_memcpy(desc, 1);
-		hw_desc.dma->byte_count = 0;
-		hw_desc.dma->dest_addr = 0;
-		hw_desc.dma->src_addr = 0;
-		break;
-	case AAU_ID:
-		iop_desc_init_null_xor(desc, 2, 1);
-		hw_desc.aau->byte_count = 0;
-		hw_desc.aau->dest_addr = 0;
-		hw_desc.aau->src[0] = 0;
-		hw_desc.aau->src[1] = 0;
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline void
-iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
-{
-	int slots_per_op = desc->slots_per_op;
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
-	int i = 0;
-
-	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
-		hw_desc->byte_count = len;
-	} else {
-		do {
-			iter = iop_hw_desc_slot_idx(hw_desc, i);
-			iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-			len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-			i += slots_per_op;
-		} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
-
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		iter->byte_count = len;
-	}
-}
-
-static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan,
-					dma_addr_t addr)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		hw_desc.dma->dest_addr = addr;
-		break;
-	case AAU_ID:
-		hw_desc.aau->dest_addr = addr;
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
-					dma_addr_t addr)
-{
-	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
-	hw_desc->src_addr = addr;
-}
-
-static inline void
-iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
-				dma_addr_t addr)
-{
-
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
-	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
-	int i;
-
-	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
-		i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
-	}
-}
-
-static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
-					int src_idx, dma_addr_t addr)
-{
-
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
-	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
-	int i;
-
-	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
-		i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
-	}
-}
-
-static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
-					u32 next_desc_addr)
-{
-	/* hw_desc->next_desc is the same location for all channels */
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	iop_paranoia(hw_desc.dma->next_desc);
-	hw_desc.dma->next_desc = next_desc_addr;
-}
-
-static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
-{
-	/* hw_desc->next_desc is the same location for all channels */
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-	return hw_desc.dma->next_desc;
-}
-
-static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
-{
-	/* hw_desc->next_desc is the same location for all channels */
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-	hw_desc.dma->next_desc = 0;
-}
-
-static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
-						u32 val)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	hw_desc->src[0] = val;
-}
-
-static inline enum sum_check_flags
-iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
-
-	iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-	return desc_ctrl.zero_result_err << SUM_CHECK_P;
-}
-
-static inline void iop_chan_append(struct iop_adma_chan *chan)
-{
-	u32 dma_chan_ctrl;
-
-	dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
-	dma_chan_ctrl |= 0x2;
-	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
-}
-
-static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
-{
-	return __raw_readl(DMA_CSR(chan));
-}
-
-static inline void iop_chan_disable(struct iop_adma_chan *chan)
-{
-	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
-	dma_chan_ctrl &= ~1;
-	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
-}
-
-static inline void iop_chan_enable(struct iop_adma_chan *chan)
-{
-	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
-
-	dma_chan_ctrl |= 1;
-	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
-}
-
-static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-	status &= (1 << 9);
-	__raw_writel(status, DMA_CSR(chan));
-}
-
-static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-	status &= (1 << 8);
-	__raw_writel(status, DMA_CSR(chan));
-}
-
-static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
-		break;
-	case AAU_ID:
-		status &= (1 << 5);
-		break;
-	default:
-		BUG();
-	}
-
-	__raw_writel(status, DMA_CSR(chan));
-}
-
-static inline int
-iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
-{
-	return 0;
-}
-
-static inline int
-iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
-{
-	return 0;
-}
-
-static inline int
-iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	return 0;
-}
-
-static inline int
-iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	return test_bit(5, &status);
-}
-
-static inline int
-iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return test_bit(2, &status);
-	default:
-		return 0;
-	}
-}
-
-static inline int
-iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return test_bit(3, &status);
-	default:
-		return 0;
-	}
-}
-
-static inline int
-iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return test_bit(1, &status);
-	default:
-		return 0;
-	}
-}
-#endif /* _ADMA_H */
diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h
deleted file mode 100644
index bcedbab90ac0..000000000000
--- a/arch/arm/include/asm/hardware/iop_adma.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright © 2006, Intel Corporation.
- */
-#ifndef IOP_ADMA_H
-#define IOP_ADMA_H
-#include <linux/types.h>
-#include <linux/dmaengine.h>
-#include <linux/interrupt.h>
-
-#define IOP_ADMA_SLOT_SIZE 32
-#define IOP_ADMA_THRESHOLD 4
-#ifdef DEBUG
-#define IOP_PARANOIA 1
-#else
-#define IOP_PARANOIA 0
-#endif
-#define iop_paranoia(x) BUG_ON(IOP_PARANOIA && (x))
-
-/**
- * struct iop_adma_device - internal representation of an ADMA device
- * @pdev: Platform device
- * @id: HW ADMA Device selector
- * @dma_desc_pool: base of DMA descriptor region (DMA address)
- * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
- * @common: embedded struct dma_device
- */
-struct iop_adma_device {
-	struct platform_device *pdev;
-	int id;
-	dma_addr_t dma_desc_pool;
-	void *dma_desc_pool_virt;
-	struct dma_device common;
-};
-
-/**
- * struct iop_adma_chan - internal representation of an ADMA device
- * @pending: allows batching of hardware operations
- * @lock: serializes enqueue/dequeue operations to the slot pool
- * @mmr_base: memory mapped register base
- * @chain: device chain view of the descriptors
- * @device: parent device
- * @common: common dmaengine channel object members
- * @last_used: place holder for allocation to continue from where it left off
- * @all_slots: complete domain of slots usable by the channel
- * @slots_allocated: records the actual size of the descriptor slot pool
- * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs
- */
-struct iop_adma_chan {
-	int pending;
-	spinlock_t lock; /* protects the descriptor slot pool */
-	void __iomem *mmr_base;
-	struct list_head chain;
-	struct iop_adma_device *device;
-	struct dma_chan common;
-	struct iop_adma_desc_slot *last_used;
-	struct list_head all_slots;
-	int slots_allocated;
-	struct tasklet_struct irq_tasklet;
-};
-
-/**
- * struct iop_adma_desc_slot - IOP-ADMA software descriptor
- * @slot_node: node on the iop_adma_chan.all_slots list
- * @chain_node: node on the op_adma_chan.chain list
- * @hw_desc: virtual address of the hardware descriptor chain
- * @phys: hardware address of the hardware descriptor chain
- * @group_head: first operation in a transaction
- * @slot_cnt: total slots used in an transaction (group of operations)
- * @slots_per_op: number of slots per operation
- * @idx: pool index
- * @tx_list: list of descriptors that are associated with one operation
- * @async_tx: support for the async_tx api
- * @group_list: list of slots that make up a multi-descriptor transaction
- *	for example transfer lengths larger than the supported hw max
- * @xor_check_result: result of zero sum
- * @crc32_result: result crc calculation
- */
-struct iop_adma_desc_slot {
-	struct list_head slot_node;
-	struct list_head chain_node;
-	void *hw_desc;
-	struct iop_adma_desc_slot *group_head;
-	u16 slot_cnt;
-	u16 slots_per_op;
-	u16 idx;
-	struct list_head tx_list;
-	struct dma_async_tx_descriptor async_tx;
-	union {
-		u32 *xor_check_result;
-		u32 *crc32_result;
-		u32 *pq_check_result;
-	};
-};
-
-struct iop_adma_platform_data {
-	int hw_id;
-	dma_cap_mask_t cap_mask;
-	size_t pool_size;
-};
-
-#define to_iop_sw_desc(addr_hw_desc) \
-	container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
-#define iop_hw_desc_slot_idx(hw_desc, idx) \
-	( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) )
-#endif
diff --git a/arch/arm/mach-iop32x/include/mach/adma.h b/arch/arm/mach-iop32x/include/mach/adma.h
deleted file mode 100644
index 2b20063123ad..000000000000
--- a/arch/arm/mach-iop32x/include/mach/adma.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef IOP32X_ADMA_H
-#define IOP32X_ADMA_H
-#include <asm/hardware/iop3xx-adma.h>
-#endif
-
diff --git a/arch/arm/plat-iop/adma.c b/arch/arm/plat-iop/adma.c
index 368496471e60..601cc9c11b07 100644
--- a/arch/arm/plat-iop/adma.c
+++ b/arch/arm/plat-iop/adma.c
@@ -6,8 +6,7 @@
 #include <linux/platform_device.h>
 #include <asm/hardware/iop3xx.h>
 #include <linux/dma-mapping.h>
-#include <mach/adma.h>
-#include <asm/hardware/iop_adma.h>
+#include <linux/platform_data/dma-iop32x.h>
 
 #define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT
 #define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index cc84863bc52b..7c511e3db4c8 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -294,8 +294,8 @@ config INTEL_IOATDMA
 	  If unsure, say N.
 
 config INTEL_IOP_ADMA
-	tristate "Intel IOP ADMA support"
-	depends on ARCH_IOP32X
+	tristate "Intel IOP32x ADMA support"
+	depends on ARCH_IOP32X || COMPILE_TEST
 	select DMA_ENGINE
 	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
 	help
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index aebdd671651a..03f4a588cf7f 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -22,8 +22,7 @@
 #include <linux/raid/pq.h>
 #include <linux/slab.h>
 
-#include <mach/adma.h>
-
+#include "iop-adma.h"
 #include "dmaengine.h"
 
 #define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
diff --git a/drivers/dma/iop-adma.h b/drivers/dma/iop-adma.h
new file mode 100644
index 000000000000..c499c9578f00
--- /dev/null
+++ b/drivers/dma/iop-adma.h
@@ -0,0 +1,914 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2006, Intel Corporation.
+ */
+#ifndef _ADMA_H
+#define _ADMA_H
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/platform_data/dma-iop32x.h>
+
+/* Memory copy units */
+#define DMA_CCR(chan)		(chan->mmr_base + 0x0)
+#define DMA_CSR(chan)		(chan->mmr_base + 0x4)
+#define DMA_DAR(chan)		(chan->mmr_base + 0xc)
+#define DMA_NDAR(chan)		(chan->mmr_base + 0x10)
+#define DMA_PADR(chan)		(chan->mmr_base + 0x14)
+#define DMA_PUADR(chan)	(chan->mmr_base + 0x18)
+#define DMA_LADR(chan)		(chan->mmr_base + 0x1c)
+#define DMA_BCR(chan)		(chan->mmr_base + 0x20)
+#define DMA_DCR(chan)		(chan->mmr_base + 0x24)
+
+/* Application accelerator unit  */
+#define AAU_ACR(chan)		(chan->mmr_base + 0x0)
+#define AAU_ASR(chan)		(chan->mmr_base + 0x4)
+#define AAU_ADAR(chan)		(chan->mmr_base + 0x8)
+#define AAU_ANDAR(chan)	(chan->mmr_base + 0xc)
+#define AAU_SAR(src, chan)	(chan->mmr_base + (0x10 + ((src) << 2)))
+#define AAU_DAR(chan)		(chan->mmr_base + 0x20)
+#define AAU_ABCR(chan)		(chan->mmr_base + 0x24)
+#define AAU_ADCR(chan)		(chan->mmr_base + 0x28)
+#define AAU_SAR_EDCR(src_edc)	(chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
+#define AAU_EDCR0_IDX	8
+#define AAU_EDCR1_IDX	17
+#define AAU_EDCR2_IDX	26
+
+struct iop3xx_aau_desc_ctrl {
+	unsigned int int_en:1;
+	unsigned int blk1_cmd_ctrl:3;
+	unsigned int blk2_cmd_ctrl:3;
+	unsigned int blk3_cmd_ctrl:3;
+	unsigned int blk4_cmd_ctrl:3;
+	unsigned int blk5_cmd_ctrl:3;
+	unsigned int blk6_cmd_ctrl:3;
+	unsigned int blk7_cmd_ctrl:3;
+	unsigned int blk8_cmd_ctrl:3;
+	unsigned int blk_ctrl:2;
+	unsigned int dual_xor_en:1;
+	unsigned int tx_complete:1;
+	unsigned int zero_result_err:1;
+	unsigned int zero_result_en:1;
+	unsigned int dest_write_en:1;
+};
+
+struct iop3xx_aau_e_desc_ctrl {
+	unsigned int reserved:1;
+	unsigned int blk1_cmd_ctrl:3;
+	unsigned int blk2_cmd_ctrl:3;
+	unsigned int blk3_cmd_ctrl:3;
+	unsigned int blk4_cmd_ctrl:3;
+	unsigned int blk5_cmd_ctrl:3;
+	unsigned int blk6_cmd_ctrl:3;
+	unsigned int blk7_cmd_ctrl:3;
+	unsigned int blk8_cmd_ctrl:3;
+	unsigned int reserved2:7;
+};
+
+struct iop3xx_dma_desc_ctrl {
+	unsigned int pci_transaction:4;
+	unsigned int int_en:1;
+	unsigned int dac_cycle_en:1;
+	unsigned int mem_to_mem_en:1;
+	unsigned int crc_data_tx_en:1;
+	unsigned int crc_gen_en:1;
+	unsigned int crc_seed_dis:1;
+	unsigned int reserved:21;
+	unsigned int crc_tx_complete:1;
+};
+
+struct iop3xx_desc_dma {
+	u32 next_desc;
+	union {
+		u32 pci_src_addr;
+		u32 pci_dest_addr;
+		u32 src_addr;
+	};
+	union {
+		u32 upper_pci_src_addr;
+		u32 upper_pci_dest_addr;
+	};
+	union {
+		u32 local_pci_src_addr;
+		u32 local_pci_dest_addr;
+		u32 dest_addr;
+	};
+	u32 byte_count;
+	union {
+		u32 desc_ctrl;
+		struct iop3xx_dma_desc_ctrl desc_ctrl_field;
+	};
+	u32 crc_addr;
+};
+
+struct iop3xx_desc_aau {
+	u32 next_desc;
+	u32 src[4];
+	u32 dest_addr;
+	u32 byte_count;
+	union {
+		u32 desc_ctrl;
+		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
+	};
+	union {
+		u32 src_addr;
+		u32 e_desc_ctrl;
+		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
+	} src_edc[31];
+};
+
+struct iop3xx_aau_gfmr {
+	unsigned int gfmr1:8;
+	unsigned int gfmr2:8;
+	unsigned int gfmr3:8;
+	unsigned int gfmr4:8;
+};
+
+struct iop3xx_desc_pq_xor {
+	u32 next_desc;
+	u32 src[3];
+	union {
+		u32 data_mult1;
+		struct iop3xx_aau_gfmr data_mult1_field;
+	};
+	u32 dest_addr;
+	u32 byte_count;
+	union {
+		u32 desc_ctrl;
+		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
+	};
+	union {
+		u32 src_addr;
+		u32 e_desc_ctrl;
+		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
+		u32 data_multiplier;
+		struct iop3xx_aau_gfmr data_mult_field;
+		u32 reserved;
+	} src_edc_gfmr[19];
+};
+
+struct iop3xx_desc_dual_xor {
+	u32 next_desc;
+	u32 src0_addr;
+	u32 src1_addr;
+	u32 h_src_addr;
+	u32 d_src_addr;
+	u32 h_dest_addr;
+	u32 byte_count;
+	union {
+		u32 desc_ctrl;
+		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
+	};
+	u32 d_dest_addr;
+};
+
+union iop3xx_desc {
+	struct iop3xx_desc_aau *aau;
+	struct iop3xx_desc_dma *dma;
+	struct iop3xx_desc_pq_xor *pq_xor;
+	struct iop3xx_desc_dual_xor *dual_xor;
+	void *ptr;
+};
+
+/* No support for p+q operations */
+static inline int
+iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+	BUG();
+	return 0;
+}
+
+static inline void
+iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
+		  unsigned long flags)
+{
+	BUG();
+}
+
+static inline void
+iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
+{
+	BUG();
+}
+
+static inline void
+iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+			 dma_addr_t addr, unsigned char coef)
+{
+	BUG();
+}
+
+static inline int
+iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
+{
+	BUG();
+	return 0;
+}
+
+static inline void
+iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+			  unsigned long flags)
+{
+	BUG();
+}
+
+static inline void
+iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+	BUG();
+}
+
+#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
+
+static inline void
+iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
+			      dma_addr_t *src)
+{
+	BUG();
+}
+
+static inline int iop_adma_get_max_xor(void)
+{
+	return 32;
+}
+
+static inline int iop_adma_get_max_pq(void)
+{
+	BUG();
+	return 0;
+}
+
+static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
+{
+	int id = chan->device->id;
+
+	switch (id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return __raw_readl(DMA_DAR(chan));
+	case AAU_ID:
+		return __raw_readl(AAU_ADAR(chan));
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
+						u32 next_desc_addr)
+{
+	int id = chan->device->id;
+
+	switch (id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		__raw_writel(next_desc_addr, DMA_NDAR(chan));
+		break;
+	case AAU_ID:
+		__raw_writel(next_desc_addr, AAU_ANDAR(chan));
+		break;
+	}
+
+}
+
+#define IOP_ADMA_STATUS_BUSY (1 << 10)
+#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
+#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
+#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
+
+static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
+{
+	u32 status = __raw_readl(DMA_CSR(chan));
+	return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
+}
+
+static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
+					int num_slots)
+{
+	/* num_slots will only ever be 1, 2, 4, or 8 */
+	return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/* to do: support large (i.e. > hw max) buffer sizes */
+static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
+{
+	*slots_per_op = 1;
+	return 1;
+}
+
+/* to do: support large (i.e. > hw max) buffer sizes */
+static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
+{
+	*slots_per_op = 1;
+	return 1;
+}
+
+static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
+					int *slots_per_op)
+{
+	static const char slot_count_table[] = {
+						1, 1, 1, 1, /* 01 - 04 */
+						2, 2, 2, 2, /* 05 - 08 */
+						4, 4, 4, 4, /* 09 - 12 */
+						4, 4, 4, 4, /* 13 - 16 */
+						8, 8, 8, 8, /* 17 - 20 */
+						8, 8, 8, 8, /* 21 - 24 */
+						8, 8, 8, 8, /* 25 - 28 */
+						8, 8, 8, 8, /* 29 - 32 */
+					      };
+	*slots_per_op = slot_count_table[src_cnt - 1];
+	return *slots_per_op;
+}
+
+static inline int
+iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
+{
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return iop_chan_memcpy_slot_count(0, slots_per_op);
+	case AAU_ID:
+		return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
+						int *slots_per_op)
+{
+	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
+
+	if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
+		return slot_cnt;
+
+	len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
+	while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
+		len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
+		slot_cnt += *slots_per_op;
+	}
+
+	slot_cnt += *slots_per_op;
+
+	return slot_cnt;
+}
+
+/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
+ * descriptors
+ */
+static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
+						int *slots_per_op)
+{
+	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
+
+	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
+		return slot_cnt;
+
+	len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+	while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
+		len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+		slot_cnt += *slots_per_op;
+	}
+
+	slot_cnt += *slots_per_op;
+
+	return slot_cnt;
+}
+
+static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
+					struct iop_adma_chan *chan)
+{
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return hw_desc.dma->byte_count;
+	case AAU_ID:
+		return hw_desc.aau->byte_count;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+/* translate the src_idx to a descriptor word index */
+static inline int __desc_idx(int src_idx)
+{
+	static const int desc_idx_table[] = { 0, 0, 0, 0,
+					      0, 1, 2, 3,
+					      5, 6, 7, 8,
+					      9, 10, 11, 12,
+					      14, 15, 16, 17,
+					      18, 19, 20, 21,
+					      23, 24, 25, 26,
+					      27, 28, 29, 30,
+					    };
+
+	return desc_idx_table[src_idx];
+}
+
+static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
+					struct iop_adma_chan *chan,
+					int src_idx)
+{
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return hw_desc.dma->src_addr;
+	case AAU_ID:
+		break;
+	default:
+		BUG();
+	}
+
+	if (src_idx < 4)
+		return hw_desc.aau->src[src_idx];
+	else
+		return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
+}
+
+static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
+					int src_idx, dma_addr_t addr)
+{
+	if (src_idx < 4)
+		hw_desc->src[src_idx] = addr;
+	else
+		hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
+}
+
+static inline void
+iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
+{
+	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
+	union {
+		u32 value;
+		struct iop3xx_dma_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = 0;
+	u_desc_ctrl.field.mem_to_mem_en = 1;
+	u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+	hw_desc->desc_ctrl = u_desc_ctrl.value;
+	hw_desc->upper_pci_src_addr = 0;
+	hw_desc->crc_addr = 0;
+}
+
+static inline void
+iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
+{
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+	union {
+		u32 value;
+		struct iop3xx_aau_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = 0;
+	u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
+	u_desc_ctrl.field.dest_write_en = 1;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+	hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline u32
+iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt,
+		     unsigned long flags)
+{
+	int i, shift;
+	u32 edcr;
+	union {
+		u32 value;
+		struct iop3xx_aau_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = 0;
+	switch (src_cnt) {
+	case 25 ... 32:
+		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+		edcr = 0;
+		shift = 1;
+		for (i = 24; i < src_cnt; i++) {
+			edcr |= (1 << shift);
+			shift += 3;
+		}
+		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
+		src_cnt = 24;
+		/* fall through */
+	case 17 ... 24:
+		if (!u_desc_ctrl.field.blk_ctrl) {
+			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
+			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+		}
+		edcr = 0;
+		shift = 1;
+		for (i = 16; i < src_cnt; i++) {
+			edcr |= (1 << shift);
+			shift += 3;
+		}
+		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
+		src_cnt = 16;
+		/* fall through */
+	case 9 ... 16:
+		if (!u_desc_ctrl.field.blk_ctrl)
+			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
+		edcr = 0;
+		shift = 1;
+		for (i = 8; i < src_cnt; i++) {
+			edcr |= (1 << shift);
+			shift += 3;
+		}
+		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
+		src_cnt = 8;
+		/* fall through */
+	case 2 ... 8:
+		shift = 1;
+		for (i = 0; i < src_cnt; i++) {
+			u_desc_ctrl.value |= (1 << shift);
+			shift += 3;
+		}
+
+		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
+			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
+	}
+
+	u_desc_ctrl.field.dest_write_en = 1;
+	u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+	hw_desc->desc_ctrl = u_desc_ctrl.value;
+
+	return u_desc_ctrl.value;
+}
+
+static inline void
+iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		  unsigned long flags)
+{
+	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags);
+}
+
+/* return the number of operations */
+static inline int
+iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
+{
+	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+	struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
+	union {
+		u32 value;
+		struct iop3xx_aau_desc_ctrl field;
+	} u_desc_ctrl;
+	int i, j;
+
+	hw_desc = desc->hw_desc;
+
+	for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
+		i += slots_per_op, j++) {
+		iter = iop_hw_desc_slot_idx(hw_desc, i);
+		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags);
+		u_desc_ctrl.field.dest_write_en = 0;
+		u_desc_ctrl.field.zero_result_en = 1;
+		u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+		iter->desc_ctrl = u_desc_ctrl.value;
+
+		/* for the subsequent descriptors preserve the store queue
+		 * and chain them together
+		 */
+		if (i) {
+			prev_hw_desc =
+				iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
+			prev_hw_desc->next_desc =
+				(u32) (desc->async_tx.phys + (i << 5));
+		}
+	}
+
+	return j;
+}
+
+static inline void
+iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
+{
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+	union {
+		u32 value;
+		struct iop3xx_aau_desc_ctrl field;
+	} u_desc_ctrl;
+
+	u_desc_ctrl.value = 0;
+	switch (src_cnt) {
+	case 25 ... 32:
+		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
+		/* fall through */
+	case 17 ... 24:
+		if (!u_desc_ctrl.field.blk_ctrl) {
+			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
+			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
+		}
+		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
+		/* fall through */
+	case 9 ... 16:
+		if (!u_desc_ctrl.field.blk_ctrl)
+			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
+		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
+		/* fall through */
+	case 1 ... 8:
+		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
+			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
+	}
+
+	u_desc_ctrl.field.dest_write_en = 0;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
+	hw_desc->desc_ctrl = u_desc_ctrl.value;
+}
+
+static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
+					struct iop_adma_chan *chan,
+					u32 byte_count)
+{
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		hw_desc.dma->byte_count = byte_count;
+		break;
+	case AAU_ID:
+		hw_desc.aau->byte_count = byte_count;
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void
+iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
+			struct iop_adma_chan *chan)
+{
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		iop_desc_init_memcpy(desc, 1);
+		hw_desc.dma->byte_count = 0;
+		hw_desc.dma->dest_addr = 0;
+		hw_desc.dma->src_addr = 0;
+		break;
+	case AAU_ID:
+		iop_desc_init_null_xor(desc, 2, 1);
+		hw_desc.aau->byte_count = 0;
+		hw_desc.aau->dest_addr = 0;
+		hw_desc.aau->src[0] = 0;
+		hw_desc.aau->src[1] = 0;
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void
+iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
+{
+	int slots_per_op = desc->slots_per_op;
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
+	int i = 0;
+
+	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
+		hw_desc->byte_count = len;
+	} else {
+		do {
+			iter = iop_hw_desc_slot_idx(hw_desc, i);
+			iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+			len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
+			i += slots_per_op;
+		} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
+
+		iter = iop_hw_desc_slot_idx(hw_desc, i);
+		iter->byte_count = len;
+	}
+}
+
+static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
+					struct iop_adma_chan *chan,
+					dma_addr_t addr)
+{
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		hw_desc.dma->dest_addr = addr;
+		break;
+	case AAU_ID:
+		hw_desc.aau->dest_addr = addr;
+		break;
+	default:
+		BUG();
+	}
+}
+
+static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
+					dma_addr_t addr)
+{
+	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
+	hw_desc->src_addr = addr;
+}
+
+static inline void
+iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
+				dma_addr_t addr)
+{
+
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
+	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+	int i;
+
+	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
+		i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
+		iter = iop_hw_desc_slot_idx(hw_desc, i);
+		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
+	}
+}
+
+static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
+					int src_idx, dma_addr_t addr)
+{
+
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
+	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
+	int i;
+
+	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
+		i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
+		iter = iop_hw_desc_slot_idx(hw_desc, i);
+		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
+	}
+}
+
+static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
+					u32 next_desc_addr)
+{
+	/* hw_desc->next_desc is the same location for all channels */
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+
+	iop_paranoia(hw_desc.dma->next_desc);
+	hw_desc.dma->next_desc = next_desc_addr;
+}
+
+static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
+{
+	/* hw_desc->next_desc is the same location for all channels */
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+	return hw_desc.dma->next_desc;
+}
+
+static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
+{
+	/* hw_desc->next_desc is the same location for all channels */
+	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
+	hw_desc.dma->next_desc = 0;
+}
+
+static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
+						u32 val)
+{
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+	hw_desc->src[0] = val;
+}
+
+static inline enum sum_check_flags
+iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
+{
+	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
+	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
+
+	iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
+	return desc_ctrl.zero_result_err << SUM_CHECK_P;
+}
+
+static inline void iop_chan_append(struct iop_adma_chan *chan)
+{
+	u32 dma_chan_ctrl;
+
+	dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
+	dma_chan_ctrl |= 0x2;
+	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
+}
+
+static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
+{
+	return __raw_readl(DMA_CSR(chan));
+}
+
+static inline void iop_chan_disable(struct iop_adma_chan *chan)
+{
+	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
+	dma_chan_ctrl &= ~1;
+	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
+}
+
+static inline void iop_chan_enable(struct iop_adma_chan *chan)
+{
+	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
+
+	dma_chan_ctrl |= 1;
+	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
+}
+
+static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
+{
+	u32 status = __raw_readl(DMA_CSR(chan));
+	status &= (1 << 9);
+	__raw_writel(status, DMA_CSR(chan));
+}
+
+static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
+{
+	u32 status = __raw_readl(DMA_CSR(chan));
+	status &= (1 << 8);
+	__raw_writel(status, DMA_CSR(chan));
+}
+
+static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
+{
+	u32 status = __raw_readl(DMA_CSR(chan));
+
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
+		break;
+	case AAU_ID:
+		status &= (1 << 5);
+		break;
+	default:
+		BUG();
+	}
+
+	__raw_writel(status, DMA_CSR(chan));
+}
+
+static inline int
+iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
+{
+	return 0;
+}
+
+static inline int
+iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
+{
+	return 0;
+}
+
+static inline int
+iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
+{
+	return 0;
+}
+
+static inline int
+iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
+{
+	return test_bit(5, &status);
+}
+
+static inline int
+iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
+{
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return test_bit(2, &status);
+	default:
+		return 0;
+	}
+}
+
+static inline int
+iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
+{
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return test_bit(3, &status);
+	default:
+		return 0;
+	}
+}
+
+static inline int
+iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
+{
+	switch (chan->device->id) {
+	case DMA0_ID:
+	case DMA1_ID:
+		return test_bit(1, &status);
+	default:
+		return 0;
+	}
+}
+#endif /* _ADMA_H */
diff --git a/include/linux/platform_data/dma-iop32x.h b/include/linux/platform_data/dma-iop32x.h
new file mode 100644
index 000000000000..ac83cff89549
--- /dev/null
+++ b/include/linux/platform_data/dma-iop32x.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright © 2006, Intel Corporation.
+ */
+#ifndef IOP_ADMA_H
+#define IOP_ADMA_H
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define IOP_ADMA_SLOT_SIZE 32
+#define IOP_ADMA_THRESHOLD 4
+#ifdef DEBUG
+#define IOP_PARANOIA 1
+#else
+#define IOP_PARANOIA 0
+#endif
+#define iop_paranoia(x) BUG_ON(IOP_PARANOIA && (x))
+
+#define DMA0_ID 0
+#define DMA1_ID 1
+#define AAU_ID 2
+
+/**
+ * struct iop_adma_device - internal representation of an ADMA device
+ * @pdev: Platform device
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct iop_adma_device {
+	struct platform_device *pdev;
+	int id;
+	dma_addr_t dma_desc_pool;
+	void *dma_desc_pool_virt;
+	struct dma_device common;
+};
+
+/**
+ * struct iop_adma_chan - internal representation of an ADMA device
+ * @pending: allows batching of hardware operations
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @mmr_base: memory mapped register base
+ * @chain: device chain view of the descriptors
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs
+ */
+struct iop_adma_chan {
+	int pending;
+	spinlock_t lock; /* protects the descriptor slot pool */
+	void __iomem *mmr_base;
+	struct list_head chain;
+	struct iop_adma_device *device;
+	struct dma_chan common;
+	struct iop_adma_desc_slot *last_used;
+	struct list_head all_slots;
+	int slots_allocated;
+	struct tasklet_struct irq_tasklet;
+};
+
+/**
+ * struct iop_adma_desc_slot - IOP-ADMA software descriptor
+ * @slot_node: node on the iop_adma_chan.all_slots list
+ * @chain_node: node on the op_adma_chan.chain list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @tx_list: list of descriptors that are associated with one operation
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ *	for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct iop_adma_desc_slot {
+	struct list_head slot_node;
+	struct list_head chain_node;
+	void *hw_desc;
+	struct iop_adma_desc_slot *group_head;
+	u16 slot_cnt;
+	u16 slots_per_op;
+	u16 idx;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+	union {
+		u32 *xor_check_result;
+		u32 *crc32_result;
+		u32 *pq_check_result;
+	};
+};
+
+struct iop_adma_platform_data {
+	int hw_id;
+	dma_cap_mask_t cap_mask;
+	size_t pool_size;
+};
+
+#define to_iop_sw_desc(addr_hw_desc) \
+	container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
+#define iop_hw_desc_slot_idx(hw_desc, idx) \
+	( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) )
+#endif
-- 
cgit v1.2.3


From b8e24a9300b0836a9d39f6b20746766b3b81f1bd Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 8 Aug 2019 15:03:00 -0400
Subject: block: annotate refault stalls from IO submission

psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.

Tested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               |  3 +++
 block/blk-core.c          | 23 ++++++++++++++++++++++-
 include/linux/blk_types.h |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 24a496f5d2e2..54769659a434 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -806,6 +806,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
 
 	bio->bi_iter.bi_size += len;
 	bio->bi_vcnt++;
+
+	if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
+		bio_set_flag(bio, BIO_WORKINGSET);
 }
 EXPORT_SYMBOL_GPL(__bio_add_page);
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 919629ce4015..834aea04718f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -36,6 +36,7 @@
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
+#include <linux/psi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -1134,6 +1135,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+	bool workingset_read = false;
+	unsigned long pflags;
+	blk_qc_t ret;
+
 	if (blkcg_punt_bio_submit(bio))
 		return BLK_QC_T_NONE;
 
@@ -1152,6 +1157,8 @@ blk_qc_t submit_bio(struct bio *bio)
 		if (op_is_write(bio_op(bio))) {
 			count_vm_events(PGPGOUT, count);
 		} else {
+			if (bio_flagged(bio, BIO_WORKINGSET))
+				workingset_read = true;
 			task_io_account_read(bio->bi_iter.bi_size);
 			count_vm_events(PGPGIN, count);
 		}
@@ -1166,7 +1173,21 @@ blk_qc_t submit_bio(struct bio *bio)
 		}
 	}
 
-	return generic_make_request(bio);
+	/*
+	 * If we're reading data that is part of the userspace
+	 * workingset, count submission time as memory stall. When the
+	 * device is congested, or the submitting cgroup IO-throttled,
+	 * submission can be a significant part of overall IO time.
+	 */
+	if (workingset_read)
+		psi_memstall_enter(&pflags);
+
+	ret = generic_make_request(bio);
+
+	if (workingset_read)
+		psi_memstall_leave(&pflags);
+
+	return ret;
 }
 EXPORT_SYMBOL(submit_bio);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d6ce7b3ec8b1..5a1118d4ef7e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -209,6 +209,7 @@ enum {
 	BIO_BOUNCED,		/* bio is a bounce bio */
 	BIO_USER_MAPPED,	/* contains user pages */
 	BIO_NULL_MAPPED,	/* contains invalid user pages */
+	BIO_WORKINGSET,		/* contains userspace workingset pages */
 	BIO_QUIET,		/* Make BIO Quiet */
 	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
 	BIO_REFFED,		/* bio has elevated ->bi_cnt */
-- 
cgit v1.2.3


From 4ed3350539aa931f58c939fcd803c7510584e143 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Tue, 13 Aug 2019 16:15:38 -0400
Subject: USB: usbfs: Add a capability flag for runtime suspend

The recent commit 7794f486ed0b ("usbfs: Add ioctls for runtime power
management") neglected to add a corresponding capability flag.  This
patch rectifies the omission.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Mayuresh Kulkarni <mkulkarni@opensource.cirrus.com>

Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1908131613490.1941-100000@iolanthe.rowland.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/devio.c          | 9 ++++++++-
 include/uapi/linux/usbdevice_fs.h | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 5db7b6dbcb44..24d4a801ca64 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -44,6 +44,12 @@
 
 #include "usb.h"
 
+#ifdef CONFIG_PM
+#define MAYBE_CAP_SUSPEND	USBDEVFS_CAP_SUSPEND
+#else
+#define MAYBE_CAP_SUSPEND	0
+#endif
+
 #define USB_MAXBUS			64
 #define USB_DEVICE_MAX			(USB_MAXBUS * 128)
 #define USB_SG_SIZE			16384 /* split-size for large txs */
@@ -2310,7 +2316,8 @@ static int proc_get_capabilities(struct usb_dev_state *ps, void __user *arg)
 
 	caps = USBDEVFS_CAP_ZERO_PACKET | USBDEVFS_CAP_NO_PACKET_SIZE_LIM |
 			USBDEVFS_CAP_REAP_AFTER_DISCONNECT | USBDEVFS_CAP_MMAP |
-			USBDEVFS_CAP_DROP_PRIVILEGES | USBDEVFS_CAP_CONNINFO_EX;
+			USBDEVFS_CAP_DROP_PRIVILEGES |
+			USBDEVFS_CAP_CONNINFO_EX | MAYBE_CAP_SUSPEND;
 	if (!ps->dev->bus->no_stop_on_short)
 		caps |= USBDEVFS_CAP_BULK_CONTINUATION;
 	if (ps->dev->bus->sg_tablesize)
diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h
index d24bbb6d3ca1..cf525cddeb94 100644
--- a/include/uapi/linux/usbdevice_fs.h
+++ b/include/uapi/linux/usbdevice_fs.h
@@ -158,6 +158,7 @@ struct usbdevfs_hub_portinfo {
 #define USBDEVFS_CAP_MMAP			0x20
 #define USBDEVFS_CAP_DROP_PRIVILEGES		0x40
 #define USBDEVFS_CAP_CONNINFO_EX		0x80
+#define USBDEVFS_CAP_SUSPEND			0x100
 
 /* USBDEVFS_DISCONNECT_CLAIM flags & struct */
 
-- 
cgit v1.2.3


From 53eff75e5f4dd4b9bc489955fdc60fde48d85e93 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 14 Aug 2019 18:27:08 +0200
Subject: ALSA: hda: Drop export of snd_hdac_bus_add/remove_device()

snd_hdac_bus_add_device() and snd_hdac_remove_device() are called only
internally in hda-core.  Let's drop the exports of them and move the
declarations into local.h.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h | 3 ---
 sound/hda/hdac_bus.c    | 3 +--
 sound/hda/local.h       | 4 ++++
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index 4af4af55e854..edb176a265c7 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -374,9 +374,6 @@ int snd_hdac_bus_exec_verb_unlocked(struct hdac_bus *bus, unsigned int addr,
 				    unsigned int cmd, unsigned int *res);
 void snd_hdac_bus_queue_event(struct hdac_bus *bus, u32 res, u32 res_ex);
 
-int snd_hdac_bus_add_device(struct hdac_bus *bus, struct hdac_device *codec);
-void snd_hdac_bus_remove_device(struct hdac_bus *bus,
-				struct hdac_device *codec);
 void snd_hdac_bus_process_unsol_events(struct work_struct *work);
 
 static inline void snd_hdac_codec_link_up(struct hdac_device *codec)
diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c
index cd25e2b3f7f2..18ed3185df82 100644
--- a/sound/hda/hdac_bus.c
+++ b/sound/hda/hdac_bus.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/export.h>
 #include <sound/hdaudio.h>
+#include "local.h"
 #include "trace.h"
 
 static const struct hdac_bus_ops default_ops = {
@@ -196,7 +197,6 @@ int snd_hdac_bus_add_device(struct hdac_bus *bus, struct hdac_device *codec)
 	bus->num_codecs++;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(snd_hdac_bus_add_device);
 
 /**
  * snd_hdac_bus_remove_device - Remove a codec from bus
@@ -215,7 +215,6 @@ void snd_hdac_bus_remove_device(struct hdac_bus *bus,
 	bus->num_codecs--;
 	flush_work(&bus->unsol_work);
 }
-EXPORT_SYMBOL_GPL(snd_hdac_bus_remove_device);
 
 #ifdef CONFIG_SND_HDA_ALIGNED_MMIO
 /* Helpers for aligned read/write of mmio space, for Tegra */
diff --git a/sound/hda/local.h b/sound/hda/local.h
index 877631e39373..3a4e303169a6 100644
--- a/sound/hda/local.h
+++ b/sound/hda/local.h
@@ -33,4 +33,8 @@ int hda_widget_sysfs_reinit(struct hdac_device *codec, hda_nid_t start_nid,
 			    int num_nodes);
 void hda_widget_sysfs_exit(struct hdac_device *codec);
 
+int snd_hdac_bus_add_device(struct hdac_bus *bus, struct hdac_device *codec);
+void snd_hdac_bus_remove_device(struct hdac_bus *bus,
+				struct hdac_device *codec);
+
 #endif /* __HDAC_LOCAL_H */
-- 
cgit v1.2.3


From ddf7cb83b0f45feb94ad89a987f600c766c463ca Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 14 Aug 2019 19:59:44 +0200
Subject: ALSA: hda: Unexport a few more stuff

Drop EXPORT_SYMBOL*() from a few more stuff in HD-audio core that
aren't used outside.  Particular the unsol event handler can be
staticized now because the recent change removed all external
callers.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hdaudio.h | 6 ------
 sound/hda/hdac_bus.c    | 5 +++--
 sound/hda/hdac_device.c | 6 ++----
 sound/hda/hdac_regmap.c | 1 +
 sound/hda/local.h       | 3 +++
 5 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index edb176a265c7..b260c5fd2337 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -122,10 +122,6 @@ int snd_hdac_codec_modalias(struct hdac_device *hdac, char *buf, size_t size);
 
 int snd_hdac_refresh_widgets(struct hdac_device *codec);
 
-unsigned int snd_hdac_make_cmd(struct hdac_device *codec, hda_nid_t nid,
-			       unsigned int verb, unsigned int parm);
-int snd_hdac_exec_verb(struct hdac_device *codec, unsigned int cmd,
-		       unsigned int flags, unsigned int *res);
 int snd_hdac_read(struct hdac_device *codec, hda_nid_t nid,
 		  unsigned int verb, unsigned int parm, unsigned int *res);
 int _snd_hdac_read_parm(struct hdac_device *codec, hda_nid_t nid, int parm,
@@ -374,8 +370,6 @@ int snd_hdac_bus_exec_verb_unlocked(struct hdac_bus *bus, unsigned int addr,
 				    unsigned int cmd, unsigned int *res);
 void snd_hdac_bus_queue_event(struct hdac_bus *bus, u32 res, u32 res_ex);
 
-void snd_hdac_bus_process_unsol_events(struct work_struct *work);
-
 static inline void snd_hdac_codec_link_up(struct hdac_device *codec)
 {
 	set_bit(codec->addr, &codec->bus->codec_powered);
diff --git a/sound/hda/hdac_bus.c b/sound/hda/hdac_bus.c
index 18ed3185df82..8f19876244eb 100644
--- a/sound/hda/hdac_bus.c
+++ b/sound/hda/hdac_bus.c
@@ -12,6 +12,8 @@
 #include "local.h"
 #include "trace.h"
 
+static void snd_hdac_bus_process_unsol_events(struct work_struct *work);
+
 static const struct hdac_bus_ops default_ops = {
 	.command = snd_hdac_bus_send_cmd,
 	.get_response = snd_hdac_bus_get_response,
@@ -149,7 +151,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_bus_queue_event);
 /*
  * process queued unsolicited events
  */
-void snd_hdac_bus_process_unsol_events(struct work_struct *work)
+static void snd_hdac_bus_process_unsol_events(struct work_struct *work)
 {
 	struct hdac_bus *bus = container_of(work, struct hdac_bus, unsol_work);
 	struct hdac_device *codec;
@@ -172,7 +174,6 @@ void snd_hdac_bus_process_unsol_events(struct work_struct *work)
 			drv->unsol_event(codec, res);
 	}
 }
-EXPORT_SYMBOL_GPL(snd_hdac_bus_process_unsol_events);
 
 /**
  * snd_hdac_bus_add_device - Add a codec to bus
diff --git a/sound/hda/hdac_device.c b/sound/hda/hdac_device.c
index 033bcef8751a..bf83d7062ef6 100644
--- a/sound/hda/hdac_device.c
+++ b/sound/hda/hdac_device.c
@@ -221,8 +221,8 @@ EXPORT_SYMBOL_GPL(snd_hdac_codec_modalias);
  *
  * Return an encoded command verb or -1 for error.
  */
-unsigned int snd_hdac_make_cmd(struct hdac_device *codec, hda_nid_t nid,
-			       unsigned int verb, unsigned int parm)
+static unsigned int snd_hdac_make_cmd(struct hdac_device *codec, hda_nid_t nid,
+				      unsigned int verb, unsigned int parm)
 {
 	u32 val, addr;
 
@@ -240,7 +240,6 @@ unsigned int snd_hdac_make_cmd(struct hdac_device *codec, hda_nid_t nid,
 	val |= parm;
 	return val;
 }
-EXPORT_SYMBOL_GPL(snd_hdac_make_cmd);
 
 /**
  * snd_hdac_exec_verb - execute an encoded verb
@@ -261,7 +260,6 @@ int snd_hdac_exec_verb(struct hdac_device *codec, unsigned int cmd,
 		return codec->exec_verb(codec, cmd, flags, res);
 	return snd_hdac_bus_exec_verb(codec->bus, codec->addr, cmd, res);
 }
-EXPORT_SYMBOL_GPL(snd_hdac_exec_verb);
 
 
 /**
diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index f399a1552e73..286361ecd640 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -21,6 +21,7 @@
 #include <sound/core.h>
 #include <sound/hdaudio.h>
 #include <sound/hda_regmap.h>
+#include "local.h"
 
 static int codec_pm_lock(struct hdac_device *codec)
 {
diff --git a/sound/hda/local.h b/sound/hda/local.h
index 3a4e303169a6..5b935219352f 100644
--- a/sound/hda/local.h
+++ b/sound/hda/local.h
@@ -37,4 +37,7 @@ int snd_hdac_bus_add_device(struct hdac_bus *bus, struct hdac_device *codec);
 void snd_hdac_bus_remove_device(struct hdac_bus *bus,
 				struct hdac_device *codec);
 
+int snd_hdac_exec_verb(struct hdac_device *codec, unsigned int cmd,
+		       unsigned int flags, unsigned int *res);
+
 #endif /* __HDAC_LOCAL_H */
-- 
cgit v1.2.3


From 707816c8b0508ba7afcee9a13b02bb4261e4bd8c Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Wed, 14 Aug 2019 09:01:28 +0100
Subject: netfilter: remove deprecation warnings from uapi headers.

There are two netfilter userspace headers which contain deprecation
warnings.  While these headers are not used within the kernel, they are
compiled stand-alone for header-testing.

Pablo informs me that userspace iptables still refer to these headers,
and the intention was to use xt_LOG.h instead and remove these, but
userspace was never updated.

Remove the warnings.

Fixes: 2a475c409fe8 ("kbuild: remove all netfilter headers from header-test blacklist.")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_ipv4/ipt_LOG.h  | 2 --
 include/uapi/linux/netfilter_ipv6/ip6t_LOG.h | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter_ipv4/ipt_LOG.h b/include/uapi/linux/netfilter_ipv4/ipt_LOG.h
index 6dec14ba851b..b7cf2c669f40 100644
--- a/include/uapi/linux/netfilter_ipv4/ipt_LOG.h
+++ b/include/uapi/linux/netfilter_ipv4/ipt_LOG.h
@@ -2,8 +2,6 @@
 #ifndef _IPT_LOG_H
 #define _IPT_LOG_H
 
-#warning "Please update iptables, this file will be removed soon!"
-
 /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */
 #define IPT_LOG_TCPSEQ		0x01	/* Log TCP sequence numbers */
 #define IPT_LOG_TCPOPT		0x02	/* Log TCP options */
diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h b/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h
index 7553a434e4da..23e91a9c2583 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h
@@ -2,8 +2,6 @@
 #ifndef _IP6T_LOG_H
 #define _IP6T_LOG_H
 
-#warning "Please update iptables, this file will be removed soon!"
-
 /* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */
 #define IP6T_LOG_TCPSEQ		0x01	/* Log TCP sequence numbers */
 #define IP6T_LOG_TCPOPT		0x02	/* Log TCP options */
-- 
cgit v1.2.3


From fdd61a013a24f2699aec1a446f0168682b6f9ec4 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 8 Aug 2019 14:32:37 +0200
Subject: gpio: Add support for hierarchical IRQ domains

Hierarchical IRQ domains can be used to stack different IRQ
controllers on top of each other.

Bring hierarchical IRQ domains into the GPIOLIB core with the
following basic idea:

Drivers that need their interrupts handled hierarchically
specify a callback to translate the child hardware IRQ and
IRQ type for each GPIO offset to a parent hardware IRQ and
parent hardware IRQ type.

Users have to pass the callback, fwnode, and parent irqdomain
before calling gpiochip_irqchip_add().

We use the new method of just filling in the struct
gpio_irq_chip before adding the gpiochip for all hierarchical
irqchips of this type.

The code path for device tree is pretty straight-forward,
while the code path for old boardfiles or anything else will
be more convoluted requireing upfront allocation of the
interrupts when adding the chip.

One specific use-case where this can be useful is if a power
management controller has top-level controls for wakeup
interrupts. In such cases, the power management controller can
be a parent to other interrupt controllers and program
additional registers when an IRQ has its wake capability
enabled or disabled.

The hierarchical irqchip helper code will only be available
when IRQ_DOMAIN_HIERARCHY is selected to GPIO chips using
this should select or depend on that symbol. When using
hierarchical IRQs, the parent interrupt controller must
also be hierarchical all the way up to the top interrupt
controller wireing directly into the CPU, so on systems
that do not have this we can get rid of all the extra
code for supporting hierarchical irqs.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Lina Iyer <ilina@codeaurora.org>
Cc: Jon Hunter <jonathanh@nvidia.com>
Cc: Sowjanya Komatineni <skomatineni@nvidia.com>
Cc: Bitan Biswas <bbiswas@nvidia.com>
Cc: linux-tegra@vger.kernel.org
Cc: David Daney <david.daney@cavium.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Brian Masney <masneyb@onstation.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Brian Masney <masneyb@onstation.org>
Co-developed-by: Brian Masney <masneyb@onstation.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20190808123242.5359-1-linus.walleij@linaro.org
---
 Documentation/driver-api/gpio/driver.rst | 122 ++++++++++--
 drivers/gpio/gpiolib.c                   | 320 +++++++++++++++++++++++++++++--
 include/linux/gpio/driver.h              | 111 +++++++++++
 3 files changed, 527 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst
index 921c71a3d683..5e96dbc7f4e7 100644
--- a/Documentation/driver-api/gpio/driver.rst
+++ b/Documentation/driver-api/gpio/driver.rst
@@ -259,7 +259,7 @@ most often cascaded off a parent interrupt controller, and in some special
 cases the GPIO logic is melded with a SoC's primary interrupt controller.
 
 The IRQ portions of the GPIO block are implemented using an irq_chip, using
-the header <linux/irq.h>. So basically such a driver is utilizing two sub-
+the header <linux/irq.h>. So this combined driver is utilizing two sub-
 systems simultaneously: gpio and irq.
 
 It is legal for any IRQ consumer to request an IRQ from any irqchip even if it
@@ -391,25 +391,119 @@ Infrastructure helpers for GPIO irqchips
 ----------------------------------------
 
 To help out in handling the set-up and management of GPIO irqchips and the
-associated irqdomain and resource allocation callbacks, the gpiolib has
-some helpers that can be enabled by selecting the GPIOLIB_IRQCHIP Kconfig
-symbol:
-
-- gpiochip_irqchip_add(): adds a chained cascaded irqchip to a gpiochip. It
-  will pass the struct gpio_chip* for the chip to all IRQ callbacks, so the
-  callbacks need to embed the gpio_chip in its state container and obtain a
-  pointer to the container using container_of().
-  (See Documentation/driver-api/driver-model/design-patterns.rst)
+associated irqdomain and resource allocation callbacks. These are activated
+by selecting the Kconfig symbol GPIOLIB_IRQCHIP. If the symbol
+IRQ_DOMAIN_HIERARCHY is also selected, hierarchical helpers will also be
+provided. A big portion of overhead code will be managed by gpiolib,
+under the assumption that your interrupts are 1-to-1-mapped to the
+GPIO line index:
+
+  GPIO line offset   Hardware IRQ
+  0                  0
+  1                  1
+  2                  2
+  ...                ...
+  ngpio-1            ngpio-1
+
+If some GPIO lines do not have corresponding IRQs, the bitmask valid_mask
+and the flag need_valid_mask in gpio_irq_chip can be used to mask off some
+lines as invalid for associating with IRQs.
+
+The preferred way to set up the helpers is to fill in the
+struct gpio_irq_chip inside struct gpio_chip before adding the gpio_chip.
+If you do this, the additional irq_chip will be set up by gpiolib at the
+same time as setting up the rest of the GPIO functionality. The following
+is a typical example of a cascaded interrupt handler using gpio_irq_chip:
+
+  /* Typical state container with dynamic irqchip */
+  struct my_gpio {
+      struct gpio_chip gc;
+      struct irq_chip irq;
+  };
+
+  int irq; /* from platform etc */
+  struct my_gpio *g;
+  struct gpio_irq_chip *girq;
+
+  /* Set up the irqchip dynamically */
+  g->irq.name = "my_gpio_irq";
+  g->irq.irq_ack = my_gpio_ack_irq;
+  g->irq.irq_mask = my_gpio_mask_irq;
+  g->irq.irq_unmask = my_gpio_unmask_irq;
+  g->irq.irq_set_type = my_gpio_set_irq_type;
+
+  /* Get a pointer to the gpio_irq_chip */
+  girq = &g->gc.irq;
+  girq->chip = &g->irq;
+  girq->parent_handler = ftgpio_gpio_irq_handler;
+  girq->num_parents = 1;
+  girq->parents = devm_kcalloc(dev, 1, sizeof(*girq->parents),
+                               GFP_KERNEL);
+  if (!girq->parents)
+      return -ENOMEM;
+  girq->default_type = IRQ_TYPE_NONE;
+  girq->handler = handle_bad_irq;
+  girq->parents[0] = irq;
+
+  return devm_gpiochip_add_data(dev, &g->gc, g);
+
+The helper support using hierarchical interrupt controllers as well.
+In this case the typical set-up will look like this:
+
+  /* Typical state container with dynamic irqchip */
+  struct my_gpio {
+      struct gpio_chip gc;
+      struct irq_chip irq;
+      struct fwnode_handle *fwnode;
+  };
+
+  int irq; /* from platform etc */
+  struct my_gpio *g;
+  struct gpio_irq_chip *girq;
+
+  /* Set up the irqchip dynamically */
+  g->irq.name = "my_gpio_irq";
+  g->irq.irq_ack = my_gpio_ack_irq;
+  g->irq.irq_mask = my_gpio_mask_irq;
+  g->irq.irq_unmask = my_gpio_unmask_irq;
+  g->irq.irq_set_type = my_gpio_set_irq_type;
+
+  /* Get a pointer to the gpio_irq_chip */
+  girq = &g->gc.irq;
+  girq->chip = &g->irq;
+  girq->default_type = IRQ_TYPE_NONE;
+  girq->handler = handle_bad_irq;
+  girq->fwnode = g->fwnode;
+  girq->parent_domain = parent;
+  girq->child_to_parent_hwirq = my_gpio_child_to_parent_hwirq;
+
+  return devm_gpiochip_add_data(dev, &g->gc, g);
+
+As you can see pretty similar, but you do not supply a parent handler for
+the IRQ, instead a parent irqdomain, an fwnode for the hardware and
+a funcion .child_to_parent_hwirq() that has the purpose of looking up
+the parent hardware irq from a child (i.e. this gpio chip) hardware irq.
+As always it is good to look at examples in the kernel tree for advice
+on how to find the required pieces.
+
+The old way of adding irqchips to gpiochips after registration is also still
+available but we try to move away from this:
+
+- DEPRECATED: gpiochip_irqchip_add(): adds a chained cascaded irqchip to a
+  gpiochip. It will pass the struct gpio_chip* for the chip to all IRQ
+  callbacks, so the callbacks need to embed the gpio_chip in its state
+  container and obtain a pointer to the container using container_of().
+  (See Documentation/driver-model/design-patterns.txt)
 
 - gpiochip_irqchip_add_nested(): adds a nested cascaded irqchip to a gpiochip,
   as discussed above regarding different types of cascaded irqchips. The
   cascaded irq has to be handled by a threaded interrupt handler.
   Apart from that it works exactly like the chained irqchip.
 
-- gpiochip_set_chained_irqchip(): sets up a chained cascaded irq handler for a
-  gpio_chip from a parent IRQ and passes the struct gpio_chip* as handler
-  data. Notice that we pass is as the handler data, since the irqchip data is
-  likely used by the parent irqchip.
+- DEPRECATED: gpiochip_set_chained_irqchip(): sets up a chained cascaded irq
+  handler for a gpio_chip from a parent IRQ and passes the struct gpio_chip*
+  as handler data. Notice that we pass is as the handler data, since the
+  irqchip data is likely used by the parent irqchip.
 
 - gpiochip_set_nested_irqchip(): sets up a nested cascaded irq handler for a
   gpio_chip from a parent IRQ. As the parent IRQ has usually been
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 537a37a89891..5277b8f1ff7c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1720,6 +1720,273 @@ void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip,
 }
 EXPORT_SYMBOL_GPL(gpiochip_set_nested_irqchip);
 
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+
+/**
+ * gpiochip_set_hierarchical_irqchip() - connects a hierarchical irqchip
+ * to a gpiochip
+ * @gc: the gpiochip to set the irqchip hierarchical handler to
+ * @irqchip: the irqchip to handle this level of the hierarchy, the interrupt
+ * will then percolate up to the parent
+ */
+static void gpiochip_set_hierarchical_irqchip(struct gpio_chip *gc,
+					      struct irq_chip *irqchip)
+{
+	/* DT will deal with mapping each IRQ as we go along */
+	if (is_of_node(gc->irq.fwnode))
+		return;
+
+	/*
+	 * This is for legacy and boardfile "irqchip" fwnodes: allocate
+	 * irqs upfront instead of dynamically since we don't have the
+	 * dynamic type of allocation that hardware description languages
+	 * provide. Once all GPIO drivers using board files are gone from
+	 * the kernel we can delete this code, but for a transitional period
+	 * it is necessary to keep this around.
+	 */
+	if (is_fwnode_irqchip(gc->irq.fwnode)) {
+		int i;
+		int ret;
+
+		for (i = 0; i < gc->ngpio; i++) {
+			struct irq_fwspec fwspec;
+			unsigned int parent_hwirq;
+			unsigned int parent_type;
+			struct gpio_irq_chip *girq = &gc->irq;
+
+			/*
+			 * We call the child to parent translation function
+			 * only to check if the child IRQ is valid or not.
+			 * Just pick the rising edge type here as that is what
+			 * we likely need to support.
+			 */
+			ret = girq->child_to_parent_hwirq(gc, i,
+							  IRQ_TYPE_EDGE_RISING,
+							  &parent_hwirq,
+							  &parent_type);
+			if (ret) {
+				chip_err(gc, "skip set-up on hwirq %d\n",
+					 i);
+				continue;
+			}
+
+			fwspec.fwnode = gc->irq.fwnode;
+			/* This is the hwirq for the GPIO line side of things */
+			fwspec.param[0] = girq->child_offset_to_irq(gc, i);
+			/* Just pick something */
+			fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
+			fwspec.param_count = 2;
+			ret = __irq_domain_alloc_irqs(gc->irq.domain,
+						      /* just pick something */
+						      -1,
+						      1,
+						      NUMA_NO_NODE,
+						      &fwspec,
+						      false,
+						      NULL);
+			if (ret < 0) {
+				chip_err(gc,
+					 "can not allocate irq for GPIO line %d parent hwirq %d in hierarchy domain: %d\n",
+					 i, parent_hwirq,
+					 ret);
+			}
+		}
+	}
+
+	chip_err(gc, "%s unknown fwnode type proceed anyway\n", __func__);
+
+	return;
+}
+
+static int gpiochip_hierarchy_irq_domain_translate(struct irq_domain *d,
+						   struct irq_fwspec *fwspec,
+						   unsigned long *hwirq,
+						   unsigned int *type)
+{
+	/* We support standard DT translation */
+	if (is_of_node(fwspec->fwnode) && fwspec->param_count == 2) {
+		return irq_domain_translate_twocell(d, fwspec, hwirq, type);
+	}
+
+	/* This is for board files and others not using DT */
+	if (is_fwnode_irqchip(fwspec->fwnode)) {
+		int ret;
+
+		ret = irq_domain_translate_twocell(d, fwspec, hwirq, type);
+		if (ret)
+			return ret;
+		WARN_ON(*type == IRQ_TYPE_NONE);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d,
+					       unsigned int irq,
+					       unsigned int nr_irqs,
+					       void *data)
+{
+	struct gpio_chip *gc = d->host_data;
+	irq_hw_number_t hwirq;
+	unsigned int type = IRQ_TYPE_NONE;
+	struct irq_fwspec *fwspec = data;
+	struct irq_fwspec parent_fwspec;
+	unsigned int parent_hwirq;
+	unsigned int parent_type;
+	struct gpio_irq_chip *girq = &gc->irq;
+	int ret;
+
+	/*
+	 * The nr_irqs parameter is always one except for PCI multi-MSI
+	 * so this should not happen.
+	 */
+	WARN_ON(nr_irqs != 1);
+
+	ret = gc->irq.child_irq_domain_ops.translate(d, fwspec, &hwirq, &type);
+	if (ret)
+		return ret;
+
+	chip_info(gc, "allocate IRQ %d, hwirq %lu\n", irq,  hwirq);
+
+	ret = girq->child_to_parent_hwirq(gc, hwirq, type,
+					  &parent_hwirq, &parent_type);
+	if (ret) {
+		chip_err(gc, "can't look up hwirq %lu\n", hwirq);
+		return ret;
+	}
+	chip_info(gc, "found parent hwirq %u\n", parent_hwirq);
+
+	/*
+	 * We set handle_bad_irq because the .set_type() should
+	 * always be invoked and set the right type of handler.
+	 */
+	irq_domain_set_info(d,
+			    irq,
+			    hwirq,
+			    gc->irq.chip,
+			    gc,
+			    girq->handler,
+			    NULL, NULL);
+	irq_set_probe(irq);
+
+	/*
+	 * Create a IRQ fwspec to send up to the parent irqdomain:
+	 * specify the hwirq we address on the parent and tie it
+	 * all together up the chain.
+	 */
+	parent_fwspec.fwnode = d->parent->fwnode;
+	/* This parent only handles asserted level IRQs */
+	girq->populate_parent_fwspec(gc, &parent_fwspec, parent_hwirq,
+				     parent_type);
+	chip_info(gc, "alloc_irqs_parent for %d parent hwirq %d\n",
+		  irq, parent_hwirq);
+	ret = irq_domain_alloc_irqs_parent(d, irq, 1, &parent_fwspec);
+	if (ret)
+		chip_err(gc,
+			 "failed to allocate parent hwirq %d for hwirq %lu\n",
+			 parent_hwirq, hwirq);
+
+	return ret;
+}
+
+static unsigned int gpiochip_child_offset_to_irq_noop(struct gpio_chip *chip,
+						      unsigned int offset)
+{
+	return offset;
+}
+
+static void gpiochip_hierarchy_setup_domain_ops(struct irq_domain_ops *ops)
+{
+	ops->activate = gpiochip_irq_domain_activate;
+	ops->deactivate = gpiochip_irq_domain_deactivate;
+	ops->alloc = gpiochip_hierarchy_irq_domain_alloc;
+	ops->free = irq_domain_free_irqs_common;
+
+	/*
+	 * We only allow overriding the translate() function for
+	 * hierarchical chips, and this should only be done if the user
+	 * really need something other than 1:1 translation.
+	 */
+	if (!ops->translate)
+		ops->translate = gpiochip_hierarchy_irq_domain_translate;
+}
+
+static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
+{
+	if (!gc->irq.child_to_parent_hwirq ||
+	    !gc->irq.fwnode) {
+		chip_err(gc, "missing irqdomain vital data\n");
+		return -EINVAL;
+	}
+
+	if (!gc->irq.child_offset_to_irq)
+		gc->irq.child_offset_to_irq = gpiochip_child_offset_to_irq_noop;
+
+	if (!gc->irq.populate_parent_fwspec)
+		gc->irq.populate_parent_fwspec =
+			gpiochip_populate_parent_fwspec_twocell;
+
+	gpiochip_hierarchy_setup_domain_ops(&gc->irq.child_irq_domain_ops);
+
+	gc->irq.domain = irq_domain_create_hierarchy(
+		gc->irq.parent_domain,
+		0,
+		gc->ngpio,
+		gc->irq.fwnode,
+		&gc->irq.child_irq_domain_ops,
+		gc);
+
+	if (!gc->irq.domain)
+		return -ENOMEM;
+
+	gpiochip_set_hierarchical_irqchip(gc, gc->irq.chip);
+
+	return 0;
+}
+
+static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc)
+{
+	return !!gc->irq.parent_domain;
+}
+
+void gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
+					     struct irq_fwspec *fwspec,
+					     unsigned int parent_hwirq,
+					     unsigned int parent_type)
+{
+	fwspec->param_count = 2;
+	fwspec->param[0] = parent_hwirq;
+	fwspec->param[1] = parent_type;
+}
+EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_twocell);
+
+void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
+					      struct irq_fwspec *fwspec,
+					      unsigned int parent_hwirq,
+					      unsigned int parent_type)
+{
+	fwspec->param_count = 4;
+	fwspec->param[0] = 0;
+	fwspec->param[1] = parent_hwirq;
+	fwspec->param[2] = 0;
+	fwspec->param[3] = parent_type;
+}
+EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_fourcell);
+
+#else
+
+static int gpiochip_hierarchy_add_domain(struct gpio_chip *gc)
+{
+	return -EINVAL;
+}
+
+static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc)
+{
+	return false;
+}
+
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
 /**
  * gpiochip_irq_map() - maps an IRQ into a GPIO irqchip
  * @d: the irqdomain used by this irqchip
@@ -1788,6 +2055,11 @@ static const struct irq_domain_ops gpiochip_domain_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
+/*
+ * TODO: move these activate/deactivate in under the hierarchicial
+ * irqchip implementation as static once SPMI and SSBI (all external
+ * users) are phased over.
+ */
 /**
  * gpiochip_irq_domain_activate() - Lock a GPIO to be used as an IRQ
  * @domain: The IRQ domain used by this IRQ chip
@@ -1827,10 +2099,25 @@ EXPORT_SYMBOL_GPL(gpiochip_irq_domain_deactivate);
 
 static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 {
+	struct irq_domain *domain = chip->irq.domain;
+
 	if (!gpiochip_irqchip_irq_valid(chip, offset))
 		return -ENXIO;
 
-	return irq_create_mapping(chip->irq.domain, offset);
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+	if (irq_domain_is_hierarchy(domain)) {
+		struct irq_fwspec spec;
+
+		spec.fwnode = domain->fwnode;
+		spec.param_count = 2;
+		spec.param[0] = chip->irq.child_offset_to_irq(chip, offset);
+		spec.param[1] = IRQ_TYPE_NONE;
+
+		return irq_create_fwspec_mapping(&spec);
+	}
+#endif
+
+	return irq_create_mapping(domain, offset);
 }
 
 static int gpiochip_irq_reqres(struct irq_data *d)
@@ -1907,7 +2194,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
 				struct lock_class_key *request_key)
 {
 	struct irq_chip *irqchip = gpiochip->irq.chip;
-	const struct irq_domain_ops *ops;
+	const struct irq_domain_ops *ops = NULL;
 	struct device_node *np;
 	unsigned int type;
 	unsigned int i;
@@ -1943,16 +2230,25 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
 	gpiochip->irq.lock_key = lock_key;
 	gpiochip->irq.request_key = request_key;
 
-	if (gpiochip->irq.domain_ops)
-		ops = gpiochip->irq.domain_ops;
-	else
-		ops = &gpiochip_domain_ops;
-
-	gpiochip->irq.domain = irq_domain_add_simple(np, gpiochip->ngpio,
-						     gpiochip->irq.first,
-						     ops, gpiochip);
-	if (!gpiochip->irq.domain)
-		return -EINVAL;
+	/* If a parent irqdomain is provided, let's build a hierarchy */
+	if (gpiochip_hierarchy_is_hierarchical(gpiochip)) {
+		int ret = gpiochip_hierarchy_add_domain(gpiochip);
+		if (ret)
+			return ret;
+	} else {
+		/* Some drivers provide custom irqdomain ops */
+		if (gpiochip->irq.domain_ops)
+			ops = gpiochip->irq.domain_ops;
+
+		if (!ops)
+			ops = &gpiochip_domain_ops;
+		gpiochip->irq.domain = irq_domain_add_simple(np,
+			gpiochip->ngpio,
+			gpiochip->irq.first,
+			ops, gpiochip);
+		if (!gpiochip->irq.domain)
+			return -EINVAL;
+	}
 
 	if (gpiochip->irq.parent_handler) {
 		void *data = gpiochip->irq.parent_handler_data ?: gpiochip;
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 6a0e420915a3..0e6d3b0c0211 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -23,6 +23,9 @@ enum gpio_lookup_flags;
 #ifdef CONFIG_GPIOLIB
 
 #ifdef CONFIG_GPIOLIB_IRQCHIP
+
+struct gpio_chip;
+
 /**
  * struct gpio_irq_chip - GPIO interrupt controller
  */
@@ -49,6 +52,84 @@ struct gpio_irq_chip {
 	 */
 	const struct irq_domain_ops *domain_ops;
 
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+	/**
+	 * @fwnode:
+	 *
+	 * Firmware node corresponding to this gpiochip/irqchip, necessary
+	 * for hierarchical irqdomain support.
+	 */
+	struct fwnode_handle *fwnode;
+
+	/**
+	 * @parent_domain:
+	 *
+	 * If non-NULL, will be set as the parent of this GPIO interrupt
+	 * controller's IRQ domain to establish a hierarchical interrupt
+	 * domain. The presence of this will activate the hierarchical
+	 * interrupt support.
+	 */
+	struct irq_domain *parent_domain;
+
+	/**
+	 * @child_to_parent_hwirq:
+	 *
+	 * This callback translates a child hardware IRQ offset to a parent
+	 * hardware IRQ offset on a hierarchical interrupt chip. The child
+	 * hardware IRQs correspond to the GPIO index 0..ngpio-1 (see the
+	 * ngpio field of struct gpio_chip) and the corresponding parent
+	 * hardware IRQ and type (such as IRQ_TYPE_*) shall be returned by
+	 * the driver. The driver can calculate this from an offset or using
+	 * a lookup table or whatever method is best for this chip. Return
+	 * 0 on successful translation in the driver.
+	 *
+	 * If some ranges of hardware IRQs do not have a corresponding parent
+	 * HWIRQ, return -EINVAL, but also make sure to fill in @valid_mask and
+	 * @need_valid_mask to make these GPIO lines unavailable for
+	 * translation.
+	 */
+	int (*child_to_parent_hwirq)(struct gpio_chip *chip,
+				     unsigned int child_hwirq,
+				     unsigned int child_type,
+				     unsigned int *parent_hwirq,
+				     unsigned int *parent_type);
+
+	/**
+	 * @populate_parent_fwspec:
+	 *
+	 * This optional callback populates the &struct irq_fwspec for the
+	 * parent's IRQ domain. If this is not specified, then
+	 * &gpiochip_populate_parent_fwspec_twocell will be used. A four-cell
+	 * variant named &gpiochip_populate_parent_fwspec_fourcell is also
+	 * available.
+	 */
+	void (*populate_parent_fwspec)(struct gpio_chip *chip,
+				       struct irq_fwspec *fwspec,
+				       unsigned int parent_hwirq,
+				       unsigned int parent_type);
+
+	/**
+	 * @child_offset_to_irq:
+	 *
+	 * This optional callback is used to translate the child's GPIO line
+	 * offset on the GPIO chip to an IRQ number for the GPIO to_irq()
+	 * callback. If this is not specified, then a default callback will be
+	 * provided that returns the line offset.
+	 */
+	unsigned int (*child_offset_to_irq)(struct gpio_chip *chip,
+					    unsigned int pin);
+
+	/**
+	 * @child_irq_domain_ops:
+	 *
+	 * The IRQ domain operations that will be used for this GPIO IRQ
+	 * chip. If no operations are provided, then default callbacks will
+	 * be populated to setup the IRQ hierarchy. Some drivers need to
+	 * supply their own translate function.
+	 */
+	struct irq_domain_ops child_irq_domain_ops;
+#endif
+
 	/**
 	 * @handler:
 	 *
@@ -449,6 +530,36 @@ struct bgpio_pdata {
 	int ngpio;
 };
 
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+
+void gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
+					     struct irq_fwspec *fwspec,
+					     unsigned int parent_hwirq,
+					     unsigned int parent_type);
+void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
+					      struct irq_fwspec *fwspec,
+					      unsigned int parent_hwirq,
+					      unsigned int parent_type);
+
+#else
+
+static void gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
+						    struct irq_fwspec *fwspec,
+						    unsigned int parent_hwirq,
+						    unsigned int parent_type)
+{
+}
+
+static void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
+						     struct irq_fwspec *fwspec,
+						     unsigned int parent_hwirq,
+						     unsigned int parent_type)
+{
+}
+
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+
 #if IS_ENABLED(CONFIG_GPIO_GENERIC)
 
 int bgpio_init(struct gpio_chip *gc, struct device *dev,
-- 
cgit v1.2.3


From 088880ddc0b20086b71bb87b805fb63ff07c35f2 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Fri, 2 Aug 2019 14:27:33 +0200
Subject: drm/etnaviv: implement softpin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With softpin we allow the userspace to take control over the GPU virtual
address space. The new capability is relected by a bump of the minor DRM
version. There are a few restrictions for userspace to take into
account:

1. The kernel reserves a bit of the address space to implement zero page
faulting and mapping of the kernel internal ring buffer. Userspace can
query the kernel for the first usable GPU VM address via
ETNAVIV_PARAM_SOFTPIN_START_ADDR.

2. We only allow softpin on GPUs, which implement proper process
separation via PPAS. If softpin is not available the softpin start
address will be set to ~0.

3. Softpin is all or nothing. A submit using softpin must not use any
address fixups via relocs.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Guido Günther <agx@sigxcpu.org>
---
 drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c     |  1 +
 drivers/gpu/drm/etnaviv/etnaviv_drv.c        |  2 +-
 drivers/gpu/drm/etnaviv/etnaviv_drv.h        |  2 ++
 drivers/gpu/drm/etnaviv/etnaviv_gem.c        |  5 +++--
 drivers/gpu/drm/etnaviv/etnaviv_gem.h        |  1 +
 drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c | 26 +++++++++++++++++++++++++-
 drivers/gpu/drm/etnaviv/etnaviv_gpu.c        |  9 +++++++++
 include/uapi/drm/etnaviv_drm.h               | 10 +++++++++-
 8 files changed, 51 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
index f39430ba3593..9dc20d892c15 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -44,6 +44,7 @@ etnaviv_cmdbuf_suballoc_new(struct device *dev)
 	mutex_init(&suballoc->lock);
 	init_waitqueue_head(&suballoc->free_event);
 
+	BUILD_BUG_ON(ETNAVIV_SOFTPIN_START_ADDRESS < SUBALLOC_SIZE);
 	suballoc->vaddr = dma_alloc_wc(dev, SUBALLOC_SIZE,
 				       &suballoc->paddr, GFP_KERNEL);
 	if (!suballoc->vaddr) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 80f1edcbbea0..45851d510f62 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -529,7 +529,7 @@ static struct drm_driver etnaviv_drm_driver = {
 	.desc               = "etnaviv DRM",
 	.date               = "20151214",
 	.major              = 1,
-	.minor              = 2,
+	.minor              = 3,
 };
 
 /*
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index a488cfdb6bbf..32cfa5a48d42 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -24,6 +24,8 @@ struct etnaviv_gem_object;
 struct etnaviv_gem_submit;
 struct etnaviv_iommu_global;
 
+#define ETNAVIV_SOFTPIN_START_ADDRESS	SZ_4M /* must be >= SUBALLOC_SIZE */
+
 struct etnaviv_file_private {
 	struct etnaviv_iommu_context	*mmu;
 	struct drm_sched_entity		sched_entity[ETNA_MAX_PIPES];
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 8c62c5e0a494..401ce4512b8d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -248,7 +248,8 @@ void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
 }
 
 struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
-	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context)
+	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context,
+	u64 va)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct etnaviv_vram_mapping *mapping;
@@ -309,7 +310,7 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 
 	ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj,
 				    mmu_context->global->memory_base,
-				    mapping, 0);
+				    mapping, va);
 	if (ret < 0) {
 		etnaviv_iommu_context_put(mmu_context);
 		kfree(mapping);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index 2d01ee1f65c1..f1164934ecdf 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -77,6 +77,7 @@ static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
 
 struct etnaviv_gem_submit_bo {
 	u32 flags;
+	u64 va;
 	struct etnaviv_gem_object *obj;
 	struct etnaviv_vram_mapping *mapping;
 	struct dma_fence *excl;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 63a1206492d2..75afce42921b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -72,6 +72,14 @@ static int submit_lookup_objects(struct etnaviv_gem_submit *submit,
 		}
 
 		submit->bos[i].flags = bo->flags;
+		if (submit->flags & ETNA_SUBMIT_SOFTPIN) {
+			if (bo->presumed < ETNAVIV_SOFTPIN_START_ADDRESS) {
+				DRM_ERROR("invalid softpin address\n");
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			submit->bos[i].va = bo->presumed;
+		}
 
 		/* normally use drm_gem_object_lookup(), but for bulk lookup
 		 * all under single table_lock just hit object_idr directly:
@@ -224,11 +232,17 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
 		struct etnaviv_vram_mapping *mapping;
 
 		mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
-						  submit->mmu_context);
+						  submit->mmu_context,
+						  submit->bos[i].va);
 		if (IS_ERR(mapping)) {
 			ret = PTR_ERR(mapping);
 			break;
 		}
+
+		if ((submit->flags & ETNA_SUBMIT_SOFTPIN) &&
+		     submit->bos[i].va != mapping->iova)
+			return -EINVAL;
+
 		atomic_inc(&etnaviv_obj->gpu_active);
 
 		submit->bos[i].flags |= BO_PINNED;
@@ -261,6 +275,10 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 	u32 *ptr = stream;
 	int ret;
 
+	/* Submits using softpin don't blend with relocs */
+	if ((submit->flags & ETNA_SUBMIT_SOFTPIN) && nr_relocs != 0)
+		return -EINVAL;
+
 	for (i = 0; i < nr_relocs; i++) {
 		const struct drm_etnaviv_gem_submit_reloc *r = relocs + i;
 		struct etnaviv_gem_submit_bo *bo;
@@ -445,6 +463,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if ((args->flags & ETNA_SUBMIT_SOFTPIN) &&
+	    priv->mmu_global->version != ETNAVIV_IOMMU_V2) {
+		DRM_ERROR("softpin requested on incompatible MMU\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Copy the command submission and bo array to kernel space in
 	 * one go, and do this outside of any locks.
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index d8a83ebfce47..d47d1a8e0219 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -42,6 +42,8 @@ static const struct platform_device_id gpu_ids[] = {
 
 int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 {
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+
 	switch (param) {
 	case ETNAVIV_PARAM_GPU_MODEL:
 		*value = gpu->identity.model;
@@ -147,6 +149,13 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 		*value = gpu->identity.varyings_count;
 		break;
 
+	case ETNAVIV_PARAM_SOFTPIN_START_ADDR:
+		if (priv->mmu_global->version == ETNAVIV_IOMMU_V2)
+			*value = ETNAVIV_SOFTPIN_START_ADDRESS;
+		else
+			*value = ~0ULL;
+		break;
+
 	default:
 		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
 		return -EINVAL;
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 0d5c49dc478c..09d0df8b71c5 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -73,6 +73,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
 #define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
+#define ETNAVIV_PARAM_SOFTPIN_START_ADDR            0x1b
 
 #define ETNA_MAX_PIPES 4
 
@@ -148,6 +149,11 @@ struct drm_etnaviv_gem_submit_reloc {
  * then patching the cmdstream for this entry is skipped.  This can
  * avoid kernel needing to map/access the cmdstream bo in the common
  * case.
+ * If the submit is a softpin submit (ETNA_SUBMIT_SOFTPIN) the 'presumed'
+ * field is interpreted as the fixed location to map the bo into the gpu
+ * virtual address space. If the kernel is unable to map the buffer at
+ * this location the submit will fail. This means userspace is responsible
+ * for the whole gpu virtual address management.
  */
 #define ETNA_SUBMIT_BO_READ             0x0001
 #define ETNA_SUBMIT_BO_WRITE            0x0002
@@ -177,9 +183,11 @@ struct drm_etnaviv_gem_submit_pmr {
 #define ETNA_SUBMIT_NO_IMPLICIT         0x0001
 #define ETNA_SUBMIT_FENCE_FD_IN         0x0002
 #define ETNA_SUBMIT_FENCE_FD_OUT        0x0004
+#define ETNA_SUBMIT_SOFTPIN             0x0008
 #define ETNA_SUBMIT_FLAGS		(ETNA_SUBMIT_NO_IMPLICIT | \
 					 ETNA_SUBMIT_FENCE_FD_IN | \
-					 ETNA_SUBMIT_FENCE_FD_OUT)
+					 ETNA_SUBMIT_FENCE_FD_OUT| \
+					 ETNA_SUBMIT_SOFTPIN)
 #define ETNA_PIPE_3D      0x00
 #define ETNA_PIPE_2D      0x01
 #define ETNA_PIPE_VG      0x02
-- 
cgit v1.2.3


From edfbcb321faf07ca970e4191abe061deeb7d3788 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 11 Aug 2019 10:05:16 +0200
Subject: usb: add a hcd_uses_dma helper

The USB buffer allocation code is the only place in the usb core (and in
fact the whole kernel) that uses is_device_dma_capable, while the URB
mapping code uses the uses_dma flag in struct usb_bus.  Switch the buffer
allocation to use the uses_dma flag used by the rest of the USB code,
and create a helper in hcd.h that checks this flag as well as the
CONFIG_HAS_DMA to simplify the caller a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20190811080520.21712-3-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/buffer.c | 10 +++-------
 drivers/usb/core/hcd.c    |  4 ++--
 drivers/usb/dwc2/hcd.c    |  2 +-
 include/linux/usb.h       |  2 +-
 include/linux/usb/hcd.h   |  3 +++
 5 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index 1a5b3dcae930..6cf22c27f2d2 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -66,9 +66,7 @@ int hcd_buffer_create(struct usb_hcd *hcd)
 	char		name[16];
 	int		i, size;
 
-	if (hcd->localmem_pool ||
-	    !IS_ENABLED(CONFIG_HAS_DMA) ||
-	    !is_device_dma_capable(hcd->self.sysdev))
+	if (hcd->localmem_pool || !hcd_uses_dma(hcd))
 		return 0;
 
 	for (i = 0; i < HCD_BUFFER_POOLS; i++) {
@@ -129,8 +127,7 @@ void *hcd_buffer_alloc(
 		return gen_pool_dma_alloc(hcd->localmem_pool, size, dma);
 
 	/* some USB hosts just use PIO */
-	if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-	    !is_device_dma_capable(bus->sysdev)) {
+	if (!hcd_uses_dma(hcd)) {
 		*dma = ~(dma_addr_t) 0;
 		return kmalloc(size, mem_flags);
 	}
@@ -160,8 +157,7 @@ void hcd_buffer_free(
 		return;
 	}
 
-	if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-	    !is_device_dma_capable(bus->sysdev)) {
+	if (!hcd_uses_dma(hcd)) {
 		kfree(addr);
 		return;
 	}
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 2ccbc2f83570..8592c0344fe8 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1412,7 +1412,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
 		if (hcd->self.uses_pio_for_control)
 			return ret;
-		if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) {
+		if (hcd_uses_dma(hcd)) {
 			if (is_vmalloc_addr(urb->setup_packet)) {
 				WARN_ONCE(1, "setup packet is not dma capable\n");
 				return -EAGAIN;
@@ -1446,7 +1446,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 	if (urb->transfer_buffer_length != 0
 	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
-		if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) {
+		if (hcd_uses_dma(hcd)) {
 			if (urb->num_sgs) {
 				int n;
 
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index ee144ff8af5b..111787a137ee 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -4608,7 +4608,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
 
 	buf = urb->transfer_buffer;
 
-	if (hcd->self.uses_dma) {
+	if (hcd_uses_dma(hcd)) {
 		if (!buf && (urb->transfer_dma & 3)) {
 			dev_err(hsotg->dev,
 				"%s: unaligned transfer with no transfer_buffer",
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 83d35d993e8c..e87826e23d59 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1457,7 +1457,7 @@ typedef void (*usb_complete_t)(struct urb *);
  * field rather than determining a dma address themselves.
  *
  * Note that transfer_buffer must still be set if the controller
- * does not support DMA (as indicated by bus.uses_dma) and when talking
+ * does not support DMA (as indicated by hcd_uses_dma()) and when talking
  * to root hub. If you have to trasfer between highmem zone and the device
  * on such controller, create a bounce buffer or bail out with an error.
  * If transfer_buffer cannot be set (is in highmem) and the controller is DMA
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index bab27ccc8ff5..a20e7815d814 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -422,6 +422,9 @@ static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd,
 	return hcd->high_prio_bh.completing_ep == ep;
 }
 
+#define hcd_uses_dma(hcd) \
+	(IS_ENABLED(CONFIG_HAS_DMA) && (hcd)->self.uses_dma)
+
 extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
 extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
 		int status);
-- 
cgit v1.2.3


From 72b745e3ad65deac94ea4eb83262c52ba3ffdb5b Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 13 Aug 2019 13:45:32 +0300
Subject: ASoC: core: Move pcm_mutex up to card level from snd_soc_pcm_runtime

The pcm_mutex is used to prevent concurrent execution of snd_pcm_ops
callbacks. This works fine most of the cases but it can not handle setups
when the same DAI is used by different rtd, for example:
pcm3168a have two DAIs: one for Playback and one for Capture.
If the codec is connected to a single CPU DAI we need to have two dai_link
to support both playback and capture.

In this case the snd_pcm_ops callbacks can be executed in parallel causing
unexpected races in DAI drivers.

By moving the pcm_mutex up to card level this can be solved
while - hopefully - not breaking other setups.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Tested-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190813104532.16669-1-peter.ujfalusi@ti.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h      |  6 ++++--
 sound/soc/soc-compress.c | 48 ++++++++++++++++++++++++------------------------
 sound/soc/soc-core.c     |  2 +-
 sound/soc/soc-pcm.c      | 36 ++++++++++++++++++------------------
 4 files changed, 47 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index b1fe5ebea257..5c841c2ee814 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -988,6 +988,10 @@ struct snd_soc_card {
 	struct mutex mutex;
 	struct mutex dapm_mutex;
 
+	/* Mutex for PCM operations */
+	struct mutex pcm_mutex;
+	enum snd_soc_pcm_subclass pcm_subclass;
+
 	spinlock_t dpcm_lock;
 
 	bool instantiated;
@@ -1116,8 +1120,6 @@ struct snd_soc_pcm_runtime {
 	struct device *dev;
 	struct snd_soc_card *card;
 	struct snd_soc_dai_link *dai_link;
-	struct mutex pcm_mutex;
-	enum snd_soc_pcm_subclass pcm_subclass;
 	struct snd_pcm_ops ops;
 
 	unsigned int params_select; /* currently selected param for dai link */
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index 289211069a1e..9e54d8ae6d2c 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -80,7 +80,7 @@ static int soc_compr_open(struct snd_compr_stream *cstream)
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	int ret;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->startup) {
 		ret = cpu_dai->driver->cops->startup(cstream, cpu_dai);
@@ -108,7 +108,7 @@ static int soc_compr_open(struct snd_compr_stream *cstream)
 
 	snd_soc_runtime_activate(rtd, cstream->direction);
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 
 	return 0;
 
@@ -118,7 +118,7 @@ machine_err:
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->shutdown)
 		cpu_dai->driver->cops->shutdown(cstream, cpu_dai);
 out:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -224,7 +224,7 @@ static void close_delayed_work(struct work_struct *work)
 			container_of(work, struct snd_soc_pcm_runtime, delayed_work.work);
 	struct snd_soc_dai *codec_dai = rtd->codec_dai;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	dev_dbg(rtd->dev,
 		"Compress ASoC: pop wq checking: %s status: %s waiting: %s\n",
@@ -239,7 +239,7 @@ static void close_delayed_work(struct work_struct *work)
 					  SND_SOC_DAPM_STREAM_STOP);
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 }
 
 static int soc_compr_free(struct snd_compr_stream *cstream)
@@ -249,7 +249,7 @@ static int soc_compr_free(struct snd_compr_stream *cstream)
 	struct snd_soc_dai *codec_dai = rtd->codec_dai;
 	int stream;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	if (cstream->direction == SND_COMPRESS_PLAYBACK)
 		stream = SNDRV_PCM_STREAM_PLAYBACK;
@@ -292,7 +292,7 @@ static int soc_compr_free(struct snd_compr_stream *cstream)
 					  SND_SOC_DAPM_STREAM_STOP);
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return 0;
 }
 
@@ -375,7 +375,7 @@ static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd)
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	int ret;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	ret = soc_compr_components_trigger(cstream, cmd);
 	if (ret < 0)
@@ -394,7 +394,7 @@ static int soc_compr_trigger(struct snd_compr_stream *cstream, int cmd)
 	}
 
 out:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -480,7 +480,7 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream,
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	int ret;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	/*
 	 * First we call set_params for the CPU DAI, then the component
@@ -514,14 +514,14 @@ static int soc_compr_set_params(struct snd_compr_stream *cstream,
 
 	/* cancel any delayed stream shutdown that is pending */
 	rtd->pop_wait = 0;
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 
 	cancel_delayed_work_sync(&rtd->delayed_work);
 
 	return 0;
 
 err:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -593,7 +593,7 @@ static int soc_compr_get_params(struct snd_compr_stream *cstream,
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	int ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->get_params) {
 		ret = cpu_dai->driver->cops->get_params(cstream, params, cpu_dai);
@@ -613,7 +613,7 @@ static int soc_compr_get_params(struct snd_compr_stream *cstream,
 	}
 
 err:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -625,7 +625,7 @@ static int soc_compr_get_caps(struct snd_compr_stream *cstream,
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -638,7 +638,7 @@ static int soc_compr_get_caps(struct snd_compr_stream *cstream,
 		break;
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -650,7 +650,7 @@ static int soc_compr_get_codec_caps(struct snd_compr_stream *cstream,
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -664,7 +664,7 @@ static int soc_compr_get_codec_caps(struct snd_compr_stream *cstream,
 		break;
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -676,7 +676,7 @@ static int soc_compr_ack(struct snd_compr_stream *cstream, size_t bytes)
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 	int ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->ack) {
 		ret = cpu_dai->driver->cops->ack(cstream, bytes, cpu_dai);
@@ -697,7 +697,7 @@ static int soc_compr_ack(struct snd_compr_stream *cstream, size_t bytes)
 	}
 
 err:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -710,7 +710,7 @@ static int soc_compr_pointer(struct snd_compr_stream *cstream,
 	int ret = 0;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	if (cpu_dai->driver->cops && cpu_dai->driver->cops->pointer)
 		cpu_dai->driver->cops->pointer(cstream, tstamp, cpu_dai);
@@ -726,7 +726,7 @@ static int soc_compr_pointer(struct snd_compr_stream *cstream,
 		break;
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -738,7 +738,7 @@ static int soc_compr_copy(struct snd_compr_stream *cstream,
 	struct snd_soc_rtdcom_list *rtdcom;
 	int ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -751,7 +751,7 @@ static int soc_compr_copy(struct snd_compr_stream *cstream,
 		break;
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index abe2f47cee6e..b3f820fb53e6 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1360,7 +1360,6 @@ static int soc_post_component_init(struct snd_soc_pcm_runtime *rtd,
 	rtd->dev->groups = soc_dev_attr_groups;
 	dev_set_name(rtd->dev, "%s", name);
 	dev_set_drvdata(rtd->dev, rtd);
-	mutex_init(&rtd->pcm_mutex);
 	INIT_LIST_HEAD(&rtd->dpcm[SNDRV_PCM_STREAM_PLAYBACK].be_clients);
 	INIT_LIST_HEAD(&rtd->dpcm[SNDRV_PCM_STREAM_CAPTURE].be_clients);
 	INIT_LIST_HEAD(&rtd->dpcm[SNDRV_PCM_STREAM_PLAYBACK].fe_clients);
@@ -2383,6 +2382,7 @@ int snd_soc_register_card(struct snd_soc_card *card)
 	card->instantiated = 0;
 	mutex_init(&card->mutex);
 	mutex_init(&card->dapm_mutex);
+	mutex_init(&card->pcm_mutex);
 	spin_lock_init(&card->dpcm_lock);
 
 	return snd_soc_bind_card(card);
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index da657c8179cc..e163dde5eab1 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -36,7 +36,7 @@
  * Increments the active count for all the DAIs and components attached to a PCM
  * runtime. Should typically be called when a stream is opened.
  *
- * Must be called with the rtd->pcm_mutex being held
+ * Must be called with the rtd->card->pcm_mutex being held
  */
 void snd_soc_runtime_activate(struct snd_soc_pcm_runtime *rtd, int stream)
 {
@@ -44,7 +44,7 @@ void snd_soc_runtime_activate(struct snd_soc_pcm_runtime *rtd, int stream)
 	struct snd_soc_dai *codec_dai;
 	int i;
 
-	lockdep_assert_held(&rtd->pcm_mutex);
+	lockdep_assert_held(&rtd->card->pcm_mutex);
 
 	if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		cpu_dai->playback_active++;
@@ -72,7 +72,7 @@ void snd_soc_runtime_activate(struct snd_soc_pcm_runtime *rtd, int stream)
  * Decrements the active count for all the DAIs and components attached to a PCM
  * runtime. Should typically be called when a stream is closed.
  *
- * Must be called with the rtd->pcm_mutex being held
+ * Must be called with the rtd->card->pcm_mutex being held
  */
 void snd_soc_runtime_deactivate(struct snd_soc_pcm_runtime *rtd, int stream)
 {
@@ -80,7 +80,7 @@ void snd_soc_runtime_deactivate(struct snd_soc_pcm_runtime *rtd, int stream)
 	struct snd_soc_dai *codec_dai;
 	int i;
 
-	lockdep_assert_held(&rtd->pcm_mutex);
+	lockdep_assert_held(&rtd->card->pcm_mutex);
 
 	if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		cpu_dai->playback_active--;
@@ -506,7 +506,7 @@ static int soc_pcm_open(struct snd_pcm_substream *substream)
 		pm_runtime_get_sync(component->dev);
 	}
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	/* startup the audio subsystem */
 	ret = snd_soc_dai_startup(cpu_dai, substream);
@@ -604,7 +604,7 @@ dynamic:
 
 	snd_soc_runtime_activate(rtd, substream->stream);
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return 0;
 
 config_err:
@@ -623,7 +623,7 @@ component_err:
 
 	snd_soc_dai_shutdown(cpu_dai, substream);
 out:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -653,7 +653,7 @@ static void close_delayed_work(struct work_struct *work)
 			container_of(work, struct snd_soc_pcm_runtime, delayed_work.work);
 	struct snd_soc_dai *codec_dai = rtd->codec_dais[0];
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	dev_dbg(rtd->dev, "ASoC: pop wq checking: %s status: %s waiting: %s\n",
 		 codec_dai->driver->playback.stream_name,
@@ -667,7 +667,7 @@ static void close_delayed_work(struct work_struct *work)
 					  SND_SOC_DAPM_STREAM_STOP);
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 }
 
 static void codec2codec_close_delayed_work(struct work_struct *work)
@@ -694,7 +694,7 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
 	struct snd_soc_dai *codec_dai;
 	int i;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	snd_soc_runtime_deactivate(rtd, substream->stream);
 
@@ -738,7 +738,7 @@ static int soc_pcm_close(struct snd_pcm_substream *substream)
 					  SND_SOC_DAPM_STREAM_STOP);
 	}
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 
 	for_each_rtdcom(rtd, rtdcom) {
 		component = rtdcom->component;
@@ -771,7 +771,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	struct snd_soc_dai *codec_dai;
 	int i, ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	if (rtd->dai_link->ops->prepare) {
 		ret = rtd->dai_link->ops->prepare(substream);
@@ -826,7 +826,7 @@ static int soc_pcm_prepare(struct snd_pcm_substream *substream)
 	snd_soc_dai_digital_mute(cpu_dai, 0, substream->stream);
 
 out:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -876,7 +876,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
 	struct snd_soc_dai *codec_dai;
 	int i, ret = 0;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 	if (rtd->dai_link->ops->hw_params) {
 		ret = rtd->dai_link->ops->hw_params(substream, params);
 		if (ret < 0) {
@@ -962,7 +962,7 @@ static int soc_pcm_hw_params(struct snd_pcm_substream *substream,
         if (ret)
 		goto component_err;
 out:
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 
 component_err:
@@ -986,7 +986,7 @@ codec_err:
 	if (rtd->dai_link->ops->hw_free)
 		rtd->dai_link->ops->hw_free(substream);
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return ret;
 }
 
@@ -1001,7 +1001,7 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
 	bool playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
 	int i;
 
-	mutex_lock_nested(&rtd->pcm_mutex, rtd->pcm_subclass);
+	mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
 
 	/* clear the corresponding DAIs parameters when going to be inactive */
 	if (cpu_dai->active == 1) {
@@ -1043,7 +1043,7 @@ static int soc_pcm_hw_free(struct snd_pcm_substream *substream)
 
 	snd_soc_dai_hw_free(cpu_dai, substream);
 
-	mutex_unlock(&rtd->pcm_mutex);
+	mutex_unlock(&rtd->card->pcm_mutex);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 6d54e455689edcf1f9ef30761dd4fdfdc1cba33a Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Sat, 27 Jul 2019 09:33:51 +0100
Subject: misc: xilinx_sdfec: Store driver config and state

Stores configuration based on parameters from the DT
node and values from the SD-FEC core plus reads
the default state from the SD-FEC core. To obtain
values from the core register read, write capabilities
have been added plus related register map details.

Tested-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Link: https://lore.kernel.org/r/1564216438-322406-2-git-send-email-dragan.cvetic@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/xilinx_sdfec.c      | 305 ++++++++++++++++++++++++++++++++++++++-
 include/uapi/misc/xilinx_sdfec.h | 138 ++++++++++++++++++
 2 files changed, 437 insertions(+), 6 deletions(-)
 create mode 100644 include/uapi/misc/xilinx_sdfec.h

(limited to 'include')

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index f257d3812110..24d9f79fe073 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -20,11 +20,92 @@
 #include <linux/slab.h>
 #include <linux/clk.h>
 
+#include <uapi/misc/xilinx_sdfec.h>
+
 #define DEV_NAME_LEN 12
 
 static struct idr dev_idr;
 static struct mutex dev_idr_lock;
 
+/* Xilinx SDFEC Register Map */
+/* CODE_WRI_PROTECT Register */
+#define XSDFEC_CODE_WR_PROTECT_ADDR (0x4)
+
+/* ACTIVE Register */
+#define XSDFEC_ACTIVE_ADDR (0x8)
+#define XSDFEC_IS_ACTIVITY_SET (0x1)
+
+/* AXIS_WIDTH Register */
+#define XSDFEC_AXIS_WIDTH_ADDR (0xC)
+#define XSDFEC_AXIS_DOUT_WORDS_LSB (5)
+#define XSDFEC_AXIS_DOUT_WIDTH_LSB (3)
+#define XSDFEC_AXIS_DIN_WORDS_LSB (2)
+#define XSDFEC_AXIS_DIN_WIDTH_LSB (0)
+
+/* AXIS_ENABLE Register */
+#define XSDFEC_AXIS_ENABLE_ADDR (0x10)
+#define XSDFEC_AXIS_OUT_ENABLE_MASK (0x38)
+#define XSDFEC_AXIS_IN_ENABLE_MASK (0x7)
+#define XSDFEC_AXIS_ENABLE_MASK                                                \
+	(XSDFEC_AXIS_OUT_ENABLE_MASK | XSDFEC_AXIS_IN_ENABLE_MASK)
+
+/* FEC_CODE Register */
+#define XSDFEC_FEC_CODE_ADDR (0x14)
+
+/* ORDER Register Map */
+#define XSDFEC_ORDER_ADDR (0x18)
+
+/* Interrupt Status Register */
+#define XSDFEC_ISR_ADDR (0x1C)
+/* Interrupt Status Register Bit Mask */
+#define XSDFEC_ISR_MASK (0x3F)
+
+/* Write Only - Interrupt Enable Register */
+#define XSDFEC_IER_ADDR (0x20)
+/* Write Only - Interrupt Disable Register */
+#define XSDFEC_IDR_ADDR (0x24)
+/* Read Only - Interrupt Mask Register */
+#define XSDFEC_IMR_ADDR (0x28)
+
+/* ECC Interrupt Status Register */
+#define XSDFEC_ECC_ISR_ADDR (0x2C)
+/* Single Bit Errors */
+#define XSDFEC_ECC_ISR_SBE_MASK (0x7FF)
+/* PL Initialize Single Bit Errors */
+#define XSDFEC_PL_INIT_ECC_ISR_SBE_MASK (0x3C00000)
+/* Multi Bit Errors */
+#define XSDFEC_ECC_ISR_MBE_MASK (0x3FF800)
+/* PL Initialize Multi Bit Errors */
+#define XSDFEC_PL_INIT_ECC_ISR_MBE_MASK (0x3C000000)
+/* Multi Bit Error to Event Shift */
+#define XSDFEC_ECC_ISR_MBE_TO_EVENT_SHIFT (11)
+/* PL Initialize Multi Bit Error to Event Shift */
+#define XSDFEC_PL_INIT_ECC_ISR_MBE_TO_EVENT_SHIFT (4)
+/* ECC Interrupt Status Bit Mask */
+#define XSDFEC_ECC_ISR_MASK (XSDFEC_ECC_ISR_SBE_MASK | XSDFEC_ECC_ISR_MBE_MASK)
+/* ECC Interrupt Status PL Initialize Bit Mask */
+#define XSDFEC_PL_INIT_ECC_ISR_MASK                                            \
+	(XSDFEC_PL_INIT_ECC_ISR_SBE_MASK | XSDFEC_PL_INIT_ECC_ISR_MBE_MASK)
+/* ECC Interrupt Status All Bit Mask */
+#define XSDFEC_ALL_ECC_ISR_MASK                                                \
+	(XSDFEC_ECC_ISR_MASK | XSDFEC_PL_INIT_ECC_ISR_MASK)
+/* ECC Interrupt Status Single Bit Errors Mask */
+#define XSDFEC_ALL_ECC_ISR_SBE_MASK                                            \
+	(XSDFEC_ECC_ISR_SBE_MASK | XSDFEC_PL_INIT_ECC_ISR_SBE_MASK)
+/* ECC Interrupt Status Multi Bit Errors Mask */
+#define XSDFEC_ALL_ECC_ISR_MBE_MASK                                            \
+	(XSDFEC_ECC_ISR_MBE_MASK | XSDFEC_PL_INIT_ECC_ISR_MBE_MASK)
+
+/* Write Only - ECC Interrupt Enable Register */
+#define XSDFEC_ECC_IER_ADDR (0x30)
+/* Write Only - ECC Interrupt Disable Register */
+#define XSDFEC_ECC_IDR_ADDR (0x34)
+/* Read Only - ECC Interrupt Mask Register */
+#define XSDFEC_ECC_IMR_ADDR (0x38)
+
+/* BYPASS Register */
+#define XSDFEC_BYPASS_ADDR (0x3C)
+
 /**
  * struct xsdfec_clks - For managing SD-FEC clocks
  * @core_clk: Main processing clock for core
@@ -49,31 +130,237 @@ struct xsdfec_clks {
 
 /**
  * struct xsdfec_dev - Driver data for SDFEC
- * @regs: device physical base address
- * @dev: pointer to device struct
  * @miscdev: Misc device handle
- * @error_data_lock: Error counter and states spinlock
  * @clks: Clocks managed by the SDFEC driver
+ * @regs: device physical base address
+ * @dev: pointer to device struct
+ * @config: Configuration of the SDFEC device
  * @dev_name: Device name
+ * @state: State of the SDFEC device
+ * @error_data_lock: Error counter and states spinlock
  * @dev_id: Device ID
  *
  * This structure contains necessary state for SDFEC driver to operate
  */
 struct xsdfec_dev {
+	struct miscdevice miscdev;
+	struct xsdfec_clks clks;
 	void __iomem *regs;
 	struct device *dev;
-	struct miscdevice miscdev;
+	struct xsdfec_config config;
+	char dev_name[DEV_NAME_LEN];
+	enum xsdfec_state state;
 	/* Spinlock to protect state_updated and stats_updated */
 	spinlock_t error_data_lock;
-	struct xsdfec_clks clks;
-	char dev_name[DEV_NAME_LEN];
 	int dev_id;
 };
 
+static inline void xsdfec_regwrite(struct xsdfec_dev *xsdfec, u32 addr,
+				   u32 value)
+{
+	dev_dbg(xsdfec->dev, "Writing 0x%x to offset 0x%x", value, addr);
+	iowrite32(value, xsdfec->regs + addr);
+}
+
+static inline u32 xsdfec_regread(struct xsdfec_dev *xsdfec, u32 addr)
+{
+	u32 rval;
+
+	rval = ioread32(xsdfec->regs + addr);
+	dev_dbg(xsdfec->dev, "Read value = 0x%x from offset 0x%x", rval, addr);
+	return rval;
+}
+
+static void update_bool_config_from_reg(struct xsdfec_dev *xsdfec,
+					u32 reg_offset, u32 bit_num,
+					char *config_value)
+{
+	u32 reg_val;
+	u32 bit_mask = 1 << bit_num;
+
+	reg_val = xsdfec_regread(xsdfec, reg_offset);
+	*config_value = (reg_val & bit_mask) > 0;
+}
+
+static void update_config_from_hw(struct xsdfec_dev *xsdfec)
+{
+	u32 reg_value;
+	bool sdfec_started;
+
+	/* Update the Order */
+	reg_value = xsdfec_regread(xsdfec, XSDFEC_ORDER_ADDR);
+	xsdfec->config.order = reg_value;
+
+	update_bool_config_from_reg(xsdfec, XSDFEC_BYPASS_ADDR,
+				    0, /* Bit Number, maybe change to mask */
+				    &xsdfec->config.bypass);
+
+	update_bool_config_from_reg(xsdfec, XSDFEC_CODE_WR_PROTECT_ADDR,
+				    0, /* Bit Number */
+				    &xsdfec->config.code_wr_protect);
+
+	reg_value = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR);
+	xsdfec->config.irq.enable_isr = (reg_value & XSDFEC_ISR_MASK) > 0;
+
+	reg_value = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR);
+	xsdfec->config.irq.enable_ecc_isr =
+		(reg_value & XSDFEC_ECC_ISR_MASK) > 0;
+
+	reg_value = xsdfec_regread(xsdfec, XSDFEC_AXIS_ENABLE_ADDR);
+	sdfec_started = (reg_value & XSDFEC_AXIS_IN_ENABLE_MASK) > 0;
+	if (sdfec_started)
+		xsdfec->state = XSDFEC_STARTED;
+	else
+		xsdfec->state = XSDFEC_STOPPED;
+}
+
+static u32
+xsdfec_translate_axis_width_cfg_val(enum xsdfec_axis_width axis_width_cfg)
+{
+	u32 axis_width_field = 0;
+
+	switch (axis_width_cfg) {
+	case XSDFEC_1x128b:
+		axis_width_field = 0;
+		break;
+	case XSDFEC_2x128b:
+		axis_width_field = 1;
+		break;
+	case XSDFEC_4x128b:
+		axis_width_field = 2;
+		break;
+	}
+
+	return axis_width_field;
+}
+
+static u32 xsdfec_translate_axis_words_cfg_val(enum xsdfec_axis_word_include
+	axis_word_inc_cfg)
+{
+	u32 axis_words_field = 0;
+
+	if (axis_word_inc_cfg == XSDFEC_FIXED_VALUE ||
+	    axis_word_inc_cfg == XSDFEC_IN_BLOCK)
+		axis_words_field = 0;
+	else if (axis_word_inc_cfg == XSDFEC_PER_AXI_TRANSACTION)
+		axis_words_field = 1;
+
+	return axis_words_field;
+}
+
+static int xsdfec_cfg_axi_streams(struct xsdfec_dev *xsdfec)
+{
+	u32 reg_value;
+	u32 dout_words_field;
+	u32 dout_width_field;
+	u32 din_words_field;
+	u32 din_width_field;
+	struct xsdfec_config *config = &xsdfec->config;
+
+	/* translate config info to register values */
+	dout_words_field =
+		xsdfec_translate_axis_words_cfg_val(config->dout_word_include);
+	dout_width_field =
+		xsdfec_translate_axis_width_cfg_val(config->dout_width);
+	din_words_field =
+		xsdfec_translate_axis_words_cfg_val(config->din_word_include);
+	din_width_field =
+		xsdfec_translate_axis_width_cfg_val(config->din_width);
+
+	reg_value = dout_words_field << XSDFEC_AXIS_DOUT_WORDS_LSB;
+	reg_value |= dout_width_field << XSDFEC_AXIS_DOUT_WIDTH_LSB;
+	reg_value |= din_words_field << XSDFEC_AXIS_DIN_WORDS_LSB;
+	reg_value |= din_width_field << XSDFEC_AXIS_DIN_WIDTH_LSB;
+
+	xsdfec_regwrite(xsdfec, XSDFEC_AXIS_WIDTH_ADDR, reg_value);
+
+	return 0;
+}
+
 static const struct file_operations xsdfec_fops = {
 	.owner = THIS_MODULE,
 };
 
+static int xsdfec_parse_of(struct xsdfec_dev *xsdfec)
+{
+	struct device *dev = xsdfec->dev;
+	struct device_node *node = dev->of_node;
+	int rval;
+	const char *fec_code;
+	u32 din_width;
+	u32 din_word_include;
+	u32 dout_width;
+	u32 dout_word_include;
+
+	rval = of_property_read_string(node, "xlnx,sdfec-code", &fec_code);
+	if (rval < 0)
+		return rval;
+
+	if (!strcasecmp(fec_code, "ldpc"))
+		xsdfec->config.code = XSDFEC_LDPC_CODE;
+	else if (!strcasecmp(fec_code, "turbo"))
+		xsdfec->config.code = XSDFEC_TURBO_CODE;
+	else
+		return -EINVAL;
+
+	rval = of_property_read_u32(node, "xlnx,sdfec-din-words",
+				    &din_word_include);
+	if (rval < 0)
+		return rval;
+
+	if (din_word_include < XSDFEC_AXIS_WORDS_INCLUDE_MAX)
+		xsdfec->config.din_word_include = din_word_include;
+	else
+		return -EINVAL;
+
+	rval = of_property_read_u32(node, "xlnx,sdfec-din-width", &din_width);
+	if (rval < 0)
+		return rval;
+
+	switch (din_width) {
+	/* Fall through and set for valid values */
+	case XSDFEC_1x128b:
+	case XSDFEC_2x128b:
+	case XSDFEC_4x128b:
+		xsdfec->config.din_width = din_width;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rval = of_property_read_u32(node, "xlnx,sdfec-dout-words",
+				    &dout_word_include);
+	if (rval < 0)
+		return rval;
+
+	if (dout_word_include < XSDFEC_AXIS_WORDS_INCLUDE_MAX)
+		xsdfec->config.dout_word_include = dout_word_include;
+	else
+		return -EINVAL;
+
+	rval = of_property_read_u32(node, "xlnx,sdfec-dout-width", &dout_width);
+	if (rval < 0)
+		return rval;
+
+	switch (dout_width) {
+	/* Fall through and set for valid values */
+	case XSDFEC_1x128b:
+	case XSDFEC_2x128b:
+	case XSDFEC_4x128b:
+		xsdfec->config.dout_width = dout_width;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Write LDPC to CODE Register */
+	xsdfec_regwrite(xsdfec, XSDFEC_FEC_CODE_ADDR, xsdfec->config.code);
+
+	xsdfec_cfg_axi_streams(xsdfec);
+
+	return 0;
+}
+
 static int xsdfec_clk_init(struct platform_device *pdev,
 			   struct xsdfec_clks *clks)
 {
@@ -260,6 +547,12 @@ static int xsdfec_probe(struct platform_device *pdev)
 		goto err_xsdfec_dev;
 	}
 
+	err = xsdfec_parse_of(xsdfec);
+	if (err < 0)
+		goto err_xsdfec_dev;
+
+	update_config_from_hw(xsdfec);
+
 	/* Save driver private data */
 	platform_set_drvdata(pdev, xsdfec);
 
diff --git a/include/uapi/misc/xilinx_sdfec.h b/include/uapi/misc/xilinx_sdfec.h
new file mode 100644
index 000000000000..330ea25ddfa1
--- /dev/null
+++ b/include/uapi/misc/xilinx_sdfec.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Xilinx SD-FEC
+ *
+ * Copyright (C) 2019 Xilinx, Inc.
+ *
+ * Description:
+ * This driver is developed for SDFEC16 IP. It provides a char device
+ * in sysfs and supports file operations like open(), close() and ioctl().
+ */
+#ifndef __XILINX_SDFEC_H__
+#define __XILINX_SDFEC_H__
+
+#include <linux/types.h>
+
+/**
+ * enum xsdfec_code - Code Type.
+ * @XSDFEC_TURBO_CODE: Driver is configured for Turbo mode.
+ * @XSDFEC_LDPC_CODE: Driver is configured for LDPC mode.
+ *
+ * This enum is used to indicate the mode of the driver. The mode is determined
+ * by checking which codes are set in the driver. Note that the mode cannot be
+ * changed by the driver.
+ */
+enum xsdfec_code {
+	XSDFEC_TURBO_CODE = 0,
+	XSDFEC_LDPC_CODE,
+};
+
+/**
+ * enum xsdfec_order - Order
+ * @XSDFEC_MAINTAIN_ORDER: Maintain order execution of blocks.
+ * @XSDFEC_OUT_OF_ORDER: Out-of-order execution of blocks.
+ *
+ * This enum is used to indicate whether the order of blocks can change from
+ * input to output.
+ */
+enum xsdfec_order {
+	XSDFEC_MAINTAIN_ORDER = 0,
+	XSDFEC_OUT_OF_ORDER,
+};
+
+/**
+ * enum xsdfec_state - State.
+ * @XSDFEC_INIT: Driver is initialized.
+ * @XSDFEC_STARTED: Driver is started.
+ * @XSDFEC_STOPPED: Driver is stopped.
+ * @XSDFEC_NEEDS_RESET: Driver needs to be reset.
+ * @XSDFEC_PL_RECONFIGURE: Programmable Logic needs to be recofigured.
+ *
+ * This enum is used to indicate the state of the driver.
+ */
+enum xsdfec_state {
+	XSDFEC_INIT = 0,
+	XSDFEC_STARTED,
+	XSDFEC_STOPPED,
+	XSDFEC_NEEDS_RESET,
+	XSDFEC_PL_RECONFIGURE,
+};
+
+/**
+ * enum xsdfec_axis_width - AXIS_WIDTH.DIN Setting for 128-bit width.
+ * @XSDFEC_1x128b: DIN data input stream consists of a 128-bit lane
+ * @XSDFEC_2x128b: DIN data input stream consists of two 128-bit lanes
+ * @XSDFEC_4x128b: DIN data input stream consists of four 128-bit lanes
+ *
+ * This enum is used to indicate the AXIS_WIDTH.DIN setting for 128-bit width.
+ * The number of lanes of the DIN data input stream depends upon the
+ * AXIS_WIDTH.DIN parameter.
+ */
+enum xsdfec_axis_width {
+	XSDFEC_1x128b = 1,
+	XSDFEC_2x128b = 2,
+	XSDFEC_4x128b = 4,
+};
+
+/**
+ * enum xsdfec_axis_word_include - Words Configuration.
+ * @XSDFEC_FIXED_VALUE: Fixed, the DIN_WORDS AXI4-Stream interface is removed
+ *			from the IP instance and is driven with the specified
+ *			number of words.
+ * @XSDFEC_IN_BLOCK: In Block, configures the IP instance to expect a single
+ *		     DIN_WORDS value per input code block. The DIN_WORDS
+ *		     interface is present.
+ * @XSDFEC_PER_AXI_TRANSACTION: Per Transaction, configures the IP instance to
+ * expect one DIN_WORDS value per input transaction on the DIN interface. The
+ * DIN_WORDS interface is present.
+ * @XSDFEC_AXIS_WORDS_INCLUDE_MAX: Used to indicate out of bound Words
+ *				   Configurations.
+ *
+ * This enum is used to specify the DIN_WORDS configuration.
+ */
+enum xsdfec_axis_word_include {
+	XSDFEC_FIXED_VALUE = 0,
+	XSDFEC_IN_BLOCK,
+	XSDFEC_PER_AXI_TRANSACTION,
+	XSDFEC_AXIS_WORDS_INCLUDE_MAX,
+};
+
+/**
+ * struct xsdfec_irq - Enabling or Disabling Interrupts.
+ * @enable_isr: If true enables the ISR
+ * @enable_ecc_isr: If true enables the ECC ISR
+ */
+struct xsdfec_irq {
+	__s8 enable_isr;
+	__s8 enable_ecc_isr;
+};
+
+/**
+ * struct xsdfec_config - Configuration of SD-FEC core.
+ * @code: The codes being used by the SD-FEC instance
+ * @order: Order of Operation
+ * @din_width: Width of the DIN AXI4-Stream
+ * @din_word_include: How DIN_WORDS are inputted
+ * @dout_width: Width of the DOUT AXI4-Stream
+ * @dout_word_include: HOW DOUT_WORDS are outputted
+ * @irq: Enabling or disabling interrupts
+ * @bypass: Is the core being bypassed
+ * @code_wr_protect: Is write protection of LDPC codes enabled
+ */
+struct xsdfec_config {
+	__u32 code;
+	__u32 order;
+	__u32 din_width;
+	__u32 din_word_include;
+	__u32 dout_width;
+	__u32 dout_word_include;
+	struct xsdfec_irq irq;
+	__s8 bypass;
+	__s8 code_wr_protect;
+};
+
+/*
+ * XSDFEC IOCTL List
+ */
+#define XSDFEC_MAGIC 'f'
+#endif /* __XILINX_SDFEC_H__ */
-- 
cgit v1.2.3


From 6f86ed820178ba35f87712548e6cd43b91608a6c Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Sat, 27 Jul 2019 09:33:52 +0100
Subject: misc: xilinx_sdfec: Add ability to configure turbo

Add the capability to configure and retrieve turbo mode
via the ioctls XSDFEC_SET_TURBO and XSDFEC_GET_TURBO.
Add char device interface per DT node present and support
file operations:
- open(),
- close(),
- unlocked_ioctl(),
- compat_ioctl().

Tested-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Link: https://lore.kernel.org/r/1564216438-322406-3-git-send-email-dragan.cvetic@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/xilinx_sdfec.c      | 110 +++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/xilinx_sdfec.h |  67 ++++++++++++++++++++++++
 2 files changed, 177 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index 24d9f79fe073..d3dba7e6f896 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -19,6 +19,7 @@
 #include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/clk.h>
+#include <linux/compat.h>
 
 #include <uapi/misc/xilinx_sdfec.h>
 
@@ -106,6 +107,12 @@ static struct mutex dev_idr_lock;
 /* BYPASS Register */
 #define XSDFEC_BYPASS_ADDR (0x3C)
 
+/* Turbo Code Register */
+#define XSDFEC_TURBO_ADDR (0x100)
+#define XSDFEC_TURBO_SCALE_MASK (0xFFF)
+#define XSDFEC_TURBO_SCALE_BIT_POS (8)
+#define XSDFEC_TURBO_SCALE_MAX (15)
+
 /**
  * struct xsdfec_clks - For managing SD-FEC clocks
  * @core_clk: Main processing clock for core
@@ -214,6 +221,55 @@ static void update_config_from_hw(struct xsdfec_dev *xsdfec)
 		xsdfec->state = XSDFEC_STOPPED;
 }
 
+static int xsdfec_set_turbo(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	struct xsdfec_turbo turbo;
+	int err;
+	u32 turbo_write;
+
+	err = copy_from_user(&turbo, arg, sizeof(turbo));
+	if (err)
+		return -EFAULT;
+
+	if (turbo.alg >= XSDFEC_TURBO_ALG_MAX)
+		return -EINVAL;
+
+	if (turbo.scale > XSDFEC_TURBO_SCALE_MAX)
+		return -EINVAL;
+
+	/* Check to see what device tree says about the FEC codes */
+	if (xsdfec->config.code == XSDFEC_LDPC_CODE)
+		return -EIO;
+
+	turbo_write = ((turbo.scale & XSDFEC_TURBO_SCALE_MASK)
+		       << XSDFEC_TURBO_SCALE_BIT_POS) |
+		      turbo.alg;
+	xsdfec_regwrite(xsdfec, XSDFEC_TURBO_ADDR, turbo_write);
+	return err;
+}
+
+static int xsdfec_get_turbo(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	u32 reg_value;
+	struct xsdfec_turbo turbo_params;
+	int err;
+
+	if (xsdfec->config.code == XSDFEC_LDPC_CODE)
+		return -EIO;
+
+	reg_value = xsdfec_regread(xsdfec, XSDFEC_TURBO_ADDR);
+
+	turbo_params.scale = (reg_value & XSDFEC_TURBO_SCALE_MASK) >>
+			     XSDFEC_TURBO_SCALE_BIT_POS;
+	turbo_params.alg = reg_value & 0x1;
+
+	err = copy_to_user(arg, &turbo_params, sizeof(turbo_params));
+	if (err)
+		err = -EFAULT;
+
+	return err;
+}
+
 static u32
 xsdfec_translate_axis_width_cfg_val(enum xsdfec_axis_width axis_width_cfg)
 {
@@ -277,8 +333,62 @@ static int xsdfec_cfg_axi_streams(struct xsdfec_dev *xsdfec)
 	return 0;
 }
 
+static int xsdfec_dev_open(struct inode *iptr, struct file *fptr)
+{
+	return 0;
+}
+
+static int xsdfec_dev_release(struct inode *iptr, struct file *fptr)
+{
+	return 0;
+}
+
+static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
+			     unsigned long data)
+{
+	struct xsdfec_dev *xsdfec;
+	void __user *arg = NULL;
+	int rval = -EINVAL;
+
+	xsdfec = container_of(fptr->private_data, struct xsdfec_dev, miscdev);
+
+	/* check if ioctl argument is present and valid */
+	if (_IOC_DIR(cmd) != _IOC_NONE) {
+		arg = (void __user *)data;
+		if (!arg)
+			return rval;
+	}
+
+	switch (cmd) {
+	case XSDFEC_SET_TURBO:
+		rval = xsdfec_set_turbo(xsdfec, arg);
+		break;
+	case XSDFEC_GET_TURBO:
+		rval = xsdfec_get_turbo(xsdfec, arg);
+		break;
+	default:
+		/* Should not get here */
+		break;
+	}
+	return rval;
+}
+
+#ifdef CONFIG_COMPAT
+static long xsdfec_dev_compat_ioctl(struct file *file, unsigned int cmd,
+				    unsigned long data)
+{
+	return xsdfec_dev_ioctl(file, cmd, (unsigned long)compat_ptr(data));
+}
+#endif
+
 static const struct file_operations xsdfec_fops = {
 	.owner = THIS_MODULE,
+	.open = xsdfec_dev_open,
+	.release = xsdfec_dev_release,
+	.unlocked_ioctl = xsdfec_dev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = xsdfec_dev_compat_ioctl,
+#endif
 };
 
 static int xsdfec_parse_of(struct xsdfec_dev *xsdfec)
diff --git a/include/uapi/misc/xilinx_sdfec.h b/include/uapi/misc/xilinx_sdfec.h
index 330ea25ddfa1..34f32c37261d 100644
--- a/include/uapi/misc/xilinx_sdfec.h
+++ b/include/uapi/misc/xilinx_sdfec.h
@@ -40,6 +40,22 @@ enum xsdfec_order {
 	XSDFEC_OUT_OF_ORDER,
 };
 
+/**
+ * enum xsdfec_turbo_alg - Turbo Algorithm Type.
+ * @XSDFEC_MAX_SCALE: Max Log-Map algorithm with extrinsic scaling. When
+ *		      scaling is set to this is equivalent to the Max Log-Map
+ *		      algorithm.
+ * @XSDFEC_MAX_STAR: Log-Map algorithm.
+ * @XSDFEC_TURBO_ALG_MAX: Used to indicate out of bound Turbo algorithms.
+ *
+ * This enum specifies which Turbo Decode algorithm is in use.
+ */
+enum xsdfec_turbo_alg {
+	XSDFEC_MAX_SCALE = 0,
+	XSDFEC_MAX_STAR,
+	XSDFEC_TURBO_ALG_MAX,
+};
+
 /**
  * enum xsdfec_state - State.
  * @XSDFEC_INIT: Driver is initialized.
@@ -97,6 +113,29 @@ enum xsdfec_axis_word_include {
 	XSDFEC_AXIS_WORDS_INCLUDE_MAX,
 };
 
+/**
+ * struct xsdfec_turbo - User data for Turbo codes.
+ * @alg: Specifies which Turbo decode algorithm to use
+ * @scale: Specifies the extrinsic scaling to apply when the Max Scale algorithm
+ *	   has been selected
+ *
+ * Turbo code structure to communicate parameters to XSDFEC driver.
+ */
+struct xsdfec_turbo {
+	__u32 alg;
+	__u8 scale;
+};
+
+/**
+ * struct xsdfec_status - Status of SD-FEC core.
+ * @state: State of the SD-FEC core
+ * @activity: Describes if the SD-FEC instance is Active
+ */
+struct xsdfec_status {
+	__u32 state;
+	__s8 activity;
+};
+
 /**
  * struct xsdfec_irq - Enabling or Disabling Interrupts.
  * @enable_isr: If true enables the ISR
@@ -135,4 +174,32 @@ struct xsdfec_config {
  * XSDFEC IOCTL List
  */
 #define XSDFEC_MAGIC 'f'
+/**
+ * DOC: XSDFEC_SET_TURBO
+ * @Parameters
+ *
+ * @struct xsdfec_turbo *
+ *	Pointer to the &struct xsdfec_turbo that contains the Turbo decode
+ *	settings for the SD-FEC core
+ *
+ * @Description
+ *
+ * ioctl that sets the SD-FEC Turbo parameter values
+ *
+ * This can only be used when the driver is in the XSDFEC_STOPPED state
+ */
+#define XSDFEC_SET_TURBO _IOW(XSDFEC_MAGIC, 4, struct xsdfec_turbo)
+/**
+ * DOC: XSDFEC_GET_TURBO
+ * @Parameters
+ *
+ * @struct xsdfec_turbo *
+ *	Pointer to the &struct xsdfec_turbo that contains the current Turbo
+ *	decode settings of the SD-FEC Block
+ *
+ * @Description
+ *
+ * ioctl that returns SD-FEC turbo param values
+ */
+#define XSDFEC_GET_TURBO _IOR(XSDFEC_MAGIC, 7, struct xsdfec_turbo)
 #endif /* __XILINX_SDFEC_H__ */
-- 
cgit v1.2.3


From 20ec628e8007ec75c2f884e00004f39eab6289b5 Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Sat, 27 Jul 2019 09:33:53 +0100
Subject: misc: xilinx_sdfec: Add ability to configure LDPC

Add the capability to configure LDPC mode via the ioctl
XSDFEC_ADD_LDPC_CODE_PARAMS.

Tested-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Link: https://lore.kernel.org/r/1564216438-322406-4-git-send-email-dragan.cvetic@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/xilinx_sdfec.c      | 324 +++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/xilinx_sdfec.h |  98 ++++++++++++
 2 files changed, 422 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index d3dba7e6f896..9be4de07eee2 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 #include <linux/clk.h>
 #include <linux/compat.h>
+#include <linux/highmem.h>
 
 #include <uapi/misc/xilinx_sdfec.h>
 
@@ -113,6 +114,57 @@ static struct mutex dev_idr_lock;
 #define XSDFEC_TURBO_SCALE_BIT_POS (8)
 #define XSDFEC_TURBO_SCALE_MAX (15)
 
+/* REG0 Register */
+#define XSDFEC_LDPC_CODE_REG0_ADDR_BASE (0x2000)
+#define XSDFEC_LDPC_CODE_REG0_ADDR_HIGH (0x27F0)
+#define XSDFEC_REG0_N_MIN (4)
+#define XSDFEC_REG0_N_MAX (32768)
+#define XSDFEC_REG0_N_MUL_P (256)
+#define XSDFEC_REG0_N_LSB (0)
+#define XSDFEC_REG0_K_MIN (2)
+#define XSDFEC_REG0_K_MAX (32766)
+#define XSDFEC_REG0_K_MUL_P (256)
+#define XSDFEC_REG0_K_LSB (16)
+
+/* REG1 Register */
+#define XSDFEC_LDPC_CODE_REG1_ADDR_BASE (0x2004)
+#define XSDFEC_LDPC_CODE_REG1_ADDR_HIGH (0x27f4)
+#define XSDFEC_REG1_PSIZE_MIN (2)
+#define XSDFEC_REG1_PSIZE_MAX (512)
+#define XSDFEC_REG1_NO_PACKING_MASK (0x400)
+#define XSDFEC_REG1_NO_PACKING_LSB (10)
+#define XSDFEC_REG1_NM_MASK (0xFF800)
+#define XSDFEC_REG1_NM_LSB (11)
+#define XSDFEC_REG1_BYPASS_MASK (0x100000)
+
+/* REG2 Register */
+#define XSDFEC_LDPC_CODE_REG2_ADDR_BASE (0x2008)
+#define XSDFEC_LDPC_CODE_REG2_ADDR_HIGH (0x27f8)
+#define XSDFEC_REG2_NLAYERS_MIN (1)
+#define XSDFEC_REG2_NLAYERS_MAX (256)
+#define XSDFEC_REG2_NNMQC_MASK (0xFFE00)
+#define XSDFEC_REG2_NMQC_LSB (9)
+#define XSDFEC_REG2_NORM_TYPE_MASK (0x100000)
+#define XSDFEC_REG2_NORM_TYPE_LSB (20)
+#define XSDFEC_REG2_SPECIAL_QC_MASK (0x200000)
+#define XSDFEC_REG2_SPEICAL_QC_LSB (21)
+#define XSDFEC_REG2_NO_FINAL_PARITY_MASK (0x400000)
+#define XSDFEC_REG2_NO_FINAL_PARITY_LSB (22)
+#define XSDFEC_REG2_MAX_SCHEDULE_MASK (0x1800000)
+#define XSDFEC_REG2_MAX_SCHEDULE_LSB (23)
+
+/* REG3 Register */
+#define XSDFEC_LDPC_CODE_REG3_ADDR_BASE (0x200C)
+#define XSDFEC_LDPC_CODE_REG3_ADDR_HIGH (0x27FC)
+#define XSDFEC_REG3_LA_OFF_LSB (8)
+#define XSDFEC_REG3_QC_OFF_LSB (16)
+
+#define XSDFEC_LDPC_REG_JUMP (0x10)
+#define XSDFEC_REG_WIDTH_JUMP (4)
+
+/* The maximum number of pinned pages */
+#define MAX_NUM_PAGES ((XSDFEC_QC_TABLE_DEPTH / PAGE_SIZE) + 1)
+
 /**
  * struct xsdfec_clks - For managing SD-FEC clocks
  * @core_clk: Main processing clock for core
@@ -270,6 +322,275 @@ static int xsdfec_get_turbo(struct xsdfec_dev *xsdfec, void __user *arg)
 	return err;
 }
 
+static int xsdfec_reg0_write(struct xsdfec_dev *xsdfec, u32 n, u32 k, u32 psize,
+			     u32 offset)
+{
+	u32 wdata;
+
+	if (n < XSDFEC_REG0_N_MIN || n > XSDFEC_REG0_N_MAX ||
+	    (n > XSDFEC_REG0_N_MUL_P * psize) || n <= k || ((n % psize) != 0)) {
+		dev_dbg(xsdfec->dev, "N value is not in range");
+		return -EINVAL;
+	}
+	n <<= XSDFEC_REG0_N_LSB;
+
+	if (k < XSDFEC_REG0_K_MIN || k > XSDFEC_REG0_K_MAX ||
+	    (k > XSDFEC_REG0_K_MUL_P * psize) || ((k % psize) != 0)) {
+		dev_dbg(xsdfec->dev, "K value is not in range");
+		return -EINVAL;
+	}
+	k = k << XSDFEC_REG0_K_LSB;
+	wdata = k | n;
+
+	if (XSDFEC_LDPC_CODE_REG0_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) >
+	    XSDFEC_LDPC_CODE_REG0_ADDR_HIGH) {
+		dev_dbg(xsdfec->dev, "Writing outside of LDPC reg0 space 0x%x",
+			XSDFEC_LDPC_CODE_REG0_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP));
+		return -EINVAL;
+	}
+	xsdfec_regwrite(xsdfec,
+			XSDFEC_LDPC_CODE_REG0_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP),
+			wdata);
+	return 0;
+}
+
+static int xsdfec_reg1_write(struct xsdfec_dev *xsdfec, u32 psize,
+			     u32 no_packing, u32 nm, u32 offset)
+{
+	u32 wdata;
+
+	if (psize < XSDFEC_REG1_PSIZE_MIN || psize > XSDFEC_REG1_PSIZE_MAX) {
+		dev_dbg(xsdfec->dev, "Psize is not in range");
+		return -EINVAL;
+	}
+
+	if (no_packing != 0 && no_packing != 1)
+		dev_dbg(xsdfec->dev, "No-packing bit register invalid");
+	no_packing = ((no_packing << XSDFEC_REG1_NO_PACKING_LSB) &
+		      XSDFEC_REG1_NO_PACKING_MASK);
+
+	if (nm & ~(XSDFEC_REG1_NM_MASK >> XSDFEC_REG1_NM_LSB))
+		dev_dbg(xsdfec->dev, "NM is beyond 10 bits");
+	nm = (nm << XSDFEC_REG1_NM_LSB) & XSDFEC_REG1_NM_MASK;
+
+	wdata = nm | no_packing | psize;
+	if (XSDFEC_LDPC_CODE_REG1_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) >
+	    XSDFEC_LDPC_CODE_REG1_ADDR_HIGH) {
+		dev_dbg(xsdfec->dev, "Writing outside of LDPC reg1 space 0x%x",
+			XSDFEC_LDPC_CODE_REG1_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP));
+		return -EINVAL;
+	}
+	xsdfec_regwrite(xsdfec,
+			XSDFEC_LDPC_CODE_REG1_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP),
+			wdata);
+	return 0;
+}
+
+static int xsdfec_reg2_write(struct xsdfec_dev *xsdfec, u32 nlayers, u32 nmqc,
+			     u32 norm_type, u32 special_qc, u32 no_final_parity,
+			     u32 max_schedule, u32 offset)
+{
+	u32 wdata;
+
+	if (nlayers < XSDFEC_REG2_NLAYERS_MIN ||
+	    nlayers > XSDFEC_REG2_NLAYERS_MAX) {
+		dev_dbg(xsdfec->dev, "Nlayers is not in range");
+		return -EINVAL;
+	}
+
+	if (nmqc & ~(XSDFEC_REG2_NNMQC_MASK >> XSDFEC_REG2_NMQC_LSB))
+		dev_dbg(xsdfec->dev, "NMQC exceeds 11 bits");
+	nmqc = (nmqc << XSDFEC_REG2_NMQC_LSB) & XSDFEC_REG2_NNMQC_MASK;
+
+	if (norm_type > 1)
+		dev_dbg(xsdfec->dev, "Norm type is invalid");
+	norm_type = ((norm_type << XSDFEC_REG2_NORM_TYPE_LSB) &
+		     XSDFEC_REG2_NORM_TYPE_MASK);
+	if (special_qc > 1)
+		dev_dbg(xsdfec->dev, "Special QC in invalid");
+	special_qc = ((special_qc << XSDFEC_REG2_SPEICAL_QC_LSB) &
+		      XSDFEC_REG2_SPECIAL_QC_MASK);
+
+	if (no_final_parity > 1)
+		dev_dbg(xsdfec->dev, "No final parity check invalid");
+	no_final_parity =
+		((no_final_parity << XSDFEC_REG2_NO_FINAL_PARITY_LSB) &
+		 XSDFEC_REG2_NO_FINAL_PARITY_MASK);
+	if (max_schedule &
+	    ~(XSDFEC_REG2_MAX_SCHEDULE_MASK >> XSDFEC_REG2_MAX_SCHEDULE_LSB))
+		dev_dbg(xsdfec->dev, "Max Schdule exceeds 2 bits");
+	max_schedule = ((max_schedule << XSDFEC_REG2_MAX_SCHEDULE_LSB) &
+			XSDFEC_REG2_MAX_SCHEDULE_MASK);
+
+	wdata = (max_schedule | no_final_parity | special_qc | norm_type |
+		 nmqc | nlayers);
+
+	if (XSDFEC_LDPC_CODE_REG2_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) >
+	    XSDFEC_LDPC_CODE_REG2_ADDR_HIGH) {
+		dev_dbg(xsdfec->dev, "Writing outside of LDPC reg2 space 0x%x",
+			XSDFEC_LDPC_CODE_REG2_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP));
+		return -EINVAL;
+	}
+	xsdfec_regwrite(xsdfec,
+			XSDFEC_LDPC_CODE_REG2_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP),
+			wdata);
+	return 0;
+}
+
+static int xsdfec_reg3_write(struct xsdfec_dev *xsdfec, u8 sc_off, u8 la_off,
+			     u16 qc_off, u32 offset)
+{
+	u32 wdata;
+
+	wdata = ((qc_off << XSDFEC_REG3_QC_OFF_LSB) |
+		 (la_off << XSDFEC_REG3_LA_OFF_LSB) | sc_off);
+	if (XSDFEC_LDPC_CODE_REG3_ADDR_BASE + (offset * XSDFEC_LDPC_REG_JUMP) >
+	    XSDFEC_LDPC_CODE_REG3_ADDR_HIGH) {
+		dev_dbg(xsdfec->dev, "Writing outside of LDPC reg3 space 0x%x",
+			XSDFEC_LDPC_CODE_REG3_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP));
+		return -EINVAL;
+	}
+	xsdfec_regwrite(xsdfec,
+			XSDFEC_LDPC_CODE_REG3_ADDR_BASE +
+				(offset * XSDFEC_LDPC_REG_JUMP),
+			wdata);
+	return 0;
+}
+
+static int xsdfec_table_write(struct xsdfec_dev *xsdfec, u32 offset,
+			      u32 *src_ptr, u32 len, const u32 base_addr,
+			      const u32 depth)
+{
+	u32 reg = 0;
+	u32 res;
+	u32 n, i;
+	u32 *addr = NULL;
+	struct page *page[MAX_NUM_PAGES];
+
+	/*
+	 * Writes that go beyond the length of
+	 * Shared Scale(SC) table should fail
+	 */
+	if ((XSDFEC_REG_WIDTH_JUMP * (offset + len)) > depth) {
+		dev_dbg(xsdfec->dev, "Write exceeds SC table length");
+		return -EINVAL;
+	}
+
+	n = (len * XSDFEC_REG_WIDTH_JUMP) / PAGE_SIZE;
+	if ((len * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE)
+		n += 1;
+
+	res = get_user_pages_fast((unsigned long)src_ptr, n, 0, page);
+	if (res < n) {
+		for (i = 0; i < res; i++)
+			put_page(page[i]);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < n; i++) {
+		addr = kmap(page[i]);
+		do {
+			xsdfec_regwrite(xsdfec,
+					base_addr + ((offset + reg) *
+						     XSDFEC_REG_WIDTH_JUMP),
+					addr[reg]);
+			reg++;
+		} while ((reg < len) &&
+			 ((reg * XSDFEC_REG_WIDTH_JUMP) % PAGE_SIZE));
+		put_page(page[i]);
+	}
+	return reg;
+}
+
+static int xsdfec_add_ldpc(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	struct xsdfec_ldpc_params *ldpc;
+	int ret, n;
+
+	ldpc = kzalloc(sizeof(*ldpc), GFP_KERNEL);
+	if (!ldpc)
+		return -ENOMEM;
+
+	ret = copy_from_user(ldpc, arg, sizeof(*ldpc));
+	if (ret)
+		goto err_out;
+
+	if (xsdfec->config.code == XSDFEC_TURBO_CODE) {
+		ret = -EIO;
+		goto err_out;
+	}
+
+	/* Verify Device has not started */
+	if (xsdfec->state == XSDFEC_STARTED) {
+		ret = -EIO;
+		goto err_out;
+	}
+
+	if (xsdfec->config.code_wr_protect) {
+		ret = -EIO;
+		goto err_out;
+	}
+
+	/* Write Reg 0 */
+	ret = xsdfec_reg0_write(xsdfec, ldpc->n, ldpc->k, ldpc->psize,
+				ldpc->code_id);
+	if (ret)
+		goto err_out;
+
+	/* Write Reg 1 */
+	ret = xsdfec_reg1_write(xsdfec, ldpc->psize, ldpc->no_packing, ldpc->nm,
+				ldpc->code_id);
+	if (ret)
+		goto err_out;
+
+	/* Write Reg 2 */
+	ret = xsdfec_reg2_write(xsdfec, ldpc->nlayers, ldpc->nmqc,
+				ldpc->norm_type, ldpc->special_qc,
+				ldpc->no_final_parity, ldpc->max_schedule,
+				ldpc->code_id);
+	if (ret)
+		goto err_out;
+
+	/* Write Reg 3 */
+	ret = xsdfec_reg3_write(xsdfec, ldpc->sc_off, ldpc->la_off,
+				ldpc->qc_off, ldpc->code_id);
+	if (ret)
+		goto err_out;
+
+	/* Write Shared Codes */
+	n = ldpc->nlayers / 4;
+	if (ldpc->nlayers % 4)
+		n++;
+
+	ret = xsdfec_table_write(xsdfec, ldpc->sc_off, ldpc->sc_table, n,
+				 XSDFEC_LDPC_SC_TABLE_ADDR_BASE,
+				 XSDFEC_SC_TABLE_DEPTH);
+	if (ret < 0)
+		goto err_out;
+
+	ret = xsdfec_table_write(xsdfec, 4 * ldpc->la_off, ldpc->la_table,
+				 ldpc->nlayers, XSDFEC_LDPC_LA_TABLE_ADDR_BASE,
+				 XSDFEC_LA_TABLE_DEPTH);
+	if (ret < 0)
+		goto err_out;
+
+	ret = xsdfec_table_write(xsdfec, 4 * ldpc->qc_off, ldpc->qc_table,
+				 ldpc->nqc, XSDFEC_LDPC_QC_TABLE_ADDR_BASE,
+				 XSDFEC_QC_TABLE_DEPTH);
+	if (ret > 0)
+		ret = 0;
+err_out:
+	kfree(ldpc);
+	return ret;
+}
+
 static u32
 xsdfec_translate_axis_width_cfg_val(enum xsdfec_axis_width axis_width_cfg)
 {
@@ -366,6 +687,9 @@ static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 	case XSDFEC_GET_TURBO:
 		rval = xsdfec_get_turbo(xsdfec, arg);
 		break;
+	case XSDFEC_ADD_LDPC_CODE_PARAMS:
+		rval = xsdfec_add_ldpc(xsdfec, arg);
+		break;
 	default:
 		/* Should not get here */
 		break;
diff --git a/include/uapi/misc/xilinx_sdfec.h b/include/uapi/misc/xilinx_sdfec.h
index 34f32c37261d..ce532a5e3e81 100644
--- a/include/uapi/misc/xilinx_sdfec.h
+++ b/include/uapi/misc/xilinx_sdfec.h
@@ -13,6 +13,22 @@
 
 #include <linux/types.h>
 
+/* Shared LDPC Tables */
+#define XSDFEC_LDPC_SC_TABLE_ADDR_BASE (0x10000)
+#define XSDFEC_LDPC_SC_TABLE_ADDR_HIGH (0x10400)
+#define XSDFEC_LDPC_LA_TABLE_ADDR_BASE (0x18000)
+#define XSDFEC_LDPC_LA_TABLE_ADDR_HIGH (0x19000)
+#define XSDFEC_LDPC_QC_TABLE_ADDR_BASE (0x20000)
+#define XSDFEC_LDPC_QC_TABLE_ADDR_HIGH (0x28000)
+
+/* LDPC tables depth */
+#define XSDFEC_SC_TABLE_DEPTH                                                  \
+	(XSDFEC_LDPC_SC_TABLE_ADDR_HIGH - XSDFEC_LDPC_SC_TABLE_ADDR_BASE)
+#define XSDFEC_LA_TABLE_DEPTH                                                  \
+	(XSDFEC_LDPC_LA_TABLE_ADDR_HIGH - XSDFEC_LDPC_LA_TABLE_ADDR_BASE)
+#define XSDFEC_QC_TABLE_DEPTH                                                  \
+	(XSDFEC_LDPC_QC_TABLE_ADDR_HIGH - XSDFEC_LDPC_QC_TABLE_ADDR_BASE)
+
 /**
  * enum xsdfec_code - Code Type.
  * @XSDFEC_TURBO_CODE: Driver is configured for Turbo mode.
@@ -126,6 +142,53 @@ struct xsdfec_turbo {
 	__u8 scale;
 };
 
+/**
+ * struct xsdfec_ldpc_params - User data for LDPC codes.
+ * @n: Number of code word bits
+ * @k: Number of information bits
+ * @psize: Size of sub-matrix
+ * @nlayers: Number of layers in code
+ * @nqc: Quasi Cyclic Number
+ * @nmqc: Number of M-sized QC operations in parity check matrix
+ * @nm: Number of M-size vectors in N
+ * @norm_type: Normalization required or not
+ * @no_packing: Determines if multiple QC ops should be performed
+ * @special_qc: Sub-Matrix property for Circulant weight > 0
+ * @no_final_parity: Decide if final parity check needs to be performed
+ * @max_schedule: Experimental code word scheduling limit
+ * @sc_off: SC offset
+ * @la_off: LA offset
+ * @qc_off: QC offset
+ * @sc_table: Pointer to SC Table which must be page aligned
+ * @la_table: Pointer to LA Table which must be page aligned
+ * @qc_table: Pointer to QC Table which must be page aligned
+ * @code_id: LDPC Code
+ *
+ * This structure describes the LDPC code that is passed to the driver by the
+ * application.
+ */
+struct xsdfec_ldpc_params {
+	__u32 n;
+	__u32 k;
+	__u32 psize;
+	__u32 nlayers;
+	__u32 nqc;
+	__u32 nmqc;
+	__u32 nm;
+	__u32 norm_type;
+	__u32 no_packing;
+	__u32 special_qc;
+	__u32 no_final_parity;
+	__u32 max_schedule;
+	__u32 sc_off;
+	__u32 la_off;
+	__u32 qc_off;
+	__u32 *sc_table;
+	__u32 *la_table;
+	__u32 *qc_table;
+	__u16 code_id;
+};
+
 /**
  * struct xsdfec_status - Status of SD-FEC core.
  * @state: State of the SD-FEC core
@@ -170,6 +233,20 @@ struct xsdfec_config {
 	__s8 code_wr_protect;
 };
 
+/**
+ * struct xsdfec_ldpc_param_table_sizes - Used to store sizes of SD-FEC table
+ *					  entries for an individual LPDC code
+ *					  parameter.
+ * @sc_size: Size of SC table used
+ * @la_size: Size of LA table used
+ * @qc_size: Size of QC table used
+ */
+struct xsdfec_ldpc_param_table_sizes {
+	__u32 sc_size;
+	__u32 la_size;
+	__u32 qc_size;
+};
+
 /*
  * XSDFEC IOCTL List
  */
@@ -189,6 +266,27 @@ struct xsdfec_config {
  * This can only be used when the driver is in the XSDFEC_STOPPED state
  */
 #define XSDFEC_SET_TURBO _IOW(XSDFEC_MAGIC, 4, struct xsdfec_turbo)
+/**
+ * DOC: XSDFEC_ADD_LDPC_CODE_PARAMS
+ * @Parameters
+ *
+ * @struct xsdfec_ldpc_params *
+ *	Pointer to the &struct xsdfec_ldpc_params that contains the LDPC code
+ *	parameters to be added to the SD-FEC Block
+ *
+ * @Description
+ * ioctl to add an LDPC code to the SD-FEC LDPC codes
+ *
+ * This can only be used when:
+ *
+ * - Driver is in the XSDFEC_STOPPED state
+ *
+ * - SD-FEC core is configured as LPDC
+ *
+ * - SD-FEC Code Write Protection is disabled
+ */
+#define XSDFEC_ADD_LDPC_CODE_PARAMS                                            \
+	_IOW(XSDFEC_MAGIC, 5, struct xsdfec_ldpc_params)
 /**
  * DOC: XSDFEC_GET_TURBO
  * @Parameters
-- 
cgit v1.2.3


From 77dd39d924e650cd20696d790f861dfe26e0cb64 Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Sat, 27 Jul 2019 09:33:54 +0100
Subject: misc: xilinx_sdfec: Add ability to get/set config

- Add capability to get SD-FEC config data using ioctl
XSDFEC_GET_CONFIG.

- Add capability to set SD-FEC data order using ioctl
SDFEC_SET_ORDER.

- Add capability to set SD-FEC bypass option using ioctl
XSDFEC_SET_BYPASS.

- Add capability to set SD-FEC active state using ioctl
XSDFEC_IS_ACTIVE.

Tested-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Link: https://lore.kernel.org/r/1564216438-322406-5-git-send-email-dragan.cvetic@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/xilinx_sdfec.c      | 88 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/xilinx_sdfec.h | 57 ++++++++++++++++++++++++++
 2 files changed, 145 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index 9be4de07eee2..579f23617dd7 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -273,6 +273,17 @@ static void update_config_from_hw(struct xsdfec_dev *xsdfec)
 		xsdfec->state = XSDFEC_STOPPED;
 }
 
+static int xsdfec_get_config(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	int err;
+
+	err = copy_to_user(arg, &xsdfec->config, sizeof(xsdfec->config));
+	if (err)
+		err = -EFAULT;
+
+	return err;
+}
+
 static int xsdfec_set_turbo(struct xsdfec_dev *xsdfec, void __user *arg)
 {
 	struct xsdfec_turbo turbo;
@@ -591,6 +602,71 @@ err_out:
 	return ret;
 }
 
+static int xsdfec_set_order(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	bool order_invalid;
+	enum xsdfec_order order;
+	int err;
+
+	err = get_user(order, (enum xsdfec_order *)arg);
+	if (err)
+		return -EFAULT;
+
+	order_invalid = (order != XSDFEC_MAINTAIN_ORDER) &&
+			(order != XSDFEC_OUT_OF_ORDER);
+	if (order_invalid)
+		return -EINVAL;
+
+	/* Verify Device has not started */
+	if (xsdfec->state == XSDFEC_STARTED)
+		return -EIO;
+
+	xsdfec_regwrite(xsdfec, XSDFEC_ORDER_ADDR, order);
+
+	xsdfec->config.order = order;
+
+	return 0;
+}
+
+static int xsdfec_set_bypass(struct xsdfec_dev *xsdfec, bool __user *arg)
+{
+	bool bypass;
+	int err;
+
+	err = get_user(bypass, arg);
+	if (err)
+		return -EFAULT;
+
+	/* Verify Device has not started */
+	if (xsdfec->state == XSDFEC_STARTED)
+		return -EIO;
+
+	if (bypass)
+		xsdfec_regwrite(xsdfec, XSDFEC_BYPASS_ADDR, 1);
+	else
+		xsdfec_regwrite(xsdfec, XSDFEC_BYPASS_ADDR, 0);
+
+	xsdfec->config.bypass = bypass;
+
+	return 0;
+}
+
+static int xsdfec_is_active(struct xsdfec_dev *xsdfec, bool __user *arg)
+{
+	u32 reg_value;
+	bool is_active;
+	int err;
+
+	reg_value = xsdfec_regread(xsdfec, XSDFEC_ACTIVE_ADDR);
+	/* using a double ! operator instead of casting */
+	is_active = !!(reg_value & XSDFEC_IS_ACTIVITY_SET);
+	err = put_user(is_active, arg);
+	if (err)
+		return -EFAULT;
+
+	return err;
+}
+
 static u32
 xsdfec_translate_axis_width_cfg_val(enum xsdfec_axis_width axis_width_cfg)
 {
@@ -681,6 +757,9 @@ static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 	}
 
 	switch (cmd) {
+	case XSDFEC_GET_CONFIG:
+		rval = xsdfec_get_config(xsdfec, arg);
+		break;
 	case XSDFEC_SET_TURBO:
 		rval = xsdfec_set_turbo(xsdfec, arg);
 		break;
@@ -690,6 +769,15 @@ static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 	case XSDFEC_ADD_LDPC_CODE_PARAMS:
 		rval = xsdfec_add_ldpc(xsdfec, arg);
 		break;
+	case XSDFEC_SET_ORDER:
+		rval = xsdfec_set_order(xsdfec, arg);
+		break;
+	case XSDFEC_SET_BYPASS:
+		rval = xsdfec_set_bypass(xsdfec, arg);
+		break;
+	case XSDFEC_IS_ACTIVE:
+		rval = xsdfec_is_active(xsdfec, (bool __user *)arg);
+		break;
 	default:
 		/* Should not get here */
 		break;
diff --git a/include/uapi/misc/xilinx_sdfec.h b/include/uapi/misc/xilinx_sdfec.h
index ce532a5e3e81..b8897ce41821 100644
--- a/include/uapi/misc/xilinx_sdfec.h
+++ b/include/uapi/misc/xilinx_sdfec.h
@@ -287,6 +287,19 @@ struct xsdfec_ldpc_param_table_sizes {
  */
 #define XSDFEC_ADD_LDPC_CODE_PARAMS                                            \
 	_IOW(XSDFEC_MAGIC, 5, struct xsdfec_ldpc_params)
+/**
+ * DOC: XSDFEC_GET_CONFIG
+ * @Parameters
+ *
+ * @struct xsdfec_config *
+ *	Pointer to the &struct xsdfec_config that contains the current
+ *	configuration settings of the SD-FEC Block
+ *
+ * @Description
+ *
+ * ioctl that returns SD-FEC core configuration
+ */
+#define XSDFEC_GET_CONFIG _IOR(XSDFEC_MAGIC, 6, struct xsdfec_config)
 /**
  * DOC: XSDFEC_GET_TURBO
  * @Parameters
@@ -300,4 +313,48 @@ struct xsdfec_ldpc_param_table_sizes {
  * ioctl that returns SD-FEC turbo param values
  */
 #define XSDFEC_GET_TURBO _IOR(XSDFEC_MAGIC, 7, struct xsdfec_turbo)
+/**
+ * DOC: XSDFEC_SET_ORDER
+ * @Parameters
+ *
+ * @struct unsigned long *
+ *	Pointer to the unsigned long that contains a value from the
+ *	@enum xsdfec_order
+ *
+ * @Description
+ *
+ * ioctl that sets order, if order of blocks can change from input to output
+ *
+ * This can only be used when the driver is in the XSDFEC_STOPPED state
+ */
+#define XSDFEC_SET_ORDER _IOW(XSDFEC_MAGIC, 8, unsigned long)
+/**
+ * DOC: XSDFEC_SET_BYPASS
+ * @Parameters
+ *
+ * @struct bool *
+ *	Pointer to bool that sets the bypass value, where false results in
+ *	normal operation and false results in the SD-FEC performing the
+ *	configured operations (same number of cycles) but output data matches
+ *	the input data
+ *
+ * @Description
+ *
+ * ioctl that sets bypass.
+ *
+ * This can only be used when the driver is in the XSDFEC_STOPPED state
+ */
+#define XSDFEC_SET_BYPASS _IOW(XSDFEC_MAGIC, 9, bool)
+/**
+ * DOC: XSDFEC_IS_ACTIVE
+ * @Parameters
+ *
+ * @struct bool *
+ *	Pointer to bool that returns true if the SD-FEC is processing data
+ *
+ * @Description
+ *
+ * ioctl that determines if SD-FEC is processing data
+ */
+#define XSDFEC_IS_ACTIVE _IOR(XSDFEC_MAGIC, 10, bool)
 #endif /* __XILINX_SDFEC_H__ */
-- 
cgit v1.2.3


From cc538f609dee49b73545569c49e3abd891fdd8b3 Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Sat, 27 Jul 2019 09:33:55 +0100
Subject: misc: xilinx_sdfec: Support poll file operation

Support monitoring and detecting the SD-FEC error events
through IRQ and poll file operation.

The SD-FEC device can detect one-error or multi-error events.
An error triggers an interrupt which creates and run the ONE_SHOT
IRQ thread.
The ONE_SHOT IRQ thread detects type of error and pass that
information to the poll function.
The file_operation callback poll(), collects the events and
updates the statistics accordingly.
The function poll blocks() on waiting queue which can be
unblocked by ONE_SHOT IRQ handling thread.

Support SD-FEC interrupt set ioctl callback.
The SD-FEC can detect two type of errors: coding errors (ECC) and
a data interface errors (TLAST).
The errors are  events which can trigger an IRQ if enabled.
The driver can monitor and detect these errors through IRQ.
Also the driver updates the statistical data.

Tested-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Link: https://lore.kernel.org/r/1564216438-322406-6-git-send-email-dragan.cvetic@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/xilinx_sdfec.c      | 240 ++++++++++++++++++++++++++++++++++++++-
 include/uapi/misc/xilinx_sdfec.h |  13 +++
 2 files changed, 249 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index 579f23617dd7..61775c5626ed 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -191,27 +191,43 @@ struct xsdfec_clks {
  * struct xsdfec_dev - Driver data for SDFEC
  * @miscdev: Misc device handle
  * @clks: Clocks managed by the SDFEC driver
- * @regs: device physical base address
- * @dev: pointer to device struct
+ * @waitq: Driver wait queue
  * @config: Configuration of the SDFEC device
  * @dev_name: Device name
+ * @flags: spinlock flags
+ * @regs: device physical base address
+ * @dev: pointer to device struct
  * @state: State of the SDFEC device
  * @error_data_lock: Error counter and states spinlock
  * @dev_id: Device ID
+ * @isr_err_count: Count of ISR errors
+ * @cecc_count: Count of Correctable ECC errors (SBE)
+ * @uecc_count: Count of Uncorrectable ECC errors (MBE)
+ * @irq: IRQ number
+ * @state_updated: indicates State updated by interrupt handler
+ * @stats_updated: indicates Stats updated by interrupt handler
  *
  * This structure contains necessary state for SDFEC driver to operate
  */
 struct xsdfec_dev {
 	struct miscdevice miscdev;
 	struct xsdfec_clks clks;
-	void __iomem *regs;
-	struct device *dev;
+	wait_queue_head_t waitq;
 	struct xsdfec_config config;
 	char dev_name[DEV_NAME_LEN];
+	unsigned long flags;
+	void __iomem *regs;
+	struct device *dev;
 	enum xsdfec_state state;
 	/* Spinlock to protect state_updated and stats_updated */
 	spinlock_t error_data_lock;
 	int dev_id;
+	u32 isr_err_count;
+	u32 cecc_count;
+	u32 uecc_count;
+	int irq;
+	bool state_updated;
+	bool stats_updated;
 };
 
 static inline void xsdfec_regwrite(struct xsdfec_dev *xsdfec, u32 addr,
@@ -284,6 +300,90 @@ static int xsdfec_get_config(struct xsdfec_dev *xsdfec, void __user *arg)
 	return err;
 }
 
+static int xsdfec_isr_enable(struct xsdfec_dev *xsdfec, bool enable)
+{
+	u32 mask_read;
+
+	if (enable) {
+		/* Enable */
+		xsdfec_regwrite(xsdfec, XSDFEC_IER_ADDR, XSDFEC_ISR_MASK);
+		mask_read = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR);
+		if (mask_read & XSDFEC_ISR_MASK) {
+			dev_dbg(xsdfec->dev,
+				"SDFEC enabling irq with IER failed");
+			return -EIO;
+		}
+	} else {
+		/* Disable */
+		xsdfec_regwrite(xsdfec, XSDFEC_IDR_ADDR, XSDFEC_ISR_MASK);
+		mask_read = xsdfec_regread(xsdfec, XSDFEC_IMR_ADDR);
+		if ((mask_read & XSDFEC_ISR_MASK) != XSDFEC_ISR_MASK) {
+			dev_dbg(xsdfec->dev,
+				"SDFEC disabling irq with IDR failed");
+			return -EIO;
+		}
+	}
+	return 0;
+}
+
+static int xsdfec_ecc_isr_enable(struct xsdfec_dev *xsdfec, bool enable)
+{
+	u32 mask_read;
+
+	if (enable) {
+		/* Enable */
+		xsdfec_regwrite(xsdfec, XSDFEC_ECC_IER_ADDR,
+				XSDFEC_ALL_ECC_ISR_MASK);
+		mask_read = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR);
+		if (mask_read & XSDFEC_ALL_ECC_ISR_MASK) {
+			dev_dbg(xsdfec->dev,
+				"SDFEC enabling ECC irq with ECC IER failed");
+			return -EIO;
+		}
+	} else {
+		/* Disable */
+		xsdfec_regwrite(xsdfec, XSDFEC_ECC_IDR_ADDR,
+				XSDFEC_ALL_ECC_ISR_MASK);
+		mask_read = xsdfec_regread(xsdfec, XSDFEC_ECC_IMR_ADDR);
+		if (!(((mask_read & XSDFEC_ALL_ECC_ISR_MASK) ==
+		       XSDFEC_ECC_ISR_MASK) ||
+		      ((mask_read & XSDFEC_ALL_ECC_ISR_MASK) ==
+		       XSDFEC_PL_INIT_ECC_ISR_MASK))) {
+			dev_dbg(xsdfec->dev,
+				"SDFEC disable ECC irq with ECC IDR failed");
+			return -EIO;
+		}
+	}
+	return 0;
+}
+
+static int xsdfec_set_irq(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	struct xsdfec_irq irq;
+	int err;
+	int isr_err;
+	int ecc_err;
+
+	err = copy_from_user(&irq, arg, sizeof(irq));
+	if (err)
+		return -EFAULT;
+
+	/* Setup tlast related IRQ */
+	isr_err = xsdfec_isr_enable(xsdfec, irq.enable_isr);
+	if (!isr_err)
+		xsdfec->config.irq.enable_isr = irq.enable_isr;
+
+	/* Setup ECC related IRQ */
+	ecc_err = xsdfec_ecc_isr_enable(xsdfec, irq.enable_ecc_isr);
+	if (!ecc_err)
+		xsdfec->config.irq.enable_ecc_isr = irq.enable_ecc_isr;
+
+	if (isr_err < 0 || ecc_err < 0)
+		err = -EIO;
+
+	return err;
+}
+
 static int xsdfec_set_turbo(struct xsdfec_dev *xsdfec, void __user *arg)
 {
 	struct xsdfec_turbo turbo;
@@ -760,6 +860,9 @@ static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 	case XSDFEC_GET_CONFIG:
 		rval = xsdfec_get_config(xsdfec, arg);
 		break;
+	case XSDFEC_SET_IRQ:
+		rval = xsdfec_set_irq(xsdfec, arg);
+		break;
 	case XSDFEC_SET_TURBO:
 		rval = xsdfec_set_turbo(xsdfec, arg);
 		break;
@@ -793,11 +896,36 @@ static long xsdfec_dev_compat_ioctl(struct file *file, unsigned int cmd,
 }
 #endif
 
+static unsigned int xsdfec_poll(struct file *file, poll_table *wait)
+{
+	unsigned int mask = 0;
+	struct xsdfec_dev *xsdfec;
+
+	xsdfec = container_of(file->private_data, struct xsdfec_dev, miscdev);
+
+	if (!xsdfec)
+		return POLLNVAL | POLLHUP;
+
+	poll_wait(file, &xsdfec->waitq, wait);
+
+	/* XSDFEC ISR detected an error */
+	spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags);
+	if (xsdfec->state_updated)
+		mask |= POLLIN | POLLPRI;
+
+	if (xsdfec->stats_updated)
+		mask |= POLLIN | POLLRDNORM;
+	spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags);
+
+	return mask;
+}
+
 static const struct file_operations xsdfec_fops = {
 	.owner = THIS_MODULE,
 	.open = xsdfec_dev_open,
 	.release = xsdfec_dev_release,
 	.unlocked_ioctl = xsdfec_dev_ioctl,
+	.poll = xsdfec_poll,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = xsdfec_dev_compat_ioctl,
 #endif
@@ -883,6 +1011,91 @@ static int xsdfec_parse_of(struct xsdfec_dev *xsdfec)
 	return 0;
 }
 
+static irqreturn_t xsdfec_irq_thread(int irq, void *dev_id)
+{
+	struct xsdfec_dev *xsdfec = dev_id;
+	irqreturn_t ret = IRQ_HANDLED;
+	u32 ecc_err;
+	u32 isr_err;
+	u32 uecc_count;
+	u32 cecc_count;
+	u32 isr_err_count;
+	u32 aecc_count;
+	u32 tmp;
+
+	WARN_ON(xsdfec->irq != irq);
+
+	/* Mask Interrupts */
+	xsdfec_isr_enable(xsdfec, false);
+	xsdfec_ecc_isr_enable(xsdfec, false);
+	/* Read ISR */
+	ecc_err = xsdfec_regread(xsdfec, XSDFEC_ECC_ISR_ADDR);
+	isr_err = xsdfec_regread(xsdfec, XSDFEC_ISR_ADDR);
+	/* Clear the interrupts */
+	xsdfec_regwrite(xsdfec, XSDFEC_ECC_ISR_ADDR, ecc_err);
+	xsdfec_regwrite(xsdfec, XSDFEC_ISR_ADDR, isr_err);
+
+	tmp = ecc_err & XSDFEC_ALL_ECC_ISR_MBE_MASK;
+	/* Count uncorrectable 2-bit errors */
+	uecc_count = hweight32(tmp);
+	/* Count all ECC errors */
+	aecc_count = hweight32(ecc_err);
+	/* Number of correctable 1-bit ECC error */
+	cecc_count = aecc_count - 2 * uecc_count;
+	/* Count ISR errors */
+	isr_err_count = hweight32(isr_err);
+	dev_dbg(xsdfec->dev, "tmp=%x, uecc=%x, aecc=%x, cecc=%x, isr=%x", tmp,
+		uecc_count, aecc_count, cecc_count, isr_err_count);
+	dev_dbg(xsdfec->dev, "uecc=%x, cecc=%x, isr=%x", xsdfec->uecc_count,
+		xsdfec->cecc_count, xsdfec->isr_err_count);
+
+	spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags);
+	/* Add new errors to a 2-bits counter */
+	if (uecc_count)
+		xsdfec->uecc_count += uecc_count;
+	/* Add new errors to a 1-bits counter */
+	if (cecc_count)
+		xsdfec->cecc_count += cecc_count;
+	/* Add new errors to a ISR counter */
+	if (isr_err_count)
+		xsdfec->isr_err_count += isr_err_count;
+
+	/* Update state/stats flag */
+	if (uecc_count) {
+		if (ecc_err & XSDFEC_ECC_ISR_MBE_MASK)
+			xsdfec->state = XSDFEC_NEEDS_RESET;
+		else if (ecc_err & XSDFEC_PL_INIT_ECC_ISR_MBE_MASK)
+			xsdfec->state = XSDFEC_PL_RECONFIGURE;
+		xsdfec->stats_updated = true;
+		xsdfec->state_updated = true;
+	}
+
+	if (cecc_count)
+		xsdfec->stats_updated = true;
+
+	if (isr_err_count) {
+		xsdfec->state = XSDFEC_NEEDS_RESET;
+		xsdfec->stats_updated = true;
+		xsdfec->state_updated = true;
+	}
+
+	spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags);
+	dev_dbg(xsdfec->dev, "state=%x, stats=%x", xsdfec->state_updated,
+		xsdfec->stats_updated);
+
+	/* Enable another polling */
+	if (xsdfec->state_updated || xsdfec->stats_updated)
+		wake_up_interruptible(&xsdfec->waitq);
+	else
+		ret = IRQ_NONE;
+
+	/* Unmask Interrupts */
+	xsdfec_isr_enable(xsdfec, true);
+	xsdfec_ecc_isr_enable(xsdfec, true);
+
+	return ret;
+}
+
 static int xsdfec_clk_init(struct platform_device *pdev,
 			   struct xsdfec_clks *clks)
 {
@@ -1049,6 +1262,7 @@ static int xsdfec_probe(struct platform_device *pdev)
 	struct device *dev;
 	struct resource *res;
 	int err;
+	bool irq_enabled = true;
 
 	xsdfec = devm_kzalloc(&pdev->dev, sizeof(*xsdfec), GFP_KERNEL);
 	if (!xsdfec)
@@ -1069,6 +1283,12 @@ static int xsdfec_probe(struct platform_device *pdev)
 		goto err_xsdfec_dev;
 	}
 
+	xsdfec->irq = platform_get_irq(pdev, 0);
+	if (xsdfec->irq < 0) {
+		dev_dbg(dev, "platform_get_irq failed");
+		irq_enabled = false;
+	}
+
 	err = xsdfec_parse_of(xsdfec);
 	if (err < 0)
 		goto err_xsdfec_dev;
@@ -1078,6 +1298,18 @@ static int xsdfec_probe(struct platform_device *pdev)
 	/* Save driver private data */
 	platform_set_drvdata(pdev, xsdfec);
 
+	if (irq_enabled) {
+		init_waitqueue_head(&xsdfec->waitq);
+		/* Register IRQ thread */
+		err = devm_request_threaded_irq(dev, xsdfec->irq, NULL,
+						xsdfec_irq_thread, IRQF_ONESHOT,
+						"xilinx-sdfec16", xsdfec);
+		if (err < 0) {
+			dev_err(dev, "unable to request IRQ%d", xsdfec->irq);
+			goto err_xsdfec_dev;
+		}
+	}
+
 	mutex_lock(&dev_idr_lock);
 	err = idr_alloc(&dev_idr, xsdfec->dev_name, 0, 0, GFP_KERNEL);
 	mutex_unlock(&dev_idr_lock);
diff --git a/include/uapi/misc/xilinx_sdfec.h b/include/uapi/misc/xilinx_sdfec.h
index b8897ce41821..85ee288dbb33 100644
--- a/include/uapi/misc/xilinx_sdfec.h
+++ b/include/uapi/misc/xilinx_sdfec.h
@@ -251,6 +251,19 @@ struct xsdfec_ldpc_param_table_sizes {
  * XSDFEC IOCTL List
  */
 #define XSDFEC_MAGIC 'f'
+/**
+ * DOC: XSDFEC_SET_IRQ
+ * @Parameters
+ *
+ * @struct xsdfec_irq *
+ *	Pointer to the &struct xsdfec_irq that contains the interrupt settings
+ *	for the SD-FEC core
+ *
+ * @Description
+ *
+ * ioctl to enable or disable irq
+ */
+#define XSDFEC_SET_IRQ _IOW(XSDFEC_MAGIC, 3, struct xsdfec_irq)
 /**
  * DOC: XSDFEC_SET_TURBO
  * @Parameters
-- 
cgit v1.2.3


From 6bd6a690c2e7e710aa7ccefa4edc83f14099907e Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.cvetic@xilinx.com>
Date: Sat, 27 Jul 2019 09:33:56 +0100
Subject: misc: xilinx_sdfec: Add stats & status ioctls

SD-FEC statistic data are:
- count of data interface errors (isr_err_count)
- count of Correctable ECC errors (cecc_count)
- count of Uncorrectable ECC errors (uecc_count)

Add support:
1. clear stats ioctl callback which clears collected
statistic data,
2. get stats ioctl callback which reads a collected
statistic data,
3. set default configuration ioctl callback,
4. start ioctl callback enables SD-FEC HW,
5. stop ioctl callback disables SD-FEC HW.

In a failed state driver enables the following ioctls:
- get status
- get statistics
- clear stats
- set default SD-FEC device configuration

Tested-by: Santhosh Dyavanapally <SDYAVANA@xilinx.com>
Tested by: Punnaiah Choudary Kalluri <punnaia@xilinx.com>
Tested-by: Derek Kiernan <derek.kiernan@xilinx.com>
Tested-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Signed-off-by: Derek Kiernan <derek.kiernan@xilinx.com>
Signed-off-by: Dragan Cvetic <dragan.cvetic@xilinx.com>
Link: https://lore.kernel.org/r/1564216438-322406-7-git-send-email-dragan.cvetic@xilinx.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/xilinx_sdfec.c      | 125 +++++++++++++++++++++++++++++++++++++++
 include/uapi/misc/xilinx_sdfec.h |  75 +++++++++++++++++++++++
 2 files changed, 200 insertions(+)

(limited to 'include')

diff --git a/drivers/misc/xilinx_sdfec.c b/drivers/misc/xilinx_sdfec.c
index 61775c5626ed..45f127ac12f6 100644
--- a/drivers/misc/xilinx_sdfec.c
+++ b/drivers/misc/xilinx_sdfec.c
@@ -206,6 +206,7 @@ struct xsdfec_clks {
  * @irq: IRQ number
  * @state_updated: indicates State updated by interrupt handler
  * @stats_updated: indicates Stats updated by interrupt handler
+ * @intr_enabled: indicates IRQ enabled
  *
  * This structure contains necessary state for SDFEC driver to operate
  */
@@ -228,6 +229,7 @@ struct xsdfec_dev {
 	int irq;
 	bool state_updated;
 	bool stats_updated;
+	bool intr_enabled;
 };
 
 static inline void xsdfec_regwrite(struct xsdfec_dev *xsdfec, u32 addr,
@@ -289,6 +291,25 @@ static void update_config_from_hw(struct xsdfec_dev *xsdfec)
 		xsdfec->state = XSDFEC_STOPPED;
 }
 
+static int xsdfec_get_status(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	struct xsdfec_status status;
+	int err;
+
+	spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags);
+	status.state = xsdfec->state;
+	xsdfec->state_updated = false;
+	spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags);
+	status.activity = (xsdfec_regread(xsdfec, XSDFEC_ACTIVE_ADDR) &
+			   XSDFEC_IS_ACTIVITY_SET);
+
+	err = copy_to_user(arg, &status, sizeof(status));
+	if (err)
+		err = -EFAULT;
+
+	return err;
+}
+
 static int xsdfec_get_config(struct xsdfec_dev *xsdfec, void __user *arg)
 {
 	int err;
@@ -840,6 +861,82 @@ static int xsdfec_dev_release(struct inode *iptr, struct file *fptr)
 	return 0;
 }
 
+static int xsdfec_start(struct xsdfec_dev *xsdfec)
+{
+	u32 regread;
+
+	regread = xsdfec_regread(xsdfec, XSDFEC_FEC_CODE_ADDR);
+	regread &= 0x1;
+	if (regread != xsdfec->config.code) {
+		dev_dbg(xsdfec->dev,
+			"%s SDFEC HW code does not match driver code, reg %d, code %d",
+			__func__, regread, xsdfec->config.code);
+		return -EINVAL;
+	}
+
+	/* Set AXIS enable */
+	xsdfec_regwrite(xsdfec, XSDFEC_AXIS_ENABLE_ADDR,
+			XSDFEC_AXIS_ENABLE_MASK);
+	/* Done */
+	xsdfec->state = XSDFEC_STARTED;
+	return 0;
+}
+
+static int xsdfec_stop(struct xsdfec_dev *xsdfec)
+{
+	u32 regread;
+
+	if (xsdfec->state != XSDFEC_STARTED)
+		dev_dbg(xsdfec->dev, "Device not started correctly");
+	/* Disable AXIS_ENABLE Input interfaces only */
+	regread = xsdfec_regread(xsdfec, XSDFEC_AXIS_ENABLE_ADDR);
+	regread &= (~XSDFEC_AXIS_IN_ENABLE_MASK);
+	xsdfec_regwrite(xsdfec, XSDFEC_AXIS_ENABLE_ADDR, regread);
+	/* Stop */
+	xsdfec->state = XSDFEC_STOPPED;
+	return 0;
+}
+
+static int xsdfec_clear_stats(struct xsdfec_dev *xsdfec)
+{
+	spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags);
+	xsdfec->isr_err_count = 0;
+	xsdfec->uecc_count = 0;
+	xsdfec->cecc_count = 0;
+	spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags);
+
+	return 0;
+}
+
+static int xsdfec_get_stats(struct xsdfec_dev *xsdfec, void __user *arg)
+{
+	int err;
+	struct xsdfec_stats user_stats;
+
+	spin_lock_irqsave(&xsdfec->error_data_lock, xsdfec->flags);
+	user_stats.isr_err_count = xsdfec->isr_err_count;
+	user_stats.cecc_count = xsdfec->cecc_count;
+	user_stats.uecc_count = xsdfec->uecc_count;
+	xsdfec->stats_updated = false;
+	spin_unlock_irqrestore(&xsdfec->error_data_lock, xsdfec->flags);
+
+	err = copy_to_user(arg, &user_stats, sizeof(user_stats));
+	if (err)
+		err = -EFAULT;
+
+	return err;
+}
+
+static int xsdfec_set_default_config(struct xsdfec_dev *xsdfec)
+{
+	/* Ensure registers are aligned with core configuration */
+	xsdfec_regwrite(xsdfec, XSDFEC_FEC_CODE_ADDR, xsdfec->config.code);
+	xsdfec_cfg_axi_streams(xsdfec);
+	update_config_from_hw(xsdfec);
+
+	return 0;
+}
+
 static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 			     unsigned long data)
 {
@@ -849,6 +946,16 @@ static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 
 	xsdfec = container_of(fptr->private_data, struct xsdfec_dev, miscdev);
 
+	/* In failed state allow only reset and get status IOCTLs */
+	if (xsdfec->state == XSDFEC_NEEDS_RESET &&
+	    (cmd != XSDFEC_SET_DEFAULT_CONFIG && cmd != XSDFEC_GET_STATUS &&
+	     cmd != XSDFEC_GET_STATS && cmd != XSDFEC_CLEAR_STATS)) {
+		return -EPERM;
+	}
+
+	if (_IOC_TYPE(cmd) != XSDFEC_MAGIC)
+		return -ENOTTY;
+
 	/* check if ioctl argument is present and valid */
 	if (_IOC_DIR(cmd) != _IOC_NONE) {
 		arg = (void __user *)data;
@@ -857,9 +964,27 @@ static long xsdfec_dev_ioctl(struct file *fptr, unsigned int cmd,
 	}
 
 	switch (cmd) {
+	case XSDFEC_START_DEV:
+		rval = xsdfec_start(xsdfec);
+		break;
+	case XSDFEC_STOP_DEV:
+		rval = xsdfec_stop(xsdfec);
+		break;
+	case XSDFEC_CLEAR_STATS:
+		rval = xsdfec_clear_stats(xsdfec);
+		break;
+	case XSDFEC_GET_STATS:
+		rval = xsdfec_get_stats(xsdfec, arg);
+		break;
+	case XSDFEC_GET_STATUS:
+		rval = xsdfec_get_status(xsdfec, arg);
+		break;
 	case XSDFEC_GET_CONFIG:
 		rval = xsdfec_get_config(xsdfec, arg);
 		break;
+	case XSDFEC_SET_DEFAULT_CONFIG:
+		rval = xsdfec_set_default_config(xsdfec);
+		break;
 	case XSDFEC_SET_IRQ:
 		rval = xsdfec_set_irq(xsdfec, arg);
 		break;
diff --git a/include/uapi/misc/xilinx_sdfec.h b/include/uapi/misc/xilinx_sdfec.h
index 85ee288dbb33..ee1a42ae6f46 100644
--- a/include/uapi/misc/xilinx_sdfec.h
+++ b/include/uapi/misc/xilinx_sdfec.h
@@ -233,6 +233,21 @@ struct xsdfec_config {
 	__s8 code_wr_protect;
 };
 
+/**
+ * struct xsdfec_stats - Stats retrived by ioctl XSDFEC_GET_STATS. Used
+ *			 to buffer atomic_t variables from struct
+ *			 xsdfec_dev. Counts are accumulated until
+ *			 the user clears them.
+ * @isr_err_count: Count of ISR errors
+ * @cecc_count: Count of Correctable ECC errors (SBE)
+ * @uecc_count: Count of Uncorrectable ECC errors (MBE)
+ */
+struct xsdfec_stats {
+	__u32 isr_err_count;
+	__u32 cecc_count;
+	__u32 uecc_count;
+};
+
 /**
  * struct xsdfec_ldpc_param_table_sizes - Used to store sizes of SD-FEC table
  *					  entries for an individual LPDC code
@@ -251,6 +266,32 @@ struct xsdfec_ldpc_param_table_sizes {
  * XSDFEC IOCTL List
  */
 #define XSDFEC_MAGIC 'f'
+/**
+ * DOC: XSDFEC_START_DEV
+ *
+ * @Description
+ *
+ * ioctl to start SD-FEC core
+ *
+ * This fails if the XSDFEC_SET_ORDER ioctl has not been previously called
+ */
+#define XSDFEC_START_DEV _IO(XSDFEC_MAGIC, 0)
+/**
+ * DOC: XSDFEC_STOP_DEV
+ *
+ * @Description
+ *
+ * ioctl to stop the SD-FEC core
+ */
+#define XSDFEC_STOP_DEV _IO(XSDFEC_MAGIC, 1)
+/**
+ * DOC: XSDFEC_GET_STATUS
+ *
+ * @Description
+ *
+ * ioctl that returns status of SD-FEC core
+ */
+#define XSDFEC_GET_STATUS _IOR(XSDFEC_MAGIC, 2, struct xsdfec_status)
 /**
  * DOC: XSDFEC_SET_IRQ
  * @Parameters
@@ -370,4 +411,38 @@ struct xsdfec_ldpc_param_table_sizes {
  * ioctl that determines if SD-FEC is processing data
  */
 #define XSDFEC_IS_ACTIVE _IOR(XSDFEC_MAGIC, 10, bool)
+/**
+ * DOC: XSDFEC_CLEAR_STATS
+ *
+ * @Description
+ *
+ * ioctl that clears error stats collected during interrupts
+ */
+#define XSDFEC_CLEAR_STATS _IO(XSDFEC_MAGIC, 11)
+/**
+ * DOC: XSDFEC_GET_STATS
+ * @Parameters
+ *
+ * @struct xsdfec_stats *
+ *	Pointer to the &struct xsdfec_stats that will contain the updated stats
+ *	values
+ *
+ * @Description
+ *
+ * ioctl that returns SD-FEC core stats
+ *
+ * This can only be used when the driver is in the XSDFEC_STOPPED state
+ */
+#define XSDFEC_GET_STATS _IOR(XSDFEC_MAGIC, 12, struct xsdfec_stats)
+/**
+ * DOC: XSDFEC_SET_DEFAULT_CONFIG
+ *
+ * @Description
+ *
+ * ioctl that returns SD-FEC core to default config, use after a reset
+ *
+ * This can only be used when the driver is in the XSDFEC_STOPPED state
+ */
+#define XSDFEC_SET_DEFAULT_CONFIG _IO(XSDFEC_MAGIC, 13)
+
 #endif /* __XILINX_SDFEC_H__ */
-- 
cgit v1.2.3


From 3a9477a06c6acb4234407b59999835a6f12f889d Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 15 Aug 2019 10:50:30 -0500
Subject: ASoC: SOF: ipc: add ALH parameters

The only configuration parameter is the ALH stream ID. No range
checking is done by the driver, the firmware should check that the
stream is valid for a specific hardware.

Bump the ABI Minor number to keep the alignment with SOF firmware

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190815155032.29181-3-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai-intel.h | 9 +++++++++
 include/sound/sof/dai.h       | 1 +
 include/uapi/sound/sof/abi.h  | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/sof/dai-intel.h b/include/sound/sof/dai-intel.h
index a81afd3fbd41..b03e2ec08694 100644
--- a/include/sound/sof/dai-intel.h
+++ b/include/sound/sof/dai-intel.h
@@ -179,4 +179,13 @@ struct sof_ipc_dai_dmic_params {
 	struct sof_ipc_dai_dmic_pdm_ctrl pdm[0];
 } __packed;
 
+/* ALH Configuration Request - SOF_IPC_DAI_ALH_CONFIG */
+struct sof_ipc_dai_alh_params {
+	struct sof_ipc_hdr hdr;
+	uint32_t stream_id;
+
+	/* reserved for future use */
+	uint32_t reserved[15];
+} __packed;
+
 #endif
diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 3d174e20aa53..da9825ad41d4 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -70,6 +70,7 @@ struct sof_ipc_dai_config {
 		struct sof_ipc_dai_ssp_params ssp;
 		struct sof_ipc_dai_dmic_params dmic;
 		struct sof_ipc_dai_hda_params hda;
+		struct sof_ipc_dai_alh_params alh;
 	};
 } __packed;
 
diff --git a/include/uapi/sound/sof/abi.h b/include/uapi/sound/sof/abi.h
index dff70a42445a..a0fe0d4c4b66 100644
--- a/include/uapi/sound/sof/abi.h
+++ b/include/uapi/sound/sof/abi.h
@@ -26,7 +26,7 @@
 
 /* SOF ABI version major, minor and patch numbers */
 #define SOF_ABI_MAJOR 3
-#define SOF_ABI_MINOR 9
+#define SOF_ABI_MINOR 10
 #define SOF_ABI_PATCH 0
 
 /* SOF ABI version number. Format within 32bit word is MMmmmppp */
-- 
cgit v1.2.3


From ff461ebfd4b7051d9b79ca023741e6c2d960a912 Mon Sep 17 00:00:00 2001
From: Raag Jadav <raagjadav@gmail.com>
Date: Wed, 14 Aug 2019 00:02:56 +0530
Subject: regulator: act8865 regulator modes and suspend states

Add documentation for act8865 regulator modes and suspend states.
Add active-semi,8865-regulator.h file for device tree binding constants
for act8865 regulators.

Signed-off-by: Raag Jadav <raagjadav@gmail.com>
Link: https://lore.kernel.org/r/1565721176-8955-3-git-send-email-raagjadav@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 .../bindings/regulator/act8865-regulator.txt       | 27 +++++++++++++++++++--
 .../regulator/active-semi,8865-regulator.h         | 28 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 2 deletions(-)
 create mode 100644 include/dt-bindings/regulator/active-semi,8865-regulator.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/regulator/act8865-regulator.txt b/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
index 3ae9f1088845..b9f58e480349 100644
--- a/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
@@ -34,6 +34,9 @@ Optional input supply properties:
   - inl67-supply: The input supply for LDO_REG3 and LDO_REG4
 
 Any standard regulator properties can be used to configure the single regulator.
+regulator-initial-mode, regulator-allowed-modes and regulator-mode could be specified
+for act8865 using mode values from dt-bindings/regulator/active-semi,8865-regulator.h
+file.
 
 The valid names for regulators are:
 	- for act8846:
@@ -47,6 +50,8 @@ The valid names for regulators are:
 Example:
 --------
 
+#include <dt-bindings/regulator/active-semi,8865-regulator.h>
+
 		i2c1: i2c@f0018000 {
 			pmic: act8865@5b {
 				compatible = "active-semi,act8865";
@@ -65,9 +70,19 @@ Example:
 						regulator-name = "VCC_1V2";
 						regulator-min-microvolt = <1100000>;
 						regulator-max-microvolt = <1300000>;
-						regulator-suspend-mem-microvolt = <1150000>;
-						regulator-suspend-standby-microvolt = <1150000>;
 						regulator-always-on;
+
+						regulator-allowed-modes = <ACT8865_REGULATOR_MODE_FIXED>,
+									  <ACT8865_REGULATOR_MODE_LOWPOWER>;
+						regulator-initial-mode = <ACT8865_REGULATOR_MODE_FIXED>;
+
+						regulator-state-mem {
+							regulator-on-in-suspend;
+							regulator-suspend-min-microvolt = <1150000>;
+							regulator-suspend-max-microvolt = <1150000>;
+							regulator-changeable-in-suspend;
+							regulator-mode = <ACT8865_REGULATOR_MODE_LOWPOWER>;
+						};
 					};
 
 					vcc_3v3_reg: DCDC_REG3 {
@@ -82,6 +97,14 @@ Example:
 						regulator-min-microvolt = <3300000>;
 						regulator-max-microvolt = <3300000>;
 						regulator-always-on;
+
+						regulator-allowed-modes = <ACT8865_REGULATOR_MODE_NORMAL>,
+									  <ACT8865_REGULATOR_MODE_LOWPOWER>;
+						regulator-initial-mode = <ACT8865_REGULATOR_MODE_NORMAL>;
+
+						regulator-state-mem {
+							regulator-off-in-suspend;
+						};
 					};
 
 					vddfuse_reg: LDO_REG2 {
diff --git a/include/dt-bindings/regulator/active-semi,8865-regulator.h b/include/dt-bindings/regulator/active-semi,8865-regulator.h
new file mode 100644
index 000000000000..15473dbeaf38
--- /dev/null
+++ b/include/dt-bindings/regulator/active-semi,8865-regulator.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Device Tree binding constants for the ACT8865 PMIC regulators
+ */
+
+#ifndef _DT_BINDINGS_REGULATOR_ACT8865_H
+#define _DT_BINDINGS_REGULATOR_ACT8865_H
+
+/*
+ * These constants should be used to specify regulator modes in device tree for
+ * ACT8865 regulators as follows:
+ * ACT8865_REGULATOR_MODE_FIXED:	It is specific to DCDC regulators and it
+ *					specifies the usage of fixed-frequency
+ *					PWM.
+ *
+ * ACT8865_REGULATOR_MODE_NORMAL:	It is specific to LDO regulators and it
+ *					specifies the usage of normal mode.
+ *
+ * ACT8865_REGULATOR_MODE_LOWPOWER:	For DCDC and LDO regulators; it specify
+ *					the usage of proprietary power-saving
+ *					mode.
+ */
+
+#define ACT8865_REGULATOR_MODE_FIXED		1
+#define ACT8865_REGULATOR_MODE_NORMAL		2
+#define ACT8865_REGULATOR_MODE_LOWPOWER	3
+
+#endif
-- 
cgit v1.2.3


From 7b6620d7db566a46f49b4b9deab9fa061fd4b59b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 15 Aug 2019 11:09:16 -0600
Subject: block: remove REQ_NOWAIT_INLINE

We had a few issues with this code, and there's still a problem around
how we deal with error handling for chained/split bios. For now, just
revert the code and we'll try again with a thoroug solution. This
reverts commits:

e15c2ffa1091 ("block: fix O_DIRECT error handling for bio fragments")
0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments")
6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO")
893a1c97205a ("blk-mq: allow REQ_NOWAIT to return an error inline")

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c            |  8 ++------
 fs/block_dev.c            | 49 +++++------------------------------------------
 include/linux/blk_types.h |  5 +----
 3 files changed, 8 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a8e6a58f5f28..0835f4d8d42e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1958,13 +1958,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	rq = blk_mq_get_request(q, bio, &data);
 	if (unlikely(!rq)) {
 		rq_qos_cleanup(q, bio);
-
-		cookie = BLK_QC_T_NONE;
-		if (bio->bi_opf & REQ_NOWAIT_INLINE)
-			cookie = BLK_QC_T_EAGAIN;
-		else if (bio->bi_opf & REQ_NOWAIT)
+		if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
-		return cookie;
+		return BLK_QC_T_NONE;
 	}
 
 	trace_block_getrq(q, bio, bio->bi_opf);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index eb657ab94060..677cb364d33f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -345,24 +345,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	struct bio *bio;
 	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
-	bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
 	loff_t pos = iocb->ki_pos;
 	blk_qc_t qc = BLK_QC_T_NONE;
-	gfp_t gfp;
-	int ret;
+	int ret = 0;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	if (nowait)
-		gfp = GFP_NOWAIT;
-	else
-		gfp = GFP_KERNEL;
-
-	bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
-	if (!bio)
-		return -EAGAIN;
+	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -384,7 +375,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	if (!is_poll)
 		blk_start_plug(&plug);
 
-	ret = 0;
 	for (;;) {
 		bio_set_dev(bio, bdev);
 		bio->bi_iter.bi_sector = pos >> 9;
@@ -409,14 +399,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			task_io_account_write(bio->bi_iter.bi_size);
 		}
 
-		/*
-		 * Tell underlying layer to not block for resource shortage.
-		 * And if we would have blocked, return error inline instead
-		 * of through the bio->bi_end_io() callback.
-		 */
-		if (nowait)
-			bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
-
+		dio->size += bio->bi_iter.bi_size;
 		pos += bio->bi_iter.bi_size;
 
 		nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
@@ -428,13 +411,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 				polled = true;
 			}
 
-			dio->size += bio->bi_iter.bi_size;
 			qc = submit_bio(bio);
-			if (qc == BLK_QC_T_EAGAIN) {
-				dio->size -= bio->bi_iter.bi_size;
-				ret = -EAGAIN;
-				goto error;
-			}
 
 			if (polled)
 				WRITE_ONCE(iocb->ki_cookie, qc);
@@ -455,19 +432,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			atomic_inc(&dio->ref);
 		}
 
-		dio->size += bio->bi_iter.bi_size;
-		qc = submit_bio(bio);
-		if (qc == BLK_QC_T_EAGAIN) {
-			dio->size -= bio->bi_iter.bi_size;
-			ret = -EAGAIN;
-			goto error;
-		}
-
-		bio = bio_alloc(gfp, nr_pages);
-		if (!bio) {
-			ret = -EAGAIN;
-			goto error;
-		}
+		submit_bio(bio);
+		bio = bio_alloc(GFP_KERNEL, nr_pages);
 	}
 
 	if (!is_poll)
@@ -487,7 +453,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	}
 	__set_current_state(TASK_RUNNING);
 
-out:
 	if (!ret)
 		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
@@ -495,10 +460,6 @@ out:
 
 	bio_put(&dio->bio);
 	return ret;
-error:
-	if (!is_poll)
-		blk_finish_plug(&plug);
-	goto out;
 }
 
 static ssize_t
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1b1fa1557e68..feff3fe4467e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,7 +311,6 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,           /* Don't wait if request will block */
-	__REQ_NOWAIT_INLINE,	/* Return would-block error inline */
 	/*
 	 * When a shared kthread needs to issue a bio for a cgroup, doing
 	 * so synchronously can lead to priority inversions as the kthread
@@ -346,7 +345,6 @@ enum req_flag_bits {
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
-#define REQ_NOWAIT_INLINE	(1ULL << __REQ_NOWAIT_INLINE)
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
@@ -420,13 +418,12 @@ static inline int op_stat_group(unsigned int op)
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
-#define BLK_QC_T_EAGAIN		-2U
 #define BLK_QC_T_SHIFT		16
 #define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
-	return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
+	return cookie != BLK_QC_T_NONE;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
-- 
cgit v1.2.3


From 5f7af9ecebce7fab78b0237b0cd998f2ed3de03c Mon Sep 17 00:00:00 2001
From: Pan Xiuli <xiuli.pan@linux.intel.com>
Date: Thu, 15 Aug 2019 10:57:46 -0500
Subject: ASoC: Intel: common: add ACPI matching tables for Tiger Lake

Initial support for TGL w/ RT1308

Signed-off-by: Pan Xiuli <xiuli.pan@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190815155749.29304-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi-intel-match.h              |  1 +
 sound/soc/intel/common/Makefile                   |  1 +
 sound/soc/intel/common/soc-acpi-intel-tgl-match.c | 24 +++++++++++++++++++++++
 3 files changed, 26 insertions(+)
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-tgl-match.c

(limited to 'include')

diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h
index bb5e1e4ce8bf..ce6c4a970939 100644
--- a/include/sound/soc-acpi-intel-match.h
+++ b/include/sound/soc-acpi-intel-match.h
@@ -25,6 +25,7 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_bxt_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_icl_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[];
 
 /*
  * generic table used for HDA codec-based platforms, possibly with
diff --git a/sound/soc/intel/common/Makefile b/sound/soc/intel/common/Makefile
index 56c81e20b5bf..a14aca62ff96 100644
--- a/sound/soc/intel/common/Makefile
+++ b/sound/soc/intel/common/Makefile
@@ -8,6 +8,7 @@ snd-soc-acpi-intel-match-objs := soc-acpi-intel-byt-match.o soc-acpi-intel-cht-m
 	soc-acpi-intel-skl-match.o soc-acpi-intel-kbl-match.o \
 	soc-acpi-intel-bxt-match.o soc-acpi-intel-glk-match.o \
 	soc-acpi-intel-cnl-match.o soc-acpi-intel-icl-match.o \
+	soc-acpi-intel-tgl-match.o \
 	soc-acpi-intel-hda-match.o
 
 obj-$(CONFIG_SND_SOC_INTEL_SST) += snd-soc-sst-dsp.o snd-soc-sst-ipc.o
diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
new file mode 100644
index 000000000000..57a6298d6dca
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-tgl-match.c - tables and support for ICL ACPI enumeration.
+ *
+ * Copyright (c) 2019, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[] = {
+	{
+		.id = "10EC1308",
+		.drv_name = "tgl_rt1308",
+		.sof_fw_filename = "sof-tgl.ri",
+		.sof_tplg_filename = "sof-tgl-rt1308.tplg",
+	},
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_tgl_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
-- 
cgit v1.2.3


From f4ead53241c7dcd2f3130e059cb3dfdec0ec9871 Mon Sep 17 00:00:00 2001
From: Pan Xiuli <xiuli.pan@linux.intel.com>
Date: Thu, 15 Aug 2019 10:57:48 -0500
Subject: ASoC: Intel: common: add ACPI matching tables for EHL

There are no upstream machine drivers just yet so just add dummy table
for compilation in nocodec-mode.

Signed-off-by: Pan Xiuli <xiuli.pan@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190815155749.29304-4-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-acpi-intel-match.h              |  1 +
 sound/soc/intel/common/Makefile                   |  2 +-
 sound/soc/intel/common/soc-acpi-intel-ehl-match.c | 18 ++++++++++++++++++
 3 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 sound/soc/intel/common/soc-acpi-intel-ehl-match.c

(limited to 'include')

diff --git a/include/sound/soc-acpi-intel-match.h b/include/sound/soc-acpi-intel-match.h
index ce6c4a970939..6c9929abd90b 100644
--- a/include/sound/soc-acpi-intel-match.h
+++ b/include/sound/soc-acpi-intel-match.h
@@ -26,6 +26,7 @@ extern struct snd_soc_acpi_mach snd_soc_acpi_intel_glk_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_cnl_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_icl_machines[];
 extern struct snd_soc_acpi_mach snd_soc_acpi_intel_tgl_machines[];
+extern struct snd_soc_acpi_mach snd_soc_acpi_intel_ehl_machines[];
 
 /*
  * generic table used for HDA codec-based platforms, possibly with
diff --git a/sound/soc/intel/common/Makefile b/sound/soc/intel/common/Makefile
index a14aca62ff96..18d9630ae9a2 100644
--- a/sound/soc/intel/common/Makefile
+++ b/sound/soc/intel/common/Makefile
@@ -8,7 +8,7 @@ snd-soc-acpi-intel-match-objs := soc-acpi-intel-byt-match.o soc-acpi-intel-cht-m
 	soc-acpi-intel-skl-match.o soc-acpi-intel-kbl-match.o \
 	soc-acpi-intel-bxt-match.o soc-acpi-intel-glk-match.o \
 	soc-acpi-intel-cnl-match.o soc-acpi-intel-icl-match.o \
-	soc-acpi-intel-tgl-match.o \
+	soc-acpi-intel-tgl-match.o soc-acpi-intel-ehl-match.o \
 	soc-acpi-intel-hda-match.o
 
 obj-$(CONFIG_SND_SOC_INTEL_SST) += snd-soc-sst-dsp.o snd-soc-sst-ipc.o
diff --git a/sound/soc/intel/common/soc-acpi-intel-ehl-match.c b/sound/soc/intel/common/soc-acpi-intel-ehl-match.c
new file mode 100644
index 000000000000..a1290c3fa99f
--- /dev/null
+++ b/sound/soc/intel/common/soc-acpi-intel-ehl-match.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * soc-apci-intel-ehl-match.c - tables and support for EHL ACPI enumeration.
+ *
+ * Copyright (c) 2019, Intel Corporation.
+ *
+ */
+
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+
+struct snd_soc_acpi_mach snd_soc_acpi_intel_ehl_machines[] = {
+	{},
+};
+EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_ehl_machines);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel Common ACPI Match module");
-- 
cgit v1.2.3


From 40d8aff614f71ab3cab20785b4f213e3802d4e87 Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Tue, 13 Aug 2019 17:08:20 +0200
Subject: soc: samsung: chipid: Convert exynos-chipid driver to use the regmap
 API

Convert the driver to use regmap API in order to allow other
drivers, like ASV, to access the CHIPID registers.

Add definition of selected CHIPID register offsets and register bit
fields for Exynos5422 SoC.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 drivers/soc/samsung/exynos-chipid.c       | 34 ++++++++------------
 include/linux/soc/samsung/exynos-chipid.h | 52 +++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 21 deletions(-)
 create mode 100644 include/linux/soc/samsung/exynos-chipid.h

(limited to 'include')

diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
index bcf691f2b650..006a95feb618 100644
--- a/drivers/soc/samsung/exynos-chipid.c
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -9,16 +9,13 @@
  */
 
 #include <linux/io.h>
+#include <linux/mfd/syscon.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
+#include <linux/regmap.h>
 #include <linux/slab.h>
+#include <linux/soc/samsung/exynos-chipid.h>
 #include <linux/sys_soc.h>
 
-#define EXYNOS_SUBREV_MASK	(0xF << 4)
-#define EXYNOS_MAINREV_MASK	(0xF << 0)
-#define EXYNOS_REV_MASK		(EXYNOS_SUBREV_MASK | EXYNOS_MAINREV_MASK)
-#define EXYNOS_MASK		0xFFFFF000
-
 static const struct exynos_soc_id {
 	const char *name;
 	unsigned int id;
@@ -51,29 +48,24 @@ static const char * __init product_id_to_soc_id(unsigned int product_id)
 int __init exynos_chipid_early_init(void)
 {
 	struct soc_device_attribute *soc_dev_attr;
-	void __iomem *exynos_chipid_base;
 	struct soc_device *soc_dev;
 	struct device_node *root;
-	struct device_node *np;
+	struct regmap *regmap;
 	u32 product_id;
 	u32 revision;
+	int ret;
 
-	/* look up for chipid node */
-	np = of_find_compatible_node(NULL, NULL, "samsung,exynos4210-chipid");
-	if (!np)
-		return -ENODEV;
-
-	exynos_chipid_base = of_iomap(np, 0);
-	of_node_put(np);
-
-	if (!exynos_chipid_base) {
-		pr_err("Failed to map SoC chipid\n");
-		return -ENXIO;
+	regmap = syscon_regmap_lookup_by_compatible("samsung,exynos4210-chipid");
+	if (IS_ERR(regmap)) {
+		pr_err("Failed to get CHIPID regmap\n");
+		return PTR_ERR(regmap);
 	}
 
-	product_id = readl_relaxed(exynos_chipid_base);
+	ret = regmap_read(regmap, EXYNOS_CHIPID_REG_PRO_ID, &product_id);
+	if (ret < 0)
+		return ret;
+
 	revision = product_id & EXYNOS_REV_MASK;
-	iounmap(exynos_chipid_base);
 
 	soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL);
 	if (!soc_dev_attr)
diff --git a/include/linux/soc/samsung/exynos-chipid.h b/include/linux/soc/samsung/exynos-chipid.h
new file mode 100644
index 000000000000..8bca6763f99c
--- /dev/null
+++ b/include/linux/soc/samsung/exynos-chipid.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
+ *	      http://www.samsung.com/
+ *
+ * Exynos - CHIPID support
+ */
+#ifndef __LINUX_SOC_EXYNOS_CHIPID_H
+#define __LINUX_SOC_EXYNOS_CHIPID_H
+
+#define EXYNOS_CHIPID_REG_PRO_ID	0x00
+#define EXYNOS_SUBREV_MASK		(0xf << 4)
+#define EXYNOS_MAINREV_MASK		(0xf << 0)
+#define EXYNOS_REV_MASK			(EXYNOS_SUBREV_MASK | \
+					 EXYNOS_MAINREV_MASK)
+#define EXYNOS_MASK			0xfffff000
+
+#define EXYNOS_CHIPID_REG_PKG_ID	0x04
+/* Bit field definitions for EXYNOS_CHIPID_REG_PKG_ID register */
+#define EXYNOS5422_IDS_OFFSET		24
+#define EXYNOS5422_IDS_MASK		0xff
+#define EXYNOS5422_USESG_OFFSET	3
+#define EXYNOS5422_USESG_MASK		0x01
+#define EXYNOS5422_SG_OFFSET		0
+#define EXYNOS5422_SG_MASK		0x07
+#define EXYNOS5422_TABLE_OFFSET	8
+#define EXYNOS5422_TABLE_MASK		0x03
+#define EXYNOS5422_SG_A_OFFSET		17
+#define EXYNOS5422_SG_A_MASK		0x0f
+#define EXYNOS5422_SG_B_OFFSET		21
+#define EXYNOS5422_SG_B_MASK		0x03
+#define EXYNOS5422_SG_BSIGN_OFFSET	23
+#define EXYNOS5422_SG_BSIGN_MASK	0x01
+#define EXYNOS5422_BIN2_OFFSET		12
+#define EXYNOS5422_BIN2_MASK		0x01
+
+#define EXYNOS_CHIPID_REG_LOT_ID	0x14
+
+#define EXYNOS_CHIPID_REG_AUX_INFO	0x1c
+/* Bit field definitions for EXYNOS_CHIPID_REG_AUX_INFO register */
+#define EXYNOS5422_TMCB_OFFSET		0
+#define EXYNOS5422_TMCB_MASK		0x7f
+#define EXYNOS5422_ARM_UP_OFFSET	8
+#define EXYNOS5422_ARM_UP_MASK		0x03
+#define EXYNOS5422_ARM_DN_OFFSET	10
+#define EXYNOS5422_ARM_DN_MASK		0x03
+#define EXYNOS5422_KFC_UP_OFFSET	12
+#define EXYNOS5422_KFC_UP_MASK		0x03
+#define EXYNOS5422_KFC_DN_OFFSET	14
+#define EXYNOS5422_KFC_DN_MASK		0x03
+
+#endif /*__LINUX_SOC_EXYNOS_CHIPID_H */
-- 
cgit v1.2.3


From 9dc03ffd996d4103cc2a11286d61e517bce27440 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 16:40:32 +0200
Subject: net: lpc-enet: factor out iram access

The lpc_eth driver uses a platform specific method to find
the internal sram. This prevents building it on other machines.

Rework to only use one function call and keep the other platform
internals where they belong. Ideally this would look up the
sram location from DT, but as this is a rarely used driver,
I want to keep the modifications to a minimum.

Link: https://lore.kernel.org/r/20190809144043.476786-7-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-lpc32xx/common.c             |  9 +++++++--
 arch/arm/mach-lpc32xx/common.h             |  1 -
 arch/arm/mach-lpc32xx/include/mach/board.h | 15 ---------------
 drivers/net/ethernet/nxp/lpc_eth.c         | 17 ++++++++---------
 include/linux/soc/nxp/lpc32xx-misc.h       | 24 ++++++++++++++++++++++++
 5 files changed, 39 insertions(+), 27 deletions(-)
 delete mode 100644 arch/arm/mach-lpc32xx/include/mach/board.h
 create mode 100644 include/linux/soc/nxp/lpc32xx-misc.h

(limited to 'include')

diff --git a/arch/arm/mach-lpc32xx/common.c b/arch/arm/mach-lpc32xx/common.c
index 5b71b4fab2cd..f648324d5fb4 100644
--- a/arch/arm/mach-lpc32xx/common.c
+++ b/arch/arm/mach-lpc32xx/common.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/soc/nxp/lpc32xx-misc.h>
 
 #include <asm/mach/map.h>
 #include <asm/system_info.h>
@@ -32,7 +33,7 @@ void lpc32xx_get_uid(u32 devid[4])
  */
 #define LPC32XX_IRAM_BANK_SIZE SZ_128K
 static u32 iram_size;
-u32 lpc32xx_return_iram_size(void)
+u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr)
 {
 	if (iram_size == 0) {
 		u32 savedval1, savedval2;
@@ -53,10 +54,14 @@ u32 lpc32xx_return_iram_size(void)
 		} else
 			iram_size = LPC32XX_IRAM_BANK_SIZE * 2;
 	}
+	if (dmaaddr)
+		*dmaaddr = LPC32XX_IRAM_BASE;
+	if (mapbase)
+		*mapbase = io_p2v(LPC32XX_IRAM_BASE);
 
 	return iram_size;
 }
-EXPORT_SYMBOL_GPL(lpc32xx_return_iram_size);
+EXPORT_SYMBOL_GPL(lpc32xx_return_iram);
 
 static struct map_desc lpc32xx_io_desc[] __initdata = {
 	{
diff --git a/arch/arm/mach-lpc32xx/common.h b/arch/arm/mach-lpc32xx/common.h
index 8e597ce48a73..32f0ad217807 100644
--- a/arch/arm/mach-lpc32xx/common.h
+++ b/arch/arm/mach-lpc32xx/common.h
@@ -23,7 +23,6 @@ extern void __init lpc32xx_serial_init(void);
  */
 extern void lpc32xx_get_uid(u32 devid[4]);
 
-extern u32 lpc32xx_return_iram_size(void);
 /*
  * Pointers used for sizing and copying suspend function data
  */
diff --git a/arch/arm/mach-lpc32xx/include/mach/board.h b/arch/arm/mach-lpc32xx/include/mach/board.h
deleted file mode 100644
index 476513d970a4..000000000000
--- a/arch/arm/mach-lpc32xx/include/mach/board.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * arm/arch/mach-lpc32xx/include/mach/board.h
- *
- * Author: Kevin Wells <kevin.wells@nxp.com>
- *
- * Copyright (C) 2010 NXP Semiconductors
- */
-
-#ifndef __ASM_ARCH_BOARD_H
-#define __ASM_ARCH_BOARD_H
-
-extern u32 lpc32xx_return_iram_size(void);
-
-#endif  /* __ASM_ARCH_BOARD_H */
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index f7e11f1b0426..bcdd0adcfb0c 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -18,8 +18,8 @@
 #include <linux/phy.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
+#include <linux/soc/nxp/lpc32xx-misc.h>
 
-#include <mach/board.h>
 #include <mach/hardware.h>
 #include <mach/platform.h>
 
@@ -1311,16 +1311,15 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	/* Get size of DMA buffers/descriptors region */
 	pldat->dma_buff_size = (ENET_TX_DESC + ENET_RX_DESC) * (ENET_MAXF_SIZE +
 		sizeof(struct txrx_desc_t) + sizeof(struct rx_status_t));
-	pldat->dma_buff_base_v = 0;
 
 	if (use_iram_for_net(dev)) {
-		dma_handle = LPC32XX_IRAM_BASE;
-		if (pldat->dma_buff_size <= lpc32xx_return_iram_size())
-			pldat->dma_buff_base_v =
-				io_p2v(LPC32XX_IRAM_BASE);
-		else
+		if (pldat->dma_buff_size >
+		    lpc32xx_return_iram(&pldat->dma_buff_base_v, &dma_handle)) {
+			pldat->dma_buff_base_v = NULL;
+			pldat->dma_buff_size = 0;
 			netdev_err(ndev,
 				"IRAM not big enough for net buffers, using SDRAM instead.\n");
+		}
 	}
 
 	if (pldat->dma_buff_base_v == 0) {
@@ -1409,7 +1408,7 @@ err_out_unregister_netdev:
 	unregister_netdev(ndev);
 err_out_dma_unmap:
 	if (!use_iram_for_net(dev) ||
-	    pldat->dma_buff_size > lpc32xx_return_iram_size())
+	    pldat->dma_buff_size > lpc32xx_return_iram(NULL, NULL))
 		dma_free_coherent(dev, pldat->dma_buff_size,
 				  pldat->dma_buff_base_v,
 				  pldat->dma_buff_base_p);
@@ -1436,7 +1435,7 @@ static int lpc_eth_drv_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 
 	if (!use_iram_for_net(&pldat->pdev->dev) ||
-	    pldat->dma_buff_size > lpc32xx_return_iram_size())
+	    pldat->dma_buff_size > lpc32xx_return_iram(NULL, NULL))
 		dma_free_coherent(&pldat->pdev->dev, pldat->dma_buff_size,
 				  pldat->dma_buff_base_v,
 				  pldat->dma_buff_base_p);
diff --git a/include/linux/soc/nxp/lpc32xx-misc.h b/include/linux/soc/nxp/lpc32xx-misc.h
new file mode 100644
index 000000000000..f232e1a1bcdc
--- /dev/null
+++ b/include/linux/soc/nxp/lpc32xx-misc.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Author: Kevin Wells <kevin.wells@nxp.com>
+ *
+ * Copyright (C) 2010 NXP Semiconductors
+ */
+
+#ifndef __SOC_LPC32XX_MISC_H
+#define __SOC_LPC32XX_MISC_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_ARCH_LPC32XX
+extern u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr);
+#else
+static inline u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr)
+{
+	*mapbase = NULL;
+	*dmaaddr = 0;
+	return 0;
+}
+#endif
+
+#endif  /* __SOC_LPC32XX_MISC_H */
-- 
cgit v1.2.3


From ecca1a6277aac10e40e4baba28adb893899b24b3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 16:40:33 +0200
Subject: net: lpc-enet: move phy setup into platform code

Setting the phy mode requires touching a platform specific
register, which prevents us from building the driver without
its header files.

Move it into a separate function in arch/arm/mach/lpc32xx
to hide the core registers from the network driver.

Link: https://lore.kernel.org/r/20190809144043.476786-8-arnd@arndb.de
Acked-by: Sylvain Lemieux <slemieux.tyco@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-lpc32xx/common.c       | 12 ++++++++++++
 drivers/net/ethernet/nxp/lpc_eth.c   | 12 +-----------
 include/linux/soc/nxp/lpc32xx-misc.h |  5 +++++
 3 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-lpc32xx/common.c b/arch/arm/mach-lpc32xx/common.c
index f648324d5fb4..a475339333c1 100644
--- a/arch/arm/mach-lpc32xx/common.c
+++ b/arch/arm/mach-lpc32xx/common.c
@@ -63,6 +63,18 @@ u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr)
 }
 EXPORT_SYMBOL_GPL(lpc32xx_return_iram);
 
+void lpc32xx_set_phy_interface_mode(phy_interface_t mode)
+{
+	u32 tmp = __raw_readl(LPC32XX_CLKPWR_MACCLK_CTRL);
+	tmp &= ~LPC32XX_CLKPWR_MACCTRL_PINS_MSK;
+	if (mode == PHY_INTERFACE_MODE_MII)
+		tmp |= LPC32XX_CLKPWR_MACCTRL_USE_MII_PINS;
+	else
+		tmp |= LPC32XX_CLKPWR_MACCTRL_USE_RMII_PINS;
+	__raw_writel(tmp, LPC32XX_CLKPWR_MACCLK_CTRL);
+}
+EXPORT_SYMBOL_GPL(lpc32xx_set_phy_interface_mode);
+
 static struct map_desc lpc32xx_io_desc[] __initdata = {
 	{
 		.virtual	= (unsigned long)IO_ADDRESS(LPC32XX_AHB0_START),
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index bcdd0adcfb0c..0893b77c385d 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -20,9 +20,6 @@
 #include <linux/spinlock.h>
 #include <linux/soc/nxp/lpc32xx-misc.h>
 
-#include <mach/hardware.h>
-#include <mach/platform.h>
-
 #define MODNAME "lpc-eth"
 #define DRV_VERSION "1.00"
 
@@ -1237,16 +1234,9 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
 	dma_addr_t dma_handle;
 	struct resource *res;
 	int irq, ret;
-	u32 tmp;
 
 	/* Setup network interface for RMII or MII mode */
-	tmp = __raw_readl(LPC32XX_CLKPWR_MACCLK_CTRL);
-	tmp &= ~LPC32XX_CLKPWR_MACCTRL_PINS_MSK;
-	if (lpc_phy_interface_mode(dev) == PHY_INTERFACE_MODE_MII)
-		tmp |= LPC32XX_CLKPWR_MACCTRL_USE_MII_PINS;
-	else
-		tmp |= LPC32XX_CLKPWR_MACCTRL_USE_RMII_PINS;
-	__raw_writel(tmp, LPC32XX_CLKPWR_MACCLK_CTRL);
+	lpc32xx_set_phy_interface_mode(lpc_phy_interface_mode(dev));
 
 	/* Get platform resources */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/include/linux/soc/nxp/lpc32xx-misc.h b/include/linux/soc/nxp/lpc32xx-misc.h
index f232e1a1bcdc..af4f82f6cf3b 100644
--- a/include/linux/soc/nxp/lpc32xx-misc.h
+++ b/include/linux/soc/nxp/lpc32xx-misc.h
@@ -9,9 +9,11 @@
 #define __SOC_LPC32XX_MISC_H
 
 #include <linux/types.h>
+#include <linux/phy.h>
 
 #ifdef CONFIG_ARCH_LPC32XX
 extern u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr);
+extern void lpc32xx_set_phy_interface_mode(phy_interface_t mode);
 #else
 static inline u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr)
 {
@@ -19,6 +21,9 @@ static inline u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaadd
 	*dmaaddr = 0;
 	return 0;
 }
+static inline void lpc32xx_set_phy_interface_mode(phy_interface_t mode)
+{
+}
 #endif
 
 #endif  /* __SOC_LPC32XX_MISC_H */
-- 
cgit v1.2.3


From ffba29c9ebd0977dbf77bf6064776716a51b8ae5 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 16:40:37 +0200
Subject: serial: lpc32xx: allow compile testing

The lpc32xx_loopback_set() function in hte lpc32xx_hs driver is the
one thing that relies on platform header files. Move that into the
core platform code so we only need a variable declaration for it,
and enable COMPILE_TEST building.

Link: https://lore.kernel.org/r/20190809144043.476786-12-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-lpc32xx/serial.c       | 30 ++++++++++++++++++++++++++++++
 drivers/tty/serial/lpc32xx_hs.c      | 35 ++++-------------------------------
 include/linux/soc/nxp/lpc32xx-misc.h |  4 ++++
 3 files changed, 38 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-lpc32xx/serial.c b/arch/arm/mach-lpc32xx/serial.c
index 3f9b30df9f0e..cfb35e5691cd 100644
--- a/arch/arm/mach-lpc32xx/serial.c
+++ b/arch/arm/mach-lpc32xx/serial.c
@@ -60,6 +60,36 @@ static struct uartinit uartinit_data[] __initdata = {
 	},
 };
 
+/* LPC3250 Errata HSUART.1: Hang workaround via loopback mode on inactivity */
+void lpc32xx_loopback_set(resource_size_t mapbase, int state)
+{
+	int bit;
+	u32 tmp;
+
+	switch (mapbase) {
+	case LPC32XX_HS_UART1_BASE:
+		bit = 0;
+		break;
+	case LPC32XX_HS_UART2_BASE:
+		bit = 1;
+		break;
+	case LPC32XX_HS_UART7_BASE:
+		bit = 6;
+		break;
+	default:
+		WARN(1, "lpc32xx_hs: Warning: Unknown port at %08x\n", mapbase);
+		return;
+	}
+
+	tmp = readl(LPC32XX_UARTCTL_CLOOP);
+	if (state)
+		tmp |= (1 << bit);
+	else
+		tmp &= ~(1 << bit);
+	writel(tmp, LPC32XX_UARTCTL_CLOOP);
+}
+EXPORT_SYMBOL_GPL(lpc32xx_loopback_set);
+
 void __init lpc32xx_serial_init(void)
 {
 	u32 tmp, clkmodes = 0;
diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c
index 7f14cd8fac47..d3843f722182 100644
--- a/drivers/tty/serial/lpc32xx_hs.c
+++ b/drivers/tty/serial/lpc32xx_hs.c
@@ -25,6 +25,8 @@
 #include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/of.h>
+#include <linux/sizes.h>
+#include <linux/soc/nxp/lpc32xx-misc.h>
 
 /*
  * High Speed UART register offsets
@@ -79,6 +81,8 @@
 #define LPC32XX_HSU_TX_TL8B			(0x2 << 0)
 #define LPC32XX_HSU_TX_TL16B			(0x3 << 0)
 
+#define LPC32XX_MAIN_OSC_FREQ			13000000
+
 #define MODNAME "lpc32xx_hsuart"
 
 struct lpc32xx_hsuart_port {
@@ -149,8 +153,6 @@ static void lpc32xx_hsuart_console_write(struct console *co, const char *s,
 	local_irq_restore(flags);
 }
 
-static void lpc32xx_loopback_set(resource_size_t mapbase, int state);
-
 static int __init lpc32xx_hsuart_console_setup(struct console *co,
 					       char *options)
 {
@@ -437,35 +439,6 @@ static void serial_lpc32xx_break_ctl(struct uart_port *port,
 	spin_unlock_irqrestore(&port->lock, flags);
 }
 
-/* LPC3250 Errata HSUART.1: Hang workaround via loopback mode on inactivity */
-static void lpc32xx_loopback_set(resource_size_t mapbase, int state)
-{
-	int bit;
-	u32 tmp;
-
-	switch (mapbase) {
-	case LPC32XX_HS_UART1_BASE:
-		bit = 0;
-		break;
-	case LPC32XX_HS_UART2_BASE:
-		bit = 1;
-		break;
-	case LPC32XX_HS_UART7_BASE:
-		bit = 6;
-		break;
-	default:
-		WARN(1, "lpc32xx_hs: Warning: Unknown port at %08x\n", mapbase);
-		return;
-	}
-
-	tmp = readl(LPC32XX_UARTCTL_CLOOP);
-	if (state)
-		tmp |= (1 << bit);
-	else
-		tmp &= ~(1 << bit);
-	writel(tmp, LPC32XX_UARTCTL_CLOOP);
-}
-
 /* port->lock is not held.  */
 static int serial_lpc32xx_startup(struct uart_port *port)
 {
diff --git a/include/linux/soc/nxp/lpc32xx-misc.h b/include/linux/soc/nxp/lpc32xx-misc.h
index af4f82f6cf3b..699c6f1e3aab 100644
--- a/include/linux/soc/nxp/lpc32xx-misc.h
+++ b/include/linux/soc/nxp/lpc32xx-misc.h
@@ -14,6 +14,7 @@
 #ifdef CONFIG_ARCH_LPC32XX
 extern u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr);
 extern void lpc32xx_set_phy_interface_mode(phy_interface_t mode);
+extern void lpc32xx_loopback_set(resource_size_t mapbase, int state);
 #else
 static inline u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaaddr)
 {
@@ -24,6 +25,9 @@ static inline u32 lpc32xx_return_iram(void __iomem **mapbase, dma_addr_t *dmaadd
 static inline void lpc32xx_set_phy_interface_mode(phy_interface_t mode)
 {
 }
+static inline void lpc32xx_loopback_set(resource_size_t mapbase, int state)
+{
+}
 #endif
 
 #endif  /* __SOC_LPC32XX_MISC_H */
-- 
cgit v1.2.3


From 0dabbe1bb3a4cf47d0cf52828355293f18acdae9 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Wed, 14 Aug 2019 01:11:53 -0700
Subject: qed: Add driver API for flashing the config attributes.

The patch adds driver interface for reading the config attributes from user
provided buffer, and updates these values on nvm config flash partition.

This is basically an expansion of our existing ethtool -f implementation.
The management FW has exposed an additional method of configuring some of
the nvram options, and this makes use of that. This implementation will
come into use when newer FW files which contain configuration directives
employing this API will be provided to ethtool -f.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 68 ++++++++++++++++++++++++++++++
 include/linux/qed/qed_if.h                 |  1 +
 2 files changed, 69 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index e5ac8bd4fd14..0a7645937412 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -67,6 +67,8 @@
 #define QED_ROCE_QPS			(8192)
 #define QED_ROCE_DPIS			(8)
 #define QED_RDMA_SRQS                   QED_ROCE_QPS
+#define QED_NVM_CFG_SET_FLAGS		0xE
+#define QED_NVM_CFG_SET_PF_FLAGS	0x1E
 
 static char version[] =
 	"QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -2231,6 +2233,69 @@ static int qed_nvm_flash_image_validate(struct qed_dev *cdev,
 	return 0;
 }
 
+/* Binary file format -
+ *     /----------------------------------------------------------------------\
+ * 0B  |                       0x5 [command index]                            |
+ * 4B  | Entity ID     | Reserved        |  Number of config attributes       |
+ * 8B  | Config ID                       | Length        | Value              |
+ *     |                                                                      |
+ *     \----------------------------------------------------------------------/
+ * There can be several cfg_id-Length-Value sets as specified by 'Number of...'.
+ * Entity ID - A non zero entity value for which the config need to be updated.
+ *
+ * The API parses config attributes from the user provided buffer and flashes
+ * them to the respective NVM path using Management FW inerface.
+ */
+static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 entity_id, len, buf[32];
+	struct qed_ptt *ptt;
+	u16 cfg_id, count;
+	int rc = 0, i;
+	u32 flags;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	/* NVM CFG ID attribute header */
+	*data += 4;
+	entity_id = **data;
+	*data += 2;
+	count = *((u16 *)*data);
+	*data += 2;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "Read config ids: entity id %02x num _attrs = %0d\n",
+		   entity_id, count);
+	/* NVM CFG ID attributes */
+	for (i = 0; i < count; i++) {
+		cfg_id = *((u16 *)*data);
+		*data += 2;
+		len = **data;
+		(*data)++;
+		memcpy(buf, *data, len);
+		*data += len;
+
+		flags = entity_id ? QED_NVM_CFG_SET_PF_FLAGS :
+			QED_NVM_CFG_SET_FLAGS;
+
+		DP_VERBOSE(cdev, NETIF_MSG_DRV,
+			   "cfg_id = %d len = %d\n", cfg_id, len);
+		rc = qed_mcp_nvm_set_cfg(hwfn, ptt, cfg_id, entity_id, flags,
+					 buf, len);
+		if (rc) {
+			DP_ERR(cdev, "Error %d configuring %d\n", rc, cfg_id);
+			break;
+		}
+	}
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
 static int qed_nvm_flash(struct qed_dev *cdev, const char *name)
 {
 	const struct firmware *image;
@@ -2272,6 +2337,9 @@ static int qed_nvm_flash(struct qed_dev *cdev, const char *name)
 			rc = qed_nvm_flash_image_access(cdev, &data,
 							&check_resp);
 			break;
+		case QED_NVM_FLASH_CMD_NVM_CFG_ID:
+			rc = qed_nvm_flash_cfg_write(cdev, &data);
+			break;
 		default:
 			DP_ERR(cdev, "Unknown command %08x\n", cmd_type);
 			rc = -EINVAL;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 23021366a4e5..e366399874f3 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -804,6 +804,7 @@ enum qed_nvm_flash_cmd {
 	QED_NVM_FLASH_CMD_FILE_DATA = 0x2,
 	QED_NVM_FLASH_CMD_FILE_START = 0x3,
 	QED_NVM_FLASH_CMD_NVM_CHANGE = 0x4,
+	QED_NVM_FLASH_CMD_NVM_CFG_ID = 0x5,
 	QED_NVM_FLASH_CMD_NVM_MAX,
 };
 
-- 
cgit v1.2.3


From bfc4c359b2822bd2c457ccab271baeb33181c7c9 Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa <anusha.srivatsa@intel.com>
Date: Mon, 12 Aug 2019 15:27:37 -0700
Subject: drm/i915/cml: Add Missing PCI IDs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The BSpec has added three new IDS for CML.
Update the IDs in accordance to the Spec.

Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Signed-off-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Reviewed-by: Anshuman Gupta <anshuman.gupta@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190812222737.29356-1-anusha.srivatsa@intel.com
---
 include/drm/i915_pciids.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index a70c982ddff9..b1f66b117c74 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -466,7 +466,10 @@
 	INTEL_VGA_DEVICE(0x9BC5, info), \
 	INTEL_VGA_DEVICE(0x9BC8, info), \
 	INTEL_VGA_DEVICE(0x9BC4, info), \
-	INTEL_VGA_DEVICE(0x9BC2, info)
+	INTEL_VGA_DEVICE(0x9BC2, info), \
+	INTEL_VGA_DEVICE(0x9BC6, info), \
+	INTEL_VGA_DEVICE(0x9BE6, info), \
+	INTEL_VGA_DEVICE(0x9BF6, info)
 
 #define INTEL_KBL_IDS(info) \
 	INTEL_KBL_GT1_IDS(info), \
-- 
cgit v1.2.3


From d507a54f5865d8dcbdd16c66a1a2da15640878ca Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Mon, 5 Aug 2019 21:23:05 +0200
Subject: platform/x86: asus-wmi: Add support for charge threshold

Most newer ASUS laptops supports limiting the battery charge level, which
help prolonging the battery life.

Tested on a Zenbook UX430UNR.

Signed-off-by: Kristian Klausen <kristian@klausen.dk>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c            | 48 ++++++++++++++++++++++++++++++
 include/linux/platform_data/x86/asus-wmi.h |  1 +
 2 files changed, 49 insertions(+)

(limited to 'include')

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 34dfbed65332..22ae350e0a96 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -195,6 +195,8 @@ struct asus_wmi {
 	u8 fan_boost_mode_mask;
 	u8 fan_boost_mode;
 
+	int charge_threshold;
+
 	struct hotplug_slot hotplug_slot;
 	struct mutex hotplug_lock;
 	struct mutex wmi_lock;
@@ -2075,6 +2077,43 @@ static ssize_t cpufv_store(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR_WO(cpufv);
 
+
+static ssize_t charge_threshold_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+	int value, ret, rv;
+
+	ret = kstrtouint(buf, 10, &value);
+
+	if (!count || ret != 0)
+		return -EINVAL;
+	if (value < 0 || value > 100)
+		return -EINVAL;
+
+	asus_wmi_set_devstate(ASUS_WMI_CHARGE_THRESHOLD, value, &rv);
+
+	if (rv != 1)
+		return -EIO;
+
+	/* There isn't any method in the DSDT to read the threshold, so we
+	 * save the threshold.
+	 */
+	asus->charge_threshold = value;
+	return count;
+}
+
+static ssize_t charge_threshold_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct asus_wmi *asus = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", asus->charge_threshold);
+}
+
+static DEVICE_ATTR_RW(charge_threshold);
+
 static struct attribute *platform_attributes[] = {
 	&dev_attr_cpufv.attr,
 	&dev_attr_camera.attr,
@@ -2083,6 +2122,7 @@ static struct attribute *platform_attributes[] = {
 	&dev_attr_lid_resume.attr,
 	&dev_attr_als_enable.attr,
 	&dev_attr_fan_boost_mode.attr,
+	&dev_attr_charge_threshold.attr,
 	NULL
 };
 
@@ -2106,6 +2146,8 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 		devid = ASUS_WMI_DEVID_ALS_ENABLE;
 	else if (attr == &dev_attr_fan_boost_mode.attr)
 		ok = asus->fan_boost_mode_available;
+	else if (attr == &dev_attr_charge_threshold.attr)
+		devid = ASUS_WMI_CHARGE_THRESHOLD;
 
 	if (devid != -1)
 		ok = !(asus_wmi_get_devstate_simple(asus, devid) < 0);
@@ -2434,6 +2476,12 @@ static int asus_wmi_add(struct platform_device *pdev)
 	}
 
 	asus_wmi_debugfs_init(asus);
+	/* The charge threshold is only reset when the system is power cycled,
+	 * and we can't get the current threshold so let set it to 100% on
+	 * module load.
+	 */
+	asus_wmi_set_devstate(ASUS_WMI_CHARGE_THRESHOLD, 100, NULL);
+	asus->charge_threshold = 100;
 
 	return 0;
 
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 409e16064f4b..53934ef38d98 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -61,6 +61,7 @@
 
 /* Misc */
 #define ASUS_WMI_DEVID_CAMERA		0x00060013
+#define ASUS_WMI_CHARGE_THRESHOLD	0x00120057
 
 /* Storage */
 #define ASUS_WMI_DEVID_CARDREADER	0x00080013
-- 
cgit v1.2.3


From f59b16ef4ccea1b52f1c4e4c60ce507dc0bcc0ad Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@nxp.com>
Date: Thu, 15 Aug 2019 14:20:15 -0500
Subject: ASoC: SOF: topology: Add dummy support for i.MX8 DAIs

Add dummy support for SAI/ESAI digital audio interface
IPs found on i.MX8 boards.

Signed-off-by: Daniel Baluta <daniel.baluta@nxp.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190815192018.30570-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai.h         |  2 ++
 include/uapi/sound/sof/tokens.h |  8 ++++++++
 sound/soc/sof/topology.c        | 30 ++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

(limited to 'include')

diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index da9825ad41d4..86494294274e 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -50,6 +50,8 @@ enum sof_ipc_dai_type {
 	SOF_DAI_INTEL_DMIC,		/**< Intel DMIC */
 	SOF_DAI_INTEL_HDA,		/**< Intel HD/A */
 	SOF_DAI_INTEL_SOUNDWIRE,	/**< Intel SoundWire */
+	SOF_DAI_IMX_SAI,		/**< i.MX SAI */
+	SOF_DAI_IMX_ESAI,		/**< i.MX ESAI */
 };
 
 /* general purpose DAI configuration */
diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h
index 6435240cef13..8f996857fb24 100644
--- a/include/uapi/sound/sof/tokens.h
+++ b/include/uapi/sound/sof/tokens.h
@@ -106,4 +106,12 @@
 /* for backward compatibility */
 #define SOF_TKN_EFFECT_TYPE	SOF_TKN_PROCESS_TYPE
 
+/* SAI */
+#define SOF_TKN_IMX_SAI_FIRST_TOKEN		1000
+/* TODO: Add SAI tokens */
+
+/* ESAI */
+#define SOF_TKN_IMX_ESAI_FIRST_TOKEN		1100
+/* TODO: Add ESAI tokens */
+
 #endif
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 9cffea142395..a215bf58b138 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -346,6 +346,8 @@ static const struct sof_dai_types sof_dais[] = {
 	{"SSP", SOF_DAI_INTEL_SSP},
 	{"HDA", SOF_DAI_INTEL_HDA},
 	{"DMIC", SOF_DAI_INTEL_DMIC},
+	{"SAI", SOF_DAI_IMX_SAI},
+	{"ESAI", SOF_DAI_IMX_ESAI},
 };
 
 static enum sof_ipc_dai_type find_dai(const char *name)
@@ -2513,6 +2515,26 @@ static int sof_link_ssp_load(struct snd_soc_component *scomp, int index,
 	return ret;
 }
 
+static int sof_link_sai_load(struct snd_soc_component *scomp, int index,
+			     struct snd_soc_dai_link *link,
+			     struct snd_soc_tplg_link_config *cfg,
+			     struct snd_soc_tplg_hw_config *hw_config,
+			     struct sof_ipc_dai_config *config)
+{
+	/*TODO: Add implementation */
+	return 0;
+}
+
+static int sof_link_esai_load(struct snd_soc_component *scomp, int index,
+			      struct snd_soc_dai_link *link,
+			      struct snd_soc_tplg_link_config *cfg,
+			      struct snd_soc_tplg_hw_config *hw_config,
+			      struct sof_ipc_dai_config *config)
+{
+	/*TODO: Add implementation */
+	return 0;
+}
+
 static int sof_link_dmic_load(struct snd_soc_component *scomp, int index,
 			      struct snd_soc_dai_link *link,
 			      struct snd_soc_tplg_link_config *cfg,
@@ -2837,6 +2859,14 @@ static int sof_link_load(struct snd_soc_component *scomp, int index,
 		ret = sof_link_hda_load(scomp, index, link, cfg, hw_config,
 					&config);
 		break;
+	case SOF_DAI_IMX_SAI:
+		ret = sof_link_sai_load(scomp, index, link, cfg, hw_config,
+					&config);
+		break;
+	case SOF_DAI_IMX_ESAI:
+		ret = sof_link_esai_load(scomp, index, link, cfg, hw_config,
+					 &config);
+		break;
 	default:
 		dev_err(sdev->dev, "error: invalid DAI type %d\n", config.type);
 		ret = -EINVAL;
-- 
cgit v1.2.3


From 8207a1c4911226d0dd0592ffed5fce4a1f95e4b2 Mon Sep 17 00:00:00 2001
From: Bard liao <yung-chuan.liao@linux.intel.com>
Date: Thu, 15 Aug 2019 14:20:16 -0500
Subject: ASoC: SOF: rename SOUNDWIRE to ALH

Rename SOUNDWIRE to ALH.

Signed-off-by: Bard liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190815192018.30570-3-pierre-louis.bossart@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/sof/dai.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/sof/dai.h b/include/sound/sof/dai.h
index 86494294274e..9da53f8dc7a7 100644
--- a/include/sound/sof/dai.h
+++ b/include/sound/sof/dai.h
@@ -49,7 +49,7 @@ enum sof_ipc_dai_type {
 	SOF_DAI_INTEL_SSP,		/**< Intel SSP */
 	SOF_DAI_INTEL_DMIC,		/**< Intel DMIC */
 	SOF_DAI_INTEL_HDA,		/**< Intel HD/A */
-	SOF_DAI_INTEL_SOUNDWIRE,	/**< Intel SoundWire */
+	SOF_DAI_INTEL_ALH,		/**< Intel ALH  */
 	SOF_DAI_IMX_SAI,		/**< i.MX SAI */
 	SOF_DAI_IMX_ESAI,		/**< i.MX ESAI */
 };
-- 
cgit v1.2.3


From a6e6fe6549f609f5c00c91e988da9d25f8ae8604 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 12 Aug 2019 11:30:35 -0600
Subject: PCI/P2PDMA: Introduce private pagemap structure

Move the PCI bus offset from the generic dev_pagemap structure to a
specific pci_p2pdma_pagemap structure.

This structure will grow in subsequent patches.

Link: https://lore.kernel.org/r/20190730163545.4915-2-logang@deltatee.com
Link: https://lore.kernel.org/r/20190812173048.9186-2-logang@deltatee.com
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/pci/p2pdma.c     | 26 ++++++++++++++++++++------
 include/linux/memremap.h |  1 -
 2 files changed, 20 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 234476226529..03e9c887bdfb 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -25,6 +25,16 @@ struct pci_p2pdma {
 	bool p2pmem_published;
 };
 
+struct pci_p2pdma_pagemap {
+	struct dev_pagemap pgmap;
+	u64 bus_offset;
+};
+
+static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
+{
+	return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
+}
+
 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
@@ -135,6 +145,7 @@ out:
 int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 			    u64 offset)
 {
+	struct pci_p2pdma_pagemap *p2p_pgmap;
 	struct dev_pagemap *pgmap;
 	void *addr;
 	int error;
@@ -157,14 +168,17 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 			return error;
 	}
 
-	pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
-	if (!pgmap)
+	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
+	if (!p2p_pgmap)
 		return -ENOMEM;
+
+	pgmap = &p2p_pgmap->pgmap;
 	pgmap->res.start = pci_resource_start(pdev, bar) + offset;
 	pgmap->res.end = pgmap->res.start + size - 1;
 	pgmap->res.flags = pci_resource_flags(pdev, bar);
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
-	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+
+	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
 		pci_resource_start(pdev, bar);
 
 	addr = devm_memremap_pages(&pdev->dev, pgmap);
@@ -720,7 +734,7 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
 int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		      enum dma_data_direction dir)
 {
-	struct dev_pagemap *pgmap;
+	struct pci_p2pdma_pagemap *p2p_pgmap;
 	struct scatterlist *s;
 	phys_addr_t paddr;
 	int i;
@@ -736,10 +750,10 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		return 0;
 
 	for_each_sg(sg, s, nents, i) {
-		pgmap = sg_page(s)->pgmap;
+		p2p_pgmap = to_p2p_pgmap(sg_page(s)->pgmap);
 		paddr = sg_phys(s);
 
-		s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+		s->dma_address = paddr - p2p_pgmap->bus_offset;
 		sg_dma_len(s) = s->length;
 	}
 
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f8a5b2a19945..b459518ce475 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -112,7 +112,6 @@ struct dev_pagemap {
 	struct device *dev;
 	enum memory_type type;
 	unsigned int flags;
-	u64 pci_p2pdma_bus_offset;
 	const struct dev_pagemap_ops *ops;
 };
 
-- 
cgit v1.2.3


From 2b9f4bb2a4fb77da4862f9ddf5209de2bcdaa0c0 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 12 Aug 2019 11:30:42 -0600
Subject: PCI/P2PDMA: Add attrs argument to pci_p2pdma_map_sg()

This is to match the dma_map_sg() API which this function will have to call
in an future patch.

Add a pci_p2pdma_map_sg_attrs() function and helper to call it with no
attributes just like the dma_map_sg() function.

Link: https://lore.kernel.org/r/20190730163545.4915-9-logang@deltatee.com
Link: https://lore.kernel.org/r/20190812173048.9186-9-logang@deltatee.com
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c    |  4 ++--
 drivers/pci/p2pdma.c       |  7 ++++---
 include/linux/pci-p2pdma.h | 15 +++++++++++----
 3 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bb970ca82517..7747712054cd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -832,8 +832,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		goto out;
 
 	if (is_pci_p2pdma_page(sg_page(iod->sg)))
-		nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
-					      rq_dma_dir(req));
+		nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
+				iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
 	else
 		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
 					     rq_dma_dir(req), DMA_ATTR_NO_WARN);
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 2c4a8e92ed64..94fbacbcbbd0 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -798,13 +798,14 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
  * @sg: scatter list to map
  * @nents: elements in the scatterlist
  * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_map_sg() (if called)
  *
  * Scatterlists mapped with this function should not be unmapped in any way.
  *
  * Returns the number of SG entries mapped or 0 on error.
  */
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		      enum dma_data_direction dir)
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
 {
 	struct pci_p2pdma_pagemap *p2p_pgmap;
 	struct scatterlist *s;
@@ -831,7 +832,7 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 
 	return nents;
 }
-EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
 
 /**
  * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index bca9bc3e5be7..7fd51954f93a 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -30,8 +30,8 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
 					 unsigned int *nents, u32 length);
 void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
-int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		      enum dma_data_direction dir);
+int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs);
 int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
 			    bool *use_p2pdma);
 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
@@ -81,8 +81,9 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
 static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 {
 }
-static inline int pci_p2pdma_map_sg(struct device *dev,
-		struct scatterlist *sg, int nents, enum dma_data_direction dir)
+static inline int pci_p2pdma_map_sg_attrs(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir,
+		unsigned long attrs)
 {
 	return 0;
 }
@@ -111,4 +112,10 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client)
 	return pci_p2pmem_find_many(&client, 1);
 }
 
+static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg,
+				    int nents, enum dma_data_direction dir)
+{
+	return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0);
+}
+
 #endif /* _LINUX_PCI_P2P_H */
-- 
cgit v1.2.3


From 7f73eac3a7137eabfb0c005c7ba55eb7994b9673 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Mon, 12 Aug 2019 11:30:43 -0600
Subject: PCI/P2PDMA: Introduce pci_p2pdma_unmap_sg()

Add pci_p2pdma_unmap_sg() to the two places that call pci_p2pdma_map_sg().

This is a prep patch to introduce correct mappings for p2pdma transactions
that go through the root complex.

Link: https://lore.kernel.org/r/20190730163545.4915-10-logang@deltatee.com
Link: https://lore.kernel.org/r/20190812173048.9186-10-logang@deltatee.com
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/infiniband/core/rw.c |  6 ++++--
 drivers/nvme/host/pci.c      |  6 ++++--
 drivers/pci/p2pdma.c         | 18 +++++++++++++++++-
 include/linux/pci-p2pdma.h   | 13 +++++++++++++
 4 files changed, 38 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index dce06108c8c3..5337393d4dfe 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -583,8 +583,10 @@ void rdma_rw_ctx_destroy(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 		break;
 	}
 
-	/* P2PDMA contexts do not need to be unmapped */
-	if (!is_pci_p2pdma_page(sg_page(sg)))
+	if (is_pci_p2pdma_page(sg_page(sg)))
+		pci_p2pdma_unmap_sg(qp->pd->device->dma_device, sg,
+				    sg_cnt, dir);
+	else
 		ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir);
 }
 EXPORT_SYMBOL(rdma_rw_ctx_destroy);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7747712054cd..2348b15f6bd0 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -547,8 +547,10 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 
 	WARN_ON_ONCE(!iod->nents);
 
-	/* P2PDMA requests do not need to be unmapped */
-	if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+	if (is_pci_p2pdma_page(sg_page(iod->sg)))
+		pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
+				    rq_dma_dir(req));
+	else
 		dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
 
 
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 94fbacbcbbd0..1eec7a5ec27e 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -800,7 +800,8 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
  * @dir: DMA direction
  * @attrs: DMA attributes passed to dma_map_sg() (if called)
  *
- * Scatterlists mapped with this function should not be unmapped in any way.
+ * Scatterlists mapped with this function should be unmapped using
+ * pci_p2pdma_unmap_sg_attrs().
  *
  * Returns the number of SG entries mapped or 0 on error.
  */
@@ -834,6 +835,21 @@ int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
 
+/**
+ * pci_p2pdma_unmap_sg - unmap a PCI peer-to-peer scatterlist that was
+ *	mapped with pci_p2pdma_map_sg()
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: number of elements returned by pci_p2pdma_map_sg()
+ * @dir: DMA direction
+ * @attrs: DMA attributes passed to dma_unmap_sg() (if called)
+ */
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);
+
 /**
  * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
  *		to enable p2pdma
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 7fd51954f93a..8318a97c9c61 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -32,6 +32,8 @@ void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
 int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction dir, unsigned long attrs);
+void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs);
 int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
 			    bool *use_p2pdma);
 ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
@@ -87,6 +89,11 @@ static inline int pci_p2pdma_map_sg_attrs(struct device *dev,
 {
 	return 0;
 }
+static inline void pci_p2pdma_unmap_sg_attrs(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+}
 static inline int pci_p2pdma_enable_store(const char *page,
 		struct pci_dev **p2p_dev, bool *use_p2pdma)
 {
@@ -118,4 +125,10 @@ static inline int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg,
 	return pci_p2pdma_map_sg_attrs(dev, sg, nents, dir, 0);
 }
 
+static inline void pci_p2pdma_unmap_sg(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir)
+{
+	pci_p2pdma_unmap_sg_attrs(dev, sg, nents, dir, 0);
+}
+
 #endif /* _LINUX_PCI_P2P_H */
-- 
cgit v1.2.3


From 2c7933f53f6bff7656e3324ca1a04e478bdc57c1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 6 Aug 2019 20:15:40 -0300
Subject: mm/mmu_notifiers: add a get/put scheme for the registration

Many places in the kernel have a flow where userspace will create some
object and that object will need to connect to the subsystem's
mmu_notifier subscription for the duration of its lifetime.

In this case the subsystem is usually tracking multiple mm_structs and it
is difficult to keep track of what struct mmu_notifier's have been
allocated for what mm's.

Since this has been open coded in a variety of exciting ways, provide core
functionality to do this safely.

This approach uses the struct mmu_notifier_ops * as a key to determine if
the subsystem has a notifier registered on the mm or not. If there is a
registration then the existing notifier struct is returned, otherwise the
ops->alloc_notifiers() is used to create a new per-subsystem notifier for
the mm.

The destroy side incorporates an async call_srcu based destruction which
will avoid bugs in the callers such as commit 6d7c3cde93c1 ("mm/hmm: fix
use after free with struct hmm in the mmu notifiers").

Since we are inside the mmu notifier core locking is fairly simple, the
allocation uses the same approach as for mmu_notifier_mm, the write side
of the mmap_sem makes everything deterministic and we only need to do
hlist_add_head_rcu() under the mm_take_all_locks(). The new users count
and the discoverability in the hlist is fully serialized by the
mmu_notifier_mm->lock.

Link: https://lore.kernel.org/r/20190806231548.25242-4-jgg@ziepe.ca
Co-developed-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/mmu_notifier.h |  35 ++++++++++
 mm/mmu_notifier.c            | 156 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 185 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b6c004bd9f6a..31aa971315a1 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -211,6 +211,19 @@ struct mmu_notifier_ops {
 	 */
 	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 				 unsigned long start, unsigned long end);
+
+	/*
+	 * These callbacks are used with the get/put interface to manage the
+	 * lifetime of the mmu_notifier memory. alloc_notifier() returns a new
+	 * notifier for use with the mm.
+	 *
+	 * free_notifier() is only called after the mmu_notifier has been
+	 * fully put, calls to any ops callback are prevented and no ops
+	 * callbacks are currently running. It is called from a SRCU callback
+	 * and cannot sleep.
+	 */
+	struct mmu_notifier *(*alloc_notifier)(struct mm_struct *mm);
+	void (*free_notifier)(struct mmu_notifier *mn);
 };
 
 /*
@@ -227,6 +240,9 @@ struct mmu_notifier_ops {
 struct mmu_notifier {
 	struct hlist_node hlist;
 	const struct mmu_notifier_ops *ops;
+	struct mm_struct *mm;
+	struct rcu_head rcu;
+	unsigned int users;
 };
 
 static inline int mm_has_notifiers(struct mm_struct *mm)
@@ -234,6 +250,21 @@ static inline int mm_has_notifiers(struct mm_struct *mm)
 	return unlikely(mm->mmu_notifier_mm);
 }
 
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+					     struct mm_struct *mm);
+static inline struct mmu_notifier *
+mmu_notifier_get(const struct mmu_notifier_ops *ops, struct mm_struct *mm)
+{
+	struct mmu_notifier *ret;
+
+	down_write(&mm->mmap_sem);
+	ret = mmu_notifier_get_locked(ops, mm);
+	up_write(&mm->mmap_sem);
+	return ret;
+}
+void mmu_notifier_put(struct mmu_notifier *mn);
+void mmu_notifier_synchronize(void);
+
 extern int mmu_notifier_register(struct mmu_notifier *mn,
 				 struct mm_struct *mm);
 extern int __mmu_notifier_register(struct mmu_notifier *mn,
@@ -581,6 +612,10 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
 #define set_pte_at_notify set_pte_at
 
+static inline void mmu_notifier_synchronize(void)
+{
+}
+
 #endif /* CONFIG_MMU_NOTIFIER */
 
 #endif /* _LINUX_MMU_NOTIFIER_H */
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 696810f632ad..9e92ec8006fc 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -248,6 +248,9 @@ int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 	lockdep_assert_held_write(&mm->mmap_sem);
 	BUG_ON(atomic_read(&mm->mm_users) <= 0);
 
+	mn->mm = mm;
+	mn->users = 1;
+
 	if (!mm->mmu_notifier_mm) {
 		/*
 		 * kmalloc cannot be called under mm_take_all_locks(), but we
@@ -295,18 +298,24 @@ out_clean:
 }
 EXPORT_SYMBOL_GPL(__mmu_notifier_register);
 
-/*
+/**
+ * mmu_notifier_register - Register a notifier on a mm
+ * @mn: The notifier to attach
+ * @mm: The mm to attach the notifier to
+ *
  * Must not hold mmap_sem nor any other VM related lock when calling
  * this registration function. Must also ensure mm_users can't go down
  * to zero while this runs to avoid races with mmu_notifier_release,
  * so mm has to be current->mm or the mm should be pinned safely such
  * as with get_task_mm(). If the mm is not current->mm, the mm_users
  * pin should be released by calling mmput after mmu_notifier_register
- * returns. mmu_notifier_unregister must be always called to
- * unregister the notifier. mm_count is automatically pinned to allow
- * mmu_notifier_unregister to safely run at any time later, before or
- * after exit_mmap. ->release will always be called before exit_mmap
- * frees the pages.
+ * returns.
+ *
+ * mmu_notifier_unregister() or mmu_notifier_put() must be always called to
+ * unregister the notifier.
+ *
+ * While the caller has a mmu_notifier get the mn->mm pointer will remain
+ * valid, and can be converted to an active mm pointer via mmget_not_zero().
  */
 int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 {
@@ -319,6 +328,72 @@ int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_register);
 
+static struct mmu_notifier *
+find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
+{
+	struct mmu_notifier *mn;
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	hlist_for_each_entry_rcu (mn, &mm->mmu_notifier_mm->list, hlist) {
+		if (mn->ops != ops)
+			continue;
+
+		if (likely(mn->users != UINT_MAX))
+			mn->users++;
+		else
+			mn = ERR_PTR(-EOVERFLOW);
+		spin_unlock(&mm->mmu_notifier_mm->lock);
+		return mn;
+	}
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+	return NULL;
+}
+
+/**
+ * mmu_notifier_get_locked - Return the single struct mmu_notifier for
+ *                           the mm & ops
+ * @ops: The operations struct being subscribe with
+ * @mm : The mm to attach notifiers too
+ *
+ * This function either allocates a new mmu_notifier via
+ * ops->alloc_notifier(), or returns an already existing notifier on the
+ * list. The value of the ops pointer is used to determine when two notifiers
+ * are the same.
+ *
+ * Each call to mmu_notifier_get() must be paired with a call to
+ * mmu_notifier_put(). The caller must hold the write side of mm->mmap_sem.
+ *
+ * While the caller has a mmu_notifier get the mm pointer will remain valid,
+ * and can be converted to an active mm pointer via mmget_not_zero().
+ */
+struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
+					     struct mm_struct *mm)
+{
+	struct mmu_notifier *mn;
+	int ret;
+
+	lockdep_assert_held_write(&mm->mmap_sem);
+
+	if (mm->mmu_notifier_mm) {
+		mn = find_get_mmu_notifier(mm, ops);
+		if (mn)
+			return mn;
+	}
+
+	mn = ops->alloc_notifier(mm);
+	if (IS_ERR(mn))
+		return mn;
+	mn->ops = ops;
+	ret = __mmu_notifier_register(mn, mm);
+	if (ret)
+		goto out_free;
+	return mn;
+out_free:
+	mn->ops->free_notifier(mn);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);
+
 /* this is called after the last mmu_notifier_unregister() returned */
 void __mmu_notifier_mm_destroy(struct mm_struct *mm)
 {
@@ -397,6 +472,75 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
 
+static void mmu_notifier_free_rcu(struct rcu_head *rcu)
+{
+	struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu);
+	struct mm_struct *mm = mn->mm;
+
+	mn->ops->free_notifier(mn);
+	/* Pairs with the get in __mmu_notifier_register() */
+	mmdrop(mm);
+}
+
+/**
+ * mmu_notifier_put - Release the reference on the notifier
+ * @mn: The notifier to act on
+ *
+ * This function must be paired with each mmu_notifier_get(), it releases the
+ * reference obtained by the get. If this is the last reference then process
+ * to free the notifier will be run asynchronously.
+ *
+ * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
+ * when the mm_struct is destroyed. Instead free_notifier is always called to
+ * release any resources held by the user.
+ *
+ * As ops->release is not guaranteed to be called, the user must ensure that
+ * all sptes are dropped, and no new sptes can be established before
+ * mmu_notifier_put() is called.
+ *
+ * This function can be called from the ops->release callback, however the
+ * caller must still ensure it is called pairwise with mmu_notifier_get().
+ *
+ * Modules calling this function must call mmu_notifier_synchronize() in
+ * their __exit functions to ensure the async work is completed.
+ */
+void mmu_notifier_put(struct mmu_notifier *mn)
+{
+	struct mm_struct *mm = mn->mm;
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	if (WARN_ON(!mn->users) || --mn->users)
+		goto out_unlock;
+	hlist_del_init_rcu(&mn->hlist);
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+
+	call_srcu(&srcu, &mn->rcu, mmu_notifier_free_rcu);
+	return;
+
+out_unlock:
+	spin_unlock(&mm->mmu_notifier_mm->lock);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_put);
+
+/**
+ * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
+ *
+ * This function ensures that all outstanding async SRU work from
+ * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
+ * associated with an unused mmu_notifier will no longer be called.
+ *
+ * Before using the caller must ensure that all of its mmu_notifiers have been
+ * fully released via mmu_notifier_put().
+ *
+ * Modules using the mmu_notifier_put() API should call this in their __exit
+ * function to avoid module unloading races.
+ */
+void mmu_notifier_synchronize(void)
+{
+	synchronize_srcu(&srcu);
+}
+EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
+
 bool
 mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)
 {
-- 
cgit v1.2.3


From 0214f33c4e0e1f5a9a9c3f4881310c61713294e6 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@kernel.org>
Date: Wed, 31 Jul 2019 12:35:17 -0700
Subject: clk: Overwrite clk_hw::init with NULL during clk_register()

We don't want clk provider drivers to use the init structure after clk
registration time, but we leave a dangling reference to it by means of
clk_hw::init. Let's overwrite the member with NULL during clk_register()
so that this can't be used anymore after registration time.

Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Cc: Doug Anderson <dianders@chromium.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
Link: https://lkml.kernel.org/r/20190731193517.237136-10-sboyd@kernel.org
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
---
 drivers/clk/clk.c            | 24 ++++++++++++++++--------
 include/linux/clk-provider.h |  3 ++-
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index c0990703ce54..efac620264a2 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -3484,9 +3484,9 @@ static int clk_cpy_name(const char **dst_p, const char *src, bool must_exist)
 	return 0;
 }
 
-static int clk_core_populate_parent_map(struct clk_core *core)
+static int clk_core_populate_parent_map(struct clk_core *core,
+					const struct clk_init_data *init)
 {
-	const struct clk_init_data *init = core->hw->init;
 	u8 num_parents = init->num_parents;
 	const char * const *parent_names = init->parent_names;
 	const struct clk_hw **parent_hws = init->parent_hws;
@@ -3566,6 +3566,14 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
 {
 	int ret;
 	struct clk_core *core;
+	const struct clk_init_data *init = hw->init;
+
+	/*
+	 * The init data is not supposed to be used outside of registration path.
+	 * Set it to NULL so that provider drivers can't use it either and so that
+	 * we catch use of hw->init early on in the core.
+	 */
+	hw->init = NULL;
 
 	core = kzalloc(sizeof(*core), GFP_KERNEL);
 	if (!core) {
@@ -3573,17 +3581,17 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
 		goto fail_out;
 	}
 
-	core->name = kstrdup_const(hw->init->name, GFP_KERNEL);
+	core->name = kstrdup_const(init->name, GFP_KERNEL);
 	if (!core->name) {
 		ret = -ENOMEM;
 		goto fail_name;
 	}
 
-	if (WARN_ON(!hw->init->ops)) {
+	if (WARN_ON(!init->ops)) {
 		ret = -EINVAL;
 		goto fail_ops;
 	}
-	core->ops = hw->init->ops;
+	core->ops = init->ops;
 
 	if (dev && pm_runtime_enabled(dev))
 		core->rpm_enabled = true;
@@ -3592,13 +3600,13 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw)
 	if (dev && dev->driver)
 		core->owner = dev->driver->owner;
 	core->hw = hw;
-	core->flags = hw->init->flags;
-	core->num_parents = hw->init->num_parents;
+	core->flags = init->flags;
+	core->num_parents = init->num_parents;
 	core->min_rate = 0;
 	core->max_rate = ULONG_MAX;
 	hw->core = core;
 
-	ret = clk_core_populate_parent_map(core);
+	ret = clk_core_populate_parent_map(core, init);
 	if (ret)
 		goto fail_parents;
 
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 2ae7604783dd..214c75ed62ae 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -299,7 +299,8 @@ struct clk_init_data {
  * into the clk API
  *
  * @init: pointer to struct clk_init_data that contains the init data shared
- * with the common clock framework.
+ * with the common clock framework. This pointer will be set to NULL once
+ * a clk_register() variant is called on this clk_hw pointer.
  */
 struct clk_hw {
 	struct clk_core *core;
-- 
cgit v1.2.3


From f7bc6e42bf12487182fc442a08eca25d968dc543 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 13 Aug 2019 09:25:00 +0200
Subject: drivers: remove the SGI SN2 IOC4 base support

The IOC4 is a multi-function chip seen on SGI SN2 and some SGI MIPS
systems.  This removes the base driver, which while not having an SN2
Kconfig dependency was only for sub-drivers that had one.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lkml.kernel.org/r/20190813072514.23299-15-hch@lst.de
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/driver-api/sgi-ioc4.rst    |  49 ---
 arch/ia64/configs/generic_defconfig      |   1 -
 arch/ia64/configs/gensparse_defconfig    |   1 -
 arch/mips/configs/bigsur_defconfig       |   1 -
 arch/mips/configs/ip32_defconfig         |   1 -
 arch/mips/configs/markeins_defconfig     |   1 -
 arch/mips/configs/rm200_defconfig        |   1 -
 arch/mips/configs/sb1250_swarm_defconfig |   1 -
 drivers/misc/Kconfig                     |  12 -
 drivers/misc/Makefile                    |   1 -
 drivers/misc/ioc4.c                      | 498 -------------------------------
 include/linux/ioc4.h                     | 184 ------------
 include/linux/pci_ids.h                  |   1 -
 13 files changed, 752 deletions(-)
 delete mode 100644 Documentation/driver-api/sgi-ioc4.rst
 delete mode 100644 drivers/misc/ioc4.c
 delete mode 100644 include/linux/ioc4.h

(limited to 'include')

diff --git a/Documentation/driver-api/sgi-ioc4.rst b/Documentation/driver-api/sgi-ioc4.rst
deleted file mode 100644
index 72709222d3c0..000000000000
--- a/Documentation/driver-api/sgi-ioc4.rst
+++ /dev/null
@@ -1,49 +0,0 @@
-====================================
-SGI IOC4 PCI (multi function) device
-====================================
-
-The SGI IOC4 PCI device is a bit of a strange beast, so some notes on
-it are in order.
-
-First, even though the IOC4 performs multiple functions, such as an
-IDE controller, a serial controller, a PS/2 keyboard/mouse controller,
-and an external interrupt mechanism, it's not implemented as a
-multifunction device.  The consequence of this from a software
-standpoint is that all these functions share a single IRQ, and
-they can't all register to own the same PCI device ID.  To make
-matters a bit worse, some of the register blocks (and even registers
-themselves) present in IOC4 are mixed-purpose between these several
-functions, meaning that there's no clear "owning" device driver.
-
-The solution is to organize the IOC4 driver into several independent
-drivers, "ioc4", "sgiioc4", and "ioc4_serial".  Note that there is no
-PS/2 controller driver as this functionality has never been wired up
-on a shipping IO card.
-
-ioc4
-====
-This is the core (or shim) driver for IOC4.  It is responsible for
-initializing the basic functionality of the chip, and allocating
-the PCI resources that are shared between the IOC4 functions.
-
-This driver also provides registration functions that the other
-IOC4 drivers can call to make their presence known.  Each driver
-needs to provide a probe and remove function, which are invoked
-by the core driver at appropriate times.  The interface of these
-IOC4 function probe and remove operations isn't precisely the same
-as PCI device probe and remove operations, but is logically the
-same operation.
-
-sgiioc4
-=======
-This is the IDE driver for IOC4.  Its name isn't very descriptive
-simply for historical reasons (it used to be the only IOC4 driver
-component).  There's not much to say about it other than it hooks
-up to the ioc4 driver via the appropriate registration, probe, and
-remove functions.
-
-ioc4_serial
-===========
-This is the serial driver for IOC4.  There's not much to say about it
-other than it hooks up to the ioc4 driver via the appropriate registration,
-probe, and remove functions.
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index 8dd921dce4b5..661d90b3e148 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -44,7 +44,6 @@ CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_SGI_IOC4=y
 CONFIG_SGI_XP=m
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index 5d5ea744f7e6..7844e6a956a4 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -36,7 +36,6 @@ CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
-CONFIG_SGI_IOC4=y
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_IDE_GENERIC=y
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index 66566026409d..f14ad0538f4e 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -103,7 +103,6 @@ CONFIG_FW_LOADER=m
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
-CONFIG_SGI_IOC4=m
 CONFIG_EEPROM_LEGACY=y
 CONFIG_EEPROM_MAX6875=y
 CONFIG_IDE=y
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 572cab91670c..370884018aad 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -46,7 +46,6 @@ CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
 CONFIG_BLK_DEV_NBD=m
-CONFIG_SGI_IOC4=y
 CONFIG_RAID_ATTRS=y
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig
index ae93a94f8c71..507ad91b21e7 100644
--- a/arch/mips/configs/markeins_defconfig
+++ b/arch/mips/configs/markeins_defconfig
@@ -117,7 +117,6 @@ CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP=y
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_SGI_IOC4=m
 CONFIG_SCSI=m
 # CONFIG_SCSI_PROC_FS is not set
 CONFIG_BLK_DEV_SD=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 0f4b09f8a0ee..8762e75f5d5f 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -198,7 +198,6 @@ CONFIG_BLK_DEV_SX8=m
 CONFIG_BLK_DEV_RAM=m
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
-CONFIG_SGI_IOC4=m
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
diff --git a/arch/mips/configs/sb1250_swarm_defconfig b/arch/mips/configs/sb1250_swarm_defconfig
index 6883ea4477d4..bb0b1b22ebe1 100644
--- a/arch/mips/configs/sb1250_swarm_defconfig
+++ b/arch/mips/configs/sb1250_swarm_defconfig
@@ -49,7 +49,6 @@ CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=9220
 CONFIG_CDROM_PKTCDVD=m
 CONFIG_ATA_OVER_ETH=m
-CONFIG_SGI_IOC4=m
 CONFIG_IDE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_IDETAPE=y
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 299032693934..423d2d26d8f7 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -126,18 +126,6 @@ config INTEL_MID_PTI
 	  an Intel Atom (non-netbook) mobile device containing a MIPI
 	  P1149.7 standard implementation.
 
-config SGI_IOC4
-	tristate "SGI IOC4 Base IO support"
-	depends on PCI
-	---help---
-	  This option enables basic support for the IOC4 chip on certain
-	  SGI IO controller cards (IO9, IO10, and PCI-RT).  This option
-	  does not enable any specific functions on such a card, but provides
-	  necessary infrastructure for other drivers to utilize.
-
-	  If you have an SGI Altix with an IOC4-based card say Y.
-	  Otherwise say N.
-
 config TIFM_CORE
 	tristate "TI Flash Media interface support"
 	depends on PCI
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index abd8ae249746..8dae0a976200 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -21,7 +21,6 @@ obj-$(CONFIG_QCOM_COINCELL)	+= qcom-coincell.o
 obj-$(CONFIG_QCOM_FASTRPC)	+= fastrpc.o
 obj-$(CONFIG_SENSORS_BH1770)	+= bh1770glc.o
 obj-$(CONFIG_SENSORS_APDS990X)	+= apds990x.o
-obj-$(CONFIG_SGI_IOC4)		+= ioc4.o
 obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
 obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
diff --git a/drivers/misc/ioc4.c b/drivers/misc/ioc4.c
deleted file mode 100644
index 9d0445a567db..000000000000
--- a/drivers/misc/ioc4.c
+++ /dev/null
@@ -1,498 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2005-2006 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-/* This file contains the master driver module for use by SGI IOC4 subdrivers.
- *
- * It allocates any resources shared between multiple subdevices, and
- * provides accessor functions (where needed) and the like for those
- * resources.  It also provides a mechanism for the subdevice modules
- * to support loading and unloading.
- *
- * Non-shared resources (e.g. external interrupt A_INT_OUT register page
- * alias, serial port and UART registers) are handled by the subdevice
- * modules themselves.
- *
- * This is all necessary because IOC4 is not implemented as a multi-function
- * PCI device, but an amalgamation of disparate registers for several
- * types of device (ATA, serial, external interrupts).  The normal
- * resource management in the kernel doesn't have quite the right interfaces
- * to handle this situation (e.g. multiple modules can't claim the same
- * PCI ID), thus this IOC4 master module.
- */
-
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/ioc4.h>
-#include <linux/ktime.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/time.h>
-#include <asm/io.h>
-
-/***************
- * Definitions *
- ***************/
-
-/* Tweakable values */
-
-/* PCI bus speed detection/calibration */
-#define IOC4_CALIBRATE_COUNT 63		/* Calibration cycle period */
-#define IOC4_CALIBRATE_CYCLES 256	/* Average over this many cycles */
-#define IOC4_CALIBRATE_DISCARD 2	/* Discard first few cycles */
-#define IOC4_CALIBRATE_LOW_MHZ 25	/* Lower bound on bus speed sanity */
-#define IOC4_CALIBRATE_HIGH_MHZ 75	/* Upper bound on bus speed sanity */
-#define IOC4_CALIBRATE_DEFAULT_MHZ 66	/* Assumed if sanity check fails */
-
-/************************
- * Submodule management *
- ************************/
-
-static DEFINE_MUTEX(ioc4_mutex);
-
-static LIST_HEAD(ioc4_devices);
-static LIST_HEAD(ioc4_submodules);
-
-/* Register an IOC4 submodule */
-int
-ioc4_register_submodule(struct ioc4_submodule *is)
-{
-	struct ioc4_driver_data *idd;
-
-	mutex_lock(&ioc4_mutex);
-	list_add(&is->is_list, &ioc4_submodules);
-
-	/* Initialize submodule for each IOC4 */
-	if (!is->is_probe)
-		goto out;
-
-	list_for_each_entry(idd, &ioc4_devices, idd_list) {
-		if (is->is_probe(idd)) {
-			printk(KERN_WARNING
-			       "%s: IOC4 submodule %s probe failed "
-			       "for pci_dev %s",
-			       __func__, module_name(is->is_owner),
-			       pci_name(idd->idd_pdev));
-		}
-	}
- out:
-	mutex_unlock(&ioc4_mutex);
-	return 0;
-}
-
-/* Unregister an IOC4 submodule */
-void
-ioc4_unregister_submodule(struct ioc4_submodule *is)
-{
-	struct ioc4_driver_data *idd;
-
-	mutex_lock(&ioc4_mutex);
-	list_del(&is->is_list);
-
-	/* Remove submodule for each IOC4 */
-	if (!is->is_remove)
-		goto out;
-
-	list_for_each_entry(idd, &ioc4_devices, idd_list) {
-		if (is->is_remove(idd)) {
-			printk(KERN_WARNING
-			       "%s: IOC4 submodule %s remove failed "
-			       "for pci_dev %s.\n",
-			       __func__, module_name(is->is_owner),
-			       pci_name(idd->idd_pdev));
-		}
-	}
- out:
-	mutex_unlock(&ioc4_mutex);
-}
-
-/*********************
- * Device management *
- *********************/
-
-#define IOC4_CALIBRATE_LOW_LIMIT \
-	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_LOW_MHZ)
-#define IOC4_CALIBRATE_HIGH_LIMIT \
-	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_HIGH_MHZ)
-#define IOC4_CALIBRATE_DEFAULT \
-	(1000*IOC4_EXTINT_COUNT_DIVISOR/IOC4_CALIBRATE_DEFAULT_MHZ)
-
-#define IOC4_CALIBRATE_END \
-	(IOC4_CALIBRATE_CYCLES + IOC4_CALIBRATE_DISCARD)
-
-#define IOC4_INT_OUT_MODE_TOGGLE 0x7	/* Toggle INT_OUT every COUNT+1 ticks */
-
-/* Determines external interrupt output clock period of the PCI bus an
- * IOC4 is attached to.  This value can be used to determine the PCI
- * bus speed.
- *
- * IOC4 has a design feature that various internal timers are derived from
- * the PCI bus clock.  This causes IOC4 device drivers to need to take the
- * bus speed into account when setting various register values (e.g. INT_OUT
- * register COUNT field, UART divisors, etc).  Since this information is
- * needed by several subdrivers, it is determined by the main IOC4 driver,
- * even though the following code utilizes external interrupt registers
- * to perform the speed calculation.
- */
-static void
-ioc4_clock_calibrate(struct ioc4_driver_data *idd)
-{
-	union ioc4_int_out int_out;
-	union ioc4_gpcr gpcr;
-	unsigned int state, last_state;
-	uint64_t start, end, period;
-	unsigned int count;
-
-	/* Enable output */
-	gpcr.raw = 0;
-	gpcr.fields.dir = IOC4_GPCR_DIR_0;
-	gpcr.fields.int_out_en = 1;
-	writel(gpcr.raw, &idd->idd_misc_regs->gpcr_s.raw);
-
-	/* Reset to power-on state */
-	writel(0, &idd->idd_misc_regs->int_out.raw);
-
-	/* Set up square wave */
-	int_out.raw = 0;
-	int_out.fields.count = IOC4_CALIBRATE_COUNT;
-	int_out.fields.mode = IOC4_INT_OUT_MODE_TOGGLE;
-	int_out.fields.diag = 0;
-	writel(int_out.raw, &idd->idd_misc_regs->int_out.raw);
-
-	/* Check square wave period averaged over some number of cycles */
-	start = ktime_get_ns();
-	state = 1; /* make sure the first read isn't a rising edge */
-	for (count = 0; count <= IOC4_CALIBRATE_END; count++) {
-		do { /* wait for a rising edge */
-			last_state = state;
-			int_out.raw = readl(&idd->idd_misc_regs->int_out.raw);
-			state = int_out.fields.int_out;
-		} while (last_state || !state);
-
-		/* discard the first few cycles */
-		if (count == IOC4_CALIBRATE_DISCARD)
-			start = ktime_get_ns();
-	}
-	end = ktime_get_ns();
-
-	/* Calculation rearranged to preserve intermediate precision.
-	 * Logically:
-	 * 1. "end - start" gives us the measurement period over all
-	 *    the square wave cycles.
-	 * 2. Divide by number of square wave cycles to get the period
-	 *    of a square wave cycle.
-	 * 3. Divide by 2*(int_out.fields.count+1), which is the formula
-	 *    by which the IOC4 generates the square wave, to get the
-	 *    period of an IOC4 INT_OUT count.
-	 */
-	period = (end - start) /
-		(IOC4_CALIBRATE_CYCLES * 2 * (IOC4_CALIBRATE_COUNT + 1));
-
-	/* Bounds check the result. */
-	if (period > IOC4_CALIBRATE_LOW_LIMIT ||
-	    period < IOC4_CALIBRATE_HIGH_LIMIT) {
-		printk(KERN_INFO
-		       "IOC4 %s: Clock calibration failed.  Assuming"
-		       "PCI clock is %d ns.\n",
-		       pci_name(idd->idd_pdev),
-		       IOC4_CALIBRATE_DEFAULT / IOC4_EXTINT_COUNT_DIVISOR);
-		period = IOC4_CALIBRATE_DEFAULT;
-	} else {
-		u64 ns = period;
-
-		do_div(ns, IOC4_EXTINT_COUNT_DIVISOR);
-		printk(KERN_DEBUG
-		       "IOC4 %s: PCI clock is %llu ns.\n",
-		       pci_name(idd->idd_pdev), (unsigned long long)ns);
-	}
-
-	/* Remember results.  We store the extint clock period rather
-	 * than the PCI clock period so that greater precision is
-	 * retained.  Divide by IOC4_EXTINT_COUNT_DIVISOR to get
-	 * PCI clock period.
-	 */
-	idd->count_period = period;
-}
-
-/* There are three variants of IOC4 cards: IO9, IO10, and PCI-RT.
- * Each brings out different combinations of IOC4 signals, thus.
- * the IOC4 subdrivers need to know to which we're attached.
- *
- * We look for the presence of a SCSI (IO9) or SATA (IO10) controller
- * on the same PCI bus at slot number 3 to differentiate IO9 from IO10.
- * If neither is present, it's a PCI-RT.
- */
-static unsigned int
-ioc4_variant(struct ioc4_driver_data *idd)
-{
-	struct pci_dev *pdev = NULL;
-	int found = 0;
-
-	/* IO9: Look for a QLogic ISP 12160 at the same bus and slot 3. */
-	do {
-		pdev = pci_get_device(PCI_VENDOR_ID_QLOGIC,
-				      PCI_DEVICE_ID_QLOGIC_ISP12160, pdev);
-		if (pdev &&
-		    idd->idd_pdev->bus->number == pdev->bus->number &&
-		    3 == PCI_SLOT(pdev->devfn))
-			found = 1;
-	} while (pdev && !found);
-	if (NULL != pdev) {
-		pci_dev_put(pdev);
-		return IOC4_VARIANT_IO9;
-	}
-
-	/* IO10: Look for a Vitesse VSC 7174 at the same bus and slot 3. */
-	pdev = NULL;
-	do {
-		pdev = pci_get_device(PCI_VENDOR_ID_VITESSE,
-				      PCI_DEVICE_ID_VITESSE_VSC7174, pdev);
-		if (pdev &&
-		    idd->idd_pdev->bus->number == pdev->bus->number &&
-		    3 == PCI_SLOT(pdev->devfn))
-			found = 1;
-	} while (pdev && !found);
-	if (NULL != pdev) {
-		pci_dev_put(pdev);
-		return IOC4_VARIANT_IO10;
-	}
-
-	/* PCI-RT: No SCSI/SATA controller will be present */
-	return IOC4_VARIANT_PCI_RT;
-}
-
-static void
-ioc4_load_modules(struct work_struct *work)
-{
-	request_module("sgiioc4");
-}
-
-static DECLARE_WORK(ioc4_load_modules_work, ioc4_load_modules);
-
-/* Adds a new instance of an IOC4 card */
-static int
-ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
-{
-	struct ioc4_driver_data *idd;
-	struct ioc4_submodule *is;
-	uint32_t pcmd;
-	int ret;
-
-	/* Enable IOC4 and take ownership of it */
-	if ((ret = pci_enable_device(pdev))) {
-		printk(KERN_WARNING
-		       "%s: Failed to enable IOC4 device for pci_dev %s.\n",
-		       __func__, pci_name(pdev));
-		goto out;
-	}
-	pci_set_master(pdev);
-
-	/* Set up per-IOC4 data */
-	idd = kmalloc(sizeof(struct ioc4_driver_data), GFP_KERNEL);
-	if (!idd) {
-		printk(KERN_WARNING
-		       "%s: Failed to allocate IOC4 data for pci_dev %s.\n",
-		       __func__, pci_name(pdev));
-		ret = -ENODEV;
-		goto out_idd;
-	}
-	idd->idd_pdev = pdev;
-	idd->idd_pci_id = pci_id;
-
-	/* Map IOC4 misc registers.  These are shared between subdevices
-	 * so the main IOC4 module manages them.
-	 */
-	idd->idd_bar0 = pci_resource_start(idd->idd_pdev, 0);
-	if (!idd->idd_bar0) {
-		printk(KERN_WARNING
-		       "%s: Unable to find IOC4 misc resource "
-		       "for pci_dev %s.\n",
-		       __func__, pci_name(idd->idd_pdev));
-		ret = -ENODEV;
-		goto out_pci;
-	}
-	if (!request_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs),
-			    "ioc4_misc")) {
-		printk(KERN_WARNING
-		       "%s: Unable to request IOC4 misc region "
-		       "for pci_dev %s.\n",
-		       __func__, pci_name(idd->idd_pdev));
-		ret = -ENODEV;
-		goto out_pci;
-	}
-	idd->idd_misc_regs = ioremap(idd->idd_bar0,
-				     sizeof(struct ioc4_misc_regs));
-	if (!idd->idd_misc_regs) {
-		printk(KERN_WARNING
-		       "%s: Unable to remap IOC4 misc region "
-		       "for pci_dev %s.\n",
-		       __func__, pci_name(idd->idd_pdev));
-		ret = -ENODEV;
-		goto out_misc_region;
-	}
-
-	/* Failsafe portion of per-IOC4 initialization */
-
-	/* Detect card variant */
-	idd->idd_variant = ioc4_variant(idd);
-	printk(KERN_INFO "IOC4 %s: %s card detected.\n", pci_name(pdev),
-	       idd->idd_variant == IOC4_VARIANT_IO9 ? "IO9" :
-	       idd->idd_variant == IOC4_VARIANT_PCI_RT ? "PCI-RT" :
-	       idd->idd_variant == IOC4_VARIANT_IO10 ? "IO10" : "unknown");
-
-	/* Initialize IOC4 */
-	pci_read_config_dword(idd->idd_pdev, PCI_COMMAND, &pcmd);
-	pci_write_config_dword(idd->idd_pdev, PCI_COMMAND,
-			       pcmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
-
-	/* Determine PCI clock */
-	ioc4_clock_calibrate(idd);
-
-	/* Disable/clear all interrupts.  Need to do this here lest
-	 * one submodule request the shared IOC4 IRQ, but interrupt
-	 * is generated by a different subdevice.
-	 */
-	/* Disable */
-	writel(~0, &idd->idd_misc_regs->other_iec.raw);
-	writel(~0, &idd->idd_misc_regs->sio_iec);
-	/* Clear (i.e. acknowledge) */
-	writel(~0, &idd->idd_misc_regs->other_ir.raw);
-	writel(~0, &idd->idd_misc_regs->sio_ir);
-
-	/* Track PCI-device specific data */
-	idd->idd_serial_data = NULL;
-	pci_set_drvdata(idd->idd_pdev, idd);
-
-	mutex_lock(&ioc4_mutex);
-	list_add_tail(&idd->idd_list, &ioc4_devices);
-
-	/* Add this IOC4 to all submodules */
-	list_for_each_entry(is, &ioc4_submodules, is_list) {
-		if (is->is_probe && is->is_probe(idd)) {
-			printk(KERN_WARNING
-			       "%s: IOC4 submodule 0x%s probe failed "
-			       "for pci_dev %s.\n",
-			       __func__, module_name(is->is_owner),
-			       pci_name(idd->idd_pdev));
-		}
-	}
-	mutex_unlock(&ioc4_mutex);
-
-	/* Request sgiioc4 IDE driver on boards that bring that functionality
-	 * off of IOC4.  The root filesystem may be hosted on a drive connected
-	 * to IOC4, so we need to make sure the sgiioc4 driver is loaded as it
-	 * won't be picked up by modprobes due to the ioc4 module owning the
-	 * PCI device.
-	 */
-	if (idd->idd_variant != IOC4_VARIANT_PCI_RT) {
-		/* Request the module from a work procedure as the modprobe
-		 * goes out to a userland helper and that will hang if done
-		 * directly from ioc4_probe().
-		 */
-		printk(KERN_INFO "IOC4 loading sgiioc4 submodule\n");
-		schedule_work(&ioc4_load_modules_work);
-	}
-
-	return 0;
-
-out_misc_region:
-	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
-out_pci:
-	kfree(idd);
-out_idd:
-	pci_disable_device(pdev);
-out:
-	return ret;
-}
-
-/* Removes a particular instance of an IOC4 card. */
-static void
-ioc4_remove(struct pci_dev *pdev)
-{
-	struct ioc4_submodule *is;
-	struct ioc4_driver_data *idd;
-
-	idd = pci_get_drvdata(pdev);
-
-	/* Remove this IOC4 from all submodules */
-	mutex_lock(&ioc4_mutex);
-	list_for_each_entry(is, &ioc4_submodules, is_list) {
-		if (is->is_remove && is->is_remove(idd)) {
-			printk(KERN_WARNING
-			       "%s: IOC4 submodule 0x%s remove failed "
-			       "for pci_dev %s.\n",
-			       __func__, module_name(is->is_owner),
-			       pci_name(idd->idd_pdev));
-		}
-	}
-	mutex_unlock(&ioc4_mutex);
-
-	/* Release resources */
-	iounmap(idd->idd_misc_regs);
-	if (!idd->idd_bar0) {
-		printk(KERN_WARNING
-		       "%s: Unable to get IOC4 misc mapping for pci_dev %s. "
-		       "Device removal may be incomplete.\n",
-		       __func__, pci_name(idd->idd_pdev));
-	}
-	release_mem_region(idd->idd_bar0, sizeof(struct ioc4_misc_regs));
-
-	/* Disable IOC4 and relinquish */
-	pci_disable_device(pdev);
-
-	/* Remove and free driver data */
-	mutex_lock(&ioc4_mutex);
-	list_del(&idd->idd_list);
-	mutex_unlock(&ioc4_mutex);
-	kfree(idd);
-}
-
-static const struct pci_device_id ioc4_id_table[] = {
-	{PCI_VENDOR_ID_SGI, PCI_DEVICE_ID_SGI_IOC4, PCI_ANY_ID,
-	 PCI_ANY_ID, 0x0b4000, 0xFFFFFF},
-	{0}
-};
-
-static struct pci_driver ioc4_driver = {
-	.name = "IOC4",
-	.id_table = ioc4_id_table,
-	.probe = ioc4_probe,
-	.remove = ioc4_remove,
-};
-
-MODULE_DEVICE_TABLE(pci, ioc4_id_table);
-
-/*********************
- * Module management *
- *********************/
-
-/* Module load */
-static int __init
-ioc4_init(void)
-{
-	return pci_register_driver(&ioc4_driver);
-}
-
-/* Module unload */
-static void __exit
-ioc4_exit(void)
-{
-	/* Ensure ioc4_load_modules() has completed before exiting */
-	flush_work(&ioc4_load_modules_work);
-	pci_unregister_driver(&ioc4_driver);
-}
-
-module_init(ioc4_init);
-module_exit(ioc4_exit);
-
-MODULE_AUTHOR("Brent Casavant - Silicon Graphics, Inc. <bcasavan@sgi.com>");
-MODULE_DESCRIPTION("PCI driver master module for SGI IOC4 Base-IO Card");
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(ioc4_register_submodule);
-EXPORT_SYMBOL(ioc4_unregister_submodule);
diff --git a/include/linux/ioc4.h b/include/linux/ioc4.h
deleted file mode 100644
index 51e2b9fb6372..000000000000
--- a/include/linux/ioc4.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
- */
-
-#ifndef _LINUX_IOC4_H
-#define _LINUX_IOC4_H
-
-#include <linux/interrupt.h>
-
-/***************
- * Definitions *
- ***************/
-
-/* Miscellaneous values inherent to hardware */
-
-#define IOC4_EXTINT_COUNT_DIVISOR 520	/* PCI clocks per COUNT tick */
-
-/***********************************
- * Structures needed by subdrivers *
- ***********************************/
-
-/* This structure fully describes the IOC4 miscellaneous registers which
- * appear at bar[0]+0x00000 through bar[0]+0x0005c.  The corresponding
- * PCI resource is managed by the main IOC4 driver because it contains
- * registers of interest to many different IOC4 subdrivers.
- */
-struct ioc4_misc_regs {
-	/* Miscellaneous IOC4 registers */
-	union ioc4_pci_err_addr_l {
-		uint32_t raw;
-		struct {
-			uint32_t valid:1;	/* Address captured */
-			uint32_t master_id:4;	/* Unit causing error
-						 * 0/1: Serial port 0 TX/RX
-						 * 2/3: Serial port 1 TX/RX
-						 * 4/5: Serial port 2 TX/RX
-						 * 6/7: Serial port 3 TX/RX
-						 * 8: ATA/ATAPI
-						 * 9-15: Undefined
-						 */
-			uint32_t mul_err:1;	/* Multiple errors occurred */
-			uint32_t addr:26;	/* Bits 31-6 of error addr */
-		} fields;
-	} pci_err_addr_l;
-	uint32_t pci_err_addr_h;	/* Bits 63-32 of error addr */
-	union ioc4_sio_int {
-		uint32_t raw;
-		struct {
-			uint8_t tx_mt:1;	/* TX ring buffer empty */
-			uint8_t rx_full:1;	/* RX ring buffer full */
-			uint8_t rx_high:1;	/* RX high-water exceeded */
-			uint8_t rx_timer:1;	/* RX timer has triggered */
-			uint8_t delta_dcd:1;	/* DELTA_DCD seen */
-			uint8_t delta_cts:1;	/* DELTA_CTS seen */
-			uint8_t intr_pass:1;	/* Interrupt pass-through */
-			uint8_t tx_explicit:1;	/* TX, MCW, or delay complete */
-		} fields[4];
-	} sio_ir;		/* Serial interrupt state */
-	union ioc4_other_int {
-		uint32_t raw;
-		struct {
-			uint32_t ata_int:1;	/* ATA port passthru */
-			uint32_t ata_memerr:1;	/* ATA halted by mem error */
-			uint32_t memerr:4;	/* Serial halted by mem err */
-			uint32_t kbd_int:1;	/* kbd/mouse intr asserted */
-			uint32_t reserved:16;	/* zero */
-			uint32_t rt_int:1;	/* INT_OUT section latch */
-			uint32_t gen_int:8;	/* Intr. from generic pins */
-		} fields;
-	} other_ir;		/* Other interrupt state */
-	union ioc4_sio_int sio_ies;	/* Serial interrupt enable set */
-	union ioc4_other_int other_ies;	/* Other interrupt enable set */
-	union ioc4_sio_int sio_iec;	/* Serial interrupt enable clear */
-	union ioc4_other_int other_iec;	/* Other interrupt enable clear */
-	union ioc4_sio_cr {
-		uint32_t raw;
-		struct {
-			uint32_t cmd_pulse:4;	/* Bytebus strobe width */
-			uint32_t arb_diag:3;	/* PCI bus requester */
-			uint32_t sio_diag_idle:1;	/* Active ser req? */
-			uint32_t ata_diag_idle:1;	/* Active ATA req? */
-			uint32_t ata_diag_active:1;	/* ATA req is winner */
-			uint32_t reserved:22;	/* zero */
-		} fields;
-	} sio_cr;
-	uint32_t unused1;
-	union ioc4_int_out {
-		uint32_t raw;
-		struct {
-			uint32_t count:16;	/* Period control */
-			uint32_t mode:3;	/* Output signal shape */
-			uint32_t reserved:11;	/* zero */
-			uint32_t diag:1;	/* Timebase control */
-			uint32_t int_out:1;	/* Current value */
-		} fields;
-	} int_out;		/* External interrupt output control */
-	uint32_t unused2;
-	union ioc4_gpcr {
-		uint32_t raw;
-		struct {
-			uint32_t dir:8;	/* Pin direction */
-			uint32_t edge:8;	/* Edge/level mode */
-			uint32_t reserved1:4;	/* zero */
-			uint32_t int_out_en:1;	/* INT_OUT enable */
-			uint32_t reserved2:11;	/* zero */
-		} fields;
-	} gpcr_s;		/* Generic PIO control set */
-	union ioc4_gpcr gpcr_c;	/* Generic PIO control clear */
-	union ioc4_gpdr {
-		uint32_t raw;
-		struct {
-			uint32_t gen_pin:8;	/* State of pins */
-			uint32_t reserved:24;
-		} fields;
-	} gpdr;			/* Generic PIO data */
-	uint32_t unused3;
-	union ioc4_gppr {
-		uint32_t raw;
-		struct {
-			uint32_t gen_pin:1;	/* Single pin state */
-			uint32_t reserved:31;
-		} fields;
-	} gppr[8];		/* Generic PIO pins */
-};
-
-/* Masks for GPCR DIR pins */
-#define IOC4_GPCR_DIR_0 0x01	/* External interrupt output */
-#define IOC4_GPCR_DIR_1 0x02	/* External interrupt input */
-#define IOC4_GPCR_DIR_2 0x04
-#define IOC4_GPCR_DIR_3 0x08	/* Keyboard/mouse presence */
-#define IOC4_GPCR_DIR_4 0x10	/* Ser. port 0 xcvr select (0=232, 1=422) */
-#define IOC4_GPCR_DIR_5 0x20	/* Ser. port 1 xcvr select (0=232, 1=422) */
-#define IOC4_GPCR_DIR_6 0x40	/* Ser. port 2 xcvr select (0=232, 1=422) */
-#define IOC4_GPCR_DIR_7 0x80	/* Ser. port 3 xcvr select (0=232, 1=422) */
-
-/* Masks for GPCR EDGE pins */
-#define IOC4_GPCR_EDGE_0 0x01
-#define IOC4_GPCR_EDGE_1 0x02	/* External interrupt input */
-#define IOC4_GPCR_EDGE_2 0x04
-#define IOC4_GPCR_EDGE_3 0x08
-#define IOC4_GPCR_EDGE_4 0x10
-#define IOC4_GPCR_EDGE_5 0x20
-#define IOC4_GPCR_EDGE_6 0x40
-#define IOC4_GPCR_EDGE_7 0x80
-
-#define IOC4_VARIANT_IO9	0x0900
-#define IOC4_VARIANT_PCI_RT	0x0901
-#define IOC4_VARIANT_IO10	0x1000
-
-/* One of these per IOC4 */
-struct ioc4_driver_data {
-	struct list_head idd_list;
-	unsigned long idd_bar0;
-	struct pci_dev *idd_pdev;
-	const struct pci_device_id *idd_pci_id;
-	struct ioc4_misc_regs __iomem *idd_misc_regs;
-	unsigned long count_period;
-	void *idd_serial_data;
-	unsigned int idd_variant;
-};
-
-/* One per submodule */
-struct ioc4_submodule {
-	struct list_head is_list;
-	char *is_name;
-	struct module *is_owner;
-	int (*is_probe) (struct ioc4_driver_data *);
-	int (*is_remove) (struct ioc4_driver_data *);
-};
-
-#define IOC4_NUM_CARDS		8	/* max cards per partition */
-
-/**********************************
- * Functions needed by submodules *
- **********************************/
-
-extern int ioc4_register_submodule(struct ioc4_submodule *);
-extern void ioc4_unregister_submodule(struct ioc4_submodule *);
-
-#endif				/* _LINUX_IOC4_H */
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c842735a4f45..85e2ca611d42 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1070,7 +1070,6 @@
 #define PCI_VENDOR_ID_SGI		0x10a9
 #define PCI_DEVICE_ID_SGI_IOC3		0x0003
 #define PCI_DEVICE_ID_SGI_LITHIUM	0x1002
-#define PCI_DEVICE_ID_SGI_IOC4		0x100a
 
 #define PCI_VENDOR_ID_WINBOND		0x10ad
 #define PCI_DEVICE_ID_WINBOND_82C105	0x0105
-- 
cgit v1.2.3


From 58a96fc35375ab87db7c5b69336f5befde1b548f Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Tue, 16 Jul 2019 20:34:41 +0200
Subject: Bluetooth: Add debug setting for changing minimum encryption key size

For testing and qualification purposes it is useful to allow changing
the minimum encryption key size value that the host stack is going to
enforce. This adds a new debugfs setting min_encrypt_key_size to achieve
this functionality.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         |  1 +
 net/bluetooth/hci_debugfs.c      | 31 +++++++++++++++++++++++++++++++
 net/bluetooth/l2cap_core.c       |  2 +-
 4 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ded574b32c20..ffc95b382eb5 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -278,6 +278,7 @@ struct hci_dev {
 	__u16		conn_info_min_age;
 	__u16		conn_info_max_age;
 	__u16		auth_payload_timeout;
+	__u8		min_enc_key_size;
 	__u8		ssp_debug_mode;
 	__u8		hw_error_code;
 	__u32		clock;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b9585e7d9d2e..04bc79359a17 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3202,6 +3202,7 @@ struct hci_dev *hci_alloc_dev(void)
 	hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
 	hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
 	hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT;
+	hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE;
 
 	mutex_init(&hdev->lock);
 	mutex_init(&hdev->req_lock);
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index bb67f4a5479a..402e2cc54044 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -433,6 +433,35 @@ static int auto_accept_delay_set(void *data, u64 val)
 	return 0;
 }
 
+static int min_encrypt_key_size_set(void *data, u64 val)
+{
+	struct hci_dev *hdev = data;
+
+	if (val < 1 || val > 16)
+		return -EINVAL;
+
+	hci_dev_lock(hdev);
+	hdev->min_enc_key_size = val;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+static int min_encrypt_key_size_get(void *data, u64 *val)
+{
+	struct hci_dev *hdev = data;
+
+	hci_dev_lock(hdev);
+	*val = hdev->min_enc_key_size;
+	hci_dev_unlock(hdev);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(min_encrypt_key_size_fops,
+			min_encrypt_key_size_get,
+			min_encrypt_key_size_set, "%llu\n");
+
 static int auto_accept_delay_get(void *data, u64 *val)
 {
 	struct hci_dev *hdev = data;
@@ -545,6 +574,8 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev)
 	if (lmp_ssp_capable(hdev)) {
 		debugfs_create_file("ssp_debug_mode", 0444, hdev->debugfs,
 				    hdev, &ssp_debug_mode_fops);
+		debugfs_create_file("min_encrypt_key_size", 0644, hdev->debugfs,
+				    hdev, &min_encrypt_key_size_fops);
 		debugfs_create_file("auto_accept_delay", 0644, hdev->debugfs,
 				    hdev, &auto_accept_delay_fops);
 	}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index cc506fe99b4d..dfc1edb168b7 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1361,7 +1361,7 @@ static bool l2cap_check_enc_key_size(struct hci_conn *hcon)
 	 * actually encrypted before enforcing a key size.
 	 */
 	return (!test_bit(HCI_CONN_ENCRYPT, &hcon->flags) ||
-		hcon->enc_key_size >= HCI_MIN_ENC_KEY_SIZE);
+		hcon->enc_key_size >= hcon->hdev->min_enc_key_size);
 }
 
 static void l2cap_do_start(struct l2cap_chan *chan)
-- 
cgit v1.2.3


From 4b9cb2a5ceedd7f715d92570b773fad024ca9950 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 17 Aug 2019 12:30:39 +0200
Subject: net: phy: remove genphy_config_init

Now that all users have been removed we can remove genphy_config_init.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 51 --------------------------------------------
 include/linux/phy.h          |  1 -
 2 files changed, 52 deletions(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9c546bae9ec9..d5db7604d7c4 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1885,57 +1885,6 @@ int genphy_soft_reset(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_soft_reset);
 
-int genphy_config_init(struct phy_device *phydev)
-{
-	int val;
-	__ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, };
-
-	linkmode_set_bit_array(phy_basic_ports_array,
-			       ARRAY_SIZE(phy_basic_ports_array),
-			       features);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features);
-	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features);
-
-	/* Do we support autonegotiation? */
-	val = phy_read(phydev, MII_BMSR);
-	if (val < 0)
-		return val;
-
-	if (val & BMSR_ANEGCAPABLE)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features);
-
-	if (val & BMSR_100FULL)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features);
-	if (val & BMSR_100HALF)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features);
-	if (val & BMSR_10FULL)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features);
-	if (val & BMSR_10HALF)
-		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features);
-
-	if (val & BMSR_ESTATEN) {
-		val = phy_read(phydev, MII_ESTATUS);
-		if (val < 0)
-			return val;
-
-		if (val & ESTATUS_1000_TFULL)
-			linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
-					 features);
-		if (val & ESTATUS_1000_THALF)
-			linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
-					 features);
-		if (val & ESTATUS_1000_XFULL)
-			linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT,
-					 features);
-	}
-
-	linkmode_and(phydev->supported, phydev->supported, features);
-	linkmode_and(phydev->advertising, phydev->advertising, features);
-
-	return 0;
-}
-EXPORT_SYMBOL(genphy_config_init);
-
 /**
  * genphy_read_abilities - read PHY abilities from Clause 22 registers
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5ac7d21375ac..d26779f1fb6b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1069,7 +1069,6 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 void phy_attached_info(struct phy_device *phydev);
 
 /* Clause 22 PHY */
-int genphy_config_init(struct phy_device *phydev);
 int genphy_read_abilities(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3


From edd3d0074c256848bbf5805350b39d40b1e2bf2b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:12 +0300
Subject: drop_monitor: Add basic infrastructure for hardware drops

Export a function that can be invoked in order to report packets that
were dropped by the underlying hardware along with metadata.

Subsequent patches will add support for the different alert modes.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                |  1 +
 include/net/drop_monitor.h | 33 +++++++++++++++++++++++++++++++++
 net/core/drop_monitor.c    | 28 ++++++++++++++++++++++++++++
 3 files changed, 62 insertions(+)
 create mode 100644 include/net/drop_monitor.h

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index fd9ab61c2670..96d3e60697f5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11156,6 +11156,7 @@ S:	Maintained
 W:	https://fedorahosted.org/dropwatch/
 F:	net/core/drop_monitor.c
 F:	include/uapi/linux/net_dropmon.h
+F:	include/net/drop_monitor.h
 
 NETWORKING DRIVERS
 M:	"David S. Miller" <davem@davemloft.net>
diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h
new file mode 100644
index 000000000000..2ab668461463
--- /dev/null
+++ b/include/net/drop_monitor.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_DROP_MONITOR_H_
+#define _NET_DROP_MONITOR_H_
+
+#include <linux/ktime.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+/**
+ * struct net_dm_hw_metadata - Hardware-supplied packet metadata.
+ * @trap_group_name: Hardware trap group name.
+ * @trap_name: Hardware trap name.
+ * @input_dev: Input netdevice.
+ */
+struct net_dm_hw_metadata {
+	const char *trap_group_name;
+	const char *trap_name;
+	struct net_device *input_dev;
+};
+
+#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
+void net_dm_hw_report(struct sk_buff *skb,
+		      const struct net_dm_hw_metadata *hw_metadata);
+#else
+static inline void
+net_dm_hw_report(struct sk_buff *skb,
+		 const struct net_dm_hw_metadata *hw_metadata)
+{
+}
+#endif
+
+#endif /* _NET_DROP_MONITOR_H_ */
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index aa9147a18329..6020f34728af 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -26,6 +26,7 @@
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <net/drop_monitor.h>
 #include <net/genetlink.h>
 #include <net/netevent.h>
 
@@ -43,6 +44,7 @@
  * netlink alerts
  */
 static int trace_state = TRACE_OFF;
+static bool monitor_hw;
 
 /* net_dm_mutex
  *
@@ -93,6 +95,8 @@ struct net_dm_alert_ops {
 	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
 				int work, int budget);
 	void (*work_item_func)(struct work_struct *work);
+	void (*hw_probe)(struct sk_buff *skb,
+			 const struct net_dm_hw_metadata *hw_metadata);
 };
 
 struct net_dm_skb_cb {
@@ -267,10 +271,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static void
+net_dm_hw_summary_probe(struct sk_buff *skb,
+			const struct net_dm_hw_metadata *hw_metadata)
+{
+}
+
 static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
 	.kfree_skb_probe	= trace_kfree_skb_hit,
 	.napi_poll_probe	= trace_napi_poll_hit,
 	.work_item_func		= send_dm_alert,
+	.hw_probe		= net_dm_hw_summary_probe,
 };
 
 static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
@@ -482,10 +493,17 @@ static void net_dm_packet_work(struct work_struct *work)
 		net_dm_packet_report(skb);
 }
 
+static void
+net_dm_hw_packet_probe(struct sk_buff *skb,
+		       const struct net_dm_hw_metadata *hw_metadata)
+{
+}
+
 static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
 	.kfree_skb_probe	= net_dm_packet_trace_kfree_skb_hit,
 	.napi_poll_probe	= net_dm_packet_trace_napi_poll_hit,
 	.work_item_func		= net_dm_packet_work,
+	.hw_probe		= net_dm_hw_packet_probe,
 };
 
 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
@@ -493,6 +511,16 @@ static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
 	[NET_DM_ALERT_MODE_PACKET]	= &net_dm_alert_packet_ops,
 };
 
+void net_dm_hw_report(struct sk_buff *skb,
+		      const struct net_dm_hw_metadata *hw_metadata)
+{
+	if (!monitor_hw)
+		return;
+
+	net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+}
+EXPORT_SYMBOL_GPL(net_dm_hw_report);
+
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 {
 	const struct net_dm_alert_ops *ops;
-- 
cgit v1.2.3


From 5e58109b1ea454b93e455e0e8fc0bc4c226b8c0a Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:14 +0300
Subject: drop_monitor: Add support for packet alert mode for hardware drops

In a similar fashion to software drops, extend drop monitor to send
netlink events when packets are dropped by the underlying hardware.

The main difference is that instead of encoding the program counter (PC)
from which kfree_skb() was called in the netlink message, we encode the
hardware trap name. The two are mostly equivalent since they should both
help the user understand why the packet was dropped.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  10 ++
 net/core/drop_monitor.c          | 304 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 310 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 405b31cbf723..9f8fb1bb4aa4 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -83,6 +83,10 @@ enum net_dm_attr {
 	NET_DM_ATTR_ORIG_LEN,			/* u32 */
 	NET_DM_ATTR_QUEUE_LEN,			/* u32 */
 	NET_DM_ATTR_STATS,			/* nested */
+	NET_DM_ATTR_HW_STATS,			/* nested */
+	NET_DM_ATTR_ORIGIN,			/* u16 */
+	NET_DM_ATTR_HW_TRAP_GROUP_NAME,		/* string */
+	NET_DM_ATTR_HW_TRAP_NAME,		/* string */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
@@ -101,6 +105,7 @@ enum net_dm_alert_mode {
 
 enum {
 	NET_DM_ATTR_PORT_NETDEV_IFINDEX,	/* u32 */
+	NET_DM_ATTR_PORT_NETDEV_NAME,		/* string */
 
 	__NET_DM_ATTR_PORT_MAX,
 	NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
@@ -113,4 +118,9 @@ enum {
 	NET_DM_ATTR_STATS_MAX = __NET_DM_ATTR_STATS_MAX - 1
 };
 
+enum net_dm_origin {
+	NET_DM_ORIGIN_SW,
+	NET_DM_ORIGIN_HW,
+};
+
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index a2c7f9162c9d..5a950b5af8fd 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -95,12 +95,16 @@ struct net_dm_alert_ops {
 	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
 				int work, int budget);
 	void (*work_item_func)(struct work_struct *work);
+	void (*hw_work_item_func)(struct work_struct *work);
 	void (*hw_probe)(struct sk_buff *skb,
 			 const struct net_dm_hw_metadata *hw_metadata);
 };
 
 struct net_dm_skb_cb {
-	void *pc;
+	union {
+		struct net_dm_hw_metadata *hw_metadata;
+		void *pc;
+	};
 };
 
 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
@@ -335,7 +339,9 @@ static size_t net_dm_in_port_size(void)
 	       /* NET_DM_ATTR_IN_PORT nest */
 	return nla_total_size(0) +
 	       /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
-	       nla_total_size(sizeof(u32));
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PORT_NETDEV_NAME */
+	       nla_total_size(IFNAMSIZ + 1);
 }
 
 #define NET_DM_MAX_SYMBOL_LEN 40
@@ -347,6 +353,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
 
 	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_ORIGIN */
+	       nla_total_size(sizeof(u16)) +
 	       /* NET_DM_ATTR_PC */
 	       nla_total_size(sizeof(u64)) +
 	       /* NET_DM_ATTR_SYMBOL */
@@ -363,7 +371,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	       nla_total_size(payload_len);
 }
 
-static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex,
+					    const char *name)
 {
 	struct nlattr *attr;
 
@@ -375,6 +384,9 @@ static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
 	    nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
 		goto nla_put_failure;
 
+	if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name))
+		goto nla_put_failure;
+
 	nla_nest_end(msg, attr);
 
 	return 0;
@@ -399,6 +411,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	if (!hdr)
 		return -EMSGSIZE;
 
+	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW))
+		goto nla_put_failure;
+
 	if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
 		goto nla_put_failure;
 
@@ -406,7 +421,7 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
 		goto nla_put_failure;
 
-	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL);
 	if (rc)
 		goto nla_put_failure;
 
@@ -493,16 +508,249 @@ static void net_dm_packet_work(struct work_struct *work)
 		net_dm_packet_report(skb);
 }
 
+static size_t
+net_dm_hw_packet_report_size(size_t payload_len,
+			     const struct net_dm_hw_metadata *hw_metadata)
+{
+	size_t size;
+
+	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_ORIGIN */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */
+	       nla_total_size(strlen(hw_metadata->trap_group_name) + 1) +
+	       /* NET_DM_ATTR_HW_TRAP_NAME */
+	       nla_total_size(strlen(hw_metadata->trap_name) + 1) +
+	       /* NET_DM_ATTR_IN_PORT */
+	       net_dm_in_port_size() +
+	       /* NET_DM_ATTR_TIMESTAMP */
+	       nla_total_size(sizeof(struct timespec)) +
+	       /* NET_DM_ATTR_ORIG_LEN */
+	       nla_total_size(sizeof(u32)) +
+	       /* NET_DM_ATTR_PROTO */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PAYLOAD */
+	       nla_total_size(payload_len);
+}
+
+static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
+					struct sk_buff *skb, size_t payload_len)
+{
+	struct net_dm_hw_metadata *hw_metadata;
+	struct nlattr *attr;
+	struct timespec ts;
+	void *hdr;
+
+	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_PACKET_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME,
+			   hw_metadata->trap_group_name))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME,
+			   hw_metadata->trap_name))
+		goto nla_put_failure;
+
+	if (hw_metadata->input_dev) {
+		struct net_device *dev = hw_metadata->input_dev;
+		int rc;
+
+		rc = net_dm_packet_report_in_port_put(msg, dev->ifindex,
+						      dev->name);
+		if (rc)
+			goto nla_put_failure;
+	}
+
+	if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
+	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+		goto nla_put_failure;
+
+	if (!payload_len)
+		goto out;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+		goto nla_put_failure;
+
+	attr = skb_put(msg, nla_total_size(payload_len));
+	attr->nla_type = NET_DM_ATTR_PAYLOAD;
+	attr->nla_len = nla_attr_size(payload_len);
+	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static struct net_dm_hw_metadata *
+net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata)
+{
+	struct net_dm_hw_metadata *n_hw_metadata;
+	const char *trap_group_name;
+	const char *trap_name;
+
+	n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC);
+	if (!n_hw_metadata)
+		return NULL;
+
+	trap_group_name = kmemdup(hw_metadata->trap_group_name,
+				  strlen(hw_metadata->trap_group_name) + 1,
+				  GFP_ATOMIC | __GFP_ZERO);
+	if (!trap_group_name)
+		goto free_hw_metadata;
+	n_hw_metadata->trap_group_name = trap_group_name;
+
+	trap_name = kmemdup(hw_metadata->trap_name,
+			    strlen(hw_metadata->trap_name) + 1,
+			    GFP_ATOMIC | __GFP_ZERO);
+	if (!trap_name)
+		goto free_trap_group;
+	n_hw_metadata->trap_name = trap_name;
+
+	n_hw_metadata->input_dev = hw_metadata->input_dev;
+	if (n_hw_metadata->input_dev)
+		dev_hold(n_hw_metadata->input_dev);
+
+	return n_hw_metadata;
+
+free_trap_group:
+	kfree(trap_group_name);
+free_hw_metadata:
+	kfree(n_hw_metadata);
+	return NULL;
+}
+
+static void
+net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata)
+{
+	if (hw_metadata->input_dev)
+		dev_put(hw_metadata->input_dev);
+	kfree(hw_metadata->trap_name);
+	kfree(hw_metadata->trap_group_name);
+	kfree(hw_metadata);
+}
+
+static void net_dm_hw_packet_report(struct sk_buff *skb)
+{
+	struct net_dm_hw_metadata *hw_metadata;
+	struct sk_buff *msg;
+	size_t payload_len;
+	int rc;
+
+	if (skb->data > skb_mac_header(skb))
+		skb_push(skb, skb->data - skb_mac_header(skb));
+	else
+		skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+	if (net_dm_trunc_len)
+		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
+
+	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+	msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata),
+			GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_hw_packet_report_fill(msg, skb, payload_len);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata);
+	consume_skb(skb);
+}
+
+static void net_dm_hw_packet_work(struct work_struct *work)
+{
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+	skb_queue_splice_tail_init(&hw_data->drop_queue, &list);
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+	while ((skb = __skb_dequeue(&list)))
+		net_dm_hw_packet_report(skb);
+}
+
 static void
 net_dm_hw_packet_probe(struct sk_buff *skb,
 		       const struct net_dm_hw_metadata *hw_metadata)
 {
+	struct net_dm_hw_metadata *n_hw_metadata;
+	ktime_t tstamp = ktime_get_real();
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata);
+	if (!n_hw_metadata)
+		goto free;
+
+	NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata;
+	nskb->tstamp = tstamp;
+
+	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+
+	spin_lock_irqsave(&hw_data->drop_queue.lock, flags);
+	if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len)
+		__skb_queue_tail(&hw_data->drop_queue, nskb);
+	else
+		goto unlock_free;
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+
+	schedule_work(&hw_data->dm_alert_work);
+
+	return;
+
+unlock_free:
+	spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags);
+	u64_stats_update_begin(&hw_data->stats.syncp);
+	hw_data->stats.dropped++;
+	u64_stats_update_end(&hw_data->stats.syncp);
+	net_dm_hw_metadata_free(n_hw_metadata);
+free:
+	consume_skb(nskb);
 }
 
 static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
 	.kfree_skb_probe	= net_dm_packet_trace_kfree_skb_hit,
 	.napi_poll_probe	= net_dm_packet_trace_napi_poll_hit,
 	.work_item_func		= net_dm_packet_work,
+	.hw_work_item_func	= net_dm_hw_packet_work,
 	.hw_probe		= net_dm_hw_packet_probe,
 };
 
@@ -819,6 +1067,50 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
+static void net_dm_hw_stats_read(struct net_dm_stats *stats)
+{
+	int cpu;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct net_dm_stats *cpu_stats = &hw_data->stats;
+		unsigned int start;
+		u64 dropped;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			dropped = cpu_stats->dropped;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->dropped += dropped;
+	}
+}
+
+static int net_dm_hw_stats_put(struct sk_buff *msg)
+{
+	struct net_dm_stats stats;
+	struct nlattr *attr;
+
+	net_dm_hw_stats_read(&stats);
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+			      stats.dropped, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
 static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
 {
 	void *hdr;
@@ -833,6 +1125,10 @@ static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
 	if (rc)
 		goto nla_put_failure;
 
+	rc = net_dm_hw_stats_put(msg);
+	if (rc)
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 
 	return 0;
-- 
cgit v1.2.3


From d40e1deb930f4bd51a5214983e50aafc26db686e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:15 +0300
Subject: drop_monitor: Add support for summary alert mode for hardware drops

In summary alert mode a notification is sent with a list of recent drop
reasons and a count of how many packets were dropped due to this reason.

To avoid expensive operations in the context in which packets are
dropped, each CPU holds an array whose number of entries is the maximum
number of drop reasons that can be encoded in the netlink notification.
Each entry stores the drop reason and a count. When a packet is dropped
the array is traversed and a new entry is created or the count of an
existing entry is incremented.

Later, in process context, the array is replaced with a newly allocated
copy and the old array is encoded in a netlink notification. To avoid
breaking user space, the notification includes the ancillary header,
which is 'struct net_dm_alert_msg' with number of entries set to '0'.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |   3 +
 net/core/drop_monitor.c          | 195 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 196 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 9f8fb1bb4aa4..3bddc9ec978c 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -87,6 +87,9 @@ enum net_dm_attr {
 	NET_DM_ATTR_ORIGIN,			/* u16 */
 	NET_DM_ATTR_HW_TRAP_GROUP_NAME,		/* string */
 	NET_DM_ATTR_HW_TRAP_NAME,		/* string */
+	NET_DM_ATTR_HW_ENTRIES,			/* nested */
+	NET_DM_ATTR_HW_ENTRY,			/* nested */
+	NET_DM_ATTR_HW_TRAP_COUNT,		/* u32 */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5a950b5af8fd..807c79d606aa 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -58,9 +58,26 @@ struct net_dm_stats {
 	struct u64_stats_sync syncp;
 };
 
+#define NET_DM_MAX_HW_TRAP_NAME_LEN 40
+
+struct net_dm_hw_entry {
+	char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN];
+	u32 count;
+};
+
+struct net_dm_hw_entries {
+	u32 num_entries;
+	struct net_dm_hw_entry entries[0];
+};
+
 struct per_cpu_dm_data {
-	spinlock_t		lock;	/* Protects 'skb' and 'send_timer' */
-	struct sk_buff		*skb;
+	spinlock_t		lock;	/* Protects 'skb', 'hw_entries' and
+					 * 'send_timer'
+					 */
+	union {
+		struct sk_buff			*skb;
+		struct net_dm_hw_entries	*hw_entries;
+	};
 	struct sk_buff_head	drop_queue;
 	struct work_struct	dm_alert_work;
 	struct timer_list	send_timer;
@@ -275,16 +292,189 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static struct net_dm_hw_entries *
+net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data)
+{
+	struct net_dm_hw_entries *hw_entries;
+	unsigned long flags;
+
+	hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit),
+			     GFP_KERNEL);
+	if (!hw_entries) {
+		/* If the memory allocation failed, we try to perform another
+		 * allocation in 1/10 second. Otherwise, the probe function
+		 * will constantly bail out.
+		 */
+		mod_timer(&hw_data->send_timer, jiffies + HZ / 10);
+	}
+
+	spin_lock_irqsave(&hw_data->lock, flags);
+	swap(hw_data->hw_entries, hw_entries);
+	spin_unlock_irqrestore(&hw_data->lock, flags);
+
+	return hw_entries;
+}
+
+static int net_dm_hw_entry_put(struct sk_buff *msg,
+			       const struct net_dm_hw_entry *hw_entry)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_hw_entries_put(struct sk_buff *msg,
+				 const struct net_dm_hw_entries *hw_entries)
+{
+	struct nlattr *attr;
+	int i;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES);
+	if (!attr)
+		return -EMSGSIZE;
+
+	for (i = 0; i < hw_entries->num_entries; i++) {
+		int rc;
+
+		rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]);
+		if (rc)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int
+net_dm_hw_summary_report_fill(struct sk_buff *msg,
+			      const struct net_dm_hw_entries *hw_entries)
+{
+	struct net_dm_alert_msg anc_hdr = { 0 };
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	/* We need to put the ancillary header in order not to break user
+	 * space.
+	 */
+	if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr))
+		goto nla_put_failure;
+
+	rc = net_dm_hw_entries_put(msg, hw_entries);
+	if (rc)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static void net_dm_hw_summary_work(struct work_struct *work)
+{
+	struct net_dm_hw_entries *hw_entries;
+	struct per_cpu_dm_data *hw_data;
+	struct sk_buff *msg;
+	int rc;
+
+	hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+	if (!hw_entries)
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_hw_summary_report_fill(msg, hw_entries);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	kfree(hw_entries);
+}
+
 static void
 net_dm_hw_summary_probe(struct sk_buff *skb,
 			const struct net_dm_hw_metadata *hw_metadata)
 {
+	struct net_dm_hw_entries *hw_entries;
+	struct net_dm_hw_entry *hw_entry;
+	struct per_cpu_dm_data *hw_data;
+	unsigned long flags;
+	int i;
+
+	hw_data = this_cpu_ptr(&dm_hw_cpu_data);
+	spin_lock_irqsave(&hw_data->lock, flags);
+	hw_entries = hw_data->hw_entries;
+
+	if (!hw_entries)
+		goto out;
+
+	for (i = 0; i < hw_entries->num_entries; i++) {
+		hw_entry = &hw_entries->entries[i];
+		if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name,
+			     NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) {
+			hw_entry->count++;
+			goto out;
+		}
+	}
+	if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit))
+		goto out;
+
+	hw_entry = &hw_entries->entries[hw_entries->num_entries];
+	strlcpy(hw_entry->trap_name, hw_metadata->trap_name,
+		NET_DM_MAX_HW_TRAP_NAME_LEN - 1);
+	hw_entry->count = 1;
+	hw_entries->num_entries++;
+
+	if (!timer_pending(&hw_data->send_timer)) {
+		hw_data->send_timer.expires = jiffies + dm_delay * HZ;
+		add_timer(&hw_data->send_timer);
+	}
+
+out:
+	spin_unlock_irqrestore(&hw_data->lock, flags);
 }
 
 static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
 	.kfree_skb_probe	= trace_kfree_skb_hit,
 	.napi_poll_probe	= trace_napi_poll_hit,
 	.work_item_func		= send_dm_alert,
+	.hw_work_item_func	= net_dm_hw_summary_work,
 	.hw_probe		= net_dm_hw_summary_probe,
 };
 
@@ -1309,6 +1499,7 @@ static void net_dm_hw_cpu_data_fini(int cpu)
 	struct per_cpu_dm_data *hw_data;
 
 	hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+	kfree(hw_data->hw_entries);
 	__net_dm_cpu_data_fini(hw_data);
 }
 
-- 
cgit v1.2.3


From 8e94c3bc922e70225bd35891c8e6002bddd984eb Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:16 +0300
Subject: drop_monitor: Allow user to start monitoring hardware drops

Drop monitor has start and stop commands, but so far these were only
used to start and stop monitoring of software drops.

Now that drop monitor can also monitor hardware drops, we should allow
the user to control these as well.

Do that by adding SW and HW flags to these commands. If no flag is
specified, then only start / stop monitoring software drops. This is
done in order to maintain backward-compatibility with existing user
space applications.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |   2 +
 net/core/drop_monitor.c          | 124 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 3bddc9ec978c..75a35dccb675 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -90,6 +90,8 @@ enum net_dm_attr {
 	NET_DM_ATTR_HW_ENTRIES,			/* nested */
 	NET_DM_ATTR_HW_ENTRY,			/* nested */
 	NET_DM_ATTR_HW_TRAP_COUNT,		/* u32 */
+	NET_DM_ATTR_SW_DROPS,			/* flag */
+	NET_DM_ATTR_HW_DROPS,			/* flag */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 807c79d606aa..bfc024024aa3 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -952,13 +952,82 @@ static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
 void net_dm_hw_report(struct sk_buff *skb,
 		      const struct net_dm_hw_metadata *hw_metadata)
 {
+	rcu_read_lock();
+
 	if (!monitor_hw)
-		return;
+		goto out;
 
 	net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata);
+
+out:
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(net_dm_hw_report);
 
+static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
+{
+	const struct net_dm_alert_ops *ops;
+	int cpu;
+
+	if (monitor_hw) {
+		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled");
+		return -EAGAIN;
+	}
+
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	if (!try_module_get(THIS_MODULE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+		return -ENODEV;
+	}
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct net_dm_hw_entries *hw_entries;
+
+		INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func);
+		timer_setup(&hw_data->send_timer, sched_send_work, 0);
+		hw_entries = net_dm_hw_reset_per_cpu_data(hw_data);
+		kfree(hw_entries);
+	}
+
+	monitor_hw = true;
+
+	return 0;
+}
+
+static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack)
+{
+	int cpu;
+
+	if (!monitor_hw)
+		NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled");
+
+	monitor_hw = false;
+
+	/* After this call returns we are guaranteed that no CPU is processing
+	 * any hardware drops.
+	 */
+	synchronize_rcu();
+
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+		struct sk_buff *skb;
+
+		del_timer_sync(&hw_data->send_timer);
+		cancel_work_sync(&hw_data->dm_alert_work);
+		while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+			struct net_dm_hw_metadata *hw_metadata;
+
+			hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+			net_dm_hw_metadata_free(hw_metadata);
+			consume_skb(skb);
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 {
 	const struct net_dm_alert_ops *ops;
@@ -1153,14 +1222,61 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 	return 0;
 }
 
+static int net_dm_monitor_start(bool set_sw, bool set_hw,
+				struct netlink_ext_ack *extack)
+{
+	bool sw_set = false;
+	int rc;
+
+	if (set_sw) {
+		rc = set_all_monitor_traces(TRACE_ON, extack);
+		if (rc)
+			return rc;
+		sw_set = true;
+	}
+
+	if (set_hw) {
+		rc = net_dm_hw_monitor_start(extack);
+		if (rc)
+			goto err_monitor_hw;
+	}
+
+	return 0;
+
+err_monitor_hw:
+	if (sw_set)
+		set_all_monitor_traces(TRACE_OFF, extack);
+	return rc;
+}
+
+static void net_dm_monitor_stop(bool set_sw, bool set_hw,
+				struct netlink_ext_ack *extack)
+{
+	if (set_hw)
+		net_dm_hw_monitor_stop(extack);
+	if (set_sw)
+		set_all_monitor_traces(TRACE_OFF, extack);
+}
+
 static int net_dm_cmd_trace(struct sk_buff *skb,
 			struct genl_info *info)
 {
+	bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS];
+	bool set_hw = !!info->attrs[NET_DM_ATTR_HW_DROPS];
+	struct netlink_ext_ack *extack = info->extack;
+
+	/* To maintain backward compatibility, we start / stop monitoring of
+	 * software drops if no flag is specified.
+	 */
+	if (!set_sw && !set_hw)
+		set_sw = true;
+
 	switch (info->genlhdr->cmd) {
 	case NET_DM_CMD_START:
-		return set_all_monitor_traces(TRACE_ON, info->extack);
+		return net_dm_monitor_start(set_sw, set_hw, extack);
 	case NET_DM_CMD_STOP:
-		return set_all_monitor_traces(TRACE_OFF, info->extack);
+		net_dm_monitor_stop(set_sw, set_hw, extack);
+		return 0;
 	}
 
 	return -EOPNOTSUPP;
@@ -1392,6 +1508,8 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
 	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
 	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
 	[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
+	[NET_DM_ATTR_SW_DROPS]	= {. type = NLA_FLAG },
+	[NET_DM_ATTR_HW_DROPS]	= {. type = NLA_FLAG },
 };
 
 static const struct genl_ops dropmon_ops[] = {
-- 
cgit v1.2.3


From 0f420b6c52e9799f664429e739a421fb4c5527a3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:17 +0300
Subject: devlink: Add packet trap infrastructure

Add the basic packet trap infrastructure that allows device drivers to
register their supported packet traps and trap groups with devlink.

Each driver is expected to provide basic information about each
supported trap, such as name and ID, but also the supported metadata
types that will accompany each packet trapped via the trap. The
currently supported metadata type is just the input port, but more will
be added in the future. For example, output port and traffic class.

Trap groups allow users to set the action of all member traps. In
addition, users can retrieve per-group statistics in case per-trap
statistics are too narrow. In the future, the trap group object can be
extended with more attributes, such as policer settings which will limit
the amount of traffic generated by member traps towards the CPU.

Beside registering their packet traps with devlink, drivers are also
expected to report trapped packets to devlink along with relevant
metadata. devlink will maintain packets and bytes statistics for each
packet trap and will potentially report the trapped packet with its
metadata to user space via drop monitor netlink channel.

The interface towards the drivers is simple and allows devlink to set
the action of the trap. Currently, only two actions are supported:
'trap' and 'drop'. When set to 'trap', the device is expected to provide
the sole copy of the packet to the driver which will pass it to devlink.
When set to 'drop', the device is expected to drop the packet and not
send a copy to the driver. In the future, more actions can be added,
such as 'mirror'.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h        |  129 +++++
 include/uapi/linux/devlink.h |   62 +++
 net/Kconfig                  |    1 +
 net/core/devlink.c           | 1068 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 1255 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 451268f64880..03b32e33e93e 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/refcount.h>
 #include <net/net_namespace.h>
 #include <uapi/linux/devlink.h>
 
@@ -31,6 +32,8 @@ struct devlink {
 	struct list_head reporter_list;
 	struct mutex reporters_lock; /* protects reporter_list */
 	struct devlink_dpipe_headers *dpipe_headers;
+	struct list_head trap_list;
+	struct list_head trap_group_list;
 	const struct devlink_ops *ops;
 	struct device *dev;
 	possible_net_t _net;
@@ -497,6 +500,89 @@ struct devlink_health_reporter_ops {
 			struct devlink_fmsg *fmsg);
 };
 
+/**
+ * struct devlink_trap_group - Immutable packet trap group attributes.
+ * @name: Trap group name.
+ * @id: Trap group identifier.
+ * @generic: Whether the trap group is generic or not.
+ *
+ * Describes immutable attributes of packet trap groups that drivers register
+ * with devlink.
+ */
+struct devlink_trap_group {
+	const char *name;
+	u16 id;
+	bool generic;
+};
+
+#define DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT	BIT(0)
+
+/**
+ * struct devlink_trap - Immutable packet trap attributes.
+ * @type: Trap type.
+ * @init_action: Initial trap action.
+ * @generic: Whether the trap is generic or not.
+ * @id: Trap identifier.
+ * @name: Trap name.
+ * @group: Immutable packet trap group attributes.
+ * @metadata_cap: Metadata types that can be provided by the trap.
+ *
+ * Describes immutable attributes of packet traps that drivers register with
+ * devlink.
+ */
+struct devlink_trap {
+	enum devlink_trap_type type;
+	enum devlink_trap_action init_action;
+	bool generic;
+	u16 id;
+	const char *name;
+	struct devlink_trap_group group;
+	u32 metadata_cap;
+};
+
+enum devlink_trap_generic_id {
+	/* Add new generic trap IDs above */
+	__DEVLINK_TRAP_GENERIC_ID_MAX,
+	DEVLINK_TRAP_GENERIC_ID_MAX = __DEVLINK_TRAP_GENERIC_ID_MAX - 1,
+};
+
+enum devlink_trap_group_generic_id {
+	/* Add new generic trap group IDs above */
+	__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_MAX =
+		__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX - 1,
+};
+
+#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \
+	{								      \
+		.type = DEVLINK_TRAP_TYPE_##_type,			      \
+		.init_action = DEVLINK_TRAP_ACTION_##_init_action,	      \
+		.generic = true,					      \
+		.id = DEVLINK_TRAP_GENERIC_ID_##_id,			      \
+		.name = DEVLINK_TRAP_GENERIC_NAME_##_id,		      \
+		.group = _group,					      \
+		.metadata_cap = _metadata_cap,				      \
+	}
+
+#define DEVLINK_TRAP_DRIVER(_type, _init_action, _id, _name, _group,	      \
+			    _metadata_cap)				      \
+	{								      \
+		.type = DEVLINK_TRAP_TYPE_##_type,			      \
+		.init_action = DEVLINK_TRAP_ACTION_##_init_action,	      \
+		.generic = false,					      \
+		.id = _id,						      \
+		.name = _name,						      \
+		.group = _group,					      \
+		.metadata_cap = _metadata_cap,				      \
+	}
+
+#define DEVLINK_TRAP_GROUP_GENERIC(_id)					      \
+	{								      \
+		.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id,		      \
+		.id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id,		      \
+		.generic = true,					      \
+	}
+
 struct devlink_ops {
 	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
@@ -558,6 +644,38 @@ struct devlink_ops {
 	int (*flash_update)(struct devlink *devlink, const char *file_name,
 			    const char *component,
 			    struct netlink_ext_ack *extack);
+	/**
+	 * @trap_init: Trap initialization function.
+	 *
+	 * Should be used by device drivers to initialize the trap in the
+	 * underlying device. Drivers should also store the provided trap
+	 * context, so that they could efficiently pass it to
+	 * devlink_trap_report() when the trap is triggered.
+	 */
+	int (*trap_init)(struct devlink *devlink,
+			 const struct devlink_trap *trap, void *trap_ctx);
+	/**
+	 * @trap_fini: Trap de-initialization function.
+	 *
+	 * Should be used by device drivers to de-initialize the trap in the
+	 * underlying device.
+	 */
+	void (*trap_fini)(struct devlink *devlink,
+			  const struct devlink_trap *trap, void *trap_ctx);
+	/**
+	 * @trap_action_set: Trap action set function.
+	 */
+	int (*trap_action_set)(struct devlink *devlink,
+			       const struct devlink_trap *trap,
+			       enum devlink_trap_action action);
+	/**
+	 * @trap_group_init: Trap group initialization function.
+	 *
+	 * Should be used by device drivers to initialize the trap group in the
+	 * underlying device.
+	 */
+	int (*trap_group_init)(struct devlink *devlink,
+			       const struct devlink_trap_group *group);
 };
 
 static inline void *devlink_priv(struct devlink *devlink)
@@ -774,6 +892,17 @@ void devlink_flash_update_status_notify(struct devlink *devlink,
 					unsigned long done,
 					unsigned long total);
 
+int devlink_traps_register(struct devlink *devlink,
+			   const struct devlink_trap *traps,
+			   size_t traps_count, void *priv);
+void devlink_traps_unregister(struct devlink *devlink,
+			      const struct devlink_trap *traps,
+			      size_t traps_count);
+void devlink_trap_report(struct devlink *devlink,
+			 struct sk_buff *skb, void *trap_ctx,
+			 struct devlink_port *in_devlink_port);
+void *devlink_trap_ctx_priv(void *trap_ctx);
+
 #if IS_ENABLED(CONFIG_NET_DEVLINK)
 
 void devlink_compat_running_version(struct net_device *dev,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index ffc993256527..546e75dd74ac 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -107,6 +107,16 @@ enum devlink_command {
 	DEVLINK_CMD_FLASH_UPDATE_END,		/* notification only */
 	DEVLINK_CMD_FLASH_UPDATE_STATUS,	/* notification only */
 
+	DEVLINK_CMD_TRAP_GET,		/* can dump */
+	DEVLINK_CMD_TRAP_SET,
+	DEVLINK_CMD_TRAP_NEW,
+	DEVLINK_CMD_TRAP_DEL,
+
+	DEVLINK_CMD_TRAP_GROUP_GET,	/* can dump */
+	DEVLINK_CMD_TRAP_GROUP_SET,
+	DEVLINK_CMD_TRAP_GROUP_NEW,
+	DEVLINK_CMD_TRAP_GROUP_DEL,
+
 	/* add new commands above here */
 	__DEVLINK_CMD_MAX,
 	DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1
@@ -194,6 +204,47 @@ enum devlink_param_fw_load_policy_value {
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
 };
 
+enum {
+	DEVLINK_ATTR_STATS_RX_PACKETS,		/* u64 */
+	DEVLINK_ATTR_STATS_RX_BYTES,		/* u64 */
+
+	__DEVLINK_ATTR_STATS_MAX,
+	DEVLINK_ATTR_STATS_MAX = __DEVLINK_ATTR_STATS_MAX - 1
+};
+
+/**
+ * enum devlink_trap_action - Packet trap action.
+ * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
+ *                            sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU.
+ */
+enum devlink_trap_action {
+	DEVLINK_TRAP_ACTION_DROP,
+	DEVLINK_TRAP_ACTION_TRAP,
+};
+
+/**
+ * enum devlink_trap_type - Packet trap type.
+ * @DEVLINK_TRAP_TYPE_DROP: Trap reason is a drop. Trapped packets are only
+ *                          processed by devlink and not injected to the
+ *                          kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_EXCEPTION: Trap reason is an exception. Packet was not
+ *                               forwarded as intended due to an exception
+ *                               (e.g., missing neighbour entry) and trapped to
+ *                               control plane for resolution. Trapped packets
+ *                               are processed by devlink and injected to
+ *                               the kernel's Rx path.
+ */
+enum devlink_trap_type {
+	DEVLINK_TRAP_TYPE_DROP,
+	DEVLINK_TRAP_TYPE_EXCEPTION,
+};
+
+enum {
+	/* Trap can report input port as metadata */
+	DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT,
+};
+
 enum devlink_attr {
 	/* don't change the order or add anything between, this is ABI! */
 	DEVLINK_ATTR_UNSPEC,
@@ -348,6 +399,17 @@ enum devlink_attr {
 	DEVLINK_ATTR_PORT_PCI_PF_NUMBER,	/* u16 */
 	DEVLINK_ATTR_PORT_PCI_VF_NUMBER,	/* u16 */
 
+	DEVLINK_ATTR_STATS,				/* nested */
+
+	DEVLINK_ATTR_TRAP_NAME,				/* string */
+	/* enum devlink_trap_action */
+	DEVLINK_ATTR_TRAP_ACTION,			/* u8 */
+	/* enum devlink_trap_type */
+	DEVLINK_ATTR_TRAP_TYPE,				/* u8 */
+	DEVLINK_ATTR_TRAP_GENERIC,			/* flag */
+	DEVLINK_ATTR_TRAP_METADATA,			/* nested */
+	DEVLINK_ATTR_TRAP_GROUP_NAME,			/* string */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/Kconfig b/net/Kconfig
index 57f51a279ad6..3101bfcbdd7a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -430,6 +430,7 @@ config NET_SOCK_MSG
 config NET_DEVLINK
 	bool
 	default n
+	imply NET_DROP_MONITOR
 
 config PAGE_POOL
        bool
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d3dbb904bf3b..960ceaec98d6 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -18,6 +18,8 @@
 #include <linux/spinlock.h>
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/timekeeping.h>
 #include <rdma/ib_verbs.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -25,6 +27,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/devlink.h>
+#include <net/drop_monitor.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/devlink.h>
 
@@ -551,7 +554,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 	if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index))
 		goto nla_put_failure;
 
-	spin_lock(&devlink_port->type_lock);
+	spin_lock_bh(&devlink_port->type_lock);
 	if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type))
 		goto nla_put_failure_type_locked;
 	if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET &&
@@ -576,7 +579,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 				   ibdev->name))
 			goto nla_put_failure_type_locked;
 	}
-	spin_unlock(&devlink_port->type_lock);
+	spin_unlock_bh(&devlink_port->type_lock);
 	if (devlink_nl_port_attrs_put(msg, devlink_port))
 		goto nla_put_failure;
 
@@ -584,7 +587,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink,
 	return 0;
 
 nla_put_failure_type_locked:
-	spin_unlock(&devlink_port->type_lock);
+	spin_unlock_bh(&devlink_port->type_lock);
 nla_put_failure:
 	genlmsg_cancel(msg, hdr);
 	return -EMSGSIZE;
@@ -5154,6 +5157,571 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
 	return 0;
 }
 
+struct devlink_stats {
+	u64 rx_bytes;
+	u64 rx_packets;
+	struct u64_stats_sync syncp;
+};
+
+/**
+ * struct devlink_trap_group_item - Packet trap group attributes.
+ * @group: Immutable packet trap group attributes.
+ * @refcount: Number of trap items using the group.
+ * @list: trap_group_list member.
+ * @stats: Trap group statistics.
+ *
+ * Describes packet trap group attributes. Created by devlink during trap
+ * registration.
+ */
+struct devlink_trap_group_item {
+	const struct devlink_trap_group *group;
+	refcount_t refcount;
+	struct list_head list;
+	struct devlink_stats __percpu *stats;
+};
+
+/**
+ * struct devlink_trap_item - Packet trap attributes.
+ * @trap: Immutable packet trap attributes.
+ * @group_item: Associated group item.
+ * @list: trap_list member.
+ * @action: Trap action.
+ * @stats: Trap statistics.
+ * @priv: Driver private information.
+ *
+ * Describes both mutable and immutable packet trap attributes. Created by
+ * devlink during trap registration and used for all trap related operations.
+ */
+struct devlink_trap_item {
+	const struct devlink_trap *trap;
+	struct devlink_trap_group_item *group_item;
+	struct list_head list;
+	enum devlink_trap_action action;
+	struct devlink_stats __percpu *stats;
+	void *priv;
+};
+
+static struct devlink_trap_item *
+devlink_trap_item_lookup(struct devlink *devlink, const char *name)
+{
+	struct devlink_trap_item *trap_item;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (!strcmp(trap_item->trap->name, name))
+			return trap_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_item *
+devlink_trap_item_get_from_info(struct devlink *devlink,
+				struct genl_info *info)
+{
+	struct nlattr *attr;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_NAME])
+		return NULL;
+	attr = info->attrs[DEVLINK_ATTR_TRAP_NAME];
+
+	return devlink_trap_item_lookup(devlink, nla_data(attr));
+}
+
+static int
+devlink_trap_action_get_from_info(struct genl_info *info,
+				  enum devlink_trap_action *p_trap_action)
+{
+	u8 val;
+
+	val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
+	switch (val) {
+	case DEVLINK_TRAP_ACTION_DROP: /* fall-through */
+	case DEVLINK_TRAP_ACTION_TRAP:
+		*p_trap_action = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_metadata_put(struct sk_buff *msg,
+				     const struct devlink_trap *trap)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats,
+				    struct devlink_stats *stats)
+{
+	int i;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(i) {
+		struct devlink_stats *cpu_stats;
+		u64 rx_packets, rx_bytes;
+		unsigned int start;
+
+		cpu_stats = per_cpu_ptr(trap_stats, i);
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			rx_packets = cpu_stats->rx_packets;
+			rx_bytes = cpu_stats->rx_bytes;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->rx_packets += rx_packets;
+		stats->rx_bytes += rx_bytes;
+	}
+}
+
+static int devlink_trap_stats_put(struct sk_buff *msg,
+				  struct devlink_stats __percpu *trap_stats)
+{
+	struct devlink_stats stats;
+	struct nlattr *attr;
+
+	devlink_trap_stats_read(trap_stats, &stats);
+
+	attr = nla_nest_start(msg, DEVLINK_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS,
+			      stats.rx_packets, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES,
+			      stats.rx_bytes, DEVLINK_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink,
+				const struct devlink_trap_item *trap_item,
+				enum devlink_command cmd, u32 portid, u32 seq,
+				int flags)
+{
+	struct devlink_trap_group_item *group_item = trap_item->group_item;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+			   group_item->group->name))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type))
+		goto nla_put_failure;
+
+	if (trap_item->trap->generic &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action))
+		goto nla_put_failure;
+
+	err = devlink_trap_metadata_put(msg, trap_item->trap);
+	if (err)
+		goto nla_put_failure;
+
+	err = devlink_trap_stats_put(msg, trap_item->stats);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_item *trap_item;
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_list))
+		return -EOPNOTSUPP;
+
+	trap_item = devlink_trap_item_get_from_info(devlink, info);
+	if (!trap_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_fill(msg, devlink, trap_item,
+				   DEVLINK_CMD_TRAP_NEW, info->snd_portid,
+				   info->snd_seq, 0);
+	if (err)
+		goto err_trap_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
+					  struct netlink_callback *cb)
+{
+	struct devlink_trap_item *trap_item;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(trap_item, &devlink->trap_list, list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_trap_fill(msg, devlink, trap_item,
+						   DEVLINK_CMD_TRAP_NEW,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq,
+						   NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int __devlink_trap_action_set(struct devlink *devlink,
+				     struct devlink_trap_item *trap_item,
+				     enum devlink_trap_action trap_action,
+				     struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (trap_item->action != trap_action &&
+	    trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+		return 0;
+	}
+
+	err = devlink->ops->trap_action_set(devlink, trap_item->trap,
+					    trap_action);
+	if (err)
+		return err;
+
+	trap_item->action = trap_action;
+
+	return 0;
+}
+
+static int devlink_trap_action_set(struct devlink *devlink,
+				   struct devlink_trap_item *trap_item,
+				   struct genl_info *info)
+{
+	enum devlink_trap_action trap_action;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+		return 0;
+
+	err = devlink_trap_action_get_from_info(info, &trap_action);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+		return -EINVAL;
+	}
+
+	return __devlink_trap_action_set(devlink, trap_item, trap_action,
+					 info->extack);
+}
+
+static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
+					struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_item *trap_item;
+	int err;
+
+	if (list_empty(&devlink->trap_list))
+		return -EOPNOTSUPP;
+
+	trap_item = devlink_trap_item_get_from_info(devlink, info);
+	if (!trap_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+		return -ENOENT;
+	}
+
+	err = devlink_trap_action_set(devlink, trap_item, info);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_lookup(struct devlink *devlink, const char *name)
+{
+	struct devlink_trap_group_item *group_item;
+
+	list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+		if (!strcmp(group_item->group->name, name))
+			return group_item;
+	}
+
+	return NULL;
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get_from_info(struct devlink *devlink,
+				      struct genl_info *info)
+{
+	char *name;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME])
+		return NULL;
+	name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]);
+
+	return devlink_trap_group_item_lookup(devlink, name);
+}
+
+static int
+devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink,
+			   const struct devlink_trap_group_item *group_item,
+			   enum devlink_command cmd, u32 portid, u32 seq,
+			   int flags)
+{
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (devlink_nl_put_handle(msg, devlink))
+		goto nla_put_failure;
+
+	if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME,
+			   group_item->group->name))
+		goto nla_put_failure;
+
+	if (group_item->group->generic &&
+	    nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC))
+		goto nla_put_failure;
+
+	err = devlink_trap_stats_put(msg, group_item->stats);
+	if (err)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_group_item *group_item;
+	struct sk_buff *msg;
+	int err;
+
+	if (list_empty(&devlink->trap_group_list))
+		return -EOPNOTSUPP;
+
+	group_item = devlink_trap_group_item_get_from_info(devlink, info);
+	if (!group_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+		return -ENOENT;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+					 DEVLINK_CMD_TRAP_GROUP_NEW,
+					 info->snd_portid, info->snd_seq, 0);
+	if (err)
+		goto err_trap_group_fill;
+
+	return genlmsg_reply(msg, info);
+
+err_trap_group_fill:
+	nlmsg_free(msg);
+	return err;
+}
+
+static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
+						struct netlink_callback *cb)
+{
+	enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+	struct devlink_trap_group_item *group_item;
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	struct devlink *devlink;
+	int start = cb->args[0];
+	int idx = 0;
+	int err;
+
+	mutex_lock(&devlink_mutex);
+	list_for_each_entry(devlink, &devlink_list, list) {
+		if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
+			continue;
+		mutex_lock(&devlink->lock);
+		list_for_each_entry(group_item, &devlink->trap_group_list,
+				    list) {
+			if (idx < start) {
+				idx++;
+				continue;
+			}
+			err = devlink_nl_trap_group_fill(msg, devlink,
+							 group_item, cmd,
+							 portid,
+							 cb->nlh->nlmsg_seq,
+							 NLM_F_MULTI);
+			if (err) {
+				mutex_unlock(&devlink->lock);
+				goto out;
+			}
+			idx++;
+		}
+		mutex_unlock(&devlink->lock);
+	}
+out:
+	mutex_unlock(&devlink_mutex);
+
+	cb->args[0] = idx;
+	return msg->len;
+}
+
+static int
+__devlink_trap_group_action_set(struct devlink *devlink,
+				struct devlink_trap_group_item *group_item,
+				enum devlink_trap_action trap_action,
+				struct netlink_ext_ack *extack)
+{
+	const char *group_name = group_item->group->name;
+	struct devlink_trap_item *trap_item;
+	int err;
+
+	list_for_each_entry(trap_item, &devlink->trap_list, list) {
+		if (strcmp(trap_item->trap->group.name, group_name))
+			continue;
+		err = __devlink_trap_action_set(devlink, trap_item,
+						trap_action, extack);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int
+devlink_trap_group_action_set(struct devlink *devlink,
+			      struct devlink_trap_group_item *group_item,
+			      struct genl_info *info)
+{
+	enum devlink_trap_action trap_action;
+	int err;
+
+	if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION])
+		return 0;
+
+	err = devlink_trap_action_get_from_info(info, &trap_action);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+		return -EINVAL;
+	}
+
+	err = __devlink_trap_group_action_set(devlink, group_item, trap_action,
+					      info->extack);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
+					      struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_trap_group_item *group_item;
+	int err;
+
+	if (list_empty(&devlink->trap_group_list))
+		return -EOPNOTSUPP;
+
+	group_item = devlink_trap_group_item_get_from_info(devlink, info);
+	if (!group_item) {
+		NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+		return -ENOENT;
+	}
+
+	err = devlink_trap_group_action_set(devlink, group_item, info);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
@@ -5184,6 +5752,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
 	[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
 	[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+	[DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+	[DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
 };
 
 static const struct genl_ops devlink_nl_ops[] = {
@@ -5483,6 +6054,32 @@ static const struct genl_ops devlink_nl_ops[] = {
 		.flags = GENL_ADMIN_PERM,
 		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
 	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GET,
+		.doit = devlink_nl_cmd_trap_get_doit,
+		.dumpit = devlink_nl_cmd_trap_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_SET,
+		.doit = devlink_nl_cmd_trap_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
+		.doit = devlink_nl_cmd_trap_group_get_doit,
+		.dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = DEVLINK_CMD_TRAP_GROUP_SET,
+		.doit = devlink_nl_cmd_trap_group_set_doit,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK,
+	},
 };
 
 static struct genl_family devlink_nl_family __ro_after_init = {
@@ -5528,6 +6125,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size)
 	INIT_LIST_HEAD(&devlink->param_list);
 	INIT_LIST_HEAD(&devlink->region_list);
 	INIT_LIST_HEAD(&devlink->reporter_list);
+	INIT_LIST_HEAD(&devlink->trap_list);
+	INIT_LIST_HEAD(&devlink->trap_group_list);
 	mutex_init(&devlink->lock);
 	mutex_init(&devlink->reporters_lock);
 	return devlink;
@@ -5574,6 +6173,8 @@ void devlink_free(struct devlink *devlink)
 {
 	mutex_destroy(&devlink->reporters_lock);
 	mutex_destroy(&devlink->lock);
+	WARN_ON(!list_empty(&devlink->trap_group_list));
+	WARN_ON(!list_empty(&devlink->trap_list));
 	WARN_ON(!list_empty(&devlink->reporter_list));
 	WARN_ON(!list_empty(&devlink->region_list));
 	WARN_ON(!list_empty(&devlink->param_list));
@@ -5678,10 +6279,10 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
 	if (WARN_ON(!devlink_port->registered))
 		return;
 	devlink_port_type_warn_cancel(devlink_port);
-	spin_lock(&devlink_port->type_lock);
+	spin_lock_bh(&devlink_port->type_lock);
 	devlink_port->type = type;
 	devlink_port->type_dev = type_dev;
-	spin_unlock(&devlink_port->type_lock);
+	spin_unlock_bh(&devlink_port->type_lock);
 	devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
 }
 
@@ -6834,6 +7435,463 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
 
+#define DEVLINK_TRAP(_id, _type)					      \
+	{								      \
+		.type = DEVLINK_TRAP_TYPE_##_type,			      \
+		.id = DEVLINK_TRAP_GENERIC_ID_##_id,			      \
+		.name = DEVLINK_TRAP_GENERIC_NAME_##_id,		      \
+	}
+
+static const struct devlink_trap devlink_trap_generic[] = {
+};
+
+#define DEVLINK_TRAP_GROUP(_id)						      \
+	{								      \
+		.id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id,		      \
+		.name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id,		      \
+	}
+
+static const struct devlink_trap_group devlink_trap_group_generic[] = {
+};
+
+static int devlink_trap_generic_verify(const struct devlink_trap *trap)
+{
+	if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	if (strcmp(trap->name, devlink_trap_generic[trap->id].name))
+		return -EINVAL;
+
+	if (trap->type != devlink_trap_generic[trap->id].type)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int devlink_trap_driver_verify(const struct devlink_trap *trap)
+{
+	int i;
+
+	if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) {
+		if (!strcmp(trap->name, devlink_trap_generic[i].name))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_verify(const struct devlink_trap *trap)
+{
+	if (!trap || !trap->name || !trap->group.name)
+		return -EINVAL;
+
+	if (trap->generic)
+		return devlink_trap_generic_verify(trap);
+	else
+		return devlink_trap_driver_verify(trap);
+}
+
+static int
+devlink_trap_group_generic_verify(const struct devlink_trap_group *group)
+{
+	if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	if (strcmp(group->name, devlink_trap_group_generic[group->id].name))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+devlink_trap_group_driver_verify(const struct devlink_trap_group *group)
+{
+	int i;
+
+	if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) {
+		if (!strcmp(group->name, devlink_trap_group_generic[i].name))
+			return -EEXIST;
+	}
+
+	return 0;
+}
+
+static int devlink_trap_group_verify(const struct devlink_trap_group *group)
+{
+	if (group->generic)
+		return devlink_trap_group_generic_verify(group);
+	else
+		return devlink_trap_group_driver_verify(group);
+}
+
+static void
+devlink_trap_group_notify(struct devlink *devlink,
+			  const struct devlink_trap_group_item *group_item,
+			  enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0,
+					 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_create(struct devlink *devlink,
+			       const struct devlink_trap_group *group)
+{
+	struct devlink_trap_group_item *group_item;
+	int err;
+
+	err = devlink_trap_group_verify(group);
+	if (err)
+		return ERR_PTR(err);
+
+	group_item = kzalloc(sizeof(*group_item), GFP_KERNEL);
+	if (!group_item)
+		return ERR_PTR(-ENOMEM);
+
+	group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+	if (!group_item->stats) {
+		err = -ENOMEM;
+		goto err_stats_alloc;
+	}
+
+	group_item->group = group;
+	refcount_set(&group_item->refcount, 1);
+
+	if (devlink->ops->trap_group_init) {
+		err = devlink->ops->trap_group_init(devlink, group);
+		if (err)
+			goto err_group_init;
+	}
+
+	list_add_tail(&group_item->list, &devlink->trap_group_list);
+	devlink_trap_group_notify(devlink, group_item,
+				  DEVLINK_CMD_TRAP_GROUP_NEW);
+
+	return group_item;
+
+err_group_init:
+	free_percpu(group_item->stats);
+err_stats_alloc:
+	kfree(group_item);
+	return ERR_PTR(err);
+}
+
+static void
+devlink_trap_group_item_destroy(struct devlink *devlink,
+				struct devlink_trap_group_item *group_item)
+{
+	devlink_trap_group_notify(devlink, group_item,
+				  DEVLINK_CMD_TRAP_GROUP_DEL);
+	list_del(&group_item->list);
+	free_percpu(group_item->stats);
+	kfree(group_item);
+}
+
+static struct devlink_trap_group_item *
+devlink_trap_group_item_get(struct devlink *devlink,
+			    const struct devlink_trap_group *group)
+{
+	struct devlink_trap_group_item *group_item;
+
+	group_item = devlink_trap_group_item_lookup(devlink, group->name);
+	if (group_item) {
+		refcount_inc(&group_item->refcount);
+		return group_item;
+	}
+
+	return devlink_trap_group_item_create(devlink, group);
+}
+
+static void
+devlink_trap_group_item_put(struct devlink *devlink,
+			    struct devlink_trap_group_item *group_item)
+{
+	if (!refcount_dec_and_test(&group_item->refcount))
+		return;
+
+	devlink_trap_group_item_destroy(devlink, group_item);
+}
+
+static int
+devlink_trap_item_group_link(struct devlink *devlink,
+			     struct devlink_trap_item *trap_item)
+{
+	struct devlink_trap_group_item *group_item;
+
+	group_item = devlink_trap_group_item_get(devlink,
+						 &trap_item->trap->group);
+	if (IS_ERR(group_item))
+		return PTR_ERR(group_item);
+
+	trap_item->group_item = group_item;
+
+	return 0;
+}
+
+static void
+devlink_trap_item_group_unlink(struct devlink *devlink,
+			       struct devlink_trap_item *trap_item)
+{
+	devlink_trap_group_item_put(devlink, trap_item->group_item);
+}
+
+static void devlink_trap_notify(struct devlink *devlink,
+				const struct devlink_trap_item *trap_item,
+				enum devlink_command cmd)
+{
+	struct sk_buff *msg;
+	int err;
+
+	WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
+		     cmd != DEVLINK_CMD_TRAP_DEL);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0);
+	if (err) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+static int
+devlink_trap_register(struct devlink *devlink,
+		      const struct devlink_trap *trap, void *priv)
+{
+	struct devlink_trap_item *trap_item;
+	int err;
+
+	if (devlink_trap_item_lookup(devlink, trap->name))
+		return -EEXIST;
+
+	trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL);
+	if (!trap_item)
+		return -ENOMEM;
+
+	trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats);
+	if (!trap_item->stats) {
+		err = -ENOMEM;
+		goto err_stats_alloc;
+	}
+
+	trap_item->trap = trap;
+	trap_item->action = trap->init_action;
+	trap_item->priv = priv;
+
+	err = devlink_trap_item_group_link(devlink, trap_item);
+	if (err)
+		goto err_group_link;
+
+	err = devlink->ops->trap_init(devlink, trap, trap_item);
+	if (err)
+		goto err_trap_init;
+
+	list_add_tail(&trap_item->list, &devlink->trap_list);
+	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW);
+
+	return 0;
+
+err_trap_init:
+	devlink_trap_item_group_unlink(devlink, trap_item);
+err_group_link:
+	free_percpu(trap_item->stats);
+err_stats_alloc:
+	kfree(trap_item);
+	return err;
+}
+
+static void devlink_trap_unregister(struct devlink *devlink,
+				    const struct devlink_trap *trap)
+{
+	struct devlink_trap_item *trap_item;
+
+	trap_item = devlink_trap_item_lookup(devlink, trap->name);
+	if (WARN_ON_ONCE(!trap_item))
+		return;
+
+	devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL);
+	list_del(&trap_item->list);
+	if (devlink->ops->trap_fini)
+		devlink->ops->trap_fini(devlink, trap, trap_item);
+	devlink_trap_item_group_unlink(devlink, trap_item);
+	free_percpu(trap_item->stats);
+	kfree(trap_item);
+}
+
+static void devlink_trap_disable(struct devlink *devlink,
+				 const struct devlink_trap *trap)
+{
+	struct devlink_trap_item *trap_item;
+
+	trap_item = devlink_trap_item_lookup(devlink, trap->name);
+	if (WARN_ON_ONCE(!trap_item))
+		return;
+
+	devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP);
+	trap_item->action = DEVLINK_TRAP_ACTION_DROP;
+}
+
+/**
+ * devlink_traps_register - Register packet traps with devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ * @priv: Driver private information.
+ *
+ * Return: Non-zero value on failure.
+ */
+int devlink_traps_register(struct devlink *devlink,
+			   const struct devlink_trap *traps,
+			   size_t traps_count, void *priv)
+{
+	int i, err;
+
+	if (!devlink->ops->trap_init || !devlink->ops->trap_action_set)
+		return -EINVAL;
+
+	mutex_lock(&devlink->lock);
+	for (i = 0; i < traps_count; i++) {
+		const struct devlink_trap *trap = &traps[i];
+
+		err = devlink_trap_verify(trap);
+		if (err)
+			goto err_trap_verify;
+
+		err = devlink_trap_register(devlink, trap, priv);
+		if (err)
+			goto err_trap_register;
+	}
+	mutex_unlock(&devlink->lock);
+
+	return 0;
+
+err_trap_register:
+err_trap_verify:
+	for (i--; i >= 0; i--)
+		devlink_trap_unregister(devlink, &traps[i]);
+	mutex_unlock(&devlink->lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_traps_register);
+
+/**
+ * devlink_traps_unregister - Unregister packet traps from devlink.
+ * @devlink: devlink.
+ * @traps: Packet traps.
+ * @traps_count: Count of provided packet traps.
+ */
+void devlink_traps_unregister(struct devlink *devlink,
+			      const struct devlink_trap *traps,
+			      size_t traps_count)
+{
+	int i;
+
+	mutex_lock(&devlink->lock);
+	/* Make sure we do not have any packets in-flight while unregistering
+	 * traps by disabling all of them and waiting for a grace period.
+	 */
+	for (i = traps_count - 1; i >= 0; i--)
+		devlink_trap_disable(devlink, &traps[i]);
+	synchronize_rcu();
+	for (i = traps_count - 1; i >= 0; i--)
+		devlink_trap_unregister(devlink, &traps[i]);
+	mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devlink_traps_unregister);
+
+static void
+devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats,
+			  size_t skb_len)
+{
+	struct devlink_stats *stats;
+
+	stats = this_cpu_ptr(trap_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_bytes += skb_len;
+	stats->rx_packets++;
+	u64_stats_update_end(&stats->syncp);
+}
+
+static void
+devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata,
+				  const struct devlink_trap_item *trap_item,
+				  struct devlink_port *in_devlink_port)
+{
+	struct devlink_trap_group_item *group_item = trap_item->group_item;
+
+	hw_metadata->trap_group_name = group_item->group->name;
+	hw_metadata->trap_name = trap_item->trap->name;
+
+	spin_lock(&in_devlink_port->type_lock);
+	if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH)
+		hw_metadata->input_dev = in_devlink_port->type_dev;
+	spin_unlock(&in_devlink_port->type_lock);
+}
+
+/**
+ * devlink_trap_report - Report trapped packet to drop monitor.
+ * @devlink: devlink.
+ * @skb: Trapped packet.
+ * @trap_ctx: Trap context.
+ * @in_devlink_port: Input devlink port.
+ */
+void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
+			 void *trap_ctx, struct devlink_port *in_devlink_port)
+{
+	struct devlink_trap_item *trap_item = trap_ctx;
+	struct net_dm_hw_metadata hw_metadata = {};
+
+	devlink_trap_stats_update(trap_item->stats, skb->len);
+	devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
+
+	devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
+					  in_devlink_port);
+	net_dm_hw_report(skb, &hw_metadata);
+}
+EXPORT_SYMBOL_GPL(devlink_trap_report);
+
+/**
+ * devlink_trap_ctx_priv - Trap context to driver private information.
+ * @trap_ctx: Trap context.
+ *
+ * Return: Driver private information passed during registration.
+ */
+void *devlink_trap_ctx_priv(void *trap_ctx)
+{
+	struct devlink_trap_item *trap_item = trap_ctx;
+
+	return trap_item->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv);
+
 static void __devlink_compat_running_version(struct devlink *devlink,
 					     char *buf, size_t len)
 {
-- 
cgit v1.2.3


From 391203ab11df9b23cd0b867122bccbe33fe16f02 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:18 +0300
Subject: devlink: Add generic packet traps and groups

Add generic packet traps and groups that can report dropped packets as
well as exceptions such as TTL error.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 40 ++++++++++++++++++++++++++++++++++++++++
 net/core/devlink.c    | 12 ++++++++++++
 2 files changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 03b32e33e93e..fb02e0e89f9d 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -541,18 +541,58 @@ struct devlink_trap {
 };
 
 enum devlink_trap_generic_id {
+	DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
+	DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH,
+	DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
+	DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER,
+	DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_TTL_ERROR,
+	DEVLINK_TRAP_GENERIC_ID_TAIL_DROP,
+
 	/* Add new generic trap IDs above */
 	__DEVLINK_TRAP_GENERIC_ID_MAX,
 	DEVLINK_TRAP_GENERIC_ID_MAX = __DEVLINK_TRAP_GENERIC_ID_MAX - 1,
 };
 
 enum devlink_trap_group_generic_id {
+	DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS,
+
 	/* Add new generic trap group IDs above */
 	__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_MAX =
 		__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX - 1,
 };
 
+#define DEVLINK_TRAP_GENERIC_NAME_SMAC_MC \
+	"source_mac_is_multicast"
+#define DEVLINK_TRAP_GENERIC_NAME_VLAN_TAG_MISMATCH \
+	"vlan_tag_mismatch"
+#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_VLAN_FILTER \
+	"ingress_vlan_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_INGRESS_STP_FILTER \
+	"ingress_spanning_tree_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_EMPTY_TX_LIST \
+	"port_list_is_empty"
+#define DEVLINK_TRAP_GENERIC_NAME_PORT_LOOPBACK_FILTER \
+	"port_loopback_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_BLACKHOLE_ROUTE \
+	"blackhole_route"
+#define DEVLINK_TRAP_GENERIC_NAME_TTL_ERROR \
+	"ttl_value_is_too_small"
+#define DEVLINK_TRAP_GENERIC_NAME_TAIL_DROP \
+	"tail_drop"
+
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
+	"l2_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_DROPS \
+	"l3_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BUFFER_DROPS \
+	"buffer_drops"
+
 #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group, _metadata_cap) \
 	{								      \
 		.type = DEVLINK_TRAP_TYPE_##_type,			      \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 960ceaec98d6..650f36379203 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -7443,6 +7443,15 @@ EXPORT_SYMBOL_GPL(devlink_region_snapshot_create);
 	}
 
 static const struct devlink_trap devlink_trap_generic[] = {
+	DEVLINK_TRAP(SMAC_MC, DROP),
+	DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP),
+	DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP),
+	DEVLINK_TRAP(INGRESS_STP_FILTER, DROP),
+	DEVLINK_TRAP(EMPTY_TX_LIST, DROP),
+	DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP),
+	DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP),
+	DEVLINK_TRAP(TTL_ERROR, EXCEPTION),
+	DEVLINK_TRAP(TAIL_DROP, DROP),
 };
 
 #define DEVLINK_TRAP_GROUP(_id)						      \
@@ -7452,6 +7461,9 @@ static const struct devlink_trap devlink_trap_generic[] = {
 	}
 
 static const struct devlink_trap_group devlink_trap_group_generic[] = {
+	DEVLINK_TRAP_GROUP(L2_DROPS),
+	DEVLINK_TRAP_GROUP(L3_DROPS),
+	DEVLINK_TRAP_GROUP(BUFFER_DROPS),
 };
 
 static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-- 
cgit v1.2.3


From f3047ca01f12bf997ef199710b51490b0f750b34 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sat, 17 Aug 2019 16:28:19 +0300
Subject: Documentation: Add devlink-trap documentation

Add initial documentation of the devlink-trap mechanism, explaining the
background, motivation and the semantics of the interface.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-trap.rst | 187 ++++++++++++++++++++++++++++++
 Documentation/networking/index.rst        |   1 +
 include/net/devlink.h                     |   6 +
 3 files changed, 194 insertions(+)
 create mode 100644 Documentation/networking/devlink-trap.rst

(limited to 'include')

diff --git a/Documentation/networking/devlink-trap.rst b/Documentation/networking/devlink-trap.rst
new file mode 100644
index 000000000000..dbc7a3e00fd8
--- /dev/null
+++ b/Documentation/networking/devlink-trap.rst
@@ -0,0 +1,187 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Devlink Trap
+============
+
+Background
+==========
+
+Devices capable of offloading the kernel's datapath and perform functions such
+as bridging and routing must also be able to send specific packets to the
+kernel (i.e., the CPU) for processing.
+
+For example, a device acting as a multicast-aware bridge must be able to send
+IGMP membership reports to the kernel for processing by the bridge module.
+Without processing such packets, the bridge module could never populate its
+MDB.
+
+As another example, consider a device acting as router which has received an IP
+packet with a TTL of 1. Upon routing the packet the device must send it to the
+kernel so that it will route it as well and generate an ICMP Time Exceeded
+error datagram. Without letting the kernel route such packets itself, utilities
+such as ``traceroute`` could never work.
+
+The fundamental ability of sending certain packets to the kernel for processing
+is called "packet trapping".
+
+Overview
+========
+
+The ``devlink-trap`` mechanism allows capable device drivers to register their
+supported packet traps with ``devlink`` and report trapped packets to
+``devlink`` for further analysis.
+
+Upon receiving trapped packets, ``devlink`` will perform a per-trap packets and
+bytes accounting and potentially report the packet to user space via a netlink
+event along with all the provided metadata (e.g., trap reason, timestamp, input
+port). This is especially useful for drop traps (see :ref:`Trap-Types`)
+as it allows users to obtain further visibility into packet drops that would
+otherwise be invisible.
+
+The following diagram provides a general overview of ``devlink-trap``::
+
+                                    Netlink event: Packet w/ metadata
+                                                   Or a summary of recent drops
+                                  ^
+                                  |
+         Userspace                |
+        +---------------------------------------------------+
+         Kernel                   |
+                                  |
+                          +-------+--------+
+                          |                |
+                          |  drop_monitor  |
+                          |                |
+                          +-------^--------+
+                                  |
+                                  |
+                                  |
+                             +----+----+
+                             |         |      Kernel's Rx path
+                             | devlink |      (non-drop traps)
+                             |         |
+                             +----^----+      ^
+                                  |           |
+                                  +-----------+
+                                  |
+                          +-------+-------+
+                          |               |
+                          | Device driver |
+                          |               |
+                          +-------^-------+
+         Kernel                   |
+        +---------------------------------------------------+
+         Hardware                 |
+                                  | Trapped packet
+                                  |
+                               +--+---+
+                               |      |
+                               | ASIC |
+                               |      |
+                               +------+
+
+.. _Trap-Types:
+
+Trap Types
+==========
+
+The ``devlink-trap`` mechanism supports the following packet trap types:
+
+  * ``drop``: Trapped packets were dropped by the underlying device. Packets
+    are only processed by ``devlink`` and not injected to the kernel's Rx path.
+    The trap action (see :ref:`Trap-Actions`) can be changed.
+  * ``exception``: Trapped packets were not forwarded as intended by the
+    underlying device due to an exception (e.g., TTL error, missing neighbour
+    entry) and trapped to the control plane for resolution. Packets are
+    processed by ``devlink`` and injected to the kernel's Rx path. Changing the
+    action of such traps is not allowed, as it can easily break the control
+    plane.
+
+.. _Trap-Actions:
+
+Trap Actions
+============
+
+The ``devlink-trap`` mechanism supports the following packet trap actions:
+
+  * ``trap``: The sole copy of the packet is sent to the CPU.
+  * ``drop``: The packet is dropped by the underlying device and a copy is not
+    sent to the CPU.
+
+Generic Packet Traps
+====================
+
+Generic packet traps are used to describe traps that trap well-defined packets
+or packets that are trapped due to well-defined conditions (e.g., TTL error).
+Such traps can be shared by multiple device drivers and their description must
+be added to the following table:
+
+.. list-table:: List of Generic Packet Traps
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``source_mac_is_multicast``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop because of a
+       multicast source MAC
+   * - ``vlan_tag_mismatch``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop in case of VLAN
+       tag mismatch: The ingress bridge port is not configured with a PVID and
+       the packet is untagged or prio-tagged
+   * - ``ingress_vlan_filter``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop in case they are
+       tagged with a VLAN that is not configured on the ingress bridge port
+   * - ``ingress_spanning_tree_filter``
+     - ``drop``
+     - Traps incoming packets that the device decided to drop in case the STP
+       state of the ingress bridge port is not "forwarding"
+   * - ``port_list_is_empty``
+     - ``drop``
+     - Traps packets that the device decided to drop in case they need to be
+       flooded and the flood list is empty
+   * - ``port_loopback_filter``
+     - ``drop``
+     - Traps packets that the device decided to drop in case after layer 2
+       forwarding the only port from which they should be transmitted through
+       is the port from which they were received
+   * - ``blackhole_route``
+     - ``drop``
+     - Traps packets that the device decided to drop in case they hit a
+       blackhole route
+   * - ``ttl_value_is_too_small``
+     - ``exception``
+     - Traps unicast packets that should be forwarded by the device whose TTL
+       was decremented to 0 or less
+   * - ``tail_drop``
+     - ``drop``
+     - Traps packets that the device decided to drop because they could not be
+       enqueued to a transmission queue which is full
+
+Generic Packet Trap Groups
+==========================
+
+Generic packet trap groups are used to aggregate logically related packet
+traps. These groups allow the user to batch operations such as setting the trap
+action of all member traps. In addition, ``devlink-trap`` can report aggregated
+per-group packets and bytes statistics, in case per-trap statistics are too
+narrow. The description of these groups must be added to the following table:
+
+.. list-table:: List of Generic Packet Trap Groups
+   :widths: 10 90
+
+   * - Name
+     - Description
+   * - ``l2_drops``
+     - Contains packet traps for packets that were dropped by the device during
+       layer 2 forwarding (i.e., bridge)
+   * - ``l3_drops``
+     - Contains packet traps for packets that were dropped by the device or hit
+       an exception (e.g., TTL error) during layer 3 forwarding
+   * - ``buffer_drops``
+     - Contains packet traps for packets that were dropped by the device due to
+       an enqueue decision
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index a46fca264bee..86a814e4d450 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -14,6 +14,7 @@ Contents:
    device_drivers/index
    dsa/index
    devlink-info-versions
+   devlink-trap
    ieee802154
    kapi
    z8530book
diff --git a/include/net/devlink.h b/include/net/devlink.h
index fb02e0e89f9d..7f43c48f54cd 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -540,6 +540,9 @@ struct devlink_trap {
 	u32 metadata_cap;
 };
 
+/* All traps must be documented in
+ * Documentation/networking/devlink-trap.rst
+ */
 enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
 	DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH,
@@ -556,6 +559,9 @@ enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_MAX = __DEVLINK_TRAP_GENERIC_ID_MAX - 1,
 };
 
+/* All trap groups must be documented in
+ * Documentation/networking/devlink-trap.rst
+ */
 enum devlink_trap_group_generic_id {
 	DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS,
-- 
cgit v1.2.3


From 9116e5e2b1fff71dce501d971e86a3695acc3dba Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 14 Aug 2019 09:27:16 +0200
Subject: xsk: replace ndo_xsk_async_xmit with ndo_xsk_wakeup

This commit replaces ndo_xsk_async_xmit with ndo_xsk_wakeup. This new
ndo provides the same functionality as before but with the addition of
a new flags field that is used to specifiy if Rx, Tx or both should be
woken up. The previous ndo only woke up Tx, as implied by the
name. The i40e and ixgbe drivers (which are all the supported ones)
are updated with this new interface.

This new ndo will be used by the new need_wakeup functionality of XDP
sockets that need to be able to wake up both Rx and Tx driver
processing.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c          |  5 +++--
 drivers/net/ethernet/intel/i40e/i40e_xsk.c           |  7 ++++---
 drivers/net/ethernet/intel/i40e/i40e_xsk.h           |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c        |  5 +++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c         |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    |  2 +-
 include/linux/netdevice.h                            | 14 ++++++++++++--
 net/xdp/xdp_umem.c                                   |  3 +--
 net/xdp/xsk.c                                        |  3 ++-
 12 files changed, 32 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6d456e579314..a75c66c8679d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -12570,7 +12570,8 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
 	if (need_reset && prog)
 		for (i = 0; i < vsi->num_queue_pairs; i++)
 			if (vsi->xdp_rings[i]->xsk_umem)
-				(void)i40e_xsk_async_xmit(vsi->netdev, i);
+				(void)i40e_xsk_wakeup(vsi->netdev, i,
+						      XDP_WAKEUP_RX);
 
 	return 0;
 }
@@ -12892,7 +12893,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_bridge_setlink	= i40e_ndo_bridge_setlink,
 	.ndo_bpf		= i40e_xdp,
 	.ndo_xdp_xmit		= i40e_xdp_xmit,
-	.ndo_xsk_async_xmit	= i40e_xsk_async_xmit,
+	.ndo_xsk_wakeup	        = i40e_xsk_wakeup,
 	.ndo_dfwd_add_station	= i40e_fwd_add,
 	.ndo_dfwd_del_station	= i40e_fwd_del,
 };
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 32bad014d76c..d0ff5d8b297b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -116,7 +116,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
 			return err;
 
 		/* Kick start the NAPI context so that receiving will start */
-		err = i40e_xsk_async_xmit(vsi->netdev, qid);
+		err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
 		if (err)
 			return err;
 	}
@@ -765,13 +765,14 @@ out_xmit:
 }
 
 /**
- * i40e_xsk_async_xmit - Implements the ndo_xsk_async_xmit
+ * i40e_xsk_wakeup - Implements the ndo_xsk_wakeup
  * @dev: the netdevice
  * @queue_id: queue id to wake up
+ * @flags: ignored in our case since we have Rx and Tx in the same NAPI.
  *
  * Returns <0 for errors, 0 otherwise.
  **/
-int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id)
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	struct i40e_vsi *vsi = np->vsi;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 8cc0a2e7d9a2..9ed59c14eb55 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -18,6 +18,6 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
 
 bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
 			   struct i40e_ring *tx_ring, int napi_budget);
-int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 
 #endif /* _I40E_XSK_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index dc7b128c780e..05729b448612 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10263,7 +10263,8 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 	if (need_reset && prog)
 		for (i = 0; i < adapter->num_rx_queues; i++)
 			if (adapter->xdp_ring[i]->xsk_umem)
-				(void)ixgbe_xsk_async_xmit(adapter->netdev, i);
+				(void)ixgbe_xsk_wakeup(adapter->netdev, i,
+						       XDP_WAKEUP_RX);
 
 	return 0;
 }
@@ -10382,7 +10383,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_features_check	= ixgbe_features_check,
 	.ndo_bpf		= ixgbe_xdp,
 	.ndo_xdp_xmit		= ixgbe_xdp_xmit,
-	.ndo_xsk_async_xmit	= ixgbe_xsk_async_xmit,
+	.ndo_xsk_wakeup         = ixgbe_xsk_wakeup,
 };
 
 static void ixgbe_disable_txr_hw(struct ixgbe_adapter *adapter,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index d93a690aff74..6d01700b46bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -42,7 +42,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring);
 bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
 			    struct ixgbe_ring *tx_ring, int napi_budget);
-int ixgbe_xsk_async_xmit(struct net_device *dev, u32 queue_id);
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 void ixgbe_xsk_clean_tx_ring(struct ixgbe_ring *tx_ring);
 
 #endif /* #define _IXGBE_TXRX_COMMON_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 6b609553329f..e598af9faced 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -100,7 +100,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
 		ixgbe_txrx_ring_enable(adapter, qid);
 
 		/* Kick start the NAPI context so that receiving will start */
-		err = ixgbe_xsk_async_xmit(adapter->netdev, qid);
+		err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
 		if (err)
 			return err;
 	}
@@ -692,7 +692,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
 	return budget > 0 && xmit_done;
 }
 
-int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid)
+int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 35e188cf4ea4..9704634a87aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -7,7 +7,7 @@
 #include "en/params.h"
 #include <net/xdp_sock.h>
 
-int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid)
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5e_params *params = &priv->channels.params;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 7add18bf78d8..9c505158b975 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -8,7 +8,7 @@
 
 /* TX data path */
 
-int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid);
+int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
 
 bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 9a2fcef6e7f0..9df7e5c3c4cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4540,7 +4540,7 @@ const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
 	.ndo_bpf		 = mlx5e_xdp,
 	.ndo_xdp_xmit            = mlx5e_xdp_xmit,
-	.ndo_xsk_async_xmit      = mlx5e_xsk_async_xmit,
+	.ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
 #ifdef CONFIG_MLX5_EN_ARFS
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55ac223553f8..0ef0570386a2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -901,6 +901,10 @@ struct netdev_bpf {
 	};
 };
 
+/* Flags for ndo_xsk_wakeup. */
+#define XDP_WAKEUP_RX (1 << 0)
+#define XDP_WAKEUP_TX (1 << 1)
+
 #ifdef CONFIG_XFRM_OFFLOAD
 struct xfrmdev_ops {
 	int	(*xdo_dev_state_add) (struct xfrm_state *x);
@@ -1227,6 +1231,12 @@ struct tlsdev_ops;
  *	that got dropped are freed/returned via xdp_return_frame().
  *	Returns negative number, means general error invoking ndo, meaning
  *	no frames were xmit'ed and core-caller will free all frames.
+ * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags);
+ *      This function is used to wake up the softirq, ksoftirqd or kthread
+ *	responsible for sending and/or receiving packets on a specific
+ *	queue id bound to an AF_XDP socket. The flags field specifies if
+ *	only RX, only Tx, or both should be woken up using the flags
+ *	XDP_WAKEUP_RX and XDP_WAKEUP_TX.
  * struct devlink_port *(*ndo_get_devlink_port)(struct net_device *dev);
  *	Get devlink port instance associated with a given netdev.
  *	Called with a reference on the netdevice and devlink locks only,
@@ -1426,8 +1436,8 @@ struct net_device_ops {
 	int			(*ndo_xdp_xmit)(struct net_device *dev, int n,
 						struct xdp_frame **xdp,
 						u32 flags);
-	int			(*ndo_xsk_async_xmit)(struct net_device *dev,
-						      u32 queue_id);
+	int			(*ndo_xsk_wakeup)(struct net_device *dev,
+						  u32 queue_id, u32 flags);
 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
 };
 
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index a0607969f8c0..6e2d4da4cdcd 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -112,8 +112,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 		/* For copy-mode, we are done. */
 		return 0;
 
-	if (!dev->netdev_ops->ndo_bpf ||
-	    !dev->netdev_ops->ndo_xsk_async_xmit) {
+	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) {
 		err = -EOPNOTSUPP;
 		goto err_unreg_umem;
 	}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59b57d708697..1fe40a936456 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -212,7 +212,8 @@ static int xsk_zc_xmit(struct sock *sk)
 	struct xdp_sock *xs = xdp_sk(sk);
 	struct net_device *dev = xs->dev;
 
-	return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id);
+	return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+					       XDP_WAKEUP_TX);
 }
 
 static void xsk_destruct_skb(struct sk_buff *skb)
-- 
cgit v1.2.3


From 77cd0d7b3f257fd0e3096b4fdcff1a7d38e99e10 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 14 Aug 2019 09:27:17 +0200
Subject: xsk: add support for need_wakeup flag in AF_XDP rings

This commit adds support for a new flag called need_wakeup in the
AF_XDP Tx and fill rings. When this flag is set, it means that the
application has to explicitly wake up the kernel Rx (for the bit in
the fill ring) or kernel Tx (for bit in the Tx ring) processing by
issuing a syscall. Poll() can wake up both depending on the flags
submitted and sendto() will wake up tx processing only.

The main reason for introducing this new flag is to be able to
efficiently support the case when application and driver is executing
on the same core. Previously, the driver was just busy-spinning on the
fill ring if it ran out of buffers in the HW and there were none on
the fill ring. This approach works when the application is running on
another core as it can replenish the fill ring while the driver is
busy-spinning. Though, this is a lousy approach if both of them are
running on the same core as the probability of the fill ring getting
more entries when the driver is busy-spinning is zero. With this new
feature the driver now sets the need_wakeup flag and returns to the
application. The application can then replenish the fill queue and
then explicitly wake up the Rx processing in the kernel using the
syscall poll(). For Tx, the flag is only set to one if the driver has
no outstanding Tx completion interrupts. If it has some, the flag is
zero as it will be woken up by a completion interrupt anyway.

As a nice side effect, this new flag also improves the performance of
the case where application and driver are running on two different
cores as it reduces the number of syscalls to the kernel. The kernel
tells user space if it needs to be woken up by a syscall, and this
eliminates many of the syscalls.

This flag needs some simple driver support. If the driver does not
support this, the Rx flag is always zero and the Tx flag is always
one. This makes any application relying on this feature default to the
old behaviour of not requiring any syscalls in the Rx path and always
having to call sendto() in the Tx path.

For backwards compatibility reasons, this feature has to be explicitly
turned on using a new bind flag (XDP_USE_NEED_WAKEUP). I recommend
that you always turn it on as it so far always have had a positive
performance impact.

The name and inspiration of the flag has been taken from io_uring by
Jens Axboe. Details about this feature in io_uring can be found in
http://kernel.dk/io_uring.pdf, section 8.3.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h      |  33 +++++++++-
 include/uapi/linux/if_xdp.h |  13 ++++
 net/xdp/xdp_umem.c          |   9 +++
 net/xdp/xsk.c               | 146 ++++++++++++++++++++++++++++++++++++++------
 net/xdp/xsk.h               |  13 ++++
 net/xdp/xsk_queue.h         |   1 +
 6 files changed, 195 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 69796d264f06..6aebea2b18cb 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -27,6 +27,9 @@ struct xdp_umem_fq_reuse {
 	u64 handles[];
 };
 
+/* Flags for the umem flags field. */
+#define XDP_UMEM_USES_NEED_WAKEUP (1 << 0)
+
 struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
@@ -41,10 +44,12 @@ struct xdp_umem {
 	struct work_struct work;
 	struct page **pgs;
 	u32 npgs;
+	u16 queue_id;
+	u8 need_wakeup;
+	u8 flags;
 	int id;
 	struct net_device *dev;
 	struct xdp_umem_fq_reuse *fq_reuse;
-	u16 queue_id;
 	bool zc;
 	spinlock_t xsk_list_lock;
 	struct list_head xsk_list;
@@ -95,6 +100,11 @@ struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
 					  struct xdp_umem_fq_reuse *newq);
 void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
 struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
 
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
@@ -241,6 +251,27 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
 {
 }
 
+static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+	return false;
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index faaa5ca2a117..62b80d57b72a 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -16,6 +16,15 @@
 #define XDP_SHARED_UMEM	(1 << 0)
 #define XDP_COPY	(1 << 1) /* Force copy-mode */
 #define XDP_ZEROCOPY	(1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application need to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
 
 struct sockaddr_xdp {
 	__u16 sxdp_family;
@@ -25,10 +34,14 @@ struct sockaddr_xdp {
 	__u32 sxdp_shared_umem_fd;
 };
 
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
 struct xdp_ring_offset {
 	__u64 producer;
 	__u64 consumer;
 	__u64 desc;
+	__u64 flags;
 };
 
 struct xdp_mmap_offsets {
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 6e2d4da4cdcd..cda6feb1edb0 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -106,6 +106,15 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 	umem->dev = dev;
 	umem->queue_id = queue_id;
 
+	if (flags & XDP_USE_NEED_WAKEUP) {
+		umem->flags |= XDP_UMEM_USES_NEED_WAKEUP;
+		/* Tx needs to be explicitly woken up the first time.
+		 * Also for supporting drivers that do not implement this
+		 * feature. They will always have to call sendto().
+		 */
+		xsk_set_tx_need_wakeup(umem);
+	}
+
 	dev_hold(dev);
 
 	if (force_copy)
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 1fe40a936456..9f900b56b15b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -55,6 +55,66 @@ void xsk_umem_discard_addr(struct xdp_umem *umem)
 }
 EXPORT_SYMBOL(xsk_umem_discard_addr);
 
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+	if (umem->need_wakeup & XDP_WAKEUP_RX)
+		return;
+
+	umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP;
+	umem->need_wakeup |= XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_set_rx_need_wakeup);
+
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+	struct xdp_sock *xs;
+
+	if (umem->need_wakeup & XDP_WAKEUP_TX)
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
+	}
+	rcu_read_unlock();
+
+	umem->need_wakeup |= XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_set_tx_need_wakeup);
+
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+	if (!(umem->need_wakeup & XDP_WAKEUP_RX))
+		return;
+
+	umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+	umem->need_wakeup &= ~XDP_WAKEUP_RX;
+}
+EXPORT_SYMBOL(xsk_clear_rx_need_wakeup);
+
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+	struct xdp_sock *xs;
+
+	if (!(umem->need_wakeup & XDP_WAKEUP_TX))
+		return;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
+	}
+	rcu_read_unlock();
+
+	umem->need_wakeup &= ~XDP_WAKEUP_TX;
+}
+EXPORT_SYMBOL(xsk_clear_tx_need_wakeup);
+
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+	return umem->flags & XDP_UMEM_USES_NEED_WAKEUP;
+}
+EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
+
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
 	void *to_buf, *from_buf;
@@ -320,6 +380,12 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
 	unsigned int mask = datagram_poll(file, sock, wait);
 	struct sock *sk = sock->sk;
 	struct xdp_sock *xs = xdp_sk(sk);
+	struct net_device *dev = xs->dev;
+	struct xdp_umem *umem = xs->umem;
+
+	if (umem->need_wakeup)
+		dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+						umem->need_wakeup);
 
 	if (xs->rx && !xskq_empty_desc(xs->rx))
 		mask |= POLLIN | POLLRDNORM;
@@ -428,7 +494,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		return -EINVAL;
 
 	flags = sxdp->sxdp_flags;
-	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
+	if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY |
+		      XDP_USE_NEED_WAKEUP))
 		return -EINVAL;
 
 	rtnl_lock();
@@ -455,7 +522,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		struct xdp_sock *umem_xs;
 		struct socket *sock;
 
-		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
+		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) ||
+		    (flags & XDP_USE_NEED_WAKEUP)) {
 			/* Cannot specify flags for shared sockets. */
 			err = -EINVAL;
 			goto out_unlock;
@@ -550,6 +618,9 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		}
 		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
 		err = xsk_init_queue(entries, q, false);
+		if (!err && optname == XDP_TX_RING)
+			/* Tx needs to be explicitly woken up the first time */
+			xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
 		mutex_unlock(&xs->mutex);
 		return err;
 	}
@@ -611,6 +682,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 	return -ENOPROTOOPT;
 }
 
+static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring)
+{
+	ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
+	ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
+	ring->desc = offsetof(struct xdp_rxtx_ring, desc);
+}
+
+static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring)
+{
+	ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer);
+	ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
+	ring->desc = offsetof(struct xdp_umem_ring, desc);
+}
+
 static int xsk_getsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, int __user *optlen)
 {
@@ -650,26 +735,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
 	case XDP_MMAP_OFFSETS:
 	{
 		struct xdp_mmap_offsets off;
+		struct xdp_mmap_offsets_v1 off_v1;
+		bool flags_supported = true;
+		void *to_copy;
 
-		if (len < sizeof(off))
+		if (len < sizeof(off_v1))
 			return -EINVAL;
+		else if (len < sizeof(off))
+			flags_supported = false;
+
+		if (flags_supported) {
+			/* xdp_ring_offset is identical to xdp_ring_offset_v1
+			 * except for the flags field added to the end.
+			 */
+			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+					       &off.rx);
+			xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *)
+					       &off.tx);
+			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+					       &off.fr);
+			xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *)
+					       &off.cr);
+			off.rx.flags = offsetof(struct xdp_rxtx_ring,
+						ptrs.flags);
+			off.tx.flags = offsetof(struct xdp_rxtx_ring,
+						ptrs.flags);
+			off.fr.flags = offsetof(struct xdp_umem_ring,
+						ptrs.flags);
+			off.cr.flags = offsetof(struct xdp_umem_ring,
+						ptrs.flags);
+
+			len = sizeof(off);
+			to_copy = &off;
+		} else {
+			xsk_enter_rxtx_offsets(&off_v1.rx);
+			xsk_enter_rxtx_offsets(&off_v1.tx);
+			xsk_enter_umem_offsets(&off_v1.fr);
+			xsk_enter_umem_offsets(&off_v1.cr);
+
+			len = sizeof(off_v1);
+			to_copy = &off_v1;
+		}
 
-		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
-		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
-		off.rx.desc	= offsetof(struct xdp_rxtx_ring, desc);
-		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
-		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
-		off.tx.desc	= offsetof(struct xdp_rxtx_ring, desc);
-
-		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
-		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
-		off.fr.desc	= offsetof(struct xdp_umem_ring, desc);
-		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
-		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
-		off.cr.desc	= offsetof(struct xdp_umem_ring, desc);
-
-		len = sizeof(off);
-		if (copy_to_user(optval, &off, len))
+		if (copy_to_user(optval, to_copy, len))
 			return -EFAULT;
 		if (put_user(len, optlen))
 			return -EFAULT;
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index ba8120610426..4cfd106bdb53 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -4,6 +4,19 @@
 #ifndef XSK_H_
 #define XSK_H_
 
+struct xdp_ring_offset_v1 {
+	__u64 producer;
+	__u64 consumer;
+	__u64 desc;
+};
+
+struct xdp_mmap_offsets_v1 {
+	struct xdp_ring_offset_v1 rx;
+	struct xdp_ring_offset_v1 tx;
+	struct xdp_ring_offset_v1 fr;
+	struct xdp_ring_offset_v1 cr;
+};
+
 static inline struct xdp_sock *xdp_sk(struct sock *sk)
 {
 	return (struct xdp_sock *)sk;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 909c5168ed0f..dd9e985c2461 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -16,6 +16,7 @@
 struct xdp_ring {
 	u32 producer ____cacheline_aligned_in_smp;
 	u32 consumer ____cacheline_aligned_in_smp;
+	u32 flags;
 };
 
 /* Used for the RX and TX queues for packets */
-- 
cgit v1.2.3


From b0e4701ce15d0381cdea0643c7f0a35dc529cec2 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 14 Aug 2019 10:37:48 -0700
Subject: bpf: export bpf_map_inc_not_zero

Rename existing bpf_map_inc_not_zero to __bpf_map_inc_not_zero to
indicate that it's caller's responsibility to do proper locking.
Create and export bpf_map_inc_not_zero wrapper that properly
locks map_idr_lock. Will be used in the next commit to
hold a map while cloning a socket.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h  |  2 ++
 kernel/bpf/syscall.c | 16 +++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f9a506147c8a..15ae49862b82 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -647,6 +647,8 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
 struct bpf_map *__bpf_map_get(struct fd f);
 struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
+struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map,
+						   bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5d141f16f6fa..cf8052b016e7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -683,8 +683,8 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 }
 
 /* map_idr_lock should have been held */
-static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
-					    bool uref)
+static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map,
+					      bool uref)
 {
 	int refold;
 
@@ -704,6 +704,16 @@ static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
 	return map;
 }
 
+struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref)
+{
+	spin_lock_bh(&map_idr_lock);
+	map = __bpf_map_inc_not_zero(map, uref);
+	spin_unlock_bh(&map_idr_lock);
+
+	return map;
+}
+EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
+
 int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 {
 	return -ENOTSUPP;
@@ -2177,7 +2187,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
 	spin_lock_bh(&map_idr_lock);
 	map = idr_find(&map_idr, id);
 	if (map)
-		map = bpf_map_inc_not_zero(map, true);
+		map = __bpf_map_inc_not_zero(map, true);
 	else
 		map = ERR_PTR(-ENOENT);
 	spin_unlock_bh(&map_idr_lock);
-- 
cgit v1.2.3


From 8f51dfc73bf181f2304e1498f55d5f452e060cbe Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 14 Aug 2019 10:37:49 -0700
Subject: bpf: support cloning sk storage on accept()

Add new helper bpf_sk_storage_clone which optionally clones sk storage
and call it from sk_clone_lock.

Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/bpf_sk_storage.h |  10 +++++
 include/uapi/linux/bpf.h     |   3 ++
 net/core/bpf_sk_storage.c    | 104 +++++++++++++++++++++++++++++++++++++++++--
 net/core/sock.c              |   9 ++--
 4 files changed, 120 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
index b9dcb02e756b..8e4f831d2e52 100644
--- a/include/net/bpf_sk_storage.h
+++ b/include/net/bpf_sk_storage.h
@@ -10,4 +10,14 @@ void bpf_sk_storage_free(struct sock *sk);
 extern const struct bpf_func_proto bpf_sk_storage_get_proto;
 extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
 
+#ifdef CONFIG_BPF_SYSCALL
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline int bpf_sk_storage_clone(const struct sock *sk,
+				       struct sock *newsk)
+{
+	return 0;
+}
+#endif
+
 #endif /* _BPF_SK_STORAGE_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4393bd4b2419..0ef594ac3899 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -337,6 +337,9 @@ enum bpf_attach_type {
 #define BPF_F_RDONLY_PROG	(1U << 7)
 #define BPF_F_WRONLY_PROG	(1U << 8)
 
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE		(1U << 9)
+
 /* flags for BPF_PROG_QUERY */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 94c7f77ecb6b..da5639a5bd3b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -12,6 +12,9 @@
 
 static atomic_t cache_idx;
 
+#define SK_STORAGE_CREATE_FLAG_MASK					\
+	(BPF_F_NO_PREALLOC | BPF_F_CLONE)
+
 struct bucket {
 	struct hlist_head list;
 	raw_spinlock_t lock;
@@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem)
 		kfree_rcu(sk_storage, rcu);
 }
 
-/* sk_storage->lock must be held and sk_storage->list cannot be empty */
 static void __selem_link_sk(struct bpf_sk_storage *sk_storage,
 			    struct bpf_sk_storage_elem *selem)
 {
@@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map)
 	return 0;
 }
 
-/* Called by __sk_destruct() */
+/* Called by __sk_destruct() & bpf_sk_storage_clone() */
 void bpf_sk_storage_free(struct sock *sk)
 {
 	struct bpf_sk_storage_elem *selem;
@@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 	smap = (struct bpf_sk_storage_map *)map;
 
+	/* Note that this map might be concurrently cloned from
+	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
+	 * RCU read section to finish before proceeding. New RCU
+	 * read sections should be prevented via bpf_map_inc_not_zero.
+	 */
 	synchronize_rcu();
 
 	/* bpf prog and the userspace can no longer access this map
@@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 
 static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 {
-	if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries ||
+	if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
+	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+	    attr->max_entries ||
 	    attr->key_size != sizeof(int) || !attr->value_size ||
 	    /* Enforce BTF for userspace sk dumping */
 	    !attr->btf_key_type_id || !attr->btf_value_type_id)
@@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
 	return err;
 }
 
+static struct bpf_sk_storage_elem *
+bpf_sk_storage_clone_elem(struct sock *newsk,
+			  struct bpf_sk_storage_map *smap,
+			  struct bpf_sk_storage_elem *selem)
+{
+	struct bpf_sk_storage_elem *copy_selem;
+
+	copy_selem = selem_alloc(smap, newsk, NULL, true);
+	if (!copy_selem)
+		return NULL;
+
+	if (map_value_has_spin_lock(&smap->map))
+		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
+				      SDATA(selem)->data, true);
+	else
+		copy_map_value(&smap->map, SDATA(copy_selem)->data,
+			       SDATA(selem)->data);
+
+	return copy_selem;
+}
+
+int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
+{
+	struct bpf_sk_storage *new_sk_storage = NULL;
+	struct bpf_sk_storage *sk_storage;
+	struct bpf_sk_storage_elem *selem;
+	int ret = 0;
+
+	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+
+	rcu_read_lock();
+	sk_storage = rcu_dereference(sk->sk_bpf_storage);
+
+	if (!sk_storage || hlist_empty(&sk_storage->list))
+		goto out;
+
+	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+		struct bpf_sk_storage_elem *copy_selem;
+		struct bpf_sk_storage_map *smap;
+		struct bpf_map *map;
+
+		smap = rcu_dereference(SDATA(selem)->smap);
+		if (!(smap->map.map_flags & BPF_F_CLONE))
+			continue;
+
+		/* Note that for lockless listeners adding new element
+		 * here can race with cleanup in bpf_sk_storage_map_free.
+		 * Try to grab map refcnt to make sure that it's still
+		 * alive and prevent concurrent removal.
+		 */
+		map = bpf_map_inc_not_zero(&smap->map, false);
+		if (IS_ERR(map))
+			continue;
+
+		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
+		if (!copy_selem) {
+			ret = -ENOMEM;
+			bpf_map_put(map);
+			goto out;
+		}
+
+		if (new_sk_storage) {
+			selem_link_map(smap, copy_selem);
+			__selem_link_sk(new_sk_storage, copy_selem);
+		} else {
+			ret = sk_storage_alloc(newsk, smap, copy_selem);
+			if (ret) {
+				kfree(copy_selem);
+				atomic_sub(smap->elem_size,
+					   &newsk->sk_omem_alloc);
+				bpf_map_put(map);
+				goto out;
+			}
+
+			new_sk_storage = rcu_dereference(copy_selem->sk_storage);
+		}
+		bpf_map_put(map);
+	}
+
+out:
+	rcu_read_unlock();
+
+	/* In case of an error, don't free anything explicitly here, the
+	 * caller is responsible to call bpf_sk_storage_free.
+	 */
+
+	return ret;
+}
+
 BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
 	   void *, value, u64, flags)
 {
diff --git a/net/core/sock.c b/net/core/sock.c
index d57b0cc995a0..f5e801a9cea4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 			goto out;
 		}
 		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
-#ifdef CONFIG_BPF_SYSCALL
-		RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
-#endif
+
+		if (bpf_sk_storage_clone(sk, newsk)) {
+			sk_free_unlock_clone(newsk);
+			newsk = NULL;
+			goto out;
+		}
 
 		newsk->sk_err	   = 0;
 		newsk->sk_err_soft = 0;
-- 
cgit v1.2.3


From 0402acd683c678874df6bdbc23530ca07ea19353 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Thu, 15 Aug 2019 11:30:13 +0200
Subject: xsk: remove AF_XDP socket from map when the socket is released
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When an AF_XDP socket is released/closed the XSKMAP still holds a
reference to the socket in a "released" state. The socket will still
use the netdev queue resource, and block newly created sockets from
attaching to that queue, but no user application can access the
fill/complete/rx/tx queues. This results in that all applications need
to explicitly clear the map entry from the old "zombie state"
socket. This should be done automatically.

In this patch, the sockets tracks, and have a reference to, which maps
it resides in. When the socket is released, it will remove itself from
all maps.

Suggested-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h |  18 +++++++
 kernel/bpf/xskmap.c    | 125 +++++++++++++++++++++++++++++++++++++++++--------
 net/xdp/xsk.c          |  50 ++++++++++++++++++++
 3 files changed, 173 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 6aebea2b18cb..f023b9940d64 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -55,6 +55,16 @@ struct xdp_umem {
 	struct list_head xsk_list;
 };
 
+/* Nodes are linked in the struct xdp_sock map_list field, and used to
+ * track which maps a certain socket reside in.
+ */
+struct xsk_map;
+struct xsk_map_node {
+	struct list_head node;
+	struct xsk_map *map;
+	struct xdp_sock **map_entry;
+};
+
 struct xdp_sock {
 	/* struct sock must be the first member of struct xdp_sock */
 	struct sock sk;
@@ -80,6 +90,9 @@ struct xdp_sock {
 	/* Protects generic receive. */
 	spinlock_t rx_lock;
 	u64 rx_dropped;
+	struct list_head map_list;
+	/* Protects map_list */
+	spinlock_t map_list_lock;
 };
 
 struct xdp_buff;
@@ -106,6 +119,11 @@ void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
 void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
 bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
 
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+			     struct xdp_sock **map_entry);
+int xsk_map_inc(struct xsk_map *map);
+void xsk_map_put(struct xsk_map *map);
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
 	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 9bb96ace9fa1..16031d489173 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -13,8 +13,71 @@ struct xsk_map {
 	struct bpf_map map;
 	struct xdp_sock **xsk_map;
 	struct list_head __percpu *flush_list;
+	spinlock_t lock; /* Synchronize map updates */
 };
 
+int xsk_map_inc(struct xsk_map *map)
+{
+	struct bpf_map *m = &map->map;
+
+	m = bpf_map_inc(m, false);
+	return IS_ERR(m) ? PTR_ERR(m) : 0;
+}
+
+void xsk_map_put(struct xsk_map *map)
+{
+	bpf_map_put(&map->map);
+}
+
+static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
+					       struct xdp_sock **map_entry)
+{
+	struct xsk_map_node *node;
+	int err;
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+	if (!node)
+		return NULL;
+
+	err = xsk_map_inc(map);
+	if (err) {
+		kfree(node);
+		return ERR_PTR(err);
+	}
+
+	node->map = map;
+	node->map_entry = map_entry;
+	return node;
+}
+
+static void xsk_map_node_free(struct xsk_map_node *node)
+{
+	xsk_map_put(node->map);
+	kfree(node);
+}
+
+static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
+{
+	spin_lock_bh(&xs->map_list_lock);
+	list_add_tail(&node->node, &xs->map_list);
+	spin_unlock_bh(&xs->map_list_lock);
+}
+
+static void xsk_map_sock_delete(struct xdp_sock *xs,
+				struct xdp_sock **map_entry)
+{
+	struct xsk_map_node *n, *tmp;
+
+	spin_lock_bh(&xs->map_list_lock);
+	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
+		if (map_entry == n->map_entry) {
+			list_del(&n->node);
+			xsk_map_node_free(n);
+		}
+	}
+	spin_unlock_bh(&xs->map_list_lock);
+}
+
 static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 {
 	struct xsk_map *m;
@@ -34,6 +97,7 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 		return ERR_PTR(-ENOMEM);
 
 	bpf_map_init_from_attr(&m->map, attr);
+	spin_lock_init(&m->lock);
 
 	cost = (u64)m->map.max_entries * sizeof(struct xdp_sock *);
 	cost += sizeof(struct list_head) * num_possible_cpus();
@@ -71,21 +135,9 @@ free_m:
 static void xsk_map_free(struct bpf_map *map)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	int i;
 
 	bpf_clear_redirect_map(map);
 	synchronize_net();
-
-	for (i = 0; i < map->max_entries; i++) {
-		struct xdp_sock *xs;
-
-		xs = m->xsk_map[i];
-		if (!xs)
-			continue;
-
-		sock_put((struct sock *)xs);
-	}
-
 	free_percpu(m->flush_list);
 	bpf_map_area_free(m->xsk_map);
 	kfree(m);
@@ -164,8 +216,9 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 			       u64 map_flags)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *xs, *old_xs, **map_entry;
 	u32 i = *(u32 *)key, fd = *(u32 *)value;
-	struct xdp_sock *xs, *old_xs;
+	struct xsk_map_node *node;
 	struct socket *sock;
 	int err;
 
@@ -192,32 +245,64 @@ static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
 		return -EOPNOTSUPP;
 	}
 
-	sock_hold(sock->sk);
+	map_entry = &m->xsk_map[i];
+	node = xsk_map_node_alloc(m, map_entry);
+	if (IS_ERR(node)) {
+		sockfd_put(sock);
+		return PTR_ERR(node);
+	}
 
-	old_xs = xchg(&m->xsk_map[i], xs);
+	spin_lock_bh(&m->lock);
+	old_xs = READ_ONCE(*map_entry);
+	if (old_xs == xs) {
+		err = 0;
+		goto out;
+	}
+	xsk_map_sock_add(xs, node);
+	WRITE_ONCE(*map_entry, xs);
 	if (old_xs)
-		sock_put((struct sock *)old_xs);
-
+		xsk_map_sock_delete(old_xs, map_entry);
+	spin_unlock_bh(&m->lock);
 	sockfd_put(sock);
 	return 0;
+
+out:
+	spin_unlock_bh(&m->lock);
+	sockfd_put(sock);
+	xsk_map_node_free(node);
+	return err;
 }
 
 static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 {
 	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *old_xs;
+	struct xdp_sock *old_xs, **map_entry;
 	int k = *(u32 *)key;
 
 	if (k >= map->max_entries)
 		return -EINVAL;
 
-	old_xs = xchg(&m->xsk_map[k], NULL);
+	spin_lock_bh(&m->lock);
+	map_entry = &m->xsk_map[k];
+	old_xs = xchg(map_entry, NULL);
 	if (old_xs)
-		sock_put((struct sock *)old_xs);
+		xsk_map_sock_delete(old_xs, map_entry);
+	spin_unlock_bh(&m->lock);
 
 	return 0;
 }
 
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+			     struct xdp_sock **map_entry)
+{
+	spin_lock_bh(&map->lock);
+	if (READ_ONCE(*map_entry) == xs) {
+		WRITE_ONCE(*map_entry, NULL);
+		xsk_map_sock_delete(xs, map_entry);
+	}
+	spin_unlock_bh(&map->lock);
+}
+
 const struct bpf_map_ops xsk_map_ops = {
 	.map_alloc = xsk_map_alloc,
 	.map_free = xsk_map_free,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f900b56b15b..ee4428a892fa 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -429,6 +429,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs)
 	dev_put(dev);
 }
 
+static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs,
+					      struct xdp_sock ***map_entry)
+{
+	struct xsk_map *map = NULL;
+	struct xsk_map_node *node;
+
+	*map_entry = NULL;
+
+	spin_lock_bh(&xs->map_list_lock);
+	node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node,
+					node);
+	if (node) {
+		WARN_ON(xsk_map_inc(node->map));
+		map = node->map;
+		*map_entry = node->map_entry;
+	}
+	spin_unlock_bh(&xs->map_list_lock);
+	return map;
+}
+
+static void xsk_delete_from_maps(struct xdp_sock *xs)
+{
+	/* This function removes the current XDP socket from all the
+	 * maps it resides in. We need to take extra care here, due to
+	 * the two locks involved. Each map has a lock synchronizing
+	 * updates to the entries, and each socket has a lock that
+	 * synchronizes access to the list of maps (map_list). For
+	 * deadlock avoidance the locks need to be taken in the order
+	 * "map lock"->"socket map list lock". We start off by
+	 * accessing the socket map list, and take a reference to the
+	 * map to guarantee existence between the
+	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete()
+	 * calls. Then we ask the map to remove the socket, which
+	 * tries to remove the socket from the map. Note that there
+	 * might be updates to the map between
+	 * xsk_get_map_list_entry() and xsk_map_try_sock_delete().
+	 */
+	struct xdp_sock **map_entry = NULL;
+	struct xsk_map *map;
+
+	while ((map = xsk_get_map_list_entry(xs, &map_entry))) {
+		xsk_map_try_sock_delete(map, xs, map_entry);
+		xsk_map_put(map);
+	}
+}
+
 static int xsk_release(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
@@ -448,6 +494,7 @@ static int xsk_release(struct socket *sock)
 	sock_prot_inuse_add(net, sk->sk_prot, -1);
 	local_bh_enable();
 
+	xsk_delete_from_maps(xs);
 	xsk_unbind_dev(xs);
 
 	xskq_destroy(xs->rx);
@@ -964,6 +1011,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
 	spin_lock_init(&xs->rx_lock);
 	spin_lock_init(&xs->tx_completion_lock);
 
+	INIT_LIST_HEAD(&xs->map_list);
+	spin_lock_init(&xs->map_list_lock);
+
 	mutex_lock(&net->xdp.lock);
 	sk_add_node_rcu(sk, &net->xdp.list);
 	mutex_unlock(&net->xdp.lock);
-- 
cgit v1.2.3


From 24cab82c34aa6f3ede3de1d8621624cb5ef33feb Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 18 Mar 2019 10:13:01 +0000
Subject: KVM: arm/arm64: vgic: Add LPI translation cache definition

Add the basic data structure that expresses an MSI to LPI
translation as well as the allocation/release hooks.

The size of the cache is arbitrarily defined as 16*nr_vcpus.

Tested-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/kvm/arm_vgic.h        |  3 +++
 virt/kvm/arm/vgic/vgic-init.c |  5 +++++
 virt/kvm/arm/vgic/vgic-its.c  | 49 +++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic/vgic.h      |  2 ++
 4 files changed, 59 insertions(+)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7a30524a80ee..ded50a30e2d5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -249,6 +249,9 @@ struct vgic_dist {
 	struct list_head	lpi_list_head;
 	int			lpi_list_count;
 
+	/* LPI translation cache */
+	struct list_head	lpi_translation_cache;
+
 	/* used by vgic-debug */
 	struct vgic_state_iter *iter;
 
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index bdbc297d06fb..80127ca9269f 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -53,6 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
 	struct vgic_dist *dist = &kvm->arch.vgic;
 
 	INIT_LIST_HEAD(&dist->lpi_list_head);
+	INIT_LIST_HEAD(&dist->lpi_translation_cache);
 	raw_spin_lock_init(&dist->lpi_list_lock);
 }
 
@@ -294,6 +295,7 @@ int vgic_init(struct kvm *kvm)
 	}
 
 	if (vgic_has_its(kvm)) {
+		vgic_lpi_translation_cache_init(kvm);
 		ret = vgic_v4_init(kvm);
 		if (ret)
 			goto out;
@@ -335,6 +337,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
 		INIT_LIST_HEAD(&dist->rd_regions);
 	}
 
+	if (vgic_has_its(kvm))
+		vgic_lpi_translation_cache_destroy(kvm);
+
 	if (vgic_supports_direct_msis(kvm))
 		vgic_v4_teardown(kvm);
 }
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 482036612adf..0e5c1519bbe2 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -138,6 +138,14 @@ struct its_ite {
 	u32 event_id;
 };
 
+struct vgic_translation_cache_entry {
+	struct list_head	entry;
+	phys_addr_t		db;
+	u32			devid;
+	u32			eventid;
+	struct vgic_irq		*irq;
+};
+
 /**
  * struct vgic_its_abi - ITS abi ops and settings
  * @cte_esz: collection table entry size
@@ -1657,6 +1665,45 @@ out:
 	return ret;
 }
 
+/* Default is 16 cached LPIs per vcpu */
+#define LPI_DEFAULT_PCPU_CACHE_SIZE	16
+
+void vgic_lpi_translation_cache_init(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	unsigned int sz;
+	int i;
+
+	if (!list_empty(&dist->lpi_translation_cache))
+		return;
+
+	sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
+
+	for (i = 0; i < sz; i++) {
+		struct vgic_translation_cache_entry *cte;
+
+		/* An allocation failure is not fatal */
+		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
+		if (WARN_ON(!cte))
+			break;
+
+		INIT_LIST_HEAD(&cte->entry);
+		list_add(&cte->entry, &dist->lpi_translation_cache);
+	}
+}
+
+void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct vgic_translation_cache_entry *cte, *tmp;
+
+	list_for_each_entry_safe(cte, tmp,
+				 &dist->lpi_translation_cache, entry) {
+		list_del(&cte->entry);
+		kfree(cte);
+	}
+}
+
 #define INITIAL_BASER_VALUE						  \
 	(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)		| \
 	 GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner)		| \
@@ -1685,6 +1732,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 			kfree(its);
 			return ret;
 		}
+
+		vgic_lpi_translation_cache_init(dev->kvm);
 	}
 
 	mutex_init(&its->its_lock);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 797e05004d80..a25b790ffa4e 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -307,6 +307,8 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
 int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
 			 u32 devid, u32 eventid, struct vgic_irq **irq);
 struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+void vgic_lpi_translation_cache_init(struct kvm *kvm);
+void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
 
 bool vgic_supports_direct_msis(struct kvm *kvm);
 int vgic_v4_init(struct kvm *kvm);
-- 
cgit v1.2.3


From ef01adae0e43cfb2468d0ea07137cc63cf31495c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 16 Aug 2019 03:24:09 +0200
Subject: net: sched: use major priority number as hardware priority

tc transparently maps the software priority number to hardware. Update
it to pass the major priority which is what most drivers expect. Update
drivers too so they do not need to lshift the priority field of the
flow_cls_common_offload object. The stmmac driver is an exception, since
this code assumes the tc software priority is fine, therefore, lshift it
just to be conservative.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c      |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c   |  2 +-
 drivers/net/ethernet/mscc/ocelot_flower.c            | 12 +++---------
 drivers/net/ethernet/netronome/nfp/flower/qos_conf.c |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c      |  2 +-
 include/net/pkt_cls.h                                |  2 +-
 6 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index deeb65da99f3..00b2d4a86159 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3167,7 +3167,7 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
 
 	esw_attr->parse_attr = parse_attr;
 	esw_attr->chain = f->common.chain_index;
-	esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+	esw_attr->prio = f->common.prio;
 
 	esw_attr->in_rep = in_rep;
 	esw_attr->in_mdev = in_mdev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index e8ac90564dbe..84a87d059333 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -471,7 +471,7 @@ int mlxsw_sp_acl_rulei_commit(struct mlxsw_sp_acl_rule_info *rulei)
 void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei,
 				 unsigned int priority)
 {
-	rulei->priority = priority >> 16;
+	rulei->priority = priority;
 }
 
 void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei,
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 59487d446a09..b894bc0c9c16 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -13,12 +13,6 @@ struct ocelot_port_block {
 	struct ocelot_port *port;
 };
 
-static u16 get_prio(u32 prio)
-{
-	/* prio starts from 0x1000 while the ids starts from 0 */
-	return prio >> 16;
-}
-
 static int ocelot_flower_parse_action(struct flow_cls_offload *f,
 				      struct ocelot_ace_rule *rule)
 {
@@ -168,7 +162,7 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 	}
 
 finished_key_parsing:
-	ocelot_rule->prio = get_prio(f->common.prio);
+	ocelot_rule->prio = f->common.prio;
 	ocelot_rule->id = f->cookie;
 	return ocelot_flower_parse_action(f, ocelot_rule);
 }
@@ -218,7 +212,7 @@ static int ocelot_flower_destroy(struct flow_cls_offload *f,
 	struct ocelot_ace_rule rule;
 	int ret;
 
-	rule.prio = get_prio(f->common.prio);
+	rule.prio = f->common.prio;
 	rule.port = port_block->port;
 	rule.id = f->cookie;
 
@@ -236,7 +230,7 @@ static int ocelot_flower_stats_update(struct flow_cls_offload *f,
 	struct ocelot_ace_rule rule;
 	int ret;
 
-	rule.prio = get_prio(f->common.prio);
+	rule.prio = f->common.prio;
 	rule.port = port_block->port;
 	rule.id = f->cookie;
 	ret = ocelot_ace_rule_stats_update(&rule);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
index 86e968cd5ffd..124a43dc136a 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c
@@ -93,7 +93,7 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev,
 		return -EOPNOTSUPP;
 	}
 
-	if (flow->common.prio != (1 << 16)) {
+	if (flow->common.prio != 1) {
 		NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload requires highest priority");
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 37c0bc699cd9..6c305b6ecad0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -94,7 +94,7 @@ static int tc_fill_entry(struct stmmac_priv *priv,
 	struct stmmac_tc_entry *entry, *frag = NULL;
 	struct tc_u32_sel *sel = cls->knode.sel;
 	u32 off, data, mask, real_off, rem;
-	u32 prio = cls->common.prio;
+	u32 prio = cls->common.prio << 16;
 	int ret;
 
 	/* Only 1 match per entry */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e429809ca90d..98be18ef1ed3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -646,7 +646,7 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
 {
 	cls_common->chain_index = tp->chain->index;
 	cls_common->protocol = tp->protocol;
-	cls_common->prio = tp->prio;
+	cls_common->prio = tp->prio >> 16;
 	if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE)
 		cls_common->extack = extack;
 }
-- 
cgit v1.2.3


From 3bc158f8d0330f0ac58597c023acca2234c14616 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 16 Aug 2019 03:24:10 +0200
Subject: netfilter: nf_tables: map basechain priority to hardware priority

This patch adds initial support for offloading basechains using the
priority range from 1 to 65535. This is restricting the netfilter
priority range to 16-bit integer since this is what most drivers assume
so far from tc. It should be possible to extend this range of supported
priorities later on once drivers are updated to support for 32-bit
integer priorities.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_tables_offload.h |  2 ++
 net/netfilter/nf_tables_api.c             |  4 ++++
 net/netfilter/nf_tables_offload.c         | 17 ++++++++++++++---
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 3196663a10e3..c8b9dec376f5 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -73,4 +73,6 @@ int nft_flow_rule_offload_commit(struct net *net);
 	(__reg)->key		= __key;				\
 	memset(&(__reg)->mask, 0xff, (__reg)->len);
 
+int nft_chain_offload_priority(struct nft_base_chain *basechain);
+
 #endif
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 88abbddf8967..d47469f824a1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1667,6 +1667,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 
 		chain->flags |= NFT_BASE_CHAIN | flags;
 		basechain->policy = NF_ACCEPT;
+		if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
+		    nft_chain_offload_priority(basechain) < 0)
+			return -EOPNOTSUPP;
+
 		flow_block_init(&basechain->flow_block);
 	} else {
 		chain = kzalloc(sizeof(*chain), GFP_KERNEL);
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 64f5fd5f240e..c0d18c1d77ac 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -103,10 +103,11 @@ void nft_offload_update_dependency(struct nft_offload_ctx *ctx,
 }
 
 static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
-					 __be16 proto,
-					struct netlink_ext_ack *extack)
+					 __be16 proto, int priority,
+					 struct netlink_ext_ack *extack)
 {
 	common->protocol = proto;
+	common->prio = priority;
 	common->extack = extack;
 }
 
@@ -124,6 +125,15 @@ static int nft_setup_cb_call(struct nft_base_chain *basechain,
 	return 0;
 }
 
+int nft_chain_offload_priority(struct nft_base_chain *basechain)
+{
+	if (basechain->ops.priority <= 0 ||
+	    basechain->ops.priority > USHRT_MAX)
+		return -1;
+
+	return 0;
+}
+
 static int nft_flow_offload_rule(struct nft_trans *trans,
 				 enum flow_cls_command command)
 {
@@ -142,7 +152,8 @@ static int nft_flow_offload_rule(struct nft_trans *trans,
 	if (flow)
 		proto = flow->proto;
 
-	nft_flow_offload_common_init(&cls_flow.common, proto, &extack);
+	nft_flow_offload_common_init(&cls_flow.common, proto,
+				     basechain->ops.priority, &extack);
 	cls_flow.command = command;
 	cls_flow.cookie = (unsigned long) rule;
 	if (flow)
-- 
cgit v1.2.3


From 89a26cd4b501e9511d3cd3d22327fc76a75a38b3 Mon Sep 17 00:00:00 2001
From: Juliana Rodrigueiro <juliana.rodrigueiro@intra2net.com>
Date: Fri, 16 Aug 2019 17:02:22 +0200
Subject: netfilter: xt_nfacct: Fix alignment mismatch in xt_nfacct_match_info

When running a 64-bit kernel with a 32-bit iptables binary, the size of
the xt_nfacct_match_info struct diverges.

    kernel: sizeof(struct xt_nfacct_match_info) : 40
    iptables: sizeof(struct xt_nfacct_match_info)) : 36

Trying to append nfacct related rules results in an unhelpful message.
Although it is suggested to look for more information in dmesg, nothing
can be found there.

    # iptables -A <chain> -m nfacct --nfacct-name <acct-object>
    iptables: Invalid argument. Run `dmesg' for more information.

This patch fixes the memory misalignment by enforcing 8-byte alignment
within the struct's first revision. This solution is often used in many
other uapi netfilter headers.

Signed-off-by: Juliana Rodrigueiro <juliana.rodrigueiro@intra2net.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_nfacct.h |  5 +++++
 net/netfilter/xt_nfacct.c                | 36 ++++++++++++++++++++++----------
 2 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h
index 5c8a4d760ee3..b5123ab8d54a 100644
--- a/include/uapi/linux/netfilter/xt_nfacct.h
+++ b/include/uapi/linux/netfilter/xt_nfacct.h
@@ -11,4 +11,9 @@ struct xt_nfacct_match_info {
 	struct nf_acct	*nfacct;
 };
 
+struct xt_nfacct_match_info_v1 {
+	char		name[NFACCT_NAME_MAX];
+	struct nf_acct	*nfacct __attribute__((aligned(8)));
+};
+
 #endif /* _XT_NFACCT_MATCH_H */
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index d0ab1adf5bff..5aab6df74e0f 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -54,25 +54,39 @@ nfacct_mt_destroy(const struct xt_mtdtor_param *par)
 	nfnl_acct_put(info->nfacct);
 }
 
-static struct xt_match nfacct_mt_reg __read_mostly = {
-	.name       = "nfacct",
-	.family     = NFPROTO_UNSPEC,
-	.checkentry = nfacct_mt_checkentry,
-	.match      = nfacct_mt,
-	.destroy    = nfacct_mt_destroy,
-	.matchsize  = sizeof(struct xt_nfacct_match_info),
-	.usersize   = offsetof(struct xt_nfacct_match_info, nfacct),
-	.me         = THIS_MODULE,
+static struct xt_match nfacct_mt_reg[] __read_mostly = {
+	{
+		.name       = "nfacct",
+		.revision   = 0,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = nfacct_mt_checkentry,
+		.match      = nfacct_mt,
+		.destroy    = nfacct_mt_destroy,
+		.matchsize  = sizeof(struct xt_nfacct_match_info),
+		.usersize   = offsetof(struct xt_nfacct_match_info, nfacct),
+		.me         = THIS_MODULE,
+	},
+	{
+		.name       = "nfacct",
+		.revision   = 1,
+		.family     = NFPROTO_UNSPEC,
+		.checkentry = nfacct_mt_checkentry,
+		.match      = nfacct_mt,
+		.destroy    = nfacct_mt_destroy,
+		.matchsize  = sizeof(struct xt_nfacct_match_info_v1),
+		.usersize   = offsetof(struct xt_nfacct_match_info_v1, nfacct),
+		.me         = THIS_MODULE,
+	},
 };
 
 static int __init nfacct_mt_init(void)
 {
-	return xt_register_match(&nfacct_mt_reg);
+	return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
 }
 
 static void __exit nfacct_mt_exit(void)
 {
-	xt_unregister_match(&nfacct_mt_reg);
+	xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg));
 }
 
 module_init(nfacct_mt_init);
-- 
cgit v1.2.3


From 33da8e7c814f77310250bb54a9db36a44c5de784 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 16 Aug 2019 12:33:54 -0500
Subject: signal: Allow cifs and drbd to receive their terminating signals
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

My recent to change to only use force_sig for a synchronous events
wound up breaking signal reception cifs and drbd.  I had overlooked
the fact that by default kthreads start out with all signals set to
SIG_IGN.  So a change I thought was safe turned out to have made it
impossible for those kernel thread to catch their signals.

Reverting the work on force_sig is a bad idea because what the code
was doing was very much a misuse of force_sig.  As the way force_sig
ultimately allowed the signal to happen was to change the signal
handler to SIG_DFL.  Which after the first signal will allow userspace
to send signals to these kernel threads.  At least for
wake_ack_receiver in drbd that does not appear actively wrong.

So correct this problem by adding allow_kernel_signal that will allow
signals whose siginfo reports they were sent by the kernel through,
but will not allow userspace generated signals, and update cifs and
drbd to call allow_kernel_signal in an appropriate place so that their
thread can receive this signal.

Fixing things this way ensures that userspace won't be able to send
signals and cause problems, that it is clear which signals the
threads are expecting to receive, and it guarantees that nothing
else in the system will be affected.

This change was partly inspired by similar cifs and drbd patches that
added allow_signal.

Reported-by: ronnie sahlberg <ronniesahlberg@gmail.com>
Reported-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Tested-by: Christoph Böhmwalder <christoph.boehmwalder@linbit.com>
Cc: Steve French <smfrench@gmail.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Fixes: 247bc9470b1e ("cifs: fix rmmod regression in cifs.ko caused by force_sig changes")
Fixes: 72abe3bcf091 ("signal/cifs: Fix cifs_put_tcp_session to call send_sig instead of force_sig")
Fixes: fee109901f39 ("signal/drbd: Use send_sig not force_sig")
Fixes: 3cf5d076fb4d ("signal: Remove task parameter from force_sig")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 drivers/block/drbd/drbd_main.c |  2 ++
 fs/cifs/connect.c              |  2 +-
 include/linux/signal.h         | 15 ++++++++++++++-
 kernel/signal.c                |  5 +++++
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9bd4ddd12b25..5b248763a672 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -322,6 +322,8 @@ static int drbd_thread_setup(void *arg)
 		 thi->name[0],
 		 resource->name);
 
+	allow_kernel_signal(DRBD_SIGKILL);
+	allow_kernel_signal(SIGXCPU);
 restart:
 	retval = thi->function(thi);
 
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a15a6e738eb5..1795e80cbdf7 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1113,7 +1113,7 @@ cifs_demultiplex_thread(void *p)
 		mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
 
 	set_freezable();
-	allow_signal(SIGKILL);
+	allow_kernel_signal(SIGKILL);
 	while (server->tcpStatus != CifsExiting) {
 		if (try_to_freeze())
 			continue;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index b5d99482d3fe..1a5f88316b08 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -282,6 +282,9 @@ extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 extern void kernel_sigaction(int, __sighandler_t);
 
+#define SIG_KTHREAD ((__force __sighandler_t)2)
+#define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3)
+
 static inline void allow_signal(int sig)
 {
 	/*
@@ -289,7 +292,17 @@ static inline void allow_signal(int sig)
 	 * know it'll be handled, so that they don't get converted to
 	 * SIGKILL or just silently dropped.
 	 */
-	kernel_sigaction(sig, (__force __sighandler_t)2);
+	kernel_sigaction(sig, SIG_KTHREAD);
+}
+
+static inline void allow_kernel_signal(int sig)
+{
+	/*
+	 * Kernel threads handle their own signals. Let the signal code
+	 * know signals sent by the kernel will be handled, so that they
+	 * don't get silently dropped.
+	 */
+	kernel_sigaction(sig, SIG_KTHREAD_KERNEL);
 }
 
 static inline void disallow_signal(int sig)
diff --git a/kernel/signal.c b/kernel/signal.c
index e667be6907d7..534fec266a33 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -90,6 +90,11 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
 	    handler == SIG_DFL && !(force && sig_kernel_only(sig)))
 		return true;
 
+	/* Only allow kernel generated signals to this kthread */
+	if (unlikely((t->flags & PF_KTHREAD) &&
+		     (handler == SIG_KTHREAD_KERNEL) && !force))
+		return true;
+
 	return sig_handler_ignored(handler, sig);
 }
 
-- 
cgit v1.2.3


From be378b600791044cdc9820fe0ae13efa9e5499aa Mon Sep 17 00:00:00 2001
From: Leonard Crestez <leonard.crestez@nxp.com>
Date: Tue, 13 Aug 2019 20:05:31 +0300
Subject: clk: imx8mn: Add GIC clock

This is enabled by default but if it's not explicitly defined and marked
as critical then its parent might get turned off.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/clk/imx/clk-imx8mn.c             | 5 +++++
 include/dt-bindings/clock/imx8mn-clock.h | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c
index 106cc417c19b..3a71bb37c656 100644
--- a/drivers/clk/imx/clk-imx8mn.c
+++ b/drivers/clk/imx/clk-imx8mn.c
@@ -271,6 +271,10 @@ static const char * const imx8mn_usb_phy_sels[] = {"osc_24m", "sys_pll1_100m", "
 						   "sys_pll2_100m", "sys_pll2_200m", "clk_ext2",
 						   "clk_ext3", "audio_pll2_out", };
 
+static const char * const imx8mn_gic_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_40m",
+					"sys_pll2_100m", "sys_pll1_800m", "clk_ext2",
+					"clk_ext4", "audio_pll2_out" };
+
 static const char * const imx8mn_ecspi1_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_40m",
 						  "sys_pll1_160m", "sys_pll1_800m", "sys_pll3_out",
 						  "sys_pll2_250m", "audio_pll2_out", };
@@ -524,6 +528,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
 	clks[IMX8MN_CLK_UART4] = imx8m_clk_composite("uart4", imx8mn_uart4_sels, base + 0xb080);
 	clks[IMX8MN_CLK_USB_CORE_REF] = imx8m_clk_composite("usb_core_ref", imx8mn_usb_core_sels, base + 0xb100);
 	clks[IMX8MN_CLK_USB_PHY_REF] = imx8m_clk_composite("usb_phy_ref", imx8mn_usb_phy_sels, base + 0xb180);
+	clks[IMX8MN_CLK_GIC] = imx8m_clk_composite_critical("gic", imx8mn_gic_sels, base + 0xb200);
 	clks[IMX8MN_CLK_ECSPI1] = imx8m_clk_composite("ecspi1", imx8mn_ecspi1_sels, base + 0xb280);
 	clks[IMX8MN_CLK_ECSPI2] = imx8m_clk_composite("ecspi2", imx8mn_ecspi2_sels, base + 0xb300);
 	clks[IMX8MN_CLK_PWM1] = imx8m_clk_composite("pwm1", imx8mn_pwm1_sels, base + 0xb380);
diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h
index 5255b1c2420e..d7b201652f4c 100644
--- a/include/dt-bindings/clock/imx8mn-clock.h
+++ b/include/dt-bindings/clock/imx8mn-clock.h
@@ -209,7 +209,8 @@
 #define IMX8MN_CLK_ARM				191
 #define IMX8MN_CLK_NAND_USDHC_BUS_RAWNAND_CLK	192
 #define IMX8MN_CLK_GPU_CORE_ROOT		193
+#define IMX8MN_CLK_GIC				194
 
-#define IMX8MN_CLK_END				194
+#define IMX8MN_CLK_END				195
 
 #endif
-- 
cgit v1.2.3


From 38a429c898ddd210cc35463b096389f97c3c5a73 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 19 Aug 2019 16:39:27 +0900
Subject: netfilter: add include guard to nf_conntrack_h323_types.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_h323_types.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_h323_types.h b/include/linux/netfilter/nf_conntrack_h323_types.h
index 7a6871ac8784..74c6f9241944 100644
--- a/include/linux/netfilter/nf_conntrack_h323_types.h
+++ b/include/linux/netfilter/nf_conntrack_h323_types.h
@@ -4,6 +4,9 @@
  * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
  */
 
+#ifndef _NF_CONNTRACK_H323_TYPES_H
+#define _NF_CONNTRACK_H323_TYPES_H
+
 typedef struct TransportAddress_ipAddress {	/* SEQUENCE */
 	int options;		/* No use */
 	unsigned int ip;
@@ -931,3 +934,5 @@ typedef struct RasMessage {	/* CHOICE */
 		InfoRequestResponse infoRequestResponse;
 	};
 } RasMessage;
+
+#endif /* _NF_CONNTRACK_H323_TYPES_H */
-- 
cgit v1.2.3


From b6a32bbd8735def2d0d696ba59205d1874b7800f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 16 Aug 2019 18:09:23 +0200
Subject: genirq: Force interrupt threading on RT

Switch force_irqthreads from a boot time modifiable variable to a compile
time constant when CONFIG_PREEMPT_RT is enabled.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190816160923.12855-1-bigeasy@linutronix.de
---
 include/linux/interrupt.h | 4 ++++
 kernel/irq/manage.c       | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5b8328a99b2a..07b527dca996 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -472,7 +472,11 @@ extern int irq_set_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
 				 bool state);
 
 #ifdef CONFIG_IRQ_FORCED_THREADING
+# ifdef CONFIG_PREEMPT_RT
+#  define force_irqthreads	(true)
+# else
 extern bool force_irqthreads;
+# endif
 #else
 #define force_irqthreads	(0)
 #endif
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index e8f7f179bf77..97de1b7d43af 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -23,7 +23,7 @@
 
 #include "internals.h"
 
-#ifdef CONFIG_IRQ_FORCED_THREADING
+#if defined(CONFIG_IRQ_FORCED_THREADING) && !defined(CONFIG_PREEMPT_RT)
 __read_mostly bool force_irqthreads;
 EXPORT_SYMBOL_GPL(force_irqthreads);
 
-- 
cgit v1.2.3


From 988721db93b2f5e6477cb0ea0b64ba9bcfd67778 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Fri, 16 Aug 2019 14:12:33 -0700
Subject: block: remove struct request_queue queue_head

The dispatch list is not used any more, as the legacy block IO stack
has been removed.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 1 -
 include/linux/blkdev.h | 4 ----
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 834aea04718f..5d0d7441a443 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -482,7 +482,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
-	INIT_LIST_HEAD(&q->queue_head);
 	q->last_merge = NULL;
 
 	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 167bf879f072..4798bb25f1ee 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,10 +391,6 @@ static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 struct request_queue {
-	/*
-	 * Together with queue_head for cacheline sharing
-	 */
-	struct list_head	queue_head;
 	struct request		*last_merge;
 	struct elevator_queue	*elevator;
 
-- 
cgit v1.2.3


From d6dfe43ec6062beea5ba1172b957e74a13c95b86 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 16 Aug 2019 17:48:36 -0400
Subject: svcrdma: Remove svc_rdma_wq

Clean up: the system workqueue will work just as well.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          | 1 -
 net/sunrpc/xprtrdma/svc_rdma.c           | 7 -------
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 3 ++-
 3 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 981f0d726ad4..edb39900fe04 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -200,7 +200,6 @@ extern struct svc_xprt_class svc_rdma_bc_class;
 #endif
 
 /* svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
 extern int svc_rdma_init(void);
 extern void svc_rdma_cleanup(void);
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index abdb3004a1e3..97bca509a391 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -73,8 +73,6 @@ atomic_t rdma_stat_rq_prod;
 atomic_t rdma_stat_sq_poll;
 atomic_t rdma_stat_sq_prod;
 
-struct workqueue_struct *svc_rdma_wq;
-
 /*
  * This function implements reading and resetting an atomic_t stat
  * variable through read/write to a proc file. Any write to the file
@@ -230,7 +228,6 @@ static struct ctl_table svcrdma_root_table[] = {
 void svc_rdma_cleanup(void)
 {
 	dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
-	destroy_workqueue(svc_rdma_wq);
 	if (svcrdma_table_header) {
 		unregister_sysctl_table(svcrdma_table_header);
 		svcrdma_table_header = NULL;
@@ -246,10 +243,6 @@ int svc_rdma_init(void)
 	dprintk("\tmax_bc_requests  : %u\n", svcrdma_max_bc_requests);
 	dprintk("\tmax_inline       : %d\n", svcrdma_max_req_size);
 
-	svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
-	if (!svc_rdma_wq)
-		return -ENOMEM;
-
 	if (!svcrdma_table_header)
 		svcrdma_table_header =
 			register_sysctl_table(svcrdma_root_table);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 3fe665152d95..18d6eb3686e7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -630,8 +630,9 @@ static void svc_rdma_free(struct svc_xprt *xprt)
 {
 	struct svcxprt_rdma *rdma =
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
+
 	INIT_WORK(&rdma->sc_work, __svc_rdma_free);
-	queue_work(svc_rdma_wq, &rdma->sc_work);
+	schedule_work(&rdma->sc_work);
 }
 
 static int svc_rdma_has_wspace(struct svc_xprt *xprt)
-- 
cgit v1.2.3


From 4866073e6ddf03066c925d3237903d7f4ca68982 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 16 Aug 2019 17:49:38 -0400
Subject: svcrdma: Use llist for managing cache of recv_ctxts

Use a wait-free mechanism for managing the svc_rdma_recv_ctxts free
list. Subsequently, sc_recv_lock can be eliminated.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h          |  5 +++--
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 24 ++++++++++--------------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  3 +--
 3 files changed, 14 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index edb39900fe04..40f65888dd38 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -42,6 +42,7 @@
 
 #ifndef SVC_RDMA_H
 #define SVC_RDMA_H
+#include <linux/llist.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/rpc_rdma.h>
@@ -107,8 +108,7 @@ struct svcxprt_rdma {
 	struct list_head     sc_read_complete_q;
 	struct work_struct   sc_work;
 
-	spinlock_t	     sc_recv_lock;
-	struct list_head     sc_recv_ctxts;
+	struct llist_head    sc_recv_ctxts;
 };
 /* sc_flags */
 #define RDMAXPRT_CONN_PENDING	3
@@ -125,6 +125,7 @@ enum {
 #define RPCSVC_MAXPAYLOAD_RDMA	RPCSVC_MAXPAYLOAD
 
 struct svc_rdma_recv_ctxt {
+	struct llist_node	rc_node;
 	struct list_head	rc_list;
 	struct ib_recv_wr	rc_recv_wr;
 	struct ib_cqe		rc_cqe;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 65e2fb9aac65..96bccd398469 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -172,9 +172,10 @@ static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
 void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_recv_ctxt *ctxt;
+	struct llist_node *node;
 
-	while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts))) {
-		list_del(&ctxt->rc_list);
+	while ((node = llist_del_first(&rdma->sc_recv_ctxts))) {
+		ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 	}
 }
@@ -183,21 +184,18 @@ static struct svc_rdma_recv_ctxt *
 svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
 {
 	struct svc_rdma_recv_ctxt *ctxt;
+	struct llist_node *node;
 
-	spin_lock(&rdma->sc_recv_lock);
-	ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_recv_ctxts);
-	if (!ctxt)
+	node = llist_del_first(&rdma->sc_recv_ctxts);
+	if (!node)
 		goto out_empty;
-	list_del(&ctxt->rc_list);
-	spin_unlock(&rdma->sc_recv_lock);
+	ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
 
 out:
 	ctxt->rc_page_count = 0;
 	return ctxt;
 
 out_empty:
-	spin_unlock(&rdma->sc_recv_lock);
-
 	ctxt = svc_rdma_recv_ctxt_alloc(rdma);
 	if (!ctxt)
 		return NULL;
@@ -218,11 +216,9 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 	for (i = 0; i < ctxt->rc_page_count; i++)
 		put_page(ctxt->rc_pages[i]);
 
-	if (!ctxt->rc_temp) {
-		spin_lock(&rdma->sc_recv_lock);
-		list_add(&ctxt->rc_list, &rdma->sc_recv_ctxts);
-		spin_unlock(&rdma->sc_recv_lock);
-	} else
+	if (!ctxt->rc_temp)
+		llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+	else
 		svc_rdma_recv_ctxt_destroy(rdma, ctxt);
 }
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 18d6eb3686e7..4182d569b5cf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -140,14 +140,13 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
 	INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
 	INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
-	INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
+	init_llist_head(&cma_xprt->sc_recv_ctxts);
 	INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
 	init_waitqueue_head(&cma_xprt->sc_send_wait);
 
 	spin_lock_init(&cma_xprt->sc_lock);
 	spin_lock_init(&cma_xprt->sc_rq_dto_lock);
 	spin_lock_init(&cma_xprt->sc_send_lock);
-	spin_lock_init(&cma_xprt->sc_recv_lock);
 	spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
 
 	/*
-- 
cgit v1.2.3


From f69d6d8eef7807f8d937b81da24bebd2e926e4d2 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Sun, 18 Aug 2019 14:18:44 -0400
Subject: sunrpc: add a new cache_detail operation for when a cache is flushed

When the exports table is changed, exportfs will usually write a new
time to the "flush" file in the nfsd.export cache procfile. This tells
the kernel to flush any entries that are older than that value.

This gives us a mechanism to tell whether an unexport might have
occurred. Add a new ->flush cache_detail operation that is called after
flushing the cache whenever someone writes to a "flush" file.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 1 +
 net/sunrpc/cache.c           | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index f7d086b77a21..f8603724fbee 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -87,6 +87,7 @@ struct cache_detail {
 					      int has_died);
 
 	struct cache_head *	(*alloc)(void);
+	void			(*flush)(void);
 	int			(*match)(struct cache_head *orig, struct cache_head *new);
 	void			(*init)(struct cache_head *orig, struct cache_head *new);
 	void			(*update)(struct cache_head *orig, struct cache_head *new);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index a6a6190ad37a..a349094f6fb7 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1524,6 +1524,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
 	cd->nextcheck = now;
 	cache_flush();
 
+	if (cd->flush)
+		cd->flush();
+
 	*ppos += count;
 	return count;
 }
-- 
cgit v1.2.3


From 18f6622ebbdea56a83f8e553c159ce2d62d3ad0c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jeff.layton@primarydata.com>
Date: Sun, 18 Aug 2019 14:18:45 -0400
Subject: locks: create a new notifier chain for lease attempts

With the new file caching infrastructure in nfsd, we can end up holding
files open for an indefinite period of time, even when they are still
idle. This may prevent the kernel from handing out leases on the file,
which is something we don't want to block.

Fix this by running a SRCU notifier call chain whenever on any
lease attempt. nfsd can then purge the cache for that inode before
returning.

Since SRCU is only conditionally compiled in, we must only define the
new chain if it's enabled, and users of the chain must ensure that
SRCU is enabled.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/locks.c         | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  5 +++++
 2 files changed, 66 insertions(+)

(limited to 'include')

diff --git a/fs/locks.c b/fs/locks.c
index 686eae21daf6..1913481bfbf7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1990,6 +1990,64 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
 }
 EXPORT_SYMBOL(generic_setlease);
 
+#if IS_ENABLED(CONFIG_SRCU)
+/*
+ * Kernel subsystems can register to be notified on any attempt to set
+ * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
+ * to close files that it may have cached when there is an attempt to set a
+ * conflicting lease.
+ */
+static struct srcu_notifier_head lease_notifier_chain;
+
+static inline void
+lease_notifier_chain_init(void)
+{
+	srcu_init_notifier_head(&lease_notifier_chain);
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+	if (arg != F_UNLCK)
+		srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+	return srcu_notifier_chain_register(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+	srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#else /* !IS_ENABLED(CONFIG_SRCU) */
+static inline void
+lease_notifier_chain_init(void)
+{
+}
+
+static inline void
+setlease_notifier(long arg, struct file_lock *lease)
+{
+}
+
+int lease_register_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(lease_register_notifier);
+
+void lease_unregister_notifier(struct notifier_block *nb)
+{
+}
+EXPORT_SYMBOL_GPL(lease_unregister_notifier);
+
+#endif /* IS_ENABLED(CONFIG_SRCU) */
+
 /**
  * vfs_setlease        -       sets a lease on an open file
  * @filp:	file pointer
@@ -2010,6 +2068,8 @@ EXPORT_SYMBOL(generic_setlease);
 int
 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 {
+	if (lease)
+		setlease_notifier(arg, *lease);
 	if (filp->f_op->setlease)
 		return filp->f_op->setlease(filp, arg, lease, priv);
 	else
@@ -2923,6 +2983,7 @@ static int __init filelock_init(void)
 		INIT_HLIST_HEAD(&fll->hlist);
 	}
 
+	lease_notifier_chain_init();
 	return 0;
 }
 core_initcall(filelock_init);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 56b8e358af5c..0f106c7f4bb9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1155,6 +1155,11 @@ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
 extern int generic_setlease(struct file *, long, struct file_lock **, void **priv);
 extern int vfs_setlease(struct file *, long, struct file_lock **, void **);
 extern int lease_modify(struct file_lock *, int, struct list_head *);
+
+struct notifier_block;
+extern int lease_register_notifier(struct notifier_block *);
+extern void lease_unregister_notifier(struct notifier_block *);
+
 struct files_struct;
 extern void show_fd_locks(struct seq_file *f,
 			 struct file *filp, struct files_struct *files);
-- 
cgit v1.2.3


From b72679ee89a0a0ecd26f7b6fcae96cdaababff94 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Sun, 18 Aug 2019 14:18:46 -0400
Subject: notify: export symbols for use by the knfsd file cache

The knfsd file cache will need to detect when files are unlinked, so that
it can close the associated cached files. Export a minimal set of notifier
functions to allow it to do so.

Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/notify/fsnotify.h             | 2 --
 fs/notify/group.c                | 2 ++
 fs/notify/mark.c                 | 6 ++++++
 include/linux/fsnotify_backend.h | 2 ++
 4 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 5a00121fb219..f3462828a0e2 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -54,8 +54,6 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 {
 	fsnotify_destroy_marks(&sb->s_fsnotify_marks);
 }
-/* Wait until all marks queued for destruction are destroyed */
-extern void fsnotify_wait_marks_destroyed(void);
 
 /*
  * update the dentry->d_flags of all of inode's children to indicate if inode cares
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0391190305cc..133f723aca07 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -108,6 +108,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
 	if (refcount_dec_and_test(&group->refcnt))
 		fsnotify_final_destroy_group(group);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_group);
 
 /*
  * Create a new fsnotify_group and hold a reference for the group returned.
@@ -137,6 +138,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
 
 	return group;
 }
+EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
 
 int fsnotify_fasync(int fd, struct file *file, int on)
 {
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 99ddd126f6f0..1d96216dffd1 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -276,6 +276,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 	queue_delayed_work(system_unbound_wq, &reaper_work,
 			   FSNOTIFY_REAPER_DELAY);
 }
+EXPORT_SYMBOL_GPL(fsnotify_put_mark);
 
 /*
  * Get mark reference when we found the mark via lockless traversal of object
@@ -430,6 +431,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 	mutex_unlock(&group->mark_mutex);
 	fsnotify_free_mark(mark);
 }
+EXPORT_SYMBOL_GPL(fsnotify_destroy_mark);
 
 /*
  * Sorting function for lists of fsnotify marks.
@@ -685,6 +687,7 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, fsnotify_connp_t *connp,
 	mutex_unlock(&group->mark_mutex);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(fsnotify_add_mark);
 
 /*
  * Given a list of marks, find the mark associated with given group. If found
@@ -711,6 +714,7 @@ struct fsnotify_mark *fsnotify_find_mark(fsnotify_connp_t *connp,
 	spin_unlock(&conn->lock);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(fsnotify_find_mark);
 
 /* Clear any marks in a group with given type mask */
 void fsnotify_clear_marks_by_group(struct fsnotify_group *group,
@@ -809,6 +813,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
 	mark->group = group;
 	WRITE_ONCE(mark->connector, NULL);
 }
+EXPORT_SYMBOL_GPL(fsnotify_init_mark);
 
 /*
  * Destroy all marks in destroy_list, waits for SRCU period to finish before
@@ -837,3 +842,4 @@ void fsnotify_wait_marks_destroyed(void)
 {
 	flush_delayed_work(&reaper_work);
 }
+EXPORT_SYMBOL_GPL(fsnotify_wait_marks_destroyed);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 2de3b2ddd19a..1915bdba2fad 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -475,6 +475,8 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark,
 extern void fsnotify_detach_mark(struct fsnotify_mark *mark);
 /* free mark */
 extern void fsnotify_free_mark(struct fsnotify_mark *mark);
+/* Wait until all marks queued for destruction are destroyed */
+extern void fsnotify_wait_marks_destroyed(void);
 /* run all the marks in a group, and clear all of the marks attached to given object type */
 extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type);
 /* run all the marks in a group, and clear all of the vfsmount marks */
-- 
cgit v1.2.3


From 4333fb96ca1086d1cec0f93f78c453aa2dee8a5c Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 16 Aug 2019 13:01:22 -0300
Subject: media: lib/sort.c: implement sort() variant taking context argument

Our list_sort() utility has always supported a context argument that
is passed through to the comparison routine. Now there's a use case
for the similar thing for sort().

This implements sort_r by simply extending the existing sort function
in the obvious way. To avoid code duplication, we want to implement
sort() in terms of sort_r(). The naive way to do that is

static int cmp_wrapper(const void *a, const void *b, const void *ctx)
{
  int (*real_cmp)(const void*, const void*) = ctx;
  return real_cmp(a, b);
}

sort(..., cmp) { sort_r(..., cmp_wrapper, cmp) }

but this would do two indirect calls for each comparison. Instead, do
as is done for the default swap functions - that only adds a cost of a
single easily predicted branch to each comparison call.

Aside from introducing support for the context argument, this also
serves as preparation for patches that will eliminate the indirect
comparison calls in common cases.

Requested-by: Boris Brezillon <boris.brezillon@collabora.com>

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/linux/sort.h |  5 +++++
 lib/sort.c           | 34 ++++++++++++++++++++++++++++------
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sort.h b/include/linux/sort.h
index 2b99a5dd073d..61b96d0ebc44 100644
--- a/include/linux/sort.h
+++ b/include/linux/sort.h
@@ -4,6 +4,11 @@
 
 #include <linux/types.h>
 
+void sort_r(void *base, size_t num, size_t size,
+	    int (*cmp)(const void *, const void *, const void *),
+	    void (*swap)(void *, void *, int),
+	    const void *priv);
+
 void sort(void *base, size_t num, size_t size,
 	  int (*cmp)(const void *, const void *),
 	  void (*swap)(void *, void *, int));
diff --git a/lib/sort.c b/lib/sort.c
index cf408aec3733..d54cf97e9548 100644
--- a/lib/sort.c
+++ b/lib/sort.c
@@ -144,6 +144,18 @@ static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
 		swap_func(a, b, (int)size);
 }
 
+typedef int (*cmp_func_t)(const void *, const void *);
+typedef int (*cmp_r_func_t)(const void *, const void *, const void *);
+#define _CMP_WRAPPER ((cmp_r_func_t)0L)
+
+static int do_cmp(const void *a, const void *b,
+		  cmp_r_func_t cmp, const void *priv)
+{
+	if (cmp == _CMP_WRAPPER)
+		return ((cmp_func_t)(priv))(a, b);
+	return cmp(a, b, priv);
+}
+
 /**
  * parent - given the offset of the child, find the offset of the parent.
  * @i: the offset of the heap element whose parent is sought.  Non-zero.
@@ -171,12 +183,13 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
 }
 
 /**
- * sort - sort an array of elements
+ * sort_r - sort an array of elements
  * @base: pointer to data to sort
  * @num: number of elements
  * @size: size of each element
  * @cmp_func: pointer to comparison function
  * @swap_func: pointer to swap function or NULL
+ * @priv: third argument passed to comparison function
  *
  * This function does a heapsort on the given array.  You may provide
  * a swap_func function if you need to do something more than a memory
@@ -188,9 +201,10 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
  * O(n*n) worst-case behavior and extra memory requirements that make
  * it less suitable for kernel use.
  */
-void sort(void *base, size_t num, size_t size,
-	  int (*cmp_func)(const void *, const void *),
-	  void (*swap_func)(void *, void *, int size))
+void sort_r(void *base, size_t num, size_t size,
+	    int (*cmp_func)(const void *, const void *, const void *),
+	    void (*swap_func)(void *, void *, int size),
+	    const void *priv)
 {
 	/* pre-scale counters for performance */
 	size_t n = num * size, a = (num/2) * size;
@@ -238,12 +252,12 @@ void sort(void *base, size_t num, size_t size,
 		 * average, 3/4 worst-case.)
 		 */
 		for (b = a; c = 2*b + size, (d = c + size) < n;)
-			b = cmp_func(base + c, base + d) >= 0 ? c : d;
+			b = do_cmp(base + c, base + d, cmp_func, priv) >= 0 ? c : d;
 		if (d == n)	/* Special case last leaf with no sibling */
 			b = c;
 
 		/* Now backtrack from "b" to the correct location for "a" */
-		while (b != a && cmp_func(base + a, base + b) >= 0)
+		while (b != a && do_cmp(base + a, base + b, cmp_func, priv) >= 0)
 			b = parent(b, lsbit, size);
 		c = b;			/* Where "a" belongs */
 		while (b != a) {	/* Shift it into place */
@@ -252,4 +266,12 @@ void sort(void *base, size_t num, size_t size,
 		}
 	}
 }
+EXPORT_SYMBOL(sort_r);
+
+void sort(void *base, size_t num, size_t size,
+	  int (*cmp_func)(const void *, const void *),
+	  void (*swap_func)(void *, void *, int size))
+{
+	return sort_r(base, num, size, _CMP_WRAPPER, swap_func, cmp_func);
+}
 EXPORT_SYMBOL(sort);
-- 
cgit v1.2.3


From 7bb3c32abd7bafd346f667cccb7dfe9686f14ddd Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Fri, 16 Aug 2019 13:01:23 -0300
Subject: media: uapi: h264: Rename pixel format

The V4L2_PIX_FMT_H264_SLICE_RAW name was originally suggested
because the pixel format would represent H264 slices without any
start code.

However, as we will now introduce a start code menu control,
give the pixel format a more meaningful name, while it's
still early enough to do so.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/pixfmt-compressed.rst | 4 ++--
 drivers/media/v4l2-core/v4l2-ioctl.c               | 2 +-
 drivers/staging/media/sunxi/cedrus/cedrus_dec.c    | 2 +-
 drivers/staging/media/sunxi/cedrus/cedrus_video.c  | 6 +++---
 include/media/h264-ctrls.h                         | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index f52a7b67023d..9b65473a2288 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -52,9 +52,9 @@ Compressed Formats
       - ``V4L2_PIX_FMT_H264_MVC``
       - 'M264'
       - H264 MVC video elementary stream.
-    * .. _V4L2-PIX-FMT-H264-SLICE-RAW:
+    * .. _V4L2-PIX-FMT-H264-SLICE:
 
-      - ``V4L2_PIX_FMT_H264_SLICE_RAW``
+      - ``V4L2_PIX_FMT_H264_SLICE``
       - 'S264'
       - H264 parsed slice data, without the start code and as
 	extracted from the H264 bitstream.  This format is adapted for
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index bb5b4926538a..39f10621c91b 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1343,7 +1343,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 		case V4L2_PIX_FMT_H264:		descr = "H.264"; break;
 		case V4L2_PIX_FMT_H264_NO_SC:	descr = "H.264 (No Start Codes)"; break;
 		case V4L2_PIX_FMT_H264_MVC:	descr = "H.264 MVC"; break;
-		case V4L2_PIX_FMT_H264_SLICE_RAW:	descr = "H.264 Parsed Slice Data"; break;
+		case V4L2_PIX_FMT_H264_SLICE:	descr = "H.264 Parsed Slice Data"; break;
 		case V4L2_PIX_FMT_H263:		descr = "H.263"; break;
 		case V4L2_PIX_FMT_MPEG1:	descr = "MPEG-1 ES"; break;
 		case V4L2_PIX_FMT_MPEG2:	descr = "MPEG-2 ES"; break;
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
index bdad87eb9d79..56ca4c9ad01c 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_dec.c
@@ -46,7 +46,7 @@ void cedrus_device_run(void *priv)
 			V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION);
 		break;
 
-	case V4L2_PIX_FMT_H264_SLICE_RAW:
+	case V4L2_PIX_FMT_H264_SLICE:
 		run.h264.decode_params = cedrus_find_control_data(ctx,
 			V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS);
 		run.h264.pps = cedrus_find_control_data(ctx,
diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_video.c b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
index 681dfe3367a6..eeee3efd247b 100644
--- a/drivers/staging/media/sunxi/cedrus/cedrus_video.c
+++ b/drivers/staging/media/sunxi/cedrus/cedrus_video.c
@@ -38,7 +38,7 @@ static struct cedrus_format cedrus_formats[] = {
 		.directions	= CEDRUS_DECODE_SRC,
 	},
 	{
-		.pixelformat	= V4L2_PIX_FMT_H264_SLICE_RAW,
+		.pixelformat	= V4L2_PIX_FMT_H264_SLICE,
 		.directions	= CEDRUS_DECODE_SRC,
 	},
 	{
@@ -104,7 +104,7 @@ static void cedrus_prepare_format(struct v4l2_pix_format *pix_fmt)
 
 	switch (pix_fmt->pixelformat) {
 	case V4L2_PIX_FMT_MPEG2_SLICE:
-	case V4L2_PIX_FMT_H264_SLICE_RAW:
+	case V4L2_PIX_FMT_H264_SLICE:
 		/* Zero bytes per line for encoded source. */
 		bytesperline = 0;
 
@@ -449,7 +449,7 @@ static int cedrus_start_streaming(struct vb2_queue *vq, unsigned int count)
 		ctx->current_codec = CEDRUS_CODEC_MPEG2;
 		break;
 
-	case V4L2_PIX_FMT_H264_SLICE_RAW:
+	case V4L2_PIX_FMT_H264_SLICE:
 		ctx->current_codec = CEDRUS_CODEC_H264;
 		break;
 
diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index e1404d78d6ff..6160a69c0143 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -14,7 +14,7 @@
 #include <linux/videodev2.h>
 
 /* Our pixel format isn't stable at the moment */
-#define V4L2_PIX_FMT_H264_SLICE_RAW v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */
+#define V4L2_PIX_FMT_H264_SLICE v4l2_fourcc('S', '2', '6', '4') /* H264 parsed slices */
 
 /*
  * This is put insanely high to avoid conflicting with controls that
-- 
cgit v1.2.3


From 5604be66a56867a784e162299a48c214921ffa1b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Fri, 16 Aug 2019 13:01:24 -0300
Subject: media: uapi: h264: Add the concept of decoding mode

Some stateless decoders don't support per-slice decoding granularity
(or at least not in a way that would make them efficient or easy to use).

Expose a menu to control the supported decoding modes. Drivers are
allowed to support only one decoding but they can support both too.

To fully specify the decoding operation, we need to introduce
a start_byte_offset, to indicate where slices start.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/ext-ctrls-codec.rst   | 57 +++++++++++++++++++++-
 Documentation/media/uapi/v4l/pixfmt-compressed.rst |  6 ++-
 drivers/media/v4l2-core/v4l2-ctrls.c               |  9 ++++
 include/media/h264-ctrls.h                         | 10 ++++
 4 files changed, 79 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index c5f39dd50043..1da17a2c94d7 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -1747,6 +1747,14 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     * - __u32
       - ``size``
       -
+    * - __u32
+      - ``start_byte_offset``
+        Offset (in bytes) from the beginning of the OUTPUT buffer to the start
+        of the slice. If the slice starts with a start code, then this is the
+        offset to such start code. When operating in slice-based decoding mode
+        (see :c:type:`v4l2_mpeg_video_h264_decode_mode`), this field should
+        be set to 0. When operating in frame-based decoding mode, this field
+        should be 0 for the first slice.
     * - __u32
       - ``header_bit_size``
       -
@@ -1930,7 +1938,10 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       -
     * - __u16
       - ``num_slices``
-      - Number of slices needed to decode the current frame
+      - Number of slices needed to decode the current frame/field. When
+        operating in slice-based decoding mode (see
+        :c:type:`v4l2_mpeg_video_h264_decode_mode`), this field
+        should always be set to one.
     * - __u16
       - ``nal_ref_idc``
       - NAL reference ID value coming from the NAL Unit header
@@ -2021,6 +2032,50 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
       - 0x00000004
       - The DPB entry is a long term reference frame
 
+``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE (enum)``
+    Specifies the decoding mode to use. Currently exposes slice-based and
+    frame-based decoding but new modes might be added later on.
+    This control is used as a modifier for V4L2_PIX_FMT_H264_SLICE
+    pixel format. Applications that support V4L2_PIX_FMT_H264_SLICE
+    are required to set this control in order to specify the decoding mode
+    that is expected for the buffer.
+    Drivers may expose a single or multiple decoding modes, depending
+    on what they can support.
+
+    .. note::
+
+       This menu control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_mpeg_video_h264_decode_mode
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG_VIDEO_H264_DECODE_MODE_SLICE_BASED``
+      - 0
+      - Decoding is done at the slice granularity.
+        In this mode, ``num_slices`` field in struct
+        :c:type:`v4l2_ctrl_h264_decode_params` should be set to 1,
+        and ``start_byte_offset`` in struct
+        :c:type:`v4l2_ctrl_h264_slice_params` should be set to 0.
+        The OUTPUT buffer must contain a single slice.
+    * - ``V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED``
+      - 1
+      - Decoding is done at the frame granularity.
+        In this mode, ``num_slices`` field in struct
+        :c:type:`v4l2_ctrl_h264_decode_params` should be set to the number
+        of slices in the frame, and ``start_byte_offset`` in struct
+        :c:type:`v4l2_ctrl_h264_slice_params` should be set accordingly
+        for each slice. For the first slice, ``start_byte_offset`` should
+        be zero.
+        The OUTPUT buffer must contain all slices needed to decode the
+        frame. The OUTPUT buffer must also contain both fields.
+
 .. _v4l2-mpeg-mpeg2:
 
 ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (struct)``
diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index 9b65473a2288..d666eb51741a 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -60,8 +60,10 @@ Compressed Formats
 	extracted from the H264 bitstream.  This format is adapted for
 	stateless video decoders that implement an H264 pipeline
 	(using the :ref:`mem2mem` and :ref:`media-request-api`).
-	Metadata associated with the frame to decode are required to
-	be passed through the ``V4L2_CID_MPEG_VIDEO_H264_SPS``,
+	This pixelformat has a modifier that must be set at least once
+	through the ``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE`` control.
+	In addition, metadata associated with the frame to decode are
+	required to be passed through the ``V4L2_CID_MPEG_VIDEO_H264_SPS``,
 	``V4L2_CID_MPEG_VIDEO_H264_PPS``,
 	``V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX``,
 	``V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS`` and
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index cd1ae016706f..2c67f9fc4d5b 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -402,6 +402,11 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"Explicit",
 		NULL,
 	};
+	static const char * const h264_decode_mode[] = {
+		"Slice-Based",
+		"Frame-Based",
+		NULL,
+	};
 	static const char * const mpeg_mpeg2_level[] = {
 		"Low",
 		"Main",
@@ -633,6 +638,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return h264_fp_arrangement_type;
 	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
 		return h264_fmo_map_type;
+	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:
+		return h264_decode_mode;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
 		return mpeg_mpeg2_level;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
@@ -852,6 +859,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX:		return "H264 Scaling Matrix";
 	case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
 	case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
+	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:		return "H264 Decode Mode";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:			return "MPEG2 Level";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:			return "MPEG2 Profile";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
@@ -1220,6 +1228,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC:
 	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
 	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
+	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index 6160a69c0143..928c48c57282 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -26,6 +26,7 @@
 #define V4L2_CID_MPEG_VIDEO_H264_SCALING_MATRIX	(V4L2_CID_MPEG_BASE+1002)
 #define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS	(V4L2_CID_MPEG_BASE+1003)
 #define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS	(V4L2_CID_MPEG_BASE+1004)
+#define V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE	(V4L2_CID_MPEG_BASE+1005)
 
 /* enum v4l2_ctrl_type type values */
 #define V4L2_CTRL_TYPE_H264_SPS			0x0110
@@ -34,6 +35,11 @@
 #define V4L2_CTRL_TYPE_H264_SLICE_PARAMS	0x0113
 #define V4L2_CTRL_TYPE_H264_DECODE_PARAMS	0x0114
 
+enum v4l2_mpeg_video_h264_decode_mode {
+	V4L2_MPEG_VIDEO_H264_DECODE_MODE_SLICE_BASED,
+	V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
+};
+
 #define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG			0x01
 #define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG			0x02
 #define V4L2_H264_SPS_CONSTRAINT_SET2_FLAG			0x04
@@ -125,6 +131,10 @@ struct v4l2_h264_pred_weight_table {
 struct v4l2_ctrl_h264_slice_params {
 	/* Size in bytes, including header */
 	__u32 size;
+
+	/* Offset in bytes to the start of slice in the OUTPUT buffer. */
+	__u32 start_byte_offset;
+
 	/* Offset in bits to slice_data() from the beginning of this slice. */
 	__u32 header_bit_size;
 
-- 
cgit v1.2.3


From 8cae93e090113e46bd29a99c1727d8f13ea12fdf Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Fri, 16 Aug 2019 13:01:25 -0300
Subject: media: uapi: h264: Add the concept of start code

Stateless decoders have different expectations about the
start code that is prepended on H264 slices. Add a
menu control to express the supported start code types
(including no start code).

Drivers are allowed to support only one start code type,
but they can support both too.

Note that this is independent of the H264 decoding mode,
which specifies the granularity of the decoding operations.
Either in frame-based or slice-based mode, this new control
will allow to define the start code expected on H264 slices.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/ext-ctrls-codec.rst   | 33 ++++++++++++++++++++++
 Documentation/media/uapi/v4l/pixfmt-compressed.rst |  5 ++--
 drivers/media/v4l2-core/v4l2-ctrls.c               |  9 ++++++
 include/media/h264-ctrls.h                         |  6 ++++
 4 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index 1da17a2c94d7..810ae9bb6f7c 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -2076,6 +2076,39 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
         The OUTPUT buffer must contain all slices needed to decode the
         frame. The OUTPUT buffer must also contain both fields.
 
+``V4L2_CID_MPEG_VIDEO_H264_START_CODE (enum)``
+    Specifies the H264 slice start code expected for each slice.
+    This control is used as a modifier for V4L2_PIX_FMT_H264_SLICE
+    pixel format. Applications that support V4L2_PIX_FMT_H264_SLICE
+    are required to set this control in order to specify the start code
+    that is expected for the buffer.
+    Drivers may expose a single or multiple start codes, depending
+    on what they can support.
+
+    .. note::
+
+       This menu control is not yet part of the public kernel API and
+       it is expected to change.
+
+.. c:type:: v4l2_mpeg_video_h264_start_code
+
+.. cssclass:: longtable
+
+.. flat-table::
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       1 1 2
+
+    * - ``V4L2_MPEG_VIDEO_H264_START_CODE_NONE``
+      - 0
+      - Selecting this value specifies that H264 slices are passed
+        to the driver without any start code.
+    * - ``V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B``
+      - 1
+      - Selecting this value specifies that H264 slices are expected
+        to be prefixed by Annex B start codes. According to :ref:`h264`
+        valid start codes can be 3-bytes 0x000001 or 4-bytes 0x00000001.
+
 .. _v4l2-mpeg-mpeg2:
 
 ``V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS (struct)``
diff --git a/Documentation/media/uapi/v4l/pixfmt-compressed.rst b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
index d666eb51741a..493b6020107d 100644
--- a/Documentation/media/uapi/v4l/pixfmt-compressed.rst
+++ b/Documentation/media/uapi/v4l/pixfmt-compressed.rst
@@ -60,8 +60,9 @@ Compressed Formats
 	extracted from the H264 bitstream.  This format is adapted for
 	stateless video decoders that implement an H264 pipeline
 	(using the :ref:`mem2mem` and :ref:`media-request-api`).
-	This pixelformat has a modifier that must be set at least once
-	through the ``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE`` control.
+	This pixelformat has two modifiers that must be set at least once
+	through the ``V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE``
+        and ``V4L2_CID_MPEG_VIDEO_H264_START_CODE`` controls.
 	In addition, metadata associated with the frame to decode are
 	required to be passed through the ``V4L2_CID_MPEG_VIDEO_H264_SPS``,
 	``V4L2_CID_MPEG_VIDEO_H264_PPS``,
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index 2c67f9fc4d5b..1d8f38824631 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -407,6 +407,11 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"Frame-Based",
 		NULL,
 	};
+	static const char * const h264_start_code[] = {
+		"No Start Code",
+		"Annex B Start Code",
+		NULL,
+	};
 	static const char * const mpeg_mpeg2_level[] = {
 		"Low",
 		"Main",
@@ -640,6 +645,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		return h264_fmo_map_type;
 	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:
 		return h264_decode_mode;
+	case V4L2_CID_MPEG_VIDEO_H264_START_CODE:
+		return h264_start_code;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
 		return mpeg_mpeg2_level;
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
@@ -860,6 +867,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS:		return "H264 Slice Parameters";
 	case V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS:		return "H264 Decode Parameters";
 	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:		return "H264 Decode Mode";
+	case V4L2_CID_MPEG_VIDEO_H264_START_CODE:		return "H264 Start Code";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:			return "MPEG2 Level";
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:			return "MPEG2 Profile";
 	case V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP:		return "MPEG4 I-Frame QP Value";
@@ -1229,6 +1237,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE:
 	case V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE:
 	case V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE:
+	case V4L2_CID_MPEG_VIDEO_H264_START_CODE:
 	case V4L2_CID_MPEG_VIDEO_MPEG2_LEVEL:
 	case V4L2_CID_MPEG_VIDEO_MPEG2_PROFILE:
 	case V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL:
diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index 928c48c57282..ba2876a64cf6 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -27,6 +27,7 @@
 #define V4L2_CID_MPEG_VIDEO_H264_SLICE_PARAMS	(V4L2_CID_MPEG_BASE+1003)
 #define V4L2_CID_MPEG_VIDEO_H264_DECODE_PARAMS	(V4L2_CID_MPEG_BASE+1004)
 #define V4L2_CID_MPEG_VIDEO_H264_DECODE_MODE	(V4L2_CID_MPEG_BASE+1005)
+#define V4L2_CID_MPEG_VIDEO_H264_START_CODE	(V4L2_CID_MPEG_BASE+1006)
 
 /* enum v4l2_ctrl_type type values */
 #define V4L2_CTRL_TYPE_H264_SPS			0x0110
@@ -40,6 +41,11 @@ enum v4l2_mpeg_video_h264_decode_mode {
 	V4L2_MPEG_VIDEO_H264_DECODE_MODE_FRAME_BASED,
 };
 
+enum v4l2_mpeg_video_h264_start_code {
+	V4L2_MPEG_VIDEO_H264_START_CODE_NONE,
+	V4L2_MPEG_VIDEO_H264_START_CODE_ANNEX_B,
+};
+
 #define V4L2_H264_SPS_CONSTRAINT_SET0_FLAG			0x01
 #define V4L2_H264_SPS_CONSTRAINT_SET1_FLAG			0x02
 #define V4L2_H264_SPS_CONSTRAINT_SET2_FLAG			0x04
-- 
cgit v1.2.3


From c3adb85745ca6cc19532b2ee197d7abece1ac732 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Fri, 16 Aug 2019 13:01:26 -0300
Subject: media: uapi: h264: Get rid of the p0/b0/b1 ref-lists

Those lists can be extracted from the dpb, let's simplify userspace
life and build that list kernel-side (generic helpers will be provided
for drivers that need this list).

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Nicolas Dufresne <nicolas.dufresne@collabora.com>
Reviewed-by: Ezequiel Garcia <ezequiel@collabora.com>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Tested-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/ext-ctrls-codec.rst | 9 ---------
 include/media/h264-ctrls.h                       | 3 ---
 2 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
index 810ae9bb6f7c..bc5dd8e76567 100644
--- a/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
+++ b/Documentation/media/uapi/v4l/ext-ctrls-codec.rst
@@ -1945,15 +1945,6 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type -
     * - __u16
       - ``nal_ref_idc``
       - NAL reference ID value coming from the NAL Unit header
-    * - __u8
-      - ``ref_pic_list_p0[32]``
-      - Backward reference list used by P-frames in the original bitstream order
-    * - __u8
-      - ``ref_pic_list_b0[32]``
-      - Backward reference list used by B-frames in the original bitstream order
-    * - __u8
-      - ``ref_pic_list_b1[32]``
-      - Forward reference list used by B-frames in the original bitstream order
     * - __s32
       - ``top_field_order_cnt``
       - Picture Order Count for the coded top field
diff --git a/include/media/h264-ctrls.h b/include/media/h264-ctrls.h
index ba2876a64cf6..e877bf1d537c 100644
--- a/include/media/h264-ctrls.h
+++ b/include/media/h264-ctrls.h
@@ -202,9 +202,6 @@ struct v4l2_ctrl_h264_decode_params {
 	struct v4l2_h264_dpb_entry dpb[16];
 	__u16 num_slices;
 	__u16 nal_ref_idc;
-	__u8 ref_pic_list_p0[32];
-	__u8 ref_pic_list_b0[32];
-	__u8 ref_pic_list_b1[32];
 	__s32 top_field_order_cnt;
 	__s32 bottom_field_order_cnt;
 	__u32 flags; /* V4L2_H264_DECODE_PARAM_FLAG_* */
-- 
cgit v1.2.3


From 555df336c754ac9de1af9a5c72508918b3796b18 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 16:02:01 +0100
Subject: keys: Fix description size

The maximum key description size is 4095.  Commit f771fde82051 ("keys:
Simplify key description management") inadvertantly reduced that to 255
and made sizes between 256 and 4095 work weirdly, and any size whereby
size & 255 == 0 would cause an assertion in __key_link_begin() at the
following line:

	BUG_ON(index_key->desc_len == 0);

This can be fixed by simply increasing the size of desc_len in struct
keyring_index_key to a u16.

Note the argument length test in keyutils only checked empty
descriptions and descriptions with a size around the limit (ie.  4095)
and not for all the values in between, so it missed this.  This has been
addressed and

	https://git.kernel.org/pub/scm/linux/kernel/git/dhowells/keyutils.git/commit/?id=066bf56807c26cd3045a25f355b34c1d8a20a5aa

now exhaustively tests all possible lengths of type, description and
payload and then some.

The assertion failure looks something like:

 kernel BUG at security/keys/keyring.c:1245!
 ...
 RIP: 0010:__key_link_begin+0x88/0xa0
 ...
 Call Trace:
  key_create_or_update+0x211/0x4b0
  __x64_sys_add_key+0x101/0x200
  do_syscall_64+0x5b/0x1e0
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

It can be triggered by:

	keyctl add user "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" a @s

Fixes: f771fde82051 ("keys: Simplify key description management")
Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/key.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/key.h b/include/linux/key.h
index 91f391cd272e..50028338a4cc 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -94,11 +94,11 @@ struct keyring_index_key {
 	union {
 		struct {
 #ifdef __LITTLE_ENDIAN /* Put desc_len at the LSB of x */
-			u8	desc_len;
-			char	desc[sizeof(long) - 1];	/* First few chars of description */
+			u16	desc_len;
+			char	desc[sizeof(long) - 2];	/* First few chars of description */
 #else
-			char	desc[sizeof(long) - 1];	/* First few chars of description */
-			u8	desc_len;
+			char	desc[sizeof(long) - 2];	/* First few chars of description */
+			u16	desc_len;
 #endif
 		};
 		unsigned long x;
-- 
cgit v1.2.3


From 2b770bee787daec66ed86baf4fc83275f949c8ac Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Thu, 15 Aug 2019 11:44:45 -0300
Subject: media: videodev2.h: add V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM

Add an enum_fmt format flag to specifically tag coded formats where
full bytestream parsing is supported by the device.

Some stateful decoders are capable of fully parsing a bytestream,
but others require that userspace pre-parses the bytestream into
frames or fields (see the corresponding pixelformat descriptions
for details).

If this flag is set, then this pre-parsing step is not required
(but still possible, of course).

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Reviewed-by: Alexandre Courbot <acourbot@chromium.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/vidioc-enum-fmt.rst | 8 ++++++++
 Documentation/media/videodev2.h.rst.exceptions   | 1 +
 include/uapi/linux/videodev2.h                   | 5 +++--
 3 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst b/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst
index 822d6730e7d2..ebc05ce74bdf 100644
--- a/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst
+++ b/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst
@@ -127,6 +127,14 @@ one until ``EINVAL`` is returned.
       - This format is not native to the device but emulated through
 	software (usually libv4l2), where possible try to use a native
 	format instead for better performance.
+    * - ``V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM``
+      - 0x0004
+      - The hardware decoder for this compressed bytestream format (aka coded
+	format) is capable of parsing a continuous bytestream. Applications do
+	not need to parse the bytestream themselves to find the boundaries
+	between frames/fields. This flag can only be used in combination with
+	the ``V4L2_FMT_FLAG_COMPRESSED`` flag, since this applies to compressed
+	formats only. This flag is valid for stateful decoders only.
 
 
 Return Value
diff --git a/Documentation/media/videodev2.h.rst.exceptions b/Documentation/media/videodev2.h.rst.exceptions
index 8e7d3492d248..a0640b6d0f68 100644
--- a/Documentation/media/videodev2.h.rst.exceptions
+++ b/Documentation/media/videodev2.h.rst.exceptions
@@ -180,6 +180,7 @@ replace define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA reserved-formats
 # V4L2 format flags
 replace define V4L2_FMT_FLAG_COMPRESSED fmtdesc-flags
 replace define V4L2_FMT_FLAG_EMULATED fmtdesc-flags
+replace define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM fmtdesc-flags
 
 # V4L2 timecode types
 replace define V4L2_TC_TYPE_24FPS timecode-type
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 2427bc4d8eba..67077d52c59d 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -774,8 +774,9 @@ struct v4l2_fmtdesc {
 	__u32		    reserved[4];
 };
 
-#define V4L2_FMT_FLAG_COMPRESSED 0x0001
-#define V4L2_FMT_FLAG_EMULATED   0x0002
+#define V4L2_FMT_FLAG_COMPRESSED		0x0001
+#define V4L2_FMT_FLAG_EMULATED			0x0002
+#define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM	0x0004
 
 	/* Frame Size and frame rate enumeration */
 /*
-- 
cgit v1.2.3


From 60a039eb27f921abaef4778999db510dd75d5e48 Mon Sep 17 00:00:00 2001
From: Maxime Jourdan <mjourdan@baylibre.com>
Date: Thu, 15 Aug 2019 11:44:46 -0300
Subject: media: videodev2.h: add V4L2_FMT_FLAG_DYN_RESOLUTION

Add an enum_fmt format flag to specifically tag coded formats where
dynamic resolution switching is supported by the device.

This is useful for some codec drivers that can support dynamic
resolution switching for one or more of their listed coded formats. It
allows userspace to know whether it should extract the video parameters
itself, or if it can rely on the device to send V4L2_EVENT_SOURCE_CHANGE
when such changes are detected.

Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Reviewed-by: Alexandre Courbot <acourbot@chromium.org>
Acked-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 Documentation/media/uapi/v4l/vidioc-enum-fmt.rst | 8 ++++++++
 Documentation/media/videodev2.h.rst.exceptions   | 1 +
 include/uapi/linux/videodev2.h                   | 1 +
 3 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst b/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst
index ebc05ce74bdf..399ef1062bac 100644
--- a/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst
+++ b/Documentation/media/uapi/v4l/vidioc-enum-fmt.rst
@@ -135,6 +135,14 @@ one until ``EINVAL`` is returned.
 	between frames/fields. This flag can only be used in combination with
 	the ``V4L2_FMT_FLAG_COMPRESSED`` flag, since this applies to compressed
 	formats only. This flag is valid for stateful decoders only.
+    * - ``V4L2_FMT_FLAG_DYN_RESOLUTION``
+      - 0x0008
+      - Dynamic resolution switching is supported by the device for this
+	compressed bytestream format (aka coded format). It will notify the user
+	via the event ``V4L2_EVENT_SOURCE_CHANGE`` when changes in the video
+	parameters are detected. This flag can only be used in combination
+	with the ``V4L2_FMT_FLAG_COMPRESSED`` flag, since this applies to
+	compressed formats only. It is also only applies to stateful codecs.
 
 
 Return Value
diff --git a/Documentation/media/videodev2.h.rst.exceptions b/Documentation/media/videodev2.h.rst.exceptions
index a0640b6d0f68..adeb6b7a15cb 100644
--- a/Documentation/media/videodev2.h.rst.exceptions
+++ b/Documentation/media/videodev2.h.rst.exceptions
@@ -181,6 +181,7 @@ replace define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA reserved-formats
 replace define V4L2_FMT_FLAG_COMPRESSED fmtdesc-flags
 replace define V4L2_FMT_FLAG_EMULATED fmtdesc-flags
 replace define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM fmtdesc-flags
+replace define V4L2_FMT_FLAG_DYN_RESOLUTION fmtdesc-flags
 
 # V4L2 timecode types
 replace define V4L2_TC_TYPE_24FPS timecode-type
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 67077d52c59d..530638dffd93 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -777,6 +777,7 @@ struct v4l2_fmtdesc {
 #define V4L2_FMT_FLAG_COMPRESSED		0x0001
 #define V4L2_FMT_FLAG_EMULATED			0x0002
 #define V4L2_FMT_FLAG_CONTINUOUS_BYTESTREAM	0x0004
+#define V4L2_FMT_FLAG_DYN_RESOLUTION		0x0008
 
 	/* Frame Size and frame rate enumeration */
 /*
-- 
cgit v1.2.3


From db2cb969e8aed2395620fe2cb0bffd194c02b4b1 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Tue, 6 Aug 2019 11:30:00 +0200
Subject: vfio: re-arrange vfio region definitions

It is easy to miss already defined region types. Let's re-arrange
the definitions a bit and add more comments to make it hopefully
a bit clearer.

No functional change.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/uapi/linux/vfio.h | 45 ++++++++++++++++++++++++++-------------------
 1 file changed, 26 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748dac79..e809b22f6a60 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -295,15 +295,38 @@ struct vfio_region_info_cap_type {
 	__u32 subtype;	/* type specific */
 };
 
+/*
+ * List of region types, global per bus driver.
+ * If you introduce a new type, please add it here.
+ */
+
+/* PCI region type containing a PCI vendor part */
 #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
+#define VFIO_REGION_TYPE_GFX                    (1)
+#define VFIO_REGION_TYPE_CCW			(2)
+
+/* sub-types for VFIO_REGION_TYPE_PCI_* */
 
-/* 8086 Vendor sub-types */
+/* 8086 vendor PCI sub-types */
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
 #define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
 
-#define VFIO_REGION_TYPE_GFX                    (1)
+/* 10de vendor PCI sub-types */
+/*
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
+ */
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)
+
+/* 1014 vendor PCI sub-types */
+/*
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
+ * to do TLB invalidation on a GPU.
+ */
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
+
+/* sub-types for VFIO_REGION_TYPE_GFX */
 #define VFIO_REGION_SUBTYPE_GFX_EDID            (1)
 
 /**
@@ -353,25 +376,9 @@ struct vfio_region_gfx_edid {
 #define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
 };
 
-#define VFIO_REGION_TYPE_CCW			(2)
-/* ccw sub-types */
+/* sub-types for VFIO_REGION_TYPE_CCW */
 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
 
-/*
- * 10de vendor sub-type
- *
- * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
- */
-#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)
-
-/*
- * 1014 vendor sub-type
- *
- * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
- * to do TLB invalidation on a GPU.
- */
-#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
-
 /*
  * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
  * which allows direct access to non-MSIX registers which happened to be within
-- 
cgit v1.2.3


From a717072007e8aedd3f951726d8cf55454860b30d Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Tue, 23 Jul 2019 17:06:36 +0100
Subject: vfio/type1: Add IOVA range capability support

This allows the user-space to retrieve the supported IOVA
range(s), excluding any non-relaxable reserved regions. The
implementation is based on capability chains, added to
VFIO_IOMMU_GET_INFO ioctl.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 101 ++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h       |  26 ++++++++++-
 2 files changed, 126 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 56cf55776d6c..d0c5e768acb7 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2138,6 +2138,73 @@ static int vfio_domains_have_iommu_cache(struct vfio_iommu *iommu)
 	return ret;
 }
 
+static int vfio_iommu_iova_add_cap(struct vfio_info_cap *caps,
+		 struct vfio_iommu_type1_info_cap_iova_range *cap_iovas,
+		 size_t size)
+{
+	struct vfio_info_cap_header *header;
+	struct vfio_iommu_type1_info_cap_iova_range *iova_cap;
+
+	header = vfio_info_cap_add(caps, size,
+				   VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, 1);
+	if (IS_ERR(header))
+		return PTR_ERR(header);
+
+	iova_cap = container_of(header,
+				struct vfio_iommu_type1_info_cap_iova_range,
+				header);
+	iova_cap->nr_iovas = cap_iovas->nr_iovas;
+	memcpy(iova_cap->iova_ranges, cap_iovas->iova_ranges,
+	       cap_iovas->nr_iovas * sizeof(*cap_iovas->iova_ranges));
+	return 0;
+}
+
+static int vfio_iommu_iova_build_caps(struct vfio_iommu *iommu,
+				      struct vfio_info_cap *caps)
+{
+	struct vfio_iommu_type1_info_cap_iova_range *cap_iovas;
+	struct vfio_iova *iova;
+	size_t size;
+	int iovas = 0, i = 0, ret;
+
+	mutex_lock(&iommu->lock);
+
+	list_for_each_entry(iova, &iommu->iova_list, list)
+		iovas++;
+
+	if (!iovas) {
+		/*
+		 * Return 0 as a container with a single mdev device
+		 * will have an empty list
+		 */
+		ret = 0;
+		goto out_unlock;
+	}
+
+	size = sizeof(*cap_iovas) + (iovas * sizeof(*cap_iovas->iova_ranges));
+
+	cap_iovas = kzalloc(size, GFP_KERNEL);
+	if (!cap_iovas) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	cap_iovas->nr_iovas = iovas;
+
+	list_for_each_entry(iova, &iommu->iova_list, list) {
+		cap_iovas->iova_ranges[i].start = iova->start;
+		cap_iovas->iova_ranges[i].end = iova->end;
+		i++;
+	}
+
+	ret = vfio_iommu_iova_add_cap(caps, cap_iovas, size);
+
+	kfree(cap_iovas);
+out_unlock:
+	mutex_unlock(&iommu->lock);
+	return ret;
+}
+
 static long vfio_iommu_type1_ioctl(void *iommu_data,
 				   unsigned int cmd, unsigned long arg)
 {
@@ -2159,19 +2226,53 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 		}
 	} else if (cmd == VFIO_IOMMU_GET_INFO) {
 		struct vfio_iommu_type1_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		unsigned long capsz;
+		int ret;
 
 		minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
 
+		/* For backward compatibility, cannot require this */
+		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+
 		if (copy_from_user(&info, (void __user *)arg, minsz))
 			return -EFAULT;
 
 		if (info.argsz < minsz)
 			return -EINVAL;
 
+		if (info.argsz >= capsz) {
+			minsz = capsz;
+			info.cap_offset = 0; /* output, no-recopy necessary */
+		}
+
 		info.flags = VFIO_IOMMU_INFO_PGSIZES;
 
 		info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
 
+		ret = vfio_iommu_iova_build_caps(iommu, &caps);
+		if (ret)
+			return ret;
+
+		if (caps.size) {
+			info.flags |= VFIO_IOMMU_INFO_CAPS;
+
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						sizeof(info), caps.buf,
+						caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
+		}
+
 		return copy_to_user((void __user *)arg, &info, minsz) ?
 			-EFAULT : 0;
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8f10748dac79..1259dccd09d2 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -714,7 +714,31 @@ struct vfio_iommu_type1_info {
 	__u32	argsz;
 	__u32	flags;
 #define VFIO_IOMMU_INFO_PGSIZES (1 << 0)	/* supported page sizes info */
-	__u64	iova_pgsizes;		/* Bitmap of supported page sizes */
+#define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
+	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
+	__u32   cap_offset;	/* Offset within info struct of first cap */
+};
+
+/*
+ * The IOVA capability allows to report the valid IOVA range(s)
+ * excluding any non-relaxable reserved regions exposed by
+ * devices attached to the container. Any DMA map attempt
+ * outside the valid iova range will return error.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE  1
+
+struct vfio_iova_range {
+	__u64	start;
+	__u64	end;
+};
+
+struct vfio_iommu_type1_info_cap_iova_range {
+	struct	vfio_info_cap_header header;
+	__u32	nr_iovas;
+	__u32	reserved;
+	struct	vfio_iova_range iova_ranges[];
 };
 
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
-- 
cgit v1.2.3


From 99b60d56a35b18af267f275559a530db372bfad7 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 16 Aug 2019 21:56:27 +0200
Subject: net: phy: add EEE-related constants

Add EEE-related constants. This includes the new MMD EEE registers for
NBase-T / 802.3bz.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mdio.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/mdio.h b/include/uapi/linux/mdio.h
index 0a552061ff1c..4bcb41c71b8c 100644
--- a/include/uapi/linux/mdio.h
+++ b/include/uapi/linux/mdio.h
@@ -45,11 +45,14 @@
 #define MDIO_AN_ADVERTISE	16	/* AN advertising (base page) */
 #define MDIO_AN_LPA		19	/* AN LP abilities (base page) */
 #define MDIO_PCS_EEE_ABLE	20	/* EEE Capability register */
+#define MDIO_PCS_EEE_ABLE2	21	/* EEE Capability register 2 */
 #define MDIO_PMA_NG_EXTABLE	21	/* 2.5G/5G PMA/PMD extended ability */
 #define MDIO_PCS_EEE_WK_ERR	22	/* EEE wake error counter */
 #define MDIO_PHYXS_LNSTAT	24	/* PHY XGXS lane state */
 #define MDIO_AN_EEE_ADV		60	/* EEE advertisement */
 #define MDIO_AN_EEE_LPABLE	61	/* EEE link partner ability */
+#define MDIO_AN_EEE_ADV2	62	/* EEE advertisement 2 */
+#define MDIO_AN_EEE_LPABLE2	63	/* EEE link partner ability 2 */
 
 /* Media-dependent registers. */
 #define MDIO_PMA_10GBT_SWAPPOL	130	/* 10GBASE-T pair swap & polarity */
@@ -276,6 +279,13 @@
 #define MDIO_EEE_1000KX		0x0010	/* 1000KX EEE cap */
 #define MDIO_EEE_10GKX4		0x0020	/* 10G KX4 EEE cap */
 #define MDIO_EEE_10GKR		0x0040	/* 10G KR EEE cap */
+#define MDIO_EEE_40GR_FW	0x0100	/* 40G R fast wake */
+#define MDIO_EEE_40GR_DS	0x0200	/* 40G R deep sleep */
+#define MDIO_EEE_100GR_FW	0x1000	/* 100G R fast wake */
+#define MDIO_EEE_100GR_DS	0x2000	/* 100G R deep sleep */
+
+#define MDIO_EEE_2_5GT		0x0001	/* 2.5GT EEE cap */
+#define MDIO_EEE_5GT		0x0002	/* 5GT EEE cap */
 
 /* 2.5G/5G Extended abilities register. */
 #define MDIO_PMA_NG_EXTABLE_2_5GBT	0x0001	/* 2.5GBASET ability */
-- 
cgit v1.2.3


From 3a7ef457e85173a5b9ec7a03016db5a57b717b33 Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Tue, 13 Aug 2019 00:46:01 +0200
Subject: ipv6: Fix return value of ipv6_mc_may_pull() for malformed packets

Commit ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and
ipv6_mc_check_mld() calls") replaces direct calls to pskb_may_pull()
in br_ipv6_multicast_mld2_report() with calls to ipv6_mc_may_pull(),
that returns -EINVAL on buffers too short to be valid IPv6 packets,
while maintaining the previous handling of the return code.

This leads to the direct opposite of the intended effect: if the
packet is malformed, -EINVAL evaluates as true, and we'll happily
proceed with the processing.

Return 0 if the packet is too short, in the same way as this was
fixed for IPv4 by commit 083b78a9ed64 ("ip: fix ip_mc_may_pull()
return value").

I don't have a reproducer for this, unlike the one referred to by
the IPv4 commit, but this is clearly broken.

Fixes: ba5ea614622d ("bridge: simplify ip_mc_check_igmp() and ipv6_mc_check_mld() calls")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index becdad576859..3f62b347b04a 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -206,7 +206,7 @@ static inline int ipv6_mc_may_pull(struct sk_buff *skb,
 				   unsigned int len)
 {
 	if (skb_transport_offset(skb) + ipv6_transport_len(skb) < len)
-		return -EINVAL;
+		return 0;
 
 	return pskb_may_pull(skb, len);
 }
-- 
cgit v1.2.3


From 4e27428fb5626f966aa961b1aad8751f2ebeef72 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 19 Aug 2019 22:02:43 +0800
Subject: sctp: add asconf_enable in struct sctp_endpoint

This patch is to make addip/asconf flag per endpoint,
and its value is initialized by the per netns flag,
net->sctp.addip_enable.

It also replaces the checks of net->sctp.addip_enable
with ep->asconf_enable in some places.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  1 +
 net/sctp/endpointola.c     |  3 ++-
 net/sctp/sm_make_chunk.c   | 18 +++++++++---------
 net/sctp/socket.c          | 17 +++++++----------
 4 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ba5c4f6eede5..daac1eff18c9 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1325,6 +1325,7 @@ struct sctp_endpoint {
 	__u8  auth_enable:1,
 	      intl_enable:1,
 	      prsctp_enable:1,
+	      asconf_enable:1,
 	      reconf_enable:1;
 
 	__u8  strreset_enable;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 69cebb2c998b..38b8d7cf8557 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -52,6 +52,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	if (!ep->digest)
 		return NULL;
 
+	ep->asconf_enable = net->sctp.addip_enable;
 	ep->auth_enable = net->sctp.auth_enable;
 	if (ep->auth_enable) {
 		/* Allocate space for HMACS and CHUNKS authentication
@@ -86,7 +87,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 		/* If the Add-IP functionality is enabled, we must
 		 * authenticate, ASCONF and ASCONF-ACK chunks
 		 */
-		if (net->sctp.addip_enable) {
+		if (ep->asconf_enable) {
 			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
 			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
 			auth_chunks->param_hdr.length =
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 36bd8a6e82df..338278f24c24 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -207,7 +207,6 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 				  const struct sctp_bind_addr *bp,
 				  gfp_t gfp, int vparam_len)
 {
-	struct net *net = sock_net(asoc->base.sk);
 	struct sctp_supported_ext_param ext_param;
 	struct sctp_adaptation_ind_param aiparam;
 	struct sctp_paramhdr *auth_chunks = NULL;
@@ -255,7 +254,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	 *  the ASCONF,the ASCONF-ACK, and the AUTH  chunks in its INIT and
 	 *  INIT-ACK parameters.
 	 */
-	if (net->sctp.addip_enable) {
+	if (asoc->ep->asconf_enable) {
 		extensions[num_ext] = SCTP_CID_ASCONF;
 		extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
 		num_ext += 2;
@@ -1964,7 +1963,9 @@ static int sctp_process_hn_param(const struct sctp_association *asoc,
 	return 0;
 }
 
-static int sctp_verify_ext_param(struct net *net, union sctp_params param)
+static int sctp_verify_ext_param(struct net *net,
+				 const struct sctp_endpoint *ep,
+				 union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
 	int have_asconf = 0;
@@ -1991,7 +1992,7 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param)
 	if (net->sctp.addip_noauth)
 		return 1;
 
-	if (net->sctp.addip_enable && !have_auth && have_asconf)
+	if (ep->asconf_enable && !have_auth && have_asconf)
 		return 0;
 
 	return 1;
@@ -2001,7 +2002,6 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 				   union sctp_params param)
 {
 	__u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
-	struct net *net = sock_net(asoc->base.sk);
 	int i;
 
 	for (i = 0; i < num_ext; i++) {
@@ -2023,7 +2023,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc,
 			break;
 		case SCTP_CID_ASCONF:
 		case SCTP_CID_ASCONF_ACK:
-			if (net->sctp.addip_enable)
+			if (asoc->ep->asconf_enable)
 				asoc->peer.asconf_capable = 1;
 			break;
 		case SCTP_CID_I_DATA:
@@ -2145,12 +2145,12 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
 		break;
 
 	case SCTP_PARAM_SUPPORTED_EXT:
-		if (!sctp_verify_ext_param(net, param))
+		if (!sctp_verify_ext_param(net, ep, param))
 			return SCTP_IERROR_ABORT;
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (net->sctp.addip_enable)
+		if (ep->asconf_enable)
 			break;
 		goto fallthrough;
 
@@ -2605,7 +2605,7 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_SET_PRIMARY:
-		if (!net->sctp.addip_enable)
+		if (!ep->asconf_enable)
 			goto fall_through;
 
 		addr_param = param.v + sizeof(struct sctp_addip_param);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 12503e16fa96..559793f7f72a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -524,7 +524,6 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 				   struct sockaddr	*addrs,
 				   int 			addrcnt)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock		*sp;
 	struct sctp_endpoint		*ep;
 	struct sctp_association		*asoc;
@@ -539,12 +538,12 @@ static int sctp_send_asconf_add_ip(struct sock		*sk,
 	int 				i;
 	int 				retval = 0;
 
-	if (!net->sctp.addip_enable)
-		return retval;
-
 	sp = sctp_sk(sk);
 	ep = sp->ep;
 
+	if (!ep->asconf_enable)
+		return retval;
+
 	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
 		 __func__, sk, addrs, addrcnt);
 
@@ -727,7 +726,6 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 				   struct sockaddr	*addrs,
 				   int			addrcnt)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock	*sp;
 	struct sctp_endpoint	*ep;
 	struct sctp_association	*asoc;
@@ -743,12 +741,12 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 	int			stored = 0;
 
 	chunk = NULL;
-	if (!net->sctp.addip_enable)
-		return retval;
-
 	sp = sctp_sk(sk);
 	ep = sp->ep;
 
+	if (!ep->asconf_enable)
+		return retval;
+
 	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
 		 __func__, sk, addrs, addrcnt);
 
@@ -3330,7 +3328,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
 static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
 					     unsigned int optlen)
 {
-	struct net *net = sock_net(sk);
 	struct sctp_sock	*sp;
 	struct sctp_association	*asoc = NULL;
 	struct sctp_setpeerprim	prim;
@@ -3340,7 +3337,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva
 
 	sp = sctp_sk(sk);
 
-	if (!net->sctp.addip_enable)
+	if (!sp->ep->asconf_enable)
 		return -EPERM;
 
 	if (optlen != sizeof(struct sctp_setpeerprim))
-- 
cgit v1.2.3


From df2c71ffdfae58961981d7cbcccea93688fc4e96 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 19 Aug 2019 22:02:46 +0800
Subject: sctp: add SCTP_ASCONF_SUPPORTED sockopt

SCTP_ASCONF_SUPPORTED sockopt is used to set enpoint's asconf
flag. With this feature, each endpoint will have its own flag
for its future asoc's asconf_capable, instead of netns asconf
flag.

Note that when both ep's asconf_enable and auth_enable are
enabled, SCTP_CID_ASCONF and SCTP_CID_ASCONF_ACK should be
added into auth_chunk_list.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |  1 +
 net/sctp/socket.c         | 82 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index b8f2c4d56532..9b9b82debc0d 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -134,6 +134,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_INTERLEAVING_SUPPORTED	125
 #define SCTP_SENDMSG_CONNECT	126
 #define SCTP_EVENT	127
+#define SCTP_ASCONF_SUPPORTED	128
 
 /* PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE	0x0000
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 559793f7f72a..b21a70708405 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4496,6 +4496,42 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval,
 	return retval;
 }
 
+static int sctp_setsockopt_asconf_supported(struct sock *sk,
+					    char __user *optval,
+					    unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	struct sctp_endpoint *ep;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	ep = sctp_sk(sk)->ep;
+	ep->asconf_enable = !!params.assoc_value;
+
+	if (ep->asconf_enable && ep->auth_enable) {
+		sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+		sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
+	}
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4696,6 +4732,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_EVENT:
 		retval = sctp_setsockopt_event(sk, optval, optlen);
 		break;
+	case SCTP_ASCONF_SUPPORTED:
+		retval = sctp_setsockopt_asconf_supported(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7675,6 +7714,45 @@ static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval,
 	return 0;
 }
 
+static int sctp_getsockopt_asconf_supported(struct sock *sk, int len,
+					    char __user *optval,
+					    int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	params.assoc_value = asoc ? asoc->peer.asconf_capable
+				  : sctp_sk(sk)->ep->asconf_enable;
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -7876,6 +7954,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 	case SCTP_EVENT:
 		retval = sctp_getsockopt_event(sk, len, optval, optlen);
 		break;
+	case SCTP_ASCONF_SUPPORTED:
+		retval = sctp_getsockopt_asconf_supported(sk, len, optval,
+							  optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3


From 03f961270f4256fe9f47b94aea889bd26877216b Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 19 Aug 2019 22:02:48 +0800
Subject: sctp: add sctp_auth_init and sctp_auth_free

This patch is to factor out sctp_auth_init and sctp_auth_free
functions, and sctp_auth_init will also be used in the next
patch for SCTP_AUTH_SUPPORTED sockopt.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/auth.h |  2 ++
 net/sctp/auth.c         | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/sctp/endpointola.c  | 61 ++++---------------------------------------
 3 files changed, 76 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/auth.h b/include/net/sctp/auth.h
index caaae2de9099..d4b3b2dcd15b 100644
--- a/include/net/sctp/auth.h
+++ b/include/net/sctp/auth.h
@@ -107,5 +107,7 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep,
 			 struct sctp_association *asoc, __u16 key_id);
 int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
 			   struct sctp_association *asoc, __u16 key_id);
+int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp);
+void sctp_auth_free(struct sctp_endpoint *ep);
 
 #endif
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 61b00904d830..4278764d82b8 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -1007,3 +1007,72 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep,
 
 	return 0;
 }
+
+int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp)
+{
+	int err = -ENOMEM;
+
+	/* Allocate space for HMACS and CHUNKS authentication
+	 * variables.  There are arrays that we encode directly
+	 * into parameters to make the rest of the operations easier.
+	 */
+	if (!ep->auth_hmacs_list) {
+		struct sctp_hmac_algo_param *auth_hmacs;
+
+		auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
+						 SCTP_AUTH_NUM_HMACS), gfp);
+		if (!auth_hmacs)
+			goto nomem;
+		/* Initialize the HMACS parameter.
+		 * SCTP-AUTH: Section 3.3
+		 *    Every endpoint supporting SCTP chunk authentication MUST
+		 *    support the HMAC based on the SHA-1 algorithm.
+		 */
+		auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
+		auth_hmacs->param_hdr.length =
+				htons(sizeof(struct sctp_paramhdr) + 2);
+		auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
+		ep->auth_hmacs_list = auth_hmacs;
+	}
+
+	if (!ep->auth_chunk_list) {
+		struct sctp_chunks_param *auth_chunks;
+
+		auth_chunks = kzalloc(sizeof(*auth_chunks) +
+				      SCTP_NUM_CHUNK_TYPES, gfp);
+		if (!auth_chunks)
+			goto nomem;
+		/* Initialize the CHUNKS parameter */
+		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
+		auth_chunks->param_hdr.length =
+				htons(sizeof(struct sctp_paramhdr));
+		ep->auth_chunk_list = auth_chunks;
+	}
+
+	/* Allocate and initialize transorms arrays for supported
+	 * HMACs.
+	 */
+	err = sctp_auth_init_hmacs(ep, gfp);
+	if (err)
+		goto nomem;
+
+	return 0;
+
+nomem:
+	/* Free all allocations */
+	kfree(ep->auth_hmacs_list);
+	kfree(ep->auth_chunk_list);
+	ep->auth_hmacs_list = NULL;
+	ep->auth_chunk_list = NULL;
+	return err;
+}
+
+void sctp_auth_free(struct sctp_endpoint *ep)
+{
+	kfree(ep->auth_hmacs_list);
+	kfree(ep->auth_chunk_list);
+	ep->auth_hmacs_list = NULL;
+	ep->auth_chunk_list = NULL;
+	sctp_auth_destroy_hmacs(ep->auth_hmacs);
+	ep->auth_hmacs = NULL;
+}
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 38b8d7cf8557..75a407df32c5 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -43,10 +43,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 						gfp_t gfp)
 {
 	struct net *net = sock_net(sk);
-	struct sctp_hmac_algo_param *auth_hmacs = NULL;
-	struct sctp_chunks_param *auth_chunks = NULL;
 	struct sctp_shared_key *null_key;
-	int err;
 
 	ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
 	if (!ep->digest)
@@ -55,51 +52,12 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	ep->asconf_enable = net->sctp.addip_enable;
 	ep->auth_enable = net->sctp.auth_enable;
 	if (ep->auth_enable) {
-		/* Allocate space for HMACS and CHUNKS authentication
-		 * variables.  There are arrays that we encode directly
-		 * into parameters to make the rest of the operations easier.
-		 */
-		auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids,
-						 SCTP_AUTH_NUM_HMACS), gfp);
-		if (!auth_hmacs)
-			goto nomem;
-
-		auth_chunks = kzalloc(sizeof(*auth_chunks) +
-				      SCTP_NUM_CHUNK_TYPES, gfp);
-		if (!auth_chunks)
+		if (sctp_auth_init(ep, gfp))
 			goto nomem;
-
-		/* Initialize the HMACS parameter.
-		 * SCTP-AUTH: Section 3.3
-		 *    Every endpoint supporting SCTP chunk authentication MUST
-		 *    support the HMAC based on the SHA-1 algorithm.
-		 */
-		auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO;
-		auth_hmacs->param_hdr.length =
-					htons(sizeof(struct sctp_paramhdr) + 2);
-		auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1);
-
-		/* Initialize the CHUNKS parameter */
-		auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS;
-		auth_chunks->param_hdr.length =
-					htons(sizeof(struct sctp_paramhdr));
-
-		/* If the Add-IP functionality is enabled, we must
-		 * authenticate, ASCONF and ASCONF-ACK chunks
-		 */
 		if (ep->asconf_enable) {
-			auth_chunks->chunks[0] = SCTP_CID_ASCONF;
-			auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
-			auth_chunks->param_hdr.length =
-					htons(sizeof(struct sctp_paramhdr) + 2);
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
 		}
-
-		/* Allocate and initialize transorms arrays for supported
-		 * HMACs.
-		 */
-		err = sctp_auth_init_hmacs(ep, gfp);
-		if (err)
-			goto nomem;
 	}
 
 	/* Initialize the base structure. */
@@ -146,8 +104,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	/* Add the null key to the endpoint shared keys list and
 	 * set the hmcas and chunks pointers.
 	 */
-	ep->auth_hmacs_list = auth_hmacs;
-	ep->auth_chunk_list = auth_chunks;
 	ep->prsctp_enable = net->sctp.prsctp_enable;
 	ep->reconf_enable = net->sctp.reconf_enable;
 
@@ -158,11 +114,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	return ep;
 
 nomem_shkey:
-	sctp_auth_destroy_hmacs(ep->auth_hmacs);
+	sctp_auth_free(ep);
 nomem:
-	/* Free all allocations */
-	kfree(auth_hmacs);
-	kfree(auth_chunks);
 	kfree(ep->digest);
 	return NULL;
 
@@ -245,11 +198,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 	 * chunks and hmacs arrays that were allocated
 	 */
 	sctp_auth_destroy_keys(&ep->endpoint_shared_keys);
-	kfree(ep->auth_hmacs_list);
-	kfree(ep->auth_chunk_list);
-
-	/* AUTH - Free any allocated HMAC transform containers */
-	sctp_auth_destroy_hmacs(ep->auth_hmacs);
+	sctp_auth_free(ep);
 
 	/* Cleanup. */
 	sctp_inq_free(&ep->base.inqueue);
-- 
cgit v1.2.3


From 56dd525abd56f7acd7b44a52935726e3ada4916c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 19 Aug 2019 22:02:49 +0800
Subject: sctp: add SCTP_AUTH_SUPPORTED sockopt

SCTP_AUTH_SUPPORTED sockopt is used to set enpoint's auth
flag. With this feature, each endpoint will have its own
flag for its future asoc's auth_capable, instead of netns
auth flag.

Note that when both ep's auth_enable is enabled, endpoint
auth related data should be initialized. If asconf_enable
is also set, SCTP_CID_ASCONF/SCTP_CID_ASCONF_ACK should
be added into auth_chunk_list.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |  1 +
 net/sctp/socket.c         | 86 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 9b9b82debc0d..62527aca8477 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -135,6 +135,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_SENDMSG_CONNECT	126
 #define SCTP_EVENT	127
 #define SCTP_ASCONF_SUPPORTED	128
+#define SCTP_AUTH_SUPPORTED	129
 
 /* PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE	0x0000
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dcde8d92c568..82bc25223cfe 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4520,6 +4520,46 @@ out:
 	return retval;
 }
 
+static int sctp_setsockopt_auth_supported(struct sock *sk,
+					  char __user *optval,
+					  unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	struct sctp_endpoint *ep;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	ep = sctp_sk(sk)->ep;
+	if (params.assoc_value) {
+		retval = sctp_auth_init(ep, GFP_KERNEL);
+		if (retval)
+			goto out;
+		if (ep->asconf_enable) {
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF);
+			sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK);
+		}
+	}
+
+	ep->auth_enable = !!params.assoc_value;
+	retval = 0;
+
+out:
+	return retval;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4723,6 +4763,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_ASCONF_SUPPORTED:
 		retval = sctp_setsockopt_asconf_supported(sk, optval, optlen);
 		break;
+	case SCTP_AUTH_SUPPORTED:
+		retval = sctp_setsockopt_auth_supported(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7746,6 +7789,45 @@ out:
 	return retval;
 }
 
+static int sctp_getsockopt_auth_supported(struct sock *sk, int len,
+					  char __user *optval,
+					  int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	params.assoc_value = asoc ? asoc->peer.auth_capable
+				  : sctp_sk(sk)->ep->auth_enable;
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -7951,6 +8033,10 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_asconf_supported(sk, len, optval,
 							  optlen);
 		break;
+	case SCTP_AUTH_SUPPORTED:
+		retval = sctp_getsockopt_auth_supported(sk, len, optval,
+							optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3


From e6b1db98cf4d54d9ea59cfcc195f70dc946fdd38 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Mon, 19 Aug 2019 17:17:37 -0700
Subject: security: Support early LSMs

The lockdown module is intended to allow for kernels to be locked down
early in boot - sufficiently early that we don't have the ability to
kmalloc() yet. Add support for early initialisation of some LSMs, and
then add them to the list of names when we do full initialisation later.
Early LSMs are initialised in link order and cannot be overridden via
boot parameters, and cannot make use of kmalloc() (since the allocator
isn't initialised yet).

(Fixed by Stephen Rothwell to include a stub to fix builds when
!CONFIG_SECURITY)

Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/asm-generic/vmlinux.lds.h |  8 ++++++-
 include/linux/lsm_hooks.h         |  6 +++++
 include/linux/security.h          |  6 +++++
 init/main.c                       |  1 +
 security/security.c               | 50 ++++++++++++++++++++++++++++++++-------
 5 files changed, 62 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 088987e9a3ea..c1807d14daa3 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -208,8 +208,13 @@
 			__start_lsm_info = .;				\
 			KEEP(*(.lsm_info.init))				\
 			__end_lsm_info = .;
+#define EARLY_LSM_TABLE()	. = ALIGN(8);				\
+			__start_early_lsm_info = .;			\
+			KEEP(*(.early_lsm_info.init))			\
+			__end_early_lsm_info = .;
 #else
 #define LSM_TABLE()
+#define EARLY_LSM_TABLE()
 #endif
 
 #define ___OF_TABLE(cfg, name)	_OF_TABLE_##cfg(name)
@@ -609,7 +614,8 @@
 	ACPI_PROBE_TABLE(irqchip)					\
 	ACPI_PROBE_TABLE(timer)						\
 	EARLYCON_TABLE()						\
-	LSM_TABLE()
+	LSM_TABLE()							\
+	EARLY_LSM_TABLE()
 
 #define INIT_TEXT							\
 	*(.init.text .init.text.*)					\
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 47f58cfb6a19..b02e8bb6654d 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -2104,12 +2104,18 @@ struct lsm_info {
 };
 
 extern struct lsm_info __start_lsm_info[], __end_lsm_info[];
+extern struct lsm_info __start_early_lsm_info[], __end_early_lsm_info[];
 
 #define DEFINE_LSM(lsm)							\
 	static struct lsm_info __lsm_##lsm				\
 		__used __section(.lsm_info.init)			\
 		__aligned(sizeof(unsigned long))
 
+#define DEFINE_EARLY_LSM(lsm)						\
+	static struct lsm_info __early_lsm_##lsm			\
+		__used __section(.early_lsm_info.init)			\
+		__aligned(sizeof(unsigned long))
+
 #ifdef CONFIG_SECURITY_SELINUX_DISABLE
 /*
  * Assuring the safety of deleting a security module is up to
diff --git a/include/linux/security.h b/include/linux/security.h
index 659071c2e57c..c5dd90981c98 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -195,6 +195,7 @@ int unregister_lsm_notifier(struct notifier_block *nb);
 
 /* prototypes */
 extern int security_init(void);
+extern int early_security_init(void);
 
 /* Security operations */
 int security_binder_set_context_mgr(struct task_struct *mgr);
@@ -423,6 +424,11 @@ static inline int security_init(void)
 	return 0;
 }
 
+static inline int early_security_init(void)
+{
+	return 0;
+}
+
 static inline int security_binder_set_context_mgr(struct task_struct *mgr)
 {
 	return 0;
diff --git a/init/main.c b/init/main.c
index 66a196c5e4c3..598effd29a0a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -569,6 +569,7 @@ asmlinkage __visible void __init start_kernel(void)
 	boot_cpu_init();
 	page_address_init();
 	pr_notice("%s", linux_banner);
+	early_security_init();
 	setup_arch(&command_line);
 	mm_init_cpumask(&init_mm);
 	setup_command_line(command_line);
diff --git a/security/security.c b/security/security.c
index f493db0bf62a..ef4a0111c8b4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -33,6 +33,7 @@
 
 /* How many LSMs were built into the kernel? */
 #define LSM_COUNT (__end_lsm_info - __start_lsm_info)
+#define EARLY_LSM_COUNT (__end_early_lsm_info - __start_early_lsm_info)
 
 struct security_hook_heads security_hook_heads __lsm_ro_after_init;
 static ATOMIC_NOTIFIER_HEAD(lsm_notifier_chain);
@@ -277,6 +278,8 @@ static void __init ordered_lsm_parse(const char *order, const char *origin)
 static void __init lsm_early_cred(struct cred *cred);
 static void __init lsm_early_task(struct task_struct *task);
 
+static int lsm_append(const char *new, char **result);
+
 static void __init ordered_lsm_init(void)
 {
 	struct lsm_info **lsm;
@@ -323,6 +326,26 @@ static void __init ordered_lsm_init(void)
 	kfree(ordered_lsms);
 }
 
+int __init early_security_init(void)
+{
+	int i;
+	struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
+	struct lsm_info *lsm;
+
+	for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
+	     i++)
+		INIT_HLIST_HEAD(&list[i]);
+
+	for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
+		if (!lsm->enabled)
+			lsm->enabled = &lsm_enabled_true;
+		prepare_lsm(lsm);
+		initialize_lsm(lsm);
+	}
+
+	return 0;
+}
+
 /**
  * security_init - initializes the security framework
  *
@@ -330,14 +353,18 @@ static void __init ordered_lsm_init(void)
  */
 int __init security_init(void)
 {
-	int i;
-	struct hlist_head *list = (struct hlist_head *) &security_hook_heads;
+	struct lsm_info *lsm;
 
 	pr_info("Security Framework initializing\n");
 
-	for (i = 0; i < sizeof(security_hook_heads) / sizeof(struct hlist_head);
-	     i++)
-		INIT_HLIST_HEAD(&list[i]);
+	/*
+	 * Append the names of the early LSM modules now that kmalloc() is
+	 * available
+	 */
+	for (lsm = __start_early_lsm_info; lsm < __end_early_lsm_info; lsm++) {
+		if (lsm->enabled)
+			lsm_append(lsm->name, &lsm_names);
+	}
 
 	/* Load LSMs in specified order. */
 	ordered_lsm_init();
@@ -384,7 +411,7 @@ static bool match_last_lsm(const char *list, const char *lsm)
 	return !strcmp(last, lsm);
 }
 
-static int lsm_append(char *new, char **result)
+static int lsm_append(const char *new, char **result)
 {
 	char *cp;
 
@@ -422,8 +449,15 @@ void __init security_add_hooks(struct security_hook_list *hooks, int count,
 		hooks[i].lsm = lsm;
 		hlist_add_tail_rcu(&hooks[i].list, hooks[i].head);
 	}
-	if (lsm_append(lsm, &lsm_names) < 0)
-		panic("%s - Cannot get early memory.\n", __func__);
+
+	/*
+	 * Don't try to append during early_security_init(), we'll come back
+	 * and fix this up afterwards.
+	 */
+	if (slab_is_available()) {
+		if (lsm_append(lsm, &lsm_names) < 0)
+			panic("%s - Cannot get early memory.\n", __func__);
+	}
 }
 
 int call_lsm_notifier(enum lsm_event event, void *data)
-- 
cgit v1.2.3


From 9e47d31d6a57b5babaca36d42b0d11b6db6019b7 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Mon, 19 Aug 2019 17:17:38 -0700
Subject: security: Add a "locked down" LSM hook

Add a mechanism to allow LSMs to make a policy decision around whether
kernel functionality that would allow tampering with or examining the
runtime state of the kernel should be permitted.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_hooks.h |  7 +++++++
 include/linux/security.h  | 32 ++++++++++++++++++++++++++++++++
 security/security.c       |  6 ++++++
 3 files changed, 45 insertions(+)

(limited to 'include')

diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index b02e8bb6654d..2f4ba9062fb8 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1446,6 +1446,11 @@
  * @bpf_prog_free_security:
  *	Clean up the security information stored inside bpf prog.
  *
+ * @locked_down
+ *     Determine whether a kernel feature that potentially enables arbitrary
+ *     code execution in kernel space should be permitted.
+ *
+ *     @what: kernel feature being accessed
  */
 union security_list_options {
 	int (*binder_set_context_mgr)(struct task_struct *mgr);
@@ -1807,6 +1812,7 @@ union security_list_options {
 	int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux);
 	void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
 #endif /* CONFIG_BPF_SYSCALL */
+	int (*locked_down)(enum lockdown_reason what);
 };
 
 struct security_hook_heads {
@@ -2046,6 +2052,7 @@ struct security_hook_heads {
 	struct hlist_head bpf_prog_alloc_security;
 	struct hlist_head bpf_prog_free_security;
 #endif /* CONFIG_BPF_SYSCALL */
+	struct hlist_head locked_down;
 } __randomize_layout;
 
 /*
diff --git a/include/linux/security.h b/include/linux/security.h
index c5dd90981c98..04cf48fab15d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -77,6 +77,33 @@ enum lsm_event {
 	LSM_POLICY_CHANGE,
 };
 
+/*
+ * These are reasons that can be passed to the security_locked_down()
+ * LSM hook. Lockdown reasons that protect kernel integrity (ie, the
+ * ability for userland to modify kernel code) are placed before
+ * LOCKDOWN_INTEGRITY_MAX.  Lockdown reasons that protect kernel
+ * confidentiality (ie, the ability for userland to extract
+ * information from the running kernel that would otherwise be
+ * restricted) are placed before LOCKDOWN_CONFIDENTIALITY_MAX.
+ *
+ * LSM authors should note that the semantics of any given lockdown
+ * reason are not guaranteed to be stable - the same reason may block
+ * one set of features in one kernel release, and a slightly different
+ * set of features in a later kernel release. LSMs that seek to expose
+ * lockdown policy at any level of granularity other than "none",
+ * "integrity" or "confidentiality" are responsible for either
+ * ensuring that they expose a consistent level of functionality to
+ * userland, or ensuring that userland is aware that this is
+ * potentially a moving target. It is easy to misuse this information
+ * in a way that could break userspace. Please be careful not to do
+ * so.
+ */
+enum lockdown_reason {
+	LOCKDOWN_NONE,
+	LOCKDOWN_INTEGRITY_MAX,
+	LOCKDOWN_CONFIDENTIALITY_MAX,
+};
+
 /* These functions are in security/commoncap.c */
 extern int cap_capable(const struct cred *cred, struct user_namespace *ns,
 		       int cap, unsigned int opts);
@@ -393,6 +420,7 @@ void security_inode_invalidate_secctx(struct inode *inode);
 int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen);
 int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen);
 int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen);
+int security_locked_down(enum lockdown_reason what);
 #else /* CONFIG_SECURITY */
 
 static inline int call_lsm_notifier(enum lsm_event event, void *data)
@@ -1210,6 +1238,10 @@ static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32
 {
 	return -EOPNOTSUPP;
 }
+static inline int security_locked_down(enum lockdown_reason what)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY */
 
 #ifdef CONFIG_SECURITY_NETWORK
diff --git a/security/security.c b/security/security.c
index ef4a0111c8b4..7fc373486d7a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2389,3 +2389,9 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux)
 	call_void_hook(bpf_prog_free_security, aux);
 }
 #endif /* CONFIG_BPF_SYSCALL */
+
+int security_locked_down(enum lockdown_reason what)
+{
+	return call_int_hook(locked_down, 0, what);
+}
+EXPORT_SYMBOL(security_locked_down);
-- 
cgit v1.2.3


From 000d388ed3bbed745f366ce71b2bb7c2ee70f449 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Mon, 19 Aug 2019 17:17:39 -0700
Subject: security: Add a static lockdown policy LSM

While existing LSMs can be extended to handle lockdown policy,
distributions generally want to be able to apply a straightforward
static policy. This patch adds a simple LSM that can be configured to
reject either integrity or all lockdown queries, and can be configured
at runtime (through securityfs), boot time (via a kernel parameter) or
build time (via a kconfig option). Based on initial code by David
Howells.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 Documentation/admin-guide/kernel-parameters.txt |   9 ++
 include/linux/security.h                        |   3 +
 security/Kconfig                                |  11 +-
 security/Makefile                               |   2 +
 security/lockdown/Kconfig                       |  46 +++++++
 security/lockdown/Makefile                      |   1 +
 security/lockdown/lockdown.c                    | 169 ++++++++++++++++++++++++
 7 files changed, 236 insertions(+), 5 deletions(-)
 create mode 100644 security/lockdown/Kconfig
 create mode 100644 security/lockdown/Makefile
 create mode 100644 security/lockdown/lockdown.c

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 138f6664b2e2..0f28350f1ee6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2244,6 +2244,15 @@
 	lockd.nlm_udpport=M	[NFS] Assign UDP port.
 			Format: <integer>
 
+	lockdown=	[SECURITY]
+			{ integrity | confidentiality }
+			Enable the kernel lockdown feature. If set to
+			integrity, kernel features that allow userland to
+			modify the running kernel are disabled. If set to
+			confidentiality, kernel features that allow userland
+			to extract confidential information from the kernel
+			are also disabled.
+
 	locktorture.nreaders_stress= [KNL]
 			Set the number of locking read-acquisition kthreads.
 			Defaults to being automatically set based on the
diff --git a/include/linux/security.h b/include/linux/security.h
index 04cf48fab15d..74787335d9ce 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -97,6 +97,9 @@ enum lsm_event {
  * potentially a moving target. It is easy to misuse this information
  * in a way that could break userspace. Please be careful not to do
  * so.
+ *
+ * If you add to this, remember to extend lockdown_reasons in
+ * security/lockdown/lockdown.c.
  */
 enum lockdown_reason {
 	LOCKDOWN_NONE,
diff --git a/security/Kconfig b/security/Kconfig
index 466cc1f8ffed..7c62d446e209 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -237,6 +237,7 @@ source "security/apparmor/Kconfig"
 source "security/loadpin/Kconfig"
 source "security/yama/Kconfig"
 source "security/safesetid/Kconfig"
+source "security/lockdown/Kconfig"
 
 source "security/integrity/Kconfig"
 
@@ -276,11 +277,11 @@ endchoice
 
 config LSM
 	string "Ordered list of enabled LSMs"
-	default "yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
-	default "yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
-	default "yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
-	default "yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
-	default "yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
+	default "lockdown,yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK
+	default "lockdown,yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR
+	default "lockdown,yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO
+	default "lockdown,yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC
+	default "lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor"
 	help
 	  A comma-separated list of LSMs, in initialization order.
 	  Any LSMs left off this list will be ignored. This can be
diff --git a/security/Makefile b/security/Makefile
index c598b904938f..be1dd9d2cb2f 100644
--- a/security/Makefile
+++ b/security/Makefile
@@ -11,6 +11,7 @@ subdir-$(CONFIG_SECURITY_APPARMOR)	+= apparmor
 subdir-$(CONFIG_SECURITY_YAMA)		+= yama
 subdir-$(CONFIG_SECURITY_LOADPIN)	+= loadpin
 subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
+subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown
 
 # always enable default capabilities
 obj-y					+= commoncap.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_SECURITY_APPARMOR)		+= apparmor/
 obj-$(CONFIG_SECURITY_YAMA)		+= yama/
 obj-$(CONFIG_SECURITY_LOADPIN)		+= loadpin/
 obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
+obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)	+= lockdown/
 obj-$(CONFIG_CGROUP_DEVICE)		+= device_cgroup.o
 
 # Object integrity file lists
diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig
new file mode 100644
index 000000000000..7a1d213227a4
--- /dev/null
+++ b/security/lockdown/Kconfig
@@ -0,0 +1,46 @@
+config SECURITY_LOCKDOWN_LSM
+	bool "Basic module for enforcing kernel lockdown"
+	depends on SECURITY
+	help
+	  Build support for an LSM that enforces a coarse kernel lockdown
+	  behaviour.
+
+config SECURITY_LOCKDOWN_LSM_EARLY
+	bool "Enable lockdown LSM early in init"
+	depends on SECURITY_LOCKDOWN_LSM
+	help
+	  Enable the lockdown LSM early in boot. This is necessary in order
+	  to ensure that lockdown enforcement can be carried out on kernel
+	  boot parameters that are otherwise parsed before the security
+	  subsystem is fully initialised. If enabled, lockdown will
+	  unconditionally be called before any other LSMs.
+
+choice
+	prompt "Kernel default lockdown mode"
+	default LOCK_DOWN_KERNEL_FORCE_NONE
+	depends on SECURITY_LOCKDOWN_LSM
+	help
+	  The kernel can be configured to default to differing levels of
+	  lockdown.
+
+config LOCK_DOWN_KERNEL_FORCE_NONE
+	bool "None"
+	help
+	  No lockdown functionality is enabled by default. Lockdown may be
+	  enabled via the kernel commandline or /sys/kernel/security/lockdown.
+
+config LOCK_DOWN_KERNEL_FORCE_INTEGRITY
+	bool "Integrity"
+	help
+	 The kernel runs in integrity mode by default. Features that allow
+	 the kernel to be modified at runtime are disabled.
+
+config LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY
+	bool "Confidentiality"
+	help
+	 The kernel runs in confidentiality mode by default. Features that
+	 allow the kernel to be modified at runtime or that permit userland
+	 code to read confidential material held inside the kernel are
+	 disabled.
+
+endchoice
diff --git a/security/lockdown/Makefile b/security/lockdown/Makefile
new file mode 100644
index 000000000000..e3634b9017e7
--- /dev/null
+++ b/security/lockdown/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown.o
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
new file mode 100644
index 000000000000..7172ad75496b
--- /dev/null
+++ b/security/lockdown/lockdown.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Lock down the kernel
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/security.h>
+#include <linux/export.h>
+#include <linux/lsm_hooks.h>
+
+static enum lockdown_reason kernel_locked_down;
+
+static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
+	[LOCKDOWN_NONE] = "none",
+	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
+	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
+};
+
+static enum lockdown_reason lockdown_levels[] = {LOCKDOWN_NONE,
+						 LOCKDOWN_INTEGRITY_MAX,
+						 LOCKDOWN_CONFIDENTIALITY_MAX};
+
+/*
+ * Put the kernel into lock-down mode.
+ */
+static int lock_kernel_down(const char *where, enum lockdown_reason level)
+{
+	if (kernel_locked_down >= level)
+		return -EPERM;
+
+	kernel_locked_down = level;
+	pr_notice("Kernel is locked down from %s; see man kernel_lockdown.7\n",
+		  where);
+	return 0;
+}
+
+static int __init lockdown_param(char *level)
+{
+	if (!level)
+		return -EINVAL;
+
+	if (strcmp(level, "integrity") == 0)
+		lock_kernel_down("command line", LOCKDOWN_INTEGRITY_MAX);
+	else if (strcmp(level, "confidentiality") == 0)
+		lock_kernel_down("command line", LOCKDOWN_CONFIDENTIALITY_MAX);
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+early_param("lockdown", lockdown_param);
+
+/**
+ * lockdown_is_locked_down - Find out if the kernel is locked down
+ * @what: Tag to use in notice generated if lockdown is in effect
+ */
+static int lockdown_is_locked_down(enum lockdown_reason what)
+{
+	if (kernel_locked_down >= what) {
+		if (lockdown_reasons[what])
+			pr_notice("Lockdown: %s is restricted; see man kernel_lockdown.7\n",
+				  lockdown_reasons[what]);
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = {
+	LSM_HOOK_INIT(locked_down, lockdown_is_locked_down),
+};
+
+static int __init lockdown_lsm_init(void)
+{
+#if defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY)
+	lock_kernel_down("Kernel configuration", LOCKDOWN_INTEGRITY_MAX);
+#elif defined(CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY)
+	lock_kernel_down("Kernel configuration", LOCKDOWN_CONFIDENTIALITY_MAX);
+#endif
+	security_add_hooks(lockdown_hooks, ARRAY_SIZE(lockdown_hooks),
+			   "lockdown");
+	return 0;
+}
+
+static ssize_t lockdown_read(struct file *filp, char __user *buf, size_t count,
+			     loff_t *ppos)
+{
+	char temp[80];
+	int i, offset = 0;
+
+	for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) {
+		enum lockdown_reason level = lockdown_levels[i];
+
+		if (lockdown_reasons[level]) {
+			const char *label = lockdown_reasons[level];
+
+			if (kernel_locked_down == level)
+				offset += sprintf(temp+offset, "[%s] ", label);
+			else
+				offset += sprintf(temp+offset, "%s ", label);
+		}
+	}
+
+	/* Convert the last space to a newline if needed. */
+	if (offset > 0)
+		temp[offset-1] = '\n';
+
+	return simple_read_from_buffer(buf, count, ppos, temp, strlen(temp));
+}
+
+static ssize_t lockdown_write(struct file *file, const char __user *buf,
+			      size_t n, loff_t *ppos)
+{
+	char *state;
+	int i, len, err = -EINVAL;
+
+	state = memdup_user_nul(buf, n);
+	if (IS_ERR(state))
+		return PTR_ERR(state);
+
+	len = strlen(state);
+	if (len && state[len-1] == '\n') {
+		state[len-1] = '\0';
+		len--;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lockdown_levels); i++) {
+		enum lockdown_reason level = lockdown_levels[i];
+		const char *label = lockdown_reasons[level];
+
+		if (label && !strcmp(state, label))
+			err = lock_kernel_down("securityfs", level);
+	}
+
+	kfree(state);
+	return err ? err : n;
+}
+
+static const struct file_operations lockdown_ops = {
+	.read  = lockdown_read,
+	.write = lockdown_write,
+};
+
+static int __init lockdown_secfs_init(void)
+{
+	struct dentry *dentry;
+
+	dentry = securityfs_create_file("lockdown", 0600, NULL, NULL,
+					&lockdown_ops);
+	return PTR_ERR_OR_ZERO(dentry);
+}
+
+core_initcall(lockdown_secfs_init);
+
+#ifdef CONFIG_SECURITY_LOCKDOWN_LSM_EARLY
+DEFINE_EARLY_LSM(lockdown) = {
+#else
+DEFINE_LSM(lockdown) = {
+#endif
+	.name = "lockdown",
+	.init = lockdown_lsm_init,
+};
-- 
cgit v1.2.3


From 49fcf732bdae0550721ef73af7c45109ce26b2a9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:40 -0700
Subject: lockdown: Enforce module signatures if the kernel is locked down

If the kernel is locked down, require that all modules have valid
signatures that we can verify.

I have adjusted the errors generated:

 (1) If there's no signature (ENODATA) or we can't check it (ENOPKG,
     ENOKEY), then:

     (a) If signatures are enforced then EKEYREJECTED is returned.

     (b) If there's no signature or we can't check it, but the kernel is
	 locked down then EPERM is returned (this is then consistent with
	 other lockdown cases).

 (2) If the signature is unparseable (EBADMSG, EINVAL), the signature fails
     the check (EKEYREJECTED) or a system error occurs (eg. ENOMEM), we
     return the error we got.

Note that the X.509 code doesn't check for key expiry as the RTC might not
be valid or might not have been transferred to the kernel's clock yet.

 [Modified by Matthew Garrett to remove the IMA integration. This will
  be replaced with integration with the IMA architecture policy
  patchset.]

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <matthewgarrett@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Jessica Yu <jeyu@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     |  1 +
 init/Kconfig                 |  5 +++++
 kernel/module.c              | 37 ++++++++++++++++++++++++++++++-------
 security/lockdown/Kconfig    |  1 +
 security/lockdown/lockdown.c |  1 +
 5 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index 74787335d9ce..9e8abb60a99f 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -103,6 +103,7 @@ enum lsm_event {
  */
 enum lockdown_reason {
 	LOCKDOWN_NONE,
+	LOCKDOWN_MODULE_SIGNATURE,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/init/Kconfig b/init/Kconfig
index 0e2344389501..e6069368f278 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1939,6 +1939,11 @@ config MODULE_SIG
 	  kernel build dependency so that the signing tool can use its crypto
 	  library.
 
+	  You should enable this option if you wish to use either
+	  CONFIG_SECURITY_LOCKDOWN_LSM or lockdown functionality imposed via
+	  another LSM - otherwise unsigned modules will be loadable regardless
+	  of the lockdown policy.
+
 	  !!!WARNING!!!  If you enable this option, you MUST make sure that the
 	  module DOES NOT get stripped after being signed.  This includes the
 	  debuginfo strip done by some packagers (such as rpmbuild) and
diff --git a/kernel/module.c b/kernel/module.c
index 80c7c09584cf..2206c08a5e10 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2753,8 +2753,9 @@ static inline void kmemleak_load_module(const struct module *mod,
 #ifdef CONFIG_MODULE_SIG
 static int module_sig_check(struct load_info *info, int flags)
 {
-	int err = -ENOKEY;
+	int err = -ENODATA;
 	const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
+	const char *reason;
 	const void *mod = info->hdr;
 
 	/*
@@ -2769,16 +2770,38 @@ static int module_sig_check(struct load_info *info, int flags)
 		err = mod_verify_sig(mod, info);
 	}
 
-	if (!err) {
+	switch (err) {
+	case 0:
 		info->sig_ok = true;
 		return 0;
-	}
 
-	/* Not having a signature is only an error if we're strict. */
-	if (err == -ENOKEY && !is_module_sig_enforced())
-		err = 0;
+		/* We don't permit modules to be loaded into trusted kernels
+		 * without a valid signature on them, but if we're not
+		 * enforcing, certain errors are non-fatal.
+		 */
+	case -ENODATA:
+		reason = "Loading of unsigned module";
+		goto decide;
+	case -ENOPKG:
+		reason = "Loading of module with unsupported crypto";
+		goto decide;
+	case -ENOKEY:
+		reason = "Loading of module with unavailable key";
+	decide:
+		if (is_module_sig_enforced()) {
+			pr_notice("%s is rejected\n", reason);
+			return -EKEYREJECTED;
+		}
 
-	return err;
+		return security_locked_down(LOCKDOWN_MODULE_SIGNATURE);
+
+		/* All other errors are fatal, including nomem, unparseable
+		 * signatures and signature check failures - even if signatures
+		 * aren't required.
+		 */
+	default:
+		return err;
+	}
 }
 #else /* !CONFIG_MODULE_SIG */
 static int module_sig_check(struct load_info *info, int flags)
diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig
index 7a1d213227a4..e84ddf484010 100644
--- a/security/lockdown/Kconfig
+++ b/security/lockdown/Kconfig
@@ -1,6 +1,7 @@
 config SECURITY_LOCKDOWN_LSM
 	bool "Basic module for enforcing kernel lockdown"
 	depends on SECURITY
+	select MODULE_SIG if MODULES
 	help
 	  Build support for an LSM that enforces a coarse kernel lockdown
 	  behaviour.
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 7172ad75496b..d8e42125a5dd 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -18,6 +18,7 @@ static enum lockdown_reason kernel_locked_down;
 
 static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_NONE] = "none",
+	[LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 9b9d8dda1ed72e9bd560ab0ca93d322a9440510e Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Mon, 19 Aug 2019 17:17:41 -0700
Subject: lockdown: Restrict /dev/{mem,kmem,port} when the kernel is locked
 down

Allowing users to read and write to core kernel memory makes it possible
for the kernel to be subverted, avoiding module loading restrictions, and
also to steal cryptographic information.

Disallow /dev/mem and /dev/kmem from being opened this when the kernel has
been locked down to prevent this.

Also disallow /dev/port from being opened to prevent raw ioport access and
thus DMA from being used to accomplish the same thing.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: x86@kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/char/mem.c           | 7 +++++--
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index b08dc50f9f26..d0148aee1aab 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -29,8 +29,8 @@
 #include <linux/export.h>
 #include <linux/io.h>
 #include <linux/uio.h>
-
 #include <linux/uaccess.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_IA64
 # include <linux/efi.h>
@@ -786,7 +786,10 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
 
 static int open_port(struct inode *inode, struct file *filp)
 {
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	return security_locked_down(LOCKDOWN_DEV_MEM);
 }
 
 #define zero_lseek	null_lseek
diff --git a/include/linux/security.h b/include/linux/security.h
index 9e8abb60a99f..e5dd446ef35b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -104,6 +104,7 @@ enum lsm_event {
 enum lockdown_reason {
 	LOCKDOWN_NONE,
 	LOCKDOWN_MODULE_SIGNATURE,
+	LOCKDOWN_DEV_MEM,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index d8e42125a5dd..240ecaa10a1d 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -19,6 +19,7 @@ static enum lockdown_reason kernel_locked_down;
 static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_NONE] = "none",
 	[LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
+	[LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 7d31f4602f8d366072471ca138e4ea7b8edf9be0 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Mon, 19 Aug 2019 17:17:42 -0700
Subject: kexec_load: Disable at runtime if the kernel is locked down

The kexec_load() syscall permits the loading and execution of arbitrary
code in ring 0, which is something that lock-down is meant to prevent. It
makes sense to disable kexec_load() in this situation.

This does not affect kexec_file_load() syscall which can check for a
signature on the image to be booted.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Dave Young <dyoung@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: kexec@lists.infradead.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     | 1 +
 kernel/kexec.c               | 8 ++++++++
 security/lockdown/lockdown.c | 1 +
 3 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index e5dd446ef35b..b607a8ac97fe 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -105,6 +105,7 @@ enum lockdown_reason {
 	LOCKDOWN_NONE,
 	LOCKDOWN_MODULE_SIGNATURE,
 	LOCKDOWN_DEV_MEM,
+	LOCKDOWN_KEXEC,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 1b018f1a6e0d..bc933c0db9bf 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -205,6 +205,14 @@ static inline int kexec_load_check(unsigned long nr_segments,
 	if (result < 0)
 		return result;
 
+	/*
+	 * kexec can be used to circumvent module loading restrictions, so
+	 * prevent loading in that case
+	 */
+	result = security_locked_down(LOCKDOWN_KEXEC);
+	if (result)
+		return result;
+
 	/*
 	 * Verify we have a legal set of flags
 	 * This leaves us room for future extensions.
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 240ecaa10a1d..aaf30ad351f9 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -20,6 +20,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_NONE] = "none",
 	[LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
 	[LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
+	[LOCKDOWN_KEXEC] = "kexec of unsigned images",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 99d5cadfde2b1acb7650021df5abaa5ec447dd10 Mon Sep 17 00:00:00 2001
From: Jiri Bohac <jbohac@suse.cz>
Date: Mon, 19 Aug 2019 17:17:44 -0700
Subject: kexec_file: split KEXEC_VERIFY_SIG into KEXEC_SIG and KEXEC_SIG_FORCE

This is a preparatory patch for kexec_file_load() lockdown.  A locked down
kernel needs to prevent unsigned kernel images from being loaded with
kexec_file_load().  Currently, the only way to force the signature
verification is compiling with KEXEC_VERIFY_SIG.  This prevents loading
usigned images even when the kernel is not locked down at runtime.

This patch splits KEXEC_VERIFY_SIG into KEXEC_SIG and KEXEC_SIG_FORCE.
Analogous to the MODULE_SIG and MODULE_SIG_FORCE for modules, KEXEC_SIG
turns on the signature verification but allows unsigned images to be
loaded.  KEXEC_SIG_FORCE disallows images without a valid signature.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
cc: kexec@lists.infradead.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/arm64/Kconfig                      |  6 ++--
 arch/s390/Kconfig                       |  2 +-
 arch/s390/configs/debug_defconfig       |  2 +-
 arch/s390/configs/defconfig             |  2 +-
 arch/s390/configs/performance_defconfig |  2 +-
 arch/s390/kernel/kexec_elf.c            |  4 +--
 arch/s390/kernel/kexec_image.c          |  4 +--
 arch/s390/kernel/machine_kexec_file.c   |  4 +--
 arch/x86/Kconfig                        | 20 ++++++++---
 arch/x86/kernel/ima_arch.c              |  4 +--
 crypto/asymmetric_keys/verify_pefile.c  |  4 ++-
 include/linux/kexec.h                   |  4 +--
 kernel/kexec_file.c                     | 60 ++++++++++++++++++++++++++++-----
 security/integrity/ima/Kconfig          |  2 +-
 security/integrity/ima/ima_main.c       |  2 +-
 15 files changed, 88 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 697ea0510729..f940500a941b 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -961,7 +961,7 @@ config KEXEC_FILE
 	  for kernel and initramfs as opposed to list of segments as
 	  accepted by previous system call.
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
 	bool "Verify kernel signature during kexec_file_load() syscall"
 	depends on KEXEC_FILE
 	help
@@ -976,13 +976,13 @@ config KEXEC_VERIFY_SIG
 config KEXEC_IMAGE_VERIFY_SIG
 	bool "Enable Image signature verification support"
 	default y
-	depends on KEXEC_VERIFY_SIG
+	depends on KEXEC_SIG
 	depends on EFI && SIGNED_PE_FILE_VERIFICATION
 	help
 	  Enable Image signature verification support.
 
 comment "Support for PE file signature verification disabled"
-	depends on KEXEC_VERIFY_SIG
+	depends on KEXEC_SIG
 	depends on !EFI || !SIGNED_PE_FILE_VERIFICATION
 
 config CRASH_DUMP
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 109243fdb6ec..c4a423f30d49 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -555,7 +555,7 @@ config ARCH_HAS_KEXEC_PURGATORY
 	def_bool y
 	depends on KEXEC_FILE
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
 	bool "Verify kernel signature during kexec_file_load() syscall"
 	depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION
 	help
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index b0920b35f87b..525e0a6addb9 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -64,7 +64,7 @@ CONFIG_NUMA=y
 CONFIG_PREEMPT=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
+CONFIG_KEXEC_SIG=y
 CONFIG_EXPOLINE=y
 CONFIG_EXPOLINE_AUTO=y
 CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index c59b922cb6c5..4c37279acdb4 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -39,7 +39,7 @@ CONFIG_NR_CPUS=256
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
+CONFIG_KEXEC_SIG=y
 CONFIG_CRASH_DUMP=y
 CONFIG_HIBERNATION=y
 CONFIG_PM_DEBUG=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 09aa5cb14873..158ad0f0d433 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -65,7 +65,7 @@ CONFIG_NR_CPUS=512
 CONFIG_NUMA=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC_FILE=y
-CONFIG_KEXEC_VERIFY_SIG=y
+CONFIG_KEXEC_SIG=y
 CONFIG_EXPOLINE=y
 CONFIG_EXPOLINE_AUTO=y
 CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 6d0635ceddd0..9b4f37a4edf1 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -130,7 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len)
 const struct kexec_file_ops s390_kexec_elf_ops = {
 	.probe = s390_elf_probe,
 	.load = s390_elf_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC__SIG
 	.verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 };
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index 58318bf89fd9..af23eff5774d 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -59,7 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len)
 const struct kexec_file_ops s390_kexec_image_ops = {
 	.probe = s390_image_probe,
 	.load = s390_image_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 	.verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 };
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fbdd3ea73667..c0f33ba49a9a 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -22,7 +22,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
 	NULL,
 };
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 /*
  * Module signature information block.
  *
@@ -90,7 +90,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
 				      VERIFYING_MODULE_SIGNATURE,
 				      NULL, NULL);
 }
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 
 static int kexec_file_update_purgatory(struct kimage *image,
 				       struct s390_load_data *data)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2bbbd4d1ba31..cd41998aa6e6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2006,20 +2006,30 @@ config KEXEC_FILE
 config ARCH_HAS_KEXEC_PURGATORY
 	def_bool KEXEC_FILE
 
-config KEXEC_VERIFY_SIG
+config KEXEC_SIG
 	bool "Verify kernel signature during kexec_file_load() syscall"
 	depends on KEXEC_FILE
 	---help---
-	  This option makes kernel signature verification mandatory for
-	  the kexec_file_load() syscall.
 
-	  In addition to that option, you need to enable signature
+	  This option makes the kexec_file_load() syscall check for a valid
+	  signature of the kernel image.  The image can still be loaded without
+	  a valid signature unless you also enable KEXEC_SIG_FORCE, though if
+	  there's a signature that we can check, then it must be valid.
+
+	  In addition to this option, you need to enable signature
 	  verification for the corresponding kernel image type being
 	  loaded in order for this to work.
 
+config KEXEC_SIG_FORCE
+	bool "Require a valid signature in kexec_file_load() syscall"
+	depends on KEXEC_SIG
+	---help---
+	  This option makes kernel signature verification mandatory for
+	  the kexec_file_load() syscall.
+
 config KEXEC_BZIMAGE_VERIFY_SIG
 	bool "Enable bzImage signature verification support"
-	depends on KEXEC_VERIFY_SIG
+	depends on KEXEC_SIG
 	depends on SIGNED_PE_FILE_VERIFICATION
 	select SYSTEM_TRUSTED_KEYRING
 	---help---
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index 64b973f0e985..b98890894731 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -66,9 +66,9 @@ bool arch_ima_get_secureboot(void)
 
 /* secureboot arch rules */
 static const char * const sb_arch_rules[] = {
-#if !IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG)
+#if !IS_ENABLED(CONFIG_KEXEC_SIG)
 	"appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig",
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
 	"measure func=KEXEC_KERNEL_CHECK",
 #if !IS_ENABLED(CONFIG_MODULE_SIG)
 	"appraise func=MODULE_CHECK appraise_type=imasig",
diff --git a/crypto/asymmetric_keys/verify_pefile.c b/crypto/asymmetric_keys/verify_pefile.c
index 3b303fe2f061..cc9dbcecaaca 100644
--- a/crypto/asymmetric_keys/verify_pefile.c
+++ b/crypto/asymmetric_keys/verify_pefile.c
@@ -96,7 +96,7 @@ static int pefile_parse_binary(const void *pebuf, unsigned int pelen,
 
 	if (!ddir->certs.virtual_address || !ddir->certs.size) {
 		pr_debug("Unsigned PE binary\n");
-		return -EKEYREJECTED;
+		return -ENODATA;
 	}
 
 	chkaddr(ctx->header_size, ddir->certs.virtual_address,
@@ -403,6 +403,8 @@ error_no_desc:
  *  (*) 0 if at least one signature chain intersects with the keys in the trust
  *	keyring, or:
  *
+ *  (*) -ENODATA if there is no signature present.
+ *
  *  (*) -ENOPKG if a suitable crypto module couldn't be found for a check on a
  *	chain.
  *
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index b9b1bc5f9669..58b27c7bdc2b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -125,7 +125,7 @@ typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
 			     unsigned long cmdline_len);
 typedef int (kexec_cleanup_t)(void *loader_data);
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 typedef int (kexec_verify_sig_t)(const char *kernel_buf,
 				 unsigned long kernel_len);
 #endif
@@ -134,7 +134,7 @@ struct kexec_file_ops {
 	kexec_probe_t *probe;
 	kexec_load_t *load;
 	kexec_cleanup_t *cleanup;
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 	kexec_verify_sig_t *verify_sig;
 #endif
 };
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index ef7b951a8087..972931201995 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -88,7 +88,7 @@ int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
 	return kexec_image_post_load_cleanup_default(image);
 }
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
 static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
 					  unsigned long buf_len)
 {
@@ -177,6 +177,51 @@ void kimage_file_post_load_cleanup(struct kimage *image)
 	image->image_loader_data = NULL;
 }
 
+#ifdef CONFIG_KEXEC_SIG
+static int
+kimage_validate_signature(struct kimage *image)
+{
+	const char *reason;
+	int ret;
+
+	ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+					   image->kernel_buf_len);
+	switch (ret) {
+	case 0:
+		break;
+
+		/* Certain verification errors are non-fatal if we're not
+		 * checking errors, provided we aren't mandating that there
+		 * must be a valid signature.
+		 */
+	case -ENODATA:
+		reason = "kexec of unsigned image";
+		goto decide;
+	case -ENOPKG:
+		reason = "kexec of image with unsupported crypto";
+		goto decide;
+	case -ENOKEY:
+		reason = "kexec of image with unavailable key";
+	decide:
+		if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
+			pr_notice("%s rejected\n", reason);
+			return ret;
+		}
+
+		return 0;
+
+		/* All other errors are fatal, including nomem, unparseable
+		 * signatures and signature check failures - even if signatures
+		 * aren't required.
+		 */
+	default:
+		pr_notice("kernel signature verification failed (%d).\n", ret);
+	}
+
+	return ret;
+}
+#endif
+
 /*
  * In file mode list of segments is prepared by kernel. Copy relevant
  * data from user space, do error checking, prepare segment list
@@ -186,7 +231,7 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 			     const char __user *cmdline_ptr,
 			     unsigned long cmdline_len, unsigned flags)
 {
-	int ret = 0;
+	int ret;
 	void *ldata;
 	loff_t size;
 
@@ -205,14 +250,11 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
 	if (ret)
 		goto out;
 
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-	ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
-					   image->kernel_buf_len);
-	if (ret) {
-		pr_debug("kernel signature verification failed.\n");
+#ifdef CONFIG_KEXEC_SIG
+	ret = kimage_validate_signature(image);
+
+	if (ret)
 		goto out;
-	}
-	pr_debug("kernel signature verification successful.\n");
 #endif
 	/* It is possible that there no initramfs is being loaded */
 	if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig
index 2692c7358c2c..32cd25fa44a5 100644
--- a/security/integrity/ima/Kconfig
+++ b/security/integrity/ima/Kconfig
@@ -160,7 +160,7 @@ config IMA_APPRAISE
 
 config IMA_ARCH_POLICY
         bool "Enable loading an IMA architecture specific policy"
-        depends on KEXEC_VERIFY_SIG || IMA_APPRAISE && INTEGRITY_ASYMMETRIC_KEYS
+        depends on KEXEC_SIG || IMA_APPRAISE && INTEGRITY_ASYMMETRIC_KEYS
         default n
         help
           This option enables loading an IMA architecture specific policy
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index f556e6c18f9b..1cffda4412b7 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -541,7 +541,7 @@ int ima_load_data(enum kernel_load_data_id id)
 
 	switch (id) {
 	case LOADING_KEXEC_IMAGE:
-		if (IS_ENABLED(CONFIG_KEXEC_VERIFY_SIG)
+		if (IS_ENABLED(CONFIG_KEXEC_SIG)
 		    && arch_ima_get_secureboot()) {
 			pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n");
 			return -EACCES;
-- 
cgit v1.2.3


From 38bd94b8a1bd46e6d3d9718c7ff582e4c8ccb440 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@fedoraproject.org>
Date: Mon, 19 Aug 2019 17:17:46 -0700
Subject: hibernate: Disable when the kernel is locked down

There is currently no way to verify the resume image when returning
from hibernate.  This might compromise the signed modules trust model,
so until we can work with signed hibernate images we disable it when the
kernel is locked down.

Signed-off-by: Josh Boyer <jwboyer@fedoraproject.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: rjw@rjwysocki.net
Cc: pavel@ucw.cz
cc: linux-pm@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     | 1 +
 kernel/power/hibernate.c     | 3 ++-
 security/lockdown/lockdown.c | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index b607a8ac97fe..80ac7fb27aa9 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -106,6 +106,7 @@ enum lockdown_reason {
 	LOCKDOWN_MODULE_SIGNATURE,
 	LOCKDOWN_DEV_MEM,
 	LOCKDOWN_KEXEC,
+	LOCKDOWN_HIBERNATION,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index cd7434e6000d..3c0a5a8170b0 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -30,6 +30,7 @@
 #include <linux/ctype.h>
 #include <linux/genhd.h>
 #include <linux/ktime.h>
+#include <linux/security.h>
 #include <trace/events/power.h>
 
 #include "power.h"
@@ -68,7 +69,7 @@ static const struct platform_hibernation_ops *hibernation_ops;
 
 bool hibernation_available(void)
 {
-	return (nohibernate == 0);
+	return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION);
 }
 
 /**
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index aaf30ad351f9..3462f7edcaac 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -21,6 +21,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
 	[LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
 	[LOCKDOWN_KEXEC] = "kexec of unsigned images",
+	[LOCKDOWN_HIBERNATION] = "hibernation",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From eb627e17727ebeede70697ae1798688b0d328b54 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Mon, 19 Aug 2019 17:17:47 -0700
Subject: PCI: Lock down BAR access when the kernel is locked down

Any hardware that can potentially generate DMA has to be locked down in
order to avoid it being possible for an attacker to modify kernel code,
allowing them to circumvent disabled module loading or module signing.
Default to paranoid - in future we can potentially relax this for
sufficiently IOMMU-isolated devices.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: linux-pci@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/pci/pci-sysfs.c      | 16 ++++++++++++++++
 drivers/pci/proc.c           | 14 ++++++++++++--
 drivers/pci/syscall.c        |  4 +++-
 include/linux/security.h     |  1 +
 security/lockdown/lockdown.c |  1 +
 5 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 6d27475e39b2..ec103a7e13fc 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -903,6 +903,11 @@ static ssize_t pci_write_config(struct file *filp, struct kobject *kobj,
 	unsigned int size = count;
 	loff_t init_off = off;
 	u8 *data = (u8 *) buf;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
 
 	if (off > dev->cfg_size)
 		return 0;
@@ -1164,6 +1169,11 @@ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	int bar = (unsigned long)attr->private;
 	enum pci_mmap_state mmap_type;
 	struct resource *res = &pdev->resource[bar];
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
 
 	if (res->flags & IORESOURCE_MEM && iomem_is_exclusive(res->start))
 		return -EINVAL;
@@ -1240,6 +1250,12 @@ static ssize_t pci_write_resource_io(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *attr, char *buf,
 				     loff_t off, size_t count)
 {
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
 	return pci_resource_io(filp, kobj, attr, buf, off, count, true);
 }
 
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index 445b51db75b0..e29b0d5ced62 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -13,6 +13,7 @@
 #include <linux/seq_file.h>
 #include <linux/capability.h>
 #include <linux/uaccess.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include "pci.h"
 
@@ -115,7 +116,11 @@ static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf,
 	struct pci_dev *dev = PDE_DATA(ino);
 	int pos = *ppos;
 	int size = dev->cfg_size;
-	int cnt;
+	int cnt, ret;
+
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
 
 	if (pos >= size)
 		return 0;
@@ -196,6 +201,10 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
 #endif /* HAVE_PCI_MMAP */
 	int ret = 0;
 
+	ret = security_locked_down(LOCKDOWN_PCI_ACCESS);
+	if (ret)
+		return ret;
+
 	switch (cmd) {
 	case PCIIOC_CONTROLLER:
 		ret = pci_domain_nr(dev->bus);
@@ -238,7 +247,8 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
 	struct pci_filp_private *fpriv = file->private_data;
 	int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM;
 
-	if (!capable(CAP_SYS_RAWIO))
+	if (!capable(CAP_SYS_RAWIO) ||
+	    security_locked_down(LOCKDOWN_PCI_ACCESS))
 		return -EPERM;
 
 	if (fpriv->mmap_state == pci_mmap_io) {
diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c
index d96626c614f5..31e39558d49d 100644
--- a/drivers/pci/syscall.c
+++ b/drivers/pci/syscall.c
@@ -7,6 +7,7 @@
 
 #include <linux/errno.h>
 #include <linux/pci.h>
+#include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include "pci.h"
@@ -90,7 +91,8 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
 	u32 dword;
 	int err = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!capable(CAP_SYS_ADMIN) ||
+	    security_locked_down(LOCKDOWN_PCI_ACCESS))
 		return -EPERM;
 
 	dev = pci_get_domain_bus_and_slot(0, bus, dfn);
diff --git a/include/linux/security.h b/include/linux/security.h
index 80ac7fb27aa9..2b763f0ee352 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -107,6 +107,7 @@ enum lockdown_reason {
 	LOCKDOWN_DEV_MEM,
 	LOCKDOWN_KEXEC,
 	LOCKDOWN_HIBERNATION,
+	LOCKDOWN_PCI_ACCESS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 3462f7edcaac..410e90eda848 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -22,6 +22,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
 	[LOCKDOWN_KEXEC] = "kexec of unsigned images",
 	[LOCKDOWN_HIBERNATION] = "hibernation",
+	[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 96c4f67293e4cd8b3394adce5a8041a2784e68a3 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Mon, 19 Aug 2019 17:17:48 -0700
Subject: x86: Lock down IO port access when the kernel is locked down

IO port access would permit users to gain access to PCI configuration
registers, which in turn (on a lot of hardware) give access to MMIO
register space. This would potentially permit root to trigger arbitrary
DMA, so lock it down by default.

This also implicitly locks down the KDADDIO, KDDELIO, KDENABIO and
KDDISABIO console ioctls.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: x86@kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/x86/kernel/ioport.c     | 7 +++++--
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 0fe1c8782208..61a89d3c0382 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -11,6 +11,7 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/ioport.h>
+#include <linux/security.h>
 #include <linux/smp.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
@@ -31,7 +32,8 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
 
 	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
 		return -EINVAL;
-	if (turn_on && !capable(CAP_SYS_RAWIO))
+	if (turn_on && (!capable(CAP_SYS_RAWIO) ||
+			security_locked_down(LOCKDOWN_IOPORT)))
 		return -EPERM;
 
 	/*
@@ -126,7 +128,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
 		return -EINVAL;
 	/* Trying to gain more privileges? */
 	if (level > old) {
-		if (!capable(CAP_SYS_RAWIO))
+		if (!capable(CAP_SYS_RAWIO) ||
+		    security_locked_down(LOCKDOWN_IOPORT))
 			return -EPERM;
 	}
 	regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
diff --git a/include/linux/security.h b/include/linux/security.h
index 2b763f0ee352..cd93fa5d3c6d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -108,6 +108,7 @@ enum lockdown_reason {
 	LOCKDOWN_KEXEC,
 	LOCKDOWN_HIBERNATION,
 	LOCKDOWN_PCI_ACCESS,
+	LOCKDOWN_IOPORT,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 410e90eda848..8b7d65dbb086 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -23,6 +23,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_KEXEC] = "kexec of unsigned images",
 	[LOCKDOWN_HIBERNATION] = "hibernation",
 	[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
+	[LOCKDOWN_IOPORT] = "raw io port access",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 95f5e95f41dff31b2a4566c5a8975c08a49ae4e3 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Mon, 19 Aug 2019 17:17:49 -0700
Subject: x86/msr: Restrict MSR access when the kernel is locked down

Writing to MSRs should not be allowed if the kernel is locked down, since
it could lead to execution of arbitrary code in kernel mode.  Based on a
patch by Kees Cook.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
cc: x86@kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/x86/kernel/msr.c        | 8 ++++++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 3db2252b958d..1547be359d7f 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -34,6 +34,7 @@
 #include <linux/notifier.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/security.h>
 
 #include <asm/cpufeature.h>
 #include <asm/msr.h>
@@ -79,6 +80,10 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
 	int err = 0;
 	ssize_t bytes = 0;
 
+	err = security_locked_down(LOCKDOWN_MSR);
+	if (err)
+		return err;
+
 	if (count % 8)
 		return -EINVAL;	/* Invalid chunk size */
 
@@ -130,6 +135,9 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
 			err = -EFAULT;
 			break;
 		}
+		err = security_locked_down(LOCKDOWN_MSR);
+		if (err)
+			break;
 		err = wrmsr_safe_regs_on_cpu(cpu, regs);
 		if (err)
 			break;
diff --git a/include/linux/security.h b/include/linux/security.h
index cd93fa5d3c6d..010637a79eac 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -109,6 +109,7 @@ enum lockdown_reason {
 	LOCKDOWN_HIBERNATION,
 	LOCKDOWN_PCI_ACCESS,
 	LOCKDOWN_IOPORT,
+	LOCKDOWN_MSR,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 8b7d65dbb086..b1c1c72440d5 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -24,6 +24,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_HIBERNATION] = "hibernation",
 	[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
 	[LOCKDOWN_IOPORT] = "raw io port access",
+	[LOCKDOWN_MSR] = "raw MSR access",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From f474e1486b78ac15322f8a1cda48a32a1deff9d3 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg59@srcf.ucam.org>
Date: Mon, 19 Aug 2019 17:17:50 -0700
Subject: ACPI: Limit access to custom_method when the kernel is locked down

custom_method effectively allows arbitrary access to system memory, making
it possible for an attacker to circumvent restrictions on module loading.
Disable it if the kernel is locked down.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: linux-acpi@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/acpi/custom_method.c | 6 ++++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c
index b2ef4c2ec955..7031307becd7 100644
--- a/drivers/acpi/custom_method.c
+++ b/drivers/acpi/custom_method.c
@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 #include <linux/debugfs.h>
 #include <linux/acpi.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -29,6 +30,11 @@ static ssize_t cm_write(struct file *file, const char __user * user_buf,
 
 	struct acpi_table_header table;
 	acpi_status status;
+	int ret;
+
+	ret = security_locked_down(LOCKDOWN_ACPI_TABLES);
+	if (ret)
+		return ret;
 
 	if (!(*ppos)) {
 		/* parse the table header to get the table length */
diff --git a/include/linux/security.h b/include/linux/security.h
index 010637a79eac..390e39395112 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -110,6 +110,7 @@ enum lockdown_reason {
 	LOCKDOWN_PCI_ACCESS,
 	LOCKDOWN_IOPORT,
 	LOCKDOWN_MSR,
+	LOCKDOWN_ACPI_TABLES,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index b1c1c72440d5..6d44db0ddffa 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -25,6 +25,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
 	[LOCKDOWN_IOPORT] = "raw io port access",
 	[LOCKDOWN_MSR] = "raw MSR access",
+	[LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 41fa1ee9c6d687afb05760dd349f361855f1d7f5 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@redhat.com>
Date: Mon, 19 Aug 2019 17:17:51 -0700
Subject: acpi: Ignore acpi_rsdp kernel param when the kernel has been locked
 down

This option allows userspace to pass the RSDP address to the kernel, which
makes it possible for a user to modify the workings of hardware. Reject
the option when the kernel is locked down. This requires some reworking
of the existing RSDP command line logic, since the early boot code also
makes use of a command-line passed RSDP when locating the SRAT table
before the lockdown code has been initialised. This is achieved by
separating the command line RSDP path in the early boot code from the
generic RSDP path, and then copying the command line RSDP into boot
params in the kernel proper if lockdown is not enabled. If lockdown is
enabled and an RSDP is provided on the command line, this will only be
used when parsing SRAT (which shouldn't permit kernel code execution)
and will be ignored in the rest of the kernel.

(Modified by Matthew Garrett in order to handle the early boot RSDP
environment)

Signed-off-by: Josh Boyer <jwboyer@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: Dave Young <dyoung@redhat.com>
cc: linux-acpi@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/x86/boot/compressed/acpi.c | 19 +++++++++++++------
 arch/x86/include/asm/acpi.h     |  9 +++++++++
 arch/x86/include/asm/x86_init.h |  2 ++
 arch/x86/kernel/acpi/boot.c     |  5 +++++
 arch/x86/kernel/x86_init.c      |  1 +
 drivers/acpi/osl.c              | 14 +++++++++++++-
 include/linux/acpi.h            |  6 ++++++
 7 files changed, 49 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index ad84239e595e..e726e9b44bb1 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -26,7 +26,7 @@ struct mem_vector immovable_mem[MAX_NUMNODES*2];
  */
 #define MAX_ADDR_LEN 19
 
-static acpi_physical_address get_acpi_rsdp(void)
+static acpi_physical_address get_cmdline_acpi_rsdp(void)
 {
 	acpi_physical_address addr = 0;
 
@@ -215,10 +215,7 @@ acpi_physical_address get_rsdp_addr(void)
 {
 	acpi_physical_address pa;
 
-	pa = get_acpi_rsdp();
-
-	if (!pa)
-		pa = boot_params->acpi_rsdp_addr;
+	pa = boot_params->acpi_rsdp_addr;
 
 	if (!pa)
 		pa = efi_get_rsdp_addr();
@@ -240,7 +237,17 @@ static unsigned long get_acpi_srat_table(void)
 	char arg[10];
 	u8 *entry;
 
-	rsdp = (struct acpi_table_rsdp *)(long)boot_params->acpi_rsdp_addr;
+	/*
+	 * Check whether we were given an RSDP on the command line. We don't
+	 * stash this in boot params because the kernel itself may have
+	 * different ideas about whether to trust a command-line parameter.
+	 */
+	rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp();
+
+	if (!rsdp)
+		rsdp = (struct acpi_table_rsdp *)(long)
+			boot_params->acpi_rsdp_addr;
+
 	if (!rsdp)
 		return 0;
 
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index aac686e1e005..bc9693c9107e 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -117,6 +117,12 @@ static inline bool acpi_has_cpu_in_madt(void)
 	return !!acpi_lapic;
 }
 
+#define ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+	x86_init.acpi.set_root_pointer(addr);
+}
+
 #define ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
@@ -125,6 +131,7 @@ static inline u64 acpi_arch_get_root_pointer(void)
 
 void acpi_generic_reduced_hw_init(void);
 
+void x86_default_set_root_pointer(u64 addr);
 u64 x86_default_get_root_pointer(void);
 
 #else /* !CONFIG_ACPI */
@@ -138,6 +145,8 @@ static inline void disable_acpi(void) { }
 
 static inline void acpi_generic_reduced_hw_init(void) { }
 
+static inline void x86_default_set_root_pointer(u64 addr) { }
+
 static inline u64 x86_default_get_root_pointer(void)
 {
 	return 0;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index b85a7c54c6a1..d584128435cb 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -134,10 +134,12 @@ struct x86_hyper_init {
 
 /**
  * struct x86_init_acpi - x86 ACPI init functions
+ * @set_root_poitner:		set RSDP address
  * @get_root_pointer:		get RSDP address
  * @reduced_hw_early_init:	hardware reduced platform early init
  */
 struct x86_init_acpi {
+	void (*set_root_pointer)(u64 addr);
 	u64 (*get_root_pointer)(void);
 	void (*reduced_hw_early_init)(void);
 };
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 17b33ef604f3..04205ce127a1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1760,6 +1760,11 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
 	e820__update_table_print();
 }
 
+void x86_default_set_root_pointer(u64 addr)
+{
+	boot_params.acpi_rsdp_addr = addr;
+}
+
 u64 x86_default_get_root_pointer(void)
 {
 	return boot_params.acpi_rsdp_addr;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 50a2b492fdd6..d0b8f5585a73 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -95,6 +95,7 @@ struct x86_init_ops x86_init __initdata = {
 	},
 
 	.acpi = {
+		.set_root_pointer	= x86_default_set_root_pointer,
 		.get_root_pointer	= x86_default_get_root_pointer,
 		.reduced_hw_early_init	= acpi_generic_reduced_hw_init,
 	},
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index cc7507091dec..b7c3aeb175dd 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/jiffies.h>
 #include <linux/semaphore.h>
+#include <linux/security.h>
 
 #include <asm/io.h>
 #include <linux/uaccess.h>
@@ -180,8 +181,19 @@ acpi_physical_address __init acpi_os_get_root_pointer(void)
 	acpi_physical_address pa;
 
 #ifdef CONFIG_KEXEC
-	if (acpi_rsdp)
+	/*
+	 * We may have been provided with an RSDP on the command line,
+	 * but if a malicious user has done so they may be pointing us
+	 * at modified ACPI tables that could alter kernel behaviour -
+	 * so, we check the lockdown status before making use of
+	 * it. If we trust it then also stash it in an architecture
+	 * specific location (if appropriate) so it can be carried
+	 * over further kexec()s.
+	 */
+	if (acpi_rsdp && !security_locked_down(LOCKDOWN_ACPI_TABLES)) {
+		acpi_arch_set_root_pointer(acpi_rsdp);
 		return acpi_rsdp;
+	}
 #endif
 	pa = acpi_arch_get_root_pointer();
 	if (pa)
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index d315d86844e4..268a4d91f54c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -632,6 +632,12 @@ bool acpi_gtdt_c3stop(int type);
 int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count);
 #endif
 
+#ifndef ACPI_HAVE_ARCH_SET_ROOT_POINTER
+static inline void acpi_arch_set_root_pointer(u64 addr)
+{
+}
+#endif
+
 #ifndef ACPI_HAVE_ARCH_GET_ROOT_POINTER
 static inline u64 acpi_arch_get_root_pointer(void)
 {
-- 
cgit v1.2.3


From 3f19cad3fa0d0fff18ee126f03a80420ae7bcbc9 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:53 -0700
Subject: lockdown: Prohibit PCMCIA CIS storage when the kernel is locked down

Prohibit replacement of the PCMCIA Card Information Structure when the
kernel is locked down.

Suggested-by: Dominik Brodowski <linux@dominikbrodowski.net>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/pcmcia/cistpl.c      | 5 +++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index abd029945cc8..629359fe3513 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <linux/ioport.h>
 #include <linux/io.h>
+#include <linux/security.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
@@ -1575,6 +1576,10 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
 	struct pcmcia_socket *s;
 	int error;
 
+	error = security_locked_down(LOCKDOWN_PCMCIA_CIS);
+	if (error)
+		return error;
+
 	s = to_socket(container_of(kobj, struct device, kobj));
 
 	if (off)
diff --git a/include/linux/security.h b/include/linux/security.h
index 390e39395112..683f0607e6f2 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -111,6 +111,7 @@ enum lockdown_reason {
 	LOCKDOWN_IOPORT,
 	LOCKDOWN_MSR,
 	LOCKDOWN_ACPI_TABLES,
+	LOCKDOWN_PCMCIA_CIS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 6d44db0ddffa..db3477585972 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -26,6 +26,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_IOPORT] = "raw io port access",
 	[LOCKDOWN_MSR] = "raw MSR access",
 	[LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
+	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 794edf30ee6cd088d5f4079b1d4a4cfe5371203e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:54 -0700
Subject: lockdown: Lock down TIOCSSERIAL

Lock down TIOCSSERIAL as that can be used to change the ioport and irq
settings on a serial port.  This only appears to be an issue for the serial
drivers that use the core serial code.  All other drivers seem to either
ignore attempts to change port/irq or give an error.

Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: Jiri Slaby <jslaby@suse.com>
Cc: linux-serial@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 drivers/tty/serial/serial_core.c | 5 +++++
 include/linux/security.h         | 1 +
 security/lockdown/lockdown.c     | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 83f4dd0bfd74..bbad407557b9 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -22,6 +22,7 @@
 #include <linux/serial_core.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
+#include <linux/security.h>
 
 #include <linux/irq.h>
 #include <linux/uaccess.h>
@@ -862,6 +863,10 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port,
 		goto check_and_exit;
 	}
 
+	retval = security_locked_down(LOCKDOWN_TIOCSSERIAL);
+	if (retval && (change_irq || change_port))
+		goto exit;
+
 	/*
 	 * Ask the low level driver to verify the settings.
 	 */
diff --git a/include/linux/security.h b/include/linux/security.h
index 683f0607e6f2..b4a85badb03a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -112,6 +112,7 @@ enum lockdown_reason {
 	LOCKDOWN_MSR,
 	LOCKDOWN_ACPI_TABLES,
 	LOCKDOWN_PCMCIA_CIS,
+	LOCKDOWN_TIOCSSERIAL,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index db3477585972..771c77f9c04a 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -27,6 +27,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MSR] = "raw MSR access",
 	[LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
 	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
+	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 20657f66ef52e5005369e4ef539d4cbf01eab10d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:55 -0700
Subject: lockdown: Lock down module params that specify hardware parameters
 (eg. ioport)

Provided an annotation for module parameters that specify hardware
parameters (such as io ports, iomem addresses, irqs, dma channels, fixed
dma buffers and other types).

Suggested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Jessica Yu <jeyu@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     |  1 +
 kernel/params.c              | 21 ++++++++++++++++-----
 security/lockdown/lockdown.c |  1 +
 3 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index b4a85badb03a..1a3404f9c060 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -113,6 +113,7 @@ enum lockdown_reason {
 	LOCKDOWN_ACPI_TABLES,
 	LOCKDOWN_PCMCIA_CIS,
 	LOCKDOWN_TIOCSSERIAL,
+	LOCKDOWN_MODULE_PARAMETERS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/kernel/params.c b/kernel/params.c
index cf448785d058..8e56f8b12d8f 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -12,6 +12,7 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/security.h>
 
 #ifdef CONFIG_SYSFS
 /* Protects all built-in parameters, modules use their own param_lock */
@@ -96,13 +97,19 @@ bool parameq(const char *a, const char *b)
 	return parameqn(a, b, strlen(a)+1);
 }
 
-static void param_check_unsafe(const struct kernel_param *kp)
+static bool param_check_unsafe(const struct kernel_param *kp)
 {
+	if (kp->flags & KERNEL_PARAM_FL_HWPARAM &&
+	    security_locked_down(LOCKDOWN_MODULE_PARAMETERS))
+		return false;
+
 	if (kp->flags & KERNEL_PARAM_FL_UNSAFE) {
 		pr_notice("Setting dangerous option %s - tainting kernel\n",
 			  kp->name);
 		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 	}
+
+	return true;
 }
 
 static int parse_one(char *param,
@@ -132,8 +139,10 @@ static int parse_one(char *param,
 			pr_debug("handling %s with %p\n", param,
 				params[i].ops->set);
 			kernel_param_lock(params[i].mod);
-			param_check_unsafe(&params[i]);
-			err = params[i].ops->set(val, &params[i]);
+			if (param_check_unsafe(&params[i]))
+				err = params[i].ops->set(val, &params[i]);
+			else
+				err = -EPERM;
 			kernel_param_unlock(params[i].mod);
 			return err;
 		}
@@ -553,8 +562,10 @@ static ssize_t param_attr_store(struct module_attribute *mattr,
 		return -EPERM;
 
 	kernel_param_lock(mk->mod);
-	param_check_unsafe(attribute->param);
-	err = attribute->param->ops->set(buf, attribute->param);
+	if (param_check_unsafe(attribute->param))
+		err = attribute->param->ops->set(buf, attribute->param);
+	else
+		err = -EPERM;
 	kernel_param_unlock(mk->mod);
 	if (!err)
 		return len;
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 771c77f9c04a..0fa434294667 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -28,6 +28,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_ACPI_TABLES] = "modifying ACPI tables",
 	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
 	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
+	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 906357f77a077508d160e729f917c5f0a4304f25 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:56 -0700
Subject: x86/mmiotrace: Lock down the testmmiotrace module

The testmmiotrace module shouldn't be permitted when the kernel is locked
down as it can be used to arbitrarily read and write MMIO space. This is
a runtime check rather than buildtime in order to allow configurations
where the same kernel may be run in both locked down or permissive modes
depending on local policy.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Howells <dhowells@redhat.com
Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: Thomas Gleixner <tglx@linutronix.de>
cc: Steven Rostedt <rostedt@goodmis.org>
cc: Ingo Molnar <mingo@kernel.org>
cc: "H. Peter Anvin" <hpa@zytor.com>
cc: x86@kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 arch/x86/mm/testmmiotrace.c  | 5 +++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index 0881e1ff1e58..a8bd952e136d 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/io.h>
 #include <linux/mmiotrace.h>
+#include <linux/security.h>
 
 static unsigned long mmio_address;
 module_param_hw(mmio_address, ulong, iomem, 0);
@@ -115,6 +116,10 @@ static void do_test_bulk_ioremapping(void)
 static int __init init(void)
 {
 	unsigned long size = (read_far) ? (8 << 20) : (16 << 10);
+	int ret = security_locked_down(LOCKDOWN_MMIOTRACE);
+
+	if (ret)
+		return ret;
 
 	if (mmio_address == 0) {
 		pr_err("you have to use the module argument mmio_address.\n");
diff --git a/include/linux/security.h b/include/linux/security.h
index 1a3404f9c060..d8db7ea4c4bf 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -114,6 +114,7 @@ enum lockdown_reason {
 	LOCKDOWN_PCMCIA_CIS,
 	LOCKDOWN_TIOCSSERIAL,
 	LOCKDOWN_MODULE_PARAMETERS,
+	LOCKDOWN_MMIOTRACE,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 0fa434294667..2eadbe0667e7 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -29,6 +29,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_PCMCIA_CIS] = "direct PCMCIA CIS storage",
 	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
 	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
+	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
-- 
cgit v1.2.3


From 02e935bf5b34edcc4cb0dc532dd0e1a1bfb33b51 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:57 -0700
Subject: lockdown: Lock down /proc/kcore

Disallow access to /proc/kcore when the kernel is locked down to prevent
access to cryptographic data. This is limited to lockdown
confidentiality mode and is still permitted in integrity mode.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/proc/kcore.c              | 5 +++++
 include/linux/security.h     | 1 +
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index f5834488b67d..ee2c576cc94e 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -31,6 +31,7 @@
 #include <linux/ioport.h>
 #include <linux/memory.h>
 #include <linux/sched/task.h>
+#include <linux/security.h>
 #include <asm/sections.h>
 #include "internal.h"
 
@@ -545,6 +546,10 @@ out:
 
 static int open_kcore(struct inode *inode, struct file *filp)
 {
+	int ret = security_locked_down(LOCKDOWN_KCORE);
+
+	if (ret)
+		return ret;
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
diff --git a/include/linux/security.h b/include/linux/security.h
index d8db7ea4c4bf..669e8de5299d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -116,6 +116,7 @@ enum lockdown_reason {
 	LOCKDOWN_MODULE_PARAMETERS,
 	LOCKDOWN_MMIOTRACE,
 	LOCKDOWN_INTEGRITY_MAX,
+	LOCKDOWN_KCORE,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 2eadbe0667e7..403b30357f75 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -31,6 +31,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
 	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
+	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
cgit v1.2.3


From a94549dd87f5ea4ca50fee493df08a2dc6256b53 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:58 -0700
Subject: lockdown: Lock down tracing and perf kprobes when in confidentiality
 mode

Disallow the creation of perf and ftrace kprobes when the kernel is
locked down in confidentiality mode by preventing their registration.
This prevents kprobes from being used to access kernel memory to steal
crypto data, but continues to allow the use of kprobes from signed
modules.

Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: davem@davemloft.net
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     | 1 +
 kernel/trace/trace_kprobe.c  | 5 +++++
 security/lockdown/lockdown.c | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index 669e8de5299d..0b2529dbf0f4 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -117,6 +117,7 @@ enum lockdown_reason {
 	LOCKDOWN_MMIOTRACE,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
+	LOCKDOWN_KPROBES,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7d736248a070..fcb28b0702b2 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/rculist.h>
 #include <linux/error-injection.h>
+#include <linux/security.h>
 
 #include "trace_dynevent.h"
 #include "trace_kprobe_selftest.h"
@@ -415,6 +416,10 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
 	int i, ret;
 
+	ret = security_locked_down(LOCKDOWN_KPROBES);
+	if (ret)
+		return ret;
+
 	if (trace_probe_is_registered(&tk->tp))
 		return -EINVAL;
 
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 403b30357f75..27b2cf51e443 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -32,6 +32,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
+	[LOCKDOWN_KPROBES] = "use of kprobes",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
cgit v1.2.3


From 9d1f8be5cf42b497a3bddf1d523f2bb142e9318c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:17:59 -0700
Subject: bpf: Restrict bpf when kernel lockdown is in confidentiality mode

bpf_read() and bpf_read_str() could potentially be abused to (eg) allow
private keys in kernel memory to be leaked. Disable them if the kernel
has been locked down in confidentiality mode.

Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
cc: netdev@vger.kernel.org
cc: Chun-Yi Lee <jlee@suse.com>
cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     |  1 +
 kernel/trace/bpf_trace.c     | 10 ++++++++++
 security/lockdown/lockdown.c |  1 +
 3 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index 0b2529dbf0f4..e604f4c67f03 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -118,6 +118,7 @@ enum lockdown_reason {
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
+	LOCKDOWN_BPF_READ,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 1c9a4745e596..33a954c367f3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -139,8 +139,13 @@ BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret < 0)
+		goto out;
+
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
+out:
 		memset(dst, 0, size);
 
 	return ret;
@@ -566,6 +571,10 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
 {
 	int ret;
 
+	ret = security_locked_down(LOCKDOWN_BPF_READ);
+	if (ret < 0)
+		goto out;
+
 	/*
 	 * The strncpy_from_unsafe() call will likely not fill the entire
 	 * buffer, but that's okay in this circumstance as we're probing
@@ -577,6 +586,7 @@ BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
 	 */
 	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
+out:
 		memset(dst, 0, size);
 
 	return ret;
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 27b2cf51e443..2397772c56bd 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -33,6 +33,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
+	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
cgit v1.2.3


From b0c8fdc7fdb77586c3d1937050925b960743306e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:18:00 -0700
Subject: lockdown: Lock down perf when in confidentiality mode

Disallow the use of certain perf facilities that might allow userspace to
access kernel data.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h     | 1 +
 kernel/events/core.c         | 7 +++++++
 security/lockdown/lockdown.c | 1 +
 3 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/security.h b/include/linux/security.h
index e604f4c67f03..b94f1e697537 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -119,6 +119,7 @@ enum lockdown_reason {
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
 	LOCKDOWN_BPF_READ,
+	LOCKDOWN_PERF,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f85929ce13be..8732f980a4fc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10798,6 +10798,13 @@ SYSCALL_DEFINE5(perf_event_open,
 	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
 		return -EACCES;
 
+	err = security_locked_down(LOCKDOWN_PERF);
+	if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR))
+		/* REGS_INTR can leak data, lockdown must prevent this */
+		return err;
+
+	err = 0;
+
 	/*
 	 * In cgroup mode, the pid argument is used to pass the fd
 	 * opened to the cgroup directory in cgroupfs. The cpu argument
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 2397772c56bd..3d7b1039457b 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -34,6 +34,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
 	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+	[LOCKDOWN_PERF] = "unsafe use of perf",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
cgit v1.2.3


From 29d3c1c8dfe752c01b7115ecd5a3142b232a38e1 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Mon, 19 Aug 2019 17:18:01 -0700
Subject: kexec: Allow kexec_file() with appropriate IMA policy when locked
 down

Systems in lockdown mode should block the kexec of untrusted kernels.
For x86 and ARM we can ensure that a kernel is trustworthy by validating
a PE signature, but this isn't possible on other architectures. On those
platforms we can use IMA digital signatures instead. Add a function to
determine whether IMA has or will verify signatures for a given event type,
and if so permit kexec_file() even if the kernel is otherwise locked down.
This is restricted to cases where CONFIG_INTEGRITY_TRUSTED_KEYRING is set
in order to prevent an attacker from loading additional keys at runtime.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Acked-by: Mimi Zohar <zohar@linux.ibm.com>
Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
Cc: linux-integrity@vger.kernel.org
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/ima.h                 |  9 +++++++
 kernel/kexec_file.c                 | 10 +++++++-
 security/integrity/ima/ima.h        |  2 ++
 security/integrity/ima/ima_main.c   |  2 +-
 security/integrity/ima/ima_policy.c | 50 +++++++++++++++++++++++++++++++++++++
 5 files changed, 71 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 00036d2f57c3..8e2f324fb901 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -129,4 +129,13 @@ static inline int ima_inode_removexattr(struct dentry *dentry,
 	return 0;
 }
 #endif /* CONFIG_IMA_APPRAISE */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+extern bool ima_appraise_signature(enum kernel_read_file_id func);
+#else
+static inline bool ima_appraise_signature(enum kernel_read_file_id func)
+{
+	return false;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
 #endif /* _LINUX_IMA_H */
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 43109ef4d6bf..7f4a618fc8c1 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -208,7 +208,15 @@ kimage_validate_signature(struct kimage *image)
 			return ret;
 		}
 
-		return security_locked_down(LOCKDOWN_KEXEC);
+		/* If IMA is guaranteed to appraise a signature on the kexec
+		 * image, permit it even if the kernel is otherwise locked
+		 * down.
+		 */
+		if (!ima_appraise_signature(READING_KEXEC_IMAGE) &&
+		    security_locked_down(LOCKDOWN_KEXEC))
+			return -EPERM;
+
+		return 0;
 
 		/* All other errors are fatal, including nomem, unparseable
 		 * signatures and signature check failures - even if signatures
diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
index ca10917b5f89..874bd77d3b91 100644
--- a/security/integrity/ima/ima.h
+++ b/security/integrity/ima/ima.h
@@ -111,6 +111,8 @@ struct ima_kexec_hdr {
 	u64 count;
 };
 
+extern const int read_idmap[];
+
 #ifdef CONFIG_HAVE_IMA_KEXEC
 void ima_load_kexec_buffer(void);
 #else
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index 1cffda4412b7..1747bc7bcb60 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -469,7 +469,7 @@ int ima_read_file(struct file *file, enum kernel_read_file_id read_id)
 	return 0;
 }
 
-static const int read_idmap[READING_MAX_ID] = {
+const int read_idmap[READING_MAX_ID] = {
 	[READING_FIRMWARE] = FIRMWARE_CHECK,
 	[READING_FIRMWARE_PREALLOC_BUFFER] = FIRMWARE_CHECK,
 	[READING_MODULE] = MODULE_CHECK,
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 7b53f2ca58e2..b8773f05f9da 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -1339,3 +1339,53 @@ int ima_policy_show(struct seq_file *m, void *v)
 	return 0;
 }
 #endif	/* CONFIG_IMA_READ_POLICY */
+
+#if defined(CONFIG_IMA_APPRAISE) && defined(CONFIG_INTEGRITY_TRUSTED_KEYRING)
+/*
+ * ima_appraise_signature: whether IMA will appraise a given function using
+ * an IMA digital signature. This is restricted to cases where the kernel
+ * has a set of built-in trusted keys in order to avoid an attacker simply
+ * loading additional keys.
+ */
+bool ima_appraise_signature(enum kernel_read_file_id id)
+{
+	struct ima_rule_entry *entry;
+	bool found = false;
+	enum ima_hooks func;
+
+	if (id >= READING_MAX_ID)
+		return false;
+
+	func = read_idmap[id] ?: FILE_CHECK;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(entry, ima_rules, list) {
+		if (entry->action != APPRAISE)
+			continue;
+
+		/*
+		 * A generic entry will match, but otherwise require that it
+		 * match the func we're looking for
+		 */
+		if (entry->func && entry->func != func)
+			continue;
+
+		/*
+		 * We require this to be a digital signature, not a raw IMA
+		 * hash.
+		 */
+		if (entry->flags & IMA_DIGSIG_REQUIRED)
+			found = true;
+
+		/*
+		 * We've found a rule that matches, so break now even if it
+		 * didn't require a digital signature - a later rule that does
+		 * won't override it, so would be a false positive.
+		 */
+		break;
+	}
+
+	rcu_read_unlock();
+	return found;
+}
+#endif /* CONFIG_IMA_APPRAISE && CONFIG_INTEGRITY_TRUSTED_KEYRING */
-- 
cgit v1.2.3


From 5496197f9b084f086cb410dd566648b0896fcc74 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 17:18:02 -0700
Subject: debugfs: Restrict debugfs when the kernel is locked down

Disallow opening of debugfs files that might be used to muck around when
the kernel is locked down as various drivers give raw access to hardware
through debugfs.  Given the effort of auditing all 2000 or so files and
manually fixing each one as necessary, I've chosen to apply a heuristic
instead.  The following changes are made:

 (1) chmod and chown are disallowed on debugfs objects (though the root dir
     can be modified by mount and remount, but I'm not worried about that).

 (2) When the kernel is locked down, only files with the following criteria
     are permitted to be opened:

	- The file must have mode 00444
	- The file must not have ioctl methods
	- The file must not have mmap

 (3) When the kernel is locked down, files may only be opened for reading.

Normal device interaction should be done through configfs, sysfs or a
miscdev, not debugfs.

Note that this makes it unnecessary to specifically lock down show_dsts(),
show_devs() and show_call() in the asus-wmi driver.

I would actually prefer to lock down all files by default and have the
the files unlocked by the creator.  This is tricky to manage correctly,
though, as there are 19 creation functions and ~1600 call sites (some of
them in loops scanning tables).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Andy Shevchenko <andy.shevchenko@gmail.com>
cc: acpi4asus-user@lists.sourceforge.net
cc: platform-driver-x86@vger.kernel.org
cc: Matthew Garrett <mjg59@srcf.ucam.org>
cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Greg KH <greg@kroah.com>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Matthew Garrett <matthewgarrett@google.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/debugfs/file.c            | 30 ++++++++++++++++++++++++++++++
 fs/debugfs/inode.c           | 32 ++++++++++++++++++++++++++++++--
 include/linux/security.h     |  1 +
 security/lockdown/lockdown.c |  1 +
 4 files changed, 62 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index ddd708b09fa1..5d3e449b5988 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/device.h>
 #include <linux/poll.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -136,6 +137,25 @@ void debugfs_file_put(struct dentry *dentry)
 }
 EXPORT_SYMBOL_GPL(debugfs_file_put);
 
+/*
+ * Only permit access to world-readable files when the kernel is locked down.
+ * We also need to exclude any file that has ways to write or alter it as root
+ * can bypass the permissions check.
+ */
+static bool debugfs_is_locked_down(struct inode *inode,
+				   struct file *filp,
+				   const struct file_operations *real_fops)
+{
+	if ((inode->i_mode & 07777) == 0444 &&
+	    !(filp->f_mode & FMODE_WRITE) &&
+	    !real_fops->unlocked_ioctl &&
+	    !real_fops->compat_ioctl &&
+	    !real_fops->mmap)
+		return false;
+
+	return security_locked_down(LOCKDOWN_DEBUGFS);
+}
+
 static int open_proxy_open(struct inode *inode, struct file *filp)
 {
 	struct dentry *dentry = F_DENTRY(filp);
@@ -147,6 +167,11 @@ static int open_proxy_open(struct inode *inode, struct file *filp)
 		return r == -EIO ? -ENOENT : r;
 
 	real_fops = debugfs_real_fops(filp);
+
+	r = debugfs_is_locked_down(inode, filp, real_fops);
+	if (r)
+		goto out;
+
 	real_fops = fops_get(real_fops);
 	if (!real_fops) {
 		/* Huh? Module did not clean up after itself at exit? */
@@ -272,6 +297,11 @@ static int full_proxy_open(struct inode *inode, struct file *filp)
 		return r == -EIO ? -ENOENT : r;
 
 	real_fops = debugfs_real_fops(filp);
+
+	r = debugfs_is_locked_down(inode, filp, real_fops);
+	if (r)
+		goto out;
+
 	real_fops = fops_get(real_fops);
 	if (!real_fops) {
 		/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index acef14ad53db..c8613bcad252 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -23,6 +23,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #include "internal.h"
 
@@ -32,6 +33,32 @@ static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 static bool debugfs_registered;
 
+/*
+ * Don't allow access attributes to be changed whilst the kernel is locked down
+ * so that we can use the file mode as part of a heuristic to determine whether
+ * to lock down individual files.
+ */
+static int debugfs_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	int ret = security_locked_down(LOCKDOWN_DEBUGFS);
+
+	if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
+		return ret;
+	return simple_setattr(dentry, ia);
+}
+
+static const struct inode_operations debugfs_file_inode_operations = {
+	.setattr	= debugfs_setattr,
+};
+static const struct inode_operations debugfs_dir_inode_operations = {
+	.lookup		= simple_lookup,
+	.setattr	= debugfs_setattr,
+};
+static const struct inode_operations debugfs_symlink_inode_operations = {
+	.get_link	= simple_get_link,
+	.setattr	= debugfs_setattr,
+};
+
 static struct inode *debugfs_get_inode(struct super_block *sb)
 {
 	struct inode *inode = new_inode(sb);
@@ -355,6 +382,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode,
 	inode->i_mode = mode;
 	inode->i_private = data;
 
+	inode->i_op = &debugfs_file_inode_operations;
 	inode->i_fop = proxy_fops;
 	dentry->d_fsdata = (void *)((unsigned long)real_fops |
 				DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
@@ -515,7 +543,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
 		return failed_creating(dentry);
 
 	inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-	inode->i_op = &simple_dir_inode_operations;
+	inode->i_op = &debugfs_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
 
 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -610,7 +638,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
 		return failed_creating(dentry);
 	}
 	inode->i_mode = S_IFLNK | S_IRWXUGO;
-	inode->i_op = &simple_symlink_inode_operations;
+	inode->i_op = &debugfs_symlink_inode_operations;
 	inode->i_link = link;
 	d_instantiate(dentry, inode);
 	return end_creating(dentry);
diff --git a/include/linux/security.h b/include/linux/security.h
index b94f1e697537..152824b6f456 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -115,6 +115,7 @@ enum lockdown_reason {
 	LOCKDOWN_TIOCSSERIAL,
 	LOCKDOWN_MODULE_PARAMETERS,
 	LOCKDOWN_MMIOTRACE,
+	LOCKDOWN_DEBUGFS,
 	LOCKDOWN_INTEGRITY_MAX,
 	LOCKDOWN_KCORE,
 	LOCKDOWN_KPROBES,
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 3d7b1039457b..edd1fff0147d 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -30,6 +30,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_TIOCSSERIAL] = "reconfiguration of serial port IO",
 	[LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
 	[LOCKDOWN_MMIOTRACE] = "unsafe mmio",
+	[LOCKDOWN_DEBUGFS] = "debugfs access",
 	[LOCKDOWN_INTEGRITY_MAX] = "integrity",
 	[LOCKDOWN_KCORE] = "/proc/kcore access",
 	[LOCKDOWN_KPROBES] = "use of kprobes",
-- 
cgit v1.2.3


From ccbd54ff54e8b1880456b81c4aea352ebe208843 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Mon, 19 Aug 2019 17:18:03 -0700
Subject: tracefs: Restrict tracefs when the kernel is locked down

Tracefs may release more information about the kernel than desirable, so
restrict it when the kernel is locked down in confidentiality mode by
preventing open().

(Fixed by Ben Hutchings to avoid a null dereference in
default_file_open())

Signed-off-by: Matthew Garrett <mjg59@google.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/tracefs/inode.c           | 42 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/security.h     |  1 +
 security/lockdown/lockdown.c |  1 +
 3 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index a5bab190a297..761af8ce4015 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -20,6 +20,7 @@
 #include <linux/parser.h>
 #include <linux/magic.h>
 #include <linux/slab.h>
+#include <linux/security.h>
 
 #define TRACEFS_DEFAULT_MODE	0700
 
@@ -27,6 +28,25 @@ static struct vfsmount *tracefs_mount;
 static int tracefs_mount_count;
 static bool tracefs_registered;
 
+static int default_open_file(struct inode *inode, struct file *filp)
+{
+	struct dentry *dentry = filp->f_path.dentry;
+	struct file_operations *real_fops;
+	int ret;
+
+	if (!dentry)
+		return -EINVAL;
+
+	ret = security_locked_down(LOCKDOWN_TRACEFS);
+	if (ret)
+		return ret;
+
+	real_fops = dentry->d_fsdata;
+	if (!real_fops->open)
+		return 0;
+	return real_fops->open(inode, filp);
+}
+
 static ssize_t default_read_file(struct file *file, char __user *buf,
 				 size_t count, loff_t *ppos)
 {
@@ -221,6 +241,12 @@ static int tracefs_apply_options(struct super_block *sb)
 	return 0;
 }
 
+static void tracefs_destroy_inode(struct inode *inode)
+{
+	if (S_ISREG(inode->i_mode))
+		kfree(inode->i_fop);
+}
+
 static int tracefs_remount(struct super_block *sb, int *flags, char *data)
 {
 	int err;
@@ -257,6 +283,7 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
 static const struct super_operations tracefs_super_operations = {
 	.statfs		= simple_statfs,
 	.remount_fs	= tracefs_remount,
+	.destroy_inode  = tracefs_destroy_inode,
 	.show_options	= tracefs_show_options,
 };
 
@@ -387,6 +414,7 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops)
 {
+	struct file_operations *proxy_fops;
 	struct dentry *dentry;
 	struct inode *inode;
 
@@ -402,8 +430,20 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
 	if (unlikely(!inode))
 		return failed_creating(dentry);
 
+	proxy_fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL);
+	if (unlikely(!proxy_fops)) {
+		iput(inode);
+		return failed_creating(dentry);
+	}
+
+	if (!fops)
+		fops = &tracefs_file_operations;
+
+	dentry->d_fsdata = (void *)fops;
+	memcpy(proxy_fops, fops, sizeof(*proxy_fops));
+	proxy_fops->open = default_open_file;
 	inode->i_mode = mode;
-	inode->i_fop = fops ? fops : &tracefs_file_operations;
+	inode->i_fop = proxy_fops;
 	inode->i_private = data;
 	d_instantiate(dentry, inode);
 	fsnotify_create(dentry->d_parent->d_inode, dentry);
diff --git a/include/linux/security.h b/include/linux/security.h
index 152824b6f456..429f9f03372b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -121,6 +121,7 @@ enum lockdown_reason {
 	LOCKDOWN_KPROBES,
 	LOCKDOWN_BPF_READ,
 	LOCKDOWN_PERF,
+	LOCKDOWN_TRACEFS,
 	LOCKDOWN_CONFIDENTIALITY_MAX,
 };
 
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index edd1fff0147d..84df03b1f5a7 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -36,6 +36,7 @@ static char *lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
 	[LOCKDOWN_KPROBES] = "use of kprobes",
 	[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
 	[LOCKDOWN_PERF] = "unsafe use of perf",
+	[LOCKDOWN_TRACEFS] = "use of tracefs",
 	[LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
-- 
cgit v1.2.3


From 6311b6521bcc804e4d2fd45a5640562a7b8b5241 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Mon, 5 Aug 2019 14:33:46 -0600
Subject: drivers: qcom: Add BCM vote macro to header

The macro to generate a Bus Controller Manager (BCM) TCS command is used
by the interconnect driver but might also be interesting to other
drivers that need to construct TCS commands for sub processors so move
it out of the sdm845 specific file and into the header.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Acked-by: Stephen Boyd <sboyd@kernel.org>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Georgi Djakov <georgi.djakov@linaro.org>
---
 drivers/clk/qcom/clk-rpmh.c        | 16 +++-------------
 drivers/interconnect/qcom/sdm845.c | 19 +------------------
 include/soc/qcom/tcs.h             | 20 +++++++++++++++++++-
 3 files changed, 23 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
index c3fd632af119..a32bfaeb7e61 100644
--- a/drivers/clk/qcom/clk-rpmh.c
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/clk-provider.h>
@@ -12,23 +12,13 @@
 #include <linux/platform_device.h>
 #include <soc/qcom/cmd-db.h>
 #include <soc/qcom/rpmh.h>
+#include <soc/qcom/tcs.h>
 
 #include <dt-bindings/clock/qcom,rpmh.h>
 
 #define CLK_RPMH_ARC_EN_OFFSET		0
 #define CLK_RPMH_VRM_EN_OFFSET		4
 
-#define BCM_TCS_CMD_COMMIT_MASK		0x40000000
-#define BCM_TCS_CMD_VALID_SHIFT		29
-#define BCM_TCS_CMD_VOTE_MASK		0x3fff
-#define BCM_TCS_CMD_VOTE_SHIFT		0
-
-#define BCM_TCS_CMD(valid, vote)				\
-	(BCM_TCS_CMD_COMMIT_MASK |				\
-	((valid) << BCM_TCS_CMD_VALID_SHIFT) |			\
-	((vote & BCM_TCS_CMD_VOTE_MASK)				\
-	<< BCM_TCS_CMD_VOTE_SHIFT))
-
 /**
  * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager(BCM)
  * @unit: divisor used to convert Hz value to an RPMh msg
@@ -269,7 +259,7 @@ static int clk_rpmh_bcm_send_cmd(struct clk_rpmh *c, bool enable)
 	}
 
 	cmd.addr = c->res_addr;
-	cmd.data = BCM_TCS_CMD(enable, cmd_state);
+	cmd.data = BCM_TCS_CMD(1, enable, 0, cmd_state);
 
 	ret = rpmh_write_async(c->dev, RPMH_ACTIVE_ONLY_STATE, &cmd, 1);
 	if (ret) {
diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
index 93df67345b39..57955596bb59 100644
--- a/drivers/interconnect/qcom/sdm845.c
+++ b/drivers/interconnect/qcom/sdm845.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
  *
  */
 
@@ -20,23 +20,6 @@
 #include <soc/qcom/rpmh.h>
 #include <soc/qcom/tcs.h>
 
-#define BCM_TCS_CMD_COMMIT_SHFT		30
-#define BCM_TCS_CMD_COMMIT_MASK		0x40000000
-#define BCM_TCS_CMD_VALID_SHFT		29
-#define BCM_TCS_CMD_VALID_MASK		0x20000000
-#define BCM_TCS_CMD_VOTE_X_SHFT		14
-#define BCM_TCS_CMD_VOTE_MASK		0x3fff
-#define BCM_TCS_CMD_VOTE_Y_SHFT		0
-#define BCM_TCS_CMD_VOTE_Y_MASK		0xfffc000
-
-#define BCM_TCS_CMD(commit, valid, vote_x, vote_y)		\
-	(((commit) << BCM_TCS_CMD_COMMIT_SHFT) |		\
-	((valid) << BCM_TCS_CMD_VALID_SHFT) |			\
-	((cpu_to_le32(vote_x) &					\
-	BCM_TCS_CMD_VOTE_MASK) << BCM_TCS_CMD_VOTE_X_SHFT) |	\
-	((cpu_to_le32(vote_y) &					\
-	BCM_TCS_CMD_VOTE_MASK) << BCM_TCS_CMD_VOTE_Y_SHFT))
-
 #define to_qcom_provider(_provider) \
 	container_of(_provider, struct qcom_icc_provider, provider)
 
diff --git a/include/soc/qcom/tcs.h b/include/soc/qcom/tcs.h
index 262876a59e86..7a2a055ba6b0 100644
--- a/include/soc/qcom/tcs.h
+++ b/include/soc/qcom/tcs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016-2019, The Linux Foundation. All rights reserved.
  */
 
 #ifndef __SOC_QCOM_TCS_H__
@@ -53,4 +53,22 @@ struct tcs_request {
 	struct tcs_cmd *cmds;
 };
 
+#define BCM_TCS_CMD_COMMIT_SHFT		30
+#define BCM_TCS_CMD_COMMIT_MASK		0x40000000
+#define BCM_TCS_CMD_VALID_SHFT		29
+#define BCM_TCS_CMD_VALID_MASK		0x20000000
+#define BCM_TCS_CMD_VOTE_X_SHFT		14
+#define BCM_TCS_CMD_VOTE_MASK		0x3fff
+#define BCM_TCS_CMD_VOTE_Y_SHFT		0
+#define BCM_TCS_CMD_VOTE_Y_MASK		0xfffc000
+
+/* Construct a Bus Clock Manager (BCM) specific TCS command */
+#define BCM_TCS_CMD(commit, valid, vote_x, vote_y)		\
+	(((commit) << BCM_TCS_CMD_COMMIT_SHFT) |		\
+	((valid) << BCM_TCS_CMD_VALID_SHFT) |			\
+	((cpu_to_le32(vote_x) &					\
+	BCM_TCS_CMD_VOTE_MASK) << BCM_TCS_CMD_VOTE_X_SHFT) |	\
+	((cpu_to_le32(vote_y) &					\
+	BCM_TCS_CMD_VOTE_MASK) << BCM_TCS_CMD_VOTE_Y_SHFT))
+
 #endif /* __SOC_QCOM_TCS_H__ */
-- 
cgit v1.2.3


From f52a0c7b5eb907a12bc08a33cc998ed02dad3507 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Fri, 16 Aug 2019 21:38:12 +1000
Subject: gpio: stubs in headers should be inline

Fixes: fdd61a013a24 ("gpio: Add support for hierarchical IRQ domains")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/r/20190816213812.40a130db@canb.auug.org.au
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 0e6d3b0c0211..72d48a2bab65 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -543,14 +543,14 @@ void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
 
 #else
 
-static void gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
+static inline void gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *chip,
 						    struct irq_fwspec *fwspec,
 						    unsigned int parent_hwirq,
 						    unsigned int parent_type)
 {
 }
 
-static void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
+static inline void gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *chip,
 						     struct irq_fwspec *fwspec,
 						     unsigned int parent_hwirq,
 						     unsigned int parent_type)
-- 
cgit v1.2.3


From c9fc5aff217267a9ef3a76450760534488870c69 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 19 Aug 2019 10:49:04 +0200
Subject: gpio: Pass mask and size with the init_valid_mask()

It is more helpful for drivers to have the affected fields
directly available when we use the callback to set up the
valid mask. Change this and switch over the only user
(MSM) to use the passed parameters. If we do this we can
also move the mask out of publicly visible struct fields.

Cc: Stephen Boyd <swboyd@chromium.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20190819084904.30027-1-linus.walleij@linaro.or
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c             |  8 +++++---
 drivers/pinctrl/qcom/pinctrl-msm.c | 19 ++++++++++---------
 include/linux/gpio/driver.h        |  4 +++-
 3 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 5277b8f1ff7c..22b87c6e8cd5 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -375,10 +375,12 @@ static int gpiochip_alloc_valid_mask(struct gpio_chip *gc)
 	return 0;
 }
 
-static int gpiochip_init_valid_mask(struct gpio_chip *gpiochip)
+static int gpiochip_init_valid_mask(struct gpio_chip *gc)
 {
-	if (gpiochip->init_valid_mask)
-		return gpiochip->init_valid_mask(gpiochip);
+	if (gc->init_valid_mask)
+		return gc->init_valid_mask(gc,
+					   gc->valid_mask,
+					   gc->ngpio);
 
 	return 0;
 }
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 7f35c196bb3e..a5d8f75da4a7 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -593,24 +593,25 @@ static void msm_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 #define msm_gpio_dbg_show NULL
 #endif
 
-static int msm_gpio_init_valid_mask(struct gpio_chip *chip)
+static int msm_gpio_init_valid_mask(struct gpio_chip *gc,
+				    unsigned long *valid_mask,
+				    unsigned int ngpios)
 {
-	struct msm_pinctrl *pctrl = gpiochip_get_data(chip);
+	struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
 	int ret;
 	unsigned int len, i;
-	unsigned int max_gpios = pctrl->soc->ngpios;
 	const int *reserved = pctrl->soc->reserved_gpios;
 	u16 *tmp;
 
 	/* Driver provided reserved list overrides DT and ACPI */
 	if (reserved) {
-		bitmap_fill(chip->valid_mask, max_gpios);
+		bitmap_fill(valid_mask, ngpios);
 		for (i = 0; reserved[i] >= 0; i++) {
-			if (i >= max_gpios || reserved[i] >= max_gpios) {
+			if (i >= ngpios || reserved[i] >= ngpios) {
 				dev_err(pctrl->dev, "invalid list of reserved GPIOs\n");
 				return -EINVAL;
 			}
-			clear_bit(reserved[i], chip->valid_mask);
+			clear_bit(reserved[i], valid_mask);
 		}
 
 		return 0;
@@ -622,7 +623,7 @@ static int msm_gpio_init_valid_mask(struct gpio_chip *chip)
 	if (ret < 0)
 		return 0;
 
-	if (ret > max_gpios)
+	if (ret > ngpios)
 		return -EINVAL;
 
 	tmp = kmalloc_array(len, sizeof(*tmp), GFP_KERNEL);
@@ -635,9 +636,9 @@ static int msm_gpio_init_valid_mask(struct gpio_chip *chip)
 		goto out;
 	}
 
-	bitmap_zero(chip->valid_mask, max_gpios);
+	bitmap_zero(valid_mask, ngpios);
 	for (i = 0; i < len; i++)
-		set_bit(tmp[i], chip->valid_mask);
+		set_bit(tmp[i], valid_mask);
 
 out:
 	kfree(tmp);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 72d48a2bab65..dc03323897ef 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -363,7 +363,9 @@ struct gpio_chip {
 	void			(*dbg_show)(struct seq_file *s,
 						struct gpio_chip *chip);
 
-	int			(*init_valid_mask)(struct gpio_chip *chip);
+	int			(*init_valid_mask)(struct gpio_chip *chip,
+						   unsigned long *valid_mask,
+						   unsigned int ngpios);
 
 	int			base;
 	u16			ngpio;
-- 
cgit v1.2.3


From eb1e8bd6e3900e3a3b9776cd1b09ecfc05573619 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 19 Aug 2019 11:30:58 +0200
Subject: gpio: Use callback presence to determine need of valid_mask

After we switched the two drivers that have .need_valid_mask
set to use the callback for setting up the .valid_mask,
we can just use the presence of the .init_valid_mask()
callback (or the OF reserved ranges, nota bene) to determine
whether to allocate the mask or not and we can drop the
.need_valid_mask field altogether.

Cc: Benjamin Gaignard <benjamin.gaignard@st.com>
Cc: Amelie Delaunay <amelie.delaunay@st.com>
Cc: Stephen Boyd <swboyd@chromium.org>
Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20190819093058.10863-1-linus.walleij@linaro.org
---
 drivers/gpio/gpiolib.c             | 4 +---
 drivers/pinctrl/pinctrl-stmfx.c    | 1 -
 drivers/pinctrl/qcom/pinctrl-msm.c | 4 ++--
 include/linux/gpio/driver.h        | 9 ---------
 4 files changed, 3 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 22b87c6e8cd5..01aa5440454c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -363,9 +363,7 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *chip)
 
 static int gpiochip_alloc_valid_mask(struct gpio_chip *gc)
 {
-	if (of_gpio_need_valid_mask(gc))
-		gc->need_valid_mask = true;
-	if (!gc->need_valid_mask)
+	if (!(of_gpio_need_valid_mask(gc) || gc->init_valid_mask))
 		return 0;
 
 	gc->valid_mask = gpiochip_allocate_mask(gc);
diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c
index 13b6d6f72bcc..dd5aa9a2dfe5 100644
--- a/drivers/pinctrl/pinctrl-stmfx.c
+++ b/drivers/pinctrl/pinctrl-stmfx.c
@@ -662,7 +662,6 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev)
 	pctl->gpio_chip.ngpio = pctl->pctl_desc.npins;
 	pctl->gpio_chip.can_sleep = true;
 	pctl->gpio_chip.of_node = np;
-	pctl->gpio_chip.need_valid_mask = true;
 	pctl->gpio_chip.init_valid_mask = stmfx_pinctrl_gpio_init_valid_mask;
 
 	ret = devm_gpiochip_add_data(pctl->dev, &pctl->gpio_chip, pctl);
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index a5d8f75da4a7..b8a1c43222f8 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -654,7 +654,6 @@ static const struct gpio_chip msm_gpio_template = {
 	.request          = gpiochip_generic_request,
 	.free             = gpiochip_generic_free,
 	.dbg_show         = msm_gpio_dbg_show,
-	.init_valid_mask  = msm_gpio_init_valid_mask,
 };
 
 /* For dual-edge interrupts in software, since some hardware has no
@@ -1016,7 +1015,8 @@ static int msm_gpio_init(struct msm_pinctrl *pctrl)
 	chip->parent = pctrl->dev;
 	chip->owner = THIS_MODULE;
 	chip->of_node = pctrl->dev->of_node;
-	chip->need_valid_mask = msm_gpio_needs_valid_mask(pctrl);
+	if (msm_gpio_needs_valid_mask(pctrl))
+		chip->init_valid_mask = msm_gpio_init_valid_mask;
 
 	pctrl->irq_chip.name = "msmgpio";
 	pctrl->irq_chip.irq_enable = msm_gpio_irq_enable;
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index dc03323897ef..340121c7d2fb 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -403,15 +403,6 @@ struct gpio_chip {
 	struct gpio_irq_chip irq;
 #endif /* CONFIG_GPIOLIB_IRQCHIP */
 
-	/**
-	 * @need_valid_mask:
-	 *
-	 * If set core allocates @valid_mask with all its values initialized
-	 * with init_valid_mask() or set to one if init_valid_mask() is not
-	 * defined
-	 */
-	bool need_valid_mask;
-
 	/**
 	 * @valid_mask:
 	 *
-- 
cgit v1.2.3


From 211bddd210a6746e4fdfa9b6cdfbdb15026530a7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 16 Jul 2019 15:17:31 +0100
Subject: irqchip/gic-v3: Add ESPI range support

Add the required support for the ESPI range, which behave exactly like
the SPIs of old, only with new funky INTIDs.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-gic-v3.c       | 85 +++++++++++++++++++++++++++++++-------
 include/linux/irqchip/arm-gic-v3.h | 17 +++++++-
 2 files changed, 85 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 660ec43b8d2d..0afc942170a4 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -51,13 +51,16 @@ struct gic_chip_data {
 	u32			nr_redist_regions;
 	u64			flags;
 	bool			has_rss;
-	unsigned int		irq_nr;
 	struct partition_desc	*ppi_descs[16];
 };
 
 static struct gic_chip_data gic_data __read_mostly;
 static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
+#define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
+#define GIC_LINE_NR	max(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
+#define GIC_ESPI_NR	GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
+
 /*
  * The behaviours of RPR and PMR registers differ depending on the value of
  * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the
@@ -100,6 +103,7 @@ static DEFINE_PER_CPU(bool, has_rss);
 enum gic_intid_range {
 	PPI_RANGE,
 	SPI_RANGE,
+	ESPI_RANGE,
 	LPI_RANGE,
 	__INVALID_RANGE__
 };
@@ -111,6 +115,8 @@ static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq)
 		return PPI_RANGE;
 	case 32 ... 1019:
 		return SPI_RANGE;
+	case ESPI_BASE_INTID ... (ESPI_BASE_INTID + 1023):
+		return ESPI_RANGE;
 	case 8192 ... GENMASK(23, 0):
 		return LPI_RANGE;
 	default:
@@ -141,6 +147,7 @@ static inline void __iomem *gic_dist_base(struct irq_data *d)
 		return gic_data_rdist_sgi_base();
 
 	case SPI_RANGE:
+	case ESPI_RANGE:
 		/* SPI -> dist_base */
 		return gic_data.dist_base;
 
@@ -234,6 +241,31 @@ static u32 convert_offset_index(struct irq_data *d, u32 offset, u32 *index)
 	case SPI_RANGE:
 		*index = d->hwirq;
 		return offset;
+	case ESPI_RANGE:
+		*index = d->hwirq - ESPI_BASE_INTID;
+		switch (offset) {
+		case GICD_ISENABLER:
+			return GICD_ISENABLERnE;
+		case GICD_ICENABLER:
+			return GICD_ICENABLERnE;
+		case GICD_ISPENDR:
+			return GICD_ISPENDRnE;
+		case GICD_ICPENDR:
+			return GICD_ICPENDRnE;
+		case GICD_ISACTIVER:
+			return GICD_ISACTIVERnE;
+		case GICD_ICACTIVER:
+			return GICD_ICACTIVERnE;
+		case GICD_IPRIORITYR:
+			return GICD_IPRIORITYRnE;
+		case GICD_ICFGR:
+			return GICD_ICFGRnE;
+		case GICD_IROUTER:
+			return GICD_IROUTERnE;
+		default:
+			break;
+		}
+		break;
 	default:
 		break;
 	}
@@ -316,7 +348,7 @@ static int gic_irq_set_irqchip_state(struct irq_data *d,
 {
 	u32 reg;
 
-	if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */
+	if (d->hwirq >= 8192) /* PPI/SPI only */
 		return -EINVAL;
 
 	switch (which) {
@@ -343,7 +375,7 @@ static int gic_irq_set_irqchip_state(struct irq_data *d,
 static int gic_irq_get_irqchip_state(struct irq_data *d,
 				     enum irqchip_irq_state which, bool *val)
 {
-	if (d->hwirq >= gic_data.irq_nr) /* PPI/SPI only */
+	if (d->hwirq >= 8192) /* PPI/SPI only */
 		return -EINVAL;
 
 	switch (which) {
@@ -567,7 +599,12 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 		gic_arch_enable_irqs();
 	}
 
-	if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
+	/* Check for special IDs first */
+	if ((irqnr >= 1020 && irqnr <= 1023))
+		return;
+
+	/* Treat anything but SGIs in a uniform way */
+	if (likely(irqnr > 15)) {
 		int err;
 
 		if (static_branch_likely(&supports_deactivate_key))
@@ -655,10 +692,26 @@ static void __init gic_dist_init(void)
 	 * do the right thing if the kernel is running in secure mode,
 	 * but that's not the intended use case anyway.
 	 */
-	for (i = 32; i < gic_data.irq_nr; i += 32)
+	for (i = 32; i < GIC_LINE_NR; i += 32)
 		writel_relaxed(~0, base + GICD_IGROUPR + i / 8);
 
-	gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp);
+	/* Extended SPI range, not handled by the GICv2/GICv3 common code */
+	for (i = 0; i < GIC_ESPI_NR; i += 32) {
+		writel_relaxed(~0U, base + GICD_ICENABLERnE + i / 8);
+		writel_relaxed(~0U, base + GICD_ICACTIVERnE + i / 8);
+	}
+
+	for (i = 0; i < GIC_ESPI_NR; i += 32)
+		writel_relaxed(~0U, base + GICD_IGROUPRnE + i / 8);
+
+	for (i = 0; i < GIC_ESPI_NR; i += 16)
+		writel_relaxed(0, base + GICD_ICFGRnE + i / 4);
+
+	for (i = 0; i < GIC_ESPI_NR; i += 4)
+		writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i);
+
+	/* Now do the common stuff, and wait for the distributor to drain */
+	gic_dist_config(base, GIC_LINE_NR, gic_dist_wait_for_rwp);
 
 	/* Enable distributor with ARE, Group1 */
 	writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1,
@@ -669,8 +722,11 @@ static void __init gic_dist_init(void)
 	 * enabled.
 	 */
 	affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id()));
-	for (i = 32; i < gic_data.irq_nr; i++)
+	for (i = 32; i < GIC_LINE_NR; i++)
 		gic_write_irouter(affinity, base + GICD_IROUTER + i * 8);
+
+	for (i = 0; i < GIC_ESPI_NR; i++)
+		gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8);
 }
 
 static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *))
@@ -1134,8 +1190,6 @@ static struct irq_chip gic_eoimode1_chip = {
 				  IRQCHIP_MASK_ON_SUSPEND,
 };
 
-#define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
-
 static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 			      irq_hw_number_t hw)
 {
@@ -1153,6 +1207,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 		break;
 
 	case SPI_RANGE:
+	case ESPI_RANGE:
 		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_fasteoi_irq, NULL, NULL);
 		irq_set_probe(irq);
@@ -1192,6 +1247,9 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 		case GIC_IRQ_TYPE_PARTITION:
 			*hwirq = fwspec->param[1] + 16;
 			break;
+		case 2:			/* ESPI */
+			*hwirq = fwspec->param[1] + ESPI_BASE_INTID;
+			break;
 		case GIC_IRQ_TYPE_LPI:	/* LPI */
 			*hwirq = fwspec->param[1];
 			break;
@@ -1346,7 +1404,6 @@ static int __init gic_init_bases(void __iomem *dist_base,
 				 struct fwnode_handle *handle)
 {
 	u32 typer;
-	int gic_irqs;
 	int err;
 
 	if (!is_hyp_mode_available())
@@ -1363,15 +1420,11 @@ static int __init gic_init_bases(void __iomem *dist_base,
 
 	/*
 	 * Find out how many interrupts are supported.
-	 * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI)
 	 */
 	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
 	gic_data.rdists.gicd_typer = typer;
-	gic_irqs = GICD_TYPER_IRQS(typer);
-	if (gic_irqs > 1020)
-		gic_irqs = 1020;
-	gic_data.irq_nr = gic_irqs;
-
+	pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32);
+	pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR);
 	gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
 						 &gic_data);
 	irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 67c4b9806d43..c523bf1faa55 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -30,10 +30,22 @@
 #define GICD_ICFGR			0x0C00
 #define GICD_IGRPMODR			0x0D00
 #define GICD_NSACR			0x0E00
+#define GICD_IGROUPRnE			0x1000
+#define GICD_ISENABLERnE		0x1200
+#define GICD_ICENABLERnE		0x1400
+#define GICD_ISPENDRnE			0x1600
+#define GICD_ICPENDRnE			0x1800
+#define GICD_ISACTIVERnE		0x1A00
+#define GICD_ICACTIVERnE		0x1C00
+#define GICD_IPRIORITYRnE		0x2000
+#define GICD_ICFGRnE			0x3000
 #define GICD_IROUTER			0x6000
+#define GICD_IROUTERnE			0x8000
 #define GICD_IDREGS			0xFFD0
 #define GICD_PIDR2			0xFFE8
 
+#define ESPI_BASE_INTID			4096
+
 /*
  * Those registers are actually from GICv2, but the spec demands that they
  * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
@@ -69,10 +81,13 @@
 #define GICD_TYPER_RSS			(1U << 26)
 #define GICD_TYPER_LPIS			(1U << 17)
 #define GICD_TYPER_MBIS			(1U << 16)
+#define GICD_TYPER_ESPI			(1U << 8)
 
 #define GICD_TYPER_ID_BITS(typer)	((((typer) >> 19) & 0x1f) + 1)
 #define GICD_TYPER_NUM_LPIS(typer)	((((typer) >> 11) & 0x1f) + 1)
-#define GICD_TYPER_IRQS(typer)		((((typer) & 0x1f) + 1) * 32)
+#define GICD_TYPER_SPIS(typer)		((((typer) & 0x1f) + 1) * 32)
+#define GICD_TYPER_ESPIS(typer)						\
+	(((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
 
 #define GICD_IROUTER_SPI_MODE_ONE	(0U << 31)
 #define GICD_IROUTER_SPI_MODE_ANY	(1U << 31)
-- 
cgit v1.2.3


From 5f51f803826e4f4aedff415ddaf14efa707be5a7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 18 Jul 2019 13:19:25 +0100
Subject: irqchip/gic-v3: Add EPPI range support

Expand the pre-existing PPI support to be able to deal with the
Extended PPI range (EPPI). This includes obtaining the number of PPIs
from each individual redistributor, and compute the minimum set
(just in case someone builds something really clever...).

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-gic-v3.c       | 42 +++++++++++++++++++++++++++++++-------
 include/linux/irqchip/arm-gic-v3.h | 12 +++++++++++
 2 files changed, 47 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index f5dbdbf3e98d..d3727e877872 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -104,6 +104,7 @@ static DEFINE_PER_CPU(bool, has_rss);
 enum gic_intid_range {
 	PPI_RANGE,
 	SPI_RANGE,
+	EPPI_RANGE,
 	ESPI_RANGE,
 	LPI_RANGE,
 	__INVALID_RANGE__
@@ -116,6 +117,8 @@ static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq)
 		return PPI_RANGE;
 	case 32 ... 1019:
 		return SPI_RANGE;
+	case EPPI_BASE_INTID ... (EPPI_BASE_INTID + 63):
+		return EPPI_RANGE;
 	case ESPI_BASE_INTID ... (ESPI_BASE_INTID + 1023):
 		return ESPI_RANGE;
 	case 8192 ... GENMASK(23, 0):
@@ -137,13 +140,15 @@ static inline unsigned int gic_irq(struct irq_data *d)
 
 static inline int gic_irq_in_rdist(struct irq_data *d)
 {
-	return get_intid_range(d) == PPI_RANGE;
+	enum gic_intid_range range = get_intid_range(d);
+	return range == PPI_RANGE || range == EPPI_RANGE;
 }
 
 static inline void __iomem *gic_dist_base(struct irq_data *d)
 {
 	switch (get_intid_range(d)) {
 	case PPI_RANGE:
+	case EPPI_RANGE:
 		/* SGI+PPI -> SGI_base for this CPU */
 		return gic_data_rdist_sgi_base();
 
@@ -242,6 +247,14 @@ static u32 convert_offset_index(struct irq_data *d, u32 offset, u32 *index)
 	case SPI_RANGE:
 		*index = d->hwirq;
 		return offset;
+	case EPPI_RANGE:
+		/*
+		 * Contrary to the ESPI range, the EPPI range is contiguous
+		 * to the PPI range in the registers, so let's adjust the
+		 * displacement accordingly. Consistency is overrated.
+		 */
+		*index = d->hwirq - EPPI_BASE_INTID + 32;
+		return offset;
 	case ESPI_RANGE:
 		*index = d->hwirq - ESPI_BASE_INTID;
 		switch (offset) {
@@ -414,6 +427,8 @@ static u32 gic_get_ppi_index(struct irq_data *d)
 	switch (get_intid_range(d)) {
 	case PPI_RANGE:
 		return d->hwirq - 16;
+	case EPPI_RANGE:
+		return d->hwirq - EPPI_BASE_INTID + 16;
 	default:
 		unreachable();
 	}
@@ -507,6 +522,7 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
 
 static int gic_set_type(struct irq_data *d, unsigned int type)
 {
+	enum gic_intid_range range;
 	unsigned int irq = gic_irq(d);
 	void (*rwp_wait)(void);
 	void __iomem *base;
@@ -517,9 +533,11 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 	if (irq < 16)
 		return -EINVAL;
 
+	range = get_intid_range(d);
+
 	/* SPIs have restrictions on the supported types */
-	if (irq >= 32 && type != IRQ_TYPE_LEVEL_HIGH &&
-			 type != IRQ_TYPE_EDGE_RISING)
+	if ((range == SPI_RANGE || range == ESPI_RANGE) &&
+	    type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING)
 		return -EINVAL;
 
 	if (gic_irq_in_rdist(d)) {
@@ -533,9 +551,9 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 	offset = convert_offset_index(d, GICD_ICFGR, &index);
 
 	ret = gic_configure_irq(index, type, base + offset, rwp_wait);
-	if (ret && irq < 32) {
+	if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) {
 		/* Misconfigured PPIs are usually not fatal */
-		pr_warn("GIC: PPI%d is secure or misconfigured\n", irq - 16);
+		pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq);
 		ret = 0;
 	}
 
@@ -833,7 +851,7 @@ static int __gic_update_rdist_properties(struct redist_region *region,
 	u64 typer = gic_read_typer(ptr + GICR_TYPER);
 	gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS);
 	gic_data.rdists.has_direct_lpi &= !!(typer & GICR_TYPER_DirectLPIS);
-	gic_data.ppi_nr = 16;
+	gic_data.ppi_nr = min(GICR_TYPER_NR_PPIS(typer), gic_data.ppi_nr);
 
 	return 1;
 }
@@ -1222,6 +1240,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
 
 	switch (__get_intid_range(hw)) {
 	case PPI_RANGE:
+	case EPPI_RANGE:
 		irq_set_percpu_devid(irq);
 		irq_domain_set_info(d, irq, hw, chip, d->host_data,
 				    handle_percpu_devid_irq, NULL, NULL);
@@ -1266,15 +1285,24 @@ static int gic_irq_domain_translate(struct irq_domain *d,
 			*hwirq = fwspec->param[1] + 32;
 			break;
 		case 1:			/* PPI */
-		case GIC_IRQ_TYPE_PARTITION:
 			*hwirq = fwspec->param[1] + 16;
 			break;
 		case 2:			/* ESPI */
 			*hwirq = fwspec->param[1] + ESPI_BASE_INTID;
 			break;
+		case 3:			/* EPPI */
+			*hwirq = fwspec->param[1] + EPPI_BASE_INTID;
+			break;
 		case GIC_IRQ_TYPE_LPI:	/* LPI */
 			*hwirq = fwspec->param[1];
 			break;
+		case GIC_IRQ_TYPE_PARTITION:
+			*hwirq = fwspec->param[1];
+			if (fwspec->param[1] >= 16)
+				*hwirq += EPPI_BASE_INTID - 16;
+			else
+				*hwirq += 16;
+			break;
 		default:
 			return -EINVAL;
 		}
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index c523bf1faa55..9ec3349dee04 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -124,6 +124,18 @@
 
 #define GICR_TYPER_CPU_NUMBER(r)	(((r) >> 8) & 0xffff)
 
+#define EPPI_BASE_INTID			1056
+
+#define GICR_TYPER_NR_PPIS(r)						\
+	({								\
+		unsigned int __ppinum = ((r) >> 27) & 0x1f;		\
+		unsigned int __nr_ppis = 16;				\
+		if (__ppinum == 1 || __ppinum == 2)			\
+			__nr_ppis +=  __ppinum * 32;			\
+									\
+		__nr_ppis;						\
+	 })
+
 #define GICR_WAKER_ProcessorSleep	(1U << 1)
 #define GICR_WAKER_ChildrenAsleep	(1U << 2)
 
-- 
cgit v1.2.3


From ad5a78d3da81836c88d1f2d53310484462660997 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 25 Jul 2019 15:30:51 +0100
Subject: irqchip/gic-v3: Warn about inconsistent implementations of extended
 ranges

As is it usual for the GIC, it isn't disallowed to put together a system
that is majorly inconsistent, with a distributor supporting the
extended ranges while some of the CPUs don't.

Kindly tell the user that things are sailing isn't going to be smooth.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 drivers/irqchip/irq-gic-v3.c       | 5 +++++
 include/linux/irqchip/arm-gic-v3.h | 1 +
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index d3727e877872..8af08dd674f8 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1014,6 +1014,11 @@ static void gic_cpu_init(void)
 
 	gic_enable_redist(true);
 
+	WARN((gic_data.ppi_nr > 16 || GIC_ESPI_NR != 0) &&
+	     !(gic_read_ctlr() & ICC_CTLR_EL1_ExtRange),
+	     "Distributor has extended ranges, but CPU%d doesn't\n",
+	     smp_processor_id());
+
 	rbase = gic_data_rdist_sgi_base();
 
 	/* Configure SGIs/PPIs as non-secure Group-1 */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 9ec3349dee04..5cc10cf7cb3e 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -496,6 +496,7 @@
 #define ICC_CTLR_EL1_A3V_SHIFT		15
 #define ICC_CTLR_EL1_A3V_MASK		(0x1 << ICC_CTLR_EL1_A3V_SHIFT)
 #define ICC_CTLR_EL1_RSS		(0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange		(0x1 << 19)
 #define ICC_PMR_EL1_SHIFT		0
 #define ICC_PMR_EL1_MASK		(0xff << ICC_PMR_EL1_SHIFT)
 #define ICC_BPR0_EL1_SHIFT		0
-- 
cgit v1.2.3


From a512584abd7ab860560ac21e6daac1aaebc1c14f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 19 Aug 2019 16:45:34 +0900
Subject: irqchip: Add include guard to irq-partition-percpu.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/linux/irqchip/irq-partition-percpu.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h
index a783ddb58444..2f6ae7551748 100644
--- a/include/linux/irqchip/irq-partition-percpu.h
+++ b/include/linux/irqchip/irq-partition-percpu.h
@@ -4,6 +4,9 @@
  * Author: Marc Zyngier <marc.zyngier@arm.com>
  */
 
+#ifndef __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H
+#define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H
+
 #include <linux/fwnode.h>
 #include <linux/cpumask.h>
 #include <linux/irqdomain.h>
@@ -46,3 +49,5 @@ struct irq_domain *partition_get_domain(struct partition_desc *dsc)
 	return NULL;
 }
 #endif
+
+#endif /* __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H */
-- 
cgit v1.2.3


From 0688587a71200911721a0cb025fe805ddc0404b6 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Mon, 12 Aug 2019 14:32:52 +0200
Subject: dt-bindings: clock: meson: add resets to the audio clock controller

Add the documentation and bindings for the resets provided by the g12a
audio clock controller

Reviewed-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 .../bindings/clock/amlogic,axg-audio-clkc.txt      |  1 +
 .../reset/amlogic,meson-g12a-audio-reset.h         | 38 ++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100644 include/dt-bindings/reset/amlogic,meson-g12a-audio-reset.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
index 0f777749f4f1..b3957d10d241 100644
--- a/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
+++ b/Documentation/devicetree/bindings/clock/amlogic,axg-audio-clkc.txt
@@ -22,6 +22,7 @@ Required Properties:
 				       components.
 - resets	: phandle of the internal reset line
 - #clock-cells	: should be 1.
+- #reset-cells  : should be 1 on the g12a (and following) soc family
 
 Each clock is assigned an identifier and client nodes can use this identifier
 to specify the clock which they consume. All available clocks are defined as
diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-audio-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-audio-reset.h
new file mode 100644
index 000000000000..14b78dabed0e
--- /dev/null
+++ b/include/dt-bindings/reset/amlogic,meson-g12a-audio-reset.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 BayLibre, SAS.
+ * Author: Jerome Brunet <jbrunet@baylibre.com>
+ *
+ */
+
+#ifndef _DT_BINDINGS_AMLOGIC_MESON_G12A_AUDIO_RESET_H
+#define _DT_BINDINGS_AMLOGIC_MESON_G12A_AUDIO_RESET_H
+
+#define AUD_RESET_PDM		0
+#define AUD_RESET_TDMIN_A	1
+#define AUD_RESET_TDMIN_B	2
+#define AUD_RESET_TDMIN_C	3
+#define AUD_RESET_TDMIN_LB	4
+#define AUD_RESET_LOOPBACK	5
+#define AUD_RESET_TODDR_A	6
+#define AUD_RESET_TODDR_B	7
+#define AUD_RESET_TODDR_C	8
+#define AUD_RESET_FRDDR_A	9
+#define AUD_RESET_FRDDR_B	10
+#define AUD_RESET_FRDDR_C	11
+#define AUD_RESET_TDMOUT_A	12
+#define AUD_RESET_TDMOUT_B	13
+#define AUD_RESET_TDMOUT_C	14
+#define AUD_RESET_SPDIFOUT	15
+#define AUD_RESET_SPDIFOUT_B	16
+#define AUD_RESET_SPDIFIN	17
+#define AUD_RESET_EQDRC		18
+#define AUD_RESET_RESAMPLE	19
+#define AUD_RESET_DDRARB	20
+#define AUD_RESET_POWDET	21
+#define AUD_RESET_TORAM		22
+#define AUD_RESET_TOACODEC	23
+#define AUD_RESET_TOHDMITX	24
+#define AUD_RESET_CLKTREE	25
+
+#endif
-- 
cgit v1.2.3


From 30cc0ed73e3390af7c5573b159826e8162f09fe7 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 14 Aug 2019 11:22:21 +0200
Subject: can: rcar_can: Remove unused platform data support

All R-Car platforms use DT for describing CAN controllers. R-Car CAN
platform data support was never used in any upstream kernel.

Move the Clock Select Register settings enum into the driver, and remove
platform data support and the corresponding header file.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/rcar/rcar_can.c       | 22 +++++++++-------------
 include/linux/can/platform/rcar_can.h | 18 ------------------
 2 files changed, 9 insertions(+), 31 deletions(-)
 delete mode 100644 include/linux/can/platform/rcar_can.h

(limited to 'include')

diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c
index cf218949a8fb..bf5adea9c0a3 100644
--- a/drivers/net/can/rcar/rcar_can.c
+++ b/drivers/net/can/rcar/rcar_can.c
@@ -15,11 +15,17 @@
 #include <linux/can/led.h>
 #include <linux/can/dev.h>
 #include <linux/clk.h>
-#include <linux/can/platform/rcar_can.h>
 #include <linux/of.h>
 
 #define RCAR_CAN_DRV_NAME	"rcar_can"
 
+/* Clock Select Register settings */
+enum CLKR {
+	CLKR_CLKP1 = 0, /* Peripheral clock (clkp1) */
+	CLKR_CLKP2 = 1, /* Peripheral clock (clkp2) */
+	CLKR_CLKEXT = 3, /* Externally input clock */
+};
+
 #define RCAR_SUPPORTED_CLOCKS	(BIT(CLKR_CLKP1) | BIT(CLKR_CLKP2) | \
 				 BIT(CLKR_CLKEXT))
 
@@ -736,7 +742,6 @@ static const char * const clock_names[] = {
 
 static int rcar_can_probe(struct platform_device *pdev)
 {
-	struct rcar_can_platform_data *pdata;
 	struct rcar_can_priv *priv;
 	struct net_device *ndev;
 	struct resource *mem;
@@ -745,17 +750,8 @@ static int rcar_can_probe(struct platform_device *pdev)
 	int err = -ENODEV;
 	int irq;
 
-	if (pdev->dev.of_node) {
-		of_property_read_u32(pdev->dev.of_node,
-				     "renesas,can-clock-select", &clock_select);
-	} else {
-		pdata = dev_get_platdata(&pdev->dev);
-		if (!pdata) {
-			dev_err(&pdev->dev, "No platform data provided!\n");
-			goto fail;
-		}
-		clock_select = pdata->clock_select;
-	}
+	of_property_read_u32(pdev->dev.of_node, "renesas,can-clock-select",
+			     &clock_select);
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
diff --git a/include/linux/can/platform/rcar_can.h b/include/linux/can/platform/rcar_can.h
deleted file mode 100644
index a43dcd0cf79e..000000000000
--- a/include/linux/can/platform/rcar_can.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _CAN_PLATFORM_RCAR_CAN_H_
-#define _CAN_PLATFORM_RCAR_CAN_H_
-
-#include <linux/types.h>
-
-/* Clock Select Register settings */
-enum CLKR {
-	CLKR_CLKP1 = 0,	/* Peripheral clock (clkp1) */
-	CLKR_CLKP2 = 1,	/* Peripheral clock (clkp2) */
-	CLKR_CLKEXT = 3	/* Externally input clock */
-};
-
-struct rcar_can_platform_data {
-	enum CLKR clock_select;	/* Clock source select */
-};
-
-#endif	/* !_CAN_PLATFORM_RCAR_CAN_H_ */
-- 
cgit v1.2.3


From c7d8b7824ff9de866a356e1892dbe9f191aa5d06 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 6 Aug 2019 20:15:42 -0300
Subject: hmm: use mmu_notifier_get/put for 'struct hmm'

This is a significant simplification, it eliminates all the remaining
'hmm' stuff in mm_struct, eliminates krefing along the critical notifier
paths, and takes away all the ugly locking and abuse of page_table_lock.

mmu_notifier_get() provides the single struct hmm per struct mm which
eliminates mm->hmm.

It also directly guarantees that no mmu_notifier op callback is callable
while concurrent free is possible, this eliminates all the krefs inside
the mmu_notifier callbacks.

The remaining krefs in the range code were overly cautious, drivers are
already not permitted to free the mirror while a range exists.

Link: https://lore.kernel.org/r/20190806231548.25242-6-jgg@ziepe.ca
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |   2 +
 drivers/gpu/drm/nouveau/nouveau_drm.c   |   3 +
 include/linux/hmm.h                     |  12 +---
 include/linux/mm_types.h                |   6 --
 kernel/fork.c                           |   1 -
 mm/hmm.c                                | 121 ++++++++------------------------
 6 files changed, 34 insertions(+), 111 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f2e8b4238efd..abc233088201 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
 #include <drm/drm_probe_helper.h>
+#include <linux/mmu_notifier.h>
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
@@ -1464,6 +1465,7 @@ static void __exit amdgpu_exit(void)
 	amdgpu_unregister_atpx_handler();
 	amdgpu_sync_fini();
 	amdgpu_fence_slab_fini();
+	mmu_notifier_synchronize();
 }
 
 module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 7c2fcaba42d6..a0e48a482452 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
+#include <linux/mmu_notifier.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
@@ -1292,6 +1293,8 @@ nouveau_drm_exit(void)
 #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
 	platform_driver_unregister(&nouveau_platform_driver);
 #endif
+	if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM))
+		mmu_notifier_synchronize();
 }
 
 module_init(nouveau_drm_init);
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 51e18fbb8953..3fec513b9c00 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -84,15 +84,12 @@
  * @notifiers: count of active mmu notifiers
  */
 struct hmm {
-	struct mm_struct	*mm;
-	struct kref		kref;
+	struct mmu_notifier	mmu_notifier;
 	spinlock_t		ranges_lock;
 	struct list_head	ranges;
 	struct list_head	mirrors;
-	struct mmu_notifier	mmu_notifier;
 	struct rw_semaphore	mirrors_sem;
 	wait_queue_head_t	wq;
-	struct rcu_head		rcu;
 	long			notifiers;
 };
 
@@ -409,13 +406,6 @@ long hmm_range_dma_unmap(struct hmm_range *range,
  */
 #define HMM_RANGE_DEFAULT_TIMEOUT 1000
 
-/* Below are for HMM internal use only! Not to be used by device driver! */
-static inline void hmm_mm_init(struct mm_struct *mm)
-{
-	mm->hmm = NULL;
-}
-#else /* IS_ENABLED(CONFIG_HMM_MIRROR) */
-static inline void hmm_mm_init(struct mm_struct *mm) {}
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 #endif /* LINUX_HMM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3a37a89eb7a7..525d25d93330 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -25,7 +25,6 @@
 
 struct address_space;
 struct mem_cgroup;
-struct hmm;
 
 /*
  * Each physical page in the system has a struct page associated with
@@ -502,11 +501,6 @@ struct mm_struct {
 		atomic_long_t hugetlb_usage;
 #endif
 		struct work_struct async_put_work;
-
-#ifdef CONFIG_HMM_MIRROR
-		/* HMM needs to track a few things per mm */
-		struct hmm *hmm;
-#endif
 	} __randomize_layout;
 
 	/*
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..bd4a0762f12f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1007,7 +1007,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_owner(mm, p);
 	RCU_INIT_POINTER(mm->exe_file, NULL);
 	mmu_notifier_mm_init(mm);
-	hmm_mm_init(mm);
 	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
diff --git a/mm/hmm.c b/mm/hmm.c
index ef553e664061..49eace16f9f8 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,101 +26,37 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
-static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
-
-/**
- * hmm_get_or_create - register HMM against an mm (HMM internal)
- *
- * @mm: mm struct to attach to
- * Return: an HMM object, either by referencing the existing
- *          (per-process) object, or by creating a new one.
- *
- * This is not intended to be used directly by device drivers. If mm already
- * has an HMM struct then it get a reference on it and returns it. Otherwise
- * it allocates an HMM struct, initializes it, associate it with the mm and
- * returns it.
- */
-static struct hmm *hmm_get_or_create(struct mm_struct *mm)
+static struct mmu_notifier *hmm_alloc_notifier(struct mm_struct *mm)
 {
 	struct hmm *hmm;
 
-	lockdep_assert_held_write(&mm->mmap_sem);
-
-	/* Abuse the page_table_lock to also protect mm->hmm. */
-	spin_lock(&mm->page_table_lock);
-	hmm = mm->hmm;
-	if (mm->hmm && kref_get_unless_zero(&mm->hmm->kref))
-		goto out_unlock;
-	spin_unlock(&mm->page_table_lock);
-
-	hmm = kmalloc(sizeof(*hmm), GFP_KERNEL);
+	hmm = kzalloc(sizeof(*hmm), GFP_KERNEL);
 	if (!hmm)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
+
 	init_waitqueue_head(&hmm->wq);
 	INIT_LIST_HEAD(&hmm->mirrors);
 	init_rwsem(&hmm->mirrors_sem);
-	hmm->mmu_notifier.ops = NULL;
 	INIT_LIST_HEAD(&hmm->ranges);
 	spin_lock_init(&hmm->ranges_lock);
-	kref_init(&hmm->kref);
 	hmm->notifiers = 0;
-	hmm->mm = mm;
-
-	hmm->mmu_notifier.ops = &hmm_mmu_notifier_ops;
-	if (__mmu_notifier_register(&hmm->mmu_notifier, mm)) {
-		kfree(hmm);
-		return NULL;
-	}
-
-	mmgrab(hmm->mm);
-
-	/*
-	 * We hold the exclusive mmap_sem here so we know that mm->hmm is
-	 * still NULL or 0 kref, and is safe to update.
-	 */
-	spin_lock(&mm->page_table_lock);
-	mm->hmm = hmm;
-
-out_unlock:
-	spin_unlock(&mm->page_table_lock);
-	return hmm;
+	return &hmm->mmu_notifier;
 }
 
-static void hmm_free_rcu(struct rcu_head *rcu)
+static void hmm_free_notifier(struct mmu_notifier *mn)
 {
-	struct hmm *hmm = container_of(rcu, struct hmm, rcu);
+	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
 
-	mmdrop(hmm->mm);
+	WARN_ON(!list_empty(&hmm->ranges));
+	WARN_ON(!list_empty(&hmm->mirrors));
 	kfree(hmm);
 }
 
-static void hmm_free(struct kref *kref)
-{
-	struct hmm *hmm = container_of(kref, struct hmm, kref);
-
-	spin_lock(&hmm->mm->page_table_lock);
-	if (hmm->mm->hmm == hmm)
-		hmm->mm->hmm = NULL;
-	spin_unlock(&hmm->mm->page_table_lock);
-
-	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, hmm->mm);
-	mmu_notifier_call_srcu(&hmm->rcu, hmm_free_rcu);
-}
-
-static inline void hmm_put(struct hmm *hmm)
-{
-	kref_put(&hmm->kref, hmm_free);
-}
-
 static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
 	struct hmm_mirror *mirror;
 
-	/* Bail out if hmm is in the process of being freed */
-	if (!kref_get_unless_zero(&hmm->kref))
-		return;
-
 	/*
 	 * Since hmm_range_register() holds the mmget() lock hmm_release() is
 	 * prevented as long as a range exists.
@@ -137,8 +73,6 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 			mirror->ops->release(mirror);
 	}
 	up_read(&hmm->mirrors_sem);
-
-	hmm_put(hmm);
 }
 
 static void notifiers_decrement(struct hmm *hmm)
@@ -169,9 +103,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 	unsigned long flags;
 	int ret = 0;
 
-	if (!kref_get_unless_zero(&hmm->kref))
-		return 0;
-
 	spin_lock_irqsave(&hmm->ranges_lock, flags);
 	hmm->notifiers++;
 	list_for_each_entry(range, &hmm->ranges, list) {
@@ -206,7 +137,6 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
 out:
 	if (ret)
 		notifiers_decrement(hmm);
-	hmm_put(hmm);
 	return ret;
 }
 
@@ -215,17 +145,15 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 {
 	struct hmm *hmm = container_of(mn, struct hmm, mmu_notifier);
 
-	if (!kref_get_unless_zero(&hmm->kref))
-		return;
-
 	notifiers_decrement(hmm);
-	hmm_put(hmm);
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
 	.release		= hmm_release,
 	.invalidate_range_start	= hmm_invalidate_range_start,
 	.invalidate_range_end	= hmm_invalidate_range_end,
+	.alloc_notifier		= hmm_alloc_notifier,
+	.free_notifier		= hmm_free_notifier,
 };
 
 /*
@@ -237,18 +165,27 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
  *
  * To start mirroring a process address space, the device driver must register
  * an HMM mirror struct.
+ *
+ * The caller cannot unregister the hmm_mirror while any ranges are
+ * registered.
+ *
+ * Callers using this function must put a call to mmu_notifier_synchronize()
+ * in their module exit functions.
  */
 int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
 {
+	struct mmu_notifier *mn;
+
 	lockdep_assert_held_write(&mm->mmap_sem);
 
 	/* Sanity check */
 	if (!mm || !mirror || !mirror->ops)
 		return -EINVAL;
 
-	mirror->hmm = hmm_get_or_create(mm);
-	if (!mirror->hmm)
-		return -ENOMEM;
+	mn = mmu_notifier_get_locked(&hmm_mmu_notifier_ops, mm);
+	if (IS_ERR(mn))
+		return PTR_ERR(mn);
+	mirror->hmm = container_of(mn, struct hmm, mmu_notifier);
 
 	down_write(&mirror->hmm->mirrors_sem);
 	list_add(&mirror->list, &mirror->hmm->mirrors);
@@ -272,7 +209,7 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror)
 	down_write(&hmm->mirrors_sem);
 	list_del(&mirror->list);
 	up_write(&hmm->mirrors_sem);
-	hmm_put(hmm);
+	mmu_notifier_put(&hmm->mmu_notifier);
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
@@ -854,14 +791,13 @@ int hmm_range_register(struct hmm_range *range, struct hmm_mirror *mirror)
 		return -EINVAL;
 
 	/* Prevent hmm_release() from running while the range is valid */
-	if (!mmget_not_zero(hmm->mm))
+	if (!mmget_not_zero(hmm->mmu_notifier.mm))
 		return -EFAULT;
 
 	/* Initialize range to track CPU page table updates. */
 	spin_lock_irqsave(&hmm->ranges_lock, flags);
 
 	range->hmm = hmm;
-	kref_get(&hmm->kref);
 	list_add(&range->list, &hmm->ranges);
 
 	/*
@@ -893,8 +829,7 @@ void hmm_range_unregister(struct hmm_range *range)
 	spin_unlock_irqrestore(&hmm->ranges_lock, flags);
 
 	/* Drop reference taken by hmm_range_register() */
-	mmput(hmm->mm);
-	hmm_put(hmm);
+	mmput(hmm->mmu_notifier.mm);
 
 	/*
 	 * The range is now invalid and the ref on the hmm is dropped, so
@@ -944,14 +879,14 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 	struct mm_walk mm_walk;
 	int ret;
 
-	lockdep_assert_held(&hmm->mm->mmap_sem);
+	lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem);
 
 	do {
 		/* If range is no longer valid force retry. */
 		if (!range->valid)
 			return -EBUSY;
 
-		vma = find_vma(hmm->mm, start);
+		vma = find_vma(hmm->mmu_notifier.mm, start);
 		if (vma == NULL || (vma->vm_flags & device_vma))
 			return -EFAULT;
 
-- 
cgit v1.2.3


From a7d1f22bb74f32cf3cd93f52776007e161f1a738 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 14 Aug 2019 09:59:19 +0200
Subject: mm: turn migrate_vma upside down

There isn't any good reason to pass callbacks to migrate_vma.  Instead
we can just export the three steps done by this function to drivers and
let them sequence the operation without callbacks.  This removes a lot
of boilerplate code as-is, and will allow the drivers to drastically
improve code flow and error handling further on.

Link: https://lore.kernel.org/r/20190814075928.23766-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst               |  54 +-------
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 122 +++++++++--------
 include/linux/migrate.h                | 118 +++-------------
 mm/migrate.c                           | 244 +++++++++++++++------------------
 4 files changed, 194 insertions(+), 344 deletions(-)

(limited to 'include')

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index e63c11f7e0e0..0a5960beccf7 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -339,58 +339,8 @@ Migration to and from device memory
 ===================================
 
 Because the CPU cannot access device memory, migration must use the device DMA
-engine to perform copy from and to device memory. For this we need a new
-migration helper::
-
- int migrate_vma(const struct migrate_vma_ops *ops,
-                 struct vm_area_struct *vma,
-                 unsigned long mentries,
-                 unsigned long start,
-                 unsigned long end,
-                 unsigned long *src,
-                 unsigned long *dst,
-                 void *private);
-
-Unlike other migration functions it works on a range of virtual address, there
-are two reasons for that. First, device DMA copy has a high setup overhead cost
-and thus batching multiple pages is needed as otherwise the migration overhead
-makes the whole exercise pointless. The second reason is because the
-migration might be for a range of addresses the device is actively accessing.
-
-The migrate_vma_ops struct defines two callbacks. First one (alloc_and_copy())
-controls destination memory allocation and copy operation. Second one is there
-to allow the device driver to perform cleanup operations after migration::
-
- struct migrate_vma_ops {
-     void (*alloc_and_copy)(struct vm_area_struct *vma,
-                            const unsigned long *src,
-                            unsigned long *dst,
-                            unsigned long start,
-                            unsigned long end,
-                            void *private);
-     void (*finalize_and_map)(struct vm_area_struct *vma,
-                              const unsigned long *src,
-                              const unsigned long *dst,
-                              unsigned long start,
-                              unsigned long end,
-                              void *private);
- };
-
-It is important to stress that these migration helpers allow for holes in the
-virtual address range. Some pages in the range might not be migrated for all
-the usual reasons (page is pinned, page is locked, ...). This helper does not
-fail but just skips over those pages.
-
-The alloc_and_copy() might decide to not migrate all pages in the
-range (for reasons under the callback control). For those, the callback just
-has to leave the corresponding dst entry empty.
-
-Finally, the migration of the struct page might fail (for file backed page) for
-various reasons (failure to freeze reference, or update page cache, ...). If
-that happens, then the finalize_and_map() can catch any pages that were not
-migrated. Note those pages were still copied to a new page and thus we wasted
-bandwidth but this is considered as a rare event and a price that we are
-willing to pay to keep all the code simpler.
+engine to perform copy from and to device memory. For this we need to use
+migrate_vma_setup(), migrate_vma_pages(), and migrate_vma_finalize() helpers.
 
 
 Memory cgroup (memcg) and rss accounting
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 345c63cb752a..38416798abd4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -131,9 +131,8 @@ nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma,
 				  unsigned long *dst_pfns,
 				  unsigned long start,
 				  unsigned long end,
-				  void *private)
+				  struct nouveau_dmem_fault *fault)
 {
-	struct nouveau_dmem_fault *fault = private;
 	struct nouveau_drm *drm = fault->drm;
 	struct device *dev = drm->dev->dev;
 	unsigned long addr, i, npages = 0;
@@ -230,14 +229,9 @@ error:
 	}
 }
 
-void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
-					 const unsigned long *src_pfns,
-					 const unsigned long *dst_pfns,
-					 unsigned long start,
-					 unsigned long end,
-					 void *private)
+static void
+nouveau_dmem_fault_finalize_and_map(struct nouveau_dmem_fault *fault)
 {
-	struct nouveau_dmem_fault *fault = private;
 	struct nouveau_drm *drm = fault->drm;
 
 	if (fault->fence) {
@@ -257,29 +251,35 @@ void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma,
 	kfree(fault->dma);
 }
 
-static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = {
-	.alloc_and_copy		= nouveau_dmem_fault_alloc_and_copy,
-	.finalize_and_map	= nouveau_dmem_fault_finalize_and_map,
-};
-
 static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 {
 	struct nouveau_dmem *dmem = page_to_dmem(vmf->page);
 	unsigned long src[1] = {0}, dst[1] = {0};
+	struct migrate_vma args = {
+		.vma		= vmf->vma,
+		.start		= vmf->address,
+		.end		= vmf->address + PAGE_SIZE,
+		.src		= src,
+		.dst		= dst,
+	};
 	struct nouveau_dmem_fault fault = { .drm = dmem->drm };
-	int ret;
 
 	/*
 	 * FIXME what we really want is to find some heuristic to migrate more
 	 * than just one page on CPU fault. When such fault happens it is very
 	 * likely that more surrounding page will CPU fault too.
 	 */
-	ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vmf->vma,
-			vmf->address, vmf->address + PAGE_SIZE,
-			src, dst, &fault);
-	if (ret)
+	if (migrate_vma_setup(&args) < 0)
 		return VM_FAULT_SIGBUS;
+	if (!args.cpages)
+		return 0;
+
+	nouveau_dmem_fault_alloc_and_copy(args.vma, src, dst, args.start,
+			args.end, &fault);
+	migrate_vma_pages(&args);
+	nouveau_dmem_fault_finalize_and_map(&fault);
 
+	migrate_vma_finalize(&args);
 	if (dst[0] == MIGRATE_PFN_ERROR)
 		return VM_FAULT_SIGBUS;
 
@@ -648,9 +648,8 @@ nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma,
 				    unsigned long *dst_pfns,
 				    unsigned long start,
 				    unsigned long end,
-				    void *private)
+				    struct nouveau_migrate *migrate)
 {
-	struct nouveau_migrate *migrate = private;
 	struct nouveau_drm *drm = migrate->drm;
 	struct device *dev = drm->dev->dev;
 	unsigned long addr, i, npages = 0;
@@ -747,14 +746,9 @@ error:
 	}
 }
 
-void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
-					   const unsigned long *src_pfns,
-					   const unsigned long *dst_pfns,
-					   unsigned long start,
-					   unsigned long end,
-					   void *private)
+static void
+nouveau_dmem_migrate_finalize_and_map(struct nouveau_migrate *migrate)
 {
-	struct nouveau_migrate *migrate = private;
 	struct nouveau_drm *drm = migrate->drm;
 
 	if (migrate->fence) {
@@ -779,10 +773,15 @@ void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma,
 	 */
 }
 
-static const struct migrate_vma_ops nouveau_dmem_migrate_ops = {
-	.alloc_and_copy		= nouveau_dmem_migrate_alloc_and_copy,
-	.finalize_and_map	= nouveau_dmem_migrate_finalize_and_map,
-};
+static void nouveau_dmem_migrate_chunk(struct migrate_vma *args,
+		struct nouveau_migrate *migrate)
+{
+	nouveau_dmem_migrate_alloc_and_copy(args->vma, args->src, args->dst,
+			args->start, args->end, migrate);
+	migrate_vma_pages(args);
+	nouveau_dmem_migrate_finalize_and_map(migrate);
+	migrate_vma_finalize(args);
+}
 
 int
 nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
@@ -790,40 +789,45 @@ nouveau_dmem_migrate_vma(struct nouveau_drm *drm,
 			 unsigned long start,
 			 unsigned long end)
 {
-	unsigned long *src_pfns, *dst_pfns, npages;
-	struct nouveau_migrate migrate = {0};
-	unsigned long i, c, max;
-	int ret = 0;
-
-	npages = (end - start) >> PAGE_SHIFT;
-	max = min(SG_MAX_SINGLE_ALLOC, npages);
-	src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
-	if (src_pfns == NULL)
-		return -ENOMEM;
-	dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL);
-	if (dst_pfns == NULL) {
-		kfree(src_pfns);
-		return -ENOMEM;
-	}
+	unsigned long npages = (end - start) >> PAGE_SHIFT;
+	unsigned long max = min(SG_MAX_SINGLE_ALLOC, npages);
+	struct migrate_vma args = {
+		.vma		= vma,
+		.start		= start,
+	};
+	struct nouveau_migrate migrate = {
+		.drm		= drm,
+		.vma		= vma,
+		.npages		= npages,
+	};
+	unsigned long c, i;
+	int ret = -ENOMEM;
+
+	args.src = kzalloc(sizeof(long) * max, GFP_KERNEL);
+	if (!args.src)
+		goto out;
+	args.dst = kzalloc(sizeof(long) * max, GFP_KERNEL);
+	if (!args.dst)
+		goto out_free_src;
 
-	migrate.drm = drm;
-	migrate.vma = vma;
-	migrate.npages = npages;
 	for (i = 0; i < npages; i += c) {
-		unsigned long next;
-
 		c = min(SG_MAX_SINGLE_ALLOC, npages);
-		next = start + (c << PAGE_SHIFT);
-		ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start,
-				  next, src_pfns, dst_pfns, &migrate);
+		args.end = start + (c << PAGE_SHIFT);
+		ret = migrate_vma_setup(&args);
 		if (ret)
-			goto out;
-		start = next;
+			goto out_free_dst;
+
+		if (args.cpages)
+			nouveau_dmem_migrate_chunk(&args, &migrate);
+		args.start = args.end;
 	}
 
+	ret = 0;
+out_free_dst:
+	kfree(args.dst);
+out_free_src:
+	kfree(args.src);
 out:
-	kfree(dst_pfns);
-	kfree(src_pfns);
 	return ret;
 }
 
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 7f04754c7f2b..18156d379ebf 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -182,107 +182,27 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
 	return (pfn << MIGRATE_PFN_SHIFT) | MIGRATE_PFN_VALID;
 }
 
-/*
- * struct migrate_vma_ops - migrate operation callback
- *
- * @alloc_and_copy: alloc destination memory and copy source memory to it
- * @finalize_and_map: allow caller to map the successfully migrated pages
- *
- *
- * The alloc_and_copy() callback happens once all source pages have been locked,
- * unmapped and checked (checked whether pinned or not). All pages that can be
- * migrated will have an entry in the src array set with the pfn value of the
- * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set (other
- * flags might be set but should be ignored by the callback).
- *
- * The alloc_and_copy() callback can then allocate destination memory and copy
- * source memory to it for all those entries (ie with MIGRATE_PFN_VALID and
- * MIGRATE_PFN_MIGRATE flag set). Once these are allocated and copied, the
- * callback must update each corresponding entry in the dst array with the pfn
- * value of the destination page and with the MIGRATE_PFN_VALID and
- * MIGRATE_PFN_LOCKED flags set (destination pages must have their struct pages
- * locked, via lock_page()).
- *
- * At this point the alloc_and_copy() callback is done and returns.
- *
- * Note that the callback does not have to migrate all the pages that are
- * marked with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration
- * from device memory to system memory (ie the MIGRATE_PFN_DEVICE flag is also
- * set in the src array entry). If the device driver cannot migrate a device
- * page back to system memory, then it must set the corresponding dst array
- * entry to MIGRATE_PFN_ERROR. This will trigger a SIGBUS if CPU tries to
- * access any of the virtual addresses originally backed by this page. Because
- * a SIGBUS is such a severe result for the userspace process, the device
- * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an
- * unrecoverable state.
- *
- * For empty entry inside CPU page table (pte_none() or pmd_none() is true) we
- * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
- * allowing device driver to allocate device memory for those unback virtual
- * address. For this the device driver simply have to allocate device memory
- * and properly set the destination entry like for regular migration. Note that
- * this can still fails and thus inside the device driver must check if the
- * migration was successful for those entry inside the finalize_and_map()
- * callback just like for regular migration.
- *
- * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES
- * OR BAD THINGS WILL HAPPEN !
- *
- *
- * The finalize_and_map() callback happens after struct page migration from
- * source to destination (destination struct pages are the struct pages for the
- * memory allocated by the alloc_and_copy() callback).  Migration can fail, and
- * thus the finalize_and_map() allows the driver to inspect which pages were
- * successfully migrated, and which were not. Successfully migrated pages will
- * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
- *
- * It is safe to update device page table from within the finalize_and_map()
- * callback because both destination and source page are still locked, and the
- * mmap_sem is held in read mode (hence no one can unmap the range being
- * migrated).
- *
- * Once callback is done cleaning up things and updating its page table (if it
- * chose to do so, this is not an obligation) then it returns. At this point,
- * the HMM core will finish up the final steps, and the migration is complete.
- *
- * THE finalize_and_map() CALLBACK MUST NOT CHANGE ANY OF THE SRC OR DST ARRAY
- * ENTRIES OR BAD THINGS WILL HAPPEN !
- */
-struct migrate_vma_ops {
-	void (*alloc_and_copy)(struct vm_area_struct *vma,
-			       const unsigned long *src,
-			       unsigned long *dst,
-			       unsigned long start,
-			       unsigned long end,
-			       void *private);
-	void (*finalize_and_map)(struct vm_area_struct *vma,
-				 const unsigned long *src,
-				 const unsigned long *dst,
-				 unsigned long start,
-				 unsigned long end,
-				 void *private);
+struct migrate_vma {
+	struct vm_area_struct	*vma;
+	/*
+	 * Both src and dst array must be big enough for
+	 * (end - start) >> PAGE_SHIFT entries.
+	 *
+	 * The src array must not be modified by the caller after
+	 * migrate_vma_setup(), and must not change the dst array after
+	 * migrate_vma_pages() returns.
+	 */
+	unsigned long		*dst;
+	unsigned long		*src;
+	unsigned long		cpages;
+	unsigned long		npages;
+	unsigned long		start;
+	unsigned long		end;
 };
 
-#if defined(CONFIG_MIGRATE_VMA_HELPER)
-int migrate_vma(const struct migrate_vma_ops *ops,
-		struct vm_area_struct *vma,
-		unsigned long start,
-		unsigned long end,
-		unsigned long *src,
-		unsigned long *dst,
-		void *private);
-#else
-static inline int migrate_vma(const struct migrate_vma_ops *ops,
-			      struct vm_area_struct *vma,
-			      unsigned long start,
-			      unsigned long end,
-			      unsigned long *src,
-			      unsigned long *dst,
-			      void *private)
-{
-	return -EINVAL;
-}
-#endif /* IS_ENABLED(CONFIG_MIGRATE_VMA_HELPER) */
+int migrate_vma_setup(struct migrate_vma *args);
+void migrate_vma_pages(struct migrate_vma *migrate);
+void migrate_vma_finalize(struct migrate_vma *migrate);
 
 #endif /* CONFIG_MIGRATION */
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 8992741f10aa..8111e031fa2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2118,16 +2118,6 @@ out_unlock:
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_MIGRATE_VMA_HELPER)
-struct migrate_vma {
-	struct vm_area_struct	*vma;
-	unsigned long		*dst;
-	unsigned long		*src;
-	unsigned long		cpages;
-	unsigned long		npages;
-	unsigned long		start;
-	unsigned long		end;
-};
-
 static int migrate_vma_collect_hole(unsigned long start,
 				    unsigned long end,
 				    struct mm_walk *walk)
@@ -2578,6 +2568,110 @@ restore:
 	}
 }
 
+/**
+ * migrate_vma_setup() - prepare to migrate a range of memory
+ * @args: contains the vma, start, and and pfns arrays for the migration
+ *
+ * Returns: negative errno on failures, 0 when 0 or more pages were migrated
+ * without an error.
+ *
+ * Prepare to migrate a range of memory virtual address range by collecting all
+ * the pages backing each virtual address in the range, saving them inside the
+ * src array.  Then lock those pages and unmap them. Once the pages are locked
+ * and unmapped, check whether each page is pinned or not.  Pages that aren't
+ * pinned have the MIGRATE_PFN_MIGRATE flag set (by this function) in the
+ * corresponding src array entry.  Then restores any pages that are pinned, by
+ * remapping and unlocking those pages.
+ *
+ * The caller should then allocate destination memory and copy source memory to
+ * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
+ * flag set).  Once these are allocated and copied, the caller must update each
+ * corresponding entry in the dst array with the pfn value of the destination
+ * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
+ * (destination pages must have their struct pages locked, via lock_page()).
+ *
+ * Note that the caller does not have to migrate all the pages that are marked
+ * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
+ * device memory to system memory.  If the caller cannot migrate a device page
+ * back to system memory, then it must return VM_FAULT_SIGBUS, which has severe
+ * consequences for the userspace process, so it must be avoided if at all
+ * possible.
+ *
+ * For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
+ * do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
+ * allowing the caller to allocate device memory for those unback virtual
+ * address.  For this the caller simply has to allocate device memory and
+ * properly set the destination entry like for regular migration.  Note that
+ * this can still fails and thus inside the device driver must check if the
+ * migration was successful for those entries after calling migrate_vma_pages()
+ * just like for regular migration.
+ *
+ * After that, the callers must call migrate_vma_pages() to go over each entry
+ * in the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
+ * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
+ * then migrate_vma_pages() to migrate struct page information from the source
+ * struct page to the destination struct page.  If it fails to migrate the
+ * struct page information, then it clears the MIGRATE_PFN_MIGRATE flag in the
+ * src array.
+ *
+ * At this point all successfully migrated pages have an entry in the src
+ * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
+ * array entry with MIGRATE_PFN_VALID flag set.
+ *
+ * Once migrate_vma_pages() returns the caller may inspect which pages were
+ * successfully migrated, and which were not.  Successfully migrated pages will
+ * have the MIGRATE_PFN_MIGRATE flag set for their src array entry.
+ *
+ * It is safe to update device page table after migrate_vma_pages() because
+ * both destination and source page are still locked, and the mmap_sem is held
+ * in read mode (hence no one can unmap the range being migrated).
+ *
+ * Once the caller is done cleaning up things and updating its page table (if it
+ * chose to do so, this is not an obligation) it finally calls
+ * migrate_vma_finalize() to update the CPU page table to point to new pages
+ * for successfully migrated pages or otherwise restore the CPU page table to
+ * point to the original source pages.
+ */
+int migrate_vma_setup(struct migrate_vma *args)
+{
+	long nr_pages = (args->end - args->start) >> PAGE_SHIFT;
+
+	args->start &= PAGE_MASK;
+	args->end &= PAGE_MASK;
+	if (!args->vma || is_vm_hugetlb_page(args->vma) ||
+	    (args->vma->vm_flags & VM_SPECIAL) || vma_is_dax(args->vma))
+		return -EINVAL;
+	if (nr_pages <= 0)
+		return -EINVAL;
+	if (args->start < args->vma->vm_start ||
+	    args->start >= args->vma->vm_end)
+		return -EINVAL;
+	if (args->end <= args->vma->vm_start || args->end > args->vma->vm_end)
+		return -EINVAL;
+	if (!args->src || !args->dst)
+		return -EINVAL;
+
+	memset(args->src, 0, sizeof(*args->src) * nr_pages);
+	args->cpages = 0;
+	args->npages = 0;
+
+	migrate_vma_collect(args);
+
+	if (args->cpages)
+		migrate_vma_prepare(args);
+	if (args->cpages)
+		migrate_vma_unmap(args);
+
+	/*
+	 * At this point pages are locked and unmapped, and thus they have
+	 * stable content and can safely be copied to destination memory that
+	 * is allocated by the drivers.
+	 */
+	return 0;
+
+}
+EXPORT_SYMBOL(migrate_vma_setup);
+
 static void migrate_vma_insert_page(struct migrate_vma *migrate,
 				    unsigned long addr,
 				    struct page *page,
@@ -2709,7 +2803,7 @@ abort:
 	*src &= ~MIGRATE_PFN_MIGRATE;
 }
 
-/*
+/**
  * migrate_vma_pages() - migrate meta-data from src page to dst page
  * @migrate: migrate struct containing all migration information
  *
@@ -2717,7 +2811,7 @@ abort:
  * struct page. This effectively finishes the migration from source page to the
  * destination page.
  */
-static void migrate_vma_pages(struct migrate_vma *migrate)
+void migrate_vma_pages(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	const unsigned long start = migrate->start;
@@ -2791,8 +2885,9 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
 	if (notified)
 		mmu_notifier_invalidate_range_only_end(&range);
 }
+EXPORT_SYMBOL(migrate_vma_pages);
 
-/*
+/**
  * migrate_vma_finalize() - restore CPU page table entry
  * @migrate: migrate struct containing all migration information
  *
@@ -2803,7 +2898,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate)
  * This also unlocks the pages and puts them back on the lru, or drops the extra
  * refcount, for device pages.
  */
-static void migrate_vma_finalize(struct migrate_vma *migrate)
+void migrate_vma_finalize(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	unsigned long i;
@@ -2846,124 +2941,5 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
 		}
 	}
 }
-
-/*
- * migrate_vma() - migrate a range of memory inside vma
- *
- * @ops: migration callback for allocating destination memory and copying
- * @vma: virtual memory area containing the range to be migrated
- * @start: start address of the range to migrate (inclusive)
- * @end: end address of the range to migrate (exclusive)
- * @src: array of hmm_pfn_t containing source pfns
- * @dst: array of hmm_pfn_t containing destination pfns
- * @private: pointer passed back to each of the callback
- * Returns: 0 on success, error code otherwise
- *
- * This function tries to migrate a range of memory virtual address range, using
- * callbacks to allocate and copy memory from source to destination. First it
- * collects all the pages backing each virtual address in the range, saving this
- * inside the src array. Then it locks those pages and unmaps them. Once the pages
- * are locked and unmapped, it checks whether each page is pinned or not. Pages
- * that aren't pinned have the MIGRATE_PFN_MIGRATE flag set (by this function)
- * in the corresponding src array entry. It then restores any pages that are
- * pinned, by remapping and unlocking those pages.
- *
- * At this point it calls the alloc_and_copy() callback. For documentation on
- * what is expected from that callback, see struct migrate_vma_ops comments in
- * include/linux/migrate.h
- *
- * After the alloc_and_copy() callback, this function goes over each entry in
- * the src array that has the MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag
- * set. If the corresponding entry in dst array has MIGRATE_PFN_VALID flag set,
- * then the function tries to migrate struct page information from the source
- * struct page to the destination struct page. If it fails to migrate the struct
- * page information, then it clears the MIGRATE_PFN_MIGRATE flag in the src
- * array.
- *
- * At this point all successfully migrated pages have an entry in the src
- * array with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE flag set and the dst
- * array entry with MIGRATE_PFN_VALID flag set.
- *
- * It then calls the finalize_and_map() callback. See comments for "struct
- * migrate_vma_ops", in include/linux/migrate.h for details about
- * finalize_and_map() behavior.
- *
- * After the finalize_and_map() callback, for successfully migrated pages, this
- * function updates the CPU page table to point to new pages, otherwise it
- * restores the CPU page table to point to the original source pages.
- *
- * Function returns 0 after the above steps, even if no pages were migrated
- * (The function only returns an error if any of the arguments are invalid.)
- *
- * Both src and dst array must be big enough for (end - start) >> PAGE_SHIFT
- * unsigned long entries.
- */
-int migrate_vma(const struct migrate_vma_ops *ops,
-		struct vm_area_struct *vma,
-		unsigned long start,
-		unsigned long end,
-		unsigned long *src,
-		unsigned long *dst,
-		void *private)
-{
-	struct migrate_vma migrate;
-
-	/* Sanity check the arguments */
-	start &= PAGE_MASK;
-	end &= PAGE_MASK;
-	if (!vma || is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
-			vma_is_dax(vma))
-		return -EINVAL;
-	if (start < vma->vm_start || start >= vma->vm_end)
-		return -EINVAL;
-	if (end <= vma->vm_start || end > vma->vm_end)
-		return -EINVAL;
-	if (!ops || !src || !dst || start >= end)
-		return -EINVAL;
-
-	memset(src, 0, sizeof(*src) * ((end - start) >> PAGE_SHIFT));
-	migrate.src = src;
-	migrate.dst = dst;
-	migrate.start = start;
-	migrate.npages = 0;
-	migrate.cpages = 0;
-	migrate.end = end;
-	migrate.vma = vma;
-
-	/* Collect, and try to unmap source pages */
-	migrate_vma_collect(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/* Lock and isolate page */
-	migrate_vma_prepare(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/* Unmap pages */
-	migrate_vma_unmap(&migrate);
-	if (!migrate.cpages)
-		return 0;
-
-	/*
-	 * At this point pages are locked and unmapped, and thus they have
-	 * stable content and can safely be copied to destination memory that
-	 * is allocated by the callback.
-	 *
-	 * Note that migration can fail in migrate_vma_struct_page() for each
-	 * individual page.
-	 */
-	ops->alloc_and_copy(vma, src, dst, start, end, private);
-
-	/* This does the real migration of struct page */
-	migrate_vma_pages(&migrate);
-
-	ops->finalize_and_map(vma, src, dst, start, end, private);
-
-	/* Unlock and remap pages */
-	migrate_vma_finalize(&migrate);
-
-	return 0;
-}
-EXPORT_SYMBOL(migrate_vma);
+EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* defined(MIGRATE_VMA_HELPER) */
-- 
cgit v1.2.3


From 2a915acf88ac0c4c583cd654bdc87371e38016c1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 14 Aug 2019 09:59:26 +0200
Subject: mm: remove the unused MIGRATE_PFN_ERROR flag

Now that we can rely errors in the normal control flow there is no
need for this flag, remove it.

Link: https://lore.kernel.org/r/20190814075928.23766-9-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/migrate.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 18156d379ebf..1e67dcfd318f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -167,7 +167,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 #define MIGRATE_PFN_LOCKED	(1UL << 2)
 #define MIGRATE_PFN_WRITE	(1UL << 3)
 #define MIGRATE_PFN_DEVICE	(1UL << 4)
-#define MIGRATE_PFN_ERROR	(1UL << 5)
 #define MIGRATE_PFN_SHIFT	6
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
-- 
cgit v1.2.3


From 06d462beb470d361ffa8bd7b3d865509a8606987 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 14 Aug 2019 09:59:27 +0200
Subject: mm: remove the unused MIGRATE_PFN_DEVICE flag

No one ever checks this flag, and we could easily get that information
from the page if needed.

Link: https://lore.kernel.org/r/20190814075928.23766-10-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 3 +--
 include/linux/migrate.h                | 1 -
 mm/migrate.c                           | 4 ++--
 3 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index d96b987b9982..fa1439941596 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -580,8 +580,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 			*dma_addr))
 		goto out_dma_unmap;
 
-	return migrate_pfn(page_to_pfn(dpage)) |
-		MIGRATE_PFN_LOCKED | MIGRATE_PFN_DEVICE;
+	return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
 
 out_dma_unmap:
 	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 1e67dcfd318f..72120061b7d4 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -166,7 +166,6 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 #define MIGRATE_PFN_MIGRATE	(1UL << 1)
 #define MIGRATE_PFN_LOCKED	(1UL << 2)
 #define MIGRATE_PFN_WRITE	(1UL << 3)
-#define MIGRATE_PFN_DEVICE	(1UL << 4)
 #define MIGRATE_PFN_SHIFT	6
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
diff --git a/mm/migrate.c b/mm/migrate.c
index 8111e031fa2b..f4f5ae5ae44f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2237,8 +2237,8 @@ again:
 				goto next;
 
 			page = device_private_entry_to_page(entry);
-			mpfn = migrate_pfn(page_to_pfn(page))|
-				MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
+			mpfn = migrate_pfn(page_to_pfn(page)) |
+					MIGRATE_PFN_MIGRATE;
 			if (is_write_device_private_entry(entry))
 				mpfn |= MIGRATE_PFN_WRITE;
 		} else {
-- 
cgit v1.2.3


From 0c385190392d8c7128fb7517b3c676e19c7b8808 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 18 Aug 2019 11:05:54 +0200
Subject: resource: add a not device managed request_free_mem_region variant

Factor out the guts of devm_request_free_mem_region so that we can
implement both a device managed and a manually release version as tiny
wrappers around it.

Link: https://lore.kernel.org/r/20190818090557.17853-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/ioport.h |  2 ++
 kernel/resource.c      | 45 +++++++++++++++++++++++++++++++--------------
 2 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5b6a7121c9f0..7bddddfc76d6 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -297,6 +297,8 @@ static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
+struct resource *request_free_mem_region(struct resource *base,
+		unsigned long size, const char *name);
 
 #endif /* __ASSEMBLY__ */
 #endif	/* _LINUX_IOPORT_H */
diff --git a/kernel/resource.c b/kernel/resource.c
index 7ea4306503c5..74877e9d90ca 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1644,19 +1644,8 @@ void resource_list_free(struct list_head *head)
 EXPORT_SYMBOL(resource_list_free);
 
 #ifdef CONFIG_DEVICE_PRIVATE
-/**
- * devm_request_free_mem_region - find free region for device private memory
- *
- * @dev: device struct to bind the resource to
- * @size: size in bytes of the device memory to add
- * @base: resource tree to look in
- *
- * This function tries to find an empty range of physical address big enough to
- * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
- * memory, which in turn allocates struct pages.
- */
-struct resource *devm_request_free_mem_region(struct device *dev,
-		struct resource *base, unsigned long size)
+static struct resource *__request_free_mem_region(struct device *dev,
+		struct resource *base, unsigned long size, const char *name)
 {
 	resource_size_t end, addr;
 	struct resource *res;
@@ -1670,7 +1659,10 @@ struct resource *devm_request_free_mem_region(struct device *dev,
 				REGION_DISJOINT)
 			continue;
 
-		res = devm_request_mem_region(dev, addr, size, dev_name(dev));
+		if (dev)
+			res = devm_request_mem_region(dev, addr, size, name);
+		else
+			res = request_mem_region(addr, size, name);
 		if (!res)
 			return ERR_PTR(-ENOMEM);
 		res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
@@ -1679,7 +1671,32 @@ struct resource *devm_request_free_mem_region(struct device *dev,
 
 	return ERR_PTR(-ERANGE);
 }
+
+/**
+ * devm_request_free_mem_region - find free region for device private memory
+ *
+ * @dev: device struct to bind the resource to
+ * @size: size in bytes of the device memory to add
+ * @base: resource tree to look in
+ *
+ * This function tries to find an empty range of physical address big enough to
+ * contain the new resource, so that it can later be hotplugged as ZONE_DEVICE
+ * memory, which in turn allocates struct pages.
+ */
+struct resource *devm_request_free_mem_region(struct device *dev,
+		struct resource *base, unsigned long size)
+{
+	return __request_free_mem_region(dev, base, size, dev_name(dev));
+}
 EXPORT_SYMBOL_GPL(devm_request_free_mem_region);
+
+struct resource *request_free_mem_region(struct resource *base,
+		unsigned long size, const char *name)
+{
+	return __request_free_mem_region(NULL, base, size, name);
+}
+EXPORT_SYMBOL_GPL(request_free_mem_region);
+
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 static int __init strict_iomem(char *str)
-- 
cgit v1.2.3


From fdc029b19dfd190840d23e5c70bd231140b0e4a4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 18 Aug 2019 11:05:55 +0200
Subject: memremap: remove the dev field in struct dev_pagemap

The dev field in struct dev_pagemap is only used to print dev_name in two
places, which are at best nice to have.  Just remove the field and thus
the name in those two messages.

Link: https://lore.kernel.org/r/20190818090557.17853-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/memremap.h          | 1 -
 kernel/memremap.c                 | 6 +-----
 mm/page_alloc.c                   | 2 +-
 tools/testing/nvdimm/test/iomap.c | 1 -
 4 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f8a5b2a19945..8f0013e18e14 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -109,7 +109,6 @@ struct dev_pagemap {
 	struct percpu_ref *ref;
 	struct percpu_ref internal_ref;
 	struct completion done;
-	struct device *dev;
 	enum memory_type type;
 	unsigned int flags;
 	u64 pci_p2pdma_bus_offset;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 6ee03a816d67..600a14cbe663 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -96,7 +96,6 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 static void devm_memremap_pages_release(void *data)
 {
 	struct dev_pagemap *pgmap = data;
-	struct device *dev = pgmap->dev;
 	struct resource *res = &pgmap->res;
 	unsigned long pfn;
 	int nid;
@@ -123,8 +122,7 @@ static void devm_memremap_pages_release(void *data)
 
 	untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
 	pgmap_array_delete(res);
-	dev_WARN_ONCE(dev, pgmap->altmap.alloc,
-		      "%s: failed to free all reserved pages\n", __func__);
+	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
 }
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
@@ -245,8 +243,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		goto err_array;
 	}
 
-	pgmap->dev = dev;
-
 	error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
 				PHYS_PFN(res->end), pgmap, GFP_KERNEL));
 	if (error)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 272c6de1bf4e..b39baa2b1faf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5982,7 +5982,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
 		}
 	}
 
-	pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
+	pr_info("%s initialised %lu pages in %ums\n", __func__,
 		size, jiffies_to_msecs(jiffies - start));
 }
 
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index cd040b5abffe..3f55f2f99112 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -132,7 +132,6 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (!nfit_res)
 		return devm_memremap_pages(dev, pgmap);
 
-	pgmap->dev = dev;
 	if (!pgmap->ref) {
 		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
 			return ERR_PTR(-EINVAL);
-- 
cgit v1.2.3


From 6869b7b206595ae0e326f59719090351eb8f4f5d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 18 Aug 2019 11:05:57 +0200
Subject: memremap: provide a not device managed memremap_pages

The kvmppc ultravisor code wants a device private memory pool that is
system wide and not attached to a device.  Instead of faking up one
provide a low-level memremap_pages for it.

Link: https://lore.kernel.org/r/20190818090557.17853-5-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Bharata B Rao <bharata@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/memremap.h |  2 ++
 kernel/memremap.c        | 84 ++++++++++++++++++++++++++++++------------------
 2 files changed, 54 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8f0013e18e14..fb2a0bd826b9 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -123,6 +123,8 @@ static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
 }
 
 #ifdef CONFIG_ZONE_DEVICE
+void *memremap_pages(struct dev_pagemap *pgmap, int nid);
+void memunmap_pages(struct dev_pagemap *pgmap);
 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 09a087ca30ff..77a77704eb28 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -96,9 +96,8 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 	}
 }
 
-static void devm_memremap_pages_release(void *data)
+void memunmap_pages(struct dev_pagemap *pgmap)
 {
-	struct dev_pagemap *pgmap = data;
 	struct resource *res = &pgmap->res;
 	unsigned long pfn;
 	int nid;
@@ -128,6 +127,12 @@ static void devm_memremap_pages_release(void *data)
 	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
 	devmap_managed_enable_put();
 }
+EXPORT_SYMBOL_GPL(memunmap_pages);
+
+static void devm_memremap_pages_release(void *data)
+{
+	memunmap_pages(data);
+}
 
 static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 {
@@ -137,27 +142,12 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 	complete(&pgmap->done);
 }
 
-/**
- * devm_memremap_pages - remap and provide memmap backing for the given resource
- * @dev: hosting device for @res
- * @pgmap: pointer to a struct dev_pagemap
- *
- * Notes:
- * 1/ At a minimum the res and type members of @pgmap must be initialized
- *    by the caller before passing it to this function
- *
- * 2/ The altmap field may optionally be initialized, in which case
- *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
- *
- * 3/ The ref field may optionally be provided, in which pgmap->ref must be
- *    'live' on entry and will be killed and reaped at
- *    devm_memremap_pages_release() time, or if this routine fails.
- *
- * 4/ res is expected to be a host memory range that could feasibly be
- *    treated as a "System RAM" range, i.e. not a device mmio range, but
- *    this is not enforced.
+/*
+ * Not device managed version of dev_memremap_pages, undone by
+ * memunmap_pages().  Please use dev_memremap_pages if you have a struct
+ * device available.
  */
-void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 {
 	struct resource *res = &pgmap->res;
 	struct dev_pagemap *conflict_pgmap;
@@ -168,7 +158,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 		.altmap = pgmap_altmap(pgmap),
 	};
 	pgprot_t pgprot = PAGE_KERNEL;
-	int error, nid, is_ram;
+	int error, is_ram;
 	bool need_devmap_managed = true;
 
 	switch (pgmap->type) {
@@ -223,7 +213,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
 	if (conflict_pgmap) {
-		dev_WARN(dev, "Conflicting mapping in same section\n");
+		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
 		error = -ENOMEM;
 		goto err_array;
@@ -231,7 +221,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
 	if (conflict_pgmap) {
-		dev_WARN(dev, "Conflicting mapping in same section\n");
+		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
 		error = -ENOMEM;
 		goto err_array;
@@ -252,7 +242,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (error)
 		goto err_array;
 
-	nid = dev_to_node(dev);
 	if (nid < 0)
 		nid = numa_mem_id();
 
@@ -308,12 +297,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 				PHYS_PFN(res->start),
 				PHYS_PFN(resource_size(res)), pgmap);
 	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-
-	error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
-			pgmap);
-	if (error)
-		return ERR_PTR(error);
-
 	return __va(res->start);
 
  err_add_memory:
@@ -328,6 +311,43 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	devmap_managed_enable_put();
 	return ERR_PTR(error);
 }
+EXPORT_SYMBOL_GPL(memremap_pages);
+
+/**
+ * devm_memremap_pages - remap and provide memmap backing for the given resource
+ * @dev: hosting device for @res
+ * @pgmap: pointer to a struct dev_pagemap
+ *
+ * Notes:
+ * 1/ At a minimum the res and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case
+ *    PGMAP_ALTMAP_VALID must be set in pgmap->flags.
+ *
+ * 3/ The ref field may optionally be provided, in which pgmap->ref must be
+ *    'live' on entry and will be killed and reaped at
+ *    devm_memremap_pages_release() time, or if this routine fails.
+ *
+ * 4/ res is expected to be a host memory range that could feasibly be
+ *    treated as a "System RAM" range, i.e. not a device mmio range, but
+ *    this is not enforced.
+ */
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+	int error;
+	void *ret;
+
+	ret = memremap_pages(pgmap, dev_to_node(dev));
+	if (IS_ERR(ret))
+		return ret;
+
+	error = devm_add_action_or_reset(dev, devm_memremap_pages_release,
+			pgmap);
+	if (error)
+		return ERR_PTR(error);
+	return ret;
+}
 EXPORT_SYMBOL_GPL(devm_memremap_pages);
 
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap)
-- 
cgit v1.2.3


From 5f912f7ced427db56729e6419b1fdff9226e0728 Mon Sep 17 00:00:00 2001
From: Peter Griffin <peter.griffin@linaro.org>
Date: Mon, 19 Aug 2019 23:48:40 +0000
Subject: dt-bindings: reset: hisilicon: Add ao reset controller

This is required to bring Mali450 gpu out of reset.

Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Philipp Zabel <p.zabel@pengutronix.de>
Cc: dri-devel@lists.freedesktop.org
Cc: devicetree@vger.kernel.org
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/dt-bindings/reset/hisi,hi6220-resets.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/reset/hisi,hi6220-resets.h b/include/dt-bindings/reset/hisi,hi6220-resets.h
index e7c362a81a97..63aff7d8aa45 100644
--- a/include/dt-bindings/reset/hisi,hi6220-resets.h
+++ b/include/dt-bindings/reset/hisi,hi6220-resets.h
@@ -73,4 +73,11 @@
 #define MEDIA_MMU                       6
 #define MEDIA_XG2RAM1                   7
 
+#define AO_G3D                          1
+#define AO_CODECISP                     2
+#define AO_MCPU                         4
+#define AO_BBPHARQMEM                   5
+#define AO_HIFI                         8
+#define AO_ACPUSCUL2C                   12
+
 #endif /*_DT_BINDINGS_RESET_CONTROLLER_HI6220*/
-- 
cgit v1.2.3


From 691b45ddbd182a4ce0bc91953c70c845cf0935f1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 Aug 2019 18:36:19 -0400
Subject: SUNRPC: Remove rpc_wake_up_queued_task_on_wq()

Clean up: commit c544577daddb ("SUNRPC: Clean up transport write
space handling") appears to have removed the last caller of
rpc_wake_up_queued_task_on_wq().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/sched.h |  3 ---
 net/sunrpc/sched.c           | 27 ++++-----------------------
 2 files changed, 4 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index baa3ecdb882f..d1283bddd218 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -243,9 +243,6 @@ void		rpc_sleep_on_priority_timeout(struct rpc_wait_queue *queue,
 void		rpc_sleep_on_priority(struct rpc_wait_queue *,
 					struct rpc_task *,
 					int priority);
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-		struct rpc_wait_queue *queue,
-		struct rpc_task *task);
 void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 					struct rpc_task *);
 void		rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 1f275aba786f..f25c4b9ba185 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -541,33 +541,14 @@ rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
 	return NULL;
 }
 
-static void
-rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
-		struct rpc_wait_queue *queue, struct rpc_task *task)
-{
-	rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
-}
-
 /*
  * Wake up a queued task while the queue lock is being held
  */
-static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
+					  struct rpc_task *task)
 {
-	rpc_wake_up_task_on_wq_queue_locked(rpciod_workqueue, queue, task);
-}
-
-/*
- * Wake up a task on a specific queue
- */
-void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
-		struct rpc_wait_queue *queue,
-		struct rpc_task *task)
-{
-	if (!RPC_IS_QUEUED(task))
-		return;
-	spin_lock(&queue->lock);
-	rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-	spin_unlock(&queue->lock);
+	rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+						   task, NULL, NULL);
 }
 
 /*
-- 
cgit v1.2.3


From dc617f29dbe5ef0c8ced65ce62c464af1daaab3d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 20 Aug 2019 07:55:16 -0700
Subject: vfs: don't allow writes to swap files

Don't let userspace write to an active swap file because the kernel
effectively has a long term lease on the storage and things could get
seriously corrupted if we let this happen.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/block_dev.c     |  3 +++
 include/linux/fs.h | 11 +++++++++++
 mm/filemap.c       |  3 +++
 mm/memory.c        |  4 ++++
 mm/mmap.c          |  8 ++++++--
 mm/swapfile.c      | 12 +++++++++++-
 6 files changed, 38 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index c2a85b587922..d9bab63a9b81 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1978,6 +1978,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (bdev_read_only(I_BDEV(bd_inode)))
 		return -EPERM;
 
+	if (IS_SWAPFILE(bd_inode))
+		return -ETXTBSY;
+
 	if (!iov_iter_count(from))
 		return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 56b8e358af5c..a2e3d446ba8e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3547,4 +3547,15 @@ static inline void simple_fill_fsxattr(struct fsxattr *fa, __u32 xflags)
 	fa->fsx_xflags = xflags;
 }
 
+/*
+ * Flush file data before changing attributes.  Caller must hold any locks
+ * required to prevent further writes to this file until we're done setting
+ * flags.
+ */
+static inline int inode_drain_writes(struct inode *inode)
+{
+	inode_dio_wait(inode);
+	return filemap_write_and_wait(inode->i_mapping);
+}
+
 #endif /* _LINUX_FS_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index d0cf700bf201..40667c2f3383 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2988,6 +2988,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	loff_t count;
 	int ret;
 
+	if (IS_SWAPFILE(inode))
+		return -ETXTBSY;
+
 	if (!iov_iter_count(from))
 		return 0;
 
diff --git a/mm/memory.c b/mm/memory.c
index e2bb51b6242e..b1dff75640b7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2196,6 +2196,10 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
 
 	vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
 
+	if (vmf->vma->vm_file &&
+	    IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host))
+		return VM_FAULT_SIGBUS;
+
 	ret = vmf->vma->vm_ops->page_mkwrite(vmf);
 	/* Restore original flags so that caller is not surprised */
 	vmf->flags = old_flags;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7e8c3e8ae75f..6bc21fca20bc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1483,8 +1483,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
 		case MAP_SHARED_VALIDATE:
 			if (flags & ~flags_mask)
 				return -EOPNOTSUPP;
-			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
-				return -EACCES;
+			if (prot & PROT_WRITE) {
+				if (!(file->f_mode & FMODE_WRITE))
+					return -EACCES;
+				if (IS_SWAPFILE(file->f_mapping->host))
+					return -ETXTBSY;
+			}
 
 			/*
 			 * Make sure we don't allow writing to an append-only
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a53b7c49b40e..dab43523afdd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3275,6 +3275,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (error)
 		goto bad_swap;
 
+	/*
+	 * Flush any pending IO and dirty mappings before we start using this
+	 * swap device.
+	 */
+	inode->i_flags |= S_SWAPFILE;
+	error = inode_drain_writes(inode);
+	if (error) {
+		inode->i_flags &= ~S_SWAPFILE;
+		goto bad_swap;
+	}
+
 	mutex_lock(&swapon_mutex);
 	prio = -1;
 	if (swap_flags & SWAP_FLAG_PREFER)
@@ -3295,7 +3306,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	atomic_inc(&proc_poll_event);
 	wake_up_interruptible(&proc_poll_wait);
 
-	inode->i_flags |= S_SWAPFILE;
 	error = 0;
 	goto out;
 bad_swap:
-- 
cgit v1.2.3


From 69ecfdaa534943efd95ee12b53fbb0f344e42e7e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 20 Aug 2019 01:10:35 +0900
Subject: bpf: add include guard to tnum.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/tnum.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index c7dc2b5902c0..c17af77f3fae 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -5,6 +5,10 @@
  * propagate the unknown bits such that the tnum result represents all the
  * possible results for possible values of the operands.
  */
+
+#ifndef _LINUX_TNUM_H
+#define _LINUX_TNUM_H
+
 #include <linux/types.h>
 
 struct tnum {
@@ -81,3 +85,5 @@ bool tnum_in(struct tnum a, struct tnum b);
 int tnum_strn(char *str, size_t size, struct tnum a);
 /* Format a tnum as tristate binary expansion */
 int tnum_sbin(char *str, size_t size, struct tnum a);
+
+#endif /* _LINUX_TNUM_H */
-- 
cgit v1.2.3


From aeaed4848234c97fb720b0e51a0c56dc8de0eeed Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 Aug 2019 18:40:58 -0400
Subject: xprtrdma: Boost client's max slot table size to match Linux server

I've heard rumors of an NFS/RDMA server implementation that has a
default credit limit of 1024. The client's default setting remains
at 128.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprtrdma.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h
index 86fc38ff0355..16c239e0d6dd 100644
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -49,9 +49,9 @@
  * fully-chunked NFS message (read chunks are the largest). Note only
  * a single chunk type per message is supported currently.
  */
-#define RPCRDMA_MIN_SLOT_TABLE	(2U)
+#define RPCRDMA_MIN_SLOT_TABLE	(4U)
 #define RPCRDMA_DEF_SLOT_TABLE	(128U)
-#define RPCRDMA_MAX_SLOT_TABLE	(256U)
+#define RPCRDMA_MAX_SLOT_TABLE	(16384U)
 
 #define RPCRDMA_MIN_INLINE  (1024)	/* min inline thresh */
 #define RPCRDMA_DEF_INLINE  (4096)	/* default inline thresh */
-- 
cgit v1.2.3


From 1b9ed84ecf268904d89edf2908426a8eb3b5a4ba Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Tue, 20 Aug 2019 10:31:50 +0100
Subject: bpf: add new BPF_BTF_GET_NEXT_ID syscall command

Add a new command for the bpf() system call: BPF_BTF_GET_NEXT_ID is used
to cycle through all BTF objects loaded on the system.

The motivation is to be able to inspect (list) all BTF objects presents
on the system.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h      | 3 +++
 include/uapi/linux/bpf.h | 1 +
 kernel/bpf/btf.c         | 4 ++--
 kernel/bpf/syscall.c     | 4 ++++
 4 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 15ae49862b82..5b9d22338606 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -24,6 +24,9 @@ struct seq_file;
 struct btf;
 struct btf_type;
 
+extern struct idr btf_idr;
+extern spinlock_t btf_idr_lock;
+
 /* map is generic key/value storage optionally accesible by eBPF programs */
 struct bpf_map_ops {
 	/* funcs callable from userspace (via syscall) */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0ef594ac3899..8aa6126f0b6e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -106,6 +106,7 @@ enum bpf_cmd {
 	BPF_TASK_FD_QUERY,
 	BPF_MAP_LOOKUP_AND_DELETE_ELEM,
 	BPF_MAP_FREEZE,
+	BPF_BTF_GET_NEXT_ID,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 6b403dc18486..adb3adcebe3c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -195,8 +195,8 @@
 	     i < btf_type_vlen(struct_type);					\
 	     i++, member++)
 
-static DEFINE_IDR(btf_idr);
-static DEFINE_SPINLOCK(btf_idr_lock);
+DEFINE_IDR(btf_idr);
+DEFINE_SPINLOCK(btf_idr_lock);
 
 struct btf {
 	void *data;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cf8052b016e7..c0f62fd67c6b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2884,6 +2884,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_obj_get_next_id(&attr, uattr,
 					  &map_idr, &map_idr_lock);
 		break;
+	case BPF_BTF_GET_NEXT_ID:
+		err = bpf_obj_get_next_id(&attr, uattr,
+					  &btf_idr, &btf_idr_lock);
+		break;
 	case BPF_PROG_GET_FD_BY_ID:
 		err = bpf_prog_get_fd_by_id(&attr);
 		break;
-- 
cgit v1.2.3


From b2299e83815c59ab59c4ee4fb4842b3b28e5072f Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 19 Aug 2019 14:45:47 +0300
Subject: RDMA: Delete DEBUG code

There is no need to keep DEBUG defines for out-of-the tree testing.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190819114547.20704-1-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/fmr_pool.c | 13 -------------
 include/rdma/ib_verbs.h            |  3 ---
 2 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 7d841b689a1e..e08aec427027 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -148,13 +148,6 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
 		hlist_del_init(&fmr->cache_node);
 		fmr->remap_count = 0;
 		list_add_tail(&fmr->fmr->list, &fmr_list);
-
-#ifdef DEBUG
-		if (fmr->ref_count !=0) {
-			pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
-				fmr, fmr->ref_count);
-		}
-#endif
 	}
 
 	list_splice_init(&pool->dirty_list, &unmap_list);
@@ -496,12 +489,6 @@ void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
 		}
 	}
 
-#ifdef DEBUG
-	if (fmr->ref_count < 0)
-		pr_warn(PFX "FMR %p has ref count %d < 0\n",
-			fmr, fmr->ref_count);
-#endif
-
 	spin_unlock_irqrestore(&pool->pool_lock, flags);
 }
 EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 391499008a22..08e966c8081a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -98,9 +98,6 @@ void ibdev_info(const struct ib_device *ibdev, const char *format, ...);
 #if defined(CONFIG_DYNAMIC_DEBUG)
 #define ibdev_dbg(__dev, format, args...)                       \
 	dynamic_ibdev_dbg(__dev, format, ##args)
-#elif defined(DEBUG)
-#define ibdev_dbg(__dev, format, args...)                       \
-	ibdev_printk(KERN_DEBUG, __dev, format, ##args)
 #else
 __printf(2, 3) __cold
 static inline
-- 
cgit v1.2.3


From 60c78668ae50d6b815ead4a62216822a92097125 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 15 Aug 2019 11:38:29 +0300
Subject: RDMA/restrack: Rewrite PID namespace check to be reliable

task_active_pid_ns() is wrong API to check PID namespace because it
posses some restrictions and return PID namespace where the process
was allocated. It created mismatches with current namespace, which
can be different.

Rewrite whole rdma_is_visible_in_pid_ns() logic to provide reliable
results without any relation to allocated PID namespace.

Fixes: 8be565e65fa9 ("RDMA/nldev: Factor out the PID namespace check")
Fixes: 6a6c306a09b5 ("RDMA/restrack: Make is_visible_in_pid_ns() as an API")
Reviewed-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190815083834.9245-4-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/nldev.c    |  3 +--
 drivers/infiniband/core/restrack.c | 15 +++++++--------
 include/rdma/restrack.h            |  3 +--
 3 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 87d40d1ecdde..020c26976558 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -382,8 +382,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
 	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
 		if (!names[i])
 			continue;
-		curr = rdma_restrack_count(device, i,
-					   task_active_pid_ns(current));
+		curr = rdma_restrack_count(device, i);
 		ret = fill_res_info_entry(msg, names[i], curr);
 		if (ret)
 			goto err;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index bddff426ee0f..a07665f7ef8c 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -107,10 +107,8 @@ void rdma_restrack_clean(struct ib_device *dev)
  * rdma_restrack_count() - the current usage of specific object
  * @dev:  IB device
  * @type: actual type of object to operate
- * @ns:   PID namespace
  */
-int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
-			struct pid_namespace *ns)
+int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type)
 {
 	struct rdma_restrack_root *rt = &dev->res[type];
 	struct rdma_restrack_entry *e;
@@ -119,10 +117,9 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type,
 
 	xa_lock(&rt->xa);
 	xas_for_each(&xas, e, U32_MAX) {
-		if (ns == &init_pid_ns ||
-		    (!rdma_is_kernel_res(e) &&
-		     ns == task_active_pid_ns(e->task)))
-			cnt++;
+		if (!rdma_is_visible_in_pid_ns(e))
+			continue;
+		cnt++;
 	}
 	xa_unlock(&rt->xa);
 	return cnt;
@@ -360,5 +357,7 @@ bool rdma_is_visible_in_pid_ns(struct rdma_restrack_entry *res)
 	 */
 	if (rdma_is_kernel_res(res))
 		return task_active_pid_ns(current) == &init_pid_ns;
-	return task_active_pid_ns(current) == task_active_pid_ns(res->task);
+
+	/* PID 0 means that resource is not found in current namespace */
+	return task_pid_vnr(res->task);
 }
diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h
index b0fc6b26bdf5..83df1ec6664e 100644
--- a/include/rdma/restrack.h
+++ b/include/rdma/restrack.h
@@ -105,8 +105,7 @@ struct rdma_restrack_entry {
 };
 
 int rdma_restrack_count(struct ib_device *dev,
-			enum rdma_restrack_type type,
-			struct pid_namespace *ns);
+			enum rdma_restrack_type type);
 
 void rdma_restrack_kadd(struct rdma_restrack_entry *res);
 void rdma_restrack_uadd(struct rdma_restrack_entry *res);
-- 
cgit v1.2.3


From 2dfdcd88cf0ea66eec0478de82283ef20eb6f421 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 Aug 2019 18:41:44 -0400
Subject: xprtrdma: Rename CQE field in Receive trace points

Make the field name the same for all trace points that handle
pointers to struct rpcrdma_rep. That makes it easy to grep for
matching rep points in trace output.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/rpcrdma.h | 21 +++++++++++----------
 net/sunrpc/xprtrdma/verbs.c    |  2 +-
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index f6a4eaa85a3e..6e6055eb67e7 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -623,21 +623,21 @@ TRACE_EVENT(xprtrdma_post_send,
 
 TRACE_EVENT(xprtrdma_post_recv,
 	TP_PROTO(
-		const struct ib_cqe *cqe
+		const struct rpcrdma_rep *rep
 	),
 
-	TP_ARGS(cqe),
+	TP_ARGS(rep),
 
 	TP_STRUCT__entry(
-		__field(const void *, cqe)
+		__field(const void *, rep)
 	),
 
 	TP_fast_assign(
-		__entry->cqe = cqe;
+		__entry->rep = rep;
 	),
 
-	TP_printk("cqe=%p",
-		__entry->cqe
+	TP_printk("rep=%p",
+		__entry->rep
 	)
 );
 
@@ -715,14 +715,15 @@ TRACE_EVENT(xprtrdma_wc_receive,
 	TP_ARGS(wc),
 
 	TP_STRUCT__entry(
-		__field(const void *, cqe)
+		__field(const void *, rep)
 		__field(u32, byte_len)
 		__field(unsigned int, status)
 		__field(u32, vendor_err)
 	),
 
 	TP_fast_assign(
-		__entry->cqe = wc->wr_cqe;
+		__entry->rep = container_of(wc->wr_cqe, struct rpcrdma_rep,
+					    rr_cqe);
 		__entry->status = wc->status;
 		if (wc->status) {
 			__entry->byte_len = 0;
@@ -733,8 +734,8 @@ TRACE_EVENT(xprtrdma_wc_receive,
 		}
 	),
 
-	TP_printk("cqe=%p %u bytes: %s (%u/0x%x)",
-		__entry->cqe, __entry->byte_len,
+	TP_printk("rep=%p %u bytes: %s (%u/0x%x)",
+		__entry->rep, __entry->byte_len,
 		rdma_show_wc_status(__entry->status),
 		__entry->status, __entry->vendor_err
 	)
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e639ea0faf19..3c275a7a4e4c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1531,7 +1531,7 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
 		if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
 			goto release_wrs;
 
-		trace_xprtrdma_post_recv(rep->rr_recv_wr.wr_cqe);
+		trace_xprtrdma_post_recv(rep);
 		++count;
 	}
 
-- 
cgit v1.2.3


From e95656ea15e54d4e6a192d560d84008b53fc1eb5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 18 Apr 2017 17:28:30 -0700
Subject: Input: add support for polling to input devices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Separating "normal" and "polled" input devices was a mistake, as often we
want to allow the very same device work on both interrupt-driven and
polled mode, depending on the board on which the device is used.

This introduces new APIs:

- input_setup_polling
- input_set_poll_interval
- input_set_min_poll_interval
- input_set_max_poll_interval

These new APIs allow switching an input device into polled mode with sysfs
attributes matching drivers using input_polled_dev APIs that will be
eventually removed.

Tested-by: Michal Vokáč <michal.vokac@ysoft.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/Makefile       |   2 +-
 drivers/input/input-poller.c | 213 +++++++++++++++++++++++++++++++++++++++++++
 drivers/input/input-poller.h |  18 ++++
 drivers/input/input.c        |  36 ++++++--
 include/linux/input.h        |  12 +++
 5 files changed, 273 insertions(+), 8 deletions(-)
 create mode 100644 drivers/input/input-poller.c
 create mode 100644 drivers/input/input-poller.h

(limited to 'include')

diff --git a/drivers/input/Makefile b/drivers/input/Makefile
index 40de6a7be641..e35650930371 100644
--- a/drivers/input/Makefile
+++ b/drivers/input/Makefile
@@ -6,7 +6,7 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_INPUT)		+= input-core.o
-input-core-y := input.o input-compat.o input-mt.o ff-core.o
+input-core-y := input.o input-compat.o input-mt.o input-poller.o ff-core.o
 
 obj-$(CONFIG_INPUT_FF_MEMLESS)	+= ff-memless.o
 obj-$(CONFIG_INPUT_POLLDEV)	+= input-polldev.o
diff --git a/drivers/input/input-poller.c b/drivers/input/input-poller.c
new file mode 100644
index 000000000000..1b3d28964bb2
--- /dev/null
+++ b/drivers/input/input-poller.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Support for polling mode for input devices.
+ */
+
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/jiffies.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include "input-poller.h"
+
+struct input_dev_poller {
+	void (*poll)(struct input_dev *dev);
+
+	unsigned int poll_interval; /* msec */
+	unsigned int poll_interval_max; /* msec */
+	unsigned int poll_interval_min; /* msec */
+
+	struct input_dev *input;
+	struct delayed_work work;
+};
+
+static void input_dev_poller_queue_work(struct input_dev_poller *poller)
+{
+	unsigned long delay;
+
+	delay = msecs_to_jiffies(poller->poll_interval);
+	if (delay >= HZ)
+		delay = round_jiffies_relative(delay);
+
+	queue_delayed_work(system_freezable_wq, &poller->work, delay);
+}
+
+static void input_dev_poller_work(struct work_struct *work)
+{
+	struct input_dev_poller *poller =
+		container_of(work, struct input_dev_poller, work.work);
+
+	poller->poll(poller->input);
+	input_dev_poller_queue_work(poller);
+}
+
+void input_dev_poller_finalize(struct input_dev_poller *poller)
+{
+	if (!poller->poll_interval)
+		poller->poll_interval = 500;
+	if (!poller->poll_interval_max)
+		poller->poll_interval_max = poller->poll_interval;
+}
+
+void input_dev_poller_start(struct input_dev_poller *poller)
+{
+	/* Only start polling if polling is enabled */
+	if (poller->poll_interval > 0) {
+		poller->poll(poller->input);
+		input_dev_poller_queue_work(poller);
+	}
+}
+
+void input_dev_poller_stop(struct input_dev_poller *poller)
+{
+	cancel_delayed_work_sync(&poller->work);
+}
+
+int input_setup_polling(struct input_dev *dev,
+			void (*poll_fn)(struct input_dev *dev))
+{
+	struct input_dev_poller *poller;
+
+	poller = kzalloc(sizeof(*poller), GFP_KERNEL);
+	if (!poller) {
+		/*
+		 * We want to show message even though kzalloc() may have
+		 * printed backtrace as knowing what instance of input
+		 * device we were dealing with is helpful.
+		 */
+		dev_err(dev->dev.parent ?: &dev->dev,
+			"%s: unable to allocate poller structure\n", __func__);
+		return -ENOMEM;
+	}
+
+	INIT_DELAYED_WORK(&poller->work, input_dev_poller_work);
+	poller->input = dev;
+	poller->poll = poll_fn;
+
+	dev->poller = poller;
+	return 0;
+}
+EXPORT_SYMBOL(input_setup_polling);
+
+static bool input_dev_ensure_poller(struct input_dev *dev)
+{
+	if (!dev->poller) {
+		dev_err(dev->dev.parent ?: &dev->dev,
+			"poller structure has not been set up\n");
+		return false;
+	}
+
+	return true;
+}
+
+void input_set_poll_interval(struct input_dev *dev, unsigned int interval)
+{
+	if (input_dev_ensure_poller(dev))
+		dev->poller->poll_interval = interval;
+}
+EXPORT_SYMBOL(input_set_poll_interval);
+
+void input_set_min_poll_interval(struct input_dev *dev, unsigned int interval)
+{
+	if (input_dev_ensure_poller(dev))
+		dev->poller->poll_interval_min = interval;
+}
+EXPORT_SYMBOL(input_set_min_poll_interval);
+
+void input_set_max_poll_interval(struct input_dev *dev, unsigned int interval)
+{
+	if (input_dev_ensure_poller(dev))
+		dev->poller->poll_interval_max = interval;
+}
+EXPORT_SYMBOL(input_set_max_poll_interval);
+
+/* SYSFS interface */
+
+static ssize_t input_dev_get_poll_interval(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct input_dev *input = to_input_dev(dev);
+
+	return sprintf(buf, "%d\n", input->poller->poll_interval);
+}
+
+static ssize_t input_dev_set_poll_interval(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct input_dev *input = to_input_dev(dev);
+	struct input_dev_poller *poller = input->poller;
+	unsigned int interval;
+	int err;
+
+	err = kstrtouint(buf, 0, &interval);
+	if (err)
+		return err;
+
+	if (interval < poller->poll_interval_min)
+		return -EINVAL;
+
+	if (interval > poller->poll_interval_max)
+		return -EINVAL;
+
+	mutex_lock(&input->mutex);
+
+	poller->poll_interval = interval;
+
+	if (input->users) {
+		cancel_delayed_work_sync(&poller->work);
+		if (poller->poll_interval > 0)
+			input_dev_poller_queue_work(poller);
+	}
+
+	mutex_unlock(&input->mutex);
+
+	return count;
+}
+
+static DEVICE_ATTR(poll, 0644,
+		   input_dev_get_poll_interval, input_dev_set_poll_interval);
+
+static ssize_t input_dev_get_poll_max(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct input_dev *input = to_input_dev(dev);
+
+	return sprintf(buf, "%d\n", input->poller->poll_interval_max);
+}
+
+static DEVICE_ATTR(max, 0444, input_dev_get_poll_max, NULL);
+
+static ssize_t input_dev_get_poll_min(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct input_dev *input = to_input_dev(dev);
+
+	return sprintf(buf, "%d\n", input->poller->poll_interval_min);
+}
+
+static DEVICE_ATTR(min, 0444, input_dev_get_poll_min, NULL);
+
+static umode_t input_poller_attrs_visible(struct kobject *kobj,
+					  struct attribute *attr, int n)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct input_dev *input = to_input_dev(dev);
+
+	return input->poller ? attr->mode : 0;
+}
+
+static struct attribute *input_poller_attrs[] = {
+	&dev_attr_poll.attr,
+	&dev_attr_max.attr,
+	&dev_attr_min.attr,
+	NULL
+};
+
+struct attribute_group input_poller_attribute_group = {
+	.is_visible	= input_poller_attrs_visible,
+	.attrs		= input_poller_attrs,
+};
diff --git a/drivers/input/input-poller.h b/drivers/input/input-poller.h
new file mode 100644
index 000000000000..e3fca0be1d32
--- /dev/null
+++ b/drivers/input/input-poller.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _INPUT_POLLER_H
+#define _INPUT_POLLER_H
+
+/*
+ * Support for polling mode for input devices.
+ */
+#include <linux/sysfs.h>
+
+struct input_dev_poller;
+
+void input_dev_poller_finalize(struct input_dev_poller *poller);
+void input_dev_poller_start(struct input_dev_poller *poller);
+void input_dev_poller_stop(struct input_dev_poller *poller);
+
+extern struct attribute_group input_poller_attribute_group;
+
+#endif /* _INPUT_POLLER_H */
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 7494a0dede79..c08aa3596144 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -24,6 +24,7 @@
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
 #include "input-compat.h"
+#include "input-poller.h"
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@suse.cz>");
 MODULE_DESCRIPTION("Input core");
@@ -603,20 +604,31 @@ int input_open_device(struct input_handle *handle)
 
 	handle->open++;
 
-	if (!dev->users++ && dev->open)
-		retval = dev->open(dev);
+	if (dev->users++) {
+		/*
+		 * Device is already opened, so we can exit immediately and
+		 * report success.
+		 */
+		goto out;
+	}
 
-	if (retval) {
-		dev->users--;
-		if (!--handle->open) {
+	if (dev->open) {
+		retval = dev->open(dev);
+		if (retval) {
+			dev->users--;
+			handle->open--;
 			/*
 			 * Make sure we are not delivering any more events
 			 * through this handle
 			 */
 			synchronize_rcu();
+			goto out;
 		}
 	}
 
+	if (dev->poller)
+		input_dev_poller_start(dev->poller);
+
  out:
 	mutex_unlock(&dev->mutex);
 	return retval;
@@ -655,8 +667,13 @@ void input_close_device(struct input_handle *handle)
 
 	__input_release_device(handle);
 
-	if (!--dev->users && dev->close)
-		dev->close(dev);
+	if (!--dev->users) {
+		if (dev->poller)
+			input_dev_poller_stop(dev->poller);
+
+		if (dev->close)
+			dev->close(dev);
+	}
 
 	if (!--handle->open) {
 		/*
@@ -1502,6 +1519,7 @@ static const struct attribute_group *input_dev_attr_groups[] = {
 	&input_dev_attr_group,
 	&input_dev_id_attr_group,
 	&input_dev_caps_attr_group,
+	&input_poller_attribute_group,
 	NULL
 };
 
@@ -1511,6 +1529,7 @@ static void input_dev_release(struct device *device)
 
 	input_ff_destroy(dev);
 	input_mt_destroy_slots(dev);
+	kfree(dev->poller);
 	kfree(dev->absinfo);
 	kfree(dev->vals);
 	kfree(dev);
@@ -2175,6 +2194,9 @@ int input_register_device(struct input_dev *dev)
 	if (!dev->setkeycode)
 		dev->setkeycode = input_default_setkeycode;
 
+	if (dev->poller)
+		input_dev_poller_finalize(dev->poller);
+
 	error = device_add(&dev->dev);
 	if (error)
 		goto err_free_vals;
diff --git a/include/linux/input.h b/include/linux/input.h
index e95a439d8bd5..94f277cd806a 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -21,6 +21,8 @@
 #include <linux/timer.h>
 #include <linux/mod_devicetable.h>
 
+struct input_dev_poller;
+
 /**
  * struct input_value - input value representation
  * @type: type of value (EV_KEY, EV_ABS, etc)
@@ -71,6 +73,8 @@ enum input_clock_type {
  *	not sleep
  * @ff: force feedback structure associated with the device if device
  *	supports force feedback effects
+ * @poller: poller structure associated with the device if device is
+ *	set up to use polling mode
  * @repeat_key: stores key code of the last key pressed; used to implement
  *	software autorepeat
  * @timer: timer for software autorepeat
@@ -156,6 +160,8 @@ struct input_dev {
 
 	struct ff_device *ff;
 
+	struct input_dev_poller *poller;
+
 	unsigned int repeat_key;
 	struct timer_list timer;
 
@@ -372,6 +378,12 @@ void input_unregister_device(struct input_dev *);
 
 void input_reset_device(struct input_dev *);
 
+int input_setup_polling(struct input_dev *dev,
+			void (*poll_fn)(struct input_dev *dev));
+void input_set_poll_interval(struct input_dev *dev, unsigned int interval);
+void input_set_min_poll_interval(struct input_dev *dev, unsigned int interval);
+void input_set_max_poll_interval(struct input_dev *dev, unsigned int interval);
+
 int __must_check input_register_handler(struct input_handler *);
 void input_unregister_handler(struct input_handler *);
 
-- 
cgit v1.2.3


From ce03f613461642669d6150c405dd28f4bfd54bbb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 19 Aug 2019 16:31:42 +0200
Subject: posix-timers: Cleanup forward declarations and includes

 - Rename struct siginfo to kernel_siginfo as that is used and required
 - Add a forward declaration for task_struct and remove sched.h include
 - Remove timex.h include as it is not needed

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190819143801.472005793@linutronix.de
---
 include/linux/posix-timers.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 604cec0e41ba..26c636d1485b 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -4,11 +4,10 @@
 
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/sched.h>
-#include <linux/timex.h>
 #include <linux/alarmtimer.h>
 
-struct siginfo;
+struct kernel_siginfo;
+struct task_struct;
 
 struct cpu_timer_list {
 	struct list_head entry;
-- 
cgit v1.2.3


From 3758b0f86ef502e2f342055caef6d2232c2558b7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 19 Aug 2019 16:31:43 +0200
Subject: alarmtimers: Avoid rtc.h include

rtc.h is not needed in alarmtimers when a forward declaration of struct
rtc_device is provided. That allows to include posix-timers.h without
adding more includes to alarmtimer.h or creating circular include
dependencies.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190819143801.565389536@linutronix.de
---
 include/linux/alarmtimer.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 0760ca1cb009..74748e306f4b 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -5,7 +5,8 @@
 #include <linux/time.h>
 #include <linux/hrtimer.h>
 #include <linux/timerqueue.h>
-#include <linux/rtc.h>
+
+struct rtc_device;
 
 enum alarmtimer_type {
 	ALARM_REALTIME,
-- 
cgit v1.2.3


From 3b39f52a02d4b3322744a0a32d59142e01afa435 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 Aug 2019 18:45:37 -0400
Subject: xprtrdma: Move rpcrdma_mr_get out of frwr_map

Refactor: Retrieve an MR and handle error recovery entirely in
rpc_rdma.c, as this is not a device-specific function.

Note that since commit 89f90fe1ad8b ("SUNRPC: Allow calls to
xprt_transmit() to drain the entire transmit queue"), the
xprt_transmit function handles the cond_resched. The transport no
longer has to do this itself.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/rpcrdma.h  | 29 ++++++++++++++++++++++++++++-
 net/sunrpc/xprtrdma/frwr_ops.c  | 23 +++++------------------
 net/sunrpc/xprtrdma/rpc_rdma.c  | 30 ++++++++++++++++++++++++------
 net/sunrpc/xprtrdma/verbs.c     | 21 ++++-----------------
 net/sunrpc/xprtrdma/xprt_rdma.h |  4 ++--
 5 files changed, 63 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 6e6055eb67e7..83c4dfd7feea 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -464,7 +464,34 @@ TRACE_EVENT(xprtrdma_createmrs,
 	)
 );
 
-DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
+TRACE_EVENT(xprtrdma_nomrs,
+	TP_PROTO(
+		const struct rpcrdma_req *req
+	),
+
+	TP_ARGS(req),
+
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+	),
+
+	TP_fast_assign(
+		const struct rpc_rqst *rqst = &req->rl_slot;
+
+		__entry->req = req;
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+	),
+
+	TP_printk("task:%u@%u xid=0x%08x req=%p",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__entry->req
+	)
+);
 
 DEFINE_RDCH_EVENT(read);
 DEFINE_WRCH_EVENT(write);
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 97e1804139b8..362056f4f48d 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -291,31 +291,25 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
  * @nsegs: number of segments remaining
  * @writing: true when RDMA Write will be used
  * @xid: XID of RPC using the registered memory
- * @out: initialized MR
+ * @mr: MR to fill in
  *
  * Prepare a REG_MR Work Request to register a memory region
  * for remote access via RDMA READ or RDMA WRITE.
  *
  * Returns the next segment or a negative errno pointer.
- * On success, the prepared MR is planted in @out.
+ * On success, @mr is filled in.
  */
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 				struct rpcrdma_mr_seg *seg,
 				int nsegs, bool writing, __be32 xid,
-				struct rpcrdma_mr **out)
+				struct rpcrdma_mr *mr)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-	bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
-	struct rpcrdma_mr *mr;
-	struct ib_mr *ibmr;
 	struct ib_reg_wr *reg_wr;
+	struct ib_mr *ibmr;
 	int i, n;
 	u8 key;
 
-	mr = rpcrdma_mr_get(r_xprt);
-	if (!mr)
-		goto out_getmr_err;
-
 	if (nsegs > ia->ri_max_frwr_depth)
 		nsegs = ia->ri_max_frwr_depth;
 	for (i = 0; i < nsegs;) {
@@ -330,7 +324,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 
 		++seg;
 		++i;
-		if (holes_ok)
+		if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS)
 			continue;
 		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
 		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
@@ -365,22 +359,15 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 	mr->mr_offset = ibmr->iova;
 	trace_xprtrdma_mr_map(mr);
 
-	*out = mr;
 	return seg;
 
-out_getmr_err:
-	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
-	return ERR_PTR(-EAGAIN);
-
 out_dmamap_err:
 	mr->mr_dir = DMA_NONE;
 	trace_xprtrdma_frwr_sgerr(mr, i);
-	rpcrdma_mr_put(mr);
 	return ERR_PTR(-EIO);
 
 out_mapmr_err:
 	trace_xprtrdma_frwr_maperr(mr, n);
-	rpcrdma_mr_recycle(mr);
 	return ERR_PTR(-EIO);
 }
 
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0ac096a6348a..34772cb19286 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -342,6 +342,27 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
 	return 0;
 }
 
+static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
+						 struct rpcrdma_req *req,
+						 struct rpcrdma_mr_seg *seg,
+						 int nsegs, bool writing,
+						 struct rpcrdma_mr **mr)
+{
+	*mr = rpcrdma_mr_get(r_xprt);
+	if (!*mr)
+		goto out_getmr_err;
+
+	rpcrdma_mr_push(*mr, &req->rl_registered);
+	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
+
+out_getmr_err:
+	trace_xprtrdma_nomrs(req);
+	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
+	if (r_xprt->rx_ep.rep_connected != -ENODEV)
+		schedule_work(&r_xprt->rx_buf.rb_refresh_worker);
+	return ERR_PTR(-EAGAIN);
+}
+
 /* Register and XDR encode the Read list. Supports encoding a list of read
  * segments that belong to a single read chunk.
  *
@@ -379,10 +400,9 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 		return nsegs;
 
 	do {
-		seg = frwr_map(r_xprt, seg, nsegs, false, rqst->rq_xid, &mr);
+		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, false, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_read_segment(xdr, mr, pos) < 0)
 			return -EMSGSIZE;
@@ -440,10 +460,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
 	nchunks = 0;
 	do {
-		seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
+		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_rdma_segment(xdr, mr) < 0)
 			return -EMSGSIZE;
@@ -501,10 +520,9 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
 
 	nchunks = 0;
 	do {
-		seg = frwr_map(r_xprt, seg, nsegs, true, rqst->rq_xid, &mr);
+		seg = rpcrdma_mr_prepare(r_xprt, req, seg, nsegs, true, &mr);
 		if (IS_ERR(seg))
 			return PTR_ERR(seg);
-		rpcrdma_mr_push(mr, &req->rl_registered);
 
 		if (encode_rdma_segment(xdr, mr) < 0)
 			return -EMSGSIZE;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 5e0b774ed522..c9fa0f27b10a 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -408,7 +408,7 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
 	struct rpcrdma_req *req;
 	struct rpcrdma_rep *rep;
 
-	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+	cancel_work_sync(&buf->rb_refresh_worker);
 
 	/* This is similar to rpcrdma_ep_destroy, but:
 	 * - Don't cancel the connect worker.
@@ -975,7 +975,7 @@ static void
 rpcrdma_mr_refresh_worker(struct work_struct *work)
 {
 	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
-						  rb_refresh_worker.work);
+						  rb_refresh_worker);
 	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
 						   rx_buf);
 
@@ -1086,8 +1086,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 	spin_lock_init(&buf->rb_lock);
 	INIT_LIST_HEAD(&buf->rb_mrs);
 	INIT_LIST_HEAD(&buf->rb_all_mrs);
-	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
-			  rpcrdma_mr_refresh_worker);
+	INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);
 
 	rpcrdma_mrs_create(r_xprt);
 
@@ -1177,7 +1176,7 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
-	cancel_delayed_work_sync(&buf->rb_refresh_worker);
+	cancel_work_sync(&buf->rb_refresh_worker);
 
 	rpcrdma_sendctxs_destroy(buf);
 
@@ -1218,19 +1217,7 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 	spin_lock(&buf->rb_mrlock);
 	mr = rpcrdma_mr_pop(&buf->rb_mrs);
 	spin_unlock(&buf->rb_mrlock);
-	if (!mr)
-		goto out_nomrs;
 	return mr;
-
-out_nomrs:
-	trace_xprtrdma_nomrs(r_xprt);
-	if (r_xprt->rx_ep.rep_connected != -ENODEV)
-		schedule_delayed_work(&buf->rb_refresh_worker, 0);
-
-	/* Allow the reply handler and refresh worker to run */
-	cond_resched();
-
-	return NULL;
 }
 
 /**
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 3e0839c2cda2..9573587ca602 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -379,7 +379,7 @@ struct rpcrdma_buffer {
 	u32			rb_bc_srv_max_requests;
 	u32			rb_bc_max_requests;
 
-	struct delayed_work	rb_refresh_worker;
+	struct work_struct	rb_refresh_worker;
 };
 
 /*
@@ -548,7 +548,7 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 				struct rpcrdma_mr_seg *seg,
 				int nsegs, bool writing, __be32 xid,
-				struct rpcrdma_mr **mr);
+				struct rpcrdma_mr *mr);
 int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
 void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);
 void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-- 
cgit v1.2.3


From 30b10e89f2ae2c7b7b32548e04f57ab7eb030dfb Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 15 Aug 2019 19:46:11 +0000
Subject: net/mlx5: Add support for VNIC_ENV internal rq counter

Add mlx5 interface support for reading internal rq out of buffer counter
as part of QUERY_VNIC_ENV command. The command is used by the driver to
query vnic diagnostic statistics from FW.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index ab6ae723aae6..c788f895b350 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1116,7 +1116,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         cache_line_128byte[0x1];
 	u8         reserved_at_165[0x4];
 	u8         rts2rts_qp_counters_set_id[0x1];
-	u8         reserved_at_16a[0x5];
+	u8         reserved_at_16a[0x2];
+	u8         vnic_env_int_rq_oob[0x1];
+	u8         reserved_at_16d[0x2];
 	u8         qcam_reg[0x1];
 	u8         gid_table_size[0x10];
 
@@ -2772,7 +2774,11 @@ struct mlx5_ifc_vnic_diagnostic_statistics_bits {
 
 	u8         transmit_discard_vport_down[0x40];
 
-	u8         reserved_at_140[0xec0];
+	u8         reserved_at_140[0xa0];
+
+	u8         internal_rq_out_of_buffer[0x20];
+
+	u8         reserved_at_200[0xe00];
 };
 
 struct mlx5_ifc_traffic_counter_bits {
-- 
cgit v1.2.3


From caa1854735449d7afac6781679621fb9142fe810 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Thu, 15 Aug 2019 19:46:14 +0000
Subject: net/mlx5: Expose IP-in-IP capability bit

Expose Fw indication that it supports Stateless Offloads for IP over IP
tunneled packets. The following offloads are supported for the inner
packets: RSS, RX & TX Checksum Offloads, LSO and Flow Steering.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c788f895b350..2837fe4d8901 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -808,7 +808,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         swp_csum[0x1];
 	u8         swp_lso[0x1];
 	u8         cqe_checksum_full[0x1];
-	u8         reserved_at_24[0xc];
+	u8         reserved_at_24[0x5];
+	u8         tunnel_stateless_ip_over_ip[0x1];
+	u8         reserved_at_2a[0x6];
 	u8         max_vxlan_udp_ports[0x8];
 	u8         reserved_at_38[0x6];
 	u8         max_geneve_opt_len[0x1];
-- 
cgit v1.2.3


From 1eba383f4e36637ba859cb82b40e198c415db802 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 15 Aug 2019 19:46:16 +0000
Subject: net/mlx5: Add lag_tx_port_affinity capability bit

Add the lag_tx_port_affinity HCA capability bit that indicates that
setting port affinity of TISes is supported.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 2837fe4d8901..1e55cf73e88c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1249,7 +1249,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_263[0x8];
 	u8         log_bf_reg_size[0x5];
 
-	u8         reserved_at_270[0xb];
+	u8         reserved_at_270[0x8];
+	u8         lag_tx_port_affinity[0x1];
+	u8         reserved_at_279[0x2];
 	u8         lag_master[0x1];
 	u8         num_lag_ports[0x4];
 
-- 
cgit v1.2.3


From 67a72420a326b45514deb3f212085fb2cd1595b5 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 16 Aug 2019 14:43:21 -0700
Subject: ACPICA: Increase total number of possible Owner IDs

ACPICA commit 1f1652dad88b9d767767bc1f7eb4f7d99e6b5324

From 255 to 4095 possible IDs.

Link: https://github.com/acpica/acpica/commit/1f1652da
Reported-by: Hedi Berriche <hedi.berriche @hpe.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/aclocal.h   |  4 ++--
 drivers/acpi/acpica/acobject.h  |  2 +-
 drivers/acpi/acpica/acstruct.h  |  2 +-
 drivers/acpi/acpica/acutils.h   |  7 ++++---
 drivers/acpi/acpica/dbmethod.c  |  4 ++++
 drivers/acpi/acpica/exdump.c    |  6 +++---
 drivers/acpi/acpica/nsalloc.c   |  2 +-
 drivers/acpi/acpica/nsdump.c    |  2 +-
 drivers/acpi/acpica/tbdata.c    |  1 +
 drivers/acpi/acpica/uterror.c   |  6 +++---
 drivers/acpi/acpica/utownerid.c | 12 ++++++------
 include/acpi/acconfig.h         |  4 ++--
 include/acpi/actypes.h          |  4 ++--
 13 files changed, 31 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 13d513b81589..1ea52576f0a2 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -134,12 +134,12 @@ struct acpi_namespace_node {
 	union acpi_operand_object *object;	/* Interpreter object */
 	u8 descriptor_type;	/* Differentiate object descriptor types */
 	u8 type;		/* ACPI Type associated with this name */
-	u8 flags;		/* Miscellaneous flags */
-	acpi_owner_id owner_id;	/* Node creator */
+	u16 flags;		/* Miscellaneous flags */
 	union acpi_name_union name;	/* ACPI Name, always 4 chars per ACPI spec */
 	struct acpi_namespace_node *parent;	/* Parent node */
 	struct acpi_namespace_node *child;	/* First child */
 	struct acpi_namespace_node *peer;	/* First peer */
+	acpi_owner_id owner_id;	/* Node creator */
 
 	/*
 	 * The following fields are used by the ASL compiler and disassembler only
diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h
index b2ef703d7df8..8def0e3d690f 100644
--- a/drivers/acpi/acpica/acobject.h
+++ b/drivers/acpi/acpica/acobject.h
@@ -153,8 +153,8 @@ struct acpi_object_method {
 	} dispatch;
 
 	u32 aml_length;
-	u8 thread_count;
 	acpi_owner_id owner_id;
+	u8 thread_count;
 };
 
 /* Flags for info_flags field above */
diff --git a/drivers/acpi/acpica/acstruct.h b/drivers/acpi/acpica/acstruct.h
index 8a4e6b4aaf2c..218ff4c8b817 100644
--- a/drivers/acpi/acpica/acstruct.h
+++ b/drivers/acpi/acpica/acstruct.h
@@ -167,9 +167,9 @@ struct acpi_evaluate_info {
 	u32 return_flags;	/* Used for return value analysis */
 	u32 return_btype;	/* Bitmapped type of the returned object */
 	u16 param_count;	/* Count of the input argument list */
+	u16 node_flags;		/* Same as Node->Flags */
 	u8 pass_number;		/* Parser pass number */
 	u8 return_object_type;	/* Object type of the returned object */
-	u8 node_flags;		/* Same as Node->Flags */
 	u8 flags;		/* General flags */
 };
 
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index 9022537567e9..bcbc51a92dde 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -690,18 +690,19 @@ void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_warning(const char *module_name,
 			   u32 line_number,
 			   char *pathname,
-			   u8 node_flags, const char *format, ...);
+			   u16 node_flags, const char *format, ...);
 
 void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_info(const char *module_name,
 			u32 line_number,
-			char *pathname, u8 node_flags, const char *format, ...);
+			char *pathname,
+			u16 node_flags, const char *format, ...);
 
 void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_bios_error(const char *module_name,
 			      u32 line_number,
 			      char *pathname,
-			      u8 node_flags, const char *format, ...);
+			      u16 node_flags, const char *format, ...);
 
 void
 acpi_ut_prefixed_namespace_error(const char *module_name,
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index d8b7a0fe92ec..76a15b6ffc5d 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -302,6 +302,10 @@ acpi_status acpi_db_disassemble_method(char *name)
 	}
 
 	status = acpi_ut_allocate_owner_id(&obj_desc->method.owner_id);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
 	walk_state->owner_id = obj_desc->method.owner_id;
 
 	/* Push start scope on scope stack and make it current */
diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c
index 6526b2deeaad..a9bc938a3b55 100644
--- a/drivers/acpi/acpica/exdump.c
+++ b/drivers/acpi/acpica/exdump.c
@@ -94,7 +94,7 @@ static struct acpi_exdump_info acpi_ex_dump_method[9] = {
 	 "Parameter Count"},
 	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.sync_level), "Sync Level"},
 	{ACPI_EXD_POINTER, ACPI_EXD_OFFSET(method.mutex), "Mutex"},
-	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.owner_id), "Owner Id"},
+	{ACPI_EXD_UINT16, ACPI_EXD_OFFSET(method.owner_id), "Owner Id"},
 	{ACPI_EXD_UINT8, ACPI_EXD_OFFSET(method.thread_count), "Thread Count"},
 	{ACPI_EXD_UINT32, ACPI_EXD_OFFSET(method.aml_length), "Aml Length"},
 	{ACPI_EXD_POINTER, ACPI_EXD_OFFSET(method.aml_start), "Aml Start"}
@@ -269,8 +269,8 @@ static struct acpi_exdump_info acpi_ex_dump_field_common[7] = {
 
 static struct acpi_exdump_info acpi_ex_dump_node[7] = {
 	{ACPI_EXD_INIT, ACPI_EXD_TABLE_SIZE(acpi_ex_dump_node), NULL},
-	{ACPI_EXD_UINT8, ACPI_EXD_NSOFFSET(flags), "Flags"},
-	{ACPI_EXD_UINT8, ACPI_EXD_NSOFFSET(owner_id), "Owner Id"},
+	{ACPI_EXD_UINT16, ACPI_EXD_NSOFFSET(flags), "Flags"},
+	{ACPI_EXD_UINT16, ACPI_EXD_NSOFFSET(owner_id), "Owner Id"},
 	{ACPI_EXD_LIST, ACPI_EXD_NSOFFSET(object), "Object List"},
 	{ACPI_EXD_NODE, ACPI_EXD_NSOFFSET(parent), "Parent"},
 	{ACPI_EXD_NODE, ACPI_EXD_NSOFFSET(child), "Child"},
diff --git a/drivers/acpi/acpica/nsalloc.c b/drivers/acpi/acpica/nsalloc.c
index 6eb63db72249..fe9b3639a87d 100644
--- a/drivers/acpi/acpica/nsalloc.c
+++ b/drivers/acpi/acpica/nsalloc.c
@@ -241,7 +241,7 @@ void acpi_ns_install_node(struct acpi_walk_state *walk_state, struct acpi_namesp
 	node->type = (u8) type;
 
 	ACPI_DEBUG_PRINT((ACPI_DB_NAMES,
-			  "%4.4s (%s) [Node %p Owner %X] added to %4.4s (%s) [Node %p]\n",
+			  "%4.4s (%s) [Node %p Owner %3.3X] added to %4.4s (%s) [Node %p]\n",
 			  acpi_ut_get_node_name(node),
 			  acpi_ut_get_type_name(node->type), node, owner_id,
 			  acpi_ut_get_node_name(parent_node),
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index 1b12c172e115..9731d7cf1b83 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -197,7 +197,7 @@ acpi_ns_dump_one_object(acpi_handle obj_handle,
 
 	/* Now we can print out the pertinent information */
 
-	acpi_os_printf(" %-12s %p %2.2X ",
+	acpi_os_printf(" %-12s %p %3.3X ",
 		       acpi_ut_get_type_name(type), this_node,
 		       this_node->owner_id);
 
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 91a4b984f224..309440010ab2 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -750,6 +750,7 @@ acpi_status acpi_tb_delete_namespace_by_owner(u32 table_index)
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
+
 	acpi_ns_delete_namespace_by_owner(owner_id);
 	acpi_ut_release_write_lock(&acpi_gbl_namespace_rw_lock);
 	return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/uterror.c b/drivers/acpi/acpica/uterror.c
index 075457341bad..918aca7c4db4 100644
--- a/drivers/acpi/acpica/uterror.c
+++ b/drivers/acpi/acpica/uterror.c
@@ -39,7 +39,7 @@ void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_warning(const char *module_name,
 			   u32 line_number,
 			   char *pathname,
-			   u8 node_flags, const char *format, ...)
+			   u16 node_flags, const char *format, ...)
 {
 	va_list arg_list;
 
@@ -81,7 +81,7 @@ acpi_ut_predefined_warning(const char *module_name,
 void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_info(const char *module_name,
 			u32 line_number,
-			char *pathname, u8 node_flags, const char *format, ...)
+			char *pathname, u16 node_flags, const char *format, ...)
 {
 	va_list arg_list;
 
@@ -124,7 +124,7 @@ void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_bios_error(const char *module_name,
 			      u32 line_number,
 			      char *pathname,
-			      u8 node_flags, const char *format, ...)
+			      u16 node_flags, const char *format, ...)
 {
 	va_list arg_list;
 
diff --git a/drivers/acpi/acpica/utownerid.c b/drivers/acpi/acpica/utownerid.c
index 5eb8b76ce9d8..d3525ef8ed28 100644
--- a/drivers/acpi/acpica/utownerid.c
+++ b/drivers/acpi/acpica/utownerid.c
@@ -38,7 +38,7 @@ acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id)
 
 	if (*owner_id) {
 		ACPI_ERROR((AE_INFO,
-			    "Owner ID [0x%2.2X] already exists", *owner_id));
+			    "Owner ID [0x%3.3X] already exists", *owner_id));
 		return_ACPI_STATUS(AE_ALREADY_EXISTS);
 	}
 
@@ -88,14 +88,14 @@ acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id)
 				/*
 				 * Construct encoded ID from the index and bit position
 				 *
-				 * Note: Last [j].k (bit 255) is never used and is marked
+				 * Note: Last [j].k (bit 4095) is never used and is marked
 				 * permanently allocated (prevents +1 overflow)
 				 */
 				*owner_id =
 				    (acpi_owner_id)((k + 1) + ACPI_MUL_32(j));
 
 				ACPI_DEBUG_PRINT((ACPI_DB_VALUES,
-						  "Allocated OwnerId: %2.2X\n",
+						  "Allocated OwnerId: 0x%3.3X\n",
 						  (unsigned int)*owner_id));
 				goto exit;
 			}
@@ -116,7 +116,7 @@ acpi_status acpi_ut_allocate_owner_id(acpi_owner_id *owner_id)
 	 */
 	status = AE_OWNER_ID_LIMIT;
 	ACPI_ERROR((AE_INFO,
-		    "Could not allocate new OwnerId (255 max), AE_OWNER_ID_LIMIT"));
+		    "Could not allocate new OwnerId (4095 max), AE_OWNER_ID_LIMIT"));
 
 exit:
 	(void)acpi_ut_release_mutex(ACPI_MTX_CACHES);
@@ -153,7 +153,7 @@ void acpi_ut_release_owner_id(acpi_owner_id *owner_id_ptr)
 	/* Zero is not a valid owner_ID */
 
 	if (owner_id == 0) {
-		ACPI_ERROR((AE_INFO, "Invalid OwnerId: 0x%2.2X", owner_id));
+		ACPI_ERROR((AE_INFO, "Invalid OwnerId: 0x%3.3X", owner_id));
 		return_VOID;
 	}
 
@@ -179,7 +179,7 @@ void acpi_ut_release_owner_id(acpi_owner_id *owner_id_ptr)
 		acpi_gbl_owner_id_mask[index] ^= bit;
 	} else {
 		ACPI_ERROR((AE_INFO,
-			    "Release of non-allocated OwnerId: 0x%2.2X",
+			    "Attempted release of non-allocated OwnerId: 0x%3.3X",
 			    owner_id + 1));
 	}
 
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index 16a83959e616..42d573172e53 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -95,9 +95,9 @@
 
 #define ACPI_DEFAULT_PAGE_SIZE          4096	/* Must be power of 2 */
 
-/* owner_id tracking. 8 entries allows for 255 owner_ids */
+/* owner_id tracking. 128 entries allows for 4095 owner_ids */
 
-#define ACPI_NUM_OWNERID_MASKS          8
+#define ACPI_NUM_OWNERID_MASKS          128
 
 /* Size of the root table array is increased by this increment */
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index ad6892a24015..e31ef75a22a1 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -442,8 +442,8 @@ typedef void *acpi_handle;	/* Actually a ptr to a NS Node */
 
 /* Owner IDs are used to track namespace nodes for selective deletion */
 
-typedef u8 acpi_owner_id;
-#define ACPI_OWNER_ID_MAX               0xFF
+typedef u16 acpi_owner_id;
+#define ACPI_OWNER_ID_MAX               0xFFF	/* 4095 possible owner IDs */
 
 #define ACPI_INTEGER_BIT_SIZE           64
 #define ACPI_MAX_DECIMAL_DIGITS         20	/* 2^64 = 18,446,744,073,709,551,616 */
-- 
cgit v1.2.3


From 779cc7ce3dc519b05f274b79fb5a6c1703f373b5 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 16 Aug 2019 14:43:22 -0700
Subject: ACPICA: Macros: remove pointer math on a null pointer

ACPICA commit 02bbca5070e42d298c9b824300aa0eb8a082d797

Causes warnings on some compilers and/or tools.
Changed ACPI_TO_POINTER to use ACPI_CAST_PTR instead of using
arithmetic.

Link: https://github.com/acpica/acpica/commit/02bbca50
Reported-by: Qian Cai <cai@lca.pw>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/actypes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index e31ef75a22a1..f52c83eaedc4 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -506,7 +506,7 @@ typedef u64 acpi_integer;
 
 /* Pointer/Integer type conversions */
 
-#define ACPI_TO_POINTER(i)              ACPI_ADD_PTR (void, (void *) 0, (acpi_size) (i))
+#define ACPI_TO_POINTER(i)              ACPI_CAST_PTR (void, (acpi_size) (i))
 #define ACPI_TO_INTEGER(p)              ACPI_PTR_DIFF (p, (void *) 0)
 #define ACPI_OFFSET(d, f)               ACPI_PTR_DIFF (&(((d *) 0)->f), (void *) 0)
 #define ACPI_PHYSADDR_TO_PTR(i)         ACPI_TO_POINTER(i)
-- 
cgit v1.2.3


From 36056d0cd677bbde0dc75457be71119a630ebfc0 Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 16 Aug 2019 14:43:25 -0700
Subject: ACPICA: Fully deploy ACPI_PRINTF_LIKE macro

ACPICA commit d06def132a8852d02c9c7fee60f17b2011066e8e

Macro was not being used across all "printf-like" functions.

Also, clean up all calls to such functions now that they are
analyzed by the compiler (gcc). Both in 32-bit mode and 64-bit
mode.

Link: https://github.com/acpica/acpica/commit/d06def13
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/acutils.h  |  3 ++
 drivers/acpi/acpica/dbinput.c  |  8 ++--
 drivers/acpi/acpica/dbstats.c  | 94 +++++++++++++++++++++---------------------
 drivers/acpi/acpica/nsaccess.c |  2 +-
 drivers/acpi/acpica/utdebug.c  |  4 +-
 include/acpi/acpiosxf.h        |  1 +
 6 files changed, 58 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index bcbc51a92dde..601808be86d1 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -686,18 +686,21 @@ void acpi_ut_delete_address_lists(void);
 /*
  * utxferror - various error/warning output functions
  */
+ACPI_PRINTF_LIKE(5)
 void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_warning(const char *module_name,
 			   u32 line_number,
 			   char *pathname,
 			   u16 node_flags, const char *format, ...);
 
+ACPI_PRINTF_LIKE(5)
 void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_info(const char *module_name,
 			u32 line_number,
 			char *pathname,
 			u16 node_flags, const char *format, ...);
 
+ACPI_PRINTF_LIKE(5)
 void ACPI_INTERNAL_VAR_XFACE
 acpi_ut_predefined_bios_error(const char *module_name,
 			      u32 line_number,
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 32e4f0c6b5df..55a7e10998d8 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -853,24 +853,24 @@ acpi_db_command_dispatch(char *input_buffer,
 
 		if (param_count == 0) {
 			acpi_os_printf
-			    ("Current debug level for file output is:    %8.8lX\n",
+			    ("Current debug level for file output is:    %8.8X\n",
 			     acpi_gbl_db_debug_level);
 			acpi_os_printf
-			    ("Current debug level for console output is: %8.8lX\n",
+			    ("Current debug level for console output is: %8.8X\n",
 			     acpi_gbl_db_console_debug_level);
 		} else if (param_count == 2) {
 			temp = acpi_gbl_db_console_debug_level;
 			acpi_gbl_db_console_debug_level =
 			    strtoul(acpi_gbl_db_args[1], NULL, 16);
 			acpi_os_printf
-			    ("Debug Level for console output was %8.8lX, now %8.8lX\n",
+			    ("Debug Level for console output was %8.8X, now %8.8X\n",
 			     temp, acpi_gbl_db_console_debug_level);
 		} else {
 			temp = acpi_gbl_db_debug_level;
 			acpi_gbl_db_debug_level =
 			    strtoul(acpi_gbl_db_args[1], NULL, 16);
 			acpi_os_printf
-			    ("Debug Level for file output was %8.8lX, now %8.8lX\n",
+			    ("Debug Level for file output was %8.8X, now %8.8X\n",
 			     temp, acpi_gbl_db_debug_level);
 		}
 		break;
diff --git a/drivers/acpi/acpica/dbstats.c b/drivers/acpi/acpica/dbstats.c
index 4f00351dcbb1..3af88e70238c 100644
--- a/drivers/acpi/acpica/dbstats.c
+++ b/drivers/acpi/acpica/dbstats.c
@@ -341,17 +341,17 @@ acpi_status acpi_db_display_statistics(char *type_arg)
 			       "ACPI_TYPE", "NODES", "OBJECTS");
 
 		for (i = 0; i < ACPI_TYPE_NS_NODE_MAX; i++) {
-			acpi_os_printf("%16.16s % 10u% 10u\n",
+			acpi_os_printf("%16.16s %10u %10u\n",
 				       acpi_ut_get_type_name(i),
 				       acpi_gbl_node_type_count[i],
 				       acpi_gbl_obj_type_count[i]);
 		}
 
-		acpi_os_printf("%16.16s % 10u% 10u\n", "Misc/Unknown",
+		acpi_os_printf("%16.16s %10u %10u\n", "Misc/Unknown",
 			       acpi_gbl_node_type_count_misc,
 			       acpi_gbl_obj_type_count_misc);
 
-		acpi_os_printf("%16.16s % 10u% 10u\n", "TOTALS:",
+		acpi_os_printf("%16.16s %10u %10u\n", "TOTALS:",
 			       acpi_gbl_num_nodes, acpi_gbl_num_objects);
 		break;
 
@@ -379,16 +379,16 @@ acpi_status acpi_db_display_statistics(char *type_arg)
 	case CMD_STAT_MISC:
 
 		acpi_os_printf("\nMiscellaneous Statistics:\n\n");
-		acpi_os_printf("%-28s:     %7lu\n", "Calls to AcpiPsFind",
-			       (u64)acpi_gbl_ps_find_count);
-		acpi_os_printf("%-28s:     %7lu\n", "Calls to AcpiNsLookup",
-			       (u64)acpi_gbl_ns_lookup_count);
+		acpi_os_printf("%-28s:     %7u\n", "Calls to AcpiPsFind",
+			       acpi_gbl_ps_find_count);
+		acpi_os_printf("%-28s:     %7u\n", "Calls to AcpiNsLookup",
+			       acpi_gbl_ns_lookup_count);
 
 		acpi_os_printf("\nMutex usage:\n\n");
 		for (i = 0; i < ACPI_NUM_MUTEX; i++) {
-			acpi_os_printf("%-28s:     %7lu\n",
+			acpi_os_printf("%-28s:     %7u\n",
 				       acpi_ut_get_mutex_name(i),
-				       (u64)acpi_gbl_mutex_info[i].use_count);
+				       acpi_gbl_mutex_info[i].use_count);
 		}
 		break;
 
@@ -397,87 +397,87 @@ acpi_status acpi_db_display_statistics(char *type_arg)
 		acpi_os_printf("\nInternal object sizes:\n\n");
 
 		acpi_os_printf("Common         %3d\n",
-			       sizeof(struct acpi_object_common));
+			       (u32)sizeof(struct acpi_object_common));
 		acpi_os_printf("Number         %3d\n",
-			       sizeof(struct acpi_object_integer));
+			       (u32)sizeof(struct acpi_object_integer));
 		acpi_os_printf("String         %3d\n",
-			       sizeof(struct acpi_object_string));
+			       (u32)sizeof(struct acpi_object_string));
 		acpi_os_printf("Buffer         %3d\n",
-			       sizeof(struct acpi_object_buffer));
+			       (u32)sizeof(struct acpi_object_buffer));
 		acpi_os_printf("Package        %3d\n",
-			       sizeof(struct acpi_object_package));
+			       (u32)sizeof(struct acpi_object_package));
 		acpi_os_printf("BufferField    %3d\n",
-			       sizeof(struct acpi_object_buffer_field));
+			       (u32)sizeof(struct acpi_object_buffer_field));
 		acpi_os_printf("Device         %3d\n",
-			       sizeof(struct acpi_object_device));
+			       (u32)sizeof(struct acpi_object_device));
 		acpi_os_printf("Event          %3d\n",
-			       sizeof(struct acpi_object_event));
+			       (u32)sizeof(struct acpi_object_event));
 		acpi_os_printf("Method         %3d\n",
-			       sizeof(struct acpi_object_method));
+			       (u32)sizeof(struct acpi_object_method));
 		acpi_os_printf("Mutex          %3d\n",
-			       sizeof(struct acpi_object_mutex));
+			       (u32)sizeof(struct acpi_object_mutex));
 		acpi_os_printf("Region         %3d\n",
-			       sizeof(struct acpi_object_region));
+			       (u32)sizeof(struct acpi_object_region));
 		acpi_os_printf("PowerResource  %3d\n",
-			       sizeof(struct acpi_object_power_resource));
+			       (u32)sizeof(struct acpi_object_power_resource));
 		acpi_os_printf("Processor      %3d\n",
-			       sizeof(struct acpi_object_processor));
+			       (u32)sizeof(struct acpi_object_processor));
 		acpi_os_printf("ThermalZone    %3d\n",
-			       sizeof(struct acpi_object_thermal_zone));
+			       (u32)sizeof(struct acpi_object_thermal_zone));
 		acpi_os_printf("RegionField    %3d\n",
-			       sizeof(struct acpi_object_region_field));
+			       (u32)sizeof(struct acpi_object_region_field));
 		acpi_os_printf("BankField      %3d\n",
-			       sizeof(struct acpi_object_bank_field));
+			       (u32)sizeof(struct acpi_object_bank_field));
 		acpi_os_printf("IndexField     %3d\n",
-			       sizeof(struct acpi_object_index_field));
+			       (u32)sizeof(struct acpi_object_index_field));
 		acpi_os_printf("Reference      %3d\n",
-			       sizeof(struct acpi_object_reference));
+			       (u32)sizeof(struct acpi_object_reference));
 		acpi_os_printf("Notify         %3d\n",
-			       sizeof(struct acpi_object_notify_handler));
+			       (u32)sizeof(struct acpi_object_notify_handler));
 		acpi_os_printf("AddressSpace   %3d\n",
-			       sizeof(struct acpi_object_addr_handler));
+			       (u32)sizeof(struct acpi_object_addr_handler));
 		acpi_os_printf("Extra          %3d\n",
-			       sizeof(struct acpi_object_extra));
+			       (u32)sizeof(struct acpi_object_extra));
 		acpi_os_printf("Data           %3d\n",
-			       sizeof(struct acpi_object_data));
+			       (u32)sizeof(struct acpi_object_data));
 
 		acpi_os_printf("\n");
 
 		acpi_os_printf("ParseObject    %3d\n",
-			       sizeof(struct acpi_parse_obj_common));
+			       (u32)sizeof(struct acpi_parse_obj_common));
 		acpi_os_printf("ParseObjectNamed %3d\n",
-			       sizeof(struct acpi_parse_obj_named));
+			       (u32)sizeof(struct acpi_parse_obj_named));
 		acpi_os_printf("ParseObjectAsl %3d\n",
-			       sizeof(struct acpi_parse_obj_asl));
+			       (u32)sizeof(struct acpi_parse_obj_asl));
 		acpi_os_printf("OperandObject  %3d\n",
-			       sizeof(union acpi_operand_object));
+			       (u32)sizeof(union acpi_operand_object));
 		acpi_os_printf("NamespaceNode  %3d\n",
-			       sizeof(struct acpi_namespace_node));
+			       (u32)sizeof(struct acpi_namespace_node));
 		acpi_os_printf("AcpiObject     %3d\n",
-			       sizeof(union acpi_object));
+			       (u32)sizeof(union acpi_object));
 
 		acpi_os_printf("\n");
 
 		acpi_os_printf("Generic State  %3d\n",
-			       sizeof(union acpi_generic_state));
+			       (u32)sizeof(union acpi_generic_state));
 		acpi_os_printf("Common State   %3d\n",
-			       sizeof(struct acpi_common_state));
+			       (u32)sizeof(struct acpi_common_state));
 		acpi_os_printf("Control State  %3d\n",
-			       sizeof(struct acpi_control_state));
+			       (u32)sizeof(struct acpi_control_state));
 		acpi_os_printf("Update State   %3d\n",
-			       sizeof(struct acpi_update_state));
+			       (u32)sizeof(struct acpi_update_state));
 		acpi_os_printf("Scope State    %3d\n",
-			       sizeof(struct acpi_scope_state));
+			       (u32)sizeof(struct acpi_scope_state));
 		acpi_os_printf("Parse Scope    %3d\n",
-			       sizeof(struct acpi_pscope_state));
+			       (u32)sizeof(struct acpi_pscope_state));
 		acpi_os_printf("Package State  %3d\n",
-			       sizeof(struct acpi_pkg_state));
+			       (u32)sizeof(struct acpi_pkg_state));
 		acpi_os_printf("Thread State   %3d\n",
-			       sizeof(struct acpi_thread_state));
+			       (u32)sizeof(struct acpi_thread_state));
 		acpi_os_printf("Result Values  %3d\n",
-			       sizeof(struct acpi_result_values));
+			       (u32)sizeof(struct acpi_result_values));
 		acpi_os_printf("Notify Info    %3d\n",
-			       sizeof(struct acpi_notify_info));
+			       (u32)sizeof(struct acpi_notify_info));
 		break;
 
 	case CMD_STAT_STACK:
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 2566e2d4c780..3f045b5953b2 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -598,7 +598,7 @@ acpi_ns_lookup(union acpi_generic_state *scope_info,
 				if (flags & ACPI_NS_PREFIX_MUST_EXIST) {
 					acpi_os_printf(ACPI_MSG_BIOS_ERROR
 						       "Object does not exist: %4.4s\n",
-						       &simple_name);
+						       (char *)&simple_name);
 				}
 #endif
 				/* Name not found in ACPI namespace */
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 01b1b36c8a8e..5b169b5f0f1a 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -158,7 +158,7 @@ acpi_debug_print(u32 requested_debug_level,
 	 * Display the module name, current line number, thread ID (if requested),
 	 * current procedure nesting level, and the current procedure name
 	 */
-	acpi_os_printf("%9s-%04ld ", module_name, line_number);
+	acpi_os_printf("%9s-%04d ", module_name, line_number);
 
 #ifdef ACPI_APPLICATION
 	/*
@@ -177,7 +177,7 @@ acpi_debug_print(u32 requested_debug_level,
 		fill_count = 0;
 	}
 
-	acpi_os_printf("[%02ld] %*s",
+	acpi_os_printf("[%02d] %*s",
 		       acpi_gbl_nesting_level, acpi_gbl_nesting_level + 1, " ");
 	acpi_os_printf("%s%*s: ",
 		       acpi_ut_trim_function_name(function_name), fill_count,
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 1b59fb61f67d..2e63b7b390f5 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -330,6 +330,7 @@ acpi_status acpi_os_enter_sleep(u8 sleep_state, u32 rega_value, u32 regb_value);
  * Debug print routines
  */
 #ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_printf
+ACPI_PRINTF_LIKE(1)
 void ACPI_INTERNAL_VAR_XFACE acpi_os_printf(const char *format, ...);
 #endif
 
-- 
cgit v1.2.3


From be0381cf2d57c21d06ebf8d8f8a3d5b8bfb2e4ab Mon Sep 17 00:00:00 2001
From: Jung-uk Kim <jkim@free_BSD.org>
Date: Fri, 16 Aug 2019 14:43:26 -0700
Subject: ACPICA: Differentiate Windows 8.1 from Windows 8.

ACPICA commit 66db7b38f61e63f11e48a0ea993d92b12e0a17ca

Link: https://github.com/acpica/acpica/commit/66db7b38
Signed-off-by: Jung-uk Kim <jkim@free_BSD.org>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utosi.c |  2 +-
 include/acpi/actypes.h      | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index 688c61a90725..fc09d234fe62 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -65,7 +65,7 @@ static struct acpi_interface_info acpi_default_supported_interfaces[] = {
 	{"Windows 2006 SP2", NULL, 0, ACPI_OSI_WIN_VISTA_SP2},	/* Windows Vista SP2 - Added 09/2010 */
 	{"Windows 2009", NULL, 0, ACPI_OSI_WIN_7},	/* Windows 7 and Server 2008 R2 - Added 09/2009 */
 	{"Windows 2012", NULL, 0, ACPI_OSI_WIN_8},	/* Windows 8 and Server 2012 - Added 08/2012 */
-	{"Windows 2013", NULL, 0, ACPI_OSI_WIN_8},	/* Windows 8.1 and Server 2012 R2 - Added 01/2014 */
+	{"Windows 2013", NULL, 0, ACPI_OSI_WIN_8_1},	/* Windows 8.1 and Server 2012 R2 - Added 01/2014 */
 	{"Windows 2015", NULL, 0, ACPI_OSI_WIN_10},	/* Windows 10 - Added 03/2015 */
 	{"Windows 2016", NULL, 0, ACPI_OSI_WIN_10_RS1},	/* Windows 10 version 1607 - Added 12/2017 */
 	{"Windows 2017", NULL, 0, ACPI_OSI_WIN_10_RS2},	/* Windows 10 version 1703 - Added 12/2017 */
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index f52c83eaedc4..10c10b9d18ae 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -1265,12 +1265,13 @@ typedef enum {
 #define ACPI_OSI_WIN_VISTA_SP2          0x0A
 #define ACPI_OSI_WIN_7                  0x0B
 #define ACPI_OSI_WIN_8                  0x0C
-#define ACPI_OSI_WIN_10                 0x0D
-#define ACPI_OSI_WIN_10_RS1             0x0E
-#define ACPI_OSI_WIN_10_RS2             0x0F
-#define ACPI_OSI_WIN_10_RS3             0x10
-#define ACPI_OSI_WIN_10_RS4             0x11
-#define ACPI_OSI_WIN_10_RS5             0x12
+#define ACPI_OSI_WIN_8_1                0x0D
+#define ACPI_OSI_WIN_10                 0x0E
+#define ACPI_OSI_WIN_10_RS1             0x0F
+#define ACPI_OSI_WIN_10_RS2             0x10
+#define ACPI_OSI_WIN_10_RS3             0x11
+#define ACPI_OSI_WIN_10_RS4             0x12
+#define ACPI_OSI_WIN_10_RS5             0x13
 
 /* Definitions of getopt */
 
-- 
cgit v1.2.3


From 8696beed34d1c895f54279be19de82becf7a869b Mon Sep 17 00:00:00 2001
From: Jung-uk Kim <jkim@free_BSD.org>
Date: Fri, 16 Aug 2019 14:43:27 -0700
Subject: ACPICA: Add "Windows 2019" string to _OSI support.

ACPICA commit 32fffb242800b0202986e86d9b0e16f88a23de66

Link: https://github.com/acpica/acpica/commit/32fffb24
Signed-off-by: Jung-uk Kim <jkim@free_BSD.org>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/utosi.c | 1 +
 include/acpi/actypes.h      | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index fc09d234fe62..ad2b218039d0 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -72,6 +72,7 @@ static struct acpi_interface_info acpi_default_supported_interfaces[] = {
 	{"Windows 2017.2", NULL, 0, ACPI_OSI_WIN_10_RS3},	/* Windows 10 version 1709 - Added 02/2018 */
 	{"Windows 2018", NULL, 0, ACPI_OSI_WIN_10_RS4},	/* Windows 10 version 1803 - Added 11/2018 */
 	{"Windows 2018.2", NULL, 0, ACPI_OSI_WIN_10_RS5},	/* Windows 10 version 1809 - Added 11/2018 */
+	{"Windows 2019", NULL, 0, ACPI_OSI_WIN_10_19H1},	/* Windows 10 version 1903 - Added 08/2019 */
 
 	/* Feature Group Strings */
 
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index 10c10b9d18ae..2f3f28c7cea3 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -1272,6 +1272,7 @@ typedef enum {
 #define ACPI_OSI_WIN_10_RS3             0x11
 #define ACPI_OSI_WIN_10_RS4             0x12
 #define ACPI_OSI_WIN_10_RS5             0x13
+#define ACPI_OSI_WIN_10_19H1            0x14
 
 /* Definitions of getopt */
 
-- 
cgit v1.2.3


From 71bb4d9a4085ec6e2a90ca62c6afcafb945b55aa Mon Sep 17 00:00:00 2001
From: Bob Moore <robert.moore@intel.com>
Date: Fri, 16 Aug 2019 14:43:28 -0700
Subject: ACPICA: Update version to 20190816.

ACPICA commit db29ce57ced39ec610667c4b946bc3bb38bc8efa

Version 20190816.

Link: https://github.com/acpica/acpica/commit/db29ce57
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 3845c8fcc94e..d66fc7cce6a7 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20190703
+#define ACPI_CA_VERSION                 0x20190816
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
-- 
cgit v1.2.3


From 90ae409f9eb3bcaf38688f9ec22375816053a08e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 20 Aug 2019 11:45:49 +0900
Subject: dma-direct: fix zone selection after an unaddressable CMA allocation

The new dma_alloc_contiguous hides if we allocate CMA or regular
pages, and thus fails to retry a ZONE_NORMAL allocation if the CMA
allocation succeeds but isn't addressable.  That means we either fail
outright or dip into a small zone that might not succeed either.

Thanks to Hillf Danton for debugging this issue.

Fixes: b1d2dc009dec ("dma-contiguous: add dma_{alloc,free}_contiguous() helpers")
Reported-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Tobias Klausmann <tobias.johannes.klausmann@mni.thm.de>
---
 drivers/iommu/dma-iommu.c      |  3 +++
 include/linux/dma-contiguous.h |  5 +----
 kernel/dma/contiguous.c        |  8 ++------
 kernel/dma/direct.c            | 10 +++++++++-
 4 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d991d40f797f..f68a62c3c32b 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -965,10 +965,13 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
 {
 	bool coherent = dev_is_dma_coherent(dev);
 	size_t alloc_size = PAGE_ALIGN(size);
+	int node = dev_to_node(dev);
 	struct page *page = NULL;
 	void *cpu_addr;
 
 	page = dma_alloc_contiguous(dev, alloc_size, gfp);
+	if (!page)
+		page = alloc_pages_node(node, gfp, get_order(alloc_size));
 	if (!page)
 		return NULL;
 
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index c05d4e661489..03f8e98e3bcc 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -160,10 +160,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
 static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size,
 		gfp_t gfp)
 {
-	int node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
-	size_t align = get_order(PAGE_ALIGN(size));
-
-	return alloc_pages_node(node, gfp, align);
+	return NULL;
 }
 
 static inline void dma_free_contiguous(struct device *dev, struct page *page,
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 2bd410f934b3..69cfb4345388 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -230,9 +230,7 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
  */
 struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
 {
-	int node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
-	size_t count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	size_t align = get_order(PAGE_ALIGN(size));
+	size_t count = size >> PAGE_SHIFT;
 	struct page *page = NULL;
 	struct cma *cma = NULL;
 
@@ -243,14 +241,12 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
 
 	/* CMA can be used only in the context which permits sleeping */
 	if (cma && gfpflags_allow_blocking(gfp)) {
+		size_t align = get_order(size);
 		size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
 
 		page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN);
 	}
 
-	/* Fallback allocation of normal pages */
-	if (!page)
-		page = alloc_pages_node(node, gfp, align);
 	return page;
 }
 
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 795c9b095d75..706113c6bebc 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -85,6 +85,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
+	size_t alloc_size = PAGE_ALIGN(size);
+	int node = dev_to_node(dev);
 	struct page *page = NULL;
 	u64 phys_mask;
 
@@ -95,8 +97,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
 	gfp &= ~__GFP_ZERO;
 	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
 			&phys_mask);
+	page = dma_alloc_contiguous(dev, alloc_size, gfp);
+	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+		dma_free_contiguous(dev, page, alloc_size);
+		page = NULL;
+	}
 again:
-	page = dma_alloc_contiguous(dev, size, gfp);
+	if (!page)
+		page = alloc_pages_node(node, gfp, get_order(alloc_size));
 	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
 		dma_free_contiguous(dev, page, size);
 		page = NULL;
-- 
cgit v1.2.3


From 0d105d0f25386ee5b74603db6d249ebed7590cbc Mon Sep 17 00:00:00 2001
From: Tri Vo <trong@android.com>
Date: Tue, 6 Aug 2019 18:48:44 -0700
Subject: PM / wakeup: Drop wakeup_source_init(), wakeup_source_prepare()

wakeup_source_init() has no users. Remove it.

As a result, wakeup_source_prepare() is only called from
wakeup_source_create(). Merge wakeup_source_prepare() into
wakeup_source_create() and remove it.

Change wakeup_source_create() behavior so that assigning NULL to wakeup
source's name throws an error.

Signed-off-by: Tri Vo <trong@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/wakeup.c | 33 +++++++++++++--------------------
 include/linux/pm_wakeup.h   | 11 -----------
 2 files changed, 13 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index ee31d4f8d856..3938892c8903 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -72,23 +72,6 @@ static struct wakeup_source deleted_ws = {
 	.lock =  __SPIN_LOCK_UNLOCKED(deleted_ws.lock),
 };
 
-/**
- * wakeup_source_prepare - Prepare a new wakeup source for initialization.
- * @ws: Wakeup source to prepare.
- * @name: Pointer to the name of the new wakeup source.
- *
- * Callers must ensure that the @name string won't be freed when @ws is still in
- * use.
- */
-void wakeup_source_prepare(struct wakeup_source *ws, const char *name)
-{
-	if (ws) {
-		memset(ws, 0, sizeof(*ws));
-		ws->name = name;
-	}
-}
-EXPORT_SYMBOL_GPL(wakeup_source_prepare);
-
 /**
  * wakeup_source_create - Create a struct wakeup_source object.
  * @name: Name of the new wakeup source.
@@ -96,13 +79,23 @@ EXPORT_SYMBOL_GPL(wakeup_source_prepare);
 struct wakeup_source *wakeup_source_create(const char *name)
 {
 	struct wakeup_source *ws;
+	const char *ws_name;
 
-	ws = kmalloc(sizeof(*ws), GFP_KERNEL);
+	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
 	if (!ws)
-		return NULL;
+		goto err_ws;
+
+	ws_name = kstrdup_const(name, GFP_KERNEL);
+	if (!ws_name)
+		goto err_name;
+	ws->name = ws_name;
 
-	wakeup_source_prepare(ws, name ? kstrdup_const(name, GFP_KERNEL) : NULL);
 	return ws;
+
+err_name:
+	kfree(ws);
+err_ws:
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(wakeup_source_create);
 
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 91027602d137..c0cad2d8f800 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -81,7 +81,6 @@ static inline void device_set_wakeup_path(struct device *dev)
 }
 
 /* drivers/base/power/wakeup.c */
-extern void wakeup_source_prepare(struct wakeup_source *ws, const char *name);
 extern struct wakeup_source *wakeup_source_create(const char *name);
 extern void wakeup_source_destroy(struct wakeup_source *ws);
 extern void wakeup_source_add(struct wakeup_source *ws);
@@ -112,9 +111,6 @@ static inline bool device_can_wakeup(struct device *dev)
 	return dev->power.can_wakeup;
 }
 
-static inline void wakeup_source_prepare(struct wakeup_source *ws,
-					 const char *name) {}
-
 static inline struct wakeup_source *wakeup_source_create(const char *name)
 {
 	return NULL;
@@ -181,13 +177,6 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
 
 #endif /* !CONFIG_PM_SLEEP */
 
-static inline void wakeup_source_init(struct wakeup_source *ws,
-				      const char *name)
-{
-	wakeup_source_prepare(ws, name);
-	wakeup_source_add(ws);
-}
-
 static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 {
 	return pm_wakeup_ws_event(ws, msec, false);
-- 
cgit v1.2.3


From c8377adfa78103be5380200eb9dab764d7ca890e Mon Sep 17 00:00:00 2001
From: Tri Vo <trong@android.com>
Date: Tue, 6 Aug 2019 18:48:46 -0700
Subject: PM / wakeup: Show wakeup sources stats in sysfs

Add an ID and a device pointer to 'struct wakeup_source'. Use them to to
expose wakeup sources statistics in sysfs under
/sys/class/wakeup/wakeup<ID>/*.

Co-developed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Co-developed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Tri Vo <trong@android.com>
Tested-by: Kalesh Singh <kaleshsingh@google.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/ABI/testing/sysfs-class-wakeup |  76 ++++++++++
 drivers/acpi/device_pm.c                     |   3 +-
 drivers/base/power/Makefile                  |   2 +-
 drivers/base/power/power.h                   |   9 ++
 drivers/base/power/wakeup.c                  |  28 +++-
 drivers/base/power/wakeup_stats.c            | 203 +++++++++++++++++++++++++++
 fs/eventpoll.c                               |   4 +-
 include/linux/pm_wakeup.h                    |  10 +-
 kernel/power/autosleep.c                     |   2 +-
 kernel/power/wakelock.c                      |   2 +-
 kernel/time/alarmtimer.c                     |   2 +-
 11 files changed, 328 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-class-wakeup
 create mode 100644 drivers/base/power/wakeup_stats.c

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-wakeup b/Documentation/ABI/testing/sysfs-class-wakeup
new file mode 100644
index 000000000000..754aab8b6dcd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-wakeup
@@ -0,0 +1,76 @@
+What:		/sys/class/wakeup/
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		The /sys/class/wakeup/ directory contains pointers to all
+		wakeup sources in the kernel at that moment in time.
+
+What:		/sys/class/wakeup/.../name
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the name of the wakeup source.
+
+What:		/sys/class/wakeup/.../active_count
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the number of times the wakeup source was
+		activated.
+
+What:		/sys/class/wakeup/.../event_count
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the number of signaled wakeup events
+		associated with the wakeup source.
+
+What:		/sys/class/wakeup/.../wakeup_count
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the number of times the wakeup source might
+		abort suspend.
+
+What:		/sys/class/wakeup/.../expire_count
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the number of times the wakeup source's
+		timeout has expired.
+
+What:		/sys/class/wakeup/.../active_time_ms
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the amount of time the wakeup source has
+		been continuously active, in milliseconds.  If the wakeup
+		source is not active, this file contains '0'.
+
+What:		/sys/class/wakeup/.../total_time_ms
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the total amount of time this wakeup source
+		has been active, in milliseconds.
+
+What:		/sys/class/wakeup/.../max_time_ms
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the maximum amount of time this wakeup
+		source has been continuously active, in milliseconds.
+
+What:		/sys/class/wakeup/.../last_change_ms
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		This file contains the monotonic clock time when the wakeup
+		source was touched last time, in milliseconds.
+
+What:		/sys/class/wakeup/.../prevent_suspend_time_ms
+Date:		June 2019
+Contact:	Tri Vo <trong@android.com>
+Description:
+		The file contains the total amount of time this wakeup source
+		has been preventing autosleep, in milliseconds.
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index f616b16c1f0b..3f8958007a93 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -497,7 +497,8 @@ acpi_status acpi_add_pm_notifier(struct acpi_device *adev, struct device *dev,
 		goto out;
 
 	mutex_lock(&acpi_pm_notifier_lock);
-	adev->wakeup.ws = wakeup_source_register(dev_name(&adev->dev));
+	adev->wakeup.ws = wakeup_source_register(&adev->dev,
+						 dev_name(&adev->dev));
 	adev->wakeup.context.dev = dev;
 	adev->wakeup.context.func = func;
 	adev->wakeup.flags.notifier_present = true;
diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile
index e1bb691cf8f1..ec5bb190b9d0 100644
--- a/drivers/base/power/Makefile
+++ b/drivers/base/power/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_PM)	+= sysfs.o generic_ops.o common.o qos.o runtime.o wakeirq.o
-obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o
+obj-$(CONFIG_PM_SLEEP)	+= main.o wakeup.o wakeup_stats.o
 obj-$(CONFIG_PM_TRACE_RTC)	+= trace.o
 obj-$(CONFIG_PM_GENERIC_DOMAINS)	+=  domain.o domain_governor.o
 obj-$(CONFIG_HAVE_CLK)	+= clock_ops.o
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index ec33fbdb919b..57b1d1d88c8e 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -149,3 +149,12 @@ static inline void device_pm_init(struct device *dev)
 	device_pm_sleep_init(dev);
 	pm_runtime_init(dev);
 }
+
+#ifdef CONFIG_PM_SLEEP
+
+/* drivers/base/power/wakeup_stats.c */
+extern int wakeup_source_sysfs_add(struct device *parent,
+				   struct wakeup_source *ws);
+extern void wakeup_source_sysfs_remove(struct wakeup_source *ws);
+
+#endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 3938892c8903..973675f24314 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -72,6 +72,8 @@ static struct wakeup_source deleted_ws = {
 	.lock =  __SPIN_LOCK_UNLOCKED(deleted_ws.lock),
 };
 
+static DEFINE_IDA(wakeup_ida);
+
 /**
  * wakeup_source_create - Create a struct wakeup_source object.
  * @name: Name of the new wakeup source.
@@ -80,6 +82,7 @@ struct wakeup_source *wakeup_source_create(const char *name)
 {
 	struct wakeup_source *ws;
 	const char *ws_name;
+	int id;
 
 	ws = kzalloc(sizeof(*ws), GFP_KERNEL);
 	if (!ws)
@@ -90,8 +93,15 @@ struct wakeup_source *wakeup_source_create(const char *name)
 		goto err_name;
 	ws->name = ws_name;
 
+	id = ida_alloc(&wakeup_ida, GFP_KERNEL);
+	if (id < 0)
+		goto err_id;
+	ws->id = id;
+
 	return ws;
 
+err_id:
+	kfree_const(ws->name);
 err_name:
 	kfree(ws);
 err_ws:
@@ -140,6 +150,7 @@ void wakeup_source_destroy(struct wakeup_source *ws)
 
 	__pm_relax(ws);
 	wakeup_source_record(ws);
+	ida_free(&wakeup_ida, ws->id);
 	kfree_const(ws->name);
 	kfree(ws);
 }
@@ -193,16 +204,24 @@ EXPORT_SYMBOL_GPL(wakeup_source_remove);
 
 /**
  * wakeup_source_register - Create wakeup source and add it to the list.
+ * @dev: Device this wakeup source is associated with (or NULL if virtual).
  * @name: Name of the wakeup source to register.
  */
-struct wakeup_source *wakeup_source_register(const char *name)
+struct wakeup_source *wakeup_source_register(struct device *dev,
+					     const char *name)
 {
 	struct wakeup_source *ws;
+	int ret;
 
 	ws = wakeup_source_create(name);
-	if (ws)
+	if (ws) {
+		ret = wakeup_source_sysfs_add(dev, ws);
+		if (ret) {
+			wakeup_source_destroy(ws);
+			return NULL;
+		}
 		wakeup_source_add(ws);
-
+	}
 	return ws;
 }
 EXPORT_SYMBOL_GPL(wakeup_source_register);
@@ -215,6 +234,7 @@ void wakeup_source_unregister(struct wakeup_source *ws)
 {
 	if (ws) {
 		wakeup_source_remove(ws);
+		wakeup_source_sysfs_remove(ws);
 		wakeup_source_destroy(ws);
 	}
 }
@@ -258,7 +278,7 @@ int device_wakeup_enable(struct device *dev)
 	if (pm_suspend_target_state != PM_SUSPEND_ON)
 		dev_dbg(dev, "Suspicious %s() during system transition!\n", __func__);
 
-	ws = wakeup_source_register(dev_name(dev));
+	ws = wakeup_source_register(dev, dev_name(dev));
 	if (!ws)
 		return -ENOMEM;
 
diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c
new file mode 100644
index 000000000000..220a20ff8918
--- /dev/null
+++ b/drivers/base/power/wakeup_stats.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wakeup statistics in sysfs
+ *
+ * Copyright (c) 2019 Linux Foundation
+ * Copyright (c) 2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (c) 2019 Google Inc.
+ */
+
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/timekeeping.h>
+
+#include "power.h"
+
+static struct class *wakeup_class;
+
+#define wakeup_attr(_name)						\
+static ssize_t _name##_show(struct device *dev,				\
+			    struct device_attribute *attr, char *buf)	\
+{									\
+	struct wakeup_source *ws = dev_get_drvdata(dev);		\
+									\
+	return sprintf(buf, "%lu\n", ws->_name);			\
+}									\
+static DEVICE_ATTR_RO(_name)
+
+wakeup_attr(active_count);
+wakeup_attr(event_count);
+wakeup_attr(wakeup_count);
+wakeup_attr(expire_count);
+
+static ssize_t active_time_ms_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct wakeup_source *ws = dev_get_drvdata(dev);
+	ktime_t active_time =
+		ws->active ? ktime_sub(ktime_get(), ws->last_time) : 0;
+
+	return sprintf(buf, "%lld\n", ktime_to_ms(active_time));
+}
+static DEVICE_ATTR_RO(active_time_ms);
+
+static ssize_t total_time_ms_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct wakeup_source *ws = dev_get_drvdata(dev);
+	ktime_t active_time;
+	ktime_t total_time = ws->total_time;
+
+	if (ws->active) {
+		active_time = ktime_sub(ktime_get(), ws->last_time);
+		total_time = ktime_add(total_time, active_time);
+	}
+	return sprintf(buf, "%lld\n", ktime_to_ms(total_time));
+}
+static DEVICE_ATTR_RO(total_time_ms);
+
+static ssize_t max_time_ms_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct wakeup_source *ws = dev_get_drvdata(dev);
+	ktime_t active_time;
+	ktime_t max_time = ws->max_time;
+
+	if (ws->active) {
+		active_time = ktime_sub(ktime_get(), ws->last_time);
+		if (active_time > max_time)
+			max_time = active_time;
+	}
+	return sprintf(buf, "%lld\n", ktime_to_ms(max_time));
+}
+static DEVICE_ATTR_RO(max_time_ms);
+
+static ssize_t last_change_ms_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct wakeup_source *ws = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%lld\n", ktime_to_ms(ws->last_time));
+}
+static DEVICE_ATTR_RO(last_change_ms);
+
+static ssize_t name_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct wakeup_source *ws = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%s\n", ws->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t prevent_suspend_time_ms_show(struct device *dev,
+					    struct device_attribute *attr,
+					    char *buf)
+{
+	struct wakeup_source *ws = dev_get_drvdata(dev);
+	ktime_t prevent_sleep_time = ws->prevent_sleep_time;
+
+	if (ws->active && ws->autosleep_enabled) {
+		prevent_sleep_time = ktime_add(prevent_sleep_time,
+			ktime_sub(ktime_get(), ws->start_prevent_time));
+	}
+	return sprintf(buf, "%lld\n", ktime_to_ms(prevent_sleep_time));
+}
+static DEVICE_ATTR_RO(prevent_suspend_time_ms);
+
+static struct attribute *wakeup_source_attrs[] = {
+	&dev_attr_name.attr,
+	&dev_attr_active_count.attr,
+	&dev_attr_event_count.attr,
+	&dev_attr_wakeup_count.attr,
+	&dev_attr_expire_count.attr,
+	&dev_attr_active_time_ms.attr,
+	&dev_attr_total_time_ms.attr,
+	&dev_attr_max_time_ms.attr,
+	&dev_attr_last_change_ms.attr,
+	&dev_attr_prevent_suspend_time_ms.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(wakeup_source);
+
+static void device_create_release(struct device *dev)
+{
+	kfree(dev);
+}
+
+static struct device *wakeup_source_device_create(struct device *parent,
+						  struct wakeup_source *ws)
+{
+	struct device *dev = NULL;
+	int retval = -ENODEV;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		retval = -ENOMEM;
+		goto error;
+	}
+
+	device_initialize(dev);
+	dev->devt = MKDEV(0, 0);
+	dev->class = wakeup_class;
+	dev->parent = parent;
+	dev->groups = wakeup_source_groups;
+	dev->release = device_create_release;
+	dev_set_drvdata(dev, ws);
+	device_set_pm_not_required(dev);
+
+	retval = kobject_set_name(&dev->kobj, "wakeup%d", ws->id);
+	if (retval)
+		goto error;
+
+	retval = device_add(dev);
+	if (retval)
+		goto error;
+
+	return dev;
+
+error:
+	put_device(dev);
+	return ERR_PTR(retval);
+}
+
+/**
+ * wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs.
+ * @parent: Device given wakeup source is associated with (or NULL if virtual).
+ * @ws: Wakeup source to be added in sysfs.
+ */
+int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws)
+{
+	struct device *dev;
+
+	dev = wakeup_source_device_create(parent, ws);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+	ws->dev = dev;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wakeup_source_sysfs_add);
+
+/**
+ * wakeup_source_sysfs_remove - Remove wakeup_source attributes from sysfs.
+ * @ws: Wakeup source to be removed from sysfs.
+ */
+void wakeup_source_sysfs_remove(struct wakeup_source *ws)
+{
+	device_unregister(ws->dev);
+}
+EXPORT_SYMBOL_GPL(wakeup_source_sysfs_remove);
+
+static int __init wakeup_sources_sysfs_init(void)
+{
+	wakeup_class = class_create(THIS_MODULE, "wakeup");
+
+	return PTR_ERR_OR_ZERO(wakeup_class);
+}
+postcore_initcall(wakeup_sources_sysfs_init);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index d7f1f5011fac..c4159bcc05d9 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1459,13 +1459,13 @@ static int ep_create_wakeup_source(struct epitem *epi)
 	struct wakeup_source *ws;
 
 	if (!epi->ep->ws) {
-		epi->ep->ws = wakeup_source_register("eventpoll");
+		epi->ep->ws = wakeup_source_register(NULL, "eventpoll");
 		if (!epi->ep->ws)
 			return -ENOMEM;
 	}
 
 	name = epi->ffd.file->f_path.dentry->d_name.name;
-	ws = wakeup_source_register(name);
+	ws = wakeup_source_register(NULL, name);
 
 	if (!ws)
 		return -ENOMEM;
diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index c0cad2d8f800..661efa029c96 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -21,6 +21,7 @@ struct wake_irq;
  * struct wakeup_source - Representation of wakeup sources
  *
  * @name: Name of the wakeup source
+ * @id: Wakeup source id
  * @entry: Wakeup source list entry
  * @lock: Wakeup source lock
  * @wakeirq: Optional device specific wakeirq
@@ -35,11 +36,13 @@ struct wake_irq;
  * @relax_count: Number of times the wakeup source was deactivated.
  * @expire_count: Number of times the wakeup source's timeout has expired.
  * @wakeup_count: Number of times the wakeup source might abort suspend.
+ * @dev: Struct device for sysfs statistics about the wakeup source.
  * @active: Status of the wakeup source.
  * @autosleep_enabled: Autosleep is active, so update @prevent_sleep_time.
  */
 struct wakeup_source {
 	const char 		*name;
+	int			id;
 	struct list_head	entry;
 	spinlock_t		lock;
 	struct wake_irq		*wakeirq;
@@ -55,6 +58,7 @@ struct wakeup_source {
 	unsigned long		relax_count;
 	unsigned long		expire_count;
 	unsigned long		wakeup_count;
+	struct device		*dev;
 	bool			active:1;
 	bool			autosleep_enabled:1;
 };
@@ -85,7 +89,8 @@ extern struct wakeup_source *wakeup_source_create(const char *name);
 extern void wakeup_source_destroy(struct wakeup_source *ws);
 extern void wakeup_source_add(struct wakeup_source *ws);
 extern void wakeup_source_remove(struct wakeup_source *ws);
-extern struct wakeup_source *wakeup_source_register(const char *name);
+extern struct wakeup_source *wakeup_source_register(struct device *dev,
+						    const char *name);
 extern void wakeup_source_unregister(struct wakeup_source *ws);
 extern int device_wakeup_enable(struct device *dev);
 extern int device_wakeup_disable(struct device *dev);
@@ -122,7 +127,8 @@ static inline void wakeup_source_add(struct wakeup_source *ws) {}
 
 static inline void wakeup_source_remove(struct wakeup_source *ws) {}
 
-static inline struct wakeup_source *wakeup_source_register(const char *name)
+static inline struct wakeup_source *wakeup_source_register(struct device *dev,
+							   const char *name)
 {
 	return NULL;
 }
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
index 41e83a779e19..9af5a50d3489 100644
--- a/kernel/power/autosleep.c
+++ b/kernel/power/autosleep.c
@@ -116,7 +116,7 @@ int pm_autosleep_set_state(suspend_state_t state)
 
 int __init pm_autosleep_init(void)
 {
-	autosleep_ws = wakeup_source_register("autosleep");
+	autosleep_ws = wakeup_source_register(NULL, "autosleep");
 	if (!autosleep_ws)
 		return -ENOMEM;
 
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
index d1eb7fd98b64..105df4dfc783 100644
--- a/kernel/power/wakelock.c
+++ b/kernel/power/wakelock.c
@@ -188,7 +188,7 @@ static struct wakelock *wakelock_lookup_add(const char *name, size_t len,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	wl->ws = wakeup_source_register(wl->name);
+	wl->ws = wakeup_source_register(NULL, wl->name);
 	if (!wl->ws) {
 		kfree(wl->name);
 		kfree(wl);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 57518efc3810..93b382d9701c 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -97,7 +97,7 @@ static int alarmtimer_rtc_add_device(struct device *dev,
 	if (!device_may_wakeup(rtc->dev.parent))
 		return -1;
 
-	__ws = wakeup_source_register("alarmtimer");
+	__ws = wakeup_source_register(dev, "alarmtimer");
 
 	spin_lock_irqsave(&rtcdev_lock, flags);
 	if (!rtcdev) {
-- 
cgit v1.2.3


From c3082a674f46fe49383b157882c41dfabaa37113 Mon Sep 17 00:00:00 2001
From: Amit Kucheria <amit.kucheria@linaro.org>
Date: Thu, 11 Jul 2019 19:51:25 +0530
Subject: PM: QoS: Get rid of unused flags

The network_latency and network_throughput flags for PM-QoS have not
found much use in drivers or in userspace since they were introduced.

Commit 4a733ef1bea7 ("mac80211: remove PM-QoS listener") removed the
only user PM_QOS_NETWORK_LATENCY in the kernel a while ago and there
don't seem to be any userspace tools using the character device files
either.

PM_QOS_MEMORY_BANDWIDTH was never even added to the trace events.

Remove all the flags except cpu_dma_latency.

Signed-off-by: Amit Kucheria <amit.kucheria@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/power/pm_qos_interface.rst |  5 ++--
 include/linux/pm_qos.h                   |  6 ----
 include/trace/events/power.h             |  8 ++----
 kernel/power/qos.c                       | 48 --------------------------------
 4 files changed, 4 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst
index 69921f072ce1..3097694fba69 100644
--- a/Documentation/power/pm_qos_interface.rst
+++ b/Documentation/power/pm_qos_interface.rst
@@ -7,8 +7,7 @@ performance expectations by drivers, subsystems and user space applications on
 one of the parameters.
 
 Two different PM QoS frameworks are available:
-1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
-memory_bandwidth.
+1. PM QoS classes for cpu_dma_latency
 2. the per-device PM QoS framework provides the API to manage the per-device latency
 constraints and PM QoS flags.
 
@@ -79,7 +78,7 @@ cleanup of a process, the interface requires the process to register its
 parameter requests in the following way:
 
 To register the default pm_qos target for the specific parameter, the process
-must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
+must open /dev/cpu_dma_latency
 
 As long as the device node is held open that process has a registered
 request on the parameter.
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 2aebbc5b9950..222c3e01397c 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -13,9 +13,6 @@
 enum {
 	PM_QOS_RESERVED = 0,
 	PM_QOS_CPU_DMA_LATENCY,
-	PM_QOS_NETWORK_LATENCY,
-	PM_QOS_NETWORK_THROUGHPUT,
-	PM_QOS_MEMORY_BANDWIDTH,
 
 	/* insert new class ID */
 	PM_QOS_NUM_CLASSES,
@@ -33,9 +30,6 @@ enum pm_qos_flags_status {
 #define PM_QOS_LATENCY_ANY_NS	((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC)
 
 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
-#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE	(2000 * USEC_PER_SEC)
-#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE	0
-#define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE	0
 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE	PM_QOS_LATENCY_ANY
 #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT	PM_QOS_LATENCY_ANY
 #define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS	PM_QOS_LATENCY_ANY_NS
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index f7aece721aed..7457e238e1b7 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -379,9 +379,7 @@ DECLARE_EVENT_CLASS(pm_qos_request,
 
 	TP_printk("pm_qos_class=%s value=%d",
 		  __print_symbolic(__entry->pm_qos_class,
-			{ PM_QOS_CPU_DMA_LATENCY,	"CPU_DMA_LATENCY" },
-			{ PM_QOS_NETWORK_LATENCY,	"NETWORK_LATENCY" },
-			{ PM_QOS_NETWORK_THROUGHPUT,	"NETWORK_THROUGHPUT" }),
+			{ PM_QOS_CPU_DMA_LATENCY,	"CPU_DMA_LATENCY" }),
 		  __entry->value)
 );
 
@@ -426,9 +424,7 @@ TRACE_EVENT(pm_qos_update_request_timeout,
 
 	TP_printk("pm_qos_class=%s value=%d, timeout_us=%ld",
 		  __print_symbolic(__entry->pm_qos_class,
-			{ PM_QOS_CPU_DMA_LATENCY,	"CPU_DMA_LATENCY" },
-			{ PM_QOS_NETWORK_LATENCY,	"NETWORK_LATENCY" },
-			{ PM_QOS_NETWORK_THROUGHPUT,	"NETWORK_THROUGHPUT" }),
+			{ PM_QOS_CPU_DMA_LATENCY,	"CPU_DMA_LATENCY" }),
 		  __entry->value, __entry->timeout_us)
 );
 
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 33e3febaba53..9568a2fe7c11 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -78,57 +78,9 @@ static struct pm_qos_object cpu_dma_pm_qos = {
 	.name = "cpu_dma_latency",
 };
 
-static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
-static struct pm_qos_constraints network_lat_constraints = {
-	.list = PLIST_HEAD_INIT(network_lat_constraints.list),
-	.target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
-	.default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
-	.no_constraint_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
-	.type = PM_QOS_MIN,
-	.notifiers = &network_lat_notifier,
-};
-static struct pm_qos_object network_lat_pm_qos = {
-	.constraints = &network_lat_constraints,
-	.name = "network_latency",
-};
-
-
-static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
-static struct pm_qos_constraints network_tput_constraints = {
-	.list = PLIST_HEAD_INIT(network_tput_constraints.list),
-	.target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
-	.default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
-	.no_constraint_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
-	.type = PM_QOS_MAX,
-	.notifiers = &network_throughput_notifier,
-};
-static struct pm_qos_object network_throughput_pm_qos = {
-	.constraints = &network_tput_constraints,
-	.name = "network_throughput",
-};
-
-
-static BLOCKING_NOTIFIER_HEAD(memory_bandwidth_notifier);
-static struct pm_qos_constraints memory_bw_constraints = {
-	.list = PLIST_HEAD_INIT(memory_bw_constraints.list),
-	.target_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
-	.default_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
-	.no_constraint_value = PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE,
-	.type = PM_QOS_SUM,
-	.notifiers = &memory_bandwidth_notifier,
-};
-static struct pm_qos_object memory_bandwidth_pm_qos = {
-	.constraints = &memory_bw_constraints,
-	.name = "memory_bandwidth",
-};
-
-
 static struct pm_qos_object *pm_qos_array[] = {
 	&null_pm_qos,
 	&cpu_dma_pm_qos,
-	&network_lat_pm_qos,
-	&network_throughput_pm_qos,
-	&memory_bandwidth_pm_qos,
 };
 
 static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
-- 
cgit v1.2.3


From c5b9a7f826735228a38fab4a7b2707f032468c88 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend.vanspriel@broadcom.com>
Date: Fri, 2 Aug 2019 13:30:58 +0200
Subject: nl80211: add 6GHz band definition to enum nl80211_band

In the 802.11ax specification a new band is introduced, which
is also proposed by FCC for unlicensed use. This band is referred
to as 6GHz spanning frequency range from 5925 to 7125 MHz.

Reviewed-by: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Reviewed-by: Leon Zegers <leon.zegers@broadcom.com>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Link: https://lore.kernel.org/r/1564745465-21234-2-git-send-email-arend.vanspriel@broadcom.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 2 ++
 net/mac80211/tx.c            | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 822851d369ab..4d5988f47118 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4543,6 +4543,7 @@ enum nl80211_txrate_gi {
  * @NL80211_BAND_2GHZ: 2.4 GHz ISM band
  * @NL80211_BAND_5GHZ: around 5 GHz band (4.9 - 5.7 GHz)
  * @NL80211_BAND_60GHZ: around 60 GHz band (58.32 - 69.12 GHz)
+ * @NL80211_BAND_6GHZ: around 6 GHz band (5.9 - 7.2 GHz)
  * @NUM_NL80211_BANDS: number of bands, avoid using this in userspace
  *	since newer kernel versions may support more bands
  */
@@ -4550,6 +4551,7 @@ enum nl80211_band {
 	NL80211_BAND_2GHZ,
 	NL80211_BAND_5GHZ,
 	NL80211_BAND_60GHZ,
+	NL80211_BAND_6GHZ,
 
 	NUM_NL80211_BANDS,
 };
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 235c6377a203..1fa422782905 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -162,6 +162,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,
 			break;
 		}
 		case NL80211_BAND_5GHZ:
+		case NL80211_BAND_6GHZ:
 			if (r->flags & IEEE80211_RATE_MANDATORY_A)
 				mrate = r->bitrate;
 			break;
-- 
cgit v1.2.3


From 6c7a00339e2a64b068c986301f37bd31eb83d7e9 Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Fri, 9 Aug 2019 11:00:00 -0700
Subject: cfg80211: Support assoc-at timer in sta-info

Report timestamp of when sta became associated.

This is the boottime clock, units are nano-seconds.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Link: https://lore.kernel.org/r/20190809180001.26393-1-greearb@candelatech.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 2 ++
 include/uapi/linux/nl80211.h | 3 +++
 net/wireless/nl80211.c       | 1 +
 3 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8140c4837122..1e32b11e1730 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1330,6 +1330,7 @@ struct cfg80211_tid_stats {
  *	indicate the relevant values in this struct for them
  * @connected_time: time(in secs) since a station is last connected
  * @inactive_time: time since last station activity (tx/rx) in milliseconds
+ * @assoc_at: bootime (ns) of the last association
  * @rx_bytes: bytes (size of MPDUs) received from this station
  * @tx_bytes: bytes (size of MPDUs) transmitted to this station
  * @llid: mesh local link id
@@ -1390,6 +1391,7 @@ struct station_info {
 	u64 filled;
 	u32 connected_time;
 	u32 inactive_time;
+	u64 assoc_at;
 	u64 rx_bytes;
 	u64 tx_bytes;
 	u16 llid;
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 4d5988f47118..04b58295c8d9 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3201,6 +3201,8 @@ enum nl80211_sta_bss_param {
  *	sent to the station (u64, usec)
  * @NL80211_STA_INFO_AIRTIME_WEIGHT: current airtime weight for station (u16)
  * @NL80211_STA_INFO_AIRTIME_LINK_METRIC: airtime link metric for mesh station
+ * @NL80211_STA_INFO_ASSOC_AT_BOOTTIME: Timestamp (CLOCK_BOOTTIME, nanoseconds)
+ *	of STA's association
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -3247,6 +3249,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_TX_DURATION,
 	NL80211_STA_INFO_AIRTIME_WEIGHT,
 	NL80211_STA_INFO_AIRTIME_LINK_METRIC,
+	NL80211_STA_INFO_ASSOC_AT_BOOTTIME,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9a642219a8c7..cacd96704647 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5032,6 +5032,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
 
 	PUT_SINFO(CONNECTED_TIME, connected_time, u32);
 	PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
+	PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at);
 
 	if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
 			     BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
-- 
cgit v1.2.3


From 05c8afe42559c98f89022431e457bfbdaf01c806 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 5 Aug 2019 19:55:06 -0500
Subject: soundwire: intel: prevent possible dereference in hw_params

This should not happen in production systems but we should test for
all callback arguments before invoking the config_stream callback.

Update the prototype to clarify that the first argument is mandatory.

Also use local variable instead of multiple dereferences to improve
readability.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190806005522.22642-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           | 6 ++++--
 include/linux/soundwire/sdw_intel.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index c82ca4e13de7..97e3205d95a2 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -397,8 +397,10 @@ static int intel_config_stream(struct sdw_intel *sdw,
 			       struct snd_soc_dai *dai,
 			       struct snd_pcm_hw_params *hw_params, int link_id)
 {
-	if (sdw->res->ops && sdw->res->ops->config_stream)
-		return sdw->res->ops->config_stream(sdw->res->arg,
+	struct sdw_intel_link_res *res = sdw->res;
+
+	if (res->ops && res->ops->config_stream && res->arg)
+		return res->ops->config_stream(res->arg,
 				substream, dai, hw_params, link_id);
 
 	return -EIO;
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 4d70da45363d..c9427cb6020b 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -8,6 +8,7 @@
  * struct sdw_intel_ops: Intel audio driver callback ops
  *
  * @config_stream: configure the stream with the hw_params
+ * the first argument containing the context is mandatory
  */
 struct sdw_intel_ops {
 	int (*config_stream)(void *arg, void *substream,
-- 
cgit v1.2.3


From ce3304d8da8fa8e20001ed6128c7d04f703be305 Mon Sep 17 00:00:00 2001
From: Bard liao <yung-chuan.liao@linux.intel.com>
Date: Mon, 5 Aug 2019 19:55:12 -0500
Subject: soundwire: include mod_devicetable.h to avoid compiling warnings

When integrating SoundWire, kbuild throws this warning with randconfig:

>> include/linux/soundwire/sdw.h:571:17: warning: 'struct
   sdw_device_id' declared inside parameter list will not be visible
   outside of this definition or declaration

       const struct sdw_device_id *id);
                    ^~~~~~~~~~~~~

Fix by adding the relevant include

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Bard liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190806005522.22642-8-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index bea46bd8b6ce..28b5ab0d868c 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -4,6 +4,8 @@
 #ifndef __SOUNDWIRE_H
 #define __SOUNDWIRE_H
 
+#include <linux/mod_devicetable.h>
+
 struct sdw_bus;
 struct sdw_slave;
 
-- 
cgit v1.2.3


From 82fc8d06501a0694bfb1ada8ba69372cbc297fb9 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Mon, 5 Aug 2019 19:55:15 -0500
Subject: soundwire: add new mclk_freq field for properties

To help pass platform-specific values, add a new field that can either
be set by the Master driver or read from firmware (BIOS/DT).

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190806005522.22642-11-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/soundwire/sdw.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 28b5ab0d868c..131d49ef1cb4 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -379,6 +379,7 @@ struct sdw_slave_prop {
  * @dynamic_frame: Dynamic frame shape supported
  * @err_threshold: Number of times that software may retry sending a single
  * command
+ * @mclk_freq: clock reference passed to SoundWire Master, in Hz.
  */
 struct sdw_master_prop {
 	u32 revision;
@@ -393,6 +394,7 @@ struct sdw_master_prop {
 	u32 default_col;
 	bool dynamic_frame;
 	u32 err_threshold;
+	u32 mclk_freq;
 };
 
 int sdw_master_read_prop(struct sdw_bus *bus);
-- 
cgit v1.2.3


From 2a38075cd0beefa4da326380cf54c7b365ddc035 Mon Sep 17 00:00:00 2001
From: Alexei Avshalom Lazar <ailizaro@codeaurora.org>
Date: Sun, 18 Aug 2019 17:35:17 +0300
Subject: nl80211: Add support for EDMG channels

802.11ay specification defines Enhanced Directional Multi-Gigabit
(EDMG) STA and AP which allow channel bonding of 2 channels and more.

Introduce new NL attributes that are needed for enabling and
configuring EDMG support.

Two new attributes are used by kernel to publish driver's EDMG
capabilities to the userspace:
NL80211_BAND_ATTR_EDMG_CHANNELS - bitmap field that indicates the 2.16
GHz channel(s) that are supported by the driver.
When this attribute is not set it means driver does not support EDMG.
NL80211_BAND_ATTR_EDMG_BW_CONFIG - represent the channel bandwidth
configurations supported by the driver.

Additional two new attributes are used by the userspace for connect
command and for AP configuration:
NL80211_ATTR_WIPHY_EDMG_CHANNELS
NL80211_ATTR_WIPHY_EDMG_BW_CONFIG

New rate info flag - RATE_INFO_FLAGS_EDMG, can be reported from driver
and used for bitrate calculation that will take into account EDMG
according to the 802.11ay specification.

Signed-off-by: Alexei Avshalom Lazar <ailizaro@codeaurora.org>
Link: https://lore.kernel.org/r/1566138918-3823-2-git-send-email-ailizaro@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/wil6210/cfg80211.c |   2 +-
 include/net/cfg80211.h                      |  86 ++++++++++++++-
 include/uapi/linux/nl80211.h                |  24 +++++
 net/mac80211/mlme.c                         |   2 +-
 net/mac80211/status.c                       |   6 +-
 net/wireless/chan.c                         | 159 ++++++++++++++++++++++++++++
 net/wireless/nl80211.c                      |  37 +++++++
 net/wireless/util.c                         |  42 +++++++-
 8 files changed, 349 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index 2fb4258941a5..2414f574bf69 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -351,7 +351,7 @@ int wil_cid_fill_sinfo(struct wil6210_vif *vif, int cid,
 			BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC) |
 			BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 
-	sinfo->txrate.flags = RATE_INFO_FLAGS_60G;
+	sinfo->txrate.flags = RATE_INFO_FLAGS_DMG;
 	sinfo->txrate.mcs = le16_to_cpu(reply.evt.bf_mcs);
 	sinfo->rxrate.mcs = stats->last_mcs_rx;
 	sinfo->rx_bytes = stats->rx_bytes;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1e32b11e1730..5253e7f667bd 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -330,6 +330,60 @@ struct ieee80211_sband_iftype_data {
 	struct ieee80211_sta_he_cap he_cap;
 };
 
+/**
+ * enum ieee80211_edmg_bw_config - allowed channel bandwidth configurations
+ *
+ * @IEEE80211_EDMG_BW_CONFIG_4: 2.16GHz
+ * @IEEE80211_EDMG_BW_CONFIG_5: 2.16GHz and 4.32GHz
+ * @IEEE80211_EDMG_BW_CONFIG_6: 2.16GHz, 4.32GHz and 6.48GHz
+ * @IEEE80211_EDMG_BW_CONFIG_7: 2.16GHz, 4.32GHz, 6.48GHz and 8.64GHz
+ * @IEEE80211_EDMG_BW_CONFIG_8: 2.16GHz and 2.16GHz + 2.16GHz
+ * @IEEE80211_EDMG_BW_CONFIG_9: 2.16GHz, 4.32GHz and 2.16GHz + 2.16GHz
+ * @IEEE80211_EDMG_BW_CONFIG_10: 2.16GHz, 4.32GHz, 6.48GHz and 2.16GHz+2.16GHz
+ * @IEEE80211_EDMG_BW_CONFIG_11: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz and
+ *	2.16GHz+2.16GHz
+ * @IEEE80211_EDMG_BW_CONFIG_12: 2.16GHz, 2.16GHz + 2.16GHz and
+ *	4.32GHz + 4.32GHz
+ * @IEEE80211_EDMG_BW_CONFIG_13: 2.16GHz, 4.32GHz, 2.16GHz + 2.16GHz and
+ *	4.32GHz + 4.32GHz
+ * @IEEE80211_EDMG_BW_CONFIG_14: 2.16GHz, 4.32GHz, 6.48GHz, 2.16GHz + 2.16GHz
+ *	and 4.32GHz + 4.32GHz
+ * @IEEE80211_EDMG_BW_CONFIG_15: 2.16GHz, 4.32GHz, 6.48GHz, 8.64GHz,
+ *	2.16GHz + 2.16GHz and 4.32GHz + 4.32GHz
+ */
+enum ieee80211_edmg_bw_config {
+	IEEE80211_EDMG_BW_CONFIG_4	= 4,
+	IEEE80211_EDMG_BW_CONFIG_5	= 5,
+	IEEE80211_EDMG_BW_CONFIG_6	= 6,
+	IEEE80211_EDMG_BW_CONFIG_7	= 7,
+	IEEE80211_EDMG_BW_CONFIG_8	= 8,
+	IEEE80211_EDMG_BW_CONFIG_9	= 9,
+	IEEE80211_EDMG_BW_CONFIG_10	= 10,
+	IEEE80211_EDMG_BW_CONFIG_11	= 11,
+	IEEE80211_EDMG_BW_CONFIG_12	= 12,
+	IEEE80211_EDMG_BW_CONFIG_13	= 13,
+	IEEE80211_EDMG_BW_CONFIG_14	= 14,
+	IEEE80211_EDMG_BW_CONFIG_15	= 15,
+};
+
+/**
+ * struct ieee80211_edmg - EDMG configuration
+ *
+ * This structure describes most essential parameters needed
+ * to describe 802.11ay EDMG configuration
+ *
+ * @channels: bitmap that indicates the 2.16 GHz channel(s)
+ *	that are allowed to be used for transmissions.
+ *	Bit 0 indicates channel 1, bit 1 indicates channel 2, etc.
+ *	Set to 0 indicate EDMG not supported.
+ * @bw_config: Channel BW Configuration subfield encodes
+ *	the allowed channel bandwidth configurations
+ */
+struct ieee80211_edmg {
+	u8 channels;
+	enum ieee80211_edmg_bw_config bw_config;
+};
+
 /**
  * struct ieee80211_supported_band - frequency band definition
  *
@@ -346,6 +400,7 @@ struct ieee80211_sband_iftype_data {
  * @n_bitrates: Number of bitrates in @bitrates
  * @ht_cap: HT capabilities in this band
  * @vht_cap: VHT capabilities in this band
+ * @edmg_cap: EDMG capabilities in this band
  * @n_iftype_data: number of iftype data entries
  * @iftype_data: interface type data entries.  Note that the bits in
  *	@types_mask inside this structure cannot overlap (i.e. only
@@ -360,6 +415,7 @@ struct ieee80211_supported_band {
 	int n_bitrates;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
+	struct ieee80211_edmg edmg_cap;
 	u16 n_iftype_data;
 	const struct ieee80211_sband_iftype_data *iftype_data;
 };
@@ -527,12 +583,17 @@ struct key_params {
  * @center_freq1: center frequency of first segment
  * @center_freq2: center frequency of second segment
  *	(only with 80+80 MHz)
+ * @edmg: define the EDMG channels configuration.
+ *	If edmg is requested (i.e. the .channels member is non-zero),
+ *	chan will define the primary channel and all other
+ *	parameters are ignored.
  */
 struct cfg80211_chan_def {
 	struct ieee80211_channel *chan;
 	enum nl80211_chan_width width;
 	u32 center_freq1;
 	u32 center_freq2;
+	struct ieee80211_edmg edmg;
 };
 
 /**
@@ -590,6 +651,19 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
 		chandef1->center_freq2 == chandef2->center_freq2);
 }
 
+/**
+ * cfg80211_chandef_is_edmg - check if chandef represents an EDMG channel
+ *
+ * @chandef: the channel definition
+ *
+ * Return: %true if EDMG defined, %false otherwise.
+ */
+static inline bool
+cfg80211_chandef_is_edmg(const struct cfg80211_chan_def *chandef)
+{
+	return chandef->edmg.channels || chandef->edmg.bw_config;
+}
+
 /**
  * cfg80211_chandef_compatible - check if two channel definitions are compatible
  * @chandef1: first channel definition
@@ -1177,15 +1251,17 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
  * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS
  * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS
  * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval
- * @RATE_INFO_FLAGS_60G: 60GHz MCS
+ * @RATE_INFO_FLAGS_DMG: 60GHz MCS
  * @RATE_INFO_FLAGS_HE_MCS: HE MCS information
+ * @RATE_INFO_FLAGS_EDMG: 60GHz MCS in EDMG mode
  */
 enum rate_info_flags {
 	RATE_INFO_FLAGS_MCS			= BIT(0),
 	RATE_INFO_FLAGS_VHT_MCS			= BIT(1),
 	RATE_INFO_FLAGS_SHORT_GI		= BIT(2),
-	RATE_INFO_FLAGS_60G			= BIT(3),
+	RATE_INFO_FLAGS_DMG			= BIT(3),
 	RATE_INFO_FLAGS_HE_MCS			= BIT(4),
+	RATE_INFO_FLAGS_EDMG			= BIT(5),
 };
 
 /**
@@ -1225,6 +1301,7 @@ enum rate_info_bw {
  * @he_dcm: HE DCM value
  * @he_ru_alloc: HE RU allocation (from &enum nl80211_he_ru_alloc,
  *	only valid if bw is %RATE_INFO_BW_HE_RU)
+ * @n_bonded_ch: In case of EDMG the number of bonded channels (1-4)
  */
 struct rate_info {
 	u8 flags;
@@ -1235,6 +1312,7 @@ struct rate_info {
 	u8 he_gi;
 	u8 he_dcm;
 	u8 he_ru_alloc;
+	u8 n_bonded_ch;
 };
 
 /**
@@ -2438,6 +2516,9 @@ struct cfg80211_bss_selection {
  * @fils_erp_rrk_len: Length of @fils_erp_rrk in octets.
  * @want_1x: indicates user-space supports and wants to use 802.1X driver
  *	offload of 4-way handshake.
+ * @edmg: define the EDMG channels.
+ *	This may specify multiple channels and bonding options for the driver
+ *	to choose from, based on BSS configuration.
  */
 struct cfg80211_connect_params {
 	struct ieee80211_channel *channel;
@@ -2471,6 +2552,7 @@ struct cfg80211_connect_params {
 	const u8 *fils_erp_rrk;
 	size_t fils_erp_rrk_len;
 	bool want_1x;
+	struct ieee80211_edmg edmg;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 04b58295c8d9..bf7c4222f512 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -52,6 +52,11 @@
 #define NL80211_MULTICAST_GROUP_NAN		"nan"
 #define NL80211_MULTICAST_GROUP_TESTMODE	"testmode"
 
+#define NL80211_EDMG_BW_CONFIG_MIN	4
+#define NL80211_EDMG_BW_CONFIG_MAX	15
+#define NL80211_EDMG_CHANNELS_MIN	1
+#define NL80211_EDMG_CHANNELS_MAX	0x3c /* 0b00111100 */
+
 /**
  * DOC: Station handling
  *
@@ -2361,6 +2366,13 @@ enum nl80211_commands {
  * @NL80211_ATTR_HE_OBSS_PD: nested attribute for OBSS Packet Detection
  *	functionality.
  *
+ * @NL80211_ATTR_WIPHY_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz
+ *	channel(s) that are allowed to be used for EDMG transmissions.
+ *	Defined by IEEE P802.11ay/D4.0 section 9.4.2.251. (u8 attribute)
+ * @NL80211_ATTR_WIPHY_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes
+ *	the allowed channel bandwidth configurations. (u8 attribute)
+ *	Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2820,6 +2832,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_HE_OBSS_PD,
 
+	NL80211_ATTR_WIPHY_EDMG_CHANNELS,
+	NL80211_ATTR_WIPHY_EDMG_BW_CONFIG,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3431,6 +3446,12 @@ enum nl80211_band_iftype_attr {
  * @NL80211_BAND_ATTR_VHT_CAPA: VHT capabilities, as in the HT information IE
  * @NL80211_BAND_ATTR_IFTYPE_DATA: nested array attribute, with each entry using
  *	attributes from &enum nl80211_band_iftype_attr
+ * @NL80211_BAND_ATTR_EDMG_CHANNELS: bitmap that indicates the 2.16 GHz
+ *	channel(s) that are allowed to be used for EDMG transmissions.
+ *	Defined by IEEE P802.11ay/D4.0 section 9.4.2.251.
+ * @NL80211_BAND_ATTR_EDMG_BW_CONFIG: Channel BW Configuration subfield encodes
+ *	the allowed channel bandwidth configurations.
+ *	Defined by IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13.
  * @NL80211_BAND_ATTR_MAX: highest band attribute currently defined
  * @__NL80211_BAND_ATTR_AFTER_LAST: internal use
  */
@@ -3448,6 +3469,9 @@ enum nl80211_band_attr {
 	NL80211_BAND_ATTR_VHT_CAPA,
 	NL80211_BAND_ATTR_IFTYPE_DATA,
 
+	NL80211_BAND_ATTR_EDMG_CHANNELS,
+	NL80211_BAND_ATTR_EDMG_BW_CONFIG,
+
 	/* keep last */
 	__NL80211_BAND_ATTR_AFTER_LAST,
 	NL80211_BAND_ATTR_MAX = __NL80211_BAND_ATTR_AFTER_LAST - 1
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 641876982ab9..6471f552a942 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -158,10 +158,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
 	ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
 
+	memset(chandef, 0, sizeof(struct cfg80211_chan_def));
 	chandef->chan = channel;
 	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
 	chandef->center_freq1 = channel->center_freq;
-	chandef->center_freq2 = 0;
 
 	if (!ht_oper || !sta_ht_cap.ht_supported) {
 		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index f88f94d1f177..ab8ba5835ca0 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -262,7 +262,8 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info,
 	/* IEEE80211_RADIOTAP_RATE rate */
 	if (status && status->rate && !(status->rate->flags &
 					(RATE_INFO_FLAGS_MCS |
-					 RATE_INFO_FLAGS_60G |
+					 RATE_INFO_FLAGS_DMG |
+					 RATE_INFO_FLAGS_EDMG |
 					 RATE_INFO_FLAGS_VHT_MCS |
 					 RATE_INFO_FLAGS_HE_MCS)))
 		len += 2;
@@ -329,7 +330,8 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local,
 
 	if (status && status->rate) {
 		if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS |
-					     RATE_INFO_FLAGS_60G |
+					     RATE_INFO_FLAGS_DMG |
+					     RATE_INFO_FLAGS_EDMG |
 					     RATE_INFO_FLAGS_VHT_MCS |
 					     RATE_INFO_FLAGS_HE_MCS)))
 			legacy_rate = status->rate->legacy;
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 7c9d204838d4..e851cafd8e2f 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -14,6 +14,11 @@
 #include "core.h"
 #include "rdev-ops.h"
 
+static bool cfg80211_valid_60g_freq(u32 freq)
+{
+	return freq >= 58320 && freq <= 70200;
+}
+
 void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
 			     struct ieee80211_channel *chan,
 			     enum nl80211_channel_type chan_type)
@@ -23,6 +28,8 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
 
 	chandef->chan = chan;
 	chandef->center_freq2 = 0;
+	chandef->edmg.bw_config = 0;
+	chandef->edmg.channels = 0;
 
 	switch (chan_type) {
 	case NL80211_CHAN_NO_HT:
@@ -47,6 +54,91 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
 }
 EXPORT_SYMBOL(cfg80211_chandef_create);
 
+static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
+{
+	int max_contiguous = 0;
+	int num_of_enabled = 0;
+	int contiguous = 0;
+	int i;
+
+	if (!chandef->edmg.channels || !chandef->edmg.bw_config)
+		return false;
+
+	if (!cfg80211_valid_60g_freq(chandef->chan->center_freq))
+		return false;
+
+	for (i = 0; i < 6; i++) {
+		if (chandef->edmg.channels & BIT(i)) {
+			contiguous++;
+			num_of_enabled++;
+		} else {
+			contiguous = 0;
+		}
+
+		max_contiguous = max(contiguous, max_contiguous);
+	}
+	/* basic verification of edmg configuration according to
+	 * IEEE P802.11ay/D4.0 section 9.4.2.251
+	 */
+	/* check bw_config against contiguous edmg channels */
+	switch (chandef->edmg.bw_config) {
+	case IEEE80211_EDMG_BW_CONFIG_4:
+	case IEEE80211_EDMG_BW_CONFIG_8:
+	case IEEE80211_EDMG_BW_CONFIG_12:
+		if (max_contiguous < 1)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_5:
+	case IEEE80211_EDMG_BW_CONFIG_9:
+	case IEEE80211_EDMG_BW_CONFIG_13:
+		if (max_contiguous < 2)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_6:
+	case IEEE80211_EDMG_BW_CONFIG_10:
+	case IEEE80211_EDMG_BW_CONFIG_14:
+		if (max_contiguous < 3)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_7:
+	case IEEE80211_EDMG_BW_CONFIG_11:
+	case IEEE80211_EDMG_BW_CONFIG_15:
+		if (max_contiguous < 4)
+			return false;
+		break;
+
+	default:
+		return false;
+	}
+
+	/* check bw_config against aggregated (non contiguous) edmg channels */
+	switch (chandef->edmg.bw_config) {
+	case IEEE80211_EDMG_BW_CONFIG_4:
+	case IEEE80211_EDMG_BW_CONFIG_5:
+	case IEEE80211_EDMG_BW_CONFIG_6:
+	case IEEE80211_EDMG_BW_CONFIG_7:
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_8:
+	case IEEE80211_EDMG_BW_CONFIG_9:
+	case IEEE80211_EDMG_BW_CONFIG_10:
+	case IEEE80211_EDMG_BW_CONFIG_11:
+		if (num_of_enabled < 2)
+			return false;
+		break;
+	case IEEE80211_EDMG_BW_CONFIG_12:
+	case IEEE80211_EDMG_BW_CONFIG_13:
+	case IEEE80211_EDMG_BW_CONFIG_14:
+	case IEEE80211_EDMG_BW_CONFIG_15:
+		if (num_of_enabled < 4 || max_contiguous < 2)
+			return false;
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
 {
 	u32 control_freq;
@@ -112,6 +204,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
 		return false;
 	}
 
+	if (cfg80211_chandef_is_edmg(chandef) &&
+	    !cfg80211_edmg_chandef_valid(chandef))
+		return false;
+
 	return true;
 }
 EXPORT_SYMBOL(cfg80211_chandef_valid);
@@ -721,12 +817,66 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
 	return true;
 }
 
+/* check if the operating channels are valid and supported */
+static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels,
+				 enum ieee80211_edmg_bw_config edmg_bw_config,
+				 int primary_channel,
+				 struct ieee80211_edmg *edmg_cap)
+{
+	struct ieee80211_channel *chan;
+	int i, freq;
+	int channels_counter = 0;
+
+	if (!edmg_channels && !edmg_bw_config)
+		return true;
+
+	if ((!edmg_channels && edmg_bw_config) ||
+	    (edmg_channels && !edmg_bw_config))
+		return false;
+
+	if (!(edmg_channels & BIT(primary_channel - 1)))
+		return false;
+
+	/* 60GHz channels 1..6 */
+	for (i = 0; i < 6; i++) {
+		if (!(edmg_channels & BIT(i)))
+			continue;
+
+		if (!(edmg_cap->channels & BIT(i)))
+			return false;
+
+		channels_counter++;
+
+		freq = ieee80211_channel_to_frequency(i + 1,
+						      NL80211_BAND_60GHZ);
+		chan = ieee80211_get_channel(wiphy, freq);
+		if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
+			return false;
+	}
+
+	/* IEEE802.11 allows max 4 channels */
+	if (channels_counter > 4)
+		return false;
+
+	/* check bw_config is a subset of what driver supports
+	 * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13)
+	 */
+	if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4))
+		return false;
+
+	if (edmg_bw_config > edmg_cap->bw_config)
+		return false;
+
+	return true;
+}
+
 bool cfg80211_chandef_usable(struct wiphy *wiphy,
 			     const struct cfg80211_chan_def *chandef,
 			     u32 prohibited_flags)
 {
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
+	struct ieee80211_edmg *edmg_cap;
 	u32 width, control_freq, cap;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
@@ -734,6 +884,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 
 	ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap;
 	vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap;
+	edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap;
+
+	if (edmg_cap->channels &&
+	    !cfg80211_edmg_usable(wiphy,
+				  chandef->edmg.channels,
+				  chandef->edmg.bw_config,
+				  chandef->chan->hw_value,
+				  edmg_cap))
+		return false;
 
 	control_freq = chandef->chan->center_freq;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index cacd96704647..4565d7385884 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -298,6 +298,13 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 
 	[NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 },
+	[NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8,
+						NL80211_EDMG_CHANNELS_MIN,
+						NL80211_EDMG_CHANNELS_MAX),
+	[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8,
+						NL80211_EDMG_BW_CONFIG_MIN,
+						NL80211_EDMG_BW_CONFIG_MAX),
+
 	[NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 },
 	[NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 },
 	[NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 },
@@ -1574,6 +1581,15 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 		nla_nest_end(msg, nl_iftype_data);
 	}
 
+	/* add EDMG info */
+	if (sband->edmg_cap.channels &&
+	    (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS,
+		       sband->edmg_cap.channels) ||
+	    nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG,
+		       sband->edmg_cap.bw_config)))
+
+		return -ENOBUFS;
+
 	/* add bitrates */
 	nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES);
 	if (!nl_rates)
@@ -2677,6 +2693,18 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 				nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]);
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
+		chandef->edmg.channels =
+		      nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);
+
+		if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
+			chandef->edmg.bw_config =
+		     nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
+	} else {
+		chandef->edmg.bw_config = 0;
+		chandef->edmg.channels = 0;
+	}
+
 	if (!cfg80211_chandef_valid(chandef)) {
 		NL_SET_ERR_MSG(extack, "invalid channel definition");
 		return -EINVAL;
@@ -9894,6 +9922,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 	}
 
+	if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
+		connect.edmg.channels =
+		      nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);
+
+		if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
+			connect.edmg.bw_config =
+				nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
+	}
+
 	if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
 		connkeys = nl80211_parse_connkeys(rdev, info, NULL);
 		if (IS_ERR(connkeys))
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 59ed0808c13e..c99939067bb0 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1043,7 +1043,7 @@ static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate)
 	return (bitrate + 50000) / 100000;
 }
 
-static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
+static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate)
 {
 	static const u32 __mcs2bitrate[] = {
 		/* control PHY */
@@ -1090,6 +1090,40 @@ static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate)
 	return __mcs2bitrate[rate->mcs];
 }
 
+static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate)
+{
+	static const u32 __mcs2bitrate[] = {
+		/* control PHY */
+		[0] =   275,
+		/* SC PHY */
+		[1] =  3850,
+		[2] =  7700,
+		[3] =  9625,
+		[4] = 11550,
+		[5] = 12512, /* 1251.25 mbps */
+		[6] = 13475,
+		[7] = 15400,
+		[8] = 19250,
+		[9] = 23100,
+		[10] = 25025,
+		[11] = 26950,
+		[12] = 30800,
+		[13] = 38500,
+		[14] = 46200,
+		[15] = 50050,
+		[16] = 53900,
+		[17] = 57750,
+		[18] = 69300,
+		[19] = 75075,
+		[20] = 80850,
+	};
+
+	if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate)))
+		return 0;
+
+	return __mcs2bitrate[rate->mcs] * rate->n_bonded_ch;
+}
+
 static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate)
 {
 	static const u32 base[4][10] = {
@@ -1262,8 +1296,10 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate)
 {
 	if (rate->flags & RATE_INFO_FLAGS_MCS)
 		return cfg80211_calculate_bitrate_ht(rate);
-	if (rate->flags & RATE_INFO_FLAGS_60G)
-		return cfg80211_calculate_bitrate_60g(rate);
+	if (rate->flags & RATE_INFO_FLAGS_DMG)
+		return cfg80211_calculate_bitrate_dmg(rate);
+	if (rate->flags & RATE_INFO_FLAGS_EDMG)
+		return cfg80211_calculate_bitrate_edmg(rate);
 	if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
 		return cfg80211_calculate_bitrate_vht(rate);
 	if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
-- 
cgit v1.2.3


From b03bfaec1d52123d5d941488f71e06964535e471 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 20 Aug 2019 14:04:54 +0900
Subject: ASoC: soc-core: merge snd_soc_initialize_card_lists()

snd_soc_initialize_card_lists() is doing card related
INIT_LIST_HEAD(), but, it is already doing at
snd_soc_register_card(). We don't need to do it separately.
This patch merges these.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/877e781ldq.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h  | 10 ----------
 sound/soc/soc-core.c | 13 ++++++++-----
 2 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 5c841c2ee814..f264c6509f00 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1220,16 +1220,6 @@ static inline void *snd_soc_card_get_drvdata(struct snd_soc_card *card)
 	return card->drvdata;
 }
 
-static inline void snd_soc_initialize_card_lists(struct snd_soc_card *card)
-{
-	INIT_LIST_HEAD(&card->widgets);
-	INIT_LIST_HEAD(&card->paths);
-	INIT_LIST_HEAD(&card->dapm_list);
-	INIT_LIST_HEAD(&card->aux_comp_list);
-	INIT_LIST_HEAD(&card->component_dev_list);
-	INIT_LIST_HEAD(&card->list);
-}
-
 static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc)
 {
 	if (mc->reg == mc->rreg && mc->shift == mc->rshift)
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index b3f820fb53e6..d428491d51a7 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -2370,15 +2370,18 @@ int snd_soc_register_card(struct snd_soc_card *card)
 
 	dev_set_drvdata(card->dev, card);
 
-	snd_soc_initialize_card_lists(card);
-
+	INIT_LIST_HEAD(&card->widgets);
+	INIT_LIST_HEAD(&card->paths);
+	INIT_LIST_HEAD(&card->dapm_list);
+	INIT_LIST_HEAD(&card->aux_comp_list);
+	INIT_LIST_HEAD(&card->component_dev_list);
+	INIT_LIST_HEAD(&card->list);
 	INIT_LIST_HEAD(&card->dai_link_list);
-
 	INIT_LIST_HEAD(&card->rtd_list);
-	card->num_rtd = 0;
-
 	INIT_LIST_HEAD(&card->dapm_dirty);
 	INIT_LIST_HEAD(&card->dobj_list);
+
+	card->num_rtd = 0;
 	card->instantiated = 0;
 	mutex_init(&card->mutex);
 	mutex_init(&card->dapm_mutex);
-- 
cgit v1.2.3


From 49ec9177b8ec9c081850744468811e708a7c9841 Mon Sep 17 00:00:00 2001
From: Santosh Sivaraj <santosh@fossix.org>
Date: Tue, 20 Aug 2019 13:43:49 +0530
Subject: extable: Add function to search only kernel exception table

Certain architecture specific operating modes (e.g., in powerpc machine
check handler that is unable to access vmalloc memory), the
search_exception_tables cannot be called because it also searches the
module exception tables if entry is not found in the kernel exception
table.

Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20190820081352.8641-5-santosh@fossix.org
---
 include/linux/extable.h |  2 ++
 kernel/extable.c        | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/extable.h b/include/linux/extable.h
index 41c5b3a25f67..81ecfaa83ad3 100644
--- a/include/linux/extable.h
+++ b/include/linux/extable.h
@@ -19,6 +19,8 @@ void trim_init_extable(struct module *m);
 
 /* Given an address, look for it in the exception tables */
 const struct exception_table_entry *search_exception_tables(unsigned long add);
+const struct exception_table_entry *
+search_kernel_exception_table(unsigned long addr);
 
 #ifdef CONFIG_MODULES
 /* For extable.c to search modules' exception tables. */
diff --git a/kernel/extable.c b/kernel/extable.c
index e23cce6e6092..f6c9406eec7d 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,13 +40,20 @@ void __init sort_main_extable(void)
 	}
 }
 
+/* Given an address, look for it in the kernel exception table */
+const
+struct exception_table_entry *search_kernel_exception_table(unsigned long addr)
+{
+	return search_extable(__start___ex_table,
+			      __stop___ex_table - __start___ex_table, addr);
+}
+
 /* Given an address, look for it in the exception tables. */
 const struct exception_table_entry *search_exception_tables(unsigned long addr)
 {
 	const struct exception_table_entry *e;
 
-	e = search_extable(__start___ex_table,
-			   __stop___ex_table - __start___ex_table, addr);
+	e = search_kernel_exception_table(addr);
 	if (!e)
 		e = search_module_extables(addr);
 	return e;
-- 
cgit v1.2.3


From e6806e9a63a759e445383915bb9d2ec85a90aebf Mon Sep 17 00:00:00 2001
From: Mark Zhang <markz@mellanox.com>
Date: Mon, 19 Aug 2019 14:36:25 +0300
Subject: net/mlx5: Create bypass and loopback flow steering namespaces for
 RDMA RX

Use different namespaces for bypass and switchdev loopback because they
have different priorities and default table miss action requirement:
1. bypass: with multiple priorities support, and
   MLX5_FLOW_TABLE_MISS_ACTION_DEF as the default table miss action;
2. switchdev loopback: with single priority support, and
   MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN as the default table miss
   action.

Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 49 ++++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/rdma.c    |  2 +-
 include/linux/mlx5/fs.h                           |  1 +
 3 files changed, 41 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fb3cfdfbafbe..7bdec442f0ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -182,6 +182,26 @@ static struct init_tree_node egress_root_fs = {
 	}
 };
 
+#define RDMA_RX_BYPASS_PRIO 0
+#define RDMA_RX_KERNEL_PRIO 1
+static struct init_tree_node rdma_rx_root_fs = {
+	.type = FS_TYPE_NAMESPACE,
+	.ar_size = 2,
+	.children = (struct init_tree_node[]) {
+		[RDMA_RX_BYPASS_PRIO] =
+		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
+						  BY_PASS_PRIO_NUM_LEVELS))),
+		[RDMA_RX_KERNEL_PRIO] =
+		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
+				ADD_MULTIPLE_PRIO(1, 1))),
+	}
+};
+
 enum fs_i_lock_class {
 	FS_LOCK_GRANDPARENT,
 	FS_LOCK_PARENT,
@@ -2067,16 +2087,18 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 		if (steering->sniffer_tx_root_ns)
 			return &steering->sniffer_tx_root_ns->ns;
 		return NULL;
-	case MLX5_FLOW_NAMESPACE_RDMA_RX:
-		if (steering->rdma_rx_root_ns)
-			return &steering->rdma_rx_root_ns->ns;
-		return NULL;
 	default:
 		break;
 	}
 
 	if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
 		root_ns = steering->egress_root_ns;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+		root_ns = steering->rdma_rx_root_ns;
+		prio = RDMA_RX_BYPASS_PRIO;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL) {
+		root_ns = steering->rdma_rx_root_ns;
+		prio = RDMA_RX_KERNEL_PRIO;
 	} else { /* Must be NIC RX */
 		root_ns = steering->root_ns;
 		prio = type;
@@ -2507,18 +2529,25 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
 
 static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
 {
-	struct fs_prio *prio;
+	int err;
 
 	steering->rdma_rx_root_ns = create_root_ns(steering, FS_FT_RDMA_RX);
 	if (!steering->rdma_rx_root_ns)
 		return -ENOMEM;
 
-	steering->rdma_rx_root_ns->ns.def_miss_action =
-		MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN;
+	err = init_root_tree(steering, &rdma_rx_root_fs,
+			     &steering->rdma_rx_root_ns->ns.node);
+	if (err)
+		goto out_err;
 
-	/* Create single prio */
-	prio = fs_create_prio(&steering->rdma_rx_root_ns->ns, 0, 1);
-	return PTR_ERR_OR_ZERO(prio);
+	set_prio_attrs(steering->rdma_rx_root_ns);
+
+	return 0;
+
+out_err:
+	cleanup_root_ns(steering->rdma_rx_root_ns);
+	steering->rdma_rx_root_ns = NULL;
+	return err;
 }
 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
index 17ce9dd56b13..18af6981e0be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
@@ -51,7 +51,7 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
 		return -ENOMEM;
 	}
 
-	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX);
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL);
 	if (!ns) {
 		mlx5_core_err(dev, "Failed to get RDMA RX namespace");
 		err = -EOPNOTSUPP;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 04a569568eac..5235b09a8ef3 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -75,6 +75,7 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_SNIFFER_TX,
 	MLX5_FLOW_NAMESPACE_EGRESS,
 	MLX5_FLOW_NAMESPACE_RDMA_RX,
+	MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
 };
 
 enum {
-- 
cgit v1.2.3


From 6dc6ec9e04c468d994bff6eb660f3146f94cbfd9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 Aug 2019 18:47:10 -0400
Subject: xprtrdma: Cache free MRs in each rpcrdma_req

Instead of a globally-contended MR free list, cache MRs in each
rpcrdma_req as they are released. This means acquiring and releasing
an MR will be lock-free in the common case, even outside the
transport send lock.

The original idea of per-rpcrdma_req MR free lists was suggested by
Shirley Ma <shirley.ma@oracle.com> several years ago. I just now
figured out how to make that idea work with on-demand MR allocation.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/trace/events/rpcrdma.h  | 38 ++++++++++++++++++++++++++++++++++++--
 net/sunrpc/xprtrdma/frwr_ops.c  |  9 ++++++---
 net/sunrpc/xprtrdma/rpc_rdma.c  | 11 ++++++++---
 net/sunrpc/xprtrdma/verbs.c     | 18 +++++++++++++++---
 net/sunrpc/xprtrdma/xprt_rdma.h |  7 ++++---
 5 files changed, 69 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 83c4dfd7feea..a13830616107 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -451,16 +451,50 @@ TRACE_EVENT(xprtrdma_createmrs,
 
 	TP_STRUCT__entry(
 		__field(const void *, r_xprt)
+		__string(addr, rpcrdma_addrstr(r_xprt))
+		__string(port, rpcrdma_portstr(r_xprt))
 		__field(unsigned int, count)
 	),
 
 	TP_fast_assign(
 		__entry->r_xprt = r_xprt;
 		__entry->count = count;
+		__assign_str(addr, rpcrdma_addrstr(r_xprt));
+		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),
 
-	TP_printk("r_xprt=%p: created %u MRs",
-		__entry->r_xprt, __entry->count
+	TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
+		__get_str(addr), __get_str(port), __entry->r_xprt,
+		__entry->count
+	)
+);
+
+TRACE_EVENT(xprtrdma_mr_get,
+	TP_PROTO(
+		const struct rpcrdma_req *req
+	),
+
+	TP_ARGS(req),
+
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+	),
+
+	TP_fast_assign(
+		const struct rpc_rqst *rqst = &req->rl_slot;
+
+		__entry->req = req;
+		__entry->task_id = rqst->rq_task->tk_pid;
+		__entry->client_id = rqst->rq_task->tk_client->cl_clid;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+	),
+
+	TP_printk("task:%u@%u xid=0x%08x req=%p",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__entry->req
 	)
 );
 
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 1f2e3dda7401..0e740bae2d80 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -488,8 +488,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	trace_xprtrdma_wc_li_wake(wc, frwr);
-	complete(&frwr->fr_linv_done);
 	__frwr_release_mr(wc, mr);
+	complete(&frwr->fr_linv_done);
 }
 
 /**
@@ -587,11 +587,15 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct rpcrdma_frwr *frwr =
 		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
 	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+	struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
 
 	/* WARNING: Only wr_cqe and status are reliable at this point */
 	trace_xprtrdma_wc_li_done(wc, frwr);
-	rpcrdma_complete_rqst(frwr->fr_req->rl_reply);
 	__frwr_release_mr(wc, mr);
+
+	/* Ensure @rep is generated before __frwr_release_mr */
+	smp_rmb();
+	rpcrdma_complete_rqst(rep);
 }
 
 /**
@@ -624,7 +628,6 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 
 		frwr = &mr->frwr;
 		frwr->fr_cqe.done = frwr_wc_localinv;
-		frwr->fr_req = req;
 		last = &frwr->fr_invwr;
 		last->next = NULL;
 		last->wr_cqe = &frwr->fr_cqe;
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 34772cb19286..ffeb4dfebd46 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -348,9 +348,14 @@ static struct rpcrdma_mr_seg *rpcrdma_mr_prepare(struct rpcrdma_xprt *r_xprt,
 						 int nsegs, bool writing,
 						 struct rpcrdma_mr **mr)
 {
-	*mr = rpcrdma_mr_get(r_xprt);
-	if (!*mr)
-		goto out_getmr_err;
+	*mr = rpcrdma_mr_pop(&req->rl_free_mrs);
+	if (!*mr) {
+		*mr = rpcrdma_mr_get(r_xprt);
+		if (!*mr)
+			goto out_getmr_err;
+		trace_xprtrdma_mr_get(req);
+		(*mr)->mr_req = req;
+	}
 
 	rpcrdma_mr_push(*mr, &req->rl_registered);
 	return frwr_map(r_xprt, seg, nsegs, writing, req->rl_slot.rq_xid, *mr);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index cb6df58488bb..69753ec73c36 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -77,6 +77,7 @@
 static void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
 static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr);
 static struct rpcrdma_regbuf *
 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
 		     gfp_t flags);
@@ -1022,6 +1023,7 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
 	if (!req->rl_recvbuf)
 		goto out4;
 
+	INIT_LIST_HEAD(&req->rl_free_mrs);
 	INIT_LIST_HEAD(&req->rl_registered);
 	spin_lock(&buffer->rb_lock);
 	list_add(&req->rl_all, &buffer->rb_allreqs);
@@ -1130,11 +1132,13 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
  * This function assumes that the caller prevents concurrent device
  * unload and transport tear-down.
  */
-void
-rpcrdma_req_destroy(struct rpcrdma_req *req)
+void rpcrdma_req_destroy(struct rpcrdma_req *req)
 {
 	list_del(&req->rl_all);
 
+	while (!list_empty(&req->rl_free_mrs))
+		rpcrdma_mr_free(rpcrdma_mr_pop(&req->rl_free_mrs));
+
 	rpcrdma_regbuf_free(req->rl_recvbuf);
 	rpcrdma_regbuf_free(req->rl_sendbuf);
 	rpcrdma_regbuf_free(req->rl_rdmabuf);
@@ -1228,7 +1232,6 @@ rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
 void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 {
 	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 
 	if (mr->mr_dir != DMA_NONE) {
 		trace_xprtrdma_mr_unmap(mr);
@@ -1237,6 +1240,15 @@ void rpcrdma_mr_put(struct rpcrdma_mr *mr)
 		mr->mr_dir = DMA_NONE;
 	}
 
+	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
+}
+
+static void rpcrdma_mr_free(struct rpcrdma_mr *mr)
+{
+	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+
+	mr->mr_req = NULL;
 	spin_lock(&buf->rb_mrlock);
 	rpcrdma_mr_push(mr, &buf->rb_mrs);
 	spin_unlock(&buf->rb_mrlock);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 9573587ca602..c375b0e434ac 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -234,20 +234,20 @@ struct rpcrdma_sendctx {
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
  */
-struct rpcrdma_req;
 struct rpcrdma_frwr {
 	struct ib_mr			*fr_mr;
 	struct ib_cqe			fr_cqe;
 	struct completion		fr_linv_done;
-	struct rpcrdma_req		*fr_req;
 	union {
 		struct ib_reg_wr	fr_regwr;
 		struct ib_send_wr	fr_invwr;
 	};
 };
 
+struct rpcrdma_req;
 struct rpcrdma_mr {
 	struct list_head	mr_list;
+	struct rpcrdma_req	*mr_req;
 	struct scatterlist	*mr_sg;
 	int			mr_nents;
 	enum dma_data_direction	mr_dir;
@@ -325,7 +325,8 @@ struct rpcrdma_req {
 	struct list_head	rl_all;
 	struct kref		rl_kref;
 
-	struct list_head	rl_registered;	/* registered segments */
+	struct list_head	rl_free_mrs;
+	struct list_head	rl_registered;
 	struct rpcrdma_mr_seg	rl_segments[RPCRDMA_MAX_SEGS];
 };
 
-- 
cgit v1.2.3


From 7cc2e18f21008f4093b49099264ca4d65b9aa223 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 19 Aug 2019 14:16:59 +0300
Subject: RDMA/odp: Use the common interval tree library instead of generic

ODP is working with userspace VA's in the interval tree which always fit
into an unsigned long, so we can use the common code.

This comes at a cost of a 16 byte increase in ib_umem_odp struct size due
to storing the interval tree start/last in addition to the umem
addr/length. However these values were computed and are performance
critical for the interval lookup, so this seems like a worthwhile trade
off.

Removes 2k of .text from the kernel.

Link: https://lore.kernel.org/r/20190819111710.18440-2-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/Kconfig         |  1 +
 drivers/infiniband/core/umem_odp.c | 72 ++++++++++----------------------------
 include/rdma/ib_umem_odp.h         | 20 ++++++-----
 3 files changed, 31 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 85e103b147cc..b44b1c322ec8 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -55,6 +55,7 @@ config INFINIBAND_ON_DEMAND_PAGING
 	bool "InfiniBand on-demand paging support"
 	depends on INFINIBAND_USER_MEM
 	select MMU_NOTIFIER
+	select INTERVAL_TREE
 	default y
 	---help---
 	  On demand paging support for the InfiniBand subsystem.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index c0e15db34680..6c17a7c3a565 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -39,45 +39,13 @@
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/hugetlb.h>
-#include <linux/interval_tree_generic.h>
+#include <linux/interval_tree.h>
 #include <linux/pagemap.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_umem_odp.h>
 
-/*
- * The ib_umem list keeps track of memory regions for which the HW
- * device request to receive notification when the related memory
- * mapping is changed.
- *
- * ib_umem_lock protects the list.
- */
-
-static u64 node_start(struct umem_odp_node *n)
-{
-	struct ib_umem_odp *umem_odp =
-			container_of(n, struct ib_umem_odp, interval_tree);
-
-	return ib_umem_start(umem_odp);
-}
-
-/* Note that the representation of the intervals in the interval tree
- * considers the ending point as contained in the interval, while the
- * function ib_umem_end returns the first address which is not contained
- * in the umem.
- */
-static u64 node_last(struct umem_odp_node *n)
-{
-	struct ib_umem_odp *umem_odp =
-			container_of(n, struct ib_umem_odp, interval_tree);
-
-	return ib_umem_end(umem_odp) - 1;
-}
-
-INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
-		     node_start, node_last, static, rbt_ib_umem)
-
 static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
 {
 	mutex_lock(&umem_odp->umem_mutex);
@@ -205,9 +173,18 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-		rbt_ib_umem_insert(&umem_odp->interval_tree,
-				   &per_mm->umem_tree);
+	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) {
+		/*
+		 * Note that the representation of the intervals in the
+		 * interval tree considers the ending point as contained in
+		 * the interval, while the function ib_umem_end returns the
+		 * first address which is not contained in the umem.
+		 */
+		umem_odp->interval_tree.start = ib_umem_start(umem_odp);
+		umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
+		interval_tree_insert(&umem_odp->interval_tree,
+				     &per_mm->umem_tree);
+	}
 	up_write(&per_mm->umem_rwsem);
 }
 
@@ -217,8 +194,8 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 
 	down_write(&per_mm->umem_rwsem);
 	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-		rbt_ib_umem_remove(&umem_odp->interval_tree,
-				   &per_mm->umem_tree);
+		interval_tree_remove(&umem_odp->interval_tree,
+				     &per_mm->umem_tree);
 	complete_all(&umem_odp->notifier_completion);
 
 	up_write(&per_mm->umem_rwsem);
@@ -761,18 +738,18 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 				  void *cookie)
 {
 	int ret_val = 0;
-	struct umem_odp_node *node, *next;
+	struct interval_tree_node *node, *next;
 	struct ib_umem_odp *umem;
 
 	if (unlikely(start == last))
 		return ret_val;
 
-	for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+	for (node = interval_tree_iter_first(root, start, last - 1);
 			node; node = next) {
 		/* TODO move the blockable decision up to the callback */
 		if (!blockable)
 			return -EAGAIN;
-		next = rbt_ib_umem_iter_next(node, start, last - 1);
+		next = interval_tree_iter_next(node, start, last - 1);
 		umem = container_of(node, struct ib_umem_odp, interval_tree);
 		ret_val = cb(umem, start, last, cookie) || ret_val;
 	}
@@ -780,16 +757,3 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
 	return ret_val;
 }
 EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
-
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-				       u64 addr, u64 length)
-{
-	struct umem_odp_node *node;
-
-	node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
-	if (node)
-		return container_of(node, struct ib_umem_odp, interval_tree);
-	return NULL;
-
-}
-EXPORT_SYMBOL(rbt_ib_umem_lookup);
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 479db5c98ff6..030d5cbad02c 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -37,11 +37,6 @@
 #include <rdma/ib_verbs.h>
 #include <linux/interval_tree.h>
 
-struct umem_odp_node {
-	u64 __subtree_last;
-	struct rb_node rb;
-};
-
 struct ib_umem_odp {
 	struct ib_umem umem;
 	struct ib_ucontext_per_mm *per_mm;
@@ -72,7 +67,7 @@ struct ib_umem_odp {
 	int npages;
 
 	/* Tree tracking */
-	struct umem_odp_node	interval_tree;
+	struct interval_tree_node interval_tree;
 
 	struct completion	notifier_completion;
 	int			dying;
@@ -163,8 +158,17 @@ int rbt_ib_umem_for_each_in_range(struct rb_root_cached *root,
  * Find first region intersecting with address range.
  * Return NULL if not found
  */
-struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root_cached *root,
-				       u64 addr, u64 length);
+static inline struct ib_umem_odp *
+rbt_ib_umem_lookup(struct rb_root_cached *root, u64 addr, u64 length)
+{
+	struct interval_tree_node *node;
+
+	node = interval_tree_iter_first(root, addr, addr + length - 1);
+	if (!node)
+		return NULL;
+	return container_of(node, struct ib_umem_odp, interval_tree);
+
+}
 
 static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
 					     unsigned long mmu_seq)
-- 
cgit v1.2.3


From 7b81cb6bddd2c4f2489506771070924bd0ae9902 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Aug 2019 08:24:32 +0200
Subject: usb: add a HCD_DMA flag instead of guestimating DMA capabilities

The usb core is the only major place in the kernel that checks for
a non-NULL device dma_mask to see if a device is DMA capable.  This
is generally a bad idea, as all major busses always set up a DMA mask,
even if the device is not DMA capable - in fact bus layers like PCI
can't even know if a device is DMA capable at enumeration time.  This
leads to lots of workaround in HCD drivers, and also prevented us from
setting up a DMA mask for platform devices by default last time we
tried.

Replace this guess with an explicit HCD_DMA that is set by drivers that
appear to have DMA support.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20190816062435.881-4-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/octeon-usb/octeon-hcd.c | 2 +-
 drivers/usb/core/hcd.c                  | 1 -
 drivers/usb/dwc2/hcd.c                  | 6 +++---
 drivers/usb/host/ehci-grlib.c           | 2 +-
 drivers/usb/host/ehci-hcd.c             | 2 +-
 drivers/usb/host/ehci-pmcmsp.c          | 2 +-
 drivers/usb/host/ehci-ppc-of.c          | 2 +-
 drivers/usb/host/ehci-ps3.c             | 2 +-
 drivers/usb/host/ehci-sh.c              | 2 +-
 drivers/usb/host/ehci-xilinx-of.c       | 2 +-
 drivers/usb/host/fhci-hcd.c             | 2 +-
 drivers/usb/host/fotg210-hcd.c          | 2 +-
 drivers/usb/host/imx21-hcd.c            | 2 +-
 drivers/usb/host/isp116x-hcd.c          | 6 ------
 drivers/usb/host/isp1362-hcd.c          | 5 -----
 drivers/usb/host/ohci-hcd.c             | 2 +-
 drivers/usb/host/ohci-ppc-of.c          | 2 +-
 drivers/usb/host/ohci-ps3.c             | 2 +-
 drivers/usb/host/ohci-sa1111.c          | 2 +-
 drivers/usb/host/ohci-sm501.c           | 2 +-
 drivers/usb/host/ohci-tmio.c            | 2 +-
 drivers/usb/host/oxu210hp-hcd.c         | 3 ---
 drivers/usb/host/r8a66597-hcd.c         | 6 ------
 drivers/usb/host/sl811-hcd.c            | 6 ------
 drivers/usb/host/u132-hcd.c             | 2 --
 drivers/usb/host/uhci-grlib.c           | 2 +-
 drivers/usb/host/uhci-pci.c             | 2 +-
 drivers/usb/host/uhci-platform.c        | 2 +-
 drivers/usb/host/xhci.c                 | 2 +-
 drivers/usb/isp1760/isp1760-core.c      | 3 ---
 drivers/usb/isp1760/isp1760-if.c        | 1 -
 drivers/usb/musb/musb_host.c            | 2 +-
 drivers/usb/renesas_usbhs/mod_host.c    | 2 +-
 include/linux/usb.h                     | 1 -
 include/linux/usb/hcd.h                 | 7 +++++--
 35 files changed, 31 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/drivers/staging/octeon-usb/octeon-hcd.c b/drivers/staging/octeon-usb/octeon-hcd.c
index cd2b777073c4..a5321cc692c5 100644
--- a/drivers/staging/octeon-usb/octeon-hcd.c
+++ b/drivers/staging/octeon-usb/octeon-hcd.c
@@ -3512,7 +3512,7 @@ static const struct hc_driver octeon_hc_driver = {
 	.product_desc		= "Octeon Host Controller",
 	.hcd_priv_size		= sizeof(struct octeon_hcd),
 	.irq			= octeon_usb_irq,
-	.flags			= HCD_MEMORY | HCD_USB2,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB2,
 	.start			= octeon_usb_start,
 	.stop			= octeon_usb_stop,
 	.urb_enqueue		= octeon_usb_urb_enqueue,
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 8592c0344fe8..add2af4af766 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2454,7 +2454,6 @@ struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver,
 	hcd->self.controller = dev;
 	hcd->self.sysdev = sysdev;
 	hcd->self.bus_name = bus_name;
-	hcd->self.uses_dma = (sysdev->dma_mask != NULL);
 
 	timer_setup(&hcd->rh_timer, rh_timer_func, 0);
 #ifdef CONFIG_PM
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 111787a137ee..81afe553aa66 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -5062,13 +5062,13 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg)
 		dwc2_hc_driver.reset_device = dwc2_reset_device;
 	}
 
+	if (hsotg->params.host_dma)
+		dwc2_hc_driver.flags |= HCD_DMA;
+
 	hcd = usb_create_hcd(&dwc2_hc_driver, hsotg->dev, dev_name(hsotg->dev));
 	if (!hcd)
 		goto error1;
 
-	if (!hsotg->params.host_dma)
-		hcd->self.uses_dma = 0;
-
 	hcd->has_tt = 1;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/drivers/usb/host/ehci-grlib.c b/drivers/usb/host/ehci-grlib.c
index 656b8c08efc8..a2c3b4ec8a8b 100644
--- a/drivers/usb/host/ehci-grlib.c
+++ b/drivers/usb/host/ehci-grlib.c
@@ -30,7 +30,7 @@ static const struct hc_driver ehci_grlib_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq			= ehci_irq,
-	.flags			= HCD_MEMORY | HCD_USB2 | HCD_BH,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB2 | HCD_BH,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 9da7e22848c9..cf2b7ae93b7e 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -1193,7 +1193,7 @@ static const struct hc_driver ehci_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq =			ehci_irq,
-	.flags =		HCD_MEMORY | HCD_USB2 | HCD_BH,
+	.flags =		HCD_MEMORY | HCD_DMA | HCD_USB2 | HCD_BH,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ehci-pmcmsp.c b/drivers/usb/host/ehci-pmcmsp.c
index 46e160370d6e..a2b610dbedfc 100644
--- a/drivers/usb/host/ehci-pmcmsp.c
+++ b/drivers/usb/host/ehci-pmcmsp.c
@@ -250,7 +250,7 @@ static const struct hc_driver ehci_msp_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq =			ehci_irq,
-	.flags =		HCD_MEMORY | HCD_USB2 | HCD_BH,
+	.flags =		HCD_MEMORY | HCD_DMA | HCD_USB2 | HCD_BH,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c
index 576f7d79ad4e..6bbaee74f7e7 100644
--- a/drivers/usb/host/ehci-ppc-of.c
+++ b/drivers/usb/host/ehci-ppc-of.c
@@ -31,7 +31,7 @@ static const struct hc_driver ehci_ppc_of_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq			= ehci_irq,
-	.flags			= HCD_MEMORY | HCD_USB2 | HCD_BH,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB2 | HCD_BH,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ehci-ps3.c b/drivers/usb/host/ehci-ps3.c
index 454d8c624a3f..fb52133c3557 100644
--- a/drivers/usb/host/ehci-ps3.c
+++ b/drivers/usb/host/ehci-ps3.c
@@ -59,7 +59,7 @@ static const struct hc_driver ps3_ehci_hc_driver = {
 	.product_desc		= "PS3 EHCI Host Controller",
 	.hcd_priv_size		= sizeof(struct ehci_hcd),
 	.irq			= ehci_irq,
-	.flags			= HCD_MEMORY | HCD_USB2 | HCD_BH,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB2 | HCD_BH,
 	.reset			= ps3_ehci_hc_reset,
 	.start			= ehci_run,
 	.stop			= ehci_stop,
diff --git a/drivers/usb/host/ehci-sh.c b/drivers/usb/host/ehci-sh.c
index ef75b9d70eb4..2afde14dc425 100644
--- a/drivers/usb/host/ehci-sh.c
+++ b/drivers/usb/host/ehci-sh.c
@@ -33,7 +33,7 @@ static const struct hc_driver ehci_sh_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq				= ehci_irq,
-	.flags				= HCD_USB2 | HCD_MEMORY | HCD_BH,
+	.flags				= HCD_USB2 | HCD_DMA | HCD_MEMORY | HCD_BH,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ehci-xilinx-of.c b/drivers/usb/host/ehci-xilinx-of.c
index d2a27578e440..67a6ee8cb5d8 100644
--- a/drivers/usb/host/ehci-xilinx-of.c
+++ b/drivers/usb/host/ehci-xilinx-of.c
@@ -66,7 +66,7 @@ static const struct hc_driver ehci_xilinx_of_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq			= ehci_irq,
-	.flags			= HCD_MEMORY | HCD_USB2 | HCD_BH,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB2 | HCD_BH,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/fhci-hcd.c b/drivers/usb/host/fhci-hcd.c
index 48fe9e6c2465..04733876c9c6 100644
--- a/drivers/usb/host/fhci-hcd.c
+++ b/drivers/usb/host/fhci-hcd.c
@@ -538,7 +538,7 @@ static const struct hc_driver fhci_driver = {
 
 	/* generic hardware linkage */
 	.irq = fhci_irq,
-	.flags = HCD_USB11 | HCD_MEMORY,
+	.flags = HCD_DMA | HCD_USB11 | HCD_MEMORY,
 
 	/* basic lifecycle operation */
 	.start = fhci_start,
diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index 0dbfa5c10703..9e0c98d6bdb0 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -5508,7 +5508,7 @@ static const struct hc_driver fotg210_fotg210_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq			= fotg210_irq,
-	.flags			= HCD_MEMORY | HCD_USB2,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB2,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c
index e406c5459a97..5835f9966204 100644
--- a/drivers/usb/host/imx21-hcd.c
+++ b/drivers/usb/host/imx21-hcd.c
@@ -1771,7 +1771,7 @@ static const struct hc_driver imx21_hc_driver = {
 	.product_desc = "IMX21 USB Host Controller",
 	.hcd_priv_size = sizeof(struct imx21),
 
-	.flags = HCD_USB11,
+	.flags = HCD_DMA | HCD_USB11,
 	.irq = imx21_irq,
 
 	.reset = imx21_hc_reset,
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 74da136d322a..a87c0b26279e 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -1581,12 +1581,6 @@ static int isp116x_probe(struct platform_device *pdev)
 	irq = ires->start;
 	irqflags = ires->flags & IRQF_TRIGGER_MASK;
 
-	if (pdev->dev.dma_mask) {
-		DBG("DMA not supported\n");
-		ret = -EINVAL;
-		goto err1;
-	}
-
 	if (!request_mem_region(addr->start, 2, hcd_name)) {
 		ret = -EBUSY;
 		goto err1;
diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index 28bf8bfb091e..96f8daa11f25 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -2645,11 +2645,6 @@ static int isp1362_probe(struct platform_device *pdev)
 	if (pdev->num_resources < 3)
 		return -ENODEV;
 
-	if (pdev->dev.dma_mask) {
-		DBG(1, "won't do DMA");
-		return -ENODEV;
-	}
-
 	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	if (!irq_res)
 		return -ENODEV;
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index b457fdaff297..1eb8d17e19db 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -1178,7 +1178,7 @@ static const struct hc_driver ohci_hc_driver = {
 	 * generic hardware linkage
 	*/
 	.irq =                  ohci_irq,
-	.flags =                HCD_MEMORY | HCD_USB11,
+	.flags =                HCD_MEMORY | HCD_DMA | HCD_USB11,
 
 	/*
 	* basic lifecycle operations
diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c
index 76a9b40b08f1..45f7cceb6df3 100644
--- a/drivers/usb/host/ohci-ppc-of.c
+++ b/drivers/usb/host/ohci-ppc-of.c
@@ -50,7 +50,7 @@ static const struct hc_driver ohci_ppc_of_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq =			ohci_irq,
-	.flags =		HCD_USB11 | HCD_MEMORY,
+	.flags =		HCD_USB11 | HCD_DMA | HCD_MEMORY,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ohci-ps3.c b/drivers/usb/host/ohci-ps3.c
index 395f9d3bc849..f77cd6af0ccf 100644
--- a/drivers/usb/host/ohci-ps3.c
+++ b/drivers/usb/host/ohci-ps3.c
@@ -46,7 +46,7 @@ static const struct hc_driver ps3_ohci_hc_driver = {
 	.product_desc		= "PS3 OHCI Host Controller",
 	.hcd_priv_size		= sizeof(struct ohci_hcd),
 	.irq			= ohci_irq,
-	.flags			= HCD_MEMORY | HCD_USB11,
+	.flags			= HCD_MEMORY | HCD_DMA | HCD_USB11,
 	.reset			= ps3_ohci_hc_reset,
 	.start			= ps3_ohci_hc_start,
 	.stop			= ohci_stop,
diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c
index ebec9a7699e3..8e19a5eb5b62 100644
--- a/drivers/usb/host/ohci-sa1111.c
+++ b/drivers/usb/host/ohci-sa1111.c
@@ -84,7 +84,7 @@ static const struct hc_driver ohci_sa1111_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq =			ohci_irq,
-	.flags =		HCD_USB11 | HCD_MEMORY,
+	.flags =		HCD_USB11 | HCD_DMA | HCD_MEMORY,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ohci-sm501.c b/drivers/usb/host/ohci-sm501.c
index c158cda9e4b9..0b2aea6e28d4 100644
--- a/drivers/usb/host/ohci-sm501.c
+++ b/drivers/usb/host/ohci-sm501.c
@@ -49,7 +49,7 @@ static const struct hc_driver ohci_sm501_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq =			ohci_irq,
-	.flags =		HCD_USB11 | HCD_MEMORY,
+	.flags =		HCD_USB11 | HCD_DMA | HCD_MEMORY,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c
index fb6f5e9ae5c6..221593d75556 100644
--- a/drivers/usb/host/ohci-tmio.c
+++ b/drivers/usb/host/ohci-tmio.c
@@ -156,7 +156,7 @@ static const struct hc_driver ohci_tmio_hc_driver = {
 
 	/* generic hardware linkage */
 	.irq =			ohci_irq,
-	.flags =		HCD_USB11 | HCD_MEMORY,
+	.flags =		HCD_USB11 | HCD_DMA | HCD_MEMORY,
 
 	/* basic lifecycle operations */
 	.start =		ohci_tmio_start,
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 09cc19df798e..e67242e437ed 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -3088,9 +3088,6 @@ static int oxu_reset(struct usb_hcd *hcd)
 	INIT_LIST_HEAD(&oxu->urb_list);
 	oxu->urb_len = 0;
 
-	/* FIMXE */
-	hcd->self.controller->dma_mask = NULL;
-
 	if (oxu->is_otg) {
 		oxu->caps = hcd->regs + OXU_OTG_CAP_OFFSET;
 		oxu->regs = hcd->regs + OXU_OTG_CAP_OFFSET + \
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 42668aeca57c..0c03ac6b0213 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -2411,12 +2411,6 @@ static int r8a66597_probe(struct platform_device *pdev)
 	if (usb_disabled())
 		return -ENODEV;
 
-	if (pdev->dev.dma_mask) {
-		ret = -EINVAL;
-		dev_err(&pdev->dev, "dma not supported\n");
-		goto clean_up;
-	}
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		ret = -ENODEV;
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 5b061e599948..72a34a1eb618 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1632,12 +1632,6 @@ sl811h_probe(struct platform_device *dev)
 	irq = ires->start;
 	irqflags = ires->flags & IRQF_TRIGGER_MASK;
 
-	/* refuse to confuse usbcore */
-	if (dev->dev.dma_mask) {
-		dev_dbg(&dev->dev, "no we won't dma\n");
-		return -EINVAL;
-	}
-
 	/* the chip may be wired for either kind of addressing */
 	addr = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	data = platform_get_resource(dev, IORESOURCE_MEM, 1);
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 400c40bc43a6..4efee34f154f 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -3077,8 +3077,6 @@ static int u132_probe(struct platform_device *pdev)
 	retval = ftdi_read_pcimem(pdev, roothub.a, &rh_a);
 	if (retval)
 		return retval;
-	if (pdev->dev.dma_mask)
-		return -EINVAL;
 
 	hcd = usb_create_hcd(&u132_hc_driver, &pdev->dev, dev_name(&pdev->dev));
 	if (!hcd) {
diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c
index 2103b1ed0f8f..0a201a73b196 100644
--- a/drivers/usb/host/uhci-grlib.c
+++ b/drivers/usb/host/uhci-grlib.c
@@ -63,7 +63,7 @@ static const struct hc_driver uhci_grlib_hc_driver = {
 
 	/* Generic hardware linkage */
 	.irq =			uhci_irq,
-	.flags =		HCD_MEMORY | HCD_USB11,
+	.flags =		HCD_MEMORY | HCD_DMA | HCD_USB11,
 
 	/* Basic lifecycle operations */
 	.reset =		uhci_grlib_init,
diff --git a/drivers/usb/host/uhci-pci.c b/drivers/usb/host/uhci-pci.c
index 0dd944277c99..0fa3d72bae26 100644
--- a/drivers/usb/host/uhci-pci.c
+++ b/drivers/usb/host/uhci-pci.c
@@ -261,7 +261,7 @@ static const struct hc_driver uhci_driver = {
 
 	/* Generic hardware linkage */
 	.irq =			uhci_irq,
-	.flags =		HCD_USB11,
+	.flags =		HCD_DMA | HCD_USB11,
 
 	/* Basic lifecycle operations */
 	.reset =		uhci_pci_init,
diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c
index 89700e26fb29..70dbd95c3f06 100644
--- a/drivers/usb/host/uhci-platform.c
+++ b/drivers/usb/host/uhci-platform.c
@@ -41,7 +41,7 @@ static const struct hc_driver uhci_platform_hc_driver = {
 
 	/* Generic hardware linkage */
 	.irq =			uhci_irq,
-	.flags =		HCD_MEMORY | HCD_USB11,
+	.flags =		HCD_MEMORY | HCD_DMA | HCD_USB11,
 
 	/* Basic lifecycle operations */
 	.reset =		uhci_platform_init,
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 03d1e552769b..e315c0158e90 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -5217,7 +5217,7 @@ static const struct hc_driver xhci_hc_driver = {
 	 * generic hardware linkage
 	 */
 	.irq =			xhci_irq,
-	.flags =		HCD_MEMORY | HCD_USB3 | HCD_SHARED,
+	.flags =		HCD_MEMORY | HCD_DMA | HCD_USB3 | HCD_SHARED,
 
 	/*
 	 * basic lifecycle operations
diff --git a/drivers/usb/isp1760/isp1760-core.c b/drivers/usb/isp1760/isp1760-core.c
index 55b94fd10331..fdeb4cf97cc5 100644
--- a/drivers/usb/isp1760/isp1760-core.c
+++ b/drivers/usb/isp1760/isp1760-core.c
@@ -120,9 +120,6 @@ int isp1760_register(struct resource *mem, int irq, unsigned long irqflags,
 	    (!IS_ENABLED(CONFIG_USB_ISP1761_UDC) || udc_disabled))
 		return -ENODEV;
 
-	/* prevent usb-core allocating DMA pages */
-	dev->dma_mask = NULL;
-
 	isp = devm_kzalloc(dev, sizeof(*isp), GFP_KERNEL);
 	if (!isp)
 		return -ENOMEM;
diff --git a/drivers/usb/isp1760/isp1760-if.c b/drivers/usb/isp1760/isp1760-if.c
index 241a00d75027..07cc82ff327c 100644
--- a/drivers/usb/isp1760/isp1760-if.c
+++ b/drivers/usb/isp1760/isp1760-if.c
@@ -139,7 +139,6 @@ static int isp1761_pci_probe(struct pci_dev *dev,
 
 	pci_set_master(dev);
 
-	dev->dev.dma_mask = NULL;
 	ret = isp1760_register(&dev->resource[3], dev->irq, 0, &dev->dev,
 			       devflags);
 	if (ret < 0)
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index eb308ec35c66..5a44b70372d9 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2689,7 +2689,7 @@ static const struct hc_driver musb_hc_driver = {
 	.description		= "musb-hcd",
 	.product_desc		= "MUSB HDRC host driver",
 	.hcd_priv_size		= sizeof(struct musb *),
-	.flags			= HCD_USB2 | HCD_MEMORY,
+	.flags			= HCD_USB2 | HCD_DMA | HCD_MEMORY,
 
 	/* not using irq handler or reset hooks from usbcore, since
 	 * those must be shared with peripheral code for OTG configs
diff --git a/drivers/usb/renesas_usbhs/mod_host.c b/drivers/usb/renesas_usbhs/mod_host.c
index ddd3be48f948..ae54221011c3 100644
--- a/drivers/usb/renesas_usbhs/mod_host.c
+++ b/drivers/usb/renesas_usbhs/mod_host.c
@@ -1283,7 +1283,7 @@ static const struct hc_driver usbhsh_driver = {
 	/*
 	 * generic hardware linkage
 	 */
-	.flags =		HCD_USB2,
+	.flags =		HCD_DMA | HCD_USB2,
 
 	.start =		usbhsh_host_start,
 	.stop =			usbhsh_host_stop,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index b548c530f988..e656e7b4b1e4 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -426,7 +426,6 @@ struct usb_bus {
 	struct device *sysdev;		/* as seen from firmware or bus */
 	int busnum;			/* Bus number (in order of reg) */
 	const char *bus_name;		/* stable id (PCI slot_name etc) */
-	u8 uses_dma;			/* Does the host controller use DMA? */
 	u8 uses_pio_for_control;	/*
 					 * Does the host controller use PIO
 					 * for control transfers?
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index a20e7815d814..8d3869c7de85 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -256,6 +256,7 @@ struct hc_driver {
 
 	int	flags;
 #define	HCD_MEMORY	0x0001		/* HC regs use memory (else I/O) */
+#define	HCD_DMA		0x0002		/* HC uses DMA */
 #define	HCD_SHARED	0x0004		/* Two (or more) usb_hcds share HW */
 #define	HCD_USB11	0x0010		/* USB 1.1 */
 #define	HCD_USB2	0x0020		/* USB 2.0 */
@@ -422,8 +423,10 @@ static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd,
 	return hcd->high_prio_bh.completing_ep == ep;
 }
 
-#define hcd_uses_dma(hcd) \
-	(IS_ENABLED(CONFIG_HAS_DMA) && (hcd)->self.uses_dma)
+static inline bool hcd_uses_dma(struct usb_hcd *hcd)
+{
+	return IS_ENABLED(CONFIG_HAS_DMA) && (hcd->driver->flags & HCD_DMA);
+}
 
 extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
 extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
-- 
cgit v1.2.3


From bd5defaee872da9b81e3c72045eb6794445cd2e6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Aug 2019 08:24:34 +0200
Subject: dma-mapping: remove is_device_dma_capable

No users left.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20190816062435.881-6-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/dma-mapping.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea32c78..14702e2d6fa8 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -149,11 +149,6 @@ static inline int valid_dma_direction(int dma_direction)
 		(dma_direction == DMA_FROM_DEVICE));
 }
 
-static inline int is_device_dma_capable(struct device *dev)
-{
-	return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE;
-}
-
 #ifdef CONFIG_DMA_DECLARE_COHERENT
 /*
  * These three functions are only for dma allocator.
-- 
cgit v1.2.3


From fd7dbf035edcfb035977423e2a5102832c1427f4 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 19 Aug 2019 14:17:01 +0300
Subject: RDMA/odp: Make it clearer when a umem is an implicit ODP umem

Implicit ODP umems are special, they don't have any page lists, they don't
exist in the interval tree and they are never DMA mapped.

Instead of trying to guess this based on a zero length use an explicit
flag.

Further, do not allow non-implicit umems to be 0 size.

Link: https://lore.kernel.org/r/20190819111710.18440-4-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c | 54 +++++++++++++++++++++-----------------
 drivers/infiniband/hw/mlx5/mr.c    |  2 +-
 drivers/infiniband/hw/mlx5/odp.c   |  2 +-
 include/rdma/ib_umem_odp.h         |  8 ++++++
 4 files changed, 40 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 77adf405e23c..7300d0a10d1e 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -176,18 +176,15 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp))) {
-		/*
-		 * Note that the representation of the intervals in the
-		 * interval tree considers the ending point as contained in
-		 * the interval, while the function ib_umem_end returns the
-		 * first address which is not contained in the umem.
-		 */
-		umem_odp->interval_tree.start = ib_umem_start(umem_odp);
-		umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
-		interval_tree_insert(&umem_odp->interval_tree,
-				     &per_mm->umem_tree);
-	}
+	/*
+	 * Note that the representation of the intervals in the interval tree
+	 * considers the ending point as contained in the interval, while the
+	 * function ib_umem_end returns the first address which is not
+	 * contained in the umem.
+	 */
+	umem_odp->interval_tree.start = ib_umem_start(umem_odp);
+	umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
+	interval_tree_insert(&umem_odp->interval_tree, &per_mm->umem_tree);
 	up_write(&per_mm->umem_rwsem);
 }
 
@@ -196,11 +193,8 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
 
 	down_write(&per_mm->umem_rwsem);
-	if (likely(ib_umem_start(umem_odp) != ib_umem_end(umem_odp)))
-		interval_tree_remove(&umem_odp->interval_tree,
-				     &per_mm->umem_tree);
+	interval_tree_remove(&umem_odp->interval_tree, &per_mm->umem_tree);
 	complete_all(&umem_odp->notifier_completion);
-
 	up_write(&per_mm->umem_rwsem);
 }
 
@@ -320,6 +314,9 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 	int pages = size >> PAGE_SHIFT;
 	int ret;
 
+	if (!size)
+		return ERR_PTR(-EINVAL);
+
 	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
 	if (!odp_data)
 		return ERR_PTR(-ENOMEM);
@@ -381,6 +378,9 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	struct mm_struct *mm = umem->owning_mm;
 	int ret_val;
 
+	if (umem_odp->umem.address == 0 && umem_odp->umem.length == 0)
+		umem_odp->is_implicit_odp = 1;
+
 	umem_odp->page_shift = PAGE_SHIFT;
 	if (access & IB_ACCESS_HUGETLB) {
 		struct vm_area_struct *vma;
@@ -401,7 +401,10 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 
 	init_completion(&umem_odp->notifier_completion);
 
-	if (ib_umem_odp_num_pages(umem_odp)) {
+	if (!umem_odp->is_implicit_odp) {
+		if (!ib_umem_odp_num_pages(umem_odp))
+			return -EINVAL;
+
 		umem_odp->page_list =
 			vzalloc(array_size(sizeof(*umem_odp->page_list),
 					   ib_umem_odp_num_pages(umem_odp)));
@@ -420,7 +423,9 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	ret_val = get_per_mm(umem_odp);
 	if (ret_val)
 		goto out_dma_list;
-	add_umem_to_per_mm(umem_odp);
+
+	if (!umem_odp->is_implicit_odp)
+		add_umem_to_per_mm(umem_odp);
 
 	return 0;
 
@@ -439,13 +444,14 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	 * It is the driver's responsibility to ensure, before calling us,
 	 * that the hardware will not attempt to access the MR any more.
 	 */
-	ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
-				    ib_umem_end(umem_odp));
-
-	remove_umem_from_per_mm(umem_odp);
+	if (!umem_odp->is_implicit_odp) {
+		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+					    ib_umem_end(umem_odp));
+		remove_umem_from_per_mm(umem_odp);
+		vfree(umem_odp->dma_list);
+		vfree(umem_odp->page_list);
+	}
 	put_per_mm(umem_odp);
-	vfree(umem_odp->dma_list);
-	vfree(umem_odp->page_list);
 }
 
 /*
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index b74fad08412f..ba2ec495b6e3 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1600,7 +1600,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 		/* Wait for all running page-fault handlers to finish. */
 		synchronize_srcu(&dev->mr_srcu);
 		/* Destroy all page mappings */
-		if (umem_odp->page_list)
+		if (!umem_odp->is_implicit_odp)
 			mlx5_ib_invalidate_range(umem_odp,
 						 ib_umem_start(umem_odp),
 						 ib_umem_end(umem_odp));
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 82b716a28ec1..80c07d85b966 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -584,7 +584,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 	struct ib_umem_odp *odp;
 	size_t size;
 
-	if (!odp_mr->page_list) {
+	if (odp_mr->is_implicit_odp) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
 		if (IS_ERR(odp))
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 030d5cbad02c..14b38b4459c5 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -69,6 +69,14 @@ struct ib_umem_odp {
 	/* Tree tracking */
 	struct interval_tree_node interval_tree;
 
+	/*
+	 * An implicit odp umem cannot be DMA mapped, has 0 length, and serves
+	 * only as an anchor for the driver to hold onto the per_mm. FIXME:
+	 * This should be removed and drivers should work with the per_mm
+	 * directly.
+	 */
+	bool is_implicit_odp;
+
 	struct completion	notifier_completion;
 	int			dying;
 	unsigned int		page_shift;
-- 
cgit v1.2.3


From f20bef6a951b6ef619655ed846113f706d0824d7 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 19 Aug 2019 14:17:03 +0300
Subject: RDMA/odp: Make the three ways to create a umem_odp clear

The three paths to build the umem_odps are kind of muddled, they are:
- As a normal ib_mr umem
- As a child in an implicit ODP umem tree
- As the root of an implicit ODP umem tree

Only the first two are actually umem's, the last is an abuse.

The implicit case can only be triggered by explicit driver request, it
should never be co-mingled with the normal case. While we are here, make
sensible function names and add some comments to make this clearer.

Link: https://lore.kernel.org/r/20190819111710.18440-6-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c | 80 +++++++++++++++++++++++++++++++++++---
 drivers/infiniband/hw/mlx5/odp.c   | 23 ++++++-----
 include/rdma/ib_umem_odp.h         |  6 ++-
 3 files changed, 89 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 1643ff374b58..198c0ce04998 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -46,6 +46,8 @@
 #include <rdma/ib_umem.h>
 #include <rdma/ib_umem_odp.h>
 
+#include "uverbs.h"
+
 static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
 {
 	mutex_lock(&umem_odp->umem_mutex);
@@ -344,8 +346,67 @@ out_page_list:
 	return ret;
 }
 
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
-				      unsigned long addr, size_t size)
+/**
+ * ib_umem_odp_alloc_implicit - Allocate a parent implicit ODP umem
+ *
+ * Implicit ODP umems do not have a VA range and do not have any page lists.
+ * They exist only to hold the per_mm reference to help the driver create
+ * children umems.
+ *
+ * @udata: udata from the syscall being used to create the umem
+ * @access: ib_reg_mr access flags
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+					       int access)
+{
+	struct ib_ucontext *context =
+		container_of(udata, struct uverbs_attr_bundle, driver_udata)
+			->context;
+	struct ib_umem *umem;
+	struct ib_umem_odp *umem_odp;
+	int ret;
+
+	if (access & IB_ACCESS_HUGETLB)
+		return ERR_PTR(-EINVAL);
+
+	if (!context)
+		return ERR_PTR(-EIO);
+	if (WARN_ON_ONCE(!context->invalidate_range))
+		return ERR_PTR(-EINVAL);
+
+	umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
+	if (!umem_odp)
+		return ERR_PTR(-ENOMEM);
+	umem = &umem_odp->umem;
+	umem->context = context;
+	umem->writable = ib_access_writable(access);
+	umem->owning_mm = current->mm;
+	umem_odp->is_implicit_odp = 1;
+	umem_odp->page_shift = PAGE_SHIFT;
+
+	ret = ib_init_umem_odp(umem_odp, NULL);
+	if (ret) {
+		kfree(umem_odp);
+		return ERR_PTR(ret);
+	}
+
+	mmgrab(umem->owning_mm);
+
+	return umem_odp;
+}
+EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
+
+/**
+ * ib_umem_odp_alloc_child - Allocate a child ODP umem under an implicit
+ *                           parent ODP umem
+ *
+ * @root: The parent umem enclosing the child. This must be allocated using
+ *        ib_alloc_implicit_odp_umem()
+ * @addr: The starting userspace VA
+ * @size: The length of the userspace VA
+ */
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root,
+					    unsigned long addr, size_t size)
 {
 	/*
 	 * Caller must ensure that root cannot be freed during the call to
@@ -355,6 +416,9 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 	struct ib_umem *umem;
 	int ret;
 
+	if (WARN_ON(!root->is_implicit_odp))
+		return ERR_PTR(-EINVAL);
+
 	odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
 	if (!odp_data)
 		return ERR_PTR(-ENOMEM);
@@ -376,8 +440,15 @@ struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root,
 
 	return odp_data;
 }
-EXPORT_SYMBOL(ib_alloc_odp_umem);
+EXPORT_SYMBOL(ib_umem_odp_alloc_child);
 
+/**
+ * ib_umem_odp_get - Complete ib_umem_get()
+ *
+ * @umem_odp: The partially configured umem from ib_umem_get()
+ * @addr: The starting userspace VA
+ * @access: ib_reg_mr access flags
+ */
 int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 {
 	/*
@@ -386,9 +457,6 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 	 */
 	struct mm_struct *mm = umem_odp->umem.owning_mm;
 
-	if (umem_odp->umem.address == 0 && umem_odp->umem.length == 0)
-		umem_odp->is_implicit_odp = 1;
-
 	umem_odp->page_shift = PAGE_SHIFT;
 	if (access & IB_ACCESS_HUGETLB) {
 		struct vm_area_struct *vma;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 80c07d85b966..ad209ae44f05 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -384,7 +384,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
 }
 
 static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
-					    struct ib_umem *umem,
+					    struct ib_umem_odp *umem_odp,
 					    bool ksm, int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -402,7 +402,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
 	mr->dev = dev;
 	mr->access_flags = access_flags;
 	mr->mmkey.iova = 0;
-	mr->umem = umem;
+	mr->umem = &umem_odp->umem;
 
 	if (ksm) {
 		err = mlx5_ib_update_xlt(mr, 0,
@@ -462,14 +462,13 @@ next_mr:
 		if (nentries)
 			nentries++;
 	} else {
-		odp = ib_alloc_odp_umem(odp_mr, addr,
-					MLX5_IMR_MTT_SIZE);
+		odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
 		if (IS_ERR(odp)) {
 			mutex_unlock(&odp_mr->umem_mutex);
 			return ERR_CAST(odp);
 		}
 
-		mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+		mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
 					mr->access_flags);
 		if (IS_ERR(mtt)) {
 			mutex_unlock(&odp_mr->umem_mutex);
@@ -519,19 +518,19 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 					     int access_flags)
 {
 	struct mlx5_ib_mr *imr;
-	struct ib_umem *umem;
+	struct ib_umem_odp *umem_odp;
 
-	umem = ib_umem_get(udata, 0, 0, access_flags, 0);
-	if (IS_ERR(umem))
-		return ERR_CAST(umem);
+	umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags);
+	if (IS_ERR(umem_odp))
+		return ERR_CAST(umem_odp);
 
-	imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+	imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags);
 	if (IS_ERR(imr)) {
-		ib_umem_release(umem);
+		ib_umem_release(&umem_odp->umem);
 		return ERR_CAST(imr);
 	}
 
-	imr->umem = umem;
+	imr->umem = &umem_odp->umem;
 	init_waitqueue_head(&imr->q_leaf_free);
 	atomic_set(&imr->num_leaf_free, 0);
 	atomic_set(&imr->num_pending_prefetch, 0);
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 14b38b4459c5..219fe7015e7d 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -140,8 +140,10 @@ struct ib_ucontext_per_mm {
 };
 
 int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
-struct ib_umem_odp *ib_alloc_odp_umem(struct ib_umem_odp *root_umem,
-				      unsigned long addr, size_t size);
+struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
+					       int access);
+struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
+					    unsigned long addr, size_t size);
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp);
 
 int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset,
-- 
cgit v1.2.3


From 261dc53f8ee037bf2fbf68f90c319b04062a126c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 19 Aug 2019 14:17:04 +0300
Subject: RDMA/odp: Split creating a umem_odp from ib_umem_get

This is the last creation API that is overloaded for both, there is very
little code sharing and a driver has to be specifically ready for a
umem_odp to be created to use the odp version.

Link: https://lore.kernel.org/r/20190819111710.18440-7-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem.c     | 30 +++--------------
 drivers/infiniband/core/umem_odp.c | 67 ++++++++++++++++++++++++++++----------
 drivers/infiniband/hw/mlx5/mem.c   | 13 --------
 drivers/infiniband/hw/mlx5/mr.c    | 34 ++++++++++++++-----
 include/rdma/ib_umem_odp.h         |  9 +++--
 5 files changed, 86 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 56553668256f..9a39c45cd1e6 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -184,9 +184,6 @@ EXPORT_SYMBOL(ib_umem_find_best_pgsz);
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
- * If access flags indicate ODP memory, avoid pinning. Instead, stores
- * the mm for future page fault handling in conjunction with MMU notifiers.
- *
  * @udata: userspace context to pin memory for
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
@@ -231,17 +228,12 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	if (!can_do_mlock())
 		return ERR_PTR(-EPERM);
 
-	if (access & IB_ACCESS_ON_DEMAND) {
-		umem = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
-		if (!umem)
-			return ERR_PTR(-ENOMEM);
-		umem->is_odp = 1;
-	} else {
-		umem = kzalloc(sizeof(*umem), GFP_KERNEL);
-		if (!umem)
-			return ERR_PTR(-ENOMEM);
-	}
+	if (access & IB_ACCESS_ON_DEMAND)
+		return ERR_PTR(-EOPNOTSUPP);
 
+	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+	if (!umem)
+		return ERR_PTR(-ENOMEM);
 	umem->context    = context;
 	umem->length     = size;
 	umem->address    = addr;
@@ -249,18 +241,6 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	umem->owning_mm = mm = current->mm;
 	mmgrab(mm);
 
-	if (access & IB_ACCESS_ON_DEMAND) {
-		if (WARN_ON_ONCE(!context->invalidate_range)) {
-			ret = -EINVAL;
-			goto umem_kfree;
-		}
-
-		ret = ib_umem_odp_get(to_ib_umem_odp(umem), access);
-		if (ret)
-			goto umem_kfree;
-		return umem;
-	}
-
 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 	if (!page_list) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 198c0ce04998..6a88bd0fcb33 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -335,6 +335,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 				     &per_mm->umem_tree);
 		up_write(&per_mm->umem_rwsem);
 	}
+	mmgrab(umem_odp->umem.owning_mm);
 
 	return 0;
 
@@ -389,9 +390,6 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
 		kfree(umem_odp);
 		return ERR_PTR(ret);
 	}
-
-	mmgrab(umem->owning_mm);
-
 	return umem_odp;
 }
 EXPORT_SYMBOL(ib_umem_odp_alloc_implicit);
@@ -435,27 +433,51 @@ struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root,
 		kfree(odp_data);
 		return ERR_PTR(ret);
 	}
-
-	mmgrab(umem->owning_mm);
-
 	return odp_data;
 }
 EXPORT_SYMBOL(ib_umem_odp_alloc_child);
 
 /**
- * ib_umem_odp_get - Complete ib_umem_get()
+ * ib_umem_odp_get - Create a umem_odp for a userspace va
  *
- * @umem_odp: The partially configured umem from ib_umem_get()
- * @addr: The starting userspace VA
- * @access: ib_reg_mr access flags
+ * @udata: userspace context to pin memory for
+ * @addr: userspace virtual address to start at
+ * @size: length of region to pin
+ * @access: IB_ACCESS_xxx flags for memory being pinned
+ *
+ * The driver should use when the access flags indicate ODP memory. It avoids
+ * pinning, instead, stores the mm for future page fault handling in
+ * conjunction with MMU notifiers.
  */
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+				    size_t size, int access)
 {
-	/*
-	 * NOTE: This must called in a process context where umem->owning_mm
-	 * == current->mm
-	 */
-	struct mm_struct *mm = umem_odp->umem.owning_mm;
+	struct ib_umem_odp *umem_odp;
+	struct ib_ucontext *context;
+	struct mm_struct *mm;
+	int ret;
+
+	if (!udata)
+		return ERR_PTR(-EIO);
+
+	context = container_of(udata, struct uverbs_attr_bundle, driver_udata)
+			  ->context;
+	if (!context)
+		return ERR_PTR(-EIO);
+
+	if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) ||
+	    WARN_ON_ONCE(!context->invalidate_range))
+		return ERR_PTR(-EINVAL);
+
+	umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
+	if (!umem_odp)
+		return ERR_PTR(-ENOMEM);
+
+	umem_odp->umem.context = context;
+	umem_odp->umem.length = size;
+	umem_odp->umem.address = addr;
+	umem_odp->umem.writable = ib_access_writable(access);
+	umem_odp->umem.owning_mm = mm = current->mm;
 
 	umem_odp->page_shift = PAGE_SHIFT;
 	if (access & IB_ACCESS_HUGETLB) {
@@ -466,15 +488,24 @@ int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
 		vma = find_vma(mm, ib_umem_start(umem_odp));
 		if (!vma || !is_vm_hugetlb_page(vma)) {
 			up_read(&mm->mmap_sem);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_free;
 		}
 		h = hstate_vma(vma);
 		umem_odp->page_shift = huge_page_shift(h);
 		up_read(&mm->mmap_sem);
 	}
 
-	return ib_init_umem_odp(umem_odp, NULL);
+	ret = ib_init_umem_odp(umem_odp, NULL);
+	if (ret)
+		goto err_free;
+	return umem_odp;
+
+err_free:
+	kfree(umem_odp);
+	return ERR_PTR(ret);
 }
+EXPORT_SYMBOL(ib_umem_odp_get);
 
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index a40e0abf2338..b5aece786b36 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -56,19 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
 	struct scatterlist *sg;
 	int entry;
 
-	if (umem->is_odp) {
-		struct ib_umem_odp *odp = to_ib_umem_odp(umem);
-		unsigned int page_shift = odp->page_shift;
-
-		*ncont = ib_umem_odp_num_pages(odp);
-		*count = *ncont << (page_shift - PAGE_SHIFT);
-		*shift = page_shift;
-		if (order)
-			*order = ilog2(roundup_pow_of_two(*ncont));
-
-		return;
-	}
-
 	addr = addr >> PAGE_SHIFT;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, BITS_PER_LONG);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ba2ec495b6e3..fc1106ddc3f6 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -784,19 +784,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 		       int *ncont, int *order)
 {
 	struct ib_umem *u;
-	int err;
 
 	*umem = NULL;
 
-	u = ib_umem_get(udata, start, length, access_flags, 0);
-	err = PTR_ERR_OR_ZERO(u);
-	if (err) {
-		mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
-		return err;
+	if (access_flags & IB_ACCESS_ON_DEMAND) {
+		struct ib_umem_odp *odp;
+
+		odp = ib_umem_odp_get(udata, start, length, access_flags);
+		if (IS_ERR(odp)) {
+			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
+				    PTR_ERR(odp));
+			return PTR_ERR(odp);
+		}
+
+		u = &odp->umem;
+
+		*page_shift = odp->page_shift;
+		*ncont = ib_umem_odp_num_pages(odp);
+		*npages = *ncont << (*page_shift - PAGE_SHIFT);
+		if (order)
+			*order = ilog2(roundup_pow_of_two(*ncont));
+	} else {
+		u = ib_umem_get(udata, start, length, access_flags, 0);
+		if (IS_ERR(u)) {
+			mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
+			return PTR_ERR(u);
+		}
+
+		mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+				   page_shift, ncont, order);
 	}
 
-	mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
-			   page_shift, ncont, order);
 	if (!*npages) {
 		mlx5_ib_warn(dev, "avoid zero region\n");
 		ib_umem_release(u);
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 219fe7015e7d..5efb67f97b0a 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -139,7 +139,8 @@ struct ib_ucontext_per_mm {
 	struct rcu_head rcu;
 };
 
-int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access);
+struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
+				    size_t size, int access);
 struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
 					       int access);
 struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem,
@@ -199,9 +200,11 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
 
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
-static inline int ib_umem_odp_get(struct ib_umem_odp *umem_odp, int access)
+static inline struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata,
+						  unsigned long addr,
+						  size_t size, int access)
 {
-	return -EINVAL;
+	return ERR_PTR(-EINVAL);
 }
 
 static inline void ib_umem_odp_release(struct ib_umem_odp *umem_odp) {}
-- 
cgit v1.2.3


From 204e3e5630c5d41948fc11d8419c07da8f3e5a4d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Mon, 19 Aug 2019 14:17:06 +0300
Subject: RDMA/odp: Check for overflow when computing the umem_odp end

Since the page size can be extended in the ODP case by IB_ACCESS_HUGETLB
the existing overflow checks done by ib_umem_get() are not
sufficient. Check for overflow again.

Further, remove the unchecked math from the inlines and just use the
precomputed value stored in the interval_tree_node.

Link: https://lore.kernel.org/r/20190819111710.18440-9-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c | 25 +++++++++++++++++++------
 include/rdma/ib_umem_odp.h         |  5 ++---
 2 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 3d4bbafa441c..d0ef7d86213e 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -287,19 +287,32 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 
 	umem_odp->umem.is_odp = 1;
 	if (!umem_odp->is_implicit_odp) {
-		size_t pages = ib_umem_odp_num_pages(umem_odp);
-
+		size_t page_size = 1UL << umem_odp->page_shift;
+		size_t pages;
+
+		umem_odp->interval_tree.start =
+			ALIGN_DOWN(umem_odp->umem.address, page_size);
+		if (check_add_overflow(umem_odp->umem.address,
+				       umem_odp->umem.length,
+				       &umem_odp->interval_tree.last))
+			return -EOVERFLOW;
+		umem_odp->interval_tree.last =
+			ALIGN(umem_odp->interval_tree.last, page_size);
+		if (unlikely(umem_odp->interval_tree.last < page_size))
+			return -EOVERFLOW;
+
+		pages = (umem_odp->interval_tree.last -
+			 umem_odp->interval_tree.start) >>
+			umem_odp->page_shift;
 		if (!pages)
 			return -EINVAL;
 
 		/*
 		 * Note that the representation of the intervals in the
 		 * interval tree considers the ending point as contained in
-		 * the interval, while the function ib_umem_end returns the
-		 * first address which is not contained in the umem.
+		 * the interval.
 		 */
-		umem_odp->interval_tree.start = ib_umem_start(umem_odp);
-		umem_odp->interval_tree.last = ib_umem_end(umem_odp) - 1;
+		umem_odp->interval_tree.last--;
 
 		umem_odp->page_list = vzalloc(
 			array_size(sizeof(*umem_odp->page_list), pages));
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index 5efb67f97b0a..b37c674b7fe6 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -91,14 +91,13 @@ static inline struct ib_umem_odp *to_ib_umem_odp(struct ib_umem *umem)
 /* Returns the first page of an ODP umem. */
 static inline unsigned long ib_umem_start(struct ib_umem_odp *umem_odp)
 {
-	return ALIGN_DOWN(umem_odp->umem.address, 1UL << umem_odp->page_shift);
+	return umem_odp->interval_tree.start;
 }
 
 /* Returns the address of the page after the last one of an ODP umem. */
 static inline unsigned long ib_umem_end(struct ib_umem_odp *umem_odp)
 {
-	return ALIGN(umem_odp->umem.address + umem_odp->umem.length,
-		     1UL << umem_odp->page_shift);
+	return umem_odp->interval_tree.last + 1;
 }
 
 static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
-- 
cgit v1.2.3


From ce51346feede2ea41de0ad58af2b514223e11dad Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Mon, 19 Aug 2019 14:17:08 +0300
Subject: RDMA/core: Make invalidate_range a device operation

The callback function 'invalidate_range' is implemented in a driver so the
place for it is in the ib_device_ops structure and not in ib_ucontext.

Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Link: https://lore.kernel.org/r/20190819111710.18440-11-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/device.c     |  1 +
 drivers/infiniband/core/umem_odp.c   | 10 +++++-----
 drivers/infiniband/core/uverbs_cmd.c |  2 --
 drivers/infiniband/hw/mlx5/main.c    |  4 ----
 drivers/infiniband/hw/mlx5/odp.c     |  1 +
 include/rdma/ib_verbs.h              |  4 ++--
 6 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ea8661a00651..b5631b8a0397 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2562,6 +2562,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 	SET_DEVICE_OP(dev_ops, get_vf_config);
 	SET_DEVICE_OP(dev_ops, get_vf_stats);
 	SET_DEVICE_OP(dev_ops, init_port);
+	SET_DEVICE_OP(dev_ops, invalidate_range);
 	SET_DEVICE_OP(dev_ops, iw_accept);
 	SET_DEVICE_OP(dev_ops, iw_add_ref);
 	SET_DEVICE_OP(dev_ops, iw_connect);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 72765b110f4b..32fb0b579dec 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -96,7 +96,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 		 */
 		ib_umem_notifier_start_account(umem_odp);
 		complete_all(&umem_odp->notifier_completion);
-		umem_odp->umem.context->invalidate_range(
+		umem_odp->umem.context->device->ops.invalidate_range(
 			umem_odp, ib_umem_start(umem_odp),
 			ib_umem_end(umem_odp));
 	}
@@ -109,7 +109,7 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
 					     u64 start, u64 end, void *cookie)
 {
 	ib_umem_notifier_start_account(item);
-	item->umem.context->invalidate_range(item, start, end);
+	item->umem.context->device->ops.invalidate_range(item, start, end);
 	return 0;
 }
 
@@ -385,7 +385,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
 
 	if (!context)
 		return ERR_PTR(-EIO);
-	if (WARN_ON_ONCE(!context->invalidate_range))
+	if (WARN_ON_ONCE(!context->device->ops.invalidate_range))
 		return ERR_PTR(-EINVAL);
 
 	umem_odp = kzalloc(sizeof(*umem_odp), GFP_KERNEL);
@@ -479,7 +479,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
 		return ERR_PTR(-EIO);
 
 	if (WARN_ON_ONCE(!(access & IB_ACCESS_ON_DEMAND)) ||
-	    WARN_ON_ONCE(!context->invalidate_range))
+	    WARN_ON_ONCE(!context->device->ops.invalidate_range))
 		return ERR_PTR(-EINVAL);
 
 	umem_odp = kzalloc(sizeof(struct ib_umem_odp), GFP_KERNEL);
@@ -607,7 +607,7 @@ out:
 
 	if (remove_existing_mapping) {
 		ib_umem_notifier_start_account(umem_odp);
-		context->invalidate_range(
+		dev->ops.invalidate_range(
 			umem_odp,
 			ib_umem_start(umem_odp) +
 				(page_index << umem_odp->page_shift),
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 7ddd0e5bc6b3..8f4fd4fac159 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -275,8 +275,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 	ret = ib_dev->ops.alloc_ucontext(ucontext, &attrs->driver_udata);
 	if (ret)
 		goto err_file;
-	if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
-		ucontext->invalidate_range = NULL;
 
 	rdma_restrack_uadd(&ucontext->res);
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e12a4404096b..7d5c2763b691 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1867,10 +1867,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 	if (err)
 		goto out_sys_pages;
 
-	if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING)
-		context->ibucontext.invalidate_range =
-			&mlx5_ib_invalidate_range;
-
 	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
 		err = mlx5_ib_devx_create(dev, true);
 		if (err < 0)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index a660dc2b21f4..6d940d8f5247 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1612,6 +1612,7 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
 
 static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
 	.advise_mr = mlx5_ib_advise_mr,
+	.invalidate_range = mlx5_ib_invalidate_range,
 };
 
 int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4f225175cb91..c2b39dda44cc 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1417,8 +1417,6 @@ struct ib_ucontext {
 
 	bool cleanup_retryable;
 
-	void (*invalidate_range)(struct ib_umem_odp *umem_odp,
-				 unsigned long start, unsigned long end);
 	struct mutex per_mm_list_lock;
 	struct list_head per_mm_list;
 
@@ -2378,6 +2376,8 @@ struct ib_device_ops {
 			    u64 iova);
 	int (*unmap_fmr)(struct list_head *fmr_list);
 	int (*dealloc_fmr)(struct ib_fmr *fmr);
+	void (*invalidate_range)(struct ib_umem_odp *umem_odp,
+				 unsigned long start, unsigned long end);
 	int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
 	int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid);
 	struct ib_xrcd *(*alloc_xrcd)(struct ib_device *device,
-- 
cgit v1.2.3


From 8050a395112db5bedb7caa6cb673e711a3c04cd9 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Wed, 21 Aug 2019 00:08:58 +0100
Subject: bpf: fix 'struct pt_reg' typo in documentation

There is no 'struct pt_reg'.

Signed-off-by: Peter Wu <peter@lekensteyn.nl>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8aa6126f0b6e..267544e140be 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1018,7 +1018,7 @@ union bpf_attr {
  * 		The realm of the route for the packet associated to *skb*, or 0
  * 		if none was found.
  *
- * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * int bpf_perf_event_output(struct pt_regs *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
  * 	Description
  * 		Write raw *data* blob into a special BPF perf event held by
  * 		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1080,7 +1080,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * int bpf_get_stackid(struct pt_regs *ctx, struct bpf_map *map, u64 flags)
  * 	Description
  * 		Walk a user or a kernel stack and return its id. To achieve
  * 		this, the helper needs *ctx*, which is a pointer to the context
@@ -1729,7 +1729,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_override_return(struct pt_reg *regs, u64 rc)
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
  * 	Description
  * 		Used for error injection, this helper uses kprobes to override
  * 		the return value of the probed function, and to set it to *rc*.
-- 
cgit v1.2.3


From 55c33dfbeb831eb3ab7cc1a3e295b0d4d57f23a3 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Wed, 21 Aug 2019 00:08:59 +0100
Subject: bpf: clarify when bpf_trace_printk discards lines

I opened /sys/kernel/tracing/trace once and kept reading from it.
bpf_trace_printk somehow did not seem to work, no entries were appended
to that trace file. It turns out that tracing is disabled when that file
is open. Save the next person some time and document this.

The trace file is described in Documentation/trace/ftrace.rst, however
the implication "tracing is disabled" did not immediate translate to
"bpf_trace_printk silently discards entries".

Signed-off-by: Peter Wu <peter@lekensteyn.nl>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 267544e140be..b5889257cc33 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -580,6 +580,8 @@ union bpf_attr {
  * 		limited to five).
  *
  * 		Each time the helper is called, it appends a line to the trace.
+ * 		Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * 		open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
  * 		The format of the trace is customizable, and the exact output
  * 		one will get depends on the options set in
  * 		*\/sys/kernel/debug/tracing/trace_options* (see also the
-- 
cgit v1.2.3


From d1dec5ca5fb87d64d1e7405bd6006728915ea5c6 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 20 Aug 2019 00:56:02 +0900
Subject: ARM: s3c64xx: squash samsung_usb_phy.h into setup-usb-phy.c

This is only used by arch/arm/mach-s3c64xx/setup-usb-phy.c

$ git grep samsung_usb_phy_type
include/linux/usb/samsung_usb_phy.h:enum samsung_usb_phy_type {
$ git grep USB_PHY_TYPE_DEVICE
arch/arm/mach-s3c64xx/setup-usb-phy.c:  if (type == USB_PHY_TYPE_DEVICE)
arch/arm/mach-s3c64xx/setup-usb-phy.c:  if (type == USB_PHY_TYPE_DEVICE)
include/linux/usb/samsung_usb_phy.h:    USB_PHY_TYPE_DEVICE,
$ git grep USB_PHY_TYPE_HOST
include/linux/usb/samsung_usb_phy.h:    USB_PHY_TYPE_HOST,

Actually, 'enum samsung_usb_phy_type' is unused; the 'type' parameter
has 'int' type. Anyway, there is no need to declare this enum in the
globally visible header. Squash the header.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
---
 arch/arm/mach-s3c64xx/setup-usb-phy.c        |  5 +++++
 arch/arm/plat-samsung/include/plat/usb-phy.h |  2 --
 include/linux/usb/samsung_usb_phy.h          | 17 -----------------
 3 files changed, 5 insertions(+), 19 deletions(-)
 delete mode 100644 include/linux/usb/samsung_usb_phy.h

(limited to 'include')

diff --git a/arch/arm/mach-s3c64xx/setup-usb-phy.c b/arch/arm/mach-s3c64xx/setup-usb-phy.c
index 46a9e955607f..6aaaa1d8e8b9 100644
--- a/arch/arm/mach-s3c64xx/setup-usb-phy.c
+++ b/arch/arm/mach-s3c64xx/setup-usb-phy.c
@@ -15,6 +15,11 @@
 #include "regs-sys.h"
 #include "regs-usb-hsotg-phy.h"
 
+enum samsung_usb_phy_type {
+	USB_PHY_TYPE_DEVICE,
+	USB_PHY_TYPE_HOST,
+};
+
 static int s3c_usb_otgphy_init(struct platform_device *pdev)
 {
 	struct clk *xusbxti;
diff --git a/arch/arm/plat-samsung/include/plat/usb-phy.h b/arch/arm/plat-samsung/include/plat/usb-phy.h
index 6d0c788beb9d..94da89ecbd3b 100644
--- a/arch/arm/plat-samsung/include/plat/usb-phy.h
+++ b/arch/arm/plat-samsung/include/plat/usb-phy.h
@@ -7,8 +7,6 @@
 #ifndef __PLAT_SAMSUNG_USB_PHY_H
 #define __PLAT_SAMSUNG_USB_PHY_H __FILE__
 
-#include <linux/usb/samsung_usb_phy.h>
-
 extern int s5p_usb_phy_init(struct platform_device *pdev, int type);
 extern int s5p_usb_phy_exit(struct platform_device *pdev, int type);
 
diff --git a/include/linux/usb/samsung_usb_phy.h b/include/linux/usb/samsung_usb_phy.h
deleted file mode 100644
index dc0071741695..000000000000
--- a/include/linux/usb/samsung_usb_phy.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2012 Samsung Electronics Co.Ltd
- *		http://www.samsung.com/
- *
- * Defines phy types for samsung usb phy controllers - HOST or DEIVCE.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-enum samsung_usb_phy_type {
-	USB_PHY_TYPE_DEVICE,
-	USB_PHY_TYPE_HOST,
-};
-- 
cgit v1.2.3


From dce3e8fd039cc1b62760b3ad6822cf04c262cd0e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 19 Aug 2019 16:31:47 +0200
Subject: posix-cpu-timers: Remove tsk argument from run_posix_cpu_timers()

It's always current. Don't give people wrong ideas.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190819143801.945469967@linutronix.de
---
 include/linux/posix-timers.h   | 2 +-
 kernel/time/posix-cpu-timers.c | 5 +++--
 kernel/time/timer.c            | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 26c636d1485b..033374b99767 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -118,7 +118,7 @@ struct k_itimer {
 	struct rcu_head		rcu;
 };
 
-void run_posix_cpu_timers(struct task_struct *task);
+void run_posix_cpu_timers(void);
 void posix_cpu_timers_exit(struct task_struct *task);
 void posix_cpu_timers_exit_group(struct task_struct *task);
 void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 98223d2805c2..387e0e86e1b8 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1137,11 +1137,12 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
  * already updated our counts.  We need to check if any timers fire now.
  * Interrupts are disabled.
  */
-void run_posix_cpu_timers(struct task_struct *tsk)
+void run_posix_cpu_timers(void)
 {
-	LIST_HEAD(firing);
+	struct task_struct *tsk = current;
 	struct k_itimer *timer, *next;
 	unsigned long flags;
+	LIST_HEAD(firing);
 
 	lockdep_assert_irqs_disabled();
 
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 673c6a0f0c45..0e315a2e77ae 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1728,7 +1728,7 @@ void update_process_times(int user_tick)
 #endif
 	scheduler_tick();
 	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
-		run_posix_cpu_timers(p);
+		run_posix_cpu_timers();
 }
 
 /**
-- 
cgit v1.2.3


From aad12c2394189f606ce0308ab65505fdd9081a10 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Wed, 21 Aug 2019 14:29:29 +0300
Subject: trivial: netns: fix typo in 'struct net.passive' description

Replace 'decided' with 'decide' so that comment would be

/* To decide when the network namespace should be freed. */

Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index cb668bc2692d..ab40d7afdc54 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -52,7 +52,7 @@ struct bpf_prog;
 #define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
 
 struct net {
-	refcount_t		passive;	/* To decided when the network
+	refcount_t		passive;	/* To decide when the network
 						 * namespace should be freed.
 						 */
 	refcount_t		count;		/* To decided when the network
-- 
cgit v1.2.3


From 9b9e9e5b461789668ce0271f968d8eb5c5d3a332 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:11 -0300
Subject: media: rc: add keymap for Amediatech X96-MAX remote

The X96-Max Android STB ships with a simple NEC remote. It includes
a TV section with preset buttons for controlling a TV. These are not
configurable, but are noted to aid visual recognition of the device.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile    |  1 +
 drivers/media/rc/keymaps/rc-x96max.c | 83 ++++++++++++++++++++++++++++++++++++
 include/media/rc-map.h               |  1 +
 3 files changed, 85 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-x96max.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index 5b1399af6b3a..b88c4e76cdc6 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -117,4 +117,5 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-winfast-usbii-deluxe.o \
 			rc-su3000.o \
 			rc-xbox-dvd.o \
+			rc-x96max.o \
 			rc-zx-irdec.o
diff --git a/drivers/media/rc/keymaps/rc-x96max.c b/drivers/media/rc/keymaps/rc-x96max.c
new file mode 100644
index 000000000000..0998ec3320e4
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-x96max.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (C) 2019 Christian Hewitt <christianshewitt@gmail.com>
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+//
+// Keytable for the X96-max STB remote control
+//
+
+static struct rc_map_table x96max[] = {
+	{ 0x140, KEY_POWER },
+
+	// ** TV CONTROL **
+	// SET
+	// AV/TV
+	// POWER
+	// VOLUME UP
+	// VOLUME DOWN
+
+	{ 0x118, KEY_VOLUMEUP },
+	{ 0x110, KEY_VOLUMEDOWN },
+
+	{ 0x143, KEY_MUTE }, // config
+
+	{ 0x100, KEY_EPG }, // mouse
+	{ 0x119, KEY_BACK },
+
+	{ 0x116, KEY_UP },
+	{ 0x151, KEY_LEFT },
+	{ 0x150, KEY_RIGHT },
+	{ 0x11a, KEY_DOWN },
+	{ 0x113, KEY_OK },
+
+	{ 0x111, KEY_HOME },
+	{ 0x14c, KEY_CONTEXT_MENU },
+
+	{ 0x159, KEY_PREVIOUS },
+	{ 0x15a, KEY_PLAYPAUSE },
+	{ 0x158, KEY_NEXT },
+
+	{ 0x147, KEY_MENU }, // @ key
+	{ 0x101, KEY_NUMERIC_0 },
+	{ 0x142, KEY_BACKSPACE },
+
+	{ 0x14e, KEY_NUMERIC_1 },
+	{ 0x10d, KEY_NUMERIC_2 },
+	{ 0x10c, KEY_NUMERIC_3 },
+
+	{ 0x14a, KEY_NUMERIC_4 },
+	{ 0x109, KEY_NUMERIC_5 },
+	{ 0x108, KEY_NUMERIC_6 },
+
+	{ 0x146, KEY_NUMERIC_7 },
+	{ 0x105, KEY_NUMERIC_8 },
+	{ 0x104, KEY_NUMERIC_9 },
+};
+
+static struct rc_map_list x96max_map = {
+	.map = {
+		.scan     = x96max,
+		.size     = ARRAY_SIZE(x96max),
+		.rc_proto = RC_PROTO_NEC,
+		.name     = RC_MAP_X96MAX,
+	}
+};
+
+static int __init init_rc_map_x96max(void)
+{
+	return rc_map_register(&x96max_map);
+}
+
+static void __exit exit_rc_map_x96max(void)
+{
+	rc_map_unregister(&x96max_map);
+}
+
+module_init(init_rc_map_x96max)
+module_exit(exit_rc_map_x96max)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 32e49dcd1348..bff5a6b4cbc6 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -271,6 +271,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_WINFAST_USBII_DELUXE      "rc-winfast-usbii-deluxe"
 #define RC_MAP_SU3000                    "rc-su3000"
 #define RC_MAP_XBOX_DVD                  "rc-xbox-dvd"
+#define RC_MAP_X96MAX                    "rc-x96max"
 #define RC_MAP_ZX_IRDEC                  "rc-zx-irdec"
 
 /*
-- 
cgit v1.2.3


From a53dee7b70384840b13cbe92588fa68dce2c0b62 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:12 -0300
Subject: media: rc: add keymap for Khadas VIM/EDGE remote

Khadas VIM and Edge SBC devices use the same NEC remote device. The
remote includes a mouse button for Android use. This has been mapped
to KEY_MUTE.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile    |  1 +
 drivers/media/rc/keymaps/rc-khadas.c | 54 ++++++++++++++++++++++++++++++++++++
 include/media/rc-map.h               |  1 +
 3 files changed, 56 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-khadas.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index b88c4e76cdc6..39192b0abf91 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-it913x-v1.o \
 			rc-it913x-v2.o \
 			rc-kaiomy.o \
+			rc-khadas.o \
 			rc-kworld-315u.o \
 			rc-kworld-pc150u.o \
 			rc-kworld-plus-tv-analog.o \
diff --git a/drivers/media/rc/keymaps/rc-khadas.c b/drivers/media/rc/keymaps/rc-khadas.c
new file mode 100644
index 000000000000..ce4938444d90
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-khadas.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (C) 2019 Christian Hewitt <christianshewitt@gmail.com>
+
+/*
+ * Keytable for the Khadas VIM/EDGE SBC remote control
+ */
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+static struct rc_map_table khadas[] = {
+	{ 0x14, KEY_POWER },
+
+	{ 0x03, KEY_UP },
+	{ 0x02, KEY_DOWN },
+	{ 0x0e, KEY_LEFT },
+	{ 0x1a, KEY_RIGHT },
+	{ 0x07, KEY_OK },
+
+	{ 0x01, KEY_BACK },
+	{ 0x5b, KEY_MUTE }, // mouse
+	{ 0x13, KEY_MENU },
+
+	{ 0x58, KEY_VOLUMEDOWN },
+	{ 0x0b, KEY_VOLUMEUP },
+
+	{ 0x48, KEY_HOME },
+};
+
+static struct rc_map_list khadas_map = {
+	.map = {
+		.scan     = khadas,
+		.size     = ARRAY_SIZE(khadas),
+		.rc_proto = RC_PROTO_NEC,
+		.name     = RC_MAP_KHADAS,
+	}
+};
+
+static int __init init_rc_map_khadas(void)
+{
+	return rc_map_register(&khadas_map);
+}
+
+static void __exit exit_rc_map_khadas(void)
+{
+	rc_map_unregister(&khadas_map);
+}
+
+module_init(init_rc_map_khadas)
+module_exit(exit_rc_map_khadas)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index bff5a6b4cbc6..9754017518a0 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -211,6 +211,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_IT913X_V1                 "rc-it913x-v1"
 #define RC_MAP_IT913X_V2                 "rc-it913x-v2"
 #define RC_MAP_KAIOMY                    "rc-kaiomy"
+#define RC_MAP_KHADAS                    "rc-khadas"
 #define RC_MAP_KWORLD_315U               "rc-kworld-315u"
 #define RC_MAP_KWORLD_PC150U             "rc-kworld-pc150u"
 #define RC_MAP_KWORLD_PLUS_TV_ANALOG     "rc-kworld-plus-tv-analog"
-- 
cgit v1.2.3


From e30399e1bd6e215ec20981612646ec73a4385c33 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:13 -0300
Subject: media: rc: add keymap for Tanix TX3 mini remote

The Tanix TX3 mini Android STB ships with a simple NEC remote.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile           |  1 +
 drivers/media/rc/keymaps/rc-tanix-tx3mini.c | 77 +++++++++++++++++++++++++++++
 include/media/rc-map.h                      |  1 +
 3 files changed, 79 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-tanix-tx3mini.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index 39192b0abf91..31720d842f88 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -95,6 +95,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-snapstream-firefly.o \
 			rc-streamzap.o \
 			rc-tango.o \
+			rc-tanix-tx3mini.o \
 			rc-tbs-nec.o \
 			rc-technisat-ts35.o \
 			rc-technisat-usb2.o \
diff --git a/drivers/media/rc/keymaps/rc-tanix-tx3mini.c b/drivers/media/rc/keymaps/rc-tanix-tx3mini.c
new file mode 100644
index 000000000000..d486cd69afb2
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-tanix-tx3mini.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2018 Christian Hewitt
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+/*
+ * Keymap for the Tanix TX3 mini STB remote control
+ */
+
+static struct rc_map_table tanix_tx3mini[] = {
+	{ 0x8051, KEY_POWER },
+	{ 0x804d, KEY_MUTE },
+
+	{ 0x8009, KEY_RED },
+	{ 0x8011, KEY_GREEN },
+	{ 0x8054, KEY_YELLOW },
+	{ 0x804f, KEY_BLUE },
+
+	{ 0x8056, KEY_VOLUMEDOWN },
+	{ 0x80bd, KEY_PREVIOUS },
+	{ 0x80bb, KEY_NEXT },
+	{ 0x804e, KEY_VOLUMEUP },
+
+	{ 0x8053, KEY_HOME },
+	{ 0x801b, KEY_BACK },
+
+	{ 0x8026, KEY_UP },
+	{ 0x8028, KEY_DOWN },
+	{ 0x8025, KEY_LEFT },
+	{ 0x8027, KEY_RIGHT },
+	{ 0x800d, KEY_OK },
+
+	{ 0x8049, KEY_MENU },
+	{ 0x8052, KEY_EPG }, // mouse
+
+	{ 0x8031, KEY_1 },
+	{ 0x8032, KEY_2 },
+	{ 0x8033, KEY_3 },
+
+	{ 0x8034, KEY_4 },
+	{ 0x8035, KEY_5 },
+	{ 0x8036, KEY_6 },
+
+	{ 0x8037, KEY_7 },
+	{ 0x8038, KEY_8 },
+	{ 0x8039, KEY_9 },
+
+	{ 0x8058, KEY_SUBTITLE }, // 1/a
+	{ 0x8030, KEY_0 },
+	{ 0x8044, KEY_DELETE },
+};
+
+static struct rc_map_list tanix_tx3mini_map = {
+	.map = {
+		.scan     = tanix_tx3mini,
+		.size     = ARRAY_SIZE(tanix_tx3mini),
+		.rc_proto = RC_PROTO_NEC,
+		.name     = RC_MAP_TANIX_TX3MINI,
+	}
+};
+
+static int __init init_rc_map_tanix_tx3mini(void)
+{
+	return rc_map_register(&tanix_tx3mini_map);
+}
+
+static void __exit exit_rc_map_tanix_tx3mini(void)
+{
+	rc_map_unregister(&tanix_tx3mini_map);
+}
+
+module_init(init_rc_map_tanix_tx3mini)
+module_exit(exit_rc_map_tanix_tx3mini)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 9754017518a0..b8929d0c5d6b 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -249,6 +249,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_SNAPSTREAM_FIREFLY        "rc-snapstream-firefly"
 #define RC_MAP_STREAMZAP                 "rc-streamzap"
 #define RC_MAP_TANGO                     "rc-tango"
+#define RC_MAP_TANIX_TX3MINI             "rc-tanix-tx3mini"
 #define RC_MAP_TBS_NEC                   "rc-tbs-nec"
 #define RC_MAP_TECHNISAT_TS35            "rc-technisat-ts35"
 #define RC_MAP_TECHNISAT_USB2            "rc-technisat-usb2"
-- 
cgit v1.2.3


From 7bb53f361c59b68e521a05fce579ccfa8021c3a0 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:14 -0300
Subject: media: rc: add keymap for Tanix TX5 max remote

The Tanix TX5 max Android STB ships with a simple NEC remote.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile          |  1 +
 drivers/media/rc/keymaps/rc-tanix-tx5max.c | 68 ++++++++++++++++++++++++++++++
 include/media/rc-map.h                     |  1 +
 3 files changed, 70 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-tanix-tx5max.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index 31720d842f88..85423cc84149 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -96,6 +96,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-streamzap.o \
 			rc-tango.o \
 			rc-tanix-tx3mini.o \
+			rc-tanix-tx5max.o \
 			rc-tbs-nec.o \
 			rc-technisat-ts35.o \
 			rc-technisat-usb2.o \
diff --git a/drivers/media/rc/keymaps/rc-tanix-tx5max.c b/drivers/media/rc/keymaps/rc-tanix-tx5max.c
new file mode 100644
index 000000000000..59aaabed80dd
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-tanix-tx5max.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2018 Christian Hewitt
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+/*
+ * Keymap for the Tanix TX5 max STB remote control
+ */
+
+static struct rc_map_table tanix_tx5max[] = {
+	{ 0x40404d, KEY_POWER },
+	{ 0x404043, KEY_MUTE },
+
+	{ 0x404017, KEY_VOLUMEDOWN },
+	{ 0x404018, KEY_VOLUMEUP },
+
+	{ 0x40400b, KEY_UP },
+	{ 0x404010, KEY_LEFT },
+	{ 0x404011, KEY_RIGHT },
+	{ 0x40400e, KEY_DOWN },
+	{ 0x40400d, KEY_OK },
+
+	{ 0x40401a, KEY_HOME },
+	{ 0x404045, KEY_MENU },
+	{ 0x404042, KEY_BACK },
+
+	{ 0x404001, KEY_1 },
+	{ 0x404002, KEY_2 },
+	{ 0x404003, KEY_3 },
+
+	{ 0x404004, KEY_4 },
+	{ 0x404005, KEY_5 },
+	{ 0x404006, KEY_6 },
+
+	{ 0x404007, KEY_7 },
+	{ 0x404008, KEY_8 },
+	{ 0x404009, KEY_9 },
+
+	{ 0x404047, KEY_SUBTITLE }, // mouse
+	{ 0x404000, KEY_0 },
+	{ 0x40400c, KEY_DELETE },
+};
+
+static struct rc_map_list tanix_tx5max_map = {
+	.map = {
+		.scan     = tanix_tx5max,
+		.size     = ARRAY_SIZE(tanix_tx5max),
+		.rc_proto = RC_PROTO_NECX,
+		.name     = RC_MAP_TANIX_TX5MAX,
+	}
+};
+
+static int __init init_rc_map_tanix_tx5max(void)
+{
+	return rc_map_register(&tanix_tx5max_map);
+}
+
+static void __exit exit_rc_map_tanix_tx5max(void)
+{
+	rc_map_unregister(&tanix_tx5max_map);
+}
+
+module_init(init_rc_map_tanix_tx5max)
+module_exit(exit_rc_map_tanix_tx5max)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index b8929d0c5d6b..a2ebe4868567 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -250,6 +250,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_STREAMZAP                 "rc-streamzap"
 #define RC_MAP_TANGO                     "rc-tango"
 #define RC_MAP_TANIX_TX3MINI             "rc-tanix-tx3mini"
+#define RC_MAP_TANIX_TX5MAX              "rc-tanix-tx5max"
 #define RC_MAP_TBS_NEC                   "rc-tbs-nec"
 #define RC_MAP_TECHNISAT_TS35            "rc-technisat-ts35"
 #define RC_MAP_TECHNISAT_USB2            "rc-technisat-usb2"
-- 
cgit v1.2.3


From 373078971272e9dfcb5f80e309f148a88e36dba8 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:15 -0300
Subject: media: rc: add keymap for WeTek Hub remote

The WeTek Hub Android STB ships with a simple NEC remote.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile       |  1 +
 drivers/media/rc/keymaps/rc-wetek-hub.c | 53 +++++++++++++++++++++++++++++++++
 include/media/rc-map.h                  |  1 +
 3 files changed, 55 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-wetek-hub.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index 85423cc84149..6d744aca74a2 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -116,6 +116,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-videomate-m1f.o \
 			rc-videomate-s350.o \
 			rc-videomate-tv-pvr.o \
+			rc-wetek-hub.o \
 			rc-winfast.o \
 			rc-winfast-usbii-deluxe.o \
 			rc-su3000.o \
diff --git a/drivers/media/rc/keymaps/rc-wetek-hub.c b/drivers/media/rc/keymaps/rc-wetek-hub.c
new file mode 100644
index 000000000000..b5a21aff45f5
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-wetek-hub.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (c) 2018 Christian Hewitt
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+/*
+ * This keymap is used with the WeTek Hub STB.
+ */
+
+static struct rc_map_table wetek_hub[] = {
+	{ 0x77f1, KEY_POWER },
+
+	{ 0x77f2, KEY_HOME },
+	{ 0x77f3, KEY_MUTE }, // mouse
+
+	{ 0x77f4, KEY_UP },
+	{ 0x77f5, KEY_DOWN },
+	{ 0x77f6, KEY_LEFT },
+	{ 0x77f7, KEY_RIGHT },
+	{ 0x77f8, KEY_OK },
+
+	{ 0x77f9, KEY_BACK },
+	{ 0x77fa, KEY_MENU },
+
+	{ 0x77fb, KEY_VOLUMEUP },
+	{ 0x77fc, KEY_VOLUMEDOWN },
+};
+
+static struct rc_map_list wetek_hub_map = {
+	.map = {
+		.scan     = wetek_hub,
+		.size     = ARRAY_SIZE(wetek_hub),
+		.rc_proto = RC_PROTO_NEC,
+		.name     = RC_MAP_WETEK_HUB,
+	}
+};
+
+static int __init init_rc_map_wetek_hub(void)
+{
+	return rc_map_register(&wetek_hub_map);
+}
+
+static void __exit exit_rc_map_wetek_hub(void)
+{
+	rc_map_unregister(&wetek_hub_map);
+}
+
+module_init(init_rc_map_wetek_hub)
+module_exit(exit_rc_map_wetek_hub)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com>");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index a2ebe4868567..032e989418ac 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -270,6 +270,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_VIDEOMATE_K100            "rc-videomate-k100"
 #define RC_MAP_VIDEOMATE_S350            "rc-videomate-s350"
 #define RC_MAP_VIDEOMATE_TV_PVR          "rc-videomate-tv-pvr"
+#define RC_MAP_WETEK_HUB                 "rc-wetek-hub"
 #define RC_MAP_WINFAST                   "rc-winfast"
 #define RC_MAP_WINFAST_USBII_DELUXE      "rc-winfast-usbii-deluxe"
 #define RC_MAP_SU3000                    "rc-su3000"
-- 
cgit v1.2.3


From fa992b335aa778db112e91aac19c45e4d914a1f4 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:16 -0300
Subject: media: rc: add keymap for WeTeK Play 2 remote

The WeTek Play 2 Android STB ships with an unusual remote where the
main up/down/left/right/enter controls are surrounded with an outer
ring of additional keys which are listed in clockwise order.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile         |  1 +
 drivers/media/rc/keymaps/rc-wetek-play2.c | 93 +++++++++++++++++++++++++++++++
 include/media/rc-map.h                    |  1 +
 3 files changed, 95 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-wetek-play2.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index 6d744aca74a2..d316a9966716 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -117,6 +117,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-videomate-s350.o \
 			rc-videomate-tv-pvr.o \
 			rc-wetek-hub.o \
+			rc-wetek-play2.o \
 			rc-winfast.o \
 			rc-winfast-usbii-deluxe.o \
 			rc-su3000.o \
diff --git a/drivers/media/rc/keymaps/rc-wetek-play2.c b/drivers/media/rc/keymaps/rc-wetek-play2.c
new file mode 100644
index 000000000000..bbbb11fa3c11
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-wetek-play2.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (C) 2019 Christian Hewitt <christianshewitt@gmail.com>
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+//
+// Keytable for the WeTek Play 2 STB remote control
+//
+
+static struct rc_map_table wetek_play2[] = {
+	{ 0x5e5f02, KEY_POWER },
+	{ 0x5e5f46, KEY_SLEEP }, // tv
+	{ 0x5e5f10, KEY_MUTE },
+
+	{ 0x5e5f22, KEY_1 },
+	{ 0x5e5f23, KEY_2 },
+	{ 0x5e5f24, KEY_3 },
+
+	{ 0x5e5f25, KEY_4 },
+	{ 0x5e5f26, KEY_5 },
+	{ 0x5e5f27, KEY_6 },
+
+	{ 0x5e5f28, KEY_7 },
+	{ 0x5e5f29, KEY_8 },
+	{ 0x5e5f30, KEY_9 },
+
+	{ 0x5e5f71, KEY_BACK },
+	{ 0x5e5f21, KEY_0 },
+	{ 0x5e5f72, KEY_CAPSLOCK },
+
+	// outer ring clockwide from top
+	{ 0x5e5f03, KEY_HOME },
+	{ 0x5e5f61, KEY_BACK },
+	{ 0x5e5f77, KEY_CONFIG }, // mouse
+	{ 0x5e5f83, KEY_EPG },
+	{ 0x5e5f84, KEY_SCREEN }, // square
+	{ 0x5e5f48, KEY_MENU },
+
+	// inner ring
+	{ 0x5e5f50, KEY_UP },
+	{ 0x5e5f4b, KEY_DOWN },
+	{ 0x5e5f4c, KEY_LEFT },
+	{ 0x5e5f4d, KEY_RIGHT },
+	{ 0x5e5f47, KEY_OK },
+
+	{ 0x5e5f44, KEY_VOLUMEUP },
+	{ 0x5e5f43, KEY_VOLUMEDOWN },
+	{ 0x5e5f4f, KEY_FAVORITES },
+	{ 0x5e5f82, KEY_SUBTITLE }, // txt
+	{ 0x5e5f41, KEY_PAGEUP },
+	{ 0x5e5f42, KEY_PAGEDOWN },
+
+	{ 0x5e5f73, KEY_RED },
+	{ 0x5e5f74, KEY_GREEN },
+	{ 0x5e5f75, KEY_YELLOW },
+	{ 0x5e5f76, KEY_BLUE },
+
+	{ 0x5e5f67, KEY_PREVIOUSSONG },
+	{ 0x5e5f79, KEY_REWIND },
+	{ 0x5e5f80, KEY_FASTFORWARD },
+	{ 0x5e5f81, KEY_NEXTSONG },
+
+	{ 0x5e5f04, KEY_RECORD },
+	{ 0x5e5f2c, KEY_PLAYPAUSE },
+	{ 0x5e5f2b, KEY_STOP },
+};
+
+static struct rc_map_list wetek_play2_map = {
+	.map = {
+		.scan     = wetek_play2,
+		.size     = ARRAY_SIZE(wetek_play2),
+		.rc_proto = RC_PROTO_NECX,
+		.name     = RC_MAP_WETEK_PLAY2,
+	}
+};
+
+static int __init init_rc_map_wetek_play2(void)
+{
+	return rc_map_register(&wetek_play2_map);
+}
+
+static void __exit exit_rc_map_wetek_play2(void)
+{
+	rc_map_unregister(&wetek_play2_map);
+}
+
+module_init(init_rc_map_wetek_play2)
+module_exit(exit_rc_map_wetek_play2)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 032e989418ac..9eab8f5088dc 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -271,6 +271,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_VIDEOMATE_S350            "rc-videomate-s350"
 #define RC_MAP_VIDEOMATE_TV_PVR          "rc-videomate-tv-pvr"
 #define RC_MAP_WETEK_HUB                 "rc-wetek-hub"
+#define RC_MAP_WETEK_PLAY2               "rc-wetek-play2"
 #define RC_MAP_WINFAST                   "rc-winfast"
 #define RC_MAP_WINFAST_USBII_DELUXE      "rc-winfast-usbii-deluxe"
 #define RC_MAP_SU3000                    "rc-su3000"
-- 
cgit v1.2.3


From 8f5f33f71caefd85befddcf902df96de362c9d18 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 15 Aug 2019 11:59:17 -0300
Subject: media: rc: add keymap for HardKernel ODROID remote

This is a simple NEC remote control device shipped with the HardKernel
ODROID range of SBC devices.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Sean Young <sean@mess.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/rc/keymaps/Makefile    |  1 +
 drivers/media/rc/keymaps/rc-odroid.c | 54 ++++++++++++++++++++++++++++++++++++
 include/media/rc-map.h               |  1 +
 3 files changed, 56 insertions(+)
 create mode 100644 drivers/media/rc/keymaps/rc-odroid.c

(limited to 'include')

diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile
index d316a9966716..a56fc634d2d6 100644
--- a/drivers/media/rc/keymaps/Makefile
+++ b/drivers/media/rc/keymaps/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
 			rc-nec-terratec-cinergy-xs.o \
 			rc-norwood.o \
 			rc-npgtech.o \
+			rc-odroid.o \
 			rc-pctv-sedna.o \
 			rc-pinnacle-color.o \
 			rc-pinnacle-grey.o \
diff --git a/drivers/media/rc/keymaps/rc-odroid.c b/drivers/media/rc/keymaps/rc-odroid.c
new file mode 100644
index 000000000000..c6fbb64b5c41
--- /dev/null
+++ b/drivers/media/rc/keymaps/rc-odroid.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (C) 2019 Christian Hewitt <christianshewitt@gmail.com>
+
+#include <media/rc-map.h>
+#include <linux/module.h>
+
+//
+// Keytable for the HardKernel ODROID remote control
+//
+
+static struct rc_map_table odroid[] = {
+	{ 0xb2dc, KEY_POWER },
+
+	{ 0xb288, KEY_MUTE },
+	{ 0xb282, KEY_HOME },
+
+	{ 0xb2ca, KEY_UP },
+	{ 0xb299, KEY_LEFT },
+	{ 0xb2ce, KEY_OK },
+	{ 0xb2c1, KEY_RIGHT },
+	{ 0xb2d2, KEY_DOWN },
+
+	{ 0xb2c5, KEY_MENU },
+	{ 0xb29a, KEY_BACK },
+
+	{ 0xb281, KEY_VOLUMEDOWN },
+	{ 0xb280, KEY_VOLUMEUP },
+};
+
+static struct rc_map_list odroid_map = {
+	.map = {
+		.scan     = odroid,
+		.size     = ARRAY_SIZE(odroid),
+		.rc_proto = RC_PROTO_NEC,
+		.name     = RC_MAP_ODROID,
+	}
+};
+
+static int __init init_rc_map_odroid(void)
+{
+	return rc_map_register(&odroid_map);
+}
+
+static void __exit exit_rc_map_odroid(void)
+{
+	rc_map_unregister(&odroid_map);
+}
+
+module_init(init_rc_map_odroid)
+module_exit(exit_rc_map_odroid)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Hewitt <christianshewitt@gmail.com");
diff --git a/include/media/rc-map.h b/include/media/rc-map.h
index 9eab8f5088dc..afd2ab31bdf2 100644
--- a/include/media/rc-map.h
+++ b/include/media/rc-map.h
@@ -229,6 +229,7 @@ struct rc_map *rc_map_get(const char *name);
 #define RC_MAP_NEC_TERRATEC_CINERGY_XS   "rc-nec-terratec-cinergy-xs"
 #define RC_MAP_NORWOOD                   "rc-norwood"
 #define RC_MAP_NPGTECH                   "rc-npgtech"
+#define RC_MAP_ODROID                    "rc-odroid"
 #define RC_MAP_PCTV_SEDNA                "rc-pctv-sedna"
 #define RC_MAP_PINNACLE_COLOR            "rc-pinnacle-color"
 #define RC_MAP_PINNACLE_GREY             "rc-pinnacle-grey"
-- 
cgit v1.2.3


From 7a978759b4e0e7a2ad3f10cbf9077915a85ec956 Mon Sep 17 00:00:00 2001
From: Dmytro Linkin <dmitrolin@mellanox.com>
Date: Thu, 27 Jun 2019 10:55:02 +0000
Subject: net/mlx5e: Add tc flower tracepoints

Implemented following tracepoints:
1. Configure flower (mlx5e_configure_flower)
2. Delete flower (mlx5e_delete_flower)
3. Stats flower (mlx5e_stats_flower)

Usage example:
 ># cd /sys/kernel/debug/tracing
 ># echo mlx5:mlx5e_configure_flower >> set_event
 ># cat trace
    ...
    tc-6535  [019] ...1  2672.404466: mlx5e_configure_flower: cookie=0000000067874a55 actions= REDIRECT

Added corresponding documentation in
    Documentation/networking/device-driver/mellanox/mlx5.rst

Signed-off-by: Dmytro Linkin <dmitrolin@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../networking/device_drivers/mellanox/mlx5.rst    | 32 +++++++++
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  2 +-
 .../mellanox/mlx5/core/diag/en_tc_tracepoint.c     | 58 +++++++++++++++
 .../mellanox/mlx5/core/diag/en_tc_tracepoint.h     | 83 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  4 ++
 include/net/flow_offload.h                         |  1 +
 6 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h

(limited to 'include')

diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst
index cfda464e52de..1339dbf52431 100644
--- a/Documentation/networking/device_drivers/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst
@@ -12,6 +12,7 @@ Contents
 - `Enabling the driver and kconfig options`_
 - `Devlink info`_
 - `Devlink health reporters`_
+- `mlx5 tracepoints`_
 
 Enabling the driver and kconfig options
 ================================================
@@ -219,3 +220,34 @@ User commands examples:
     $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal
 
 NOTE: This command can run only on PF.
+
+mlx5 tracepoints
+================
+
+mlx5 driver provides internal trace points for tracking and debugging using
+kernel tracepoints interfaces (refer to Documentation/trace/ftrase.rst).
+
+For the list of support mlx5 events check /sys/kernel/debug/tracing/events/mlx5/
+
+tc and eswitch offloads tracepoints:
+
+- mlx5e_configure_flower: trace flower filter actions and cookies offloaded to mlx5::
+
+    $ echo mlx5:mlx5e_configure_flower >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    tc-6535  [019] ...1  2672.404466: mlx5e_configure_flower: cookie=0000000067874a55 actions= REDIRECT
+
+- mlx5e_delete_flower: trace flower filter actions and cookies deleted from mlx5::
+
+    $ echo mlx5:mlx5e_delete_flower >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    tc-6569  [010] .N.1  2686.379075: mlx5e_delete_flower: cookie=0000000067874a55 actions= NULL
+
+- mlx5e_stats_flower: trace flower stats request::
+
+    $ echo mlx5:mlx5e_stats_flower >> /sys/kernel/debug/tracing/set_event
+    $ cat /sys/kernel/debug/tracing/trace
+    ...
+    tc-6546  [010] ...1  2679.704889: mlx5e_stats_flower: cookie=0000000060eb3d6a bytes=0 packets=0 lastused=4295560217
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a3b9659649a8..bcf36552f069 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
 					lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
-					en/tc_tun_geneve.o
+					en/tc_tun_geneve.o diag/en_tc_tracepoint.o
 
 #
 # Core extra
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
new file mode 100644
index 000000000000..c5dc6c50fa87
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#define CREATE_TRACE_POINTS
+#include "en_tc_tracepoint.h"
+
+void put_ids_to_array(int *ids,
+		      const struct flow_action_entry *entries,
+		      unsigned int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++)
+		ids[i] = entries[i].id;
+}
+
+#define NAME_SIZE 16
+
+static const char FLOWACT2STR[NUM_FLOW_ACTIONS][NAME_SIZE] = {
+	[FLOW_ACTION_ACCEPT]	= "ACCEPT",
+	[FLOW_ACTION_DROP]	= "DROP",
+	[FLOW_ACTION_TRAP]	= "TRAP",
+	[FLOW_ACTION_GOTO]	= "GOTO",
+	[FLOW_ACTION_REDIRECT]	= "REDIRECT",
+	[FLOW_ACTION_MIRRED]	= "MIRRED",
+	[FLOW_ACTION_VLAN_PUSH]	= "VLAN_PUSH",
+	[FLOW_ACTION_VLAN_POP]	= "VLAN_POP",
+	[FLOW_ACTION_VLAN_MANGLE]	= "VLAN_MANGLE",
+	[FLOW_ACTION_TUNNEL_ENCAP]	= "TUNNEL_ENCAP",
+	[FLOW_ACTION_TUNNEL_DECAP]	= "TUNNEL_DECAP",
+	[FLOW_ACTION_MANGLE]	= "MANGLE",
+	[FLOW_ACTION_ADD]	= "ADD",
+	[FLOW_ACTION_CSUM]	= "CSUM",
+	[FLOW_ACTION_MARK]	= "MARK",
+	[FLOW_ACTION_WAKE]	= "WAKE",
+	[FLOW_ACTION_QUEUE]	= "QUEUE",
+	[FLOW_ACTION_SAMPLE]	= "SAMPLE",
+	[FLOW_ACTION_POLICE]	= "POLICE",
+	[FLOW_ACTION_CT]	= "CT",
+};
+
+const char *parse_action(struct trace_seq *p,
+			 int *ids,
+			 unsigned int num)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		if (ids[i] < NUM_FLOW_ACTIONS)
+			trace_seq_printf(p, "%s ", FLOWACT2STR[ids[i]]);
+		else
+			trace_seq_printf(p, "UNKNOWN ");
+	}
+
+	trace_seq_putc(p, 0);
+	return ret;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
new file mode 100644
index 000000000000..a362100fe6d3
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/en_tc_tracepoint.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_TC_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_TC_TP_
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+#include <net/flow_offload.h>
+
+#define __parse_action(ids, num) parse_action(p, ids, num)
+
+void put_ids_to_array(int *ids,
+		      const struct flow_action_entry *entries,
+		      unsigned int num);
+
+const char *parse_action(struct trace_seq *p,
+			 int *ids,
+			 unsigned int num);
+
+DECLARE_EVENT_CLASS(mlx5e_flower_template,
+		    TP_PROTO(const struct flow_cls_offload *f),
+		    TP_ARGS(f),
+		    TP_STRUCT__entry(__field(void *, cookie)
+				     __field(unsigned int, num)
+				     __dynamic_array(int, ids, f->rule ?
+					     f->rule->action.num_entries : 0)
+				     ),
+		    TP_fast_assign(__entry->cookie = (void *)f->cookie;
+			__entry->num = (f->rule ?
+				f->rule->action.num_entries : 0);
+			if (__entry->num)
+				put_ids_to_array(__get_dynamic_array(ids),
+						 f->rule->action.entries,
+						 f->rule->action.num_entries);
+			),
+		    TP_printk("cookie=%p actions= %s\n",
+			      __entry->cookie, __entry->num ?
+				      __parse_action(__get_dynamic_array(ids),
+						     __entry->num) : "NULL"
+			      )
+);
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_configure_flower,
+	     TP_PROTO(const struct flow_cls_offload *f),
+	     TP_ARGS(f)
+	     );
+
+DEFINE_EVENT(mlx5e_flower_template, mlx5e_delete_flower,
+	     TP_PROTO(const struct flow_cls_offload *f),
+	     TP_ARGS(f)
+	     );
+
+TRACE_EVENT(mlx5e_stats_flower,
+	    TP_PROTO(const struct flow_cls_offload *f),
+	    TP_ARGS(f),
+	    TP_STRUCT__entry(__field(void *, cookie)
+			     __field(u64, bytes)
+			     __field(u64, packets)
+			     __field(u64, lastused)
+			     ),
+	    TP_fast_assign(__entry->cookie = (void *)f->cookie;
+		__entry->bytes = f->stats.bytes;
+		__entry->packets = f->stats.pkts;
+		__entry->lastused = f->stats.lastused;
+		),
+	    TP_printk("cookie=%p bytes=%llu packets=%llu lastused=%llu\n",
+		      __entry->cookie, __entry->bytes,
+		      __entry->packets, __entry->lastused
+		      )
+);
+
+#endif /* _MLX5_TC_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ./diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE en_tc_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 5d4ce3d58832..c40cca08c8cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -56,6 +56,7 @@
 #include "en/tc_tun.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
+#include "diag/en_tc_tracepoint.h"
 
 struct mlx5_nic_flow_attr {
 	u32 action;
@@ -3769,6 +3770,7 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 		goto out;
 	}
 
+	trace_mlx5e_configure_flower(f);
 	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
 	if (err)
 		goto out;
@@ -3818,6 +3820,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
 	rcu_read_unlock();
 
+	trace_mlx5e_delete_flower(f);
 	mlx5e_flow_put(priv, flow);
 
 	return 0;
@@ -3887,6 +3890,7 @@ no_peer_counter:
 	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 out:
 	flow_stats_update(&f->stats, bytes, packets, lastuse);
+	trace_mlx5e_stats_flower(f);
 errout:
 	mlx5e_flow_put(priv, flow);
 	return err;
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index e8069b6c474c..757fa84de654 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -138,6 +138,7 @@ enum flow_action_id {
 	FLOW_ACTION_MPLS_PUSH,
 	FLOW_ACTION_MPLS_POP,
 	FLOW_ACTION_MPLS_MANGLE,
+	NUM_FLOW_ACTIONS,
 };
 
 /* This is mirroring enum pedit_header_type definition for easy mapping between
-- 
cgit v1.2.3


From c571feca2dc972dc5afeba9036d08239f1c51af1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 6 Aug 2019 20:15:43 -0300
Subject: RDMA/odp: use mmu_notifier_get/put for 'struct ib_ucontext_per_mm'

This is a significant simplification, no extra list is kept per FD, and
the interval tree is now shared between all the ucontexts, reducing
overhead if there are multiple ucontexts active.

Link: https://lore.kernel.org/r/20190806231548.25242-7-jgg@ziepe.ca
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c    | 168 +++++++++++-----------------------
 drivers/infiniband/core/uverbs_cmd.c  |   3 -
 drivers/infiniband/core/uverbs_main.c |   1 +
 drivers/infiniband/hw/mlx5/main.c     |   5 -
 include/rdma/ib_umem_odp.h            |  10 +-
 include/rdma/ib_verbs.h               |   3 -
 6 files changed, 55 insertions(+), 135 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 32fb0b579dec..bf36f2c5fb3a 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -82,7 +82,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 	struct rb_node *node;
 
 	down_read(&per_mm->umem_rwsem);
-	if (!per_mm->active)
+	if (!per_mm->mn.users)
 		goto out;
 
 	for (node = rb_first_cached(&per_mm->umem_tree); node;
@@ -125,10 +125,10 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	else if (!down_read_trylock(&per_mm->umem_rwsem))
 		return -EAGAIN;
 
-	if (!per_mm->active) {
+	if (!per_mm->mn.users) {
 		up_read(&per_mm->umem_rwsem);
 		/*
-		 * At this point active is permanently set and visible to this
+		 * At this point users is permanently zero and visible to this
 		 * CPU without a lock, that fact is relied on to skip the unlock
 		 * in range_end.
 		 */
@@ -158,7 +158,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	struct ib_ucontext_per_mm *per_mm =
 		container_of(mn, struct ib_ucontext_per_mm, mn);
 
-	if (unlikely(!per_mm->active))
+	if (unlikely(!per_mm->mn.users))
 		return;
 
 	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
@@ -167,122 +167,47 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	up_read(&per_mm->umem_rwsem);
 }
 
-static const struct mmu_notifier_ops ib_umem_notifiers = {
-	.release                    = ib_umem_notifier_release,
-	.invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
-	.invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
-};
-
-static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
-{
-	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-
-	down_write(&per_mm->umem_rwsem);
-	interval_tree_remove(&umem_odp->interval_tree, &per_mm->umem_tree);
-	complete_all(&umem_odp->notifier_completion);
-	up_write(&per_mm->umem_rwsem);
-}
-
-static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
-					       struct mm_struct *mm)
+static struct mmu_notifier *ib_umem_alloc_notifier(struct mm_struct *mm)
 {
 	struct ib_ucontext_per_mm *per_mm;
-	int ret;
 
 	per_mm = kzalloc(sizeof(*per_mm), GFP_KERNEL);
 	if (!per_mm)
 		return ERR_PTR(-ENOMEM);
 
-	per_mm->context = ctx;
-	per_mm->mm = mm;
 	per_mm->umem_tree = RB_ROOT_CACHED;
 	init_rwsem(&per_mm->umem_rwsem);
-	per_mm->active = true;
 
+	WARN_ON(mm != current->mm);
 	rcu_read_lock();
 	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
 	rcu_read_unlock();
-
-	WARN_ON(mm != current->mm);
-
-	per_mm->mn.ops = &ib_umem_notifiers;
-	ret = mmu_notifier_register(&per_mm->mn, per_mm->mm);
-	if (ret) {
-		dev_err(&ctx->device->dev,
-			"Failed to register mmu_notifier %d\n", ret);
-		goto out_pid;
-	}
-
-	list_add(&per_mm->ucontext_list, &ctx->per_mm_list);
-	return per_mm;
-
-out_pid:
-	put_pid(per_mm->tgid);
-	kfree(per_mm);
-	return ERR_PTR(ret);
-}
-
-static struct ib_ucontext_per_mm *get_per_mm(struct ib_umem_odp *umem_odp)
-{
-	struct ib_ucontext *ctx = umem_odp->umem.context;
-	struct ib_ucontext_per_mm *per_mm;
-
-	lockdep_assert_held(&ctx->per_mm_list_lock);
-
-	/*
-	 * Generally speaking we expect only one or two per_mm in this list,
-	 * so no reason to optimize this search today.
-	 */
-	list_for_each_entry(per_mm, &ctx->per_mm_list, ucontext_list) {
-		if (per_mm->mm == umem_odp->umem.owning_mm)
-			return per_mm;
-	}
-
-	return alloc_per_mm(ctx, umem_odp->umem.owning_mm);
-}
-
-static void free_per_mm(struct rcu_head *rcu)
-{
-	kfree(container_of(rcu, struct ib_ucontext_per_mm, rcu));
+	return &per_mm->mn;
 }
 
-static void put_per_mm(struct ib_umem_odp *umem_odp)
+static void ib_umem_free_notifier(struct mmu_notifier *mn)
 {
-	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
-	struct ib_ucontext *ctx = umem_odp->umem.context;
-	bool need_free;
-
-	mutex_lock(&ctx->per_mm_list_lock);
-	umem_odp->per_mm = NULL;
-	per_mm->odp_mrs_count--;
-	need_free = per_mm->odp_mrs_count == 0;
-	if (need_free)
-		list_del(&per_mm->ucontext_list);
-	mutex_unlock(&ctx->per_mm_list_lock);
-
-	if (!need_free)
-		return;
-
-	/*
-	 * NOTE! mmu_notifier_unregister() can happen between a start/end
-	 * callback, resulting in an start/end, and thus an unbalanced
-	 * lock. This doesn't really matter to us since we are about to kfree
-	 * the memory that holds the lock, however LOCKDEP doesn't like this.
-	 */
-	down_write(&per_mm->umem_rwsem);
-	per_mm->active = false;
-	up_write(&per_mm->umem_rwsem);
+	struct ib_ucontext_per_mm *per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 
 	WARN_ON(!RB_EMPTY_ROOT(&per_mm->umem_tree.rb_root));
-	mmu_notifier_unregister_no_release(&per_mm->mn, per_mm->mm);
+
 	put_pid(per_mm->tgid);
-	mmu_notifier_call_srcu(&per_mm->rcu, free_per_mm);
+	kfree(per_mm);
 }
 
-static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
-				   struct ib_ucontext_per_mm *per_mm)
+static const struct mmu_notifier_ops ib_umem_notifiers = {
+	.release                    = ib_umem_notifier_release,
+	.invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
+	.invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
+	.alloc_notifier		    = ib_umem_alloc_notifier,
+	.free_notifier		    = ib_umem_free_notifier,
+};
+
+static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp)
 {
-	struct ib_ucontext *ctx = umem_odp->umem.context;
+	struct ib_ucontext_per_mm *per_mm;
+	struct mmu_notifier *mn;
 	int ret;
 
 	umem_odp->umem.is_odp = 1;
@@ -327,17 +252,13 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 		}
 	}
 
-	mutex_lock(&ctx->per_mm_list_lock);
-	if (!per_mm) {
-		per_mm = get_per_mm(umem_odp);
-		if (IS_ERR(per_mm)) {
-			ret = PTR_ERR(per_mm);
-			goto out_unlock;
-		}
+	mn = mmu_notifier_get(&ib_umem_notifiers, umem_odp->umem.owning_mm);
+	if (IS_ERR(mn)) {
+		ret = PTR_ERR(mn);
+		goto out_dma_list;
 	}
-	umem_odp->per_mm = per_mm;
-	per_mm->odp_mrs_count++;
-	mutex_unlock(&ctx->per_mm_list_lock);
+	umem_odp->per_mm = per_mm =
+		container_of(mn, struct ib_ucontext_per_mm, mn);
 
 	mutex_init(&umem_odp->umem_mutex);
 	init_completion(&umem_odp->notifier_completion);
@@ -352,8 +273,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
 
 	return 0;
 
-out_unlock:
-	mutex_unlock(&ctx->per_mm_list_lock);
+out_dma_list:
 	kvfree(umem_odp->dma_list);
 out_page_list:
 	kvfree(umem_odp->page_list);
@@ -398,7 +318,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
 	umem_odp->is_implicit_odp = 1;
 	umem_odp->page_shift = PAGE_SHIFT;
 
-	ret = ib_init_umem_odp(umem_odp, NULL);
+	ret = ib_init_umem_odp(umem_odp);
 	if (ret) {
 		kfree(umem_odp);
 		return ERR_PTR(ret);
@@ -441,7 +361,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root,
 	umem->owning_mm  = root->umem.owning_mm;
 	odp_data->page_shift = PAGE_SHIFT;
 
-	ret = ib_init_umem_odp(odp_data, root->per_mm);
+	ret = ib_init_umem_odp(odp_data);
 	if (ret) {
 		kfree(odp_data);
 		return ERR_PTR(ret);
@@ -509,7 +429,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
 		up_read(&mm->mmap_sem);
 	}
 
-	ret = ib_init_umem_odp(umem_odp, NULL);
+	ret = ib_init_umem_odp(umem_odp);
 	if (ret)
 		goto err_free;
 	return umem_odp;
@@ -522,6 +442,8 @@ EXPORT_SYMBOL(ib_umem_odp_get);
 
 void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 {
+	struct ib_ucontext_per_mm *per_mm = umem_odp->per_mm;
+
 	/*
 	 * Ensure that no more pages are mapped in the umem.
 	 *
@@ -531,11 +453,27 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
 	if (!umem_odp->is_implicit_odp) {
 		ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
 					    ib_umem_end(umem_odp));
-		remove_umem_from_per_mm(umem_odp);
 		kvfree(umem_odp->dma_list);
 		kvfree(umem_odp->page_list);
 	}
-	put_per_mm(umem_odp);
+
+	down_write(&per_mm->umem_rwsem);
+	if (!umem_odp->is_implicit_odp) {
+		interval_tree_remove(&umem_odp->interval_tree,
+				     &per_mm->umem_tree);
+		complete_all(&umem_odp->notifier_completion);
+	}
+	/*
+	 * NOTE! mmu_notifier_unregister() can happen between a start/end
+	 * callback, resulting in a missing end, and thus an unbalanced
+	 * lock. This doesn't really matter to us since we are about to kfree
+	 * the memory that holds the lock, however LOCKDEP doesn't like this.
+	 * Thus we call the mmu_notifier_put under the rwsem and test the
+	 * internal users count to reliably see if we are past this point.
+	 */
+	mmu_notifier_put(&per_mm->mn);
+	up_write(&per_mm->umem_rwsem);
+
 	mmdrop(umem_odp->umem.owning_mm);
 	kfree(umem_odp);
 }
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 8f4fd4fac159..7c10dfe417a4 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -252,9 +252,6 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
 	ucontext->closing = false;
 	ucontext->cleanup_retryable = false;
 
-	mutex_init(&ucontext->per_mm_list_lock);
-	INIT_LIST_HEAD(&ucontext->per_mm_list);
-
 	ret = get_unused_fd_flags(O_CLOEXEC);
 	if (ret < 0)
 		goto err_free;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 11c13c1381cf..e369ac0d6f51 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1487,6 +1487,7 @@ static void __exit ib_uverbs_cleanup(void)
 				 IB_UVERBS_NUM_FIXED_MINOR);
 	unregister_chrdev_region(dynamic_uverbs_dev,
 				 IB_UVERBS_NUM_DYNAMIC_MINOR);
+	mmu_notifier_synchronize();
 }
 
 module_init(ib_uverbs_init);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 7d5c2763b691..4ba73a95475a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1995,11 +1995,6 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 	struct mlx5_bfreg_info *bfregi;
 
-	/* All umem's must be destroyed before destroying the ucontext. */
-	mutex_lock(&ibcontext->per_mm_list_lock);
-	WARN_ON(!list_empty(&ibcontext->per_mm_list));
-	mutex_unlock(&ibcontext->per_mm_list_lock);
-
 	bfregi = &context->bfregi;
 	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
index b37c674b7fe6..253df1a1fa54 100644
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -122,20 +122,12 @@ static inline size_t ib_umem_odp_num_pages(struct ib_umem_odp *umem_odp)
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
 struct ib_ucontext_per_mm {
-	struct ib_ucontext *context;
-	struct mm_struct *mm;
+	struct mmu_notifier mn;
 	struct pid *tgid;
-	bool active;
 
 	struct rb_root_cached umem_tree;
 	/* Protects umem_tree */
 	struct rw_semaphore umem_rwsem;
-
-	struct mmu_notifier mn;
-	unsigned int odp_mrs_count;
-
-	struct list_head ucontext_list;
-	struct rcu_head rcu;
 };
 
 struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c2b39dda44cc..f659f4a02aa9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1417,9 +1417,6 @@ struct ib_ucontext {
 
 	bool cleanup_retryable;
 
-	struct mutex per_mm_list_lock;
-	struct list_head per_mm_list;
-
 	struct ib_rdmacg_object	cg_obj;
 	/*
 	 * Implementation details of the RDMA core, don't use in drivers:
-- 
cgit v1.2.3


From 47f725ee7b5f5cae1f83512961bcf8b41a7a5794 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 6 Aug 2019 20:15:44 -0300
Subject: RDMA/odp: remove ib_ucontext from ib_umem

At this point the ucontext is only being stored to access the ib_device,
so just store the ib_device directly instead. This is more natural and
logical as the umem has nothing to do with the ucontext.

Link: https://lore.kernel.org/r/20190806231548.25242-8-jgg@ziepe.ca
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem.c     |  4 ++--
 drivers/infiniband/core/umem_odp.c | 15 +++++++--------
 include/rdma/ib_umem.h             |  2 +-
 3 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 37eb8643ec29..41f9e268e3fb 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -234,7 +234,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
 	if (!umem)
 		return ERR_PTR(-ENOMEM);
-	umem->context    = context;
+	umem->ibdev = context->device;
 	umem->length     = size;
 	umem->address    = addr;
 	umem->writable   = ib_access_writable(access);
@@ -337,7 +337,7 @@ void ib_umem_release(struct ib_umem *umem)
 	if (umem->is_odp)
 		return ib_umem_odp_release(to_ib_umem_odp(umem));
 
-	__ib_umem_release(umem->context->device, umem, 1);
+	__ib_umem_release(umem->ibdev, umem, 1);
 
 	atomic64_sub(ib_umem_num_pages(umem), &umem->owning_mm->pinned_vm);
 	mmdrop(umem->owning_mm);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index bf36f2c5fb3a..9aebe9ce8b07 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -96,7 +96,7 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 		 */
 		ib_umem_notifier_start_account(umem_odp);
 		complete_all(&umem_odp->notifier_completion);
-		umem_odp->umem.context->device->ops.invalidate_range(
+		umem_odp->umem.ibdev->ops.invalidate_range(
 			umem_odp, ib_umem_start(umem_odp),
 			ib_umem_end(umem_odp));
 	}
@@ -109,7 +109,7 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
 					     u64 start, u64 end, void *cookie)
 {
 	ib_umem_notifier_start_account(item);
-	item->umem.context->device->ops.invalidate_range(item, start, end);
+	item->umem.ibdev->ops.invalidate_range(item, start, end);
 	return 0;
 }
 
@@ -312,7 +312,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_implicit(struct ib_udata *udata,
 	if (!umem_odp)
 		return ERR_PTR(-ENOMEM);
 	umem = &umem_odp->umem;
-	umem->context = context;
+	umem->ibdev = context->device;
 	umem->writable = ib_access_writable(access);
 	umem->owning_mm = current->mm;
 	umem_odp->is_implicit_odp = 1;
@@ -354,7 +354,7 @@ struct ib_umem_odp *ib_umem_odp_alloc_child(struct ib_umem_odp *root,
 	if (!odp_data)
 		return ERR_PTR(-ENOMEM);
 	umem = &odp_data->umem;
-	umem->context    = root->umem.context;
+	umem->ibdev = root->umem.ibdev;
 	umem->length     = size;
 	umem->address    = addr;
 	umem->writable   = root->umem.writable;
@@ -406,7 +406,7 @@ struct ib_umem_odp *ib_umem_odp_get(struct ib_udata *udata, unsigned long addr,
 	if (!umem_odp)
 		return ERR_PTR(-ENOMEM);
 
-	umem_odp->umem.context = context;
+	umem_odp->umem.ibdev = context->device;
 	umem_odp->umem.length = size;
 	umem_odp->umem.address = addr;
 	umem_odp->umem.writable = ib_access_writable(access);
@@ -504,8 +504,7 @@ static int ib_umem_odp_map_dma_single_page(
 		u64 access_mask,
 		unsigned long current_seq)
 {
-	struct ib_ucontext *context = umem_odp->umem.context;
-	struct ib_device *dev = context->device;
+	struct ib_device *dev = umem_odp->umem.ibdev;
 	dma_addr_t dma_addr;
 	int remove_existing_mapping = 0;
 	int ret = 0;
@@ -718,7 +717,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
 {
 	int idx;
 	u64 addr;
-	struct ib_device *dev = umem_odp->umem.context->device;
+	struct ib_device *dev = umem_odp->umem.ibdev;
 
 	virt = max_t(u64, virt, ib_umem_start(umem_odp));
 	bound = min_t(u64, bound, ib_umem_end(umem_odp));
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 1052d0d62be7..a91b2af64ec4 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,7 +42,7 @@ struct ib_ucontext;
 struct ib_umem_odp;
 
 struct ib_umem {
-	struct ib_ucontext     *context;
+	struct ib_device       *ibdev;
 	struct mm_struct       *owning_mm;
 	size_t			length;
 	unsigned long		address;
-- 
cgit v1.2.3


From c96245148c1ec7af086da322481bf4119d1141d3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>
Date: Tue, 6 Aug 2019 20:15:48 -0300
Subject: mm/mmu_notifiers: remove unregister_no_release

mmu_notifier_unregister_no_release() and mmu_notifier_call_srcu() no
longer have any users, they have all been converted to use
mmu_notifier_put().

So delete this difficult to use interface.

Link: https://lore.kernel.org/r/20190806231548.25242-12-jgg@ziepe.ca
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Tested-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/mmu_notifier.h |  5 -----
 mm/mmu_notifier.c            | 31 -------------------------------
 2 files changed, 36 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 31aa971315a1..52929e5ef708 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -271,8 +271,6 @@ extern int __mmu_notifier_register(struct mmu_notifier *mn,
 				   struct mm_struct *mm);
 extern void mmu_notifier_unregister(struct mmu_notifier *mn,
 				    struct mm_struct *mm);
-extern void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
-					       struct mm_struct *mm);
 extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
 extern void __mmu_notifier_release(struct mm_struct *mm);
 extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
@@ -513,9 +511,6 @@ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range,
 	set_pte_at(___mm, ___address, __ptep, ___pte);			\
 })
 
-extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
-				   void (*func)(struct rcu_head *rcu));
-
 #else /* CONFIG_MMU_NOTIFIER */
 
 struct mmu_notifier_range {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index d76ea27e2bbb..3ebdc748b168 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,18 +21,6 @@
 /* global SRCU for all MMs */
 DEFINE_STATIC_SRCU(srcu);
 
-/*
- * This function allows mmu_notifier::release callback to delay a call to
- * a function that will free appropriate resources. The function must be
- * quick and must not block.
- */
-void mmu_notifier_call_srcu(struct rcu_head *rcu,
-			    void (*func)(struct rcu_head *rcu))
-{
-	call_srcu(&srcu, rcu, func);
-}
-EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu);
-
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -455,25 +443,6 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
 
-/*
- * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
- */
-void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
-					struct mm_struct *mm)
-{
-	spin_lock(&mm->mmu_notifier_mm->lock);
-	/*
-	 * Can not use list_del_rcu() since __mmu_notifier_release
-	 * can delete it before we hold the lock.
-	 */
-	hlist_del_init_rcu(&mn->hlist);
-	spin_unlock(&mm->mmu_notifier_mm->lock);
-
-	BUG_ON(atomic_read(&mm->mm_count) <= 0);
-	mmdrop(mm);
-}
-EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
-
 static void mmu_notifier_free_rcu(struct rcu_head *rcu)
 {
 	struct mmu_notifier *mn = container_of(rcu, struct mmu_notifier, rcu);
-- 
cgit v1.2.3


From 6ee41e5420d0afa8cddf09aa7384dabe570f8dc7 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 15 Aug 2019 12:00:43 +0300
Subject: crypto: des/3des_ede - add new helpers to verify keys

The recently added helper routine to perform key strength validation
of triple DES keys is slightly inadequate, since it comes in two versions,
neither of which are highly useful for anything other than skciphers (and
many drivers still use the older blkcipher interfaces).

So let's add a new helper and, considering that this is a helper function
that is only intended to be used by crypto code itself, put it in a new
des.h header under crypto/internal.

While at it, implement a similar helper for single DES, so that we can
start replacing the pattern of calling des_ekey() into a temp buffer
that occurs in many drivers in drivers/crypto.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/des_generic.c          |  13 ----
 include/crypto/internal/des.h | 141 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 141 insertions(+), 13 deletions(-)
 create mode 100644 include/crypto/internal/des.h

(limited to 'include')

diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index dc085514408a..c4d8ecda4ddf 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -841,19 +841,6 @@ static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	d[1] = cpu_to_le32(L);
 }
 
-/*
- * RFC2451:
- *
- *   For DES-EDE3, there is no known need to reject weak or
- *   complementation keys.  Any weakness is obviated by the use of
- *   multiple keys.
- *
- *   However, if the first two or last two independent 64-bit keys are
- *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
- *   same as DES.  Implementers MUST reject keys that exhibit this
- *   property.
- *
- */
 int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
 		      unsigned int keylen)
 {
diff --git a/include/crypto/internal/des.h b/include/crypto/internal/des.h
new file mode 100644
index 000000000000..f5d2e696522e
--- /dev/null
+++ b/include/crypto/internal/des.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DES & Triple DES EDE key verification helpers
+ */
+
+#ifndef __CRYPTO_INTERNAL_DES_H
+#define __CRYPTO_INTERNAL_DES_H
+
+#include <linux/crypto.h>
+#include <linux/fips.h>
+#include <crypto/des.h>
+#include <crypto/aead.h>
+#include <crypto/skcipher.h>
+
+/**
+ * crypto_des_verify_key - Check whether a DES key is weak
+ * @tfm: the crypto algo
+ * @key: the key buffer
+ *
+ * Returns -EINVAL if the key is weak and the crypto TFM does not permit weak
+ * keys. Otherwise, 0 is returned.
+ *
+ * It is the job of the caller to ensure that the size of the key equals
+ * DES_KEY_SIZE.
+ */
+static inline int crypto_des_verify_key(struct crypto_tfm *tfm, const u8 *key)
+{
+	u32 tmp[DES_EXPKEY_WORDS];
+	int err = 0;
+
+	if (!(crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS))
+		return 0;
+
+	if (!des_ekey(tmp, key)) {
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+		err = -EINVAL;
+	}
+
+	memzero_explicit(tmp, sizeof(tmp));
+	return err;
+}
+
+/*
+ * RFC2451:
+ *
+ *   For DES-EDE3, there is no known need to reject weak or
+ *   complementation keys.  Any weakness is obviated by the use of
+ *   multiple keys.
+ *
+ *   However, if the first two or last two independent 64-bit keys are
+ *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ *   same as DES.  Implementers MUST reject keys that exhibit this
+ *   property.
+ *
+ */
+
+/**
+ * crypto_des3_ede_verify_key - Check whether a DES3-EDE key is weak
+ * @tfm: the crypto algo
+ * @key: the key buffer
+ *
+ * Returns -EINVAL if the key is weak and the crypto TFM does not permit weak
+ * keys or when running in FIPS mode. Otherwise, 0 is returned. Note that some
+ * keys are rejected in FIPS mode even if weak keys are permitted by the TFM
+ * flags.
+ *
+ * It is the job of the caller to ensure that the size of the key equals
+ * DES3_EDE_KEY_SIZE.
+ */
+static inline int crypto_des3_ede_verify_key(struct crypto_tfm *tfm,
+					     const u8 *key)
+{
+	int err = -EINVAL;
+	u32 K[6];
+
+	memcpy(K, key, DES3_EDE_KEY_SIZE);
+
+	if ((!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
+	     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
+	    (fips_enabled || (crypto_tfm_get_flags(tfm) &
+		              CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)))
+		goto bad;
+
+	if ((!((K[0] ^ K[4]) | (K[1] ^ K[5]))) && fips_enabled)
+		goto bad;
+
+	err = 0;
+out:
+	memzero_explicit(K, DES3_EDE_KEY_SIZE);
+	return err;
+
+bad:
+	crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	goto out;
+}
+
+static inline int verify_skcipher_des_key(struct crypto_skcipher *tfm,
+					  const u8 *key)
+{
+	return crypto_des_verify_key(crypto_skcipher_tfm(tfm), key);
+}
+
+static inline int verify_skcipher_des3_key(struct crypto_skcipher *tfm,
+					   const u8 *key)
+{
+	return crypto_des3_ede_verify_key(crypto_skcipher_tfm(tfm), key);
+}
+
+static inline int verify_ablkcipher_des_key(struct crypto_ablkcipher *tfm,
+					    const u8 *key)
+{
+	return crypto_des_verify_key(crypto_ablkcipher_tfm(tfm), key);
+}
+
+static inline int verify_ablkcipher_des3_key(struct crypto_ablkcipher *tfm,
+					     const u8 *key)
+{
+	return crypto_des3_ede_verify_key(crypto_ablkcipher_tfm(tfm), key);
+}
+
+static inline int verify_aead_des_key(struct crypto_aead *tfm, const u8 *key,
+				      int keylen)
+{
+	if (keylen != DES_KEY_SIZE) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return crypto_des_verify_key(crypto_aead_tfm(tfm), key);
+}
+
+static inline int verify_aead_des3_key(struct crypto_aead *tfm, const u8 *key,
+				       int keylen)
+{
+	if (keylen != DES3_EDE_KEY_SIZE) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return crypto_des3_ede_verify_key(crypto_aead_tfm(tfm), key);
+}
+
+#endif /* __CRYPTO_INTERNAL_DES_H */
-- 
cgit v1.2.3


From 6b5c4818621fb0d3eda1d4059634b38e09bd7243 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 15 Aug 2019 12:01:08 +0300
Subject: crypto: des - remove unused function

Remove the old DES3 verification functions that are no longer used.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/des.h | 41 -----------------------------------------
 1 file changed, 41 deletions(-)

(limited to 'include')

diff --git a/include/crypto/des.h b/include/crypto/des.h
index 72c7c8e5a5a7..31b04ba835b1 100644
--- a/include/crypto/des.h
+++ b/include/crypto/des.h
@@ -19,47 +19,6 @@
 #define DES3_EDE_EXPKEY_WORDS	(3 * DES_EXPKEY_WORDS)
 #define DES3_EDE_BLOCK_SIZE	DES_BLOCK_SIZE
 
-static inline int __des3_verify_key(u32 *flags, const u8 *key)
-{
-	int err = -EINVAL;
-	u32 K[6];
-
-	memcpy(K, key, DES3_EDE_KEY_SIZE);
-
-	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
-		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-		     (fips_enabled ||
-		      (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)))
-		goto bad;
-
-	if (unlikely(!((K[0] ^ K[4]) | (K[1] ^ K[5]))) && fips_enabled)
-		goto bad;
-
-	err = 0;
-
-out:
-	memzero_explicit(K, DES3_EDE_KEY_SIZE);
-
-	return err;
-
-bad:
-	*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-	goto out;
-}
-
-static inline int des3_verify_key(struct crypto_skcipher *tfm, const u8 *key)
-{
-	u32 flags;
-	int err;
-
-	flags = crypto_skcipher_get_flags(tfm);
-	err = __des3_verify_key(&flags, key);
-	crypto_skcipher_set_flags(tfm, flags);
-	return err;
-}
-
-extern unsigned long des_ekey(u32 *pe, const u8 *k);
-
 extern int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
 			     unsigned int keylen);
 
-- 
cgit v1.2.3


From 04007b0e6cbbab5836ac891626e91edf10d46341 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 15 Aug 2019 12:01:09 +0300
Subject: crypto: des - split off DES library from generic DES cipher driver

Another one for the cipher museum: split off DES core processing into
a separate module so other drivers (mostly for crypto accelerators)
can reuse the code without pulling in the generic DES cipher itself.
This will also permit the cipher interface to be made private to the
crypto API itself once we move the only user in the kernel (CIFS) to
this library interface.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/des3_ede_glue.c                |   2 +-
 crypto/Kconfig                                 |   8 +-
 crypto/des_generic.c                           | 917 ++-----------------------
 drivers/crypto/Kconfig                         |  28 +-
 drivers/crypto/caam/Kconfig                    |   2 +-
 drivers/crypto/cavium/nitrox/Kconfig           |   2 +-
 drivers/crypto/inside-secure/safexcel_cipher.c |   2 +-
 drivers/crypto/stm32/Kconfig                   |   2 +-
 drivers/crypto/ux500/Kconfig                   |   2 +-
 include/crypto/des.h                           |  43 +-
 include/crypto/internal/des.h                  |  69 +-
 lib/crypto/Makefile                            |   3 +
 lib/crypto/des.c                               | 902 ++++++++++++++++++++++++
 13 files changed, 1053 insertions(+), 929 deletions(-)
 create mode 100644 lib/crypto/des.c

(limited to 'include')

diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index ec608babc22b..f730a312ce35 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -11,7 +11,7 @@
  */
 
 #include <crypto/algapi.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/crypto.h>
 #include <linux/init.h>
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 455a3354e291..42a17fe97703 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1306,9 +1306,13 @@ config CRYPTO_CAST6_AVX_X86_64
 	  This module provides the Cast6 cipher algorithm that processes
 	  eight blocks parallel using the AVX instruction set.
 
+config CRYPTO_LIB_DES
+	tristate
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_DES
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
@@ -1316,7 +1320,7 @@ config CRYPTO_DES_SPARC64
 	tristate "DES and Triple DES EDE cipher algorithms (SPARC64)"
 	depends on SPARC64
 	select CRYPTO_ALGAPI
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3),
 	  optimized using SPARC64 crypto opcodes.
@@ -1325,7 +1329,7 @@ config CRYPTO_DES3_EDE_X86_64
 	tristate "Triple DES EDE cipher algorithm (x86-64)"
 	depends on X86 && 64BIT
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  Triple DES EDE (FIPS 46-3) algorithm.
 
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index f15ae7660f1b..e021a321f584 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -13,832 +13,42 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/crypto.h>
-#include <linux/types.h>
 
-#include <crypto/des.h>
-
-#define ROL(x, r) ((x) = rol32((x), (r)))
-#define ROR(x, r) ((x) = ror32((x), (r)))
-
-struct des_ctx {
-	u32 expkey[DES_EXPKEY_WORDS];
-};
-
-struct des3_ede_ctx {
-	u32 expkey[DES3_EDE_EXPKEY_WORDS];
-};
-
-/* Lookup tables for key expansion */
-
-static const u8 pc1[256] = {
-	0x00, 0x00, 0x40, 0x04, 0x10, 0x10, 0x50, 0x14,
-	0x04, 0x40, 0x44, 0x44, 0x14, 0x50, 0x54, 0x54,
-	0x02, 0x02, 0x42, 0x06, 0x12, 0x12, 0x52, 0x16,
-	0x06, 0x42, 0x46, 0x46, 0x16, 0x52, 0x56, 0x56,
-	0x80, 0x08, 0xc0, 0x0c, 0x90, 0x18, 0xd0, 0x1c,
-	0x84, 0x48, 0xc4, 0x4c, 0x94, 0x58, 0xd4, 0x5c,
-	0x82, 0x0a, 0xc2, 0x0e, 0x92, 0x1a, 0xd2, 0x1e,
-	0x86, 0x4a, 0xc6, 0x4e, 0x96, 0x5a, 0xd6, 0x5e,
-	0x20, 0x20, 0x60, 0x24, 0x30, 0x30, 0x70, 0x34,
-	0x24, 0x60, 0x64, 0x64, 0x34, 0x70, 0x74, 0x74,
-	0x22, 0x22, 0x62, 0x26, 0x32, 0x32, 0x72, 0x36,
-	0x26, 0x62, 0x66, 0x66, 0x36, 0x72, 0x76, 0x76,
-	0xa0, 0x28, 0xe0, 0x2c, 0xb0, 0x38, 0xf0, 0x3c,
-	0xa4, 0x68, 0xe4, 0x6c, 0xb4, 0x78, 0xf4, 0x7c,
-	0xa2, 0x2a, 0xe2, 0x2e, 0xb2, 0x3a, 0xf2, 0x3e,
-	0xa6, 0x6a, 0xe6, 0x6e, 0xb6, 0x7a, 0xf6, 0x7e,
-	0x08, 0x80, 0x48, 0x84, 0x18, 0x90, 0x58, 0x94,
-	0x0c, 0xc0, 0x4c, 0xc4, 0x1c, 0xd0, 0x5c, 0xd4,
-	0x0a, 0x82, 0x4a, 0x86, 0x1a, 0x92, 0x5a, 0x96,
-	0x0e, 0xc2, 0x4e, 0xc6, 0x1e, 0xd2, 0x5e, 0xd6,
-	0x88, 0x88, 0xc8, 0x8c, 0x98, 0x98, 0xd8, 0x9c,
-	0x8c, 0xc8, 0xcc, 0xcc, 0x9c, 0xd8, 0xdc, 0xdc,
-	0x8a, 0x8a, 0xca, 0x8e, 0x9a, 0x9a, 0xda, 0x9e,
-	0x8e, 0xca, 0xce, 0xce, 0x9e, 0xda, 0xde, 0xde,
-	0x28, 0xa0, 0x68, 0xa4, 0x38, 0xb0, 0x78, 0xb4,
-	0x2c, 0xe0, 0x6c, 0xe4, 0x3c, 0xf0, 0x7c, 0xf4,
-	0x2a, 0xa2, 0x6a, 0xa6, 0x3a, 0xb2, 0x7a, 0xb6,
-	0x2e, 0xe2, 0x6e, 0xe6, 0x3e, 0xf2, 0x7e, 0xf6,
-	0xa8, 0xa8, 0xe8, 0xac, 0xb8, 0xb8, 0xf8, 0xbc,
-	0xac, 0xe8, 0xec, 0xec, 0xbc, 0xf8, 0xfc, 0xfc,
-	0xaa, 0xaa, 0xea, 0xae, 0xba, 0xba, 0xfa, 0xbe,
-	0xae, 0xea, 0xee, 0xee, 0xbe, 0xfa, 0xfe, 0xfe
-};
-
-static const u8 rs[256] = {
-	0x00, 0x00, 0x80, 0x80, 0x02, 0x02, 0x82, 0x82,
-	0x04, 0x04, 0x84, 0x84, 0x06, 0x06, 0x86, 0x86,
-	0x08, 0x08, 0x88, 0x88, 0x0a, 0x0a, 0x8a, 0x8a,
-	0x0c, 0x0c, 0x8c, 0x8c, 0x0e, 0x0e, 0x8e, 0x8e,
-	0x10, 0x10, 0x90, 0x90, 0x12, 0x12, 0x92, 0x92,
-	0x14, 0x14, 0x94, 0x94, 0x16, 0x16, 0x96, 0x96,
-	0x18, 0x18, 0x98, 0x98, 0x1a, 0x1a, 0x9a, 0x9a,
-	0x1c, 0x1c, 0x9c, 0x9c, 0x1e, 0x1e, 0x9e, 0x9e,
-	0x20, 0x20, 0xa0, 0xa0, 0x22, 0x22, 0xa2, 0xa2,
-	0x24, 0x24, 0xa4, 0xa4, 0x26, 0x26, 0xa6, 0xa6,
-	0x28, 0x28, 0xa8, 0xa8, 0x2a, 0x2a, 0xaa, 0xaa,
-	0x2c, 0x2c, 0xac, 0xac, 0x2e, 0x2e, 0xae, 0xae,
-	0x30, 0x30, 0xb0, 0xb0, 0x32, 0x32, 0xb2, 0xb2,
-	0x34, 0x34, 0xb4, 0xb4, 0x36, 0x36, 0xb6, 0xb6,
-	0x38, 0x38, 0xb8, 0xb8, 0x3a, 0x3a, 0xba, 0xba,
-	0x3c, 0x3c, 0xbc, 0xbc, 0x3e, 0x3e, 0xbe, 0xbe,
-	0x40, 0x40, 0xc0, 0xc0, 0x42, 0x42, 0xc2, 0xc2,
-	0x44, 0x44, 0xc4, 0xc4, 0x46, 0x46, 0xc6, 0xc6,
-	0x48, 0x48, 0xc8, 0xc8, 0x4a, 0x4a, 0xca, 0xca,
-	0x4c, 0x4c, 0xcc, 0xcc, 0x4e, 0x4e, 0xce, 0xce,
-	0x50, 0x50, 0xd0, 0xd0, 0x52, 0x52, 0xd2, 0xd2,
-	0x54, 0x54, 0xd4, 0xd4, 0x56, 0x56, 0xd6, 0xd6,
-	0x58, 0x58, 0xd8, 0xd8, 0x5a, 0x5a, 0xda, 0xda,
-	0x5c, 0x5c, 0xdc, 0xdc, 0x5e, 0x5e, 0xde, 0xde,
-	0x60, 0x60, 0xe0, 0xe0, 0x62, 0x62, 0xe2, 0xe2,
-	0x64, 0x64, 0xe4, 0xe4, 0x66, 0x66, 0xe6, 0xe6,
-	0x68, 0x68, 0xe8, 0xe8, 0x6a, 0x6a, 0xea, 0xea,
-	0x6c, 0x6c, 0xec, 0xec, 0x6e, 0x6e, 0xee, 0xee,
-	0x70, 0x70, 0xf0, 0xf0, 0x72, 0x72, 0xf2, 0xf2,
-	0x74, 0x74, 0xf4, 0xf4, 0x76, 0x76, 0xf6, 0xf6,
-	0x78, 0x78, 0xf8, 0xf8, 0x7a, 0x7a, 0xfa, 0xfa,
-	0x7c, 0x7c, 0xfc, 0xfc, 0x7e, 0x7e, 0xfe, 0xfe
-};
-
-static const u32 pc2[1024] = {
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00040000, 0x00000000, 0x04000000, 0x00100000,
-	0x00400000, 0x00000008, 0x00000800, 0x40000000,
-	0x00440000, 0x00000008, 0x04000800, 0x40100000,
-	0x00000400, 0x00000020, 0x08000000, 0x00000100,
-	0x00040400, 0x00000020, 0x0c000000, 0x00100100,
-	0x00400400, 0x00000028, 0x08000800, 0x40000100,
-	0x00440400, 0x00000028, 0x0c000800, 0x40100100,
-	0x80000000, 0x00000010, 0x00000000, 0x00800000,
-	0x80040000, 0x00000010, 0x04000000, 0x00900000,
-	0x80400000, 0x00000018, 0x00000800, 0x40800000,
-	0x80440000, 0x00000018, 0x04000800, 0x40900000,
-	0x80000400, 0x00000030, 0x08000000, 0x00800100,
-	0x80040400, 0x00000030, 0x0c000000, 0x00900100,
-	0x80400400, 0x00000038, 0x08000800, 0x40800100,
-	0x80440400, 0x00000038, 0x0c000800, 0x40900100,
-	0x10000000, 0x00000000, 0x00200000, 0x00001000,
-	0x10040000, 0x00000000, 0x04200000, 0x00101000,
-	0x10400000, 0x00000008, 0x00200800, 0x40001000,
-	0x10440000, 0x00000008, 0x04200800, 0x40101000,
-	0x10000400, 0x00000020, 0x08200000, 0x00001100,
-	0x10040400, 0x00000020, 0x0c200000, 0x00101100,
-	0x10400400, 0x00000028, 0x08200800, 0x40001100,
-	0x10440400, 0x00000028, 0x0c200800, 0x40101100,
-	0x90000000, 0x00000010, 0x00200000, 0x00801000,
-	0x90040000, 0x00000010, 0x04200000, 0x00901000,
-	0x90400000, 0x00000018, 0x00200800, 0x40801000,
-	0x90440000, 0x00000018, 0x04200800, 0x40901000,
-	0x90000400, 0x00000030, 0x08200000, 0x00801100,
-	0x90040400, 0x00000030, 0x0c200000, 0x00901100,
-	0x90400400, 0x00000038, 0x08200800, 0x40801100,
-	0x90440400, 0x00000038, 0x0c200800, 0x40901100,
-	0x00000200, 0x00080000, 0x00000000, 0x00000004,
-	0x00040200, 0x00080000, 0x04000000, 0x00100004,
-	0x00400200, 0x00080008, 0x00000800, 0x40000004,
-	0x00440200, 0x00080008, 0x04000800, 0x40100004,
-	0x00000600, 0x00080020, 0x08000000, 0x00000104,
-	0x00040600, 0x00080020, 0x0c000000, 0x00100104,
-	0x00400600, 0x00080028, 0x08000800, 0x40000104,
-	0x00440600, 0x00080028, 0x0c000800, 0x40100104,
-	0x80000200, 0x00080010, 0x00000000, 0x00800004,
-	0x80040200, 0x00080010, 0x04000000, 0x00900004,
-	0x80400200, 0x00080018, 0x00000800, 0x40800004,
-	0x80440200, 0x00080018, 0x04000800, 0x40900004,
-	0x80000600, 0x00080030, 0x08000000, 0x00800104,
-	0x80040600, 0x00080030, 0x0c000000, 0x00900104,
-	0x80400600, 0x00080038, 0x08000800, 0x40800104,
-	0x80440600, 0x00080038, 0x0c000800, 0x40900104,
-	0x10000200, 0x00080000, 0x00200000, 0x00001004,
-	0x10040200, 0x00080000, 0x04200000, 0x00101004,
-	0x10400200, 0x00080008, 0x00200800, 0x40001004,
-	0x10440200, 0x00080008, 0x04200800, 0x40101004,
-	0x10000600, 0x00080020, 0x08200000, 0x00001104,
-	0x10040600, 0x00080020, 0x0c200000, 0x00101104,
-	0x10400600, 0x00080028, 0x08200800, 0x40001104,
-	0x10440600, 0x00080028, 0x0c200800, 0x40101104,
-	0x90000200, 0x00080010, 0x00200000, 0x00801004,
-	0x90040200, 0x00080010, 0x04200000, 0x00901004,
-	0x90400200, 0x00080018, 0x00200800, 0x40801004,
-	0x90440200, 0x00080018, 0x04200800, 0x40901004,
-	0x90000600, 0x00080030, 0x08200000, 0x00801104,
-	0x90040600, 0x00080030, 0x0c200000, 0x00901104,
-	0x90400600, 0x00080038, 0x08200800, 0x40801104,
-	0x90440600, 0x00080038, 0x0c200800, 0x40901104,
-	0x00000002, 0x00002000, 0x20000000, 0x00000001,
-	0x00040002, 0x00002000, 0x24000000, 0x00100001,
-	0x00400002, 0x00002008, 0x20000800, 0x40000001,
-	0x00440002, 0x00002008, 0x24000800, 0x40100001,
-	0x00000402, 0x00002020, 0x28000000, 0x00000101,
-	0x00040402, 0x00002020, 0x2c000000, 0x00100101,
-	0x00400402, 0x00002028, 0x28000800, 0x40000101,
-	0x00440402, 0x00002028, 0x2c000800, 0x40100101,
-	0x80000002, 0x00002010, 0x20000000, 0x00800001,
-	0x80040002, 0x00002010, 0x24000000, 0x00900001,
-	0x80400002, 0x00002018, 0x20000800, 0x40800001,
-	0x80440002, 0x00002018, 0x24000800, 0x40900001,
-	0x80000402, 0x00002030, 0x28000000, 0x00800101,
-	0x80040402, 0x00002030, 0x2c000000, 0x00900101,
-	0x80400402, 0x00002038, 0x28000800, 0x40800101,
-	0x80440402, 0x00002038, 0x2c000800, 0x40900101,
-	0x10000002, 0x00002000, 0x20200000, 0x00001001,
-	0x10040002, 0x00002000, 0x24200000, 0x00101001,
-	0x10400002, 0x00002008, 0x20200800, 0x40001001,
-	0x10440002, 0x00002008, 0x24200800, 0x40101001,
-	0x10000402, 0x00002020, 0x28200000, 0x00001101,
-	0x10040402, 0x00002020, 0x2c200000, 0x00101101,
-	0x10400402, 0x00002028, 0x28200800, 0x40001101,
-	0x10440402, 0x00002028, 0x2c200800, 0x40101101,
-	0x90000002, 0x00002010, 0x20200000, 0x00801001,
-	0x90040002, 0x00002010, 0x24200000, 0x00901001,
-	0x90400002, 0x00002018, 0x20200800, 0x40801001,
-	0x90440002, 0x00002018, 0x24200800, 0x40901001,
-	0x90000402, 0x00002030, 0x28200000, 0x00801101,
-	0x90040402, 0x00002030, 0x2c200000, 0x00901101,
-	0x90400402, 0x00002038, 0x28200800, 0x40801101,
-	0x90440402, 0x00002038, 0x2c200800, 0x40901101,
-	0x00000202, 0x00082000, 0x20000000, 0x00000005,
-	0x00040202, 0x00082000, 0x24000000, 0x00100005,
-	0x00400202, 0x00082008, 0x20000800, 0x40000005,
-	0x00440202, 0x00082008, 0x24000800, 0x40100005,
-	0x00000602, 0x00082020, 0x28000000, 0x00000105,
-	0x00040602, 0x00082020, 0x2c000000, 0x00100105,
-	0x00400602, 0x00082028, 0x28000800, 0x40000105,
-	0x00440602, 0x00082028, 0x2c000800, 0x40100105,
-	0x80000202, 0x00082010, 0x20000000, 0x00800005,
-	0x80040202, 0x00082010, 0x24000000, 0x00900005,
-	0x80400202, 0x00082018, 0x20000800, 0x40800005,
-	0x80440202, 0x00082018, 0x24000800, 0x40900005,
-	0x80000602, 0x00082030, 0x28000000, 0x00800105,
-	0x80040602, 0x00082030, 0x2c000000, 0x00900105,
-	0x80400602, 0x00082038, 0x28000800, 0x40800105,
-	0x80440602, 0x00082038, 0x2c000800, 0x40900105,
-	0x10000202, 0x00082000, 0x20200000, 0x00001005,
-	0x10040202, 0x00082000, 0x24200000, 0x00101005,
-	0x10400202, 0x00082008, 0x20200800, 0x40001005,
-	0x10440202, 0x00082008, 0x24200800, 0x40101005,
-	0x10000602, 0x00082020, 0x28200000, 0x00001105,
-	0x10040602, 0x00082020, 0x2c200000, 0x00101105,
-	0x10400602, 0x00082028, 0x28200800, 0x40001105,
-	0x10440602, 0x00082028, 0x2c200800, 0x40101105,
-	0x90000202, 0x00082010, 0x20200000, 0x00801005,
-	0x90040202, 0x00082010, 0x24200000, 0x00901005,
-	0x90400202, 0x00082018, 0x20200800, 0x40801005,
-	0x90440202, 0x00082018, 0x24200800, 0x40901005,
-	0x90000602, 0x00082030, 0x28200000, 0x00801105,
-	0x90040602, 0x00082030, 0x2c200000, 0x00901105,
-	0x90400602, 0x00082038, 0x28200800, 0x40801105,
-	0x90440602, 0x00082038, 0x2c200800, 0x40901105,
-
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000008, 0x00080000, 0x10000000,
-	0x02000000, 0x00000000, 0x00000080, 0x00001000,
-	0x02000000, 0x00000008, 0x00080080, 0x10001000,
-	0x00004000, 0x00000000, 0x00000040, 0x00040000,
-	0x00004000, 0x00000008, 0x00080040, 0x10040000,
-	0x02004000, 0x00000000, 0x000000c0, 0x00041000,
-	0x02004000, 0x00000008, 0x000800c0, 0x10041000,
-	0x00020000, 0x00008000, 0x08000000, 0x00200000,
-	0x00020000, 0x00008008, 0x08080000, 0x10200000,
-	0x02020000, 0x00008000, 0x08000080, 0x00201000,
-	0x02020000, 0x00008008, 0x08080080, 0x10201000,
-	0x00024000, 0x00008000, 0x08000040, 0x00240000,
-	0x00024000, 0x00008008, 0x08080040, 0x10240000,
-	0x02024000, 0x00008000, 0x080000c0, 0x00241000,
-	0x02024000, 0x00008008, 0x080800c0, 0x10241000,
-	0x00000000, 0x01000000, 0x00002000, 0x00000020,
-	0x00000000, 0x01000008, 0x00082000, 0x10000020,
-	0x02000000, 0x01000000, 0x00002080, 0x00001020,
-	0x02000000, 0x01000008, 0x00082080, 0x10001020,
-	0x00004000, 0x01000000, 0x00002040, 0x00040020,
-	0x00004000, 0x01000008, 0x00082040, 0x10040020,
-	0x02004000, 0x01000000, 0x000020c0, 0x00041020,
-	0x02004000, 0x01000008, 0x000820c0, 0x10041020,
-	0x00020000, 0x01008000, 0x08002000, 0x00200020,
-	0x00020000, 0x01008008, 0x08082000, 0x10200020,
-	0x02020000, 0x01008000, 0x08002080, 0x00201020,
-	0x02020000, 0x01008008, 0x08082080, 0x10201020,
-	0x00024000, 0x01008000, 0x08002040, 0x00240020,
-	0x00024000, 0x01008008, 0x08082040, 0x10240020,
-	0x02024000, 0x01008000, 0x080020c0, 0x00241020,
-	0x02024000, 0x01008008, 0x080820c0, 0x10241020,
-	0x00000400, 0x04000000, 0x00100000, 0x00000004,
-	0x00000400, 0x04000008, 0x00180000, 0x10000004,
-	0x02000400, 0x04000000, 0x00100080, 0x00001004,
-	0x02000400, 0x04000008, 0x00180080, 0x10001004,
-	0x00004400, 0x04000000, 0x00100040, 0x00040004,
-	0x00004400, 0x04000008, 0x00180040, 0x10040004,
-	0x02004400, 0x04000000, 0x001000c0, 0x00041004,
-	0x02004400, 0x04000008, 0x001800c0, 0x10041004,
-	0x00020400, 0x04008000, 0x08100000, 0x00200004,
-	0x00020400, 0x04008008, 0x08180000, 0x10200004,
-	0x02020400, 0x04008000, 0x08100080, 0x00201004,
-	0x02020400, 0x04008008, 0x08180080, 0x10201004,
-	0x00024400, 0x04008000, 0x08100040, 0x00240004,
-	0x00024400, 0x04008008, 0x08180040, 0x10240004,
-	0x02024400, 0x04008000, 0x081000c0, 0x00241004,
-	0x02024400, 0x04008008, 0x081800c0, 0x10241004,
-	0x00000400, 0x05000000, 0x00102000, 0x00000024,
-	0x00000400, 0x05000008, 0x00182000, 0x10000024,
-	0x02000400, 0x05000000, 0x00102080, 0x00001024,
-	0x02000400, 0x05000008, 0x00182080, 0x10001024,
-	0x00004400, 0x05000000, 0x00102040, 0x00040024,
-	0x00004400, 0x05000008, 0x00182040, 0x10040024,
-	0x02004400, 0x05000000, 0x001020c0, 0x00041024,
-	0x02004400, 0x05000008, 0x001820c0, 0x10041024,
-	0x00020400, 0x05008000, 0x08102000, 0x00200024,
-	0x00020400, 0x05008008, 0x08182000, 0x10200024,
-	0x02020400, 0x05008000, 0x08102080, 0x00201024,
-	0x02020400, 0x05008008, 0x08182080, 0x10201024,
-	0x00024400, 0x05008000, 0x08102040, 0x00240024,
-	0x00024400, 0x05008008, 0x08182040, 0x10240024,
-	0x02024400, 0x05008000, 0x081020c0, 0x00241024,
-	0x02024400, 0x05008008, 0x081820c0, 0x10241024,
-	0x00000800, 0x00010000, 0x20000000, 0x00000010,
-	0x00000800, 0x00010008, 0x20080000, 0x10000010,
-	0x02000800, 0x00010000, 0x20000080, 0x00001010,
-	0x02000800, 0x00010008, 0x20080080, 0x10001010,
-	0x00004800, 0x00010000, 0x20000040, 0x00040010,
-	0x00004800, 0x00010008, 0x20080040, 0x10040010,
-	0x02004800, 0x00010000, 0x200000c0, 0x00041010,
-	0x02004800, 0x00010008, 0x200800c0, 0x10041010,
-	0x00020800, 0x00018000, 0x28000000, 0x00200010,
-	0x00020800, 0x00018008, 0x28080000, 0x10200010,
-	0x02020800, 0x00018000, 0x28000080, 0x00201010,
-	0x02020800, 0x00018008, 0x28080080, 0x10201010,
-	0x00024800, 0x00018000, 0x28000040, 0x00240010,
-	0x00024800, 0x00018008, 0x28080040, 0x10240010,
-	0x02024800, 0x00018000, 0x280000c0, 0x00241010,
-	0x02024800, 0x00018008, 0x280800c0, 0x10241010,
-	0x00000800, 0x01010000, 0x20002000, 0x00000030,
-	0x00000800, 0x01010008, 0x20082000, 0x10000030,
-	0x02000800, 0x01010000, 0x20002080, 0x00001030,
-	0x02000800, 0x01010008, 0x20082080, 0x10001030,
-	0x00004800, 0x01010000, 0x20002040, 0x00040030,
-	0x00004800, 0x01010008, 0x20082040, 0x10040030,
-	0x02004800, 0x01010000, 0x200020c0, 0x00041030,
-	0x02004800, 0x01010008, 0x200820c0, 0x10041030,
-	0x00020800, 0x01018000, 0x28002000, 0x00200030,
-	0x00020800, 0x01018008, 0x28082000, 0x10200030,
-	0x02020800, 0x01018000, 0x28002080, 0x00201030,
-	0x02020800, 0x01018008, 0x28082080, 0x10201030,
-	0x00024800, 0x01018000, 0x28002040, 0x00240030,
-	0x00024800, 0x01018008, 0x28082040, 0x10240030,
-	0x02024800, 0x01018000, 0x280020c0, 0x00241030,
-	0x02024800, 0x01018008, 0x280820c0, 0x10241030,
-	0x00000c00, 0x04010000, 0x20100000, 0x00000014,
-	0x00000c00, 0x04010008, 0x20180000, 0x10000014,
-	0x02000c00, 0x04010000, 0x20100080, 0x00001014,
-	0x02000c00, 0x04010008, 0x20180080, 0x10001014,
-	0x00004c00, 0x04010000, 0x20100040, 0x00040014,
-	0x00004c00, 0x04010008, 0x20180040, 0x10040014,
-	0x02004c00, 0x04010000, 0x201000c0, 0x00041014,
-	0x02004c00, 0x04010008, 0x201800c0, 0x10041014,
-	0x00020c00, 0x04018000, 0x28100000, 0x00200014,
-	0x00020c00, 0x04018008, 0x28180000, 0x10200014,
-	0x02020c00, 0x04018000, 0x28100080, 0x00201014,
-	0x02020c00, 0x04018008, 0x28180080, 0x10201014,
-	0x00024c00, 0x04018000, 0x28100040, 0x00240014,
-	0x00024c00, 0x04018008, 0x28180040, 0x10240014,
-	0x02024c00, 0x04018000, 0x281000c0, 0x00241014,
-	0x02024c00, 0x04018008, 0x281800c0, 0x10241014,
-	0x00000c00, 0x05010000, 0x20102000, 0x00000034,
-	0x00000c00, 0x05010008, 0x20182000, 0x10000034,
-	0x02000c00, 0x05010000, 0x20102080, 0x00001034,
-	0x02000c00, 0x05010008, 0x20182080, 0x10001034,
-	0x00004c00, 0x05010000, 0x20102040, 0x00040034,
-	0x00004c00, 0x05010008, 0x20182040, 0x10040034,
-	0x02004c00, 0x05010000, 0x201020c0, 0x00041034,
-	0x02004c00, 0x05010008, 0x201820c0, 0x10041034,
-	0x00020c00, 0x05018000, 0x28102000, 0x00200034,
-	0x00020c00, 0x05018008, 0x28182000, 0x10200034,
-	0x02020c00, 0x05018000, 0x28102080, 0x00201034,
-	0x02020c00, 0x05018008, 0x28182080, 0x10201034,
-	0x00024c00, 0x05018000, 0x28102040, 0x00240034,
-	0x00024c00, 0x05018008, 0x28182040, 0x10240034,
-	0x02024c00, 0x05018000, 0x281020c0, 0x00241034,
-	0x02024c00, 0x05018008, 0x281820c0, 0x10241034
-};
-
-/* S-box lookup tables */
-
-static const u32 S1[64] = {
-	0x01010400, 0x00000000, 0x00010000, 0x01010404,
-	0x01010004, 0x00010404, 0x00000004, 0x00010000,
-	0x00000400, 0x01010400, 0x01010404, 0x00000400,
-	0x01000404, 0x01010004, 0x01000000, 0x00000004,
-	0x00000404, 0x01000400, 0x01000400, 0x00010400,
-	0x00010400, 0x01010000, 0x01010000, 0x01000404,
-	0x00010004, 0x01000004, 0x01000004, 0x00010004,
-	0x00000000, 0x00000404, 0x00010404, 0x01000000,
-	0x00010000, 0x01010404, 0x00000004, 0x01010000,
-	0x01010400, 0x01000000, 0x01000000, 0x00000400,
-	0x01010004, 0x00010000, 0x00010400, 0x01000004,
-	0x00000400, 0x00000004, 0x01000404, 0x00010404,
-	0x01010404, 0x00010004, 0x01010000, 0x01000404,
-	0x01000004, 0x00000404, 0x00010404, 0x01010400,
-	0x00000404, 0x01000400, 0x01000400, 0x00000000,
-	0x00010004, 0x00010400, 0x00000000, 0x01010004
-};
-
-static const u32 S2[64] = {
-	0x80108020, 0x80008000, 0x00008000, 0x00108020,
-	0x00100000, 0x00000020, 0x80100020, 0x80008020,
-	0x80000020, 0x80108020, 0x80108000, 0x80000000,
-	0x80008000, 0x00100000, 0x00000020, 0x80100020,
-	0x00108000, 0x00100020, 0x80008020, 0x00000000,
-	0x80000000, 0x00008000, 0x00108020, 0x80100000,
-	0x00100020, 0x80000020, 0x00000000, 0x00108000,
-	0x00008020, 0x80108000, 0x80100000, 0x00008020,
-	0x00000000, 0x00108020, 0x80100020, 0x00100000,
-	0x80008020, 0x80100000, 0x80108000, 0x00008000,
-	0x80100000, 0x80008000, 0x00000020, 0x80108020,
-	0x00108020, 0x00000020, 0x00008000, 0x80000000,
-	0x00008020, 0x80108000, 0x00100000, 0x80000020,
-	0x00100020, 0x80008020, 0x80000020, 0x00100020,
-	0x00108000, 0x00000000, 0x80008000, 0x00008020,
-	0x80000000, 0x80100020, 0x80108020, 0x00108000
-};
-
-static const u32 S3[64] = {
-	0x00000208, 0x08020200, 0x00000000, 0x08020008,
-	0x08000200, 0x00000000, 0x00020208, 0x08000200,
-	0x00020008, 0x08000008, 0x08000008, 0x00020000,
-	0x08020208, 0x00020008, 0x08020000, 0x00000208,
-	0x08000000, 0x00000008, 0x08020200, 0x00000200,
-	0x00020200, 0x08020000, 0x08020008, 0x00020208,
-	0x08000208, 0x00020200, 0x00020000, 0x08000208,
-	0x00000008, 0x08020208, 0x00000200, 0x08000000,
-	0x08020200, 0x08000000, 0x00020008, 0x00000208,
-	0x00020000, 0x08020200, 0x08000200, 0x00000000,
-	0x00000200, 0x00020008, 0x08020208, 0x08000200,
-	0x08000008, 0x00000200, 0x00000000, 0x08020008,
-	0x08000208, 0x00020000, 0x08000000, 0x08020208,
-	0x00000008, 0x00020208, 0x00020200, 0x08000008,
-	0x08020000, 0x08000208, 0x00000208, 0x08020000,
-	0x00020208, 0x00000008, 0x08020008, 0x00020200
-};
-
-static const u32 S4[64] = {
-	0x00802001, 0x00002081, 0x00002081, 0x00000080,
-	0x00802080, 0x00800081, 0x00800001, 0x00002001,
-	0x00000000, 0x00802000, 0x00802000, 0x00802081,
-	0x00000081, 0x00000000, 0x00800080, 0x00800001,
-	0x00000001, 0x00002000, 0x00800000, 0x00802001,
-	0x00000080, 0x00800000, 0x00002001, 0x00002080,
-	0x00800081, 0x00000001, 0x00002080, 0x00800080,
-	0x00002000, 0x00802080, 0x00802081, 0x00000081,
-	0x00800080, 0x00800001, 0x00802000, 0x00802081,
-	0x00000081, 0x00000000, 0x00000000, 0x00802000,
-	0x00002080, 0x00800080, 0x00800081, 0x00000001,
-	0x00802001, 0x00002081, 0x00002081, 0x00000080,
-	0x00802081, 0x00000081, 0x00000001, 0x00002000,
-	0x00800001, 0x00002001, 0x00802080, 0x00800081,
-	0x00002001, 0x00002080, 0x00800000, 0x00802001,
-	0x00000080, 0x00800000, 0x00002000, 0x00802080
-};
-
-static const u32 S5[64] = {
-	0x00000100, 0x02080100, 0x02080000, 0x42000100,
-	0x00080000, 0x00000100, 0x40000000, 0x02080000,
-	0x40080100, 0x00080000, 0x02000100, 0x40080100,
-	0x42000100, 0x42080000, 0x00080100, 0x40000000,
-	0x02000000, 0x40080000, 0x40080000, 0x00000000,
-	0x40000100, 0x42080100, 0x42080100, 0x02000100,
-	0x42080000, 0x40000100, 0x00000000, 0x42000000,
-	0x02080100, 0x02000000, 0x42000000, 0x00080100,
-	0x00080000, 0x42000100, 0x00000100, 0x02000000,
-	0x40000000, 0x02080000, 0x42000100, 0x40080100,
-	0x02000100, 0x40000000, 0x42080000, 0x02080100,
-	0x40080100, 0x00000100, 0x02000000, 0x42080000,
-	0x42080100, 0x00080100, 0x42000000, 0x42080100,
-	0x02080000, 0x00000000, 0x40080000, 0x42000000,
-	0x00080100, 0x02000100, 0x40000100, 0x00080000,
-	0x00000000, 0x40080000, 0x02080100, 0x40000100
-};
-
-static const u32 S6[64] = {
-	0x20000010, 0x20400000, 0x00004000, 0x20404010,
-	0x20400000, 0x00000010, 0x20404010, 0x00400000,
-	0x20004000, 0x00404010, 0x00400000, 0x20000010,
-	0x00400010, 0x20004000, 0x20000000, 0x00004010,
-	0x00000000, 0x00400010, 0x20004010, 0x00004000,
-	0x00404000, 0x20004010, 0x00000010, 0x20400010,
-	0x20400010, 0x00000000, 0x00404010, 0x20404000,
-	0x00004010, 0x00404000, 0x20404000, 0x20000000,
-	0x20004000, 0x00000010, 0x20400010, 0x00404000,
-	0x20404010, 0x00400000, 0x00004010, 0x20000010,
-	0x00400000, 0x20004000, 0x20000000, 0x00004010,
-	0x20000010, 0x20404010, 0x00404000, 0x20400000,
-	0x00404010, 0x20404000, 0x00000000, 0x20400010,
-	0x00000010, 0x00004000, 0x20400000, 0x00404010,
-	0x00004000, 0x00400010, 0x20004010, 0x00000000,
-	0x20404000, 0x20000000, 0x00400010, 0x20004010
-};
-
-static const u32 S7[64] = {
-	0x00200000, 0x04200002, 0x04000802, 0x00000000,
-	0x00000800, 0x04000802, 0x00200802, 0x04200800,
-	0x04200802, 0x00200000, 0x00000000, 0x04000002,
-	0x00000002, 0x04000000, 0x04200002, 0x00000802,
-	0x04000800, 0x00200802, 0x00200002, 0x04000800,
-	0x04000002, 0x04200000, 0x04200800, 0x00200002,
-	0x04200000, 0x00000800, 0x00000802, 0x04200802,
-	0x00200800, 0x00000002, 0x04000000, 0x00200800,
-	0x04000000, 0x00200800, 0x00200000, 0x04000802,
-	0x04000802, 0x04200002, 0x04200002, 0x00000002,
-	0x00200002, 0x04000000, 0x04000800, 0x00200000,
-	0x04200800, 0x00000802, 0x00200802, 0x04200800,
-	0x00000802, 0x04000002, 0x04200802, 0x04200000,
-	0x00200800, 0x00000000, 0x00000002, 0x04200802,
-	0x00000000, 0x00200802, 0x04200000, 0x00000800,
-	0x04000002, 0x04000800, 0x00000800, 0x00200002
-};
-
-static const u32 S8[64] = {
-	0x10001040, 0x00001000, 0x00040000, 0x10041040,
-	0x10000000, 0x10001040, 0x00000040, 0x10000000,
-	0x00040040, 0x10040000, 0x10041040, 0x00041000,
-	0x10041000, 0x00041040, 0x00001000, 0x00000040,
-	0x10040000, 0x10000040, 0x10001000, 0x00001040,
-	0x00041000, 0x00040040, 0x10040040, 0x10041000,
-	0x00001040, 0x00000000, 0x00000000, 0x10040040,
-	0x10000040, 0x10001000, 0x00041040, 0x00040000,
-	0x00041040, 0x00040000, 0x10041000, 0x00001000,
-	0x00000040, 0x10040040, 0x00001000, 0x00041040,
-	0x10001000, 0x00000040, 0x10000040, 0x10040000,
-	0x10040040, 0x10000000, 0x00040000, 0x10001040,
-	0x00000000, 0x10041040, 0x00040040, 0x10000040,
-	0x10040000, 0x10001000, 0x10001040, 0x00000000,
-	0x10041040, 0x00041000, 0x00041000, 0x00001040,
-	0x00001040, 0x00040040, 0x10000000, 0x10041000
-};
-
-/* Encryption components: IP, FP, and round function */
-
-#define IP(L, R, T)		\
-	ROL(R, 4);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xf0f0f0f0;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 12);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xffff0000;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 14);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xcccccccc;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 6);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xff00ff00;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 7);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xaaaaaaaa;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(L, 1);
-
-#define FP(L, R, T)		\
-	ROR(L, 1);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xaaaaaaaa;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 7);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xff00ff00;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 6);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xcccccccc;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 14);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xffff0000;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 12);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xf0f0f0f0;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 4);
-
-#define ROUND(L, R, A, B, K, d)					\
-	B = K[0];			A = K[1];	K += d;	\
-	B ^= R;				A ^= R;			\
-	B &= 0x3f3f3f3f;		ROR(A, 4);		\
-	L ^= S8[0xff & B];		A &= 0x3f3f3f3f;	\
-	L ^= S6[0xff & (B >> 8)];	B >>= 16;		\
-	L ^= S7[0xff & A];					\
-	L ^= S5[0xff & (A >> 8)];	A >>= 16;		\
-	L ^= S4[0xff & B];					\
-	L ^= S2[0xff & (B >> 8)];				\
-	L ^= S3[0xff & A];					\
-	L ^= S1[0xff & (A >> 8)];
-
-/*
- * PC2 lookup tables are organized as 2 consecutive sets of 4 interleaved
- * tables of 128 elements.  One set is for C_i and the other for D_i, while
- * the 4 interleaved tables correspond to four 7-bit subsets of C_i or D_i.
- *
- * After PC1 each of the variables a,b,c,d contains a 7 bit subset of C_i
- * or D_i in bits 7-1 (bit 0 being the least significant).
- */
-
-#define T1(x) pt[2 * (x) + 0]
-#define T2(x) pt[2 * (x) + 1]
-#define T3(x) pt[2 * (x) + 2]
-#define T4(x) pt[2 * (x) + 3]
-
-#define DES_PC2(a, b, c, d) (T4(d) | T3(c) | T2(b) | T1(a))
-
-/*
- * Encryption key expansion
- *
- * RFC2451: Weak key checks SHOULD be performed.
- *
- * FIPS 74:
- *
- *   Keys having duals are keys which produce all zeros, all ones, or
- *   alternating zero-one patterns in the C and D registers after Permuted
- *   Choice 1 has operated on the key.
- *
- */
-unsigned long des_ekey(u32 *pe, const u8 *k)
-{
-	/* K&R: long is at least 32 bits */
-	unsigned long a, b, c, d, w;
-	const u32 *pt = pc2;
-
-	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
-	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
-	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
-	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
-
-	pe[15 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[14 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[13 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[12 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[11 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[10 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 9 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 8 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 7 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 6 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 5 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 4 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 3 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 2 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 1 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[ 0 * 2 + 0] = DES_PC2(b, c, d, a);
-
-	/* Check if first half is weak */
-	w  = (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
-
-	/* Skip to next table set */
-	pt += 512;
-
-	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
-	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
-	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
-	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
-
-	/* Check if second half is weak */
-	w |= (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
-
-	pe[15 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[14 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[13 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[12 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[11 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[10 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 9 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 8 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 7 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 6 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 5 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 4 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 3 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 2 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 1 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[ 0 * 2 + 1] = DES_PC2(b, c, d, a);
-
-	/* Fixup: 2413 5768 -> 1357 2468 */
-	for (d = 0; d < 16; ++d) {
-		a = pe[2 * d];
-		b = pe[2 * d + 1];
-		c = a ^ b;
-		c &= 0xffff0000;
-		a ^= c;
-		b ^= c;
-		ROL(b, 18);
-		pe[2 * d] = a;
-		pe[2 * d + 1] = b;
-	}
-
-	/* Zero if weak key */
-	return w;
-}
-EXPORT_SYMBOL_GPL(des_ekey);
-
-/*
- * Decryption key expansion
- *
- * No weak key checking is performed, as this is only used by triple DES
- *
- */
-static void dkey(u32 *pe, const u8 *k)
-{
-	/* K&R: long is at least 32 bits */
-	unsigned long a, b, c, d;
-	const u32 *pt = pc2;
-
-	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
-	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
-	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
-	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
-
-	pe[ 0 * 2] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[ 1 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 2 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 3 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 4 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 5 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 6 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 7 * 2] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 8 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 9 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[10 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[11 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[12 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[13 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[14 * 2] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[15 * 2] = DES_PC2(b, c, d, a);
-
-	/* Skip to next table set */
-	pt += 512;
-
-	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
-	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
-	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
-	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
-
-	pe[ 0 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[ 1 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 2 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 3 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 4 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 5 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 6 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 7 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 8 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 9 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[10 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[11 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[12 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[13 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[14 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[15 * 2 + 1] = DES_PC2(b, c, d, a);
-
-	/* Fixup: 2413 5768 -> 1357 2468 */
-	for (d = 0; d < 16; ++d) {
-		a = pe[2 * d];
-		b = pe[2 * d + 1];
-		c = a ^ b;
-		c &= 0xffff0000;
-		a ^= c;
-		b ^= c;
-		ROL(b, 18);
-		pe[2 * d] = a;
-		pe[2 * d + 1] = b;
-	}
-}
+#include <crypto/internal/des.h>
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 		      unsigned int keylen)
 {
 	struct des_ctx *dctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	u32 tmp[DES_EXPKEY_WORDS];
-	int ret;
-
-	/* Expand to tmp */
-	ret = des_ekey(tmp, key);
+	int err;
 
-	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
+	err = des_expand_key(dctx, key, keylen);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
 	}
 
-	/* Copy to output */
-	memcpy(dctx->expkey, tmp, sizeof(dctx->expkey));
-
-	return 0;
+	if (err) {
+		memset(dctx, 0, sizeof(*dctx));
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	}
+	return err;
 }
 
-static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct des_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *K = ctx->expkey;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
-
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
-
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, 2);
-		ROUND(R, L, A, B, K, 2);
-	}
-	FP(R, L, A);
+	const struct des_ctx *dctx = crypto_tfm_ctx(tfm);
 
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
+	des_encrypt(dctx, dst, src);
 }
 
-static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	struct des_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *K = ctx->expkey + DES_EXPKEY_WORDS - 2;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
-
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
+	const struct des_ctx *dctx = crypto_tfm_ctx(tfm);
 
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, -2);
-		ROUND(R, L, A, B, K, -2);
-	}
-	FP(R, L, A);
-
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
+	des_decrypt(dctx, dst, src);
 }
 
 int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
@@ -858,76 +68,37 @@ static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int keylen)
 {
 	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
-	u32 *expkey = dctx->expkey;
 	int err;
 
-	err = crypto_des3_ede_verify_key(tfm, key);
-	if (err)
-		return err;
+	err = des3_ede_expand_key(dctx, key, keylen);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
+	}
 
-	return __des3_ede_setkey(expkey, flags, key, keylen);
+	if (err) {
+		memset(dctx, 0, sizeof(*dctx));
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	}
+	return err;
 }
 
-static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst,
+				    const u8 *src)
 {
-	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u32 *K = dctx->expkey;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
-
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
+	const struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, 2);
-		ROUND(R, L, A, B, K, 2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(R, L, A, B, K, 2);
-		ROUND(L, R, A, B, K, 2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, 2);
-		ROUND(R, L, A, B, K, 2);
-	}
-	FP(R, L, A);
-
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
+	des3_ede_encrypt(dctx, dst, src);
 }
 
-static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst,
+				    const u8 *src)
 {
-	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u32 *K = dctx->expkey + DES3_EDE_EXPKEY_WORDS - 2;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
-
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
-
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, -2);
-		ROUND(R, L, A, B, K, -2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(R, L, A, B, K, -2);
-		ROUND(L, R, A, B, K, -2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, -2);
-		ROUND(R, L, A, B, K, -2);
-	}
-	FP(R, L, A);
+	const struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
+	des3_ede_decrypt(dctx, dst, src);
 }
 
 static struct crypto_alg des_algs[2] = { {
@@ -938,13 +109,12 @@ static struct crypto_alg des_algs[2] = { {
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_alignmask		=	3,
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES_KEY_SIZE,
 	.cia_max_keysize	=	DES_KEY_SIZE,
 	.cia_setkey		=	des_setkey,
-	.cia_encrypt		=	des_encrypt,
-	.cia_decrypt		=	des_decrypt } }
+	.cia_encrypt		=	crypto_des_encrypt,
+	.cia_decrypt		=	crypto_des_decrypt } }
 }, {
 	.cra_name		=	"des3_ede",
 	.cra_driver_name	=	"des3_ede-generic",
@@ -953,13 +123,12 @@ static struct crypto_alg des_algs[2] = { {
 	.cra_blocksize		=	DES3_EDE_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des3_ede_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_alignmask		=	3,
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES3_EDE_KEY_SIZE,
 	.cia_max_keysize	=	DES3_EDE_KEY_SIZE,
 	.cia_setkey		=	des3_ede_setkey,
-	.cia_encrypt		=	des3_ede_encrypt,
-	.cia_decrypt		=	des3_ede_decrypt } }
+	.cia_encrypt		=	crypto_des3_ede_encrypt,
+	.cia_decrypt		=	crypto_des3_ede_decrypt } }
 } };
 
 static int __init des_generic_mod_init(void)
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index b8c50871f11b..5cd6e3d12bac 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -150,7 +150,7 @@ config CRYPTO_DES_S390
 	depends on S390
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
@@ -215,7 +215,7 @@ config CRYPTO_DEV_MARVELL_CESA
 	tristate "Marvell's Cryptographic Engine driver"
 	depends on PLAT_ORION || ARCH_MVEBU
 	select CRYPTO_LIB_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
 	select SRAM
@@ -227,7 +227,7 @@ config CRYPTO_DEV_MARVELL_CESA
 
 config CRYPTO_DEV_NIAGARA2
        tristate "Niagara2 Stream Processing Unit driver"
-       select CRYPTO_DES
+       select CRYPTO_LIB_DES
        select CRYPTO_BLKCIPHER
        select CRYPTO_HASH
        select CRYPTO_MD5
@@ -244,7 +244,7 @@ config CRYPTO_DEV_NIAGARA2
 
 config CRYPTO_DEV_HIFN_795X
 	tristate "Driver HIFN 795x crypto accelerator chips"
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	select HW_RANDOM if CRYPTO_DEV_HIFN_795X_RNG
 	depends on PCI
@@ -300,7 +300,7 @@ config CRYPTO_DEV_TALITOS2
 config CRYPTO_DEV_IXP4XX
 	tristate "Driver for IXP4xx crypto hardware acceleration"
 	depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
@@ -366,7 +366,7 @@ config CRYPTO_DEV_OMAP_AES
 config CRYPTO_DEV_OMAP_DES
 	tristate "Support for OMAP DES/3DES hw engine"
 	depends on ARCH_OMAP2PLUS
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_ENGINE
 	help
@@ -384,7 +384,7 @@ config CRYPTO_DEV_PICOXCELL
 	select CRYPTO_AES
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_SEQIV
@@ -497,7 +497,7 @@ config CRYPTO_DEV_ATMEL_AES
 config CRYPTO_DEV_ATMEL_TDES
 	tristate "Support for Atmel DES/TDES hw accelerator"
 	depends on ARCH_AT91 || COMPILE_TEST
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	help
 	  Some Atmel processors have DES/TDES hw accelerator.
@@ -595,7 +595,7 @@ config CRYPTO_DEV_QCE
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on HAS_IOMEM
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_ECB
 	select CRYPTO_CBC
 	select CRYPTO_XTS
@@ -643,7 +643,7 @@ config CRYPTO_DEV_SUN4I_SS
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	help
 	  Some Allwinner SoC have a crypto accelerator named
@@ -666,7 +666,7 @@ config CRYPTO_DEV_ROCKCHIP
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
@@ -703,7 +703,7 @@ config CRYPTO_DEV_BCM_SPU
 	depends on MAILBOX
 	default m
 	select CRYPTO_AUTHENC
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
@@ -722,7 +722,7 @@ config CRYPTO_DEV_SAFEXCEL
 	select CRYPTO_LIB_AES
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_HASH
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
@@ -760,7 +760,7 @@ config CRYPTO_DEV_CCREE
 	default n
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_SHA1
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index e4fdf545ac90..137ed3df0c74 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -98,7 +98,7 @@ config CRYPTO_DEV_FSL_CAAM_CRYPTO_API
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  Selecting this will offload crypto for users of the
 	  scatterlist crypto API (such as the linux native IPSec
diff --git a/drivers/crypto/cavium/nitrox/Kconfig b/drivers/crypto/cavium/nitrox/Kconfig
index dab162af41b8..7b1e751bb9cd 100644
--- a/drivers/crypto/cavium/nitrox/Kconfig
+++ b/drivers/crypto/cavium/nitrox/Kconfig
@@ -6,7 +6,7 @@ config CRYPTO_DEV_NITROX
 	tristate
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select FW_LOADER
 
 config CRYPTO_DEV_NITROX_CNN55XX
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index 16c4d5460334..b68b6a7c0a32 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -13,7 +13,7 @@
 #include <crypto/aes.h>
 #include <crypto/authenc.h>
 #include <crypto/ctr.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/sha.h>
 #include <crypto/skcipher.h>
 #include <crypto/internal/aead.h>
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index d6576280fc9b..1aba9372cd23 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -25,7 +25,7 @@ config CRYPTO_DEV_STM32_CRYP
 	depends on ARCH_STM32
 	select CRYPTO_HASH
 	select CRYPTO_ENGINE
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
           This enables support for the CRYP (AES/DES/TDES) hw accelerator which
 	  can be found on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig
index 349d34eaac13..b1c6f739f77b 100644
--- a/drivers/crypto/ux500/Kconfig
+++ b/drivers/crypto/ux500/Kconfig
@@ -9,7 +9,7 @@ config CRYPTO_DEV_UX500_CRYP
 	depends on CRYPTO_DEV_UX500
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
         This selects the crypto driver for the UX500_CRYP hardware. It supports
         AES-ECB, CBC and CTR with keys sizes of 128, 192 and 256 bit sizes.
diff --git a/include/crypto/des.h b/include/crypto/des.h
index 31b04ba835b1..2c864a4e6707 100644
--- a/include/crypto/des.h
+++ b/include/crypto/des.h
@@ -6,10 +6,7 @@
 #ifndef __CRYPTO_DES_H
 #define __CRYPTO_DES_H
 
-#include <crypto/skcipher.h>
-#include <linux/compiler.h>
-#include <linux/fips.h>
-#include <linux/string.h>
+#include <linux/types.h>
 
 #define DES_KEY_SIZE		8
 #define DES_EXPKEY_WORDS	32
@@ -19,6 +16,44 @@
 #define DES3_EDE_EXPKEY_WORDS	(3 * DES_EXPKEY_WORDS)
 #define DES3_EDE_BLOCK_SIZE	DES_BLOCK_SIZE
 
+struct des_ctx {
+	u32 expkey[DES_EXPKEY_WORDS];
+};
+
+struct des3_ede_ctx {
+	u32 expkey[DES3_EDE_EXPKEY_WORDS];
+};
+
+void des_encrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src);
+void des_decrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src);
+
+void des3_ede_encrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src);
+void des3_ede_decrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src);
+
+/**
+ * des_expand_key - Expand a DES input key into a key schedule
+ * @ctx: the key schedule
+ * @key: buffer containing the input key
+ * @len: size of the buffer contents
+ *
+ * Returns 0 on success, -EINVAL if the input key is rejected and -ENOKEY if
+ * the key is accepted but has been found to be weak.
+ */
+int des_expand_key(struct des_ctx *ctx, const u8 *key, unsigned int keylen);
+
+/**
+ * des3_ede_expand_key - Expand a triple DES input key into a key schedule
+ * @ctx: the key schedule
+ * @key: buffer containing the input key
+ * @len: size of the buffer contents
+ *
+ * Returns 0 on success, -EINVAL if the input key is rejected and -ENOKEY if
+ * the key is accepted but has been found to be weak. Note that weak keys will
+ * be rejected (and -EINVAL will be returned) when running in FIPS mode.
+ */
+int des3_ede_expand_key(struct des3_ede_ctx *ctx, const u8 *key,
+			unsigned int keylen);
+
 extern int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
 			     unsigned int keylen);
 
diff --git a/include/crypto/internal/des.h b/include/crypto/internal/des.h
index f5d2e696522e..81ea1a425e9c 100644
--- a/include/crypto/internal/des.h
+++ b/include/crypto/internal/des.h
@@ -25,18 +25,21 @@
  */
 static inline int crypto_des_verify_key(struct crypto_tfm *tfm, const u8 *key)
 {
-	u32 tmp[DES_EXPKEY_WORDS];
-	int err = 0;
-
-	if (!(crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS))
-		return 0;
+	struct des_ctx tmp;
+	int err;
+
+	err = des_expand_key(&tmp, key, DES_KEY_SIZE);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
+	}
 
-	if (!des_ekey(tmp, key)) {
+	if (err)
 		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
-		err = -EINVAL;
-	}
 
-	memzero_explicit(tmp, sizeof(tmp));
+	memzero_explicit(&tmp, sizeof(tmp));
 	return err;
 }
 
@@ -53,6 +56,28 @@ static inline int crypto_des_verify_key(struct crypto_tfm *tfm, const u8 *key)
  *   property.
  *
  */
+static inline int des3_ede_verify_key(const u8 *key, unsigned int key_len,
+				      bool check_weak)
+{
+	int ret = fips_enabled ? -EINVAL : -ENOKEY;
+	u32 K[6];
+
+	memcpy(K, key, DES3_EDE_KEY_SIZE);
+
+	if ((!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
+	     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
+	    (fips_enabled || check_weak))
+		goto bad;
+
+	if ((!((K[0] ^ K[4]) | (K[1] ^ K[5]))) && fips_enabled)
+		goto bad;
+
+	ret = 0;
+bad:
+	memzero_explicit(K, DES3_EDE_KEY_SIZE);
+
+	return ret;
+}
 
 /**
  * crypto_des3_ede_verify_key - Check whether a DES3-EDE key is weak
@@ -70,28 +95,14 @@ static inline int crypto_des_verify_key(struct crypto_tfm *tfm, const u8 *key)
 static inline int crypto_des3_ede_verify_key(struct crypto_tfm *tfm,
 					     const u8 *key)
 {
-	int err = -EINVAL;
-	u32 K[6];
-
-	memcpy(K, key, DES3_EDE_KEY_SIZE);
-
-	if ((!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
-	     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-	    (fips_enabled || (crypto_tfm_get_flags(tfm) &
-		              CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)))
-		goto bad;
-
-	if ((!((K[0] ^ K[4]) | (K[1] ^ K[5]))) && fips_enabled)
-		goto bad;
+	int err;
 
-	err = 0;
-out:
-	memzero_explicit(K, DES3_EDE_KEY_SIZE);
+	err = des3_ede_verify_key(key, DES3_EDE_KEY_SIZE,
+				  crypto_tfm_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
+	if (err)
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
 	return err;
-
-bad:
-	crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
-	goto out;
 }
 
 static inline int verify_skcipher_des_key(struct crypto_skcipher *tfm,
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 42a91c62d96d..101a321b8a99 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -5,3 +5,6 @@ libaes-y := aes.o
 
 obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
 libarc4-y := arc4.o
+
+obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
+libdes-y := des.o
diff --git a/lib/crypto/des.c b/lib/crypto/des.c
new file mode 100644
index 000000000000..ef5bb8822aba
--- /dev/null
+++ b/lib/crypto/des.c
@@ -0,0 +1,902 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API.
+ *
+ * DES & Triple DES EDE Cipher Algorithms.
+ *
+ * Copyright (c) 2005 Dag Arne Osvik <da@osvik.no>
+ */
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/unaligned.h>
+
+#include <crypto/des.h>
+#include <crypto/internal/des.h>
+
+#define ROL(x, r) ((x) = rol32((x), (r)))
+#define ROR(x, r) ((x) = ror32((x), (r)))
+
+/* Lookup tables for key expansion */
+
+static const u8 pc1[256] = {
+	0x00, 0x00, 0x40, 0x04, 0x10, 0x10, 0x50, 0x14,
+	0x04, 0x40, 0x44, 0x44, 0x14, 0x50, 0x54, 0x54,
+	0x02, 0x02, 0x42, 0x06, 0x12, 0x12, 0x52, 0x16,
+	0x06, 0x42, 0x46, 0x46, 0x16, 0x52, 0x56, 0x56,
+	0x80, 0x08, 0xc0, 0x0c, 0x90, 0x18, 0xd0, 0x1c,
+	0x84, 0x48, 0xc4, 0x4c, 0x94, 0x58, 0xd4, 0x5c,
+	0x82, 0x0a, 0xc2, 0x0e, 0x92, 0x1a, 0xd2, 0x1e,
+	0x86, 0x4a, 0xc6, 0x4e, 0x96, 0x5a, 0xd6, 0x5e,
+	0x20, 0x20, 0x60, 0x24, 0x30, 0x30, 0x70, 0x34,
+	0x24, 0x60, 0x64, 0x64, 0x34, 0x70, 0x74, 0x74,
+	0x22, 0x22, 0x62, 0x26, 0x32, 0x32, 0x72, 0x36,
+	0x26, 0x62, 0x66, 0x66, 0x36, 0x72, 0x76, 0x76,
+	0xa0, 0x28, 0xe0, 0x2c, 0xb0, 0x38, 0xf0, 0x3c,
+	0xa4, 0x68, 0xe4, 0x6c, 0xb4, 0x78, 0xf4, 0x7c,
+	0xa2, 0x2a, 0xe2, 0x2e, 0xb2, 0x3a, 0xf2, 0x3e,
+	0xa6, 0x6a, 0xe6, 0x6e, 0xb6, 0x7a, 0xf6, 0x7e,
+	0x08, 0x80, 0x48, 0x84, 0x18, 0x90, 0x58, 0x94,
+	0x0c, 0xc0, 0x4c, 0xc4, 0x1c, 0xd0, 0x5c, 0xd4,
+	0x0a, 0x82, 0x4a, 0x86, 0x1a, 0x92, 0x5a, 0x96,
+	0x0e, 0xc2, 0x4e, 0xc6, 0x1e, 0xd2, 0x5e, 0xd6,
+	0x88, 0x88, 0xc8, 0x8c, 0x98, 0x98, 0xd8, 0x9c,
+	0x8c, 0xc8, 0xcc, 0xcc, 0x9c, 0xd8, 0xdc, 0xdc,
+	0x8a, 0x8a, 0xca, 0x8e, 0x9a, 0x9a, 0xda, 0x9e,
+	0x8e, 0xca, 0xce, 0xce, 0x9e, 0xda, 0xde, 0xde,
+	0x28, 0xa0, 0x68, 0xa4, 0x38, 0xb0, 0x78, 0xb4,
+	0x2c, 0xe0, 0x6c, 0xe4, 0x3c, 0xf0, 0x7c, 0xf4,
+	0x2a, 0xa2, 0x6a, 0xa6, 0x3a, 0xb2, 0x7a, 0xb6,
+	0x2e, 0xe2, 0x6e, 0xe6, 0x3e, 0xf2, 0x7e, 0xf6,
+	0xa8, 0xa8, 0xe8, 0xac, 0xb8, 0xb8, 0xf8, 0xbc,
+	0xac, 0xe8, 0xec, 0xec, 0xbc, 0xf8, 0xfc, 0xfc,
+	0xaa, 0xaa, 0xea, 0xae, 0xba, 0xba, 0xfa, 0xbe,
+	0xae, 0xea, 0xee, 0xee, 0xbe, 0xfa, 0xfe, 0xfe
+};
+
+static const u8 rs[256] = {
+	0x00, 0x00, 0x80, 0x80, 0x02, 0x02, 0x82, 0x82,
+	0x04, 0x04, 0x84, 0x84, 0x06, 0x06, 0x86, 0x86,
+	0x08, 0x08, 0x88, 0x88, 0x0a, 0x0a, 0x8a, 0x8a,
+	0x0c, 0x0c, 0x8c, 0x8c, 0x0e, 0x0e, 0x8e, 0x8e,
+	0x10, 0x10, 0x90, 0x90, 0x12, 0x12, 0x92, 0x92,
+	0x14, 0x14, 0x94, 0x94, 0x16, 0x16, 0x96, 0x96,
+	0x18, 0x18, 0x98, 0x98, 0x1a, 0x1a, 0x9a, 0x9a,
+	0x1c, 0x1c, 0x9c, 0x9c, 0x1e, 0x1e, 0x9e, 0x9e,
+	0x20, 0x20, 0xa0, 0xa0, 0x22, 0x22, 0xa2, 0xa2,
+	0x24, 0x24, 0xa4, 0xa4, 0x26, 0x26, 0xa6, 0xa6,
+	0x28, 0x28, 0xa8, 0xa8, 0x2a, 0x2a, 0xaa, 0xaa,
+	0x2c, 0x2c, 0xac, 0xac, 0x2e, 0x2e, 0xae, 0xae,
+	0x30, 0x30, 0xb0, 0xb0, 0x32, 0x32, 0xb2, 0xb2,
+	0x34, 0x34, 0xb4, 0xb4, 0x36, 0x36, 0xb6, 0xb6,
+	0x38, 0x38, 0xb8, 0xb8, 0x3a, 0x3a, 0xba, 0xba,
+	0x3c, 0x3c, 0xbc, 0xbc, 0x3e, 0x3e, 0xbe, 0xbe,
+	0x40, 0x40, 0xc0, 0xc0, 0x42, 0x42, 0xc2, 0xc2,
+	0x44, 0x44, 0xc4, 0xc4, 0x46, 0x46, 0xc6, 0xc6,
+	0x48, 0x48, 0xc8, 0xc8, 0x4a, 0x4a, 0xca, 0xca,
+	0x4c, 0x4c, 0xcc, 0xcc, 0x4e, 0x4e, 0xce, 0xce,
+	0x50, 0x50, 0xd0, 0xd0, 0x52, 0x52, 0xd2, 0xd2,
+	0x54, 0x54, 0xd4, 0xd4, 0x56, 0x56, 0xd6, 0xd6,
+	0x58, 0x58, 0xd8, 0xd8, 0x5a, 0x5a, 0xda, 0xda,
+	0x5c, 0x5c, 0xdc, 0xdc, 0x5e, 0x5e, 0xde, 0xde,
+	0x60, 0x60, 0xe0, 0xe0, 0x62, 0x62, 0xe2, 0xe2,
+	0x64, 0x64, 0xe4, 0xe4, 0x66, 0x66, 0xe6, 0xe6,
+	0x68, 0x68, 0xe8, 0xe8, 0x6a, 0x6a, 0xea, 0xea,
+	0x6c, 0x6c, 0xec, 0xec, 0x6e, 0x6e, 0xee, 0xee,
+	0x70, 0x70, 0xf0, 0xf0, 0x72, 0x72, 0xf2, 0xf2,
+	0x74, 0x74, 0xf4, 0xf4, 0x76, 0x76, 0xf6, 0xf6,
+	0x78, 0x78, 0xf8, 0xf8, 0x7a, 0x7a, 0xfa, 0xfa,
+	0x7c, 0x7c, 0xfc, 0xfc, 0x7e, 0x7e, 0xfe, 0xfe
+};
+
+static const u32 pc2[1024] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00040000, 0x00000000, 0x04000000, 0x00100000,
+	0x00400000, 0x00000008, 0x00000800, 0x40000000,
+	0x00440000, 0x00000008, 0x04000800, 0x40100000,
+	0x00000400, 0x00000020, 0x08000000, 0x00000100,
+	0x00040400, 0x00000020, 0x0c000000, 0x00100100,
+	0x00400400, 0x00000028, 0x08000800, 0x40000100,
+	0x00440400, 0x00000028, 0x0c000800, 0x40100100,
+	0x80000000, 0x00000010, 0x00000000, 0x00800000,
+	0x80040000, 0x00000010, 0x04000000, 0x00900000,
+	0x80400000, 0x00000018, 0x00000800, 0x40800000,
+	0x80440000, 0x00000018, 0x04000800, 0x40900000,
+	0x80000400, 0x00000030, 0x08000000, 0x00800100,
+	0x80040400, 0x00000030, 0x0c000000, 0x00900100,
+	0x80400400, 0x00000038, 0x08000800, 0x40800100,
+	0x80440400, 0x00000038, 0x0c000800, 0x40900100,
+	0x10000000, 0x00000000, 0x00200000, 0x00001000,
+	0x10040000, 0x00000000, 0x04200000, 0x00101000,
+	0x10400000, 0x00000008, 0x00200800, 0x40001000,
+	0x10440000, 0x00000008, 0x04200800, 0x40101000,
+	0x10000400, 0x00000020, 0x08200000, 0x00001100,
+	0x10040400, 0x00000020, 0x0c200000, 0x00101100,
+	0x10400400, 0x00000028, 0x08200800, 0x40001100,
+	0x10440400, 0x00000028, 0x0c200800, 0x40101100,
+	0x90000000, 0x00000010, 0x00200000, 0x00801000,
+	0x90040000, 0x00000010, 0x04200000, 0x00901000,
+	0x90400000, 0x00000018, 0x00200800, 0x40801000,
+	0x90440000, 0x00000018, 0x04200800, 0x40901000,
+	0x90000400, 0x00000030, 0x08200000, 0x00801100,
+	0x90040400, 0x00000030, 0x0c200000, 0x00901100,
+	0x90400400, 0x00000038, 0x08200800, 0x40801100,
+	0x90440400, 0x00000038, 0x0c200800, 0x40901100,
+	0x00000200, 0x00080000, 0x00000000, 0x00000004,
+	0x00040200, 0x00080000, 0x04000000, 0x00100004,
+	0x00400200, 0x00080008, 0x00000800, 0x40000004,
+	0x00440200, 0x00080008, 0x04000800, 0x40100004,
+	0x00000600, 0x00080020, 0x08000000, 0x00000104,
+	0x00040600, 0x00080020, 0x0c000000, 0x00100104,
+	0x00400600, 0x00080028, 0x08000800, 0x40000104,
+	0x00440600, 0x00080028, 0x0c000800, 0x40100104,
+	0x80000200, 0x00080010, 0x00000000, 0x00800004,
+	0x80040200, 0x00080010, 0x04000000, 0x00900004,
+	0x80400200, 0x00080018, 0x00000800, 0x40800004,
+	0x80440200, 0x00080018, 0x04000800, 0x40900004,
+	0x80000600, 0x00080030, 0x08000000, 0x00800104,
+	0x80040600, 0x00080030, 0x0c000000, 0x00900104,
+	0x80400600, 0x00080038, 0x08000800, 0x40800104,
+	0x80440600, 0x00080038, 0x0c000800, 0x40900104,
+	0x10000200, 0x00080000, 0x00200000, 0x00001004,
+	0x10040200, 0x00080000, 0x04200000, 0x00101004,
+	0x10400200, 0x00080008, 0x00200800, 0x40001004,
+	0x10440200, 0x00080008, 0x04200800, 0x40101004,
+	0x10000600, 0x00080020, 0x08200000, 0x00001104,
+	0x10040600, 0x00080020, 0x0c200000, 0x00101104,
+	0x10400600, 0x00080028, 0x08200800, 0x40001104,
+	0x10440600, 0x00080028, 0x0c200800, 0x40101104,
+	0x90000200, 0x00080010, 0x00200000, 0x00801004,
+	0x90040200, 0x00080010, 0x04200000, 0x00901004,
+	0x90400200, 0x00080018, 0x00200800, 0x40801004,
+	0x90440200, 0x00080018, 0x04200800, 0x40901004,
+	0x90000600, 0x00080030, 0x08200000, 0x00801104,
+	0x90040600, 0x00080030, 0x0c200000, 0x00901104,
+	0x90400600, 0x00080038, 0x08200800, 0x40801104,
+	0x90440600, 0x00080038, 0x0c200800, 0x40901104,
+	0x00000002, 0x00002000, 0x20000000, 0x00000001,
+	0x00040002, 0x00002000, 0x24000000, 0x00100001,
+	0x00400002, 0x00002008, 0x20000800, 0x40000001,
+	0x00440002, 0x00002008, 0x24000800, 0x40100001,
+	0x00000402, 0x00002020, 0x28000000, 0x00000101,
+	0x00040402, 0x00002020, 0x2c000000, 0x00100101,
+	0x00400402, 0x00002028, 0x28000800, 0x40000101,
+	0x00440402, 0x00002028, 0x2c000800, 0x40100101,
+	0x80000002, 0x00002010, 0x20000000, 0x00800001,
+	0x80040002, 0x00002010, 0x24000000, 0x00900001,
+	0x80400002, 0x00002018, 0x20000800, 0x40800001,
+	0x80440002, 0x00002018, 0x24000800, 0x40900001,
+	0x80000402, 0x00002030, 0x28000000, 0x00800101,
+	0x80040402, 0x00002030, 0x2c000000, 0x00900101,
+	0x80400402, 0x00002038, 0x28000800, 0x40800101,
+	0x80440402, 0x00002038, 0x2c000800, 0x40900101,
+	0x10000002, 0x00002000, 0x20200000, 0x00001001,
+	0x10040002, 0x00002000, 0x24200000, 0x00101001,
+	0x10400002, 0x00002008, 0x20200800, 0x40001001,
+	0x10440002, 0x00002008, 0x24200800, 0x40101001,
+	0x10000402, 0x00002020, 0x28200000, 0x00001101,
+	0x10040402, 0x00002020, 0x2c200000, 0x00101101,
+	0x10400402, 0x00002028, 0x28200800, 0x40001101,
+	0x10440402, 0x00002028, 0x2c200800, 0x40101101,
+	0x90000002, 0x00002010, 0x20200000, 0x00801001,
+	0x90040002, 0x00002010, 0x24200000, 0x00901001,
+	0x90400002, 0x00002018, 0x20200800, 0x40801001,
+	0x90440002, 0x00002018, 0x24200800, 0x40901001,
+	0x90000402, 0x00002030, 0x28200000, 0x00801101,
+	0x90040402, 0x00002030, 0x2c200000, 0x00901101,
+	0x90400402, 0x00002038, 0x28200800, 0x40801101,
+	0x90440402, 0x00002038, 0x2c200800, 0x40901101,
+	0x00000202, 0x00082000, 0x20000000, 0x00000005,
+	0x00040202, 0x00082000, 0x24000000, 0x00100005,
+	0x00400202, 0x00082008, 0x20000800, 0x40000005,
+	0x00440202, 0x00082008, 0x24000800, 0x40100005,
+	0x00000602, 0x00082020, 0x28000000, 0x00000105,
+	0x00040602, 0x00082020, 0x2c000000, 0x00100105,
+	0x00400602, 0x00082028, 0x28000800, 0x40000105,
+	0x00440602, 0x00082028, 0x2c000800, 0x40100105,
+	0x80000202, 0x00082010, 0x20000000, 0x00800005,
+	0x80040202, 0x00082010, 0x24000000, 0x00900005,
+	0x80400202, 0x00082018, 0x20000800, 0x40800005,
+	0x80440202, 0x00082018, 0x24000800, 0x40900005,
+	0x80000602, 0x00082030, 0x28000000, 0x00800105,
+	0x80040602, 0x00082030, 0x2c000000, 0x00900105,
+	0x80400602, 0x00082038, 0x28000800, 0x40800105,
+	0x80440602, 0x00082038, 0x2c000800, 0x40900105,
+	0x10000202, 0x00082000, 0x20200000, 0x00001005,
+	0x10040202, 0x00082000, 0x24200000, 0x00101005,
+	0x10400202, 0x00082008, 0x20200800, 0x40001005,
+	0x10440202, 0x00082008, 0x24200800, 0x40101005,
+	0x10000602, 0x00082020, 0x28200000, 0x00001105,
+	0x10040602, 0x00082020, 0x2c200000, 0x00101105,
+	0x10400602, 0x00082028, 0x28200800, 0x40001105,
+	0x10440602, 0x00082028, 0x2c200800, 0x40101105,
+	0x90000202, 0x00082010, 0x20200000, 0x00801005,
+	0x90040202, 0x00082010, 0x24200000, 0x00901005,
+	0x90400202, 0x00082018, 0x20200800, 0x40801005,
+	0x90440202, 0x00082018, 0x24200800, 0x40901005,
+	0x90000602, 0x00082030, 0x28200000, 0x00801105,
+	0x90040602, 0x00082030, 0x2c200000, 0x00901105,
+	0x90400602, 0x00082038, 0x28200800, 0x40801105,
+	0x90440602, 0x00082038, 0x2c200800, 0x40901105,
+
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000008, 0x00080000, 0x10000000,
+	0x02000000, 0x00000000, 0x00000080, 0x00001000,
+	0x02000000, 0x00000008, 0x00080080, 0x10001000,
+	0x00004000, 0x00000000, 0x00000040, 0x00040000,
+	0x00004000, 0x00000008, 0x00080040, 0x10040000,
+	0x02004000, 0x00000000, 0x000000c0, 0x00041000,
+	0x02004000, 0x00000008, 0x000800c0, 0x10041000,
+	0x00020000, 0x00008000, 0x08000000, 0x00200000,
+	0x00020000, 0x00008008, 0x08080000, 0x10200000,
+	0x02020000, 0x00008000, 0x08000080, 0x00201000,
+	0x02020000, 0x00008008, 0x08080080, 0x10201000,
+	0x00024000, 0x00008000, 0x08000040, 0x00240000,
+	0x00024000, 0x00008008, 0x08080040, 0x10240000,
+	0x02024000, 0x00008000, 0x080000c0, 0x00241000,
+	0x02024000, 0x00008008, 0x080800c0, 0x10241000,
+	0x00000000, 0x01000000, 0x00002000, 0x00000020,
+	0x00000000, 0x01000008, 0x00082000, 0x10000020,
+	0x02000000, 0x01000000, 0x00002080, 0x00001020,
+	0x02000000, 0x01000008, 0x00082080, 0x10001020,
+	0x00004000, 0x01000000, 0x00002040, 0x00040020,
+	0x00004000, 0x01000008, 0x00082040, 0x10040020,
+	0x02004000, 0x01000000, 0x000020c0, 0x00041020,
+	0x02004000, 0x01000008, 0x000820c0, 0x10041020,
+	0x00020000, 0x01008000, 0x08002000, 0x00200020,
+	0x00020000, 0x01008008, 0x08082000, 0x10200020,
+	0x02020000, 0x01008000, 0x08002080, 0x00201020,
+	0x02020000, 0x01008008, 0x08082080, 0x10201020,
+	0x00024000, 0x01008000, 0x08002040, 0x00240020,
+	0x00024000, 0x01008008, 0x08082040, 0x10240020,
+	0x02024000, 0x01008000, 0x080020c0, 0x00241020,
+	0x02024000, 0x01008008, 0x080820c0, 0x10241020,
+	0x00000400, 0x04000000, 0x00100000, 0x00000004,
+	0x00000400, 0x04000008, 0x00180000, 0x10000004,
+	0x02000400, 0x04000000, 0x00100080, 0x00001004,
+	0x02000400, 0x04000008, 0x00180080, 0x10001004,
+	0x00004400, 0x04000000, 0x00100040, 0x00040004,
+	0x00004400, 0x04000008, 0x00180040, 0x10040004,
+	0x02004400, 0x04000000, 0x001000c0, 0x00041004,
+	0x02004400, 0x04000008, 0x001800c0, 0x10041004,
+	0x00020400, 0x04008000, 0x08100000, 0x00200004,
+	0x00020400, 0x04008008, 0x08180000, 0x10200004,
+	0x02020400, 0x04008000, 0x08100080, 0x00201004,
+	0x02020400, 0x04008008, 0x08180080, 0x10201004,
+	0x00024400, 0x04008000, 0x08100040, 0x00240004,
+	0x00024400, 0x04008008, 0x08180040, 0x10240004,
+	0x02024400, 0x04008000, 0x081000c0, 0x00241004,
+	0x02024400, 0x04008008, 0x081800c0, 0x10241004,
+	0x00000400, 0x05000000, 0x00102000, 0x00000024,
+	0x00000400, 0x05000008, 0x00182000, 0x10000024,
+	0x02000400, 0x05000000, 0x00102080, 0x00001024,
+	0x02000400, 0x05000008, 0x00182080, 0x10001024,
+	0x00004400, 0x05000000, 0x00102040, 0x00040024,
+	0x00004400, 0x05000008, 0x00182040, 0x10040024,
+	0x02004400, 0x05000000, 0x001020c0, 0x00041024,
+	0x02004400, 0x05000008, 0x001820c0, 0x10041024,
+	0x00020400, 0x05008000, 0x08102000, 0x00200024,
+	0x00020400, 0x05008008, 0x08182000, 0x10200024,
+	0x02020400, 0x05008000, 0x08102080, 0x00201024,
+	0x02020400, 0x05008008, 0x08182080, 0x10201024,
+	0x00024400, 0x05008000, 0x08102040, 0x00240024,
+	0x00024400, 0x05008008, 0x08182040, 0x10240024,
+	0x02024400, 0x05008000, 0x081020c0, 0x00241024,
+	0x02024400, 0x05008008, 0x081820c0, 0x10241024,
+	0x00000800, 0x00010000, 0x20000000, 0x00000010,
+	0x00000800, 0x00010008, 0x20080000, 0x10000010,
+	0x02000800, 0x00010000, 0x20000080, 0x00001010,
+	0x02000800, 0x00010008, 0x20080080, 0x10001010,
+	0x00004800, 0x00010000, 0x20000040, 0x00040010,
+	0x00004800, 0x00010008, 0x20080040, 0x10040010,
+	0x02004800, 0x00010000, 0x200000c0, 0x00041010,
+	0x02004800, 0x00010008, 0x200800c0, 0x10041010,
+	0x00020800, 0x00018000, 0x28000000, 0x00200010,
+	0x00020800, 0x00018008, 0x28080000, 0x10200010,
+	0x02020800, 0x00018000, 0x28000080, 0x00201010,
+	0x02020800, 0x00018008, 0x28080080, 0x10201010,
+	0x00024800, 0x00018000, 0x28000040, 0x00240010,
+	0x00024800, 0x00018008, 0x28080040, 0x10240010,
+	0x02024800, 0x00018000, 0x280000c0, 0x00241010,
+	0x02024800, 0x00018008, 0x280800c0, 0x10241010,
+	0x00000800, 0x01010000, 0x20002000, 0x00000030,
+	0x00000800, 0x01010008, 0x20082000, 0x10000030,
+	0x02000800, 0x01010000, 0x20002080, 0x00001030,
+	0x02000800, 0x01010008, 0x20082080, 0x10001030,
+	0x00004800, 0x01010000, 0x20002040, 0x00040030,
+	0x00004800, 0x01010008, 0x20082040, 0x10040030,
+	0x02004800, 0x01010000, 0x200020c0, 0x00041030,
+	0x02004800, 0x01010008, 0x200820c0, 0x10041030,
+	0x00020800, 0x01018000, 0x28002000, 0x00200030,
+	0x00020800, 0x01018008, 0x28082000, 0x10200030,
+	0x02020800, 0x01018000, 0x28002080, 0x00201030,
+	0x02020800, 0x01018008, 0x28082080, 0x10201030,
+	0x00024800, 0x01018000, 0x28002040, 0x00240030,
+	0x00024800, 0x01018008, 0x28082040, 0x10240030,
+	0x02024800, 0x01018000, 0x280020c0, 0x00241030,
+	0x02024800, 0x01018008, 0x280820c0, 0x10241030,
+	0x00000c00, 0x04010000, 0x20100000, 0x00000014,
+	0x00000c00, 0x04010008, 0x20180000, 0x10000014,
+	0x02000c00, 0x04010000, 0x20100080, 0x00001014,
+	0x02000c00, 0x04010008, 0x20180080, 0x10001014,
+	0x00004c00, 0x04010000, 0x20100040, 0x00040014,
+	0x00004c00, 0x04010008, 0x20180040, 0x10040014,
+	0x02004c00, 0x04010000, 0x201000c0, 0x00041014,
+	0x02004c00, 0x04010008, 0x201800c0, 0x10041014,
+	0x00020c00, 0x04018000, 0x28100000, 0x00200014,
+	0x00020c00, 0x04018008, 0x28180000, 0x10200014,
+	0x02020c00, 0x04018000, 0x28100080, 0x00201014,
+	0x02020c00, 0x04018008, 0x28180080, 0x10201014,
+	0x00024c00, 0x04018000, 0x28100040, 0x00240014,
+	0x00024c00, 0x04018008, 0x28180040, 0x10240014,
+	0x02024c00, 0x04018000, 0x281000c0, 0x00241014,
+	0x02024c00, 0x04018008, 0x281800c0, 0x10241014,
+	0x00000c00, 0x05010000, 0x20102000, 0x00000034,
+	0x00000c00, 0x05010008, 0x20182000, 0x10000034,
+	0x02000c00, 0x05010000, 0x20102080, 0x00001034,
+	0x02000c00, 0x05010008, 0x20182080, 0x10001034,
+	0x00004c00, 0x05010000, 0x20102040, 0x00040034,
+	0x00004c00, 0x05010008, 0x20182040, 0x10040034,
+	0x02004c00, 0x05010000, 0x201020c0, 0x00041034,
+	0x02004c00, 0x05010008, 0x201820c0, 0x10041034,
+	0x00020c00, 0x05018000, 0x28102000, 0x00200034,
+	0x00020c00, 0x05018008, 0x28182000, 0x10200034,
+	0x02020c00, 0x05018000, 0x28102080, 0x00201034,
+	0x02020c00, 0x05018008, 0x28182080, 0x10201034,
+	0x00024c00, 0x05018000, 0x28102040, 0x00240034,
+	0x00024c00, 0x05018008, 0x28182040, 0x10240034,
+	0x02024c00, 0x05018000, 0x281020c0, 0x00241034,
+	0x02024c00, 0x05018008, 0x281820c0, 0x10241034
+};
+
+/* S-box lookup tables */
+
+static const u32 S1[64] = {
+	0x01010400, 0x00000000, 0x00010000, 0x01010404,
+	0x01010004, 0x00010404, 0x00000004, 0x00010000,
+	0x00000400, 0x01010400, 0x01010404, 0x00000400,
+	0x01000404, 0x01010004, 0x01000000, 0x00000004,
+	0x00000404, 0x01000400, 0x01000400, 0x00010400,
+	0x00010400, 0x01010000, 0x01010000, 0x01000404,
+	0x00010004, 0x01000004, 0x01000004, 0x00010004,
+	0x00000000, 0x00000404, 0x00010404, 0x01000000,
+	0x00010000, 0x01010404, 0x00000004, 0x01010000,
+	0x01010400, 0x01000000, 0x01000000, 0x00000400,
+	0x01010004, 0x00010000, 0x00010400, 0x01000004,
+	0x00000400, 0x00000004, 0x01000404, 0x00010404,
+	0x01010404, 0x00010004, 0x01010000, 0x01000404,
+	0x01000004, 0x00000404, 0x00010404, 0x01010400,
+	0x00000404, 0x01000400, 0x01000400, 0x00000000,
+	0x00010004, 0x00010400, 0x00000000, 0x01010004
+};
+
+static const u32 S2[64] = {
+	0x80108020, 0x80008000, 0x00008000, 0x00108020,
+	0x00100000, 0x00000020, 0x80100020, 0x80008020,
+	0x80000020, 0x80108020, 0x80108000, 0x80000000,
+	0x80008000, 0x00100000, 0x00000020, 0x80100020,
+	0x00108000, 0x00100020, 0x80008020, 0x00000000,
+	0x80000000, 0x00008000, 0x00108020, 0x80100000,
+	0x00100020, 0x80000020, 0x00000000, 0x00108000,
+	0x00008020, 0x80108000, 0x80100000, 0x00008020,
+	0x00000000, 0x00108020, 0x80100020, 0x00100000,
+	0x80008020, 0x80100000, 0x80108000, 0x00008000,
+	0x80100000, 0x80008000, 0x00000020, 0x80108020,
+	0x00108020, 0x00000020, 0x00008000, 0x80000000,
+	0x00008020, 0x80108000, 0x00100000, 0x80000020,
+	0x00100020, 0x80008020, 0x80000020, 0x00100020,
+	0x00108000, 0x00000000, 0x80008000, 0x00008020,
+	0x80000000, 0x80100020, 0x80108020, 0x00108000
+};
+
+static const u32 S3[64] = {
+	0x00000208, 0x08020200, 0x00000000, 0x08020008,
+	0x08000200, 0x00000000, 0x00020208, 0x08000200,
+	0x00020008, 0x08000008, 0x08000008, 0x00020000,
+	0x08020208, 0x00020008, 0x08020000, 0x00000208,
+	0x08000000, 0x00000008, 0x08020200, 0x00000200,
+	0x00020200, 0x08020000, 0x08020008, 0x00020208,
+	0x08000208, 0x00020200, 0x00020000, 0x08000208,
+	0x00000008, 0x08020208, 0x00000200, 0x08000000,
+	0x08020200, 0x08000000, 0x00020008, 0x00000208,
+	0x00020000, 0x08020200, 0x08000200, 0x00000000,
+	0x00000200, 0x00020008, 0x08020208, 0x08000200,
+	0x08000008, 0x00000200, 0x00000000, 0x08020008,
+	0x08000208, 0x00020000, 0x08000000, 0x08020208,
+	0x00000008, 0x00020208, 0x00020200, 0x08000008,
+	0x08020000, 0x08000208, 0x00000208, 0x08020000,
+	0x00020208, 0x00000008, 0x08020008, 0x00020200
+};
+
+static const u32 S4[64] = {
+	0x00802001, 0x00002081, 0x00002081, 0x00000080,
+	0x00802080, 0x00800081, 0x00800001, 0x00002001,
+	0x00000000, 0x00802000, 0x00802000, 0x00802081,
+	0x00000081, 0x00000000, 0x00800080, 0x00800001,
+	0x00000001, 0x00002000, 0x00800000, 0x00802001,
+	0x00000080, 0x00800000, 0x00002001, 0x00002080,
+	0x00800081, 0x00000001, 0x00002080, 0x00800080,
+	0x00002000, 0x00802080, 0x00802081, 0x00000081,
+	0x00800080, 0x00800001, 0x00802000, 0x00802081,
+	0x00000081, 0x00000000, 0x00000000, 0x00802000,
+	0x00002080, 0x00800080, 0x00800081, 0x00000001,
+	0x00802001, 0x00002081, 0x00002081, 0x00000080,
+	0x00802081, 0x00000081, 0x00000001, 0x00002000,
+	0x00800001, 0x00002001, 0x00802080, 0x00800081,
+	0x00002001, 0x00002080, 0x00800000, 0x00802001,
+	0x00000080, 0x00800000, 0x00002000, 0x00802080
+};
+
+static const u32 S5[64] = {
+	0x00000100, 0x02080100, 0x02080000, 0x42000100,
+	0x00080000, 0x00000100, 0x40000000, 0x02080000,
+	0x40080100, 0x00080000, 0x02000100, 0x40080100,
+	0x42000100, 0x42080000, 0x00080100, 0x40000000,
+	0x02000000, 0x40080000, 0x40080000, 0x00000000,
+	0x40000100, 0x42080100, 0x42080100, 0x02000100,
+	0x42080000, 0x40000100, 0x00000000, 0x42000000,
+	0x02080100, 0x02000000, 0x42000000, 0x00080100,
+	0x00080000, 0x42000100, 0x00000100, 0x02000000,
+	0x40000000, 0x02080000, 0x42000100, 0x40080100,
+	0x02000100, 0x40000000, 0x42080000, 0x02080100,
+	0x40080100, 0x00000100, 0x02000000, 0x42080000,
+	0x42080100, 0x00080100, 0x42000000, 0x42080100,
+	0x02080000, 0x00000000, 0x40080000, 0x42000000,
+	0x00080100, 0x02000100, 0x40000100, 0x00080000,
+	0x00000000, 0x40080000, 0x02080100, 0x40000100
+};
+
+static const u32 S6[64] = {
+	0x20000010, 0x20400000, 0x00004000, 0x20404010,
+	0x20400000, 0x00000010, 0x20404010, 0x00400000,
+	0x20004000, 0x00404010, 0x00400000, 0x20000010,
+	0x00400010, 0x20004000, 0x20000000, 0x00004010,
+	0x00000000, 0x00400010, 0x20004010, 0x00004000,
+	0x00404000, 0x20004010, 0x00000010, 0x20400010,
+	0x20400010, 0x00000000, 0x00404010, 0x20404000,
+	0x00004010, 0x00404000, 0x20404000, 0x20000000,
+	0x20004000, 0x00000010, 0x20400010, 0x00404000,
+	0x20404010, 0x00400000, 0x00004010, 0x20000010,
+	0x00400000, 0x20004000, 0x20000000, 0x00004010,
+	0x20000010, 0x20404010, 0x00404000, 0x20400000,
+	0x00404010, 0x20404000, 0x00000000, 0x20400010,
+	0x00000010, 0x00004000, 0x20400000, 0x00404010,
+	0x00004000, 0x00400010, 0x20004010, 0x00000000,
+	0x20404000, 0x20000000, 0x00400010, 0x20004010
+};
+
+static const u32 S7[64] = {
+	0x00200000, 0x04200002, 0x04000802, 0x00000000,
+	0x00000800, 0x04000802, 0x00200802, 0x04200800,
+	0x04200802, 0x00200000, 0x00000000, 0x04000002,
+	0x00000002, 0x04000000, 0x04200002, 0x00000802,
+	0x04000800, 0x00200802, 0x00200002, 0x04000800,
+	0x04000002, 0x04200000, 0x04200800, 0x00200002,
+	0x04200000, 0x00000800, 0x00000802, 0x04200802,
+	0x00200800, 0x00000002, 0x04000000, 0x00200800,
+	0x04000000, 0x00200800, 0x00200000, 0x04000802,
+	0x04000802, 0x04200002, 0x04200002, 0x00000002,
+	0x00200002, 0x04000000, 0x04000800, 0x00200000,
+	0x04200800, 0x00000802, 0x00200802, 0x04200800,
+	0x00000802, 0x04000002, 0x04200802, 0x04200000,
+	0x00200800, 0x00000000, 0x00000002, 0x04200802,
+	0x00000000, 0x00200802, 0x04200000, 0x00000800,
+	0x04000002, 0x04000800, 0x00000800, 0x00200002
+};
+
+static const u32 S8[64] = {
+	0x10001040, 0x00001000, 0x00040000, 0x10041040,
+	0x10000000, 0x10001040, 0x00000040, 0x10000000,
+	0x00040040, 0x10040000, 0x10041040, 0x00041000,
+	0x10041000, 0x00041040, 0x00001000, 0x00000040,
+	0x10040000, 0x10000040, 0x10001000, 0x00001040,
+	0x00041000, 0x00040040, 0x10040040, 0x10041000,
+	0x00001040, 0x00000000, 0x00000000, 0x10040040,
+	0x10000040, 0x10001000, 0x00041040, 0x00040000,
+	0x00041040, 0x00040000, 0x10041000, 0x00001000,
+	0x00000040, 0x10040040, 0x00001000, 0x00041040,
+	0x10001000, 0x00000040, 0x10000040, 0x10040000,
+	0x10040040, 0x10000000, 0x00040000, 0x10001040,
+	0x00000000, 0x10041040, 0x00040040, 0x10000040,
+	0x10040000, 0x10001000, 0x10001040, 0x00000000,
+	0x10041040, 0x00041000, 0x00041000, 0x00001040,
+	0x00001040, 0x00040040, 0x10000000, 0x10041000
+};
+
+/* Encryption components: IP, FP, and round function */
+
+#define IP(L, R, T)		\
+	ROL(R, 4);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xf0f0f0f0;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 12);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xffff0000;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 14);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xcccccccc;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 6);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xff00ff00;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 7);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xaaaaaaaa;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(L, 1);
+
+#define FP(L, R, T)		\
+	ROR(L, 1);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xaaaaaaaa;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 7);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xff00ff00;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 6);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xcccccccc;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 14);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xffff0000;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 12);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xf0f0f0f0;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 4);
+
+#define ROUND(L, R, A, B, K, d)					\
+	B = K[0];			A = K[1];	K += d;	\
+	B ^= R;				A ^= R;			\
+	B &= 0x3f3f3f3f;		ROR(A, 4);		\
+	L ^= S8[0xff & B];		A &= 0x3f3f3f3f;	\
+	L ^= S6[0xff & (B >> 8)];	B >>= 16;		\
+	L ^= S7[0xff & A];					\
+	L ^= S5[0xff & (A >> 8)];	A >>= 16;		\
+	L ^= S4[0xff & B];					\
+	L ^= S2[0xff & (B >> 8)];				\
+	L ^= S3[0xff & A];					\
+	L ^= S1[0xff & (A >> 8)];
+
+/*
+ * PC2 lookup tables are organized as 2 consecutive sets of 4 interleaved
+ * tables of 128 elements.  One set is for C_i and the other for D_i, while
+ * the 4 interleaved tables correspond to four 7-bit subsets of C_i or D_i.
+ *
+ * After PC1 each of the variables a,b,c,d contains a 7 bit subset of C_i
+ * or D_i in bits 7-1 (bit 0 being the least significant).
+ */
+
+#define T1(x) pt[2 * (x) + 0]
+#define T2(x) pt[2 * (x) + 1]
+#define T3(x) pt[2 * (x) + 2]
+#define T4(x) pt[2 * (x) + 3]
+
+#define DES_PC2(a, b, c, d) (T4(d) | T3(c) | T2(b) | T1(a))
+
+/*
+ * Encryption key expansion
+ *
+ * RFC2451: Weak key checks SHOULD be performed.
+ *
+ * FIPS 74:
+ *
+ *   Keys having duals are keys which produce all zeros, all ones, or
+ *   alternating zero-one patterns in the C and D registers after Permuted
+ *   Choice 1 has operated on the key.
+ *
+ */
+static unsigned long des_ekey(u32 *pe, const u8 *k)
+{
+	/* K&R: long is at least 32 bits */
+	unsigned long a, b, c, d, w;
+	const u32 *pt = pc2;
+
+	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
+	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
+	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
+	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
+
+	pe[15 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[14 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[13 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[12 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[11 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[10 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 9 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 8 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 7 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 6 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 5 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 4 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 3 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 2 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 1 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[ 0 * 2 + 0] = DES_PC2(b, c, d, a);
+
+	/* Check if first half is weak */
+	w  = (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
+
+	/* Skip to next table set */
+	pt += 512;
+
+	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
+	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
+	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
+	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
+
+	/* Check if second half is weak */
+	w |= (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
+
+	pe[15 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[14 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[13 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[12 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[11 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[10 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 9 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 8 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 7 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 6 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 5 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 4 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 3 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 2 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 1 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[ 0 * 2 + 1] = DES_PC2(b, c, d, a);
+
+	/* Fixup: 2413 5768 -> 1357 2468 */
+	for (d = 0; d < 16; ++d) {
+		a = pe[2 * d];
+		b = pe[2 * d + 1];
+		c = a ^ b;
+		c &= 0xffff0000;
+		a ^= c;
+		b ^= c;
+		ROL(b, 18);
+		pe[2 * d] = a;
+		pe[2 * d + 1] = b;
+	}
+
+	/* Zero if weak key */
+	return w;
+}
+
+int des_expand_key(struct des_ctx *ctx, const u8 *key, unsigned int keylen)
+{
+	if (keylen != DES_KEY_SIZE)
+		return -EINVAL;
+
+	return des_ekey(ctx->expkey, key) ? 0 : -ENOKEY;
+}
+EXPORT_SYMBOL_GPL(des_expand_key);
+
+/*
+ * Decryption key expansion
+ *
+ * No weak key checking is performed, as this is only used by triple DES
+ *
+ */
+static void dkey(u32 *pe, const u8 *k)
+{
+	/* K&R: long is at least 32 bits */
+	unsigned long a, b, c, d;
+	const u32 *pt = pc2;
+
+	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
+	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
+	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
+	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
+
+	pe[ 0 * 2] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[ 1 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 2 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 3 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 4 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 5 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 6 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 7 * 2] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 8 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 9 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[10 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[11 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[12 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[13 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[14 * 2] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[15 * 2] = DES_PC2(b, c, d, a);
+
+	/* Skip to next table set */
+	pt += 512;
+
+	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
+	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
+	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
+	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
+
+	pe[ 0 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[ 1 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 2 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 3 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 4 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 5 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 6 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 7 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 8 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 9 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[10 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[11 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[12 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[13 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[14 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[15 * 2 + 1] = DES_PC2(b, c, d, a);
+
+	/* Fixup: 2413 5768 -> 1357 2468 */
+	for (d = 0; d < 16; ++d) {
+		a = pe[2 * d];
+		b = pe[2 * d + 1];
+		c = a ^ b;
+		c &= 0xffff0000;
+		a ^= c;
+		b ^= c;
+		ROL(b, 18);
+		pe[2 * d] = a;
+		pe[2 * d + 1] = b;
+	}
+}
+
+void des_encrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = ctx->expkey;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, 2);
+		ROUND(R, L, A, B, K, 2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des_encrypt);
+
+void des_decrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = ctx->expkey + DES_EXPKEY_WORDS - 2;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, -2);
+		ROUND(R, L, A, B, K, -2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des_decrypt);
+
+int des3_ede_expand_key(struct des3_ede_ctx *ctx, const u8 *key,
+			unsigned int keylen)
+{
+	u32 *pe = ctx->expkey;
+	int err;
+
+	if (keylen != DES3_EDE_KEY_SIZE)
+		return -EINVAL;
+
+	err = des3_ede_verify_key(key, keylen, true);
+	if (err && err != -ENOKEY)
+		return err;
+
+	des_ekey(pe, key); pe += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
+	dkey(pe, key); pe += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
+	des_ekey(pe, key);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(des3_ede_expand_key);
+
+void des3_ede_encrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = dctx->expkey;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, 2);
+		ROUND(R, L, A, B, K, 2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(R, L, A, B, K, 2);
+		ROUND(L, R, A, B, K, 2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, 2);
+		ROUND(R, L, A, B, K, 2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des3_ede_encrypt);
+
+void des3_ede_decrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = dctx->expkey + DES3_EDE_EXPKEY_WORDS - 2;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, -2);
+		ROUND(R, L, A, B, K, -2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(R, L, A, B, K, -2);
+		ROUND(L, R, A, B, K, -2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, -2);
+		ROUND(R, L, A, B, K, -2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des3_ede_decrypt);
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 18fbe0da8e98fe167fbfe1757003e2a2a74d24f3 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 15 Aug 2019 12:01:11 +0300
Subject: crypto: des - remove now unused __des3_ede_setkey()

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/des_generic.c | 13 -------------
 include/crypto/des.h |  3 ---
 2 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index e021a321f584..6e13a4a29ecb 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -51,19 +51,6 @@ static void crypto_des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	des_decrypt(dctx, dst, src);
 }
 
-int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
-		      unsigned int keylen)
-{
-	int err;
-
-	des_ekey(expkey, key); expkey += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
-	dkey(expkey, key); expkey += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
-	des_ekey(expkey, key);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(__des3_ede_setkey);
-
 static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int keylen)
 {
diff --git a/include/crypto/des.h b/include/crypto/des.h
index 2c864a4e6707..7812b4331ae4 100644
--- a/include/crypto/des.h
+++ b/include/crypto/des.h
@@ -54,7 +54,4 @@ int des_expand_key(struct des_ctx *ctx, const u8 *key, unsigned int keylen);
 int des3_ede_expand_key(struct des3_ede_ctx *ctx, const u8 *key,
 			unsigned int keylen);
 
-extern int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
-			     unsigned int keylen);
-
 #endif /* __CRYPTO_DES_H */
-- 
cgit v1.2.3


From ad767ee858b38af634c957a792cb001d54a7b981 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 17 Aug 2019 16:24:31 +0200
Subject: crypto: sha256 - Move lib/sha256.c to lib/crypto

Generic crypto implementations belong under lib/crypto not directly in
lib, likewise the header should be in include/crypto, not include/linux.

Note that the code in lib/crypto/sha256.c is not yet available for
generic use after this commit, it is still only used by the s390 and x86
purgatory code. Making it suitable for generic use is done in further
patches in this series.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/purgatory/Makefile    |   2 +-
 arch/s390/purgatory/purgatory.c |   2 +-
 arch/x86/purgatory/Makefile     |   2 +-
 arch/x86/purgatory/purgatory.c  |   2 +-
 include/crypto/sha256.h         |  28 ++++
 include/linux/sha256.h          |  28 ----
 lib/crypto/sha256.c             | 279 ++++++++++++++++++++++++++++++++++++++++
 lib/sha256.c                    | 279 ----------------------------------------
 8 files changed, 311 insertions(+), 311 deletions(-)
 create mode 100644 include/crypto/sha256.h
 delete mode 100644 include/linux/sha256.h
 create mode 100644 lib/crypto/sha256.c
 delete mode 100644 lib/sha256.c

(limited to 'include')

diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index dc1ae4ff79d7..85b05c9e40f5 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -7,7 +7,7 @@ purgatory-y := head.o purgatory.o string.o sha256.o mem.o
 targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
-$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
 $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 3528e6da4e87..a80c78da9985 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -8,8 +8,8 @@
  */
 
 #include <linux/kexec.h>
-#include <linux/sha256.h>
 #include <linux/string.h>
+#include <crypto/sha256.h>
 #include <asm/purgatory.h>
 
 int verify_sha256_digest(void)
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 3cf302b26332..026fa0006f0b 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -6,7 +6,7 @@ purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string
 targets += $(purgatory-y)
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
-$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 6d8d5a34c377..7cd7a2618180 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/bug.h>
-#include <linux/sha256.h>
+#include <crypto/sha256.h>
 #include <asm/purgatory.h>
 
 #include "../boot/string.h"
diff --git a/include/crypto/sha256.h b/include/crypto/sha256.h
new file mode 100644
index 000000000000..b1f9c6781082
--- /dev/null
+++ b/include/crypto/sha256.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  Copyright (C) 2014 Red Hat Inc.
+ *
+ *  Author: Vivek Goyal <vgoyal@redhat.com>
+ */
+
+#ifndef SHA256_H
+#define SHA256_H
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+/*
+ * Stand-alone implementation of the SHA256 algorithm. It is designed to
+ * have as little dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should use the implementation in
+ * crypto/.
+ *
+ * For details see lib/crypto/sha256.c
+ */
+
+extern int sha256_init(struct sha256_state *sctx);
+extern int sha256_update(struct sha256_state *sctx, const u8 *input,
+			 unsigned int length);
+extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+
+#endif /* SHA256_H */
diff --git a/include/linux/sha256.h b/include/linux/sha256.h
deleted file mode 100644
index 26972b9e92db..000000000000
--- a/include/linux/sha256.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2014 Red Hat Inc.
- *
- *  Author: Vivek Goyal <vgoyal@redhat.com>
- */
-
-#ifndef SHA256_H
-#define SHA256_H
-
-#include <linux/types.h>
-#include <crypto/sha.h>
-
-/*
- * Stand-alone implementation of the SHA256 algorithm. It is designed to
- * have as little dependencies as possible so it can be used in the
- * kexec_file purgatory. In other cases you should use the implementation in
- * crypto/.
- *
- * For details see lib/sha256.c
- */
-
-extern int sha256_init(struct sha256_state *sctx);
-extern int sha256_update(struct sha256_state *sctx, const u8 *input,
-			 unsigned int length);
-extern int sha256_final(struct sha256_state *sctx, u8 *hash);
-
-#endif /* SHA256_H */
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
new file mode 100644
index 000000000000..b8114028d06f
--- /dev/null
+++ b/lib/crypto/sha256.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2014 Red Hat Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/string.h>
+#include <crypto/sha256.h>
+#include <asm/byteorder.h>
+
+static inline u32 Ch(u32 x, u32 y, u32 z)
+{
+	return z ^ (x & (y ^ z));
+}
+
+static inline u32 Maj(u32 x, u32 y, u32 z)
+{
+	return (x & y) | (z & (x | y));
+}
+
+#define e0(x)       (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
+#define e1(x)       (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
+#define s0(x)       (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
+#define s1(x)       (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
+
+static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+{
+	W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
+}
+
+static inline void BLEND_OP(int I, u32 *W)
+{
+	W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
+}
+
+static void sha256_transform(u32 *state, const u8 *input)
+{
+	u32 a, b, c, d, e, f, g, h, t1, t2;
+	u32 W[64];
+	int i;
+
+	/* load the input */
+	for (i = 0; i < 16; i++)
+		LOAD_OP(i, W, input);
+
+	/* now blend */
+	for (i = 16; i < 64; i++)
+		BLEND_OP(i, W);
+
+	/* load the state into our registers */
+	a = state[0];  b = state[1];  c = state[2];  d = state[3];
+	e = state[4];  f = state[5];  g = state[6];  h = state[7];
+
+	/* now iterate */
+	t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
+	t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
+	t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
+	t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
+	t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
+	t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
+	t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
+	t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
+
+	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+	state[4] += e; state[5] += f; state[6] += g; state[7] += h;
+
+	/* clear any sensitive info... */
+	a = b = c = d = e = f = g = h = t1 = t2 = 0;
+	memset(W, 0, 64 * sizeof(u32));
+}
+
+int sha256_init(struct sha256_state *sctx)
+{
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+
+int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+	unsigned int partial, done;
+	const u8 *src;
+
+	partial = sctx->count & 0x3f;
+	sctx->count += len;
+	done = 0;
+	src = data;
+
+	if ((partial + len) > 63) {
+		if (partial) {
+			done = -partial;
+			memcpy(sctx->buf + partial, data, done + 64);
+			src = sctx->buf;
+		}
+
+		do {
+			sha256_transform(sctx->state, src);
+			done += 64;
+			src = data + done;
+		} while (done + 63 < len);
+
+		partial = 0;
+	}
+	memcpy(sctx->buf + partial, src, len - done);
+
+	return 0;
+}
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+	__be32 *dst = (__be32 *)out;
+	__be64 bits;
+	unsigned int index, pad_len;
+	int i;
+	static const u8 padding[64] = { 0x80, };
+
+	/* Save number of bits */
+	bits = cpu_to_be64(sctx->count << 3);
+
+	/* Pad out to 56 mod 64. */
+	index = sctx->count & 0x3f;
+	pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+	sha256_update(sctx, padding, pad_len);
+
+	/* Append length (before padding) */
+	sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+	/* Store state in digest */
+	for (i = 0; i < 8; i++)
+		dst[i] = cpu_to_be32(sctx->state[i]);
+
+	/* Zeroize sensitive information. */
+	memset(sctx, 0, sizeof(*sctx));
+
+	return 0;
+}
diff --git a/lib/sha256.c b/lib/sha256.c
deleted file mode 100644
index ba4dce0b3711..000000000000
--- a/lib/sha256.c
+++ /dev/null
@@ -1,279 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * SHA-256, as specified in
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- *
- * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
- *
- * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- * Copyright (c) 2014 Red Hat Inc.
- */
-
-#include <linux/bitops.h>
-#include <linux/sha256.h>
-#include <linux/string.h>
-#include <asm/byteorder.h>
-
-static inline u32 Ch(u32 x, u32 y, u32 z)
-{
-	return z ^ (x & (y ^ z));
-}
-
-static inline u32 Maj(u32 x, u32 y, u32 z)
-{
-	return (x & y) | (z & (x | y));
-}
-
-#define e0(x)       (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
-#define e1(x)       (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
-#define s0(x)       (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
-#define s1(x)       (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
-
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
-{
-	W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
-}
-
-static inline void BLEND_OP(int I, u32 *W)
-{
-	W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
-}
-
-static void sha256_transform(u32 *state, const u8 *input)
-{
-	u32 a, b, c, d, e, f, g, h, t1, t2;
-	u32 W[64];
-	int i;
-
-	/* load the input */
-	for (i = 0; i < 16; i++)
-		LOAD_OP(i, W, input);
-
-	/* now blend */
-	for (i = 16; i < 64; i++)
-		BLEND_OP(i, W);
-
-	/* load the state into our registers */
-	a = state[0];  b = state[1];  c = state[2];  d = state[3];
-	e = state[4];  f = state[5];  g = state[6];  h = state[7];
-
-	/* now iterate */
-	t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
-	t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
-	t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
-	t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
-	t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
-	t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
-	t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
-	t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
-
-	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
-	state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
-	/* clear any sensitive info... */
-	a = b = c = d = e = f = g = h = t1 = t2 = 0;
-	memset(W, 0, 64 * sizeof(u32));
-}
-
-int sha256_init(struct sha256_state *sctx)
-{
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-
-	return 0;
-}
-
-int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
-{
-	unsigned int partial, done;
-	const u8 *src;
-
-	partial = sctx->count & 0x3f;
-	sctx->count += len;
-	done = 0;
-	src = data;
-
-	if ((partial + len) > 63) {
-		if (partial) {
-			done = -partial;
-			memcpy(sctx->buf + partial, data, done + 64);
-			src = sctx->buf;
-		}
-
-		do {
-			sha256_transform(sctx->state, src);
-			done += 64;
-			src = data + done;
-		} while (done + 63 < len);
-
-		partial = 0;
-	}
-	memcpy(sctx->buf + partial, src, len - done);
-
-	return 0;
-}
-
-int sha256_final(struct sha256_state *sctx, u8 *out)
-{
-	__be32 *dst = (__be32 *)out;
-	__be64 bits;
-	unsigned int index, pad_len;
-	int i;
-	static const u8 padding[64] = { 0x80, };
-
-	/* Save number of bits */
-	bits = cpu_to_be64(sctx->count << 3);
-
-	/* Pad out to 56 mod 64. */
-	index = sctx->count & 0x3f;
-	pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
-	sha256_update(sctx, padding, pad_len);
-
-	/* Append length (before padding) */
-	sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
-
-	/* Store state in digest */
-	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
-
-	/* Zeroize sensitive information. */
-	memset(sctx, 0, sizeof(*sctx));
-
-	return 0;
-}
-- 
cgit v1.2.3


From 01d3aee86625bd798a5e69afb92517d5530c7ed1 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 17 Aug 2019 16:24:33 +0200
Subject: crypto: sha256 - Make lib/crypto/sha256.c suitable for generic use

Before this commit lib/crypto/sha256.c has only been used in the s390 and
x86 purgatory code, make it suitable for generic use:

* Export interesting symbols
* Add  -D__DISABLE_EXPORTS to CFLAGS_sha256.o for purgatory builds to
  avoid the exports for the purgatory builds
* Add to lib/crypto/Makefile and crypto/Kconfig

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/purgatory/Makefile | 2 ++
 arch/x86/purgatory/Makefile  | 2 ++
 crypto/Kconfig               | 3 +++
 include/crypto/sha256.h      | 5 +++--
 lib/crypto/Makefile          | 3 +++
 lib/crypto/sha256.c          | 4 ++++
 6 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index 85b05c9e40f5..bc0d7a0d0394 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -10,6 +10,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
 $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
 	$(call if_changed_rule,as_o_S)
 
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 026fa0006f0b..ea86982aba27 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -9,6 +9,8 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
 targets += purgatory.ro
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 42a17fe97703..e96b321b51af 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -849,6 +849,9 @@ config CRYPTO_SHA1_PPC_SPE
 	  SHA-1 secure hash standard (DFIPS 180-4) implemented
 	  using powerpc SPE SIMD instruction set.
 
+config CRYPTO_LIB_SHA256
+	tristate
+
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
diff --git a/include/crypto/sha256.h b/include/crypto/sha256.h
index b1f9c6781082..9cbb3589b8b3 100644
--- a/include/crypto/sha256.h
+++ b/include/crypto/sha256.h
@@ -14,8 +14,9 @@
 /*
  * Stand-alone implementation of the SHA256 algorithm. It is designed to
  * have as little dependencies as possible so it can be used in the
- * kexec_file purgatory. In other cases you should use the implementation in
- * crypto/.
+ * kexec_file purgatory. In other cases you should generally use the
+ * hash APIs from include/crypto/hash.h. Especially when hashing large
+ * amounts of data as those APIs may be hw-accelerated.
  *
  * For details see lib/crypto/sha256.c
  */
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 101a321b8a99..cbe0b6a6450d 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -8,3 +8,6 @@ libarc4-y := arc4.o
 
 obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
 libdes-y := des.o
+
+obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
+libsha256-y := sha256.o
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index 1458a20d53a5..f2ed75ae6910 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/export.h>
 #include <linux/string.h>
 #include <crypto/sha256.h>
 #include <asm/unaligned.h>
@@ -218,6 +219,7 @@ int sha256_init(struct sha256_state *sctx)
 
 	return 0;
 }
+EXPORT_SYMBOL(sha256_init);
 
 int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
 {
@@ -248,6 +250,7 @@ int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
 
 	return 0;
 }
+EXPORT_SYMBOL(sha256_update);
 
 int sha256_final(struct sha256_state *sctx, u8 *out)
 {
@@ -277,3 +280,4 @@ int sha256_final(struct sha256_state *sctx, u8 *out)
 
 	return 0;
 }
+EXPORT_SYMBOL(sha256_final);
-- 
cgit v1.2.3


From 7d2f5b0c43e0bb346fbf78daefd68cd0bfc56ca3 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 17 Aug 2019 16:24:34 +0200
Subject: crypto: sha256 - Add sha224 support to sha256 library code

Add sha224 support to the lib/crypto/sha256 library code. This will allow
us to replace both the sha256 and sha224 parts of crypto/sha256_generic.c
when we remove the code duplication in further patches in this series.

Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sha256.h |  5 +++++
 lib/crypto/sha256.c     | 37 +++++++++++++++++++++++++++++++++++--
 2 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/crypto/sha256.h b/include/crypto/sha256.h
index 9cbb3589b8b3..a75998d65a41 100644
--- a/include/crypto/sha256.h
+++ b/include/crypto/sha256.h
@@ -26,4 +26,9 @@ extern int sha256_update(struct sha256_state *sctx, const u8 *input,
 			 unsigned int length);
 extern int sha256_final(struct sha256_state *sctx, u8 *hash);
 
+extern int sha224_init(struct sha256_state *sctx);
+extern int sha224_update(struct sha256_state *sctx, const u8 *input,
+			 unsigned int length);
+extern int sha224_final(struct sha256_state *sctx, u8 *hash);
+
 #endif /* SHA256_H */
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index f2ed75ae6910..45ad87520769 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -221,6 +221,22 @@ int sha256_init(struct sha256_state *sctx)
 }
 EXPORT_SYMBOL(sha256_init);
 
+int sha224_init(struct sha256_state *sctx)
+{
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(sha224_init);
+
 int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
 {
 	unsigned int partial, done;
@@ -252,7 +268,13 @@ int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
 }
 EXPORT_SYMBOL(sha256_update);
 
-int sha256_final(struct sha256_state *sctx, u8 *out)
+int sha224_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+	return sha256_update(sctx, data, len);
+}
+EXPORT_SYMBOL(sha224_update);
+
+static int __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words)
 {
 	__be32 *dst = (__be32 *)out;
 	__be64 bits;
@@ -272,7 +294,7 @@ int sha256_final(struct sha256_state *sctx, u8 *out)
 	sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
 
 	/* Store state in digest */
-	for (i = 0; i < 8; i++)
+	for (i = 0; i < digest_words; i++)
 		put_unaligned_be32(sctx->state[i], &dst[i]);
 
 	/* Zeroize sensitive information. */
@@ -280,4 +302,15 @@ int sha256_final(struct sha256_state *sctx, u8 *out)
 
 	return 0;
 }
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+	return __sha256_final(sctx, out, 8);
+}
 EXPORT_SYMBOL(sha256_final);
+
+int sha224_final(struct sha256_state *sctx, u8 *out)
+{
+	return __sha256_final(sctx, out, 7);
+}
+EXPORT_SYMBOL(sha224_final);
-- 
cgit v1.2.3


From e5d2f910cfeca852f6e2dc19dfa8dab264ce0cde Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 22 Aug 2019 05:05:37 +0000
Subject: PCI: hv: Add a paravirtual backchannel in software

Windows SR-IOV provides a backchannel mechanism in software for communication
between a VF driver and a PF driver.  These "configuration blocks" are
similar in concept to PCI configuration space, but instead of doing reads and
writes in 32-bit chunks through a very slow path, packets of up to 128 bytes
can be sent or received asynchronously.

Nearly every SR-IOV device contains just such a communications channel in
hardware, so using this one in software is usually optional.  Using the
software channel, however, allows driver implementers to leverage software
tools that fuzz the communications channel looking for vulnerabilities.

The usage model for these packets puts the responsibility for reading or
writing on the VF driver.  The VF driver sends a read or a write packet,
indicating which "block" is being referred to by number.

If the PF driver wishes to initiate communication, it can "invalidate" one or
more of the first 64 blocks.  This invalidation is delivered via a callback
supplied by the VF driver by this driver.

No protocol is implied, except that supplied by the PF and VF drivers.

Signed-off-by: Jake Oshins <jakeo@microsoft.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/pci/controller/pci-hyperv.c | 302 ++++++++++++++++++++++++++++++++++++
 include/linux/hyperv.h              |  15 ++
 2 files changed, 317 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 40b625458afa..57adeca7bda9 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -365,6 +365,39 @@ struct pci_delete_interrupt {
 	struct tran_int_desc int_desc;
 } __packed;
 
+/*
+ * Note: the VM must pass a valid block id, wslot and bytes_requested.
+ */
+struct pci_read_block {
+	struct pci_message message_type;
+	u32 block_id;
+	union win_slot_encoding wslot;
+	u32 bytes_requested;
+} __packed;
+
+struct pci_read_block_response {
+	struct vmpacket_descriptor hdr;
+	u32 status;
+	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+/*
+ * Note: the VM must pass a valid block id, wslot and byte_count.
+ */
+struct pci_write_block {
+	struct pci_message message_type;
+	u32 block_id;
+	union win_slot_encoding wslot;
+	u32 byte_count;
+	u8 bytes[HV_CONFIG_BLOCK_SIZE_MAX];
+} __packed;
+
+struct pci_dev_inval_block {
+	struct pci_incoming_message incoming;
+	union win_slot_encoding wslot;
+	u64 block_mask;
+} __packed;
+
 struct pci_dev_incoming {
 	struct pci_incoming_message incoming;
 	union win_slot_encoding wslot;
@@ -499,6 +532,9 @@ struct hv_pci_dev {
 	struct hv_pcibus_device *hbus;
 	struct work_struct wrk;
 
+	void (*block_invalidate)(void *context, u64 block_mask);
+	void *invalidate_context;
+
 	/*
 	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
 	 * read it back, for each of the BAR offsets within config space.
@@ -817,6 +853,256 @@ static struct pci_ops hv_pcifront_ops = {
 	.write = hv_pcifront_write_config,
 };
 
+/*
+ * Paravirtual backchannel
+ *
+ * Hyper-V SR-IOV provides a backchannel mechanism in software for
+ * communication between a VF driver and a PF driver.  These
+ * "configuration blocks" are similar in concept to PCI configuration space,
+ * but instead of doing reads and writes in 32-bit chunks through a very slow
+ * path, packets of up to 128 bytes can be sent or received asynchronously.
+ *
+ * Nearly every SR-IOV device contains just such a communications channel in
+ * hardware, so using this one in software is usually optional.  Using the
+ * software channel, however, allows driver implementers to leverage software
+ * tools that fuzz the communications channel looking for vulnerabilities.
+ *
+ * The usage model for these packets puts the responsibility for reading or
+ * writing on the VF driver.  The VF driver sends a read or a write packet,
+ * indicating which "block" is being referred to by number.
+ *
+ * If the PF driver wishes to initiate communication, it can "invalidate" one or
+ * more of the first 64 blocks.  This invalidation is delivered via a callback
+ * supplied by the VF driver by this driver.
+ *
+ * No protocol is implied, except that supplied by the PF and VF drivers.
+ */
+
+struct hv_read_config_compl {
+	struct hv_pci_compl comp_pkt;
+	void *buf;
+	unsigned int len;
+	unsigned int bytes_returned;
+};
+
+/**
+ * hv_pci_read_config_compl() - Invoked when a response packet
+ * for a read config block operation arrives.
+ * @context:		Identifies the read config operation
+ * @resp:		The response packet itself
+ * @resp_packet_size:	Size in bytes of the response packet
+ */
+static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
+				     int resp_packet_size)
+{
+	struct hv_read_config_compl *comp = context;
+	struct pci_read_block_response *read_resp =
+		(struct pci_read_block_response *)resp;
+	unsigned int data_len, hdr_len;
+
+	hdr_len = offsetof(struct pci_read_block_response, bytes);
+	if (resp_packet_size < hdr_len) {
+		comp->comp_pkt.completion_status = -1;
+		goto out;
+	}
+
+	data_len = resp_packet_size - hdr_len;
+	if (data_len > 0 && read_resp->status == 0) {
+		comp->bytes_returned = min(comp->len, data_len);
+		memcpy(comp->buf, read_resp->bytes, comp->bytes_returned);
+	} else {
+		comp->bytes_returned = 0;
+	}
+
+	comp->comp_pkt.completion_status = read_resp->status;
+out:
+	complete(&comp->comp_pkt.host_event);
+}
+
+/**
+ * hv_read_config_block() - Sends a read config block request to
+ * the back-end driver running in the Hyper-V parent partition.
+ * @pdev:		The PCI driver's representation for this device.
+ * @buf:		Buffer into which the config block will be copied.
+ * @len:		Size in bytes of buf.
+ * @block_id:		Identifies the config block which has been requested.
+ * @bytes_returned:	Size which came back from the back-end driver.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+			 unsigned int block_id, unsigned int *bytes_returned)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct {
+		struct pci_packet pkt;
+		char buf[sizeof(struct pci_read_block)];
+	} pkt;
+	struct hv_read_config_compl comp_pkt;
+	struct pci_read_block *read_blk;
+	int ret;
+
+	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	init_completion(&comp_pkt.comp_pkt.host_event);
+	comp_pkt.buf = buf;
+	comp_pkt.len = len;
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.pkt.completion_func = hv_pci_read_config_compl;
+	pkt.pkt.compl_ctxt = &comp_pkt;
+	read_blk = (struct pci_read_block *)&pkt.pkt.message;
+	read_blk->message_type.type = PCI_READ_BLOCK;
+	read_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+	read_blk->block_id = block_id;
+	read_blk->bytes_requested = len;
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
+			       sizeof(*read_blk), (unsigned long)&pkt.pkt,
+			       VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
+	if (ret)
+		return ret;
+
+	if (comp_pkt.comp_pkt.completion_status != 0 ||
+	    comp_pkt.bytes_returned == 0) {
+		dev_err(&hbus->hdev->device,
+			"Read Config Block failed: 0x%x, bytes_returned=%d\n",
+			comp_pkt.comp_pkt.completion_status,
+			comp_pkt.bytes_returned);
+		return -EIO;
+	}
+
+	*bytes_returned = comp_pkt.bytes_returned;
+	return 0;
+}
+EXPORT_SYMBOL(hv_read_config_block);
+
+/**
+ * hv_pci_write_config_compl() - Invoked when a response packet for a write
+ * config block operation arrives.
+ * @context:		Identifies the write config operation
+ * @resp:		The response packet itself
+ * @resp_packet_size:	Size in bytes of the response packet
+ */
+static void hv_pci_write_config_compl(void *context, struct pci_response *resp,
+				      int resp_packet_size)
+{
+	struct hv_pci_compl *comp_pkt = context;
+
+	comp_pkt->completion_status = resp->status;
+	complete(&comp_pkt->host_event);
+}
+
+/**
+ * hv_write_config_block() - Sends a write config block request to the
+ * back-end driver running in the Hyper-V parent partition.
+ * @pdev:		The PCI driver's representation for this device.
+ * @buf:		Buffer from which the config block will	be copied.
+ * @len:		Size in bytes of buf.
+ * @block_id:		Identifies the config block which is being written.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
+			  unsigned int block_id)
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct {
+		struct pci_packet pkt;
+		char buf[sizeof(struct pci_write_block)];
+		u32 reserved;
+	} pkt;
+	struct hv_pci_compl comp_pkt;
+	struct pci_write_block *write_blk;
+	u32 pkt_size;
+	int ret;
+
+	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
+		return -EINVAL;
+
+	init_completion(&comp_pkt.host_event);
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.pkt.completion_func = hv_pci_write_config_compl;
+	pkt.pkt.compl_ctxt = &comp_pkt;
+	write_blk = (struct pci_write_block *)&pkt.pkt.message;
+	write_blk->message_type.type = PCI_WRITE_BLOCK;
+	write_blk->wslot.slot = devfn_to_wslot(pdev->devfn);
+	write_blk->block_id = block_id;
+	write_blk->byte_count = len;
+	memcpy(write_blk->bytes, buf, len);
+	pkt_size = offsetof(struct pci_write_block, bytes) + len;
+	/*
+	 * This quirk is required on some hosts shipped around 2018, because
+	 * these hosts don't check the pkt_size correctly (new hosts have been
+	 * fixed since early 2019). The quirk is also safe on very old hosts
+	 * and new hosts, because, on them, what really matters is the length
+	 * specified in write_blk->byte_count.
+	 */
+	pkt_size += sizeof(pkt.reserved);
+
+	ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
+			       (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+	if (ret)
+		return ret;
+
+	ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
+	if (ret)
+		return ret;
+
+	if (comp_pkt.completion_status != 0) {
+		dev_err(&hbus->hdev->device,
+			"Write Config Block failed: 0x%x\n",
+			comp_pkt.completion_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(hv_write_config_block);
+
+/**
+ * hv_register_block_invalidate() - Invoked when a config block invalidation
+ * arrives from the back-end driver.
+ * @pdev:		The PCI driver's representation for this device.
+ * @context:		Identifies the device.
+ * @block_invalidate:	Identifies all of the blocks being invalidated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
+				 void (*block_invalidate)(void *context,
+							  u64 block_mask))
+{
+	struct hv_pcibus_device *hbus =
+		container_of(pdev->bus->sysdata, struct hv_pcibus_device,
+			     sysdata);
+	struct hv_pci_dev *hpdev;
+
+	hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
+	if (!hpdev)
+		return -ENODEV;
+
+	hpdev->block_invalidate = block_invalidate;
+	hpdev->invalidate_context = context;
+
+	put_pcichild(hpdev);
+	return 0;
+
+}
+EXPORT_SYMBOL(hv_register_block_invalidate);
+
 /* Interrupt management hooks */
 static void hv_int_desc_free(struct hv_pci_dev *hpdev,
 			     struct tran_int_desc *int_desc)
@@ -1968,6 +2254,7 @@ static void hv_pci_onchannelcallback(void *context)
 	struct pci_response *response;
 	struct pci_incoming_message *new_message;
 	struct pci_bus_relations *bus_rel;
+	struct pci_dev_inval_block *inval;
 	struct pci_dev_incoming *dev_message;
 	struct hv_pci_dev *hpdev;
 
@@ -2045,6 +2332,21 @@ static void hv_pci_onchannelcallback(void *context)
 				}
 				break;
 
+			case PCI_INVALIDATE_BLOCK:
+
+				inval = (struct pci_dev_inval_block *)buffer;
+				hpdev = get_pcichild_wslot(hbus,
+							   inval->wslot.slot);
+				if (hpdev) {
+					if (hpdev->block_invalidate) {
+						hpdev->block_invalidate(
+						    hpdev->invalidate_context,
+						    inval->block_mask);
+					}
+					put_pcichild(hpdev);
+				}
+				break;
+
 			default:
 				dev_warn(&hbus->hdev->device,
 					"Unimplemented protocol message %x\n",
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc34c4a6..9d37f8cf1245 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1578,4 +1578,19 @@ hv_pkt_iter_next(struct vmbus_channel *channel,
 	for (pkt = hv_pkt_iter_first(channel); pkt; \
 	    pkt = hv_pkt_iter_next(channel, pkt))
 
+/*
+ * Functions for passing data between SR-IOV PF and VF drivers.  The VF driver
+ * sends requests to read and write blocks. Each block must be 128 bytes or
+ * smaller. Optionally, the VF driver can register a callback function which
+ * will be invoked when the host says that one or more of the first 64 block
+ * IDs is "invalid" which means that the VF driver should reread them.
+ */
+#define HV_CONFIG_BLOCK_SIZE_MAX 128
+int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			 unsigned int block_id, unsigned int *bytes_returned);
+int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
+			  unsigned int block_id);
+int hv_register_block_invalidate(struct pci_dev *dev, void *context,
+				 void (*block_invalidate)(void *context,
+							  u64 block_mask));
 #endif /* _HYPERV_H */
-- 
cgit v1.2.3


From 348dd93e40c112afda3cd07daa6f470e474d29dc Mon Sep 17 00:00:00 2001
From: Haiyang Zhang <haiyangz@microsoft.com>
Date: Thu, 22 Aug 2019 05:05:41 +0000
Subject: PCI: hv: Add a Hyper-V PCI interface driver for software backchannel
 interface

This interface driver is a helper driver allows other drivers to
have a common interface with the Hyper-V PCI frontend driver.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                              |  1 +
 drivers/pci/Kconfig                      |  1 +
 drivers/pci/controller/Kconfig           |  7 ++++
 drivers/pci/controller/Makefile          |  1 +
 drivers/pci/controller/pci-hyperv-intf.c | 67 ++++++++++++++++++++++++++++++++
 drivers/pci/controller/pci-hyperv.c      | 12 ++++--
 include/linux/hyperv.h                   | 30 ++++++++++----
 7 files changed, 108 insertions(+), 11 deletions(-)
 create mode 100644 drivers/pci/controller/pci-hyperv-intf.c

(limited to 'include')

diff --git a/MAINTAINERS b/MAINTAINERS
index a406947b369e..986085351d79 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7469,6 +7469,7 @@ F:	drivers/hid/hid-hyperv.c
 F:	drivers/hv/
 F:	drivers/input/serio/hyperv-keyboard.c
 F:	drivers/pci/controller/pci-hyperv.c
+F:	drivers/pci/controller/pci-hyperv-intf.c
 F:	drivers/net/hyperv/
 F:	drivers/scsi/storvsc_drv.c
 F:	drivers/uio/uio_hv_generic.c
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab92409210a..c313de96a357 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -182,6 +182,7 @@ config PCI_LABEL
 config PCI_HYPERV
         tristate "Hyper-V PCI Frontend"
         depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+	select PCI_HYPERV_INTERFACE
         help
           The PCI device frontend driver allows the kernel to import arbitrary
           PCI devices from a PCI backend to support PCI driver domains.
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index fe9f9f13ce11..70e078238899 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -281,5 +281,12 @@ config VMD
 	  To compile this driver as a module, choose M here: the
 	  module will be called vmd.
 
+config PCI_HYPERV_INTERFACE
+	tristate "Hyper-V PCI Interface"
+	depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+	help
+	  The Hyper-V PCI Interface is a helper driver allows other drivers to
+	  have a common interface with the Hyper-V PCI frontend driver.
+
 source "drivers/pci/controller/dwc/Kconfig"
 endmenu
diff --git a/drivers/pci/controller/Makefile b/drivers/pci/controller/Makefile
index d56a507495c5..a2a22c9d91af 100644
--- a/drivers/pci/controller/Makefile
+++ b/drivers/pci/controller/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_PCIE_CADENCE_HOST) += pcie-cadence-host.o
 obj-$(CONFIG_PCIE_CADENCE_EP) += pcie-cadence-ep.o
 obj-$(CONFIG_PCI_FTPCI100) += pci-ftpci100.o
 obj-$(CONFIG_PCI_HYPERV) += pci-hyperv.o
+obj-$(CONFIG_PCI_HYPERV_INTERFACE) += pci-hyperv-intf.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
 obj-$(CONFIG_PCI_AARDVARK) += pci-aardvark.o
 obj-$(CONFIG_PCI_TEGRA) += pci-tegra.o
diff --git a/drivers/pci/controller/pci-hyperv-intf.c b/drivers/pci/controller/pci-hyperv-intf.c
new file mode 100644
index 000000000000..cc96be450360
--- /dev/null
+++ b/drivers/pci/controller/pci-hyperv-intf.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) Microsoft Corporation.
+ *
+ * Author:
+ *   Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This small module is a helper driver allows other drivers to
+ * have a common interface with the Hyper-V PCI frontend driver.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/hyperv.h>
+
+struct hyperv_pci_block_ops hvpci_block_ops;
+EXPORT_SYMBOL_GPL(hvpci_block_ops);
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned)
+{
+	if (!hvpci_block_ops.read_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.read_block(dev, buf, buf_len, block_id,
+					  bytes_returned);
+}
+EXPORT_SYMBOL_GPL(hyperv_read_cfg_blk);
+
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id)
+{
+	if (!hvpci_block_ops.write_block)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.write_block(dev, buf, len, block_id);
+}
+EXPORT_SYMBOL_GPL(hyperv_write_cfg_blk);
+
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask))
+{
+	if (!hvpci_block_ops.reg_blk_invalidate)
+		return -EOPNOTSUPP;
+
+	return hvpci_block_ops.reg_blk_invalidate(dev, context,
+						  block_invalidate);
+}
+EXPORT_SYMBOL_GPL(hyperv_reg_block_invalidate);
+
+static void __exit exit_hv_pci_intf(void)
+{
+}
+
+static int __init init_hv_pci_intf(void)
+{
+	return 0;
+}
+
+module_init(init_hv_pci_intf);
+module_exit(exit_hv_pci_intf);
+
+MODULE_DESCRIPTION("Hyper-V PCI Interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 57adeca7bda9..9c93ac2215b7 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -983,7 +983,6 @@ int hv_read_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
 	*bytes_returned = comp_pkt.bytes_returned;
 	return 0;
 }
-EXPORT_SYMBOL(hv_read_config_block);
 
 /**
  * hv_pci_write_config_compl() - Invoked when a response packet for a write
@@ -1070,7 +1069,6 @@ int hv_write_config_block(struct pci_dev *pdev, void *buf, unsigned int len,
 
 	return 0;
 }
-EXPORT_SYMBOL(hv_write_config_block);
 
 /**
  * hv_register_block_invalidate() - Invoked when a config block invalidation
@@ -1101,7 +1099,6 @@ int hv_register_block_invalidate(struct pci_dev *pdev, void *context,
 	return 0;
 
 }
-EXPORT_SYMBOL(hv_register_block_invalidate);
 
 /* Interrupt management hooks */
 static void hv_int_desc_free(struct hv_pci_dev *hpdev,
@@ -3045,10 +3042,19 @@ static struct hv_driver hv_pci_drv = {
 static void __exit exit_hv_pci_drv(void)
 {
 	vmbus_driver_unregister(&hv_pci_drv);
+
+	hvpci_block_ops.read_block = NULL;
+	hvpci_block_ops.write_block = NULL;
+	hvpci_block_ops.reg_blk_invalidate = NULL;
 }
 
 static int __init init_hv_pci_drv(void)
 {
+	/* Initialize PCI block r/w interface */
+	hvpci_block_ops.read_block = hv_read_config_block;
+	hvpci_block_ops.write_block = hv_write_config_block;
+	hvpci_block_ops.reg_blk_invalidate = hv_register_block_invalidate;
+
 	return vmbus_driver_register(&hv_pci_drv);
 }
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 9d37f8cf1245..2afe6fdc1dda 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1579,18 +1579,32 @@ hv_pkt_iter_next(struct vmbus_channel *channel,
 	    pkt = hv_pkt_iter_next(channel, pkt))
 
 /*
- * Functions for passing data between SR-IOV PF and VF drivers.  The VF driver
+ * Interface for passing data between SR-IOV PF and VF drivers. The VF driver
  * sends requests to read and write blocks. Each block must be 128 bytes or
  * smaller. Optionally, the VF driver can register a callback function which
  * will be invoked when the host says that one or more of the first 64 block
  * IDs is "invalid" which means that the VF driver should reread them.
  */
 #define HV_CONFIG_BLOCK_SIZE_MAX 128
-int hv_read_config_block(struct pci_dev *dev, void *buf, unsigned int buf_len,
-			 unsigned int block_id, unsigned int *bytes_returned);
-int hv_write_config_block(struct pci_dev *dev, void *buf, unsigned int len,
-			  unsigned int block_id);
-int hv_register_block_invalidate(struct pci_dev *dev, void *context,
-				 void (*block_invalidate)(void *context,
-							  u64 block_mask));
+
+int hyperv_read_cfg_blk(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			unsigned int block_id, unsigned int *bytes_returned);
+int hyperv_write_cfg_blk(struct pci_dev *dev, void *buf, unsigned int len,
+			 unsigned int block_id);
+int hyperv_reg_block_invalidate(struct pci_dev *dev, void *context,
+				void (*block_invalidate)(void *context,
+							 u64 block_mask));
+
+struct hyperv_pci_block_ops {
+	int (*read_block)(struct pci_dev *dev, void *buf, unsigned int buf_len,
+			  unsigned int block_id, unsigned int *bytes_returned);
+	int (*write_block)(struct pci_dev *dev, void *buf, unsigned int len,
+			   unsigned int block_id);
+	int (*reg_blk_invalidate)(struct pci_dev *dev, void *context,
+				  void (*block_invalidate)(void *context,
+							   u64 block_mask));
+};
+
+extern struct hyperv_pci_block_ops hvpci_block_ops;
+
 #endif /* _HYPERV_H */
-- 
cgit v1.2.3


From 87175120defd2907d42592653c35feea9de0437a Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 22 Aug 2019 05:05:51 +0000
Subject: net/mlx5: Add HV VHCA infrastructure

HV VHCA is a layer which provides PF to VF communication channel based on
HyperV PCI config channel. It implements Mellanox's Inter VHCA control
communication protocol. The protocol contains control block in order to
pass messages between the PF and VF drivers, and data blocks in order to
pass actual data.

The infrastructure is agent based. Each agent will be responsible of
contiguous buffer blocks in the VHCA config space. This infrastructure will
bind agents to their blocks, and those agents can only access read/write
the buffer blocks assigned to them. Each agent will provide three
callbacks (control, invalidate, cleanup). Control will be invoked when
block-0 is invalidated with a command that concerns this agent. Invalidate
callback will be invoked if one of the blocks assigned to this agent was
invalidated. Cleanup will be invoked before the agent is being freed in
order to clean all of its open resources or deferred works.

Block-0 serves as the control block. All execution commands from the PF
will be written by the PF over this block. VF will ack on those by
writing on block-0 as well. Its format is described by struct
mlx5_hv_vhca_control_block layout.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c  | 253 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h  | 102 +++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   7 +
 include/linux/mlx5/driver.h                        |   2 +
 5 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index fd32a5beba72..8d443fca199e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -45,7 +45,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offlo
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
-mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o
+mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o
 
 #
 # Ipoib netdev
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
new file mode 100644
index 000000000000..84d1d75a1608
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2018 Mellanox Technologies
+
+#include <linux/hyperv.h>
+#include "mlx5_core.h"
+#include "lib/hv.h"
+#include "lib/hv_vhca.h"
+
+struct mlx5_hv_vhca {
+	struct mlx5_core_dev       *dev;
+	struct workqueue_struct    *work_queue;
+	struct mlx5_hv_vhca_agent  *agents[MLX5_HV_VHCA_AGENT_MAX];
+	struct mutex                agents_lock; /* Protect agents array */
+};
+
+struct mlx5_hv_vhca_work {
+	struct work_struct     invalidate_work;
+	struct mlx5_hv_vhca   *hv_vhca;
+	u64                    block_mask;
+};
+
+struct mlx5_hv_vhca_data_block {
+	u16     sequence;
+	u16     offset;
+	u8      reserved[4];
+	u64     data[15];
+};
+
+struct mlx5_hv_vhca_agent {
+	enum mlx5_hv_vhca_agent_type	 type;
+	struct mlx5_hv_vhca		*hv_vhca;
+	void				*priv;
+	u16                              seq;
+	void (*control)(struct mlx5_hv_vhca_agent *agent,
+			struct mlx5_hv_vhca_control_block *block);
+	void (*invalidate)(struct mlx5_hv_vhca_agent *agent,
+			   u64 block_mask);
+	void (*cleanup)(struct mlx5_hv_vhca_agent *agent);
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_hv_vhca *hv_vhca = NULL;
+
+	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
+	if (!hv_vhca)
+		return ERR_PTR(-ENOMEM);
+
+	hv_vhca->work_queue = create_singlethread_workqueue("mlx5_hv_vhca");
+	if (!hv_vhca->work_queue) {
+		kfree(hv_vhca);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	hv_vhca->dev = dev;
+	mutex_init(&hv_vhca->agents_lock);
+
+	return hv_vhca;
+}
+
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	destroy_workqueue(hv_vhca->work_queue);
+	kfree(hv_vhca);
+}
+
+static void mlx5_hv_vhca_invalidate_work(struct work_struct *work)
+{
+	struct mlx5_hv_vhca_work *hwork;
+	struct mlx5_hv_vhca *hv_vhca;
+	int i;
+
+	hwork = container_of(work, struct mlx5_hv_vhca_work, invalidate_work);
+	hv_vhca = hwork->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++) {
+		struct mlx5_hv_vhca_agent *agent = hv_vhca->agents[i];
+
+		if (!agent || !agent->invalidate)
+			continue;
+
+		if (!(BIT(agent->type) & hwork->block_mask))
+			continue;
+
+		agent->invalidate(agent, hwork->block_mask);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	kfree(hwork);
+}
+
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask)
+{
+	struct mlx5_hv_vhca *hv_vhca = (struct mlx5_hv_vhca *)context;
+	struct mlx5_hv_vhca_work *work;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return;
+
+	INIT_WORK(&work->invalidate_work, mlx5_hv_vhca_invalidate_work);
+	work->hv_vhca    = hv_vhca;
+	work->block_mask = block_mask;
+
+	queue_work(hv_vhca->work_queue, &work->invalidate_work);
+}
+
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return IS_ERR_OR_NULL(hv_vhca);
+
+	return mlx5_hv_register_invalidate(hv_vhca->dev, hv_vhca,
+					   mlx5_hv_vhca_invalidate);
+}
+
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	for (i = 0; i < MLX5_HV_VHCA_AGENT_MAX; i++)
+		WARN_ON(hv_vhca->agents[i]);
+
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	mlx5_hv_unregister_invalidate(hv_vhca->dev);
+}
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleaup)(struct mlx5_hv_vhca_agent *agent),
+			  void *priv)
+{
+	struct mlx5_hv_vhca_agent *agent;
+
+	if (IS_ERR_OR_NULL(hv_vhca))
+		return ERR_PTR(-ENOMEM);
+
+	if (type >= MLX5_HV_VHCA_AGENT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&hv_vhca->agents_lock);
+	if (hv_vhca->agents[type]) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return ERR_PTR(-EINVAL);
+	}
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	agent = kzalloc(sizeof(*agent), GFP_KERNEL);
+	if (!agent)
+		return ERR_PTR(-ENOMEM);
+
+	agent->type      = type;
+	agent->hv_vhca   = hv_vhca;
+	agent->priv      = priv;
+	agent->control   = control;
+	agent->invalidate = invalidate;
+	agent->cleanup   = cleaup;
+
+	mutex_lock(&hv_vhca->agents_lock);
+	hv_vhca->agents[type] = agent;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	return agent;
+}
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+	struct mlx5_hv_vhca *hv_vhca = agent->hv_vhca;
+
+	mutex_lock(&hv_vhca->agents_lock);
+
+	if (WARN_ON(agent != hv_vhca->agents[agent->type])) {
+		mutex_unlock(&hv_vhca->agents_lock);
+		return;
+	}
+
+	hv_vhca->agents[agent->type] = NULL;
+	mutex_unlock(&hv_vhca->agents_lock);
+
+	if (agent->cleanup)
+		agent->cleanup(agent);
+
+	kfree(agent);
+}
+
+static int mlx5_hv_vhca_data_block_prepare(struct mlx5_hv_vhca_agent *agent,
+					   struct mlx5_hv_vhca_data_block *data_block,
+					   void *src, int len, int *offset)
+{
+	int bytes = min_t(int, (int)sizeof(data_block->data), len);
+
+	data_block->sequence = agent->seq;
+	data_block->offset   = (*offset)++;
+	memcpy(data_block->data, src, bytes);
+
+	return bytes;
+}
+
+static void mlx5_hv_vhca_agent_seq_update(struct mlx5_hv_vhca_agent *agent)
+{
+	agent->seq++;
+}
+
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len)
+{
+	int offset = agent->type * HV_CONFIG_BLOCK_SIZE_MAX;
+	int block_offset = 0;
+	int total = 0;
+	int err;
+
+	while (len) {
+		struct mlx5_hv_vhca_data_block data_block = {0};
+		int bytes;
+
+		bytes = mlx5_hv_vhca_data_block_prepare(agent, &data_block,
+							buf + total,
+							len, &block_offset);
+		if (!bytes)
+			return -ENOMEM;
+
+		err = mlx5_hv_write_config(agent->hv_vhca->dev, &data_block,
+					   sizeof(data_block), offset);
+		if (err)
+			return err;
+
+		total += bytes;
+		len   -= bytes;
+	}
+
+	mlx5_hv_vhca_agent_seq_update(agent);
+
+	return 0;
+}
+
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent)
+{
+	return agent->priv;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
new file mode 100644
index 000000000000..cdf13039489c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __LIB_HV_VHCA_H__
+#define __LIB_HV_VHCA_H__
+
+#include "en.h"
+#include "lib/hv.h"
+
+struct mlx5_hv_vhca_agent;
+struct mlx5_hv_vhca;
+struct mlx5_hv_vhca_control_block;
+
+enum mlx5_hv_vhca_agent_type {
+	MLX5_HV_VHCA_AGENT_MAX = 32,
+};
+
+#if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
+
+struct mlx5_hv_vhca_control_block {
+	u32     capabilities;
+	u32     control;
+	u16     command;
+	u16     command_ack;
+	u16     version;
+	u16     rings;
+	u32     reserved1[28];
+};
+
+struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev);
+void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca);
+int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca);
+void mlx5_hv_vhca_invalidate(void *context, u64 block_mask);
+
+struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context);
+
+void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent);
+int mlx5_hv_vhca_agent_write(struct mlx5_hv_vhca_agent *agent,
+			     void *buf, int len);
+void *mlx5_hv_vhca_agent_priv(struct mlx5_hv_vhca_agent *agent);
+
+#else
+
+static inline struct mlx5_hv_vhca *
+mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_destroy(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline int mlx5_hv_vhca_init(struct mlx5_hv_vhca *hv_vhca)
+{
+	return 0;
+}
+
+static inline void mlx5_hv_vhca_cleanup(struct mlx5_hv_vhca *hv_vhca)
+{
+}
+
+static inline void mlx5_hv_vhca_invalidate(void *context,
+					   u64 block_mask)
+{
+}
+
+static inline struct mlx5_hv_vhca_agent *
+mlx5_hv_vhca_agent_create(struct mlx5_hv_vhca *hv_vhca,
+			  enum mlx5_hv_vhca_agent_type type,
+			  void (*control)(struct mlx5_hv_vhca_agent*,
+					  struct mlx5_hv_vhca_control_block *block),
+			  void (*invalidate)(struct mlx5_hv_vhca_agent*,
+					     u64 block_mask),
+			  void (*cleanup)(struct mlx5_hv_vhca_agent *agent),
+			  void *context)
+{
+	return NULL;
+}
+
+static inline void mlx5_hv_vhca_agent_destroy(struct mlx5_hv_vhca_agent *agent)
+{
+}
+
+static inline int
+mlx5_hv_vhca_write_agent(struct mlx5_hv_vhca_agent *agent,
+			 void *buf, int len)
+{
+	return 0;
+}
+#endif
+
+#endif /* __LIB_HV_VHCA_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0b70b1d6338d..61388ca7233b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -69,6 +69,7 @@
 #include "lib/pci_vsc.h"
 #include "diag/fw_tracer.h"
 #include "ecpf.h"
+#include "lib/hv_vhca.h"
 
 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver");
@@ -870,6 +871,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 	}
 
 	dev->tracer = mlx5_fw_tracer_create(dev);
+	dev->hv_vhca = mlx5_hv_vhca_create(dev);
 
 	return 0;
 
@@ -900,6 +902,7 @@ err_devcom:
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+	mlx5_hv_vhca_destroy(dev->hv_vhca);
 	mlx5_fw_tracer_destroy(dev->tracer);
 	mlx5_fpga_cleanup(dev);
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
@@ -1067,6 +1070,8 @@ static int mlx5_load(struct mlx5_core_dev *dev)
 		goto err_fw_tracer;
 	}
 
+	mlx5_hv_vhca_init(dev->hv_vhca);
+
 	err = mlx5_fpga_device_start(dev);
 	if (err) {
 		mlx5_core_err(dev, "fpga device start failed %d\n", err);
@@ -1122,6 +1127,7 @@ err_tls_start:
 err_ipsec_start:
 	mlx5_fpga_device_stop(dev);
 err_fpga_start:
+	mlx5_hv_vhca_cleanup(dev->hv_vhca);
 	mlx5_fw_tracer_cleanup(dev->tracer);
 err_fw_tracer:
 	mlx5_eq_table_destroy(dev);
@@ -1142,6 +1148,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 	mlx5_accel_ipsec_cleanup(dev);
 	mlx5_accel_tls_cleanup(dev);
 	mlx5_fpga_device_stop(dev);
+	mlx5_hv_vhca_cleanup(dev->hv_vhca);
 	mlx5_fw_tracer_cleanup(dev->tracer);
 	mlx5_eq_table_destroy(dev);
 	mlx5_irq_table_destroy(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index df23f17eed64..13b4cf22f3ab 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -659,6 +659,7 @@ struct mlx5_clock {
 struct mlx5_fw_tracer;
 struct mlx5_vxlan;
 struct mlx5_geneve;
+struct mlx5_hv_vhca;
 
 struct mlx5_core_dev {
 	struct device *device;
@@ -706,6 +707,7 @@ struct mlx5_core_dev {
 	struct mlx5_ib_clock_info  *clock_info;
 	struct mlx5_fw_tracer   *tracer;
 	u32                      vsc_addr;
+	struct mlx5_hv_vhca	*hv_vhca;
 };
 
 struct mlx5_db {
-- 
cgit v1.2.3


From 5c498950f730aa17c5f8a2cdcb903524e4002ed2 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Fri, 19 Jul 2019 15:32:19 +0100
Subject: libceph: allow ceph_buffer_put() to receive a NULL ceph_buffer

Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/buffer.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/buffer.h b/include/linux/ceph/buffer.h
index 5e58bb29b1a3..11cdc7c60480 100644
--- a/include/linux/ceph/buffer.h
+++ b/include/linux/ceph/buffer.h
@@ -30,7 +30,8 @@ static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b)
 
 static inline void ceph_buffer_put(struct ceph_buffer *b)
 {
-	kref_put(&b->kref, ceph_buffer_release);
+	if (b)
+		kref_put(&b->kref, ceph_buffer_release);
 }
 
 extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end);
-- 
cgit v1.2.3


From cc212241df0b8975bb0e6d7f9028405a9c664e49 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Thu, 11 Jul 2019 10:14:23 +0800
Subject: arm: dts: mediatek: add basic support for MT7629 SoC

This adds basic support for MT7629 reference board.

Tested-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm/boot/dts/Makefile                |   1 +
 arch/arm/boot/dts/mt7629-rfb.dts          | 263 ++++++++++++++++
 arch/arm/boot/dts/mt7629.dtsi             | 481 ++++++++++++++++++++++++++++++
 include/dt-bindings/reset/mt7629-resets.h |  71 +++++
 4 files changed, 816 insertions(+)
 create mode 100644 arch/arm/boot/dts/mt7629-rfb.dts
 create mode 100644 arch/arm/boot/dts/mt7629.dtsi
 create mode 100644 include/dt-bindings/reset/mt7629-resets.h

(limited to 'include')

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 9159fa2cea90..ff5d6ae7d502 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -1262,6 +1262,7 @@ dtb-$(CONFIG_ARCH_MEDIATEK) += \
 	mt7623a-rfb-nand.dtb \
 	mt7623n-rfb-emmc.dtb \
 	mt7623n-bananapi-bpi-r2.dtb \
+	mt7629-rfb.dtb \
 	mt8127-moose.dtb \
 	mt8135-evbp1.dtb
 dtb-$(CONFIG_ARCH_MILBEAUT) += milbeaut-m10v-evb.dtb
diff --git a/arch/arm/boot/dts/mt7629-rfb.dts b/arch/arm/boot/dts/mt7629-rfb.dts
new file mode 100644
index 000000000000..3621b7d2b22a
--- /dev/null
+++ b/arch/arm/boot/dts/mt7629-rfb.dts
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Ryder Lee <ryder.lee@mediatek.com>
+ */
+
+/dts-v1/;
+#include <dt-bindings/input/input.h>
+#include "mt7629.dtsi"
+
+/ {
+	model = "MediaTek MT7629 reference board";
+	compatible = "mediatek,mt7629-rfb", "mediatek,mt7629";
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	chosen {
+		stdout-path = "serial0:115200n8";
+	};
+
+	gpio-keys {
+		compatible = "gpio-keys";
+
+		reset {
+			label = "factory";
+			linux,code = <KEY_RESTART>;
+			gpios = <&pio 60 GPIO_ACTIVE_LOW>;
+		};
+
+		wps {
+			label = "wps";
+			linux,code = <KEY_WPS_BUTTON>;
+			gpios = <&pio 58 GPIO_ACTIVE_LOW>;
+		};
+	};
+
+	memory@40000000 {
+		device_type = "memory";
+		reg = <0x40000000 0x10000000>;
+	};
+
+	reg_3p3v: regulator-3p3v {
+		compatible = "regulator-fixed";
+		regulator-name = "fixed-3.3V";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+
+	reg_5v: regulator-5v {
+		compatible = "regulator-fixed";
+		regulator-name = "fixed-5V";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		regulator-boot-on;
+		regulator-always-on;
+	};
+};
+
+&eth {
+	pinctrl-names = "default";
+	pinctrl-0 = <&eth_pins>;
+	pinctrl-1 = <&ephy_leds_pins>;
+	status = "okay";
+
+	gmac1: mac@1 {
+		compatible = "mediatek,eth-mac";
+		reg = <1>;
+		phy-handle = <&phy0>;
+	};
+
+	mdio: mdio-bus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+			phy-mode = "gmii";
+		};
+	};
+};
+
+&i2c {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c_pins>;
+	status = "okay";
+};
+
+&qspi {
+	pinctrl-names = "default";
+	pinctrl-0 = <&qspi_pins>;
+	status = "okay";
+
+	flash@0 {
+		compatible = "jedec,spi-nor";
+		reg = <0>;
+
+		partitions {
+			compatible = "fixed-partitions";
+			#address-cells = <1>;
+			#size-cells = <1>;
+
+			partition@0 {
+				label = "u-boot";
+				reg = <0x00000 0x60000>;
+				read-only;
+			};
+
+			partition@60000 {
+				label = "u-boot-env";
+				reg = <0x60000 0x10000>;
+				read-only;
+			};
+
+			factory: partition@70000 {
+				label = "factory";
+				reg = <0x70000 0x40000>;
+				read-only;
+			};
+
+			partition@b0000 {
+				label = "kernel";
+				reg = <0xb0000 0xb50000>;
+			};
+		};
+	};
+};
+
+&pcie {
+	pinctrl-names = "default";
+	pinctrl-0 = <&pcie_pins>;
+};
+
+&pciephy1 {
+	status = "okay";
+};
+
+&pio {
+	eth_pins: eth-pins {
+		mux {
+			function = "eth";
+			groups = "mdc_mdio";
+		};
+	};
+
+	ephy_leds_pins: ephy-leds-pins {
+		mux {
+			function = "led";
+			groups = "gphy_leds_0", "ephy_leds";
+		};
+	};
+
+	i2c_pins: i2c-pins {
+		mux {
+			function = "i2c";
+			groups =  "i2c_0";
+		};
+
+		conf {
+			pins = "I2C_SDA", "I2C_SCL";
+			drive-strength = <4>;
+			bias-disable;
+		};
+	};
+
+	pcie_pins: pcie-pins {
+		mux {
+			function = "pcie";
+			groups = "pcie_clkreq",
+				 "pcie_pereset",
+				 "pcie_wake";
+		};
+	};
+
+	pwm_pins: pwm-pins {
+		mux {
+			function = "pwm";
+			groups = "pwm_0";
+		};
+	};
+
+	/* SPI-NOR is shared pin with serial NAND */
+	qspi_pins: qspi-pins {
+		mux {
+			function = "flash";
+			groups = "spi_nor";
+		};
+	};
+
+	/* Serial NAND is shared pin with SPI-NOR */
+	serial_nand_pins: serial-nand-pins {
+		mux {
+			function = "flash";
+			groups = "snfi";
+		};
+	};
+
+	spi_pins: spi-pins {
+		mux {
+			function = "spi";
+			groups = "spi_0";
+		};
+	};
+
+	uart0_pins: uart0-pins {
+		mux {
+			function = "uart";
+			groups = "uart0_txd_rxd" ;
+		};
+	};
+
+	uart1_pins: uart1-pins {
+		mux {
+			function = "uart";
+			groups = "uart1_0_tx_rx" ;
+		};
+	};
+
+	uart2_pins: uart2-pins {
+		mux {
+			function = "uart";
+			groups = "uart2_0_txd_rxd" ;
+		};
+	};
+
+	watchdog_pins: watchdog-pins {
+		mux {
+			function = "watchdog";
+			groups = "watchdog";
+		};
+	};
+};
+
+&spi {
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi_pins>;
+	status = "okay";
+};
+
+&ssusb {
+	vusb33-supply = <&reg_3p3v>;
+	vbus-supply = <&reg_5v>;
+	status = "okay";
+};
+
+&u3phy0 {
+	status = "okay";
+};
+
+&uart0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&uart0_pins>;
+	status = "okay";
+};
+
+&watchdog {
+	pinctrl-names = "default";
+	pinctrl-0 = <&watchdog_pins>;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/mt7629.dtsi b/arch/arm/boot/dts/mt7629.dtsi
new file mode 100644
index 000000000000..9608bc2ccb3f
--- /dev/null
+++ b/arch/arm/boot/dts/mt7629.dtsi
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ *
+ * Author: Ryder Lee <ryder.lee@mediatek.com>
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+#include <dt-bindings/clock/mt7629-clk.h>
+#include <dt-bindings/power/mt7622-power.h>
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/phy/phy.h>
+#include <dt-bindings/reset/mt7629-resets.h>
+
+/ {
+	compatible = "mediatek,mt7629";
+	interrupt-parent = <&sysirq>;
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		enable-method = "mediatek,mt6589-smp";
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x0>;
+			clock-frequency = <1250000000>;
+			cci-control-port = <&cci_control2>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a7";
+			reg = <0x1>;
+			clock-frequency = <1250000000>;
+			cci-control-port = <&cci_control2>;
+		};
+	};
+
+	pmu {
+		compatible = "arm,cortex-a7-pmu";
+		interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_SPI 9 IRQ_TYPE_LEVEL_LOW>;
+		interrupt-affinity = <&cpu0>, <&cpu1>;
+	};
+
+	clk20m: oscillator-0 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <20000000>;
+		clock-output-names = "clk20m";
+	};
+
+	clk40m: oscillator-1 {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <40000000>;
+		clock-output-names = "clkxtal";
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupt-parent = <&gic>;
+		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
+			     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
+			     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>,
+			     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>;
+		clock-frequency = <20000000>;
+	};
+
+	soc {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		infracfg: syscon@10000000 {
+			compatible = "mediatek,mt7629-infracfg", "syscon";
+			reg = <0x10000000 0x1000>;
+			#clock-cells = <1>;
+		};
+
+		pericfg: syscon@10002000 {
+			compatible = "mediatek,mt7629-pericfg", "syscon";
+			reg = <0x10002000 0x1000>;
+			#clock-cells = <1>;
+		};
+
+		scpsys: scpsys@10006000 {
+			compatible = "mediatek,mt7629-scpsys",
+				     "mediatek,mt7622-scpsys";
+			#power-domain-cells = <1>;
+			reg = <0x10006000 0x1000>;
+			clocks = <&topckgen CLK_TOP_HIF_SEL>;
+			clock-names = "hif_sel";
+			assigned-clocks = <&topckgen CLK_TOP_HIF_SEL>;
+			assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>;
+			infracfg = <&infracfg>;
+		};
+
+		timer: timer@10009000 {
+			compatible = "mediatek,mt7629-timer",
+				     "mediatek,mt6765-timer";
+			reg = <0x10009000 0x60>;
+			interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH>;
+			clocks = <&clk20m>;
+			clock-names = "clk20m";
+		};
+
+		sysirq: interrupt-controller@10200a80 {
+			compatible = "mediatek,mt7629-sysirq",
+				     "mediatek,mt6577-sysirq";
+			reg = <0x10200a80 0x20>;
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			interrupt-parent = <&gic>;
+		};
+
+		apmixedsys: syscon@10209000 {
+			compatible = "mediatek,mt7629-apmixedsys", "syscon";
+			reg = <0x10209000 0x1000>;
+			#clock-cells = <1>;
+		};
+
+		rng: rng@1020f000 {
+			compatible = "mediatek,mt7629-rng",
+				     "mediatek,mt7623-rng";
+			reg = <0x1020f000 0x100>;
+			clocks = <&infracfg CLK_INFRA_TRNG_PD>;
+			clock-names = "rng";
+		};
+
+		topckgen: syscon@10210000 {
+			compatible = "mediatek,mt7629-topckgen", "syscon";
+			reg = <0x10210000 0x1000>;
+			#clock-cells = <1>;
+		};
+
+		watchdog: watchdog@10212000 {
+			compatible = "mediatek,mt7629-wdt",
+				     "mediatek,mt6589-wdt";
+			reg = <0x10212000 0x100>;
+		};
+
+		pio: pinctrl@10217000 {
+			compatible = "mediatek,mt7629-pinctrl";
+			reg = <0x10217000 0x8000>,
+			      <0x10005000 0x1000>;
+			reg-names = "base", "eint";
+			gpio-controller;
+			gpio-ranges = <&pio 0 0 79>;
+			#gpio-cells = <2>;
+			#interrupt-cells = <2>;
+			interrupt-controller;
+			interrupts = <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-parent = <&gic>;
+		};
+
+		gic: interrupt-controller@10300000 {
+			compatible = "arm,gic-400";
+			interrupt-controller;
+			#interrupt-cells = <3>;
+			interrupt-parent = <&gic>;
+			reg = <0x10310000 0x1000>,
+			      <0x10320000 0x1000>,
+			      <0x10340000 0x2000>,
+			      <0x10360000 0x2000>;
+		};
+
+		cci: cci@10390000 {
+			compatible = "arm,cci-400";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			reg = <0x10390000 0x1000>;
+			ranges = <0 0x10390000 0x10000>;
+
+			cci_control0: slave-if@1000 {
+				compatible = "arm,cci-400-ctrl-if";
+				interface-type = "ace-lite";
+				reg = <0x1000 0x1000>;
+			};
+
+			cci_control1: slave-if@4000 {
+				compatible = "arm,cci-400-ctrl-if";
+				interface-type = "ace";
+				reg = <0x4000 0x1000>;
+			};
+
+			cci_control2: slave-if@5000 {
+				compatible = "arm,cci-400-ctrl-if";
+				interface-type = "ace";
+				reg = <0x5000 0x1000>;
+			};
+
+			pmu@9000 {
+				compatible = "arm,cci-400-pmu,r1";
+				reg = <0x9000 0x5000>;
+				interrupts = <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>,
+					     <GIC_SPI 62 IRQ_TYPE_LEVEL_HIGH>;
+			};
+		};
+
+		uart0: serial@11002000 {
+			compatible = "mediatek,mt7629-uart",
+				     "mediatek,mt6577-uart";
+			reg = <0x11002000 0x400>;
+			interrupts = <GIC_SPI 91 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&topckgen CLK_TOP_UART_SEL>,
+				 <&pericfg CLK_PERI_UART0_PD>;
+			clock-names = "baud", "bus";
+			status = "disabled";
+		};
+
+		uart1: serial@11003000 {
+			compatible = "mediatek,mt7629-uart",
+				     "mediatek,mt6577-uart";
+			reg = <0x11003000 0x400>;
+			interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&topckgen CLK_TOP_UART_SEL>,
+				 <&pericfg CLK_PERI_UART1_PD>;
+			clock-names = "baud", "bus";
+			status = "disabled";
+		};
+
+		uart2: serial@11004000 {
+			compatible = "mediatek,mt7629-uart",
+				     "mediatek,mt6577-uart";
+			reg = <0x11004000 0x400>;
+			interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&topckgen CLK_TOP_UART_SEL>,
+				 <&pericfg CLK_PERI_UART2_PD>;
+			clock-names = "baud", "bus";
+			status = "disabled";
+		};
+
+		i2c: i2c@11007000 {
+			compatible = "mediatek,mt7629-i2c",
+				     "mediatek,mt2712-i2c";
+			reg = <0x11007000 0x90>,
+			      <0x11000100 0x80>;
+			interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_LOW>;
+			clock-div = <4>;
+			clocks = <&pericfg CLK_PERI_I2C0_PD>,
+				 <&pericfg CLK_PERI_AP_DMA_PD>;
+			clock-names = "main", "dma";
+			assigned-clocks = <&topckgen CLK_TOP_AXI_SEL>;
+			assigned-clock-parents = <&topckgen CLK_TOP_SYSPLL1_D2>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		spi: spi@1100a000 {
+			compatible = "mediatek,mt7629-spi",
+				     "mediatek,mt7622-spi";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			reg = <0x1100a000 0x100>;
+			interrupts = <GIC_SPI 118 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&topckgen CLK_TOP_SYSPLL3_D2>,
+				 <&topckgen CLK_TOP_SPI0_SEL>,
+				 <&pericfg CLK_PERI_SPI0_PD>;
+			clock-names = "parent-clk", "sel-clk", "spi-clk";
+			status = "disabled";
+		};
+
+		qspi: spi@11014000 {
+			compatible = "mediatek,mt7629-nor",
+				     "mediatek,mt8173-nor";
+			reg = <0x11014000 0xe0>;
+			clocks = <&pericfg CLK_PERI_FLASH_PD>,
+				 <&topckgen CLK_TOP_FLASH_SEL>;
+			clock-names = "spi", "sf";
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		ssusbsys: syscon@1a000000 {
+			compatible = "mediatek,mt7629-ssusbsys", "syscon";
+			reg = <0x1a000000 0x1000>;
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
+
+		ssusb: usb@1a0c0000 {
+			compatible = "mediatek,mt7629-xhci",
+				     "mediatek,mtk-xhci";
+			reg = <0x1a0c0000 0x01000>,
+			      <0x1a0c3e00 0x0100>;
+			reg-names = "mac", "ippc";
+			interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&ssusbsys CLK_SSUSB_SYS_EN>,
+				 <&ssusbsys CLK_SSUSB_REF_EN>,
+				 <&ssusbsys CLK_SSUSB_MCU_EN>,
+				 <&ssusbsys CLK_SSUSB_DMA_EN>;
+			clock-names = "sys_ck", "ref_ck", "mcu_ck", "dma_ck";
+			assigned-clocks = <&topckgen CLK_TOP_AXI_SEL>,
+					  <&topckgen CLK_TOP_SATA_SEL>,
+					  <&topckgen CLK_TOP_HIF_SEL>;
+			assigned-clock-parents = <&topckgen CLK_TOP_SYSPLL1_D2>,
+						 <&topckgen CLK_TOP_UNIVPLL2_D4>,
+						 <&topckgen CLK_TOP_UNIVPLL1_D2>;
+			power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF1>;
+			phys = <&u2port0 PHY_TYPE_USB2>,
+			       <&u3port0 PHY_TYPE_USB3>;
+			status = "disabled";
+		};
+
+		u3phy0: usb-phy@1a0c4000 {
+			compatible = "mediatek,generic-tphy-v2";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 0x1a0c4000 0xe00>;
+			status = "disabled";
+
+			u2port0: usb-phy@0 {
+				reg = <0 0x700>;
+				clocks = <&ssusbsys CLK_SSUSB_U2_PHY_EN>;
+				clock-names = "ref";
+				#phy-cells = <1>;
+				status = "okay";
+			};
+
+			u3port0: usb-phy@700 {
+				reg = <0x700 0x700>;
+				clocks = <&clk20m>;
+				clock-names = "ref";
+				#phy-cells = <1>;
+				status = "okay";
+			};
+		};
+
+		pciesys: syscon@1a100800 {
+			compatible = "mediatek,mt7629-pciesys", "syscon";
+			reg = <0x1a100800 0x1000>;
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
+
+		pcie: pcie@1a140000 {
+			compatible = "mediatek,mt7629-pcie";
+			device_type = "pci";
+			reg = <0x1a140000 0x1000>,
+			      <0x1a145000 0x1000>;
+			reg-names = "subsys","port1";
+			#address-cells = <3>;
+			#size-cells = <2>;
+			interrupts = <GIC_SPI 176 IRQ_TYPE_LEVEL_LOW>,
+				     <GIC_SPI 229 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&pciesys CLK_PCIE_P1_MAC_EN>,
+				 <&pciesys CLK_PCIE_P0_AHB_EN>,
+				 <&pciesys CLK_PCIE_P1_AUX_EN>,
+				 <&pciesys CLK_PCIE_P1_AXI_EN>,
+				 <&pciesys CLK_PCIE_P1_OBFF_EN>,
+				 <&pciesys CLK_PCIE_P1_PIPE_EN>;
+			clock-names = "sys_ck1", "ahb_ck1",
+				      "aux_ck1", "axi_ck1",
+				      "obff_ck1", "pipe_ck1";
+			assigned-clocks = <&topckgen CLK_TOP_SATA_SEL>,
+					  <&topckgen CLK_TOP_AXI_SEL>,
+					  <&topckgen CLK_TOP_HIF_SEL>;
+			assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL2_D4>,
+						 <&topckgen CLK_TOP_SYSPLL1_D2>,
+						 <&topckgen CLK_TOP_UNIVPLL1_D2>;
+			phys = <&pcieport1 PHY_TYPE_PCIE>;
+			phy-names = "pcie-phy1";
+			power-domains = <&scpsys MT7622_POWER_DOMAIN_HIF0>;
+			bus-range = <0x00 0xff>;
+			ranges = <0x82000000 0 0x20000000 0x20000000 0 0x10000000>;
+
+			pcie1: pcie@1,0 {
+				device_type = "pci";
+				reg = <0x0800 0 0 0 0>;
+				#address-cells = <3>;
+				#size-cells = <2>;
+				#interrupt-cells = <1>;
+				ranges;
+				num-lanes = <1>;
+				interrupt-map-mask = <0 0 0 7>;
+				interrupt-map = <0 0 0 1 &pcie_intc1 0>,
+						<0 0 0 2 &pcie_intc1 1>,
+						<0 0 0 3 &pcie_intc1 2>,
+						<0 0 0 4 &pcie_intc1 3>;
+
+				pcie_intc1: interrupt-controller {
+					interrupt-controller;
+					#address-cells = <0>;
+					#interrupt-cells = <1>;
+				};
+			};
+		};
+
+		pciephy1: pcie-phy@1a14a000 {
+			compatible = "mediatek,generic-tphy-v2";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <0 0x1a14a000 0x1000>;
+			status = "disabled";
+
+			pcieport1: port1phy@0 {
+				reg = <0 0x1000>;
+				clocks = <&clk20m>;
+				clock-names = "ref";
+				#phy-cells = <1>;
+				status = "okay";
+			};
+		};
+
+		ethsys: syscon@1b000000 {
+			compatible = "mediatek,mt7629-ethsys", "syscon";
+			reg = <0x1b000000 0x1000>;
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
+
+		eth: ethernet@1b100000 {
+			compatible = "mediatek,mt7629-eth","syscon";
+			reg = <0x1b100000 0x20000>;
+			interrupts = <GIC_SPI 223 IRQ_TYPE_LEVEL_LOW>,
+				     <GIC_SPI 224 IRQ_TYPE_LEVEL_LOW>,
+				     <GIC_SPI 225 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&topckgen CLK_TOP_ETH_SEL>,
+				 <&topckgen CLK_TOP_F10M_REF_SEL>,
+				 <&ethsys CLK_ETH_ESW_EN>,
+				 <&ethsys CLK_ETH_GP0_EN>,
+				 <&ethsys CLK_ETH_GP1_EN>,
+				 <&ethsys CLK_ETH_GP2_EN>,
+				 <&ethsys CLK_ETH_FE_EN>,
+				 <&sgmiisys0 CLK_SGMII_TX_EN>,
+				 <&sgmiisys0 CLK_SGMII_RX_EN>,
+				 <&sgmiisys0 CLK_SGMII_CDR_REF>,
+				 <&sgmiisys0 CLK_SGMII_CDR_FB>,
+				 <&sgmiisys1 CLK_SGMII_TX_EN>,
+				 <&sgmiisys1 CLK_SGMII_RX_EN>,
+				 <&sgmiisys1 CLK_SGMII_CDR_REF>,
+				 <&sgmiisys1 CLK_SGMII_CDR_FB>,
+				 <&apmixedsys CLK_APMIXED_SGMIPLL>,
+				 <&apmixedsys CLK_APMIXED_ETH2PLL>;
+			clock-names = "ethif", "sgmiitop", "esw", "gp0", "gp1",
+				      "gp2", "fe", "sgmii_tx250m", "sgmii_rx250m",
+				      "sgmii_cdr_ref", "sgmii_cdr_fb",
+				      "sgmii2_tx250m", "sgmii2_rx250m",
+				      "sgmii2_cdr_ref", "sgmii2_cdr_fb",
+				      "sgmii_ck", "eth2pll";
+			assigned-clocks = <&topckgen CLK_TOP_ETH_SEL>,
+					  <&topckgen CLK_TOP_F10M_REF_SEL>;
+			assigned-clock-parents = <&topckgen CLK_TOP_UNIVPLL1_D2>,
+						 <&topckgen CLK_TOP_SGMIIPLL_D2>;
+			power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>;
+			mediatek,ethsys = <&ethsys>;
+			mediatek,sgmiisys = <&sgmiisys0>, <&sgmiisys1>;
+			mediatek,infracfg = <&infracfg>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		sgmiisys0: syscon@1b128000 {
+			compatible = "mediatek,mt7629-sgmiisys", "syscon";
+			reg = <0x1b128000 0x3000>;
+			#clock-cells = <1>;
+			mediatek,physpeed = "2500";
+		};
+
+		sgmiisys1: syscon@1b130000 {
+			compatible = "mediatek,mt7629-sgmiisys", "syscon";
+			reg = <0x1b130000 0x3000>;
+			#clock-cells = <1>;
+			mediatek,physpeed = "2500";
+		};
+	};
+};
diff --git a/include/dt-bindings/reset/mt7629-resets.h b/include/dt-bindings/reset/mt7629-resets.h
new file mode 100644
index 000000000000..6bb85734f68d
--- /dev/null
+++ b/include/dt-bindings/reset/mt7629-resets.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 MediaTek Inc.
+ */
+
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT7629
+#define _DT_BINDINGS_RESET_CONTROLLER_MT7629
+
+/* INFRACFG resets */
+#define MT7629_INFRA_EMI_MPU_RST		0
+#define MT7629_INFRA_UART5_RST			2
+#define MT7629_INFRA_CIRQ_EINT_RST		3
+#define MT7629_INFRA_APXGPT_RST			4
+#define MT7629_INFRA_SCPSYS_RST			5
+#define MT7629_INFRA_KP_RST			6
+#define MT7629_INFRA_SPI1_RST			7
+#define MT7629_INFRA_SPI4_RST			8
+#define MT7629_INFRA_SYSTIMER_RST		9
+#define MT7629_INFRA_IRRX_RST			10
+#define MT7629_INFRA_AO_BUS_RST			16
+#define MT7629_INFRA_EMI_RST			32
+#define MT7629_INFRA_APMIXED_RST		35
+#define MT7629_INFRA_MIPI_RST			36
+#define MT7629_INFRA_TRNG_RST			37
+#define MT7629_INFRA_SYSCIRQ_RST		38
+#define MT7629_INFRA_MIPI_CSI_RST		39
+#define MT7629_INFRA_GCE_FAXI_RST		40
+#define MT7629_INFRA_I2C_SRAM_RST		41
+#define MT7629_INFRA_IOMMU_RST			47
+
+/* PERICFG resets */
+#define MT7629_PERI_UART0_SW_RST		0
+#define MT7629_PERI_UART1_SW_RST		1
+#define MT7629_PERI_UART2_SW_RST		2
+#define MT7629_PERI_BTIF_SW_RST			6
+#define MT7629_PERI_PWN_SW_RST			8
+#define MT7629_PERI_DMA_SW_RST			11
+#define MT7629_PERI_NFI_SW_RST			14
+#define MT7629_PERI_I2C0_SW_RST			22
+#define MT7629_PERI_SPI0_SW_RST			33
+#define MT7629_PERI_SPI1_SW_RST			34
+#define MT7629_PERI_FLASHIF_SW_RST		36
+
+/* PCIe Subsystem resets */
+#define MT7629_PCIE1_CORE_RST			19
+#define MT7629_PCIE1_MMIO_RST			20
+#define MT7629_PCIE1_HRST			21
+#define MT7629_PCIE1_USER_RST			22
+#define MT7629_PCIE1_PIPE_RST			23
+#define MT7629_PCIE0_CORE_RST			27
+#define MT7629_PCIE0_MMIO_RST			28
+#define MT7629_PCIE0_HRST			29
+#define MT7629_PCIE0_USER_RST			30
+#define MT7629_PCIE0_PIPE_RST			31
+
+/* SSUSB Subsystem resets */
+#define MT7629_SSUSB_PHY_PWR_RST		3
+#define MT7629_SSUSB_MAC_PWR_RST		4
+
+/* ETH Subsystem resets */
+#define MT7629_ETHSYS_SYS_RST			0
+#define MT7629_ETHSYS_MCM_RST			2
+#define MT7629_ETHSYS_HSDMA_RST			5
+#define MT7629_ETHSYS_FE_RST			6
+#define MT7629_ETHSYS_ESW_RST			16
+#define MT7629_ETHSYS_GMAC_RST			23
+#define MT7629_ETHSYS_EPHY_RST			24
+#define MT7629_ETHSYS_CRYPTO_RST		29
+#define MT7629_ETHSYS_PPE_RST			31
+
+#endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT7629 */
-- 
cgit v1.2.3


From 7542c6dedbc1caa284ca4cbd6b64f99023ff1b97 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 18 Jun 2019 12:09:26 +0900
Subject: jffs2: Remove C++ style comments from uapi header

Linux kernel tolerates C++ style comments these days. Actually, the
SPDX License tags for .c files start with //.

On the other hand, uapi headers are written in more strict C, where
the C++ comment style is forbidden.

I simply dropped these lines instead of fixing the comment style.

This code has been always commented out since it was added around
Linux 2.4.9 (i.e. commented out for more than 17 years).

'Maybe later...' will never happen.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 include/uapi/linux/jffs2.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/jffs2.h b/include/uapi/linux/jffs2.h
index a18b719f49d4..784ba0b9690a 100644
--- a/include/uapi/linux/jffs2.h
+++ b/include/uapi/linux/jffs2.h
@@ -77,11 +77,6 @@
 
 #define JFFS2_ACL_VERSION		0x0001
 
-// Maybe later...
-//#define JFFS2_NODETYPE_CHECKPOINT (JFFS2_FEATURE_RWCOMPAT_DELETE | JFFS2_NODE_ACCURATE | 3)
-//#define JFFS2_NODETYPE_OPTIONS (JFFS2_FEATURE_RWCOMPAT_COPY | JFFS2_NODE_ACCURATE | 4)
-
-
 #define JFFS2_INO_FLAG_PREREAD	  1	/* Do read_inode() for this one at
 					   mount time, don't wait for it to
 					   happen later */
-- 
cgit v1.2.3


From cdfee5623290bc893f595636b44fa28e8207c5b3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 16 Aug 2019 08:24:35 +0200
Subject: driver core: initialize a default DMA mask for platform device

We still treat devices without a DMA mask as defaulting to 32-bits for
both mask, but a few releases ago we've started warning about such
cases, as they require special cases to work around this sloppyness.
Add a dma_mask field to struct platform_device so that we can initialize
the dma_mask pointer in struct device and initialize both masks to
32-bits by default, replacing similar functionality in m68k and
powerpc.  The arch_setup_pdev_archdata hooks is now unused and removed.

Note that the code looks a little odd with the various conditionals
because we have to support platform_device structures that are
statically allocated.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20190816062435.881-7-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/m68k/kernel/dma.c               |  9 ---------
 arch/powerpc/kernel/setup-common.c   |  6 ------
 arch/sh/boards/mach-ap325rxa/setup.c |  1 -
 arch/sh/boards/mach-ecovec24/setup.c |  2 --
 arch/sh/boards/mach-kfr2r09/setup.c  |  1 -
 arch/sh/boards/mach-migor/setup.c    |  1 -
 arch/sh/boards/mach-se/7724/setup.c  |  2 --
 drivers/base/platform.c              | 37 ++++++++++++++++--------------------
 include/linux/platform_device.h      |  2 +-
 9 files changed, 17 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 30cd59caf037..447849d1d645 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -79,12 +79,3 @@ void arch_sync_dma_for_device(struct device *dev, phys_addr_t handle,
 		break;
 	}
 }
-
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
-	if (pdev->dev.coherent_dma_mask == DMA_MASK_NONE &&
-	    pdev->dev.dma_mask == NULL) {
-		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
-	}
-}
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..5e6543aba1b3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -778,12 +778,6 @@ void ppc_printk_progress(char *s, unsigned short hex)
 	pr_info("%s\n", s);
 }
 
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
-	pdev->archdata.dma_mask = DMA_BIT_MASK(32);
-	pdev->dev.dma_mask = &pdev->archdata.dma_mask;
-}
-
 static __init void print_system_info(void)
 {
 	pr_info("-----------------------------------------------------\n");
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index 8301a4378f50..665cad452798 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -527,7 +527,6 @@ static int __init ap325rxa_devices_setup(void)
 
 	/* Initialize CEU platform device separately to map memory first */
 	device_initialize(&ap325rxa_ceu_device.dev);
-	arch_setup_pdev_archdata(&ap325rxa_ceu_device);
 	dma_declare_coherent_memory(&ap325rxa_ceu_device.dev,
 			ceu_dma_membase, ceu_dma_membase,
 			ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index f402aa741bf3..acaa97459531 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1440,7 +1440,6 @@ static int __init arch_setup(void)
 
 	/* Initialize CEU platform devices separately to map memory first */
 	device_initialize(&ecovec_ceu_devices[0]->dev);
-	arch_setup_pdev_archdata(ecovec_ceu_devices[0]);
 	dma_declare_coherent_memory(&ecovec_ceu_devices[0]->dev,
 				    ceu0_dma_membase, ceu0_dma_membase,
 				    ceu0_dma_membase +
@@ -1448,7 +1447,6 @@ static int __init arch_setup(void)
 	platform_device_add(ecovec_ceu_devices[0]);
 
 	device_initialize(&ecovec_ceu_devices[1]->dev);
-	arch_setup_pdev_archdata(ecovec_ceu_devices[1]);
 	dma_declare_coherent_memory(&ecovec_ceu_devices[1]->dev,
 				    ceu1_dma_membase, ceu1_dma_membase,
 				    ceu1_dma_membase +
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index 1cf9a47ac90e..96538ba3aa32 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -601,7 +601,6 @@ static int __init kfr2r09_devices_setup(void)
 
 	/* Initialize CEU platform device separately to map memory first */
 	device_initialize(&kfr2r09_ceu_device.dev);
-	arch_setup_pdev_archdata(&kfr2r09_ceu_device);
 	dma_declare_coherent_memory(&kfr2r09_ceu_device.dev,
 			ceu_dma_membase, ceu_dma_membase,
 			ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 90702740f207..9ed369dad62d 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -602,7 +602,6 @@ static int __init migor_devices_setup(void)
 
 	/* Initialize CEU platform device separately to map memory first */
 	device_initialize(&migor_ceu_device.dev);
-	arch_setup_pdev_archdata(&migor_ceu_device);
 	dma_declare_coherent_memory(&migor_ceu_device.dev,
 			ceu_dma_membase, ceu_dma_membase,
 			ceu_dma_membase + CEU_BUFFER_MEMORY_SIZE - 1);
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 3674064816c7..32f5dd944889 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -937,7 +937,6 @@ static int __init devices_setup(void)
 
 	/* Initialize CEU platform devices separately to map memory first */
 	device_initialize(&ms7724se_ceu_devices[0]->dev);
-	arch_setup_pdev_archdata(ms7724se_ceu_devices[0]);
 	dma_declare_coherent_memory(&ms7724se_ceu_devices[0]->dev,
 				    ceu0_dma_membase, ceu0_dma_membase,
 				    ceu0_dma_membase +
@@ -945,7 +944,6 @@ static int __init devices_setup(void)
 	platform_device_add(ms7724se_ceu_devices[0]);
 
 	device_initialize(&ms7724se_ceu_devices[1]->dev);
-	arch_setup_pdev_archdata(ms7724se_ceu_devices[1]);
 	dma_declare_coherent_memory(&ms7724se_ceu_devices[1]->dev,
 				    ceu1_dma_membase, ceu1_dma_membase,
 				    ceu1_dma_membase +
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index ec974ba9c0c4..600913aea73b 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -39,25 +39,6 @@ struct device platform_bus = {
 };
 EXPORT_SYMBOL_GPL(platform_bus);
 
-/**
- * arch_setup_pdev_archdata - Allow manipulation of archdata before its used
- * @pdev: platform device
- *
- * This is called before platform_device_add() such that any pdev_archdata may
- * be setup before the platform_notifier is called.  So if a user needs to
- * manipulate any relevant information in the pdev_archdata they can do:
- *
- *	platform_device_alloc()
- *	... manipulate ...
- *	platform_device_add()
- *
- * And if they don't care they can just call platform_device_register() and
- * everything will just work out.
- */
-void __weak arch_setup_pdev_archdata(struct platform_device *pdev)
-{
-}
-
 /**
  * platform_get_resource - get a resource for a device
  * @dev: platform device
@@ -264,6 +245,20 @@ struct platform_object {
 	char name[];
 };
 
+/*
+ * Set up default DMA mask for platform devices if the they weren't
+ * previously set by the architecture / DT.
+ */
+static void setup_pdev_dma_masks(struct platform_device *pdev)
+{
+	if (!pdev->dev.coherent_dma_mask)
+		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	if (!pdev->dma_mask)
+		pdev->dma_mask = DMA_BIT_MASK(32);
+	if (!pdev->dev.dma_mask)
+		pdev->dev.dma_mask = &pdev->dma_mask;
+};
+
 /**
  * platform_device_put - destroy a platform device
  * @pdev: platform device to free
@@ -310,7 +305,7 @@ struct platform_device *platform_device_alloc(const char *name, int id)
 		pa->pdev.id = id;
 		device_initialize(&pa->pdev.dev);
 		pa->pdev.dev.release = platform_device_release;
-		arch_setup_pdev_archdata(&pa->pdev);
+		setup_pdev_dma_masks(&pa->pdev);
 	}
 
 	return pa ? &pa->pdev : NULL;
@@ -512,7 +507,7 @@ EXPORT_SYMBOL_GPL(platform_device_del);
 int platform_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
-	arch_setup_pdev_archdata(pdev);
+	setup_pdev_dma_masks(pdev);
 	return platform_device_add(pdev);
 }
 EXPORT_SYMBOL_GPL(platform_device_register);
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 9bc36b589827..34a3d8ed8ba7 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -24,6 +24,7 @@ struct platform_device {
 	int		id;
 	bool		id_auto;
 	struct device	dev;
+	u64		dma_mask;
 	u32		num_resources;
 	struct resource	*resource;
 
@@ -48,7 +49,6 @@ extern void platform_device_unregister(struct platform_device *);
 extern struct bus_type platform_bus_type;
 extern struct device platform_bus;
 
-extern void arch_setup_pdev_archdata(struct platform_device *);
 extern struct resource *platform_get_resource(struct platform_device *,
 					      unsigned int, unsigned int);
 extern void __iomem *
-- 
cgit v1.2.3


From 6b8ac43c33b912bc5435192ce3b3f051f6f2dc9d Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 20 Aug 2019 14:05:28 +0900
Subject: ASoC: soc-dai: use bit field for bus_control

.bus_control can be bit field.
this patch do it.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Link: https://lore.kernel.org/r/87v9uszazh.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dai.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h
index dc48fe081a20..939c73db6a03 100644
--- a/include/sound/soc-dai.h
+++ b/include/sound/soc-dai.h
@@ -293,8 +293,6 @@ struct snd_soc_dai_driver {
 	/* Optional Callback used at pcm creation*/
 	int (*pcm_new)(struct snd_soc_pcm_runtime *rtd,
 		       struct snd_soc_dai *dai);
-	/* DAI is also used for the control bus */
-	bool bus_control;
 
 	/* ops */
 	const struct snd_soc_dai_ops *ops;
@@ -306,6 +304,7 @@ struct snd_soc_dai_driver {
 	unsigned int symmetric_rates:1;
 	unsigned int symmetric_channels:1;
 	unsigned int symmetric_samplebits:1;
+	unsigned int bus_control:1; /* DAI is also used for the control bus */
 
 	/* probe ordering - for components with runtime dependencies */
 	int probe_order;
-- 
cgit v1.2.3


From b99328a60a482108f5195b4d611f90992ca016ba Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 22 Aug 2019 13:00:15 +0200
Subject: timekeeping/vsyscall: Prevent math overflow in BOOTTIME update

The VDSO update for CLOCK_BOOTTIME has a overflow issue as it shifts the
nanoseconds based boot time offset left by the clocksource shift. That
overflows once the boot time offset becomes large enough. As a consequence
CLOCK_BOOTTIME in the VDSO becomes a random number causing applications to
misbehave.

Fix it by storing a timespec64 representation of the offset when boot time
is adjusted and add that to the MONOTONIC base time value in the vdso data
page. Using the timespec64 representation avoids a 64bit division in the
update code.

Fixes: 44f57d788e7d ("timekeeping: Provide a generic update_vsyscall() implementation")
Reported-by: Chris Clayton <chris2553@googlemail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Chris Clayton <chris2553@googlemail.com>
Tested-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908221257580.1983@nanos.tec.linutronix.de
---
 include/linux/timekeeper_internal.h |  5 +++++
 kernel/time/timekeeping.c           |  5 +++++
 kernel/time/vsyscall.c              | 22 +++++++++++++---------
 3 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 7acb953298a7..84ff2844df2a 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -57,6 +57,7 @@ struct tk_read_base {
  * @cs_was_changed_seq:	The sequence number of clocksource change events
  * @next_leap_ktime:	CLOCK_MONOTONIC time value of a pending leap-second
  * @raw_sec:		CLOCK_MONOTONIC_RAW  time in seconds
+ * @monotonic_to_boot:	CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
  * @cycle_interval:	Number of clock cycles in one NTP interval
  * @xtime_interval:	Number of clock shifted nano seconds in one NTP
  *			interval.
@@ -84,6 +85,9 @@ struct tk_read_base {
  *
  * wall_to_monotonic is no longer the boot time, getboottime must be
  * used instead.
+ *
+ * @monotonic_to_boottime is a timespec64 representation of @offs_boot to
+ * accelerate the VDSO update for CLOCK_BOOTTIME.
  */
 struct timekeeper {
 	struct tk_read_base	tkr_mono;
@@ -99,6 +103,7 @@ struct timekeeper {
 	u8			cs_was_changed_seq;
 	ktime_t			next_leap_ktime;
 	u64			raw_sec;
+	struct timespec64	monotonic_to_boot;
 
 	/* The following members are for timekeeping internal use */
 	u64			cycle_interval;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d911c8470149..ca69290bee2a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -146,6 +146,11 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
 static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
 {
 	tk->offs_boot = ktime_add(tk->offs_boot, delta);
+	/*
+	 * Timespec representation for VDSO update to avoid 64bit division
+	 * on every update.
+	 */
+	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
 }
 
 /*
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 8cf3596a4ce6..4bc37ac3bb05 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -17,7 +17,7 @@ static inline void update_vdso_data(struct vdso_data *vdata,
 				    struct timekeeper *tk)
 {
 	struct vdso_timestamp *vdso_ts;
-	u64 nsec;
+	u64 nsec, sec;
 
 	vdata[CS_HRES_COARSE].cycle_last	= tk->tkr_mono.cycle_last;
 	vdata[CS_HRES_COARSE].mask		= tk->tkr_mono.mask;
@@ -45,23 +45,27 @@ static inline void update_vdso_data(struct vdso_data *vdata,
 	}
 	vdso_ts->nsec	= nsec;
 
-	/* CLOCK_MONOTONIC_RAW */
-	vdso_ts		= &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
-	vdso_ts->sec	= tk->raw_sec;
-	vdso_ts->nsec	= tk->tkr_raw.xtime_nsec;
+	/* Copy MONOTONIC time for BOOTTIME */
+	sec	= vdso_ts->sec;
+	/* Add the boot offset */
+	sec	+= tk->monotonic_to_boot.tv_sec;
+	nsec	+= (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
 
 	/* CLOCK_BOOTTIME */
 	vdso_ts		= &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
-	vdso_ts->sec	= tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-	nsec = tk->tkr_mono.xtime_nsec;
-	nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
-		       ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+	vdso_ts->sec	= sec;
+
 	while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
 		nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
 		vdso_ts->sec++;
 	}
 	vdso_ts->nsec	= nsec;
 
+	/* CLOCK_MONOTONIC_RAW */
+	vdso_ts		= &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+	vdso_ts->sec	= tk->raw_sec;
+	vdso_ts->nsec	= tk->tkr_raw.xtime_nsec;
+
 	/* CLOCK_TAI */
 	vdso_ts		= &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
 	vdso_ts->sec	= tk->xtime_sec + (s64)tk->tai_offset;
-- 
cgit v1.2.3


From 088e88be5a380cc4e81963a9a02815da465d144f Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Sat, 27 Jul 2019 14:04:12 +0200
Subject: dt-bindings: phy: add binding for the Lantiq VRX200 and ARX300 PCIe
 PHYs

Add the bindings for the PCIe PHY on Lantiq VRX200 and ARX300 SoCs.
The IP block contains settings for the PHY and a PLL.
The PLL mode is configurable through a dedicated #phy-cell in .dts.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 .../bindings/phy/lantiq,vrx200-pcie-phy.yaml       | 95 ++++++++++++++++++++++
 include/dt-bindings/phy/phy-lantiq-vrx200-pcie.h   | 11 +++
 2 files changed, 106 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml
 create mode 100644 include/dt-bindings/phy/phy-lantiq-vrx200-pcie.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml
new file mode 100644
index 000000000000..8a56a8526cef
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/lantiq,vrx200-pcie-phy.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/lantiq,vrx200-pcie-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Lantiq VRX200 and ARX300 PCIe PHY Device Tree Bindings
+
+maintainers:
+  - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+properties:
+  "#phy-cells":
+    const: 1
+    description: selects the PHY mode as defined in <dt-bindings/phy/phy-lantiq-vrx200-pcie.h>
+
+  compatible:
+    enum:
+      - lantiq,vrx200-pcie-phy
+      - lantiq,arx300-pcie-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: PHY module clock
+      - description: PDI register clock
+
+  clock-names:
+    items:
+      - const: phy
+      - const: pdi
+
+  resets:
+    items:
+      - description: exclusive PHY reset line
+      - description: shared reset line between the PCIe PHY and PCIe controller
+
+  resets-names:
+    items:
+      - const: phy
+      - const: pcie
+
+  lantiq,rcu:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: phandle to the RCU syscon
+
+  lantiq,rcu-endian-offset:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: the offset of the endian registers for this PHY instance in the RCU syscon
+
+  lantiq,rcu-big-endian-mask:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: the mask to set the PDI (PHY) registers for this PHY instance to big endian
+
+  big-endian:
+    description: Configures the PDI (PHY) registers in big-endian mode
+    type: boolean
+
+  little-endian:
+    description: Configures the PDI (PHY) registers in big-endian mode
+    type: boolean
+
+required:
+  - "#phy-cells"
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - lantiq,rcu
+  - lantiq,rcu-endian-offset
+  - lantiq,rcu-big-endian-mask
+
+additionalProperties: false
+
+examples:
+  - |
+    pcie0_phy: phy@106800 {
+        compatible = "lantiq,vrx200-pcie-phy";
+        reg = <0x106800 0x100>;
+        lantiq,rcu = <&rcu0>;
+        lantiq,rcu-endian-offset = <0x4c>;
+        lantiq,rcu-big-endian-mask = <0x80>; /* bit 7 */
+        big-endian;
+        clocks = <&pmu 32>, <&pmu 36>;
+        clock-names = "phy", "pdi";
+        resets = <&reset0 12 24>, <&reset0 22 22>;
+        reset-names = "phy", "pcie";
+        #phy-cells = <1>;
+    };
+
+...
diff --git a/include/dt-bindings/phy/phy-lantiq-vrx200-pcie.h b/include/dt-bindings/phy/phy-lantiq-vrx200-pcie.h
new file mode 100644
index 000000000000..95a7896356d6
--- /dev/null
+++ b/include/dt-bindings/phy/phy-lantiq-vrx200-pcie.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+ */
+
+#define LANTIQ_PCIE_PHY_MODE_25MHZ		0
+#define LANTIQ_PCIE_PHY_MODE_25MHZ_SSC		1
+#define LANTIQ_PCIE_PHY_MODE_36MHZ		2
+#define LANTIQ_PCIE_PHY_MODE_36MHZ_SSC		3
+#define LANTIQ_PCIE_PHY_MODE_100MHZ		4
+#define LANTIQ_PCIE_PHY_MODE_100MHZ_SSC		5
-- 
cgit v1.2.3


From bf03473d5bcc85fbe9533fa042f67809d8520c4e Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Wed, 21 Aug 2019 13:58:18 -0500
Subject: soundwire: add debugfs support

Add base debugfs mechanism for SoundWire bus by creating soundwire
root and master-N and slave-x hierarchy.

Also add SDW Slave SCP, DP0 and DP-N register debug file.

Registers not implemented will print as "XX"

Credits: this patch is based on an earlier internal contribution by
Vinod Koul, Sanyog Kale, Shreyas Nc and Hardik Shah.

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Sanyog Kale <sanyog.r.kale@intel.com>
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190821185821.12690-2-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/Makefile    |   4 ++
 drivers/soundwire/bus.c       |   6 ++
 drivers/soundwire/bus.h       |  16 +++++
 drivers/soundwire/bus_type.c  |   3 +
 drivers/soundwire/debugfs.c   | 151 ++++++++++++++++++++++++++++++++++++++++++
 drivers/soundwire/slave.c     |   1 +
 include/linux/soundwire/sdw.h |   8 +++
 7 files changed, 189 insertions(+)
 create mode 100644 drivers/soundwire/debugfs.c

(limited to 'include')

diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile
index fd99a831b92a..34bbd36a9851 100644
--- a/drivers/soundwire/Makefile
+++ b/drivers/soundwire/Makefile
@@ -7,6 +7,10 @@
 soundwire-bus-objs := bus_type.o bus.o slave.o mipi_disco.o stream.o
 obj-$(CONFIG_SOUNDWIRE_BUS) += soundwire-bus.o
 
+ifdef CONFIG_DEBUG_FS
+soundwire-bus-objs += debugfs.o
+endif
+
 #Cadence Objs
 soundwire-cadence-objs := cadence_master.o
 obj-$(CONFIG_SOUNDWIRE_CADENCE) += soundwire-cadence.o
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index 50f9cc5eb5f6..728db3ebad6e 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -49,6 +49,8 @@ int sdw_add_bus_master(struct sdw_bus *bus)
 		}
 	}
 
+	sdw_bus_debugfs_init(bus);
+
 	/*
 	 * Device numbers in SoundWire are 0 through 15. Enumeration device
 	 * number (0), Broadcast device number (15), Group numbers (12 and
@@ -109,6 +111,8 @@ static int sdw_delete_slave(struct device *dev, void *data)
 	struct sdw_slave *slave = dev_to_sdw_dev(dev);
 	struct sdw_bus *bus = slave->bus;
 
+	sdw_slave_debugfs_exit(slave);
+
 	mutex_lock(&bus->bus_lock);
 
 	if (slave->dev_num) /* clear dev_num if assigned */
@@ -130,6 +134,8 @@ static int sdw_delete_slave(struct device *dev, void *data)
 void sdw_delete_bus_master(struct sdw_bus *bus)
 {
 	device_for_each_child(bus->dev, NULL, sdw_delete_slave);
+
+	sdw_bus_debugfs_exit(bus);
 }
 EXPORT_SYMBOL(sdw_delete_bus_master);
 
diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h
index 4429c51c5f86..9d6ea7e447ff 100644
--- a/drivers/soundwire/bus.h
+++ b/drivers/soundwire/bus.h
@@ -18,6 +18,22 @@ static inline int sdw_acpi_find_slaves(struct sdw_bus *bus)
 void sdw_extract_slave_id(struct sdw_bus *bus,
 			  u64 addr, struct sdw_slave_id *id);
 
+#ifdef CONFIG_DEBUG_FS
+void sdw_bus_debugfs_init(struct sdw_bus *bus);
+void sdw_bus_debugfs_exit(struct sdw_bus *bus);
+void sdw_slave_debugfs_init(struct sdw_slave *slave);
+void sdw_slave_debugfs_exit(struct sdw_slave *slave);
+void sdw_debugfs_init(void);
+void sdw_debugfs_exit(void);
+#else
+static inline void sdw_bus_debugfs_init(struct sdw_bus *bus) {}
+static inline void sdw_bus_debugfs_exit(struct sdw_bus *bus) {}
+static inline void sdw_slave_debugfs_init(struct sdw_slave *slave) {}
+static inline void sdw_slave_debugfs_exit(struct sdw_slave *slave) {}
+static inline void sdw_debugfs_init(void) {}
+static inline void sdw_debugfs_exit(void) {}
+#endif
+
 enum {
 	SDW_MSG_FLAG_READ = 0,
 	SDW_MSG_FLAG_WRITE,
diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c
index 2655602f0cfb..4a465f55039f 100644
--- a/drivers/soundwire/bus_type.c
+++ b/drivers/soundwire/bus_type.c
@@ -6,6 +6,7 @@
 #include <linux/pm_domain.h>
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_type.h>
+#include "bus.h"
 
 /**
  * sdw_get_device_id - find the matching SoundWire device id
@@ -177,11 +178,13 @@ EXPORT_SYMBOL_GPL(sdw_unregister_driver);
 
 static int __init sdw_bus_init(void)
 {
+	sdw_debugfs_init();
 	return bus_register(&sdw_bus_type);
 }
 
 static void __exit sdw_bus_exit(void)
 {
+	sdw_debugfs_exit();
 	bus_unregister(&sdw_bus_type);
 }
 
diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c
new file mode 100644
index 000000000000..fb1140e82b86
--- /dev/null
+++ b/drivers/soundwire/debugfs.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2017-2019 Intel Corporation.
+
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/soundwire/sdw.h>
+#include <linux/soundwire/sdw_registers.h>
+#include "bus.h"
+
+static struct dentry *sdw_debugfs_root;
+
+void sdw_bus_debugfs_init(struct sdw_bus *bus)
+{
+	char name[16];
+
+	if (!sdw_debugfs_root)
+		return;
+
+	/* create the debugfs master-N */
+	snprintf(name, sizeof(name), "master-%d", bus->link_id);
+	bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root);
+}
+
+void sdw_bus_debugfs_exit(struct sdw_bus *bus)
+{
+	debugfs_remove_recursive(bus->debugfs);
+}
+
+#define RD_BUF (3 * PAGE_SIZE)
+
+static ssize_t sdw_sprintf(struct sdw_slave *slave,
+			   char *buf, size_t pos, unsigned int reg)
+{
+	int value;
+
+	value = sdw_read(slave, reg);
+
+	if (value < 0)
+		return scnprintf(buf + pos, RD_BUF - pos, "%3x\tXX\n", reg);
+	else
+		return scnprintf(buf + pos, RD_BUF - pos,
+				"%3x\t%2x\n", reg, value);
+}
+
+static int sdw_slave_reg_show(struct seq_file *s_file, void *data)
+{
+	struct sdw_slave *slave = s_file->private;
+	char *buf;
+	ssize_t ret;
+	int i, j;
+
+	buf = kzalloc(RD_BUF, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	ret = scnprintf(buf, RD_BUF, "Register  Value\n");
+
+	/* DP0 non-banked registers */
+	ret += scnprintf(buf + ret, RD_BUF - ret, "\nDP0\n");
+	for (i = SDW_DP0_INT; i <= SDW_DP0_PREPARECTRL; i++)
+		ret += sdw_sprintf(slave, buf, ret, i);
+
+	/* DP0 Bank 0 registers */
+	ret += scnprintf(buf + ret, RD_BUF - ret, "Bank0\n");
+	ret += sdw_sprintf(slave, buf, ret, SDW_DP0_CHANNELEN);
+	for (i = SDW_DP0_SAMPLECTRL1; i <= SDW_DP0_LANECTRL; i++)
+		ret += sdw_sprintf(slave, buf, ret, i);
+
+	/* DP0 Bank 1 registers */
+	ret += scnprintf(buf + ret, RD_BUF - ret, "Bank1\n");
+	ret += sdw_sprintf(slave, buf, ret,
+			SDW_DP0_CHANNELEN + SDW_BANK1_OFFSET);
+	for (i = SDW_DP0_SAMPLECTRL1 + SDW_BANK1_OFFSET;
+			i <= SDW_DP0_LANECTRL + SDW_BANK1_OFFSET; i++)
+		ret += sdw_sprintf(slave, buf, ret, i);
+
+	/* SCP registers */
+	ret += scnprintf(buf + ret, RD_BUF - ret, "\nSCP\n");
+	for (i = SDW_SCP_INT1; i <= SDW_SCP_BANKDELAY; i++)
+		ret += sdw_sprintf(slave, buf, ret, i);
+	for (i = SDW_SCP_DEVID_0; i <= SDW_SCP_DEVID_5; i++)
+		ret += sdw_sprintf(slave, buf, ret, i);
+
+	/*
+	 * SCP Bank 0/1 registers are read-only and cannot be
+	 * retrieved from the Slave. The Master typically keeps track
+	 * of the current frame size so the information can be found
+	 * in other places
+	 */
+
+	/* DP1..14 registers */
+	for (i = 1; SDW_VALID_PORT_RANGE(i); i++) {
+
+		/* DPi registers */
+		ret += scnprintf(buf + ret, RD_BUF - ret, "\nDP%d\n", i);
+		for (j = SDW_DPN_INT(i); j <= SDW_DPN_PREPARECTRL(i); j++)
+			ret += sdw_sprintf(slave, buf, ret, j);
+
+		/* DPi Bank0 registers */
+		ret += scnprintf(buf + ret, RD_BUF - ret, "Bank0\n");
+		for (j = SDW_DPN_CHANNELEN_B0(i);
+		     j <= SDW_DPN_LANECTRL_B0(i); j++)
+			ret += sdw_sprintf(slave, buf, ret, j);
+
+		/* DPi Bank1 registers */
+		ret += scnprintf(buf + ret, RD_BUF - ret, "Bank1\n");
+		for (j = SDW_DPN_CHANNELEN_B1(i);
+		     j <= SDW_DPN_LANECTRL_B1(i); j++)
+			ret += sdw_sprintf(slave, buf, ret, j);
+	}
+
+	seq_printf(s_file, "%s", buf);
+	kfree(buf);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(sdw_slave_reg);
+
+void sdw_slave_debugfs_init(struct sdw_slave *slave)
+{
+	struct dentry *master;
+	struct dentry *d;
+	char name[32];
+
+	master = slave->bus->debugfs;
+
+	/* create the debugfs slave-name */
+	snprintf(name, sizeof(name), "%s", dev_name(&slave->dev));
+	d = debugfs_create_dir(name, master);
+
+	debugfs_create_file("registers", 0400, d, slave, &sdw_slave_reg_fops);
+
+	slave->debugfs = d;
+}
+
+void sdw_slave_debugfs_exit(struct sdw_slave *slave)
+{
+	debugfs_remove_recursive(slave->debugfs);
+}
+
+void sdw_debugfs_init(void)
+{
+	sdw_debugfs_root = debugfs_create_dir("soundwire", NULL);
+}
+
+void sdw_debugfs_exit(void)
+{
+	debugfs_remove_recursive(sdw_debugfs_root);
+}
diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c
index f39a5815e25d..4b522f6d1238 100644
--- a/drivers/soundwire/slave.c
+++ b/drivers/soundwire/slave.c
@@ -56,6 +56,7 @@ static int sdw_slave_add(struct sdw_bus *bus,
 		mutex_unlock(&bus->bus_lock);
 		put_device(&slave->dev);
 	}
+	sdw_slave_debugfs_init(slave);
 
 	return ret;
 }
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 131d49ef1cb4..2028318a4c62 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -542,6 +542,7 @@ struct sdw_slave_ops {
  * @bus: Bus handle
  * @ops: Slave callback ops
  * @prop: Slave properties
+ * @debugfs: Slave debugfs
  * @node: node for bus list
  * @port_ready: Port ready completion flag for each Slave port
  * @dev_num: Device Number assigned by Bus
@@ -553,6 +554,9 @@ struct sdw_slave {
 	struct sdw_bus *bus;
 	const struct sdw_slave_ops *ops;
 	struct sdw_slave_prop prop;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs;
+#endif
 	struct list_head node;
 	struct completion *port_ready;
 	u16 dev_num;
@@ -729,6 +733,7 @@ struct sdw_master_ops {
  * @m_rt_list: List of Master instance of all stream(s) running on Bus. This
  * is used to compute and program bus bandwidth, clock, frame shape,
  * transport and port parameters
+ * @debugfs: Bus debugfs
  * @defer_msg: Defer message
  * @clk_stop_timeout: Clock stop timeout computed
  * @bank_switch_timeout: Bank switch timeout computed
@@ -748,6 +753,9 @@ struct sdw_bus {
 	struct sdw_bus_params params;
 	struct sdw_master_prop prop;
 	struct list_head m_rt_list;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs;
+#endif
 	struct sdw_defer defer_msg;
 	unsigned int clk_stop_timeout;
 	u32 bank_switch_timeout;
-- 
cgit v1.2.3


From 395713d8ca434967808670000e6baa3505e8e947 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Wed, 21 Aug 2019 13:58:21 -0500
Subject: soundwire: intel: handle disabled links

On most hardware platforms, SoundWire interfaces are pin-muxed with
other interfaces (typically DMIC or I2S) and the status of each link
needs to be checked at boot time.

For Intel platforms, the BIOS provides a menu to enable/disable the
links separately, and the information is provided to the OS with an
Intel-specific _DSD property. The same capability will be added to
revisions of the MIPI DisCo specification.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190821185821.12690-5-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c     | 25 ++++++++++++++++++++++---
 include/linux/soundwire/sdw.h |  2 ++
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 85014181a40d..f1e38a293967 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -91,6 +91,8 @@
 #define SDW_ALH_STRMZCFG_DMAT		GENMASK(7, 0)
 #define SDW_ALH_STRMZCFG_CHN		GENMASK(19, 16)
 
+#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE	BIT(1)
+
 enum intel_pdi_type {
 	INTEL_PDI_IN = 0,
 	INTEL_PDI_OUT = 1,
@@ -944,6 +946,7 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus)
 	struct sdw_master_prop *prop = &bus->prop;
 	struct fwnode_handle *link;
 	char name[32];
+	u32 quirk_mask;
 
 	/* Find master handle */
 	snprintf(name, sizeof(name),
@@ -958,6 +961,14 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus)
 	fwnode_property_read_u32(link,
 				 "intel-sdw-ip-clock",
 				 &prop->mclk_freq);
+
+	fwnode_property_read_u32(link,
+				 "intel-quirk-mask",
+				 &quirk_mask);
+
+	if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
+		prop->hw_disabled = true;
+
 	return 0;
 }
 
@@ -1018,6 +1029,12 @@ static int intel_probe(struct platform_device *pdev)
 		goto err_master_reg;
 	}
 
+	if (sdw->cdns.bus.prop.hw_disabled) {
+		dev_info(&pdev->dev, "SoundWire master %d is disabled, ignoring\n",
+			 sdw->cdns.bus.link_id);
+		return 0;
+	}
+
 	/* Initialize shim and controller */
 	intel_link_power_up(sdw);
 	intel_shim_init(sdw);
@@ -1071,9 +1088,11 @@ static int intel_remove(struct platform_device *pdev)
 
 	sdw = platform_get_drvdata(pdev);
 
-	intel_debugfs_exit(sdw);
-	free_irq(sdw->res->irq, sdw);
-	snd_soc_unregister_component(sdw->cdns.dev);
+	if (!sdw->cdns.bus.prop.hw_disabled) {
+		intel_debugfs_exit(sdw);
+		free_irq(sdw->res->irq, sdw);
+		snd_soc_unregister_component(sdw->cdns.dev);
+	}
 	sdw_delete_bus_master(&sdw->cdns.bus);
 
 	return 0;
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 2028318a4c62..be9fe08d4e9c 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -380,6 +380,7 @@ struct sdw_slave_prop {
  * @err_threshold: Number of times that software may retry sending a single
  * command
  * @mclk_freq: clock reference passed to SoundWire Master, in Hz.
+ * @hw_disabled: if true, the Master is not functional, typically due to pin-mux
  */
 struct sdw_master_prop {
 	u32 revision;
@@ -395,6 +396,7 @@ struct sdw_master_prop {
 	bool dynamic_frame;
 	u32 err_threshold;
 	u32 mclk_freq;
+	bool hw_disabled;
 };
 
 int sdw_master_read_prop(struct sdw_bus *bus);
-- 
cgit v1.2.3


From 8a69961c7f7583742ab9064feab5ea533a6b1b97 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Mon, 19 Aug 2019 15:22:47 +0200
Subject: iommu: Add helpers to set/get default domain type

Add a couple of functions to allow changing the default
domain type from architecture code and a function for iommu
drivers to request whether the default domain is
passthrough.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 22 ++++++++++++++++++++++
 include/linux/iommu.h | 16 ++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0ae34cca0d4a..c5e0fc5ffe8b 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2213,6 +2213,28 @@ int iommu_request_dma_domain_for_dev(struct device *dev)
 	return request_default_domain_for_dev(dev, IOMMU_DOMAIN_DMA);
 }
 
+void iommu_set_default_passthrough(bool cmd_line)
+{
+	if (cmd_line)
+		iommu_set_cmd_line_dma_api();
+
+	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
+}
+
+void iommu_set_default_translated(bool cmd_line)
+{
+	if (cmd_line)
+		iommu_set_cmd_line_dma_api();
+
+	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
+}
+
+bool iommu_default_passthrough(void)
+{
+	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
+}
+EXPORT_SYMBOL_GPL(iommu_default_passthrough);
+
 const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
 {
 	const struct iommu_ops *ops = NULL;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 64ebaff33455..29bac5345563 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -436,6 +436,9 @@ extern void iommu_get_resv_regions(struct device *dev, struct list_head *list);
 extern void iommu_put_resv_regions(struct device *dev, struct list_head *list);
 extern int iommu_request_dm_for_dev(struct device *dev);
 extern int iommu_request_dma_domain_for_dev(struct device *dev);
+extern void iommu_set_default_passthrough(bool cmd_line);
+extern void iommu_set_default_translated(bool cmd_line);
+extern bool iommu_default_passthrough(void);
 extern struct iommu_resv_region *
 iommu_alloc_resv_region(phys_addr_t start, size_t length, int prot,
 			enum iommu_resv_type type);
@@ -736,6 +739,19 @@ static inline int iommu_request_dma_domain_for_dev(struct device *dev)
 	return -ENODEV;
 }
 
+static inline void iommu_set_default_passthrough(bool cmd_line)
+{
+}
+
+static inline void iommu_set_default_translated(bool cmd_line)
+{
+}
+
+static inline bool iommu_default_passthrough(void)
+{
+	return true;
+}
+
 static inline int iommu_attach_group(struct iommu_domain *domain,
 				     struct iommu_group *group)
 {
-- 
cgit v1.2.3


From 472a61e777fe78cdcb6cb6f25efee0ae9f629aca Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 14 Aug 2019 14:00:35 +0300
Subject: pinctrl/gpio: Take MUX usage into account

The user space like gpioinfo only see the GPIO usage but not the
MUX usage (e.g. I2C or SPI usage) of a pin. As a user we want
to know which pin is free/safe to use. So take the MUX usage of
strict pinmux controllers into account to get a more realistic
view for ioctl GPIO_GET_LINEINFO_IOCTL.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Tested-by: Ramon Fried <rfried.dev@gmail.com>
Signed-off-by: Ramon Fried <rfried.dev@gmail.com>
Link: https://lore.kernel.org/r/20190814110035.13451-1-ramon.fried@linux.intel.com
Acked-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c           |  3 ++-
 drivers/pinctrl/core.c           | 28 ++++++++++++++++++++++++++++
 drivers/pinctrl/pinmux.c         | 24 ++++++++++++++++++++++++
 drivers/pinctrl/pinmux.h         |  8 ++++++++
 include/linux/pinctrl/consumer.h |  6 ++++++
 5 files changed, 68 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index f497003f119c..52937bf8e514 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1084,7 +1084,8 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		    test_bit(FLAG_IS_HOGGED, &desc->flags) ||
 		    test_bit(FLAG_USED_AS_IRQ, &desc->flags) ||
 		    test_bit(FLAG_EXPORT, &desc->flags) ||
-		    test_bit(FLAG_SYSFS, &desc->flags))
+		    test_bit(FLAG_SYSFS, &desc->flags) ||
+		    !pinctrl_gpio_can_use_line(chip->base + lineinfo.line_offset))
 			lineinfo.flags |= GPIOLINE_FLAG_KERNEL;
 		if (test_bit(FLAG_IS_OUT, &desc->flags))
 			lineinfo.flags |= GPIOLINE_FLAG_IS_OUT;
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index b70df27874d1..2bbd8ee93507 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -736,6 +736,34 @@ int pinctrl_get_group_selector(struct pinctrl_dev *pctldev,
 	return -EINVAL;
 }
 
+bool pinctrl_gpio_can_use_line(unsigned gpio)
+{
+	struct pinctrl_dev *pctldev;
+	struct pinctrl_gpio_range *range;
+	bool result;
+	int pin;
+
+	/*
+	 * Try to obtain GPIO range, if it fails
+	 * we're probably dealing with GPIO driver
+	 * without a backing pin controller - bail out.
+	 */
+	if (pinctrl_get_device_gpio_range(gpio, &pctldev, &range))
+		return true;
+
+	mutex_lock(&pctldev->mutex);
+
+	/* Convert to the pin controllers number space */
+	pin = gpio_to_pin(range, gpio);
+
+	result = pinmux_can_be_used_for_gpio(pctldev, pin);
+
+	mutex_unlock(&pctldev->mutex);
+
+	return result;
+}
+EXPORT_SYMBOL_GPL(pinctrl_gpio_can_use_line);
+
 /**
  * pinctrl_gpio_request() - request a single pin to be used as GPIO
  * @gpio: the GPIO pin number from the GPIO subsystem number space
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index 020e54f843f9..e914f6efd39e 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -70,6 +70,30 @@ int pinmux_validate_map(const struct pinctrl_map *map, int i)
 	return 0;
 }
 
+/**
+ * pinmux_can_be_used_for_gpio() - check if a specific pin
+ *	is either muxed to a different function or used as gpio.
+ *
+ * @pin: the pin number in the global pin space
+ *
+ * Controllers not defined as strict will always return true,
+ * menaning that the gpio can be used.
+ */
+bool pinmux_can_be_used_for_gpio(struct pinctrl_dev *pctldev, unsigned pin)
+{
+	struct pin_desc *desc = pin_desc_get(pctldev, pin);
+	const struct pinmux_ops *ops = pctldev->desc->pmxops;
+
+	/* Can't inspect pin, assume it can be used */
+	if (!desc)
+		return true;
+
+	if (ops->strict && desc->mux_usecount)
+		return false;
+
+	return !(ops->strict && !!desc->gpio_owner);
+}
+
 /**
  * pin_request() - request a single pin to be muxed in, typically for GPIO
  * @pin: the pin number in the global pin space
diff --git a/drivers/pinctrl/pinmux.h b/drivers/pinctrl/pinmux.h
index 794cb3a003ff..78c3a31be882 100644
--- a/drivers/pinctrl/pinmux.h
+++ b/drivers/pinctrl/pinmux.h
@@ -15,6 +15,8 @@ int pinmux_check_ops(struct pinctrl_dev *pctldev);
 
 int pinmux_validate_map(const struct pinctrl_map *map, int i);
 
+bool pinmux_can_be_used_for_gpio(struct pinctrl_dev *pctldev, unsigned pin);
+
 int pinmux_request_gpio(struct pinctrl_dev *pctldev,
 			struct pinctrl_gpio_range *range,
 			unsigned pin, unsigned gpio);
@@ -42,6 +44,12 @@ static inline int pinmux_validate_map(const struct pinctrl_map *map, int i)
 	return 0;
 }
 
+static inline bool pinmux_can_be_used_for_gpio(struct pinctrl_dev *pctldev,
+					       unsigned pin)
+{
+	return true;
+}
+
 static inline int pinmux_request_gpio(struct pinctrl_dev *pctldev,
 			struct pinctrl_gpio_range *range,
 			unsigned pin, unsigned gpio)
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index 86720a5a384f..7f8c7d9583d3 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -24,6 +24,7 @@ struct device;
 #ifdef CONFIG_PINCTRL
 
 /* External interface to pin control */
+extern bool pinctrl_gpio_can_use_line(unsigned gpio);
 extern int pinctrl_gpio_request(unsigned gpio);
 extern void pinctrl_gpio_free(unsigned gpio);
 extern int pinctrl_gpio_direction_input(unsigned gpio);
@@ -61,6 +62,11 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev)
 
 #else /* !CONFIG_PINCTRL */
 
+static inline bool pinctrl_gpio_can_use_line(unsigned gpio)
+{
+	return true;
+}
+
 static inline int pinctrl_gpio_request(unsigned gpio)
 {
 	return 0;
-- 
cgit v1.2.3


From c7663fa2a6631e5dddc1b7b2ba2e905ddffaff90 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 22 Aug 2019 11:18:17 +0800
Subject: gpio: Move gpiochip_lock/unlock_as_irq to gpio/driver.h

If CONFIG_GPIOLIB is not, gpiochip_lock/unlock_as_irq will
conflict as this:

In file included from sound/soc/codecs/wm5100.c:18:0:
./include/linux/gpio.h:224:19: error: static declaration of gpiochip_lock_as_irq follows non-static declaration
 static inline int gpiochip_lock_as_irq(struct gpio_chip *chip,
                   ^~~~~~~~~~~~~~~~~~~~
In file included from sound/soc/codecs/wm5100.c:17:0:
./include/linux/gpio/driver.h:494:5: note: previous declaration of gpiochip_lock_as_irq was here
 int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
     ^~~~~~~~~~~~~~~~~~~~
In file included from sound/soc/codecs/wm5100.c:18:0:
./include/linux/gpio.h:231:20: error: static declaration of gpiochip_unlock_as_irq follows non-static declaration
 static inline void gpiochip_unlock_as_irq(struct gpio_chip *chip,
                    ^~~~~~~~~~~~~~~~~~~~~~
In file included from sound/soc/codecs/wm5100.c:17:0:
./include/linux/gpio/driver.h:495:6: note: previous declaration of gpiochip_unlock_as_irq was here
 void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
     ^~~~~~~~~~~~~~~~~~~~~~

Move them to gpio/driver.h and use CONFIG_GPIOLIB guard this.

Reported-by: Hulk Robot <hulkci@huawei.com>
Fixes: d74be6dfea1b ("gpio: remove gpiod_lock/unlock_as_irq()")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20190822031817.32888-1-yuehaibing@huawei.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio.h        | 13 -------------
 include/linux/gpio/driver.h | 19 ++++++++++++++++---
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/gpio.h b/include/linux/gpio.h
index 40915b461f18..d20e224804e6 100644
--- a/include/linux/gpio.h
+++ b/include/linux/gpio.h
@@ -221,19 +221,6 @@ static inline int gpio_to_irq(unsigned gpio)
 	return -EINVAL;
 }
 
-static inline int gpiochip_lock_as_irq(struct gpio_chip *chip,
-				       unsigned int offset)
-{
-	WARN_ON(1);
-	return -EINVAL;
-}
-
-static inline void gpiochip_unlock_as_irq(struct gpio_chip *chip,
-					  unsigned int offset)
-{
-	WARN_ON(1);
-}
-
 static inline int irq_to_gpio(unsigned irq)
 {
 	/* irq can never have been returned from gpio_to_irq() */
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 340121c7d2fb..8d06a054abc8 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -495,9 +495,6 @@ extern int devm_gpiochip_add_data(struct device *dev, struct gpio_chip *chip,
 extern struct gpio_chip *gpiochip_find(void *data,
 			      int (*match)(struct gpio_chip *chip, void *data));
 
-/* lock/unlock as IRQ */
-int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
-void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
 bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset);
 int gpiochip_reqres_irq(struct gpio_chip *chip, unsigned int offset);
 void gpiochip_relres_irq(struct gpio_chip *chip, unsigned int offset);
@@ -723,6 +720,10 @@ void gpiochip_free_own_desc(struct gpio_desc *desc);
 void devprop_gpiochip_set_names(struct gpio_chip *chip,
 				const struct fwnode_handle *fwnode);
 
+/* lock/unlock as IRQ */
+int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
+void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
+
 #else /* CONFIG_GPIOLIB */
 
 static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
@@ -732,6 +733,18 @@ static inline struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc)
 	return ERR_PTR(-ENODEV);
 }
 
+static inline int gpiochip_lock_as_irq(struct gpio_chip *chip,
+				       unsigned int offset)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline void gpiochip_unlock_as_irq(struct gpio_chip *chip,
+					  unsigned int offset)
+{
+	WARN_ON(1);
+}
 #endif /* CONFIG_GPIOLIB */
 
 #endif
-- 
cgit v1.2.3


From 1a92f989126ef0743650d43f75bfc431a5c3a310 Mon Sep 17 00:00:00 2001
From: Bibby Hsieh <bibby.hsieh@mediatek.com>
Date: Tue, 20 Aug 2019 16:49:27 +0800
Subject: soc: mediatek: cmdq: reorder the parameter

The order of gce instructions is [subsys offset value]
so reorder the parameter of cmdq_pkt_write_mask
and cmdq_pkt_write function.

Signed-off-by: Bibby Hsieh <bibby.hsieh@mediatek.com>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 drivers/soc/mediatek/mtk-cmdq-helper.c |  6 +++---
 include/linux/soc/mediatek/mtk-cmdq.h  | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
index ff9fef5a032b..082b8978651e 100644
--- a/drivers/soc/mediatek/mtk-cmdq-helper.c
+++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
@@ -136,7 +136,7 @@ static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, enum cmdq_code code,
 	return 0;
 }
 
-int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 value, u32 subsys, u32 offset)
+int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 subsys, u32 offset, u32 value)
 {
 	u32 arg_a = (offset & CMDQ_ARG_A_WRITE_MASK) |
 		    (subsys << CMDQ_SUBSYS_SHIFT);
@@ -145,8 +145,8 @@ int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 value, u32 subsys, u32 offset)
 }
 EXPORT_SYMBOL(cmdq_pkt_write);
 
-int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 value,
-			u32 subsys, u32 offset, u32 mask)
+int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 subsys,
+			u32 offset, u32 value, u32 mask)
 {
 	u32 offset_mask = offset;
 	int err = 0;
diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
index 54ade13a9b15..4bba1c8d97fa 100644
--- a/include/linux/soc/mediatek/mtk-cmdq.h
+++ b/include/linux/soc/mediatek/mtk-cmdq.h
@@ -63,26 +63,26 @@ void cmdq_pkt_destroy(struct cmdq_pkt *pkt);
 /**
  * cmdq_pkt_write() - append write command to the CMDQ packet
  * @pkt:	the CMDQ packet
- * @value:	the specified target register value
  * @subsys:	the CMDQ sub system code
  * @offset:	register offset from CMDQ sub system
+ * @value:	the specified target register value
  *
  * Return: 0 for success; else the error code is returned
  */
-int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 value, u32 subsys, u32 offset);
+int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 subsys, u32 offset, u32 value);
 
 /**
  * cmdq_pkt_write_mask() - append write command with mask to the CMDQ packet
  * @pkt:	the CMDQ packet
- * @value:	the specified target register value
  * @subsys:	the CMDQ sub system code
  * @offset:	register offset from CMDQ sub system
+ * @value:	the specified target register value
  * @mask:	the specified target register mask
  *
  * Return: 0 for success; else the error code is returned
  */
-int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 value,
-			u32 subsys, u32 offset, u32 mask);
+int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 subsys,
+			u32 offset, u32 value, u32 mask);
 
 /**
  * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet
-- 
cgit v1.2.3


From 556030f0604f3cf5f1ea91307c0695541e5c74ca Mon Sep 17 00:00:00 2001
From: Bibby Hsieh <bibby.hsieh@mediatek.com>
Date: Tue, 20 Aug 2019 16:49:28 +0800
Subject: soc: mediatek: cmdq: change the type of input parameter

According to the cmdq hardware design, the subsys is u8,
the offset is u16 and the event id is u16.
This patch changes the type of subsys, offset and event id
to the correct type.

Signed-off-by: Bibby Hsieh <bibby.hsieh@mediatek.com>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 drivers/soc/mediatek/mtk-cmdq-helper.c | 10 +++++-----
 include/linux/soc/mediatek/mtk-cmdq.h  | 10 +++++-----
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c
index 082b8978651e..7aa0517ff2f3 100644
--- a/drivers/soc/mediatek/mtk-cmdq-helper.c
+++ b/drivers/soc/mediatek/mtk-cmdq-helper.c
@@ -136,7 +136,7 @@ static int cmdq_pkt_append_command(struct cmdq_pkt *pkt, enum cmdq_code code,
 	return 0;
 }
 
-int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 subsys, u32 offset, u32 value)
+int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value)
 {
 	u32 arg_a = (offset & CMDQ_ARG_A_WRITE_MASK) |
 		    (subsys << CMDQ_SUBSYS_SHIFT);
@@ -145,8 +145,8 @@ int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 subsys, u32 offset, u32 value)
 }
 EXPORT_SYMBOL(cmdq_pkt_write);
 
-int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 subsys,
-			u32 offset, u32 value, u32 mask)
+int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys,
+			u16 offset, u32 value, u32 mask)
 {
 	u32 offset_mask = offset;
 	int err = 0;
@@ -161,7 +161,7 @@ int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 subsys,
 }
 EXPORT_SYMBOL(cmdq_pkt_write_mask);
 
-int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u32 event)
+int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event)
 {
 	u32 arg_b;
 
@@ -181,7 +181,7 @@ int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u32 event)
 }
 EXPORT_SYMBOL(cmdq_pkt_wfe);
 
-int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u32 event)
+int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u16 event)
 {
 	if (event >= CMDQ_MAX_EVENT)
 		return -EINVAL;
diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
index 4bba1c8d97fa..f3ae45d02e80 100644
--- a/include/linux/soc/mediatek/mtk-cmdq.h
+++ b/include/linux/soc/mediatek/mtk-cmdq.h
@@ -69,7 +69,7 @@ void cmdq_pkt_destroy(struct cmdq_pkt *pkt);
  *
  * Return: 0 for success; else the error code is returned
  */
-int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 subsys, u32 offset, u32 value);
+int cmdq_pkt_write(struct cmdq_pkt *pkt, u8 subsys, u16 offset, u32 value);
 
 /**
  * cmdq_pkt_write_mask() - append write command with mask to the CMDQ packet
@@ -81,8 +81,8 @@ int cmdq_pkt_write(struct cmdq_pkt *pkt, u32 subsys, u32 offset, u32 value);
  *
  * Return: 0 for success; else the error code is returned
  */
-int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 subsys,
-			u32 offset, u32 value, u32 mask);
+int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u8 subsys,
+			u16 offset, u32 value, u32 mask);
 
 /**
  * cmdq_pkt_wfe() - append wait for event command to the CMDQ packet
@@ -91,7 +91,7 @@ int cmdq_pkt_write_mask(struct cmdq_pkt *pkt, u32 subsys,
  *
  * Return: 0 for success; else the error code is returned
  */
-int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u32 event);
+int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u16 event);
 
 /**
  * cmdq_pkt_clear_event() - append clear event command to the CMDQ packet
@@ -100,7 +100,7 @@ int cmdq_pkt_wfe(struct cmdq_pkt *pkt, u32 event);
  *
  * Return: 0 for success; else the error code is returned
  */
-int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u32 event);
+int cmdq_pkt_clear_event(struct cmdq_pkt *pkt, u16 event);
 
 /**
  * cmdq_pkt_flush_async() - trigger CMDQ to asynchronously execute the CMDQ
-- 
cgit v1.2.3


From c72db71ed61ff51c2b8189ac9889dd18f22eb612 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Fri, 12 Jul 2019 16:55:42 +0800
Subject: f2fs: fix panic of IO alignment feature

Since 07173c3ec276 ("block: enable multipage bvecs"), one bio vector
can store multi pages, so that we can not calculate max IO size of
bio as PAGE_SIZE * bio->bi_max_vecs. However IO alignment feature of
f2fs always has that assumption, so finally, it may cause panic during
IO submission as below stack.

 kernel BUG at fs/f2fs/data.c:317!
 RIP: 0010:__submit_merged_bio+0x8b0/0x8c0
 Call Trace:
  f2fs_submit_page_write+0x3cd/0xdd0
  do_write_page+0x15d/0x360
  f2fs_outplace_write_data+0xd7/0x210
  f2fs_do_write_data_page+0x43b/0xf30
  __write_data_page+0xcf6/0x1140
  f2fs_write_cache_pages+0x3ba/0xb40
  f2fs_write_data_pages+0x3dd/0x8b0
  do_writepages+0xbb/0x1e0
  __writeback_single_inode+0xb6/0x800
  writeback_sb_inodes+0x441/0x910
  wb_writeback+0x261/0x650
  wb_workfn+0x1f9/0x7a0
  process_one_work+0x503/0x970
  worker_thread+0x7d/0x820
  kthread+0x1ad/0x210
  ret_from_fork+0x35/0x40

This patch adds one extra condition to check left space in bio while
trying merging page to bio, to avoid panic.

This bug was reported in bugzilla:

https://bugzilla.kernel.org/show_bug.cgi?id=204043

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c          | 10 ++++++++++
 fs/f2fs/super.c         |  2 +-
 include/linux/f2fs_fs.h |  1 +
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0686306ed988..5bce20005add 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -503,6 +503,16 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 					block_t last_blkaddr,
 					block_t cur_blkaddr)
 {
+	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
+		unsigned int filled_blocks =
+				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
+		unsigned int io_size = F2FS_IO_SIZE(sbi);
+		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
+
+		/* IOs in bio is aligned and left space of vectors is not enough */
+		if (!(filled_blocks % io_size) && left_vecs < io_size)
+			return false;
+	}
 	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
 		return false;
 	return io_type_is_mergeable(io, fio);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 78a1b873e48a..720f2e6d6f0a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3202,7 +3202,7 @@ try_onemore:
 	if (err)
 		goto free_bio_info;
 
-	if (F2FS_IO_SIZE(sbi) > 1) {
+	if (F2FS_IO_ALIGNED(sbi)) {
 		sbi->write_io_dummy =
 			mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
 		if (!sbi->write_io_dummy) {
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 65559900d4d7..52af9ac164b4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -41,6 +41,7 @@
 #define F2FS_IO_SIZE_BYTES(sbi)	(1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */
 #define F2FS_IO_SIZE_BITS(sbi)	(F2FS_OPTION(sbi).write_io_size_bits) /* power of 2 */
 #define F2FS_IO_SIZE_MASK(sbi)	(F2FS_IO_SIZE(sbi) - 1)
+#define F2FS_IO_ALIGNED(sbi)	(F2FS_IO_SIZE(sbi) > 1)
 
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO		(GFP_NOFS | __GFP_ZERO)
-- 
cgit v1.2.3


From 71e90b4654a9298f9e2375cc733d57b8bf92ce73 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Tue, 23 Jul 2019 16:05:27 -0700
Subject: fs: Reserve flag for casefolding

In preparation for including the casefold feature within f2fs, elevate
the EXT4_CASEFOLD_FL flag to FS_CASEFOLD_FL.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/uapi/linux/fs.h       | 1 +
 tools/include/uapi/linux/fs.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 59c71fa8c553..2a616aa3f686 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -311,6 +311,7 @@ struct fscrypt_key {
 #define FS_NOCOW_FL			0x00800000 /* Do not cow file */
 #define FS_INLINE_DATA_FL		0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
+#define FS_CASEFOLD_FL			0x40000000 /* Folder is case insensitive */
 #define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
 
 #define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 59c71fa8c553..2a616aa3f686 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -311,6 +311,7 @@ struct fscrypt_key {
 #define FS_NOCOW_FL			0x00800000 /* Do not cow file */
 #define FS_INLINE_DATA_FL		0x10000000 /* Reserved for ext4 */
 #define FS_PROJINHERIT_FL		0x20000000 /* Create with parents projid */
+#define FS_CASEFOLD_FL			0x40000000 /* Folder is case insensitive */
 #define FS_RESERVED_FL			0x80000000 /* reserved for ext2 lib */
 
 #define FS_FL_USER_VISIBLE		0x0003DFFF /* User visible flags */
-- 
cgit v1.2.3


From 5aba54302a46fdd589040b928d5d010e5ace1234 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Tue, 23 Jul 2019 16:05:28 -0700
Subject: f2fs: include charset encoding information in the superblock

Add charset encoding to f2fs to support casefolding. It is modeled after
the same feature introduced in commit c83ad55eaa91 ("ext4: include charset
encoding information in the superblock")

Currently this is not compatible with encryption, similar to the current
ext4 imlpementation. This will change in the future.

>From the ext4 patch:
"""
The s_encoding field stores a magic number indicating the encoding
format and version used globally by file and directory names in the
filesystem.  The s_encoding_flags defines policies for using the charset
encoding, like how to handle invalid sequences.  The magic number is
mapped to the exact charset table, but the mapping is specific to ext4.
Since we don't have any commitment to support old encodings, the only
encoding I am supporting right now is utf8-12.1.0.

The current implementation prevents the user from enabling encoding and
per-directory encryption on the same filesystem at the same time.  The
incompatibility between these features lies in how we do efficient
directory searches when we cannot be sure the encryption of the user
provided fname will match the actual hash stored in the disk without
decrypting every directory entry, because of normalization cases.  My
quickest solution is to simply block the concurrent use of these
features for now, and enable it later, once we have a better solution.
"""

Signed-off-by: Daniel Rosenberg <drosen@google.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  7 +++
 Documentation/filesystems/f2fs.txt      |  3 ++
 fs/f2fs/f2fs.h                          |  6 +++
 fs/f2fs/super.c                         | 95 +++++++++++++++++++++++++++++++++
 fs/f2fs/sysfs.c                         | 23 ++++++++
 include/linux/f2fs_fs.h                 |  9 +++-
 6 files changed, 142 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index dca326e0ee3e..7ab2b1b5e255 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -251,3 +251,10 @@ Description:
 		If checkpoint=disable, it displays the number of blocks that are unusable.
                 If checkpoint=enable it displays the enumber of blocks that would be unusable
                 if checkpoint=disable were to be set.
+
+What:		/sys/fs/f2fs/<disk>/encoding
+Date		July 2019
+Contact:	"Daniel Rosenberg" <drosen@google.com>
+Description:
+		Displays name and version of the encoding set for the filesystem.
+                If no encoding is set, displays (none)
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 496fa28b2492..5fa38ab373ca 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -413,6 +413,9 @@ Files in /sys/fs/f2fs/<devname>
                               that would be unusable if checkpoint=disable were
                               to be set.
 
+encoding 	              This shows the encoding used for casefolding.
+                              If casefolding is not enabled, returns (none)
+
 ================================================================================
 USAGE
 ================================================================================
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4fa9a618b31a..dd69e1559839 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -153,6 +153,7 @@ struct f2fs_mount_info {
 #define F2FS_FEATURE_LOST_FOUND		0x0200
 #define F2FS_FEATURE_VERITY		0x0400	/* reserved */
 #define F2FS_FEATURE_SB_CHKSUM		0x0800
+#define F2FS_FEATURE_CASEFOLD		0x1000
 
 #define __F2FS_HAS_FEATURE(raw_super, mask)				\
 	((raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -1169,6 +1170,10 @@ struct f2fs_sb_info {
 	int valid_super_block;			/* valid super block no */
 	unsigned long s_flag;				/* flags for sbi */
 	struct mutex writepages;		/* mutex for writepages() */
+#ifdef CONFIG_UNICODE
+	struct unicode_map *s_encoding;
+	__u16 s_encoding_flags;
+#endif
 
 #ifdef CONFIG_BLK_DEV_ZONED
 	unsigned int blocks_per_blkz;		/* F2FS blocks per zone */
@@ -3565,6 +3570,7 @@ F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
 F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
 F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
 F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
+F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 9167deb0c417..8bebee8e0186 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -23,6 +23,7 @@
 #include <linux/f2fs_fs.h>
 #include <linux/sysfs.h>
 #include <linux/quota.h>
+#include <linux/unicode.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -222,6 +223,36 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
 	va_end(args);
 }
 
+#ifdef CONFIG_UNICODE
+static const struct f2fs_sb_encodings {
+	__u16 magic;
+	char *name;
+	char *version;
+} f2fs_sb_encoding_map[] = {
+	{F2FS_ENC_UTF8_12_1, "utf8", "12.1.0"},
+};
+
+static int f2fs_sb_read_encoding(const struct f2fs_super_block *sb,
+				 const struct f2fs_sb_encodings **encoding,
+				 __u16 *flags)
+{
+	__u16 magic = le16_to_cpu(sb->s_encoding);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(f2fs_sb_encoding_map); i++)
+		if (magic == f2fs_sb_encoding_map[i].magic)
+			break;
+
+	if (i >= ARRAY_SIZE(f2fs_sb_encoding_map))
+		return -EINVAL;
+
+	*encoding = &f2fs_sb_encoding_map[i];
+	*flags = le16_to_cpu(sb->s_encoding_flags);
+
+	return 0;
+}
+#endif
+
 static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
 {
 	block_t limit = min((sbi->user_block_count << 1) / 1000,
@@ -798,6 +829,13 @@ static int parse_options(struct super_block *sb, char *options)
 		return -EINVAL;
 	}
 #endif
+#ifndef CONFIG_UNICODE
+	if (f2fs_sb_has_casefold(sbi)) {
+		f2fs_err(sbi,
+			"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
+		return -EINVAL;
+	}
+#endif
 
 	if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
 		f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
@@ -1103,6 +1141,9 @@ static void f2fs_put_super(struct super_block *sb)
 	destroy_percpu_info(sbi);
 	for (i = 0; i < NR_PAGE_TYPE; i++)
 		kvfree(sbi->write_io[i]);
+#ifdef CONFIG_UNICODE
+	utf8_unload(sbi->s_encoding);
+#endif
 	kvfree(sbi);
 }
 
@@ -3075,6 +3116,52 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 	return 0;
 }
 
+static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_UNICODE
+	if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
+		const struct f2fs_sb_encodings *encoding_info;
+		struct unicode_map *encoding;
+		__u16 encoding_flags;
+
+		if (f2fs_sb_has_encrypt(sbi)) {
+			f2fs_err(sbi,
+				"Can't mount with encoding and encryption");
+			return -EINVAL;
+		}
+
+		if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
+					  &encoding_flags)) {
+			f2fs_err(sbi,
+				 "Encoding requested by superblock is unknown");
+			return -EINVAL;
+		}
+
+		encoding = utf8_load(encoding_info->version);
+		if (IS_ERR(encoding)) {
+			f2fs_err(sbi,
+				 "can't mount with superblock charset: %s-%s "
+				 "not supported by the kernel. flags: 0x%x.",
+				 encoding_info->name, encoding_info->version,
+				 encoding_flags);
+			return PTR_ERR(encoding);
+		}
+		f2fs_info(sbi, "Using encoding defined by superblock: "
+			 "%s-%s with flags 0x%hx", encoding_info->name,
+			 encoding_info->version?:"\b", encoding_flags);
+
+		sbi->s_encoding = encoding;
+		sbi->s_encoding_flags = encoding_flags;
+	}
+#else
+	if (f2fs_sb_has_casefold(sbi)) {
+		f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
+		return -EINVAL;
+	}
+#endif
+	return 0;
+}
+
 static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_sm_info *sm_i = SM_I(sbi);
@@ -3171,6 +3258,10 @@ try_onemore:
 				le32_to_cpu(raw_super->log_blocksize);
 	sb->s_max_links = F2FS_LINK_MAX;
 
+	err = f2fs_setup_casefold(sbi);
+	if (err)
+		goto free_options;
+
 #ifdef CONFIG_QUOTA
 	sb->dq_op = &f2fs_quota_operations;
 	sb->s_qcop = &f2fs_quotactl_ops;
@@ -3521,6 +3612,10 @@ free_percpu:
 free_bio_info:
 	for (i = 0; i < NR_PAGE_TYPE; i++)
 		kvfree(sbi->write_io[i]);
+
+#ifdef CONFIG_UNICODE
+	utf8_unload(sbi->s_encoding);
+#endif
 free_options:
 #ifdef CONFIG_QUOTA
 	for (i = 0; i < MAXQUOTAS; i++)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 3aeacd0aacfd..f9fcca695db9 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -10,6 +10,7 @@
 #include <linux/proc_fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/seq_file.h>
+#include <linux/unicode.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -81,6 +82,19 @@ static ssize_t unusable_show(struct f2fs_attr *a,
 		(unsigned long long)unusable);
 }
 
+static ssize_t encoding_show(struct f2fs_attr *a,
+		struct f2fs_sb_info *sbi, char *buf)
+{
+#ifdef CONFIG_UNICODE
+	if (f2fs_sb_has_casefold(sbi))
+		return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
+			sbi->s_encoding->charset,
+			(sbi->s_encoding->version >> 16) & 0xff,
+			(sbi->s_encoding->version >> 8) & 0xff,
+			sbi->s_encoding->version & 0xff);
+#endif
+	return snprintf(buf, PAGE_SIZE, "(none)");
+}
 
 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
@@ -134,6 +148,9 @@ static ssize_t features_show(struct f2fs_attr *a,
 	if (f2fs_sb_has_sb_chksum(sbi))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "sb_checksum");
+	if (f2fs_sb_has_casefold(sbi))
+		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+				len ? ", " : "", "casefold");
 	len += snprintf(buf + len, PAGE_SIZE - len, "\n");
 	return len;
 }
@@ -365,6 +382,7 @@ enum feat_id {
 	FEAT_INODE_CRTIME,
 	FEAT_LOST_FOUND,
 	FEAT_SB_CHECKSUM,
+	FEAT_CASEFOLD,
 };
 
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -382,6 +400,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
 	case FEAT_INODE_CRTIME:
 	case FEAT_LOST_FOUND:
 	case FEAT_SB_CHECKSUM:
+	case FEAT_CASEFOLD:
 		return snprintf(buf, PAGE_SIZE, "supported\n");
 	}
 	return 0;
@@ -455,6 +474,7 @@ F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
 F2FS_GENERAL_RO_ATTR(features);
 F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
 F2FS_GENERAL_RO_ATTR(unusable);
+F2FS_GENERAL_RO_ATTR(encoding);
 
 #ifdef CONFIG_FS_ENCRYPTION
 F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -471,6 +491,7 @@ F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
 F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
 F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
 F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
+F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -515,6 +536,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(features),
 	ATTR_LIST(reserved_blocks),
 	ATTR_LIST(current_reserved_blocks),
+	ATTR_LIST(encoding),
 	NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
@@ -535,6 +557,7 @@ static struct attribute *f2fs_feat_attrs[] = {
 	ATTR_LIST(inode_crtime),
 	ATTR_LIST(lost_found),
 	ATTR_LIST(sb_checksum),
+	ATTR_LIST(casefold),
 	NULL,
 };
 ATTRIBUTE_GROUPS(f2fs_feat);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 52af9ac164b4..284738996028 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,11 @@
 
 #define F2FS_MAX_QUOTAS		3
 
+#define F2FS_ENC_UTF8_12_1	1
+#define F2FS_ENC_STRICT_MODE_FL	(1 << 0)
+#define f2fs_has_strict_mode(sbi) \
+	(sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL)
+
 #define F2FS_IO_SIZE(sbi)	(1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */
 #define F2FS_IO_SIZE_KB(sbi)	(1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */
 #define F2FS_IO_SIZE_BYTES(sbi)	(1 << (F2FS_OPTION(sbi).write_io_size_bits + 12)) /* B */
@@ -110,7 +115,9 @@ struct f2fs_super_block {
 	struct f2fs_device devs[MAX_DEVICES];	/* device list */
 	__le32 qf_ino[F2FS_MAX_QUOTAS];	/* quota inode numbers */
 	__u8 hot_ext_count;		/* # of hot file extension */
-	__u8 reserved[310];		/* valid reserved region */
+	__le16  s_encoding;		/* Filename charset encoding */
+	__le16  s_encoding_flags;	/* Filename charset encoding flags */
+	__u8 reserved[306];		/* valid reserved region */
 	__le32 crc;			/* checksum of superblock */
 } __packed;
 
-- 
cgit v1.2.3


From bd00cd52d5be655a2f217e2ed74b91a71cb2b14f Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Wed, 14 Aug 2019 20:32:16 +0800
Subject: clocksource/drivers/hyperv: Add Hyper-V specific sched clock function

Hyper-V guests use the default native_sched_clock() in
pv_ops.time.sched_clock on x86. But native_sched_clock() directly uses the
raw TSC value, which can be discontinuous in a Hyper-V VM.

Add the generic hv_setup_sched_clock() to set the sched clock function
appropriately. On x86, this sets pv_ops.time.sched_clock to read the
Hyper-V reference TSC value that is scaled and adjusted to be continuous.

Also move the Hyper-V reference TSC initialization much earlier in the boot
process so no discontinuity is observed when pv_ops.time.sched_clock
calculates its offset.

[ tglx: Folded build fix ]

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lkml.kernel.org/r/20190814123216.32245-3-Tianyu.Lan@microsoft.com
---
 arch/x86/hyperv/hv_init.c          |  2 --
 arch/x86/kernel/cpu/mshyperv.c     |  8 ++++++++
 drivers/clocksource/hyperv_timer.c | 22 ++++++++++++----------
 include/asm-generic/mshyperv.h     |  1 +
 4 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 0d258688c8cf..866dfb3dca48 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -301,8 +301,6 @@ void __init hyperv_init(void)
 
 	x86_init.pci.arch_init = hv_pci_init;
 
-	/* Register Hyper-V specific clocksource */
-	hv_init_clocksource();
 	return;
 
 remove_cpuhp_state:
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 062f77279ce3..53afd33990eb 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -29,6 +29,7 @@
 #include <asm/timer.h>
 #include <asm/reboot.h>
 #include <asm/nmi.h>
+#include <clocksource/hyperv_timer.h>
 
 struct ms_hyperv_info ms_hyperv;
 EXPORT_SYMBOL_GPL(ms_hyperv);
@@ -338,9 +339,16 @@ static void __init ms_hyperv_init_platform(void)
 		x2apic_phys = 1;
 # endif
 
+	/* Register Hyper-V specific clocksource */
+	hv_init_clocksource();
 #endif
 }
 
+void hv_setup_sched_clock(void *sched_clock)
+{
+	pv_ops.time.sched_clock = sched_clock;
+}
+
 const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
 	.name			= "Microsoft Hyper-V",
 	.detect			= ms_hyperv_platform,
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 432aa331df04..c322ab4d3689 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -22,6 +22,7 @@
 #include <asm/mshyperv.h>
 
 static struct clock_event_device __percpu *hv_clock_event;
+static u64 hv_sched_clock_offset __ro_after_init;
 
 /*
  * If false, we're using the old mechanism for stimer0 interrupts
@@ -222,7 +223,7 @@ struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
 }
 EXPORT_SYMBOL_GPL(hv_get_tsc_page);
 
-static u64 notrace read_hv_sched_clock_tsc(void)
+static u64 notrace read_hv_clock_tsc(struct clocksource *arg)
 {
 	u64 current_tick = hv_read_tsc_page(&tsc_pg);
 
@@ -232,9 +233,9 @@ static u64 notrace read_hv_sched_clock_tsc(void)
 	return current_tick;
 }
 
-static u64 read_hv_clock_tsc(struct clocksource *arg)
+static u64 read_hv_sched_clock_tsc(void)
 {
-	return read_hv_sched_clock_tsc();
+	return read_hv_clock_tsc(NULL) - hv_sched_clock_offset;
 }
 
 static struct clocksource hyperv_cs_tsc = {
@@ -246,7 +247,7 @@ static struct clocksource hyperv_cs_tsc = {
 };
 #endif
 
-static u64 notrace read_hv_sched_clock_msr(void)
+static u64 notrace read_hv_clock_msr(struct clocksource *arg)
 {
 	u64 current_tick;
 	/*
@@ -258,9 +259,9 @@ static u64 notrace read_hv_sched_clock_msr(void)
 	return current_tick;
 }
 
-static u64 read_hv_clock_msr(struct clocksource *arg)
+static u64 read_hv_sched_clock_msr(void)
 {
-	return read_hv_sched_clock_msr();
+	return read_hv_clock_msr(NULL) - hv_sched_clock_offset;
 }
 
 static struct clocksource hyperv_cs_msr = {
@@ -298,8 +299,9 @@ static bool __init hv_init_tsc_clocksource(void)
 	hv_set_clocksource_vdso(hyperv_cs_tsc);
 	clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 
-	/* sched_clock_register is needed on ARM64 but is a no-op on x86 */
-	sched_clock_register(read_hv_sched_clock_tsc, 64, HV_CLOCK_HZ);
+	hv_sched_clock_offset = hyperv_cs->read(hyperv_cs);
+	hv_setup_sched_clock(read_hv_sched_clock_tsc);
+
 	return true;
 }
 #else
@@ -329,7 +331,7 @@ void __init hv_init_clocksource(void)
 	hyperv_cs = &hyperv_cs_msr;
 	clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100);
 
-	/* sched_clock_register is needed on ARM64 but is a no-op on x86 */
-	sched_clock_register(read_hv_sched_clock_msr, 64, HV_CLOCK_HZ);
+	hv_sched_clock_offset = hyperv_cs->read(hyperv_cs);
+	hv_setup_sched_clock(read_hv_sched_clock_msr);
 }
 EXPORT_SYMBOL_GPL(hv_init_clocksource);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 0becb7d9704d..18d8e2d8210f 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -167,6 +167,7 @@ void hyperv_report_panic(struct pt_regs *regs, long err);
 void hyperv_report_panic_msg(phys_addr_t pa, size_t size);
 bool hv_is_hyperv_initialized(void);
 void hyperv_cleanup(void);
+void hv_setup_sched_clock(void *sched_clock);
 #else /* CONFIG_HYPERV */
 static inline bool hv_is_hyperv_initialized(void) { return false; }
 static inline void hyperv_cleanup(void) {}
-- 
cgit v1.2.3


From 3e2d94535adb2df15f3907e4b4c7cd8a5a4c2b5a Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 22 Aug 2019 10:36:30 +0200
Subject: clocksource/drivers/hyperv: Enable TSC page clocksource on 32bit

There is no particular reason to not enable TSC page clocksource on
32-bit. mul_u64_u64_shr() is available and despite the increased
computational complexity (compared to 64bit) TSC page is still a huge win
compared to MSR-based clocksource.

In-kernel reads:
  MSR based clocksource: 3361 cycles
  TSC page clocksource: 49 cycles

Reads from userspace (utilizing vDSO in case of TSC page):
  MSR based clocksource: 5664 cycles
  TSC page clocksource: 131 cycles

Enabling TSC page on 32bits allows to get rid of CONFIG_HYPERV_TSCPAGE as
it is now not any different from CONFIG_HYPERV_TIMER.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lkml.kernel.org/r/20190822083630.17059-1-vkuznets@redhat.com
---
 arch/x86/include/asm/vdso/gettimeofday.h |  6 +++---
 drivers/clocksource/hyperv_timer.c       | 11 -----------
 drivers/hv/Kconfig                       |  3 ---
 include/clocksource/hyperv_timer.h       |  8 +++-----
 4 files changed, 6 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index ae91429129a6..bcbf901befbe 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -51,7 +51,7 @@ extern struct pvclock_vsyscall_time_info pvclock_page
 	__attribute__((visibility("hidden")));
 #endif
 
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
 extern struct ms_hyperv_tsc_page hvclock_page
 	__attribute__((visibility("hidden")));
 #endif
@@ -192,7 +192,7 @@ static u64 vread_pvclock(void)
 }
 #endif
 
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
 static u64 vread_hvclock(void)
 {
 	return hv_read_tsc_page(&hvclock_page);
@@ -215,7 +215,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode)
 		return vread_pvclock();
 	}
 #endif
-#ifdef CONFIG_HYPERV_TSCPAGE
+#ifdef CONFIG_HYPERV_TIMER
 	if (clock_mode == VCLOCK_HVCLOCK) {
 		barrier();
 		return vread_hvclock();
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index c322ab4d3689..2317d4e3daaf 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -213,8 +213,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
 struct clocksource *hyperv_cs;
 EXPORT_SYMBOL_GPL(hyperv_cs);
 
-#ifdef CONFIG_HYPERV_TSCPAGE
-
 static struct ms_hyperv_tsc_page tsc_pg __aligned(PAGE_SIZE);
 
 struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
@@ -245,7 +243,6 @@ static struct clocksource hyperv_cs_tsc = {
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
-#endif
 
 static u64 notrace read_hv_clock_msr(struct clocksource *arg)
 {
@@ -272,7 +269,6 @@ static struct clocksource hyperv_cs_msr = {
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-#ifdef CONFIG_HYPERV_TSCPAGE
 static bool __init hv_init_tsc_clocksource(void)
 {
 	u64		tsc_msr;
@@ -304,13 +300,6 @@ static bool __init hv_init_tsc_clocksource(void)
 
 	return true;
 }
-#else
-static bool __init hv_init_tsc_clocksource(void)
-{
-	return false;
-}
-#endif
-
 
 void __init hv_init_clocksource(void)
 {
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 9a59957922d4..79e5356a737a 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -14,9 +14,6 @@ config HYPERV
 config HYPERV_TIMER
 	def_bool HYPERV
 
-config HYPERV_TSCPAGE
-       def_bool HYPERV && X86_64
-
 config HYPERV_UTILS
 	tristate "Microsoft Hyper-V Utilities driver"
 	depends on HYPERV && CONNECTOR && NLS
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index a821deb8ecb2..422f5e5237be 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -28,12 +28,10 @@ extern void hv_stimer_cleanup(unsigned int cpu);
 extern void hv_stimer_global_cleanup(void);
 extern void hv_stimer0_isr(void);
 
-#if IS_ENABLED(CONFIG_HYPERV)
+#ifdef CONFIG_HYPERV_TIMER
 extern struct clocksource *hyperv_cs;
 extern void hv_init_clocksource(void);
-#endif /* CONFIG_HYPERV */
 
-#ifdef CONFIG_HYPERV_TSCPAGE
 extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
 
 static inline notrace u64
@@ -91,7 +89,7 @@ hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
 	return hv_read_tsc_page_tsc(tsc_pg, &cur_tsc);
 }
 
-#else /* CONFIG_HYPERV_TSC_PAGE */
+#else /* CONFIG_HYPERV_TIMER */
 static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
 {
 	return NULL;
@@ -102,6 +100,6 @@ static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg,
 {
 	return U64_MAX;
 }
-#endif /* CONFIG_HYPERV_TSCPAGE */
+#endif /* CONFIG_HYPERV_TIMER */
 
 #endif
-- 
cgit v1.2.3


From 428826f5358c922dc378830a1717b682c0823160 Mon Sep 17 00:00:00 2001
From: Hsin-Yi Wang <hsinyi@chromium.org>
Date: Fri, 23 Aug 2019 14:24:51 +0800
Subject: fdt: add support for rng-seed

Introducing a chosen node, rng-seed, which is an entropy that can be
passed to kernel called very early to increase initial device
randomness. Bootloader should provide this entropy and the value is
read from /chosen/rng-seed in DT.

Obtain of_fdt_crc32 for CRC check after early_init_dt_scan_nodes(),
since early_init_dt_scan_chosen() would modify fdt to erase rng-seed.

Add a new interface add_bootloader_randomness() for rng-seed use case.
Depends on whether the seed is trustworthy, rng seed would be passed to
add_hwgenerator_randomness(). Otherwise it would be passed to
add_device_randomness(). Decision is controlled by kernel config
RANDOM_TRUST_BOOTLOADER.

Signed-off-by: Hsin-Yi Wang <hsinyi@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Theodore Ts'o <tytso@mit.edu> # drivers/char/random.c
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/char/Kconfig   |  9 +++++++++
 drivers/char/random.c  | 14 ++++++++++++++
 drivers/of/fdt.c       | 14 ++++++++++++--
 include/linux/random.h |  1 +
 4 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 3e866885a405..2794f4b3f62d 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -573,3 +573,12 @@ config RANDOM_TRUST_CPU
 	has not installed a hidden back door to compromise the CPU's
 	random number generation facilities. This can also be configured
 	at boot with "random.trust_cpu=on/off".
+
+config RANDOM_TRUST_BOOTLOADER
+	bool "Trust the bootloader to initialize Linux's CRNG"
+	help
+	Some bootloaders can provide entropy to increase the kernel's initial
+	device randomness. Say Y here to assume the entropy provided by the
+	booloader is trustworthy so it will be added to the kernel's entropy
+	pool. Otherwise, say N here so it will be regarded as device input that
+	only mixes the entropy pool.
\ No newline at end of file
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 5d5ea4ce1442..566922df4b7b 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2445,3 +2445,17 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
 	credit_entropy_bits(poolp, entropy);
 }
 EXPORT_SYMBOL_GPL(add_hwgenerator_randomness);
+
+/* Handle random seed passed by bootloader.
+ * If the seed is trustworthy, it would be regarded as hardware RNGs. Otherwise
+ * it would be regarded as device data.
+ * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER.
+ */
+void add_bootloader_randomness(const void *buf, unsigned int size)
+{
+	if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER))
+		add_hwgenerator_randomness(buf, size, size * 8);
+	else
+		add_device_randomness(buf, size);
+}
+EXPORT_SYMBOL_GPL(add_bootloader_randomness);
\ No newline at end of file
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 9cdf14b9aaab..7d97ab6d0e31 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -24,6 +24,7 @@
 #include <linux/debugfs.h>
 #include <linux/serial_core.h>
 #include <linux/sysfs.h>
+#include <linux/random.h>
 
 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
 #include <asm/page.h>
@@ -1044,6 +1045,7 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 {
 	int l;
 	const char *p;
+	const void *rng_seed;
 
 	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
 
@@ -1078,6 +1080,14 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 
 	pr_debug("Command line is: %s\n", (char*)data);
 
+	rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l);
+	if (rng_seed && l > 0) {
+		add_bootloader_randomness(rng_seed, l);
+
+		/* try to clear seed so it won't be found. */
+		fdt_nop_property(initial_boot_params, node, "rng-seed");
+	}
+
 	/* break now */
 	return 1;
 }
@@ -1166,8 +1176,6 @@ bool __init early_init_dt_verify(void *params)
 
 	/* Setup flat device-tree pointer */
 	initial_boot_params = params;
-	of_fdt_crc32 = crc32_be(~0, initial_boot_params,
-				fdt_totalsize(initial_boot_params));
 	return true;
 }
 
@@ -1197,6 +1205,8 @@ bool __init early_init_dt_scan(void *params)
 		return false;
 
 	early_init_dt_scan_nodes();
+	of_fdt_crc32 = crc32_be(~0, initial_boot_params,
+				fdt_totalsize(initial_boot_params));
 	return true;
 }
 
diff --git a/include/linux/random.h b/include/linux/random.h
index 1f7dced2bba6..f189c927fdea 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -19,6 +19,7 @@ struct random_ready_callback {
 };
 
 extern void add_device_randomness(const void *, unsigned int);
+extern void add_bootloader_randomness(const void *, unsigned int);
 
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 static inline void add_latent_entropy(void)
-- 
cgit v1.2.3


From bd1200b79510a68554890af2f48d92be6eb1daf8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 23 Aug 2019 18:47:21 +0300
Subject: drop_monitor: Make timestamps y2038 safe

Timestamps are currently communicated to user space as 'struct
timespec', which is not considered y2038 safe since it uses a 32-bit
signed value for seconds.

Fix this while the API is still not part of any official kernel release
by using 64-bit nanoseconds timestamps instead.

Fixes: ca30707dee2b ("drop_monitor: Add packet alert mode")
Fixes: 5e58109b1ea4 ("drop_monitor: Add support for packet alert mode for hardware drops")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/net_dropmon.h |  2 +-
 net/core/drop_monitor.c          | 14 ++++++--------
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 75a35dccb675..8bf79a9eb234 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -75,7 +75,7 @@ enum net_dm_attr {
 	NET_DM_ATTR_PC,				/* u64 */
 	NET_DM_ATTR_SYMBOL,			/* string */
 	NET_DM_ATTR_IN_PORT,			/* nested */
-	NET_DM_ATTR_TIMESTAMP,			/* struct timespec */
+	NET_DM_ATTR_TIMESTAMP,			/* u64 */
 	NET_DM_ATTR_PROTO,			/* u16 */
 	NET_DM_ATTR_PAYLOAD,			/* binary */
 	NET_DM_ATTR_PAD,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index bfc024024aa3..cc60cc22e2db 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -552,7 +552,7 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	       /* NET_DM_ATTR_IN_PORT */
 	       net_dm_in_port_size() +
 	       /* NET_DM_ATTR_TIMESTAMP */
-	       nla_total_size(sizeof(struct timespec)) +
+	       nla_total_size(sizeof(u64)) +
 	       /* NET_DM_ATTR_ORIG_LEN */
 	       nla_total_size(sizeof(u32)) +
 	       /* NET_DM_ATTR_PROTO */
@@ -592,7 +592,6 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
 	char buf[NET_DM_MAX_SYMBOL_LEN];
 	struct nlattr *attr;
-	struct timespec ts;
 	void *hdr;
 	int rc;
 
@@ -615,8 +614,8 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	if (rc)
 		goto nla_put_failure;
 
-	if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
-	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+			      ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
 		goto nla_put_failure;
 
 	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
@@ -716,7 +715,7 @@ net_dm_hw_packet_report_size(size_t payload_len,
 	       /* NET_DM_ATTR_IN_PORT */
 	       net_dm_in_port_size() +
 	       /* NET_DM_ATTR_TIMESTAMP */
-	       nla_total_size(sizeof(struct timespec)) +
+	       nla_total_size(sizeof(u64)) +
 	       /* NET_DM_ATTR_ORIG_LEN */
 	       nla_total_size(sizeof(u32)) +
 	       /* NET_DM_ATTR_PROTO */
@@ -730,7 +729,6 @@ static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
 {
 	struct net_dm_hw_metadata *hw_metadata;
 	struct nlattr *attr;
-	struct timespec ts;
 	void *hdr;
 
 	hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
@@ -761,8 +759,8 @@ static int net_dm_hw_packet_report_fill(struct sk_buff *msg,
 			goto nla_put_failure;
 	}
 
-	if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
-	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP,
+			      ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD))
 		goto nla_put_failure;
 
 	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
-- 
cgit v1.2.3


From 47e4937a4a7ca4184fd282791dfee76c6799966a Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@aol.com>
Date: Fri, 23 Aug 2019 05:36:59 +0800
Subject: erofs: move erofs out of staging

EROFS filesystem has been merged into linux-staging for a year.

EROFS is designed to be a better solution of saving extra storage
space with guaranteed end-to-end performance for read-only files
with the help of reduced metadata, fixed-sized output compression
and decompression inplace technologies.

In the past year, EROFS was greatly improved by many people as
a staging driver, self-tested, betaed by a large number of our
internal users, successfully applied to almost all in-service
HUAWEI smartphones as the part of EMUI 9.1 and proven to be stable
enough to be moved out of staging.

EROFS is a self-contained filesystem driver. Although there are
still some TODOs to be more generic, we have a dedicated team
actively keeping on working on EROFS in order to make it better
with the evolution of Linux kernel as the other in-kernel filesystems.

As Pavel suggested, it's better to do as one commit since git
can do moves and all histories will be saved in this way.

Let's promote it from staging and enhance it more actively as
a "real" part of kernel for more wider scenarios!

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Pavel Machek <pavel@denx.de>
Cc: David Sterba <dsterba@suse.cz>
Cc: Amir Goldstein <amir73il@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Darrick J . Wong <darrick.wong@oracle.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Richard Weinberger <richard@nod.at>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Chao Yu <yuchao0@huawei.com>
Cc: Miao Xie <miaoxie@huawei.com>
Cc: Li Guifu <bluce.liguifu@huawei.com>
Cc: Fang Wei <fangwei1@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190822213659.5501-1-hsiangkao@aol.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/filesystems/erofs.txt                |  219 +++
 MAINTAINERS                                        |   14 +-
 drivers/staging/Kconfig                            |    2 -
 drivers/staging/Makefile                           |    1 -
 .../erofs/Documentation/filesystems/erofs.txt      |  223 ---
 drivers/staging/erofs/Kconfig                      |   98 --
 drivers/staging/erofs/Makefile                     |   13 -
 drivers/staging/erofs/TODO                         |   46 -
 drivers/staging/erofs/compress.h                   |   62 -
 drivers/staging/erofs/data.c                       |  425 ------
 drivers/staging/erofs/decompressor.c               |  360 -----
 drivers/staging/erofs/dir.c                        |  141 --
 drivers/staging/erofs/erofs_fs.h                   |  310 -----
 drivers/staging/erofs/include/trace/events/erofs.h |  256 ----
 drivers/staging/erofs/inode.c                      |  334 -----
 drivers/staging/erofs/internal.h                   |  554 --------
 drivers/staging/erofs/namei.c                      |  253 ----
 drivers/staging/erofs/super.c                      |  671 ---------
 drivers/staging/erofs/tagptr.h                     |  110 --
 drivers/staging/erofs/utils.c                      |  335 -----
 drivers/staging/erofs/xattr.c                      |  705 ----------
 drivers/staging/erofs/xattr.h                      |   94 --
 drivers/staging/erofs/zdata.c                      | 1434 --------------------
 drivers/staging/erofs/zdata.h                      |  195 ---
 drivers/staging/erofs/zmap.c                       |  468 -------
 drivers/staging/erofs/zpvec.h                      |  159 ---
 fs/Kconfig                                         |    1 +
 fs/Makefile                                        |    1 +
 fs/erofs/Kconfig                                   |   98 ++
 fs/erofs/Makefile                                  |   11 +
 fs/erofs/compress.h                                |   60 +
 fs/erofs/data.c                                    |  423 ++++++
 fs/erofs/decompressor.c                            |  358 +++++
 fs/erofs/dir.c                                     |  139 ++
 fs/erofs/erofs_fs.h                                |  307 +++++
 fs/erofs/inode.c                                   |  332 +++++
 fs/erofs/internal.h                                |  553 ++++++++
 fs/erofs/namei.c                                   |  251 ++++
 fs/erofs/super.c                                   |  669 +++++++++
 fs/erofs/tagptr.h                                  |  110 ++
 fs/erofs/utils.c                                   |  333 +++++
 fs/erofs/xattr.c                                   |  703 ++++++++++
 fs/erofs/xattr.h                                   |   92 ++
 fs/erofs/zdata.c                                   | 1432 +++++++++++++++++++
 fs/erofs/zdata.h                                   |  193 +++
 fs/erofs/zmap.c                                    |  466 +++++++
 fs/erofs/zpvec.h                                   |  157 +++
 include/trace/events/erofs.h                       |  256 ++++
 include/uapi/linux/magic.h                         |    1 +
 49 files changed, 7172 insertions(+), 7256 deletions(-)
 create mode 100644 Documentation/filesystems/erofs.txt
 delete mode 100644 drivers/staging/erofs/Documentation/filesystems/erofs.txt
 delete mode 100644 drivers/staging/erofs/Kconfig
 delete mode 100644 drivers/staging/erofs/Makefile
 delete mode 100644 drivers/staging/erofs/TODO
 delete mode 100644 drivers/staging/erofs/compress.h
 delete mode 100644 drivers/staging/erofs/data.c
 delete mode 100644 drivers/staging/erofs/decompressor.c
 delete mode 100644 drivers/staging/erofs/dir.c
 delete mode 100644 drivers/staging/erofs/erofs_fs.h
 delete mode 100644 drivers/staging/erofs/include/trace/events/erofs.h
 delete mode 100644 drivers/staging/erofs/inode.c
 delete mode 100644 drivers/staging/erofs/internal.h
 delete mode 100644 drivers/staging/erofs/namei.c
 delete mode 100644 drivers/staging/erofs/super.c
 delete mode 100644 drivers/staging/erofs/tagptr.h
 delete mode 100644 drivers/staging/erofs/utils.c
 delete mode 100644 drivers/staging/erofs/xattr.c
 delete mode 100644 drivers/staging/erofs/xattr.h
 delete mode 100644 drivers/staging/erofs/zdata.c
 delete mode 100644 drivers/staging/erofs/zdata.h
 delete mode 100644 drivers/staging/erofs/zmap.c
 delete mode 100644 drivers/staging/erofs/zpvec.h
 create mode 100644 fs/erofs/Kconfig
 create mode 100644 fs/erofs/Makefile
 create mode 100644 fs/erofs/compress.h
 create mode 100644 fs/erofs/data.c
 create mode 100644 fs/erofs/decompressor.c
 create mode 100644 fs/erofs/dir.c
 create mode 100644 fs/erofs/erofs_fs.h
 create mode 100644 fs/erofs/inode.c
 create mode 100644 fs/erofs/internal.h
 create mode 100644 fs/erofs/namei.c
 create mode 100644 fs/erofs/super.c
 create mode 100644 fs/erofs/tagptr.h
 create mode 100644 fs/erofs/utils.c
 create mode 100644 fs/erofs/xattr.c
 create mode 100644 fs/erofs/xattr.h
 create mode 100644 fs/erofs/zdata.c
 create mode 100644 fs/erofs/zdata.h
 create mode 100644 fs/erofs/zmap.c
 create mode 100644 fs/erofs/zpvec.h
 create mode 100644 include/trace/events/erofs.h

(limited to 'include')

diff --git a/Documentation/filesystems/erofs.txt b/Documentation/filesystems/erofs.txt
new file mode 100644
index 000000000000..38aa9126ec98
--- /dev/null
+++ b/Documentation/filesystems/erofs.txt
@@ -0,0 +1,219 @@
+Overview
+========
+
+EROFS file-system stands for Enhanced Read-Only File System. Different
+from other read-only file systems, it aims to be designed for flexibility,
+scalability, but be kept simple and high performance.
+
+It is designed as a better filesystem solution for the following scenarios:
+ - read-only storage media or
+
+ - part of a fully trusted read-only solution, which means it needs to be
+   immutable and bit-for-bit identical to the official golden image for
+   their releases due to security and other considerations and
+
+ - hope to save some extra storage space with guaranteed end-to-end performance
+   by using reduced metadata and transparent file compression, especially
+   for those embedded devices with limited memory (ex, smartphone);
+
+Here is the main features of EROFS:
+ - Little endian on-disk design;
+
+ - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
+
+ - Metadata & data could be mixed by design;
+
+ - 2 inode versions for different requirements:
+                          v1            v2
+   Inode metadata size:   32 bytes      64 bytes
+   Max file size:         4 GB          16 EB (also limited by max. vol size)
+   Max uids/gids:         65536         4294967296
+   File creation time:    no            yes (64 + 32-bit timestamp)
+   Max hardlinks:         65536         4294967296
+   Metadata reserved:     4 bytes       14 bytes
+
+ - Support extended attributes (xattrs) as an option;
+
+ - Support xattr inline and tail-end data inline for all files;
+
+ - Support POSIX.1e ACLs by using xattrs;
+
+ - Support transparent file compression as an option:
+   LZ4 algorithm with 4 KB fixed-output compression for high performance;
+
+The following git tree provides the file system user-space tools under
+development (ex, formatting tool mkfs.erofs):
+>> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
+
+Bugs and patches are welcome, please kindly help us and send to the following
+linux-erofs mailing list:
+>> linux-erofs mailing list   <linux-erofs@lists.ozlabs.org>
+
+Mount options
+=============
+
+fault_injection=%d     Enable fault injection in all supported types with
+                       specified injection rate. Supported injection type:
+                       Type_Name                Type_Value
+                       FAULT_KMALLOC            0x000000001
+                       FAULT_READ_IO            0x000000002
+(no)user_xattr         Setup Extended User Attributes. Note: xattr is enabled
+                       by default if CONFIG_EROFS_FS_XATTR is selected.
+(no)acl                Setup POSIX Access Control List. Note: acl is enabled
+                       by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
+cache_strategy=%s      Select a strategy for cached decompression from now on:
+                         disabled: In-place I/O decompression only;
+                        readahead: Cache the last incomplete compressed physical
+                                   cluster for further reading. It still does
+                                   in-place I/O decompression for the rest
+                                   compressed physical clusters;
+                       readaround: Cache the both ends of incomplete compressed
+                                   physical clusters for further reading.
+                                   It still does in-place I/O decompression
+                                   for the rest compressed physical clusters.
+
+Module parameters
+=================
+use_vmap=[0|1]         Use vmap() instead of vm_map_ram() (default 0).
+
+On-disk details
+===============
+
+Summary
+-------
+Different from other read-only file systems, an EROFS volume is designed
+to be as simple as possible:
+
+                                |-> aligned with the block size
+   ____________________________________________________________
+  | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
+  |_|__|_|_____|__________|_____|______|__________|_____|______|
+  0 +1K
+
+All data areas should be aligned with the block size, but metadata areas
+may not. All metadatas can be now observed in two different spaces (views):
+ 1. Inode metadata space
+    Each valid inode should be aligned with an inode slot, which is a fixed
+    value (32 bytes) and designed to be kept in line with v1 inode size.
+
+    Each inode can be directly found with the following formula:
+         inode offset = meta_blkaddr * block_size + 32 * nid
+
+                                |-> aligned with 8B
+                                           |-> followed closely
+    + meta_blkaddr blocks                                      |-> another slot
+     _____________________________________________________________________
+    |  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
+    |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+             |-> aligned with the inode slot size
+                  .                   .
+                .                         .
+              .                              .
+            .                                    .
+          .                                         .
+        .                                              .
+      .____________________________________________________|-> aligned with 4B
+      | xattr_ibody_header | shared xattrs | inline xattrs |
+      |____________________|_______________|_______________|
+      |->    12 bytes    <-|->x * 4 bytes<-|               .
+                          .                .                 .
+                    .                      .                   .
+               .                           .                     .
+           ._______________________________.______________________.
+           | id | id | id | id |  ... | id | ent | ... | ent| ... |
+           |____|____|____|____|______|____|_____|_____|____|_____|
+                                           |-> aligned with 4B
+                                                       |-> aligned with 4B
+
+    Inode could be 32 or 64 bytes, which can be distinguished from a common
+    field which all inode versions have -- i_advise:
+
+        __________________               __________________
+       |     i_advise     |             |     i_advise     |
+       |__________________|             |__________________|
+       |        ...       |             |        ...       |
+       |                  |             |                  |
+       |__________________| 32 bytes    |                  |
+                                        |                  |
+                                        |__________________| 64 bytes
+
+    Xattrs, extents, data inline are followed by the corresponding inode with
+    proper alignes, and they could be optional for different data mappings,
+    _currently_ there are totally 3 valid data mappings supported:
+
+     1) flat file data without data inline (no extent);
+     2) fixed-output size data compression (must have extents);
+     3) flat file data with tail-end data inline (no extent);
+
+    The size of the optional xattrs is indicated by i_xattr_count in inode
+    header. Large xattrs or xattrs shared by many different files can be
+    stored in shared xattrs metadata rather than inlined right after inode.
+
+ 2. Shared xattrs metadata space
+    Shared xattrs space is similar to the above inode space, started with
+    a specific block indicated by xattr_blkaddr, organized one by one with
+    proper align.
+
+    Each share xattr can also be directly found by the following formula:
+         xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
+
+                           |-> aligned by  4 bytes
+    + xattr_blkaddr blocks                     |-> aligned with 4 bytes
+     _________________________________________________________________________
+    |  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
+    |________|_____________|_____________|_____|______________|_______________
+
+Directories
+-----------
+All directories are now organized in a compact on-disk format. Note that
+each directory block is divided into index and name areas in order to support
+random file lookup, and all directory entries are _strictly_ recorded in
+alphabetical order in order to support improved prefix binary search
+algorithm (could refer to the related source code).
+
+                 ___________________________
+                /                           |
+               /              ______________|________________
+              /              /              | nameoff1       | nameoffN-1
+ ____________.______________._______________v________________v__________
+| dirent | dirent | ... | dirent | filename | filename | ... | filename |
+|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+     \                           ^
+      \                          |                           * could have
+       \                         |                             trailing '\0'
+        \________________________| nameoff0
+
+                             Directory block
+
+Note that apart from the offset of the first filename, nameoff0 also indicates
+the total number of directory entries in this block since it is no need to
+introduce another on-disk field at all.
+
+Compression
+-----------
+Currently, EROFS supports 4KB fixed-output clustersize transparent file
+compression, as illustrated below:
+
+         |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
+         clusterofs                      clusterofs            clusterofs
+         |                               |                     |   logical data
+_________v_______________________________v_____________________v_______________
+... |    .        |             |        .    |             |  .          | ...
+____|____.________|_____________|________.____|_____________|__.__________|____
+    |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
+         size          size          size          size          size
+          .                             .                .                   .
+           .                       .               .                  .
+            .                  .              .                .
+      _______._____________._____________._____________._____________________
+         ... |             |             |             | ... physical data
+      _______|_____________|_____________|_____________|_____________________
+             |-> cluster <-|-> cluster <-|-> cluster <-|
+                  size          size          size
+
+Currently each on-disk physical cluster can contain 4KB (un)compressed data
+at most. For each logical cluster, there is a corresponding on-disk index to
+describe its cluster type, physical cluster address, etc.
+
+See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 6847372cfab8..0f38cba2c581 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6046,6 +6046,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kristoffer/linux-hpc.git
 F:	drivers/video/fbdev/s1d13xxxfb.c
 F:	include/video/s1d13xxxfb.h
 
+EROFS FILE SYSTEM
+M:	Gao Xiang <gaoxiang25@huawei.com>
+M:	Chao Yu <yuchao0@huawei.com>
+L:	linux-erofs@lists.ozlabs.org
+S:	Maintained
+F:	fs/erofs/
+
 ERRSEQ ERROR TRACKING INFRASTRUCTURE
 M:	Jeff Layton <jlayton@kernel.org>
 S:	Maintained
@@ -15229,13 +15236,6 @@ M:	H Hartley Sweeten <hsweeten@visionengravers.com>
 S:	Odd Fixes
 F:	drivers/staging/comedi/
 
-STAGING - EROFS FILE SYSTEM
-M:	Gao Xiang <gaoxiang25@huawei.com>
-M:	Chao Yu <yuchao0@huawei.com>
-L:	linux-erofs@lists.ozlabs.org
-S:	Maintained
-F:	drivers/staging/erofs/
-
 STAGING - FIELDBUS SUBSYSTEM
 M:	Sven Van Asbroeck <TheSven73@gmail.com>
 S:	Maintained
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 7c96a01eef6c..d972ec8e71fb 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -112,8 +112,6 @@ source "drivers/staging/gasket/Kconfig"
 
 source "drivers/staging/axis-fifo/Kconfig"
 
-source "drivers/staging/erofs/Kconfig"
-
 source "drivers/staging/fieldbus/Kconfig"
 
 source "drivers/staging/kpc2000/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index fcaac9693b83..6018b9a4a077 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -46,7 +46,6 @@ obj-$(CONFIG_DMA_RALINK)	+= ralink-gdma/
 obj-$(CONFIG_SOC_MT7621)	+= mt7621-dts/
 obj-$(CONFIG_STAGING_GASKET_FRAMEWORK)	+= gasket/
 obj-$(CONFIG_XIL_AXIS_FIFO)	+= axis-fifo/
-obj-$(CONFIG_EROFS_FS)		+= erofs/
 obj-$(CONFIG_FIELDBUS_DEV)     += fieldbus/
 obj-$(CONFIG_KPC2000)		+= kpc2000/
 obj-$(CONFIG_ISDN_CAPI)		+= isdn/
diff --git a/drivers/staging/erofs/Documentation/filesystems/erofs.txt b/drivers/staging/erofs/Documentation/filesystems/erofs.txt
deleted file mode 100644
index 0eab600ca7ca..000000000000
--- a/drivers/staging/erofs/Documentation/filesystems/erofs.txt
+++ /dev/null
@@ -1,223 +0,0 @@
-Overview
-========
-
-EROFS file-system stands for Enhanced Read-Only File System. Different
-from other read-only file systems, it aims to be designed for flexibility,
-scalability, but be kept simple and high performance.
-
-It is designed as a better filesystem solution for the following scenarios:
- - read-only storage media or
-
- - part of a fully trusted read-only solution, which means it needs to be
-   immutable and bit-for-bit identical to the official golden image for
-   their releases due to security and other considerations and
-
- - hope to save some extra storage space with guaranteed end-to-end performance
-   by using reduced metadata and transparent file compression, especially
-   for those embedded devices with limited memory (ex, smartphone);
-
-Here is the main features of EROFS:
- - Little endian on-disk design;
-
- - Currently 4KB block size (nobh) and therefore maximum 16TB address space;
-
- - Metadata & data could be mixed by design;
-
- - 2 inode versions for different requirements:
-                          v1            v2
-   Inode metadata size:   32 bytes      64 bytes
-   Max file size:         4 GB          16 EB (also limited by max. vol size)
-   Max uids/gids:         65536         4294967296
-   File creation time:    no            yes (64 + 32-bit timestamp)
-   Max hardlinks:         65536         4294967296
-   Metadata reserved:     4 bytes       14 bytes
-
- - Support extended attributes (xattrs) as an option;
-
- - Support xattr inline and tail-end data inline for all files;
-
- - Support POSIX.1e ACLs by using xattrs;
-
- - Support transparent file compression as an option:
-   LZ4 algorithm with 4 KB fixed-output compression for high performance;
-
-The following git tree provides the file system user-space tools under
-development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
-
-Bugs and patches are welcome, please kindly help us and send to the following
-linux-erofs mailing list:
->> linux-erofs mailing list   <linux-erofs@lists.ozlabs.org>
-
-Note that EROFS is still working in progress as a Linux staging driver,
-Cc the staging mailing list as well is highly recommended:
->> Linux Driver Project Developer List <devel@driverdev.osuosl.org>
-
-Mount options
-=============
-
-fault_injection=%d     Enable fault injection in all supported types with
-                       specified injection rate. Supported injection type:
-                       Type_Name                Type_Value
-                       FAULT_KMALLOC            0x000000001
-                       FAULT_READ_IO            0x000000002
-(no)user_xattr         Setup Extended User Attributes. Note: xattr is enabled
-                       by default if CONFIG_EROFS_FS_XATTR is selected.
-(no)acl                Setup POSIX Access Control List. Note: acl is enabled
-                       by default if CONFIG_EROFS_FS_POSIX_ACL is selected.
-cache_strategy=%s      Select a strategy for cached decompression from now on:
-                         disabled: In-place I/O decompression only;
-                        readahead: Cache the last incomplete compressed physical
-                                   cluster for further reading. It still does
-                                   in-place I/O decompression for the rest
-                                   compressed physical clusters;
-                       readaround: Cache the both ends of incomplete compressed
-                                   physical clusters for further reading.
-                                   It still does in-place I/O decompression
-                                   for the rest compressed physical clusters.
-
-Module parameters
-=================
-use_vmap=[0|1]         Use vmap() instead of vm_map_ram() (default 0).
-
-On-disk details
-===============
-
-Summary
--------
-Different from other read-only file systems, an EROFS volume is designed
-to be as simple as possible:
-
-                                |-> aligned with the block size
-   ____________________________________________________________
-  | |SB| | ... | Metadata | ... | Data | Metadata | ... | Data |
-  |_|__|_|_____|__________|_____|______|__________|_____|______|
-  0 +1K
-
-All data areas should be aligned with the block size, but metadata areas
-may not. All metadatas can be now observed in two different spaces (views):
- 1. Inode metadata space
-    Each valid inode should be aligned with an inode slot, which is a fixed
-    value (32 bytes) and designed to be kept in line with v1 inode size.
-
-    Each inode can be directly found with the following formula:
-         inode offset = meta_blkaddr * block_size + 32 * nid
-
-                                |-> aligned with 8B
-                                           |-> followed closely
-    + meta_blkaddr blocks                                      |-> another slot
-     _____________________________________________________________________
-    |  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
-    |________|_______|(optional)|(optional)|__(optional)_|_____|__________
-             |-> aligned with the inode slot size
-                  .                   .
-                .                         .
-              .                              .
-            .                                    .
-          .                                         .
-        .                                              .
-      .____________________________________________________|-> aligned with 4B
-      | xattr_ibody_header | shared xattrs | inline xattrs |
-      |____________________|_______________|_______________|
-      |->    12 bytes    <-|->x * 4 bytes<-|               .
-                          .                .                 .
-                    .                      .                   .
-               .                           .                     .
-           ._______________________________.______________________.
-           | id | id | id | id |  ... | id | ent | ... | ent| ... |
-           |____|____|____|____|______|____|_____|_____|____|_____|
-                                           |-> aligned with 4B
-                                                       |-> aligned with 4B
-
-    Inode could be 32 or 64 bytes, which can be distinguished from a common
-    field which all inode versions have -- i_advise:
-
-        __________________               __________________
-       |     i_advise     |             |     i_advise     |
-       |__________________|             |__________________|
-       |        ...       |             |        ...       |
-       |                  |             |                  |
-       |__________________| 32 bytes    |                  |
-                                        |                  |
-                                        |__________________| 64 bytes
-
-    Xattrs, extents, data inline are followed by the corresponding inode with
-    proper alignes, and they could be optional for different data mappings,
-    _currently_ there are totally 3 valid data mappings supported:
-
-     1) flat file data without data inline (no extent);
-     2) fixed-output size data compression (must have extents);
-     3) flat file data with tail-end data inline (no extent);
-
-    The size of the optional xattrs is indicated by i_xattr_count in inode
-    header. Large xattrs or xattrs shared by many different files can be
-    stored in shared xattrs metadata rather than inlined right after inode.
-
- 2. Shared xattrs metadata space
-    Shared xattrs space is similar to the above inode space, started with
-    a specific block indicated by xattr_blkaddr, organized one by one with
-    proper align.
-
-    Each share xattr can also be directly found by the following formula:
-         xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
-
-                           |-> aligned by  4 bytes
-    + xattr_blkaddr blocks                     |-> aligned with 4 bytes
-     _________________________________________________________________________
-    |  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
-    |________|_____________|_____________|_____|______________|_______________
-
-Directories
------------
-All directories are now organized in a compact on-disk format. Note that
-each directory block is divided into index and name areas in order to support
-random file lookup, and all directory entries are _strictly_ recorded in
-alphabetical order in order to support improved prefix binary search
-algorithm (could refer to the related source code).
-
-                 ___________________________
-                /                           |
-               /              ______________|________________
-              /              /              | nameoff1       | nameoffN-1
- ____________.______________._______________v________________v__________
-| dirent | dirent | ... | dirent | filename | filename | ... | filename |
-|___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
-     \                           ^
-      \                          |                           * could have
-       \                         |                             trailing '\0'
-        \________________________| nameoff0
-
-                             Directory block
-
-Note that apart from the offset of the first filename, nameoff0 also indicates
-the total number of directory entries in this block since it is no need to
-introduce another on-disk field at all.
-
-Compression
------------
-Currently, EROFS supports 4KB fixed-output clustersize transparent file
-compression, as illustrated below:
-
-         |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
-         clusterofs                      clusterofs            clusterofs
-         |                               |                     |   logical data
-_________v_______________________________v_____________________v_______________
-... |    .        |             |        .    |             |  .          | ...
-____|____.________|_____________|________.____|_____________|__.__________|____
-    |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
-         size          size          size          size          size
-          .                             .                .                   .
-           .                       .               .                  .
-            .                  .              .                .
-      _______._____________._____________._____________._____________________
-         ... |             |             |             | ... physical data
-      _______|_____________|_____________|_____________|_____________________
-             |-> cluster <-|-> cluster <-|-> cluster <-|
-                  size          size          size
-
-Currently each on-disk physical cluster can contain 4KB (un)compressed data
-at most. For each logical cluster, there is a corresponding on-disk index to
-describe its cluster type, physical cluster address, etc.
-
-See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
-
diff --git a/drivers/staging/erofs/Kconfig b/drivers/staging/erofs/Kconfig
deleted file mode 100644
index 16316d1adca3..000000000000
--- a/drivers/staging/erofs/Kconfig
+++ /dev/null
@@ -1,98 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-config EROFS_FS
-	tristate "EROFS filesystem support"
-	depends on BLOCK
-	help
-	  EROFS (Enhanced Read-Only File System) is a lightweight
-	  read-only file system with modern designs (eg. page-sized
-	  blocks, inline xattrs/data, etc.) for scenarios which need
-	  high-performance read-only requirements, e.g. Android OS
-	  for mobile phones and LIVECDs.
-
-	  It also provides fixed-sized output compression support,
-	  which improves storage density, keeps relatively higher
-	  compression ratios, which is more useful to achieve high
-	  performance for embedded devices with limited memory.
-
-	  If unsure, say N.
-
-config EROFS_FS_DEBUG
-	bool "EROFS debugging feature"
-	depends on EROFS_FS
-	help
-	  Print debugging messages and enable more BUG_ONs which check
-	  filesystem consistency and find potential issues aggressively,
-	  which can be used for Android eng build, for example.
-
-	  For daily use, say N.
-
-config EROFS_FAULT_INJECTION
-	bool "EROFS fault injection facility"
-	depends on EROFS_FS
-	help
-	  Test EROFS to inject faults such as ENOMEM, EIO, and so on.
-	  If unsure, say N.
-
-config EROFS_FS_XATTR
-	bool "EROFS extended attributes"
-	depends on EROFS_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config EROFS_FS_POSIX_ACL
-	bool "EROFS Access Control Lists"
-	depends on EROFS_FS_XATTR
-	select FS_POSIX_ACL
-	default y
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the POSIX ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N.
-
-config EROFS_FS_SECURITY
-	bool "EROFS Security Labels"
-	depends on EROFS_FS_XATTR
-	default y
-	help
-	  Security labels provide an access control facility to support Linux
-	  Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
-	  Linux. This option enables an extended attribute handler for file
-	  security labels in the erofs filesystem, so that it requires enabling
-	  the extended attribute support in advance.
-
-	  If you are not using a security module, say N.
-
-config EROFS_FS_ZIP
-	bool "EROFS Data Compression Support"
-	depends on EROFS_FS
-	select LZ4_DECOMPRESS
-	default y
-	help
-	  Enable fixed-sized output compression for EROFS.
-
-	  If you don't want to enable compression feature, say N.
-
-config EROFS_FS_CLUSTER_PAGE_LIMIT
-	int "EROFS Cluster Pages Hard Limit"
-	depends on EROFS_FS_ZIP
-	range 1 256
-	default "1"
-	help
-	  Indicates maximum # of pages of a compressed
-	  physical cluster.
-
-	  For example, if files in a image were compressed
-	  into 8k-unit, hard limit should not be configured
-	  less than 2. Otherwise, the image will be refused
-	  to mount on this kernel.
-
diff --git a/drivers/staging/erofs/Makefile b/drivers/staging/erofs/Makefile
deleted file mode 100644
index 5cdae21cb5af..000000000000
--- a/drivers/staging/erofs/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-EROFS_VERSION = "1.0pre1"
-
-ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"
-
-obj-$(CONFIG_EROFS_FS) += erofs.o
-# staging requirement: to be self-contained in its own directory
-ccflags-y += -I $(srctree)/$(src)/include
-erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
-erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
-
diff --git a/drivers/staging/erofs/TODO b/drivers/staging/erofs/TODO
deleted file mode 100644
index a8608b2f72bd..000000000000
--- a/drivers/staging/erofs/TODO
+++ /dev/null
@@ -1,46 +0,0 @@
-
-EROFS is still working in progress, thus it is not suitable
-for all productive uses. play at your own risk :)
-
-TODO List:
- - add the missing error handling code
-   (mainly existed in xattr and decompression submodules);
-
- - finalize erofs ondisk format design  (which means that
-   minor on-disk revisions could happen later);
-
- - documentation and detailed technical analysis;
-
- - general code review and clean up
-   (including confusing variable names and code snippets);
-
- - support larger compressed clustersizes for selection
-   (currently erofs only works as expected with the page-sized
-    compressed cluster configuration, usually 4KB);
-
- - support more lossless data compression algorithms
-   in addition to LZ4 algorithms in VLE approach;
-
- - data deduplication and other useful features.
-
-The following git tree provides the file system user-space
-tools under development (ex, formatting tool mkfs.erofs):
->> git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs-utils.git
-
-The open-source development of erofs-utils is at the early stage.
-Contact the original author Li Guifu <bluce.liguifu@huawei.com> and
-the co-maintainer Fang Wei <fangwei1@huawei.com> for the latest news
-and more details.
-
-Code, suggestions, etc, are welcome. Please feel free to
-ask and send patches,
-
-To:
-  linux-erofs mailing list   <linux-erofs@lists.ozlabs.org>
-  Gao Xiang                  <gaoxiang25@huawei.com>
-  Chao Yu                    <yuchao0@huawei.com>
-
-Cc: (for linux-kernel upstream patches)
-  Greg Kroah-Hartman         <gregkh@linuxfoundation.org>
-  linux-staging mailing list <devel@driverdev.osuosl.org>
-
diff --git a/drivers/staging/erofs/compress.h b/drivers/staging/erofs/compress.h
deleted file mode 100644
index 043013f9ef1b..000000000000
--- a/drivers/staging/erofs/compress.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/compress.h
- *
- * Copyright (C) 2019 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_COMPRESS_H
-#define __EROFS_FS_COMPRESS_H
-
-#include "internal.h"
-
-enum {
-	Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
-	Z_EROFS_COMPRESSION_RUNTIME_MAX
-};
-
-struct z_erofs_decompress_req {
-	struct super_block *sb;
-	struct page **in, **out;
-
-	unsigned short pageofs_out;
-	unsigned int inputsize, outputsize;
-
-	/* indicate the algorithm will be used for decompression */
-	unsigned int alg;
-	bool inplace_io, partial_decoding;
-};
-
-/*
- * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
- * used to mark temporary allocated pages from other
- * file/cached pages and NULL mapping pages.
- */
-#define Z_EROFS_MAPPING_STAGING         ((void *)0x5A110C8D)
-
-/* check if a page is marked as staging */
-static inline bool z_erofs_page_is_staging(struct page *page)
-{
-	return page->mapping == Z_EROFS_MAPPING_STAGING;
-}
-
-static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
-					   struct page *page)
-{
-	if (!z_erofs_page_is_staging(page))
-		return false;
-
-	/* staging pages should not be used by others at the same time */
-	if (page_ref_count(page) > 1)
-		put_page(page);
-	else
-		list_add(&page->lru, pagepool);
-	return true;
-}
-
-int z_erofs_decompress(struct z_erofs_decompress_req *rq,
-		       struct list_head *pagepool);
-
-#endif
-
diff --git a/drivers/staging/erofs/data.c b/drivers/staging/erofs/data.c
deleted file mode 100644
index 72c4b4c5296b..000000000000
--- a/drivers/staging/erofs/data.c
+++ /dev/null
@@ -1,425 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/data.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "internal.h"
-#include <linux/prefetch.h>
-
-#include <trace/events/erofs.h>
-
-static inline void read_endio(struct bio *bio)
-{
-	struct super_block *const sb = bio->bi_private;
-	struct bio_vec *bvec;
-	blk_status_t err = bio->bi_status;
-	struct bvec_iter_all iter_all;
-
-	if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) {
-		erofs_show_injection_info(FAULT_READ_IO);
-		err = BLK_STS_IOERR;
-	}
-
-	bio_for_each_segment_all(bvec, bio, iter_all) {
-		struct page *page = bvec->bv_page;
-
-		/* page is already locked */
-		DBG_BUGON(PageUptodate(page));
-
-		if (unlikely(err))
-			SetPageError(page);
-		else
-			SetPageUptodate(page);
-
-		unlock_page(page);
-		/* page could be reclaimed now */
-	}
-	bio_put(bio);
-}
-
-/* prio -- true is used for dir */
-struct page *__erofs_get_meta_page(struct super_block *sb,
-				   erofs_blk_t blkaddr, bool prio, bool nofail)
-{
-	struct inode *const bd_inode = sb->s_bdev->bd_inode;
-	struct address_space *const mapping = bd_inode->i_mapping;
-	/* prefer retrying in the allocator to blindly looping below */
-	const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) |
-		(nofail ? __GFP_NOFAIL : 0);
-	unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0;
-	struct page *page;
-	int err;
-
-repeat:
-	page = find_or_create_page(mapping, blkaddr, gfp);
-	if (unlikely(!page)) {
-		DBG_BUGON(nofail);
-		return ERR_PTR(-ENOMEM);
-	}
-	DBG_BUGON(!PageLocked(page));
-
-	if (!PageUptodate(page)) {
-		struct bio *bio;
-
-		bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail);
-		if (IS_ERR(bio)) {
-			DBG_BUGON(nofail);
-			err = PTR_ERR(bio);
-			goto err_out;
-		}
-
-		err = bio_add_page(bio, page, PAGE_SIZE, 0);
-		if (unlikely(err != PAGE_SIZE)) {
-			err = -EFAULT;
-			goto err_out;
-		}
-
-		__submit_bio(bio, REQ_OP_READ,
-			     REQ_META | (prio ? REQ_PRIO : 0));
-
-		lock_page(page);
-
-		/* this page has been truncated by others */
-		if (unlikely(page->mapping != mapping)) {
-unlock_repeat:
-			unlock_page(page);
-			put_page(page);
-			goto repeat;
-		}
-
-		/* more likely a read error */
-		if (unlikely(!PageUptodate(page))) {
-			if (io_retries) {
-				--io_retries;
-				goto unlock_repeat;
-			}
-			err = -EIO;
-			goto err_out;
-		}
-	}
-	return page;
-
-err_out:
-	unlock_page(page);
-	put_page(page);
-	return ERR_PTR(err);
-}
-
-static int erofs_map_blocks_flatmode(struct inode *inode,
-				     struct erofs_map_blocks *map,
-				     int flags)
-{
-	int err = 0;
-	erofs_blk_t nblocks, lastblk;
-	u64 offset = map->m_la;
-	struct erofs_vnode *vi = EROFS_V(inode);
-
-	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
-
-	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
-	lastblk = nblocks - is_inode_flat_inline(inode);
-
-	if (unlikely(offset >= inode->i_size)) {
-		/* leave out-of-bound access unmapped */
-		map->m_flags = 0;
-		map->m_plen = 0;
-		goto out;
-	}
-
-	/* there is no hole in flatmode */
-	map->m_flags = EROFS_MAP_MAPPED;
-
-	if (offset < blknr_to_addr(lastblk)) {
-		map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
-		map->m_plen = blknr_to_addr(lastblk) - offset;
-	} else if (is_inode_flat_inline(inode)) {
-		/* 2 - inode inline B: inode, [xattrs], inline last blk... */
-		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-
-		map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
-			vi->xattr_isize + erofs_blkoff(map->m_la);
-		map->m_plen = inode->i_size - offset;
-
-		/* inline data should be located in one meta block */
-		if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
-			errln("inline data cross block boundary @ nid %llu",
-			      vi->nid);
-			DBG_BUGON(1);
-			err = -EFSCORRUPTED;
-			goto err_out;
-		}
-
-		map->m_flags |= EROFS_MAP_META;
-	} else {
-		errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
-		      vi->nid, inode->i_size, map->m_la);
-		DBG_BUGON(1);
-		err = -EIO;
-		goto err_out;
-	}
-
-out:
-	map->m_llen = map->m_plen;
-
-err_out:
-	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
-	return err;
-}
-
-int erofs_map_blocks(struct inode *inode,
-		     struct erofs_map_blocks *map, int flags)
-{
-	if (unlikely(is_inode_layout_compression(inode))) {
-		int err = z_erofs_map_blocks_iter(inode, map, flags);
-
-		if (map->mpage) {
-			put_page(map->mpage);
-			map->mpage = NULL;
-		}
-		return err;
-	}
-	return erofs_map_blocks_flatmode(inode, map, flags);
-}
-
-static inline struct bio *erofs_read_raw_page(struct bio *bio,
-					      struct address_space *mapping,
-					      struct page *page,
-					      erofs_off_t *last_block,
-					      unsigned int nblocks,
-					      bool ra)
-{
-	struct inode *const inode = mapping->host;
-	struct super_block *const sb = inode->i_sb;
-	erofs_off_t current_block = (erofs_off_t)page->index;
-	int err;
-
-	DBG_BUGON(!nblocks);
-
-	if (PageUptodate(page)) {
-		err = 0;
-		goto has_updated;
-	}
-
-	/* note that for readpage case, bio also equals to NULL */
-	if (bio &&
-	    /* not continuous */
-	    *last_block + 1 != current_block) {
-submit_bio_retry:
-		__submit_bio(bio, REQ_OP_READ, 0);
-		bio = NULL;
-	}
-
-	if (!bio) {
-		struct erofs_map_blocks map = {
-			.m_la = blknr_to_addr(current_block),
-		};
-		erofs_blk_t blknr;
-		unsigned int blkoff;
-
-		err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
-		if (unlikely(err))
-			goto err_out;
-
-		/* zero out the holed page */
-		if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
-			zero_user_segment(page, 0, PAGE_SIZE);
-			SetPageUptodate(page);
-
-			/* imply err = 0, see erofs_map_blocks */
-			goto has_updated;
-		}
-
-		/* for RAW access mode, m_plen must be equal to m_llen */
-		DBG_BUGON(map.m_plen != map.m_llen);
-
-		blknr = erofs_blknr(map.m_pa);
-		blkoff = erofs_blkoff(map.m_pa);
-
-		/* deal with inline page */
-		if (map.m_flags & EROFS_MAP_META) {
-			void *vsrc, *vto;
-			struct page *ipage;
-
-			DBG_BUGON(map.m_plen > PAGE_SIZE);
-
-			ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);
-
-			if (IS_ERR(ipage)) {
-				err = PTR_ERR(ipage);
-				goto err_out;
-			}
-
-			vsrc = kmap_atomic(ipage);
-			vto = kmap_atomic(page);
-			memcpy(vto, vsrc + blkoff, map.m_plen);
-			memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
-			kunmap_atomic(vto);
-			kunmap_atomic(vsrc);
-			flush_dcache_page(page);
-
-			SetPageUptodate(page);
-			/* TODO: could we unlock the page earlier? */
-			unlock_page(ipage);
-			put_page(ipage);
-
-			/* imply err = 0, see erofs_map_blocks */
-			goto has_updated;
-		}
-
-		/* pa must be block-aligned for raw reading */
-		DBG_BUGON(erofs_blkoff(map.m_pa));
-
-		/* max # of continuous pages */
-		if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
-			nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
-		if (nblocks > BIO_MAX_PAGES)
-			nblocks = BIO_MAX_PAGES;
-
-		bio = erofs_grab_bio(sb, blknr, nblocks, sb,
-				     read_endio, false);
-		if (IS_ERR(bio)) {
-			err = PTR_ERR(bio);
-			bio = NULL;
-			goto err_out;
-		}
-	}
-
-	err = bio_add_page(bio, page, PAGE_SIZE, 0);
-	/* out of the extent or bio is full */
-	if (err < PAGE_SIZE)
-		goto submit_bio_retry;
-
-	*last_block = current_block;
-
-	/* shift in advance in case of it followed by too many gaps */
-	if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
-		/* err should reassign to 0 after submitting */
-		err = 0;
-		goto submit_bio_out;
-	}
-
-	return bio;
-
-err_out:
-	/* for sync reading, set page error immediately */
-	if (!ra) {
-		SetPageError(page);
-		ClearPageUptodate(page);
-	}
-has_updated:
-	unlock_page(page);
-
-	/* if updated manually, continuous pages has a gap */
-	if (bio)
-submit_bio_out:
-		__submit_bio(bio, REQ_OP_READ, 0);
-
-	return unlikely(err) ? ERR_PTR(err) : NULL;
-}
-
-/*
- * since we dont have write or truncate flows, so no inode
- * locking needs to be held at the moment.
- */
-static int erofs_raw_access_readpage(struct file *file, struct page *page)
-{
-	erofs_off_t last_block;
-	struct bio *bio;
-
-	trace_erofs_readpage(page, true);
-
-	bio = erofs_read_raw_page(NULL, page->mapping,
-				  page, &last_block, 1, false);
-
-	if (IS_ERR(bio))
-		return PTR_ERR(bio);
-
-	DBG_BUGON(bio);	/* since we have only one bio -- must be NULL */
-	return 0;
-}
-
-static int erofs_raw_access_readpages(struct file *filp,
-				      struct address_space *mapping,
-				      struct list_head *pages,
-				      unsigned int nr_pages)
-{
-	erofs_off_t last_block;
-	struct bio *bio = NULL;
-	gfp_t gfp = readahead_gfp_mask(mapping);
-	struct page *page = list_last_entry(pages, struct page, lru);
-
-	trace_erofs_readpages(mapping->host, page, nr_pages, true);
-
-	for (; nr_pages; --nr_pages) {
-		page = list_entry(pages->prev, struct page, lru);
-
-		prefetchw(&page->flags);
-		list_del(&page->lru);
-
-		if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
-			bio = erofs_read_raw_page(bio, mapping, page,
-						  &last_block, nr_pages, true);
-
-			/* all the page errors are ignored when readahead */
-			if (IS_ERR(bio)) {
-				pr_err("%s, readahead error at page %lu of nid %llu\n",
-				       __func__, page->index,
-				       EROFS_V(mapping->host)->nid);
-
-				bio = NULL;
-			}
-		}
-
-		/* pages could still be locked */
-		put_page(page);
-	}
-	DBG_BUGON(!list_empty(pages));
-
-	/* the rare case (end in gaps) */
-	if (unlikely(bio))
-		__submit_bio(bio, REQ_OP_READ, 0);
-	return 0;
-}
-
-static int erofs_get_block(struct inode *inode, sector_t iblock,
-			   struct buffer_head *bh, int create)
-{
-	struct erofs_map_blocks map = {
-		.m_la = iblock << 9,
-	};
-	int err;
-
-	err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
-	if (err)
-		return err;
-
-	if (map.m_flags & EROFS_MAP_MAPPED)
-		bh->b_blocknr = erofs_blknr(map.m_pa);
-
-	return err;
-}
-
-static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
-{
-	struct inode *inode = mapping->host;
-
-	if (is_inode_flat_inline(inode)) {
-		erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
-
-		if (block >> LOG_SECTORS_PER_BLOCK >= blks)
-			return 0;
-	}
-
-	return generic_block_bmap(mapping, block, erofs_get_block);
-}
-
-/* for uncompressed (aligned) files and raw access for other files */
-const struct address_space_operations erofs_raw_access_aops = {
-	.readpage = erofs_raw_access_readpage,
-	.readpages = erofs_raw_access_readpages,
-	.bmap = erofs_bmap,
-};
-
diff --git a/drivers/staging/erofs/decompressor.c b/drivers/staging/erofs/decompressor.c
deleted file mode 100644
index 32a811ac704a..000000000000
--- a/drivers/staging/erofs/decompressor.c
+++ /dev/null
@@ -1,360 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/decompressor.c
- *
- * Copyright (C) 2019 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "compress.h"
-#include <linux/module.h>
-#include <linux/lz4.h>
-
-#ifndef LZ4_DISTANCE_MAX	/* history window size */
-#define LZ4_DISTANCE_MAX 65535	/* set to maximum value by default */
-#endif
-
-#define LZ4_MAX_DISTANCE_PAGES	(DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1)
-#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN
-#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize)  (((srcsize) >> 8) + 32)
-#endif
-
-struct z_erofs_decompressor {
-	/*
-	 * if destpages have sparsed pages, fill them with bounce pages.
-	 * it also check whether destpages indicate continuous physical memory.
-	 */
-	int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
-				 struct list_head *pagepool);
-	int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
-	char *name;
-};
-
-static bool use_vmap;
-module_param(use_vmap, bool, 0444);
-MODULE_PARM_DESC(use_vmap, "Use vmap() instead of vm_map_ram() (default 0)");
-
-static int lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
-				 struct list_head *pagepool)
-{
-	const unsigned int nr =
-		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
-	struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
-	unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
-					   BITS_PER_LONG)] = { 0 };
-	void *kaddr = NULL;
-	unsigned int i, j, top;
-
-	top = 0;
-	for (i = j = 0; i < nr; ++i, ++j) {
-		struct page *const page = rq->out[i];
-		struct page *victim;
-
-		if (j >= LZ4_MAX_DISTANCE_PAGES)
-			j = 0;
-
-		/* 'valid' bounced can only be tested after a complete round */
-		if (test_bit(j, bounced)) {
-			DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES);
-			DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES);
-			availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES];
-		}
-
-		if (page) {
-			__clear_bit(j, bounced);
-			if (kaddr) {
-				if (kaddr + PAGE_SIZE == page_address(page))
-					kaddr += PAGE_SIZE;
-				else
-					kaddr = NULL;
-			} else if (!i) {
-				kaddr = page_address(page);
-			}
-			continue;
-		}
-		kaddr = NULL;
-		__set_bit(j, bounced);
-
-		if (top) {
-			victim = availables[--top];
-			get_page(victim);
-		} else {
-			victim = erofs_allocpage(pagepool, GFP_KERNEL, false);
-			if (unlikely(!victim))
-				return -ENOMEM;
-			victim->mapping = Z_EROFS_MAPPING_STAGING;
-		}
-		rq->out[i] = victim;
-	}
-	return kaddr ? 1 : 0;
-}
-
-static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
-				       u8 *src, unsigned int pageofs_in)
-{
-	/*
-	 * if in-place decompression is ongoing, those decompressed
-	 * pages should be copied in order to avoid being overlapped.
-	 */
-	struct page **in = rq->in;
-	u8 *const tmp = erofs_get_pcpubuf(0);
-	u8 *tmpp = tmp;
-	unsigned int inlen = rq->inputsize - pageofs_in;
-	unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
-
-	while (tmpp < tmp + inlen) {
-		if (!src)
-			src = kmap_atomic(*in);
-		memcpy(tmpp, src + pageofs_in, count);
-		kunmap_atomic(src);
-		src = NULL;
-		tmpp += count;
-		pageofs_in = 0;
-		count = PAGE_SIZE;
-		++in;
-	}
-	return tmp;
-}
-
-static int lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
-{
-	unsigned int inputmargin, inlen;
-	u8 *src;
-	bool copied, support_0padding;
-	int ret;
-
-	if (rq->inputsize > PAGE_SIZE)
-		return -EOPNOTSUPP;
-
-	src = kmap_atomic(*rq->in);
-	inputmargin = 0;
-	support_0padding = false;
-
-	/* decompression inplace is only safe when 0padding is enabled */
-	if (EROFS_SB(rq->sb)->requirements & EROFS_REQUIREMENT_LZ4_0PADDING) {
-		support_0padding = true;
-
-		while (!src[inputmargin & ~PAGE_MASK])
-			if (!(++inputmargin & ~PAGE_MASK))
-				break;
-
-		if (inputmargin >= rq->inputsize) {
-			kunmap_atomic(src);
-			return -EIO;
-		}
-	}
-
-	copied = false;
-	inlen = rq->inputsize - inputmargin;
-	if (rq->inplace_io) {
-		const uint oend = (rq->pageofs_out +
-				   rq->outputsize) & ~PAGE_MASK;
-		const uint nr = PAGE_ALIGN(rq->pageofs_out +
-					   rq->outputsize) >> PAGE_SHIFT;
-
-		if (rq->partial_decoding || !support_0padding ||
-		    rq->out[nr - 1] != rq->in[0] ||
-		    rq->inputsize - oend <
-		      LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) {
-			src = generic_copy_inplace_data(rq, src, inputmargin);
-			inputmargin = 0;
-			copied = true;
-		}
-	}
-
-	ret = LZ4_decompress_safe_partial(src + inputmargin, out,
-					  inlen, rq->outputsize,
-					  rq->outputsize);
-	if (ret < 0) {
-		errln("%s, failed to decompress, in[%p, %u, %u] out[%p, %u]",
-		      __func__, src + inputmargin, inlen, inputmargin,
-		      out, rq->outputsize);
-		WARN_ON(1);
-		print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
-			       16, 1, src + inputmargin, inlen, true);
-		print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
-			       16, 1, out, rq->outputsize, true);
-		ret = -EIO;
-	}
-
-	if (copied)
-		erofs_put_pcpubuf(src);
-	else
-		kunmap_atomic(src);
-	return ret;
-}
-
-static struct z_erofs_decompressor decompressors[] = {
-	[Z_EROFS_COMPRESSION_SHIFTED] = {
-		.name = "shifted"
-	},
-	[Z_EROFS_COMPRESSION_LZ4] = {
-		.prepare_destpages = lz4_prepare_destpages,
-		.decompress = lz4_decompress,
-		.name = "lz4"
-	},
-};
-
-static void copy_from_pcpubuf(struct page **out, const char *dst,
-			      unsigned short pageofs_out,
-			      unsigned int outputsize)
-{
-	const char *end = dst + outputsize;
-	const unsigned int righthalf = PAGE_SIZE - pageofs_out;
-	const char *cur = dst - pageofs_out;
-
-	while (cur < end) {
-		struct page *const page = *out++;
-
-		if (page) {
-			char *buf = kmap_atomic(page);
-
-			if (cur >= dst) {
-				memcpy(buf, cur, min_t(uint, PAGE_SIZE,
-						       end - cur));
-			} else {
-				memcpy(buf + pageofs_out, cur + pageofs_out,
-				       min_t(uint, righthalf, end - cur));
-			}
-			kunmap_atomic(buf);
-		}
-		cur += PAGE_SIZE;
-	}
-}
-
-static void *erofs_vmap(struct page **pages, unsigned int count)
-{
-	int i = 0;
-
-	if (use_vmap)
-		return vmap(pages, count, VM_MAP, PAGE_KERNEL);
-
-	while (1) {
-		void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL);
-
-		/* retry two more times (totally 3 times) */
-		if (addr || ++i >= 3)
-			return addr;
-		vm_unmap_aliases();
-	}
-	return NULL;
-}
-
-static void erofs_vunmap(const void *mem, unsigned int count)
-{
-	if (!use_vmap)
-		vm_unmap_ram(mem, count);
-	else
-		vunmap(mem);
-}
-
-static int decompress_generic(struct z_erofs_decompress_req *rq,
-			      struct list_head *pagepool)
-{
-	const unsigned int nrpages_out =
-		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
-	const struct z_erofs_decompressor *alg = decompressors + rq->alg;
-	unsigned int dst_maptype;
-	void *dst;
-	int ret;
-
-	if (nrpages_out == 1 && !rq->inplace_io) {
-		DBG_BUGON(!*rq->out);
-		dst = kmap_atomic(*rq->out);
-		dst_maptype = 0;
-		goto dstmap_out;
-	}
-
-	/*
-	 * For the case of small output size (especially much less
-	 * than PAGE_SIZE), memcpy the decompressed data rather than
-	 * compressed data is preferred.
-	 */
-	if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
-		dst = erofs_get_pcpubuf(0);
-		if (IS_ERR(dst))
-			return PTR_ERR(dst);
-
-		rq->inplace_io = false;
-		ret = alg->decompress(rq, dst);
-		if (!ret)
-			copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
-					  rq->outputsize);
-
-		erofs_put_pcpubuf(dst);
-		return ret;
-	}
-
-	ret = alg->prepare_destpages(rq, pagepool);
-	if (ret < 0) {
-		return ret;
-	} else if (ret) {
-		dst = page_address(*rq->out);
-		dst_maptype = 1;
-		goto dstmap_out;
-	}
-
-	dst = erofs_vmap(rq->out, nrpages_out);
-	if (!dst)
-		return -ENOMEM;
-	dst_maptype = 2;
-
-dstmap_out:
-	ret = alg->decompress(rq, dst + rq->pageofs_out);
-
-	if (!dst_maptype)
-		kunmap_atomic(dst);
-	else if (dst_maptype == 2)
-		erofs_vunmap(dst, nrpages_out);
-	return ret;
-}
-
-static int shifted_decompress(const struct z_erofs_decompress_req *rq,
-			      struct list_head *pagepool)
-{
-	const unsigned int nrpages_out =
-		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
-	const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out;
-	unsigned char *src, *dst;
-
-	if (nrpages_out > 2) {
-		DBG_BUGON(1);
-		return -EIO;
-	}
-
-	if (rq->out[0] == *rq->in) {
-		DBG_BUGON(nrpages_out != 1);
-		return 0;
-	}
-
-	src = kmap_atomic(*rq->in);
-	if (!rq->out[0]) {
-		dst = NULL;
-	} else {
-		dst = kmap_atomic(rq->out[0]);
-		memcpy(dst + rq->pageofs_out, src, righthalf);
-	}
-
-	if (rq->out[1] == *rq->in) {
-		memmove(src, src + righthalf, rq->pageofs_out);
-	} else if (nrpages_out == 2) {
-		if (dst)
-			kunmap_atomic(dst);
-		DBG_BUGON(!rq->out[1]);
-		dst = kmap_atomic(rq->out[1]);
-		memcpy(dst, src + righthalf, rq->pageofs_out);
-	}
-	if (dst)
-		kunmap_atomic(dst);
-	kunmap_atomic(src);
-	return 0;
-}
-
-int z_erofs_decompress(struct z_erofs_decompress_req *rq,
-		       struct list_head *pagepool)
-{
-	if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
-		return shifted_decompress(rq, pagepool);
-	return decompress_generic(rq, pagepool);
-}
-
diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c
deleted file mode 100644
index 77ef856df9f3..000000000000
--- a/drivers/staging/erofs/dir.c
+++ /dev/null
@@ -1,141 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/dir.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "internal.h"
-
-static void debug_one_dentry(unsigned char d_type, const char *de_name,
-			     unsigned int de_namelen)
-{
-#ifdef CONFIG_EROFS_FS_DEBUG
-	/* since the on-disk name could not have the trailing '\0' */
-	unsigned char dbg_namebuf[EROFS_NAME_LEN + 1];
-
-	memcpy(dbg_namebuf, de_name, de_namelen);
-	dbg_namebuf[de_namelen] = '\0';
-
-	debugln("found dirent %s de_len %u d_type %d", dbg_namebuf,
-		de_namelen, d_type);
-#endif
-}
-
-static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
-			       void *dentry_blk, unsigned int *ofs,
-			       unsigned int nameoff, unsigned int maxsize)
-{
-	struct erofs_dirent *de = dentry_blk + *ofs;
-	const struct erofs_dirent *end = dentry_blk + nameoff;
-
-	while (de < end) {
-		const char *de_name;
-		unsigned int de_namelen;
-		unsigned char d_type;
-
-		d_type = fs_ftype_to_dtype(de->file_type);
-
-		nameoff = le16_to_cpu(de->nameoff);
-		de_name = (char *)dentry_blk + nameoff;
-
-		/* the last dirent in the block? */
-		if (de + 1 >= end)
-			de_namelen = strnlen(de_name, maxsize - nameoff);
-		else
-			de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
-
-		/* a corrupted entry is found */
-		if (unlikely(nameoff + de_namelen > maxsize ||
-			     de_namelen > EROFS_NAME_LEN)) {
-			errln("bogus dirent @ nid %llu", EROFS_V(dir)->nid);
-			DBG_BUGON(1);
-			return -EFSCORRUPTED;
-		}
-
-		debug_one_dentry(d_type, de_name, de_namelen);
-		if (!dir_emit(ctx, de_name, de_namelen,
-			      le64_to_cpu(de->nid), d_type))
-			/* stopped by some reason */
-			return 1;
-		++de;
-		*ofs += sizeof(struct erofs_dirent);
-	}
-	*ofs = maxsize;
-	return 0;
-}
-
-static int erofs_readdir(struct file *f, struct dir_context *ctx)
-{
-	struct inode *dir = file_inode(f);
-	struct address_space *mapping = dir->i_mapping;
-	const size_t dirsize = i_size_read(dir);
-	unsigned int i = ctx->pos / EROFS_BLKSIZ;
-	unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
-	int err = 0;
-	bool initial = true;
-
-	while (ctx->pos < dirsize) {
-		struct page *dentry_page;
-		struct erofs_dirent *de;
-		unsigned int nameoff, maxsize;
-
-		dentry_page = read_mapping_page(mapping, i, NULL);
-		if (dentry_page == ERR_PTR(-ENOMEM)) {
-			err = -ENOMEM;
-			break;
-		} else if (IS_ERR(dentry_page)) {
-			errln("fail to readdir of logical block %u of nid %llu",
-			      i, EROFS_V(dir)->nid);
-			err = -EFSCORRUPTED;
-			break;
-		}
-
-		de = (struct erofs_dirent *)kmap(dentry_page);
-
-		nameoff = le16_to_cpu(de->nameoff);
-
-		if (unlikely(nameoff < sizeof(struct erofs_dirent) ||
-			     nameoff >= PAGE_SIZE)) {
-			errln("%s, invalid de[0].nameoff %u @ nid %llu",
-			      __func__, nameoff, EROFS_V(dir)->nid);
-			err = -EFSCORRUPTED;
-			goto skip_this;
-		}
-
-		maxsize = min_t(unsigned int,
-				dirsize - ctx->pos + ofs, PAGE_SIZE);
-
-		/* search dirents at the arbitrary position */
-		if (unlikely(initial)) {
-			initial = false;
-
-			ofs = roundup(ofs, sizeof(struct erofs_dirent));
-			if (unlikely(ofs >= nameoff))
-				goto skip_this;
-		}
-
-		err = erofs_fill_dentries(dir, ctx, de, &ofs,
-					  nameoff, maxsize);
-skip_this:
-		kunmap(dentry_page);
-
-		put_page(dentry_page);
-
-		ctx->pos = blknr_to_addr(i) + ofs;
-
-		if (unlikely(err))
-			break;
-		++i;
-		ofs = 0;
-	}
-	return err < 0 ? err : 0;
-}
-
-const struct file_operations erofs_dir_fops = {
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-	.iterate_shared	= erofs_readdir,
-};
-
diff --git a/drivers/staging/erofs/erofs_fs.h b/drivers/staging/erofs/erofs_fs.h
deleted file mode 100644
index 6db70f395937..000000000000
--- a/drivers/staging/erofs/erofs_fs.h
+++ /dev/null
@@ -1,310 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
-/*
- * linux/drivers/staging/erofs/erofs_fs.h
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_H
-#define __EROFS_FS_H
-
-/* Enhanced(Extended) ROM File System */
-#define EROFS_SUPER_MAGIC_V1    0xE0F5E1E2
-#define EROFS_SUPER_OFFSET      1024
-
-/*
- * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be
- * incompatible with this kernel version.
- */
-#define EROFS_REQUIREMENT_LZ4_0PADDING	0x00000001
-#define EROFS_ALL_REQUIREMENTS		EROFS_REQUIREMENT_LZ4_0PADDING
-
-struct erofs_super_block {
-/*  0 */__le32 magic;           /* in the little endian */
-/*  4 */__le32 checksum;        /* crc32c(super_block) */
-/*  8 */__le32 features;        /* (aka. feature_compat) */
-/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
-/* 13 */__u8 reserved;
-
-/* 14 */__le16 root_nid;
-/* 16 */__le64 inos;            /* total valid ino # (== f_files - f_favail) */
-
-/* 24 */__le64 build_time;      /* inode v1 time derivation */
-/* 32 */__le32 build_time_nsec;
-/* 36 */__le32 blocks;          /* used for statfs */
-/* 40 */__le32 meta_blkaddr;
-/* 44 */__le32 xattr_blkaddr;
-/* 48 */__u8 uuid[16];          /* 128-bit uuid for volume */
-/* 64 */__u8 volume_name[16];   /* volume name */
-/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
-
-/* 84 */__u8 reserved2[44];
-} __packed;                     /* 128 bytes */
-
-/*
- * erofs inode data mapping:
- * 0 - inode plain without inline data A:
- * inode, [xattrs], ... | ... | no-holed data
- * 1 - inode VLE compression B (legacy):
- * inode, [xattrs], extents ... | ...
- * 2 - inode plain with inline data C:
- * inode, [xattrs], last_inline_data, ... | ... | no-holed data
- * 3 - inode compression D:
- * inode, [xattrs], map_header, extents ... | ...
- * 4~7 - reserved
- */
-enum {
-	EROFS_INODE_FLAT_PLAIN,
-	EROFS_INODE_FLAT_COMPRESSION_LEGACY,
-	EROFS_INODE_FLAT_INLINE,
-	EROFS_INODE_FLAT_COMPRESSION,
-	EROFS_INODE_LAYOUT_MAX
-};
-
-static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
-{
-	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
-		return true;
-	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
-}
-
-/* bit definitions of inode i_advise */
-#define EROFS_I_VERSION_BITS            1
-#define EROFS_I_DATA_MAPPING_BITS       3
-
-#define EROFS_I_VERSION_BIT             0
-#define EROFS_I_DATA_MAPPING_BIT        1
-
-struct erofs_inode_v1 {
-/*  0 */__le16 i_advise;
-
-/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
-/*  2 */__le16 i_xattr_icount;
-/*  4 */__le16 i_mode;
-/*  6 */__le16 i_nlink;
-/*  8 */__le32 i_size;
-/* 12 */__le32 i_reserved;
-/* 16 */union {
-		/* file total compressed blocks for data mapping 1 */
-		__le32 compressed_blocks;
-		__le32 raw_blkaddr;
-
-		/* for device files, used to indicate old/new device # */
-		__le32 rdev;
-	} i_u __packed;
-/* 20 */__le32 i_ino;           /* only used for 32-bit stat compatibility */
-/* 24 */__le16 i_uid;
-/* 26 */__le16 i_gid;
-/* 28 */__le32 i_reserved2;
-} __packed;
-
-/* 32 bytes on-disk inode */
-#define EROFS_INODE_LAYOUT_V1   0
-/* 64 bytes on-disk inode */
-#define EROFS_INODE_LAYOUT_V2   1
-
-struct erofs_inode_v2 {
-/*  0 */__le16 i_advise;
-
-/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
-/*  2 */__le16 i_xattr_icount;
-/*  4 */__le16 i_mode;
-/*  6 */__le16 i_reserved;
-/*  8 */__le64 i_size;
-/* 16 */union {
-		/* file total compressed blocks for data mapping 1 */
-		__le32 compressed_blocks;
-		__le32 raw_blkaddr;
-
-		/* for device files, used to indicate old/new device # */
-		__le32 rdev;
-	} i_u __packed;
-
-	/* only used for 32-bit stat compatibility */
-/* 20 */__le32 i_ino;
-
-/* 24 */__le32 i_uid;
-/* 28 */__le32 i_gid;
-/* 32 */__le64 i_ctime;
-/* 40 */__le32 i_ctime_nsec;
-/* 44 */__le32 i_nlink;
-/* 48 */__u8   i_reserved2[16];
-} __packed;                     /* 64 bytes */
-
-#define EROFS_MAX_SHARED_XATTRS         (128)
-/* h_shared_count between 129 ... 255 are special # */
-#define EROFS_SHARED_XATTR_EXTENT       (255)
-
-/*
- * inline xattrs (n == i_xattr_icount):
- * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
- *          12 bytes           /                   \
- *                            /                     \
- *                           /-----------------------\
- *                           |  erofs_xattr_entries+ |
- *                           +-----------------------+
- * inline xattrs must starts in erofs_xattr_ibody_header,
- * for read-only fs, no need to introduce h_refcount
- */
-struct erofs_xattr_ibody_header {
-	__le32 h_reserved;
-	__u8   h_shared_count;
-	__u8   h_reserved2[7];
-	__le32 h_shared_xattrs[0];      /* shared xattr id array */
-} __packed;
-
-/* Name indexes */
-#define EROFS_XATTR_INDEX_USER              1
-#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS  2
-#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
-#define EROFS_XATTR_INDEX_TRUSTED           4
-#define EROFS_XATTR_INDEX_LUSTRE            5
-#define EROFS_XATTR_INDEX_SECURITY          6
-
-/* xattr entry (for both inline & shared xattrs) */
-struct erofs_xattr_entry {
-	__u8   e_name_len;      /* length of name */
-	__u8   e_name_index;    /* attribute name index */
-	__le16 e_value_size;    /* size of attribute value */
-	/* followed by e_name and e_value */
-	char   e_name[0];       /* attribute name */
-} __packed;
-
-#define ondisk_xattr_ibody_size(count)	({\
-	u32 __count = le16_to_cpu(count); \
-	((__count) == 0) ? 0 : \
-	sizeof(struct erofs_xattr_ibody_header) + \
-		sizeof(__u32) * ((__count) - 1); })
-
-#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
-#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
-	sizeof(struct erofs_xattr_entry) + \
-	(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
-
-/* available compression algorithm types */
-enum {
-	Z_EROFS_COMPRESSION_LZ4,
-	Z_EROFS_COMPRESSION_MAX
-};
-
-/*
- * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
- *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
- *                                  (4B) + 2B + (4B) if compacted 2B is on.
- */
-#define Z_EROFS_ADVISE_COMPACTED_2B_BIT         0
-
-#define Z_EROFS_ADVISE_COMPACTED_2B     (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT)
-
-struct z_erofs_map_header {
-	__le32	h_reserved1;
-	__le16	h_advise;
-	/*
-	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
-	 * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
-	 */
-	__u8	h_algorithmtype;
-	/*
-	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
-	 * bit 3-4 : (physical - logical) cluster bits of head 1:
-	 *       For example, if logical clustersize = 4096, 1 for 8192.
-	 * bit 5-7 : (physical - logical) cluster bits of head 2.
-	 */
-	__u8	h_clusterbits;
-};
-
-#define Z_EROFS_VLE_LEGACY_HEADER_PADDING       8
-
-/*
- * Z_EROFS Variable-sized Logical Extent cluster type:
- *    0 - literal (uncompressed) cluster
- *    1 - compressed cluster (for the head logical cluster)
- *    2 - compressed cluster (for the other logical clusters)
- *
- * In detail,
- *    0 - literal (uncompressed) cluster,
- *        di_advise = 0
- *        di_clusterofs = the literal data offset of the cluster
- *        di_blkaddr = the blkaddr of the literal cluster
- *
- *    1 - compressed cluster (for the head logical cluster)
- *        di_advise = 1
- *        di_clusterofs = the decompressed data offset of the cluster
- *        di_blkaddr = the blkaddr of the compressed cluster
- *
- *    2 - compressed cluster (for the other logical clusters)
- *        di_advise = 2
- *        di_clusterofs =
- *           the decompressed data offset in its own head cluster
- *        di_u.delta[0] = distance to its corresponding head cluster
- *        di_u.delta[1] = distance to its corresponding tail cluster
- *                (di_advise could be 0, 1 or 2)
- */
-enum {
-	Z_EROFS_VLE_CLUSTER_TYPE_PLAIN,
-	Z_EROFS_VLE_CLUSTER_TYPE_HEAD,
-	Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD,
-	Z_EROFS_VLE_CLUSTER_TYPE_RESERVED,
-	Z_EROFS_VLE_CLUSTER_TYPE_MAX
-};
-
-#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2
-#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT         0
-
-struct z_erofs_vle_decompressed_index {
-	__le16 di_advise;
-	/* where to decompress in the head cluster */
-	__le16 di_clusterofs;
-
-	union {
-		/* for the head cluster */
-		__le32 blkaddr;
-		/*
-		 * for the rest clusters
-		 * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
-		 * [0] - pointing to the head cluster
-		 * [1] - pointing to the tail cluster
-		 */
-		__le16 delta[2];
-	} di_u __packed;		/* 8 bytes */
-} __packed;
-
-#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
-	(round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
-	 sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
-
-/* dirent sorts in alphabet order, thus we can do binary search */
-struct erofs_dirent {
-	__le64 nid;     /*  0, node number */
-	__le16 nameoff; /*  8, start offset of file name */
-	__u8 file_type; /* 10, file type */
-	__u8 reserved;  /* 11, reserved */
-} __packed;
-
-/*
- * EROFS file types should match generic FT_* types and
- * it seems no need to add BUILD_BUG_ONs since potential
- * unmatchness will break other fses as well...
- */
-
-#define EROFS_NAME_LEN      255
-
-/* check the EROFS on-disk layout strictly at compile time */
-static inline void erofs_check_ondisk_layout_definitions(void)
-{
-	BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
-	BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32);
-	BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64);
-	BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
-	BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
-	BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
-	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
-	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
-
-	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
-		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
-}
-
-#endif
-
diff --git a/drivers/staging/erofs/include/trace/events/erofs.h b/drivers/staging/erofs/include/trace/events/erofs.h
deleted file mode 100644
index bfb2da9c4eee..000000000000
--- a/drivers/staging/erofs/include/trace/events/erofs.h
+++ /dev/null
@@ -1,256 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM erofs
-
-#if !defined(_TRACE_EROFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_EROFS_H
-
-#include <linux/tracepoint.h>
-
-#define show_dev(dev)		MAJOR(dev), MINOR(dev)
-#define show_dev_nid(entry)	show_dev(entry->dev), entry->nid
-
-#define show_file_type(type)						\
-	__print_symbolic(type,						\
-		{ 0,		"FILE" },				\
-		{ 1,		"DIR" })
-
-#define show_map_flags(flags) __print_flags(flags, "|",	\
-	{ EROFS_GET_BLOCKS_RAW,	"RAW" })
-
-#define show_mflags(flags) __print_flags(flags, "",	\
-	{ EROFS_MAP_MAPPED,	"M" },			\
-	{ EROFS_MAP_META,	"I" },			\
-	{ EROFS_MAP_ZIPPED,	"Z" })
-
-TRACE_EVENT(erofs_lookup,
-
-	TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
-
-	TP_ARGS(dir, dentry, flags),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev	)
-		__field(erofs_nid_t,	nid	)
-		__field(const char *,	name	)
-		__field(unsigned int,	flags	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= dir->i_sb->s_dev;
-		__entry->nid	= EROFS_V(dir)->nid;
-		__entry->name	= dentry->d_name.name;
-		__entry->flags	= flags;
-	),
-
-	TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x",
-		show_dev_nid(__entry),
-		__entry->name,
-		__entry->flags)
-);
-
-TRACE_EVENT(erofs_fill_inode,
-	TP_PROTO(struct inode *inode, int isdir),
-	TP_ARGS(inode, isdir),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev	)
-		__field(erofs_nid_t,	nid	)
-		__field(erofs_blk_t,	blkaddr )
-		__field(unsigned int,	ofs	)
-		__field(int,		isdir	)
-	),
-
-	TP_fast_assign(
-		__entry->dev		= inode->i_sb->s_dev;
-		__entry->nid		= EROFS_V(inode)->nid;
-		__entry->blkaddr	= erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
-		__entry->ofs		= erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
-		__entry->isdir		= isdir;
-	),
-
-	TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d",
-		  show_dev_nid(__entry),
-		  __entry->blkaddr, __entry->ofs,
-		  __entry->isdir)
-);
-
-TRACE_EVENT(erofs_readpage,
-
-	TP_PROTO(struct page *page, bool raw),
-
-	TP_ARGS(page, raw),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev	)
-		__field(erofs_nid_t,    nid     )
-		__field(int,		dir	)
-		__field(pgoff_t,	index	)
-		__field(int,		uptodate)
-		__field(bool,		raw	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= page->mapping->host->i_sb->s_dev;
-		__entry->nid	= EROFS_V(page->mapping->host)->nid;
-		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
-		__entry->index	= page->index;
-		__entry->uptodate = PageUptodate(page);
-		__entry->raw = raw;
-	),
-
-	TP_printk("dev = (%d,%d), nid = %llu, %s, index = %lu, uptodate = %d "
-		"raw = %d",
-		show_dev_nid(__entry),
-		show_file_type(__entry->dir),
-		(unsigned long)__entry->index,
-		__entry->uptodate,
-		__entry->raw)
-);
-
-TRACE_EVENT(erofs_readpages,
-
-	TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage,
-		bool raw),
-
-	TP_ARGS(inode, page, nrpage, raw),
-
-	TP_STRUCT__entry(
-		__field(dev_t,		dev	)
-		__field(erofs_nid_t,	nid	)
-		__field(pgoff_t,	start	)
-		__field(unsigned int,	nrpage	)
-		__field(bool,		raw	)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
-		__entry->start	= page->index;
-		__entry->nrpage	= nrpage;
-		__entry->raw	= raw;
-	),
-
-	TP_printk("dev = (%d,%d), nid = %llu, start = %lu nrpage = %u raw = %d",
-		show_dev_nid(__entry),
-		(unsigned long)__entry->start,
-		__entry->nrpage,
-		__entry->raw)
-);
-
-DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags),
-
-	TP_ARGS(inode, map, flags),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,		dev		)
-		__field(	erofs_nid_t,	nid		)
-		__field(	erofs_off_t,	la		)
-		__field(	u64,		llen		)
-		__field(	unsigned int,	flags		)
-	),
-
-	TP_fast_assign(
-		__entry->dev    = inode->i_sb->s_dev;
-		__entry->nid    = EROFS_V(inode)->nid;
-		__entry->la	= map->m_la;
-		__entry->llen	= map->m_llen;
-		__entry->flags	= flags;
-	),
-
-	TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s",
-		  show_dev_nid(__entry),
-		  __entry->la, __entry->llen,
-		  __entry->flags ? show_map_flags(__entry->flags) : "NULL")
-);
-
-DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned flags),
-
-	TP_ARGS(inode, map, flags)
-);
-
-DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags),
-
-	TP_ARGS(inode, map, flags)
-);
-
-DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,		dev		)
-		__field(	erofs_nid_t,	nid		)
-		__field(        unsigned int,   flags           )
-		__field(	erofs_off_t,	la		)
-		__field(	erofs_off_t,	pa		)
-		__field(	u64,		llen		)
-		__field(	u64,		plen		)
-		__field(        unsigned int,	mflags		)
-		__field(	int,		ret		)
-	),
-
-	TP_fast_assign(
-		__entry->dev    = inode->i_sb->s_dev;
-		__entry->nid    = EROFS_V(inode)->nid;
-		__entry->flags	= flags;
-		__entry->la	= map->m_la;
-		__entry->pa	= map->m_pa;
-		__entry->llen	= map->m_llen;
-		__entry->plen	= map->m_plen;
-		__entry->mflags	= map->m_flags;
-		__entry->ret	= ret;
-	),
-
-	TP_printk("dev = (%d,%d), nid = %llu, flags %s "
-		  "la %llu pa %llu llen %llu plen %llu mflags %s ret %d",
-		  show_dev_nid(__entry),
-		  __entry->flags ? show_map_flags(__entry->flags) : "NULL",
-		  __entry->la, __entry->pa, __entry->llen, __entry->plen,
-		  show_mflags(__entry->mflags), __entry->ret)
-);
-
-DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret)
-);
-
-DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit,
-	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
-		 unsigned int flags, int ret),
-
-	TP_ARGS(inode, map, flags, ret)
-);
-
-TRACE_EVENT(erofs_destroy_inode,
-	TP_PROTO(struct inode *inode),
-
-	TP_ARGS(inode),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,		dev		)
-		__field(	erofs_nid_t,	nid		)
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
-	),
-
-	TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))
-);
-
-#endif /* _TRACE_EROFS_H */
-
- /* This part must be outside protection */
-#include <trace/define_trace.h>
diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
deleted file mode 100644
index cbc2c342a37f..000000000000
--- a/drivers/staging/erofs/inode.c
+++ /dev/null
@@ -1,334 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/inode.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "xattr.h"
-
-#include <trace/events/erofs.h>
-
-/* no locking */
-static int read_inode(struct inode *inode, void *data)
-{
-	struct erofs_vnode *vi = EROFS_V(inode);
-	struct erofs_inode_v1 *v1 = data;
-	const unsigned int advise = le16_to_cpu(v1->i_advise);
-	erofs_blk_t nblks = 0;
-
-	vi->datamode = __inode_data_mapping(advise);
-
-	if (unlikely(vi->datamode >= EROFS_INODE_LAYOUT_MAX)) {
-		errln("unsupported data mapping %u of nid %llu",
-		      vi->datamode, vi->nid);
-		DBG_BUGON(1);
-		return -EOPNOTSUPP;
-	}
-
-	if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) {
-		struct erofs_inode_v2 *v2 = data;
-
-		vi->inode_isize = sizeof(struct erofs_inode_v2);
-		vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount);
-
-		inode->i_mode = le16_to_cpu(v2->i_mode);
-		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		    S_ISLNK(inode->i_mode))
-			vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr);
-		else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
-			inode->i_rdev =
-				new_decode_dev(le32_to_cpu(v2->i_u.rdev));
-		else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode))
-			inode->i_rdev = 0;
-		else
-			goto bogusimode;
-
-		i_uid_write(inode, le32_to_cpu(v2->i_uid));
-		i_gid_write(inode, le32_to_cpu(v2->i_gid));
-		set_nlink(inode, le32_to_cpu(v2->i_nlink));
-
-		/* ns timestamp */
-		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
-			le64_to_cpu(v2->i_ctime);
-		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
-			le32_to_cpu(v2->i_ctime_nsec);
-
-		inode->i_size = le64_to_cpu(v2->i_size);
-
-		/* total blocks for compressed files */
-		if (is_inode_layout_compression(inode))
-			nblks = le32_to_cpu(v2->i_u.compressed_blocks);
-	} else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) {
-		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-
-		vi->inode_isize = sizeof(struct erofs_inode_v1);
-		vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount);
-
-		inode->i_mode = le16_to_cpu(v1->i_mode);
-		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
-		    S_ISLNK(inode->i_mode))
-			vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr);
-		else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
-			inode->i_rdev =
-				new_decode_dev(le32_to_cpu(v1->i_u.rdev));
-		else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode))
-			inode->i_rdev = 0;
-		else
-			goto bogusimode;
-
-		i_uid_write(inode, le16_to_cpu(v1->i_uid));
-		i_gid_write(inode, le16_to_cpu(v1->i_gid));
-		set_nlink(inode, le16_to_cpu(v1->i_nlink));
-
-		/* use build time to derive all file time */
-		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
-			sbi->build_time;
-		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
-			sbi->build_time_nsec;
-
-		inode->i_size = le32_to_cpu(v1->i_size);
-		if (is_inode_layout_compression(inode))
-			nblks = le32_to_cpu(v1->i_u.compressed_blocks);
-	} else {
-		errln("unsupported on-disk inode version %u of nid %llu",
-		      __inode_version(advise), vi->nid);
-		DBG_BUGON(1);
-		return -EOPNOTSUPP;
-	}
-
-	if (!nblks)
-		/* measure inode.i_blocks as generic filesystems */
-		inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
-	else
-		inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK;
-	return 0;
-
-bogusimode:
-	errln("bogus i_mode (%o) @ nid %llu", inode->i_mode, vi->nid);
-	DBG_BUGON(1);
-	return -EFSCORRUPTED;
-}
-
-/*
- * try_lock can be required since locking order is:
- *   file data(fs_inode)
- *        meta(bd_inode)
- * but the majority of the callers is "iget",
- * in that case we are pretty sure no deadlock since
- * no data operations exist. However I tend to
- * try_lock since it takes no much overhead and
- * will success immediately.
- */
-static int fill_inline_data(struct inode *inode, void *data,
-			    unsigned int m_pofs)
-{
-	struct erofs_vnode *vi = EROFS_V(inode);
-	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
-
-	/* should be inode inline C */
-	if (!is_inode_flat_inline(inode))
-		return 0;
-
-	/* fast symlink (following ext4) */
-	if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
-		char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);
-
-		if (unlikely(!lnk))
-			return -ENOMEM;
-
-		m_pofs += vi->inode_isize + vi->xattr_isize;
-
-		/* inline symlink data shouldn't across page boundary as well */
-		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
-			kfree(lnk);
-			errln("inline data cross block boundary @ nid %llu",
-			      vi->nid);
-			DBG_BUGON(1);
-			return -EFSCORRUPTED;
-		}
-
-		/* get in-page inline data */
-		memcpy(lnk, data + m_pofs, inode->i_size);
-		lnk[inode->i_size] = '\0';
-
-		inode->i_link = lnk;
-		set_inode_fast_symlink(inode);
-	}
-	return 0;
-}
-
-static int fill_inode(struct inode *inode, int isdir)
-{
-	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-	struct erofs_vnode *vi = EROFS_V(inode);
-	struct page *page;
-	void *data;
-	int err;
-	erofs_blk_t blkaddr;
-	unsigned int ofs;
-	erofs_off_t inode_loc;
-
-	trace_erofs_fill_inode(inode, isdir);
-	inode_loc = iloc(sbi, vi->nid);
-	blkaddr = erofs_blknr(inode_loc);
-	ofs = erofs_blkoff(inode_loc);
-
-	debugln("%s, reading inode nid %llu at %u of blkaddr %u",
-		__func__, vi->nid, ofs, blkaddr);
-
-	page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir);
-
-	if (IS_ERR(page)) {
-		errln("failed to get inode (nid: %llu) page, err %ld",
-		      vi->nid, PTR_ERR(page));
-		return PTR_ERR(page);
-	}
-
-	DBG_BUGON(!PageUptodate(page));
-	data = page_address(page);
-
-	err = read_inode(inode, data + ofs);
-	if (!err) {
-		/* setup the new inode */
-		if (S_ISREG(inode->i_mode)) {
-			inode->i_op = &erofs_generic_iops;
-			inode->i_fop = &generic_ro_fops;
-		} else if (S_ISDIR(inode->i_mode)) {
-			inode->i_op = &erofs_dir_iops;
-			inode->i_fop = &erofs_dir_fops;
-		} else if (S_ISLNK(inode->i_mode)) {
-			/* by default, page_get_link is used for symlink */
-			inode->i_op = &erofs_symlink_iops;
-			inode_nohighmem(inode);
-		} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
-			S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
-			inode->i_op = &erofs_generic_iops;
-			init_special_inode(inode, inode->i_mode, inode->i_rdev);
-			goto out_unlock;
-		} else {
-			err = -EFSCORRUPTED;
-			goto out_unlock;
-		}
-
-		if (is_inode_layout_compression(inode)) {
-			err = z_erofs_fill_inode(inode);
-			goto out_unlock;
-		}
-
-		inode->i_mapping->a_ops = &erofs_raw_access_aops;
-
-		/* fill last page if inline data is available */
-		err = fill_inline_data(inode, data, ofs);
-	}
-
-out_unlock:
-	unlock_page(page);
-	put_page(page);
-	return err;
-}
-
-/*
- * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
- * we should do more for 32-bit platform to find the right inode.
- */
-#if BITS_PER_LONG == 32
-static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
-{
-	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
-
-	return EROFS_V(inode)->nid == nid;
-}
-
-static int erofs_iget_set_actor(struct inode *inode, void *opaque)
-{
-	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
-
-	inode->i_ino = erofs_inode_hash(nid);
-	return 0;
-}
-#endif
-
-static inline struct inode *erofs_iget_locked(struct super_block *sb,
-					      erofs_nid_t nid)
-{
-	const unsigned long hashval = erofs_inode_hash(nid);
-
-#if BITS_PER_LONG >= 64
-	/* it is safe to use iget_locked for >= 64-bit platform */
-	return iget_locked(sb, hashval);
-#else
-	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
-		erofs_iget_set_actor, &nid);
-#endif
-}
-
-struct inode *erofs_iget(struct super_block *sb,
-			 erofs_nid_t nid,
-			 bool isdir)
-{
-	struct inode *inode = erofs_iget_locked(sb, nid);
-
-	if (unlikely(!inode))
-		return ERR_PTR(-ENOMEM);
-
-	if (inode->i_state & I_NEW) {
-		int err;
-		struct erofs_vnode *vi = EROFS_V(inode);
-
-		vi->nid = nid;
-
-		err = fill_inode(inode, isdir);
-		if (likely(!err))
-			unlock_new_inode(inode);
-		else {
-			iget_failed(inode);
-			inode = ERR_PTR(err);
-		}
-	}
-	return inode;
-}
-
-int erofs_getattr(const struct path *path, struct kstat *stat,
-		  u32 request_mask, unsigned int query_flags)
-{
-	struct inode *const inode = d_inode(path->dentry);
-
-	if (is_inode_layout_compression(inode))
-		stat->attributes |= STATX_ATTR_COMPRESSED;
-
-	stat->attributes |= STATX_ATTR_IMMUTABLE;
-	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
-				  STATX_ATTR_IMMUTABLE);
-
-	generic_fillattr(inode, stat);
-	return 0;
-}
-
-const struct inode_operations erofs_generic_iops = {
-	.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
-	.listxattr = erofs_listxattr,
-#endif
-	.get_acl = erofs_get_acl,
-};
-
-const struct inode_operations erofs_symlink_iops = {
-	.get_link = page_get_link,
-	.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
-	.listxattr = erofs_listxattr,
-#endif
-	.get_acl = erofs_get_acl,
-};
-
-const struct inode_operations erofs_fast_symlink_iops = {
-	.get_link = simple_get_link,
-	.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
-	.listxattr = erofs_listxattr,
-#endif
-	.get_acl = erofs_get_acl,
-};
-
diff --git a/drivers/staging/erofs/internal.h b/drivers/staging/erofs/internal.h
deleted file mode 100644
index 0e8d58546c52..000000000000
--- a/drivers/staging/erofs/internal.h
+++ /dev/null
@@ -1,554 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/internal.h
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_INTERNAL_H
-#define __EROFS_INTERNAL_H
-
-#include <linux/fs.h>
-#include <linux/dcache.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/bio.h>
-#include <linux/buffer_head.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include "erofs_fs.h"
-
-/* redefine pr_fmt "erofs: " */
-#undef pr_fmt
-#define pr_fmt(fmt) "erofs: " fmt
-
-#define errln(x, ...)   pr_err(x "\n", ##__VA_ARGS__)
-#define infoln(x, ...)  pr_info(x "\n", ##__VA_ARGS__)
-#ifdef CONFIG_EROFS_FS_DEBUG
-#define debugln(x, ...) pr_debug(x "\n", ##__VA_ARGS__)
-#define DBG_BUGON               BUG_ON
-#else
-#define debugln(x, ...)         ((void)0)
-#define DBG_BUGON(x)            ((void)(x))
-#endif	/* !CONFIG_EROFS_FS_DEBUG */
-
-enum {
-	FAULT_KMALLOC,
-	FAULT_READ_IO,
-	FAULT_MAX,
-};
-
-#ifdef CONFIG_EROFS_FAULT_INJECTION
-extern const char *erofs_fault_name[FAULT_MAX];
-#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
-
-struct erofs_fault_info {
-	atomic_t inject_ops;
-	unsigned int inject_rate;
-	unsigned int inject_type;
-};
-#endif	/* CONFIG_EROFS_FAULT_INJECTION */
-
-/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */
-#define EROFS_SUPER_MAGIC   EROFS_SUPER_MAGIC_V1
-
-typedef u64 erofs_nid_t;
-typedef u64 erofs_off_t;
-/* data type for filesystem-wide blocks number */
-typedef u32 erofs_blk_t;
-
-struct erofs_sb_info {
-#ifdef CONFIG_EROFS_FS_ZIP
-	/* list for all registered superblocks, mainly for shrinker */
-	struct list_head list;
-	struct mutex umount_mutex;
-
-	/* the dedicated workstation for compression */
-	struct radix_tree_root workstn_tree;
-
-	/* threshold for decompression synchronously */
-	unsigned int max_sync_decompress_pages;
-
-	unsigned int shrinker_run_no;
-
-	/* current strategy of how to use managed cache */
-	unsigned char cache_strategy;
-
-	/* pseudo inode to manage cached pages */
-	struct inode *managed_cache;
-#endif	/* CONFIG_EROFS_FS_ZIP */
-	u32 blocks;
-	u32 meta_blkaddr;
-#ifdef CONFIG_EROFS_FS_XATTR
-	u32 xattr_blkaddr;
-#endif
-
-	/* inode slot unit size in bit shift */
-	unsigned char islotbits;
-
-	u32 build_time_nsec;
-	u64 build_time;
-
-	/* what we really care is nid, rather than ino.. */
-	erofs_nid_t root_nid;
-	/* used for statfs, f_files - f_favail */
-	u64 inos;
-
-	u8 uuid[16];                    /* 128-bit uuid for volume */
-	u8 volume_name[16];             /* volume name */
-	u32 requirements;
-
-	unsigned int mount_opt;
-
-#ifdef CONFIG_EROFS_FAULT_INJECTION
-	struct erofs_fault_info fault_info;	/* For fault injection */
-#endif
-};
-
-#ifdef CONFIG_EROFS_FAULT_INJECTION
-#define erofs_show_injection_info(type)					\
-	infoln("inject %s in %s of %pS", erofs_fault_name[type],        \
-		__func__, __builtin_return_address(0))
-
-static inline bool time_to_inject(struct erofs_sb_info *sbi, int type)
-{
-	struct erofs_fault_info *ffi = &sbi->fault_info;
-
-	if (!ffi->inject_rate)
-		return false;
-
-	if (!IS_FAULT_SET(ffi, type))
-		return false;
-
-	atomic_inc(&ffi->inject_ops);
-	if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
-		atomic_set(&ffi->inject_ops, 0);
-		return true;
-	}
-	return false;
-}
-#else
-static inline bool time_to_inject(struct erofs_sb_info *sbi, int type)
-{
-	return false;
-}
-
-static inline void erofs_show_injection_info(int type)
-{
-}
-#endif	/* !CONFIG_EROFS_FAULT_INJECTION */
-
-static inline void *erofs_kmalloc(struct erofs_sb_info *sbi,
-					size_t size, gfp_t flags)
-{
-	if (time_to_inject(sbi, FAULT_KMALLOC)) {
-		erofs_show_injection_info(FAULT_KMALLOC);
-		return NULL;
-	}
-	return kmalloc(size, flags);
-}
-
-#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
-#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info)
-
-/* Mount flags set via mount options or defaults */
-#define EROFS_MOUNT_XATTR_USER		0x00000010
-#define EROFS_MOUNT_POSIX_ACL		0x00000020
-#define EROFS_MOUNT_FAULT_INJECTION	0x00000040
-
-#define clear_opt(sbi, option)	((sbi)->mount_opt &= ~EROFS_MOUNT_##option)
-#define set_opt(sbi, option)	((sbi)->mount_opt |= EROFS_MOUNT_##option)
-#define test_opt(sbi, option)	((sbi)->mount_opt & EROFS_MOUNT_##option)
-
-#ifdef CONFIG_EROFS_FS_ZIP
-enum {
-	EROFS_ZIP_CACHE_DISABLED,
-	EROFS_ZIP_CACHE_READAHEAD,
-	EROFS_ZIP_CACHE_READAROUND
-};
-
-#define EROFS_LOCKED_MAGIC     (INT_MIN | 0xE0F510CCL)
-
-/* basic unit of the workstation of a super_block */
-struct erofs_workgroup {
-	/* the workgroup index in the workstation */
-	pgoff_t index;
-
-	/* overall workgroup reference count */
-	atomic_t refcount;
-};
-
-#if defined(CONFIG_SMP)
-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
-						 int val)
-{
-	preempt_disable();
-	if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
-		preempt_enable();
-		return false;
-	}
-	return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
-					    int orig_val)
-{
-	/*
-	 * other observers should notice all modifications
-	 * in the freezing period.
-	 */
-	smp_mb();
-	atomic_set(&grp->refcount, orig_val);
-	preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
-	return atomic_cond_read_relaxed(&grp->refcount,
-					VAL != EROFS_LOCKED_MAGIC);
-}
-#else
-static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
-						 int val)
-{
-	preempt_disable();
-	/* no need to spin on UP platforms, let's just disable preemption. */
-	if (val != atomic_read(&grp->refcount)) {
-		preempt_enable();
-		return false;
-	}
-	return true;
-}
-
-static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
-					    int orig_val)
-{
-	preempt_enable();
-}
-
-static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
-{
-	int v = atomic_read(&grp->refcount);
-
-	/* workgroup is never freezed on uniprocessor systems */
-	DBG_BUGON(v == EROFS_LOCKED_MAGIC);
-	return v;
-}
-#endif	/* !CONFIG_SMP */
-
-/* hard limit of pages per compressed cluster */
-#define Z_EROFS_CLUSTER_MAX_PAGES       (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
-#define EROFS_PCPUBUF_NR_PAGES          Z_EROFS_CLUSTER_MAX_PAGES
-#else
-#define EROFS_PCPUBUF_NR_PAGES          0
-#endif	/* !CONFIG_EROFS_FS_ZIP */
-
-/* we strictly follow PAGE_SIZE and no buffer head yet */
-#define LOG_BLOCK_SIZE		PAGE_SHIFT
-
-#undef LOG_SECTORS_PER_BLOCK
-#define LOG_SECTORS_PER_BLOCK	(PAGE_SHIFT - 9)
-
-#undef SECTORS_PER_BLOCK
-#define SECTORS_PER_BLOCK	(1 << SECTORS_PER_BLOCK)
-
-#define EROFS_BLKSIZ		(1 << LOG_BLOCK_SIZE)
-
-#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ)
-#error erofs cannot be used in this platform
-#endif
-
-#define EROFS_IO_MAX_RETRIES_NOFAIL     5
-
-#define ROOT_NID(sb)		((sb)->root_nid)
-
-#define erofs_blknr(addr)       ((addr) / EROFS_BLKSIZ)
-#define erofs_blkoff(addr)      ((addr) % EROFS_BLKSIZ)
-#define blknr_to_addr(nr)       ((erofs_off_t)(nr) * EROFS_BLKSIZ)
-
-static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
-{
-	return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits);
-}
-
-/* atomic flag definitions */
-#define EROFS_V_EA_INITED_BIT	0
-#define EROFS_V_Z_INITED_BIT	1
-
-/* bitlock definitions (arranged in reverse order) */
-#define EROFS_V_BL_XATTR_BIT	(BITS_PER_LONG - 1)
-#define EROFS_V_BL_Z_BIT	(BITS_PER_LONG - 2)
-
-struct erofs_vnode {
-	erofs_nid_t nid;
-
-	/* atomic flags (including bitlocks) */
-	unsigned long flags;
-
-	unsigned char datamode;
-	unsigned char inode_isize;
-	unsigned short xattr_isize;
-
-	unsigned int xattr_shared_count;
-	unsigned int *xattr_shared_xattrs;
-
-	union {
-		erofs_blk_t raw_blkaddr;
-#ifdef CONFIG_EROFS_FS_ZIP
-		struct {
-			unsigned short z_advise;
-			unsigned char  z_algorithmtype[2];
-			unsigned char  z_logical_clusterbits;
-			unsigned char  z_physical_clusterbits[2];
-		};
-#endif	/* CONFIG_EROFS_FS_ZIP */
-	};
-	/* the corresponding vfs inode */
-	struct inode vfs_inode;
-};
-
-#define EROFS_V(ptr)	\
-	container_of(ptr, struct erofs_vnode, vfs_inode)
-
-#define __inode_advise(x, bit, bits) \
-	(((x) >> (bit)) & ((1 << (bits)) - 1))
-
-#define __inode_version(advise)	\
-	__inode_advise(advise, EROFS_I_VERSION_BIT,	\
-		EROFS_I_VERSION_BITS)
-
-#define __inode_data_mapping(advise)	\
-	__inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\
-		EROFS_I_DATA_MAPPING_BITS)
-
-static inline unsigned long inode_datablocks(struct inode *inode)
-{
-	/* since i_size cannot be changed */
-	return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
-}
-
-static inline bool is_inode_layout_compression(struct inode *inode)
-{
-	return erofs_inode_is_data_compressed(EROFS_V(inode)->datamode);
-}
-
-static inline bool is_inode_flat_inline(struct inode *inode)
-{
-	return EROFS_V(inode)->datamode == EROFS_INODE_FLAT_INLINE;
-}
-
-extern const struct super_operations erofs_sops;
-
-extern const struct address_space_operations erofs_raw_access_aops;
-#ifdef CONFIG_EROFS_FS_ZIP
-extern const struct address_space_operations z_erofs_vle_normalaccess_aops;
-#endif
-
-/*
- * Logical to physical block mapping, used by erofs_map_blocks()
- *
- * Different with other file systems, it is used for 2 access modes:
- *
- * 1) RAW access mode:
- *
- * Users pass a valid (m_lblk, m_lofs -- usually 0) pair,
- * and get the valid m_pblk, m_pofs and the longest m_len(in bytes).
- *
- * Note that m_lblk in the RAW access mode refers to the number of
- * the compressed ondisk block rather than the uncompressed
- * in-memory block for the compressed file.
- *
- * m_pofs equals to m_lofs except for the inline data page.
- *
- * 2) Normal access mode:
- *
- * If the inode is not compressed, it has no difference with
- * the RAW access mode. However, if the inode is compressed,
- * users should pass a valid (m_lblk, m_lofs) pair, and get
- * the needed m_pblk, m_pofs, m_len to get the compressed data
- * and the updated m_lblk, m_lofs which indicates the start
- * of the corresponding uncompressed data in the file.
- */
-enum {
-	BH_Zipped = BH_PrivateStart,
-	BH_FullMapped,
-};
-
-/* Has a disk mapping */
-#define EROFS_MAP_MAPPED	(1 << BH_Mapped)
-/* Located in metadata (could be copied from bd_inode) */
-#define EROFS_MAP_META		(1 << BH_Meta)
-/* The extent has been compressed */
-#define EROFS_MAP_ZIPPED	(1 << BH_Zipped)
-/* The length of extent is full */
-#define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
-
-struct erofs_map_blocks {
-	erofs_off_t m_pa, m_la;
-	u64 m_plen, m_llen;
-
-	unsigned int m_flags;
-
-	struct page *mpage;
-};
-
-/* Flags used by erofs_map_blocks() */
-#define EROFS_GET_BLOCKS_RAW    0x0001
-
-/* zmap.c */
-#ifdef CONFIG_EROFS_FS_ZIP
-int z_erofs_fill_inode(struct inode *inode);
-int z_erofs_map_blocks_iter(struct inode *inode,
-			    struct erofs_map_blocks *map,
-			    int flags);
-#else
-static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; }
-static inline int z_erofs_map_blocks_iter(struct inode *inode,
-					  struct erofs_map_blocks *map,
-					  int flags)
-{
-	return -EOPNOTSUPP;
-}
-#endif	/* !CONFIG_EROFS_FS_ZIP */
-
-/* data.c */
-static inline struct bio *erofs_grab_bio(struct super_block *sb,
-					 erofs_blk_t blkaddr,
-					 unsigned int nr_pages,
-					 void *bi_private, bio_end_io_t endio,
-					 bool nofail)
-{
-	const gfp_t gfp = GFP_NOIO;
-	struct bio *bio;
-
-	do {
-		if (nr_pages == 1) {
-			bio = bio_alloc(gfp | (nofail ? __GFP_NOFAIL : 0), 1);
-			if (unlikely(!bio)) {
-				DBG_BUGON(nofail);
-				return ERR_PTR(-ENOMEM);
-			}
-			break;
-		}
-		bio = bio_alloc(gfp, nr_pages);
-		nr_pages /= 2;
-	} while (unlikely(!bio));
-
-	bio->bi_end_io = endio;
-	bio_set_dev(bio, sb->s_bdev);
-	bio->bi_iter.bi_sector = (sector_t)blkaddr << LOG_SECTORS_PER_BLOCK;
-	bio->bi_private = bi_private;
-	return bio;
-}
-
-static inline void __submit_bio(struct bio *bio, unsigned int op,
-				unsigned int op_flags)
-{
-	bio_set_op_attrs(bio, op, op_flags);
-	submit_bio(bio);
-}
-
-struct page *__erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr,
-				   bool prio, bool nofail);
-
-static inline struct page *erofs_get_meta_page(struct super_block *sb,
-	erofs_blk_t blkaddr, bool prio)
-{
-	return __erofs_get_meta_page(sb, blkaddr, prio, false);
-}
-
-int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
-
-static inline struct page *erofs_get_inline_page(struct inode *inode,
-						 erofs_blk_t blkaddr)
-{
-	return erofs_get_meta_page(inode->i_sb, blkaddr,
-				   S_ISDIR(inode->i_mode));
-}
-
-/* inode.c */
-static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
-{
-#if BITS_PER_LONG == 32
-	return (nid >> 32) ^ (nid & 0xffffffff);
-#else
-	return nid;
-#endif
-}
-
-extern const struct inode_operations erofs_generic_iops;
-extern const struct inode_operations erofs_symlink_iops;
-extern const struct inode_operations erofs_fast_symlink_iops;
-
-static inline void set_inode_fast_symlink(struct inode *inode)
-{
-	inode->i_op = &erofs_fast_symlink_iops;
-}
-
-static inline bool is_inode_fast_symlink(struct inode *inode)
-{
-	return inode->i_op == &erofs_fast_symlink_iops;
-}
-
-struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
-int erofs_getattr(const struct path *path, struct kstat *stat,
-		  u32 request_mask, unsigned int query_flags);
-
-/* namei.c */
-extern const struct inode_operations erofs_dir_iops;
-
-int erofs_namei(struct inode *dir, struct qstr *name,
-		erofs_nid_t *nid, unsigned int *d_type);
-
-/* dir.c */
-extern const struct file_operations erofs_dir_fops;
-
-/* utils.c / zdata.c */
-struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail);
-
-#if (EROFS_PCPUBUF_NR_PAGES > 0)
-void *erofs_get_pcpubuf(unsigned int pagenr);
-#define erofs_put_pcpubuf(buf) do { \
-	(void)&(buf);	\
-	preempt_enable();	\
-} while (0)
-#else
-static inline void *erofs_get_pcpubuf(unsigned int pagenr)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-#define erofs_put_pcpubuf(buf) do {} while (0)
-#endif
-
-#ifdef CONFIG_EROFS_FS_ZIP
-int erofs_workgroup_put(struct erofs_workgroup *grp);
-struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
-					     pgoff_t index, bool *tag);
-int erofs_register_workgroup(struct super_block *sb,
-			     struct erofs_workgroup *grp, bool tag);
-void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
-void erofs_shrinker_register(struct super_block *sb);
-void erofs_shrinker_unregister(struct super_block *sb);
-int __init erofs_init_shrinker(void);
-void erofs_exit_shrinker(void);
-int __init z_erofs_init_zip_subsystem(void);
-void z_erofs_exit_zip_subsystem(void);
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
-				       struct erofs_workgroup *egrp);
-int erofs_try_to_free_cached_page(struct address_space *mapping,
-				  struct page *page);
-#else
-static inline void erofs_shrinker_register(struct super_block *sb) {}
-static inline void erofs_shrinker_unregister(struct super_block *sb) {}
-static inline int erofs_init_shrinker(void) { return 0; }
-static inline void erofs_exit_shrinker(void) {}
-static inline int z_erofs_init_zip_subsystem(void) { return 0; }
-static inline void z_erofs_exit_zip_subsystem(void) {}
-#endif	/* !CONFIG_EROFS_FS_ZIP */
-
-#define EFSCORRUPTED    EUCLEAN         /* Filesystem is corrupted */
-
-#endif	/* __EROFS_INTERNAL_H */
-
diff --git a/drivers/staging/erofs/namei.c b/drivers/staging/erofs/namei.c
deleted file mode 100644
index 8334a910acef..000000000000
--- a/drivers/staging/erofs/namei.c
+++ /dev/null
@@ -1,253 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/namei.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "xattr.h"
-
-#include <trace/events/erofs.h>
-
-struct erofs_qstr {
-	const unsigned char *name;
-	const unsigned char *end;
-};
-
-/* based on the end of qn is accurate and it must have the trailing '\0' */
-static inline int dirnamecmp(const struct erofs_qstr *qn,
-			     const struct erofs_qstr *qd,
-			     unsigned int *matched)
-{
-	unsigned int i = *matched;
-
-	/*
-	 * on-disk error, let's only BUG_ON in the debugging mode.
-	 * otherwise, it will return 1 to just skip the invalid name
-	 * and go on (in consideration of the lookup performance).
-	 */
-	DBG_BUGON(qd->name > qd->end);
-
-	/* qd could not have trailing '\0' */
-	/* However it is absolutely safe if < qd->end */
-	while (qd->name + i < qd->end && qd->name[i] != '\0') {
-		if (qn->name[i] != qd->name[i]) {
-			*matched = i;
-			return qn->name[i] > qd->name[i] ? 1 : -1;
-		}
-		++i;
-	}
-	*matched = i;
-	/* See comments in __d_alloc on the terminating NUL character */
-	return qn->name[i] == '\0' ? 0 : 1;
-}
-
-#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
-
-static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
-					       u8 *data,
-					       unsigned int dirblksize,
-					       const int ndirents)
-{
-	int head, back;
-	unsigned int startprfx, endprfx;
-	struct erofs_dirent *const de = (struct erofs_dirent *)data;
-
-	/* since the 1st dirent has been evaluated previously */
-	head = 1;
-	back = ndirents - 1;
-	startprfx = endprfx = 0;
-
-	while (head <= back) {
-		const int mid = head + (back - head) / 2;
-		const int nameoff = nameoff_from_disk(de[mid].nameoff,
-						      dirblksize);
-		unsigned int matched = min(startprfx, endprfx);
-		struct erofs_qstr dname = {
-			.name = data + nameoff,
-			.end = unlikely(mid >= ndirents - 1) ?
-				data + dirblksize :
-				data + nameoff_from_disk(de[mid + 1].nameoff,
-							 dirblksize)
-		};
-
-		/* string comparison without already matched prefix */
-		int ret = dirnamecmp(name, &dname, &matched);
-
-		if (unlikely(!ret)) {
-			return de + mid;
-		} else if (ret > 0) {
-			head = mid + 1;
-			startprfx = matched;
-		} else {
-			back = mid - 1;
-			endprfx = matched;
-		}
-	}
-
-	return ERR_PTR(-ENOENT);
-}
-
-static struct page *find_target_block_classic(struct inode *dir,
-					      struct erofs_qstr *name,
-					      int *_ndirents)
-{
-	unsigned int startprfx, endprfx;
-	int head, back;
-	struct address_space *const mapping = dir->i_mapping;
-	struct page *candidate = ERR_PTR(-ENOENT);
-
-	startprfx = endprfx = 0;
-	head = 0;
-	back = inode_datablocks(dir) - 1;
-
-	while (head <= back) {
-		const int mid = head + (back - head) / 2;
-		struct page *page = read_mapping_page(mapping, mid, NULL);
-
-		if (!IS_ERR(page)) {
-			struct erofs_dirent *de = kmap_atomic(page);
-			const int nameoff = nameoff_from_disk(de->nameoff,
-							      EROFS_BLKSIZ);
-			const int ndirents = nameoff / sizeof(*de);
-			int diff;
-			unsigned int matched;
-			struct erofs_qstr dname;
-
-			if (unlikely(!ndirents)) {
-				kunmap_atomic(de);
-				put_page(page);
-				errln("corrupted dir block %d @ nid %llu",
-				      mid, EROFS_V(dir)->nid);
-				DBG_BUGON(1);
-				page = ERR_PTR(-EFSCORRUPTED);
-				goto out;
-			}
-
-			matched = min(startprfx, endprfx);
-
-			dname.name = (u8 *)de + nameoff;
-			if (ndirents == 1)
-				dname.end = (u8 *)de + EROFS_BLKSIZ;
-			else
-				dname.end = (u8 *)de +
-					nameoff_from_disk(de[1].nameoff,
-							  EROFS_BLKSIZ);
-
-			/* string comparison without already matched prefix */
-			diff = dirnamecmp(name, &dname, &matched);
-			kunmap_atomic(de);
-
-			if (unlikely(!diff)) {
-				*_ndirents = 0;
-				goto out;
-			} else if (diff > 0) {
-				head = mid + 1;
-				startprfx = matched;
-
-				if (!IS_ERR(candidate))
-					put_page(candidate);
-				candidate = page;
-				*_ndirents = ndirents;
-			} else {
-				put_page(page);
-
-				back = mid - 1;
-				endprfx = matched;
-			}
-			continue;
-		}
-out:		/* free if the candidate is valid */
-		if (!IS_ERR(candidate))
-			put_page(candidate);
-		return page;
-	}
-	return candidate;
-}
-
-int erofs_namei(struct inode *dir,
-		struct qstr *name,
-		erofs_nid_t *nid, unsigned int *d_type)
-{
-	int ndirents;
-	struct page *page;
-	void *data;
-	struct erofs_dirent *de;
-	struct erofs_qstr qn;
-
-	if (unlikely(!dir->i_size))
-		return -ENOENT;
-
-	qn.name = name->name;
-	qn.end = name->name + name->len;
-
-	ndirents = 0;
-	page = find_target_block_classic(dir, &qn, &ndirents);
-
-	if (IS_ERR(page))
-		return PTR_ERR(page);
-
-	data = kmap_atomic(page);
-	/* the target page has been mapped */
-	if (ndirents)
-		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
-	else
-		de = (struct erofs_dirent *)data;
-
-	if (!IS_ERR(de)) {
-		*nid = le64_to_cpu(de->nid);
-		*d_type = de->file_type;
-	}
-
-	kunmap_atomic(data);
-	put_page(page);
-
-	return PTR_ERR_OR_ZERO(de);
-}
-
-/* NOTE: i_mutex is already held by vfs */
-static struct dentry *erofs_lookup(struct inode *dir,
-				   struct dentry *dentry,
-				   unsigned int flags)
-{
-	int err;
-	erofs_nid_t nid;
-	unsigned int d_type;
-	struct inode *inode;
-
-	DBG_BUGON(!d_really_is_negative(dentry));
-	/* dentry must be unhashed in lookup, no need to worry about */
-	DBG_BUGON(!d_unhashed(dentry));
-
-	trace_erofs_lookup(dir, dentry, flags);
-
-	/* file name exceeds fs limit */
-	if (unlikely(dentry->d_name.len > EROFS_NAME_LEN))
-		return ERR_PTR(-ENAMETOOLONG);
-
-	/* false uninitialized warnings on gcc 4.8.x */
-	err = erofs_namei(dir, &dentry->d_name, &nid, &d_type);
-
-	if (err == -ENOENT) {
-		/* negative dentry */
-		inode = NULL;
-	} else if (unlikely(err)) {
-		inode = ERR_PTR(err);
-	} else {
-		debugln("%s, %s (nid %llu) found, d_type %u", __func__,
-			dentry->d_name.name, nid, d_type);
-		inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR);
-	}
-	return d_splice_alias(inode, dentry);
-}
-
-const struct inode_operations erofs_dir_iops = {
-	.lookup = erofs_lookup,
-	.getattr = erofs_getattr,
-#ifdef CONFIG_EROFS_FS_XATTR
-	.listxattr = erofs_listxattr,
-#endif
-	.get_acl = erofs_get_acl,
-};
-
diff --git a/drivers/staging/erofs/super.c b/drivers/staging/erofs/super.c
deleted file mode 100644
index 2da471010a86..000000000000
--- a/drivers/staging/erofs/super.c
+++ /dev/null
@@ -1,671 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/super.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include <linux/module.h>
-#include <linux/buffer_head.h>
-#include <linux/statfs.h>
-#include <linux/parser.h>
-#include <linux/seq_file.h>
-#include "xattr.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/erofs.h>
-
-static struct kmem_cache *erofs_inode_cachep __read_mostly;
-
-static void init_once(void *ptr)
-{
-	struct erofs_vnode *vi = ptr;
-
-	inode_init_once(&vi->vfs_inode);
-}
-
-static int __init erofs_init_inode_cache(void)
-{
-	erofs_inode_cachep = kmem_cache_create("erofs_inode",
-					       sizeof(struct erofs_vnode), 0,
-					       SLAB_RECLAIM_ACCOUNT,
-					       init_once);
-
-	return erofs_inode_cachep ? 0 : -ENOMEM;
-}
-
-static void erofs_exit_inode_cache(void)
-{
-	kmem_cache_destroy(erofs_inode_cachep);
-}
-
-static struct inode *alloc_inode(struct super_block *sb)
-{
-	struct erofs_vnode *vi =
-		kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
-
-	if (!vi)
-		return NULL;
-
-	/* zero out everything except vfs_inode */
-	memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode));
-	return &vi->vfs_inode;
-}
-
-static void free_inode(struct inode *inode)
-{
-	struct erofs_vnode *vi = EROFS_V(inode);
-
-	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
-	if (is_inode_fast_symlink(inode))
-		kfree(inode->i_link);
-
-	kfree(vi->xattr_shared_xattrs);
-
-	kmem_cache_free(erofs_inode_cachep, vi);
-}
-
-static bool check_layout_compatibility(struct super_block *sb,
-				       struct erofs_super_block *layout)
-{
-	const unsigned int requirements = le32_to_cpu(layout->requirements);
-
-	EROFS_SB(sb)->requirements = requirements;
-
-	/* check if current kernel meets all mandatory requirements */
-	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
-		errln("unidentified requirements %x, please upgrade kernel version",
-		      requirements & ~EROFS_ALL_REQUIREMENTS);
-		return false;
-	}
-	return true;
-}
-
-static int superblock_read(struct super_block *sb)
-{
-	struct erofs_sb_info *sbi;
-	struct buffer_head *bh;
-	struct erofs_super_block *layout;
-	unsigned int blkszbits;
-	int ret;
-
-	bh = sb_bread(sb, 0);
-
-	if (!bh) {
-		errln("cannot read erofs superblock");
-		return -EIO;
-	}
-
-	sbi = EROFS_SB(sb);
-	layout = (struct erofs_super_block *)((u8 *)bh->b_data
-		 + EROFS_SUPER_OFFSET);
-
-	ret = -EINVAL;
-	if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) {
-		errln("cannot find valid erofs superblock");
-		goto out;
-	}
-
-	blkszbits = layout->blkszbits;
-	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
-	if (unlikely(blkszbits != LOG_BLOCK_SIZE)) {
-		errln("blksize %u isn't supported on this platform",
-		      1 << blkszbits);
-		goto out;
-	}
-
-	if (!check_layout_compatibility(sb, layout))
-		goto out;
-
-	sbi->blocks = le32_to_cpu(layout->blocks);
-	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
-#ifdef CONFIG_EROFS_FS_XATTR
-	sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr);
-#endif
-	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
-	sbi->root_nid = le16_to_cpu(layout->root_nid);
-	sbi->inos = le64_to_cpu(layout->inos);
-
-	sbi->build_time = le64_to_cpu(layout->build_time);
-	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
-
-	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
-
-	ret = strscpy(sbi->volume_name, layout->volume_name,
-		      sizeof(layout->volume_name));
-	if (ret < 0) {	/* -E2BIG */
-		errln("bad volume name without NIL terminator");
-		ret = -EFSCORRUPTED;
-		goto out;
-	}
-	ret = 0;
-out:
-	brelse(bh);
-	return ret;
-}
-
-#ifdef CONFIG_EROFS_FAULT_INJECTION
-const char *erofs_fault_name[FAULT_MAX] = {
-	[FAULT_KMALLOC]		= "kmalloc",
-	[FAULT_READ_IO]		= "read IO error",
-};
-
-static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
-				     unsigned int rate)
-{
-	struct erofs_fault_info *ffi = &sbi->fault_info;
-
-	if (rate) {
-		atomic_set(&ffi->inject_ops, 0);
-		ffi->inject_rate = rate;
-		ffi->inject_type = (1 << FAULT_MAX) - 1;
-	} else {
-		memset(ffi, 0, sizeof(struct erofs_fault_info));
-	}
-
-	set_opt(sbi, FAULT_INJECTION);
-}
-
-static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
-				  substring_t *args)
-{
-	int rate = 0;
-
-	if (args->from && match_int(args, &rate))
-		return -EINVAL;
-
-	__erofs_build_fault_attr(sbi, rate);
-	return 0;
-}
-
-static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
-{
-	return sbi->fault_info.inject_rate;
-}
-#else
-static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
-				     unsigned int rate)
-{
-}
-
-static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
-				  substring_t *args)
-{
-	infoln("fault_injection options not supported");
-	return 0;
-}
-
-static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
-{
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_EROFS_FS_ZIP
-static int erofs_build_cache_strategy(struct erofs_sb_info *sbi,
-				      substring_t *args)
-{
-	const char *cs = match_strdup(args);
-	int err = 0;
-
-	if (!cs) {
-		errln("Not enough memory to store cache strategy");
-		return -ENOMEM;
-	}
-
-	if (!strcmp(cs, "disabled")) {
-		sbi->cache_strategy = EROFS_ZIP_CACHE_DISABLED;
-	} else if (!strcmp(cs, "readahead")) {
-		sbi->cache_strategy = EROFS_ZIP_CACHE_READAHEAD;
-	} else if (!strcmp(cs, "readaround")) {
-		sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
-	} else {
-		errln("Unrecognized cache strategy \"%s\"", cs);
-		err = -EINVAL;
-	}
-	kfree(cs);
-	return err;
-}
-#else
-static int erofs_build_cache_strategy(struct erofs_sb_info *sbi,
-				      substring_t *args)
-{
-	infoln("EROFS compression is disabled, so cache strategy is ignored");
-	return 0;
-}
-#endif
-
-/* set up default EROFS parameters */
-static void default_options(struct erofs_sb_info *sbi)
-{
-#ifdef CONFIG_EROFS_FS_ZIP
-	sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
-	sbi->max_sync_decompress_pages = 3;
-#endif
-#ifdef CONFIG_EROFS_FS_XATTR
-	set_opt(sbi, XATTR_USER);
-#endif
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-	set_opt(sbi, POSIX_ACL);
-#endif
-}
-
-enum {
-	Opt_user_xattr,
-	Opt_nouser_xattr,
-	Opt_acl,
-	Opt_noacl,
-	Opt_fault_injection,
-	Opt_cache_strategy,
-	Opt_err
-};
-
-static match_table_t erofs_tokens = {
-	{Opt_user_xattr, "user_xattr"},
-	{Opt_nouser_xattr, "nouser_xattr"},
-	{Opt_acl, "acl"},
-	{Opt_noacl, "noacl"},
-	{Opt_fault_injection, "fault_injection=%u"},
-	{Opt_cache_strategy, "cache_strategy=%s"},
-	{Opt_err, NULL}
-};
-
-static int parse_options(struct super_block *sb, char *options)
-{
-	substring_t args[MAX_OPT_ARGS];
-	char *p;
-	int err;
-
-	if (!options)
-		return 0;
-
-	while ((p = strsep(&options, ","))) {
-		int token;
-
-		if (!*p)
-			continue;
-
-		args[0].to = args[0].from = NULL;
-		token = match_token(p, erofs_tokens, args);
-
-		switch (token) {
-#ifdef CONFIG_EROFS_FS_XATTR
-		case Opt_user_xattr:
-			set_opt(EROFS_SB(sb), XATTR_USER);
-			break;
-		case Opt_nouser_xattr:
-			clear_opt(EROFS_SB(sb), XATTR_USER);
-			break;
-#else
-		case Opt_user_xattr:
-			infoln("user_xattr options not supported");
-			break;
-		case Opt_nouser_xattr:
-			infoln("nouser_xattr options not supported");
-			break;
-#endif
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-		case Opt_acl:
-			set_opt(EROFS_SB(sb), POSIX_ACL);
-			break;
-		case Opt_noacl:
-			clear_opt(EROFS_SB(sb), POSIX_ACL);
-			break;
-#else
-		case Opt_acl:
-			infoln("acl options not supported");
-			break;
-		case Opt_noacl:
-			infoln("noacl options not supported");
-			break;
-#endif
-		case Opt_fault_injection:
-			err = erofs_build_fault_attr(EROFS_SB(sb), args);
-			if (err)
-				return err;
-			break;
-		case Opt_cache_strategy:
-			err = erofs_build_cache_strategy(EROFS_SB(sb), args);
-			if (err)
-				return err;
-			break;
-		default:
-			errln("Unrecognized mount option \"%s\" or missing value", p);
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
-#ifdef CONFIG_EROFS_FS_ZIP
-static const struct address_space_operations managed_cache_aops;
-
-static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
-{
-	int ret = 1;	/* 0 - busy */
-	struct address_space *const mapping = page->mapping;
-
-	DBG_BUGON(!PageLocked(page));
-	DBG_BUGON(mapping->a_ops != &managed_cache_aops);
-
-	if (PagePrivate(page))
-		ret = erofs_try_to_free_cached_page(mapping, page);
-
-	return ret;
-}
-
-static void managed_cache_invalidatepage(struct page *page,
-					 unsigned int offset,
-					 unsigned int length)
-{
-	const unsigned int stop = length + offset;
-
-	DBG_BUGON(!PageLocked(page));
-
-	/* Check for potential overflow in debug mode */
-	DBG_BUGON(stop > PAGE_SIZE || stop < length);
-
-	if (offset == 0 && stop == PAGE_SIZE)
-		while (!managed_cache_releasepage(page, GFP_NOFS))
-			cond_resched();
-}
-
-static const struct address_space_operations managed_cache_aops = {
-	.releasepage = managed_cache_releasepage,
-	.invalidatepage = managed_cache_invalidatepage,
-};
-
-static int erofs_init_managed_cache(struct super_block *sb)
-{
-	struct erofs_sb_info *const sbi = EROFS_SB(sb);
-	struct inode *const inode = new_inode(sb);
-
-	if (unlikely(!inode))
-		return -ENOMEM;
-
-	set_nlink(inode, 1);
-	inode->i_size = OFFSET_MAX;
-
-	inode->i_mapping->a_ops = &managed_cache_aops;
-	mapping_set_gfp_mask(inode->i_mapping,
-			     GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
-	sbi->managed_cache = inode;
-	return 0;
-}
-#else
-static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
-#endif
-
-static int erofs_fill_super(struct super_block *sb, void *data, int silent)
-{
-	struct inode *inode;
-	struct erofs_sb_info *sbi;
-	int err;
-
-	infoln("fill_super, device -> %s", sb->s_id);
-	infoln("options -> %s", (char *)data);
-
-	sb->s_magic = EROFS_SUPER_MAGIC;
-
-	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
-		errln("failed to set erofs blksize");
-		return -EINVAL;
-	}
-
-	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
-	if (unlikely(!sbi))
-		return -ENOMEM;
-
-	sb->s_fs_info = sbi;
-	err = superblock_read(sb);
-	if (err)
-		return err;
-
-	sb->s_flags |= SB_RDONLY | SB_NOATIME;
-	sb->s_maxbytes = MAX_LFS_FILESIZE;
-	sb->s_time_gran = 1;
-
-	sb->s_op = &erofs_sops;
-
-#ifdef CONFIG_EROFS_FS_XATTR
-	sb->s_xattr = erofs_xattr_handlers;
-#endif
-	/* set erofs default mount options */
-	default_options(sbi);
-
-	err = parse_options(sb, data);
-	if (unlikely(err))
-		return err;
-
-	if (!silent)
-		infoln("root inode @ nid %llu", ROOT_NID(sbi));
-
-	if (test_opt(sbi, POSIX_ACL))
-		sb->s_flags |= SB_POSIXACL;
-	else
-		sb->s_flags &= ~SB_POSIXACL;
-
-#ifdef CONFIG_EROFS_FS_ZIP
-	INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
-#endif
-
-	/* get the root inode */
-	inode = erofs_iget(sb, ROOT_NID(sbi), true);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	if (unlikely(!S_ISDIR(inode->i_mode))) {
-		errln("rootino(nid %llu) is not a directory(i_mode %o)",
-		      ROOT_NID(sbi), inode->i_mode);
-		iput(inode);
-		return -EINVAL;
-	}
-
-	sb->s_root = d_make_root(inode);
-	if (unlikely(!sb->s_root))
-		return -ENOMEM;
-
-	erofs_shrinker_register(sb);
-	/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
-	err = erofs_init_managed_cache(sb);
-	if (unlikely(err))
-		return err;
-
-	if (!silent)
-		infoln("mounted on %s with opts: %s.", sb->s_id, (char *)data);
-	return 0;
-}
-
-static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags,
-				  const char *dev_name, void *data)
-{
-	return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super);
-}
-
-/*
- * could be triggered after deactivate_locked_super()
- * is called, thus including umount and failed to initialize.
- */
-static void erofs_kill_sb(struct super_block *sb)
-{
-	struct erofs_sb_info *sbi;
-
-	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
-	infoln("unmounting for %s", sb->s_id);
-
-	kill_block_super(sb);
-
-	sbi = EROFS_SB(sb);
-	if (!sbi)
-		return;
-	kfree(sbi);
-	sb->s_fs_info = NULL;
-}
-
-/* called when ->s_root is non-NULL */
-static void erofs_put_super(struct super_block *sb)
-{
-	struct erofs_sb_info *const sbi = EROFS_SB(sb);
-
-	DBG_BUGON(!sbi);
-
-	erofs_shrinker_unregister(sb);
-#ifdef CONFIG_EROFS_FS_ZIP
-	iput(sbi->managed_cache);
-	sbi->managed_cache = NULL;
-#endif
-}
-
-static struct file_system_type erofs_fs_type = {
-	.owner          = THIS_MODULE,
-	.name           = "erofs",
-	.mount          = erofs_mount,
-	.kill_sb        = erofs_kill_sb,
-	.fs_flags       = FS_REQUIRES_DEV,
-};
-MODULE_ALIAS_FS("erofs");
-
-static int __init erofs_module_init(void)
-{
-	int err;
-
-	erofs_check_ondisk_layout_definitions();
-	infoln("initializing erofs " EROFS_VERSION);
-
-	err = erofs_init_inode_cache();
-	if (err)
-		goto icache_err;
-
-	err = erofs_init_shrinker();
-	if (err)
-		goto shrinker_err;
-
-	err = z_erofs_init_zip_subsystem();
-	if (err)
-		goto zip_err;
-
-	err = register_filesystem(&erofs_fs_type);
-	if (err)
-		goto fs_err;
-
-	infoln("successfully to initialize erofs");
-	return 0;
-
-fs_err:
-	z_erofs_exit_zip_subsystem();
-zip_err:
-	erofs_exit_shrinker();
-shrinker_err:
-	erofs_exit_inode_cache();
-icache_err:
-	return err;
-}
-
-static void __exit erofs_module_exit(void)
-{
-	unregister_filesystem(&erofs_fs_type);
-	z_erofs_exit_zip_subsystem();
-	erofs_exit_shrinker();
-	erofs_exit_inode_cache();
-	infoln("successfully finalize erofs");
-}
-
-/* get filesystem statistics */
-static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
-	struct super_block *sb = dentry->d_sb;
-	struct erofs_sb_info *sbi = EROFS_SB(sb);
-	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
-
-	buf->f_type = sb->s_magic;
-	buf->f_bsize = EROFS_BLKSIZ;
-	buf->f_blocks = sbi->blocks;
-	buf->f_bfree = buf->f_bavail = 0;
-
-	buf->f_files = ULLONG_MAX;
-	buf->f_ffree = ULLONG_MAX - sbi->inos;
-
-	buf->f_namelen = EROFS_NAME_LEN;
-
-	buf->f_fsid.val[0] = (u32)id;
-	buf->f_fsid.val[1] = (u32)(id >> 32);
-	return 0;
-}
-
-static int erofs_show_options(struct seq_file *seq, struct dentry *root)
-{
-	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
-
-#ifdef CONFIG_EROFS_FS_XATTR
-	if (test_opt(sbi, XATTR_USER))
-		seq_puts(seq, ",user_xattr");
-	else
-		seq_puts(seq, ",nouser_xattr");
-#endif
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-	if (test_opt(sbi, POSIX_ACL))
-		seq_puts(seq, ",acl");
-	else
-		seq_puts(seq, ",noacl");
-#endif
-	if (test_opt(sbi, FAULT_INJECTION))
-		seq_printf(seq, ",fault_injection=%u",
-			   erofs_get_fault_rate(sbi));
-#ifdef CONFIG_EROFS_FS_ZIP
-	if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED) {
-		seq_puts(seq, ",cache_strategy=disabled");
-	} else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) {
-		seq_puts(seq, ",cache_strategy=readahead");
-	} else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND) {
-		seq_puts(seq, ",cache_strategy=readaround");
-	} else {
-		seq_puts(seq, ",cache_strategy=(unknown)");
-		DBG_BUGON(1);
-	}
-#endif
-	return 0;
-}
-
-static int erofs_remount(struct super_block *sb, int *flags, char *data)
-{
-	struct erofs_sb_info *sbi = EROFS_SB(sb);
-	unsigned int org_mnt_opt = sbi->mount_opt;
-	unsigned int org_inject_rate = erofs_get_fault_rate(sbi);
-	int err;
-
-	DBG_BUGON(!sb_rdonly(sb));
-	err = parse_options(sb, data);
-	if (err)
-		goto out;
-
-	if (test_opt(sbi, POSIX_ACL))
-		sb->s_flags |= SB_POSIXACL;
-	else
-		sb->s_flags &= ~SB_POSIXACL;
-
-	*flags |= SB_RDONLY;
-	return 0;
-out:
-	__erofs_build_fault_attr(sbi, org_inject_rate);
-	sbi->mount_opt = org_mnt_opt;
-
-	return err;
-}
-
-const struct super_operations erofs_sops = {
-	.put_super = erofs_put_super,
-	.alloc_inode = alloc_inode,
-	.free_inode = free_inode,
-	.statfs = erofs_statfs,
-	.show_options = erofs_show_options,
-	.remount_fs = erofs_remount,
-};
-
-module_init(erofs_module_init);
-module_exit(erofs_module_exit);
-
-MODULE_DESCRIPTION("Enhanced ROM File System");
-MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/staging/erofs/tagptr.h b/drivers/staging/erofs/tagptr.h
deleted file mode 100644
index a72897c86744..000000000000
--- a/drivers/staging/erofs/tagptr.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * A tagged pointer implementation
- *
- * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_TAGPTR_H
-#define __EROFS_FS_TAGPTR_H
-
-#include <linux/types.h>
-#include <linux/build_bug.h>
-
-/*
- * the name of tagged pointer types are tagptr{1, 2, 3...}_t
- * avoid directly using the internal structs __tagptr{1, 2, 3...}
- */
-#define __MAKE_TAGPTR(n) \
-typedef struct __tagptr##n {	\
-	uintptr_t v;	\
-} tagptr##n##_t;
-
-__MAKE_TAGPTR(1)
-__MAKE_TAGPTR(2)
-__MAKE_TAGPTR(3)
-__MAKE_TAGPTR(4)
-
-#undef __MAKE_TAGPTR
-
-extern void __compiletime_error("bad tagptr tags")
-	__bad_tagptr_tags(void);
-
-extern void __compiletime_error("bad tagptr type")
-	__bad_tagptr_type(void);
-
-/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */
-#define __tagptr_mask_1(ptr, n)	\
-	__builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \
-		(1UL << (n)) - 1 :
-
-#define __tagptr_mask(ptr)	(\
-	__tagptr_mask_1(ptr, 1) ( \
-	__tagptr_mask_1(ptr, 2) ( \
-	__tagptr_mask_1(ptr, 3) ( \
-	__tagptr_mask_1(ptr, 4) ( \
-	__bad_tagptr_type(), 0)))))
-
-/* generate a tagged pointer from a raw value */
-#define tagptr_init(type, val) \
-	((typeof(type)){ .v = (uintptr_t)(val) })
-
-/*
- * directly cast a tagged pointer to the native pointer type, which
- * could be used for backward compatibility of existing code.
- */
-#define tagptr_cast_ptr(tptr) ((void *)(tptr).v)
-
-/* encode tagged pointers */
-#define tagptr_fold(type, ptr, _tags) ({ \
-	const typeof(_tags) tags = (_tags); \
-	if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \
-		__bad_tagptr_tags(); \
-tagptr_init(type, (uintptr_t)(ptr) | tags); })
-
-/* decode tagged pointers */
-#define tagptr_unfold_ptr(tptr) \
-	((void *)((tptr).v & ~__tagptr_mask(tptr)))
-
-#define tagptr_unfold_tags(tptr) \
-	((tptr).v & __tagptr_mask(tptr))
-
-/* operations for the tagger pointer */
-#define tagptr_eq(_tptr1, _tptr2) ({ \
-	typeof(_tptr1) tptr1 = (_tptr1); \
-	typeof(_tptr2) tptr2 = (_tptr2); \
-	(void)(&tptr1 == &tptr2); \
-(tptr1).v == (tptr2).v; })
-
-/* lock-free CAS operation */
-#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \
-	typeof(_ptptr) ptptr = (_ptptr); \
-	typeof(_o) o = (_o); \
-	typeof(_n) n = (_n); \
-	(void)(&o == &n); \
-	(void)(&o == ptptr); \
-tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
-
-/* wrap WRITE_ONCE if atomic update is needed */
-#define tagptr_replace_tags(_ptptr, tags) ({ \
-	typeof(_ptptr) ptptr = (_ptptr); \
-	*ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \
-*ptptr; })
-
-#define tagptr_set_tags(_ptptr, _tags) ({ \
-	typeof(_ptptr) ptptr = (_ptptr); \
-	const typeof(_tags) tags = (_tags); \
-	if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
-		__bad_tagptr_tags(); \
-	ptptr->v |= tags; \
-*ptptr; })
-
-#define tagptr_clear_tags(_ptptr, _tags) ({ \
-	typeof(_ptptr) ptptr = (_ptptr); \
-	const typeof(_tags) tags = (_tags); \
-	if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
-		__bad_tagptr_tags(); \
-	ptptr->v &= ~tags; \
-*ptptr; })
-
-#endif	/* __EROFS_FS_TAGPTR_H */
-
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
deleted file mode 100644
index 814c2ee037ae..000000000000
--- a/drivers/staging/erofs/utils.c
+++ /dev/null
@@ -1,335 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/utils.c
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "internal.h"
-#include <linux/pagevec.h>
-
-struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
-{
-	struct page *page;
-
-	if (!list_empty(pool)) {
-		page = lru_to_page(pool);
-		DBG_BUGON(page_ref_count(page) != 1);
-		list_del(&page->lru);
-	} else {
-		page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
-	}
-	return page;
-}
-
-#if (EROFS_PCPUBUF_NR_PAGES > 0)
-static struct {
-	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
-} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
-
-void *erofs_get_pcpubuf(unsigned int pagenr)
-{
-	preempt_disable();
-	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
-}
-#endif
-
-#ifdef CONFIG_EROFS_FS_ZIP
-/* global shrink count (for all mounted EROFS instances) */
-static atomic_long_t erofs_global_shrink_cnt;
-
-#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
-#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
-
-static int erofs_workgroup_get(struct erofs_workgroup *grp)
-{
-	int o;
-
-repeat:
-	o = erofs_wait_on_workgroup_freezed(grp);
-	if (unlikely(o <= 0))
-		return -1;
-
-	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
-		goto repeat;
-
-	/* decrease refcount paired by erofs_workgroup_put */
-	if (unlikely(o == 1))
-		atomic_long_dec(&erofs_global_shrink_cnt);
-	return 0;
-}
-
-struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
-					     pgoff_t index, bool *tag)
-{
-	struct erofs_sb_info *sbi = EROFS_SB(sb);
-	struct erofs_workgroup *grp;
-
-repeat:
-	rcu_read_lock();
-	grp = radix_tree_lookup(&sbi->workstn_tree, index);
-	if (grp) {
-		*tag = xa_pointer_tag(grp);
-		grp = xa_untag_pointer(grp);
-
-		if (erofs_workgroup_get(grp)) {
-			/* prefer to relax rcu read side */
-			rcu_read_unlock();
-			goto repeat;
-		}
-
-		DBG_BUGON(index != grp->index);
-	}
-	rcu_read_unlock();
-	return grp;
-}
-
-int erofs_register_workgroup(struct super_block *sb,
-			     struct erofs_workgroup *grp,
-			     bool tag)
-{
-	struct erofs_sb_info *sbi;
-	int err;
-
-	/* grp shouldn't be broken or used before */
-	if (unlikely(atomic_read(&grp->refcount) != 1)) {
-		DBG_BUGON(1);
-		return -EINVAL;
-	}
-
-	err = radix_tree_preload(GFP_NOFS);
-	if (err)
-		return err;
-
-	sbi = EROFS_SB(sb);
-	xa_lock(&sbi->workstn_tree);
-
-	grp = xa_tag_pointer(grp, tag);
-
-	/*
-	 * Bump up reference count before making this workgroup
-	 * visible to other users in order to avoid potential UAF
-	 * without serialized by workstn_lock.
-	 */
-	__erofs_workgroup_get(grp);
-
-	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
-	if (unlikely(err))
-		/*
-		 * it's safe to decrease since the workgroup isn't visible
-		 * and refcount >= 2 (cannot be freezed).
-		 */
-		__erofs_workgroup_put(grp);
-
-	xa_unlock(&sbi->workstn_tree);
-	radix_tree_preload_end();
-	return err;
-}
-
-static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
-{
-	atomic_long_dec(&erofs_global_shrink_cnt);
-	erofs_workgroup_free_rcu(grp);
-}
-
-int erofs_workgroup_put(struct erofs_workgroup *grp)
-{
-	int count = atomic_dec_return(&grp->refcount);
-
-	if (count == 1)
-		atomic_long_inc(&erofs_global_shrink_cnt);
-	else if (!count)
-		__erofs_workgroup_free(grp);
-	return count;
-}
-
-static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
-{
-	erofs_workgroup_unfreeze(grp, 0);
-	__erofs_workgroup_free(grp);
-}
-
-static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
-					   struct erofs_workgroup *grp,
-					   bool cleanup)
-{
-	/*
-	 * If managed cache is on, refcount of workgroups
-	 * themselves could be < 0 (freezed). In other words,
-	 * there is no guarantee that all refcounts > 0.
-	 */
-	if (!erofs_workgroup_try_to_freeze(grp, 1))
-		return false;
-
-	/*
-	 * Note that all cached pages should be unattached
-	 * before deleted from the radix tree. Otherwise some
-	 * cached pages could be still attached to the orphan
-	 * old workgroup when the new one is available in the tree.
-	 */
-	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
-		erofs_workgroup_unfreeze(grp, 1);
-		return false;
-	}
-
-	/*
-	 * It's impossible to fail after the workgroup is freezed,
-	 * however in order to avoid some race conditions, add a
-	 * DBG_BUGON to observe this in advance.
-	 */
-	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
-						     grp->index)) != grp);
-
-	/*
-	 * If managed cache is on, last refcount should indicate
-	 * the related workstation.
-	 */
-	erofs_workgroup_unfreeze_final(grp);
-	return true;
-}
-
-static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
-					      unsigned long nr_shrink,
-					      bool cleanup)
-{
-	pgoff_t first_index = 0;
-	void *batch[PAGEVEC_SIZE];
-	unsigned int freed = 0;
-
-	int i, found;
-repeat:
-	xa_lock(&sbi->workstn_tree);
-
-	found = radix_tree_gang_lookup(&sbi->workstn_tree,
-				       batch, first_index, PAGEVEC_SIZE);
-
-	for (i = 0; i < found; ++i) {
-		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
-
-		first_index = grp->index + 1;
-
-		/* try to shrink each valid workgroup */
-		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
-			continue;
-
-		++freed;
-		if (unlikely(!--nr_shrink))
-			break;
-	}
-	xa_unlock(&sbi->workstn_tree);
-
-	if (i && nr_shrink)
-		goto repeat;
-	return freed;
-}
-
-/* protected by 'erofs_sb_list_lock' */
-static unsigned int shrinker_run_no;
-
-/* protects the mounted 'erofs_sb_list' */
-static DEFINE_SPINLOCK(erofs_sb_list_lock);
-static LIST_HEAD(erofs_sb_list);
-
-void erofs_shrinker_register(struct super_block *sb)
-{
-	struct erofs_sb_info *sbi = EROFS_SB(sb);
-
-	mutex_init(&sbi->umount_mutex);
-
-	spin_lock(&erofs_sb_list_lock);
-	list_add(&sbi->list, &erofs_sb_list);
-	spin_unlock(&erofs_sb_list_lock);
-}
-
-void erofs_shrinker_unregister(struct super_block *sb)
-{
-	struct erofs_sb_info *const sbi = EROFS_SB(sb);
-
-	mutex_lock(&sbi->umount_mutex);
-	erofs_shrink_workstation(sbi, ~0UL, true);
-
-	spin_lock(&erofs_sb_list_lock);
-	list_del(&sbi->list);
-	spin_unlock(&erofs_sb_list_lock);
-	mutex_unlock(&sbi->umount_mutex);
-}
-
-static unsigned long erofs_shrink_count(struct shrinker *shrink,
-					struct shrink_control *sc)
-{
-	return atomic_long_read(&erofs_global_shrink_cnt);
-}
-
-static unsigned long erofs_shrink_scan(struct shrinker *shrink,
-				       struct shrink_control *sc)
-{
-	struct erofs_sb_info *sbi;
-	struct list_head *p;
-
-	unsigned long nr = sc->nr_to_scan;
-	unsigned int run_no;
-	unsigned long freed = 0;
-
-	spin_lock(&erofs_sb_list_lock);
-	do {
-		run_no = ++shrinker_run_no;
-	} while (run_no == 0);
-
-	/* Iterate over all mounted superblocks and try to shrink them */
-	p = erofs_sb_list.next;
-	while (p != &erofs_sb_list) {
-		sbi = list_entry(p, struct erofs_sb_info, list);
-
-		/*
-		 * We move the ones we do to the end of the list, so we stop
-		 * when we see one we have already done.
-		 */
-		if (sbi->shrinker_run_no == run_no)
-			break;
-
-		if (!mutex_trylock(&sbi->umount_mutex)) {
-			p = p->next;
-			continue;
-		}
-
-		spin_unlock(&erofs_sb_list_lock);
-		sbi->shrinker_run_no = run_no;
-
-		freed += erofs_shrink_workstation(sbi, nr, false);
-
-		spin_lock(&erofs_sb_list_lock);
-		/* Get the next list element before we move this one */
-		p = p->next;
-
-		/*
-		 * Move this one to the end of the list to provide some
-		 * fairness.
-		 */
-		list_move_tail(&sbi->list, &erofs_sb_list);
-		mutex_unlock(&sbi->umount_mutex);
-
-		if (freed >= nr)
-			break;
-	}
-	spin_unlock(&erofs_sb_list_lock);
-	return freed;
-}
-
-static struct shrinker erofs_shrinker_info = {
-	.scan_objects = erofs_shrink_scan,
-	.count_objects = erofs_shrink_count,
-	.seeks = DEFAULT_SEEKS,
-};
-
-int __init erofs_init_shrinker(void)
-{
-	return register_shrinker(&erofs_shrinker_info);
-}
-
-void erofs_exit_shrinker(void)
-{
-	unregister_shrinker(&erofs_shrinker_info);
-}
-#endif	/* !CONFIG_EROFS_FS_ZIP */
-
diff --git a/drivers/staging/erofs/xattr.c b/drivers/staging/erofs/xattr.c
deleted file mode 100644
index e7e5840e3f9d..000000000000
--- a/drivers/staging/erofs/xattr.c
+++ /dev/null
@@ -1,705 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/xattr.c
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include <linux/security.h>
-#include "xattr.h"
-
-struct xattr_iter {
-	struct super_block *sb;
-	struct page *page;
-	void *kaddr;
-
-	erofs_blk_t blkaddr;
-	unsigned int ofs;
-};
-
-static inline void xattr_iter_end(struct xattr_iter *it, bool atomic)
-{
-	/* the only user of kunmap() is 'init_inode_xattrs' */
-	if (unlikely(!atomic))
-		kunmap(it->page);
-	else
-		kunmap_atomic(it->kaddr);
-
-	unlock_page(it->page);
-	put_page(it->page);
-}
-
-static inline void xattr_iter_end_final(struct xattr_iter *it)
-{
-	if (!it->page)
-		return;
-
-	xattr_iter_end(it, true);
-}
-
-static int init_inode_xattrs(struct inode *inode)
-{
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	struct xattr_iter it;
-	unsigned int i;
-	struct erofs_xattr_ibody_header *ih;
-	struct super_block *sb;
-	struct erofs_sb_info *sbi;
-	bool atomic_map;
-	int ret = 0;
-
-	/* the most case is that xattrs of this inode are initialized. */
-	if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
-		return 0;
-
-	if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE))
-		return -ERESTARTSYS;
-
-	/* someone has initialized xattrs for us? */
-	if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
-		goto out_unlock;
-
-	/*
-	 * bypass all xattr operations if ->xattr_isize is not greater than
-	 * sizeof(struct erofs_xattr_ibody_header), in detail:
-	 * 1) it is not enough to contain erofs_xattr_ibody_header then
-	 *    ->xattr_isize should be 0 (it means no xattr);
-	 * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk
-	 *    undefined right now (maybe use later with some new sb feature).
-	 */
-	if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
-		errln("xattr_isize %d of nid %llu is not supported yet",
-		      vi->xattr_isize, vi->nid);
-		ret = -EOPNOTSUPP;
-		goto out_unlock;
-	} else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
-		if (unlikely(vi->xattr_isize)) {
-			errln("bogus xattr ibody @ nid %llu", vi->nid);
-			DBG_BUGON(1);
-			ret = -EFSCORRUPTED;
-			goto out_unlock;	/* xattr ondisk layout error */
-		}
-		ret = -ENOATTR;
-		goto out_unlock;
-	}
-
-	sb = inode->i_sb;
-	sbi = EROFS_SB(sb);
-	it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
-	it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
-
-	it.page = erofs_get_inline_page(inode, it.blkaddr);
-	if (IS_ERR(it.page)) {
-		ret = PTR_ERR(it.page);
-		goto out_unlock;
-	}
-
-	/* read in shared xattr array (non-atomic, see kmalloc below) */
-	it.kaddr = kmap(it.page);
-	atomic_map = false;
-
-	ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
-
-	vi->xattr_shared_count = ih->h_shared_count;
-	vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
-						sizeof(uint), GFP_KERNEL);
-	if (!vi->xattr_shared_xattrs) {
-		xattr_iter_end(&it, atomic_map);
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
-
-	/* let's skip ibody header */
-	it.ofs += sizeof(struct erofs_xattr_ibody_header);
-
-	for (i = 0; i < vi->xattr_shared_count; ++i) {
-		if (unlikely(it.ofs >= EROFS_BLKSIZ)) {
-			/* cannot be unaligned */
-			DBG_BUGON(it.ofs != EROFS_BLKSIZ);
-			xattr_iter_end(&it, atomic_map);
-
-			it.page = erofs_get_meta_page(sb, ++it.blkaddr,
-						      S_ISDIR(inode->i_mode));
-			if (IS_ERR(it.page)) {
-				kfree(vi->xattr_shared_xattrs);
-				vi->xattr_shared_xattrs = NULL;
-				ret = PTR_ERR(it.page);
-				goto out_unlock;
-			}
-
-			it.kaddr = kmap_atomic(it.page);
-			atomic_map = true;
-			it.ofs = 0;
-		}
-		vi->xattr_shared_xattrs[i] =
-			le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
-		it.ofs += sizeof(__le32);
-	}
-	xattr_iter_end(&it, atomic_map);
-
-	set_bit(EROFS_V_EA_INITED_BIT, &vi->flags);
-
-out_unlock:
-	clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags);
-	return ret;
-}
-
-/*
- * the general idea for these return values is
- * if    0 is returned, go on processing the current xattr;
- *       1 (> 0) is returned, skip this round to process the next xattr;
- *    -err (< 0) is returned, an error (maybe ENOXATTR) occurred
- *                            and need to be handled
- */
-struct xattr_iter_handlers {
-	int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
-	int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
-		    unsigned int len);
-	int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
-	void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
-		      unsigned int len);
-};
-
-static inline int xattr_iter_fixup(struct xattr_iter *it)
-{
-	if (it->ofs < EROFS_BLKSIZ)
-		return 0;
-
-	xattr_iter_end(it, true);
-
-	it->blkaddr += erofs_blknr(it->ofs);
-
-	it->page = erofs_get_meta_page(it->sb, it->blkaddr, false);
-	if (IS_ERR(it->page)) {
-		int err = PTR_ERR(it->page);
-
-		it->page = NULL;
-		return err;
-	}
-
-	it->kaddr = kmap_atomic(it->page);
-	it->ofs = erofs_blkoff(it->ofs);
-	return 0;
-}
-
-static int inline_xattr_iter_begin(struct xattr_iter *it,
-				   struct inode *inode)
-{
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
-	unsigned int xattr_header_sz, inline_xattr_ofs;
-
-	xattr_header_sz = inlinexattr_header_size(inode);
-	if (unlikely(xattr_header_sz >= vi->xattr_isize)) {
-		DBG_BUGON(xattr_header_sz > vi->xattr_isize);
-		return -ENOATTR;
-	}
-
-	inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
-
-	it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
-	it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
-
-	it->page = erofs_get_inline_page(inode, it->blkaddr);
-	if (IS_ERR(it->page))
-		return PTR_ERR(it->page);
-
-	it->kaddr = kmap_atomic(it->page);
-	return vi->xattr_isize - xattr_header_sz;
-}
-
-/*
- * Regardless of success or failure, `xattr_foreach' will end up with
- * `ofs' pointing to the next xattr item rather than an arbitrary position.
- */
-static int xattr_foreach(struct xattr_iter *it,
-			 const struct xattr_iter_handlers *op,
-			 unsigned int *tlimit)
-{
-	struct erofs_xattr_entry entry;
-	unsigned int value_sz, processed, slice;
-	int err;
-
-	/* 0. fixup blkaddr, ofs, ipage */
-	err = xattr_iter_fixup(it);
-	if (err)
-		return err;
-
-	/*
-	 * 1. read xattr entry to the memory,
-	 *    since we do EROFS_XATTR_ALIGN
-	 *    therefore entry should be in the page
-	 */
-	entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
-	if (tlimit) {
-		unsigned int entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry);
-
-		/* xattr on-disk corruption: xattr entry beyond xattr_isize */
-		if (unlikely(*tlimit < entry_sz)) {
-			DBG_BUGON(1);
-			return -EFSCORRUPTED;
-		}
-		*tlimit -= entry_sz;
-	}
-
-	it->ofs += sizeof(struct erofs_xattr_entry);
-	value_sz = le16_to_cpu(entry.e_value_size);
-
-	/* handle entry */
-	err = op->entry(it, &entry);
-	if (err) {
-		it->ofs += entry.e_name_len + value_sz;
-		goto out;
-	}
-
-	/* 2. handle xattr name (ofs will finally be at the end of name) */
-	processed = 0;
-
-	while (processed < entry.e_name_len) {
-		if (it->ofs >= EROFS_BLKSIZ) {
-			DBG_BUGON(it->ofs > EROFS_BLKSIZ);
-
-			err = xattr_iter_fixup(it);
-			if (err)
-				goto out;
-			it->ofs = 0;
-		}
-
-		slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
-			      entry.e_name_len - processed);
-
-		/* handle name */
-		err = op->name(it, processed, it->kaddr + it->ofs, slice);
-		if (err) {
-			it->ofs += entry.e_name_len - processed + value_sz;
-			goto out;
-		}
-
-		it->ofs += slice;
-		processed += slice;
-	}
-
-	/* 3. handle xattr value */
-	processed = 0;
-
-	if (op->alloc_buffer) {
-		err = op->alloc_buffer(it, value_sz);
-		if (err) {
-			it->ofs += value_sz;
-			goto out;
-		}
-	}
-
-	while (processed < value_sz) {
-		if (it->ofs >= EROFS_BLKSIZ) {
-			DBG_BUGON(it->ofs > EROFS_BLKSIZ);
-
-			err = xattr_iter_fixup(it);
-			if (err)
-				goto out;
-			it->ofs = 0;
-		}
-
-		slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
-			      value_sz - processed);
-		op->value(it, processed, it->kaddr + it->ofs, slice);
-		it->ofs += slice;
-		processed += slice;
-	}
-
-out:
-	/* xattrs should be 4-byte aligned (on-disk constraint) */
-	it->ofs = EROFS_XATTR_ALIGN(it->ofs);
-	return err < 0 ? err : 0;
-}
-
-struct getxattr_iter {
-	struct xattr_iter it;
-
-	char *buffer;
-	int buffer_size, index;
-	struct qstr name;
-};
-
-static int xattr_entrymatch(struct xattr_iter *_it,
-			    struct erofs_xattr_entry *entry)
-{
-	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-
-	return (it->index != entry->e_name_index ||
-		it->name.len != entry->e_name_len) ? -ENOATTR : 0;
-}
-
-static int xattr_namematch(struct xattr_iter *_it,
-			   unsigned int processed, char *buf, unsigned int len)
-{
-	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-
-	return memcmp(buf, it->name.name + processed, len) ? -ENOATTR : 0;
-}
-
-static int xattr_checkbuffer(struct xattr_iter *_it,
-			     unsigned int value_sz)
-{
-	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-	int err = it->buffer_size < value_sz ? -ERANGE : 0;
-
-	it->buffer_size = value_sz;
-	return !it->buffer ? 1 : err;
-}
-
-static void xattr_copyvalue(struct xattr_iter *_it,
-			    unsigned int processed,
-			    char *buf, unsigned int len)
-{
-	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
-
-	memcpy(it->buffer + processed, buf, len);
-}
-
-static const struct xattr_iter_handlers find_xattr_handlers = {
-	.entry = xattr_entrymatch,
-	.name = xattr_namematch,
-	.alloc_buffer = xattr_checkbuffer,
-	.value = xattr_copyvalue
-};
-
-static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
-{
-	int ret;
-	unsigned int remaining;
-
-	ret = inline_xattr_iter_begin(&it->it, inode);
-	if (ret < 0)
-		return ret;
-
-	remaining = ret;
-	while (remaining) {
-		ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
-		if (ret != -ENOATTR)
-			break;
-	}
-	xattr_iter_end_final(&it->it);
-
-	return ret ? ret : it->buffer_size;
-}
-
-static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
-{
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	struct super_block *const sb = inode->i_sb;
-	struct erofs_sb_info *const sbi = EROFS_SB(sb);
-	unsigned int i;
-	int ret = -ENOATTR;
-
-	for (i = 0; i < vi->xattr_shared_count; ++i) {
-		erofs_blk_t blkaddr =
-			xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
-
-		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
-
-		if (!i || blkaddr != it->it.blkaddr) {
-			if (i)
-				xattr_iter_end(&it->it, true);
-
-			it->it.page = erofs_get_meta_page(sb, blkaddr, false);
-			if (IS_ERR(it->it.page))
-				return PTR_ERR(it->it.page);
-
-			it->it.kaddr = kmap_atomic(it->it.page);
-			it->it.blkaddr = blkaddr;
-		}
-
-		ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
-		if (ret != -ENOATTR)
-			break;
-	}
-	if (vi->xattr_shared_count)
-		xattr_iter_end_final(&it->it);
-
-	return ret ? ret : it->buffer_size;
-}
-
-static bool erofs_xattr_user_list(struct dentry *dentry)
-{
-	return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER);
-}
-
-static bool erofs_xattr_trusted_list(struct dentry *dentry)
-{
-	return capable(CAP_SYS_ADMIN);
-}
-
-int erofs_getxattr(struct inode *inode, int index,
-		   const char *name,
-		   void *buffer, size_t buffer_size)
-{
-	int ret;
-	struct getxattr_iter it;
-
-	if (unlikely(!name))
-		return -EINVAL;
-
-	ret = init_inode_xattrs(inode);
-	if (ret)
-		return ret;
-
-	it.index = index;
-
-	it.name.len = strlen(name);
-	if (it.name.len > EROFS_NAME_LEN)
-		return -ERANGE;
-	it.name.name = name;
-
-	it.buffer = buffer;
-	it.buffer_size = buffer_size;
-
-	it.it.sb = inode->i_sb;
-	ret = inline_getxattr(inode, &it);
-	if (ret == -ENOATTR)
-		ret = shared_getxattr(inode, &it);
-	return ret;
-}
-
-static int erofs_xattr_generic_get(const struct xattr_handler *handler,
-				   struct dentry *unused, struct inode *inode,
-				   const char *name, void *buffer, size_t size)
-{
-	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
-
-	switch (handler->flags) {
-	case EROFS_XATTR_INDEX_USER:
-		if (!test_opt(sbi, XATTR_USER))
-			return -EOPNOTSUPP;
-		break;
-	case EROFS_XATTR_INDEX_TRUSTED:
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-		break;
-	case EROFS_XATTR_INDEX_SECURITY:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return erofs_getxattr(inode, handler->flags, name, buffer, size);
-}
-
-const struct xattr_handler erofs_xattr_user_handler = {
-	.prefix	= XATTR_USER_PREFIX,
-	.flags	= EROFS_XATTR_INDEX_USER,
-	.list	= erofs_xattr_user_list,
-	.get	= erofs_xattr_generic_get,
-};
-
-const struct xattr_handler erofs_xattr_trusted_handler = {
-	.prefix	= XATTR_TRUSTED_PREFIX,
-	.flags	= EROFS_XATTR_INDEX_TRUSTED,
-	.list	= erofs_xattr_trusted_list,
-	.get	= erofs_xattr_generic_get,
-};
-
-#ifdef CONFIG_EROFS_FS_SECURITY
-const struct xattr_handler __maybe_unused erofs_xattr_security_handler = {
-	.prefix	= XATTR_SECURITY_PREFIX,
-	.flags	= EROFS_XATTR_INDEX_SECURITY,
-	.get	= erofs_xattr_generic_get,
-};
-#endif
-
-const struct xattr_handler *erofs_xattr_handlers[] = {
-	&erofs_xattr_user_handler,
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-	&posix_acl_access_xattr_handler,
-	&posix_acl_default_xattr_handler,
-#endif
-	&erofs_xattr_trusted_handler,
-#ifdef CONFIG_EROFS_FS_SECURITY
-	&erofs_xattr_security_handler,
-#endif
-	NULL,
-};
-
-struct listxattr_iter {
-	struct xattr_iter it;
-
-	struct dentry *dentry;
-	char *buffer;
-	int buffer_size, buffer_ofs;
-};
-
-static int xattr_entrylist(struct xattr_iter *_it,
-			   struct erofs_xattr_entry *entry)
-{
-	struct listxattr_iter *it =
-		container_of(_it, struct listxattr_iter, it);
-	unsigned int prefix_len;
-	const char *prefix;
-
-	const struct xattr_handler *h =
-		erofs_xattr_handler(entry->e_name_index);
-
-	if (!h || (h->list && !h->list(it->dentry)))
-		return 1;
-
-	prefix = xattr_prefix(h);
-	prefix_len = strlen(prefix);
-
-	if (!it->buffer) {
-		it->buffer_ofs += prefix_len + entry->e_name_len + 1;
-		return 1;
-	}
-
-	if (it->buffer_ofs + prefix_len
-		+ entry->e_name_len + 1 > it->buffer_size)
-		return -ERANGE;
-
-	memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len);
-	it->buffer_ofs += prefix_len;
-	return 0;
-}
-
-static int xattr_namelist(struct xattr_iter *_it,
-			  unsigned int processed, char *buf, unsigned int len)
-{
-	struct listxattr_iter *it =
-		container_of(_it, struct listxattr_iter, it);
-
-	memcpy(it->buffer + it->buffer_ofs, buf, len);
-	it->buffer_ofs += len;
-	return 0;
-}
-
-static int xattr_skipvalue(struct xattr_iter *_it,
-			   unsigned int value_sz)
-{
-	struct listxattr_iter *it =
-		container_of(_it, struct listxattr_iter, it);
-
-	it->buffer[it->buffer_ofs++] = '\0';
-	return 1;
-}
-
-static const struct xattr_iter_handlers list_xattr_handlers = {
-	.entry = xattr_entrylist,
-	.name = xattr_namelist,
-	.alloc_buffer = xattr_skipvalue,
-	.value = NULL
-};
-
-static int inline_listxattr(struct listxattr_iter *it)
-{
-	int ret;
-	unsigned int remaining;
-
-	ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry));
-	if (ret < 0)
-		return ret;
-
-	remaining = ret;
-	while (remaining) {
-		ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
-		if (ret)
-			break;
-	}
-	xattr_iter_end_final(&it->it);
-	return ret ? ret : it->buffer_ofs;
-}
-
-static int shared_listxattr(struct listxattr_iter *it)
-{
-	struct inode *const inode = d_inode(it->dentry);
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	struct super_block *const sb = inode->i_sb;
-	struct erofs_sb_info *const sbi = EROFS_SB(sb);
-	unsigned int i;
-	int ret = 0;
-
-	for (i = 0; i < vi->xattr_shared_count; ++i) {
-		erofs_blk_t blkaddr =
-			xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
-
-		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
-		if (!i || blkaddr != it->it.blkaddr) {
-			if (i)
-				xattr_iter_end(&it->it, true);
-
-			it->it.page = erofs_get_meta_page(sb, blkaddr, false);
-			if (IS_ERR(it->it.page))
-				return PTR_ERR(it->it.page);
-
-			it->it.kaddr = kmap_atomic(it->it.page);
-			it->it.blkaddr = blkaddr;
-		}
-
-		ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
-		if (ret)
-			break;
-	}
-	if (vi->xattr_shared_count)
-		xattr_iter_end_final(&it->it);
-
-	return ret ? ret : it->buffer_ofs;
-}
-
-ssize_t erofs_listxattr(struct dentry *dentry,
-			char *buffer, size_t buffer_size)
-{
-	int ret;
-	struct listxattr_iter it;
-
-	ret = init_inode_xattrs(d_inode(dentry));
-	if (ret)
-		return ret;
-
-	it.dentry = dentry;
-	it.buffer = buffer;
-	it.buffer_size = buffer_size;
-	it.buffer_ofs = 0;
-
-	it.it.sb = dentry->d_sb;
-
-	ret = inline_listxattr(&it);
-	if (ret < 0 && ret != -ENOATTR)
-		return ret;
-	return shared_listxattr(&it);
-}
-
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-struct posix_acl *erofs_get_acl(struct inode *inode, int type)
-{
-	struct posix_acl *acl;
-	int prefix, rc;
-	char *value = NULL;
-
-	switch (type) {
-	case ACL_TYPE_ACCESS:
-		prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS;
-		break;
-	case ACL_TYPE_DEFAULT:
-		prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT;
-		break;
-	default:
-		return ERR_PTR(-EINVAL);
-	}
-
-	rc = erofs_getxattr(inode, prefix, "", NULL, 0);
-	if (rc > 0) {
-		value = kmalloc(rc, GFP_KERNEL);
-		if (!value)
-			return ERR_PTR(-ENOMEM);
-		rc = erofs_getxattr(inode, prefix, "", value, rc);
-	}
-
-	if (rc == -ENOATTR)
-		acl = NULL;
-	else if (rc < 0)
-		acl = ERR_PTR(rc);
-	else
-		acl = posix_acl_from_xattr(&init_user_ns, value, rc);
-	kfree(value);
-	return acl;
-}
-#endif
-
diff --git a/drivers/staging/erofs/xattr.h b/drivers/staging/erofs/xattr.h
deleted file mode 100644
index e20249647541..000000000000
--- a/drivers/staging/erofs/xattr.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/xattr.h
- *
- * Copyright (C) 2017-2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_XATTR_H
-#define __EROFS_XATTR_H
-
-#include "internal.h"
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
-
-/* Attribute not found */
-#define ENOATTR         ENODATA
-
-static inline unsigned int inlinexattr_header_size(struct inode *inode)
-{
-	return sizeof(struct erofs_xattr_ibody_header)
-		+ sizeof(u32) * EROFS_V(inode)->xattr_shared_count;
-}
-
-static inline erofs_blk_t xattrblock_addr(struct erofs_sb_info *sbi,
-					  unsigned int xattr_id)
-{
-#ifdef CONFIG_EROFS_FS_XATTR
-	return sbi->xattr_blkaddr +
-		xattr_id * sizeof(__u32) / EROFS_BLKSIZ;
-#else
-	return 0;
-#endif
-}
-
-static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi,
-					     unsigned int xattr_id)
-{
-	return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ;
-}
-
-#ifdef CONFIG_EROFS_FS_XATTR
-extern const struct xattr_handler erofs_xattr_user_handler;
-extern const struct xattr_handler erofs_xattr_trusted_handler;
-#ifdef CONFIG_EROFS_FS_SECURITY
-extern const struct xattr_handler erofs_xattr_security_handler;
-#endif
-
-static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx)
-{
-static const struct xattr_handler *xattr_handler_map[] = {
-	[EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-	[EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
-	[EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] =
-		&posix_acl_default_xattr_handler,
-#endif
-	[EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
-#ifdef CONFIG_EROFS_FS_SECURITY
-	[EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
-#endif
-};
-
-	return idx && idx < ARRAY_SIZE(xattr_handler_map) ?
-		xattr_handler_map[idx] : NULL;
-}
-
-extern const struct xattr_handler *erofs_xattr_handlers[];
-
-int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
-ssize_t erofs_listxattr(struct dentry *, char *, size_t);
-#else
-static inline int erofs_getxattr(struct inode *inode, int index,
-				 const char *name, void *buffer,
-				 size_t buffer_size)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline ssize_t erofs_listxattr(struct dentry *dentry,
-				      char *buffer, size_t buffer_size)
-{
-	return -EOPNOTSUPP;
-}
-#endif	/* !CONFIG_EROFS_FS_XATTR */
-
-#ifdef CONFIG_EROFS_FS_POSIX_ACL
-struct posix_acl *erofs_get_acl(struct inode *inode, int type);
-#else
-#define erofs_get_acl	(NULL)
-#endif
-
-#endif
-
diff --git a/drivers/staging/erofs/zdata.c b/drivers/staging/erofs/zdata.c
deleted file mode 100644
index 60d7c20db87d..000000000000
--- a/drivers/staging/erofs/zdata.c
+++ /dev/null
@@ -1,1434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/zdata.c
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "zdata.h"
-#include "compress.h"
-#include <linux/prefetch.h>
-
-#include <trace/events/erofs.h>
-
-/*
- * a compressed_pages[] placeholder in order to avoid
- * being filled with file pages for in-place decompression.
- */
-#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
-
-/* how to allocate cached pages for a pcluster */
-enum z_erofs_cache_alloctype {
-	DONTALLOC,	/* don't allocate any cached pages */
-	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
-};
-
-/*
- * tagged pointer with 1-bit tag for all compressed pages
- * tag 0 - the page is just found with an extra page reference
- */
-typedef tagptr1_t compressed_page_t;
-
-#define tag_compressed_page_justfound(page) \
-	tagptr_fold(compressed_page_t, page, 1)
-
-static struct workqueue_struct *z_erofs_workqueue __read_mostly;
-static struct kmem_cache *pcluster_cachep __read_mostly;
-
-void z_erofs_exit_zip_subsystem(void)
-{
-	destroy_workqueue(z_erofs_workqueue);
-	kmem_cache_destroy(pcluster_cachep);
-}
-
-static inline int init_unzip_workqueue(void)
-{
-	const unsigned int onlinecpus = num_possible_cpus();
-	const unsigned int flags = WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE;
-
-	/*
-	 * no need to spawn too many threads, limiting threads could minimum
-	 * scheduling overhead, perhaps per-CPU threads should be better?
-	 */
-	z_erofs_workqueue = alloc_workqueue("erofs_unzipd", flags,
-					    onlinecpus + onlinecpus / 4);
-	return z_erofs_workqueue ? 0 : -ENOMEM;
-}
-
-static void init_once(void *ptr)
-{
-	struct z_erofs_pcluster *pcl = ptr;
-	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-	unsigned int i;
-
-	mutex_init(&cl->lock);
-	cl->nr_pages = 0;
-	cl->vcnt = 0;
-	for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
-		pcl->compressed_pages[i] = NULL;
-}
-
-static void init_always(struct z_erofs_pcluster *pcl)
-{
-	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
-
-	atomic_set(&pcl->obj.refcount, 1);
-
-	DBG_BUGON(cl->nr_pages);
-	DBG_BUGON(cl->vcnt);
-}
-
-int __init z_erofs_init_zip_subsystem(void)
-{
-	pcluster_cachep = kmem_cache_create("erofs_compress",
-					    Z_EROFS_WORKGROUP_SIZE, 0,
-					    SLAB_RECLAIM_ACCOUNT, init_once);
-	if (pcluster_cachep) {
-		if (!init_unzip_workqueue())
-			return 0;
-
-		kmem_cache_destroy(pcluster_cachep);
-	}
-	return -ENOMEM;
-}
-
-enum z_erofs_collectmode {
-	COLLECT_SECONDARY,
-	COLLECT_PRIMARY,
-	/*
-	 * The current collection was the tail of an exist chain, in addition
-	 * that the previous processed chained collections are all decided to
-	 * be hooked up to it.
-	 * A new chain will be created for the remaining collections which are
-	 * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED,
-	 * the next collection cannot reuse the whole page safely in
-	 * the following scenario:
-	 *  ________________________________________________________________
-	 * |      tail (partial) page     |       head (partial) page       |
-	 * |   (belongs to the next cl)   |   (belongs to the current cl)   |
-	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
-	 */
-	COLLECT_PRIMARY_HOOKED,
-	COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
-	/*
-	 * The current collection has been linked with the owned chain, and
-	 * could also be linked with the remaining collections, which means
-	 * if the processing page is the tail page of the collection, thus
-	 * the current collection can safely use the whole page (since
-	 * the previous collection is under control) for in-place I/O, as
-	 * illustrated below:
-	 *  ________________________________________________________________
-	 * |  tail (partial) page |          head (partial) page           |
-	 * |  (of the current cl) |      (of the previous collection)      |
-	 * |  PRIMARY_FOLLOWED or |                                        |
-	 * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
-	 *
-	 * [  (*) the above page can be used as inplace I/O.               ]
-	 */
-	COLLECT_PRIMARY_FOLLOWED,
-};
-
-struct z_erofs_collector {
-	struct z_erofs_pagevec_ctor vector;
-
-	struct z_erofs_pcluster *pcl, *tailpcl;
-	struct z_erofs_collection *cl;
-	struct page **compressedpages;
-	z_erofs_next_pcluster_t owned_head;
-
-	enum z_erofs_collectmode mode;
-};
-
-struct z_erofs_decompress_frontend {
-	struct inode *const inode;
-
-	struct z_erofs_collector clt;
-	struct erofs_map_blocks map;
-
-	/* used for applying cache strategy on the fly */
-	bool backmost;
-	erofs_off_t headoffset;
-};
-
-#define COLLECTOR_INIT() { \
-	.owned_head = Z_EROFS_PCLUSTER_TAIL, \
-	.mode = COLLECT_PRIMARY_FOLLOWED }
-
-#define DECOMPRESS_FRONTEND_INIT(__i) { \
-	.inode = __i, .clt = COLLECTOR_INIT(), \
-	.backmost = true, }
-
-static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
-static DEFINE_MUTEX(z_pagemap_global_lock);
-
-static void preload_compressed_pages(struct z_erofs_collector *clt,
-				     struct address_space *mc,
-				     enum z_erofs_cache_alloctype type,
-				     struct list_head *pagepool)
-{
-	const struct z_erofs_pcluster *pcl = clt->pcl;
-	const unsigned int clusterpages = BIT(pcl->clusterbits);
-	struct page **pages = clt->compressedpages;
-	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
-	bool standalone = true;
-
-	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
-		return;
-
-	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
-		struct page *page;
-		compressed_page_t t;
-
-		/* the compressed page was loaded before */
-		if (READ_ONCE(*pages))
-			continue;
-
-		page = find_get_page(mc, index);
-
-		if (page) {
-			t = tag_compressed_page_justfound(page);
-		} else if (type == DELAYEDALLOC) {
-			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
-		} else {	/* DONTALLOC */
-			if (standalone)
-				clt->compressedpages = pages;
-			standalone = false;
-			continue;
-		}
-
-		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
-			continue;
-
-		if (page)
-			put_page(page);
-	}
-
-	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
-		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
-}
-
-/* called by erofs_shrinker to get rid of all compressed_pages */
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
-				       struct erofs_workgroup *grp)
-{
-	struct z_erofs_pcluster *const pcl =
-		container_of(grp, struct z_erofs_pcluster, obj);
-	struct address_space *const mapping = MNGD_MAPPING(sbi);
-	const unsigned int clusterpages = BIT(pcl->clusterbits);
-	int i;
-
-	/*
-	 * refcount of workgroup is now freezed as 1,
-	 * therefore no need to worry about available decompression users.
-	 */
-	for (i = 0; i < clusterpages; ++i) {
-		struct page *page = pcl->compressed_pages[i];
-
-		if (!page)
-			continue;
-
-		/* block other users from reclaiming or migrating the page */
-		if (!trylock_page(page))
-			return -EBUSY;
-
-		if (unlikely(page->mapping != mapping))
-			continue;
-
-		/* barrier is implied in the following 'unlock_page' */
-		WRITE_ONCE(pcl->compressed_pages[i], NULL);
-		set_page_private(page, 0);
-		ClearPagePrivate(page);
-
-		unlock_page(page);
-		put_page(page);
-	}
-	return 0;
-}
-
-int erofs_try_to_free_cached_page(struct address_space *mapping,
-				  struct page *page)
-{
-	struct z_erofs_pcluster *const pcl = (void *)page_private(page);
-	const unsigned int clusterpages = BIT(pcl->clusterbits);
-	int ret = 0;	/* 0 - busy */
-
-	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
-		unsigned int i;
-
-		for (i = 0; i < clusterpages; ++i) {
-			if (pcl->compressed_pages[i] == page) {
-				WRITE_ONCE(pcl->compressed_pages[i], NULL);
-				ret = 1;
-				break;
-			}
-		}
-		erofs_workgroup_unfreeze(&pcl->obj, 1);
-
-		if (ret) {
-			ClearPagePrivate(page);
-			put_page(page);
-		}
-	}
-	return ret;
-}
-
-/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
-static inline bool try_inplace_io(struct z_erofs_collector *clt,
-				  struct page *page)
-{
-	struct z_erofs_pcluster *const pcl = clt->pcl;
-	const unsigned int clusterpages = BIT(pcl->clusterbits);
-
-	while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
-		if (!cmpxchg(clt->compressedpages++, NULL, page))
-			return true;
-	}
-	return false;
-}
-
-/* callers must be with collection lock held */
-static int z_erofs_attach_page(struct z_erofs_collector *clt,
-			       struct page *page,
-			       enum z_erofs_page_type type)
-{
-	int ret;
-	bool occupied;
-
-	/* give priority for inplaceio */
-	if (clt->mode >= COLLECT_PRIMARY &&
-	    type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
-	    try_inplace_io(clt, page))
-		return 0;
-
-	ret = z_erofs_pagevec_enqueue(&clt->vector,
-				      page, type, &occupied);
-	clt->cl->vcnt += (unsigned int)ret;
-
-	return ret ? 0 : -EAGAIN;
-}
-
-static enum z_erofs_collectmode
-try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
-		      z_erofs_next_pcluster_t *owned_head)
-{
-	/* let's claim these following types of pclusters */
-retry:
-	if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
-		/* type 1, nil pcluster */
-		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
-			    *owned_head) != Z_EROFS_PCLUSTER_NIL)
-			goto retry;
-
-		*owned_head = &pcl->next;
-		/* lucky, I am the followee :) */
-		return COLLECT_PRIMARY_FOLLOWED;
-	} else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
-		/*
-		 * type 2, link to the end of a existing open chain,
-		 * be careful that its submission itself is governed
-		 * by the original owned chain.
-		 */
-		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
-			    *owned_head) != Z_EROFS_PCLUSTER_TAIL)
-			goto retry;
-		*owned_head = Z_EROFS_PCLUSTER_TAIL;
-		return COLLECT_PRIMARY_HOOKED;
-	}
-	return COLLECT_PRIMARY;	/* :( better luck next time */
-}
-
-static struct z_erofs_collection *cllookup(struct z_erofs_collector *clt,
-					   struct inode *inode,
-					   struct erofs_map_blocks *map)
-{
-	struct erofs_workgroup *grp;
-	struct z_erofs_pcluster *pcl;
-	struct z_erofs_collection *cl;
-	unsigned int length;
-	bool tag;
-
-	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT, &tag);
-	if (!grp)
-		return NULL;
-
-	pcl = container_of(grp, struct z_erofs_pcluster, obj);
-	if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
-		DBG_BUGON(1);
-		erofs_workgroup_put(grp);
-		return ERR_PTR(-EFSCORRUPTED);
-	}
-
-	cl = z_erofs_primarycollection(pcl);
-	if (unlikely(cl->pageofs != (map->m_la & ~PAGE_MASK))) {
-		DBG_BUGON(1);
-		erofs_workgroup_put(grp);
-		return ERR_PTR(-EFSCORRUPTED);
-	}
-
-	length = READ_ONCE(pcl->length);
-	if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
-		if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
-			DBG_BUGON(1);
-			erofs_workgroup_put(grp);
-			return ERR_PTR(-EFSCORRUPTED);
-		}
-	} else {
-		unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
-
-		if (map->m_flags & EROFS_MAP_FULL_MAPPED)
-			llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
-
-		while (llen > length &&
-		       length != cmpxchg_relaxed(&pcl->length, length, llen)) {
-			cpu_relax();
-			length = READ_ONCE(pcl->length);
-		}
-	}
-	mutex_lock(&cl->lock);
-	/* used to check tail merging loop due to corrupted images */
-	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
-		clt->tailpcl = pcl;
-	clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
-	/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
-	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
-		clt->tailpcl = NULL;
-	clt->pcl = pcl;
-	clt->cl = cl;
-	return cl;
-}
-
-static struct z_erofs_collection *clregister(struct z_erofs_collector *clt,
-					     struct inode *inode,
-					     struct erofs_map_blocks *map)
-{
-	struct z_erofs_pcluster *pcl;
-	struct z_erofs_collection *cl;
-	int err;
-
-	/* no available workgroup, let's allocate one */
-	pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
-	if (unlikely(!pcl))
-		return ERR_PTR(-ENOMEM);
-
-	init_always(pcl);
-	pcl->obj.index = map->m_pa >> PAGE_SHIFT;
-
-	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
-		(map->m_flags & EROFS_MAP_FULL_MAPPED ?
-			Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
-
-	if (map->m_flags & EROFS_MAP_ZIPPED)
-		pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
-	else
-		pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
-
-	pcl->clusterbits = EROFS_V(inode)->z_physical_clusterbits[0];
-	pcl->clusterbits -= PAGE_SHIFT;
-
-	/* new pclusters should be claimed as type 1, primary and followed */
-	pcl->next = clt->owned_head;
-	clt->mode = COLLECT_PRIMARY_FOLLOWED;
-
-	cl = z_erofs_primarycollection(pcl);
-	cl->pageofs = map->m_la & ~PAGE_MASK;
-
-	/*
-	 * lock all primary followed works before visible to others
-	 * and mutex_trylock *never* fails for a new pcluster.
-	 */
-	mutex_trylock(&cl->lock);
-
-	err = erofs_register_workgroup(inode->i_sb, &pcl->obj, 0);
-	if (err) {
-		mutex_unlock(&cl->lock);
-		kmem_cache_free(pcluster_cachep, pcl);
-		return ERR_PTR(-EAGAIN);
-	}
-	/* used to check tail merging loop due to corrupted images */
-	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
-		clt->tailpcl = pcl;
-	clt->owned_head = &pcl->next;
-	clt->pcl = pcl;
-	clt->cl = cl;
-	return cl;
-}
-
-static int z_erofs_collector_begin(struct z_erofs_collector *clt,
-				   struct inode *inode,
-				   struct erofs_map_blocks *map)
-{
-	struct z_erofs_collection *cl;
-
-	DBG_BUGON(clt->cl);
-
-	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
-	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
-	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
-
-	if (!PAGE_ALIGNED(map->m_pa)) {
-		DBG_BUGON(1);
-		return -EINVAL;
-	}
-
-repeat:
-	cl = cllookup(clt, inode, map);
-	if (!cl) {
-		cl = clregister(clt, inode, map);
-
-		if (unlikely(cl == ERR_PTR(-EAGAIN)))
-			goto repeat;
-	}
-
-	if (IS_ERR(cl))
-		return PTR_ERR(cl);
-
-	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
-				  cl->pagevec, cl->vcnt);
-
-	clt->compressedpages = clt->pcl->compressed_pages;
-	if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
-		clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
-	return 0;
-}
-
-/*
- * keep in mind that no referenced pclusters will be freed
- * only after a RCU grace period.
- */
-static void z_erofs_rcu_callback(struct rcu_head *head)
-{
-	struct z_erofs_collection *const cl =
-		container_of(head, struct z_erofs_collection, rcu);
-
-	kmem_cache_free(pcluster_cachep,
-			container_of(cl, struct z_erofs_pcluster,
-				     primary_collection));
-}
-
-void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
-{
-	struct z_erofs_pcluster *const pcl =
-		container_of(grp, struct z_erofs_pcluster, obj);
-	struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl);
-
-	call_rcu(&cl->rcu, z_erofs_rcu_callback);
-}
-
-static void z_erofs_collection_put(struct z_erofs_collection *cl)
-{
-	struct z_erofs_pcluster *const pcl =
-		container_of(cl, struct z_erofs_pcluster, primary_collection);
-
-	erofs_workgroup_put(&pcl->obj);
-}
-
-static bool z_erofs_collector_end(struct z_erofs_collector *clt)
-{
-	struct z_erofs_collection *cl = clt->cl;
-
-	if (!cl)
-		return false;
-
-	z_erofs_pagevec_ctor_exit(&clt->vector, false);
-	mutex_unlock(&cl->lock);
-
-	/*
-	 * if all pending pages are added, don't hold its reference
-	 * any longer if the pcluster isn't hosted by ourselves.
-	 */
-	if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
-		z_erofs_collection_put(cl);
-
-	clt->cl = NULL;
-	return true;
-}
-
-static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
-					       gfp_t gfp)
-{
-	struct page *page = erofs_allocpage(pagepool, gfp, true);
-
-	page->mapping = Z_EROFS_MAPPING_STAGING;
-	return page;
-}
-
-static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
-				       unsigned int cachestrategy,
-				       erofs_off_t la)
-{
-	if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
-		return false;
-
-	if (fe->backmost)
-		return true;
-
-	return cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
-		la < fe->headoffset;
-}
-
-static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
-				struct page *page,
-				struct list_head *pagepool)
-{
-	struct inode *const inode = fe->inode;
-	struct erofs_sb_info *const sbi __maybe_unused = EROFS_I_SB(inode);
-	struct erofs_map_blocks *const map = &fe->map;
-	struct z_erofs_collector *const clt = &fe->clt;
-	const loff_t offset = page_offset(page);
-	bool tight = (clt->mode >= COLLECT_PRIMARY_HOOKED);
-
-	enum z_erofs_cache_alloctype cache_strategy;
-	enum z_erofs_page_type page_type;
-	unsigned int cur, end, spiltted, index;
-	int err = 0;
-
-	/* register locked file pages as online pages in pack */
-	z_erofs_onlinepage_init(page);
-
-	spiltted = 0;
-	end = PAGE_SIZE;
-repeat:
-	cur = end - 1;
-
-	/* lucky, within the range of the current map_blocks */
-	if (offset + cur >= map->m_la &&
-	    offset + cur < map->m_la + map->m_llen) {
-		/* didn't get a valid collection previously (very rare) */
-		if (!clt->cl)
-			goto restart_now;
-		goto hitted;
-	}
-
-	/* go ahead the next map_blocks */
-	debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
-
-	if (z_erofs_collector_end(clt))
-		fe->backmost = false;
-
-	map->m_la = offset + cur;
-	map->m_llen = 0;
-	err = z_erofs_map_blocks_iter(inode, map, 0);
-	if (unlikely(err))
-		goto err_out;
-
-restart_now:
-	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
-		goto hitted;
-
-	err = z_erofs_collector_begin(clt, inode, map);
-	if (unlikely(err))
-		goto err_out;
-
-	/* preload all compressed pages (maybe downgrade role if necessary) */
-	if (should_alloc_managed_pages(fe, sbi->cache_strategy, map->m_la))
-		cache_strategy = DELAYEDALLOC;
-	else
-		cache_strategy = DONTALLOC;
-
-	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
-				 cache_strategy, pagepool);
-
-	tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED);
-hitted:
-	cur = end - min_t(unsigned int, offset + end - map->m_la, end);
-	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
-		zero_user_segment(page, cur, end);
-		goto next_part;
-	}
-
-	/* let's derive page type */
-	page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
-		(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
-			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
-				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
-
-	if (cur)
-		tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
-
-retry:
-	err = z_erofs_attach_page(clt, page, page_type);
-	/* should allocate an additional staging page for pagevec */
-	if (err == -EAGAIN) {
-		struct page *const newpage =
-			__stagingpage_alloc(pagepool, GFP_NOFS);
-
-		err = z_erofs_attach_page(clt, newpage,
-					  Z_EROFS_PAGE_TYPE_EXCLUSIVE);
-		if (likely(!err))
-			goto retry;
-	}
-
-	if (unlikely(err))
-		goto err_out;
-
-	index = page->index - (map->m_la >> PAGE_SHIFT);
-
-	z_erofs_onlinepage_fixup(page, index, true);
-
-	/* bump up the number of spiltted parts of a page */
-	++spiltted;
-	/* also update nr_pages */
-	clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
-next_part:
-	/* can be used for verification */
-	map->m_llen = offset + cur - map->m_la;
-
-	end = cur;
-	if (end > 0)
-		goto repeat;
-
-out:
-	z_erofs_onlinepage_endio(page);
-
-	debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
-		__func__, page, spiltted, map->m_llen);
-	return err;
-
-	/* if some error occurred while processing this page */
-err_out:
-	SetPageError(page);
-	goto out;
-}
-
-static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
-{
-	tagptr1_t t = tagptr_init(tagptr1_t, ptr);
-	struct z_erofs_unzip_io *io = tagptr_unfold_ptr(t);
-	bool background = tagptr_unfold_tags(t);
-
-	if (!background) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&io->u.wait.lock, flags);
-		if (!atomic_add_return(bios, &io->pending_bios))
-			wake_up_locked(&io->u.wait);
-		spin_unlock_irqrestore(&io->u.wait.lock, flags);
-		return;
-	}
-
-	if (!atomic_add_return(bios, &io->pending_bios))
-		queue_work(z_erofs_workqueue, &io->u.work);
-}
-
-static inline void z_erofs_vle_read_endio(struct bio *bio)
-{
-	struct erofs_sb_info *sbi = NULL;
-	blk_status_t err = bio->bi_status;
-	struct bio_vec *bvec;
-	struct bvec_iter_all iter_all;
-
-	bio_for_each_segment_all(bvec, bio, iter_all) {
-		struct page *page = bvec->bv_page;
-		bool cachemngd = false;
-
-		DBG_BUGON(PageUptodate(page));
-		DBG_BUGON(!page->mapping);
-
-		if (unlikely(!sbi && !z_erofs_page_is_staging(page))) {
-			sbi = EROFS_SB(page->mapping->host->i_sb);
-
-			if (time_to_inject(sbi, FAULT_READ_IO)) {
-				erofs_show_injection_info(FAULT_READ_IO);
-				err = BLK_STS_IOERR;
-			}
-		}
-
-		/* sbi should already be gotten if the page is managed */
-		if (sbi)
-			cachemngd = erofs_page_is_managed(sbi, page);
-
-		if (unlikely(err))
-			SetPageError(page);
-		else if (cachemngd)
-			SetPageUptodate(page);
-
-		if (cachemngd)
-			unlock_page(page);
-	}
-
-	z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
-	bio_put(bio);
-}
-
-static int z_erofs_decompress_pcluster(struct super_block *sb,
-				       struct z_erofs_pcluster *pcl,
-				       struct list_head *pagepool)
-{
-	struct erofs_sb_info *const sbi = EROFS_SB(sb);
-	const unsigned int clusterpages = BIT(pcl->clusterbits);
-	struct z_erofs_pagevec_ctor ctor;
-	unsigned int i, outputsize, llen, nr_pages;
-	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
-	struct page **pages, **compressed_pages, *page;
-
-	enum z_erofs_page_type page_type;
-	bool overlapped, partial;
-	struct z_erofs_collection *cl;
-	int err;
-
-	might_sleep();
-	cl = z_erofs_primarycollection(pcl);
-	DBG_BUGON(!READ_ONCE(cl->nr_pages));
-
-	mutex_lock(&cl->lock);
-	nr_pages = cl->nr_pages;
-
-	if (likely(nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES)) {
-		pages = pages_onstack;
-	} else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
-		   mutex_trylock(&z_pagemap_global_lock)) {
-		pages = z_pagemap_global;
-	} else {
-		gfp_t gfp_flags = GFP_KERNEL;
-
-		if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
-			gfp_flags |= __GFP_NOFAIL;
-
-		pages = kvmalloc_array(nr_pages, sizeof(struct page *),
-				       gfp_flags);
-
-		/* fallback to global pagemap for the lowmem scenario */
-		if (unlikely(!pages)) {
-			mutex_lock(&z_pagemap_global_lock);
-			pages = z_pagemap_global;
-		}
-	}
-
-	for (i = 0; i < nr_pages; ++i)
-		pages[i] = NULL;
-
-	err = 0;
-	z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
-				  cl->pagevec, 0);
-
-	for (i = 0; i < cl->vcnt; ++i) {
-		unsigned int pagenr;
-
-		page = z_erofs_pagevec_dequeue(&ctor, &page_type);
-
-		/* all pages in pagevec ought to be valid */
-		DBG_BUGON(!page);
-		DBG_BUGON(!page->mapping);
-
-		if (z_erofs_put_stagingpage(pagepool, page))
-			continue;
-
-		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
-			pagenr = 0;
-		else
-			pagenr = z_erofs_onlinepage_index(page);
-
-		DBG_BUGON(pagenr >= nr_pages);
-
-		/*
-		 * currently EROFS doesn't support multiref(dedup),
-		 * so here erroring out one multiref page.
-		 */
-		if (unlikely(pages[pagenr])) {
-			DBG_BUGON(1);
-			SetPageError(pages[pagenr]);
-			z_erofs_onlinepage_endio(pages[pagenr]);
-			err = -EFSCORRUPTED;
-		}
-		pages[pagenr] = page;
-	}
-	z_erofs_pagevec_ctor_exit(&ctor, true);
-
-	overlapped = false;
-	compressed_pages = pcl->compressed_pages;
-
-	for (i = 0; i < clusterpages; ++i) {
-		unsigned int pagenr;
-
-		page = compressed_pages[i];
-
-		/* all compressed pages ought to be valid */
-		DBG_BUGON(!page);
-		DBG_BUGON(!page->mapping);
-
-		if (!z_erofs_page_is_staging(page)) {
-			if (erofs_page_is_managed(sbi, page)) {
-				if (unlikely(!PageUptodate(page)))
-					err = -EIO;
-				continue;
-			}
-
-			/*
-			 * only if non-head page can be selected
-			 * for inplace decompression
-			 */
-			pagenr = z_erofs_onlinepage_index(page);
-
-			DBG_BUGON(pagenr >= nr_pages);
-			if (unlikely(pages[pagenr])) {
-				DBG_BUGON(1);
-				SetPageError(pages[pagenr]);
-				z_erofs_onlinepage_endio(pages[pagenr]);
-				err = -EFSCORRUPTED;
-			}
-			pages[pagenr] = page;
-
-			overlapped = true;
-		}
-
-		/* PG_error needs checking for inplaced and staging pages */
-		if (unlikely(PageError(page))) {
-			DBG_BUGON(PageUptodate(page));
-			err = -EIO;
-		}
-	}
-
-	if (unlikely(err))
-		goto out;
-
-	llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
-	if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) {
-		outputsize = llen;
-		partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
-	} else {
-		outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs;
-		partial = true;
-	}
-
-	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
-					.sb = sb,
-					.in = compressed_pages,
-					.out = pages,
-					.pageofs_out = cl->pageofs,
-					.inputsize = PAGE_SIZE,
-					.outputsize = outputsize,
-					.alg = pcl->algorithmformat,
-					.inplace_io = overlapped,
-					.partial_decoding = partial
-				 }, pagepool);
-
-out:
-	/* must handle all compressed pages before endding pages */
-	for (i = 0; i < clusterpages; ++i) {
-		page = compressed_pages[i];
-
-		if (erofs_page_is_managed(sbi, page))
-			continue;
-
-		/* recycle all individual staging pages */
-		(void)z_erofs_put_stagingpage(pagepool, page);
-
-		WRITE_ONCE(compressed_pages[i], NULL);
-	}
-
-	for (i = 0; i < nr_pages; ++i) {
-		page = pages[i];
-		if (!page)
-			continue;
-
-		DBG_BUGON(!page->mapping);
-
-		/* recycle all individual staging pages */
-		if (z_erofs_put_stagingpage(pagepool, page))
-			continue;
-
-		if (unlikely(err < 0))
-			SetPageError(page);
-
-		z_erofs_onlinepage_endio(page);
-	}
-
-	if (pages == z_pagemap_global)
-		mutex_unlock(&z_pagemap_global_lock);
-	else if (unlikely(pages != pages_onstack))
-		kvfree(pages);
-
-	cl->nr_pages = 0;
-	cl->vcnt = 0;
-
-	/* all cl locks MUST be taken before the following line */
-	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
-
-	/* all cl locks SHOULD be released right now */
-	mutex_unlock(&cl->lock);
-
-	z_erofs_collection_put(cl);
-	return err;
-}
-
-static void z_erofs_vle_unzip_all(struct super_block *sb,
-				  struct z_erofs_unzip_io *io,
-				  struct list_head *pagepool)
-{
-	z_erofs_next_pcluster_t owned = io->head;
-
-	while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
-		struct z_erofs_pcluster *pcl;
-
-		/* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
-		DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
-
-		/* no possible that 'owned' equals NULL */
-		DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
-
-		pcl = container_of(owned, struct z_erofs_pcluster, next);
-		owned = READ_ONCE(pcl->next);
-
-		z_erofs_decompress_pcluster(sb, pcl, pagepool);
-	}
-}
-
-static void z_erofs_vle_unzip_wq(struct work_struct *work)
-{
-	struct z_erofs_unzip_io_sb *iosb =
-		container_of(work, struct z_erofs_unzip_io_sb, io.u.work);
-	LIST_HEAD(pagepool);
-
-	DBG_BUGON(iosb->io.head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
-	z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &pagepool);
-
-	put_pages_list(&pagepool);
-	kvfree(iosb);
-}
-
-static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
-					       unsigned int nr,
-					       struct list_head *pagepool,
-					       struct address_space *mc,
-					       gfp_t gfp)
-{
-	/* determined at compile time to avoid too many #ifdefs */
-	const bool nocache = __builtin_constant_p(mc) ? !mc : false;
-	const pgoff_t index = pcl->obj.index;
-	bool tocache = false;
-
-	struct address_space *mapping;
-	struct page *oldpage, *page;
-
-	compressed_page_t t;
-	int justfound;
-
-repeat:
-	page = READ_ONCE(pcl->compressed_pages[nr]);
-	oldpage = page;
-
-	if (!page)
-		goto out_allocpage;
-
-	/*
-	 * the cached page has not been allocated and
-	 * an placeholder is out there, prepare it now.
-	 */
-	if (!nocache && page == PAGE_UNALLOCATED) {
-		tocache = true;
-		goto out_allocpage;
-	}
-
-	/* process the target tagged pointer */
-	t = tagptr_init(compressed_page_t, page);
-	justfound = tagptr_unfold_tags(t);
-	page = tagptr_unfold_ptr(t);
-
-	mapping = READ_ONCE(page->mapping);
-
-	/*
-	 * if managed cache is disabled, it's no way to
-	 * get such a cached-like page.
-	 */
-	if (nocache) {
-		/* if managed cache is disabled, it is impossible `justfound' */
-		DBG_BUGON(justfound);
-
-		/* and it should be locked, not uptodate, and not truncated */
-		DBG_BUGON(!PageLocked(page));
-		DBG_BUGON(PageUptodate(page));
-		DBG_BUGON(!mapping);
-		goto out;
-	}
-
-	/*
-	 * unmanaged (file) pages are all locked solidly,
-	 * therefore it is impossible for `mapping' to be NULL.
-	 */
-	if (mapping && mapping != mc)
-		/* ought to be unmanaged pages */
-		goto out;
-
-	lock_page(page);
-
-	/* only true if page reclaim goes wrong, should never happen */
-	DBG_BUGON(justfound && PagePrivate(page));
-
-	/* the page is still in manage cache */
-	if (page->mapping == mc) {
-		WRITE_ONCE(pcl->compressed_pages[nr], page);
-
-		ClearPageError(page);
-		if (!PagePrivate(page)) {
-			/*
-			 * impossible to be !PagePrivate(page) for
-			 * the current restriction as well if
-			 * the page is already in compressed_pages[].
-			 */
-			DBG_BUGON(!justfound);
-
-			justfound = 0;
-			set_page_private(page, (unsigned long)pcl);
-			SetPagePrivate(page);
-		}
-
-		/* no need to submit io if it is already up-to-date */
-		if (PageUptodate(page)) {
-			unlock_page(page);
-			page = NULL;
-		}
-		goto out;
-	}
-
-	/*
-	 * the managed page has been truncated, it's unsafe to
-	 * reuse this one, let's allocate a new cache-managed page.
-	 */
-	DBG_BUGON(page->mapping);
-	DBG_BUGON(!justfound);
-
-	tocache = true;
-	unlock_page(page);
-	put_page(page);
-out_allocpage:
-	page = __stagingpage_alloc(pagepool, gfp);
-	if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
-		list_add(&page->lru, pagepool);
-		cpu_relax();
-		goto repeat;
-	}
-	if (nocache || !tocache)
-		goto out;
-	if (add_to_page_cache_lru(page, mc, index + nr, gfp)) {
-		page->mapping = Z_EROFS_MAPPING_STAGING;
-		goto out;
-	}
-
-	set_page_private(page, (unsigned long)pcl);
-	SetPagePrivate(page);
-out:	/* the only exit (for tracing and debugging) */
-	return page;
-}
-
-static struct z_erofs_unzip_io *jobqueue_init(struct super_block *sb,
-					      struct z_erofs_unzip_io *io,
-					      bool foreground)
-{
-	struct z_erofs_unzip_io_sb *iosb;
-
-	if (foreground) {
-		/* waitqueue available for foreground io */
-		DBG_BUGON(!io);
-
-		init_waitqueue_head(&io->u.wait);
-		atomic_set(&io->pending_bios, 0);
-		goto out;
-	}
-
-	iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL);
-	DBG_BUGON(!iosb);
-
-	/* initialize fields in the allocated descriptor */
-	io = &iosb->io;
-	iosb->sb = sb;
-	INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
-out:
-	io->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
-	return io;
-}
-
-/* define decompression jobqueue types */
-enum {
-	JQ_BYPASS,
-	JQ_SUBMIT,
-	NR_JOBQUEUES,
-};
-
-static void *jobqueueset_init(struct super_block *sb,
-			      z_erofs_next_pcluster_t qtail[],
-			      struct z_erofs_unzip_io *q[],
-			      struct z_erofs_unzip_io *fgq,
-			      bool forcefg)
-{
-	/*
-	 * if managed cache is enabled, bypass jobqueue is needed,
-	 * no need to read from device for all pclusters in this queue.
-	 */
-	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true);
-	qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
-
-	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg);
-	qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
-
-	return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg));
-}
-
-static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
-				    z_erofs_next_pcluster_t qtail[],
-				    z_erofs_next_pcluster_t owned_head)
-{
-	z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
-	z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
-
-	DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
-	if (owned_head == Z_EROFS_PCLUSTER_TAIL)
-		owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
-
-	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED);
-
-	WRITE_ONCE(*submit_qtail, owned_head);
-	WRITE_ONCE(*bypass_qtail, &pcl->next);
-
-	qtail[JQ_BYPASS] = &pcl->next;
-}
-
-static bool postsubmit_is_all_bypassed(struct z_erofs_unzip_io *q[],
-				       unsigned int nr_bios,
-				       bool force_fg)
-{
-	/*
-	 * although background is preferred, no one is pending for submission.
-	 * don't issue workqueue for decompression but drop it directly instead.
-	 */
-	if (force_fg || nr_bios)
-		return false;
-
-	kvfree(container_of(q[JQ_SUBMIT], struct z_erofs_unzip_io_sb, io));
-	return true;
-}
-
-static bool z_erofs_vle_submit_all(struct super_block *sb,
-				   z_erofs_next_pcluster_t owned_head,
-				   struct list_head *pagepool,
-				   struct z_erofs_unzip_io *fgq,
-				   bool force_fg)
-{
-	struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
-	z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
-	struct z_erofs_unzip_io *q[NR_JOBQUEUES];
-	struct bio *bio;
-	void *bi_private;
-	/* since bio will be NULL, no need to initialize last_index */
-	pgoff_t uninitialized_var(last_index);
-	bool force_submit = false;
-	unsigned int nr_bios;
-
-	if (unlikely(owned_head == Z_EROFS_PCLUSTER_TAIL))
-		return false;
-
-	force_submit = false;
-	bio = NULL;
-	nr_bios = 0;
-	bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg);
-
-	/* by default, all need io submission */
-	q[JQ_SUBMIT]->head = owned_head;
-
-	do {
-		struct z_erofs_pcluster *pcl;
-		unsigned int clusterpages;
-		pgoff_t first_index;
-		struct page *page;
-		unsigned int i = 0, bypass = 0;
-		int err;
-
-		/* no possible 'owned_head' equals the following */
-		DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
-		DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
-
-		pcl = container_of(owned_head, struct z_erofs_pcluster, next);
-
-		clusterpages = BIT(pcl->clusterbits);
-
-		/* close the main owned chain at first */
-		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
-				     Z_EROFS_PCLUSTER_TAIL_CLOSED);
-
-		first_index = pcl->obj.index;
-		force_submit |= (first_index != last_index + 1);
-
-repeat:
-		page = pickup_page_for_submission(pcl, i, pagepool,
-						  MNGD_MAPPING(sbi),
-						  GFP_NOFS);
-		if (!page) {
-			force_submit = true;
-			++bypass;
-			goto skippage;
-		}
-
-		if (bio && force_submit) {
-submit_bio_retry:
-			__submit_bio(bio, REQ_OP_READ, 0);
-			bio = NULL;
-		}
-
-		if (!bio) {
-			bio = erofs_grab_bio(sb, first_index + i,
-					     BIO_MAX_PAGES, bi_private,
-					     z_erofs_vle_read_endio, true);
-			++nr_bios;
-		}
-
-		err = bio_add_page(bio, page, PAGE_SIZE, 0);
-		if (err < PAGE_SIZE)
-			goto submit_bio_retry;
-
-		force_submit = false;
-		last_index = first_index + i;
-skippage:
-		if (++i < clusterpages)
-			goto repeat;
-
-		if (bypass < clusterpages)
-			qtail[JQ_SUBMIT] = &pcl->next;
-		else
-			move_to_bypass_jobqueue(pcl, qtail, owned_head);
-	} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
-
-	if (bio)
-		__submit_bio(bio, REQ_OP_READ, 0);
-
-	if (postsubmit_is_all_bypassed(q, nr_bios, force_fg))
-		return true;
-
-	z_erofs_vle_unzip_kickoff(bi_private, nr_bios);
-	return true;
-}
-
-static void z_erofs_submit_and_unzip(struct super_block *sb,
-				     struct z_erofs_collector *clt,
-				     struct list_head *pagepool,
-				     bool force_fg)
-{
-	struct z_erofs_unzip_io io[NR_JOBQUEUES];
-
-	if (!z_erofs_vle_submit_all(sb, clt->owned_head,
-				    pagepool, io, force_fg))
-		return;
-
-	/* decompress no I/O pclusters immediately */
-	z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool);
-
-	if (!force_fg)
-		return;
-
-	/* wait until all bios are completed */
-	wait_event(io[JQ_SUBMIT].u.wait,
-		   !atomic_read(&io[JQ_SUBMIT].pending_bios));
-
-	/* let's synchronous decompression */
-	z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool);
-}
-
-static int z_erofs_vle_normalaccess_readpage(struct file *file,
-					     struct page *page)
-{
-	struct inode *const inode = page->mapping->host;
-	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
-	int err;
-	LIST_HEAD(pagepool);
-
-	trace_erofs_readpage(page, false);
-
-	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
-
-	err = z_erofs_do_read_page(&f, page, &pagepool);
-	(void)z_erofs_collector_end(&f.clt);
-
-	/* if some compressed cluster ready, need submit them anyway */
-	z_erofs_submit_and_unzip(inode->i_sb, &f.clt, &pagepool, true);
-
-	if (err)
-		errln("%s, failed to read, err [%d]", __func__, err);
-
-	if (f.map.mpage)
-		put_page(f.map.mpage);
-
-	/* clean up the remaining free pages */
-	put_pages_list(&pagepool);
-	return err;
-}
-
-static bool should_decompress_synchronously(struct erofs_sb_info *sbi,
-					    unsigned int nr)
-{
-	return nr <= sbi->max_sync_decompress_pages;
-}
-
-static int z_erofs_vle_normalaccess_readpages(struct file *filp,
-					      struct address_space *mapping,
-					      struct list_head *pages,
-					      unsigned int nr_pages)
-{
-	struct inode *const inode = mapping->host;
-	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
-
-	bool sync = should_decompress_synchronously(sbi, nr_pages);
-	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
-	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
-	struct page *head = NULL;
-	LIST_HEAD(pagepool);
-
-	trace_erofs_readpages(mapping->host, lru_to_page(pages),
-			      nr_pages, false);
-
-	f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;
-
-	for (; nr_pages; --nr_pages) {
-		struct page *page = lru_to_page(pages);
-
-		prefetchw(&page->flags);
-		list_del(&page->lru);
-
-		/*
-		 * A pure asynchronous readahead is indicated if
-		 * a PG_readahead marked page is hitted at first.
-		 * Let's also do asynchronous decompression for this case.
-		 */
-		sync &= !(PageReadahead(page) && !head);
-
-		if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
-			list_add(&page->lru, &pagepool);
-			continue;
-		}
-
-		set_page_private(page, (unsigned long)head);
-		head = page;
-	}
-
-	while (head) {
-		struct page *page = head;
-		int err;
-
-		/* traversal in reverse order */
-		head = (void *)page_private(page);
-
-		err = z_erofs_do_read_page(&f, page, &pagepool);
-		if (err) {
-			struct erofs_vnode *vi = EROFS_V(inode);
-
-			errln("%s, readahead error at page %lu of nid %llu",
-			      __func__, page->index, vi->nid);
-		}
-		put_page(page);
-	}
-
-	(void)z_erofs_collector_end(&f.clt);
-
-	z_erofs_submit_and_unzip(inode->i_sb, &f.clt, &pagepool, sync);
-
-	if (f.map.mpage)
-		put_page(f.map.mpage);
-
-	/* clean up the remaining free pages */
-	put_pages_list(&pagepool);
-	return 0;
-}
-
-const struct address_space_operations z_erofs_vle_normalaccess_aops = {
-	.readpage = z_erofs_vle_normalaccess_readpage,
-	.readpages = z_erofs_vle_normalaccess_readpages,
-};
-
diff --git a/drivers/staging/erofs/zdata.h b/drivers/staging/erofs/zdata.h
deleted file mode 100644
index e11fe1959ca2..000000000000
--- a/drivers/staging/erofs/zdata.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/zdata.h
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_ZDATA_H
-#define __EROFS_FS_ZDATA_H
-
-#include "internal.h"
-#include "zpvec.h"
-
-#define Z_EROFS_NR_INLINE_PAGEVECS      3
-
-/*
- * Structure fields follow one of the following exclusion rules.
- *
- * I: Modifiable by initialization/destruction paths and read-only
- *    for everyone else;
- *
- * L: Field should be protected by pageset lock;
- *
- * A: Field should be accessed / updated in atomic for parallelized code.
- */
-struct z_erofs_collection {
-	struct mutex lock;
-
-	/* I: page offset of start position of decompression */
-	unsigned short pageofs;
-
-	/* L: maximum relative page index in pagevec[] */
-	unsigned short nr_pages;
-
-	/* L: total number of pages in pagevec[] */
-	unsigned int vcnt;
-
-	union {
-		/* L: inline a certain number of pagevecs for bootstrap */
-		erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
-
-		/* I: can be used to free the pcluster by RCU. */
-		struct rcu_head rcu;
-	};
-};
-
-#define Z_EROFS_PCLUSTER_FULL_LENGTH    0x00000001
-#define Z_EROFS_PCLUSTER_LENGTH_BIT     1
-
-/*
- * let's leave a type here in case of introducing
- * another tagged pointer later.
- */
-typedef void *z_erofs_next_pcluster_t;
-
-struct z_erofs_pcluster {
-	struct erofs_workgroup obj;
-	struct z_erofs_collection primary_collection;
-
-	/* A: point to next chained pcluster or TAILs */
-	z_erofs_next_pcluster_t next;
-
-	/* A: compressed pages (including multi-usage pages) */
-	struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
-
-	/* A: lower limit of decompressed length and if full length or not */
-	unsigned int length;
-
-	/* I: compression algorithm format */
-	unsigned char algorithmformat;
-	/* I: bit shift of physical cluster size */
-	unsigned char clusterbits;
-};
-
-#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
-
-/* let's avoid the valid 32-bit kernel addresses */
-
-/* the chained workgroup has't submitted io (still open) */
-#define Z_EROFS_PCLUSTER_TAIL           ((void *)0x5F0ECAFE)
-/* the chained workgroup has already submitted io */
-#define Z_EROFS_PCLUSTER_TAIL_CLOSED    ((void *)0x5F0EDEAD)
-
-#define Z_EROFS_PCLUSTER_NIL            (NULL)
-
-#define Z_EROFS_WORKGROUP_SIZE  sizeof(struct z_erofs_pcluster)
-
-struct z_erofs_unzip_io {
-	atomic_t pending_bios;
-	z_erofs_next_pcluster_t head;
-
-	union {
-		wait_queue_head_t wait;
-		struct work_struct work;
-	} u;
-};
-
-struct z_erofs_unzip_io_sb {
-	struct z_erofs_unzip_io io;
-	struct super_block *sb;
-};
-
-#define MNGD_MAPPING(sbi)	((sbi)->managed_cache->i_mapping)
-static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
-					 struct page *page)
-{
-	return page->mapping == MNGD_MAPPING(sbi);
-}
-
-#define Z_EROFS_ONLINEPAGE_COUNT_BITS   2
-#define Z_EROFS_ONLINEPAGE_COUNT_MASK   ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
-#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT  (Z_EROFS_ONLINEPAGE_COUNT_BITS)
-
-/*
- * waiters (aka. ongoing_packs): # to unlock the page
- * sub-index: 0 - for partial page, >= 1 full page sub-index
- */
-typedef atomic_t z_erofs_onlinepage_t;
-
-/* type punning */
-union z_erofs_onlinepage_converter {
-	z_erofs_onlinepage_t *o;
-	unsigned long *v;
-};
-
-static inline unsigned int z_erofs_onlinepage_index(struct page *page)
-{
-	union z_erofs_onlinepage_converter u;
-
-	DBG_BUGON(!PagePrivate(page));
-	u.v = &page_private(page);
-
-	return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
-}
-
-static inline void z_erofs_onlinepage_init(struct page *page)
-{
-	union {
-		z_erofs_onlinepage_t o;
-		unsigned long v;
-	/* keep from being unlocked in advance */
-	} u = { .o = ATOMIC_INIT(1) };
-
-	set_page_private(page, u.v);
-	smp_wmb();
-	SetPagePrivate(page);
-}
-
-static inline void z_erofs_onlinepage_fixup(struct page *page,
-	uintptr_t index, bool down)
-{
-	unsigned long *p, o, v, id;
-repeat:
-	p = &page_private(page);
-	o = READ_ONCE(*p);
-
-	id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
-	if (id) {
-		if (!index)
-			return;
-
-		DBG_BUGON(id != index);
-	}
-
-	v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
-		((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
-	if (cmpxchg(p, o, v) != o)
-		goto repeat;
-}
-
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
-	union z_erofs_onlinepage_converter u;
-	unsigned int v;
-
-	DBG_BUGON(!PagePrivate(page));
-	u.v = &page_private(page);
-
-	v = atomic_dec_return(u.o);
-	if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
-		ClearPagePrivate(page);
-		if (!PageError(page))
-			SetPageUptodate(page);
-		unlock_page(page);
-	}
-	debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
-}
-
-#define Z_EROFS_VMAP_ONSTACK_PAGES	\
-	min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
-#define Z_EROFS_VMAP_GLOBAL_PAGES	2048
-
-#endif
-
diff --git a/drivers/staging/erofs/zmap.c b/drivers/staging/erofs/zmap.c
deleted file mode 100644
index 774dacbc5b32..000000000000
--- a/drivers/staging/erofs/zmap.c
+++ /dev/null
@@ -1,468 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/zmap.c
- *
- * Copyright (C) 2018-2019 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "internal.h"
-#include <asm/unaligned.h>
-#include <trace/events/erofs.h>
-
-int z_erofs_fill_inode(struct inode *inode)
-{
-	struct erofs_vnode *const vi = EROFS_V(inode);
-
-	if (vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
-		vi->z_advise = 0;
-		vi->z_algorithmtype[0] = 0;
-		vi->z_algorithmtype[1] = 0;
-		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
-		vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
-		vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
-		set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
-	}
-
-	inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops;
-	return 0;
-}
-
-static int fill_inode_lazy(struct inode *inode)
-{
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	struct super_block *const sb = inode->i_sb;
-	int err;
-	erofs_off_t pos;
-	struct page *page;
-	void *kaddr;
-	struct z_erofs_map_header *h;
-
-	if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
-		return 0;
-
-	if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_Z_BIT, TASK_KILLABLE))
-		return -ERESTARTSYS;
-
-	err = 0;
-	if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
-		goto out_unlock;
-
-	DBG_BUGON(vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
-
-	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
-		    vi->xattr_isize, 8);
-	page = erofs_get_meta_page(sb, erofs_blknr(pos), false);
-	if (IS_ERR(page)) {
-		err = PTR_ERR(page);
-		goto out_unlock;
-	}
-
-	kaddr = kmap_atomic(page);
-
-	h = kaddr + erofs_blkoff(pos);
-	vi->z_advise = le16_to_cpu(h->h_advise);
-	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
-	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
-
-	if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
-		errln("unknown compression format %u for nid %llu, please upgrade kernel",
-		      vi->z_algorithmtype[0], vi->nid);
-		err = -EOPNOTSUPP;
-		goto unmap_done;
-	}
-
-	vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
-	vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
-					((h->h_clusterbits >> 3) & 3);
-
-	if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
-		errln("unsupported physical clusterbits %u for nid %llu, please upgrade kernel",
-		      vi->z_physical_clusterbits[0], vi->nid);
-		err = -EOPNOTSUPP;
-		goto unmap_done;
-	}
-
-	vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
-					((h->h_clusterbits >> 5) & 7);
-	set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
-unmap_done:
-	kunmap_atomic(kaddr);
-	unlock_page(page);
-	put_page(page);
-out_unlock:
-	clear_and_wake_up_bit(EROFS_V_BL_Z_BIT, &vi->flags);
-	return err;
-}
-
-struct z_erofs_maprecorder {
-	struct inode *inode;
-	struct erofs_map_blocks *map;
-	void *kaddr;
-
-	unsigned long lcn;
-	/* compression extent information gathered */
-	u8  type;
-	u16 clusterofs;
-	u16 delta[2];
-	erofs_blk_t pblk;
-};
-
-static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
-				  erofs_blk_t eblk)
-{
-	struct super_block *const sb = m->inode->i_sb;
-	struct erofs_map_blocks *const map = m->map;
-	struct page *mpage = map->mpage;
-
-	if (mpage) {
-		if (mpage->index == eblk) {
-			if (!m->kaddr)
-				m->kaddr = kmap_atomic(mpage);
-			return 0;
-		}
-
-		if (m->kaddr) {
-			kunmap_atomic(m->kaddr);
-			m->kaddr = NULL;
-		}
-		put_page(mpage);
-	}
-
-	mpage = erofs_get_meta_page(sb, eblk, false);
-	if (IS_ERR(mpage)) {
-		map->mpage = NULL;
-		return PTR_ERR(mpage);
-	}
-	m->kaddr = kmap_atomic(mpage);
-	unlock_page(mpage);
-	map->mpage = mpage;
-	return 0;
-}
-
-static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
-					     unsigned long lcn)
-{
-	struct inode *const inode = m->inode;
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
-	const erofs_off_t pos =
-		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
-					       vi->xattr_isize) +
-		lcn * sizeof(struct z_erofs_vle_decompressed_index);
-	struct z_erofs_vle_decompressed_index *di;
-	unsigned int advise, type;
-	int err;
-
-	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
-	if (err)
-		return err;
-
-	m->lcn = lcn;
-	di = m->kaddr + erofs_blkoff(pos);
-
-	advise = le16_to_cpu(di->di_advise);
-	type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
-		((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
-	switch (type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
-		m->clusterofs = 1 << vi->z_logical_clusterbits;
-		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
-		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
-		break;
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
-		m->clusterofs = le16_to_cpu(di->di_clusterofs);
-		m->pblk = le32_to_cpu(di->di_u.blkaddr);
-		break;
-	default:
-		DBG_BUGON(1);
-		return -EOPNOTSUPP;
-	}
-	m->type = type;
-	return 0;
-}
-
-static unsigned int decode_compactedbits(unsigned int lobits,
-					 unsigned int lomask,
-					 u8 *in, unsigned int pos, u8 *type)
-{
-	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
-	const unsigned int lo = v & lomask;
-
-	*type = (v >> lobits) & 3;
-	return lo;
-}
-
-static int unpack_compacted_index(struct z_erofs_maprecorder *m,
-				  unsigned int amortizedshift,
-				  unsigned int eofs)
-{
-	struct erofs_vnode *const vi = EROFS_V(m->inode);
-	const unsigned int lclusterbits = vi->z_logical_clusterbits;
-	const unsigned int lomask = (1 << lclusterbits) - 1;
-	unsigned int vcnt, base, lo, encodebits, nblk;
-	int i;
-	u8 *in, type;
-
-	if (1 << amortizedshift == 4)
-		vcnt = 2;
-	else if (1 << amortizedshift == 2 && lclusterbits == 12)
-		vcnt = 16;
-	else
-		return -EOPNOTSUPP;
-
-	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
-	base = round_down(eofs, vcnt << amortizedshift);
-	in = m->kaddr + base;
-
-	i = (eofs - base) >> amortizedshift;
-
-	lo = decode_compactedbits(lclusterbits, lomask,
-				  in, encodebits * i, &type);
-	m->type = type;
-	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
-		m->clusterofs = 1 << lclusterbits;
-		if (i + 1 != vcnt) {
-			m->delta[0] = lo;
-			return 0;
-		}
-		/*
-		 * since the last lcluster in the pack is special,
-		 * of which lo saves delta[1] rather than delta[0].
-		 * Hence, get delta[0] by the previous lcluster indirectly.
-		 */
-		lo = decode_compactedbits(lclusterbits, lomask,
-					  in, encodebits * (i - 1), &type);
-		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
-			lo = 0;
-		m->delta[0] = lo + 1;
-		return 0;
-	}
-	m->clusterofs = lo;
-	m->delta[0] = 0;
-	/* figout out blkaddr (pblk) for HEAD lclusters */
-	nblk = 1;
-	while (i > 0) {
-		--i;
-		lo = decode_compactedbits(lclusterbits, lomask,
-					  in, encodebits * i, &type);
-		if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
-			i -= lo;
-
-		if (i >= 0)
-			++nblk;
-	}
-	in += (vcnt << amortizedshift) - sizeof(__le32);
-	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
-	return 0;
-}
-
-static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
-					    unsigned long lcn)
-{
-	struct inode *const inode = m->inode;
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	const unsigned int lclusterbits = vi->z_logical_clusterbits;
-	const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
-					vi->inode_isize + vi->xattr_isize, 8) +
-		sizeof(struct z_erofs_map_header);
-	const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
-	unsigned int compacted_4b_initial, compacted_2b;
-	unsigned int amortizedshift;
-	erofs_off_t pos;
-	int err;
-
-	if (lclusterbits != 12)
-		return -EOPNOTSUPP;
-
-	if (lcn >= totalidx)
-		return -EINVAL;
-
-	m->lcn = lcn;
-	/* used to align to 32-byte (compacted_2b) alignment */
-	compacted_4b_initial = (32 - ebase % 32) / 4;
-	if (compacted_4b_initial == 32 / 4)
-		compacted_4b_initial = 0;
-
-	if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B)
-		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
-	else
-		compacted_2b = 0;
-
-	pos = ebase;
-	if (lcn < compacted_4b_initial) {
-		amortizedshift = 2;
-		goto out;
-	}
-	pos += compacted_4b_initial * 4;
-	lcn -= compacted_4b_initial;
-
-	if (lcn < compacted_2b) {
-		amortizedshift = 1;
-		goto out;
-	}
-	pos += compacted_2b * 2;
-	lcn -= compacted_2b;
-	amortizedshift = 2;
-out:
-	pos += lcn * (1 << amortizedshift);
-	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
-	if (err)
-		return err;
-	return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
-}
-
-static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m,
-				      unsigned int lcn)
-{
-	const unsigned int datamode = EROFS_V(m->inode)->datamode;
-
-	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
-		return vle_legacy_load_cluster_from_disk(m, lcn);
-
-	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
-		return compacted_load_cluster_from_disk(m, lcn);
-
-	return -EINVAL;
-}
-
-static int vle_extent_lookback(struct z_erofs_maprecorder *m,
-			       unsigned int lookback_distance)
-{
-	struct erofs_vnode *const vi = EROFS_V(m->inode);
-	struct erofs_map_blocks *const map = m->map;
-	const unsigned int lclusterbits = vi->z_logical_clusterbits;
-	unsigned long lcn = m->lcn;
-	int err;
-
-	if (lcn < lookback_distance) {
-		errln("bogus lookback distance @ nid %llu", vi->nid);
-		DBG_BUGON(1);
-		return -EFSCORRUPTED;
-	}
-
-	/* load extent head logical cluster if needed */
-	lcn -= lookback_distance;
-	err = vle_load_cluster_from_disk(m, lcn);
-	if (err)
-		return err;
-
-	switch (m->type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
-		if (unlikely(!m->delta[0])) {
-			errln("invalid lookback distance 0 at nid %llu",
-			      vi->nid);
-			DBG_BUGON(1);
-			return -EFSCORRUPTED;
-		}
-		return vle_extent_lookback(m, m->delta[0]);
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-		map->m_flags &= ~EROFS_MAP_ZIPPED;
-		/* fallthrough */
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
-		map->m_la = (lcn << lclusterbits) | m->clusterofs;
-		break;
-	default:
-		errln("unknown type %u at lcn %lu of nid %llu",
-		      m->type, lcn, vi->nid);
-		DBG_BUGON(1);
-		return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
-int z_erofs_map_blocks_iter(struct inode *inode,
-			    struct erofs_map_blocks *map,
-			    int flags)
-{
-	struct erofs_vnode *const vi = EROFS_V(inode);
-	struct z_erofs_maprecorder m = {
-		.inode = inode,
-		.map = map,
-	};
-	int err = 0;
-	unsigned int lclusterbits, endoff;
-	unsigned long long ofs, end;
-
-	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
-
-	/* when trying to read beyond EOF, leave it unmapped */
-	if (unlikely(map->m_la >= inode->i_size)) {
-		map->m_llen = map->m_la + 1 - inode->i_size;
-		map->m_la = inode->i_size;
-		map->m_flags = 0;
-		goto out;
-	}
-
-	err = fill_inode_lazy(inode);
-	if (err)
-		goto out;
-
-	lclusterbits = vi->z_logical_clusterbits;
-	ofs = map->m_la;
-	m.lcn = ofs >> lclusterbits;
-	endoff = ofs & ((1 << lclusterbits) - 1);
-
-	err = vle_load_cluster_from_disk(&m, m.lcn);
-	if (err)
-		goto unmap_out;
-
-	map->m_flags = EROFS_MAP_ZIPPED;	/* by default, compressed */
-	end = (m.lcn + 1ULL) << lclusterbits;
-
-	switch (m.type) {
-	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
-		if (endoff >= m.clusterofs)
-			map->m_flags &= ~EROFS_MAP_ZIPPED;
-		/* fallthrough */
-	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
-		if (endoff >= m.clusterofs) {
-			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
-			break;
-		}
-		/* m.lcn should be >= 1 if endoff < m.clusterofs */
-		if (unlikely(!m.lcn)) {
-			errln("invalid logical cluster 0 at nid %llu",
-			      vi->nid);
-			err = -EFSCORRUPTED;
-			goto unmap_out;
-		}
-		end = (m.lcn << lclusterbits) | m.clusterofs;
-		map->m_flags |= EROFS_MAP_FULL_MAPPED;
-		m.delta[0] = 1;
-		/* fallthrough */
-	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
-		/* get the correspoinding first chunk */
-		err = vle_extent_lookback(&m, m.delta[0]);
-		if (unlikely(err))
-			goto unmap_out;
-		break;
-	default:
-		errln("unknown type %u at offset %llu of nid %llu",
-		      m.type, ofs, vi->nid);
-		err = -EOPNOTSUPP;
-		goto unmap_out;
-	}
-
-	map->m_llen = end - map->m_la;
-	map->m_plen = 1 << lclusterbits;
-	map->m_pa = blknr_to_addr(m.pblk);
-	map->m_flags |= EROFS_MAP_MAPPED;
-
-unmap_out:
-	if (m.kaddr)
-		kunmap_atomic(m.kaddr);
-
-out:
-	debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
-		__func__, map->m_la, map->m_pa,
-		map->m_llen, map->m_plen, map->m_flags);
-
-	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
-
-	/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
-	DBG_BUGON(err < 0 && err != -ENOMEM);
-	return err;
-}
-
diff --git a/drivers/staging/erofs/zpvec.h b/drivers/staging/erofs/zpvec.h
deleted file mode 100644
index 9798f5627786..000000000000
--- a/drivers/staging/erofs/zpvec.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/zpvec.h
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_ZPVEC_H
-#define __EROFS_FS_ZPVEC_H
-
-#include "tagptr.h"
-
-/* page type in pagevec for decompress subsystem */
-enum z_erofs_page_type {
-	/* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
-	Z_EROFS_PAGE_TYPE_EXCLUSIVE,
-
-	Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
-
-	Z_EROFS_VLE_PAGE_TYPE_HEAD,
-	Z_EROFS_VLE_PAGE_TYPE_MAX
-};
-
-extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
-	__bad_page_type_exclusive(void);
-
-/* pagevec tagged pointer */
-typedef tagptr2_t	erofs_vtptr_t;
-
-/* pagevec collector */
-struct z_erofs_pagevec_ctor {
-	struct page *curr, *next;
-	erofs_vtptr_t *pages;
-
-	unsigned int nr, index;
-};
-
-static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
-					     bool atomic)
-{
-	if (!ctor->curr)
-		return;
-
-	if (atomic)
-		kunmap_atomic(ctor->pages);
-	else
-		kunmap(ctor->curr);
-}
-
-static inline struct page *
-z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
-			       unsigned int nr)
-{
-	unsigned int index;
-
-	/* keep away from occupied pages */
-	if (ctor->next)
-		return ctor->next;
-
-	for (index = 0; index < nr; ++index) {
-		const erofs_vtptr_t t = ctor->pages[index];
-		const unsigned int tags = tagptr_unfold_tags(t);
-
-		if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
-			return tagptr_unfold_ptr(t);
-	}
-	DBG_BUGON(nr >= ctor->nr);
-	return NULL;
-}
-
-static inline void
-z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
-			      bool atomic)
-{
-	struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
-
-	z_erofs_pagevec_ctor_exit(ctor, atomic);
-
-	ctor->curr = next;
-	ctor->next = NULL;
-	ctor->pages = atomic ?
-		kmap_atomic(ctor->curr) : kmap(ctor->curr);
-
-	ctor->nr = PAGE_SIZE / sizeof(struct page *);
-	ctor->index = 0;
-}
-
-static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
-					     unsigned int nr,
-					     erofs_vtptr_t *pages,
-					     unsigned int i)
-{
-	ctor->nr = nr;
-	ctor->curr = ctor->next = NULL;
-	ctor->pages = pages;
-
-	if (i >= nr) {
-		i -= nr;
-		z_erofs_pagevec_ctor_pagedown(ctor, false);
-		while (i > ctor->nr) {
-			i -= ctor->nr;
-			z_erofs_pagevec_ctor_pagedown(ctor, false);
-		}
-	}
-	ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
-	ctor->index = i;
-}
-
-static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
-					   struct page *page,
-					   enum z_erofs_page_type type,
-					   bool *occupied)
-{
-	*occupied = false;
-	if (unlikely(!ctor->next && type))
-		if (ctor->index + 1 == ctor->nr)
-			return false;
-
-	if (unlikely(ctor->index >= ctor->nr))
-		z_erofs_pagevec_ctor_pagedown(ctor, false);
-
-	/* exclusive page type must be 0 */
-	if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
-		__bad_page_type_exclusive();
-
-	/* should remind that collector->next never equal to 1, 2 */
-	if (type == (uintptr_t)ctor->next) {
-		ctor->next = page;
-		*occupied = true;
-	}
-	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
-	return true;
-}
-
-static inline struct page *
-z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
-			enum z_erofs_page_type *type)
-{
-	erofs_vtptr_t t;
-
-	if (unlikely(ctor->index >= ctor->nr)) {
-		DBG_BUGON(!ctor->next);
-		z_erofs_pagevec_ctor_pagedown(ctor, true);
-	}
-
-	t = ctor->pages[ctor->index];
-
-	*type = tagptr_unfold_tags(t);
-
-	/* should remind that collector->next never equal to 1, 2 */
-	if (*type == (uintptr_t)ctor->next)
-		ctor->next = tagptr_unfold_ptr(t);
-
-	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
-	return tagptr_unfold_ptr(t);
-}
-#endif
-
diff --git a/fs/Kconfig b/fs/Kconfig
index bfb1c6095c7a..669d46550e6d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -261,6 +261,7 @@ source "fs/romfs/Kconfig"
 source "fs/pstore/Kconfig"
 source "fs/sysv/Kconfig"
 source "fs/ufs/Kconfig"
+source "fs/erofs/Kconfig"
 
 endif # MISC_FILESYSTEMS
 
diff --git a/fs/Makefile b/fs/Makefile
index d60089fd689b..b2e4973a0bea 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -130,3 +130,4 @@ obj-$(CONFIG_F2FS_FS)		+= f2fs/
 obj-$(CONFIG_CEPH_FS)		+= ceph/
 obj-$(CONFIG_PSTORE)		+= pstore/
 obj-$(CONFIG_EFIVAR_FS)		+= efivarfs/
+obj-$(CONFIG_EROFS_FS)		+= erofs/
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
new file mode 100644
index 000000000000..16316d1adca3
--- /dev/null
+++ b/fs/erofs/Kconfig
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config EROFS_FS
+	tristate "EROFS filesystem support"
+	depends on BLOCK
+	help
+	  EROFS (Enhanced Read-Only File System) is a lightweight
+	  read-only file system with modern designs (eg. page-sized
+	  blocks, inline xattrs/data, etc.) for scenarios which need
+	  high-performance read-only requirements, e.g. Android OS
+	  for mobile phones and LIVECDs.
+
+	  It also provides fixed-sized output compression support,
+	  which improves storage density, keeps relatively higher
+	  compression ratios, which is more useful to achieve high
+	  performance for embedded devices with limited memory.
+
+	  If unsure, say N.
+
+config EROFS_FS_DEBUG
+	bool "EROFS debugging feature"
+	depends on EROFS_FS
+	help
+	  Print debugging messages and enable more BUG_ONs which check
+	  filesystem consistency and find potential issues aggressively,
+	  which can be used for Android eng build, for example.
+
+	  For daily use, say N.
+
+config EROFS_FAULT_INJECTION
+	bool "EROFS fault injection facility"
+	depends on EROFS_FS
+	help
+	  Test EROFS to inject faults such as ENOMEM, EIO, and so on.
+	  If unsure, say N.
+
+config EROFS_FS_XATTR
+	bool "EROFS extended attributes"
+	depends on EROFS_FS
+	default y
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  If unsure, say N.
+
+config EROFS_FS_POSIX_ACL
+	bool "EROFS Access Control Lists"
+	depends on EROFS_FS_XATTR
+	select FS_POSIX_ACL
+	default y
+	help
+	  Posix Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N.
+
+config EROFS_FS_SECURITY
+	bool "EROFS Security Labels"
+	depends on EROFS_FS_XATTR
+	default y
+	help
+	  Security labels provide an access control facility to support Linux
+	  Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO
+	  Linux. This option enables an extended attribute handler for file
+	  security labels in the erofs filesystem, so that it requires enabling
+	  the extended attribute support in advance.
+
+	  If you are not using a security module, say N.
+
+config EROFS_FS_ZIP
+	bool "EROFS Data Compression Support"
+	depends on EROFS_FS
+	select LZ4_DECOMPRESS
+	default y
+	help
+	  Enable fixed-sized output compression for EROFS.
+
+	  If you don't want to enable compression feature, say N.
+
+config EROFS_FS_CLUSTER_PAGE_LIMIT
+	int "EROFS Cluster Pages Hard Limit"
+	depends on EROFS_FS_ZIP
+	range 1 256
+	default "1"
+	help
+	  Indicates maximum # of pages of a compressed
+	  physical cluster.
+
+	  For example, if files in a image were compressed
+	  into 8k-unit, hard limit should not be configured
+	  less than 2. Otherwise, the image will be refused
+	  to mount on this kernel.
+
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
new file mode 100644
index 000000000000..46f2aa4ba46c
--- /dev/null
+++ b/fs/erofs/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+EROFS_VERSION = "1.0"
+
+ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\"
+
+obj-$(CONFIG_EROFS_FS) += erofs.o
+erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
+erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
+
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
new file mode 100644
index 000000000000..07d279fd5d67
--- /dev/null
+++ b/fs/erofs/compress.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2019 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_COMPRESS_H
+#define __EROFS_FS_COMPRESS_H
+
+#include "internal.h"
+
+enum {
+	Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
+	Z_EROFS_COMPRESSION_RUNTIME_MAX
+};
+
+struct z_erofs_decompress_req {
+	struct super_block *sb;
+	struct page **in, **out;
+
+	unsigned short pageofs_out;
+	unsigned int inputsize, outputsize;
+
+	/* indicate the algorithm will be used for decompression */
+	unsigned int alg;
+	bool inplace_io, partial_decoding;
+};
+
+/*
+ * - 0x5A110C8D ('sallocated', Z_EROFS_MAPPING_STAGING) -
+ * used to mark temporary allocated pages from other
+ * file/cached pages and NULL mapping pages.
+ */
+#define Z_EROFS_MAPPING_STAGING         ((void *)0x5A110C8D)
+
+/* check if a page is marked as staging */
+static inline bool z_erofs_page_is_staging(struct page *page)
+{
+	return page->mapping == Z_EROFS_MAPPING_STAGING;
+}
+
+static inline bool z_erofs_put_stagingpage(struct list_head *pagepool,
+					   struct page *page)
+{
+	if (!z_erofs_page_is_staging(page))
+		return false;
+
+	/* staging pages should not be used by others at the same time */
+	if (page_ref_count(page) > 1)
+		put_page(page);
+	else
+		list_add(&page->lru, pagepool);
+	return true;
+}
+
+int z_erofs_decompress(struct z_erofs_decompress_req *rq,
+		       struct list_head *pagepool);
+
+#endif
+
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
new file mode 100644
index 000000000000..fda16ec8863e
--- /dev/null
+++ b/fs/erofs/data.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+#include <linux/prefetch.h>
+
+#include <trace/events/erofs.h>
+
+static inline void read_endio(struct bio *bio)
+{
+	struct super_block *const sb = bio->bi_private;
+	struct bio_vec *bvec;
+	blk_status_t err = bio->bi_status;
+	struct bvec_iter_all iter_all;
+
+	if (time_to_inject(EROFS_SB(sb), FAULT_READ_IO)) {
+		erofs_show_injection_info(FAULT_READ_IO);
+		err = BLK_STS_IOERR;
+	}
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		struct page *page = bvec->bv_page;
+
+		/* page is already locked */
+		DBG_BUGON(PageUptodate(page));
+
+		if (unlikely(err))
+			SetPageError(page);
+		else
+			SetPageUptodate(page);
+
+		unlock_page(page);
+		/* page could be reclaimed now */
+	}
+	bio_put(bio);
+}
+
+/* prio -- true is used for dir */
+struct page *__erofs_get_meta_page(struct super_block *sb,
+				   erofs_blk_t blkaddr, bool prio, bool nofail)
+{
+	struct inode *const bd_inode = sb->s_bdev->bd_inode;
+	struct address_space *const mapping = bd_inode->i_mapping;
+	/* prefer retrying in the allocator to blindly looping below */
+	const gfp_t gfp = mapping_gfp_constraint(mapping, ~__GFP_FS) |
+		(nofail ? __GFP_NOFAIL : 0);
+	unsigned int io_retries = nofail ? EROFS_IO_MAX_RETRIES_NOFAIL : 0;
+	struct page *page;
+	int err;
+
+repeat:
+	page = find_or_create_page(mapping, blkaddr, gfp);
+	if (unlikely(!page)) {
+		DBG_BUGON(nofail);
+		return ERR_PTR(-ENOMEM);
+	}
+	DBG_BUGON(!PageLocked(page));
+
+	if (!PageUptodate(page)) {
+		struct bio *bio;
+
+		bio = erofs_grab_bio(sb, blkaddr, 1, sb, read_endio, nofail);
+		if (IS_ERR(bio)) {
+			DBG_BUGON(nofail);
+			err = PTR_ERR(bio);
+			goto err_out;
+		}
+
+		err = bio_add_page(bio, page, PAGE_SIZE, 0);
+		if (unlikely(err != PAGE_SIZE)) {
+			err = -EFAULT;
+			goto err_out;
+		}
+
+		__submit_bio(bio, REQ_OP_READ,
+			     REQ_META | (prio ? REQ_PRIO : 0));
+
+		lock_page(page);
+
+		/* this page has been truncated by others */
+		if (unlikely(page->mapping != mapping)) {
+unlock_repeat:
+			unlock_page(page);
+			put_page(page);
+			goto repeat;
+		}
+
+		/* more likely a read error */
+		if (unlikely(!PageUptodate(page))) {
+			if (io_retries) {
+				--io_retries;
+				goto unlock_repeat;
+			}
+			err = -EIO;
+			goto err_out;
+		}
+	}
+	return page;
+
+err_out:
+	unlock_page(page);
+	put_page(page);
+	return ERR_PTR(err);
+}
+
+static int erofs_map_blocks_flatmode(struct inode *inode,
+				     struct erofs_map_blocks *map,
+				     int flags)
+{
+	int err = 0;
+	erofs_blk_t nblocks, lastblk;
+	u64 offset = map->m_la;
+	struct erofs_vnode *vi = EROFS_V(inode);
+
+	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
+
+	nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
+	lastblk = nblocks - is_inode_flat_inline(inode);
+
+	if (unlikely(offset >= inode->i_size)) {
+		/* leave out-of-bound access unmapped */
+		map->m_flags = 0;
+		map->m_plen = 0;
+		goto out;
+	}
+
+	/* there is no hole in flatmode */
+	map->m_flags = EROFS_MAP_MAPPED;
+
+	if (offset < blknr_to_addr(lastblk)) {
+		map->m_pa = blknr_to_addr(vi->raw_blkaddr) + map->m_la;
+		map->m_plen = blknr_to_addr(lastblk) - offset;
+	} else if (is_inode_flat_inline(inode)) {
+		/* 2 - inode inline B: inode, [xattrs], inline last blk... */
+		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+
+		map->m_pa = iloc(sbi, vi->nid) + vi->inode_isize +
+			vi->xattr_isize + erofs_blkoff(map->m_la);
+		map->m_plen = inode->i_size - offset;
+
+		/* inline data should be located in one meta block */
+		if (erofs_blkoff(map->m_pa) + map->m_plen > PAGE_SIZE) {
+			errln("inline data cross block boundary @ nid %llu",
+			      vi->nid);
+			DBG_BUGON(1);
+			err = -EFSCORRUPTED;
+			goto err_out;
+		}
+
+		map->m_flags |= EROFS_MAP_META;
+	} else {
+		errln("internal error @ nid: %llu (size %llu), m_la 0x%llx",
+		      vi->nid, inode->i_size, map->m_la);
+		DBG_BUGON(1);
+		err = -EIO;
+		goto err_out;
+	}
+
+out:
+	map->m_llen = map->m_plen;
+
+err_out:
+	trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0);
+	return err;
+}
+
+int erofs_map_blocks(struct inode *inode,
+		     struct erofs_map_blocks *map, int flags)
+{
+	if (unlikely(is_inode_layout_compression(inode))) {
+		int err = z_erofs_map_blocks_iter(inode, map, flags);
+
+		if (map->mpage) {
+			put_page(map->mpage);
+			map->mpage = NULL;
+		}
+		return err;
+	}
+	return erofs_map_blocks_flatmode(inode, map, flags);
+}
+
+static inline struct bio *erofs_read_raw_page(struct bio *bio,
+					      struct address_space *mapping,
+					      struct page *page,
+					      erofs_off_t *last_block,
+					      unsigned int nblocks,
+					      bool ra)
+{
+	struct inode *const inode = mapping->host;
+	struct super_block *const sb = inode->i_sb;
+	erofs_off_t current_block = (erofs_off_t)page->index;
+	int err;
+
+	DBG_BUGON(!nblocks);
+
+	if (PageUptodate(page)) {
+		err = 0;
+		goto has_updated;
+	}
+
+	/* note that for readpage case, bio also equals to NULL */
+	if (bio &&
+	    /* not continuous */
+	    *last_block + 1 != current_block) {
+submit_bio_retry:
+		__submit_bio(bio, REQ_OP_READ, 0);
+		bio = NULL;
+	}
+
+	if (!bio) {
+		struct erofs_map_blocks map = {
+			.m_la = blknr_to_addr(current_block),
+		};
+		erofs_blk_t blknr;
+		unsigned int blkoff;
+
+		err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+		if (unlikely(err))
+			goto err_out;
+
+		/* zero out the holed page */
+		if (unlikely(!(map.m_flags & EROFS_MAP_MAPPED))) {
+			zero_user_segment(page, 0, PAGE_SIZE);
+			SetPageUptodate(page);
+
+			/* imply err = 0, see erofs_map_blocks */
+			goto has_updated;
+		}
+
+		/* for RAW access mode, m_plen must be equal to m_llen */
+		DBG_BUGON(map.m_plen != map.m_llen);
+
+		blknr = erofs_blknr(map.m_pa);
+		blkoff = erofs_blkoff(map.m_pa);
+
+		/* deal with inline page */
+		if (map.m_flags & EROFS_MAP_META) {
+			void *vsrc, *vto;
+			struct page *ipage;
+
+			DBG_BUGON(map.m_plen > PAGE_SIZE);
+
+			ipage = erofs_get_meta_page(inode->i_sb, blknr, 0);
+
+			if (IS_ERR(ipage)) {
+				err = PTR_ERR(ipage);
+				goto err_out;
+			}
+
+			vsrc = kmap_atomic(ipage);
+			vto = kmap_atomic(page);
+			memcpy(vto, vsrc + blkoff, map.m_plen);
+			memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen);
+			kunmap_atomic(vto);
+			kunmap_atomic(vsrc);
+			flush_dcache_page(page);
+
+			SetPageUptodate(page);
+			/* TODO: could we unlock the page earlier? */
+			unlock_page(ipage);
+			put_page(ipage);
+
+			/* imply err = 0, see erofs_map_blocks */
+			goto has_updated;
+		}
+
+		/* pa must be block-aligned for raw reading */
+		DBG_BUGON(erofs_blkoff(map.m_pa));
+
+		/* max # of continuous pages */
+		if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
+			nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
+		if (nblocks > BIO_MAX_PAGES)
+			nblocks = BIO_MAX_PAGES;
+
+		bio = erofs_grab_bio(sb, blknr, nblocks, sb,
+				     read_endio, false);
+		if (IS_ERR(bio)) {
+			err = PTR_ERR(bio);
+			bio = NULL;
+			goto err_out;
+		}
+	}
+
+	err = bio_add_page(bio, page, PAGE_SIZE, 0);
+	/* out of the extent or bio is full */
+	if (err < PAGE_SIZE)
+		goto submit_bio_retry;
+
+	*last_block = current_block;
+
+	/* shift in advance in case of it followed by too many gaps */
+	if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
+		/* err should reassign to 0 after submitting */
+		err = 0;
+		goto submit_bio_out;
+	}
+
+	return bio;
+
+err_out:
+	/* for sync reading, set page error immediately */
+	if (!ra) {
+		SetPageError(page);
+		ClearPageUptodate(page);
+	}
+has_updated:
+	unlock_page(page);
+
+	/* if updated manually, continuous pages has a gap */
+	if (bio)
+submit_bio_out:
+		__submit_bio(bio, REQ_OP_READ, 0);
+
+	return unlikely(err) ? ERR_PTR(err) : NULL;
+}
+
+/*
+ * since we dont have write or truncate flows, so no inode
+ * locking needs to be held at the moment.
+ */
+static int erofs_raw_access_readpage(struct file *file, struct page *page)
+{
+	erofs_off_t last_block;
+	struct bio *bio;
+
+	trace_erofs_readpage(page, true);
+
+	bio = erofs_read_raw_page(NULL, page->mapping,
+				  page, &last_block, 1, false);
+
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	DBG_BUGON(bio);	/* since we have only one bio -- must be NULL */
+	return 0;
+}
+
+static int erofs_raw_access_readpages(struct file *filp,
+				      struct address_space *mapping,
+				      struct list_head *pages,
+				      unsigned int nr_pages)
+{
+	erofs_off_t last_block;
+	struct bio *bio = NULL;
+	gfp_t gfp = readahead_gfp_mask(mapping);
+	struct page *page = list_last_entry(pages, struct page, lru);
+
+	trace_erofs_readpages(mapping->host, page, nr_pages, true);
+
+	for (; nr_pages; --nr_pages) {
+		page = list_entry(pages->prev, struct page, lru);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+
+		if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+			bio = erofs_read_raw_page(bio, mapping, page,
+						  &last_block, nr_pages, true);
+
+			/* all the page errors are ignored when readahead */
+			if (IS_ERR(bio)) {
+				pr_err("%s, readahead error at page %lu of nid %llu\n",
+				       __func__, page->index,
+				       EROFS_V(mapping->host)->nid);
+
+				bio = NULL;
+			}
+		}
+
+		/* pages could still be locked */
+		put_page(page);
+	}
+	DBG_BUGON(!list_empty(pages));
+
+	/* the rare case (end in gaps) */
+	if (unlikely(bio))
+		__submit_bio(bio, REQ_OP_READ, 0);
+	return 0;
+}
+
+static int erofs_get_block(struct inode *inode, sector_t iblock,
+			   struct buffer_head *bh, int create)
+{
+	struct erofs_map_blocks map = {
+		.m_la = iblock << 9,
+	};
+	int err;
+
+	err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
+	if (err)
+		return err;
+
+	if (map.m_flags & EROFS_MAP_MAPPED)
+		bh->b_blocknr = erofs_blknr(map.m_pa);
+
+	return err;
+}
+
+static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
+{
+	struct inode *inode = mapping->host;
+
+	if (is_inode_flat_inline(inode)) {
+		erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
+
+		if (block >> LOG_SECTORS_PER_BLOCK >= blks)
+			return 0;
+	}
+
+	return generic_block_bmap(mapping, block, erofs_get_block);
+}
+
+/* for uncompressed (aligned) files and raw access for other files */
+const struct address_space_operations erofs_raw_access_aops = {
+	.readpage = erofs_raw_access_readpage,
+	.readpages = erofs_raw_access_readpages,
+	.bmap = erofs_bmap,
+};
+
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
new file mode 100644
index 000000000000..5f4b7f302863
--- /dev/null
+++ b/fs/erofs/decompressor.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "compress.h"
+#include <linux/module.h>
+#include <linux/lz4.h>
+
+#ifndef LZ4_DISTANCE_MAX	/* history window size */
+#define LZ4_DISTANCE_MAX 65535	/* set to maximum value by default */
+#endif
+
+#define LZ4_MAX_DISTANCE_PAGES	(DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1)
+#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize)  (((srcsize) >> 8) + 32)
+#endif
+
+struct z_erofs_decompressor {
+	/*
+	 * if destpages have sparsed pages, fill them with bounce pages.
+	 * it also check whether destpages indicate continuous physical memory.
+	 */
+	int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
+				 struct list_head *pagepool);
+	int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
+	char *name;
+};
+
+static bool use_vmap;
+module_param(use_vmap, bool, 0444);
+MODULE_PARM_DESC(use_vmap, "Use vmap() instead of vm_map_ram() (default 0)");
+
+static int lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
+				 struct list_head *pagepool)
+{
+	const unsigned int nr =
+		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+	struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
+	unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
+					   BITS_PER_LONG)] = { 0 };
+	void *kaddr = NULL;
+	unsigned int i, j, top;
+
+	top = 0;
+	for (i = j = 0; i < nr; ++i, ++j) {
+		struct page *const page = rq->out[i];
+		struct page *victim;
+
+		if (j >= LZ4_MAX_DISTANCE_PAGES)
+			j = 0;
+
+		/* 'valid' bounced can only be tested after a complete round */
+		if (test_bit(j, bounced)) {
+			DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES);
+			DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES);
+			availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES];
+		}
+
+		if (page) {
+			__clear_bit(j, bounced);
+			if (kaddr) {
+				if (kaddr + PAGE_SIZE == page_address(page))
+					kaddr += PAGE_SIZE;
+				else
+					kaddr = NULL;
+			} else if (!i) {
+				kaddr = page_address(page);
+			}
+			continue;
+		}
+		kaddr = NULL;
+		__set_bit(j, bounced);
+
+		if (top) {
+			victim = availables[--top];
+			get_page(victim);
+		} else {
+			victim = erofs_allocpage(pagepool, GFP_KERNEL, false);
+			if (unlikely(!victim))
+				return -ENOMEM;
+			victim->mapping = Z_EROFS_MAPPING_STAGING;
+		}
+		rq->out[i] = victim;
+	}
+	return kaddr ? 1 : 0;
+}
+
+static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
+				       u8 *src, unsigned int pageofs_in)
+{
+	/*
+	 * if in-place decompression is ongoing, those decompressed
+	 * pages should be copied in order to avoid being overlapped.
+	 */
+	struct page **in = rq->in;
+	u8 *const tmp = erofs_get_pcpubuf(0);
+	u8 *tmpp = tmp;
+	unsigned int inlen = rq->inputsize - pageofs_in;
+	unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in);
+
+	while (tmpp < tmp + inlen) {
+		if (!src)
+			src = kmap_atomic(*in);
+		memcpy(tmpp, src + pageofs_in, count);
+		kunmap_atomic(src);
+		src = NULL;
+		tmpp += count;
+		pageofs_in = 0;
+		count = PAGE_SIZE;
+		++in;
+	}
+	return tmp;
+}
+
+static int lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
+{
+	unsigned int inputmargin, inlen;
+	u8 *src;
+	bool copied, support_0padding;
+	int ret;
+
+	if (rq->inputsize > PAGE_SIZE)
+		return -EOPNOTSUPP;
+
+	src = kmap_atomic(*rq->in);
+	inputmargin = 0;
+	support_0padding = false;
+
+	/* decompression inplace is only safe when 0padding is enabled */
+	if (EROFS_SB(rq->sb)->requirements & EROFS_REQUIREMENT_LZ4_0PADDING) {
+		support_0padding = true;
+
+		while (!src[inputmargin & ~PAGE_MASK])
+			if (!(++inputmargin & ~PAGE_MASK))
+				break;
+
+		if (inputmargin >= rq->inputsize) {
+			kunmap_atomic(src);
+			return -EIO;
+		}
+	}
+
+	copied = false;
+	inlen = rq->inputsize - inputmargin;
+	if (rq->inplace_io) {
+		const uint oend = (rq->pageofs_out +
+				   rq->outputsize) & ~PAGE_MASK;
+		const uint nr = PAGE_ALIGN(rq->pageofs_out +
+					   rq->outputsize) >> PAGE_SHIFT;
+
+		if (rq->partial_decoding || !support_0padding ||
+		    rq->out[nr - 1] != rq->in[0] ||
+		    rq->inputsize - oend <
+		      LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) {
+			src = generic_copy_inplace_data(rq, src, inputmargin);
+			inputmargin = 0;
+			copied = true;
+		}
+	}
+
+	ret = LZ4_decompress_safe_partial(src + inputmargin, out,
+					  inlen, rq->outputsize,
+					  rq->outputsize);
+	if (ret < 0) {
+		errln("%s, failed to decompress, in[%p, %u, %u] out[%p, %u]",
+		      __func__, src + inputmargin, inlen, inputmargin,
+		      out, rq->outputsize);
+		WARN_ON(1);
+		print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
+			       16, 1, src + inputmargin, inlen, true);
+		print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
+			       16, 1, out, rq->outputsize, true);
+		ret = -EIO;
+	}
+
+	if (copied)
+		erofs_put_pcpubuf(src);
+	else
+		kunmap_atomic(src);
+	return ret;
+}
+
+static struct z_erofs_decompressor decompressors[] = {
+	[Z_EROFS_COMPRESSION_SHIFTED] = {
+		.name = "shifted"
+	},
+	[Z_EROFS_COMPRESSION_LZ4] = {
+		.prepare_destpages = lz4_prepare_destpages,
+		.decompress = lz4_decompress,
+		.name = "lz4"
+	},
+};
+
+static void copy_from_pcpubuf(struct page **out, const char *dst,
+			      unsigned short pageofs_out,
+			      unsigned int outputsize)
+{
+	const char *end = dst + outputsize;
+	const unsigned int righthalf = PAGE_SIZE - pageofs_out;
+	const char *cur = dst - pageofs_out;
+
+	while (cur < end) {
+		struct page *const page = *out++;
+
+		if (page) {
+			char *buf = kmap_atomic(page);
+
+			if (cur >= dst) {
+				memcpy(buf, cur, min_t(uint, PAGE_SIZE,
+						       end - cur));
+			} else {
+				memcpy(buf + pageofs_out, cur + pageofs_out,
+				       min_t(uint, righthalf, end - cur));
+			}
+			kunmap_atomic(buf);
+		}
+		cur += PAGE_SIZE;
+	}
+}
+
+static void *erofs_vmap(struct page **pages, unsigned int count)
+{
+	int i = 0;
+
+	if (use_vmap)
+		return vmap(pages, count, VM_MAP, PAGE_KERNEL);
+
+	while (1) {
+		void *addr = vm_map_ram(pages, count, -1, PAGE_KERNEL);
+
+		/* retry two more times (totally 3 times) */
+		if (addr || ++i >= 3)
+			return addr;
+		vm_unmap_aliases();
+	}
+	return NULL;
+}
+
+static void erofs_vunmap(const void *mem, unsigned int count)
+{
+	if (!use_vmap)
+		vm_unmap_ram(mem, count);
+	else
+		vunmap(mem);
+}
+
+static int decompress_generic(struct z_erofs_decompress_req *rq,
+			      struct list_head *pagepool)
+{
+	const unsigned int nrpages_out =
+		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+	const struct z_erofs_decompressor *alg = decompressors + rq->alg;
+	unsigned int dst_maptype;
+	void *dst;
+	int ret;
+
+	if (nrpages_out == 1 && !rq->inplace_io) {
+		DBG_BUGON(!*rq->out);
+		dst = kmap_atomic(*rq->out);
+		dst_maptype = 0;
+		goto dstmap_out;
+	}
+
+	/*
+	 * For the case of small output size (especially much less
+	 * than PAGE_SIZE), memcpy the decompressed data rather than
+	 * compressed data is preferred.
+	 */
+	if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
+		dst = erofs_get_pcpubuf(0);
+		if (IS_ERR(dst))
+			return PTR_ERR(dst);
+
+		rq->inplace_io = false;
+		ret = alg->decompress(rq, dst);
+		if (!ret)
+			copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
+					  rq->outputsize);
+
+		erofs_put_pcpubuf(dst);
+		return ret;
+	}
+
+	ret = alg->prepare_destpages(rq, pagepool);
+	if (ret < 0) {
+		return ret;
+	} else if (ret) {
+		dst = page_address(*rq->out);
+		dst_maptype = 1;
+		goto dstmap_out;
+	}
+
+	dst = erofs_vmap(rq->out, nrpages_out);
+	if (!dst)
+		return -ENOMEM;
+	dst_maptype = 2;
+
+dstmap_out:
+	ret = alg->decompress(rq, dst + rq->pageofs_out);
+
+	if (!dst_maptype)
+		kunmap_atomic(dst);
+	else if (dst_maptype == 2)
+		erofs_vunmap(dst, nrpages_out);
+	return ret;
+}
+
+static int shifted_decompress(const struct z_erofs_decompress_req *rq,
+			      struct list_head *pagepool)
+{
+	const unsigned int nrpages_out =
+		PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+	const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out;
+	unsigned char *src, *dst;
+
+	if (nrpages_out > 2) {
+		DBG_BUGON(1);
+		return -EIO;
+	}
+
+	if (rq->out[0] == *rq->in) {
+		DBG_BUGON(nrpages_out != 1);
+		return 0;
+	}
+
+	src = kmap_atomic(*rq->in);
+	if (!rq->out[0]) {
+		dst = NULL;
+	} else {
+		dst = kmap_atomic(rq->out[0]);
+		memcpy(dst + rq->pageofs_out, src, righthalf);
+	}
+
+	if (rq->out[1] == *rq->in) {
+		memmove(src, src + righthalf, rq->pageofs_out);
+	} else if (nrpages_out == 2) {
+		if (dst)
+			kunmap_atomic(dst);
+		DBG_BUGON(!rq->out[1]);
+		dst = kmap_atomic(rq->out[1]);
+		memcpy(dst, src + righthalf, rq->pageofs_out);
+	}
+	if (dst)
+		kunmap_atomic(dst);
+	kunmap_atomic(src);
+	return 0;
+}
+
+int z_erofs_decompress(struct z_erofs_decompress_req *rq,
+		       struct list_head *pagepool)
+{
+	if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
+		return shifted_decompress(rq, pagepool);
+	return decompress_generic(rq, pagepool);
+}
+
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
new file mode 100644
index 000000000000..1976e60e5174
--- /dev/null
+++ b/fs/erofs/dir.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+
+static void debug_one_dentry(unsigned char d_type, const char *de_name,
+			     unsigned int de_namelen)
+{
+#ifdef CONFIG_EROFS_FS_DEBUG
+	/* since the on-disk name could not have the trailing '\0' */
+	unsigned char dbg_namebuf[EROFS_NAME_LEN + 1];
+
+	memcpy(dbg_namebuf, de_name, de_namelen);
+	dbg_namebuf[de_namelen] = '\0';
+
+	debugln("found dirent %s de_len %u d_type %d", dbg_namebuf,
+		de_namelen, d_type);
+#endif
+}
+
+static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
+			       void *dentry_blk, unsigned int *ofs,
+			       unsigned int nameoff, unsigned int maxsize)
+{
+	struct erofs_dirent *de = dentry_blk + *ofs;
+	const struct erofs_dirent *end = dentry_blk + nameoff;
+
+	while (de < end) {
+		const char *de_name;
+		unsigned int de_namelen;
+		unsigned char d_type;
+
+		d_type = fs_ftype_to_dtype(de->file_type);
+
+		nameoff = le16_to_cpu(de->nameoff);
+		de_name = (char *)dentry_blk + nameoff;
+
+		/* the last dirent in the block? */
+		if (de + 1 >= end)
+			de_namelen = strnlen(de_name, maxsize - nameoff);
+		else
+			de_namelen = le16_to_cpu(de[1].nameoff) - nameoff;
+
+		/* a corrupted entry is found */
+		if (unlikely(nameoff + de_namelen > maxsize ||
+			     de_namelen > EROFS_NAME_LEN)) {
+			errln("bogus dirent @ nid %llu", EROFS_V(dir)->nid);
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+
+		debug_one_dentry(d_type, de_name, de_namelen);
+		if (!dir_emit(ctx, de_name, de_namelen,
+			      le64_to_cpu(de->nid), d_type))
+			/* stopped by some reason */
+			return 1;
+		++de;
+		*ofs += sizeof(struct erofs_dirent);
+	}
+	*ofs = maxsize;
+	return 0;
+}
+
+static int erofs_readdir(struct file *f, struct dir_context *ctx)
+{
+	struct inode *dir = file_inode(f);
+	struct address_space *mapping = dir->i_mapping;
+	const size_t dirsize = i_size_read(dir);
+	unsigned int i = ctx->pos / EROFS_BLKSIZ;
+	unsigned int ofs = ctx->pos % EROFS_BLKSIZ;
+	int err = 0;
+	bool initial = true;
+
+	while (ctx->pos < dirsize) {
+		struct page *dentry_page;
+		struct erofs_dirent *de;
+		unsigned int nameoff, maxsize;
+
+		dentry_page = read_mapping_page(mapping, i, NULL);
+		if (dentry_page == ERR_PTR(-ENOMEM)) {
+			err = -ENOMEM;
+			break;
+		} else if (IS_ERR(dentry_page)) {
+			errln("fail to readdir of logical block %u of nid %llu",
+			      i, EROFS_V(dir)->nid);
+			err = -EFSCORRUPTED;
+			break;
+		}
+
+		de = (struct erofs_dirent *)kmap(dentry_page);
+
+		nameoff = le16_to_cpu(de->nameoff);
+
+		if (unlikely(nameoff < sizeof(struct erofs_dirent) ||
+			     nameoff >= PAGE_SIZE)) {
+			errln("%s, invalid de[0].nameoff %u @ nid %llu",
+			      __func__, nameoff, EROFS_V(dir)->nid);
+			err = -EFSCORRUPTED;
+			goto skip_this;
+		}
+
+		maxsize = min_t(unsigned int,
+				dirsize - ctx->pos + ofs, PAGE_SIZE);
+
+		/* search dirents at the arbitrary position */
+		if (unlikely(initial)) {
+			initial = false;
+
+			ofs = roundup(ofs, sizeof(struct erofs_dirent));
+			if (unlikely(ofs >= nameoff))
+				goto skip_this;
+		}
+
+		err = erofs_fill_dentries(dir, ctx, de, &ofs,
+					  nameoff, maxsize);
+skip_this:
+		kunmap(dentry_page);
+
+		put_page(dentry_page);
+
+		ctx->pos = blknr_to_addr(i) + ofs;
+
+		if (unlikely(err))
+			break;
+		++i;
+		ofs = 0;
+	}
+	return err < 0 ? err : 0;
+}
+
+const struct file_operations erofs_dir_fops = {
+	.llseek		= generic_file_llseek,
+	.read		= generic_read_dir,
+	.iterate_shared	= erofs_readdir,
+};
+
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
new file mode 100644
index 000000000000..afa7d45ca958
--- /dev/null
+++ b/fs/erofs/erofs_fs.h
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR Apache-2.0 */
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_H
+#define __EROFS_FS_H
+
+/* Enhanced(Extended) ROM File System */
+#define EROFS_SUPER_OFFSET      1024
+
+/*
+ * Any bits that aren't in EROFS_ALL_REQUIREMENTS should be
+ * incompatible with this kernel version.
+ */
+#define EROFS_REQUIREMENT_LZ4_0PADDING	0x00000001
+#define EROFS_ALL_REQUIREMENTS		EROFS_REQUIREMENT_LZ4_0PADDING
+
+struct erofs_super_block {
+/*  0 */__le32 magic;           /* in the little endian */
+/*  4 */__le32 checksum;        /* crc32c(super_block) */
+/*  8 */__le32 features;        /* (aka. feature_compat) */
+/* 12 */__u8 blkszbits;         /* support block_size == PAGE_SIZE only */
+/* 13 */__u8 reserved;
+
+/* 14 */__le16 root_nid;
+/* 16 */__le64 inos;            /* total valid ino # (== f_files - f_favail) */
+
+/* 24 */__le64 build_time;      /* inode v1 time derivation */
+/* 32 */__le32 build_time_nsec;
+/* 36 */__le32 blocks;          /* used for statfs */
+/* 40 */__le32 meta_blkaddr;
+/* 44 */__le32 xattr_blkaddr;
+/* 48 */__u8 uuid[16];          /* 128-bit uuid for volume */
+/* 64 */__u8 volume_name[16];   /* volume name */
+/* 80 */__le32 requirements;    /* (aka. feature_incompat) */
+
+/* 84 */__u8 reserved2[44];
+} __packed;                     /* 128 bytes */
+
+/*
+ * erofs inode data mapping:
+ * 0 - inode plain without inline data A:
+ * inode, [xattrs], ... | ... | no-holed data
+ * 1 - inode VLE compression B (legacy):
+ * inode, [xattrs], extents ... | ...
+ * 2 - inode plain with inline data C:
+ * inode, [xattrs], last_inline_data, ... | ... | no-holed data
+ * 3 - inode compression D:
+ * inode, [xattrs], map_header, extents ... | ...
+ * 4~7 - reserved
+ */
+enum {
+	EROFS_INODE_FLAT_PLAIN,
+	EROFS_INODE_FLAT_COMPRESSION_LEGACY,
+	EROFS_INODE_FLAT_INLINE,
+	EROFS_INODE_FLAT_COMPRESSION,
+	EROFS_INODE_LAYOUT_MAX
+};
+
+static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
+{
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+		return true;
+	return datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY;
+}
+
+/* bit definitions of inode i_advise */
+#define EROFS_I_VERSION_BITS            1
+#define EROFS_I_DATA_MAPPING_BITS       3
+
+#define EROFS_I_VERSION_BIT             0
+#define EROFS_I_DATA_MAPPING_BIT        1
+
+struct erofs_inode_v1 {
+/*  0 */__le16 i_advise;
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+/*  2 */__le16 i_xattr_icount;
+/*  4 */__le16 i_mode;
+/*  6 */__le16 i_nlink;
+/*  8 */__le32 i_size;
+/* 12 */__le32 i_reserved;
+/* 16 */union {
+		/* file total compressed blocks for data mapping 1 */
+		__le32 compressed_blocks;
+		__le32 raw_blkaddr;
+
+		/* for device files, used to indicate old/new device # */
+		__le32 rdev;
+	} i_u __packed;
+/* 20 */__le32 i_ino;           /* only used for 32-bit stat compatibility */
+/* 24 */__le16 i_uid;
+/* 26 */__le16 i_gid;
+/* 28 */__le32 i_reserved2;
+} __packed;
+
+/* 32 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_V1   0
+/* 64 bytes on-disk inode */
+#define EROFS_INODE_LAYOUT_V2   1
+
+struct erofs_inode_v2 {
+/*  0 */__le16 i_advise;
+
+/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
+/*  2 */__le16 i_xattr_icount;
+/*  4 */__le16 i_mode;
+/*  6 */__le16 i_reserved;
+/*  8 */__le64 i_size;
+/* 16 */union {
+		/* file total compressed blocks for data mapping 1 */
+		__le32 compressed_blocks;
+		__le32 raw_blkaddr;
+
+		/* for device files, used to indicate old/new device # */
+		__le32 rdev;
+	} i_u __packed;
+
+	/* only used for 32-bit stat compatibility */
+/* 20 */__le32 i_ino;
+
+/* 24 */__le32 i_uid;
+/* 28 */__le32 i_gid;
+/* 32 */__le64 i_ctime;
+/* 40 */__le32 i_ctime_nsec;
+/* 44 */__le32 i_nlink;
+/* 48 */__u8   i_reserved2[16];
+} __packed;                     /* 64 bytes */
+
+#define EROFS_MAX_SHARED_XATTRS         (128)
+/* h_shared_count between 129 ... 255 are special # */
+#define EROFS_SHARED_XATTR_EXTENT       (255)
+
+/*
+ * inline xattrs (n == i_xattr_icount):
+ * erofs_xattr_ibody_header(1) + (n - 1) * 4 bytes
+ *          12 bytes           /                   \
+ *                            /                     \
+ *                           /-----------------------\
+ *                           |  erofs_xattr_entries+ |
+ *                           +-----------------------+
+ * inline xattrs must starts in erofs_xattr_ibody_header,
+ * for read-only fs, no need to introduce h_refcount
+ */
+struct erofs_xattr_ibody_header {
+	__le32 h_reserved;
+	__u8   h_shared_count;
+	__u8   h_reserved2[7];
+	__le32 h_shared_xattrs[0];      /* shared xattr id array */
+} __packed;
+
+/* Name indexes */
+#define EROFS_XATTR_INDEX_USER              1
+#define EROFS_XATTR_INDEX_POSIX_ACL_ACCESS  2
+#define EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT 3
+#define EROFS_XATTR_INDEX_TRUSTED           4
+#define EROFS_XATTR_INDEX_LUSTRE            5
+#define EROFS_XATTR_INDEX_SECURITY          6
+
+/* xattr entry (for both inline & shared xattrs) */
+struct erofs_xattr_entry {
+	__u8   e_name_len;      /* length of name */
+	__u8   e_name_index;    /* attribute name index */
+	__le16 e_value_size;    /* size of attribute value */
+	/* followed by e_name and e_value */
+	char   e_name[0];       /* attribute name */
+} __packed;
+
+#define ondisk_xattr_ibody_size(count)	({\
+	u32 __count = le16_to_cpu(count); \
+	((__count) == 0) ? 0 : \
+	sizeof(struct erofs_xattr_ibody_header) + \
+		sizeof(__u32) * ((__count) - 1); })
+
+#define EROFS_XATTR_ALIGN(size) round_up(size, sizeof(struct erofs_xattr_entry))
+#define EROFS_XATTR_ENTRY_SIZE(entry) EROFS_XATTR_ALIGN( \
+	sizeof(struct erofs_xattr_entry) + \
+	(entry)->e_name_len + le16_to_cpu((entry)->e_value_size))
+
+/* available compression algorithm types */
+enum {
+	Z_EROFS_COMPRESSION_LZ4,
+	Z_EROFS_COMPRESSION_MAX
+};
+
+/*
+ * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
+ *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
+ *                                  (4B) + 2B + (4B) if compacted 2B is on.
+ */
+#define Z_EROFS_ADVISE_COMPACTED_2B_BIT         0
+
+#define Z_EROFS_ADVISE_COMPACTED_2B     (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT)
+
+struct z_erofs_map_header {
+	__le32	h_reserved1;
+	__le16	h_advise;
+	/*
+	 * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
+	 * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
+	 */
+	__u8	h_algorithmtype;
+	/*
+	 * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
+	 * bit 3-4 : (physical - logical) cluster bits of head 1:
+	 *       For example, if logical clustersize = 4096, 1 for 8192.
+	 * bit 5-7 : (physical - logical) cluster bits of head 2.
+	 */
+	__u8	h_clusterbits;
+};
+
+#define Z_EROFS_VLE_LEGACY_HEADER_PADDING       8
+
+/*
+ * Z_EROFS Variable-sized Logical Extent cluster type:
+ *    0 - literal (uncompressed) cluster
+ *    1 - compressed cluster (for the head logical cluster)
+ *    2 - compressed cluster (for the other logical clusters)
+ *
+ * In detail,
+ *    0 - literal (uncompressed) cluster,
+ *        di_advise = 0
+ *        di_clusterofs = the literal data offset of the cluster
+ *        di_blkaddr = the blkaddr of the literal cluster
+ *
+ *    1 - compressed cluster (for the head logical cluster)
+ *        di_advise = 1
+ *        di_clusterofs = the decompressed data offset of the cluster
+ *        di_blkaddr = the blkaddr of the compressed cluster
+ *
+ *    2 - compressed cluster (for the other logical clusters)
+ *        di_advise = 2
+ *        di_clusterofs =
+ *           the decompressed data offset in its own head cluster
+ *        di_u.delta[0] = distance to its corresponding head cluster
+ *        di_u.delta[1] = distance to its corresponding tail cluster
+ *                (di_advise could be 0, 1 or 2)
+ */
+enum {
+	Z_EROFS_VLE_CLUSTER_TYPE_PLAIN,
+	Z_EROFS_VLE_CLUSTER_TYPE_HEAD,
+	Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD,
+	Z_EROFS_VLE_CLUSTER_TYPE_RESERVED,
+	Z_EROFS_VLE_CLUSTER_TYPE_MAX
+};
+
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS        2
+#define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT         0
+
+struct z_erofs_vle_decompressed_index {
+	__le16 di_advise;
+	/* where to decompress in the head cluster */
+	__le16 di_clusterofs;
+
+	union {
+		/* for the head cluster */
+		__le32 blkaddr;
+		/*
+		 * for the rest clusters
+		 * eg. for 4k page-sized cluster, maximum 4K*64k = 256M)
+		 * [0] - pointing to the head cluster
+		 * [1] - pointing to the tail cluster
+		 */
+		__le16 delta[2];
+	} di_u __packed;		/* 8 bytes */
+} __packed;
+
+#define Z_EROFS_VLE_LEGACY_INDEX_ALIGN(size) \
+	(round_up(size, sizeof(struct z_erofs_vle_decompressed_index)) + \
+	 sizeof(struct z_erofs_map_header) + Z_EROFS_VLE_LEGACY_HEADER_PADDING)
+
+/* dirent sorts in alphabet order, thus we can do binary search */
+struct erofs_dirent {
+	__le64 nid;     /*  0, node number */
+	__le16 nameoff; /*  8, start offset of file name */
+	__u8 file_type; /* 10, file type */
+	__u8 reserved;  /* 11, reserved */
+} __packed;
+
+/*
+ * EROFS file types should match generic FT_* types and
+ * it seems no need to add BUILD_BUG_ONs since potential
+ * unmatchness will break other fses as well...
+ */
+
+#define EROFS_NAME_LEN      255
+
+/* check the EROFS on-disk layout strictly at compile time */
+static inline void erofs_check_ondisk_layout_definitions(void)
+{
+	BUILD_BUG_ON(sizeof(struct erofs_super_block) != 128);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_v1) != 32);
+	BUILD_BUG_ON(sizeof(struct erofs_inode_v2) != 64);
+	BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12);
+	BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4);
+	BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8);
+	BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8);
+	BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12);
+
+	BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) <
+		     Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1);
+}
+
+#endif
+
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
new file mode 100644
index 000000000000..80f4fe919ee7
--- /dev/null
+++ b/fs/erofs/inode.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "xattr.h"
+
+#include <trace/events/erofs.h>
+
+/* no locking */
+static int read_inode(struct inode *inode, void *data)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode_v1 *v1 = data;
+	const unsigned int advise = le16_to_cpu(v1->i_advise);
+	erofs_blk_t nblks = 0;
+
+	vi->datamode = __inode_data_mapping(advise);
+
+	if (unlikely(vi->datamode >= EROFS_INODE_LAYOUT_MAX)) {
+		errln("unsupported data mapping %u of nid %llu",
+		      vi->datamode, vi->nid);
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+
+	if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) {
+		struct erofs_inode_v2 *v2 = data;
+
+		vi->inode_isize = sizeof(struct erofs_inode_v2);
+		vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount);
+
+		inode->i_mode = le16_to_cpu(v2->i_mode);
+		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		    S_ISLNK(inode->i_mode))
+			vi->raw_blkaddr = le32_to_cpu(v2->i_u.raw_blkaddr);
+		else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+			inode->i_rdev =
+				new_decode_dev(le32_to_cpu(v2->i_u.rdev));
+		else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode))
+			inode->i_rdev = 0;
+		else
+			goto bogusimode;
+
+		i_uid_write(inode, le32_to_cpu(v2->i_uid));
+		i_gid_write(inode, le32_to_cpu(v2->i_gid));
+		set_nlink(inode, le32_to_cpu(v2->i_nlink));
+
+		/* ns timestamp */
+		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
+			le64_to_cpu(v2->i_ctime);
+		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
+			le32_to_cpu(v2->i_ctime_nsec);
+
+		inode->i_size = le64_to_cpu(v2->i_size);
+
+		/* total blocks for compressed files */
+		if (is_inode_layout_compression(inode))
+			nblks = le32_to_cpu(v2->i_u.compressed_blocks);
+	} else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) {
+		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+
+		vi->inode_isize = sizeof(struct erofs_inode_v1);
+		vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount);
+
+		inode->i_mode = le16_to_cpu(v1->i_mode);
+		if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+		    S_ISLNK(inode->i_mode))
+			vi->raw_blkaddr = le32_to_cpu(v1->i_u.raw_blkaddr);
+		else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+			inode->i_rdev =
+				new_decode_dev(le32_to_cpu(v1->i_u.rdev));
+		else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode))
+			inode->i_rdev = 0;
+		else
+			goto bogusimode;
+
+		i_uid_write(inode, le16_to_cpu(v1->i_uid));
+		i_gid_write(inode, le16_to_cpu(v1->i_gid));
+		set_nlink(inode, le16_to_cpu(v1->i_nlink));
+
+		/* use build time to derive all file time */
+		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec =
+			sbi->build_time;
+		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec =
+			sbi->build_time_nsec;
+
+		inode->i_size = le32_to_cpu(v1->i_size);
+		if (is_inode_layout_compression(inode))
+			nblks = le32_to_cpu(v1->i_u.compressed_blocks);
+	} else {
+		errln("unsupported on-disk inode version %u of nid %llu",
+		      __inode_version(advise), vi->nid);
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+
+	if (!nblks)
+		/* measure inode.i_blocks as generic filesystems */
+		inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9;
+	else
+		inode->i_blocks = nblks << LOG_SECTORS_PER_BLOCK;
+	return 0;
+
+bogusimode:
+	errln("bogus i_mode (%o) @ nid %llu", inode->i_mode, vi->nid);
+	DBG_BUGON(1);
+	return -EFSCORRUPTED;
+}
+
+/*
+ * try_lock can be required since locking order is:
+ *   file data(fs_inode)
+ *        meta(bd_inode)
+ * but the majority of the callers is "iget",
+ * in that case we are pretty sure no deadlock since
+ * no data operations exist. However I tend to
+ * try_lock since it takes no much overhead and
+ * will success immediately.
+ */
+static int fill_inline_data(struct inode *inode, void *data,
+			    unsigned int m_pofs)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+
+	/* should be inode inline C */
+	if (!is_inode_flat_inline(inode))
+		return 0;
+
+	/* fast symlink (following ext4) */
+	if (S_ISLNK(inode->i_mode) && inode->i_size < PAGE_SIZE) {
+		char *lnk = erofs_kmalloc(sbi, inode->i_size + 1, GFP_KERNEL);
+
+		if (unlikely(!lnk))
+			return -ENOMEM;
+
+		m_pofs += vi->inode_isize + vi->xattr_isize;
+
+		/* inline symlink data shouldn't across page boundary as well */
+		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
+			kfree(lnk);
+			errln("inline data cross block boundary @ nid %llu",
+			      vi->nid);
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+
+		/* get in-page inline data */
+		memcpy(lnk, data + m_pofs, inode->i_size);
+		lnk[inode->i_size] = '\0';
+
+		inode->i_link = lnk;
+		set_inode_fast_symlink(inode);
+	}
+	return 0;
+}
+
+static int fill_inode(struct inode *inode, int isdir)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
+	struct erofs_vnode *vi = EROFS_V(inode);
+	struct page *page;
+	void *data;
+	int err;
+	erofs_blk_t blkaddr;
+	unsigned int ofs;
+	erofs_off_t inode_loc;
+
+	trace_erofs_fill_inode(inode, isdir);
+	inode_loc = iloc(sbi, vi->nid);
+	blkaddr = erofs_blknr(inode_loc);
+	ofs = erofs_blkoff(inode_loc);
+
+	debugln("%s, reading inode nid %llu at %u of blkaddr %u",
+		__func__, vi->nid, ofs, blkaddr);
+
+	page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir);
+
+	if (IS_ERR(page)) {
+		errln("failed to get inode (nid: %llu) page, err %ld",
+		      vi->nid, PTR_ERR(page));
+		return PTR_ERR(page);
+	}
+
+	DBG_BUGON(!PageUptodate(page));
+	data = page_address(page);
+
+	err = read_inode(inode, data + ofs);
+	if (!err) {
+		/* setup the new inode */
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_op = &erofs_generic_iops;
+			inode->i_fop = &generic_ro_fops;
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = &erofs_dir_iops;
+			inode->i_fop = &erofs_dir_fops;
+		} else if (S_ISLNK(inode->i_mode)) {
+			/* by default, page_get_link is used for symlink */
+			inode->i_op = &erofs_symlink_iops;
+			inode_nohighmem(inode);
+		} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
+			S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
+			inode->i_op = &erofs_generic_iops;
+			init_special_inode(inode, inode->i_mode, inode->i_rdev);
+			goto out_unlock;
+		} else {
+			err = -EFSCORRUPTED;
+			goto out_unlock;
+		}
+
+		if (is_inode_layout_compression(inode)) {
+			err = z_erofs_fill_inode(inode);
+			goto out_unlock;
+		}
+
+		inode->i_mapping->a_ops = &erofs_raw_access_aops;
+
+		/* fill last page if inline data is available */
+		err = fill_inline_data(inode, data, ofs);
+	}
+
+out_unlock:
+	unlock_page(page);
+	put_page(page);
+	return err;
+}
+
+/*
+ * erofs nid is 64bits, but i_ino is 'unsigned long', therefore
+ * we should do more for 32-bit platform to find the right inode.
+ */
+#if BITS_PER_LONG == 32
+static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	return EROFS_V(inode)->nid == nid;
+}
+
+static int erofs_iget_set_actor(struct inode *inode, void *opaque)
+{
+	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
+
+	inode->i_ino = erofs_inode_hash(nid);
+	return 0;
+}
+#endif
+
+static inline struct inode *erofs_iget_locked(struct super_block *sb,
+					      erofs_nid_t nid)
+{
+	const unsigned long hashval = erofs_inode_hash(nid);
+
+#if BITS_PER_LONG >= 64
+	/* it is safe to use iget_locked for >= 64-bit platform */
+	return iget_locked(sb, hashval);
+#else
+	return iget5_locked(sb, hashval, erofs_ilookup_test_actor,
+		erofs_iget_set_actor, &nid);
+#endif
+}
+
+struct inode *erofs_iget(struct super_block *sb,
+			 erofs_nid_t nid,
+			 bool isdir)
+{
+	struct inode *inode = erofs_iget_locked(sb, nid);
+
+	if (unlikely(!inode))
+		return ERR_PTR(-ENOMEM);
+
+	if (inode->i_state & I_NEW) {
+		int err;
+		struct erofs_vnode *vi = EROFS_V(inode);
+
+		vi->nid = nid;
+
+		err = fill_inode(inode, isdir);
+		if (likely(!err))
+			unlock_new_inode(inode);
+		else {
+			iget_failed(inode);
+			inode = ERR_PTR(err);
+		}
+	}
+	return inode;
+}
+
+int erofs_getattr(const struct path *path, struct kstat *stat,
+		  u32 request_mask, unsigned int query_flags)
+{
+	struct inode *const inode = d_inode(path->dentry);
+
+	if (is_inode_layout_compression(inode))
+		stat->attributes |= STATX_ATTR_COMPRESSED;
+
+	stat->attributes |= STATX_ATTR_IMMUTABLE;
+	stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
+				  STATX_ATTR_IMMUTABLE);
+
+	generic_fillattr(inode, stat);
+	return 0;
+}
+
+const struct inode_operations erofs_generic_iops = {
+	.getattr = erofs_getattr,
+#ifdef CONFIG_EROFS_FS_XATTR
+	.listxattr = erofs_listxattr,
+#endif
+	.get_acl = erofs_get_acl,
+};
+
+const struct inode_operations erofs_symlink_iops = {
+	.get_link = page_get_link,
+	.getattr = erofs_getattr,
+#ifdef CONFIG_EROFS_FS_XATTR
+	.listxattr = erofs_listxattr,
+#endif
+	.get_acl = erofs_get_acl,
+};
+
+const struct inode_operations erofs_fast_symlink_iops = {
+	.get_link = simple_get_link,
+	.getattr = erofs_getattr,
+#ifdef CONFIG_EROFS_FS_XATTR
+	.listxattr = erofs_listxattr,
+#endif
+	.get_acl = erofs_get_acl,
+};
+
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
new file mode 100644
index 000000000000..620b73fcc416
--- /dev/null
+++ b/fs/erofs/internal.h
@@ -0,0 +1,553 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_INTERNAL_H
+#define __EROFS_INTERNAL_H
+
+#include <linux/fs.h>
+#include <linux/dcache.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/magic.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "erofs_fs.h"
+
+/* redefine pr_fmt "erofs: " */
+#undef pr_fmt
+#define pr_fmt(fmt) "erofs: " fmt
+
+#define errln(x, ...)   pr_err(x "\n", ##__VA_ARGS__)
+#define infoln(x, ...)  pr_info(x "\n", ##__VA_ARGS__)
+#ifdef CONFIG_EROFS_FS_DEBUG
+#define debugln(x, ...) pr_debug(x "\n", ##__VA_ARGS__)
+#define DBG_BUGON               BUG_ON
+#else
+#define debugln(x, ...)         ((void)0)
+#define DBG_BUGON(x)            ((void)(x))
+#endif	/* !CONFIG_EROFS_FS_DEBUG */
+
+enum {
+	FAULT_KMALLOC,
+	FAULT_READ_IO,
+	FAULT_MAX,
+};
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+extern const char *erofs_fault_name[FAULT_MAX];
+#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type)))
+
+struct erofs_fault_info {
+	atomic_t inject_ops;
+	unsigned int inject_rate;
+	unsigned int inject_type;
+};
+#endif	/* CONFIG_EROFS_FAULT_INJECTION */
+
+/* EROFS_SUPER_MAGIC_V1 to represent the whole file system */
+#define EROFS_SUPER_MAGIC   EROFS_SUPER_MAGIC_V1
+
+typedef u64 erofs_nid_t;
+typedef u64 erofs_off_t;
+/* data type for filesystem-wide blocks number */
+typedef u32 erofs_blk_t;
+
+struct erofs_sb_info {
+#ifdef CONFIG_EROFS_FS_ZIP
+	/* list for all registered superblocks, mainly for shrinker */
+	struct list_head list;
+	struct mutex umount_mutex;
+
+	/* the dedicated workstation for compression */
+	struct radix_tree_root workstn_tree;
+
+	/* threshold for decompression synchronously */
+	unsigned int max_sync_decompress_pages;
+
+	unsigned int shrinker_run_no;
+
+	/* current strategy of how to use managed cache */
+	unsigned char cache_strategy;
+
+	/* pseudo inode to manage cached pages */
+	struct inode *managed_cache;
+#endif	/* CONFIG_EROFS_FS_ZIP */
+	u32 blocks;
+	u32 meta_blkaddr;
+#ifdef CONFIG_EROFS_FS_XATTR
+	u32 xattr_blkaddr;
+#endif
+
+	/* inode slot unit size in bit shift */
+	unsigned char islotbits;
+
+	u32 build_time_nsec;
+	u64 build_time;
+
+	/* what we really care is nid, rather than ino.. */
+	erofs_nid_t root_nid;
+	/* used for statfs, f_files - f_favail */
+	u64 inos;
+
+	u8 uuid[16];                    /* 128-bit uuid for volume */
+	u8 volume_name[16];             /* volume name */
+	u32 requirements;
+
+	unsigned int mount_opt;
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+	struct erofs_fault_info fault_info;	/* For fault injection */
+#endif
+};
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+#define erofs_show_injection_info(type)					\
+	infoln("inject %s in %s of %pS", erofs_fault_name[type],        \
+		__func__, __builtin_return_address(0))
+
+static inline bool time_to_inject(struct erofs_sb_info *sbi, int type)
+{
+	struct erofs_fault_info *ffi = &sbi->fault_info;
+
+	if (!ffi->inject_rate)
+		return false;
+
+	if (!IS_FAULT_SET(ffi, type))
+		return false;
+
+	atomic_inc(&ffi->inject_ops);
+	if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) {
+		atomic_set(&ffi->inject_ops, 0);
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool time_to_inject(struct erofs_sb_info *sbi, int type)
+{
+	return false;
+}
+
+static inline void erofs_show_injection_info(int type)
+{
+}
+#endif	/* !CONFIG_EROFS_FAULT_INJECTION */
+
+static inline void *erofs_kmalloc(struct erofs_sb_info *sbi,
+					size_t size, gfp_t flags)
+{
+	if (time_to_inject(sbi, FAULT_KMALLOC)) {
+		erofs_show_injection_info(FAULT_KMALLOC);
+		return NULL;
+	}
+	return kmalloc(size, flags);
+}
+
+#define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info)
+#define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info)
+
+/* Mount flags set via mount options or defaults */
+#define EROFS_MOUNT_XATTR_USER		0x00000010
+#define EROFS_MOUNT_POSIX_ACL		0x00000020
+#define EROFS_MOUNT_FAULT_INJECTION	0x00000040
+
+#define clear_opt(sbi, option)	((sbi)->mount_opt &= ~EROFS_MOUNT_##option)
+#define set_opt(sbi, option)	((sbi)->mount_opt |= EROFS_MOUNT_##option)
+#define test_opt(sbi, option)	((sbi)->mount_opt & EROFS_MOUNT_##option)
+
+#ifdef CONFIG_EROFS_FS_ZIP
+enum {
+	EROFS_ZIP_CACHE_DISABLED,
+	EROFS_ZIP_CACHE_READAHEAD,
+	EROFS_ZIP_CACHE_READAROUND
+};
+
+#define EROFS_LOCKED_MAGIC     (INT_MIN | 0xE0F510CCL)
+
+/* basic unit of the workstation of a super_block */
+struct erofs_workgroup {
+	/* the workgroup index in the workstation */
+	pgoff_t index;
+
+	/* overall workgroup reference count */
+	atomic_t refcount;
+};
+
+#if defined(CONFIG_SMP)
+static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
+						 int val)
+{
+	preempt_disable();
+	if (val != atomic_cmpxchg(&grp->refcount, val, EROFS_LOCKED_MAGIC)) {
+		preempt_enable();
+		return false;
+	}
+	return true;
+}
+
+static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
+					    int orig_val)
+{
+	/*
+	 * other observers should notice all modifications
+	 * in the freezing period.
+	 */
+	smp_mb();
+	atomic_set(&grp->refcount, orig_val);
+	preempt_enable();
+}
+
+static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
+{
+	return atomic_cond_read_relaxed(&grp->refcount,
+					VAL != EROFS_LOCKED_MAGIC);
+}
+#else
+static inline bool erofs_workgroup_try_to_freeze(struct erofs_workgroup *grp,
+						 int val)
+{
+	preempt_disable();
+	/* no need to spin on UP platforms, let's just disable preemption. */
+	if (val != atomic_read(&grp->refcount)) {
+		preempt_enable();
+		return false;
+	}
+	return true;
+}
+
+static inline void erofs_workgroup_unfreeze(struct erofs_workgroup *grp,
+					    int orig_val)
+{
+	preempt_enable();
+}
+
+static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp)
+{
+	int v = atomic_read(&grp->refcount);
+
+	/* workgroup is never freezed on uniprocessor systems */
+	DBG_BUGON(v == EROFS_LOCKED_MAGIC);
+	return v;
+}
+#endif	/* !CONFIG_SMP */
+
+/* hard limit of pages per compressed cluster */
+#define Z_EROFS_CLUSTER_MAX_PAGES       (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT)
+#define EROFS_PCPUBUF_NR_PAGES          Z_EROFS_CLUSTER_MAX_PAGES
+#else
+#define EROFS_PCPUBUF_NR_PAGES          0
+#endif	/* !CONFIG_EROFS_FS_ZIP */
+
+/* we strictly follow PAGE_SIZE and no buffer head yet */
+#define LOG_BLOCK_SIZE		PAGE_SHIFT
+
+#undef LOG_SECTORS_PER_BLOCK
+#define LOG_SECTORS_PER_BLOCK	(PAGE_SHIFT - 9)
+
+#undef SECTORS_PER_BLOCK
+#define SECTORS_PER_BLOCK	(1 << SECTORS_PER_BLOCK)
+
+#define EROFS_BLKSIZ		(1 << LOG_BLOCK_SIZE)
+
+#if (EROFS_BLKSIZ % 4096 || !EROFS_BLKSIZ)
+#error erofs cannot be used in this platform
+#endif
+
+#define EROFS_IO_MAX_RETRIES_NOFAIL     5
+
+#define ROOT_NID(sb)		((sb)->root_nid)
+
+#define erofs_blknr(addr)       ((addr) / EROFS_BLKSIZ)
+#define erofs_blkoff(addr)      ((addr) % EROFS_BLKSIZ)
+#define blknr_to_addr(nr)       ((erofs_off_t)(nr) * EROFS_BLKSIZ)
+
+static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
+{
+	return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits);
+}
+
+/* atomic flag definitions */
+#define EROFS_V_EA_INITED_BIT	0
+#define EROFS_V_Z_INITED_BIT	1
+
+/* bitlock definitions (arranged in reverse order) */
+#define EROFS_V_BL_XATTR_BIT	(BITS_PER_LONG - 1)
+#define EROFS_V_BL_Z_BIT	(BITS_PER_LONG - 2)
+
+struct erofs_vnode {
+	erofs_nid_t nid;
+
+	/* atomic flags (including bitlocks) */
+	unsigned long flags;
+
+	unsigned char datamode;
+	unsigned char inode_isize;
+	unsigned short xattr_isize;
+
+	unsigned int xattr_shared_count;
+	unsigned int *xattr_shared_xattrs;
+
+	union {
+		erofs_blk_t raw_blkaddr;
+#ifdef CONFIG_EROFS_FS_ZIP
+		struct {
+			unsigned short z_advise;
+			unsigned char  z_algorithmtype[2];
+			unsigned char  z_logical_clusterbits;
+			unsigned char  z_physical_clusterbits[2];
+		};
+#endif	/* CONFIG_EROFS_FS_ZIP */
+	};
+	/* the corresponding vfs inode */
+	struct inode vfs_inode;
+};
+
+#define EROFS_V(ptr)	\
+	container_of(ptr, struct erofs_vnode, vfs_inode)
+
+#define __inode_advise(x, bit, bits) \
+	(((x) >> (bit)) & ((1 << (bits)) - 1))
+
+#define __inode_version(advise)	\
+	__inode_advise(advise, EROFS_I_VERSION_BIT,	\
+		EROFS_I_VERSION_BITS)
+
+#define __inode_data_mapping(advise)	\
+	__inode_advise(advise, EROFS_I_DATA_MAPPING_BIT,\
+		EROFS_I_DATA_MAPPING_BITS)
+
+static inline unsigned long inode_datablocks(struct inode *inode)
+{
+	/* since i_size cannot be changed */
+	return DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+}
+
+static inline bool is_inode_layout_compression(struct inode *inode)
+{
+	return erofs_inode_is_data_compressed(EROFS_V(inode)->datamode);
+}
+
+static inline bool is_inode_flat_inline(struct inode *inode)
+{
+	return EROFS_V(inode)->datamode == EROFS_INODE_FLAT_INLINE;
+}
+
+extern const struct super_operations erofs_sops;
+
+extern const struct address_space_operations erofs_raw_access_aops;
+#ifdef CONFIG_EROFS_FS_ZIP
+extern const struct address_space_operations z_erofs_vle_normalaccess_aops;
+#endif
+
+/*
+ * Logical to physical block mapping, used by erofs_map_blocks()
+ *
+ * Different with other file systems, it is used for 2 access modes:
+ *
+ * 1) RAW access mode:
+ *
+ * Users pass a valid (m_lblk, m_lofs -- usually 0) pair,
+ * and get the valid m_pblk, m_pofs and the longest m_len(in bytes).
+ *
+ * Note that m_lblk in the RAW access mode refers to the number of
+ * the compressed ondisk block rather than the uncompressed
+ * in-memory block for the compressed file.
+ *
+ * m_pofs equals to m_lofs except for the inline data page.
+ *
+ * 2) Normal access mode:
+ *
+ * If the inode is not compressed, it has no difference with
+ * the RAW access mode. However, if the inode is compressed,
+ * users should pass a valid (m_lblk, m_lofs) pair, and get
+ * the needed m_pblk, m_pofs, m_len to get the compressed data
+ * and the updated m_lblk, m_lofs which indicates the start
+ * of the corresponding uncompressed data in the file.
+ */
+enum {
+	BH_Zipped = BH_PrivateStart,
+	BH_FullMapped,
+};
+
+/* Has a disk mapping */
+#define EROFS_MAP_MAPPED	(1 << BH_Mapped)
+/* Located in metadata (could be copied from bd_inode) */
+#define EROFS_MAP_META		(1 << BH_Meta)
+/* The extent has been compressed */
+#define EROFS_MAP_ZIPPED	(1 << BH_Zipped)
+/* The length of extent is full */
+#define EROFS_MAP_FULL_MAPPED	(1 << BH_FullMapped)
+
+struct erofs_map_blocks {
+	erofs_off_t m_pa, m_la;
+	u64 m_plen, m_llen;
+
+	unsigned int m_flags;
+
+	struct page *mpage;
+};
+
+/* Flags used by erofs_map_blocks() */
+#define EROFS_GET_BLOCKS_RAW    0x0001
+
+/* zmap.c */
+#ifdef CONFIG_EROFS_FS_ZIP
+int z_erofs_fill_inode(struct inode *inode);
+int z_erofs_map_blocks_iter(struct inode *inode,
+			    struct erofs_map_blocks *map,
+			    int flags);
+#else
+static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP; }
+static inline int z_erofs_map_blocks_iter(struct inode *inode,
+					  struct erofs_map_blocks *map,
+					  int flags)
+{
+	return -EOPNOTSUPP;
+}
+#endif	/* !CONFIG_EROFS_FS_ZIP */
+
+/* data.c */
+static inline struct bio *erofs_grab_bio(struct super_block *sb,
+					 erofs_blk_t blkaddr,
+					 unsigned int nr_pages,
+					 void *bi_private, bio_end_io_t endio,
+					 bool nofail)
+{
+	const gfp_t gfp = GFP_NOIO;
+	struct bio *bio;
+
+	do {
+		if (nr_pages == 1) {
+			bio = bio_alloc(gfp | (nofail ? __GFP_NOFAIL : 0), 1);
+			if (unlikely(!bio)) {
+				DBG_BUGON(nofail);
+				return ERR_PTR(-ENOMEM);
+			}
+			break;
+		}
+		bio = bio_alloc(gfp, nr_pages);
+		nr_pages /= 2;
+	} while (unlikely(!bio));
+
+	bio->bi_end_io = endio;
+	bio_set_dev(bio, sb->s_bdev);
+	bio->bi_iter.bi_sector = (sector_t)blkaddr << LOG_SECTORS_PER_BLOCK;
+	bio->bi_private = bi_private;
+	return bio;
+}
+
+static inline void __submit_bio(struct bio *bio, unsigned int op,
+				unsigned int op_flags)
+{
+	bio_set_op_attrs(bio, op, op_flags);
+	submit_bio(bio);
+}
+
+struct page *__erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr,
+				   bool prio, bool nofail);
+
+static inline struct page *erofs_get_meta_page(struct super_block *sb,
+	erofs_blk_t blkaddr, bool prio)
+{
+	return __erofs_get_meta_page(sb, blkaddr, prio, false);
+}
+
+int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int);
+
+static inline struct page *erofs_get_inline_page(struct inode *inode,
+						 erofs_blk_t blkaddr)
+{
+	return erofs_get_meta_page(inode->i_sb, blkaddr,
+				   S_ISDIR(inode->i_mode));
+}
+
+/* inode.c */
+static inline unsigned long erofs_inode_hash(erofs_nid_t nid)
+{
+#if BITS_PER_LONG == 32
+	return (nid >> 32) ^ (nid & 0xffffffff);
+#else
+	return nid;
+#endif
+}
+
+extern const struct inode_operations erofs_generic_iops;
+extern const struct inode_operations erofs_symlink_iops;
+extern const struct inode_operations erofs_fast_symlink_iops;
+
+static inline void set_inode_fast_symlink(struct inode *inode)
+{
+	inode->i_op = &erofs_fast_symlink_iops;
+}
+
+static inline bool is_inode_fast_symlink(struct inode *inode)
+{
+	return inode->i_op == &erofs_fast_symlink_iops;
+}
+
+struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
+int erofs_getattr(const struct path *path, struct kstat *stat,
+		  u32 request_mask, unsigned int query_flags);
+
+/* namei.c */
+extern const struct inode_operations erofs_dir_iops;
+
+int erofs_namei(struct inode *dir, struct qstr *name,
+		erofs_nid_t *nid, unsigned int *d_type);
+
+/* dir.c */
+extern const struct file_operations erofs_dir_fops;
+
+/* utils.c / zdata.c */
+struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail);
+
+#if (EROFS_PCPUBUF_NR_PAGES > 0)
+void *erofs_get_pcpubuf(unsigned int pagenr);
+#define erofs_put_pcpubuf(buf) do { \
+	(void)&(buf);	\
+	preempt_enable();	\
+} while (0)
+#else
+static inline void *erofs_get_pcpubuf(unsigned int pagenr)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+#define erofs_put_pcpubuf(buf) do {} while (0)
+#endif
+
+#ifdef CONFIG_EROFS_FS_ZIP
+int erofs_workgroup_put(struct erofs_workgroup *grp);
+struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
+					     pgoff_t index, bool *tag);
+int erofs_register_workgroup(struct super_block *sb,
+			     struct erofs_workgroup *grp, bool tag);
+void erofs_workgroup_free_rcu(struct erofs_workgroup *grp);
+void erofs_shrinker_register(struct super_block *sb);
+void erofs_shrinker_unregister(struct super_block *sb);
+int __init erofs_init_shrinker(void);
+void erofs_exit_shrinker(void);
+int __init z_erofs_init_zip_subsystem(void);
+void z_erofs_exit_zip_subsystem(void);
+int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
+				       struct erofs_workgroup *egrp);
+int erofs_try_to_free_cached_page(struct address_space *mapping,
+				  struct page *page);
+#else
+static inline void erofs_shrinker_register(struct super_block *sb) {}
+static inline void erofs_shrinker_unregister(struct super_block *sb) {}
+static inline int erofs_init_shrinker(void) { return 0; }
+static inline void erofs_exit_shrinker(void) {}
+static inline int z_erofs_init_zip_subsystem(void) { return 0; }
+static inline void z_erofs_exit_zip_subsystem(void) {}
+#endif	/* !CONFIG_EROFS_FS_ZIP */
+
+#define EFSCORRUPTED    EUCLEAN         /* Filesystem is corrupted */
+
+#endif	/* __EROFS_INTERNAL_H */
+
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
new file mode 100644
index 000000000000..8832b5d95d91
--- /dev/null
+++ b/fs/erofs/namei.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "xattr.h"
+
+#include <trace/events/erofs.h>
+
+struct erofs_qstr {
+	const unsigned char *name;
+	const unsigned char *end;
+};
+
+/* based on the end of qn is accurate and it must have the trailing '\0' */
+static inline int dirnamecmp(const struct erofs_qstr *qn,
+			     const struct erofs_qstr *qd,
+			     unsigned int *matched)
+{
+	unsigned int i = *matched;
+
+	/*
+	 * on-disk error, let's only BUG_ON in the debugging mode.
+	 * otherwise, it will return 1 to just skip the invalid name
+	 * and go on (in consideration of the lookup performance).
+	 */
+	DBG_BUGON(qd->name > qd->end);
+
+	/* qd could not have trailing '\0' */
+	/* However it is absolutely safe if < qd->end */
+	while (qd->name + i < qd->end && qd->name[i] != '\0') {
+		if (qn->name[i] != qd->name[i]) {
+			*matched = i;
+			return qn->name[i] > qd->name[i] ? 1 : -1;
+		}
+		++i;
+	}
+	*matched = i;
+	/* See comments in __d_alloc on the terminating NUL character */
+	return qn->name[i] == '\0' ? 0 : 1;
+}
+
+#define nameoff_from_disk(off, sz)	(le16_to_cpu(off) & ((sz) - 1))
+
+static struct erofs_dirent *find_target_dirent(struct erofs_qstr *name,
+					       u8 *data,
+					       unsigned int dirblksize,
+					       const int ndirents)
+{
+	int head, back;
+	unsigned int startprfx, endprfx;
+	struct erofs_dirent *const de = (struct erofs_dirent *)data;
+
+	/* since the 1st dirent has been evaluated previously */
+	head = 1;
+	back = ndirents - 1;
+	startprfx = endprfx = 0;
+
+	while (head <= back) {
+		const int mid = head + (back - head) / 2;
+		const int nameoff = nameoff_from_disk(de[mid].nameoff,
+						      dirblksize);
+		unsigned int matched = min(startprfx, endprfx);
+		struct erofs_qstr dname = {
+			.name = data + nameoff,
+			.end = unlikely(mid >= ndirents - 1) ?
+				data + dirblksize :
+				data + nameoff_from_disk(de[mid + 1].nameoff,
+							 dirblksize)
+		};
+
+		/* string comparison without already matched prefix */
+		int ret = dirnamecmp(name, &dname, &matched);
+
+		if (unlikely(!ret)) {
+			return de + mid;
+		} else if (ret > 0) {
+			head = mid + 1;
+			startprfx = matched;
+		} else {
+			back = mid - 1;
+			endprfx = matched;
+		}
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct page *find_target_block_classic(struct inode *dir,
+					      struct erofs_qstr *name,
+					      int *_ndirents)
+{
+	unsigned int startprfx, endprfx;
+	int head, back;
+	struct address_space *const mapping = dir->i_mapping;
+	struct page *candidate = ERR_PTR(-ENOENT);
+
+	startprfx = endprfx = 0;
+	head = 0;
+	back = inode_datablocks(dir) - 1;
+
+	while (head <= back) {
+		const int mid = head + (back - head) / 2;
+		struct page *page = read_mapping_page(mapping, mid, NULL);
+
+		if (!IS_ERR(page)) {
+			struct erofs_dirent *de = kmap_atomic(page);
+			const int nameoff = nameoff_from_disk(de->nameoff,
+							      EROFS_BLKSIZ);
+			const int ndirents = nameoff / sizeof(*de);
+			int diff;
+			unsigned int matched;
+			struct erofs_qstr dname;
+
+			if (unlikely(!ndirents)) {
+				kunmap_atomic(de);
+				put_page(page);
+				errln("corrupted dir block %d @ nid %llu",
+				      mid, EROFS_V(dir)->nid);
+				DBG_BUGON(1);
+				page = ERR_PTR(-EFSCORRUPTED);
+				goto out;
+			}
+
+			matched = min(startprfx, endprfx);
+
+			dname.name = (u8 *)de + nameoff;
+			if (ndirents == 1)
+				dname.end = (u8 *)de + EROFS_BLKSIZ;
+			else
+				dname.end = (u8 *)de +
+					nameoff_from_disk(de[1].nameoff,
+							  EROFS_BLKSIZ);
+
+			/* string comparison without already matched prefix */
+			diff = dirnamecmp(name, &dname, &matched);
+			kunmap_atomic(de);
+
+			if (unlikely(!diff)) {
+				*_ndirents = 0;
+				goto out;
+			} else if (diff > 0) {
+				head = mid + 1;
+				startprfx = matched;
+
+				if (!IS_ERR(candidate))
+					put_page(candidate);
+				candidate = page;
+				*_ndirents = ndirents;
+			} else {
+				put_page(page);
+
+				back = mid - 1;
+				endprfx = matched;
+			}
+			continue;
+		}
+out:		/* free if the candidate is valid */
+		if (!IS_ERR(candidate))
+			put_page(candidate);
+		return page;
+	}
+	return candidate;
+}
+
+int erofs_namei(struct inode *dir,
+		struct qstr *name,
+		erofs_nid_t *nid, unsigned int *d_type)
+{
+	int ndirents;
+	struct page *page;
+	void *data;
+	struct erofs_dirent *de;
+	struct erofs_qstr qn;
+
+	if (unlikely(!dir->i_size))
+		return -ENOENT;
+
+	qn.name = name->name;
+	qn.end = name->name + name->len;
+
+	ndirents = 0;
+	page = find_target_block_classic(dir, &qn, &ndirents);
+
+	if (IS_ERR(page))
+		return PTR_ERR(page);
+
+	data = kmap_atomic(page);
+	/* the target page has been mapped */
+	if (ndirents)
+		de = find_target_dirent(&qn, data, EROFS_BLKSIZ, ndirents);
+	else
+		de = (struct erofs_dirent *)data;
+
+	if (!IS_ERR(de)) {
+		*nid = le64_to_cpu(de->nid);
+		*d_type = de->file_type;
+	}
+
+	kunmap_atomic(data);
+	put_page(page);
+
+	return PTR_ERR_OR_ZERO(de);
+}
+
+/* NOTE: i_mutex is already held by vfs */
+static struct dentry *erofs_lookup(struct inode *dir,
+				   struct dentry *dentry,
+				   unsigned int flags)
+{
+	int err;
+	erofs_nid_t nid;
+	unsigned int d_type;
+	struct inode *inode;
+
+	DBG_BUGON(!d_really_is_negative(dentry));
+	/* dentry must be unhashed in lookup, no need to worry about */
+	DBG_BUGON(!d_unhashed(dentry));
+
+	trace_erofs_lookup(dir, dentry, flags);
+
+	/* file name exceeds fs limit */
+	if (unlikely(dentry->d_name.len > EROFS_NAME_LEN))
+		return ERR_PTR(-ENAMETOOLONG);
+
+	/* false uninitialized warnings on gcc 4.8.x */
+	err = erofs_namei(dir, &dentry->d_name, &nid, &d_type);
+
+	if (err == -ENOENT) {
+		/* negative dentry */
+		inode = NULL;
+	} else if (unlikely(err)) {
+		inode = ERR_PTR(err);
+	} else {
+		debugln("%s, %s (nid %llu) found, d_type %u", __func__,
+			dentry->d_name.name, nid, d_type);
+		inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR);
+	}
+	return d_splice_alias(inode, dentry);
+}
+
+const struct inode_operations erofs_dir_iops = {
+	.lookup = erofs_lookup,
+	.getattr = erofs_getattr,
+#ifdef CONFIG_EROFS_FS_XATTR
+	.listxattr = erofs_listxattr,
+#endif
+	.get_acl = erofs_get_acl,
+};
+
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
new file mode 100644
index 000000000000..6d3a9bcb8daa
--- /dev/null
+++ b/fs/erofs/super.c
@@ -0,0 +1,669 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include <linux/module.h>
+#include <linux/buffer_head.h>
+#include <linux/statfs.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include "xattr.h"
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/erofs.h>
+
+static struct kmem_cache *erofs_inode_cachep __read_mostly;
+
+static void init_once(void *ptr)
+{
+	struct erofs_vnode *vi = ptr;
+
+	inode_init_once(&vi->vfs_inode);
+}
+
+static int __init erofs_init_inode_cache(void)
+{
+	erofs_inode_cachep = kmem_cache_create("erofs_inode",
+					       sizeof(struct erofs_vnode), 0,
+					       SLAB_RECLAIM_ACCOUNT,
+					       init_once);
+
+	return erofs_inode_cachep ? 0 : -ENOMEM;
+}
+
+static void erofs_exit_inode_cache(void)
+{
+	kmem_cache_destroy(erofs_inode_cachep);
+}
+
+static struct inode *alloc_inode(struct super_block *sb)
+{
+	struct erofs_vnode *vi =
+		kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
+
+	if (!vi)
+		return NULL;
+
+	/* zero out everything except vfs_inode */
+	memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode));
+	return &vi->vfs_inode;
+}
+
+static void free_inode(struct inode *inode)
+{
+	struct erofs_vnode *vi = EROFS_V(inode);
+
+	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
+	if (is_inode_fast_symlink(inode))
+		kfree(inode->i_link);
+
+	kfree(vi->xattr_shared_xattrs);
+
+	kmem_cache_free(erofs_inode_cachep, vi);
+}
+
+static bool check_layout_compatibility(struct super_block *sb,
+				       struct erofs_super_block *layout)
+{
+	const unsigned int requirements = le32_to_cpu(layout->requirements);
+
+	EROFS_SB(sb)->requirements = requirements;
+
+	/* check if current kernel meets all mandatory requirements */
+	if (requirements & (~EROFS_ALL_REQUIREMENTS)) {
+		errln("unidentified requirements %x, please upgrade kernel version",
+		      requirements & ~EROFS_ALL_REQUIREMENTS);
+		return false;
+	}
+	return true;
+}
+
+static int superblock_read(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi;
+	struct buffer_head *bh;
+	struct erofs_super_block *layout;
+	unsigned int blkszbits;
+	int ret;
+
+	bh = sb_bread(sb, 0);
+
+	if (!bh) {
+		errln("cannot read erofs superblock");
+		return -EIO;
+	}
+
+	sbi = EROFS_SB(sb);
+	layout = (struct erofs_super_block *)((u8 *)bh->b_data
+		 + EROFS_SUPER_OFFSET);
+
+	ret = -EINVAL;
+	if (le32_to_cpu(layout->magic) != EROFS_SUPER_MAGIC_V1) {
+		errln("cannot find valid erofs superblock");
+		goto out;
+	}
+
+	blkszbits = layout->blkszbits;
+	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
+	if (unlikely(blkszbits != LOG_BLOCK_SIZE)) {
+		errln("blksize %u isn't supported on this platform",
+		      1 << blkszbits);
+		goto out;
+	}
+
+	if (!check_layout_compatibility(sb, layout))
+		goto out;
+
+	sbi->blocks = le32_to_cpu(layout->blocks);
+	sbi->meta_blkaddr = le32_to_cpu(layout->meta_blkaddr);
+#ifdef CONFIG_EROFS_FS_XATTR
+	sbi->xattr_blkaddr = le32_to_cpu(layout->xattr_blkaddr);
+#endif
+	sbi->islotbits = ffs(sizeof(struct erofs_inode_v1)) - 1;
+	sbi->root_nid = le16_to_cpu(layout->root_nid);
+	sbi->inos = le64_to_cpu(layout->inos);
+
+	sbi->build_time = le64_to_cpu(layout->build_time);
+	sbi->build_time_nsec = le32_to_cpu(layout->build_time_nsec);
+
+	memcpy(&sb->s_uuid, layout->uuid, sizeof(layout->uuid));
+
+	ret = strscpy(sbi->volume_name, layout->volume_name,
+		      sizeof(layout->volume_name));
+	if (ret < 0) {	/* -E2BIG */
+		errln("bad volume name without NIL terminator");
+		ret = -EFSCORRUPTED;
+		goto out;
+	}
+	ret = 0;
+out:
+	brelse(bh);
+	return ret;
+}
+
+#ifdef CONFIG_EROFS_FAULT_INJECTION
+const char *erofs_fault_name[FAULT_MAX] = {
+	[FAULT_KMALLOC]		= "kmalloc",
+	[FAULT_READ_IO]		= "read IO error",
+};
+
+static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				     unsigned int rate)
+{
+	struct erofs_fault_info *ffi = &sbi->fault_info;
+
+	if (rate) {
+		atomic_set(&ffi->inject_ops, 0);
+		ffi->inject_rate = rate;
+		ffi->inject_type = (1 << FAULT_MAX) - 1;
+	} else {
+		memset(ffi, 0, sizeof(struct erofs_fault_info));
+	}
+
+	set_opt(sbi, FAULT_INJECTION);
+}
+
+static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				  substring_t *args)
+{
+	int rate = 0;
+
+	if (args->from && match_int(args, &rate))
+		return -EINVAL;
+
+	__erofs_build_fault_attr(sbi, rate);
+	return 0;
+}
+
+static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
+{
+	return sbi->fault_info.inject_rate;
+}
+#else
+static void __erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				     unsigned int rate)
+{
+}
+
+static int erofs_build_fault_attr(struct erofs_sb_info *sbi,
+				  substring_t *args)
+{
+	infoln("fault_injection options not supported");
+	return 0;
+}
+
+static unsigned int erofs_get_fault_rate(struct erofs_sb_info *sbi)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_EROFS_FS_ZIP
+static int erofs_build_cache_strategy(struct erofs_sb_info *sbi,
+				      substring_t *args)
+{
+	const char *cs = match_strdup(args);
+	int err = 0;
+
+	if (!cs) {
+		errln("Not enough memory to store cache strategy");
+		return -ENOMEM;
+	}
+
+	if (!strcmp(cs, "disabled")) {
+		sbi->cache_strategy = EROFS_ZIP_CACHE_DISABLED;
+	} else if (!strcmp(cs, "readahead")) {
+		sbi->cache_strategy = EROFS_ZIP_CACHE_READAHEAD;
+	} else if (!strcmp(cs, "readaround")) {
+		sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
+	} else {
+		errln("Unrecognized cache strategy \"%s\"", cs);
+		err = -EINVAL;
+	}
+	kfree(cs);
+	return err;
+}
+#else
+static int erofs_build_cache_strategy(struct erofs_sb_info *sbi,
+				      substring_t *args)
+{
+	infoln("EROFS compression is disabled, so cache strategy is ignored");
+	return 0;
+}
+#endif
+
+/* set up default EROFS parameters */
+static void default_options(struct erofs_sb_info *sbi)
+{
+#ifdef CONFIG_EROFS_FS_ZIP
+	sbi->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
+	sbi->max_sync_decompress_pages = 3;
+#endif
+#ifdef CONFIG_EROFS_FS_XATTR
+	set_opt(sbi, XATTR_USER);
+#endif
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+	set_opt(sbi, POSIX_ACL);
+#endif
+}
+
+enum {
+	Opt_user_xattr,
+	Opt_nouser_xattr,
+	Opt_acl,
+	Opt_noacl,
+	Opt_fault_injection,
+	Opt_cache_strategy,
+	Opt_err
+};
+
+static match_table_t erofs_tokens = {
+	{Opt_user_xattr, "user_xattr"},
+	{Opt_nouser_xattr, "nouser_xattr"},
+	{Opt_acl, "acl"},
+	{Opt_noacl, "noacl"},
+	{Opt_fault_injection, "fault_injection=%u"},
+	{Opt_cache_strategy, "cache_strategy=%s"},
+	{Opt_err, NULL}
+};
+
+static int parse_options(struct super_block *sb, char *options)
+{
+	substring_t args[MAX_OPT_ARGS];
+	char *p;
+	int err;
+
+	if (!options)
+		return 0;
+
+	while ((p = strsep(&options, ","))) {
+		int token;
+
+		if (!*p)
+			continue;
+
+		args[0].to = args[0].from = NULL;
+		token = match_token(p, erofs_tokens, args);
+
+		switch (token) {
+#ifdef CONFIG_EROFS_FS_XATTR
+		case Opt_user_xattr:
+			set_opt(EROFS_SB(sb), XATTR_USER);
+			break;
+		case Opt_nouser_xattr:
+			clear_opt(EROFS_SB(sb), XATTR_USER);
+			break;
+#else
+		case Opt_user_xattr:
+			infoln("user_xattr options not supported");
+			break;
+		case Opt_nouser_xattr:
+			infoln("nouser_xattr options not supported");
+			break;
+#endif
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+		case Opt_acl:
+			set_opt(EROFS_SB(sb), POSIX_ACL);
+			break;
+		case Opt_noacl:
+			clear_opt(EROFS_SB(sb), POSIX_ACL);
+			break;
+#else
+		case Opt_acl:
+			infoln("acl options not supported");
+			break;
+		case Opt_noacl:
+			infoln("noacl options not supported");
+			break;
+#endif
+		case Opt_fault_injection:
+			err = erofs_build_fault_attr(EROFS_SB(sb), args);
+			if (err)
+				return err;
+			break;
+		case Opt_cache_strategy:
+			err = erofs_build_cache_strategy(EROFS_SB(sb), args);
+			if (err)
+				return err;
+			break;
+		default:
+			errln("Unrecognized mount option \"%s\" or missing value", p);
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+
+#ifdef CONFIG_EROFS_FS_ZIP
+static const struct address_space_operations managed_cache_aops;
+
+static int managed_cache_releasepage(struct page *page, gfp_t gfp_mask)
+{
+	int ret = 1;	/* 0 - busy */
+	struct address_space *const mapping = page->mapping;
+
+	DBG_BUGON(!PageLocked(page));
+	DBG_BUGON(mapping->a_ops != &managed_cache_aops);
+
+	if (PagePrivate(page))
+		ret = erofs_try_to_free_cached_page(mapping, page);
+
+	return ret;
+}
+
+static void managed_cache_invalidatepage(struct page *page,
+					 unsigned int offset,
+					 unsigned int length)
+{
+	const unsigned int stop = length + offset;
+
+	DBG_BUGON(!PageLocked(page));
+
+	/* Check for potential overflow in debug mode */
+	DBG_BUGON(stop > PAGE_SIZE || stop < length);
+
+	if (offset == 0 && stop == PAGE_SIZE)
+		while (!managed_cache_releasepage(page, GFP_NOFS))
+			cond_resched();
+}
+
+static const struct address_space_operations managed_cache_aops = {
+	.releasepage = managed_cache_releasepage,
+	.invalidatepage = managed_cache_invalidatepage,
+};
+
+static int erofs_init_managed_cache(struct super_block *sb)
+{
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+	struct inode *const inode = new_inode(sb);
+
+	if (unlikely(!inode))
+		return -ENOMEM;
+
+	set_nlink(inode, 1);
+	inode->i_size = OFFSET_MAX;
+
+	inode->i_mapping->a_ops = &managed_cache_aops;
+	mapping_set_gfp_mask(inode->i_mapping,
+			     GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE);
+	sbi->managed_cache = inode;
+	return 0;
+}
+#else
+static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
+#endif
+
+static int erofs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode;
+	struct erofs_sb_info *sbi;
+	int err;
+
+	infoln("fill_super, device -> %s", sb->s_id);
+	infoln("options -> %s", (char *)data);
+
+	sb->s_magic = EROFS_SUPER_MAGIC;
+
+	if (unlikely(!sb_set_blocksize(sb, EROFS_BLKSIZ))) {
+		errln("failed to set erofs blksize");
+		return -EINVAL;
+	}
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (unlikely(!sbi))
+		return -ENOMEM;
+
+	sb->s_fs_info = sbi;
+	err = superblock_read(sb);
+	if (err)
+		return err;
+
+	sb->s_flags |= SB_RDONLY | SB_NOATIME;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_time_gran = 1;
+
+	sb->s_op = &erofs_sops;
+
+#ifdef CONFIG_EROFS_FS_XATTR
+	sb->s_xattr = erofs_xattr_handlers;
+#endif
+	/* set erofs default mount options */
+	default_options(sbi);
+
+	err = parse_options(sb, data);
+	if (unlikely(err))
+		return err;
+
+	if (!silent)
+		infoln("root inode @ nid %llu", ROOT_NID(sbi));
+
+	if (test_opt(sbi, POSIX_ACL))
+		sb->s_flags |= SB_POSIXACL;
+	else
+		sb->s_flags &= ~SB_POSIXACL;
+
+#ifdef CONFIG_EROFS_FS_ZIP
+	INIT_RADIX_TREE(&sbi->workstn_tree, GFP_ATOMIC);
+#endif
+
+	/* get the root inode */
+	inode = erofs_iget(sb, ROOT_NID(sbi), true);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	if (unlikely(!S_ISDIR(inode->i_mode))) {
+		errln("rootino(nid %llu) is not a directory(i_mode %o)",
+		      ROOT_NID(sbi), inode->i_mode);
+		iput(inode);
+		return -EINVAL;
+	}
+
+	sb->s_root = d_make_root(inode);
+	if (unlikely(!sb->s_root))
+		return -ENOMEM;
+
+	erofs_shrinker_register(sb);
+	/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
+	err = erofs_init_managed_cache(sb);
+	if (unlikely(err))
+		return err;
+
+	if (!silent)
+		infoln("mounted on %s with opts: %s.", sb->s_id, (char *)data);
+	return 0;
+}
+
+static struct dentry *erofs_mount(struct file_system_type *fs_type, int flags,
+				  const char *dev_name, void *data)
+{
+	return mount_bdev(fs_type, flags, dev_name, data, erofs_fill_super);
+}
+
+/*
+ * could be triggered after deactivate_locked_super()
+ * is called, thus including umount and failed to initialize.
+ */
+static void erofs_kill_sb(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi;
+
+	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
+	infoln("unmounting for %s", sb->s_id);
+
+	kill_block_super(sb);
+
+	sbi = EROFS_SB(sb);
+	if (!sbi)
+		return;
+	kfree(sbi);
+	sb->s_fs_info = NULL;
+}
+
+/* called when ->s_root is non-NULL */
+static void erofs_put_super(struct super_block *sb)
+{
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+
+	DBG_BUGON(!sbi);
+
+	erofs_shrinker_unregister(sb);
+#ifdef CONFIG_EROFS_FS_ZIP
+	iput(sbi->managed_cache);
+	sbi->managed_cache = NULL;
+#endif
+}
+
+static struct file_system_type erofs_fs_type = {
+	.owner          = THIS_MODULE,
+	.name           = "erofs",
+	.mount          = erofs_mount,
+	.kill_sb        = erofs_kill_sb,
+	.fs_flags       = FS_REQUIRES_DEV,
+};
+MODULE_ALIAS_FS("erofs");
+
+static int __init erofs_module_init(void)
+{
+	int err;
+
+	erofs_check_ondisk_layout_definitions();
+	infoln("initializing erofs " EROFS_VERSION);
+
+	err = erofs_init_inode_cache();
+	if (err)
+		goto icache_err;
+
+	err = erofs_init_shrinker();
+	if (err)
+		goto shrinker_err;
+
+	err = z_erofs_init_zip_subsystem();
+	if (err)
+		goto zip_err;
+
+	err = register_filesystem(&erofs_fs_type);
+	if (err)
+		goto fs_err;
+
+	infoln("successfully to initialize erofs");
+	return 0;
+
+fs_err:
+	z_erofs_exit_zip_subsystem();
+zip_err:
+	erofs_exit_shrinker();
+shrinker_err:
+	erofs_exit_inode_cache();
+icache_err:
+	return err;
+}
+
+static void __exit erofs_module_exit(void)
+{
+	unregister_filesystem(&erofs_fs_type);
+	z_erofs_exit_zip_subsystem();
+	erofs_exit_shrinker();
+	erofs_exit_inode_cache();
+	infoln("successfully finalize erofs");
+}
+
+/* get filesystem statistics */
+static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+
+	buf->f_type = sb->s_magic;
+	buf->f_bsize = EROFS_BLKSIZ;
+	buf->f_blocks = sbi->blocks;
+	buf->f_bfree = buf->f_bavail = 0;
+
+	buf->f_files = ULLONG_MAX;
+	buf->f_ffree = ULLONG_MAX - sbi->inos;
+
+	buf->f_namelen = EROFS_NAME_LEN;
+
+	buf->f_fsid.val[0] = (u32)id;
+	buf->f_fsid.val[1] = (u32)(id >> 32);
+	return 0;
+}
+
+static int erofs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb);
+
+#ifdef CONFIG_EROFS_FS_XATTR
+	if (test_opt(sbi, XATTR_USER))
+		seq_puts(seq, ",user_xattr");
+	else
+		seq_puts(seq, ",nouser_xattr");
+#endif
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+	if (test_opt(sbi, POSIX_ACL))
+		seq_puts(seq, ",acl");
+	else
+		seq_puts(seq, ",noacl");
+#endif
+	if (test_opt(sbi, FAULT_INJECTION))
+		seq_printf(seq, ",fault_injection=%u",
+			   erofs_get_fault_rate(sbi));
+#ifdef CONFIG_EROFS_FS_ZIP
+	if (sbi->cache_strategy == EROFS_ZIP_CACHE_DISABLED) {
+		seq_puts(seq, ",cache_strategy=disabled");
+	} else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) {
+		seq_puts(seq, ",cache_strategy=readahead");
+	} else if (sbi->cache_strategy == EROFS_ZIP_CACHE_READAROUND) {
+		seq_puts(seq, ",cache_strategy=readaround");
+	} else {
+		seq_puts(seq, ",cache_strategy=(unknown)");
+		DBG_BUGON(1);
+	}
+#endif
+	return 0;
+}
+
+static int erofs_remount(struct super_block *sb, int *flags, char *data)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	unsigned int org_mnt_opt = sbi->mount_opt;
+	unsigned int org_inject_rate = erofs_get_fault_rate(sbi);
+	int err;
+
+	DBG_BUGON(!sb_rdonly(sb));
+	err = parse_options(sb, data);
+	if (err)
+		goto out;
+
+	if (test_opt(sbi, POSIX_ACL))
+		sb->s_flags |= SB_POSIXACL;
+	else
+		sb->s_flags &= ~SB_POSIXACL;
+
+	*flags |= SB_RDONLY;
+	return 0;
+out:
+	__erofs_build_fault_attr(sbi, org_inject_rate);
+	sbi->mount_opt = org_mnt_opt;
+
+	return err;
+}
+
+const struct super_operations erofs_sops = {
+	.put_super = erofs_put_super,
+	.alloc_inode = alloc_inode,
+	.free_inode = free_inode,
+	.statfs = erofs_statfs,
+	.show_options = erofs_show_options,
+	.remount_fs = erofs_remount,
+};
+
+module_init(erofs_module_init);
+module_exit(erofs_module_exit);
+
+MODULE_DESCRIPTION("Enhanced ROM File System");
+MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
+MODULE_LICENSE("GPL");
+
diff --git a/fs/erofs/tagptr.h b/fs/erofs/tagptr.h
new file mode 100644
index 000000000000..a72897c86744
--- /dev/null
+++ b/fs/erofs/tagptr.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * A tagged pointer implementation
+ *
+ * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_TAGPTR_H
+#define __EROFS_FS_TAGPTR_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/*
+ * the name of tagged pointer types are tagptr{1, 2, 3...}_t
+ * avoid directly using the internal structs __tagptr{1, 2, 3...}
+ */
+#define __MAKE_TAGPTR(n) \
+typedef struct __tagptr##n {	\
+	uintptr_t v;	\
+} tagptr##n##_t;
+
+__MAKE_TAGPTR(1)
+__MAKE_TAGPTR(2)
+__MAKE_TAGPTR(3)
+__MAKE_TAGPTR(4)
+
+#undef __MAKE_TAGPTR
+
+extern void __compiletime_error("bad tagptr tags")
+	__bad_tagptr_tags(void);
+
+extern void __compiletime_error("bad tagptr type")
+	__bad_tagptr_type(void);
+
+/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */
+#define __tagptr_mask_1(ptr, n)	\
+	__builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \
+		(1UL << (n)) - 1 :
+
+#define __tagptr_mask(ptr)	(\
+	__tagptr_mask_1(ptr, 1) ( \
+	__tagptr_mask_1(ptr, 2) ( \
+	__tagptr_mask_1(ptr, 3) ( \
+	__tagptr_mask_1(ptr, 4) ( \
+	__bad_tagptr_type(), 0)))))
+
+/* generate a tagged pointer from a raw value */
+#define tagptr_init(type, val) \
+	((typeof(type)){ .v = (uintptr_t)(val) })
+
+/*
+ * directly cast a tagged pointer to the native pointer type, which
+ * could be used for backward compatibility of existing code.
+ */
+#define tagptr_cast_ptr(tptr) ((void *)(tptr).v)
+
+/* encode tagged pointers */
+#define tagptr_fold(type, ptr, _tags) ({ \
+	const typeof(_tags) tags = (_tags); \
+	if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \
+		__bad_tagptr_tags(); \
+tagptr_init(type, (uintptr_t)(ptr) | tags); })
+
+/* decode tagged pointers */
+#define tagptr_unfold_ptr(tptr) \
+	((void *)((tptr).v & ~__tagptr_mask(tptr)))
+
+#define tagptr_unfold_tags(tptr) \
+	((tptr).v & __tagptr_mask(tptr))
+
+/* operations for the tagger pointer */
+#define tagptr_eq(_tptr1, _tptr2) ({ \
+	typeof(_tptr1) tptr1 = (_tptr1); \
+	typeof(_tptr2) tptr2 = (_tptr2); \
+	(void)(&tptr1 == &tptr2); \
+(tptr1).v == (tptr2).v; })
+
+/* lock-free CAS operation */
+#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \
+	typeof(_ptptr) ptptr = (_ptptr); \
+	typeof(_o) o = (_o); \
+	typeof(_n) n = (_n); \
+	(void)(&o == &n); \
+	(void)(&o == ptptr); \
+tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
+
+/* wrap WRITE_ONCE if atomic update is needed */
+#define tagptr_replace_tags(_ptptr, tags) ({ \
+	typeof(_ptptr) ptptr = (_ptptr); \
+	*ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \
+*ptptr; })
+
+#define tagptr_set_tags(_ptptr, _tags) ({ \
+	typeof(_ptptr) ptptr = (_ptptr); \
+	const typeof(_tags) tags = (_tags); \
+	if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
+		__bad_tagptr_tags(); \
+	ptptr->v |= tags; \
+*ptptr; })
+
+#define tagptr_clear_tags(_ptptr, _tags) ({ \
+	typeof(_ptptr) ptptr = (_ptptr); \
+	const typeof(_tags) tags = (_tags); \
+	if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
+		__bad_tagptr_tags(); \
+	ptptr->v &= ~tags; \
+*ptptr; })
+
+#endif	/* __EROFS_FS_TAGPTR_H */
+
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
new file mode 100644
index 000000000000..1dd041aa0f5a
--- /dev/null
+++ b/fs/erofs/utils.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+#include <linux/pagevec.h>
+
+struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp, bool nofail)
+{
+	struct page *page;
+
+	if (!list_empty(pool)) {
+		page = lru_to_page(pool);
+		DBG_BUGON(page_ref_count(page) != 1);
+		list_del(&page->lru);
+	} else {
+		page = alloc_pages(gfp | (nofail ? __GFP_NOFAIL : 0), 0);
+	}
+	return page;
+}
+
+#if (EROFS_PCPUBUF_NR_PAGES > 0)
+static struct {
+	u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES];
+} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS];
+
+void *erofs_get_pcpubuf(unsigned int pagenr)
+{
+	preempt_disable();
+	return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE];
+}
+#endif
+
+#ifdef CONFIG_EROFS_FS_ZIP
+/* global shrink count (for all mounted EROFS instances) */
+static atomic_long_t erofs_global_shrink_cnt;
+
+#define __erofs_workgroup_get(grp)	atomic_inc(&(grp)->refcount)
+#define __erofs_workgroup_put(grp)	atomic_dec(&(grp)->refcount)
+
+static int erofs_workgroup_get(struct erofs_workgroup *grp)
+{
+	int o;
+
+repeat:
+	o = erofs_wait_on_workgroup_freezed(grp);
+	if (unlikely(o <= 0))
+		return -1;
+
+	if (unlikely(atomic_cmpxchg(&grp->refcount, o, o + 1) != o))
+		goto repeat;
+
+	/* decrease refcount paired by erofs_workgroup_put */
+	if (unlikely(o == 1))
+		atomic_long_dec(&erofs_global_shrink_cnt);
+	return 0;
+}
+
+struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
+					     pgoff_t index, bool *tag)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+	struct erofs_workgroup *grp;
+
+repeat:
+	rcu_read_lock();
+	grp = radix_tree_lookup(&sbi->workstn_tree, index);
+	if (grp) {
+		*tag = xa_pointer_tag(grp);
+		grp = xa_untag_pointer(grp);
+
+		if (erofs_workgroup_get(grp)) {
+			/* prefer to relax rcu read side */
+			rcu_read_unlock();
+			goto repeat;
+		}
+
+		DBG_BUGON(index != grp->index);
+	}
+	rcu_read_unlock();
+	return grp;
+}
+
+int erofs_register_workgroup(struct super_block *sb,
+			     struct erofs_workgroup *grp,
+			     bool tag)
+{
+	struct erofs_sb_info *sbi;
+	int err;
+
+	/* grp shouldn't be broken or used before */
+	if (unlikely(atomic_read(&grp->refcount) != 1)) {
+		DBG_BUGON(1);
+		return -EINVAL;
+	}
+
+	err = radix_tree_preload(GFP_NOFS);
+	if (err)
+		return err;
+
+	sbi = EROFS_SB(sb);
+	xa_lock(&sbi->workstn_tree);
+
+	grp = xa_tag_pointer(grp, tag);
+
+	/*
+	 * Bump up reference count before making this workgroup
+	 * visible to other users in order to avoid potential UAF
+	 * without serialized by workstn_lock.
+	 */
+	__erofs_workgroup_get(grp);
+
+	err = radix_tree_insert(&sbi->workstn_tree, grp->index, grp);
+	if (unlikely(err))
+		/*
+		 * it's safe to decrease since the workgroup isn't visible
+		 * and refcount >= 2 (cannot be freezed).
+		 */
+		__erofs_workgroup_put(grp);
+
+	xa_unlock(&sbi->workstn_tree);
+	radix_tree_preload_end();
+	return err;
+}
+
+static void  __erofs_workgroup_free(struct erofs_workgroup *grp)
+{
+	atomic_long_dec(&erofs_global_shrink_cnt);
+	erofs_workgroup_free_rcu(grp);
+}
+
+int erofs_workgroup_put(struct erofs_workgroup *grp)
+{
+	int count = atomic_dec_return(&grp->refcount);
+
+	if (count == 1)
+		atomic_long_inc(&erofs_global_shrink_cnt);
+	else if (!count)
+		__erofs_workgroup_free(grp);
+	return count;
+}
+
+static void erofs_workgroup_unfreeze_final(struct erofs_workgroup *grp)
+{
+	erofs_workgroup_unfreeze(grp, 0);
+	__erofs_workgroup_free(grp);
+}
+
+static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
+					   struct erofs_workgroup *grp,
+					   bool cleanup)
+{
+	/*
+	 * If managed cache is on, refcount of workgroups
+	 * themselves could be < 0 (freezed). In other words,
+	 * there is no guarantee that all refcounts > 0.
+	 */
+	if (!erofs_workgroup_try_to_freeze(grp, 1))
+		return false;
+
+	/*
+	 * Note that all cached pages should be unattached
+	 * before deleted from the radix tree. Otherwise some
+	 * cached pages could be still attached to the orphan
+	 * old workgroup when the new one is available in the tree.
+	 */
+	if (erofs_try_to_free_all_cached_pages(sbi, grp)) {
+		erofs_workgroup_unfreeze(grp, 1);
+		return false;
+	}
+
+	/*
+	 * It's impossible to fail after the workgroup is freezed,
+	 * however in order to avoid some race conditions, add a
+	 * DBG_BUGON to observe this in advance.
+	 */
+	DBG_BUGON(xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
+						     grp->index)) != grp);
+
+	/*
+	 * If managed cache is on, last refcount should indicate
+	 * the related workstation.
+	 */
+	erofs_workgroup_unfreeze_final(grp);
+	return true;
+}
+
+static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi,
+					      unsigned long nr_shrink,
+					      bool cleanup)
+{
+	pgoff_t first_index = 0;
+	void *batch[PAGEVEC_SIZE];
+	unsigned int freed = 0;
+
+	int i, found;
+repeat:
+	xa_lock(&sbi->workstn_tree);
+
+	found = radix_tree_gang_lookup(&sbi->workstn_tree,
+				       batch, first_index, PAGEVEC_SIZE);
+
+	for (i = 0; i < found; ++i) {
+		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
+
+		first_index = grp->index + 1;
+
+		/* try to shrink each valid workgroup */
+		if (!erofs_try_to_release_workgroup(sbi, grp, cleanup))
+			continue;
+
+		++freed;
+		if (unlikely(!--nr_shrink))
+			break;
+	}
+	xa_unlock(&sbi->workstn_tree);
+
+	if (i && nr_shrink)
+		goto repeat;
+	return freed;
+}
+
+/* protected by 'erofs_sb_list_lock' */
+static unsigned int shrinker_run_no;
+
+/* protects the mounted 'erofs_sb_list' */
+static DEFINE_SPINLOCK(erofs_sb_list_lock);
+static LIST_HEAD(erofs_sb_list);
+
+void erofs_shrinker_register(struct super_block *sb)
+{
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
+
+	mutex_init(&sbi->umount_mutex);
+
+	spin_lock(&erofs_sb_list_lock);
+	list_add(&sbi->list, &erofs_sb_list);
+	spin_unlock(&erofs_sb_list_lock);
+}
+
+void erofs_shrinker_unregister(struct super_block *sb)
+{
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+
+	mutex_lock(&sbi->umount_mutex);
+	erofs_shrink_workstation(sbi, ~0UL, true);
+
+	spin_lock(&erofs_sb_list_lock);
+	list_del(&sbi->list);
+	spin_unlock(&erofs_sb_list_lock);
+	mutex_unlock(&sbi->umount_mutex);
+}
+
+static unsigned long erofs_shrink_count(struct shrinker *shrink,
+					struct shrink_control *sc)
+{
+	return atomic_long_read(&erofs_global_shrink_cnt);
+}
+
+static unsigned long erofs_shrink_scan(struct shrinker *shrink,
+				       struct shrink_control *sc)
+{
+	struct erofs_sb_info *sbi;
+	struct list_head *p;
+
+	unsigned long nr = sc->nr_to_scan;
+	unsigned int run_no;
+	unsigned long freed = 0;
+
+	spin_lock(&erofs_sb_list_lock);
+	do {
+		run_no = ++shrinker_run_no;
+	} while (run_no == 0);
+
+	/* Iterate over all mounted superblocks and try to shrink them */
+	p = erofs_sb_list.next;
+	while (p != &erofs_sb_list) {
+		sbi = list_entry(p, struct erofs_sb_info, list);
+
+		/*
+		 * We move the ones we do to the end of the list, so we stop
+		 * when we see one we have already done.
+		 */
+		if (sbi->shrinker_run_no == run_no)
+			break;
+
+		if (!mutex_trylock(&sbi->umount_mutex)) {
+			p = p->next;
+			continue;
+		}
+
+		spin_unlock(&erofs_sb_list_lock);
+		sbi->shrinker_run_no = run_no;
+
+		freed += erofs_shrink_workstation(sbi, nr, false);
+
+		spin_lock(&erofs_sb_list_lock);
+		/* Get the next list element before we move this one */
+		p = p->next;
+
+		/*
+		 * Move this one to the end of the list to provide some
+		 * fairness.
+		 */
+		list_move_tail(&sbi->list, &erofs_sb_list);
+		mutex_unlock(&sbi->umount_mutex);
+
+		if (freed >= nr)
+			break;
+	}
+	spin_unlock(&erofs_sb_list_lock);
+	return freed;
+}
+
+static struct shrinker erofs_shrinker_info = {
+	.scan_objects = erofs_shrink_scan,
+	.count_objects = erofs_shrink_count,
+	.seeks = DEFAULT_SEEKS,
+};
+
+int __init erofs_init_shrinker(void)
+{
+	return register_shrinker(&erofs_shrinker_info);
+}
+
+void erofs_exit_shrinker(void)
+{
+	unregister_shrinker(&erofs_shrinker_info);
+}
+#endif	/* !CONFIG_EROFS_FS_ZIP */
+
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
new file mode 100644
index 000000000000..a8286998a079
--- /dev/null
+++ b/fs/erofs/xattr.c
@@ -0,0 +1,703 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include <linux/security.h>
+#include "xattr.h"
+
+struct xattr_iter {
+	struct super_block *sb;
+	struct page *page;
+	void *kaddr;
+
+	erofs_blk_t blkaddr;
+	unsigned int ofs;
+};
+
+static inline void xattr_iter_end(struct xattr_iter *it, bool atomic)
+{
+	/* the only user of kunmap() is 'init_inode_xattrs' */
+	if (unlikely(!atomic))
+		kunmap(it->page);
+	else
+		kunmap_atomic(it->kaddr);
+
+	unlock_page(it->page);
+	put_page(it->page);
+}
+
+static inline void xattr_iter_end_final(struct xattr_iter *it)
+{
+	if (!it->page)
+		return;
+
+	xattr_iter_end(it, true);
+}
+
+static int init_inode_xattrs(struct inode *inode)
+{
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct xattr_iter it;
+	unsigned int i;
+	struct erofs_xattr_ibody_header *ih;
+	struct super_block *sb;
+	struct erofs_sb_info *sbi;
+	bool atomic_map;
+	int ret = 0;
+
+	/* the most case is that xattrs of this inode are initialized. */
+	if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
+		return 0;
+
+	if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE))
+		return -ERESTARTSYS;
+
+	/* someone has initialized xattrs for us? */
+	if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
+		goto out_unlock;
+
+	/*
+	 * bypass all xattr operations if ->xattr_isize is not greater than
+	 * sizeof(struct erofs_xattr_ibody_header), in detail:
+	 * 1) it is not enough to contain erofs_xattr_ibody_header then
+	 *    ->xattr_isize should be 0 (it means no xattr);
+	 * 2) it is just to contain erofs_xattr_ibody_header, which is on-disk
+	 *    undefined right now (maybe use later with some new sb feature).
+	 */
+	if (vi->xattr_isize == sizeof(struct erofs_xattr_ibody_header)) {
+		errln("xattr_isize %d of nid %llu is not supported yet",
+		      vi->xattr_isize, vi->nid);
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	} else if (vi->xattr_isize < sizeof(struct erofs_xattr_ibody_header)) {
+		if (unlikely(vi->xattr_isize)) {
+			errln("bogus xattr ibody @ nid %llu", vi->nid);
+			DBG_BUGON(1);
+			ret = -EFSCORRUPTED;
+			goto out_unlock;	/* xattr ondisk layout error */
+		}
+		ret = -ENOATTR;
+		goto out_unlock;
+	}
+
+	sb = inode->i_sb;
+	sbi = EROFS_SB(sb);
+	it.blkaddr = erofs_blknr(iloc(sbi, vi->nid) + vi->inode_isize);
+	it.ofs = erofs_blkoff(iloc(sbi, vi->nid) + vi->inode_isize);
+
+	it.page = erofs_get_inline_page(inode, it.blkaddr);
+	if (IS_ERR(it.page)) {
+		ret = PTR_ERR(it.page);
+		goto out_unlock;
+	}
+
+	/* read in shared xattr array (non-atomic, see kmalloc below) */
+	it.kaddr = kmap(it.page);
+	atomic_map = false;
+
+	ih = (struct erofs_xattr_ibody_header *)(it.kaddr + it.ofs);
+
+	vi->xattr_shared_count = ih->h_shared_count;
+	vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
+						sizeof(uint), GFP_KERNEL);
+	if (!vi->xattr_shared_xattrs) {
+		xattr_iter_end(&it, atomic_map);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	/* let's skip ibody header */
+	it.ofs += sizeof(struct erofs_xattr_ibody_header);
+
+	for (i = 0; i < vi->xattr_shared_count; ++i) {
+		if (unlikely(it.ofs >= EROFS_BLKSIZ)) {
+			/* cannot be unaligned */
+			DBG_BUGON(it.ofs != EROFS_BLKSIZ);
+			xattr_iter_end(&it, atomic_map);
+
+			it.page = erofs_get_meta_page(sb, ++it.blkaddr,
+						      S_ISDIR(inode->i_mode));
+			if (IS_ERR(it.page)) {
+				kfree(vi->xattr_shared_xattrs);
+				vi->xattr_shared_xattrs = NULL;
+				ret = PTR_ERR(it.page);
+				goto out_unlock;
+			}
+
+			it.kaddr = kmap_atomic(it.page);
+			atomic_map = true;
+			it.ofs = 0;
+		}
+		vi->xattr_shared_xattrs[i] =
+			le32_to_cpu(*(__le32 *)(it.kaddr + it.ofs));
+		it.ofs += sizeof(__le32);
+	}
+	xattr_iter_end(&it, atomic_map);
+
+	set_bit(EROFS_V_EA_INITED_BIT, &vi->flags);
+
+out_unlock:
+	clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags);
+	return ret;
+}
+
+/*
+ * the general idea for these return values is
+ * if    0 is returned, go on processing the current xattr;
+ *       1 (> 0) is returned, skip this round to process the next xattr;
+ *    -err (< 0) is returned, an error (maybe ENOXATTR) occurred
+ *                            and need to be handled
+ */
+struct xattr_iter_handlers {
+	int (*entry)(struct xattr_iter *_it, struct erofs_xattr_entry *entry);
+	int (*name)(struct xattr_iter *_it, unsigned int processed, char *buf,
+		    unsigned int len);
+	int (*alloc_buffer)(struct xattr_iter *_it, unsigned int value_sz);
+	void (*value)(struct xattr_iter *_it, unsigned int processed, char *buf,
+		      unsigned int len);
+};
+
+static inline int xattr_iter_fixup(struct xattr_iter *it)
+{
+	if (it->ofs < EROFS_BLKSIZ)
+		return 0;
+
+	xattr_iter_end(it, true);
+
+	it->blkaddr += erofs_blknr(it->ofs);
+
+	it->page = erofs_get_meta_page(it->sb, it->blkaddr, false);
+	if (IS_ERR(it->page)) {
+		int err = PTR_ERR(it->page);
+
+		it->page = NULL;
+		return err;
+	}
+
+	it->kaddr = kmap_atomic(it->page);
+	it->ofs = erofs_blkoff(it->ofs);
+	return 0;
+}
+
+static int inline_xattr_iter_begin(struct xattr_iter *it,
+				   struct inode *inode)
+{
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
+	unsigned int xattr_header_sz, inline_xattr_ofs;
+
+	xattr_header_sz = inlinexattr_header_size(inode);
+	if (unlikely(xattr_header_sz >= vi->xattr_isize)) {
+		DBG_BUGON(xattr_header_sz > vi->xattr_isize);
+		return -ENOATTR;
+	}
+
+	inline_xattr_ofs = vi->inode_isize + xattr_header_sz;
+
+	it->blkaddr = erofs_blknr(iloc(sbi, vi->nid) + inline_xattr_ofs);
+	it->ofs = erofs_blkoff(iloc(sbi, vi->nid) + inline_xattr_ofs);
+
+	it->page = erofs_get_inline_page(inode, it->blkaddr);
+	if (IS_ERR(it->page))
+		return PTR_ERR(it->page);
+
+	it->kaddr = kmap_atomic(it->page);
+	return vi->xattr_isize - xattr_header_sz;
+}
+
+/*
+ * Regardless of success or failure, `xattr_foreach' will end up with
+ * `ofs' pointing to the next xattr item rather than an arbitrary position.
+ */
+static int xattr_foreach(struct xattr_iter *it,
+			 const struct xattr_iter_handlers *op,
+			 unsigned int *tlimit)
+{
+	struct erofs_xattr_entry entry;
+	unsigned int value_sz, processed, slice;
+	int err;
+
+	/* 0. fixup blkaddr, ofs, ipage */
+	err = xattr_iter_fixup(it);
+	if (err)
+		return err;
+
+	/*
+	 * 1. read xattr entry to the memory,
+	 *    since we do EROFS_XATTR_ALIGN
+	 *    therefore entry should be in the page
+	 */
+	entry = *(struct erofs_xattr_entry *)(it->kaddr + it->ofs);
+	if (tlimit) {
+		unsigned int entry_sz = EROFS_XATTR_ENTRY_SIZE(&entry);
+
+		/* xattr on-disk corruption: xattr entry beyond xattr_isize */
+		if (unlikely(*tlimit < entry_sz)) {
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+		*tlimit -= entry_sz;
+	}
+
+	it->ofs += sizeof(struct erofs_xattr_entry);
+	value_sz = le16_to_cpu(entry.e_value_size);
+
+	/* handle entry */
+	err = op->entry(it, &entry);
+	if (err) {
+		it->ofs += entry.e_name_len + value_sz;
+		goto out;
+	}
+
+	/* 2. handle xattr name (ofs will finally be at the end of name) */
+	processed = 0;
+
+	while (processed < entry.e_name_len) {
+		if (it->ofs >= EROFS_BLKSIZ) {
+			DBG_BUGON(it->ofs > EROFS_BLKSIZ);
+
+			err = xattr_iter_fixup(it);
+			if (err)
+				goto out;
+			it->ofs = 0;
+		}
+
+		slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
+			      entry.e_name_len - processed);
+
+		/* handle name */
+		err = op->name(it, processed, it->kaddr + it->ofs, slice);
+		if (err) {
+			it->ofs += entry.e_name_len - processed + value_sz;
+			goto out;
+		}
+
+		it->ofs += slice;
+		processed += slice;
+	}
+
+	/* 3. handle xattr value */
+	processed = 0;
+
+	if (op->alloc_buffer) {
+		err = op->alloc_buffer(it, value_sz);
+		if (err) {
+			it->ofs += value_sz;
+			goto out;
+		}
+	}
+
+	while (processed < value_sz) {
+		if (it->ofs >= EROFS_BLKSIZ) {
+			DBG_BUGON(it->ofs > EROFS_BLKSIZ);
+
+			err = xattr_iter_fixup(it);
+			if (err)
+				goto out;
+			it->ofs = 0;
+		}
+
+		slice = min_t(unsigned int, PAGE_SIZE - it->ofs,
+			      value_sz - processed);
+		op->value(it, processed, it->kaddr + it->ofs, slice);
+		it->ofs += slice;
+		processed += slice;
+	}
+
+out:
+	/* xattrs should be 4-byte aligned (on-disk constraint) */
+	it->ofs = EROFS_XATTR_ALIGN(it->ofs);
+	return err < 0 ? err : 0;
+}
+
+struct getxattr_iter {
+	struct xattr_iter it;
+
+	char *buffer;
+	int buffer_size, index;
+	struct qstr name;
+};
+
+static int xattr_entrymatch(struct xattr_iter *_it,
+			    struct erofs_xattr_entry *entry)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+	return (it->index != entry->e_name_index ||
+		it->name.len != entry->e_name_len) ? -ENOATTR : 0;
+}
+
+static int xattr_namematch(struct xattr_iter *_it,
+			   unsigned int processed, char *buf, unsigned int len)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+	return memcmp(buf, it->name.name + processed, len) ? -ENOATTR : 0;
+}
+
+static int xattr_checkbuffer(struct xattr_iter *_it,
+			     unsigned int value_sz)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+	int err = it->buffer_size < value_sz ? -ERANGE : 0;
+
+	it->buffer_size = value_sz;
+	return !it->buffer ? 1 : err;
+}
+
+static void xattr_copyvalue(struct xattr_iter *_it,
+			    unsigned int processed,
+			    char *buf, unsigned int len)
+{
+	struct getxattr_iter *it = container_of(_it, struct getxattr_iter, it);
+
+	memcpy(it->buffer + processed, buf, len);
+}
+
+static const struct xattr_iter_handlers find_xattr_handlers = {
+	.entry = xattr_entrymatch,
+	.name = xattr_namematch,
+	.alloc_buffer = xattr_checkbuffer,
+	.value = xattr_copyvalue
+};
+
+static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
+{
+	int ret;
+	unsigned int remaining;
+
+	ret = inline_xattr_iter_begin(&it->it, inode);
+	if (ret < 0)
+		return ret;
+
+	remaining = ret;
+	while (remaining) {
+		ret = xattr_foreach(&it->it, &find_xattr_handlers, &remaining);
+		if (ret != -ENOATTR)
+			break;
+	}
+	xattr_iter_end_final(&it->it);
+
+	return ret ? ret : it->buffer_size;
+}
+
+static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
+{
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct super_block *const sb = inode->i_sb;
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+	unsigned int i;
+	int ret = -ENOATTR;
+
+	for (i = 0; i < vi->xattr_shared_count; ++i) {
+		erofs_blk_t blkaddr =
+			xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
+
+		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
+
+		if (!i || blkaddr != it->it.blkaddr) {
+			if (i)
+				xattr_iter_end(&it->it, true);
+
+			it->it.page = erofs_get_meta_page(sb, blkaddr, false);
+			if (IS_ERR(it->it.page))
+				return PTR_ERR(it->it.page);
+
+			it->it.kaddr = kmap_atomic(it->it.page);
+			it->it.blkaddr = blkaddr;
+		}
+
+		ret = xattr_foreach(&it->it, &find_xattr_handlers, NULL);
+		if (ret != -ENOATTR)
+			break;
+	}
+	if (vi->xattr_shared_count)
+		xattr_iter_end_final(&it->it);
+
+	return ret ? ret : it->buffer_size;
+}
+
+static bool erofs_xattr_user_list(struct dentry *dentry)
+{
+	return test_opt(EROFS_SB(dentry->d_sb), XATTR_USER);
+}
+
+static bool erofs_xattr_trusted_list(struct dentry *dentry)
+{
+	return capable(CAP_SYS_ADMIN);
+}
+
+int erofs_getxattr(struct inode *inode, int index,
+		   const char *name,
+		   void *buffer, size_t buffer_size)
+{
+	int ret;
+	struct getxattr_iter it;
+
+	if (unlikely(!name))
+		return -EINVAL;
+
+	ret = init_inode_xattrs(inode);
+	if (ret)
+		return ret;
+
+	it.index = index;
+
+	it.name.len = strlen(name);
+	if (it.name.len > EROFS_NAME_LEN)
+		return -ERANGE;
+	it.name.name = name;
+
+	it.buffer = buffer;
+	it.buffer_size = buffer_size;
+
+	it.it.sb = inode->i_sb;
+	ret = inline_getxattr(inode, &it);
+	if (ret == -ENOATTR)
+		ret = shared_getxattr(inode, &it);
+	return ret;
+}
+
+static int erofs_xattr_generic_get(const struct xattr_handler *handler,
+				   struct dentry *unused, struct inode *inode,
+				   const char *name, void *buffer, size_t size)
+{
+	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
+
+	switch (handler->flags) {
+	case EROFS_XATTR_INDEX_USER:
+		if (!test_opt(sbi, XATTR_USER))
+			return -EOPNOTSUPP;
+		break;
+	case EROFS_XATTR_INDEX_TRUSTED:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		break;
+	case EROFS_XATTR_INDEX_SECURITY:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return erofs_getxattr(inode, handler->flags, name, buffer, size);
+}
+
+const struct xattr_handler erofs_xattr_user_handler = {
+	.prefix	= XATTR_USER_PREFIX,
+	.flags	= EROFS_XATTR_INDEX_USER,
+	.list	= erofs_xattr_user_list,
+	.get	= erofs_xattr_generic_get,
+};
+
+const struct xattr_handler erofs_xattr_trusted_handler = {
+	.prefix	= XATTR_TRUSTED_PREFIX,
+	.flags	= EROFS_XATTR_INDEX_TRUSTED,
+	.list	= erofs_xattr_trusted_list,
+	.get	= erofs_xattr_generic_get,
+};
+
+#ifdef CONFIG_EROFS_FS_SECURITY
+const struct xattr_handler __maybe_unused erofs_xattr_security_handler = {
+	.prefix	= XATTR_SECURITY_PREFIX,
+	.flags	= EROFS_XATTR_INDEX_SECURITY,
+	.get	= erofs_xattr_generic_get,
+};
+#endif
+
+const struct xattr_handler *erofs_xattr_handlers[] = {
+	&erofs_xattr_user_handler,
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
+#endif
+	&erofs_xattr_trusted_handler,
+#ifdef CONFIG_EROFS_FS_SECURITY
+	&erofs_xattr_security_handler,
+#endif
+	NULL,
+};
+
+struct listxattr_iter {
+	struct xattr_iter it;
+
+	struct dentry *dentry;
+	char *buffer;
+	int buffer_size, buffer_ofs;
+};
+
+static int xattr_entrylist(struct xattr_iter *_it,
+			   struct erofs_xattr_entry *entry)
+{
+	struct listxattr_iter *it =
+		container_of(_it, struct listxattr_iter, it);
+	unsigned int prefix_len;
+	const char *prefix;
+
+	const struct xattr_handler *h =
+		erofs_xattr_handler(entry->e_name_index);
+
+	if (!h || (h->list && !h->list(it->dentry)))
+		return 1;
+
+	prefix = xattr_prefix(h);
+	prefix_len = strlen(prefix);
+
+	if (!it->buffer) {
+		it->buffer_ofs += prefix_len + entry->e_name_len + 1;
+		return 1;
+	}
+
+	if (it->buffer_ofs + prefix_len
+		+ entry->e_name_len + 1 > it->buffer_size)
+		return -ERANGE;
+
+	memcpy(it->buffer + it->buffer_ofs, prefix, prefix_len);
+	it->buffer_ofs += prefix_len;
+	return 0;
+}
+
+static int xattr_namelist(struct xattr_iter *_it,
+			  unsigned int processed, char *buf, unsigned int len)
+{
+	struct listxattr_iter *it =
+		container_of(_it, struct listxattr_iter, it);
+
+	memcpy(it->buffer + it->buffer_ofs, buf, len);
+	it->buffer_ofs += len;
+	return 0;
+}
+
+static int xattr_skipvalue(struct xattr_iter *_it,
+			   unsigned int value_sz)
+{
+	struct listxattr_iter *it =
+		container_of(_it, struct listxattr_iter, it);
+
+	it->buffer[it->buffer_ofs++] = '\0';
+	return 1;
+}
+
+static const struct xattr_iter_handlers list_xattr_handlers = {
+	.entry = xattr_entrylist,
+	.name = xattr_namelist,
+	.alloc_buffer = xattr_skipvalue,
+	.value = NULL
+};
+
+static int inline_listxattr(struct listxattr_iter *it)
+{
+	int ret;
+	unsigned int remaining;
+
+	ret = inline_xattr_iter_begin(&it->it, d_inode(it->dentry));
+	if (ret < 0)
+		return ret;
+
+	remaining = ret;
+	while (remaining) {
+		ret = xattr_foreach(&it->it, &list_xattr_handlers, &remaining);
+		if (ret)
+			break;
+	}
+	xattr_iter_end_final(&it->it);
+	return ret ? ret : it->buffer_ofs;
+}
+
+static int shared_listxattr(struct listxattr_iter *it)
+{
+	struct inode *const inode = d_inode(it->dentry);
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct super_block *const sb = inode->i_sb;
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+	unsigned int i;
+	int ret = 0;
+
+	for (i = 0; i < vi->xattr_shared_count; ++i) {
+		erofs_blk_t blkaddr =
+			xattrblock_addr(sbi, vi->xattr_shared_xattrs[i]);
+
+		it->it.ofs = xattrblock_offset(sbi, vi->xattr_shared_xattrs[i]);
+		if (!i || blkaddr != it->it.blkaddr) {
+			if (i)
+				xattr_iter_end(&it->it, true);
+
+			it->it.page = erofs_get_meta_page(sb, blkaddr, false);
+			if (IS_ERR(it->it.page))
+				return PTR_ERR(it->it.page);
+
+			it->it.kaddr = kmap_atomic(it->it.page);
+			it->it.blkaddr = blkaddr;
+		}
+
+		ret = xattr_foreach(&it->it, &list_xattr_handlers, NULL);
+		if (ret)
+			break;
+	}
+	if (vi->xattr_shared_count)
+		xattr_iter_end_final(&it->it);
+
+	return ret ? ret : it->buffer_ofs;
+}
+
+ssize_t erofs_listxattr(struct dentry *dentry,
+			char *buffer, size_t buffer_size)
+{
+	int ret;
+	struct listxattr_iter it;
+
+	ret = init_inode_xattrs(d_inode(dentry));
+	if (ret)
+		return ret;
+
+	it.dentry = dentry;
+	it.buffer = buffer;
+	it.buffer_size = buffer_size;
+	it.buffer_ofs = 0;
+
+	it.it.sb = dentry->d_sb;
+
+	ret = inline_listxattr(&it);
+	if (ret < 0 && ret != -ENOATTR)
+		return ret;
+	return shared_listxattr(&it);
+}
+
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+struct posix_acl *erofs_get_acl(struct inode *inode, int type)
+{
+	struct posix_acl *acl;
+	int prefix, rc;
+	char *value = NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		prefix = EROFS_XATTR_INDEX_POSIX_ACL_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		prefix = EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT;
+		break;
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+
+	rc = erofs_getxattr(inode, prefix, "", NULL, 0);
+	if (rc > 0) {
+		value = kmalloc(rc, GFP_KERNEL);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		rc = erofs_getxattr(inode, prefix, "", value, rc);
+	}
+
+	if (rc == -ENOATTR)
+		acl = NULL;
+	else if (rc < 0)
+		acl = ERR_PTR(rc);
+	else
+		acl = posix_acl_from_xattr(&init_user_ns, value, rc);
+	kfree(value);
+	return acl;
+}
+#endif
+
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
new file mode 100644
index 000000000000..c5ca47d814dd
--- /dev/null
+++ b/fs/erofs/xattr.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_XATTR_H
+#define __EROFS_XATTR_H
+
+#include "internal.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+/* Attribute not found */
+#define ENOATTR         ENODATA
+
+static inline unsigned int inlinexattr_header_size(struct inode *inode)
+{
+	return sizeof(struct erofs_xattr_ibody_header)
+		+ sizeof(u32) * EROFS_V(inode)->xattr_shared_count;
+}
+
+static inline erofs_blk_t xattrblock_addr(struct erofs_sb_info *sbi,
+					  unsigned int xattr_id)
+{
+#ifdef CONFIG_EROFS_FS_XATTR
+	return sbi->xattr_blkaddr +
+		xattr_id * sizeof(__u32) / EROFS_BLKSIZ;
+#else
+	return 0;
+#endif
+}
+
+static inline unsigned int xattrblock_offset(struct erofs_sb_info *sbi,
+					     unsigned int xattr_id)
+{
+	return (xattr_id * sizeof(__u32)) % EROFS_BLKSIZ;
+}
+
+#ifdef CONFIG_EROFS_FS_XATTR
+extern const struct xattr_handler erofs_xattr_user_handler;
+extern const struct xattr_handler erofs_xattr_trusted_handler;
+#ifdef CONFIG_EROFS_FS_SECURITY
+extern const struct xattr_handler erofs_xattr_security_handler;
+#endif
+
+static inline const struct xattr_handler *erofs_xattr_handler(unsigned int idx)
+{
+static const struct xattr_handler *xattr_handler_map[] = {
+	[EROFS_XATTR_INDEX_USER] = &erofs_xattr_user_handler,
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+	[EROFS_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler,
+	[EROFS_XATTR_INDEX_POSIX_ACL_DEFAULT] =
+		&posix_acl_default_xattr_handler,
+#endif
+	[EROFS_XATTR_INDEX_TRUSTED] = &erofs_xattr_trusted_handler,
+#ifdef CONFIG_EROFS_FS_SECURITY
+	[EROFS_XATTR_INDEX_SECURITY] = &erofs_xattr_security_handler,
+#endif
+};
+
+	return idx && idx < ARRAY_SIZE(xattr_handler_map) ?
+		xattr_handler_map[idx] : NULL;
+}
+
+extern const struct xattr_handler *erofs_xattr_handlers[];
+
+int erofs_getxattr(struct inode *, int, const char *, void *, size_t);
+ssize_t erofs_listxattr(struct dentry *, char *, size_t);
+#else
+static inline int erofs_getxattr(struct inode *inode, int index,
+				 const char *name, void *buffer,
+				 size_t buffer_size)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline ssize_t erofs_listxattr(struct dentry *dentry,
+				      char *buffer, size_t buffer_size)
+{
+	return -EOPNOTSUPP;
+}
+#endif	/* !CONFIG_EROFS_FS_XATTR */
+
+#ifdef CONFIG_EROFS_FS_POSIX_ACL
+struct posix_acl *erofs_get_acl(struct inode *inode, int type);
+#else
+#define erofs_get_acl	(NULL)
+#endif
+
+#endif
+
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
new file mode 100644
index 000000000000..b32ad585237c
--- /dev/null
+++ b/fs/erofs/zdata.c
@@ -0,0 +1,1432 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "zdata.h"
+#include "compress.h"
+#include <linux/prefetch.h>
+
+#include <trace/events/erofs.h>
+
+/*
+ * a compressed_pages[] placeholder in order to avoid
+ * being filled with file pages for in-place decompression.
+ */
+#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
+
+/* how to allocate cached pages for a pcluster */
+enum z_erofs_cache_alloctype {
+	DONTALLOC,	/* don't allocate any cached pages */
+	DELAYEDALLOC,	/* delayed allocation (at the time of submitting io) */
+};
+
+/*
+ * tagged pointer with 1-bit tag for all compressed pages
+ * tag 0 - the page is just found with an extra page reference
+ */
+typedef tagptr1_t compressed_page_t;
+
+#define tag_compressed_page_justfound(page) \
+	tagptr_fold(compressed_page_t, page, 1)
+
+static struct workqueue_struct *z_erofs_workqueue __read_mostly;
+static struct kmem_cache *pcluster_cachep __read_mostly;
+
+void z_erofs_exit_zip_subsystem(void)
+{
+	destroy_workqueue(z_erofs_workqueue);
+	kmem_cache_destroy(pcluster_cachep);
+}
+
+static inline int init_unzip_workqueue(void)
+{
+	const unsigned int onlinecpus = num_possible_cpus();
+	const unsigned int flags = WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE;
+
+	/*
+	 * no need to spawn too many threads, limiting threads could minimum
+	 * scheduling overhead, perhaps per-CPU threads should be better?
+	 */
+	z_erofs_workqueue = alloc_workqueue("erofs_unzipd", flags,
+					    onlinecpus + onlinecpus / 4);
+	return z_erofs_workqueue ? 0 : -ENOMEM;
+}
+
+static void init_once(void *ptr)
+{
+	struct z_erofs_pcluster *pcl = ptr;
+	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
+	unsigned int i;
+
+	mutex_init(&cl->lock);
+	cl->nr_pages = 0;
+	cl->vcnt = 0;
+	for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
+		pcl->compressed_pages[i] = NULL;
+}
+
+static void init_always(struct z_erofs_pcluster *pcl)
+{
+	struct z_erofs_collection *cl = z_erofs_primarycollection(pcl);
+
+	atomic_set(&pcl->obj.refcount, 1);
+
+	DBG_BUGON(cl->nr_pages);
+	DBG_BUGON(cl->vcnt);
+}
+
+int __init z_erofs_init_zip_subsystem(void)
+{
+	pcluster_cachep = kmem_cache_create("erofs_compress",
+					    Z_EROFS_WORKGROUP_SIZE, 0,
+					    SLAB_RECLAIM_ACCOUNT, init_once);
+	if (pcluster_cachep) {
+		if (!init_unzip_workqueue())
+			return 0;
+
+		kmem_cache_destroy(pcluster_cachep);
+	}
+	return -ENOMEM;
+}
+
+enum z_erofs_collectmode {
+	COLLECT_SECONDARY,
+	COLLECT_PRIMARY,
+	/*
+	 * The current collection was the tail of an exist chain, in addition
+	 * that the previous processed chained collections are all decided to
+	 * be hooked up to it.
+	 * A new chain will be created for the remaining collections which are
+	 * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED,
+	 * the next collection cannot reuse the whole page safely in
+	 * the following scenario:
+	 *  ________________________________________________________________
+	 * |      tail (partial) page     |       head (partial) page       |
+	 * |   (belongs to the next cl)   |   (belongs to the current cl)   |
+	 * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+	 */
+	COLLECT_PRIMARY_HOOKED,
+	COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
+	/*
+	 * The current collection has been linked with the owned chain, and
+	 * could also be linked with the remaining collections, which means
+	 * if the processing page is the tail page of the collection, thus
+	 * the current collection can safely use the whole page (since
+	 * the previous collection is under control) for in-place I/O, as
+	 * illustrated below:
+	 *  ________________________________________________________________
+	 * |  tail (partial) page |          head (partial) page           |
+	 * |  (of the current cl) |      (of the previous collection)      |
+	 * |  PRIMARY_FOLLOWED or |                                        |
+	 * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
+	 *
+	 * [  (*) the above page can be used as inplace I/O.               ]
+	 */
+	COLLECT_PRIMARY_FOLLOWED,
+};
+
+struct z_erofs_collector {
+	struct z_erofs_pagevec_ctor vector;
+
+	struct z_erofs_pcluster *pcl, *tailpcl;
+	struct z_erofs_collection *cl;
+	struct page **compressedpages;
+	z_erofs_next_pcluster_t owned_head;
+
+	enum z_erofs_collectmode mode;
+};
+
+struct z_erofs_decompress_frontend {
+	struct inode *const inode;
+
+	struct z_erofs_collector clt;
+	struct erofs_map_blocks map;
+
+	/* used for applying cache strategy on the fly */
+	bool backmost;
+	erofs_off_t headoffset;
+};
+
+#define COLLECTOR_INIT() { \
+	.owned_head = Z_EROFS_PCLUSTER_TAIL, \
+	.mode = COLLECT_PRIMARY_FOLLOWED }
+
+#define DECOMPRESS_FRONTEND_INIT(__i) { \
+	.inode = __i, .clt = COLLECTOR_INIT(), \
+	.backmost = true, }
+
+static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
+static DEFINE_MUTEX(z_pagemap_global_lock);
+
+static void preload_compressed_pages(struct z_erofs_collector *clt,
+				     struct address_space *mc,
+				     enum z_erofs_cache_alloctype type,
+				     struct list_head *pagepool)
+{
+	const struct z_erofs_pcluster *pcl = clt->pcl;
+	const unsigned int clusterpages = BIT(pcl->clusterbits);
+	struct page **pages = clt->compressedpages;
+	pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages);
+	bool standalone = true;
+
+	if (clt->mode < COLLECT_PRIMARY_FOLLOWED)
+		return;
+
+	for (; pages < pcl->compressed_pages + clusterpages; ++pages) {
+		struct page *page;
+		compressed_page_t t;
+
+		/* the compressed page was loaded before */
+		if (READ_ONCE(*pages))
+			continue;
+
+		page = find_get_page(mc, index);
+
+		if (page) {
+			t = tag_compressed_page_justfound(page);
+		} else if (type == DELAYEDALLOC) {
+			t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+		} else {	/* DONTALLOC */
+			if (standalone)
+				clt->compressedpages = pages;
+			standalone = false;
+			continue;
+		}
+
+		if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
+			continue;
+
+		if (page)
+			put_page(page);
+	}
+
+	if (standalone)		/* downgrade to PRIMARY_FOLLOWED_NOINPLACE */
+		clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+}
+
+/* called by erofs_shrinker to get rid of all compressed_pages */
+int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
+				       struct erofs_workgroup *grp)
+{
+	struct z_erofs_pcluster *const pcl =
+		container_of(grp, struct z_erofs_pcluster, obj);
+	struct address_space *const mapping = MNGD_MAPPING(sbi);
+	const unsigned int clusterpages = BIT(pcl->clusterbits);
+	int i;
+
+	/*
+	 * refcount of workgroup is now freezed as 1,
+	 * therefore no need to worry about available decompression users.
+	 */
+	for (i = 0; i < clusterpages; ++i) {
+		struct page *page = pcl->compressed_pages[i];
+
+		if (!page)
+			continue;
+
+		/* block other users from reclaiming or migrating the page */
+		if (!trylock_page(page))
+			return -EBUSY;
+
+		if (unlikely(page->mapping != mapping))
+			continue;
+
+		/* barrier is implied in the following 'unlock_page' */
+		WRITE_ONCE(pcl->compressed_pages[i], NULL);
+		set_page_private(page, 0);
+		ClearPagePrivate(page);
+
+		unlock_page(page);
+		put_page(page);
+	}
+	return 0;
+}
+
+int erofs_try_to_free_cached_page(struct address_space *mapping,
+				  struct page *page)
+{
+	struct z_erofs_pcluster *const pcl = (void *)page_private(page);
+	const unsigned int clusterpages = BIT(pcl->clusterbits);
+	int ret = 0;	/* 0 - busy */
+
+	if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
+		unsigned int i;
+
+		for (i = 0; i < clusterpages; ++i) {
+			if (pcl->compressed_pages[i] == page) {
+				WRITE_ONCE(pcl->compressed_pages[i], NULL);
+				ret = 1;
+				break;
+			}
+		}
+		erofs_workgroup_unfreeze(&pcl->obj, 1);
+
+		if (ret) {
+			ClearPagePrivate(page);
+			put_page(page);
+		}
+	}
+	return ret;
+}
+
+/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
+static inline bool try_inplace_io(struct z_erofs_collector *clt,
+				  struct page *page)
+{
+	struct z_erofs_pcluster *const pcl = clt->pcl;
+	const unsigned int clusterpages = BIT(pcl->clusterbits);
+
+	while (clt->compressedpages < pcl->compressed_pages + clusterpages) {
+		if (!cmpxchg(clt->compressedpages++, NULL, page))
+			return true;
+	}
+	return false;
+}
+
+/* callers must be with collection lock held */
+static int z_erofs_attach_page(struct z_erofs_collector *clt,
+			       struct page *page,
+			       enum z_erofs_page_type type)
+{
+	int ret;
+	bool occupied;
+
+	/* give priority for inplaceio */
+	if (clt->mode >= COLLECT_PRIMARY &&
+	    type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
+	    try_inplace_io(clt, page))
+		return 0;
+
+	ret = z_erofs_pagevec_enqueue(&clt->vector,
+				      page, type, &occupied);
+	clt->cl->vcnt += (unsigned int)ret;
+
+	return ret ? 0 : -EAGAIN;
+}
+
+static enum z_erofs_collectmode
+try_to_claim_pcluster(struct z_erofs_pcluster *pcl,
+		      z_erofs_next_pcluster_t *owned_head)
+{
+	/* let's claim these following types of pclusters */
+retry:
+	if (pcl->next == Z_EROFS_PCLUSTER_NIL) {
+		/* type 1, nil pcluster */
+		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL,
+			    *owned_head) != Z_EROFS_PCLUSTER_NIL)
+			goto retry;
+
+		*owned_head = &pcl->next;
+		/* lucky, I am the followee :) */
+		return COLLECT_PRIMARY_FOLLOWED;
+	} else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) {
+		/*
+		 * type 2, link to the end of a existing open chain,
+		 * be careful that its submission itself is governed
+		 * by the original owned chain.
+		 */
+		if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+			    *owned_head) != Z_EROFS_PCLUSTER_TAIL)
+			goto retry;
+		*owned_head = Z_EROFS_PCLUSTER_TAIL;
+		return COLLECT_PRIMARY_HOOKED;
+	}
+	return COLLECT_PRIMARY;	/* :( better luck next time */
+}
+
+static struct z_erofs_collection *cllookup(struct z_erofs_collector *clt,
+					   struct inode *inode,
+					   struct erofs_map_blocks *map)
+{
+	struct erofs_workgroup *grp;
+	struct z_erofs_pcluster *pcl;
+	struct z_erofs_collection *cl;
+	unsigned int length;
+	bool tag;
+
+	grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT, &tag);
+	if (!grp)
+		return NULL;
+
+	pcl = container_of(grp, struct z_erofs_pcluster, obj);
+	if (clt->owned_head == &pcl->next || pcl == clt->tailpcl) {
+		DBG_BUGON(1);
+		erofs_workgroup_put(grp);
+		return ERR_PTR(-EFSCORRUPTED);
+	}
+
+	cl = z_erofs_primarycollection(pcl);
+	if (unlikely(cl->pageofs != (map->m_la & ~PAGE_MASK))) {
+		DBG_BUGON(1);
+		erofs_workgroup_put(grp);
+		return ERR_PTR(-EFSCORRUPTED);
+	}
+
+	length = READ_ONCE(pcl->length);
+	if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
+		if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
+			DBG_BUGON(1);
+			erofs_workgroup_put(grp);
+			return ERR_PTR(-EFSCORRUPTED);
+		}
+	} else {
+		unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
+
+		if (map->m_flags & EROFS_MAP_FULL_MAPPED)
+			llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
+
+		while (llen > length &&
+		       length != cmpxchg_relaxed(&pcl->length, length, llen)) {
+			cpu_relax();
+			length = READ_ONCE(pcl->length);
+		}
+	}
+	mutex_lock(&cl->lock);
+	/* used to check tail merging loop due to corrupted images */
+	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
+		clt->tailpcl = pcl;
+	clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head);
+	/* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */
+	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
+		clt->tailpcl = NULL;
+	clt->pcl = pcl;
+	clt->cl = cl;
+	return cl;
+}
+
+static struct z_erofs_collection *clregister(struct z_erofs_collector *clt,
+					     struct inode *inode,
+					     struct erofs_map_blocks *map)
+{
+	struct z_erofs_pcluster *pcl;
+	struct z_erofs_collection *cl;
+	int err;
+
+	/* no available workgroup, let's allocate one */
+	pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS);
+	if (unlikely(!pcl))
+		return ERR_PTR(-ENOMEM);
+
+	init_always(pcl);
+	pcl->obj.index = map->m_pa >> PAGE_SHIFT;
+
+	pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
+		(map->m_flags & EROFS_MAP_FULL_MAPPED ?
+			Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
+
+	if (map->m_flags & EROFS_MAP_ZIPPED)
+		pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4;
+	else
+		pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
+
+	pcl->clusterbits = EROFS_V(inode)->z_physical_clusterbits[0];
+	pcl->clusterbits -= PAGE_SHIFT;
+
+	/* new pclusters should be claimed as type 1, primary and followed */
+	pcl->next = clt->owned_head;
+	clt->mode = COLLECT_PRIMARY_FOLLOWED;
+
+	cl = z_erofs_primarycollection(pcl);
+	cl->pageofs = map->m_la & ~PAGE_MASK;
+
+	/*
+	 * lock all primary followed works before visible to others
+	 * and mutex_trylock *never* fails for a new pcluster.
+	 */
+	mutex_trylock(&cl->lock);
+
+	err = erofs_register_workgroup(inode->i_sb, &pcl->obj, 0);
+	if (err) {
+		mutex_unlock(&cl->lock);
+		kmem_cache_free(pcluster_cachep, pcl);
+		return ERR_PTR(-EAGAIN);
+	}
+	/* used to check tail merging loop due to corrupted images */
+	if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL)
+		clt->tailpcl = pcl;
+	clt->owned_head = &pcl->next;
+	clt->pcl = pcl;
+	clt->cl = cl;
+	return cl;
+}
+
+static int z_erofs_collector_begin(struct z_erofs_collector *clt,
+				   struct inode *inode,
+				   struct erofs_map_blocks *map)
+{
+	struct z_erofs_collection *cl;
+
+	DBG_BUGON(clt->cl);
+
+	/* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous collection */
+	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_NIL);
+	DBG_BUGON(clt->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+
+	if (!PAGE_ALIGNED(map->m_pa)) {
+		DBG_BUGON(1);
+		return -EINVAL;
+	}
+
+repeat:
+	cl = cllookup(clt, inode, map);
+	if (!cl) {
+		cl = clregister(clt, inode, map);
+
+		if (unlikely(cl == ERR_PTR(-EAGAIN)))
+			goto repeat;
+	}
+
+	if (IS_ERR(cl))
+		return PTR_ERR(cl);
+
+	z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS,
+				  cl->pagevec, cl->vcnt);
+
+	clt->compressedpages = clt->pcl->compressed_pages;
+	if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */
+		clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES;
+	return 0;
+}
+
+/*
+ * keep in mind that no referenced pclusters will be freed
+ * only after a RCU grace period.
+ */
+static void z_erofs_rcu_callback(struct rcu_head *head)
+{
+	struct z_erofs_collection *const cl =
+		container_of(head, struct z_erofs_collection, rcu);
+
+	kmem_cache_free(pcluster_cachep,
+			container_of(cl, struct z_erofs_pcluster,
+				     primary_collection));
+}
+
+void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
+{
+	struct z_erofs_pcluster *const pcl =
+		container_of(grp, struct z_erofs_pcluster, obj);
+	struct z_erofs_collection *const cl = z_erofs_primarycollection(pcl);
+
+	call_rcu(&cl->rcu, z_erofs_rcu_callback);
+}
+
+static void z_erofs_collection_put(struct z_erofs_collection *cl)
+{
+	struct z_erofs_pcluster *const pcl =
+		container_of(cl, struct z_erofs_pcluster, primary_collection);
+
+	erofs_workgroup_put(&pcl->obj);
+}
+
+static bool z_erofs_collector_end(struct z_erofs_collector *clt)
+{
+	struct z_erofs_collection *cl = clt->cl;
+
+	if (!cl)
+		return false;
+
+	z_erofs_pagevec_ctor_exit(&clt->vector, false);
+	mutex_unlock(&cl->lock);
+
+	/*
+	 * if all pending pages are added, don't hold its reference
+	 * any longer if the pcluster isn't hosted by ourselves.
+	 */
+	if (clt->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+		z_erofs_collection_put(cl);
+
+	clt->cl = NULL;
+	return true;
+}
+
+static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
+					       gfp_t gfp)
+{
+	struct page *page = erofs_allocpage(pagepool, gfp, true);
+
+	page->mapping = Z_EROFS_MAPPING_STAGING;
+	return page;
+}
+
+static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe,
+				       unsigned int cachestrategy,
+				       erofs_off_t la)
+{
+	if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
+		return false;
+
+	if (fe->backmost)
+		return true;
+
+	return cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
+		la < fe->headoffset;
+}
+
+static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
+				struct page *page,
+				struct list_head *pagepool)
+{
+	struct inode *const inode = fe->inode;
+	struct erofs_sb_info *const sbi __maybe_unused = EROFS_I_SB(inode);
+	struct erofs_map_blocks *const map = &fe->map;
+	struct z_erofs_collector *const clt = &fe->clt;
+	const loff_t offset = page_offset(page);
+	bool tight = (clt->mode >= COLLECT_PRIMARY_HOOKED);
+
+	enum z_erofs_cache_alloctype cache_strategy;
+	enum z_erofs_page_type page_type;
+	unsigned int cur, end, spiltted, index;
+	int err = 0;
+
+	/* register locked file pages as online pages in pack */
+	z_erofs_onlinepage_init(page);
+
+	spiltted = 0;
+	end = PAGE_SIZE;
+repeat:
+	cur = end - 1;
+
+	/* lucky, within the range of the current map_blocks */
+	if (offset + cur >= map->m_la &&
+	    offset + cur < map->m_la + map->m_llen) {
+		/* didn't get a valid collection previously (very rare) */
+		if (!clt->cl)
+			goto restart_now;
+		goto hitted;
+	}
+
+	/* go ahead the next map_blocks */
+	debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
+
+	if (z_erofs_collector_end(clt))
+		fe->backmost = false;
+
+	map->m_la = offset + cur;
+	map->m_llen = 0;
+	err = z_erofs_map_blocks_iter(inode, map, 0);
+	if (unlikely(err))
+		goto err_out;
+
+restart_now:
+	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
+		goto hitted;
+
+	err = z_erofs_collector_begin(clt, inode, map);
+	if (unlikely(err))
+		goto err_out;
+
+	/* preload all compressed pages (maybe downgrade role if necessary) */
+	if (should_alloc_managed_pages(fe, sbi->cache_strategy, map->m_la))
+		cache_strategy = DELAYEDALLOC;
+	else
+		cache_strategy = DONTALLOC;
+
+	preload_compressed_pages(clt, MNGD_MAPPING(sbi),
+				 cache_strategy, pagepool);
+
+	tight &= (clt->mode >= COLLECT_PRIMARY_HOOKED);
+hitted:
+	cur = end - min_t(unsigned int, offset + end - map->m_la, end);
+	if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
+		zero_user_segment(page, cur, end);
+		goto next_part;
+	}
+
+	/* let's derive page type */
+	page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
+		(!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
+			(tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
+				Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
+
+	if (cur)
+		tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
+
+retry:
+	err = z_erofs_attach_page(clt, page, page_type);
+	/* should allocate an additional staging page for pagevec */
+	if (err == -EAGAIN) {
+		struct page *const newpage =
+			__stagingpage_alloc(pagepool, GFP_NOFS);
+
+		err = z_erofs_attach_page(clt, newpage,
+					  Z_EROFS_PAGE_TYPE_EXCLUSIVE);
+		if (likely(!err))
+			goto retry;
+	}
+
+	if (unlikely(err))
+		goto err_out;
+
+	index = page->index - (map->m_la >> PAGE_SHIFT);
+
+	z_erofs_onlinepage_fixup(page, index, true);
+
+	/* bump up the number of spiltted parts of a page */
+	++spiltted;
+	/* also update nr_pages */
+	clt->cl->nr_pages = max_t(pgoff_t, clt->cl->nr_pages, index + 1);
+next_part:
+	/* can be used for verification */
+	map->m_llen = offset + cur - map->m_la;
+
+	end = cur;
+	if (end > 0)
+		goto repeat;
+
+out:
+	z_erofs_onlinepage_endio(page);
+
+	debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
+		__func__, page, spiltted, map->m_llen);
+	return err;
+
+	/* if some error occurred while processing this page */
+err_out:
+	SetPageError(page);
+	goto out;
+}
+
+static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
+{
+	tagptr1_t t = tagptr_init(tagptr1_t, ptr);
+	struct z_erofs_unzip_io *io = tagptr_unfold_ptr(t);
+	bool background = tagptr_unfold_tags(t);
+
+	if (!background) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&io->u.wait.lock, flags);
+		if (!atomic_add_return(bios, &io->pending_bios))
+			wake_up_locked(&io->u.wait);
+		spin_unlock_irqrestore(&io->u.wait.lock, flags);
+		return;
+	}
+
+	if (!atomic_add_return(bios, &io->pending_bios))
+		queue_work(z_erofs_workqueue, &io->u.work);
+}
+
+static inline void z_erofs_vle_read_endio(struct bio *bio)
+{
+	struct erofs_sb_info *sbi = NULL;
+	blk_status_t err = bio->bi_status;
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		struct page *page = bvec->bv_page;
+		bool cachemngd = false;
+
+		DBG_BUGON(PageUptodate(page));
+		DBG_BUGON(!page->mapping);
+
+		if (unlikely(!sbi && !z_erofs_page_is_staging(page))) {
+			sbi = EROFS_SB(page->mapping->host->i_sb);
+
+			if (time_to_inject(sbi, FAULT_READ_IO)) {
+				erofs_show_injection_info(FAULT_READ_IO);
+				err = BLK_STS_IOERR;
+			}
+		}
+
+		/* sbi should already be gotten if the page is managed */
+		if (sbi)
+			cachemngd = erofs_page_is_managed(sbi, page);
+
+		if (unlikely(err))
+			SetPageError(page);
+		else if (cachemngd)
+			SetPageUptodate(page);
+
+		if (cachemngd)
+			unlock_page(page);
+	}
+
+	z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
+	bio_put(bio);
+}
+
+static int z_erofs_decompress_pcluster(struct super_block *sb,
+				       struct z_erofs_pcluster *pcl,
+				       struct list_head *pagepool)
+{
+	struct erofs_sb_info *const sbi = EROFS_SB(sb);
+	const unsigned int clusterpages = BIT(pcl->clusterbits);
+	struct z_erofs_pagevec_ctor ctor;
+	unsigned int i, outputsize, llen, nr_pages;
+	struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
+	struct page **pages, **compressed_pages, *page;
+
+	enum z_erofs_page_type page_type;
+	bool overlapped, partial;
+	struct z_erofs_collection *cl;
+	int err;
+
+	might_sleep();
+	cl = z_erofs_primarycollection(pcl);
+	DBG_BUGON(!READ_ONCE(cl->nr_pages));
+
+	mutex_lock(&cl->lock);
+	nr_pages = cl->nr_pages;
+
+	if (likely(nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES)) {
+		pages = pages_onstack;
+	} else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
+		   mutex_trylock(&z_pagemap_global_lock)) {
+		pages = z_pagemap_global;
+	} else {
+		gfp_t gfp_flags = GFP_KERNEL;
+
+		if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
+			gfp_flags |= __GFP_NOFAIL;
+
+		pages = kvmalloc_array(nr_pages, sizeof(struct page *),
+				       gfp_flags);
+
+		/* fallback to global pagemap for the lowmem scenario */
+		if (unlikely(!pages)) {
+			mutex_lock(&z_pagemap_global_lock);
+			pages = z_pagemap_global;
+		}
+	}
+
+	for (i = 0; i < nr_pages; ++i)
+		pages[i] = NULL;
+
+	err = 0;
+	z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
+				  cl->pagevec, 0);
+
+	for (i = 0; i < cl->vcnt; ++i) {
+		unsigned int pagenr;
+
+		page = z_erofs_pagevec_dequeue(&ctor, &page_type);
+
+		/* all pages in pagevec ought to be valid */
+		DBG_BUGON(!page);
+		DBG_BUGON(!page->mapping);
+
+		if (z_erofs_put_stagingpage(pagepool, page))
+			continue;
+
+		if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
+			pagenr = 0;
+		else
+			pagenr = z_erofs_onlinepage_index(page);
+
+		DBG_BUGON(pagenr >= nr_pages);
+
+		/*
+		 * currently EROFS doesn't support multiref(dedup),
+		 * so here erroring out one multiref page.
+		 */
+		if (unlikely(pages[pagenr])) {
+			DBG_BUGON(1);
+			SetPageError(pages[pagenr]);
+			z_erofs_onlinepage_endio(pages[pagenr]);
+			err = -EFSCORRUPTED;
+		}
+		pages[pagenr] = page;
+	}
+	z_erofs_pagevec_ctor_exit(&ctor, true);
+
+	overlapped = false;
+	compressed_pages = pcl->compressed_pages;
+
+	for (i = 0; i < clusterpages; ++i) {
+		unsigned int pagenr;
+
+		page = compressed_pages[i];
+
+		/* all compressed pages ought to be valid */
+		DBG_BUGON(!page);
+		DBG_BUGON(!page->mapping);
+
+		if (!z_erofs_page_is_staging(page)) {
+			if (erofs_page_is_managed(sbi, page)) {
+				if (unlikely(!PageUptodate(page)))
+					err = -EIO;
+				continue;
+			}
+
+			/*
+			 * only if non-head page can be selected
+			 * for inplace decompression
+			 */
+			pagenr = z_erofs_onlinepage_index(page);
+
+			DBG_BUGON(pagenr >= nr_pages);
+			if (unlikely(pages[pagenr])) {
+				DBG_BUGON(1);
+				SetPageError(pages[pagenr]);
+				z_erofs_onlinepage_endio(pages[pagenr]);
+				err = -EFSCORRUPTED;
+			}
+			pages[pagenr] = page;
+
+			overlapped = true;
+		}
+
+		/* PG_error needs checking for inplaced and staging pages */
+		if (unlikely(PageError(page))) {
+			DBG_BUGON(PageUptodate(page));
+			err = -EIO;
+		}
+	}
+
+	if (unlikely(err))
+		goto out;
+
+	llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
+	if (nr_pages << PAGE_SHIFT >= cl->pageofs + llen) {
+		outputsize = llen;
+		partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
+	} else {
+		outputsize = (nr_pages << PAGE_SHIFT) - cl->pageofs;
+		partial = true;
+	}
+
+	err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+					.sb = sb,
+					.in = compressed_pages,
+					.out = pages,
+					.pageofs_out = cl->pageofs,
+					.inputsize = PAGE_SIZE,
+					.outputsize = outputsize,
+					.alg = pcl->algorithmformat,
+					.inplace_io = overlapped,
+					.partial_decoding = partial
+				 }, pagepool);
+
+out:
+	/* must handle all compressed pages before endding pages */
+	for (i = 0; i < clusterpages; ++i) {
+		page = compressed_pages[i];
+
+		if (erofs_page_is_managed(sbi, page))
+			continue;
+
+		/* recycle all individual staging pages */
+		(void)z_erofs_put_stagingpage(pagepool, page);
+
+		WRITE_ONCE(compressed_pages[i], NULL);
+	}
+
+	for (i = 0; i < nr_pages; ++i) {
+		page = pages[i];
+		if (!page)
+			continue;
+
+		DBG_BUGON(!page->mapping);
+
+		/* recycle all individual staging pages */
+		if (z_erofs_put_stagingpage(pagepool, page))
+			continue;
+
+		if (unlikely(err < 0))
+			SetPageError(page);
+
+		z_erofs_onlinepage_endio(page);
+	}
+
+	if (pages == z_pagemap_global)
+		mutex_unlock(&z_pagemap_global_lock);
+	else if (unlikely(pages != pages_onstack))
+		kvfree(pages);
+
+	cl->nr_pages = 0;
+	cl->vcnt = 0;
+
+	/* all cl locks MUST be taken before the following line */
+	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_NIL);
+
+	/* all cl locks SHOULD be released right now */
+	mutex_unlock(&cl->lock);
+
+	z_erofs_collection_put(cl);
+	return err;
+}
+
+static void z_erofs_vle_unzip_all(struct super_block *sb,
+				  struct z_erofs_unzip_io *io,
+				  struct list_head *pagepool)
+{
+	z_erofs_next_pcluster_t owned = io->head;
+
+	while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
+		struct z_erofs_pcluster *pcl;
+
+		/* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
+		DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
+
+		/* no possible that 'owned' equals NULL */
+		DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
+
+		pcl = container_of(owned, struct z_erofs_pcluster, next);
+		owned = READ_ONCE(pcl->next);
+
+		z_erofs_decompress_pcluster(sb, pcl, pagepool);
+	}
+}
+
+static void z_erofs_vle_unzip_wq(struct work_struct *work)
+{
+	struct z_erofs_unzip_io_sb *iosb =
+		container_of(work, struct z_erofs_unzip_io_sb, io.u.work);
+	LIST_HEAD(pagepool);
+
+	DBG_BUGON(iosb->io.head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+	z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &pagepool);
+
+	put_pages_list(&pagepool);
+	kvfree(iosb);
+}
+
+static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
+					       unsigned int nr,
+					       struct list_head *pagepool,
+					       struct address_space *mc,
+					       gfp_t gfp)
+{
+	/* determined at compile time to avoid too many #ifdefs */
+	const bool nocache = __builtin_constant_p(mc) ? !mc : false;
+	const pgoff_t index = pcl->obj.index;
+	bool tocache = false;
+
+	struct address_space *mapping;
+	struct page *oldpage, *page;
+
+	compressed_page_t t;
+	int justfound;
+
+repeat:
+	page = READ_ONCE(pcl->compressed_pages[nr]);
+	oldpage = page;
+
+	if (!page)
+		goto out_allocpage;
+
+	/*
+	 * the cached page has not been allocated and
+	 * an placeholder is out there, prepare it now.
+	 */
+	if (!nocache && page == PAGE_UNALLOCATED) {
+		tocache = true;
+		goto out_allocpage;
+	}
+
+	/* process the target tagged pointer */
+	t = tagptr_init(compressed_page_t, page);
+	justfound = tagptr_unfold_tags(t);
+	page = tagptr_unfold_ptr(t);
+
+	mapping = READ_ONCE(page->mapping);
+
+	/*
+	 * if managed cache is disabled, it's no way to
+	 * get such a cached-like page.
+	 */
+	if (nocache) {
+		/* if managed cache is disabled, it is impossible `justfound' */
+		DBG_BUGON(justfound);
+
+		/* and it should be locked, not uptodate, and not truncated */
+		DBG_BUGON(!PageLocked(page));
+		DBG_BUGON(PageUptodate(page));
+		DBG_BUGON(!mapping);
+		goto out;
+	}
+
+	/*
+	 * unmanaged (file) pages are all locked solidly,
+	 * therefore it is impossible for `mapping' to be NULL.
+	 */
+	if (mapping && mapping != mc)
+		/* ought to be unmanaged pages */
+		goto out;
+
+	lock_page(page);
+
+	/* only true if page reclaim goes wrong, should never happen */
+	DBG_BUGON(justfound && PagePrivate(page));
+
+	/* the page is still in manage cache */
+	if (page->mapping == mc) {
+		WRITE_ONCE(pcl->compressed_pages[nr], page);
+
+		ClearPageError(page);
+		if (!PagePrivate(page)) {
+			/*
+			 * impossible to be !PagePrivate(page) for
+			 * the current restriction as well if
+			 * the page is already in compressed_pages[].
+			 */
+			DBG_BUGON(!justfound);
+
+			justfound = 0;
+			set_page_private(page, (unsigned long)pcl);
+			SetPagePrivate(page);
+		}
+
+		/* no need to submit io if it is already up-to-date */
+		if (PageUptodate(page)) {
+			unlock_page(page);
+			page = NULL;
+		}
+		goto out;
+	}
+
+	/*
+	 * the managed page has been truncated, it's unsafe to
+	 * reuse this one, let's allocate a new cache-managed page.
+	 */
+	DBG_BUGON(page->mapping);
+	DBG_BUGON(!justfound);
+
+	tocache = true;
+	unlock_page(page);
+	put_page(page);
+out_allocpage:
+	page = __stagingpage_alloc(pagepool, gfp);
+	if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
+		list_add(&page->lru, pagepool);
+		cpu_relax();
+		goto repeat;
+	}
+	if (nocache || !tocache)
+		goto out;
+	if (add_to_page_cache_lru(page, mc, index + nr, gfp)) {
+		page->mapping = Z_EROFS_MAPPING_STAGING;
+		goto out;
+	}
+
+	set_page_private(page, (unsigned long)pcl);
+	SetPagePrivate(page);
+out:	/* the only exit (for tracing and debugging) */
+	return page;
+}
+
+static struct z_erofs_unzip_io *jobqueue_init(struct super_block *sb,
+					      struct z_erofs_unzip_io *io,
+					      bool foreground)
+{
+	struct z_erofs_unzip_io_sb *iosb;
+
+	if (foreground) {
+		/* waitqueue available for foreground io */
+		DBG_BUGON(!io);
+
+		init_waitqueue_head(&io->u.wait);
+		atomic_set(&io->pending_bios, 0);
+		goto out;
+	}
+
+	iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL);
+	DBG_BUGON(!iosb);
+
+	/* initialize fields in the allocated descriptor */
+	io = &iosb->io;
+	iosb->sb = sb;
+	INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
+out:
+	io->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
+	return io;
+}
+
+/* define decompression jobqueue types */
+enum {
+	JQ_BYPASS,
+	JQ_SUBMIT,
+	NR_JOBQUEUES,
+};
+
+static void *jobqueueset_init(struct super_block *sb,
+			      z_erofs_next_pcluster_t qtail[],
+			      struct z_erofs_unzip_io *q[],
+			      struct z_erofs_unzip_io *fgq,
+			      bool forcefg)
+{
+	/*
+	 * if managed cache is enabled, bypass jobqueue is needed,
+	 * no need to read from device for all pclusters in this queue.
+	 */
+	q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true);
+	qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
+
+	q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg);
+	qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
+
+	return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg));
+}
+
+static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
+				    z_erofs_next_pcluster_t qtail[],
+				    z_erofs_next_pcluster_t owned_head)
+{
+	z_erofs_next_pcluster_t *const submit_qtail = qtail[JQ_SUBMIT];
+	z_erofs_next_pcluster_t *const bypass_qtail = qtail[JQ_BYPASS];
+
+	DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+	if (owned_head == Z_EROFS_PCLUSTER_TAIL)
+		owned_head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
+
+	WRITE_ONCE(pcl->next, Z_EROFS_PCLUSTER_TAIL_CLOSED);
+
+	WRITE_ONCE(*submit_qtail, owned_head);
+	WRITE_ONCE(*bypass_qtail, &pcl->next);
+
+	qtail[JQ_BYPASS] = &pcl->next;
+}
+
+static bool postsubmit_is_all_bypassed(struct z_erofs_unzip_io *q[],
+				       unsigned int nr_bios,
+				       bool force_fg)
+{
+	/*
+	 * although background is preferred, no one is pending for submission.
+	 * don't issue workqueue for decompression but drop it directly instead.
+	 */
+	if (force_fg || nr_bios)
+		return false;
+
+	kvfree(container_of(q[JQ_SUBMIT], struct z_erofs_unzip_io_sb, io));
+	return true;
+}
+
+static bool z_erofs_vle_submit_all(struct super_block *sb,
+				   z_erofs_next_pcluster_t owned_head,
+				   struct list_head *pagepool,
+				   struct z_erofs_unzip_io *fgq,
+				   bool force_fg)
+{
+	struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
+	z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
+	struct z_erofs_unzip_io *q[NR_JOBQUEUES];
+	struct bio *bio;
+	void *bi_private;
+	/* since bio will be NULL, no need to initialize last_index */
+	pgoff_t uninitialized_var(last_index);
+	bool force_submit = false;
+	unsigned int nr_bios;
+
+	if (unlikely(owned_head == Z_EROFS_PCLUSTER_TAIL))
+		return false;
+
+	force_submit = false;
+	bio = NULL;
+	nr_bios = 0;
+	bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg);
+
+	/* by default, all need io submission */
+	q[JQ_SUBMIT]->head = owned_head;
+
+	do {
+		struct z_erofs_pcluster *pcl;
+		unsigned int clusterpages;
+		pgoff_t first_index;
+		struct page *page;
+		unsigned int i = 0, bypass = 0;
+		int err;
+
+		/* no possible 'owned_head' equals the following */
+		DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
+		DBG_BUGON(owned_head == Z_EROFS_PCLUSTER_NIL);
+
+		pcl = container_of(owned_head, struct z_erofs_pcluster, next);
+
+		clusterpages = BIT(pcl->clusterbits);
+
+		/* close the main owned chain at first */
+		owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+				     Z_EROFS_PCLUSTER_TAIL_CLOSED);
+
+		first_index = pcl->obj.index;
+		force_submit |= (first_index != last_index + 1);
+
+repeat:
+		page = pickup_page_for_submission(pcl, i, pagepool,
+						  MNGD_MAPPING(sbi),
+						  GFP_NOFS);
+		if (!page) {
+			force_submit = true;
+			++bypass;
+			goto skippage;
+		}
+
+		if (bio && force_submit) {
+submit_bio_retry:
+			__submit_bio(bio, REQ_OP_READ, 0);
+			bio = NULL;
+		}
+
+		if (!bio) {
+			bio = erofs_grab_bio(sb, first_index + i,
+					     BIO_MAX_PAGES, bi_private,
+					     z_erofs_vle_read_endio, true);
+			++nr_bios;
+		}
+
+		err = bio_add_page(bio, page, PAGE_SIZE, 0);
+		if (err < PAGE_SIZE)
+			goto submit_bio_retry;
+
+		force_submit = false;
+		last_index = first_index + i;
+skippage:
+		if (++i < clusterpages)
+			goto repeat;
+
+		if (bypass < clusterpages)
+			qtail[JQ_SUBMIT] = &pcl->next;
+		else
+			move_to_bypass_jobqueue(pcl, qtail, owned_head);
+	} while (owned_head != Z_EROFS_PCLUSTER_TAIL);
+
+	if (bio)
+		__submit_bio(bio, REQ_OP_READ, 0);
+
+	if (postsubmit_is_all_bypassed(q, nr_bios, force_fg))
+		return true;
+
+	z_erofs_vle_unzip_kickoff(bi_private, nr_bios);
+	return true;
+}
+
+static void z_erofs_submit_and_unzip(struct super_block *sb,
+				     struct z_erofs_collector *clt,
+				     struct list_head *pagepool,
+				     bool force_fg)
+{
+	struct z_erofs_unzip_io io[NR_JOBQUEUES];
+
+	if (!z_erofs_vle_submit_all(sb, clt->owned_head,
+				    pagepool, io, force_fg))
+		return;
+
+	/* decompress no I/O pclusters immediately */
+	z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool);
+
+	if (!force_fg)
+		return;
+
+	/* wait until all bios are completed */
+	wait_event(io[JQ_SUBMIT].u.wait,
+		   !atomic_read(&io[JQ_SUBMIT].pending_bios));
+
+	/* let's synchronous decompression */
+	z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool);
+}
+
+static int z_erofs_vle_normalaccess_readpage(struct file *file,
+					     struct page *page)
+{
+	struct inode *const inode = page->mapping->host;
+	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+	int err;
+	LIST_HEAD(pagepool);
+
+	trace_erofs_readpage(page, false);
+
+	f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
+
+	err = z_erofs_do_read_page(&f, page, &pagepool);
+	(void)z_erofs_collector_end(&f.clt);
+
+	/* if some compressed cluster ready, need submit them anyway */
+	z_erofs_submit_and_unzip(inode->i_sb, &f.clt, &pagepool, true);
+
+	if (err)
+		errln("%s, failed to read, err [%d]", __func__, err);
+
+	if (f.map.mpage)
+		put_page(f.map.mpage);
+
+	/* clean up the remaining free pages */
+	put_pages_list(&pagepool);
+	return err;
+}
+
+static bool should_decompress_synchronously(struct erofs_sb_info *sbi,
+					    unsigned int nr)
+{
+	return nr <= sbi->max_sync_decompress_pages;
+}
+
+static int z_erofs_vle_normalaccess_readpages(struct file *filp,
+					      struct address_space *mapping,
+					      struct list_head *pages,
+					      unsigned int nr_pages)
+{
+	struct inode *const inode = mapping->host;
+	struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
+
+	bool sync = should_decompress_synchronously(sbi, nr_pages);
+	struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
+	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+	struct page *head = NULL;
+	LIST_HEAD(pagepool);
+
+	trace_erofs_readpages(mapping->host, lru_to_page(pages),
+			      nr_pages, false);
+
+	f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;
+
+	for (; nr_pages; --nr_pages) {
+		struct page *page = lru_to_page(pages);
+
+		prefetchw(&page->flags);
+		list_del(&page->lru);
+
+		/*
+		 * A pure asynchronous readahead is indicated if
+		 * a PG_readahead marked page is hitted at first.
+		 * Let's also do asynchronous decompression for this case.
+		 */
+		sync &= !(PageReadahead(page) && !head);
+
+		if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+			list_add(&page->lru, &pagepool);
+			continue;
+		}
+
+		set_page_private(page, (unsigned long)head);
+		head = page;
+	}
+
+	while (head) {
+		struct page *page = head;
+		int err;
+
+		/* traversal in reverse order */
+		head = (void *)page_private(page);
+
+		err = z_erofs_do_read_page(&f, page, &pagepool);
+		if (err) {
+			struct erofs_vnode *vi = EROFS_V(inode);
+
+			errln("%s, readahead error at page %lu of nid %llu",
+			      __func__, page->index, vi->nid);
+		}
+		put_page(page);
+	}
+
+	(void)z_erofs_collector_end(&f.clt);
+
+	z_erofs_submit_and_unzip(inode->i_sb, &f.clt, &pagepool, sync);
+
+	if (f.map.mpage)
+		put_page(f.map.mpage);
+
+	/* clean up the remaining free pages */
+	put_pages_list(&pagepool);
+	return 0;
+}
+
+const struct address_space_operations z_erofs_vle_normalaccess_aops = {
+	.readpage = z_erofs_vle_normalaccess_readpage,
+	.readpages = z_erofs_vle_normalaccess_readpages,
+};
+
diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h
new file mode 100644
index 000000000000..4fc547bc01f9
--- /dev/null
+++ b/fs/erofs/zdata.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_ZDATA_H
+#define __EROFS_FS_ZDATA_H
+
+#include "internal.h"
+#include "zpvec.h"
+
+#define Z_EROFS_NR_INLINE_PAGEVECS      3
+
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Modifiable by initialization/destruction paths and read-only
+ *    for everyone else;
+ *
+ * L: Field should be protected by pageset lock;
+ *
+ * A: Field should be accessed / updated in atomic for parallelized code.
+ */
+struct z_erofs_collection {
+	struct mutex lock;
+
+	/* I: page offset of start position of decompression */
+	unsigned short pageofs;
+
+	/* L: maximum relative page index in pagevec[] */
+	unsigned short nr_pages;
+
+	/* L: total number of pages in pagevec[] */
+	unsigned int vcnt;
+
+	union {
+		/* L: inline a certain number of pagevecs for bootstrap */
+		erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
+
+		/* I: can be used to free the pcluster by RCU. */
+		struct rcu_head rcu;
+	};
+};
+
+#define Z_EROFS_PCLUSTER_FULL_LENGTH    0x00000001
+#define Z_EROFS_PCLUSTER_LENGTH_BIT     1
+
+/*
+ * let's leave a type here in case of introducing
+ * another tagged pointer later.
+ */
+typedef void *z_erofs_next_pcluster_t;
+
+struct z_erofs_pcluster {
+	struct erofs_workgroup obj;
+	struct z_erofs_collection primary_collection;
+
+	/* A: point to next chained pcluster or TAILs */
+	z_erofs_next_pcluster_t next;
+
+	/* A: compressed pages (including multi-usage pages) */
+	struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
+
+	/* A: lower limit of decompressed length and if full length or not */
+	unsigned int length;
+
+	/* I: compression algorithm format */
+	unsigned char algorithmformat;
+	/* I: bit shift of physical cluster size */
+	unsigned char clusterbits;
+};
+
+#define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection)
+
+/* let's avoid the valid 32-bit kernel addresses */
+
+/* the chained workgroup has't submitted io (still open) */
+#define Z_EROFS_PCLUSTER_TAIL           ((void *)0x5F0ECAFE)
+/* the chained workgroup has already submitted io */
+#define Z_EROFS_PCLUSTER_TAIL_CLOSED    ((void *)0x5F0EDEAD)
+
+#define Z_EROFS_PCLUSTER_NIL            (NULL)
+
+#define Z_EROFS_WORKGROUP_SIZE  sizeof(struct z_erofs_pcluster)
+
+struct z_erofs_unzip_io {
+	atomic_t pending_bios;
+	z_erofs_next_pcluster_t head;
+
+	union {
+		wait_queue_head_t wait;
+		struct work_struct work;
+	} u;
+};
+
+struct z_erofs_unzip_io_sb {
+	struct z_erofs_unzip_io io;
+	struct super_block *sb;
+};
+
+#define MNGD_MAPPING(sbi)	((sbi)->managed_cache->i_mapping)
+static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi,
+					 struct page *page)
+{
+	return page->mapping == MNGD_MAPPING(sbi);
+}
+
+#define Z_EROFS_ONLINEPAGE_COUNT_BITS   2
+#define Z_EROFS_ONLINEPAGE_COUNT_MASK   ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
+#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT  (Z_EROFS_ONLINEPAGE_COUNT_BITS)
+
+/*
+ * waiters (aka. ongoing_packs): # to unlock the page
+ * sub-index: 0 - for partial page, >= 1 full page sub-index
+ */
+typedef atomic_t z_erofs_onlinepage_t;
+
+/* type punning */
+union z_erofs_onlinepage_converter {
+	z_erofs_onlinepage_t *o;
+	unsigned long *v;
+};
+
+static inline unsigned int z_erofs_onlinepage_index(struct page *page)
+{
+	union z_erofs_onlinepage_converter u;
+
+	DBG_BUGON(!PagePrivate(page));
+	u.v = &page_private(page);
+
+	return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+}
+
+static inline void z_erofs_onlinepage_init(struct page *page)
+{
+	union {
+		z_erofs_onlinepage_t o;
+		unsigned long v;
+	/* keep from being unlocked in advance */
+	} u = { .o = ATOMIC_INIT(1) };
+
+	set_page_private(page, u.v);
+	smp_wmb();
+	SetPagePrivate(page);
+}
+
+static inline void z_erofs_onlinepage_fixup(struct page *page,
+	uintptr_t index, bool down)
+{
+	unsigned long *p, o, v, id;
+repeat:
+	p = &page_private(page);
+	o = READ_ONCE(*p);
+
+	id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+	if (id) {
+		if (!index)
+			return;
+
+		DBG_BUGON(id != index);
+	}
+
+	v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+		((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
+	if (cmpxchg(p, o, v) != o)
+		goto repeat;
+}
+
+static inline void z_erofs_onlinepage_endio(struct page *page)
+{
+	union z_erofs_onlinepage_converter u;
+	unsigned int v;
+
+	DBG_BUGON(!PagePrivate(page));
+	u.v = &page_private(page);
+
+	v = atomic_dec_return(u.o);
+	if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+		ClearPagePrivate(page);
+		if (!PageError(page))
+			SetPageUptodate(page);
+		unlock_page(page);
+	}
+	debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
+}
+
+#define Z_EROFS_VMAP_ONSTACK_PAGES	\
+	min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
+#define Z_EROFS_VMAP_GLOBAL_PAGES	2048
+
+#endif
+
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
new file mode 100644
index 000000000000..4dc9cec01297
--- /dev/null
+++ b/fs/erofs/zmap.c
@@ -0,0 +1,466 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2018-2019 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "internal.h"
+#include <asm/unaligned.h>
+#include <trace/events/erofs.h>
+
+int z_erofs_fill_inode(struct inode *inode)
+{
+	struct erofs_vnode *const vi = EROFS_V(inode);
+
+	if (vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
+		vi->z_advise = 0;
+		vi->z_algorithmtype[0] = 0;
+		vi->z_algorithmtype[1] = 0;
+		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
+		vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
+		vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
+		set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
+	}
+
+	inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops;
+	return 0;
+}
+
+static int fill_inode_lazy(struct inode *inode)
+{
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct super_block *const sb = inode->i_sb;
+	int err;
+	erofs_off_t pos;
+	struct page *page;
+	void *kaddr;
+	struct z_erofs_map_header *h;
+
+	if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
+		return 0;
+
+	if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_Z_BIT, TASK_KILLABLE))
+		return -ERESTARTSYS;
+
+	err = 0;
+	if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
+		goto out_unlock;
+
+	DBG_BUGON(vi->datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
+
+	pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize +
+		    vi->xattr_isize, 8);
+	page = erofs_get_meta_page(sb, erofs_blknr(pos), false);
+	if (IS_ERR(page)) {
+		err = PTR_ERR(page);
+		goto out_unlock;
+	}
+
+	kaddr = kmap_atomic(page);
+
+	h = kaddr + erofs_blkoff(pos);
+	vi->z_advise = le16_to_cpu(h->h_advise);
+	vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
+	vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
+
+	if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) {
+		errln("unknown compression format %u for nid %llu, please upgrade kernel",
+		      vi->z_algorithmtype[0], vi->nid);
+		err = -EOPNOTSUPP;
+		goto unmap_done;
+	}
+
+	vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7);
+	vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits +
+					((h->h_clusterbits >> 3) & 3);
+
+	if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) {
+		errln("unsupported physical clusterbits %u for nid %llu, please upgrade kernel",
+		      vi->z_physical_clusterbits[0], vi->nid);
+		err = -EOPNOTSUPP;
+		goto unmap_done;
+	}
+
+	vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
+					((h->h_clusterbits >> 5) & 7);
+	set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
+unmap_done:
+	kunmap_atomic(kaddr);
+	unlock_page(page);
+	put_page(page);
+out_unlock:
+	clear_and_wake_up_bit(EROFS_V_BL_Z_BIT, &vi->flags);
+	return err;
+}
+
+struct z_erofs_maprecorder {
+	struct inode *inode;
+	struct erofs_map_blocks *map;
+	void *kaddr;
+
+	unsigned long lcn;
+	/* compression extent information gathered */
+	u8  type;
+	u16 clusterofs;
+	u16 delta[2];
+	erofs_blk_t pblk;
+};
+
+static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m,
+				  erofs_blk_t eblk)
+{
+	struct super_block *const sb = m->inode->i_sb;
+	struct erofs_map_blocks *const map = m->map;
+	struct page *mpage = map->mpage;
+
+	if (mpage) {
+		if (mpage->index == eblk) {
+			if (!m->kaddr)
+				m->kaddr = kmap_atomic(mpage);
+			return 0;
+		}
+
+		if (m->kaddr) {
+			kunmap_atomic(m->kaddr);
+			m->kaddr = NULL;
+		}
+		put_page(mpage);
+	}
+
+	mpage = erofs_get_meta_page(sb, eblk, false);
+	if (IS_ERR(mpage)) {
+		map->mpage = NULL;
+		return PTR_ERR(mpage);
+	}
+	m->kaddr = kmap_atomic(mpage);
+	unlock_page(mpage);
+	map->mpage = mpage;
+	return 0;
+}
+
+static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+					     unsigned long lcn)
+{
+	struct inode *const inode = m->inode;
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
+	const erofs_off_t pos =
+		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
+					       vi->xattr_isize) +
+		lcn * sizeof(struct z_erofs_vle_decompressed_index);
+	struct z_erofs_vle_decompressed_index *di;
+	unsigned int advise, type;
+	int err;
+
+	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+	if (err)
+		return err;
+
+	m->lcn = lcn;
+	di = m->kaddr + erofs_blkoff(pos);
+
+	advise = le16_to_cpu(di->di_advise);
+	type = (advise >> Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT) &
+		((1 << Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) - 1);
+	switch (type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		m->clusterofs = 1 << vi->z_logical_clusterbits;
+		m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
+		m->delta[1] = le16_to_cpu(di->di_u.delta[1]);
+		break;
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		m->clusterofs = le16_to_cpu(di->di_clusterofs);
+		m->pblk = le32_to_cpu(di->di_u.blkaddr);
+		break;
+	default:
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+	m->type = type;
+	return 0;
+}
+
+static unsigned int decode_compactedbits(unsigned int lobits,
+					 unsigned int lomask,
+					 u8 *in, unsigned int pos, u8 *type)
+{
+	const unsigned int v = get_unaligned_le32(in + pos / 8) >> (pos & 7);
+	const unsigned int lo = v & lomask;
+
+	*type = (v >> lobits) & 3;
+	return lo;
+}
+
+static int unpack_compacted_index(struct z_erofs_maprecorder *m,
+				  unsigned int amortizedshift,
+				  unsigned int eofs)
+{
+	struct erofs_vnode *const vi = EROFS_V(m->inode);
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	const unsigned int lomask = (1 << lclusterbits) - 1;
+	unsigned int vcnt, base, lo, encodebits, nblk;
+	int i;
+	u8 *in, type;
+
+	if (1 << amortizedshift == 4)
+		vcnt = 2;
+	else if (1 << amortizedshift == 2 && lclusterbits == 12)
+		vcnt = 16;
+	else
+		return -EOPNOTSUPP;
+
+	encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt;
+	base = round_down(eofs, vcnt << amortizedshift);
+	in = m->kaddr + base;
+
+	i = (eofs - base) >> amortizedshift;
+
+	lo = decode_compactedbits(lclusterbits, lomask,
+				  in, encodebits * i, &type);
+	m->type = type;
+	if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) {
+		m->clusterofs = 1 << lclusterbits;
+		if (i + 1 != vcnt) {
+			m->delta[0] = lo;
+			return 0;
+		}
+		/*
+		 * since the last lcluster in the pack is special,
+		 * of which lo saves delta[1] rather than delta[0].
+		 * Hence, get delta[0] by the previous lcluster indirectly.
+		 */
+		lo = decode_compactedbits(lclusterbits, lomask,
+					  in, encodebits * (i - 1), &type);
+		if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+			lo = 0;
+		m->delta[0] = lo + 1;
+		return 0;
+	}
+	m->clusterofs = lo;
+	m->delta[0] = 0;
+	/* figout out blkaddr (pblk) for HEAD lclusters */
+	nblk = 1;
+	while (i > 0) {
+		--i;
+		lo = decode_compactedbits(lclusterbits, lomask,
+					  in, encodebits * i, &type);
+		if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD)
+			i -= lo;
+
+		if (i >= 0)
+			++nblk;
+	}
+	in += (vcnt << amortizedshift) - sizeof(__le32);
+	m->pblk = le32_to_cpu(*(__le32 *)in) + nblk;
+	return 0;
+}
+
+static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+					    unsigned long lcn)
+{
+	struct inode *const inode = m->inode;
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
+					vi->inode_isize + vi->xattr_isize, 8) +
+		sizeof(struct z_erofs_map_header);
+	const unsigned int totalidx = DIV_ROUND_UP(inode->i_size, EROFS_BLKSIZ);
+	unsigned int compacted_4b_initial, compacted_2b;
+	unsigned int amortizedshift;
+	erofs_off_t pos;
+	int err;
+
+	if (lclusterbits != 12)
+		return -EOPNOTSUPP;
+
+	if (lcn >= totalidx)
+		return -EINVAL;
+
+	m->lcn = lcn;
+	/* used to align to 32-byte (compacted_2b) alignment */
+	compacted_4b_initial = (32 - ebase % 32) / 4;
+	if (compacted_4b_initial == 32 / 4)
+		compacted_4b_initial = 0;
+
+	if (vi->z_advise & Z_EROFS_ADVISE_COMPACTED_2B)
+		compacted_2b = rounddown(totalidx - compacted_4b_initial, 16);
+	else
+		compacted_2b = 0;
+
+	pos = ebase;
+	if (lcn < compacted_4b_initial) {
+		amortizedshift = 2;
+		goto out;
+	}
+	pos += compacted_4b_initial * 4;
+	lcn -= compacted_4b_initial;
+
+	if (lcn < compacted_2b) {
+		amortizedshift = 1;
+		goto out;
+	}
+	pos += compacted_2b * 2;
+	lcn -= compacted_2b;
+	amortizedshift = 2;
+out:
+	pos += lcn * (1 << amortizedshift);
+	err = z_erofs_reload_indexes(m, erofs_blknr(pos));
+	if (err)
+		return err;
+	return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos));
+}
+
+static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m,
+				      unsigned int lcn)
+{
+	const unsigned int datamode = EROFS_V(m->inode)->datamode;
+
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
+		return vle_legacy_load_cluster_from_disk(m, lcn);
+
+	if (datamode == EROFS_INODE_FLAT_COMPRESSION)
+		return compacted_load_cluster_from_disk(m, lcn);
+
+	return -EINVAL;
+}
+
+static int vle_extent_lookback(struct z_erofs_maprecorder *m,
+			       unsigned int lookback_distance)
+{
+	struct erofs_vnode *const vi = EROFS_V(m->inode);
+	struct erofs_map_blocks *const map = m->map;
+	const unsigned int lclusterbits = vi->z_logical_clusterbits;
+	unsigned long lcn = m->lcn;
+	int err;
+
+	if (lcn < lookback_distance) {
+		errln("bogus lookback distance @ nid %llu", vi->nid);
+		DBG_BUGON(1);
+		return -EFSCORRUPTED;
+	}
+
+	/* load extent head logical cluster if needed */
+	lcn -= lookback_distance;
+	err = vle_load_cluster_from_disk(m, lcn);
+	if (err)
+		return err;
+
+	switch (m->type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		if (unlikely(!m->delta[0])) {
+			errln("invalid lookback distance 0 at nid %llu",
+			      vi->nid);
+			DBG_BUGON(1);
+			return -EFSCORRUPTED;
+		}
+		return vle_extent_lookback(m, m->delta[0]);
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+		map->m_flags &= ~EROFS_MAP_ZIPPED;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		map->m_la = (lcn << lclusterbits) | m->clusterofs;
+		break;
+	default:
+		errln("unknown type %u at lcn %lu of nid %llu",
+		      m->type, lcn, vi->nid);
+		DBG_BUGON(1);
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+int z_erofs_map_blocks_iter(struct inode *inode,
+			    struct erofs_map_blocks *map,
+			    int flags)
+{
+	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct z_erofs_maprecorder m = {
+		.inode = inode,
+		.map = map,
+	};
+	int err = 0;
+	unsigned int lclusterbits, endoff;
+	unsigned long long ofs, end;
+
+	trace_z_erofs_map_blocks_iter_enter(inode, map, flags);
+
+	/* when trying to read beyond EOF, leave it unmapped */
+	if (unlikely(map->m_la >= inode->i_size)) {
+		map->m_llen = map->m_la + 1 - inode->i_size;
+		map->m_la = inode->i_size;
+		map->m_flags = 0;
+		goto out;
+	}
+
+	err = fill_inode_lazy(inode);
+	if (err)
+		goto out;
+
+	lclusterbits = vi->z_logical_clusterbits;
+	ofs = map->m_la;
+	m.lcn = ofs >> lclusterbits;
+	endoff = ofs & ((1 << lclusterbits) - 1);
+
+	err = vle_load_cluster_from_disk(&m, m.lcn);
+	if (err)
+		goto unmap_out;
+
+	map->m_flags = EROFS_MAP_ZIPPED;	/* by default, compressed */
+	end = (m.lcn + 1ULL) << lclusterbits;
+
+	switch (m.type) {
+	case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+		if (endoff >= m.clusterofs)
+			map->m_flags &= ~EROFS_MAP_ZIPPED;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+		if (endoff >= m.clusterofs) {
+			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+			break;
+		}
+		/* m.lcn should be >= 1 if endoff < m.clusterofs */
+		if (unlikely(!m.lcn)) {
+			errln("invalid logical cluster 0 at nid %llu",
+			      vi->nid);
+			err = -EFSCORRUPTED;
+			goto unmap_out;
+		}
+		end = (m.lcn << lclusterbits) | m.clusterofs;
+		map->m_flags |= EROFS_MAP_FULL_MAPPED;
+		m.delta[0] = 1;
+		/* fallthrough */
+	case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
+		/* get the correspoinding first chunk */
+		err = vle_extent_lookback(&m, m.delta[0]);
+		if (unlikely(err))
+			goto unmap_out;
+		break;
+	default:
+		errln("unknown type %u at offset %llu of nid %llu",
+		      m.type, ofs, vi->nid);
+		err = -EOPNOTSUPP;
+		goto unmap_out;
+	}
+
+	map->m_llen = end - map->m_la;
+	map->m_plen = 1 << lclusterbits;
+	map->m_pa = blknr_to_addr(m.pblk);
+	map->m_flags |= EROFS_MAP_MAPPED;
+
+unmap_out:
+	if (m.kaddr)
+		kunmap_atomic(m.kaddr);
+
+out:
+	debugln("%s, m_la %llu m_pa %llu m_llen %llu m_plen %llu m_flags 0%o",
+		__func__, map->m_la, map->m_pa,
+		map->m_llen, map->m_plen, map->m_flags);
+
+	trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
+
+	/* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
+	DBG_BUGON(err < 0 && err != -ENOMEM);
+	return err;
+}
+
diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h
new file mode 100644
index 000000000000..bd3cee16491c
--- /dev/null
+++ b/fs/erofs/zpvec.h
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_ZPVEC_H
+#define __EROFS_FS_ZPVEC_H
+
+#include "tagptr.h"
+
+/* page type in pagevec for decompress subsystem */
+enum z_erofs_page_type {
+	/* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
+	Z_EROFS_PAGE_TYPE_EXCLUSIVE,
+
+	Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
+
+	Z_EROFS_VLE_PAGE_TYPE_HEAD,
+	Z_EROFS_VLE_PAGE_TYPE_MAX
+};
+
+extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
+	__bad_page_type_exclusive(void);
+
+/* pagevec tagged pointer */
+typedef tagptr2_t	erofs_vtptr_t;
+
+/* pagevec collector */
+struct z_erofs_pagevec_ctor {
+	struct page *curr, *next;
+	erofs_vtptr_t *pages;
+
+	unsigned int nr, index;
+};
+
+static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
+					     bool atomic)
+{
+	if (!ctor->curr)
+		return;
+
+	if (atomic)
+		kunmap_atomic(ctor->pages);
+	else
+		kunmap(ctor->curr);
+}
+
+static inline struct page *
+z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
+			       unsigned int nr)
+{
+	unsigned int index;
+
+	/* keep away from occupied pages */
+	if (ctor->next)
+		return ctor->next;
+
+	for (index = 0; index < nr; ++index) {
+		const erofs_vtptr_t t = ctor->pages[index];
+		const unsigned int tags = tagptr_unfold_tags(t);
+
+		if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
+			return tagptr_unfold_ptr(t);
+	}
+	DBG_BUGON(nr >= ctor->nr);
+	return NULL;
+}
+
+static inline void
+z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
+			      bool atomic)
+{
+	struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
+
+	z_erofs_pagevec_ctor_exit(ctor, atomic);
+
+	ctor->curr = next;
+	ctor->next = NULL;
+	ctor->pages = atomic ?
+		kmap_atomic(ctor->curr) : kmap(ctor->curr);
+
+	ctor->nr = PAGE_SIZE / sizeof(struct page *);
+	ctor->index = 0;
+}
+
+static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
+					     unsigned int nr,
+					     erofs_vtptr_t *pages,
+					     unsigned int i)
+{
+	ctor->nr = nr;
+	ctor->curr = ctor->next = NULL;
+	ctor->pages = pages;
+
+	if (i >= nr) {
+		i -= nr;
+		z_erofs_pagevec_ctor_pagedown(ctor, false);
+		while (i > ctor->nr) {
+			i -= ctor->nr;
+			z_erofs_pagevec_ctor_pagedown(ctor, false);
+		}
+	}
+	ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
+	ctor->index = i;
+}
+
+static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
+					   struct page *page,
+					   enum z_erofs_page_type type,
+					   bool *occupied)
+{
+	*occupied = false;
+	if (unlikely(!ctor->next && type))
+		if (ctor->index + 1 == ctor->nr)
+			return false;
+
+	if (unlikely(ctor->index >= ctor->nr))
+		z_erofs_pagevec_ctor_pagedown(ctor, false);
+
+	/* exclusive page type must be 0 */
+	if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
+		__bad_page_type_exclusive();
+
+	/* should remind that collector->next never equal to 1, 2 */
+	if (type == (uintptr_t)ctor->next) {
+		ctor->next = page;
+		*occupied = true;
+	}
+	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type);
+	return true;
+}
+
+static inline struct page *
+z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor,
+			enum z_erofs_page_type *type)
+{
+	erofs_vtptr_t t;
+
+	if (unlikely(ctor->index >= ctor->nr)) {
+		DBG_BUGON(!ctor->next);
+		z_erofs_pagevec_ctor_pagedown(ctor, true);
+	}
+
+	t = ctor->pages[ctor->index];
+
+	*type = tagptr_unfold_tags(t);
+
+	/* should remind that collector->next never equal to 1, 2 */
+	if (*type == (uintptr_t)ctor->next)
+		ctor->next = tagptr_unfold_ptr(t);
+
+	ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, NULL, 0);
+	return tagptr_unfold_ptr(t);
+}
+#endif
+
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
new file mode 100644
index 000000000000..bfb2da9c4eee
--- /dev/null
+++ b/include/trace/events/erofs.h
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM erofs
+
+#if !defined(_TRACE_EROFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EROFS_H
+
+#include <linux/tracepoint.h>
+
+#define show_dev(dev)		MAJOR(dev), MINOR(dev)
+#define show_dev_nid(entry)	show_dev(entry->dev), entry->nid
+
+#define show_file_type(type)						\
+	__print_symbolic(type,						\
+		{ 0,		"FILE" },				\
+		{ 1,		"DIR" })
+
+#define show_map_flags(flags) __print_flags(flags, "|",	\
+	{ EROFS_GET_BLOCKS_RAW,	"RAW" })
+
+#define show_mflags(flags) __print_flags(flags, "",	\
+	{ EROFS_MAP_MAPPED,	"M" },			\
+	{ EROFS_MAP_META,	"I" },			\
+	{ EROFS_MAP_ZIPPED,	"Z" })
+
+TRACE_EVENT(erofs_lookup,
+
+	TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
+
+	TP_ARGS(dir, dentry, flags),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev	)
+		__field(erofs_nid_t,	nid	)
+		__field(const char *,	name	)
+		__field(unsigned int,	flags	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= dir->i_sb->s_dev;
+		__entry->nid	= EROFS_V(dir)->nid;
+		__entry->name	= dentry->d_name.name;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev = (%d,%d), pnid = %llu, name:%s, flags:%x",
+		show_dev_nid(__entry),
+		__entry->name,
+		__entry->flags)
+);
+
+TRACE_EVENT(erofs_fill_inode,
+	TP_PROTO(struct inode *inode, int isdir),
+	TP_ARGS(inode, isdir),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev	)
+		__field(erofs_nid_t,	nid	)
+		__field(erofs_blk_t,	blkaddr )
+		__field(unsigned int,	ofs	)
+		__field(int,		isdir	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->nid		= EROFS_V(inode)->nid;
+		__entry->blkaddr	= erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
+		__entry->ofs		= erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
+		__entry->isdir		= isdir;
+	),
+
+	TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u, isdir %d",
+		  show_dev_nid(__entry),
+		  __entry->blkaddr, __entry->ofs,
+		  __entry->isdir)
+);
+
+TRACE_EVENT(erofs_readpage,
+
+	TP_PROTO(struct page *page, bool raw),
+
+	TP_ARGS(page, raw),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev	)
+		__field(erofs_nid_t,    nid     )
+		__field(int,		dir	)
+		__field(pgoff_t,	index	)
+		__field(int,		uptodate)
+		__field(bool,		raw	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= page->mapping->host->i_sb->s_dev;
+		__entry->nid	= EROFS_V(page->mapping->host)->nid;
+		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
+		__entry->index	= page->index;
+		__entry->uptodate = PageUptodate(page);
+		__entry->raw = raw;
+	),
+
+	TP_printk("dev = (%d,%d), nid = %llu, %s, index = %lu, uptodate = %d "
+		"raw = %d",
+		show_dev_nid(__entry),
+		show_file_type(__entry->dir),
+		(unsigned long)__entry->index,
+		__entry->uptodate,
+		__entry->raw)
+);
+
+TRACE_EVENT(erofs_readpages,
+
+	TP_PROTO(struct inode *inode, struct page *page, unsigned int nrpage,
+		bool raw),
+
+	TP_ARGS(inode, page, nrpage, raw),
+
+	TP_STRUCT__entry(
+		__field(dev_t,		dev	)
+		__field(erofs_nid_t,	nid	)
+		__field(pgoff_t,	start	)
+		__field(unsigned int,	nrpage	)
+		__field(bool,		raw	)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->start	= page->index;
+		__entry->nrpage	= nrpage;
+		__entry->raw	= raw;
+	),
+
+	TP_printk("dev = (%d,%d), nid = %llu, start = %lu nrpage = %u raw = %d",
+		show_dev_nid(__entry),
+		(unsigned long)__entry->start,
+		__entry->nrpage,
+		__entry->raw)
+);
+
+DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned int flags),
+
+	TP_ARGS(inode, map, flags),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	erofs_nid_t,	nid		)
+		__field(	erofs_off_t,	la		)
+		__field(	u64,		llen		)
+		__field(	unsigned int,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->dev    = inode->i_sb->s_dev;
+		__entry->nid    = EROFS_V(inode)->nid;
+		__entry->la	= map->m_la;
+		__entry->llen	= map->m_llen;
+		__entry->flags	= flags;
+	),
+
+	TP_printk("dev = (%d,%d), nid = %llu, la %llu llen %llu flags %s",
+		  show_dev_nid(__entry),
+		  __entry->la, __entry->llen,
+		  __entry->flags ? show_map_flags(__entry->flags) : "NULL")
+);
+
+DEFINE_EVENT(erofs__map_blocks_enter, erofs_map_blocks_flatmode_enter,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned flags),
+
+	TP_ARGS(inode, map, flags)
+);
+
+DEFINE_EVENT(erofs__map_blocks_enter, z_erofs_map_blocks_iter_enter,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned int flags),
+
+	TP_ARGS(inode, map, flags)
+);
+
+DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned int flags, int ret),
+
+	TP_ARGS(inode, map, flags, ret),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	erofs_nid_t,	nid		)
+		__field(        unsigned int,   flags           )
+		__field(	erofs_off_t,	la		)
+		__field(	erofs_off_t,	pa		)
+		__field(	u64,		llen		)
+		__field(	u64,		plen		)
+		__field(        unsigned int,	mflags		)
+		__field(	int,		ret		)
+	),
+
+	TP_fast_assign(
+		__entry->dev    = inode->i_sb->s_dev;
+		__entry->nid    = EROFS_V(inode)->nid;
+		__entry->flags	= flags;
+		__entry->la	= map->m_la;
+		__entry->pa	= map->m_pa;
+		__entry->llen	= map->m_llen;
+		__entry->plen	= map->m_plen;
+		__entry->mflags	= map->m_flags;
+		__entry->ret	= ret;
+	),
+
+	TP_printk("dev = (%d,%d), nid = %llu, flags %s "
+		  "la %llu pa %llu llen %llu plen %llu mflags %s ret %d",
+		  show_dev_nid(__entry),
+		  __entry->flags ? show_map_flags(__entry->flags) : "NULL",
+		  __entry->la, __entry->pa, __entry->llen, __entry->plen,
+		  show_mflags(__entry->mflags), __entry->ret)
+);
+
+DEFINE_EVENT(erofs__map_blocks_exit, erofs_map_blocks_flatmode_exit,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned flags, int ret),
+
+	TP_ARGS(inode, map, flags, ret)
+);
+
+DEFINE_EVENT(erofs__map_blocks_exit, z_erofs_map_blocks_iter_exit,
+	TP_PROTO(struct inode *inode, struct erofs_map_blocks *map,
+		 unsigned int flags, int ret),
+
+	TP_ARGS(inode, map, flags, ret)
+);
+
+TRACE_EVENT(erofs_destroy_inode,
+	TP_PROTO(struct inode *inode),
+
+	TP_ARGS(inode),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,		dev		)
+		__field(	erofs_nid_t,	nid		)
+	),
+
+	TP_fast_assign(
+		__entry->dev	= inode->i_sb->s_dev;
+		__entry->nid	= EROFS_V(inode)->nid;
+	),
+
+	TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))
+);
+
+#endif /* _TRACE_EROFS_H */
+
+ /* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 1274c692e59c..903cc2d2750b 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -19,6 +19,7 @@
 #define SQUASHFS_MAGIC		0x73717368
 #define ECRYPTFS_SUPER_MAGIC	0xf15f
 #define EFS_SUPER_MAGIC		0x414A53
+#define EROFS_SUPER_MAGIC_V1	0xE0F5E1E2
 #define EXT2_SUPER_MAGIC	0xEF53
 #define EXT3_SUPER_MAGIC	0xEF53
 #define XENFS_SUPER_MAGIC	0xabba1974
-- 
cgit v1.2.3


From f3acd33d840d3ea3e1233d234605c85cbbf26054 Mon Sep 17 00:00:00 2001
From: xiaolinkui <xiaolinkui@kylinos.cn>
Date: Thu, 22 Aug 2019 14:58:16 +0800
Subject: net: use unlikely for dql_avail case

This is an unlikely case, use unlikely() on it seems logical.

Signed-off-by: xiaolinkui <xiaolinkui@kylinos.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55ac223553f8..b5d28dadf964 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3272,7 +3272,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 	 */
 	smp_mb();
 
-	if (dql_avail(&dev_queue->dql) < 0)
+	if (unlikely(dql_avail(&dev_queue->dql) < 0))
 		return;
 
 	if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state))
-- 
cgit v1.2.3


From e93fb3e9521abffadb8f965c591a290cdd92b56c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 23 Aug 2019 17:11:38 -0700
Subject: net: route dump netlink NLM_F_MULTI flag missing

An excerpt from netlink(7) man page,

  In multipart messages (multiple nlmsghdr headers with associated payload
  in one byte stream) the first and all following headers have the
  NLM_F_MULTI flag set, except for the last  header  which  has the type
  NLMSG_DONE.

but, after (ee28906) there is a missing NLM_F_MULTI flag in the middle of a
FIB dump. The result is user space applications following above man page
excerpt may get confused and may stop parsing msg believing something went
wrong.

In the golang netlink lib [0] the library logic stops parsing believing the
message is not a multipart message. Found this running Cilium[1] against
net-next while adding a feature to auto-detect routes. I noticed with
multiple route tables we no longer could detect the default routes on net
tree kernels because the library logic was not returning them.

Fix this by handling the fib_dump_info_fnhe() case the same way the
fib_dump_info() handles it by passing the flags argument through the
call chain and adding a flags argument to rt_fill_info().

Tested with Cilium stack and auto-detection of routes works again. Also
annotated libs to dump netlink msgs and inspected NLM_F_MULTI and
NLMSG_DONE flags look correct after this.

Note: In inet_rtm_getroute() pass rt_fill_info() '0' for flags the same
as is done for fib_dump_info() so this looks correct to me.

[0] https://github.com/vishvananda/netlink/
[1] https://github.com/cilium/

Fixes: ee28906fd7a14 ("ipv4: Dump route exceptions if requested")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h |  2 +-
 net/ipv4/fib_trie.c |  2 +-
 net/ipv4/route.c    | 17 ++++++++++-------
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/route.h b/include/net/route.h
index 630a0493f1f3..dfce19c9fa96 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -233,7 +233,7 @@ void rt_del_uncached_list(struct rtable *rt);
 
 int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
 		       u32 table_id, struct fib_info *fi,
-		       int *fa_index, int fa_start);
+		       int *fa_index, int fa_start, unsigned int flags);
 
 static inline void ip_rt_put(struct rtable *rt)
 {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2b2b3d291ab0..1ab2fb6bb37d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2145,7 +2145,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 
 		if (filter->dump_exceptions) {
 			err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
-						 &i_fa, s_fa);
+						 &i_fa, s_fa, flags);
 			if (err < 0)
 				goto stop;
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 517300d587a7..b6a6f18c3dd1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2728,7 +2728,8 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow);
 /* called with rcu_read_lock held */
 static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 			struct rtable *rt, u32 table_id, struct flowi4 *fl4,
-			struct sk_buff *skb, u32 portid, u32 seq)
+			struct sk_buff *skb, u32 portid, u32 seq,
+			unsigned int flags)
 {
 	struct rtmsg *r;
 	struct nlmsghdr *nlh;
@@ -2736,7 +2737,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 	u32 error;
 	u32 metrics[RTAX_MAX];
 
-	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0);
+	nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags);
 	if (!nlh)
 		return -EMSGSIZE;
 
@@ -2860,7 +2861,7 @@ nla_put_failure:
 static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
 			    struct netlink_callback *cb, u32 table_id,
 			    struct fnhe_hash_bucket *bucket, int genid,
-			    int *fa_index, int fa_start)
+			    int *fa_index, int fa_start, unsigned int flags)
 {
 	int i;
 
@@ -2891,7 +2892,7 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
 			err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt,
 					   table_id, NULL, skb,
 					   NETLINK_CB(cb->skb).portid,
-					   cb->nlh->nlmsg_seq);
+					   cb->nlh->nlmsg_seq, flags);
 			if (err)
 				return err;
 next:
@@ -2904,7 +2905,7 @@ next:
 
 int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
 		       u32 table_id, struct fib_info *fi,
-		       int *fa_index, int fa_start)
+		       int *fa_index, int fa_start, unsigned int flags)
 {
 	struct net *net = sock_net(cb->skb->sk);
 	int nhsel, genid = fnhe_genid(net);
@@ -2922,7 +2923,8 @@ int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
 		err = 0;
 		if (bucket)
 			err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
-					       genid, fa_index, fa_start);
+					       genid, fa_index, fa_start,
+					       flags);
 		rcu_read_unlock();
 		if (err)
 			return err;
@@ -3183,7 +3185,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 				    fl4.flowi4_tos, res.fi, 0);
 	} else {
 		err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb,
-				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+				   NETLINK_CB(in_skb).portid,
+				   nlh->nlmsg_seq, 0);
 	}
 	if (err < 0)
 		goto errout_rcu;
-- 
cgit v1.2.3


From e0e6d062822529dbe9be21939359b0d1e065bb0f Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Fri, 23 Aug 2019 21:04:16 -0400
Subject: net: rds: add service level support in rds-info

>From IB specific 7.6.5 SERVICE LEVEL, Service Level (SL)
is used to identify different flows within an IBA subnet.
It is carried in the local route header of the packet.

Before this commit, run "rds-info -I". The outputs are as
below:
"
RDS IB Connections:
 LocalAddr  RemoteAddr Tos SL  LocalDev               RemoteDev
192.2.95.3  192.2.95.1  2   0  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  1   0  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  0   0  fe80::21:28:1a:39  fe80::21:28:10:b9
"
After this commit, the output is as below:
"
RDS IB Connections:
 LocalAddr  RemoteAddr Tos SL  LocalDev               RemoteDev
192.2.95.3  192.2.95.1  2   2  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  1   1  fe80::21:28:1a:39  fe80::21:28:10:b9
192.2.95.3  192.2.95.1  0   0  fe80::21:28:1a:39  fe80::21:28:10:b9
"

The commit fe3475af3bdf ("net: rds: add per rds connection cache
statistics") adds cache_allocs in struct rds_info_rdma_connection
as below:
struct rds_info_rdma_connection {
...
        __u32           rdma_mr_max;
        __u32           rdma_mr_size;
        __u8            tos;
        __u32           cache_allocs;
 };
The peer struct in rds-tools of struct rds_info_rdma_connection is as
below:
struct rds_info_rdma_connection {
...
        uint32_t        rdma_mr_max;
        uint32_t        rdma_mr_size;
        uint8_t         tos;
        uint8_t         sl;
        uint32_t        cache_allocs;
};
The difference between userspace and kernel is the member variable sl.
In the kernel struct, the member variable sl is missing. This will
introduce risks. So it is necessary to use this commit to avoid this risk.

Fixes: fe3475af3bdf ("net: rds: add per rds connection cache statistics")
CC: Joe Jin <joe.jin@oracle.com>
CC: JUNXIAO_BI <junxiao.bi@oracle.com>
Suggested-by: Gerd Rausch <gerd.rausch@oracle.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rds.h |  2 ++
 net/rds/ib.c             | 16 ++++++++++------
 net/rds/ib.h             |  1 +
 net/rds/ib_cm.c          |  3 +++
 net/rds/rdma_transport.c | 10 ++++++++--
 5 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index fd6b5f66e2c5..cba368e55863 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -250,6 +250,7 @@ struct rds_info_rdma_connection {
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
 	__u8		tos;
+	__u8		sl;
 	__u32		cache_allocs;
 };
 
@@ -265,6 +266,7 @@ struct rds6_info_rdma_connection {
 	__u32		rdma_mr_max;
 	__u32		rdma_mr_size;
 	__u8		tos;
+	__u8		sl;
 	__u32		cache_allocs;
 };
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ec05d91aa9a2..45acab2de0cf 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -291,7 +291,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 				    void *buffer)
 {
 	struct rds_info_rdma_connection *iinfo = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -301,15 +301,16 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo->src_addr = conn->c_laddr.s6_addr32[3];
 	iinfo->dst_addr = conn->c_faddr.s6_addr32[3];
-	iinfo->tos = conn->c_tos;
+	if (ic) {
+		iinfo->tos = conn->c_tos;
+		iinfo->sl = ic->i_sl;
+	}
 
 	memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
 	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
-
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
 			       (union ib_gid *)&iinfo->dst_gid);
 
@@ -329,7 +330,7 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 				     void *buffer)
 {
 	struct rds6_info_rdma_connection *iinfo6 = buffer;
-	struct rds_ib_connection *ic;
+	struct rds_ib_connection *ic = conn->c_transport_data;
 
 	/* We will only ever look at IB transports */
 	if (conn->c_trans != &rds_ib_transport)
@@ -337,6 +338,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 
 	iinfo6->src_addr = conn->c_laddr;
 	iinfo6->dst_addr = conn->c_faddr;
+	if (ic) {
+		iinfo6->tos = conn->c_tos;
+		iinfo6->sl = ic->i_sl;
+	}
 
 	memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid));
 	memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid));
@@ -344,7 +349,6 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
 
-		ic = conn->c_transport_data;
 		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
 			       (union ib_gid *)&iinfo6->dst_gid);
 		rds_ibdev = ic->rds_ibdev;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 303c6ee8bdb7..f2b558e8b5ea 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -220,6 +220,7 @@ struct rds_ib_connection {
 	/* Send/Recv vectors */
 	int			i_scq_vector;
 	int			i_rcq_vector;
+	u8			i_sl;
 };
 
 /* This assumes that atomic_t is at least 32 bits */
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index fddaa09f7b0d..233f1368162b 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -152,6 +152,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 		  RDS_PROTOCOL_MINOR(conn->c_version),
 		  ic->i_flowctl ? ", flow control" : "");
 
+	/* receive sl from the peer */
+	ic->i_sl = ic->i_cm_id->route.path_rec->sl;
+
 	atomic_set(&ic->i_cq_quiesce, 0);
 
 	/* Init rings and fill recv. this needs to wait until protocol
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 9986d6065c4d..5f741e51b4ba 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -43,6 +43,9 @@ static struct rdma_cm_id *rds_rdma_listen_id;
 static struct rdma_cm_id *rds6_rdma_listen_id;
 #endif
 
+/* Per IB specification 7.7.3, service level is a 4-bit field. */
+#define TOS_TO_SL(tos)		((tos) & 0xF)
+
 static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 					 struct rdma_cm_event *event,
 					 bool isv6)
@@ -97,10 +100,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
 			struct rds_ib_connection *ibic;
 
 			ibic = conn->c_transport_data;
-			if (ibic && ibic->i_cm_id == cm_id)
+			if (ibic && ibic->i_cm_id == cm_id) {
+				cm_id->route.path_rec[0].sl =
+					TOS_TO_SL(conn->c_tos);
 				ret = trans->cm_initiate_connect(cm_id, isv6);
-			else
+			} else {
 				rds_conn_drop(conn);
+			}
 		}
 		break;
 
-- 
cgit v1.2.3


From 1dd62c66d345a4e5dcfa5a4e81999600515b4309 Mon Sep 17 00:00:00 2001
From: Lukasz Luba <l.luba@partner.samsung.com>
Date: Wed, 5 Jun 2019 11:12:34 +0200
Subject: PM / devfreq: events: extend events by type of counted data

This patch adds posibility to choose what type of data should be counted
by the PPMU counter. Now the type comes from DT where the event has been
defined. When there is no 'event-data-type' the default value is used,
which is 'read+write data in bytes'.
It is needed when you want to know not only read+write data bytes but
i.e. only write data in byte, or number of read requests, etc.

Signed-off-by: Lukasz Luba <l.luba@partner.samsung.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
[Updated property by MyungJoo. data_type --> event_type]
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/event/exynos-ppmu.c | 60 +++++++++++++++++++++++++------------
 include/linux/devfreq-event.h       |  6 ++++
 2 files changed, 47 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 1756c6b5cb09..87b42055e6bc 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -164,9 +164,9 @@ static int exynos_ppmu_set_event(struct devfreq_event_dev *edev)
 	if (ret < 0)
 		return ret;
 
-	/* Set the event of Read/Write data count  */
+	/* Set the event of proper data type monitoring */
 	ret = regmap_write(info->regmap, PPMU_BEVTxSEL(id),
-				PPMU_RO_DATA_CNT | PPMU_WO_DATA_CNT);
+			   edev->desc->event_type);
 	if (ret < 0)
 		return ret;
 
@@ -378,23 +378,11 @@ static int exynos_ppmu_v2_set_event(struct devfreq_event_dev *edev)
 	if (ret < 0)
 		return ret;
 
-	/* Set the event of Read/Write data count  */
-	switch (id) {
-	case PPMU_PMNCNT0:
-	case PPMU_PMNCNT1:
-	case PPMU_PMNCNT2:
-		ret = regmap_write(info->regmap, PPMU_V2_CH_EVx_TYPE(id),
-				PPMU_V2_RO_DATA_CNT | PPMU_V2_WO_DATA_CNT);
-		if (ret < 0)
-			return ret;
-		break;
-	case PPMU_PMNCNT3:
-		ret = regmap_write(info->regmap, PPMU_V2_CH_EVx_TYPE(id),
-				PPMU_V2_EVT3_RW_DATA_CNT);
-		if (ret < 0)
-			return ret;
-		break;
-	}
+	/* Set the event of proper data type monitoring */
+	ret = regmap_write(info->regmap, PPMU_V2_CH_EVx_TYPE(id),
+			   edev->desc->event_type);
+	if (ret < 0)
+		return ret;
 
 	/* Reset cycle counter/performance counter and enable PPMU */
 	ret = regmap_read(info->regmap, PPMU_V2_PMNC, &pmnc);
@@ -510,6 +498,7 @@ static int of_get_devfreq_events(struct device_node *np,
 	struct device_node *events_np, *node;
 	int i, j, count;
 	const struct of_device_id *of_id;
+	int ret;
 
 	events_np = of_get_child_by_name(np, "events");
 	if (!events_np) {
@@ -559,6 +548,39 @@ static int of_get_devfreq_events(struct device_node *np,
 		desc[j].driver_data = info;
 
 		of_property_read_string(node, "event-name", &desc[j].name);
+		ret = of_property_read_u32(node, "event-data-type",
+					   &desc[j].event_type);
+		if (ret) {
+			/* Set the event of proper data type counting.
+			 * Check if the data type has been defined in DT,
+			 * use default if not.
+			 */
+			if (info->ppmu_type == EXYNOS_TYPE_PPMU_V2) {
+				struct devfreq_event_dev edev;
+				int id;
+				/* Not all registers take the same value for
+				 * read+write data count.
+				 */
+				edev.desc = &desc[j];
+				id = exynos_ppmu_find_ppmu_id(&edev);
+
+				switch (id) {
+				case PPMU_PMNCNT0:
+				case PPMU_PMNCNT1:
+				case PPMU_PMNCNT2:
+					desc[j].event_type = PPMU_V2_RO_DATA_CNT
+						| PPMU_V2_WO_DATA_CNT;
+					break;
+				case PPMU_PMNCNT3:
+					desc[j].event_type =
+						PPMU_V2_EVT3_RW_DATA_CNT;
+					break;
+				}
+			} else {
+				desc[j].event_type = PPMU_RO_DATA_CNT |
+					PPMU_WO_DATA_CNT;
+			}
+		}
 
 		j++;
 	}
diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h
index 29fc0dd735ae..f14f17f8cb7f 100644
--- a/include/linux/devfreq-event.h
+++ b/include/linux/devfreq-event.h
@@ -78,14 +78,20 @@ struct devfreq_event_ops {
  * struct devfreq_event_desc - the descriptor of devfreq-event device
  *
  * @name	: the name of devfreq-event device.
+ * @event_type	: the type of the event determined and used by driver
  * @driver_data	: the private data for devfreq-event driver.
  * @ops		: the operation to control devfreq-event device.
  *
  * Each devfreq-event device is described with a this structure.
  * This structure contains the various data for devfreq-event device.
+ * The event_type describes what is going to be counted in the register.
+ * It might choose to count e.g. read requests, write data in bytes, etc.
+ * The full supported list of types is present in specyfic header in:
+ * include/dt-bindings/pmu/.
  */
 struct devfreq_event_desc {
 	const char *name;
+	u32 event_type;
 	void *driver_data;
 
 	const struct devfreq_event_ops *ops;
-- 
cgit v1.2.3


From 3109741a8d773b91eec4a1f7764c97a1176ec32d Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 23 Aug 2019 19:33:30 +0200
Subject: KVM: arm/arm64: vgic: Use a single IO device per redistributor

At the moment we use 2 IO devices per GICv3 redistributor: one
one for the RD_base frame and one for the SGI_base frame.

Instead we can use a single IO device per redistributor (the 2
frames are contiguous). This saves slots on the KVM_MMIO_BUS
which is currently limited to NR_IOBUS_DEVS (1000).

This change allows to instantiate up to 512 redistributors and may
speed the guest boot with a large number of VCPUs.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 include/kvm/arm_vgic.h           |  1 -
 virt/kvm/arm/vgic/vgic-init.c    |  1 -
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 81 ++++++++++++----------------------------
 3 files changed, 24 insertions(+), 59 deletions(-)

(limited to 'include')

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index ded50a30e2d5..af4f09c02bf1 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -314,7 +314,6 @@ struct vgic_cpu {
 	 * parts of the redistributor.
 	 */
 	struct vgic_io_device	rd_iodev;
-	struct vgic_io_device	sgi_iodev;
 	struct vgic_redist_region *rdreg;
 
 	/* Contains the attributes and gpa of the LPI pending tables. */
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 9175bfd83263..958e2f0d2207 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -193,7 +193,6 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	int i;
 
 	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
-	vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
 
 	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
 	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index fdcfb7ae4491..7dfd15dbb308 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -517,7 +517,8 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
 		VGIC_ACCESS_32bit),
 };
 
-static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
+static const struct vgic_register_region vgic_v3_rd_registers[] = {
+	/* RD_base registers */
 	REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
 		vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
 		VGIC_ACCESS_32bit),
@@ -542,44 +543,42 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
 	REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
 		vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
 		VGIC_ACCESS_32bit),
-};
-
-static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
-	REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
+	/* SGI_base registers */
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
 		vgic_mmio_read_group, vgic_mmio_write_group, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
 		vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
 		vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
 		vgic_mmio_read_pending, vgic_mmio_write_spending,
 		vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
 		vgic_mmio_read_pending, vgic_mmio_write_cpending,
 		vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
 		vgic_mmio_read_active, vgic_mmio_write_sactive,
 		NULL, vgic_mmio_uaccess_write_sactive,
 		4, VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
 		vgic_mmio_read_active, vgic_mmio_write_cactive,
 		NULL, vgic_mmio_uaccess_write_cactive,
 		4, VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
 		vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
 		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0,
 		vgic_mmio_read_config, vgic_mmio_write_config, 8,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
 };
@@ -609,9 +608,8 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
 	struct vgic_redist_region *rdreg;
-	gpa_t rd_base, sgi_base;
+	gpa_t rd_base;
 	int ret;
 
 	if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
@@ -633,52 +631,31 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	vgic_cpu->rdreg = rdreg;
 
 	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
-	sgi_base = rd_base + SZ_64K;
 
 	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
 	rd_dev->base_addr = rd_base;
 	rd_dev->iodev_type = IODEV_REDIST;
-	rd_dev->regions = vgic_v3_rdbase_registers;
-	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+	rd_dev->regions = vgic_v3_rd_registers;
+	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
 	rd_dev->redist_vcpu = vcpu;
 
 	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
-				      SZ_64K, &rd_dev->dev);
+				      2 * SZ_64K, &rd_dev->dev);
 	mutex_unlock(&kvm->slots_lock);
 
 	if (ret)
 		return ret;
 
-	kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
-	sgi_dev->base_addr = sgi_base;
-	sgi_dev->iodev_type = IODEV_REDIST;
-	sgi_dev->regions = vgic_v3_sgibase_registers;
-	sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
-	sgi_dev->redist_vcpu = vcpu;
-
-	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
-				      SZ_64K, &sgi_dev->dev);
-	if (ret) {
-		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-					  &rd_dev->dev);
-		goto out;
-	}
-
 	rdreg->free_index++;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return ret;
+	return 0;
 }
 
 static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
 {
 	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
 
 	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
-	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
 }
 
 static int vgic_register_all_redist_iodevs(struct kvm *kvm)
@@ -828,8 +805,8 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
 		iodev.base_addr = 0;
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
-		iodev.regions = vgic_v3_rdbase_registers;
-		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+		iodev.regions = vgic_v3_rd_registers;
+		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
 		iodev.base_addr = 0;
 		break;
 	}
@@ -987,21 +964,11 @@ int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
 			   int offset, u32 *val)
 {
 	struct vgic_io_device rd_dev = {
-		.regions = vgic_v3_rdbase_registers,
-		.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
+		.regions = vgic_v3_rd_registers,
+		.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers),
 	};
 
-	struct vgic_io_device sgi_dev = {
-		.regions = vgic_v3_sgibase_registers,
-		.nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
-	};
-
-	/* SGI_base is the next 64K frame after RD_base */
-	if (offset >= SZ_64K)
-		return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
-				    val);
-	else
-		return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+	return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
 }
 
 int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
-- 
cgit v1.2.3


From 9b5f684182403f2b338f797c44eca0061c797dc8 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 25 Aug 2019 07:47:30 -0700
Subject: nexthop: Fix nexthop_num_path for blackhole nexthops

Donald reported this sequence:
  ip next add id 1 blackhole
  ip next add id 2 blackhole
  ip ro add 1.1.1.1/32 nhid 1
  ip ro add 1.1.1.2/32 nhid 2

would cause a crash. Backtrace is:

[  151.302790] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  151.304043] CPU: 1 PID: 277 Comm: ip Not tainted 5.3.0-rc5+ #37
[  151.305078] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.1-1 04/01/2014
[  151.306526] RIP: 0010:fib_add_nexthop+0x8b/0x2aa
[  151.307343] Code: 35 f7 81 48 8d 14 01 c7 02 f1 f1 f1 f1 c7 42 04 01 f4 f4 f4 48 89 f2 48 c1 ea 03 65 48 8b 0c 25 28 00 00 00 48 89 4d d0 31 c9 <80> 3c 02 00 74 08 48 89 f7 e8 1a e8 53 ff be 08 00 00 00 4c 89 e7
[  151.310549] RSP: 0018:ffff888116c27340 EFLAGS: 00010246
[  151.311469] RAX: dffffc0000000000 RBX: ffff8881154ece00 RCX: 0000000000000000
[  151.312713] RDX: 0000000000000004 RSI: 0000000000000020 RDI: ffff888115649b40
[  151.313968] RBP: ffff888116c273d8 R08: ffffed10221e3757 R09: ffff888110f1bab8
[  151.315212] R10: 0000000000000001 R11: ffff888110f1bab3 R12: ffff888115649b40
[  151.316456] R13: 0000000000000020 R14: ffff888116c273b0 R15: ffff888115649b40
[  151.317707] FS:  00007f60b4d8d800(0000) GS:ffff88811ac00000(0000) knlGS:0000000000000000
[  151.319113] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  151.320119] CR2: 0000555671ffdc00 CR3: 00000001136ba005 CR4: 0000000000020ee0
[  151.321367] Call Trace:
[  151.321820]  ? fib_nexthop_info+0x635/0x635
[  151.322572]  fib_dump_info+0xaa4/0xde0
[  151.323247]  ? fib_create_info+0x2431/0x2431
[  151.324008]  ? napi_alloc_frag+0x2a/0x2a
[  151.324711]  rtmsg_fib+0x2c4/0x3be
[  151.325339]  fib_table_insert+0xe2f/0xeee
...

fib_dump_info incorrectly has nhs = 0 for blackhole nexthops, so it
believes the nexthop object is a multipath group (nhs != 1) and ends
up down the nexthop_mpath_fill_node() path which is wrong for a
blackhole.

The blackhole check in nexthop_num_path is leftover from early days
of the blackhole implementation which did not initialize the device.
In the end the design was simpler (fewer special case checks) to set
the device to loopback in nh_info, so the check in nexthop_num_path
should have been removed.

Fixes: 430a049190de ("nexthop: Add support for nexthop groups")
Reported-by: Donald Sharp <sharpd@cumulusnetworks.com>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nexthop.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 25f1f9a8419b..95f766c31c90 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -141,12 +141,6 @@ static inline unsigned int nexthop_num_path(const struct nexthop *nh)
 
 		nh_grp = rcu_dereference_rtnl(nh->nh_grp);
 		rc = nh_grp->num_nh;
-	} else {
-		const struct nh_info *nhi;
-
-		nhi = rcu_dereference_rtnl(nh->nh_info);
-		if (nhi->reject_nh)
-			rc = 0;
 	}
 
 	return rc;
-- 
cgit v1.2.3


From 3342d2f88ef9519c58d76004afb2c8fd93b3e418 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 20 Aug 2019 11:50:57 +0900
Subject: mtd: rawnand: sharpsl: add include guard to linux/mtd/sharpsl.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/sharpsl.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/mtd/sharpsl.h b/include/linux/mtd/sharpsl.h
index 01306ebe266d..d2c3cf29e0d1 100644
--- a/include/linux/mtd/sharpsl.h
+++ b/include/linux/mtd/sharpsl.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2008 Dmitry Baryshkov
  */
 
+#ifndef _MTD_SHARPSL_H
+#define _MTD_SHARPSL_H
+
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/partitions.h>
@@ -16,3 +19,5 @@ struct sharpsl_nand_platform_data {
 	unsigned int		nr_partitions;
 	const char *const	*part_parsers;
 };
+
+#endif /* _MTD_SHARPSL_H */
-- 
cgit v1.2.3


From 0c43125f2778dddb469036caaa14533846ce40bd Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Fri, 23 Aug 2019 15:39:37 +0000
Subject: mtd: nand: fix typo, s/erasablocks/eraseblocks

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index cebc38b6d6f5..0c7483843a32 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -346,7 +346,7 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand)
 }
 
 /**
- * nanddev_neraseblocks() - Get the total number of erasablocks
+ * nanddev_neraseblocks() - Get the total number of eraseblocks
  * @nand: NAND device
  *
  * Return: the total number of eraseblocks exposed by @nand.
-- 
cgit v1.2.3


From cda4569137b90f200bee4922d894ca49d4188681 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 26 Aug 2019 09:25:35 +0200
Subject: dt-bindings: clk: meson: add sm1 periph clock controller bindings

Update the documentation to support clock driver for the Amlogic SM1 SoC
and expose the GP1, DSU and the CPU 1, 2 & 3 clocks.

SM1 clock tree is very close, the main differences are :
- each CPU core can achieve a different frequency, albeit a common PLL
- a similar tree as the clock tree has been added for the DynamIQ Shared
  Unit
- has a new GP1 PLL used for the DynamIQ Shared Unit
- SM1 has additional clocks like for CSI, NanoQ an other components

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
---
 Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt | 1 +
 include/dt-bindings/clock/g12a-clkc.h                         | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
index 6eaa52092313..7ccecd5c02c1 100644
--- a/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-clkc.txt
@@ -11,6 +11,7 @@ Required Properties:
 		"amlogic,axg-clkc" for AXG SoC.
 		"amlogic,g12a-clkc" for G12A SoC.
 		"amlogic,g12b-clkc" for G12B SoC.
+		"amlogic,sm1-clkc" for SM1 SoC.
 - clocks : list of clock phandle, one for each entry clock-names.
 - clock-names : should contain the following:
   * "xtal": the platform xtal
diff --git a/include/dt-bindings/clock/g12a-clkc.h b/include/dt-bindings/clock/g12a-clkc.h
index 8ccc29ac7a72..0837c1a7ae49 100644
--- a/include/dt-bindings/clock/g12a-clkc.h
+++ b/include/dt-bindings/clock/g12a-clkc.h
@@ -138,5 +138,10 @@
 #define CLKID_VDEC_HEVCF			210
 #define CLKID_TS				212
 #define CLKID_CPUB_CLK				224
+#define CLKID_GP1_PLL				243
+#define CLKID_DSU_CLK				252
+#define CLKID_CPU1_CLK				253
+#define CLKID_CPU2_CLK				254
+#define CLKID_CPU3_CLK				255
 
 #endif /* __G12A_CLKC_H */
-- 
cgit v1.2.3


From a1b840adafcbdc27da2982e7305c342204b8cfd8 Mon Sep 17 00:00:00 2001
From: Ander Juaristi <a@juaristi.eus>
Date: Sat, 17 Aug 2019 13:17:52 +0200
Subject: netfilter: nf_tables: Introduce new 64-bit helper register functions

Introduce new helper functions to load/store 64-bit values onto/from
registers:

 - nft_reg_store64
 - nft_reg_load64

This commit also re-orders all these helpers from smallest to largest
target bit size.

Signed-off-by: Ander Juaristi <a@juaristi.eus>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 25 ++++++++++++++++++-------
 net/netfilter/nft_byteorder.c     |  9 +++++----
 2 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e73d16f8b870..64765140657b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -2,6 +2,7 @@
 #ifndef _NET_NF_TABLES_H
 #define _NET_NF_TABLES_H
 
+#include <asm/unaligned.h>
 #include <linux/list.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nfnetlink.h>
@@ -102,23 +103,28 @@ struct nft_regs {
 	};
 };
 
-/* Store/load an u16 or u8 integer to/from the u32 data register.
+/* Store/load an u8, u16 or u64 integer to/from the u32 data register.
  *
  * Note, when using concatenations, register allocation happens at 32-bit
  * level. So for store instruction, pad the rest part with zero to avoid
  * garbage values.
  */
 
-static inline void nft_reg_store16(u32 *dreg, u16 val)
+static inline void nft_reg_store8(u32 *dreg, u8 val)
 {
 	*dreg = 0;
-	*(u16 *)dreg = val;
+	*(u8 *)dreg = val;
 }
 
-static inline void nft_reg_store8(u32 *dreg, u8 val)
+static inline u8 nft_reg_load8(u32 *sreg)
+{
+	return *(u8 *)sreg;
+}
+
+static inline void nft_reg_store16(u32 *dreg, u16 val)
 {
 	*dreg = 0;
-	*(u8 *)dreg = val;
+	*(u16 *)dreg = val;
 }
 
 static inline u16 nft_reg_load16(u32 *sreg)
@@ -126,9 +132,14 @@ static inline u16 nft_reg_load16(u32 *sreg)
 	return *(u16 *)sreg;
 }
 
-static inline u8 nft_reg_load8(u32 *sreg)
+static inline void nft_reg_store64(u32 *dreg, u64 val)
 {
-	return *(u8 *)sreg;
+	put_unaligned(val, (u64 *)dreg);
+}
+
+static inline u64 nft_reg_load64(u32 *sreg)
+{
+	return get_unaligned((u64 *)sreg);
 }
 
 static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index e06318428ea0..12bed3f7bbc6 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -43,14 +43,15 @@ void nft_byteorder_eval(const struct nft_expr *expr,
 		switch (priv->op) {
 		case NFT_BYTEORDER_NTOH:
 			for (i = 0; i < priv->len / 8; i++) {
-				src64 = get_unaligned((u64 *)&src[i]);
-				put_unaligned_be64(src64, &dst[i]);
+				src64 = nft_reg_load64(&src[i]);
+				nft_reg_store64(&dst[i], be64_to_cpu(src64));
 			}
 			break;
 		case NFT_BYTEORDER_HTON:
 			for (i = 0; i < priv->len / 8; i++) {
-				src64 = get_unaligned_be64(&src[i]);
-				put_unaligned(src64, (u64 *)&dst[i]);
+				src64 = (__force __u64)
+					cpu_to_be64(nft_reg_load64(&src[i]));
+				nft_reg_store64(&dst[i], src64);
 			}
 			break;
 		}
-- 
cgit v1.2.3


From 63d10e12b00dfc8d8387bea9eaab376881335731 Mon Sep 17 00:00:00 2001
From: Ander Juaristi <a@juaristi.eus>
Date: Sat, 17 Aug 2019 13:17:53 +0200
Subject: netfilter: nft_meta: support for time matching

This patch introduces meta matches in the kernel for time (a UNIX timestamp),
day (a day of week, represented as an integer between 0-6), and
hour (an hour in the current day, or: number of seconds since midnight).

All values are taken as unsigned 64-bit integers.

The 'time' keyword is internally converted to nanoseconds by nft in
userspace, and hence the timestamp is taken in nanoseconds as well.

Signed-off-by: Ander Juaristi <a@juaristi.eus>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  6 +++++
 net/netfilter/nft_meta.c                 | 46 ++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 82abaa183fc3..b83b62eb4b01 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -799,6 +799,9 @@ enum nft_exthdr_attributes {
  * @NFT_META_OIFKIND: packet output interface kind name (dev->rtnl_link_ops->kind)
  * @NFT_META_BRI_IIFPVID: packet input bridge port pvid
  * @NFT_META_BRI_IIFVPROTO: packet input bridge vlan proto
+ * @NFT_META_TIME_NS: time since epoch (in nanoseconds)
+ * @NFT_META_TIME_DAY: day of week (from 0 = Sunday to 6 = Saturday)
+ * @NFT_META_TIME_HOUR: hour of day (in seconds)
  */
 enum nft_meta_keys {
 	NFT_META_LEN,
@@ -831,6 +834,9 @@ enum nft_meta_keys {
 	NFT_META_OIFKIND,
 	NFT_META_BRI_IIFPVID,
 	NFT_META_BRI_IIFVPROTO,
+	NFT_META_TIME_NS,
+	NFT_META_TIME_DAY,
+	NFT_META_TIME_HOUR,
 };
 
 /**
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index f69afb9ff3cb..317e3a9e8c5b 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -26,8 +26,36 @@
 
 #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
 
+#define NFT_META_SECS_PER_MINUTE	60
+#define NFT_META_SECS_PER_HOUR		3600
+#define NFT_META_SECS_PER_DAY		86400
+#define NFT_META_DAYS_PER_WEEK		7
+
 static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
 
+static u8 nft_meta_weekday(unsigned long secs)
+{
+	unsigned int dse;
+	u8 wday;
+
+	secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
+	dse = secs / NFT_META_SECS_PER_DAY;
+	wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;
+
+	return wday;
+}
+
+static u32 nft_meta_hour(unsigned long secs)
+{
+	struct tm tm;
+
+	time64_to_tm(secs, 0, &tm);
+
+	return tm.tm_hour * NFT_META_SECS_PER_HOUR
+		+ tm.tm_min * NFT_META_SECS_PER_MINUTE
+		+ tm.tm_sec;
+}
+
 void nft_meta_get_eval(const struct nft_expr *expr,
 		       struct nft_regs *regs,
 		       const struct nft_pktinfo *pkt)
@@ -218,6 +246,15 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 			goto err;
 		strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
 		break;
+	case NFT_META_TIME_NS:
+		nft_reg_store64(dest, ktime_get_real_ns());
+		break;
+	case NFT_META_TIME_DAY:
+		nft_reg_store8(dest, nft_meta_weekday(get_seconds()));
+		break;
+	case NFT_META_TIME_HOUR:
+		*dest = nft_meta_hour(get_seconds());
+		break;
 	default:
 		WARN_ON(1);
 		goto err;
@@ -330,6 +367,15 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
 		len = sizeof(u8);
 		break;
 #endif
+	case NFT_META_TIME_NS:
+		len = sizeof(u64);
+		break;
+	case NFT_META_TIME_DAY:
+		len = sizeof(u8);
+		break;
+	case NFT_META_TIME_HOUR:
+		len = sizeof(u32);
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3


From 65af4a10743b766e319fb53812c5926c6d98b100 Mon Sep 17 00:00:00 2001
From: Michael Braun <michael-dev@fami-braun.de>
Date: Tue, 20 Aug 2019 15:11:46 +0200
Subject: netfilter: nfnetlink_log: add support for VLAN information

Currently, there is no vlan information (e.g. when used with a vlan aware
bridge) passed to userspache, HWHEADER will contain an 08 00 (ip) suffix
even for tagged ip packets.

Therefore, add an extra netlink attribute that passes the vlan information
to userspace similarly to 15824ab29f for nfqueue.

Signed-off-by: Michael Braun <michael-dev@fami-braun.de>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_log.h | 11 ++++++
 net/netfilter/nfnetlink_log.c                | 57 ++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nfnetlink_log.h b/include/uapi/linux/netfilter/nfnetlink_log.h
index 20983cb195a0..45c8d3b027e0 100644
--- a/include/uapi/linux/netfilter/nfnetlink_log.h
+++ b/include/uapi/linux/netfilter/nfnetlink_log.h
@@ -33,6 +33,15 @@ struct nfulnl_msg_packet_timestamp {
 	__aligned_be64	usec;
 };
 
+enum nfulnl_vlan_attr {
+	NFULA_VLAN_UNSPEC,
+	NFULA_VLAN_PROTO,		/* __be16 skb vlan_proto */
+	NFULA_VLAN_TCI,			/* __be16 skb htons(vlan_tci) */
+	__NFULA_VLAN_MAX,
+};
+
+#define NFULA_VLAN_MAX (__NFULA_VLAN_MAX + 1)
+
 enum nfulnl_attr_type {
 	NFULA_UNSPEC,
 	NFULA_PACKET_HDR,
@@ -54,6 +63,8 @@ enum nfulnl_attr_type {
 	NFULA_HWLEN,			/* hardware header length */
 	NFULA_CT,                       /* nf_conntrack_netlink.h */
 	NFULA_CT_INFO,                  /* enum ip_conntrack_info */
+	NFULA_VLAN,			/* nested attribute: packet vlan info */
+	NFULA_L2HDR,			/* full L2 header */
 
 	__NFULA_MAX
 };
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index d69e1863e536..0ba020ca38e6 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -385,6 +385,57 @@ nfulnl_timer(struct timer_list *t)
 	instance_put(inst);
 }
 
+static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
+{
+	u32 size = 0;
+
+	if (!skb_mac_header_was_set(skb))
+		return 0;
+
+	if (skb_vlan_tag_present(skb)) {
+		size += nla_total_size(0); /* nested */
+		size += nla_total_size(sizeof(u16)); /* id */
+		size += nla_total_size(sizeof(u16)); /* tag */
+	}
+
+	if (skb->network_header > skb->mac_header)
+		size += nla_total_size(skb->network_header - skb->mac_header);
+
+	return size;
+}
+
+static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb)
+{
+	if (!skb_mac_header_was_set(skb))
+		return 0;
+
+	if (skb_vlan_tag_present(skb)) {
+		struct nlattr *nest;
+
+		nest = nla_nest_start(inst->skb, NFULA_VLAN);
+		if (!nest)
+			goto nla_put_failure;
+
+		if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) ||
+		    nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto))
+			goto nla_put_failure;
+
+		nla_nest_end(inst->skb, nest);
+	}
+
+	if (skb->mac_header < skb->network_header) {
+		int len = (int)(skb->network_header - skb->mac_header);
+
+		if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb)))
+			goto nla_put_failure;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 /* This is an inline function, we don't really care about a long
  * list of arguments */
 static inline int
@@ -580,6 +631,10 @@ __build_packet_message(struct nfnl_log_net *log,
 				 NFULA_CT, NFULA_CT_INFO) < 0)
 		goto nla_put_failure;
 
+	if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) &&
+	    nfulnl_put_bridge(inst, skb) < 0)
+		goto nla_put_failure;
+
 	if (data_len) {
 		struct nlattr *nla;
 		int size = nla_attr_size(data_len);
@@ -687,6 +742,8 @@ nfulnl_log_packet(struct net *net,
 				size += nfnl_ct->build_size(ct);
 		}
 	}
+	if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE)
+		size += nfulnl_get_bridge_size(skb);
 
 	qthreshold = inst->qthreshold;
 	/* per-rule qthreshold overrides per-instance */
-- 
cgit v1.2.3


From 3cdb9446a117d5d63af823bde6fe6babc312e77b Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Tue, 16 Jan 2018 22:19:00 +0200
Subject: thunderbolt: Add support for Intel Ice Lake

The Thunderbolt controller is integrated into the Ice Lake CPU itself
and requires special flows to power it on and off using force power bit
in NHI VSEC registers. Runtime PM (RTD3) and Sx flows also differ from
the discrete solutions. Now the firmware notifies the driver whether
RTD3 entry or exit are possible. The driver is responsible of sending
Go2Sx command through link controller mailbox when system enters Sx
states (suspend-to-mem/disk). Rest of the ICM firwmare flows follow
Titan Ridge.

Signed-off-by: Raanan Avargil <raanan.avargil@intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Yehezkel Bernat <YehezkelShB@gmail.com>
Tested-by: Mario Limonciello <mario.limonciello@dell.com>
---
 drivers/thunderbolt/Makefile   |   2 +-
 drivers/thunderbolt/icm.c      | 176 +++++++++++++++++++++++++++++++++++++---
 drivers/thunderbolt/nhi.c      | 112 ++++++++++++++++++++++++--
 drivers/thunderbolt/nhi.h      |  22 +++++
 drivers/thunderbolt/nhi_ops.c  | 179 +++++++++++++++++++++++++++++++++++++++++
 drivers/thunderbolt/nhi_regs.h |  37 +++++++++
 drivers/thunderbolt/switch.c   |   2 +
 drivers/thunderbolt/tb_msgs.h  |  16 +++-
 include/linux/thunderbolt.h    |   2 +
 9 files changed, 526 insertions(+), 22 deletions(-)
 create mode 100644 drivers/thunderbolt/nhi_ops.c

(limited to 'include')

diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile
index 3f55cb3c81b2..001187c577bf 100644
--- a/drivers/thunderbolt/Makefile
+++ b/drivers/thunderbolt/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-${CONFIG_THUNDERBOLT} := thunderbolt.o
-thunderbolt-objs := nhi.o ctl.o tb.o switch.o cap.o path.o tunnel.o eeprom.o
+thunderbolt-objs := nhi.o nhi_ops.o ctl.o tb.o switch.o cap.o path.o tunnel.o eeprom.o
 thunderbolt-objs += domain.o dma_port.o icm.o property.o xdomain.o lc.o
diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c
index e9835ab35465..245588f691e7 100644
--- a/drivers/thunderbolt/icm.c
+++ b/drivers/thunderbolt/icm.c
@@ -56,16 +56,19 @@
  * @max_boot_acl: Maximum number of preboot ACL entries (%0 if not supported)
  * @rpm: Does the controller support runtime PM (RTD3)
  * @can_upgrade_nvm: Can the NVM firmware be upgrade on this controller
+ * @veto: Is RTD3 veto in effect
  * @is_supported: Checks if we can support ICM on this controller
  * @cio_reset: Trigger CIO reset
  * @get_mode: Read and return the ICM firmware mode (optional)
  * @get_route: Find a route string for given switch
  * @save_devices: Ask ICM to save devices to ACL when suspending (optional)
  * @driver_ready: Send driver ready message to ICM
+ * @set_uuid: Set UUID for the root switch (optional)
  * @device_connected: Handle device connected ICM message
  * @device_disconnected: Handle device disconnected ICM message
  * @xdomain_connected - Handle XDomain connected ICM message
  * @xdomain_disconnected - Handle XDomain disconnected ICM message
+ * @rtd3_veto: Handle RTD3 veto notification ICM message
  */
 struct icm {
 	struct mutex request_lock;
@@ -76,6 +79,7 @@ struct icm {
 	bool safe_mode;
 	bool rpm;
 	bool can_upgrade_nvm;
+	bool veto;
 	bool (*is_supported)(struct tb *tb);
 	int (*cio_reset)(struct tb *tb);
 	int (*get_mode)(struct tb *tb);
@@ -84,6 +88,7 @@ struct icm {
 	int (*driver_ready)(struct tb *tb,
 			    enum tb_security_level *security_level,
 			    size_t *nboot_acl, bool *rpm);
+	void (*set_uuid)(struct tb *tb);
 	void (*device_connected)(struct tb *tb,
 				 const struct icm_pkg_header *hdr);
 	void (*device_disconnected)(struct tb *tb,
@@ -92,6 +97,7 @@ struct icm {
 				  const struct icm_pkg_header *hdr);
 	void (*xdomain_disconnected)(struct tb *tb,
 				     const struct icm_pkg_header *hdr);
+	void (*rtd3_veto)(struct tb *tb, const struct icm_pkg_header *hdr);
 };
 
 struct icm_notification {
@@ -296,6 +302,43 @@ static int icm_request(struct tb *tb, const void *request, size_t request_size,
 	return -ETIMEDOUT;
 }
 
+/*
+ * If rescan is queued to run (we are resuming), postpone it to give the
+ * firmware some more time to send device connected notifications for next
+ * devices in the chain.
+ */
+static void icm_postpone_rescan(struct tb *tb)
+{
+	struct icm *icm = tb_priv(tb);
+
+	if (delayed_work_pending(&icm->rescan_work))
+		mod_delayed_work(tb->wq, &icm->rescan_work,
+				 msecs_to_jiffies(500));
+}
+
+static void icm_veto_begin(struct tb *tb)
+{
+	struct icm *icm = tb_priv(tb);
+
+	if (!icm->veto) {
+		icm->veto = true;
+		/* Keep the domain powered while veto is in effect */
+		pm_runtime_get(&tb->dev);
+	}
+}
+
+static void icm_veto_end(struct tb *tb)
+{
+	struct icm *icm = tb_priv(tb);
+
+	if (icm->veto) {
+		icm->veto = false;
+		/* Allow the domain suspend now */
+		pm_runtime_mark_last_busy(&tb->dev);
+		pm_runtime_put_autosuspend(&tb->dev);
+	}
+}
+
 static bool icm_fr_is_supported(struct tb *tb)
 {
 	return !x86_apple_machine;
@@ -519,14 +562,16 @@ static int icm_fr_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd)
 	return 0;
 }
 
-static void add_switch(struct tb_switch *parent_sw, u64 route,
-		       const uuid_t *uuid, const u8 *ep_name,
-		       size_t ep_name_size, u8 connection_id, u8 connection_key,
-		       u8 link, u8 depth, enum tb_security_level security_level,
-		       bool authorized, bool boot)
+static struct tb_switch *add_switch(struct tb_switch *parent_sw, u64 route,
+				    const uuid_t *uuid, const u8 *ep_name,
+				    size_t ep_name_size, u8 connection_id,
+				    u8 connection_key, u8 link, u8 depth,
+				    enum tb_security_level security_level,
+				    bool authorized, bool boot)
 {
 	const struct intel_vss *vss;
 	struct tb_switch *sw;
+	int ret;
 
 	pm_runtime_get_sync(&parent_sw->dev);
 
@@ -557,14 +602,18 @@ static void add_switch(struct tb_switch *parent_sw, u64 route,
 	tb_port_at(route, parent_sw)->remote = tb_upstream_port(sw);
 	tb_upstream_port(sw)->remote = tb_port_at(route, parent_sw);
 
-	if (tb_switch_add(sw)) {
+	ret = tb_switch_add(sw);
+	if (ret) {
 		tb_port_at(tb_route(sw), parent_sw)->remote = NULL;
 		tb_switch_put(sw);
+		sw = ERR_PTR(ret);
 	}
 
 out:
 	pm_runtime_mark_last_busy(&parent_sw->dev);
 	pm_runtime_put_autosuspend(&parent_sw->dev);
+
+	return sw;
 }
 
 static void update_switch(struct tb_switch *parent_sw, struct tb_switch *sw,
@@ -656,6 +705,8 @@ icm_fr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr)
 	u64 route;
 	int ret;
 
+	icm_postpone_rescan(tb);
+
 	link = pkg->link_info & ICM_LINK_INFO_LINK_MASK;
 	depth = (pkg->link_info & ICM_LINK_INFO_DEPTH_MASK) >>
 		ICM_LINK_INFO_DEPTH_SHIFT;
@@ -1086,7 +1137,8 @@ static int icm_tr_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd)
 }
 
 static void
-icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr)
+__icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr,
+			  bool force_rtd3)
 {
 	const struct icm_tr_event_device_connected *pkg =
 		(const struct icm_tr_event_device_connected *)hdr;
@@ -1096,6 +1148,8 @@ icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr)
 	bool authorized, boot;
 	u64 route;
 
+	icm_postpone_rescan(tb);
+
 	/*
 	 * Currently we don't use the QoS information coming with the
 	 * device connected message so simply just ignore that extra
@@ -1151,13 +1205,21 @@ icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr)
 		return;
 	}
 
-	add_switch(parent_sw, route, &pkg->ep_uuid, (const u8 *)pkg->ep_name,
-		   sizeof(pkg->ep_name), pkg->connection_id,
-		   0, 0, 0, security_level, authorized, boot);
+	sw = add_switch(parent_sw, route, &pkg->ep_uuid, (const u8 *)pkg->ep_name,
+			sizeof(pkg->ep_name), pkg->connection_id, 0, 0, 0,
+			security_level, authorized, boot);
+	if (!IS_ERR(sw) && force_rtd3)
+		sw->rpm = true;
 
 	tb_switch_put(parent_sw);
 }
 
+static void
+icm_tr_device_connected(struct tb *tb, const struct icm_pkg_header *hdr)
+{
+	__icm_tr_device_connected(tb, hdr, false);
+}
+
 static void
 icm_tr_device_disconnected(struct tb *tb, const struct icm_pkg_header *hdr)
 {
@@ -1468,6 +1530,61 @@ static int icm_ar_set_boot_acl(struct tb *tb, const uuid_t *uuids,
 	return 0;
 }
 
+static int
+icm_icl_driver_ready(struct tb *tb, enum tb_security_level *security_level,
+		    size_t *nboot_acl, bool *rpm)
+{
+	struct icm_tr_pkg_driver_ready_response reply;
+	struct icm_pkg_driver_ready request = {
+		.hdr.code = ICM_DRIVER_READY,
+	};
+	int ret;
+
+	memset(&reply, 0, sizeof(reply));
+	ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply),
+			  1, 20000);
+	if (ret)
+		return ret;
+
+	/* Ice Lake always supports RTD3 */
+	if (rpm)
+		*rpm = true;
+
+	return 0;
+}
+
+static void icm_icl_set_uuid(struct tb *tb)
+{
+	struct tb_nhi *nhi = tb->nhi;
+	u32 uuid[4];
+
+	pci_read_config_dword(nhi->pdev, VS_CAP_10, &uuid[0]);
+	pci_read_config_dword(nhi->pdev, VS_CAP_11, &uuid[1]);
+	uuid[2] = 0xffffffff;
+	uuid[3] = 0xffffffff;
+
+	tb->root_switch->uuid = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
+}
+
+static void
+icm_icl_device_connected(struct tb *tb, const struct icm_pkg_header *hdr)
+{
+	__icm_tr_device_connected(tb, hdr, true);
+}
+
+static void icm_icl_rtd3_veto(struct tb *tb, const struct icm_pkg_header *hdr)
+{
+	const struct icm_icl_event_rtd3_veto *pkg =
+		(const struct icm_icl_event_rtd3_veto *)hdr;
+
+	tb_dbg(tb, "ICM rtd3 veto=0x%08x\n", pkg->veto_reason);
+
+	if (pkg->veto_reason)
+		icm_veto_begin(tb);
+	else
+		icm_veto_end(tb);
+}
+
 static void icm_handle_notification(struct work_struct *work)
 {
 	struct icm_notification *n = container_of(work, typeof(*n), work);
@@ -1495,6 +1612,9 @@ static void icm_handle_notification(struct work_struct *work)
 		case ICM_EVENT_XDOMAIN_DISCONNECTED:
 			icm->xdomain_disconnected(tb, n->pkg);
 			break;
+		case ICM_EVENT_RTD3_VETO:
+			icm->rtd3_veto(tb, n->pkg);
+			break;
 		}
 	}
 
@@ -1853,6 +1973,13 @@ static void icm_complete(struct tb *tb)
 	if (tb->nhi->going_away)
 		return;
 
+	/*
+	 * If RTD3 was vetoed before we entered system suspend allow it
+	 * again now before driver ready is sent. Firmware sends a new RTD3
+	 * veto if it is still the case after we have sent it driver ready
+	 * command.
+	 */
+	icm_veto_end(tb);
 	icm_unplug_children(tb->root_switch);
 
 	/*
@@ -1918,6 +2045,9 @@ static int icm_start(struct tb *tb)
 	tb->root_switch->no_nvm_upgrade = !icm->can_upgrade_nvm;
 	tb->root_switch->rpm = icm->rpm;
 
+	if (icm->set_uuid)
+		icm->set_uuid(tb);
+
 	ret = tb_switch_add(tb->root_switch);
 	if (ret) {
 		tb_switch_put(tb->root_switch);
@@ -2002,6 +2132,19 @@ static const struct tb_cm_ops icm_tr_ops = {
 	.disconnect_xdomain_paths = icm_tr_disconnect_xdomain_paths,
 };
 
+/* Ice Lake */
+static const struct tb_cm_ops icm_icl_ops = {
+	.driver_ready = icm_driver_ready,
+	.start = icm_start,
+	.stop = icm_stop,
+	.complete = icm_complete,
+	.runtime_suspend = icm_runtime_suspend,
+	.runtime_resume = icm_runtime_resume,
+	.handle_event = icm_handle_event,
+	.approve_xdomain_paths = icm_tr_approve_xdomain_paths,
+	.disconnect_xdomain_paths = icm_tr_disconnect_xdomain_paths,
+};
+
 struct tb *icm_probe(struct tb_nhi *nhi)
 {
 	struct icm *icm;
@@ -2070,6 +2213,19 @@ struct tb *icm_probe(struct tb_nhi *nhi)
 		icm->xdomain_disconnected = icm_tr_xdomain_disconnected;
 		tb->cm_ops = &icm_tr_ops;
 		break;
+
+	case PCI_DEVICE_ID_INTEL_ICL_NHI0:
+	case PCI_DEVICE_ID_INTEL_ICL_NHI1:
+		icm->is_supported = icm_ar_is_supported;
+		icm->driver_ready = icm_icl_driver_ready;
+		icm->set_uuid = icm_icl_set_uuid;
+		icm->device_connected = icm_icl_device_connected;
+		icm->device_disconnected = icm_tr_device_disconnected;
+		icm->xdomain_connected = icm_tr_xdomain_connected;
+		icm->xdomain_disconnected = icm_tr_xdomain_disconnected;
+		icm->rtd3_veto = icm_icl_rtd3_veto;
+		tb->cm_ops = &icm_icl_ops;
+		break;
 	}
 
 	if (!icm->is_supported || !icm->is_supported(tb)) {
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 9c782706e652..641b21b54460 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -16,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/delay.h>
+#include <linux/property.h>
 
 #include "nhi.h"
 #include "nhi_regs.h"
@@ -859,12 +860,52 @@ static irqreturn_t nhi_msi(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int nhi_suspend_noirq(struct device *dev)
+static int __nhi_suspend_noirq(struct device *dev, bool wakeup)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct tb *tb = pci_get_drvdata(pdev);
+	struct tb_nhi *nhi = tb->nhi;
+	int ret;
+
+	ret = tb_domain_suspend_noirq(tb);
+	if (ret)
+		return ret;
+
+	if (nhi->ops && nhi->ops->suspend_noirq) {
+		ret = nhi->ops->suspend_noirq(tb->nhi, wakeup);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int nhi_suspend_noirq(struct device *dev)
+{
+	return __nhi_suspend_noirq(dev, device_may_wakeup(dev));
+}
+
+static bool nhi_wake_supported(struct pci_dev *pdev)
+{
+	u8 val;
+
+	/*
+	 * If power rails are sustainable for wakeup from S4 this
+	 * property is set by the BIOS.
+	 */
+	if (device_property_read_u8(&pdev->dev, "WAKE_SUPPORTED", &val))
+		return !!val;
+
+	return true;
+}
+
+static int nhi_poweroff_noirq(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	bool wakeup;
 
-	return tb_domain_suspend_noirq(tb);
+	wakeup = device_may_wakeup(dev) && nhi_wake_supported(pdev);
+	return __nhi_suspend_noirq(dev, wakeup);
 }
 
 static void nhi_enable_int_throttling(struct tb_nhi *nhi)
@@ -887,16 +928,24 @@ static int nhi_resume_noirq(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct tb *tb = pci_get_drvdata(pdev);
+	struct tb_nhi *nhi = tb->nhi;
+	int ret;
 
 	/*
 	 * Check that the device is still there. It may be that the user
 	 * unplugged last device which causes the host controller to go
 	 * away on PCs.
 	 */
-	if (!pci_device_is_present(pdev))
-		tb->nhi->going_away = true;
-	else
+	if (!pci_device_is_present(pdev)) {
+		nhi->going_away = true;
+	} else {
+		if (nhi->ops && nhi->ops->resume_noirq) {
+			ret = nhi->ops->resume_noirq(nhi);
+			if (ret)
+				return ret;
+		}
 		nhi_enable_int_throttling(tb->nhi);
+	}
 
 	return tb_domain_resume_noirq(tb);
 }
@@ -929,16 +978,35 @@ static int nhi_runtime_suspend(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct tb *tb = pci_get_drvdata(pdev);
+	struct tb_nhi *nhi = tb->nhi;
+	int ret;
+
+	ret = tb_domain_runtime_suspend(tb);
+	if (ret)
+		return ret;
 
-	return tb_domain_runtime_suspend(tb);
+	if (nhi->ops && nhi->ops->runtime_suspend) {
+		ret = nhi->ops->runtime_suspend(tb->nhi);
+		if (ret)
+			return ret;
+	}
+	return 0;
 }
 
 static int nhi_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct tb *tb = pci_get_drvdata(pdev);
+	struct tb_nhi *nhi = tb->nhi;
+	int ret;
+
+	if (nhi->ops && nhi->ops->runtime_resume) {
+		ret = nhi->ops->runtime_resume(nhi);
+		if (ret)
+			return ret;
+	}
 
-	nhi_enable_int_throttling(tb->nhi);
+	nhi_enable_int_throttling(nhi);
 	return tb_domain_runtime_resume(tb);
 }
 
@@ -966,6 +1034,9 @@ static void nhi_shutdown(struct tb_nhi *nhi)
 		flush_work(&nhi->interrupt_work);
 	}
 	ida_destroy(&nhi->msix_ida);
+
+	if (nhi->ops && nhi->ops->shutdown)
+		nhi->ops->shutdown(nhi);
 }
 
 static int nhi_init_msi(struct tb_nhi *nhi)
@@ -1010,12 +1081,27 @@ static int nhi_init_msi(struct tb_nhi *nhi)
 	return 0;
 }
 
+static bool nhi_imr_valid(struct pci_dev *pdev)
+{
+	u8 val;
+
+	if (!device_property_read_u8(&pdev->dev, "IMR_VALID", &val))
+		return !!val;
+
+	return true;
+}
+
 static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct tb_nhi *nhi;
 	struct tb *tb;
 	int res;
 
+	if (!nhi_imr_valid(pdev)) {
+		dev_warn(&pdev->dev, "firmware image not valid, aborting\n");
+		return -ENODEV;
+	}
+
 	res = pcim_enable_device(pdev);
 	if (res) {
 		dev_err(&pdev->dev, "cannot enable PCI device, aborting\n");
@@ -1033,6 +1119,7 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 
 	nhi->pdev = pdev;
+	nhi->ops = (const struct tb_nhi_ops *)id->driver_data;
 	/* cannot fail - table is allocated bin pcim_iomap_regions */
 	nhi->iobase = pcim_iomap_table(pdev)[0];
 	nhi->hop_count = ioread32(nhi->iobase + REG_HOP_COUNT) & 0x3ff;
@@ -1065,6 +1152,12 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	pci_set_master(pdev);
 
+	if (nhi->ops && nhi->ops->init) {
+		res = nhi->ops->init(nhi);
+		if (res)
+			return res;
+	}
+
 	tb = icm_probe(nhi);
 	if (!tb)
 		tb = tb_probe(nhi);
@@ -1125,6 +1218,7 @@ static const struct dev_pm_ops nhi_pm_ops = {
 	.restore_noirq = nhi_resume_noirq,
 	.suspend = nhi_suspend,
 	.freeze = nhi_suspend,
+	.poweroff_noirq = nhi_poweroff_noirq,
 	.poweroff = nhi_suspend,
 	.complete = nhi_complete,
 	.runtime_suspend = nhi_runtime_suspend,
@@ -1172,6 +1266,10 @@ static struct pci_device_id nhi_ids[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ALPINE_RIDGE_C_USBONLY_NHI) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TITAN_RIDGE_2C_NHI) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_NHI) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICL_NHI0),
+	  .driver_data = (kernel_ulong_t)&icl_nhi_ops },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICL_NHI1),
+	  .driver_data = (kernel_ulong_t)&icl_nhi_ops },
 
 	{ 0,}
 };
diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
index 1b5d47ecd3ed..b7b973949f8e 100644
--- a/drivers/thunderbolt/nhi.h
+++ b/drivers/thunderbolt/nhi.h
@@ -30,6 +30,26 @@ enum nhi_mailbox_cmd {
 int nhi_mailbox_cmd(struct tb_nhi *nhi, enum nhi_mailbox_cmd cmd, u32 data);
 enum nhi_fw_mode nhi_mailbox_mode(struct tb_nhi *nhi);
 
+/**
+ * struct tb_nhi_ops - NHI specific optional operations
+ * @init: NHI specific initialization
+ * @suspend_noirq: NHI specific suspend_noirq hook
+ * @resume_noirq: NHI specific resume_noirq hook
+ * @runtime_suspend: NHI specific runtime_suspend hook
+ * @runtime_resume: NHI specific runtime_resume hook
+ * @shutdown: NHI specific shutdown
+ */
+struct tb_nhi_ops {
+	int (*init)(struct tb_nhi *nhi);
+	int (*suspend_noirq)(struct tb_nhi *nhi, bool wakeup);
+	int (*resume_noirq)(struct tb_nhi *nhi);
+	int (*runtime_suspend)(struct tb_nhi *nhi);
+	int (*runtime_resume)(struct tb_nhi *nhi);
+	void (*shutdown)(struct tb_nhi *nhi);
+};
+
+extern const struct tb_nhi_ops icl_nhi_ops;
+
 /*
  * PCI IDs used in this driver from Win Ridge forward. There is no
  * need for the PCI quirk anymore as we will use ICM also on Apple
@@ -51,5 +71,7 @@ enum nhi_fw_mode nhi_mailbox_mode(struct tb_nhi *nhi);
 #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_BRIDGE	0x15ea
 #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_NHI		0x15eb
 #define PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_BRIDGE	0x15ef
+#define PCI_DEVICE_ID_INTEL_ICL_NHI1			0x8a0d
+#define PCI_DEVICE_ID_INTEL_ICL_NHI0			0x8a17
 
 #endif
diff --git a/drivers/thunderbolt/nhi_ops.c b/drivers/thunderbolt/nhi_ops.c
new file mode 100644
index 000000000000..61cd09cef943
--- /dev/null
+++ b/drivers/thunderbolt/nhi_ops.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHI specific operations
+ *
+ * Copyright (C) 2019, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/suspend.h>
+
+#include "nhi.h"
+#include "nhi_regs.h"
+#include "tb.h"
+
+/* Ice Lake specific NHI operations */
+
+#define ICL_LC_MAILBOX_TIMEOUT	500 /* ms */
+
+static int check_for_device(struct device *dev, void *data)
+{
+	return tb_is_switch(dev);
+}
+
+static bool icl_nhi_is_device_connected(struct tb_nhi *nhi)
+{
+	struct tb *tb = pci_get_drvdata(nhi->pdev);
+	int ret;
+
+	ret = device_for_each_child(&tb->root_switch->dev, NULL,
+				    check_for_device);
+	return ret > 0;
+}
+
+static int icl_nhi_force_power(struct tb_nhi *nhi, bool power)
+{
+	u32 vs_cap;
+
+	/*
+	 * The Thunderbolt host controller is present always in Ice Lake
+	 * but the firmware may not be loaded and running (depending
+	 * whether there is device connected and so on). Each time the
+	 * controller is used we need to "Force Power" it first and wait
+	 * for the firmware to indicate it is up and running. This "Force
+	 * Power" is really not about actually powering on/off the
+	 * controller so it is accessible even if "Force Power" is off.
+	 *
+	 * The actual power management happens inside shared ACPI power
+	 * resources using standard ACPI methods.
+	 */
+	pci_read_config_dword(nhi->pdev, VS_CAP_22, &vs_cap);
+	if (power) {
+		vs_cap &= ~VS_CAP_22_DMA_DELAY_MASK;
+		vs_cap |= 0x22 << VS_CAP_22_DMA_DELAY_SHIFT;
+		vs_cap |= VS_CAP_22_FORCE_POWER;
+	} else {
+		vs_cap &= ~VS_CAP_22_FORCE_POWER;
+	}
+	pci_write_config_dword(nhi->pdev, VS_CAP_22, vs_cap);
+
+	if (power) {
+		unsigned int retries = 10;
+		u32 val;
+
+		/* Wait until the firmware tells it is up and running */
+		do {
+			pci_read_config_dword(nhi->pdev, VS_CAP_9, &val);
+			if (val & VS_CAP_9_FW_READY)
+				return 0;
+			msleep(250);
+		} while (--retries);
+
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void icl_nhi_lc_mailbox_cmd(struct tb_nhi *nhi, enum icl_lc_mailbox_cmd cmd)
+{
+	u32 data;
+
+	pci_read_config_dword(nhi->pdev, VS_CAP_19, &data);
+	data = (cmd << VS_CAP_19_CMD_SHIFT) & VS_CAP_19_CMD_MASK;
+	pci_write_config_dword(nhi->pdev, VS_CAP_19, data | VS_CAP_19_VALID);
+}
+
+static int icl_nhi_lc_mailbox_cmd_complete(struct tb_nhi *nhi, int timeout)
+{
+	unsigned long end;
+	u32 data;
+
+	if (!timeout)
+		goto clear;
+
+	end = jiffies + msecs_to_jiffies(timeout);
+	do {
+		pci_read_config_dword(nhi->pdev, VS_CAP_18, &data);
+		if (data & VS_CAP_18_DONE)
+			goto clear;
+		msleep(100);
+	} while (time_before(jiffies, end));
+
+	return -ETIMEDOUT;
+
+clear:
+	/* Clear the valid bit */
+	pci_write_config_dword(nhi->pdev, VS_CAP_19, 0);
+	return 0;
+}
+
+static void icl_nhi_set_ltr(struct tb_nhi *nhi)
+{
+	u32 max_ltr, ltr;
+
+	pci_read_config_dword(nhi->pdev, VS_CAP_16, &max_ltr);
+	max_ltr &= 0xffff;
+	/* Program the same value for both snoop and no-snoop */
+	ltr = max_ltr << 16 | max_ltr;
+	pci_write_config_dword(nhi->pdev, VS_CAP_15, ltr);
+}
+
+static int icl_nhi_suspend(struct tb_nhi *nhi)
+{
+	int ret;
+
+	if (icl_nhi_is_device_connected(nhi))
+		return 0;
+
+	/*
+	 * If there is no device connected we need to perform both: a
+	 * handshake through LC mailbox and force power down before
+	 * entering D3.
+	 */
+	icl_nhi_lc_mailbox_cmd(nhi, ICL_LC_PREPARE_FOR_RESET);
+	ret = icl_nhi_lc_mailbox_cmd_complete(nhi, ICL_LC_MAILBOX_TIMEOUT);
+	if (ret)
+		return ret;
+
+	return icl_nhi_force_power(nhi, false);
+}
+
+static int icl_nhi_suspend_noirq(struct tb_nhi *nhi, bool wakeup)
+{
+	enum icl_lc_mailbox_cmd cmd;
+
+	if (!pm_suspend_via_firmware())
+		return icl_nhi_suspend(nhi);
+
+	cmd = wakeup ? ICL_LC_GO2SX : ICL_LC_GO2SX_NO_WAKE;
+	icl_nhi_lc_mailbox_cmd(nhi, cmd);
+	return icl_nhi_lc_mailbox_cmd_complete(nhi, ICL_LC_MAILBOX_TIMEOUT);
+}
+
+static int icl_nhi_resume(struct tb_nhi *nhi)
+{
+	int ret;
+
+	ret = icl_nhi_force_power(nhi, true);
+	if (ret)
+		return ret;
+
+	icl_nhi_set_ltr(nhi);
+	return 0;
+}
+
+static void icl_nhi_shutdown(struct tb_nhi *nhi)
+{
+	icl_nhi_force_power(nhi, false);
+}
+
+const struct tb_nhi_ops icl_nhi_ops = {
+	.init = icl_nhi_resume,
+	.suspend_noirq = icl_nhi_suspend_noirq,
+	.resume_noirq = icl_nhi_resume,
+	.runtime_suspend = icl_nhi_suspend,
+	.runtime_resume = icl_nhi_resume,
+	.shutdown = icl_nhi_shutdown,
+};
diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
index a60bd98c1d04..0d4970dcef84 100644
--- a/drivers/thunderbolt/nhi_regs.h
+++ b/drivers/thunderbolt/nhi_regs.h
@@ -124,4 +124,41 @@ struct ring_desc {
 #define REG_FW_STS_ICM_EN_INVERT	BIT(1)
 #define REG_FW_STS_ICM_EN		BIT(0)
 
+/* ICL NHI VSEC registers */
+
+/* FW ready */
+#define VS_CAP_9			0xc8
+#define VS_CAP_9_FW_READY		BIT(31)
+/* UUID */
+#define VS_CAP_10			0xcc
+#define VS_CAP_11			0xd0
+/* LTR */
+#define VS_CAP_15			0xe0
+#define VS_CAP_16			0xe4
+/* TBT2PCIe */
+#define VS_CAP_18			0xec
+#define VS_CAP_18_DONE			BIT(0)
+/* PCIe2TBT */
+#define VS_CAP_19			0xf0
+#define VS_CAP_19_VALID			BIT(0)
+#define VS_CAP_19_CMD_SHIFT		1
+#define VS_CAP_19_CMD_MASK		GENMASK(7, 1)
+/* Force power */
+#define VS_CAP_22			0xfc
+#define VS_CAP_22_FORCE_POWER		BIT(1)
+#define VS_CAP_22_DMA_DELAY_MASK	GENMASK(31, 24)
+#define VS_CAP_22_DMA_DELAY_SHIFT	24
+
+/**
+ * enum icl_lc_mailbox_cmd - ICL specific LC mailbox commands
+ * @ICL_LC_GO2SX: Ask LC to enter Sx without wake
+ * @ICL_LC_GO2SX_NO_WAKE: Ask LC to enter Sx with wake
+ * @ICL_LC_PREPARE_FOR_RESET: Prepare LC for reset
+ */
+enum icl_lc_mailbox_cmd {
+	ICL_LC_GO2SX = 0x02,
+	ICL_LC_GO2SX_NO_WAKE = 0x03,
+	ICL_LC_PREPARE_FOR_RESET = 0x21,
+};
+
 #endif
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 531f11fecf75..410bf1bceeee 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -1470,6 +1470,8 @@ static int tb_switch_get_generation(struct tb_switch *sw)
 	case PCI_DEVICE_ID_INTEL_TITAN_RIDGE_2C_BRIDGE:
 	case PCI_DEVICE_ID_INTEL_TITAN_RIDGE_4C_BRIDGE:
 	case PCI_DEVICE_ID_INTEL_TITAN_RIDGE_DD_BRIDGE:
+	case PCI_DEVICE_ID_INTEL_ICL_NHI0:
+	case PCI_DEVICE_ID_INTEL_ICL_NHI1:
 		return 3;
 
 	default:
diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h
index afbe1d29bb03..4b641e4ee0c5 100644
--- a/drivers/thunderbolt/tb_msgs.h
+++ b/drivers/thunderbolt/tb_msgs.h
@@ -104,10 +104,11 @@ enum icm_pkg_code {
 };
 
 enum icm_event_code {
-	ICM_EVENT_DEVICE_CONNECTED = 3,
-	ICM_EVENT_DEVICE_DISCONNECTED = 4,
-	ICM_EVENT_XDOMAIN_CONNECTED = 6,
-	ICM_EVENT_XDOMAIN_DISCONNECTED = 7,
+	ICM_EVENT_DEVICE_CONNECTED = 0x3,
+	ICM_EVENT_DEVICE_DISCONNECTED = 0x4,
+	ICM_EVENT_XDOMAIN_CONNECTED = 0x6,
+	ICM_EVENT_XDOMAIN_DISCONNECTED = 0x7,
+	ICM_EVENT_RTD3_VETO = 0xa,
 };
 
 struct icm_pkg_header {
@@ -463,6 +464,13 @@ struct icm_tr_pkg_disconnect_xdomain_response {
 	uuid_t remote_uuid;
 };
 
+/* Ice Lake messages */
+
+struct icm_icl_event_rtd3_veto {
+	struct icm_pkg_header hdr;
+	u32 veto_reason;
+};
+
 /* XDomain messages */
 
 struct tb_xdomain_header {
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index 2d7e012db03f..ece782ef5466 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -429,6 +429,7 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc)
  * @lock: Must be held during ring creation/destruction. Is acquired by
  *	  interrupt_work when dispatching interrupts to individual rings.
  * @pdev: Pointer to the PCI device
+ * @ops: NHI specific optional ops
  * @iobase: MMIO space of the NHI
  * @tx_rings: All Tx rings available on this host controller
  * @rx_rings: All Rx rings available on this host controller
@@ -442,6 +443,7 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc)
 struct tb_nhi {
 	spinlock_t lock;
 	struct pci_dev *pdev;
+	const struct tb_nhi_ops *ops;
 	void __iomem *iobase;
 	struct tb_ring **tx_rings;
 	struct tb_ring **rx_rings;
-- 
cgit v1.2.3


From 1666faedb567d03cde1d656ae24c6cc253e67373 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Mon, 19 Aug 2019 13:07:22 +0300
Subject: software node: Add software_node_find_by_name()

Function that searches software nodes by node name.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/swnode.c    | 37 +++++++++++++++++++++++++++++++++++++
 include/linux/property.h |  4 ++++
 2 files changed, 41 insertions(+)

(limited to 'include')

diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
index e7b3aa3bd55a..ee2a405cca9a 100644
--- a/drivers/base/swnode.c
+++ b/drivers/base/swnode.c
@@ -620,6 +620,43 @@ static const struct fwnode_operations software_node_ops = {
 
 /* -------------------------------------------------------------------------- */
 
+/**
+ * software_node_find_by_name - Find software node by name
+ * @parent: Parent of the software node
+ * @name: Name of the software node
+ *
+ * The function will find a node that is child of @parent and that is named
+ * @name. If no node is found, the function returns NULL.
+ *
+ * NOTE: you will need to drop the reference with fwnode_handle_put() after use.
+ */
+const struct software_node *
+software_node_find_by_name(const struct software_node *parent, const char *name)
+{
+	struct swnode *swnode;
+	struct kobject *k;
+
+	if (!name)
+		return NULL;
+
+	spin_lock(&swnode_kset->list_lock);
+
+	list_for_each_entry(k, &swnode_kset->list, entry) {
+		swnode = kobj_to_swnode(k);
+		if (parent == swnode->node->parent && swnode->node->name &&
+		    !strcmp(name, swnode->node->name)) {
+			kobject_get(&swnode->kobj);
+			break;
+		}
+		swnode = NULL;
+	}
+
+	spin_unlock(&swnode_kset->list_lock);
+
+	return swnode ? swnode->node : NULL;
+}
+EXPORT_SYMBOL_GPL(software_node_find_by_name);
+
 static int
 software_node_register_properties(struct software_node *node,
 				  const struct property_entry *properties)
diff --git a/include/linux/property.h b/include/linux/property.h
index 5a910ad79591..9b3d4ca3a73a 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -421,6 +421,10 @@ bool is_software_node(const struct fwnode_handle *fwnode);
 const struct software_node *to_software_node(struct fwnode_handle *fwnode);
 struct fwnode_handle *software_node_fwnode(const struct software_node *node);
 
+const struct software_node *
+software_node_find_by_name(const struct software_node *parent,
+			   const char *name);
+
 int software_node_register_nodes(const struct software_node *nodes);
 void software_node_unregister_nodes(const struct software_node *nodes);
 
-- 
cgit v1.2.3


From 149f3b87840e7d292ad059f5fc23f1fa2fc98b9e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 23 Aug 2019 23:32:27 +0300
Subject: device property: Remove duplicate test for NULL

There is no need to check twice for a NULL in fwnode_call_bool_op().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/fwnode.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index a11c8c56c78b..ababd6bc82f3 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -110,10 +110,11 @@ struct fwnode_operations {
 	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
 		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : -ENXIO) : \
 	 -EINVAL)
-#define fwnode_call_bool_op(fwnode, op, ...)				\
-	(fwnode ? (fwnode_has_op(fwnode, op) ?				\
-		   (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false) : \
-	 false)
+
+#define fwnode_call_bool_op(fwnode, op, ...)		\
+	(fwnode_has_op(fwnode, op) ?			\
+	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : false)
+
 #define fwnode_call_ptr_op(fwnode, op, ...)		\
 	(fwnode_has_op(fwnode, op) ?			\
 	 (fwnode)->ops->op(fwnode, ## __VA_ARGS__) : NULL)
-- 
cgit v1.2.3


From 3fca9e0be9b5b7f4ccd5f71941143d9719047a05 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Mon, 22 Jul 2019 15:44:20 +0200
Subject: fbdev: da8xx: remove panel_power_ctrl() callback from platform data

There are no more users of panel_power_ctrl(). Remove it from the
driver.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 drivers/video/fbdev/da8xx-fb.c | 25 +++++--------------------
 include/video/da8xx-fb.h       |  1 -
 2 files changed, 5 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index 02dfe9e32eed..19ed9889c8f8 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -165,7 +165,6 @@ struct da8xx_fb_par {
 	struct notifier_block	freq_transition;
 #endif
 	unsigned int		lcdc_clk_rate;
-	void (*panel_power_ctrl)(int);
 	struct regulator	*lcd_supply;
 	u32 pseudo_palette[16];
 	struct fb_videomode	mode;
@@ -1076,9 +1075,7 @@ static int fb_remove(struct platform_device *dev)
 #ifdef CONFIG_CPU_FREQ
 		lcd_da8xx_cpufreq_deregister(par);
 #endif
-		if (par->panel_power_ctrl) {
-			par->panel_power_ctrl(0);
-		} else if (par->lcd_supply) {
+		if (par->lcd_supply) {
 			ret = regulator_disable(par->lcd_supply);
 			if (ret)
 				return ret;
@@ -1187,9 +1184,7 @@ static int cfb_blank(int blank, struct fb_info *info)
 	case FB_BLANK_UNBLANK:
 		lcd_enable_raster();
 
-		if (par->panel_power_ctrl) {
-			par->panel_power_ctrl(1);
-		} else if (par->lcd_supply) {
+		if (par->lcd_supply) {
 			ret = regulator_enable(par->lcd_supply);
 			if (ret)
 				return ret;
@@ -1199,9 +1194,7 @@ static int cfb_blank(int blank, struct fb_info *info)
 	case FB_BLANK_VSYNC_SUSPEND:
 	case FB_BLANK_HSYNC_SUSPEND:
 	case FB_BLANK_POWERDOWN:
-		if (par->panel_power_ctrl) {
-			par->panel_power_ctrl(0);
-		} else if (par->lcd_supply) {
+		if (par->lcd_supply) {
 			ret = regulator_disable(par->lcd_supply);
 			if (ret)
 				return ret;
@@ -1413,10 +1406,6 @@ static int fb_probe(struct platform_device *device)
 	par->dev = &device->dev;
 	par->lcdc_clk = tmp_lcdc_clk;
 	par->lcdc_clk_rate = clk_get_rate(par->lcdc_clk);
-	if (fb_pdata->panel_power_ctrl) {
-		par->panel_power_ctrl = fb_pdata->panel_power_ctrl;
-		par->panel_power_ctrl(1);
-	}
 
 	par->lcd_supply = devm_regulator_get_optional(&device->dev, "lcd");
 	if (IS_ERR(par->lcd_supply)) {
@@ -1638,9 +1627,7 @@ static int fb_suspend(struct device *dev)
 	int ret;
 
 	console_lock();
-	if (par->panel_power_ctrl) {
-		par->panel_power_ctrl(0);
-	} else if (par->lcd_supply) {
+	if (par->lcd_supply) {
 		ret = regulator_disable(par->lcd_supply);
 		if (ret)
 			return ret;
@@ -1666,9 +1653,7 @@ static int fb_resume(struct device *dev)
 	if (par->blank == FB_BLANK_UNBLANK) {
 		lcd_enable_raster();
 
-		if (par->panel_power_ctrl) {
-			par->panel_power_ctrl(1);
-		} else if (par->lcd_supply) {
+		if (par->lcd_supply) {
 			ret = regulator_enable(par->lcd_supply);
 			if (ret)
 				return ret;
diff --git a/include/video/da8xx-fb.h b/include/video/da8xx-fb.h
index efed3c3383d6..1d19ae62b844 100644
--- a/include/video/da8xx-fb.h
+++ b/include/video/da8xx-fb.h
@@ -32,7 +32,6 @@ struct da8xx_lcdc_platform_data {
 	const char manu_name[10];
 	void *controller_data;
 	const char type[25];
-	void (*panel_power_ctrl)(int);
 };
 
 struct lcd_ctrl_config {
-- 
cgit v1.2.3


From 7c795df5f344482c2ab8c52ebc1d94302d2b9082 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 15 Aug 2019 13:48:02 -0300
Subject: media: v4l2-core: move spi helpers out of v4l2-common.c

Separate the spi helpers to v4l2-spi.c, in order to get rid
of the ifdefery. No functional changes intended, this is
just a cosmetic change to organize the code better.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/Makefile      |  1 +
 drivers/media/v4l2-core/v4l2-common.c | 65 -----------------------------------
 drivers/media/v4l2-core/v4l2-spi.c    | 65 +++++++++++++++++++++++++++++++++++
 include/media/v4l2-common.h           | 18 ++++++++--
 4 files changed, 82 insertions(+), 67 deletions(-)
 create mode 100644 drivers/media/v4l2-core/v4l2-spi.c

(limited to 'include')

diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
index 8e2f52f7800b..2deeeac6ee76 100644
--- a/drivers/media/v4l2-core/Makefile
+++ b/drivers/media/v4l2-core/Makefile
@@ -11,6 +11,7 @@ videodev-objs	:=	v4l2-dev.o v4l2-ioctl.o v4l2-device.o v4l2-fh.o \
 videodev-$(CONFIG_COMPAT) += v4l2-compat-ioctl32.o
 videodev-$(CONFIG_TRACEPOINTS) += v4l2-trace.o
 videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o
+videodev-$(CONFIG_SPI) += v4l2-spi.o
 
 obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o
 obj-$(CONFIG_VIDEO_V4L2) += videodev.o
diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index 5cbb51864fbb..8ffa758d9342 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -41,9 +41,6 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/i2c.h>
-#if defined(CONFIG_SPI)
-#include <linux/spi/spi.h>
-#endif
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -235,68 +232,6 @@ EXPORT_SYMBOL_GPL(v4l2_i2c_tuner_addrs);
 
 #endif /* defined(CONFIG_I2C) */
 
-#if defined(CONFIG_SPI)
-
-/* Load an spi sub-device. */
-
-void v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
-		const struct v4l2_subdev_ops *ops)
-{
-	v4l2_subdev_init(sd, ops);
-	sd->flags |= V4L2_SUBDEV_FL_IS_SPI;
-	/* the owner is the same as the spi_device's driver owner */
-	sd->owner = spi->dev.driver->owner;
-	sd->dev = &spi->dev;
-	/* spi_device and v4l2_subdev point to one another */
-	v4l2_set_subdevdata(sd, spi);
-	spi_set_drvdata(spi, sd);
-	/* initialize name */
-	snprintf(sd->name, sizeof(sd->name), "%s %s",
-		spi->dev.driver->name, dev_name(&spi->dev));
-}
-EXPORT_SYMBOL_GPL(v4l2_spi_subdev_init);
-
-struct v4l2_subdev *v4l2_spi_new_subdev(struct v4l2_device *v4l2_dev,
-		struct spi_master *master, struct spi_board_info *info)
-{
-	struct v4l2_subdev *sd = NULL;
-	struct spi_device *spi = NULL;
-
-	BUG_ON(!v4l2_dev);
-
-	if (info->modalias[0])
-		request_module(info->modalias);
-
-	spi = spi_new_device(master, info);
-
-	if (spi == NULL || spi->dev.driver == NULL)
-		goto error;
-
-	if (!try_module_get(spi->dev.driver->owner))
-		goto error;
-
-	sd = spi_get_drvdata(spi);
-
-	/* Register with the v4l2_device which increases the module's
-	   use count as well. */
-	if (v4l2_device_register_subdev(v4l2_dev, sd))
-		sd = NULL;
-
-	/* Decrease the module use count to match the first try_module_get. */
-	module_put(spi->dev.driver->owner);
-
-error:
-	/* If we have a client but no subdev, then something went wrong and
-	   we must unregister the client. */
-	if (!sd)
-		spi_unregister_device(spi);
-
-	return sd;
-}
-EXPORT_SYMBOL_GPL(v4l2_spi_new_subdev);
-
-#endif /* defined(CONFIG_SPI) */
-
 /* Clamp x to be between min and max, aligned to a multiple of 2^align.  min
  * and max don't have to be aligned, but there must be at least one valid
  * value.  E.g., min=17,max=31,align=4 is not allowed as there are no multiples
diff --git a/drivers/media/v4l2-core/v4l2-spi.c b/drivers/media/v4l2-core/v4l2-spi.c
new file mode 100644
index 000000000000..ab5a7eb4205d
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-spi.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * v4l2-spi - SPI helpers for Video4Linux2
+ */
+
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <media/v4l2-common.h>
+#include <media/v4l2-device.h>
+
+void v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
+		const struct v4l2_subdev_ops *ops)
+{
+	v4l2_subdev_init(sd, ops);
+	sd->flags |= V4L2_SUBDEV_FL_IS_SPI;
+	/* the owner is the same as the spi_device's driver owner */
+	sd->owner = spi->dev.driver->owner;
+	sd->dev = &spi->dev;
+	/* spi_device and v4l2_subdev point to one another */
+	v4l2_set_subdevdata(sd, spi);
+	spi_set_drvdata(spi, sd);
+	/* initialize name */
+	snprintf(sd->name, sizeof(sd->name), "%s %s",
+		spi->dev.driver->name, dev_name(&spi->dev));
+}
+EXPORT_SYMBOL_GPL(v4l2_spi_subdev_init);
+
+struct v4l2_subdev *v4l2_spi_new_subdev(struct v4l2_device *v4l2_dev,
+		struct spi_master *master, struct spi_board_info *info)
+{
+	struct v4l2_subdev *sd = NULL;
+	struct spi_device *spi = NULL;
+
+	BUG_ON(!v4l2_dev);
+
+	if (info->modalias[0])
+		request_module(info->modalias);
+
+	spi = spi_new_device(master, info);
+
+	if (spi == NULL || spi->dev.driver == NULL)
+		goto error;
+
+	if (!try_module_get(spi->dev.driver->owner))
+		goto error;
+
+	sd = spi_get_drvdata(spi);
+
+	/* Register with the v4l2_device which increases the module's
+	   use count as well. */
+	if (v4l2_device_register_subdev(v4l2_dev, sd))
+		sd = NULL;
+
+	/* Decrease the module use count to match the first try_module_get. */
+	module_put(spi->dev.driver->owner);
+
+error:
+	/* If we have a client but no subdev, then something went wrong and
+	   we must unregister the client. */
+	if (!sd)
+		spi_unregister_device(spi);
+
+	return sd;
+}
+EXPORT_SYMBOL_GPL(v4l2_spi_new_subdev);
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 6b319d0d73ad..a1c5288caa6a 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -216,11 +216,10 @@ const unsigned short *v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type);
 /* ------------------------------------------------------------------------- */
 
 /* SPI Helper functions */
-#if defined(CONFIG_SPI)
 
 #include <linux/spi/spi.h>
 
-struct spi_device;
+#if defined(CONFIG_SPI)
 
 /**
  *  v4l2_spi_new_subdev - Load an spi module and return an initialized
@@ -246,6 +245,21 @@ struct v4l2_subdev *v4l2_spi_new_subdev(struct v4l2_device *v4l2_dev,
  */
 void v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
 		const struct v4l2_subdev_ops *ops);
+
+#else
+
+static inline struct v4l2_subdev *
+v4l2_spi_new_subdev(struct v4l2_device *v4l2_dev,
+		    struct spi_master *master, struct spi_board_info *info)
+{
+	return NULL;
+}
+
+static inline void
+v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
+		     const struct v4l2_subdev_ops *ops)
+{}
+
 #endif
 
 /* ------------------------------------------------------------------------- */
-- 
cgit v1.2.3


From 02283b98b1ac47659b17d575ea24e2b92ead7ede Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 15 Aug 2019 13:48:03 -0300
Subject: media: v4l2-core: move i2c helpers out of v4l2-common.c

Separate the i2c helpers to v4l2-i2c.c, in order to get rid
of the ifdefery. No functional changes intended, this is
just a cosmetic change to organize the code better.

Given I2C is a tristate symbol, a hidden boolean symbol
is introduced, to make the conditional build easier.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/Kconfig       |   5 ++
 drivers/media/v4l2-core/Makefile      |   1 +
 drivers/media/v4l2-core/v4l2-common.c | 145 ---------------------------------
 drivers/media/v4l2-core/v4l2-i2c.c    | 147 ++++++++++++++++++++++++++++++++++
 include/media/v4l2-common.h           | 113 +++++++++++++++++---------
 5 files changed, 229 insertions(+), 182 deletions(-)
 create mode 100644 drivers/media/v4l2-core/v4l2-i2c.c

(limited to 'include')

diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig
index 7c5f62f196e5..39e3fb30ba0b 100644
--- a/drivers/media/v4l2-core/Kconfig
+++ b/drivers/media/v4l2-core/Kconfig
@@ -11,6 +11,11 @@ config VIDEO_V4L2
 	select VIDEOBUF2_V4L2 if VIDEOBUF2_CORE
 	default (I2C || I2C=n) && VIDEO_DEV
 
+config VIDEO_V4L2_I2C
+	bool
+	depends on I2C && VIDEO_V4L2
+	default y
+
 config VIDEO_ADV_DEBUG
 	bool "Enable advanced debug functionality on V4L2 drivers"
 	help
diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile
index 2deeeac6ee76..786bd1ec4d1b 100644
--- a/drivers/media/v4l2-core/Makefile
+++ b/drivers/media/v4l2-core/Makefile
@@ -12,6 +12,7 @@ videodev-$(CONFIG_COMPAT) += v4l2-compat-ioctl32.o
 videodev-$(CONFIG_TRACEPOINTS) += v4l2-trace.o
 videodev-$(CONFIG_MEDIA_CONTROLLER) += v4l2-mc.o
 videodev-$(CONFIG_SPI) += v4l2-spi.o
+videodev-$(CONFIG_VIDEO_V4L2_I2C) += v4l2-i2c.o
 
 obj-$(CONFIG_V4L2_FWNODE) += v4l2-fwnode.o
 obj-$(CONFIG_VIDEO_V4L2) += videodev.o
diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index 8ffa758d9342..62f7aa92ac29 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -40,7 +40,6 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/errno.h>
-#include <linux/i2c.h>
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -88,150 +87,6 @@ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl, s32 _min, s32 _max, s32 _
 }
 EXPORT_SYMBOL(v4l2_ctrl_query_fill);
 
-/* I2C Helper functions */
-
-#if IS_ENABLED(CONFIG_I2C)
-
-void v4l2_i2c_subdev_set_name(struct v4l2_subdev *sd, struct i2c_client *client,
-			      const char *devname, const char *postfix)
-{
-	if (!devname)
-		devname = client->dev.driver->name;
-	if (!postfix)
-		postfix = "";
-
-	snprintf(sd->name, sizeof(sd->name), "%s%s %d-%04x", devname, postfix,
-		 i2c_adapter_id(client->adapter), client->addr);
-}
-EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_set_name);
-
-void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
-		const struct v4l2_subdev_ops *ops)
-{
-	v4l2_subdev_init(sd, ops);
-	sd->flags |= V4L2_SUBDEV_FL_IS_I2C;
-	/* the owner is the same as the i2c_client's driver owner */
-	sd->owner = client->dev.driver->owner;
-	sd->dev = &client->dev;
-	/* i2c_client and v4l2_subdev point to one another */
-	v4l2_set_subdevdata(sd, client);
-	i2c_set_clientdata(client, sd);
-	v4l2_i2c_subdev_set_name(sd, client, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_init);
-
-/* Load an i2c sub-device. */
-struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
-		struct i2c_adapter *adapter, struct i2c_board_info *info,
-		const unsigned short *probe_addrs)
-{
-	struct v4l2_subdev *sd = NULL;
-	struct i2c_client *client;
-
-	BUG_ON(!v4l2_dev);
-
-	request_module(I2C_MODULE_PREFIX "%s", info->type);
-
-	/* Create the i2c client */
-	if (info->addr == 0 && probe_addrs)
-		client = i2c_new_probed_device(adapter, info, probe_addrs,
-					       NULL);
-	else
-		client = i2c_new_device(adapter, info);
-
-	/* Note: by loading the module first we are certain that c->driver
-	   will be set if the driver was found. If the module was not loaded
-	   first, then the i2c core tries to delay-load the module for us,
-	   and then c->driver is still NULL until the module is finally
-	   loaded. This delay-load mechanism doesn't work if other drivers
-	   want to use the i2c device, so explicitly loading the module
-	   is the best alternative. */
-	if (client == NULL || client->dev.driver == NULL)
-		goto error;
-
-	/* Lock the module so we can safely get the v4l2_subdev pointer */
-	if (!try_module_get(client->dev.driver->owner))
-		goto error;
-	sd = i2c_get_clientdata(client);
-
-	/* Register with the v4l2_device which increases the module's
-	   use count as well. */
-	if (v4l2_device_register_subdev(v4l2_dev, sd))
-		sd = NULL;
-	/* Decrease the module use count to match the first try_module_get. */
-	module_put(client->dev.driver->owner);
-
-error:
-	/* If we have a client but no subdev, then something went wrong and
-	   we must unregister the client. */
-	if (client && sd == NULL)
-		i2c_unregister_device(client);
-	return sd;
-}
-EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev_board);
-
-struct v4l2_subdev *v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
-		struct i2c_adapter *adapter, const char *client_type,
-		u8 addr, const unsigned short *probe_addrs)
-{
-	struct i2c_board_info info;
-
-	/* Setup the i2c board info with the device type and
-	   the device address. */
-	memset(&info, 0, sizeof(info));
-	strscpy(info.type, client_type, sizeof(info.type));
-	info.addr = addr;
-
-	return v4l2_i2c_new_subdev_board(v4l2_dev, adapter, &info, probe_addrs);
-}
-EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev);
-
-/* Return i2c client address of v4l2_subdev. */
-unsigned short v4l2_i2c_subdev_addr(struct v4l2_subdev *sd)
-{
-	struct i2c_client *client = v4l2_get_subdevdata(sd);
-
-	return client ? client->addr : I2C_CLIENT_END;
-}
-EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_addr);
-
-/* Return a list of I2C tuner addresses to probe. Use only if the tuner
-   addresses are unknown. */
-const unsigned short *v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type)
-{
-	static const unsigned short radio_addrs[] = {
-#if IS_ENABLED(CONFIG_MEDIA_TUNER_TEA5761)
-		0x10,
-#endif
-		0x60,
-		I2C_CLIENT_END
-	};
-	static const unsigned short demod_addrs[] = {
-		0x42, 0x43, 0x4a, 0x4b,
-		I2C_CLIENT_END
-	};
-	static const unsigned short tv_addrs[] = {
-		0x42, 0x43, 0x4a, 0x4b,		/* tda8290 */
-		0x60, 0x61, 0x62, 0x63, 0x64,
-		I2C_CLIENT_END
-	};
-
-	switch (type) {
-	case ADDRS_RADIO:
-		return radio_addrs;
-	case ADDRS_DEMOD:
-		return demod_addrs;
-	case ADDRS_TV:
-		return tv_addrs;
-	case ADDRS_TV_WITH_DEMOD:
-		return tv_addrs + 4;
-	}
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(v4l2_i2c_tuner_addrs);
-
-#endif /* defined(CONFIG_I2C) */
-
 /* Clamp x to be between min and max, aligned to a multiple of 2^align.  min
  * and max don't have to be aligned, but there must be at least one valid
  * value.  E.g., min=17,max=31,align=4 is not allowed as there are no multiples
diff --git a/drivers/media/v4l2-core/v4l2-i2c.c b/drivers/media/v4l2-core/v4l2-i2c.c
new file mode 100644
index 000000000000..f393dd4f1c00
--- /dev/null
+++ b/drivers/media/v4l2-core/v4l2-i2c.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * v4l2-i2c - I2C helpers for Video4Linux2
+ */
+
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <media/v4l2-common.h>
+#include <media/v4l2-device.h>
+
+void v4l2_i2c_subdev_set_name(struct v4l2_subdev *sd, struct i2c_client *client,
+			      const char *devname, const char *postfix)
+{
+	if (!devname)
+		devname = client->dev.driver->name;
+	if (!postfix)
+		postfix = "";
+
+	snprintf(sd->name, sizeof(sd->name), "%s%s %d-%04x", devname, postfix,
+		 i2c_adapter_id(client->adapter), client->addr);
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_set_name);
+
+void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
+		const struct v4l2_subdev_ops *ops)
+{
+	v4l2_subdev_init(sd, ops);
+	sd->flags |= V4L2_SUBDEV_FL_IS_I2C;
+	/* the owner is the same as the i2c_client's driver owner */
+	sd->owner = client->dev.driver->owner;
+	sd->dev = &client->dev;
+	/* i2c_client and v4l2_subdev point to one another */
+	v4l2_set_subdevdata(sd, client);
+	i2c_set_clientdata(client, sd);
+	v4l2_i2c_subdev_set_name(sd, client, NULL, NULL);
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_init);
+
+/* Load an i2c sub-device. */
+struct v4l2_subdev *v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
+		struct i2c_adapter *adapter, struct i2c_board_info *info,
+		const unsigned short *probe_addrs)
+{
+	struct v4l2_subdev *sd = NULL;
+	struct i2c_client *client;
+
+	BUG_ON(!v4l2_dev);
+
+	request_module(I2C_MODULE_PREFIX "%s", info->type);
+
+	/* Create the i2c client */
+	if (info->addr == 0 && probe_addrs)
+		client = i2c_new_probed_device(adapter, info, probe_addrs,
+					       NULL);
+	else
+		client = i2c_new_device(adapter, info);
+
+	/* Note: by loading the module first we are certain that c->driver
+	   will be set if the driver was found. If the module was not loaded
+	   first, then the i2c core tries to delay-load the module for us,
+	   and then c->driver is still NULL until the module is finally
+	   loaded. This delay-load mechanism doesn't work if other drivers
+	   want to use the i2c device, so explicitly loading the module
+	   is the best alternative. */
+	if (client == NULL || client->dev.driver == NULL)
+		goto error;
+
+	/* Lock the module so we can safely get the v4l2_subdev pointer */
+	if (!try_module_get(client->dev.driver->owner))
+		goto error;
+	sd = i2c_get_clientdata(client);
+
+	/* Register with the v4l2_device which increases the module's
+	   use count as well. */
+	if (v4l2_device_register_subdev(v4l2_dev, sd))
+		sd = NULL;
+	/* Decrease the module use count to match the first try_module_get. */
+	module_put(client->dev.driver->owner);
+
+error:
+	/* If we have a client but no subdev, then something went wrong and
+	   we must unregister the client. */
+	if (client && sd == NULL)
+		i2c_unregister_device(client);
+	return sd;
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev_board);
+
+struct v4l2_subdev *v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
+		struct i2c_adapter *adapter, const char *client_type,
+		u8 addr, const unsigned short *probe_addrs)
+{
+	struct i2c_board_info info;
+
+	/* Setup the i2c board info with the device type and
+	   the device address. */
+	memset(&info, 0, sizeof(info));
+	strscpy(info.type, client_type, sizeof(info.type));
+	info.addr = addr;
+
+	return v4l2_i2c_new_subdev_board(v4l2_dev, adapter, &info, probe_addrs);
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_new_subdev);
+
+/* Return i2c client address of v4l2_subdev. */
+unsigned short v4l2_i2c_subdev_addr(struct v4l2_subdev *sd)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+	return client ? client->addr : I2C_CLIENT_END;
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_subdev_addr);
+
+/* Return a list of I2C tuner addresses to probe. Use only if the tuner
+   addresses are unknown. */
+const unsigned short *v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type)
+{
+	static const unsigned short radio_addrs[] = {
+#if IS_ENABLED(CONFIG_MEDIA_TUNER_TEA5761)
+		0x10,
+#endif
+		0x60,
+		I2C_CLIENT_END
+	};
+	static const unsigned short demod_addrs[] = {
+		0x42, 0x43, 0x4a, 0x4b,
+		I2C_CLIENT_END
+	};
+	static const unsigned short tv_addrs[] = {
+		0x42, 0x43, 0x4a, 0x4b,		/* tda8290 */
+		0x60, 0x61, 0x62, 0x63, 0x64,
+		I2C_CLIENT_END
+	};
+
+	switch (type) {
+	case ADDRS_RADIO:
+		return radio_addrs;
+	case ADDRS_DEMOD:
+		return demod_addrs;
+	case ADDRS_TV:
+		return tv_addrs;
+	case ADDRS_TV_WITH_DEMOD:
+		return tv_addrs + 4;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(v4l2_i2c_tuner_addrs);
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index a1c5288caa6a..8e66edddd37b 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -96,16 +96,45 @@ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl,
 
 /* ------------------------------------------------------------------------- */
 
-/* I2C Helper functions */
-
-struct i2c_driver;
-struct i2c_adapter;
-struct i2c_client;
-struct i2c_device_id;
 struct v4l2_device;
 struct v4l2_subdev;
 struct v4l2_subdev_ops;
 
+/* I2C Helper functions */
+#include <linux/i2c.h>
+
+/**
+ * enum v4l2_i2c_tuner_type - specifies the range of tuner address that
+ *	should be used when seeking for I2C devices.
+ *
+ * @ADDRS_RADIO:		Radio tuner addresses.
+ *				Represent the following I2C addresses:
+ *				0x10 (if compiled with tea5761 support)
+ *				and 0x60.
+ * @ADDRS_DEMOD:		Demod tuner addresses.
+ *				Represent the following I2C addresses:
+ *				0x42, 0x43, 0x4a and 0x4b.
+ * @ADDRS_TV:			TV tuner addresses.
+ *				Represent the following I2C addresses:
+ *				0x42, 0x43, 0x4a, 0x4b, 0x60, 0x61, 0x62,
+ *				0x63 and 0x64.
+ * @ADDRS_TV_WITH_DEMOD:	TV tuner addresses if demod is present, this
+ *				excludes addresses used by the demodulator
+ *				from the list of candidates.
+ *				Represent the following I2C addresses:
+ *				0x60, 0x61, 0x62, 0x63 and 0x64.
+ *
+ * NOTE: All I2C addresses above use the 7-bit notation.
+ */
+enum v4l2_i2c_tuner_type {
+	ADDRS_RADIO,
+	ADDRS_DEMOD,
+	ADDRS_TV,
+	ADDRS_TV_WITH_DEMOD,
+};
+
+#if defined(CONFIG_VIDEO_V4L2_I2C)
+
 /**
  * v4l2_i2c_new_subdev - Load an i2c module and return an initialized
  *	&struct v4l2_subdev.
@@ -123,8 +152,6 @@ struct v4l2_subdev *v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
 		struct i2c_adapter *adapter, const char *client_type,
 		u8 addr, const unsigned short *probe_addrs);
 
-struct i2c_board_info;
-
 /**
  * v4l2_i2c_new_subdev_board - Load an i2c module and return an initialized
  *	&struct v4l2_subdev.
@@ -174,35 +201,6 @@ void v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
  */
 unsigned short v4l2_i2c_subdev_addr(struct v4l2_subdev *sd);
 
-/**
- * enum v4l2_i2c_tuner_type - specifies the range of tuner address that
- *	should be used when seeking for I2C devices.
- *
- * @ADDRS_RADIO:		Radio tuner addresses.
- *				Represent the following I2C addresses:
- *				0x10 (if compiled with tea5761 support)
- *				and 0x60.
- * @ADDRS_DEMOD:		Demod tuner addresses.
- *				Represent the following I2C addresses:
- *				0x42, 0x43, 0x4a and 0x4b.
- * @ADDRS_TV:			TV tuner addresses.
- *				Represent the following I2C addresses:
- *				0x42, 0x43, 0x4a, 0x4b, 0x60, 0x61, 0x62,
- *				0x63 and 0x64.
- * @ADDRS_TV_WITH_DEMOD:	TV tuner addresses if demod is present, this
- *				excludes addresses used by the demodulator
- *				from the list of candidates.
- *				Represent the following I2C addresses:
- *				0x60, 0x61, 0x62, 0x63 and 0x64.
- *
- * NOTE: All I2C addresses above use the 7-bit notation.
- */
-enum v4l2_i2c_tuner_type {
-	ADDRS_RADIO,
-	ADDRS_DEMOD,
-	ADDRS_TV,
-	ADDRS_TV_WITH_DEMOD,
-};
 /**
  * v4l2_i2c_tuner_addrs - Return a list of I2C tuner addresses to probe.
  *
@@ -213,6 +211,47 @@ enum v4l2_i2c_tuner_type {
  */
 const unsigned short *v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type);
 
+#else
+
+static inline struct v4l2_subdev *
+v4l2_i2c_new_subdev(struct v4l2_device *v4l2_dev,
+		    struct i2c_adapter *adapter, const char *client_type,
+		    u8 addr, const unsigned short *probe_addrs)
+{
+	return NULL;
+}
+
+static inline struct v4l2_subdev *
+v4l2_i2c_new_subdev_board(struct v4l2_device *v4l2_dev,
+			  struct i2c_adapter *adapter, struct i2c_board_info *info,
+			  const unsigned short *probe_addrs)
+{
+	return NULL;
+}
+
+static inline void
+v4l2_i2c_subdev_set_name(struct v4l2_subdev *sd, struct i2c_client *client,
+			 const char *devname, const char *postfix)
+{}
+
+static inline void
+v4l2_i2c_subdev_init(struct v4l2_subdev *sd, struct i2c_client *client,
+		     const struct v4l2_subdev_ops *ops)
+{}
+
+static inline unsigned short v4l2_i2c_subdev_addr(struct v4l2_subdev *sd)
+{
+	return I2C_CLIENT_END;
+}
+
+static inline const unsigned short *
+v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type)
+{
+	return NULL;
+}
+
+#endif
+
 /* ------------------------------------------------------------------------- */
 
 /* SPI Helper functions */
-- 
cgit v1.2.3


From a9cff393c1d78ecbbc33e6196e79bb05ccb4a709 Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 15 Aug 2019 13:48:04 -0300
Subject: media: v4l2-core: introduce a helper to unregister a spi subdev

Introduce a new video4linux2 spi helper, to unregister a subdev.
This allows to get rid of some more ifdefs.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-device.c | 14 ++------------
 drivers/media/v4l2-core/v4l2-spi.c    |  8 ++++++++
 include/media/v4l2-common.h           |  9 +++++++++
 3 files changed, 19 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-device.c b/drivers/media/v4l2-core/v4l2-device.c
index aa277f5bc862..c2811238996f 100644
--- a/drivers/media/v4l2-core/v4l2-device.c
+++ b/drivers/media/v4l2-core/v4l2-device.c
@@ -11,9 +11,6 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
-#if defined(CONFIG_SPI)
-#include <linux/spi/spi.h>
-#endif
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-ctrls.h>
@@ -124,15 +121,8 @@ void v4l2_device_unregister(struct v4l2_device *v4l2_dev)
 			continue;
 		}
 #endif
-#if defined(CONFIG_SPI)
-		if (sd->flags & V4L2_SUBDEV_FL_IS_SPI) {
-			struct spi_device *spi = v4l2_get_subdevdata(sd);
-
-			if (spi && !spi->dev.of_node && !spi->dev.fwnode)
-				spi_unregister_device(spi);
-			continue;
-		}
-#endif
+		else if (sd->flags & V4L2_SUBDEV_FL_IS_SPI)
+			v4l2_spi_subdev_unregister(sd);
 	}
 	/* Mark as unregistered, thus preventing duplicate unregistrations */
 	v4l2_dev->name[0] = '\0';
diff --git a/drivers/media/v4l2-core/v4l2-spi.c b/drivers/media/v4l2-core/v4l2-spi.c
index ab5a7eb4205d..2a7e82e1412d 100644
--- a/drivers/media/v4l2-core/v4l2-spi.c
+++ b/drivers/media/v4l2-core/v4l2-spi.c
@@ -8,6 +8,14 @@
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
 
+void v4l2_spi_subdev_unregister(struct v4l2_subdev *sd)
+{
+	struct spi_device *spi = v4l2_get_subdevdata(sd);
+
+	if (spi && !spi->dev.of_node && !spi->dev.fwnode)
+		spi_unregister_device(spi);
+}
+
 void v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
 		const struct v4l2_subdev_ops *ops)
 {
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index 8e66edddd37b..e2878654d043 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -285,6 +285,13 @@ struct v4l2_subdev *v4l2_spi_new_subdev(struct v4l2_device *v4l2_dev,
 void v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
 		const struct v4l2_subdev_ops *ops);
 
+/**
+ * v4l2_spi_subdev_unregister - Unregister a v4l2_subdev
+ *
+ * @sd: pointer to &struct v4l2_subdev
+ */
+void v4l2_spi_subdev_unregister(struct v4l2_subdev *sd);
+
 #else
 
 static inline struct v4l2_subdev *
@@ -299,6 +306,8 @@ v4l2_spi_subdev_init(struct v4l2_subdev *sd, struct spi_device *spi,
 		     const struct v4l2_subdev_ops *ops)
 {}
 
+static inline void v4l2_spi_subdev_unregister(struct v4l2_subdev *sd)
+{}
 #endif
 
 /* ------------------------------------------------------------------------- */
-- 
cgit v1.2.3


From 51ff392c280733aa9e6bd47b3f5e83e32bfdf16a Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel@collabora.com>
Date: Thu, 15 Aug 2019 13:48:05 -0300
Subject: media: v4l2-core: introduce a helper to unregister a i2c subdev

Introduce a new video4linux2 i2c helper, to unregister a subdev.
This allows to get rid of yet another ifdef.

Signed-off-by: Ezequiel Garcia <ezequiel@collabora.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
[hverkuil-cisco@xs4all.nl: fix checkpatch warning]
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/v4l2-core/v4l2-device.c | 25 ++-----------------------
 drivers/media/v4l2-core/v4l2-i2c.c    | 20 ++++++++++++++++++++
 include/media/v4l2-common.h           | 10 ++++++++++
 3 files changed, 32 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/media/v4l2-core/v4l2-device.c b/drivers/media/v4l2-core/v4l2-device.c
index c2811238996f..63d6b147b21e 100644
--- a/drivers/media/v4l2-core/v4l2-device.c
+++ b/drivers/media/v4l2-core/v4l2-device.c
@@ -9,7 +9,6 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 #include <linux/module.h>
-#include <linux/i2c.h>
 #include <linux/slab.h>
 #include <linux/videodev2.h>
 #include <media/v4l2-device.h>
@@ -99,28 +98,8 @@ void v4l2_device_unregister(struct v4l2_device *v4l2_dev)
 	/* Unregister subdevs */
 	list_for_each_entry_safe(sd, next, &v4l2_dev->subdevs, list) {
 		v4l2_device_unregister_subdev(sd);
-#if IS_ENABLED(CONFIG_I2C)
-		if (sd->flags & V4L2_SUBDEV_FL_IS_I2C) {
-			struct i2c_client *client = v4l2_get_subdevdata(sd);
-
-			/*
-			 * We need to unregister the i2c client
-			 * explicitly. We cannot rely on
-			 * i2c_del_adapter to always unregister
-			 * clients for us, since if the i2c bus is a
-			 * platform bus, then it is never deleted.
-			 *
-			 * Device tree or ACPI based devices must not
-			 * be unregistered as they have not been
-			 * registered by us, and would not be
-			 * re-created by just probing the V4L2 driver.
-			 */
-			if (client &&
-			    !client->dev.of_node && !client->dev.fwnode)
-				i2c_unregister_device(client);
-			continue;
-		}
-#endif
+		if (sd->flags & V4L2_SUBDEV_FL_IS_I2C)
+			v4l2_i2c_subdev_unregister(sd);
 		else if (sd->flags & V4L2_SUBDEV_FL_IS_SPI)
 			v4l2_spi_subdev_unregister(sd);
 	}
diff --git a/drivers/media/v4l2-core/v4l2-i2c.c b/drivers/media/v4l2-core/v4l2-i2c.c
index f393dd4f1c00..a26d48f23a2d 100644
--- a/drivers/media/v4l2-core/v4l2-i2c.c
+++ b/drivers/media/v4l2-core/v4l2-i2c.c
@@ -8,6 +8,26 @@
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
 
+void v4l2_i2c_subdev_unregister(struct v4l2_subdev *sd)
+{
+	struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+	/*
+	 * We need to unregister the i2c client
+	 * explicitly. We cannot rely on
+	 * i2c_del_adapter to always unregister
+	 * clients for us, since if the i2c bus is a
+	 * platform bus, then it is never deleted.
+	 *
+	 * Device tree or ACPI based devices must not
+	 * be unregistered as they have not been
+	 * registered by us, and would not be
+	 * re-created by just probing the V4L2 driver.
+	 */
+	if (client && !client->dev.of_node && !client->dev.fwnode)
+		i2c_unregister_device(client);
+}
+
 void v4l2_i2c_subdev_set_name(struct v4l2_subdev *sd, struct i2c_client *client,
 			      const char *devname, const char *postfix)
 {
diff --git a/include/media/v4l2-common.h b/include/media/v4l2-common.h
index e2878654d043..c070d8ae11e5 100644
--- a/include/media/v4l2-common.h
+++ b/include/media/v4l2-common.h
@@ -211,6 +211,13 @@ unsigned short v4l2_i2c_subdev_addr(struct v4l2_subdev *sd);
  */
 const unsigned short *v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type);
 
+/**
+ * v4l2_i2c_subdev_unregister - Unregister a v4l2_subdev
+ *
+ * @sd: pointer to &struct v4l2_subdev
+ */
+void v4l2_i2c_subdev_unregister(struct v4l2_subdev *sd);
+
 #else
 
 static inline struct v4l2_subdev *
@@ -250,6 +257,9 @@ v4l2_i2c_tuner_addrs(enum v4l2_i2c_tuner_type type)
 	return NULL;
 }
 
+static inline void v4l2_i2c_subdev_unregister(struct v4l2_subdev *sd)
+{}
+
 #endif
 
 /* ------------------------------------------------------------------------- */
-- 
cgit v1.2.3


From d8abe88450beb96a66e434323eb6ab737654b840 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markz@mellanox.com>
Date: Mon, 19 Aug 2019 14:36:26 +0300
Subject: RDMA/mlx5: RDMA_RX flow type support for user applications

Currently user applications can only steer TCP/IP(NIC RX/RX) traffic.
This patch adds RDMA_RX as a new flow type to allow the user to insert
steering rules to control RDMA traffic.
Two destinations are supported(but not set at the same time): devx
flow table object and QP.

Signed-off-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190819113626.20284-4-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/flow.c         | 13 +++++++++++--
 drivers/infiniband/hw/mlx5/main.c         |  7 +++++++
 drivers/infiniband/hw/mlx5/mlx5_ib.h      |  1 +
 include/uapi/rdma/mlx5_user_ioctl_verbs.h |  1 +
 4 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index b8841355fcd5..571bb8539cdb 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -32,6 +32,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
 	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB:
 		*namespace = MLX5_FLOW_NAMESPACE_FDB;
 		break;
+	case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX:
+		*namespace = MLX5_FLOW_NAMESPACE_RDMA_RX;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -101,6 +104,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 	if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx)
 		return -EINVAL;
 
+	/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
+	if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+	    ((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
+		return -EINVAL;
+
 	if (dest_devx) {
 		devx_obj = uverbs_attr_get_obj(
 			attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -112,8 +120,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
 		 */
 		if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
 			return -EINVAL;
-		/* Allow only flow table as dest when inserting to FDB */
-		if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+		/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
+		if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB ||
+		     fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
 		    dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
 			return -EINVAL;
 	} else if (dest_qp) {
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 1ec0e667110e..07aecba16019 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3963,6 +3963,11 @@ _get_flow_table(struct mlx5_ib_dev *dev,
 		    esw_encap)
 			flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
 		priority = FDB_BYPASS_PATH;
+	} else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) {
+		max_table_size =
+			BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev,
+						       log_max_ft_size));
+		priority = fs_matcher->priority;
 	}
 
 	max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
@@ -3977,6 +3982,8 @@ _get_flow_table(struct mlx5_ib_dev *dev,
 		prio = &dev->flow_db->egress_prios[priority];
 	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB)
 		prio = &dev->flow_db->fdb;
+	else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX)
+		prio = &dev->flow_db->rdma_rx[priority];
 
 	if (!prio)
 		return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index cb41a7e6255a..6abfbf3a69b7 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -200,6 +200,7 @@ struct mlx5_ib_flow_db {
 	struct mlx5_ib_flow_prio	sniffer[MLX5_IB_NUM_SNIFFER_FTS];
 	struct mlx5_ib_flow_prio	egress[MLX5_IB_NUM_EGRESS_FTS];
 	struct mlx5_ib_flow_prio	fdb;
+	struct mlx5_ib_flow_prio	rdma_rx[MLX5_IB_NUM_FLOW_FT];
 	struct mlx5_flow_table		*lag_demux_ft;
 	/* Protect flow steering bypass flow tables
 	 * when add/del flow rules.
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 7e9900b0e746..88b6ca70c2fe 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -43,6 +43,7 @@ enum mlx5_ib_uapi_flow_table_type {
 	MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_RX     = 0x0,
 	MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX	= 0x1,
 	MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB	= 0x2,
+	MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX	= 0x3,
 };
 
 enum mlx5_ib_uapi_flow_action_packet_reformat_type {
-- 
cgit v1.2.3


From d7f563db7794a6a271b6e9dd6e65a437d6a1d933 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 14 Aug 2019 05:18:16 -0700
Subject: bus: ti-sysc: Add module enable quirk for SGX on omap36xx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add module enable quirk for SGX needed on omap36xx.

Cc: Adam Ford <aford173@gmail.com>
Cc: Filip Matijević <filip.matijevic.pz@gmail.com>
Cc: "H. Nikolaus Schaller" <hns@goldelico.com>
Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Cc: moaz korena <moaz@korena.xyz>
Cc: Merlijn Wajer <merlijn@wizzup.org>
Cc: Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>
Cc: Philipp Rossak <embed3d@gmail.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 21 +++++++++++++++++++++
 include/linux/platform_data/ti-sysc.h |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 2587a616cee7..d4fc04320ea5 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -73,6 +73,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = {
  * @clk_enable_quirk: module specific clock enable quirk
  * @clk_disable_quirk: module specific clock disable quirk
  * @reset_done_quirk: module specific reset done quirk
+ * @module_enable_quirk: module specific enable quirk
  */
 struct sysc {
 	struct device *dev;
@@ -98,6 +99,7 @@ struct sysc {
 	void (*clk_enable_quirk)(struct sysc *sysc);
 	void (*clk_disable_quirk)(struct sysc *sysc);
 	void (*reset_done_quirk)(struct sysc *sysc);
+	void (*module_enable_quirk)(struct sysc *sysc);
 };
 
 static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np,
@@ -928,6 +930,9 @@ set_autoidle:
 		sysc_write(ddata, ddata->offsets[SYSC_SYSCONFIG], reg);
 	}
 
+	if (ddata->module_enable_quirk)
+		ddata->module_enable_quirk(ddata);
+
 	return 0;
 }
 
@@ -1241,6 +1246,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 		   SYSC_MODULE_QUIRK_I2C),
 	SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xfffff0f0,
 		   SYSC_MODULE_QUIRK_I2C),
+	SYSC_QUIRK("gpu", 0x50000000, 0x14, -1, -1, 0x00010201, 0xffffffff, 0),
+	SYSC_QUIRK("gpu", 0x50000000, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff,
+		   SYSC_MODULE_QUIRK_SGX),
 	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
 		   SYSC_MODULE_QUIRK_WDT),
 
@@ -1258,6 +1266,7 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("dwc3", 0, 0, 0x10, -1, 0x500a0200, 0xffffffff, 0),
 	SYSC_QUIRK("epwmss", 0, 0, 0x4, -1, 0x47400001, 0xffffffff, 0),
 	SYSC_QUIRK("gpu", 0, 0x1fc00, 0x1fc10, -1, 0, 0, 0),
+	SYSC_QUIRK("gpu", 0, 0xfe00, 0xfe10, -1, 0x40000000 , 0xffffffff, 0),
 	SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0),
 	SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0),
 	SYSC_QUIRK("lcdc", 0, 0, 0x54, -1, 0x4f201000, 0xffffffff, 0),
@@ -1409,6 +1418,15 @@ static void sysc_clk_disable_quirk_i2c(struct sysc *ddata)
 	sysc_clk_quirk_i2c(ddata, false);
 }
 
+/* 36xx SGX needs a quirk for to bypass OCP IPG interrupt logic */
+static void sysc_module_enable_quirk_sgx(struct sysc *ddata)
+{
+	int offset = 0xff08;	/* OCP_DEBUG_CONFIG */
+	u32 val = BIT(31);	/* THALIA_INT_BYPASS */
+
+	sysc_write(ddata, offset, val);
+}
+
 /* Watchdog timer needs a disable sequence after reset */
 static void sysc_reset_done_quirk_wdt(struct sysc *ddata)
 {
@@ -1451,6 +1469,9 @@ static void sysc_init_module_quirks(struct sysc *ddata)
 		return;
 	}
 
+	if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX)
+		ddata->module_enable_quirk = sysc_module_enable_quirk_sgx;
+
 	if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT)
 		ddata->reset_done_quirk = sysc_reset_done_quirk_wdt;
 }
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 1a0905435b32..b5b7a3423ca8 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -49,6 +49,7 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
+#define SYSC_MODULE_QUIRK_SGX		BIT(18)
 #define SYSC_MODULE_QUIRK_HDQ1W		BIT(17)
 #define SYSC_MODULE_QUIRK_I2C		BIT(16)
 #define SYSC_MODULE_QUIRK_WDT		BIT(15)
-- 
cgit v1.2.3


From fd5683749472bb3370487009c91c49182eb99694 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 26 Aug 2019 08:47:07 -0700
Subject: clk: ti: add clkctrl data omap5 sgx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Looks like we have sgx clock missing currently so let's add it.

Cc: Adam Ford <aford173@gmail.com>
Cc: Filip Matijević <filip.matijevic.pz@gmail.com>
Cc: "H. Nikolaus Schaller" <hns@goldelico.com>
Cc: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Cc: moaz korena <moaz@korena.xyz>
Cc: Merlijn Wajer <merlijn@wizzup.org>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>
Cc: Philipp Rossak <embed3d@gmail.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Tero Kristo <t-kristo@ti.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: linux-clk@vger.kernel.org
Acked-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clk-54xx.c         | 34 ++++++++++++++++++++++++++++++++++
 include/dt-bindings/clock/omap5.h |  3 +++
 2 files changed, 37 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c
index dafef7e70ba8..e675e27f1203 100644
--- a/drivers/clk/ti/clk-54xx.c
+++ b/drivers/clk/ti/clk-54xx.c
@@ -314,6 +314,39 @@ static const struct omap_clkctrl_reg_data omap5_dss_clkctrl_regs[] __initconst =
 	{ 0 },
 };
 
+static const char * const omap5_gpu_core_mux_parents[] __initconst = {
+	"dpll_core_h14x2_ck",
+	"dpll_per_h14x2_ck",
+	NULL,
+};
+
+static const char * const omap5_gpu_hyd_mux_parents[] __initconst = {
+	"dpll_core_h14x2_ck",
+	"dpll_per_h14x2_ck",
+	NULL,
+};
+
+static const char * const omap5_gpu_sys_clk_parents[] __initconst = {
+	"sys_clkin",
+	NULL,
+};
+
+static const struct omap_clkctrl_div_data omap5_gpu_sys_clk_data __initconst = {
+	.max_div = 2,
+};
+
+static const struct omap_clkctrl_bit_data omap5_gpu_core_bit_data[] __initconst = {
+	{ 24, TI_CLK_MUX, omap5_gpu_core_mux_parents, NULL },
+	{ 25, TI_CLK_MUX, omap5_gpu_hyd_mux_parents, NULL },
+	{ 26, TI_CLK_DIVIDER, omap5_gpu_sys_clk_parents, &omap5_gpu_sys_clk_data },
+	{ 0 },
+};
+
+static const struct omap_clkctrl_reg_data omap5_gpu_clkctrl_regs[] __initconst = {
+	{ OMAP5_GPU_CLKCTRL, omap5_gpu_core_bit_data, CLKF_SW_SUP, "gpu_cm:clk:0000:24" },
+	{ 0 },
+};
+
 static const char * const omap5_mmc1_fclk_mux_parents[] __initconst = {
 	"func_128m_clk",
 	"dpll_per_m2x2_ck",
@@ -470,6 +503,7 @@ const struct omap_clkctrl_data omap5_clkctrl_data[] __initconst = {
 	{ 0x4a008e20, omap5_l3instr_clkctrl_regs },
 	{ 0x4a009020, omap5_l4per_clkctrl_regs },
 	{ 0x4a009420, omap5_dss_clkctrl_regs },
+	{ 0x4a009520, omap5_gpu_clkctrl_regs },
 	{ 0x4a009620, omap5_l3init_clkctrl_regs },
 	{ 0x4ae07920, omap5_wkupaon_clkctrl_regs },
 	{ 0 },
diff --git a/include/dt-bindings/clock/omap5.h b/include/dt-bindings/clock/omap5.h
index f3283957f48d..e5411938983c 100644
--- a/include/dt-bindings/clock/omap5.h
+++ b/include/dt-bindings/clock/omap5.h
@@ -89,6 +89,9 @@
 /* dss clocks */
 #define OMAP5_DSS_CORE_CLKCTRL	OMAP5_CLKCTRL_INDEX(0x20)
 
+/* gpu clocks */
+#define OMAP5_GPU_CLKCTRL	OMAP5_CLKCTRL_INDEX(0x20)
+
 /* l3init clocks */
 #define OMAP5_MMC1_CLKCTRL	OMAP5_CLKCTRL_INDEX(0x28)
 #define OMAP5_MMC2_CLKCTRL	OMAP5_CLKCTRL_INDEX(0x30)
-- 
cgit v1.2.3


From 174ae4e96e0f54958cbe3fd3090a3cefeb63af4d Mon Sep 17 00:00:00 2001
From: Mischa Jonker <Mischa.Jonker@synopsys.com>
Date: Wed, 24 Jul 2019 14:04:34 +0200
Subject: ARCv2: IDU-intc: Add support for edge-triggered interrupts

This adds support for an optional extra interrupt cell to specify edge
vs level triggered. It is backward compatible with dts files with only
one cell, and will default to level-triggered in such a case.

Note that I had to make a change to idu_irq_set_affinity as well, as
this function was setting the interrupt type to "level" unconditionally,
since this was the only type supported previously.

Signed-off-by: Mischa Jonker <mischa.jonker@synopsys.com>
Reviewed-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/mcip.c | 60 +++++++++++++++++++++++++++++++++++++++++++++-----
 include/soc/arc/mcip.h | 11 +++++++++
 2 files changed, 65 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index 18b493dfb3a8..abf9398cc333 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -202,8 +202,8 @@ static void idu_set_dest(unsigned int cmn_irq, unsigned int cpu_mask)
 	__mcip_cmd_data(CMD_IDU_SET_DEST, cmn_irq, cpu_mask);
 }
 
-static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl,
-			   unsigned int distr)
+static void idu_set_mode(unsigned int cmn_irq, bool set_lvl, unsigned int lvl,
+			 bool set_distr, unsigned int distr)
 {
 	union {
 		unsigned int word;
@@ -212,8 +212,11 @@ static void idu_set_mode(unsigned int cmn_irq, unsigned int lvl,
 		};
 	} data;
 
-	data.distr = distr;
-	data.lvl = lvl;
+	data.word = __mcip_cmd_read(CMD_IDU_READ_MODE, cmn_irq);
+	if (set_distr)
+		data.distr = distr;
+	if (set_lvl)
+		data.lvl = lvl;
 	__mcip_cmd_data(CMD_IDU_SET_MODE, cmn_irq, data.word);
 }
 
@@ -240,6 +243,25 @@ static void idu_irq_unmask(struct irq_data *data)
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
 }
 
+static void idu_irq_ack(struct irq_data *data)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+	__mcip_cmd(CMD_IDU_ACK_CIRQ, data->hwirq);
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
+static void idu_irq_mask_ack(struct irq_data *data)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+	__mcip_cmd_data(CMD_IDU_SET_MASK, data->hwirq, 1);
+	__mcip_cmd(CMD_IDU_ACK_CIRQ, data->hwirq);
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+}
+
 static int
 idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 		     bool force)
@@ -263,13 +285,36 @@ idu_irq_set_affinity(struct irq_data *data, const struct cpumask *cpumask,
 	else
 		distribution_mode = IDU_M_DISTRI_RR;
 
-	idu_set_mode(data->hwirq, IDU_M_TRIG_LEVEL, distribution_mode);
+	idu_set_mode(data->hwirq, false, 0, true, distribution_mode);
 
 	raw_spin_unlock_irqrestore(&mcip_lock, flags);
 
 	return IRQ_SET_MASK_OK;
 }
 
+static int idu_irq_set_type(struct irq_data *data, u32 type)
+{
+	unsigned long flags;
+
+	/*
+	 * ARCv2 IDU HW does not support inverse polarity, so these are the
+	 * only interrupt types supported.
+	 */
+	if (type & ~(IRQ_TYPE_EDGE_RISING | IRQ_TYPE_LEVEL_HIGH))
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&mcip_lock, flags);
+
+	idu_set_mode(data->hwirq, true,
+		     type & IRQ_TYPE_EDGE_RISING ? IDU_M_TRIG_EDGE :
+						   IDU_M_TRIG_LEVEL,
+		     false, 0);
+
+	raw_spin_unlock_irqrestore(&mcip_lock, flags);
+
+	return 0;
+}
+
 static void idu_irq_enable(struct irq_data *data)
 {
 	/*
@@ -289,7 +334,10 @@ static struct irq_chip idu_irq_chip = {
 	.name			= "MCIP IDU Intc",
 	.irq_mask		= idu_irq_mask,
 	.irq_unmask		= idu_irq_unmask,
+	.irq_ack		= idu_irq_ack,
+	.irq_mask_ack		= idu_irq_mask_ack,
 	.irq_enable		= idu_irq_enable,
+	.irq_set_type		= idu_irq_set_type,
 #ifdef CONFIG_SMP
 	.irq_set_affinity       = idu_irq_set_affinity,
 #endif
@@ -317,7 +365,7 @@ static int idu_irq_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t
 }
 
 static const struct irq_domain_ops idu_irq_ops = {
-	.xlate	= irq_domain_xlate_onecell,
+	.xlate	= irq_domain_xlate_onetwocell,
 	.map	= idu_irq_map,
 };
 
diff --git a/include/soc/arc/mcip.h b/include/soc/arc/mcip.h
index 50f49e043668..d1a93c73f006 100644
--- a/include/soc/arc/mcip.h
+++ b/include/soc/arc/mcip.h
@@ -46,7 +46,9 @@ struct mcip_cmd {
 #define CMD_IDU_ENABLE			0x71
 #define CMD_IDU_DISABLE			0x72
 #define CMD_IDU_SET_MODE		0x74
+#define CMD_IDU_READ_MODE		0x75
 #define CMD_IDU_SET_DEST		0x76
+#define CMD_IDU_ACK_CIRQ		0x79
 #define CMD_IDU_SET_MASK		0x7C
 
 #define IDU_M_TRIG_LEVEL		0x0
@@ -119,4 +121,13 @@ static inline void __mcip_cmd_data(unsigned int cmd, unsigned int param,
 	__mcip_cmd(cmd, param);
 }
 
+/*
+ * Read MCIP register
+ */
+static inline unsigned int __mcip_cmd_read(unsigned int cmd, unsigned int param)
+{
+	__mcip_cmd(cmd, param);
+	return read_aux_reg(ARC_REG_MCIP_READBACK);
+}
+
 #endif
-- 
cgit v1.2.3


From d5711920ec6e578f51db95caa6f185f5090b865e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 16 Aug 2019 08:37:26 -0400
Subject: Revert "NFSv4/flexfiles: Abort I/O early if the layout segment was
 invalidated"

This reverts commit a79f194aa4879e9baad118c3f8bb2ca24dbef765.
The mechanism for aborting I/O is racy, since we are not guaranteed that
the request is asleep while we're changing both task->tk_status and
task->tk_action.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: stable@vger.kernel.org # v5.1
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 17 -----------------
 include/linux/sunrpc/sched.h           |  1 -
 net/sunrpc/xprt.c                      |  7 -------
 3 files changed, 25 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index b04e20d28162..2c7e1eca1ed7 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1148,8 +1148,6 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 		break;
 	case -NFS4ERR_RETRY_UNCACHED_REP:
 		break;
-	case -EAGAIN:
-		return -NFS4ERR_RESET_TO_PNFS;
 	/* Invalidate Layout errors */
 	case -NFS4ERR_PNFS_NO_LAYOUT:
 	case -ESTALE:           /* mapped NFS4ERR_STALE */
@@ -1210,7 +1208,6 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
 	case -EBADHANDLE:
 	case -ELOOP:
 	case -ENOSPC:
-	case -EAGAIN:
 		break;
 	case -EJUKEBOX:
 		nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
@@ -1445,16 +1442,6 @@ static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data)
 	ff_layout_read_prepare_common(task, hdr);
 }
 
-static void
-ff_layout_io_prepare_transmit(struct rpc_task *task,
-		void *data)
-{
-	struct nfs_pgio_header *hdr = data;
-
-	if (!pnfs_is_valid_lseg(hdr->lseg))
-		rpc_exit(task, -EAGAIN);
-}
-
 static void ff_layout_read_call_done(struct rpc_task *task, void *data)
 {
 	struct nfs_pgio_header *hdr = data;
@@ -1740,7 +1727,6 @@ static void ff_layout_commit_release(void *data)
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
 	.rpc_call_prepare = ff_layout_read_prepare_v3,
-	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_read_call_done,
 	.rpc_count_stats = ff_layout_read_count_stats,
 	.rpc_release = ff_layout_read_release,
@@ -1748,7 +1734,6 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
 	.rpc_call_prepare = ff_layout_read_prepare_v4,
-	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_read_call_done,
 	.rpc_count_stats = ff_layout_read_count_stats,
 	.rpc_release = ff_layout_read_release,
@@ -1756,7 +1741,6 @@ static const struct rpc_call_ops ff_layout_read_call_ops_v4 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
 	.rpc_call_prepare = ff_layout_write_prepare_v3,
-	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_write_call_done,
 	.rpc_count_stats = ff_layout_write_count_stats,
 	.rpc_release = ff_layout_write_release,
@@ -1764,7 +1748,6 @@ static const struct rpc_call_ops ff_layout_write_call_ops_v3 = {
 
 static const struct rpc_call_ops ff_layout_write_call_ops_v4 = {
 	.rpc_call_prepare = ff_layout_write_prepare_v4,
-	.rpc_call_prepare_transmit = ff_layout_io_prepare_transmit,
 	.rpc_call_done = ff_layout_write_call_done,
 	.rpc_count_stats = ff_layout_write_count_stats,
 	.rpc_release = ff_layout_write_release,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index baa3ecdb882f..27536b961552 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -98,7 +98,6 @@ typedef void			(*rpc_action)(struct rpc_task *);
 
 struct rpc_call_ops {
 	void (*rpc_call_prepare)(struct rpc_task *, void *);
-	void (*rpc_call_prepare_transmit)(struct rpc_task *, void *);
 	void (*rpc_call_done)(struct rpc_task *, void *);
 	void (*rpc_count_stats)(struct rpc_task *, void *);
 	void (*rpc_release)(void *);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 783748dc5e6f..2e71f5455c6c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1408,13 +1408,6 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 			status = -EBADMSG;
 			goto out_dequeue;
 		}
-		if (task->tk_ops->rpc_call_prepare_transmit) {
-			task->tk_ops->rpc_call_prepare_transmit(task,
-					task->tk_calldata);
-			status = task->tk_status;
-			if (status < 0)
-				goto out_dequeue;
-		}
 		if (RPC_SIGNALLED(task)) {
 			status = -ERESTARTSYS;
 			goto out_dequeue;
-- 
cgit v1.2.3


From 4f8116c85057239ff37519debdd5d45b38ad8130 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:44:57 +0300
Subject: net: sched: protect block offload-related fields with rw_semaphore

In order to remove dependency on rtnl lock, extend tcf_block with 'cb_lock'
rwsem and use it to protect flow_block->cb_list and related counters from
concurrent modification. The lock is taken in read mode for read-only
traversal of cb_list in tc_setup_cb_call() and write mode in all other
cases. This approach ensures that:

- cb_list is not changed concurrently while filters is being offloaded on
  block.

- block->nooffloaddevcnt is checked while holding the lock in read mode,
  but is only changed by bind/unbind code when holding the cb_lock in write
  mode to prevent concurrent modification.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  2 ++
 net/sched/cls_api.c       | 45 ++++++++++++++++++++++++++++++++++++---------
 2 files changed, 38 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d9f359af0b93..a3eaf5f9d28f 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -13,6 +13,7 @@
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 #include <net/flow_offload.h>
@@ -396,6 +397,7 @@ struct tcf_block {
 	refcount_t refcnt;
 	struct net *net;
 	struct Qdisc *q;
+	struct rw_semaphore cb_lock; /* protects cb_list and offload counters */
 	struct flow_block flow_block;
 	struct list_head owner_list;
 	bool keep_dst;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e0d8b456e9f5..959b7ca1ca02 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -568,9 +568,11 @@ static void tc_indr_block_ing_cmd(struct net_device *dev,
 
 	bo.block = &block->flow_block;
 
+	down_write(&block->cb_lock);
 	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
 
 	tcf_block_setup(block, &bo);
+	up_write(&block->cb_lock);
 }
 
 static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
@@ -661,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
+	down_write(&block->cb_lock);
 	if (!dev->netdev_ops->ndo_setup_tc)
 		goto no_offload_dev_inc;
 
@@ -669,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	 */
 	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
 		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_unlock;
 	}
 
 	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_inc;
 	if (err)
-		return err;
+		goto err_unlock;
 
 	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
+	up_write(&block->cb_lock);
 	return 0;
 
 no_offload_dev_inc:
-	if (tcf_block_offload_in_use(block))
-		return -EOPNOTSUPP;
+	if (tcf_block_offload_in_use(block)) {
+		err = -EOPNOTSUPP;
+		goto err_unlock;
+	}
+	err = 0;
 	block->nooffloaddevcnt++;
 	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
-	return 0;
+err_unlock:
+	up_write(&block->cb_lock);
+	return err;
 }
 
 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
@@ -695,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 	struct net_device *dev = q->dev_queue->dev;
 	int err;
 
+	down_write(&block->cb_lock);
 	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
 
 	if (!dev->netdev_ops->ndo_setup_tc)
@@ -702,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
+	up_write(&block->cb_lock);
 	return;
 
 no_offload_dev_dec:
 	WARN_ON(block->nooffloaddevcnt-- == 0);
+	up_write(&block->cb_lock);
 }
 
 static int
@@ -820,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 		return ERR_PTR(-ENOMEM);
 	}
 	mutex_init(&block->lock);
+	init_rwsem(&block->cb_lock);
 	flow_block_init(&block->flow_block);
 	INIT_LIST_HEAD(&block->chain_list);
 	INIT_LIST_HEAD(&block->owner_list);
@@ -1355,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
 	struct tcf_proto *tp, *tp_prev;
 	int err;
 
+	lockdep_assert_held(&block->cb_lock);
+
 	for (chain = __tcf_get_next_chain(block, NULL);
 	     chain;
 	     chain_prev = chain,
@@ -1393,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block,
 	struct flow_block_cb *block_cb, *next;
 	int err, i = 0;
 
+	lockdep_assert_held(&block->cb_lock);
+
 	list_for_each_entry(block_cb, &bo->cb_list, list) {
 		err = tcf_block_playback_offloads(block, block_cb->cb,
 						  block_cb->cb_priv, true,
@@ -1427,6 +1445,8 @@ static void tcf_block_unbind(struct tcf_block *block,
 {
 	struct flow_block_cb *block_cb, *next;
 
+	lockdep_assert_held(&block->cb_lock);
+
 	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
 		tcf_block_playback_offloads(block, block_cb->cb,
 					    block_cb->cb_priv, false,
@@ -2987,19 +3007,26 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 	int ok_count = 0;
 	int err;
 
+	down_read(&block->cb_lock);
 	/* Make sure all netdevs sharing this block are offload-capable. */
-	if (block->nooffloaddevcnt && err_stop)
-		return -EOPNOTSUPP;
+	if (block->nooffloaddevcnt && err_stop) {
+		ok_count = -EOPNOTSUPP;
+		goto err_unlock;
+	}
 
 	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
 		err = block_cb->cb(type, type_data, block_cb->cb_priv);
 		if (err) {
-			if (err_stop)
-				return err;
+			if (err_stop) {
+				ok_count = err;
+				goto err_unlock;
+			}
 		} else {
 			ok_count++;
 		}
 	}
+err_unlock:
+	up_read(&block->cb_lock);
 	return ok_count;
 }
 EXPORT_SYMBOL(tc_setup_cb_call);
-- 
cgit v1.2.3


From 97394bef5622cb32fd1e5d152251090da6c238b9 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:44:58 +0300
Subject: net: sched: change tcf block offload counter type to atomic_t

As a preparation for running proto ops functions without rtnl lock, change
offload counter type to atomic. This is necessary to allow updating the
counter by multiple concurrent users when offloading filters to hardware
from unlocked classifiers.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 7 ++++---
 net/sched/cls_api.c       | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a3eaf5f9d28f..d778c502decd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -14,6 +14,7 @@
 #include <linux/workqueue.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
+#include <linux/atomic.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 #include <net/flow_offload.h>
@@ -401,7 +402,7 @@ struct tcf_block {
 	struct flow_block flow_block;
 	struct list_head owner_list;
 	bool keep_dst;
-	unsigned int offloadcnt; /* Number of oddloaded filters */
+	atomic_t offloadcnt; /* Number of oddloaded filters */
 	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
 	struct {
 		struct tcf_chain *chain;
@@ -443,7 +444,7 @@ static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
 	if (*flags & TCA_CLS_FLAGS_IN_HW)
 		return;
 	*flags |= TCA_CLS_FLAGS_IN_HW;
-	block->offloadcnt++;
+	atomic_inc(&block->offloadcnt);
 }
 
 static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
@@ -451,7 +452,7 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
 	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
 		return;
 	*flags &= ~TCA_CLS_FLAGS_IN_HW;
-	block->offloadcnt--;
+	atomic_dec(&block->offloadcnt);
 }
 
 static inline void
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 959b7ca1ca02..f2c2f8159e35 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -629,7 +629,7 @@ static void tc_indr_block_call(struct tcf_block *block,
 
 static bool tcf_block_offload_in_use(struct tcf_block *block)
 {
-	return block->offloadcnt;
+	return atomic_read(&block->offloadcnt);
 }
 
 static int tcf_block_offload_cmd(struct tcf_block *block,
-- 
cgit v1.2.3


From 401192113730947572d280ec465555ab9ff5a597 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:44:59 +0300
Subject: net: sched: refactor block offloads counter usage

Without rtnl lock protection filters can no longer safely manage block
offloads counter themselves. Refactor cls API to protect block offloadcnt
with tcf_block->cb_lock that is already used to protect driver callback
list and nooffloaddevcnt counter. The counter can be modified by concurrent
tasks by new functions that execute block callbacks (which is safe with
previous patch that changed its type to atomic_t), however, block
bind/unbind code that checks the counter value takes cb_lock in write mode
to exclude any concurrent modifications. This approach prevents race
conditions between bind/unbind and callback execution code but allows for
concurrency for tc rule update path.

Move block offload counter, filter in hardware counter and filter flags
management from classifiers into cls hardware offloads API. Make functions
tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API
private. Implement following new cls API to be used instead:

  tc_setup_cb_add() - non-destructive filter add. If filter that wasn't
  already in hardware is successfully offloaded, increment block offloads
  counter, set filter in hardware counter and flag. On failure, previously
  offloaded filter is considered to be intact and offloads counter is not
  decremented.

  tc_setup_cb_replace() - destructive filter replace. Release existing
  filter block offload counter and reset its in hardware counter and flag.
  Set new filter in hardware counter and flag. On failure, previously
  offloaded filter is considered to be destroyed and offload counter is
  decremented.

  tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block
  offloads counter.

  tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and
  call tc_cls_offload_cnt_update() if cb() didn't return an error.

Refactor all offload-capable classifiers to atomically offload filters to
hardware, change block offload counter, and set filter in hardware counter
and flag by means of the new cls API functions.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h     |  17 ++++-
 include/net/sch_generic.h |  31 --------
 net/sched/cls_api.c       | 176 ++++++++++++++++++++++++++++++++++++++++++----
 net/sched/cls_bpf.c       |  38 +++++-----
 net/sched/cls_flower.c    |  38 ++++------
 net/sched/cls_matchall.c  |  27 +++----
 net/sched/cls_u32.c       |  29 ++++----
 7 files changed, 233 insertions(+), 123 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 64999ffcb486..612232492f67 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -506,7 +506,22 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts);
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
-		     void *type_data, bool err_stop);
+		     void *type_data, bool err_stop, bool rtnl_held);
+int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
+		    enum tc_setup_type type, void *type_data, bool err_stop,
+		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held);
+int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
+			enum tc_setup_type type, void *type_data, bool err_stop,
+			u32 *old_flags, unsigned int *old_in_hw_count,
+			u32 *new_flags, unsigned int *new_in_hw_count,
+			bool rtnl_held);
+int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
+			enum tc_setup_type type, void *type_data, bool err_stop,
+			u32 *flags, unsigned int *in_hw_count, bool rtnl_held);
+int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
+			  bool add, flow_setup_cb_t *cb,
+			  enum tc_setup_type type, void *type_data,
+			  void *cb_priv, u32 *flags, unsigned int *in_hw_count);
 unsigned int tcf_exts_num_actions(struct tcf_exts *exts);
 
 struct tc_cls_u32_knode {
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index d778c502decd..f90e3b2a3065 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -439,37 +439,6 @@ static inline bool lockdep_tcf_proto_is_locked(struct tcf_proto *tp)
 #define tcf_proto_dereference(p, tp)					\
 	rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp))
 
-static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
-{
-	if (*flags & TCA_CLS_FLAGS_IN_HW)
-		return;
-	*flags |= TCA_CLS_FLAGS_IN_HW;
-	atomic_inc(&block->offloadcnt);
-}
-
-static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
-{
-	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
-		return;
-	*flags &= ~TCA_CLS_FLAGS_IN_HW;
-	atomic_dec(&block->offloadcnt);
-}
-
-static inline void
-tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt,
-			  u32 *flags, bool add)
-{
-	if (add) {
-		if (!*cnt)
-			tcf_block_offload_inc(block, flags);
-		(*cnt)++;
-	} else {
-		(*cnt)--;
-		if (!*cnt)
-			tcf_block_offload_dec(block, flags);
-	}
-}
-
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
 	struct qdisc_skb_cb *qcb;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f2c2f8159e35..6e612984e4a6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3000,37 +3000,185 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 }
 EXPORT_SYMBOL(tcf_exts_dump_stats);
 
-int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
-		     void *type_data, bool err_stop)
+static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
+{
+	if (*flags & TCA_CLS_FLAGS_IN_HW)
+		return;
+	*flags |= TCA_CLS_FLAGS_IN_HW;
+	atomic_inc(&block->offloadcnt);
+}
+
+static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
+{
+	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
+		return;
+	*flags &= ~TCA_CLS_FLAGS_IN_HW;
+	atomic_dec(&block->offloadcnt);
+}
+
+static void tc_cls_offload_cnt_update(struct tcf_block *block,
+				      struct tcf_proto *tp, u32 *cnt,
+				      u32 *flags, u32 diff, bool add)
+{
+	lockdep_assert_held(&block->cb_lock);
+
+	spin_lock(&tp->lock);
+	if (add) {
+		if (!*cnt)
+			tcf_block_offload_inc(block, flags);
+		*cnt += diff;
+	} else {
+		*cnt -= diff;
+		if (!*cnt)
+			tcf_block_offload_dec(block, flags);
+	}
+	spin_unlock(&tp->lock);
+}
+
+static void
+tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
+			 u32 *cnt, u32 *flags)
+{
+	lockdep_assert_held(&block->cb_lock);
+
+	spin_lock(&tp->lock);
+	tcf_block_offload_dec(block, flags);
+	*cnt = 0;
+	spin_unlock(&tp->lock);
+}
+
+static int
+__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+		   void *type_data, bool err_stop)
 {
 	struct flow_block_cb *block_cb;
 	int ok_count = 0;
 	int err;
 
-	down_read(&block->cb_lock);
-	/* Make sure all netdevs sharing this block are offload-capable. */
-	if (block->nooffloaddevcnt && err_stop) {
-		ok_count = -EOPNOTSUPP;
-		goto err_unlock;
-	}
-
 	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
 		err = block_cb->cb(type, type_data, block_cb->cb_priv);
 		if (err) {
-			if (err_stop) {
-				ok_count = err;
-				goto err_unlock;
-			}
+			if (err_stop)
+				return err;
 		} else {
 			ok_count++;
 		}
 	}
-err_unlock:
+	return ok_count;
+}
+
+int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
+		     void *type_data, bool err_stop, bool rtnl_held)
+{
+	int ok_count;
+
+	down_read(&block->cb_lock);
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
 	up_read(&block->cb_lock);
 	return ok_count;
 }
 EXPORT_SYMBOL(tc_setup_cb_call);
 
+/* Non-destructive filter add. If filter that wasn't already in hardware is
+ * successfully offloaded, increment block offloads counter. On failure,
+ * previously offloaded filter is considered to be intact and offloads counter
+ * is not decremented.
+ */
+
+int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
+		    enum tc_setup_type type, void *type_data, bool err_stop,
+		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
+{
+	int ok_count;
+
+	down_read(&block->cb_lock);
+	/* Make sure all netdevs sharing this block are offload-capable. */
+	if (block->nooffloaddevcnt && err_stop) {
+		ok_count = -EOPNOTSUPP;
+		goto err_unlock;
+	}
+
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+	if (ok_count > 0)
+		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
+					  ok_count, true);
+err_unlock:
+	up_read(&block->cb_lock);
+	return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_add);
+
+/* Destructive filter replace. If filter that wasn't already in hardware is
+ * successfully offloaded, increment block offload counter. On failure,
+ * previously offloaded filter is considered to be destroyed and offload counter
+ * is decremented.
+ */
+
+int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
+			enum tc_setup_type type, void *type_data, bool err_stop,
+			u32 *old_flags, unsigned int *old_in_hw_count,
+			u32 *new_flags, unsigned int *new_in_hw_count,
+			bool rtnl_held)
+{
+	int ok_count;
+
+	down_read(&block->cb_lock);
+	/* Make sure all netdevs sharing this block are offload-capable. */
+	if (block->nooffloaddevcnt && err_stop) {
+		ok_count = -EOPNOTSUPP;
+		goto err_unlock;
+	}
+
+	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
+
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+	if (ok_count > 0)
+		tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags,
+					  ok_count, true);
+err_unlock:
+	up_read(&block->cb_lock);
+	return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_replace);
+
+/* Destroy filter and decrement block offload counter, if filter was previously
+ * offloaded.
+ */
+
+int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
+			enum tc_setup_type type, void *type_data, bool err_stop,
+			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
+{
+	int ok_count;
+
+	down_read(&block->cb_lock);
+	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+
+	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
+	up_read(&block->cb_lock);
+	return ok_count < 0 ? ok_count : 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_destroy);
+
+int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
+			  bool add, flow_setup_cb_t *cb,
+			  enum tc_setup_type type, void *type_data,
+			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
+{
+	int err = cb(type, type_data, cb_priv);
+
+	if (err) {
+		if (add && tc_skip_sw(*flags))
+			return err;
+	} else {
+		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
+					  add);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(tc_setup_cb_reoffload);
+
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts)
 {
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 3f7a9c02b70c..bf10bdaf5012 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 	cls_bpf.exts_integrated = obj->exts_integrated;
 
 	if (oldprog)
-		tcf_block_offload_dec(block, &oldprog->gen_flags);
+		err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+					  skip_sw, &oldprog->gen_flags,
+					  &oldprog->in_hw_count,
+					  &prog->gen_flags, &prog->in_hw_count,
+					  true);
+	else
+		err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+				      skip_sw, &prog->gen_flags,
+				      &prog->in_hw_count, true);
 
-	err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
-	if (prog) {
-		if (err < 0) {
-			cls_bpf_offload_cmd(tp, oldprog, prog, extack);
-			return err;
-		} else if (err > 0) {
-			prog->in_hw_count = err;
-			tcf_block_offload_inc(block, &prog->gen_flags);
-		}
+	if (prog && err) {
+		cls_bpf_offload_cmd(tp, oldprog, prog, extack);
+		return err;
 	}
 
 	if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
@@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
 	cls_bpf.name = prog->bpf_name;
 	cls_bpf.exts_integrated = prog->exts_integrated;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true);
 }
 
 static int cls_bpf_init(struct tcf_proto *tp)
@@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb
 		cls_bpf.name = prog->bpf_name;
 		cls_bpf.exts_integrated = prog->exts_integrated;
 
-		err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv);
-		if (err) {
-			if (add && tc_skip_sw(prog->gen_flags))
-				return err;
-			continue;
-		}
-
-		tc_cls_offload_cnt_update(block, &prog->in_hw_count,
-					  &prog->gen_flags, add);
+		err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF,
+					    &cls_bpf, cb_priv, &prog->gen_flags,
+					    &prog->in_hw_count);
+		if (err)
+			return err;
 	}
 
 	return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 054123742e32..cb816bbbd376 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -419,10 +419,10 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
 	cls_flower.command = FLOW_CLS_DESTROY;
 	cls_flower.cookie = (unsigned long) f;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
+			    &f->flags, &f->in_hw_count, true);
 	spin_lock(&tp->lock);
 	list_del_init(&f->hw_list);
-	tcf_block_offload_dec(block, &f->flags);
 	spin_unlock(&tp->lock);
 
 	if (!rtnl_held)
@@ -466,18 +466,13 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 		goto errout;
 	}
 
-	err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
+	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
+			      skip_sw, &f->flags, &f->in_hw_count, true);
 	kfree(cls_flower.rule);
 
-	if (err < 0) {
+	if (err) {
 		fl_hw_destroy_filter(tp, f, true, NULL);
 		goto errout;
-	} else if (err > 0) {
-		f->in_hw_count = err;
-		err = 0;
-		spin_lock(&tp->lock);
-		tcf_block_offload_inc(block, &f->flags);
-		spin_unlock(&tp->lock);
 	}
 
 	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) {
@@ -509,7 +504,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
 	cls_flower.cookie = (unsigned long) f;
 	cls_flower.classid = f->res.classid;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
 
 	tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
 			      cls_flower.stats.pkts,
@@ -1844,21 +1839,16 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 
 		cls_flower.classid = f->res.classid;
 
-		err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+		err = tc_setup_cb_reoffload(block, tp, add, cb,
+					    TC_SETUP_CLSFLOWER, &cls_flower,
+					    cb_priv, &f->flags,
+					    &f->in_hw_count);
 		kfree(cls_flower.rule);
 
 		if (err) {
-			if (add && tc_skip_sw(f->flags)) {
-				__fl_put(f);
-				return err;
-			}
-			goto next_flow;
+			__fl_put(f);
+			return err;
 		}
-
-		spin_lock(&tp->lock);
-		tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags,
-					  add);
-		spin_unlock(&tp->lock);
 next_flow:
 		__fl_put(f);
 	}
@@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain,
 	/* We don't care if driver (any of them) fails to handle this
 	 * call. It serves just as a hint for it.
 	 */
-	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
 	kfree(cls_flower.rule);
 
 	return 0;
@@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
 	cls_flower.command = FLOW_CLS_TMPLT_DESTROY;
 	cls_flower.cookie = (unsigned long) tmplt;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true);
 }
 
 static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 455ea2793f9b..911d1ea28bb2 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
 	cls_mall.command = TC_CLSMATCHALL_DESTROY;
 	cls_mall.cookie = cookie;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
-	tcf_block_offload_dec(block, &head->flags);
+	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false,
+			    &head->flags, &head->in_hw_count, true);
 }
 
 static int mall_replace_hw_filter(struct tcf_proto *tp,
@@ -109,15 +109,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 		return err;
 	}
 
-	err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw);
+	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall,
+			      skip_sw, &head->flags, &head->in_hw_count, true);
 	kfree(cls_mall.rule);
 
-	if (err < 0) {
+	if (err) {
 		mall_destroy_hw_filter(tp, head, cookie, NULL);
 		return err;
-	} else if (err > 0) {
-		head->in_hw_count = err;
-		tcf_block_offload_inc(block, &head->flags);
 	}
 
 	if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
@@ -312,16 +310,13 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 		return 0;
 	}
 
-	err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv);
+	err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL,
+				    &cls_mall, cb_priv, &head->flags,
+				    &head->in_hw_count);
 	kfree(cls_mall.rule);
 
-	if (err) {
-		if (add && tc_skip_sw(head->flags))
-			return err;
-		return 0;
-	}
-
-	tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add);
+	if (err)
+		return err;
 
 	return 0;
 }
@@ -337,7 +332,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
 	cls_mall.command = TC_CLSMATCHALL_STATS;
 	cls_mall.cookie = cookie;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
 
 	tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes,
 			      cls_mall.stats.pkts, cls_mall.stats.lastused);
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 8614088edd1b..a0e6fac613de 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 	cls_u32.hnode.handle = h->handle;
 	cls_u32.hnode.prio = h->prio;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
+	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true);
 }
 
 static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
@@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 	cls_u32.hnode.handle = h->handle;
 	cls_u32.hnode.prio = h->prio;
 
-	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true);
 	if (err < 0) {
 		u32_clear_hw_hnode(tp, h, NULL);
 		return err;
@@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	cls_u32.command = TC_CLSU32_DELETE_KNODE;
 	cls_u32.knode.handle = n->handle;
 
-	tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false);
-	tcf_block_offload_dec(block, &n->flags);
+	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false,
+			    &n->flags, &n->in_hw_count, true);
 }
 
 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
@@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 	if (n->ht_down)
 		cls_u32.knode.link_handle = ht->handle;
 
-	err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw);
-	if (err < 0) {
+	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw,
+			      &n->flags, &n->in_hw_count, true);
+	if (err) {
 		u32_remove_hw_knode(tp, n, NULL);
 		return err;
-	} else if (err > 0) {
-		n->in_hw_count = err;
-		tcf_block_offload_inc(block, &n->flags);
 	}
 
 	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
@@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 			cls_u32.knode.link_handle = ht->handle;
 	}
 
-	err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv);
-	if (err) {
-		if (add && tc_skip_sw(n->flags))
-			return err;
-		return 0;
-	}
-
-	tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add);
+	err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32,
+				    &cls_u32, cb_priv, &n->flags,
+				    &n->in_hw_count);
+	if (err)
+		return err;
 
 	return 0;
 }
-- 
cgit v1.2.3


From a449a3e77a85fc8b31fef7238451dc87af8ff1af Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:45:00 +0300
Subject: net: sched: notify classifier on successful offload add/delete

To remove dependency on rtnl lock, extend classifier ops with new
ops->hw_add() and ops->hw_del() callbacks. Call them from cls API while
holding cb_lock every time filter if successfully added to or deleted from
hardware.

Implement the new API in flower classifier. Use it to manage hw_filters
list under cb_lock protection, instead of relying on rtnl lock to
synchronize with concurrent fl_reoffload() call.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h |  4 ++++
 net/sched/cls_api.c       | 19 +++++++++++++++++--
 net/sched/cls_flower.c    | 33 ++++++++++++++++++++++++++-------
 3 files changed, 47 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index f90e3b2a3065..c4fbbaff30a2 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -312,6 +312,10 @@ struct tcf_proto_ops {
 	int			(*reoffload)(struct tcf_proto *tp, bool add,
 					     flow_setup_cb_t *cb, void *cb_priv,
 					     struct netlink_ext_ack *extack);
+	void			(*hw_add)(struct tcf_proto *tp,
+					  void *type_data);
+	void			(*hw_del)(struct tcf_proto *tp,
+					  void *type_data);
 	void			(*bind_class)(void *, u32, unsigned long);
 	void *			(*tmplt_create)(struct net *net,
 						struct tcf_chain *chain,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6e612984e4a6..8b807e75fae2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3099,6 +3099,11 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
 	}
 
 	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+	if (ok_count < 0)
+		goto err_unlock;
+
+	if (tp->ops->hw_add)
+		tp->ops->hw_add(tp, type_data);
 	if (ok_count > 0)
 		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
 					  ok_count, true);
@@ -3130,11 +3135,18 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
 	}
 
 	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
+	if (tp->ops->hw_del)
+		tp->ops->hw_del(tp, type_data);
 
 	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
+	if (ok_count < 0)
+		goto err_unlock;
+
+	if (tp->ops->hw_add)
+		tp->ops->hw_add(tp, type_data);
 	if (ok_count > 0)
-		tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags,
-					  ok_count, true);
+		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
+					  new_flags, ok_count, true);
 err_unlock:
 	up_read(&block->cb_lock);
 	return ok_count < 0 ? ok_count : 0;
@@ -3155,6 +3167,9 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
 	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
 
 	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
+	if (tp->ops->hw_del)
+		tp->ops->hw_del(tp, type_data);
+
 	up_read(&block->cb_lock);
 	return ok_count < 0 ? ok_count : 0;
 }
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index cb816bbbd376..5cb694469b51 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -421,9 +421,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
 
 	tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false,
 			    &f->flags, &f->in_hw_count, true);
-	spin_lock(&tp->lock);
-	list_del_init(&f->hw_list);
-	spin_unlock(&tp->lock);
 
 	if (!rtnl_held)
 		rtnl_unlock();
@@ -433,7 +430,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 				struct cls_fl_filter *f, bool rtnl_held,
 				struct netlink_ext_ack *extack)
 {
-	struct cls_fl_head *head = fl_head_dereference(tp);
 	struct tcf_block *block = tp->chain->block;
 	struct flow_cls_offload cls_flower = {};
 	bool skip_sw = tc_skip_sw(f->flags);
@@ -480,9 +476,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 		goto errout;
 	}
 
-	spin_lock(&tp->lock);
-	list_add(&f->hw_list, &head->hw_filters);
-	spin_unlock(&tp->lock);
 errout:
 	if (!rtnl_held)
 		rtnl_unlock();
@@ -1856,6 +1849,30 @@ next_flow:
 	return 0;
 }
 
+static void fl_hw_add(struct tcf_proto *tp, void *type_data)
+{
+	struct flow_cls_offload *cls_flower = type_data;
+	struct cls_fl_filter *f =
+		(struct cls_fl_filter *) cls_flower->cookie;
+	struct cls_fl_head *head = fl_head_dereference(tp);
+
+	spin_lock(&tp->lock);
+	list_add(&f->hw_list, &head->hw_filters);
+	spin_unlock(&tp->lock);
+}
+
+static void fl_hw_del(struct tcf_proto *tp, void *type_data)
+{
+	struct flow_cls_offload *cls_flower = type_data;
+	struct cls_fl_filter *f =
+		(struct cls_fl_filter *) cls_flower->cookie;
+
+	spin_lock(&tp->lock);
+	if (!list_empty(&f->hw_list))
+		list_del_init(&f->hw_list);
+	spin_unlock(&tp->lock);
+}
+
 static int fl_hw_create_tmplt(struct tcf_chain *chain,
 			      struct fl_flow_tmplt *tmplt)
 {
@@ -2516,6 +2533,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.delete		= fl_delete,
 	.walk		= fl_walk,
 	.reoffload	= fl_reoffload,
+	.hw_add		= fl_hw_add,
+	.hw_del		= fl_hw_del,
 	.dump		= fl_dump,
 	.bind_class	= fl_bind_class,
 	.tmplt_create	= fl_tmplt_create,
-- 
cgit v1.2.3


From c9f14470d04830de217f9d28fcd0deffd7e8c0b1 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:45:01 +0300
Subject: net: sched: add API for registering unlocked offload block callbacks

Extend struct flow_block_offload with "unlocked_driver_cb" flag to allow
registering and unregistering block hardware offload callbacks that do not
require caller to hold rtnl lock. Extend tcf_block with additional
lockeddevcnt counter that is incremented for each non-unlocked driver
callback attached to device. This counter is necessary to conditionally
obtain rtnl lock before calling hardware callbacks in following patches.

Register mlx5 tc block offload callbacks as "unlocked".

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c  | 3 +++
 include/net/flow_offload.h                        | 1 +
 include/net/sch_generic.h                         | 1 +
 net/sched/cls_api.c                               | 6 ++++++
 5 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fa4bf2d4bcd4..8592b98d0e70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3470,10 +3470,12 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			  void *type_data)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct flow_block_offload *f = type_data;
 
 	switch (type) {
 #ifdef CONFIG_MLX5_ESWITCH
 	case TC_SETUP_BLOCK:
+		f->unlocked_driver_cb = true;
 		return flow_block_cb_setup_simple(type_data,
 						  &mlx5e_block_cb_list,
 						  mlx5e_setup_tc_block_cb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 3c0d36b2b91c..e7ac6233037d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -763,6 +763,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev,
 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
+	f->unlocked_driver_cb = true;
 	f->driver_block_list = &mlx5e_block_cb_list;
 
 	switch (f->command) {
@@ -1245,9 +1246,11 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct flow_block_offload *f = type_data;
 
 	switch (type) {
 	case TC_SETUP_BLOCK:
+		f->unlocked_driver_cb = true;
 		return flow_block_cb_setup_simple(type_data,
 						  &mlx5e_rep_block_cb_list,
 						  mlx5e_rep_setup_tc_cb,
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 757fa84de654..fc881875f856 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -284,6 +284,7 @@ struct flow_block_offload {
 	enum flow_block_command command;
 	enum flow_block_binder_type binder_type;
 	bool block_shared;
+	bool unlocked_driver_cb;
 	struct net *net;
 	struct flow_block *block;
 	struct list_head cb_list;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index c4fbbaff30a2..43f5b7ed02bd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -408,6 +408,7 @@ struct tcf_block {
 	bool keep_dst;
 	atomic_t offloadcnt; /* Number of oddloaded filters */
 	unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
+	unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
 	struct {
 		struct tcf_chain *chain;
 		struct list_head filter_chain_list;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8b807e75fae2..1a39779bdbad 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1418,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block,
 						  bo->extack);
 		if (err)
 			goto err_unroll;
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt++;
 
 		i++;
 	}
@@ -1433,6 +1435,8 @@ err_unroll:
 						    block_cb->cb_priv, false,
 						    tcf_block_offload_in_use(block),
 						    NULL);
+			if (!bo->unlocked_driver_cb)
+				block->lockeddevcnt--;
 		}
 		flow_block_cb_free(block_cb);
 	}
@@ -1454,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block,
 					    NULL);
 		list_del(&block_cb->list);
 		flow_block_cb_free(block_cb);
+		if (!bo->unlocked_driver_cb)
+			block->lockeddevcnt--;
 	}
 }
 
-- 
cgit v1.2.3


From 9838b20a7fb28c69fa66ac8e68d967ffe1d0ecad Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:45:03 +0300
Subject: net: sched: take rtnl lock in tc_setup_flow_action()

In order to allow using new flow_action infrastructure from unlocked
classifiers, modify tc_setup_flow_action() to accept new 'rtnl_held'
argument. Take rtnl lock before accessing tc_action data. This is necessary
to protect from concurrent action replace.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h    |  2 +-
 net/sched/cls_api.c      | 17 +++++++++++++----
 net/sched/cls_flower.c   |  6 ++++--
 net/sched/cls_matchall.c |  4 ++--
 4 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 612232492f67..a48824bc1489 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -504,7 +504,7 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 }
 
 int tc_setup_flow_action(struct flow_action *flow_action,
-			 const struct tcf_exts *exts);
+			 const struct tcf_exts *exts, bool rtnl_held);
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 		     void *type_data, bool err_stop, bool rtnl_held);
 int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3c103cf9fd0d..8751bb8a682f 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3266,14 +3266,17 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
 EXPORT_SYMBOL(tc_setup_cb_reoffload);
 
 int tc_setup_flow_action(struct flow_action *flow_action,
-			 const struct tcf_exts *exts)
+			 const struct tcf_exts *exts, bool rtnl_held)
 {
 	const struct tc_action *act;
-	int i, j, k;
+	int i, j, k, err = 0;
 
 	if (!exts)
 		return 0;
 
+	if (!rtnl_held)
+		rtnl_lock();
+
 	j = 0;
 	tcf_exts_for_each_action(i, act, exts) {
 		struct flow_action_entry *entry;
@@ -3318,6 +3321,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 				entry->vlan.prio = tcf_vlan_push_prio(act);
 				break;
 			default:
+				err = -EOPNOTSUPP;
 				goto err_out;
 			}
 		} else if (is_tcf_tunnel_set(act)) {
@@ -3335,6 +3339,7 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 					entry->id = FLOW_ACTION_ADD;
 					break;
 				default:
+					err = -EOPNOTSUPP;
 					goto err_out;
 				}
 				entry->mangle.htype = tcf_pedit_htype(act, k);
@@ -3393,15 +3398,19 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->id = FLOW_ACTION_PTYPE;
 			entry->ptype = tcf_skbedit_ptype(act);
 		} else {
+			err = -EOPNOTSUPP;
 			goto err_out;
 		}
 
 		if (!is_tcf_pedit(act))
 			j++;
 	}
-	return 0;
+
 err_out:
-	return -EOPNOTSUPP;
+	if (!rtnl_held)
+		rtnl_unlock();
+
+	return err;
 }
 EXPORT_SYMBOL(tc_setup_flow_action);
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5cb694469b51..fb305bd45d93 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -452,7 +452,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 	cls_flower.rule->match.key = &f->mkey;
 	cls_flower.classid = f->res.classid;
 
-	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+	err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
+				   true);
 	if (err) {
 		kfree(cls_flower.rule);
 		if (skip_sw)
@@ -1819,7 +1820,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 		cls_flower.rule->match.mask = &f->mask->key;
 		cls_flower.rule->match.key = &f->mkey;
 
-		err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+		err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts,
+					   true);
 		if (err) {
 			kfree(cls_flower.rule);
 			if (tc_skip_sw(f->flags)) {
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 911d1ea28bb2..3266f25011cc 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
 	cls_mall.command = TC_CLSMATCHALL_REPLACE;
 	cls_mall.cookie = cookie;
 
-	err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
+	err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
 	if (err) {
 		kfree(cls_mall.rule);
 		mall_destroy_hw_filter(tp, head, cookie, NULL);
@@ -300,7 +300,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 		TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY;
 	cls_mall.cookie = (unsigned long)head;
 
-	err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts);
+	err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true);
 	if (err) {
 		kfree(cls_mall.rule);
 		if (add && tc_skip_sw(head->flags)) {
-- 
cgit v1.2.3


From 5a6ff4b13d598573fc954f672cd2a267b76a01ec Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:45:04 +0300
Subject: net: sched: take reference to action dev before calling offloads

In order to remove dependency on rtnl lock when calling hardware offload
API, take reference to action mirred dev when initializing flow_action
structure in tc_setup_flow_action(). Implement function
tc_cleanup_flow_action(), use it to release the device after hardware
offload API is done using it.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_cls.h  |  2 ++
 net/sched/cls_api.c    | 32 ++++++++++++++++++++++++++++++++
 net/sched/cls_flower.c |  2 ++
 3 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index a48824bc1489..e553fc80eb23 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -505,6 +505,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
 
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts, bool rtnl_held);
+void tc_cleanup_flow_action(struct flow_action *flow_action);
+
 int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
 		     void *type_data, bool err_stop, bool rtnl_held);
 int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 8751bb8a682f..d988737693e4 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3265,6 +3265,27 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
 }
 EXPORT_SYMBOL(tc_setup_cb_reoffload);
 
+void tc_cleanup_flow_action(struct flow_action *flow_action)
+{
+	struct flow_action_entry *entry;
+	int i;
+
+	flow_action_for_each(i, entry, flow_action) {
+		switch (entry->id) {
+		case FLOW_ACTION_REDIRECT:
+		case FLOW_ACTION_MIRRED:
+		case FLOW_ACTION_REDIRECT_INGRESS:
+		case FLOW_ACTION_MIRRED_INGRESS:
+			if (entry->dev)
+				dev_put(entry->dev);
+			break;
+		default:
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL(tc_cleanup_flow_action);
+
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts, bool rtnl_held)
 {
@@ -3294,15 +3315,23 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 		} else if (is_tcf_mirred_egress_redirect(act)) {
 			entry->id = FLOW_ACTION_REDIRECT;
 			entry->dev = tcf_mirred_dev(act);
+			if (entry->dev)
+				dev_hold(entry->dev);
 		} else if (is_tcf_mirred_egress_mirror(act)) {
 			entry->id = FLOW_ACTION_MIRRED;
 			entry->dev = tcf_mirred_dev(act);
+			if (entry->dev)
+				dev_hold(entry->dev);
 		} else if (is_tcf_mirred_ingress_redirect(act)) {
 			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
 			entry->dev = tcf_mirred_dev(act);
+			if (entry->dev)
+				dev_hold(entry->dev);
 		} else if (is_tcf_mirred_ingress_mirror(act)) {
 			entry->id = FLOW_ACTION_MIRRED_INGRESS;
 			entry->dev = tcf_mirred_dev(act);
+			if (entry->dev)
+				dev_hold(entry->dev);
 		} else if (is_tcf_vlan(act)) {
 			switch (tcf_vlan_action(act)) {
 			case TCA_VLAN_ACT_PUSH:
@@ -3410,6 +3439,9 @@ err_out:
 	if (!rtnl_held)
 		rtnl_unlock();
 
+	if (err)
+		tc_cleanup_flow_action(flow_action);
+
 	return err;
 }
 EXPORT_SYMBOL(tc_setup_flow_action);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index fb305bd45d93..2852fe6f50d2 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -465,6 +465,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
 
 	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
 			      skip_sw, &f->flags, &f->in_hw_count, true);
+	tc_cleanup_flow_action(&cls_flower.rule->action);
 	kfree(cls_flower.rule);
 
 	if (err) {
@@ -1838,6 +1839,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
 					    TC_SETUP_CLSFLOWER, &cls_flower,
 					    cb_priv, &f->flags,
 					    &f->in_hw_count);
+		tc_cleanup_flow_action(&cls_flower.rule->action);
 		kfree(cls_flower.rule);
 
 		if (err) {
-- 
cgit v1.2.3


From 1444c175a37443d3f6d3db825df050741452c3c3 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 26 Aug 2019 16:45:05 +0300
Subject: net: sched: copy tunnel info when setting flow_action entry->tunnel

In order to remove dependency on rtnl lock, modify tc_setup_flow_action()
to copy tunnel info, instead of just saving pointer to tunnel_key action
tunnel info. This is necessary to prevent concurrent action overwrite from
releasing tunnel info while it is being used by rtnl-unlocked driver.

Implement helper tcf_tunnel_info_copy() that is used to copy tunnel info
with all its options to dynamically allocated memory block. Modify
tc_cleanup_flow_action() to free dynamically allocated tunnel info.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_tunnel_key.h | 17 +++++++++++++++++
 net/sched/cls_api.c                |  9 ++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
index 7c3f777c168c..0689d9bcdf84 100644
--- a/include/net/tc_act/tc_tunnel_key.h
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -59,4 +59,21 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
 	return NULL;
 #endif
 }
+
+static inline struct ip_tunnel_info *
+tcf_tunnel_info_copy(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	struct ip_tunnel_info *tun = tcf_tunnel_info(a);
+
+	if (tun) {
+		size_t tun_size = sizeof(*tun) + tun->options_len;
+		struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size,
+							  GFP_KERNEL);
+
+		return tun_copy;
+	}
+#endif
+	return NULL;
+}
 #endif /* __NET_TC_TUNNEL_KEY_H */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d988737693e4..671ca905dbb5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3279,6 +3279,9 @@ void tc_cleanup_flow_action(struct flow_action *flow_action)
 			if (entry->dev)
 				dev_put(entry->dev);
 			break;
+		case FLOW_ACTION_TUNNEL_ENCAP:
+			kfree(entry->tunnel);
+			break;
 		default:
 			break;
 		}
@@ -3355,7 +3358,11 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			}
 		} else if (is_tcf_tunnel_set(act)) {
 			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
-			entry->tunnel = tcf_tunnel_info(act);
+			entry->tunnel = tcf_tunnel_info_copy(act);
+			if (!entry->tunnel) {
+				err = -ENOMEM;
+				goto err_out;
+			}
 		} else if (is_tcf_tunnel_release(act)) {
 			entry->id = FLOW_ACTION_TUNNEL_DECAP;
 		} else if (is_tcf_pedit(act)) {
-- 
cgit v1.2.3


From 24691069a348f82a95e0fa9697bb5656c6d8c48c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Aug 2019 10:53:43 +0900
Subject: rcu: Don't include <linux/ktime.h> in rcutiny.h

The kbuild reported a built failure due to a header loop when RCUTINY is
enabled with my pending riscv-nommu port.  Switch rcutiny.h to only
include the minimal required header to get HZ instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com>
---
 include/linux/rcutiny.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 8e727f57d814..9bf1dfe7781f 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -12,7 +12,7 @@
 #ifndef __LINUX_TINY_H
 #define __LINUX_TINY_H
 
-#include <linux/ktime.h>
+#include <asm/param.h> /* for HZ */
 
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
-- 
cgit v1.2.3


From df2e328126b1b3f71ff07477912d92c20c47148a Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Mon, 26 Aug 2019 20:00:40 -0700
Subject: firmware: ti_sci: Allow for device shared and exclusive requests

Sysfw provides an option for requesting exclusive access for a
device using the flags MSG_FLAG_DEVICE_EXCLUSIVE. If this flag is
not used, the device is meant to be shared across hosts. Once a device
is requested from a host with this flag set, any request to this
device from a different host will be nacked by sysfw. Current tisci
driver enables this flag for every device requests. But this may not
be true for all the devices. So provide a separate commands in driver
for exclusive and shared device requests.

Reviewed-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 drivers/firmware/ti_sci.c              | 45 ++++++++++++++++++++++++++++++++--
 include/linux/soc/ti/ti_sci_protocol.h |  3 +++
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index cdee0b45943d..4126be9e3216 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -635,6 +635,7 @@ fail:
 
 /**
  * ti_sci_cmd_get_device() - command to request for device managed by TISCI
+ *			     that can be shared with other hosts.
  * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
  * @id:		Device Identifier
  *
@@ -642,11 +643,29 @@ fail:
  * usage count by balancing get_device with put_device. No refcounting is
  * managed by driver for that purpose.
  *
- * NOTE: The request is for exclusive access for the processor.
- *
  * Return: 0 if all went fine, else return appropriate error.
  */
 static int ti_sci_cmd_get_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id, 0,
+				       MSG_DEVICE_SW_STATE_ON);
+}
+
+/**
+ * ti_sci_cmd_get_device_exclusive() - command to request for device managed by
+ *				       TISCI that is exclusively owned by the
+ *				       requesting host.
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_get_device_exclusive(const struct ti_sci_handle *handle,
+					   u32 id)
 {
 	return ti_sci_set_device_state(handle, id,
 				       MSG_FLAG_DEVICE_EXCLUSIVE,
@@ -665,6 +684,26 @@ static int ti_sci_cmd_get_device(const struct ti_sci_handle *handle, u32 id)
  * Return: 0 if all went fine, else return appropriate error.
  */
 static int ti_sci_cmd_idle_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id, 0,
+				       MSG_DEVICE_SW_STATE_RETENTION);
+}
+
+/**
+ * ti_sci_cmd_idle_device_exclusive() - Command to idle a device managed by
+ *					TISCI that is exclusively owned by
+ *					requesting host.
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_idle_device_exclusive(const struct ti_sci_handle *handle,
+					    u32 id)
 {
 	return ti_sci_set_device_state(handle, id,
 				       MSG_FLAG_DEVICE_EXCLUSIVE,
@@ -2894,7 +2933,9 @@ static void ti_sci_setup_ops(struct ti_sci_info *info)
 	core_ops->reboot_device = ti_sci_cmd_core_reboot;
 
 	dops->get_device = ti_sci_cmd_get_device;
+	dops->get_device_exclusive = ti_sci_cmd_get_device_exclusive;
 	dops->idle_device = ti_sci_cmd_idle_device;
+	dops->idle_device_exclusive = ti_sci_cmd_idle_device_exclusive;
 	dops->put_device = ti_sci_cmd_put_device;
 
 	dops->is_valid = ti_sci_cmd_dev_is_valid;
diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index 6c610e188a44..9531ec823298 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -97,7 +97,10 @@ struct ti_sci_core_ops {
  */
 struct ti_sci_dev_ops {
 	int (*get_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*get_device_exclusive)(const struct ti_sci_handle *handle, u32 id);
 	int (*idle_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*idle_device_exclusive)(const struct ti_sci_handle *handle,
+				     u32 id);
 	int (*put_device)(const struct ti_sci_handle *handle, u32 id);
 	int (*is_valid)(const struct ti_sci_handle *handle, u32 id);
 	int (*get_context_loss_count)(const struct ti_sci_handle *handle,
-- 
cgit v1.2.3


From 9e465988228667b61e26ae7f68ca37a85821c86d Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Mon, 26 Aug 2019 20:00:40 -0700
Subject: dt-bindings: ti_sci_pm_domains: Add support for exclusive and shared
 access

TISCI protocol supports for enabling the device either with exclusive
permissions for the requesting host or with sharing across the hosts.
There are certain devices which are exclusive to Linux context and
there are certain devices that are shared across different host contexts.
So add support for getting this information from DT by increasing
the power-domain cells to 2.

Acked-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
---
 Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt | 11 +++++++++--
 MAINTAINERS                                                |  1 +
 include/dt-bindings/soc/ti,sci_pm_domain.h                 |  9 +++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 include/dt-bindings/soc/ti,sci_pm_domain.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
index f7b00a7c0f68..f541d1f776a2 100644
--- a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
+++ b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
@@ -19,8 +19,15 @@ child of the pmmc node.
 Required Properties:
 --------------------
 - compatible: should be "ti,sci-pm-domain"
-- #power-domain-cells: Must be 1 so that an id can be provided in each
-		       device node.
+- #power-domain-cells: Can be one of the following:
+			1: Containing the device id of each node
+			2: First entry should be device id
+			   Second entry should be one of the floowing:
+			   TI_SCI_PD_EXCLUSIVE: To allow device to be
+						exclusively controlled by
+						the requesting hosts.
+			   TI_SCI_PD_SHARED: To allow device to be shared
+					     by multiple hosts.
 
 Example (K2G):
 -------------
diff --git a/MAINTAINERS b/MAINTAINERS
index 6426db5198f0..fe7406427023 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15853,6 +15853,7 @@ F:	drivers/firmware/ti_sci*
 F:	include/linux/soc/ti/ti_sci_protocol.h
 F:	Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
 F:	drivers/soc/ti/ti_sci_pm_domains.c
+F:	include/dt-bindings/soc/ti,sci_pm_domain.h
 F:	Documentation/devicetree/bindings/reset/ti,sci-reset.txt
 F:	Documentation/devicetree/bindings/clock/ti,sci-clk.txt
 F:	drivers/clk/keystone/sci-clk.c
diff --git a/include/dt-bindings/soc/ti,sci_pm_domain.h b/include/dt-bindings/soc/ti,sci_pm_domain.h
new file mode 100644
index 000000000000..8f2a7360b65e
--- /dev/null
+++ b/include/dt-bindings/soc/ti,sci_pm_domain.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __DT_BINDINGS_TI_SCI_PM_DOMAIN_H
+#define __DT_BINDINGS_TI_SCI_PM_DOMAIN_H
+
+#define TI_SCI_PD_EXCLUSIVE	1
+#define TI_SCI_PD_SHARED	0
+
+#endif /* __DT_BINDINGS_TI_SCI_PM_DOMAIN_H */
-- 
cgit v1.2.3


From 987db9f7cd1e77e611b770a569068c43949aa6fd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 19 Aug 2019 09:25:38 +0100
Subject: rxrpc: Use the tx-phase skb flag to simplify tracing

Use the previously-added transmit-phase skbuff private flag to simplify the
socket buffer tracing a bit.  Which phase the skbuff comes from can now be
divined from the skb rather than having to be guessed from the call state.

We can also reduce the number of rxrpc_skb_trace values by eliminating the
difference between Tx and Rx in the symbols.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 51 +++++++++++++++++++-------------------------
 net/rxrpc/ar-internal.h      |  1 +
 net/rxrpc/call_event.c       |  8 +++----
 net/rxrpc/call_object.c      |  6 ++----
 net/rxrpc/conn_event.c       |  6 +++---
 net/rxrpc/input.c            | 22 +++++++++----------
 net/rxrpc/local_event.c      |  4 ++--
 net/rxrpc/output.c           |  6 +++---
 net/rxrpc/peer_event.c       | 10 ++++-----
 net/rxrpc/recvmsg.c          |  6 +++---
 net/rxrpc/sendmsg.c          | 10 ++++-----
 net/rxrpc/skbuff.c           | 15 +++++++------
 12 files changed, 69 insertions(+), 76 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index fa06b528c73c..e2356c51883b 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -23,20 +23,15 @@
 #define __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY
 
 enum rxrpc_skb_trace {
-	rxrpc_skb_rx_cleaned,
-	rxrpc_skb_rx_freed,
-	rxrpc_skb_rx_got,
-	rxrpc_skb_rx_lost,
-	rxrpc_skb_rx_purged,
-	rxrpc_skb_rx_received,
-	rxrpc_skb_rx_rotated,
-	rxrpc_skb_rx_seen,
-	rxrpc_skb_tx_cleaned,
-	rxrpc_skb_tx_freed,
-	rxrpc_skb_tx_got,
-	rxrpc_skb_tx_new,
-	rxrpc_skb_tx_rotated,
-	rxrpc_skb_tx_seen,
+	rxrpc_skb_cleaned,
+	rxrpc_skb_freed,
+	rxrpc_skb_got,
+	rxrpc_skb_lost,
+	rxrpc_skb_new,
+	rxrpc_skb_purged,
+	rxrpc_skb_received,
+	rxrpc_skb_rotated,
+	rxrpc_skb_seen,
 };
 
 enum rxrpc_local_trace {
@@ -228,20 +223,15 @@ enum rxrpc_tx_point {
  * Declare tracing information enums and their string mappings for display.
  */
 #define rxrpc_skb_traces \
-	EM(rxrpc_skb_rx_cleaned,		"Rx CLN") \
-	EM(rxrpc_skb_rx_freed,			"Rx FRE") \
-	EM(rxrpc_skb_rx_got,			"Rx GOT") \
-	EM(rxrpc_skb_rx_lost,			"Rx *L*") \
-	EM(rxrpc_skb_rx_purged,			"Rx PUR") \
-	EM(rxrpc_skb_rx_received,		"Rx RCV") \
-	EM(rxrpc_skb_rx_rotated,		"Rx ROT") \
-	EM(rxrpc_skb_rx_seen,			"Rx SEE") \
-	EM(rxrpc_skb_tx_cleaned,		"Tx CLN") \
-	EM(rxrpc_skb_tx_freed,			"Tx FRE") \
-	EM(rxrpc_skb_tx_got,			"Tx GOT") \
-	EM(rxrpc_skb_tx_new,			"Tx NEW") \
-	EM(rxrpc_skb_tx_rotated,		"Tx ROT") \
-	E_(rxrpc_skb_tx_seen,			"Tx SEE")
+	EM(rxrpc_skb_cleaned,			"CLN") \
+	EM(rxrpc_skb_freed,			"FRE") \
+	EM(rxrpc_skb_got,			"GOT") \
+	EM(rxrpc_skb_lost,			"*L*") \
+	EM(rxrpc_skb_new,			"NEW") \
+	EM(rxrpc_skb_purged,			"PUR") \
+	EM(rxrpc_skb_received,			"RCV") \
+	EM(rxrpc_skb_rotated,			"ROT") \
+	E_(rxrpc_skb_seen,			"SEE")
 
 #define rxrpc_local_traces \
 	EM(rxrpc_local_got,			"GOT") \
@@ -650,6 +640,7 @@ TRACE_EVENT(rxrpc_skb,
 	    TP_STRUCT__entry(
 		    __field(struct sk_buff *,		skb		)
 		    __field(enum rxrpc_skb_trace,	op		)
+		    __field(u8,				flags		)
 		    __field(int,			usage		)
 		    __field(int,			mod_count	)
 		    __field(const void *,		where		)
@@ -657,14 +648,16 @@ TRACE_EVENT(rxrpc_skb,
 
 	    TP_fast_assign(
 		    __entry->skb = skb;
+		    __entry->flags = rxrpc_skb(skb)->rx_flags;
 		    __entry->op = op;
 		    __entry->usage = usage;
 		    __entry->mod_count = mod_count;
 		    __entry->where = where;
 			   ),
 
-	    TP_printk("s=%p %s u=%d m=%d p=%pSR",
+	    TP_printk("s=%p %cx %s u=%d m=%d p=%pSR",
 		      __entry->skb,
+		      __entry->flags & RXRPC_SKB_TX_BUFFER ? 'T' : 'R',
 		      __print_symbolic(__entry->op, rxrpc_skb_traces),
 		      __entry->usage,
 		      __entry->mod_count,
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 63d3a91ce5e9..2d5294f3e62f 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -185,6 +185,7 @@ struct rxrpc_host_header {
  * - max 48 bytes (struct sk_buff::cb)
  */
 struct rxrpc_skb_priv {
+	atomic_t	nr_ring_pins;		/* Number of rxtx ring pins */
 	u8		nr_subpackets;		/* Number of subpackets */
 	u8		rx_flags;		/* Received packet flags */
 #define RXRPC_SKB_INCL_LAST	0x01		/* - Includes last packet */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index c767679bfa5d..cedbbb3a7c2e 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -199,7 +199,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 			continue;
 
 		skb = call->rxtx_buffer[ix];
-		rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 
 		if (anno_type == RXRPC_TX_ANNO_UNACK) {
 			if (ktime_after(skb->tstamp, max_age)) {
@@ -255,18 +255,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
 			continue;
 
 		skb = call->rxtx_buffer[ix];
-		rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+		rxrpc_get_skb(skb, rxrpc_skb_got);
 		spin_unlock_bh(&call->lock);
 
 		if (rxrpc_send_data_packet(call, skb, true) < 0) {
-			rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+			rxrpc_free_skb(skb, rxrpc_skb_freed);
 			return;
 		}
 
 		if (rxrpc_is_client_call(call))
 			rxrpc_expose_client_call(call);
 
-		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		spin_lock_bh(&call->lock);
 
 		/* We need to clear the retransmit state, but there are two
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c9ab2da957fe..014548c259ce 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -429,9 +429,7 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
 	int i;
 
 	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
-		rxrpc_free_skb(call->rxtx_buffer[i],
-			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
-				rxrpc_skb_rx_cleaned));
+		rxrpc_free_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned);
 		call->rxtx_buffer[i] = NULL;
 	}
 }
@@ -587,7 +585,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
 	ASSERTCMP(call->conn, ==, NULL);
 
 	rxrpc_cleanup_ring(call);
-	rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned);
+	rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
 
 	call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
 }
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index df6624c140be..a1ceef4f5cd0 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -472,7 +472,7 @@ void rxrpc_process_connection(struct work_struct *work)
 	/* go through the conn-level event packets, releasing the ref on this
 	 * connection that each one has when we've finished with it */
 	while ((skb = skb_dequeue(&conn->rx_queue))) {
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		ret = rxrpc_process_event(conn, skb, &abort_code);
 		switch (ret) {
 		case -EPROTO:
@@ -484,7 +484,7 @@ void rxrpc_process_connection(struct work_struct *work)
 			goto requeue_and_leave;
 		case -ECONNABORTED:
 		default:
-			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+			rxrpc_free_skb(skb, rxrpc_skb_freed);
 			break;
 		}
 	}
@@ -501,6 +501,6 @@ requeue_and_leave:
 protocol_error:
 	if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
 		goto requeue_and_leave;
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	goto out;
 }
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 140cede77655..31090bdf1fae 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -233,7 +233,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
 		ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
 		skb = call->rxtx_buffer[ix];
 		annotation = call->rxtx_annotations[ix];
-		rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
+		rxrpc_see_skb(skb, rxrpc_skb_rotated);
 		call->rxtx_buffer[ix] = NULL;
 		call->rxtx_annotations[ix] = 0;
 		skb->next = list;
@@ -258,7 +258,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
 		skb = list;
 		list = skb->next;
 		skb_mark_not_on_list(skb);
-		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 
 	return rot_last;
@@ -443,7 +443,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 
 	state = READ_ONCE(call->state);
 	if (state >= RXRPC_CALL_COMPLETE) {
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		return;
 	}
 
@@ -559,7 +559,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 		 * and also rxrpc_fill_out_ack().
 		 */
 		if (!terminal)
-			rxrpc_get_skb(skb, rxrpc_skb_rx_got);
+			rxrpc_get_skb(skb, rxrpc_skb_got);
 		call->rxtx_annotations[ix] = annotation;
 		smp_wmb();
 		call->rxtx_buffer[ix] = skb;
@@ -620,7 +620,7 @@ ack:
 
 unlock:
 	spin_unlock(&call->input_lock);
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	_leave(" [queued]");
 }
 
@@ -1056,7 +1056,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call,
 		break;
 	}
 
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 no_free:
 	_leave("");
 }
@@ -1119,7 +1119,7 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
 		skb_queue_tail(&local->event_queue, skb);
 		rxrpc_queue_local(local);
 	} else {
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 }
 
@@ -1134,7 +1134,7 @@ static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
 		skb_queue_tail(&local->reject_queue, skb);
 		rxrpc_queue_local(local);
 	} else {
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 }
 
@@ -1198,7 +1198,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 	if (skb->tstamp == 0)
 		skb->tstamp = ktime_get_real();
 
-	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
+	rxrpc_new_skb(skb, rxrpc_skb_received);
 
 	skb_pull(skb, sizeof(struct udphdr));
 
@@ -1215,7 +1215,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 		static int lose;
 		if ((lose++ & 7) == 7) {
 			trace_rxrpc_rx_lose(sp);
-			rxrpc_free_skb(skb, rxrpc_skb_rx_lost);
+			rxrpc_free_skb(skb, rxrpc_skb_lost);
 			return 0;
 		}
 	}
@@ -1389,7 +1389,7 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 	goto out;
 
 discard:
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 out:
 	trace_rxrpc_rx_done(0, 0);
 	return 0;
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index e93a78f7c05e..3ce6d628cd75 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -90,7 +90,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
 	if (skb) {
 		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		_debug("{%d},{%u}", local->debug_id, sp->hdr.type);
 
 		switch (sp->hdr.type) {
@@ -108,7 +108,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
 			break;
 		}
 
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 
 	_leave("");
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 369e516c4bdf..935bb60fff56 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -565,7 +565,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 	memset(&whdr, 0, sizeof(whdr));
 
 	while ((skb = skb_dequeue(&local->reject_queue))) {
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		sp = rxrpc_skb(skb);
 
 		switch (skb->mark) {
@@ -581,7 +581,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 			ioc = 2;
 			break;
 		default:
-			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+			rxrpc_free_skb(skb, rxrpc_skb_freed);
 			continue;
 		}
 
@@ -606,7 +606,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local)
 						      rxrpc_tx_point_reject);
 		}
 
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 	}
 
 	_leave("");
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 7666ec72d37e..c97ebdc043e4 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -163,11 +163,11 @@ void rxrpc_error_report(struct sock *sk)
 		_leave("UDP socket errqueue empty");
 		return;
 	}
-	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
+	rxrpc_new_skb(skb, rxrpc_skb_received);
 	serr = SKB_EXT_ERR(skb);
 	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
 		_leave("UDP empty message");
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		return;
 	}
 
@@ -177,7 +177,7 @@ void rxrpc_error_report(struct sock *sk)
 		peer = NULL;
 	if (!peer) {
 		rcu_read_unlock();
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		_leave(" [no peer]");
 		return;
 	}
@@ -189,7 +189,7 @@ void rxrpc_error_report(struct sock *sk)
 	     serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
 		rxrpc_adjust_mtu(peer, serr);
 		rcu_read_unlock();
-		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+		rxrpc_free_skb(skb, rxrpc_skb_freed);
 		rxrpc_put_peer(peer);
 		_leave(" [MTU update]");
 		return;
@@ -197,7 +197,7 @@ void rxrpc_error_report(struct sock *sk)
 
 	rxrpc_store_error(peer, serr);
 	rcu_read_unlock();
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	rxrpc_put_peer(peer);
 
 	_leave("");
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index e49eacfaf4d6..3b0becb12041 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -190,7 +190,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
 	hard_ack++;
 	ix = hard_ack & RXRPC_RXTX_BUFF_MASK;
 	skb = call->rxtx_buffer[ix];
-	rxrpc_see_skb(skb, rxrpc_skb_rx_rotated);
+	rxrpc_see_skb(skb, rxrpc_skb_rotated);
 	sp = rxrpc_skb(skb);
 
 	subpacket = call->rxtx_annotations[ix] & RXRPC_RX_ANNO_SUBPACKET;
@@ -205,7 +205,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
 	/* Barrier against rxrpc_input_data(). */
 	smp_store_release(&call->rx_hard_ack, hard_ack);
 
-	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 
 	trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
 	if (last) {
@@ -340,7 +340,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
 			break;
 		}
 		smp_rmb();
-		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		rxrpc_see_skb(skb, rxrpc_skb_seen);
 		sp = rxrpc_skb(skb);
 
 		if (!(flags & MSG_PEEK)) {
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 472dc3b7d91f..6a1547b270fe 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -176,7 +176,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 	skb->tstamp = ktime_get_real();
 
 	ix = seq & RXRPC_RXTX_BUFF_MASK;
-	rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+	rxrpc_get_skb(skb, rxrpc_skb_got);
 	call->rxtx_annotations[ix] = annotation;
 	smp_wmb();
 	call->rxtx_buffer[ix] = skb;
@@ -248,7 +248,7 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
 	}
 
 out:
-	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -289,7 +289,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
 	skb = call->tx_pending;
 	call->tx_pending = NULL;
-	rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+	rxrpc_see_skb(skb, rxrpc_skb_seen);
 
 	copied = 0;
 	do {
@@ -338,7 +338,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
 			sp = rxrpc_skb(skb);
 			sp->rx_flags |= RXRPC_SKB_TX_BUFFER;
-			rxrpc_new_skb(skb, rxrpc_skb_tx_new);
+			rxrpc_new_skb(skb, rxrpc_skb_new);
 
 			_debug("ALLOC SEND %p", skb);
 
@@ -440,7 +440,7 @@ out:
 	return ret;
 
 call_terminated:
-	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+	rxrpc_free_skb(skb, rxrpc_skb_freed);
 	_leave(" = %d", call->error);
 	return call->error;
 
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 9ad5045b7c2f..8e6f45f84b9b 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -14,7 +14,8 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
+#define is_tx_skb(skb) (rxrpc_skb(skb)->rx_flags & RXRPC_SKB_TX_BUFFER)
+#define select_skb_count(skb) (is_tx_skb(skb) ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
 
 /*
  * Note the allocation or reception of a socket buffer.
@@ -22,7 +23,7 @@
 void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
-	int n = atomic_inc_return(select_skb_count(op));
+	int n = atomic_inc_return(select_skb_count(skb));
 	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 }
 
@@ -33,7 +34,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	if (skb) {
-		int n = atomic_read(select_skb_count(op));
+		int n = atomic_read(select_skb_count(skb));
 		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 	}
 }
@@ -44,7 +45,7 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
-	int n = atomic_inc_return(select_skb_count(op));
+	int n = atomic_inc_return(select_skb_count(skb));
 	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 	skb_get(skb);
 }
@@ -58,7 +59,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 	if (skb) {
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
-		n = atomic_dec_return(select_skb_count(op));
+		n = atomic_dec_return(select_skb_count(skb));
 		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
@@ -72,8 +73,8 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
 	const void *here = __builtin_return_address(0);
 	struct sk_buff *skb;
 	while ((skb = skb_dequeue((list))) != NULL) {
-		int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
-		trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
+		int n = atomic_dec_return(select_skb_count(skb));
+		trace_rxrpc_skb(skb, rxrpc_skb_purged,
 				refcount_read(&skb->users), n, here);
 		kfree_skb(skb);
 	}
-- 
cgit v1.2.3


From d0d5c0cd1e711c98703f3544c1e6fc1372898de5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 27 Aug 2019 10:13:46 +0100
Subject: rxrpc: Use skb_unshare() rather than skb_cow_data()

The in-place decryption routines in AF_RXRPC's rxkad security module
currently call skb_cow_data() to make sure the data isn't shared and that
the skb can be written over.  This has a problem, however, as the softirq
handler may be still holding a ref or the Rx ring may be holding multiple
refs when skb_cow_data() is called in rxkad_verify_packet() - and so
skb_shared() returns true and __pskb_pull_tail() dislikes that.  If this
occurs, something like the following report will be generated.

	kernel BUG at net/core/skbuff.c:1463!
	...
	RIP: 0010:pskb_expand_head+0x253/0x2b0
	...
	Call Trace:
	 __pskb_pull_tail+0x49/0x460
	 skb_cow_data+0x6f/0x300
	 rxkad_verify_packet+0x18b/0xb10 [rxrpc]
	 rxrpc_recvmsg_data.isra.11+0x4a8/0xa10 [rxrpc]
	 rxrpc_kernel_recv_data+0x126/0x240 [rxrpc]
	 afs_extract_data+0x51/0x2d0 [kafs]
	 afs_deliver_fs_fetch_data+0x188/0x400 [kafs]
	 afs_deliver_to_call+0xac/0x430 [kafs]
	 afs_wait_for_call_to_complete+0x22f/0x3d0 [kafs]
	 afs_make_call+0x282/0x3f0 [kafs]
	 afs_fs_fetch_data+0x164/0x300 [kafs]
	 afs_fetch_data+0x54/0x130 [kafs]
	 afs_readpages+0x20d/0x340 [kafs]
	 read_pages+0x66/0x180
	 __do_page_cache_readahead+0x188/0x1a0
	 ondemand_readahead+0x17d/0x2e0
	 generic_file_read_iter+0x740/0xc10
	 __vfs_read+0x145/0x1a0
	 vfs_read+0x8c/0x140
	 ksys_read+0x4a/0xb0
	 do_syscall_64+0x43/0xf0
	 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fix this by using skb_unshare() instead in the input path for DATA packets
that have a security index != 0.  Non-DATA packets don't need in-place
encryption and neither do unencrypted DATA packets.

Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code")
Reported-by: Julian Wollrath <jwollrath@web.de>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/trace/events/rxrpc.h | 12 ++++++++----
 net/rxrpc/ar-internal.h      |  1 +
 net/rxrpc/input.c            | 18 ++++++++++++++++++
 net/rxrpc/rxkad.c            | 32 +++++++++-----------------------
 net/rxrpc/skbuff.c           | 25 ++++++++++++++++++++-----
 5 files changed, 56 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index e2356c51883b..a13a62db3565 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -32,6 +32,8 @@ enum rxrpc_skb_trace {
 	rxrpc_skb_received,
 	rxrpc_skb_rotated,
 	rxrpc_skb_seen,
+	rxrpc_skb_unshared,
+	rxrpc_skb_unshared_nomem,
 };
 
 enum rxrpc_local_trace {
@@ -231,7 +233,9 @@ enum rxrpc_tx_point {
 	EM(rxrpc_skb_purged,			"PUR") \
 	EM(rxrpc_skb_received,			"RCV") \
 	EM(rxrpc_skb_rotated,			"ROT") \
-	E_(rxrpc_skb_seen,			"SEE")
+	EM(rxrpc_skb_seen,			"SEE") \
+	EM(rxrpc_skb_unshared,			"UNS") \
+	E_(rxrpc_skb_unshared_nomem,		"US0")
 
 #define rxrpc_local_traces \
 	EM(rxrpc_local_got,			"GOT") \
@@ -633,9 +637,9 @@ TRACE_EVENT(rxrpc_call,
 
 TRACE_EVENT(rxrpc_skb,
 	    TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op,
-		     int usage, int mod_count, const void *where),
+		     int usage, int mod_count, u8 flags,    const void *where),
 
-	    TP_ARGS(skb, op, usage, mod_count, where),
+	    TP_ARGS(skb, op, usage, mod_count, flags, where),
 
 	    TP_STRUCT__entry(
 		    __field(struct sk_buff *,		skb		)
@@ -648,7 +652,7 @@ TRACE_EVENT(rxrpc_skb,
 
 	    TP_fast_assign(
 		    __entry->skb = skb;
-		    __entry->flags = rxrpc_skb(skb)->rx_flags;
+		    __entry->flags = flags;
 		    __entry->op = op;
 		    __entry->usage = usage;
 		    __entry->mod_count = mod_count;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2d5294f3e62f..852e58781fda 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1110,6 +1110,7 @@ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_packet_destructor(struct sk_buff *);
 void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
 void rxrpc_purge_queue(struct sk_buff_head *);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 31090bdf1fae..d122c53c8697 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1249,6 +1249,24 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
 			goto bad_message;
 		if (!rxrpc_validate_data(skb))
 			goto bad_message;
+
+		/* Unshare the packet so that it can be modified for in-place
+		 * decryption.
+		 */
+		if (sp->hdr.securityIndex != 0) {
+			struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC);
+			if (!nskb) {
+				rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem);
+				goto out;
+			}
+
+			if (nskb != skb) {
+				rxrpc_eaten_skb(skb, rxrpc_skb_received);
+				rxrpc_new_skb(skb, rxrpc_skb_unshared);
+				skb = nskb;
+				sp = rxrpc_skb(skb);
+			}
+		}
 		break;
 
 	case RXRPC_PACKET_TYPE_CHALLENGE:
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index ae8cd8926456..c60c520fde7c 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -187,10 +187,8 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[16];
-	struct sk_buff *trailer;
 	unsigned int len;
 	u16 check;
-	int nsg;
 	int err;
 
 	sp = rxrpc_skb(skb);
@@ -214,15 +212,14 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
 	crypto_skcipher_encrypt(req);
 
 	/* we want to encrypt the skbuff in-place */
-	nsg = skb_cow_data(skb, 0, &trailer);
-	err = -ENOMEM;
-	if (nsg < 0 || nsg > 16)
+	err = -EMSGSIZE;
+	if (skb_shinfo(skb)->nr_frags > 16)
 		goto out;
 
 	len = data_size + call->conn->size_align - 1;
 	len &= ~(call->conn->size_align - 1);
 
-	sg_init_table(sg, nsg);
+	sg_init_table(sg, ARRAY_SIZE(sg));
 	err = skb_to_sgvec(skb, sg, 0, len);
 	if (unlikely(err < 0))
 		goto out;
@@ -319,11 +316,10 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	struct rxkad_level1_hdr sechdr;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[16];
-	struct sk_buff *trailer;
 	bool aborted;
 	u32 data_size, buf;
 	u16 check;
-	int nsg, ret;
+	int ret;
 
 	_enter("");
 
@@ -336,11 +332,7 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
 	 * directly into the target buffer.
 	 */
-	nsg = skb_cow_data(skb, 0, &trailer);
-	if (nsg < 0 || nsg > 16)
-		goto nomem;
-
-	sg_init_table(sg, nsg);
+	sg_init_table(sg, ARRAY_SIZE(sg));
 	ret = skb_to_sgvec(skb, sg, offset, 8);
 	if (unlikely(ret < 0))
 		return ret;
@@ -388,10 +380,6 @@ protocol_error:
 	if (aborted)
 		rxrpc_send_abort_packet(call);
 	return -EPROTO;
-
-nomem:
-	_leave(" = -ENOMEM");
-	return -ENOMEM;
 }
 
 /*
@@ -406,7 +394,6 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	struct rxkad_level2_hdr sechdr;
 	struct rxrpc_crypt iv;
 	struct scatterlist _sg[4], *sg;
-	struct sk_buff *trailer;
 	bool aborted;
 	u32 data_size, buf;
 	u16 check;
@@ -423,12 +410,11 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
 	 * directly into the target buffer.
 	 */
-	nsg = skb_cow_data(skb, 0, &trailer);
-	if (nsg < 0)
-		goto nomem;
-
 	sg = _sg;
-	if (unlikely(nsg > 4)) {
+	nsg = skb_shinfo(skb)->nr_frags;
+	if (nsg <= 4) {
+		nsg = 4;
+	} else {
 		sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
 		if (!sg)
 			goto nomem;
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 8e6f45f84b9b..0348d2bf6f7d 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -24,7 +24,8 @@ void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(select_skb_count(skb));
-	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+			rxrpc_skb(skb)->rx_flags, here);
 }
 
 /*
@@ -35,7 +36,8 @@ void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 	const void *here = __builtin_return_address(0);
 	if (skb) {
 		int n = atomic_read(select_skb_count(skb));
-		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+				rxrpc_skb(skb)->rx_flags, here);
 	}
 }
 
@@ -46,10 +48,21 @@ void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
 	const void *here = __builtin_return_address(0);
 	int n = atomic_inc_return(select_skb_count(skb));
-	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+	trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+			rxrpc_skb(skb)->rx_flags, here);
 	skb_get(skb);
 }
 
+/*
+ * Note the dropping of a ref on a socket buffer by the core.
+ */
+void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+{
+	const void *here = __builtin_return_address(0);
+	int n = atomic_inc_return(&rxrpc_n_rx_skbs);
+	trace_rxrpc_skb(skb, op, 0, n, 0, here);
+}
+
 /*
  * Note the destruction of a socket buffer.
  */
@@ -60,7 +73,8 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 		int n;
 		CHECK_SLAB_OKAY(&skb->users);
 		n = atomic_dec_return(select_skb_count(skb));
-		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here);
+		trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n,
+				rxrpc_skb(skb)->rx_flags, here);
 		kfree_skb(skb);
 	}
 }
@@ -75,7 +89,8 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
 	while ((skb = skb_dequeue((list))) != NULL) {
 		int n = atomic_dec_return(select_skb_count(skb));
 		trace_rxrpc_skb(skb, rxrpc_skb_purged,
-				refcount_read(&skb->users), n, here);
+				refcount_read(&skb->users), n,
+				rxrpc_skb(skb)->rx_flags, here);
 		kfree_skb(skb);
 	}
 }
-- 
cgit v1.2.3


From dd23e1d566d0f74aa3b68ab3237927bb15f0e644 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 27 Aug 2019 16:37:50 +0200
Subject: ALSA: hda - Allow runtime PM for controller if component notifier is
 used

Currently we disallow the runtime PM of the HD-audio controller if
it's bound with HDMI/DP on Nvidia / AMD unless it's for dGPU.  This is
for keeping the link up to get the proper notification for ELD
hotplug.

As explained in the commit 37a3a98ef601 ("ALSA: hda - Enable runtime
PM only for discrete GPU"), this keep-power-up behavior is rather a
stop-gap solution until the ELD notification via audio component.
And now we finally got the audio component for these graphics drivers
via commit ade49db337a9 ("ALSA: hda/hdmi - Allow audio component for
AMD/ATI and Nvidia HDMI"), so it's time to change.

This patch makes HD-audio controller again runtime-suspendable when
the device gets bound with audio component in HDMI codec driver.  For
making it easier to access from the codec driver, move the flag into
the common hda_bus object instead of hda_intel flag.  Also rename it
to keep_power, to indicate the actual meaning.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/sound/hda_codec.h  |  1 +
 sound/pci/hda/hda_intel.c  | 11 +++++------
 sound/pci/hda/hda_intel.h  |  1 -
 sound/pci/hda/patch_hdmi.c |  4 +++-
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h
index 871993696c5f..9a0393cf024c 100644
--- a/include/sound/hda_codec.h
+++ b/include/sound/hda_codec.h
@@ -59,6 +59,7 @@ struct hda_bus {
 	unsigned int in_reset:1;	/* during reset operation */
 	unsigned int no_response_fallback:1; /* don't fallback at RIRB error */
 	unsigned int bus_probing :1;	/* during probing process */
+	unsigned int keep_power:1;	/* keep power up for notification */
 
 	int primary_dig_out_type;	/* primary digital out PCM type */
 	unsigned int mixer_assigned;	/* codec addr for mixer name */
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 4496fce21300..91e71be42fa4 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -356,7 +356,7 @@ enum {
  */
 #ifdef SUPPORT_VGA_SWITCHEROO
 #define use_vga_switcheroo(chip)	((chip)->use_vga_switcheroo)
-#define needs_eld_notify_link(chip)	((chip)->need_eld_notify_link)
+#define needs_eld_notify_link(chip)	((chip)->bus.keep_power)
 #else
 #define use_vga_switcheroo(chip)	0
 #define needs_eld_notify_link(chip)	false
@@ -1145,7 +1145,7 @@ static int azx_runtime_idle(struct device *dev)
 		return -EBUSY;
 
 	/* ELD notification gets broken when HD-audio bus is off */
-	if (needs_eld_notify_link(hda))
+	if (needs_eld_notify_link(chip))
 		return -EBUSY;
 
 	return 0;
@@ -1256,7 +1256,7 @@ static void setup_vga_switcheroo_runtime_pm(struct azx *chip)
 	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
 	struct hda_codec *codec;
 
-	if (hda->use_vga_switcheroo && !hda->need_eld_notify_link) {
+	if (hda->use_vga_switcheroo && !needs_eld_notify_link(chip)) {
 		list_for_each_codec(codec, &chip->bus)
 			codec->auto_runtime_pm = 1;
 		/* reset the power save setup */
@@ -1270,10 +1270,9 @@ static void azx_vs_gpu_bound(struct pci_dev *pci,
 {
 	struct snd_card *card = pci_get_drvdata(pci);
 	struct azx *chip = card->private_data;
-	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
 
 	if (client_id == VGA_SWITCHEROO_DIS)
-		hda->need_eld_notify_link = 0;
+		chip->bus.keep_power = 0;
 	setup_vga_switcheroo_runtime_pm(chip);
 }
 
@@ -1285,7 +1284,7 @@ static void init_vga_switcheroo(struct azx *chip)
 		dev_info(chip->card->dev,
 			 "Handle vga_switcheroo audio client\n");
 		hda->use_vga_switcheroo = 1;
-		hda->need_eld_notify_link = 1; /* cleared in gpu_bound op */
+		chip->bus.keep_power = 1; /* cleared in either gpu_bound op or codec probe */
 		chip->driver_caps |= AZX_DCAPS_PM_RUNTIME;
 		pci_dev_put(p);
 	}
diff --git a/sound/pci/hda/hda_intel.h b/sound/pci/hda/hda_intel.h
index 1468865e0342..2acfff3da1a0 100644
--- a/sound/pci/hda/hda_intel.h
+++ b/sound/pci/hda/hda_intel.h
@@ -25,7 +25,6 @@ struct hda_intel {
 
 	/* vga_switcheroo setup */
 	unsigned int use_vga_switcheroo:1;
-	unsigned int need_eld_notify_link:1;
 	unsigned int vga_switcheroo_registered:1;
 	unsigned int init_failed:1; /* delayed init failed */
 
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 933c7bf47ef6..83b8b9d27711 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2542,8 +2542,10 @@ static void generic_acomp_init(struct hda_codec *codec,
 	spec->port2pin = port2pin;
 	setup_drm_audio_ops(codec, ops);
 	if (!snd_hdac_acomp_init(&codec->bus->core, &spec->drm_audio_ops,
-				 match_bound_vga, 0))
+				 match_bound_vga, 0)) {
 		spec->acomp_registered = true;
+		codec->bus->keep_power = 0;
+	}
 }
 
 /*
-- 
cgit v1.2.3


From 5b9cce4c7eb0696558dfd4946074ae1fb9d8f05d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 26 Aug 2019 09:06:52 -0700
Subject: writeback: Generalize and expose wb_completion

wb_completion is used to track writeback completions.  We want to use
it from memcg side for foreign inode flushes.  This patch updates it
to remember the target waitq instead of assuming bdi->wb_waitq and
expose it outside of fs-writeback.c.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c                | 47 ++++++++++++----------------------------
 include/linux/backing-dev-defs.h | 20 +++++++++++++++++
 include/linux/backing-dev.h      |  2 ++
 3 files changed, 36 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index fddd8abd839a..9442f1fd6460 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -36,10 +36,6 @@
  */
 #define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))
 
-struct wb_completion {
-	atomic_t		cnt;
-};
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -60,19 +56,6 @@ struct wb_writeback_work {
 	struct wb_completion *done;	/* set if the caller waits */
 };
 
-/*
- * If one wants to wait for one or more wb_writeback_works, each work's
- * ->done should be set to a wb_completion defined using the following
- * macro.  Once all work items are issued with wb_queue_work(), the caller
- * can wait for the completion of all using wb_wait_for_completion().  Work
- * items which are waited upon aren't freed automatically on completion.
- */
-#define DEFINE_WB_COMPLETION_ONSTACK(cmpl)				\
-	struct wb_completion cmpl = {					\
-		.cnt		= ATOMIC_INIT(1),			\
-	}
-
-
 /*
  * If an inode is constantly having its pages dirtied, but then the
  * updates stop dirtytime_expire_interval seconds in the past, it's
@@ -182,7 +165,7 @@ static void finish_writeback_work(struct bdi_writeback *wb,
 	if (work->auto_free)
 		kfree(work);
 	if (done && atomic_dec_and_test(&done->cnt))
-		wake_up_all(&wb->bdi->wb_waitq);
+		wake_up_all(done->waitq);
 }
 
 static void wb_queue_work(struct bdi_writeback *wb,
@@ -206,20 +189,18 @@ static void wb_queue_work(struct bdi_writeback *wb,
 
 /**
  * wb_wait_for_completion - wait for completion of bdi_writeback_works
- * @bdi: bdi work items were issued to
  * @done: target wb_completion
  *
  * Wait for one or more work items issued to @bdi with their ->done field
- * set to @done, which should have been defined with
- * DEFINE_WB_COMPLETION_ONSTACK().  This function returns after all such
- * work items are completed.  Work items which are waited upon aren't freed
+ * set to @done, which should have been initialized with
+ * DEFINE_WB_COMPLETION().  This function returns after all such work items
+ * are completed.  Work items which are waited upon aren't freed
  * automatically on completion.
  */
-static void wb_wait_for_completion(struct backing_dev_info *bdi,
-				   struct wb_completion *done)
+void wb_wait_for_completion(struct wb_completion *done)
 {
 	atomic_dec(&done->cnt);		/* put down the initial count */
-	wait_event(bdi->wb_waitq, !atomic_read(&done->cnt));
+	wait_event(*done->waitq, !atomic_read(&done->cnt));
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -854,7 +835,7 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
 restart:
 	rcu_read_lock();
 	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
-		DEFINE_WB_COMPLETION_ONSTACK(fallback_work_done);
+		DEFINE_WB_COMPLETION(fallback_work_done, bdi);
 		struct wb_writeback_work fallback_work;
 		struct wb_writeback_work *work;
 		long nr_pages;
@@ -901,7 +882,7 @@ restart:
 		last_wb = wb;
 
 		rcu_read_unlock();
-		wb_wait_for_completion(bdi, &fallback_work_done);
+		wb_wait_for_completion(&fallback_work_done);
 		goto restart;
 	}
 	rcu_read_unlock();
@@ -2373,7 +2354,8 @@ static void wait_sb_inodes(struct super_block *sb)
 static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
 				     enum wb_reason reason, bool skip_if_busy)
 {
-	DEFINE_WB_COMPLETION_ONSTACK(done);
+	struct backing_dev_info *bdi = sb->s_bdi;
+	DEFINE_WB_COMPLETION(done, bdi);
 	struct wb_writeback_work work = {
 		.sb			= sb,
 		.sync_mode		= WB_SYNC_NONE,
@@ -2382,14 +2364,13 @@ static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
 		.nr_pages		= nr,
 		.reason			= reason,
 	};
-	struct backing_dev_info *bdi = sb->s_bdi;
 
 	if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
 		return;
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
 
 	bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
-	wb_wait_for_completion(bdi, &done);
+	wb_wait_for_completion(&done);
 }
 
 /**
@@ -2451,7 +2432,8 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
  */
 void sync_inodes_sb(struct super_block *sb)
 {
-	DEFINE_WB_COMPLETION_ONSTACK(done);
+	struct backing_dev_info *bdi = sb->s_bdi;
+	DEFINE_WB_COMPLETION(done, bdi);
 	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_ALL,
@@ -2461,7 +2443,6 @@ void sync_inodes_sb(struct super_block *sb)
 		.reason		= WB_REASON_SYNC,
 		.for_sync	= 1,
 	};
-	struct backing_dev_info *bdi = sb->s_bdi;
 
 	/*
 	 * Can't skip on !bdi_has_dirty() because we should wait for !dirty
@@ -2475,7 +2456,7 @@ void sync_inodes_sb(struct super_block *sb)
 	/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
 	bdi_down_write_wb_switch_rwsem(bdi);
 	bdi_split_work_to_wbs(bdi, &work, false);
-	wb_wait_for_completion(bdi, &done);
+	wb_wait_for_completion(&done);
 	bdi_up_write_wb_switch_rwsem(bdi);
 
 	wait_sb_inodes(sb);
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 6a1a8a314d85..8fb740178d5d 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -67,6 +67,26 @@ enum wb_reason {
 	WB_REASON_MAX,
 };
 
+struct wb_completion {
+	atomic_t		cnt;
+	wait_queue_head_t	*waitq;
+};
+
+#define __WB_COMPLETION_INIT(_waitq)	\
+	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }
+
+/*
+ * If one wants to wait for one or more wb_writeback_works, each work's
+ * ->done should be set to a wb_completion defined using the following
+ * macro.  Once all work items are issued with wb_queue_work(), the caller
+ * can wait for the completion of all using wb_wait_for_completion().  Work
+ * items which are waited upon aren't freed automatically on completion.
+ */
+#define WB_COMPLETION_INIT(bdi)		__WB_COMPLETION_INIT(&(bdi)->wb_waitq)
+
+#define DEFINE_WB_COMPLETION(cmpl, bdi)	\
+	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
+
 /*
  * For cgroup writeback, multiple wb's may map to the same blkcg.  Those
  * wb's can operate mostly independently but should share the congested
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 35b31d176f74..02650b1253a2 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -44,6 +44,8 @@ void wb_start_background_writeback(struct bdi_writeback *wb);
 void wb_workfn(struct work_struct *work);
 void wb_wakeup_delayed(struct bdi_writeback *wb);
 
+void wb_wait_for_completion(struct wb_completion *done);
+
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
-- 
cgit v1.2.3


From 34f8fe501f0624de115d087680c84000b5d9abc9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 26 Aug 2019 09:06:53 -0700
Subject: bdi: Add bdi->id

There currently is no way to universally identify and lookup a bdi
without holding a reference and pointer to it.  This patch adds an
non-recycling bdi->id and implements bdi_get_by_id() which looks up
bdis by their ids.  This will be used by memcg foreign inode flushing.

I left bdi_list alone for simplicity and because while rb_tree does
support rcu assignment it doesn't seem to guarantee lossless walk when
walk is racing aginst tree rebalance operations.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/backing-dev-defs.h |  2 ++
 include/linux/backing-dev.h      |  1 +
 mm/backing-dev.c                 | 65 ++++++++++++++++++++++++++++++++++++++--
 3 files changed, 66 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 8fb740178d5d..1075f2552cfc 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -185,6 +185,8 @@ struct bdi_writeback {
 };
 
 struct backing_dev_info {
+	u64 id;
+	struct rb_node rb_node; /* keyed by ->id */
 	struct list_head bdi_list;
 	unsigned long ra_pages;	/* max readahead in PAGE_SIZE units */
 	unsigned long io_pages;	/* max allowed IO size */
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 02650b1253a2..84cdcfbc763f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -24,6 +24,7 @@ static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
 	return bdi;
 }
 
+struct backing_dev_info *bdi_get_by_id(u64 id);
 void bdi_put(struct backing_dev_info *bdi);
 
 __printf(2, 3)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e8e89158adec..612aa7c5ddbd 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/wait.h>
+#include <linux/rbtree.h>
 #include <linux/backing-dev.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -22,10 +23,12 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info);
 static struct class *bdi_class;
 
 /*
- * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side
- * locking.
+ * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
+ * reader side locking.
  */
 DEFINE_SPINLOCK(bdi_lock);
+static u64 bdi_id_cursor;
+static struct rb_root bdi_tree = RB_ROOT;
 LIST_HEAD(bdi_list);
 
 /* bdi_wq serves all asynchronous writeback tasks */
@@ -859,9 +862,58 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id)
 }
 EXPORT_SYMBOL(bdi_alloc_node);
 
+static struct rb_node **bdi_lookup_rb_node(u64 id, struct rb_node **parentp)
+{
+	struct rb_node **p = &bdi_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct backing_dev_info *bdi;
+
+	lockdep_assert_held(&bdi_lock);
+
+	while (*p) {
+		parent = *p;
+		bdi = rb_entry(parent, struct backing_dev_info, rb_node);
+
+		if (bdi->id > id)
+			p = &(*p)->rb_left;
+		else if (bdi->id < id)
+			p = &(*p)->rb_right;
+		else
+			break;
+	}
+
+	if (parentp)
+		*parentp = parent;
+	return p;
+}
+
+/**
+ * bdi_get_by_id - lookup and get bdi from its id
+ * @id: bdi id to lookup
+ *
+ * Find bdi matching @id and get it.  Returns NULL if the matching bdi
+ * doesn't exist or is already unregistered.
+ */
+struct backing_dev_info *bdi_get_by_id(u64 id)
+{
+	struct backing_dev_info *bdi = NULL;
+	struct rb_node **p;
+
+	spin_lock_bh(&bdi_lock);
+	p = bdi_lookup_rb_node(id, NULL);
+	if (*p) {
+		bdi = rb_entry(*p, struct backing_dev_info, rb_node);
+		bdi_get(bdi);
+	}
+	spin_unlock_bh(&bdi_lock);
+
+	return bdi;
+}
+
 int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
 {
 	struct device *dev;
+	struct rb_node *parent, **p;
 
 	if (bdi->dev)	/* The driver needs to use separate queues per device */
 		return 0;
@@ -877,7 +929,15 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
 	set_bit(WB_registered, &bdi->wb.state);
 
 	spin_lock_bh(&bdi_lock);
+
+	bdi->id = ++bdi_id_cursor;
+
+	p = bdi_lookup_rb_node(bdi->id, &parent);
+	rb_link_node(&bdi->rb_node, parent, p);
+	rb_insert_color(&bdi->rb_node, &bdi_tree);
+
 	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+
 	spin_unlock_bh(&bdi_lock);
 
 	trace_writeback_bdi_register(bdi);
@@ -918,6 +978,7 @@ EXPORT_SYMBOL(bdi_register_owner);
 static void bdi_remove_from_list(struct backing_dev_info *bdi)
 {
 	spin_lock_bh(&bdi_lock);
+	rb_erase(&bdi->rb_node, &bdi_tree);
 	list_del_rcu(&bdi->bdi_list);
 	spin_unlock_bh(&bdi_lock);
 
-- 
cgit v1.2.3


From ed288dc0d4aa29f65bd25b31b5cb866aa5664ff9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 26 Aug 2019 09:06:54 -0700
Subject: writeback: Separate out wb_get_lookup() from wb_get_create()

Separate out wb_get_lookup() which doesn't try to create one if there
isn't already one from wb_get_create().  This will be used by later
patches.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/backing-dev.h |  2 ++
 mm/backing-dev.c            | 55 ++++++++++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 84cdcfbc763f..97967ce06de3 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -230,6 +230,8 @@ static inline int bdi_sched_wait(void *word)
 struct bdi_writeback_congested *
 wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
 void wb_congested_put(struct bdi_writeback_congested *congested);
+struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
+				    struct cgroup_subsys_state *memcg_css);
 struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 				    struct cgroup_subsys_state *memcg_css,
 				    gfp_t gfp);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 612aa7c5ddbd..d9daa3e422d0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -618,13 +618,12 @@ out_put:
 }
 
 /**
- * wb_get_create - get wb for a given memcg, create if necessary
+ * wb_get_lookup - get wb for a given memcg
  * @bdi: target bdi
  * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
- * @gfp: allocation mask to use
  *
- * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
- * create one.  The returned wb has its refcount incremented.
+ * Try to get the wb for @memcg_css on @bdi.  The returned wb has its
+ * refcount incremented.
  *
  * This function uses css_get() on @memcg_css and thus expects its refcnt
  * to be positive on invocation.  IOW, rcu_read_lock() protection on
@@ -641,6 +640,39 @@ out_put:
  * each lookup.  On mismatch, the existing wb is discarded and a new one is
  * created.
  */
+struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
+				    struct cgroup_subsys_state *memcg_css)
+{
+	struct bdi_writeback *wb;
+
+	if (!memcg_css->parent)
+		return &bdi->wb;
+
+	rcu_read_lock();
+	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
+	if (wb) {
+		struct cgroup_subsys_state *blkcg_css;
+
+		/* see whether the blkcg association has changed */
+		blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys);
+		if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb)))
+			wb = NULL;
+		css_put(blkcg_css);
+	}
+	rcu_read_unlock();
+
+	return wb;
+}
+
+/**
+ * wb_get_create - get wb for a given memcg, create if necessary
+ * @bdi: target bdi
+ * @memcg_css: cgroup_subsys_state of the target memcg (must have positive ref)
+ * @gfp: allocation mask to use
+ *
+ * Try to get the wb for @memcg_css on @bdi.  If it doesn't exist, try to
+ * create one.  See wb_get_lookup() for more details.
+ */
 struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 				    struct cgroup_subsys_state *memcg_css,
 				    gfp_t gfp)
@@ -653,20 +685,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 		return &bdi->wb;
 
 	do {
-		rcu_read_lock();
-		wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
-		if (wb) {
-			struct cgroup_subsys_state *blkcg_css;
-
-			/* see whether the blkcg association has changed */
-			blkcg_css = cgroup_get_e_css(memcg_css->cgroup,
-						     &io_cgrp_subsys);
-			if (unlikely(wb->blkcg_css != blkcg_css ||
-				     !wb_tryget(wb)))
-				wb = NULL;
-			css_put(blkcg_css);
-		}
-		rcu_read_unlock();
+		wb = wb_get_lookup(bdi, memcg_css);
 	} while (!wb && !cgwb_create(bdi, memcg_css, gfp));
 
 	return wb;
-- 
cgit v1.2.3


From d62241c7a406f0680d702bd974f6f17e28ab8e5d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 26 Aug 2019 09:06:55 -0700
Subject: writeback, memcg: Implement cgroup_writeback_by_id()

Implement cgroup_writeback_by_id() which initiates cgroup writeback
from bdi and memcg IDs.  This will be used by memcg foreign inode
flushing.

v2: Use wb_get_lookup() instead of wb_get_create() to avoid creating
    spurious wbs.

v3: Interpret 0 @nr as 1.25 * nr_dirty to implement best-effort
    flushing while avoding possible livelocks.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c         | 83 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/writeback.h |  2 ++
 2 files changed, 85 insertions(+)

(limited to 'include')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 9442f1fd6460..658dc16c9e6d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -891,6 +891,89 @@ restart:
 		wb_put(last_wb);
 }
 
+/**
+ * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
+ * @bdi_id: target bdi id
+ * @memcg_id: target memcg css id
+ * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @reason: reason why some writeback work initiated
+ * @done: target wb_completion
+ *
+ * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
+ * with the specified parameters.
+ */
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+			   enum wb_reason reason, struct wb_completion *done)
+{
+	struct backing_dev_info *bdi;
+	struct cgroup_subsys_state *memcg_css;
+	struct bdi_writeback *wb;
+	struct wb_writeback_work *work;
+	int ret;
+
+	/* lookup bdi and memcg */
+	bdi = bdi_get_by_id(bdi_id);
+	if (!bdi)
+		return -ENOENT;
+
+	rcu_read_lock();
+	memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
+	if (memcg_css && !css_tryget(memcg_css))
+		memcg_css = NULL;
+	rcu_read_unlock();
+	if (!memcg_css) {
+		ret = -ENOENT;
+		goto out_bdi_put;
+	}
+
+	/*
+	 * And find the associated wb.  If the wb isn't there already
+	 * there's nothing to flush, don't create one.
+	 */
+	wb = wb_get_lookup(bdi, memcg_css);
+	if (!wb) {
+		ret = -ENOENT;
+		goto out_css_put;
+	}
+
+	/*
+	 * If @nr is zero, the caller is attempting to write out most of
+	 * the currently dirty pages.  Let's take the current dirty page
+	 * count and inflate it by 25% which should be large enough to
+	 * flush out most dirty pages while avoiding getting livelocked by
+	 * concurrent dirtiers.
+	 */
+	if (!nr) {
+		unsigned long filepages, headroom, dirty, writeback;
+
+		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
+				      &writeback);
+		nr = dirty * 10 / 8;
+	}
+
+	/* issue the writeback work */
+	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
+	if (work) {
+		work->nr_pages = nr;
+		work->sync_mode = WB_SYNC_NONE;
+		work->range_cyclic = 1;
+		work->reason = reason;
+		work->done = done;
+		work->auto_free = 1;
+		wb_queue_work(wb, work);
+		ret = 0;
+	} else {
+		ret = -ENOMEM;
+	}
+
+	wb_put(wb);
+out_css_put:
+	css_put(memcg_css);
+out_bdi_put:
+	bdi_put(bdi);
+	return ret;
+}
+
 /**
  * cgroup_writeback_umount - flush inode wb switches for umount
  *
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8945aac31392..a19d845dd7eb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -217,6 +217,8 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
 void wbc_detach_inode(struct writeback_control *wbc);
 void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
 			      size_t bytes);
+int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+			   enum wb_reason reason, struct wb_completion *done);
 void cgroup_writeback_umount(void);
 
 /**
-- 
cgit v1.2.3


From 97b27821b4854ca744946dae32a3f2fd55bcd5bc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 26 Aug 2019 09:06:56 -0700
Subject: writeback, memcg: Implement foreign dirty flushing

There's an inherent mismatch between memcg and writeback.  The former
trackes ownership per-page while the latter per-inode.  This was a
deliberate design decision because honoring per-page ownership in the
writeback path is complicated, may lead to higher CPU and IO overheads
and deemed unnecessary given that write-sharing an inode across
different cgroups isn't a common use-case.

Combined with inode majority-writer ownership switching, this works
well enough in most cases but there are some pathological cases.  For
example, let's say there are two cgroups A and B which keep writing to
different but confined parts of the same inode.  B owns the inode and
A's memory is limited far below B's.  A's dirty ratio can rise enough
to trigger balance_dirty_pages() sleeps but B's can be low enough to
avoid triggering background writeback.  A will be slowed down without
a way to make writeback of the dirty pages happen.

This patch implements foreign dirty recording and foreign mechanism so
that when a memcg encounters a condition as above it can trigger
flushes on bdi_writebacks which can clean its pages.  Please see the
comment on top of mem_cgroup_track_foreign_dirty_slowpath() for
details.

A reproducer follows.

write-range.c::

  #include <stdio.h>
  #include <stdlib.h>
  #include <unistd.h>
  #include <fcntl.h>
  #include <sys/types.h>

  static const char *usage = "write-range FILE START SIZE\n";

  int main(int argc, char **argv)
  {
	  int fd;
	  unsigned long start, size, end, pos;
	  char *endp;
	  char buf[4096];

	  if (argc < 4) {
		  fprintf(stderr, usage);
		  return 1;
	  }

	  fd = open(argv[1], O_WRONLY);
	  if (fd < 0) {
		  perror("open");
		  return 1;
	  }

	  start = strtoul(argv[2], &endp, 0);
	  if (*endp != '\0') {
		  fprintf(stderr, usage);
		  return 1;
	  }

	  size = strtoul(argv[3], &endp, 0);
	  if (*endp != '\0') {
		  fprintf(stderr, usage);
		  return 1;
	  }

	  end = start + size;

	  while (1) {
		  for (pos = start; pos < end; ) {
			  long bread, bwritten = 0;

			  if (lseek(fd, pos, SEEK_SET) < 0) {
				  perror("lseek");
				  return 1;
			  }

			  bread = read(0, buf, sizeof(buf) < end - pos ?
					       sizeof(buf) : end - pos);
			  if (bread < 0) {
				  perror("read");
				  return 1;
			  }
			  if (bread == 0)
				  return 0;

			  while (bwritten < bread) {
				  long this;

				  this = write(fd, buf + bwritten,
					       bread - bwritten);
				  if (this < 0) {
					  perror("write");
					  return 1;
				  }

				  bwritten += this;
				  pos += bwritten;
			  }
		  }
	  }
  }

repro.sh::

  #!/bin/bash

  set -e
  set -x

  sysctl -w vm.dirty_expire_centisecs=300000
  sysctl -w vm.dirty_writeback_centisecs=300000
  sysctl -w vm.dirtytime_expire_seconds=300000
  echo 3 > /proc/sys/vm/drop_caches

  TEST=/sys/fs/cgroup/test
  A=$TEST/A
  B=$TEST/B

  mkdir -p $A $B
  echo "+memory +io" > $TEST/cgroup.subtree_control
  echo $((1<<30)) > $A/memory.high
  echo $((32<<30)) > $B/memory.high

  rm -f testfile
  touch testfile
  fallocate -l 4G testfile

  echo "Starting B"

  (echo $BASHPID > $B/cgroup.procs
   pv -q --rate-limit 70M < /dev/urandom | ./write-range testfile $((2<<30)) $((2<<30))) &

  echo "Waiting 10s to ensure B claims the testfile inode"
  sleep 5
  sync
  sleep 5
  sync
  echo "Starting A"

  (echo $BASHPID > $A/cgroup.procs
   pv < /dev/urandom | ./write-range testfile 0 $((2<<30)))

v2: Added comments explaining why the specific intervals are being used.

v3: Use 0 @nr when calling cgroup_writeback_by_id() to use best-effort
    flushing while avoding possible livelocks.

v4: Use get_jiffies_64() and time_before/after64() instead of raw
    jiffies_64 and arthimetic comparisons as suggested by Jan.

Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/backing-dev-defs.h |   1 +
 include/linux/memcontrol.h       |  39 ++++++++++++
 mm/memcontrol.c                  | 134 +++++++++++++++++++++++++++++++++++++++
 mm/page-writeback.c              |   4 ++
 4 files changed, 178 insertions(+)

(limited to 'include')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 1075f2552cfc..4fc87dee005a 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -63,6 +63,7 @@ enum wb_reason {
 	 * so it has a mismatch name.
 	 */
 	WB_REASON_FORKER_THREAD,
+	WB_REASON_FOREIGN_FLUSH,
 
 	WB_REASON_MAX,
 };
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44c41462be33..bc69d5725760 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -183,6 +183,23 @@ struct memcg_padding {
 #define MEMCG_PADDING(name)
 #endif
 
+/*
+ * Remember four most recent foreign writebacks with dirty pages in this
+ * cgroup.  Inode sharing is expected to be uncommon and, even if we miss
+ * one in a given round, we're likely to catch it later if it keeps
+ * foreign-dirtying, so a fairly low count should be enough.
+ *
+ * See mem_cgroup_track_foreign_dirty_slowpath() for details.
+ */
+#define MEMCG_CGWB_FRN_CNT	4
+
+struct memcg_cgwb_frn {
+	u64 bdi_id;			/* bdi->id of the foreign inode */
+	int memcg_id;			/* memcg->css.id of foreign inode */
+	u64 at;				/* jiffies_64 at the time of dirtying */
+	struct wb_completion done;	/* tracks in-flight foreign writebacks */
+};
+
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
@@ -307,6 +324,7 @@ struct mem_cgroup {
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head cgwb_list;
 	struct wb_domain cgwb_domain;
+	struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT];
 #endif
 
 	/* List of events which userspace want to receive */
@@ -1218,6 +1236,18 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 			 unsigned long *pheadroom, unsigned long *pdirty,
 			 unsigned long *pwriteback);
 
+void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+					     struct bdi_writeback *wb);
+
+static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+						  struct bdi_writeback *wb)
+{
+	if (unlikely(&page->mem_cgroup->css != wb->memcg_css))
+		mem_cgroup_track_foreign_dirty_slowpath(page, wb);
+}
+
+void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
@@ -1233,6 +1263,15 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 {
 }
 
+static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+						  struct bdi_writeback *wb)
+{
+}
+
+static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
+{
+}
+
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
 struct sock;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cdbb7a84cb6e..89b65f5ca634 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -87,6 +87,10 @@ int do_swap_account __read_mostly;
 #define do_swap_account		0
 #endif
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq);
+#endif
+
 /* Whether legacy memory+swap accounting is active */
 static bool do_memsw_account(void)
 {
@@ -4145,6 +4149,127 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 	}
 }
 
+/*
+ * Foreign dirty flushing
+ *
+ * There's an inherent mismatch between memcg and writeback.  The former
+ * trackes ownership per-page while the latter per-inode.  This was a
+ * deliberate design decision because honoring per-page ownership in the
+ * writeback path is complicated, may lead to higher CPU and IO overheads
+ * and deemed unnecessary given that write-sharing an inode across
+ * different cgroups isn't a common use-case.
+ *
+ * Combined with inode majority-writer ownership switching, this works well
+ * enough in most cases but there are some pathological cases.  For
+ * example, let's say there are two cgroups A and B which keep writing to
+ * different but confined parts of the same inode.  B owns the inode and
+ * A's memory is limited far below B's.  A's dirty ratio can rise enough to
+ * trigger balance_dirty_pages() sleeps but B's can be low enough to avoid
+ * triggering background writeback.  A will be slowed down without a way to
+ * make writeback of the dirty pages happen.
+ *
+ * Conditions like the above can lead to a cgroup getting repatedly and
+ * severely throttled after making some progress after each
+ * dirty_expire_interval while the underyling IO device is almost
+ * completely idle.
+ *
+ * Solving this problem completely requires matching the ownership tracking
+ * granularities between memcg and writeback in either direction.  However,
+ * the more egregious behaviors can be avoided by simply remembering the
+ * most recent foreign dirtying events and initiating remote flushes on
+ * them when local writeback isn't enough to keep the memory clean enough.
+ *
+ * The following two functions implement such mechanism.  When a foreign
+ * page - a page whose memcg and writeback ownerships don't match - is
+ * dirtied, mem_cgroup_track_foreign_dirty() records the inode owning
+ * bdi_writeback on the page owning memcg.  When balance_dirty_pages()
+ * decides that the memcg needs to sleep due to high dirty ratio, it calls
+ * mem_cgroup_flush_foreign() which queues writeback on the recorded
+ * foreign bdi_writebacks which haven't expired.  Both the numbers of
+ * recorded bdi_writebacks and concurrent in-flight foreign writebacks are
+ * limited to MEMCG_CGWB_FRN_CNT.
+ *
+ * The mechanism only remembers IDs and doesn't hold any object references.
+ * As being wrong occasionally doesn't matter, updates and accesses to the
+ * records are lockless and racy.
+ */
+void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+					     struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg = page->mem_cgroup;
+	struct memcg_cgwb_frn *frn;
+	u64 now = get_jiffies_64();
+	u64 oldest_at = now;
+	int oldest = -1;
+	int i;
+
+	/*
+	 * Pick the slot to use.  If there is already a slot for @wb, keep
+	 * using it.  If not replace the oldest one which isn't being
+	 * written out.
+	 */
+	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
+		frn = &memcg->cgwb_frn[i];
+		if (frn->bdi_id == wb->bdi->id &&
+		    frn->memcg_id == wb->memcg_css->id)
+			break;
+		if (time_before64(frn->at, oldest_at) &&
+		    atomic_read(&frn->done.cnt) == 1) {
+			oldest = i;
+			oldest_at = frn->at;
+		}
+	}
+
+	if (i < MEMCG_CGWB_FRN_CNT) {
+		/*
+		 * Re-using an existing one.  Update timestamp lazily to
+		 * avoid making the cacheline hot.  We want them to be
+		 * reasonably up-to-date and significantly shorter than
+		 * dirty_expire_interval as that's what expires the record.
+		 * Use the shorter of 1s and dirty_expire_interval / 8.
+		 */
+		unsigned long update_intv =
+			min_t(unsigned long, HZ,
+			      msecs_to_jiffies(dirty_expire_interval * 10) / 8);
+
+		if (time_before64(frn->at, now - update_intv))
+			frn->at = now;
+	} else if (oldest >= 0) {
+		/* replace the oldest free one */
+		frn = &memcg->cgwb_frn[oldest];
+		frn->bdi_id = wb->bdi->id;
+		frn->memcg_id = wb->memcg_css->id;
+		frn->at = now;
+	}
+}
+
+/* issue foreign writeback flushes for recorded foreign dirtying events */
+void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
+	unsigned long intv = msecs_to_jiffies(dirty_expire_interval * 10);
+	u64 now = jiffies_64;
+	int i;
+
+	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++) {
+		struct memcg_cgwb_frn *frn = &memcg->cgwb_frn[i];
+
+		/*
+		 * If the record is older than dirty_expire_interval,
+		 * writeback on it has already started.  No need to kick it
+		 * off again.  Also, don't start a new one if there's
+		 * already one in flight.
+		 */
+		if (time_after64(frn->at, now - intv) &&
+		    atomic_read(&frn->done.cnt) == 1) {
+			frn->at = 0;
+			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+					       WB_REASON_FOREIGN_FLUSH,
+					       &frn->done);
+		}
+	}
+}
+
 #else	/* CONFIG_CGROUP_WRITEBACK */
 
 static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
@@ -4661,6 +4786,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	struct mem_cgroup *memcg;
 	unsigned int size;
 	int node;
+	int __maybe_unused i;
 
 	size = sizeof(struct mem_cgroup);
 	size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
@@ -4704,6 +4830,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 #endif
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
+	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
+		memcg->cgwb_frn[i].done =
+			__WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
 #endif
 	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
 	return memcg;
@@ -4833,7 +4962,12 @@ static void mem_cgroup_css_released(struct cgroup_subsys_state *css)
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	int __maybe_unused i;
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
+		wb_wait_for_completion(&memcg->cgwb_frn[i].done);
+#endif
 	if (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nosocket)
 		static_branch_dec(&memcg_sockets_enabled_key);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1804f64ff43c..50055d2e4ea8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1667,6 +1667,8 @@ static void balance_dirty_pages(struct bdi_writeback *wb,
 		if (unlikely(!writeback_in_progress(wb)))
 			wb_start_background_writeback(wb);
 
+		mem_cgroup_flush_foreign(wb);
+
 		/*
 		 * Calculate global domain's pos_ratio and select the
 		 * global dtc by default.
@@ -2427,6 +2429,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		task_io_account_write(PAGE_SIZE);
 		current->nr_dirtied++;
 		this_cpu_inc(bdp_ratelimits);
+
+		mem_cgroup_track_foreign_dirty(page, wb);
 	}
 }
 
-- 
cgit v1.2.3


From d0a8d877da976c244092ce859683b2fa116217db Mon Sep 17 00:00:00 2001
From: Ander Juaristi <a@juaristi.eus>
Date: Sat, 17 Aug 2019 13:26:52 +0200
Subject: netfilter: nft_dynset: support for element deletion

This patch implements the delete operation from the ruleset.

It implements a new delete() function in nft_set_rhash. It is simpler
to use than the already existing remove(), because it only takes the set
and the key as arguments, whereas remove() expects a full
nft_set_elem structure.

Signed-off-by: Ander Juaristi <a@juaristi.eus>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 10 +++++++++-
 include/uapi/linux/netfilter/nf_tables.h |  1 +
 net/netfilter/nft_dynset.c               |  6 ++++++
 net/netfilter/nft_set_hash.c             | 19 +++++++++++++++++++
 4 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 64765140657b..498665158ee0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -302,17 +302,23 @@ struct nft_expr;
  *	struct nft_set_ops - nf_tables set operations
  *
  *	@lookup: look up an element within the set
+ *	@update: update an element if exists, add it if doesn't exist
+ *	@delete: delete an element
  *	@insert: insert new element into set
  *	@activate: activate new element in the next generation
  *	@deactivate: lookup for element and deactivate it in the next generation
  *	@flush: deactivate element in the next generation
  *	@remove: remove element from set
- *	@walk: iterate over all set elemeennts
+ *	@walk: iterate over all set elements
  *	@get: get set elements
  *	@privsize: function to return size of set private data
  *	@init: initialize private data of new set instance
  *	@destroy: destroy private data of set instance
  *	@elemsize: element private size
+ *
+ *	Operations lookup, update and delete have simpler interfaces, are faster
+ *	and currently only used in the packet path. All the rest are slower,
+ *	control plane functions.
  */
 struct nft_set_ops {
 	bool				(*lookup)(const struct net *net,
@@ -327,6 +333,8 @@ struct nft_set_ops {
 						  const struct nft_expr *expr,
 						  struct nft_regs *regs,
 						  const struct nft_set_ext **ext);
+	bool				(*delete)(const struct nft_set *set,
+						  const u32 *key);
 
 	int				(*insert)(const struct net *net,
 						  const struct nft_set *set,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b83b62eb4b01..0ff932dadc8e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -636,6 +636,7 @@ enum nft_lookup_attributes {
 enum nft_dynset_ops {
 	NFT_DYNSET_OP_ADD,
 	NFT_DYNSET_OP_UPDATE,
+	NFT_DYNSET_OP_DELETE,
 };
 
 enum nft_dynset_flags {
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 33833a0cb989..8887295414dc 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -84,6 +84,11 @@ void nft_dynset_eval(const struct nft_expr *expr,
 	const struct nft_expr *sexpr;
 	u64 timeout;
 
+	if (priv->op == NFT_DYNSET_OP_DELETE) {
+		set->ops->delete(set, &regs->data[priv->sreg_key]);
+		return;
+	}
+
 	if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
 			     expr, regs, &ext)) {
 		sexpr = NULL;
@@ -161,6 +166,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
 	switch (priv->op) {
 	case NFT_DYNSET_OP_ADD:
+	case NFT_DYNSET_OP_DELETE:
 		break;
 	case NFT_DYNSET_OP_UPDATE:
 		if (!(set->flags & NFT_SET_TIMEOUT))
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index c490451fcebf..b331a3c9a3a8 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -234,6 +234,24 @@ static void nft_rhash_remove(const struct net *net,
 	rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
 }
 
+static bool nft_rhash_delete(const struct nft_set *set,
+			     const u32 *key)
+{
+	struct nft_rhash *priv = nft_set_priv(set);
+	struct nft_rhash_cmp_arg arg = {
+		.genmask = NFT_GENMASK_ANY,
+		.set = set,
+		.key = key,
+	};
+	struct nft_rhash_elem *he;
+
+	he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
+	if (he == NULL)
+		return false;
+
+	return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+}
+
 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 			   struct nft_set_iter *iter)
 {
@@ -662,6 +680,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = {
 		.remove		= nft_rhash_remove,
 		.lookup		= nft_rhash_lookup,
 		.update		= nft_rhash_update,
+		.delete		= nft_rhash_delete,
 		.walk		= nft_rhash_walk,
 		.get		= nft_rhash_get,
 	},
-- 
cgit v1.2.3


From 9685b2270211628e27ea7880a02b52efd4524099 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 27 Aug 2019 19:01:44 +0800
Subject: block: Remove blk_mq_register_dev()

This function has no callers. Hence remove it.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sysfs.c   | 11 -----------
 include/linux/blk-mq.h |  1 -
 2 files changed, 12 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index d6e1a9bd7131..6ddde3774ebe 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -349,17 +349,6 @@ unreg:
 	return ret;
 }
 
-int blk_mq_register_dev(struct device *dev, struct request_queue *q)
-{
-	int ret;
-
-	mutex_lock(&q->sysfs_lock);
-	ret = __blk_mq_register_dev(dev, q);
-	mutex_unlock(&q->sysfs_lock);
-
-	return ret;
-}
-
 void blk_mq_sysfs_unregister(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 21cebe901ac0..62a3bb715899 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -253,7 +253,6 @@ struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
 						const struct blk_mq_ops *ops,
 						unsigned int queue_depth,
 						unsigned int set_flags);
-int blk_mq_register_dev(struct device *, struct request_queue *);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
-- 
cgit v1.2.3


From 58c898ba370e68d39470cd0d932b524682c1f9be Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 27 Aug 2019 19:01:47 +0800
Subject: block: add helper for checking if queue is registered

There are 4 users which check if queue is registered, so add one helper
to check it.

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c      | 4 ++--
 block/blk-wbt.c        | 2 +-
 block/elevator.c       | 2 +-
 include/linux/blkdev.h | 1 +
 4 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 977c659dcd18..5b0b5224cfd4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -942,7 +942,7 @@ int blk_register_queue(struct gendisk *disk)
 	if (WARN_ON(!q))
 		return -ENXIO;
 
-	WARN_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags),
+	WARN_ONCE(blk_queue_registered(q),
 		  "%s is registering an already registered queue\n",
 		  kobject_name(&dev->kobj));
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
@@ -1026,7 +1026,7 @@ void blk_unregister_queue(struct gendisk *disk)
 		return;
 
 	/* Return early if disk->queue was never registered. */
-	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+	if (!blk_queue_registered(q))
 		return;
 
 	/*
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 313f45a37e9d..c4d3089e47f7 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -656,7 +656,7 @@ void wbt_enable_default(struct request_queue *q)
 		return;
 
 	/* Queue not registered? Maybe shutting down... */
-	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+	if (!blk_queue_registered(q))
 		return;
 
 	if (queue_is_mq(q) && IS_ENABLED(CONFIG_BLK_WBT_MQ))
diff --git a/block/elevator.c b/block/elevator.c
index 33c15fb54ed1..03d923196569 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -656,7 +656,7 @@ static int __elevator_change(struct request_queue *q, const char *name)
 	struct elevator_type *e;
 
 	/* Make sure queue is not in the middle of being removed */
-	if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+	if (!blk_queue_registered(q))
 		return -ENOENT;
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4798bb25f1ee..d5077f3fdfd6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -643,6 +643,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
 #define blk_queue_pm_only(q)	atomic_read(&(q)->pm_only)
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
+#define blk_queue_registered(q)	test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
 
 extern void blk_set_pm_only(struct request_queue *q);
 extern void blk_clear_pm_only(struct request_queue *q);
-- 
cgit v1.2.3


From cecf5d87ff2035127bb5a9ee054d0023a4a7cad3 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 27 Aug 2019 19:01:48 +0800
Subject: block: split .sysfs_lock into two locks

The kernfs built-in lock of 'kn->count' is held in sysfs .show/.store
path. Meantime, inside block's .show/.store callback, q->sysfs_lock is
required.

However, when mq & iosched kobjects are removed via
blk_mq_unregister_dev() & elv_unregister_queue(), q->sysfs_lock is held
too. This way causes AB-BA lock because the kernfs built-in lock of
'kn-count' is required inside kobject_del() too, see the lockdep warning[1].

On the other hand, it isn't necessary to acquire q->sysfs_lock for
both blk_mq_unregister_dev() & elv_unregister_queue() because
clearing REGISTERED flag prevents storing to 'queue/scheduler'
from being happened. Also sysfs write(store) is exclusive, so no
necessary to hold the lock for elv_unregister_queue() when it is
called in switching elevator path.

So split .sysfs_lock into two: one is still named as .sysfs_lock for
covering sync .store, the other one is named as .sysfs_dir_lock
for covering kobjects and related status change.

sysfs itself can handle the race between add/remove kobjects and
showing/storing attributes under kobjects. For switching scheduler
via storing to 'queue/scheduler', we use the queue flag of
QUEUE_FLAG_REGISTERED with .sysfs_lock for avoiding the race, then
we can avoid to hold .sysfs_lock during removing/adding kobjects.

[1]  lockdep warning
    ======================================================
    WARNING: possible circular locking dependency detected
    5.3.0-rc3-00044-g73277fc75ea0 #1380 Not tainted
    ------------------------------------------------------
    rmmod/777 is trying to acquire lock:
    00000000ac50e981 (kn->count#202){++++}, at: kernfs_remove_by_name_ns+0x59/0x72

    but task is already holding lock:
    00000000fb16ae21 (&q->sysfs_lock){+.+.}, at: blk_unregister_queue+0x78/0x10b

    which lock already depends on the new lock.

    the existing dependency chain (in reverse order) is:

    -> #1 (&q->sysfs_lock){+.+.}:
           __lock_acquire+0x95f/0xa2f
           lock_acquire+0x1b4/0x1e8
           __mutex_lock+0x14a/0xa9b
           blk_mq_hw_sysfs_show+0x63/0xb6
           sysfs_kf_seq_show+0x11f/0x196
           seq_read+0x2cd/0x5f2
           vfs_read+0xc7/0x18c
           ksys_read+0xc4/0x13e
           do_syscall_64+0xa7/0x295
           entry_SYSCALL_64_after_hwframe+0x49/0xbe

    -> #0 (kn->count#202){++++}:
           check_prev_add+0x5d2/0xc45
           validate_chain+0xed3/0xf94
           __lock_acquire+0x95f/0xa2f
           lock_acquire+0x1b4/0x1e8
           __kernfs_remove+0x237/0x40b
           kernfs_remove_by_name_ns+0x59/0x72
           remove_files+0x61/0x96
           sysfs_remove_group+0x81/0xa4
           sysfs_remove_groups+0x3b/0x44
           kobject_del+0x44/0x94
           blk_mq_unregister_dev+0x83/0xdd
           blk_unregister_queue+0xa0/0x10b
           del_gendisk+0x259/0x3fa
           null_del_dev+0x8b/0x1c3 [null_blk]
           null_exit+0x5c/0x95 [null_blk]
           __se_sys_delete_module+0x204/0x337
           do_syscall_64+0xa7/0x295
           entry_SYSCALL_64_after_hwframe+0x49/0xbe

    other info that might help us debug this:

     Possible unsafe locking scenario:

           CPU0                    CPU1
           ----                    ----
      lock(&q->sysfs_lock);
                                   lock(kn->count#202);
                                   lock(&q->sysfs_lock);
      lock(kn->count#202);

     *** DEADLOCK ***

    2 locks held by rmmod/777:
     #0: 00000000e69bd9de (&lock){+.+.}, at: null_exit+0x2e/0x95 [null_blk]
     #1: 00000000fb16ae21 (&q->sysfs_lock){+.+.}, at: blk_unregister_queue+0x78/0x10b

    stack backtrace:
    CPU: 0 PID: 777 Comm: rmmod Not tainted 5.3.0-rc3-00044-g73277fc75ea0 #1380
    Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS ?-20180724_192412-buildhw-07.phx4
    Call Trace:
     dump_stack+0x9a/0xe6
     check_noncircular+0x207/0x251
     ? print_circular_bug+0x32a/0x32a
     ? find_usage_backwards+0x84/0xb0
     check_prev_add+0x5d2/0xc45
     validate_chain+0xed3/0xf94
     ? check_prev_add+0xc45/0xc45
     ? mark_lock+0x11b/0x804
     ? check_usage_forwards+0x1ca/0x1ca
     __lock_acquire+0x95f/0xa2f
     lock_acquire+0x1b4/0x1e8
     ? kernfs_remove_by_name_ns+0x59/0x72
     __kernfs_remove+0x237/0x40b
     ? kernfs_remove_by_name_ns+0x59/0x72
     ? kernfs_next_descendant_post+0x7d/0x7d
     ? strlen+0x10/0x23
     ? strcmp+0x22/0x44
     kernfs_remove_by_name_ns+0x59/0x72
     remove_files+0x61/0x96
     sysfs_remove_group+0x81/0xa4
     sysfs_remove_groups+0x3b/0x44
     kobject_del+0x44/0x94
     blk_mq_unregister_dev+0x83/0xdd
     blk_unregister_queue+0xa0/0x10b
     del_gendisk+0x259/0x3fa
     ? disk_events_poll_msecs_store+0x12b/0x12b
     ? check_flags+0x1ea/0x204
     ? mark_held_locks+0x1f/0x7a
     null_del_dev+0x8b/0x1c3 [null_blk]
     null_exit+0x5c/0x95 [null_blk]
     __se_sys_delete_module+0x204/0x337
     ? free_module+0x39f/0x39f
     ? blkcg_maybe_throttle_current+0x8a/0x718
     ? rwlock_bug+0x62/0x62
     ? __blkcg_punt_bio_submit+0xd0/0xd0
     ? trace_hardirqs_on_thunk+0x1a/0x20
     ? mark_held_locks+0x1f/0x7a
     ? do_syscall_64+0x4c/0x295
     do_syscall_64+0xa7/0x295
     entry_SYSCALL_64_after_hwframe+0x49/0xbe
    RIP: 0033:0x7fb696cdbe6b
    Code: 73 01 c3 48 8b 0d 1d 20 0c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 008
    RSP: 002b:00007ffec9588788 EFLAGS: 00000206 ORIG_RAX: 00000000000000b0
    RAX: ffffffffffffffda RBX: 0000559e589137c0 RCX: 00007fb696cdbe6b
    RDX: 000000000000000a RSI: 0000000000000800 RDI: 0000559e58913828
    RBP: 0000000000000000 R08: 00007ffec9587701 R09: 0000000000000000
    R10: 00007fb696d4eae0 R11: 0000000000000206 R12: 00007ffec95889b0
    R13: 00007ffec95896b3 R14: 0000559e58913260 R15: 0000559e589137c0

Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  1 +
 block/blk-mq-sysfs.c   | 12 +++++------
 block/blk-sysfs.c      | 46 ++++++++++++++++++++++++-----------------
 block/blk.h            |  2 +-
 block/elevator.c       | 55 ++++++++++++++++++++++++++++++++++++++++++--------
 include/linux/blkdev.h |  1 +
 6 files changed, 84 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 5d0d7441a443..77807a5d7f9e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	mutex_init(&q->blk_trace_mutex);
 #endif
 	mutex_init(&q->sysfs_lock);
+	mutex_init(&q->sysfs_dir_lock);
 	spin_lock_init(&q->queue_lock);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 6ddde3774ebe..a0d3ce30fa08 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -270,7 +270,7 @@ void blk_mq_unregister_dev(struct device *dev, struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	lockdep_assert_held(&q->sysfs_lock);
+	lockdep_assert_held(&q->sysfs_dir_lock);
 
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
@@ -320,7 +320,7 @@ int __blk_mq_register_dev(struct device *dev, struct request_queue *q)
 	int ret, i;
 
 	WARN_ON_ONCE(!q->kobj.parent);
-	lockdep_assert_held(&q->sysfs_lock);
+	lockdep_assert_held(&q->sysfs_dir_lock);
 
 	ret = kobject_add(q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
@@ -354,7 +354,7 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->sysfs_dir_lock);
 	if (!q->mq_sysfs_init_done)
 		goto unlock;
 
@@ -362,7 +362,7 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 		blk_mq_unregister_hctx(hctx);
 
 unlock:
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->sysfs_dir_lock);
 }
 
 int blk_mq_sysfs_register(struct request_queue *q)
@@ -370,7 +370,7 @@ int blk_mq_sysfs_register(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i, ret = 0;
 
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->sysfs_dir_lock);
 	if (!q->mq_sysfs_init_done)
 		goto unlock;
 
@@ -381,7 +381,7 @@ int blk_mq_sysfs_register(struct request_queue *q)
 	}
 
 unlock:
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->sysfs_dir_lock);
 
 	return ret;
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 5b0b5224cfd4..107513495220 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -938,6 +938,7 @@ int blk_register_queue(struct gendisk *disk)
 	int ret;
 	struct device *dev = disk_to_dev(disk);
 	struct request_queue *q = disk->queue;
+	bool has_elevator = false;
 
 	if (WARN_ON(!q))
 		return -ENXIO;
@@ -945,7 +946,6 @@ int blk_register_queue(struct gendisk *disk)
 	WARN_ONCE(blk_queue_registered(q),
 		  "%s is registering an already registered queue\n",
 		  kobject_name(&dev->kobj));
-	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
 
 	/*
 	 * SCSI probing may synchronously create and destroy a lot of
@@ -965,8 +965,7 @@ int blk_register_queue(struct gendisk *disk)
 	if (ret)
 		return ret;
 
-	/* Prevent changes through sysfs until registration is completed. */
-	mutex_lock(&q->sysfs_lock);
+	mutex_lock(&q->sysfs_dir_lock);
 
 	ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue");
 	if (ret < 0) {
@@ -987,26 +986,36 @@ int blk_register_queue(struct gendisk *disk)
 		blk_mq_debugfs_register(q);
 	}
 
-	kobject_uevent(&q->kobj, KOBJ_ADD);
-
-	wbt_enable_default(q);
-
-	blk_throtl_register_queue(q);
-
+	/*
+	 * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator
+	 * switch won't happen at all.
+	 */
 	if (q->elevator) {
-		ret = elv_register_queue(q);
+		ret = elv_register_queue(q, false);
 		if (ret) {
-			mutex_unlock(&q->sysfs_lock);
-			kobject_uevent(&q->kobj, KOBJ_REMOVE);
+			mutex_unlock(&q->sysfs_dir_lock);
 			kobject_del(&q->kobj);
 			blk_trace_remove_sysfs(dev);
 			kobject_put(&dev->kobj);
 			return ret;
 		}
+		has_elevator = true;
 	}
+
+	mutex_lock(&q->sysfs_lock);
+	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
+	wbt_enable_default(q);
+	blk_throtl_register_queue(q);
+
+	/* Now everything is ready and send out KOBJ_ADD uevent */
+	kobject_uevent(&q->kobj, KOBJ_ADD);
+	if (has_elevator)
+		kobject_uevent(&q->elevator->kobj, KOBJ_ADD);
+	mutex_unlock(&q->sysfs_lock);
+
 	ret = 0;
 unlock:
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->sysfs_dir_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_register_queue);
@@ -1021,6 +1030,7 @@ EXPORT_SYMBOL_GPL(blk_register_queue);
 void blk_unregister_queue(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
+	bool has_elevator;
 
 	if (WARN_ON(!q))
 		return;
@@ -1035,25 +1045,25 @@ void blk_unregister_queue(struct gendisk *disk)
 	 * concurrent elv_iosched_store() calls.
 	 */
 	mutex_lock(&q->sysfs_lock);
-
 	blk_queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
+	has_elevator = !!q->elevator;
+	mutex_unlock(&q->sysfs_lock);
 
+	mutex_lock(&q->sysfs_dir_lock);
 	/*
 	 * Remove the sysfs attributes before unregistering the queue data
 	 * structures that can be modified through sysfs.
 	 */
 	if (queue_is_mq(q))
 		blk_mq_unregister_dev(disk_to_dev(disk), q);
-	mutex_unlock(&q->sysfs_lock);
 
 	kobject_uevent(&q->kobj, KOBJ_REMOVE);
 	kobject_del(&q->kobj);
 	blk_trace_remove_sysfs(disk_to_dev(disk));
 
-	mutex_lock(&q->sysfs_lock);
-	if (q->elevator)
+	if (has_elevator)
 		elv_unregister_queue(q);
-	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->sysfs_dir_lock);
 
 	kobject_put(&disk_to_dev(disk)->kobj);
 }
diff --git a/block/blk.h b/block/blk.h
index de6b2e146d6e..e4619fc5c99a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -188,7 +188,7 @@ int elevator_init_mq(struct request_queue *q);
 int elevator_switch_mq(struct request_queue *q,
 			      struct elevator_type *new_e);
 void __elevator_exit(struct request_queue *, struct elevator_queue *);
-int elv_register_queue(struct request_queue *q);
+int elv_register_queue(struct request_queue *q, bool uevent);
 void elv_unregister_queue(struct request_queue *q);
 
 static inline void elevator_exit(struct request_queue *q,
diff --git a/block/elevator.c b/block/elevator.c
index 03d923196569..4781c4205a5d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -470,13 +470,16 @@ static struct kobj_type elv_ktype = {
 	.release	= elevator_release,
 };
 
-int elv_register_queue(struct request_queue *q)
+/*
+ * elv_register_queue is called from either blk_register_queue or
+ * elevator_switch, elevator switch is prevented from being happen
+ * in the two paths, so it is safe to not hold q->sysfs_lock.
+ */
+int elv_register_queue(struct request_queue *q, bool uevent)
 {
 	struct elevator_queue *e = q->elevator;
 	int error;
 
-	lockdep_assert_held(&q->sysfs_lock);
-
 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
 	if (!error) {
 		struct elv_fs_entry *attr = e->type->elevator_attrs;
@@ -487,24 +490,34 @@ int elv_register_queue(struct request_queue *q)
 				attr++;
 			}
 		}
-		kobject_uevent(&e->kobj, KOBJ_ADD);
+		if (uevent)
+			kobject_uevent(&e->kobj, KOBJ_ADD);
+
+		mutex_lock(&q->sysfs_lock);
 		e->registered = 1;
+		mutex_unlock(&q->sysfs_lock);
 	}
 	return error;
 }
 
+/*
+ * elv_unregister_queue is called from either blk_unregister_queue or
+ * elevator_switch, elevator switch is prevented from being happen
+ * in the two paths, so it is safe to not hold q->sysfs_lock.
+ */
 void elv_unregister_queue(struct request_queue *q)
 {
-	lockdep_assert_held(&q->sysfs_lock);
-
 	if (q) {
 		struct elevator_queue *e = q->elevator;
 
 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
 		kobject_del(&e->kobj);
+
+		mutex_lock(&q->sysfs_lock);
 		e->registered = 0;
 		/* Re-enable throttling in case elevator disabled it */
 		wbt_enable_default(q);
+		mutex_unlock(&q->sysfs_lock);
 	}
 }
 
@@ -567,10 +580,32 @@ int elevator_switch_mq(struct request_queue *q,
 	lockdep_assert_held(&q->sysfs_lock);
 
 	if (q->elevator) {
-		if (q->elevator->registered)
+		if (q->elevator->registered) {
+			mutex_unlock(&q->sysfs_lock);
+
+			/*
+			 * Concurrent elevator switch can't happen becasue
+			 * sysfs write is always exclusively on same file.
+			 *
+			 * Also the elevator queue won't be freed after
+			 * sysfs_lock is released becasue kobject_del() in
+			 * blk_unregister_queue() waits for completion of
+			 * .store & .show on its attributes.
+			 */
 			elv_unregister_queue(q);
+
+			mutex_lock(&q->sysfs_lock);
+		}
 		ioc_clear_queue(q);
 		elevator_exit(q, q->elevator);
+
+		/*
+		 * sysfs_lock may be dropped, so re-check if queue is
+		 * unregistered. If yes, don't switch to new elevator
+		 * any more
+		 */
+		if (!blk_queue_registered(q))
+			return 0;
 	}
 
 	ret = blk_mq_init_sched(q, new_e);
@@ -578,7 +613,11 @@ int elevator_switch_mq(struct request_queue *q,
 		goto out;
 
 	if (new_e) {
-		ret = elv_register_queue(q);
+		mutex_unlock(&q->sysfs_lock);
+
+		ret = elv_register_queue(q, true);
+
+		mutex_lock(&q->sysfs_lock);
 		if (ret) {
 			elevator_exit(q, q->elevator);
 			goto out;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d5077f3fdfd6..1ac790178787 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -535,6 +535,7 @@ struct request_queue {
 	struct delayed_work	requeue_work;
 
 	struct mutex		sysfs_lock;
+	struct mutex		sysfs_dir_lock;
 
 	/*
 	 * for reusing dead hctx instance in case of updating
-- 
cgit v1.2.3


From a1db98f20b818edf15f4ad70072f771b6f185949 Mon Sep 17 00:00:00 2001
From: Gao Xiang <gaoxiang25@huawei.com>
Date: Mon, 26 Aug 2019 21:26:53 +0800
Subject: erofs: fix compile warnings when moving out
 include/trace/events/erofs.h

As Stephon reported [1], many compile warnings are raised when
moving out include/trace/events/erofs.h:

In file included from include/trace/events/erofs.h:8,
                 from <command-line>:
include/trace/events/erofs.h:28:37: warning: 'struct dentry' declared inside parameter list will not be visible outside of this definition or declaration
  TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
                                     ^~~~~~
include/linux/tracepoint.h:233:34: note: in definition of macro '__DECLARE_TRACE'
  static inline void trace_##name(proto)    \
                                  ^~~~~
include/linux/tracepoint.h:396:24: note: in expansion of macro 'PARAMS'
  __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),  \
                        ^~~~~~
include/linux/tracepoint.h:532:2: note: in expansion of macro 'DECLARE_TRACE'
  DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
  ^~~~~~~~~~~~~
include/linux/tracepoint.h:532:22: note: in expansion of macro 'PARAMS'
  DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
                      ^~~~~~
include/trace/events/erofs.h:26:1: note: in expansion of macro 'TRACE_EVENT'
 TRACE_EVENT(erofs_lookup,
 ^~~~~~~~~~~
include/trace/events/erofs.h:28:2: note: in expansion of macro 'TP_PROTO'
  TP_PROTO(struct inode *dir, struct dentry *dentry, unsigned int flags),
  ^~~~~~~~

That makes me very confused since most original EROFS tracepoint code
was taken from f2fs, and finally I found

commit 43c78d88036e ("kbuild: compile-test kernel headers to ensure they are self-contained")

It seems these warnings are generated from KERNEL_HEADER_TEST feature and
ext4/f2fs tracepoint files were in blacklist.

Anyway, let's fix these issues for KERNEL_HEADER_TEST feature instead
of adding to blacklist...

[1] https://lore.kernel.org/lkml/20190826162432.11100665@canb.auug.org.au/

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Link: https://lore.kernel.org/r/20190826132653.100731-1-gaoxiang25@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/trace/events/erofs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index bfb2da9c4eee..d239f39cbc8c 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -6,6 +6,9 @@
 #define _TRACE_EROFS_H
 
 #include <linux/tracepoint.h>
+#include <linux/fs.h>
+
+struct erofs_map_blocks;
 
 #define show_dev(dev)		MAJOR(dev), MINOR(dev)
 #define show_dev_nid(entry)	show_dev(entry->dev), entry->nid
-- 
cgit v1.2.3


From ec0ad868173da8a75121f9dc116a5d5478ff614d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 25 Aug 2019 07:54:27 +0200
Subject: staging: greybus: move core include files to include/linux/greybus/

With the goal of moving the core of the greybus code out of staging, the
include files need to be moved to include/linux/greybus.h and
include/linux/greybus/

Cc: Vaibhav Hiremath <hvaibhav.linux@gmail.com>
Cc: Johan Hovold <johan@kernel.org>
Cc: Vaibhav Agarwal <vaibhav.sr@gmail.com>
Cc: Rui Miguel Silva <rmfrfs@gmail.com>
Cc: David Lin <dtwlin@gmail.com>
Cc: "Bryan O'Donoghue" <pure.logic@nexus-software.ie>
Cc: greybus-dev@lists.linaro.org
Cc: devel@driverdev.osuosl.org
Acked-by: Mark Greer <mgreer@animalcreek.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Acked-by: Alex Elder <elder@kernel.org>
Link: https://lore.kernel.org/r/20190825055429.18547-8-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/greybus/arche-platform.c    |    2 +-
 drivers/staging/greybus/audio_apbridgea.c   |    3 +-
 drivers/staging/greybus/audio_codec.h       |    4 +-
 drivers/staging/greybus/audio_gb.c          |    4 +-
 drivers/staging/greybus/authentication.c    |    3 +-
 drivers/staging/greybus/bootrom.c           |    2 +-
 drivers/staging/greybus/bundle.c            |    2 +-
 drivers/staging/greybus/bundle.h            |   89 --
 drivers/staging/greybus/camera.c            |    2 +-
 drivers/staging/greybus/connection.c        |    2 +-
 drivers/staging/greybus/connection.h        |  128 --
 drivers/staging/greybus/control.c           |    2 +-
 drivers/staging/greybus/control.h           |   57 -
 drivers/staging/greybus/core.c              |    2 +-
 drivers/staging/greybus/debugfs.c           |    3 +-
 drivers/staging/greybus/es2.c               |    3 +-
 drivers/staging/greybus/firmware.h          |    2 +-
 drivers/staging/greybus/fw-core.c           |    2 +-
 drivers/staging/greybus/fw-download.c       |    2 +-
 drivers/staging/greybus/fw-management.c     |    2 +-
 drivers/staging/greybus/gbphy.c             |    2 +-
 drivers/staging/greybus/gpio.c              |    2 +-
 drivers/staging/greybus/greybus.h           |  152 --
 drivers/staging/greybus/greybus_id.h        |   27 -
 drivers/staging/greybus/greybus_manifest.h  |  178 ---
 drivers/staging/greybus/greybus_protocols.h | 2176 ---------------------------
 drivers/staging/greybus/hd.c                |    2 +-
 drivers/staging/greybus/hd.h                |   82 -
 drivers/staging/greybus/hid.c               |    3 +-
 drivers/staging/greybus/i2c.c               |    2 +-
 drivers/staging/greybus/interface.c         |    2 +-
 drivers/staging/greybus/interface.h         |   82 -
 drivers/staging/greybus/light.c             |    4 +-
 drivers/staging/greybus/log.c               |    3 +-
 drivers/staging/greybus/loopback.c          |    5 +-
 drivers/staging/greybus/manifest.c          |    2 +-
 drivers/staging/greybus/manifest.h          |   15 -
 drivers/staging/greybus/module.c            |    2 +-
 drivers/staging/greybus/module.h            |   33 -
 drivers/staging/greybus/operation.c         |    2 +-
 drivers/staging/greybus/operation.h         |  224 ---
 drivers/staging/greybus/power_supply.c      |    3 +-
 drivers/staging/greybus/pwm.c               |    2 +-
 drivers/staging/greybus/raw.c               |    3 +-
 drivers/staging/greybus/sdio.c              |    2 +-
 drivers/staging/greybus/spi.c               |    2 +-
 drivers/staging/greybus/spilib.c            |    2 +-
 drivers/staging/greybus/svc.c               |    3 +-
 drivers/staging/greybus/svc.h               |  101 --
 drivers/staging/greybus/svc_watchdog.c      |    2 +-
 drivers/staging/greybus/uart.c              |    2 +-
 drivers/staging/greybus/usb.c               |    2 +-
 drivers/staging/greybus/vibrator.c          |    3 +-
 include/linux/greybus.h                     |  152 ++
 include/linux/greybus/bundle.h              |   89 ++
 include/linux/greybus/connection.h          |  128 ++
 include/linux/greybus/control.h             |   57 +
 include/linux/greybus/greybus_id.h          |   27 +
 include/linux/greybus/greybus_manifest.h    |  178 +++
 include/linux/greybus/greybus_protocols.h   | 2176 +++++++++++++++++++++++++++
 include/linux/greybus/hd.h                  |   82 +
 include/linux/greybus/interface.h           |   82 +
 include/linux/greybus/manifest.h            |   15 +
 include/linux/greybus/module.h              |   33 +
 include/linux/greybus/operation.h           |  224 +++
 include/linux/greybus/svc.h                 |  101 ++
 66 files changed, 3384 insertions(+), 3403 deletions(-)
 delete mode 100644 drivers/staging/greybus/bundle.h
 delete mode 100644 drivers/staging/greybus/connection.h
 delete mode 100644 drivers/staging/greybus/control.h
 delete mode 100644 drivers/staging/greybus/greybus.h
 delete mode 100644 drivers/staging/greybus/greybus_id.h
 delete mode 100644 drivers/staging/greybus/greybus_manifest.h
 delete mode 100644 drivers/staging/greybus/greybus_protocols.h
 delete mode 100644 drivers/staging/greybus/hd.h
 delete mode 100644 drivers/staging/greybus/interface.h
 delete mode 100644 drivers/staging/greybus/manifest.h
 delete mode 100644 drivers/staging/greybus/module.h
 delete mode 100644 drivers/staging/greybus/operation.h
 delete mode 100644 drivers/staging/greybus/svc.h
 create mode 100644 include/linux/greybus.h
 create mode 100644 include/linux/greybus/bundle.h
 create mode 100644 include/linux/greybus/connection.h
 create mode 100644 include/linux/greybus/control.h
 create mode 100644 include/linux/greybus/greybus_id.h
 create mode 100644 include/linux/greybus/greybus_manifest.h
 create mode 100644 include/linux/greybus/greybus_protocols.h
 create mode 100644 include/linux/greybus/hd.h
 create mode 100644 include/linux/greybus/interface.h
 create mode 100644 include/linux/greybus/manifest.h
 create mode 100644 include/linux/greybus/module.h
 create mode 100644 include/linux/greybus/operation.h
 create mode 100644 include/linux/greybus/svc.h

(limited to 'include')

diff --git a/drivers/staging/greybus/arche-platform.c b/drivers/staging/greybus/arche-platform.c
index 6eb842040c22..eebf0deb39f5 100644
--- a/drivers/staging/greybus/arche-platform.c
+++ b/drivers/staging/greybus/arche-platform.c
@@ -19,8 +19,8 @@
 #include <linux/irq.h>
 #include <linux/suspend.h>
 #include <linux/time.h>
+#include <linux/greybus.h>
 #include "arche_platform.h"
-#include "greybus.h"
 
 #if IS_ENABLED(CONFIG_USB_HSIC_USB3613)
 #include <linux/usb/usb3613.h>
diff --git a/drivers/staging/greybus/audio_apbridgea.c b/drivers/staging/greybus/audio_apbridgea.c
index 7ebb1bde5cb7..26117e390deb 100644
--- a/drivers/staging/greybus/audio_apbridgea.c
+++ b/drivers/staging/greybus/audio_apbridgea.c
@@ -5,8 +5,7 @@
  * Copyright 2015-2016 Google Inc.
  */
 
-#include "greybus.h"
-#include "greybus_protocols.h"
+#include <linux/greybus.h>
 #include "audio_apbridgea.h"
 #include "audio_codec.h"
 
diff --git a/drivers/staging/greybus/audio_codec.h b/drivers/staging/greybus/audio_codec.h
index 9ba09ea9c2fc..cb5d271da1a5 100644
--- a/drivers/staging/greybus/audio_codec.h
+++ b/drivers/staging/greybus/audio_codec.h
@@ -8,12 +8,10 @@
 #ifndef __LINUX_GBAUDIO_CODEC_H
 #define __LINUX_GBAUDIO_CODEC_H
 
+#include <linux/greybus.h>
 #include <sound/soc.h>
 #include <sound/jack.h>
 
-#include "greybus.h"
-#include "greybus_protocols.h"
-
 #define NAME_SIZE	32
 #define MAX_DAIS	2	/* APB1, APB2 */
 
diff --git a/drivers/staging/greybus/audio_gb.c b/drivers/staging/greybus/audio_gb.c
index 8894f1c87d48..9d8994fdb41a 100644
--- a/drivers/staging/greybus/audio_gb.c
+++ b/drivers/staging/greybus/audio_gb.c
@@ -5,9 +5,7 @@
  * Copyright 2015-2016 Google Inc.
  */
 
-#include "greybus.h"
-#include "greybus_protocols.h"
-#include "operation.h"
+#include <linux/greybus.h>
 #include "audio_codec.h"
 
 /* TODO: Split into separate calls */
diff --git a/drivers/staging/greybus/authentication.c b/drivers/staging/greybus/authentication.c
index a5d7c53df987..297e69f011c7 100644
--- a/drivers/staging/greybus/authentication.c
+++ b/drivers/staging/greybus/authentication.c
@@ -6,8 +6,7 @@
  * Copyright 2016 Linaro Ltd.
  */
 
-#include "greybus.h"
-
+#include <linux/greybus.h>
 #include <linux/cdev.h>
 #include <linux/fs.h>
 #include <linux/ioctl.h>
diff --git a/drivers/staging/greybus/bootrom.c b/drivers/staging/greybus/bootrom.c
index 402e6360834f..a8efb86de140 100644
--- a/drivers/staging/greybus/bootrom.c
+++ b/drivers/staging/greybus/bootrom.c
@@ -10,8 +10,8 @@
 #include <linux/jiffies.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "firmware.h"
 
 /* Timeout, in jiffies, within which the next request must be received */
diff --git a/drivers/staging/greybus/bundle.c b/drivers/staging/greybus/bundle.c
index 3f702db9e098..84660729538b 100644
--- a/drivers/staging/greybus/bundle.c
+++ b/drivers/staging/greybus/bundle.c
@@ -6,7 +6,7 @@
  * Copyright 2014-2015 Linaro Ltd.
  */
 
-#include "greybus.h"
+#include <linux/greybus.h>
 #include "greybus_trace.h"
 
 static ssize_t bundle_class_show(struct device *dev,
diff --git a/drivers/staging/greybus/bundle.h b/drivers/staging/greybus/bundle.h
deleted file mode 100644
index 8734d2055657..000000000000
--- a/drivers/staging/greybus/bundle.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus bundles
- *
- * Copyright 2014 Google Inc.
- * Copyright 2014 Linaro Ltd.
- */
-
-#ifndef __BUNDLE_H
-#define __BUNDLE_H
-
-#include <linux/list.h>
-
-#define	BUNDLE_ID_NONE	U8_MAX
-
-/* Greybus "public" definitions" */
-struct gb_bundle {
-	struct device		dev;
-	struct gb_interface	*intf;
-
-	u8			id;
-	u8			class;
-	u8			class_major;
-	u8			class_minor;
-
-	size_t			num_cports;
-	struct greybus_descriptor_cport *cport_desc;
-
-	struct list_head	connections;
-	u8			*state;
-
-	struct list_head	links;	/* interface->bundles */
-};
-#define to_gb_bundle(d) container_of(d, struct gb_bundle, dev)
-
-/* Greybus "private" definitions" */
-struct gb_bundle *gb_bundle_create(struct gb_interface *intf, u8 bundle_id,
-				   u8 class);
-int gb_bundle_add(struct gb_bundle *bundle);
-void gb_bundle_destroy(struct gb_bundle *bundle);
-
-/* Bundle Runtime PM wrappers */
-#ifdef CONFIG_PM
-static inline int gb_pm_runtime_get_sync(struct gb_bundle *bundle)
-{
-	int retval;
-
-	retval = pm_runtime_get_sync(&bundle->dev);
-	if (retval < 0) {
-		dev_err(&bundle->dev,
-			"pm_runtime_get_sync failed: %d\n", retval);
-		pm_runtime_put_noidle(&bundle->dev);
-		return retval;
-	}
-
-	return 0;
-}
-
-static inline int gb_pm_runtime_put_autosuspend(struct gb_bundle *bundle)
-{
-	int retval;
-
-	pm_runtime_mark_last_busy(&bundle->dev);
-	retval = pm_runtime_put_autosuspend(&bundle->dev);
-
-	return retval;
-}
-
-static inline void gb_pm_runtime_get_noresume(struct gb_bundle *bundle)
-{
-	pm_runtime_get_noresume(&bundle->dev);
-}
-
-static inline void gb_pm_runtime_put_noidle(struct gb_bundle *bundle)
-{
-	pm_runtime_put_noidle(&bundle->dev);
-}
-
-#else
-static inline int gb_pm_runtime_get_sync(struct gb_bundle *bundle)
-{ return 0; }
-static inline int gb_pm_runtime_put_autosuspend(struct gb_bundle *bundle)
-{ return 0; }
-
-static inline void gb_pm_runtime_get_noresume(struct gb_bundle *bundle) {}
-static inline void gb_pm_runtime_put_noidle(struct gb_bundle *bundle) {}
-#endif
-
-#endif /* __BUNDLE_H */
diff --git a/drivers/staging/greybus/camera.c b/drivers/staging/greybus/camera.c
index 615c8e7fd51e..b570e13394ac 100644
--- a/drivers/staging/greybus/camera.c
+++ b/drivers/staging/greybus/camera.c
@@ -14,9 +14,9 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
+#include <linux/greybus.h>
 
 #include "gb-camera.h"
-#include "greybus.h"
 #include "greybus_protocols.h"
 
 enum gb_camera_debugs_buffer_id {
diff --git a/drivers/staging/greybus/connection.c b/drivers/staging/greybus/connection.c
index eda964208cce..fc8f57f97ce6 100644
--- a/drivers/staging/greybus/connection.c
+++ b/drivers/staging/greybus/connection.c
@@ -7,8 +7,8 @@
  */
 
 #include <linux/workqueue.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "greybus_trace.h"
 
 #define GB_CONNECTION_CPORT_QUIESCE_TIMEOUT	1000
diff --git a/drivers/staging/greybus/connection.h b/drivers/staging/greybus/connection.h
deleted file mode 100644
index 5ca3befc0636..000000000000
--- a/drivers/staging/greybus/connection.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus connections
- *
- * Copyright 2014 Google Inc.
- * Copyright 2014 Linaro Ltd.
- */
-
-#ifndef __CONNECTION_H
-#define __CONNECTION_H
-
-#include <linux/list.h>
-#include <linux/kfifo.h>
-
-#define GB_CONNECTION_FLAG_CSD		BIT(0)
-#define GB_CONNECTION_FLAG_NO_FLOWCTRL	BIT(1)
-#define GB_CONNECTION_FLAG_OFFLOADED	BIT(2)
-#define GB_CONNECTION_FLAG_CDSI1	BIT(3)
-#define GB_CONNECTION_FLAG_CONTROL	BIT(4)
-#define GB_CONNECTION_FLAG_HIGH_PRIO	BIT(5)
-
-#define GB_CONNECTION_FLAG_CORE_MASK	GB_CONNECTION_FLAG_CONTROL
-
-enum gb_connection_state {
-	GB_CONNECTION_STATE_DISABLED		= 0,
-	GB_CONNECTION_STATE_ENABLED_TX		= 1,
-	GB_CONNECTION_STATE_ENABLED		= 2,
-	GB_CONNECTION_STATE_DISCONNECTING	= 3,
-};
-
-struct gb_operation;
-
-typedef int (*gb_request_handler_t)(struct gb_operation *);
-
-struct gb_connection {
-	struct gb_host_device		*hd;
-	struct gb_interface		*intf;
-	struct gb_bundle		*bundle;
-	struct kref			kref;
-	u16				hd_cport_id;
-	u16				intf_cport_id;
-
-	struct list_head		hd_links;
-	struct list_head		bundle_links;
-
-	gb_request_handler_t		handler;
-	unsigned long			flags;
-
-	struct mutex			mutex;
-	spinlock_t			lock;
-	enum gb_connection_state	state;
-	struct list_head		operations;
-
-	char				name[16];
-	struct workqueue_struct		*wq;
-
-	atomic_t			op_cycle;
-
-	void				*private;
-
-	bool				mode_switch;
-};
-
-struct gb_connection *gb_connection_create_static(struct gb_host_device *hd,
-				u16 hd_cport_id, gb_request_handler_t handler);
-struct gb_connection *gb_connection_create_control(struct gb_interface *intf);
-struct gb_connection *gb_connection_create(struct gb_bundle *bundle,
-				u16 cport_id, gb_request_handler_t handler);
-struct gb_connection *gb_connection_create_flags(struct gb_bundle *bundle,
-				u16 cport_id, gb_request_handler_t handler,
-				unsigned long flags);
-struct gb_connection *gb_connection_create_offloaded(struct gb_bundle *bundle,
-				u16 cport_id, unsigned long flags);
-void gb_connection_destroy(struct gb_connection *connection);
-
-static inline bool gb_connection_is_static(struct gb_connection *connection)
-{
-	return !connection->intf;
-}
-
-int gb_connection_enable(struct gb_connection *connection);
-int gb_connection_enable_tx(struct gb_connection *connection);
-void gb_connection_disable_rx(struct gb_connection *connection);
-void gb_connection_disable(struct gb_connection *connection);
-void gb_connection_disable_forced(struct gb_connection *connection);
-
-void gb_connection_mode_switch_prepare(struct gb_connection *connection);
-void gb_connection_mode_switch_complete(struct gb_connection *connection);
-
-void greybus_data_rcvd(struct gb_host_device *hd, u16 cport_id,
-		       u8 *data, size_t length);
-
-void gb_connection_latency_tag_enable(struct gb_connection *connection);
-void gb_connection_latency_tag_disable(struct gb_connection *connection);
-
-static inline bool gb_connection_e2efc_enabled(struct gb_connection *connection)
-{
-	return !(connection->flags & GB_CONNECTION_FLAG_CSD);
-}
-
-static inline bool
-gb_connection_flow_control_disabled(struct gb_connection *connection)
-{
-	return connection->flags & GB_CONNECTION_FLAG_NO_FLOWCTRL;
-}
-
-static inline bool gb_connection_is_offloaded(struct gb_connection *connection)
-{
-	return connection->flags & GB_CONNECTION_FLAG_OFFLOADED;
-}
-
-static inline bool gb_connection_is_control(struct gb_connection *connection)
-{
-	return connection->flags & GB_CONNECTION_FLAG_CONTROL;
-}
-
-static inline void *gb_connection_get_data(struct gb_connection *connection)
-{
-	return connection->private;
-}
-
-static inline void gb_connection_set_data(struct gb_connection *connection,
-					  void *data)
-{
-	connection->private = data;
-}
-
-#endif /* __CONNECTION_H */
diff --git a/drivers/staging/greybus/control.c b/drivers/staging/greybus/control.c
index a9e8b6036cac..359a25841973 100644
--- a/drivers/staging/greybus/control.c
+++ b/drivers/staging/greybus/control.c
@@ -9,7 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include "greybus.h"
+#include <linux/greybus.h>
 
 /* Highest control-protocol version supported */
 #define GB_CONTROL_VERSION_MAJOR	0
diff --git a/drivers/staging/greybus/control.h b/drivers/staging/greybus/control.h
deleted file mode 100644
index 3a29ec05f631..000000000000
--- a/drivers/staging/greybus/control.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus CPort control protocol
- *
- * Copyright 2015 Google Inc.
- * Copyright 2015 Linaro Ltd.
- */
-
-#ifndef __CONTROL_H
-#define __CONTROL_H
-
-struct gb_control {
-	struct device dev;
-	struct gb_interface *intf;
-
-	struct gb_connection *connection;
-
-	u8 protocol_major;
-	u8 protocol_minor;
-
-	bool has_bundle_activate;
-	bool has_bundle_version;
-
-	char *vendor_string;
-	char *product_string;
-};
-#define to_gb_control(d) container_of(d, struct gb_control, dev)
-
-struct gb_control *gb_control_create(struct gb_interface *intf);
-int gb_control_enable(struct gb_control *control);
-void gb_control_disable(struct gb_control *control);
-int gb_control_suspend(struct gb_control *control);
-int gb_control_resume(struct gb_control *control);
-int gb_control_add(struct gb_control *control);
-void gb_control_del(struct gb_control *control);
-struct gb_control *gb_control_get(struct gb_control *control);
-void gb_control_put(struct gb_control *control);
-
-int gb_control_get_bundle_versions(struct gb_control *control);
-int gb_control_connected_operation(struct gb_control *control, u16 cport_id);
-int gb_control_disconnected_operation(struct gb_control *control, u16 cport_id);
-int gb_control_disconnecting_operation(struct gb_control *control,
-				       u16 cport_id);
-int gb_control_mode_switch_operation(struct gb_control *control);
-void gb_control_mode_switch_prepare(struct gb_control *control);
-void gb_control_mode_switch_complete(struct gb_control *control);
-int gb_control_get_manifest_size_operation(struct gb_interface *intf);
-int gb_control_get_manifest_operation(struct gb_interface *intf, void *manifest,
-				      size_t size);
-int gb_control_bundle_suspend(struct gb_control *control, u8 bundle_id);
-int gb_control_bundle_resume(struct gb_control *control, u8 bundle_id);
-int gb_control_bundle_deactivate(struct gb_control *control, u8 bundle_id);
-int gb_control_bundle_activate(struct gb_control *control, u8 bundle_id);
-int gb_control_interface_suspend_prepare(struct gb_control *control);
-int gb_control_interface_deactivate_prepare(struct gb_control *control);
-int gb_control_interface_hibernate_abort(struct gb_control *control);
-#endif /* __CONTROL_H */
diff --git a/drivers/staging/greybus/core.c b/drivers/staging/greybus/core.c
index d6b0d49130c0..e546c6431877 100644
--- a/drivers/staging/greybus/core.c
+++ b/drivers/staging/greybus/core.c
@@ -9,7 +9,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #define CREATE_TRACE_POINTS
-#include "greybus.h"
+#include <linux/greybus.h>
 #include "greybus_trace.h"
 
 #define GB_BUNDLE_AUTOSUSPEND_MS	3000
diff --git a/drivers/staging/greybus/debugfs.c b/drivers/staging/greybus/debugfs.c
index 56e20c30feb5..e102d7badb9d 100644
--- a/drivers/staging/greybus/debugfs.c
+++ b/drivers/staging/greybus/debugfs.c
@@ -7,8 +7,7 @@
  */
 
 #include <linux/debugfs.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 static struct dentry *gb_debug_root;
 
diff --git a/drivers/staging/greybus/es2.c b/drivers/staging/greybus/es2.c
index be6af18cec31..366716f11b1a 100644
--- a/drivers/staging/greybus/es2.c
+++ b/drivers/staging/greybus/es2.c
@@ -11,12 +11,11 @@
 #include <linux/kfifo.h>
 #include <linux/debugfs.h>
 #include <linux/list.h>
+#include <linux/greybus.h>
 #include <asm/unaligned.h>
 
 #include "arpc.h"
-#include "greybus.h"
 #include "greybus_trace.h"
-#include "connection.h"
 
 
 /* Default timeout for USB vendor requests. */
diff --git a/drivers/staging/greybus/firmware.h b/drivers/staging/greybus/firmware.h
index 72dfabfa4704..5d2564462ffc 100644
--- a/drivers/staging/greybus/firmware.h
+++ b/drivers/staging/greybus/firmware.h
@@ -9,7 +9,7 @@
 #ifndef __FIRMWARE_H
 #define __FIRMWARE_H
 
-#include "greybus.h"
+#include <linux/greybus.h>
 
 #define FW_NAME_PREFIX	"gmp_"
 
diff --git a/drivers/staging/greybus/fw-core.c b/drivers/staging/greybus/fw-core.c
index 388866d92f5b..57bebf24636b 100644
--- a/drivers/staging/greybus/fw-core.c
+++ b/drivers/staging/greybus/fw-core.c
@@ -8,8 +8,8 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/firmware.h>
+#include <linux/greybus.h>
 #include "firmware.h"
-#include "greybus.h"
 #include "spilib.h"
 
 struct gb_fw_core {
diff --git a/drivers/staging/greybus/fw-download.c b/drivers/staging/greybus/fw-download.c
index d3b7cccbc10d..543692c567f9 100644
--- a/drivers/staging/greybus/fw-download.c
+++ b/drivers/staging/greybus/fw-download.c
@@ -10,8 +10,8 @@
 #include <linux/jiffies.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
+#include <linux/greybus.h>
 #include "firmware.h"
-#include "greybus.h"
 
 /* Estimated minimum buffer size, actual size can be smaller than this */
 #define MIN_FETCH_SIZE		512
diff --git a/drivers/staging/greybus/fw-management.c b/drivers/staging/greybus/fw-management.c
index 71aec14f8181..687c6405c65b 100644
--- a/drivers/staging/greybus/fw-management.c
+++ b/drivers/staging/greybus/fw-management.c
@@ -13,10 +13,10 @@
 #include <linux/idr.h>
 #include <linux/ioctl.h>
 #include <linux/uaccess.h>
+#include <linux/greybus.h>
 
 #include "firmware.h"
 #include "greybus_firmware.h"
-#include "greybus.h"
 
 #define FW_MGMT_TIMEOUT_MS		1000
 
diff --git a/drivers/staging/greybus/gbphy.c b/drivers/staging/greybus/gbphy.c
index 6cb85c3d3572..9fc5c47be9bd 100644
--- a/drivers/staging/greybus/gbphy.c
+++ b/drivers/staging/greybus/gbphy.c
@@ -13,8 +13,8 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/device.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 #define GB_GBPHY_AUTOSUSPEND_MS	3000
diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c
index 3151004d26fb..1ff34abd5692 100644
--- a/drivers/staging/greybus/gpio.c
+++ b/drivers/staging/greybus/gpio.c
@@ -13,8 +13,8 @@
 #include <linux/irqdomain.h>
 #include <linux/gpio/driver.h>
 #include <linux/mutex.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 struct gb_gpio_line {
diff --git a/drivers/staging/greybus/greybus.h b/drivers/staging/greybus/greybus.h
deleted file mode 100644
index f0488ffff93e..000000000000
--- a/drivers/staging/greybus/greybus.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus driver and device API
- *
- * Copyright 2014-2015 Google Inc.
- * Copyright 2014-2015 Linaro Ltd.
- */
-
-#ifndef __LINUX_GREYBUS_H
-#define __LINUX_GREYBUS_H
-
-#ifdef __KERNEL__
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/pm_runtime.h>
-#include <linux/idr.h>
-
-#include "greybus_id.h"
-#include "greybus_manifest.h"
-#include "greybus_protocols.h"
-#include "manifest.h"
-#include "hd.h"
-#include "svc.h"
-#include "control.h"
-#include "module.h"
-#include "interface.h"
-#include "bundle.h"
-#include "connection.h"
-#include "operation.h"
-
-/* Matches up with the Greybus Protocol specification document */
-#define GREYBUS_VERSION_MAJOR	0x00
-#define GREYBUS_VERSION_MINOR	0x01
-
-#define GREYBUS_ID_MATCH_DEVICE \
-	(GREYBUS_ID_MATCH_VENDOR | GREYBUS_ID_MATCH_PRODUCT)
-
-#define GREYBUS_DEVICE(v, p)					\
-	.match_flags	= GREYBUS_ID_MATCH_DEVICE,		\
-	.vendor		= (v),					\
-	.product	= (p),
-
-#define GREYBUS_DEVICE_CLASS(c)					\
-	.match_flags	= GREYBUS_ID_MATCH_CLASS,		\
-	.class		= (c),
-
-/* Maximum number of CPorts */
-#define CPORT_ID_MAX	4095		/* UniPro max id is 4095 */
-#define CPORT_ID_BAD	U16_MAX
-
-struct greybus_driver {
-	const char *name;
-
-	int (*probe)(struct gb_bundle *bundle,
-		     const struct greybus_bundle_id *id);
-	void (*disconnect)(struct gb_bundle *bundle);
-
-	const struct greybus_bundle_id *id_table;
-
-	struct device_driver driver;
-};
-#define to_greybus_driver(d) container_of(d, struct greybus_driver, driver)
-
-static inline void greybus_set_drvdata(struct gb_bundle *bundle, void *data)
-{
-	dev_set_drvdata(&bundle->dev, data);
-}
-
-static inline void *greybus_get_drvdata(struct gb_bundle *bundle)
-{
-	return dev_get_drvdata(&bundle->dev);
-}
-
-/* Don't call these directly, use the module_greybus_driver() macro instead */
-int greybus_register_driver(struct greybus_driver *driver,
-			    struct module *module, const char *mod_name);
-void greybus_deregister_driver(struct greybus_driver *driver);
-
-/* define to get proper THIS_MODULE and KBUILD_MODNAME values */
-#define greybus_register(driver) \
-	greybus_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
-#define greybus_deregister(driver) \
-	greybus_deregister_driver(driver)
-
-/**
- * module_greybus_driver() - Helper macro for registering a Greybus driver
- * @__greybus_driver: greybus_driver structure
- *
- * Helper macro for Greybus drivers to set up proper module init / exit
- * functions.  Replaces module_init() and module_exit() and keeps people from
- * printing pointless things to the kernel log when their driver is loaded.
- */
-#define module_greybus_driver(__greybus_driver)	\
-	module_driver(__greybus_driver, greybus_register, greybus_deregister)
-
-int greybus_disabled(void);
-
-void gb_debugfs_init(void);
-void gb_debugfs_cleanup(void);
-struct dentry *gb_debugfs_get(void);
-
-extern struct bus_type greybus_bus_type;
-
-extern struct device_type greybus_hd_type;
-extern struct device_type greybus_module_type;
-extern struct device_type greybus_interface_type;
-extern struct device_type greybus_control_type;
-extern struct device_type greybus_bundle_type;
-extern struct device_type greybus_svc_type;
-
-static inline int is_gb_host_device(const struct device *dev)
-{
-	return dev->type == &greybus_hd_type;
-}
-
-static inline int is_gb_module(const struct device *dev)
-{
-	return dev->type == &greybus_module_type;
-}
-
-static inline int is_gb_interface(const struct device *dev)
-{
-	return dev->type == &greybus_interface_type;
-}
-
-static inline int is_gb_control(const struct device *dev)
-{
-	return dev->type == &greybus_control_type;
-}
-
-static inline int is_gb_bundle(const struct device *dev)
-{
-	return dev->type == &greybus_bundle_type;
-}
-
-static inline int is_gb_svc(const struct device *dev)
-{
-	return dev->type == &greybus_svc_type;
-}
-
-static inline bool cport_id_valid(struct gb_host_device *hd, u16 cport_id)
-{
-	return cport_id != CPORT_ID_BAD && cport_id < hd->num_cports;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __LINUX_GREYBUS_H */
diff --git a/drivers/staging/greybus/greybus_id.h b/drivers/staging/greybus/greybus_id.h
deleted file mode 100644
index f4c8440093e4..000000000000
--- a/drivers/staging/greybus/greybus_id.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* FIXME
- * move this to include/linux/mod_devicetable.h when merging
- */
-
-#ifndef __LINUX_GREYBUS_ID_H
-#define __LINUX_GREYBUS_ID_H
-
-#include <linux/types.h>
-#include <linux/mod_devicetable.h>
-
-
-struct greybus_bundle_id {
-	__u16	match_flags;
-	__u32	vendor;
-	__u32	product;
-	__u8	class;
-
-	kernel_ulong_t	driver_info __aligned(sizeof(kernel_ulong_t));
-};
-
-/* Used to match the greybus_bundle_id */
-#define GREYBUS_ID_MATCH_VENDOR		BIT(0)
-#define GREYBUS_ID_MATCH_PRODUCT	BIT(1)
-#define GREYBUS_ID_MATCH_CLASS		BIT(2)
-
-#endif /* __LINUX_GREYBUS_ID_H */
diff --git a/drivers/staging/greybus/greybus_manifest.h b/drivers/staging/greybus/greybus_manifest.h
deleted file mode 100644
index db68f7e7d5a7..000000000000
--- a/drivers/staging/greybus/greybus_manifest.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus manifest definition
- *
- * See "Greybus Application Protocol" document (version 0.1) for
- * details on these values and structures.
- *
- * Copyright 2014-2015 Google Inc.
- * Copyright 2014-2015 Linaro Ltd.
- *
- * Released under the GPLv2 and BSD licenses.
- */
-
-#ifndef __GREYBUS_MANIFEST_H
-#define __GREYBUS_MANIFEST_H
-
-enum greybus_descriptor_type {
-	GREYBUS_TYPE_INVALID		= 0x00,
-	GREYBUS_TYPE_INTERFACE		= 0x01,
-	GREYBUS_TYPE_STRING		= 0x02,
-	GREYBUS_TYPE_BUNDLE		= 0x03,
-	GREYBUS_TYPE_CPORT		= 0x04,
-};
-
-enum greybus_protocol {
-	GREYBUS_PROTOCOL_CONTROL	= 0x00,
-	/* 0x01 is unused */
-	GREYBUS_PROTOCOL_GPIO		= 0x02,
-	GREYBUS_PROTOCOL_I2C		= 0x03,
-	GREYBUS_PROTOCOL_UART		= 0x04,
-	GREYBUS_PROTOCOL_HID		= 0x05,
-	GREYBUS_PROTOCOL_USB		= 0x06,
-	GREYBUS_PROTOCOL_SDIO		= 0x07,
-	GREYBUS_PROTOCOL_POWER_SUPPLY	= 0x08,
-	GREYBUS_PROTOCOL_PWM		= 0x09,
-	/* 0x0a is unused */
-	GREYBUS_PROTOCOL_SPI		= 0x0b,
-	GREYBUS_PROTOCOL_DISPLAY	= 0x0c,
-	GREYBUS_PROTOCOL_CAMERA_MGMT	= 0x0d,
-	GREYBUS_PROTOCOL_SENSOR		= 0x0e,
-	GREYBUS_PROTOCOL_LIGHTS		= 0x0f,
-	GREYBUS_PROTOCOL_VIBRATOR	= 0x10,
-	GREYBUS_PROTOCOL_LOOPBACK	= 0x11,
-	GREYBUS_PROTOCOL_AUDIO_MGMT	= 0x12,
-	GREYBUS_PROTOCOL_AUDIO_DATA	= 0x13,
-	GREYBUS_PROTOCOL_SVC            = 0x14,
-	GREYBUS_PROTOCOL_BOOTROM	= 0x15,
-	GREYBUS_PROTOCOL_CAMERA_DATA	= 0x16,
-	GREYBUS_PROTOCOL_FW_DOWNLOAD	= 0x17,
-	GREYBUS_PROTOCOL_FW_MANAGEMENT	= 0x18,
-	GREYBUS_PROTOCOL_AUTHENTICATION	= 0x19,
-	GREYBUS_PROTOCOL_LOG		= 0x1a,
-		/* ... */
-	GREYBUS_PROTOCOL_RAW		= 0xfe,
-	GREYBUS_PROTOCOL_VENDOR		= 0xff,
-};
-
-enum greybus_class_type {
-	GREYBUS_CLASS_CONTROL		= 0x00,
-	/* 0x01 is unused */
-	/* 0x02 is unused */
-	/* 0x03 is unused */
-	/* 0x04 is unused */
-	GREYBUS_CLASS_HID		= 0x05,
-	/* 0x06 is unused */
-	/* 0x07 is unused */
-	GREYBUS_CLASS_POWER_SUPPLY	= 0x08,
-	/* 0x09 is unused */
-	GREYBUS_CLASS_BRIDGED_PHY	= 0x0a,
-	/* 0x0b is unused */
-	GREYBUS_CLASS_DISPLAY		= 0x0c,
-	GREYBUS_CLASS_CAMERA		= 0x0d,
-	GREYBUS_CLASS_SENSOR		= 0x0e,
-	GREYBUS_CLASS_LIGHTS		= 0x0f,
-	GREYBUS_CLASS_VIBRATOR		= 0x10,
-	GREYBUS_CLASS_LOOPBACK		= 0x11,
-	GREYBUS_CLASS_AUDIO		= 0x12,
-	/* 0x13 is unused */
-	/* 0x14 is unused */
-	GREYBUS_CLASS_BOOTROM		= 0x15,
-	GREYBUS_CLASS_FW_MANAGEMENT	= 0x16,
-	GREYBUS_CLASS_LOG		= 0x17,
-		/* ... */
-	GREYBUS_CLASS_RAW		= 0xfe,
-	GREYBUS_CLASS_VENDOR		= 0xff,
-};
-
-enum {
-	GREYBUS_INTERFACE_FEATURE_TIMESYNC = BIT(0),
-};
-
-/*
- * The string in a string descriptor is not NUL-terminated.  The
- * size of the descriptor will be rounded up to a multiple of 4
- * bytes, by padding the string with 0x00 bytes if necessary.
- */
-struct greybus_descriptor_string {
-	__u8	length;
-	__u8	id;
-	__u8	string[0];
-} __packed;
-
-/*
- * An interface descriptor describes information about an interface as a whole,
- * *not* the functions within it.
- */
-struct greybus_descriptor_interface {
-	__u8	vendor_stringid;
-	__u8	product_stringid;
-	__u8	features;
-	__u8	pad;
-} __packed;
-
-/*
- * An bundle descriptor defines an identification number and a class for
- * each bundle.
- *
- * @id: Uniquely identifies a bundle within a interface, its sole purpose is to
- * allow CPort descriptors to specify which bundle they are associated with.
- * The first bundle will have id 0, second will have 1 and so on.
- *
- * The largest CPort id associated with an bundle (defined by a
- * CPort descriptor in the manifest) is used to determine how to
- * encode the device id and module number in UniPro packets
- * that use the bundle.
- *
- * @class: It is used by kernel to know the functionality provided by the
- * bundle and will be matched against drivers functinality while probing greybus
- * driver. It should contain one of the values defined in
- * 'enum greybus_class_type'.
- *
- */
-struct greybus_descriptor_bundle {
-	__u8	id;	/* interface-relative id (0..) */
-	__u8	class;
-	__u8	pad[2];
-} __packed;
-
-/*
- * A CPort descriptor indicates the id of the bundle within the
- * module it's associated with, along with the CPort id used to
- * address the CPort.  The protocol id defines the format of messages
- * exchanged using the CPort.
- */
-struct greybus_descriptor_cport {
-	__le16	id;
-	__u8	bundle;
-	__u8	protocol_id;	/* enum greybus_protocol */
-} __packed;
-
-struct greybus_descriptor_header {
-	__le16	size;
-	__u8	type;		/* enum greybus_descriptor_type */
-	__u8	pad;
-} __packed;
-
-struct greybus_descriptor {
-	struct greybus_descriptor_header		header;
-	union {
-		struct greybus_descriptor_string	string;
-		struct greybus_descriptor_interface	interface;
-		struct greybus_descriptor_bundle	bundle;
-		struct greybus_descriptor_cport		cport;
-	};
-} __packed;
-
-struct greybus_manifest_header {
-	__le16	size;
-	__u8	version_major;
-	__u8	version_minor;
-} __packed;
-
-struct greybus_manifest {
-	struct greybus_manifest_header		header;
-	struct greybus_descriptor		descriptors[0];
-} __packed;
-
-#endif /* __GREYBUS_MANIFEST_H */
diff --git a/drivers/staging/greybus/greybus_protocols.h b/drivers/staging/greybus/greybus_protocols.h
deleted file mode 100644
index 5f34d1effb59..000000000000
--- a/drivers/staging/greybus/greybus_protocols.h
+++ /dev/null
@@ -1,2176 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
-/*
- * Copyright(c) 2014 - 2015 Google Inc. All rights reserved.
- * Copyright(c) 2014 - 2015 Linaro Ltd. All rights reserved.
- */
-
-#ifndef __GREYBUS_PROTOCOLS_H
-#define __GREYBUS_PROTOCOLS_H
-
-/* Fixed IDs for control/svc protocols */
-
-/* SVC switch-port device ids */
-#define GB_SVC_DEVICE_ID_SVC			0
-#define GB_SVC_DEVICE_ID_AP			1
-#define GB_SVC_DEVICE_ID_MIN			2
-#define GB_SVC_DEVICE_ID_MAX			31
-
-#define GB_SVC_CPORT_ID				0
-#define GB_CONTROL_BUNDLE_ID			0
-#define GB_CONTROL_CPORT_ID			0
-
-
-/*
- * All operation messages (both requests and responses) begin with
- * a header that encodes the size of the message (header included).
- * This header also contains a unique identifier, that associates a
- * response message with its operation.  The header contains an
- * operation type field, whose interpretation is dependent on what
- * type of protocol is used over the connection.  The high bit
- * (0x80) of the operation type field is used to indicate whether
- * the message is a request (clear) or a response (set).
- *
- * Response messages include an additional result byte, which
- * communicates the result of the corresponding request.  A zero
- * result value means the operation completed successfully.  Any
- * other value indicates an error; in this case, the payload of the
- * response message (if any) is ignored.  The result byte must be
- * zero in the header for a request message.
- *
- * The wire format for all numeric fields in the header is little
- * endian.  Any operation-specific data begins immediately after the
- * header.
- */
-struct gb_operation_msg_hdr {
-	__le16	size;		/* Size in bytes of header + payload */
-	__le16	operation_id;	/* Operation unique id */
-	__u8	type;		/* E.g GB_I2C_TYPE_* or GB_GPIO_TYPE_* */
-	__u8	result;		/* Result of request (in responses only) */
-	__u8	pad[2];		/* must be zero (ignore when read) */
-} __packed;
-
-
-/* Generic request types */
-#define GB_REQUEST_TYPE_CPORT_SHUTDOWN		0x00
-#define GB_REQUEST_TYPE_INVALID			0x7f
-
-struct gb_cport_shutdown_request {
-	__u8 phase;
-} __packed;
-
-
-/* Control Protocol */
-
-/* Greybus control request types */
-#define GB_CONTROL_TYPE_VERSION			0x01
-#define GB_CONTROL_TYPE_PROBE_AP		0x02
-#define GB_CONTROL_TYPE_GET_MANIFEST_SIZE	0x03
-#define GB_CONTROL_TYPE_GET_MANIFEST		0x04
-#define GB_CONTROL_TYPE_CONNECTED		0x05
-#define GB_CONTROL_TYPE_DISCONNECTED		0x06
-#define GB_CONTROL_TYPE_TIMESYNC_ENABLE		0x07
-#define GB_CONTROL_TYPE_TIMESYNC_DISABLE	0x08
-#define GB_CONTROL_TYPE_TIMESYNC_AUTHORITATIVE	0x09
-/*	Unused					0x0a */
-#define GB_CONTROL_TYPE_BUNDLE_VERSION		0x0b
-#define GB_CONTROL_TYPE_DISCONNECTING		0x0c
-#define GB_CONTROL_TYPE_TIMESYNC_GET_LAST_EVENT	0x0d
-#define GB_CONTROL_TYPE_MODE_SWITCH		0x0e
-#define GB_CONTROL_TYPE_BUNDLE_SUSPEND		0x0f
-#define GB_CONTROL_TYPE_BUNDLE_RESUME		0x10
-#define GB_CONTROL_TYPE_BUNDLE_DEACTIVATE	0x11
-#define GB_CONTROL_TYPE_BUNDLE_ACTIVATE		0x12
-#define GB_CONTROL_TYPE_INTF_SUSPEND_PREPARE		0x13
-#define GB_CONTROL_TYPE_INTF_DEACTIVATE_PREPARE	0x14
-#define GB_CONTROL_TYPE_INTF_HIBERNATE_ABORT	0x15
-
-struct gb_control_version_request {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-struct gb_control_version_response {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-struct gb_control_bundle_version_request {
-	__u8	bundle_id;
-} __packed;
-
-struct gb_control_bundle_version_response {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-/* Control protocol manifest get size request has no payload*/
-struct gb_control_get_manifest_size_response {
-	__le16			size;
-} __packed;
-
-/* Control protocol manifest get request has no payload */
-struct gb_control_get_manifest_response {
-	__u8			data[0];
-} __packed;
-
-/* Control protocol [dis]connected request */
-struct gb_control_connected_request {
-	__le16			cport_id;
-} __packed;
-
-struct gb_control_disconnecting_request {
-	__le16			cport_id;
-} __packed;
-/* disconnecting response has no payload */
-
-struct gb_control_disconnected_request {
-	__le16			cport_id;
-} __packed;
-/* Control protocol [dis]connected response has no payload */
-
-/*
- * All Bundle power management operations use the same request and response
- * layout and status codes.
- */
-
-#define GB_CONTROL_BUNDLE_PM_OK		0x00
-#define GB_CONTROL_BUNDLE_PM_INVAL	0x01
-#define GB_CONTROL_BUNDLE_PM_BUSY	0x02
-#define GB_CONTROL_BUNDLE_PM_FAIL	0x03
-#define GB_CONTROL_BUNDLE_PM_NA		0x04
-
-struct gb_control_bundle_pm_request {
-	__u8	bundle_id;
-} __packed;
-
-struct gb_control_bundle_pm_response {
-	__u8	status;
-} __packed;
-
-/*
- * Interface Suspend Prepare and Deactivate Prepare operations use the same
- * response layout and error codes. Define a single response structure and reuse
- * it. Both operations have no payload.
- */
-
-#define GB_CONTROL_INTF_PM_OK		0x00
-#define GB_CONTROL_INTF_PM_BUSY		0x01
-#define GB_CONTROL_INTF_PM_NA		0x02
-
-struct gb_control_intf_pm_response {
-	__u8	status;
-} __packed;
-
-/* APBridge protocol */
-
-/* request APB1 log */
-#define GB_APB_REQUEST_LOG			0x02
-
-/* request to map a cport to bulk in and bulk out endpoints */
-#define GB_APB_REQUEST_EP_MAPPING		0x03
-
-/* request to get the number of cports available */
-#define GB_APB_REQUEST_CPORT_COUNT		0x04
-
-/* request to reset a cport state */
-#define GB_APB_REQUEST_RESET_CPORT		0x05
-
-/* request to time the latency of messages on a given cport */
-#define GB_APB_REQUEST_LATENCY_TAG_EN		0x06
-#define GB_APB_REQUEST_LATENCY_TAG_DIS		0x07
-
-/* request to control the CSI transmitter */
-#define GB_APB_REQUEST_CSI_TX_CONTROL		0x08
-
-/* request to control audio streaming */
-#define GB_APB_REQUEST_AUDIO_CONTROL		0x09
-
-/* TimeSync requests */
-#define GB_APB_REQUEST_TIMESYNC_ENABLE		0x0d
-#define GB_APB_REQUEST_TIMESYNC_DISABLE		0x0e
-#define GB_APB_REQUEST_TIMESYNC_AUTHORITATIVE	0x0f
-#define GB_APB_REQUEST_TIMESYNC_GET_LAST_EVENT	0x10
-
-/* requests to set Greybus CPort flags */
-#define GB_APB_REQUEST_CPORT_FLAGS		0x11
-
-/* ARPC request */
-#define GB_APB_REQUEST_ARPC_RUN			0x12
-
-struct gb_apb_request_cport_flags {
-	__le32	flags;
-#define GB_APB_CPORT_FLAG_CONTROL		0x01
-#define GB_APB_CPORT_FLAG_HIGH_PRIO		0x02
-} __packed;
-
-
-/* Firmware Download Protocol */
-
-/* Request Types */
-#define GB_FW_DOWNLOAD_TYPE_FIND_FIRMWARE	0x01
-#define GB_FW_DOWNLOAD_TYPE_FETCH_FIRMWARE	0x02
-#define GB_FW_DOWNLOAD_TYPE_RELEASE_FIRMWARE	0x03
-
-#define GB_FIRMWARE_TAG_MAX_SIZE		10
-
-/* firmware download find firmware request/response */
-struct gb_fw_download_find_firmware_request {
-	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
-} __packed;
-
-struct gb_fw_download_find_firmware_response {
-	__u8			firmware_id;
-	__le32			size;
-} __packed;
-
-/* firmware download fetch firmware request/response */
-struct gb_fw_download_fetch_firmware_request {
-	__u8			firmware_id;
-	__le32			offset;
-	__le32			size;
-} __packed;
-
-struct gb_fw_download_fetch_firmware_response {
-	__u8			data[0];
-} __packed;
-
-/* firmware download release firmware request */
-struct gb_fw_download_release_firmware_request {
-	__u8			firmware_id;
-} __packed;
-/* firmware download release firmware response has no payload */
-
-
-/* Firmware Management Protocol */
-
-/* Request Types */
-#define GB_FW_MGMT_TYPE_INTERFACE_FW_VERSION	0x01
-#define GB_FW_MGMT_TYPE_LOAD_AND_VALIDATE_FW	0x02
-#define GB_FW_MGMT_TYPE_LOADED_FW		0x03
-#define GB_FW_MGMT_TYPE_BACKEND_FW_VERSION	0x04
-#define GB_FW_MGMT_TYPE_BACKEND_FW_UPDATE	0x05
-#define GB_FW_MGMT_TYPE_BACKEND_FW_UPDATED	0x06
-
-#define GB_FW_LOAD_METHOD_UNIPRO		0x01
-#define GB_FW_LOAD_METHOD_INTERNAL		0x02
-
-#define GB_FW_LOAD_STATUS_FAILED		0x00
-#define GB_FW_LOAD_STATUS_UNVALIDATED		0x01
-#define GB_FW_LOAD_STATUS_VALIDATED		0x02
-#define GB_FW_LOAD_STATUS_VALIDATION_FAILED	0x03
-
-#define GB_FW_BACKEND_FW_STATUS_SUCCESS		0x01
-#define GB_FW_BACKEND_FW_STATUS_FAIL_FIND	0x02
-#define GB_FW_BACKEND_FW_STATUS_FAIL_FETCH	0x03
-#define GB_FW_BACKEND_FW_STATUS_FAIL_WRITE	0x04
-#define GB_FW_BACKEND_FW_STATUS_INT		0x05
-#define GB_FW_BACKEND_FW_STATUS_RETRY		0x06
-#define GB_FW_BACKEND_FW_STATUS_NOT_SUPPORTED	0x07
-
-#define GB_FW_BACKEND_VERSION_STATUS_SUCCESS		0x01
-#define GB_FW_BACKEND_VERSION_STATUS_NOT_AVAILABLE	0x02
-#define GB_FW_BACKEND_VERSION_STATUS_NOT_SUPPORTED	0x03
-#define GB_FW_BACKEND_VERSION_STATUS_RETRY		0x04
-#define GB_FW_BACKEND_VERSION_STATUS_FAIL_INT		0x05
-
-/* firmware management interface firmware version request has no payload */
-struct gb_fw_mgmt_interface_fw_version_response {
-	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
-	__le16			major;
-	__le16			minor;
-} __packed;
-
-/* firmware management load and validate firmware request/response */
-struct gb_fw_mgmt_load_and_validate_fw_request {
-	__u8			request_id;
-	__u8			load_method;
-	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
-} __packed;
-/* firmware management load and validate firmware response has no payload*/
-
-/* firmware management loaded firmware request */
-struct gb_fw_mgmt_loaded_fw_request {
-	__u8			request_id;
-	__u8			status;
-	__le16			major;
-	__le16			minor;
-} __packed;
-/* firmware management loaded firmware response has no payload */
-
-/* firmware management backend firmware version request/response */
-struct gb_fw_mgmt_backend_fw_version_request {
-	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
-} __packed;
-
-struct gb_fw_mgmt_backend_fw_version_response {
-	__le16			major;
-	__le16			minor;
-	__u8			status;
-} __packed;
-
-/* firmware management backend firmware update request */
-struct gb_fw_mgmt_backend_fw_update_request {
-	__u8			request_id;
-	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
-} __packed;
-/* firmware management backend firmware update response has no payload */
-
-/* firmware management backend firmware updated request */
-struct gb_fw_mgmt_backend_fw_updated_request {
-	__u8			request_id;
-	__u8			status;
-} __packed;
-/* firmware management backend firmware updated response has no payload */
-
-
-/* Component Authentication Protocol (CAP) */
-
-/* Request Types */
-#define GB_CAP_TYPE_GET_ENDPOINT_UID	0x01
-#define GB_CAP_TYPE_GET_IMS_CERTIFICATE	0x02
-#define GB_CAP_TYPE_AUTHENTICATE	0x03
-
-/* CAP get endpoint uid request has no payload */
-struct gb_cap_get_endpoint_uid_response {
-	__u8			uid[8];
-} __packed;
-
-/* CAP get endpoint ims certificate request/response */
-struct gb_cap_get_ims_certificate_request {
-	__le32			certificate_class;
-	__le32			certificate_id;
-} __packed;
-
-struct gb_cap_get_ims_certificate_response {
-	__u8			result_code;
-	__u8			certificate[0];
-} __packed;
-
-/* CAP authenticate request/response */
-struct gb_cap_authenticate_request {
-	__le32			auth_type;
-	__u8			uid[8];
-	__u8			challenge[32];
-} __packed;
-
-struct gb_cap_authenticate_response {
-	__u8			result_code;
-	__u8			response[64];
-	__u8			signature[0];
-} __packed;
-
-
-/* Bootrom Protocol */
-
-/* Version of the Greybus bootrom protocol we support */
-#define GB_BOOTROM_VERSION_MAJOR		0x00
-#define GB_BOOTROM_VERSION_MINOR		0x01
-
-/* Greybus bootrom request types */
-#define GB_BOOTROM_TYPE_VERSION			0x01
-#define GB_BOOTROM_TYPE_FIRMWARE_SIZE		0x02
-#define GB_BOOTROM_TYPE_GET_FIRMWARE		0x03
-#define GB_BOOTROM_TYPE_READY_TO_BOOT		0x04
-#define GB_BOOTROM_TYPE_AP_READY		0x05	/* Request with no-payload */
-#define GB_BOOTROM_TYPE_GET_VID_PID		0x06	/* Request with no-payload */
-
-/* Greybus bootrom boot stages */
-#define GB_BOOTROM_BOOT_STAGE_ONE		0x01 /* Reserved for the boot ROM */
-#define GB_BOOTROM_BOOT_STAGE_TWO		0x02 /* Bootrom package to be loaded by the boot ROM */
-#define GB_BOOTROM_BOOT_STAGE_THREE		0x03 /* Module personality package loaded by Stage 2 firmware */
-
-/* Greybus bootrom ready to boot status */
-#define GB_BOOTROM_BOOT_STATUS_INVALID		0x00 /* Firmware blob could not be validated */
-#define GB_BOOTROM_BOOT_STATUS_INSECURE		0x01 /* Firmware blob is valid but insecure */
-#define GB_BOOTROM_BOOT_STATUS_SECURE		0x02 /* Firmware blob is valid and secure */
-
-/* Max bootrom data fetch size in bytes */
-#define GB_BOOTROM_FETCH_MAX			2000
-
-struct gb_bootrom_version_request {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-struct gb_bootrom_version_response {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-/* Bootrom protocol firmware size request/response */
-struct gb_bootrom_firmware_size_request {
-	__u8			stage;
-} __packed;
-
-struct gb_bootrom_firmware_size_response {
-	__le32			size;
-} __packed;
-
-/* Bootrom protocol get firmware request/response */
-struct gb_bootrom_get_firmware_request {
-	__le32			offset;
-	__le32			size;
-} __packed;
-
-struct gb_bootrom_get_firmware_response {
-	__u8			data[0];
-} __packed;
-
-/* Bootrom protocol Ready to boot request */
-struct gb_bootrom_ready_to_boot_request {
-	__u8			status;
-} __packed;
-/* Bootrom protocol Ready to boot response has no payload */
-
-/* Bootrom protocol get VID/PID request has no payload */
-struct gb_bootrom_get_vid_pid_response {
-	__le32			vendor_id;
-	__le32			product_id;
-} __packed;
-
-
-/* Power Supply */
-
-/* Greybus power supply request types */
-#define GB_POWER_SUPPLY_TYPE_GET_SUPPLIES		0x02
-#define GB_POWER_SUPPLY_TYPE_GET_DESCRIPTION		0x03
-#define GB_POWER_SUPPLY_TYPE_GET_PROP_DESCRIPTORS	0x04
-#define GB_POWER_SUPPLY_TYPE_GET_PROPERTY		0x05
-#define GB_POWER_SUPPLY_TYPE_SET_PROPERTY		0x06
-#define GB_POWER_SUPPLY_TYPE_EVENT			0x07
-
-/* Greybus power supply battery technologies types */
-#define GB_POWER_SUPPLY_TECH_UNKNOWN			0x0000
-#define GB_POWER_SUPPLY_TECH_NiMH			0x0001
-#define GB_POWER_SUPPLY_TECH_LION			0x0002
-#define GB_POWER_SUPPLY_TECH_LIPO			0x0003
-#define GB_POWER_SUPPLY_TECH_LiFe			0x0004
-#define GB_POWER_SUPPLY_TECH_NiCd			0x0005
-#define GB_POWER_SUPPLY_TECH_LiMn			0x0006
-
-/* Greybus power supply types */
-#define GB_POWER_SUPPLY_UNKNOWN_TYPE			0x0000
-#define GB_POWER_SUPPLY_BATTERY_TYPE			0x0001
-#define GB_POWER_SUPPLY_UPS_TYPE			0x0002
-#define GB_POWER_SUPPLY_MAINS_TYPE			0x0003
-#define GB_POWER_SUPPLY_USB_TYPE			0x0004
-#define GB_POWER_SUPPLY_USB_DCP_TYPE			0x0005
-#define GB_POWER_SUPPLY_USB_CDP_TYPE			0x0006
-#define GB_POWER_SUPPLY_USB_ACA_TYPE			0x0007
-
-/* Greybus power supply health values */
-#define GB_POWER_SUPPLY_HEALTH_UNKNOWN			0x0000
-#define GB_POWER_SUPPLY_HEALTH_GOOD			0x0001
-#define GB_POWER_SUPPLY_HEALTH_OVERHEAT			0x0002
-#define GB_POWER_SUPPLY_HEALTH_DEAD			0x0003
-#define GB_POWER_SUPPLY_HEALTH_OVERVOLTAGE		0x0004
-#define GB_POWER_SUPPLY_HEALTH_UNSPEC_FAILURE		0x0005
-#define GB_POWER_SUPPLY_HEALTH_COLD			0x0006
-#define GB_POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE	0x0007
-#define GB_POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE	0x0008
-
-/* Greybus power supply status values */
-#define GB_POWER_SUPPLY_STATUS_UNKNOWN			0x0000
-#define GB_POWER_SUPPLY_STATUS_CHARGING			0x0001
-#define GB_POWER_SUPPLY_STATUS_DISCHARGING		0x0002
-#define GB_POWER_SUPPLY_STATUS_NOT_CHARGING		0x0003
-#define GB_POWER_SUPPLY_STATUS_FULL			0x0004
-
-/* Greybus power supply capacity level values */
-#define GB_POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN		0x0000
-#define GB_POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL		0x0001
-#define GB_POWER_SUPPLY_CAPACITY_LEVEL_LOW		0x0002
-#define GB_POWER_SUPPLY_CAPACITY_LEVEL_NORMAL		0x0003
-#define GB_POWER_SUPPLY_CAPACITY_LEVEL_HIGH		0x0004
-#define GB_POWER_SUPPLY_CAPACITY_LEVEL_FULL		0x0005
-
-/* Greybus power supply scope values */
-#define GB_POWER_SUPPLY_SCOPE_UNKNOWN			0x0000
-#define GB_POWER_SUPPLY_SCOPE_SYSTEM			0x0001
-#define GB_POWER_SUPPLY_SCOPE_DEVICE			0x0002
-
-struct gb_power_supply_get_supplies_response {
-	__u8	supplies_count;
-} __packed;
-
-struct gb_power_supply_get_description_request {
-	__u8	psy_id;
-} __packed;
-
-struct gb_power_supply_get_description_response {
-	__u8	manufacturer[32];
-	__u8	model[32];
-	__u8	serial_number[32];
-	__le16	type;
-	__u8	properties_count;
-} __packed;
-
-struct gb_power_supply_props_desc {
-	__u8	property;
-#define GB_POWER_SUPPLY_PROP_STATUS				0x00
-#define GB_POWER_SUPPLY_PROP_CHARGE_TYPE			0x01
-#define GB_POWER_SUPPLY_PROP_HEALTH				0x02
-#define GB_POWER_SUPPLY_PROP_PRESENT				0x03
-#define GB_POWER_SUPPLY_PROP_ONLINE				0x04
-#define GB_POWER_SUPPLY_PROP_AUTHENTIC				0x05
-#define GB_POWER_SUPPLY_PROP_TECHNOLOGY				0x06
-#define GB_POWER_SUPPLY_PROP_CYCLE_COUNT			0x07
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_MAX			0x08
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_MIN			0x09
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN			0x0A
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN			0x0B
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_NOW			0x0C
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_AVG			0x0D
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_OCV			0x0E
-#define GB_POWER_SUPPLY_PROP_VOLTAGE_BOOT			0x0F
-#define GB_POWER_SUPPLY_PROP_CURRENT_MAX			0x10
-#define GB_POWER_SUPPLY_PROP_CURRENT_NOW			0x11
-#define GB_POWER_SUPPLY_PROP_CURRENT_AVG			0x12
-#define GB_POWER_SUPPLY_PROP_CURRENT_BOOT			0x13
-#define GB_POWER_SUPPLY_PROP_POWER_NOW				0x14
-#define GB_POWER_SUPPLY_PROP_POWER_AVG				0x15
-#define GB_POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN			0x16
-#define GB_POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN		0x17
-#define GB_POWER_SUPPLY_PROP_CHARGE_FULL			0x18
-#define GB_POWER_SUPPLY_PROP_CHARGE_EMPTY			0x19
-#define GB_POWER_SUPPLY_PROP_CHARGE_NOW				0x1A
-#define GB_POWER_SUPPLY_PROP_CHARGE_AVG				0x1B
-#define GB_POWER_SUPPLY_PROP_CHARGE_COUNTER			0x1C
-#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT		0x1D
-#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX	0x1E
-#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE		0x1F
-#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX	0x20
-#define GB_POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT		0x21
-#define GB_POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX		0x22
-#define GB_POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT		0x23
-#define GB_POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN			0x24
-#define GB_POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN		0x25
-#define GB_POWER_SUPPLY_PROP_ENERGY_FULL			0x26
-#define GB_POWER_SUPPLY_PROP_ENERGY_EMPTY			0x27
-#define GB_POWER_SUPPLY_PROP_ENERGY_NOW				0x28
-#define GB_POWER_SUPPLY_PROP_ENERGY_AVG				0x29
-#define GB_POWER_SUPPLY_PROP_CAPACITY				0x2A
-#define GB_POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN			0x2B
-#define GB_POWER_SUPPLY_PROP_CAPACITY_ALERT_MAX			0x2C
-#define GB_POWER_SUPPLY_PROP_CAPACITY_LEVEL			0x2D
-#define GB_POWER_SUPPLY_PROP_TEMP				0x2E
-#define GB_POWER_SUPPLY_PROP_TEMP_MAX				0x2F
-#define GB_POWER_SUPPLY_PROP_TEMP_MIN				0x30
-#define GB_POWER_SUPPLY_PROP_TEMP_ALERT_MIN			0x31
-#define GB_POWER_SUPPLY_PROP_TEMP_ALERT_MAX			0x32
-#define GB_POWER_SUPPLY_PROP_TEMP_AMBIENT			0x33
-#define GB_POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN		0x34
-#define GB_POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX		0x35
-#define GB_POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW			0x36
-#define GB_POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG			0x37
-#define GB_POWER_SUPPLY_PROP_TIME_TO_FULL_NOW			0x38
-#define GB_POWER_SUPPLY_PROP_TIME_TO_FULL_AVG			0x39
-#define GB_POWER_SUPPLY_PROP_TYPE				0x3A
-#define GB_POWER_SUPPLY_PROP_SCOPE				0x3B
-#define GB_POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT		0x3C
-#define GB_POWER_SUPPLY_PROP_CALIBRATE				0x3D
-	__u8	is_writeable;
-} __packed;
-
-struct gb_power_supply_get_property_descriptors_request {
-	__u8	psy_id;
-} __packed;
-
-struct gb_power_supply_get_property_descriptors_response {
-	__u8	properties_count;
-	struct gb_power_supply_props_desc props[];
-} __packed;
-
-struct gb_power_supply_get_property_request {
-	__u8	psy_id;
-	__u8	property;
-} __packed;
-
-struct gb_power_supply_get_property_response {
-	__le32	prop_val;
-};
-
-struct gb_power_supply_set_property_request {
-	__u8	psy_id;
-	__u8	property;
-	__le32	prop_val;
-} __packed;
-
-struct gb_power_supply_event_request {
-	__u8	psy_id;
-	__u8	event;
-#define GB_POWER_SUPPLY_UPDATE		0x01
-} __packed;
-
-
-/* HID */
-
-/* Greybus HID operation types */
-#define GB_HID_TYPE_GET_DESC		0x02
-#define GB_HID_TYPE_GET_REPORT_DESC	0x03
-#define GB_HID_TYPE_PWR_ON		0x04
-#define GB_HID_TYPE_PWR_OFF		0x05
-#define GB_HID_TYPE_GET_REPORT		0x06
-#define GB_HID_TYPE_SET_REPORT		0x07
-#define GB_HID_TYPE_IRQ_EVENT		0x08
-
-/* Report type */
-#define GB_HID_INPUT_REPORT		0
-#define GB_HID_OUTPUT_REPORT		1
-#define GB_HID_FEATURE_REPORT		2
-
-/* Different request/response structures */
-/* HID get descriptor response */
-struct gb_hid_desc_response {
-	__u8				bLength;
-	__le16				wReportDescLength;
-	__le16				bcdHID;
-	__le16				wProductID;
-	__le16				wVendorID;
-	__u8				bCountryCode;
-} __packed;
-
-/* HID get report request/response */
-struct gb_hid_get_report_request {
-	__u8				report_type;
-	__u8				report_id;
-} __packed;
-
-/* HID set report request */
-struct gb_hid_set_report_request {
-	__u8				report_type;
-	__u8				report_id;
-	__u8				report[0];
-} __packed;
-
-/* HID input report request, via interrupt pipe */
-struct gb_hid_input_report_request {
-	__u8				report[0];
-} __packed;
-
-
-/* I2C */
-
-/* Greybus i2c request types */
-#define GB_I2C_TYPE_FUNCTIONALITY	0x02
-#define GB_I2C_TYPE_TRANSFER		0x05
-
-/* functionality request has no payload */
-struct gb_i2c_functionality_response {
-	__le32	functionality;
-} __packed;
-
-/*
- * Outgoing data immediately follows the op count and ops array.
- * The data for each write (master -> slave) op in the array is sent
- * in order, with no (e.g. pad) bytes separating them.
- *
- * Short reads cause the entire transfer request to fail So response
- * payload consists only of bytes read, and the number of bytes is
- * exactly what was specified in the corresponding op.  Like
- * outgoing data, the incoming data is in order and contiguous.
- */
-struct gb_i2c_transfer_op {
-	__le16	addr;
-	__le16	flags;
-	__le16	size;
-} __packed;
-
-struct gb_i2c_transfer_request {
-	__le16				op_count;
-	struct gb_i2c_transfer_op	ops[0];		/* op_count of these */
-} __packed;
-struct gb_i2c_transfer_response {
-	__u8				data[0];	/* inbound data */
-} __packed;
-
-
-/* GPIO */
-
-/* Greybus GPIO request types */
-#define GB_GPIO_TYPE_LINE_COUNT		0x02
-#define GB_GPIO_TYPE_ACTIVATE		0x03
-#define GB_GPIO_TYPE_DEACTIVATE		0x04
-#define GB_GPIO_TYPE_GET_DIRECTION	0x05
-#define GB_GPIO_TYPE_DIRECTION_IN	0x06
-#define GB_GPIO_TYPE_DIRECTION_OUT	0x07
-#define GB_GPIO_TYPE_GET_VALUE		0x08
-#define GB_GPIO_TYPE_SET_VALUE		0x09
-#define GB_GPIO_TYPE_SET_DEBOUNCE	0x0a
-#define GB_GPIO_TYPE_IRQ_TYPE		0x0b
-#define GB_GPIO_TYPE_IRQ_MASK		0x0c
-#define GB_GPIO_TYPE_IRQ_UNMASK		0x0d
-#define GB_GPIO_TYPE_IRQ_EVENT		0x0e
-
-#define GB_GPIO_IRQ_TYPE_NONE		0x00
-#define GB_GPIO_IRQ_TYPE_EDGE_RISING	0x01
-#define GB_GPIO_IRQ_TYPE_EDGE_FALLING	0x02
-#define GB_GPIO_IRQ_TYPE_EDGE_BOTH	0x03
-#define GB_GPIO_IRQ_TYPE_LEVEL_HIGH	0x04
-#define GB_GPIO_IRQ_TYPE_LEVEL_LOW	0x08
-
-/* line count request has no payload */
-struct gb_gpio_line_count_response {
-	__u8	count;
-} __packed;
-
-struct gb_gpio_activate_request {
-	__u8	which;
-} __packed;
-/* activate response has no payload */
-
-struct gb_gpio_deactivate_request {
-	__u8	which;
-} __packed;
-/* deactivate response has no payload */
-
-struct gb_gpio_get_direction_request {
-	__u8	which;
-} __packed;
-struct gb_gpio_get_direction_response {
-	__u8	direction;
-} __packed;
-
-struct gb_gpio_direction_in_request {
-	__u8	which;
-} __packed;
-/* direction in response has no payload */
-
-struct gb_gpio_direction_out_request {
-	__u8	which;
-	__u8	value;
-} __packed;
-/* direction out response has no payload */
-
-struct gb_gpio_get_value_request {
-	__u8	which;
-} __packed;
-struct gb_gpio_get_value_response {
-	__u8	value;
-} __packed;
-
-struct gb_gpio_set_value_request {
-	__u8	which;
-	__u8	value;
-} __packed;
-/* set value response has no payload */
-
-struct gb_gpio_set_debounce_request {
-	__u8	which;
-	__le16	usec;
-} __packed;
-/* debounce response has no payload */
-
-struct gb_gpio_irq_type_request {
-	__u8	which;
-	__u8	type;
-} __packed;
-/* irq type response has no payload */
-
-struct gb_gpio_irq_mask_request {
-	__u8	which;
-} __packed;
-/* irq mask response has no payload */
-
-struct gb_gpio_irq_unmask_request {
-	__u8	which;
-} __packed;
-/* irq unmask response has no payload */
-
-/* irq event requests originate on another module and are handled on the AP */
-struct gb_gpio_irq_event_request {
-	__u8	which;
-} __packed;
-/* irq event has no response */
-
-
-/* PWM */
-
-/* Greybus PWM operation types */
-#define GB_PWM_TYPE_PWM_COUNT		0x02
-#define GB_PWM_TYPE_ACTIVATE		0x03
-#define GB_PWM_TYPE_DEACTIVATE		0x04
-#define GB_PWM_TYPE_CONFIG		0x05
-#define GB_PWM_TYPE_POLARITY		0x06
-#define GB_PWM_TYPE_ENABLE		0x07
-#define GB_PWM_TYPE_DISABLE		0x08
-
-/* pwm count request has no payload */
-struct gb_pwm_count_response {
-	__u8	count;
-} __packed;
-
-struct gb_pwm_activate_request {
-	__u8	which;
-} __packed;
-
-struct gb_pwm_deactivate_request {
-	__u8	which;
-} __packed;
-
-struct gb_pwm_config_request {
-	__u8	which;
-	__le32	duty;
-	__le32	period;
-} __packed;
-
-struct gb_pwm_polarity_request {
-	__u8	which;
-	__u8	polarity;
-} __packed;
-
-struct gb_pwm_enable_request {
-	__u8	which;
-} __packed;
-
-struct gb_pwm_disable_request {
-	__u8	which;
-} __packed;
-
-/* SPI */
-
-/* Should match up with modes in linux/spi/spi.h */
-#define GB_SPI_MODE_CPHA		0x01		/* clock phase */
-#define GB_SPI_MODE_CPOL		0x02		/* clock polarity */
-#define GB_SPI_MODE_MODE_0		(0 | 0)		/* (original MicroWire) */
-#define GB_SPI_MODE_MODE_1		(0 | GB_SPI_MODE_CPHA)
-#define GB_SPI_MODE_MODE_2		(GB_SPI_MODE_CPOL | 0)
-#define GB_SPI_MODE_MODE_3		(GB_SPI_MODE_CPOL | GB_SPI_MODE_CPHA)
-#define GB_SPI_MODE_CS_HIGH		0x04		/* chipselect active high? */
-#define GB_SPI_MODE_LSB_FIRST		0x08		/* per-word bits-on-wire */
-#define GB_SPI_MODE_3WIRE		0x10		/* SI/SO signals shared */
-#define GB_SPI_MODE_LOOP		0x20		/* loopback mode */
-#define GB_SPI_MODE_NO_CS		0x40		/* 1 dev/bus, no chipselect */
-#define GB_SPI_MODE_READY		0x80		/* slave pulls low to pause */
-
-/* Should match up with flags in linux/spi/spi.h */
-#define GB_SPI_FLAG_HALF_DUPLEX		BIT(0)		/* can't do full duplex */
-#define GB_SPI_FLAG_NO_RX		BIT(1)		/* can't do buffer read */
-#define GB_SPI_FLAG_NO_TX		BIT(2)		/* can't do buffer write */
-
-/* Greybus spi operation types */
-#define GB_SPI_TYPE_MASTER_CONFIG	0x02
-#define GB_SPI_TYPE_DEVICE_CONFIG	0x03
-#define GB_SPI_TYPE_TRANSFER		0x04
-
-/* mode request has no payload */
-struct gb_spi_master_config_response {
-	__le32	bits_per_word_mask;
-	__le32	min_speed_hz;
-	__le32	max_speed_hz;
-	__le16	mode;
-	__le16	flags;
-	__u8	num_chipselect;
-} __packed;
-
-struct gb_spi_device_config_request {
-	__u8	chip_select;
-} __packed;
-
-struct gb_spi_device_config_response {
-	__le16	mode;
-	__u8	bits_per_word;
-	__le32	max_speed_hz;
-	__u8	device_type;
-#define GB_SPI_SPI_DEV		0x00
-#define GB_SPI_SPI_NOR		0x01
-#define GB_SPI_SPI_MODALIAS	0x02
-	__u8	name[32];
-} __packed;
-
-/**
- * struct gb_spi_transfer - a read/write buffer pair
- * @speed_hz: Select a speed other than the device default for this transfer. If
- *	0 the default (from @spi_device) is used.
- * @len: size of rx and tx buffers (in bytes)
- * @delay_usecs: microseconds to delay after this transfer before (optionally)
- * 	changing the chipselect status, then starting the next transfer or
- * 	completing this spi_message.
- * @cs_change: affects chipselect after this transfer completes
- * @bits_per_word: select a bits_per_word other than the device default for this
- *	transfer. If 0 the default (from @spi_device) is used.
- */
-struct gb_spi_transfer {
-	__le32		speed_hz;
-	__le32		len;
-	__le16		delay_usecs;
-	__u8		cs_change;
-	__u8		bits_per_word;
-	__u8		xfer_flags;
-#define GB_SPI_XFER_READ	0x01
-#define GB_SPI_XFER_WRITE	0x02
-#define GB_SPI_XFER_INPROGRESS	0x04
-} __packed;
-
-struct gb_spi_transfer_request {
-	__u8			chip_select;	/* of the spi device */
-	__u8			mode;		/* of the spi device */
-	__le16			count;
-	struct gb_spi_transfer	transfers[0];	/* count of these */
-} __packed;
-
-struct gb_spi_transfer_response {
-	__u8			data[0];	/* inbound data */
-} __packed;
-
-/* Version of the Greybus SVC protocol we support */
-#define GB_SVC_VERSION_MAJOR		0x00
-#define GB_SVC_VERSION_MINOR		0x01
-
-/* Greybus SVC request types */
-#define GB_SVC_TYPE_PROTOCOL_VERSION		0x01
-#define GB_SVC_TYPE_SVC_HELLO			0x02
-#define GB_SVC_TYPE_INTF_DEVICE_ID		0x03
-#define GB_SVC_TYPE_INTF_RESET			0x06
-#define GB_SVC_TYPE_CONN_CREATE			0x07
-#define GB_SVC_TYPE_CONN_DESTROY		0x08
-#define GB_SVC_TYPE_DME_PEER_GET		0x09
-#define GB_SVC_TYPE_DME_PEER_SET		0x0a
-#define GB_SVC_TYPE_ROUTE_CREATE		0x0b
-#define GB_SVC_TYPE_ROUTE_DESTROY		0x0c
-#define GB_SVC_TYPE_TIMESYNC_ENABLE		0x0d
-#define GB_SVC_TYPE_TIMESYNC_DISABLE		0x0e
-#define GB_SVC_TYPE_TIMESYNC_AUTHORITATIVE	0x0f
-#define GB_SVC_TYPE_INTF_SET_PWRM		0x10
-#define GB_SVC_TYPE_INTF_EJECT			0x11
-#define GB_SVC_TYPE_PING			0x13
-#define GB_SVC_TYPE_PWRMON_RAIL_COUNT_GET	0x14
-#define GB_SVC_TYPE_PWRMON_RAIL_NAMES_GET	0x15
-#define GB_SVC_TYPE_PWRMON_SAMPLE_GET		0x16
-#define GB_SVC_TYPE_PWRMON_INTF_SAMPLE_GET	0x17
-#define GB_SVC_TYPE_TIMESYNC_WAKE_PINS_ACQUIRE	0x18
-#define GB_SVC_TYPE_TIMESYNC_WAKE_PINS_RELEASE	0x19
-#define GB_SVC_TYPE_TIMESYNC_PING		0x1a
-#define GB_SVC_TYPE_MODULE_INSERTED		0x1f
-#define GB_SVC_TYPE_MODULE_REMOVED		0x20
-#define GB_SVC_TYPE_INTF_VSYS_ENABLE		0x21
-#define GB_SVC_TYPE_INTF_VSYS_DISABLE		0x22
-#define GB_SVC_TYPE_INTF_REFCLK_ENABLE		0x23
-#define GB_SVC_TYPE_INTF_REFCLK_DISABLE		0x24
-#define GB_SVC_TYPE_INTF_UNIPRO_ENABLE		0x25
-#define GB_SVC_TYPE_INTF_UNIPRO_DISABLE		0x26
-#define GB_SVC_TYPE_INTF_ACTIVATE		0x27
-#define GB_SVC_TYPE_INTF_RESUME			0x28
-#define GB_SVC_TYPE_INTF_MAILBOX_EVENT		0x29
-#define GB_SVC_TYPE_INTF_OOPS			0x2a
-
-/* Greybus SVC protocol status values */
-#define GB_SVC_OP_SUCCESS			0x00
-#define GB_SVC_OP_UNKNOWN_ERROR			0x01
-#define GB_SVC_INTF_NOT_DETECTED		0x02
-#define GB_SVC_INTF_NO_UPRO_LINK		0x03
-#define GB_SVC_INTF_UPRO_NOT_DOWN		0x04
-#define GB_SVC_INTF_UPRO_NOT_HIBERNATED		0x05
-#define GB_SVC_INTF_NO_V_SYS			0x06
-#define GB_SVC_INTF_V_CHG			0x07
-#define GB_SVC_INTF_WAKE_BUSY			0x08
-#define GB_SVC_INTF_NO_REFCLK			0x09
-#define GB_SVC_INTF_RELEASING			0x0a
-#define GB_SVC_INTF_NO_ORDER			0x0b
-#define GB_SVC_INTF_MBOX_SET			0x0c
-#define GB_SVC_INTF_BAD_MBOX			0x0d
-#define GB_SVC_INTF_OP_TIMEOUT			0x0e
-#define GB_SVC_PWRMON_OP_NOT_PRESENT		0x0f
-
-struct gb_svc_version_request {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-struct gb_svc_version_response {
-	__u8	major;
-	__u8	minor;
-} __packed;
-
-/* SVC protocol hello request */
-struct gb_svc_hello_request {
-	__le16			endo_id;
-	__u8			interface_id;
-} __packed;
-/* hello response has no payload */
-
-struct gb_svc_intf_device_id_request {
-	__u8	intf_id;
-	__u8	device_id;
-} __packed;
-/* device id response has no payload */
-
-struct gb_svc_intf_reset_request {
-	__u8	intf_id;
-} __packed;
-/* interface reset response has no payload */
-
-struct gb_svc_intf_eject_request {
-	__u8	intf_id;
-} __packed;
-/* interface eject response has no payload */
-
-struct gb_svc_conn_create_request {
-	__u8	intf1_id;
-	__le16	cport1_id;
-	__u8	intf2_id;
-	__le16	cport2_id;
-	__u8	tc;
-	__u8	flags;
-} __packed;
-/* connection create response has no payload */
-
-struct gb_svc_conn_destroy_request {
-	__u8	intf1_id;
-	__le16	cport1_id;
-	__u8	intf2_id;
-	__le16	cport2_id;
-} __packed;
-/* connection destroy response has no payload */
-
-struct gb_svc_dme_peer_get_request {
-	__u8	intf_id;
-	__le16	attr;
-	__le16	selector;
-} __packed;
-
-struct gb_svc_dme_peer_get_response {
-	__le16	result_code;
-	__le32	attr_value;
-} __packed;
-
-struct gb_svc_dme_peer_set_request {
-	__u8	intf_id;
-	__le16	attr;
-	__le16	selector;
-	__le32	value;
-} __packed;
-
-struct gb_svc_dme_peer_set_response {
-	__le16	result_code;
-} __packed;
-
-/* Greybus init-status values, currently retrieved using DME peer gets. */
-#define GB_INIT_SPI_BOOT_STARTED			0x02
-#define GB_INIT_TRUSTED_SPI_BOOT_FINISHED		0x03
-#define GB_INIT_UNTRUSTED_SPI_BOOT_FINISHED		0x04
-#define GB_INIT_BOOTROM_UNIPRO_BOOT_STARTED		0x06
-#define GB_INIT_BOOTROM_FALLBACK_UNIPRO_BOOT_STARTED	0x09
-#define GB_INIT_S2_LOADER_BOOT_STARTED			0x0D
-
-struct gb_svc_route_create_request {
-	__u8	intf1_id;
-	__u8	dev1_id;
-	__u8	intf2_id;
-	__u8	dev2_id;
-} __packed;
-/* route create response has no payload */
-
-struct gb_svc_route_destroy_request {
-	__u8	intf1_id;
-	__u8	intf2_id;
-} __packed;
-/* route destroy response has no payload */
-
-/* used for svc_intf_vsys_{enable,disable} */
-struct gb_svc_intf_vsys_request {
-	__u8	intf_id;
-} __packed;
-
-struct gb_svc_intf_vsys_response {
-	__u8	result_code;
-#define GB_SVC_INTF_VSYS_OK				0x00
-	/* 0x01 is reserved */
-#define GB_SVC_INTF_VSYS_FAIL				0x02
-} __packed;
-
-/* used for svc_intf_refclk_{enable,disable} */
-struct gb_svc_intf_refclk_request {
-	__u8	intf_id;
-} __packed;
-
-struct gb_svc_intf_refclk_response {
-	__u8	result_code;
-#define GB_SVC_INTF_REFCLK_OK				0x00
-	/* 0x01 is reserved */
-#define GB_SVC_INTF_REFCLK_FAIL				0x02
-} __packed;
-
-/* used for svc_intf_unipro_{enable,disable} */
-struct gb_svc_intf_unipro_request {
-	__u8	intf_id;
-} __packed;
-
-struct gb_svc_intf_unipro_response {
-	__u8	result_code;
-#define GB_SVC_INTF_UNIPRO_OK				0x00
-	/* 0x01 is reserved */
-#define GB_SVC_INTF_UNIPRO_FAIL				0x02
-#define GB_SVC_INTF_UNIPRO_NOT_OFF			0x03
-} __packed;
-
-#define GB_SVC_UNIPRO_FAST_MODE			0x01
-#define GB_SVC_UNIPRO_SLOW_MODE			0x02
-#define GB_SVC_UNIPRO_FAST_AUTO_MODE		0x04
-#define GB_SVC_UNIPRO_SLOW_AUTO_MODE		0x05
-#define GB_SVC_UNIPRO_MODE_UNCHANGED		0x07
-#define GB_SVC_UNIPRO_HIBERNATE_MODE		0x11
-#define GB_SVC_UNIPRO_OFF_MODE			0x12
-
-#define GB_SVC_SMALL_AMPLITUDE          0x01
-#define GB_SVC_LARGE_AMPLITUDE          0x02
-
-#define GB_SVC_NO_DE_EMPHASIS           0x00
-#define GB_SVC_SMALL_DE_EMPHASIS        0x01
-#define GB_SVC_LARGE_DE_EMPHASIS        0x02
-
-#define GB_SVC_PWRM_RXTERMINATION		0x01
-#define GB_SVC_PWRM_TXTERMINATION		0x02
-#define GB_SVC_PWRM_LINE_RESET			0x04
-#define GB_SVC_PWRM_SCRAMBLING			0x20
-
-#define GB_SVC_PWRM_QUIRK_HSSER			0x00000001
-
-#define GB_SVC_UNIPRO_HS_SERIES_A		0x01
-#define GB_SVC_UNIPRO_HS_SERIES_B		0x02
-
-#define GB_SVC_SETPWRM_PWR_OK           0x00
-#define GB_SVC_SETPWRM_PWR_LOCAL        0x01
-#define GB_SVC_SETPWRM_PWR_REMOTE       0x02
-#define GB_SVC_SETPWRM_PWR_BUSY         0x03
-#define GB_SVC_SETPWRM_PWR_ERROR_CAP    0x04
-#define GB_SVC_SETPWRM_PWR_FATAL_ERROR  0x05
-
-struct gb_svc_l2_timer_cfg {
-	__le16 tsb_fc0_protection_timeout;
-	__le16 tsb_tc0_replay_timeout;
-	__le16 tsb_afc0_req_timeout;
-	__le16 tsb_fc1_protection_timeout;
-	__le16 tsb_tc1_replay_timeout;
-	__le16 tsb_afc1_req_timeout;
-	__le16 reserved_for_tc2[3];
-	__le16 reserved_for_tc3[3];
-} __packed;
-
-struct gb_svc_intf_set_pwrm_request {
-	__u8	intf_id;
-	__u8	hs_series;
-	__u8	tx_mode;
-	__u8	tx_gear;
-	__u8	tx_nlanes;
-	__u8	tx_amplitude;
-	__u8	tx_hs_equalizer;
-	__u8	rx_mode;
-	__u8	rx_gear;
-	__u8	rx_nlanes;
-	__u8	flags;
-	__le32	quirks;
-	struct gb_svc_l2_timer_cfg local_l2timerdata, remote_l2timerdata;
-} __packed;
-
-struct gb_svc_intf_set_pwrm_response {
-	__u8	result_code;
-} __packed;
-
-struct gb_svc_key_event_request {
-	__le16  key_code;
-#define GB_KEYCODE_ARA         0x00
-
-	__u8    key_event;
-#define GB_SVC_KEY_RELEASED    0x00
-#define GB_SVC_KEY_PRESSED     0x01
-} __packed;
-
-#define GB_SVC_PWRMON_MAX_RAIL_COUNT		254
-
-struct gb_svc_pwrmon_rail_count_get_response {
-	__u8	rail_count;
-} __packed;
-
-#define GB_SVC_PWRMON_RAIL_NAME_BUFSIZE		32
-
-struct gb_svc_pwrmon_rail_names_get_response {
-	__u8	status;
-	__u8	name[0][GB_SVC_PWRMON_RAIL_NAME_BUFSIZE];
-} __packed;
-
-#define GB_SVC_PWRMON_TYPE_CURR			0x01
-#define GB_SVC_PWRMON_TYPE_VOL			0x02
-#define GB_SVC_PWRMON_TYPE_PWR			0x03
-
-#define GB_SVC_PWRMON_GET_SAMPLE_OK		0x00
-#define GB_SVC_PWRMON_GET_SAMPLE_INVAL		0x01
-#define GB_SVC_PWRMON_GET_SAMPLE_NOSUPP		0x02
-#define GB_SVC_PWRMON_GET_SAMPLE_HWERR		0x03
-
-struct gb_svc_pwrmon_sample_get_request {
-	__u8	rail_id;
-	__u8	measurement_type;
-} __packed;
-
-struct gb_svc_pwrmon_sample_get_response {
-	__u8	result;
-	__le32	measurement;
-} __packed;
-
-struct gb_svc_pwrmon_intf_sample_get_request {
-	__u8	intf_id;
-	__u8	measurement_type;
-} __packed;
-
-struct gb_svc_pwrmon_intf_sample_get_response {
-	__u8	result;
-	__le32	measurement;
-} __packed;
-
-#define GB_SVC_MODULE_INSERTED_FLAG_NO_PRIMARY	0x0001
-
-struct gb_svc_module_inserted_request {
-	__u8	primary_intf_id;
-	__u8	intf_count;
-	__le16	flags;
-} __packed;
-/* module_inserted response has no payload */
-
-struct gb_svc_module_removed_request {
-	__u8	primary_intf_id;
-} __packed;
-/* module_removed response has no payload */
-
-struct gb_svc_intf_activate_request {
-	__u8	intf_id;
-} __packed;
-
-#define GB_SVC_INTF_TYPE_UNKNOWN		0x00
-#define GB_SVC_INTF_TYPE_DUMMY			0x01
-#define GB_SVC_INTF_TYPE_UNIPRO			0x02
-#define GB_SVC_INTF_TYPE_GREYBUS		0x03
-
-struct gb_svc_intf_activate_response {
-	__u8	status;
-	__u8	intf_type;
-} __packed;
-
-struct gb_svc_intf_resume_request {
-	__u8	intf_id;
-} __packed;
-
-struct gb_svc_intf_resume_response {
-	__u8	status;
-} __packed;
-
-#define GB_SVC_INTF_MAILBOX_NONE		0x00
-#define GB_SVC_INTF_MAILBOX_AP			0x01
-#define GB_SVC_INTF_MAILBOX_GREYBUS		0x02
-
-struct gb_svc_intf_mailbox_event_request {
-	__u8	intf_id;
-	__le16	result_code;
-	__le32	mailbox;
-} __packed;
-/* intf_mailbox_event response has no payload */
-
-struct gb_svc_intf_oops_request {
-	__u8	intf_id;
-	__u8	reason;
-} __packed;
-/* intf_oops response has no payload */
-
-
-/* RAW */
-
-/* Greybus raw request types */
-#define	GB_RAW_TYPE_SEND			0x02
-
-struct gb_raw_send_request {
-	__le32	len;
-	__u8	data[0];
-} __packed;
-
-
-/* UART */
-
-/* Greybus UART operation types */
-#define GB_UART_TYPE_SEND_DATA			0x02
-#define GB_UART_TYPE_RECEIVE_DATA		0x03	/* Unsolicited data */
-#define GB_UART_TYPE_SET_LINE_CODING		0x04
-#define GB_UART_TYPE_SET_CONTROL_LINE_STATE	0x05
-#define GB_UART_TYPE_SEND_BREAK			0x06
-#define GB_UART_TYPE_SERIAL_STATE		0x07	/* Unsolicited data */
-#define GB_UART_TYPE_RECEIVE_CREDITS		0x08
-#define GB_UART_TYPE_FLUSH_FIFOS		0x09
-
-/* Represents data from AP -> Module */
-struct gb_uart_send_data_request {
-	__le16	size;
-	__u8	data[0];
-} __packed;
-
-/* recv-data-request flags */
-#define GB_UART_RECV_FLAG_FRAMING		0x01	/* Framing error */
-#define GB_UART_RECV_FLAG_PARITY		0x02	/* Parity error */
-#define GB_UART_RECV_FLAG_OVERRUN		0x04	/* Overrun error */
-#define GB_UART_RECV_FLAG_BREAK			0x08	/* Break */
-
-/* Represents data from Module -> AP */
-struct gb_uart_recv_data_request {
-	__le16	size;
-	__u8	flags;
-	__u8	data[0];
-} __packed;
-
-struct gb_uart_receive_credits_request {
-	__le16  count;
-} __packed;
-
-struct gb_uart_set_line_coding_request {
-	__le32	rate;
-	__u8	format;
-#define GB_SERIAL_1_STOP_BITS			0
-#define GB_SERIAL_1_5_STOP_BITS			1
-#define GB_SERIAL_2_STOP_BITS			2
-
-	__u8	parity;
-#define GB_SERIAL_NO_PARITY			0
-#define GB_SERIAL_ODD_PARITY			1
-#define GB_SERIAL_EVEN_PARITY			2
-#define GB_SERIAL_MARK_PARITY			3
-#define GB_SERIAL_SPACE_PARITY			4
-
-	__u8	data_bits;
-
-	__u8	flow_control;
-#define GB_SERIAL_AUTO_RTSCTS_EN		0x1
-} __packed;
-
-/* output control lines */
-#define GB_UART_CTRL_DTR			0x01
-#define GB_UART_CTRL_RTS			0x02
-
-struct gb_uart_set_control_line_state_request {
-	__u8	control;
-} __packed;
-
-struct gb_uart_set_break_request {
-	__u8	state;
-} __packed;
-
-/* input control lines and line errors */
-#define GB_UART_CTRL_DCD			0x01
-#define GB_UART_CTRL_DSR			0x02
-#define GB_UART_CTRL_RI				0x04
-
-struct gb_uart_serial_state_request {
-	__u8	control;
-} __packed;
-
-struct gb_uart_serial_flush_request {
-	__u8    flags;
-#define GB_SERIAL_FLAG_FLUSH_TRANSMITTER	0x01
-#define GB_SERIAL_FLAG_FLUSH_RECEIVER		0x02
-} __packed;
-
-/* Loopback */
-
-/* Greybus loopback request types */
-#define GB_LOOPBACK_TYPE_PING			0x02
-#define GB_LOOPBACK_TYPE_TRANSFER		0x03
-#define GB_LOOPBACK_TYPE_SINK			0x04
-
-/*
- * Loopback request/response header format should be identical
- * to simplify bandwidth and data movement analysis.
- */
-struct gb_loopback_transfer_request {
-	__le32	len;
-	__le32  reserved0;
-	__le32  reserved1;
-	__u8	data[0];
-} __packed;
-
-struct gb_loopback_transfer_response {
-	__le32	len;
-	__le32	reserved0;
-	__le32	reserved1;
-	__u8	data[0];
-} __packed;
-
-/* SDIO */
-/* Greybus SDIO operation types */
-#define GB_SDIO_TYPE_GET_CAPABILITIES		0x02
-#define GB_SDIO_TYPE_SET_IOS			0x03
-#define GB_SDIO_TYPE_COMMAND			0x04
-#define GB_SDIO_TYPE_TRANSFER			0x05
-#define GB_SDIO_TYPE_EVENT			0x06
-
-/* get caps response: request has no payload */
-struct gb_sdio_get_caps_response {
-	__le32	caps;
-#define GB_SDIO_CAP_NONREMOVABLE	0x00000001
-#define GB_SDIO_CAP_4_BIT_DATA		0x00000002
-#define GB_SDIO_CAP_8_BIT_DATA		0x00000004
-#define GB_SDIO_CAP_MMC_HS		0x00000008
-#define GB_SDIO_CAP_SD_HS		0x00000010
-#define GB_SDIO_CAP_ERASE		0x00000020
-#define GB_SDIO_CAP_1_2V_DDR		0x00000040
-#define GB_SDIO_CAP_1_8V_DDR		0x00000080
-#define GB_SDIO_CAP_POWER_OFF_CARD	0x00000100
-#define GB_SDIO_CAP_UHS_SDR12		0x00000200
-#define GB_SDIO_CAP_UHS_SDR25		0x00000400
-#define GB_SDIO_CAP_UHS_SDR50		0x00000800
-#define GB_SDIO_CAP_UHS_SDR104		0x00001000
-#define GB_SDIO_CAP_UHS_DDR50		0x00002000
-#define GB_SDIO_CAP_DRIVER_TYPE_A	0x00004000
-#define GB_SDIO_CAP_DRIVER_TYPE_C	0x00008000
-#define GB_SDIO_CAP_DRIVER_TYPE_D	0x00010000
-#define GB_SDIO_CAP_HS200_1_2V		0x00020000
-#define GB_SDIO_CAP_HS200_1_8V		0x00040000
-#define GB_SDIO_CAP_HS400_1_2V		0x00080000
-#define GB_SDIO_CAP_HS400_1_8V		0x00100000
-
-	/* see possible values below at vdd */
-	__le32 ocr;
-	__le32 f_min;
-	__le32 f_max;
-	__le16 max_blk_count;
-	__le16 max_blk_size;
-} __packed;
-
-/* set ios request: response has no payload */
-struct gb_sdio_set_ios_request {
-	__le32	clock;
-	__le32	vdd;
-#define GB_SDIO_VDD_165_195	0x00000001
-#define GB_SDIO_VDD_20_21	0x00000002
-#define GB_SDIO_VDD_21_22	0x00000004
-#define GB_SDIO_VDD_22_23	0x00000008
-#define GB_SDIO_VDD_23_24	0x00000010
-#define GB_SDIO_VDD_24_25	0x00000020
-#define GB_SDIO_VDD_25_26	0x00000040
-#define GB_SDIO_VDD_26_27	0x00000080
-#define GB_SDIO_VDD_27_28	0x00000100
-#define GB_SDIO_VDD_28_29	0x00000200
-#define GB_SDIO_VDD_29_30	0x00000400
-#define GB_SDIO_VDD_30_31	0x00000800
-#define GB_SDIO_VDD_31_32	0x00001000
-#define GB_SDIO_VDD_32_33	0x00002000
-#define GB_SDIO_VDD_33_34	0x00004000
-#define GB_SDIO_VDD_34_35	0x00008000
-#define GB_SDIO_VDD_35_36	0x00010000
-
-	__u8	bus_mode;
-#define GB_SDIO_BUSMODE_OPENDRAIN	0x00
-#define GB_SDIO_BUSMODE_PUSHPULL	0x01
-
-	__u8	power_mode;
-#define GB_SDIO_POWER_OFF	0x00
-#define GB_SDIO_POWER_UP	0x01
-#define GB_SDIO_POWER_ON	0x02
-#define GB_SDIO_POWER_UNDEFINED	0x03
-
-	__u8	bus_width;
-#define GB_SDIO_BUS_WIDTH_1	0x00
-#define GB_SDIO_BUS_WIDTH_4	0x02
-#define GB_SDIO_BUS_WIDTH_8	0x03
-
-	__u8	timing;
-#define GB_SDIO_TIMING_LEGACY		0x00
-#define GB_SDIO_TIMING_MMC_HS		0x01
-#define GB_SDIO_TIMING_SD_HS		0x02
-#define GB_SDIO_TIMING_UHS_SDR12	0x03
-#define GB_SDIO_TIMING_UHS_SDR25	0x04
-#define GB_SDIO_TIMING_UHS_SDR50	0x05
-#define GB_SDIO_TIMING_UHS_SDR104	0x06
-#define GB_SDIO_TIMING_UHS_DDR50	0x07
-#define GB_SDIO_TIMING_MMC_DDR52	0x08
-#define GB_SDIO_TIMING_MMC_HS200	0x09
-#define GB_SDIO_TIMING_MMC_HS400	0x0A
-
-	__u8	signal_voltage;
-#define GB_SDIO_SIGNAL_VOLTAGE_330	0x00
-#define GB_SDIO_SIGNAL_VOLTAGE_180	0x01
-#define GB_SDIO_SIGNAL_VOLTAGE_120	0x02
-
-	__u8	drv_type;
-#define GB_SDIO_SET_DRIVER_TYPE_B	0x00
-#define GB_SDIO_SET_DRIVER_TYPE_A	0x01
-#define GB_SDIO_SET_DRIVER_TYPE_C	0x02
-#define GB_SDIO_SET_DRIVER_TYPE_D	0x03
-} __packed;
-
-/* command request */
-struct gb_sdio_command_request {
-	__u8	cmd;
-	__u8	cmd_flags;
-#define GB_SDIO_RSP_NONE		0x00
-#define GB_SDIO_RSP_PRESENT		0x01
-#define GB_SDIO_RSP_136			0x02
-#define GB_SDIO_RSP_CRC			0x04
-#define GB_SDIO_RSP_BUSY		0x08
-#define GB_SDIO_RSP_OPCODE		0x10
-
-	__u8	cmd_type;
-#define GB_SDIO_CMD_AC		0x00
-#define GB_SDIO_CMD_ADTC	0x01
-#define GB_SDIO_CMD_BC		0x02
-#define GB_SDIO_CMD_BCR		0x03
-
-	__le32	cmd_arg;
-	__le16	data_blocks;
-	__le16	data_blksz;
-} __packed;
-
-struct gb_sdio_command_response {
-	__le32	resp[4];
-} __packed;
-
-/* transfer request */
-struct gb_sdio_transfer_request {
-	__u8	data_flags;
-#define GB_SDIO_DATA_WRITE	0x01
-#define GB_SDIO_DATA_READ	0x02
-#define GB_SDIO_DATA_STREAM	0x04
-
-	__le16	data_blocks;
-	__le16	data_blksz;
-	__u8	data[0];
-} __packed;
-
-struct gb_sdio_transfer_response {
-	__le16	data_blocks;
-	__le16	data_blksz;
-	__u8	data[0];
-} __packed;
-
-/* event request: generated by module and is defined as unidirectional */
-struct gb_sdio_event_request {
-	__u8	event;
-#define GB_SDIO_CARD_INSERTED	0x01
-#define GB_SDIO_CARD_REMOVED	0x02
-#define GB_SDIO_WP		0x04
-} __packed;
-
-/* Camera */
-
-/* Greybus Camera request types */
-#define GB_CAMERA_TYPE_CAPABILITIES		0x02
-#define GB_CAMERA_TYPE_CONFIGURE_STREAMS	0x03
-#define GB_CAMERA_TYPE_CAPTURE			0x04
-#define GB_CAMERA_TYPE_FLUSH			0x05
-#define GB_CAMERA_TYPE_METADATA			0x06
-
-#define GB_CAMERA_MAX_STREAMS			4
-#define GB_CAMERA_MAX_SETTINGS_SIZE		8192
-
-/* Greybus Camera Configure Streams request payload */
-struct gb_camera_stream_config_request {
-	__le16 width;
-	__le16 height;
-	__le16 format;
-	__le16 padding;
-} __packed;
-
-struct gb_camera_configure_streams_request {
-	__u8 num_streams;
-	__u8 flags;
-#define GB_CAMERA_CONFIGURE_STREAMS_TEST_ONLY	0x01
-	__le16 padding;
-	struct gb_camera_stream_config_request config[0];
-} __packed;
-
-/* Greybus Camera Configure Streams response payload */
-struct gb_camera_stream_config_response {
-	__le16 width;
-	__le16 height;
-	__le16 format;
-	__u8 virtual_channel;
-	__u8 data_type[2];
-	__le16 max_pkt_size;
-	__u8 padding;
-	__le32 max_size;
-} __packed;
-
-struct gb_camera_configure_streams_response {
-	__u8 num_streams;
-#define GB_CAMERA_CONFIGURE_STREAMS_ADJUSTED	0x01
-	__u8 flags;
-	__u8 padding[2];
-	__le32 data_rate;
-	struct gb_camera_stream_config_response config[0];
-};
-
-/* Greybus Camera Capture request payload - response has no payload */
-struct gb_camera_capture_request {
-	__le32 request_id;
-	__u8 streams;
-	__u8 padding;
-	__le16 num_frames;
-	__u8 settings[0];
-} __packed;
-
-/* Greybus Camera Flush response payload - request has no payload */
-struct gb_camera_flush_response {
-	__le32 request_id;
-} __packed;
-
-/* Greybus Camera Metadata request payload - operation has no response */
-struct gb_camera_metadata_request {
-	__le32 request_id;
-	__le16 frame_number;
-	__u8 stream;
-	__u8 padding;
-	__u8 metadata[0];
-} __packed;
-
-/* Lights */
-
-/* Greybus Lights request types */
-#define GB_LIGHTS_TYPE_GET_LIGHTS		0x02
-#define GB_LIGHTS_TYPE_GET_LIGHT_CONFIG		0x03
-#define GB_LIGHTS_TYPE_GET_CHANNEL_CONFIG	0x04
-#define GB_LIGHTS_TYPE_GET_CHANNEL_FLASH_CONFIG	0x05
-#define GB_LIGHTS_TYPE_SET_BRIGHTNESS		0x06
-#define GB_LIGHTS_TYPE_SET_BLINK		0x07
-#define GB_LIGHTS_TYPE_SET_COLOR		0x08
-#define GB_LIGHTS_TYPE_SET_FADE			0x09
-#define GB_LIGHTS_TYPE_EVENT			0x0A
-#define GB_LIGHTS_TYPE_SET_FLASH_INTENSITY	0x0B
-#define GB_LIGHTS_TYPE_SET_FLASH_STROBE		0x0C
-#define GB_LIGHTS_TYPE_SET_FLASH_TIMEOUT	0x0D
-#define GB_LIGHTS_TYPE_GET_FLASH_FAULT		0x0E
-
-/* Greybus Light modes */
-
-/*
- * if you add any specific mode below, update also the
- * GB_CHANNEL_MODE_DEFINED_RANGE value accordingly
- */
-#define GB_CHANNEL_MODE_NONE		0x00000000
-#define GB_CHANNEL_MODE_BATTERY		0x00000001
-#define GB_CHANNEL_MODE_POWER		0x00000002
-#define GB_CHANNEL_MODE_WIRELESS	0x00000004
-#define GB_CHANNEL_MODE_BLUETOOTH	0x00000008
-#define GB_CHANNEL_MODE_KEYBOARD	0x00000010
-#define GB_CHANNEL_MODE_BUTTONS		0x00000020
-#define GB_CHANNEL_MODE_NOTIFICATION	0x00000040
-#define GB_CHANNEL_MODE_ATTENTION	0x00000080
-#define GB_CHANNEL_MODE_FLASH		0x00000100
-#define GB_CHANNEL_MODE_TORCH		0x00000200
-#define GB_CHANNEL_MODE_INDICATOR	0x00000400
-
-/* Lights Mode valid bit values */
-#define GB_CHANNEL_MODE_DEFINED_RANGE	0x000004FF
-#define GB_CHANNEL_MODE_VENDOR_RANGE	0x00F00000
-
-/* Greybus Light Channels Flags */
-#define GB_LIGHT_CHANNEL_MULTICOLOR	0x00000001
-#define GB_LIGHT_CHANNEL_FADER		0x00000002
-#define GB_LIGHT_CHANNEL_BLINK		0x00000004
-
-/* get count of lights in module */
-struct gb_lights_get_lights_response {
-	__u8	lights_count;
-} __packed;
-
-/* light config request payload */
-struct gb_lights_get_light_config_request {
-	__u8	id;
-} __packed;
-
-/* light config response payload */
-struct gb_lights_get_light_config_response {
-	__u8	channel_count;
-	__u8	name[32];
-} __packed;
-
-/* channel config request payload */
-struct gb_lights_get_channel_config_request {
-	__u8	light_id;
-	__u8	channel_id;
-} __packed;
-
-/* channel flash config request payload */
-struct gb_lights_get_channel_flash_config_request {
-	__u8	light_id;
-	__u8	channel_id;
-} __packed;
-
-/* channel config response payload */
-struct gb_lights_get_channel_config_response {
-	__u8	max_brightness;
-	__le32	flags;
-	__le32	color;
-	__u8	color_name[32];
-	__le32	mode;
-	__u8	mode_name[32];
-} __packed;
-
-/* channel flash config response payload */
-struct gb_lights_get_channel_flash_config_response {
-	__le32	intensity_min_uA;
-	__le32	intensity_max_uA;
-	__le32	intensity_step_uA;
-	__le32	timeout_min_us;
-	__le32	timeout_max_us;
-	__le32	timeout_step_us;
-} __packed;
-
-/* blink request payload: response have no payload */
-struct gb_lights_blink_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__le16	time_on_ms;
-	__le16	time_off_ms;
-} __packed;
-
-/* set brightness request payload: response have no payload */
-struct gb_lights_set_brightness_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__u8	brightness;
-} __packed;
-
-/* set color request payload: response have no payload */
-struct gb_lights_set_color_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__le32	color;
-} __packed;
-
-/* set fade request payload: response have no payload */
-struct gb_lights_set_fade_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__u8	fade_in;
-	__u8	fade_out;
-} __packed;
-
-/* event request: generated by module */
-struct gb_lights_event_request {
-	__u8	light_id;
-	__u8	event;
-#define GB_LIGHTS_LIGHT_CONFIG		0x01
-} __packed;
-
-/* set flash intensity request payload: response have no payload */
-struct gb_lights_set_flash_intensity_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__le32	intensity_uA;
-} __packed;
-
-/* set flash strobe state request payload: response have no payload */
-struct gb_lights_set_flash_strobe_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__u8	state;
-} __packed;
-
-/* set flash timeout request payload: response have no payload */
-struct gb_lights_set_flash_timeout_request {
-	__u8	light_id;
-	__u8	channel_id;
-	__le32	timeout_us;
-} __packed;
-
-/* get flash fault request payload */
-struct gb_lights_get_flash_fault_request {
-	__u8	light_id;
-	__u8	channel_id;
-} __packed;
-
-/* get flash fault response payload */
-struct gb_lights_get_flash_fault_response {
-	__le32	fault;
-#define GB_LIGHTS_FLASH_FAULT_OVER_VOLTAGE		0x00000000
-#define GB_LIGHTS_FLASH_FAULT_TIMEOUT			0x00000001
-#define GB_LIGHTS_FLASH_FAULT_OVER_TEMPERATURE		0x00000002
-#define GB_LIGHTS_FLASH_FAULT_SHORT_CIRCUIT		0x00000004
-#define GB_LIGHTS_FLASH_FAULT_OVER_CURRENT		0x00000008
-#define GB_LIGHTS_FLASH_FAULT_INDICATOR			0x00000010
-#define GB_LIGHTS_FLASH_FAULT_UNDER_VOLTAGE		0x00000020
-#define GB_LIGHTS_FLASH_FAULT_INPUT_VOLTAGE		0x00000040
-#define GB_LIGHTS_FLASH_FAULT_LED_OVER_TEMPERATURE	0x00000080
-} __packed;
-
-/* Audio */
-
-#define GB_AUDIO_TYPE_GET_TOPOLOGY_SIZE		0x02
-#define GB_AUDIO_TYPE_GET_TOPOLOGY		0x03
-#define GB_AUDIO_TYPE_GET_CONTROL		0x04
-#define GB_AUDIO_TYPE_SET_CONTROL		0x05
-#define GB_AUDIO_TYPE_ENABLE_WIDGET		0x06
-#define GB_AUDIO_TYPE_DISABLE_WIDGET		0x07
-#define GB_AUDIO_TYPE_GET_PCM			0x08
-#define GB_AUDIO_TYPE_SET_PCM			0x09
-#define GB_AUDIO_TYPE_SET_TX_DATA_SIZE		0x0a
-						/* 0x0b unused */
-#define GB_AUDIO_TYPE_ACTIVATE_TX		0x0c
-#define GB_AUDIO_TYPE_DEACTIVATE_TX		0x0d
-#define GB_AUDIO_TYPE_SET_RX_DATA_SIZE		0x0e
-						/* 0x0f unused */
-#define GB_AUDIO_TYPE_ACTIVATE_RX		0x10
-#define GB_AUDIO_TYPE_DEACTIVATE_RX		0x11
-#define GB_AUDIO_TYPE_JACK_EVENT		0x12
-#define GB_AUDIO_TYPE_BUTTON_EVENT		0x13
-#define GB_AUDIO_TYPE_STREAMING_EVENT		0x14
-#define GB_AUDIO_TYPE_SEND_DATA			0x15
-
-/* Module must be able to buffer 10ms of audio data, minimum */
-#define GB_AUDIO_SAMPLE_BUFFER_MIN_US		10000
-
-#define GB_AUDIO_PCM_NAME_MAX			32
-#define AUDIO_DAI_NAME_MAX			32
-#define AUDIO_CONTROL_NAME_MAX			32
-#define AUDIO_CTL_ELEM_NAME_MAX			44
-#define AUDIO_ENUM_NAME_MAX			64
-#define AUDIO_WIDGET_NAME_MAX			32
-
-/* See SNDRV_PCM_FMTBIT_* in Linux source */
-#define GB_AUDIO_PCM_FMT_S8			BIT(0)
-#define GB_AUDIO_PCM_FMT_U8			BIT(1)
-#define GB_AUDIO_PCM_FMT_S16_LE			BIT(2)
-#define GB_AUDIO_PCM_FMT_S16_BE			BIT(3)
-#define GB_AUDIO_PCM_FMT_U16_LE			BIT(4)
-#define GB_AUDIO_PCM_FMT_U16_BE			BIT(5)
-#define GB_AUDIO_PCM_FMT_S24_LE			BIT(6)
-#define GB_AUDIO_PCM_FMT_S24_BE			BIT(7)
-#define GB_AUDIO_PCM_FMT_U24_LE			BIT(8)
-#define GB_AUDIO_PCM_FMT_U24_BE			BIT(9)
-#define GB_AUDIO_PCM_FMT_S32_LE			BIT(10)
-#define GB_AUDIO_PCM_FMT_S32_BE			BIT(11)
-#define GB_AUDIO_PCM_FMT_U32_LE			BIT(12)
-#define GB_AUDIO_PCM_FMT_U32_BE			BIT(13)
-
-/* See SNDRV_PCM_RATE_* in Linux source */
-#define GB_AUDIO_PCM_RATE_5512			BIT(0)
-#define GB_AUDIO_PCM_RATE_8000			BIT(1)
-#define GB_AUDIO_PCM_RATE_11025			BIT(2)
-#define GB_AUDIO_PCM_RATE_16000			BIT(3)
-#define GB_AUDIO_PCM_RATE_22050			BIT(4)
-#define GB_AUDIO_PCM_RATE_32000			BIT(5)
-#define GB_AUDIO_PCM_RATE_44100			BIT(6)
-#define GB_AUDIO_PCM_RATE_48000			BIT(7)
-#define GB_AUDIO_PCM_RATE_64000			BIT(8)
-#define GB_AUDIO_PCM_RATE_88200			BIT(9)
-#define GB_AUDIO_PCM_RATE_96000			BIT(10)
-#define GB_AUDIO_PCM_RATE_176400		BIT(11)
-#define GB_AUDIO_PCM_RATE_192000		BIT(12)
-
-#define GB_AUDIO_STREAM_TYPE_CAPTURE		0x1
-#define GB_AUDIO_STREAM_TYPE_PLAYBACK		0x2
-
-#define GB_AUDIO_CTL_ELEM_ACCESS_READ		BIT(0)
-#define GB_AUDIO_CTL_ELEM_ACCESS_WRITE		BIT(1)
-
-/* See SNDRV_CTL_ELEM_TYPE_* in Linux source */
-#define GB_AUDIO_CTL_ELEM_TYPE_BOOLEAN		0x01
-#define GB_AUDIO_CTL_ELEM_TYPE_INTEGER		0x02
-#define GB_AUDIO_CTL_ELEM_TYPE_ENUMERATED	0x03
-#define GB_AUDIO_CTL_ELEM_TYPE_INTEGER64	0x06
-
-/* See SNDRV_CTL_ELEM_IFACE_* in Linux source */
-#define GB_AUDIO_CTL_ELEM_IFACE_CARD		0x00
-#define GB_AUDIO_CTL_ELEM_IFACE_HWDEP		0x01
-#define GB_AUDIO_CTL_ELEM_IFACE_MIXER		0x02
-#define GB_AUDIO_CTL_ELEM_IFACE_PCM		0x03
-#define GB_AUDIO_CTL_ELEM_IFACE_RAWMIDI		0x04
-#define GB_AUDIO_CTL_ELEM_IFACE_TIMER		0x05
-#define GB_AUDIO_CTL_ELEM_IFACE_SEQUENCER	0x06
-
-/* SNDRV_CTL_ELEM_ACCESS_* in Linux source */
-#define GB_AUDIO_ACCESS_READ			BIT(0)
-#define GB_AUDIO_ACCESS_WRITE			BIT(1)
-#define GB_AUDIO_ACCESS_VOLATILE		BIT(2)
-#define GB_AUDIO_ACCESS_TIMESTAMP		BIT(3)
-#define GB_AUDIO_ACCESS_TLV_READ		BIT(4)
-#define GB_AUDIO_ACCESS_TLV_WRITE		BIT(5)
-#define GB_AUDIO_ACCESS_TLV_COMMAND		BIT(6)
-#define GB_AUDIO_ACCESS_INACTIVE		BIT(7)
-#define GB_AUDIO_ACCESS_LOCK			BIT(8)
-#define GB_AUDIO_ACCESS_OWNER			BIT(9)
-
-/* enum snd_soc_dapm_type */
-#define GB_AUDIO_WIDGET_TYPE_INPUT		0x0
-#define GB_AUDIO_WIDGET_TYPE_OUTPUT		0x1
-#define GB_AUDIO_WIDGET_TYPE_MUX		0x2
-#define GB_AUDIO_WIDGET_TYPE_VIRT_MUX		0x3
-#define GB_AUDIO_WIDGET_TYPE_VALUE_MUX		0x4
-#define GB_AUDIO_WIDGET_TYPE_MIXER		0x5
-#define GB_AUDIO_WIDGET_TYPE_MIXER_NAMED_CTL	0x6
-#define GB_AUDIO_WIDGET_TYPE_PGA		0x7
-#define GB_AUDIO_WIDGET_TYPE_OUT_DRV		0x8
-#define GB_AUDIO_WIDGET_TYPE_ADC		0x9
-#define GB_AUDIO_WIDGET_TYPE_DAC		0xa
-#define GB_AUDIO_WIDGET_TYPE_MICBIAS		0xb
-#define GB_AUDIO_WIDGET_TYPE_MIC		0xc
-#define GB_AUDIO_WIDGET_TYPE_HP			0xd
-#define GB_AUDIO_WIDGET_TYPE_SPK		0xe
-#define GB_AUDIO_WIDGET_TYPE_LINE		0xf
-#define GB_AUDIO_WIDGET_TYPE_SWITCH		0x10
-#define GB_AUDIO_WIDGET_TYPE_VMID		0x11
-#define GB_AUDIO_WIDGET_TYPE_PRE		0x12
-#define GB_AUDIO_WIDGET_TYPE_POST		0x13
-#define GB_AUDIO_WIDGET_TYPE_SUPPLY		0x14
-#define GB_AUDIO_WIDGET_TYPE_REGULATOR_SUPPLY	0x15
-#define GB_AUDIO_WIDGET_TYPE_CLOCK_SUPPLY	0x16
-#define GB_AUDIO_WIDGET_TYPE_AIF_IN		0x17
-#define GB_AUDIO_WIDGET_TYPE_AIF_OUT		0x18
-#define GB_AUDIO_WIDGET_TYPE_SIGGEN		0x19
-#define GB_AUDIO_WIDGET_TYPE_DAI_IN		0x1a
-#define GB_AUDIO_WIDGET_TYPE_DAI_OUT		0x1b
-#define GB_AUDIO_WIDGET_TYPE_DAI_LINK		0x1c
-
-#define GB_AUDIO_WIDGET_STATE_DISABLED		0x01
-#define GB_AUDIO_WIDGET_STATE_ENAABLED		0x02
-
-#define GB_AUDIO_JACK_EVENT_INSERTION		0x1
-#define GB_AUDIO_JACK_EVENT_REMOVAL		0x2
-
-#define GB_AUDIO_BUTTON_EVENT_PRESS		0x1
-#define GB_AUDIO_BUTTON_EVENT_RELEASE		0x2
-
-#define GB_AUDIO_STREAMING_EVENT_UNSPECIFIED	0x1
-#define GB_AUDIO_STREAMING_EVENT_HALT		0x2
-#define GB_AUDIO_STREAMING_EVENT_INTERNAL_ERROR	0x3
-#define GB_AUDIO_STREAMING_EVENT_PROTOCOL_ERROR	0x4
-#define GB_AUDIO_STREAMING_EVENT_FAILURE	0x5
-#define GB_AUDIO_STREAMING_EVENT_UNDERRUN	0x6
-#define GB_AUDIO_STREAMING_EVENT_OVERRUN	0x7
-#define GB_AUDIO_STREAMING_EVENT_CLOCKING	0x8
-#define GB_AUDIO_STREAMING_EVENT_DATA_LEN	0x9
-
-#define GB_AUDIO_INVALID_INDEX			0xff
-
-/* enum snd_jack_types */
-#define GB_AUDIO_JACK_HEADPHONE			0x0000001
-#define GB_AUDIO_JACK_MICROPHONE		0x0000002
-#define GB_AUDIO_JACK_HEADSET			(GB_AUDIO_JACK_HEADPHONE | \
-						 GB_AUDIO_JACK_MICROPHONE)
-#define GB_AUDIO_JACK_LINEOUT			0x0000004
-#define GB_AUDIO_JACK_MECHANICAL		0x0000008
-#define GB_AUDIO_JACK_VIDEOOUT			0x0000010
-#define GB_AUDIO_JACK_AVOUT			(GB_AUDIO_JACK_LINEOUT | \
-						 GB_AUDIO_JACK_VIDEOOUT)
-#define GB_AUDIO_JACK_LINEIN			0x0000020
-#define GB_AUDIO_JACK_OC_HPHL			0x0000040
-#define GB_AUDIO_JACK_OC_HPHR			0x0000080
-#define GB_AUDIO_JACK_MICROPHONE2		0x0000200
-#define GB_AUDIO_JACK_ANC_HEADPHONE		(GB_AUDIO_JACK_HEADPHONE | \
-						 GB_AUDIO_JACK_MICROPHONE | \
-						 GB_AUDIO_JACK_MICROPHONE2)
-/* Kept separate from switches to facilitate implementation */
-#define GB_AUDIO_JACK_BTN_0			0x4000000
-#define GB_AUDIO_JACK_BTN_1			0x2000000
-#define GB_AUDIO_JACK_BTN_2			0x1000000
-#define GB_AUDIO_JACK_BTN_3			0x0800000
-
-struct gb_audio_pcm {
-	__u8	stream_name[GB_AUDIO_PCM_NAME_MAX];
-	__le32	formats;	/* GB_AUDIO_PCM_FMT_* */
-	__le32	rates;		/* GB_AUDIO_PCM_RATE_* */
-	__u8	chan_min;
-	__u8	chan_max;
-	__u8	sig_bits;	/* number of bits of content */
-} __packed;
-
-struct gb_audio_dai {
-	__u8			name[AUDIO_DAI_NAME_MAX];
-	__le16			data_cport;
-	struct gb_audio_pcm	capture;
-	struct gb_audio_pcm	playback;
-} __packed;
-
-struct gb_audio_integer {
-	__le32	min;
-	__le32	max;
-	__le32	step;
-} __packed;
-
-struct gb_audio_integer64 {
-	__le64	min;
-	__le64	max;
-	__le64	step;
-} __packed;
-
-struct gb_audio_enumerated {
-	__le32	items;
-	__le16	names_length;
-	__u8	names[0];
-} __packed;
-
-struct gb_audio_ctl_elem_info { /* See snd_ctl_elem_info in Linux source */
-	__u8		type;		/* GB_AUDIO_CTL_ELEM_TYPE_* */
-	__le16		dimen[4];
-	union {
-		struct gb_audio_integer		integer;
-		struct gb_audio_integer64	integer64;
-		struct gb_audio_enumerated	enumerated;
-	} value;
-} __packed;
-
-struct gb_audio_ctl_elem_value { /* See snd_ctl_elem_value in Linux source */
-	__le64				timestamp; /* XXX needed? */
-	union {
-		__le32	integer_value[2];	/* consider CTL_DOUBLE_xxx */
-		__le64	integer64_value[2];
-		__le32	enumerated_item[2];
-	} value;
-} __packed;
-
-struct gb_audio_control {
-	__u8	name[AUDIO_CONTROL_NAME_MAX];
-	__u8	id;		/* 0-63 */
-	__u8	iface;		/* GB_AUDIO_IFACE_* */
-	__le16	data_cport;
-	__le32	access;		/* GB_AUDIO_ACCESS_* */
-	__u8    count;		/* count of same elements */
-	__u8	count_values;	/* count of values, max=2 for CTL_DOUBLE_xxx */
-	struct gb_audio_ctl_elem_info	info;
-} __packed;
-
-struct gb_audio_widget {
-	__u8	name[AUDIO_WIDGET_NAME_MAX];
-	__u8	sname[AUDIO_WIDGET_NAME_MAX];
-	__u8	id;
-	__u8	type;		/* GB_AUDIO_WIDGET_TYPE_* */
-	__u8	state;		/* GB_AUDIO_WIDGET_STATE_* */
-	__u8	ncontrols;
-	struct gb_audio_control	ctl[0];	/* 'ncontrols' entries */
-} __packed;
-
-struct gb_audio_route {
-	__u8	source_id;	/* widget id */
-	__u8	destination_id;	/* widget id */
-	__u8	control_id;	/* 0-63 */
-	__u8	index;		/* Selection within the control */
-} __packed;
-
-struct gb_audio_topology {
-	__u8	num_dais;
-	__u8	num_controls;
-	__u8	num_widgets;
-	__u8	num_routes;
-	__le32	size_dais;
-	__le32	size_controls;
-	__le32	size_widgets;
-	__le32	size_routes;
-	__le32	jack_type;
-	/*
-	 * struct gb_audio_dai		dai[num_dais];
-	 * struct gb_audio_control	controls[num_controls];
-	 * struct gb_audio_widget	widgets[num_widgets];
-	 * struct gb_audio_route	routes[num_routes];
-	 */
-	__u8	data[0];
-} __packed;
-
-struct gb_audio_get_topology_size_response {
-	__le16	size;
-} __packed;
-
-struct gb_audio_get_topology_response {
-	struct gb_audio_topology	topology;
-} __packed;
-
-struct gb_audio_get_control_request {
-	__u8	control_id;
-	__u8	index;
-} __packed;
-
-struct gb_audio_get_control_response {
-	struct gb_audio_ctl_elem_value	value;
-} __packed;
-
-struct gb_audio_set_control_request {
-	__u8	control_id;
-	__u8	index;
-	struct gb_audio_ctl_elem_value	value;
-} __packed;
-
-struct gb_audio_enable_widget_request {
-	__u8	widget_id;
-} __packed;
-
-struct gb_audio_disable_widget_request {
-	__u8	widget_id;
-} __packed;
-
-struct gb_audio_get_pcm_request {
-	__le16	data_cport;
-} __packed;
-
-struct gb_audio_get_pcm_response {
-	__le32	format;
-	__le32	rate;
-	__u8	channels;
-	__u8	sig_bits;
-} __packed;
-
-struct gb_audio_set_pcm_request {
-	__le16	data_cport;
-	__le32	format;
-	__le32	rate;
-	__u8	channels;
-	__u8	sig_bits;
-} __packed;
-
-struct gb_audio_set_tx_data_size_request {
-	__le16	data_cport;
-	__le16	size;
-} __packed;
-
-struct gb_audio_activate_tx_request {
-	__le16	data_cport;
-} __packed;
-
-struct gb_audio_deactivate_tx_request {
-	__le16	data_cport;
-} __packed;
-
-struct gb_audio_set_rx_data_size_request {
-	__le16	data_cport;
-	__le16	size;
-} __packed;
-
-struct gb_audio_activate_rx_request {
-	__le16	data_cport;
-} __packed;
-
-struct gb_audio_deactivate_rx_request {
-	__le16	data_cport;
-} __packed;
-
-struct gb_audio_jack_event_request {
-	__u8	widget_id;
-	__u8	jack_attribute;
-	__u8	event;
-} __packed;
-
-struct gb_audio_button_event_request {
-	__u8	widget_id;
-	__u8	button_id;
-	__u8	event;
-} __packed;
-
-struct gb_audio_streaming_event_request {
-	__le16	data_cport;
-	__u8	event;
-} __packed;
-
-struct gb_audio_send_data_request {
-	__le64	timestamp;
-	__u8	data[0];
-} __packed;
-
-
-/* Log */
-
-/* operations */
-#define GB_LOG_TYPE_SEND_LOG	0x02
-
-/* length */
-#define GB_LOG_MAX_LEN		1024
-
-struct gb_log_send_log_request {
-	__le16	len;
-	__u8    msg[0];
-} __packed;
-
-#endif /* __GREYBUS_PROTOCOLS_H */
-
diff --git a/drivers/staging/greybus/hd.c b/drivers/staging/greybus/hd.c
index e2b9ab5f6ec2..72b21bf2d7d3 100644
--- a/drivers/staging/greybus/hd.c
+++ b/drivers/staging/greybus/hd.c
@@ -8,8 +8,8 @@
 
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "greybus_trace.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(gb_hd_create);
diff --git a/drivers/staging/greybus/hd.h b/drivers/staging/greybus/hd.h
deleted file mode 100644
index 348b76fabc9a..000000000000
--- a/drivers/staging/greybus/hd.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus Host Device
- *
- * Copyright 2014-2015 Google Inc.
- * Copyright 2014-2015 Linaro Ltd.
- */
-
-#ifndef __HD_H
-#define __HD_H
-
-struct gb_host_device;
-struct gb_message;
-
-struct gb_hd_driver {
-	size_t	hd_priv_size;
-
-	int (*cport_allocate)(struct gb_host_device *hd, int cport_id,
-				unsigned long flags);
-	void (*cport_release)(struct gb_host_device *hd, u16 cport_id);
-	int (*cport_enable)(struct gb_host_device *hd, u16 cport_id,
-				unsigned long flags);
-	int (*cport_disable)(struct gb_host_device *hd, u16 cport_id);
-	int (*cport_connected)(struct gb_host_device *hd, u16 cport_id);
-	int (*cport_flush)(struct gb_host_device *hd, u16 cport_id);
-	int (*cport_shutdown)(struct gb_host_device *hd, u16 cport_id,
-				u8 phase, unsigned int timeout);
-	int (*cport_quiesce)(struct gb_host_device *hd, u16 cport_id,
-				size_t peer_space, unsigned int timeout);
-	int (*cport_clear)(struct gb_host_device *hd, u16 cport_id);
-
-	int (*message_send)(struct gb_host_device *hd, u16 dest_cport_id,
-			struct gb_message *message, gfp_t gfp_mask);
-	void (*message_cancel)(struct gb_message *message);
-	int (*latency_tag_enable)(struct gb_host_device *hd, u16 cport_id);
-	int (*latency_tag_disable)(struct gb_host_device *hd, u16 cport_id);
-	int (*output)(struct gb_host_device *hd, void *req, u16 size, u8 cmd,
-		      bool async);
-};
-
-struct gb_host_device {
-	struct device dev;
-	int bus_id;
-	const struct gb_hd_driver *driver;
-
-	struct list_head modules;
-	struct list_head connections;
-	struct ida cport_id_map;
-
-	/* Number of CPorts supported by the UniPro IP */
-	size_t num_cports;
-
-	/* Host device buffer constraints */
-	size_t buffer_size_max;
-
-	struct gb_svc *svc;
-	/* Private data for the host driver */
-	unsigned long hd_priv[0] __aligned(sizeof(s64));
-};
-#define to_gb_host_device(d) container_of(d, struct gb_host_device, dev)
-
-int gb_hd_cport_reserve(struct gb_host_device *hd, u16 cport_id);
-void gb_hd_cport_release_reserved(struct gb_host_device *hd, u16 cport_id);
-int gb_hd_cport_allocate(struct gb_host_device *hd, int cport_id,
-					unsigned long flags);
-void gb_hd_cport_release(struct gb_host_device *hd, u16 cport_id);
-
-struct gb_host_device *gb_hd_create(struct gb_hd_driver *driver,
-					struct device *parent,
-					size_t buffer_size_max,
-					size_t num_cports);
-int gb_hd_add(struct gb_host_device *hd);
-void gb_hd_del(struct gb_host_device *hd);
-void gb_hd_shutdown(struct gb_host_device *hd);
-void gb_hd_put(struct gb_host_device *hd);
-int gb_hd_output(struct gb_host_device *hd, void *req, u16 size, u8 cmd,
-		 bool in_irq);
-
-int gb_hd_init(void);
-void gb_hd_exit(void);
-
-#endif	/* __HD_H */
diff --git a/drivers/staging/greybus/hid.c b/drivers/staging/greybus/hid.c
index 8ab810bf5716..04bfd9110502 100644
--- a/drivers/staging/greybus/hid.c
+++ b/drivers/staging/greybus/hid.c
@@ -12,8 +12,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 /* Greybus HID device's structure */
 struct gb_hid {
diff --git a/drivers/staging/greybus/i2c.c b/drivers/staging/greybus/i2c.c
index b2522043a1a4..ab06fc3b9e7e 100644
--- a/drivers/staging/greybus/i2c.c
+++ b/drivers/staging/greybus/i2c.c
@@ -10,8 +10,8 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 struct gb_i2c_device {
diff --git a/drivers/staging/greybus/interface.c b/drivers/staging/greybus/interface.c
index d7b5b89a2f40..67dbe6fda9a1 100644
--- a/drivers/staging/greybus/interface.c
+++ b/drivers/staging/greybus/interface.c
@@ -7,8 +7,8 @@
  */
 
 #include <linux/delay.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "greybus_trace.h"
 
 #define GB_INTERFACE_MODE_SWITCH_TIMEOUT	2000
diff --git a/drivers/staging/greybus/interface.h b/drivers/staging/greybus/interface.h
deleted file mode 100644
index 8fb1eacda302..000000000000
--- a/drivers/staging/greybus/interface.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus Interface Block code
- *
- * Copyright 2014 Google Inc.
- * Copyright 2014 Linaro Ltd.
- */
-
-#ifndef __INTERFACE_H
-#define __INTERFACE_H
-
-enum gb_interface_type {
-	GB_INTERFACE_TYPE_INVALID = 0,
-	GB_INTERFACE_TYPE_UNKNOWN,
-	GB_INTERFACE_TYPE_DUMMY,
-	GB_INTERFACE_TYPE_UNIPRO,
-	GB_INTERFACE_TYPE_GREYBUS,
-};
-
-#define GB_INTERFACE_QUIRK_NO_CPORT_FEATURES		BIT(0)
-#define GB_INTERFACE_QUIRK_NO_INIT_STATUS		BIT(1)
-#define GB_INTERFACE_QUIRK_NO_GMP_IDS			BIT(2)
-#define GB_INTERFACE_QUIRK_FORCED_DISABLE		BIT(3)
-#define GB_INTERFACE_QUIRK_LEGACY_MODE_SWITCH		BIT(4)
-#define GB_INTERFACE_QUIRK_NO_BUNDLE_ACTIVATE		BIT(5)
-#define GB_INTERFACE_QUIRK_NO_PM			BIT(6)
-
-struct gb_interface {
-	struct device dev;
-	struct gb_control *control;
-
-	struct list_head bundles;
-	struct list_head module_node;
-	struct list_head manifest_descs;
-	u8 interface_id;	/* Physical location within the Endo */
-	u8 device_id;
-	u8 features;		/* Feature flags set in the manifest */
-
-	enum gb_interface_type type;
-
-	u32 ddbl1_manufacturer_id;
-	u32 ddbl1_product_id;
-	u32 vendor_id;
-	u32 product_id;
-	u64 serial_number;
-
-	struct gb_host_device *hd;
-	struct gb_module *module;
-
-	unsigned long quirks;
-
-	struct mutex mutex;
-
-	bool disconnected;
-
-	bool ejected;
-	bool removed;
-	bool active;
-	bool enabled;
-	bool mode_switch;
-	bool dme_read;
-
-	struct work_struct mode_switch_work;
-	struct completion mode_switch_completion;
-};
-#define to_gb_interface(d) container_of(d, struct gb_interface, dev)
-
-struct gb_interface *gb_interface_create(struct gb_module *module,
-					 u8 interface_id);
-int gb_interface_activate(struct gb_interface *intf);
-void gb_interface_deactivate(struct gb_interface *intf);
-int gb_interface_enable(struct gb_interface *intf);
-void gb_interface_disable(struct gb_interface *intf);
-int gb_interface_add(struct gb_interface *intf);
-void gb_interface_del(struct gb_interface *intf);
-void gb_interface_put(struct gb_interface *intf);
-void gb_interface_mailbox_event(struct gb_interface *intf, u16 result,
-								u32 mailbox);
-
-int gb_interface_request_mode_switch(struct gb_interface *intf);
-
-#endif /* __INTERFACE_H */
diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c
index 010ae1e9c7fb..b3b1b253d112 100644
--- a/drivers/staging/greybus/light.c
+++ b/drivers/staging/greybus/light.c
@@ -11,11 +11,9 @@
 #include <linux/led-class-flash.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/greybus.h>
 #include <media/v4l2-flash-led-class.h>
 
-#include "greybus.h"
-#include "greybus_protocols.h"
-
 #define NAMES_MAX	32
 
 struct gb_channel {
diff --git a/drivers/staging/greybus/log.c b/drivers/staging/greybus/log.c
index 4f1f161ff11c..971f36dccac6 100644
--- a/drivers/staging/greybus/log.c
+++ b/drivers/staging/greybus/log.c
@@ -9,8 +9,7 @@
 #include <linux/slab.h>
 #include <linux/sizes.h>
 #include <linux/uaccess.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 struct gb_log {
 	struct gb_connection *connection;
diff --git a/drivers/staging/greybus/loopback.c b/drivers/staging/greybus/loopback.c
index b0ab0eed5c18..583d9708a191 100644
--- a/drivers/staging/greybus/loopback.c
+++ b/drivers/staging/greybus/loopback.c
@@ -25,12 +25,9 @@
 #include <linux/workqueue.h>
 #include <linux/atomic.h>
 #include <linux/pm_runtime.h>
-
+#include <linux/greybus.h>
 #include <asm/div64.h>
 
-#include "greybus.h"
-#include "connection.h"
-
 #define NSEC_PER_DAY 86400000000000ULL
 
 struct gb_loopback_stats {
diff --git a/drivers/staging/greybus/manifest.c b/drivers/staging/greybus/manifest.c
index 4ebbba52b07c..dd7040697bde 100644
--- a/drivers/staging/greybus/manifest.c
+++ b/drivers/staging/greybus/manifest.c
@@ -6,7 +6,7 @@
  * Copyright 2014-2015 Linaro Ltd.
  */
 
-#include "greybus.h"
+#include <linux/greybus.h>
 
 static const char *get_descriptor_type_string(u8 type)
 {
diff --git a/drivers/staging/greybus/manifest.h b/drivers/staging/greybus/manifest.h
deleted file mode 100644
index 88aa7e44cad5..000000000000
--- a/drivers/staging/greybus/manifest.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus manifest parsing
- *
- * Copyright 2014 Google Inc.
- * Copyright 2014 Linaro Ltd.
- */
-
-#ifndef __MANIFEST_H
-#define __MANIFEST_H
-
-struct gb_interface;
-bool gb_manifest_parse(struct gb_interface *intf, void *data, size_t size);
-
-#endif /* __MANIFEST_H */
diff --git a/drivers/staging/greybus/module.c b/drivers/staging/greybus/module.c
index b251a53d0e8e..36f77f9e1d74 100644
--- a/drivers/staging/greybus/module.c
+++ b/drivers/staging/greybus/module.c
@@ -6,7 +6,7 @@
  * Copyright 2016 Linaro Ltd.
  */
 
-#include "greybus.h"
+#include <linux/greybus.h>
 #include "greybus_trace.h"
 
 static ssize_t eject_store(struct device *dev,
diff --git a/drivers/staging/greybus/module.h b/drivers/staging/greybus/module.h
deleted file mode 100644
index 2a27e520ee94..000000000000
--- a/drivers/staging/greybus/module.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus Module code
- *
- * Copyright 2016 Google Inc.
- * Copyright 2016 Linaro Ltd.
- */
-
-#ifndef __MODULE_H
-#define __MODULE_H
-
-struct gb_module {
-	struct device dev;
-	struct gb_host_device *hd;
-
-	struct list_head hd_node;
-
-	u8 module_id;
-	size_t num_interfaces;
-
-	bool disconnected;
-
-	struct gb_interface *interfaces[0];
-};
-#define to_gb_module(d) container_of(d, struct gb_module, dev)
-
-struct gb_module *gb_module_create(struct gb_host_device *hd, u8 module_id,
-				   size_t num_interfaces);
-int gb_module_add(struct gb_module *module);
-void gb_module_del(struct gb_module *module);
-void gb_module_put(struct gb_module *module);
-
-#endif /* __MODULE_H */
diff --git a/drivers/staging/greybus/operation.c b/drivers/staging/greybus/operation.c
index fe268f7b63ed..8459e9bc0749 100644
--- a/drivers/staging/greybus/operation.c
+++ b/drivers/staging/greybus/operation.c
@@ -12,8 +12,8 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "greybus_trace.h"
 
 static struct kmem_cache *gb_operation_cache;
diff --git a/drivers/staging/greybus/operation.h b/drivers/staging/greybus/operation.h
deleted file mode 100644
index 17ba3daf111b..000000000000
--- a/drivers/staging/greybus/operation.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus operations
- *
- * Copyright 2014 Google Inc.
- * Copyright 2014 Linaro Ltd.
- */
-
-#ifndef __OPERATION_H
-#define __OPERATION_H
-
-#include <linux/completion.h>
-
-struct gb_operation;
-
-/* The default amount of time a request is given to complete */
-#define GB_OPERATION_TIMEOUT_DEFAULT	1000	/* milliseconds */
-
-/*
- * The top bit of the type in an operation message header indicates
- * whether the message is a request (bit clear) or response (bit set)
- */
-#define GB_MESSAGE_TYPE_RESPONSE	((u8)0x80)
-
-enum gb_operation_result {
-	GB_OP_SUCCESS		= 0x00,
-	GB_OP_INTERRUPTED	= 0x01,
-	GB_OP_TIMEOUT		= 0x02,
-	GB_OP_NO_MEMORY		= 0x03,
-	GB_OP_PROTOCOL_BAD	= 0x04,
-	GB_OP_OVERFLOW		= 0x05,
-	GB_OP_INVALID		= 0x06,
-	GB_OP_RETRY		= 0x07,
-	GB_OP_NONEXISTENT	= 0x08,
-	GB_OP_UNKNOWN_ERROR	= 0xfe,
-	GB_OP_MALFUNCTION	= 0xff,
-};
-
-#define GB_OPERATION_MESSAGE_SIZE_MIN	sizeof(struct gb_operation_msg_hdr)
-#define GB_OPERATION_MESSAGE_SIZE_MAX	U16_MAX
-
-/*
- * Protocol code should only examine the payload and payload_size fields, and
- * host-controller drivers may use the hcpriv field. All other fields are
- * intended to be private to the operations core code.
- */
-struct gb_message {
-	struct gb_operation		*operation;
-	struct gb_operation_msg_hdr	*header;
-
-	void				*payload;
-	size_t				payload_size;
-
-	void				*buffer;
-
-	void				*hcpriv;
-};
-
-#define GB_OPERATION_FLAG_INCOMING		BIT(0)
-#define GB_OPERATION_FLAG_UNIDIRECTIONAL	BIT(1)
-#define GB_OPERATION_FLAG_SHORT_RESPONSE	BIT(2)
-#define GB_OPERATION_FLAG_CORE			BIT(3)
-
-#define GB_OPERATION_FLAG_USER_MASK	(GB_OPERATION_FLAG_SHORT_RESPONSE | \
-					 GB_OPERATION_FLAG_UNIDIRECTIONAL)
-
-/*
- * A Greybus operation is a remote procedure call performed over a
- * connection between two UniPro interfaces.
- *
- * Every operation consists of a request message sent to the other
- * end of the connection coupled with a reply message returned to
- * the sender.  Every operation has a type, whose interpretation is
- * dependent on the protocol associated with the connection.
- *
- * Only four things in an operation structure are intended to be
- * directly usable by protocol handlers:  the operation's connection
- * pointer; the operation type; the request message payload (and
- * size); and the response message payload (and size).  Note that a
- * message with a 0-byte payload has a null message payload pointer.
- *
- * In addition, every operation has a result, which is an errno
- * value.  Protocol handlers access the operation result using
- * gb_operation_result().
- */
-typedef void (*gb_operation_callback)(struct gb_operation *);
-struct gb_operation {
-	struct gb_connection	*connection;
-	struct gb_message	*request;
-	struct gb_message	*response;
-
-	unsigned long		flags;
-	u8			type;
-	u16			id;
-	int			errno;		/* Operation result */
-
-	struct work_struct	work;
-	gb_operation_callback	callback;
-	struct completion	completion;
-	struct timer_list	timer;
-
-	struct kref		kref;
-	atomic_t		waiters;
-
-	int			active;
-	struct list_head	links;		/* connection->operations */
-
-	void			*private;
-};
-
-static inline bool
-gb_operation_is_incoming(struct gb_operation *operation)
-{
-	return operation->flags & GB_OPERATION_FLAG_INCOMING;
-}
-
-static inline bool
-gb_operation_is_unidirectional(struct gb_operation *operation)
-{
-	return operation->flags & GB_OPERATION_FLAG_UNIDIRECTIONAL;
-}
-
-static inline bool
-gb_operation_short_response_allowed(struct gb_operation *operation)
-{
-	return operation->flags & GB_OPERATION_FLAG_SHORT_RESPONSE;
-}
-
-static inline bool gb_operation_is_core(struct gb_operation *operation)
-{
-	return operation->flags & GB_OPERATION_FLAG_CORE;
-}
-
-void gb_connection_recv(struct gb_connection *connection,
-					void *data, size_t size);
-
-int gb_operation_result(struct gb_operation *operation);
-
-size_t gb_operation_get_payload_size_max(struct gb_connection *connection);
-struct gb_operation *
-gb_operation_create_flags(struct gb_connection *connection,
-				u8 type, size_t request_size,
-				size_t response_size, unsigned long flags,
-				gfp_t gfp);
-
-static inline struct gb_operation *
-gb_operation_create(struct gb_connection *connection,
-				u8 type, size_t request_size,
-				size_t response_size, gfp_t gfp)
-{
-	return gb_operation_create_flags(connection, type, request_size,
-						response_size, 0, gfp);
-}
-
-struct gb_operation *
-gb_operation_create_core(struct gb_connection *connection,
-				u8 type, size_t request_size,
-				size_t response_size, unsigned long flags,
-				gfp_t gfp);
-
-void gb_operation_get(struct gb_operation *operation);
-void gb_operation_put(struct gb_operation *operation);
-
-bool gb_operation_response_alloc(struct gb_operation *operation,
-					size_t response_size, gfp_t gfp);
-
-int gb_operation_request_send(struct gb_operation *operation,
-				gb_operation_callback callback,
-				unsigned int timeout,
-				gfp_t gfp);
-int gb_operation_request_send_sync_timeout(struct gb_operation *operation,
-						unsigned int timeout);
-static inline int
-gb_operation_request_send_sync(struct gb_operation *operation)
-{
-	return gb_operation_request_send_sync_timeout(operation,
-			GB_OPERATION_TIMEOUT_DEFAULT);
-}
-
-void gb_operation_cancel(struct gb_operation *operation, int errno);
-void gb_operation_cancel_incoming(struct gb_operation *operation, int errno);
-
-void greybus_message_sent(struct gb_host_device *hd,
-				struct gb_message *message, int status);
-
-int gb_operation_sync_timeout(struct gb_connection *connection, int type,
-				void *request, int request_size,
-				void *response, int response_size,
-				unsigned int timeout);
-int gb_operation_unidirectional_timeout(struct gb_connection *connection,
-				int type, void *request, int request_size,
-				unsigned int timeout);
-
-static inline int gb_operation_sync(struct gb_connection *connection, int type,
-		      void *request, int request_size,
-		      void *response, int response_size)
-{
-	return gb_operation_sync_timeout(connection, type,
-			request, request_size, response, response_size,
-			GB_OPERATION_TIMEOUT_DEFAULT);
-}
-
-static inline int gb_operation_unidirectional(struct gb_connection *connection,
-				int type, void *request, int request_size)
-{
-	return gb_operation_unidirectional_timeout(connection, type,
-			request, request_size, GB_OPERATION_TIMEOUT_DEFAULT);
-}
-
-static inline void *gb_operation_get_data(struct gb_operation *operation)
-{
-	return operation->private;
-}
-
-static inline void gb_operation_set_data(struct gb_operation *operation,
-					 void *data)
-{
-	operation->private = data;
-}
-
-int gb_operation_init(void);
-void gb_operation_exit(void);
-
-#endif /* !__OPERATION_H */
diff --git a/drivers/staging/greybus/power_supply.c b/drivers/staging/greybus/power_supply.c
index 34b40a409ea3..ec96f28887f9 100644
--- a/drivers/staging/greybus/power_supply.c
+++ b/drivers/staging/greybus/power_supply.c
@@ -10,8 +10,7 @@
 #include <linux/module.h>
 #include <linux/power_supply.h>
 #include <linux/slab.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 #define PROP_MAX 32
 
diff --git a/drivers/staging/greybus/pwm.c b/drivers/staging/greybus/pwm.c
index 4a6d394b6c44..891a6a672378 100644
--- a/drivers/staging/greybus/pwm.c
+++ b/drivers/staging/greybus/pwm.c
@@ -10,8 +10,8 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/pwm.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 struct gb_pwm_chip {
diff --git a/drivers/staging/greybus/raw.c b/drivers/staging/greybus/raw.c
index 838acbe84ca0..64a17dfe3b6e 100644
--- a/drivers/staging/greybus/raw.c
+++ b/drivers/staging/greybus/raw.c
@@ -13,8 +13,7 @@
 #include <linux/fs.h>
 #include <linux/idr.h>
 #include <linux/uaccess.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 struct gb_raw {
 	struct gb_connection *connection;
diff --git a/drivers/staging/greybus/sdio.c b/drivers/staging/greybus/sdio.c
index a097a8916b3b..68c5718be827 100644
--- a/drivers/staging/greybus/sdio.c
+++ b/drivers/staging/greybus/sdio.c
@@ -12,8 +12,8 @@
 #include <linux/mmc/mmc.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 struct gb_sdio_host {
diff --git a/drivers/staging/greybus/spi.c b/drivers/staging/greybus/spi.c
index 47d896992b35..68e8d272db6d 100644
--- a/drivers/staging/greybus/spi.c
+++ b/drivers/staging/greybus/spi.c
@@ -7,8 +7,8 @@
  */
 
 #include <linux/module.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 #include "spilib.h"
 
diff --git a/drivers/staging/greybus/spilib.c b/drivers/staging/greybus/spilib.c
index 2e07c6b41334..fc27c52de74a 100644
--- a/drivers/staging/greybus/spilib.c
+++ b/drivers/staging/greybus/spilib.c
@@ -10,9 +10,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/greybus.h>
 #include <linux/spi/spi.h>
 
-#include "greybus.h"
 #include "spilib.h"
 
 struct gb_spilib {
diff --git a/drivers/staging/greybus/svc.c b/drivers/staging/greybus/svc.c
index 05bc45287b87..ce7740ef449b 100644
--- a/drivers/staging/greybus/svc.c
+++ b/drivers/staging/greybus/svc.c
@@ -8,8 +8,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/workqueue.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 #define SVC_INTF_EJECT_TIMEOUT		9000
 #define SVC_INTF_ACTIVATE_TIMEOUT	6000
diff --git a/drivers/staging/greybus/svc.h b/drivers/staging/greybus/svc.h
deleted file mode 100644
index e7452057cfe4..000000000000
--- a/drivers/staging/greybus/svc.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Greybus SVC code
- *
- * Copyright 2015 Google Inc.
- * Copyright 2015 Linaro Ltd.
- */
-
-#ifndef __SVC_H
-#define __SVC_H
-
-#define GB_SVC_CPORT_FLAG_E2EFC		BIT(0)
-#define GB_SVC_CPORT_FLAG_CSD_N		BIT(1)
-#define GB_SVC_CPORT_FLAG_CSV_N		BIT(2)
-
-enum gb_svc_state {
-	GB_SVC_STATE_RESET,
-	GB_SVC_STATE_PROTOCOL_VERSION,
-	GB_SVC_STATE_SVC_HELLO,
-};
-
-enum gb_svc_watchdog_bite {
-	GB_SVC_WATCHDOG_BITE_RESET_UNIPRO = 0,
-	GB_SVC_WATCHDOG_BITE_PANIC_KERNEL,
-};
-
-struct gb_svc_watchdog;
-
-struct svc_debugfs_pwrmon_rail {
-	u8 id;
-	struct gb_svc *svc;
-};
-
-struct gb_svc {
-	struct device		dev;
-
-	struct gb_host_device	*hd;
-	struct gb_connection	*connection;
-	enum gb_svc_state	state;
-	struct ida		device_id_map;
-	struct workqueue_struct	*wq;
-
-	u16 endo_id;
-	u8 ap_intf_id;
-
-	u8 protocol_major;
-	u8 protocol_minor;
-
-	struct gb_svc_watchdog	*watchdog;
-	enum gb_svc_watchdog_bite action;
-
-	struct dentry *debugfs_dentry;
-	struct svc_debugfs_pwrmon_rail *pwrmon_rails;
-};
-#define to_gb_svc(d) container_of(d, struct gb_svc, dev)
-
-struct gb_svc *gb_svc_create(struct gb_host_device *hd);
-int gb_svc_add(struct gb_svc *svc);
-void gb_svc_del(struct gb_svc *svc);
-void gb_svc_put(struct gb_svc *svc);
-
-int gb_svc_pwrmon_intf_sample_get(struct gb_svc *svc, u8 intf_id,
-				  u8 measurement_type, u32 *value);
-int gb_svc_intf_device_id(struct gb_svc *svc, u8 intf_id, u8 device_id);
-int gb_svc_route_create(struct gb_svc *svc, u8 intf1_id, u8 dev1_id,
-			       u8 intf2_id, u8 dev2_id);
-void gb_svc_route_destroy(struct gb_svc *svc, u8 intf1_id, u8 intf2_id);
-int gb_svc_connection_create(struct gb_svc *svc, u8 intf1_id, u16 cport1_id,
-			     u8 intf2_id, u16 cport2_id, u8 cport_flags);
-void gb_svc_connection_destroy(struct gb_svc *svc, u8 intf1_id, u16 cport1_id,
-			       u8 intf2_id, u16 cport2_id);
-int gb_svc_intf_eject(struct gb_svc *svc, u8 intf_id);
-int gb_svc_intf_vsys_set(struct gb_svc *svc, u8 intf_id, bool enable);
-int gb_svc_intf_refclk_set(struct gb_svc *svc, u8 intf_id, bool enable);
-int gb_svc_intf_unipro_set(struct gb_svc *svc, u8 intf_id, bool enable);
-int gb_svc_intf_activate(struct gb_svc *svc, u8 intf_id, u8 *intf_type);
-int gb_svc_intf_resume(struct gb_svc *svc, u8 intf_id);
-
-int gb_svc_dme_peer_get(struct gb_svc *svc, u8 intf_id, u16 attr, u16 selector,
-			u32 *value);
-int gb_svc_dme_peer_set(struct gb_svc *svc, u8 intf_id, u16 attr, u16 selector,
-			u32 value);
-int gb_svc_intf_set_power_mode(struct gb_svc *svc, u8 intf_id, u8 hs_series,
-			       u8 tx_mode, u8 tx_gear, u8 tx_nlanes,
-			       u8 tx_amplitude, u8 tx_hs_equalizer,
-			       u8 rx_mode, u8 rx_gear, u8 rx_nlanes,
-			       u8 flags, u32 quirks,
-			       struct gb_svc_l2_timer_cfg *local,
-			       struct gb_svc_l2_timer_cfg *remote);
-int gb_svc_intf_set_power_mode_hibernate(struct gb_svc *svc, u8 intf_id);
-int gb_svc_ping(struct gb_svc *svc);
-int gb_svc_watchdog_create(struct gb_svc *svc);
-void gb_svc_watchdog_destroy(struct gb_svc *svc);
-bool gb_svc_watchdog_enabled(struct gb_svc *svc);
-int gb_svc_watchdog_enable(struct gb_svc *svc);
-int gb_svc_watchdog_disable(struct gb_svc *svc);
-
-int gb_svc_protocol_init(void);
-void gb_svc_protocol_exit(void);
-
-#endif /* __SVC_H */
diff --git a/drivers/staging/greybus/svc_watchdog.c b/drivers/staging/greybus/svc_watchdog.c
index 7868ad8211c5..b6b1682c19c4 100644
--- a/drivers/staging/greybus/svc_watchdog.c
+++ b/drivers/staging/greybus/svc_watchdog.c
@@ -8,7 +8,7 @@
 #include <linux/delay.h>
 #include <linux/suspend.h>
 #include <linux/workqueue.h>
-#include "greybus.h"
+#include <linux/greybus.h>
 
 #define SVC_WATCHDOG_PERIOD	(2 * HZ)
 
diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c
index b3bffe91ae99..55c51143bb09 100644
--- a/drivers/staging/greybus/uart.c
+++ b/drivers/staging/greybus/uart.c
@@ -28,8 +28,8 @@
 #include <linux/kfifo.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 #define GB_NUM_MINORS	16	/* 16 is more than enough */
diff --git a/drivers/staging/greybus/usb.c b/drivers/staging/greybus/usb.c
index 1c246c73a085..8e9d9d59a357 100644
--- a/drivers/staging/greybus/usb.c
+++ b/drivers/staging/greybus/usb.c
@@ -10,8 +10,8 @@
 #include <linux/slab.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
+#include <linux/greybus.h>
 
-#include "greybus.h"
 #include "gbphy.h"
 
 /* Greybus USB request types */
diff --git a/drivers/staging/greybus/vibrator.c b/drivers/staging/greybus/vibrator.c
index 3e5dedeacd5c..0e2b188e5ca3 100644
--- a/drivers/staging/greybus/vibrator.c
+++ b/drivers/staging/greybus/vibrator.c
@@ -13,8 +13,7 @@
 #include <linux/kdev_t.h>
 #include <linux/idr.h>
 #include <linux/pm_runtime.h>
-
-#include "greybus.h"
+#include <linux/greybus.h>
 
 struct gb_vibrator_device {
 	struct gb_connection	*connection;
diff --git a/include/linux/greybus.h b/include/linux/greybus.h
new file mode 100644
index 000000000000..18c0fb958b74
--- /dev/null
+++ b/include/linux/greybus.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus driver and device API
+ *
+ * Copyright 2014-2015 Google Inc.
+ * Copyright 2014-2015 Linaro Ltd.
+ */
+
+#ifndef __LINUX_GREYBUS_H
+#define __LINUX_GREYBUS_H
+
+#ifdef __KERNEL__
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/idr.h>
+
+#include <linux/greybus/greybus_id.h>
+#include <linux/greybus/greybus_manifest.h>
+#include <linux/greybus/greybus_protocols.h>
+#include <linux/greybus/manifest.h>
+#include <linux/greybus/hd.h>
+#include <linux/greybus/svc.h>
+#include <linux/greybus/control.h>
+#include <linux/greybus/module.h>
+#include <linux/greybus/interface.h>
+#include <linux/greybus/bundle.h>
+#include <linux/greybus/connection.h>
+#include <linux/greybus/operation.h>
+
+/* Matches up with the Greybus Protocol specification document */
+#define GREYBUS_VERSION_MAJOR	0x00
+#define GREYBUS_VERSION_MINOR	0x01
+
+#define GREYBUS_ID_MATCH_DEVICE \
+	(GREYBUS_ID_MATCH_VENDOR | GREYBUS_ID_MATCH_PRODUCT)
+
+#define GREYBUS_DEVICE(v, p)					\
+	.match_flags	= GREYBUS_ID_MATCH_DEVICE,		\
+	.vendor		= (v),					\
+	.product	= (p),
+
+#define GREYBUS_DEVICE_CLASS(c)					\
+	.match_flags	= GREYBUS_ID_MATCH_CLASS,		\
+	.class		= (c),
+
+/* Maximum number of CPorts */
+#define CPORT_ID_MAX	4095		/* UniPro max id is 4095 */
+#define CPORT_ID_BAD	U16_MAX
+
+struct greybus_driver {
+	const char *name;
+
+	int (*probe)(struct gb_bundle *bundle,
+		     const struct greybus_bundle_id *id);
+	void (*disconnect)(struct gb_bundle *bundle);
+
+	const struct greybus_bundle_id *id_table;
+
+	struct device_driver driver;
+};
+#define to_greybus_driver(d) container_of(d, struct greybus_driver, driver)
+
+static inline void greybus_set_drvdata(struct gb_bundle *bundle, void *data)
+{
+	dev_set_drvdata(&bundle->dev, data);
+}
+
+static inline void *greybus_get_drvdata(struct gb_bundle *bundle)
+{
+	return dev_get_drvdata(&bundle->dev);
+}
+
+/* Don't call these directly, use the module_greybus_driver() macro instead */
+int greybus_register_driver(struct greybus_driver *driver,
+			    struct module *module, const char *mod_name);
+void greybus_deregister_driver(struct greybus_driver *driver);
+
+/* define to get proper THIS_MODULE and KBUILD_MODNAME values */
+#define greybus_register(driver) \
+	greybus_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
+#define greybus_deregister(driver) \
+	greybus_deregister_driver(driver)
+
+/**
+ * module_greybus_driver() - Helper macro for registering a Greybus driver
+ * @__greybus_driver: greybus_driver structure
+ *
+ * Helper macro for Greybus drivers to set up proper module init / exit
+ * functions.  Replaces module_init() and module_exit() and keeps people from
+ * printing pointless things to the kernel log when their driver is loaded.
+ */
+#define module_greybus_driver(__greybus_driver)	\
+	module_driver(__greybus_driver, greybus_register, greybus_deregister)
+
+int greybus_disabled(void);
+
+void gb_debugfs_init(void);
+void gb_debugfs_cleanup(void);
+struct dentry *gb_debugfs_get(void);
+
+extern struct bus_type greybus_bus_type;
+
+extern struct device_type greybus_hd_type;
+extern struct device_type greybus_module_type;
+extern struct device_type greybus_interface_type;
+extern struct device_type greybus_control_type;
+extern struct device_type greybus_bundle_type;
+extern struct device_type greybus_svc_type;
+
+static inline int is_gb_host_device(const struct device *dev)
+{
+	return dev->type == &greybus_hd_type;
+}
+
+static inline int is_gb_module(const struct device *dev)
+{
+	return dev->type == &greybus_module_type;
+}
+
+static inline int is_gb_interface(const struct device *dev)
+{
+	return dev->type == &greybus_interface_type;
+}
+
+static inline int is_gb_control(const struct device *dev)
+{
+	return dev->type == &greybus_control_type;
+}
+
+static inline int is_gb_bundle(const struct device *dev)
+{
+	return dev->type == &greybus_bundle_type;
+}
+
+static inline int is_gb_svc(const struct device *dev)
+{
+	return dev->type == &greybus_svc_type;
+}
+
+static inline bool cport_id_valid(struct gb_host_device *hd, u16 cport_id)
+{
+	return cport_id != CPORT_ID_BAD && cport_id < hd->num_cports;
+}
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_GREYBUS_H */
diff --git a/include/linux/greybus/bundle.h b/include/linux/greybus/bundle.h
new file mode 100644
index 000000000000..8734d2055657
--- /dev/null
+++ b/include/linux/greybus/bundle.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus bundles
+ *
+ * Copyright 2014 Google Inc.
+ * Copyright 2014 Linaro Ltd.
+ */
+
+#ifndef __BUNDLE_H
+#define __BUNDLE_H
+
+#include <linux/list.h>
+
+#define	BUNDLE_ID_NONE	U8_MAX
+
+/* Greybus "public" definitions" */
+struct gb_bundle {
+	struct device		dev;
+	struct gb_interface	*intf;
+
+	u8			id;
+	u8			class;
+	u8			class_major;
+	u8			class_minor;
+
+	size_t			num_cports;
+	struct greybus_descriptor_cport *cport_desc;
+
+	struct list_head	connections;
+	u8			*state;
+
+	struct list_head	links;	/* interface->bundles */
+};
+#define to_gb_bundle(d) container_of(d, struct gb_bundle, dev)
+
+/* Greybus "private" definitions" */
+struct gb_bundle *gb_bundle_create(struct gb_interface *intf, u8 bundle_id,
+				   u8 class);
+int gb_bundle_add(struct gb_bundle *bundle);
+void gb_bundle_destroy(struct gb_bundle *bundle);
+
+/* Bundle Runtime PM wrappers */
+#ifdef CONFIG_PM
+static inline int gb_pm_runtime_get_sync(struct gb_bundle *bundle)
+{
+	int retval;
+
+	retval = pm_runtime_get_sync(&bundle->dev);
+	if (retval < 0) {
+		dev_err(&bundle->dev,
+			"pm_runtime_get_sync failed: %d\n", retval);
+		pm_runtime_put_noidle(&bundle->dev);
+		return retval;
+	}
+
+	return 0;
+}
+
+static inline int gb_pm_runtime_put_autosuspend(struct gb_bundle *bundle)
+{
+	int retval;
+
+	pm_runtime_mark_last_busy(&bundle->dev);
+	retval = pm_runtime_put_autosuspend(&bundle->dev);
+
+	return retval;
+}
+
+static inline void gb_pm_runtime_get_noresume(struct gb_bundle *bundle)
+{
+	pm_runtime_get_noresume(&bundle->dev);
+}
+
+static inline void gb_pm_runtime_put_noidle(struct gb_bundle *bundle)
+{
+	pm_runtime_put_noidle(&bundle->dev);
+}
+
+#else
+static inline int gb_pm_runtime_get_sync(struct gb_bundle *bundle)
+{ return 0; }
+static inline int gb_pm_runtime_put_autosuspend(struct gb_bundle *bundle)
+{ return 0; }
+
+static inline void gb_pm_runtime_get_noresume(struct gb_bundle *bundle) {}
+static inline void gb_pm_runtime_put_noidle(struct gb_bundle *bundle) {}
+#endif
+
+#endif /* __BUNDLE_H */
diff --git a/include/linux/greybus/connection.h b/include/linux/greybus/connection.h
new file mode 100644
index 000000000000..5ca3befc0636
--- /dev/null
+++ b/include/linux/greybus/connection.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus connections
+ *
+ * Copyright 2014 Google Inc.
+ * Copyright 2014 Linaro Ltd.
+ */
+
+#ifndef __CONNECTION_H
+#define __CONNECTION_H
+
+#include <linux/list.h>
+#include <linux/kfifo.h>
+
+#define GB_CONNECTION_FLAG_CSD		BIT(0)
+#define GB_CONNECTION_FLAG_NO_FLOWCTRL	BIT(1)
+#define GB_CONNECTION_FLAG_OFFLOADED	BIT(2)
+#define GB_CONNECTION_FLAG_CDSI1	BIT(3)
+#define GB_CONNECTION_FLAG_CONTROL	BIT(4)
+#define GB_CONNECTION_FLAG_HIGH_PRIO	BIT(5)
+
+#define GB_CONNECTION_FLAG_CORE_MASK	GB_CONNECTION_FLAG_CONTROL
+
+enum gb_connection_state {
+	GB_CONNECTION_STATE_DISABLED		= 0,
+	GB_CONNECTION_STATE_ENABLED_TX		= 1,
+	GB_CONNECTION_STATE_ENABLED		= 2,
+	GB_CONNECTION_STATE_DISCONNECTING	= 3,
+};
+
+struct gb_operation;
+
+typedef int (*gb_request_handler_t)(struct gb_operation *);
+
+struct gb_connection {
+	struct gb_host_device		*hd;
+	struct gb_interface		*intf;
+	struct gb_bundle		*bundle;
+	struct kref			kref;
+	u16				hd_cport_id;
+	u16				intf_cport_id;
+
+	struct list_head		hd_links;
+	struct list_head		bundle_links;
+
+	gb_request_handler_t		handler;
+	unsigned long			flags;
+
+	struct mutex			mutex;
+	spinlock_t			lock;
+	enum gb_connection_state	state;
+	struct list_head		operations;
+
+	char				name[16];
+	struct workqueue_struct		*wq;
+
+	atomic_t			op_cycle;
+
+	void				*private;
+
+	bool				mode_switch;
+};
+
+struct gb_connection *gb_connection_create_static(struct gb_host_device *hd,
+				u16 hd_cport_id, gb_request_handler_t handler);
+struct gb_connection *gb_connection_create_control(struct gb_interface *intf);
+struct gb_connection *gb_connection_create(struct gb_bundle *bundle,
+				u16 cport_id, gb_request_handler_t handler);
+struct gb_connection *gb_connection_create_flags(struct gb_bundle *bundle,
+				u16 cport_id, gb_request_handler_t handler,
+				unsigned long flags);
+struct gb_connection *gb_connection_create_offloaded(struct gb_bundle *bundle,
+				u16 cport_id, unsigned long flags);
+void gb_connection_destroy(struct gb_connection *connection);
+
+static inline bool gb_connection_is_static(struct gb_connection *connection)
+{
+	return !connection->intf;
+}
+
+int gb_connection_enable(struct gb_connection *connection);
+int gb_connection_enable_tx(struct gb_connection *connection);
+void gb_connection_disable_rx(struct gb_connection *connection);
+void gb_connection_disable(struct gb_connection *connection);
+void gb_connection_disable_forced(struct gb_connection *connection);
+
+void gb_connection_mode_switch_prepare(struct gb_connection *connection);
+void gb_connection_mode_switch_complete(struct gb_connection *connection);
+
+void greybus_data_rcvd(struct gb_host_device *hd, u16 cport_id,
+		       u8 *data, size_t length);
+
+void gb_connection_latency_tag_enable(struct gb_connection *connection);
+void gb_connection_latency_tag_disable(struct gb_connection *connection);
+
+static inline bool gb_connection_e2efc_enabled(struct gb_connection *connection)
+{
+	return !(connection->flags & GB_CONNECTION_FLAG_CSD);
+}
+
+static inline bool
+gb_connection_flow_control_disabled(struct gb_connection *connection)
+{
+	return connection->flags & GB_CONNECTION_FLAG_NO_FLOWCTRL;
+}
+
+static inline bool gb_connection_is_offloaded(struct gb_connection *connection)
+{
+	return connection->flags & GB_CONNECTION_FLAG_OFFLOADED;
+}
+
+static inline bool gb_connection_is_control(struct gb_connection *connection)
+{
+	return connection->flags & GB_CONNECTION_FLAG_CONTROL;
+}
+
+static inline void *gb_connection_get_data(struct gb_connection *connection)
+{
+	return connection->private;
+}
+
+static inline void gb_connection_set_data(struct gb_connection *connection,
+					  void *data)
+{
+	connection->private = data;
+}
+
+#endif /* __CONNECTION_H */
diff --git a/include/linux/greybus/control.h b/include/linux/greybus/control.h
new file mode 100644
index 000000000000..3a29ec05f631
--- /dev/null
+++ b/include/linux/greybus/control.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus CPort control protocol
+ *
+ * Copyright 2015 Google Inc.
+ * Copyright 2015 Linaro Ltd.
+ */
+
+#ifndef __CONTROL_H
+#define __CONTROL_H
+
+struct gb_control {
+	struct device dev;
+	struct gb_interface *intf;
+
+	struct gb_connection *connection;
+
+	u8 protocol_major;
+	u8 protocol_minor;
+
+	bool has_bundle_activate;
+	bool has_bundle_version;
+
+	char *vendor_string;
+	char *product_string;
+};
+#define to_gb_control(d) container_of(d, struct gb_control, dev)
+
+struct gb_control *gb_control_create(struct gb_interface *intf);
+int gb_control_enable(struct gb_control *control);
+void gb_control_disable(struct gb_control *control);
+int gb_control_suspend(struct gb_control *control);
+int gb_control_resume(struct gb_control *control);
+int gb_control_add(struct gb_control *control);
+void gb_control_del(struct gb_control *control);
+struct gb_control *gb_control_get(struct gb_control *control);
+void gb_control_put(struct gb_control *control);
+
+int gb_control_get_bundle_versions(struct gb_control *control);
+int gb_control_connected_operation(struct gb_control *control, u16 cport_id);
+int gb_control_disconnected_operation(struct gb_control *control, u16 cport_id);
+int gb_control_disconnecting_operation(struct gb_control *control,
+				       u16 cport_id);
+int gb_control_mode_switch_operation(struct gb_control *control);
+void gb_control_mode_switch_prepare(struct gb_control *control);
+void gb_control_mode_switch_complete(struct gb_control *control);
+int gb_control_get_manifest_size_operation(struct gb_interface *intf);
+int gb_control_get_manifest_operation(struct gb_interface *intf, void *manifest,
+				      size_t size);
+int gb_control_bundle_suspend(struct gb_control *control, u8 bundle_id);
+int gb_control_bundle_resume(struct gb_control *control, u8 bundle_id);
+int gb_control_bundle_deactivate(struct gb_control *control, u8 bundle_id);
+int gb_control_bundle_activate(struct gb_control *control, u8 bundle_id);
+int gb_control_interface_suspend_prepare(struct gb_control *control);
+int gb_control_interface_deactivate_prepare(struct gb_control *control);
+int gb_control_interface_hibernate_abort(struct gb_control *control);
+#endif /* __CONTROL_H */
diff --git a/include/linux/greybus/greybus_id.h b/include/linux/greybus/greybus_id.h
new file mode 100644
index 000000000000..f4c8440093e4
--- /dev/null
+++ b/include/linux/greybus/greybus_id.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* FIXME
+ * move this to include/linux/mod_devicetable.h when merging
+ */
+
+#ifndef __LINUX_GREYBUS_ID_H
+#define __LINUX_GREYBUS_ID_H
+
+#include <linux/types.h>
+#include <linux/mod_devicetable.h>
+
+
+struct greybus_bundle_id {
+	__u16	match_flags;
+	__u32	vendor;
+	__u32	product;
+	__u8	class;
+
+	kernel_ulong_t	driver_info __aligned(sizeof(kernel_ulong_t));
+};
+
+/* Used to match the greybus_bundle_id */
+#define GREYBUS_ID_MATCH_VENDOR		BIT(0)
+#define GREYBUS_ID_MATCH_PRODUCT	BIT(1)
+#define GREYBUS_ID_MATCH_CLASS		BIT(2)
+
+#endif /* __LINUX_GREYBUS_ID_H */
diff --git a/include/linux/greybus/greybus_manifest.h b/include/linux/greybus/greybus_manifest.h
new file mode 100644
index 000000000000..db68f7e7d5a7
--- /dev/null
+++ b/include/linux/greybus/greybus_manifest.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus manifest definition
+ *
+ * See "Greybus Application Protocol" document (version 0.1) for
+ * details on these values and structures.
+ *
+ * Copyright 2014-2015 Google Inc.
+ * Copyright 2014-2015 Linaro Ltd.
+ *
+ * Released under the GPLv2 and BSD licenses.
+ */
+
+#ifndef __GREYBUS_MANIFEST_H
+#define __GREYBUS_MANIFEST_H
+
+enum greybus_descriptor_type {
+	GREYBUS_TYPE_INVALID		= 0x00,
+	GREYBUS_TYPE_INTERFACE		= 0x01,
+	GREYBUS_TYPE_STRING		= 0x02,
+	GREYBUS_TYPE_BUNDLE		= 0x03,
+	GREYBUS_TYPE_CPORT		= 0x04,
+};
+
+enum greybus_protocol {
+	GREYBUS_PROTOCOL_CONTROL	= 0x00,
+	/* 0x01 is unused */
+	GREYBUS_PROTOCOL_GPIO		= 0x02,
+	GREYBUS_PROTOCOL_I2C		= 0x03,
+	GREYBUS_PROTOCOL_UART		= 0x04,
+	GREYBUS_PROTOCOL_HID		= 0x05,
+	GREYBUS_PROTOCOL_USB		= 0x06,
+	GREYBUS_PROTOCOL_SDIO		= 0x07,
+	GREYBUS_PROTOCOL_POWER_SUPPLY	= 0x08,
+	GREYBUS_PROTOCOL_PWM		= 0x09,
+	/* 0x0a is unused */
+	GREYBUS_PROTOCOL_SPI		= 0x0b,
+	GREYBUS_PROTOCOL_DISPLAY	= 0x0c,
+	GREYBUS_PROTOCOL_CAMERA_MGMT	= 0x0d,
+	GREYBUS_PROTOCOL_SENSOR		= 0x0e,
+	GREYBUS_PROTOCOL_LIGHTS		= 0x0f,
+	GREYBUS_PROTOCOL_VIBRATOR	= 0x10,
+	GREYBUS_PROTOCOL_LOOPBACK	= 0x11,
+	GREYBUS_PROTOCOL_AUDIO_MGMT	= 0x12,
+	GREYBUS_PROTOCOL_AUDIO_DATA	= 0x13,
+	GREYBUS_PROTOCOL_SVC            = 0x14,
+	GREYBUS_PROTOCOL_BOOTROM	= 0x15,
+	GREYBUS_PROTOCOL_CAMERA_DATA	= 0x16,
+	GREYBUS_PROTOCOL_FW_DOWNLOAD	= 0x17,
+	GREYBUS_PROTOCOL_FW_MANAGEMENT	= 0x18,
+	GREYBUS_PROTOCOL_AUTHENTICATION	= 0x19,
+	GREYBUS_PROTOCOL_LOG		= 0x1a,
+		/* ... */
+	GREYBUS_PROTOCOL_RAW		= 0xfe,
+	GREYBUS_PROTOCOL_VENDOR		= 0xff,
+};
+
+enum greybus_class_type {
+	GREYBUS_CLASS_CONTROL		= 0x00,
+	/* 0x01 is unused */
+	/* 0x02 is unused */
+	/* 0x03 is unused */
+	/* 0x04 is unused */
+	GREYBUS_CLASS_HID		= 0x05,
+	/* 0x06 is unused */
+	/* 0x07 is unused */
+	GREYBUS_CLASS_POWER_SUPPLY	= 0x08,
+	/* 0x09 is unused */
+	GREYBUS_CLASS_BRIDGED_PHY	= 0x0a,
+	/* 0x0b is unused */
+	GREYBUS_CLASS_DISPLAY		= 0x0c,
+	GREYBUS_CLASS_CAMERA		= 0x0d,
+	GREYBUS_CLASS_SENSOR		= 0x0e,
+	GREYBUS_CLASS_LIGHTS		= 0x0f,
+	GREYBUS_CLASS_VIBRATOR		= 0x10,
+	GREYBUS_CLASS_LOOPBACK		= 0x11,
+	GREYBUS_CLASS_AUDIO		= 0x12,
+	/* 0x13 is unused */
+	/* 0x14 is unused */
+	GREYBUS_CLASS_BOOTROM		= 0x15,
+	GREYBUS_CLASS_FW_MANAGEMENT	= 0x16,
+	GREYBUS_CLASS_LOG		= 0x17,
+		/* ... */
+	GREYBUS_CLASS_RAW		= 0xfe,
+	GREYBUS_CLASS_VENDOR		= 0xff,
+};
+
+enum {
+	GREYBUS_INTERFACE_FEATURE_TIMESYNC = BIT(0),
+};
+
+/*
+ * The string in a string descriptor is not NUL-terminated.  The
+ * size of the descriptor will be rounded up to a multiple of 4
+ * bytes, by padding the string with 0x00 bytes if necessary.
+ */
+struct greybus_descriptor_string {
+	__u8	length;
+	__u8	id;
+	__u8	string[0];
+} __packed;
+
+/*
+ * An interface descriptor describes information about an interface as a whole,
+ * *not* the functions within it.
+ */
+struct greybus_descriptor_interface {
+	__u8	vendor_stringid;
+	__u8	product_stringid;
+	__u8	features;
+	__u8	pad;
+} __packed;
+
+/*
+ * An bundle descriptor defines an identification number and a class for
+ * each bundle.
+ *
+ * @id: Uniquely identifies a bundle within a interface, its sole purpose is to
+ * allow CPort descriptors to specify which bundle they are associated with.
+ * The first bundle will have id 0, second will have 1 and so on.
+ *
+ * The largest CPort id associated with an bundle (defined by a
+ * CPort descriptor in the manifest) is used to determine how to
+ * encode the device id and module number in UniPro packets
+ * that use the bundle.
+ *
+ * @class: It is used by kernel to know the functionality provided by the
+ * bundle and will be matched against drivers functinality while probing greybus
+ * driver. It should contain one of the values defined in
+ * 'enum greybus_class_type'.
+ *
+ */
+struct greybus_descriptor_bundle {
+	__u8	id;	/* interface-relative id (0..) */
+	__u8	class;
+	__u8	pad[2];
+} __packed;
+
+/*
+ * A CPort descriptor indicates the id of the bundle within the
+ * module it's associated with, along with the CPort id used to
+ * address the CPort.  The protocol id defines the format of messages
+ * exchanged using the CPort.
+ */
+struct greybus_descriptor_cport {
+	__le16	id;
+	__u8	bundle;
+	__u8	protocol_id;	/* enum greybus_protocol */
+} __packed;
+
+struct greybus_descriptor_header {
+	__le16	size;
+	__u8	type;		/* enum greybus_descriptor_type */
+	__u8	pad;
+} __packed;
+
+struct greybus_descriptor {
+	struct greybus_descriptor_header		header;
+	union {
+		struct greybus_descriptor_string	string;
+		struct greybus_descriptor_interface	interface;
+		struct greybus_descriptor_bundle	bundle;
+		struct greybus_descriptor_cport		cport;
+	};
+} __packed;
+
+struct greybus_manifest_header {
+	__le16	size;
+	__u8	version_major;
+	__u8	version_minor;
+} __packed;
+
+struct greybus_manifest {
+	struct greybus_manifest_header		header;
+	struct greybus_descriptor		descriptors[0];
+} __packed;
+
+#endif /* __GREYBUS_MANIFEST_H */
diff --git a/include/linux/greybus/greybus_protocols.h b/include/linux/greybus/greybus_protocols.h
new file mode 100644
index 000000000000..5f34d1effb59
--- /dev/null
+++ b/include/linux/greybus/greybus_protocols.h
@@ -0,0 +1,2176 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2014 - 2015 Google Inc. All rights reserved.
+ * Copyright(c) 2014 - 2015 Linaro Ltd. All rights reserved.
+ */
+
+#ifndef __GREYBUS_PROTOCOLS_H
+#define __GREYBUS_PROTOCOLS_H
+
+/* Fixed IDs for control/svc protocols */
+
+/* SVC switch-port device ids */
+#define GB_SVC_DEVICE_ID_SVC			0
+#define GB_SVC_DEVICE_ID_AP			1
+#define GB_SVC_DEVICE_ID_MIN			2
+#define GB_SVC_DEVICE_ID_MAX			31
+
+#define GB_SVC_CPORT_ID				0
+#define GB_CONTROL_BUNDLE_ID			0
+#define GB_CONTROL_CPORT_ID			0
+
+
+/*
+ * All operation messages (both requests and responses) begin with
+ * a header that encodes the size of the message (header included).
+ * This header also contains a unique identifier, that associates a
+ * response message with its operation.  The header contains an
+ * operation type field, whose interpretation is dependent on what
+ * type of protocol is used over the connection.  The high bit
+ * (0x80) of the operation type field is used to indicate whether
+ * the message is a request (clear) or a response (set).
+ *
+ * Response messages include an additional result byte, which
+ * communicates the result of the corresponding request.  A zero
+ * result value means the operation completed successfully.  Any
+ * other value indicates an error; in this case, the payload of the
+ * response message (if any) is ignored.  The result byte must be
+ * zero in the header for a request message.
+ *
+ * The wire format for all numeric fields in the header is little
+ * endian.  Any operation-specific data begins immediately after the
+ * header.
+ */
+struct gb_operation_msg_hdr {
+	__le16	size;		/* Size in bytes of header + payload */
+	__le16	operation_id;	/* Operation unique id */
+	__u8	type;		/* E.g GB_I2C_TYPE_* or GB_GPIO_TYPE_* */
+	__u8	result;		/* Result of request (in responses only) */
+	__u8	pad[2];		/* must be zero (ignore when read) */
+} __packed;
+
+
+/* Generic request types */
+#define GB_REQUEST_TYPE_CPORT_SHUTDOWN		0x00
+#define GB_REQUEST_TYPE_INVALID			0x7f
+
+struct gb_cport_shutdown_request {
+	__u8 phase;
+} __packed;
+
+
+/* Control Protocol */
+
+/* Greybus control request types */
+#define GB_CONTROL_TYPE_VERSION			0x01
+#define GB_CONTROL_TYPE_PROBE_AP		0x02
+#define GB_CONTROL_TYPE_GET_MANIFEST_SIZE	0x03
+#define GB_CONTROL_TYPE_GET_MANIFEST		0x04
+#define GB_CONTROL_TYPE_CONNECTED		0x05
+#define GB_CONTROL_TYPE_DISCONNECTED		0x06
+#define GB_CONTROL_TYPE_TIMESYNC_ENABLE		0x07
+#define GB_CONTROL_TYPE_TIMESYNC_DISABLE	0x08
+#define GB_CONTROL_TYPE_TIMESYNC_AUTHORITATIVE	0x09
+/*	Unused					0x0a */
+#define GB_CONTROL_TYPE_BUNDLE_VERSION		0x0b
+#define GB_CONTROL_TYPE_DISCONNECTING		0x0c
+#define GB_CONTROL_TYPE_TIMESYNC_GET_LAST_EVENT	0x0d
+#define GB_CONTROL_TYPE_MODE_SWITCH		0x0e
+#define GB_CONTROL_TYPE_BUNDLE_SUSPEND		0x0f
+#define GB_CONTROL_TYPE_BUNDLE_RESUME		0x10
+#define GB_CONTROL_TYPE_BUNDLE_DEACTIVATE	0x11
+#define GB_CONTROL_TYPE_BUNDLE_ACTIVATE		0x12
+#define GB_CONTROL_TYPE_INTF_SUSPEND_PREPARE		0x13
+#define GB_CONTROL_TYPE_INTF_DEACTIVATE_PREPARE	0x14
+#define GB_CONTROL_TYPE_INTF_HIBERNATE_ABORT	0x15
+
+struct gb_control_version_request {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+struct gb_control_version_response {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+struct gb_control_bundle_version_request {
+	__u8	bundle_id;
+} __packed;
+
+struct gb_control_bundle_version_response {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+/* Control protocol manifest get size request has no payload*/
+struct gb_control_get_manifest_size_response {
+	__le16			size;
+} __packed;
+
+/* Control protocol manifest get request has no payload */
+struct gb_control_get_manifest_response {
+	__u8			data[0];
+} __packed;
+
+/* Control protocol [dis]connected request */
+struct gb_control_connected_request {
+	__le16			cport_id;
+} __packed;
+
+struct gb_control_disconnecting_request {
+	__le16			cport_id;
+} __packed;
+/* disconnecting response has no payload */
+
+struct gb_control_disconnected_request {
+	__le16			cport_id;
+} __packed;
+/* Control protocol [dis]connected response has no payload */
+
+/*
+ * All Bundle power management operations use the same request and response
+ * layout and status codes.
+ */
+
+#define GB_CONTROL_BUNDLE_PM_OK		0x00
+#define GB_CONTROL_BUNDLE_PM_INVAL	0x01
+#define GB_CONTROL_BUNDLE_PM_BUSY	0x02
+#define GB_CONTROL_BUNDLE_PM_FAIL	0x03
+#define GB_CONTROL_BUNDLE_PM_NA		0x04
+
+struct gb_control_bundle_pm_request {
+	__u8	bundle_id;
+} __packed;
+
+struct gb_control_bundle_pm_response {
+	__u8	status;
+} __packed;
+
+/*
+ * Interface Suspend Prepare and Deactivate Prepare operations use the same
+ * response layout and error codes. Define a single response structure and reuse
+ * it. Both operations have no payload.
+ */
+
+#define GB_CONTROL_INTF_PM_OK		0x00
+#define GB_CONTROL_INTF_PM_BUSY		0x01
+#define GB_CONTROL_INTF_PM_NA		0x02
+
+struct gb_control_intf_pm_response {
+	__u8	status;
+} __packed;
+
+/* APBridge protocol */
+
+/* request APB1 log */
+#define GB_APB_REQUEST_LOG			0x02
+
+/* request to map a cport to bulk in and bulk out endpoints */
+#define GB_APB_REQUEST_EP_MAPPING		0x03
+
+/* request to get the number of cports available */
+#define GB_APB_REQUEST_CPORT_COUNT		0x04
+
+/* request to reset a cport state */
+#define GB_APB_REQUEST_RESET_CPORT		0x05
+
+/* request to time the latency of messages on a given cport */
+#define GB_APB_REQUEST_LATENCY_TAG_EN		0x06
+#define GB_APB_REQUEST_LATENCY_TAG_DIS		0x07
+
+/* request to control the CSI transmitter */
+#define GB_APB_REQUEST_CSI_TX_CONTROL		0x08
+
+/* request to control audio streaming */
+#define GB_APB_REQUEST_AUDIO_CONTROL		0x09
+
+/* TimeSync requests */
+#define GB_APB_REQUEST_TIMESYNC_ENABLE		0x0d
+#define GB_APB_REQUEST_TIMESYNC_DISABLE		0x0e
+#define GB_APB_REQUEST_TIMESYNC_AUTHORITATIVE	0x0f
+#define GB_APB_REQUEST_TIMESYNC_GET_LAST_EVENT	0x10
+
+/* requests to set Greybus CPort flags */
+#define GB_APB_REQUEST_CPORT_FLAGS		0x11
+
+/* ARPC request */
+#define GB_APB_REQUEST_ARPC_RUN			0x12
+
+struct gb_apb_request_cport_flags {
+	__le32	flags;
+#define GB_APB_CPORT_FLAG_CONTROL		0x01
+#define GB_APB_CPORT_FLAG_HIGH_PRIO		0x02
+} __packed;
+
+
+/* Firmware Download Protocol */
+
+/* Request Types */
+#define GB_FW_DOWNLOAD_TYPE_FIND_FIRMWARE	0x01
+#define GB_FW_DOWNLOAD_TYPE_FETCH_FIRMWARE	0x02
+#define GB_FW_DOWNLOAD_TYPE_RELEASE_FIRMWARE	0x03
+
+#define GB_FIRMWARE_TAG_MAX_SIZE		10
+
+/* firmware download find firmware request/response */
+struct gb_fw_download_find_firmware_request {
+	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
+} __packed;
+
+struct gb_fw_download_find_firmware_response {
+	__u8			firmware_id;
+	__le32			size;
+} __packed;
+
+/* firmware download fetch firmware request/response */
+struct gb_fw_download_fetch_firmware_request {
+	__u8			firmware_id;
+	__le32			offset;
+	__le32			size;
+} __packed;
+
+struct gb_fw_download_fetch_firmware_response {
+	__u8			data[0];
+} __packed;
+
+/* firmware download release firmware request */
+struct gb_fw_download_release_firmware_request {
+	__u8			firmware_id;
+} __packed;
+/* firmware download release firmware response has no payload */
+
+
+/* Firmware Management Protocol */
+
+/* Request Types */
+#define GB_FW_MGMT_TYPE_INTERFACE_FW_VERSION	0x01
+#define GB_FW_MGMT_TYPE_LOAD_AND_VALIDATE_FW	0x02
+#define GB_FW_MGMT_TYPE_LOADED_FW		0x03
+#define GB_FW_MGMT_TYPE_BACKEND_FW_VERSION	0x04
+#define GB_FW_MGMT_TYPE_BACKEND_FW_UPDATE	0x05
+#define GB_FW_MGMT_TYPE_BACKEND_FW_UPDATED	0x06
+
+#define GB_FW_LOAD_METHOD_UNIPRO		0x01
+#define GB_FW_LOAD_METHOD_INTERNAL		0x02
+
+#define GB_FW_LOAD_STATUS_FAILED		0x00
+#define GB_FW_LOAD_STATUS_UNVALIDATED		0x01
+#define GB_FW_LOAD_STATUS_VALIDATED		0x02
+#define GB_FW_LOAD_STATUS_VALIDATION_FAILED	0x03
+
+#define GB_FW_BACKEND_FW_STATUS_SUCCESS		0x01
+#define GB_FW_BACKEND_FW_STATUS_FAIL_FIND	0x02
+#define GB_FW_BACKEND_FW_STATUS_FAIL_FETCH	0x03
+#define GB_FW_BACKEND_FW_STATUS_FAIL_WRITE	0x04
+#define GB_FW_BACKEND_FW_STATUS_INT		0x05
+#define GB_FW_BACKEND_FW_STATUS_RETRY		0x06
+#define GB_FW_BACKEND_FW_STATUS_NOT_SUPPORTED	0x07
+
+#define GB_FW_BACKEND_VERSION_STATUS_SUCCESS		0x01
+#define GB_FW_BACKEND_VERSION_STATUS_NOT_AVAILABLE	0x02
+#define GB_FW_BACKEND_VERSION_STATUS_NOT_SUPPORTED	0x03
+#define GB_FW_BACKEND_VERSION_STATUS_RETRY		0x04
+#define GB_FW_BACKEND_VERSION_STATUS_FAIL_INT		0x05
+
+/* firmware management interface firmware version request has no payload */
+struct gb_fw_mgmt_interface_fw_version_response {
+	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
+	__le16			major;
+	__le16			minor;
+} __packed;
+
+/* firmware management load and validate firmware request/response */
+struct gb_fw_mgmt_load_and_validate_fw_request {
+	__u8			request_id;
+	__u8			load_method;
+	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
+} __packed;
+/* firmware management load and validate firmware response has no payload*/
+
+/* firmware management loaded firmware request */
+struct gb_fw_mgmt_loaded_fw_request {
+	__u8			request_id;
+	__u8			status;
+	__le16			major;
+	__le16			minor;
+} __packed;
+/* firmware management loaded firmware response has no payload */
+
+/* firmware management backend firmware version request/response */
+struct gb_fw_mgmt_backend_fw_version_request {
+	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
+} __packed;
+
+struct gb_fw_mgmt_backend_fw_version_response {
+	__le16			major;
+	__le16			minor;
+	__u8			status;
+} __packed;
+
+/* firmware management backend firmware update request */
+struct gb_fw_mgmt_backend_fw_update_request {
+	__u8			request_id;
+	__u8			firmware_tag[GB_FIRMWARE_TAG_MAX_SIZE];
+} __packed;
+/* firmware management backend firmware update response has no payload */
+
+/* firmware management backend firmware updated request */
+struct gb_fw_mgmt_backend_fw_updated_request {
+	__u8			request_id;
+	__u8			status;
+} __packed;
+/* firmware management backend firmware updated response has no payload */
+
+
+/* Component Authentication Protocol (CAP) */
+
+/* Request Types */
+#define GB_CAP_TYPE_GET_ENDPOINT_UID	0x01
+#define GB_CAP_TYPE_GET_IMS_CERTIFICATE	0x02
+#define GB_CAP_TYPE_AUTHENTICATE	0x03
+
+/* CAP get endpoint uid request has no payload */
+struct gb_cap_get_endpoint_uid_response {
+	__u8			uid[8];
+} __packed;
+
+/* CAP get endpoint ims certificate request/response */
+struct gb_cap_get_ims_certificate_request {
+	__le32			certificate_class;
+	__le32			certificate_id;
+} __packed;
+
+struct gb_cap_get_ims_certificate_response {
+	__u8			result_code;
+	__u8			certificate[0];
+} __packed;
+
+/* CAP authenticate request/response */
+struct gb_cap_authenticate_request {
+	__le32			auth_type;
+	__u8			uid[8];
+	__u8			challenge[32];
+} __packed;
+
+struct gb_cap_authenticate_response {
+	__u8			result_code;
+	__u8			response[64];
+	__u8			signature[0];
+} __packed;
+
+
+/* Bootrom Protocol */
+
+/* Version of the Greybus bootrom protocol we support */
+#define GB_BOOTROM_VERSION_MAJOR		0x00
+#define GB_BOOTROM_VERSION_MINOR		0x01
+
+/* Greybus bootrom request types */
+#define GB_BOOTROM_TYPE_VERSION			0x01
+#define GB_BOOTROM_TYPE_FIRMWARE_SIZE		0x02
+#define GB_BOOTROM_TYPE_GET_FIRMWARE		0x03
+#define GB_BOOTROM_TYPE_READY_TO_BOOT		0x04
+#define GB_BOOTROM_TYPE_AP_READY		0x05	/* Request with no-payload */
+#define GB_BOOTROM_TYPE_GET_VID_PID		0x06	/* Request with no-payload */
+
+/* Greybus bootrom boot stages */
+#define GB_BOOTROM_BOOT_STAGE_ONE		0x01 /* Reserved for the boot ROM */
+#define GB_BOOTROM_BOOT_STAGE_TWO		0x02 /* Bootrom package to be loaded by the boot ROM */
+#define GB_BOOTROM_BOOT_STAGE_THREE		0x03 /* Module personality package loaded by Stage 2 firmware */
+
+/* Greybus bootrom ready to boot status */
+#define GB_BOOTROM_BOOT_STATUS_INVALID		0x00 /* Firmware blob could not be validated */
+#define GB_BOOTROM_BOOT_STATUS_INSECURE		0x01 /* Firmware blob is valid but insecure */
+#define GB_BOOTROM_BOOT_STATUS_SECURE		0x02 /* Firmware blob is valid and secure */
+
+/* Max bootrom data fetch size in bytes */
+#define GB_BOOTROM_FETCH_MAX			2000
+
+struct gb_bootrom_version_request {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+struct gb_bootrom_version_response {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+/* Bootrom protocol firmware size request/response */
+struct gb_bootrom_firmware_size_request {
+	__u8			stage;
+} __packed;
+
+struct gb_bootrom_firmware_size_response {
+	__le32			size;
+} __packed;
+
+/* Bootrom protocol get firmware request/response */
+struct gb_bootrom_get_firmware_request {
+	__le32			offset;
+	__le32			size;
+} __packed;
+
+struct gb_bootrom_get_firmware_response {
+	__u8			data[0];
+} __packed;
+
+/* Bootrom protocol Ready to boot request */
+struct gb_bootrom_ready_to_boot_request {
+	__u8			status;
+} __packed;
+/* Bootrom protocol Ready to boot response has no payload */
+
+/* Bootrom protocol get VID/PID request has no payload */
+struct gb_bootrom_get_vid_pid_response {
+	__le32			vendor_id;
+	__le32			product_id;
+} __packed;
+
+
+/* Power Supply */
+
+/* Greybus power supply request types */
+#define GB_POWER_SUPPLY_TYPE_GET_SUPPLIES		0x02
+#define GB_POWER_SUPPLY_TYPE_GET_DESCRIPTION		0x03
+#define GB_POWER_SUPPLY_TYPE_GET_PROP_DESCRIPTORS	0x04
+#define GB_POWER_SUPPLY_TYPE_GET_PROPERTY		0x05
+#define GB_POWER_SUPPLY_TYPE_SET_PROPERTY		0x06
+#define GB_POWER_SUPPLY_TYPE_EVENT			0x07
+
+/* Greybus power supply battery technologies types */
+#define GB_POWER_SUPPLY_TECH_UNKNOWN			0x0000
+#define GB_POWER_SUPPLY_TECH_NiMH			0x0001
+#define GB_POWER_SUPPLY_TECH_LION			0x0002
+#define GB_POWER_SUPPLY_TECH_LIPO			0x0003
+#define GB_POWER_SUPPLY_TECH_LiFe			0x0004
+#define GB_POWER_SUPPLY_TECH_NiCd			0x0005
+#define GB_POWER_SUPPLY_TECH_LiMn			0x0006
+
+/* Greybus power supply types */
+#define GB_POWER_SUPPLY_UNKNOWN_TYPE			0x0000
+#define GB_POWER_SUPPLY_BATTERY_TYPE			0x0001
+#define GB_POWER_SUPPLY_UPS_TYPE			0x0002
+#define GB_POWER_SUPPLY_MAINS_TYPE			0x0003
+#define GB_POWER_SUPPLY_USB_TYPE			0x0004
+#define GB_POWER_SUPPLY_USB_DCP_TYPE			0x0005
+#define GB_POWER_SUPPLY_USB_CDP_TYPE			0x0006
+#define GB_POWER_SUPPLY_USB_ACA_TYPE			0x0007
+
+/* Greybus power supply health values */
+#define GB_POWER_SUPPLY_HEALTH_UNKNOWN			0x0000
+#define GB_POWER_SUPPLY_HEALTH_GOOD			0x0001
+#define GB_POWER_SUPPLY_HEALTH_OVERHEAT			0x0002
+#define GB_POWER_SUPPLY_HEALTH_DEAD			0x0003
+#define GB_POWER_SUPPLY_HEALTH_OVERVOLTAGE		0x0004
+#define GB_POWER_SUPPLY_HEALTH_UNSPEC_FAILURE		0x0005
+#define GB_POWER_SUPPLY_HEALTH_COLD			0x0006
+#define GB_POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE	0x0007
+#define GB_POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE	0x0008
+
+/* Greybus power supply status values */
+#define GB_POWER_SUPPLY_STATUS_UNKNOWN			0x0000
+#define GB_POWER_SUPPLY_STATUS_CHARGING			0x0001
+#define GB_POWER_SUPPLY_STATUS_DISCHARGING		0x0002
+#define GB_POWER_SUPPLY_STATUS_NOT_CHARGING		0x0003
+#define GB_POWER_SUPPLY_STATUS_FULL			0x0004
+
+/* Greybus power supply capacity level values */
+#define GB_POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN		0x0000
+#define GB_POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL		0x0001
+#define GB_POWER_SUPPLY_CAPACITY_LEVEL_LOW		0x0002
+#define GB_POWER_SUPPLY_CAPACITY_LEVEL_NORMAL		0x0003
+#define GB_POWER_SUPPLY_CAPACITY_LEVEL_HIGH		0x0004
+#define GB_POWER_SUPPLY_CAPACITY_LEVEL_FULL		0x0005
+
+/* Greybus power supply scope values */
+#define GB_POWER_SUPPLY_SCOPE_UNKNOWN			0x0000
+#define GB_POWER_SUPPLY_SCOPE_SYSTEM			0x0001
+#define GB_POWER_SUPPLY_SCOPE_DEVICE			0x0002
+
+struct gb_power_supply_get_supplies_response {
+	__u8	supplies_count;
+} __packed;
+
+struct gb_power_supply_get_description_request {
+	__u8	psy_id;
+} __packed;
+
+struct gb_power_supply_get_description_response {
+	__u8	manufacturer[32];
+	__u8	model[32];
+	__u8	serial_number[32];
+	__le16	type;
+	__u8	properties_count;
+} __packed;
+
+struct gb_power_supply_props_desc {
+	__u8	property;
+#define GB_POWER_SUPPLY_PROP_STATUS				0x00
+#define GB_POWER_SUPPLY_PROP_CHARGE_TYPE			0x01
+#define GB_POWER_SUPPLY_PROP_HEALTH				0x02
+#define GB_POWER_SUPPLY_PROP_PRESENT				0x03
+#define GB_POWER_SUPPLY_PROP_ONLINE				0x04
+#define GB_POWER_SUPPLY_PROP_AUTHENTIC				0x05
+#define GB_POWER_SUPPLY_PROP_TECHNOLOGY				0x06
+#define GB_POWER_SUPPLY_PROP_CYCLE_COUNT			0x07
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_MAX			0x08
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_MIN			0x09
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_MAX_DESIGN			0x0A
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN			0x0B
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_NOW			0x0C
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_AVG			0x0D
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_OCV			0x0E
+#define GB_POWER_SUPPLY_PROP_VOLTAGE_BOOT			0x0F
+#define GB_POWER_SUPPLY_PROP_CURRENT_MAX			0x10
+#define GB_POWER_SUPPLY_PROP_CURRENT_NOW			0x11
+#define GB_POWER_SUPPLY_PROP_CURRENT_AVG			0x12
+#define GB_POWER_SUPPLY_PROP_CURRENT_BOOT			0x13
+#define GB_POWER_SUPPLY_PROP_POWER_NOW				0x14
+#define GB_POWER_SUPPLY_PROP_POWER_AVG				0x15
+#define GB_POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN			0x16
+#define GB_POWER_SUPPLY_PROP_CHARGE_EMPTY_DESIGN		0x17
+#define GB_POWER_SUPPLY_PROP_CHARGE_FULL			0x18
+#define GB_POWER_SUPPLY_PROP_CHARGE_EMPTY			0x19
+#define GB_POWER_SUPPLY_PROP_CHARGE_NOW				0x1A
+#define GB_POWER_SUPPLY_PROP_CHARGE_AVG				0x1B
+#define GB_POWER_SUPPLY_PROP_CHARGE_COUNTER			0x1C
+#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT		0x1D
+#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX	0x1E
+#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE		0x1F
+#define GB_POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX	0x20
+#define GB_POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT		0x21
+#define GB_POWER_SUPPLY_PROP_CHARGE_CONTROL_LIMIT_MAX		0x22
+#define GB_POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT		0x23
+#define GB_POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN			0x24
+#define GB_POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN		0x25
+#define GB_POWER_SUPPLY_PROP_ENERGY_FULL			0x26
+#define GB_POWER_SUPPLY_PROP_ENERGY_EMPTY			0x27
+#define GB_POWER_SUPPLY_PROP_ENERGY_NOW				0x28
+#define GB_POWER_SUPPLY_PROP_ENERGY_AVG				0x29
+#define GB_POWER_SUPPLY_PROP_CAPACITY				0x2A
+#define GB_POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN			0x2B
+#define GB_POWER_SUPPLY_PROP_CAPACITY_ALERT_MAX			0x2C
+#define GB_POWER_SUPPLY_PROP_CAPACITY_LEVEL			0x2D
+#define GB_POWER_SUPPLY_PROP_TEMP				0x2E
+#define GB_POWER_SUPPLY_PROP_TEMP_MAX				0x2F
+#define GB_POWER_SUPPLY_PROP_TEMP_MIN				0x30
+#define GB_POWER_SUPPLY_PROP_TEMP_ALERT_MIN			0x31
+#define GB_POWER_SUPPLY_PROP_TEMP_ALERT_MAX			0x32
+#define GB_POWER_SUPPLY_PROP_TEMP_AMBIENT			0x33
+#define GB_POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN		0x34
+#define GB_POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX		0x35
+#define GB_POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW			0x36
+#define GB_POWER_SUPPLY_PROP_TIME_TO_EMPTY_AVG			0x37
+#define GB_POWER_SUPPLY_PROP_TIME_TO_FULL_NOW			0x38
+#define GB_POWER_SUPPLY_PROP_TIME_TO_FULL_AVG			0x39
+#define GB_POWER_SUPPLY_PROP_TYPE				0x3A
+#define GB_POWER_SUPPLY_PROP_SCOPE				0x3B
+#define GB_POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT		0x3C
+#define GB_POWER_SUPPLY_PROP_CALIBRATE				0x3D
+	__u8	is_writeable;
+} __packed;
+
+struct gb_power_supply_get_property_descriptors_request {
+	__u8	psy_id;
+} __packed;
+
+struct gb_power_supply_get_property_descriptors_response {
+	__u8	properties_count;
+	struct gb_power_supply_props_desc props[];
+} __packed;
+
+struct gb_power_supply_get_property_request {
+	__u8	psy_id;
+	__u8	property;
+} __packed;
+
+struct gb_power_supply_get_property_response {
+	__le32	prop_val;
+};
+
+struct gb_power_supply_set_property_request {
+	__u8	psy_id;
+	__u8	property;
+	__le32	prop_val;
+} __packed;
+
+struct gb_power_supply_event_request {
+	__u8	psy_id;
+	__u8	event;
+#define GB_POWER_SUPPLY_UPDATE		0x01
+} __packed;
+
+
+/* HID */
+
+/* Greybus HID operation types */
+#define GB_HID_TYPE_GET_DESC		0x02
+#define GB_HID_TYPE_GET_REPORT_DESC	0x03
+#define GB_HID_TYPE_PWR_ON		0x04
+#define GB_HID_TYPE_PWR_OFF		0x05
+#define GB_HID_TYPE_GET_REPORT		0x06
+#define GB_HID_TYPE_SET_REPORT		0x07
+#define GB_HID_TYPE_IRQ_EVENT		0x08
+
+/* Report type */
+#define GB_HID_INPUT_REPORT		0
+#define GB_HID_OUTPUT_REPORT		1
+#define GB_HID_FEATURE_REPORT		2
+
+/* Different request/response structures */
+/* HID get descriptor response */
+struct gb_hid_desc_response {
+	__u8				bLength;
+	__le16				wReportDescLength;
+	__le16				bcdHID;
+	__le16				wProductID;
+	__le16				wVendorID;
+	__u8				bCountryCode;
+} __packed;
+
+/* HID get report request/response */
+struct gb_hid_get_report_request {
+	__u8				report_type;
+	__u8				report_id;
+} __packed;
+
+/* HID set report request */
+struct gb_hid_set_report_request {
+	__u8				report_type;
+	__u8				report_id;
+	__u8				report[0];
+} __packed;
+
+/* HID input report request, via interrupt pipe */
+struct gb_hid_input_report_request {
+	__u8				report[0];
+} __packed;
+
+
+/* I2C */
+
+/* Greybus i2c request types */
+#define GB_I2C_TYPE_FUNCTIONALITY	0x02
+#define GB_I2C_TYPE_TRANSFER		0x05
+
+/* functionality request has no payload */
+struct gb_i2c_functionality_response {
+	__le32	functionality;
+} __packed;
+
+/*
+ * Outgoing data immediately follows the op count and ops array.
+ * The data for each write (master -> slave) op in the array is sent
+ * in order, with no (e.g. pad) bytes separating them.
+ *
+ * Short reads cause the entire transfer request to fail So response
+ * payload consists only of bytes read, and the number of bytes is
+ * exactly what was specified in the corresponding op.  Like
+ * outgoing data, the incoming data is in order and contiguous.
+ */
+struct gb_i2c_transfer_op {
+	__le16	addr;
+	__le16	flags;
+	__le16	size;
+} __packed;
+
+struct gb_i2c_transfer_request {
+	__le16				op_count;
+	struct gb_i2c_transfer_op	ops[0];		/* op_count of these */
+} __packed;
+struct gb_i2c_transfer_response {
+	__u8				data[0];	/* inbound data */
+} __packed;
+
+
+/* GPIO */
+
+/* Greybus GPIO request types */
+#define GB_GPIO_TYPE_LINE_COUNT		0x02
+#define GB_GPIO_TYPE_ACTIVATE		0x03
+#define GB_GPIO_TYPE_DEACTIVATE		0x04
+#define GB_GPIO_TYPE_GET_DIRECTION	0x05
+#define GB_GPIO_TYPE_DIRECTION_IN	0x06
+#define GB_GPIO_TYPE_DIRECTION_OUT	0x07
+#define GB_GPIO_TYPE_GET_VALUE		0x08
+#define GB_GPIO_TYPE_SET_VALUE		0x09
+#define GB_GPIO_TYPE_SET_DEBOUNCE	0x0a
+#define GB_GPIO_TYPE_IRQ_TYPE		0x0b
+#define GB_GPIO_TYPE_IRQ_MASK		0x0c
+#define GB_GPIO_TYPE_IRQ_UNMASK		0x0d
+#define GB_GPIO_TYPE_IRQ_EVENT		0x0e
+
+#define GB_GPIO_IRQ_TYPE_NONE		0x00
+#define GB_GPIO_IRQ_TYPE_EDGE_RISING	0x01
+#define GB_GPIO_IRQ_TYPE_EDGE_FALLING	0x02
+#define GB_GPIO_IRQ_TYPE_EDGE_BOTH	0x03
+#define GB_GPIO_IRQ_TYPE_LEVEL_HIGH	0x04
+#define GB_GPIO_IRQ_TYPE_LEVEL_LOW	0x08
+
+/* line count request has no payload */
+struct gb_gpio_line_count_response {
+	__u8	count;
+} __packed;
+
+struct gb_gpio_activate_request {
+	__u8	which;
+} __packed;
+/* activate response has no payload */
+
+struct gb_gpio_deactivate_request {
+	__u8	which;
+} __packed;
+/* deactivate response has no payload */
+
+struct gb_gpio_get_direction_request {
+	__u8	which;
+} __packed;
+struct gb_gpio_get_direction_response {
+	__u8	direction;
+} __packed;
+
+struct gb_gpio_direction_in_request {
+	__u8	which;
+} __packed;
+/* direction in response has no payload */
+
+struct gb_gpio_direction_out_request {
+	__u8	which;
+	__u8	value;
+} __packed;
+/* direction out response has no payload */
+
+struct gb_gpio_get_value_request {
+	__u8	which;
+} __packed;
+struct gb_gpio_get_value_response {
+	__u8	value;
+} __packed;
+
+struct gb_gpio_set_value_request {
+	__u8	which;
+	__u8	value;
+} __packed;
+/* set value response has no payload */
+
+struct gb_gpio_set_debounce_request {
+	__u8	which;
+	__le16	usec;
+} __packed;
+/* debounce response has no payload */
+
+struct gb_gpio_irq_type_request {
+	__u8	which;
+	__u8	type;
+} __packed;
+/* irq type response has no payload */
+
+struct gb_gpio_irq_mask_request {
+	__u8	which;
+} __packed;
+/* irq mask response has no payload */
+
+struct gb_gpio_irq_unmask_request {
+	__u8	which;
+} __packed;
+/* irq unmask response has no payload */
+
+/* irq event requests originate on another module and are handled on the AP */
+struct gb_gpio_irq_event_request {
+	__u8	which;
+} __packed;
+/* irq event has no response */
+
+
+/* PWM */
+
+/* Greybus PWM operation types */
+#define GB_PWM_TYPE_PWM_COUNT		0x02
+#define GB_PWM_TYPE_ACTIVATE		0x03
+#define GB_PWM_TYPE_DEACTIVATE		0x04
+#define GB_PWM_TYPE_CONFIG		0x05
+#define GB_PWM_TYPE_POLARITY		0x06
+#define GB_PWM_TYPE_ENABLE		0x07
+#define GB_PWM_TYPE_DISABLE		0x08
+
+/* pwm count request has no payload */
+struct gb_pwm_count_response {
+	__u8	count;
+} __packed;
+
+struct gb_pwm_activate_request {
+	__u8	which;
+} __packed;
+
+struct gb_pwm_deactivate_request {
+	__u8	which;
+} __packed;
+
+struct gb_pwm_config_request {
+	__u8	which;
+	__le32	duty;
+	__le32	period;
+} __packed;
+
+struct gb_pwm_polarity_request {
+	__u8	which;
+	__u8	polarity;
+} __packed;
+
+struct gb_pwm_enable_request {
+	__u8	which;
+} __packed;
+
+struct gb_pwm_disable_request {
+	__u8	which;
+} __packed;
+
+/* SPI */
+
+/* Should match up with modes in linux/spi/spi.h */
+#define GB_SPI_MODE_CPHA		0x01		/* clock phase */
+#define GB_SPI_MODE_CPOL		0x02		/* clock polarity */
+#define GB_SPI_MODE_MODE_0		(0 | 0)		/* (original MicroWire) */
+#define GB_SPI_MODE_MODE_1		(0 | GB_SPI_MODE_CPHA)
+#define GB_SPI_MODE_MODE_2		(GB_SPI_MODE_CPOL | 0)
+#define GB_SPI_MODE_MODE_3		(GB_SPI_MODE_CPOL | GB_SPI_MODE_CPHA)
+#define GB_SPI_MODE_CS_HIGH		0x04		/* chipselect active high? */
+#define GB_SPI_MODE_LSB_FIRST		0x08		/* per-word bits-on-wire */
+#define GB_SPI_MODE_3WIRE		0x10		/* SI/SO signals shared */
+#define GB_SPI_MODE_LOOP		0x20		/* loopback mode */
+#define GB_SPI_MODE_NO_CS		0x40		/* 1 dev/bus, no chipselect */
+#define GB_SPI_MODE_READY		0x80		/* slave pulls low to pause */
+
+/* Should match up with flags in linux/spi/spi.h */
+#define GB_SPI_FLAG_HALF_DUPLEX		BIT(0)		/* can't do full duplex */
+#define GB_SPI_FLAG_NO_RX		BIT(1)		/* can't do buffer read */
+#define GB_SPI_FLAG_NO_TX		BIT(2)		/* can't do buffer write */
+
+/* Greybus spi operation types */
+#define GB_SPI_TYPE_MASTER_CONFIG	0x02
+#define GB_SPI_TYPE_DEVICE_CONFIG	0x03
+#define GB_SPI_TYPE_TRANSFER		0x04
+
+/* mode request has no payload */
+struct gb_spi_master_config_response {
+	__le32	bits_per_word_mask;
+	__le32	min_speed_hz;
+	__le32	max_speed_hz;
+	__le16	mode;
+	__le16	flags;
+	__u8	num_chipselect;
+} __packed;
+
+struct gb_spi_device_config_request {
+	__u8	chip_select;
+} __packed;
+
+struct gb_spi_device_config_response {
+	__le16	mode;
+	__u8	bits_per_word;
+	__le32	max_speed_hz;
+	__u8	device_type;
+#define GB_SPI_SPI_DEV		0x00
+#define GB_SPI_SPI_NOR		0x01
+#define GB_SPI_SPI_MODALIAS	0x02
+	__u8	name[32];
+} __packed;
+
+/**
+ * struct gb_spi_transfer - a read/write buffer pair
+ * @speed_hz: Select a speed other than the device default for this transfer. If
+ *	0 the default (from @spi_device) is used.
+ * @len: size of rx and tx buffers (in bytes)
+ * @delay_usecs: microseconds to delay after this transfer before (optionally)
+ * 	changing the chipselect status, then starting the next transfer or
+ * 	completing this spi_message.
+ * @cs_change: affects chipselect after this transfer completes
+ * @bits_per_word: select a bits_per_word other than the device default for this
+ *	transfer. If 0 the default (from @spi_device) is used.
+ */
+struct gb_spi_transfer {
+	__le32		speed_hz;
+	__le32		len;
+	__le16		delay_usecs;
+	__u8		cs_change;
+	__u8		bits_per_word;
+	__u8		xfer_flags;
+#define GB_SPI_XFER_READ	0x01
+#define GB_SPI_XFER_WRITE	0x02
+#define GB_SPI_XFER_INPROGRESS	0x04
+} __packed;
+
+struct gb_spi_transfer_request {
+	__u8			chip_select;	/* of the spi device */
+	__u8			mode;		/* of the spi device */
+	__le16			count;
+	struct gb_spi_transfer	transfers[0];	/* count of these */
+} __packed;
+
+struct gb_spi_transfer_response {
+	__u8			data[0];	/* inbound data */
+} __packed;
+
+/* Version of the Greybus SVC protocol we support */
+#define GB_SVC_VERSION_MAJOR		0x00
+#define GB_SVC_VERSION_MINOR		0x01
+
+/* Greybus SVC request types */
+#define GB_SVC_TYPE_PROTOCOL_VERSION		0x01
+#define GB_SVC_TYPE_SVC_HELLO			0x02
+#define GB_SVC_TYPE_INTF_DEVICE_ID		0x03
+#define GB_SVC_TYPE_INTF_RESET			0x06
+#define GB_SVC_TYPE_CONN_CREATE			0x07
+#define GB_SVC_TYPE_CONN_DESTROY		0x08
+#define GB_SVC_TYPE_DME_PEER_GET		0x09
+#define GB_SVC_TYPE_DME_PEER_SET		0x0a
+#define GB_SVC_TYPE_ROUTE_CREATE		0x0b
+#define GB_SVC_TYPE_ROUTE_DESTROY		0x0c
+#define GB_SVC_TYPE_TIMESYNC_ENABLE		0x0d
+#define GB_SVC_TYPE_TIMESYNC_DISABLE		0x0e
+#define GB_SVC_TYPE_TIMESYNC_AUTHORITATIVE	0x0f
+#define GB_SVC_TYPE_INTF_SET_PWRM		0x10
+#define GB_SVC_TYPE_INTF_EJECT			0x11
+#define GB_SVC_TYPE_PING			0x13
+#define GB_SVC_TYPE_PWRMON_RAIL_COUNT_GET	0x14
+#define GB_SVC_TYPE_PWRMON_RAIL_NAMES_GET	0x15
+#define GB_SVC_TYPE_PWRMON_SAMPLE_GET		0x16
+#define GB_SVC_TYPE_PWRMON_INTF_SAMPLE_GET	0x17
+#define GB_SVC_TYPE_TIMESYNC_WAKE_PINS_ACQUIRE	0x18
+#define GB_SVC_TYPE_TIMESYNC_WAKE_PINS_RELEASE	0x19
+#define GB_SVC_TYPE_TIMESYNC_PING		0x1a
+#define GB_SVC_TYPE_MODULE_INSERTED		0x1f
+#define GB_SVC_TYPE_MODULE_REMOVED		0x20
+#define GB_SVC_TYPE_INTF_VSYS_ENABLE		0x21
+#define GB_SVC_TYPE_INTF_VSYS_DISABLE		0x22
+#define GB_SVC_TYPE_INTF_REFCLK_ENABLE		0x23
+#define GB_SVC_TYPE_INTF_REFCLK_DISABLE		0x24
+#define GB_SVC_TYPE_INTF_UNIPRO_ENABLE		0x25
+#define GB_SVC_TYPE_INTF_UNIPRO_DISABLE		0x26
+#define GB_SVC_TYPE_INTF_ACTIVATE		0x27
+#define GB_SVC_TYPE_INTF_RESUME			0x28
+#define GB_SVC_TYPE_INTF_MAILBOX_EVENT		0x29
+#define GB_SVC_TYPE_INTF_OOPS			0x2a
+
+/* Greybus SVC protocol status values */
+#define GB_SVC_OP_SUCCESS			0x00
+#define GB_SVC_OP_UNKNOWN_ERROR			0x01
+#define GB_SVC_INTF_NOT_DETECTED		0x02
+#define GB_SVC_INTF_NO_UPRO_LINK		0x03
+#define GB_SVC_INTF_UPRO_NOT_DOWN		0x04
+#define GB_SVC_INTF_UPRO_NOT_HIBERNATED		0x05
+#define GB_SVC_INTF_NO_V_SYS			0x06
+#define GB_SVC_INTF_V_CHG			0x07
+#define GB_SVC_INTF_WAKE_BUSY			0x08
+#define GB_SVC_INTF_NO_REFCLK			0x09
+#define GB_SVC_INTF_RELEASING			0x0a
+#define GB_SVC_INTF_NO_ORDER			0x0b
+#define GB_SVC_INTF_MBOX_SET			0x0c
+#define GB_SVC_INTF_BAD_MBOX			0x0d
+#define GB_SVC_INTF_OP_TIMEOUT			0x0e
+#define GB_SVC_PWRMON_OP_NOT_PRESENT		0x0f
+
+struct gb_svc_version_request {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+struct gb_svc_version_response {
+	__u8	major;
+	__u8	minor;
+} __packed;
+
+/* SVC protocol hello request */
+struct gb_svc_hello_request {
+	__le16			endo_id;
+	__u8			interface_id;
+} __packed;
+/* hello response has no payload */
+
+struct gb_svc_intf_device_id_request {
+	__u8	intf_id;
+	__u8	device_id;
+} __packed;
+/* device id response has no payload */
+
+struct gb_svc_intf_reset_request {
+	__u8	intf_id;
+} __packed;
+/* interface reset response has no payload */
+
+struct gb_svc_intf_eject_request {
+	__u8	intf_id;
+} __packed;
+/* interface eject response has no payload */
+
+struct gb_svc_conn_create_request {
+	__u8	intf1_id;
+	__le16	cport1_id;
+	__u8	intf2_id;
+	__le16	cport2_id;
+	__u8	tc;
+	__u8	flags;
+} __packed;
+/* connection create response has no payload */
+
+struct gb_svc_conn_destroy_request {
+	__u8	intf1_id;
+	__le16	cport1_id;
+	__u8	intf2_id;
+	__le16	cport2_id;
+} __packed;
+/* connection destroy response has no payload */
+
+struct gb_svc_dme_peer_get_request {
+	__u8	intf_id;
+	__le16	attr;
+	__le16	selector;
+} __packed;
+
+struct gb_svc_dme_peer_get_response {
+	__le16	result_code;
+	__le32	attr_value;
+} __packed;
+
+struct gb_svc_dme_peer_set_request {
+	__u8	intf_id;
+	__le16	attr;
+	__le16	selector;
+	__le32	value;
+} __packed;
+
+struct gb_svc_dme_peer_set_response {
+	__le16	result_code;
+} __packed;
+
+/* Greybus init-status values, currently retrieved using DME peer gets. */
+#define GB_INIT_SPI_BOOT_STARTED			0x02
+#define GB_INIT_TRUSTED_SPI_BOOT_FINISHED		0x03
+#define GB_INIT_UNTRUSTED_SPI_BOOT_FINISHED		0x04
+#define GB_INIT_BOOTROM_UNIPRO_BOOT_STARTED		0x06
+#define GB_INIT_BOOTROM_FALLBACK_UNIPRO_BOOT_STARTED	0x09
+#define GB_INIT_S2_LOADER_BOOT_STARTED			0x0D
+
+struct gb_svc_route_create_request {
+	__u8	intf1_id;
+	__u8	dev1_id;
+	__u8	intf2_id;
+	__u8	dev2_id;
+} __packed;
+/* route create response has no payload */
+
+struct gb_svc_route_destroy_request {
+	__u8	intf1_id;
+	__u8	intf2_id;
+} __packed;
+/* route destroy response has no payload */
+
+/* used for svc_intf_vsys_{enable,disable} */
+struct gb_svc_intf_vsys_request {
+	__u8	intf_id;
+} __packed;
+
+struct gb_svc_intf_vsys_response {
+	__u8	result_code;
+#define GB_SVC_INTF_VSYS_OK				0x00
+	/* 0x01 is reserved */
+#define GB_SVC_INTF_VSYS_FAIL				0x02
+} __packed;
+
+/* used for svc_intf_refclk_{enable,disable} */
+struct gb_svc_intf_refclk_request {
+	__u8	intf_id;
+} __packed;
+
+struct gb_svc_intf_refclk_response {
+	__u8	result_code;
+#define GB_SVC_INTF_REFCLK_OK				0x00
+	/* 0x01 is reserved */
+#define GB_SVC_INTF_REFCLK_FAIL				0x02
+} __packed;
+
+/* used for svc_intf_unipro_{enable,disable} */
+struct gb_svc_intf_unipro_request {
+	__u8	intf_id;
+} __packed;
+
+struct gb_svc_intf_unipro_response {
+	__u8	result_code;
+#define GB_SVC_INTF_UNIPRO_OK				0x00
+	/* 0x01 is reserved */
+#define GB_SVC_INTF_UNIPRO_FAIL				0x02
+#define GB_SVC_INTF_UNIPRO_NOT_OFF			0x03
+} __packed;
+
+#define GB_SVC_UNIPRO_FAST_MODE			0x01
+#define GB_SVC_UNIPRO_SLOW_MODE			0x02
+#define GB_SVC_UNIPRO_FAST_AUTO_MODE		0x04
+#define GB_SVC_UNIPRO_SLOW_AUTO_MODE		0x05
+#define GB_SVC_UNIPRO_MODE_UNCHANGED		0x07
+#define GB_SVC_UNIPRO_HIBERNATE_MODE		0x11
+#define GB_SVC_UNIPRO_OFF_MODE			0x12
+
+#define GB_SVC_SMALL_AMPLITUDE          0x01
+#define GB_SVC_LARGE_AMPLITUDE          0x02
+
+#define GB_SVC_NO_DE_EMPHASIS           0x00
+#define GB_SVC_SMALL_DE_EMPHASIS        0x01
+#define GB_SVC_LARGE_DE_EMPHASIS        0x02
+
+#define GB_SVC_PWRM_RXTERMINATION		0x01
+#define GB_SVC_PWRM_TXTERMINATION		0x02
+#define GB_SVC_PWRM_LINE_RESET			0x04
+#define GB_SVC_PWRM_SCRAMBLING			0x20
+
+#define GB_SVC_PWRM_QUIRK_HSSER			0x00000001
+
+#define GB_SVC_UNIPRO_HS_SERIES_A		0x01
+#define GB_SVC_UNIPRO_HS_SERIES_B		0x02
+
+#define GB_SVC_SETPWRM_PWR_OK           0x00
+#define GB_SVC_SETPWRM_PWR_LOCAL        0x01
+#define GB_SVC_SETPWRM_PWR_REMOTE       0x02
+#define GB_SVC_SETPWRM_PWR_BUSY         0x03
+#define GB_SVC_SETPWRM_PWR_ERROR_CAP    0x04
+#define GB_SVC_SETPWRM_PWR_FATAL_ERROR  0x05
+
+struct gb_svc_l2_timer_cfg {
+	__le16 tsb_fc0_protection_timeout;
+	__le16 tsb_tc0_replay_timeout;
+	__le16 tsb_afc0_req_timeout;
+	__le16 tsb_fc1_protection_timeout;
+	__le16 tsb_tc1_replay_timeout;
+	__le16 tsb_afc1_req_timeout;
+	__le16 reserved_for_tc2[3];
+	__le16 reserved_for_tc3[3];
+} __packed;
+
+struct gb_svc_intf_set_pwrm_request {
+	__u8	intf_id;
+	__u8	hs_series;
+	__u8	tx_mode;
+	__u8	tx_gear;
+	__u8	tx_nlanes;
+	__u8	tx_amplitude;
+	__u8	tx_hs_equalizer;
+	__u8	rx_mode;
+	__u8	rx_gear;
+	__u8	rx_nlanes;
+	__u8	flags;
+	__le32	quirks;
+	struct gb_svc_l2_timer_cfg local_l2timerdata, remote_l2timerdata;
+} __packed;
+
+struct gb_svc_intf_set_pwrm_response {
+	__u8	result_code;
+} __packed;
+
+struct gb_svc_key_event_request {
+	__le16  key_code;
+#define GB_KEYCODE_ARA         0x00
+
+	__u8    key_event;
+#define GB_SVC_KEY_RELEASED    0x00
+#define GB_SVC_KEY_PRESSED     0x01
+} __packed;
+
+#define GB_SVC_PWRMON_MAX_RAIL_COUNT		254
+
+struct gb_svc_pwrmon_rail_count_get_response {
+	__u8	rail_count;
+} __packed;
+
+#define GB_SVC_PWRMON_RAIL_NAME_BUFSIZE		32
+
+struct gb_svc_pwrmon_rail_names_get_response {
+	__u8	status;
+	__u8	name[0][GB_SVC_PWRMON_RAIL_NAME_BUFSIZE];
+} __packed;
+
+#define GB_SVC_PWRMON_TYPE_CURR			0x01
+#define GB_SVC_PWRMON_TYPE_VOL			0x02
+#define GB_SVC_PWRMON_TYPE_PWR			0x03
+
+#define GB_SVC_PWRMON_GET_SAMPLE_OK		0x00
+#define GB_SVC_PWRMON_GET_SAMPLE_INVAL		0x01
+#define GB_SVC_PWRMON_GET_SAMPLE_NOSUPP		0x02
+#define GB_SVC_PWRMON_GET_SAMPLE_HWERR		0x03
+
+struct gb_svc_pwrmon_sample_get_request {
+	__u8	rail_id;
+	__u8	measurement_type;
+} __packed;
+
+struct gb_svc_pwrmon_sample_get_response {
+	__u8	result;
+	__le32	measurement;
+} __packed;
+
+struct gb_svc_pwrmon_intf_sample_get_request {
+	__u8	intf_id;
+	__u8	measurement_type;
+} __packed;
+
+struct gb_svc_pwrmon_intf_sample_get_response {
+	__u8	result;
+	__le32	measurement;
+} __packed;
+
+#define GB_SVC_MODULE_INSERTED_FLAG_NO_PRIMARY	0x0001
+
+struct gb_svc_module_inserted_request {
+	__u8	primary_intf_id;
+	__u8	intf_count;
+	__le16	flags;
+} __packed;
+/* module_inserted response has no payload */
+
+struct gb_svc_module_removed_request {
+	__u8	primary_intf_id;
+} __packed;
+/* module_removed response has no payload */
+
+struct gb_svc_intf_activate_request {
+	__u8	intf_id;
+} __packed;
+
+#define GB_SVC_INTF_TYPE_UNKNOWN		0x00
+#define GB_SVC_INTF_TYPE_DUMMY			0x01
+#define GB_SVC_INTF_TYPE_UNIPRO			0x02
+#define GB_SVC_INTF_TYPE_GREYBUS		0x03
+
+struct gb_svc_intf_activate_response {
+	__u8	status;
+	__u8	intf_type;
+} __packed;
+
+struct gb_svc_intf_resume_request {
+	__u8	intf_id;
+} __packed;
+
+struct gb_svc_intf_resume_response {
+	__u8	status;
+} __packed;
+
+#define GB_SVC_INTF_MAILBOX_NONE		0x00
+#define GB_SVC_INTF_MAILBOX_AP			0x01
+#define GB_SVC_INTF_MAILBOX_GREYBUS		0x02
+
+struct gb_svc_intf_mailbox_event_request {
+	__u8	intf_id;
+	__le16	result_code;
+	__le32	mailbox;
+} __packed;
+/* intf_mailbox_event response has no payload */
+
+struct gb_svc_intf_oops_request {
+	__u8	intf_id;
+	__u8	reason;
+} __packed;
+/* intf_oops response has no payload */
+
+
+/* RAW */
+
+/* Greybus raw request types */
+#define	GB_RAW_TYPE_SEND			0x02
+
+struct gb_raw_send_request {
+	__le32	len;
+	__u8	data[0];
+} __packed;
+
+
+/* UART */
+
+/* Greybus UART operation types */
+#define GB_UART_TYPE_SEND_DATA			0x02
+#define GB_UART_TYPE_RECEIVE_DATA		0x03	/* Unsolicited data */
+#define GB_UART_TYPE_SET_LINE_CODING		0x04
+#define GB_UART_TYPE_SET_CONTROL_LINE_STATE	0x05
+#define GB_UART_TYPE_SEND_BREAK			0x06
+#define GB_UART_TYPE_SERIAL_STATE		0x07	/* Unsolicited data */
+#define GB_UART_TYPE_RECEIVE_CREDITS		0x08
+#define GB_UART_TYPE_FLUSH_FIFOS		0x09
+
+/* Represents data from AP -> Module */
+struct gb_uart_send_data_request {
+	__le16	size;
+	__u8	data[0];
+} __packed;
+
+/* recv-data-request flags */
+#define GB_UART_RECV_FLAG_FRAMING		0x01	/* Framing error */
+#define GB_UART_RECV_FLAG_PARITY		0x02	/* Parity error */
+#define GB_UART_RECV_FLAG_OVERRUN		0x04	/* Overrun error */
+#define GB_UART_RECV_FLAG_BREAK			0x08	/* Break */
+
+/* Represents data from Module -> AP */
+struct gb_uart_recv_data_request {
+	__le16	size;
+	__u8	flags;
+	__u8	data[0];
+} __packed;
+
+struct gb_uart_receive_credits_request {
+	__le16  count;
+} __packed;
+
+struct gb_uart_set_line_coding_request {
+	__le32	rate;
+	__u8	format;
+#define GB_SERIAL_1_STOP_BITS			0
+#define GB_SERIAL_1_5_STOP_BITS			1
+#define GB_SERIAL_2_STOP_BITS			2
+
+	__u8	parity;
+#define GB_SERIAL_NO_PARITY			0
+#define GB_SERIAL_ODD_PARITY			1
+#define GB_SERIAL_EVEN_PARITY			2
+#define GB_SERIAL_MARK_PARITY			3
+#define GB_SERIAL_SPACE_PARITY			4
+
+	__u8	data_bits;
+
+	__u8	flow_control;
+#define GB_SERIAL_AUTO_RTSCTS_EN		0x1
+} __packed;
+
+/* output control lines */
+#define GB_UART_CTRL_DTR			0x01
+#define GB_UART_CTRL_RTS			0x02
+
+struct gb_uart_set_control_line_state_request {
+	__u8	control;
+} __packed;
+
+struct gb_uart_set_break_request {
+	__u8	state;
+} __packed;
+
+/* input control lines and line errors */
+#define GB_UART_CTRL_DCD			0x01
+#define GB_UART_CTRL_DSR			0x02
+#define GB_UART_CTRL_RI				0x04
+
+struct gb_uart_serial_state_request {
+	__u8	control;
+} __packed;
+
+struct gb_uart_serial_flush_request {
+	__u8    flags;
+#define GB_SERIAL_FLAG_FLUSH_TRANSMITTER	0x01
+#define GB_SERIAL_FLAG_FLUSH_RECEIVER		0x02
+} __packed;
+
+/* Loopback */
+
+/* Greybus loopback request types */
+#define GB_LOOPBACK_TYPE_PING			0x02
+#define GB_LOOPBACK_TYPE_TRANSFER		0x03
+#define GB_LOOPBACK_TYPE_SINK			0x04
+
+/*
+ * Loopback request/response header format should be identical
+ * to simplify bandwidth and data movement analysis.
+ */
+struct gb_loopback_transfer_request {
+	__le32	len;
+	__le32  reserved0;
+	__le32  reserved1;
+	__u8	data[0];
+} __packed;
+
+struct gb_loopback_transfer_response {
+	__le32	len;
+	__le32	reserved0;
+	__le32	reserved1;
+	__u8	data[0];
+} __packed;
+
+/* SDIO */
+/* Greybus SDIO operation types */
+#define GB_SDIO_TYPE_GET_CAPABILITIES		0x02
+#define GB_SDIO_TYPE_SET_IOS			0x03
+#define GB_SDIO_TYPE_COMMAND			0x04
+#define GB_SDIO_TYPE_TRANSFER			0x05
+#define GB_SDIO_TYPE_EVENT			0x06
+
+/* get caps response: request has no payload */
+struct gb_sdio_get_caps_response {
+	__le32	caps;
+#define GB_SDIO_CAP_NONREMOVABLE	0x00000001
+#define GB_SDIO_CAP_4_BIT_DATA		0x00000002
+#define GB_SDIO_CAP_8_BIT_DATA		0x00000004
+#define GB_SDIO_CAP_MMC_HS		0x00000008
+#define GB_SDIO_CAP_SD_HS		0x00000010
+#define GB_SDIO_CAP_ERASE		0x00000020
+#define GB_SDIO_CAP_1_2V_DDR		0x00000040
+#define GB_SDIO_CAP_1_8V_DDR		0x00000080
+#define GB_SDIO_CAP_POWER_OFF_CARD	0x00000100
+#define GB_SDIO_CAP_UHS_SDR12		0x00000200
+#define GB_SDIO_CAP_UHS_SDR25		0x00000400
+#define GB_SDIO_CAP_UHS_SDR50		0x00000800
+#define GB_SDIO_CAP_UHS_SDR104		0x00001000
+#define GB_SDIO_CAP_UHS_DDR50		0x00002000
+#define GB_SDIO_CAP_DRIVER_TYPE_A	0x00004000
+#define GB_SDIO_CAP_DRIVER_TYPE_C	0x00008000
+#define GB_SDIO_CAP_DRIVER_TYPE_D	0x00010000
+#define GB_SDIO_CAP_HS200_1_2V		0x00020000
+#define GB_SDIO_CAP_HS200_1_8V		0x00040000
+#define GB_SDIO_CAP_HS400_1_2V		0x00080000
+#define GB_SDIO_CAP_HS400_1_8V		0x00100000
+
+	/* see possible values below at vdd */
+	__le32 ocr;
+	__le32 f_min;
+	__le32 f_max;
+	__le16 max_blk_count;
+	__le16 max_blk_size;
+} __packed;
+
+/* set ios request: response has no payload */
+struct gb_sdio_set_ios_request {
+	__le32	clock;
+	__le32	vdd;
+#define GB_SDIO_VDD_165_195	0x00000001
+#define GB_SDIO_VDD_20_21	0x00000002
+#define GB_SDIO_VDD_21_22	0x00000004
+#define GB_SDIO_VDD_22_23	0x00000008
+#define GB_SDIO_VDD_23_24	0x00000010
+#define GB_SDIO_VDD_24_25	0x00000020
+#define GB_SDIO_VDD_25_26	0x00000040
+#define GB_SDIO_VDD_26_27	0x00000080
+#define GB_SDIO_VDD_27_28	0x00000100
+#define GB_SDIO_VDD_28_29	0x00000200
+#define GB_SDIO_VDD_29_30	0x00000400
+#define GB_SDIO_VDD_30_31	0x00000800
+#define GB_SDIO_VDD_31_32	0x00001000
+#define GB_SDIO_VDD_32_33	0x00002000
+#define GB_SDIO_VDD_33_34	0x00004000
+#define GB_SDIO_VDD_34_35	0x00008000
+#define GB_SDIO_VDD_35_36	0x00010000
+
+	__u8	bus_mode;
+#define GB_SDIO_BUSMODE_OPENDRAIN	0x00
+#define GB_SDIO_BUSMODE_PUSHPULL	0x01
+
+	__u8	power_mode;
+#define GB_SDIO_POWER_OFF	0x00
+#define GB_SDIO_POWER_UP	0x01
+#define GB_SDIO_POWER_ON	0x02
+#define GB_SDIO_POWER_UNDEFINED	0x03
+
+	__u8	bus_width;
+#define GB_SDIO_BUS_WIDTH_1	0x00
+#define GB_SDIO_BUS_WIDTH_4	0x02
+#define GB_SDIO_BUS_WIDTH_8	0x03
+
+	__u8	timing;
+#define GB_SDIO_TIMING_LEGACY		0x00
+#define GB_SDIO_TIMING_MMC_HS		0x01
+#define GB_SDIO_TIMING_SD_HS		0x02
+#define GB_SDIO_TIMING_UHS_SDR12	0x03
+#define GB_SDIO_TIMING_UHS_SDR25	0x04
+#define GB_SDIO_TIMING_UHS_SDR50	0x05
+#define GB_SDIO_TIMING_UHS_SDR104	0x06
+#define GB_SDIO_TIMING_UHS_DDR50	0x07
+#define GB_SDIO_TIMING_MMC_DDR52	0x08
+#define GB_SDIO_TIMING_MMC_HS200	0x09
+#define GB_SDIO_TIMING_MMC_HS400	0x0A
+
+	__u8	signal_voltage;
+#define GB_SDIO_SIGNAL_VOLTAGE_330	0x00
+#define GB_SDIO_SIGNAL_VOLTAGE_180	0x01
+#define GB_SDIO_SIGNAL_VOLTAGE_120	0x02
+
+	__u8	drv_type;
+#define GB_SDIO_SET_DRIVER_TYPE_B	0x00
+#define GB_SDIO_SET_DRIVER_TYPE_A	0x01
+#define GB_SDIO_SET_DRIVER_TYPE_C	0x02
+#define GB_SDIO_SET_DRIVER_TYPE_D	0x03
+} __packed;
+
+/* command request */
+struct gb_sdio_command_request {
+	__u8	cmd;
+	__u8	cmd_flags;
+#define GB_SDIO_RSP_NONE		0x00
+#define GB_SDIO_RSP_PRESENT		0x01
+#define GB_SDIO_RSP_136			0x02
+#define GB_SDIO_RSP_CRC			0x04
+#define GB_SDIO_RSP_BUSY		0x08
+#define GB_SDIO_RSP_OPCODE		0x10
+
+	__u8	cmd_type;
+#define GB_SDIO_CMD_AC		0x00
+#define GB_SDIO_CMD_ADTC	0x01
+#define GB_SDIO_CMD_BC		0x02
+#define GB_SDIO_CMD_BCR		0x03
+
+	__le32	cmd_arg;
+	__le16	data_blocks;
+	__le16	data_blksz;
+} __packed;
+
+struct gb_sdio_command_response {
+	__le32	resp[4];
+} __packed;
+
+/* transfer request */
+struct gb_sdio_transfer_request {
+	__u8	data_flags;
+#define GB_SDIO_DATA_WRITE	0x01
+#define GB_SDIO_DATA_READ	0x02
+#define GB_SDIO_DATA_STREAM	0x04
+
+	__le16	data_blocks;
+	__le16	data_blksz;
+	__u8	data[0];
+} __packed;
+
+struct gb_sdio_transfer_response {
+	__le16	data_blocks;
+	__le16	data_blksz;
+	__u8	data[0];
+} __packed;
+
+/* event request: generated by module and is defined as unidirectional */
+struct gb_sdio_event_request {
+	__u8	event;
+#define GB_SDIO_CARD_INSERTED	0x01
+#define GB_SDIO_CARD_REMOVED	0x02
+#define GB_SDIO_WP		0x04
+} __packed;
+
+/* Camera */
+
+/* Greybus Camera request types */
+#define GB_CAMERA_TYPE_CAPABILITIES		0x02
+#define GB_CAMERA_TYPE_CONFIGURE_STREAMS	0x03
+#define GB_CAMERA_TYPE_CAPTURE			0x04
+#define GB_CAMERA_TYPE_FLUSH			0x05
+#define GB_CAMERA_TYPE_METADATA			0x06
+
+#define GB_CAMERA_MAX_STREAMS			4
+#define GB_CAMERA_MAX_SETTINGS_SIZE		8192
+
+/* Greybus Camera Configure Streams request payload */
+struct gb_camera_stream_config_request {
+	__le16 width;
+	__le16 height;
+	__le16 format;
+	__le16 padding;
+} __packed;
+
+struct gb_camera_configure_streams_request {
+	__u8 num_streams;
+	__u8 flags;
+#define GB_CAMERA_CONFIGURE_STREAMS_TEST_ONLY	0x01
+	__le16 padding;
+	struct gb_camera_stream_config_request config[0];
+} __packed;
+
+/* Greybus Camera Configure Streams response payload */
+struct gb_camera_stream_config_response {
+	__le16 width;
+	__le16 height;
+	__le16 format;
+	__u8 virtual_channel;
+	__u8 data_type[2];
+	__le16 max_pkt_size;
+	__u8 padding;
+	__le32 max_size;
+} __packed;
+
+struct gb_camera_configure_streams_response {
+	__u8 num_streams;
+#define GB_CAMERA_CONFIGURE_STREAMS_ADJUSTED	0x01
+	__u8 flags;
+	__u8 padding[2];
+	__le32 data_rate;
+	struct gb_camera_stream_config_response config[0];
+};
+
+/* Greybus Camera Capture request payload - response has no payload */
+struct gb_camera_capture_request {
+	__le32 request_id;
+	__u8 streams;
+	__u8 padding;
+	__le16 num_frames;
+	__u8 settings[0];
+} __packed;
+
+/* Greybus Camera Flush response payload - request has no payload */
+struct gb_camera_flush_response {
+	__le32 request_id;
+} __packed;
+
+/* Greybus Camera Metadata request payload - operation has no response */
+struct gb_camera_metadata_request {
+	__le32 request_id;
+	__le16 frame_number;
+	__u8 stream;
+	__u8 padding;
+	__u8 metadata[0];
+} __packed;
+
+/* Lights */
+
+/* Greybus Lights request types */
+#define GB_LIGHTS_TYPE_GET_LIGHTS		0x02
+#define GB_LIGHTS_TYPE_GET_LIGHT_CONFIG		0x03
+#define GB_LIGHTS_TYPE_GET_CHANNEL_CONFIG	0x04
+#define GB_LIGHTS_TYPE_GET_CHANNEL_FLASH_CONFIG	0x05
+#define GB_LIGHTS_TYPE_SET_BRIGHTNESS		0x06
+#define GB_LIGHTS_TYPE_SET_BLINK		0x07
+#define GB_LIGHTS_TYPE_SET_COLOR		0x08
+#define GB_LIGHTS_TYPE_SET_FADE			0x09
+#define GB_LIGHTS_TYPE_EVENT			0x0A
+#define GB_LIGHTS_TYPE_SET_FLASH_INTENSITY	0x0B
+#define GB_LIGHTS_TYPE_SET_FLASH_STROBE		0x0C
+#define GB_LIGHTS_TYPE_SET_FLASH_TIMEOUT	0x0D
+#define GB_LIGHTS_TYPE_GET_FLASH_FAULT		0x0E
+
+/* Greybus Light modes */
+
+/*
+ * if you add any specific mode below, update also the
+ * GB_CHANNEL_MODE_DEFINED_RANGE value accordingly
+ */
+#define GB_CHANNEL_MODE_NONE		0x00000000
+#define GB_CHANNEL_MODE_BATTERY		0x00000001
+#define GB_CHANNEL_MODE_POWER		0x00000002
+#define GB_CHANNEL_MODE_WIRELESS	0x00000004
+#define GB_CHANNEL_MODE_BLUETOOTH	0x00000008
+#define GB_CHANNEL_MODE_KEYBOARD	0x00000010
+#define GB_CHANNEL_MODE_BUTTONS		0x00000020
+#define GB_CHANNEL_MODE_NOTIFICATION	0x00000040
+#define GB_CHANNEL_MODE_ATTENTION	0x00000080
+#define GB_CHANNEL_MODE_FLASH		0x00000100
+#define GB_CHANNEL_MODE_TORCH		0x00000200
+#define GB_CHANNEL_MODE_INDICATOR	0x00000400
+
+/* Lights Mode valid bit values */
+#define GB_CHANNEL_MODE_DEFINED_RANGE	0x000004FF
+#define GB_CHANNEL_MODE_VENDOR_RANGE	0x00F00000
+
+/* Greybus Light Channels Flags */
+#define GB_LIGHT_CHANNEL_MULTICOLOR	0x00000001
+#define GB_LIGHT_CHANNEL_FADER		0x00000002
+#define GB_LIGHT_CHANNEL_BLINK		0x00000004
+
+/* get count of lights in module */
+struct gb_lights_get_lights_response {
+	__u8	lights_count;
+} __packed;
+
+/* light config request payload */
+struct gb_lights_get_light_config_request {
+	__u8	id;
+} __packed;
+
+/* light config response payload */
+struct gb_lights_get_light_config_response {
+	__u8	channel_count;
+	__u8	name[32];
+} __packed;
+
+/* channel config request payload */
+struct gb_lights_get_channel_config_request {
+	__u8	light_id;
+	__u8	channel_id;
+} __packed;
+
+/* channel flash config request payload */
+struct gb_lights_get_channel_flash_config_request {
+	__u8	light_id;
+	__u8	channel_id;
+} __packed;
+
+/* channel config response payload */
+struct gb_lights_get_channel_config_response {
+	__u8	max_brightness;
+	__le32	flags;
+	__le32	color;
+	__u8	color_name[32];
+	__le32	mode;
+	__u8	mode_name[32];
+} __packed;
+
+/* channel flash config response payload */
+struct gb_lights_get_channel_flash_config_response {
+	__le32	intensity_min_uA;
+	__le32	intensity_max_uA;
+	__le32	intensity_step_uA;
+	__le32	timeout_min_us;
+	__le32	timeout_max_us;
+	__le32	timeout_step_us;
+} __packed;
+
+/* blink request payload: response have no payload */
+struct gb_lights_blink_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__le16	time_on_ms;
+	__le16	time_off_ms;
+} __packed;
+
+/* set brightness request payload: response have no payload */
+struct gb_lights_set_brightness_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__u8	brightness;
+} __packed;
+
+/* set color request payload: response have no payload */
+struct gb_lights_set_color_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__le32	color;
+} __packed;
+
+/* set fade request payload: response have no payload */
+struct gb_lights_set_fade_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__u8	fade_in;
+	__u8	fade_out;
+} __packed;
+
+/* event request: generated by module */
+struct gb_lights_event_request {
+	__u8	light_id;
+	__u8	event;
+#define GB_LIGHTS_LIGHT_CONFIG		0x01
+} __packed;
+
+/* set flash intensity request payload: response have no payload */
+struct gb_lights_set_flash_intensity_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__le32	intensity_uA;
+} __packed;
+
+/* set flash strobe state request payload: response have no payload */
+struct gb_lights_set_flash_strobe_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__u8	state;
+} __packed;
+
+/* set flash timeout request payload: response have no payload */
+struct gb_lights_set_flash_timeout_request {
+	__u8	light_id;
+	__u8	channel_id;
+	__le32	timeout_us;
+} __packed;
+
+/* get flash fault request payload */
+struct gb_lights_get_flash_fault_request {
+	__u8	light_id;
+	__u8	channel_id;
+} __packed;
+
+/* get flash fault response payload */
+struct gb_lights_get_flash_fault_response {
+	__le32	fault;
+#define GB_LIGHTS_FLASH_FAULT_OVER_VOLTAGE		0x00000000
+#define GB_LIGHTS_FLASH_FAULT_TIMEOUT			0x00000001
+#define GB_LIGHTS_FLASH_FAULT_OVER_TEMPERATURE		0x00000002
+#define GB_LIGHTS_FLASH_FAULT_SHORT_CIRCUIT		0x00000004
+#define GB_LIGHTS_FLASH_FAULT_OVER_CURRENT		0x00000008
+#define GB_LIGHTS_FLASH_FAULT_INDICATOR			0x00000010
+#define GB_LIGHTS_FLASH_FAULT_UNDER_VOLTAGE		0x00000020
+#define GB_LIGHTS_FLASH_FAULT_INPUT_VOLTAGE		0x00000040
+#define GB_LIGHTS_FLASH_FAULT_LED_OVER_TEMPERATURE	0x00000080
+} __packed;
+
+/* Audio */
+
+#define GB_AUDIO_TYPE_GET_TOPOLOGY_SIZE		0x02
+#define GB_AUDIO_TYPE_GET_TOPOLOGY		0x03
+#define GB_AUDIO_TYPE_GET_CONTROL		0x04
+#define GB_AUDIO_TYPE_SET_CONTROL		0x05
+#define GB_AUDIO_TYPE_ENABLE_WIDGET		0x06
+#define GB_AUDIO_TYPE_DISABLE_WIDGET		0x07
+#define GB_AUDIO_TYPE_GET_PCM			0x08
+#define GB_AUDIO_TYPE_SET_PCM			0x09
+#define GB_AUDIO_TYPE_SET_TX_DATA_SIZE		0x0a
+						/* 0x0b unused */
+#define GB_AUDIO_TYPE_ACTIVATE_TX		0x0c
+#define GB_AUDIO_TYPE_DEACTIVATE_TX		0x0d
+#define GB_AUDIO_TYPE_SET_RX_DATA_SIZE		0x0e
+						/* 0x0f unused */
+#define GB_AUDIO_TYPE_ACTIVATE_RX		0x10
+#define GB_AUDIO_TYPE_DEACTIVATE_RX		0x11
+#define GB_AUDIO_TYPE_JACK_EVENT		0x12
+#define GB_AUDIO_TYPE_BUTTON_EVENT		0x13
+#define GB_AUDIO_TYPE_STREAMING_EVENT		0x14
+#define GB_AUDIO_TYPE_SEND_DATA			0x15
+
+/* Module must be able to buffer 10ms of audio data, minimum */
+#define GB_AUDIO_SAMPLE_BUFFER_MIN_US		10000
+
+#define GB_AUDIO_PCM_NAME_MAX			32
+#define AUDIO_DAI_NAME_MAX			32
+#define AUDIO_CONTROL_NAME_MAX			32
+#define AUDIO_CTL_ELEM_NAME_MAX			44
+#define AUDIO_ENUM_NAME_MAX			64
+#define AUDIO_WIDGET_NAME_MAX			32
+
+/* See SNDRV_PCM_FMTBIT_* in Linux source */
+#define GB_AUDIO_PCM_FMT_S8			BIT(0)
+#define GB_AUDIO_PCM_FMT_U8			BIT(1)
+#define GB_AUDIO_PCM_FMT_S16_LE			BIT(2)
+#define GB_AUDIO_PCM_FMT_S16_BE			BIT(3)
+#define GB_AUDIO_PCM_FMT_U16_LE			BIT(4)
+#define GB_AUDIO_PCM_FMT_U16_BE			BIT(5)
+#define GB_AUDIO_PCM_FMT_S24_LE			BIT(6)
+#define GB_AUDIO_PCM_FMT_S24_BE			BIT(7)
+#define GB_AUDIO_PCM_FMT_U24_LE			BIT(8)
+#define GB_AUDIO_PCM_FMT_U24_BE			BIT(9)
+#define GB_AUDIO_PCM_FMT_S32_LE			BIT(10)
+#define GB_AUDIO_PCM_FMT_S32_BE			BIT(11)
+#define GB_AUDIO_PCM_FMT_U32_LE			BIT(12)
+#define GB_AUDIO_PCM_FMT_U32_BE			BIT(13)
+
+/* See SNDRV_PCM_RATE_* in Linux source */
+#define GB_AUDIO_PCM_RATE_5512			BIT(0)
+#define GB_AUDIO_PCM_RATE_8000			BIT(1)
+#define GB_AUDIO_PCM_RATE_11025			BIT(2)
+#define GB_AUDIO_PCM_RATE_16000			BIT(3)
+#define GB_AUDIO_PCM_RATE_22050			BIT(4)
+#define GB_AUDIO_PCM_RATE_32000			BIT(5)
+#define GB_AUDIO_PCM_RATE_44100			BIT(6)
+#define GB_AUDIO_PCM_RATE_48000			BIT(7)
+#define GB_AUDIO_PCM_RATE_64000			BIT(8)
+#define GB_AUDIO_PCM_RATE_88200			BIT(9)
+#define GB_AUDIO_PCM_RATE_96000			BIT(10)
+#define GB_AUDIO_PCM_RATE_176400		BIT(11)
+#define GB_AUDIO_PCM_RATE_192000		BIT(12)
+
+#define GB_AUDIO_STREAM_TYPE_CAPTURE		0x1
+#define GB_AUDIO_STREAM_TYPE_PLAYBACK		0x2
+
+#define GB_AUDIO_CTL_ELEM_ACCESS_READ		BIT(0)
+#define GB_AUDIO_CTL_ELEM_ACCESS_WRITE		BIT(1)
+
+/* See SNDRV_CTL_ELEM_TYPE_* in Linux source */
+#define GB_AUDIO_CTL_ELEM_TYPE_BOOLEAN		0x01
+#define GB_AUDIO_CTL_ELEM_TYPE_INTEGER		0x02
+#define GB_AUDIO_CTL_ELEM_TYPE_ENUMERATED	0x03
+#define GB_AUDIO_CTL_ELEM_TYPE_INTEGER64	0x06
+
+/* See SNDRV_CTL_ELEM_IFACE_* in Linux source */
+#define GB_AUDIO_CTL_ELEM_IFACE_CARD		0x00
+#define GB_AUDIO_CTL_ELEM_IFACE_HWDEP		0x01
+#define GB_AUDIO_CTL_ELEM_IFACE_MIXER		0x02
+#define GB_AUDIO_CTL_ELEM_IFACE_PCM		0x03
+#define GB_AUDIO_CTL_ELEM_IFACE_RAWMIDI		0x04
+#define GB_AUDIO_CTL_ELEM_IFACE_TIMER		0x05
+#define GB_AUDIO_CTL_ELEM_IFACE_SEQUENCER	0x06
+
+/* SNDRV_CTL_ELEM_ACCESS_* in Linux source */
+#define GB_AUDIO_ACCESS_READ			BIT(0)
+#define GB_AUDIO_ACCESS_WRITE			BIT(1)
+#define GB_AUDIO_ACCESS_VOLATILE		BIT(2)
+#define GB_AUDIO_ACCESS_TIMESTAMP		BIT(3)
+#define GB_AUDIO_ACCESS_TLV_READ		BIT(4)
+#define GB_AUDIO_ACCESS_TLV_WRITE		BIT(5)
+#define GB_AUDIO_ACCESS_TLV_COMMAND		BIT(6)
+#define GB_AUDIO_ACCESS_INACTIVE		BIT(7)
+#define GB_AUDIO_ACCESS_LOCK			BIT(8)
+#define GB_AUDIO_ACCESS_OWNER			BIT(9)
+
+/* enum snd_soc_dapm_type */
+#define GB_AUDIO_WIDGET_TYPE_INPUT		0x0
+#define GB_AUDIO_WIDGET_TYPE_OUTPUT		0x1
+#define GB_AUDIO_WIDGET_TYPE_MUX		0x2
+#define GB_AUDIO_WIDGET_TYPE_VIRT_MUX		0x3
+#define GB_AUDIO_WIDGET_TYPE_VALUE_MUX		0x4
+#define GB_AUDIO_WIDGET_TYPE_MIXER		0x5
+#define GB_AUDIO_WIDGET_TYPE_MIXER_NAMED_CTL	0x6
+#define GB_AUDIO_WIDGET_TYPE_PGA		0x7
+#define GB_AUDIO_WIDGET_TYPE_OUT_DRV		0x8
+#define GB_AUDIO_WIDGET_TYPE_ADC		0x9
+#define GB_AUDIO_WIDGET_TYPE_DAC		0xa
+#define GB_AUDIO_WIDGET_TYPE_MICBIAS		0xb
+#define GB_AUDIO_WIDGET_TYPE_MIC		0xc
+#define GB_AUDIO_WIDGET_TYPE_HP			0xd
+#define GB_AUDIO_WIDGET_TYPE_SPK		0xe
+#define GB_AUDIO_WIDGET_TYPE_LINE		0xf
+#define GB_AUDIO_WIDGET_TYPE_SWITCH		0x10
+#define GB_AUDIO_WIDGET_TYPE_VMID		0x11
+#define GB_AUDIO_WIDGET_TYPE_PRE		0x12
+#define GB_AUDIO_WIDGET_TYPE_POST		0x13
+#define GB_AUDIO_WIDGET_TYPE_SUPPLY		0x14
+#define GB_AUDIO_WIDGET_TYPE_REGULATOR_SUPPLY	0x15
+#define GB_AUDIO_WIDGET_TYPE_CLOCK_SUPPLY	0x16
+#define GB_AUDIO_WIDGET_TYPE_AIF_IN		0x17
+#define GB_AUDIO_WIDGET_TYPE_AIF_OUT		0x18
+#define GB_AUDIO_WIDGET_TYPE_SIGGEN		0x19
+#define GB_AUDIO_WIDGET_TYPE_DAI_IN		0x1a
+#define GB_AUDIO_WIDGET_TYPE_DAI_OUT		0x1b
+#define GB_AUDIO_WIDGET_TYPE_DAI_LINK		0x1c
+
+#define GB_AUDIO_WIDGET_STATE_DISABLED		0x01
+#define GB_AUDIO_WIDGET_STATE_ENAABLED		0x02
+
+#define GB_AUDIO_JACK_EVENT_INSERTION		0x1
+#define GB_AUDIO_JACK_EVENT_REMOVAL		0x2
+
+#define GB_AUDIO_BUTTON_EVENT_PRESS		0x1
+#define GB_AUDIO_BUTTON_EVENT_RELEASE		0x2
+
+#define GB_AUDIO_STREAMING_EVENT_UNSPECIFIED	0x1
+#define GB_AUDIO_STREAMING_EVENT_HALT		0x2
+#define GB_AUDIO_STREAMING_EVENT_INTERNAL_ERROR	0x3
+#define GB_AUDIO_STREAMING_EVENT_PROTOCOL_ERROR	0x4
+#define GB_AUDIO_STREAMING_EVENT_FAILURE	0x5
+#define GB_AUDIO_STREAMING_EVENT_UNDERRUN	0x6
+#define GB_AUDIO_STREAMING_EVENT_OVERRUN	0x7
+#define GB_AUDIO_STREAMING_EVENT_CLOCKING	0x8
+#define GB_AUDIO_STREAMING_EVENT_DATA_LEN	0x9
+
+#define GB_AUDIO_INVALID_INDEX			0xff
+
+/* enum snd_jack_types */
+#define GB_AUDIO_JACK_HEADPHONE			0x0000001
+#define GB_AUDIO_JACK_MICROPHONE		0x0000002
+#define GB_AUDIO_JACK_HEADSET			(GB_AUDIO_JACK_HEADPHONE | \
+						 GB_AUDIO_JACK_MICROPHONE)
+#define GB_AUDIO_JACK_LINEOUT			0x0000004
+#define GB_AUDIO_JACK_MECHANICAL		0x0000008
+#define GB_AUDIO_JACK_VIDEOOUT			0x0000010
+#define GB_AUDIO_JACK_AVOUT			(GB_AUDIO_JACK_LINEOUT | \
+						 GB_AUDIO_JACK_VIDEOOUT)
+#define GB_AUDIO_JACK_LINEIN			0x0000020
+#define GB_AUDIO_JACK_OC_HPHL			0x0000040
+#define GB_AUDIO_JACK_OC_HPHR			0x0000080
+#define GB_AUDIO_JACK_MICROPHONE2		0x0000200
+#define GB_AUDIO_JACK_ANC_HEADPHONE		(GB_AUDIO_JACK_HEADPHONE | \
+						 GB_AUDIO_JACK_MICROPHONE | \
+						 GB_AUDIO_JACK_MICROPHONE2)
+/* Kept separate from switches to facilitate implementation */
+#define GB_AUDIO_JACK_BTN_0			0x4000000
+#define GB_AUDIO_JACK_BTN_1			0x2000000
+#define GB_AUDIO_JACK_BTN_2			0x1000000
+#define GB_AUDIO_JACK_BTN_3			0x0800000
+
+struct gb_audio_pcm {
+	__u8	stream_name[GB_AUDIO_PCM_NAME_MAX];
+	__le32	formats;	/* GB_AUDIO_PCM_FMT_* */
+	__le32	rates;		/* GB_AUDIO_PCM_RATE_* */
+	__u8	chan_min;
+	__u8	chan_max;
+	__u8	sig_bits;	/* number of bits of content */
+} __packed;
+
+struct gb_audio_dai {
+	__u8			name[AUDIO_DAI_NAME_MAX];
+	__le16			data_cport;
+	struct gb_audio_pcm	capture;
+	struct gb_audio_pcm	playback;
+} __packed;
+
+struct gb_audio_integer {
+	__le32	min;
+	__le32	max;
+	__le32	step;
+} __packed;
+
+struct gb_audio_integer64 {
+	__le64	min;
+	__le64	max;
+	__le64	step;
+} __packed;
+
+struct gb_audio_enumerated {
+	__le32	items;
+	__le16	names_length;
+	__u8	names[0];
+} __packed;
+
+struct gb_audio_ctl_elem_info { /* See snd_ctl_elem_info in Linux source */
+	__u8		type;		/* GB_AUDIO_CTL_ELEM_TYPE_* */
+	__le16		dimen[4];
+	union {
+		struct gb_audio_integer		integer;
+		struct gb_audio_integer64	integer64;
+		struct gb_audio_enumerated	enumerated;
+	} value;
+} __packed;
+
+struct gb_audio_ctl_elem_value { /* See snd_ctl_elem_value in Linux source */
+	__le64				timestamp; /* XXX needed? */
+	union {
+		__le32	integer_value[2];	/* consider CTL_DOUBLE_xxx */
+		__le64	integer64_value[2];
+		__le32	enumerated_item[2];
+	} value;
+} __packed;
+
+struct gb_audio_control {
+	__u8	name[AUDIO_CONTROL_NAME_MAX];
+	__u8	id;		/* 0-63 */
+	__u8	iface;		/* GB_AUDIO_IFACE_* */
+	__le16	data_cport;
+	__le32	access;		/* GB_AUDIO_ACCESS_* */
+	__u8    count;		/* count of same elements */
+	__u8	count_values;	/* count of values, max=2 for CTL_DOUBLE_xxx */
+	struct gb_audio_ctl_elem_info	info;
+} __packed;
+
+struct gb_audio_widget {
+	__u8	name[AUDIO_WIDGET_NAME_MAX];
+	__u8	sname[AUDIO_WIDGET_NAME_MAX];
+	__u8	id;
+	__u8	type;		/* GB_AUDIO_WIDGET_TYPE_* */
+	__u8	state;		/* GB_AUDIO_WIDGET_STATE_* */
+	__u8	ncontrols;
+	struct gb_audio_control	ctl[0];	/* 'ncontrols' entries */
+} __packed;
+
+struct gb_audio_route {
+	__u8	source_id;	/* widget id */
+	__u8	destination_id;	/* widget id */
+	__u8	control_id;	/* 0-63 */
+	__u8	index;		/* Selection within the control */
+} __packed;
+
+struct gb_audio_topology {
+	__u8	num_dais;
+	__u8	num_controls;
+	__u8	num_widgets;
+	__u8	num_routes;
+	__le32	size_dais;
+	__le32	size_controls;
+	__le32	size_widgets;
+	__le32	size_routes;
+	__le32	jack_type;
+	/*
+	 * struct gb_audio_dai		dai[num_dais];
+	 * struct gb_audio_control	controls[num_controls];
+	 * struct gb_audio_widget	widgets[num_widgets];
+	 * struct gb_audio_route	routes[num_routes];
+	 */
+	__u8	data[0];
+} __packed;
+
+struct gb_audio_get_topology_size_response {
+	__le16	size;
+} __packed;
+
+struct gb_audio_get_topology_response {
+	struct gb_audio_topology	topology;
+} __packed;
+
+struct gb_audio_get_control_request {
+	__u8	control_id;
+	__u8	index;
+} __packed;
+
+struct gb_audio_get_control_response {
+	struct gb_audio_ctl_elem_value	value;
+} __packed;
+
+struct gb_audio_set_control_request {
+	__u8	control_id;
+	__u8	index;
+	struct gb_audio_ctl_elem_value	value;
+} __packed;
+
+struct gb_audio_enable_widget_request {
+	__u8	widget_id;
+} __packed;
+
+struct gb_audio_disable_widget_request {
+	__u8	widget_id;
+} __packed;
+
+struct gb_audio_get_pcm_request {
+	__le16	data_cport;
+} __packed;
+
+struct gb_audio_get_pcm_response {
+	__le32	format;
+	__le32	rate;
+	__u8	channels;
+	__u8	sig_bits;
+} __packed;
+
+struct gb_audio_set_pcm_request {
+	__le16	data_cport;
+	__le32	format;
+	__le32	rate;
+	__u8	channels;
+	__u8	sig_bits;
+} __packed;
+
+struct gb_audio_set_tx_data_size_request {
+	__le16	data_cport;
+	__le16	size;
+} __packed;
+
+struct gb_audio_activate_tx_request {
+	__le16	data_cport;
+} __packed;
+
+struct gb_audio_deactivate_tx_request {
+	__le16	data_cport;
+} __packed;
+
+struct gb_audio_set_rx_data_size_request {
+	__le16	data_cport;
+	__le16	size;
+} __packed;
+
+struct gb_audio_activate_rx_request {
+	__le16	data_cport;
+} __packed;
+
+struct gb_audio_deactivate_rx_request {
+	__le16	data_cport;
+} __packed;
+
+struct gb_audio_jack_event_request {
+	__u8	widget_id;
+	__u8	jack_attribute;
+	__u8	event;
+} __packed;
+
+struct gb_audio_button_event_request {
+	__u8	widget_id;
+	__u8	button_id;
+	__u8	event;
+} __packed;
+
+struct gb_audio_streaming_event_request {
+	__le16	data_cport;
+	__u8	event;
+} __packed;
+
+struct gb_audio_send_data_request {
+	__le64	timestamp;
+	__u8	data[0];
+} __packed;
+
+
+/* Log */
+
+/* operations */
+#define GB_LOG_TYPE_SEND_LOG	0x02
+
+/* length */
+#define GB_LOG_MAX_LEN		1024
+
+struct gb_log_send_log_request {
+	__le16	len;
+	__u8    msg[0];
+} __packed;
+
+#endif /* __GREYBUS_PROTOCOLS_H */
+
diff --git a/include/linux/greybus/hd.h b/include/linux/greybus/hd.h
new file mode 100644
index 000000000000..348b76fabc9a
--- /dev/null
+++ b/include/linux/greybus/hd.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus Host Device
+ *
+ * Copyright 2014-2015 Google Inc.
+ * Copyright 2014-2015 Linaro Ltd.
+ */
+
+#ifndef __HD_H
+#define __HD_H
+
+struct gb_host_device;
+struct gb_message;
+
+struct gb_hd_driver {
+	size_t	hd_priv_size;
+
+	int (*cport_allocate)(struct gb_host_device *hd, int cport_id,
+				unsigned long flags);
+	void (*cport_release)(struct gb_host_device *hd, u16 cport_id);
+	int (*cport_enable)(struct gb_host_device *hd, u16 cport_id,
+				unsigned long flags);
+	int (*cport_disable)(struct gb_host_device *hd, u16 cport_id);
+	int (*cport_connected)(struct gb_host_device *hd, u16 cport_id);
+	int (*cport_flush)(struct gb_host_device *hd, u16 cport_id);
+	int (*cport_shutdown)(struct gb_host_device *hd, u16 cport_id,
+				u8 phase, unsigned int timeout);
+	int (*cport_quiesce)(struct gb_host_device *hd, u16 cport_id,
+				size_t peer_space, unsigned int timeout);
+	int (*cport_clear)(struct gb_host_device *hd, u16 cport_id);
+
+	int (*message_send)(struct gb_host_device *hd, u16 dest_cport_id,
+			struct gb_message *message, gfp_t gfp_mask);
+	void (*message_cancel)(struct gb_message *message);
+	int (*latency_tag_enable)(struct gb_host_device *hd, u16 cport_id);
+	int (*latency_tag_disable)(struct gb_host_device *hd, u16 cport_id);
+	int (*output)(struct gb_host_device *hd, void *req, u16 size, u8 cmd,
+		      bool async);
+};
+
+struct gb_host_device {
+	struct device dev;
+	int bus_id;
+	const struct gb_hd_driver *driver;
+
+	struct list_head modules;
+	struct list_head connections;
+	struct ida cport_id_map;
+
+	/* Number of CPorts supported by the UniPro IP */
+	size_t num_cports;
+
+	/* Host device buffer constraints */
+	size_t buffer_size_max;
+
+	struct gb_svc *svc;
+	/* Private data for the host driver */
+	unsigned long hd_priv[0] __aligned(sizeof(s64));
+};
+#define to_gb_host_device(d) container_of(d, struct gb_host_device, dev)
+
+int gb_hd_cport_reserve(struct gb_host_device *hd, u16 cport_id);
+void gb_hd_cport_release_reserved(struct gb_host_device *hd, u16 cport_id);
+int gb_hd_cport_allocate(struct gb_host_device *hd, int cport_id,
+					unsigned long flags);
+void gb_hd_cport_release(struct gb_host_device *hd, u16 cport_id);
+
+struct gb_host_device *gb_hd_create(struct gb_hd_driver *driver,
+					struct device *parent,
+					size_t buffer_size_max,
+					size_t num_cports);
+int gb_hd_add(struct gb_host_device *hd);
+void gb_hd_del(struct gb_host_device *hd);
+void gb_hd_shutdown(struct gb_host_device *hd);
+void gb_hd_put(struct gb_host_device *hd);
+int gb_hd_output(struct gb_host_device *hd, void *req, u16 size, u8 cmd,
+		 bool in_irq);
+
+int gb_hd_init(void);
+void gb_hd_exit(void);
+
+#endif	/* __HD_H */
diff --git a/include/linux/greybus/interface.h b/include/linux/greybus/interface.h
new file mode 100644
index 000000000000..8fb1eacda302
--- /dev/null
+++ b/include/linux/greybus/interface.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus Interface Block code
+ *
+ * Copyright 2014 Google Inc.
+ * Copyright 2014 Linaro Ltd.
+ */
+
+#ifndef __INTERFACE_H
+#define __INTERFACE_H
+
+enum gb_interface_type {
+	GB_INTERFACE_TYPE_INVALID = 0,
+	GB_INTERFACE_TYPE_UNKNOWN,
+	GB_INTERFACE_TYPE_DUMMY,
+	GB_INTERFACE_TYPE_UNIPRO,
+	GB_INTERFACE_TYPE_GREYBUS,
+};
+
+#define GB_INTERFACE_QUIRK_NO_CPORT_FEATURES		BIT(0)
+#define GB_INTERFACE_QUIRK_NO_INIT_STATUS		BIT(1)
+#define GB_INTERFACE_QUIRK_NO_GMP_IDS			BIT(2)
+#define GB_INTERFACE_QUIRK_FORCED_DISABLE		BIT(3)
+#define GB_INTERFACE_QUIRK_LEGACY_MODE_SWITCH		BIT(4)
+#define GB_INTERFACE_QUIRK_NO_BUNDLE_ACTIVATE		BIT(5)
+#define GB_INTERFACE_QUIRK_NO_PM			BIT(6)
+
+struct gb_interface {
+	struct device dev;
+	struct gb_control *control;
+
+	struct list_head bundles;
+	struct list_head module_node;
+	struct list_head manifest_descs;
+	u8 interface_id;	/* Physical location within the Endo */
+	u8 device_id;
+	u8 features;		/* Feature flags set in the manifest */
+
+	enum gb_interface_type type;
+
+	u32 ddbl1_manufacturer_id;
+	u32 ddbl1_product_id;
+	u32 vendor_id;
+	u32 product_id;
+	u64 serial_number;
+
+	struct gb_host_device *hd;
+	struct gb_module *module;
+
+	unsigned long quirks;
+
+	struct mutex mutex;
+
+	bool disconnected;
+
+	bool ejected;
+	bool removed;
+	bool active;
+	bool enabled;
+	bool mode_switch;
+	bool dme_read;
+
+	struct work_struct mode_switch_work;
+	struct completion mode_switch_completion;
+};
+#define to_gb_interface(d) container_of(d, struct gb_interface, dev)
+
+struct gb_interface *gb_interface_create(struct gb_module *module,
+					 u8 interface_id);
+int gb_interface_activate(struct gb_interface *intf);
+void gb_interface_deactivate(struct gb_interface *intf);
+int gb_interface_enable(struct gb_interface *intf);
+void gb_interface_disable(struct gb_interface *intf);
+int gb_interface_add(struct gb_interface *intf);
+void gb_interface_del(struct gb_interface *intf);
+void gb_interface_put(struct gb_interface *intf);
+void gb_interface_mailbox_event(struct gb_interface *intf, u16 result,
+								u32 mailbox);
+
+int gb_interface_request_mode_switch(struct gb_interface *intf);
+
+#endif /* __INTERFACE_H */
diff --git a/include/linux/greybus/manifest.h b/include/linux/greybus/manifest.h
new file mode 100644
index 000000000000..88aa7e44cad5
--- /dev/null
+++ b/include/linux/greybus/manifest.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus manifest parsing
+ *
+ * Copyright 2014 Google Inc.
+ * Copyright 2014 Linaro Ltd.
+ */
+
+#ifndef __MANIFEST_H
+#define __MANIFEST_H
+
+struct gb_interface;
+bool gb_manifest_parse(struct gb_interface *intf, void *data, size_t size);
+
+#endif /* __MANIFEST_H */
diff --git a/include/linux/greybus/module.h b/include/linux/greybus/module.h
new file mode 100644
index 000000000000..2a27e520ee94
--- /dev/null
+++ b/include/linux/greybus/module.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus Module code
+ *
+ * Copyright 2016 Google Inc.
+ * Copyright 2016 Linaro Ltd.
+ */
+
+#ifndef __MODULE_H
+#define __MODULE_H
+
+struct gb_module {
+	struct device dev;
+	struct gb_host_device *hd;
+
+	struct list_head hd_node;
+
+	u8 module_id;
+	size_t num_interfaces;
+
+	bool disconnected;
+
+	struct gb_interface *interfaces[0];
+};
+#define to_gb_module(d) container_of(d, struct gb_module, dev)
+
+struct gb_module *gb_module_create(struct gb_host_device *hd, u8 module_id,
+				   size_t num_interfaces);
+int gb_module_add(struct gb_module *module);
+void gb_module_del(struct gb_module *module);
+void gb_module_put(struct gb_module *module);
+
+#endif /* __MODULE_H */
diff --git a/include/linux/greybus/operation.h b/include/linux/greybus/operation.h
new file mode 100644
index 000000000000..17ba3daf111b
--- /dev/null
+++ b/include/linux/greybus/operation.h
@@ -0,0 +1,224 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus operations
+ *
+ * Copyright 2014 Google Inc.
+ * Copyright 2014 Linaro Ltd.
+ */
+
+#ifndef __OPERATION_H
+#define __OPERATION_H
+
+#include <linux/completion.h>
+
+struct gb_operation;
+
+/* The default amount of time a request is given to complete */
+#define GB_OPERATION_TIMEOUT_DEFAULT	1000	/* milliseconds */
+
+/*
+ * The top bit of the type in an operation message header indicates
+ * whether the message is a request (bit clear) or response (bit set)
+ */
+#define GB_MESSAGE_TYPE_RESPONSE	((u8)0x80)
+
+enum gb_operation_result {
+	GB_OP_SUCCESS		= 0x00,
+	GB_OP_INTERRUPTED	= 0x01,
+	GB_OP_TIMEOUT		= 0x02,
+	GB_OP_NO_MEMORY		= 0x03,
+	GB_OP_PROTOCOL_BAD	= 0x04,
+	GB_OP_OVERFLOW		= 0x05,
+	GB_OP_INVALID		= 0x06,
+	GB_OP_RETRY		= 0x07,
+	GB_OP_NONEXISTENT	= 0x08,
+	GB_OP_UNKNOWN_ERROR	= 0xfe,
+	GB_OP_MALFUNCTION	= 0xff,
+};
+
+#define GB_OPERATION_MESSAGE_SIZE_MIN	sizeof(struct gb_operation_msg_hdr)
+#define GB_OPERATION_MESSAGE_SIZE_MAX	U16_MAX
+
+/*
+ * Protocol code should only examine the payload and payload_size fields, and
+ * host-controller drivers may use the hcpriv field. All other fields are
+ * intended to be private to the operations core code.
+ */
+struct gb_message {
+	struct gb_operation		*operation;
+	struct gb_operation_msg_hdr	*header;
+
+	void				*payload;
+	size_t				payload_size;
+
+	void				*buffer;
+
+	void				*hcpriv;
+};
+
+#define GB_OPERATION_FLAG_INCOMING		BIT(0)
+#define GB_OPERATION_FLAG_UNIDIRECTIONAL	BIT(1)
+#define GB_OPERATION_FLAG_SHORT_RESPONSE	BIT(2)
+#define GB_OPERATION_FLAG_CORE			BIT(3)
+
+#define GB_OPERATION_FLAG_USER_MASK	(GB_OPERATION_FLAG_SHORT_RESPONSE | \
+					 GB_OPERATION_FLAG_UNIDIRECTIONAL)
+
+/*
+ * A Greybus operation is a remote procedure call performed over a
+ * connection between two UniPro interfaces.
+ *
+ * Every operation consists of a request message sent to the other
+ * end of the connection coupled with a reply message returned to
+ * the sender.  Every operation has a type, whose interpretation is
+ * dependent on the protocol associated with the connection.
+ *
+ * Only four things in an operation structure are intended to be
+ * directly usable by protocol handlers:  the operation's connection
+ * pointer; the operation type; the request message payload (and
+ * size); and the response message payload (and size).  Note that a
+ * message with a 0-byte payload has a null message payload pointer.
+ *
+ * In addition, every operation has a result, which is an errno
+ * value.  Protocol handlers access the operation result using
+ * gb_operation_result().
+ */
+typedef void (*gb_operation_callback)(struct gb_operation *);
+struct gb_operation {
+	struct gb_connection	*connection;
+	struct gb_message	*request;
+	struct gb_message	*response;
+
+	unsigned long		flags;
+	u8			type;
+	u16			id;
+	int			errno;		/* Operation result */
+
+	struct work_struct	work;
+	gb_operation_callback	callback;
+	struct completion	completion;
+	struct timer_list	timer;
+
+	struct kref		kref;
+	atomic_t		waiters;
+
+	int			active;
+	struct list_head	links;		/* connection->operations */
+
+	void			*private;
+};
+
+static inline bool
+gb_operation_is_incoming(struct gb_operation *operation)
+{
+	return operation->flags & GB_OPERATION_FLAG_INCOMING;
+}
+
+static inline bool
+gb_operation_is_unidirectional(struct gb_operation *operation)
+{
+	return operation->flags & GB_OPERATION_FLAG_UNIDIRECTIONAL;
+}
+
+static inline bool
+gb_operation_short_response_allowed(struct gb_operation *operation)
+{
+	return operation->flags & GB_OPERATION_FLAG_SHORT_RESPONSE;
+}
+
+static inline bool gb_operation_is_core(struct gb_operation *operation)
+{
+	return operation->flags & GB_OPERATION_FLAG_CORE;
+}
+
+void gb_connection_recv(struct gb_connection *connection,
+					void *data, size_t size);
+
+int gb_operation_result(struct gb_operation *operation);
+
+size_t gb_operation_get_payload_size_max(struct gb_connection *connection);
+struct gb_operation *
+gb_operation_create_flags(struct gb_connection *connection,
+				u8 type, size_t request_size,
+				size_t response_size, unsigned long flags,
+				gfp_t gfp);
+
+static inline struct gb_operation *
+gb_operation_create(struct gb_connection *connection,
+				u8 type, size_t request_size,
+				size_t response_size, gfp_t gfp)
+{
+	return gb_operation_create_flags(connection, type, request_size,
+						response_size, 0, gfp);
+}
+
+struct gb_operation *
+gb_operation_create_core(struct gb_connection *connection,
+				u8 type, size_t request_size,
+				size_t response_size, unsigned long flags,
+				gfp_t gfp);
+
+void gb_operation_get(struct gb_operation *operation);
+void gb_operation_put(struct gb_operation *operation);
+
+bool gb_operation_response_alloc(struct gb_operation *operation,
+					size_t response_size, gfp_t gfp);
+
+int gb_operation_request_send(struct gb_operation *operation,
+				gb_operation_callback callback,
+				unsigned int timeout,
+				gfp_t gfp);
+int gb_operation_request_send_sync_timeout(struct gb_operation *operation,
+						unsigned int timeout);
+static inline int
+gb_operation_request_send_sync(struct gb_operation *operation)
+{
+	return gb_operation_request_send_sync_timeout(operation,
+			GB_OPERATION_TIMEOUT_DEFAULT);
+}
+
+void gb_operation_cancel(struct gb_operation *operation, int errno);
+void gb_operation_cancel_incoming(struct gb_operation *operation, int errno);
+
+void greybus_message_sent(struct gb_host_device *hd,
+				struct gb_message *message, int status);
+
+int gb_operation_sync_timeout(struct gb_connection *connection, int type,
+				void *request, int request_size,
+				void *response, int response_size,
+				unsigned int timeout);
+int gb_operation_unidirectional_timeout(struct gb_connection *connection,
+				int type, void *request, int request_size,
+				unsigned int timeout);
+
+static inline int gb_operation_sync(struct gb_connection *connection, int type,
+		      void *request, int request_size,
+		      void *response, int response_size)
+{
+	return gb_operation_sync_timeout(connection, type,
+			request, request_size, response, response_size,
+			GB_OPERATION_TIMEOUT_DEFAULT);
+}
+
+static inline int gb_operation_unidirectional(struct gb_connection *connection,
+				int type, void *request, int request_size)
+{
+	return gb_operation_unidirectional_timeout(connection, type,
+			request, request_size, GB_OPERATION_TIMEOUT_DEFAULT);
+}
+
+static inline void *gb_operation_get_data(struct gb_operation *operation)
+{
+	return operation->private;
+}
+
+static inline void gb_operation_set_data(struct gb_operation *operation,
+					 void *data)
+{
+	operation->private = data;
+}
+
+int gb_operation_init(void);
+void gb_operation_exit(void);
+
+#endif /* !__OPERATION_H */
diff --git a/include/linux/greybus/svc.h b/include/linux/greybus/svc.h
new file mode 100644
index 000000000000..e7452057cfe4
--- /dev/null
+++ b/include/linux/greybus/svc.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Greybus SVC code
+ *
+ * Copyright 2015 Google Inc.
+ * Copyright 2015 Linaro Ltd.
+ */
+
+#ifndef __SVC_H
+#define __SVC_H
+
+#define GB_SVC_CPORT_FLAG_E2EFC		BIT(0)
+#define GB_SVC_CPORT_FLAG_CSD_N		BIT(1)
+#define GB_SVC_CPORT_FLAG_CSV_N		BIT(2)
+
+enum gb_svc_state {
+	GB_SVC_STATE_RESET,
+	GB_SVC_STATE_PROTOCOL_VERSION,
+	GB_SVC_STATE_SVC_HELLO,
+};
+
+enum gb_svc_watchdog_bite {
+	GB_SVC_WATCHDOG_BITE_RESET_UNIPRO = 0,
+	GB_SVC_WATCHDOG_BITE_PANIC_KERNEL,
+};
+
+struct gb_svc_watchdog;
+
+struct svc_debugfs_pwrmon_rail {
+	u8 id;
+	struct gb_svc *svc;
+};
+
+struct gb_svc {
+	struct device		dev;
+
+	struct gb_host_device	*hd;
+	struct gb_connection	*connection;
+	enum gb_svc_state	state;
+	struct ida		device_id_map;
+	struct workqueue_struct	*wq;
+
+	u16 endo_id;
+	u8 ap_intf_id;
+
+	u8 protocol_major;
+	u8 protocol_minor;
+
+	struct gb_svc_watchdog	*watchdog;
+	enum gb_svc_watchdog_bite action;
+
+	struct dentry *debugfs_dentry;
+	struct svc_debugfs_pwrmon_rail *pwrmon_rails;
+};
+#define to_gb_svc(d) container_of(d, struct gb_svc, dev)
+
+struct gb_svc *gb_svc_create(struct gb_host_device *hd);
+int gb_svc_add(struct gb_svc *svc);
+void gb_svc_del(struct gb_svc *svc);
+void gb_svc_put(struct gb_svc *svc);
+
+int gb_svc_pwrmon_intf_sample_get(struct gb_svc *svc, u8 intf_id,
+				  u8 measurement_type, u32 *value);
+int gb_svc_intf_device_id(struct gb_svc *svc, u8 intf_id, u8 device_id);
+int gb_svc_route_create(struct gb_svc *svc, u8 intf1_id, u8 dev1_id,
+			       u8 intf2_id, u8 dev2_id);
+void gb_svc_route_destroy(struct gb_svc *svc, u8 intf1_id, u8 intf2_id);
+int gb_svc_connection_create(struct gb_svc *svc, u8 intf1_id, u16 cport1_id,
+			     u8 intf2_id, u16 cport2_id, u8 cport_flags);
+void gb_svc_connection_destroy(struct gb_svc *svc, u8 intf1_id, u16 cport1_id,
+			       u8 intf2_id, u16 cport2_id);
+int gb_svc_intf_eject(struct gb_svc *svc, u8 intf_id);
+int gb_svc_intf_vsys_set(struct gb_svc *svc, u8 intf_id, bool enable);
+int gb_svc_intf_refclk_set(struct gb_svc *svc, u8 intf_id, bool enable);
+int gb_svc_intf_unipro_set(struct gb_svc *svc, u8 intf_id, bool enable);
+int gb_svc_intf_activate(struct gb_svc *svc, u8 intf_id, u8 *intf_type);
+int gb_svc_intf_resume(struct gb_svc *svc, u8 intf_id);
+
+int gb_svc_dme_peer_get(struct gb_svc *svc, u8 intf_id, u16 attr, u16 selector,
+			u32 *value);
+int gb_svc_dme_peer_set(struct gb_svc *svc, u8 intf_id, u16 attr, u16 selector,
+			u32 value);
+int gb_svc_intf_set_power_mode(struct gb_svc *svc, u8 intf_id, u8 hs_series,
+			       u8 tx_mode, u8 tx_gear, u8 tx_nlanes,
+			       u8 tx_amplitude, u8 tx_hs_equalizer,
+			       u8 rx_mode, u8 rx_gear, u8 rx_nlanes,
+			       u8 flags, u32 quirks,
+			       struct gb_svc_l2_timer_cfg *local,
+			       struct gb_svc_l2_timer_cfg *remote);
+int gb_svc_intf_set_power_mode_hibernate(struct gb_svc *svc, u8 intf_id);
+int gb_svc_ping(struct gb_svc *svc);
+int gb_svc_watchdog_create(struct gb_svc *svc);
+void gb_svc_watchdog_destroy(struct gb_svc *svc);
+bool gb_svc_watchdog_enabled(struct gb_svc *svc);
+int gb_svc_watchdog_enable(struct gb_svc *svc);
+int gb_svc_watchdog_disable(struct gb_svc *svc);
+
+int gb_svc_protocol_init(void);
+void gb_svc_protocol_exit(void);
+
+#endif /* __SVC_H */
-- 
cgit v1.2.3


From c10bf3921e743dadb11a6cf59ffaf38cdbeb281b Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rui.silva@linaro.org>
Date: Tue, 27 Aug 2019 16:53:02 +0100
Subject: staging: greybus: add missing includes

Before moving greybus core out of staging and moving header files to
include/linux some greybus header files were missing the necessary
includes. This would trigger compilation faillures with some example
errors logged bellow for with CONFIG_KERNEL_HEADER_TEST=y.

So, add the necessary headers to compile clean before relocating the
header files.

./include/linux/greybus/hd.h:23:50: error: unknown type name 'u16'
  int (*cport_disable)(struct gb_host_device *hd, u16 cport_id); ^~~
./include/linux/greybus/greybus_protocols.h:1314:2: error: unknown type name '__u8'
  __u8 data[0];
  ^~~~
./include/linux/greybus/hd.h:24:52: error: unknown type name 'u16'
  int (*cport_connected)(struct gb_host_device *hd, u16 cport_id); ^~~
./include/linux/greybus/hd.h:25:48: error: unknown type name 'u16'
  int (*cport_flush)(struct gb_host_device *hd, u16 cport_id); ^~~
./include/linux/greybus/hd.h:26:51: error: unknown type name 'u16'
  int (*cport_shutdown)(struct gb_host_device *hd, u16 cport_id, ^~~
./include/linux/greybus/hd.h:27:5: error: unknown type name 'u8'
u8 phase, unsigned int timeout);
     ^~
./include/linux/greybus/hd.h:28:50: error: unknown type name 'u16'
  int (*cport_quiesce)(struct gb_host_device *hd, u16 cport_id, ^~~
./include/linux/greybus/hd.h:29:5: error: unknown type name 'size_t'
     size_t peer_space, unsigned int timeout);
     ^~~~~~
./include/linux/greybus/hd.h:29:5: note: 'size_t' is defined in header '<stddef.h>'; did you forget to '#include <stddef.h>'?
./include/linux/greybus/hd.h:1:1:
+#include <stddef.h>
 /* SPDX-License-Identifier: GPL-2.0 */
./include/linux/greybus/hd.h:29:5:
     size_t peer_space, unsigned int timeout);
     ^~~~~~
./include/linux/greybus/hd.h:30:48: error: unknown type name 'u16'
  int (*cport_clear)(struct gb_host_device *hd, u16 cport_id); ^~~
./include/linux/greybus/hd.h:32:49: error: unknown type name 'u16'
  int (*message_send)(struct gb_host_device *hd, u16 dest_cport_id, ^~~
./include/linux/greybus/hd.h:33:32: error: unknown type name 'gfp_t'
struct gb_message *message, gfp_t gfp_mask); ^~~~~
./include/linux/greybus/hd.h:35:55: error: unknown type name 'u16'
  int (*latency_tag_enable)(struct gb_host_device *hd, u16 cport_id);

Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Gao Xiang <hsiangkao@aol.com>
Signed-off-by: Rui Miguel Silva <rmfrfs@gmail.com>
Signed-off-by: Rui Miguel Silva <rui.silva@linaro.org>
Link: https://lore.kernel.org/r/20190827155302.25704-1-rui.silva@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/greybus/bundle.h            | 3 +++
 include/linux/greybus/connection.h        | 3 +++
 include/linux/greybus/control.h           | 3 +++
 include/linux/greybus/greybus_manifest.h  | 3 +++
 include/linux/greybus/greybus_protocols.h | 2 ++
 include/linux/greybus/hd.h                | 3 +++
 include/linux/greybus/interface.h         | 3 +++
 include/linux/greybus/manifest.h          | 2 ++
 include/linux/greybus/module.h            | 3 +++
 include/linux/greybus/operation.h         | 5 +++++
 include/linux/greybus/svc.h               | 3 +++
 11 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/include/linux/greybus/bundle.h b/include/linux/greybus/bundle.h
index 8734d2055657..df8d88424cb7 100644
--- a/include/linux/greybus/bundle.h
+++ b/include/linux/greybus/bundle.h
@@ -9,7 +9,10 @@
 #ifndef __BUNDLE_H
 #define __BUNDLE_H
 
+#include <linux/types.h>
 #include <linux/list.h>
+#include <linux/pm_runtime.h>
+#include <linux/device.h>
 
 #define	BUNDLE_ID_NONE	U8_MAX
 
diff --git a/include/linux/greybus/connection.h b/include/linux/greybus/connection.h
index 5ca3befc0636..d59b7fc1de3e 100644
--- a/include/linux/greybus/connection.h
+++ b/include/linux/greybus/connection.h
@@ -9,8 +9,11 @@
 #ifndef __CONNECTION_H
 #define __CONNECTION_H
 
+#include <linux/bits.h>
 #include <linux/list.h>
 #include <linux/kfifo.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
 
 #define GB_CONNECTION_FLAG_CSD		BIT(0)
 #define GB_CONNECTION_FLAG_NO_FLOWCTRL	BIT(1)
diff --git a/include/linux/greybus/control.h b/include/linux/greybus/control.h
index 3a29ec05f631..da11fe871653 100644
--- a/include/linux/greybus/control.h
+++ b/include/linux/greybus/control.h
@@ -9,6 +9,9 @@
 #ifndef __CONTROL_H
 #define __CONTROL_H
 
+#include <linux/types.h>
+#include <linux/device.h>
+
 struct gb_control {
 	struct device dev;
 	struct gb_interface *intf;
diff --git a/include/linux/greybus/greybus_manifest.h b/include/linux/greybus/greybus_manifest.h
index db68f7e7d5a7..6e62fe478712 100644
--- a/include/linux/greybus/greybus_manifest.h
+++ b/include/linux/greybus/greybus_manifest.h
@@ -14,6 +14,9 @@
 #ifndef __GREYBUS_MANIFEST_H
 #define __GREYBUS_MANIFEST_H
 
+#include <linux/bits.h>
+#include <linux/types.h>
+
 enum greybus_descriptor_type {
 	GREYBUS_TYPE_INVALID		= 0x00,
 	GREYBUS_TYPE_INTERFACE		= 0x01,
diff --git a/include/linux/greybus/greybus_protocols.h b/include/linux/greybus/greybus_protocols.h
index 5f34d1effb59..dfbc6c39a74b 100644
--- a/include/linux/greybus/greybus_protocols.h
+++ b/include/linux/greybus/greybus_protocols.h
@@ -7,6 +7,8 @@
 #ifndef __GREYBUS_PROTOCOLS_H
 #define __GREYBUS_PROTOCOLS_H
 
+#include <linux/types.h>
+
 /* Fixed IDs for control/svc protocols */
 
 /* SVC switch-port device ids */
diff --git a/include/linux/greybus/hd.h b/include/linux/greybus/hd.h
index 348b76fabc9a..d3faf0c1a569 100644
--- a/include/linux/greybus/hd.h
+++ b/include/linux/greybus/hd.h
@@ -9,6 +9,9 @@
 #ifndef __HD_H
 #define __HD_H
 
+#include <linux/types.h>
+#include <linux/device.h>
+
 struct gb_host_device;
 struct gb_message;
 
diff --git a/include/linux/greybus/interface.h b/include/linux/greybus/interface.h
index 8fb1eacda302..ce4def881e6f 100644
--- a/include/linux/greybus/interface.h
+++ b/include/linux/greybus/interface.h
@@ -9,6 +9,9 @@
 #ifndef __INTERFACE_H
 #define __INTERFACE_H
 
+#include <linux/types.h>
+#include <linux/device.h>
+
 enum gb_interface_type {
 	GB_INTERFACE_TYPE_INVALID = 0,
 	GB_INTERFACE_TYPE_UNKNOWN,
diff --git a/include/linux/greybus/manifest.h b/include/linux/greybus/manifest.h
index 88aa7e44cad5..830301b7a8bc 100644
--- a/include/linux/greybus/manifest.h
+++ b/include/linux/greybus/manifest.h
@@ -9,6 +9,8 @@
 #ifndef __MANIFEST_H
 #define __MANIFEST_H
 
+#include <linux/types.h>
+
 struct gb_interface;
 bool gb_manifest_parse(struct gb_interface *intf, void *data, size_t size);
 
diff --git a/include/linux/greybus/module.h b/include/linux/greybus/module.h
index 2a27e520ee94..47b839af145d 100644
--- a/include/linux/greybus/module.h
+++ b/include/linux/greybus/module.h
@@ -9,6 +9,9 @@
 #ifndef __MODULE_H
 #define __MODULE_H
 
+#include <linux/types.h>
+#include <linux/device.h>
+
 struct gb_module {
 	struct device dev;
 	struct gb_host_device *hd;
diff --git a/include/linux/greybus/operation.h b/include/linux/greybus/operation.h
index 17ba3daf111b..8ca864bba23e 100644
--- a/include/linux/greybus/operation.h
+++ b/include/linux/greybus/operation.h
@@ -10,6 +10,11 @@
 #define __OPERATION_H
 
 #include <linux/completion.h>
+#include <linux/kref.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
 
 struct gb_operation;
 
diff --git a/include/linux/greybus/svc.h b/include/linux/greybus/svc.h
index e7452057cfe4..507f8bd4e4c8 100644
--- a/include/linux/greybus/svc.h
+++ b/include/linux/greybus/svc.h
@@ -9,6 +9,9 @@
 #ifndef __SVC_H
 #define __SVC_H
 
+#include <linux/types.h>
+#include <linux/device.h>
+
 #define GB_SVC_CPORT_FLAG_E2EFC		BIT(0)
 #define GB_SVC_CPORT_FLAG_CSD_N		BIT(1)
 #define GB_SVC_CPORT_FLAG_CSV_N		BIT(2)
-- 
cgit v1.2.3


From db33f00d15a63d269e283bad3f6f61eb00d2bc9d Mon Sep 17 00:00:00 2001
From: Amadeusz Sławiński <amadeuszx.slawinski@intel.com>
Date: Tue, 27 Aug 2019 16:17:09 +0200
Subject: ASoC: dapm: Expose snd_soc_dapm_new_control_unlocked properly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We use snd_soc_dapm_new_control_unlocked for topology and have local
declaration, instead declare it properly in header like already declared
snd_soc_dapm_new_control.

Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@intel.com>
Link: https://lore.kernel.org/r/20190827141712.21015-4-amadeuszx.slawinski@linux.intel.com
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h | 3 +++
 sound/soc/soc-topology.c | 6 ------
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index c00a0b8ade08..8a90816a6eb5 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -402,6 +402,9 @@ int snd_soc_dapm_new_controls(struct snd_soc_dapm_context *dapm,
 struct snd_soc_dapm_widget *snd_soc_dapm_new_control(
 		struct snd_soc_dapm_context *dapm,
 		const struct snd_soc_dapm_widget *widget);
+struct snd_soc_dapm_widget *snd_soc_dapm_new_control_unlocked(
+		struct snd_soc_dapm_context *dapm,
+		const struct snd_soc_dapm_widget *widget);
 int snd_soc_dapm_new_dai_widgets(struct snd_soc_dapm_context *dapm,
 				 struct snd_soc_dai *dai);
 int snd_soc_dapm_link_dai_widgets(struct snd_soc_card *card);
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index dc463f1a9e24..2eca85c04a3e 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -80,12 +80,6 @@ struct soc_tplg {
 
 static int soc_tplg_process_headers(struct soc_tplg *tplg);
 static void soc_tplg_complete(struct soc_tplg *tplg);
-struct snd_soc_dapm_widget *
-snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
-			 const struct snd_soc_dapm_widget *widget);
-struct snd_soc_dapm_widget *
-snd_soc_dapm_new_control(struct snd_soc_dapm_context *dapm,
-			 const struct snd_soc_dapm_widget *widget);
 static void soc_tplg_denum_remove_texts(struct soc_enum *se);
 static void soc_tplg_denum_remove_values(struct soc_enum *se);
 
-- 
cgit v1.2.3


From bcca686c11cdad3b4566ed1818cd2688e7901def Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 27 Aug 2019 21:40:50 +0200
Subject: Revert "driver core: Add sync_state driver/bus callback"

This reverts commit 8f8184d6bf676a8680d6f441e40317d166b46f73.

Based on a lot of email and in-person discussions, this patch series is
being reworked to address a number of issues that were pointed out that
needed to be taken care of before it should be merged.  It will be
resubmitted with those changes hopefully soon.

Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Saravana Kannan <saravanak@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 65 --------------------------------------------------
 include/linux/device.h | 26 --------------------
 2 files changed, 91 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 75fd59360940..de775c7a4d7c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -46,8 +46,6 @@ early_param("sysfs.deprecated", sysfs_deprecated_setup);
 /* Device links support. */
 static LIST_HEAD(wait_for_suppliers);
 static DEFINE_MUTEX(wfs_lock);
-static LIST_HEAD(deferred_sync);
-static unsigned int supplier_sync_state_disabled;
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
@@ -653,62 +651,6 @@ int device_links_check_suppliers(struct device *dev)
 	return ret;
 }
 
-static void __device_links_supplier_sync_state(struct device *dev)
-{
-	struct device_link *link;
-
-	if (dev->state_synced)
-		return;
-
-	list_for_each_entry(link, &dev->links.consumers, s_node) {
-		if (!(link->flags & DL_FLAG_MANAGED))
-			continue;
-		if (link->status != DL_STATE_ACTIVE)
-			return;
-	}
-
-	if (dev->bus->sync_state)
-		dev->bus->sync_state(dev);
-	else if (dev->driver && dev->driver->sync_state)
-		dev->driver->sync_state(dev);
-
-	dev->state_synced = true;
-}
-
-void device_links_supplier_sync_state_pause(void)
-{
-	device_links_write_lock();
-	supplier_sync_state_disabled++;
-	device_links_write_unlock();
-}
-
-void device_links_supplier_sync_state_resume(void)
-{
-	struct device *dev, *tmp;
-
-	device_links_write_lock();
-	if (!supplier_sync_state_disabled) {
-		WARN(true, "Unmatched sync_state pause/resume!");
-		goto out;
-	}
-	supplier_sync_state_disabled--;
-	if (supplier_sync_state_disabled)
-		goto out;
-
-	list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) {
-		__device_links_supplier_sync_state(dev);
-		list_del_init(&dev->links.defer_sync);
-	}
-out:
-	device_links_write_unlock();
-}
-
-static void __device_links_supplier_defer_sync(struct device *sup)
-{
-	if (list_empty(&sup->links.defer_sync))
-		list_add_tail(&sup->links.defer_sync, &deferred_sync);
-}
-
 /**
  * device_links_driver_bound - Update device links after probing its driver.
  * @dev: Device to update the links for.
@@ -753,11 +695,6 @@ void device_links_driver_bound(struct device *dev)
 
 		WARN_ON(link->status != DL_STATE_CONSUMER_PROBE);
 		WRITE_ONCE(link->status, DL_STATE_ACTIVE);
-
-		if (supplier_sync_state_disabled)
-			__device_links_supplier_defer_sync(link->supplier);
-		else
-			__device_links_supplier_sync_state(link->supplier);
 	}
 
 	dev->links.status = DL_DEV_DRIVER_BOUND;
@@ -874,7 +811,6 @@ void device_links_driver_cleanup(struct device *dev)
 		WRITE_ONCE(link->status, DL_STATE_DORMANT);
 	}
 
-	list_del_init(&dev->links.defer_sync);
 	__device_links_no_driver(dev);
 
 	device_links_write_unlock();
@@ -1849,7 +1785,6 @@ void device_initialize(struct device *dev)
 	INIT_LIST_HEAD(&dev->links.consumers);
 	INIT_LIST_HEAD(&dev->links.suppliers);
 	INIT_LIST_HEAD(&dev->links.needs_suppliers);
-	INIT_LIST_HEAD(&dev->links.defer_sync);
 	dev->links.status = DL_DEV_NO_DRIVER;
 }
 EXPORT_SYMBOL_GPL(device_initialize);
diff --git a/include/linux/device.h b/include/linux/device.h
index 76496497e753..90142ec9ce84 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -84,8 +84,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *		available at the time this function is called.  As in, the
  *		function should NOT stop at the first failed device link if
  *		other unlinked supplier devices are present in the system.
- *		This is necessary for the sync_state() callback to work
- *		correctly.
  *
  *		Return 0 if device links have been successfully created to all
  *		the suppliers of this device.  Return an error if some of the
@@ -93,13 +91,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *		reattempted in the future.
  * @probe:	Called when a new device or driver add to this bus, and callback
  *		the specific driver's probe to initial the matched device.
- * @sync_state:	Called to sync device state to software state after all the
- *		state tracking consumers linked to this device (present at
- *		the time of late_initcall) have successfully bound to a
- *		driver. If the device has no consumers, this function will
- *		be called at late_initcall_sync level. If the device has
- *		consumers that are never bound to a driver, this function
- *		will never get called until they do.
  * @remove:	Called when a device removed from this bus.
  * @shutdown:	Called at shut-down time to quiesce the device.
  *
@@ -144,7 +135,6 @@ struct bus_type {
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
 	int (*add_links)(struct device *dev);
 	int (*probe)(struct device *dev);
-	void (*sync_state)(struct device *dev);
 	int (*remove)(struct device *dev);
 	void (*shutdown)(struct device *dev);
 
@@ -376,13 +366,6 @@ enum probe_type {
  * @probe:	Called to query the existence of a specific device,
  *		whether this driver can work with it, and bind the driver
  *		to a specific device.
- * @sync_state:	Called to sync device state to software state after all the
- *		state tracking consumers linked to this device (present at
- *		the time of late_initcall) have successfully bound to a
- *		driver. If the device has no consumers, this function will
- *		be called at late_initcall_sync level. If the device has
- *		consumers that are never bound to a driver, this function
- *		will never get called until they do.
  * @remove:	Called when the device is removed from the system to
  *		unbind a device from this driver.
  * @shutdown:	Called at shut-down time to quiesce the device.
@@ -423,7 +406,6 @@ struct device_driver {
 
 	int (*edit_links)(struct device *dev);
 	int (*probe) (struct device *dev);
-	void (*sync_state)(struct device *dev);
 	int (*remove) (struct device *dev);
 	void (*shutdown) (struct device *dev);
 	int (*suspend) (struct device *dev, pm_message_t state);
@@ -1177,14 +1159,12 @@ enum dl_dev_state {
  * @suppliers: List of links to supplier devices.
  * @consumers: List of links to consumer devices.
  * @needs_suppliers: Hook to global list of devices waiting for suppliers.
- * @defer_sync: Hook to global list of devices that have deferred sync_state.
  * @status: Driver status information.
  */
 struct dev_links_info {
 	struct list_head suppliers;
 	struct list_head consumers;
 	struct list_head needs_suppliers;
-	struct list_head defer_sync;
 	enum dl_dev_state status;
 };
 
@@ -1262,9 +1242,6 @@ struct dev_links_info {
  *              device.
  * @has_edit_links: This device has a driver than is capable of
  *		    editing the device links created by driver core.
- * @state_synced: The hardware state of this device has been synced to match
- *		  the software state of this device by calling the driver/bus
- *		  sync_state() callback.
  * @dma_coherent: this particular device is dma coherent, even if the
  *		architecture supports non-coherent devices.
  *
@@ -1362,7 +1339,6 @@ struct device {
 	bool			offline:1;
 	bool			of_node_reused:1;
 	bool			has_edit_links:1;
-	bool			state_synced:1;
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
@@ -1707,8 +1683,6 @@ struct device_link *device_link_add(struct device *consumer,
 void device_link_del(struct device_link *link);
 void device_link_remove(void *consumer, struct device *supplier);
 void device_link_remove_from_wfs(struct device *consumer);
-void device_links_supplier_sync_state_pause(void);
-void device_links_supplier_sync_state_resume(void);
 
 #ifndef dev_fmt
 #define dev_fmt(fmt) fmt
-- 
cgit v1.2.3


From 33cbfe54499338af08ab906a99afac247ea533f6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 27 Aug 2019 21:41:06 +0200
Subject: Revert "driver core: Add edit_links() callback for drivers"

This reverts commit 134b23eec9e3a3c795a6ceb0efe2fa63e87983b2.

Based on a lot of email and in-person discussions, this patch series is
being reworked to address a number of issues that were pointed out that
needed to be taken care of before it should be merged.  It will be
resubmitted with those changes hopefully soon.

Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Saravana Kannan <saravanak@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 24 ++----------------------
 drivers/base/dd.c      | 29 -----------------------------
 include/linux/device.h | 20 --------------------
 3 files changed, 2 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index de775c7a4d7c..605905de0cca 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -439,19 +439,6 @@ static void device_link_wait_for_supplier(struct device *consumer)
 	mutex_unlock(&wfs_lock);
 }
 
-/**
- * device_link_remove_from_wfs - Unmark device as waiting for supplier
- * @consumer: Consumer device
- *
- * Unmark the consumer device as waiting for suppliers to become available.
- */
-void device_link_remove_from_wfs(struct device *consumer)
-{
-	mutex_lock(&wfs_lock);
-	list_del_init(&consumer->links.needs_suppliers);
-	mutex_unlock(&wfs_lock);
-}
-
 /**
  * device_link_check_waiting_consumers - Try to unmark waiting consumers
  *
@@ -469,19 +456,12 @@ void device_link_remove_from_wfs(struct device *consumer)
 static void device_link_check_waiting_consumers(void)
 {
 	struct device *dev, *tmp;
-	int ret;
 
 	mutex_lock(&wfs_lock);
 	list_for_each_entry_safe(dev, tmp, &wait_for_suppliers,
-				 links.needs_suppliers) {
-		ret = 0;
-		if (dev->has_edit_links)
-			ret = driver_edit_links(dev);
-		else if (dev->bus->add_links)
-			ret = dev->bus->add_links(dev);
-		if (!ret)
+				 links.needs_suppliers)
+		if (!dev->bus->add_links(dev))
 			list_del_init(&dev->links.needs_suppliers);
-	}
 	mutex_unlock(&wfs_lock);
 }
 
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 55fbc2467b37..d811e60610d3 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -710,12 +710,6 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 
-	if (drv->edit_links) {
-		if (drv->edit_links(dev))
-			dev->has_edit_links = true;
-		else
-			device_link_remove_from_wfs(dev);
-	}
 	pm_runtime_get_suppliers(dev);
 	if (dev->parent)
 		pm_runtime_get_sync(dev->parent);
@@ -804,29 +798,6 @@ struct device_attach_data {
 	bool have_async;
 };
 
-static int __driver_edit_links(struct device_driver *drv, void *data)
-{
-	struct device *dev = data;
-
-	if (!drv->edit_links)
-		return 0;
-
-	if (driver_match_device(drv, dev) <= 0)
-		return 0;
-
-	return drv->edit_links(dev);
-}
-
-int driver_edit_links(struct device *dev)
-{
-	int ret;
-
-	device_lock(dev);
-	ret = bus_for_each_drv(dev->bus, NULL, dev, __driver_edit_links);
-	device_unlock(dev);
-	return ret;
-}
-
 static int __device_attach_driver(struct device_driver *drv, void *_data)
 {
 	struct device_attach_data *data = _data;
diff --git a/include/linux/device.h b/include/linux/device.h
index 90142ec9ce84..73210745cc6b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -349,20 +349,6 @@ enum probe_type {
  * @probe_type:	Type of the probe (synchronous or asynchronous) to use.
  * @of_match_table: The open firmware table.
  * @acpi_match_table: The ACPI match table.
- * @edit_links:	Called to allow a matched driver to edit the device links the
- *		bus might have added incorrectly. This will be useful to handle
- *		cases where the bus incorrectly adds functional dependencies
- *		that aren't true or tries to create cyclic dependencies. But
- *		doesn't correctly handle functional dependencies that are
- *		missed by the bus as the supplier's sync_state might get to
- *		execute before the driver for a missing consumer is loaded and
- *		gets to edit the device links for the consumer.
- *
- *		This function might be called multiple times after a new device
- *		is added.  The function is expected to create all the device
- *		links for the new device and return 0 if it was completed
- *		successfully or return an error if it needs to be reattempted
- *		in the future.
  * @probe:	Called to query the existence of a specific device,
  *		whether this driver can work with it, and bind the driver
  *		to a specific device.
@@ -404,7 +390,6 @@ struct device_driver {
 	const struct of_device_id	*of_match_table;
 	const struct acpi_device_id	*acpi_match_table;
 
-	int (*edit_links)(struct device *dev);
 	int (*probe) (struct device *dev);
 	int (*remove) (struct device *dev);
 	void (*shutdown) (struct device *dev);
@@ -1240,8 +1225,6 @@ struct dev_links_info {
  * @offline:	Set after successful invocation of bus type's .offline().
  * @of_node_reused: Set if the device-tree node is shared with an ancestor
  *              device.
- * @has_edit_links: This device has a driver than is capable of
- *		    editing the device links created by driver core.
  * @dma_coherent: this particular device is dma coherent, even if the
  *		architecture supports non-coherent devices.
  *
@@ -1338,7 +1321,6 @@ struct device {
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
-	bool			has_edit_links:1;
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
     defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
@@ -1590,7 +1572,6 @@ extern int  __must_check device_attach(struct device *dev);
 extern int __must_check driver_attach(struct device_driver *drv);
 extern void device_initial_probe(struct device *dev);
 extern int __must_check device_reprobe(struct device *dev);
-extern int driver_edit_links(struct device *dev);
 
 extern bool device_is_bound(struct device *dev);
 
@@ -1682,7 +1663,6 @@ struct device_link *device_link_add(struct device *consumer,
 				    struct device *supplier, u32 flags);
 void device_link_del(struct device_link *link);
 void device_link_remove(void *consumer, struct device *supplier);
-void device_link_remove_from_wfs(struct device *consumer);
 
 #ifndef dev_fmt
 #define dev_fmt(fmt) fmt
-- 
cgit v1.2.3


From bfb3943bed670a0655aa2f9dcf82222ff09d6dd1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Tue, 27 Aug 2019 21:41:16 +0200
Subject: Revert "driver core: Add support for linking devices during device
 addition"

This reverts commit 5302dd7dd0b6d04c63cdce51d1e9fda9ef0be886.

Based on a lot of email and in-person discussions, this patch series is
being reworked to address a number of issues that were pointed out that
needed to be taken care of before it should be merged.  It will be
resubmitted with those changes hopefully soon.

Cc: Frank Rowand <frowand.list@gmail.com>
Cc: Saravana Kannan <saravanak@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 83 --------------------------------------------------
 include/linux/device.h | 14 ---------
 2 files changed, 97 deletions(-)

(limited to 'include')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 605905de0cca..066badad988c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -44,8 +44,6 @@ early_param("sysfs.deprecated", sysfs_deprecated_setup);
 #endif
 
 /* Device links support. */
-static LIST_HEAD(wait_for_suppliers);
-static DEFINE_MUTEX(wfs_lock);
 
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
@@ -420,51 +418,6 @@ struct device_link *device_link_add(struct device *consumer,
 }
 EXPORT_SYMBOL_GPL(device_link_add);
 
-/**
- * device_link_wait_for_supplier - Mark device as waiting for supplier
- * @consumer: Consumer device
- *
- * Marks the consumer device as waiting for suppliers to become available. The
- * consumer device will never be probed until it's unmarked as waiting for
- * suppliers. The caller is responsible for adding the link to the supplier
- * once the supplier device is present.
- *
- * This function is NOT meant to be called from the probe function of the
- * consumer but rather from code that creates/adds the consumer device.
- */
-static void device_link_wait_for_supplier(struct device *consumer)
-{
-	mutex_lock(&wfs_lock);
-	list_add_tail(&consumer->links.needs_suppliers, &wait_for_suppliers);
-	mutex_unlock(&wfs_lock);
-}
-
-/**
- * device_link_check_waiting_consumers - Try to unmark waiting consumers
- *
- * Loops through all consumers waiting on suppliers and tries to add all their
- * supplier links. If that succeeds, the consumer device is unmarked as waiting
- * for suppliers. Otherwise, they are left marked as waiting on suppliers,
- *
- * The add_links bus callback is expected to return 0 if it has found and added
- * all the supplier links for the consumer device. It should return an error if
- * it isn't able to do so.
- *
- * The caller of device_link_wait_for_supplier() is expected to call this once
- * it's aware of potential suppliers becoming available.
- */
-static void device_link_check_waiting_consumers(void)
-{
-	struct device *dev, *tmp;
-
-	mutex_lock(&wfs_lock);
-	list_for_each_entry_safe(dev, tmp, &wait_for_suppliers,
-				 links.needs_suppliers)
-		if (!dev->bus->add_links(dev))
-			list_del_init(&dev->links.needs_suppliers);
-	mutex_unlock(&wfs_lock);
-}
-
 static void device_link_free(struct device_link *link)
 {
 	while (refcount_dec_not_one(&link->rpm_active))
@@ -599,19 +552,6 @@ int device_links_check_suppliers(struct device *dev)
 	struct device_link *link;
 	int ret = 0;
 
-	/*
-	 * If a device is waiting for one or more suppliers (in
-	 * wait_for_suppliers list), it is not ready to probe yet. So just
-	 * return -EPROBE_DEFER without having to check the links with existing
-	 * suppliers.
-	 */
-	mutex_lock(&wfs_lock);
-	if (!list_empty(&dev->links.needs_suppliers)) {
-		mutex_unlock(&wfs_lock);
-		return -EPROBE_DEFER;
-	}
-	mutex_unlock(&wfs_lock);
-
 	device_links_write_lock();
 
 	list_for_each_entry(link, &dev->links.suppliers, c_node) {
@@ -896,10 +836,6 @@ static void device_links_purge(struct device *dev)
 {
 	struct device_link *link, *ln;
 
-	mutex_lock(&wfs_lock);
-	list_del(&dev->links.needs_suppliers);
-	mutex_unlock(&wfs_lock);
-
 	/*
 	 * Delete all of the remaining links from this device to any other
 	 * devices (either consumers or suppliers).
@@ -1764,7 +1700,6 @@ void device_initialize(struct device *dev)
 #endif
 	INIT_LIST_HEAD(&dev->links.consumers);
 	INIT_LIST_HEAD(&dev->links.suppliers);
-	INIT_LIST_HEAD(&dev->links.needs_suppliers);
 	dev->links.status = DL_DEV_NO_DRIVER;
 }
 EXPORT_SYMBOL_GPL(device_initialize);
@@ -2251,24 +2186,6 @@ int device_add(struct device *dev)
 					     BUS_NOTIFY_ADD_DEVICE, dev);
 
 	kobject_uevent(&dev->kobj, KOBJ_ADD);
-
-	/*
-	 * Check if any of the other devices (consumers) have been waiting for
-	 * this device (supplier) to be added so that they can create a device
-	 * link to it.
-	 *
-	 * This needs to happen after device_pm_add() because device_link_add()
-	 * requires the supplier be registered before it's called.
-	 *
-	 * But this also needs to happe before bus_probe_device() to make sure
-	 * waiting consumers can link to it before the driver is bound to the
-	 * device and the driver sync_state callback is called for this device.
-	 */
-	device_link_check_waiting_consumers();
-
-	if (dev->bus && dev->bus->add_links && dev->bus->add_links(dev))
-		device_link_wait_for_supplier(dev);
-
 	bus_probe_device(dev);
 	if (parent)
 		klist_add_tail(&dev->p->knode_parent,
diff --git a/include/linux/device.h b/include/linux/device.h
index 73210745cc6b..ec598ede9455 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -78,17 +78,6 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *);
  *		-EPROBE_DEFER it will queue the device for deferred probing.
  * @uevent:	Called when a device is added, removed, or a few other things
  *		that generate uevents to add the environment variables.
- * @add_links:	Called, perhaps multiple times per device, after a device is
- *		added to this bus.  The function is expected to create device
- *		links to all the suppliers of the input device that are
- *		available at the time this function is called.  As in, the
- *		function should NOT stop at the first failed device link if
- *		other unlinked supplier devices are present in the system.
- *
- *		Return 0 if device links have been successfully created to all
- *		the suppliers of this device.  Return an error if some of the
- *		suppliers are not yet available and this function needs to be
- *		reattempted in the future.
  * @probe:	Called when a new device or driver add to this bus, and callback
  *		the specific driver's probe to initial the matched device.
  * @remove:	Called when a device removed from this bus.
@@ -133,7 +122,6 @@ struct bus_type {
 
 	int (*match)(struct device *dev, struct device_driver *drv);
 	int (*uevent)(struct device *dev, struct kobj_uevent_env *env);
-	int (*add_links)(struct device *dev);
 	int (*probe)(struct device *dev);
 	int (*remove)(struct device *dev);
 	void (*shutdown)(struct device *dev);
@@ -1143,13 +1131,11 @@ enum dl_dev_state {
  * struct dev_links_info - Device data related to device links.
  * @suppliers: List of links to supplier devices.
  * @consumers: List of links to consumer devices.
- * @needs_suppliers: Hook to global list of devices waiting for suppliers.
  * @status: Driver status information.
  */
 struct dev_links_info {
 	struct list_head suppliers;
 	struct list_head consumers;
-	struct list_head needs_suppliers;
 	enum dl_dev_state status;
 };
 
-- 
cgit v1.2.3


From 981471bd3abf4d572097645d765391533aac327d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Sun, 25 Aug 2019 10:01:32 -0700
Subject: net_sched: fix a NULL pointer deref in ipt action

The net pointer in struct xt_tgdtor_param is not explicitly
initialized therefore is still NULL when dereferencing it.
So we have to find a way to pass the correct net pointer to
ipt_destroy_target().

The best way I find is just saving the net pointer inside the per
netns struct tcf_idrinfo, which could make this patch smaller.

Fixes: 0c66dc1ea3f0 ("netfilter: conntrack: register hooks in netns when needed by ruleset")
Reported-and-tested-by: itugrok@yahoo.com
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  4 +++-
 net/sched/act_bpf.c        |  2 +-
 net/sched/act_connmark.c   |  2 +-
 net/sched/act_csum.c       |  2 +-
 net/sched/act_ct.c         |  2 +-
 net/sched/act_ctinfo.c     |  2 +-
 net/sched/act_gact.c       |  2 +-
 net/sched/act_ife.c        |  2 +-
 net/sched/act_ipt.c        | 11 ++++++-----
 net/sched/act_mirred.c     |  2 +-
 net/sched/act_mpls.c       |  2 +-
 net/sched/act_nat.c        |  2 +-
 net/sched/act_pedit.c      |  2 +-
 net/sched/act_police.c     |  2 +-
 net/sched/act_sample.c     |  2 +-
 net/sched/act_simple.c     |  2 +-
 net/sched/act_skbedit.c    |  2 +-
 net/sched/act_skbmod.c     |  2 +-
 net/sched/act_tunnel_key.c |  2 +-
 net/sched/act_vlan.c       |  2 +-
 20 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c61a1bf4e3de..3a1a72990fce 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -15,6 +15,7 @@
 struct tcf_idrinfo {
 	struct mutex	lock;
 	struct idr	action_idr;
+	struct net	*net;
 };
 
 struct tc_action_ops;
@@ -108,7 +109,7 @@ struct tc_action_net {
 };
 
 static inline
-int tc_action_net_init(struct tc_action_net *tn,
+int tc_action_net_init(struct net *net, struct tc_action_net *tn,
 		       const struct tc_action_ops *ops)
 {
 	int err = 0;
@@ -117,6 +118,7 @@ int tc_action_net_init(struct tc_action_net *tn,
 	if (!tn->idrinfo)
 		return -ENOMEM;
 	tn->ops = ops;
+	tn->idrinfo->net = net;
 	mutex_init(&tn->idrinfo->lock);
 	idr_init(&tn->idrinfo->action_idr);
 	return err;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index fd1f7e799e23..04b7bd4ec751 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -422,7 +422,7 @@ static __net_init int bpf_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 
-	return tc_action_net_init(tn, &act_bpf_ops);
+	return tc_action_net_init(net, tn, &act_bpf_ops);
 }
 
 static void __net_exit bpf_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 32ac04d77a45..2b43cacf82af 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -231,7 +231,7 @@ static __net_init int connmark_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
 
-	return tc_action_net_init(tn, &act_connmark_ops);
+	return tc_action_net_init(net, tn, &act_connmark_ops);
 }
 
 static void __net_exit connmark_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 9b9288267a54..d3cfad88dc3a 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -714,7 +714,7 @@ static __net_init int csum_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
 
-	return tc_action_net_init(tn, &act_csum_ops);
+	return tc_action_net_init(net, tn, &act_csum_ops);
 }
 
 static void __net_exit csum_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 33a1a7406e87..cdd6f3818097 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -939,7 +939,7 @@ static __net_init int ct_init_net(struct net *net)
 		tn->labels = true;
 	}
 
-	return tc_action_net_init(&tn->tn, &act_ct_ops);
+	return tc_action_net_init(net, &tn->tn, &act_ct_ops);
 }
 
 static void __net_exit ct_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 06ef74b74911..0dbcfd1dca7b 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -376,7 +376,7 @@ static __net_init int ctinfo_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
 
-	return tc_action_net_init(tn, &act_ctinfo_ops);
+	return tc_action_net_init(net, tn, &act_ctinfo_ops);
 }
 
 static void __net_exit ctinfo_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 8f0140c6ca58..324f1d1f6d47 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -278,7 +278,7 @@ static __net_init int gact_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 
-	return tc_action_net_init(tn, &act_gact_ops);
+	return tc_action_net_init(net, tn, &act_gact_ops);
 }
 
 static void __net_exit gact_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 92ee853d43e6..3a31e241c647 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -890,7 +890,7 @@ static __net_init int ife_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 
-	return tc_action_net_init(tn, &act_ife_ops);
+	return tc_action_net_init(net, tn, &act_ife_ops);
 }
 
 static void __net_exit ife_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index ce2c30a591d2..214a03d405cf 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -61,12 +61,13 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t,
 	return 0;
 }
 
-static void ipt_destroy_target(struct xt_entry_target *t)
+static void ipt_destroy_target(struct xt_entry_target *t, struct net *net)
 {
 	struct xt_tgdtor_param par = {
 		.target   = t->u.kernel.target,
 		.targinfo = t->data,
 		.family   = NFPROTO_IPV4,
+		.net      = net,
 	};
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
@@ -78,7 +79,7 @@ static void tcf_ipt_release(struct tc_action *a)
 	struct tcf_ipt *ipt = to_ipt(a);
 
 	if (ipt->tcfi_t) {
-		ipt_destroy_target(ipt->tcfi_t);
+		ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net);
 		kfree(ipt->tcfi_t);
 	}
 	kfree(ipt->tcfi_tname);
@@ -180,7 +181,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 
 	spin_lock_bh(&ipt->tcf_lock);
 	if (ret != ACT_P_CREATED) {
-		ipt_destroy_target(ipt->tcfi_t);
+		ipt_destroy_target(ipt->tcfi_t, net);
 		kfree(ipt->tcfi_tname);
 		kfree(ipt->tcfi_t);
 	}
@@ -350,7 +351,7 @@ static __net_init int ipt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, ipt_net_id);
 
-	return tc_action_net_init(tn, &act_ipt_ops);
+	return tc_action_net_init(net, tn, &act_ipt_ops);
 }
 
 static void __net_exit ipt_exit_net(struct list_head *net_list)
@@ -399,7 +400,7 @@ static __net_init int xt_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, xt_net_id);
 
-	return tc_action_net_init(tn, &act_xt_ops);
+	return tc_action_net_init(net, tn, &act_xt_ops);
 }
 
 static void __net_exit xt_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index be3f88dfc37e..9d1bf508075a 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -453,7 +453,7 @@ static __net_init int mirred_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
 
-	return tc_action_net_init(tn, &act_mirred_ops);
+	return tc_action_net_init(net, tn, &act_mirred_ops);
 }
 
 static void __net_exit mirred_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 0f299e3b618c..e168df0e008a 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -375,7 +375,7 @@ static __net_init int mpls_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, mpls_net_id);
 
-	return tc_action_net_init(tn, &act_mpls_ops);
+	return tc_action_net_init(net, tn, &act_mpls_ops);
 }
 
 static void __net_exit mpls_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7b858c11b1b5..ea4c5359e7df 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -327,7 +327,7 @@ static __net_init int nat_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 
-	return tc_action_net_init(tn, &act_nat_ops);
+	return tc_action_net_init(net, tn, &act_nat_ops);
 }
 
 static void __net_exit nat_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 17360c6faeaa..cdfaa79382a2 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -498,7 +498,7 @@ static __net_init int pedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 
-	return tc_action_net_init(tn, &act_pedit_ops);
+	return tc_action_net_init(net, tn, &act_pedit_ops);
 }
 
 static void __net_exit pedit_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 49cec3e64a4d..6315e0f8d26e 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -371,7 +371,7 @@ static __net_init int police_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
 
-	return tc_action_net_init(tn, &act_police_ops);
+	return tc_action_net_init(net, tn, &act_police_ops);
 }
 
 static void __net_exit police_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 595308d60133..7eff363f9f03 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -265,7 +265,7 @@ static __net_init int sample_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
 
-	return tc_action_net_init(tn, &act_sample_ops);
+	return tc_action_net_init(net, tn, &act_sample_ops);
 }
 
 static void __net_exit sample_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 33aefa25b545..6120e56117ca 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -232,7 +232,7 @@ static __net_init int simp_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 
-	return tc_action_net_init(tn, &act_simp_ops);
+	return tc_action_net_init(net, tn, &act_simp_ops);
 }
 
 static void __net_exit simp_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 37dced00b63d..6a8d3337c577 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -336,7 +336,7 @@ static __net_init int skbedit_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
 
-	return tc_action_net_init(tn, &act_skbedit_ops);
+	return tc_action_net_init(net, tn, &act_skbedit_ops);
 }
 
 static void __net_exit skbedit_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 7da3518e18ef..888437f97ba6 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -287,7 +287,7 @@ static __net_init int skbmod_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
 
-	return tc_action_net_init(tn, &act_skbmod_ops);
+	return tc_action_net_init(net, tn, &act_skbmod_ops);
 }
 
 static void __net_exit skbmod_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 6d0debdc9b97..2f83a79f76aa 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -600,7 +600,7 @@ static __net_init int tunnel_key_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
 
-	return tc_action_net_init(tn, &act_tunnel_key_ops);
+	return tc_action_net_init(net, tn, &act_tunnel_key_ops);
 }
 
 static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index a3c9eea1ee8a..287a30bf8930 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -334,7 +334,7 @@ static __net_init int vlan_init_net(struct net *net)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 
-	return tc_action_net_init(tn, &act_vlan_ops);
+	return tc_action_net_init(net, tn, &act_vlan_ops);
 }
 
 static void __net_exit vlan_exit_net(struct list_head *net_list)
-- 
cgit v1.2.3


From 10d274e880eb208ec6a76261a9f8f8155020f771 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 22 Aug 2019 22:52:12 -0700
Subject: bpf: introduce verifier internal test flag

Introduce BPF_F_TEST_STATE_FREQ flag to stress test parentage chain
and state pruning.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 1 +
 include/uapi/linux/bpf.h     | 3 +++
 kernel/bpf/syscall.c         | 1 +
 kernel/bpf/verifier.c        | 5 ++++-
 4 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5fe99f322b1c..26a6d58ca78c 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -355,6 +355,7 @@ struct bpf_verifier_env {
 	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
 	int stack_size;			/* number of states to be processed */
 	bool strict_alignment;		/* perform strict pointer alignment checks */
+	bool test_state_freq;		/* test verifier with different pruning frequency */
 	struct bpf_verifier_state *cur_state; /* current verifier state */
 	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
 	struct bpf_verifier_state_list *free_list;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b5889257cc33..5d2fb183ee2d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -285,6 +285,9 @@ enum bpf_attach_type {
  */
 #define BPF_F_TEST_RND_HI32	(1U << 2)
 
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ	(1U << 3)
+
 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have
  * two extensions:
  *
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c0f62fd67c6b..ca60eafa6922 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1629,6 +1629,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
 	if (attr->prog_flags & ~(BPF_F_STRICT_ALIGNMENT |
 				 BPF_F_ANY_ALIGNMENT |
+				 BPF_F_TEST_STATE_FREQ |
 				 BPF_F_TEST_RND_HI32))
 		return -EINVAL;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 10c0ff93f52b..f340cfe53c6e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7222,7 +7222,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state_list *sl, **pprev;
 	struct bpf_verifier_state *cur = env->cur_state, *new;
 	int i, j, err, states_cnt = 0;
-	bool add_new_state = false;
+	bool add_new_state = env->test_state_freq ? true : false;
 
 	cur->last_insn_idx = env->prev_insn_idx;
 	if (!env->insn_aux_data[insn_idx].prune_point)
@@ -9262,6 +9262,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 
 	env->allow_ptr_leaks = is_priv;
 
+	if (is_priv)
+		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
+
 	ret = replace_map_fd_with_map_ptr(env);
 	if (ret < 0)
 		goto skip_full_check;
-- 
cgit v1.2.3


From e65d45cc351ac5f1c11e6bac5669e536df753664 Mon Sep 17 00:00:00 2001
From: Vivien Didelot <vivien.didelot@gmail.com>
Date: Sun, 25 Aug 2019 13:25:15 -0400
Subject: net: dsa: remove bitmap operations

The bitmap operations were introduced to simplify the switch drivers
in the future, since most of them could implement the common VLAN and
MDB operations (add, del, dump) with simple functions taking all target
ports at once, and thus limiting the number of hardware accesses.

Programming an MDB or VLAN this way in a single operation would clearly
simplify the drivers a lot but would require a new get-set interface
in DSA. The usage of such bitmap from the stack also raised concerned
in the past, leading to the dynamic allocation of a new ds->_bitmap
member in the dsa_switch structure. So let's get rid of them for now.

This commit nicely wraps the ds->ops->port_{mdb,vlan}_{prepare,add}
switch operations into new dsa_switch_{mdb,vlan}_{prepare,add}
variants not using any bitmap argument anymore.

New dsa_switch_{mdb,vlan}_match helpers have been introduced to make
clear which local port of a switch must be programmed with the target
object. While the targeted user port is an obvious candidate, the
DSA links must also be programmed, as well as the CPU port for VLANs.

While at it, also remove local variables that are only used once.

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |   3 --
 net/dsa/dsa2.c    |  14 ------
 net/dsa/switch.c  | 132 ++++++++++++++++++++++++------------------------------
 3 files changed, 59 insertions(+), 90 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 147b757ef8ea..96acb14ec1a8 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -275,9 +275,6 @@ struct dsa_switch {
 	 */
 	bool			vlan_filtering;
 
-	unsigned long		*bitmap;
-	unsigned long		_bitmap;
-
 	/* Dynamically allocated ports, keep last */
 	size_t num_ports;
 	struct dsa_port ports[];
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 8c4eccb0cfe6..f8445fa73448 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -834,20 +834,6 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n)
 	if (!ds)
 		return NULL;
 
-	/* We avoid allocating memory outside dsa_switch
-	 * if it is not needed.
-	 */
-	if (n <= sizeof(ds->_bitmap) * 8) {
-		ds->bitmap = &ds->_bitmap;
-	} else {
-		ds->bitmap = devm_kcalloc(dev,
-					  BITS_TO_LONGS(n),
-					  sizeof(unsigned long),
-					  GFP_KERNEL);
-		if (unlikely(!ds->bitmap))
-			return NULL;
-	}
-
 	ds->dev = dev;
 	ds->num_ports = n;
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 09d9286b27cc..489eb7b430a4 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -128,57 +128,51 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds,
 	return ds->ops->port_fdb_del(ds, port, info->addr, info->vid);
 }
 
-static int
-dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds,
-			      const struct switchdev_obj_port_mdb *mdb,
-			      const unsigned long *bitmap)
+static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port,
+				 struct dsa_notifier_mdb_info *info)
+{
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
+
+	if (dsa_is_dsa_port(ds, port))
+		return true;
+
+	return false;
+}
+
+static int dsa_switch_mdb_prepare(struct dsa_switch *ds,
+				  struct dsa_notifier_mdb_info *info)
 {
 	int port, err;
 
 	if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
 		return -EOPNOTSUPP;
 
-	for_each_set_bit(port, bitmap, ds->num_ports) {
-		err = ds->ops->port_mdb_prepare(ds, port, mdb);
-		if (err)
-			return err;
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_mdb_match(ds, port, info)) {
+			err = ds->ops->port_mdb_prepare(ds, port, info->mdb);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
 }
 
-static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
-				      const struct switchdev_obj_port_mdb *mdb,
-				      const unsigned long *bitmap)
-{
-	int port;
-
-	if (!ds->ops->port_mdb_add)
-		return;
-
-	for_each_set_bit(port, bitmap, ds->num_ports)
-		ds->ops->port_mdb_add(ds, port, mdb);
-}
-
 static int dsa_switch_mdb_add(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	const struct switchdev_obj_port_mdb *mdb = info->mdb;
-	struct switchdev_trans *trans = info->trans;
 	int port;
 
-	/* Build a mask of Multicast group members */
-	bitmap_zero(ds->bitmap, ds->num_ports);
-	if (ds->index == info->sw_index)
-		set_bit(info->port, ds->bitmap);
-	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_dsa_port(ds, port))
-			set_bit(port, ds->bitmap);
+	if (switchdev_trans_ph_prepare(info->trans))
+		return dsa_switch_mdb_prepare(ds, info);
 
-	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap);
+	if (!ds->ops->port_mdb_add)
+		return 0;
 
-	dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap);
+	for (port = 0; port < ds->num_ports; port++)
+		if (dsa_switch_mdb_match(ds, port, info))
+			ds->ops->port_mdb_add(ds, port, info->mdb);
 
 	return 0;
 }
@@ -186,13 +180,11 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds,
 static int dsa_switch_mdb_del(struct dsa_switch *ds,
 			      struct dsa_notifier_mdb_info *info)
 {
-	const struct switchdev_obj_port_mdb *mdb = info->mdb;
-
 	if (!ds->ops->port_mdb_del)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-		return ds->ops->port_mdb_del(ds, info->port, mdb);
+		return ds->ops->port_mdb_del(ds, info->port, info->mdb);
 
 	return 0;
 }
@@ -234,59 +226,55 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port,
 			     (void *)vlan);
 }
 
-static int
-dsa_switch_vlan_prepare_bitmap(struct dsa_switch *ds,
-			       const struct switchdev_obj_port_vlan *vlan,
-			       const unsigned long *bitmap)
+static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port,
+				  struct dsa_notifier_vlan_info *info)
+{
+	if (ds->index == info->sw_index && port == info->port)
+		return true;
+
+	if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
+		return true;
+
+	return false;
+}
+
+static int dsa_switch_vlan_prepare(struct dsa_switch *ds,
+				   struct dsa_notifier_vlan_info *info)
 {
 	int port, err;
 
 	if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
 		return -EOPNOTSUPP;
 
-	for_each_set_bit(port, bitmap, ds->num_ports) {
-		err = dsa_port_vlan_check(ds, port, vlan);
-		if (err)
-			return err;
+	for (port = 0; port < ds->num_ports; port++) {
+		if (dsa_switch_vlan_match(ds, port, info)) {
+			err = dsa_port_vlan_check(ds, port, info->vlan);
+			if (err)
+				return err;
 
-		err = ds->ops->port_vlan_prepare(ds, port, vlan);
-		if (err)
-			return err;
+			err = ds->ops->port_vlan_prepare(ds, port, info->vlan);
+			if (err)
+				return err;
+		}
 	}
 
 	return 0;
 }
 
-static void
-dsa_switch_vlan_add_bitmap(struct dsa_switch *ds,
-			   const struct switchdev_obj_port_vlan *vlan,
-			   const unsigned long *bitmap)
-{
-	int port;
-
-	for_each_set_bit(port, bitmap, ds->num_ports)
-		ds->ops->port_vlan_add(ds, port, vlan);
-}
-
 static int dsa_switch_vlan_add(struct dsa_switch *ds,
 			       struct dsa_notifier_vlan_info *info)
 {
-	const struct switchdev_obj_port_vlan *vlan = info->vlan;
-	struct switchdev_trans *trans = info->trans;
 	int port;
 
-	/* Build a mask of VLAN members */
-	bitmap_zero(ds->bitmap, ds->num_ports);
-	if (ds->index == info->sw_index)
-		set_bit(info->port, ds->bitmap);
-	for (port = 0; port < ds->num_ports; port++)
-		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))
-			set_bit(port, ds->bitmap);
+	if (switchdev_trans_ph_prepare(info->trans))
+		return dsa_switch_vlan_prepare(ds, info);
 
-	if (switchdev_trans_ph_prepare(trans))
-		return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap);
+	if (!ds->ops->port_vlan_add)
+		return 0;
 
-	dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap);
+	for (port = 0; port < ds->num_ports; port++)
+		if (dsa_switch_vlan_match(ds, port, info))
+			ds->ops->port_vlan_add(ds, port, info->vlan);
 
 	return 0;
 }
@@ -294,13 +282,11 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds,
 static int dsa_switch_vlan_del(struct dsa_switch *ds,
 			       struct dsa_notifier_vlan_info *info)
 {
-	const struct switchdev_obj_port_vlan *vlan = info->vlan;
-
 	if (!ds->ops->port_vlan_del)
 		return -EOPNOTSUPP;
 
 	if (ds->index == info->sw_index)
-		return ds->ops->port_vlan_del(ds, info->port, vlan);
+		return ds->ops->port_vlan_del(ds, info->port, info->vlan);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 94acaeb50ced653bfe2c4d8037c70b107af14124 Mon Sep 17 00:00:00 2001
From: Marco Hartmann <marco.hartmann@nxp.com>
Date: Wed, 21 Aug 2019 11:00:46 +0000
Subject: Add genphy_c45_config_aneg() function to phy-c45.c

Commit 34786005eca3 ("net: phy: prevent PHYs w/o Clause 22 regs from calling
genphy_config_aneg") introduced a check that aborts phy_config_aneg()
if the phy is a C45 phy.
This causes phy_state_machine() to call phy_error() so that the phy
ends up in PHY_HALTED state.

Instead of returning -EOPNOTSUPP, call genphy_c45_config_aneg()
(analogous to the C22 case) so that the state machine can run
correctly.

genphy_c45_config_aneg() closely resembles mv3310_config_aneg()
in drivers/net/phy/marvell10g.c, excluding vendor specific
configurations for 1000BaseT.

Fixes: 22b56e827093 ("net: phy: replace genphy_10g_driver with genphy_c45_driver")

Signed-off-by: Marco Hartmann <marco.hartmann@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 26 ++++++++++++++++++++++++++
 drivers/net/phy/phy.c     |  2 +-
 include/linux/phy.h       |  1 +
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 58bb25e4af10..7935593debb1 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -523,6 +523,32 @@ int genphy_c45_read_status(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_read_status);
 
+/**
+ * genphy_c45_config_aneg - restart auto-negotiation or forced setup
+ * @phydev: target phy_device struct
+ *
+ * Description: If auto-negotiation is enabled, we configure the
+ *   advertising, and then restart auto-negotiation.  If it is not
+ *   enabled, then we force a configuration.
+ */
+int genphy_c45_config_aneg(struct phy_device *phydev)
+{
+	bool changed = false;
+	int ret;
+
+	if (phydev->autoneg == AUTONEG_DISABLE)
+		return genphy_c45_pma_setup_forced(phydev);
+
+	ret = genphy_c45_an_config_aneg(phydev);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		changed = true;
+
+	return genphy_c45_check_and_restart_aneg(phydev, changed);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_config_aneg);
+
 /* The gen10g_* functions are the old Clause 45 stub */
 
 int gen10g_config_aneg(struct phy_device *phydev)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index ef7aa738e0dc..6b0f89369b46 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -507,7 +507,7 @@ static int phy_config_aneg(struct phy_device *phydev)
 	 * allowed to call genphy_config_aneg()
 	 */
 	if (phydev->is_c45 && !(phydev->c45_ids.devices_in_package & BIT(0)))
-		return -EOPNOTSUPP;
+		return genphy_c45_config_aneg(phydev);
 
 	return genphy_config_aneg(phydev);
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 462b90b73f93..2fb9c8ffaf10 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1107,6 +1107,7 @@ int genphy_c45_an_disable_aneg(struct phy_device *phydev);
 int genphy_c45_read_mdix(struct phy_device *phydev);
 int genphy_c45_pma_read_abilities(struct phy_device *phydev);
 int genphy_c45_read_status(struct phy_device *phydev);
+int genphy_c45_config_aneg(struct phy_device *phydev);
 
 /* The gen10g_* functions are the old Clause 45 stub */
 int gen10g_config_aneg(struct phy_device *phydev);
-- 
cgit v1.2.3


From 1b0b8114b9549dee6490e728cd787f808b586158 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Aug 2019 16:30:02 +0800
Subject: sctp: make ecn flag per netns and endpoint

This patch is to add ecn flag for both netns_sctp and sctp_endpoint,
net->sctp.ecn_enable is set 1 by default, and ep->ecn_enable will
be initialized with net->sctp.ecn_enable.

asoc->peer.ecn_capable will be set during negotiation only when
ep->ecn_enable is set on both sides.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/sctp.h   |  3 +++
 include/net/sctp/structs.h |  3 ++-
 net/sctp/endpointola.c     |  1 +
 net/sctp/protocol.c        |  3 +++
 net/sctp/sm_make_chunk.c   | 16 ++++++++++++----
 5 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index 0db7fb3e4e15..bdc0f27b8514 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -128,6 +128,9 @@ struct netns_sctp {
 	/* Flag to indicate if stream interleave is enabled */
 	int intl_enable;
 
+	/* Flag to indicate if ecn is enabled */
+	int ecn_enable;
+
 	/*
 	 * Policy to control SCTP IPv4 address scoping
 	 * 0   - Disable IPv4 address scoping
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index daac1eff18c9..503fbc3cd819 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1322,7 +1322,8 @@ struct sctp_endpoint {
 	/* SCTP-AUTH: endpoint shared keys */
 	struct list_head endpoint_shared_keys;
 	__u16 active_key_id;
-	__u8  auth_enable:1,
+	__u8  ecn_enable:1,
+	      auth_enable:1,
 	      intl_enable:1,
 	      prsctp_enable:1,
 	      asconf_enable:1,
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 75a407df32c5..ea53049d1db6 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -106,6 +106,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	 */
 	ep->prsctp_enable = net->sctp.prsctp_enable;
 	ep->reconf_enable = net->sctp.reconf_enable;
+	ep->ecn_enable = net->sctp.ecn_enable;
 
 	/* Remember who we are attached to.  */
 	ep->base.sk = sk;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 2d47adcb4cbe..b48ffe845c31 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1254,6 +1254,9 @@ static int __net_init sctp_defaults_init(struct net *net)
 	/* Disable AUTH by default. */
 	net->sctp.auth_enable = 0;
 
+	/* Enable ECN by default. */
+	net->sctp.ecn_enable = 1;
+
 	/* Set SCOPE policy to enabled */
 	net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE;
 
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 338278f24c24..e41ed2e0ae7d 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -244,7 +244,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 
 	chunksize = sizeof(init) + addrs_len;
 	chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
-	chunksize += sizeof(ecap_param);
+
+	if (asoc->ep->ecn_enable)
+		chunksize += sizeof(ecap_param);
 
 	if (asoc->ep->prsctp_enable)
 		chunksize += sizeof(prsctp_param);
@@ -335,7 +337,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
 	sctp_addto_chunk(retval, sizeof(sat), &sat);
 	sctp_addto_chunk(retval, num_types * sizeof(__u16), &types);
 
-	sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
+	if (asoc->ep->ecn_enable)
+		sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param);
 
 	/* Add the supported extensions parameter.  Be nice and add this
 	 * fist before addiding the parameters for the extensions themselves
@@ -2597,8 +2600,13 @@ do_addr_param:
 		break;
 
 	case SCTP_PARAM_ECN_CAPABLE:
-		asoc->peer.ecn_capable = 1;
-		break;
+		if (asoc->ep->ecn_enable) {
+			asoc->peer.ecn_capable = 1;
+			break;
+		}
+		/* Fall Through */
+		goto fall_through;
+
 
 	case SCTP_PARAM_ADAPTATION_LAYER_IND:
 		asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind);
-- 
cgit v1.2.3


From d5886b919a720ff859aebf569cb0f353b1d977a6 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 26 Aug 2019 16:30:04 +0800
Subject: sctp: allow users to set ep ecn flag by sockopt

SCTP_ECN_SUPPORTED sockopt will be added to allow users to change
ep ecn flag, and it's similar with other feature flags.

Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/sctp.h |  1 +
 net/sctp/socket.c         | 73 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 62527aca8477..6d5b164af55c 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -136,6 +136,7 @@ typedef __s32 sctp_assoc_t;
 #define SCTP_EVENT	127
 #define SCTP_ASCONF_SUPPORTED	128
 #define SCTP_AUTH_SUPPORTED	129
+#define SCTP_ECN_SUPPORTED	130
 
 /* PR-SCTP policies */
 #define SCTP_PR_SCTP_NONE	0x0000
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 82bc25223cfe..3e50a9712fb1 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4560,6 +4560,34 @@ out:
 	return retval;
 }
 
+static int sctp_setsockopt_ecn_supported(struct sock *sk,
+					 char __user *optval,
+					 unsigned int optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EINVAL;
+
+	if (optlen != sizeof(params))
+		goto out;
+
+	if (copy_from_user(&params, optval, optlen)) {
+		retval = -EFAULT;
+		goto out;
+	}
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP))
+		goto out;
+
+	sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value;
+	retval = 0;
+
+out:
+	return retval;
+}
+
 /* API 6.2 setsockopt(), getsockopt()
  *
  * Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4766,6 +4794,9 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
 	case SCTP_AUTH_SUPPORTED:
 		retval = sctp_setsockopt_auth_supported(sk, optval, optlen);
 		break;
+	case SCTP_ECN_SUPPORTED:
+		retval = sctp_setsockopt_ecn_supported(sk, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
@@ -7828,6 +7859,45 @@ out:
 	return retval;
 }
 
+static int sctp_getsockopt_ecn_supported(struct sock *sk, int len,
+					 char __user *optval,
+					 int __user *optlen)
+{
+	struct sctp_assoc_value params;
+	struct sctp_association *asoc;
+	int retval = -EFAULT;
+
+	if (len < sizeof(params)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	len = sizeof(params);
+	if (copy_from_user(&params, optval, len))
+		goto out;
+
+	asoc = sctp_id2assoc(sk, params.assoc_id);
+	if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC &&
+	    sctp_style(sk, UDP)) {
+		retval = -EINVAL;
+		goto out;
+	}
+
+	params.assoc_value = asoc ? asoc->peer.ecn_capable
+				  : sctp_sk(sk)->ep->ecn_enable;
+
+	if (put_user(len, optlen))
+		goto out;
+
+	if (copy_to_user(optval, &params, len))
+		goto out;
+
+	retval = 0;
+
+out:
+	return retval;
+}
+
 static int sctp_getsockopt(struct sock *sk, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
@@ -8037,6 +8107,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
 		retval = sctp_getsockopt_auth_supported(sk, len, optval,
 							optlen);
 		break;
+	case SCTP_ECN_SUPPORTED:
+		retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen);
+		break;
 	default:
 		retval = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3


From 190f73ab4c43ecfc8e93843fe249efeff7d69a90 Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Tue, 27 Aug 2019 09:38:11 +0800
Subject: net: stmmac: setup higher frequency clk support for EHL & TGL

EHL DW EQOS is running on a 200MHz clock. Setting up stmmac-clk,
ptp clock and ptp_max_adj to 200MHz.

Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c | 21 +++++++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c |  3 +++
 include/linux/stmmac.h                           |  1 +
 3 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index e969dc9bb9f0..20906287b6d4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -9,6 +9,7 @@
   Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *******************************************************************************/
 
+#include <linux/clk-provider.h>
 #include <linux/pci.h>
 #include <linux/dmi.h>
 
@@ -174,6 +175,19 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 	plat->axi->axi_blen[1] = 8;
 	plat->axi->axi_blen[2] = 16;
 
+	plat->ptp_max_adj = plat->clk_ptp_rate;
+
+	/* Set system clock */
+	plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev,
+						   "stmmac-clk", NULL, 0,
+						   plat->clk_ptp_rate);
+
+	if (IS_ERR(plat->stmmac_clk)) {
+		dev_warn(&pdev->dev, "Fail to register stmmac-clk\n");
+		plat->stmmac_clk = NULL;
+	}
+	clk_prepare_enable(plat->stmmac_clk);
+
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
 
@@ -193,6 +207,7 @@ static int ehl_common_data(struct pci_dev *pdev,
 
 	plat->rx_queues_to_use = 8;
 	plat->tx_queues_to_use = 8;
+	plat->clk_ptp_rate = 200000000;
 	ret = intel_mgbe_common_data(pdev, plat);
 	if (ret)
 		return ret;
@@ -233,6 +248,7 @@ static int tgl_common_data(struct pci_dev *pdev,
 
 	plat->rx_queues_to_use = 6;
 	plat->tx_queues_to_use = 4;
+	plat->clk_ptp_rate = 200000000;
 	ret = intel_mgbe_common_data(pdev, plat);
 	if (ret)
 		return ret;
@@ -438,10 +454,15 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
  */
 static void stmmac_pci_remove(struct pci_dev *pdev)
 {
+	struct net_device *ndev = dev_get_drvdata(&pdev->dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
 	int i;
 
 	stmmac_dvr_remove(&pdev->dev);
 
+	if (priv->plat->stmmac_clk)
+		clk_unregister_fixed_rate(priv->plat->stmmac_clk);
+
 	for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
 		if (pci_resource_len(pdev, i) == 0)
 			continue;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index c48224973a37..173493db038c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -194,6 +194,9 @@ void stmmac_ptp_register(struct stmmac_priv *priv)
 		priv->pps[i].available = true;
 	}
 
+	if (priv->plat->ptp_max_adj)
+		stmmac_ptp_clock_ops.max_adj = priv->plat->ptp_max_adj;
+
 	stmmac_ptp_clock_ops.n_per_out = priv->dma_cap.pps_out_num;
 
 	spin_lock_init(&priv->ptp_lock);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 5cc6b6faf359..7ad7ae35cf88 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -168,6 +168,7 @@ struct plat_stmmacenet_data {
 	struct clk *clk_ptp_ref;
 	unsigned int clk_ptp_rate;
 	unsigned int clk_ref_rate;
+	s32 ptp_max_adj;
 	struct reset_control *stmmac_rst;
 	struct stmmac_axi *axi;
 	int has_gmac4;
-- 
cgit v1.2.3


From 00679b631eddaa0aa0ceba719fcb1f60c65da5a3 Mon Sep 17 00:00:00 2001
From: Michael Guralnik <michaelgur@mellanox.com>
Date: Mon, 19 Aug 2019 15:08:13 +0300
Subject: net/mlx5: Set ODP capabilities for DC transport to max

In mlx5_core initialization, query max ODP capabilities for DC transport
from FW and set as current capabilities.

Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++++++
 include/linux/mlx5/mlx5_ifc.h                  | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fa0e991f1983..7f70ecb1db6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -495,6 +495,12 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.write);
 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.read);
 	ODP_CAP_SET_MAX(dev, xrc_odp_caps.atomic);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.srq_receive);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.send);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.receive);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.write);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
+	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
 
 	if (do_set)
 		err = set_caps(dev, set_ctx, set_sz,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1e55cf73e88c..4e278114d8b3 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -948,7 +948,9 @@ struct mlx5_ifc_odp_cap_bits {
 
 	struct mlx5_ifc_odp_per_transport_service_cap_bits xrc_odp_caps;
 
-	u8         reserved_at_100[0x700];
+	struct mlx5_ifc_odp_per_transport_service_cap_bits dc_odp_caps;
+
+	u8         reserved_at_120[0x6E0];
 };
 
 struct mlx5_ifc_calc_op {
-- 
cgit v1.2.3


From d15ce412737accaba5e4c7d653b184772da47365 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 28 Aug 2019 14:20:13 +0530
Subject: ACPI: cpufreq: Switch to QoS requests instead of cpufreq notifier

The cpufreq core now takes the min/max frequency constraints via QoS
requests and the CPUFREQ_ADJUST notifier shall get removed later on.

Switch over to using the QoS request for maximum frequency constraint
for acpi driver.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_driver.c  |  39 +++++++++++++--
 drivers/acpi/processor_perflib.c | 100 +++++++++++++++------------------------
 drivers/acpi/processor_thermal.c |  84 ++++++++++++++++----------------
 include/acpi/processor.h         |  26 ++++++----
 4 files changed, 132 insertions(+), 117 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index aea8d674a33d..08da9c29f1e9 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -284,6 +284,29 @@ static int acpi_processor_stop(struct device *dev)
 	return 0;
 }
 
+bool acpi_processor_cpufreq_init;
+
+static int acpi_processor_notifier(struct notifier_block *nb,
+				   unsigned long event, void *data)
+{
+	struct cpufreq_policy *policy = data;
+	int cpu = policy->cpu;
+
+	if (event == CPUFREQ_CREATE_POLICY) {
+		acpi_thermal_cpufreq_init(cpu);
+		acpi_processor_ppc_init(cpu);
+	} else if (event == CPUFREQ_REMOVE_POLICY) {
+		acpi_processor_ppc_exit(cpu);
+		acpi_thermal_cpufreq_exit(cpu);
+	}
+
+	return 0;
+}
+
+static struct notifier_block acpi_processor_notifier_block = {
+	.notifier_call = acpi_processor_notifier,
+};
+
 /*
  * We keep the driver loaded even when ACPI is not running.
  * This is needed for the powernow-k8 driver, that works even without
@@ -310,8 +333,12 @@ static int __init acpi_processor_driver_init(void)
 	cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead",
 				  NULL, acpi_soft_cpu_dead);
 
-	acpi_thermal_cpufreq_init();
-	acpi_processor_ppc_init();
+	if (!cpufreq_register_notifier(&acpi_processor_notifier_block,
+				       CPUFREQ_POLICY_NOTIFIER)) {
+		acpi_processor_cpufreq_init = true;
+		acpi_processor_ignore_ppc_init();
+	}
+
 	acpi_processor_throttling_init();
 	return 0;
 err:
@@ -324,8 +351,12 @@ static void __exit acpi_processor_driver_exit(void)
 	if (acpi_disabled)
 		return;
 
-	acpi_processor_ppc_exit();
-	acpi_thermal_cpufreq_exit();
+	if (acpi_processor_cpufreq_init) {
+		cpufreq_unregister_notifier(&acpi_processor_notifier_block,
+					    CPUFREQ_POLICY_NOTIFIER);
+		acpi_processor_cpufreq_init = false;
+	}
+
 	cpuhp_remove_state_nocalls(hp_online);
 	cpuhp_remove_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD);
 	driver_unregister(&acpi_processor_driver);
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index ee87cb6f6e59..2261713d1aec 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -50,57 +50,13 @@ module_param(ignore_ppc, int, 0644);
 MODULE_PARM_DESC(ignore_ppc, "If the frequency of your machine gets wrongly" \
 		 "limited by BIOS, this should help");
 
-#define PPC_REGISTERED   1
-#define PPC_IN_USE       2
-
-static int acpi_processor_ppc_status;
-
-static int acpi_processor_ppc_notifier(struct notifier_block *nb,
-				       unsigned long event, void *data)
-{
-	struct cpufreq_policy *policy = data;
-	struct acpi_processor *pr;
-	unsigned int ppc = 0;
-
-	if (ignore_ppc < 0)
-		ignore_ppc = 0;
-
-	if (ignore_ppc)
-		return 0;
-
-	if (event != CPUFREQ_ADJUST)
-		return 0;
-
-	mutex_lock(&performance_mutex);
-
-	pr = per_cpu(processors, policy->cpu);
-	if (!pr || !pr->performance)
-		goto out;
-
-	ppc = (unsigned int)pr->performance_platform_limit;
-
-	if (ppc >= pr->performance->state_count)
-		goto out;
-
-	cpufreq_verify_within_limits(policy, 0,
-				     pr->performance->states[ppc].
-				     core_frequency * 1000);
-
-      out:
-	mutex_unlock(&performance_mutex);
-
-	return 0;
-}
-
-static struct notifier_block acpi_ppc_notifier_block = {
-	.notifier_call = acpi_processor_ppc_notifier,
-};
+static bool acpi_processor_ppc_in_use;
 
 static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
 {
 	acpi_status status = 0;
 	unsigned long long ppc = 0;
-
+	int ret;
 
 	if (!pr)
 		return -EINVAL;
@@ -112,7 +68,7 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
 	status = acpi_evaluate_integer(pr->handle, "_PPC", NULL, &ppc);
 
 	if (status != AE_NOT_FOUND)
-		acpi_processor_ppc_status |= PPC_IN_USE;
+		acpi_processor_ppc_in_use = true;
 
 	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PPC"));
@@ -124,6 +80,17 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
 
 	pr->performance_platform_limit = (int)ppc;
 
+	if (ppc >= pr->performance->state_count ||
+	    unlikely(!dev_pm_qos_request_active(&pr->perflib_req)))
+		return 0;
+
+	ret = dev_pm_qos_update_request(&pr->perflib_req,
+			pr->performance->states[ppc].core_frequency * 1000);
+	if (ret < 0) {
+		pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n",
+			pr->id, ret);
+	}
+
 	return 0;
 }
 
@@ -184,23 +151,32 @@ int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
 }
 EXPORT_SYMBOL(acpi_processor_get_bios_limit);
 
-void acpi_processor_ppc_init(void)
+void acpi_processor_ignore_ppc_init(void)
 {
-	if (!cpufreq_register_notifier
-	    (&acpi_ppc_notifier_block, CPUFREQ_POLICY_NOTIFIER))
-		acpi_processor_ppc_status |= PPC_REGISTERED;
-	else
-		printk(KERN_DEBUG
-		       "Warning: Processor Platform Limit not supported.\n");
+	if (ignore_ppc < 0)
+		ignore_ppc = 0;
+}
+
+void acpi_processor_ppc_init(int cpu)
+{
+	struct acpi_processor *pr = per_cpu(processors, cpu);
+	int ret;
+
+	ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+				     &pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY,
+				     INT_MAX);
+	if (ret < 0) {
+		pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
+		       ret);
+		return;
+	}
 }
 
-void acpi_processor_ppc_exit(void)
+void acpi_processor_ppc_exit(int cpu)
 {
-	if (acpi_processor_ppc_status & PPC_REGISTERED)
-		cpufreq_unregister_notifier(&acpi_ppc_notifier_block,
-					    CPUFREQ_POLICY_NOTIFIER);
+	struct acpi_processor *pr = per_cpu(processors, cpu);
 
-	acpi_processor_ppc_status &= ~PPC_REGISTERED;
+	dev_pm_qos_remove_request(&pr->perflib_req);
 }
 
 static int acpi_processor_get_performance_control(struct acpi_processor *pr)
@@ -477,7 +453,7 @@ int acpi_processor_notify_smm(struct module *calling_module)
 	static int is_done = 0;
 	int result;
 
-	if (!(acpi_processor_ppc_status & PPC_REGISTERED))
+	if (!acpi_processor_cpufreq_init)
 		return -EBUSY;
 
 	if (!try_module_get(calling_module))
@@ -513,7 +489,7 @@ int acpi_processor_notify_smm(struct module *calling_module)
 	 * we can allow the cpufreq driver to be rmmod'ed. */
 	is_done = 1;
 
-	if (!(acpi_processor_ppc_status & PPC_IN_USE))
+	if (!acpi_processor_ppc_in_use)
 		module_put(calling_module);
 
 	return 0;
@@ -742,7 +718,7 @@ acpi_processor_register_performance(struct acpi_processor_performance
 {
 	struct acpi_processor *pr;
 
-	if (!(acpi_processor_ppc_status & PPC_REGISTERED))
+	if (!acpi_processor_cpufreq_init)
 		return -EINVAL;
 
 	mutex_lock(&performance_mutex);
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index 50fb0107375e..ec2638f1df4f 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -35,7 +35,6 @@ ACPI_MODULE_NAME("processor_thermal");
 #define CPUFREQ_THERMAL_MAX_STEP 3
 
 static DEFINE_PER_CPU(unsigned int, cpufreq_thermal_reduction_pctg);
-static unsigned int acpi_thermal_cpufreq_is_init = 0;
 
 #define reduction_pctg(cpu) \
 	per_cpu(cpufreq_thermal_reduction_pctg, phys_package_first_cpu(cpu))
@@ -61,35 +60,11 @@ static int phys_package_first_cpu(int cpu)
 static int cpu_has_cpufreq(unsigned int cpu)
 {
 	struct cpufreq_policy policy;
-	if (!acpi_thermal_cpufreq_is_init || cpufreq_get_policy(&policy, cpu))
+	if (!acpi_processor_cpufreq_init || cpufreq_get_policy(&policy, cpu))
 		return 0;
 	return 1;
 }
 
-static int acpi_thermal_cpufreq_notifier(struct notifier_block *nb,
-					 unsigned long event, void *data)
-{
-	struct cpufreq_policy *policy = data;
-	unsigned long max_freq = 0;
-
-	if (event != CPUFREQ_ADJUST)
-		goto out;
-
-	max_freq = (
-	    policy->cpuinfo.max_freq *
-	    (100 - reduction_pctg(policy->cpu) * 20)
-	) / 100;
-
-	cpufreq_verify_within_limits(policy, 0, max_freq);
-
-      out:
-	return 0;
-}
-
-static struct notifier_block acpi_thermal_cpufreq_notifier_block = {
-	.notifier_call = acpi_thermal_cpufreq_notifier,
-};
-
 static int cpufreq_get_max_state(unsigned int cpu)
 {
 	if (!cpu_has_cpufreq(cpu))
@@ -108,7 +83,10 @@ static int cpufreq_get_cur_state(unsigned int cpu)
 
 static int cpufreq_set_cur_state(unsigned int cpu, int state)
 {
-	int i;
+	struct cpufreq_policy *policy;
+	struct acpi_processor *pr;
+	unsigned long max_freq;
+	int i, ret;
 
 	if (!cpu_has_cpufreq(cpu))
 		return 0;
@@ -121,33 +99,53 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
 	 * frequency.
 	 */
 	for_each_online_cpu(i) {
-		if (topology_physical_package_id(i) ==
+		if (topology_physical_package_id(i) !=
 		    topology_physical_package_id(cpu))
-			cpufreq_update_policy(i);
+			continue;
+
+		pr = per_cpu(processors, i);
+
+		if (unlikely(!dev_pm_qos_request_active(&pr->thermal_req)))
+			continue;
+
+		policy = cpufreq_cpu_get(i);
+		if (!policy)
+			return -EINVAL;
+
+		max_freq = (policy->cpuinfo.max_freq * (100 - reduction_pctg(i) * 20)) / 100;
+
+		cpufreq_cpu_put(policy);
+
+		ret = dev_pm_qos_update_request(&pr->thermal_req, max_freq);
+		if (ret < 0) {
+			pr_warn("Failed to update thermal freq constraint: CPU%d (%d)\n",
+				pr->id, ret);
+		}
 	}
 	return 0;
 }
 
-void acpi_thermal_cpufreq_init(void)
+void acpi_thermal_cpufreq_init(int cpu)
 {
-	int i;
-
-	i = cpufreq_register_notifier(&acpi_thermal_cpufreq_notifier_block,
-				      CPUFREQ_POLICY_NOTIFIER);
-	if (!i)
-		acpi_thermal_cpufreq_is_init = 1;
+	struct acpi_processor *pr = per_cpu(processors, cpu);
+	int ret;
+
+	ret = dev_pm_qos_add_request(get_cpu_device(cpu),
+				     &pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY,
+				     INT_MAX);
+	if (ret < 0) {
+		pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
+		       ret);
+		return;
+	}
 }
 
-void acpi_thermal_cpufreq_exit(void)
+void acpi_thermal_cpufreq_exit(int cpu)
 {
-	if (acpi_thermal_cpufreq_is_init)
-		cpufreq_unregister_notifier
-		    (&acpi_thermal_cpufreq_notifier_block,
-		     CPUFREQ_POLICY_NOTIFIER);
+	struct acpi_processor *pr = per_cpu(processors, cpu);
 
-	acpi_thermal_cpufreq_is_init = 0;
+	dev_pm_qos_remove_request(&pr->thermal_req);
 }
-
 #else				/* ! CONFIG_CPU_FREQ */
 static int cpufreq_get_max_state(unsigned int cpu)
 {
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index 1194a4c78d55..f936033cb9e6 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -4,6 +4,8 @@
 
 #include <linux/kernel.h>
 #include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/pm_qos.h>
 #include <linux/thermal.h>
 #include <asm/acpi.h>
 
@@ -230,6 +232,8 @@ struct acpi_processor {
 	struct acpi_processor_limit limit;
 	struct thermal_cooling_device *cdev;
 	struct device *dev; /* Processor device. */
+	struct dev_pm_qos_request perflib_req;
+	struct dev_pm_qos_request thermal_req;
 };
 
 struct acpi_processor_errata {
@@ -296,16 +300,22 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
 /* in processor_perflib.c */
 
 #ifdef CONFIG_CPU_FREQ
-void acpi_processor_ppc_init(void);
-void acpi_processor_ppc_exit(void);
+extern bool acpi_processor_cpufreq_init;
+void acpi_processor_ignore_ppc_init(void);
+void acpi_processor_ppc_init(int cpu);
+void acpi_processor_ppc_exit(int cpu);
 void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag);
 extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit);
 #else
-static inline void acpi_processor_ppc_init(void)
+static inline void acpi_processor_ignore_ppc_init(void)
 {
 	return;
 }
-static inline void acpi_processor_ppc_exit(void)
+static inline void acpi_processor_ppc_init(int cpu)
+{
+	return;
+}
+static inline void acpi_processor_ppc_exit(int cpu)
 {
 	return;
 }
@@ -421,14 +431,14 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr)
 int acpi_processor_get_limit_info(struct acpi_processor *pr);
 extern const struct thermal_cooling_device_ops processor_cooling_ops;
 #if defined(CONFIG_ACPI_CPU_FREQ_PSS) & defined(CONFIG_CPU_FREQ)
-void acpi_thermal_cpufreq_init(void);
-void acpi_thermal_cpufreq_exit(void);
+void acpi_thermal_cpufreq_init(int cpu);
+void acpi_thermal_cpufreq_exit(int cpu);
 #else
-static inline void acpi_thermal_cpufreq_init(void)
+static inline void acpi_thermal_cpufreq_init(int cpu)
 {
 	return;
 }
-static inline void acpi_thermal_cpufreq_exit(void)
+static inline void acpi_thermal_cpufreq_exit(int cpu)
 {
 	return;
 }
-- 
cgit v1.2.3


From ab43762ef010967e4ccd53627f70a2eecbeafefb Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 6 Aug 2019 11:46:00 +0300
Subject: perf: Allow normal events to output AUX data

In some cases, ordinary (non-AUX) events can generate data for AUX events.
For example, PEBS events can come out as records in the Intel PT stream
instead of their usual DS records, if configured to do so.

One requirement for such events is to consistently schedule together, to
ensure that the data from the "AUX output" events isn't lost while their
corresponding AUX event is not scheduled. We use grouping to provide this
guarantee: an "AUX output" event can be added to a group where an AUX event
is a group leader, and provided that the former supports writing to the
latter.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: kan.liang@linux.intel.com
Link: https://lkml.kernel.org/r/20190806084606.4021-2-alexander.shishkin@linux.intel.com
---
 include/linux/perf_event.h      | 14 +++++++
 include/uapi/linux/perf_event.h |  3 +-
 kernel/events/core.c            | 93 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e8ad3c590a23..61448c19a132 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -246,6 +246,7 @@ struct perf_event;
 #define PERF_PMU_CAP_ITRACE			0x20
 #define PERF_PMU_CAP_HETEROGENEOUS_CPUS		0x40
 #define PERF_PMU_CAP_NO_EXCLUDE			0x80
+#define PERF_PMU_CAP_AUX_OUTPUT			0x100
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -446,6 +447,16 @@ struct pmu {
 	void (*addr_filters_sync)	(struct perf_event *event);
 					/* optional */
 
+	/*
+	 * Check if event can be used for aux_output purposes for
+	 * events of this PMU.
+	 *
+	 * Runs from perf_event_open(). Should return 0 for "no match"
+	 * or non-zero for "match".
+	 */
+	int (*aux_output_match)		(struct perf_event *event);
+					/* optional */
+
 	/*
 	 * Filter events for PMU-specific reasons.
 	 */
@@ -681,6 +692,9 @@ struct perf_event {
 	struct perf_addr_filter_range	*addr_filter_ranges;
 	unsigned long			addr_filters_gen;
 
+	/* for aux_output events */
+	struct perf_event		*aux_event;
+
 	void (*destroy)(struct perf_event *);
 	struct rcu_head			rcu_head;
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 7198ddd0c6b1..bb7b271397a6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -374,7 +374,8 @@ struct perf_event_attr {
 				namespaces     :  1, /* include namespaces data */
 				ksymbol        :  1, /* include ksymbol events */
 				bpf_event      :  1, /* include bpf events */
-				__reserved_1   : 33;
+				aux_output     :  1, /* generate AUX records instead of events */
+				__reserved_1   : 32;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0463c1151bae..2aad959e6def 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1887,6 +1887,89 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	ctx->generation++;
 }
 
+static int
+perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
+{
+	if (!has_aux(aux_event))
+		return 0;
+
+	if (!event->pmu->aux_output_match)
+		return 0;
+
+	return event->pmu->aux_output_match(aux_event);
+}
+
+static void put_event(struct perf_event *event);
+static void event_sched_out(struct perf_event *event,
+			    struct perf_cpu_context *cpuctx,
+			    struct perf_event_context *ctx);
+
+static void perf_put_aux_event(struct perf_event *event)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct perf_event *iter;
+
+	/*
+	 * If event uses aux_event tear down the link
+	 */
+	if (event->aux_event) {
+		iter = event->aux_event;
+		event->aux_event = NULL;
+		put_event(iter);
+		return;
+	}
+
+	/*
+	 * If the event is an aux_event, tear down all links to
+	 * it from other events.
+	 */
+	for_each_sibling_event(iter, event->group_leader) {
+		if (iter->aux_event != event)
+			continue;
+
+		iter->aux_event = NULL;
+		put_event(event);
+
+		/*
+		 * If it's ACTIVE, schedule it out and put it into ERROR
+		 * state so that we don't try to schedule it again. Note
+		 * that perf_event_enable() will clear the ERROR status.
+		 */
+		event_sched_out(iter, cpuctx, ctx);
+		perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+	}
+}
+
+static int perf_get_aux_event(struct perf_event *event,
+			      struct perf_event *group_leader)
+{
+	/*
+	 * Our group leader must be an aux event if we want to be
+	 * an aux_output. This way, the aux event will precede its
+	 * aux_output events in the group, and therefore will always
+	 * schedule first.
+	 */
+	if (!group_leader)
+		return 0;
+
+	if (!perf_aux_output_match(event, group_leader))
+		return 0;
+
+	if (!atomic_long_inc_not_zero(&group_leader->refcount))
+		return 0;
+
+	/*
+	 * Link aux_outputs to their aux event; this is undone in
+	 * perf_group_detach() by perf_put_aux_event(). When the
+	 * group in torn down, the aux_output events loose their
+	 * link to the aux_event and can't schedule any more.
+	 */
+	event->aux_event = group_leader;
+
+	return 1;
+}
+
 static void perf_group_detach(struct perf_event *event)
 {
 	struct perf_event *sibling, *tmp;
@@ -1902,6 +1985,8 @@ static void perf_group_detach(struct perf_event *event)
 
 	event->attach_state &= ~PERF_ATTACH_GROUP;
 
+	perf_put_aux_event(event);
+
 	/*
 	 * If this is a sibling, remove it from its group.
 	 */
@@ -10426,6 +10511,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		goto err_ns;
 	}
 
+	if (event->attr.aux_output &&
+	    !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
+		err = -EOPNOTSUPP;
+		goto err_pmu;
+	}
+
 	err = exclusive_event_init(event);
 	if (err)
 		goto err_pmu;
@@ -11082,6 +11173,8 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	if (event->attr.aux_output && !perf_get_aux_event(event, group_leader))
+		goto err_locked;
 
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
-- 
cgit v1.2.3


From 92b6d38f1a3d59fb43d36c2d9681b9b8f1e46c05 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Thu, 22 Aug 2019 16:28:15 +0300
Subject: mtd: spi-nor: Remove unused macro

Remove leftover from nor->cmd_buf.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 include/linux/mtd/spi-nor.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 63560b375168..3075ac73b171 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -225,7 +225,6 @@ static inline u8 spi_nor_get_protocol_width(enum spi_nor_protocol proto)
 	return spi_nor_get_protocol_data_nbits(proto);
 }
 
-#define SPI_NOR_MAX_CMD_SIZE	8
 enum spi_nor_ops {
 	SPI_NOR_OPS_READ = 0,
 	SPI_NOR_OPS_WRITE,
-- 
cgit v1.2.3


From 47599127a2e8c42843e27fc370e63cc5344a2a38 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Fri, 23 Aug 2019 15:53:35 +0000
Subject: mtd: spi-nor: Regroup flash parameter and settings

The scope is to move all [FLASH-SPECIFIC] parameters and settings
from 'struct spi_nor' to 'struct spi_nor_flash_parameter'.

'struct spi_nor_flash_parameter' describes the hardware capabilities
and associated settings of the SPI NOR flash memory. It includes
legacy flash parameters and settings that can be overwritten by the
spi_nor_fixups hooks, or dynamically when parsing the JESD216
Serial Flash Discoverable Parameters (SFDP) tables. All SFDP params
and settings will fit inside 'struct spi_nor_flash_parameter'.

Move spi_nor_hwcaps related code to avoid forward declarations.
Add a forward declaration that we can't avoid: 'struct spi_nor' will
be used in 'struct spi_nor_flash_parameter'.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c |  65 ------------
 include/linux/mtd/spi-nor.h   | 239 +++++++++++++++++++++++++++++-------------
 2 files changed, 164 insertions(+), 140 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 0597cb8257b0..d35dc6a97521 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -40,71 +40,6 @@
 #define SPI_NOR_MAX_ID_LEN	6
 #define SPI_NOR_MAX_ADDR_WIDTH	4
 
-struct spi_nor_read_command {
-	u8			num_mode_clocks;
-	u8			num_wait_states;
-	u8			opcode;
-	enum spi_nor_protocol	proto;
-};
-
-struct spi_nor_pp_command {
-	u8			opcode;
-	enum spi_nor_protocol	proto;
-};
-
-enum spi_nor_read_command_index {
-	SNOR_CMD_READ,
-	SNOR_CMD_READ_FAST,
-	SNOR_CMD_READ_1_1_1_DTR,
-
-	/* Dual SPI */
-	SNOR_CMD_READ_1_1_2,
-	SNOR_CMD_READ_1_2_2,
-	SNOR_CMD_READ_2_2_2,
-	SNOR_CMD_READ_1_2_2_DTR,
-
-	/* Quad SPI */
-	SNOR_CMD_READ_1_1_4,
-	SNOR_CMD_READ_1_4_4,
-	SNOR_CMD_READ_4_4_4,
-	SNOR_CMD_READ_1_4_4_DTR,
-
-	/* Octal SPI */
-	SNOR_CMD_READ_1_1_8,
-	SNOR_CMD_READ_1_8_8,
-	SNOR_CMD_READ_8_8_8,
-	SNOR_CMD_READ_1_8_8_DTR,
-
-	SNOR_CMD_READ_MAX
-};
-
-enum spi_nor_pp_command_index {
-	SNOR_CMD_PP,
-
-	/* Quad SPI */
-	SNOR_CMD_PP_1_1_4,
-	SNOR_CMD_PP_1_4_4,
-	SNOR_CMD_PP_4_4_4,
-
-	/* Octal SPI */
-	SNOR_CMD_PP_1_1_8,
-	SNOR_CMD_PP_1_8_8,
-	SNOR_CMD_PP_8_8_8,
-
-	SNOR_CMD_PP_MAX
-};
-
-struct spi_nor_flash_parameter {
-	u64				size;
-	u32				page_size;
-
-	struct spi_nor_hwcaps		hwcaps;
-	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
-	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
-
-	int (*quad_enable)(struct spi_nor *nor);
-};
-
 struct sfdp_parameter_header {
 	u8		id_lsb;
 	u8		minor;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 3075ac73b171..77ba692d9348 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -333,6 +333,165 @@ struct spi_nor_erase_map {
 	u8				uniform_erase_type;
 };
 
+/**
+ * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
+ * supported by the SPI controller (bus master).
+ * @mask:		the bitmask listing all the supported hw capabilies
+ */
+struct spi_nor_hwcaps {
+	u32	mask;
+};
+
+/*
+ *(Fast) Read capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * As a matter of performances, it is relevant to use Octal SPI protocols first,
+ * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
+ * (Slow) Read.
+ */
+#define SNOR_HWCAPS_READ_MASK		GENMASK(14, 0)
+#define SNOR_HWCAPS_READ		BIT(0)
+#define SNOR_HWCAPS_READ_FAST		BIT(1)
+#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
+
+#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
+#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
+#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
+#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
+#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
+
+#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
+#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
+#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
+#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
+#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
+
+#define SNOR_HWCAPS_READ_OCTAL		GENMASK(14, 11)
+#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
+#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
+#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
+#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
+
+/*
+ * Page Program capabilities.
+ * MUST be ordered by priority: the higher bit position, the higher priority.
+ * Like (Fast) Read capabilities, Octal/Quad SPI protocols are preferred to the
+ * legacy SPI 1-1-1 protocol.
+ * Note that Dual Page Programs are not supported because there is no existing
+ * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
+ * implements such commands.
+ */
+#define SNOR_HWCAPS_PP_MASK	GENMASK(22, 16)
+#define SNOR_HWCAPS_PP		BIT(16)
+
+#define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
+#define SNOR_HWCAPS_PP_1_1_4	BIT(17)
+#define SNOR_HWCAPS_PP_1_4_4	BIT(18)
+#define SNOR_HWCAPS_PP_4_4_4	BIT(19)
+
+#define SNOR_HWCAPS_PP_OCTAL	GENMASK(22, 20)
+#define SNOR_HWCAPS_PP_1_1_8	BIT(20)
+#define SNOR_HWCAPS_PP_1_8_8	BIT(21)
+#define SNOR_HWCAPS_PP_8_8_8	BIT(22)
+
+#define SNOR_HWCAPS_X_X_X	(SNOR_HWCAPS_READ_2_2_2 |	\
+				 SNOR_HWCAPS_READ_4_4_4 |	\
+				 SNOR_HWCAPS_READ_8_8_8 |	\
+				 SNOR_HWCAPS_PP_4_4_4 |		\
+				 SNOR_HWCAPS_PP_8_8_8)
+
+#define SNOR_HWCAPS_DTR		(SNOR_HWCAPS_READ_1_1_1_DTR |	\
+				 SNOR_HWCAPS_READ_1_2_2_DTR |	\
+				 SNOR_HWCAPS_READ_1_4_4_DTR |	\
+				 SNOR_HWCAPS_READ_1_8_8_DTR)
+
+#define SNOR_HWCAPS_ALL		(SNOR_HWCAPS_READ_MASK |	\
+				 SNOR_HWCAPS_PP_MASK)
+
+struct spi_nor_read_command {
+	u8			num_mode_clocks;
+	u8			num_wait_states;
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+struct spi_nor_pp_command {
+	u8			opcode;
+	enum spi_nor_protocol	proto;
+};
+
+enum spi_nor_read_command_index {
+	SNOR_CMD_READ,
+	SNOR_CMD_READ_FAST,
+	SNOR_CMD_READ_1_1_1_DTR,
+
+	/* Dual SPI */
+	SNOR_CMD_READ_1_1_2,
+	SNOR_CMD_READ_1_2_2,
+	SNOR_CMD_READ_2_2_2,
+	SNOR_CMD_READ_1_2_2_DTR,
+
+	/* Quad SPI */
+	SNOR_CMD_READ_1_1_4,
+	SNOR_CMD_READ_1_4_4,
+	SNOR_CMD_READ_4_4_4,
+	SNOR_CMD_READ_1_4_4_DTR,
+
+	/* Octal SPI */
+	SNOR_CMD_READ_1_1_8,
+	SNOR_CMD_READ_1_8_8,
+	SNOR_CMD_READ_8_8_8,
+	SNOR_CMD_READ_1_8_8_DTR,
+
+	SNOR_CMD_READ_MAX
+};
+
+enum spi_nor_pp_command_index {
+	SNOR_CMD_PP,
+
+	/* Quad SPI */
+	SNOR_CMD_PP_1_1_4,
+	SNOR_CMD_PP_1_4_4,
+	SNOR_CMD_PP_4_4_4,
+
+	/* Octal SPI */
+	SNOR_CMD_PP_1_1_8,
+	SNOR_CMD_PP_1_8_8,
+	SNOR_CMD_PP_8_8_8,
+
+	SNOR_CMD_PP_MAX
+};
+
+/* Forward declaration that will be used in 'struct spi_nor_flash_parameter' */
+struct spi_nor;
+
+/**
+ * struct spi_nor_flash_parameter - SPI NOR flash parameters and settings.
+ * Includes legacy flash parameters and settings that can be overwritten
+ * by the spi_nor_fixups hooks, or dynamically when parsing the JESD216
+ * Serial Flash Discoverable Parameters (SFDP) tables.
+ *
+ * @size:		the flash memory density in bytes.
+ * @page_size:		the page size of the SPI NOR flash memory.
+ * @hwcaps:		describes the read and page program hardware
+ *			capabilities.
+ * @reads:		read capabilities ordered by priority: the higher index
+ *                      in the array, the higher priority.
+ * @page_programs:	page program capabilities ordered by priority: the
+ *                      higher index in the array, the higher priority.
+ * @quad_enable:	enables SPI NOR quad mode.
+ */
+struct spi_nor_flash_parameter {
+	u64				size;
+	u32				page_size;
+
+	struct spi_nor_hwcaps		hwcaps;
+	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
+	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
+
+	int (*quad_enable)(struct spi_nor *nor);
+};
+
 /**
  * struct flash_info - Forward declaration of a structure used internally by
  *		       spi_nor_scan()
@@ -379,6 +538,10 @@ struct flash_info;
  * @quad_enable:	[FLASH-SPECIFIC] enables SPI NOR quad mode
  * @clear_sr_bp:	[FLASH-SPECIFIC] clears the Block Protection Bits from
  *			the SPI NOR Status Register.
+ * @params:		[FLASH-SPECIFIC] SPI-NOR flash parameters and settings.
+ *                      The structure includes legacy flash parameters and
+ *                      settings that can be overwritten by the spi_nor_fixups
+ *                      hooks, or dynamically when parsing the SFDP tables.
  * @priv:		the private data
  */
 struct spi_nor {
@@ -418,6 +581,7 @@ struct spi_nor {
 	int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
 	int (*quad_enable)(struct spi_nor *nor);
 	int (*clear_sr_bp)(struct spi_nor *nor);
+	struct spi_nor_flash_parameter params;
 
 	void *priv;
 };
@@ -462,81 +626,6 @@ static inline struct device_node *spi_nor_get_flash_node(struct spi_nor *nor)
 	return mtd_get_of_node(&nor->mtd);
 }
 
-/**
- * struct spi_nor_hwcaps - Structure for describing the hardware capabilies
- * supported by the SPI controller (bus master).
- * @mask:		the bitmask listing all the supported hw capabilies
- */
-struct spi_nor_hwcaps {
-	u32	mask;
-};
-
-/*
- *(Fast) Read capabilities.
- * MUST be ordered by priority: the higher bit position, the higher priority.
- * As a matter of performances, it is relevant to use Octal SPI protocols first,
- * then Quad SPI protocols before Dual SPI protocols, Fast Read and lastly
- * (Slow) Read.
- */
-#define SNOR_HWCAPS_READ_MASK		GENMASK(14, 0)
-#define SNOR_HWCAPS_READ		BIT(0)
-#define SNOR_HWCAPS_READ_FAST		BIT(1)
-#define SNOR_HWCAPS_READ_1_1_1_DTR	BIT(2)
-
-#define SNOR_HWCAPS_READ_DUAL		GENMASK(6, 3)
-#define SNOR_HWCAPS_READ_1_1_2		BIT(3)
-#define SNOR_HWCAPS_READ_1_2_2		BIT(4)
-#define SNOR_HWCAPS_READ_2_2_2		BIT(5)
-#define SNOR_HWCAPS_READ_1_2_2_DTR	BIT(6)
-
-#define SNOR_HWCAPS_READ_QUAD		GENMASK(10, 7)
-#define SNOR_HWCAPS_READ_1_1_4		BIT(7)
-#define SNOR_HWCAPS_READ_1_4_4		BIT(8)
-#define SNOR_HWCAPS_READ_4_4_4		BIT(9)
-#define SNOR_HWCAPS_READ_1_4_4_DTR	BIT(10)
-
-#define SNOR_HWCAPS_READ_OCTAL		GENMASK(14, 11)
-#define SNOR_HWCAPS_READ_1_1_8		BIT(11)
-#define SNOR_HWCAPS_READ_1_8_8		BIT(12)
-#define SNOR_HWCAPS_READ_8_8_8		BIT(13)
-#define SNOR_HWCAPS_READ_1_8_8_DTR	BIT(14)
-
-/*
- * Page Program capabilities.
- * MUST be ordered by priority: the higher bit position, the higher priority.
- * Like (Fast) Read capabilities, Octal/Quad SPI protocols are preferred to the
- * legacy SPI 1-1-1 protocol.
- * Note that Dual Page Programs are not supported because there is no existing
- * JEDEC/SFDP standard to define them. Also at this moment no SPI flash memory
- * implements such commands.
- */
-#define SNOR_HWCAPS_PP_MASK	GENMASK(22, 16)
-#define SNOR_HWCAPS_PP		BIT(16)
-
-#define SNOR_HWCAPS_PP_QUAD	GENMASK(19, 17)
-#define SNOR_HWCAPS_PP_1_1_4	BIT(17)
-#define SNOR_HWCAPS_PP_1_4_4	BIT(18)
-#define SNOR_HWCAPS_PP_4_4_4	BIT(19)
-
-#define SNOR_HWCAPS_PP_OCTAL	GENMASK(22, 20)
-#define SNOR_HWCAPS_PP_1_1_8	BIT(20)
-#define SNOR_HWCAPS_PP_1_8_8	BIT(21)
-#define SNOR_HWCAPS_PP_8_8_8	BIT(22)
-
-#define SNOR_HWCAPS_X_X_X	(SNOR_HWCAPS_READ_2_2_2 |	\
-				 SNOR_HWCAPS_READ_4_4_4 |	\
-				 SNOR_HWCAPS_READ_8_8_8 |	\
-				 SNOR_HWCAPS_PP_4_4_4 |		\
-				 SNOR_HWCAPS_PP_8_8_8)
-
-#define SNOR_HWCAPS_DTR		(SNOR_HWCAPS_READ_1_1_1_DTR |	\
-				 SNOR_HWCAPS_READ_1_2_2_DTR |	\
-				 SNOR_HWCAPS_READ_1_4_4_DTR |	\
-				 SNOR_HWCAPS_READ_1_8_8_DTR)
-
-#define SNOR_HWCAPS_ALL		(SNOR_HWCAPS_READ_MASK |	\
-				 SNOR_HWCAPS_PP_MASK)
-
 /**
  * spi_nor_scan() - scan the SPI NOR
  * @nor:	the spi_nor structure
-- 
cgit v1.2.3


From 42f5994724bcbcdbeb743dcc3e2756ec582cb86b Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Fri, 23 Aug 2019 15:53:39 +0000
Subject: mtd: spi-nor: Drop quad_enable() from 'struct spi-nor'

All flash parameters and settings should reside inside
'struct spi_nor_flash_parameter'. Drop the local copy of
quad_enable() and use the one from 'struct spi_nor_flash_parameter'.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 40 +++++++++++++++++++++++-----------------
 include/linux/mtd/spi-nor.h   |  2 --
 2 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index e9b9cd70a999..effda372cb33 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -4403,7 +4403,6 @@ static int spi_nor_setup(struct spi_nor *nor,
 {
 	struct spi_nor_flash_parameter *params = &nor->params;
 	u32 ignored_mask, shared_mask;
-	bool enable_quad_io;
 	int err;
 
 	/*
@@ -4457,23 +4456,33 @@ static int spi_nor_setup(struct spi_nor *nor,
 		return err;
 	}
 
-	/* Enable Quad I/O if needed. */
-	enable_quad_io = (spi_nor_get_protocol_width(nor->read_proto) == 4 ||
-			  spi_nor_get_protocol_width(nor->write_proto) == 4);
-	if (enable_quad_io && params->quad_enable)
-		nor->quad_enable = params->quad_enable;
-	else
-		nor->quad_enable = NULL;
-
 	return 0;
 }
 
+/**
+ * spi_nor_quad_enable() - enable Quad I/O if needed.
+ * @nor:                pointer to a 'struct spi_nor'
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_quad_enable(struct spi_nor *nor)
+{
+	if (!nor->params.quad_enable)
+		return 0;
+
+	if (!(spi_nor_get_protocol_width(nor->read_proto) == 4 ||
+	      spi_nor_get_protocol_width(nor->write_proto) == 4))
+		return 0;
+
+	return nor->params.quad_enable(nor);
+}
+
 static int spi_nor_init(struct spi_nor *nor)
 {
 	int err;
 
 	if (nor->clear_sr_bp) {
-		if (nor->quad_enable == spansion_quad_enable)
+		if (nor->params.quad_enable == spansion_quad_enable)
 			nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
 
 		err = nor->clear_sr_bp(nor);
@@ -4484,12 +4493,10 @@ static int spi_nor_init(struct spi_nor *nor)
 		}
 	}
 
-	if (nor->quad_enable) {
-		err = nor->quad_enable(nor);
-		if (err) {
-			dev_err(nor->dev, "quad mode not supported\n");
-			return err;
-		}
+	err = spi_nor_quad_enable(nor);
+	if (err) {
+		dev_err(nor->dev, "quad mode not supported\n");
+		return err;
 	}
 
 	if (nor->addr_width == 4 && !(nor->flags & SNOR_F_4B_OPCODES)) {
@@ -4706,7 +4713,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 	 * - select op codes for (Fast) Read, Page Program and Sector Erase.
 	 * - set the number of dummy cycles (mode cycles + wait states).
 	 * - set the SPI protocols for register and memory accesses.
-	 * - set the Quad Enable bit if needed (required by SPI x-y-4 protos).
 	 */
 	ret = spi_nor_setup(nor, hwcaps);
 	if (ret)
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 77ba692d9348..17787238f0e9 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -535,7 +535,6 @@ struct flash_info;
  * @flash_unlock:	[FLASH-SPECIFIC] unlock a region of the SPI NOR
  * @flash_is_locked:	[FLASH-SPECIFIC] check if a region of the SPI NOR is
  *			completely locked
- * @quad_enable:	[FLASH-SPECIFIC] enables SPI NOR quad mode
  * @clear_sr_bp:	[FLASH-SPECIFIC] clears the Block Protection Bits from
  *			the SPI NOR Status Register.
  * @params:		[FLASH-SPECIFIC] SPI-NOR flash parameters and settings.
@@ -579,7 +578,6 @@ struct spi_nor {
 	int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
 	int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
 	int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
-	int (*quad_enable)(struct spi_nor *nor);
 	int (*clear_sr_bp)(struct spi_nor *nor);
 	struct spi_nor_flash_parameter params;
 
-- 
cgit v1.2.3


From c46872170a54c902425c8580fd0a75a56ac1d147 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Fri, 23 Aug 2019 17:36:03 +0300
Subject: mtd: spi-nor: Move erase_map to 'struct spi_nor_flash_parameter'

All flash parameters and settings should reside inside
'struct spi_nor_flash_parameter'. Move the SMPT parsed erase map
from 'struct spi_nor' to 'struct spi_nor_flash_parameter'.

Please note that there is a roll-back mechanism for the flash
parameter and settings, for cases when SFDP parser fails. The SFDP
parser receives a Stack allocated copy of nor->params, called
sfdp_params, and uses it to retrieve the serial flash discoverable
parameters. JESD216 SFDP is a standard and has a higher priority
than the default initialized flash parameters, so will overwrite the
sfdp_params data when needed. All SFDP code uses the local copy of
nor->params, that will overwrite it in the end, if the parser succeds.

Saving and restoring the nor->params.erase_map is no longer needed,
since the SFDP code does not touch it.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 40 +++++++++++++++++++++-------------------
 include/linux/mtd/spi-nor.h   |  8 +++++---
 2 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index effda372cb33..9dd6cd8cd13c 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -600,7 +600,7 @@ static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
 	nor->erase_opcode = spi_nor_convert_3to4_erase(nor->erase_opcode);
 
 	if (!spi_nor_has_uniform_erase(nor)) {
-		struct spi_nor_erase_map *map = &nor->erase_map;
+		struct spi_nor_erase_map *map = &nor->params.erase_map;
 		struct spi_nor_erase_type *erase;
 		int i;
 
@@ -1133,7 +1133,7 @@ static int spi_nor_init_erase_cmd_list(struct spi_nor *nor,
 				       struct list_head *erase_list,
 				       u64 addr, u32 len)
 {
-	const struct spi_nor_erase_map *map = &nor->erase_map;
+	const struct spi_nor_erase_map *map = &nor->params.erase_map;
 	const struct spi_nor_erase_type *erase, *prev_erase = NULL;
 	struct spi_nor_erase_region *region;
 	struct spi_nor_erase_command *cmd = NULL;
@@ -3328,7 +3328,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 			      const struct sfdp_parameter_header *bfpt_header,
 			      struct spi_nor_flash_parameter *params)
 {
-	struct spi_nor_erase_map *map = &nor->erase_map;
+	struct spi_nor_erase_map *map = &params->erase_map;
 	struct spi_nor_erase_type *erase_type = map->erase_type;
 	struct sfdp_bfpt bfpt;
 	size_t len;
@@ -3409,7 +3409,7 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 	 * Erase Types defined in the bfpt table.
 	 */
 	erase_mask = 0;
-	memset(&nor->erase_map, 0, sizeof(nor->erase_map));
+	memset(&params->erase_map, 0, sizeof(params->erase_map));
 	for (i = 0; i < ARRAY_SIZE(sfdp_bfpt_erases); i++) {
 		const struct sfdp_bfpt_erase *er = &sfdp_bfpt_erases[i];
 		u32 erasesize;
@@ -3684,14 +3684,18 @@ spi_nor_region_check_overlay(struct spi_nor_erase_region *region,
 /**
  * spi_nor_init_non_uniform_erase_map() - initialize the non-uniform erase map
  * @nor:	pointer to a 'struct spi_nor'
+ * @params:     pointer to a duplicate 'struct spi_nor_flash_parameter' that is
+ *              used for storing SFDP parsed data
  * @smpt:	pointer to the sector map parameter table
  *
  * Return: 0 on success, -errno otherwise.
  */
-static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
-					      const u32 *smpt)
+static int
+spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
+				   struct spi_nor_flash_parameter *params,
+				   const u32 *smpt)
 {
-	struct spi_nor_erase_map *map = &nor->erase_map;
+	struct spi_nor_erase_map *map = &params->erase_map;
 	struct spi_nor_erase_type *erase = map->erase_type;
 	struct spi_nor_erase_region *region;
 	u64 offset;
@@ -3770,6 +3774,8 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
  * spi_nor_parse_smpt() - parse Sector Map Parameter Table
  * @nor:		pointer to a 'struct spi_nor'
  * @smpt_header:	sector map parameter table header
+ * @params:		pointer to a duplicate 'struct spi_nor_flash_parameter'
+ *                      that is used for storing SFDP parsed data
  *
  * This table is optional, but when available, we parse it to identify the
  * location and size of sectors within the main data array of the flash memory
@@ -3778,7 +3784,8 @@ static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
  * Return: 0 on success, -errno otherwise.
  */
 static int spi_nor_parse_smpt(struct spi_nor *nor,
-			      const struct sfdp_parameter_header *smpt_header)
+			      const struct sfdp_parameter_header *smpt_header,
+			      struct spi_nor_flash_parameter *params)
 {
 	const u32 *sector_map;
 	u32 *smpt;
@@ -3807,11 +3814,11 @@ static int spi_nor_parse_smpt(struct spi_nor *nor,
 		goto out;
 	}
 
-	ret = spi_nor_init_non_uniform_erase_map(nor, sector_map);
+	ret = spi_nor_init_non_uniform_erase_map(nor, params, sector_map);
 	if (ret)
 		goto out;
 
-	spi_nor_regions_sort_erase_types(&nor->erase_map);
+	spi_nor_regions_sort_erase_types(&params->erase_map);
 	/* fall through */
 out:
 	kfree(smpt);
@@ -3867,7 +3874,7 @@ static int spi_nor_parse_4bait(struct spi_nor *nor,
 		{ 0u /* not used */,		BIT(12) },
 	};
 	struct spi_nor_pp_command *params_pp = params->page_programs;
-	struct spi_nor_erase_map *map = &nor->erase_map;
+	struct spi_nor_erase_map *map = &params->erase_map;
 	struct spi_nor_erase_type *erase_type = map->erase_type;
 	u32 *dwords;
 	size_t len;
@@ -4097,7 +4104,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 
 		switch (SFDP_PARAM_HEADER_ID(param_header)) {
 		case SFDP_SECTOR_MAP_ID:
-			err = spi_nor_parse_smpt(nor, param_header);
+			err = spi_nor_parse_smpt(nor, param_header, params);
 			break;
 
 		case SFDP_4BAIT_ID:
@@ -4129,7 +4136,7 @@ exit:
 static int spi_nor_init_params(struct spi_nor *nor)
 {
 	struct spi_nor_flash_parameter *params = &nor->params;
-	struct spi_nor_erase_map *map = &nor->erase_map;
+	struct spi_nor_erase_map *map = &params->erase_map;
 	const struct flash_info *info = nor->info;
 	u8 i, erase_mask;
 
@@ -4229,17 +4236,12 @@ static int spi_nor_init_params(struct spi_nor *nor)
 	if ((info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)) &&
 	    !(info->flags & SPI_NOR_SKIP_SFDP)) {
 		struct spi_nor_flash_parameter sfdp_params;
-		struct spi_nor_erase_map prev_map;
 
 		memcpy(&sfdp_params, params, sizeof(sfdp_params));
-		memcpy(&prev_map, &nor->erase_map, sizeof(prev_map));
 
 		if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
 			nor->addr_width = 0;
 			nor->flags &= ~SNOR_F_4B_OPCODES;
-			/* restore previous erase map */
-			memcpy(&nor->erase_map, &prev_map,
-			       sizeof(nor->erase_map));
 		} else {
 			memcpy(params, &sfdp_params, sizeof(*params));
 		}
@@ -4353,7 +4355,7 @@ spi_nor_select_uniform_erase(struct spi_nor_erase_map *map,
 
 static int spi_nor_select_erase(struct spi_nor *nor, u32 wanted_size)
 {
-	struct spi_nor_erase_map *map = &nor->erase_map;
+	struct spi_nor_erase_map *map = &nor->params.erase_map;
 	const struct spi_nor_erase_type *erase = NULL;
 	struct mtd_info *mtd = &nor->mtd;
 	int i;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 17787238f0e9..a86c0d9fb01d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -479,6 +479,8 @@ struct spi_nor;
  *                      in the array, the higher priority.
  * @page_programs:	page program capabilities ordered by priority: the
  *                      higher index in the array, the higher priority.
+ * @erase_map:		the erase map parsed from the SFDP Sector Map Parameter
+ *                      Table.
  * @quad_enable:	enables SPI NOR quad mode.
  */
 struct spi_nor_flash_parameter {
@@ -489,6 +491,8 @@ struct spi_nor_flash_parameter {
 	struct spi_nor_read_command	reads[SNOR_CMD_READ_MAX];
 	struct spi_nor_pp_command	page_programs[SNOR_CMD_PP_MAX];
 
+	struct spi_nor_erase_map        erase_map;
+
 	int (*quad_enable)(struct spi_nor *nor);
 };
 
@@ -519,7 +523,6 @@ struct flash_info;
  * @read_proto:		the SPI protocol for read operations
  * @write_proto:	the SPI protocol for write operations
  * @reg_proto		the SPI protocol for read_reg/write_reg/erase operations
- * @erase_map:		the erase map of the SPI NOR
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
  * @unprepare:		[OPTIONAL] do some post work after the
@@ -562,7 +565,6 @@ struct spi_nor {
 	enum spi_nor_protocol	reg_proto;
 	bool			sst_write_second;
 	u32			flags;
-	struct spi_nor_erase_map	erase_map;
 
 	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
 	void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
@@ -610,7 +612,7 @@ spi_nor_region_mark_overlay(struct spi_nor_erase_region *region)
 
 static bool __maybe_unused spi_nor_has_uniform_erase(const struct spi_nor *nor)
 {
-	return !!nor->erase_map.uniform_erase_type;
+	return !!nor->params.erase_map.uniform_erase_type;
 }
 
 static inline void spi_nor_set_flash_node(struct spi_nor *nor,
-- 
cgit v1.2.3


From 64c160f32235f725a5378c66fb3583c720357ab9 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sat, 24 Aug 2019 08:45:28 +0300
Subject: mtd: spi-nor: Create a ->set_4byte() method

The procedure used to enable 4 byte addressing mode depends on the NOR
device, so let's provide a hook so that manufacturer specific handling
can be implemented in a sane way.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
[tudor.ambarus@microchip.com: use nor->params.set_4byte() instead of
nor->set_4byte()]
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 76 ++++++++++++++++++++++---------------------
 include/linux/mtd/spi-nor.h   |  2 ++
 2 files changed, 41 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index e1a9e0ec7cec..b903fe9ac60e 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -633,6 +633,17 @@ static int macronix_set_4byte(struct spi_nor *nor, bool enable)
 			      NULL, 0);
 }
 
+static int st_micron_set_4byte(struct spi_nor *nor, bool enable)
+{
+	int ret;
+
+	write_enable(nor);
+	ret = macronix_set_4byte(nor, enable);
+	write_disable(nor);
+
+	return ret;
+}
+
 static int spansion_set_4byte(struct spi_nor *nor, bool enable)
 {
 	nor->bouncebuf[0] = enable << 7;
@@ -667,45 +678,24 @@ static int spi_nor_write_ear(struct spi_nor *nor, u8 ear)
 	return nor->write_reg(nor, SPINOR_OP_WREAR, nor->bouncebuf, 1);
 }
 
-/* Enable/disable 4-byte addressing mode. */
-static int set_4byte(struct spi_nor *nor, bool enable)
+static int winbond_set_4byte(struct spi_nor *nor, bool enable)
 {
-	int status;
-	bool need_wren = false;
-
-	switch (JEDEC_MFR(nor->info)) {
-	case SNOR_MFR_ST:
-	case SNOR_MFR_MICRON:
-		/* Some Micron need WREN command; all will accept it */
-		need_wren = true;
-		/* fall through */
-	case SNOR_MFR_MACRONIX:
-	case SNOR_MFR_WINBOND:
-		if (need_wren)
-			write_enable(nor);
+	int ret;
 
-		status = macronix_set_4byte(nor, enable);
-		if (need_wren)
-			write_disable(nor);
+	ret = macronix_set_4byte(nor, enable);
+	if (ret || enable)
+		return ret;
 
-		if (!status && !enable &&
-		    JEDEC_MFR(nor->info) == SNOR_MFR_WINBOND) {
-			/*
-			 * On Winbond W25Q256FV, leaving 4byte mode causes
-			 * the Extended Address Register to be set to 1, so all
-			 * 3-byte-address reads come from the second 16M.
-			 * We must clear the register to enable normal behavior.
-			 */
-			write_enable(nor);
-			spi_nor_write_ear(nor, 0);
-			write_disable(nor);
-		}
+	/*
+	 * On Winbond W25Q256FV, leaving 4byte mode causes the Extended Address
+	 * Register to be set to 1, so all 3-byte-address reads come from the
+	 * second 16M. We must clear the register to enable normal behavior.
+	 */
+	write_enable(nor);
+	ret = spi_nor_write_ear(nor, 0);
+	write_disable(nor);
 
-		return status;
-	default:
-		/* Spansion style */
-		return spansion_set_4byte(nor, enable);
-	}
+	return ret;
 }
 
 static int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr)
@@ -4153,11 +4143,18 @@ exit:
 static void macronix_set_default_init(struct spi_nor *nor)
 {
 	nor->params.quad_enable = macronix_quad_enable;
+	nor->params.set_4byte = macronix_set_4byte;
 }
 
 static void st_micron_set_default_init(struct spi_nor *nor)
 {
 	nor->params.quad_enable = NULL;
+	nor->params.set_4byte = st_micron_set_4byte;
+}
+
+static void winbond_set_default_init(struct spi_nor *nor)
+{
+	nor->params.set_4byte = winbond_set_4byte;
 }
 
 /**
@@ -4178,6 +4175,10 @@ static void spi_nor_manufacturer_init_params(struct spi_nor *nor)
 		st_micron_set_default_init(nor);
 		break;
 
+	case SNOR_MFR_WINBOND:
+		winbond_set_default_init(nor);
+		break;
+
 	default:
 		break;
 	}
@@ -4222,6 +4223,7 @@ static void spi_nor_info_init_params(struct spi_nor *nor)
 
 	/* Initialize legacy flash parameters and settings. */
 	params->quad_enable = spansion_quad_enable;
+	params->set_4byte = spansion_set_4byte;
 
 	/* Set SPI NOR sizes. */
 	params->size = (u64)info->sector_size * info->n_sectors;
@@ -4587,7 +4589,7 @@ static int spi_nor_init(struct spi_nor *nor)
 		 */
 		WARN_ONCE(nor->flags & SNOR_F_BROKEN_RESET,
 			  "enabling reset hack; may not recover from unexpected reboots\n");
-		set_4byte(nor, true);
+		nor->params.set_4byte(nor, true);
 	}
 
 	return 0;
@@ -4611,7 +4613,7 @@ void spi_nor_restore(struct spi_nor *nor)
 	/* restore the addressing mode */
 	if (nor->addr_width == 4 && !(nor->flags & SNOR_F_4B_OPCODES) &&
 	    nor->flags & SNOR_F_BROKEN_RESET)
-		set_4byte(nor, false);
+		nor->params.set_4byte(nor, false);
 }
 EXPORT_SYMBOL_GPL(spi_nor_restore);
 
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index a86c0d9fb01d..7da89dd483cb 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -482,6 +482,7 @@ struct spi_nor;
  * @erase_map:		the erase map parsed from the SFDP Sector Map Parameter
  *                      Table.
  * @quad_enable:	enables SPI NOR quad mode.
+ * @set_4byte:		puts the SPI NOR in 4 byte addressing mode.
  */
 struct spi_nor_flash_parameter {
 	u64				size;
@@ -494,6 +495,7 @@ struct spi_nor_flash_parameter {
 	struct spi_nor_erase_map        erase_map;
 
 	int (*quad_enable)(struct spi_nor *nor);
+	int (*set_4byte)(struct spi_nor *nor, bool enable);
 };
 
 /**
-- 
cgit v1.2.3


From dff972458acb05ab919b6950da99f8172ab14836 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sat, 24 Aug 2019 09:22:03 +0300
Subject: mtd: spi-nor: Rework the SPI NOR lock/unlock logic

Add the SNOR_F_HAS_LOCK flag and set it when SPI_NOR_HAS_LOCK is set
in the flash_info entry or when it's a Micron or ST flash.

Move the locking hooks in a separate struct so that we have just
one field to update when we change the locking implementation.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
[tudor.ambarus@microchip.com: use ->default_init() hook, introduce
spi_nor_late_init_params(), set ops in nor->params]
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 50 ++++++++++++++++++++++++++++++++-----------
 include/linux/mtd/spi-nor.h   | 23 ++++++++++++++------
 2 files changed, 53 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index b903fe9ac60e..f630ec9bad71 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1598,6 +1598,12 @@ static int stm_is_locked(struct spi_nor *nor, loff_t ofs, uint64_t len)
 	return stm_is_locked_sr(nor, ofs, len, status);
 }
 
+static const struct spi_nor_locking_ops stm_locking_ops = {
+	.lock = stm_lock,
+	.unlock = stm_unlock,
+	.is_locked = stm_is_locked,
+};
+
 static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 {
 	struct spi_nor *nor = mtd_to_spi_nor(mtd);
@@ -1607,7 +1613,7 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	if (ret)
 		return ret;
 
-	ret = nor->flash_lock(nor, ofs, len);
+	ret = nor->params.locking_ops->lock(nor, ofs, len);
 
 	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
 	return ret;
@@ -1622,7 +1628,7 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	if (ret)
 		return ret;
 
-	ret = nor->flash_unlock(nor, ofs, len);
+	ret = nor->params.locking_ops->unlock(nor, ofs, len);
 
 	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
 	return ret;
@@ -1637,7 +1643,7 @@ static int spi_nor_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
 	if (ret)
 		return ret;
 
-	ret = nor->flash_is_locked(nor, ofs, len);
+	ret = nor->params.locking_ops->is_locked(nor, ofs, len);
 
 	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
 	return ret;
@@ -4148,6 +4154,7 @@ static void macronix_set_default_init(struct spi_nor *nor)
 
 static void st_micron_set_default_init(struct spi_nor *nor)
 {
+	nor->flags |= SNOR_F_HAS_LOCK;
 	nor->params.quad_enable = NULL;
 	nor->params.set_4byte = st_micron_set_4byte;
 }
@@ -4291,6 +4298,23 @@ static void spi_nor_info_init_params(struct spi_nor *nor)
 	spi_nor_init_uniform_erase_map(map, erase_mask, params->size);
 }
 
+/**
+ * spi_nor_late_init_params() - Late initialization of default flash parameters.
+ * @nor:	pointer to a 'struct spi_nor'
+ *
+ * Used to set default flash parameters and settings when the ->default_init()
+ * hook or the SFDP parser let voids.
+ */
+static void spi_nor_late_init_params(struct spi_nor *nor)
+{
+	/*
+	 * NOR protection support. When locking_ops are not provided, we pick
+	 * the default ones.
+	 */
+	if (nor->flags & SNOR_F_HAS_LOCK && !nor->params.locking_ops)
+		nor->params.locking_ops = &stm_locking_ops;
+}
+
 /**
  * spi_nor_init_params() - Initialize the flash's parameters and settings.
  * @nor:	pointer to a 'struct spi-nor'.
@@ -4316,6 +4340,10 @@ static void spi_nor_info_init_params(struct spi_nor *nor)
  *    Please note that there is a ->post_bfpt() fixup hook that can overwrite
  *    the flash parameters and settings immediately after parsing the Basic
  *    Flash Parameter Table.
+ *
+ * 4/ Late default flash parameters initialization, used when the
+ * ->default_init() hook or the SFDP parser do not set specific params.
+ *		spi_nor_late_init_params()
  */
 static void spi_nor_init_params(struct spi_nor *nor)
 {
@@ -4326,6 +4354,8 @@ static void spi_nor_init_params(struct spi_nor *nor)
 	if ((nor->info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)) &&
 	    !(nor->info->flags & SPI_NOR_SKIP_SFDP))
 		spi_nor_sfdp_init_params(nor);
+
+	spi_nor_late_init_params(nor);
 }
 
 static int spi_nor_select_read(struct spi_nor *nor,
@@ -4707,6 +4737,9 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 	if (info->flags & SPI_S3AN)
 		nor->flags |=  SNOR_F_READY_XSR_RDY;
 
+	if (info->flags & SPI_NOR_HAS_LOCK)
+		nor->flags |= SNOR_F_HAS_LOCK;
+
 	/*
 	 * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
 	 * with the software protection bits set.
@@ -4731,16 +4764,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
 	mtd->_read = spi_nor_read;
 	mtd->_resume = spi_nor_resume;
 
-	/* NOR protection support for STmicro/Micron chips and similar */
-	if (JEDEC_MFR(info) == SNOR_MFR_ST ||
-	    JEDEC_MFR(info) == SNOR_MFR_MICRON ||
-	    info->flags & SPI_NOR_HAS_LOCK) {
-		nor->flash_lock = stm_lock;
-		nor->flash_unlock = stm_unlock;
-		nor->flash_is_locked = stm_is_locked;
-	}
-
-	if (nor->flash_lock && nor->flash_unlock && nor->flash_is_locked) {
+	if (nor->params.locking_ops) {
 		mtd->_lock = spi_nor_lock;
 		mtd->_unlock = spi_nor_unlock;
 		mtd->_is_locked = spi_nor_is_locked;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 7da89dd483cb..ea3bcac54dc2 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -243,6 +243,7 @@ enum spi_nor_option_flags {
 	SNOR_F_BROKEN_RESET	= BIT(6),
 	SNOR_F_4B_OPCODES	= BIT(7),
 	SNOR_F_HAS_4BAIT	= BIT(8),
+	SNOR_F_HAS_LOCK		= BIT(9),
 };
 
 /**
@@ -465,6 +466,18 @@ enum spi_nor_pp_command_index {
 /* Forward declaration that will be used in 'struct spi_nor_flash_parameter' */
 struct spi_nor;
 
+/**
+ * struct spi_nor_locking_ops - SPI NOR locking methods
+ * @lock:	lock a region of the SPI NOR.
+ * @unlock:	unlock a region of the SPI NOR.
+ * @is_locked:	check if a region of the SPI NOR is completely locked
+ */
+struct spi_nor_locking_ops {
+	int (*lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+	int (*unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+	int (*is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+};
+
 /**
  * struct spi_nor_flash_parameter - SPI NOR flash parameters and settings.
  * Includes legacy flash parameters and settings that can be overwritten
@@ -483,6 +496,7 @@ struct spi_nor;
  *                      Table.
  * @quad_enable:	enables SPI NOR quad mode.
  * @set_4byte:		puts the SPI NOR in 4 byte addressing mode.
+ * @locking_ops:	SPI NOR locking methods.
  */
 struct spi_nor_flash_parameter {
 	u64				size;
@@ -496,6 +510,8 @@ struct spi_nor_flash_parameter {
 
 	int (*quad_enable)(struct spi_nor *nor);
 	int (*set_4byte)(struct spi_nor *nor, bool enable);
+
+	const struct spi_nor_locking_ops *locking_ops;
 };
 
 /**
@@ -536,10 +552,6 @@ struct flash_info;
  * @erase:		[DRIVER-SPECIFIC] erase a sector of the SPI NOR
  *			at the offset @offs; if not provided by the driver,
  *			spi-nor will send the erase opcode via write_reg()
- * @flash_lock:		[FLASH-SPECIFIC] lock a region of the SPI NOR
- * @flash_unlock:	[FLASH-SPECIFIC] unlock a region of the SPI NOR
- * @flash_is_locked:	[FLASH-SPECIFIC] check if a region of the SPI NOR is
- *			completely locked
  * @clear_sr_bp:	[FLASH-SPECIFIC] clears the Block Protection Bits from
  *			the SPI NOR Status Register.
  * @params:		[FLASH-SPECIFIC] SPI-NOR flash parameters and settings.
@@ -579,9 +591,6 @@ struct spi_nor {
 			size_t len, const u_char *write_buf);
 	int (*erase)(struct spi_nor *nor, loff_t offs);
 
-	int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
-	int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
-	int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len);
 	int (*clear_sr_bp)(struct spi_nor *nor);
 	struct spi_nor_flash_parameter params;
 
-- 
cgit v1.2.3


From 36499596280359d34a0663afe0abbf34d80862e8 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sat, 24 Aug 2019 10:17:20 +0300
Subject: mtd: spi-nor: Add a ->convert_addr() method

In order to separate manufacturer quirks from the core we need to get
rid of all the manufacturer specific flags, like the
SNOR_F_S3AN_ADDR_DEFAULT one.

This can easily be replaced by a ->convert_addr() hook, which when
implemented will provide the core with an easy way to convert an
absolute address into something the flash understands.

Right now the only user are the S3AN chips, but other manufacturers
can implement it if needed.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 24 ++++++++++++++----------
 include/linux/mtd/spi-nor.h   | 17 ++++++++++-------
 2 files changed, 24 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 4ff3c9f0a017..9005ea8def87 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -899,10 +899,9 @@ static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
  * Addr can safely be unsigned int, the biggest S3AN device is smaller than
  * 4 MiB.
  */
-static loff_t spi_nor_s3an_addr_convert(struct spi_nor *nor, unsigned int addr)
+static u32 s3an_convert_addr(struct spi_nor *nor, u32 addr)
 {
-	unsigned int offset;
-	unsigned int page;
+	u32 offset, page;
 
 	offset = addr % nor->page_size;
 	page = addr / nor->page_size;
@@ -911,6 +910,14 @@ static loff_t spi_nor_s3an_addr_convert(struct spi_nor *nor, unsigned int addr)
 	return page | offset;
 }
 
+static u32 spi_nor_convert_addr(struct spi_nor *nor, loff_t addr)
+{
+	if (!nor->params.convert_addr)
+		return addr;
+
+	return nor->params.convert_addr(nor, addr);
+}
+
 /*
  * Initiate the erasure of a single sector
  */
@@ -918,8 +925,7 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 {
 	int i;
 
-	if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
-		addr = spi_nor_s3an_addr_convert(nor, addr);
+	addr = spi_nor_convert_addr(nor, addr);
 
 	if (nor->erase)
 		return nor->erase(nor, addr);
@@ -2535,8 +2541,7 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
 	while (len) {
 		loff_t addr = from;
 
-		if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
-			addr = spi_nor_s3an_addr_convert(nor, addr);
+		addr = spi_nor_convert_addr(nor, addr);
 
 		ret = spi_nor_read_data(nor, addr, len, buf);
 		if (ret == 0) {
@@ -2680,8 +2685,7 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
 		page_remain = min_t(size_t,
 				    nor->page_size - page_offset, len - i);
 
-		if (nor->flags & SNOR_F_S3AN_ADDR_DEFAULT)
-			addr = spi_nor_s3an_addr_convert(nor, addr);
+		addr = spi_nor_convert_addr(nor, addr);
 
 		write_enable(nor);
 		ret = spi_nor_write_data(nor, addr, page_remain, buf + i);
@@ -2748,7 +2752,7 @@ static int s3an_nor_scan(struct spi_nor *nor)
 		nor->mtd.erasesize = 8 * nor->page_size;
 	} else {
 		/* Flash in Default addressing mode */
-		nor->flags |= SNOR_F_S3AN_ADDR_DEFAULT;
+		nor->params.convert_addr = s3an_convert_addr;
 	}
 
 	return 0;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index ea3bcac54dc2..35aad92a4ff8 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -237,13 +237,12 @@ enum spi_nor_option_flags {
 	SNOR_F_USE_FSR		= BIT(0),
 	SNOR_F_HAS_SR_TB	= BIT(1),
 	SNOR_F_NO_OP_CHIP_ERASE	= BIT(2),
-	SNOR_F_S3AN_ADDR_DEFAULT = BIT(3),
-	SNOR_F_READY_XSR_RDY	= BIT(4),
-	SNOR_F_USE_CLSR		= BIT(5),
-	SNOR_F_BROKEN_RESET	= BIT(6),
-	SNOR_F_4B_OPCODES	= BIT(7),
-	SNOR_F_HAS_4BAIT	= BIT(8),
-	SNOR_F_HAS_LOCK		= BIT(9),
+	SNOR_F_READY_XSR_RDY	= BIT(3),
+	SNOR_F_USE_CLSR		= BIT(4),
+	SNOR_F_BROKEN_RESET	= BIT(5),
+	SNOR_F_4B_OPCODES	= BIT(6),
+	SNOR_F_HAS_4BAIT	= BIT(7),
+	SNOR_F_HAS_LOCK		= BIT(8),
 };
 
 /**
@@ -496,6 +495,9 @@ struct spi_nor_locking_ops {
  *                      Table.
  * @quad_enable:	enables SPI NOR quad mode.
  * @set_4byte:		puts the SPI NOR in 4 byte addressing mode.
+ * @convert_addr:	converts an absolute address into something the flash
+ *                      will understand. Particularly useful when pagesize is
+ *                      not a power-of-2.
  * @locking_ops:	SPI NOR locking methods.
  */
 struct spi_nor_flash_parameter {
@@ -510,6 +512,7 @@ struct spi_nor_flash_parameter {
 
 	int (*quad_enable)(struct spi_nor *nor);
 	int (*set_4byte)(struct spi_nor *nor, bool enable);
+	u32 (*convert_addr)(struct spi_nor *nor, u32 addr);
 
 	const struct spi_nor_locking_ops *locking_ops;
 };
-- 
cgit v1.2.3


From 2d7ff858e5f683393f32b07e64e565877a2e4bcb Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Sun, 25 Aug 2019 22:48:36 +0300
Subject: mtd: spi-nor: Add a ->setup() method

nor->params.setup() configures the SPI NOR memory. Useful for SPI NOR
flashes that have peculiarities to the SPI NOR standard, e.g.
different opcodes, specific address calculation, page size, etc.
Right now the only user will be the S3AN chips, but other
manufacturers can implement it if needed.

Move spi_nor_setup() related code in order to avoid a forward
declaration to spi_nor_default_setup().

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 432 +++++++++++++++++++++---------------------
 include/linux/mtd/spi-nor.h   |   5 +
 2 files changed, 226 insertions(+), 211 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 9005ea8def87..449c7bfff2bd 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -4144,6 +4144,226 @@ exit:
 	return err;
 }
 
+static int spi_nor_select_read(struct spi_nor *nor,
+			       u32 shared_hwcaps)
+{
+	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
+	const struct spi_nor_read_command *read;
+
+	if (best_match < 0)
+		return -EINVAL;
+
+	cmd = spi_nor_hwcaps_read2cmd(BIT(best_match));
+	if (cmd < 0)
+		return -EINVAL;
+
+	read = &nor->params.reads[cmd];
+	nor->read_opcode = read->opcode;
+	nor->read_proto = read->proto;
+
+	/*
+	 * In the spi-nor framework, we don't need to make the difference
+	 * between mode clock cycles and wait state clock cycles.
+	 * Indeed, the value of the mode clock cycles is used by a QSPI
+	 * flash memory to know whether it should enter or leave its 0-4-4
+	 * (Continuous Read / XIP) mode.
+	 * eXecution In Place is out of the scope of the mtd sub-system.
+	 * Hence we choose to merge both mode and wait state clock cycles
+	 * into the so called dummy clock cycles.
+	 */
+	nor->read_dummy = read->num_mode_clocks + read->num_wait_states;
+	return 0;
+}
+
+static int spi_nor_select_pp(struct spi_nor *nor,
+			     u32 shared_hwcaps)
+{
+	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_PP_MASK) - 1;
+	const struct spi_nor_pp_command *pp;
+
+	if (best_match < 0)
+		return -EINVAL;
+
+	cmd = spi_nor_hwcaps_pp2cmd(BIT(best_match));
+	if (cmd < 0)
+		return -EINVAL;
+
+	pp = &nor->params.page_programs[cmd];
+	nor->program_opcode = pp->opcode;
+	nor->write_proto = pp->proto;
+	return 0;
+}
+
+/**
+ * spi_nor_select_uniform_erase() - select optimum uniform erase type
+ * @map:		the erase map of the SPI NOR
+ * @wanted_size:	the erase type size to search for. Contains the value of
+ *			info->sector_size or of the "small sector" size in case
+ *			CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is defined.
+ *
+ * Once the optimum uniform sector erase command is found, disable all the
+ * other.
+ *
+ * Return: pointer to erase type on success, NULL otherwise.
+ */
+static const struct spi_nor_erase_type *
+spi_nor_select_uniform_erase(struct spi_nor_erase_map *map,
+			     const u32 wanted_size)
+{
+	const struct spi_nor_erase_type *tested_erase, *erase = NULL;
+	int i;
+	u8 uniform_erase_type = map->uniform_erase_type;
+
+	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
+		if (!(uniform_erase_type & BIT(i)))
+			continue;
+
+		tested_erase = &map->erase_type[i];
+
+		/*
+		 * If the current erase size is the one, stop here:
+		 * we have found the right uniform Sector Erase command.
+		 */
+		if (tested_erase->size == wanted_size) {
+			erase = tested_erase;
+			break;
+		}
+
+		/*
+		 * Otherwise, the current erase size is still a valid canditate.
+		 * Select the biggest valid candidate.
+		 */
+		if (!erase && tested_erase->size)
+			erase = tested_erase;
+			/* keep iterating to find the wanted_size */
+	}
+
+	if (!erase)
+		return NULL;
+
+	/* Disable all other Sector Erase commands. */
+	map->uniform_erase_type &= ~SNOR_ERASE_TYPE_MASK;
+	map->uniform_erase_type |= BIT(erase - map->erase_type);
+	return erase;
+}
+
+static int spi_nor_select_erase(struct spi_nor *nor, u32 wanted_size)
+{
+	struct spi_nor_erase_map *map = &nor->params.erase_map;
+	const struct spi_nor_erase_type *erase = NULL;
+	struct mtd_info *mtd = &nor->mtd;
+	int i;
+
+	/*
+	 * The previous implementation handling Sector Erase commands assumed
+	 * that the SPI flash memory has an uniform layout then used only one
+	 * of the supported erase sizes for all Sector Erase commands.
+	 * So to be backward compatible, the new implementation also tries to
+	 * manage the SPI flash memory as uniform with a single erase sector
+	 * size, when possible.
+	 */
+#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
+	/* prefer "small sector" erase if possible */
+	wanted_size = 4096u;
+#endif
+
+	if (spi_nor_has_uniform_erase(nor)) {
+		erase = spi_nor_select_uniform_erase(map, wanted_size);
+		if (!erase)
+			return -EINVAL;
+		nor->erase_opcode = erase->opcode;
+		mtd->erasesize = erase->size;
+		return 0;
+	}
+
+	/*
+	 * For non-uniform SPI flash memory, set mtd->erasesize to the
+	 * maximum erase sector size. No need to set nor->erase_opcode.
+	 */
+	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
+		if (map->erase_type[i].size) {
+			erase = &map->erase_type[i];
+			break;
+		}
+	}
+
+	if (!erase)
+		return -EINVAL;
+
+	mtd->erasesize = erase->size;
+	return 0;
+}
+
+static int spi_nor_default_setup(struct spi_nor *nor,
+				 const struct spi_nor_hwcaps *hwcaps)
+{
+	struct spi_nor_flash_parameter *params = &nor->params;
+	u32 ignored_mask, shared_mask;
+	int err;
+
+	/*
+	 * Keep only the hardware capabilities supported by both the SPI
+	 * controller and the SPI flash memory.
+	 */
+	shared_mask = hwcaps->mask & params->hwcaps.mask;
+
+	if (nor->spimem) {
+		/*
+		 * When called from spi_nor_probe(), all caps are set and we
+		 * need to discard some of them based on what the SPI
+		 * controller actually supports (using spi_mem_supports_op()).
+		 */
+		spi_nor_spimem_adjust_hwcaps(nor, &shared_mask);
+	} else {
+		/*
+		 * SPI n-n-n protocols are not supported when the SPI
+		 * controller directly implements the spi_nor interface.
+		 * Yet another reason to switch to spi-mem.
+		 */
+		ignored_mask = SNOR_HWCAPS_X_X_X;
+		if (shared_mask & ignored_mask) {
+			dev_dbg(nor->dev,
+				"SPI n-n-n protocols are not supported.\n");
+			shared_mask &= ~ignored_mask;
+		}
+	}
+
+	/* Select the (Fast) Read command. */
+	err = spi_nor_select_read(nor, shared_mask);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select read settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Select the Page Program command. */
+	err = spi_nor_select_pp(nor, shared_mask);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select write settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	/* Select the Sector Erase command. */
+	err = spi_nor_select_erase(nor, nor->info->sector_size);
+	if (err) {
+		dev_err(nor->dev,
+			"can't select erase settings supported by both the SPI controller and memory.\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int spi_nor_setup(struct spi_nor *nor,
+			 const struct spi_nor_hwcaps *hwcaps)
+{
+	if (!nor->params.setup)
+		return 0;
+
+	return nor->params.setup(nor, hwcaps);
+}
+
 static void macronix_set_default_init(struct spi_nor *nor)
 {
 	nor->params.quad_enable = macronix_quad_enable;
@@ -4229,6 +4449,7 @@ static void spi_nor_info_init_params(struct spi_nor *nor)
 	/* Initialize legacy flash parameters and settings. */
 	params->quad_enable = spansion_quad_enable;
 	params->set_4byte = spansion_set_4byte;
+	params->setup = spi_nor_default_setup;
 
 	/* Set SPI NOR sizes. */
 	params->size = (u64)info->sector_size * info->n_sectors;
@@ -4403,217 +4624,6 @@ static void spi_nor_init_params(struct spi_nor *nor)
 	spi_nor_late_init_params(nor);
 }
 
-static int spi_nor_select_read(struct spi_nor *nor,
-			       u32 shared_hwcaps)
-{
-	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_READ_MASK) - 1;
-	const struct spi_nor_read_command *read;
-
-	if (best_match < 0)
-		return -EINVAL;
-
-	cmd = spi_nor_hwcaps_read2cmd(BIT(best_match));
-	if (cmd < 0)
-		return -EINVAL;
-
-	read = &nor->params.reads[cmd];
-	nor->read_opcode = read->opcode;
-	nor->read_proto = read->proto;
-
-	/*
-	 * In the spi-nor framework, we don't need to make the difference
-	 * between mode clock cycles and wait state clock cycles.
-	 * Indeed, the value of the mode clock cycles is used by a QSPI
-	 * flash memory to know whether it should enter or leave its 0-4-4
-	 * (Continuous Read / XIP) mode.
-	 * eXecution In Place is out of the scope of the mtd sub-system.
-	 * Hence we choose to merge both mode and wait state clock cycles
-	 * into the so called dummy clock cycles.
-	 */
-	nor->read_dummy = read->num_mode_clocks + read->num_wait_states;
-	return 0;
-}
-
-static int spi_nor_select_pp(struct spi_nor *nor,
-			     u32 shared_hwcaps)
-{
-	int cmd, best_match = fls(shared_hwcaps & SNOR_HWCAPS_PP_MASK) - 1;
-	const struct spi_nor_pp_command *pp;
-
-	if (best_match < 0)
-		return -EINVAL;
-
-	cmd = spi_nor_hwcaps_pp2cmd(BIT(best_match));
-	if (cmd < 0)
-		return -EINVAL;
-
-	pp = &nor->params.page_programs[cmd];
-	nor->program_opcode = pp->opcode;
-	nor->write_proto = pp->proto;
-	return 0;
-}
-
-/**
- * spi_nor_select_uniform_erase() - select optimum uniform erase type
- * @map:		the erase map of the SPI NOR
- * @wanted_size:	the erase type size to search for. Contains the value of
- *			info->sector_size or of the "small sector" size in case
- *			CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is defined.
- *
- * Once the optimum uniform sector erase command is found, disable all the
- * other.
- *
- * Return: pointer to erase type on success, NULL otherwise.
- */
-static const struct spi_nor_erase_type *
-spi_nor_select_uniform_erase(struct spi_nor_erase_map *map,
-			     const u32 wanted_size)
-{
-	const struct spi_nor_erase_type *tested_erase, *erase = NULL;
-	int i;
-	u8 uniform_erase_type = map->uniform_erase_type;
-
-	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
-		if (!(uniform_erase_type & BIT(i)))
-			continue;
-
-		tested_erase = &map->erase_type[i];
-
-		/*
-		 * If the current erase size is the one, stop here:
-		 * we have found the right uniform Sector Erase command.
-		 */
-		if (tested_erase->size == wanted_size) {
-			erase = tested_erase;
-			break;
-		}
-
-		/*
-		 * Otherwise, the current erase size is still a valid canditate.
-		 * Select the biggest valid candidate.
-		 */
-		if (!erase && tested_erase->size)
-			erase = tested_erase;
-			/* keep iterating to find the wanted_size */
-	}
-
-	if (!erase)
-		return NULL;
-
-	/* Disable all other Sector Erase commands. */
-	map->uniform_erase_type &= ~SNOR_ERASE_TYPE_MASK;
-	map->uniform_erase_type |= BIT(erase - map->erase_type);
-	return erase;
-}
-
-static int spi_nor_select_erase(struct spi_nor *nor, u32 wanted_size)
-{
-	struct spi_nor_erase_map *map = &nor->params.erase_map;
-	const struct spi_nor_erase_type *erase = NULL;
-	struct mtd_info *mtd = &nor->mtd;
-	int i;
-
-	/*
-	 * The previous implementation handling Sector Erase commands assumed
-	 * that the SPI flash memory has an uniform layout then used only one
-	 * of the supported erase sizes for all Sector Erase commands.
-	 * So to be backward compatible, the new implementation also tries to
-	 * manage the SPI flash memory as uniform with a single erase sector
-	 * size, when possible.
-	 */
-#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
-	/* prefer "small sector" erase if possible */
-	wanted_size = 4096u;
-#endif
-
-	if (spi_nor_has_uniform_erase(nor)) {
-		erase = spi_nor_select_uniform_erase(map, wanted_size);
-		if (!erase)
-			return -EINVAL;
-		nor->erase_opcode = erase->opcode;
-		mtd->erasesize = erase->size;
-		return 0;
-	}
-
-	/*
-	 * For non-uniform SPI flash memory, set mtd->erasesize to the
-	 * maximum erase sector size. No need to set nor->erase_opcode.
-	 */
-	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
-		if (map->erase_type[i].size) {
-			erase = &map->erase_type[i];
-			break;
-		}
-	}
-
-	if (!erase)
-		return -EINVAL;
-
-	mtd->erasesize = erase->size;
-	return 0;
-}
-
-static int spi_nor_setup(struct spi_nor *nor,
-			 const struct spi_nor_hwcaps *hwcaps)
-{
-	struct spi_nor_flash_parameter *params = &nor->params;
-	u32 ignored_mask, shared_mask;
-	int err;
-
-	/*
-	 * Keep only the hardware capabilities supported by both the SPI
-	 * controller and the SPI flash memory.
-	 */
-	shared_mask = hwcaps->mask & params->hwcaps.mask;
-
-	if (nor->spimem) {
-		/*
-		 * When called from spi_nor_probe(), all caps are set and we
-		 * need to discard some of them based on what the SPI
-		 * controller actually supports (using spi_mem_supports_op()).
-		 */
-		spi_nor_spimem_adjust_hwcaps(nor, &shared_mask);
-	} else {
-		/*
-		 * SPI n-n-n protocols are not supported when the SPI
-		 * controller directly implements the spi_nor interface.
-		 * Yet another reason to switch to spi-mem.
-		 */
-		ignored_mask = SNOR_HWCAPS_X_X_X;
-		if (shared_mask & ignored_mask) {
-			dev_dbg(nor->dev,
-				"SPI n-n-n protocols are not supported.\n");
-			shared_mask &= ~ignored_mask;
-		}
-	}
-
-	/* Select the (Fast) Read command. */
-	err = spi_nor_select_read(nor, shared_mask);
-	if (err) {
-		dev_err(nor->dev,
-			"can't select read settings supported by both the SPI controller and memory.\n");
-		return err;
-	}
-
-	/* Select the Page Program command. */
-	err = spi_nor_select_pp(nor, shared_mask);
-	if (err) {
-		dev_err(nor->dev,
-			"can't select write settings supported by both the SPI controller and memory.\n");
-		return err;
-	}
-
-	/* Select the Sector Erase command. */
-	err = spi_nor_select_erase(nor, nor->info->sector_size);
-	if (err) {
-		dev_err(nor->dev,
-			"can't select erase settings supported by both the SPI controller and memory.\n");
-		return err;
-	}
-
-	return 0;
-}
-
 /**
  * spi_nor_quad_enable() - enable Quad I/O if needed.
  * @nor:                pointer to a 'struct spi_nor'
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 35aad92a4ff8..fc0b4b19c900 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -498,6 +498,10 @@ struct spi_nor_locking_ops {
  * @convert_addr:	converts an absolute address into something the flash
  *                      will understand. Particularly useful when pagesize is
  *                      not a power-of-2.
+ * @setup:              configures the SPI NOR memory. Useful for SPI NOR
+ *                      flashes that have peculiarities to the SPI NOR standard
+ *                      e.g. different opcodes, specific address calculation,
+ *                      page size, etc.
  * @locking_ops:	SPI NOR locking methods.
  */
 struct spi_nor_flash_parameter {
@@ -513,6 +517,7 @@ struct spi_nor_flash_parameter {
 	int (*quad_enable)(struct spi_nor *nor);
 	int (*set_4byte)(struct spi_nor *nor, bool enable);
 	u32 (*convert_addr)(struct spi_nor *nor, u32 addr);
+	int (*setup)(struct spi_nor *nor, const struct spi_nor_hwcaps *hwcaps);
 
 	const struct spi_nor_locking_ops *locking_ops;
 };
-- 
cgit v1.2.3


From 19298fbf453c90a6cf72288155f80c6f55e9139d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:08:51 +0200
Subject: posix-cpu-timers: Provide quick sample function for itimer

get_itimer() needs a sample of the current thread group cputime. It invokes
thread_group_cputimer() - which is a misnomer. That function also starts
eventually the group cputime accouting which is bogus because the
accounting is already active when a timer is armed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192919.599658199@linutronix.de
---
 include/linux/sched/cputime.h  |  2 +-
 kernel/time/posix-cpu-timers.c | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 53f883f5a2fd..6de7b384d2be 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -62,7 +62,7 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
-
+void thread_group_sample_cputime(struct task_struct *tsk, struct task_cputime *times);
 
 /*
  * The following are functions that support scheduler-internal time accounting.
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 4426a0f9c470..c22b6b604a95 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -232,6 +232,27 @@ static inline void sample_cputime_atomic(struct task_cputime *times,
 	times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
 }
 
+/**
+ * thread_group_sample_cputime - Sample cputime for a given task
+ * @tsk:	Task for which cputime needs to be started
+ * @iimes:	Storage for time samples
+ *
+ * Called from sys_getitimer() to calculate the expiry time of an active
+ * timer. That means group cputime accounting is already active. Called
+ * with task sighand lock held.
+ *
+ * Updates @times with an uptodate sample of the thread group cputimes.
+ */
+void thread_group_sample_cputime(struct task_struct *tsk,
+				struct task_cputime *times)
+{
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+
+	WARN_ON_ONCE(!cputimer->running);
+
+	sample_cputime_atomic(times, &cputimer->cputime_atomic);
+}
+
 void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
-- 
cgit v1.2.3


From c506bef424ca282f2ad357e86fee940c69018974 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:08:54 +0200
Subject: posix-cpu-timers: Rename thread_group_cputimer() and make it static

thread_group_cputimer() is a complete misnomer. The function does two things:

 - For arming process wide timers it makes sure that the atomic time
   storage is up to date. If no cpu timer is armed yet, then the atomic
   time storage is not updated by the scheduler for performance reasons.

   In that case a full summing up of all threads needs to be done and the
   update needs to be enabled.

- Samples the current time into the caller supplied storage.

Rename it to thread_group_start_cputime(), make it static and fixup the
callsite.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192919.869350319@linutronix.de
---
 include/linux/sched/cputime.h  |  1 -
 kernel/time/posix-cpu-timers.c | 17 +++++++++++++++--
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 6de7b384d2be..2638fd0ab3f2 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -61,7 +61,6 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
  * Thread group CPU time accounting.
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
 void thread_group_sample_cputime(struct task_struct *tsk, struct task_cputime *times);
 
 /*
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index cb736787145b..def225ae069a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -253,7 +253,20 @@ void thread_group_sample_cputime(struct task_struct *tsk,
 	sample_cputime_atomic(times, &cputimer->cputime_atomic);
 }
 
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+/**
+ * thread_group_start_cputime - Start cputime and return a sample
+ * @tsk:	Task for which cputime needs to be started
+ * @iimes:	Storage for time samples
+ *
+ * The thread group cputime accouting is avoided when there are no posix
+ * CPU timers armed. Before starting a timer it's required to check whether
+ * the time accounting is active. If not, a full update of the atomic
+ * accounting store needs to be done and the accounting enabled.
+ *
+ * Updates @times with an uptodate sample of the thread group cputimes.
+ */
+static void
+thread_group_start_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 	struct task_cputime sum;
@@ -536,7 +549,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
 {
 	struct task_cputime cputime;
 
-	thread_group_cputimer(p, &cputime);
+	thread_group_start_cputime(p, &cputime);
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
-- 
cgit v1.2.3


From 2b69942f9021bf75bd1b001f53bd2578361fadf3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:04 +0200
Subject: posix-cpu-timers: Create a container struct

Per task/process data of posix CPU timers is all over the place which
makes the code hard to follow and requires ifdeffery.

Create a container to hold all this information in one place, so data is
consolidated and the ifdeffery can be confined to the posix timer header
file and removed from places like fork.

As a first step, move the cpu_timers list head array into the new struct
and clean up the initializers and simplify fork. The remaining #ifdef in
fork will be removed later.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192920.819418976@linutronix.de
---
 include/linux/init_task.h      | 11 -----------
 include/linux/posix-timers.h   | 34 ++++++++++++++++++++++++++++++++++
 include/linux/sched.h          |  4 +++-
 include/linux/sched/signal.h   |  5 +++--
 kernel/fork.c                  | 11 ++++-------
 kernel/time/posix-cpu-timers.c | 20 ++++++++++----------
 6 files changed, 54 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6049baa5b8bc..2c620d7ac432 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -36,17 +36,6 @@ extern struct cred init_cred;
 #define INIT_PREV_CPUTIME(x)
 #endif
 
-#ifdef CONFIG_POSIX_TIMERS
-#define INIT_CPU_TIMERS(s)						\
-	.cpu_timers = {							\
-		LIST_HEAD_INIT(s.cpu_timers[0]),			\
-		LIST_HEAD_INIT(s.cpu_timers[1]),			\
-		LIST_HEAD_INIT(s.cpu_timers[2]),			\
-	},
-#else
-#define INIT_CPU_TIMERS(s)
-#endif
-
 #define INIT_TASK_COMM "swapper"
 
 /* Attach to the init_task data structure for proper alignment */
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 033374b99767..cdef89750b2c 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -62,6 +62,40 @@ static inline int clockid_to_fd(const clockid_t clk)
 	return ~(clk >> 3);
 }
 
+#ifdef CONFIG_POSIX_TIMERS
+/**
+ * posix_cputimers - Container for posix CPU timer related data
+ * @cpu_timers:		List heads to queue posix CPU timers
+ *
+ * Used in task_struct and signal_struct
+ */
+struct posix_cputimers {
+	struct list_head	cpu_timers[CPUCLOCK_MAX];
+};
+
+static inline void posix_cputimers_init(struct posix_cputimers *pct)
+{
+	INIT_LIST_HEAD(&pct->cpu_timers[0]);
+	INIT_LIST_HEAD(&pct->cpu_timers[1]);
+	INIT_LIST_HEAD(&pct->cpu_timers[2]);
+}
+
+/* Init task static initializer */
+#define INIT_CPU_TIMERLISTS(c)	{					\
+	LIST_HEAD_INIT(c.cpu_timers[0]),				\
+	LIST_HEAD_INIT(c.cpu_timers[1]),				\
+	LIST_HEAD_INIT(c.cpu_timers[2]),				\
+}
+
+#define INIT_CPU_TIMERS(s)						\
+	.posix_cputimers = {						\
+		.cpu_timers = INIT_CPU_TIMERLISTS(s.posix_cputimers),	\
+	},
+#else
+struct posix_cputimers { };
+#define INIT_CPU_TIMERS(s)
+#endif
+
 #define REQUEUE_PENDING 1
 
 /**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8dc1811487f5..fde844a3b86e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,6 +28,7 @@
 #include <linux/signal_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
+#include <linux/posix-timers.h>
 #include <linux/rseq.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
@@ -878,8 +879,9 @@ struct task_struct {
 
 #ifdef CONFIG_POSIX_TIMERS
 	struct task_cputime		cputime_expires;
-	struct list_head		cpu_timers[3];
 #endif
+	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
+	struct posix_cputimers		posix_cputimers;
 
 	/* Process credentials: */
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index efd8ce7675ed..88fbb3f1c375 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -9,6 +9,7 @@
 #include <linux/sched/task.h>
 #include <linux/cred.h>
 #include <linux/refcount.h>
+#include <linux/posix-timers.h>
 
 /*
  * Types defining task->signal and task->sighand and APIs using them:
@@ -151,9 +152,9 @@ struct signal_struct {
 	/* Earliest-expiration cache. */
 	struct task_cputime cputime_expires;
 
-	struct list_head cpu_timers[3];
-
 #endif
+	/* Empty if CONFIG_POSIX_TIMERS=n */
+	struct posix_cputimers posix_cputimers;
 
 	/* PID/PID hash table linkage. */
 	struct pid *pids[PIDTYPE_MAX];
diff --git a/kernel/fork.c b/kernel/fork.c
index d8ae0f1b4148..b6a135e4275b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1523,6 +1523,7 @@ void __cleanup_sighand(struct sighand_struct *sighand)
  */
 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 {
+	struct posix_cputimers *pct = &sig->posix_cputimers;
 	unsigned long cpu_limit;
 
 	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
@@ -1531,10 +1532,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 		sig->cputimer.running = true;
 	}
 
-	/* The timer lists. */
-	INIT_LIST_HEAD(&sig->cpu_timers[0]);
-	INIT_LIST_HEAD(&sig->cpu_timers[1]);
-	INIT_LIST_HEAD(&sig->cpu_timers[2]);
+	posix_cputimers_init(pct);
 }
 #else
 static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { }
@@ -1649,9 +1647,8 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
 	tsk->cputime_expires.prof_exp = 0;
 	tsk->cputime_expires.virt_exp = 0;
 	tsk->cputime_expires.sched_exp = 0;
-	INIT_LIST_HEAD(&tsk->cpu_timers[0]);
-	INIT_LIST_HEAD(&tsk->cpu_timers[1]);
-	INIT_LIST_HEAD(&tsk->cpu_timers[2]);
+
+	posix_cputimers_init(&tsk->posix_cputimers);
 }
 #else
 static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index b1c97664a730..849e2045fb6e 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -407,11 +407,11 @@ static void cleanup_timers_list(struct list_head *head)
  *
  * This must be called with the siglock held.
  */
-static void cleanup_timers(struct list_head *head)
+static void cleanup_timers(struct posix_cputimers *pct)
 {
-	cleanup_timers_list(head);
-	cleanup_timers_list(++head);
-	cleanup_timers_list(++head);
+	cleanup_timers_list(&pct->cpu_timers[CPUCLOCK_PROF]);
+	cleanup_timers_list(&pct->cpu_timers[CPUCLOCK_VIRT]);
+	cleanup_timers_list(&pct->cpu_timers[CPUCLOCK_SCHED]);
 }
 
 /*
@@ -421,11 +421,11 @@ static void cleanup_timers(struct list_head *head)
  */
 void posix_cpu_timers_exit(struct task_struct *tsk)
 {
-	cleanup_timers(tsk->cpu_timers);
+	cleanup_timers(&tsk->posix_cputimers);
 }
 void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
-	cleanup_timers(tsk->signal->cpu_timers);
+	cleanup_timers(&tsk->signal->posix_cputimers);
 }
 
 static inline int expires_gt(u64 expires, u64 new_exp)
@@ -446,10 +446,10 @@ static void arm_timer(struct k_itimer *timer)
 	struct cpu_timer_list *next;
 
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		head = p->cpu_timers;
+		head = p->posix_cputimers.cpu_timers;
 		cputime_expires = &p->cputime_expires;
 	} else {
-		head = p->signal->cpu_timers;
+		head = p->signal->posix_cputimers.cpu_timers;
 		cputime_expires = &p->signal->cputime_expires;
 	}
 	head += CPUCLOCK_WHICH(timer->it_clock);
@@ -773,8 +773,8 @@ static inline void check_dl_overrun(struct task_struct *tsk)
 static void check_thread_timers(struct task_struct *tsk,
 				struct list_head *firing)
 {
+	struct list_head *timers = tsk->posix_cputimers.cpu_timers;
 	struct task_cputime *tsk_expires = &tsk->cputime_expires;
-	struct list_head *timers = tsk->cpu_timers;
 	u64 expires, stime, utime;
 	unsigned long soft;
 
@@ -879,9 +879,9 @@ static void check_process_timers(struct task_struct *tsk,
 				 struct list_head *firing)
 {
 	struct signal_struct *const sig = tsk->signal;
+	struct list_head *timers = sig->posix_cputimers.cpu_timers;
 	u64 utime, ptime, virt_expires, prof_expires;
 	u64 sum_sched_runtime, sched_expires;
-	struct list_head *timers = sig->cpu_timers;
 	struct task_cputime cputime;
 	unsigned long soft;
 
-- 
cgit v1.2.3


From 9eacb5c7e6607aba00a7322b21cad83fc8b101c8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:05 +0200
Subject: sched: Move struct task_cputime to types.h

For upcoming posix-timer changes to avoid include recursion hell.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190821192920.909530418@linutronix.de
---
 include/linux/sched.h       | 17 +----------------
 include/linux/sched/types.h | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 16 deletions(-)
 create mode 100644 include/linux/sched/types.h

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fde844a3b86e..37c39df9b186 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
 #include <linux/resource.h>
 #include <linux/latencytop.h>
 #include <linux/sched/prio.h>
+#include <linux/sched/types.h>
 #include <linux/signal_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
@@ -245,22 +246,6 @@ struct prev_cputime {
 #endif
 };
 
-/**
- * struct task_cputime - collected CPU time counts
- * @utime:		time spent in user mode, in nanoseconds
- * @stime:		time spent in kernel mode, in nanoseconds
- * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
- *
- * This structure groups together three kinds of CPU time that are tracked for
- * threads and thread groups.  Most things considering CPU time want to group
- * these counts together and treat all three of them in parallel.
- */
-struct task_cputime {
-	u64				utime;
-	u64				stime;
-	unsigned long long		sum_exec_runtime;
-};
-
 /* Alternate field names when used on cache expirations: */
 #define virt_exp			utime
 #define prof_exp			stime
diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h
new file mode 100644
index 000000000000..2c5c28ddd9b2
--- /dev/null
+++ b/include/linux/sched/types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SCHED_TYPES_H
+#define _LINUX_SCHED_TYPES_H
+
+#include <linux/types.h>
+
+/**
+ * struct task_cputime - collected CPU time counts
+ * @utime:		time spent in user mode, in nanoseconds
+ * @stime:		time spent in kernel mode, in nanoseconds
+ * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
+ *
+ * This structure groups together three kinds of CPU time that are tracked for
+ * threads and thread groups.  Most things considering CPU time want to group
+ * these counts together and treat all three of them in parallel.
+ */
+struct task_cputime {
+	u64				utime;
+	u64				stime;
+	unsigned long long		sum_exec_runtime;
+};
+
+#endif
-- 
cgit v1.2.3


From 3a245c0f110e2bfcf7f2cd2248a29005c78999e3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:06 +0200
Subject: posix-cpu-timers: Move expiry cache into struct posix_cputimers

The expiry cache belongs into the posix_cputimers container where the other
cpu timers information is.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192921.014444012@linutronix.de
---
 include/linux/posix-timers.h   | 22 +++++++++++++++++++++
 include/linux/sched.h          |  8 --------
 include/linux/sched/signal.h   |  3 ---
 kernel/fork.c                  | 25 +++--------------------
 kernel/sched/rt.c              |  6 ++++--
 kernel/time/posix-cpu-timers.c | 45 +++++++++++++++++++++++++-----------------
 6 files changed, 56 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index cdef89750b2c..a3731ba15bce 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -62,24 +62,43 @@ static inline int clockid_to_fd(const clockid_t clk)
 	return ~(clk >> 3);
 }
 
+/*
+ * Alternate field names for struct task_cputime when used on cache
+ * expirations. Will go away soon.
+ */
+#define virt_exp			utime
+#define prof_exp			stime
+#define sched_exp			sum_exec_runtime
+
 #ifdef CONFIG_POSIX_TIMERS
 /**
  * posix_cputimers - Container for posix CPU timer related data
+ * @cputime_expires:	Earliest-expiration cache
  * @cpu_timers:		List heads to queue posix CPU timers
  *
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
+	struct task_cputime	cputime_expires;
 	struct list_head	cpu_timers[CPUCLOCK_MAX];
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
+	memset(&pct->cputime_expires, 0, sizeof(pct->cputime_expires));
 	INIT_LIST_HEAD(&pct->cpu_timers[0]);
 	INIT_LIST_HEAD(&pct->cpu_timers[1]);
 	INIT_LIST_HEAD(&pct->cpu_timers[2]);
 }
 
+void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
+
+static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
+					       u64 runtime)
+{
+	pct->cputime_expires.sched_exp = runtime;
+}
+
 /* Init task static initializer */
 #define INIT_CPU_TIMERLISTS(c)	{					\
 	LIST_HEAD_INIT(c.cpu_timers[0]),				\
@@ -94,6 +113,9 @@ static inline void posix_cputimers_init(struct posix_cputimers *pct)
 #else
 struct posix_cputimers { };
 #define INIT_CPU_TIMERS(s)
+static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
+static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
+					      u64 cpu_limit) { }
 #endif
 
 #define REQUEUE_PENDING 1
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 37c39df9b186..8cc8e323093f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -246,11 +246,6 @@ struct prev_cputime {
 #endif
 };
 
-/* Alternate field names when used on cache expirations: */
-#define virt_exp			utime
-#define prof_exp			stime
-#define sched_exp			sum_exec_runtime
-
 enum vtime_state {
 	/* Task is sleeping or running in a CPU with VTIME inactive: */
 	VTIME_INACTIVE = 0,
@@ -862,9 +857,6 @@ struct task_struct {
 	unsigned long			min_flt;
 	unsigned long			maj_flt;
 
-#ifdef CONFIG_POSIX_TIMERS
-	struct task_cputime		cputime_expires;
-#endif
 	/* Empty if CONFIG_POSIX_CPUTIMERS=n */
 	struct posix_cputimers		posix_cputimers;
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 88fbb3f1c375..729bd892ee45 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -149,9 +149,6 @@ struct signal_struct {
 	 */
 	struct thread_group_cputimer cputimer;
 
-	/* Earliest-expiration cache. */
-	struct task_cputime cputime_expires;
-
 #endif
 	/* Empty if CONFIG_POSIX_TIMERS=n */
 	struct posix_cputimers posix_cputimers;
diff --git a/kernel/fork.c b/kernel/fork.c
index b6a135e4275b..52bfe7c20ff6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1527,12 +1527,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	unsigned long cpu_limit;
 
 	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
-	if (cpu_limit != RLIM_INFINITY) {
-		sig->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC;
+	posix_cputimers_group_init(pct, cpu_limit);
+	if (cpu_limit != RLIM_INFINITY)
 		sig->cputimer.running = true;
-	}
-
-	posix_cputimers_init(pct);
 }
 #else
 static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { }
@@ -1638,22 +1635,6 @@ static void rt_mutex_init_task(struct task_struct *p)
 #endif
 }
 
-#ifdef CONFIG_POSIX_TIMERS
-/*
- * Initialize POSIX timer handling for a single task.
- */
-static void posix_cpu_timers_init(struct task_struct *tsk)
-{
-	tsk->cputime_expires.prof_exp = 0;
-	tsk->cputime_expires.virt_exp = 0;
-	tsk->cputime_expires.sched_exp = 0;
-
-	posix_cputimers_init(&tsk->posix_cputimers);
-}
-#else
-static inline void posix_cpu_timers_init(struct task_struct *tsk) { }
-#endif
-
 static inline void init_task_pid_links(struct task_struct *task)
 {
 	enum pid_type type;
@@ -1932,7 +1913,7 @@ static __latent_entropy struct task_struct *copy_process(
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
-	posix_cpu_timers_init(p);
+	posix_cputimers_init(&p->posix_cputimers);
 
 	p->io_context = NULL;
 	audit_set_context(p, NULL);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index da3e85e61013..d6678f773c96 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2305,8 +2305,10 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		}
 
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
-		if (p->rt.timeout > next)
-			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
+		if (p->rt.timeout > next) {
+			posix_cputimers_rt_watchdog(&p->posix_cputimers,
+						    p->se.sum_exec_runtime);
+		}
 	}
 }
 #else
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 849e2045fb6e..3e29d1692437 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -20,11 +20,18 @@
 
 static void posix_cpu_timer_rearm(struct k_itimer *timer);
 
+void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
+{
+	posix_cputimers_init(pct);
+	if (cpu_limit != RLIM_INFINITY)
+		pct->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC;
+}
+
 /*
  * Called after updating RLIMIT_CPU to run cpu timer and update
- * tsk->signal->cputime_expires expiration cache if necessary. Needs
- * siglock protection since other code may update expiration cache as
- * well.
+ * tsk->signal->posix_cputimers.cputime_expires expiration cache if
+ * necessary. Needs siglock protection since other code may update
+ * expiration cache as well.
  */
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
 {
@@ -447,10 +454,10 @@ static void arm_timer(struct k_itimer *timer)
 
 	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
 		head = p->posix_cputimers.cpu_timers;
-		cputime_expires = &p->cputime_expires;
+		cputime_expires = &p->posix_cputimers.cputime_expires;
 	} else {
 		head = p->signal->posix_cputimers.cpu_timers;
-		cputime_expires = &p->signal->cputime_expires;
+		cputime_expires = &p->signal->posix_cputimers.cputime_expires;
 	}
 	head += CPUCLOCK_WHICH(timer->it_clock);
 
@@ -774,7 +781,7 @@ static void check_thread_timers(struct task_struct *tsk,
 				struct list_head *firing)
 {
 	struct list_head *timers = tsk->posix_cputimers.cpu_timers;
-	struct task_cputime *tsk_expires = &tsk->cputime_expires;
+	struct task_cputime *tsk_expires = &tsk->posix_cputimers.cputime_expires;
 	u64 expires, stime, utime;
 	unsigned long soft;
 
@@ -785,7 +792,7 @@ static void check_thread_timers(struct task_struct *tsk,
 	 * If cputime_expires is zero, then there are no active
 	 * per thread CPU timers.
 	 */
-	if (task_cputime_zero(&tsk->cputime_expires))
+	if (task_cputime_zero(tsk_expires))
 		return;
 
 	task_cputime(tsk, &utime, &stime);
@@ -954,10 +961,10 @@ static void check_process_timers(struct task_struct *tsk,
 			prof_expires = x;
 	}
 
-	sig->cputime_expires.prof_exp = prof_expires;
-	sig->cputime_expires.virt_exp = virt_expires;
-	sig->cputime_expires.sched_exp = sched_expires;
-	if (task_cputime_zero(&sig->cputime_expires))
+	sig->posix_cputimers.cputime_expires.prof_exp = prof_expires;
+	sig->posix_cputimers.cputime_expires.virt_exp = virt_expires;
+	sig->posix_cputimers.cputime_expires.sched_exp = sched_expires;
+	if (task_cputime_zero(&sig->posix_cputimers.cputime_expires))
 		stop_process_timers(sig);
 
 	sig->cputimer.checking_timer = false;
@@ -1058,12 +1065,13 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 
-	if (!task_cputime_zero(&tsk->cputime_expires)) {
+	if (!task_cputime_zero(&tsk->posix_cputimers.cputime_expires)) {
 		struct task_cputime task_sample;
 
 		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
 		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
-		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
+		if (task_cputime_expired(&task_sample,
+					 &tsk->posix_cputimers.cputime_expires))
 			return 1;
 	}
 
@@ -1088,7 +1096,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 
 		sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
 
-		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
+		if (task_cputime_expired(&group_sample,
+					 &sig->posix_cputimers.cputime_expires))
 			return 1;
 	}
 
@@ -1204,12 +1213,12 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	 */
 	switch (clock_idx) {
 	case CPUCLOCK_PROF:
-		if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
-			tsk->signal->cputime_expires.prof_exp = *newval;
+		if (expires_gt(tsk->signal->posix_cputimers.cputime_expires.prof_exp, *newval))
+			tsk->signal->posix_cputimers.cputime_expires.prof_exp = *newval;
 		break;
 	case CPUCLOCK_VIRT:
-		if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
-			tsk->signal->cputime_expires.virt_exp = *newval;
+		if (expires_gt(tsk->signal->posix_cputimers.cputime_expires.virt_exp, *newval))
+			tsk->signal->posix_cputimers.cputime_expires.virt_exp = *newval;
 		break;
 	}
 
-- 
cgit v1.2.3


From 11b8462f7e1d25f639c88949a2746a9c2667a766 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:07 +0200
Subject: posix-cpu-timers: Provide array based access to expiry cache

Using struct task_cputime for the expiry cache is a pretty odd choice and
comes with magic defines to rename the fields for usage in the expiry
cache.

struct task_cputime is basically a u64 array with 3 members, but it has
distinct members.

The expiry cache content is different than the content of task_cputime
because

  expiry[PROF]  = task_cputime.stime + task_cputime.utime
  expiry[VIRT]  = task_cputime.utime
  expiry[SCHED] = task_cputime.sum_exec_runtime

So there is no direct mapping between task_cputime and the expiry cache and
the #define based remapping is just a horrible hack.

Having the expiry cache array based allows further simplification of the
expiry code.

To avoid an all in one cleanup which is hard to review add a temporary
anonymous union into struct task_cputime which allows array based access to
it. That requires to reorder the members. Add a build time sanity check to
validate that the members are at the same place.

The union and the build time checks will be removed after conversion.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192921.105793824@linutronix.de
---
 include/linux/posix-timers.h   | 24 ++++++++++++++++++------
 include/linux/sched/types.h    |  4 ++--
 kernel/time/posix-cpu-timers.c | 12 +++++++++++-
 3 files changed, 31 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a3731ba15bce..ed0f6ec30aa6 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -65,27 +65,39 @@ static inline int clockid_to_fd(const clockid_t clk)
 /*
  * Alternate field names for struct task_cputime when used on cache
  * expirations. Will go away soon.
+ *
+ * stime corresponds to CLOCKCPU_PROF
+ * utime corresponds to CLOCKCPU_VIRT
+ * sum_exex_runtime corresponds to CLOCKCPU_SCHED
+ *
+ * The ordering is currently enforced so struct task_cputime and the
+ * expiries array in struct posix_cputimers are equivalent.
  */
-#define virt_exp			utime
 #define prof_exp			stime
+#define virt_exp			utime
 #define sched_exp			sum_exec_runtime
 
 #ifdef CONFIG_POSIX_TIMERS
 /**
  * posix_cputimers - Container for posix CPU timer related data
- * @cputime_expires:	Earliest-expiration cache
+ * @cputime_expires:	Earliest-expiration cache task_cputime based
+ * @expiries:		Earliest-expiration cache array based
  * @cpu_timers:		List heads to queue posix CPU timers
  *
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
-	struct task_cputime	cputime_expires;
-	struct list_head	cpu_timers[CPUCLOCK_MAX];
+	/* Temporary union until all users are cleaned up */
+	union {
+		struct task_cputime	cputime_expires;
+		u64			expiries[CPUCLOCK_MAX];
+	};
+	struct list_head		cpu_timers[CPUCLOCK_MAX];
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
-	memset(&pct->cputime_expires, 0, sizeof(pct->cputime_expires));
+	memset(&pct->expiries, 0, sizeof(pct->expiries));
 	INIT_LIST_HEAD(&pct->cpu_timers[0]);
 	INIT_LIST_HEAD(&pct->cpu_timers[1]);
 	INIT_LIST_HEAD(&pct->cpu_timers[2]);
@@ -96,7 +108,7 @@ void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
 static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 					       u64 runtime)
 {
-	pct->cputime_expires.sched_exp = runtime;
+	pct->expiries[CPUCLOCK_SCHED] = runtime;
 }
 
 /* Init task static initializer */
diff --git a/include/linux/sched/types.h b/include/linux/sched/types.h
index 2c5c28ddd9b2..3c3e049224ae 100644
--- a/include/linux/sched/types.h
+++ b/include/linux/sched/types.h
@@ -6,8 +6,8 @@
 
 /**
  * struct task_cputime - collected CPU time counts
- * @utime:		time spent in user mode, in nanoseconds
  * @stime:		time spent in kernel mode, in nanoseconds
+ * @utime:		time spent in user mode, in nanoseconds
  * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
  *
  * This structure groups together three kinds of CPU time that are tracked for
@@ -15,8 +15,8 @@
  * these counts together and treat all three of them in parallel.
  */
 struct task_cputime {
-	u64				utime;
 	u64				stime;
+	u64				utime;
 	unsigned long long		sum_exec_runtime;
 };
 
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 3e29d1692437..a38b6d04e8b5 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -18,13 +18,23 @@
 
 #include "posix-timers.h"
 
+static inline void temporary_check(void)
+{
+	BUILD_BUG_ON(offsetof(struct task_cputime, stime) !=
+		     CPUCLOCK_PROF * sizeof(u64));
+	BUILD_BUG_ON(offsetof(struct task_cputime, utime) !=
+		     CPUCLOCK_VIRT * sizeof(u64));
+	BUILD_BUG_ON(offsetof(struct task_cputime, sum_exec_runtime) !=
+		     CPUCLOCK_SCHED * sizeof(u64));
+}
+
 static void posix_cpu_timer_rearm(struct k_itimer *timer);
 
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
 {
 	posix_cputimers_init(pct);
 	if (cpu_limit != RLIM_INFINITY)
-		pct->cputime_expires.prof_exp = cpu_limit * NSEC_PER_SEC;
+		pct->expiries[CPUCLOCK_PROF] = cpu_limit * NSEC_PER_SEC;
 }
 
 /*
-- 
cgit v1.2.3


From bbc9bae1e49bee9862c7f24101a728e73cd9f589 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:11 +0200
Subject: posix-cpu-timers: Remove the odd field rename defines

The last users of the odd define based renaming of struct task_cputime
members are gone. Good riddance.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192921.499058279@linutronix.de
---
 include/linux/posix-timers.h | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index ed0f6ec30aa6..e36c6fda1af9 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -62,21 +62,6 @@ static inline int clockid_to_fd(const clockid_t clk)
 	return ~(clk >> 3);
 }
 
-/*
- * Alternate field names for struct task_cputime when used on cache
- * expirations. Will go away soon.
- *
- * stime corresponds to CLOCKCPU_PROF
- * utime corresponds to CLOCKCPU_VIRT
- * sum_exex_runtime corresponds to CLOCKCPU_SCHED
- *
- * The ordering is currently enforced so struct task_cputime and the
- * expiries array in struct posix_cputimers are equivalent.
- */
-#define prof_exp			stime
-#define virt_exp			utime
-#define sched_exp			sum_exec_runtime
-
 #ifdef CONFIG_POSIX_TIMERS
 /**
  * posix_cputimers - Container for posix CPU timer related data
-- 
cgit v1.2.3


From 46b883995c88520f2c4de6a64cccc04c69d59f83 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:14 +0200
Subject: posix-cpu-timers: Remove cputime_expires

The last users of the magic struct cputime based expiry cache are
gone. Remove the leftovers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192921.790209622@linutronix.de
---
 include/linux/posix-timers.h   |  9 ++-------
 kernel/time/posix-cpu-timers.c | 10 ----------
 2 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index e36c6fda1af9..fd9098467d6d 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -65,19 +65,14 @@ static inline int clockid_to_fd(const clockid_t clk)
 #ifdef CONFIG_POSIX_TIMERS
 /**
  * posix_cputimers - Container for posix CPU timer related data
- * @cputime_expires:	Earliest-expiration cache task_cputime based
  * @expiries:		Earliest-expiration cache array based
  * @cpu_timers:		List heads to queue posix CPU timers
  *
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
-	/* Temporary union until all users are cleaned up */
-	union {
-		struct task_cputime	cputime_expires;
-		u64			expiries[CPUCLOCK_MAX];
-	};
-	struct list_head		cpu_timers[CPUCLOCK_MAX];
+	u64			expiries[CPUCLOCK_MAX];
+	struct list_head	cpu_timers[CPUCLOCK_MAX];
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index e159b039e44a..ffd49181e23d 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -18,16 +18,6 @@
 
 #include "posix-timers.h"
 
-static inline void temporary_check(void)
-{
-	BUILD_BUG_ON(offsetof(struct task_cputime, stime) !=
-		     CPUCLOCK_PROF * sizeof(u64));
-	BUILD_BUG_ON(offsetof(struct task_cputime, utime) !=
-		     CPUCLOCK_VIRT * sizeof(u64));
-	BUILD_BUG_ON(offsetof(struct task_cputime, sum_exec_runtime) !=
-		     CPUCLOCK_SCHED * sizeof(u64));
-}
-
 static void posix_cpu_timer_rearm(struct k_itimer *timer);
 
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
-- 
cgit v1.2.3


From 87dc64480fb19a6a0fedbdff1e2557be50673287 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 26 Aug 2019 20:22:24 +0200
Subject: posix-cpu-timers: Restructure expiry array

Now that the abused struct task_cputime is gone, it's more natural to
bundle the expiry cache and the list head of each clock into a struct and
have an array of those structs.

Follow the hrtimer naming convention of 'bases' and rename the expiry cache
to 'nextevt' and adapt all usage sites.

Generates also better code .text size shrinks by 80 bytes.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908262021140.1939@nanos.tec.linutronix.de
---
 include/linux/posix-timers.h   |  41 ++++++++++------
 kernel/time/posix-cpu-timers.c | 105 ++++++++++++++++++++++-------------------
 2 files changed, 83 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index fd9098467d6d..64bd10d251fe 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -63,24 +63,33 @@ static inline int clockid_to_fd(const clockid_t clk)
 }
 
 #ifdef CONFIG_POSIX_TIMERS
+
 /**
- * posix_cputimers - Container for posix CPU timer related data
- * @expiries:		Earliest-expiration cache array based
+ * posix_cputimer_base - Container per posix CPU clock
+ * @nextevt:		Earliest-expiration cache
  * @cpu_timers:		List heads to queue posix CPU timers
+ */
+struct posix_cputimer_base {
+	u64			nextevt;
+	struct list_head	cpu_timers;
+};
+
+/**
+ * posix_cputimers - Container for posix CPU timer related data
+ * @bases:		Base container for posix CPU clocks
  *
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
-	u64			expiries[CPUCLOCK_MAX];
-	struct list_head	cpu_timers[CPUCLOCK_MAX];
+	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
-	memset(&pct->expiries, 0, sizeof(pct->expiries));
-	INIT_LIST_HEAD(&pct->cpu_timers[0]);
-	INIT_LIST_HEAD(&pct->cpu_timers[1]);
-	INIT_LIST_HEAD(&pct->cpu_timers[2]);
+	memset(pct->bases, 0, sizeof(pct->bases));
+	INIT_LIST_HEAD(&pct->bases[0].cpu_timers);
+	INIT_LIST_HEAD(&pct->bases[1].cpu_timers);
+	INIT_LIST_HEAD(&pct->bases[2].cpu_timers);
 }
 
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
@@ -88,19 +97,23 @@ void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
 static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 					       u64 runtime)
 {
-	pct->expiries[CPUCLOCK_SCHED] = runtime;
+	pct->bases[CPUCLOCK_SCHED].nextevt = runtime;
 }
 
 /* Init task static initializer */
-#define INIT_CPU_TIMERLISTS(c)	{					\
-	LIST_HEAD_INIT(c.cpu_timers[0]),				\
-	LIST_HEAD_INIT(c.cpu_timers[1]),				\
-	LIST_HEAD_INIT(c.cpu_timers[2]),				\
+#define INIT_CPU_TIMERBASE(b) {						\
+	.cpu_timers = LIST_HEAD_INIT(b.cpu_timers),			\
+}
+
+#define INIT_CPU_TIMERBASES(b) {					\
+	INIT_CPU_TIMERBASE(b[0]),					\
+	INIT_CPU_TIMERBASE(b[1]),					\
+	INIT_CPU_TIMERBASE(b[2]),					\
 }
 
 #define INIT_CPU_TIMERS(s)						\
 	.posix_cputimers = {						\
-		.cpu_timers = INIT_CPU_TIMERLISTS(s.posix_cputimers),	\
+		.bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases),	\
 	},
 #else
 struct posix_cputimers { };
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index ffd49181e23d..9ac601abc4c4 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -24,13 +24,13 @@ void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
 {
 	posix_cputimers_init(pct);
 	if (cpu_limit != RLIM_INFINITY)
-		pct->expiries[CPUCLOCK_PROF] = cpu_limit * NSEC_PER_SEC;
+		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
 }
 
 /*
  * Called after updating RLIMIT_CPU to run cpu timer and update
- * tsk->signal->posix_cputimers.expiries expiration cache if
- * necessary. Needs siglock protection since other code may update
+ * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
+ * necessary. Needs siglock protection since other code may update the
  * expiration cache as well.
  */
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
@@ -122,9 +122,11 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
 	}
 }
 
-static inline bool expiry_cache_is_zero(const u64 *ec)
+static inline bool expiry_cache_is_zero(const struct posix_cputimers *pct)
 {
-	return !(ec[CPUCLOCK_PROF] | ec[CPUCLOCK_VIRT] | ec[CPUCLOCK_SCHED]);
+	return !(pct->bases[CPUCLOCK_PROF].nextevt |
+		 pct->bases[CPUCLOCK_VIRT].nextevt |
+		 pct->bases[CPUCLOCK_SCHED].nextevt);
 }
 
 static int
@@ -432,9 +434,9 @@ static void cleanup_timers_list(struct list_head *head)
  */
 static void cleanup_timers(struct posix_cputimers *pct)
 {
-	cleanup_timers_list(&pct->cpu_timers[CPUCLOCK_PROF]);
-	cleanup_timers_list(&pct->cpu_timers[CPUCLOCK_VIRT]);
-	cleanup_timers_list(&pct->cpu_timers[CPUCLOCK_SCHED]);
+	cleanup_timers_list(&pct->bases[CPUCLOCK_PROF].cpu_timers);
+	cleanup_timers_list(&pct->bases[CPUCLOCK_VIRT].cpu_timers);
+	cleanup_timers_list(&pct->bases[CPUCLOCK_SCHED].cpu_timers);
 }
 
 /*
@@ -464,21 +466,19 @@ static void arm_timer(struct k_itimer *timer)
 {
 	struct cpu_timer_list *const nt = &timer->it.cpu;
 	int clkidx = CPUCLOCK_WHICH(timer->it_clock);
-	u64 *cpuexp, newexp = timer->it.cpu.expires;
 	struct task_struct *p = timer->it.cpu.task;
+	u64 newexp = timer->it.cpu.expires;
+	struct posix_cputimer_base *base;
 	struct list_head *head, *listpos;
 	struct cpu_timer_list *next;
 
-	if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
-		head = p->posix_cputimers.cpu_timers + clkidx;
-		cpuexp = p->posix_cputimers.expiries + clkidx;
-	} else {
-		head = p->signal->posix_cputimers.cpu_timers + clkidx;
-		cpuexp = p->signal->posix_cputimers.expiries + clkidx;
-	}
+	if (CPUCLOCK_PERTHREAD(timer->it_clock))
+		base = p->posix_cputimers.bases + clkidx;
+	else
+		base = p->signal->posix_cputimers.bases + clkidx;
 
-	listpos = head;
-	list_for_each_entry(next, head, entry) {
+	listpos = head = &base->cpu_timers;
+	list_for_each_entry(next,head, entry) {
 		if (nt->expires < next->expires)
 			break;
 		listpos = &next->entry;
@@ -494,8 +494,8 @@ static void arm_timer(struct k_itimer *timer)
 	 * for process timers we share expiration cache with itimers
 	 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
 	 */
-	if (expires_gt(*cpuexp, newexp))
-		*cpuexp = newexp;
+	if (expires_gt(base->nextevt, newexp))
+		base->nextevt = newexp;
 
 	if (CPUCLOCK_PERTHREAD(timer->it_clock))
 		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
@@ -783,9 +783,9 @@ static inline void check_dl_overrun(struct task_struct *tsk)
 static void check_thread_timers(struct task_struct *tsk,
 				struct list_head *firing)
 {
-	struct list_head *timers = tsk->posix_cputimers.cpu_timers;
-	u64 stime, utime, *expires = tsk->posix_cputimers.expiries;
+	struct posix_cputimer_base *base = tsk->posix_cputimers.bases;
 	unsigned long soft;
+	u64 stime, utime;
 
 	if (dl_task(tsk))
 		check_dl_overrun(tsk);
@@ -794,14 +794,18 @@ static void check_thread_timers(struct task_struct *tsk,
 	 * If the expiry cache is zero, then there are no active per thread
 	 * CPU timers.
 	 */
-	if (expiry_cache_is_zero(tsk->posix_cputimers.expiries))
+	if (expiry_cache_is_zero(&tsk->posix_cputimers))
 		return;
 
 	task_cputime(tsk, &utime, &stime);
 
-	*expires++ = check_timers_list(timers, firing, utime + stime);
-	*expires++ = check_timers_list(++timers, firing, utime);
-	*expires = check_timers_list(++timers, firing, tsk->se.sum_exec_runtime);
+	base->nextevt = check_timers_list(&base->cpu_timers, firing,
+					  utime + stime);
+	base++;
+	base->nextevt = check_timers_list(&base->cpu_timers, firing, utime);
+	base++;
+	base->nextevt = check_timers_list(&base->cpu_timers, firing,
+					  tsk->se.sum_exec_runtime);
 
 	/*
 	 * Check for the special case thread timers.
@@ -840,7 +844,7 @@ static void check_thread_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (expiry_cache_is_zero(tsk->posix_cputimers.expiries))
+	if (expiry_cache_is_zero(&tsk->posix_cputimers))
 		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
@@ -884,7 +888,7 @@ static void check_process_timers(struct task_struct *tsk,
 				 struct list_head *firing)
 {
 	struct signal_struct *const sig = tsk->signal;
-	struct list_head *timers = sig->posix_cputimers.cpu_timers;
+	struct posix_cputimer_base *base = sig->posix_cputimers.bases;
 	u64 utime, ptime, virt_expires, prof_expires;
 	u64 sum_sched_runtime, sched_expires;
 	struct task_cputime cputime;
@@ -912,9 +916,12 @@ static void check_process_timers(struct task_struct *tsk,
 	ptime = utime + cputime.stime;
 	sum_sched_runtime = cputime.sum_exec_runtime;
 
-	prof_expires = check_timers_list(timers, firing, ptime);
-	virt_expires = check_timers_list(++timers, firing, utime);
-	sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
+	prof_expires = check_timers_list(&base[CPUCLOCK_PROF].cpu_timers,
+					 firing, ptime);
+	virt_expires = check_timers_list(&base[CPUCLOCK_VIRT].cpu_timers,
+					 firing, utime);
+	sched_expires = check_timers_list(&base[CPUCLOCK_SCHED].cpu_timers,
+					  firing, sum_sched_runtime);
 
 	/*
 	 * Check for the special case process timers.
@@ -959,11 +966,11 @@ static void check_process_timers(struct task_struct *tsk,
 			prof_expires = x;
 	}
 
-	sig->posix_cputimers.expiries[CPUCLOCK_PROF] = prof_expires;
-	sig->posix_cputimers.expiries[CPUCLOCK_VIRT] = virt_expires;
-	sig->posix_cputimers.expiries[CPUCLOCK_SCHED] = sched_expires;
+	base[CPUCLOCK_PROF].nextevt = prof_expires;
+	base[CPUCLOCK_VIRT].nextevt = virt_expires;
+	base[CPUCLOCK_SCHED].nextevt = sched_expires;
 
-	if (expiry_cache_is_zero(sig->posix_cputimers.expiries))
+	if (expiry_cache_is_zero(&sig->posix_cputimers))
 		stop_process_timers(sig);
 
 	sig->cputimer.checking_timer = false;
@@ -1028,20 +1035,21 @@ unlock:
 }
 
 /**
- * task_cputimers_expired - Compare two task_cputime entities.
+ * task_cputimers_expired - Check whether posix CPU timers are expired
  *
  * @samples:	Array of current samples for the CPUCLOCK clocks
- * @expiries:	Array of expiry values for the CPUCLOCK clocks
+ * @pct:	Pointer to a posix_cputimers container
  *
- * Returns true if any mmember of @samples is greater than the corresponding
- * member of @expiries if that member is non zero. False otherwise
+ * Returns true if any member of @samples is greater than the corresponding
+ * member of @pct->bases[CLK].nextevt. False otherwise
  */
-static inline bool task_cputimers_expired(const u64 *sample, const u64 *expiries)
+static inline bool
+task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct)
 {
 	int i;
 
 	for (i = 0; i < CPUCLOCK_MAX; i++) {
-		if (expiries[i] && sample[i] >= expiries[i])
+		if (pct->bases[i].nextevt && sample[i] >= pct->bases[i].nextevt)
 			return true;
 	}
 	return false;
@@ -1059,14 +1067,13 @@ static inline bool task_cputimers_expired(const u64 *sample, const u64 *expiries
  */
 static inline bool fastpath_timer_check(struct task_struct *tsk)
 {
-	u64 *expiries = tsk->posix_cputimers.expiries;
 	struct signal_struct *sig;
 
-	if (!expiry_cache_is_zero(expiries)) {
+	if (!expiry_cache_is_zero(&tsk->posix_cputimers)) {
 		u64 samples[CPUCLOCK_MAX];
 
 		task_sample_cputime(tsk, samples);
-		if (task_cputimers_expired(samples, expiries))
+		if (task_cputimers_expired(samples, &tsk->posix_cputimers))
 			return true;
 	}
 
@@ -1092,8 +1099,7 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
 		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
 					   samples);
 
-		if (task_cputimers_expired(samples,
-					   sig->posix_cputimers.expiries))
+		if (task_cputimers_expired(samples, &sig->posix_cputimers))
 			return true;
 	}
 
@@ -1176,11 +1182,12 @@ void run_posix_cpu_timers(void)
 void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 			   u64 *newval, u64 *oldval)
 {
-	u64 now, *expiry = tsk->signal->posix_cputimers.expiries + clkid;
+	u64 now, *nextevt;
 
 	if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
 		return;
 
+	nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
 	now = cpu_clock_sample_group(clkid, tsk, true);
 
 	if (oldval) {
@@ -1207,8 +1214,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 	 * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF
 	 * expiry cache is also used by RLIMIT_CPU!.
 	 */
-	if (expires_gt(*expiry, *newval))
-		*expiry = *newval;
+	if (expires_gt(*nextevt, *newval))
+		*nextevt = *newval;
 
 	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
 }
-- 
cgit v1.2.3


From b7be4ef1365dcb56fdffc6689e41058b23f5996d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:16 +0200
Subject: posix-cpu-timers: Switch thread group sampling to array

That allows more simplifications in various places.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192921.988426956@linutronix.de
---
 include/linux/sched/cputime.h  |   2 +-
 kernel/time/itimer.c           |  11 ++---
 kernel/time/posix-cpu-timers.c | 104 ++++++++++++++++++-----------------------
 3 files changed, 49 insertions(+), 68 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 2638fd0ab3f2..eefa5dff16b4 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -61,7 +61,7 @@ extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
  * Thread group CPU time accounting.
  */
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
-void thread_group_sample_cputime(struct task_struct *tsk, struct task_cputime *times);
+void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples);
 
 /*
  * The following are functions that support scheduler-internal time accounting.
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index ae04bc259240..77f1e5635cc1 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -55,15 +55,10 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id,
 	val = it->expires;
 	interval = it->incr;
 	if (val) {
-		struct task_cputime cputime;
-		u64 t;
+		u64 t, samples[CPUCLOCK_MAX];
 
-		thread_group_sample_cputime(tsk, &cputime);
-		if (clock_id == CPUCLOCK_PROF)
-			t = cputime.utime + cputime.stime;
-		else
-			/* CPUCLOCK_VIRT */
-			t = cputime.utime;
+		thread_group_sample_cputime(tsk, samples);
+		t = samples[clock_id];
 
 		if (val < t)
 			/* about to fire */
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 9ac601abc4c4..e62139a89375 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -225,22 +225,14 @@ retry:
 	}
 }
 
-static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
+static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
+			      struct task_cputime *sum)
 {
 	__update_gt_cputime(&cputime_atomic->utime, sum->utime);
 	__update_gt_cputime(&cputime_atomic->stime, sum->stime);
 	__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
 }
 
-/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
-static inline void sample_cputime_atomic(struct task_cputime *times,
-					 struct task_cputime_atomic *atomic_times)
-{
-	times->utime = atomic64_read(&atomic_times->utime);
-	times->stime = atomic64_read(&atomic_times->stime);
-	times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
-}
-
 /**
  * thread_group_sample_cputime - Sample cputime for a given task
  * @tsk:	Task for which cputime needs to be started
@@ -252,20 +244,19 @@ static inline void sample_cputime_atomic(struct task_cputime *times,
  *
  * Updates @times with an uptodate sample of the thread group cputimes.
  */
-void thread_group_sample_cputime(struct task_struct *tsk,
-				struct task_cputime *times)
+void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 
 	WARN_ON_ONCE(!cputimer->running);
 
-	sample_cputime_atomic(times, &cputimer->cputime_atomic);
+	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
 }
 
 /**
  * thread_group_start_cputime - Start cputime and return a sample
  * @tsk:	Task for which cputime needs to be started
- * @iimes:	Storage for time samples
+ * @samples:	Storage for time samples
  *
  * The thread group cputime accouting is avoided when there are no posix
  * CPU timers armed. Before starting a timer it's required to check whether
@@ -274,14 +265,14 @@ void thread_group_sample_cputime(struct task_struct *tsk,
  *
  * Updates @times with an uptodate sample of the thread group cputimes.
  */
-static void
-thread_group_start_cputime(struct task_struct *tsk, struct task_cputime *times)
+static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
-	struct task_cputime sum;
 
 	/* Check if cputimer isn't running. This is accessed without locking. */
 	if (!READ_ONCE(cputimer->running)) {
+		struct task_cputime sum;
+
 		/*
 		 * The POSIX timer interface allows for absolute time expiry
 		 * values through the TIMER_ABSTIME flag, therefore we have
@@ -299,7 +290,15 @@ thread_group_start_cputime(struct task_struct *tsk, struct task_cputime *times)
 		 */
 		WRITE_ONCE(cputimer->running, true);
 	}
-	sample_cputime_atomic(times, &cputimer->cputime_atomic);
+	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
+}
+
+static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
+{
+	struct task_cputime ct;
+
+	thread_group_cputime(tsk, &ct);
+	store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
 }
 
 /*
@@ -313,28 +312,18 @@ static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
 				  bool start)
 {
 	struct thread_group_cputimer *cputimer = &p->signal->cputimer;
-	struct task_cputime cputime;
+	u64 samples[CPUCLOCK_MAX];
 
 	if (!READ_ONCE(cputimer->running)) {
 		if (start)
-			thread_group_start_cputime(p, &cputime);
+			thread_group_start_cputime(p, samples);
 		else
-			thread_group_cputime(p, &cputime);
+			__thread_group_cputime(p, samples);
 	} else {
-		sample_cputime_atomic(&cputime, &cputimer->cputime_atomic);
+		proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
 	}
 
-	switch (clkid) {
-	case CPUCLOCK_PROF:
-		return cputime.utime + cputime.stime;
-	case CPUCLOCK_VIRT:
-		return cputime.utime;
-	case CPUCLOCK_SCHED:
-		return cputime.sum_exec_runtime;
-	default:
-		WARN_ON_ONCE(1);
-	}
-	return 0;
+	return samples[clkid];
 }
 
 static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
@@ -889,9 +878,7 @@ static void check_process_timers(struct task_struct *tsk,
 {
 	struct signal_struct *const sig = tsk->signal;
 	struct posix_cputimer_base *base = sig->posix_cputimers.bases;
-	u64 utime, ptime, virt_expires, prof_expires;
-	u64 sum_sched_runtime, sched_expires;
-	struct task_cputime cputime;
+	u64 virt_exp, prof_exp, sched_exp, samples[CPUCLOCK_MAX];
 	unsigned long soft;
 
 	/*
@@ -911,30 +898,29 @@ static void check_process_timers(struct task_struct *tsk,
 	 * Collect the current process totals. Group accounting is active
 	 * so the sample can be taken directly.
 	 */
-	sample_cputime_atomic(&cputime, &sig->cputimer.cputime_atomic);
-	utime = cputime.utime;
-	ptime = utime + cputime.stime;
-	sum_sched_runtime = cputime.sum_exec_runtime;
-
-	prof_expires = check_timers_list(&base[CPUCLOCK_PROF].cpu_timers,
-					 firing, ptime);
-	virt_expires = check_timers_list(&base[CPUCLOCK_VIRT].cpu_timers,
-					 firing, utime);
-	sched_expires = check_timers_list(&base[CPUCLOCK_SCHED].cpu_timers,
-					  firing, sum_sched_runtime);
+	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
+
+	prof_exp = check_timers_list(&base[CPUCLOCK_PROF].cpu_timers,
+				     firing, samples[CPUCLOCK_PROF]);
+	virt_exp = check_timers_list(&base[CPUCLOCK_VIRT].cpu_timers,
+				     firing, samples[CPUCLOCK_VIRT]);
+	sched_exp = check_timers_list(&base[CPUCLOCK_SCHED].cpu_timers,
+				      firing, samples[CPUCLOCK_SCHED]);
 
 	/*
 	 * Check for the special case process timers.
 	 */
-	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
-			 SIGPROF);
-	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
-			 SIGVTALRM);
+	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_exp,
+			 samples[CPUCLOCK_PROF], SIGPROF);
+	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_exp,
+			 samples[CPUCLOCK_PROF], SIGVTALRM);
+
 	soft = task_rlimit(tsk, RLIMIT_CPU);
 	if (soft != RLIM_INFINITY) {
-		unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
+		u64 softns, ptime = samples[CPUCLOCK_PROF];
 		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
-		u64 x;
+		unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
+
 		if (psecs >= hard) {
 			/*
 			 * At the hard limit, we just die.
@@ -961,14 +947,14 @@ static void check_process_timers(struct task_struct *tsk,
 				sig->rlim[RLIMIT_CPU].rlim_cur = soft;
 			}
 		}
-		x = soft * NSEC_PER_SEC;
-		if (!prof_expires || x < prof_expires)
-			prof_expires = x;
+		softns = soft * NSEC_PER_SEC;
+		if (!prof_exp || softns < prof_exp)
+			prof_exp = softns;
 	}
 
-	base[CPUCLOCK_PROF].nextevt = prof_expires;
-	base[CPUCLOCK_VIRT].nextevt = virt_expires;
-	base[CPUCLOCK_SCHED].nextevt = sched_expires;
+	base[CPUCLOCK_PROF].nextevt = prof_exp;
+	base[CPUCLOCK_VIRT].nextevt = virt_exp;
+	base[CPUCLOCK_SCHED].nextevt = sched_exp;
 
 	if (expiry_cache_is_zero(&sig->posix_cputimers))
 		stop_process_timers(sig);
-- 
cgit v1.2.3


From 2bbdbdae05167c688b6d3499a7dab74208b80a22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:19 +0200
Subject: posix-cpu-timers: Get rid of zero checks

Deactivation of the expiry cache is done by setting all clock caches to
0. That requires to have a check for zero in all places which update the
expiry cache:

	if (cache == 0 || new < cache)
		cache = new;

Use U64_MAX as the deactivated value, which allows to remove the zero
checks when updating the cache and reduces it to the obvious check:

	if (new < cache)
		cache = new;

This also removes the weird workaround in do_prlimit() which was required
to convert a RLIMIT_CPU value of 0 (immediate expiry) to 1 because handing
in 0 to the posix CPU timer code would have effectively disarmed it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190821192922.275086128@linutronix.de
---
 include/linux/posix-timers.h   |  7 +++++--
 kernel/sys.c                   |  9 ---------
 kernel/time/posix-cpu-timers.c | 38 +++++++++++++++-----------------------
 3 files changed, 20 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 64bd10d251fe..3ea920e8fe7f 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -86,7 +86,9 @@ struct posix_cputimers {
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
-	memset(pct->bases, 0, sizeof(pct->bases));
+	pct->bases[0].nextevt = U64_MAX;
+	pct->bases[1].nextevt = U64_MAX;
+	pct->bases[2].nextevt = U64_MAX;
 	INIT_LIST_HEAD(&pct->bases[0].cpu_timers);
 	INIT_LIST_HEAD(&pct->bases[1].cpu_timers);
 	INIT_LIST_HEAD(&pct->bases[2].cpu_timers);
@@ -102,7 +104,8 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 
 /* Init task static initializer */
 #define INIT_CPU_TIMERBASE(b) {						\
-	.cpu_timers = LIST_HEAD_INIT(b.cpu_timers),			\
+	.nextevt	= U64_MAX,					\
+	.cpu_timers	= LIST_HEAD_INIT(b.cpu_timers),			\
 }
 
 #define INIT_CPU_TIMERBASES(b) {					\
diff --git a/kernel/sys.c b/kernel/sys.c
index c578b75d7923..2462aa84247f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1557,15 +1557,6 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource,
 			retval = -EPERM;
 		if (!retval)
 			retval = security_task_setrlimit(tsk, resource, new_rlim);
-		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
-			/*
-			 * The caller is asking for an immediate RLIMIT_CPU
-			 * expiry.  But we use the zero value to mean "it was
-			 * never set".  So let's cheat and make it one second
-			 * instead
-			 */
-			new_rlim->rlim_cur = 1;
-		}
 	}
 	if (!retval) {
 		if (old_rlim)
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a738d7659915..cf85292575c5 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -122,11 +122,12 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
 	}
 }
 
-static inline bool expiry_cache_is_zero(const struct posix_cputimers *pct)
+/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
+static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
 {
-	return !(pct->bases[CPUCLOCK_PROF].nextevt |
-		 pct->bases[CPUCLOCK_VIRT].nextevt |
-		 pct->bases[CPUCLOCK_SCHED].nextevt);
+	return !(~pct->bases[CPUCLOCK_PROF].nextevt |
+		 ~pct->bases[CPUCLOCK_VIRT].nextevt |
+		 ~pct->bases[CPUCLOCK_SCHED].nextevt);
 }
 
 static int
@@ -442,11 +443,6 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 	cleanup_timers(&tsk->signal->posix_cputimers);
 }
 
-static inline int expires_gt(u64 expires, u64 new_exp)
-{
-	return expires == 0 || expires > new_exp;
-}
-
 /*
  * Insert the timer on the appropriate list before any timers that
  * expire later.  This must be called with the sighand lock held.
@@ -483,7 +479,7 @@ static void arm_timer(struct k_itimer *timer)
 	 * for process timers we share expiration cache with itimers
 	 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
 	 */
-	if (expires_gt(base->nextevt, newexp))
+	if (newexp < base->nextevt)
 		base->nextevt = newexp;
 
 	if (CPUCLOCK_PERTHREAD(timer->it_clock))
@@ -753,7 +749,7 @@ check_timers_list(struct list_head *timers,
 		list_move_tail(&t->entry, firing);
 	}
 
-	return 0;
+	return U64_MAX;
 }
 
 static inline void check_dl_overrun(struct task_struct *tsk)
@@ -779,11 +775,7 @@ static void check_thread_timers(struct task_struct *tsk,
 	if (dl_task(tsk))
 		check_dl_overrun(tsk);
 
-	/*
-	 * If the expiry cache is zero, then there are no active per thread
-	 * CPU timers.
-	 */
-	if (expiry_cache_is_zero(&tsk->posix_cputimers))
+	if (expiry_cache_is_inactive(&tsk->posix_cputimers))
 		return;
 
 	task_cputime(tsk, &utime, &stime);
@@ -833,7 +825,7 @@ static void check_thread_timers(struct task_struct *tsk,
 		}
 	}
 
-	if (expiry_cache_is_zero(&tsk->posix_cputimers))
+	if (expiry_cache_is_inactive(&tsk->posix_cputimers))
 		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
 }
 
@@ -864,7 +856,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
 		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
 	}
 
-	if (it->expires && (!*expires || it->expires < *expires))
+	if (it->expires && it->expires < *expires)
 		*expires = it->expires;
 }
 
@@ -948,7 +940,7 @@ static void check_process_timers(struct task_struct *tsk,
 			}
 		}
 		softns = soft * NSEC_PER_SEC;
-		if (!prof_exp || softns < prof_exp)
+		if (softns < prof_exp)
 			prof_exp = softns;
 	}
 
@@ -956,7 +948,7 @@ static void check_process_timers(struct task_struct *tsk,
 	base[CPUCLOCK_VIRT].nextevt = virt_exp;
 	base[CPUCLOCK_SCHED].nextevt = sched_exp;
 
-	if (expiry_cache_is_zero(&sig->posix_cputimers))
+	if (expiry_cache_is_inactive(&sig->posix_cputimers))
 		stop_process_timers(sig);
 
 	sig->cputimer.checking_timer = false;
@@ -1035,7 +1027,7 @@ task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct)
 	int i;
 
 	for (i = 0; i < CPUCLOCK_MAX; i++) {
-		if (pct->bases[i].nextevt && sample[i] >= pct->bases[i].nextevt)
+		if (sample[i] >= pct->bases[i].nextevt)
 			return true;
 	}
 	return false;
@@ -1055,7 +1047,7 @@ static inline bool fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 
-	if (!expiry_cache_is_zero(&tsk->posix_cputimers)) {
+	if (!expiry_cache_is_inactive(&tsk->posix_cputimers)) {
 		u64 samples[CPUCLOCK_MAX];
 
 		task_sample_cputime(tsk, samples);
@@ -1200,7 +1192,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
 	 * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF
 	 * expiry cache is also used by RLIMIT_CPU!.
 	 */
-	if (expires_gt(*nextevt, *newval))
+	if (*newval < *nextevt)
 		*nextevt = *newval;
 
 	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
-- 
cgit v1.2.3


From 244d49e30653658d4e7e9b2b8427777cbbc5affe Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 21 Aug 2019 21:09:24 +0200
Subject: posix-cpu-timers: Move state tracking to struct posix_cputimers

Put it where it belongs and clean up the ifdeffery in fork completely.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190821192922.743229404@linutronix.de
---
 include/linux/posix-timers.h   |  8 +++++
 include/linux/sched/cputime.h  |  9 ++++--
 include/linux/sched/signal.h   |  6 ----
 init/init_task.c               |  2 --
 kernel/fork.c                  |  6 ----
 kernel/time/posix-cpu-timers.c | 73 +++++++++++++++++++++++-------------------
 6 files changed, 54 insertions(+), 50 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 3ea920e8fe7f..a9e3f69d2db4 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -77,15 +77,23 @@ struct posix_cputimer_base {
 /**
  * posix_cputimers - Container for posix CPU timer related data
  * @bases:		Base container for posix CPU clocks
+ * @timers_active:	Timers are queued.
+ * @expiry_active:	Timer expiry is active. Used for
+ *			process wide timers to avoid multiple
+ *			task trying to handle expiry concurrently
  *
  * Used in task_struct and signal_struct
  */
 struct posix_cputimers {
 	struct posix_cputimer_base	bases[CPUCLOCK_MAX];
+	unsigned int			timers_active;
+	unsigned int			expiry_active;
 };
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
+	pct->timers_active = 0;
+	pct->expiry_active = 0;
 	pct->bases[0].nextevt = U64_MAX;
 	pct->bases[1].nextevt = U64_MAX;
 	pct->bases[2].nextevt = U64_MAX;
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index eefa5dff16b4..6c9f19a33865 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -70,7 +70,7 @@ void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples);
  */
 
 /**
- * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running
+ * get_running_cputimer - return &tsk->signal->cputimer if cputimers are active
  *
  * @tsk:	Pointer to target task.
  */
@@ -80,8 +80,11 @@ struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
 
-	/* Check if cputimer isn't running. This is accessed without locking. */
-	if (!READ_ONCE(cputimer->running))
+	/*
+	 * Check whether posix CPU timers are active. If not the thread
+	 * group accounting is not active either. Lockless check.
+	 */
+	if (!READ_ONCE(tsk->signal->posix_cputimers.timers_active))
 		return NULL;
 
 	/*
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 729bd892ee45..88050259c466 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -57,18 +57,12 @@ struct task_cputime_atomic {
 /**
  * struct thread_group_cputimer - thread group interval timer counts
  * @cputime_atomic:	atomic thread group interval timers.
- * @running:		true when there are timers running and
- *			@cputime_atomic receives updates.
- * @checking_timer:	true when a thread in the group is in the
- *			process of checking for thread group timers.
  *
  * This structure contains the version of task_cputime, above, that is
  * used for thread group CPU timer calculations.
  */
 struct thread_group_cputimer {
 	struct task_cputime_atomic cputime_atomic;
-	bool running;
-	bool checking_timer;
 };
 
 struct multiprocess_signals {
diff --git a/init/init_task.c b/init/init_task.c
index 7ab773b9b3cd..d49692a0ec51 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -30,8 +30,6 @@ static struct signal_struct init_signals = {
 	.posix_timers = LIST_HEAD_INIT(init_signals.posix_timers),
 	.cputimer	= {
 		.cputime_atomic	= INIT_CPUTIME_ATOMIC,
-		.running	= false,
-		.checking_timer = false,
 	},
 #endif
 	INIT_CPU_TIMERS(init_signals)
diff --git a/kernel/fork.c b/kernel/fork.c
index 52bfe7c20ff6..f1228d9f0b11 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1517,7 +1517,6 @@ void __cleanup_sighand(struct sighand_struct *sighand)
 	}
 }
 
-#ifdef CONFIG_POSIX_TIMERS
 /*
  * Initialize POSIX timer handling for a thread group.
  */
@@ -1528,12 +1527,7 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 
 	cpu_limit = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 	posix_cputimers_group_init(pct, cpu_limit);
-	if (cpu_limit != RLIM_INFINITY)
-		sig->cputimer.running = true;
 }
-#else
-static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { }
-#endif
 
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index ef39a7a4a95c..52f4c99c1d60 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -23,8 +23,10 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer);
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
 {
 	posix_cputimers_init(pct);
-	if (cpu_limit != RLIM_INFINITY)
+	if (cpu_limit != RLIM_INFINITY) {
 		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
+		pct->timers_active = true;
+	}
 }
 
 /*
@@ -248,8 +250,9 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
 void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
 
-	WARN_ON_ONCE(!cputimer->running);
+	WARN_ON_ONCE(!pct->timers_active);
 
 	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
 }
@@ -269,9 +272,10 @@ void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
 static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
 {
 	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
 
 	/* Check if cputimer isn't running. This is accessed without locking. */
-	if (!READ_ONCE(cputimer->running)) {
+	if (!READ_ONCE(pct->timers_active)) {
 		struct task_cputime sum;
 
 		/*
@@ -283,13 +287,13 @@ static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
 		update_gt_cputime(&cputimer->cputime_atomic, &sum);
 
 		/*
-		 * We're setting cputimer->running without a lock. Ensure
-		 * this only gets written to in one operation. We set
-		 * running after update_gt_cputime() as a small optimization,
-		 * but barriers are not required because update_gt_cputime()
+		 * We're setting timers_active without a lock. Ensure this
+		 * only gets written to in one operation. We set it after
+		 * update_gt_cputime() as a small optimization, but
+		 * barriers are not required because update_gt_cputime()
 		 * can handle concurrent updates.
 		 */
-		WRITE_ONCE(cputimer->running, true);
+		WRITE_ONCE(pct->timers_active, true);
 	}
 	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
 }
@@ -313,9 +317,10 @@ static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
 				  bool start)
 {
 	struct thread_group_cputimer *cputimer = &p->signal->cputimer;
+	struct posix_cputimers *pct = &p->signal->posix_cputimers;
 	u64 samples[CPUCLOCK_MAX];
 
-	if (!READ_ONCE(cputimer->running)) {
+	if (!READ_ONCE(pct->timers_active)) {
 		if (start)
 			thread_group_start_cputime(p, samples);
 		else
@@ -834,10 +839,10 @@ static void check_thread_timers(struct task_struct *tsk,
 
 static inline void stop_process_timers(struct signal_struct *sig)
 {
-	struct thread_group_cputimer *cputimer = &sig->cputimer;
+	struct posix_cputimers *pct = &sig->posix_cputimers;
 
-	/* Turn off cputimer->running. This is done without locking. */
-	WRITE_ONCE(cputimer->running, false);
+	/* Turn off the active flag. This is done without locking. */
+	WRITE_ONCE(pct->timers_active, false);
 	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
 }
 
@@ -877,17 +882,17 @@ static void check_process_timers(struct task_struct *tsk,
 	unsigned long soft;
 
 	/*
-	 * If cputimer is not running, then there are no active
-	 * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
+	 * If there are no active process wide timers (POSIX 1.b, itimers,
+	 * RLIMIT_CPU) nothing to check.
 	 */
-	if (!READ_ONCE(sig->cputimer.running))
+	if (!READ_ONCE(pct->timers_active))
 		return;
 
        /*
 	 * Signify that a thread is checking for process timers.
 	 * Write access to this field is protected by the sighand lock.
 	 */
-	sig->cputimer.checking_timer = true;
+	pct->timers_active = true;
 
 	/*
 	 * Collect the current process totals. Group accounting is active
@@ -933,7 +938,7 @@ static void check_process_timers(struct task_struct *tsk,
 	if (expiry_cache_is_inactive(pct))
 		stop_process_timers(sig);
 
-	sig->cputimer.checking_timer = false;
+	pct->expiry_active = false;
 }
 
 /*
@@ -1027,39 +1032,41 @@ task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct)
  */
 static inline bool fastpath_timer_check(struct task_struct *tsk)
 {
+	struct posix_cputimers *pct = &tsk->posix_cputimers;
 	struct signal_struct *sig;
 
-	if (!expiry_cache_is_inactive(&tsk->posix_cputimers)) {
+	if (!expiry_cache_is_inactive(pct)) {
 		u64 samples[CPUCLOCK_MAX];
 
 		task_sample_cputime(tsk, samples);
-		if (task_cputimers_expired(samples, &tsk->posix_cputimers))
+		if (task_cputimers_expired(samples, pct))
 			return true;
 	}
 
 	sig = tsk->signal;
+	pct = &sig->posix_cputimers;
 	/*
-	 * Check if thread group timers expired when the cputimer is
-	 * running and no other thread in the group is already checking
-	 * for thread group cputimers. These fields are read without the
-	 * sighand lock. However, this is fine because this is meant to
-	 * be a fastpath heuristic to determine whether we should try to
-	 * acquire the sighand lock to check/handle timers.
+	 * Check if thread group timers expired when timers are active and
+	 * no other thread in the group is already handling expiry for
+	 * thread group cputimers. These fields are read without the
+	 * sighand lock. However, this is fine because this is meant to be
+	 * a fastpath heuristic to determine whether we should try to
+	 * acquire the sighand lock to handle timer expiry.
 	 *
-	 * In the worst case scenario, if 'running' or 'checking_timer' gets
-	 * set but the current thread doesn't see the change yet, we'll wait
-	 * until the next thread in the group gets a scheduler interrupt to
-	 * handle the timer. This isn't an issue in practice because these
-	 * types of delays with signals actually getting sent are expected.
+	 * In the worst case scenario, if concurrently timers_active is set
+	 * or expiry_active is cleared, but the current thread doesn't see
+	 * the change yet, the timer checks are delayed until the next
+	 * thread in the group gets a scheduler interrupt to handle the
+	 * timer. This isn't an issue in practice because these types of
+	 * delays with signals actually getting sent are expected.
 	 */
-	if (READ_ONCE(sig->cputimer.running) &&
-	    !READ_ONCE(sig->cputimer.checking_timer)) {
+	if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
 		u64 samples[CPUCLOCK_MAX];
 
 		proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
 					   samples);
 
-		if (task_cputimers_expired(samples, &sig->posix_cputimers))
+		if (task_cputimers_expired(samples, pct))
 			return true;
 	}
 
-- 
cgit v1.2.3


From 60bda037f1dd8151e0c9ee5b09f0c091a0f643cd Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 27 Aug 2019 21:31:02 +0200
Subject: posix-cpu-timers: Utilize timerqueue for storage

Using a linear O(N) search for timer insertion affects execution time and
D-cache footprint badly with a larger number of timers.

Switch the storage to a timerqueue which is already used for hrtimers and
alarmtimers. It does not affect the size of struct k_itimer as it.alarm is
still larger.

The extra list head for the expiry list will go away later once the expiry
is moved into task work context.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1908272129220.1939@nanos.tec.linutronix.de
---
 include/linux/posix-timers.h   |  65 ++++++++++----
 include/linux/timerqueue.h     |  10 +++
 kernel/time/posix-cpu-timers.c | 189 +++++++++++++++++++++--------------------
 3 files changed, 155 insertions(+), 109 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index a9e3f69d2db4..f9fbb4c620c1 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -5,17 +5,11 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/alarmtimer.h>
+#include <linux/timerqueue.h>
 
 struct kernel_siginfo;
 struct task_struct;
 
-struct cpu_timer_list {
-	struct list_head entry;
-	u64 expires;
-	struct task_struct *task;
-	int firing;
-};
-
 /*
  * Bit fields within a clockid:
  *
@@ -64,14 +58,58 @@ static inline int clockid_to_fd(const clockid_t clk)
 
 #ifdef CONFIG_POSIX_TIMERS
 
+/**
+ * cpu_timer - Posix CPU timer representation for k_itimer
+ * @node:	timerqueue node to queue in the task/sig
+ * @head:	timerqueue head on which this timer is queued
+ * @task:	Pointer to target task
+ * @elist:	List head for the expiry list
+ * @firing:	Timer is currently firing
+ */
+struct cpu_timer {
+	struct timerqueue_node	node;
+	struct timerqueue_head	*head;
+	struct task_struct	*task;
+	struct list_head	elist;
+	int			firing;
+};
+
+static inline bool cpu_timer_requeue(struct cpu_timer *ctmr)
+{
+	return timerqueue_add(ctmr->head, &ctmr->node);
+}
+
+static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
+				     struct cpu_timer *ctmr)
+{
+	ctmr->head = head;
+	return timerqueue_add(head, &ctmr->node);
+}
+
+static inline void cpu_timer_dequeue(struct cpu_timer *ctmr)
+{
+	if (!RB_EMPTY_NODE(&ctmr->node.node))
+		timerqueue_del(ctmr->head, &ctmr->node);
+}
+
+static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
+{
+	return ctmr->node.expires;
+}
+
+static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
+{
+	ctmr->node.expires = exp;
+}
+
 /**
  * posix_cputimer_base - Container per posix CPU clock
  * @nextevt:		Earliest-expiration cache
- * @cpu_timers:		List heads to queue posix CPU timers
+ * @tqhead:		timerqueue head for cpu_timers
  */
 struct posix_cputimer_base {
 	u64			nextevt;
-	struct list_head	cpu_timers;
+	struct timerqueue_head	tqhead;
 };
 
 /**
@@ -92,14 +130,10 @@ struct posix_cputimers {
 
 static inline void posix_cputimers_init(struct posix_cputimers *pct)
 {
-	pct->timers_active = 0;
-	pct->expiry_active = 0;
+	memset(pct, 0, sizeof(*pct));
 	pct->bases[0].nextevt = U64_MAX;
 	pct->bases[1].nextevt = U64_MAX;
 	pct->bases[2].nextevt = U64_MAX;
-	INIT_LIST_HEAD(&pct->bases[0].cpu_timers);
-	INIT_LIST_HEAD(&pct->bases[1].cpu_timers);
-	INIT_LIST_HEAD(&pct->bases[2].cpu_timers);
 }
 
 void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);
@@ -113,7 +147,6 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 /* Init task static initializer */
 #define INIT_CPU_TIMERBASE(b) {						\
 	.nextevt	= U64_MAX,					\
-	.cpu_timers	= LIST_HEAD_INIT(b.cpu_timers),			\
 }
 
 #define INIT_CPU_TIMERBASES(b) {					\
@@ -182,7 +215,7 @@ struct k_itimer {
 		struct {
 			struct hrtimer	timer;
 		} real;
-		struct cpu_timer_list	cpu;
+		struct cpu_timer	cpu;
 		struct {
 			struct alarm	alarmtimer;
 		} alarm;
diff --git a/include/linux/timerqueue.h b/include/linux/timerqueue.h
index aff122f1062a..93884086f392 100644
--- a/include/linux/timerqueue.h
+++ b/include/linux/timerqueue.h
@@ -43,6 +43,16 @@ static inline void timerqueue_init(struct timerqueue_node *node)
 	RB_CLEAR_NODE(&node->node);
 }
 
+static inline bool timerqueue_node_queued(struct timerqueue_node *node)
+{
+	return !RB_EMPTY_NODE(&node->node);
+}
+
+static inline bool timerqueue_node_expires(struct timerqueue_node *node)
+{
+	return node->expires;
+}
+
 static inline void timerqueue_init_head(struct timerqueue_head *head)
 {
 	head->rb_root = RB_ROOT_CACHED;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 52f4c99c1d60..73c492ce404b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -96,19 +96,19 @@ static inline int validate_clock_permissions(const clockid_t clock)
  * Update expiry time from increment, and increase overrun count,
  * given the current clock sample.
  */
-static void bump_cpu_timer(struct k_itimer *timer, u64 now)
+static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
 {
+	u64 delta, incr, expires = timer->it.cpu.node.expires;
 	int i;
-	u64 delta, incr;
 
 	if (!timer->it_interval)
-		return;
+		return expires;
 
-	if (now < timer->it.cpu.expires)
-		return;
+	if (now < expires)
+		return expires;
 
 	incr = timer->it_interval;
-	delta = now + incr - timer->it.cpu.expires;
+	delta = now + incr - expires;
 
 	/* Don't use (incr*2 < delta), incr*2 might overflow. */
 	for (i = 0; incr < delta - incr; i++)
@@ -118,10 +118,11 @@ static void bump_cpu_timer(struct k_itimer *timer, u64 now)
 		if (delta < incr)
 			continue;
 
-		timer->it.cpu.expires += incr;
+		timer->it.cpu.node.expires += incr;
 		timer->it_overrun += 1LL << i;
 		delta -= incr;
 	}
+	return timer->it.cpu.node.expires;
 }
 
 /* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
@@ -365,7 +366,7 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
 		return -EINVAL;
 
 	new_timer->kclock = &clock_posix_cpu;
-	INIT_LIST_HEAD(&new_timer->it.cpu.entry);
+	timerqueue_init(&new_timer->it.cpu.node);
 	new_timer->it.cpu.task = p;
 	return 0;
 }
@@ -378,10 +379,11 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer)
  */
 static int posix_cpu_timer_del(struct k_itimer *timer)
 {
-	int ret = 0;
-	unsigned long flags;
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	struct task_struct *p = ctmr->task;
 	struct sighand_struct *sighand;
-	struct task_struct *p = timer->it.cpu.task;
+	unsigned long flags;
+	int ret = 0;
 
 	if (WARN_ON_ONCE(!p))
 		return -EINVAL;
@@ -393,15 +395,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 	sighand = lock_task_sighand(p, &flags);
 	if (unlikely(sighand == NULL)) {
 		/*
-		 * We raced with the reaping of the task.
-		 * The deletion should have cleared us off the list.
+		 * This raced with the reaping of the task. The exit cleanup
+		 * should have removed this timer from the timer queue.
 		 */
-		WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
+		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
 	} else {
 		if (timer->it.cpu.firing)
 			ret = TIMER_RETRY;
 		else
-			list_del(&timer->it.cpu.entry);
+			cpu_timer_dequeue(ctmr);
 
 		unlock_task_sighand(p, &flags);
 	}
@@ -412,12 +414,16 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
 	return ret;
 }
 
-static void cleanup_timers_list(struct list_head *head)
+static void cleanup_timerqueue(struct timerqueue_head *head)
 {
-	struct cpu_timer_list *timer, *next;
+	struct timerqueue_node *node;
+	struct cpu_timer *ctmr;
 
-	list_for_each_entry_safe(timer, next, head, entry)
-		list_del_init(&timer->entry);
+	while ((node = timerqueue_getnext(head))) {
+		timerqueue_del(head, node);
+		ctmr = container_of(node, struct cpu_timer, node);
+		ctmr->head = NULL;
+	}
 }
 
 /*
@@ -429,9 +435,9 @@ static void cleanup_timers_list(struct list_head *head)
  */
 static void cleanup_timers(struct posix_cputimers *pct)
 {
-	cleanup_timers_list(&pct->bases[CPUCLOCK_PROF].cpu_timers);
-	cleanup_timers_list(&pct->bases[CPUCLOCK_VIRT].cpu_timers);
-	cleanup_timers_list(&pct->bases[CPUCLOCK_SCHED].cpu_timers);
+	cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
+	cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
+	cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
 }
 
 /*
@@ -454,28 +460,18 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
  */
 static void arm_timer(struct k_itimer *timer)
 {
-	struct cpu_timer_list *const nt = &timer->it.cpu;
 	int clkidx = CPUCLOCK_WHICH(timer->it_clock);
-	struct task_struct *p = timer->it.cpu.task;
-	u64 newexp = timer->it.cpu.expires;
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	u64 newexp = cpu_timer_getexpires(ctmr);
+	struct task_struct *p = ctmr->task;
 	struct posix_cputimer_base *base;
-	struct list_head *head, *listpos;
-	struct cpu_timer_list *next;
 
 	if (CPUCLOCK_PERTHREAD(timer->it_clock))
 		base = p->posix_cputimers.bases + clkidx;
 	else
 		base = p->signal->posix_cputimers.bases + clkidx;
 
-	listpos = head = &base->cpu_timers;
-	list_for_each_entry(next,head, entry) {
-		if (nt->expires < next->expires)
-			break;
-		listpos = &next->entry;
-	}
-	list_add(&nt->entry, listpos);
-
-	if (listpos != head)
+	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
 		return;
 
 	/*
@@ -498,24 +494,26 @@ static void arm_timer(struct k_itimer *timer)
  */
 static void cpu_timer_fire(struct k_itimer *timer)
 {
+	struct cpu_timer *ctmr = &timer->it.cpu;
+
 	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
 		/*
 		 * User don't want any signal.
 		 */
-		timer->it.cpu.expires = 0;
+		cpu_timer_setexpires(ctmr, 0);
 	} else if (unlikely(timer->sigq == NULL)) {
 		/*
 		 * This a special case for clock_nanosleep,
 		 * not a normal timer from sys_timer_create.
 		 */
 		wake_up_process(timer->it_process);
-		timer->it.cpu.expires = 0;
+		cpu_timer_setexpires(ctmr, 0);
 	} else if (!timer->it_interval) {
 		/*
 		 * One-shot timer.  Clear it as soon as it's fired.
 		 */
 		posix_timer_event(timer, 0);
-		timer->it.cpu.expires = 0;
+		cpu_timer_setexpires(ctmr, 0);
 	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
 		/*
 		 * The signal did not get queued because the signal
@@ -539,10 +537,11 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 {
 	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
 	u64 old_expires, new_expires, old_incr, val;
-	struct task_struct *p = timer->it.cpu.task;
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	struct task_struct *p = ctmr->task;
 	struct sighand_struct *sighand;
 	unsigned long flags;
-	int ret;
+	int ret = 0;
 
 	if (WARN_ON_ONCE(!p))
 		return -EINVAL;
@@ -562,22 +561,21 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	 * If p has just been reaped, we can no
 	 * longer get any information about it at all.
 	 */
-	if (unlikely(sighand == NULL)) {
+	if (unlikely(sighand == NULL))
 		return -ESRCH;
-	}
 
 	/*
 	 * Disarm any old timer after extracting its expiry time.
 	 */
-
-	ret = 0;
 	old_incr = timer->it_interval;
-	old_expires = timer->it.cpu.expires;
+	old_expires = cpu_timer_getexpires(ctmr);
+
 	if (unlikely(timer->it.cpu.firing)) {
 		timer->it.cpu.firing = -1;
 		ret = TIMER_RETRY;
-	} else
-		list_del_init(&timer->it.cpu.entry);
+	} else {
+		cpu_timer_dequeue(ctmr);
+	}
 
 	/*
 	 * We need to sample the current value to convert the new
@@ -598,18 +596,16 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 			old->it_value.tv_nsec = 0;
 		} else {
 			/*
-			 * Update the timer in case it has
-			 * overrun already.  If it has,
-			 * we'll report it as having overrun
-			 * and with the next reloaded timer
-			 * already ticking, though we are
-			 * swallowing that pending
-			 * notification here to install the
-			 * new setting.
+			 * Update the timer in case it has overrun already.
+			 * If it has, we'll report it as having overrun and
+			 * with the next reloaded timer already ticking,
+			 * though we are swallowing that pending
+			 * notification here to install the new setting.
 			 */
-			bump_cpu_timer(timer, val);
-			if (val < timer->it.cpu.expires) {
-				old_expires = timer->it.cpu.expires - val;
+			u64 exp = bump_cpu_timer(timer, val);
+
+			if (val < exp) {
+				old_expires = exp - val;
 				old->it_value = ns_to_timespec64(old_expires);
 			} else {
 				old->it_value.tv_nsec = 1;
@@ -638,7 +634,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 	 * For a timer with no notification action, we don't actually
 	 * arm the timer (we'll just fake it for timer_gettime).
 	 */
-	timer->it.cpu.expires = new_expires;
+	cpu_timer_setexpires(ctmr, new_expires);
 	if (new_expires != 0 && val < new_expires) {
 		arm_timer(timer);
 	}
@@ -680,8 +676,9 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
 static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
 {
 	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
-	struct task_struct *p = timer->it.cpu.task;
-	u64 now;
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	u64 now, expires = cpu_timer_getexpires(ctmr);
+	struct task_struct *p = ctmr->task;
 
 	if (WARN_ON_ONCE(!p))
 		return;
@@ -691,7 +688,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
 	 */
 	itp->it_interval = ktime_to_timespec64(timer->it_interval);
 
-	if (!timer->it.cpu.expires)
+	if (!expires)
 		return;
 
 	/*
@@ -713,9 +710,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
 			/*
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
-			 * Call the timer disarmed, nothing else to do.
+			 * Disarm the timer, nothing else to do.
 			 */
-			timer->it.cpu.expires = 0;
+			cpu_timer_setexpires(ctmr, 0);
 			return;
 		} else {
 			now = cpu_clock_sample_group(clkid, p, false);
@@ -723,8 +720,8 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
 		}
 	}
 
-	if (now < timer->it.cpu.expires) {
-		itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now);
+	if (now < expires) {
+		itp->it_value = ns_to_timespec64(expires - now);
 	} else {
 		/*
 		 * The timer should have expired already, but the firing
@@ -735,37 +732,41 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp
 	}
 }
 
-static unsigned long long
-check_timers_list(struct list_head *timers,
-		  struct list_head *firing,
-		  unsigned long long curr)
-{
-	int maxfire = 20;
-
-	while (!list_empty(timers)) {
-		struct cpu_timer_list *t;
-
-		t = list_first_entry(timers, struct cpu_timer_list, entry);
+#define MAX_COLLECTED	20
 
-		if (!--maxfire || curr < t->expires)
-			return t->expires;
-
-		t->firing = 1;
-		list_move_tail(&t->entry, firing);
+static u64 collect_timerqueue(struct timerqueue_head *head,
+			      struct list_head *firing, u64 now)
+{
+	struct timerqueue_node *next;
+	int i = 0;
+
+	while ((next = timerqueue_getnext(head))) {
+		struct cpu_timer *ctmr;
+		u64 expires;
+
+		ctmr = container_of(next, struct cpu_timer, node);
+		expires = cpu_timer_getexpires(ctmr);
+		/* Limit the number of timers to expire at once */
+		if (++i == MAX_COLLECTED || now < expires)
+			return expires;
+
+		ctmr->firing = 1;
+		cpu_timer_dequeue(ctmr);
+		list_add_tail(&ctmr->elist, firing);
 	}
 
 	return U64_MAX;
 }
 
-static void collect_posix_cputimers(struct posix_cputimers *pct,
-				    u64 *samples, struct list_head *firing)
+static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
+				    struct list_head *firing)
 {
 	struct posix_cputimer_base *base = pct->bases;
 	int i;
 
 	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
-		base->nextevt = check_timers_list(&base->cpu_timers, firing,
-						   samples[i]);
+		base->nextevt = collect_timerqueue(&base->tqhead, firing,
+						    samples[i]);
 	}
 }
 
@@ -948,7 +949,8 @@ static void check_process_timers(struct task_struct *tsk,
 static void posix_cpu_timer_rearm(struct k_itimer *timer)
 {
 	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
-	struct task_struct *p = timer->it.cpu.task;
+	struct cpu_timer *ctmr = &timer->it.cpu;
+	struct task_struct *p = ctmr->task;
 	struct sighand_struct *sighand;
 	unsigned long flags;
 	u64 now;
@@ -980,7 +982,7 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
 			 * The process has been reaped.
 			 * We can't even collect a sample any more.
 			 */
-			timer->it.cpu.expires = 0;
+			cpu_timer_setexpires(ctmr, 0);
 			return;
 		} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
 			/* If the process is dying, no need to rearm */
@@ -1124,11 +1126,11 @@ void run_posix_cpu_timers(void)
 	 * each timer's lock before clearing its firing flag, so no
 	 * timer call will interfere.
 	 */
-	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
+	list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
 		int cpu_firing;
 
 		spin_lock(&timer->it_lock);
-		list_del_init(&timer->it.cpu.entry);
+		list_del_init(&timer->it.cpu.elist);
 		cpu_firing = timer->it.cpu.firing;
 		timer->it.cpu.firing = 0;
 		/*
@@ -1204,6 +1206,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 	timer.it_overrun = -1;
 	error = posix_cpu_timer_create(&timer);
 	timer.it_process = current;
+
 	if (!error) {
 		static struct itimerspec64 zero_it;
 		struct restart_block *restart;
@@ -1219,7 +1222,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		}
 
 		while (!signal_pending(current)) {
-			if (timer.it.cpu.expires == 0) {
+			if (!cpu_timer_getexpires(&timer.it.cpu)) {
 				/*
 				 * Our timer fired and was reset, below
 				 * deletion can not fail.
@@ -1241,7 +1244,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 		/*
 		 * We were interrupted by a signal.
 		 */
-		expires = timer.it.cpu.expires;
+		expires = cpu_timer_getexpires(&timer.it.cpu);
 		error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
 		if (!error) {
 			/*
-- 
cgit v1.2.3


From 05f0b20b67f4185cc0022e49de08e1b9b2dbeb36 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 19 Aug 2019 18:35:47 -0700
Subject: usb: gadget: Export recommended BESL values

Currently there's no option for the controller driver to report the
recommended Best Effort Service Latency (BESL) when operating with LPM
support. Add new fields in usb_dcd_config_params to export the
recommended baseline and deep BESL values for the function drivers to
set the proper BESL value in the BOS descriptor.

Signed-off-by: Thinh Nguyen <thinhn@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 include/linux/usb/gadget.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index fb19141151d8..124462d65eac 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -291,6 +291,9 @@ struct usb_dcd_config_params {
 #define USB_DEFAULT_U1_DEV_EXIT_LAT	0x01	/* Less then 1 microsec */
 	__le16 bU2DevExitLat;	/* U2 Device exit Latency */
 #define USB_DEFAULT_U2_DEV_EXIT_LAT	0x1F4	/* Less then 500 microsec */
+	__u8 besl_baseline;	/* Recommended baseline BESL (0-15) */
+	__u8 besl_deep;		/* Recommended deep BESL (0-15) */
+#define USB_DEFAULT_BESL_UNSPECIFIED	0xFF	/* No recommended value */
 };
 
 
-- 
cgit v1.2.3


From cca3854010c5aebf3a06be0de04f26c4f1ae5810 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 19 Aug 2019 18:36:12 -0700
Subject: usb: gadget: composite: Set recommended BESL values

Set the recommended BESL deep and baseline values based on the gadget's
configuration parameters to the extended BOS descriptor. This feature
helps to optimize power savings by maximizing the opportunity for longer
L1 residency time.

Signed-off-by: Thinh Nguyen <thinhn@synopsys.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/composite.c | 38 ++++++++++++++++++++++++++------------
 include/uapi/linux/usb/ch9.h   |  2 ++
 2 files changed, 28 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 9118b42c70b6..7ed009dc0f92 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -612,6 +612,7 @@ static int bos_desc(struct usb_composite_dev *cdev)
 	struct usb_ext_cap_descriptor	*usb_ext;
 	struct usb_dcd_config_params	dcd_config_params;
 	struct usb_bos_descriptor	*bos = cdev->req->buf;
+	unsigned int			besl = 0;
 
 	bos->bLength = USB_DT_BOS_SIZE;
 	bos->bDescriptorType = USB_DT_BOS;
@@ -619,6 +620,29 @@ static int bos_desc(struct usb_composite_dev *cdev)
 	bos->wTotalLength = cpu_to_le16(USB_DT_BOS_SIZE);
 	bos->bNumDeviceCaps = 0;
 
+	/* Get Controller configuration */
+	if (cdev->gadget->ops->get_config_params) {
+		cdev->gadget->ops->get_config_params(cdev->gadget,
+						     &dcd_config_params);
+	} else {
+		dcd_config_params.besl_baseline =
+			USB_DEFAULT_BESL_UNSPECIFIED;
+		dcd_config_params.besl_deep =
+			USB_DEFAULT_BESL_UNSPECIFIED;
+		dcd_config_params.bU1devExitLat =
+			USB_DEFAULT_U1_DEV_EXIT_LAT;
+		dcd_config_params.bU2DevExitLat =
+			cpu_to_le16(USB_DEFAULT_U2_DEV_EXIT_LAT);
+	}
+
+	if (dcd_config_params.besl_baseline != USB_DEFAULT_BESL_UNSPECIFIED)
+		besl = USB_BESL_BASELINE_VALID |
+			USB_SET_BESL_BASELINE(dcd_config_params.besl_baseline);
+
+	if (dcd_config_params.besl_deep != USB_DEFAULT_BESL_UNSPECIFIED)
+		besl |= USB_BESL_DEEP_VALID |
+			USB_SET_BESL_DEEP(dcd_config_params.besl_deep);
+
 	/*
 	 * A SuperSpeed device shall include the USB2.0 extension descriptor
 	 * and shall support LPM when operating in USB2.0 HS mode.
@@ -629,7 +653,8 @@ static int bos_desc(struct usb_composite_dev *cdev)
 	usb_ext->bLength = USB_DT_USB_EXT_CAP_SIZE;
 	usb_ext->bDescriptorType = USB_DT_DEVICE_CAPABILITY;
 	usb_ext->bDevCapabilityType = USB_CAP_TYPE_EXT;
-	usb_ext->bmAttributes = cpu_to_le32(USB_LPM_SUPPORT | USB_BESL_SUPPORT);
+	usb_ext->bmAttributes = cpu_to_le32(USB_LPM_SUPPORT |
+					    USB_BESL_SUPPORT | besl);
 
 	/*
 	 * The Superspeed USB Capability descriptor shall be implemented by all
@@ -650,17 +675,6 @@ static int bos_desc(struct usb_composite_dev *cdev)
 						      USB_HIGH_SPEED_OPERATION |
 						      USB_5GBPS_OPERATION);
 		ss_cap->bFunctionalitySupport = USB_LOW_SPEED_OPERATION;
-
-		/* Get Controller configuration */
-		if (cdev->gadget->ops->get_config_params) {
-			cdev->gadget->ops->get_config_params(cdev->gadget,
-				&dcd_config_params);
-		} else {
-			dcd_config_params.bU1devExitLat =
-				USB_DEFAULT_U1_DEV_EXIT_LAT;
-			dcd_config_params.bU2DevExitLat =
-				cpu_to_le16(USB_DEFAULT_U2_DEV_EXIT_LAT);
-		}
 		ss_cap->bU1devExitLat = dcd_config_params.bU1devExitLat;
 		ss_cap->bU2DevExitLat = dcd_config_params.bU2DevExitLat;
 	}
diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index d5a5caec8fbc..2b623f36af6b 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -894,6 +894,8 @@ struct usb_ext_cap_descriptor {		/* Link Power Management */
 #define USB_BESL_SUPPORT		(1 << 2)	/* supports BESL */
 #define USB_BESL_BASELINE_VALID		(1 << 3)	/* Baseline BESL valid*/
 #define USB_BESL_DEEP_VALID		(1 << 4)	/* Deep BESL valid */
+#define USB_SET_BESL_BASELINE(p)	(((p) & 0xf) << 8)
+#define USB_SET_BESL_DEEP(p)		(((p) & 0xf) << 12)
 #define USB_GET_BESL_BASELINE(p)	(((p) & (0xf << 8)) >> 8)
 #define USB_GET_BESL_DEEP(p)		(((p) & (0xf << 12)) >> 12)
 } __attribute__((packed));
-- 
cgit v1.2.3


From f67ff1bd58f0c102f0194c3888ddbc4a87dd1382 Mon Sep 17 00:00:00 2001
From: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Date: Fri, 23 Aug 2019 11:45:28 +0800
Subject: regulator: mt6358: Add support for MT6358 regulator

The MT6358 is a regulator found on boards based on MediaTek MT8183 and
probably other SoCs. It is a so called pmic and connects as a slave to
SoC using SPI, wrapped inside the pmic-wrapper.

Signed-off-by: Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>
Link: https://lore.kernel.org/r/1566531931-9772-8-git-send-email-hsin-hsiung.wang@mediatek.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/Kconfig                  |   9 +
 drivers/regulator/Makefile                 |   1 +
 drivers/regulator/mt6358-regulator.c       | 549 +++++++++++++++++++++++++++++
 include/linux/regulator/mt6358-regulator.h |  56 +++
 4 files changed, 615 insertions(+)
 create mode 100644 drivers/regulator/mt6358-regulator.c
 create mode 100644 include/linux/regulator/mt6358-regulator.h

(limited to 'include')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 2e50423b3c08..d6d8785630b1 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -619,6 +619,15 @@ config REGULATOR_MT6323
 	  This driver supports the control of different power rails of device
 	  through regulator interface.
 
+config REGULATOR_MT6358
+	tristate "MediaTek MT6358 PMIC"
+	depends on MFD_MT6397
+	help
+	  Say y here to select this option to enable the power regulator of
+	  MediaTek MT6358 PMIC.
+	  This driver supports the control of different power rails of device
+	  through regulator interface.
+
 config REGULATOR_MT6380
 	tristate "MediaTek MT6380 PMIC"
 	depends on MTK_PMIC_WRAP
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index 922bf975070f..2210ba56f9bd 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_REGULATOR_MC13XXX_CORE) +=  mc13xxx-regulator-core.o
 obj-$(CONFIG_REGULATOR_MCP16502) += mcp16502.o
 obj-$(CONFIG_REGULATOR_MT6311) += mt6311-regulator.o
 obj-$(CONFIG_REGULATOR_MT6323)	+= mt6323-regulator.o
+obj-$(CONFIG_REGULATOR_MT6358)	+= mt6358-regulator.o
 obj-$(CONFIG_REGULATOR_MT6380)	+= mt6380-regulator.o
 obj-$(CONFIG_REGULATOR_MT6397)	+= mt6397-regulator.o
 obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o
diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
new file mode 100644
index 000000000000..ba42682e06f3
--- /dev/null
+++ b/drivers/regulator/mt6358-regulator.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (c) 2019 MediaTek Inc.
+
+#include <linux/mfd/mt6358/registers.h>
+#include <linux/mfd/mt6397/core.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/mt6358-regulator.h>
+#include <linux/regulator/of_regulator.h>
+
+#define MT6358_BUCK_MODE_AUTO	0
+#define MT6358_BUCK_MODE_FORCE_PWM	1
+
+/*
+ * MT6358 regulators' information
+ *
+ * @desc: standard fields of regulator description.
+ * @qi: Mask for query enable signal status of regulators
+ */
+struct mt6358_regulator_info {
+	struct regulator_desc desc;
+	u32 status_reg;
+	u32 qi;
+	const u32 *index_table;
+	unsigned int n_table;
+	u32 vsel_shift;
+	u32 da_vsel_reg;
+	u32 da_vsel_mask;
+	u32 da_vsel_shift;
+	u32 modeset_reg;
+	u32 modeset_mask;
+	u32 modeset_shift;
+};
+
+#define MT6358_BUCK(match, vreg, min, max, step,		\
+	volt_ranges, vosel_mask, _da_vsel_reg, _da_vsel_mask,	\
+	_da_vsel_shift, _modeset_reg, _modeset_shift)		\
+[MT6358_ID_##vreg] = {	\
+	.desc = {	\
+		.name = #vreg,	\
+		.of_match = of_match_ptr(match),	\
+		.ops = &mt6358_volt_range_ops,	\
+		.type = REGULATOR_VOLTAGE,	\
+		.id = MT6358_ID_##vreg,		\
+		.owner = THIS_MODULE,		\
+		.n_voltages = ((max) - (min)) / (step) + 1,	\
+		.linear_ranges = volt_ranges,		\
+		.n_linear_ranges = ARRAY_SIZE(volt_ranges),	\
+		.vsel_reg = MT6358_BUCK_##vreg##_ELR0,	\
+		.vsel_mask = vosel_mask,	\
+		.enable_reg = MT6358_BUCK_##vreg##_CON0,	\
+		.enable_mask = BIT(0),	\
+		.of_map_mode = mt6358_map_mode,	\
+	},	\
+	.status_reg = MT6358_BUCK_##vreg##_DBG1,	\
+	.qi = BIT(0),	\
+	.da_vsel_reg = _da_vsel_reg,	\
+	.da_vsel_mask = _da_vsel_mask,	\
+	.da_vsel_shift = _da_vsel_shift,	\
+	.modeset_reg = _modeset_reg,	\
+	.modeset_mask = BIT(_modeset_shift),	\
+	.modeset_shift = _modeset_shift	\
+}
+
+#define MT6358_LDO(match, vreg, ldo_volt_table,	\
+	ldo_index_table, enreg, enbit, vosel,	\
+	vosel_mask, vosel_shift)	\
+[MT6358_ID_##vreg] = {	\
+	.desc = {	\
+		.name = #vreg,	\
+		.of_match = of_match_ptr(match),	\
+		.ops = &mt6358_volt_table_ops,	\
+		.type = REGULATOR_VOLTAGE,	\
+		.id = MT6358_ID_##vreg,	\
+		.owner = THIS_MODULE,	\
+		.n_voltages = ARRAY_SIZE(ldo_volt_table),	\
+		.volt_table = ldo_volt_table,	\
+		.vsel_reg = vosel,	\
+		.vsel_mask = vosel_mask,	\
+		.enable_reg = enreg,	\
+		.enable_mask = BIT(enbit),	\
+	},	\
+	.status_reg = MT6358_LDO_##vreg##_CON1,	\
+	.qi = BIT(15),	\
+	.index_table = ldo_index_table,	\
+	.n_table = ARRAY_SIZE(ldo_index_table),	\
+	.vsel_shift = vosel_shift,	\
+}
+
+#define MT6358_LDO1(match, vreg, min, max, step,	\
+	volt_ranges, _da_vsel_reg, _da_vsel_mask,	\
+	_da_vsel_shift, vosel, vosel_mask)	\
+[MT6358_ID_##vreg] = {	\
+	.desc = {	\
+		.name = #vreg,	\
+		.of_match = of_match_ptr(match),	\
+		.ops = &mt6358_volt_range_ops,	\
+		.type = REGULATOR_VOLTAGE,	\
+		.id = MT6358_ID_##vreg,	\
+		.owner = THIS_MODULE,	\
+		.n_voltages = ((max) - (min)) / (step) + 1,	\
+		.linear_ranges = volt_ranges,	\
+		.n_linear_ranges = ARRAY_SIZE(volt_ranges),	\
+		.vsel_reg = vosel,	\
+		.vsel_mask = vosel_mask,	\
+		.enable_reg = MT6358_LDO_##vreg##_CON0,	\
+		.enable_mask = BIT(0),	\
+	},	\
+	.da_vsel_reg = _da_vsel_reg,	\
+	.da_vsel_mask = _da_vsel_mask,	\
+	.da_vsel_shift = _da_vsel_shift,	\
+	.status_reg = MT6358_LDO_##vreg##_DBG1,	\
+	.qi = BIT(0),	\
+}
+
+#define MT6358_REG_FIXED(match, vreg,	\
+	enreg, enbit, volt)	\
+[MT6358_ID_##vreg] = {	\
+	.desc = {	\
+		.name = #vreg,	\
+		.of_match = of_match_ptr(match),	\
+		.ops = &mt6358_volt_fixed_ops,	\
+		.type = REGULATOR_VOLTAGE,	\
+		.id = MT6358_ID_##vreg,	\
+		.owner = THIS_MODULE,	\
+		.n_voltages = 1,	\
+		.enable_reg = enreg,	\
+		.enable_mask = BIT(enbit),	\
+		.min_uV = volt,	\
+	},	\
+	.status_reg = MT6358_LDO_##vreg##_CON1,	\
+	.qi = BIT(15),							\
+}
+
+static const struct regulator_linear_range buck_volt_range1[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 6250),
+};
+
+static const struct regulator_linear_range buck_volt_range2[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0, 0x7f, 12500),
+};
+
+static const struct regulator_linear_range buck_volt_range3[] = {
+	REGULATOR_LINEAR_RANGE(500000, 0, 0x3f, 50000),
+};
+
+static const struct regulator_linear_range buck_volt_range4[] = {
+	REGULATOR_LINEAR_RANGE(1000000, 0, 0x7f, 12500),
+};
+
+static const u32 vdram2_voltages[] = {
+	600000, 1800000,
+};
+
+static const u32 vsim_voltages[] = {
+	1700000, 1800000, 2700000, 3000000, 3100000,
+};
+
+static const u32 vibr_voltages[] = {
+	1200000, 1300000, 1500000, 1800000,
+	2000000, 2800000, 3000000, 3300000,
+};
+
+static const u32 vusb_voltages[] = {
+	3000000, 3100000,
+};
+
+static const u32 vcamd_voltages[] = {
+	900000, 1000000, 1100000, 1200000,
+	1300000, 1500000, 1800000,
+};
+
+static const u32 vefuse_voltages[] = {
+	1700000, 1800000, 1900000,
+};
+
+static const u32 vmch_vemc_voltages[] = {
+	2900000, 3000000, 3300000,
+};
+
+static const u32 vcama_voltages[] = {
+	1800000, 2500000, 2700000,
+	2800000, 2900000, 3000000,
+};
+
+static const u32 vcn33_bt_wifi_voltages[] = {
+	3300000, 3400000, 3500000,
+};
+
+static const u32 vmc_voltages[] = {
+	1800000, 2900000, 3000000, 3300000,
+};
+
+static const u32 vldo28_voltages[] = {
+	2800000, 3000000,
+};
+
+static const u32 vdram2_idx[] = {
+	0, 12,
+};
+
+static const u32 vsim_idx[] = {
+	3, 4, 8, 11, 12,
+};
+
+static const u32 vibr_idx[] = {
+	0, 1, 2, 4, 5, 9, 11, 13,
+};
+
+static const u32 vusb_idx[] = {
+	3, 4,
+};
+
+static const u32 vcamd_idx[] = {
+	3, 4, 5, 6, 7, 9, 12,
+};
+
+static const u32 vefuse_idx[] = {
+	11, 12, 13,
+};
+
+static const u32 vmch_vemc_idx[] = {
+	2, 3, 5,
+};
+
+static const u32 vcama_idx[] = {
+	0, 7, 9, 10, 11, 12,
+};
+
+static const u32 vcn33_bt_wifi_idx[] = {
+	1, 2, 3,
+};
+
+static const u32 vmc_idx[] = {
+	4, 10, 11, 13,
+};
+
+static const u32 vldo28_idx[] = {
+	1, 3,
+};
+
+static unsigned int mt6358_map_mode(unsigned int mode)
+{
+	return mode == MT6358_BUCK_MODE_AUTO ?
+		REGULATOR_MODE_NORMAL : REGULATOR_MODE_FAST;
+}
+
+static int mt6358_set_voltage_sel(struct regulator_dev *rdev,
+				  unsigned int selector)
+{
+	int idx, ret;
+	const u32 *pvol;
+	struct mt6358_regulator_info *info = rdev_get_drvdata(rdev);
+
+	pvol = info->index_table;
+
+	idx = pvol[selector];
+	ret = regmap_update_bits(rdev->regmap, info->desc.vsel_reg,
+				 info->desc.vsel_mask,
+				 idx << info->vsel_shift);
+
+	return ret;
+}
+
+static int mt6358_get_voltage_sel(struct regulator_dev *rdev)
+{
+	int idx, ret;
+	u32 selector;
+	struct mt6358_regulator_info *info = rdev_get_drvdata(rdev);
+	const u32 *pvol;
+
+	ret = regmap_read(rdev->regmap, info->desc.vsel_reg, &selector);
+	if (ret != 0) {
+		dev_info(&rdev->dev,
+			 "Failed to get mt6358 %s vsel reg: %d\n",
+			 info->desc.name, ret);
+		return ret;
+	}
+
+	selector = (selector & info->desc.vsel_mask) >> info->vsel_shift;
+	pvol = info->index_table;
+	for (idx = 0; idx < info->desc.n_voltages; idx++) {
+		if (pvol[idx] == selector)
+			return idx;
+	}
+
+	return -EINVAL;
+}
+
+static int mt6358_get_buck_voltage_sel(struct regulator_dev *rdev)
+{
+	int ret, regval;
+	struct mt6358_regulator_info *info = rdev_get_drvdata(rdev);
+
+	ret = regmap_read(rdev->regmap, info->da_vsel_reg, &regval);
+	if (ret != 0) {
+		dev_err(&rdev->dev,
+			"Failed to get mt6358 Buck %s vsel reg: %d\n",
+			info->desc.name, ret);
+		return ret;
+	}
+
+	ret = (regval >> info->da_vsel_shift) & info->da_vsel_mask;
+
+	return ret;
+}
+
+static int mt6358_get_status(struct regulator_dev *rdev)
+{
+	int ret;
+	u32 regval;
+	struct mt6358_regulator_info *info = rdev_get_drvdata(rdev);
+
+	ret = regmap_read(rdev->regmap, info->status_reg, &regval);
+	if (ret != 0) {
+		dev_info(&rdev->dev, "Failed to get enable reg: %d\n", ret);
+		return ret;
+	}
+
+	return (regval & info->qi) ? REGULATOR_STATUS_ON : REGULATOR_STATUS_OFF;
+}
+
+static int mt6358_regulator_set_mode(struct regulator_dev *rdev,
+				     unsigned int mode)
+{
+	struct mt6358_regulator_info *info = rdev_get_drvdata(rdev);
+	int val;
+
+	switch (mode) {
+	case REGULATOR_MODE_FAST:
+		val = MT6358_BUCK_MODE_FORCE_PWM;
+		break;
+	case REGULATOR_MODE_NORMAL:
+		val = MT6358_BUCK_MODE_AUTO;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dev_dbg(&rdev->dev, "mt6358 buck set_mode %#x, %#x, %#x, %#x\n",
+		info->modeset_reg, info->modeset_mask,
+		info->modeset_shift, val);
+
+	val <<= info->modeset_shift;
+
+	return regmap_update_bits(rdev->regmap, info->modeset_reg,
+				  info->modeset_mask, val);
+}
+
+static unsigned int mt6358_regulator_get_mode(struct regulator_dev *rdev)
+{
+	struct mt6358_regulator_info *info = rdev_get_drvdata(rdev);
+	int ret, regval;
+
+	ret = regmap_read(rdev->regmap, info->modeset_reg, &regval);
+	if (ret != 0) {
+		dev_err(&rdev->dev,
+			"Failed to get mt6358 buck mode: %d\n", ret);
+		return ret;
+	}
+
+	switch ((regval & info->modeset_mask) >> info->modeset_shift) {
+	case MT6358_BUCK_MODE_AUTO:
+		return REGULATOR_MODE_NORMAL;
+	case MT6358_BUCK_MODE_FORCE_PWM:
+		return REGULATOR_MODE_FAST;
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct regulator_ops mt6358_volt_range_ops = {
+	.list_voltage = regulator_list_voltage_linear_range,
+	.map_voltage = regulator_map_voltage_linear_range,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = mt6358_get_buck_voltage_sel,
+	.set_voltage_time_sel = regulator_set_voltage_time_sel,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6358_get_status,
+	.set_mode = mt6358_regulator_set_mode,
+	.get_mode = mt6358_regulator_get_mode,
+};
+
+static const struct regulator_ops mt6358_volt_table_ops = {
+	.list_voltage = regulator_list_voltage_table,
+	.map_voltage = regulator_map_voltage_iterate,
+	.set_voltage_sel = mt6358_set_voltage_sel,
+	.get_voltage_sel = mt6358_get_voltage_sel,
+	.set_voltage_time_sel = regulator_set_voltage_time_sel,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6358_get_status,
+};
+
+static const struct regulator_ops mt6358_volt_fixed_ops = {
+	.list_voltage = regulator_list_voltage_linear,
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.get_status = mt6358_get_status,
+};
+
+/* The array is indexed by id(MT6358_ID_XXX) */
+static struct mt6358_regulator_info mt6358_regulators[] = {
+	MT6358_BUCK("buck_vdram1", VDRAM1, 500000, 2087500, 12500,
+		    buck_volt_range2, 0x7f, MT6358_BUCK_VDRAM1_DBG0, 0x7f,
+		    0, MT6358_VDRAM1_ANA_CON0, 8),
+	MT6358_BUCK("buck_vcore", VCORE, 500000, 1293750, 6250,
+		    buck_volt_range1, 0x7f, MT6358_BUCK_VCORE_DBG0, 0x7f,
+		    0, MT6358_VCORE_VGPU_ANA_CON0, 1),
+	MT6358_BUCK("buck_vpa", VPA, 500000, 3650000, 50000,
+		    buck_volt_range3, 0x3f, MT6358_BUCK_VPA_DBG0, 0x3f, 0,
+		    MT6358_VPA_ANA_CON0, 3),
+	MT6358_BUCK("buck_vproc11", VPROC11, 500000, 1293750, 6250,
+		    buck_volt_range1, 0x7f, MT6358_BUCK_VPROC11_DBG0, 0x7f,
+		    0, MT6358_VPROC_ANA_CON0, 1),
+	MT6358_BUCK("buck_vproc12", VPROC12, 500000, 1293750, 6250,
+		    buck_volt_range1, 0x7f, MT6358_BUCK_VPROC12_DBG0, 0x7f,
+		    0, MT6358_VPROC_ANA_CON0, 2),
+	MT6358_BUCK("buck_vgpu", VGPU, 500000, 1293750, 6250,
+		    buck_volt_range1, 0x7f, MT6358_BUCK_VGPU_ELR0, 0x7f, 0,
+		    MT6358_VCORE_VGPU_ANA_CON0, 2),
+	MT6358_BUCK("buck_vs2", VS2, 500000, 2087500, 12500,
+		    buck_volt_range2, 0x7f, MT6358_BUCK_VS2_DBG0, 0x7f, 0,
+		    MT6358_VS2_ANA_CON0, 8),
+	MT6358_BUCK("buck_vmodem", VMODEM, 500000, 1293750, 6250,
+		    buck_volt_range1, 0x7f, MT6358_BUCK_VMODEM_DBG0, 0x7f,
+		    0, MT6358_VMODEM_ANA_CON0, 8),
+	MT6358_BUCK("buck_vs1", VS1, 1000000, 2587500, 12500,
+		    buck_volt_range4, 0x7f, MT6358_BUCK_VS1_DBG0, 0x7f, 0,
+		    MT6358_VS1_ANA_CON0, 8),
+	MT6358_REG_FIXED("ldo_vrf12", VRF12,
+			 MT6358_LDO_VRF12_CON0, 0, 1200000),
+	MT6358_REG_FIXED("ldo_vio18", VIO18,
+			 MT6358_LDO_VIO18_CON0, 0, 1800000),
+	MT6358_REG_FIXED("ldo_vcamio", VCAMIO,
+			 MT6358_LDO_VCAMIO_CON0, 0, 1800000),
+	MT6358_REG_FIXED("ldo_vcn18", VCN18, MT6358_LDO_VCN18_CON0, 0, 1800000),
+	MT6358_REG_FIXED("ldo_vfe28", VFE28, MT6358_LDO_VFE28_CON0, 0, 2800000),
+	MT6358_REG_FIXED("ldo_vcn28", VCN28, MT6358_LDO_VCN28_CON0, 0, 2800000),
+	MT6358_REG_FIXED("ldo_vxo22", VXO22, MT6358_LDO_VXO22_CON0, 0, 2200000),
+	MT6358_REG_FIXED("ldo_vaux18", VAUX18,
+			 MT6358_LDO_VAUX18_CON0, 0, 1800000),
+	MT6358_REG_FIXED("ldo_vbif28", VBIF28,
+			 MT6358_LDO_VBIF28_CON0, 0, 2800000),
+	MT6358_REG_FIXED("ldo_vio28", VIO28, MT6358_LDO_VIO28_CON0, 0, 2800000),
+	MT6358_REG_FIXED("ldo_va12", VA12, MT6358_LDO_VA12_CON0, 0, 1200000),
+	MT6358_REG_FIXED("ldo_vrf18", VRF18, MT6358_LDO_VRF18_CON0, 0, 1800000),
+	MT6358_REG_FIXED("ldo_vaud28", VAUD28,
+			 MT6358_LDO_VAUD28_CON0, 0, 2800000),
+	MT6358_LDO("ldo_vdram2", VDRAM2, vdram2_voltages, vdram2_idx,
+		   MT6358_LDO_VDRAM2_CON0, 0, MT6358_LDO_VDRAM2_ELR0, 0x10, 0),
+	MT6358_LDO("ldo_vsim1", VSIM1, vsim_voltages, vsim_idx,
+		   MT6358_LDO_VSIM1_CON0, 0, MT6358_VSIM1_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vibr", VIBR, vibr_voltages, vibr_idx,
+		   MT6358_LDO_VIBR_CON0, 0, MT6358_VIBR_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vusb", VUSB, vusb_voltages, vusb_idx,
+		   MT6358_LDO_VUSB_CON0_0, 0, MT6358_VUSB_ANA_CON0, 0x700, 8),
+	MT6358_LDO("ldo_vcamd", VCAMD, vcamd_voltages, vcamd_idx,
+		   MT6358_LDO_VCAMD_CON0, 0, MT6358_VCAMD_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vefuse", VEFUSE, vefuse_voltages, vefuse_idx,
+		   MT6358_LDO_VEFUSE_CON0, 0, MT6358_VEFUSE_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vmch", VMCH, vmch_vemc_voltages, vmch_vemc_idx,
+		   MT6358_LDO_VMCH_CON0, 0, MT6358_VMCH_ANA_CON0, 0x700, 8),
+	MT6358_LDO("ldo_vcama1", VCAMA1, vcama_voltages, vcama_idx,
+		   MT6358_LDO_VCAMA1_CON0, 0, MT6358_VCAMA1_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vemc", VEMC, vmch_vemc_voltages, vmch_vemc_idx,
+		   MT6358_LDO_VEMC_CON0, 0, MT6358_VEMC_ANA_CON0, 0x700, 8),
+	MT6358_LDO("ldo_vcn33_bt", VCN33_BT, vcn33_bt_wifi_voltages,
+		   vcn33_bt_wifi_idx, MT6358_LDO_VCN33_CON0_0,
+		   0, MT6358_VCN33_ANA_CON0, 0x300, 8),
+	MT6358_LDO("ldo_vcn33_wifi", VCN33_WIFI, vcn33_bt_wifi_voltages,
+		   vcn33_bt_wifi_idx, MT6358_LDO_VCN33_CON0_1,
+		   0, MT6358_VCN33_ANA_CON0, 0x300, 8),
+	MT6358_LDO("ldo_vcama2", VCAMA2, vcama_voltages, vcama_idx,
+		   MT6358_LDO_VCAMA2_CON0, 0, MT6358_VCAMA2_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vmc", VMC, vmc_voltages, vmc_idx,
+		   MT6358_LDO_VMC_CON0, 0, MT6358_VMC_ANA_CON0, 0xf00, 8),
+	MT6358_LDO("ldo_vldo28", VLDO28, vldo28_voltages, vldo28_idx,
+		   MT6358_LDO_VLDO28_CON0_0, 0,
+		   MT6358_VLDO28_ANA_CON0, 0x300, 8),
+	MT6358_LDO("ldo_vsim2", VSIM2, vsim_voltages, vsim_idx,
+		   MT6358_LDO_VSIM2_CON0, 0, MT6358_VSIM2_ANA_CON0, 0xf00, 8),
+	MT6358_LDO1("ldo_vsram_proc11", VSRAM_PROC11, 500000, 1293750, 6250,
+		    buck_volt_range1, MT6358_LDO_VSRAM_PROC11_DBG0, 0x7f, 8,
+		    MT6358_LDO_VSRAM_CON0, 0x7f),
+	MT6358_LDO1("ldo_vsram_others", VSRAM_OTHERS, 500000, 1293750, 6250,
+		    buck_volt_range1, MT6358_LDO_VSRAM_OTHERS_DBG0, 0x7f, 8,
+		    MT6358_LDO_VSRAM_CON2, 0x7f),
+	MT6358_LDO1("ldo_vsram_gpu", VSRAM_GPU, 500000, 1293750, 6250,
+		    buck_volt_range1, MT6358_LDO_VSRAM_GPU_DBG0, 0x7f, 8,
+		    MT6358_LDO_VSRAM_CON3, 0x7f),
+	MT6358_LDO1("ldo_vsram_proc12", VSRAM_PROC12, 500000, 1293750, 6250,
+		    buck_volt_range1, MT6358_LDO_VSRAM_PROC12_DBG0, 0x7f, 8,
+		    MT6358_LDO_VSRAM_CON1, 0x7f),
+};
+
+static int mt6358_regulator_probe(struct platform_device *pdev)
+{
+	struct mt6397_chip *mt6397 = dev_get_drvdata(pdev->dev.parent);
+	struct regulator_config config = {};
+	struct regulator_dev *rdev;
+	int i;
+
+	for (i = 0; i < MT6358_MAX_REGULATOR; i++) {
+		config.dev = &pdev->dev;
+		config.driver_data = &mt6358_regulators[i];
+		config.regmap = mt6397->regmap;
+
+		rdev = devm_regulator_register(&pdev->dev,
+					       &mt6358_regulators[i].desc,
+					       &config);
+		if (IS_ERR(rdev)) {
+			dev_err(&pdev->dev, "failed to register %s\n",
+				mt6358_regulators[i].desc.name);
+			return PTR_ERR(rdev);
+		}
+	}
+
+	return 0;
+}
+
+static const struct platform_device_id mt6358_platform_ids[] = {
+	{"mt6358-regulator", 0},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(platform, mt6358_platform_ids);
+
+static struct platform_driver mt6358_regulator_driver = {
+	.driver = {
+		.name = "mt6358-regulator",
+	},
+	.probe = mt6358_regulator_probe,
+	.id_table = mt6358_platform_ids,
+};
+
+module_platform_driver(mt6358_regulator_driver);
+
+MODULE_AUTHOR("Hsin-Hsiung Wang <hsin-hsiung.wang@mediatek.com>");
+MODULE_DESCRIPTION("Regulator Driver for MediaTek MT6358 PMIC");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/regulator/mt6358-regulator.h b/include/linux/regulator/mt6358-regulator.h
new file mode 100644
index 000000000000..1cc304946d09
--- /dev/null
+++ b/include/linux/regulator/mt6358-regulator.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ */
+
+#ifndef __LINUX_REGULATOR_MT6358_H
+#define __LINUX_REGULATOR_MT6358_H
+
+enum {
+	MT6358_ID_VDRAM1 = 0,
+	MT6358_ID_VCORE,
+	MT6358_ID_VPA,
+	MT6358_ID_VPROC11,
+	MT6358_ID_VPROC12,
+	MT6358_ID_VGPU,
+	MT6358_ID_VS2,
+	MT6358_ID_VMODEM,
+	MT6358_ID_VS1,
+	MT6358_ID_VDRAM2 = 9,
+	MT6358_ID_VSIM1,
+	MT6358_ID_VIBR,
+	MT6358_ID_VRF12,
+	MT6358_ID_VIO18,
+	MT6358_ID_VUSB,
+	MT6358_ID_VCAMIO,
+	MT6358_ID_VCAMD,
+	MT6358_ID_VCN18,
+	MT6358_ID_VFE28,
+	MT6358_ID_VSRAM_PROC11,
+	MT6358_ID_VCN28,
+	MT6358_ID_VSRAM_OTHERS,
+	MT6358_ID_VSRAM_GPU,
+	MT6358_ID_VXO22,
+	MT6358_ID_VEFUSE,
+	MT6358_ID_VAUX18,
+	MT6358_ID_VMCH,
+	MT6358_ID_VBIF28,
+	MT6358_ID_VSRAM_PROC12,
+	MT6358_ID_VCAMA1,
+	MT6358_ID_VEMC,
+	MT6358_ID_VIO28,
+	MT6358_ID_VA12,
+	MT6358_ID_VRF18,
+	MT6358_ID_VCN33_BT,
+	MT6358_ID_VCN33_WIFI,
+	MT6358_ID_VCAMA2,
+	MT6358_ID_VMC,
+	MT6358_ID_VLDO28,
+	MT6358_ID_VAUD28,
+	MT6358_ID_VSIM2,
+	MT6358_ID_RG_MAX,
+};
+
+#define MT6358_MAX_REGULATOR	MT6358_ID_RG_MAX
+
+#endif /* __LINUX_REGULATOR_MT6358_H */
-- 
cgit v1.2.3


From 4cc4531c310e592cf624148ae59c64f930f12e39 Mon Sep 17 00:00:00 2001
From: Vidyakumar Athota <vathota@codeaurora.org>
Date: Thu, 22 Aug 2019 10:56:50 +0100
Subject: ALSA: pcm: add support for 352.8KHz and 384KHz sample rate

Most of the modern codecs supports 352.8KHz and 384KHz sample rates.
Currenlty HW params fails to set 352.8Kz and 384KHz sample rate
as these are not in known rates list.
Add these new rates to known list to allow them.

This patch also adds defines in pcm.h so that drivers can use it.

Signed-off-by: Vidyakumar Athota <vathota@codeaurora.org>
Signed-off-by: Banajit Goswami <bgoswami@codeaurora.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20190822095653.7200-2-srinivas.kandagatla@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/pcm.h     | 5 +++++
 sound/core/pcm_native.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 1e9bb1c91770..bbe6eb1ff5d2 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -117,6 +117,8 @@ struct snd_pcm_ops {
 #define SNDRV_PCM_RATE_96000		(1<<10)		/* 96000Hz */
 #define SNDRV_PCM_RATE_176400		(1<<11)		/* 176400Hz */
 #define SNDRV_PCM_RATE_192000		(1<<12)		/* 192000Hz */
+#define SNDRV_PCM_RATE_352800		(1<<13)		/* 352800Hz */
+#define SNDRV_PCM_RATE_384000		(1<<14)		/* 384000Hz */
 
 #define SNDRV_PCM_RATE_CONTINUOUS	(1<<30)		/* continuous range */
 #define SNDRV_PCM_RATE_KNOT		(1<<31)		/* supports more non-continuos rates */
@@ -129,6 +131,9 @@ struct snd_pcm_ops {
 					 SNDRV_PCM_RATE_88200|SNDRV_PCM_RATE_96000)
 #define SNDRV_PCM_RATE_8000_192000	(SNDRV_PCM_RATE_8000_96000|SNDRV_PCM_RATE_176400|\
 					 SNDRV_PCM_RATE_192000)
+#define SNDRV_PCM_RATE_8000_384000	(SNDRV_PCM_RATE_8000_192000|\
+					 SNDRV_PCM_RATE_352800|\
+					 SNDRV_PCM_RATE_384000)
 #define _SNDRV_PCM_FMTBIT(fmt)		(1ULL << (__force int)SNDRV_PCM_FORMAT_##fmt)
 #define SNDRV_PCM_FMTBIT_S8		_SNDRV_PCM_FMTBIT(S8)
 #define SNDRV_PCM_FMTBIT_U8		_SNDRV_PCM_FMTBIT(U8)
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 860543a4c840..34390be3fb0f 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2168,7 +2168,7 @@ static int snd_pcm_hw_rule_sample_bits(struct snd_pcm_hw_params *params,
 
 static const unsigned int rates[] = {
 	5512, 8000, 11025, 16000, 22050, 32000, 44100,
-	48000, 64000, 88200, 96000, 176400, 192000
+	48000, 64000, 88200, 96000, 176400, 192000, 352800, 384000
 };
 
 const struct snd_pcm_hw_constraint_list snd_pcm_known_rates = {
-- 
cgit v1.2.3


From a67e408241783575716fcf3f79d0878f6cef0273 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 23 Aug 2019 13:38:44 +0200
Subject: hrtimer: Add kernel doc annotation for HRTIMER_MODE_HARD

Add kernel doc annotation for HRTIMER_MODE_HARD.

Fixes: ae6683d815895 ("hrtimer: Introduce HARD expiry mode")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190823113845.12125-2-bigeasy@linutronix.de
---
 include/linux/hrtimer.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5df4bcff96d5..1b9a51a1bccb 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -32,6 +32,8 @@ struct hrtimer_cpu_base;
  *				  when starting the timer)
  * HRTIMER_MODE_SOFT		- Timer callback function will be executed in
  *				  soft irq context
+ * HRTIMER_MODE_HARD		- Timer callback function will be executed in
+ *				  hard irq context even on PREEMPT_RT.
  */
 enum hrtimer_mode {
 	HRTIMER_MODE_ABS	= 0x00,
-- 
cgit v1.2.3


From 7ce2e76a0420801fb4b53b9e6850940e6b326433 Mon Sep 17 00:00:00 2001
From: Krzysztof Wilczynski <kw@linux.com>
Date: Tue, 27 Aug 2019 11:56:20 +0200
Subject: PCI: Move ASPM declarations to linux/pci.h

Move ASPM definitions and function prototypes from include/linux/pci-aspm.h
to include/linux/pci.h so users only need to include <linux/pci.h>:

  PCIE_LINK_STATE_L0S
  PCIE_LINK_STATE_L1
  PCIE_LINK_STATE_CLKPM
  pci_disable_link_state()
  pci_disable_link_state_locked()
  pcie_no_aspm()

No functional changes intended.

Link: https://lore.kernel.org/r/20190827095620.11213-1-kw@linux.com
Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/acpi/pci_root.c                         |  1 -
 drivers/char/xillybus/xillybus_pcie.c           |  1 -
 drivers/net/ethernet/intel/e1000e/e1000.h       |  1 -
 drivers/net/ethernet/jme.c                      |  1 -
 drivers/net/ethernet/realtek/r8169_main.c       |  1 -
 drivers/net/wireless/ath/ath5k/pci.c            |  1 -
 drivers/net/wireless/intel/iwlegacy/3945-mac.c  |  1 -
 drivers/net/wireless/intel/iwlegacy/4965-mac.c  |  1 -
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c |  1 -
 drivers/pci/pci-acpi.c                          |  1 -
 drivers/pci/pcie/aspm.c                         |  1 -
 drivers/pci/quirks.c                            |  1 -
 drivers/scsi/aacraid/linit.c                    |  1 -
 drivers/scsi/hpsa.c                             |  1 -
 drivers/scsi/mpt3sas/mpt3sas_scsih.c            |  1 -
 include/linux/pci-aspm.h                        | 36 -------------------------
 include/linux/pci.h                             | 18 +++++++++++++
 17 files changed, 18 insertions(+), 51 deletions(-)
 delete mode 100644 include/linux/pci-aspm.h

(limited to 'include')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 314a187ed572..d1e666ef3fcc 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -15,7 +15,6 @@
 #include <linux/pm_runtime.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
-#include <linux/pci-aspm.h>
 #include <linux/dmar.h>
 #include <linux/acpi.h>
 #include <linux/slab.h>
diff --git a/drivers/char/xillybus/xillybus_pcie.c b/drivers/char/xillybus/xillybus_pcie.c
index 02c15952b103..18b0c392bc93 100644
--- a/drivers/char/xillybus/xillybus_pcie.c
+++ b/drivers/char/xillybus/xillybus_pcie.c
@@ -9,7 +9,6 @@
 
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include "xillybus.h"
 
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 34cd67951aec..6c51b1bad8c4 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -13,7 +13,6 @@
 #include <linux/io.h>
 #include <linux/netdevice.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/crc32.h>
 #include <linux/if_vlan.h>
 #include <linux/timecounter.h>
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 0b668357db4d..57e8aea98969 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 0637c6752a78..a6d8e7f5fde7 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -28,7 +28,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/prefetch.h>
-#include <linux/pci-aspm.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
 
diff --git a/drivers/net/wireless/ath/ath5k/pci.c b/drivers/net/wireless/ath/ath5k/pci.c
index c6156cc38940..d5ee32ce9eb3 100644
--- a/drivers/net/wireless/ath/ath5k/pci.c
+++ b/drivers/net/wireless/ath/ath5k/pci.c
@@ -18,7 +18,6 @@
 
 #include <linux/nl80211.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/etherdevice.h>
 #include <linux/module.h>
 #include "../ath.h"
diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
index b82da75a9ae3..4fbcc7fba3cc 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index fa2c02881939..ffb705b18fb1 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -18,7 +18,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/delay.h>
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index f5df5b370d78..4c308e33ee21 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -62,7 +62,6 @@
  *
  *****************************************************************************/
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/debugfs.h>
 #include <linux/sched.h>
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 45049f558860..dd520fe927db 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -14,7 +14,6 @@
 #include <linux/msi.h>
 #include <linux/pci_hotplug.h>
 #include <linux/module.h>
-#include <linux/pci-aspm.h>
 #include <linux/pci-acpi.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm_qos.h>
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index e44af7f4d37f..ad6259ec51a6 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -18,7 +18,6 @@
 #include <linux/slab.h>
 #include <linux/jiffies.h>
 #include <linux/delay.h>
-#include <linux/pci-aspm.h>
 #include "../pci.h"
 
 #ifdef MODULE_PARAM_PREFIX
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 208aacf39329..9ac1a7564c9e 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -20,7 +20,6 @@
 #include <linux/delay.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
-#include <linux/pci-aspm.h>
 #include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/ktime.h>
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 644f7f5c61a2..4a858789e6c5 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -27,7 +27,6 @@
 #include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
-#include <linux/pci-aspm.h>
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 43a6b5350775..148663373f7d 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -21,7 +21,6 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 717ba0845a2a..27fdbc165446 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -51,7 +51,6 @@
 #include <linux/workqueue.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <linux/pci-aspm.h>
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h
deleted file mode 100644
index 67064145d76e..000000000000
--- a/include/linux/pci-aspm.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *	aspm.h
- *
- *	PCI Express ASPM defines and function prototypes
- *
- *	Copyright (C) 2007 Intel Corp.
- *		Zhang Yanmin (yanmin.zhang@intel.com)
- *		Shaohua Li (shaohua.li@intel.com)
- *
- *	For more information, please consult the following manuals (look at
- *	http://www.pcisig.com/ for how to get them):
- *
- *	PCI Express Specification
- */
-
-#ifndef LINUX_ASPM_H
-#define LINUX_ASPM_H
-
-#include <linux/pci.h>
-
-#define PCIE_LINK_STATE_L0S	1
-#define PCIE_LINK_STATE_L1	2
-#define PCIE_LINK_STATE_CLKPM	4
-
-#ifdef CONFIG_PCIEASPM
-int pci_disable_link_state(struct pci_dev *pdev, int state);
-int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
-void pcie_no_aspm(void);
-#else
-static inline int pci_disable_link_state(struct pci_dev *pdev, int state)
-{ return 0; }
-static inline void pcie_no_aspm(void) { }
-#endif
-
-#endif /* LINUX_ASPM_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..b4e479921e07 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -6,12 +6,18 @@
  *	Copyright 1994, Drew Eckhardt
  *	Copyright 1997--1999 Martin Mares <mj@ucw.cz>
  *
+ *	PCI Express ASPM defines and function prototypes
+ *	Copyright (c) 2007 Intel Corp.
+ *		Zhang Yanmin (yanmin.zhang@intel.com)
+ *		Shaohua Li (shaohua.li@intel.com)
+ *
  *	For more information, please consult the following manuals (look at
  *	http://www.pcisig.com/ for how to get them):
  *
  *	PCI BIOS Specification
  *	PCI Local Bus Specification
  *	PCI to PCI Bridge Specification
+ *	PCI Express Specification
  *	PCI System Design Guide
  */
 #ifndef LINUX_PCI_H
@@ -1565,9 +1571,21 @@ extern bool pcie_ports_native;
 #define pcie_ports_native	false
 #endif
 
+#define PCIE_LINK_STATE_L0S	1
+#define PCIE_LINK_STATE_L1	2
+#define PCIE_LINK_STATE_CLKPM	4
+
 #ifdef CONFIG_PCIEASPM
+int pci_disable_link_state(struct pci_dev *pdev, int state);
+int pci_disable_link_state_locked(struct pci_dev *pdev, int state);
+void pcie_no_aspm(void);
 bool pcie_aspm_support_enabled(void);
 #else
+static inline int pci_disable_link_state(struct pci_dev *pdev, int state)
+{ return 0; }
+static inline int pci_disable_link_state_locked(struct pci_dev *pdev, int state)
+{ return 0; }
+static inline void pcie_no_aspm(void) { }
 static inline bool pcie_aspm_support_enabled(void) { return false; }
 #endif
 
-- 
cgit v1.2.3


From e8e4eb0fbeda570b16464208aebf5caccfb6eb95 Mon Sep 17 00:00:00 2001
From: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Date: Thu, 8 Aug 2019 20:19:48 +0200
Subject: asm-generic/div64: Fix documentation of do_div() parameter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Contrary to the description, the first parameter (n) should not be passed
as a pointer, but directly as an lvalue. This is possible because do_div() is
a macro.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lkml.kernel.org/r/20190808181948.27659-1-j.neuschaefer@gmx.net
---
 include/asm-generic/div64.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index dc9726fdac8f..b2a9c74efa55 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -28,12 +28,12 @@
 
 /**
  * do_div - returns 2 values: calculate remainder and update new dividend
- * @n: pointer to uint64_t dividend (will be updated)
+ * @n: uint64_t dividend (will be updated)
  * @base: uint32_t divisor
  *
  * Summary:
- * ``uint32_t remainder = *n % base;``
- * ``*n = *n / base;``
+ * ``uint32_t remainder = n % base;``
+ * ``n = n / base;``
  *
  * Return: (uint32_t)remainder
  *
-- 
cgit v1.2.3


From edbd7f318cd66cec4588a28ba6985b014456dbca Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 22 Aug 2019 21:12:13 -0500
Subject: drm/shmem: Use mutex_trylock in drm_gem_shmem_purge

Lockdep reports a circular locking dependency with pages_lock taken in
the shrinker callback. The deadlock can't actually happen with current
users at least as a BO will never be purgeable when pages_lock is held.
To be safe, let's use mutex_trylock() instead and bail if a BO is locked
already.

WARNING: possible circular locking dependency detected
5.3.0-rc1+ #100 Tainted: G             L
------------------------------------------------------
kswapd0/171 is trying to acquire lock:
000000009b9823fd (&shmem->pages_lock){+.+.}, at: drm_gem_shmem_purge+0x20/0x40

but task is already holding lock:
00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (fs_reclaim){+.+.}:
       fs_reclaim_acquire.part.18+0x34/0x40
       fs_reclaim_acquire+0x20/0x28
       __kmalloc_node+0x6c/0x4c0
       kvmalloc_node+0x38/0xa8
       drm_gem_get_pages+0x80/0x1d0
       drm_gem_shmem_get_pages+0x58/0xa0
       drm_gem_shmem_get_pages_sgt+0x48/0xd0
       panfrost_mmu_map+0x38/0xf8 [panfrost]
       panfrost_gem_open+0xc0/0xe8 [panfrost]
       drm_gem_handle_create_tail+0xe8/0x198
       drm_gem_handle_create+0x3c/0x50
       panfrost_gem_create_with_handle+0x70/0xa0 [panfrost]
       panfrost_ioctl_create_bo+0x48/0x80 [panfrost]
       drm_ioctl_kernel+0xb8/0x110
       drm_ioctl+0x244/0x3f0
       do_vfs_ioctl+0xbc/0x910
       ksys_ioctl+0x78/0xa8
       __arm64_sys_ioctl+0x1c/0x28
       el0_svc_common.constprop.0+0x90/0x168
       el0_svc_handler+0x28/0x78
       el0_svc+0x8/0xc

-> #0 (&shmem->pages_lock){+.+.}:
       __lock_acquire+0xa2c/0x1d70
       lock_acquire+0xdc/0x228
       __mutex_lock+0x8c/0x800
       mutex_lock_nested+0x1c/0x28
       drm_gem_shmem_purge+0x20/0x40
       panfrost_gem_shrinker_scan+0xc0/0x180 [panfrost]
       do_shrink_slab+0x208/0x500
       shrink_slab+0x10c/0x2c0
       shrink_node+0x28c/0x4d8
       balance_pgdat+0x2c8/0x570
       kswapd+0x22c/0x638
       kthread+0x128/0x130
       ret_from_fork+0x10/0x18

other info that might help us debug this:

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(fs_reclaim);
                               lock(&shmem->pages_lock);
                               lock(fs_reclaim);
  lock(&shmem->pages_lock);

 *** DEADLOCK ***

3 locks held by kswapd0/171:
 #0: 00000000f82369b6 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x0/0x40
 #1: 00000000ceb37808 (shrinker_rwsem){++++}, at: shrink_slab+0xbc/0x2c0
 #2: 00000000f31efa81 (&pfdev->shrinker_lock){+.+.}, at: panfrost_gem_shrinker_scan+0x34/0x180 [panfrost]

Fixes: 17acb9f35ed7 ("drm/shmem: Add madvise state and purge helpers")
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Sean Paul <sean@poorly.run>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190823021216.5862-6-robh@kernel.org
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 7 +++++--
 include/drm/drm_gem_shmem_helper.h     | 2 +-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 5423ec56b535..f5918707672f 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -415,13 +415,16 @@ void drm_gem_shmem_purge_locked(struct drm_gem_object *obj)
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge_locked);
 
-void drm_gem_shmem_purge(struct drm_gem_object *obj)
+bool drm_gem_shmem_purge(struct drm_gem_object *obj)
 {
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
 
-	mutex_lock(&shmem->pages_lock);
+	if (!mutex_trylock(&shmem->pages_lock))
+		return false;
 	drm_gem_shmem_purge_locked(obj);
 	mutex_unlock(&shmem->pages_lock);
+
+	return true;
 }
 EXPORT_SYMBOL(drm_gem_shmem_purge);
 
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index ce1600fdfc3e..01f514521687 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -134,7 +134,7 @@ static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem
 }
 
 void drm_gem_shmem_purge_locked(struct drm_gem_object *obj);
-void drm_gem_shmem_purge(struct drm_gem_object *obj);
+bool drm_gem_shmem_purge(struct drm_gem_object *obj);
 
 struct drm_gem_shmem_object *
 drm_gem_shmem_create_with_handle(struct drm_file *file_priv,
-- 
cgit v1.2.3


From e003f9af9b8dc110dd62b7eaa3a796fd81d4d256 Mon Sep 17 00:00:00 2001
From: Rui Miguel Silva <rui.silva@linaro.org>
Date: Wed, 28 Aug 2019 13:48:25 +0100
Subject: staging: greybus: fix more header declarations

More headers needed to be fixed when moving greybus out of staging and
enabling the COMPILE_TEST option.

Add forward declarations for the needed structures.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Rui Miguel Silva <rui.silva@linaro.org>
Reviewed-by: Johan Hovold <johan@kernel.org>
Link: https://lore.kernel.org/r/20190828124825.20800-1-rui.silva@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/greybus/operation.h | 2 +-
 include/linux/greybus/svc.h       | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/greybus/operation.h b/include/linux/greybus/operation.h
index 8ca864bba23e..cb8e4ef45222 100644
--- a/include/linux/greybus/operation.h
+++ b/include/linux/greybus/operation.h
@@ -15,7 +15,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
-
+struct gb_host_device;
 struct gb_operation;
 
 /* The default amount of time a request is given to complete */
diff --git a/include/linux/greybus/svc.h b/include/linux/greybus/svc.h
index 507f8bd4e4c8..5afaf5f06856 100644
--- a/include/linux/greybus/svc.h
+++ b/include/linux/greybus/svc.h
@@ -12,6 +12,8 @@
 #include <linux/types.h>
 #include <linux/device.h>
 
+struct gb_svc_l2_timer_cfg;
+
 #define GB_SVC_CPORT_FLAG_E2EFC		BIT(0)
 #define GB_SVC_CPORT_FLAG_CSD_N		BIT(1)
 #define GB_SVC_CPORT_FLAG_CSV_N		BIT(2)
-- 
cgit v1.2.3


From e2797ad31fb40f4ff59ebc4314d6f000d713bad9 Mon Sep 17 00:00:00 2001
From: Krzysztof Wilczynski <kw@linux.com>
Date: Tue, 27 Aug 2019 11:49:49 +0200
Subject: PCI/ACPI: Rename _HPX structs from hpp_* to hpx_*

The names of the hpp_type0, hpp_type1 and hpp_type2 structs suggest that
they're related to _HPP, when in fact they're related to _HPX.

The struct hpp_type0 denotes an _HPX Type 0 setting record that supersedes
the _HPP setting record, and it has been used interchangeably for _HPP as
per the ACPI specification (see version 6.3, section 6.2.9.1) which states
that it should be applied to PCI, PCI-X and PCI Express devices, with
settings being ignored if they are not applicable.

Rename them to hpx_type0, hpx_type1 and hpx_type2 to reflect their relation
to _HPX rather than _HPP.

Link: https://lore.kernel.org/r/20190827094951.10613-2-kw@linux.com
Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-acpi.c      | 28 +++++++++---------
 drivers/pci/probe.c         | 72 ++++++++++++++++++++++-----------------------
 include/linux/pci_hotplug.h | 30 +++++++++----------
 3 files changed, 64 insertions(+), 66 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 45049f558860..02addc47edae 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -119,7 +119,7 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
 }
 
 static acpi_status decode_type0_hpx_record(union acpi_object *record,
-					   struct hpp_type0 *hpx0)
+					   struct hpx_type0 *hpx0)
 {
 	int i;
 	union acpi_object *fields = record->package.elements;
@@ -147,7 +147,7 @@ static acpi_status decode_type0_hpx_record(union acpi_object *record,
 }
 
 static acpi_status decode_type1_hpx_record(union acpi_object *record,
-					   struct hpp_type1 *hpx1)
+					   struct hpx_type1 *hpx1)
 {
 	int i;
 	union acpi_object *fields = record->package.elements;
@@ -174,7 +174,7 @@ static acpi_status decode_type1_hpx_record(union acpi_object *record,
 }
 
 static acpi_status decode_type2_hpx_record(union acpi_object *record,
-					   struct hpp_type2 *hpx2)
+					   struct hpx_type2 *hpx2)
 {
 	int i;
 	union acpi_object *fields = record->package.elements;
@@ -277,9 +277,9 @@ static acpi_status acpi_run_hpx(struct pci_dev *dev, acpi_handle handle,
 	acpi_status status;
 	struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
 	union acpi_object *package, *record, *fields;
-	struct hpp_type0 hpx0;
-	struct hpp_type1 hpx1;
-	struct hpp_type2 hpx2;
+	struct hpx_type0 hpx0;
+	struct hpx_type1 hpx1;
+	struct hpx_type2 hpx2;
 	u32 type;
 	int i;
 
@@ -353,10 +353,10 @@ static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle,
 	acpi_status status;
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *package, *fields;
-	struct hpp_type0 hpp0;
+	struct hpx_type0 hpx0;
 	int i;
 
-	memset(&hpp0, 0, sizeof(hpp0));
+	memset(&hpx0, 0, sizeof(hpx0));
 
 	status = acpi_evaluate_object(handle, "_HPP", NULL, &buffer);
 	if (ACPI_FAILURE(status))
@@ -377,13 +377,13 @@ static acpi_status acpi_run_hpp(struct pci_dev *dev, acpi_handle handle,
 		}
 	}
 
-	hpp0.revision        = 1;
-	hpp0.cache_line_size = fields[0].integer.value;
-	hpp0.latency_timer   = fields[1].integer.value;
-	hpp0.enable_serr     = fields[2].integer.value;
-	hpp0.enable_perr     = fields[3].integer.value;
+	hpx0.revision        = 1;
+	hpx0.cache_line_size = fields[0].integer.value;
+	hpx0.latency_timer   = fields[1].integer.value;
+	hpx0.enable_serr     = fields[2].integer.value;
+	hpx0.enable_perr     = fields[3].integer.value;
 
-	hp_ops->program_type0(dev, &hpp0);
+	hp_ops->program_type0(dev, &hpx0);
 
 exit:
 	kfree(buffer.pointer);
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index a3c7338fad86..120c70b5003b 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1920,7 +1920,7 @@ static void pci_configure_mps(struct pci_dev *dev)
 		 p_mps, mps, mpss);
 }
 
-static struct hpp_type0 pci_default_type0 = {
+static struct hpx_type0 pci_default_type0 = {
 	.revision = 1,
 	.cache_line_size = 8,
 	.latency_timer = 0x40,
@@ -1928,44 +1928,44 @@ static struct hpp_type0 pci_default_type0 = {
 	.enable_perr = 0,
 };
 
-static void program_hpp_type0(struct pci_dev *dev, struct hpp_type0 *hpp)
+static void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx)
 {
 	u16 pci_cmd, pci_bctl;
 
-	if (!hpp)
-		hpp = &pci_default_type0;
+	if (!hpx)
+		hpx = &pci_default_type0;
 
-	if (hpp->revision > 1) {
+	if (hpx->revision > 1) {
 		pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
-			 hpp->revision);
-		hpp = &pci_default_type0;
+			 hpx->revision);
+		hpx = &pci_default_type0;
 	}
 
-	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpp->cache_line_size);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpp->latency_timer);
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpx->cache_line_size);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpx->latency_timer);
 	pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
-	if (hpp->enable_serr)
+	if (hpx->enable_serr)
 		pci_cmd |= PCI_COMMAND_SERR;
-	if (hpp->enable_perr)
+	if (hpx->enable_perr)
 		pci_cmd |= PCI_COMMAND_PARITY;
 	pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
 
 	/* Program bridge control value */
 	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
-				      hpp->latency_timer);
+				      hpx->latency_timer);
 		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
-		if (hpp->enable_perr)
+		if (hpx->enable_perr)
 			pci_bctl |= PCI_BRIDGE_CTL_PARITY;
 		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
 	}
 }
 
-static void program_hpp_type1(struct pci_dev *dev, struct hpp_type1 *hpp)
+static void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx)
 {
 	int pos;
 
-	if (!hpp)
+	if (!hpx)
 		return;
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
@@ -1990,20 +1990,20 @@ static bool pcie_root_rcb_set(struct pci_dev *dev)
 	return false;
 }
 
-static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
+static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
 {
 	int pos;
 	u32 reg32;
 
-	if (!hpp)
+	if (!hpx)
 		return;
 
 	if (!pci_is_pcie(dev))
 		return;
 
-	if (hpp->revision > 1) {
+	if (hpx->revision > 1) {
 		pci_warn(dev, "PCIe settings rev %d not supported\n",
-			 hpp->revision);
+			 hpx->revision);
 		return;
 	}
 
@@ -2012,14 +2012,14 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
 	 * those to make sure they're consistent with the rest of the
 	 * platform.
 	 */
-	hpp->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
+	hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
 				    PCI_EXP_DEVCTL_READRQ;
-	hpp->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
+	hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
 				    PCI_EXP_DEVCTL_READRQ);
 
 	/* Initialize Device Control Register */
 	pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
-			~hpp->pci_exp_devctl_and, hpp->pci_exp_devctl_or);
+			~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);
 
 	/* Initialize Link Control Register */
 	if (pcie_cap_has_lnkctl(dev)) {
@@ -2028,13 +2028,13 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
 		 * If the Root Port supports Read Completion Boundary of
 		 * 128, set RCB to 128.  Otherwise, clear it.
 		 */
-		hpp->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
-		hpp->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
+		hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
+		hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
 		if (pcie_root_rcb_set(dev))
-			hpp->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
+			hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
 
 		pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
-			~hpp->pci_exp_lnkctl_and, hpp->pci_exp_lnkctl_or);
+			~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
 	}
 
 	/* Find Advanced Error Reporting Enhanced Capability */
@@ -2044,22 +2044,22 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp)
 
 	/* Initialize Uncorrectable Error Mask Register */
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
-	reg32 = (reg32 & hpp->unc_err_mask_and) | hpp->unc_err_mask_or;
+	reg32 = (reg32 & hpx->unc_err_mask_and) | hpx->unc_err_mask_or;
 	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
 
 	/* Initialize Uncorrectable Error Severity Register */
 	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
-	reg32 = (reg32 & hpp->unc_err_sever_and) | hpp->unc_err_sever_or;
+	reg32 = (reg32 & hpx->unc_err_sever_and) | hpx->unc_err_sever_or;
 	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
 
 	/* Initialize Correctable Error Mask Register */
 	pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
-	reg32 = (reg32 & hpp->cor_err_mask_and) | hpp->cor_err_mask_or;
+	reg32 = (reg32 & hpx->cor_err_mask_and) | hpx->cor_err_mask_or;
 	pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
 
 	/* Initialize Advanced Error Capabilities and Control Register */
 	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
-	reg32 = (reg32 & hpp->adv_err_cap_and) | hpp->adv_err_cap_or;
+	reg32 = (reg32 & hpx->adv_err_cap_and) | hpx->adv_err_cap_or;
 
 	/* Don't enable ECRC generation or checking if unsupported */
 	if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
@@ -2178,15 +2178,15 @@ static void program_hpx_type3_register(struct pci_dev *dev,
 		pos, orig_value, write_reg);
 }
 
-static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx3)
+static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx)
 {
-	if (!hpx3)
+	if (!hpx)
 		return;
 
 	if (!pci_is_pcie(dev))
 		return;
 
-	program_hpx_type3_register(dev, hpx3);
+	program_hpx_type3_register(dev, hpx);
 }
 
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
@@ -2370,9 +2370,9 @@ static void pci_configure_serr(struct pci_dev *dev)
 static void pci_configure_device(struct pci_dev *dev)
 {
 	static const struct hotplug_program_ops hp_ops = {
-		.program_type0 = program_hpp_type0,
-		.program_type1 = program_hpp_type1,
-		.program_type2 = program_hpp_type2,
+		.program_type0 = program_hpx_type0,
+		.program_type1 = program_hpx_type1,
+		.program_type2 = program_hpx_type2,
 		.program_type3 = program_hpx_type3,
 	};
 
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index f694eb2ca978..247a49ee27b3 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -86,25 +86,25 @@ void pci_hp_deregister(struct hotplug_slot *slot);
 #define pci_hp_initialize(slot, bus, nr, name) \
 	__pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME)
 
-/* PCI Setting Record (Type 0) */
-struct hpp_type0 {
-	u32 revision;
-	u8  cache_line_size;
-	u8  latency_timer;
+/* _HPX PCI Setting Record (Type 0); same as _HPP */
+struct hpx_type0 {
+	u32 revision;		/* Not present in _HPP */
+	u8  cache_line_size;	/* Not applicable to PCIe */
+	u8  latency_timer;	/* Not applicable to PCIe */
 	u8  enable_serr;
 	u8  enable_perr;
 };
 
-/* PCI-X Setting Record (Type 1) */
-struct hpp_type1 {
+/* _HPX PCI-X Setting Record (Type 1) */
+struct hpx_type1 {
 	u32 revision;
 	u8  max_mem_read;
 	u8  avg_max_split;
 	u16 tot_max_split;
 };
 
-/* PCI Express Setting Record (Type 2) */
-struct hpp_type2 {
+/* _HPX PCI Express Setting Record (Type 2) */
+struct hpx_type2 {
 	u32 revision;
 	u32 unc_err_mask_and;
 	u32 unc_err_mask_or;
@@ -124,9 +124,7 @@ struct hpp_type2 {
 	u32 sec_unc_err_mask_or;
 };
 
-/*
- * _HPX PCI Express Setting Record (Type 3)
- */
+/* _HPX PCI Express Setting Record (Type 3) */
 struct hpx_type3 {
 	u16 device_type;
 	u16 function_type;
@@ -145,10 +143,10 @@ struct hpx_type3 {
 };
 
 struct hotplug_program_ops {
-	void (*program_type0)(struct pci_dev *dev, struct hpp_type0 *hpp);
-	void (*program_type1)(struct pci_dev *dev, struct hpp_type1 *hpp);
-	void (*program_type2)(struct pci_dev *dev, struct hpp_type2 *hpp);
-	void (*program_type3)(struct pci_dev *dev, struct hpx_type3 *hpp);
+	void (*program_type0)(struct pci_dev *dev, struct hpx_type0 *hpx);
+	void (*program_type1)(struct pci_dev *dev, struct hpx_type1 *hpx);
+	void (*program_type2)(struct pci_dev *dev, struct hpx_type2 *hpx);
+	void (*program_type3)(struct pci_dev *dev, struct hpx_type3 *hpx);
 };
 
 enum hpx_type3_dev_type {
-- 
cgit v1.2.3


From 8c3aac6e1b6146ce771b1cabd78e593136d3e5f2 Mon Sep 17 00:00:00 2001
From: Krzysztof Wilczynski <kw@linux.com>
Date: Tue, 27 Aug 2019 11:49:50 +0200
Subject: PCI/ACPI: Move _HPP & _HPX functions to pci-acpi.c

Move program_hpx_type0(), program_hpx_type1(), etc., and enums
hpx_type3_dev_type, hpx_type3_fn_type and hpx_type3_cfg_loc to
drivers/pci/pci-acpi.c as these functions and enums are ACPI-specific.

Move structs hpx_type0, hpx_type1, hpx_type2 and hpx_type3 to
drivers/pci/pci.h as these are shared between drivers/pci/pci-acpi.c and
drivers/pci/probe.c.

Link: https://lore.kernel.org/r/20190827094951.10613-3-kw@linux.com
Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-acpi.c      | 296 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/pci/pci.h           |  79 ++++++++++++
 drivers/pci/probe.c         | 269 ----------------------------------------
 include/linux/pci_hotplug.h |  98 ---------------
 4 files changed, 375 insertions(+), 367 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 02addc47edae..0bfabb3f9931 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -118,6 +118,47 @@ phys_addr_t acpi_pci_root_get_mcfg_addr(acpi_handle handle)
 	return (phys_addr_t)mcfg_addr;
 }
 
+static struct hpx_type0 pci_default_type0 = {
+	.revision = 1,
+	.cache_line_size = 8,
+	.latency_timer = 0x40,
+	.enable_serr = 0,
+	.enable_perr = 0,
+};
+
+void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx)
+{
+	u16 pci_cmd, pci_bctl;
+
+	if (!hpx)
+		hpx = &pci_default_type0;
+
+	if (hpx->revision > 1) {
+		pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
+			 hpx->revision);
+		hpx = &pci_default_type0;
+	}
+
+	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpx->cache_line_size);
+	pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpx->latency_timer);
+	pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
+	if (hpx->enable_serr)
+		pci_cmd |= PCI_COMMAND_SERR;
+	if (hpx->enable_perr)
+		pci_cmd |= PCI_COMMAND_PARITY;
+	pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
+
+	/* Program bridge control value */
+	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
+				      hpx->latency_timer);
+		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
+		if (hpx->enable_perr)
+			pci_bctl |= PCI_BRIDGE_CTL_PARITY;
+		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
+	}
+}
+
 static acpi_status decode_type0_hpx_record(union acpi_object *record,
 					   struct hpx_type0 *hpx0)
 {
@@ -146,6 +187,20 @@ static acpi_status decode_type0_hpx_record(union acpi_object *record,
 	return AE_OK;
 }
 
+void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx)
+{
+	int pos;
+
+	if (!hpx)
+		return;
+
+	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
+	if (!pos)
+		return;
+
+	pci_warn(dev, "PCI-X settings not supported\n");
+}
+
 static acpi_status decode_type1_hpx_record(union acpi_object *record,
 					   struct hpx_type1 *hpx1)
 {
@@ -173,6 +228,107 @@ static acpi_status decode_type1_hpx_record(union acpi_object *record,
 	return AE_OK;
 }
 
+static bool pcie_root_rcb_set(struct pci_dev *dev)
+{
+	struct pci_dev *rp = pcie_find_root_port(dev);
+	u16 lnkctl;
+
+	if (!rp)
+		return false;
+
+	pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
+	if (lnkctl & PCI_EXP_LNKCTL_RCB)
+		return true;
+
+	return false;
+}
+
+void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
+{
+	int pos;
+	u32 reg32;
+
+	if (!hpx)
+		return;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	if (hpx->revision > 1) {
+		pci_warn(dev, "PCIe settings rev %d not supported\n",
+			 hpx->revision);
+		return;
+	}
+
+	/*
+	 * Don't allow _HPX to change MPS or MRRS settings.  We manage
+	 * those to make sure they're consistent with the rest of the
+	 * platform.
+	 */
+	hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
+				    PCI_EXP_DEVCTL_READRQ;
+	hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
+				    PCI_EXP_DEVCTL_READRQ);
+
+	/* Initialize Device Control Register */
+	pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
+			~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);
+
+	/* Initialize Link Control Register */
+	if (pcie_cap_has_lnkctl(dev)) {
+
+		/*
+		 * If the Root Port supports Read Completion Boundary of
+		 * 128, set RCB to 128.  Otherwise, clear it.
+		 */
+		hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
+		hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
+		if (pcie_root_rcb_set(dev))
+			hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
+
+		pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
+			~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
+	}
+
+	/* Find Advanced Error Reporting Enhanced Capability */
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
+	if (!pos)
+		return;
+
+	/* Initialize Uncorrectable Error Mask Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
+	reg32 = (reg32 & hpx->unc_err_mask_and) | hpx->unc_err_mask_or;
+	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
+
+	/* Initialize Uncorrectable Error Severity Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
+	reg32 = (reg32 & hpx->unc_err_sever_and) | hpx->unc_err_sever_or;
+	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
+
+	/* Initialize Correctable Error Mask Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
+	reg32 = (reg32 & hpx->cor_err_mask_and) | hpx->cor_err_mask_or;
+	pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
+
+	/* Initialize Advanced Error Capabilities and Control Register */
+	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
+	reg32 = (reg32 & hpx->adv_err_cap_and) | hpx->adv_err_cap_or;
+
+	/* Don't enable ECRC generation or checking if unsupported */
+	if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
+		reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
+	if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
+		reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
+	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
+
+	/*
+	 * FIXME: The following two registers are not supported yet.
+	 *
+	 *   o Secondary Uncorrectable Error Severity Register
+	 *   o Secondary Uncorrectable Error Mask Register
+	 */
+}
+
 static acpi_status decode_type2_hpx_record(union acpi_object *record,
 					   struct hpx_type2 *hpx2)
 {
@@ -213,6 +369,146 @@ static acpi_status decode_type2_hpx_record(union acpi_object *record,
 	return AE_OK;
 }
 
+enum hpx_type3_dev_type {
+	HPX_TYPE_ENDPOINT	= BIT(0),
+	HPX_TYPE_LEG_END	= BIT(1),
+	HPX_TYPE_RC_END		= BIT(2),
+	HPX_TYPE_RC_EC		= BIT(3),
+	HPX_TYPE_ROOT_PORT	= BIT(4),
+	HPX_TYPE_UPSTREAM	= BIT(5),
+	HPX_TYPE_DOWNSTREAM	= BIT(6),
+	HPX_TYPE_PCI_BRIDGE	= BIT(7),
+	HPX_TYPE_PCIE_BRIDGE	= BIT(8),
+};
+
+static u16 hpx3_device_type(struct pci_dev *dev)
+{
+	u16 pcie_type = pci_pcie_type(dev);
+	const int pcie_to_hpx3_type[] = {
+		[PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
+		[PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
+		[PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
+		[PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
+		[PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
+		[PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
+		[PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
+		[PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
+		[PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
+	};
+
+	if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
+		return 0;
+
+	return pcie_to_hpx3_type[pcie_type];
+}
+
+enum hpx_type3_fn_type {
+	HPX_FN_NORMAL		= BIT(0),
+	HPX_FN_SRIOV_PHYS	= BIT(1),
+	HPX_FN_SRIOV_VIRT	= BIT(2),
+};
+
+static u8 hpx3_function_type(struct pci_dev *dev)
+{
+	if (dev->is_virtfn)
+		return HPX_FN_SRIOV_VIRT;
+	else if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
+		return HPX_FN_SRIOV_PHYS;
+	else
+		return HPX_FN_NORMAL;
+}
+
+static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
+{
+	u8 cap_ver = hpx3_cap_id & 0xf;
+
+	if ((hpx3_cap_id & BIT(4)) && cap_ver >= pcie_cap_id)
+		return true;
+	else if (cap_ver == pcie_cap_id)
+		return true;
+
+	return false;
+}
+
+enum hpx_type3_cfg_loc {
+	HPX_CFG_PCICFG		= 0,
+	HPX_CFG_PCIE_CAP	= 1,
+	HPX_CFG_PCIE_CAP_EXT	= 2,
+	HPX_CFG_VEND_CAP	= 3,
+	HPX_CFG_DVSEC		= 4,
+	HPX_CFG_MAX,
+};
+
+static void program_hpx_type3_register(struct pci_dev *dev,
+				       const struct hpx_type3 *reg)
+{
+	u32 match_reg, write_reg, header, orig_value;
+	u16 pos;
+
+	if (!(hpx3_device_type(dev) & reg->device_type))
+		return;
+
+	if (!(hpx3_function_type(dev) & reg->function_type))
+		return;
+
+	switch (reg->config_space_location) {
+	case HPX_CFG_PCICFG:
+		pos = 0;
+		break;
+	case HPX_CFG_PCIE_CAP:
+		pos = pci_find_capability(dev, reg->pci_exp_cap_id);
+		if (pos == 0)
+			return;
+
+		break;
+	case HPX_CFG_PCIE_CAP_EXT:
+		pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
+		if (pos == 0)
+			return;
+
+		pci_read_config_dword(dev, pos, &header);
+		if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
+					  reg->pci_exp_cap_ver))
+			return;
+
+		break;
+	case HPX_CFG_VEND_CAP:	/* Fall through */
+	case HPX_CFG_DVSEC:	/* Fall through */
+	default:
+		pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location");
+		return;
+	}
+
+	pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
+
+	if ((match_reg & reg->match_mask_and) != reg->match_value)
+		return;
+
+	pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
+	orig_value = write_reg;
+	write_reg &= reg->reg_mask_and;
+	write_reg |= reg->reg_mask_or;
+
+	if (orig_value == write_reg)
+		return;
+
+	pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
+
+	pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x",
+		pos, orig_value, write_reg);
+}
+
+void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx)
+{
+	if (!hpx)
+		return;
+
+	if (!pci_is_pcie(dev))
+		return;
+
+	program_hpx_type3_register(dev, hpx);
+}
+
 static void parse_hpx3_register(struct hpx_type3 *hpx3_reg,
 				union acpi_object *reg_fields)
 {
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 1be03a97cb92..dad43c64b350 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -608,4 +608,83 @@ static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
 static inline void pci_aer_clear_device_status(struct pci_dev *dev) { }
 #endif
 
+/* _HPX PCI Setting Record (Type 0); same as _HPP */
+struct hpx_type0 {
+	u32 revision;		/* Not present in _HPP */
+	u8  cache_line_size;	/* Not applicable to PCIe */
+	u8  latency_timer;	/* Not applicable to PCIe */
+	u8  enable_serr;
+	u8  enable_perr;
+};
+
+/* _HPX PCI-X Setting Record (Type 1) */
+struct hpx_type1 {
+	u32 revision;
+	u8  max_mem_read;
+	u8  avg_max_split;
+	u16 tot_max_split;
+};
+
+/* _HPX PCI Express Setting Record (Type 2) */
+struct hpx_type2 {
+	u32 revision;
+	u32 unc_err_mask_and;
+	u32 unc_err_mask_or;
+	u32 unc_err_sever_and;
+	u32 unc_err_sever_or;
+	u32 cor_err_mask_and;
+	u32 cor_err_mask_or;
+	u32 adv_err_cap_and;
+	u32 adv_err_cap_or;
+	u16 pci_exp_devctl_and;
+	u16 pci_exp_devctl_or;
+	u16 pci_exp_lnkctl_and;
+	u16 pci_exp_lnkctl_or;
+	u32 sec_unc_err_sever_and;
+	u32 sec_unc_err_sever_or;
+	u32 sec_unc_err_mask_and;
+	u32 sec_unc_err_mask_or;
+};
+
+/* _HPX PCI Express Setting Record (Type 3) */
+struct hpx_type3 {
+	u16 device_type;
+	u16 function_type;
+	u16 config_space_location;
+	u16 pci_exp_cap_id;
+	u16 pci_exp_cap_ver;
+	u16 pci_exp_vendor_id;
+	u16 dvsec_id;
+	u16 dvsec_rev;
+	u16 match_offset;
+	u32 match_mask_and;
+	u32 match_value;
+	u16 reg_offset;
+	u32 reg_mask_and;
+	u32 reg_mask_or;
+};
+
+void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx);
+void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx);
+void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx);
+void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx);
+
+struct hotplug_program_ops {
+	void (*program_type0)(struct pci_dev *dev, struct hpx_type0 *hpx);
+	void (*program_type1)(struct pci_dev *dev, struct hpx_type1 *hpx);
+	void (*program_type2)(struct pci_dev *dev, struct hpx_type2 *hpx);
+	void (*program_type3)(struct pci_dev *dev, struct hpx_type3 *hpx);
+};
+
+#ifdef CONFIG_ACPI
+int pci_acpi_program_hp_params(struct pci_dev *dev,
+			       const struct hotplug_program_ops *hp_ops);
+#else
+static inline int pci_acpi_program_hp_params(struct pci_dev *dev,
+				    const struct hotplug_program_ops *hp_ops)
+{
+	return -ENODEV;
+}
+#endif
+
 #endif /* DRIVERS_PCI_H */
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 120c70b5003b..5847b557dc9a 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1920,275 +1920,6 @@ static void pci_configure_mps(struct pci_dev *dev)
 		 p_mps, mps, mpss);
 }
 
-static struct hpx_type0 pci_default_type0 = {
-	.revision = 1,
-	.cache_line_size = 8,
-	.latency_timer = 0x40,
-	.enable_serr = 0,
-	.enable_perr = 0,
-};
-
-static void program_hpx_type0(struct pci_dev *dev, struct hpx_type0 *hpx)
-{
-	u16 pci_cmd, pci_bctl;
-
-	if (!hpx)
-		hpx = &pci_default_type0;
-
-	if (hpx->revision > 1) {
-		pci_warn(dev, "PCI settings rev %d not supported; using defaults\n",
-			 hpx->revision);
-		hpx = &pci_default_type0;
-	}
-
-	pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, hpx->cache_line_size);
-	pci_write_config_byte(dev, PCI_LATENCY_TIMER, hpx->latency_timer);
-	pci_read_config_word(dev, PCI_COMMAND, &pci_cmd);
-	if (hpx->enable_serr)
-		pci_cmd |= PCI_COMMAND_SERR;
-	if (hpx->enable_perr)
-		pci_cmd |= PCI_COMMAND_PARITY;
-	pci_write_config_word(dev, PCI_COMMAND, pci_cmd);
-
-	/* Program bridge control value */
-	if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
-		pci_write_config_byte(dev, PCI_SEC_LATENCY_TIMER,
-				      hpx->latency_timer);
-		pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &pci_bctl);
-		if (hpx->enable_perr)
-			pci_bctl |= PCI_BRIDGE_CTL_PARITY;
-		pci_write_config_word(dev, PCI_BRIDGE_CONTROL, pci_bctl);
-	}
-}
-
-static void program_hpx_type1(struct pci_dev *dev, struct hpx_type1 *hpx)
-{
-	int pos;
-
-	if (!hpx)
-		return;
-
-	pos = pci_find_capability(dev, PCI_CAP_ID_PCIX);
-	if (!pos)
-		return;
-
-	pci_warn(dev, "PCI-X settings not supported\n");
-}
-
-static bool pcie_root_rcb_set(struct pci_dev *dev)
-{
-	struct pci_dev *rp = pcie_find_root_port(dev);
-	u16 lnkctl;
-
-	if (!rp)
-		return false;
-
-	pcie_capability_read_word(rp, PCI_EXP_LNKCTL, &lnkctl);
-	if (lnkctl & PCI_EXP_LNKCTL_RCB)
-		return true;
-
-	return false;
-}
-
-static void program_hpx_type2(struct pci_dev *dev, struct hpx_type2 *hpx)
-{
-	int pos;
-	u32 reg32;
-
-	if (!hpx)
-		return;
-
-	if (!pci_is_pcie(dev))
-		return;
-
-	if (hpx->revision > 1) {
-		pci_warn(dev, "PCIe settings rev %d not supported\n",
-			 hpx->revision);
-		return;
-	}
-
-	/*
-	 * Don't allow _HPX to change MPS or MRRS settings.  We manage
-	 * those to make sure they're consistent with the rest of the
-	 * platform.
-	 */
-	hpx->pci_exp_devctl_and |= PCI_EXP_DEVCTL_PAYLOAD |
-				    PCI_EXP_DEVCTL_READRQ;
-	hpx->pci_exp_devctl_or &= ~(PCI_EXP_DEVCTL_PAYLOAD |
-				    PCI_EXP_DEVCTL_READRQ);
-
-	/* Initialize Device Control Register */
-	pcie_capability_clear_and_set_word(dev, PCI_EXP_DEVCTL,
-			~hpx->pci_exp_devctl_and, hpx->pci_exp_devctl_or);
-
-	/* Initialize Link Control Register */
-	if (pcie_cap_has_lnkctl(dev)) {
-
-		/*
-		 * If the Root Port supports Read Completion Boundary of
-		 * 128, set RCB to 128.  Otherwise, clear it.
-		 */
-		hpx->pci_exp_lnkctl_and |= PCI_EXP_LNKCTL_RCB;
-		hpx->pci_exp_lnkctl_or &= ~PCI_EXP_LNKCTL_RCB;
-		if (pcie_root_rcb_set(dev))
-			hpx->pci_exp_lnkctl_or |= PCI_EXP_LNKCTL_RCB;
-
-		pcie_capability_clear_and_set_word(dev, PCI_EXP_LNKCTL,
-			~hpx->pci_exp_lnkctl_and, hpx->pci_exp_lnkctl_or);
-	}
-
-	/* Find Advanced Error Reporting Enhanced Capability */
-	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
-	if (!pos)
-		return;
-
-	/* Initialize Uncorrectable Error Mask Register */
-	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &reg32);
-	reg32 = (reg32 & hpx->unc_err_mask_and) | hpx->unc_err_mask_or;
-	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, reg32);
-
-	/* Initialize Uncorrectable Error Severity Register */
-	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &reg32);
-	reg32 = (reg32 & hpx->unc_err_sever_and) | hpx->unc_err_sever_or;
-	pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, reg32);
-
-	/* Initialize Correctable Error Mask Register */
-	pci_read_config_dword(dev, pos + PCI_ERR_COR_MASK, &reg32);
-	reg32 = (reg32 & hpx->cor_err_mask_and) | hpx->cor_err_mask_or;
-	pci_write_config_dword(dev, pos + PCI_ERR_COR_MASK, reg32);
-
-	/* Initialize Advanced Error Capabilities and Control Register */
-	pci_read_config_dword(dev, pos + PCI_ERR_CAP, &reg32);
-	reg32 = (reg32 & hpx->adv_err_cap_and) | hpx->adv_err_cap_or;
-
-	/* Don't enable ECRC generation or checking if unsupported */
-	if (!(reg32 & PCI_ERR_CAP_ECRC_GENC))
-		reg32 &= ~PCI_ERR_CAP_ECRC_GENE;
-	if (!(reg32 & PCI_ERR_CAP_ECRC_CHKC))
-		reg32 &= ~PCI_ERR_CAP_ECRC_CHKE;
-	pci_write_config_dword(dev, pos + PCI_ERR_CAP, reg32);
-
-	/*
-	 * FIXME: The following two registers are not supported yet.
-	 *
-	 *   o Secondary Uncorrectable Error Severity Register
-	 *   o Secondary Uncorrectable Error Mask Register
-	 */
-}
-
-static u16 hpx3_device_type(struct pci_dev *dev)
-{
-	u16 pcie_type = pci_pcie_type(dev);
-	const int pcie_to_hpx3_type[] = {
-		[PCI_EXP_TYPE_ENDPOINT]    = HPX_TYPE_ENDPOINT,
-		[PCI_EXP_TYPE_LEG_END]     = HPX_TYPE_LEG_END,
-		[PCI_EXP_TYPE_RC_END]      = HPX_TYPE_RC_END,
-		[PCI_EXP_TYPE_RC_EC]       = HPX_TYPE_RC_EC,
-		[PCI_EXP_TYPE_ROOT_PORT]   = HPX_TYPE_ROOT_PORT,
-		[PCI_EXP_TYPE_UPSTREAM]    = HPX_TYPE_UPSTREAM,
-		[PCI_EXP_TYPE_DOWNSTREAM]  = HPX_TYPE_DOWNSTREAM,
-		[PCI_EXP_TYPE_PCI_BRIDGE]  = HPX_TYPE_PCI_BRIDGE,
-		[PCI_EXP_TYPE_PCIE_BRIDGE] = HPX_TYPE_PCIE_BRIDGE,
-	};
-
-	if (pcie_type >= ARRAY_SIZE(pcie_to_hpx3_type))
-		return 0;
-
-	return pcie_to_hpx3_type[pcie_type];
-}
-
-static u8 hpx3_function_type(struct pci_dev *dev)
-{
-	if (dev->is_virtfn)
-		return HPX_FN_SRIOV_VIRT;
-	else if (pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV) > 0)
-		return HPX_FN_SRIOV_PHYS;
-	else
-		return HPX_FN_NORMAL;
-}
-
-static bool hpx3_cap_ver_matches(u8 pcie_cap_id, u8 hpx3_cap_id)
-{
-	u8 cap_ver = hpx3_cap_id & 0xf;
-
-	if ((hpx3_cap_id & BIT(4)) && cap_ver >= pcie_cap_id)
-		return true;
-	else if (cap_ver == pcie_cap_id)
-		return true;
-
-	return false;
-}
-
-static void program_hpx_type3_register(struct pci_dev *dev,
-				       const struct hpx_type3 *reg)
-{
-	u32 match_reg, write_reg, header, orig_value;
-	u16 pos;
-
-	if (!(hpx3_device_type(dev) & reg->device_type))
-		return;
-
-	if (!(hpx3_function_type(dev) & reg->function_type))
-		return;
-
-	switch (reg->config_space_location) {
-	case HPX_CFG_PCICFG:
-		pos = 0;
-		break;
-	case HPX_CFG_PCIE_CAP:
-		pos = pci_find_capability(dev, reg->pci_exp_cap_id);
-		if (pos == 0)
-			return;
-
-		break;
-	case HPX_CFG_PCIE_CAP_EXT:
-		pos = pci_find_ext_capability(dev, reg->pci_exp_cap_id);
-		if (pos == 0)
-			return;
-
-		pci_read_config_dword(dev, pos, &header);
-		if (!hpx3_cap_ver_matches(PCI_EXT_CAP_VER(header),
-					  reg->pci_exp_cap_ver))
-			return;
-
-		break;
-	case HPX_CFG_VEND_CAP:	/* Fall through */
-	case HPX_CFG_DVSEC:	/* Fall through */
-	default:
-		pci_warn(dev, "Encountered _HPX type 3 with unsupported config space location");
-		return;
-	}
-
-	pci_read_config_dword(dev, pos + reg->match_offset, &match_reg);
-
-	if ((match_reg & reg->match_mask_and) != reg->match_value)
-		return;
-
-	pci_read_config_dword(dev, pos + reg->reg_offset, &write_reg);
-	orig_value = write_reg;
-	write_reg &= reg->reg_mask_and;
-	write_reg |= reg->reg_mask_or;
-
-	if (orig_value == write_reg)
-		return;
-
-	pci_write_config_dword(dev, pos + reg->reg_offset, write_reg);
-
-	pci_dbg(dev, "Applied _HPX3 at [0x%x]: 0x%08x -> 0x%08x",
-		pos, orig_value, write_reg);
-}
-
-static void program_hpx_type3(struct pci_dev *dev, struct hpx_type3 *hpx)
-{
-	if (!hpx)
-		return;
-
-	if (!pci_is_pcie(dev))
-		return;
-
-	program_hpx_type3_register(dev, hpx);
-}
-
 int pci_configure_extended_tags(struct pci_dev *dev, void *ign)
 {
 	struct pci_host_bridge *host;
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 247a49ee27b3..b482e42d7153 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -86,112 +86,14 @@ void pci_hp_deregister(struct hotplug_slot *slot);
 #define pci_hp_initialize(slot, bus, nr, name) \
 	__pci_hp_initialize(slot, bus, nr, name, THIS_MODULE, KBUILD_MODNAME)
 
-/* _HPX PCI Setting Record (Type 0); same as _HPP */
-struct hpx_type0 {
-	u32 revision;		/* Not present in _HPP */
-	u8  cache_line_size;	/* Not applicable to PCIe */
-	u8  latency_timer;	/* Not applicable to PCIe */
-	u8  enable_serr;
-	u8  enable_perr;
-};
-
-/* _HPX PCI-X Setting Record (Type 1) */
-struct hpx_type1 {
-	u32 revision;
-	u8  max_mem_read;
-	u8  avg_max_split;
-	u16 tot_max_split;
-};
-
-/* _HPX PCI Express Setting Record (Type 2) */
-struct hpx_type2 {
-	u32 revision;
-	u32 unc_err_mask_and;
-	u32 unc_err_mask_or;
-	u32 unc_err_sever_and;
-	u32 unc_err_sever_or;
-	u32 cor_err_mask_and;
-	u32 cor_err_mask_or;
-	u32 adv_err_cap_and;
-	u32 adv_err_cap_or;
-	u16 pci_exp_devctl_and;
-	u16 pci_exp_devctl_or;
-	u16 pci_exp_lnkctl_and;
-	u16 pci_exp_lnkctl_or;
-	u32 sec_unc_err_sever_and;
-	u32 sec_unc_err_sever_or;
-	u32 sec_unc_err_mask_and;
-	u32 sec_unc_err_mask_or;
-};
-
-/* _HPX PCI Express Setting Record (Type 3) */
-struct hpx_type3 {
-	u16 device_type;
-	u16 function_type;
-	u16 config_space_location;
-	u16 pci_exp_cap_id;
-	u16 pci_exp_cap_ver;
-	u16 pci_exp_vendor_id;
-	u16 dvsec_id;
-	u16 dvsec_rev;
-	u16 match_offset;
-	u32 match_mask_and;
-	u32 match_value;
-	u16 reg_offset;
-	u32 reg_mask_and;
-	u32 reg_mask_or;
-};
-
-struct hotplug_program_ops {
-	void (*program_type0)(struct pci_dev *dev, struct hpx_type0 *hpx);
-	void (*program_type1)(struct pci_dev *dev, struct hpx_type1 *hpx);
-	void (*program_type2)(struct pci_dev *dev, struct hpx_type2 *hpx);
-	void (*program_type3)(struct pci_dev *dev, struct hpx_type3 *hpx);
-};
-
-enum hpx_type3_dev_type {
-	HPX_TYPE_ENDPOINT	= BIT(0),
-	HPX_TYPE_LEG_END	= BIT(1),
-	HPX_TYPE_RC_END		= BIT(2),
-	HPX_TYPE_RC_EC		= BIT(3),
-	HPX_TYPE_ROOT_PORT	= BIT(4),
-	HPX_TYPE_UPSTREAM	= BIT(5),
-	HPX_TYPE_DOWNSTREAM	= BIT(6),
-	HPX_TYPE_PCI_BRIDGE	= BIT(7),
-	HPX_TYPE_PCIE_BRIDGE	= BIT(8),
-};
-
-enum hpx_type3_fn_type {
-	HPX_FN_NORMAL		= BIT(0),
-	HPX_FN_SRIOV_PHYS	= BIT(1),
-	HPX_FN_SRIOV_VIRT	= BIT(2),
-};
-
-enum hpx_type3_cfg_loc {
-	HPX_CFG_PCICFG		= 0,
-	HPX_CFG_PCIE_CAP	= 1,
-	HPX_CFG_PCIE_CAP_EXT	= 2,
-	HPX_CFG_VEND_CAP	= 3,
-	HPX_CFG_DVSEC		= 4,
-	HPX_CFG_MAX,
-};
-
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
-int pci_acpi_program_hp_params(struct pci_dev *dev,
-			       const struct hotplug_program_ops *hp_ops);
 bool pciehp_is_native(struct pci_dev *bridge);
 int acpi_get_hp_hw_control_from_firmware(struct pci_dev *bridge);
 bool shpchp_is_native(struct pci_dev *bridge);
 int acpi_pci_check_ejectable(struct pci_bus *pbus, acpi_handle handle);
 int acpi_pci_detect_ejectable(acpi_handle handle);
 #else
-static inline int pci_acpi_program_hp_params(struct pci_dev *dev,
-				    const struct hotplug_program_ops *hp_ops)
-{
-	return -ENODEV;
-}
-
 static inline int acpi_get_hp_hw_control_from_firmware(struct pci_dev *bridge)
 {
 	return 0;
-- 
cgit v1.2.3


From 8973ea47901c81a1912bd05f1577bed9b5b52506 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 28 Aug 2019 10:34:10 +0200
Subject: driver core: platform: Introduce platform_get_irq_optional()

In some cases the interrupt line of a device is optional. Introduce a
new platform_get_irq_optional() that works much like platform_get_irq()
but does not output an error on failure to find the interrupt.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20190828083411.2496-1-thierry.reding@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c         | 22 ++++++++++++++++++++++
 include/linux/platform_device.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 8ad701068c11..0dda6ade50fd 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -192,6 +192,28 @@ int platform_get_irq(struct platform_device *dev, unsigned int num)
 }
 EXPORT_SYMBOL_GPL(platform_get_irq);
 
+/**
+ * platform_get_irq_optional - get an optional IRQ for a device
+ * @dev: platform device
+ * @num: IRQ number index
+ *
+ * Gets an IRQ for a platform device. Device drivers should check the return
+ * value for errors so as to not pass a negative integer value to the
+ * request_irq() APIs. This is the same as platform_get_irq(), except that it
+ * does not print an error message if an IRQ can not be obtained.
+ *
+ * Example:
+ *		int irq = platform_get_irq_optional(pdev, 0);
+ *		if (irq < 0)
+ *			return irq;
+ *
+ * Return: IRQ number on success, negative error number on failure.
+ */
+int platform_get_irq_optional(struct platform_device *dev, unsigned int num)
+{
+	return __platform_get_irq(dev, num);
+}
+
 /**
  * platform_irq_count - Count the number of IRQs a platform device uses
  * @dev: platform device
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 37e15a935a42..35bc4355a9df 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -58,6 +58,7 @@ extern void __iomem *
 devm_platform_ioremap_resource(struct platform_device *pdev,
 			       unsigned int index);
 extern int platform_get_irq(struct platform_device *, unsigned int);
+extern int platform_get_irq_optional(struct platform_device *, unsigned int);
 extern int platform_irq_count(struct platform_device *);
 extern struct resource *platform_get_resource_byname(struct platform_device *,
 						     unsigned int,
-- 
cgit v1.2.3


From 14105c191e09b698782026827dbac966cbd30446 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 27 Aug 2019 00:08:12 -0700
Subject: ipv6: shrink struct ipv6_mc_socklist

Remove two holes on 64bit arches, to bring the size
to one cache line exactly.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 50037913c9b1..a01981d7108f 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -89,9 +89,9 @@ struct ip6_sf_socklist {
 struct ipv6_mc_socklist {
 	struct in6_addr		addr;
 	int			ifindex;
+	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ipv6_mc_socklist __rcu *next;
 	rwlock_t		sflock;
-	unsigned int		sfmode;		/* MCAST_{INCLUDE,EXCLUDE} */
 	struct ip6_sf_socklist	*sflist;
 	struct rcu_head		rcu;
 };
-- 
cgit v1.2.3


From dbf47a2a094edf58983265e323ca4bdcdb58b5ee Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 27 Aug 2019 21:49:38 +0300
Subject: net: sched: act_sample: fix psample group handling on overwrite

Action sample doesn't properly handle psample_group pointer in overwrite
case. Following issues need to be fixed:

- In tcf_sample_init() function RCU_INIT_POINTER() is used to set
  s->psample_group, even though we neither setting the pointer to NULL, nor
  preventing concurrent readers from accessing the pointer in some way.
  Use rcu_swap_protected() instead to safely reset the pointer.

- Old value of s->psample_group is not released or deallocated in any way,
  which results resource leak. Use psample_group_put() on non-NULL value
  obtained with rcu_swap_protected().

- The function psample_group_put() that released reference to struct
  psample_group pointed by rcu-pointer s->psample_group doesn't respect rcu
  grace period when deallocating it. Extend struct psample_group with rcu
  head and use kfree_rcu when freeing it.

Fixes: 5c5670fae430 ("net/sched: Introduce sample tc action")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/psample.h  | 1 +
 net/psample/psample.c  | 2 +-
 net/sched/act_sample.c | 6 +++++-
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/psample.h b/include/net/psample.h
index 37a4df2325b2..6b578ce69cd8 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -11,6 +11,7 @@ struct psample_group {
 	u32 group_num;
 	u32 refcount;
 	u32 seq;
+	struct rcu_head rcu;
 };
 
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 841f198ea1a8..66e4b61a350d 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -154,7 +154,7 @@ static void psample_group_destroy(struct psample_group *group)
 {
 	psample_group_notify(group, PSAMPLE_CMD_DEL_GROUP);
 	list_del(&group->list);
-	kfree(group);
+	kfree_rcu(group, rcu);
 }
 
 static struct psample_group *
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 7eff363f9f03..10229124a992 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -102,13 +102,17 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
 	s->rate = rate;
 	s->psample_group_num = psample_group_num;
-	RCU_INIT_POINTER(s->psample_group, psample_group);
+	rcu_swap_protected(s->psample_group, psample_group,
+			   lockdep_is_held(&s->tcf_lock));
 
 	if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
 		s->truncate = true;
 		s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
 	}
 	spin_unlock_bh(&s->tcf_lock);
+
+	if (psample_group)
+		psample_group_put(psample_group);
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-- 
cgit v1.2.3


From cf09a8ee19ad1f78b4e18cdde9f2a61133efacf5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 28 Aug 2019 15:05:51 -0700
Subject: blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()

Instead of @node, pass in @q and @blkcg so that the alloc function has
more context.  This doesn't cause any behavior change and will be used
by io.weight implementation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c         | 5 +++--
 block/blk-cgroup.c         | 6 +++---
 block/blk-iolatency.c      | 6 ++++--
 block/blk-throttle.c       | 6 ++++--
 include/linux/blk-cgroup.h | 3 ++-
 5 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 0f6cd688924f..e6fb537b4bfc 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -501,11 +501,12 @@ static void bfq_cpd_free(struct blkcg_policy_data *cpd)
 	kfree(cpd_to_bfqgd(cpd));
 }
 
-static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, int node)
+static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
+					     struct blkcg *blkcg)
 {
 	struct bfq_group *bfqg;
 
-	bfqg = kzalloc_node(sizeof(*bfqg), gfp, node);
+	bfqg = kzalloc_node(sizeof(*bfqg), gfp, q->node);
 	if (!bfqg)
 		return NULL;
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 55a7dc227dfb..6a82ca3fb5cf 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -175,7 +175,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 			continue;
 
 		/* alloc per-policy data and attach it to blkg */
-		pd = pol->pd_alloc_fn(gfp_mask, q->node);
+		pd = pol->pd_alloc_fn(gfp_mask, q, blkcg);
 		if (!pd)
 			goto err_free;
 
@@ -1346,7 +1346,7 @@ int blkcg_activate_policy(struct request_queue *q,
 		blk_mq_freeze_queue(q);
 pd_prealloc:
 	if (!pd_prealloc) {
-		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
+		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
 		if (!pd_prealloc) {
 			ret = -ENOMEM;
 			goto out_bypass_end;
@@ -1362,7 +1362,7 @@ pd_prealloc:
 		if (blkg->pd[pol->plid])
 			continue;
 
-		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q->node);
+		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
 		if (!pd)
 			swap(pd, pd_prealloc);
 		if (!pd) {
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 0fff7b56df0e..46fa6449f4bb 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -934,11 +934,13 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf,
 }
 
 
-static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp, int node)
+static struct blkg_policy_data *iolatency_pd_alloc(gfp_t gfp,
+						   struct request_queue *q,
+						   struct blkcg *blkcg)
 {
 	struct iolatency_grp *iolat;
 
-	iolat = kzalloc_node(sizeof(*iolat), gfp, node);
+	iolat = kzalloc_node(sizeof(*iolat), gfp, q->node);
 	if (!iolat)
 		return NULL;
 	iolat->stats = __alloc_percpu_gfp(sizeof(struct latency_stat),
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 8ab6c8153223..0445c998c377 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -478,12 +478,14 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
 	timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
 }
 
-static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
+static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
+						struct request_queue *q,
+						struct blkcg *blkcg)
 {
 	struct throtl_grp *tg;
 	int rw;
 
-	tg = kzalloc_node(sizeof(*tg), gfp, node);
+	tg = kzalloc_node(sizeof(*tg), gfp, q->node);
 	if (!tg)
 		return NULL;
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 0bb79d858a13..261248e88eb1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -149,7 +149,8 @@ typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
 typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
 typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
 typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
-typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
+				struct request_queue *q, struct blkcg *blkcg);
 typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
 typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
-- 
cgit v1.2.3


From 015d254cb02b6d8eec4b3366274bf4672f9e0b64 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 28 Aug 2019 15:05:53 -0700
Subject: blkcg: separate blkcg_conf_get_disk() out of blkg_conf_prep()

Separate out blkcg_conf_get_disk() so that it can be used by blkcg
policy interface file input parsers before the policy is actually
enabled.  This doesn't introduce any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 62 ++++++++++++++++++++++++++++++++--------------
 include/linux/blk-cgroup.h |  1 +
 2 files changed, 44 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 78ccbdcfe723..0e2619c1a422 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -753,6 +753,44 @@ static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
 	return __blkg_lookup(blkcg, q, true /* update_hint */);
 }
 
+/**
+ * blkg_conf_prep - parse and prepare for per-blkg config update
+ * @inputp: input string pointer
+ *
+ * Parse the device node prefix part, MAJ:MIN, of per-blkg config update
+ * from @input and get and return the matching gendisk.  *@inputp is
+ * updated to point past the device node prefix.  Returns an ERR_PTR()
+ * value on error.
+ *
+ * Use this function iff blkg_conf_prep() can't be used for some reason.
+ */
+struct gendisk *blkcg_conf_get_disk(char **inputp)
+{
+	char *input = *inputp;
+	unsigned int major, minor;
+	struct gendisk *disk;
+	int key_len, part;
+
+	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
+		return ERR_PTR(-EINVAL);
+
+	input += key_len;
+	if (!isspace(*input))
+		return ERR_PTR(-EINVAL);
+	input = skip_spaces(input);
+
+	disk = get_gendisk(MKDEV(major, minor), &part);
+	if (!disk)
+		return ERR_PTR(-ENODEV);
+	if (part) {
+		put_disk_and_module(disk);
+		return ERR_PTR(-ENODEV);
+	}
+
+	*inputp = input;
+	return disk;
+}
+
 /**
  * blkg_conf_prep - parse and prepare for per-blkg config update
  * @blkcg: target block cgroup
@@ -772,25 +810,11 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 	struct gendisk *disk;
 	struct request_queue *q;
 	struct blkcg_gq *blkg;
-	unsigned int major, minor;
-	int key_len, part, ret;
-	char *body;
-
-	if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2)
-		return -EINVAL;
-
-	body = input + key_len;
-	if (!isspace(*body))
-		return -EINVAL;
-	body = skip_spaces(body);
+	int ret;
 
-	disk = get_gendisk(MKDEV(major, minor), &part);
-	if (!disk)
-		return -ENODEV;
-	if (part) {
-		ret = -ENODEV;
-		goto fail;
-	}
+	disk = blkcg_conf_get_disk(&input);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
 
 	q = disk->queue;
 
@@ -856,7 +880,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 success:
 	ctx->disk = disk;
 	ctx->blkg = blkg;
-	ctx->body = body;
+	ctx->body = input;
 	return 0;
 
 fail_unlock:
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 261248e88eb1..bed9e43f9426 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -234,6 +234,7 @@ struct blkg_conf_ctx {
 	char				*body;
 };
 
+struct gendisk *blkcg_conf_get_disk(char **inputp);
 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
-- 
cgit v1.2.3


From 6f816b4b746c2241540e537682d30d8e9997d674 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 28 Aug 2019 15:05:57 -0700
Subject: blk-mq: add optional request->alloc_time_ns

There are currently two start time timestamps - start_time_ns and
io_start_time_ns.  The former marks the request allocation and and the
second issue-to-device time.  The planned io.weight controller needs
to measure the total time bios take to execute after it leaves rq_qos
including the time spent waiting for request to become available,
which can easily dominate on saturated devices.

This patch adds request->alloc_time_ns which records when the request
allocation attempt started.  As it isn't used for the usual stats,
make it optional behind CONFIG_BLK_RQ_ALLOC_TIME and
QUEUE_FLAG_RQ_ALLOC_TIME so that it can be compiled out when there are
no users and it's active only on queues which need it even when
compiled in.

v2: s/pre_start_time/alloc_time/ and add CONFIG_BLK_RQ_ALLOC_TIME
    gating as suggested by Jens.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig          |  3 +++
 block/blk-mq.c         | 13 +++++++++++--
 include/linux/blkdev.h | 13 ++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/Kconfig b/block/Kconfig
index 8b5f8e560eb4..1b62ad6d0e12 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -26,6 +26,9 @@ menuconfig BLOCK
 
 if BLOCK
 
+config BLK_RQ_ALLOC_TIME
+	bool
+
 config BLK_SCSI_REQUEST
 	bool
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index cf768d0c2950..004411236034 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -291,7 +291,7 @@ static inline bool blk_mq_need_time_stamp(struct request *rq)
 }
 
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
-		unsigned int tag, unsigned int op)
+		unsigned int tag, unsigned int op, u64 alloc_time_ns)
 {
 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 	struct request *rq = tags->static_rqs[tag];
@@ -325,6 +325,9 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	rq->alloc_time_ns = alloc_time_ns;
+#endif
 	if (blk_mq_need_time_stamp(rq))
 		rq->start_time_ns = ktime_get_ns();
 	else
@@ -356,8 +359,14 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 	struct request *rq;
 	unsigned int tag;
 	bool clear_ctx_on_error = false;
+	u64 alloc_time_ns = 0;
 
 	blk_queue_enter_live(q);
+
+	/* alloc_time includes depth and tag waits */
+	if (blk_queue_rq_alloc_time(q))
+		alloc_time_ns = ktime_get_ns();
+
 	data->q = q;
 	if (likely(!data->ctx)) {
 		data->ctx = blk_mq_get_ctx(q);
@@ -393,7 +402,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 		return NULL;
 	}
 
-	rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags);
+	rq = blk_mq_rq_ctx_init(data, tag, data->cmd_flags, alloc_time_ns);
 	if (!op_is_flush(data->cmd_flags)) {
 		rq->elv.icq = NULL;
 		if (e && e->type->ops.prepare_request) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ac790178787..d0ad21e4771b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -194,7 +194,11 @@ struct request {
 
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
-	/* Time that I/O was submitted to the kernel. */
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	/* Time that the first bio started allocating this request. */
+	u64 alloc_time_ns;
+#endif
+	/* Time that this request was allocated for this IO. */
 	u64 start_time_ns;
 	/* Time that I/O was submitted to the device. */
 	u64 io_start_time_ns;
@@ -609,6 +613,7 @@ struct request_queue {
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
 #define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
+#define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 
 #define QUEUE_FLAG_MQ_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP))
@@ -637,6 +642,12 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 #define blk_queue_pci_p2pdma(q)	\
 	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+#define blk_queue_rq_alloc_time(q)	\
+	test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
+#else
+#define blk_queue_rq_alloc_time(q)	false
+#endif
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
-- 
cgit v1.2.3


From 7caa47151ab2e644dd221f741ec7578d9532c9a3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 28 Aug 2019 15:05:58 -0700
Subject: blkcg: implement blk-iocost

This patchset implements IO cost model based work-conserving
proportional controller.

While io.latency provides the capability to comprehensively prioritize
and protect IOs depending on the cgroups, its protection is binary -
the lowest latency target cgroup which is suffering is protected at
the cost of all others.  In many use cases including stacking multiple
workload containers in a single system, it's necessary to distribute
IO capacity with better granularity.

One challenge of controlling IO resources is the lack of trivially
observable cost metric.  The most common metrics - bandwidth and iops
- can be off by orders of magnitude depending on the device type and
IO pattern.  However, the cost isn't a complete mystery.  Given
several key attributes, we can make fairly reliable predictions on how
expensive a given stream of IOs would be, at least compared to other
IO patterns.

The function which determines the cost of a given IO is the IO cost
model for the device.  This controller distributes IO capacity based
on the costs estimated by such model.  The more accurate the cost
model the better but the controller adapts based on IO completion
latency and as long as the relative costs across differents IO
patterns are consistent and sensible, it'll adapt to the actual
performance of the device.

Currently, the only implemented cost model is a simple linear one with
a few sets of default parameters for different classes of device.
This covers most common devices reasonably well.  All the
infrastructure to tune and add different cost models is already in
place and a later patch will also allow using bpf progs for cost
models.

Please see the top comment in blk-iocost.c and documentation for
more details.

v2: Rebased on top of RQ_ALLOC_TIME changes and folded in Rik's fix
    for a divide-by-zero bug in current_hweight() triggered by zero
    inuse_sum.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Andy Newell <newella@fb.com>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst |   94 ++
 block/Kconfig                           |   10 +
 block/Makefile                          |    1 +
 block/blk-iocost.c                      | 2371 +++++++++++++++++++++++++++++++
 block/blk-rq-qos.h                      |    3 +
 include/linux/blk_types.h               |    3 +
 include/trace/events/iocost.h           |  174 +++
 7 files changed, 2656 insertions(+)
 create mode 100644 block/blk-iocost.c
 create mode 100644 include/trace/events/iocost.h

(limited to 'include')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 3b29005aa981..1521c7e554f5 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1435,6 +1435,100 @@ IO Interface Files
 	  8:16 rbytes=1459200 wbytes=314773504 rios=192 wios=353 dbytes=0 dios=0
 	  8:0 rbytes=90430464 wbytes=299008000 rios=8950 wios=1252 dbytes=50331648 dios=3021
 
+  io.cost.qos
+	A read-write nested-keyed file with exists only on the root
+	cgroup.
+
+	This file configures the Quality of Service of the IO cost
+	model based controller (CONFIG_BLK_CGROUP_IOCOST) which
+	currently implements "io.weight" proportional control.  Lines
+	are keyed by $MAJ:$MIN device numbers and not ordered.  The
+	line for a given device is populated on the first write for
+	the device on "io.cost.qos" or "io.cost.model".  The following
+	nested keys are defined.
+
+	  ======	=====================================
+	  enable	Weight-based control enable
+	  ctrl		"auto" or "user"
+	  rpct		Read latency percentile    [0, 100]
+	  rlat		Read latency threshold
+	  wpct		Write latency percentile   [0, 100]
+	  wlat		Write latency threshold
+	  min		Minimum scaling percentage [1, 10000]
+	  max		Maximum scaling percentage [1, 10000]
+	  ======	=====================================
+
+	The controller is disabled by default and can be enabled by
+	setting "enable" to 1.  "rpct" and "wpct" parameters default
+	to zero and the controller uses internal device saturation
+	state to adjust the overall IO rate between "min" and "max".
+
+	When a better control quality is needed, latency QoS
+	parameters can be configured.  For example::
+
+	  8:16 enable=1 ctrl=auto rpct=95.00 rlat=75000 wpct=95.00 wlat=150000 min=50.00 max=150.0
+
+	shows that on sdb, the controller is enabled, will consider
+	the device saturated if the 95th percentile of read completion
+	latencies is above 75ms or write 150ms, and adjust the overall
+	IO issue rate between 50% and 150% accordingly.
+
+	The lower the saturation point, the better the latency QoS at
+	the cost of aggregate bandwidth.  The narrower the allowed
+	adjustment range between "min" and "max", the more conformant
+	to the cost model the IO behavior.  Note that the IO issue
+	base rate may be far off from 100% and setting "min" and "max"
+	blindly can lead to a significant loss of device capacity or
+	control quality.  "min" and "max" are useful for regulating
+	devices which show wide temporary behavior changes - e.g. a
+	ssd which accepts writes at the line speed for a while and
+	then completely stalls for multiple seconds.
+
+	When "ctrl" is "auto", the parameters are controlled by the
+	kernel and may change automatically.  Setting "ctrl" to "user"
+	or setting any of the percentile and latency parameters puts
+	it into "user" mode and disables the automatic changes.  The
+	automatic mode can be restored by setting "ctrl" to "auto".
+
+  io.cost.model
+	A read-write nested-keyed file with exists only on the root
+	cgroup.
+
+	This file configures the cost model of the IO cost model based
+	controller (CONFIG_BLK_CGROUP_IOCOST) which currently
+	implements "io.weight" proportional control.  Lines are keyed
+	by $MAJ:$MIN device numbers and not ordered.  The line for a
+	given device is populated on the first write for the device on
+	"io.cost.qos" or "io.cost.model".  The following nested keys
+	are defined.
+
+	  =====		================================
+	  ctrl		"auto" or "user"
+	  model		The cost model in use - "linear"
+	  =====		================================
+
+	When "ctrl" is "auto", the kernel may change all parameters
+	dynamically.  When "ctrl" is set to "user" or any other
+	parameters are written to, "ctrl" become "user" and the
+	automatic changes are disabled.
+
+	When "model" is "linear", the following model parameters are
+	defined.
+
+	  =============	========================================
+	  [r|w]bps	The maximum sequential IO throughput
+	  [r|w]seqiops	The maximum 4k sequential IOs per second
+	  [r|w]randiops	The maximum 4k random IOs per second
+	  =============	========================================
+
+	From the above, the builtin linear model determines the base
+	costs of a sequential and random IO and the cost coefficient
+	for the IO size.  While simple, this model can cover most
+	common device classes acceptably.
+
+	The IO cost model isn't expected to be accurate in absolute
+	sense and is scaled to the device behavior dynamically.
+
   io.weight
 	A read-write flat-keyed file which exists on non-root cgroups.
 	The default is "default 100".
diff --git a/block/Kconfig b/block/Kconfig
index 1b62ad6d0e12..41c0917ce622 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -135,6 +135,16 @@ config BLK_CGROUP_IOLATENCY
 
 	Note, this is an experimental interface and could be changed someday.
 
+config BLK_CGROUP_IOCOST
+	bool "Enable support for cost model based cgroup IO controller"
+	depends on BLK_CGROUP=y
+	select BLK_RQ_ALLOC_TIME
+	---help---
+	Enabling this option enables the .weight interface for cost
+	model based proportional IO control.  The IO controller
+	distributes IO capacity between different groups based on
+	their share of the overall weight distribution.
+
 config BLK_WBT_MQ
 	bool "Multiqueue writeback throttling"
 	default y
diff --git a/block/Makefile b/block/Makefile
index eee1b4ceecf9..9ef57ace90d4 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
+obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
 obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
new file mode 100644
index 000000000000..680815620095
--- /dev/null
+++ b/block/blk-iocost.c
@@ -0,0 +1,2371 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * IO cost model based controller.
+ *
+ * Copyright (C) 2019 Tejun Heo <tj@kernel.org>
+ * Copyright (C) 2019 Andy Newell <newella@fb.com>
+ * Copyright (C) 2019 Facebook
+ *
+ * One challenge of controlling IO resources is the lack of trivially
+ * observable cost metric.  This is distinguished from CPU and memory where
+ * wallclock time and the number of bytes can serve as accurate enough
+ * approximations.
+ *
+ * Bandwidth and iops are the most commonly used metrics for IO devices but
+ * depending on the type and specifics of the device, different IO patterns
+ * easily lead to multiple orders of magnitude variations rendering them
+ * useless for the purpose of IO capacity distribution.  While on-device
+ * time, with a lot of clutches, could serve as a useful approximation for
+ * non-queued rotational devices, this is no longer viable with modern
+ * devices, even the rotational ones.
+ *
+ * While there is no cost metric we can trivially observe, it isn't a
+ * complete mystery.  For example, on a rotational device, seek cost
+ * dominates while a contiguous transfer contributes a smaller amount
+ * proportional to the size.  If we can characterize at least the relative
+ * costs of these different types of IOs, it should be possible to
+ * implement a reasonable work-conserving proportional IO resource
+ * distribution.
+ *
+ * 1. IO Cost Model
+ *
+ * IO cost model estimates the cost of an IO given its basic parameters and
+ * history (e.g. the end sector of the last IO).  The cost is measured in
+ * device time.  If a given IO is estimated to cost 10ms, the device should
+ * be able to process ~100 of those IOs in a second.
+ *
+ * Currently, there's only one builtin cost model - linear.  Each IO is
+ * classified as sequential or random and given a base cost accordingly.
+ * On top of that, a size cost proportional to the length of the IO is
+ * added.  While simple, this model captures the operational
+ * characteristics of a wide varienty of devices well enough.  Default
+ * paramters for several different classes of devices are provided and the
+ * parameters can be configured from userspace via
+ * /sys/fs/cgroup/io.cost.model.
+ *
+ * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate
+ * device-specific coefficients.
+ *
+ * 2. Control Strategy
+ *
+ * The device virtual time (vtime) is used as the primary control metric.
+ * The control strategy is composed of the following three parts.
+ *
+ * 2-1. Vtime Distribution
+ *
+ * When a cgroup becomes active in terms of IOs, its hierarchical share is
+ * calculated.  Please consider the following hierarchy where the numbers
+ * inside parentheses denote the configured weights.
+ *
+ *           root
+ *         /       \
+ *      A (w:100)  B (w:300)
+ *      /       \
+ *  A0 (w:100)  A1 (w:100)
+ *
+ * If B is idle and only A0 and A1 are actively issuing IOs, as the two are
+ * of equal weight, each gets 50% share.  If then B starts issuing IOs, B
+ * gets 300/(100+300) or 75% share, and A0 and A1 equally splits the rest,
+ * 12.5% each.  The distribution mechanism only cares about these flattened
+ * shares.  They're called hweights (hierarchical weights) and always add
+ * upto 1 (HWEIGHT_WHOLE).
+ *
+ * A given cgroup's vtime runs slower in inverse proportion to its hweight.
+ * For example, with 12.5% weight, A0's time runs 8 times slower (100/12.5)
+ * against the device vtime - an IO which takes 10ms on the underlying
+ * device is considered to take 80ms on A0.
+ *
+ * This constitutes the basis of IO capacity distribution.  Each cgroup's
+ * vtime is running at a rate determined by its hweight.  A cgroup tracks
+ * the vtime consumed by past IOs and can issue a new IO iff doing so
+ * wouldn't outrun the current device vtime.  Otherwise, the IO is
+ * suspended until the vtime has progressed enough to cover it.
+ *
+ * 2-2. Vrate Adjustment
+ *
+ * It's unrealistic to expect the cost model to be perfect.  There are too
+ * many devices and even on the same device the overall performance
+ * fluctuates depending on numerous factors such as IO mixture and device
+ * internal garbage collection.  The controller needs to adapt dynamically.
+ *
+ * This is achieved by adjusting the overall IO rate according to how busy
+ * the device is.  If the device becomes overloaded, we're sending down too
+ * many IOs and should generally slow down.  If there are waiting issuers
+ * but the device isn't saturated, we're issuing too few and should
+ * generally speed up.
+ *
+ * To slow down, we lower the vrate - the rate at which the device vtime
+ * passes compared to the wall clock.  For example, if the vtime is running
+ * at the vrate of 75%, all cgroups added up would only be able to issue
+ * 750ms worth of IOs per second, and vice-versa for speeding up.
+ *
+ * Device business is determined using two criteria - rq wait and
+ * completion latencies.
+ *
+ * When a device gets saturated, the on-device and then the request queues
+ * fill up and a bio which is ready to be issued has to wait for a request
+ * to become available.  When this delay becomes noticeable, it's a clear
+ * indication that the device is saturated and we lower the vrate.  This
+ * saturation signal is fairly conservative as it only triggers when both
+ * hardware and software queues are filled up, and is used as the default
+ * busy signal.
+ *
+ * As devices can have deep queues and be unfair in how the queued commands
+ * are executed, soley depending on rq wait may not result in satisfactory
+ * control quality.  For a better control quality, completion latency QoS
+ * parameters can be configured so that the device is considered saturated
+ * if N'th percentile completion latency rises above the set point.
+ *
+ * The completion latency requirements are a function of both the
+ * underlying device characteristics and the desired IO latency quality of
+ * service.  There is an inherent trade-off - the tighter the latency QoS,
+ * the higher the bandwidth lossage.  Latency QoS is disabled by default
+ * and can be set through /sys/fs/cgroup/io.cost.qos.
+ *
+ * 2-3. Work Conservation
+ *
+ * Imagine two cgroups A and B with equal weights.  A is issuing a small IO
+ * periodically while B is sending out enough parallel IOs to saturate the
+ * device on its own.  Let's say A's usage amounts to 100ms worth of IO
+ * cost per second, i.e., 10% of the device capacity.  The naive
+ * distribution of half and half would lead to 60% utilization of the
+ * device, a significant reduction in the total amount of work done
+ * compared to free-for-all competition.  This is too high a cost to pay
+ * for IO control.
+ *
+ * To conserve the total amount of work done, we keep track of how much
+ * each active cgroup is actually using and yield part of its weight if
+ * there are other cgroups which can make use of it.  In the above case,
+ * A's weight will be lowered so that it hovers above the actual usage and
+ * B would be able to use the rest.
+ *
+ * As we don't want to penalize a cgroup for donating its weight, the
+ * surplus weight adjustment factors in a margin and has an immediate
+ * snapback mechanism in case the cgroup needs more IO vtime for itself.
+ *
+ * Note that adjusting down surplus weights has the same effects as
+ * accelerating vtime for other cgroups and work conservation can also be
+ * implemented by adjusting vrate dynamically.  However, squaring who can
+ * donate and should take back how much requires hweight propagations
+ * anyway making it easier to implement and understand as a separate
+ * mechanism.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/time64.h>
+#include <linux/parser.h>
+#include <linux/sched/signal.h>
+#include <linux/blk-cgroup.h>
+#include "blk-rq-qos.h"
+#include "blk-stat.h"
+#include "blk-wbt.h"
+
+#ifdef CONFIG_TRACEPOINTS
+
+/* copied from TRACE_CGROUP_PATH, see cgroup-internal.h */
+#define TRACE_IOCG_PATH_LEN 1024
+static DEFINE_SPINLOCK(trace_iocg_path_lock);
+static char trace_iocg_path[TRACE_IOCG_PATH_LEN];
+
+#define TRACE_IOCG_PATH(type, iocg, ...)					\
+	do {									\
+		unsigned long flags;						\
+		if (trace_iocost_##type##_enabled()) {				\
+			spin_lock_irqsave(&trace_iocg_path_lock, flags);	\
+			cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup,	\
+				    trace_iocg_path, TRACE_IOCG_PATH_LEN);	\
+			trace_iocost_##type(iocg, trace_iocg_path,		\
+					      ##__VA_ARGS__);			\
+			spin_unlock_irqrestore(&trace_iocg_path_lock, flags);	\
+		}								\
+	} while (0)
+
+#else	/* CONFIG_TRACE_POINTS */
+#define TRACE_IOCG_PATH(type, iocg, ...)	do { } while (0)
+#endif	/* CONFIG_TRACE_POINTS */
+
+enum {
+	MILLION			= 1000000,
+
+	/* timer period is calculated from latency requirements, bound it */
+	MIN_PERIOD		= USEC_PER_MSEC,
+	MAX_PERIOD		= USEC_PER_SEC,
+
+	/*
+	 * A cgroup's vtime can run 50% behind the device vtime, which
+	 * serves as its IO credit buffer.  Surplus weight adjustment is
+	 * immediately canceled if the vtime margin runs below 10%.
+	 */
+	MARGIN_PCT		= 50,
+	INUSE_MARGIN_PCT	= 10,
+
+	/* Have some play in waitq timer operations */
+	WAITQ_TIMER_MARGIN_PCT	= 5,
+
+	/*
+	 * vtime can wrap well within a reasonable uptime when vrate is
+	 * consistently raised.  Don't trust recorded cgroup vtime if the
+	 * period counter indicates that it's older than 5mins.
+	 */
+	VTIME_VALID_DUR		= 300 * USEC_PER_SEC,
+
+	/*
+	 * Remember the past three non-zero usages and use the max for
+	 * surplus calculation.  Three slots guarantee that we remember one
+	 * full period usage from the last active stretch even after
+	 * partial deactivation and re-activation periods.  Don't start
+	 * giving away weight before collecting two data points to prevent
+	 * hweight adjustments based on one partial activation period.
+	 */
+	NR_USAGE_SLOTS		= 3,
+	MIN_VALID_USAGES	= 2,
+
+	/* 1/64k is granular enough and can easily be handled w/ u32 */
+	HWEIGHT_WHOLE		= 1 << 16,
+
+	/*
+	 * As vtime is used to calculate the cost of each IO, it needs to
+	 * be fairly high precision.  For example, it should be able to
+	 * represent the cost of a single page worth of discard with
+	 * suffificient accuracy.  At the same time, it should be able to
+	 * represent reasonably long enough durations to be useful and
+	 * convenient during operation.
+	 *
+	 * 1s worth of vtime is 2^37.  This gives us both sub-nanosecond
+	 * granularity and days of wrap-around time even at extreme vrates.
+	 */
+	VTIME_PER_SEC_SHIFT	= 37,
+	VTIME_PER_SEC		= 1LLU << VTIME_PER_SEC_SHIFT,
+	VTIME_PER_USEC		= VTIME_PER_SEC / USEC_PER_SEC,
+
+	/* bound vrate adjustments within two orders of magnitude */
+	VRATE_MIN_PPM		= 10000,	/* 1% */
+	VRATE_MAX_PPM		= 100000000,	/* 10000% */
+
+	VRATE_MIN		= VTIME_PER_USEC * VRATE_MIN_PPM / MILLION,
+	VRATE_CLAMP_ADJ_PCT	= 4,
+
+	/* if IOs end up waiting for requests, issue less */
+	RQ_WAIT_BUSY_PCT	= 5,
+
+	/* unbusy hysterisis */
+	UNBUSY_THR_PCT		= 75,
+
+	/* don't let cmds which take a very long time pin lagging for too long */
+	MAX_LAGGING_PERIODS	= 10,
+
+	/*
+	 * If usage% * 1.25 + 2% is lower than hweight% by more than 3%,
+	 * donate the surplus.
+	 */
+	SURPLUS_SCALE_PCT	= 125,			/* * 125% */
+	SURPLUS_SCALE_ABS	= HWEIGHT_WHOLE / 50,	/* + 2% */
+	SURPLUS_MIN_ADJ_DELTA	= HWEIGHT_WHOLE / 33,	/* 3% */
+
+	/* switch iff the conditions are met for longer than this */
+	AUTOP_CYCLE_NSEC	= 10LLU * NSEC_PER_SEC,
+
+	/*
+	 * Count IO size in 4k pages.  The 12bit shift helps keeping
+	 * size-proportional components of cost calculation in closer
+	 * numbers of digits to per-IO cost components.
+	 */
+	IOC_PAGE_SHIFT		= 12,
+	IOC_PAGE_SIZE		= 1 << IOC_PAGE_SHIFT,
+	IOC_SECT_TO_PAGE_SHIFT	= IOC_PAGE_SHIFT - SECTOR_SHIFT,
+
+	/* if apart further than 16M, consider randio for linear model */
+	LCOEF_RANDIO_PAGES	= 4096,
+};
+
+enum ioc_running {
+	IOC_IDLE,
+	IOC_RUNNING,
+	IOC_STOP,
+};
+
+/* io.cost.qos controls including per-dev enable of the whole controller */
+enum {
+	QOS_ENABLE,
+	QOS_CTRL,
+	NR_QOS_CTRL_PARAMS,
+};
+
+/* io.cost.qos params */
+enum {
+	QOS_RPPM,
+	QOS_RLAT,
+	QOS_WPPM,
+	QOS_WLAT,
+	QOS_MIN,
+	QOS_MAX,
+	NR_QOS_PARAMS,
+};
+
+/* io.cost.model controls */
+enum {
+	COST_CTRL,
+	COST_MODEL,
+	NR_COST_CTRL_PARAMS,
+};
+
+/* builtin linear cost model coefficients */
+enum {
+	I_LCOEF_RBPS,
+	I_LCOEF_RSEQIOPS,
+	I_LCOEF_RRANDIOPS,
+	I_LCOEF_WBPS,
+	I_LCOEF_WSEQIOPS,
+	I_LCOEF_WRANDIOPS,
+	NR_I_LCOEFS,
+};
+
+enum {
+	LCOEF_RPAGE,
+	LCOEF_RSEQIO,
+	LCOEF_RRANDIO,
+	LCOEF_WPAGE,
+	LCOEF_WSEQIO,
+	LCOEF_WRANDIO,
+	NR_LCOEFS,
+};
+
+enum {
+	AUTOP_INVALID,
+	AUTOP_HDD,
+	AUTOP_SSD_QD1,
+	AUTOP_SSD_DFL,
+	AUTOP_SSD_FAST,
+};
+
+struct ioc_gq;
+
+struct ioc_params {
+	u32				qos[NR_QOS_PARAMS];
+	u64				i_lcoefs[NR_I_LCOEFS];
+	u64				lcoefs[NR_LCOEFS];
+	u32				too_fast_vrate_pct;
+	u32				too_slow_vrate_pct;
+};
+
+struct ioc_missed {
+	u32				nr_met;
+	u32				nr_missed;
+	u32				last_met;
+	u32				last_missed;
+};
+
+struct ioc_pcpu_stat {
+	struct ioc_missed		missed[2];
+
+	u64				rq_wait_ns;
+	u64				last_rq_wait_ns;
+};
+
+/* per device */
+struct ioc {
+	struct rq_qos			rqos;
+
+	bool				enabled;
+
+	struct ioc_params		params;
+	u32				period_us;
+	u32				margin_us;
+	u64				vrate_min;
+	u64				vrate_max;
+
+	spinlock_t			lock;
+	struct timer_list		timer;
+	struct list_head		active_iocgs;	/* active cgroups */
+	struct ioc_pcpu_stat __percpu	*pcpu_stat;
+
+	enum ioc_running		running;
+	atomic64_t			vtime_rate;
+
+	seqcount_t			period_seqcount;
+	u32				period_at;	/* wallclock starttime */
+	u64				period_at_vtime; /* vtime starttime */
+
+	atomic64_t			cur_period;	/* inc'd each period */
+	int				busy_level;	/* saturation history */
+
+	u64				inuse_margin_vtime;
+	bool				weights_updated;
+	atomic_t			hweight_gen;	/* for lazy hweights */
+
+	u64				autop_too_fast_at;
+	u64				autop_too_slow_at;
+	int				autop_idx;
+	bool				user_qos_params:1;
+	bool				user_cost_model:1;
+};
+
+/* per device-cgroup pair */
+struct ioc_gq {
+	struct blkg_policy_data		pd;
+	struct ioc			*ioc;
+
+	/*
+	 * A iocg can get its weight from two sources - an explicit
+	 * per-device-cgroup configuration or the default weight of the
+	 * cgroup.  `cfg_weight` is the explicit per-device-cgroup
+	 * configuration.  `weight` is the effective considering both
+	 * sources.
+	 *
+	 * When an idle cgroup becomes active its `active` goes from 0 to
+	 * `weight`.  `inuse` is the surplus adjusted active weight.
+	 * `active` and `inuse` are used to calculate `hweight_active` and
+	 * `hweight_inuse`.
+	 *
+	 * `last_inuse` remembers `inuse` while an iocg is idle to persist
+	 * surplus adjustments.
+	 */
+	u32				cfg_weight;
+	u32				weight;
+	u32				active;
+	u32				inuse;
+	u32				last_inuse;
+
+	sector_t			cursor;		/* to detect randio */
+
+	/*
+	 * `vtime` is this iocg's vtime cursor which progresses as IOs are
+	 * issued.  If lagging behind device vtime, the delta represents
+	 * the currently available IO budget.  If runnning ahead, the
+	 * overage.
+	 *
+	 * `vtime_done` is the same but progressed on completion rather
+	 * than issue.  The delta behind `vtime` represents the cost of
+	 * currently in-flight IOs.
+	 *
+	 * `last_vtime` is used to remember `vtime` at the end of the last
+	 * period to calculate utilization.
+	 */
+	atomic64_t			vtime;
+	atomic64_t			done_vtime;
+	u64				last_vtime;
+
+	/*
+	 * The period this iocg was last active in.  Used for deactivation
+	 * and invalidating `vtime`.
+	 */
+	atomic64_t			active_period;
+	struct list_head		active_list;
+
+	/* see __propagate_active_weight() and current_hweight() for details */
+	u64				child_active_sum;
+	u64				child_inuse_sum;
+	int				hweight_gen;
+	u32				hweight_active;
+	u32				hweight_inuse;
+	bool				has_surplus;
+
+	struct wait_queue_head		waitq;
+	struct hrtimer			waitq_timer;
+	struct hrtimer			delay_timer;
+
+	/* usage is recorded as fractions of HWEIGHT_WHOLE */
+	int				usage_idx;
+	u32				usages[NR_USAGE_SLOTS];
+
+	/* this iocg's depth in the hierarchy and ancestors including self */
+	int				level;
+	struct ioc_gq			*ancestors[];
+};
+
+/* per cgroup */
+struct ioc_cgrp {
+	struct blkcg_policy_data	cpd;
+	unsigned int			dfl_weight;
+};
+
+struct ioc_now {
+	u64				now_ns;
+	u32				now;
+	u64				vnow;
+	u64				vrate;
+};
+
+struct iocg_wait {
+	struct wait_queue_entry		wait;
+	struct bio			*bio;
+	u64				abs_cost;
+	bool				committed;
+};
+
+struct iocg_wake_ctx {
+	struct ioc_gq			*iocg;
+	u32				hw_inuse;
+	s64				vbudget;
+};
+
+static const struct ioc_params autop[] = {
+	[AUTOP_HDD] = {
+		.qos				= {
+			[QOS_RLAT]		=         50000, /* 50ms */
+			[QOS_WLAT]		=         50000,
+			[QOS_MIN]		= VRATE_MIN_PPM,
+			[QOS_MAX]		= VRATE_MAX_PPM,
+		},
+		.i_lcoefs			= {
+			[I_LCOEF_RBPS]		=     174019176,
+			[I_LCOEF_RSEQIOPS]	=         41708,
+			[I_LCOEF_RRANDIOPS]	=           370,
+			[I_LCOEF_WBPS]		=     178075866,
+			[I_LCOEF_WSEQIOPS]	=         42705,
+			[I_LCOEF_WRANDIOPS]	=           378,
+		},
+	},
+	[AUTOP_SSD_QD1] = {
+		.qos				= {
+			[QOS_RLAT]		=         25000, /* 25ms */
+			[QOS_WLAT]		=         25000,
+			[QOS_MIN]		= VRATE_MIN_PPM,
+			[QOS_MAX]		= VRATE_MAX_PPM,
+		},
+		.i_lcoefs			= {
+			[I_LCOEF_RBPS]		=     245855193,
+			[I_LCOEF_RSEQIOPS]	=         61575,
+			[I_LCOEF_RRANDIOPS]	=          6946,
+			[I_LCOEF_WBPS]		=     141365009,
+			[I_LCOEF_WSEQIOPS]	=         33716,
+			[I_LCOEF_WRANDIOPS]	=         26796,
+		},
+	},
+	[AUTOP_SSD_DFL] = {
+		.qos				= {
+			[QOS_RLAT]		=         25000, /* 25ms */
+			[QOS_WLAT]		=         25000,
+			[QOS_MIN]		= VRATE_MIN_PPM,
+			[QOS_MAX]		= VRATE_MAX_PPM,
+		},
+		.i_lcoefs			= {
+			[I_LCOEF_RBPS]		=     488636629,
+			[I_LCOEF_RSEQIOPS]	=          8932,
+			[I_LCOEF_RRANDIOPS]	=          8518,
+			[I_LCOEF_WBPS]		=     427891549,
+			[I_LCOEF_WSEQIOPS]	=         28755,
+			[I_LCOEF_WRANDIOPS]	=         21940,
+		},
+		.too_fast_vrate_pct		=           500,
+	},
+	[AUTOP_SSD_FAST] = {
+		.qos				= {
+			[QOS_RLAT]		=          5000, /* 5ms */
+			[QOS_WLAT]		=          5000,
+			[QOS_MIN]		= VRATE_MIN_PPM,
+			[QOS_MAX]		= VRATE_MAX_PPM,
+		},
+		.i_lcoefs			= {
+			[I_LCOEF_RBPS]		=    3102524156LLU,
+			[I_LCOEF_RSEQIOPS]	=        724816,
+			[I_LCOEF_RRANDIOPS]	=        778122,
+			[I_LCOEF_WBPS]		=    1742780862LLU,
+			[I_LCOEF_WSEQIOPS]	=        425702,
+			[I_LCOEF_WRANDIOPS]	=	 443193,
+		},
+		.too_slow_vrate_pct		=            10,
+	},
+};
+
+/*
+ * vrate adjust percentages indexed by ioc->busy_level.  We adjust up on
+ * vtime credit shortage and down on device saturation.
+ */
+static u32 vrate_adj_pct[] =
+	{ 0, 0, 0, 0,
+	  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+	  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+	  4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16 };
+
+static struct blkcg_policy blkcg_policy_iocost;
+
+/* accessors and helpers */
+static struct ioc *rqos_to_ioc(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct ioc, rqos);
+}
+
+static struct ioc *q_to_ioc(struct request_queue *q)
+{
+	return rqos_to_ioc(rq_qos_id(q, RQ_QOS_COST));
+}
+
+static const char *q_name(struct request_queue *q)
+{
+	if (test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
+		return kobject_name(q->kobj.parent);
+	else
+		return "<unknown>";
+}
+
+static const char __maybe_unused *ioc_name(struct ioc *ioc)
+{
+	return q_name(ioc->rqos.q);
+}
+
+static struct ioc_gq *pd_to_iocg(struct blkg_policy_data *pd)
+{
+	return pd ? container_of(pd, struct ioc_gq, pd) : NULL;
+}
+
+static struct ioc_gq *blkg_to_iocg(struct blkcg_gq *blkg)
+{
+	return pd_to_iocg(blkg_to_pd(blkg, &blkcg_policy_iocost));
+}
+
+static struct blkcg_gq *iocg_to_blkg(struct ioc_gq *iocg)
+{
+	return pd_to_blkg(&iocg->pd);
+}
+
+static struct ioc_cgrp *blkcg_to_iocc(struct blkcg *blkcg)
+{
+	return container_of(blkcg_to_cpd(blkcg, &blkcg_policy_iocost),
+			    struct ioc_cgrp, cpd);
+}
+
+/*
+ * Scale @abs_cost to the inverse of @hw_inuse.  The lower the hierarchical
+ * weight, the more expensive each IO.
+ */
+static u64 abs_cost_to_cost(u64 abs_cost, u32 hw_inuse)
+{
+	return DIV64_U64_ROUND_UP(abs_cost * HWEIGHT_WHOLE, hw_inuse);
+}
+
+static void iocg_commit_bio(struct ioc_gq *iocg, struct bio *bio, u64 cost)
+{
+	bio->bi_iocost_cost = cost;
+	atomic64_add(cost, &iocg->vtime);
+}
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/iocost.h>
+
+/* latency Qos params changed, update period_us and all the dependent params */
+static void ioc_refresh_period_us(struct ioc *ioc)
+{
+	u32 ppm, lat, multi, period_us;
+
+	lockdep_assert_held(&ioc->lock);
+
+	/* pick the higher latency target */
+	if (ioc->params.qos[QOS_RLAT] >= ioc->params.qos[QOS_WLAT]) {
+		ppm = ioc->params.qos[QOS_RPPM];
+		lat = ioc->params.qos[QOS_RLAT];
+	} else {
+		ppm = ioc->params.qos[QOS_WPPM];
+		lat = ioc->params.qos[QOS_WLAT];
+	}
+
+	/*
+	 * We want the period to be long enough to contain a healthy number
+	 * of IOs while short enough for granular control.  Define it as a
+	 * multiple of the latency target.  Ideally, the multiplier should
+	 * be scaled according to the percentile so that it would nominally
+	 * contain a certain number of requests.  Let's be simpler and
+	 * scale it linearly so that it's 2x >= pct(90) and 10x at pct(50).
+	 */
+	if (ppm)
+		multi = max_t(u32, (MILLION - ppm) / 50000, 2);
+	else
+		multi = 2;
+	period_us = multi * lat;
+	period_us = clamp_t(u32, period_us, MIN_PERIOD, MAX_PERIOD);
+
+	/* calculate dependent params */
+	ioc->period_us = period_us;
+	ioc->margin_us = period_us * MARGIN_PCT / 100;
+	ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP(
+			period_us * VTIME_PER_USEC * INUSE_MARGIN_PCT, 100);
+}
+
+static int ioc_autop_idx(struct ioc *ioc)
+{
+	int idx = ioc->autop_idx;
+	const struct ioc_params *p = &autop[idx];
+	u32 vrate_pct;
+	u64 now_ns;
+
+	/* rotational? */
+	if (!blk_queue_nonrot(ioc->rqos.q))
+		return AUTOP_HDD;
+
+	/* handle SATA SSDs w/ broken NCQ */
+	if (blk_queue_depth(ioc->rqos.q) == 1)
+		return AUTOP_SSD_QD1;
+
+	/* use one of the normal ssd sets */
+	if (idx < AUTOP_SSD_DFL)
+		return AUTOP_SSD_DFL;
+
+	/* if user is overriding anything, maintain what was there */
+	if (ioc->user_qos_params || ioc->user_cost_model)
+		return idx;
+
+	/* step up/down based on the vrate */
+	vrate_pct = div64_u64(atomic64_read(&ioc->vtime_rate) * 100,
+			      VTIME_PER_USEC);
+	now_ns = ktime_get_ns();
+
+	if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) {
+		if (!ioc->autop_too_fast_at)
+			ioc->autop_too_fast_at = now_ns;
+		if (now_ns - ioc->autop_too_fast_at >= AUTOP_CYCLE_NSEC)
+			return idx + 1;
+	} else {
+		ioc->autop_too_fast_at = 0;
+	}
+
+	if (p->too_slow_vrate_pct && p->too_slow_vrate_pct >= vrate_pct) {
+		if (!ioc->autop_too_slow_at)
+			ioc->autop_too_slow_at = now_ns;
+		if (now_ns - ioc->autop_too_slow_at >= AUTOP_CYCLE_NSEC)
+			return idx - 1;
+	} else {
+		ioc->autop_too_slow_at = 0;
+	}
+
+	return idx;
+}
+
+/*
+ * Take the followings as input
+ *
+ *  @bps	maximum sequential throughput
+ *  @seqiops	maximum sequential 4k iops
+ *  @randiops	maximum random 4k iops
+ *
+ * and calculate the linear model cost coefficients.
+ *
+ *  *@page	per-page cost		1s / (@bps / 4096)
+ *  *@seqio	base cost of a seq IO	max((1s / @seqiops) - *@page, 0)
+ *  @randiops	base cost of a rand IO	max((1s / @randiops) - *@page, 0)
+ */
+static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops,
+			u64 *page, u64 *seqio, u64 *randio)
+{
+	u64 v;
+
+	*page = *seqio = *randio = 0;
+
+	if (bps)
+		*page = DIV64_U64_ROUND_UP(VTIME_PER_SEC,
+					   DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE));
+
+	if (seqiops) {
+		v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops);
+		if (v > *page)
+			*seqio = v - *page;
+	}
+
+	if (randiops) {
+		v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, randiops);
+		if (v > *page)
+			*randio = v - *page;
+	}
+}
+
+static void ioc_refresh_lcoefs(struct ioc *ioc)
+{
+	u64 *u = ioc->params.i_lcoefs;
+	u64 *c = ioc->params.lcoefs;
+
+	calc_lcoefs(u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
+		    &c[LCOEF_RPAGE], &c[LCOEF_RSEQIO], &c[LCOEF_RRANDIO]);
+	calc_lcoefs(u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS],
+		    &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]);
+}
+
+static bool ioc_refresh_params(struct ioc *ioc, bool force)
+{
+	const struct ioc_params *p;
+	int idx;
+
+	lockdep_assert_held(&ioc->lock);
+
+	idx = ioc_autop_idx(ioc);
+	p = &autop[idx];
+
+	if (idx == ioc->autop_idx && !force)
+		return false;
+
+	if (idx != ioc->autop_idx)
+		atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
+
+	ioc->autop_idx = idx;
+	ioc->autop_too_fast_at = 0;
+	ioc->autop_too_slow_at = 0;
+
+	if (!ioc->user_qos_params)
+		memcpy(ioc->params.qos, p->qos, sizeof(p->qos));
+	if (!ioc->user_cost_model)
+		memcpy(ioc->params.i_lcoefs, p->i_lcoefs, sizeof(p->i_lcoefs));
+
+	ioc_refresh_period_us(ioc);
+	ioc_refresh_lcoefs(ioc);
+
+	ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] *
+					    VTIME_PER_USEC, MILLION);
+	ioc->vrate_max = div64_u64((u64)ioc->params.qos[QOS_MAX] *
+				   VTIME_PER_USEC, MILLION);
+
+	return true;
+}
+
+/* take a snapshot of the current [v]time and vrate */
+static void ioc_now(struct ioc *ioc, struct ioc_now *now)
+{
+	unsigned seq;
+
+	now->now_ns = ktime_get();
+	now->now = ktime_to_us(now->now_ns);
+	now->vrate = atomic64_read(&ioc->vtime_rate);
+
+	/*
+	 * The current vtime is
+	 *
+	 *   vtime at period start + (wallclock time since the start) * vrate
+	 *
+	 * As a consistent snapshot of `period_at_vtime` and `period_at` is
+	 * needed, they're seqcount protected.
+	 */
+	do {
+		seq = read_seqcount_begin(&ioc->period_seqcount);
+		now->vnow = ioc->period_at_vtime +
+			(now->now - ioc->period_at) * now->vrate;
+	} while (read_seqcount_retry(&ioc->period_seqcount, seq));
+}
+
+static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
+{
+	lockdep_assert_held(&ioc->lock);
+	WARN_ON_ONCE(ioc->running != IOC_RUNNING);
+
+	write_seqcount_begin(&ioc->period_seqcount);
+	ioc->period_at = now->now;
+	ioc->period_at_vtime = now->vnow;
+	write_seqcount_end(&ioc->period_seqcount);
+
+	ioc->timer.expires = jiffies + usecs_to_jiffies(ioc->period_us);
+	add_timer(&ioc->timer);
+}
+
+/*
+ * Update @iocg's `active` and `inuse` to @active and @inuse, update level
+ * weight sums and propagate upwards accordingly.
+ */
+static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse)
+{
+	struct ioc *ioc = iocg->ioc;
+	int lvl;
+
+	lockdep_assert_held(&ioc->lock);
+
+	inuse = min(active, inuse);
+
+	for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
+		struct ioc_gq *parent = iocg->ancestors[lvl];
+		struct ioc_gq *child = iocg->ancestors[lvl + 1];
+		u32 parent_active = 0, parent_inuse = 0;
+
+		/* update the level sums */
+		parent->child_active_sum += (s32)(active - child->active);
+		parent->child_inuse_sum += (s32)(inuse - child->inuse);
+		/* apply the udpates */
+		child->active = active;
+		child->inuse = inuse;
+
+		/*
+		 * The delta between inuse and active sums indicates that
+		 * that much of weight is being given away.  Parent's inuse
+		 * and active should reflect the ratio.
+		 */
+		if (parent->child_active_sum) {
+			parent_active = parent->weight;
+			parent_inuse = DIV64_U64_ROUND_UP(
+				parent_active * parent->child_inuse_sum,
+				parent->child_active_sum);
+		}
+
+		/* do we need to keep walking up? */
+		if (parent_active == parent->active &&
+		    parent_inuse == parent->inuse)
+			break;
+
+		active = parent_active;
+		inuse = parent_inuse;
+	}
+
+	ioc->weights_updated = true;
+}
+
+static void commit_active_weights(struct ioc *ioc)
+{
+	lockdep_assert_held(&ioc->lock);
+
+	if (ioc->weights_updated) {
+		/* paired with rmb in current_hweight(), see there */
+		smp_wmb();
+		atomic_inc(&ioc->hweight_gen);
+		ioc->weights_updated = false;
+	}
+}
+
+static void propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse)
+{
+	__propagate_active_weight(iocg, active, inuse);
+	commit_active_weights(iocg->ioc);
+}
+
+static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep)
+{
+	struct ioc *ioc = iocg->ioc;
+	int lvl;
+	u32 hwa, hwi;
+	int ioc_gen;
+
+	/* hot path - if uptodate, use cached */
+	ioc_gen = atomic_read(&ioc->hweight_gen);
+	if (ioc_gen == iocg->hweight_gen)
+		goto out;
+
+	/*
+	 * Paired with wmb in commit_active_weights().  If we saw the
+	 * updated hweight_gen, all the weight updates from
+	 * __propagate_active_weight() are visible too.
+	 *
+	 * We can race with weight updates during calculation and get it
+	 * wrong.  However, hweight_gen would have changed and a future
+	 * reader will recalculate and we're guaranteed to discard the
+	 * wrong result soon.
+	 */
+	smp_rmb();
+
+	hwa = hwi = HWEIGHT_WHOLE;
+	for (lvl = 0; lvl <= iocg->level - 1; lvl++) {
+		struct ioc_gq *parent = iocg->ancestors[lvl];
+		struct ioc_gq *child = iocg->ancestors[lvl + 1];
+		u32 active_sum = READ_ONCE(parent->child_active_sum);
+		u32 inuse_sum = READ_ONCE(parent->child_inuse_sum);
+		u32 active = READ_ONCE(child->active);
+		u32 inuse = READ_ONCE(child->inuse);
+
+		/* we can race with deactivations and either may read as zero */
+		if (!active_sum || !inuse_sum)
+			continue;
+
+		active_sum = max(active, active_sum);
+		hwa = hwa * active / active_sum;	/* max 16bits * 10000 */
+
+		inuse_sum = max(inuse, inuse_sum);
+		hwi = hwi * inuse / inuse_sum;		/* max 16bits * 10000 */
+	}
+
+	iocg->hweight_active = max_t(u32, hwa, 1);
+	iocg->hweight_inuse = max_t(u32, hwi, 1);
+	iocg->hweight_gen = ioc_gen;
+out:
+	if (hw_activep)
+		*hw_activep = iocg->hweight_active;
+	if (hw_inusep)
+		*hw_inusep = iocg->hweight_inuse;
+}
+
+static void weight_updated(struct ioc_gq *iocg)
+{
+	struct ioc *ioc = iocg->ioc;
+	struct blkcg_gq *blkg = iocg_to_blkg(iocg);
+	struct ioc_cgrp *iocc = blkcg_to_iocc(blkg->blkcg);
+	u32 weight;
+
+	lockdep_assert_held(&ioc->lock);
+
+	weight = iocg->cfg_weight ?: iocc->dfl_weight;
+	if (weight != iocg->weight && iocg->active)
+		propagate_active_weight(iocg, weight,
+			DIV64_U64_ROUND_UP(iocg->inuse * weight, iocg->weight));
+	iocg->weight = weight;
+}
+
+static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
+{
+	struct ioc *ioc = iocg->ioc;
+	u64 last_period, cur_period, max_period_delta;
+	u64 vtime, vmargin, vmin;
+	int i;
+
+	/*
+	 * If seem to be already active, just update the stamp to tell the
+	 * timer that we're still active.  We don't mind occassional races.
+	 */
+	if (!list_empty(&iocg->active_list)) {
+		ioc_now(ioc, now);
+		cur_period = atomic64_read(&ioc->cur_period);
+		if (atomic64_read(&iocg->active_period) != cur_period)
+			atomic64_set(&iocg->active_period, cur_period);
+		return true;
+	}
+
+	/* racy check on internal node IOs, treat as root level IOs */
+	if (iocg->child_active_sum)
+		return false;
+
+	spin_lock_irq(&ioc->lock);
+
+	ioc_now(ioc, now);
+
+	/* update period */
+	cur_period = atomic64_read(&ioc->cur_period);
+	last_period = atomic64_read(&iocg->active_period);
+	atomic64_set(&iocg->active_period, cur_period);
+
+	/* already activated or breaking leaf-only constraint? */
+	for (i = iocg->level; i > 0; i--)
+		if (!list_empty(&iocg->active_list))
+			goto fail_unlock;
+	if (iocg->child_active_sum)
+		goto fail_unlock;
+
+	/*
+	 * vtime may wrap when vrate is raised substantially due to
+	 * underestimated IO costs.  Look at the period and ignore its
+	 * vtime if the iocg has been idle for too long.  Also, cap the
+	 * budget it can start with to the margin.
+	 */
+	max_period_delta = DIV64_U64_ROUND_UP(VTIME_VALID_DUR, ioc->period_us);
+	vtime = atomic64_read(&iocg->vtime);
+	vmargin = ioc->margin_us * now->vrate;
+	vmin = now->vnow - vmargin;
+
+	if (last_period + max_period_delta < cur_period ||
+	    time_before64(vtime, vmin)) {
+		atomic64_add(vmin - vtime, &iocg->vtime);
+		atomic64_add(vmin - vtime, &iocg->done_vtime);
+		vtime = vmin;
+	}
+
+	/*
+	 * Activate, propagate weight and start period timer if not
+	 * running.  Reset hweight_gen to avoid accidental match from
+	 * wrapping.
+	 */
+	iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;
+	list_add(&iocg->active_list, &ioc->active_iocgs);
+	propagate_active_weight(iocg, iocg->weight,
+				iocg->last_inuse ?: iocg->weight);
+
+	TRACE_IOCG_PATH(iocg_activate, iocg, now,
+			last_period, cur_period, vtime);
+
+	iocg->last_vtime = vtime;
+
+	if (ioc->running == IOC_IDLE) {
+		ioc->running = IOC_RUNNING;
+		ioc_start_period(ioc, now);
+	}
+
+	spin_unlock_irq(&ioc->lock);
+	return true;
+
+fail_unlock:
+	spin_unlock_irq(&ioc->lock);
+	return false;
+}
+
+static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
+			int flags, void *key)
+{
+	struct iocg_wait *wait = container_of(wq_entry, struct iocg_wait, wait);
+	struct iocg_wake_ctx *ctx = (struct iocg_wake_ctx *)key;
+	u64 cost = abs_cost_to_cost(wait->abs_cost, ctx->hw_inuse);
+
+	ctx->vbudget -= cost;
+
+	if (ctx->vbudget < 0)
+		return -1;
+
+	iocg_commit_bio(ctx->iocg, wait->bio, cost);
+
+	/*
+	 * autoremove_wake_function() removes the wait entry only when it
+	 * actually changed the task state.  We want the wait always
+	 * removed.  Remove explicitly and use default_wake_function().
+	 */
+	list_del_init(&wq_entry->entry);
+	wait->committed = true;
+
+	default_wake_function(wq_entry, mode, flags, key);
+	return 0;
+}
+
+static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
+{
+	struct ioc *ioc = iocg->ioc;
+	struct iocg_wake_ctx ctx = { .iocg = iocg };
+	u64 margin_ns = (u64)(ioc->period_us *
+			      WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
+	u64 vshortage, expires, oexpires;
+
+	lockdep_assert_held(&iocg->waitq.lock);
+
+	/*
+	 * Wake up the ones which are due and see how much vtime we'll need
+	 * for the next one.
+	 */
+	current_hweight(iocg, NULL, &ctx.hw_inuse);
+	ctx.vbudget = now->vnow - atomic64_read(&iocg->vtime);
+	__wake_up_locked_key(&iocg->waitq, TASK_NORMAL, &ctx);
+	if (!waitqueue_active(&iocg->waitq))
+		return;
+	if (WARN_ON_ONCE(ctx.vbudget >= 0))
+		return;
+
+	/* determine next wakeup, add a quarter margin to guarantee chunking */
+	vshortage = -ctx.vbudget;
+	expires = now->now_ns +
+		DIV64_U64_ROUND_UP(vshortage, now->vrate) * NSEC_PER_USEC;
+	expires += margin_ns / 4;
+
+	/* if already active and close enough, don't bother */
+	oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->waitq_timer));
+	if (hrtimer_is_queued(&iocg->waitq_timer) &&
+	    abs(oexpires - expires) <= margin_ns / 4)
+		return;
+
+	hrtimer_start_range_ns(&iocg->waitq_timer, ns_to_ktime(expires),
+			       margin_ns / 4, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
+{
+	struct ioc_gq *iocg = container_of(timer, struct ioc_gq, waitq_timer);
+	struct ioc_now now;
+	unsigned long flags;
+
+	ioc_now(iocg->ioc, &now);
+
+	spin_lock_irqsave(&iocg->waitq.lock, flags);
+	iocg_kick_waitq(iocg, &now);
+	spin_unlock_irqrestore(&iocg->waitq.lock, flags);
+
+	return HRTIMER_NORESTART;
+}
+
+static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
+{
+	struct ioc *ioc = iocg->ioc;
+	struct blkcg_gq *blkg = iocg_to_blkg(iocg);
+	u64 vtime = atomic64_read(&iocg->vtime);
+	u64 vmargin = ioc->margin_us * now->vrate;
+	u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
+	u64 expires, oexpires;
+
+	/* clear or maintain depending on the overage */
+	if (time_before_eq64(vtime, now->vnow)) {
+		blkcg_clear_delay(blkg);
+		return;
+	}
+	if (!atomic_read(&blkg->use_delay) &&
+	    time_before_eq64(vtime, now->vnow + vmargin))
+		return;
+
+	/* use delay */
+	if (cost) {
+		u64 cost_ns = DIV64_U64_ROUND_UP(cost * NSEC_PER_USEC,
+						 now->vrate);
+		blkcg_add_delay(blkg, now->now_ns, cost_ns);
+	}
+	blkcg_use_delay(blkg);
+
+	expires = now->now_ns + DIV64_U64_ROUND_UP(vtime - now->vnow,
+						   now->vrate) * NSEC_PER_USEC;
+
+	/* if already active and close enough, don't bother */
+	oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
+	if (hrtimer_is_queued(&iocg->delay_timer) &&
+	    abs(oexpires - expires) <= margin_ns / 4)
+		return;
+
+	hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
+			       margin_ns / 4, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
+{
+	struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
+	struct ioc_now now;
+
+	ioc_now(iocg->ioc, &now);
+	iocg_kick_delay(iocg, &now, 0);
+
+	return HRTIMER_NORESTART;
+}
+
+static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p)
+{
+	u32 nr_met[2] = { };
+	u32 nr_missed[2] = { };
+	u64 rq_wait_ns = 0;
+	int cpu, rw;
+
+	for_each_online_cpu(cpu) {
+		struct ioc_pcpu_stat *stat = per_cpu_ptr(ioc->pcpu_stat, cpu);
+		u64 this_rq_wait_ns;
+
+		for (rw = READ; rw <= WRITE; rw++) {
+			u32 this_met = READ_ONCE(stat->missed[rw].nr_met);
+			u32 this_missed = READ_ONCE(stat->missed[rw].nr_missed);
+
+			nr_met[rw] += this_met - stat->missed[rw].last_met;
+			nr_missed[rw] += this_missed - stat->missed[rw].last_missed;
+			stat->missed[rw].last_met = this_met;
+			stat->missed[rw].last_missed = this_missed;
+		}
+
+		this_rq_wait_ns = READ_ONCE(stat->rq_wait_ns);
+		rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns;
+		stat->last_rq_wait_ns = this_rq_wait_ns;
+	}
+
+	for (rw = READ; rw <= WRITE; rw++) {
+		if (nr_met[rw] + nr_missed[rw])
+			missed_ppm_ar[rw] =
+				DIV64_U64_ROUND_UP((u64)nr_missed[rw] * MILLION,
+						   nr_met[rw] + nr_missed[rw]);
+		else
+			missed_ppm_ar[rw] = 0;
+	}
+
+	*rq_wait_pct_p = div64_u64(rq_wait_ns * 100,
+				   ioc->period_us * NSEC_PER_USEC);
+}
+
+/* was iocg idle this period? */
+static bool iocg_is_idle(struct ioc_gq *iocg)
+{
+	struct ioc *ioc = iocg->ioc;
+
+	/* did something get issued this period? */
+	if (atomic64_read(&iocg->active_period) ==
+	    atomic64_read(&ioc->cur_period))
+		return false;
+
+	/* is something in flight? */
+	if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime))
+		return false;
+
+	return true;
+}
+
+/* returns usage with margin added if surplus is large enough */
+static u32 surplus_adjusted_hweight_inuse(u32 usage, u32 hw_inuse)
+{
+	/* add margin */
+	usage = DIV_ROUND_UP(usage * SURPLUS_SCALE_PCT, 100);
+	usage += SURPLUS_SCALE_ABS;
+
+	/* don't bother if the surplus is too small */
+	if (usage + SURPLUS_MIN_ADJ_DELTA > hw_inuse)
+		return 0;
+
+	return usage;
+}
+
+static void ioc_timer_fn(struct timer_list *timer)
+{
+	struct ioc *ioc = container_of(timer, struct ioc, timer);
+	struct ioc_gq *iocg, *tiocg;
+	struct ioc_now now;
+	int nr_surpluses = 0, nr_shortages = 0, nr_lagging = 0;
+	u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM];
+	u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
+	u32 missed_ppm[2], rq_wait_pct;
+	u64 period_vtime;
+	int i;
+
+	/* how were the latencies during the period? */
+	ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);
+
+	/* take care of active iocgs */
+	spin_lock_irq(&ioc->lock);
+
+	ioc_now(ioc, &now);
+
+	period_vtime = now.vnow - ioc->period_at_vtime;
+	if (WARN_ON_ONCE(!period_vtime)) {
+		spin_unlock_irq(&ioc->lock);
+		return;
+	}
+
+	/*
+	 * Waiters determine the sleep durations based on the vrate they
+	 * saw at the time of sleep.  If vrate has increased, some waiters
+	 * could be sleeping for too long.  Wake up tardy waiters which
+	 * should have woken up in the last period and expire idle iocgs.
+	 */
+	list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
+		if (!waitqueue_active(&iocg->waitq) && !iocg_is_idle(iocg))
+			continue;
+
+		spin_lock(&iocg->waitq.lock);
+
+		if (waitqueue_active(&iocg->waitq)) {
+			/* might be oversleeping vtime / hweight changes, kick */
+			iocg_kick_waitq(iocg, &now);
+			iocg_kick_delay(iocg, &now, 0);
+		} else if (iocg_is_idle(iocg)) {
+			/* no waiter and idle, deactivate */
+			iocg->last_inuse = iocg->inuse;
+			__propagate_active_weight(iocg, 0, 0);
+			list_del_init(&iocg->active_list);
+		}
+
+		spin_unlock(&iocg->waitq.lock);
+	}
+	commit_active_weights(ioc);
+
+	/* calc usages and see whether some weights need to be moved around */
+	list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
+		u64 vdone, vtime, vusage, vmargin, vmin;
+		u32 hw_active, hw_inuse, usage;
+
+		/*
+		 * Collect unused and wind vtime closer to vnow to prevent
+		 * iocgs from accumulating a large amount of budget.
+		 */
+		vdone = atomic64_read(&iocg->done_vtime);
+		vtime = atomic64_read(&iocg->vtime);
+		current_hweight(iocg, &hw_active, &hw_inuse);
+
+		/*
+		 * Latency QoS detection doesn't account for IOs which are
+		 * in-flight for longer than a period.  Detect them by
+		 * comparing vdone against period start.  If lagging behind
+		 * IOs from past periods, don't increase vrate.
+		 */
+		if (!atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
+		    time_after64(vtime, vdone) &&
+		    time_after64(vtime, now.vnow -
+				 MAX_LAGGING_PERIODS * period_vtime) &&
+		    time_before64(vdone, now.vnow - period_vtime))
+			nr_lagging++;
+
+		if (waitqueue_active(&iocg->waitq))
+			vusage = now.vnow - iocg->last_vtime;
+		else if (time_before64(iocg->last_vtime, vtime))
+			vusage = vtime - iocg->last_vtime;
+		else
+			vusage = 0;
+
+		iocg->last_vtime += vusage;
+		/*
+		 * Factor in in-flight vtime into vusage to avoid
+		 * high-latency completions appearing as idle.  This should
+		 * be done after the above ->last_time adjustment.
+		 */
+		vusage = max(vusage, vtime - vdone);
+
+		/* calculate hweight based usage ratio and record */
+		if (vusage) {
+			usage = DIV64_U64_ROUND_UP(vusage * hw_inuse,
+						   period_vtime);
+			iocg->usage_idx = (iocg->usage_idx + 1) % NR_USAGE_SLOTS;
+			iocg->usages[iocg->usage_idx] = usage;
+		} else {
+			usage = 0;
+		}
+
+		/* see whether there's surplus vtime */
+		vmargin = ioc->margin_us * now.vrate;
+		vmin = now.vnow - vmargin;
+
+		iocg->has_surplus = false;
+
+		if (!waitqueue_active(&iocg->waitq) &&
+		    time_before64(vtime, vmin)) {
+			u64 delta = vmin - vtime;
+
+			/* throw away surplus vtime */
+			atomic64_add(delta, &iocg->vtime);
+			atomic64_add(delta, &iocg->done_vtime);
+			iocg->last_vtime += delta;
+			/* if usage is sufficiently low, maybe it can donate */
+			if (surplus_adjusted_hweight_inuse(usage, hw_inuse)) {
+				iocg->has_surplus = true;
+				nr_surpluses++;
+			}
+		} else if (hw_inuse < hw_active) {
+			u32 new_hwi, new_inuse;
+
+			/* was donating but might need to take back some */
+			if (waitqueue_active(&iocg->waitq)) {
+				new_hwi = hw_active;
+			} else {
+				new_hwi = max(hw_inuse,
+					      usage * SURPLUS_SCALE_PCT / 100 +
+					      SURPLUS_SCALE_ABS);
+			}
+
+			new_inuse = div64_u64((u64)iocg->inuse * new_hwi,
+					      hw_inuse);
+			new_inuse = clamp_t(u32, new_inuse, 1, iocg->active);
+
+			if (new_inuse > iocg->inuse) {
+				TRACE_IOCG_PATH(inuse_takeback, iocg, &now,
+						iocg->inuse, new_inuse,
+						hw_inuse, new_hwi);
+				__propagate_active_weight(iocg, iocg->weight,
+							  new_inuse);
+			}
+		} else {
+			/* genuninely out of vtime */
+			nr_shortages++;
+		}
+	}
+
+	if (!nr_shortages || !nr_surpluses)
+		goto skip_surplus_transfers;
+
+	/* there are both shortages and surpluses, transfer surpluses */
+	list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
+		u32 usage, hw_active, hw_inuse, new_hwi, new_inuse;
+		int nr_valid = 0;
+
+		if (!iocg->has_surplus)
+			continue;
+
+		/* base the decision on max historical usage */
+		for (i = 0, usage = 0; i < NR_USAGE_SLOTS; i++) {
+			if (iocg->usages[i]) {
+				usage = max(usage, iocg->usages[i]);
+				nr_valid++;
+			}
+		}
+		if (nr_valid < MIN_VALID_USAGES)
+			continue;
+
+		current_hweight(iocg, &hw_active, &hw_inuse);
+		new_hwi = surplus_adjusted_hweight_inuse(usage, hw_inuse);
+		if (!new_hwi)
+			continue;
+
+		new_inuse = DIV64_U64_ROUND_UP((u64)iocg->inuse * new_hwi,
+					       hw_inuse);
+		if (new_inuse < iocg->inuse) {
+			TRACE_IOCG_PATH(inuse_giveaway, iocg, &now,
+					iocg->inuse, new_inuse,
+					hw_inuse, new_hwi);
+			__propagate_active_weight(iocg, iocg->weight, new_inuse);
+		}
+	}
+skip_surplus_transfers:
+	commit_active_weights(ioc);
+
+	/*
+	 * If q is getting clogged or we're missing too much, we're issuing
+	 * too much IO and should lower vtime rate.  If we're not missing
+	 * and experiencing shortages but not surpluses, we're too stingy
+	 * and should increase vtime rate.
+	 */
+	if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
+	    missed_ppm[READ] > ppm_rthr ||
+	    missed_ppm[WRITE] > ppm_wthr) {
+		ioc->busy_level = max(ioc->busy_level, 0);
+		ioc->busy_level++;
+	} else if (nr_lagging) {
+		ioc->busy_level = max(ioc->busy_level, 0);
+	} else if (nr_shortages && !nr_surpluses &&
+		   rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
+		   missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
+		   missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
+		ioc->busy_level = min(ioc->busy_level, 0);
+		ioc->busy_level--;
+	} else {
+		ioc->busy_level = 0;
+	}
+
+	ioc->busy_level = clamp(ioc->busy_level, -1000, 1000);
+
+	if (ioc->busy_level) {
+		u64 vrate = atomic64_read(&ioc->vtime_rate);
+		u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max;
+
+		/* rq_wait signal is always reliable, ignore user vrate_min */
+		if (rq_wait_pct > RQ_WAIT_BUSY_PCT)
+			vrate_min = VRATE_MIN;
+
+		/*
+		 * If vrate is out of bounds, apply clamp gradually as the
+		 * bounds can change abruptly.  Otherwise, apply busy_level
+		 * based adjustment.
+		 */
+		if (vrate < vrate_min) {
+			vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT),
+					  100);
+			vrate = min(vrate, vrate_min);
+		} else if (vrate > vrate_max) {
+			vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT),
+					  100);
+			vrate = max(vrate, vrate_max);
+		} else {
+			int idx = min_t(int, abs(ioc->busy_level),
+					ARRAY_SIZE(vrate_adj_pct) - 1);
+			u32 adj_pct = vrate_adj_pct[idx];
+
+			if (ioc->busy_level > 0)
+				adj_pct = 100 - adj_pct;
+			else
+				adj_pct = 100 + adj_pct;
+
+			vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100),
+				      vrate_min, vrate_max);
+		}
+
+		trace_iocost_ioc_vrate_adj(ioc, vrate, &missed_ppm, rq_wait_pct,
+					   nr_lagging, nr_shortages,
+					   nr_surpluses);
+
+		atomic64_set(&ioc->vtime_rate, vrate);
+		ioc->inuse_margin_vtime = DIV64_U64_ROUND_UP(
+			ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
+	}
+
+	ioc_refresh_params(ioc, false);
+
+	/*
+	 * This period is done.  Move onto the next one.  If nothing's
+	 * going on with the device, stop the timer.
+	 */
+	atomic64_inc(&ioc->cur_period);
+
+	if (ioc->running != IOC_STOP) {
+		if (!list_empty(&ioc->active_iocgs)) {
+			ioc_start_period(ioc, &now);
+		} else {
+			ioc->busy_level = 0;
+			ioc->running = IOC_IDLE;
+		}
+	}
+
+	spin_unlock_irq(&ioc->lock);
+}
+
+static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
+				    bool is_merge, u64 *costp)
+{
+	struct ioc *ioc = iocg->ioc;
+	u64 coef_seqio, coef_randio, coef_page;
+	u64 pages = max_t(u64, bio_sectors(bio) >> IOC_SECT_TO_PAGE_SHIFT, 1);
+	u64 seek_pages = 0;
+	u64 cost = 0;
+
+	switch (bio_op(bio)) {
+	case REQ_OP_READ:
+		coef_seqio	= ioc->params.lcoefs[LCOEF_RSEQIO];
+		coef_randio	= ioc->params.lcoefs[LCOEF_RRANDIO];
+		coef_page	= ioc->params.lcoefs[LCOEF_RPAGE];
+		break;
+	case REQ_OP_WRITE:
+		coef_seqio	= ioc->params.lcoefs[LCOEF_WSEQIO];
+		coef_randio	= ioc->params.lcoefs[LCOEF_WRANDIO];
+		coef_page	= ioc->params.lcoefs[LCOEF_WPAGE];
+		break;
+	default:
+		goto out;
+	}
+
+	if (iocg->cursor) {
+		seek_pages = abs(bio->bi_iter.bi_sector - iocg->cursor);
+		seek_pages >>= IOC_SECT_TO_PAGE_SHIFT;
+	}
+
+	if (!is_merge) {
+		if (seek_pages > LCOEF_RANDIO_PAGES) {
+			cost += coef_randio;
+		} else {
+			cost += coef_seqio;
+		}
+	}
+	cost += pages * coef_page;
+out:
+	*costp = cost;
+}
+
+static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge)
+{
+	u64 cost;
+
+	calc_vtime_cost_builtin(bio, iocg, is_merge, &cost);
+	return cost;
+}
+
+static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+	struct ioc *ioc = rqos_to_ioc(rqos);
+	struct ioc_gq *iocg = blkg_to_iocg(blkg);
+	struct ioc_now now;
+	struct iocg_wait wait;
+	u32 hw_active, hw_inuse;
+	u64 abs_cost, cost, vtime;
+
+	/* bypass IOs if disabled or for root cgroup */
+	if (!ioc->enabled || !iocg->level)
+		return;
+
+	/* always activate so that even 0 cost IOs get protected to some level */
+	if (!iocg_activate(iocg, &now))
+		return;
+
+	/* calculate the absolute vtime cost */
+	abs_cost = calc_vtime_cost(bio, iocg, false);
+	if (!abs_cost)
+		return;
+
+	iocg->cursor = bio_end_sector(bio);
+
+	vtime = atomic64_read(&iocg->vtime);
+	current_hweight(iocg, &hw_active, &hw_inuse);
+
+	if (hw_inuse < hw_active &&
+	    time_after_eq64(vtime + ioc->inuse_margin_vtime, now.vnow)) {
+		TRACE_IOCG_PATH(inuse_reset, iocg, &now,
+				iocg->inuse, iocg->weight, hw_inuse, hw_active);
+		spin_lock_irq(&ioc->lock);
+		propagate_active_weight(iocg, iocg->weight, iocg->weight);
+		spin_unlock_irq(&ioc->lock);
+		current_hweight(iocg, &hw_active, &hw_inuse);
+	}
+
+	cost = abs_cost_to_cost(abs_cost, hw_inuse);
+
+	/*
+	 * If no one's waiting and within budget, issue right away.  The
+	 * tests are racy but the races aren't systemic - we only miss once
+	 * in a while which is fine.
+	 */
+	if (!waitqueue_active(&iocg->waitq) &&
+	    time_before_eq64(vtime + cost, now.vnow)) {
+		iocg_commit_bio(iocg, bio, cost);
+		return;
+	}
+
+	if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
+		iocg_commit_bio(iocg, bio, cost);
+		iocg_kick_delay(iocg, &now, cost);
+		return;
+	}
+
+	/*
+	 * Append self to the waitq and schedule the wakeup timer if we're
+	 * the first waiter.  The timer duration is calculated based on the
+	 * current vrate.  vtime and hweight changes can make it too short
+	 * or too long.  Each wait entry records the absolute cost it's
+	 * waiting for to allow re-evaluation using a custom wait entry.
+	 *
+	 * If too short, the timer simply reschedules itself.  If too long,
+	 * the period timer will notice and trigger wakeups.
+	 *
+	 * All waiters are on iocg->waitq and the wait states are
+	 * synchronized using waitq.lock.
+	 */
+	spin_lock_irq(&iocg->waitq.lock);
+
+	/*
+	 * We activated above but w/o any synchronization.  Deactivation is
+	 * synchronized with waitq.lock and we won't get deactivated as
+	 * long as we're waiting, so we're good if we're activated here.
+	 * In the unlikely case that we are deactivated, just issue the IO.
+	 */
+	if (unlikely(list_empty(&iocg->active_list))) {
+		spin_unlock_irq(&iocg->waitq.lock);
+		iocg_commit_bio(iocg, bio, cost);
+		return;
+	}
+
+	init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
+	wait.wait.private = current;
+	wait.bio = bio;
+	wait.abs_cost = abs_cost;
+	wait.committed = false;	/* will be set true by waker */
+
+	__add_wait_queue_entry_tail(&iocg->waitq, &wait.wait);
+	iocg_kick_waitq(iocg, &now);
+
+	spin_unlock_irq(&iocg->waitq.lock);
+
+	while (true) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (wait.committed)
+			break;
+		io_schedule();
+	}
+
+	/* waker already committed us, proceed */
+	finish_wait(&iocg->waitq, &wait.wait);
+}
+
+static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
+			   struct bio *bio)
+{
+	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);
+	sector_t bio_end = bio_end_sector(bio);
+	u32 hw_inuse;
+	u64 abs_cost, cost;
+
+	/* add iff the existing request has cost assigned */
+	if (!rq->bio || !rq->bio->bi_iocost_cost)
+		return;
+
+	abs_cost = calc_vtime_cost(bio, iocg, true);
+	if (!abs_cost)
+		return;
+
+	/* update cursor if backmerging into the request at the cursor */
+	if (blk_rq_pos(rq) < bio_end &&
+	    blk_rq_pos(rq) + blk_rq_sectors(rq) == iocg->cursor)
+		iocg->cursor = bio_end;
+
+	current_hweight(iocg, NULL, &hw_inuse);
+	cost = div64_u64(abs_cost * HWEIGHT_WHOLE, hw_inuse);
+	bio->bi_iocost_cost = cost;
+
+	atomic64_add(cost, &iocg->vtime);
+}
+
+static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
+{
+	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);
+
+	if (iocg && bio->bi_iocost_cost)
+		atomic64_add(bio->bi_iocost_cost, &iocg->done_vtime);
+}
+
+static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
+{
+	struct ioc *ioc = rqos_to_ioc(rqos);
+	u64 on_q_ns, rq_wait_ns;
+	int pidx, rw;
+
+	if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns)
+		return;
+
+	switch (req_op(rq) & REQ_OP_MASK) {
+	case REQ_OP_READ:
+		pidx = QOS_RLAT;
+		rw = READ;
+		break;
+	case REQ_OP_WRITE:
+		pidx = QOS_WLAT;
+		rw = WRITE;
+		break;
+	default:
+		return;
+	}
+
+	on_q_ns = ktime_get_ns() - rq->alloc_time_ns;
+	rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
+
+	if (on_q_ns <= ioc->params.qos[pidx] * NSEC_PER_USEC)
+		this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
+	else
+		this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
+
+	this_cpu_add(ioc->pcpu_stat->rq_wait_ns, rq_wait_ns);
+}
+
+static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos)
+{
+	struct ioc *ioc = rqos_to_ioc(rqos);
+
+	spin_lock_irq(&ioc->lock);
+	ioc_refresh_params(ioc, false);
+	spin_unlock_irq(&ioc->lock);
+}
+
+static void ioc_rqos_exit(struct rq_qos *rqos)
+{
+	struct ioc *ioc = rqos_to_ioc(rqos);
+
+	blkcg_deactivate_policy(rqos->q, &blkcg_policy_iocost);
+
+	spin_lock_irq(&ioc->lock);
+	ioc->running = IOC_STOP;
+	spin_unlock_irq(&ioc->lock);
+
+	del_timer_sync(&ioc->timer);
+	free_percpu(ioc->pcpu_stat);
+	kfree(ioc);
+}
+
+static struct rq_qos_ops ioc_rqos_ops = {
+	.throttle = ioc_rqos_throttle,
+	.merge = ioc_rqos_merge,
+	.done_bio = ioc_rqos_done_bio,
+	.done = ioc_rqos_done,
+	.queue_depth_changed = ioc_rqos_queue_depth_changed,
+	.exit = ioc_rqos_exit,
+};
+
+static int blk_iocost_init(struct request_queue *q)
+{
+	struct ioc *ioc;
+	struct rq_qos *rqos;
+	int ret;
+
+	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
+	if (!ioc)
+		return -ENOMEM;
+
+	ioc->pcpu_stat = alloc_percpu(struct ioc_pcpu_stat);
+	if (!ioc->pcpu_stat) {
+		kfree(ioc);
+		return -ENOMEM;
+	}
+
+	rqos = &ioc->rqos;
+	rqos->id = RQ_QOS_COST;
+	rqos->ops = &ioc_rqos_ops;
+	rqos->q = q;
+
+	spin_lock_init(&ioc->lock);
+	timer_setup(&ioc->timer, ioc_timer_fn, 0);
+	INIT_LIST_HEAD(&ioc->active_iocgs);
+
+	ioc->running = IOC_IDLE;
+	atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
+	seqcount_init(&ioc->period_seqcount);
+	ioc->period_at = ktime_to_us(ktime_get());
+	atomic64_set(&ioc->cur_period, 0);
+	atomic_set(&ioc->hweight_gen, 0);
+
+	spin_lock_irq(&ioc->lock);
+	ioc->autop_idx = AUTOP_INVALID;
+	ioc_refresh_params(ioc, true);
+	spin_unlock_irq(&ioc->lock);
+
+	rq_qos_add(q, rqos);
+	ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
+	if (ret) {
+		rq_qos_del(q, rqos);
+		kfree(ioc);
+		return ret;
+	}
+	return 0;
+}
+
+static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp)
+{
+	struct ioc_cgrp *iocc;
+
+	iocc = kzalloc(sizeof(struct ioc_cgrp), gfp);
+	iocc->dfl_weight = CGROUP_WEIGHT_DFL;
+
+	return &iocc->cpd;
+}
+
+static void ioc_cpd_free(struct blkcg_policy_data *cpd)
+{
+	kfree(container_of(cpd, struct ioc_cgrp, cpd));
+}
+
+static struct blkg_policy_data *ioc_pd_alloc(gfp_t gfp, struct request_queue *q,
+					     struct blkcg *blkcg)
+{
+	int levels = blkcg->css.cgroup->level + 1;
+	struct ioc_gq *iocg;
+
+	iocg = kzalloc_node(sizeof(*iocg) + levels * sizeof(iocg->ancestors[0]),
+			    gfp, q->node);
+	if (!iocg)
+		return NULL;
+
+	return &iocg->pd;
+}
+
+static void ioc_pd_init(struct blkg_policy_data *pd)
+{
+	struct ioc_gq *iocg = pd_to_iocg(pd);
+	struct blkcg_gq *blkg = pd_to_blkg(&iocg->pd);
+	struct ioc *ioc = q_to_ioc(blkg->q);
+	struct ioc_now now;
+	struct blkcg_gq *tblkg;
+	unsigned long flags;
+
+	ioc_now(ioc, &now);
+
+	iocg->ioc = ioc;
+	atomic64_set(&iocg->vtime, now.vnow);
+	atomic64_set(&iocg->done_vtime, now.vnow);
+	atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
+	INIT_LIST_HEAD(&iocg->active_list);
+	iocg->hweight_active = HWEIGHT_WHOLE;
+	iocg->hweight_inuse = HWEIGHT_WHOLE;
+
+	init_waitqueue_head(&iocg->waitq);
+	hrtimer_init(&iocg->waitq_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	iocg->waitq_timer.function = iocg_waitq_timer_fn;
+	hrtimer_init(&iocg->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	iocg->delay_timer.function = iocg_delay_timer_fn;
+
+	iocg->level = blkg->blkcg->css.cgroup->level;
+
+	for (tblkg = blkg; tblkg; tblkg = tblkg->parent) {
+		struct ioc_gq *tiocg = blkg_to_iocg(tblkg);
+		iocg->ancestors[tiocg->level] = tiocg;
+	}
+
+	spin_lock_irqsave(&ioc->lock, flags);
+	weight_updated(iocg);
+	spin_unlock_irqrestore(&ioc->lock, flags);
+}
+
+static void ioc_pd_free(struct blkg_policy_data *pd)
+{
+	struct ioc_gq *iocg = pd_to_iocg(pd);
+	struct ioc *ioc = iocg->ioc;
+
+	if (ioc) {
+		hrtimer_cancel(&iocg->waitq_timer);
+		hrtimer_cancel(&iocg->delay_timer);
+
+		spin_lock(&ioc->lock);
+		if (!list_empty(&iocg->active_list)) {
+			propagate_active_weight(iocg, 0, 0);
+			list_del_init(&iocg->active_list);
+		}
+		spin_unlock(&ioc->lock);
+	}
+	kfree(iocg);
+}
+
+static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
+			     int off)
+{
+	const char *dname = blkg_dev_name(pd->blkg);
+	struct ioc_gq *iocg = pd_to_iocg(pd);
+
+	if (dname && iocg->cfg_weight)
+		seq_printf(sf, "%s %u\n", dname, iocg->cfg_weight);
+	return 0;
+}
+
+
+static int ioc_weight_show(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+	struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg);
+
+	seq_printf(sf, "default %u\n", iocc->dfl_weight);
+	blkcg_print_blkgs(sf, blkcg, ioc_weight_prfill,
+			  &blkcg_policy_iocost, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg);
+	struct blkg_conf_ctx ctx;
+	struct ioc_gq *iocg;
+	u32 v;
+	int ret;
+
+	if (!strchr(buf, ':')) {
+		struct blkcg_gq *blkg;
+
+		if (!sscanf(buf, "default %u", &v) && !sscanf(buf, "%u", &v))
+			return -EINVAL;
+
+		if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
+			return -EINVAL;
+
+		spin_lock(&blkcg->lock);
+		iocc->dfl_weight = v;
+		hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
+			struct ioc_gq *iocg = blkg_to_iocg(blkg);
+
+			if (iocg) {
+				spin_lock_irq(&iocg->ioc->lock);
+				weight_updated(iocg);
+				spin_unlock_irq(&iocg->ioc->lock);
+			}
+		}
+		spin_unlock(&blkcg->lock);
+
+		return nbytes;
+	}
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, buf, &ctx);
+	if (ret)
+		return ret;
+
+	iocg = blkg_to_iocg(ctx.blkg);
+
+	if (!strncmp(ctx.body, "default", 7)) {
+		v = 0;
+	} else {
+		if (!sscanf(ctx.body, "%u", &v))
+			goto einval;
+		if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
+			goto einval;
+	}
+
+	spin_lock_irq(&iocg->ioc->lock);
+	iocg->cfg_weight = v;
+	weight_updated(iocg);
+	spin_unlock_irq(&iocg->ioc->lock);
+
+	blkg_conf_finish(&ctx);
+	return nbytes;
+
+einval:
+	blkg_conf_finish(&ctx);
+	return -EINVAL;
+}
+
+static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
+			  int off)
+{
+	const char *dname = blkg_dev_name(pd->blkg);
+	struct ioc *ioc = pd_to_iocg(pd)->ioc;
+
+	if (!dname)
+		return 0;
+
+	seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
+		   dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
+		   ioc->params.qos[QOS_RPPM] / 10000,
+		   ioc->params.qos[QOS_RPPM] % 10000 / 100,
+		   ioc->params.qos[QOS_RLAT],
+		   ioc->params.qos[QOS_WPPM] / 10000,
+		   ioc->params.qos[QOS_WPPM] % 10000 / 100,
+		   ioc->params.qos[QOS_WLAT],
+		   ioc->params.qos[QOS_MIN] / 10000,
+		   ioc->params.qos[QOS_MIN] % 10000 / 100,
+		   ioc->params.qos[QOS_MAX] / 10000,
+		   ioc->params.qos[QOS_MAX] % 10000 / 100);
+	return 0;
+}
+
+static int ioc_qos_show(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+	blkcg_print_blkgs(sf, blkcg, ioc_qos_prfill,
+			  &blkcg_policy_iocost, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static const match_table_t qos_ctrl_tokens = {
+	{ QOS_ENABLE,		"enable=%u"	},
+	{ QOS_CTRL,		"ctrl=%s"	},
+	{ NR_QOS_CTRL_PARAMS,	NULL		},
+};
+
+static const match_table_t qos_tokens = {
+	{ QOS_RPPM,		"rpct=%s"	},
+	{ QOS_RLAT,		"rlat=%u"	},
+	{ QOS_WPPM,		"wpct=%s"	},
+	{ QOS_WLAT,		"wlat=%u"	},
+	{ QOS_MIN,		"min=%s"	},
+	{ QOS_MAX,		"max=%s"	},
+	{ NR_QOS_PARAMS,	NULL		},
+};
+
+static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
+			     size_t nbytes, loff_t off)
+{
+	struct gendisk *disk;
+	struct ioc *ioc;
+	u32 qos[NR_QOS_PARAMS];
+	bool enable, user;
+	char *p;
+	int ret;
+
+	disk = blkcg_conf_get_disk(&input);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
+
+	ioc = q_to_ioc(disk->queue);
+	if (!ioc) {
+		ret = blk_iocost_init(disk->queue);
+		if (ret)
+			goto err;
+		ioc = q_to_ioc(disk->queue);
+	}
+
+	spin_lock_irq(&ioc->lock);
+	memcpy(qos, ioc->params.qos, sizeof(qos));
+	enable = ioc->enabled;
+	user = ioc->user_qos_params;
+	spin_unlock_irq(&ioc->lock);
+
+	while ((p = strsep(&input, " \t\n"))) {
+		substring_t args[MAX_OPT_ARGS];
+		char buf[32];
+		int tok;
+		s64 v;
+
+		if (!*p)
+			continue;
+
+		switch (match_token(p, qos_ctrl_tokens, args)) {
+		case QOS_ENABLE:
+			match_u64(&args[0], &v);
+			enable = v;
+			continue;
+		case QOS_CTRL:
+			match_strlcpy(buf, &args[0], sizeof(buf));
+			if (!strcmp(buf, "auto"))
+				user = false;
+			else if (!strcmp(buf, "user"))
+				user = true;
+			else
+				goto einval;
+			continue;
+		}
+
+		tok = match_token(p, qos_tokens, args);
+		switch (tok) {
+		case QOS_RPPM:
+		case QOS_WPPM:
+			if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
+			    sizeof(buf))
+				goto einval;
+			if (cgroup_parse_float(buf, 2, &v))
+				goto einval;
+			if (v < 0 || v > 10000)
+				goto einval;
+			qos[tok] = v * 100;
+			break;
+		case QOS_RLAT:
+		case QOS_WLAT:
+			if (match_u64(&args[0], &v))
+				goto einval;
+			qos[tok] = v;
+			break;
+		case QOS_MIN:
+		case QOS_MAX:
+			if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
+			    sizeof(buf))
+				goto einval;
+			if (cgroup_parse_float(buf, 2, &v))
+				goto einval;
+			if (v < 0)
+				goto einval;
+			qos[tok] = clamp_t(s64, v * 100,
+					   VRATE_MIN_PPM, VRATE_MAX_PPM);
+			break;
+		default:
+			goto einval;
+		}
+		user = true;
+	}
+
+	if (qos[QOS_MIN] > qos[QOS_MAX])
+		goto einval;
+
+	spin_lock_irq(&ioc->lock);
+
+	if (enable) {
+		blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q);
+		ioc->enabled = true;
+	} else {
+		blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, ioc->rqos.q);
+		ioc->enabled = false;
+	}
+
+	if (user) {
+		memcpy(ioc->params.qos, qos, sizeof(qos));
+		ioc->user_qos_params = true;
+	} else {
+		ioc->user_qos_params = false;
+	}
+
+	ioc_refresh_params(ioc, true);
+	spin_unlock_irq(&ioc->lock);
+
+	put_disk_and_module(disk);
+	return nbytes;
+einval:
+	ret = -EINVAL;
+err:
+	put_disk_and_module(disk);
+	return ret;
+}
+
+static u64 ioc_cost_model_prfill(struct seq_file *sf,
+				 struct blkg_policy_data *pd, int off)
+{
+	const char *dname = blkg_dev_name(pd->blkg);
+	struct ioc *ioc = pd_to_iocg(pd)->ioc;
+	u64 *u = ioc->params.i_lcoefs;
+
+	if (!dname)
+		return 0;
+
+	seq_printf(sf, "%s ctrl=%s model=linear "
+		   "rbps=%llu rseqiops=%llu rrandiops=%llu "
+		   "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
+		   dname, ioc->user_cost_model ? "user" : "auto",
+		   u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
+		   u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
+	return 0;
+}
+
+static int ioc_cost_model_show(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+	blkcg_print_blkgs(sf, blkcg, ioc_cost_model_prfill,
+			  &blkcg_policy_iocost, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static const match_table_t cost_ctrl_tokens = {
+	{ COST_CTRL,		"ctrl=%s"	},
+	{ COST_MODEL,		"model=%s"	},
+	{ NR_COST_CTRL_PARAMS,	NULL		},
+};
+
+static const match_table_t i_lcoef_tokens = {
+	{ I_LCOEF_RBPS,		"rbps=%u"	},
+	{ I_LCOEF_RSEQIOPS,	"rseqiops=%u"	},
+	{ I_LCOEF_RRANDIOPS,	"rrandiops=%u"	},
+	{ I_LCOEF_WBPS,		"wbps=%u"	},
+	{ I_LCOEF_WSEQIOPS,	"wseqiops=%u"	},
+	{ I_LCOEF_WRANDIOPS,	"wrandiops=%u"	},
+	{ NR_I_LCOEFS,		NULL		},
+};
+
+static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
+				    size_t nbytes, loff_t off)
+{
+	struct gendisk *disk;
+	struct ioc *ioc;
+	u64 u[NR_I_LCOEFS];
+	bool user;
+	char *p;
+	int ret;
+
+	disk = blkcg_conf_get_disk(&input);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
+
+	ioc = q_to_ioc(disk->queue);
+	if (!ioc) {
+		ret = blk_iocost_init(disk->queue);
+		if (ret)
+			goto err;
+		ioc = q_to_ioc(disk->queue);
+	}
+
+	spin_lock_irq(&ioc->lock);
+	memcpy(u, ioc->params.i_lcoefs, sizeof(u));
+	user = ioc->user_cost_model;
+	spin_unlock_irq(&ioc->lock);
+
+	while ((p = strsep(&input, " \t\n"))) {
+		substring_t args[MAX_OPT_ARGS];
+		char buf[32];
+		int tok;
+		u64 v;
+
+		if (!*p)
+			continue;
+
+		switch (match_token(p, cost_ctrl_tokens, args)) {
+		case COST_CTRL:
+			match_strlcpy(buf, &args[0], sizeof(buf));
+			if (!strcmp(buf, "auto"))
+				user = false;
+			else if (!strcmp(buf, "user"))
+				user = true;
+			else
+				goto einval;
+			continue;
+		case COST_MODEL:
+			match_strlcpy(buf, &args[0], sizeof(buf));
+			if (strcmp(buf, "linear"))
+				goto einval;
+			continue;
+		}
+
+		tok = match_token(p, i_lcoef_tokens, args);
+		if (tok == NR_I_LCOEFS)
+			goto einval;
+		if (match_u64(&args[0], &v))
+			goto einval;
+		u[tok] = v;
+		user = true;
+	}
+
+	spin_lock_irq(&ioc->lock);
+	if (user) {
+		memcpy(ioc->params.i_lcoefs, u, sizeof(u));
+		ioc->user_cost_model = true;
+	} else {
+		ioc->user_cost_model = false;
+	}
+	ioc_refresh_params(ioc, true);
+	spin_unlock_irq(&ioc->lock);
+
+	put_disk_and_module(disk);
+	return nbytes;
+
+einval:
+	ret = -EINVAL;
+err:
+	put_disk_and_module(disk);
+	return ret;
+}
+
+static struct cftype ioc_files[] = {
+	{
+		.name = "weight",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = ioc_weight_show,
+		.write = ioc_weight_write,
+	},
+	{
+		.name = "cost.qos",
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.seq_show = ioc_qos_show,
+		.write = ioc_qos_write,
+	},
+	{
+		.name = "cost.model",
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.seq_show = ioc_cost_model_show,
+		.write = ioc_cost_model_write,
+	},
+	{}
+};
+
+static struct blkcg_policy blkcg_policy_iocost = {
+	.dfl_cftypes	= ioc_files,
+	.cpd_alloc_fn	= ioc_cpd_alloc,
+	.cpd_free_fn	= ioc_cpd_free,
+	.pd_alloc_fn	= ioc_pd_alloc,
+	.pd_init_fn	= ioc_pd_init,
+	.pd_free_fn	= ioc_pd_free,
+};
+
+static int __init ioc_init(void)
+{
+	return blkcg_policy_register(&blkcg_policy_iocost);
+}
+
+static void __exit ioc_exit(void)
+{
+	return blkcg_policy_unregister(&blkcg_policy_iocost);
+}
+
+module_init(ioc_init);
+module_exit(ioc_exit);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 5f8b75826a98..08a09dbe0f4b 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -15,6 +15,7 @@ struct blk_mq_debugfs_attr;
 enum rq_qos_id {
 	RQ_QOS_WBT,
 	RQ_QOS_LATENCY,
+	RQ_QOS_COST,
 };
 
 struct rq_wait {
@@ -84,6 +85,8 @@ static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
 		return "wbt";
 	case RQ_QOS_LATENCY:
 		return "latency";
+	case RQ_QOS_COST:
+		return "cost";
 	}
 	return "unknown";
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5a1118d4ef7e..d688b96d1d63 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -169,6 +169,9 @@ struct bio {
 	 */
 	struct blkcg_gq		*bi_blkg;
 	struct bio_issue	bi_issue;
+#ifdef CONFIG_BLK_CGROUP_IOCOST
+	u64			bi_iocost_cost;
+#endif
 #endif
 	union {
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h
new file mode 100644
index 000000000000..ec2217dd57ac
--- /dev/null
+++ b/include/trace/events/iocost.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iocost
+
+#if !defined(_TRACE_BLK_IOCOST_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLK_IOCOST_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(iocost_iocg_activate,
+
+	TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+		u64 last_period, u64 cur_period, u64 vtime),
+
+	TP_ARGS(iocg, path, now, last_period, cur_period, vtime),
+
+	TP_STRUCT__entry (
+		__string(devname, ioc_name(iocg->ioc))
+		__string(cgroup, path)
+		__field(u64, now)
+		__field(u64, vnow)
+		__field(u64, vrate)
+		__field(u64, last_period)
+		__field(u64, cur_period)
+		__field(u64, last_vtime)
+		__field(u64, vtime)
+		__field(u32, weight)
+		__field(u32, inuse)
+		__field(u64, hweight_active)
+		__field(u64, hweight_inuse)
+	),
+
+	TP_fast_assign(
+		__assign_str(devname, ioc_name(iocg->ioc));
+		__assign_str(cgroup, path);
+		__entry->now = now->now;
+		__entry->vnow = now->vnow;
+		__entry->vrate = now->vrate;
+		__entry->last_period = last_period;
+		__entry->cur_period = cur_period;
+		__entry->last_vtime = iocg->last_vtime;
+		__entry->vtime = vtime;
+		__entry->weight = iocg->weight;
+		__entry->inuse = iocg->inuse;
+		__entry->hweight_active = iocg->hweight_active;
+		__entry->hweight_inuse = iocg->hweight_inuse;
+	),
+
+	TP_printk("[%s:%s] now=%llu:%llu vrate=%llu "
+		  "period=%llu->%llu vtime=%llu->%llu "
+		  "weight=%u/%u hweight=%llu/%llu",
+		__get_str(devname), __get_str(cgroup),
+		__entry->now, __entry->vnow, __entry->vrate,
+		__entry->last_period, __entry->cur_period,
+		__entry->last_vtime, __entry->vtime,
+		__entry->inuse, __entry->weight,
+		__entry->hweight_inuse, __entry->hweight_active
+	)
+);
+
+DECLARE_EVENT_CLASS(iocg_inuse_update,
+
+	TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+		u32 old_inuse, u32 new_inuse,
+		u64 old_hw_inuse, u64 new_hw_inuse),
+
+	TP_ARGS(iocg, path, now, old_inuse, new_inuse,
+		old_hw_inuse, new_hw_inuse),
+
+	TP_STRUCT__entry (
+		__string(devname, ioc_name(iocg->ioc))
+		__string(cgroup, path)
+		__field(u64, now)
+		__field(u32, old_inuse)
+		__field(u32, new_inuse)
+		__field(u64, old_hweight_inuse)
+		__field(u64, new_hweight_inuse)
+	),
+
+	TP_fast_assign(
+		__assign_str(devname, ioc_name(iocg->ioc));
+		__assign_str(cgroup, path);
+		__entry->now = now->now;
+		__entry->old_inuse = old_inuse;
+		__entry->new_inuse = new_inuse;
+		__entry->old_hweight_inuse = old_hw_inuse;
+		__entry->new_hweight_inuse = new_hw_inuse;
+	),
+
+	TP_printk("[%s:%s] now=%llu inuse=%u->%u hw_inuse=%llu->%llu",
+		__get_str(devname), __get_str(cgroup), __entry->now,
+		__entry->old_inuse, __entry->new_inuse,
+		__entry->old_hweight_inuse, __entry->new_hweight_inuse
+	)
+);
+
+DEFINE_EVENT(iocg_inuse_update, iocost_inuse_takeback,
+
+	TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+		u32 old_inuse, u32 new_inuse,
+		u64 old_hw_inuse, u64 new_hw_inuse),
+
+	TP_ARGS(iocg, path, now, old_inuse, new_inuse,
+		old_hw_inuse, new_hw_inuse)
+);
+
+DEFINE_EVENT(iocg_inuse_update, iocost_inuse_giveaway,
+
+	TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+		u32 old_inuse, u32 new_inuse,
+		u64 old_hw_inuse, u64 new_hw_inuse),
+
+	TP_ARGS(iocg, path, now, old_inuse, new_inuse,
+		old_hw_inuse, new_hw_inuse)
+);
+
+DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset,
+
+	TP_PROTO(struct ioc_gq *iocg, const char *path, struct ioc_now *now,
+		u32 old_inuse, u32 new_inuse,
+		u64 old_hw_inuse, u64 new_hw_inuse),
+
+	TP_ARGS(iocg, path, now, old_inuse, new_inuse,
+		old_hw_inuse, new_hw_inuse)
+);
+
+TRACE_EVENT(iocost_ioc_vrate_adj,
+
+	TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 (*missed_ppm)[2],
+		u32 rq_wait_pct, int nr_lagging, int nr_shortages,
+		int nr_surpluses),
+
+	TP_ARGS(ioc, new_vrate, missed_ppm, rq_wait_pct, nr_lagging, nr_shortages,
+		nr_surpluses),
+
+	TP_STRUCT__entry (
+		__string(devname, ioc_name(ioc))
+		__field(u64, old_vrate)
+		__field(u64, new_vrate)
+		__field(int, busy_level)
+		__field(u32, read_missed_ppm)
+		__field(u32, write_missed_ppm)
+		__field(u32, rq_wait_pct)
+		__field(int, nr_lagging)
+		__field(int, nr_shortages)
+		__field(int, nr_surpluses)
+	),
+
+	TP_fast_assign(
+		__assign_str(devname, ioc_name(ioc));
+		__entry->old_vrate = atomic64_read(&ioc->vtime_rate);;
+		__entry->new_vrate = new_vrate;
+		__entry->busy_level = ioc->busy_level;
+		__entry->read_missed_ppm = (*missed_ppm)[READ];
+		__entry->write_missed_ppm = (*missed_ppm)[WRITE];
+		__entry->rq_wait_pct = rq_wait_pct;
+		__entry->nr_lagging = nr_lagging;
+		__entry->nr_shortages = nr_shortages;
+		__entry->nr_surpluses = nr_surpluses;
+	),
+
+	TP_printk("[%s] vrate=%llu->%llu busy=%d missed_ppm=%u:%u rq_wait_pct=%u lagging=%d shortages=%d surpluses=%d",
+		__get_str(devname), __entry->old_vrate, __entry->new_vrate,
+		__entry->busy_level,
+		__entry->read_missed_ppm, __entry->write_missed_ppm,
+		__entry->rq_wait_pct, __entry->nr_lagging, __entry->nr_shortages,
+		__entry->nr_surpluses
+	)
+);
+
+#endif /* _TRACE_BLK_IOCOST_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 8f2edb4a78f7f5fa35c025849152b1d2dfaee4eb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 29 Aug 2019 08:19:40 +0200
Subject: posix-timers: Unbreak CONFIG_POSIX_TIMERS=n build

The rework of the posix-cpu-timers patch series dropped the empty
declaration of struct cpu_timer for the CONFIG_POSIX_TIMERS=n case which
causes the build to fail:

./include/linux/posix-timers.h:218:20: error: field 'cpu' has incomplete type

Add it back.

Fixes: 60bda037f1dd ("posix-cpu-timers: Utilize timerqueue for storage")
Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/posix-timers.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index f9fbb4c620c1..e685916551bf 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -161,6 +161,7 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
 	},
 #else
 struct posix_cputimers { };
+struct cpu_timer { };
 #define INIT_CPU_TIMERS(s)
 static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
 static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
-- 
cgit v1.2.3


From 1018c94be6ea073115f6bcf993d6492138d2b8e3 Mon Sep 17 00:00:00 2001
From: Zhuohao Lee <zhuohao@chromium.org>
Date: Mon, 1 Jul 2019 00:07:10 +0800
Subject: mtd: mtdcore: add debugfs nodes for querying the flash name and id

Currently, we don't have vfs nodes for querying the underlying flash name
and flash id. This information is important especially when we want to
know the flash detail of the defective system. In order to support the
query, we add mtd_debugfs_populate() to create two debugfs nodes
(ie. partname and partid). The upper driver can assign the pointer to
partname and partid before calling mtd_device_register().

Signed-off-by: Zhuohao Lee <zhuohao@chromium.org>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
---
 drivers/mtd/mtdcore.c   | 86 +++++++++++++++++++++++++++++++++++++++++++------
 include/linux/mtd/mtd.h |  3 ++
 2 files changed, 80 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 408615f29e57..6cc7ecb0c788 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -335,6 +335,82 @@ static const struct device_type mtd_devtype = {
 	.release	= mtd_release,
 };
 
+static int mtd_partid_show(struct seq_file *s, void *p)
+{
+	struct mtd_info *mtd = s->private;
+
+	seq_printf(s, "%s\n", mtd->dbg.partid);
+
+	return 0;
+}
+
+static int mtd_partid_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtd_partid_show, inode->i_private);
+}
+
+static const struct file_operations mtd_partid_debug_fops = {
+	.open           = mtd_partid_debugfs_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static int mtd_partname_show(struct seq_file *s, void *p)
+{
+	struct mtd_info *mtd = s->private;
+
+	seq_printf(s, "%s\n", mtd->dbg.partname);
+
+	return 0;
+}
+
+static int mtd_partname_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mtd_partname_show, inode->i_private);
+}
+
+static const struct file_operations mtd_partname_debug_fops = {
+	.open           = mtd_partname_debugfs_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static struct dentry *dfs_dir_mtd;
+
+static void mtd_debugfs_populate(struct mtd_info *mtd)
+{
+	struct device *dev = &mtd->dev;
+	struct dentry *root, *dent;
+
+	if (IS_ERR_OR_NULL(dfs_dir_mtd))
+		return;
+
+	root = debugfs_create_dir(dev_name(dev), dfs_dir_mtd);
+	if (IS_ERR_OR_NULL(root)) {
+		dev_dbg(dev, "won't show data in debugfs\n");
+		return;
+	}
+
+	mtd->dbg.dfs_dir = root;
+
+	if (mtd->dbg.partid) {
+		dent = debugfs_create_file("partid", 0400, root, mtd,
+					   &mtd_partid_debug_fops);
+		if (IS_ERR_OR_NULL(dent))
+			dev_err(dev, "can't create debugfs entry for partid\n");
+	}
+
+	if (mtd->dbg.partname) {
+		dent = debugfs_create_file("partname", 0400, root, mtd,
+					   &mtd_partname_debug_fops);
+		if (IS_ERR_OR_NULL(dent))
+			dev_err(dev,
+				"can't create debugfs entry for partname\n");
+	}
+}
+
 #ifndef CONFIG_MMU
 unsigned mtd_mmap_capabilities(struct mtd_info *mtd)
 {
@@ -512,8 +588,6 @@ static int mtd_nvmem_add(struct mtd_info *mtd)
 	return 0;
 }
 
-static struct dentry *dfs_dir_mtd;
-
 /**
  *	add_mtd_device - register an MTD device
  *	@mtd: pointer to new MTD device info structure
@@ -607,13 +681,7 @@ int add_mtd_device(struct mtd_info *mtd)
 	if (error)
 		goto fail_nvmem_add;
 
-	if (!IS_ERR_OR_NULL(dfs_dir_mtd)) {
-		mtd->dbg.dfs_dir = debugfs_create_dir(dev_name(&mtd->dev), dfs_dir_mtd);
-		if (IS_ERR_OR_NULL(mtd->dbg.dfs_dir)) {
-			pr_debug("mtd device %s won't show data in debugfs\n",
-				 dev_name(&mtd->dev));
-		}
-	}
+	mtd_debugfs_populate(mtd);
 
 	device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL,
 		      "mtd%dro", i);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 4ca8c1c845fb..249e8d9bfbcd 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -189,6 +189,9 @@ struct module;	/* only needed for owner field in mtd_info */
  */
 struct mtd_debug_info {
 	struct dentry *dfs_dir;
+
+	const char *partname;
+	const char *partid;
 };
 
 struct mtd_info {
-- 
cgit v1.2.3


From 91f255a26bce80d27b5f3dcbb0333b852b5dbe2c Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Mon, 26 Aug 2019 12:19:27 +0100
Subject: usb: common: Separated decoding functions from dwc3 driver.

Patch moves some decoding functions from driver/usb/dwc3/debug.h driver
to driver/usb/common/debug.c file. These moved functions include:
    dwc3_decode_get_status
    dwc3_decode_set_clear_feature
    dwc3_decode_set_address
    dwc3_decode_get_set_descriptor
    dwc3_decode_get_configuration
    dwc3_decode_set_configuration
    dwc3_decode_get_intf
    dwc3_decode_set_intf
    dwc3_decode_synch_frame
    dwc3_decode_set_sel
    dwc3_decode_set_isoch_delay
    dwc3_decode_ctrl

These functions are used also in inroduced cdns3 driver.

All functions prefixes were changed from dwc3 to usb.
Also, function's parameters has been extended according to the name
of fields in standard SETUP packet.
Additionally, patch adds usb_decode_ctrl function to
include/linux/usb/ch9.h file.

Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/common/Makefile |   1 +
 drivers/usb/common/debug.c  | 268 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/usb/dwc3/debug.h    | 252 -----------------------------------------
 drivers/usb/dwc3/trace.h    |   2 +-
 include/linux/usb/ch9.h     |  27 +++++
 5 files changed, 297 insertions(+), 253 deletions(-)
 create mode 100644 drivers/usb/common/debug.c

(limited to 'include')

diff --git a/drivers/usb/common/Makefile b/drivers/usb/common/Makefile
index 0a7c45e85481..76f3e80623db 100644
--- a/drivers/usb/common/Makefile
+++ b/drivers/usb/common/Makefile
@@ -5,6 +5,7 @@
 
 obj-$(CONFIG_USB_COMMON)	  += usb-common.o
 usb-common-y			  += common.o
+usb-common-$(CONFIG_TRACING)	  += debug.o
 usb-common-$(CONFIG_USB_LED_TRIG) += led.o
 
 obj-$(CONFIG_USB_OTG_FSM) += usb-otg-fsm.o
diff --git a/drivers/usb/common/debug.c b/drivers/usb/common/debug.c
new file mode 100644
index 000000000000..d5a469bc67a3
--- /dev/null
+++ b/drivers/usb/common/debug.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/**
+ * Common USB debugging functions
+ *
+ * Copyright (C) 2010-2011 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors: Felipe Balbi <balbi@ti.com>,
+ *	    Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ */
+
+#include <linux/usb/ch9.h>
+
+static void usb_decode_get_status(__u8 bRequestType, __u16 wIndex,
+				  __u16 wLength, char *str, size_t size)
+{
+	switch (bRequestType & USB_RECIP_MASK) {
+	case USB_RECIP_DEVICE:
+		snprintf(str, size, "Get Device Status(Length = %d)", wLength);
+		break;
+	case USB_RECIP_INTERFACE:
+		snprintf(str, size,
+			 "Get Interface Status(Intf = %d, Length = %d)",
+			 wIndex, wLength);
+		break;
+	case USB_RECIP_ENDPOINT:
+		snprintf(str, size, "Get Endpoint Status(ep%d%s)",
+			 wIndex & ~USB_DIR_IN,
+			 wIndex & USB_DIR_IN ? "in" : "out");
+		break;
+	}
+}
+
+static void usb_decode_set_clear_feature(__u8 bRequestType, __u8 bRequest,
+					 __u16 wValue, __u16 wIndex,
+					 char *str, size_t size)
+{
+	switch (bRequestType & USB_RECIP_MASK) {
+	case USB_RECIP_DEVICE:
+		snprintf(str, size, "%s Device Feature(%s%s)",
+			 bRequest == USB_REQ_CLEAR_FEATURE ? "Clear" : "Set",
+			 ({char *s;
+				switch (wValue) {
+				case USB_DEVICE_SELF_POWERED:
+					s = "Self Powered";
+					break;
+				case USB_DEVICE_REMOTE_WAKEUP:
+					s = "Remote Wakeup";
+					break;
+				case USB_DEVICE_TEST_MODE:
+					s = "Test Mode";
+					break;
+				case USB_DEVICE_U1_ENABLE:
+					s = "U1 Enable";
+					break;
+				case USB_DEVICE_U2_ENABLE:
+					s = "U2 Enable";
+					break;
+				case USB_DEVICE_LTM_ENABLE:
+					s = "LTM Enable";
+					break;
+				default:
+					s = "UNKNOWN";
+				} s; }),
+			 wValue == USB_DEVICE_TEST_MODE ?
+			 ({ char *s;
+				switch (wIndex) {
+				case TEST_J:
+					s = ": TEST_J";
+					break;
+				case TEST_K:
+					s = ": TEST_K";
+					break;
+				case TEST_SE0_NAK:
+					s = ": TEST_SE0_NAK";
+					break;
+				case TEST_PACKET:
+					s = ": TEST_PACKET";
+					break;
+				case TEST_FORCE_EN:
+					s = ": TEST_FORCE_EN";
+					break;
+				default:
+					s = ": UNKNOWN";
+				} s; }) : "");
+		break;
+	case USB_RECIP_INTERFACE:
+		snprintf(str, size, "%s Interface Feature(%s)",
+			 bRequest == USB_REQ_CLEAR_FEATURE ? "Clear" : "Set",
+			 wValue == USB_INTRF_FUNC_SUSPEND ?
+			 "Function Suspend" : "UNKNOWN");
+		break;
+	case USB_RECIP_ENDPOINT:
+		snprintf(str, size, "%s Endpoint Feature(%s ep%d%s)",
+			 bRequest == USB_REQ_CLEAR_FEATURE ? "Clear" : "Set",
+			 wValue == USB_ENDPOINT_HALT ? "Halt" : "UNKNOWN",
+			 wIndex & ~USB_DIR_IN,
+			 wIndex & USB_DIR_IN ? "in" : "out");
+		break;
+	}
+}
+
+static void usb_decode_set_address(__u16 wValue, char *str, size_t size)
+{
+	snprintf(str, size, "Set Address(Addr = %02x)", wValue);
+}
+
+static void usb_decode_get_set_descriptor(__u8 bRequestType, __u8 bRequest,
+					  __u16 wValue, __u16 wIndex,
+					  __u16 wLength, char *str, size_t size)
+{
+	snprintf(str, size, "%s %s Descriptor(Index = %d, Length = %d)",
+		 bRequest == USB_REQ_GET_DESCRIPTOR ? "Get" : "Set",
+		 ({ char *s;
+			switch (wValue >> 8) {
+			case USB_DT_DEVICE:
+				s = "Device";
+				break;
+			case USB_DT_CONFIG:
+				s = "Configuration";
+				break;
+			case USB_DT_STRING:
+				s = "String";
+				break;
+			case USB_DT_INTERFACE:
+				s = "Interface";
+				break;
+			case USB_DT_ENDPOINT:
+				s = "Endpoint";
+				break;
+			case USB_DT_DEVICE_QUALIFIER:
+				s = "Device Qualifier";
+				break;
+			case USB_DT_OTHER_SPEED_CONFIG:
+				s = "Other Speed Config";
+				break;
+			case USB_DT_INTERFACE_POWER:
+				s = "Interface Power";
+				break;
+			case USB_DT_OTG:
+				s = "OTG";
+				break;
+			case USB_DT_DEBUG:
+				s = "Debug";
+				break;
+			case USB_DT_INTERFACE_ASSOCIATION:
+				s = "Interface Association";
+				break;
+			case USB_DT_BOS:
+				s = "BOS";
+				break;
+			case USB_DT_DEVICE_CAPABILITY:
+				s = "Device Capability";
+				break;
+			case USB_DT_PIPE_USAGE:
+				s = "Pipe Usage";
+				break;
+			case USB_DT_SS_ENDPOINT_COMP:
+				s = "SS Endpoint Companion";
+				break;
+			case USB_DT_SSP_ISOC_ENDPOINT_COMP:
+				s = "SSP Isochronous Endpoint Companion";
+				break;
+			default:
+				s = "UNKNOWN";
+				break;
+			} s; }), wValue & 0xff, wLength);
+}
+
+static void usb_decode_get_configuration(__u16 wLength, char *str, size_t size)
+{
+	snprintf(str, size, "Get Configuration(Length = %d)", wLength);
+}
+
+static void usb_decode_set_configuration(__u8 wValue, char *str, size_t size)
+{
+	snprintf(str, size, "Set Configuration(Config = %d)", wValue);
+}
+
+static void usb_decode_get_intf(__u16 wIndex, __u16 wLength, char *str,
+				size_t size)
+{
+	snprintf(str, size, "Get Interface(Intf = %d, Length = %d)",
+		 wIndex, wLength);
+}
+
+static void usb_decode_set_intf(__u8 wValue, __u16 wIndex, char *str,
+				size_t size)
+{
+	snprintf(str, size, "Set Interface(Intf = %d, Alt.Setting = %d)",
+		 wIndex, wValue);
+}
+
+static void usb_decode_synch_frame(__u16 wIndex, __u16 wLength,
+				   char *str, size_t size)
+{
+	snprintf(str, size, "Synch Frame(Endpoint = %d, Length = %d)",
+		 wIndex, wLength);
+}
+
+static void usb_decode_set_sel(__u16 wLength, char *str, size_t size)
+{
+	snprintf(str, size, "Set SEL(Length = %d)", wLength);
+}
+
+static void usb_decode_set_isoch_delay(__u8 wValue, char *str, size_t size)
+{
+	snprintf(str, size, "Set Isochronous Delay(Delay = %d ns)", wValue);
+}
+
+/**
+ * usb_decode_ctrl - returns a string representation of ctrl request
+ */
+const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType,
+			    __u8 bRequest, __u16 wValue, __u16 wIndex,
+			    __u16 wLength)
+{
+	switch (bRequest) {
+	case USB_REQ_GET_STATUS:
+		usb_decode_get_status(bRequestType, wIndex, wLength, str, size);
+		break;
+	case USB_REQ_CLEAR_FEATURE:
+	case USB_REQ_SET_FEATURE:
+		usb_decode_set_clear_feature(bRequestType, bRequest, wValue,
+					     wIndex, str, size);
+		break;
+	case USB_REQ_SET_ADDRESS:
+		usb_decode_set_address(wValue, str, size);
+		break;
+	case USB_REQ_GET_DESCRIPTOR:
+	case USB_REQ_SET_DESCRIPTOR:
+		usb_decode_get_set_descriptor(bRequestType, bRequest, wValue,
+					      wIndex, wLength, str, size);
+		break;
+	case USB_REQ_GET_CONFIGURATION:
+		usb_decode_get_configuration(wLength, str, size);
+		break;
+	case USB_REQ_SET_CONFIGURATION:
+		usb_decode_set_configuration(wValue, str, size);
+		break;
+	case USB_REQ_GET_INTERFACE:
+		usb_decode_get_intf(wIndex, wLength, str, size);
+		break;
+	case USB_REQ_SET_INTERFACE:
+		usb_decode_set_intf(wValue, wIndex, str, size);
+		break;
+	case USB_REQ_SYNCH_FRAME:
+		usb_decode_synch_frame(wIndex, wLength, str, size);
+		break;
+	case USB_REQ_SET_SEL:
+		usb_decode_set_sel(wLength, str, size);
+		break;
+	case USB_REQ_SET_ISOCH_DELAY:
+		usb_decode_set_isoch_delay(wValue, str, size);
+		break;
+	default:
+		snprintf(str, size, "%02x %02x %02x %02x %02x %02x %02x %02x",
+			 bRequestType, bRequest,
+			 (u8)(cpu_to_le16(wValue) & 0xff),
+			 (u8)(cpu_to_le16(wValue) >> 8),
+			 (u8)(cpu_to_le16(wIndex) & 0xff),
+			 (u8)(cpu_to_le16(wIndex) >> 8),
+			 (u8)(cpu_to_le16(wLength) & 0xff),
+			 (u8)(cpu_to_le16(wLength) >> 8));
+	}
+
+	return str;
+}
+EXPORT_SYMBOL_GPL(usb_decode_ctrl);
diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index 068259fdfb0c..9baabed87d61 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -246,258 +246,6 @@ static inline const char *dwc3_gadget_event_string(char *str, size_t size,
 	return str;
 }
 
-static inline void dwc3_decode_get_status(__u8 t, __u16 i, __u16 l, char *str,
-		size_t size)
-{
-	switch (t & USB_RECIP_MASK) {
-	case USB_RECIP_DEVICE:
-		snprintf(str, size, "Get Device Status(Length = %d)", l);
-		break;
-	case USB_RECIP_INTERFACE:
-		snprintf(str, size, "Get Interface Status(Intf = %d, Length = %d)",
-				i, l);
-		break;
-	case USB_RECIP_ENDPOINT:
-		snprintf(str, size, "Get Endpoint Status(ep%d%s)",
-			i & ~USB_DIR_IN,
-			i & USB_DIR_IN ? "in" : "out");
-		break;
-	}
-}
-
-static inline void dwc3_decode_set_clear_feature(__u8 t, __u8 b, __u16 v,
-		__u16 i, char *str, size_t size)
-{
-	switch (t & USB_RECIP_MASK) {
-	case USB_RECIP_DEVICE:
-		snprintf(str, size, "%s Device Feature(%s%s)",
-			b == USB_REQ_CLEAR_FEATURE ? "Clear" : "Set",
-			({char *s;
-				switch (v) {
-				case USB_DEVICE_SELF_POWERED:
-					s = "Self Powered";
-					break;
-				case USB_DEVICE_REMOTE_WAKEUP:
-					s = "Remote Wakeup";
-					break;
-				case USB_DEVICE_TEST_MODE:
-					s = "Test Mode";
-					break;
-				case USB_DEVICE_U1_ENABLE:
-					s = "U1 Enable";
-					break;
-				case USB_DEVICE_U2_ENABLE:
-					s = "U2 Enable";
-					break;
-				case USB_DEVICE_LTM_ENABLE:
-					s = "LTM Enable";
-					break;
-				default:
-					s = "UNKNOWN";
-				} s; }),
-			v == USB_DEVICE_TEST_MODE ?
-			({ char *s;
-				switch (i) {
-				case TEST_J:
-					s = ": TEST_J";
-					break;
-				case TEST_K:
-					s = ": TEST_K";
-					break;
-				case TEST_SE0_NAK:
-					s = ": TEST_SE0_NAK";
-					break;
-				case TEST_PACKET:
-					s = ": TEST_PACKET";
-					break;
-				case TEST_FORCE_EN:
-					s = ": TEST_FORCE_EN";
-					break;
-				default:
-					s = ": UNKNOWN";
-				} s; }) : "");
-		break;
-	case USB_RECIP_INTERFACE:
-		snprintf(str, size, "%s Interface Feature(%s)",
-			b == USB_REQ_CLEAR_FEATURE ? "Clear" : "Set",
-			v == USB_INTRF_FUNC_SUSPEND ?
-			"Function Suspend" : "UNKNOWN");
-		break;
-	case USB_RECIP_ENDPOINT:
-		snprintf(str, size, "%s Endpoint Feature(%s ep%d%s)",
-			b == USB_REQ_CLEAR_FEATURE ? "Clear" : "Set",
-			v == USB_ENDPOINT_HALT ? "Halt" : "UNKNOWN",
-			i & ~USB_DIR_IN,
-			i & USB_DIR_IN ? "in" : "out");
-		break;
-	}
-}
-
-static inline void dwc3_decode_set_address(__u16 v, char *str, size_t size)
-{
-	snprintf(str, size, "Set Address(Addr = %02x)", v);
-}
-
-static inline void dwc3_decode_get_set_descriptor(__u8 t, __u8 b, __u16 v,
-		__u16 i, __u16 l, char *str, size_t size)
-{
-	snprintf(str, size, "%s %s Descriptor(Index = %d, Length = %d)",
-		b == USB_REQ_GET_DESCRIPTOR ? "Get" : "Set",
-		({ char *s;
-			switch (v >> 8) {
-			case USB_DT_DEVICE:
-				s = "Device";
-				break;
-			case USB_DT_CONFIG:
-				s = "Configuration";
-				break;
-			case USB_DT_STRING:
-				s = "String";
-				break;
-			case USB_DT_INTERFACE:
-				s = "Interface";
-				break;
-			case USB_DT_ENDPOINT:
-				s = "Endpoint";
-				break;
-			case USB_DT_DEVICE_QUALIFIER:
-				s = "Device Qualifier";
-				break;
-			case USB_DT_OTHER_SPEED_CONFIG:
-				s = "Other Speed Config";
-				break;
-			case USB_DT_INTERFACE_POWER:
-				s = "Interface Power";
-				break;
-			case USB_DT_OTG:
-				s = "OTG";
-				break;
-			case USB_DT_DEBUG:
-				s = "Debug";
-				break;
-			case USB_DT_INTERFACE_ASSOCIATION:
-				s = "Interface Association";
-				break;
-			case USB_DT_BOS:
-				s = "BOS";
-				break;
-			case USB_DT_DEVICE_CAPABILITY:
-				s = "Device Capability";
-				break;
-			case USB_DT_PIPE_USAGE:
-				s = "Pipe Usage";
-				break;
-			case USB_DT_SS_ENDPOINT_COMP:
-				s = "SS Endpoint Companion";
-				break;
-			case USB_DT_SSP_ISOC_ENDPOINT_COMP:
-				s = "SSP Isochronous Endpoint Companion";
-				break;
-			default:
-				s = "UNKNOWN";
-				break;
-			} s; }), v & 0xff, l);
-}
-
-
-static inline void dwc3_decode_get_configuration(__u16 l, char *str,
-		size_t size)
-{
-	snprintf(str, size, "Get Configuration(Length = %d)", l);
-}
-
-static inline void dwc3_decode_set_configuration(__u8 v, char *str, size_t size)
-{
-	snprintf(str, size, "Set Configuration(Config = %d)", v);
-}
-
-static inline void dwc3_decode_get_intf(__u16 i, __u16 l, char *str,
-		size_t size)
-{
-	snprintf(str, size, "Get Interface(Intf = %d, Length = %d)", i, l);
-}
-
-static inline void dwc3_decode_set_intf(__u8 v, __u16 i, char *str, size_t size)
-{
-	snprintf(str, size, "Set Interface(Intf = %d, Alt.Setting = %d)", i, v);
-}
-
-static inline void dwc3_decode_synch_frame(__u16 i, __u16 l, char *str,
-		size_t size)
-{
-	snprintf(str, size, "Synch Frame(Endpoint = %d, Length = %d)", i, l);
-}
-
-static inline void dwc3_decode_set_sel(__u16 l, char *str, size_t size)
-{
-	snprintf(str, size, "Set SEL(Length = %d)", l);
-}
-
-static inline void dwc3_decode_set_isoch_delay(__u8 v, char *str, size_t size)
-{
-	snprintf(str, size, "Set Isochronous Delay(Delay = %d ns)", v);
-}
-
-/**
- * dwc3_decode_ctrl - returns a string represetion of ctrl request
- */
-static inline const char *dwc3_decode_ctrl(char *str, size_t size,
-		__u8 bRequestType, __u8 bRequest, __u16 wValue, __u16 wIndex,
-		__u16 wLength)
-{
-	switch (bRequest) {
-	case USB_REQ_GET_STATUS:
-		dwc3_decode_get_status(bRequestType, wIndex, wLength, str,
-				size);
-		break;
-	case USB_REQ_CLEAR_FEATURE:
-	case USB_REQ_SET_FEATURE:
-		dwc3_decode_set_clear_feature(bRequestType, bRequest, wValue,
-				wIndex, str, size);
-		break;
-	case USB_REQ_SET_ADDRESS:
-		dwc3_decode_set_address(wValue, str, size);
-		break;
-	case USB_REQ_GET_DESCRIPTOR:
-	case USB_REQ_SET_DESCRIPTOR:
-		dwc3_decode_get_set_descriptor(bRequestType, bRequest, wValue,
-				wIndex, wLength, str, size);
-		break;
-	case USB_REQ_GET_CONFIGURATION:
-		dwc3_decode_get_configuration(wLength, str, size);
-		break;
-	case USB_REQ_SET_CONFIGURATION:
-		dwc3_decode_set_configuration(wValue, str, size);
-		break;
-	case USB_REQ_GET_INTERFACE:
-		dwc3_decode_get_intf(wIndex, wLength, str, size);
-		break;
-	case USB_REQ_SET_INTERFACE:
-		dwc3_decode_set_intf(wValue, wIndex, str, size);
-		break;
-	case USB_REQ_SYNCH_FRAME:
-		dwc3_decode_synch_frame(wIndex, wLength, str, size);
-		break;
-	case USB_REQ_SET_SEL:
-		dwc3_decode_set_sel(wLength, str, size);
-		break;
-	case USB_REQ_SET_ISOCH_DELAY:
-		dwc3_decode_set_isoch_delay(wValue, str, size);
-		break;
-	default:
-		snprintf(str, size, "%02x %02x %02x %02x %02x %02x %02x %02x",
-			bRequestType, bRequest,
-			cpu_to_le16(wValue) & 0xff,
-			cpu_to_le16(wValue) >> 8,
-			cpu_to_le16(wIndex) & 0xff,
-			cpu_to_le16(wIndex) >> 8,
-			cpu_to_le16(wLength) & 0xff,
-			cpu_to_le16(wLength) >> 8);
-	}
-
-	return str;
-}
-
 /**
  * dwc3_ep_event_string - returns event name
  * @event: then event code
diff --git a/drivers/usb/dwc3/trace.h b/drivers/usb/dwc3/trace.h
index 818a63da1a44..9edff17111f7 100644
--- a/drivers/usb/dwc3/trace.h
+++ b/drivers/usb/dwc3/trace.h
@@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(dwc3_log_ctrl,
 		__entry->wIndex = le16_to_cpu(ctrl->wIndex);
 		__entry->wLength = le16_to_cpu(ctrl->wLength);
 	),
-	TP_printk("%s", dwc3_decode_ctrl(__get_str(str), DWC3_MSG_MAX,
+	TP_printk("%s", usb_decode_ctrl(__get_str(str), DWC3_MSG_MAX,
 					__entry->bRequestType,
 					__entry->bRequest, __entry->wValue,
 					__entry->wIndex, __entry->wLength)
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index da82606be605..58b83066bea4 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -70,4 +70,31 @@ extern enum usb_device_speed usb_get_maximum_speed(struct device *dev);
  */
 extern const char *usb_state_string(enum usb_device_state state);
 
+#ifdef CONFIG_TRACING
+/**
+ * usb_decode_ctrl - Returns human readable representation of control request.
+ * @str: buffer to return a human-readable representation of control request.
+ *       This buffer should have about 200 bytes.
+ * @size: size of str buffer.
+ * @bRequestType: matches the USB bmRequestType field
+ * @bRequest: matches the USB bRequest field
+ * @wValue: matches the USB wValue field (CPU byte order)
+ * @wIndex: matches the USB wIndex field (CPU byte order)
+ * @wLength: matches the USB wLength field (CPU byte order)
+ *
+ * Function returns decoded, formatted and human-readable description of
+ * control request packet.
+ *
+ * The usage scenario for this is for tracepoints, so function as a return
+ * use the same value as in parameters. This approach allows to use this
+ * function in TP_printk
+ *
+ * Important: wValue, wIndex, wLength parameters before invoking this function
+ * should be processed by le16_to_cpu macro.
+ */
+extern const char *usb_decode_ctrl(char *str, size_t size, __u8 bRequestType,
+				   __u8 bRequest, __u16 wValue, __u16 wIndex,
+				   __u16 wLength);
+#endif
+
 #endif /* __LINUX_USB_CH9_H */
-- 
cgit v1.2.3


From 7548205ae51c70290642f0e8cc72aa10d0c92cfb Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Fri, 9 Aug 2019 13:59:46 +0530
Subject: dt-bindings: pinctrl: k3: Introduce pinmux definitions for J721E

Add pinctrl macros for J721E SoC. These macro definitions are
similar to that of AM6, but adding new definitions to avoid
any naming confusions in the soc dts files.

Acked-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 include/dt-bindings/pinctrl/k3.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/dt-bindings/pinctrl/k3.h b/include/dt-bindings/pinctrl/k3.h
index 45e11b6170ca..499de6216581 100644
--- a/include/dt-bindings/pinctrl/k3.h
+++ b/include/dt-bindings/pinctrl/k3.h
@@ -32,4 +32,7 @@
 #define AM65X_IOPAD(pa, val, muxmode)		(((pa) & 0x1fff)) ((val) | (muxmode))
 #define AM65X_WKUP_IOPAD(pa, val, muxmode)	(((pa) & 0x1fff)) ((val) | (muxmode))
 
+#define J721E_IOPAD(pa, val, muxmode)		(((pa) & 0x1fff)) ((val) | (muxmode))
+#define J721E_WKUP_IOPAD(pa, val, muxmode)	(((pa) & 0x1fff)) ((val) | (muxmode))
+
 #endif
-- 
cgit v1.2.3


From 419e2f1838819e954071dfa1d1f820ab3386ada1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 26 Aug 2019 09:03:44 +0200
Subject: dma-mapping: remove arch_dma_mmap_pgprot

arch_dma_mmap_pgprot is used for two things:

 1) to override the "normal" uncached page attributes for mapping
    memory coherent to devices that can't snoop the CPU caches
 2) to provide the special DMA_ATTR_WRITE_COMBINE semantics on older
    arm systems and some mips platforms

Replace one with the pgprot_dmacoherent macro that is already provided
by arm and much simpler to use, and lift the DMA_ATTR_WRITE_COMBINE
handling to common code with an explicit arch opt-in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>	# m68k
Acked-by: Paul Burton <paul.burton@mips.com>		# mips
---
 arch/arm/Kconfig                   |  2 +-
 arch/arm/mm/dma-mapping.c          |  6 ------
 arch/arm64/Kconfig                 |  1 -
 arch/arm64/include/asm/pgtable.h   |  4 ++++
 arch/arm64/mm/dma-mapping.c        |  6 ------
 arch/m68k/Kconfig                  |  1 -
 arch/m68k/include/asm/pgtable_mm.h |  3 +++
 arch/m68k/kernel/dma.c             |  3 +--
 arch/mips/Kconfig                  |  2 +-
 arch/mips/mm/dma-noncoherent.c     |  8 --------
 include/linux/dma-noncoherent.h    | 13 +++++++++++--
 kernel/dma/Kconfig                 | 12 +++++++++---
 kernel/dma/mapping.c               |  8 +++++---
 13 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 24360211534a..217083caeabd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,7 +8,7 @@ config ARM
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_COHERENT_TO_PFN if SWIOTLB
-	select ARCH_HAS_DMA_MMAP_PGPROT if SWIOTLB
+	select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_KEEPINITRD
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d42557ee69c2..d27b12f61737 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2402,12 +2402,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 	return dma_to_pfn(dev, dma_addr);
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	return __get_dma_pgprot(attrs, prot);
-}
-
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3adcec05b1f6..dab9dda34206 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,7 +13,6 @@ config ARM64
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_DMA_COHERENT_TO_PFN
-	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ELF_RANDOMIZE
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e09760ece844..6700371227d1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -435,6 +435,10 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define pgprot_device(prot) \
 	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define pgprot_dmacoherent(prot) \
+	__pgprot_modify(prot, PTE_ATTRINDX_MASK, \
+			PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
+
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bd2b039f43a6..676efcda51e6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -11,12 +11,6 @@
 
 #include <asm/cacheflush.h>
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	return pgprot_writecombine(prot);
-}
-
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 		size_t size, enum dma_data_direction dir)
 {
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index c518d695c376..a9e564306d3e 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -4,7 +4,6 @@ config M68K
 	default y
 	select ARCH_32BIT_OFF_T
 	select ARCH_HAS_BINFMT_FLAT
-	select ARCH_HAS_DMA_MMAP_PGPROT if MMU && !COLDFIRE
 	select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA
 	select ARCH_MIGHT_HAVE_PC_PARPORT if ISA
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index fe3ddd73a0cc..fde4534b974f 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -169,6 +169,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	    ? (__pgprot((pgprot_val(prot) & _CACHEMASK040) | _PAGE_NOCACHE_S))	\
 	    : (prot)))
 
+pgprot_t pgprot_dmacoherent(pgprot_t prot);
+#define pgprot_dmacoherent(prot)	pgprot_dmacoherent(prot)
+
 #endif /* CONFIG_COLDFIRE */
 #include <asm-generic/pgtable.h>
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c
index 30cd59caf037..35064150e348 100644
--- a/arch/m68k/kernel/dma.c
+++ b/arch/m68k/kernel/dma.c
@@ -23,8 +23,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	cache_push(page_to_phys(page), size);
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
+pgprot_t pgprot_dmacoherent(pgprot_t prot)
 {
 	if (CPU_IS_040_OR_060) {
 		pgprot_val(prot) &= ~_PAGE_CACHE040;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index d50fafd7bf3a..fc88f68ea1ee 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1119,7 +1119,7 @@ config DMA_PERDEV_COHERENT
 
 config DMA_NONCOHERENT
 	bool
-	select ARCH_HAS_DMA_MMAP_PGPROT
+	select ARCH_HAS_DMA_WRITE_COMBINE
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_UNCACHED_SEGMENT
 	select NEED_DMA_MAP_STATE
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index ed56c6fa7be2..1d4d57dd9acf 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -65,14 +65,6 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 	return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr)));
 }
 
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	if (attrs & DMA_ATTR_WRITE_COMBINE)
-		return pgprot_writecombine(prot);
-	return pgprot_noncached(prot);
-}
-
 static inline void dma_sync_virt(void *addr, size_t size,
 		enum dma_data_direction dir)
 {
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 0bff3d7fac92..dd3de6d88fc0 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -3,6 +3,7 @@
 #define _LINUX_DMA_NONCOHERENT_H 1
 
 #include <linux/dma-mapping.h>
+#include <asm/pgtable.h>
 
 #ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H
 #include <asm/dma-coherence.h>
@@ -42,10 +43,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
 long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 		dma_addr_t dma_addr);
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs);
 
 #ifdef CONFIG_MMU
+/*
+ * Page protection so that devices that can't snoop CPU caches can use the
+ * memory coherently.  We default to pgprot_noncached which is usually used
+ * for ioremap as a safe bet, but architectures can override this with less
+ * strict semantics if possible.
+ */
+#ifndef pgprot_dmacoherent
+#define pgprot_dmacoherent(prot)	pgprot_noncached(prot)
+#endif
+
 pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs);
 #else
 static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 9decbba255fc..73c5c2b8e824 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -20,6 +20,15 @@ config ARCH_HAS_DMA_COHERENCE_H
 config ARCH_HAS_DMA_SET_MASK
 	bool
 
+#
+# Select this option if the architecture needs special handling for
+# DMA_ATTR_WRITE_COMBINE.  Normally the "uncached" mapping should be what
+# people thing of when saying write combine, so very few platforms should
+# need to enable this.
+#
+config ARCH_HAS_DMA_WRITE_COMBINE
+	bool
+
 config DMA_DECLARE_COHERENT
 	bool
 
@@ -45,9 +54,6 @@ config ARCH_HAS_DMA_PREP_COHERENT
 config ARCH_HAS_DMA_COHERENT_TO_PFN
 	bool
 
-config ARCH_HAS_DMA_MMAP_PGPROT
-	bool
-
 config ARCH_HAS_FORCE_DMA_UNENCRYPTED
 	bool
 
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b0038ca3aa92..1b96616c9f20 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -161,9 +161,11 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
 	    (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
              (attrs & DMA_ATTR_NON_CONSISTENT)))
 		return prot;
-	if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT))
-		return arch_dma_mmap_pgprot(dev, prot, attrs);
-	return pgprot_noncached(prot);
+#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
+	if (attrs & DMA_ATTR_WRITE_COMBINE)
+		return pgprot_writecombine(prot);
+#endif
+	return pgprot_dmacoherent(prot);
 }
 #endif /* CONFIG_MMU */
 
-- 
cgit v1.2.3


From 8e3a68fb55e00e0760bd8023883e064f1f93c62d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Aug 2019 12:42:15 +0300
Subject: dma-mapping: make dma_atomic_pool_init self-contained

The memory allocated for the atomic pool needs to have the same
mapping attributes that we use for remapping, so use
pgprot_dmacoherent instead of open coding it.  Also deduct a
suitable zone to allocate the memory from based on the presence
of the DMA zones.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arc/mm/dma.c           |  6 ------
 arch/arm64/mm/dma-mapping.c |  6 ------
 arch/csky/mm/dma-mapping.c  |  6 ------
 arch/nds32/kernel/dma.c     |  6 ------
 include/linux/dma-mapping.h |  1 -
 kernel/dma/remap.c          | 17 ++++++++++++++---
 6 files changed, 14 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 62c210e7ee4c..ff4a5752f8cc 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -104,9 +104,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	dev_info(dev, "use %sncoherent DMA ops\n",
 		 dev->dma_coherent ? "" : "non");
 }
-
-static int __init atomic_pool_init(void)
-{
-	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 676efcda51e6..a1d05f669f67 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -28,12 +28,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	__dma_flush_area(page_address(page), size);
 }
 
-static int __init arm64_dma_init(void)
-{
-	return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC));
-}
-arch_initcall(arm64_dma_init);
-
 #ifdef CONFIG_IOMMU_DMA
 void arch_teardown_dma_ops(struct device *dev)
 {
diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c
index 80783bb71c5c..602a60d47a94 100644
--- a/arch/csky/mm/dma-mapping.c
+++ b/arch/csky/mm/dma-mapping.c
@@ -14,12 +14,6 @@
 #include <linux/version.h>
 #include <asm/cache.h>
 
-static int __init atomic_pool_init(void)
-{
-	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
-
 void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	if (PageHighMem(page)) {
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index 490e3720d694..4206d4b6c8ce 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -80,9 +80,3 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 {
 	cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
 }
-
-static int __init atomic_pool_init(void)
-{
-	return dma_atomic_pool_init(GFP_KERNEL, pgprot_noncached(PAGE_KERNEL));
-}
-postcore_initcall(atomic_pool_init);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f7d1eea32c78..48ebe8295987 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -624,7 +624,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
 			const void *caller);
 void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
 
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot);
 bool dma_in_atomic_pool(void *start, size_t size);
 void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
 bool dma_free_from_pool(void *start, size_t size);
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index ffe78f0b2fe4..838123f79639 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -105,7 +105,16 @@ static int __init early_coherent_pool(char *p)
 }
 early_param("coherent_pool", early_coherent_pool);
 
-int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
+static gfp_t dma_atomic_pool_gfp(void)
+{
+	if (IS_ENABLED(CONFIG_ZONE_DMA))
+		return GFP_DMA;
+	if (IS_ENABLED(CONFIG_ZONE_DMA32))
+		return GFP_DMA32;
+	return GFP_KERNEL;
+}
+
+static int __init dma_atomic_pool_init(void)
 {
 	unsigned int pool_size_order = get_order(atomic_pool_size);
 	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
@@ -117,7 +126,7 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
 		page = dma_alloc_from_contiguous(NULL, nr_pages,
 						 pool_size_order, false);
 	else
-		page = alloc_pages(gfp, pool_size_order);
+		page = alloc_pages(dma_atomic_pool_gfp(), pool_size_order);
 	if (!page)
 		goto out;
 
@@ -128,7 +137,8 @@ int __init dma_atomic_pool_init(gfp_t gfp, pgprot_t prot)
 		goto free_page;
 
 	addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
-					   prot, __builtin_return_address(0));
+					   pgprot_dmacoherent(PAGE_KERNEL),
+					   __builtin_return_address(0));
 	if (!addr)
 		goto destroy_genpool;
 
@@ -155,6 +165,7 @@ out:
 		atomic_pool_size / 1024);
 	return -ENOMEM;
 }
+postcore_initcall(dma_atomic_pool_init);
 
 bool dma_in_atomic_pool(void *start, size_t size)
 {
-- 
cgit v1.2.3


From 8d1c1560c383004e09c6a39498094671cc664e6b Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 29 Aug 2019 09:43:34 -0600
Subject: blkcg: blk-iocost: predeclare used structs

Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/trace/events/iocost.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h
index ec2217dd57ac..7ecaa65b7106 100644
--- a/include/trace/events/iocost.h
+++ b/include/trace/events/iocost.h
@@ -2,6 +2,10 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM iocost
 
+struct ioc;
+struct ioc_now;
+struct ioc_gq;
+
 #if !defined(_TRACE_BLK_IOCOST_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_BLK_IOCOST_H
 
-- 
cgit v1.2.3


From 7c9eb2dbd770b7c9980d5839dd305a70fbc5df67 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 27 Aug 2019 11:57:07 -0500
Subject: nds32: Mark expected switch fall-throughs

Mark switch cases where we are expecting to fall through.

This patch fixes the following warnings (Building: allmodconfig nds32):

include/math-emu/soft-fp.h:124:8: warning: this statement may fall through [-Wimplicit-fallthrough=]
arch/nds32/kernel/signal.c:362:20: warning: this statement may fall through [-Wimplicit-fallthrough=]
arch/nds32/kernel/signal.c:315:7: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:417:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:430:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/soft-fp.h:124:8: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:417:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:430:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:310:11: warning: this statement may fall through [-Wimplicit-fallthrough=]
include/math-emu/op-common.h:320:11: warning: this statement may fall through [-Wimplicit-fallthrough=]

Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 arch/nds32/kernel/signal.c   | 2 ++
 include/math-emu/op-common.h | 5 +++++
 2 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c
index fe61513982b4..330b19fcd990 100644
--- a/arch/nds32/kernel/signal.c
+++ b/arch/nds32/kernel/signal.c
@@ -316,6 +316,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 				regs->uregs[0] = -EINTR;
 				break;
 			}
+			/* Else, fall through */
 		case -ERESTARTNOINTR:
 			regs->uregs[0] = regs->orig_r0;
 			regs->ipc -= 4;
@@ -360,6 +361,7 @@ static void do_signal(struct pt_regs *regs)
 		switch (regs->uregs[0]) {
 		case -ERESTART_RESTARTBLOCK:
 			regs->uregs[15] = __NR_restart_syscall;
+			/* Fall through */
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
diff --git a/include/math-emu/op-common.h b/include/math-emu/op-common.h
index f37d12877754..adcc6a97db61 100644
--- a/include/math-emu/op-common.h
+++ b/include/math-emu/op-common.h
@@ -308,6 +308,7 @@ do {									     \
 									     \
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):			     \
     R##_e = X##_e;							     \
+	  /* Fall through */						     \
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):			     \
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):				     \
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):				     \
@@ -318,6 +319,7 @@ do {									     \
 									     \
   case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):			     \
     R##_e = Y##_e;							     \
+	  /* Fall through */						     \
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):			     \
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):				     \
   case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):				     \
@@ -415,6 +417,7 @@ do {							\
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):		\
   case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):		\
     R##_s = X##_s;					\
+	/* Fall through */				\
 							\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):		\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
@@ -428,6 +431,7 @@ do {							\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):		\
   case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):		\
     R##_s = Y##_s;					\
+	/* Fall through */				\
 							\
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):	\
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
@@ -493,6 +497,7 @@ do {							\
 							\
   case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):	\
     FP_SET_EXCEPTION(FP_EX_DIVZERO);			\
+	  /* Fall through */				\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):		\
   case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):	\
     R##_c = FP_CLS_INF;					\
-- 
cgit v1.2.3


From c638984521f19ba218477d5ef9f10f9a6206bab6 Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Sun, 4 Aug 2019 16:50:47 +0900
Subject: nvme: add Get LBA Status command opcode

NVMe 1.4 added Get LBA Status command with opcode 0x86.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 01aa6a6c241d..a01277501eae 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -814,6 +814,7 @@ enum nvme_admin_opcode {
 	nvme_admin_security_send	= 0x81,
 	nvme_admin_security_recv	= 0x82,
 	nvme_admin_sanitize_nvm		= 0x84,
+	nvme_admin_get_lba_status	= 0x86,
 };
 
 #define nvme_admin_opcode_name(opcode)	{ opcode, #opcode }
-- 
cgit v1.2.3


From a5ef757204bab6f80268a7437556cb57744ab7d4 Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Sun, 4 Aug 2019 16:50:48 +0900
Subject: nvme: trace: support for Get LBA Status opcode parsed

This patch adds Get LBA Status command's opcode to the macro that is
used by the trace feature.  Now we can see "get_lba_status" instead of
the opcode value itself.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 include/linux/nvme.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a01277501eae..32c25b46ae63 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -841,7 +841,8 @@ enum nvme_admin_opcode {
 		nvme_admin_opcode_name(nvme_admin_format_nvm),		\
 		nvme_admin_opcode_name(nvme_admin_security_send),	\
 		nvme_admin_opcode_name(nvme_admin_security_recv),	\
-		nvme_admin_opcode_name(nvme_admin_sanitize_nvm))
+		nvme_admin_opcode_name(nvme_admin_sanitize_nvm),	\
+		nvme_admin_opcode_name(nvme_admin_get_lba_status))
 
 enum {
 	NVME_QUEUE_PHYS_CONTIG	= (1 << 0),
-- 
cgit v1.2.3


From c1e0cc7e1d319936271dfdd0a9405275c8091381 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 7 Aug 2019 17:51:20 +1000
Subject: nvme-pci: Add support for variable IO SQ element size

The size of a submission queue element should always be 6 (64 bytes)
by spec.

However some controllers such as Apple's are not properly implementing
the standard and require a different size.

This provides the ground work for the subsequent quirks for these
controllers.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Minwoo Im <minwoo.im.dev@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 11 ++++++++---
 include/linux/nvme.h    |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a09e6c4e3434..eee93e138c2c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -28,7 +28,7 @@
 #include "trace.h"
 #include "nvme.h"
 
-#define SQ_SIZE(q)	((q)->q_depth * sizeof(struct nvme_command))
+#define SQ_SIZE(q)	((q)->q_depth << (q)->sqes)
 #define CQ_SIZE(q)	((q)->q_depth * sizeof(struct nvme_completion))
 
 #define SGES_PER_PAGE	(PAGE_SIZE / sizeof(struct nvme_sgl_desc))
@@ -100,6 +100,7 @@ struct nvme_dev {
 	unsigned io_queues[HCTX_MAX_TYPES];
 	unsigned int num_vecs;
 	int q_depth;
+	int io_sqes;
 	u32 db_stride;
 	void __iomem *bar;
 	unsigned long bar_mapped_size;
@@ -162,7 +163,7 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
 struct nvme_queue {
 	struct nvme_dev *dev;
 	spinlock_t sq_lock;
-	struct nvme_command *sq_cmds;
+	void *sq_cmds;
 	 /* only used for poll queues: */
 	spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
 	volatile struct nvme_completion *cqes;
@@ -178,6 +179,7 @@ struct nvme_queue {
 	u16 last_cq_head;
 	u16 qid;
 	u8 cq_phase;
+	u8 sqes;
 	unsigned long flags;
 #define NVMEQ_ENABLED		0
 #define NVMEQ_SQ_CMB		1
@@ -488,7 +490,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
 			    bool write_sq)
 {
 	spin_lock(&nvmeq->sq_lock);
-	memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
+	       cmd, sizeof(*cmd));
 	if (++nvmeq->sq_tail == nvmeq->q_depth)
 		nvmeq->sq_tail = 0;
 	nvme_write_sq_db(nvmeq, write_sq);
@@ -1465,6 +1468,7 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	if (dev->ctrl.queue_count > qid)
 		return 0;
 
+	nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES;
 	nvmeq->q_depth = depth;
 	nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
 					 &nvmeq->cq_dma_addr, GFP_KERNEL);
@@ -2317,6 +2321,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 	dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
 	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
 	dev->dbs = dev->bar + 4096;
+	dev->io_sqes = NVME_NVM_IOSQES;
 
 	/*
 	 * Temporary fix for the Apple controller found in the MacBook8,1 and
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 32c25b46ae63..f61d6906e59d 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -140,6 +140,7 @@ enum {
  * Submission and Completion Queue Entry Sizes for the NVM command set.
  * (In bytes and specified as a power of two (2^n)).
  */
+#define NVME_ADM_SQES       6
 #define NVME_NVM_IOSQES		6
 #define NVME_NVM_IOCQES		4
 
-- 
cgit v1.2.3


From d78c620a2e824d7b01a6e991208a8aa2c938cabe Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 26 Aug 2019 17:54:54 -0700
Subject: libnvdimm/security: Introduce a 'frozen' attribute

In the process of debugging a system with an NVDIMM that was failing to
unlock it was found that the kernel is reporting 'locked' while the DIMM
security interface is 'frozen'. Unfortunately the security state is
tracked internally as an enum which prevents it from communicating the
difference between 'locked' and 'locked + frozen'. It follows that the
enum also prevents the kernel from communicating 'unlocked + frozen'
which would be useful for debugging why security operations like 'change
passphrase' are disabled.

Ditch the security state enum for a set of flags and introduce a new
sysfs attribute explicitly for the 'frozen' state. The regression risk
is low because the 'frozen' state was already blocked behind the
'locked' state, but will need to revisit if there were cases where
applications need 'frozen' to show up in the primary 'security'
attribute. The expectation is that communicating 'frozen' is mostly a
helper for debug and status monitoring.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Link: https://lore.kernel.org/r/156686729474.184120.5835135644278860826.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/intel.c        | 59 +++++++++++++-----------
 drivers/nvdimm/bus.c             |  2 +-
 drivers/nvdimm/dimm_devs.c       | 59 ++++++++++++++----------
 drivers/nvdimm/nd-core.h         | 21 +++++++--
 drivers/nvdimm/security.c        | 99 +++++++++++++++++++---------------------
 include/linux/libnvdimm.h        |  9 ++--
 tools/testing/nvdimm/dimm_devs.c | 19 ++------
 7 files changed, 141 insertions(+), 127 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c
index cddd0fcf622c..1113b679cd7b 100644
--- a/drivers/acpi/nfit/intel.c
+++ b/drivers/acpi/nfit/intel.c
@@ -7,10 +7,11 @@
 #include "intel.h"
 #include "nfit.h"
 
-static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
+static unsigned long intel_security_flags(struct nvdimm *nvdimm,
 		enum nvdimm_passphrase_type ptype)
 {
 	struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+	unsigned long security_flags = 0;
 	struct {
 		struct nd_cmd_pkg pkg;
 		struct nd_intel_get_security_state cmd;
@@ -27,7 +28,7 @@ static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
 	int rc;
 
 	if (!test_bit(NVDIMM_INTEL_GET_SECURITY_STATE, &nfit_mem->dsm_mask))
-		return -ENXIO;
+		return 0;
 
 	/*
 	 * Short circuit the state retrieval while we are doing overwrite.
@@ -35,38 +36,42 @@ static enum nvdimm_security_state intel_security_state(struct nvdimm *nvdimm,
 	 * until the overwrite DSM completes.
 	 */
 	if (nvdimm_in_overwrite(nvdimm) && ptype == NVDIMM_USER)
-		return NVDIMM_SECURITY_OVERWRITE;
+		return BIT(NVDIMM_SECURITY_OVERWRITE);
 
 	rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL);
-	if (rc < 0)
-		return rc;
-	if (nd_cmd.cmd.status)
-		return -EIO;
+	if (rc < 0 || nd_cmd.cmd.status) {
+		pr_err("%s: security state retrieval failed (%d:%#x)\n",
+				nvdimm_name(nvdimm), rc, nd_cmd.cmd.status);
+		return 0;
+	}
 
 	/* check and see if security is enabled and locked */
 	if (ptype == NVDIMM_MASTER) {
 		if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_ENABLED)
-			return NVDIMM_SECURITY_UNLOCKED;
-		else if (nd_cmd.cmd.extended_state &
-				ND_INTEL_SEC_ESTATE_PLIMIT)
-			return NVDIMM_SECURITY_FROZEN;
-	} else {
-		if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED)
-			return -ENXIO;
-		else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) {
-			if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED)
-				return NVDIMM_SECURITY_LOCKED;
-			else if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN
-					|| nd_cmd.cmd.state &
-					ND_INTEL_SEC_STATE_PLIMIT)
-				return NVDIMM_SECURITY_FROZEN;
-			else
-				return NVDIMM_SECURITY_UNLOCKED;
-		}
+			set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
+		else
+			set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
+		if (nd_cmd.cmd.extended_state & ND_INTEL_SEC_ESTATE_PLIMIT)
+			set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
+		return security_flags;
 	}
 
-	/* this should cover master security disabled as well */
-	return NVDIMM_SECURITY_DISABLED;
+	if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_UNSUPPORTED)
+		return 0;
+
+	if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_ENABLED) {
+		if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_FROZEN ||
+		    nd_cmd.cmd.state & ND_INTEL_SEC_STATE_PLIMIT)
+			set_bit(NVDIMM_SECURITY_FROZEN, &security_flags);
+
+		if (nd_cmd.cmd.state & ND_INTEL_SEC_STATE_LOCKED)
+			set_bit(NVDIMM_SECURITY_LOCKED, &security_flags);
+		else
+			set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags);
+	} else
+		set_bit(NVDIMM_SECURITY_DISABLED, &security_flags);
+
+	return security_flags;
 }
 
 static int intel_security_freeze(struct nvdimm *nvdimm)
@@ -371,7 +376,7 @@ static void nvdimm_invalidate_cache(void)
 #endif
 
 static const struct nvdimm_security_ops __intel_security_ops = {
-	.state = intel_security_state,
+	.get_flags = intel_security_flags,
 	.freeze = intel_security_freeze,
 	.change_key = intel_security_change_key,
 	.disable = intel_security_disable,
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 798c5c4aea9c..29479d3b01b0 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -400,7 +400,7 @@ static int child_unregister(struct device *dev, void *data)
 
 		/* We are shutting down. Make state frozen artificially. */
 		nvdimm_bus_lock(dev);
-		nvdimm->sec.state = NVDIMM_SECURITY_FROZEN;
+		set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags);
 		if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
 			dev_put = true;
 		nvdimm_bus_unlock(dev);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 29a065e769ea..53330625fe07 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -372,24 +372,27 @@ __weak ssize_t security_show(struct device *dev,
 {
 	struct nvdimm *nvdimm = to_nvdimm(dev);
 
-	switch (nvdimm->sec.state) {
-	case NVDIMM_SECURITY_DISABLED:
+	if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
 		return sprintf(buf, "disabled\n");
-	case NVDIMM_SECURITY_UNLOCKED:
+	if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
 		return sprintf(buf, "unlocked\n");
-	case NVDIMM_SECURITY_LOCKED:
+	if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
 		return sprintf(buf, "locked\n");
-	case NVDIMM_SECURITY_FROZEN:
-		return sprintf(buf, "frozen\n");
-	case NVDIMM_SECURITY_OVERWRITE:
+	if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
 		return sprintf(buf, "overwrite\n");
-	default:
-		return -ENOTTY;
-	}
-
 	return -ENOTTY;
 }
 
+static ssize_t frozen_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+
+	return sprintf(buf, "%d\n", test_bit(NVDIMM_SECURITY_FROZEN,
+				&nvdimm->sec.flags));
+}
+static DEVICE_ATTR_RO(frozen);
+
 #define OPS							\
 	C( OP_FREEZE,		"freeze",		1),	\
 	C( OP_DISABLE,		"disable",		2),	\
@@ -501,6 +504,7 @@ static struct attribute *nvdimm_attributes[] = {
 	&dev_attr_commands.attr,
 	&dev_attr_available_slots.attr,
 	&dev_attr_security.attr,
+	&dev_attr_frozen.attr,
 	NULL,
 };
 
@@ -509,17 +513,24 @@ static umode_t nvdimm_visible(struct kobject *kobj, struct attribute *a, int n)
 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
 	struct nvdimm *nvdimm = to_nvdimm(dev);
 
-	if (a != &dev_attr_security.attr)
+	if (a != &dev_attr_security.attr && a != &dev_attr_frozen.attr)
 		return a->mode;
-	if (nvdimm->sec.state < 0)
+	if (!nvdimm->sec.flags)
 		return 0;
-	/* Are there any state mutation ops? */
-	if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable
-			|| nvdimm->sec.ops->change_key
-			|| nvdimm->sec.ops->erase
-			|| nvdimm->sec.ops->overwrite)
+
+	if (a == &dev_attr_security.attr) {
+		/* Are there any state mutation ops (make writable)? */
+		if (nvdimm->sec.ops->freeze || nvdimm->sec.ops->disable
+				|| nvdimm->sec.ops->change_key
+				|| nvdimm->sec.ops->erase
+				|| nvdimm->sec.ops->overwrite)
+			return a->mode;
+		return 0444;
+	}
+
+	if (nvdimm->sec.ops->freeze)
 		return a->mode;
-	return 0444;
+	return 0;
 }
 
 struct attribute_group nvdimm_attribute_group = {
@@ -569,8 +580,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
 	 * attribute visibility.
 	 */
 	/* get security state and extended (master) state */
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
-	nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+	nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
 	nd_device_register(dev);
 
 	return nvdimm;
@@ -588,7 +599,7 @@ int nvdimm_security_setup_events(struct device *dev)
 {
 	struct nvdimm *nvdimm = to_nvdimm(dev);
 
-	if (nvdimm->sec.state < 0 || !nvdimm->sec.ops
+	if (!nvdimm->sec.flags || !nvdimm->sec.ops
 			|| !nvdimm->sec.ops->overwrite)
 		return 0;
 	nvdimm->sec.overwrite_state = sysfs_get_dirent(dev->kobj.sd, "security");
@@ -614,7 +625,7 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->freeze)
 		return -EOPNOTSUPP;
 
-	if (nvdimm->sec.state < 0)
+	if (!nvdimm->sec.flags)
 		return -EIO;
 
 	if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
@@ -623,7 +634,7 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
 	}
 
 	rc = nvdimm->sec.ops->freeze(nvdimm);
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 
 	return rc;
 }
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 0ac52b6eb00e..da2bbfd56d9f 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -39,21 +39,32 @@ struct nvdimm {
 	const char *dimm_id;
 	struct {
 		const struct nvdimm_security_ops *ops;
-		enum nvdimm_security_state state;
-		enum nvdimm_security_state ext_state;
+		unsigned long flags;
+		unsigned long ext_flags;
 		unsigned int overwrite_tmo;
 		struct kernfs_node *overwrite_state;
 	} sec;
 	struct delayed_work dwork;
 };
 
-static inline enum nvdimm_security_state nvdimm_security_state(
+static inline unsigned long nvdimm_security_flags(
 		struct nvdimm *nvdimm, enum nvdimm_passphrase_type ptype)
 {
+	u64 flags;
+	const u64 state_flags = 1UL << NVDIMM_SECURITY_DISABLED
+		| 1UL << NVDIMM_SECURITY_LOCKED
+		| 1UL << NVDIMM_SECURITY_UNLOCKED
+		| 1UL << NVDIMM_SECURITY_OVERWRITE;
+
 	if (!nvdimm->sec.ops)
-		return -ENXIO;
+		return 0;
 
-	return nvdimm->sec.ops->state(nvdimm, ptype);
+	flags = nvdimm->sec.ops->get_flags(nvdimm, ptype);
+	/* disabled, locked, unlocked, and overwrite are mutually exclusive */
+	dev_WARN_ONCE(&nvdimm->dev, hweight64(flags & state_flags) > 1,
+			"reported invalid security state: %#llx\n",
+			(unsigned long long) flags);
+	return flags;
 }
 int nvdimm_security_freeze(struct nvdimm *nvdimm);
 #if IS_ENABLED(CONFIG_NVDIMM_KEYS)
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index a570f2263a42..5862d0eee9db 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -158,7 +158,7 @@ static int nvdimm_key_revalidate(struct nvdimm *nvdimm)
 	}
 
 	nvdimm_put_key(key);
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 	return 0;
 }
 
@@ -174,7 +174,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
 	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->unlock
-			|| nvdimm->sec.state < 0)
+			|| !nvdimm->sec.flags)
 		return -EIO;
 
 	if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
@@ -189,7 +189,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
 	 * freeze of the security configuration. I.e. if the OS does not
 	 * have the key, security is being managed pre-OS.
 	 */
-	if (nvdimm->sec.state == NVDIMM_SECURITY_UNLOCKED) {
+	if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) {
 		if (!key_revalidate)
 			return 0;
 
@@ -202,7 +202,7 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm)
 			rc == 0 ? "success" : "fail");
 
 	nvdimm_put_key(key);
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 	return rc;
 }
 
@@ -217,6 +217,24 @@ int nvdimm_security_unlock(struct device *dev)
 	return rc;
 }
 
+static int check_security_state(struct nvdimm *nvdimm)
+{
+	struct device *dev = &nvdimm->dev;
+
+	if (test_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags)) {
+		dev_dbg(dev, "Incorrect security state: %#lx\n",
+				nvdimm->sec.flags);
+		return -EIO;
+	}
+
+	if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
+		dev_dbg(dev, "Security operation in progress.\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
 {
 	struct device *dev = &nvdimm->dev;
@@ -229,19 +247,12 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
 	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable
-			|| nvdimm->sec.state < 0)
+			|| !nvdimm->sec.flags)
 		return -EOPNOTSUPP;
 
-	if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-		dev_dbg(dev, "Incorrect security state: %d\n",
-				nvdimm->sec.state);
-		return -EIO;
-	}
-
-	if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
-		dev_dbg(dev, "Security operation in progress.\n");
-		return -EBUSY;
-	}
+	rc = check_security_state(nvdimm);
+	if (rc)
+		return rc;
 
 	data = nvdimm_get_user_key_payload(nvdimm, keyid,
 			NVDIMM_BASE_KEY, &key);
@@ -253,7 +264,7 @@ int nvdimm_security_disable(struct nvdimm *nvdimm, unsigned int keyid)
 			rc == 0 ? "success" : "fail");
 
 	nvdimm_put_key(key);
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 	return rc;
 }
 
@@ -271,14 +282,12 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
 	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->change_key
-			|| nvdimm->sec.state < 0)
+			|| !nvdimm->sec.flags)
 		return -EOPNOTSUPP;
 
-	if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-		dev_dbg(dev, "Incorrect security state: %d\n",
-				nvdimm->sec.state);
-		return -EIO;
-	}
+	rc = check_security_state(nvdimm);
+	if (rc)
+		return rc;
 
 	data = nvdimm_get_user_key_payload(nvdimm, keyid,
 			NVDIMM_BASE_KEY, &key);
@@ -301,10 +310,10 @@ int nvdimm_security_update(struct nvdimm *nvdimm, unsigned int keyid,
 	nvdimm_put_key(newkey);
 	nvdimm_put_key(key);
 	if (pass_type == NVDIMM_MASTER)
-		nvdimm->sec.ext_state = nvdimm_security_state(nvdimm,
+		nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm,
 				NVDIMM_MASTER);
 	else
-		nvdimm->sec.state = nvdimm_security_state(nvdimm,
+		nvdimm->sec.flags = nvdimm_security_flags(nvdimm,
 				NVDIMM_USER);
 	return rc;
 }
@@ -322,7 +331,7 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
 	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->erase
-			|| nvdimm->sec.state < 0)
+			|| !nvdimm->sec.flags)
 		return -EOPNOTSUPP;
 
 	if (atomic_read(&nvdimm->busy)) {
@@ -330,18 +339,11 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
 		return -EBUSY;
 	}
 
-	if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-		dev_dbg(dev, "Incorrect security state: %d\n",
-				nvdimm->sec.state);
-		return -EIO;
-	}
-
-	if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
-		dev_dbg(dev, "Security operation in progress.\n");
-		return -EBUSY;
-	}
+	rc = check_security_state(nvdimm);
+	if (rc)
+		return rc;
 
-	if (nvdimm->sec.ext_state != NVDIMM_SECURITY_UNLOCKED
+	if (!test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.ext_flags)
 			&& pass_type == NVDIMM_MASTER) {
 		dev_dbg(dev,
 			"Attempt to secure erase in wrong master state.\n");
@@ -359,7 +361,7 @@ int nvdimm_security_erase(struct nvdimm *nvdimm, unsigned int keyid,
 			rc == 0 ? "success" : "fail");
 
 	nvdimm_put_key(key);
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 	return rc;
 }
 
@@ -375,7 +377,7 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
 	lockdep_assert_held(&nvdimm_bus->reconfig_mutex);
 
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->overwrite
-			|| nvdimm->sec.state < 0)
+			|| !nvdimm->sec.flags)
 		return -EOPNOTSUPP;
 
 	if (atomic_read(&nvdimm->busy)) {
@@ -388,16 +390,9 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
 		return -EINVAL;
 	}
 
-	if (nvdimm->sec.state >= NVDIMM_SECURITY_FROZEN) {
-		dev_dbg(dev, "Incorrect security state: %d\n",
-				nvdimm->sec.state);
-		return -EIO;
-	}
-
-	if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
-		dev_dbg(dev, "Security operation in progress.\n");
-		return -EBUSY;
-	}
+	rc = check_security_state(nvdimm);
+	if (rc)
+		return rc;
 
 	data = nvdimm_get_user_key_payload(nvdimm, keyid,
 			NVDIMM_BASE_KEY, &key);
@@ -412,7 +407,7 @@ int nvdimm_security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
 	if (rc == 0) {
 		set_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
 		set_bit(NDD_WORK_PENDING, &nvdimm->flags);
-		nvdimm->sec.state = NVDIMM_SECURITY_OVERWRITE;
+		set_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags);
 		/*
 		 * Make sure we don't lose device while doing overwrite
 		 * query.
@@ -443,7 +438,7 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
 	tmo = nvdimm->sec.overwrite_tmo;
 
 	if (!nvdimm->sec.ops || !nvdimm->sec.ops->query_overwrite
-			|| nvdimm->sec.state < 0)
+			|| !nvdimm->sec.flags)
 		return;
 
 	rc = nvdimm->sec.ops->query_overwrite(nvdimm);
@@ -467,8 +462,8 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
 	clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
 	clear_bit(NDD_WORK_PENDING, &nvdimm->flags);
 	put_device(&nvdimm->dev);
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
-	nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
 }
 
 void nvdimm_security_overwrite_query(struct work_struct *work)
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 7a64b3ddb408..b6eddf912568 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -160,8 +160,11 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 
 }
 
-enum nvdimm_security_state {
-	NVDIMM_SECURITY_ERROR = -1,
+/*
+ * Note that separate bits for locked + unlocked are defined so that
+ * 'flags == 0' corresponds to an error / not-supported state.
+ */
+enum nvdimm_security_bits {
 	NVDIMM_SECURITY_DISABLED,
 	NVDIMM_SECURITY_UNLOCKED,
 	NVDIMM_SECURITY_LOCKED,
@@ -182,7 +185,7 @@ enum nvdimm_passphrase_type {
 };
 
 struct nvdimm_security_ops {
-	enum nvdimm_security_state (*state)(struct nvdimm *nvdimm,
+	unsigned long (*get_flags)(struct nvdimm *nvdimm,
 			enum nvdimm_passphrase_type pass_type);
 	int (*freeze)(struct nvdimm *nvdimm);
 	int (*change_key)(struct nvdimm *nvdimm,
diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c
index 2d4baf57822f..57bd27dedf1f 100644
--- a/tools/testing/nvdimm/dimm_devs.c
+++ b/tools/testing/nvdimm/dimm_devs.c
@@ -18,24 +18,13 @@ ssize_t security_show(struct device *dev,
 	 * For the test version we need to poll the "hardware" in order
 	 * to get the updated status for unlock testing.
 	 */
-	nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER);
-	nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER);
+	nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
 
-	switch (nvdimm->sec.state) {
-	case NVDIMM_SECURITY_DISABLED:
+	if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
 		return sprintf(buf, "disabled\n");
-	case NVDIMM_SECURITY_UNLOCKED:
+	if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
 		return sprintf(buf, "unlocked\n");
-	case NVDIMM_SECURITY_LOCKED:
+	if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
 		return sprintf(buf, "locked\n");
-	case NVDIMM_SECURITY_FROZEN:
-		return sprintf(buf, "frozen\n");
-	case NVDIMM_SECURITY_OVERWRITE:
-		return sprintf(buf, "overwrite\n");
-	default:
-		return -ENOTTY;
-	}
-
 	return -ENOTTY;
 }
-
-- 
cgit v1.2.3


From bd9eccf14008732f8ea4a7efc7839911e56053a1 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Fri, 23 Aug 2019 11:04:14 +0200
Subject: dt-bindings: power: add Amlogic Everything-Else power domains
 bindings

Add the bindings for the Amlogic Everything-Else power domains,
controlling the Everything-Else peripherals power domains.

The bindings targets the Amlogic G12A and SM1 compatible SoCs,
support for earlier SoCs will be added later.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 .../bindings/power/amlogic,meson-ee-pwrc.yaml      | 93 ++++++++++++++++++++++
 include/dt-bindings/power/meson-g12a-power.h       | 13 +++
 include/dt-bindings/power/meson-sm1-power.h        | 18 +++++
 3 files changed, 124 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml
 create mode 100644 include/dt-bindings/power/meson-g12a-power.h
 create mode 100644 include/dt-bindings/power/meson-sm1-power.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml
new file mode 100644
index 000000000000..aab70e8b681e
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/amlogic,meson-ee-pwrc.yaml
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 BayLibre, SAS
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/power/amlogic,meson-ee-pwrc.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Amlogic Meson Everything-Else Power Domains
+
+maintainers:
+  - Neil Armstrong <narmstrong@baylibre.com>
+
+description: |+
+  The Everything-Else Power Domains node should be the child of a syscon
+  node with the required property:
+
+  - compatible: Should be the following:
+                "amlogic,meson-gx-hhi-sysctrl", "simple-mfd", "syscon"
+
+  Refer to the the bindings described in
+  Documentation/devicetree/bindings/mfd/syscon.txt
+
+properties:
+  compatible:
+    enum:
+      - amlogic,meson-g12a-pwrc
+      - amlogic,meson-sm1-pwrc
+
+  clocks:
+    minItems: 2
+
+  clock-names:
+    items:
+      - const: vpu
+      - const: vapb
+
+  resets:
+    minItems: 11
+
+  reset-names:
+    items:
+      - const: viu
+      - const: venc
+      - const: vcbus
+      - const: bt656
+      - const: rdma
+      - const: venci
+      - const: vencp
+      - const: vdac
+      - const: vdi6
+      - const: vencl
+      - const: vid_lock
+
+  "#power-domain-cells":
+    const: 1
+
+  amlogic,ao-sysctrl:
+    description: phandle to the AO sysctrl node
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/phandle
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - "#power-domain-cells"
+  - amlogic,ao-sysctrl
+
+examples:
+  - |
+    pwrc: power-controller {
+          compatible = "amlogic,meson-sm1-pwrc";
+          #power-domain-cells = <1>;
+          amlogic,ao-sysctrl = <&rti>;
+          resets = <&reset_viu>,
+                   <&reset_venc>,
+                   <&reset_vcbus>,
+                   <&reset_bt656>,
+                   <&reset_rdma>,
+                   <&reset_venci>,
+                   <&reset_vencp>,
+                   <&reset_vdac>,
+                   <&reset_vdi6>,
+                   <&reset_vencl>,
+                   <&reset_vid_lock>;
+          reset-names = "viu", "venc", "vcbus", "bt656",
+                        "rdma", "venci", "vencp", "vdac",
+                        "vdi6", "vencl", "vid_lock";
+          clocks = <&clk_vpu>, <&clk_vapb>;
+          clock-names = "vpu", "vapb";
+    };
diff --git a/include/dt-bindings/power/meson-g12a-power.h b/include/dt-bindings/power/meson-g12a-power.h
new file mode 100644
index 000000000000..bb5e67a842de
--- /dev/null
+++ b/include/dt-bindings/power/meson-g12a-power.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/*
+ * Copyright (c) 2019 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ */
+
+#ifndef _DT_BINDINGS_MESON_G12A_POWER_H
+#define _DT_BINDINGS_MESON_G12A_POWER_H
+
+#define PWRC_G12A_VPU_ID		0
+#define PWRC_G12A_ETH_ID		1
+
+#endif
diff --git a/include/dt-bindings/power/meson-sm1-power.h b/include/dt-bindings/power/meson-sm1-power.h
new file mode 100644
index 000000000000..a020ab00c134
--- /dev/null
+++ b/include/dt-bindings/power/meson-sm1-power.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (GPL-2.0+ or MIT) */
+/*
+ * Copyright (c) 2019 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ */
+
+#ifndef _DT_BINDINGS_MESON_SM1_POWER_H
+#define _DT_BINDINGS_MESON_SM1_POWER_H
+
+#define PWRC_SM1_VPU_ID		0
+#define PWRC_SM1_NNA_ID		1
+#define PWRC_SM1_USB_ID		2
+#define PWRC_SM1_PCIE_ID	3
+#define PWRC_SM1_GE2D_ID	4
+#define PWRC_SM1_AUDIO_ID	5
+#define PWRC_SM1_ETH_ID		6
+
+#endif
-- 
cgit v1.2.3


From ec8ca8a3489cc87ca43a865af7e7393690841810 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 30 Aug 2019 13:56:10 +1000
Subject: usb: gadget: net2280: Move all "ll" registers in one structure

The split into multiple structures of the "ll" register bank is
impractical. It makes it hard to add ll_lfps_timers_2 which is
at offset 0x794, which is outside of the existing "lfps" structure
and would require us to add yet another one.

Instead, move all the "ll" registers into a single usb338x_ll_regs
structure, and add ll_lfps_timers_2 while at it. It will be used
in a subsequent patch.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/net2280.c | 28 +++++++++++-----------------
 drivers/usb/gadget/udc/net2280.h |  3 ---
 include/linux/usb/usb338x.h      | 35 +++++++++++++++++++++++++----------
 3 files changed, 36 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index b6bbe2e448ba..e0191146ba22 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -2244,30 +2244,30 @@ static void usb_reinit_338x(struct net2280 *dev)
 	}
 
 	/* Hardware Defect and Workaround */
-	val = readl(&dev->ll_lfps_regs->ll_lfps_5);
+	val = readl(&dev->llregs->ll_lfps_5);
 	val &= ~(0xf << TIMER_LFPS_6US);
 	val |= 0x5 << TIMER_LFPS_6US;
-	writel(val, &dev->ll_lfps_regs->ll_lfps_5);
+	writel(val, &dev->llregs->ll_lfps_5);
 
-	val = readl(&dev->ll_lfps_regs->ll_lfps_6);
+	val = readl(&dev->llregs->ll_lfps_6);
 	val &= ~(0xffff << TIMER_LFPS_80US);
 	val |= 0x0100 << TIMER_LFPS_80US;
-	writel(val, &dev->ll_lfps_regs->ll_lfps_6);
+	writel(val, &dev->llregs->ll_lfps_6);
 
 	/*
 	 * AA_AB Errata. Issue 4. Workaround for SuperSpeed USB
 	 * Hot Reset Exit Handshake may Fail in Specific Case using
 	 * Default Register Settings. Workaround for Enumeration test.
 	 */
-	val = readl(&dev->ll_tsn_regs->ll_tsn_counters_2);
+	val = readl(&dev->llregs->ll_tsn_counters_2);
 	val &= ~(0x1f << HOT_TX_NORESET_TS2);
 	val |= 0x10 << HOT_TX_NORESET_TS2;
-	writel(val, &dev->ll_tsn_regs->ll_tsn_counters_2);
+	writel(val, &dev->llregs->ll_tsn_counters_2);
 
-	val = readl(&dev->ll_tsn_regs->ll_tsn_counters_3);
+	val = readl(&dev->llregs->ll_tsn_counters_3);
 	val &= ~(0x1f << HOT_RX_RESET_TS2);
 	val |= 0x3 << HOT_RX_RESET_TS2;
-	writel(val, &dev->ll_tsn_regs->ll_tsn_counters_3);
+	writel(val, &dev->llregs->ll_tsn_counters_3);
 
 	/*
 	 * Set Recovery Idle to Recover bit:
@@ -2276,10 +2276,10 @@ static void usb_reinit_338x(struct net2280 *dev)
 	 * - It is safe to set for all connection speeds; all chip revisions.
 	 * - R-M-W to leave other bits undisturbed.
 	 * - Reference PLX TT-7372
-	 */
-	val = readl(&dev->ll_chicken_reg->ll_tsn_chicken_bit);
+	*/
+	val = readl(&dev->llregs->ll_tsn_chicken_bit);
 	val |= BIT(RECOVERY_IDLE_TO_RECOVER_FMW);
-	writel(val, &dev->ll_chicken_reg->ll_tsn_chicken_bit);
+	writel(val, &dev->llregs->ll_tsn_chicken_bit);
 
 	INIT_LIST_HEAD(&dev->gadget.ep0->ep_list);
 
@@ -3669,12 +3669,6 @@ static int net2280_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 							(base + 0x00b4);
 		dev->llregs = (struct usb338x_ll_regs __iomem *)
 							(base + 0x0700);
-		dev->ll_lfps_regs = (struct usb338x_ll_lfps_regs __iomem *)
-							(base + 0x0748);
-		dev->ll_tsn_regs = (struct usb338x_ll_tsn_regs __iomem *)
-							(base + 0x077c);
-		dev->ll_chicken_reg = (struct usb338x_ll_chi_regs __iomem *)
-							(base + 0x079c);
 		dev->plregs = (struct usb338x_pl_regs __iomem *)
 							(base + 0x0800);
 		usbstat = readl(&dev->usb->usbstat);
diff --git a/drivers/usb/gadget/udc/net2280.h b/drivers/usb/gadget/udc/net2280.h
index b65a797544d7..85d3ca1698ba 100644
--- a/drivers/usb/gadget/udc/net2280.h
+++ b/drivers/usb/gadget/udc/net2280.h
@@ -178,9 +178,6 @@ struct net2280 {
 	struct net2280_dep_regs		__iomem *dep;
 	struct net2280_ep_regs		__iomem *epregs;
 	struct usb338x_ll_regs		__iomem *llregs;
-	struct usb338x_ll_lfps_regs	__iomem *ll_lfps_regs;
-	struct usb338x_ll_tsn_regs	__iomem *ll_tsn_regs;
-	struct usb338x_ll_chi_regs	__iomem *ll_chicken_reg;
 	struct usb338x_pl_regs		__iomem *plregs;
 
 	struct dma_pool			*requests;
diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h
index 7189e3387bf9..20020c1336d5 100644
--- a/include/linux/usb/usb338x.h
+++ b/include/linux/usb/usb338x.h
@@ -113,7 +113,10 @@ struct usb338x_ll_regs {
 	u32   ll_ltssm_ctrl1;
 	u32   ll_ltssm_ctrl2;
 	u32   ll_ltssm_ctrl3;
-	u32   unused[2];
+	u32   unused1;
+
+	/* 0x710 */
+	u32   unused2;
 	u32   ll_general_ctrl0;
 	u32   ll_general_ctrl1;
 #define     PM_U3_AUTO_EXIT                                     29
@@ -136,29 +139,41 @@ struct usb338x_ll_regs {
 	u32   ll_general_ctrl2;
 #define     SELECT_INVERT_LANE_POLARITY                         7
 #define     FORCE_INVERT_LANE_POLARITY                          6
+
+	/* 0x720 */
 	u32   ll_general_ctrl3;
 	u32   ll_general_ctrl4;
 	u32   ll_error_gen;
-} __packed;
+	u32   unused3;
+
+	/* 0x730 */
+	u32   unused4[4];
 
-struct usb338x_ll_lfps_regs {
-	/* offset 0x748 */
+	/* 0x740 */
+	u32   unused5[2];
 	u32   ll_lfps_5;
 #define     TIMER_LFPS_6US                                      16
 	u32   ll_lfps_6;
 #define     TIMER_LFPS_80US                                     0
-} __packed;
 
-struct usb338x_ll_tsn_regs {
-	/* offset 0x77C */
+	/* 0x750 */
+	u32   unused6[8];
+
+	/* 0x770 */
+	u32   unused7[3];
 	u32   ll_tsn_counters_2;
 #define     HOT_TX_NORESET_TS2                                  24
+
+	/* 0x780 */
 	u32   ll_tsn_counters_3;
 #define     HOT_RX_RESET_TS2                                    0
-} __packed;
+	u32   unused8[3];
 
-struct usb338x_ll_chi_regs {
-	/* offset 0x79C */
+	/* 0x790 */
+	u32   unused9;
+	u32   ll_lfps_timers_2;
+#define     LFPS_TIMERS_2_WORKAROUND_VALUE			0x084d
+	u32   unused10;
 	u32   ll_tsn_chicken_bit;
 #define     RECOVERY_IDLE_TO_RECOVER_FMW                        3
 } __packed;
-- 
cgit v1.2.3


From c8cd6e7f159e6f8d79a23df4aeaa7a540415951b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 28 Aug 2019 12:20:42 +0200
Subject: cfg80211: add local BSS receive time to survey information

This is useful for checking how much airtime is being used up by other
transmissions on the channel, e.g. by calculating (time_rx - time_bss_rx)
or (time_busy - time_bss_rx - time_tx)

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Link: https://lore.kernel.org/r/20190828102042.58016-1-nbd@nbd.name
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 4 ++++
 include/uapi/linux/nl80211.h | 3 +++
 net/wireless/nl80211.c       | 4 ++++
 3 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5253e7f667bd..ff45c3e1abff 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -768,6 +768,7 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef)
  * @SURVEY_INFO_TIME_RX: receive time was filled in
  * @SURVEY_INFO_TIME_TX: transmit time was filled in
  * @SURVEY_INFO_TIME_SCAN: scan time was filled in
+ * @SURVEY_INFO_TIME_BSS_RX: local BSS receive time was filled in
  *
  * Used by the driver to indicate which info in &struct survey_info
  * it has filled in during the get_survey().
@@ -781,6 +782,7 @@ enum survey_info_flags {
 	SURVEY_INFO_TIME_RX		= BIT(5),
 	SURVEY_INFO_TIME_TX		= BIT(6),
 	SURVEY_INFO_TIME_SCAN		= BIT(7),
+	SURVEY_INFO_TIME_BSS_RX		= BIT(8),
 };
 
 /**
@@ -797,6 +799,7 @@ enum survey_info_flags {
  * @time_rx: amount of time the radio spent receiving data
  * @time_tx: amount of time the radio spent transmitting data
  * @time_scan: amount of time the radio spent for scanning
+ * @time_bss_rx: amount of time the radio spent receiving data on a local BSS
  *
  * Used by dump_survey() to report back per-channel survey information.
  *
@@ -811,6 +814,7 @@ struct survey_info {
 	u64 time_rx;
 	u64 time_tx;
 	u64 time_scan;
+	u64 time_bss_rx;
 	u32 filled;
 	s8 noise;
 };
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index bf7c4222f512..beee59c831a7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3870,6 +3870,8 @@ enum nl80211_user_reg_hint_type {
  * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan
  *	(on this channel or globally)
  * @NL80211_SURVEY_INFO_PAD: attribute used for padding for 64-bit alignment
+ * @NL80211_SURVEY_INFO_TIME_BSS_RX: amount of time the radio spent
+ *	receiving frames destined to the local BSS
  * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number
  *	currently defined
  * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use
@@ -3886,6 +3888,7 @@ enum nl80211_survey_info {
 	NL80211_SURVEY_INFO_TIME_TX,
 	NL80211_SURVEY_INFO_TIME_SCAN,
 	NL80211_SURVEY_INFO_PAD,
+	NL80211_SURVEY_INFO_TIME_BSS_RX,
 
 	/* keep last */
 	__NL80211_SURVEY_INFO_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8a6cef949210..3e30e18d1d89 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8806,6 +8806,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
 	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN,
 			      survey->time_scan, NL80211_SURVEY_INFO_PAD))
 		goto nla_put_failure;
+	if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) &&
+	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX,
+			      survey->time_bss_rx, NL80211_SURVEY_INFO_PAD))
+		goto nla_put_failure;
 
 	nla_nest_end(msg, infoattr);
 
-- 
cgit v1.2.3


From 3a8e9ac89e6a5106cfb6b85d4c9cf9bfa3519bc7 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 29 Aug 2019 15:47:19 -0700
Subject: writeback: add tracepoints for cgroup foreign writebacks

cgroup foreign inode handling has quite a bit of heuristics and
internal states which sometimes makes it difficult to understand
what's going on.  Add tracepoints to improve visibility.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c                |   5 ++
 include/trace/events/writeback.h | 123 +++++++++++++++++++++++++++++++++++++++
 mm/memcontrol.c                  |   5 ++
 3 files changed, 133 insertions(+)

(limited to 'include')

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 658dc16c9e6d..8aaa7eec7b74 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -389,6 +389,8 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
 	if (unlikely(inode->i_state & I_FREEING))
 		goto skip_switch;
 
+	trace_inode_switch_wbs(inode, old_wb, new_wb);
+
 	/*
 	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
 	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
@@ -673,6 +675,9 @@ void wbc_detach_inode(struct writeback_control *wbc)
 		if (wbc->wb_id != max_id)
 			history |= (1U << slots) - 1;
 
+		if (history)
+			trace_inode_foreign_history(inode, wbc, history);
+
 		/*
 		 * Switch if the current wb isn't the consistent winner.
 		 * If there are multiple closely competing dirtiers, the
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index aa7f3aeac740..3dc9fb9e7c78 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -176,6 +176,129 @@ static inline unsigned int __trace_wbc_assign_cgroup(struct writeback_control *w
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 #endif	/* CREATE_TRACE_POINTS */
 
+#ifdef CONFIG_CGROUP_WRITEBACK
+TRACE_EVENT(inode_foreign_history,
+
+	TP_PROTO(struct inode *inode, struct writeback_control *wbc,
+		 unsigned int history),
+
+	TP_ARGS(inode, wbc, history),
+
+	TP_STRUCT__entry(
+		__array(char,		name, 32)
+		__field(unsigned long,	ino)
+		__field(unsigned int,	cgroup_ino)
+		__field(unsigned int,	history)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(inode_to_bdi(inode)->dev), 32);
+		__entry->ino		= inode->i_ino;
+		__entry->cgroup_ino	= __trace_wbc_assign_cgroup(wbc);
+		__entry->history	= history;
+	),
+
+	TP_printk("bdi %s: ino=%lu cgroup_ino=%u history=0x%x",
+		__entry->name,
+		__entry->ino,
+		__entry->cgroup_ino,
+		__entry->history
+	)
+);
+
+TRACE_EVENT(inode_switch_wbs,
+
+	TP_PROTO(struct inode *inode, struct bdi_writeback *old_wb,
+		 struct bdi_writeback *new_wb),
+
+	TP_ARGS(inode, old_wb, new_wb),
+
+	TP_STRUCT__entry(
+		__array(char,		name, 32)
+		__field(unsigned long,	ino)
+		__field(unsigned int,	old_cgroup_ino)
+		__field(unsigned int,	new_cgroup_ino)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name,	dev_name(old_wb->bdi->dev), 32);
+		__entry->ino		= inode->i_ino;
+		__entry->old_cgroup_ino	= __trace_wb_assign_cgroup(old_wb);
+		__entry->new_cgroup_ino	= __trace_wb_assign_cgroup(new_wb);
+	),
+
+	TP_printk("bdi %s: ino=%lu old_cgroup_ino=%u new_cgroup_ino=%u",
+		__entry->name,
+		__entry->ino,
+		__entry->old_cgroup_ino,
+		__entry->new_cgroup_ino
+	)
+);
+
+TRACE_EVENT(track_foreign_dirty,
+
+	TP_PROTO(struct page *page, struct bdi_writeback *wb),
+
+	TP_ARGS(page, wb),
+
+	TP_STRUCT__entry(
+		__array(char,		name, 32)
+		__field(u64,		bdi_id)
+		__field(unsigned long,	ino)
+		__field(unsigned int,	memcg_id)
+		__field(unsigned int,	cgroup_ino)
+		__field(unsigned int,	page_cgroup_ino)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name,	dev_name(wb->bdi->dev), 32);
+		__entry->bdi_id		= wb->bdi->id;
+		__entry->ino		= page->mapping->host->i_ino;
+		__entry->memcg_id	= wb->memcg_css->id;
+		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
+		__entry->page_cgroup_ino = page->mem_cgroup->css.cgroup->kn->id.ino;
+	),
+
+	TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%u page_cgroup_ino=%u",
+		__entry->name,
+		__entry->bdi_id,
+		__entry->ino,
+		__entry->memcg_id,
+		__entry->cgroup_ino,
+		__entry->page_cgroup_ino
+	)
+);
+
+TRACE_EVENT(flush_foreign,
+
+	TP_PROTO(struct bdi_writeback *wb, unsigned int frn_bdi_id,
+		 unsigned int frn_memcg_id),
+
+	TP_ARGS(wb, frn_bdi_id, frn_memcg_id),
+
+	TP_STRUCT__entry(
+		__array(char,		name, 32)
+		__field(unsigned int,	cgroup_ino)
+		__field(unsigned int,	frn_bdi_id)
+		__field(unsigned int,	frn_memcg_id)
+	),
+
+	TP_fast_assign(
+		strncpy(__entry->name,	dev_name(wb->bdi->dev), 32);
+		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
+		__entry->frn_bdi_id	= frn_bdi_id;
+		__entry->frn_memcg_id	= frn_memcg_id;
+	),
+
+	TP_printk("bdi %s: cgroup_ino=%u frn_bdi_id=%u frn_memcg_id=%u",
+		__entry->name,
+		__entry->cgroup_ino,
+		__entry->frn_bdi_id,
+		__entry->frn_memcg_id
+	)
+);
+#endif
+
 DECLARE_EVENT_CLASS(writeback_write_inode_template,
 
 	TP_PROTO(struct inode *inode, struct writeback_control *wbc),
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 89b65f5ca634..4390994e8be9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4066,6 +4066,8 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
 
 #ifdef CONFIG_CGROUP_WRITEBACK
 
+#include <trace/events/writeback.h>
+
 static int memcg_wb_domain_init(struct mem_cgroup *memcg, gfp_t gfp)
 {
 	return wb_domain_init(&memcg->cgwb_domain, gfp);
@@ -4203,6 +4205,8 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
 	int oldest = -1;
 	int i;
 
+	trace_track_foreign_dirty(page, wb);
+
 	/*
 	 * Pick the slot to use.  If there is already a slot for @wb, keep
 	 * using it.  If not replace the oldest one which isn't being
@@ -4263,6 +4267,7 @@ void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
 		if (time_after64(frn->at, now - intv) &&
 		    atomic_read(&frn->done.cnt) == 1) {
 			frn->at = 0;
+			trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
 			cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
 					       WB_REASON_FOREIGN_FLUSH,
 					       &frn->done);
-- 
cgit v1.2.3


From 29746d012588f1de8517fc6921683c3844120989 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Sat, 24 Aug 2019 11:01:46 +0800
Subject: dt-bindings: mediatek: Add binding for mt8183 IOMMU and SMI

This patch adds decriptions for mt8183 IOMMU and SMI.

mt8183 has only one M4U like mt8173 and is also MTK IOMMU gen2 which
uses ARM Short-Descriptor translation table format.

The mt8183 M4U-SMI HW diagram is as below:

                          EMI
                           |
                          M4U
                           |
                       ----------
                       |        |
                   gals0-rx   gals1-rx
                       |        |
                       |        |
                   gals0-tx   gals1-tx
                       |        |
                      ------------
                       SMI Common
                      ------------
                           |
  +-----+-----+--------+-----+-----+-------+-------+
  |     |     |        |     |     |       |       |
  |     |  gals-rx  gals-rx  |   gals-rx gals-rx gals-rx
  |     |     |        |     |     |       |       |
  |     |     |        |     |     |       |       |
  |     |  gals-tx  gals-tx  |   gals-tx gals-tx gals-tx
  |     |     |        |     |     |       |       |
larb0 larb1  IPU0    IPU1  larb4  larb5  larb6    CCU
disp  vdec   img     cam    venc   img    cam

All the connections are HW fixed, SW can NOT adjust it.

Compared with mt8173, we add a GALS(Global Async Local Sync) module
between SMI-common and M4U, and additional GALS between larb2/3/5/6
and SMI-common. GALS can help synchronize for the modules in different
clock frequency, it can be seen as a "asynchronous fifo".

GALS can only help transfer the command/data while it doesn't have
the configuring register, thus it has the special "smi" clock and it
doesn't have the "apb" clock. From the diagram above, we add "gals0"
and "gals1" clocks for smi-common and add a "gals" clock for smi-larb.

>From the diagram above, IPU0/IPU1(Image Processor Unit) and CCU(Camera
Control Unit) is connected with smi-common directly, we can take them
as "larb2", "larb3" and "larb7", and their register spaces are
different with the normal larb.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 .../devicetree/bindings/iommu/mediatek,iommu.txt   |  30 ++++-
 .../memory-controllers/mediatek,smi-common.txt     |  12 +-
 .../memory-controllers/mediatek,smi-larb.txt       |   4 +
 include/dt-bindings/memory/mt8183-larb-port.h      | 130 +++++++++++++++++++++
 4 files changed, 170 insertions(+), 6 deletions(-)
 create mode 100644 include/dt-bindings/memory/mt8183-larb-port.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
index 6922db598def..ce59a505f5a4 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.txt
@@ -11,10 +11,23 @@ ARM Short-Descriptor translation table format for address translation.
                |
               m4u (Multimedia Memory Management Unit)
                |
+          +--------+
+          |        |
+      gals0-rx   gals1-rx    (Global Async Local Sync rx)
+          |        |
+          |        |
+      gals0-tx   gals1-tx    (Global Async Local Sync tx)
+          |        |          Some SoCs may have GALS.
+          +--------+
+               |
            SMI Common(Smart Multimedia Interface Common)
                |
        +----------------+-------
        |                |
+       |             gals-rx        There may be GALS in some larbs.
+       |                |
+       |                |
+       |             gals-tx
        |                |
    SMI larb0        SMI larb1   ... SoCs have several SMI local arbiter(larb).
    (display)         (vdec)
@@ -36,6 +49,10 @@ each local arbiter.
 like display, video decode, and camera. And there are different ports
 in each larb. Take a example, There are many ports like MC, PP, VLD in the
 video decode local arbiter, all these ports are according to the video HW.
+  In some SoCs, there may be a GALS(Global Async Local Sync) module between
+smi-common and m4u, and additional GALS module between smi-larb and
+smi-common. GALS can been seen as a "asynchronous fifo" which could help
+synchronize for the modules in different clock frequency.
 
 Required properties:
 - compatible : must be one of the following string:
@@ -44,18 +61,25 @@ Required properties:
 	"mediatek,mt7623-m4u", "mediatek,mt2701-m4u" for mt7623 which uses
 						     generation one m4u HW.
 	"mediatek,mt8173-m4u" for mt8173 which uses generation two m4u HW.
+	"mediatek,mt8183-m4u" for mt8183 which uses generation two m4u HW.
 - reg : m4u register base and size.
 - interrupts : the interrupt of m4u.
 - clocks : must contain one entry for each clock-names.
-- clock-names : must be "bclk", It is the block clock of m4u.
+- clock-names : Only 1 optional clock:
+  - "bclk": the block clock of m4u.
+  Here is the list which require this "bclk":
+  - mt2701, mt2712, mt7623 and mt8173.
+  Note that m4u use the EMI clock which always has been enabled before kernel
+  if there is no this "bclk".
 - mediatek,larbs : List of phandle to the local arbiters in the current Socs.
 	Refer to bindings/memory-controllers/mediatek,smi-larb.txt. It must sort
 	according to the local arbiter index, like larb0, larb1, larb2...
 - iommu-cells : must be 1. This is the mtk_m4u_id according to the HW.
 	Specifies the mtk_m4u_id as defined in
 	dt-binding/memory/mt2701-larb-port.h for mt2701, mt7623
-	dt-binding/memory/mt2712-larb-port.h for mt2712, and
-	dt-binding/memory/mt8173-larb-port.h for mt8173.
+	dt-binding/memory/mt2712-larb-port.h for mt2712,
+	dt-binding/memory/mt8173-larb-port.h for mt8173, and
+	dt-binding/memory/mt8183-larb-port.h for mt8183.
 
 Example:
 	iommu: iommu@10205000 {
diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
index e937ddd871a6..b478ade4da65 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-common.txt
@@ -2,9 +2,10 @@ SMI (Smart Multimedia Interface) Common
 
 The hardware block diagram please check bindings/iommu/mediatek,iommu.txt
 
-Mediatek SMI have two generations of HW architecture, mt2712 and mt8173 use
-the second generation of SMI HW while mt2701 uses the first generation HW of
-SMI.
+Mediatek SMI have two generations of HW architecture, here is the list
+which generation the SoCs use:
+generation 1: mt2701 and mt7623.
+generation 2: mt2712, mt8173 and mt8183.
 
 There's slight differences between the two SMI, for generation 2, the
 register which control the iommu port is at each larb's register base. But
@@ -19,6 +20,7 @@ Required properties:
 	"mediatek,mt2712-smi-common"
 	"mediatek,mt7623-smi-common", "mediatek,mt2701-smi-common"
 	"mediatek,mt8173-smi-common"
+	"mediatek,mt8183-smi-common"
 - reg : the register and size of the SMI block.
 - power-domains : a phandle to the power domain of this local arbiter.
 - clocks : Must contain an entry for each entry in clock-names.
@@ -30,6 +32,10 @@ Required properties:
 	    They may be the same if both source clocks are the same.
   - "async" : asynchronous clock, it help transform the smi clock into the emi
 	      clock domain, this clock is only needed by generation 1 smi HW.
+  and these 2 option clocks for generation 2 smi HW:
+  - "gals0": the path0 clock of GALS(Global Async Local Sync).
+  - "gals1": the path1 clock of GALS(Global Async Local Sync).
+  Here is the list which has this GALS: mt8183.
 
 Example:
 	smi_common: smi@14022000 {
diff --git a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
index 94eddcae77ab..4b369b3e1a69 100644
--- a/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/mediatek,smi-larb.txt
@@ -8,6 +8,7 @@ Required properties:
 		"mediatek,mt2712-smi-larb"
 		"mediatek,mt7623-smi-larb", "mediatek,mt2701-smi-larb"
 		"mediatek,mt8173-smi-larb"
+		"mediatek,mt8183-smi-larb"
 - reg : the register and size of this local arbiter.
 - mediatek,smi : a phandle to the smi_common node.
 - power-domains : a phandle to the power domain of this local arbiter.
@@ -16,6 +17,9 @@ Required properties:
   - "apb" : Advanced Peripheral Bus clock, It's the clock for setting
 	    the register.
   - "smi" : It's the clock for transfer data and command.
+  and this optional clock name:
+  - "gals": the clock for GALS(Global Async Local Sync).
+  Here is the list which has this GALS: mt8183.
 
 Required property for mt2701, mt2712 and mt7623:
 - mediatek,larb-id :the hardware id of this larb.
diff --git a/include/dt-bindings/memory/mt8183-larb-port.h b/include/dt-bindings/memory/mt8183-larb-port.h
new file mode 100644
index 000000000000..2c579f305162
--- /dev/null
+++ b/include/dt-bindings/memory/mt8183-larb-port.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 MediaTek Inc.
+ * Author: Yong Wu <yong.wu@mediatek.com>
+ */
+#ifndef __DTS_IOMMU_PORT_MT8183_H
+#define __DTS_IOMMU_PORT_MT8183_H
+
+#define MTK_M4U_ID(larb, port)		(((larb) << 5) | (port))
+
+#define M4U_LARB0_ID			0
+#define M4U_LARB1_ID			1
+#define M4U_LARB2_ID			2
+#define M4U_LARB3_ID			3
+#define M4U_LARB4_ID			4
+#define M4U_LARB5_ID			5
+#define M4U_LARB6_ID			6
+#define M4U_LARB7_ID			7
+
+/* larb0 */
+#define	M4U_PORT_DISP_OVL0		MTK_M4U_ID(M4U_LARB0_ID, 0)
+#define	M4U_PORT_DISP_2L_OVL0_LARB0     MTK_M4U_ID(M4U_LARB0_ID, 1)
+#define	M4U_PORT_DISP_2L_OVL1_LARB0     MTK_M4U_ID(M4U_LARB0_ID, 2)
+#define	M4U_PORT_DISP_RDMA0		MTK_M4U_ID(M4U_LARB0_ID, 3)
+#define	M4U_PORT_DISP_RDMA1		MTK_M4U_ID(M4U_LARB0_ID, 4)
+#define	M4U_PORT_DISP_WDMA0		MTK_M4U_ID(M4U_LARB0_ID, 5)
+#define	M4U_PORT_MDP_RDMA0		MTK_M4U_ID(M4U_LARB0_ID, 6)
+#define	M4U_PORT_MDP_WROT0		MTK_M4U_ID(M4U_LARB0_ID, 7)
+#define	M4U_PORT_MDP_WDMA0		MTK_M4U_ID(M4U_LARB0_ID, 8)
+#define	M4U_PORT_DISP_FAKE0		MTK_M4U_ID(M4U_LARB0_ID, 9)
+
+/* larb1 */
+#define	M4U_PORT_HW_VDEC_MC_EXT		MTK_M4U_ID(M4U_LARB1_ID, 0)
+#define	M4U_PORT_HW_VDEC_PP_EXT         MTK_M4U_ID(M4U_LARB1_ID, 1)
+#define	M4U_PORT_HW_VDEC_VLD_EXT	MTK_M4U_ID(M4U_LARB1_ID, 2)
+#define	M4U_PORT_HW_VDEC_AVC_MV_EXT     MTK_M4U_ID(M4U_LARB1_ID, 3)
+#define	M4U_PORT_HW_VDEC_PRED_RD_EXT	MTK_M4U_ID(M4U_LARB1_ID, 4)
+#define	M4U_PORT_HW_VDEC_PRED_WR_EXT	MTK_M4U_ID(M4U_LARB1_ID, 5)
+#define	M4U_PORT_HW_VDEC_PPWRAP_EXT	MTK_M4U_ID(M4U_LARB1_ID, 6)
+
+/* larb2 VPU0 */
+#define	M4U_PORT_IMG_IPUO		MTK_M4U_ID(M4U_LARB2_ID, 0)
+#define	M4U_PORT_IMG_IPU3O		MTK_M4U_ID(M4U_LARB2_ID, 1)
+#define	M4U_PORT_IMG_IPUI		MTK_M4U_ID(M4U_LARB2_ID, 2)
+
+/* larb3 VPU1 */
+#define	M4U_PORT_CAM_IPUO		MTK_M4U_ID(M4U_LARB3_ID, 0)
+#define	M4U_PORT_CAM_IPU2O		MTK_M4U_ID(M4U_LARB3_ID, 1)
+#define	M4U_PORT_CAM_IPU3O		MTK_M4U_ID(M4U_LARB3_ID, 2)
+#define	M4U_PORT_CAM_IPUI		MTK_M4U_ID(M4U_LARB3_ID, 3)
+#define	M4U_PORT_CAM_IPU2I		MTK_M4U_ID(M4U_LARB3_ID, 4)
+
+/* larb4 */
+#define	M4U_PORT_VENC_RCPU		MTK_M4U_ID(M4U_LARB4_ID, 0)
+#define	M4U_PORT_VENC_REC		MTK_M4U_ID(M4U_LARB4_ID, 1)
+#define	M4U_PORT_VENC_BSDMA		MTK_M4U_ID(M4U_LARB4_ID, 2)
+#define	M4U_PORT_VENC_SV_COMV		MTK_M4U_ID(M4U_LARB4_ID, 3)
+#define	M4U_PORT_VENC_RD_COMV		MTK_M4U_ID(M4U_LARB4_ID, 4)
+#define	M4U_PORT_JPGENC_RDMA		MTK_M4U_ID(M4U_LARB4_ID, 5)
+#define	M4U_PORT_JPGENC_BSDMA		MTK_M4U_ID(M4U_LARB4_ID, 6)
+#define	M4U_PORT_VENC_CUR_LUMA		MTK_M4U_ID(M4U_LARB4_ID, 7)
+#define	M4U_PORT_VENC_CUR_CHROMA	MTK_M4U_ID(M4U_LARB4_ID, 8)
+#define	M4U_PORT_VENC_REF_LUMA		MTK_M4U_ID(M4U_LARB4_ID, 9)
+#define	M4U_PORT_VENC_REF_CHROMA	MTK_M4U_ID(M4U_LARB4_ID, 10)
+
+/* larb5 */
+#define	M4U_PORT_CAM_IMGI		MTK_M4U_ID(M4U_LARB5_ID, 0)
+#define	M4U_PORT_CAM_IMG2O		MTK_M4U_ID(M4U_LARB5_ID, 1)
+#define	M4U_PORT_CAM_IMG3O		MTK_M4U_ID(M4U_LARB5_ID, 2)
+#define	M4U_PORT_CAM_VIPI		MTK_M4U_ID(M4U_LARB5_ID, 3)
+#define	M4U_PORT_CAM_LCEI		MTK_M4U_ID(M4U_LARB5_ID, 4)
+#define	M4U_PORT_CAM_SMXI		MTK_M4U_ID(M4U_LARB5_ID, 5)
+#define	M4U_PORT_CAM_SMXO		MTK_M4U_ID(M4U_LARB5_ID, 6)
+#define	M4U_PORT_CAM_WPE0_RDMA1		MTK_M4U_ID(M4U_LARB5_ID, 7)
+#define	M4U_PORT_CAM_WPE0_RDMA0		MTK_M4U_ID(M4U_LARB5_ID, 8)
+#define	M4U_PORT_CAM_WPE0_WDMA		MTK_M4U_ID(M4U_LARB5_ID, 9)
+#define	M4U_PORT_CAM_FDVT_RP		MTK_M4U_ID(M4U_LARB5_ID, 10)
+#define	M4U_PORT_CAM_FDVT_WR		MTK_M4U_ID(M4U_LARB5_ID, 11)
+#define	M4U_PORT_CAM_FDVT_RB		MTK_M4U_ID(M4U_LARB5_ID, 12)
+#define	M4U_PORT_CAM_WPE1_RDMA0		MTK_M4U_ID(M4U_LARB5_ID, 13)
+#define	M4U_PORT_CAM_WPE1_RDMA1		MTK_M4U_ID(M4U_LARB5_ID, 14)
+#define	M4U_PORT_CAM_WPE1_WDMA		MTK_M4U_ID(M4U_LARB5_ID, 15)
+#define	M4U_PORT_CAM_DPE_RDMA		MTK_M4U_ID(M4U_LARB5_ID, 16)
+#define	M4U_PORT_CAM_DPE_WDMA		MTK_M4U_ID(M4U_LARB5_ID, 17)
+#define	M4U_PORT_CAM_MFB_RDMA0		MTK_M4U_ID(M4U_LARB5_ID, 18)
+#define	M4U_PORT_CAM_MFB_RDMA1		MTK_M4U_ID(M4U_LARB5_ID, 19)
+#define	M4U_PORT_CAM_MFB_WDMA		MTK_M4U_ID(M4U_LARB5_ID, 20)
+#define	M4U_PORT_CAM_RSC_RDMA0		MTK_M4U_ID(M4U_LARB5_ID, 21)
+#define	M4U_PORT_CAM_RSC_WDMA		MTK_M4U_ID(M4U_LARB5_ID, 22)
+#define	M4U_PORT_CAM_OWE_RDMA		MTK_M4U_ID(M4U_LARB5_ID, 23)
+#define	M4U_PORT_CAM_OWE_WDMA		MTK_M4U_ID(M4U_LARB5_ID, 24)
+
+/* larb6 */
+#define	M4U_PORT_CAM_IMGO		MTK_M4U_ID(M4U_LARB6_ID, 0)
+#define	M4U_PORT_CAM_RRZO		MTK_M4U_ID(M4U_LARB6_ID, 1)
+#define	M4U_PORT_CAM_AAO		MTK_M4U_ID(M4U_LARB6_ID, 2)
+#define	M4U_PORT_CAM_AFO		MTK_M4U_ID(M4U_LARB6_ID, 3)
+#define	M4U_PORT_CAM_LSCI0		MTK_M4U_ID(M4U_LARB6_ID, 4)
+#define	M4U_PORT_CAM_LSCI1		MTK_M4U_ID(M4U_LARB6_ID, 5)
+#define	M4U_PORT_CAM_PDO		MTK_M4U_ID(M4U_LARB6_ID, 6)
+#define	M4U_PORT_CAM_BPCI		MTK_M4U_ID(M4U_LARB6_ID, 7)
+#define	M4U_PORT_CAM_LCSO		MTK_M4U_ID(M4U_LARB6_ID, 8)
+#define	M4U_PORT_CAM_CAM_RSSO_A		MTK_M4U_ID(M4U_LARB6_ID, 9)
+#define	M4U_PORT_CAM_UFEO		MTK_M4U_ID(M4U_LARB6_ID, 10)
+#define	M4U_PORT_CAM_SOCO		MTK_M4U_ID(M4U_LARB6_ID, 11)
+#define	M4U_PORT_CAM_SOC1		MTK_M4U_ID(M4U_LARB6_ID, 12)
+#define	M4U_PORT_CAM_SOC2		MTK_M4U_ID(M4U_LARB6_ID, 13)
+#define	M4U_PORT_CAM_CCUI		MTK_M4U_ID(M4U_LARB6_ID, 14)
+#define	M4U_PORT_CAM_CCUO		MTK_M4U_ID(M4U_LARB6_ID, 15)
+#define	M4U_PORT_CAM_RAWI_A		MTK_M4U_ID(M4U_LARB6_ID, 16)
+#define	M4U_PORT_CAM_CCUG		MTK_M4U_ID(M4U_LARB6_ID, 17)
+#define	M4U_PORT_CAM_PSO		MTK_M4U_ID(M4U_LARB6_ID, 18)
+#define	M4U_PORT_CAM_AFO_1		MTK_M4U_ID(M4U_LARB6_ID, 19)
+#define	M4U_PORT_CAM_LSCI_2		MTK_M4U_ID(M4U_LARB6_ID, 20)
+#define	M4U_PORT_CAM_PDI		MTK_M4U_ID(M4U_LARB6_ID, 21)
+#define	M4U_PORT_CAM_FLKO		MTK_M4U_ID(M4U_LARB6_ID, 22)
+#define	M4U_PORT_CAM_LMVO		MTK_M4U_ID(M4U_LARB6_ID, 23)
+#define	M4U_PORT_CAM_UFGO		MTK_M4U_ID(M4U_LARB6_ID, 24)
+#define	M4U_PORT_CAM_SPARE		MTK_M4U_ID(M4U_LARB6_ID, 25)
+#define	M4U_PORT_CAM_SPARE_2		MTK_M4U_ID(M4U_LARB6_ID, 26)
+#define	M4U_PORT_CAM_SPARE_3		MTK_M4U_ID(M4U_LARB6_ID, 27)
+#define	M4U_PORT_CAM_SPARE_4		MTK_M4U_ID(M4U_LARB6_ID, 28)
+#define	M4U_PORT_CAM_SPARE_5		MTK_M4U_ID(M4U_LARB6_ID, 29)
+#define	M4U_PORT_CAM_SPARE_6		MTK_M4U_ID(M4U_LARB6_ID, 30)
+
+/* CCU */
+#define	M4U_PORT_CCU0			MTK_M4U_ID(M4U_LARB7_ID, 0)
+#define	M4U_PORT_CCU1			MTK_M4U_ID(M4U_LARB7_ID, 1)
+
+#endif
-- 
cgit v1.2.3


From 73d50811bc91d2a173213a78b6b43ac762f6cc54 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Sat, 24 Aug 2019 11:01:53 +0800
Subject: iommu/io-pgtable-arm-v7s: Rename the quirk from MTK_4GB to MTK_EXT

In previous mt2712/mt8173, MediaTek extend the v7s to support 4GB dram.
But in the latest mt8183, We extend it to support the PA up to 34bit.
Then the "MTK_4GB" name is not so fit, This patch only change the quirk
name to "MTK_EXT".

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 6 +++---
 drivers/iommu/mtk_iommu.c          | 2 +-
 include/linux/io-pgtable.h         | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index fa1b38f4c271..77cc1eb1243b 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -315,7 +315,7 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
 	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
 		pte |= ARM_V7S_ATTR_NS_SECTION;
 
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
 		pte |= ARM_V7S_ATTR_MTK_4GB;
 
 	return pte;
@@ -737,12 +737,12 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
 			    IO_PGTABLE_QUIRK_NO_PERMS |
 			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
-			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
+			    IO_PGTABLE_QUIRK_ARM_MTK_EXT |
 			    IO_PGTABLE_QUIRK_NON_STRICT))
 		return NULL;
 
 	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB &&
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT &&
 	    !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
 			return NULL;
 
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 9ba270620767..62edce75c366 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -296,7 +296,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
 	};
 
 	if (data->enable_4GB)
-		dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_4GB;
+		dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_EXT;
 
 	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
 	if (!dom->iop) {
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index b5a450a3bb47..915fb7303aa3 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -65,7 +65,7 @@ struct io_pgtable_cfg {
 	 *	(unmapped) entries but the hardware might do so anyway, perform
 	 *	TLB maintenance when mapping as well as when unmapping.
 	 *
-	 * IO_PGTABLE_QUIRK_ARM_MTK_4GB: (ARM v7s format) Set bit 9 in all
+	 * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) Set bit 9 in all
 	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
 	 *	when the SoC is in "4GB mode" and they can only access the high
 	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
@@ -77,7 +77,7 @@ struct io_pgtable_cfg {
 	#define IO_PGTABLE_QUIRK_ARM_NS		BIT(0)
 	#define IO_PGTABLE_QUIRK_NO_PERMS	BIT(1)
 	#define IO_PGTABLE_QUIRK_TLBI_ON_MAP	BIT(2)
-	#define IO_PGTABLE_QUIRK_ARM_MTK_4GB	BIT(3)
+	#define IO_PGTABLE_QUIRK_ARM_MTK_EXT	BIT(3)
 	#define IO_PGTABLE_QUIRK_NON_STRICT	BIT(4)
 	unsigned long			quirks;
 	unsigned long			pgsize_bitmap;
-- 
cgit v1.2.3


From 4c019de653237674d38cf2b3119153b144ffe173 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Sat, 24 Aug 2019 11:01:54 +0800
Subject: iommu/io-pgtable-arm-v7s: Extend to support PA[33:32] for MediaTek

MediaTek extend the arm v7s descriptor to support up to 34 bits PA where
the bit32 and bit33 are encoded in the bit9 and bit4 of the PTE
respectively. Meanwhile the iova still is 32bits.

Regarding whether the pagetable address could be over 4GB, the mt8183
support it while the previous mt8173 don't, thus keep it as is.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 40 +++++++++++++++++++++++++++++++-------
 include/linux/io-pgtable.h         |  7 +++----
 2 files changed, 36 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 77cc1eb1243b..545287148147 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -112,7 +112,9 @@
 #define ARM_V7S_TEX_MASK		0x7
 #define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)
 
-#define ARM_V7S_ATTR_MTK_4GB		BIT(9) /* MTK extend it for 4GB mode */
+/* MediaTek extend the two bits for PA 32bit/33bit */
+#define ARM_V7S_ATTR_MTK_PA_BIT32	BIT(9)
+#define ARM_V7S_ATTR_MTK_PA_BIT33	BIT(4)
 
 /* *well, except for TEX on level 2 large pages, of course :( */
 #define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
@@ -176,16 +178,32 @@ static dma_addr_t __arm_v7s_dma_addr(void *pages)
 	return (dma_addr_t)virt_to_phys(pages);
 }
 
+static bool arm_v7s_is_mtk_enabled(struct io_pgtable_cfg *cfg)
+{
+	return IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT) &&
+		(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT);
+}
+
 static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
 				    struct io_pgtable_cfg *cfg)
 {
-	return paddr & ARM_V7S_LVL_MASK(lvl);
+	arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+
+	if (!arm_v7s_is_mtk_enabled(cfg))
+		return pte;
+
+	if (paddr & BIT_ULL(32))
+		pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
+	if (paddr & BIT_ULL(33))
+		pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+	return pte;
 }
 
 static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
 				  struct io_pgtable_cfg *cfg)
 {
 	arm_v7s_iopte mask;
+	phys_addr_t paddr;
 
 	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
 		mask = ARM_V7S_TABLE_MASK;
@@ -194,7 +212,15 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
 	else
 		mask = ARM_V7S_LVL_MASK(lvl);
 
-	return pte & mask;
+	paddr = pte & mask;
+	if (!arm_v7s_is_mtk_enabled(cfg))
+		return paddr;
+
+	if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
+		paddr |= BIT_ULL(32);
+	if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+		paddr |= BIT_ULL(33);
+	return paddr;
 }
 
 static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
@@ -315,9 +341,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
 	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
 		pte |= ARM_V7S_ATTR_NS_SECTION;
 
-	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
-		pte |= ARM_V7S_ATTR_MTK_4GB;
-
 	return pte;
 }
 
@@ -731,7 +754,10 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 {
 	struct arm_v7s_io_pgtable *data;
 
-	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+	if (cfg->ias > ARM_V7S_ADDR_BITS)
+		return NULL;
+
+	if (cfg->oas > (arm_v7s_is_mtk_enabled(cfg) ? 34 : ARM_V7S_ADDR_BITS))
 		return NULL;
 
 	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 915fb7303aa3..a2a52c349fe4 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -65,10 +65,9 @@ struct io_pgtable_cfg {
 	 *	(unmapped) entries but the hardware might do so anyway, perform
 	 *	TLB maintenance when mapping as well as when unmapping.
 	 *
-	 * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) Set bit 9 in all
-	 *	PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
-	 *	when the SoC is in "4GB mode" and they can only access the high
-	 *	remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+	 * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend
+	 *	to support up to 34 bits PA where the bit32 and bit33 are
+	 *	encoded in the bit9 and bit4 of the PTE respectively.
 	 *
 	 * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
 	 *	on unmap, for DMA domains using the flush queue mechanism for
-- 
cgit v1.2.3


From ec2da07ca1202552d87fb01b238d46642817da2b Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Sat, 24 Aug 2019 11:02:07 +0800
Subject: memory: mtk-smi: Get rid of need_larbid

The "mediatek,larb-id" has already been parsed in MTK IOMMU driver.
It's no need to parse it again in SMI driver. Only clean some codes.
This patch is fit for all the current mt2701, mt2712, mt7623, mt8173
and mt8183.

After this patch, the "mediatek,larb-id" only be needed for mt2712
which have 2 M4Us. In the other SoCs, we can get the larb-id from M4U
in which the larbs in the "mediatek,larbs" always are ordered.

Correspondingly, the larb_nr in the "struct mtk_smi_iommu" could also
be deleted.

CC: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: Evan Green <evgreen@chromium.org>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c    |  1 -
 drivers/iommu/mtk_iommu_v1.c |  2 --
 drivers/memory/mtk-smi.c     | 26 ++------------------------
 include/soc/mediatek/smi.h   |  1 -
 4 files changed, 2 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5d5341c85650..cc81de27c5fe 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -656,7 +656,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 					     "mediatek,larbs", NULL);
 	if (larb_nr < 0)
 		return larb_nr;
-	data->smi_imu.larb_nr = larb_nr;
 
 	for (i = 0; i < larb_nr; i++) {
 		struct device_node *larbnode;
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index abeeac488372..3922358d13bf 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -616,8 +616,6 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 		larb_nr++;
 	}
 
-	data->smi_imu.larb_nr = larb_nr;
-
 	platform_set_drvdata(pdev, data);
 
 	ret = mtk_iommu_hw_init(data);
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index 289e59543a04..d6dc62fd89fe 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -59,7 +59,6 @@ struct mtk_smi_common_plat {
 };
 
 struct mtk_smi_larb_gen {
-	bool need_larbid;
 	int port_in_larb[MTK_LARB_NR_MAX + 1];
 	void (*config_port)(struct device *);
 	unsigned int			larb_direct_to_common_mask;
@@ -147,18 +146,9 @@ mtk_smi_larb_bind(struct device *dev, struct device *master, void *data)
 	struct mtk_smi_iommu *smi_iommu = data;
 	unsigned int         i;
 
-	if (larb->larb_gen->need_larbid) {
-		larb->mmu = &smi_iommu->larb_imu[larb->larbid].mmu;
-		return 0;
-	}
-
-	/*
-	 * If there is no larbid property, Loop to find the corresponding
-	 * iommu information.
-	 */
-	for (i = 0; i < smi_iommu->larb_nr; i++) {
+	for (i = 0; i < MTK_LARB_NR_MAX; i++) {
 		if (dev == smi_iommu->larb_imu[i].dev) {
-			/* The 'mmu' may be updated in iommu-attach/detach. */
+			larb->larbid = i;
 			larb->mmu = &smi_iommu->larb_imu[i].mmu;
 			return 0;
 		}
@@ -237,7 +227,6 @@ static const struct mtk_smi_larb_gen mtk_smi_larb_mt8173 = {
 };
 
 static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = {
-	.need_larbid = true,
 	.port_in_larb = {
 		LARB0_PORT_OFFSET, LARB1_PORT_OFFSET,
 		LARB2_PORT_OFFSET, LARB3_PORT_OFFSET
@@ -246,7 +235,6 @@ static const struct mtk_smi_larb_gen mtk_smi_larb_mt2701 = {
 };
 
 static const struct mtk_smi_larb_gen mtk_smi_larb_mt2712 = {
-	.need_larbid = true,
 	.config_port                = mtk_smi_larb_config_port_gen2_general,
 	.larb_direct_to_common_mask = BIT(8) | BIT(9),      /* bdpsys */
 };
@@ -285,7 +273,6 @@ static int mtk_smi_larb_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *smi_node;
 	struct platform_device *smi_pdev;
-	int err;
 
 	larb = devm_kzalloc(dev, sizeof(*larb), GFP_KERNEL);
 	if (!larb)
@@ -315,15 +302,6 @@ static int mtk_smi_larb_probe(struct platform_device *pdev)
 	}
 	larb->smi.dev = dev;
 
-	if (larb->larb_gen->need_larbid) {
-		err = of_property_read_u32(dev->of_node, "mediatek,larb-id",
-					   &larb->larbid);
-		if (err) {
-			dev_err(dev, "missing larbid property\n");
-			return err;
-		}
-	}
-
 	smi_node = of_parse_phandle(dev->of_node, "mediatek,smi", 0);
 	if (!smi_node)
 		return -EINVAL;
diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h
index 79b74ced9d91..6f0b00cc73ea 100644
--- a/include/soc/mediatek/smi.h
+++ b/include/soc/mediatek/smi.h
@@ -21,7 +21,6 @@ struct mtk_smi_larb_iommu {
 };
 
 struct mtk_smi_iommu {
-	unsigned int larb_nr;
 	struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX];
 };
 
-- 
cgit v1.2.3


From 1ee9feb2c9f893b893c900d2492c6a01dca680f3 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Sat, 24 Aug 2019 11:02:08 +0800
Subject: iommu/mediatek: Clean up struct mtk_smi_iommu

Remove the "struct mtk_smi_iommu" to simplify the code since it has only
one item in it right now.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c    | 4 ++--
 drivers/iommu/mtk_iommu.h    | 6 +++---
 drivers/iommu/mtk_iommu_v1.c | 4 ++--
 drivers/memory/mtk-smi.c     | 6 +++---
 include/soc/mediatek/smi.h   | 4 ----
 5 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index cc81de27c5fe..400066d0d37b 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -278,7 +278,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
 	for (i = 0; i < fwspec->num_ids; ++i) {
 		larbid = MTK_M4U_TO_LARB(fwspec->ids[i]);
 		portid = MTK_M4U_TO_PORT(fwspec->ids[i]);
-		larb_mmu = &data->smi_imu.larb_imu[larbid];
+		larb_mmu = &data->larb_imu[larbid];
 
 		dev_dbg(dev, "%s iommu port: %d\n",
 			enable ? "enable" : "disable", portid);
@@ -680,7 +680,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 			of_node_put(larbnode);
 			return -EPROBE_DEFER;
 		}
-		data->smi_imu.larb_imu[id].dev = &plarbdev->dev;
+		data->larb_imu[id].dev = &plarbdev->dev;
 
 		component_match_add_release(dev, &match, release_of,
 					    compare_of, larbnode);
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 56b579c5a088..fc0f16eabacd 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -56,7 +56,6 @@ struct mtk_iommu_data {
 	struct mtk_iommu_suspend_reg	reg;
 	struct mtk_iommu_domain		*m4u_dom;
 	struct iommu_group		*m4u_group;
-	struct mtk_smi_iommu		smi_imu;      /* SMI larb iommu info */
 	bool                            enable_4GB;
 	bool				tlb_flush_active;
 
@@ -64,6 +63,7 @@ struct mtk_iommu_data {
 	const struct mtk_iommu_plat_data *plat_data;
 
 	struct list_head		list;
+	struct mtk_smi_larb_iommu	larb_imu[MTK_LARB_NR_MAX];
 };
 
 static inline int compare_of(struct device *dev, void *data)
@@ -80,14 +80,14 @@ static inline int mtk_iommu_bind(struct device *dev)
 {
 	struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
-	return component_bind_all(dev, &data->smi_imu);
+	return component_bind_all(dev, &data->larb_imu);
 }
 
 static inline void mtk_iommu_unbind(struct device *dev)
 {
 	struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
-	component_unbind_all(dev, &data->smi_imu);
+	component_unbind_all(dev, &data->larb_imu);
 }
 
 #endif
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 3922358d13bf..860926c65903 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -206,7 +206,7 @@ static void mtk_iommu_config(struct mtk_iommu_data *data,
 	for (i = 0; i < fwspec->num_ids; ++i) {
 		larbid = mt2701_m4u_to_larb(fwspec->ids[i]);
 		portid = mt2701_m4u_to_port(fwspec->ids[i]);
-		larb_mmu = &data->smi_imu.larb_imu[larbid];
+		larb_mmu = &data->larb_imu[larbid];
 
 		dev_dbg(dev, "%s iommu port: %d\n",
 			enable ? "enable" : "disable", portid);
@@ -610,7 +610,7 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 			}
 		}
 
-		data->smi_imu.larb_imu[larb_nr].dev = &plarbdev->dev;
+		data->larb_imu[larb_nr].dev = &plarbdev->dev;
 		component_match_add_release(dev, &match, release_of,
 					    compare_of, larb_spec.np);
 		larb_nr++;
diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c
index d6dc62fd89fe..439d7d886873 100644
--- a/drivers/memory/mtk-smi.c
+++ b/drivers/memory/mtk-smi.c
@@ -143,13 +143,13 @@ static int
 mtk_smi_larb_bind(struct device *dev, struct device *master, void *data)
 {
 	struct mtk_smi_larb *larb = dev_get_drvdata(dev);
-	struct mtk_smi_iommu *smi_iommu = data;
+	struct mtk_smi_larb_iommu *larb_mmu = data;
 	unsigned int         i;
 
 	for (i = 0; i < MTK_LARB_NR_MAX; i++) {
-		if (dev == smi_iommu->larb_imu[i].dev) {
+		if (dev == larb_mmu[i].dev) {
 			larb->larbid = i;
-			larb->mmu = &smi_iommu->larb_imu[i].mmu;
+			larb->mmu = &larb_mmu[i].mmu;
 			return 0;
 		}
 	}
diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h
index 6f0b00cc73ea..5a34b87d89e3 100644
--- a/include/soc/mediatek/smi.h
+++ b/include/soc/mediatek/smi.h
@@ -20,10 +20,6 @@ struct mtk_smi_larb_iommu {
 	unsigned int   mmu;
 };
 
-struct mtk_smi_iommu {
-	struct mtk_smi_larb_iommu larb_imu[MTK_LARB_NR_MAX];
-};
-
 /*
  * mtk_smi_larb_get: Enable the power domain and clocks for this local arbiter.
  *                   It also initialize some basic setting(like iommu).
-- 
cgit v1.2.3


From 188d20bcd1ebd8277d9b8a79525bd66b66d40a2a Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sun, 21 Jan 2018 18:04:23 -0800
Subject: vfs: Add file timestamp range support

Add fields to the superblock to track the min and max
timestamps supported by filesystems.

Initially, when a superblock is allocated, initialize
it to the max and min values the fields can hold.
Individual filesystems override these to match their
actual limits.

Pseudo filesystems are assumed to always support the
min and max allowable values for the fields.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
---
 fs/super.c             | 2 ++
 include/linux/fs.h     | 3 +++
 include/linux/time64.h | 2 ++
 3 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 5960578a4076..3e232e8c51b9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -257,6 +257,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
 	s->s_maxbytes = MAX_NON_LFS;
 	s->s_op = &default_op;
 	s->s_time_gran = 1000000000;
+	s->s_time_min = TIME64_MIN;
+	s->s_time_max = TIME64_MAX;
 	s->cleancache_poolid = CLEANCACHE_NO_POOL;
 
 	s->s_shrink.seeks = DEFAULT_SEEKS;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 997a530ff4e9..4b349851b00c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1448,6 +1448,9 @@ struct super_block {
 
 	/* Granularity of c/m/atime in ns (cannot be worse than a second) */
 	u32			s_time_gran;
+	/* Time limits for c/m/atime in seconds */
+	time64_t		   s_time_min;
+	time64_t		   s_time_max;
 #ifdef CONFIG_FSNOTIFY
 	__u32			s_fsnotify_mask;
 	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
diff --git a/include/linux/time64.h b/include/linux/time64.h
index a620ee610b9f..19125489ae94 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -30,6 +30,8 @@ struct itimerspec64 {
 
 /* Located here for timespec[64]_valid_strict */
 #define TIME64_MAX			((s64)~((u64)1 << 63))
+#define TIME64_MIN			(-TIME64_MAX - 1)
+
 #define KTIME_MAX			((s64)~((u64)1 << 63))
 #define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
 
-- 
cgit v1.2.3


From 50e17c000c467fbc927fc001df99beb4027a5323 Mon Sep 17 00:00:00 2001
From: Deepa Dinamani <deepa.kernel@gmail.com>
Date: Sun, 21 Jan 2018 18:04:25 -0800
Subject: vfs: Add timestamp_truncate() api

timespec_trunc() function is used to truncate a
filesystem timestamp to the right granularity.
But, the function does not clamp tv_sec part of the
timestamps according to the filesystem timestamp limits.

The replacement api: timestamp_truncate() also alters the
signature of the function to accommodate filesystem
timestamp clamping according to flesystem limits.

Note that the tv_nsec part is set to 0 if tv_sec is not within
the range supported for the filesystem.

Signed-off-by: Deepa Dinamani <deepa.kernel@gmail.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
---
 fs/inode.c         | 33 ++++++++++++++++++++++++++++++++-
 include/linux/fs.h |  2 ++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 0f1e3b563c47..64bf28cf05cd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2166,6 +2166,37 @@ struct timespec64 timespec64_trunc(struct timespec64 t, unsigned gran)
 }
 EXPORT_SYMBOL(timespec64_trunc);
 
+/**
+ * timestamp_truncate - Truncate timespec to a granularity
+ * @t: Timespec
+ * @inode: inode being updated
+ *
+ * Truncate a timespec to the granularity supported by the fs
+ * containing the inode. Always rounds down. gran must
+ * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
+ */
+struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
+{
+	struct super_block *sb = inode->i_sb;
+	unsigned int gran = sb->s_time_gran;
+
+	t.tv_sec = clamp(t.tv_sec, sb->s_time_min, sb->s_time_max);
+	if (unlikely(t.tv_sec == sb->s_time_max || t.tv_sec == sb->s_time_min))
+		t.tv_nsec = 0;
+
+	/* Avoid division in the common cases 1 ns and 1 s. */
+	if (gran == 1)
+		; /* nothing */
+	else if (gran == NSEC_PER_SEC)
+		t.tv_nsec = 0;
+	else if (gran > 1 && gran < NSEC_PER_SEC)
+		t.tv_nsec -= t.tv_nsec % gran;
+	else
+		WARN(1, "invalid file time granularity: %u", gran);
+	return t;
+}
+EXPORT_SYMBOL(timestamp_truncate);
+
 /**
  * current_time - Return FS time
  * @inode: inode.
@@ -2187,7 +2218,7 @@ struct timespec64 current_time(struct inode *inode)
 		return now;
 	}
 
-	return timespec64_trunc(now, inode->i_sb->s_time_gran);
+	return timestamp_truncate(now, inode);
 }
 EXPORT_SYMBOL(current_time);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b349851b00c..7e6be3bf0ce0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -726,6 +726,8 @@ struct inode {
 	void			*i_private; /* fs or device private pointer */
 } __randomize_layout;
 
+struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
+
 static inline unsigned int i_blocksize(const struct inode *node)
 {
 	return (1 << node->i_blkbits);
-- 
cgit v1.2.3


From d1bbf38aaf882240cccca414e5a48db42e105da7 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 30 Jul 2019 08:04:00 -0500
Subject: PCI: Fix typos and whitespace errors

Fix typos in drivers/pci.  Comment and whitespace changes only.

Link: https://lore.kernel.org/r/20190819115306.27338-1-kw@linux.com
Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>	# armada8k
---
 Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt | 2 +-
 Documentation/devicetree/bindings/pci/pci-armada8k.txt   | 2 +-
 drivers/pci/Kconfig                                      | 2 +-
 drivers/pci/vc.c                                         | 1 -
 include/linux/pci.h                                      | 4 ++--
 5 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
index a7f5f5afa0e6..de4b2baf91e8 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
@@ -50,7 +50,7 @@ Additional required properties for imx7d-pcie and imx8mq-pcie:
 - power-domains: Must be set to a phandle pointing to PCIE_PHY power domain
 - resets: Must contain phandles to PCIe-related reset lines exposed by SRC
   IP block
-- reset-names: Must contain the following entires:
+- reset-names: Must contain the following entries:
 	       - "pciephy"
 	       - "apps"
 	       - "turnoff"
diff --git a/Documentation/devicetree/bindings/pci/pci-armada8k.txt b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
index 9e3fc15e1af8..1aaa09254001 100644
--- a/Documentation/devicetree/bindings/pci/pci-armada8k.txt
+++ b/Documentation/devicetree/bindings/pci/pci-armada8k.txt
@@ -11,7 +11,7 @@ Required properties:
 - reg-names:
    - "ctrl" for the control register region
    - "config" for the config space region
-- interrupts: Interrupt specifier for the PCIe controler
+- interrupts: Interrupt specifier for the PCIe controller
 - clocks: reference to the PCIe controller clocks
 - clock-names: mandatory if there is a second clock, in this case the
    name must be "core" for the first clock and "reg" for the second
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab92409210a..46f4912a370d 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -170,7 +170,7 @@ config PCI_P2PDMA
 
 	  Many PCIe root complexes do not support P2P transactions and
 	  it's hard to tell which support it at all, so at this time,
-	  P2P DMA transations must be between devices behind the same root
+	  P2P DMA transactions must be between devices behind the same root
 	  port.
 
 	  If unsure, say N.
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index 5acd9c02683a..b39e854b80ec 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -409,7 +409,6 @@ void pci_restore_vc_state(struct pci_dev *dev)
  * For each type of VC capability, VC/VC9/MFVC, find the capability, size
  * it, and allocate a buffer for save/restore.
  */
-
 void pci_allocate_vc_save_buffers(struct pci_dev *dev)
 {
 	int i;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..c133579c01fa 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -969,7 +969,7 @@ resource_size_t pcibios_align_resource(void *, const struct resource *,
 				resource_size_t,
 				resource_size_t);
 
-/* Weak but can be overriden by arch */
+/* Weak but can be overridden by arch */
 void pci_fixup_cardbus(struct pci_bus *);
 
 /* Generic PCI functions used internally */
@@ -1801,7 +1801,7 @@ static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; }
 
 #include <asm/pci.h>
 
-/* These two functions provide almost identical functionality. Depennding
+/* These two functions provide almost identical functionality. Depending
  * on the architecture, one will be implemented as a wrapper around the
  * other (in drivers/pci/mmap.c).
  *
-- 
cgit v1.2.3


From 3940ba8eea8c6c89f547a1bd153977cece5fecd2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 17 Aug 2019 09:32:41 +0200
Subject: asm-generic: don't provide __ioremap

__ioremap is not a kernel API, but used for helpers with differing
semantics in arch code.  We should not provide it in as-generic.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com>
Tested-by: Paul Walmsley <paul.walmsley@sifive.com> # rv32, rv64 boot
Acked-by: Paul Walmsley <paul.walmsley@sifive.com> # arch/riscv
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/io.h | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index b83e2802c969..d02806513670 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -963,15 +963,6 @@ static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
 }
 #endif
 
-#ifndef __ioremap
-#define __ioremap __ioremap
-static inline void __iomem *__ioremap(phys_addr_t offset, size_t size,
-				      unsigned long flags)
-{
-	return ioremap(offset, size);
-}
-#endif
-
 #ifndef iounmap
 #define iounmap iounmap
 
-- 
cgit v1.2.3


From c05cd3645814724bdeb32a2b4d953b12bdea5f8c Mon Sep 17 00:00:00 2001
From: Kevin Laatz <kevin.laatz@intel.com>
Date: Tue, 27 Aug 2019 02:25:22 +0000
Subject: xsk: add support to allow unaligned chunk placement

Currently, addresses are chunk size aligned. This means, we are very
restricted in terms of where we can place chunk within the umem. For
example, if we have a chunk size of 2k, then our chunks can only be placed
at 0,2k,4k,6k,8k... and so on (ie. every 2k starting from 0).

This patch introduces the ability to use unaligned chunks. With these
changes, we are no longer bound to having to place chunks at a 2k (or
whatever your chunk size is) interval. Since we are no longer dealing with
aligned chunks, they can now cross page boundaries. Checks for page
contiguity have been added in order to keep track of which pages are
followed by a physically contiguous page.

Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h      | 75 ++++++++++++++++++++++++++++++++++--
 include/uapi/linux/if_xdp.h |  9 +++++
 net/xdp/xdp_umem.c          | 19 ++++++---
 net/xdp/xsk.c               | 94 ++++++++++++++++++++++++++++++++++++---------
 net/xdp/xsk_diag.c          |  2 +-
 net/xdp/xsk_queue.h         | 70 +++++++++++++++++++++++++++++----
 6 files changed, 233 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index f023b9940d64..c9398ce7960f 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -16,6 +16,13 @@
 struct net_device;
 struct xsk_queue;
 
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
+
 struct xdp_umem_page {
 	void *addr;
 	dma_addr_t dma;
@@ -27,8 +34,12 @@ struct xdp_umem_fq_reuse {
 	u64 handles[];
 };
 
-/* Flags for the umem flags field. */
-#define XDP_UMEM_USES_NEED_WAKEUP (1 << 0)
+/* Flags for the umem flags field.
+ *
+ * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
+ * flags. See inlude/uapi/include/linux/if_xdp.h.
+ */
+#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
 
 struct xdp_umem {
 	struct xsk_queue *fq;
@@ -124,14 +135,36 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
 
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+	return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
-	return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
+	unsigned long page_addr;
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
+
+	return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
 }
 
 static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
 {
-	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
+	addr = xsk_umem_add_offset_to_addr(addr);
+
+	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
 }
 
 /* Reuse-queue aware version of FILL queue helpers */
@@ -172,6 +205,19 @@ static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
 
 	rq->handles[rq->length++] = addr;
 }
+
+/* Handle the offset appropriately depending on aligned or unaligned mode.
+ * For unaligned mode, we store the offset in the upper 16-bits of the address.
+ * For aligned mode, we simply add the offset to the address.
+ */
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
+					 u64 offset)
+{
+	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+	else
+		return address + offset;
+}
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
@@ -241,6 +287,21 @@ static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
 	return NULL;
 }
 
+static inline u64 xsk_umem_extract_addr(u64 addr)
+{
+	return 0;
+}
+
+static inline u64 xsk_umem_extract_offset(u64 addr)
+{
+	return 0;
+}
+
+static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
+{
+	return 0;
+}
+
 static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
 {
 	return NULL;
@@ -290,6 +351,12 @@ static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 	return false;
 }
 
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
+					 u64 offset)
+{
+	return 0;
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index 62b80d57b72a..be328c59389d 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -26,6 +26,9 @@
  */
 #define XDP_USE_NEED_WAKEUP (1 << 3)
 
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
+
 struct sockaddr_xdp {
 	__u16 sxdp_family;
 	__u16 sxdp_flags;
@@ -66,6 +69,7 @@ struct xdp_umem_reg {
 	__u64 len; /* Length of packet data area */
 	__u32 chunk_size;
 	__u32 headroom;
+	__u32 flags;
 };
 
 struct xdp_statistics {
@@ -87,6 +91,11 @@ struct xdp_options {
 #define XDP_UMEM_PGOFF_FILL_RING	0x100000000ULL
 #define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000ULL
 
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+	((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
 /* Rx/Tx descriptor */
 struct xdp_desc {
 	__u64 addr;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 2d65779282a1..e997b263a0dd 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -340,6 +340,7 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
 
 static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 {
+	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 	unsigned int chunks, chunks_per_page;
 	u64 addr = mr->addr, size = mr->len;
@@ -355,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 		return -EINVAL;
 	}
 
-	if (!is_power_of_2(chunk_size))
+	if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG |
+			XDP_UMEM_USES_NEED_WAKEUP))
+		return -EINVAL;
+
+	if (!unaligned_chunks && !is_power_of_2(chunk_size))
 		return -EINVAL;
 
 	if (!PAGE_ALIGNED(addr)) {
@@ -372,9 +377,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (chunks == 0)
 		return -EINVAL;
 
-	chunks_per_page = PAGE_SIZE / chunk_size;
-	if (chunks < chunks_per_page || chunks % chunks_per_page)
-		return -EINVAL;
+	if (!unaligned_chunks) {
+		chunks_per_page = PAGE_SIZE / chunk_size;
+		if (chunks < chunks_per_page || chunks % chunks_per_page)
+			return -EINVAL;
+	}
 
 	headroom = ALIGN(headroom, 64);
 
@@ -383,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 		return -EINVAL;
 
 	umem->address = (unsigned long)addr;
-	umem->chunk_mask = ~((u64)chunk_size - 1);
+	umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
+					    : ~((u64)chunk_size - 1);
 	umem->size = size;
 	umem->headroom = headroom;
 	umem->chunk_size_nohr = chunk_size - headroom;
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
 	umem->user = NULL;
+	umem->flags = mr->flags;
 	INIT_LIST_HEAD(&umem->xsk_list);
 	spin_lock_init(&umem->xsk_list_lock);
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index ee4428a892fa..187fd157fcff 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs);
 
 u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
 {
-	return xskq_peek_addr(umem->fq, addr);
+	return xskq_peek_addr(umem->fq, addr, umem);
 }
 EXPORT_SYMBOL(xsk_umem_peek_addr);
 
@@ -115,21 +115,43 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 }
 EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
 
+/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
+ * each page. This is only required in copy mode.
+ */
+static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
+			     u32 len, u32 metalen)
+{
+	void *to_buf = xdp_umem_get_data(umem, addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+		void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
+		u64 page_start = addr & ~(PAGE_SIZE - 1);
+		u64 first_len = PAGE_SIZE - (addr - page_start);
+
+		memcpy(to_buf, from_buf, first_len + metalen);
+		memcpy(next_pg_addr, from_buf + first_len, len - first_len);
+
+		return;
+	}
+
+	memcpy(to_buf, from_buf, len + metalen);
+}
+
 static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	void *to_buf, *from_buf;
+	u64 offset = xs->umem->headroom;
+	u64 addr, memcpy_addr;
+	void *from_buf;
 	u32 metalen;
-	u64 addr;
 	int err;
 
-	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+	if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
 	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		xs->rx_dropped++;
 		return -ENOSPC;
 	}
 
-	addr += xs->umem->headroom;
-
 	if (unlikely(xdp_data_meta_unsupported(xdp))) {
 		from_buf = xdp->data;
 		metalen = 0;
@@ -138,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 		metalen = xdp->data - xdp->data_meta;
 	}
 
-	to_buf = xdp_umem_get_data(xs->umem, addr);
-	memcpy(to_buf, from_buf, len + metalen);
-	addr += metalen;
+	memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
+	__xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
+
+	offset += metalen;
+	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (!err) {
 		xskq_discard_addr(xs->umem->fq);
@@ -185,6 +209,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	u32 metalen = xdp->data - xdp->data_meta;
 	u32 len = xdp->data_end - xdp->data;
+	u64 offset = xs->umem->headroom;
 	void *buffer;
 	u64 addr;
 	int err;
@@ -196,17 +221,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 		goto out_unlock;
 	}
 
-	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
+	if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
 	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
 		err = -ENOSPC;
 		goto out_drop;
 	}
 
-	addr += xs->umem->headroom;
-
+	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
 	buffer = xdp_umem_get_data(xs->umem, addr);
 	memcpy(buffer, xdp->data_meta, len + metalen);
-	addr += metalen;
+
+	addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
 	err = xskq_produce_batch_desc(xs->rx, addr, len);
 	if (err)
 		goto out_drop;
@@ -250,7 +275,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-		if (!xskq_peek_desc(xs->tx, desc))
+		if (!xskq_peek_desc(xs->tx, desc, umem))
 			continue;
 
 		if (xskq_produce_addr_lazy(umem->cq, desc->addr))
@@ -304,7 +329,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 	if (xs->queue_id >= xs->dev->real_num_tx_queues)
 		goto out;
 
-	while (xskq_peek_desc(xs->tx, &desc)) {
+	while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
 		char *buffer;
 		u64 addr;
 		u32 len;
@@ -333,7 +358,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
 		skb->dev = xs->dev;
 		skb->priority = sk->sk_priority;
 		skb->mark = sk->sk_mark;
-		skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
+		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
 		skb->destructor = xsk_destruct_skb;
 
 		err = dev_direct_xmit(skb, xs->queue_id);
@@ -526,6 +551,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
 	return sock;
 }
 
+/* Check if umem pages are contiguous.
+ * If zero-copy mode, use the DMA address to do the page contiguity check
+ * For all other modes we use addr (kernel virtual address)
+ * Store the result in the low bits of addr.
+ */
+static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
+{
+	struct xdp_umem_page *pgs = umem->pages;
+	int i, is_contig;
+
+	for (i = 0; i < umem->npgs - 1; i++) {
+		is_contig = (flags & XDP_ZEROCOPY) ?
+			(pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
+			(pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
+		pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
+	}
+}
+
 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -616,6 +659,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
 			goto out_unlock;
+
+		xsk_check_page_contiguity(xs->umem, flags);
 	}
 
 	xs->dev = dev;
@@ -636,6 +681,13 @@ out_release:
 	return err;
 }
 
+struct xdp_umem_reg_v1 {
+	__u64 addr; /* Start of packet data area */
+	__u64 len; /* Length of packet data area */
+	__u32 chunk_size;
+	__u32 headroom;
+};
+
 static int xsk_setsockopt(struct socket *sock, int level, int optname,
 			  char __user *optval, unsigned int optlen)
 {
@@ -673,10 +725,16 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 	}
 	case XDP_UMEM_REG:
 	{
-		struct xdp_umem_reg mr;
+		size_t mr_size = sizeof(struct xdp_umem_reg);
+		struct xdp_umem_reg mr = {};
 		struct xdp_umem *umem;
 
-		if (copy_from_user(&mr, optval, sizeof(mr)))
+		if (optlen < sizeof(struct xdp_umem_reg_v1))
+			return -EINVAL;
+		else if (optlen < sizeof(mr))
+			mr_size = sizeof(struct xdp_umem_reg_v1);
+
+		if (copy_from_user(&mr, optval, mr_size))
 			return -EFAULT;
 
 		mutex_lock(&xs->mutex);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index d5e06c8e0cbf..9986a759fe06 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
 	du.id = umem->id;
 	du.size = umem->size;
 	du.num_pages = umem->npgs;
-	du.chunk_size = (__u32)(~umem->chunk_mask + 1);
+	du.chunk_size = umem->chunk_size_nohr + umem->headroom;
 	du.headroom = umem->headroom;
 	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
 	du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index dd9e985c2461..eddae4688862 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -134,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
 
 /* UMEM queue */
 
+static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
+					      u64 length)
+{
+	bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
+	bool next_pg_contig =
+		(unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
+			XSK_NEXT_PG_CONTIG_MASK;
+
+	return cross_pg && !next_pg_contig;
+}
+
 static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
 	if (addr >= q->size) {
@@ -144,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 	return true;
 }
 
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr)
+static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
+						u64 length,
+						struct xdp_umem *umem)
+{
+	u64 base_addr = xsk_umem_extract_addr(addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (base_addr >= q->size || addr >= q->size ||
+	    xskq_crosses_non_contig_pg(umem, addr, length)) {
+		q->invalid_descs++;
+		return false;
+	}
+
+	return true;
+}
+
+static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
+				      struct xdp_umem *umem)
 {
 	while (q->cons_tail != q->cons_head) {
 		struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
 		*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+
+		if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+			if (xskq_is_valid_addr_unaligned(q, *addr,
+							 umem->chunk_size_nohr,
+							 umem))
+				return addr;
+			goto out;
+		}
+
 		if (xskq_is_valid_addr(q, *addr))
 			return addr;
 
+out:
 		q->cons_tail++;
 	}
 
 	return NULL;
 }
 
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
+static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
+				  struct xdp_umem *umem)
 {
 	if (q->cons_tail == q->cons_head) {
 		smp_mb(); /* D, matches A */
@@ -171,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr)
 		smp_rmb();
 	}
 
-	return xskq_validate_addr(q, addr);
+	return xskq_validate_addr(q, addr, umem);
 }
 
 static inline void xskq_discard_addr(struct xsk_queue *q)
@@ -230,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q)
 
 /* Rx/Tx queue */
 
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
+static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
+				      struct xdp_umem *umem)
 {
+	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+		if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+			return false;
+
+		if (d->len > umem->chunk_size_nohr || d->options) {
+			q->invalid_descs++;
+			return false;
+		}
+
+		return true;
+	}
+
 	if (!xskq_is_valid_addr(q, d->addr))
 		return false;
 
@@ -245,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d)
 }
 
 static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
-						  struct xdp_desc *desc)
+						  struct xdp_desc *desc,
+						  struct xdp_umem *umem)
 {
 	while (q->cons_tail != q->cons_head) {
 		struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 		unsigned int idx = q->cons_tail & q->ring_mask;
 
 		*desc = READ_ONCE(ring->desc[idx]);
-		if (xskq_is_valid_desc(q, desc))
+		if (xskq_is_valid_desc(q, desc, umem))
 			return desc;
 
 		q->cons_tail++;
@@ -262,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
 }
 
 static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
-					      struct xdp_desc *desc)
+					      struct xdp_desc *desc,
+					      struct xdp_umem *umem)
 {
 	if (q->cons_tail == q->cons_head) {
 		smp_mb(); /* D, matches A */
@@ -273,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
 		smp_rmb(); /* C, matches B */
 	}
 
-	return xskq_validate_desc(q, desc);
+	return xskq_validate_desc(q, desc, umem);
 }
 
 static inline void xskq_discard_desc(struct xsk_queue *q)
-- 
cgit v1.2.3


From bee07b33db78d4ee7ed6a2fe810b9473d5471fe4 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 30 Aug 2019 16:04:32 -0700
Subject: mm: memcontrol: flush percpu slab vmstats on kmem offlining

I've noticed that the "slab" value in memory.stat is sometimes 0, even
if some children memory cgroups have a non-zero "slab" value.  The
following investigation showed that this is the result of the kmem_cache
reparenting in combination with the per-cpu batching of slab vmstats.

At the offlining some vmstat value may leave in the percpu cache, not
being propagated upwards by the cgroup hierarchy.  It means that stats
on ancestor levels are lower than actual.  Later when slab pages are
released, the precise number of pages is substracted on the parent
level, making the value negative.  We don't show negative values, 0 is
printed instead.

To fix this issue, let's flush percpu slab memcg and lruvec stats on
memcg offlining.  This guarantees that numbers on all ancestor levels
are accurate and match the actual number of outstanding slab pages.

Link: http://lkml.kernel.org/r/20190819202338.363363-3-guro@fb.com
Fixes: fb2f2b0adb98 ("mm: memcg/slab: reparent memcg kmem_caches on cgroup removal")
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  5 +++--
 mm/memcontrol.c        | 35 +++++++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d77d717c620c..3f38c30d2f13 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -215,8 +215,9 @@ enum node_stat_item {
 	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
 	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
 	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
-	NR_SLAB_RECLAIMABLE,
-	NR_SLAB_UNRECLAIMABLE,
+	NR_SLAB_RECLAIMABLE,	/* Please do not reorder this item */
+	NR_SLAB_UNRECLAIMABLE,	/* and this one without looking at
+				 * memcg_flush_percpu_vmstats() first. */
 	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
 	WORKINGSET_NODES,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 26e2999af608..1f585d6c77c1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3260,37 +3260,49 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	}
 }
 
-static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only)
 {
 	unsigned long stat[MEMCG_NR_STAT];
 	struct mem_cgroup *mi;
 	int node, cpu, i;
+	int min_idx, max_idx;
 
-	for (i = 0; i < MEMCG_NR_STAT; i++)
+	if (slab_only) {
+		min_idx = NR_SLAB_RECLAIMABLE;
+		max_idx = NR_SLAB_UNRECLAIMABLE;
+	} else {
+		min_idx = 0;
+		max_idx = MEMCG_NR_STAT;
+	}
+
+	for (i = min_idx; i < max_idx; i++)
 		stat[i] = 0;
 
 	for_each_online_cpu(cpu)
-		for (i = 0; i < MEMCG_NR_STAT; i++)
+		for (i = min_idx; i < max_idx; i++)
 			stat[i] += raw_cpu_read(memcg->vmstats_percpu->stat[i]);
 
 	for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
-		for (i = 0; i < MEMCG_NR_STAT; i++)
+		for (i = min_idx; i < max_idx; i++)
 			atomic_long_add(stat[i], &mi->vmstats[i]);
 
+	if (!slab_only)
+		max_idx = NR_VM_NODE_STAT_ITEMS;
+
 	for_each_node(node) {
 		struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 		struct mem_cgroup_per_node *pi;
 
-		for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+		for (i = min_idx; i < max_idx; i++)
 			stat[i] = 0;
 
 		for_each_online_cpu(cpu)
-			for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+			for (i = min_idx; i < max_idx; i++)
 				stat[i] += raw_cpu_read(
 					pn->lruvec_stat_cpu->count[i]);
 
 		for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
-			for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+			for (i = min_idx; i < max_idx; i++)
 				atomic_long_add(stat[i], &pi->lruvec_stat[i]);
 	}
 }
@@ -3363,7 +3375,14 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	if (!parent)
 		parent = root_mem_cgroup;
 
+	/*
+	 * Deactivate and reparent kmem_caches. Then flush percpu
+	 * slab statistics to have precise values at the parent and
+	 * all ancestor levels. It's required to keep slab stats
+	 * accurate after the reparenting of kmem_caches.
+	 */
 	memcg_deactivate_kmem_caches(memcg, parent);
+	memcg_flush_percpu_vmstats(memcg, true);
 
 	kmemcg_id = memcg->kmemcg_id;
 	BUG_ON(kmemcg_id < 0);
@@ -4740,7 +4759,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 	 * Flush percpu vmstats and vmevents to guarantee the value correctness
 	 * on parent's and all ancestor levels.
 	 */
-	memcg_flush_percpu_vmstats(memcg);
+	memcg_flush_percpu_vmstats(memcg, false);
 	memcg_flush_percpu_vmevents(memcg);
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
-- 
cgit v1.2.3


From fbb7d9d56d167247f2eb7261038385cab1073c37 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Jul 2019 13:24:54 -0400
Subject: kill LOOKUP_NO_EVAL, don't bother including namei.h from audit.h

The former has no users left; the latter was only to get LOOKUP_...
values to remapper in audit_inode() and that's an ex-parrot now.

All places that use symbols from namei.h include it either directly
or (in a few cases) via a local header, like fs/autofs/autofs_i.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/nfstrace.h     | 2 --
 include/linux/audit.h | 1 -
 include/linux/namei.h | 2 --
 3 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 976d4089e267..361cc10d6f95 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -207,7 +207,6 @@ TRACE_DEFINE_ENUM(LOOKUP_PARENT);
 TRACE_DEFINE_ENUM(LOOKUP_REVAL);
 TRACE_DEFINE_ENUM(LOOKUP_RCU);
 TRACE_DEFINE_ENUM(LOOKUP_NO_REVAL);
-TRACE_DEFINE_ENUM(LOOKUP_NO_EVAL);
 TRACE_DEFINE_ENUM(LOOKUP_OPEN);
 TRACE_DEFINE_ENUM(LOOKUP_CREATE);
 TRACE_DEFINE_ENUM(LOOKUP_EXCL);
@@ -226,7 +225,6 @@ TRACE_DEFINE_ENUM(LOOKUP_DOWN);
 			{ LOOKUP_REVAL, "REVAL" }, \
 			{ LOOKUP_RCU, "RCU" }, \
 			{ LOOKUP_NO_REVAL, "NO_REVAL" }, \
-			{ LOOKUP_NO_EVAL, "NO_EVAL" }, \
 			{ LOOKUP_OPEN, "OPEN" }, \
 			{ LOOKUP_CREATE, "CREATE" }, \
 			{ LOOKUP_EXCL, "EXCL" }, \
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 543763ab0354..aee3dc9eb378 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -11,7 +11,6 @@
 
 #include <linux/sched.h>
 #include <linux/ptrace.h>
-#include <linux/namei.h>  /* LOOKUP_* */
 #include <uapi/linux/audit.h>
 
 #define AUDIT_INO_UNSET ((unsigned long)-1)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 9138b4471dbf..ac665cbc659f 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -25,7 +25,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
  *  - dentry cache is untrusted; force a real lookup
  *  - suppress terminal automount
  *  - skip revalidation
- *  - don't fetch xattrs on audit_inode
  */
 #define LOOKUP_FOLLOW		0x0001
 #define LOOKUP_DIRECTORY	0x0002
@@ -35,7 +34,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_REVAL		0x0020
 #define LOOKUP_RCU		0x0040
 #define LOOKUP_NO_REVAL		0x0080
-#define LOOKUP_NO_EVAL		0x0100
 
 /*
  * Intent data
-- 
cgit v1.2.3


From 6b61aed06a3b0eb8e8d49142ddd1e101a064444d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Jul 2019 14:00:23 -0400
Subject: namei.h: get the comments on LOOKUP_... in sync with reality

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/namei.h | 44 +++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/linux/namei.h b/include/linux/namei.h
index ac665cbc659f..1a504fc7994f 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -16,37 +16,27 @@ enum { MAX_NESTED_LINKS = 8 };
  */
 enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 
-/*
- * The bitmask for a lookup event:
- *  - follow links at the end
- *  - require a directory
- *  - ending slashes ok even for nonexistent files
- *  - internal "there are more path components" flag
- *  - dentry cache is untrusted; force a real lookup
- *  - suppress terminal automount
- *  - skip revalidation
- */
-#define LOOKUP_FOLLOW		0x0001
-#define LOOKUP_DIRECTORY	0x0002
-#define LOOKUP_AUTOMOUNT	0x0004
-
+/* pathwalk mode */
+#define LOOKUP_FOLLOW		0x0001	/* follow links at the end */
+#define LOOKUP_DIRECTORY	0x0002	/* require a directory */
+#define LOOKUP_AUTOMOUNT	0x0004  /* force terminal automount */
+#define LOOKUP_EMPTY		0x4000	/* accept empty path [user_... only] */
+#define LOOKUP_DOWN		0x8000	/* follow mounts in the starting point */
+
+#define LOOKUP_REVAL		0x0020	/* tell ->d_revalidate() to trust no cache */
+#define LOOKUP_RCU		0x0040	/* RCU pathwalk mode; semi-internal */
+
+/* These tell filesystem methods that we are dealing with the final component... */
+#define LOOKUP_OPEN		0x0100	/* ... in open */
+#define LOOKUP_CREATE		0x0200	/* ... in object creation */
+#define LOOKUP_EXCL		0x0400	/* ... in exclusive creation */
+#define LOOKUP_RENAME_TARGET	0x0800	/* ... in destination of rename() */
+
+/* internal use only */
 #define LOOKUP_PARENT		0x0010
-#define LOOKUP_REVAL		0x0020
-#define LOOKUP_RCU		0x0040
 #define LOOKUP_NO_REVAL		0x0080
-
-/*
- * Intent data
- */
-#define LOOKUP_OPEN		0x0100
-#define LOOKUP_CREATE		0x0200
-#define LOOKUP_EXCL		0x0400
-#define LOOKUP_RENAME_TARGET	0x0800
-
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
-#define LOOKUP_EMPTY		0x4000
-#define LOOKUP_DOWN		0x8000
 
 extern int path_pts(struct path *path);
 
-- 
cgit v1.2.3


From ce6595a28a15c874aee374757dcd08f537d7b24d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 14 Jul 2019 16:42:44 -0400
Subject: kill the last users of user_{path,lpath,path_dir}()

old wrappers with few callers remaining; put them out of their misery...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/coda/pioctl.c      |  7 ++-----
 fs/namespace.c        |  8 +++++---
 fs/xfs/xfs_ioctl.c    |  2 +-
 include/linux/namei.h | 16 ----------------
 4 files changed, 8 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 644d48c12ce8..3aec27e5eb82 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -63,11 +63,8 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
 	 * Look up the pathname. Note that the pathname is in
 	 * user memory, and namei takes care of this
 	 */
-	if (data.follow)
-		error = user_path(data.path, &path);
-	else
-		error = user_lpath(data.path, &path);
-
+	error = user_path_at(AT_FDCWD, data.path,
+			     data.follow ? LOOKUP_FOLLOW : 0, &path);
 	if (error)
 		return error;
 
diff --git a/fs/namespace.c b/fs/namespace.c
index 697f8820dff5..b73478244356 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3044,7 +3044,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
 		return -EINVAL;
 
 	/* ... and get the mountpoint */
-	retval = user_path(dir_name, &path);
+	retval = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
 	if (retval)
 		return retval;
 
@@ -3591,11 +3591,13 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	if (!may_mount())
 		return -EPERM;
 
-	error = user_path_dir(new_root, &new);
+	error = user_path_at(AT_FDCWD, new_root,
+			     LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
 	if (error)
 		goto out0;
 
-	error = user_path_dir(put_old, &old);
+	error = user_path_at(AT_FDCWD, put_old,
+			     LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
 	if (error)
 		goto out1;
 
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6f7848cd5527..affa557c2337 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -67,7 +67,7 @@ xfs_find_handle(
 			return -EBADF;
 		inode = file_inode(f.file);
 	} else {
-		error = user_lpath((const char __user *)hreq->path, &path);
+		error = user_path_at(AT_FDCWD, hreq->path, 0, &path);
 		if (error)
 			return error;
 		inode = d_inode(path.dentry);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 1a504fc7994f..b7993980516e 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -48,22 +48,6 @@ static inline int user_path_at(int dfd, const char __user *name, unsigned flags,
 	return user_path_at_empty(dfd, name, flags, path, NULL);
 }
 
-static inline int user_path(const char __user *name, struct path *path)
-{
-	return user_path_at_empty(AT_FDCWD, name, LOOKUP_FOLLOW, path, NULL);
-}
-
-static inline int user_lpath(const char __user *name, struct path *path)
-{
-	return user_path_at_empty(AT_FDCWD, name, 0, path, NULL);
-}
-
-static inline int user_path_dir(const char __user *name, struct path *path)
-{
-	return user_path_at_empty(AT_FDCWD, name,
-				  LOOKUP_FOLLOW | LOOKUP_DIRECTORY, path, NULL);
-}
-
 extern int kern_path(const char *, unsigned, struct path *);
 
 extern struct dentry *kern_path_create(int, const char *, struct path *, unsigned int);
-- 
cgit v1.2.3


From 0feacaa21634014148068035b02eade71f853496 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 30 Aug 2019 16:39:54 -0700
Subject: writeback: don't access page->mapping directly in track_foreign_dirty
 TP

page->mapping may encode different values in it and page_mapping()
should always be used to access the mapping pointer.
track_foreign_dirty tracepoint was incorrectly accessing page->mapping
directly.  Use page_mapping() instead.  Also, add NULL checks while at
it.

Fixes: 3a8e9ac89e6a ("writeback: add tracepoints for cgroup foreign writebacks")
Reported-by: Jan Kara <jack@suse.cz>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/trace/events/writeback.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 3dc9fb9e7c78..3a27335fce2c 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -251,9 +251,12 @@ TRACE_EVENT(track_foreign_dirty,
 	),
 
 	TP_fast_assign(
+		struct address_space *mapping = page_mapping(page);
+		struct inode *inode = mapping ? mapping->host : NULL;
+
 		strncpy(__entry->name,	dev_name(wb->bdi->dev), 32);
 		__entry->bdi_id		= wb->bdi->id;
-		__entry->ino		= page->mapping->host->i_ino;
+		__entry->ino		= inode ? inode->i_ino : 0;
 		__entry->memcg_id	= wb->memcg_css->id;
 		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
 		__entry->page_cgroup_ino = page->mem_cgroup->css.cgroup->kn->id.ino;
-- 
cgit v1.2.3


From 974ceb21fcf9c0345570ea194e799a4ee7842f33 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Thu, 29 Aug 2019 19:50:24 +0300
Subject: udp: Remove unlikely() from IS_ERR*() condition

"unlikely(IS_ERR_OR_NULL(x))" is excessive. IS_ERR_OR_NULL() already uses
unlikely() internally.

Signed-off-by: Denis Efremov <efremov@linux.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Joe Perches <joe@perches.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 79d141d2103b..bad74f780831 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -480,7 +480,7 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 	 * CB fragment
 	 */
 	segs = __skb_gso_segment(skb, features, false);
-	if (unlikely(IS_ERR_OR_NULL(segs))) {
+	if (IS_ERR_OR_NULL(segs)) {
 		int segs_nr = skb_shinfo(skb)->gso_segs;
 
 		atomic_add(segs_nr, &sk->sk_drops);
-- 
cgit v1.2.3


From cf1ea0592dbf109e7e7935b7d5b1a47a1ba04174 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 29 Aug 2019 09:04:11 -0700
Subject: fs: Export generic_fadvise()

Filesystems will need to call this function from their fadvise handlers.

CC: stable@vger.kernel.org
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 include/linux/fs.h | 2 ++
 mm/fadvise.c       | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 997a530ff4e9..bc1b40fb0db7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3531,6 +3531,8 @@ extern void inode_nohighmem(struct inode *inode);
 /* mm/fadvise.c */
 extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 		       int advice);
+extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
+			   int advice);
 
 #if defined(CONFIG_IO_URING)
 extern struct sock *io_uring_get_socket(struct file *file);
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 467bcd032037..4f17c83db575 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -27,8 +27,7 @@
  * deactivate the pages and clear PG_Referenced.
  */
 
-static int generic_fadvise(struct file *file, loff_t offset, loff_t len,
-			   int advice)
+int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 {
 	struct inode *inode;
 	struct address_space *mapping;
@@ -178,6 +177,7 @@ static int generic_fadvise(struct file *file, loff_t offset, loff_t len,
 	}
 	return 0;
 }
+EXPORT_SYMBOL(generic_fadvise);
 
 int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 {
-- 
cgit v1.2.3


From 595a438c78dbdc43d6c9db4f437267f0bd1548bf Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Thu, 4 Jul 2019 20:21:10 +0300
Subject: tracing: Make exported ftrace_set_clr_event non-static

The function ftrace_set_clr_event is declared static and marked
EXPORT_SYMBOL_GPL(), which is at best an odd combination. Because the
function was decided to be a part of API, this commit removes the static
attribute and adds the declaration to the header.

Link: http://lkml.kernel.org/r/20190704172110.27041-1-efremov@linux.com

Fixes: f45d1225adb04 ("tracing: Kernel access to Ftrace instances")
Reviewed-by: Joe Jin <joe.jin@oracle.com>
Signed-off-by: Denis Efremov <efremov@linux.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 1 +
 kernel/trace/trace_events.c  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 5150436783e8..30a8cdcfd4a4 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -548,6 +548,7 @@ extern int trace_event_get_offsets(struct trace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < (type)1)
 
+int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set);
 int trace_set_clr_event(const char *system, const char *event, int set);
 
 /*
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c7506bc81b75..648930823b57 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -787,7 +787,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 	return ret;
 }
 
-static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
+int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 {
 	char *event = NULL, *sub = NULL, *match;
 	int ret;
-- 
cgit v1.2.3


From 2d4c849530a9771ed38d4046d2b631c1a4e2f9a6 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Fri, 30 Aug 2019 00:42:03 -0700
Subject: qed: Add APIs for reading config id attributes.

The patch adds driver support for reading the config id attributes from NVM
flash partition.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 27 +++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.c  | 29 +++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h  | 15 +++++++++++++++
 include/linux/qed/qed_if.h                 | 11 +++++++++++
 4 files changed, 82 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 7891f8c5a1bc..c9a757177366 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -69,6 +69,8 @@
 #define QED_RDMA_SRQS                   QED_ROCE_QPS
 #define QED_NVM_CFG_SET_FLAGS		0xE
 #define QED_NVM_CFG_SET_PF_FLAGS	0x1E
+#define QED_NVM_CFG_GET_FLAGS		0xA
+#define QED_NVM_CFG_GET_PF_FLAGS	0x1A
 
 static char version[] =
 	"QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -2298,6 +2300,30 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
 	return rc;
 }
 
+static int qed_nvm_flash_cfg_read(struct qed_dev *cdev, u8 **data,
+				  u32 cmd, u32 entity_id)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	u32 flags, len;
+	int rc = 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	DP_VERBOSE(cdev, NETIF_MSG_DRV,
+		   "Read config cmd = %d entity id %d\n", cmd, entity_id);
+	flags = entity_id ? QED_NVM_CFG_GET_PF_FLAGS : QED_NVM_CFG_GET_FLAGS;
+	rc = qed_mcp_nvm_get_cfg(hwfn, ptt, cmd, entity_id, flags, *data, &len);
+	if (rc)
+		DP_ERR(cdev, "Error %d reading %d\n", rc, cmd);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
 static int qed_nvm_flash(struct qed_dev *cdev, const char *name)
 {
 	const struct firmware *image;
@@ -2610,6 +2636,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.db_recovery_del = &qed_db_recovery_del,
 	.read_module_eeprom = &qed_read_module_eeprom,
 	.get_affin_hwfn_idx = &qed_get_affin_hwfn_idx,
+	.read_nvm_cfg = &qed_nvm_flash_cfg_read,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 89462c4a5022..36ddb89856a8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3751,6 +3751,35 @@ int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return 0;
 }
 
+int qed_mcp_nvm_get_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+			u32 *p_len)
+{
+	u32 mb_param = 0, resp, param;
+	int rc;
+
+	QED_MFW_SET_FIELD(mb_param, DRV_MB_PARAM_NVM_CFG_OPTION_ID, option_id);
+	if (flags & QED_NVM_CFG_OPTION_INIT)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_INIT, 1);
+	if (flags & QED_NVM_CFG_OPTION_FREE)
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_FREE, 1);
+	if (flags & QED_NVM_CFG_OPTION_ENTITY_SEL) {
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_SEL, 1);
+		QED_MFW_SET_FIELD(mb_param,
+				  DRV_MB_PARAM_NVM_CFG_OPTION_ENTITY_ID,
+				  entity_id);
+	}
+
+	rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+				DRV_MSG_CODE_GET_NVM_CFG_OPTION,
+				mb_param, &resp, &param, p_len, (u32 *)p_buf);
+
+	return rc;
+}
+
 int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
 			u32 len)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 83649a82977b..9c4c2763de8d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -1208,6 +1208,21 @@ int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
  */
 int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
+/**
+ * @brief Get NVM config attribute value.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param option_id
+ * @param entity_id
+ * @param flags
+ * @param p_buf
+ * @param p_len
+ */
+int qed_mcp_nvm_get_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
+			u32 *p_len);
+
 /**
  * @brief Set NVM config attribute value.
  *
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index e366399874f3..06fd9580c9e5 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1132,6 +1132,17 @@ struct qed_common_ops {
  * @param cdev
  */
 	u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev);
+
+/**
+ * @brief read_nvm_cfg - Read NVM config attribute value.
+ * @param cdev
+ * @param buf - buffer
+ * @param cmd - NVM CFG command id
+ * @param entity_id - Entity id
+ *
+ */
+	int (*read_nvm_cfg)(struct qed_dev *cdev, u8 **buf, u32 cmd,
+			    u32 entity_id);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 3b86bd076284724489381e391f84a34078b3d5bc Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Fri, 30 Aug 2019 00:42:05 -0700
Subject: qed: Add APIs for configuring grc dump config flags.

The patch adds driver support for configuring the grc dump config flags.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_debug.c | 82 +++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_hsi.h   | 15 ++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c  | 21 ++++++++
 include/linux/qed/qed_if.h                  |  9 ++++
 4 files changed, 127 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 5ea6c4fc6050..859caa6c1a1f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -1756,6 +1756,15 @@ static u32 qed_read_unaligned_dword(u8 *buf)
 	return dword;
 }
 
+/* Sets the value of the specified GRC param */
+static void qed_grc_set_param(struct qed_hwfn *p_hwfn,
+			      enum dbg_grc_params grc_param, u32 val)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+	dev_data->grc.param_val[grc_param] = val;
+}
+
 /* Returns the value of the specified GRC param */
 static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn,
 			     enum dbg_grc_params grc_param)
@@ -5119,6 +5128,69 @@ bool qed_read_fw_info(struct qed_hwfn *p_hwfn,
 	return false;
 }
 
+enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum dbg_grc_params grc_param, u32 val)
+{
+	enum dbg_status status;
+	int i;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_DEBUG,
+		   "dbg_grc_config: paramId = %d, val = %d\n", grc_param, val);
+
+	status = qed_dbg_dev_init(p_hwfn, p_ptt);
+	if (status != DBG_STATUS_OK)
+		return status;
+
+	/* Initializes the GRC parameters (if not initialized). Needed in order
+	 * to set the default parameter values for the first time.
+	 */
+	qed_dbg_grc_init_params(p_hwfn);
+
+	if (grc_param >= MAX_DBG_GRC_PARAMS)
+		return DBG_STATUS_INVALID_ARGS;
+	if (val < s_grc_param_defs[grc_param].min ||
+	    val > s_grc_param_defs[grc_param].max)
+		return DBG_STATUS_INVALID_ARGS;
+
+	if (s_grc_param_defs[grc_param].is_preset) {
+		/* Preset param */
+
+		/* Disabling a preset is not allowed. Call
+		 * dbg_grc_set_params_default instead.
+		 */
+		if (!val)
+			return DBG_STATUS_INVALID_ARGS;
+
+		/* Update all params with the preset values */
+		for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) {
+			u32 preset_val;
+
+			/* Skip persistent params */
+			if (s_grc_param_defs[i].is_persistent)
+				continue;
+
+			/* Find preset value */
+			if (grc_param == DBG_GRC_PARAM_EXCLUDE_ALL)
+				preset_val =
+				    s_grc_param_defs[i].exclude_all_preset_val;
+			else if (grc_param == DBG_GRC_PARAM_CRASH)
+				preset_val =
+				    s_grc_param_defs[i].crash_preset_val;
+			else
+				return DBG_STATUS_INVALID_ARGS;
+
+			qed_grc_set_param(p_hwfn,
+					  (enum dbg_grc_params)i, preset_val);
+		}
+	} else {
+		/* Regular param - set its value */
+		qed_grc_set_param(p_hwfn, grc_param, val);
+	}
+
+	return DBG_STATUS_OK;
+}
+
 /* Assign default GRC param values */
 void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
 {
@@ -7997,9 +8069,16 @@ static u32 qed_calc_regdump_header(enum debug_print_features feature,
 int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 {
 	u8 cur_engine, omit_engine = 0, org_engine;
+	struct qed_hwfn *p_hwfn =
+		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	int grc_params[MAX_DBG_GRC_PARAMS], i;
 	u32 offset = 0, feature_size;
 	int rc;
 
+	for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+		grc_params[i] = dev_data->grc.param_val[i];
+
 	if (cdev->num_hwfns == 1)
 		omit_engine = 1;
 
@@ -8087,6 +8166,9 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 			       rc);
 		}
 
+		for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+			dev_data->grc.param_val[i] = grc_params[i];
+
 		/* GRC dump - must be last because when mcp stuck it will
 		 * clutter idle_chk, reg_fifo, ...
 		 */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 557a12ef9815..cf3ceb62e397 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -3024,6 +3024,21 @@ void qed_read_regs(struct qed_hwfn *p_hwfn,
  */
 bool qed_read_fw_info(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt, struct fw_info *fw_info);
+/**
+ * @brief qed_dbg_grc_config - Sets the value of a GRC parameter.
+ *
+ * @param p_hwfn -	HW device data
+ * @param grc_param -	GRC parameter
+ * @param val -		Value to set.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- grc_param is invalid
+ *	- val is outside the allowed boundaries
+ */
+enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum dbg_grc_params grc_param, u32 val);
 
 /**
  * @brief qed_dbg_grc_set_params_default - Reverts all GRC parameters to their
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c9a757177366..ac1511a834d8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2583,6 +2583,26 @@ static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf,
 	return rc;
 }
 
+static int qed_set_grc_config(struct qed_dev *cdev, u32 cfg_id, u32 val)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *ptt;
+	int rc = 0;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return -EAGAIN;
+
+	rc = qed_dbg_grc_config(hwfn, ptt, cfg_id, val);
+
+	qed_ptt_release(hwfn, ptt);
+
+	return rc;
+}
+
 static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev)
 {
 	return QED_AFFIN_HWFN_IDX(cdev);
@@ -2637,6 +2657,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.read_module_eeprom = &qed_read_module_eeprom,
 	.get_affin_hwfn_idx = &qed_get_affin_hwfn_idx,
 	.read_nvm_cfg = &qed_nvm_flash_cfg_read,
+	.set_grc_config = &qed_set_grc_config,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 06fd9580c9e5..e35463860c84 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1143,6 +1143,15 @@ struct qed_common_ops {
  */
 	int (*read_nvm_cfg)(struct qed_dev *cdev, u8 **buf, u32 cmd,
 			    u32 entity_id);
+
+/**
+ * @brief set_grc_config - Configure value for grc config id.
+ * @param cdev
+ * @param cfg_id - grc config id
+ * @param val - grc config value
+ *
+ */
+	int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val);
 };
 
 #define MASK_FIELD(_name, _value) \
-- 
cgit v1.2.3


From 15a7dea750e0162f273c6e61a94f96944b75b31e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Fri, 30 Aug 2019 12:25:47 +0200
Subject: net/tls: use RCU protection on icsk->icsk_ulp_data

We need to make sure context does not get freed while diag
code is interrogating it. Free struct tls_context with
kfree_rcu().

We add the __rcu annotation directly in icsk, and cast it
away in the datapath accessor. Presumably all ULPs will
do a similar thing.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h |  2 +-
 include/net/tls.h                  |  9 +++++++--
 net/core/sock_map.c                |  2 +-
 net/tls/tls_device.c               |  2 +-
 net/tls/tls_main.c                 | 26 +++++++++++++++++++-------
 5 files changed, 29 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c57d53e7e02c..895546058a20 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -97,7 +97,7 @@ struct inet_connection_sock {
 	const struct tcp_congestion_ops *icsk_ca_ops;
 	const struct inet_connection_sock_af_ops *icsk_af_ops;
 	const struct tcp_ulp_ops  *icsk_ulp_ops;
-	void			  *icsk_ulp_data;
+	void __rcu		  *icsk_ulp_data;
 	void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq);
 	struct hlist_node         icsk_listen_portaddr_node;
 	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
diff --git a/include/net/tls.h b/include/net/tls.h
index 41b2d41bb1b8..4997742475cd 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -41,6 +41,7 @@
 #include <linux/tcp.h>
 #include <linux/skmsg.h>
 #include <linux/netdevice.h>
+#include <linux/rcupdate.h>
 
 #include <net/tcp.h>
 #include <net/strparser.h>
@@ -290,6 +291,7 @@ struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
+	struct rcu_head rcu;
 };
 
 enum tls_offload_ctx_dir {
@@ -348,7 +350,7 @@ struct tls_offload_context_rx {
 #define TLS_OFFLOAD_CONTEXT_SIZE_RX					\
 	(sizeof(struct tls_offload_context_rx) + TLS_DRIVER_STATE_SIZE_RX)
 
-void tls_ctx_free(struct tls_context *ctx);
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx);
 int wait_on_pending_writer(struct sock *sk, long *timeo);
 int tls_sk_query(struct sock *sk, int optname, char __user *optval,
 		int __user *optlen);
@@ -467,7 +469,10 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	return icsk->icsk_ulp_data;
+	/* Use RCU on icsk_ulp_data only for sock diag code,
+	 * TLS data path doesn't need rcu_dereference().
+	 */
+	return (__force void *)icsk->icsk_ulp_data;
 }
 
 static inline void tls_advance_record_sn(struct sock *sk,
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 1330a7442e5b..01998860afaa 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
 		return -EINVAL;
 	if (unlikely(idx >= map->max_entries))
 		return -E2BIG;
-	if (unlikely(icsk->icsk_ulp_data))
+	if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
 		return -EINVAL;
 
 	link = sk_psock_init_link();
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a470df7ffcf9..e188139f0464 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx)
 	if (ctx->rx_conf == TLS_HW)
 		kfree(tls_offload_ctx_rx(ctx));
 
-	tls_ctx_free(ctx);
+	tls_ctx_free(NULL, ctx);
 }
 
 static void tls_device_gc_task(struct work_struct *work)
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 43252a801c3f..f8f2d2c3d627 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -251,14 +251,26 @@ static void tls_write_space(struct sock *sk)
 	ctx->sk_write_space(sk);
 }
 
-void tls_ctx_free(struct tls_context *ctx)
+/**
+ * tls_ctx_free() - free TLS ULP context
+ * @sk:  socket to with @ctx is attached
+ * @ctx: TLS context structure
+ *
+ * Free TLS context. If @sk is %NULL caller guarantees that the socket
+ * to which @ctx was attached has no outstanding references.
+ */
+void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
 {
 	if (!ctx)
 		return;
 
 	memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
 	memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
-	kfree(ctx);
+
+	if (sk)
+		kfree_rcu(ctx, rcu);
+	else
+		kfree(ctx);
 }
 
 static void tls_sk_proto_cleanup(struct sock *sk,
@@ -306,7 +318,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 
 	write_lock_bh(&sk->sk_callback_lock);
 	if (free_ctx)
-		icsk->icsk_ulp_data = NULL;
+		rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
 	sk->sk_prot = ctx->sk_proto;
 	if (sk->sk_write_space == tls_write_space)
 		sk->sk_write_space = ctx->sk_write_space;
@@ -321,7 +333,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 	ctx->sk_proto_close(sk, timeout);
 
 	if (free_ctx)
-		tls_ctx_free(ctx);
+		tls_ctx_free(sk, ctx);
 }
 
 static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval,
@@ -610,7 +622,7 @@ static struct tls_context *create_ctx(struct sock *sk)
 	if (!ctx)
 		return NULL;
 
-	icsk->icsk_ulp_data = ctx;
+	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
 	ctx->sk_proto_close = sk->sk_prot->close;
@@ -651,8 +663,8 @@ static void tls_hw_sk_destruct(struct sock *sk)
 
 	ctx->sk_destruct(sk);
 	/* Free ctx */
-	tls_ctx_free(ctx);
-	icsk->icsk_ulp_data = NULL;
+	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
+	tls_ctx_free(sk, ctx);
 }
 
 static int tls_hw_prot(struct sock *sk)
-- 
cgit v1.2.3


From 61723b393292f1e4ea27f8d123384d50b176c29d Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 30 Aug 2019 12:25:48 +0200
Subject: tcp: ulp: add functions to dump ulp-specific information

currently, only getsockopt(TCP_ULP) can be invoked to know if a ULP is on
top of a TCP socket. Extend idiag_get_aux() and idiag_get_aux_size(),
introduced by commit b37e88407c1d ("inet_diag: allow protocols to provide
additional data"), to report the ULP name and other information that can
be made available by the ULP through optional functions.

Users having CAP_NET_ADMIN privileges will then be able to retrieve this
information through inet_diag_handler, if they specify INET_DIAG_INFO in
the request.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h              |  3 +++
 include/uapi/linux/inet_diag.h |  8 +++++++
 net/ipv4/tcp_diag.c            | 52 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 77fe87f7a992..c9a3f9688223 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2122,6 +2122,9 @@ struct tcp_ulp_ops {
 	void (*update)(struct sock *sk, struct proto *p);
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
+	/* diagnostic */
+	int (*get_info)(const struct sock *sk, struct sk_buff *skb);
+	size_t (*get_info_size)(const struct sock *sk);
 
 	char		name[TCP_ULP_NAME_MAX];
 	struct module	*owner;
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index e8baca85bac6..e2c6273274f3 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -153,11 +153,19 @@ enum {
 	INET_DIAG_BBRINFO,	/* request as INET_DIAG_VEGASINFO */
 	INET_DIAG_CLASS_ID,	/* request as INET_DIAG_TCLASS */
 	INET_DIAG_MD5SIG,
+	INET_DIAG_ULP_INFO,
 	__INET_DIAG_MAX,
 };
 
 #define INET_DIAG_MAX (__INET_DIAG_MAX - 1)
 
+enum {
+	INET_ULP_INFO_UNSPEC,
+	INET_ULP_INFO_NAME,
+	__INET_ULP_INFO_MAX,
+};
+#define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1)
+
 /* INET_DIAG_MEM */
 
 struct inet_diag_meminfo {
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index a3a386236d93..babc156deabb 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -81,13 +81,42 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb,
 }
 #endif
 
+static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
+			    const struct tcp_ulp_ops *ulp_ops)
+{
+	struct nlattr *nest;
+	int err;
+
+	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
+	if (!nest)
+		return -EMSGSIZE;
+
+	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
+	if (err)
+		goto nla_failure;
+
+	if (ulp_ops->get_info)
+		err = ulp_ops->get_info(sk, skb);
+	if (err)
+		goto nla_failure;
+
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_failure:
+	nla_nest_cancel(skb, nest);
+	return err;
+}
+
 static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 			    struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int err = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	if (net_admin) {
 		struct tcp_md5sig_info *md5sig;
-		int err = 0;
 
 		rcu_read_lock();
 		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
@@ -99,11 +128,21 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops)
+			err = tcp_diag_put_ulp(skb, sk, ulp_ops);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
 static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	size_t size = 0;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -124,6 +163,17 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
 	}
 #endif
 
+	if (net_admin) {
+		const struct tcp_ulp_ops *ulp_ops;
+
+		ulp_ops = icsk->icsk_ulp_ops;
+		if (ulp_ops) {
+			size += nla_total_size(0) +
+				nla_total_size(TCP_ULP_NAME_MAX);
+			if (ulp_ops->get_info_size)
+				size += ulp_ops->get_info_size(sk);
+		}
+	}
 	return size;
 }
 
-- 
cgit v1.2.3


From 26811cc9f55acf835f7fdadc5ff2bbd6f06bc3ac Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 30 Aug 2019 12:25:49 +0200
Subject: net: tls: export protocol version, cipher, tx_conf/rx_conf to socket
 diag

When an application configures kernel TLS on top of a TCP socket, it's
now possible for inet_diag_handler() to collect information regarding the
protocol version, the cipher type and TX / RX configuration, in case
INET_DIAG_INFO is requested.

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h              | 17 +++++++++++
 include/uapi/linux/inet_diag.h |  1 +
 include/uapi/linux/tls.h       | 15 ++++++++++
 net/tls/tls_main.c             | 64 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 97 insertions(+)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 4997742475cd..ec3c3ed2c6c3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -431,6 +431,23 @@ static inline bool is_tx_ready(struct tls_sw_context_tx *ctx)
 	return READ_ONCE(rec->tx_ready);
 }
 
+static inline u16 tls_user_config(struct tls_context *ctx, bool tx)
+{
+	u16 config = tx ? ctx->tx_conf : ctx->rx_conf;
+
+	switch (config) {
+	case TLS_BASE:
+		return TLS_CONF_BASE;
+	case TLS_SW:
+		return TLS_CONF_SW;
+	case TLS_HW:
+		return TLS_CONF_HW;
+	case TLS_HW_RECORD:
+		return TLS_CONF_HW_RECORD;
+	}
+	return 0;
+}
+
 struct sk_buff *
 tls_validate_xmit_skb(struct sock *sk, struct net_device *dev,
 		      struct sk_buff *skb);
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index e2c6273274f3..a1ff345b3f33 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -162,6 +162,7 @@ enum {
 enum {
 	INET_ULP_INFO_UNSPEC,
 	INET_ULP_INFO_NAME,
+	INET_ULP_INFO_TLS,
 	__INET_ULP_INFO_MAX,
 };
 #define INET_ULP_INFO_MAX (__INET_ULP_INFO_MAX - 1)
diff --git a/include/uapi/linux/tls.h b/include/uapi/linux/tls.h
index 5b9c26753e46..bcd2869ed472 100644
--- a/include/uapi/linux/tls.h
+++ b/include/uapi/linux/tls.h
@@ -109,4 +109,19 @@ struct tls12_crypto_info_aes_ccm_128 {
 	unsigned char rec_seq[TLS_CIPHER_AES_CCM_128_REC_SEQ_SIZE];
 };
 
+enum {
+	TLS_INFO_UNSPEC,
+	TLS_INFO_VERSION,
+	TLS_INFO_CIPHER,
+	TLS_INFO_TXCONF,
+	TLS_INFO_RXCONF,
+	__TLS_INFO_MAX,
+};
+#define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
+
+#define TLS_CONF_BASE 1
+#define TLS_CONF_SW 2
+#define TLS_CONF_HW 3
+#define TLS_CONF_HW_RECORD 4
+
 #endif /* _UAPI_LINUX_TLS_H */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index f8f2d2c3d627..277f7c209fed 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -39,6 +39,7 @@
 #include <linux/netdevice.h>
 #include <linux/sched/signal.h>
 #include <linux/inetdevice.h>
+#include <linux/inet_diag.h>
 
 #include <net/tls.h>
 
@@ -835,6 +836,67 @@ static void tls_update(struct sock *sk, struct proto *p)
 	}
 }
 
+static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+{
+	u16 version, cipher_type;
+	struct tls_context *ctx;
+	struct nlattr *start;
+	int err;
+
+	start = nla_nest_start_noflag(skb, INET_ULP_INFO_TLS);
+	if (!start)
+		return -EMSGSIZE;
+
+	rcu_read_lock();
+	ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
+	if (!ctx) {
+		err = 0;
+		goto nla_failure;
+	}
+	version = ctx->prot_info.version;
+	if (version) {
+		err = nla_put_u16(skb, TLS_INFO_VERSION, version);
+		if (err)
+			goto nla_failure;
+	}
+	cipher_type = ctx->prot_info.cipher_type;
+	if (cipher_type) {
+		err = nla_put_u16(skb, TLS_INFO_CIPHER, cipher_type);
+		if (err)
+			goto nla_failure;
+	}
+	err = nla_put_u16(skb, TLS_INFO_TXCONF, tls_user_config(ctx, true));
+	if (err)
+		goto nla_failure;
+
+	err = nla_put_u16(skb, TLS_INFO_RXCONF, tls_user_config(ctx, false));
+	if (err)
+		goto nla_failure;
+
+	rcu_read_unlock();
+	nla_nest_end(skb, start);
+	return 0;
+
+nla_failure:
+	rcu_read_unlock();
+	nla_nest_cancel(skb, start);
+	return err;
+}
+
+static size_t tls_get_info_size(const struct sock *sk)
+{
+	size_t size = 0;
+
+	size += nla_total_size(0) +		/* INET_ULP_INFO_TLS */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_VERSION */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_CIPHER */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_RXCONF */
+		nla_total_size(sizeof(u16)) +	/* TLS_INFO_TXCONF */
+		0;
+
+	return size;
+}
+
 void tls_register_device(struct tls_device *device)
 {
 	spin_lock_bh(&device_spinlock);
@@ -856,6 +918,8 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = {
 	.owner			= THIS_MODULE,
 	.init			= tls_init,
 	.update			= tls_update,
+	.get_info		= tls_get_info,
+	.get_info_size		= tls_get_info_size,
 };
 
 static int __init tls_register(void)
-- 
cgit v1.2.3


From c7282b501f22a21ac8dfb6d3856aa1a92a7df5d5 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Fri, 30 Aug 2019 05:39:44 -0500
Subject: devlink: Make port index data type as unsigned int

Devlink port index attribute is returned to users as u32 through
netlink response.
Change index data type from 'unsigned' to 'unsigned int' to avoid
below checkpatch.pl warning.

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
81: FILE: include/net/devlink.h:81:
+       unsigned index;

Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/devlink.h | 2 +-
 net/core/devlink.c    | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 7f43c48f54cd..13523b0a0642 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -75,7 +75,7 @@ struct devlink_port {
 	struct list_head list;
 	struct list_head param_list;
 	struct devlink *devlink;
-	unsigned index;
+	unsigned int index;
 	bool registered;
 	spinlock_t type_lock; /* Protects type and type_dev
 			       * pointer consistency.
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 650f36379203..b7091329987a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -136,7 +136,7 @@ static struct devlink *devlink_get_from_info(struct genl_info *info)
 }
 
 static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
-						      int port_index)
+						      unsigned int port_index)
 {
 	struct devlink_port *devlink_port;
 
@@ -147,7 +147,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
 	return NULL;
 }
 
-static bool devlink_port_index_exists(struct devlink *devlink, int port_index)
+static bool devlink_port_index_exists(struct devlink *devlink,
+				      unsigned int port_index)
 {
 	return devlink_port_get_by_index(devlink, port_index);
 }
-- 
cgit v1.2.3


From 7c322056e3564da1b5bdc3f3cb79229582955eb2 Mon Sep 17 00:00:00 2001
From: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Date: Mon, 26 Aug 2019 22:34:42 +0200
Subject: leds: Replace {devm_}led_classdev_register() macros with inlines

Replace preprocessor macro aliases for legacy LED registration helpers
with inline functions. It will allow to avoid misleading compiler error
messages about missing symbol that actually wasn't explicitly used
in the code. It used to occur when CONFIG_LEDS_CLASS was undefined
and legacy (non-ext) function had been used in the code.

Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
---
 include/linux/leds.h | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/leds.h b/include/linux/leds.h
index d101fd13e18e..b8df71193329 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -157,18 +157,39 @@ struct led_classdev {
  * @led_cdev: the led_classdev structure for this device
  * @init_data: the LED class device initialization data
  *
+ * Register a new object of LED class, with name derived from init_data.
+ *
  * Returns: 0 on success or negative error value on failure
  */
 extern int led_classdev_register_ext(struct device *parent,
 				     struct led_classdev *led_cdev,
 				     struct led_init_data *init_data);
-#define led_classdev_register(parent, led_cdev)			\
-	led_classdev_register_ext(parent, led_cdev, NULL)
+
+/**
+ * led_classdev_register - register a new object of LED class
+ * @parent: LED controller device this LED is driven by
+ * @led_cdev: the led_classdev structure for this device
+ *
+ * Register a new object of LED class, with name derived from the name property
+ * of passed led_cdev argument.
+ *
+ * Returns: 0 on success or negative error value on failure
+ */
+static inline int led_classdev_register(struct device *parent,
+					struct led_classdev *led_cdev)
+{
+	return led_classdev_register_ext(parent, led_cdev, NULL);
+}
+
 extern int devm_led_classdev_register_ext(struct device *parent,
 					  struct led_classdev *led_cdev,
 					  struct led_init_data *init_data);
-#define devm_led_classdev_register(parent, led_cdev)		\
-	devm_led_classdev_register_ext(parent, led_cdev, NULL)
+
+static inline int devm_led_classdev_register(struct device *parent,
+					     struct led_classdev *led_cdev)
+{
+	return devm_led_classdev_register_ext(parent, led_cdev, NULL);
+}
 extern void led_classdev_unregister(struct led_classdev *led_cdev);
 extern void devm_led_classdev_unregister(struct device *parent,
 					 struct led_classdev *led_cdev);
-- 
cgit v1.2.3


From 602828c1aade576ac5f3fbd59b4eb014c5fc2414 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Tue, 20 Aug 2019 23:05:42 -0400
Subject: __div64_const32(): improve the generic C version

Let's rework that code to avoid large immediate values and convert some
64-bit variables to 32-bit ones when possible. This allows gcc to
produce smaller and better code. This even produces optimal code on
RISC-V.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/div64.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index dc9726fdac8f..33358245b4fa 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 	uint32_t m_hi = m >> 32;
 	uint32_t n_lo = n;
 	uint32_t n_hi = n >> 32;
-	uint64_t res, tmp;
+	uint64_t res;
+	uint32_t res_lo, res_hi, tmp;
 
 	if (!bias) {
 		res = ((uint64_t)m_lo * n_lo) >> 32;
@@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 		res = (m + (uint64_t)m_lo * n_lo) >> 32;
 	} else {
 		res = m + (uint64_t)m_lo * n_lo;
-		tmp = (res < m) ? (1ULL << 32) : 0;
-		res = (res >> 32) + tmp;
+		res_lo = res >> 32;
+		res_hi = (res_lo < m_hi);
+		res = res_lo | ((uint64_t)res_hi << 32);
 	}
 
 	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
@@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 		res += (uint64_t)m_hi * n_lo;
 		res >>= 32;
 	} else {
-		tmp = res += (uint64_t)m_lo * n_hi;
+		res += (uint64_t)m_lo * n_hi;
+		tmp = res >> 32;
 		res += (uint64_t)m_hi * n_lo;
-		tmp = (res < tmp) ? (1ULL << 32) : 0;
-		res = (res >> 32) + tmp;
+		res_lo = res >> 32;
+		res_hi = (res_lo < tmp);
+		res = res_lo | ((uint64_t)res_hi << 32);
 	}
 
 	res += (uint64_t)m_hi * n_hi;
-- 
cgit v1.2.3


From 9b87647c665dbf93173ca2f43986902b59dfbbba Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Thu, 29 Aug 2019 00:09:34 +0300
Subject: asm-generic: add unlikely to default BUG_ON(x)

Add unlikely to default BUG_ON(x) in !CONFIG_BUG. It makes
the define consistent with BUG_ON(x) in CONFIG_BUG.

Signed-off-by: Denis Efremov <efremov@linux.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/bug.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index aa6c093d9ce9..7357a3c942a0 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -185,7 +185,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #endif
 
 #ifndef HAVE_ARCH_BUG_ON
-#define BUG_ON(condition) do { if (condition) BUG(); } while (0)
+#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
 #endif
 
 #ifndef HAVE_ARCH_WARN_ON
-- 
cgit v1.2.3


From c9b9dcb430b3cd0ad2b04c360c4e528d73430481 Mon Sep 17 00:00:00 2001
From: Ariel Levkovich <lariel@mellanox.com>
Date: Thu, 29 Aug 2019 23:42:30 +0000
Subject: net/mlx5: Move device memory management to mlx5_core

Move the device memory allocation and deallocation commands
SW ICM memory to mlx5_core to expose this API for all
mlx5_core users.

This comes as preparation for supporting SW steering in kernel
where it will be required to allocate and register device
memory for direct rule insertion.

In addition, an API to register this device memory for future
remote access operations is introduced using the create_mkey
commands.

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cmd.c                   | 130 ------------
 drivers/infiniband/hw/mlx5/cmd.h                   |   4 -
 drivers/infiniband/hw/mlx5/main.c                  | 102 +++-------
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |   2 -
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c   | 223 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |   5 +
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |   3 +
 include/linux/mlx5/driver.h                        |  14 ++
 9 files changed, 276 insertions(+), 209 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 6c8645033102..4937947400cd 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -186,136 +186,6 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
 	return err;
 }
 
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			  u16 uid, phys_addr_t *addr, u32 *obj_id)
-{
-	struct mlx5_core_dev *dev = dm->dev;
-	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
-	u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
-	unsigned long *block_map;
-	u64 icm_start_addr;
-	u32 log_icm_size;
-	u32 num_blocks;
-	u32 max_blocks;
-	u64 block_idx;
-	void *sw_icm;
-	int ret;
-
-	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
-		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
-	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
-	switch (type) {
-	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
-		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
-						steering_sw_icm_start_address);
-		log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
-		block_map = dm->steering_sw_icm_alloc_blocks;
-		break;
-	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
-					header_modify_sw_icm_start_address);
-		log_icm_size = MLX5_CAP_DEV_MEM(dev,
-						log_header_modify_sw_icm_size);
-		block_map = dm->header_modify_sw_icm_alloc_blocks;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
-		     MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
-	spin_lock(&dm->lock);
-	block_idx = bitmap_find_next_zero_area(block_map,
-					       max_blocks,
-					       0,
-					       num_blocks, 0);
-
-	if (block_idx < max_blocks)
-		bitmap_set(block_map,
-			   block_idx, num_blocks);
-
-	spin_unlock(&dm->lock);
-
-	if (block_idx >= max_blocks)
-		return -ENOMEM;
-
-	sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
-	icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-	MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
-		   icm_start_addr);
-	MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
-
-	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (ret) {
-		spin_lock(&dm->lock);
-		bitmap_clear(block_map,
-			     block_idx, num_blocks);
-		spin_unlock(&dm->lock);
-
-		return ret;
-	}
-
-	*addr = icm_start_addr;
-	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-
-	return 0;
-}
-
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			    u16 uid, phys_addr_t addr, u32 obj_id)
-{
-	struct mlx5_core_dev *dev = dm->dev;
-	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
-	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
-	unsigned long *block_map;
-	u32 num_blocks;
-	u64 start_idx;
-	int err;
-
-	num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >>
-		     MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-
-	switch (type) {
-	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
-		start_idx =
-			(addr - MLX5_CAP64_DEV_MEM(
-					dev, steering_sw_icm_start_address)) >>
-			MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-		block_map = dm->steering_sw_icm_alloc_blocks;
-		break;
-	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		start_idx =
-			(addr -
-			 MLX5_CAP64_DEV_MEM(
-				 dev, header_modify_sw_icm_start_address)) >>
-			MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
-		block_map = dm->header_modify_sw_icm_alloc_blocks;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
-		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
-	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
-	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
-
-	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		return err;
-
-	spin_lock(&dm->lock);
-	bitmap_clear(block_map,
-		     start_idx, num_blocks);
-	spin_unlock(&dm->lock);
-
-	return 0;
-}
-
 int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
 {
 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 0572dcba6eae..169cab4915e3 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -65,8 +65,4 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
 			     u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		     u16 opmod, u8 port);
-int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			  u16 uid, phys_addr_t *addr, u32 *obj_id);
-int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
-			    u16 uid, phys_addr_t addr, u32 obj_id);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c2a5780cb394..42fdbbea06f0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2280,6 +2280,7 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
 			return -EOPNOTSUPP;
 		break;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
 		if (!capable(CAP_SYS_RAWIO) ||
 		    !capable(CAP_NET_RAW))
 			return -EPERM;
@@ -2344,20 +2345,20 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
 				  struct uverbs_attr_bundle *attrs,
 				  int type)
 {
-	struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+	struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
 	u64 act_size;
 	int err;
 
 	/* Allocation size must a multiple of the basic block size
 	 * and a power of 2.
 	 */
-	act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+	act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
 	act_size = roundup_pow_of_two(act_size);
 
 	dm->size = act_size;
-	err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
-				    to_mucontext(ctx)->devx_uid, &dm->dev_addr,
-				    &dm->icm_dm.obj_id);
+	err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
+				   to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+				   &dm->icm_dm.obj_id);
 	if (err)
 		return err;
 
@@ -2365,9 +2366,9 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
 			     MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
 			     &dm->dev_addr, sizeof(dm->dev_addr));
 	if (err)
-		mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
-					to_mucontext(ctx)->devx_uid,
-					dm->dev_addr, dm->icm_dm.obj_id);
+		mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
+				       to_mucontext(ctx)->devx_uid, dm->dev_addr,
+				       dm->icm_dm.obj_id);
 
 	return err;
 }
@@ -2407,8 +2408,14 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 					    attrs);
 		break;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		err = handle_alloc_dm_sw_icm(context, dm,
+					     attr, attrs,
+					     MLX5_SW_ICM_TYPE_STEERING);
+		break;
 	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
+		err = handle_alloc_dm_sw_icm(context, dm,
+					     attr, attrs,
+					     MLX5_SW_ICM_TYPE_HEADER_MODIFY);
 		break;
 	default:
 		err = -EOPNOTSUPP;
@@ -2428,6 +2435,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
 {
 	struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
 		&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
+	struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
 	struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
 	struct mlx5_ib_dm *dm = to_mdm(ibdm);
 	u32 page_idx;
@@ -2439,19 +2447,23 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
 		if (ret)
 			return ret;
 
-		page_idx = (dm->dev_addr -
-			    pci_resource_start(dm_db->dev->pdev, 0) -
-			    MLX5_CAP64_DEV_MEM(dm_db->dev,
-					       memic_bar_start_addr)) >>
-			   PAGE_SHIFT;
+		page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
+			    MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
+			    PAGE_SHIFT;
 		bitmap_clear(ctx->dm_pages, page_idx,
 			     DIV_ROUND_UP(dm->size, PAGE_SIZE));
 		break;
 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
+					     dm->size, ctx->devx_uid, dm->dev_addr,
+					     dm->icm_dm.obj_id);
+		if (ret)
+			return ret;
+		break;
 	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
-		ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
-					      ctx->devx_uid, dm->dev_addr,
-					      dm->icm_dm.obj_id);
+		ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+					     dm->size, ctx->devx_uid, dm->dev_addr,
+					     dm->icm_dm.obj_id);
 		if (ret)
 			return ret;
 		break;
@@ -6097,8 +6109,6 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 
 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
-	struct mlx5_core_dev *mdev = dev->mdev;
-
 	mlx5_ib_cleanup_multiport_master(dev);
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
 		srcu_barrier(&dev->mr_srcu);
@@ -6106,29 +6116,11 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 	}
 
 	WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
-
-	WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
-		!bitmap_empty(
-			dev->dm.steering_sw_icm_alloc_blocks,
-			BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
-			    MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
-	kfree(dev->dm.steering_sw_icm_alloc_blocks);
-
-	WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
-		!bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
-			      BIT(MLX5_CAP_DEV_MEM(
-					  mdev, log_header_modify_sw_icm_size) -
-				  MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
-
-	kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
 }
 
 static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
-	u64 header_modify_icm_blocks = 0;
-	u64 steering_icm_blocks = 0;
 	int err;
 	int i;
 
@@ -6173,51 +6165,17 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	INIT_LIST_HEAD(&dev->qp_list);
 	spin_lock_init(&dev->reset_flow_resource_lock);
 
-	if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
-	    MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
-		if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
-			steering_icm_blocks =
-				BIT(MLX5_CAP_DEV_MEM(mdev,
-						     log_steering_sw_icm_size) -
-				    MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
-			dev->dm.steering_sw_icm_alloc_blocks =
-				kcalloc(BITS_TO_LONGS(steering_icm_blocks),
-					sizeof(unsigned long), GFP_KERNEL);
-			if (!dev->dm.steering_sw_icm_alloc_blocks)
-				goto err_mp;
-		}
-
-		if (MLX5_CAP64_DEV_MEM(mdev,
-				       header_modify_sw_icm_start_address)) {
-			header_modify_icm_blocks = BIT(
-				MLX5_CAP_DEV_MEM(
-					mdev, log_header_modify_sw_icm_size) -
-				MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
-
-			dev->dm.header_modify_sw_icm_alloc_blocks =
-				kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
-					sizeof(unsigned long), GFP_KERNEL);
-			if (!dev->dm.header_modify_sw_icm_alloc_blocks)
-				goto err_dm;
-		}
-	}
-
 	spin_lock_init(&dev->dm.lock);
 	dev->dm.dev = mdev;
 
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
 		err = init_srcu_struct(&dev->mr_srcu);
 		if (err)
-			goto err_dm;
+			goto err_mp;
 	}
 
 	return 0;
 
-err_dm:
-	kfree(dev->dm.steering_sw_icm_alloc_blocks);
-	kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
-
 err_mp:
 	mlx5_ib_cleanup_multiport_master(dev);
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index c482f19958b3..afd69ba33b2b 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -880,8 +880,6 @@ struct mlx5_dm {
 	 */
 	spinlock_t lock;
 	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
-	unsigned long *steering_sw_icm_alloc_blocks;
-	unsigned long *header_modify_sw_icm_alloc_blocks;
 };
 
 struct mlx5_read_counters_attr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 57d2cc666fe3..4eb52e8500c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -15,7 +15,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
-		lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
+		lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
 		diag/fw_tracer.o diag/crdump.o devlink.o
 
 #
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
new file mode 100644
index 000000000000..e065c2f68f5a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies
+
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/device.h>
+
+#include "mlx5_core.h"
+#include "lib/mlx5.h"
+
+struct mlx5_dm {
+	/* protect access to icm bitmask */
+	spinlock_t lock;
+	unsigned long *steering_sw_icm_alloc_blocks;
+	unsigned long *header_modify_sw_icm_alloc_blocks;
+};
+
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev)
+{
+	u64 header_modify_icm_blocks = 0;
+	u64 steering_icm_blocks = 0;
+	struct mlx5_dm *dm;
+
+	if (!(MLX5_CAP_GEN_64(dev, general_obj_types) & MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM))
+		return 0;
+
+	dm = kzalloc(sizeof(*dm), GFP_KERNEL);
+	if (!dm)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&dm->lock);
+
+	if (MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address)) {
+		steering_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->steering_sw_icm_alloc_blocks =
+			kcalloc(BITS_TO_LONGS(steering_icm_blocks),
+				sizeof(unsigned long), GFP_KERNEL);
+		if (!dm->steering_sw_icm_alloc_blocks)
+			goto err_steering;
+	}
+
+	if (MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address)) {
+		header_modify_icm_blocks =
+			BIT(MLX5_CAP_DEV_MEM(dev, log_header_modify_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+		dm->header_modify_sw_icm_alloc_blocks =
+			kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
+				sizeof(unsigned long), GFP_KERNEL);
+		if (!dm->header_modify_sw_icm_alloc_blocks)
+			goto err_modify_hdr;
+	}
+
+	return dm;
+
+err_modify_hdr:
+	kfree(dm->steering_sw_icm_alloc_blocks);
+
+err_steering:
+	kfree(dm);
+
+	return ERR_PTR(-ENOMEM);
+}
+
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
+{
+	struct mlx5_dm *dm = dev->dm;
+
+	if (!dev->dm)
+		return;
+
+	if (dm->steering_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->steering_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size) -
+					  MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		kfree(dm->steering_sw_icm_alloc_blocks);
+	}
+
+	if (dm->header_modify_sw_icm_alloc_blocks) {
+		WARN_ON(!bitmap_empty(dm->header_modify_sw_icm_alloc_blocks,
+				      BIT(MLX5_CAP_DEV_MEM(dev,
+							   log_header_modify_sw_icm_size) -
+				      MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))));
+		kfree(dm->header_modify_sw_icm_alloc_blocks);
+	}
+
+	kfree(dm);
+}
+
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			 u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id)
+{
+	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+	struct mlx5_dm *dm = dev->dm;
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u32 log_icm_size;
+	u32 max_blocks;
+	u64 block_idx;
+	void *sw_icm;
+	int ret;
+
+	if (!dev->dm)
+		return -EOPNOTSUPP;
+
+	if (!length || (length & (length - 1)) ||
+	    length & (MLX5_SW_ICM_BLOCK_SIZE(dev) - 1))
+		return -EINVAL;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	switch (type) {
+	case MLX5_SW_ICM_TYPE_STEERING:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev,
+						log_header_modify_sw_icm_size);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!block_map)
+		return -EOPNOTSUPP;
+
+	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+	spin_lock(&dm->lock);
+	block_idx = bitmap_find_next_zero_area(block_map,
+					       max_blocks,
+					       0,
+					       num_blocks, 0);
+
+	if (block_idx < max_blocks)
+		bitmap_set(block_map,
+			   block_idx, num_blocks);
+
+	spin_unlock(&dm->lock);
+
+	if (block_idx >= max_blocks)
+		return -ENOMEM;
+
+	sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+	icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+		   icm_start_addr);
+	MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret) {
+		spin_lock(&dm->lock);
+		bitmap_clear(block_map,
+			     block_idx, num_blocks);
+		spin_unlock(&dm->lock);
+
+		return ret;
+	}
+
+	*addr = icm_start_addr;
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_alloc);
+
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			   u64 length, u16 uid, phys_addr_t addr, u32 obj_id)
+{
+	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	struct mlx5_dm *dm = dev->dm;
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u64 start_idx;
+	int err;
+
+	if (!dev->dm)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case MLX5_SW_ICM_TYPE_STEERING:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, steering_sw_icm_start_address);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_SW_ICM_TYPE_HEADER_MODIFY:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev, header_modify_sw_icm_start_address);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	start_idx = (addr - icm_start_addr) >> MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	spin_lock(&dm->lock);
+	bitmap_clear(block_map,
+		     start_idx, num_blocks);
+	spin_unlock(&dm->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_dm_sw_icm_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7f70ecb1db6d..c1679d11d71f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -879,6 +879,10 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 		goto err_eswitch_cleanup;
 	}
 
+	dev->dm = mlx5_dm_create(dev);
+	if (IS_ERR(dev->dm))
+		mlx5_core_warn(dev, "Failed to init device memory%d\n", err);
+
 	dev->tracer = mlx5_fw_tracer_create(dev);
 
 	return 0;
@@ -912,6 +916,7 @@ err_devcom:
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
 	mlx5_fw_tracer_destroy(dev->tracer);
+	mlx5_dm_cleanup(dev);
 	mlx5_fpga_cleanup(dev);
 	mlx5_eswitch_cleanup(dev->priv.eswitch);
 	mlx5_sriov_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 471bbc48bc1f..bbcf4ee40ad5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -198,6 +198,9 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
 int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
 int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
 
+struct mlx5_dm *mlx5_dm_create(struct mlx5_core_dev *dev);
+void mlx5_dm_cleanup(struct mlx5_core_dev *dev);
+
 #define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) &&		\
 			    MLX5_CAP_GEN((mdev), pps_modify) &&		\
 			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) &&	\
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0acd28f2e62c..72bc6ce44b55 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -622,6 +622,11 @@ struct mlx5e_resources {
 	struct mlx5_sq_bfreg       bfreg;
 };
 
+enum mlx5_sw_icm_type {
+	MLX5_SW_ICM_TYPE_STEERING,
+	MLX5_SW_ICM_TYPE_HEADER_MODIFY,
+};
+
 #define MLX5_MAX_RESERVED_GIDS 8
 
 struct mlx5_rsvd_gids {
@@ -653,10 +658,14 @@ struct mlx5_clock {
 	struct mlx5_pps            pps_info;
 };
 
+struct mlx5_dm;
 struct mlx5_fw_tracer;
 struct mlx5_vxlan;
 struct mlx5_geneve;
 
+#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
+#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+
 struct mlx5_core_dev {
 	struct device *device;
 	enum mlx5_coredev_type coredev_type;
@@ -690,6 +699,7 @@ struct mlx5_core_dev {
 	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
+	struct mlx5_dm          *dm;
 	struct mlx5_vxlan       *vxlan;
 	struct mlx5_geneve      *geneve;
 	struct {
@@ -1072,6 +1082,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 				 size_t *offsets);
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
+int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			 u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id);
+int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
+			   u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
 
 #ifdef CONFIG_MLX5_CORE_IPOIB
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-- 
cgit v1.2.3


From 97b5484ed608605dca9e461c0289d1195ba9608d Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Thu, 29 Aug 2019 23:42:32 +0000
Subject: net/mlx5: Add HW bits and definitions required for SW steering

Add the required Software Steering hardware definitions and
bits to mlx5_ifc.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Yevgeny Klitenik <kliten@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |   7 ++
 include/linux/mlx5/mlx5_ifc.h | 235 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 205 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ce9839c8bc1a..5767d7fab5f3 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1162,6 +1162,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_FLOWTABLE(mdev, cap) \
 	MLX5_GET(flow_table_nic_cap, mdev->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
 
+#define MLX5_CAP64_FLOWTABLE(mdev, cap) \
+	MLX5_GET64(flow_table_nic_cap, (mdev)->caps.hca_cur[MLX5_CAP_FLOW_TABLE], cap)
+
 #define MLX5_CAP_FLOWTABLE_MAX(mdev, cap) \
 	MLX5_GET(flow_table_nic_cap, mdev->caps.hca_max[MLX5_CAP_FLOW_TABLE], cap)
 
@@ -1225,6 +1228,10 @@ enum mlx5_qcam_feature_groups {
 	MLX5_GET(e_switch_cap, \
 		 mdev->caps.hca_cur[MLX5_CAP_ESWITCH], cap)
 
+#define MLX5_CAP64_ESW_FLOWTABLE(mdev, cap) \
+	MLX5_GET64(flow_table_eswitch_cap, \
+		(mdev)->caps.hca_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
+
 #define MLX5_CAP_ESW_MAX(mdev, cap) \
 	MLX5_GET(e_switch_cap, \
 		 mdev->caps.hca_max[MLX5_CAP_ESWITCH], cap)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4e278114d8b3..76e945dbc7ed 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -282,6 +282,7 @@ enum {
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
 	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
+	MLX5_CMD_OP_SYNC_STEERING                 = 0xb00,
 	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
 	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
@@ -485,7 +486,11 @@ union mlx5_ifc_gre_key_bits {
 };
 
 struct mlx5_ifc_fte_match_set_misc_bits {
-	u8         reserved_at_0[0x8];
+	u8         gre_c_present[0x1];
+	u8         reserved_auto1[0x1];
+	u8         gre_k_present[0x1];
+	u8         gre_s_present[0x1];
+	u8         source_vhca_port[0x4];
 	u8         source_sqn[0x18];
 
 	u8         source_eswitch_owner_vhca_id[0x10];
@@ -565,12 +570,38 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
 
 	u8         metadata_reg_a[0x20];
 
-	u8         reserved_at_1a0[0x60];
+	u8         metadata_reg_b[0x20];
+
+	u8         reserved_at_1c0[0x40];
 };
 
 struct mlx5_ifc_fte_match_set_misc3_bits {
-	u8         reserved_at_0[0x120];
+	u8         inner_tcp_seq_num[0x20];
+
+	u8         outer_tcp_seq_num[0x20];
+
+	u8         inner_tcp_ack_num[0x20];
+
+	u8         outer_tcp_ack_num[0x20];
+
+	u8	   reserved_at_80[0x8];
+	u8         outer_vxlan_gpe_vni[0x18];
+
+	u8         outer_vxlan_gpe_next_protocol[0x8];
+	u8         outer_vxlan_gpe_flags[0x8];
+	u8	   reserved_at_b0[0x10];
+
+	u8	   icmp_header_data[0x20];
+
+	u8	   icmpv6_header_data[0x20];
+
+	u8	   icmp_type[0x8];
+	u8	   icmp_code[0x8];
+	u8	   icmpv6_type[0x8];
+	u8	   icmpv6_code[0x8];
+
 	u8         geneve_tlv_option_0_data[0x20];
+
 	u8         reserved_at_140[0xc0];
 };
 
@@ -666,7 +697,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-	u8         reserved_at_e00[0x7200];
+	u8         reserved_at_e00[0x1200];
+
+	u8         sw_steering_nic_rx_action_drop_icm_address[0x40];
+
+	u8         sw_steering_nic_tx_action_drop_icm_address[0x40];
+
+	u8         sw_steering_nic_tx_action_allow_icm_address[0x40];
+
+	u8         reserved_at_20c0[0x5f40];
 };
 
 enum {
@@ -698,7 +737,17 @@ struct mlx5_ifc_flow_table_eswitch_cap_bits {
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_esw_acl_egress;
 
-	u8      reserved_at_800[0x7800];
+	u8      reserved_at_800[0x1000];
+
+	u8      sw_steering_fdb_action_drop_icm_address_rx[0x40];
+
+	u8      sw_steering_fdb_action_drop_icm_address_tx[0x40];
+
+	u8      sw_steering_uplink_icm_address_rx[0x40];
+
+	u8      sw_steering_uplink_icm_address_tx[0x40];
+
+	u8      reserved_at_1900[0x6700];
 };
 
 enum {
@@ -849,6 +898,25 @@ struct mlx5_ifc_roce_cap_bits {
 	u8         reserved_at_100[0x700];
 };
 
+struct mlx5_ifc_sync_steering_in_bits {
+	u8         opcode[0x10];
+	u8         uid[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0xc0];
+};
+
+struct mlx5_ifc_sync_steering_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
 struct mlx5_ifc_device_mem_cap_bits {
 	u8         memic[0x1];
 	u8         reserved_at_1[0x1f];
@@ -1041,6 +1109,12 @@ enum {
 	MLX5_CAP_UMR_FENCE_NONE		= 0x2,
 };
 
+enum {
+	MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED	= 1 << 7,
+	MLX5_FLEX_PARSER_ICMP_V4_ENABLED	= 1 << 8,
+	MLX5_FLEX_PARSER_ICMP_V6_ENABLED	= 1 << 9,
+};
+
 enum {
 	MLX5_UCTX_CAP_RAW_TX = 1UL << 0,
 	MLX5_UCTX_CAP_INTERNAL_DEV_RES = 1UL << 1,
@@ -1414,7 +1488,14 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         reserved_at_6c0[0x4];
 	u8         flex_parser_id_geneve_tlv_option_0[0x4];
-	u8	   reserved_at_6c8[0x28];
+	u8         flex_parser_id_icmp_dw1[0x4];
+	u8         flex_parser_id_icmp_dw0[0x4];
+	u8         flex_parser_id_icmpv6_dw1[0x4];
+	u8         flex_parser_id_icmpv6_dw0[0x4];
+	u8         flex_parser_id_outer_first_mpls_over_gre[0x4];
+	u8         flex_parser_id_outer_first_mpls_over_udp_label[0x4];
+
+	u8	   reserved_at_6e0[0x10];
 	u8	   sf_base_id[0x10];
 
 	u8	   reserved_at_700[0x80];
@@ -2652,6 +2733,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_debug_cap_bits debug_cap;
 	struct mlx5_ifc_fpga_cap_bits fpga_cap;
 	struct mlx5_ifc_tls_cap_bits tls_cap;
+	struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
 	u8         reserved_at_0[0x8000];
 };
 
@@ -3255,7 +3337,11 @@ struct mlx5_ifc_esw_vport_context_bits {
 	u8         cvlan_pcp[0x3];
 	u8         cvlan_id[0xc];
 
-	u8         reserved_at_60[0x7a0];
+	u8         reserved_at_60[0x720];
+
+	u8         sw_steering_vport_icm_address_rx[0x40];
+
+	u8         sw_steering_vport_icm_address_tx[0x40];
 };
 
 enum {
@@ -4941,23 +5027,98 @@ struct mlx5_ifc_query_hca_cap_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x40];
+	u8         other_function[0x1];
+	u8         reserved_at_41[0xf];
+	u8         function_id[0x10];
+
+	u8         reserved_at_60[0x20];
 };
 
-struct mlx5_ifc_query_flow_table_out_bits {
+struct mlx5_ifc_other_hca_cap_bits {
+	u8         roce[0x1];
+	u8         reserved_0[0x27f];
+};
+
+struct mlx5_ifc_query_other_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_at_8[0x18];
+	u8         reserved_0[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x80];
+	u8         reserved_1[0x40];
 
-	u8         reserved_at_c0[0x8];
+	struct     mlx5_ifc_other_hca_cap_bits other_capability;
+};
+
+struct mlx5_ifc_query_other_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         function_id[0x10];
+
+	u8         reserved_3[0x20];
+};
+
+struct mlx5_ifc_modify_other_hca_cap_out_bits {
+	u8         status[0x8];
+	u8         reserved_0[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_1[0x40];
+};
+
+struct mlx5_ifc_modify_other_hca_cap_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_0[0x10];
+
+	u8         reserved_1[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_2[0x10];
+	u8         function_id[0x10];
+	u8         field_select[0x20];
+
+	struct     mlx5_ifc_other_hca_cap_bits other_capability;
+};
+
+struct mlx5_ifc_flow_table_context_bits {
+	u8         reformat_en[0x1];
+	u8         decap_en[0x1];
+	u8         sw_owner[0x1];
+	u8         termination_table[0x1];
+	u8         table_miss_action[0x4];
 	u8         level[0x8];
-	u8         reserved_at_d0[0x8];
+	u8         reserved_at_10[0x8];
 	u8         log_size[0x8];
 
-	u8         reserved_at_e0[0x120];
+	u8         reserved_at_20[0x8];
+	u8         table_miss_id[0x18];
+
+	u8         reserved_at_40[0x8];
+	u8         lag_master_next_table_id[0x18];
+
+	u8         reserved_at_60[0x60];
+
+	u8         sw_owner_icm_root_1[0x40];
+
+	u8         sw_owner_icm_root_0[0x40];
+
+};
+
+struct mlx5_ifc_query_flow_table_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x80];
+
+	struct mlx5_ifc_flow_table_context_bits flow_table_context;
 };
 
 struct mlx5_ifc_query_flow_table_in_bits {
@@ -5227,7 +5388,7 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
 	u8         reserved_at_60[0x20];
 };
 
-enum {
+enum mlx5_reformat_ctx_type {
 	MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
 	MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
 	MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
@@ -5323,7 +5484,16 @@ enum {
 	MLX5_ACTION_IN_FIELD_OUT_DIPV4         = 0x16,
 	MLX5_ACTION_IN_FIELD_OUT_FIRST_VID     = 0x17,
 	MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT = 0x47,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_A    = 0x49,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_B    = 0x50,
 	MLX5_ACTION_IN_FIELD_METADATA_REG_C_0  = 0x51,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_C_1  = 0x52,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_C_2  = 0x53,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_C_3  = 0x54,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_C_4  = 0x55,
+	MLX5_ACTION_IN_FIELD_METADATA_REG_C_5  = 0x56,
+	MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM   = 0x59,
+	MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM   = 0x5B,
 };
 
 struct mlx5_ifc_alloc_modify_header_context_out_bits {
@@ -7369,35 +7539,26 @@ struct mlx5_ifc_create_mkey_in_bits {
 	u8         klm_pas_mtt[0][0x20];
 };
 
+enum {
+	MLX5_FLOW_TABLE_TYPE_NIC_RX		= 0x0,
+	MLX5_FLOW_TABLE_TYPE_NIC_TX		= 0x1,
+	MLX5_FLOW_TABLE_TYPE_ESW_EGRESS_ACL	= 0x2,
+	MLX5_FLOW_TABLE_TYPE_ESW_INGRESS_ACL	= 0x3,
+	MLX5_FLOW_TABLE_TYPE_FDB		= 0X4,
+	MLX5_FLOW_TABLE_TYPE_SNIFFER_RX		= 0X5,
+	MLX5_FLOW_TABLE_TYPE_SNIFFER_TX		= 0X6,
+};
+
 struct mlx5_ifc_create_flow_table_out_bits {
 	u8         status[0x8];
-	u8         reserved_at_8[0x18];
+	u8         icm_address_63_40[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x8];
+	u8         icm_address_39_32[0x8];
 	u8         table_id[0x18];
 
-	u8         reserved_at_60[0x20];
-};
-
-struct mlx5_ifc_flow_table_context_bits {
-	u8         reformat_en[0x1];
-	u8         decap_en[0x1];
-	u8         reserved_at_2[0x1];
-	u8         termination_table[0x1];
-	u8         table_miss_action[0x4];
-	u8         level[0x8];
-	u8         reserved_at_10[0x8];
-	u8         log_size[0x8];
-
-	u8         reserved_at_20[0x8];
-	u8         table_miss_id[0x18];
-
-	u8         reserved_at_40[0x8];
-	u8         lag_master_next_table_id[0x18];
-
-	u8         reserved_at_60[0xe0];
+	u8         icm_address_31_0[0x20];
 };
 
 struct mlx5_ifc_create_flow_table_in_bits {
-- 
cgit v1.2.3


From f813cb506b8c9abc106314e1f95c9f2ebf260988 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 29 Aug 2019 23:42:36 +0000
Subject: net/mlx5: Add stub for mlx5_eswitch_mode

Return MLX5_ESWITCH_NONE when CONFIG_MLX5_ESWITCH
is not selected.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/eswitch.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 46b5ba029802..825920d3ca40 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -61,7 +61,6 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw,
 struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
 						u16 vport_num);
 void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type);
-u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
 struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
 				    u16 vport_num, u32 sqn);
@@ -75,7 +74,14 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
 bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw);
 u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw,
 					      u16 vport_num);
+u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw);
 #else  /* CONFIG_MLX5_ESWITCH */
+
+static inline u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw)
+{
+	return MLX5_ESWITCH_NONE;
+}
+
 static inline enum devlink_eswitch_encap_mode
 mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
 {
-- 
cgit v1.2.3


From b9a801dfa59163dc2db8147a98af406eb79e51de Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 1 Aug 2019 22:03:35 +0300
Subject: mfd: Add support for Merrifield Basin Cove PMIC

Add an MFD driver for Intel Merrifield Basin Cove PMIC.

Firmware on the platforms which are using Basin Cove PMIC is "smarter"
than on the rest supported by vanilla kernel. It handles first level
of interrupt itself, while others do it on OS level.

The driver is done in the same way as the rest of Intel PMIC MFD drivers
in the kernel to support the initial design. The design allows to use
one driver among few PMICs without knowing implementation details of
the each hardware version or generation.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig                      |  11 +++
 drivers/mfd/Makefile                     |   1 +
 drivers/mfd/intel_soc_pmic_mrfld.c       | 157 +++++++++++++++++++++++++++++++
 include/linux/mfd/intel_soc_pmic_mrfld.h |  81 ++++++++++++++++
 4 files changed, 250 insertions(+)
 create mode 100644 drivers/mfd/intel_soc_pmic_mrfld.c
 create mode 100644 include/linux/mfd/intel_soc_pmic_mrfld.h

(limited to 'include')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 4a07afe50b35..a6854d41d1ca 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -597,6 +597,17 @@ config INTEL_SOC_PMIC_CHTDC_TI
 	  Select this option for supporting Dollar Cove (TI version) PMIC
 	  device that is found on some Intel Cherry Trail systems.
 
+config INTEL_SOC_PMIC_MRFLD
+	tristate "Support for Intel Merrifield Basin Cove PMIC"
+	depends on GPIOLIB
+	depends on ACPI
+	depends on INTEL_SCU_IPC
+	select MFD_CORE
+	select REGMAP_IRQ
+	help
+	  Select this option for supporting Basin Cove PMIC device
+	  that is found on Intel Merrifield systems.
+
 config MFD_INTEL_LPSS
 	tristate
 	select COMMON_CLK
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 7b6a6aa4fe42..3ae6fa6552d3 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -242,6 +242,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC_CHTWC)	+= intel_soc_pmic_chtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI)	+= intel_soc_pmic_chtdc_ti.o
 mt6397-objs	:= mt6397-core.o mt6397-irq.o
 obj-$(CONFIG_MFD_MT6397)	+= mt6397.o
+obj-$(CONFIG_INTEL_SOC_PMIC_MRFLD)	+= intel_soc_pmic_mrfld.o
 
 obj-$(CONFIG_MFD_ALTERA_A10SR)	+= altera-a10sr.o
 obj-$(CONFIG_MFD_ALTERA_SYSMGR) += altera-sysmgr.o
diff --git a/drivers/mfd/intel_soc_pmic_mrfld.c b/drivers/mfd/intel_soc_pmic_mrfld.c
new file mode 100644
index 000000000000..26a1551c5faf
--- /dev/null
+++ b/drivers/mfd/intel_soc_pmic_mrfld.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Device access for Basin Cove PMIC
+ *
+ * Copyright (c) 2019, Intel Corporation.
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/interrupt.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/intel_soc_pmic.h>
+#include <linux/mfd/intel_soc_pmic_mrfld.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#include <asm/intel_scu_ipc.h>
+
+/*
+ * Level 2 IRQs
+ *
+ * Firmware on the systems with Basin Cove PMIC services Level 1 IRQs
+ * without an assistance. Thus, each of the Level 1 IRQ is represented
+ * as a separate RTE in IOAPIC.
+ */
+static struct resource irq_level2_resources[] = {
+	DEFINE_RES_IRQ(0), /* power button */
+	DEFINE_RES_IRQ(0), /* TMU */
+	DEFINE_RES_IRQ(0), /* thermal */
+	DEFINE_RES_IRQ(0), /* BCU */
+	DEFINE_RES_IRQ(0), /* ADC */
+	DEFINE_RES_IRQ(0), /* charger */
+	DEFINE_RES_IRQ(0), /* GPIO */
+};
+
+static const struct mfd_cell bcove_dev[] = {
+	{
+		.name = "mrfld_bcove_pwrbtn",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[0],
+	}, {
+		.name = "mrfld_bcove_tmu",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[1],
+	}, {
+		.name = "mrfld_bcove_thermal",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[2],
+	}, {
+		.name = "mrfld_bcove_bcu",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[3],
+	}, {
+		.name = "mrfld_bcove_adc",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[4],
+	}, {
+		.name = "mrfld_bcove_charger",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[5],
+	}, {
+		.name = "mrfld_bcove_pwrsrc",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[5],
+	}, {
+		.name = "mrfld_bcove_gpio",
+		.num_resources = 1,
+		.resources = &irq_level2_resources[6],
+	},
+	{	.name = "mrfld_bcove_region", },
+};
+
+static int bcove_ipc_byte_reg_read(void *context, unsigned int reg,
+				    unsigned int *val)
+{
+	u8 ipc_out;
+	int ret;
+
+	ret = intel_scu_ipc_ioread8(reg, &ipc_out);
+	if (ret)
+		return ret;
+
+	*val = ipc_out;
+	return 0;
+}
+
+static int bcove_ipc_byte_reg_write(void *context, unsigned int reg,
+				     unsigned int val)
+{
+	u8 ipc_in = val;
+	int ret;
+
+	ret = intel_scu_ipc_iowrite8(reg, ipc_in);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static const struct regmap_config bcove_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 8,
+	.max_register = 0xff,
+	.reg_write = bcove_ipc_byte_reg_write,
+	.reg_read = bcove_ipc_byte_reg_read,
+};
+
+static int bcove_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct intel_soc_pmic *pmic;
+	unsigned int i;
+	int ret;
+
+	pmic = devm_kzalloc(dev, sizeof(*pmic), GFP_KERNEL);
+	if (!pmic)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, pmic);
+	pmic->dev = &pdev->dev;
+
+	pmic->regmap = devm_regmap_init(dev, NULL, pmic, &bcove_regmap_config);
+	if (IS_ERR(pmic->regmap))
+		return PTR_ERR(pmic->regmap);
+
+	for (i = 0; i < ARRAY_SIZE(irq_level2_resources); i++) {
+		ret = platform_get_irq(pdev, i);
+		if (ret < 0)
+			return ret;
+
+		irq_level2_resources[i].start = ret;
+		irq_level2_resources[i].end = ret;
+	}
+
+	return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE,
+				    bcove_dev, ARRAY_SIZE(bcove_dev),
+				    NULL, 0, NULL);
+}
+
+static const struct acpi_device_id bcove_acpi_ids[] = {
+	{ "INTC100E" },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, bcove_acpi_ids);
+
+static struct platform_driver bcove_driver = {
+	.driver = {
+		.name = "intel_soc_pmic_mrfld",
+		.acpi_match_table = bcove_acpi_ids,
+	},
+	.probe = bcove_probe,
+};
+module_platform_driver(bcove_driver);
+
+MODULE_DESCRIPTION("IPC driver for Intel SoC Basin Cove PMIC");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/intel_soc_pmic_mrfld.h b/include/linux/mfd/intel_soc_pmic_mrfld.h
new file mode 100644
index 000000000000..4daecd682275
--- /dev/null
+++ b/include/linux/mfd/intel_soc_pmic_mrfld.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for Intel Merrifield Basin Cove PMIC
+ *
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __INTEL_SOC_PMIC_MRFLD_H__
+#define __INTEL_SOC_PMIC_MRFLD_H__
+
+#include <linux/bits.h>
+
+#define BCOVE_ID		0x00
+
+#define BCOVE_ID_MINREV0	GENMASK(2, 0)
+#define BCOVE_ID_MAJREV0	GENMASK(5, 3)
+#define BCOVE_ID_VENDID0	GENMASK(7, 6)
+
+#define BCOVE_MINOR(x)		(unsigned int)(((x) & BCOVE_ID_MINREV0) >> 0)
+#define BCOVE_MAJOR(x)		(unsigned int)(((x) & BCOVE_ID_MAJREV0) >> 3)
+#define BCOVE_VENDOR(x)		(unsigned int)(((x) & BCOVE_ID_VENDID0) >> 6)
+
+#define BCOVE_IRQLVL1		0x01
+
+#define BCOVE_PBIRQ		0x02
+#define BCOVE_TMUIRQ		0x03
+#define BCOVE_THRMIRQ		0x04
+#define BCOVE_BCUIRQ		0x05
+#define BCOVE_ADCIRQ		0x06
+#define BCOVE_CHGRIRQ0		0x07
+#define BCOVE_CHGRIRQ1		0x08
+#define BCOVE_GPIOIRQ		0x09
+#define BCOVE_CRITIRQ		0x0B
+
+#define BCOVE_MIRQLVL1		0x0C
+
+#define BCOVE_MPBIRQ		0x0D
+#define BCOVE_MTMUIRQ		0x0E
+#define BCOVE_MTHRMIRQ		0x0F
+#define BCOVE_MBCUIRQ		0x10
+#define BCOVE_MADCIRQ		0x11
+#define BCOVE_MCHGRIRQ0		0x12
+#define BCOVE_MCHGRIRQ1		0x13
+#define BCOVE_MGPIOIRQ		0x14
+#define BCOVE_MCRITIRQ		0x16
+
+#define BCOVE_SCHGRIRQ0		0x4E
+#define BCOVE_SCHGRIRQ1		0x4F
+
+/* Level 1 IRQs */
+#define BCOVE_LVL1_PWRBTN	BIT(0)	/* power button */
+#define BCOVE_LVL1_TMU		BIT(1)	/* time management unit */
+#define BCOVE_LVL1_THRM		BIT(2)	/* thermal */
+#define BCOVE_LVL1_BCU		BIT(3)	/* burst control unit */
+#define BCOVE_LVL1_ADC		BIT(4)	/* ADC */
+#define BCOVE_LVL1_CHGR		BIT(5)	/* charger */
+#define BCOVE_LVL1_GPIO		BIT(6)	/* GPIO */
+#define BCOVE_LVL1_CRIT		BIT(7)	/* critical event */
+
+/* Level 2 IRQs: power button */
+#define BCOVE_PBIRQ_PBTN	BIT(0)
+#define BCOVE_PBIRQ_UBTN	BIT(1)
+
+/* Level 2 IRQs: ADC */
+#define BCOVE_ADCIRQ_BATTEMP	BIT(2)
+#define BCOVE_ADCIRQ_SYSTEMP	BIT(3)
+#define BCOVE_ADCIRQ_BATTID	BIT(4)
+#define BCOVE_ADCIRQ_VIBATT	BIT(5)
+#define BCOVE_ADCIRQ_CCTICK	BIT(7)
+
+/* Level 2 IRQs: charger */
+#define BCOVE_CHGRIRQ_BAT0ALRT	BIT(4)
+#define BCOVE_CHGRIRQ_BAT1ALRT	BIT(5)
+#define BCOVE_CHGRIRQ_BATCRIT	BIT(6)
+
+#define BCOVE_CHGRIRQ_VBUSDET	BIT(0)
+#define BCOVE_CHGRIRQ_DCDET	BIT(1)
+#define BCOVE_CHGRIRQ_BATTDET	BIT(2)
+#define BCOVE_CHGRIRQ_USBIDDET	BIT(3)
+
+#endif	/* __INTEL_SOC_PMIC_MRFLD_H__ */
-- 
cgit v1.2.3


From 09c1dec470029f812e86aa92908abfd9132c2fab Mon Sep 17 00:00:00 2001
From: Josef Friedl <josef.friedl@speed.at>
Date: Sun, 18 Aug 2019 15:56:04 +0200
Subject: mfd: mt6397: Add mutex include

Add missing mutex.h.

Signed-off-by: Josef Friedl <josef.friedl@speed.at>
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/mt6397/core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/mfd/mt6397/core.h b/include/linux/mfd/mt6397/core.h
index 9320c2a1e3e6..fc88d315bdde 100644
--- a/include/linux/mfd/mt6397/core.h
+++ b/include/linux/mfd/mt6397/core.h
@@ -7,6 +7,8 @@
 #ifndef __MFD_MT6397_CORE_H__
 #define __MFD_MT6397_CORE_H__
 
+#include <linux/mutex.h>
+
 enum chip_id {
 	MT6323_CHIP_ID = 0x23,
 	MT6391_CHIP_ID = 0x91,
-- 
cgit v1.2.3


From 7aa703bb8824384baad732043a925b46a4f3efa8 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 2 Sep 2019 11:53:00 +0200
Subject: mfd / platform: cros_ec: Handle chained ECs as platform devices

An MFD is a device that contains several sub-devices (cells). For instance,
the ChromeOS EC fits in this description as usually contains a charger and
can have other devices with different functions like a Real-Time Clock,
an Audio codec, a Real-Time Clock, ...

If you look at the driver, though, we're doing something odd. We have
two MFD cros-ec drivers where one of them (cros-ec-core) instantiates
another MFD driver as sub-driver (cros-ec-dev), and the latest
instantiates the different sub-devices (Real-Time Clock, Audio codec,
etc).

                  MFD
------------------------------------------
   cros-ec-core
       |___ mfd-cellA (cros-ec-dev)
       |       |__ mfd-cell0
       |       |__ mfd-cell1
       |       |__ ...
       |
       |___ mfd-cellB (cros-ec-dev)
               |__ mfd-cell0
               |__ mfd-cell1
               |__ ...

The problem that was trying to solve is to describe some kind of topology for
the case where we have an EC (cros-ec) chained with another EC
(cros-pd). Apart from that this extends the bounds of what MFD was
designed to do we might be interested on have other kinds of topology that
can't be implemented in that way.

Let's prepare the code to move the cros-ec-core part from MFD to
platform/chrome as this is clearly a platform specific thing non-related
to a MFD device.

  platform/chrome  |         MFD
------------------------------------------
                   |
   cros-ec ________|___ cros-ec-dev
                   |       |__ mfd-cell0
                   |       |__ mfd-cell1
                   |       |__ ...
                   |
   cros-pd ________|___ cros-ec-dev
                   |        |__ mfd-cell0
                   |        |__ mfd-cell1
                   |        |__ ...

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c                   | 61 +++++++++++++++++----------------
 drivers/platform/chrome/cros_ec_i2c.c   |  8 +++++
 drivers/platform/chrome/cros_ec_lpc.c   |  3 +-
 drivers/platform/chrome/cros_ec_rpmsg.c |  2 ++
 drivers/platform/chrome/cros_ec_spi.c   |  8 +++++
 include/linux/mfd/cros_ec.h             | 18 ++++++++++
 6 files changed, 69 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index 2a9ac5213893..a54ad47c7b02 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -13,7 +13,6 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/mfd/core.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/suspend.h>
 #include <asm/unaligned.h>
@@ -31,18 +30,6 @@ static struct cros_ec_platform pd_p = {
 	.cmd_offset = EC_CMD_PASSTHRU_OFFSET(CROS_EC_DEV_PD_INDEX),
 };
 
-static const struct mfd_cell ec_cell = {
-	.name = "cros-ec-dev",
-	.platform_data = &ec_p,
-	.pdata_size = sizeof(ec_p),
-};
-
-static const struct mfd_cell ec_pd_cell = {
-	.name = "cros-ec-dev",
-	.platform_data = &pd_p,
-	.pdata_size = sizeof(pd_p),
-};
-
 static irqreturn_t ec_irq_thread(int irq, void *data)
 {
 	struct cros_ec_device *ec_dev = data;
@@ -154,38 +141,42 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 		}
 	}
 
-	err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO, &ec_cell,
-				   1, NULL, ec_dev->irq, NULL);
-	if (err) {
-		dev_err(dev,
-			"Failed to register Embedded Controller subdevice %d\n",
-			err);
-		return err;
+	/* Register a platform device for the main EC instance */
+	ec_dev->ec = platform_device_register_data(ec_dev->dev, "cros-ec-dev",
+					PLATFORM_DEVID_AUTO, &ec_p,
+					sizeof(struct cros_ec_platform));
+	if (IS_ERR(ec_dev->ec)) {
+		dev_err(ec_dev->dev,
+			"Failed to create CrOS EC platform device\n");
+		return PTR_ERR(ec_dev->ec);
 	}
 
 	if (ec_dev->max_passthru) {
 		/*
-		 * Register a PD device as well on top of this device.
+		 * Register a platform device for the PD behind the main EC.
 		 * We make the following assumptions:
 		 * - behind an EC, we have a pd
 		 * - only one device added.
 		 * - the EC is responsive at init time (it is not true for a
-		 *   sensor hub.
+		 *   sensor hub).
 		 */
-		err = devm_mfd_add_devices(ec_dev->dev, PLATFORM_DEVID_AUTO,
-				      &ec_pd_cell, 1, NULL, ec_dev->irq, NULL);
-		if (err) {
-			dev_err(dev,
-				"Failed to register Power Delivery subdevice %d\n",
-				err);
-			return err;
+		ec_dev->pd = platform_device_register_data(ec_dev->dev,
+					"cros-ec-dev",
+					PLATFORM_DEVID_AUTO, &pd_p,
+					sizeof(struct cros_ec_platform));
+		if (IS_ERR(ec_dev->pd)) {
+			dev_err(ec_dev->dev,
+				"Failed to create CrOS PD platform device\n");
+			platform_device_unregister(ec_dev->ec);
+			return PTR_ERR(ec_dev->pd);
 		}
 	}
 
 	if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
 		err = devm_of_platform_populate(dev);
 		if (err) {
-			mfd_remove_devices(dev);
+			platform_device_unregister(ec_dev->pd);
+			platform_device_unregister(ec_dev->ec);
 			dev_err(dev, "Failed to register sub-devices\n");
 			return err;
 		}
@@ -206,6 +197,16 @@ int cros_ec_register(struct cros_ec_device *ec_dev)
 }
 EXPORT_SYMBOL(cros_ec_register);
 
+int cros_ec_unregister(struct cros_ec_device *ec_dev)
+{
+	if (ec_dev->pd)
+		platform_device_unregister(ec_dev->pd);
+	platform_device_unregister(ec_dev->ec);
+
+	return 0;
+}
+EXPORT_SYMBOL(cros_ec_unregister);
+
 #ifdef CONFIG_PM_SLEEP
 int cros_ec_suspend(struct cros_ec_device *ec_dev)
 {
diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c
index 61d75395f86d..6bb82dfa7dae 100644
--- a/drivers/platform/chrome/cros_ec_i2c.c
+++ b/drivers/platform/chrome/cros_ec_i2c.c
@@ -307,6 +307,13 @@ static int cros_ec_i2c_probe(struct i2c_client *client,
 	return 0;
 }
 
+static int cros_ec_i2c_remove(struct i2c_client *client)
+{
+	struct cros_ec_device *ec_dev = i2c_get_clientdata(client);
+
+	return cros_ec_unregister(ec_dev);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int cros_ec_i2c_suspend(struct device *dev)
 {
@@ -357,6 +364,7 @@ static struct i2c_driver cros_ec_driver = {
 		.pm	= &cros_ec_i2c_pm_ops,
 	},
 	.probe		= cros_ec_i2c_probe,
+	.remove		= cros_ec_i2c_remove,
 	.id_table	= cros_ec_i2c_id,
 };
 
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 2c44c7f3322a..5939c4a5869c 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -421,6 +421,7 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 
 static int cros_ec_lpc_remove(struct platform_device *pdev)
 {
+	struct cros_ec_device *ec_dev = platform_get_drvdata(pdev);
 	struct acpi_device *adev;
 
 	adev = ACPI_COMPANION(&pdev->dev);
@@ -428,7 +429,7 @@ static int cros_ec_lpc_remove(struct platform_device *pdev)
 		acpi_remove_notify_handler(adev->handle, ACPI_ALL_NOTIFY,
 					   cros_ec_lpc_acpi_notify);
 
-	return 0;
+	return cros_ec_unregister(ec_dev);
 }
 
 static const struct acpi_device_id cros_ec_lpc_acpi_device_ids[] = {
diff --git a/drivers/platform/chrome/cros_ec_rpmsg.c b/drivers/platform/chrome/cros_ec_rpmsg.c
index 5d3fb2abad1d..520e507bfa54 100644
--- a/drivers/platform/chrome/cros_ec_rpmsg.c
+++ b/drivers/platform/chrome/cros_ec_rpmsg.c
@@ -233,6 +233,8 @@ static void cros_ec_rpmsg_remove(struct rpmsg_device *rpdev)
 	struct cros_ec_device *ec_dev = dev_get_drvdata(&rpdev->dev);
 	struct cros_ec_rpmsg *ec_rpmsg = ec_dev->priv;
 
+	cros_ec_unregister(ec_dev);
+
 	cancel_work_sync(&ec_rpmsg->host_event_work);
 }
 
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 006a8ff64057..2e21f2776063 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -785,6 +785,13 @@ static int cros_ec_spi_probe(struct spi_device *spi)
 	return 0;
 }
 
+static int cros_ec_spi_remove(struct spi_device *spi)
+{
+	struct cros_ec_device *ec_dev = spi_get_drvdata(spi);
+
+	return cros_ec_unregister(ec_dev);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int cros_ec_spi_suspend(struct device *dev)
 {
@@ -823,6 +830,7 @@ static struct spi_driver cros_ec_driver_spi = {
 		.pm	= &cros_ec_spi_pm_ops,
 	},
 	.probe		= cros_ec_spi_probe,
+	.remove		= cros_ec_spi_remove,
 	.id_table	= cros_ec_spi_id,
 };
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 77805c3f2de7..bcccda0257ff 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -121,6 +121,10 @@ struct cros_ec_command {
  * @event_data: Raw payload transferred with the MKBP event.
  * @event_size: Size in bytes of the event data.
  * @host_event_wake_mask: Mask of host events that cause wake from suspend.
+ * @ec: The platform_device used by the mfd driver to interface with the
+ *      main EC.
+ * @pd: The platform_device used by the mfd driver to interface with the
+ *      PD behind an EC.
  */
 struct cros_ec_device {
 	/* These are used by other drivers that want to talk to the EC */
@@ -157,6 +161,10 @@ struct cros_ec_device {
 	int event_size;
 	u32 host_event_wake_mask;
 	u32 last_resume_result;
+
+	/* The platform devices used by the mfd driver */
+	struct platform_device *ec;
+	struct platform_device *pd;
 };
 
 /**
@@ -291,6 +299,16 @@ int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
  */
 int cros_ec_register(struct cros_ec_device *ec_dev);
 
+/**
+ * cros_ec_unregister() - Remove a ChromeOS EC.
+ * @ec_dev: Device to unregister.
+ *
+ * Call this to deregister a ChromeOS EC, then clean up any private data.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_unregister(struct cros_ec_device *ec_dev);
+
 /**
  * cros_ec_query_all() -  Query the protocol version supported by the
  *         ChromeOS EC.
-- 
cgit v1.2.3


From eda2e30c6684d67288edb841c6125d48c608a242 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 2 Sep 2019 11:53:02 +0200
Subject: mfd / platform: cros_ec: Miscellaneous character device to talk with
 the EC

That's a driver to talk with the ChromeOS Embedded Controller via a
miscellaneous character device, it creates an entry in /dev for every
instance and implements basic file operations for communicating with the
Embedded Controller with an userspace application. The API is moved to
the uapi folder, which is supposed to contain the user space API of the
kernel.

Note that this will replace current character device interface
implemented in the cros-ec-dev driver in the MFD subsystem. The idea is
to move all the functionality that extends the bounds of what MFD was
designed to platform/chrome subsystem.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec_dev.c                     |   4 +-
 drivers/mfd/cros_ec_dev.h                     |  35 ----
 drivers/platform/chrome/Kconfig               |  11 ++
 drivers/platform/chrome/Makefile              |   1 +
 drivers/platform/chrome/cros_ec_chardev.c     | 251 ++++++++++++++++++++++++++
 include/linux/platform_data/cros_ec_chardev.h |  37 ++++
 6 files changed, 303 insertions(+), 36 deletions(-)
 delete mode 100644 drivers/mfd/cros_ec_dev.h
 create mode 100644 drivers/platform/chrome/cros_ec_chardev.c
 create mode 100644 include/linux/platform_data/cros_ec_chardev.h

(limited to 'include')

diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 41dccced5026..4c96445b1bf5 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -7,15 +7,17 @@
 
 #include <linux/fs.h>
 #include <linux/mfd/core.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
+#include <linux/platform_data/cros_ec_chardev.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
-#include "cros_ec_dev.h"
 
 #define DRV_NAME "cros-ec-dev"
 
diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h
deleted file mode 100644
index 7a42c3ef50e4..000000000000
--- a/drivers/mfd/cros_ec_dev.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * cros_ec_dev - expose the Chrome OS Embedded Controller to userspace
- *
- * Copyright (C) 2014 Google, Inc.
- */
-
-#ifndef _CROS_EC_DEV_H_
-#define _CROS_EC_DEV_H_
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-#include <linux/mfd/cros_ec.h>
-
-#define CROS_EC_DEV_VERSION "1.0.0"
-
-/**
- * struct cros_ec_readmem - Struct used to read mapped memory.
- * @offset: Within EC_LPC_ADDR_MEMMAP region.
- * @bytes: Number of bytes to read. Zero means "read a string" (including '\0')
- *         At most only EC_MEMMAP_SIZE bytes can be read.
- * @buffer: Where to store the result. The ioctl returns the number of bytes
- *         read or negative on error.
- */
-struct cros_ec_readmem {
-	uint32_t offset;
-	uint32_t bytes;
-	uint8_t buffer[EC_MEMMAP_SIZE];
-};
-
-#define CROS_EC_DEV_IOC       0xEC
-#define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
-#define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
-
-#endif /* _CROS_EC_DEV_H_ */
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig
index eaeb04e07335..bd3524bd6b37 100644
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -136,6 +136,17 @@ config CROS_KBD_LED_BACKLIGHT
 	  To compile this driver as a module, choose M here: the
 	  module will be called cros_kbd_led_backlight.
 
+config CROS_EC_CHARDEV
+	tristate "ChromeOS EC miscdevice"
+	depends on MFD_CROS_EC_CHARDEV
+	default MFD_CROS_EC_CHARDEV
+	help
+	  This driver adds file operations support to talk with the
+	  ChromeOS EC from userspace via a character device.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called cros_ec_chardev.
+
 config CROS_EC_LIGHTBAR
 	tristate "Chromebook Pixel's lightbar support"
 	depends on MFD_CROS_EC_CHARDEV
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile
index 12ff8de5ac7a..477ec3d1d1c9 100644
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -15,6 +15,7 @@ cros_ec_lpcs-objs			:= cros_ec_lpc.o cros_ec_lpc_mec.o
 obj-$(CONFIG_CROS_EC_LPC)		+= cros_ec_lpcs.o
 obj-$(CONFIG_CROS_EC_PROTO)		+= cros_ec_proto.o cros_ec_trace.o
 obj-$(CONFIG_CROS_KBD_LED_BACKLIGHT)	+= cros_kbd_led_backlight.o
+obj-$(CONFIG_CROS_EC_CHARDEV)		+= cros_ec_chardev.o
 obj-$(CONFIG_CROS_EC_LIGHTBAR)		+= cros_ec_lightbar.o
 obj-$(CONFIG_CROS_EC_VBC)		+= cros_ec_vbc.o
 obj-$(CONFIG_CROS_EC_DEBUGFS)		+= cros_ec_debugfs.o
diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c
new file mode 100644
index 000000000000..174f940822c9
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_chardev.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Miscellaneous character driver for ChromeOS Embedded Controller
+ *
+ * Copyright 2014 Google, Inc.
+ * Copyright 2019 Google LLC
+ *
+ * This file is a rework and part of the code is ported from
+ * drivers/mfd/cros_ec_dev.c that was originally written by
+ * Bill Richardson.
+ */
+
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/platform_data/cros_ec_chardev.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#define DRV_NAME		"cros-ec-chardev"
+
+struct chardev_data {
+	struct cros_ec_dev *ec_dev;
+	struct miscdevice misc;
+};
+
+static int ec_get_version(struct cros_ec_dev *ec, char *str, int maxlen)
+{
+	static const char * const current_image_name[] = {
+		"unknown", "read-only", "read-write", "invalid",
+	};
+	struct ec_response_get_version *resp;
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = kzalloc(sizeof(*msg) + sizeof(*resp), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	msg->command = EC_CMD_GET_VERSION + ec->cmd_offset;
+	msg->insize = sizeof(*resp);
+
+	ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
+	if (ret < 0) {
+		snprintf(str, maxlen,
+			 "Unknown EC version, returned error: %d\n",
+			 msg->result);
+		goto exit;
+	}
+
+	resp = (struct ec_response_get_version *)msg->data;
+	if (resp->current_image >= ARRAY_SIZE(current_image_name))
+		resp->current_image = 3; /* invalid */
+
+	snprintf(str, maxlen, "%s\n%s\n%s\n%s\n", CROS_EC_DEV_VERSION,
+		 resp->version_string_ro, resp->version_string_rw,
+		 current_image_name[resp->current_image]);
+
+	ret = 0;
+exit:
+	kfree(msg);
+	return ret;
+}
+
+/*
+ * Device file ops
+ */
+static int cros_ec_chardev_open(struct inode *inode, struct file *filp)
+{
+	struct miscdevice *mdev = filp->private_data;
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(mdev->parent);
+
+	filp->private_data = ec_dev;
+	nonseekable_open(inode, filp);
+
+	return 0;
+}
+
+static ssize_t cros_ec_chardev_read(struct file *filp, char __user *buffer,
+				     size_t length, loff_t *offset)
+{
+	char msg[sizeof(struct ec_response_get_version) +
+		 sizeof(CROS_EC_DEV_VERSION)];
+	struct cros_ec_dev *ec = filp->private_data;
+	size_t count;
+	int ret;
+
+	if (*offset != 0)
+		return 0;
+
+	ret = ec_get_version(ec, msg, sizeof(msg));
+	if (ret)
+		return ret;
+
+	count = min(length, strlen(msg));
+
+	if (copy_to_user(buffer, msg, count))
+		return -EFAULT;
+
+	*offset = count;
+	return count;
+}
+
+/*
+ * Ioctls
+ */
+static long cros_ec_chardev_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
+{
+	struct cros_ec_command *s_cmd;
+	struct cros_ec_command u_cmd;
+	long ret;
+
+	if (copy_from_user(&u_cmd, arg, sizeof(u_cmd)))
+		return -EFAULT;
+
+	if (u_cmd.outsize > EC_MAX_MSG_BYTES ||
+	    u_cmd.insize > EC_MAX_MSG_BYTES)
+		return -EINVAL;
+
+	s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize),
+			GFP_KERNEL);
+	if (!s_cmd)
+		return -ENOMEM;
+
+	if (copy_from_user(s_cmd, arg, sizeof(*s_cmd) + u_cmd.outsize)) {
+		ret = -EFAULT;
+		goto exit;
+	}
+
+	if (u_cmd.outsize != s_cmd->outsize ||
+	    u_cmd.insize != s_cmd->insize) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	s_cmd->command += ec->cmd_offset;
+	ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
+	/* Only copy data to userland if data was received. */
+	if (ret < 0)
+		goto exit;
+
+	if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize))
+		ret = -EFAULT;
+exit:
+	kfree(s_cmd);
+	return ret;
+}
+
+static long cros_ec_chardev_ioctl_readmem(struct cros_ec_dev *ec,
+					   void __user *arg)
+{
+	struct cros_ec_device *ec_dev = ec->ec_dev;
+	struct cros_ec_readmem s_mem = { };
+	long num;
+
+	/* Not every platform supports direct reads */
+	if (!ec_dev->cmd_readmem)
+		return -ENOTTY;
+
+	if (copy_from_user(&s_mem, arg, sizeof(s_mem)))
+		return -EFAULT;
+
+	num = ec_dev->cmd_readmem(ec_dev, s_mem.offset, s_mem.bytes,
+				  s_mem.buffer);
+	if (num <= 0)
+		return num;
+
+	if (copy_to_user((void __user *)arg, &s_mem, sizeof(s_mem)))
+		return -EFAULT;
+
+	return num;
+}
+
+static long cros_ec_chardev_ioctl(struct file *filp, unsigned int cmd,
+				   unsigned long arg)
+{
+	struct cros_ec_dev *ec = filp->private_data;
+
+	if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC)
+		return -ENOTTY;
+
+	switch (cmd) {
+	case CROS_EC_DEV_IOCXCMD:
+		return cros_ec_chardev_ioctl_xcmd(ec, (void __user *)arg);
+	case CROS_EC_DEV_IOCRDMEM:
+		return cros_ec_chardev_ioctl_readmem(ec, (void __user *)arg);
+	}
+
+	return -ENOTTY;
+}
+
+static const struct file_operations chardev_fops = {
+	.open		= cros_ec_chardev_open,
+	.read		= cros_ec_chardev_read,
+	.unlocked_ioctl	= cros_ec_chardev_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= cros_ec_chardev_ioctl,
+#endif
+};
+
+static int cros_ec_chardev_probe(struct platform_device *pdev)
+{
+	struct cros_ec_dev *ec_dev = dev_get_drvdata(pdev->dev.parent);
+	struct cros_ec_platform *ec_platform = dev_get_platdata(ec_dev->dev);
+	struct chardev_data *data;
+
+	/* Create a char device: we want to create it anew */
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->ec_dev = ec_dev;
+	data->misc.minor = MISC_DYNAMIC_MINOR;
+	data->misc.fops = &chardev_fops;
+	data->misc.name = ec_platform->ec_name;
+	data->misc.parent = pdev->dev.parent;
+
+	dev_set_drvdata(&pdev->dev, data);
+
+	return misc_register(&data->misc);
+}
+
+static int cros_ec_chardev_remove(struct platform_device *pdev)
+{
+	struct chardev_data *data = dev_get_drvdata(&pdev->dev);
+
+	misc_deregister(&data->misc);
+
+	return 0;
+}
+
+static struct platform_driver cros_ec_chardev_driver = {
+	.driver = {
+		.name = DRV_NAME,
+	},
+	.probe = cros_ec_chardev_probe,
+	.remove = cros_ec_chardev_remove,
+};
+
+module_platform_driver(cros_ec_chardev_driver);
+
+MODULE_ALIAS("platform:" DRV_NAME);
+MODULE_AUTHOR("Enric Balletbo i Serra <enric.balletbo@collabora.com>");
+MODULE_DESCRIPTION("ChromeOS EC Miscellaneous Character Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/cros_ec_chardev.h b/include/linux/platform_data/cros_ec_chardev.h
new file mode 100644
index 000000000000..6600b54f531c
--- /dev/null
+++ b/include/linux/platform_data/cros_ec_chardev.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ChromeOS EC device interface.
+ *
+ * Copyright (C) 2014 Google, Inc.
+ */
+
+#ifndef _UAPI_LINUX_CROS_EC_DEV_H_
+#define _UAPI_LINUX_CROS_EC_DEV_H_
+
+#include <linux/bits.h>
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#include <linux/mfd/cros_ec.h>
+
+#define CROS_EC_DEV_VERSION "1.0.0"
+
+/**
+ * struct cros_ec_readmem - Struct used to read mapped memory.
+ * @offset: Within EC_LPC_ADDR_MEMMAP region.
+ * @bytes: Number of bytes to read. Zero means "read a string" (including '\0')
+ *         At most only EC_MEMMAP_SIZE bytes can be read.
+ * @buffer: Where to store the result. The ioctl returns the number of bytes
+ *         read or negative on error.
+ */
+struct cros_ec_readmem {
+	uint32_t offset;
+	uint32_t bytes;
+	uint8_t buffer[EC_MEMMAP_SIZE];
+};
+
+#define CROS_EC_DEV_IOC       0xEC
+#define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
+#define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
+
+#endif /* _CROS_EC_DEV_H_ */
-- 
cgit v1.2.3


From 459aedb9a5d4b92e4aee343ee8ee5aeca8e1d93a Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 2 Sep 2019 11:53:03 +0200
Subject: mfd: cros_ec: Switch to use the new cros-ec-chardev driver

With the purpose of remove the things that far extends the bounds of
what a MFD was designed to do, instantiate the new platform misc
cros-ec-chardev driver and get rid of all the unneeded code. After this
patch the misc chardev driver is a sub-device of the MFD, and all the
new file operations should be implemented there.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec_dev.c   | 220 ++------------------------------------------
 include/linux/mfd/cros_ec.h |   2 -
 2 files changed, 6 insertions(+), 216 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 4c96445b1bf5..0c1c0ce3453e 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -5,76 +5,23 @@
  * Copyright (C) 2014 Google, Inc.
  */
 
-#include <linux/fs.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
-#include <linux/platform_data/cros_ec_chardev.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
-#include <linux/pm.h>
+#include <linux/platform_data/cros_ec_chardev.h>
 #include <linux/slab.h>
-#include <linux/uaccess.h>
-
 
 #define DRV_NAME "cros-ec-dev"
 
-/* Device variables */
-#define CROS_MAX_DEV 128
-static int ec_major;
-
 static struct class cros_class = {
 	.owner          = THIS_MODULE,
 	.name           = "chromeos",
 };
 
-/* Basic communication */
-static int ec_get_version(struct cros_ec_dev *ec, char *str, int maxlen)
-{
-	struct ec_response_get_version *resp;
-	static const char * const current_image_name[] = {
-		"unknown", "read-only", "read-write", "invalid",
-	};
-	struct cros_ec_command *msg;
-	int ret;
-
-	msg = kmalloc(sizeof(*msg) + sizeof(*resp), GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	msg->version = 0;
-	msg->command = EC_CMD_GET_VERSION + ec->cmd_offset;
-	msg->insize = sizeof(*resp);
-	msg->outsize = 0;
-
-	ret = cros_ec_cmd_xfer(ec->ec_dev, msg);
-	if (ret < 0)
-		goto exit;
-
-	if (msg->result != EC_RES_SUCCESS) {
-		snprintf(str, maxlen,
-			 "%s\nUnknown EC version: EC returned %d\n",
-			 CROS_EC_DEV_VERSION, msg->result);
-		ret = -EINVAL;
-		goto exit;
-	}
-
-	resp = (struct ec_response_get_version *)msg->data;
-	if (resp->current_image >= ARRAY_SIZE(current_image_name))
-		resp->current_image = 3; /* invalid */
-
-	snprintf(str, maxlen, "%s\n%s\n%s\n%s\n", CROS_EC_DEV_VERSION,
-		 resp->version_string_ro, resp->version_string_rw,
-		 current_image_name[resp->current_image]);
-
-	ret = 0;
-exit:
-	kfree(msg);
-	return ret;
-}
-
 static int cros_ec_check_features(struct cros_ec_dev *ec, int feature)
 {
 	struct cros_ec_command *msg;
@@ -110,142 +57,6 @@ static int cros_ec_check_features(struct cros_ec_dev *ec, int feature)
 	return ec->features[feature / 32] & EC_FEATURE_MASK_0(feature);
 }
 
-/* Device file ops */
-static int ec_device_open(struct inode *inode, struct file *filp)
-{
-	struct cros_ec_dev *ec = container_of(inode->i_cdev,
-					      struct cros_ec_dev, cdev);
-	filp->private_data = ec;
-	nonseekable_open(inode, filp);
-	return 0;
-}
-
-static int ec_device_release(struct inode *inode, struct file *filp)
-{
-	return 0;
-}
-
-static ssize_t ec_device_read(struct file *filp, char __user *buffer,
-			      size_t length, loff_t *offset)
-{
-	struct cros_ec_dev *ec = filp->private_data;
-	char msg[sizeof(struct ec_response_get_version) +
-		 sizeof(CROS_EC_DEV_VERSION)];
-	size_t count;
-	int ret;
-
-	if (*offset != 0)
-		return 0;
-
-	ret = ec_get_version(ec, msg, sizeof(msg));
-	if (ret)
-		return ret;
-
-	count = min(length, strlen(msg));
-
-	if (copy_to_user(buffer, msg, count))
-		return -EFAULT;
-
-	*offset = count;
-	return count;
-}
-
-/* Ioctls */
-static long ec_device_ioctl_xcmd(struct cros_ec_dev *ec, void __user *arg)
-{
-	long ret;
-	struct cros_ec_command u_cmd;
-	struct cros_ec_command *s_cmd;
-
-	if (copy_from_user(&u_cmd, arg, sizeof(u_cmd)))
-		return -EFAULT;
-
-	if ((u_cmd.outsize > EC_MAX_MSG_BYTES) ||
-	    (u_cmd.insize > EC_MAX_MSG_BYTES))
-		return -EINVAL;
-
-	s_cmd = kmalloc(sizeof(*s_cmd) + max(u_cmd.outsize, u_cmd.insize),
-			GFP_KERNEL);
-	if (!s_cmd)
-		return -ENOMEM;
-
-	if (copy_from_user(s_cmd, arg, sizeof(*s_cmd) + u_cmd.outsize)) {
-		ret = -EFAULT;
-		goto exit;
-	}
-
-	if (u_cmd.outsize != s_cmd->outsize ||
-	    u_cmd.insize != s_cmd->insize) {
-		ret = -EINVAL;
-		goto exit;
-	}
-
-	s_cmd->command += ec->cmd_offset;
-	ret = cros_ec_cmd_xfer(ec->ec_dev, s_cmd);
-	/* Only copy data to userland if data was received. */
-	if (ret < 0)
-		goto exit;
-
-	if (copy_to_user(arg, s_cmd, sizeof(*s_cmd) + s_cmd->insize))
-		ret = -EFAULT;
-exit:
-	kfree(s_cmd);
-	return ret;
-}
-
-static long ec_device_ioctl_readmem(struct cros_ec_dev *ec, void __user *arg)
-{
-	struct cros_ec_device *ec_dev = ec->ec_dev;
-	struct cros_ec_readmem s_mem = { };
-	long num;
-
-	/* Not every platform supports direct reads */
-	if (!ec_dev->cmd_readmem)
-		return -ENOTTY;
-
-	if (copy_from_user(&s_mem, arg, sizeof(s_mem)))
-		return -EFAULT;
-
-	num = ec_dev->cmd_readmem(ec_dev, s_mem.offset, s_mem.bytes,
-				  s_mem.buffer);
-	if (num <= 0)
-		return num;
-
-	if (copy_to_user((void __user *)arg, &s_mem, sizeof(s_mem)))
-		return -EFAULT;
-
-	return num;
-}
-
-static long ec_device_ioctl(struct file *filp, unsigned int cmd,
-			    unsigned long arg)
-{
-	struct cros_ec_dev *ec = filp->private_data;
-
-	if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC)
-		return -ENOTTY;
-
-	switch (cmd) {
-	case CROS_EC_DEV_IOCXCMD:
-		return ec_device_ioctl_xcmd(ec, (void __user *)arg);
-	case CROS_EC_DEV_IOCRDMEM:
-		return ec_device_ioctl_readmem(ec, (void __user *)arg);
-	}
-
-	return -ENOTTY;
-}
-
-/* Module initialization */
-static const struct file_operations fops = {
-	.open = ec_device_open,
-	.release = ec_device_release,
-	.read = ec_device_read,
-	.unlocked_ioctl = ec_device_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = ec_device_ioctl,
-#endif
-};
-
 static void cros_ec_class_release(struct device *dev)
 {
 	kfree(to_cros_ec_dev(dev));
@@ -453,6 +264,7 @@ static const struct mfd_cell cros_usbpd_charger_cells[] = {
 };
 
 static const struct mfd_cell cros_ec_platform_cells[] = {
+	{ .name = "cros-ec-chardev" },
 	{ .name = "cros-ec-debugfs" },
 	{ .name = "cros-ec-lightbar" },
 	{ .name = "cros-ec-sysfs" },
@@ -480,7 +292,6 @@ static int ec_device_probe(struct platform_device *pdev)
 	ec->features[0] = -1U; /* Not cached yet */
 	ec->features[1] = -1U; /* Not cached yet */
 	device_initialize(&ec->class_dev);
-	cdev_init(&ec->cdev, &fops);
 
 	/* Check whether this is actually a Fingerprint MCU rather than an EC */
 	if (cros_ec_check_features(ec, EC_FEATURE_FINGERPRINT)) {
@@ -527,10 +338,7 @@ static int ec_device_probe(struct platform_device *pdev)
 
 	/*
 	 * Add the class device
-	 * Link to the character device for creating the /dev entry
-	 * in devtmpfs.
 	 */
-	ec->class_dev.devt = MKDEV(ec_major, pdev->id);
 	ec->class_dev.class = &cros_class;
 	ec->class_dev.parent = dev;
 	ec->class_dev.release = cros_ec_class_release;
@@ -541,6 +349,10 @@ static int ec_device_probe(struct platform_device *pdev)
 		goto failed;
 	}
 
+	retval = device_add(&ec->class_dev);
+	if (retval)
+		goto failed;
+
 	/* check whether this EC is a sensor hub. */
 	if (cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE))
 		cros_ec_sensors_register(ec);
@@ -584,13 +396,6 @@ static int ec_device_probe(struct platform_device *pdev)
 				retval);
 	}
 
-	/* We can now add the sysfs class, we know which parameter to show */
-	retval = cdev_device_add(&ec->cdev, &ec->class_dev);
-	if (retval) {
-		dev_err(dev, "cdev_device_add failed => %d\n", retval);
-		goto failed;
-	}
-
 	retval = mfd_add_devices(ec->dev, PLATFORM_DEVID_AUTO,
 				 cros_ec_platform_cells,
 				 ARRAY_SIZE(cros_ec_platform_cells),
@@ -624,7 +429,6 @@ static int ec_device_remove(struct platform_device *pdev)
 	struct cros_ec_dev *ec = dev_get_drvdata(&pdev->dev);
 
 	mfd_remove_devices(ec->dev);
-	cdev_del(&ec->cdev);
 	device_unregister(&ec->class_dev);
 	return 0;
 }
@@ -647,7 +451,6 @@ static struct platform_driver cros_ec_dev_driver = {
 static int __init cros_ec_dev_init(void)
 {
 	int ret;
-	dev_t dev = 0;
 
 	ret  = class_register(&cros_class);
 	if (ret) {
@@ -655,14 +458,6 @@ static int __init cros_ec_dev_init(void)
 		return ret;
 	}
 
-	/* Get a range of minor numbers (starting with 0) to work with */
-	ret = alloc_chrdev_region(&dev, 0, CROS_MAX_DEV, CROS_EC_DEV_NAME);
-	if (ret < 0) {
-		pr_err(CROS_EC_DEV_NAME ": alloc_chrdev_region() failed\n");
-		goto failed_chrdevreg;
-	}
-	ec_major = MAJOR(dev);
-
 	/* Register the driver */
 	ret = platform_driver_register(&cros_ec_dev_driver);
 	if (ret < 0) {
@@ -672,8 +467,6 @@ static int __init cros_ec_dev_init(void)
 	return 0;
 
 failed_devreg:
-	unregister_chrdev_region(MKDEV(ec_major, 0), CROS_MAX_DEV);
-failed_chrdevreg:
 	class_unregister(&cros_class);
 	return ret;
 }
@@ -681,7 +474,6 @@ failed_chrdevreg:
 static void __exit cros_ec_dev_exit(void)
 {
 	platform_driver_unregister(&cros_ec_dev_driver);
-	unregister_chrdev(ec_major, CROS_EC_DEV_NAME);
 	class_unregister(&cros_class);
 }
 
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index bcccda0257ff..569428ad1cb1 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -192,7 +192,6 @@ struct cros_ec_debugfs;
 /**
  * struct cros_ec_dev - ChromeOS EC device entry point.
  * @class_dev: Device structure used in sysfs.
- * @cdev: Character device structure in /dev.
  * @ec_dev: cros_ec_device structure to talk to the physical device.
  * @dev: Pointer to the platform device.
  * @debug_info: cros_ec_debugfs structure for debugging information.
@@ -202,7 +201,6 @@ struct cros_ec_debugfs;
  */
 struct cros_ec_dev {
 	struct device class_dev;
-	struct cdev cdev;
 	struct cros_ec_device *ec_dev;
 	struct device *dev;
 	struct cros_ec_debugfs *debug_info;
-- 
cgit v1.2.3


From 840d9f131f65b021e0a73f3371f3194897dba6ad Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 2 Sep 2019 11:53:05 +0200
Subject: mfd / platform: cros_ec: Reorganize platform and mfd includes

There is a bit of mess between cros-ec mfd includes and platform
includes. For example, we have a linux/mfd/cros_ec.h include that
exports the interface implemented in platform/chrome/cros_ec_proto.c. Or
we have a linux/mfd/cros_ec_commands.h file that is non related to the
multifunction device (in the sense that is not exporting any function of
the mfd device). This causes crossed includes between mfd and
platform/chrome subsystems and makes the code difficult to read, apart
from creating 'curious' situations where a platform/chrome driver includes
a linux/mfd/cros_ec.h file just to get the exported functions that are
implemented in another platform/chrome driver.

In order to have a better separation on what the cros-ec multifunction
driver does and what the cros-ec core provides move and rework the
affected includes doing:

 - Move cros_ec_commands.h to include/linux/platform_data/cros_ec_commands.h
 - Get rid of the parts that are implemented in the platform/chrome/cros_ec_proto.c
   driver from include/linux/mfd/cros_ec.h to a new file
   include/linux/platform_data/cros_ec_proto.h
 - Update all the drivers with the new includes, so
   - Drivers that only need to know about the protocol include
     - linux/platform_data/cros_ec_proto.h
     - linux/platform_data/cros_ec_commands.h
   - Drivers that need to know about the cros-ec mfd device also include
     - linux/mfd/cros_ec.h

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Wolfram Sang <wsa@the-dreams.de>
Acked-by: Neil Armstrong <narmstrong@baylibre.com>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
Tested-by: Gwendal Grignou <gwendal@chromium.org>
Series changes: 3
- Fix dereferencing pointer to incomplete type 'struct cros_ec_dev' (lkp)
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/extcon/extcon-usbc-cros-ec.c               |    3 +-
 drivers/hid/hid-google-hammer.c                    |    4 +-
 drivers/i2c/busses/i2c-cros-ec-tunnel.c            |    4 +-
 drivers/iio/accel/cros_ec_accel_legacy.c           |    3 +-
 .../iio/common/cros_ec_sensors/cros_ec_lid_angle.c |    3 +-
 .../iio/common/cros_ec_sensors/cros_ec_sensors.c   |    3 +-
 .../common/cros_ec_sensors/cros_ec_sensors_core.c  |    3 +-
 drivers/iio/light/cros_ec_light_prox.c             |    3 +-
 drivers/iio/pressure/cros_ec_baro.c                |    3 +-
 drivers/input/keyboard/cros_ec_keyb.c              |    4 +-
 drivers/media/platform/cros-ec-cec/cros-ec-cec.c   |    5 +-
 drivers/mfd/cros_ec_dev.c                          |    3 +-
 drivers/platform/chrome/cros_ec.c                  |    3 +-
 drivers/platform/chrome/cros_ec_chardev.c          |    3 +-
 drivers/platform/chrome/cros_ec_debugfs.c          |    3 +-
 drivers/platform/chrome/cros_ec_i2c.c              |    4 +-
 drivers/platform/chrome/cros_ec_ishtp.c            |    5 +-
 drivers/platform/chrome/cros_ec_lightbar.c         |    3 +-
 drivers/platform/chrome/cros_ec_lpc.c              |    4 +-
 drivers/platform/chrome/cros_ec_proto.c            |    3 +-
 drivers/platform/chrome/cros_ec_rpmsg.c            |    4 +-
 drivers/platform/chrome/cros_ec_spi.c              |    4 +-
 drivers/platform/chrome/cros_ec_sysfs.c            |    3 +-
 drivers/platform/chrome/cros_ec_trace.c            |    2 +-
 drivers/platform/chrome/cros_ec_trace.h            |    4 +-
 drivers/platform/chrome/cros_ec_vbc.c              |    3 +-
 drivers/platform/chrome/cros_usbpd_logger.c        |    5 +-
 drivers/power/supply/cros_usbpd-charger.c          |    5 +-
 drivers/pwm/pwm-cros-ec.c                          |    4 +-
 drivers/rtc/rtc-cros-ec.c                          |    3 +-
 include/Kbuild                                     |    2 +-
 include/linux/iio/common/cros_ec_sensors_core.h    |    3 +-
 include/linux/mfd/cros_ec.h                        |  308 --
 include/linux/mfd/cros_ec_commands.h               | 5713 --------------------
 include/linux/platform_data/cros_ec_chardev.h      |    2 +-
 include/linux/platform_data/cros_ec_commands.h     | 5713 ++++++++++++++++++++
 include/linux/platform_data/cros_ec_proto.h        |  319 ++
 sound/soc/codecs/cros_ec_codec.c                   |    4 +-
 38 files changed, 6100 insertions(+), 6070 deletions(-)
 delete mode 100644 include/linux/mfd/cros_ec_commands.h
 create mode 100644 include/linux/platform_data/cros_ec_commands.h
 create mode 100644 include/linux/platform_data/cros_ec_proto.h

(limited to 'include')

diff --git a/drivers/extcon/extcon-usbc-cros-ec.c b/drivers/extcon/extcon-usbc-cros-ec.c
index 43c0a936ab82..5290cc2d19d9 100644
--- a/drivers/extcon/extcon-usbc-cros-ec.c
+++ b/drivers/extcon/extcon-usbc-cros-ec.c
@@ -6,10 +6,11 @@
 
 #include <linux/extcon-provider.h>
 #include <linux/kernel.h>
-#include <linux/mfd/cros_ec.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/of.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index ee5e0bdcf078..84f8c127ebdc 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -16,9 +16,9 @@
 #include <linux/acpi.h>
 #include <linux/hid.h>
 #include <linux/leds.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/pm_wakeup.h>
 #include <asm/unaligned.h>
diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
index 82bcd9a78759..c551aa96a2e3 100644
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -5,8 +5,8 @@
 
 #include <linux/module.h>
 #include <linux/i2c.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/iio/accel/cros_ec_accel_legacy.c b/drivers/iio/accel/cros_ec_accel_legacy.c
index 46bb2e421bb9..fd9a634f741e 100644
--- a/drivers/iio/accel/cros_ec_accel_legacy.c
+++ b/drivers/iio/accel/cros_ec_accel_legacy.c
@@ -18,9 +18,10 @@
 #include <linux/iio/triggered_buffer.h>
 #include <linux/kernel.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 
 #define DRV_NAME	"cros-ec-accel-legacy"
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c b/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c
index 876dfd176b0e..1dcc2a16ab2d 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c
@@ -20,9 +20,8 @@
 #include <linux/iio/triggered_buffer.h>
 #include <linux/iio/trigger_consumer.h>
 #include <linux/kernel.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
index 17af4e0fd5f8..40dc24ff0ee5 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c
@@ -17,8 +17,9 @@
 #include <linux/iio/triggered_buffer.h>
 #include <linux/kernel.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
index 130362ca421b..37b3f1df0ceb 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -14,9 +14,10 @@
 #include <linux/iio/trigger_consumer.h>
 #include <linux/kernel.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 
 static char *cros_ec_loc[] = {
diff --git a/drivers/iio/light/cros_ec_light_prox.c b/drivers/iio/light/cros_ec_light_prox.c
index 308ee6ff2e22..437e0eae9178 100644
--- a/drivers/iio/light/cros_ec_light_prox.c
+++ b/drivers/iio/light/cros_ec_light_prox.c
@@ -15,8 +15,9 @@
 #include <linux/iio/trigger_consumer.h>
 #include <linux/kernel.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/iio/pressure/cros_ec_baro.c b/drivers/iio/pressure/cros_ec_baro.c
index 034ce98d6e97..956dc01f1295 100644
--- a/drivers/iio/pressure/cros_ec_baro.c
+++ b/drivers/iio/pressure/cros_ec_baro.c
@@ -15,9 +15,10 @@
 #include <linux/iio/trigger_consumer.h>
 #include <linux/kernel.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 
 /*
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c
index 38cb6d82d8fe..a29e81fdf186 100644
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -22,8 +22,8 @@
 #include <linux/slab.h>
 #include <linux/sysrq.h>
 #include <linux/input/matrix_keypad.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 
 #include <asm/unaligned.h>
 
diff --git a/drivers/media/platform/cros-ec-cec/cros-ec-cec.c b/drivers/media/platform/cros-ec-cec/cros-ec-cec.c
index 068df9888dbf..f9fd4bda2a94 100644
--- a/drivers/media/platform/cros-ec-cec/cros-ec-cec.c
+++ b/drivers/media/platform/cros-ec-cec/cros-ec-cec.c
@@ -14,10 +14,11 @@
 #include <linux/cec.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <media/cec.h>
 #include <media/cec-notifier.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 
 #define DRV_NAME	"cros-ec-cec"
 
diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 0c1c0ce3453e..091d428f5531 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -7,12 +7,13 @@
 
 #include <linux/mfd/core.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/cros_ec_chardev.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/slab.h>
 
 #define DRV_NAME "cros-ec-dev"
diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c
index a54ad47c7b02..fd77e6fa74c2 100644
--- a/drivers/platform/chrome/cros_ec.c
+++ b/drivers/platform/chrome/cros_ec.c
@@ -13,7 +13,8 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/mfd/cros_ec.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/suspend.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c
index 174f940822c9..08abd7e5c7bf 100644
--- a/drivers/platform/chrome/cros_ec_chardev.c
+++ b/drivers/platform/chrome/cros_ec_chardev.c
@@ -14,10 +14,11 @@
 #include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/platform_data/cros_ec_chardev.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/types.h>
diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c
index 8ec1cc2889f2..6ae484989d1f 100644
--- a/drivers/platform/chrome/cros_ec_debugfs.c
+++ b/drivers/platform/chrome/cros_ec_debugfs.c
@@ -8,9 +8,10 @@
 #include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
diff --git a/drivers/platform/chrome/cros_ec_i2c.c b/drivers/platform/chrome/cros_ec_i2c.c
index 6bb82dfa7dae..9bd97bc8454b 100644
--- a/drivers/platform/chrome/cros_ec_i2c.c
+++ b/drivers/platform/chrome/cros_ec_i2c.c
@@ -9,8 +9,8 @@
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
diff --git a/drivers/platform/chrome/cros_ec_ishtp.c b/drivers/platform/chrome/cros_ec_ishtp.c
index e504d255d5ce..7d050db5b1ca 100644
--- a/drivers/platform/chrome/cros_ec_ishtp.c
+++ b/drivers/platform/chrome/cros_ec_ishtp.c
@@ -8,11 +8,10 @@
 // (ISH-TP).
 
 #include <linux/delay.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/intel-ish-client-if.h>
 
 /*
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
index 609598bbb6c3..c0f2eec35a48 100644
--- a/drivers/platform/chrome/cros_ec_lightbar.c
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -9,8 +9,9 @@
 #include <linux/fs.h>
 #include <linux/kobject.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/sched.h>
 #include <linux/types.h>
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 5939c4a5869c..7d10d909435f 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -16,9 +16,9 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/interrupt.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/suspend.h>
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 3d2325197a68..f659f96bda12 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -3,10 +3,11 @@
 //
 // Copyright (C) 2015 Google, Inc
 
-#include <linux/mfd/cros_ec.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/slab.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/platform/chrome/cros_ec_rpmsg.c b/drivers/platform/chrome/cros_ec_rpmsg.c
index 520e507bfa54..9633e5417686 100644
--- a/drivers/platform/chrome/cros_ec_rpmsg.c
+++ b/drivers/platform/chrome/cros_ec_rpmsg.c
@@ -6,9 +6,9 @@
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/of.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/rpmsg.h>
 #include <linux/slab.h>
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c
index 2e21f2776063..9006e1872942 100644
--- a/drivers/platform/chrome/cros_ec_spi.c
+++ b/drivers/platform/chrome/cros_ec_spi.c
@@ -6,9 +6,9 @@
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/of.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
index 3edb237bf8ed..74d36b8d4f46 100644
--- a/drivers/platform/chrome/cros_ec_sysfs.c
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -9,8 +9,9 @@
 #include <linux/fs.h>
 #include <linux/kobject.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
diff --git a/drivers/platform/chrome/cros_ec_trace.c b/drivers/platform/chrome/cros_ec_trace.c
index 0a76412095a9..6f80ff4532ae 100644
--- a/drivers/platform/chrome/cros_ec_trace.c
+++ b/drivers/platform/chrome/cros_ec_trace.c
@@ -6,7 +6,7 @@
 #define TRACE_SYMBOL(a) {a, #a}
 
 // Generate the list using the following script:
-// sed -n 's/^#define \(EC_CMD_[[:alnum:]_]*\)\s.*/\tTRACE_SYMBOL(\1), \\/p' include/linux/mfd/cros_ec_commands.h
+// sed -n 's/^#define \(EC_CMD_[[:alnum:]_]*\)\s.*/\tTRACE_SYMBOL(\1), \\/p' include/linux/platform_data/cros_ec_commands.h
 #define EC_CMDS \
 	TRACE_SYMBOL(EC_CMD_PROTO_VERSION), \
 	TRACE_SYMBOL(EC_CMD_HELLO), \
diff --git a/drivers/platform/chrome/cros_ec_trace.h b/drivers/platform/chrome/cros_ec_trace.h
index 7ae3b89c78b9..0dd4df30fa89 100644
--- a/drivers/platform/chrome/cros_ec_trace.h
+++ b/drivers/platform/chrome/cros_ec_trace.h
@@ -11,8 +11,10 @@
 #if !defined(_CROS_EC_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
 #define _CROS_EC_TRACE_H_
 
+#include <linux/bits.h>
 #include <linux/types.h>
-#include <linux/mfd/cros_ec.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 
 #include <linux/tracepoint.h>
 
diff --git a/drivers/platform/chrome/cros_ec_vbc.c b/drivers/platform/chrome/cros_ec_vbc.c
index 2aaefed87eb4..f11a1283e5c8 100644
--- a/drivers/platform/chrome/cros_ec_vbc.c
+++ b/drivers/platform/chrome/cros_ec_vbc.c
@@ -7,8 +7,9 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/slab.h>
 
 #define DRV_NAME "cros-ec-vbc"
diff --git a/drivers/platform/chrome/cros_usbpd_logger.c b/drivers/platform/chrome/cros_usbpd_logger.c
index 7c7b267626a0..c549a9b49b56 100644
--- a/drivers/platform/chrome/cros_usbpd_logger.c
+++ b/drivers/platform/chrome/cros_usbpd_logger.c
@@ -6,10 +6,11 @@
  */
 
 #include <linux/ktime.h>
-#include <linux/math64.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
+#include <linux/math64.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 
diff --git a/drivers/power/supply/cros_usbpd-charger.c b/drivers/power/supply/cros_usbpd-charger.c
index 3a9ea94c3de3..6cc7c3910e09 100644
--- a/drivers/power/supply/cros_usbpd-charger.c
+++ b/drivers/power/supply/cros_usbpd-charger.c
@@ -5,9 +5,10 @@
  * Copyright (c) 2014 - 2018 Google, Inc
  */
 
-#include <linux/module.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/power_supply.h>
 #include <linux/slab.h>
diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index 98f6ac6cf6ab..85bea2d40b7d 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -6,8 +6,8 @@
  */
 
 #include <linux/module.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index 4d6bf9304ceb..6909e01936d9 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -6,8 +6,9 @@
 
 #include <linux/kernel.h>
 #include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
diff --git a/include/Kbuild b/include/Kbuild
index c38f0d46b267..b30824615814 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -310,7 +310,6 @@ header-test-			+= linux/mfd/adp5520.h
 header-test-			+= linux/mfd/arizona/pdata.h
 header-test-			+= linux/mfd/as3711.h
 header-test-			+= linux/mfd/as3722.h
-header-test-			+= linux/mfd/cros_ec_commands.h
 header-test-			+= linux/mfd/da903x.h
 header-test-			+= linux/mfd/da9055/pdata.h
 header-test-			+= linux/mfd/da9063/pdata.h
@@ -480,6 +479,7 @@ header-test-			+= linux/platform_data/ata-pxa.h
 header-test-			+= linux/platform_data/atmel.h
 header-test-			+= linux/platform_data/bh1770glc.h
 header-test-			+= linux/platform_data/brcmfmac.h
+header-test-			+= linux/platform_data/cros_ec_commands.h
 header-test-			+= linux/platform_data/clk-u300.h
 header-test-			+= linux/platform_data/cyttsp4.h
 header-test-			+= linux/platform_data/dma-coh901318.h
diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h
index 0c636b9fe8d7..77a7d65746b6 100644
--- a/include/linux/iio/common/cros_ec_sensors_core.h
+++ b/include/linux/iio/common/cros_ec_sensors_core.h
@@ -10,7 +10,8 @@
 
 #include <linux/iio/iio.h>
 #include <linux/irqreturn.h>
-#include <linux/mfd/cros_ec.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 
 enum {
 	CROS_EC_SENSOR_X,
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 569428ad1cb1..61c2875c2a40 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -8,186 +8,7 @@
 #ifndef __LINUX_MFD_CROS_EC_H
 #define __LINUX_MFD_CROS_EC_H
 
-#include <linux/cdev.h>
 #include <linux/device.h>
-#include <linux/notifier.h>
-#include <linux/mfd/cros_ec_commands.h>
-#include <linux/mutex.h>
-
-#define CROS_EC_DEV_NAME "cros_ec"
-#define CROS_EC_DEV_FP_NAME "cros_fp"
-#define CROS_EC_DEV_PD_NAME "cros_pd"
-#define CROS_EC_DEV_TP_NAME "cros_tp"
-#define CROS_EC_DEV_ISH_NAME "cros_ish"
-#define CROS_EC_DEV_SCP_NAME "cros_scp"
-
-/*
- * The EC is unresponsive for a time after a reboot command.  Add a
- * simple delay to make sure that the bus stays locked.
- */
-#define EC_REBOOT_DELAY_MS             50
-
-/*
- * Max bus-specific overhead incurred by request/responses.
- * I2C requires 1 additional byte for requests.
- * I2C requires 2 additional bytes for responses.
- * SPI requires up to 32 additional bytes for responses.
- */
-#define EC_PROTO_VERSION_UNKNOWN	0
-#define EC_MAX_REQUEST_OVERHEAD		1
-#define EC_MAX_RESPONSE_OVERHEAD	32
-
-/*
- * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
- */
-enum {
-	EC_MSG_TX_HEADER_BYTES	= 3,
-	EC_MSG_TX_TRAILER_BYTES	= 1,
-	EC_MSG_TX_PROTO_BYTES	= EC_MSG_TX_HEADER_BYTES +
-					EC_MSG_TX_TRAILER_BYTES,
-	EC_MSG_RX_PROTO_BYTES	= 3,
-
-	/* Max length of messages for proto 2*/
-	EC_PROTO2_MSG_BYTES		= EC_PROTO2_MAX_PARAM_SIZE +
-					EC_MSG_TX_PROTO_BYTES,
-
-	EC_MAX_MSG_BYTES		= 64 * 1024,
-};
-
-/**
- * struct cros_ec_command - Information about a ChromeOS EC command.
- * @version: Command version number (often 0).
- * @command: Command to send (EC_CMD_...).
- * @outsize: Outgoing length in bytes.
- * @insize: Max number of bytes to accept from the EC.
- * @result: EC's response to the command (separate from communication failure).
- * @data: Where to put the incoming data from EC and outgoing data to EC.
- */
-struct cros_ec_command {
-	uint32_t version;
-	uint32_t command;
-	uint32_t outsize;
-	uint32_t insize;
-	uint32_t result;
-	uint8_t data[0];
-};
-
-/**
- * struct cros_ec_device - Information about a ChromeOS EC device.
- * @phys_name: Name of physical comms layer (e.g. 'i2c-4').
- * @dev: Device pointer for physical comms device
- * @was_wake_device: True if this device was set to wake the system from
- *                   sleep at the last suspend.
- * @cros_class: The class structure for this device.
- * @cmd_readmem: Direct read of the EC memory-mapped region, if supported.
- *     @offset: Is within EC_LPC_ADDR_MEMMAP region.
- *     @bytes: Number of bytes to read. zero means "read a string" (including
- *             the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be
- *             read. Caller must ensure that the buffer is large enough for the
- *             result when reading a string.
- * @max_request: Max size of message requested.
- * @max_response: Max size of message response.
- * @max_passthru: Max sice of passthru message.
- * @proto_version: The protocol version used for this device.
- * @priv: Private data.
- * @irq: Interrupt to use.
- * @id: Device id.
- * @din: Input buffer (for data from EC). This buffer will always be
- *       dword-aligned and include enough space for up to 7 word-alignment
- *       bytes also, so we can ensure that the body of the message is always
- *       dword-aligned (64-bit). We use this alignment to keep ARM and x86
- *       happy. Probably word alignment would be OK, there might be a small
- *       performance advantage to using dword.
- * @dout: Output buffer (for data to EC). This buffer will always be
- *        dword-aligned and include enough space for up to 7 word-alignment
- *        bytes also, so we can ensure that the body of the message is always
- *        dword-aligned (64-bit). We use this alignment to keep ARM and x86
- *        happy. Probably word alignment would be OK, there might be a small
- *        performance advantage to using dword.
- * @din_size: Size of din buffer to allocate (zero to use static din).
- * @dout_size: Size of dout buffer to allocate (zero to use static dout).
- * @wake_enabled: True if this device can wake the system from sleep.
- * @suspended: True if this device had been suspended.
- * @cmd_xfer: Send command to EC and get response.
- *            Returns the number of bytes received if the communication
- *            succeeded, but that doesn't mean the EC was happy with the
- *            command. The caller should check msg.result for the EC's result
- *            code.
- * @pkt_xfer: Send packet to EC and get response.
- * @lock: One transaction at a time.
- * @mkbp_event_supported: True if this EC supports the MKBP event protocol.
- * @host_sleep_v1: True if this EC supports the sleep v1 command.
- * @event_notifier: Interrupt event notifier for transport devices.
- * @event_data: Raw payload transferred with the MKBP event.
- * @event_size: Size in bytes of the event data.
- * @host_event_wake_mask: Mask of host events that cause wake from suspend.
- * @ec: The platform_device used by the mfd driver to interface with the
- *      main EC.
- * @pd: The platform_device used by the mfd driver to interface with the
- *      PD behind an EC.
- */
-struct cros_ec_device {
-	/* These are used by other drivers that want to talk to the EC */
-	const char *phys_name;
-	struct device *dev;
-	bool was_wake_device;
-	struct class *cros_class;
-	int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset,
-			   unsigned int bytes, void *dest);
-
-	/* These are used to implement the platform-specific interface */
-	u16 max_request;
-	u16 max_response;
-	u16 max_passthru;
-	u16 proto_version;
-	void *priv;
-	int irq;
-	u8 *din;
-	u8 *dout;
-	int din_size;
-	int dout_size;
-	bool wake_enabled;
-	bool suspended;
-	int (*cmd_xfer)(struct cros_ec_device *ec,
-			struct cros_ec_command *msg);
-	int (*pkt_xfer)(struct cros_ec_device *ec,
-			struct cros_ec_command *msg);
-	struct mutex lock;
-	bool mkbp_event_supported;
-	bool host_sleep_v1;
-	struct blocking_notifier_head event_notifier;
-
-	struct ec_response_get_next_event_v1 event_data;
-	int event_size;
-	u32 host_event_wake_mask;
-	u32 last_resume_result;
-
-	/* The platform devices used by the mfd driver */
-	struct platform_device *ec;
-	struct platform_device *pd;
-};
-
-/**
- * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information.
- * @sensor_num: Id of the sensor, as reported by the EC.
- */
-struct cros_ec_sensor_platform {
-	u8 sensor_num;
-};
-
-/**
- * struct cros_ec_platform - ChromeOS EC platform information.
- * @ec_name: Name of EC device (e.g. 'cros-ec', 'cros-pd', ...)
- *           used in /dev/ and sysfs.
- * @cmd_offset: Offset to apply for each command. Set when
- *              registering a device behind another one.
- */
-struct cros_ec_platform {
-	const char *ec_name;
-	u16 cmd_offset;
-};
-
-struct cros_ec_debugfs;
 
 /**
  * struct cros_ec_dev - ChromeOS EC device entry point.
@@ -211,133 +32,4 @@ struct cros_ec_dev {
 
 #define to_cros_ec_dev(dev)  container_of(dev, struct cros_ec_dev, class_dev)
 
-/**
- * cros_ec_suspend() - Handle a suspend operation for the ChromeOS EC device.
- * @ec_dev: Device to suspend.
- *
- * This can be called by drivers to handle a suspend event.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_suspend(struct cros_ec_device *ec_dev);
-
-/**
- * cros_ec_resume() - Handle a resume operation for the ChromeOS EC device.
- * @ec_dev: Device to resume.
- *
- * This can be called by drivers to handle a resume event.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_resume(struct cros_ec_device *ec_dev);
-
-/**
- * cros_ec_prepare_tx() - Prepare an outgoing message in the output buffer.
- * @ec_dev: Device to register.
- * @msg: Message to write.
- *
- * This is intended to be used by all ChromeOS EC drivers, but at present
- * only SPI uses it. Once LPC uses the same protocol it can start using it.
- * I2C could use it now, with a refactor of the existing code.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
-		       struct cros_ec_command *msg);
-
-/**
- * cros_ec_check_result() - Check ec_msg->result.
- * @ec_dev: EC device.
- * @msg: Message to check.
- *
- * This is used by ChromeOS EC drivers to check the ec_msg->result for
- * errors and to warn about them.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_check_result(struct cros_ec_device *ec_dev,
-			 struct cros_ec_command *msg);
-
-/**
- * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC.
- * @ec_dev: EC device.
- * @msg: Message to write.
- *
- * Call this to send a command to the ChromeOS EC.  This should be used
- * instead of calling the EC's cmd_xfer() callback directly.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
-		     struct cros_ec_command *msg);
-
-/**
- * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC.
- * @ec_dev: EC device.
- * @msg: Message to write.
- *
- * This function is identical to cros_ec_cmd_xfer, except it returns success
- * status only if both the command was transmitted successfully and the EC
- * replied with success status. It's not necessary to check msg->result when
- * using this function.
- *
- * Return: The number of bytes transferred on success or negative error code.
- */
-int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
-			    struct cros_ec_command *msg);
-
-/**
- * cros_ec_register() - Register a new ChromeOS EC, using the provided info.
- * @ec_dev: Device to register.
- *
- * Before calling this, allocate a pointer to a new device and then fill
- * in all the fields up to the --private-- marker.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_register(struct cros_ec_device *ec_dev);
-
-/**
- * cros_ec_unregister() - Remove a ChromeOS EC.
- * @ec_dev: Device to unregister.
- *
- * Call this to deregister a ChromeOS EC, then clean up any private data.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_unregister(struct cros_ec_device *ec_dev);
-
-/**
- * cros_ec_query_all() -  Query the protocol version supported by the
- *         ChromeOS EC.
- * @ec_dev: Device to register.
- *
- * Return: 0 on success or negative error code.
- */
-int cros_ec_query_all(struct cros_ec_device *ec_dev);
-
-/**
- * cros_ec_get_next_event() - Fetch next event from the ChromeOS EC.
- * @ec_dev: Device to fetch event from.
- * @wake_event: Pointer to a bool set to true upon return if the event might be
- *              treated as a wake event. Ignored if null.
- *
- * Return: negative error code on errors; 0 for no data; or else number of
- * bytes received (i.e., an event was retrieved successfully). Event types are
- * written out to @ec_dev->event_data.event_type on success.
- */
-int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
-
-/**
- * cros_ec_get_host_event() - Return a mask of event set by the ChromeOS EC.
- * @ec_dev: Device to fetch event from.
- *
- * When MKBP is supported, when the EC raises an interrupt, we collect the
- * events raised and call the functions in the ec notifier. This function
- * is a helper to know which events are raised.
- *
- * Return: 0 on error or non-zero bitmask of one or more EC_HOST_EVENT_*.
- */
-u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
-
 #endif /* __LINUX_MFD_CROS_EC_H */
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
deleted file mode 100644
index 7ccb8757b79d..000000000000
--- a/include/linux/mfd/cros_ec_commands.h
+++ /dev/null
@@ -1,5713 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Host communication command constants for ChromeOS EC
- *
- * Copyright (C) 2012 Google, Inc
- *
- * NOTE: This file is auto-generated from ChromeOS EC Open Source code from
- * https://chromium.googlesource.com/chromiumos/platform/ec/+/master/include/ec_commands.h
- */
-
-/* Host communication command constants for Chrome EC */
-
-#ifndef __CROS_EC_COMMANDS_H
-#define __CROS_EC_COMMANDS_H
-
-
-
-
-#define BUILD_ASSERT(_cond)
-
-/*
- * Current version of this protocol
- *
- * TODO(crosbug.com/p/11223): This is effectively useless; protocol is
- * determined in other ways.  Remove this once the kernel code no longer
- * depends on it.
- */
-#define EC_PROTO_VERSION          0x00000002
-
-/* Command version mask */
-#define EC_VER_MASK(version) BIT(version)
-
-/* I/O addresses for ACPI commands */
-#define EC_LPC_ADDR_ACPI_DATA  0x62
-#define EC_LPC_ADDR_ACPI_CMD   0x66
-
-/* I/O addresses for host command */
-#define EC_LPC_ADDR_HOST_DATA  0x200
-#define EC_LPC_ADDR_HOST_CMD   0x204
-
-/* I/O addresses for host command args and params */
-/* Protocol version 2 */
-#define EC_LPC_ADDR_HOST_ARGS    0x800  /* And 0x801, 0x802, 0x803 */
-#define EC_LPC_ADDR_HOST_PARAM   0x804  /* For version 2 params; size is
-					 * EC_PROTO2_MAX_PARAM_SIZE
-					 */
-/* Protocol version 3 */
-#define EC_LPC_ADDR_HOST_PACKET  0x800  /* Offset of version 3 packet */
-#define EC_LPC_HOST_PACKET_SIZE  0x100  /* Max size of version 3 packet */
-
-/*
- * The actual block is 0x800-0x8ff, but some BIOSes think it's 0x880-0x8ff
- * and they tell the kernel that so we have to think of it as two parts.
- */
-#define EC_HOST_CMD_REGION0    0x800
-#define EC_HOST_CMD_REGION1    0x880
-#define EC_HOST_CMD_REGION_SIZE 0x80
-
-/* EC command register bit functions */
-#define EC_LPC_CMDR_DATA	BIT(0)  /* Data ready for host to read */
-#define EC_LPC_CMDR_PENDING	BIT(1)  /* Write pending to EC */
-#define EC_LPC_CMDR_BUSY	BIT(2)  /* EC is busy processing a command */
-#define EC_LPC_CMDR_CMD		BIT(3)  /* Last host write was a command */
-#define EC_LPC_CMDR_ACPI_BRST	BIT(4)  /* Burst mode (not used) */
-#define EC_LPC_CMDR_SCI		BIT(5)  /* SCI event is pending */
-#define EC_LPC_CMDR_SMI		BIT(6)  /* SMI event is pending */
-
-#define EC_LPC_ADDR_MEMMAP       0x900
-#define EC_MEMMAP_SIZE         255 /* ACPI IO buffer max is 255 bytes */
-#define EC_MEMMAP_TEXT_MAX     8   /* Size of a string in the memory map */
-
-/* The offset address of each type of data in mapped memory. */
-#define EC_MEMMAP_TEMP_SENSOR      0x00 /* Temp sensors 0x00 - 0x0f */
-#define EC_MEMMAP_FAN              0x10 /* Fan speeds 0x10 - 0x17 */
-#define EC_MEMMAP_TEMP_SENSOR_B    0x18 /* More temp sensors 0x18 - 0x1f */
-#define EC_MEMMAP_ID               0x20 /* 0x20 == 'E', 0x21 == 'C' */
-#define EC_MEMMAP_ID_VERSION       0x22 /* Version of data in 0x20 - 0x2f */
-#define EC_MEMMAP_THERMAL_VERSION  0x23 /* Version of data in 0x00 - 0x1f */
-#define EC_MEMMAP_BATTERY_VERSION  0x24 /* Version of data in 0x40 - 0x7f */
-#define EC_MEMMAP_SWITCHES_VERSION 0x25 /* Version of data in 0x30 - 0x33 */
-#define EC_MEMMAP_EVENTS_VERSION   0x26 /* Version of data in 0x34 - 0x3f */
-#define EC_MEMMAP_HOST_CMD_FLAGS   0x27 /* Host cmd interface flags (8 bits) */
-/* Unused 0x28 - 0x2f */
-#define EC_MEMMAP_SWITCHES         0x30	/* 8 bits */
-/* Unused 0x31 - 0x33 */
-#define EC_MEMMAP_HOST_EVENTS      0x34 /* 64 bits */
-/* Battery values are all 32 bits, unless otherwise noted. */
-#define EC_MEMMAP_BATT_VOLT        0x40 /* Battery Present Voltage */
-#define EC_MEMMAP_BATT_RATE        0x44 /* Battery Present Rate */
-#define EC_MEMMAP_BATT_CAP         0x48 /* Battery Remaining Capacity */
-#define EC_MEMMAP_BATT_FLAG        0x4c /* Battery State, see below (8-bit) */
-#define EC_MEMMAP_BATT_COUNT       0x4d /* Battery Count (8-bit) */
-#define EC_MEMMAP_BATT_INDEX       0x4e /* Current Battery Data Index (8-bit) */
-/* Unused 0x4f */
-#define EC_MEMMAP_BATT_DCAP        0x50 /* Battery Design Capacity */
-#define EC_MEMMAP_BATT_DVLT        0x54 /* Battery Design Voltage */
-#define EC_MEMMAP_BATT_LFCC        0x58 /* Battery Last Full Charge Capacity */
-#define EC_MEMMAP_BATT_CCNT        0x5c /* Battery Cycle Count */
-/* Strings are all 8 bytes (EC_MEMMAP_TEXT_MAX) */
-#define EC_MEMMAP_BATT_MFGR        0x60 /* Battery Manufacturer String */
-#define EC_MEMMAP_BATT_MODEL       0x68 /* Battery Model Number String */
-#define EC_MEMMAP_BATT_SERIAL      0x70 /* Battery Serial Number String */
-#define EC_MEMMAP_BATT_TYPE        0x78 /* Battery Type String */
-#define EC_MEMMAP_ALS              0x80 /* ALS readings in lux (2 X 16 bits) */
-/* Unused 0x84 - 0x8f */
-#define EC_MEMMAP_ACC_STATUS       0x90 /* Accelerometer status (8 bits )*/
-/* Unused 0x91 */
-#define EC_MEMMAP_ACC_DATA         0x92 /* Accelerometers data 0x92 - 0x9f */
-/* 0x92: Lid Angle if available, LID_ANGLE_UNRELIABLE otherwise */
-/* 0x94 - 0x99: 1st Accelerometer */
-/* 0x9a - 0x9f: 2nd Accelerometer */
-#define EC_MEMMAP_GYRO_DATA        0xa0 /* Gyroscope data 0xa0 - 0xa5 */
-/* Unused 0xa6 - 0xdf */
-
-/*
- * ACPI is unable to access memory mapped data at or above this offset due to
- * limitations of the ACPI protocol. Do not place data in the range 0xe0 - 0xfe
- * which might be needed by ACPI.
- */
-#define EC_MEMMAP_NO_ACPI 0xe0
-
-/* Define the format of the accelerometer mapped memory status byte. */
-#define EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK  0x0f
-#define EC_MEMMAP_ACC_STATUS_BUSY_BIT        BIT(4)
-#define EC_MEMMAP_ACC_STATUS_PRESENCE_BIT    BIT(7)
-
-/* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */
-#define EC_TEMP_SENSOR_ENTRIES     16
-/*
- * Number of temp sensors at EC_MEMMAP_TEMP_SENSOR_B.
- *
- * Valid only if EC_MEMMAP_THERMAL_VERSION returns >= 2.
- */
-#define EC_TEMP_SENSOR_B_ENTRIES      8
-
-/* Special values for mapped temperature sensors */
-#define EC_TEMP_SENSOR_NOT_PRESENT    0xff
-#define EC_TEMP_SENSOR_ERROR          0xfe
-#define EC_TEMP_SENSOR_NOT_POWERED    0xfd
-#define EC_TEMP_SENSOR_NOT_CALIBRATED 0xfc
-/*
- * The offset of temperature value stored in mapped memory.  This allows
- * reporting a temperature range of 200K to 454K = -73C to 181C.
- */
-#define EC_TEMP_SENSOR_OFFSET      200
-
-/*
- * Number of ALS readings at EC_MEMMAP_ALS
- */
-#define EC_ALS_ENTRIES             2
-
-/*
- * The default value a temperature sensor will return when it is present but
- * has not been read this boot.  This is a reasonable number to avoid
- * triggering alarms on the host.
- */
-#define EC_TEMP_SENSOR_DEFAULT     (296 - EC_TEMP_SENSOR_OFFSET)
-
-#define EC_FAN_SPEED_ENTRIES       4       /* Number of fans at EC_MEMMAP_FAN */
-#define EC_FAN_SPEED_NOT_PRESENT   0xffff  /* Entry not present */
-#define EC_FAN_SPEED_STALLED       0xfffe  /* Fan stalled */
-
-/* Battery bit flags at EC_MEMMAP_BATT_FLAG. */
-#define EC_BATT_FLAG_AC_PRESENT   0x01
-#define EC_BATT_FLAG_BATT_PRESENT 0x02
-#define EC_BATT_FLAG_DISCHARGING  0x04
-#define EC_BATT_FLAG_CHARGING     0x08
-#define EC_BATT_FLAG_LEVEL_CRITICAL 0x10
-/* Set if some of the static/dynamic data is invalid (or outdated). */
-#define EC_BATT_FLAG_INVALID_DATA 0x20
-
-/* Switch flags at EC_MEMMAP_SWITCHES */
-#define EC_SWITCH_LID_OPEN               0x01
-#define EC_SWITCH_POWER_BUTTON_PRESSED   0x02
-#define EC_SWITCH_WRITE_PROTECT_DISABLED 0x04
-/* Was recovery requested via keyboard; now unused. */
-#define EC_SWITCH_IGNORE1		 0x08
-/* Recovery requested via dedicated signal (from servo board) */
-#define EC_SWITCH_DEDICATED_RECOVERY     0x10
-/* Was fake developer mode switch; now unused.  Remove in next refactor. */
-#define EC_SWITCH_IGNORE0                0x20
-
-/* Host command interface flags */
-/* Host command interface supports LPC args (LPC interface only) */
-#define EC_HOST_CMD_FLAG_LPC_ARGS_SUPPORTED  0x01
-/* Host command interface supports version 3 protocol */
-#define EC_HOST_CMD_FLAG_VERSION_3   0x02
-
-/* Wireless switch flags */
-#define EC_WIRELESS_SWITCH_ALL       ~0x00  /* All flags */
-#define EC_WIRELESS_SWITCH_WLAN       0x01  /* WLAN radio */
-#define EC_WIRELESS_SWITCH_BLUETOOTH  0x02  /* Bluetooth radio */
-#define EC_WIRELESS_SWITCH_WWAN       0x04  /* WWAN power */
-#define EC_WIRELESS_SWITCH_WLAN_POWER 0x08  /* WLAN power */
-
-/*****************************************************************************/
-/*
- * ACPI commands
- *
- * These are valid ONLY on the ACPI command/data port.
- */
-
-/*
- * ACPI Read Embedded Controller
- *
- * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
- *
- * Use the following sequence:
- *
- *    - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD
- *    - Wait for EC_LPC_CMDR_PENDING bit to clear
- *    - Write address to EC_LPC_ADDR_ACPI_DATA
- *    - Wait for EC_LPC_CMDR_DATA bit to set
- *    - Read value from EC_LPC_ADDR_ACPI_DATA
- */
-#define EC_CMD_ACPI_READ 0x0080
-
-/*
- * ACPI Write Embedded Controller
- *
- * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
- *
- * Use the following sequence:
- *
- *    - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD
- *    - Wait for EC_LPC_CMDR_PENDING bit to clear
- *    - Write address to EC_LPC_ADDR_ACPI_DATA
- *    - Wait for EC_LPC_CMDR_PENDING bit to clear
- *    - Write value to EC_LPC_ADDR_ACPI_DATA
- */
-#define EC_CMD_ACPI_WRITE 0x0081
-
-/*
- * ACPI Burst Enable Embedded Controller
- *
- * This enables burst mode on the EC to allow the host to issue several
- * commands back-to-back. While in this mode, writes to mapped multi-byte
- * data are locked out to ensure data consistency.
- */
-#define EC_CMD_ACPI_BURST_ENABLE 0x0082
-
-/*
- * ACPI Burst Disable Embedded Controller
- *
- * This disables burst mode on the EC and stops preventing EC writes to mapped
- * multi-byte data.
- */
-#define EC_CMD_ACPI_BURST_DISABLE 0x0083
-
-/*
- * ACPI Query Embedded Controller
- *
- * This clears the lowest-order bit in the currently pending host events, and
- * sets the result code to the 1-based index of the bit (event 0x00000001 = 1,
- * event 0x80000000 = 32), or 0 if no event was pending.
- */
-#define EC_CMD_ACPI_QUERY_EVENT 0x0084
-
-/* Valid addresses in ACPI memory space, for read/write commands */
-
-/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */
-#define EC_ACPI_MEM_VERSION            0x00
-/*
- * Test location; writing value here updates test compliment byte to (0xff -
- * value).
- */
-#define EC_ACPI_MEM_TEST               0x01
-/* Test compliment; writes here are ignored. */
-#define EC_ACPI_MEM_TEST_COMPLIMENT    0x02
-
-/* Keyboard backlight brightness percent (0 - 100) */
-#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03
-/* DPTF Target Fan Duty (0-100, 0xff for auto/none) */
-#define EC_ACPI_MEM_FAN_DUTY           0x04
-
-/*
- * DPTF temp thresholds. Any of the EC's temp sensors can have up to two
- * independent thresholds attached to them. The current value of the ID
- * register determines which sensor is affected by the THRESHOLD and COMMIT
- * registers. The THRESHOLD register uses the same EC_TEMP_SENSOR_OFFSET scheme
- * as the memory-mapped sensors. The COMMIT register applies those settings.
- *
- * The spec does not mandate any way to read back the threshold settings
- * themselves, but when a threshold is crossed the AP needs a way to determine
- * which sensor(s) are responsible. Each reading of the ID register clears and
- * returns one sensor ID that has crossed one of its threshold (in either
- * direction) since the last read. A value of 0xFF means "no new thresholds
- * have tripped". Setting or enabling the thresholds for a sensor will clear
- * the unread event count for that sensor.
- */
-#define EC_ACPI_MEM_TEMP_ID            0x05
-#define EC_ACPI_MEM_TEMP_THRESHOLD     0x06
-#define EC_ACPI_MEM_TEMP_COMMIT        0x07
-/*
- * Here are the bits for the COMMIT register:
- *   bit 0 selects the threshold index for the chosen sensor (0/1)
- *   bit 1 enables/disables the selected threshold (0 = off, 1 = on)
- * Each write to the commit register affects one threshold.
- */
-#define EC_ACPI_MEM_TEMP_COMMIT_SELECT_MASK BIT(0)
-#define EC_ACPI_MEM_TEMP_COMMIT_ENABLE_MASK BIT(1)
-/*
- * Example:
- *
- * Set the thresholds for sensor 2 to 50 C and 60 C:
- *   write 2 to [0x05]      --  select temp sensor 2
- *   write 0x7b to [0x06]   --  C_TO_K(50) - EC_TEMP_SENSOR_OFFSET
- *   write 0x2 to [0x07]    --  enable threshold 0 with this value
- *   write 0x85 to [0x06]   --  C_TO_K(60) - EC_TEMP_SENSOR_OFFSET
- *   write 0x3 to [0x07]    --  enable threshold 1 with this value
- *
- * Disable the 60 C threshold, leaving the 50 C threshold unchanged:
- *   write 2 to [0x05]      --  select temp sensor 2
- *   write 0x1 to [0x07]    --  disable threshold 1
- */
-
-/* DPTF battery charging current limit */
-#define EC_ACPI_MEM_CHARGING_LIMIT     0x08
-
-/* Charging limit is specified in 64 mA steps */
-#define EC_ACPI_MEM_CHARGING_LIMIT_STEP_MA   64
-/* Value to disable DPTF battery charging limit */
-#define EC_ACPI_MEM_CHARGING_LIMIT_DISABLED  0xff
-
-/*
- * Report device orientation
- *  Bits       Definition
- *  3:1        Device DPTF Profile Number (DDPN)
- *               0   = Reserved for backward compatibility (indicates no valid
- *                     profile number. Host should fall back to using TBMD).
- *              1..7 = DPTF Profile number to indicate to host which table needs
- *                     to be loaded.
- *   0         Tablet Mode Device Indicator (TBMD)
- */
-#define EC_ACPI_MEM_DEVICE_ORIENTATION 0x09
-#define EC_ACPI_MEM_TBMD_SHIFT         0
-#define EC_ACPI_MEM_TBMD_MASK          0x1
-#define EC_ACPI_MEM_DDPN_SHIFT         1
-#define EC_ACPI_MEM_DDPN_MASK          0x7
-
-/*
- * Report device features. Uses the same format as the host command, except:
- *
- * bit 0 (EC_FEATURE_LIMITED) changes meaning from "EC code has a limited set
- * of features", which is of limited interest when the system is already
- * interpreting ACPI bytecode, to "EC_FEATURES[0-7] is not supported". Since
- * these are supported, it defaults to 0.
- * This allows detecting the presence of this field since older versions of
- * the EC codebase would simply return 0xff to that unknown address. Check
- * FEATURES0 != 0xff (or FEATURES0[0] == 0) to make sure that the other bits
- * are valid.
- */
-#define EC_ACPI_MEM_DEVICE_FEATURES0 0x0a
-#define EC_ACPI_MEM_DEVICE_FEATURES1 0x0b
-#define EC_ACPI_MEM_DEVICE_FEATURES2 0x0c
-#define EC_ACPI_MEM_DEVICE_FEATURES3 0x0d
-#define EC_ACPI_MEM_DEVICE_FEATURES4 0x0e
-#define EC_ACPI_MEM_DEVICE_FEATURES5 0x0f
-#define EC_ACPI_MEM_DEVICE_FEATURES6 0x10
-#define EC_ACPI_MEM_DEVICE_FEATURES7 0x11
-
-#define EC_ACPI_MEM_BATTERY_INDEX    0x12
-
-/*
- * USB Port Power. Each bit indicates whether the corresponding USB ports' power
- * is enabled (1) or disabled (0).
- *   bit 0 USB port ID 0
- *   ...
- *   bit 7 USB port ID 7
- */
-#define EC_ACPI_MEM_USB_PORT_POWER 0x13
-
-/*
- * ACPI addresses 0x20 - 0xff map to EC_MEMMAP offset 0x00 - 0xdf.  This data
- * is read-only from the AP.  Added in EC_ACPI_MEM_VERSION 2.
- */
-#define EC_ACPI_MEM_MAPPED_BEGIN   0x20
-#define EC_ACPI_MEM_MAPPED_SIZE    0xe0
-
-/* Current version of ACPI memory address space */
-#define EC_ACPI_MEM_VERSION_CURRENT 2
-
-
-/*
- * This header file is used in coreboot both in C and ACPI code.  The ACPI code
- * is pre-processed to handle constants but the ASL compiler is unable to
- * handle actual C code so keep it separate.
- */
-
-
-/*
- * Attributes for EC request and response packets.  Just defining __packed
- * results in inefficient assembly code on ARM, if the structure is actually
- * 32-bit aligned, as it should be for all buffers.
- *
- * Be very careful when adding these to existing structures.  They will round
- * up the structure size to the specified boundary.
- *
- * Also be very careful to make that if a structure is included in some other
- * parent structure that the alignment will still be true given the packing of
- * the parent structure.  This is particularly important if the sub-structure
- * will be passed as a pointer to another function, since that function will
- * not know about the misaligment caused by the parent structure's packing.
- *
- * Also be very careful using __packed - particularly when nesting non-packed
- * structures inside packed ones.  In fact, DO NOT use __packed directly;
- * always use one of these attributes.
- *
- * Once everything is annotated properly, the following search strings should
- * not return ANY matches in this file other than right here:
- *
- * "__packed" - generates inefficient code; all sub-structs must also be packed
- *
- * "struct [^_]" - all structs should be annotated, except for structs that are
- * members of other structs/unions (and their original declarations should be
- * annotated).
- */
-
-/*
- * Packed structures make no assumption about alignment, so they do inefficient
- * byte-wise reads.
- */
-#define __ec_align1 __packed
-#define __ec_align2 __packed
-#define __ec_align4 __packed
-#define __ec_align_size1 __packed
-#define __ec_align_offset1 __packed
-#define __ec_align_offset2 __packed
-#define __ec_todo_packed __packed
-#define __ec_todo_unpacked
-
-
-/* LPC command status byte masks */
-/* EC has written a byte in the data register and host hasn't read it yet */
-#define EC_LPC_STATUS_TO_HOST     0x01
-/* Host has written a command/data byte and the EC hasn't read it yet */
-#define EC_LPC_STATUS_FROM_HOST   0x02
-/* EC is processing a command */
-#define EC_LPC_STATUS_PROCESSING  0x04
-/* Last write to EC was a command, not data */
-#define EC_LPC_STATUS_LAST_CMD    0x08
-/* EC is in burst mode */
-#define EC_LPC_STATUS_BURST_MODE  0x10
-/* SCI event is pending (requesting SCI query) */
-#define EC_LPC_STATUS_SCI_PENDING 0x20
-/* SMI event is pending (requesting SMI query) */
-#define EC_LPC_STATUS_SMI_PENDING 0x40
-/* (reserved) */
-#define EC_LPC_STATUS_RESERVED    0x80
-
-/*
- * EC is busy.  This covers both the EC processing a command, and the host has
- * written a new command but the EC hasn't picked it up yet.
- */
-#define EC_LPC_STATUS_BUSY_MASK \
-	(EC_LPC_STATUS_FROM_HOST | EC_LPC_STATUS_PROCESSING)
-
-/*
- * Host command response codes (16-bit).  Note that response codes should be
- * stored in a uint16_t rather than directly in a value of this type.
- */
-enum ec_status {
-	EC_RES_SUCCESS = 0,
-	EC_RES_INVALID_COMMAND = 1,
-	EC_RES_ERROR = 2,
-	EC_RES_INVALID_PARAM = 3,
-	EC_RES_ACCESS_DENIED = 4,
-	EC_RES_INVALID_RESPONSE = 5,
-	EC_RES_INVALID_VERSION = 6,
-	EC_RES_INVALID_CHECKSUM = 7,
-	EC_RES_IN_PROGRESS = 8,		/* Accepted, command in progress */
-	EC_RES_UNAVAILABLE = 9,		/* No response available */
-	EC_RES_TIMEOUT = 10,		/* We got a timeout */
-	EC_RES_OVERFLOW = 11,		/* Table / data overflow */
-	EC_RES_INVALID_HEADER = 12,     /* Header contains invalid data */
-	EC_RES_REQUEST_TRUNCATED = 13,  /* Didn't get the entire request */
-	EC_RES_RESPONSE_TOO_BIG = 14,   /* Response was too big to handle */
-	EC_RES_BUS_ERROR = 15,		/* Communications bus error */
-	EC_RES_BUSY = 16,		/* Up but too busy.  Should retry */
-	EC_RES_INVALID_HEADER_VERSION = 17,  /* Header version invalid */
-	EC_RES_INVALID_HEADER_CRC = 18,      /* Header CRC invalid */
-	EC_RES_INVALID_DATA_CRC = 19,        /* Data CRC invalid */
-	EC_RES_DUP_UNAVAILABLE = 20,         /* Can't resend response */
-};
-
-/*
- * Host event codes.  Note these are 1-based, not 0-based, because ACPI query
- * EC command uses code 0 to mean "no event pending".  We explicitly specify
- * each value in the enum listing so they won't change if we delete/insert an
- * item or rearrange the list (it needs to be stable across platforms, not
- * just within a single compiled instance).
- */
-enum host_event_code {
-	EC_HOST_EVENT_LID_CLOSED = 1,
-	EC_HOST_EVENT_LID_OPEN = 2,
-	EC_HOST_EVENT_POWER_BUTTON = 3,
-	EC_HOST_EVENT_AC_CONNECTED = 4,
-	EC_HOST_EVENT_AC_DISCONNECTED = 5,
-	EC_HOST_EVENT_BATTERY_LOW = 6,
-	EC_HOST_EVENT_BATTERY_CRITICAL = 7,
-	EC_HOST_EVENT_BATTERY = 8,
-	EC_HOST_EVENT_THERMAL_THRESHOLD = 9,
-	/* Event generated by a device attached to the EC */
-	EC_HOST_EVENT_DEVICE = 10,
-	EC_HOST_EVENT_THERMAL = 11,
-	EC_HOST_EVENT_USB_CHARGER = 12,
-	EC_HOST_EVENT_KEY_PRESSED = 13,
-	/*
-	 * EC has finished initializing the host interface.  The host can check
-	 * for this event following sending a EC_CMD_REBOOT_EC command to
-	 * determine when the EC is ready to accept subsequent commands.
-	 */
-	EC_HOST_EVENT_INTERFACE_READY = 14,
-	/* Keyboard recovery combo has been pressed */
-	EC_HOST_EVENT_KEYBOARD_RECOVERY = 15,
-
-	/* Shutdown due to thermal overload */
-	EC_HOST_EVENT_THERMAL_SHUTDOWN = 16,
-	/* Shutdown due to battery level too low */
-	EC_HOST_EVENT_BATTERY_SHUTDOWN = 17,
-
-	/* Suggest that the AP throttle itself */
-	EC_HOST_EVENT_THROTTLE_START = 18,
-	/* Suggest that the AP resume normal speed */
-	EC_HOST_EVENT_THROTTLE_STOP = 19,
-
-	/* Hang detect logic detected a hang and host event timeout expired */
-	EC_HOST_EVENT_HANG_DETECT = 20,
-	/* Hang detect logic detected a hang and warm rebooted the AP */
-	EC_HOST_EVENT_HANG_REBOOT = 21,
-
-	/* PD MCU triggering host event */
-	EC_HOST_EVENT_PD_MCU = 22,
-
-	/* Battery Status flags have changed */
-	EC_HOST_EVENT_BATTERY_STATUS = 23,
-
-	/* EC encountered a panic, triggering a reset */
-	EC_HOST_EVENT_PANIC = 24,
-
-	/* Keyboard fastboot combo has been pressed */
-	EC_HOST_EVENT_KEYBOARD_FASTBOOT = 25,
-
-	/* EC RTC event occurred */
-	EC_HOST_EVENT_RTC = 26,
-
-	/* Emulate MKBP event */
-	EC_HOST_EVENT_MKBP = 27,
-
-	/* EC desires to change state of host-controlled USB mux */
-	EC_HOST_EVENT_USB_MUX = 28,
-
-	/* TABLET/LAPTOP mode or detachable base attach/detach event */
-	EC_HOST_EVENT_MODE_CHANGE = 29,
-
-	/* Keyboard recovery combo with hardware reinitialization */
-	EC_HOST_EVENT_KEYBOARD_RECOVERY_HW_REINIT = 30,
-
-	/*
-	 * The high bit of the event mask is not used as a host event code.  If
-	 * it reads back as set, then the entire event mask should be
-	 * considered invalid by the host.  This can happen when reading the
-	 * raw event status via EC_MEMMAP_HOST_EVENTS but the LPC interface is
-	 * not initialized on the EC, or improperly configured on the host.
-	 */
-	EC_HOST_EVENT_INVALID = 32
-};
-/* Host event mask */
-#define EC_HOST_EVENT_MASK(event_code) BIT_ULL((event_code) - 1)
-
-/**
- * struct ec_lpc_host_args - Arguments at EC_LPC_ADDR_HOST_ARGS
- * @flags: The host argument flags.
- * @command_version: Command version.
- * @data_size: The length of data.
- * @checksum: Checksum; sum of command + flags + command_version + data_size +
- *            all params/response data bytes.
- */
-struct ec_lpc_host_args {
-	uint8_t flags;
-	uint8_t command_version;
-	uint8_t data_size;
-	uint8_t checksum;
-} __ec_align4;
-
-/* Flags for ec_lpc_host_args.flags */
-/*
- * Args are from host.  Data area at EC_LPC_ADDR_HOST_PARAM contains command
- * params.
- *
- * If EC gets a command and this flag is not set, this is an old-style command.
- * Command version is 0 and params from host are at EC_LPC_ADDR_OLD_PARAM with
- * unknown length.  EC must respond with an old-style response (that is,
- * without setting EC_HOST_ARGS_FLAG_TO_HOST).
- */
-#define EC_HOST_ARGS_FLAG_FROM_HOST 0x01
-/*
- * Args are from EC.  Data area at EC_LPC_ADDR_HOST_PARAM contains response.
- *
- * If EC responds to a command and this flag is not set, this is an old-style
- * response.  Command version is 0 and response data from EC is at
- * EC_LPC_ADDR_OLD_PARAM with unknown length.
- */
-#define EC_HOST_ARGS_FLAG_TO_HOST   0x02
-
-/*****************************************************************************/
-/*
- * Byte codes returned by EC over SPI interface.
- *
- * These can be used by the AP to debug the EC interface, and to determine
- * when the EC is not in a state where it will ever get around to responding
- * to the AP.
- *
- * Example of sequence of bytes read from EC for a current good transfer:
- *   1. -                  - AP asserts chip select (CS#)
- *   2. EC_SPI_OLD_READY   - AP sends first byte(s) of request
- *   3. -                  - EC starts handling CS# interrupt
- *   4. EC_SPI_RECEIVING   - AP sends remaining byte(s) of request
- *   5. EC_SPI_PROCESSING  - EC starts processing request; AP is clocking in
- *                           bytes looking for EC_SPI_FRAME_START
- *   6. -                  - EC finishes processing and sets up response
- *   7. EC_SPI_FRAME_START - AP reads frame byte
- *   8. (response packet)  - AP reads response packet
- *   9. EC_SPI_PAST_END    - Any additional bytes read by AP
- *   10 -                  - AP deasserts chip select
- *   11 -                  - EC processes CS# interrupt and sets up DMA for
- *                           next request
- *
- * If the AP is waiting for EC_SPI_FRAME_START and sees any value other than
- * the following byte values:
- *   EC_SPI_OLD_READY
- *   EC_SPI_RX_READY
- *   EC_SPI_RECEIVING
- *   EC_SPI_PROCESSING
- *
- * Then the EC found an error in the request, or was not ready for the request
- * and lost data.  The AP should give up waiting for EC_SPI_FRAME_START,
- * because the EC is unable to tell when the AP is done sending its request.
- */
-
-/*
- * Framing byte which precedes a response packet from the EC.  After sending a
- * request, the AP will clock in bytes until it sees the framing byte, then
- * clock in the response packet.
- */
-#define EC_SPI_FRAME_START    0xec
-
-/*
- * Padding bytes which are clocked out after the end of a response packet.
- */
-#define EC_SPI_PAST_END       0xed
-
-/*
- * EC is ready to receive, and has ignored the byte sent by the AP.  EC expects
- * that the AP will send a valid packet header (starting with
- * EC_COMMAND_PROTOCOL_3) in the next 32 bytes.
- */
-#define EC_SPI_RX_READY       0xf8
-
-/*
- * EC has started receiving the request from the AP, but hasn't started
- * processing it yet.
- */
-#define EC_SPI_RECEIVING      0xf9
-
-/* EC has received the entire request from the AP and is processing it. */
-#define EC_SPI_PROCESSING     0xfa
-
-/*
- * EC received bad data from the AP, such as a packet header with an invalid
- * length.  EC will ignore all data until chip select deasserts.
- */
-#define EC_SPI_RX_BAD_DATA    0xfb
-
-/*
- * EC received data from the AP before it was ready.  That is, the AP asserted
- * chip select and started clocking data before the EC was ready to receive it.
- * EC will ignore all data until chip select deasserts.
- */
-#define EC_SPI_NOT_READY      0xfc
-
-/*
- * EC was ready to receive a request from the AP.  EC has treated the byte sent
- * by the AP as part of a request packet, or (for old-style ECs) is processing
- * a fully received packet but is not ready to respond yet.
- */
-#define EC_SPI_OLD_READY      0xfd
-
-/*****************************************************************************/
-
-/*
- * Protocol version 2 for I2C and SPI send a request this way:
- *
- *	0	EC_CMD_VERSION0 + (command version)
- *	1	Command number
- *	2	Length of params = N
- *	3..N+2	Params, if any
- *	N+3	8-bit checksum of bytes 0..N+2
- *
- * The corresponding response is:
- *
- *	0	Result code (EC_RES_*)
- *	1	Length of params = M
- *	2..M+1	Params, if any
- *	M+2	8-bit checksum of bytes 0..M+1
- */
-#define EC_PROTO2_REQUEST_HEADER_BYTES 3
-#define EC_PROTO2_REQUEST_TRAILER_BYTES 1
-#define EC_PROTO2_REQUEST_OVERHEAD (EC_PROTO2_REQUEST_HEADER_BYTES +	\
-				    EC_PROTO2_REQUEST_TRAILER_BYTES)
-
-#define EC_PROTO2_RESPONSE_HEADER_BYTES 2
-#define EC_PROTO2_RESPONSE_TRAILER_BYTES 1
-#define EC_PROTO2_RESPONSE_OVERHEAD (EC_PROTO2_RESPONSE_HEADER_BYTES +	\
-				     EC_PROTO2_RESPONSE_TRAILER_BYTES)
-
-/* Parameter length was limited by the LPC interface */
-#define EC_PROTO2_MAX_PARAM_SIZE 0xfc
-
-/* Maximum request and response packet sizes for protocol version 2 */
-#define EC_PROTO2_MAX_REQUEST_SIZE (EC_PROTO2_REQUEST_OVERHEAD +	\
-				    EC_PROTO2_MAX_PARAM_SIZE)
-#define EC_PROTO2_MAX_RESPONSE_SIZE (EC_PROTO2_RESPONSE_OVERHEAD +	\
-				     EC_PROTO2_MAX_PARAM_SIZE)
-
-/*****************************************************************************/
-
-/*
- * Value written to legacy command port / prefix byte to indicate protocol
- * 3+ structs are being used.  Usage is bus-dependent.
- */
-#define EC_COMMAND_PROTOCOL_3 0xda
-
-#define EC_HOST_REQUEST_VERSION 3
-
-/**
- * struct ec_host_request - Version 3 request from host.
- * @struct_version: Should be 3. The EC will return EC_RES_INVALID_HEADER if it
- *                  receives a header with a version it doesn't know how to
- *                  parse.
- * @checksum: Checksum of request and data; sum of all bytes including checksum
- *            should total to 0.
- * @command: Command to send (EC_CMD_...)
- * @command_version: Command version.
- * @reserved: Unused byte in current protocol version; set to 0.
- * @data_len: Length of data which follows this header.
- */
-struct ec_host_request {
-	uint8_t struct_version;
-	uint8_t checksum;
-	uint16_t command;
-	uint8_t command_version;
-	uint8_t reserved;
-	uint16_t data_len;
-} __ec_align4;
-
-#define EC_HOST_RESPONSE_VERSION 3
-
-/**
- * struct ec_host_response - Version 3 response from EC.
- * @struct_version: Struct version (=3).
- * @checksum: Checksum of response and data; sum of all bytes including
- *            checksum should total to 0.
- * @result: EC's response to the command (separate from communication failure)
- * @data_len: Length of data which follows this header.
- * @reserved: Unused bytes in current protocol version; set to 0.
- */
-struct ec_host_response {
-	uint8_t struct_version;
-	uint8_t checksum;
-	uint16_t result;
-	uint16_t data_len;
-	uint16_t reserved;
-} __ec_align4;
-
-/*****************************************************************************/
-
-/*
- * Host command protocol V4.
- *
- * Packets always start with a request or response header.  They are followed
- * by data_len bytes of data.  If the data_crc_present flag is set, the data
- * bytes are followed by a CRC-8 of that data, using using x^8 + x^2 + x + 1
- * polynomial.
- *
- * Host algorithm when sending a request q:
- *
- * 101) tries_left=(some value, e.g. 3);
- * 102) q.seq_num++
- * 103) q.seq_dup=0
- * 104) Calculate q.header_crc.
- * 105) Send request q to EC.
- * 106) Wait for response r.  Go to 201 if received or 301 if timeout.
- *
- * 201) If r.struct_version != 4, go to 301.
- * 202) If r.header_crc mismatches calculated CRC for r header, go to 301.
- * 203) If r.data_crc_present and r.data_crc mismatches, go to 301.
- * 204) If r.seq_num != q.seq_num, go to 301.
- * 205) If r.seq_dup == q.seq_dup, return success.
- * 207) If r.seq_dup == 1, go to 301.
- * 208) Return error.
- *
- * 301) If --tries_left <= 0, return error.
- * 302) If q.seq_dup == 1, go to 105.
- * 303) q.seq_dup = 1
- * 304) Go to 104.
- *
- * EC algorithm when receiving a request q.
- * EC has response buffer r, error buffer e.
- *
- * 101) If q.struct_version != 4, set e.result = EC_RES_INVALID_HEADER_VERSION
- *      and go to 301
- * 102) If q.header_crc mismatches calculated CRC, set e.result =
- *      EC_RES_INVALID_HEADER_CRC and go to 301
- * 103) If q.data_crc_present, calculate data CRC.  If that mismatches the CRC
- *      byte at the end of the packet, set e.result = EC_RES_INVALID_DATA_CRC
- *      and go to 301.
- * 104) If q.seq_dup == 0, go to 201.
- * 105) If q.seq_num != r.seq_num, go to 201.
- * 106) If q.seq_dup == r.seq_dup, go to 205, else go to 203.
- *
- * 201) Process request q into response r.
- * 202) r.seq_num = q.seq_num
- * 203) r.seq_dup = q.seq_dup
- * 204) Calculate r.header_crc
- * 205) If r.data_len > 0 and data is no longer available, set e.result =
- *      EC_RES_DUP_UNAVAILABLE and go to 301.
- * 206) Send response r.
- *
- * 301) e.seq_num = q.seq_num
- * 302) e.seq_dup = q.seq_dup
- * 303) Calculate e.header_crc.
- * 304) Send error response e.
- */
-
-/* Version 4 request from host */
-struct ec_host_request4 {
-	/*
-	 * bits 0-3: struct_version: Structure version (=4)
-	 * bit    4: is_response: Is response (=0)
-	 * bits 5-6: seq_num: Sequence number
-	 * bit    7: seq_dup: Sequence duplicate flag
-	 */
-	uint8_t fields0;
-
-	/*
-	 * bits 0-4: command_version: Command version
-	 * bits 5-6: Reserved (set 0, ignore on read)
-	 * bit    7: data_crc_present: Is data CRC present after data
-	 */
-	uint8_t fields1;
-
-	/* Command code (EC_CMD_*) */
-	uint16_t command;
-
-	/* Length of data which follows this header (not including data CRC) */
-	uint16_t data_len;
-
-	/* Reserved (set 0, ignore on read) */
-	uint8_t reserved;
-
-	/* CRC-8 of above fields, using x^8 + x^2 + x + 1 polynomial */
-	uint8_t header_crc;
-} __ec_align4;
-
-/* Version 4 response from EC */
-struct ec_host_response4 {
-	/*
-	 * bits 0-3: struct_version: Structure version (=4)
-	 * bit    4: is_response: Is response (=1)
-	 * bits 5-6: seq_num: Sequence number
-	 * bit    7: seq_dup: Sequence duplicate flag
-	 */
-	uint8_t fields0;
-
-	/*
-	 * bits 0-6: Reserved (set 0, ignore on read)
-	 * bit    7: data_crc_present: Is data CRC present after data
-	 */
-	uint8_t fields1;
-
-	/* Result code (EC_RES_*) */
-	uint16_t result;
-
-	/* Length of data which follows this header (not including data CRC) */
-	uint16_t data_len;
-
-	/* Reserved (set 0, ignore on read) */
-	uint8_t reserved;
-
-	/* CRC-8 of above fields, using x^8 + x^2 + x + 1 polynomial */
-	uint8_t header_crc;
-} __ec_align4;
-
-/* Fields in fields0 byte */
-#define EC_PACKET4_0_STRUCT_VERSION_MASK	0x0f
-#define EC_PACKET4_0_IS_RESPONSE_MASK		0x10
-#define EC_PACKET4_0_SEQ_NUM_SHIFT		5
-#define EC_PACKET4_0_SEQ_NUM_MASK		0x60
-#define EC_PACKET4_0_SEQ_DUP_MASK		0x80
-
-/* Fields in fields1 byte */
-#define EC_PACKET4_1_COMMAND_VERSION_MASK	0x1f  /* (request only) */
-#define EC_PACKET4_1_DATA_CRC_PRESENT_MASK	0x80
-
-/*****************************************************************************/
-/*
- * Notes on commands:
- *
- * Each command is an 16-bit command value.  Commands which take params or
- * return response data specify structures for that data.  If no structure is
- * specified, the command does not input or output data, respectively.
- * Parameter/response length is implicit in the structs.  Some underlying
- * communication protocols (I2C, SPI) may add length or checksum headers, but
- * those are implementation-dependent and not defined here.
- *
- * All commands MUST be #defined to be 4-digit UPPER CASE hex values
- * (e.g., 0x00AB, not 0xab) for CONFIG_HOSTCMD_SECTION_SORTED to work.
- */
-
-/*****************************************************************************/
-/* General / test commands */
-
-/*
- * Get protocol version, used to deal with non-backward compatible protocol
- * changes.
- */
-#define EC_CMD_PROTO_VERSION 0x0000
-
-/**
- * struct ec_response_proto_version - Response to the proto version command.
- * @version: The protocol version.
- */
-struct ec_response_proto_version {
-	uint32_t version;
-} __ec_align4;
-
-/*
- * Hello.  This is a simple command to test the EC is responsive to
- * commands.
- */
-#define EC_CMD_HELLO 0x0001
-
-/**
- * struct ec_params_hello - Parameters to the hello command.
- * @in_data: Pass anything here.
- */
-struct ec_params_hello {
-	uint32_t in_data;
-} __ec_align4;
-
-/**
- * struct ec_response_hello - Response to the hello command.
- * @out_data: Output will be in_data + 0x01020304.
- */
-struct ec_response_hello {
-	uint32_t out_data;
-} __ec_align4;
-
-/* Get version number */
-#define EC_CMD_GET_VERSION 0x0002
-
-enum ec_current_image {
-	EC_IMAGE_UNKNOWN = 0,
-	EC_IMAGE_RO,
-	EC_IMAGE_RW
-};
-
-/**
- * struct ec_response_get_version - Response to the get version command.
- * @version_string_ro: Null-terminated RO firmware version string.
- * @version_string_rw: Null-terminated RW firmware version string.
- * @reserved: Unused bytes; was previously RW-B firmware version string.
- * @current_image: One of ec_current_image.
- */
-struct ec_response_get_version {
-	char version_string_ro[32];
-	char version_string_rw[32];
-	char reserved[32];
-	uint32_t current_image;
-} __ec_align4;
-
-/* Read test */
-#define EC_CMD_READ_TEST 0x0003
-
-/**
- * struct ec_params_read_test - Parameters for the read test command.
- * @offset: Starting value for read buffer.
- * @size: Size to read in bytes.
- */
-struct ec_params_read_test {
-	uint32_t offset;
-	uint32_t size;
-} __ec_align4;
-
-/**
- * struct ec_response_read_test - Response to the read test command.
- * @data: Data returned by the read test command.
- */
-struct ec_response_read_test {
-	uint32_t data[32];
-} __ec_align4;
-
-/*
- * Get build information
- *
- * Response is null-terminated string.
- */
-#define EC_CMD_GET_BUILD_INFO 0x0004
-
-/* Get chip info */
-#define EC_CMD_GET_CHIP_INFO 0x0005
-
-/**
- * struct ec_response_get_chip_info - Response to the get chip info command.
- * @vendor: Null-terminated string for chip vendor.
- * @name: Null-terminated string for chip name.
- * @revision: Null-terminated string for chip mask version.
- */
-struct ec_response_get_chip_info {
-	char vendor[32];
-	char name[32];
-	char revision[32];
-} __ec_align4;
-
-/* Get board HW version */
-#define EC_CMD_GET_BOARD_VERSION 0x0006
-
-/**
- * struct ec_response_board_version - Response to the board version command.
- * @board_version: A monotonously incrementing number.
- */
-struct ec_response_board_version {
-	uint16_t board_version;
-} __ec_align2;
-
-/*
- * Read memory-mapped data.
- *
- * This is an alternate interface to memory-mapped data for bus protocols
- * which don't support direct-mapped memory - I2C, SPI, etc.
- *
- * Response is params.size bytes of data.
- */
-#define EC_CMD_READ_MEMMAP 0x0007
-
-/**
- * struct ec_params_read_memmap - Parameters for the read memory map command.
- * @offset: Offset in memmap (EC_MEMMAP_*).
- * @size: Size to read in bytes.
- */
-struct ec_params_read_memmap {
-	uint8_t offset;
-	uint8_t size;
-} __ec_align1;
-
-/* Read versions supported for a command */
-#define EC_CMD_GET_CMD_VERSIONS 0x0008
-
-/**
- * struct ec_params_get_cmd_versions - Parameters for the get command versions.
- * @cmd: Command to check.
- */
-struct ec_params_get_cmd_versions {
-	uint8_t cmd;
-} __ec_align1;
-
-/**
- * struct ec_params_get_cmd_versions_v1 - Parameters for the get command
- *         versions (v1)
- * @cmd: Command to check.
- */
-struct ec_params_get_cmd_versions_v1 {
-	uint16_t cmd;
-} __ec_align2;
-
-/**
- * struct ec_response_get_cmd_version - Response to the get command versions.
- * @version_mask: Mask of supported versions; use EC_VER_MASK() to compare with
- *                a desired version.
- */
-struct ec_response_get_cmd_versions {
-	uint32_t version_mask;
-} __ec_align4;
-
-/*
- * Check EC communications status (busy). This is needed on i2c/spi but not
- * on lpc since it has its own out-of-band busy indicator.
- *
- * lpc must read the status from the command register. Attempting this on
- * lpc will overwrite the args/parameter space and corrupt its data.
- */
-#define EC_CMD_GET_COMMS_STATUS		0x0009
-
-/* Avoid using ec_status which is for return values */
-enum ec_comms_status {
-	EC_COMMS_STATUS_PROCESSING	= BIT(0),	/* Processing cmd */
-};
-
-/**
- * struct ec_response_get_comms_status - Response to the get comms status
- *         command.
- * @flags: Mask of enum ec_comms_status.
- */
-struct ec_response_get_comms_status {
-	uint32_t flags;		/* Mask of enum ec_comms_status */
-} __ec_align4;
-
-/* Fake a variety of responses, purely for testing purposes. */
-#define EC_CMD_TEST_PROTOCOL		0x000A
-
-/* Tell the EC what to send back to us. */
-struct ec_params_test_protocol {
-	uint32_t ec_result;
-	uint32_t ret_len;
-	uint8_t buf[32];
-} __ec_align4;
-
-/* Here it comes... */
-struct ec_response_test_protocol {
-	uint8_t buf[32];
-} __ec_align4;
-
-/* Get protocol information */
-#define EC_CMD_GET_PROTOCOL_INFO	0x000B
-
-/* Flags for ec_response_get_protocol_info.flags */
-/* EC_RES_IN_PROGRESS may be returned if a command is slow */
-#define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED BIT(0)
-
-/**
- * struct ec_response_get_protocol_info - Response to the get protocol info.
- * @protocol_versions: Bitmask of protocol versions supported (1 << n means
- *                     version n).
- * @max_request_packet_size: Maximum request packet size in bytes.
- * @max_response_packet_size: Maximum response packet size in bytes.
- * @flags: see EC_PROTOCOL_INFO_*
- */
-struct ec_response_get_protocol_info {
-	/* Fields which exist if at least protocol version 3 supported */
-	uint32_t protocol_versions;
-	uint16_t max_request_packet_size;
-	uint16_t max_response_packet_size;
-	uint32_t flags;
-} __ec_align4;
-
-
-/*****************************************************************************/
-/* Get/Set miscellaneous values */
-
-/* The upper byte of .flags tells what to do (nothing means "get") */
-#define EC_GSV_SET        0x80000000
-
-/*
- * The lower three bytes of .flags identifies the parameter, if that has
- * meaning for an individual command.
- */
-#define EC_GSV_PARAM_MASK 0x00ffffff
-
-struct ec_params_get_set_value {
-	uint32_t flags;
-	uint32_t value;
-} __ec_align4;
-
-struct ec_response_get_set_value {
-	uint32_t flags;
-	uint32_t value;
-} __ec_align4;
-
-/* More than one command can use these structs to get/set parameters. */
-#define EC_CMD_GSV_PAUSE_IN_S5	0x000C
-
-/*****************************************************************************/
-/* List the features supported by the firmware */
-#define EC_CMD_GET_FEATURES  0x000D
-
-/* Supported features */
-enum ec_feature_code {
-	/*
-	 * This image contains a limited set of features. Another image
-	 * in RW partition may support more features.
-	 */
-	EC_FEATURE_LIMITED = 0,
-	/*
-	 * Commands for probing/reading/writing/erasing the flash in the
-	 * EC are present.
-	 */
-	EC_FEATURE_FLASH = 1,
-	/*
-	 * Can control the fan speed directly.
-	 */
-	EC_FEATURE_PWM_FAN = 2,
-	/*
-	 * Can control the intensity of the keyboard backlight.
-	 */
-	EC_FEATURE_PWM_KEYB = 3,
-	/*
-	 * Support Google lightbar, introduced on Pixel.
-	 */
-	EC_FEATURE_LIGHTBAR = 4,
-	/* Control of LEDs  */
-	EC_FEATURE_LED = 5,
-	/* Exposes an interface to control gyro and sensors.
-	 * The host goes through the EC to access these sensors.
-	 * In addition, the EC may provide composite sensors, like lid angle.
-	 */
-	EC_FEATURE_MOTION_SENSE = 6,
-	/* The keyboard is controlled by the EC */
-	EC_FEATURE_KEYB = 7,
-	/* The AP can use part of the EC flash as persistent storage. */
-	EC_FEATURE_PSTORE = 8,
-	/* The EC monitors BIOS port 80h, and can return POST codes. */
-	EC_FEATURE_PORT80 = 9,
-	/*
-	 * Thermal management: include TMP specific commands.
-	 * Higher level than direct fan control.
-	 */
-	EC_FEATURE_THERMAL = 10,
-	/* Can switch the screen backlight on/off */
-	EC_FEATURE_BKLIGHT_SWITCH = 11,
-	/* Can switch the wifi module on/off */
-	EC_FEATURE_WIFI_SWITCH = 12,
-	/* Monitor host events, through for example SMI or SCI */
-	EC_FEATURE_HOST_EVENTS = 13,
-	/* The EC exposes GPIO commands to control/monitor connected devices. */
-	EC_FEATURE_GPIO = 14,
-	/* The EC can send i2c messages to downstream devices. */
-	EC_FEATURE_I2C = 15,
-	/* Command to control charger are included */
-	EC_FEATURE_CHARGER = 16,
-	/* Simple battery support. */
-	EC_FEATURE_BATTERY = 17,
-	/*
-	 * Support Smart battery protocol
-	 * (Common Smart Battery System Interface Specification)
-	 */
-	EC_FEATURE_SMART_BATTERY = 18,
-	/* EC can detect when the host hangs. */
-	EC_FEATURE_HANG_DETECT = 19,
-	/* Report power information, for pit only */
-	EC_FEATURE_PMU = 20,
-	/* Another Cros EC device is present downstream of this one */
-	EC_FEATURE_SUB_MCU = 21,
-	/* Support USB Power delivery (PD) commands */
-	EC_FEATURE_USB_PD = 22,
-	/* Control USB multiplexer, for audio through USB port for instance. */
-	EC_FEATURE_USB_MUX = 23,
-	/* Motion Sensor code has an internal software FIFO */
-	EC_FEATURE_MOTION_SENSE_FIFO = 24,
-	/* Support temporary secure vstore */
-	EC_FEATURE_VSTORE = 25,
-	/* EC decides on USB-C SS mux state, muxes configured by host */
-	EC_FEATURE_USBC_SS_MUX_VIRTUAL = 26,
-	/* EC has RTC feature that can be controlled by host commands */
-	EC_FEATURE_RTC = 27,
-	/* The MCU exposes a Fingerprint sensor */
-	EC_FEATURE_FINGERPRINT = 28,
-	/* The MCU exposes a Touchpad */
-	EC_FEATURE_TOUCHPAD = 29,
-	/* The MCU has RWSIG task enabled */
-	EC_FEATURE_RWSIG = 30,
-	/* EC has device events support */
-	EC_FEATURE_DEVICE_EVENT = 31,
-	/* EC supports the unified wake masks for LPC/eSPI systems */
-	EC_FEATURE_UNIFIED_WAKE_MASKS = 32,
-	/* EC supports 64-bit host events */
-	EC_FEATURE_HOST_EVENT64 = 33,
-	/* EC runs code in RAM (not in place, a.k.a. XIP) */
-	EC_FEATURE_EXEC_IN_RAM = 34,
-	/* EC supports CEC commands */
-	EC_FEATURE_CEC = 35,
-	/* EC supports tight sensor timestamping. */
-	EC_FEATURE_MOTION_SENSE_TIGHT_TIMESTAMPS = 36,
-	/*
-	 * EC supports tablet mode detection aligned to Chrome and allows
-	 * setting of threshold by host command using
-	 * MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE.
-	 */
-	EC_FEATURE_REFINED_TABLET_MODE_HYSTERESIS = 37,
-	/* EC supports audio codec. */
-	EC_FEATURE_AUDIO_CODEC = 38,
-	/* The MCU is a System Companion Processor (SCP). */
-	EC_FEATURE_SCP = 39,
-	/* The MCU is an Integrated Sensor Hub */
-	EC_FEATURE_ISH = 40,
-};
-
-#define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32)
-#define EC_FEATURE_MASK_1(event_code) BIT(event_code - 32)
-
-struct ec_response_get_features {
-	uint32_t flags[2];
-} __ec_align4;
-
-/*****************************************************************************/
-/* Get the board's SKU ID from EC */
-#define EC_CMD_GET_SKU_ID 0x000E
-
-/* Set SKU ID from AP */
-#define EC_CMD_SET_SKU_ID 0x000F
-
-struct ec_sku_id_info {
-	uint32_t sku_id;
-} __ec_align4;
-
-/*****************************************************************************/
-/* Flash commands */
-
-/* Get flash info */
-#define EC_CMD_FLASH_INFO 0x0010
-#define EC_VER_FLASH_INFO 2
-
-/**
- * struct ec_response_flash_info - Response to the flash info command.
- * @flash_size: Usable flash size in bytes.
- * @write_block_size: Write block size. Write offset and size must be a
- *                    multiple of this.
- * @erase_block_size: Erase block size. Erase offset and size must be a
- *                    multiple of this.
- * @protect_block_size: Protection block size. Protection offset and size
- *                      must be a multiple of this.
- *
- * Version 0 returns these fields.
- */
-struct ec_response_flash_info {
-	uint32_t flash_size;
-	uint32_t write_block_size;
-	uint32_t erase_block_size;
-	uint32_t protect_block_size;
-} __ec_align4;
-
-/*
- * Flags for version 1+ flash info command
- * EC flash erases bits to 0 instead of 1.
- */
-#define EC_FLASH_INFO_ERASE_TO_0 BIT(0)
-
-/*
- * Flash must be selected for read/write/erase operations to succeed.  This may
- * be necessary on a chip where write/erase can be corrupted by other board
- * activity, or where the chip needs to enable some sort of programming voltage,
- * or where the read/write/erase operations require cleanly suspending other
- * chip functionality.
- */
-#define EC_FLASH_INFO_SELECT_REQUIRED BIT(1)
-
-/**
- * struct ec_response_flash_info_1 - Response to the flash info v1 command.
- * @flash_size: Usable flash size in bytes.
- * @write_block_size: Write block size. Write offset and size must be a
- *                    multiple of this.
- * @erase_block_size: Erase block size. Erase offset and size must be a
- *                    multiple of this.
- * @protect_block_size: Protection block size. Protection offset and size
- *                      must be a multiple of this.
- * @write_ideal_size: Ideal write size in bytes.  Writes will be fastest if
- *                    size is exactly this and offset is a multiple of this.
- *                    For example, an EC may have a write buffer which can do
- *                    half-page operations if data is aligned, and a slower
- *                    word-at-a-time write mode.
- * @flags: Flags; see EC_FLASH_INFO_*
- *
- * Version 1 returns the same initial fields as version 0, with additional
- * fields following.
- *
- * gcc anonymous structs don't seem to get along with the __packed directive;
- * if they did we'd define the version 0 structure as a sub-structure of this
- * one.
- *
- * Version 2 supports flash banks of different sizes:
- * The caller specified the number of banks it has preallocated
- * (num_banks_desc)
- * The EC returns the number of banks describing the flash memory.
- * It adds banks descriptions up to num_banks_desc.
- */
-struct ec_response_flash_info_1 {
-	/* Version 0 fields; see above for description */
-	uint32_t flash_size;
-	uint32_t write_block_size;
-	uint32_t erase_block_size;
-	uint32_t protect_block_size;
-
-	/* Version 1 adds these fields: */
-	uint32_t write_ideal_size;
-	uint32_t flags;
-} __ec_align4;
-
-struct ec_params_flash_info_2 {
-	/* Number of banks to describe */
-	uint16_t num_banks_desc;
-	/* Reserved; set 0; ignore on read */
-	uint8_t reserved[2];
-} __ec_align4;
-
-struct ec_flash_bank {
-	/* Number of sector is in this bank. */
-	uint16_t count;
-	/* Size in power of 2 of each sector (8 --> 256 bytes) */
-	uint8_t size_exp;
-	/* Minimal write size for the sectors in this bank */
-	uint8_t write_size_exp;
-	/* Erase size for the sectors in this bank */
-	uint8_t erase_size_exp;
-	/* Size for write protection, usually identical to erase size. */
-	uint8_t protect_size_exp;
-	/* Reserved; set 0; ignore on read */
-	uint8_t reserved[2];
-};
-
-struct ec_response_flash_info_2 {
-	/* Total flash in the EC. */
-	uint32_t flash_size;
-	/* Flags; see EC_FLASH_INFO_* */
-	uint32_t flags;
-	/* Maximum size to use to send data to write to the EC. */
-	uint32_t write_ideal_size;
-	/* Number of banks present in the EC. */
-	uint16_t num_banks_total;
-	/* Number of banks described in banks array. */
-	uint16_t num_banks_desc;
-	struct ec_flash_bank banks[0];
-} __ec_align4;
-
-/*
- * Read flash
- *
- * Response is params.size bytes of data.
- */
-#define EC_CMD_FLASH_READ 0x0011
-
-/**
- * struct ec_params_flash_read - Parameters for the flash read command.
- * @offset: Byte offset to read.
- * @size: Size to read in bytes.
- */
-struct ec_params_flash_read {
-	uint32_t offset;
-	uint32_t size;
-} __ec_align4;
-
-/* Write flash */
-#define EC_CMD_FLASH_WRITE 0x0012
-#define EC_VER_FLASH_WRITE 1
-
-/* Version 0 of the flash command supported only 64 bytes of data */
-#define EC_FLASH_WRITE_VER0_SIZE 64
-
-/**
- * struct ec_params_flash_write - Parameters for the flash write command.
- * @offset: Byte offset to write.
- * @size: Size to write in bytes.
- */
-struct ec_params_flash_write {
-	uint32_t offset;
-	uint32_t size;
-	/* Followed by data to write */
-} __ec_align4;
-
-/* Erase flash */
-#define EC_CMD_FLASH_ERASE 0x0013
-
-/**
- * struct ec_params_flash_erase - Parameters for the flash erase command, v0.
- * @offset: Byte offset to erase.
- * @size: Size to erase in bytes.
- */
-struct ec_params_flash_erase {
-	uint32_t offset;
-	uint32_t size;
-} __ec_align4;
-
-/*
- * v1 add async erase:
- * subcommands can returns:
- * EC_RES_SUCCESS : erased (see ERASE_SECTOR_ASYNC case below).
- * EC_RES_INVALID_PARAM : offset/size are not aligned on a erase boundary.
- * EC_RES_ERROR : other errors.
- * EC_RES_BUSY : an existing erase operation is in progress.
- * EC_RES_ACCESS_DENIED: Trying to erase running image.
- *
- * When ERASE_SECTOR_ASYNC returns EC_RES_SUCCESS, the operation is just
- * properly queued. The user must call ERASE_GET_RESULT subcommand to get
- * the proper result.
- * When ERASE_GET_RESULT returns EC_RES_BUSY, the caller must wait and send
- * ERASE_GET_RESULT again to get the result of ERASE_SECTOR_ASYNC.
- * ERASE_GET_RESULT command may timeout on EC where flash access is not
- * permitted while erasing. (For instance, STM32F4).
- */
-enum ec_flash_erase_cmd {
-	FLASH_ERASE_SECTOR,     /* Erase and wait for result */
-	FLASH_ERASE_SECTOR_ASYNC,  /* Erase and return immediately. */
-	FLASH_ERASE_GET_RESULT,  /* Ask for last erase result */
-};
-
-/**
- * struct ec_params_flash_erase_v1 - Parameters for the flash erase command, v1.
- * @cmd: One of ec_flash_erase_cmd.
- * @reserved: Pad byte; currently always contains 0.
- * @flag: No flags defined yet; set to 0.
- * @params: Same as v0 parameters.
- */
-struct ec_params_flash_erase_v1 {
-	uint8_t  cmd;
-	uint8_t  reserved;
-	uint16_t flag;
-	struct ec_params_flash_erase params;
-} __ec_align4;
-
-/*
- * Get/set flash protection.
- *
- * If mask!=0, sets/clear the requested bits of flags.  Depending on the
- * firmware write protect GPIO, not all flags will take effect immediately;
- * some flags require a subsequent hard reset to take effect.  Check the
- * returned flags bits to see what actually happened.
- *
- * If mask=0, simply returns the current flags state.
- */
-#define EC_CMD_FLASH_PROTECT 0x0015
-#define EC_VER_FLASH_PROTECT 1  /* Command version 1 */
-
-/* Flags for flash protection */
-/* RO flash code protected when the EC boots */
-#define EC_FLASH_PROTECT_RO_AT_BOOT         BIT(0)
-/*
- * RO flash code protected now.  If this bit is set, at-boot status cannot
- * be changed.
- */
-#define EC_FLASH_PROTECT_RO_NOW             BIT(1)
-/* Entire flash code protected now, until reboot. */
-#define EC_FLASH_PROTECT_ALL_NOW            BIT(2)
-/* Flash write protect GPIO is asserted now */
-#define EC_FLASH_PROTECT_GPIO_ASSERTED      BIT(3)
-/* Error - at least one bank of flash is stuck locked, and cannot be unlocked */
-#define EC_FLASH_PROTECT_ERROR_STUCK        BIT(4)
-/*
- * Error - flash protection is in inconsistent state.  At least one bank of
- * flash which should be protected is not protected.  Usually fixed by
- * re-requesting the desired flags, or by a hard reset if that fails.
- */
-#define EC_FLASH_PROTECT_ERROR_INCONSISTENT BIT(5)
-/* Entire flash code protected when the EC boots */
-#define EC_FLASH_PROTECT_ALL_AT_BOOT        BIT(6)
-/* RW flash code protected when the EC boots */
-#define EC_FLASH_PROTECT_RW_AT_BOOT         BIT(7)
-/* RW flash code protected now. */
-#define EC_FLASH_PROTECT_RW_NOW             BIT(8)
-/* Rollback information flash region protected when the EC boots */
-#define EC_FLASH_PROTECT_ROLLBACK_AT_BOOT   BIT(9)
-/* Rollback information flash region protected now */
-#define EC_FLASH_PROTECT_ROLLBACK_NOW       BIT(10)
-
-
-/**
- * struct ec_params_flash_protect - Parameters for the flash protect command.
- * @mask: Bits in flags to apply.
- * @flags: New flags to apply.
- */
-struct ec_params_flash_protect {
-	uint32_t mask;
-	uint32_t flags;
-} __ec_align4;
-
-/**
- * struct ec_response_flash_protect - Response to the flash protect command.
- * @flags: Current value of flash protect flags.
- * @valid_flags: Flags which are valid on this platform. This allows the
- *               caller to distinguish between flags which aren't set vs. flags
- *               which can't be set on this platform.
- * @writable_flags: Flags which can be changed given the current protection
- *                  state.
- */
-struct ec_response_flash_protect {
-	uint32_t flags;
-	uint32_t valid_flags;
-	uint32_t writable_flags;
-} __ec_align4;
-
-/*
- * Note: commands 0x14 - 0x19 version 0 were old commands to get/set flash
- * write protect.  These commands may be reused with version > 0.
- */
-
-/* Get the region offset/size */
-#define EC_CMD_FLASH_REGION_INFO 0x0016
-#define EC_VER_FLASH_REGION_INFO 1
-
-enum ec_flash_region {
-	/* Region which holds read-only EC image */
-	EC_FLASH_REGION_RO = 0,
-	/*
-	 * Region which holds active RW image. 'Active' is different from
-	 * 'running'. Active means 'scheduled-to-run'. Since RO image always
-	 * scheduled to run, active/non-active applies only to RW images (for
-	 * the same reason 'update' applies only to RW images. It's a state of
-	 * an image on a flash. Running image can be RO, RW_A, RW_B but active
-	 * image can only be RW_A or RW_B. In recovery mode, an active RW image
-	 * doesn't enter 'running' state but it's still active on a flash.
-	 */
-	EC_FLASH_REGION_ACTIVE,
-	/*
-	 * Region which should be write-protected in the factory (a superset of
-	 * EC_FLASH_REGION_RO)
-	 */
-	EC_FLASH_REGION_WP_RO,
-	/* Region which holds updatable (non-active) RW image */
-	EC_FLASH_REGION_UPDATE,
-	/* Number of regions */
-	EC_FLASH_REGION_COUNT,
-};
-/*
- * 'RW' is vague if there are multiple RW images; we mean the active one,
- * so the old constant is deprecated.
- */
-#define EC_FLASH_REGION_RW EC_FLASH_REGION_ACTIVE
-
-/**
- * struct ec_params_flash_region_info - Parameters for the flash region info
- *         command.
- * @region: Flash region; see EC_FLASH_REGION_*
- */
-struct ec_params_flash_region_info {
-	uint32_t region;
-} __ec_align4;
-
-struct ec_response_flash_region_info {
-	uint32_t offset;
-	uint32_t size;
-} __ec_align4;
-
-/* Read/write VbNvContext */
-#define EC_CMD_VBNV_CONTEXT 0x0017
-#define EC_VER_VBNV_CONTEXT 1
-#define EC_VBNV_BLOCK_SIZE 16
-
-enum ec_vbnvcontext_op {
-	EC_VBNV_CONTEXT_OP_READ,
-	EC_VBNV_CONTEXT_OP_WRITE,
-};
-
-struct ec_params_vbnvcontext {
-	uint32_t op;
-	uint8_t block[EC_VBNV_BLOCK_SIZE];
-} __ec_align4;
-
-struct ec_response_vbnvcontext {
-	uint8_t block[EC_VBNV_BLOCK_SIZE];
-} __ec_align4;
-
-
-/* Get SPI flash information */
-#define EC_CMD_FLASH_SPI_INFO 0x0018
-
-struct ec_response_flash_spi_info {
-	/* JEDEC info from command 0x9F (manufacturer, memory type, size) */
-	uint8_t jedec[3];
-
-	/* Pad byte; currently always contains 0 */
-	uint8_t reserved0;
-
-	/* Manufacturer / device ID from command 0x90 */
-	uint8_t mfr_dev_id[2];
-
-	/* Status registers from command 0x05 and 0x35 */
-	uint8_t sr1, sr2;
-} __ec_align1;
-
-
-/* Select flash during flash operations */
-#define EC_CMD_FLASH_SELECT 0x0019
-
-/**
- * struct ec_params_flash_select - Parameters for the flash select command.
- * @select: 1 to select flash, 0 to deselect flash
- */
-struct ec_params_flash_select {
-	uint8_t select;
-} __ec_align4;
-
-
-/*****************************************************************************/
-/* PWM commands */
-
-/* Get fan target RPM */
-#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x0020
-
-struct ec_response_pwm_get_fan_rpm {
-	uint32_t rpm;
-} __ec_align4;
-
-/* Set target fan RPM */
-#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x0021
-
-/* Version 0 of input params */
-struct ec_params_pwm_set_fan_target_rpm_v0 {
-	uint32_t rpm;
-} __ec_align4;
-
-/* Version 1 of input params */
-struct ec_params_pwm_set_fan_target_rpm_v1 {
-	uint32_t rpm;
-	uint8_t fan_idx;
-} __ec_align_size1;
-
-/* Get keyboard backlight */
-/* OBSOLETE - Use EC_CMD_PWM_SET_DUTY */
-#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x0022
-
-struct ec_response_pwm_get_keyboard_backlight {
-	uint8_t percent;
-	uint8_t enabled;
-} __ec_align1;
-
-/* Set keyboard backlight */
-/* OBSOLETE - Use EC_CMD_PWM_SET_DUTY */
-#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x0023
-
-struct ec_params_pwm_set_keyboard_backlight {
-	uint8_t percent;
-} __ec_align1;
-
-/* Set target fan PWM duty cycle */
-#define EC_CMD_PWM_SET_FAN_DUTY 0x0024
-
-/* Version 0 of input params */
-struct ec_params_pwm_set_fan_duty_v0 {
-	uint32_t percent;
-} __ec_align4;
-
-/* Version 1 of input params */
-struct ec_params_pwm_set_fan_duty_v1 {
-	uint32_t percent;
-	uint8_t fan_idx;
-} __ec_align_size1;
-
-#define EC_CMD_PWM_SET_DUTY 0x0025
-/* 16 bit duty cycle, 0xffff = 100% */
-#define EC_PWM_MAX_DUTY 0xffff
-
-enum ec_pwm_type {
-	/* All types, indexed by board-specific enum pwm_channel */
-	EC_PWM_TYPE_GENERIC = 0,
-	/* Keyboard backlight */
-	EC_PWM_TYPE_KB_LIGHT,
-	/* Display backlight */
-	EC_PWM_TYPE_DISPLAY_LIGHT,
-	EC_PWM_TYPE_COUNT,
-};
-
-struct ec_params_pwm_set_duty {
-	uint16_t duty;     /* Duty cycle, EC_PWM_MAX_DUTY = 100% */
-	uint8_t pwm_type;  /* ec_pwm_type */
-	uint8_t index;     /* Type-specific index, or 0 if unique */
-} __ec_align4;
-
-#define EC_CMD_PWM_GET_DUTY 0x0026
-
-struct ec_params_pwm_get_duty {
-	uint8_t pwm_type;  /* ec_pwm_type */
-	uint8_t index;     /* Type-specific index, or 0 if unique */
-} __ec_align1;
-
-struct ec_response_pwm_get_duty {
-	uint16_t duty;     /* Duty cycle, EC_PWM_MAX_DUTY = 100% */
-} __ec_align2;
-
-/*****************************************************************************/
-/*
- * Lightbar commands. This looks worse than it is. Since we only use one HOST
- * command to say "talk to the lightbar", we put the "and tell it to do X" part
- * into a subcommand. We'll make separate structs for subcommands with
- * different input args, so that we know how much to expect.
- */
-#define EC_CMD_LIGHTBAR_CMD 0x0028
-
-struct rgb_s {
-	uint8_t r, g, b;
-} __ec_todo_unpacked;
-
-#define LB_BATTERY_LEVELS 4
-
-/*
- * List of tweakable parameters. NOTE: It's __packed so it can be sent in a
- * host command, but the alignment is the same regardless. Keep it that way.
- */
-struct lightbar_params_v0 {
-	/* Timing */
-	int32_t google_ramp_up;
-	int32_t google_ramp_down;
-	int32_t s3s0_ramp_up;
-	int32_t s0_tick_delay[2];		/* AC=0/1 */
-	int32_t s0a_tick_delay[2];		/* AC=0/1 */
-	int32_t s0s3_ramp_down;
-	int32_t s3_sleep_for;
-	int32_t s3_ramp_up;
-	int32_t s3_ramp_down;
-
-	/* Oscillation */
-	uint8_t new_s0;
-	uint8_t osc_min[2];			/* AC=0/1 */
-	uint8_t osc_max[2];			/* AC=0/1 */
-	uint8_t w_ofs[2];			/* AC=0/1 */
-
-	/* Brightness limits based on the backlight and AC. */
-	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
-	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
-	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
-
-	/* Battery level thresholds */
-	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
-
-	/* Map [AC][battery_level] to color index */
-	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
-	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
-
-	/* Color palette */
-	struct rgb_s color[8];			/* 0-3 are Google colors */
-} __ec_todo_packed;
-
-struct lightbar_params_v1 {
-	/* Timing */
-	int32_t google_ramp_up;
-	int32_t google_ramp_down;
-	int32_t s3s0_ramp_up;
-	int32_t s0_tick_delay[2];		/* AC=0/1 */
-	int32_t s0a_tick_delay[2];		/* AC=0/1 */
-	int32_t s0s3_ramp_down;
-	int32_t s3_sleep_for;
-	int32_t s3_ramp_up;
-	int32_t s3_ramp_down;
-	int32_t s5_ramp_up;
-	int32_t s5_ramp_down;
-	int32_t tap_tick_delay;
-	int32_t tap_gate_delay;
-	int32_t tap_display_time;
-
-	/* Tap-for-battery params */
-	uint8_t tap_pct_red;
-	uint8_t tap_pct_green;
-	uint8_t tap_seg_min_on;
-	uint8_t tap_seg_max_on;
-	uint8_t tap_seg_osc;
-	uint8_t tap_idx[3];
-
-	/* Oscillation */
-	uint8_t osc_min[2];			/* AC=0/1 */
-	uint8_t osc_max[2];			/* AC=0/1 */
-	uint8_t w_ofs[2];			/* AC=0/1 */
-
-	/* Brightness limits based on the backlight and AC. */
-	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
-	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
-	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
-
-	/* Battery level thresholds */
-	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
-
-	/* Map [AC][battery_level] to color index */
-	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
-	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
-
-	/* s5: single color pulse on inhibited power-up */
-	uint8_t s5_idx;
-
-	/* Color palette */
-	struct rgb_s color[8];			/* 0-3 are Google colors */
-} __ec_todo_packed;
-
-/* Lightbar command params v2
- * crbug.com/467716
- *
- * lightbar_parms_v1 was too big for i2c, therefore in v2, we split them up by
- * logical groups to make it more manageable ( < 120 bytes).
- *
- * NOTE: Each of these groups must be less than 120 bytes.
- */
-
-struct lightbar_params_v2_timing {
-	/* Timing */
-	int32_t google_ramp_up;
-	int32_t google_ramp_down;
-	int32_t s3s0_ramp_up;
-	int32_t s0_tick_delay[2];		/* AC=0/1 */
-	int32_t s0a_tick_delay[2];		/* AC=0/1 */
-	int32_t s0s3_ramp_down;
-	int32_t s3_sleep_for;
-	int32_t s3_ramp_up;
-	int32_t s3_ramp_down;
-	int32_t s5_ramp_up;
-	int32_t s5_ramp_down;
-	int32_t tap_tick_delay;
-	int32_t tap_gate_delay;
-	int32_t tap_display_time;
-} __ec_todo_packed;
-
-struct lightbar_params_v2_tap {
-	/* Tap-for-battery params */
-	uint8_t tap_pct_red;
-	uint8_t tap_pct_green;
-	uint8_t tap_seg_min_on;
-	uint8_t tap_seg_max_on;
-	uint8_t tap_seg_osc;
-	uint8_t tap_idx[3];
-} __ec_todo_packed;
-
-struct lightbar_params_v2_oscillation {
-	/* Oscillation */
-	uint8_t osc_min[2];			/* AC=0/1 */
-	uint8_t osc_max[2];			/* AC=0/1 */
-	uint8_t w_ofs[2];			/* AC=0/1 */
-} __ec_todo_packed;
-
-struct lightbar_params_v2_brightness {
-	/* Brightness limits based on the backlight and AC. */
-	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
-	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
-	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
-} __ec_todo_packed;
-
-struct lightbar_params_v2_thresholds {
-	/* Battery level thresholds */
-	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
-} __ec_todo_packed;
-
-struct lightbar_params_v2_colors {
-	/* Map [AC][battery_level] to color index */
-	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
-	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
-
-	/* s5: single color pulse on inhibited power-up */
-	uint8_t s5_idx;
-
-	/* Color palette */
-	struct rgb_s color[8];			/* 0-3 are Google colors */
-} __ec_todo_packed;
-
-/* Lightbar program. */
-#define EC_LB_PROG_LEN 192
-struct lightbar_program {
-	uint8_t size;
-	uint8_t data[EC_LB_PROG_LEN];
-} __ec_todo_unpacked;
-
-struct ec_params_lightbar {
-	uint8_t cmd;		      /* Command (see enum lightbar_command) */
-	union {
-		/*
-		 * The following commands have no args:
-		 *
-		 * dump, off, on, init, get_seq, get_params_v0, get_params_v1,
-		 * version, get_brightness, get_demo, suspend, resume,
-		 * get_params_v2_timing, get_params_v2_tap, get_params_v2_osc,
-		 * get_params_v2_bright, get_params_v2_thlds,
-		 * get_params_v2_colors
-		 *
-		 * Don't use an empty struct, because C++ hates that.
-		 */
-
-		struct __ec_todo_unpacked {
-			uint8_t num;
-		} set_brightness, seq, demo;
-
-		struct __ec_todo_unpacked {
-			uint8_t ctrl, reg, value;
-		} reg;
-
-		struct __ec_todo_unpacked {
-			uint8_t led, red, green, blue;
-		} set_rgb;
-
-		struct __ec_todo_unpacked {
-			uint8_t led;
-		} get_rgb;
-
-		struct __ec_todo_unpacked {
-			uint8_t enable;
-		} manual_suspend_ctrl;
-
-		struct lightbar_params_v0 set_params_v0;
-		struct lightbar_params_v1 set_params_v1;
-
-		struct lightbar_params_v2_timing set_v2par_timing;
-		struct lightbar_params_v2_tap set_v2par_tap;
-		struct lightbar_params_v2_oscillation set_v2par_osc;
-		struct lightbar_params_v2_brightness set_v2par_bright;
-		struct lightbar_params_v2_thresholds set_v2par_thlds;
-		struct lightbar_params_v2_colors set_v2par_colors;
-
-		struct lightbar_program set_program;
-	};
-} __ec_todo_packed;
-
-struct ec_response_lightbar {
-	union {
-		struct __ec_todo_unpacked {
-			struct __ec_todo_unpacked {
-				uint8_t reg;
-				uint8_t ic0;
-				uint8_t ic1;
-			} vals[23];
-		} dump;
-
-		struct __ec_todo_unpacked {
-			uint8_t num;
-		} get_seq, get_brightness, get_demo;
-
-		struct lightbar_params_v0 get_params_v0;
-		struct lightbar_params_v1 get_params_v1;
-
-
-		struct lightbar_params_v2_timing get_params_v2_timing;
-		struct lightbar_params_v2_tap get_params_v2_tap;
-		struct lightbar_params_v2_oscillation get_params_v2_osc;
-		struct lightbar_params_v2_brightness get_params_v2_bright;
-		struct lightbar_params_v2_thresholds get_params_v2_thlds;
-		struct lightbar_params_v2_colors get_params_v2_colors;
-
-		struct __ec_todo_unpacked {
-			uint32_t num;
-			uint32_t flags;
-		} version;
-
-		struct __ec_todo_unpacked {
-			uint8_t red, green, blue;
-		} get_rgb;
-
-		/*
-		 * The following commands have no response:
-		 *
-		 * off, on, init, set_brightness, seq, reg, set_rgb, demo,
-		 * set_params_v0, set_params_v1, set_program,
-		 * manual_suspend_ctrl, suspend, resume, set_v2par_timing,
-		 * set_v2par_tap, set_v2par_osc, set_v2par_bright,
-		 * set_v2par_thlds, set_v2par_colors
-		 */
-	};
-} __ec_todo_packed;
-
-/* Lightbar commands */
-enum lightbar_command {
-	LIGHTBAR_CMD_DUMP = 0,
-	LIGHTBAR_CMD_OFF = 1,
-	LIGHTBAR_CMD_ON = 2,
-	LIGHTBAR_CMD_INIT = 3,
-	LIGHTBAR_CMD_SET_BRIGHTNESS = 4,
-	LIGHTBAR_CMD_SEQ = 5,
-	LIGHTBAR_CMD_REG = 6,
-	LIGHTBAR_CMD_SET_RGB = 7,
-	LIGHTBAR_CMD_GET_SEQ = 8,
-	LIGHTBAR_CMD_DEMO = 9,
-	LIGHTBAR_CMD_GET_PARAMS_V0 = 10,
-	LIGHTBAR_CMD_SET_PARAMS_V0 = 11,
-	LIGHTBAR_CMD_VERSION = 12,
-	LIGHTBAR_CMD_GET_BRIGHTNESS = 13,
-	LIGHTBAR_CMD_GET_RGB = 14,
-	LIGHTBAR_CMD_GET_DEMO = 15,
-	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
-	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
-	LIGHTBAR_CMD_SET_PROGRAM = 18,
-	LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19,
-	LIGHTBAR_CMD_SUSPEND = 20,
-	LIGHTBAR_CMD_RESUME = 21,
-	LIGHTBAR_CMD_GET_PARAMS_V2_TIMING = 22,
-	LIGHTBAR_CMD_SET_PARAMS_V2_TIMING = 23,
-	LIGHTBAR_CMD_GET_PARAMS_V2_TAP = 24,
-	LIGHTBAR_CMD_SET_PARAMS_V2_TAP = 25,
-	LIGHTBAR_CMD_GET_PARAMS_V2_OSCILLATION = 26,
-	LIGHTBAR_CMD_SET_PARAMS_V2_OSCILLATION = 27,
-	LIGHTBAR_CMD_GET_PARAMS_V2_BRIGHTNESS = 28,
-	LIGHTBAR_CMD_SET_PARAMS_V2_BRIGHTNESS = 29,
-	LIGHTBAR_CMD_GET_PARAMS_V2_THRESHOLDS = 30,
-	LIGHTBAR_CMD_SET_PARAMS_V2_THRESHOLDS = 31,
-	LIGHTBAR_CMD_GET_PARAMS_V2_COLORS = 32,
-	LIGHTBAR_CMD_SET_PARAMS_V2_COLORS = 33,
-	LIGHTBAR_NUM_CMDS
-};
-
-/*****************************************************************************/
-/* LED control commands */
-
-#define EC_CMD_LED_CONTROL 0x0029
-
-enum ec_led_id {
-	/* LED to indicate battery state of charge */
-	EC_LED_ID_BATTERY_LED = 0,
-	/*
-	 * LED to indicate system power state (on or in suspend).
-	 * May be on power button or on C-panel.
-	 */
-	EC_LED_ID_POWER_LED,
-	/* LED on power adapter or its plug */
-	EC_LED_ID_ADAPTER_LED,
-	/* LED to indicate left side */
-	EC_LED_ID_LEFT_LED,
-	/* LED to indicate right side */
-	EC_LED_ID_RIGHT_LED,
-	/* LED to indicate recovery mode with HW_REINIT */
-	EC_LED_ID_RECOVERY_HW_REINIT_LED,
-	/* LED to indicate sysrq debug mode. */
-	EC_LED_ID_SYSRQ_DEBUG_LED,
-
-	EC_LED_ID_COUNT
-};
-
-/* LED control flags */
-#define EC_LED_FLAGS_QUERY BIT(0) /* Query LED capability only */
-#define EC_LED_FLAGS_AUTO  BIT(1) /* Switch LED back to automatic control */
-
-enum ec_led_colors {
-	EC_LED_COLOR_RED = 0,
-	EC_LED_COLOR_GREEN,
-	EC_LED_COLOR_BLUE,
-	EC_LED_COLOR_YELLOW,
-	EC_LED_COLOR_WHITE,
-	EC_LED_COLOR_AMBER,
-
-	EC_LED_COLOR_COUNT
-};
-
-struct ec_params_led_control {
-	uint8_t led_id;     /* Which LED to control */
-	uint8_t flags;      /* Control flags */
-
-	uint8_t brightness[EC_LED_COLOR_COUNT];
-} __ec_align1;
-
-struct ec_response_led_control {
-	/*
-	 * Available brightness value range.
-	 *
-	 * Range 0 means color channel not present.
-	 * Range 1 means on/off control.
-	 * Other values means the LED is control by PWM.
-	 */
-	uint8_t brightness_range[EC_LED_COLOR_COUNT];
-} __ec_align1;
-
-/*****************************************************************************/
-/* Verified boot commands */
-
-/*
- * Note: command code 0x29 version 0 was VBOOT_CMD in Link EVT; it may be
- * reused for other purposes with version > 0.
- */
-
-/* Verified boot hash command */
-#define EC_CMD_VBOOT_HASH 0x002A
-
-struct ec_params_vboot_hash {
-	uint8_t cmd;             /* enum ec_vboot_hash_cmd */
-	uint8_t hash_type;       /* enum ec_vboot_hash_type */
-	uint8_t nonce_size;      /* Nonce size; may be 0 */
-	uint8_t reserved0;       /* Reserved; set 0 */
-	uint32_t offset;         /* Offset in flash to hash */
-	uint32_t size;           /* Number of bytes to hash */
-	uint8_t nonce_data[64];  /* Nonce data; ignored if nonce_size=0 */
-} __ec_align4;
-
-struct ec_response_vboot_hash {
-	uint8_t status;          /* enum ec_vboot_hash_status */
-	uint8_t hash_type;       /* enum ec_vboot_hash_type */
-	uint8_t digest_size;     /* Size of hash digest in bytes */
-	uint8_t reserved0;       /* Ignore; will be 0 */
-	uint32_t offset;         /* Offset in flash which was hashed */
-	uint32_t size;           /* Number of bytes hashed */
-	uint8_t hash_digest[64]; /* Hash digest data */
-} __ec_align4;
-
-enum ec_vboot_hash_cmd {
-	EC_VBOOT_HASH_GET = 0,       /* Get current hash status */
-	EC_VBOOT_HASH_ABORT = 1,     /* Abort calculating current hash */
-	EC_VBOOT_HASH_START = 2,     /* Start computing a new hash */
-	EC_VBOOT_HASH_RECALC = 3,    /* Synchronously compute a new hash */
-};
-
-enum ec_vboot_hash_type {
-	EC_VBOOT_HASH_TYPE_SHA256 = 0, /* SHA-256 */
-};
-
-enum ec_vboot_hash_status {
-	EC_VBOOT_HASH_STATUS_NONE = 0, /* No hash (not started, or aborted) */
-	EC_VBOOT_HASH_STATUS_DONE = 1, /* Finished computing a hash */
-	EC_VBOOT_HASH_STATUS_BUSY = 2, /* Busy computing a hash */
-};
-
-/*
- * Special values for offset for EC_VBOOT_HASH_START and EC_VBOOT_HASH_RECALC.
- * If one of these is specified, the EC will automatically update offset and
- * size to the correct values for the specified image (RO or RW).
- */
-#define EC_VBOOT_HASH_OFFSET_RO		0xfffffffe
-#define EC_VBOOT_HASH_OFFSET_ACTIVE	0xfffffffd
-#define EC_VBOOT_HASH_OFFSET_UPDATE	0xfffffffc
-
-/*
- * 'RW' is vague if there are multiple RW images; we mean the active one,
- * so the old constant is deprecated.
- */
-#define EC_VBOOT_HASH_OFFSET_RW EC_VBOOT_HASH_OFFSET_ACTIVE
-
-/*****************************************************************************/
-/*
- * Motion sense commands. We'll make separate structs for sub-commands with
- * different input args, so that we know how much to expect.
- */
-#define EC_CMD_MOTION_SENSE_CMD 0x002B
-
-/* Motion sense commands */
-enum motionsense_command {
-	/*
-	 * Dump command returns all motion sensor data including motion sense
-	 * module flags and individual sensor flags.
-	 */
-	MOTIONSENSE_CMD_DUMP = 0,
-
-	/*
-	 * Info command returns data describing the details of a given sensor,
-	 * including enum motionsensor_type, enum motionsensor_location, and
-	 * enum motionsensor_chip.
-	 */
-	MOTIONSENSE_CMD_INFO = 1,
-
-	/*
-	 * EC Rate command is a setter/getter command for the EC sampling rate
-	 * in milliseconds.
-	 * It is per sensor, the EC run sample task  at the minimum of all
-	 * sensors EC_RATE.
-	 * For sensors without hardware FIFO, EC_RATE should be equals to 1/ODR
-	 * to collect all the sensor samples.
-	 * For sensor with hardware FIFO, EC_RATE is used as the maximal delay
-	 * to process of all motion sensors in milliseconds.
-	 */
-	MOTIONSENSE_CMD_EC_RATE = 2,
-
-	/*
-	 * Sensor ODR command is a setter/getter command for the output data
-	 * rate of a specific motion sensor in millihertz.
-	 */
-	MOTIONSENSE_CMD_SENSOR_ODR = 3,
-
-	/*
-	 * Sensor range command is a setter/getter command for the range of
-	 * a specified motion sensor in +/-G's or +/- deg/s.
-	 */
-	MOTIONSENSE_CMD_SENSOR_RANGE = 4,
-
-	/*
-	 * Setter/getter command for the keyboard wake angle. When the lid
-	 * angle is greater than this value, keyboard wake is disabled in S3,
-	 * and when the lid angle goes less than this value, keyboard wake is
-	 * enabled. Note, the lid angle measurement is an approximate,
-	 * un-calibrated value, hence the wake angle isn't exact.
-	 */
-	MOTIONSENSE_CMD_KB_WAKE_ANGLE = 5,
-
-	/*
-	 * Returns a single sensor data.
-	 */
-	MOTIONSENSE_CMD_DATA = 6,
-
-	/*
-	 * Return sensor fifo info.
-	 */
-	MOTIONSENSE_CMD_FIFO_INFO = 7,
-
-	/*
-	 * Insert a flush element in the fifo and return sensor fifo info.
-	 * The host can use that element to synchronize its operation.
-	 */
-	MOTIONSENSE_CMD_FIFO_FLUSH = 8,
-
-	/*
-	 * Return a portion of the fifo.
-	 */
-	MOTIONSENSE_CMD_FIFO_READ = 9,
-
-	/*
-	 * Perform low level calibration.
-	 * On sensors that support it, ask to do offset calibration.
-	 */
-	MOTIONSENSE_CMD_PERFORM_CALIB = 10,
-
-	/*
-	 * Sensor Offset command is a setter/getter command for the offset
-	 * used for calibration.
-	 * The offsets can be calculated by the host, or via
-	 * PERFORM_CALIB command.
-	 */
-	MOTIONSENSE_CMD_SENSOR_OFFSET = 11,
-
-	/*
-	 * List available activities for a MOTION sensor.
-	 * Indicates if they are enabled or disabled.
-	 */
-	MOTIONSENSE_CMD_LIST_ACTIVITIES = 12,
-
-	/*
-	 * Activity management
-	 * Enable/Disable activity recognition.
-	 */
-	MOTIONSENSE_CMD_SET_ACTIVITY = 13,
-
-	/*
-	 * Lid Angle
-	 */
-	MOTIONSENSE_CMD_LID_ANGLE = 14,
-
-	/*
-	 * Allow the FIFO to trigger interrupt via MKBP events.
-	 * By default the FIFO does not send interrupt to process the FIFO
-	 * until the AP is ready or it is coming from a wakeup sensor.
-	 */
-	MOTIONSENSE_CMD_FIFO_INT_ENABLE = 15,
-
-	/*
-	 * Spoof the readings of the sensors.  The spoofed readings can be set
-	 * to arbitrary values, or will lock to the last read actual values.
-	 */
-	MOTIONSENSE_CMD_SPOOF = 16,
-
-	/* Set lid angle for tablet mode detection. */
-	MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE = 17,
-
-	/*
-	 * Sensor Scale command is a setter/getter command for the calibration
-	 * scale.
-	 */
-	MOTIONSENSE_CMD_SENSOR_SCALE = 18,
-
-	/* Number of motionsense sub-commands. */
-	MOTIONSENSE_NUM_CMDS
-};
-
-/* List of motion sensor types. */
-enum motionsensor_type {
-	MOTIONSENSE_TYPE_ACCEL = 0,
-	MOTIONSENSE_TYPE_GYRO = 1,
-	MOTIONSENSE_TYPE_MAG = 2,
-	MOTIONSENSE_TYPE_PROX = 3,
-	MOTIONSENSE_TYPE_LIGHT = 4,
-	MOTIONSENSE_TYPE_ACTIVITY = 5,
-	MOTIONSENSE_TYPE_BARO = 6,
-	MOTIONSENSE_TYPE_SYNC = 7,
-	MOTIONSENSE_TYPE_MAX,
-};
-
-/* List of motion sensor locations. */
-enum motionsensor_location {
-	MOTIONSENSE_LOC_BASE = 0,
-	MOTIONSENSE_LOC_LID = 1,
-	MOTIONSENSE_LOC_CAMERA = 2,
-	MOTIONSENSE_LOC_MAX,
-};
-
-/* List of motion sensor chips. */
-enum motionsensor_chip {
-	MOTIONSENSE_CHIP_KXCJ9 = 0,
-	MOTIONSENSE_CHIP_LSM6DS0 = 1,
-	MOTIONSENSE_CHIP_BMI160 = 2,
-	MOTIONSENSE_CHIP_SI1141 = 3,
-	MOTIONSENSE_CHIP_SI1142 = 4,
-	MOTIONSENSE_CHIP_SI1143 = 5,
-	MOTIONSENSE_CHIP_KX022 = 6,
-	MOTIONSENSE_CHIP_L3GD20H = 7,
-	MOTIONSENSE_CHIP_BMA255 = 8,
-	MOTIONSENSE_CHIP_BMP280 = 9,
-	MOTIONSENSE_CHIP_OPT3001 = 10,
-	MOTIONSENSE_CHIP_BH1730 = 11,
-	MOTIONSENSE_CHIP_GPIO = 12,
-	MOTIONSENSE_CHIP_LIS2DH = 13,
-	MOTIONSENSE_CHIP_LSM6DSM = 14,
-	MOTIONSENSE_CHIP_LIS2DE = 15,
-	MOTIONSENSE_CHIP_LIS2MDL = 16,
-	MOTIONSENSE_CHIP_LSM6DS3 = 17,
-	MOTIONSENSE_CHIP_LSM6DSO = 18,
-	MOTIONSENSE_CHIP_LNG2DM = 19,
-	MOTIONSENSE_CHIP_MAX,
-};
-
-/* List of orientation positions */
-enum motionsensor_orientation {
-	MOTIONSENSE_ORIENTATION_LANDSCAPE = 0,
-	MOTIONSENSE_ORIENTATION_PORTRAIT = 1,
-	MOTIONSENSE_ORIENTATION_UPSIDE_DOWN_PORTRAIT = 2,
-	MOTIONSENSE_ORIENTATION_UPSIDE_DOWN_LANDSCAPE = 3,
-	MOTIONSENSE_ORIENTATION_UNKNOWN = 4,
-};
-
-struct ec_response_motion_sensor_data {
-	/* Flags for each sensor. */
-	uint8_t flags;
-	/* Sensor number the data comes from. */
-	uint8_t sensor_num;
-	/* Each sensor is up to 3-axis. */
-	union {
-		int16_t             data[3];
-		struct __ec_todo_packed {
-			uint16_t    reserved;
-			uint32_t    timestamp;
-		};
-		struct __ec_todo_unpacked {
-			uint8_t     activity; /* motionsensor_activity */
-			uint8_t     state;
-			int16_t     add_info[2];
-		};
-	};
-} __ec_todo_packed;
-
-/* Note: used in ec_response_get_next_data */
-struct ec_response_motion_sense_fifo_info {
-	/* Size of the fifo */
-	uint16_t size;
-	/* Amount of space used in the fifo */
-	uint16_t count;
-	/* Timestamp recorded in us.
-	 * aka accurate timestamp when host event was triggered.
-	 */
-	uint32_t timestamp;
-	/* Total amount of vector lost */
-	uint16_t total_lost;
-	/* Lost events since the last fifo_info, per sensors */
-	uint16_t lost[0];
-} __ec_todo_packed;
-
-struct ec_response_motion_sense_fifo_data {
-	uint32_t number_data;
-	struct ec_response_motion_sensor_data data[0];
-} __ec_todo_packed;
-
-/* List supported activity recognition */
-enum motionsensor_activity {
-	MOTIONSENSE_ACTIVITY_RESERVED = 0,
-	MOTIONSENSE_ACTIVITY_SIG_MOTION = 1,
-	MOTIONSENSE_ACTIVITY_DOUBLE_TAP = 2,
-	MOTIONSENSE_ACTIVITY_ORIENTATION = 3,
-};
-
-struct ec_motion_sense_activity {
-	uint8_t sensor_num;
-	uint8_t activity; /* one of enum motionsensor_activity */
-	uint8_t enable;   /* 1: enable, 0: disable */
-	uint8_t reserved;
-	uint16_t parameters[3]; /* activity dependent parameters */
-} __ec_todo_unpacked;
-
-/* Module flag masks used for the dump sub-command. */
-#define MOTIONSENSE_MODULE_FLAG_ACTIVE BIT(0)
-
-/* Sensor flag masks used for the dump sub-command. */
-#define MOTIONSENSE_SENSOR_FLAG_PRESENT BIT(0)
-
-/*
- * Flush entry for synchronization.
- * data contains time stamp
- */
-#define MOTIONSENSE_SENSOR_FLAG_FLUSH BIT(0)
-#define MOTIONSENSE_SENSOR_FLAG_TIMESTAMP BIT(1)
-#define MOTIONSENSE_SENSOR_FLAG_WAKEUP BIT(2)
-#define MOTIONSENSE_SENSOR_FLAG_TABLET_MODE BIT(3)
-#define MOTIONSENSE_SENSOR_FLAG_ODR BIT(4)
-
-/*
- * Send this value for the data element to only perform a read. If you
- * send any other value, the EC will interpret it as data to set and will
- * return the actual value set.
- */
-#define EC_MOTION_SENSE_NO_VALUE -1
-
-#define EC_MOTION_SENSE_INVALID_CALIB_TEMP 0x8000
-
-/* MOTIONSENSE_CMD_SENSOR_OFFSET subcommand flag */
-/* Set Calibration information */
-#define MOTION_SENSE_SET_OFFSET BIT(0)
-
-/* Default Scale value, factor 1. */
-#define MOTION_SENSE_DEFAULT_SCALE BIT(15)
-
-#define LID_ANGLE_UNRELIABLE 500
-
-enum motionsense_spoof_mode {
-	/* Disable spoof mode. */
-	MOTIONSENSE_SPOOF_MODE_DISABLE = 0,
-
-	/* Enable spoof mode, but use provided component values. */
-	MOTIONSENSE_SPOOF_MODE_CUSTOM,
-
-	/* Enable spoof mode, but use the current sensor values. */
-	MOTIONSENSE_SPOOF_MODE_LOCK_CURRENT,
-
-	/* Query the current spoof mode status for the sensor. */
-	MOTIONSENSE_SPOOF_MODE_QUERY,
-};
-
-struct ec_params_motion_sense {
-	uint8_t cmd;
-	union {
-		/* Used for MOTIONSENSE_CMD_DUMP. */
-		struct __ec_todo_unpacked {
-			/*
-			 * Maximal number of sensor the host is expecting.
-			 * 0 means the host is only interested in the number
-			 * of sensors controlled by the EC.
-			 */
-			uint8_t max_sensor_count;
-		} dump;
-
-		/*
-		 * Used for MOTIONSENSE_CMD_KB_WAKE_ANGLE.
-		 */
-		struct __ec_todo_unpacked {
-			/* Data to set or EC_MOTION_SENSE_NO_VALUE to read.
-			 * kb_wake_angle: angle to wakup AP.
-			 */
-			int16_t data;
-		} kb_wake_angle;
-
-		/*
-		 * Used for MOTIONSENSE_CMD_INFO, MOTIONSENSE_CMD_DATA
-		 * and MOTIONSENSE_CMD_PERFORM_CALIB.
-		 */
-		struct __ec_todo_unpacked {
-			uint8_t sensor_num;
-		} info, info_3, data, fifo_flush, perform_calib,
-				list_activities;
-
-		/*
-		 * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR
-		 * and MOTIONSENSE_CMD_SENSOR_RANGE.
-		 */
-		struct __ec_todo_unpacked {
-			uint8_t sensor_num;
-
-			/* Rounding flag, true for round-up, false for down. */
-			uint8_t roundup;
-
-			uint16_t reserved;
-
-			/* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */
-			int32_t data;
-		} ec_rate, sensor_odr, sensor_range;
-
-		/* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */
-		struct __ec_todo_packed {
-			uint8_t sensor_num;
-
-			/*
-			 * bit 0: If set (MOTION_SENSE_SET_OFFSET), set
-			 * the calibration information in the EC.
-			 * If unset, just retrieve calibration information.
-			 */
-			uint16_t flags;
-
-			/*
-			 * Temperature at calibration, in units of 0.01 C
-			 * 0x8000: invalid / unknown.
-			 * 0x0: 0C
-			 * 0x7fff: +327.67C
-			 */
-			int16_t temp;
-
-			/*
-			 * Offset for calibration.
-			 * Unit:
-			 * Accelerometer: 1/1024 g
-			 * Gyro:          1/1024 deg/s
-			 * Compass:       1/16 uT
-			 */
-			int16_t offset[3];
-		} sensor_offset;
-
-		/* Used for MOTIONSENSE_CMD_SENSOR_SCALE */
-		struct __ec_todo_packed {
-			uint8_t sensor_num;
-
-			/*
-			 * bit 0: If set (MOTION_SENSE_SET_OFFSET), set
-			 * the calibration information in the EC.
-			 * If unset, just retrieve calibration information.
-			 */
-			uint16_t flags;
-
-			/*
-			 * Temperature at calibration, in units of 0.01 C
-			 * 0x8000: invalid / unknown.
-			 * 0x0: 0C
-			 * 0x7fff: +327.67C
-			 */
-			int16_t temp;
-
-			/*
-			 * Scale for calibration:
-			 * By default scale is 1, it is encoded on 16bits:
-			 * 1 = BIT(15)
-			 * ~2 = 0xFFFF
-			 * ~0 = 0.
-			 */
-			uint16_t scale[3];
-		} sensor_scale;
-
-
-		/* Used for MOTIONSENSE_CMD_FIFO_INFO */
-		/* (no params) */
-
-		/* Used for MOTIONSENSE_CMD_FIFO_READ */
-		struct __ec_todo_unpacked {
-			/*
-			 * Number of expected vector to return.
-			 * EC may return less or 0 if none available.
-			 */
-			uint32_t max_data_vector;
-		} fifo_read;
-
-		struct ec_motion_sense_activity set_activity;
-
-		/* Used for MOTIONSENSE_CMD_LID_ANGLE */
-		/* (no params) */
-
-		/* Used for MOTIONSENSE_CMD_FIFO_INT_ENABLE */
-		struct __ec_todo_unpacked {
-			/*
-			 * 1: enable, 0 disable fifo,
-			 * EC_MOTION_SENSE_NO_VALUE return value.
-			 */
-			int8_t enable;
-		} fifo_int_enable;
-
-		/* Used for MOTIONSENSE_CMD_SPOOF */
-		struct __ec_todo_packed {
-			uint8_t sensor_id;
-
-			/* See enum motionsense_spoof_mode. */
-			uint8_t spoof_enable;
-
-			/* Ignored, used for alignment. */
-			uint8_t reserved;
-
-			/* Individual component values to spoof. */
-			int16_t components[3];
-		} spoof;
-
-		/* Used for MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */
-		struct __ec_todo_unpacked {
-			/*
-			 * Lid angle threshold for switching between tablet and
-			 * clamshell mode.
-			 */
-			int16_t lid_angle;
-
-			/*
-			 * Hysteresis degree to prevent fluctuations between
-			 * clamshell and tablet mode if lid angle keeps
-			 * changing around the threshold. Lid motion driver will
-			 * use lid_angle + hys_degree to trigger tablet mode and
-			 * lid_angle - hys_degree to trigger clamshell mode.
-			 */
-			int16_t hys_degree;
-		} tablet_mode_threshold;
-	};
-} __ec_todo_packed;
-
-struct ec_response_motion_sense {
-	union {
-		/* Used for MOTIONSENSE_CMD_DUMP */
-		struct __ec_todo_unpacked {
-			/* Flags representing the motion sensor module. */
-			uint8_t module_flags;
-
-			/* Number of sensors managed directly by the EC. */
-			uint8_t sensor_count;
-
-			/*
-			 * Sensor data is truncated if response_max is too small
-			 * for holding all the data.
-			 */
-			struct ec_response_motion_sensor_data sensor[0];
-		} dump;
-
-		/* Used for MOTIONSENSE_CMD_INFO. */
-		struct __ec_todo_unpacked {
-			/* Should be element of enum motionsensor_type. */
-			uint8_t type;
-
-			/* Should be element of enum motionsensor_location. */
-			uint8_t location;
-
-			/* Should be element of enum motionsensor_chip. */
-			uint8_t chip;
-		} info;
-
-		/* Used for MOTIONSENSE_CMD_INFO version 3 */
-		struct __ec_todo_unpacked {
-			/* Should be element of enum motionsensor_type. */
-			uint8_t type;
-
-			/* Should be element of enum motionsensor_location. */
-			uint8_t location;
-
-			/* Should be element of enum motionsensor_chip. */
-			uint8_t chip;
-
-			/* Minimum sensor sampling frequency */
-			uint32_t min_frequency;
-
-			/* Maximum sensor sampling frequency */
-			uint32_t max_frequency;
-
-			/* Max number of sensor events that could be in fifo */
-			uint32_t fifo_max_event_count;
-		} info_3;
-
-		/* Used for MOTIONSENSE_CMD_DATA */
-		struct ec_response_motion_sensor_data data;
-
-		/*
-		 * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR,
-		 * MOTIONSENSE_CMD_SENSOR_RANGE,
-		 * MOTIONSENSE_CMD_KB_WAKE_ANGLE,
-		 * MOTIONSENSE_CMD_FIFO_INT_ENABLE and
-		 * MOTIONSENSE_CMD_SPOOF.
-		 */
-		struct __ec_todo_unpacked {
-			/* Current value of the parameter queried. */
-			int32_t ret;
-		} ec_rate, sensor_odr, sensor_range, kb_wake_angle,
-		  fifo_int_enable, spoof;
-
-		/*
-		 * Used for MOTIONSENSE_CMD_SENSOR_OFFSET,
-		 * PERFORM_CALIB.
-		 */
-		struct __ec_todo_unpacked  {
-			int16_t temp;
-			int16_t offset[3];
-		} sensor_offset, perform_calib;
-
-		/* Used for MOTIONSENSE_CMD_SENSOR_SCALE */
-		struct __ec_todo_unpacked  {
-			int16_t temp;
-			uint16_t scale[3];
-		} sensor_scale;
-
-		struct ec_response_motion_sense_fifo_info fifo_info, fifo_flush;
-
-		struct ec_response_motion_sense_fifo_data fifo_read;
-
-		struct __ec_todo_packed {
-			uint16_t reserved;
-			uint32_t enabled;
-			uint32_t disabled;
-		} list_activities;
-
-		/* No params for set activity */
-
-		/* Used for MOTIONSENSE_CMD_LID_ANGLE */
-		struct __ec_todo_unpacked {
-			/*
-			 * Angle between 0 and 360 degree if available,
-			 * LID_ANGLE_UNRELIABLE otherwise.
-			 */
-			uint16_t value;
-		} lid_angle;
-
-		/* Used for MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */
-		struct __ec_todo_unpacked {
-			/*
-			 * Lid angle threshold for switching between tablet and
-			 * clamshell mode.
-			 */
-			uint16_t lid_angle;
-
-			/* Hysteresis degree. */
-			uint16_t hys_degree;
-		} tablet_mode_threshold;
-
-	};
-} __ec_todo_packed;
-
-/*****************************************************************************/
-/* Force lid open command */
-
-/* Make lid event always open */
-#define EC_CMD_FORCE_LID_OPEN 0x002C
-
-struct ec_params_force_lid_open {
-	uint8_t enabled;
-} __ec_align1;
-
-/*****************************************************************************/
-/* Configure the behavior of the power button */
-#define EC_CMD_CONFIG_POWER_BUTTON 0x002D
-
-enum ec_config_power_button_flags {
-	/* Enable/Disable power button pulses for x86 devices */
-	EC_POWER_BUTTON_ENABLE_PULSE = BIT(0),
-};
-
-struct ec_params_config_power_button {
-	/* See enum ec_config_power_button_flags */
-	uint8_t flags;
-} __ec_align1;
-
-/*****************************************************************************/
-/* USB charging control commands */
-
-/* Set USB port charging mode */
-#define EC_CMD_USB_CHARGE_SET_MODE 0x0030
-
-struct ec_params_usb_charge_set_mode {
-	uint8_t usb_port_id;
-	uint8_t mode:7;
-	uint8_t inhibit_charge:1;
-} __ec_align1;
-
-/*****************************************************************************/
-/* Persistent storage for host */
-
-/* Maximum bytes that can be read/written in a single command */
-#define EC_PSTORE_SIZE_MAX 64
-
-/* Get persistent storage info */
-#define EC_CMD_PSTORE_INFO 0x0040
-
-struct ec_response_pstore_info {
-	/* Persistent storage size, in bytes */
-	uint32_t pstore_size;
-	/* Access size; read/write offset and size must be a multiple of this */
-	uint32_t access_size;
-} __ec_align4;
-
-/*
- * Read persistent storage
- *
- * Response is params.size bytes of data.
- */
-#define EC_CMD_PSTORE_READ 0x0041
-
-struct ec_params_pstore_read {
-	uint32_t offset;   /* Byte offset to read */
-	uint32_t size;     /* Size to read in bytes */
-} __ec_align4;
-
-/* Write persistent storage */
-#define EC_CMD_PSTORE_WRITE 0x0042
-
-struct ec_params_pstore_write {
-	uint32_t offset;   /* Byte offset to write */
-	uint32_t size;     /* Size to write in bytes */
-	uint8_t data[EC_PSTORE_SIZE_MAX];
-} __ec_align4;
-
-/*****************************************************************************/
-/* Real-time clock */
-
-/* RTC params and response structures */
-struct ec_params_rtc {
-	uint32_t time;
-} __ec_align4;
-
-struct ec_response_rtc {
-	uint32_t time;
-} __ec_align4;
-
-/* These use ec_response_rtc */
-#define EC_CMD_RTC_GET_VALUE 0x0044
-#define EC_CMD_RTC_GET_ALARM 0x0045
-
-/* These all use ec_params_rtc */
-#define EC_CMD_RTC_SET_VALUE 0x0046
-#define EC_CMD_RTC_SET_ALARM 0x0047
-
-/* Pass as time param to SET_ALARM to clear the current alarm */
-#define EC_RTC_ALARM_CLEAR 0
-
-/*****************************************************************************/
-/* Port80 log access */
-
-/* Maximum entries that can be read/written in a single command */
-#define EC_PORT80_SIZE_MAX 32
-
-/* Get last port80 code from previous boot */
-#define EC_CMD_PORT80_LAST_BOOT 0x0048
-#define EC_CMD_PORT80_READ 0x0048
-
-enum ec_port80_subcmd {
-	EC_PORT80_GET_INFO = 0,
-	EC_PORT80_READ_BUFFER,
-};
-
-struct ec_params_port80_read {
-	uint16_t subcmd;
-	union {
-		struct __ec_todo_unpacked {
-			uint32_t offset;
-			uint32_t num_entries;
-		} read_buffer;
-	};
-} __ec_todo_packed;
-
-struct ec_response_port80_read {
-	union {
-		struct __ec_todo_unpacked {
-			uint32_t writes;
-			uint32_t history_size;
-			uint32_t last_boot;
-		} get_info;
-		struct __ec_todo_unpacked {
-			uint16_t codes[EC_PORT80_SIZE_MAX];
-		} data;
-	};
-} __ec_todo_packed;
-
-struct ec_response_port80_last_boot {
-	uint16_t code;
-} __ec_align2;
-
-/*****************************************************************************/
-/* Temporary secure storage for host verified boot use */
-
-/* Number of bytes in a vstore slot */
-#define EC_VSTORE_SLOT_SIZE 64
-
-/* Maximum number of vstore slots */
-#define EC_VSTORE_SLOT_MAX 32
-
-/* Get persistent storage info */
-#define EC_CMD_VSTORE_INFO 0x0049
-struct ec_response_vstore_info {
-	/* Indicates which slots are locked */
-	uint32_t slot_locked;
-	/* Total number of slots available */
-	uint8_t slot_count;
-} __ec_align_size1;
-
-/*
- * Read temporary secure storage
- *
- * Response is EC_VSTORE_SLOT_SIZE bytes of data.
- */
-#define EC_CMD_VSTORE_READ 0x004A
-
-struct ec_params_vstore_read {
-	uint8_t slot; /* Slot to read from */
-} __ec_align1;
-
-struct ec_response_vstore_read {
-	uint8_t data[EC_VSTORE_SLOT_SIZE];
-} __ec_align1;
-
-/*
- * Write temporary secure storage and lock it.
- */
-#define EC_CMD_VSTORE_WRITE 0x004B
-
-struct ec_params_vstore_write {
-	uint8_t slot; /* Slot to write to */
-	uint8_t data[EC_VSTORE_SLOT_SIZE];
-} __ec_align1;
-
-/*****************************************************************************/
-/* Thermal engine commands. Note that there are two implementations. We'll
- * reuse the command number, but the data and behavior is incompatible.
- * Version 0 is what originally shipped on Link.
- * Version 1 separates the CPU thermal limits from the fan control.
- */
-
-#define EC_CMD_THERMAL_SET_THRESHOLD 0x0050
-#define EC_CMD_THERMAL_GET_THRESHOLD 0x0051
-
-/* The version 0 structs are opaque. You have to know what they are for
- * the get/set commands to make any sense.
- */
-
-/* Version 0 - set */
-struct ec_params_thermal_set_threshold {
-	uint8_t sensor_type;
-	uint8_t threshold_id;
-	uint16_t value;
-} __ec_align2;
-
-/* Version 0 - get */
-struct ec_params_thermal_get_threshold {
-	uint8_t sensor_type;
-	uint8_t threshold_id;
-} __ec_align1;
-
-struct ec_response_thermal_get_threshold {
-	uint16_t value;
-} __ec_align2;
-
-
-/* The version 1 structs are visible. */
-enum ec_temp_thresholds {
-	EC_TEMP_THRESH_WARN = 0,
-	EC_TEMP_THRESH_HIGH,
-	EC_TEMP_THRESH_HALT,
-
-	EC_TEMP_THRESH_COUNT
-};
-
-/*
- * Thermal configuration for one temperature sensor. Temps are in degrees K.
- * Zero values will be silently ignored by the thermal task.
- *
- * Set 'temp_host' value allows thermal task to trigger some event with 1 degree
- * hysteresis.
- * For example,
- *	temp_host[EC_TEMP_THRESH_HIGH] = 300 K
- *	temp_host_release[EC_TEMP_THRESH_HIGH] = 0 K
- * EC will throttle ap when temperature >= 301 K, and release throttling when
- * temperature <= 299 K.
- *
- * Set 'temp_host_release' value allows thermal task has a custom hysteresis.
- * For example,
- *	temp_host[EC_TEMP_THRESH_HIGH] = 300 K
- *	temp_host_release[EC_TEMP_THRESH_HIGH] = 295 K
- * EC will throttle ap when temperature >= 301 K, and release throttling when
- * temperature <= 294 K.
- *
- * Note that this structure is a sub-structure of
- * ec_params_thermal_set_threshold_v1, but maintains its alignment there.
- */
-struct ec_thermal_config {
-	uint32_t temp_host[EC_TEMP_THRESH_COUNT]; /* levels of hotness */
-	uint32_t temp_host_release[EC_TEMP_THRESH_COUNT]; /* release levels */
-	uint32_t temp_fan_off;		/* no active cooling needed */
-	uint32_t temp_fan_max;		/* max active cooling needed */
-} __ec_align4;
-
-/* Version 1 - get config for one sensor. */
-struct ec_params_thermal_get_threshold_v1 {
-	uint32_t sensor_num;
-} __ec_align4;
-/* This returns a struct ec_thermal_config */
-
-/*
- * Version 1 - set config for one sensor.
- * Use read-modify-write for best results!
- */
-struct ec_params_thermal_set_threshold_v1 {
-	uint32_t sensor_num;
-	struct ec_thermal_config cfg;
-} __ec_align4;
-/* This returns no data */
-
-/****************************************************************************/
-
-/* Toggle automatic fan control */
-#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x0052
-
-/* Version 1 of input params */
-struct ec_params_auto_fan_ctrl_v1 {
-	uint8_t fan_idx;
-} __ec_align1;
-
-/* Get/Set TMP006 calibration data */
-#define EC_CMD_TMP006_GET_CALIBRATION 0x0053
-#define EC_CMD_TMP006_SET_CALIBRATION 0x0054
-
-/*
- * The original TMP006 calibration only needed four params, but now we need
- * more. Since the algorithm is nothing but magic numbers anyway, we'll leave
- * the params opaque. The v1 "get" response will include the algorithm number
- * and how many params it requires. That way we can change the EC code without
- * needing to update this file. We can also use a different algorithm on each
- * sensor.
- */
-
-/* This is the same struct for both v0 and v1. */
-struct ec_params_tmp006_get_calibration {
-	uint8_t index;
-} __ec_align1;
-
-/* Version 0 */
-struct ec_response_tmp006_get_calibration_v0 {
-	float s0;
-	float b0;
-	float b1;
-	float b2;
-} __ec_align4;
-
-struct ec_params_tmp006_set_calibration_v0 {
-	uint8_t index;
-	uint8_t reserved[3];
-	float s0;
-	float b0;
-	float b1;
-	float b2;
-} __ec_align4;
-
-/* Version 1 */
-struct ec_response_tmp006_get_calibration_v1 {
-	uint8_t algorithm;
-	uint8_t num_params;
-	uint8_t reserved[2];
-	float val[0];
-} __ec_align4;
-
-struct ec_params_tmp006_set_calibration_v1 {
-	uint8_t index;
-	uint8_t algorithm;
-	uint8_t num_params;
-	uint8_t reserved;
-	float val[0];
-} __ec_align4;
-
-
-/* Read raw TMP006 data */
-#define EC_CMD_TMP006_GET_RAW 0x0055
-
-struct ec_params_tmp006_get_raw {
-	uint8_t index;
-} __ec_align1;
-
-struct ec_response_tmp006_get_raw {
-	int32_t t;  /* In 1/100 K */
-	int32_t v;  /* In nV */
-} __ec_align4;
-
-/*****************************************************************************/
-/* MKBP - Matrix KeyBoard Protocol */
-
-/*
- * Read key state
- *
- * Returns raw data for keyboard cols; see ec_response_mkbp_info.cols for
- * expected response size.
- *
- * NOTE: This has been superseded by EC_CMD_MKBP_GET_NEXT_EVENT.  If you wish
- * to obtain the instantaneous state, use EC_CMD_MKBP_INFO with the type
- * EC_MKBP_INFO_CURRENT and event EC_MKBP_EVENT_KEY_MATRIX.
- */
-#define EC_CMD_MKBP_STATE 0x0060
-
-/*
- * Provide information about various MKBP things.  See enum ec_mkbp_info_type.
- */
-#define EC_CMD_MKBP_INFO 0x0061
-
-struct ec_response_mkbp_info {
-	uint32_t rows;
-	uint32_t cols;
-	/* Formerly "switches", which was 0. */
-	uint8_t reserved;
-} __ec_align_size1;
-
-struct ec_params_mkbp_info {
-	uint8_t info_type;
-	uint8_t event_type;
-} __ec_align1;
-
-enum ec_mkbp_info_type {
-	/*
-	 * Info about the keyboard matrix: number of rows and columns.
-	 *
-	 * Returns struct ec_response_mkbp_info.
-	 */
-	EC_MKBP_INFO_KBD = 0,
-
-	/*
-	 * For buttons and switches, info about which specifically are
-	 * supported.  event_type must be set to one of the values in enum
-	 * ec_mkbp_event.
-	 *
-	 * For EC_MKBP_EVENT_BUTTON and EC_MKBP_EVENT_SWITCH, returns a 4 byte
-	 * bitmask indicating which buttons or switches are present.  See the
-	 * bit inidices below.
-	 */
-	EC_MKBP_INFO_SUPPORTED = 1,
-
-	/*
-	 * Instantaneous state of buttons and switches.
-	 *
-	 * event_type must be set to one of the values in enum ec_mkbp_event.
-	 *
-	 * For EC_MKBP_EVENT_KEY_MATRIX, returns uint8_t key_matrix[13]
-	 * indicating the current state of the keyboard matrix.
-	 *
-	 * For EC_MKBP_EVENT_HOST_EVENT, return uint32_t host_event, the raw
-	 * event state.
-	 *
-	 * For EC_MKBP_EVENT_BUTTON, returns uint32_t buttons, indicating the
-	 * state of supported buttons.
-	 *
-	 * For EC_MKBP_EVENT_SWITCH, returns uint32_t switches, indicating the
-	 * state of supported switches.
-	 */
-	EC_MKBP_INFO_CURRENT = 2,
-};
-
-/* Simulate key press */
-#define EC_CMD_MKBP_SIMULATE_KEY 0x0062
-
-struct ec_params_mkbp_simulate_key {
-	uint8_t col;
-	uint8_t row;
-	uint8_t pressed;
-} __ec_align1;
-
-#define EC_CMD_GET_KEYBOARD_ID 0x0063
-
-struct ec_response_keyboard_id {
-	uint32_t keyboard_id;
-} __ec_align4;
-
-enum keyboard_id {
-	KEYBOARD_ID_UNSUPPORTED = 0,
-	KEYBOARD_ID_UNREADABLE = 0xffffffff,
-};
-
-/* Configure keyboard scanning */
-#define EC_CMD_MKBP_SET_CONFIG 0x0064
-#define EC_CMD_MKBP_GET_CONFIG 0x0065
-
-/* flags */
-enum mkbp_config_flags {
-	EC_MKBP_FLAGS_ENABLE = 1,	/* Enable keyboard scanning */
-};
-
-enum mkbp_config_valid {
-	EC_MKBP_VALID_SCAN_PERIOD		= BIT(0),
-	EC_MKBP_VALID_POLL_TIMEOUT		= BIT(1),
-	EC_MKBP_VALID_MIN_POST_SCAN_DELAY	= BIT(3),
-	EC_MKBP_VALID_OUTPUT_SETTLE		= BIT(4),
-	EC_MKBP_VALID_DEBOUNCE_DOWN		= BIT(5),
-	EC_MKBP_VALID_DEBOUNCE_UP		= BIT(6),
-	EC_MKBP_VALID_FIFO_MAX_DEPTH		= BIT(7),
-};
-
-/*
- * Configuration for our key scanning algorithm.
- *
- * Note that this is used as a sub-structure of
- * ec_{params/response}_mkbp_get_config.
- */
-struct ec_mkbp_config {
-	uint32_t valid_mask;		/* valid fields */
-	uint8_t flags;		/* some flags (enum mkbp_config_flags) */
-	uint8_t valid_flags;		/* which flags are valid */
-	uint16_t scan_period_us;	/* period between start of scans */
-	/* revert to interrupt mode after no activity for this long */
-	uint32_t poll_timeout_us;
-	/*
-	 * minimum post-scan relax time. Once we finish a scan we check
-	 * the time until we are due to start the next one. If this time is
-	 * shorter this field, we use this instead.
-	 */
-	uint16_t min_post_scan_delay_us;
-	/* delay between setting up output and waiting for it to settle */
-	uint16_t output_settle_us;
-	uint16_t debounce_down_us;	/* time for debounce on key down */
-	uint16_t debounce_up_us;	/* time for debounce on key up */
-	/* maximum depth to allow for fifo (0 = no keyscan output) */
-	uint8_t fifo_max_depth;
-} __ec_align_size1;
-
-struct ec_params_mkbp_set_config {
-	struct ec_mkbp_config config;
-} __ec_align_size1;
-
-struct ec_response_mkbp_get_config {
-	struct ec_mkbp_config config;
-} __ec_align_size1;
-
-/* Run the key scan emulation */
-#define EC_CMD_KEYSCAN_SEQ_CTRL 0x0066
-
-enum ec_keyscan_seq_cmd {
-	EC_KEYSCAN_SEQ_STATUS = 0,	/* Get status information */
-	EC_KEYSCAN_SEQ_CLEAR = 1,	/* Clear sequence */
-	EC_KEYSCAN_SEQ_ADD = 2,		/* Add item to sequence */
-	EC_KEYSCAN_SEQ_START = 3,	/* Start running sequence */
-	EC_KEYSCAN_SEQ_COLLECT = 4,	/* Collect sequence summary data */
-};
-
-enum ec_collect_flags {
-	/*
-	 * Indicates this scan was processed by the EC. Due to timing, some
-	 * scans may be skipped.
-	 */
-	EC_KEYSCAN_SEQ_FLAG_DONE	= BIT(0),
-};
-
-struct ec_collect_item {
-	uint8_t flags;		/* some flags (enum ec_collect_flags) */
-} __ec_align1;
-
-struct ec_params_keyscan_seq_ctrl {
-	uint8_t cmd;	/* Command to send (enum ec_keyscan_seq_cmd) */
-	union {
-		struct __ec_align1 {
-			uint8_t active;		/* still active */
-			uint8_t num_items;	/* number of items */
-			/* Current item being presented */
-			uint8_t cur_item;
-		} status;
-		struct __ec_todo_unpacked {
-			/*
-			 * Absolute time for this scan, measured from the
-			 * start of the sequence.
-			 */
-			uint32_t time_us;
-			uint8_t scan[0];	/* keyscan data */
-		} add;
-		struct __ec_align1 {
-			uint8_t start_item;	/* First item to return */
-			uint8_t num_items;	/* Number of items to return */
-		} collect;
-	};
-} __ec_todo_packed;
-
-struct ec_result_keyscan_seq_ctrl {
-	union {
-		struct __ec_todo_unpacked {
-			uint8_t num_items;	/* Number of items */
-			/* Data for each item */
-			struct ec_collect_item item[0];
-		} collect;
-	};
-} __ec_todo_packed;
-
-/*
- * Get the next pending MKBP event.
- *
- * Returns EC_RES_UNAVAILABLE if there is no event pending.
- */
-#define EC_CMD_GET_NEXT_EVENT 0x0067
-
-#define EC_MKBP_HAS_MORE_EVENTS_SHIFT 7
-
-/*
- * We use the most significant bit of the event type to indicate to the host
- * that the EC has more MKBP events available to provide.
- */
-#define EC_MKBP_HAS_MORE_EVENTS BIT(EC_MKBP_HAS_MORE_EVENTS_SHIFT)
-
-/* The mask to apply to get the raw event type */
-#define EC_MKBP_EVENT_TYPE_MASK (BIT(EC_MKBP_HAS_MORE_EVENTS_SHIFT) - 1)
-
-enum ec_mkbp_event {
-	/* Keyboard matrix changed. The event data is the new matrix state. */
-	EC_MKBP_EVENT_KEY_MATRIX = 0,
-
-	/* New host event. The event data is 4 bytes of host event flags. */
-	EC_MKBP_EVENT_HOST_EVENT = 1,
-
-	/* New Sensor FIFO data. The event data is fifo_info structure. */
-	EC_MKBP_EVENT_SENSOR_FIFO = 2,
-
-	/* The state of the non-matrixed buttons have changed. */
-	EC_MKBP_EVENT_BUTTON = 3,
-
-	/* The state of the switches have changed. */
-	EC_MKBP_EVENT_SWITCH = 4,
-
-	/* New Fingerprint sensor event, the event data is fp_events bitmap. */
-	EC_MKBP_EVENT_FINGERPRINT = 5,
-
-	/*
-	 * Sysrq event: send emulated sysrq. The event data is sysrq,
-	 * corresponding to the key to be pressed.
-	 */
-	EC_MKBP_EVENT_SYSRQ = 6,
-
-	/*
-	 * New 64-bit host event.
-	 * The event data is 8 bytes of host event flags.
-	 */
-	EC_MKBP_EVENT_HOST_EVENT64 = 7,
-
-	/* Notify the AP that something happened on CEC */
-	EC_MKBP_EVENT_CEC_EVENT = 8,
-
-	/* Send an incoming CEC message to the AP */
-	EC_MKBP_EVENT_CEC_MESSAGE = 9,
-
-	/* Number of MKBP events */
-	EC_MKBP_EVENT_COUNT,
-};
-BUILD_ASSERT(EC_MKBP_EVENT_COUNT <= EC_MKBP_EVENT_TYPE_MASK);
-
-union __ec_align_offset1 ec_response_get_next_data {
-	uint8_t key_matrix[13];
-
-	/* Unaligned */
-	uint32_t host_event;
-	uint64_t host_event64;
-
-	struct __ec_todo_unpacked {
-		/* For aligning the fifo_info */
-		uint8_t reserved[3];
-		struct ec_response_motion_sense_fifo_info info;
-	} sensor_fifo;
-
-	uint32_t buttons;
-
-	uint32_t switches;
-
-	uint32_t fp_events;
-
-	uint32_t sysrq;
-
-	/* CEC events from enum mkbp_cec_event */
-	uint32_t cec_events;
-};
-
-union __ec_align_offset1 ec_response_get_next_data_v1 {
-	uint8_t key_matrix[16];
-
-	/* Unaligned */
-	uint32_t host_event;
-	uint64_t host_event64;
-
-	struct __ec_todo_unpacked {
-		/* For aligning the fifo_info */
-		uint8_t reserved[3];
-		struct ec_response_motion_sense_fifo_info info;
-	} sensor_fifo;
-
-	uint32_t buttons;
-
-	uint32_t switches;
-
-	uint32_t fp_events;
-
-	uint32_t sysrq;
-
-	/* CEC events from enum mkbp_cec_event */
-	uint32_t cec_events;
-
-	uint8_t cec_message[16];
-};
-BUILD_ASSERT(sizeof(union ec_response_get_next_data_v1) == 16);
-
-struct ec_response_get_next_event {
-	uint8_t event_type;
-	/* Followed by event data if any */
-	union ec_response_get_next_data data;
-} __ec_align1;
-
-struct ec_response_get_next_event_v1 {
-	uint8_t event_type;
-	/* Followed by event data if any */
-	union ec_response_get_next_data_v1 data;
-} __ec_align1;
-
-/* Bit indices for buttons and switches.*/
-/* Buttons */
-#define EC_MKBP_POWER_BUTTON	0
-#define EC_MKBP_VOL_UP		1
-#define EC_MKBP_VOL_DOWN	2
-#define EC_MKBP_RECOVERY	3
-
-/* Switches */
-#define EC_MKBP_LID_OPEN	0
-#define EC_MKBP_TABLET_MODE	1
-#define EC_MKBP_BASE_ATTACHED	2
-
-/* Run keyboard factory test scanning */
-#define EC_CMD_KEYBOARD_FACTORY_TEST 0x0068
-
-struct ec_response_keyboard_factory_test {
-	uint16_t shorted;	/* Keyboard pins are shorted */
-} __ec_align2;
-
-/* Fingerprint events in 'fp_events' for EC_MKBP_EVENT_FINGERPRINT */
-#define EC_MKBP_FP_RAW_EVENT(fp_events) ((fp_events) & 0x00FFFFFF)
-#define EC_MKBP_FP_ERRCODE(fp_events)   ((fp_events) & 0x0000000F)
-#define EC_MKBP_FP_ENROLL_PROGRESS_OFFSET 4
-#define EC_MKBP_FP_ENROLL_PROGRESS(fpe) (((fpe) & 0x00000FF0) \
-					 >> EC_MKBP_FP_ENROLL_PROGRESS_OFFSET)
-#define EC_MKBP_FP_MATCH_IDX_OFFSET 12
-#define EC_MKBP_FP_MATCH_IDX_MASK 0x0000F000
-#define EC_MKBP_FP_MATCH_IDX(fpe) (((fpe) & EC_MKBP_FP_MATCH_IDX_MASK) \
-					 >> EC_MKBP_FP_MATCH_IDX_OFFSET)
-#define EC_MKBP_FP_ENROLL               BIT(27)
-#define EC_MKBP_FP_MATCH                BIT(28)
-#define EC_MKBP_FP_FINGER_DOWN          BIT(29)
-#define EC_MKBP_FP_FINGER_UP            BIT(30)
-#define EC_MKBP_FP_IMAGE_READY          BIT(31)
-/* code given by EC_MKBP_FP_ERRCODE() when EC_MKBP_FP_ENROLL is set */
-#define EC_MKBP_FP_ERR_ENROLL_OK               0
-#define EC_MKBP_FP_ERR_ENROLL_LOW_QUALITY      1
-#define EC_MKBP_FP_ERR_ENROLL_IMMOBILE         2
-#define EC_MKBP_FP_ERR_ENROLL_LOW_COVERAGE     3
-#define EC_MKBP_FP_ERR_ENROLL_INTERNAL         5
-/* Can be used to detect if image was usable for enrollment or not. */
-#define EC_MKBP_FP_ERR_ENROLL_PROBLEM_MASK     1
-/* code given by EC_MKBP_FP_ERRCODE() when EC_MKBP_FP_MATCH is set */
-#define EC_MKBP_FP_ERR_MATCH_NO                0
-#define EC_MKBP_FP_ERR_MATCH_NO_INTERNAL       6
-#define EC_MKBP_FP_ERR_MATCH_NO_TEMPLATES      7
-#define EC_MKBP_FP_ERR_MATCH_NO_LOW_QUALITY    2
-#define EC_MKBP_FP_ERR_MATCH_NO_LOW_COVERAGE   4
-#define EC_MKBP_FP_ERR_MATCH_YES               1
-#define EC_MKBP_FP_ERR_MATCH_YES_UPDATED       3
-#define EC_MKBP_FP_ERR_MATCH_YES_UPDATE_FAILED 5
-
-
-/*****************************************************************************/
-/* Temperature sensor commands */
-
-/* Read temperature sensor info */
-#define EC_CMD_TEMP_SENSOR_GET_INFO 0x0070
-
-struct ec_params_temp_sensor_get_info {
-	uint8_t id;
-} __ec_align1;
-
-struct ec_response_temp_sensor_get_info {
-	char sensor_name[32];
-	uint8_t sensor_type;
-} __ec_align1;
-
-/*****************************************************************************/
-
-/*
- * Note: host commands 0x80 - 0x87 are reserved to avoid conflict with ACPI
- * commands accidentally sent to the wrong interface.  See the ACPI section
- * below.
- */
-
-/*****************************************************************************/
-/* Host event commands */
-
-
-/* Obsolete. New implementation should use EC_CMD_HOST_EVENT instead */
-/*
- * Host event mask params and response structures, shared by all of the host
- * event commands below.
- */
-struct ec_params_host_event_mask {
-	uint32_t mask;
-} __ec_align4;
-
-struct ec_response_host_event_mask {
-	uint32_t mask;
-} __ec_align4;
-
-/* These all use ec_response_host_event_mask */
-#define EC_CMD_HOST_EVENT_GET_B         0x0087
-#define EC_CMD_HOST_EVENT_GET_SMI_MASK  0x0088
-#define EC_CMD_HOST_EVENT_GET_SCI_MASK  0x0089
-#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x008D
-
-/* These all use ec_params_host_event_mask */
-#define EC_CMD_HOST_EVENT_SET_SMI_MASK  0x008A
-#define EC_CMD_HOST_EVENT_SET_SCI_MASK  0x008B
-#define EC_CMD_HOST_EVENT_CLEAR         0x008C
-#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x008E
-#define EC_CMD_HOST_EVENT_CLEAR_B       0x008F
-
-/*
- * Unified host event programming interface - Should be used by newer versions
- * of BIOS/OS to program host events and masks
- */
-
-struct ec_params_host_event {
-
-	/* Action requested by host - one of enum ec_host_event_action. */
-	uint8_t action;
-
-	/*
-	 * Mask type that the host requested the action on - one of
-	 * enum ec_host_event_mask_type.
-	 */
-	uint8_t mask_type;
-
-	/* Set to 0, ignore on read */
-	uint16_t reserved;
-
-	/* Value to be used in case of set operations. */
-	uint64_t value;
-} __ec_align4;
-
-/*
- * Response structure returned by EC_CMD_HOST_EVENT.
- * Update the value on a GET request. Set to 0 on GET/CLEAR
- */
-
-struct ec_response_host_event {
-
-	/* Mask value in case of get operation */
-	uint64_t value;
-} __ec_align4;
-
-enum ec_host_event_action {
-	/*
-	 * params.value is ignored. Value of mask_type populated
-	 * in response.value
-	 */
-	EC_HOST_EVENT_GET,
-
-	/* Bits in params.value are set */
-	EC_HOST_EVENT_SET,
-
-	/* Bits in params.value are cleared */
-	EC_HOST_EVENT_CLEAR,
-};
-
-enum ec_host_event_mask_type {
-
-	/* Main host event copy */
-	EC_HOST_EVENT_MAIN,
-
-	/* Copy B of host events */
-	EC_HOST_EVENT_B,
-
-	/* SCI Mask */
-	EC_HOST_EVENT_SCI_MASK,
-
-	/* SMI Mask */
-	EC_HOST_EVENT_SMI_MASK,
-
-	/* Mask of events that should be always reported in hostevents */
-	EC_HOST_EVENT_ALWAYS_REPORT_MASK,
-
-	/* Active wake mask */
-	EC_HOST_EVENT_ACTIVE_WAKE_MASK,
-
-	/* Lazy wake mask for S0ix */
-	EC_HOST_EVENT_LAZY_WAKE_MASK_S0IX,
-
-	/* Lazy wake mask for S3 */
-	EC_HOST_EVENT_LAZY_WAKE_MASK_S3,
-
-	/* Lazy wake mask for S5 */
-	EC_HOST_EVENT_LAZY_WAKE_MASK_S5,
-};
-
-#define EC_CMD_HOST_EVENT       0x00A4
-
-/*****************************************************************************/
-/* Switch commands */
-
-/* Enable/disable LCD backlight */
-#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x0090
-
-struct ec_params_switch_enable_backlight {
-	uint8_t enabled;
-} __ec_align1;
-
-/* Enable/disable WLAN/Bluetooth */
-#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x0091
-#define EC_VER_SWITCH_ENABLE_WIRELESS 1
-
-/* Version 0 params; no response */
-struct ec_params_switch_enable_wireless_v0 {
-	uint8_t enabled;
-} __ec_align1;
-
-/* Version 1 params */
-struct ec_params_switch_enable_wireless_v1 {
-	/* Flags to enable now */
-	uint8_t now_flags;
-
-	/* Which flags to copy from now_flags */
-	uint8_t now_mask;
-
-	/*
-	 * Flags to leave enabled in S3, if they're on at the S0->S3
-	 * transition.  (Other flags will be disabled by the S0->S3
-	 * transition.)
-	 */
-	uint8_t suspend_flags;
-
-	/* Which flags to copy from suspend_flags */
-	uint8_t suspend_mask;
-} __ec_align1;
-
-/* Version 1 response */
-struct ec_response_switch_enable_wireless_v1 {
-	/* Flags to enable now */
-	uint8_t now_flags;
-
-	/* Flags to leave enabled in S3 */
-	uint8_t suspend_flags;
-} __ec_align1;
-
-/*****************************************************************************/
-/* GPIO commands. Only available on EC if write protect has been disabled. */
-
-/* Set GPIO output value */
-#define EC_CMD_GPIO_SET 0x0092
-
-struct ec_params_gpio_set {
-	char name[32];
-	uint8_t val;
-} __ec_align1;
-
-/* Get GPIO value */
-#define EC_CMD_GPIO_GET 0x0093
-
-/* Version 0 of input params and response */
-struct ec_params_gpio_get {
-	char name[32];
-} __ec_align1;
-
-struct ec_response_gpio_get {
-	uint8_t val;
-} __ec_align1;
-
-/* Version 1 of input params and response */
-struct ec_params_gpio_get_v1 {
-	uint8_t subcmd;
-	union {
-		struct __ec_align1 {
-			char name[32];
-		} get_value_by_name;
-		struct __ec_align1 {
-			uint8_t index;
-		} get_info;
-	};
-} __ec_align1;
-
-struct ec_response_gpio_get_v1 {
-	union {
-		struct __ec_align1 {
-			uint8_t val;
-		} get_value_by_name, get_count;
-		struct __ec_todo_unpacked {
-			uint8_t val;
-			char name[32];
-			uint32_t flags;
-		} get_info;
-	};
-} __ec_todo_packed;
-
-enum gpio_get_subcmd {
-	EC_GPIO_GET_BY_NAME = 0,
-	EC_GPIO_GET_COUNT = 1,
-	EC_GPIO_GET_INFO = 2,
-};
-
-/*****************************************************************************/
-/* I2C commands. Only available when flash write protect is unlocked. */
-
-/*
- * CAUTION: These commands are deprecated, and are not supported anymore in EC
- * builds >= 8398.0.0 (see crosbug.com/p/23570).
- *
- * Use EC_CMD_I2C_PASSTHRU instead.
- */
-
-/* Read I2C bus */
-#define EC_CMD_I2C_READ 0x0094
-
-struct ec_params_i2c_read {
-	uint16_t addr; /* 8-bit address (7-bit shifted << 1) */
-	uint8_t read_size; /* Either 8 or 16. */
-	uint8_t port;
-	uint8_t offset;
-} __ec_align_size1;
-
-struct ec_response_i2c_read {
-	uint16_t data;
-} __ec_align2;
-
-/* Write I2C bus */
-#define EC_CMD_I2C_WRITE 0x0095
-
-struct ec_params_i2c_write {
-	uint16_t data;
-	uint16_t addr; /* 8-bit address (7-bit shifted << 1) */
-	uint8_t write_size; /* Either 8 or 16. */
-	uint8_t port;
-	uint8_t offset;
-} __ec_align_size1;
-
-/*****************************************************************************/
-/* Charge state commands. Only available when flash write protect unlocked. */
-
-/* Force charge state machine to stop charging the battery or force it to
- * discharge the battery.
- */
-#define EC_CMD_CHARGE_CONTROL 0x0096
-#define EC_VER_CHARGE_CONTROL 1
-
-enum ec_charge_control_mode {
-	CHARGE_CONTROL_NORMAL = 0,
-	CHARGE_CONTROL_IDLE,
-	CHARGE_CONTROL_DISCHARGE,
-};
-
-struct ec_params_charge_control {
-	uint32_t mode;  /* enum charge_control_mode */
-} __ec_align4;
-
-/*****************************************************************************/
-
-/* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */
-#define EC_CMD_CONSOLE_SNAPSHOT 0x0097
-
-/*
- * Read data from the saved snapshot. If the subcmd parameter is
- * CONSOLE_READ_NEXT, this will return data starting from the beginning of
- * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the
- * end of the previous snapshot.
- *
- * The params are only looked at in version >= 1 of this command. Prior
- * versions will just default to CONSOLE_READ_NEXT behavior.
- *
- * Response is null-terminated string.  Empty string, if there is no more
- * remaining output.
- */
-#define EC_CMD_CONSOLE_READ 0x0098
-
-enum ec_console_read_subcmd {
-	CONSOLE_READ_NEXT = 0,
-	CONSOLE_READ_RECENT
-};
-
-struct ec_params_console_read_v1 {
-	uint8_t subcmd; /* enum ec_console_read_subcmd */
-} __ec_align1;
-
-/*****************************************************************************/
-
-/*
- * Cut off battery power immediately or after the host has shut down.
- *
- * return EC_RES_INVALID_COMMAND if unsupported by a board/battery.
- *	  EC_RES_SUCCESS if the command was successful.
- *	  EC_RES_ERROR if the cut off command failed.
- */
-#define EC_CMD_BATTERY_CUT_OFF 0x0099
-
-#define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN	BIT(0)
-
-struct ec_params_battery_cutoff {
-	uint8_t flags;
-} __ec_align1;
-
-/*****************************************************************************/
-/* USB port mux control. */
-
-/*
- * Switch USB mux or return to automatic switching.
- */
-#define EC_CMD_USB_MUX 0x009A
-
-struct ec_params_usb_mux {
-	uint8_t mux;
-} __ec_align1;
-
-/*****************************************************************************/
-/* LDOs / FETs control. */
-
-enum ec_ldo_state {
-	EC_LDO_STATE_OFF = 0,	/* the LDO / FET is shut down */
-	EC_LDO_STATE_ON = 1,	/* the LDO / FET is ON / providing power */
-};
-
-/*
- * Switch on/off a LDO.
- */
-#define EC_CMD_LDO_SET 0x009B
-
-struct ec_params_ldo_set {
-	uint8_t index;
-	uint8_t state;
-} __ec_align1;
-
-/*
- * Get LDO state.
- */
-#define EC_CMD_LDO_GET 0x009C
-
-struct ec_params_ldo_get {
-	uint8_t index;
-} __ec_align1;
-
-struct ec_response_ldo_get {
-	uint8_t state;
-} __ec_align1;
-
-/*****************************************************************************/
-/* Power info. */
-
-/*
- * Get power info.
- */
-#define EC_CMD_POWER_INFO 0x009D
-
-struct ec_response_power_info {
-	uint32_t usb_dev_type;
-	uint16_t voltage_ac;
-	uint16_t voltage_system;
-	uint16_t current_system;
-	uint16_t usb_current_limit;
-} __ec_align4;
-
-/*****************************************************************************/
-/* I2C passthru command */
-
-#define EC_CMD_I2C_PASSTHRU 0x009E
-
-/* Read data; if not present, message is a write */
-#define EC_I2C_FLAG_READ	BIT(15)
-
-/* Mask for address */
-#define EC_I2C_ADDR_MASK	0x3ff
-
-#define EC_I2C_STATUS_NAK	BIT(0) /* Transfer was not acknowledged */
-#define EC_I2C_STATUS_TIMEOUT	BIT(1) /* Timeout during transfer */
-
-/* Any error */
-#define EC_I2C_STATUS_ERROR	(EC_I2C_STATUS_NAK | EC_I2C_STATUS_TIMEOUT)
-
-struct ec_params_i2c_passthru_msg {
-	uint16_t addr_flags;	/* I2C slave address (7 or 10 bits) and flags */
-	uint16_t len;		/* Number of bytes to read or write */
-} __ec_align2;
-
-struct ec_params_i2c_passthru {
-	uint8_t port;		/* I2C port number */
-	uint8_t num_msgs;	/* Number of messages */
-	struct ec_params_i2c_passthru_msg msg[];
-	/* Data to write for all messages is concatenated here */
-} __ec_align2;
-
-struct ec_response_i2c_passthru {
-	uint8_t i2c_status;	/* Status flags (EC_I2C_STATUS_...) */
-	uint8_t num_msgs;	/* Number of messages processed */
-	uint8_t data[];		/* Data read by messages concatenated here */
-} __ec_align1;
-
-/*****************************************************************************/
-/* Power button hang detect */
-
-#define EC_CMD_HANG_DETECT 0x009F
-
-/* Reasons to start hang detection timer */
-/* Power button pressed */
-#define EC_HANG_START_ON_POWER_PRESS  BIT(0)
-
-/* Lid closed */
-#define EC_HANG_START_ON_LID_CLOSE    BIT(1)
-
- /* Lid opened */
-#define EC_HANG_START_ON_LID_OPEN     BIT(2)
-
-/* Start of AP S3->S0 transition (booting or resuming from suspend) */
-#define EC_HANG_START_ON_RESUME       BIT(3)
-
-/* Reasons to cancel hang detection */
-
-/* Power button released */
-#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8)
-
-/* Any host command from AP received */
-#define EC_HANG_STOP_ON_HOST_COMMAND  BIT(9)
-
-/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
-#define EC_HANG_STOP_ON_SUSPEND       BIT(10)
-
-/*
- * If this flag is set, all the other fields are ignored, and the hang detect
- * timer is started.  This provides the AP a way to start the hang timer
- * without reconfiguring any of the other hang detect settings.  Note that
- * you must previously have configured the timeouts.
- */
-#define EC_HANG_START_NOW             BIT(30)
-
-/*
- * If this flag is set, all the other fields are ignored (including
- * EC_HANG_START_NOW).  This provides the AP a way to stop the hang timer
- * without reconfiguring any of the other hang detect settings.
- */
-#define EC_HANG_STOP_NOW              BIT(31)
-
-struct ec_params_hang_detect {
-	/* Flags; see EC_HANG_* */
-	uint32_t flags;
-
-	/* Timeout in msec before generating host event, if enabled */
-	uint16_t host_event_timeout_msec;
-
-	/* Timeout in msec before generating warm reboot, if enabled */
-	uint16_t warm_reboot_timeout_msec;
-} __ec_align4;
-
-/*****************************************************************************/
-/* Commands for battery charging */
-
-/*
- * This is the single catch-all host command to exchange data regarding the
- * charge state machine (v2 and up).
- */
-#define EC_CMD_CHARGE_STATE 0x00A0
-
-/* Subcommands for this host command */
-enum charge_state_command {
-	CHARGE_STATE_CMD_GET_STATE,
-	CHARGE_STATE_CMD_GET_PARAM,
-	CHARGE_STATE_CMD_SET_PARAM,
-	CHARGE_STATE_NUM_CMDS
-};
-
-/*
- * Known param numbers are defined here. Ranges are reserved for board-specific
- * params, which are handled by the particular implementations.
- */
-enum charge_state_params {
-	CS_PARAM_CHG_VOLTAGE,	      /* charger voltage limit */
-	CS_PARAM_CHG_CURRENT,	      /* charger current limit */
-	CS_PARAM_CHG_INPUT_CURRENT,   /* charger input current limit */
-	CS_PARAM_CHG_STATUS,	      /* charger-specific status */
-	CS_PARAM_CHG_OPTION,	      /* charger-specific options */
-	CS_PARAM_LIMIT_POWER,	      /*
-				       * Check if power is limited due to
-				       * low battery and / or a weak external
-				       * charger. READ ONLY.
-				       */
-	/* How many so far? */
-	CS_NUM_BASE_PARAMS,
-
-	/* Range for CONFIG_CHARGER_PROFILE_OVERRIDE params */
-	CS_PARAM_CUSTOM_PROFILE_MIN = 0x10000,
-	CS_PARAM_CUSTOM_PROFILE_MAX = 0x1ffff,
-
-	/* Range for CONFIG_CHARGE_STATE_DEBUG params */
-	CS_PARAM_DEBUG_MIN = 0x20000,
-	CS_PARAM_DEBUG_CTL_MODE = 0x20000,
-	CS_PARAM_DEBUG_MANUAL_MODE,
-	CS_PARAM_DEBUG_SEEMS_DEAD,
-	CS_PARAM_DEBUG_SEEMS_DISCONNECTED,
-	CS_PARAM_DEBUG_BATT_REMOVED,
-	CS_PARAM_DEBUG_MANUAL_CURRENT,
-	CS_PARAM_DEBUG_MANUAL_VOLTAGE,
-	CS_PARAM_DEBUG_MAX = 0x2ffff,
-
-	/* Other custom param ranges go here... */
-};
-
-struct ec_params_charge_state {
-	uint8_t cmd;				/* enum charge_state_command */
-	union {
-		/* get_state has no args */
-
-		struct __ec_todo_unpacked {
-			uint32_t param;		/* enum charge_state_param */
-		} get_param;
-
-		struct __ec_todo_unpacked {
-			uint32_t param;		/* param to set */
-			uint32_t value;		/* value to set */
-		} set_param;
-	};
-} __ec_todo_packed;
-
-struct ec_response_charge_state {
-	union {
-		struct __ec_align4 {
-			int ac;
-			int chg_voltage;
-			int chg_current;
-			int chg_input_current;
-			int batt_state_of_charge;
-		} get_state;
-
-		struct __ec_align4 {
-			uint32_t value;
-		} get_param;
-
-		/* set_param returns no args */
-	};
-} __ec_align4;
-
-
-/*
- * Set maximum battery charging current.
- */
-#define EC_CMD_CHARGE_CURRENT_LIMIT 0x00A1
-
-struct ec_params_current_limit {
-	uint32_t limit; /* in mA */
-} __ec_align4;
-
-/*
- * Set maximum external voltage / current.
- */
-#define EC_CMD_EXTERNAL_POWER_LIMIT 0x00A2
-
-/* Command v0 is used only on Spring and is obsolete + unsupported */
-struct ec_params_external_power_limit_v1 {
-	uint16_t current_lim; /* in mA, or EC_POWER_LIMIT_NONE to clear limit */
-	uint16_t voltage_lim; /* in mV, or EC_POWER_LIMIT_NONE to clear limit */
-} __ec_align2;
-
-#define EC_POWER_LIMIT_NONE 0xffff
-
-/*
- * Set maximum voltage & current of a dedicated charge port
- */
-#define EC_CMD_OVERRIDE_DEDICATED_CHARGER_LIMIT 0x00A3
-
-struct ec_params_dedicated_charger_limit {
-	uint16_t current_lim; /* in mA */
-	uint16_t voltage_lim; /* in mV */
-} __ec_align2;
-
-/*****************************************************************************/
-/* Hibernate/Deep Sleep Commands */
-
-/* Set the delay before going into hibernation. */
-#define EC_CMD_HIBERNATION_DELAY 0x00A8
-
-struct ec_params_hibernation_delay {
-	/*
-	 * Seconds to wait in G3 before hibernate.  Pass in 0 to read the
-	 * current settings without changing them.
-	 */
-	uint32_t seconds;
-} __ec_align4;
-
-struct ec_response_hibernation_delay {
-	/*
-	 * The current time in seconds in which the system has been in the G3
-	 * state.  This value is reset if the EC transitions out of G3.
-	 */
-	uint32_t time_g3;
-
-	/*
-	 * The current time remaining in seconds until the EC should hibernate.
-	 * This value is also reset if the EC transitions out of G3.
-	 */
-	uint32_t time_remaining;
-
-	/*
-	 * The current time in seconds that the EC should wait in G3 before
-	 * hibernating.
-	 */
-	uint32_t hibernate_delay;
-} __ec_align4;
-
-/* Inform the EC when entering a sleep state */
-#define EC_CMD_HOST_SLEEP_EVENT 0x00A9
-
-enum host_sleep_event {
-	HOST_SLEEP_EVENT_S3_SUSPEND   = 1,
-	HOST_SLEEP_EVENT_S3_RESUME    = 2,
-	HOST_SLEEP_EVENT_S0IX_SUSPEND = 3,
-	HOST_SLEEP_EVENT_S0IX_RESUME  = 4,
-	/* S3 suspend with additional enabled wake sources */
-	HOST_SLEEP_EVENT_S3_WAKEABLE_SUSPEND = 5,
-};
-
-struct ec_params_host_sleep_event {
-	uint8_t sleep_event;
-} __ec_align1;
-
-/*
- * Use a default timeout value (CONFIG_SLEEP_TIMEOUT_MS) for detecting sleep
- * transition failures
- */
-#define EC_HOST_SLEEP_TIMEOUT_DEFAULT 0
-
-/* Disable timeout detection for this sleep transition */
-#define EC_HOST_SLEEP_TIMEOUT_INFINITE 0xFFFF
-
-struct ec_params_host_sleep_event_v1 {
-	/* The type of sleep being entered or exited. */
-	uint8_t sleep_event;
-
-	/* Padding */
-	uint8_t reserved;
-	union {
-		/* Parameters that apply for suspend messages. */
-		struct {
-			/*
-			 * The timeout in milliseconds between when this message
-			 * is received and when the EC will declare sleep
-			 * transition failure if the sleep signal is not
-			 * asserted.
-			 */
-			uint16_t sleep_timeout_ms;
-		} suspend_params;
-
-		/* No parameters for non-suspend messages. */
-	};
-} __ec_align2;
-
-/* A timeout occurred when this bit is set */
-#define EC_HOST_RESUME_SLEEP_TIMEOUT 0x80000000
-
-/*
- * The mask defining which bits correspond to the number of sleep transitions,
- * as well as the maximum number of suspend line transitions that will be
- * reported back to the host.
- */
-#define EC_HOST_RESUME_SLEEP_TRANSITIONS_MASK 0x7FFFFFFF
-
-struct ec_response_host_sleep_event_v1 {
-	union {
-		/* Response fields that apply for resume messages. */
-		struct {
-			/*
-			 * The number of sleep power signal transitions that
-			 * occurred since the suspend message. The high bit
-			 * indicates a timeout occurred.
-			 */
-			uint32_t sleep_transitions;
-		} resume_response;
-
-		/* No response fields for non-resume messages. */
-	};
-} __ec_align4;
-
-/*****************************************************************************/
-/* Device events */
-#define EC_CMD_DEVICE_EVENT 0x00AA
-
-enum ec_device_event {
-	EC_DEVICE_EVENT_TRACKPAD,
-	EC_DEVICE_EVENT_DSP,
-	EC_DEVICE_EVENT_WIFI,
-};
-
-enum ec_device_event_param {
-	/* Get and clear pending device events */
-	EC_DEVICE_EVENT_PARAM_GET_CURRENT_EVENTS,
-	/* Get device event mask */
-	EC_DEVICE_EVENT_PARAM_GET_ENABLED_EVENTS,
-	/* Set device event mask */
-	EC_DEVICE_EVENT_PARAM_SET_ENABLED_EVENTS,
-};
-
-#define EC_DEVICE_EVENT_MASK(event_code) BIT(event_code % 32)
-
-struct ec_params_device_event {
-	uint32_t event_mask;
-	uint8_t param;
-} __ec_align_size1;
-
-struct ec_response_device_event {
-	uint32_t event_mask;
-} __ec_align4;
-
-/*****************************************************************************/
-/* Smart battery pass-through */
-
-/* Get / Set 16-bit smart battery registers */
-#define EC_CMD_SB_READ_WORD   0x00B0
-#define EC_CMD_SB_WRITE_WORD  0x00B1
-
-/* Get / Set string smart battery parameters
- * formatted as SMBUS "block".
- */
-#define EC_CMD_SB_READ_BLOCK  0x00B2
-#define EC_CMD_SB_WRITE_BLOCK 0x00B3
-
-struct ec_params_sb_rd {
-	uint8_t reg;
-} __ec_align1;
-
-struct ec_response_sb_rd_word {
-	uint16_t value;
-} __ec_align2;
-
-struct ec_params_sb_wr_word {
-	uint8_t reg;
-	uint16_t value;
-} __ec_align1;
-
-struct ec_response_sb_rd_block {
-	uint8_t data[32];
-} __ec_align1;
-
-struct ec_params_sb_wr_block {
-	uint8_t reg;
-	uint16_t data[32];
-} __ec_align1;
-
-/*****************************************************************************/
-/* Battery vendor parameters
- *
- * Get or set vendor-specific parameters in the battery. Implementations may
- * differ between boards or batteries. On a set operation, the response
- * contains the actual value set, which may be rounded or clipped from the
- * requested value.
- */
-
-#define EC_CMD_BATTERY_VENDOR_PARAM 0x00B4
-
-enum ec_battery_vendor_param_mode {
-	BATTERY_VENDOR_PARAM_MODE_GET = 0,
-	BATTERY_VENDOR_PARAM_MODE_SET,
-};
-
-struct ec_params_battery_vendor_param {
-	uint32_t param;
-	uint32_t value;
-	uint8_t mode;
-} __ec_align_size1;
-
-struct ec_response_battery_vendor_param {
-	uint32_t value;
-} __ec_align4;
-
-/*****************************************************************************/
-/*
- * Smart Battery Firmware Update Commands
- */
-#define EC_CMD_SB_FW_UPDATE 0x00B5
-
-enum ec_sb_fw_update_subcmd {
-	EC_SB_FW_UPDATE_PREPARE  = 0x0,
-	EC_SB_FW_UPDATE_INFO     = 0x1, /*query sb info */
-	EC_SB_FW_UPDATE_BEGIN    = 0x2, /*check if protected */
-	EC_SB_FW_UPDATE_WRITE    = 0x3, /*check if protected */
-	EC_SB_FW_UPDATE_END      = 0x4,
-	EC_SB_FW_UPDATE_STATUS   = 0x5,
-	EC_SB_FW_UPDATE_PROTECT  = 0x6,
-	EC_SB_FW_UPDATE_MAX      = 0x7,
-};
-
-#define SB_FW_UPDATE_CMD_WRITE_BLOCK_SIZE 32
-#define SB_FW_UPDATE_CMD_STATUS_SIZE 2
-#define SB_FW_UPDATE_CMD_INFO_SIZE 8
-
-struct ec_sb_fw_update_header {
-	uint16_t subcmd;  /* enum ec_sb_fw_update_subcmd */
-	uint16_t fw_id;   /* firmware id */
-} __ec_align4;
-
-struct ec_params_sb_fw_update {
-	struct ec_sb_fw_update_header hdr;
-	union {
-		/* EC_SB_FW_UPDATE_PREPARE  = 0x0 */
-		/* EC_SB_FW_UPDATE_INFO     = 0x1 */
-		/* EC_SB_FW_UPDATE_BEGIN    = 0x2 */
-		/* EC_SB_FW_UPDATE_END      = 0x4 */
-		/* EC_SB_FW_UPDATE_STATUS   = 0x5 */
-		/* EC_SB_FW_UPDATE_PROTECT  = 0x6 */
-		/* Those have no args */
-
-		/* EC_SB_FW_UPDATE_WRITE    = 0x3 */
-		struct __ec_align4 {
-			uint8_t  data[SB_FW_UPDATE_CMD_WRITE_BLOCK_SIZE];
-		} write;
-	};
-} __ec_align4;
-
-struct ec_response_sb_fw_update {
-	union {
-		/* EC_SB_FW_UPDATE_INFO     = 0x1 */
-		struct __ec_align1 {
-			uint8_t data[SB_FW_UPDATE_CMD_INFO_SIZE];
-		} info;
-
-		/* EC_SB_FW_UPDATE_STATUS   = 0x5 */
-		struct __ec_align1 {
-			uint8_t data[SB_FW_UPDATE_CMD_STATUS_SIZE];
-		} status;
-	};
-} __ec_align1;
-
-/*
- * Entering Verified Boot Mode Command
- * Default mode is VBOOT_MODE_NORMAL if EC did not receive this command.
- * Valid Modes are: normal, developer, and recovery.
- */
-#define EC_CMD_ENTERING_MODE 0x00B6
-
-struct ec_params_entering_mode {
-	int vboot_mode;
-} __ec_align4;
-
-#define VBOOT_MODE_NORMAL    0
-#define VBOOT_MODE_DEVELOPER 1
-#define VBOOT_MODE_RECOVERY  2
-
-/*****************************************************************************/
-/*
- * I2C passthru protection command: Protects I2C tunnels against access on
- * certain addresses (board-specific).
- */
-#define EC_CMD_I2C_PASSTHRU_PROTECT 0x00B7
-
-enum ec_i2c_passthru_protect_subcmd {
-	EC_CMD_I2C_PASSTHRU_PROTECT_STATUS = 0x0,
-	EC_CMD_I2C_PASSTHRU_PROTECT_ENABLE = 0x1,
-};
-
-struct ec_params_i2c_passthru_protect {
-	uint8_t subcmd;
-	uint8_t port;		/* I2C port number */
-} __ec_align1;
-
-struct ec_response_i2c_passthru_protect {
-	uint8_t status;		/* Status flags (0: unlocked, 1: locked) */
-} __ec_align1;
-
-
-/*****************************************************************************/
-/*
- * HDMI CEC commands
- *
- * These commands are for sending and receiving message via HDMI CEC
- */
-
-#define MAX_CEC_MSG_LEN 16
-
-/* CEC message from the AP to be written on the CEC bus */
-#define EC_CMD_CEC_WRITE_MSG 0x00B8
-
-/**
- * struct ec_params_cec_write - Message to write to the CEC bus
- * @msg: message content to write to the CEC bus
- */
-struct ec_params_cec_write {
-	uint8_t msg[MAX_CEC_MSG_LEN];
-} __ec_align1;
-
-/* Set various CEC parameters */
-#define EC_CMD_CEC_SET 0x00BA
-
-/**
- * struct ec_params_cec_set - CEC parameters set
- * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
- * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC
- *	or 1 to enable CEC functionality, in case cmd is
- *	CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical
- *	address between 0 and 15 or 0xff to unregister
- */
-struct ec_params_cec_set {
-	uint8_t cmd; /* enum cec_command */
-	uint8_t val;
-} __ec_align1;
-
-/* Read various CEC parameters */
-#define EC_CMD_CEC_GET 0x00BB
-
-/**
- * struct ec_params_cec_get - CEC parameters get
- * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
- */
-struct ec_params_cec_get {
-	uint8_t cmd; /* enum cec_command */
-} __ec_align1;
-
-/**
- * struct ec_response_cec_get - CEC parameters get response
- * @val: in case cmd was CEC_CMD_ENABLE, this field will 0 if CEC is
- *	disabled or 1 if CEC functionality is enabled,
- *	in case cmd was CEC_CMD_LOGICAL_ADDRESS, this will encode the
- *	configured logical address between 0 and 15 or 0xff if unregistered
- */
-struct ec_response_cec_get {
-	uint8_t val;
-} __ec_align1;
-
-/* CEC parameters command */
-enum cec_command {
-	/* CEC reading, writing and events enable */
-	CEC_CMD_ENABLE,
-	/* CEC logical address  */
-	CEC_CMD_LOGICAL_ADDRESS,
-};
-
-/* Events from CEC to AP */
-enum mkbp_cec_event {
-	/* Outgoing message was acknowledged by a follower */
-	EC_MKBP_CEC_SEND_OK			= BIT(0),
-	/* Outgoing message was not acknowledged */
-	EC_MKBP_CEC_SEND_FAILED			= BIT(1),
-};
-
-/*****************************************************************************/
-
-/* Commands for I2S recording on audio codec. */
-
-#define EC_CMD_CODEC_I2S 0x00BC
-#define EC_WOV_I2S_SAMPLE_RATE 48000
-
-enum ec_codec_i2s_subcmd {
-	EC_CODEC_SET_SAMPLE_DEPTH = 0x0,
-	EC_CODEC_SET_GAIN = 0x1,
-	EC_CODEC_GET_GAIN = 0x2,
-	EC_CODEC_I2S_ENABLE = 0x3,
-	EC_CODEC_I2S_SET_CONFIG = 0x4,
-	EC_CODEC_I2S_SET_TDM_CONFIG = 0x5,
-	EC_CODEC_I2S_SET_BCLK = 0x6,
-	EC_CODEC_I2S_SUBCMD_COUNT = 0x7,
-};
-
-enum ec_sample_depth_value {
-	EC_CODEC_SAMPLE_DEPTH_16 = 0,
-	EC_CODEC_SAMPLE_DEPTH_24 = 1,
-};
-
-enum ec_i2s_config {
-	EC_DAI_FMT_I2S = 0,
-	EC_DAI_FMT_RIGHT_J = 1,
-	EC_DAI_FMT_LEFT_J = 2,
-	EC_DAI_FMT_PCM_A = 3,
-	EC_DAI_FMT_PCM_B = 4,
-	EC_DAI_FMT_PCM_TDM = 5,
-};
-
-/*
- * For subcommand EC_CODEC_GET_GAIN.
- */
-struct __ec_align1 ec_codec_i2s_gain {
-	uint8_t left;
-	uint8_t right;
-};
-
-struct __ec_todo_unpacked ec_param_codec_i2s_tdm {
-	int16_t ch0_delay; /* 0 to 496 */
-	int16_t ch1_delay; /* -1 to 496 */
-	uint8_t adjacent_to_ch0;
-	uint8_t adjacent_to_ch1;
-};
-
-struct __ec_todo_packed ec_param_codec_i2s {
-	/* enum ec_codec_i2s_subcmd */
-	uint8_t cmd;
-	union {
-		/*
-		 * EC_CODEC_SET_SAMPLE_DEPTH
-		 * Value should be one of ec_sample_depth_value.
-		 */
-		uint8_t depth;
-
-		/*
-		 * EC_CODEC_SET_GAIN
-		 * Value should be 0~43 for both channels.
-		 */
-		struct ec_codec_i2s_gain gain;
-
-		/*
-		 * EC_CODEC_I2S_ENABLE
-		 * 1 to enable, 0 to disable.
-		 */
-		uint8_t i2s_enable;
-
-		/*
-		 * EC_CODEC_I2S_SET_CONFIG
-		 * Value should be one of ec_i2s_config.
-		 */
-		uint8_t i2s_config;
-
-		/*
-		 * EC_CODEC_I2S_SET_TDM_CONFIG
-		 * Value should be one of ec_i2s_config.
-		 */
-		struct ec_param_codec_i2s_tdm tdm_param;
-
-		/*
-		 * EC_CODEC_I2S_SET_BCLK
-		 */
-		uint32_t bclk;
-	};
-};
-
-
-/*****************************************************************************/
-/* System commands */
-
-/*
- * TODO(crosbug.com/p/23747): This is a confusing name, since it doesn't
- * necessarily reboot the EC.  Rename to "image" or something similar?
- */
-#define EC_CMD_REBOOT_EC 0x00D2
-
-/* Command */
-enum ec_reboot_cmd {
-	EC_REBOOT_CANCEL = 0,        /* Cancel a pending reboot */
-	EC_REBOOT_JUMP_RO = 1,       /* Jump to RO without rebooting */
-	EC_REBOOT_JUMP_RW = 2,       /* Jump to active RW without rebooting */
-	/* (command 3 was jump to RW-B) */
-	EC_REBOOT_COLD = 4,          /* Cold-reboot */
-	EC_REBOOT_DISABLE_JUMP = 5,  /* Disable jump until next reboot */
-	EC_REBOOT_HIBERNATE = 6,     /* Hibernate EC */
-	EC_REBOOT_HIBERNATE_CLEAR_AP_OFF = 7, /* and clears AP_OFF flag */
-};
-
-/* Flags for ec_params_reboot_ec.reboot_flags */
-#define EC_REBOOT_FLAG_RESERVED0      BIT(0)  /* Was recovery request */
-#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN BIT(1)  /* Reboot after AP shutdown */
-#define EC_REBOOT_FLAG_SWITCH_RW_SLOT BIT(2)  /* Switch RW slot */
-
-struct ec_params_reboot_ec {
-	uint8_t cmd;           /* enum ec_reboot_cmd */
-	uint8_t flags;         /* See EC_REBOOT_FLAG_* */
-} __ec_align1;
-
-/*
- * Get information on last EC panic.
- *
- * Returns variable-length platform-dependent panic information.  See panic.h
- * for details.
- */
-#define EC_CMD_GET_PANIC_INFO 0x00D3
-
-/*****************************************************************************/
-/*
- * Special commands
- *
- * These do not follow the normal rules for commands.  See each command for
- * details.
- */
-
-/*
- * Reboot NOW
- *
- * This command will work even when the EC LPC interface is busy, because the
- * reboot command is processed at interrupt level.  Note that when the EC
- * reboots, the host will reboot too, so there is no response to this command.
- *
- * Use EC_CMD_REBOOT_EC to reboot the EC more politely.
- */
-#define EC_CMD_REBOOT 0x00D1  /* Think "die" */
-
-/*
- * Resend last response (not supported on LPC).
- *
- * Returns EC_RES_UNAVAILABLE if there is no response available - for example,
- * there was no previous command, or the previous command's response was too
- * big to save.
- */
-#define EC_CMD_RESEND_RESPONSE 0x00DB
-
-/*
- * This header byte on a command indicate version 0. Any header byte less
- * than this means that we are talking to an old EC which doesn't support
- * versioning. In that case, we assume version 0.
- *
- * Header bytes greater than this indicate a later version. For example,
- * EC_CMD_VERSION0 + 1 means we are using version 1.
- *
- * The old EC interface must not use commands 0xdc or higher.
- */
-#define EC_CMD_VERSION0 0x00DC
-
-/*****************************************************************************/
-/*
- * PD commands
- *
- * These commands are for PD MCU communication.
- */
-
-/* EC to PD MCU exchange status command */
-#define EC_CMD_PD_EXCHANGE_STATUS 0x0100
-#define EC_VER_PD_EXCHANGE_STATUS 2
-
-enum pd_charge_state {
-	PD_CHARGE_NO_CHANGE = 0, /* Don't change charge state */
-	PD_CHARGE_NONE,          /* No charging allowed */
-	PD_CHARGE_5V,            /* 5V charging only */
-	PD_CHARGE_MAX            /* Charge at max voltage */
-};
-
-/* Status of EC being sent to PD */
-#define EC_STATUS_HIBERNATING	BIT(0)
-
-struct ec_params_pd_status {
-	uint8_t status;       /* EC status */
-	int8_t batt_soc;      /* battery state of charge */
-	uint8_t charge_state; /* charging state (from enum pd_charge_state) */
-} __ec_align1;
-
-/* Status of PD being sent back to EC */
-#define PD_STATUS_HOST_EVENT      BIT(0) /* Forward host event to AP */
-#define PD_STATUS_IN_RW           BIT(1) /* Running RW image */
-#define PD_STATUS_JUMPED_TO_IMAGE BIT(2) /* Current image was jumped to */
-#define PD_STATUS_TCPC_ALERT_0    BIT(3) /* Alert active in port 0 TCPC */
-#define PD_STATUS_TCPC_ALERT_1    BIT(4) /* Alert active in port 1 TCPC */
-#define PD_STATUS_TCPC_ALERT_2    BIT(5) /* Alert active in port 2 TCPC */
-#define PD_STATUS_TCPC_ALERT_3    BIT(6) /* Alert active in port 3 TCPC */
-#define PD_STATUS_EC_INT_ACTIVE  (PD_STATUS_TCPC_ALERT_0 | \
-				      PD_STATUS_TCPC_ALERT_1 | \
-				      PD_STATUS_HOST_EVENT)
-struct ec_response_pd_status {
-	uint32_t curr_lim_ma;       /* input current limit */
-	uint16_t status;            /* PD MCU status */
-	int8_t active_charge_port;  /* active charging port */
-} __ec_align_size1;
-
-/* AP to PD MCU host event status command, cleared on read */
-#define EC_CMD_PD_HOST_EVENT_STATUS 0x0104
-
-/* PD MCU host event status bits */
-#define PD_EVENT_UPDATE_DEVICE     BIT(0)
-#define PD_EVENT_POWER_CHANGE      BIT(1)
-#define PD_EVENT_IDENTITY_RECEIVED BIT(2)
-#define PD_EVENT_DATA_SWAP         BIT(3)
-struct ec_response_host_event_status {
-	uint32_t status;      /* PD MCU host event status */
-} __ec_align4;
-
-/* Set USB type-C port role and muxes */
-#define EC_CMD_USB_PD_CONTROL 0x0101
-
-enum usb_pd_control_role {
-	USB_PD_CTRL_ROLE_NO_CHANGE = 0,
-	USB_PD_CTRL_ROLE_TOGGLE_ON = 1, /* == AUTO */
-	USB_PD_CTRL_ROLE_TOGGLE_OFF = 2,
-	USB_PD_CTRL_ROLE_FORCE_SINK = 3,
-	USB_PD_CTRL_ROLE_FORCE_SOURCE = 4,
-	USB_PD_CTRL_ROLE_FREEZE = 5,
-	USB_PD_CTRL_ROLE_COUNT
-};
-
-enum usb_pd_control_mux {
-	USB_PD_CTRL_MUX_NO_CHANGE = 0,
-	USB_PD_CTRL_MUX_NONE = 1,
-	USB_PD_CTRL_MUX_USB = 2,
-	USB_PD_CTRL_MUX_DP = 3,
-	USB_PD_CTRL_MUX_DOCK = 4,
-	USB_PD_CTRL_MUX_AUTO = 5,
-	USB_PD_CTRL_MUX_COUNT
-};
-
-enum usb_pd_control_swap {
-	USB_PD_CTRL_SWAP_NONE = 0,
-	USB_PD_CTRL_SWAP_DATA = 1,
-	USB_PD_CTRL_SWAP_POWER = 2,
-	USB_PD_CTRL_SWAP_VCONN = 3,
-	USB_PD_CTRL_SWAP_COUNT
-};
-
-struct ec_params_usb_pd_control {
-	uint8_t port;
-	uint8_t role;
-	uint8_t mux;
-	uint8_t swap;
-} __ec_align1;
-
-#define PD_CTRL_RESP_ENABLED_COMMS      BIT(0) /* Communication enabled */
-#define PD_CTRL_RESP_ENABLED_CONNECTED  BIT(1) /* Device connected */
-#define PD_CTRL_RESP_ENABLED_PD_CAPABLE BIT(2) /* Partner is PD capable */
-
-#define PD_CTRL_RESP_ROLE_POWER         BIT(0) /* 0=SNK/1=SRC */
-#define PD_CTRL_RESP_ROLE_DATA          BIT(1) /* 0=UFP/1=DFP */
-#define PD_CTRL_RESP_ROLE_VCONN         BIT(2) /* Vconn status */
-#define PD_CTRL_RESP_ROLE_DR_POWER      BIT(3) /* Partner is dualrole power */
-#define PD_CTRL_RESP_ROLE_DR_DATA       BIT(4) /* Partner is dualrole data */
-#define PD_CTRL_RESP_ROLE_USB_COMM      BIT(5) /* Partner USB comm capable */
-#define PD_CTRL_RESP_ROLE_EXT_POWERED   BIT(6) /* Partner externally powerd */
-
-struct ec_response_usb_pd_control {
-	uint8_t enabled;
-	uint8_t role;
-	uint8_t polarity;
-	uint8_t state;
-} __ec_align1;
-
-struct ec_response_usb_pd_control_v1 {
-	uint8_t enabled;
-	uint8_t role;
-	uint8_t polarity;
-	char state[32];
-} __ec_align1;
-
-/* Values representing usbc PD CC state */
-#define USBC_PD_CC_NONE		0 /* No accessory connected */
-#define USBC_PD_CC_NO_UFP	1 /* No UFP accessory connected */
-#define USBC_PD_CC_AUDIO_ACC	2 /* Audio accessory connected */
-#define USBC_PD_CC_DEBUG_ACC	3 /* Debug accessory connected */
-#define USBC_PD_CC_UFP_ATTACHED	4 /* UFP attached to usbc */
-#define USBC_PD_CC_DFP_ATTACHED	5 /* DPF attached to usbc */
-
-struct ec_response_usb_pd_control_v2 {
-	uint8_t enabled;
-	uint8_t role;
-	uint8_t polarity;
-	char state[32];
-	uint8_t cc_state; /* USBC_PD_CC_*Encoded cc state */
-	uint8_t dp_mode;  /* Current DP pin mode (MODE_DP_PIN_[A-E]) */
-	/* CL:1500994 Current cable type */
-	uint8_t reserved_cable_type;
-} __ec_align1;
-
-#define EC_CMD_USB_PD_PORTS 0x0102
-
-/* Maximum number of PD ports on a device, num_ports will be <= this */
-#define EC_USB_PD_MAX_PORTS 8
-
-struct ec_response_usb_pd_ports {
-	uint8_t num_ports;
-} __ec_align1;
-
-#define EC_CMD_USB_PD_POWER_INFO 0x0103
-
-#define PD_POWER_CHARGING_PORT 0xff
-struct ec_params_usb_pd_power_info {
-	uint8_t port;
-} __ec_align1;
-
-enum usb_chg_type {
-	USB_CHG_TYPE_NONE,
-	USB_CHG_TYPE_PD,
-	USB_CHG_TYPE_C,
-	USB_CHG_TYPE_PROPRIETARY,
-	USB_CHG_TYPE_BC12_DCP,
-	USB_CHG_TYPE_BC12_CDP,
-	USB_CHG_TYPE_BC12_SDP,
-	USB_CHG_TYPE_OTHER,
-	USB_CHG_TYPE_VBUS,
-	USB_CHG_TYPE_UNKNOWN,
-	USB_CHG_TYPE_DEDICATED,
-};
-enum usb_power_roles {
-	USB_PD_PORT_POWER_DISCONNECTED,
-	USB_PD_PORT_POWER_SOURCE,
-	USB_PD_PORT_POWER_SINK,
-	USB_PD_PORT_POWER_SINK_NOT_CHARGING,
-};
-
-struct usb_chg_measures {
-	uint16_t voltage_max;
-	uint16_t voltage_now;
-	uint16_t current_max;
-	uint16_t current_lim;
-} __ec_align2;
-
-struct ec_response_usb_pd_power_info {
-	uint8_t role;
-	uint8_t type;
-	uint8_t dualrole;
-	uint8_t reserved1;
-	struct usb_chg_measures meas;
-	uint32_t max_power;
-} __ec_align4;
-
-
-/*
- * This command will return the number of USB PD charge port + the number
- * of dedicated port present.
- * EC_CMD_USB_PD_PORTS does NOT include the dedicated ports
- */
-#define EC_CMD_CHARGE_PORT_COUNT 0x0105
-struct ec_response_charge_port_count {
-	uint8_t port_count;
-} __ec_align1;
-
-/* Write USB-PD device FW */
-#define EC_CMD_USB_PD_FW_UPDATE 0x0110
-
-enum usb_pd_fw_update_cmds {
-	USB_PD_FW_REBOOT,
-	USB_PD_FW_FLASH_ERASE,
-	USB_PD_FW_FLASH_WRITE,
-	USB_PD_FW_ERASE_SIG,
-};
-
-struct ec_params_usb_pd_fw_update {
-	uint16_t dev_id;
-	uint8_t cmd;
-	uint8_t port;
-	uint32_t size;     /* Size to write in bytes */
-	/* Followed by data to write */
-} __ec_align4;
-
-/* Write USB-PD Accessory RW_HASH table entry */
-#define EC_CMD_USB_PD_RW_HASH_ENTRY 0x0111
-/* RW hash is first 20 bytes of SHA-256 of RW section */
-#define PD_RW_HASH_SIZE 20
-struct ec_params_usb_pd_rw_hash_entry {
-	uint16_t dev_id;
-	uint8_t dev_rw_hash[PD_RW_HASH_SIZE];
-	uint8_t reserved;        /*
-				  * For alignment of current_image
-				  * TODO(rspangler) but it's not aligned!
-				  * Should have been reserved[2].
-				  */
-	uint32_t current_image;  /* One of ec_current_image */
-} __ec_align1;
-
-/* Read USB-PD Accessory info */
-#define EC_CMD_USB_PD_DEV_INFO 0x0112
-
-struct ec_params_usb_pd_info_request {
-	uint8_t port;
-} __ec_align1;
-
-/* Read USB-PD Device discovery info */
-#define EC_CMD_USB_PD_DISCOVERY 0x0113
-struct ec_params_usb_pd_discovery_entry {
-	uint16_t vid;  /* USB-IF VID */
-	uint16_t pid;  /* USB-IF PID */
-	uint8_t ptype; /* product type (hub,periph,cable,ama) */
-} __ec_align_size1;
-
-/* Override default charge behavior */
-#define EC_CMD_PD_CHARGE_PORT_OVERRIDE 0x0114
-
-/* Negative port parameters have special meaning */
-enum usb_pd_override_ports {
-	OVERRIDE_DONT_CHARGE = -2,
-	OVERRIDE_OFF = -1,
-	/* [0, CONFIG_USB_PD_PORT_COUNT): Port# */
-};
-
-struct ec_params_charge_port_override {
-	int16_t override_port; /* Override port# */
-} __ec_align2;
-
-/*
- * Read (and delete) one entry of PD event log.
- * TODO(crbug.com/751742): Make this host command more generic to accommodate
- * future non-PD logs that use the same internal EC event_log.
- */
-#define EC_CMD_PD_GET_LOG_ENTRY 0x0115
-
-struct ec_response_pd_log {
-	uint32_t timestamp; /* relative timestamp in milliseconds */
-	uint8_t type;       /* event type : see PD_EVENT_xx below */
-	uint8_t size_port;  /* [7:5] port number [4:0] payload size in bytes */
-	uint16_t data;      /* type-defined data payload */
-	uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */
-} __ec_align4;
-
-/* The timestamp is the microsecond counter shifted to get about a ms. */
-#define PD_LOG_TIMESTAMP_SHIFT 10 /* 1 LSB = 1024us */
-
-#define PD_LOG_SIZE_MASK  0x1f
-#define PD_LOG_PORT_MASK  0xe0
-#define PD_LOG_PORT_SHIFT    5
-#define PD_LOG_PORT_SIZE(port, size) (((port) << PD_LOG_PORT_SHIFT) | \
-				      ((size) & PD_LOG_SIZE_MASK))
-#define PD_LOG_PORT(size_port) ((size_port) >> PD_LOG_PORT_SHIFT)
-#define PD_LOG_SIZE(size_port) ((size_port) & PD_LOG_SIZE_MASK)
-
-/* PD event log : entry types */
-/* PD MCU events */
-#define PD_EVENT_MCU_BASE       0x00
-#define PD_EVENT_MCU_CHARGE             (PD_EVENT_MCU_BASE+0)
-#define PD_EVENT_MCU_CONNECT            (PD_EVENT_MCU_BASE+1)
-/* Reserved for custom board event */
-#define PD_EVENT_MCU_BOARD_CUSTOM       (PD_EVENT_MCU_BASE+2)
-/* PD generic accessory events */
-#define PD_EVENT_ACC_BASE       0x20
-#define PD_EVENT_ACC_RW_FAIL   (PD_EVENT_ACC_BASE+0)
-#define PD_EVENT_ACC_RW_ERASE  (PD_EVENT_ACC_BASE+1)
-/* PD power supply events */
-#define PD_EVENT_PS_BASE        0x40
-#define PD_EVENT_PS_FAULT      (PD_EVENT_PS_BASE+0)
-/* PD video dongles events */
-#define PD_EVENT_VIDEO_BASE     0x60
-#define PD_EVENT_VIDEO_DP_MODE (PD_EVENT_VIDEO_BASE+0)
-#define PD_EVENT_VIDEO_CODEC   (PD_EVENT_VIDEO_BASE+1)
-/* Returned in the "type" field, when there is no entry available */
-#define PD_EVENT_NO_ENTRY       0xff
-
-/*
- * PD_EVENT_MCU_CHARGE event definition :
- * the payload is "struct usb_chg_measures"
- * the data field contains the port state flags as defined below :
- */
-/* Port partner is a dual role device */
-#define CHARGE_FLAGS_DUAL_ROLE         BIT(15)
-/* Port is the pending override port */
-#define CHARGE_FLAGS_DELAYED_OVERRIDE  BIT(14)
-/* Port is the override port */
-#define CHARGE_FLAGS_OVERRIDE          BIT(13)
-/* Charger type */
-#define CHARGE_FLAGS_TYPE_SHIFT               3
-#define CHARGE_FLAGS_TYPE_MASK       (0xf << CHARGE_FLAGS_TYPE_SHIFT)
-/* Power delivery role */
-#define CHARGE_FLAGS_ROLE_MASK         (7 <<  0)
-
-/*
- * PD_EVENT_PS_FAULT data field flags definition :
- */
-#define PS_FAULT_OCP                          1
-#define PS_FAULT_FAST_OCP                     2
-#define PS_FAULT_OVP                          3
-#define PS_FAULT_DISCH                        4
-
-/*
- * PD_EVENT_VIDEO_CODEC payload is "struct mcdp_info".
- */
-struct mcdp_version {
-	uint8_t major;
-	uint8_t minor;
-	uint16_t build;
-} __ec_align4;
-
-struct mcdp_info {
-	uint8_t family[2];
-	uint8_t chipid[2];
-	struct mcdp_version irom;
-	struct mcdp_version fw;
-} __ec_align4;
-
-/* struct mcdp_info field decoding */
-#define MCDP_CHIPID(chipid) ((chipid[0] << 8) | chipid[1])
-#define MCDP_FAMILY(family) ((family[0] << 8) | family[1])
-
-/* Get/Set USB-PD Alternate mode info */
-#define EC_CMD_USB_PD_GET_AMODE 0x0116
-struct ec_params_usb_pd_get_mode_request {
-	uint16_t svid_idx; /* SVID index to get */
-	uint8_t port;      /* port */
-} __ec_align_size1;
-
-struct ec_params_usb_pd_get_mode_response {
-	uint16_t svid;   /* SVID */
-	uint16_t opos;    /* Object Position */
-	uint32_t vdo[6]; /* Mode VDOs */
-} __ec_align4;
-
-#define EC_CMD_USB_PD_SET_AMODE 0x0117
-
-enum pd_mode_cmd {
-	PD_EXIT_MODE = 0,
-	PD_ENTER_MODE = 1,
-	/* Not a command.  Do NOT remove. */
-	PD_MODE_CMD_COUNT,
-};
-
-struct ec_params_usb_pd_set_mode_request {
-	uint32_t cmd;  /* enum pd_mode_cmd */
-	uint16_t svid; /* SVID to set */
-	uint8_t opos;  /* Object Position */
-	uint8_t port;  /* port */
-} __ec_align4;
-
-/* Ask the PD MCU to record a log of a requested type */
-#define EC_CMD_PD_WRITE_LOG_ENTRY 0x0118
-
-struct ec_params_pd_write_log_entry {
-	uint8_t type; /* event type : see PD_EVENT_xx above */
-	uint8_t port; /* port#, or 0 for events unrelated to a given port */
-} __ec_align1;
-
-
-/* Control USB-PD chip */
-#define EC_CMD_PD_CONTROL 0x0119
-
-enum ec_pd_control_cmd {
-	PD_SUSPEND = 0,      /* Suspend the PD chip (EC: stop talking to PD) */
-	PD_RESUME,           /* Resume the PD chip (EC: start talking to PD) */
-	PD_RESET,            /* Force reset the PD chip */
-	PD_CONTROL_DISABLE,  /* Disable further calls to this command */
-	PD_CHIP_ON,          /* Power on the PD chip */
-};
-
-struct ec_params_pd_control {
-	uint8_t chip;         /* chip id */
-	uint8_t subcmd;
-} __ec_align1;
-
-/* Get info about USB-C SS muxes */
-#define EC_CMD_USB_PD_MUX_INFO 0x011A
-
-struct ec_params_usb_pd_mux_info {
-	uint8_t port; /* USB-C port number */
-} __ec_align1;
-
-/* Flags representing mux state */
-#define USB_PD_MUX_USB_ENABLED       BIT(0) /* USB connected */
-#define USB_PD_MUX_DP_ENABLED        BIT(1) /* DP connected */
-#define USB_PD_MUX_POLARITY_INVERTED BIT(2) /* CC line Polarity inverted */
-#define USB_PD_MUX_HPD_IRQ           BIT(3) /* HPD IRQ is asserted */
-#define USB_PD_MUX_HPD_LVL           BIT(4) /* HPD level is asserted */
-
-struct ec_response_usb_pd_mux_info {
-	uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */
-} __ec_align1;
-
-#define EC_CMD_PD_CHIP_INFO		0x011B
-
-struct ec_params_pd_chip_info {
-	uint8_t port;	/* USB-C port number */
-	uint8_t renew;	/* Force renewal */
-} __ec_align1;
-
-struct ec_response_pd_chip_info {
-	uint16_t vendor_id;
-	uint16_t product_id;
-	uint16_t device_id;
-	union {
-		uint8_t fw_version_string[8];
-		uint64_t fw_version_number;
-	};
-} __ec_align2;
-
-struct ec_response_pd_chip_info_v1 {
-	uint16_t vendor_id;
-	uint16_t product_id;
-	uint16_t device_id;
-	union {
-		uint8_t fw_version_string[8];
-		uint64_t fw_version_number;
-	};
-	union {
-		uint8_t min_req_fw_version_string[8];
-		uint64_t min_req_fw_version_number;
-	};
-} __ec_align2;
-
-/* Run RW signature verification and get status */
-#define EC_CMD_RWSIG_CHECK_STATUS	0x011C
-
-struct ec_response_rwsig_check_status {
-	uint32_t status;
-} __ec_align4;
-
-/* For controlling RWSIG task */
-#define EC_CMD_RWSIG_ACTION	0x011D
-
-enum rwsig_action {
-	RWSIG_ACTION_ABORT = 0,		/* Abort RWSIG and prevent jumping */
-	RWSIG_ACTION_CONTINUE = 1,	/* Jump to RW immediately */
-};
-
-struct ec_params_rwsig_action {
-	uint32_t action;
-} __ec_align4;
-
-/* Run verification on a slot */
-#define EC_CMD_EFS_VERIFY	0x011E
-
-struct ec_params_efs_verify {
-	uint8_t region;		/* enum ec_flash_region */
-} __ec_align1;
-
-/*
- * Retrieve info from Cros Board Info store. Response is based on the data
- * type. Integers return a uint32. Strings return a string, using the response
- * size to determine how big it is.
- */
-#define EC_CMD_GET_CROS_BOARD_INFO	0x011F
-/*
- * Write info into Cros Board Info on EEPROM. Write fails if the board has
- * hardware write-protect enabled.
- */
-#define EC_CMD_SET_CROS_BOARD_INFO	0x0120
-
-enum cbi_data_tag {
-	CBI_TAG_BOARD_VERSION = 0, /* uint32_t or smaller */
-	CBI_TAG_OEM_ID = 1,        /* uint32_t or smaller */
-	CBI_TAG_SKU_ID = 2,        /* uint32_t or smaller */
-	CBI_TAG_DRAM_PART_NUM = 3, /* variable length ascii, nul terminated. */
-	CBI_TAG_OEM_NAME = 4,      /* variable length ascii, nul terminated. */
-	CBI_TAG_MODEL_ID = 5,      /* uint32_t or smaller */
-	CBI_TAG_COUNT,
-};
-
-/*
- * Flags to control read operation
- *
- * RELOAD:  Invalidate cache and read data from EEPROM. Useful to verify
- *          write was successful without reboot.
- */
-#define CBI_GET_RELOAD		BIT(0)
-
-struct ec_params_get_cbi {
-	uint32_t tag;		/* enum cbi_data_tag */
-	uint32_t flag;		/* CBI_GET_* */
-} __ec_align4;
-
-/*
- * Flags to control write behavior.
- *
- * NO_SYNC: Makes EC update data in RAM but skip writing to EEPROM. It's
- *          useful when writing multiple fields in a row.
- * INIT:    Need to be set when creating a new CBI from scratch. All fields
- *          will be initialized to zero first.
- */
-#define CBI_SET_NO_SYNC		BIT(0)
-#define CBI_SET_INIT		BIT(1)
-
-struct ec_params_set_cbi {
-	uint32_t tag;		/* enum cbi_data_tag */
-	uint32_t flag;		/* CBI_SET_* */
-	uint32_t size;		/* Data size */
-	uint8_t data[];		/* For string and raw data */
-} __ec_align1;
-
-/*
- * Information about resets of the AP by the EC and the EC's own uptime.
- */
-#define EC_CMD_GET_UPTIME_INFO 0x0121
-
-struct ec_response_uptime_info {
-	/*
-	 * Number of milliseconds since the last EC boot. Sysjump resets
-	 * typically do not restart the EC's time_since_boot epoch.
-	 *
-	 * WARNING: The EC's sense of time is much less accurate than the AP's
-	 * sense of time, in both phase and frequency.  This timebase is similar
-	 * to CLOCK_MONOTONIC_RAW, but with 1% or more frequency error.
-	 */
-	uint32_t time_since_ec_boot_ms;
-
-	/*
-	 * Number of times the AP was reset by the EC since the last EC boot.
-	 * Note that the AP may be held in reset by the EC during the initial
-	 * boot sequence, such that the very first AP boot may count as more
-	 * than one here.
-	 */
-	uint32_t ap_resets_since_ec_boot;
-
-	/*
-	 * The set of flags which describe the EC's most recent reset.  See
-	 * include/system.h RESET_FLAG_* for details.
-	 */
-	uint32_t ec_reset_flags;
-
-	/* Empty log entries have both the cause and timestamp set to zero. */
-	struct ap_reset_log_entry {
-		/*
-		 * See include/chipset.h: enum chipset_{reset,shutdown}_reason
-		 * for details.
-		 */
-		uint16_t reset_cause;
-
-		/* Reserved for protocol growth. */
-		uint16_t reserved;
-
-		/*
-		 * The time of the reset's assertion, in milliseconds since the
-		 * last EC boot, in the same epoch as time_since_ec_boot_ms.
-		 * Set to zero if the log entry is empty.
-		 */
-		uint32_t reset_time_ms;
-	} recent_ap_reset[4];
-} __ec_align4;
-
-/*
- * Add entropy to the device secret (stored in the rollback region).
- *
- * Depending on the chip, the operation may take a long time (e.g. to erase
- * flash), so the commands are asynchronous.
- */
-#define EC_CMD_ADD_ENTROPY	0x0122
-
-enum add_entropy_action {
-	/* Add entropy to the current secret. */
-	ADD_ENTROPY_ASYNC = 0,
-	/*
-	 * Add entropy, and also make sure that the previous secret is erased.
-	 * (this can be implemented by adding entropy multiple times until
-	 * all rolback blocks have been overwritten).
-	 */
-	ADD_ENTROPY_RESET_ASYNC = 1,
-	/* Read back result from the previous operation. */
-	ADD_ENTROPY_GET_RESULT = 2,
-};
-
-struct ec_params_rollback_add_entropy {
-	uint8_t action;
-} __ec_align1;
-
-/*
- * Perform a single read of a given ADC channel.
- */
-#define EC_CMD_ADC_READ		0x0123
-
-struct ec_params_adc_read {
-	uint8_t adc_channel;
-} __ec_align1;
-
-struct ec_response_adc_read {
-	int32_t adc_value;
-} __ec_align4;
-
-/*
- * Read back rollback info
- */
-#define EC_CMD_ROLLBACK_INFO		0x0124
-
-struct ec_response_rollback_info {
-	int32_t id; /* Incrementing number to indicate which region to use. */
-	int32_t rollback_min_version;
-	int32_t rw_rollback_version;
-} __ec_align4;
-
-
-/* Issue AP reset */
-#define EC_CMD_AP_RESET 0x0125
-
-/*****************************************************************************/
-/* The command range 0x200-0x2FF is reserved for Rotor. */
-
-/*****************************************************************************/
-/*
- * Reserve a range of host commands for the CR51 firmware.
- */
-#define EC_CMD_CR51_BASE 0x0300
-#define EC_CMD_CR51_LAST 0x03FF
-
-/*****************************************************************************/
-/* Fingerprint MCU commands: range 0x0400-0x040x */
-
-/* Fingerprint SPI sensor passthru command: prototyping ONLY */
-#define EC_CMD_FP_PASSTHRU 0x0400
-
-#define EC_FP_FLAG_NOT_COMPLETE 0x1
-
-struct ec_params_fp_passthru {
-	uint16_t len;		/* Number of bytes to write then read */
-	uint16_t flags;		/* EC_FP_FLAG_xxx */
-	uint8_t data[];		/* Data to send */
-} __ec_align2;
-
-/* Configure the Fingerprint MCU behavior */
-#define EC_CMD_FP_MODE 0x0402
-
-/* Put the sensor in its lowest power mode */
-#define FP_MODE_DEEPSLEEP      BIT(0)
-/* Wait to see a finger on the sensor */
-#define FP_MODE_FINGER_DOWN    BIT(1)
-/* Poll until the finger has left the sensor */
-#define FP_MODE_FINGER_UP      BIT(2)
-/* Capture the current finger image */
-#define FP_MODE_CAPTURE        BIT(3)
-/* Finger enrollment session on-going */
-#define FP_MODE_ENROLL_SESSION BIT(4)
-/* Enroll the current finger image */
-#define FP_MODE_ENROLL_IMAGE   BIT(5)
-/* Try to match the current finger image */
-#define FP_MODE_MATCH          BIT(6)
-/* Reset and re-initialize the sensor. */
-#define FP_MODE_RESET_SENSOR   BIT(7)
-/* special value: don't change anything just read back current mode */
-#define FP_MODE_DONT_CHANGE    BIT(31)
-
-#define FP_VALID_MODES (FP_MODE_DEEPSLEEP      | \
-			FP_MODE_FINGER_DOWN    | \
-			FP_MODE_FINGER_UP      | \
-			FP_MODE_CAPTURE        | \
-			FP_MODE_ENROLL_SESSION | \
-			FP_MODE_ENROLL_IMAGE   | \
-			FP_MODE_MATCH          | \
-			FP_MODE_RESET_SENSOR   | \
-			FP_MODE_DONT_CHANGE)
-
-/* Capture types defined in bits [30..28] */
-#define FP_MODE_CAPTURE_TYPE_SHIFT 28
-#define FP_MODE_CAPTURE_TYPE_MASK  (0x7 << FP_MODE_CAPTURE_TYPE_SHIFT)
-/*
- * This enum must remain ordered, if you add new values you must ensure that
- * FP_CAPTURE_TYPE_MAX is still the last one.
- */
-enum fp_capture_type {
-	/* Full blown vendor-defined capture (produces 'frame_size' bytes) */
-	FP_CAPTURE_VENDOR_FORMAT = 0,
-	/* Simple raw image capture (produces width x height x bpp bits) */
-	FP_CAPTURE_SIMPLE_IMAGE = 1,
-	/* Self test pattern (e.g. checkerboard) */
-	FP_CAPTURE_PATTERN0 = 2,
-	/* Self test pattern (e.g. inverted checkerboard) */
-	FP_CAPTURE_PATTERN1 = 3,
-	/* Capture for Quality test with fixed contrast */
-	FP_CAPTURE_QUALITY_TEST = 4,
-	/* Capture for pixel reset value test */
-	FP_CAPTURE_RESET_TEST = 5,
-	FP_CAPTURE_TYPE_MAX,
-};
-/* Extracts the capture type from the sensor 'mode' word */
-#define FP_CAPTURE_TYPE(mode) (((mode) & FP_MODE_CAPTURE_TYPE_MASK) \
-				       >> FP_MODE_CAPTURE_TYPE_SHIFT)
-
-struct ec_params_fp_mode {
-	uint32_t mode; /* as defined by FP_MODE_ constants */
-} __ec_align4;
-
-struct ec_response_fp_mode {
-	uint32_t mode; /* as defined by FP_MODE_ constants */
-} __ec_align4;
-
-/* Retrieve Fingerprint sensor information */
-#define EC_CMD_FP_INFO 0x0403
-
-/* Number of dead pixels detected on the last maintenance */
-#define FP_ERROR_DEAD_PIXELS(errors) ((errors) & 0x3FF)
-/* Unknown number of dead pixels detected on the last maintenance */
-#define FP_ERROR_DEAD_PIXELS_UNKNOWN (0x3FF)
-/* No interrupt from the sensor */
-#define FP_ERROR_NO_IRQ    BIT(12)
-/* SPI communication error */
-#define FP_ERROR_SPI_COMM  BIT(13)
-/* Invalid sensor Hardware ID */
-#define FP_ERROR_BAD_HWID  BIT(14)
-/* Sensor initialization failed */
-#define FP_ERROR_INIT_FAIL BIT(15)
-
-struct ec_response_fp_info_v0 {
-	/* Sensor identification */
-	uint32_t vendor_id;
-	uint32_t product_id;
-	uint32_t model_id;
-	uint32_t version;
-	/* Image frame characteristics */
-	uint32_t frame_size;
-	uint32_t pixel_format; /* using V4L2_PIX_FMT_ */
-	uint16_t width;
-	uint16_t height;
-	uint16_t bpp;
-	uint16_t errors; /* see FP_ERROR_ flags above */
-} __ec_align4;
-
-struct ec_response_fp_info {
-	/* Sensor identification */
-	uint32_t vendor_id;
-	uint32_t product_id;
-	uint32_t model_id;
-	uint32_t version;
-	/* Image frame characteristics */
-	uint32_t frame_size;
-	uint32_t pixel_format; /* using V4L2_PIX_FMT_ */
-	uint16_t width;
-	uint16_t height;
-	uint16_t bpp;
-	uint16_t errors; /* see FP_ERROR_ flags above */
-	/* Template/finger current information */
-	uint32_t template_size;  /* max template size in bytes */
-	uint16_t template_max;   /* maximum number of fingers/templates */
-	uint16_t template_valid; /* number of valid fingers/templates */
-	uint32_t template_dirty; /* bitmap of templates with MCU side changes */
-	uint32_t template_version; /* version of the template format */
-} __ec_align4;
-
-/* Get the last captured finger frame or a template content */
-#define EC_CMD_FP_FRAME 0x0404
-
-/* constants defining the 'offset' field which also contains the frame index */
-#define FP_FRAME_INDEX_SHIFT       28
-/* Frame buffer where the captured image is stored */
-#define FP_FRAME_INDEX_RAW_IMAGE    0
-/* First frame buffer holding a template */
-#define FP_FRAME_INDEX_TEMPLATE     1
-#define FP_FRAME_GET_BUFFER_INDEX(offset) ((offset) >> FP_FRAME_INDEX_SHIFT)
-#define FP_FRAME_OFFSET_MASK       0x0FFFFFFF
-
-/* Version of the format of the encrypted templates. */
-#define FP_TEMPLATE_FORMAT_VERSION 3
-
-/* Constants for encryption parameters */
-#define FP_CONTEXT_NONCE_BYTES 12
-#define FP_CONTEXT_USERID_WORDS (32 / sizeof(uint32_t))
-#define FP_CONTEXT_TAG_BYTES 16
-#define FP_CONTEXT_SALT_BYTES 16
-#define FP_CONTEXT_TPM_BYTES 32
-
-struct ec_fp_template_encryption_metadata {
-	/*
-	 * Version of the structure format (N=3).
-	 */
-	uint16_t struct_version;
-	/* Reserved bytes, set to 0. */
-	uint16_t reserved;
-	/*
-	 * The salt is *only* ever used for key derivation. The nonce is unique,
-	 * a different one is used for every message.
-	 */
-	uint8_t nonce[FP_CONTEXT_NONCE_BYTES];
-	uint8_t salt[FP_CONTEXT_SALT_BYTES];
-	uint8_t tag[FP_CONTEXT_TAG_BYTES];
-};
-
-struct ec_params_fp_frame {
-	/*
-	 * The offset contains the template index or FP_FRAME_INDEX_RAW_IMAGE
-	 * in the high nibble, and the real offset within the frame in
-	 * FP_FRAME_OFFSET_MASK.
-	 */
-	uint32_t offset;
-	uint32_t size;
-} __ec_align4;
-
-/* Load a template into the MCU */
-#define EC_CMD_FP_TEMPLATE 0x0405
-
-/* Flag in the 'size' field indicating that the full template has been sent */
-#define FP_TEMPLATE_COMMIT 0x80000000
-
-struct ec_params_fp_template {
-	uint32_t offset;
-	uint32_t size;
-	uint8_t data[];
-} __ec_align4;
-
-/* Clear the current fingerprint user context and set a new one */
-#define EC_CMD_FP_CONTEXT 0x0406
-
-struct ec_params_fp_context {
-	uint32_t userid[FP_CONTEXT_USERID_WORDS];
-} __ec_align4;
-
-#define EC_CMD_FP_STATS 0x0407
-
-#define FPSTATS_CAPTURE_INV  BIT(0)
-#define FPSTATS_MATCHING_INV BIT(1)
-
-struct ec_response_fp_stats {
-	uint32_t capture_time_us;
-	uint32_t matching_time_us;
-	uint32_t overall_time_us;
-	struct {
-		uint32_t lo;
-		uint32_t hi;
-	} overall_t0;
-	uint8_t timestamps_invalid;
-	int8_t template_matched;
-} __ec_align2;
-
-#define EC_CMD_FP_SEED 0x0408
-struct ec_params_fp_seed {
-	/*
-	 * Version of the structure format (N=3).
-	 */
-	uint16_t struct_version;
-	/* Reserved bytes, set to 0. */
-	uint16_t reserved;
-	/* Seed from the TPM. */
-	uint8_t seed[FP_CONTEXT_TPM_BYTES];
-} __ec_align4;
-
-/*****************************************************************************/
-/* Touchpad MCU commands: range 0x0500-0x05FF */
-
-/* Perform touchpad self test */
-#define EC_CMD_TP_SELF_TEST 0x0500
-
-/* Get number of frame types, and the size of each type */
-#define EC_CMD_TP_FRAME_INFO 0x0501
-
-struct ec_response_tp_frame_info {
-	uint32_t n_frames;
-	uint32_t frame_sizes[0];
-} __ec_align4;
-
-/* Create a snapshot of current frame readings */
-#define EC_CMD_TP_FRAME_SNAPSHOT 0x0502
-
-/* Read the frame */
-#define EC_CMD_TP_FRAME_GET 0x0503
-
-struct ec_params_tp_frame_get {
-	uint32_t frame_index;
-	uint32_t offset;
-	uint32_t size;
-} __ec_align4;
-
-/*****************************************************************************/
-/* EC-EC communication commands: range 0x0600-0x06FF */
-
-#define EC_COMM_TEXT_MAX 8
-
-/*
- * Get battery static information, i.e. information that never changes, or
- * very infrequently.
- */
-#define EC_CMD_BATTERY_GET_STATIC 0x0600
-
-/**
- * struct ec_params_battery_static_info - Battery static info parameters
- * @index: Battery index.
- */
-struct ec_params_battery_static_info {
-	uint8_t index;
-} __ec_align_size1;
-
-/**
- * struct ec_response_battery_static_info - Battery static info response
- * @design_capacity: Battery Design Capacity (mAh)
- * @design_voltage: Battery Design Voltage (mV)
- * @manufacturer: Battery Manufacturer String
- * @model: Battery Model Number String
- * @serial: Battery Serial Number String
- * @type: Battery Type String
- * @cycle_count: Battery Cycle Count
- */
-struct ec_response_battery_static_info {
-	uint16_t design_capacity;
-	uint16_t design_voltage;
-	char manufacturer[EC_COMM_TEXT_MAX];
-	char model[EC_COMM_TEXT_MAX];
-	char serial[EC_COMM_TEXT_MAX];
-	char type[EC_COMM_TEXT_MAX];
-	/* TODO(crbug.com/795991): Consider moving to dynamic structure. */
-	uint32_t cycle_count;
-} __ec_align4;
-
-/*
- * Get battery dynamic information, i.e. information that is likely to change
- * every time it is read.
- */
-#define EC_CMD_BATTERY_GET_DYNAMIC 0x0601
-
-/**
- * struct ec_params_battery_dynamic_info - Battery dynamic info parameters
- * @index: Battery index.
- */
-struct ec_params_battery_dynamic_info {
-	uint8_t index;
-} __ec_align_size1;
-
-/**
- * struct ec_response_battery_dynamic_info - Battery dynamic info response
- * @actual_voltage: Battery voltage (mV)
- * @actual_current: Battery current (mA); negative=discharging
- * @remaining_capacity: Remaining capacity (mAh)
- * @full_capacity: Capacity (mAh, might change occasionally)
- * @flags: Flags, see EC_BATT_FLAG_*
- * @desired_voltage: Charging voltage desired by battery (mV)
- * @desired_current: Charging current desired by battery (mA)
- */
-struct ec_response_battery_dynamic_info {
-	int16_t actual_voltage;
-	int16_t actual_current;
-	int16_t remaining_capacity;
-	int16_t full_capacity;
-	int16_t flags;
-	int16_t desired_voltage;
-	int16_t desired_current;
-} __ec_align2;
-
-/*
- * Control charger chip. Used to control charger chip on the slave.
- */
-#define EC_CMD_CHARGER_CONTROL 0x0602
-
-/**
- * struct ec_params_charger_control - Charger control parameters
- * @max_current: Charger current (mA). Positive to allow base to draw up to
- *     max_current and (possibly) charge battery, negative to request current
- *     from base (OTG).
- * @otg_voltage: Voltage (mV) to use in OTG mode, ignored if max_current is
- *     >= 0.
- * @allow_charging: Allow base battery charging (only makes sense if
- *     max_current > 0).
- */
-struct ec_params_charger_control {
-	int16_t max_current;
-	uint16_t otg_voltage;
-	uint8_t allow_charging;
-} __ec_align_size1;
-
-/*****************************************************************************/
-/*
- * Reserve a range of host commands for board-specific, experimental, or
- * special purpose features. These can be (re)used without updating this file.
- *
- * CAUTION: Don't go nuts with this. Shipping products should document ALL
- * their EC commands for easier development, testing, debugging, and support.
- *
- * All commands MUST be #defined to be 4-digit UPPER CASE hex values
- * (e.g., 0x00AB, not 0xab) for CONFIG_HOSTCMD_SECTION_SORTED to work.
- *
- * In your experimental code, you may want to do something like this:
- *
- *   #define EC_CMD_MAGIC_FOO 0x0000
- *   #define EC_CMD_MAGIC_BAR 0x0001
- *   #define EC_CMD_MAGIC_HEY 0x0002
- *
- *   DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_FOO, magic_foo_handler,
- *      EC_VER_MASK(0);
- *
- *   DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_BAR, magic_bar_handler,
- *      EC_VER_MASK(0);
- *
- *   DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_HEY, magic_hey_handler,
- *      EC_VER_MASK(0);
- */
-#define EC_CMD_BOARD_SPECIFIC_BASE 0x3E00
-#define EC_CMD_BOARD_SPECIFIC_LAST 0x3FFF
-
-/*
- * Given the private host command offset, calculate the true private host
- * command value.
- */
-#define EC_PRIVATE_HOST_COMMAND_VALUE(command) \
-	(EC_CMD_BOARD_SPECIFIC_BASE + (command))
-
-/*****************************************************************************/
-/*
- * Passthru commands
- *
- * Some platforms have sub-processors chained to each other.  For example.
- *
- *     AP <--> EC <--> PD MCU
- *
- * The top 2 bits of the command number are used to indicate which device the
- * command is intended for.  Device 0 is always the device receiving the
- * command; other device mapping is board-specific.
- *
- * When a device receives a command to be passed to a sub-processor, it passes
- * it on with the device number set back to 0.  This allows the sub-processor
- * to remain blissfully unaware of whether the command originated on the next
- * device up the chain, or was passed through from the AP.
- *
- * In the above example, if the AP wants to send command 0x0002 to the PD MCU,
- *     AP sends command 0x4002 to the EC
- *     EC sends command 0x0002 to the PD MCU
- *     EC forwards PD MCU response back to the AP
- */
-
-/* Offset and max command number for sub-device n */
-#define EC_CMD_PASSTHRU_OFFSET(n) (0x4000 * (n))
-#define EC_CMD_PASSTHRU_MAX(n) (EC_CMD_PASSTHRU_OFFSET(n) + 0x3fff)
-
-/*****************************************************************************/
-/*
- * Deprecated constants. These constants have been renamed for clarity. The
- * meaning and size has not changed. Programs that use the old names should
- * switch to the new names soon, as the old names may not be carried forward
- * forever.
- */
-#define EC_HOST_PARAM_SIZE      EC_PROTO2_MAX_PARAM_SIZE
-#define EC_LPC_ADDR_OLD_PARAM   EC_HOST_CMD_REGION1
-#define EC_OLD_PARAM_SIZE       EC_HOST_CMD_REGION_SIZE
-
-
-
-#endif  /* __CROS_EC_COMMANDS_H */
diff --git a/include/linux/platform_data/cros_ec_chardev.h b/include/linux/platform_data/cros_ec_chardev.h
index 6600b54f531c..973b2615aa02 100644
--- a/include/linux/platform_data/cros_ec_chardev.h
+++ b/include/linux/platform_data/cros_ec_chardev.h
@@ -12,7 +12,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-#include <linux/mfd/cros_ec.h>
+#include <linux/platform_data/cros_ec_commands.h>
 
 #define CROS_EC_DEV_VERSION "1.0.0"
 
diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h
new file mode 100644
index 000000000000..7ccb8757b79d
--- /dev/null
+++ b/include/linux/platform_data/cros_ec_commands.h
@@ -0,0 +1,5713 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host communication command constants for ChromeOS EC
+ *
+ * Copyright (C) 2012 Google, Inc
+ *
+ * NOTE: This file is auto-generated from ChromeOS EC Open Source code from
+ * https://chromium.googlesource.com/chromiumos/platform/ec/+/master/include/ec_commands.h
+ */
+
+/* Host communication command constants for Chrome EC */
+
+#ifndef __CROS_EC_COMMANDS_H
+#define __CROS_EC_COMMANDS_H
+
+
+
+
+#define BUILD_ASSERT(_cond)
+
+/*
+ * Current version of this protocol
+ *
+ * TODO(crosbug.com/p/11223): This is effectively useless; protocol is
+ * determined in other ways.  Remove this once the kernel code no longer
+ * depends on it.
+ */
+#define EC_PROTO_VERSION          0x00000002
+
+/* Command version mask */
+#define EC_VER_MASK(version) BIT(version)
+
+/* I/O addresses for ACPI commands */
+#define EC_LPC_ADDR_ACPI_DATA  0x62
+#define EC_LPC_ADDR_ACPI_CMD   0x66
+
+/* I/O addresses for host command */
+#define EC_LPC_ADDR_HOST_DATA  0x200
+#define EC_LPC_ADDR_HOST_CMD   0x204
+
+/* I/O addresses for host command args and params */
+/* Protocol version 2 */
+#define EC_LPC_ADDR_HOST_ARGS    0x800  /* And 0x801, 0x802, 0x803 */
+#define EC_LPC_ADDR_HOST_PARAM   0x804  /* For version 2 params; size is
+					 * EC_PROTO2_MAX_PARAM_SIZE
+					 */
+/* Protocol version 3 */
+#define EC_LPC_ADDR_HOST_PACKET  0x800  /* Offset of version 3 packet */
+#define EC_LPC_HOST_PACKET_SIZE  0x100  /* Max size of version 3 packet */
+
+/*
+ * The actual block is 0x800-0x8ff, but some BIOSes think it's 0x880-0x8ff
+ * and they tell the kernel that so we have to think of it as two parts.
+ */
+#define EC_HOST_CMD_REGION0    0x800
+#define EC_HOST_CMD_REGION1    0x880
+#define EC_HOST_CMD_REGION_SIZE 0x80
+
+/* EC command register bit functions */
+#define EC_LPC_CMDR_DATA	BIT(0)  /* Data ready for host to read */
+#define EC_LPC_CMDR_PENDING	BIT(1)  /* Write pending to EC */
+#define EC_LPC_CMDR_BUSY	BIT(2)  /* EC is busy processing a command */
+#define EC_LPC_CMDR_CMD		BIT(3)  /* Last host write was a command */
+#define EC_LPC_CMDR_ACPI_BRST	BIT(4)  /* Burst mode (not used) */
+#define EC_LPC_CMDR_SCI		BIT(5)  /* SCI event is pending */
+#define EC_LPC_CMDR_SMI		BIT(6)  /* SMI event is pending */
+
+#define EC_LPC_ADDR_MEMMAP       0x900
+#define EC_MEMMAP_SIZE         255 /* ACPI IO buffer max is 255 bytes */
+#define EC_MEMMAP_TEXT_MAX     8   /* Size of a string in the memory map */
+
+/* The offset address of each type of data in mapped memory. */
+#define EC_MEMMAP_TEMP_SENSOR      0x00 /* Temp sensors 0x00 - 0x0f */
+#define EC_MEMMAP_FAN              0x10 /* Fan speeds 0x10 - 0x17 */
+#define EC_MEMMAP_TEMP_SENSOR_B    0x18 /* More temp sensors 0x18 - 0x1f */
+#define EC_MEMMAP_ID               0x20 /* 0x20 == 'E', 0x21 == 'C' */
+#define EC_MEMMAP_ID_VERSION       0x22 /* Version of data in 0x20 - 0x2f */
+#define EC_MEMMAP_THERMAL_VERSION  0x23 /* Version of data in 0x00 - 0x1f */
+#define EC_MEMMAP_BATTERY_VERSION  0x24 /* Version of data in 0x40 - 0x7f */
+#define EC_MEMMAP_SWITCHES_VERSION 0x25 /* Version of data in 0x30 - 0x33 */
+#define EC_MEMMAP_EVENTS_VERSION   0x26 /* Version of data in 0x34 - 0x3f */
+#define EC_MEMMAP_HOST_CMD_FLAGS   0x27 /* Host cmd interface flags (8 bits) */
+/* Unused 0x28 - 0x2f */
+#define EC_MEMMAP_SWITCHES         0x30	/* 8 bits */
+/* Unused 0x31 - 0x33 */
+#define EC_MEMMAP_HOST_EVENTS      0x34 /* 64 bits */
+/* Battery values are all 32 bits, unless otherwise noted. */
+#define EC_MEMMAP_BATT_VOLT        0x40 /* Battery Present Voltage */
+#define EC_MEMMAP_BATT_RATE        0x44 /* Battery Present Rate */
+#define EC_MEMMAP_BATT_CAP         0x48 /* Battery Remaining Capacity */
+#define EC_MEMMAP_BATT_FLAG        0x4c /* Battery State, see below (8-bit) */
+#define EC_MEMMAP_BATT_COUNT       0x4d /* Battery Count (8-bit) */
+#define EC_MEMMAP_BATT_INDEX       0x4e /* Current Battery Data Index (8-bit) */
+/* Unused 0x4f */
+#define EC_MEMMAP_BATT_DCAP        0x50 /* Battery Design Capacity */
+#define EC_MEMMAP_BATT_DVLT        0x54 /* Battery Design Voltage */
+#define EC_MEMMAP_BATT_LFCC        0x58 /* Battery Last Full Charge Capacity */
+#define EC_MEMMAP_BATT_CCNT        0x5c /* Battery Cycle Count */
+/* Strings are all 8 bytes (EC_MEMMAP_TEXT_MAX) */
+#define EC_MEMMAP_BATT_MFGR        0x60 /* Battery Manufacturer String */
+#define EC_MEMMAP_BATT_MODEL       0x68 /* Battery Model Number String */
+#define EC_MEMMAP_BATT_SERIAL      0x70 /* Battery Serial Number String */
+#define EC_MEMMAP_BATT_TYPE        0x78 /* Battery Type String */
+#define EC_MEMMAP_ALS              0x80 /* ALS readings in lux (2 X 16 bits) */
+/* Unused 0x84 - 0x8f */
+#define EC_MEMMAP_ACC_STATUS       0x90 /* Accelerometer status (8 bits )*/
+/* Unused 0x91 */
+#define EC_MEMMAP_ACC_DATA         0x92 /* Accelerometers data 0x92 - 0x9f */
+/* 0x92: Lid Angle if available, LID_ANGLE_UNRELIABLE otherwise */
+/* 0x94 - 0x99: 1st Accelerometer */
+/* 0x9a - 0x9f: 2nd Accelerometer */
+#define EC_MEMMAP_GYRO_DATA        0xa0 /* Gyroscope data 0xa0 - 0xa5 */
+/* Unused 0xa6 - 0xdf */
+
+/*
+ * ACPI is unable to access memory mapped data at or above this offset due to
+ * limitations of the ACPI protocol. Do not place data in the range 0xe0 - 0xfe
+ * which might be needed by ACPI.
+ */
+#define EC_MEMMAP_NO_ACPI 0xe0
+
+/* Define the format of the accelerometer mapped memory status byte. */
+#define EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK  0x0f
+#define EC_MEMMAP_ACC_STATUS_BUSY_BIT        BIT(4)
+#define EC_MEMMAP_ACC_STATUS_PRESENCE_BIT    BIT(7)
+
+/* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */
+#define EC_TEMP_SENSOR_ENTRIES     16
+/*
+ * Number of temp sensors at EC_MEMMAP_TEMP_SENSOR_B.
+ *
+ * Valid only if EC_MEMMAP_THERMAL_VERSION returns >= 2.
+ */
+#define EC_TEMP_SENSOR_B_ENTRIES      8
+
+/* Special values for mapped temperature sensors */
+#define EC_TEMP_SENSOR_NOT_PRESENT    0xff
+#define EC_TEMP_SENSOR_ERROR          0xfe
+#define EC_TEMP_SENSOR_NOT_POWERED    0xfd
+#define EC_TEMP_SENSOR_NOT_CALIBRATED 0xfc
+/*
+ * The offset of temperature value stored in mapped memory.  This allows
+ * reporting a temperature range of 200K to 454K = -73C to 181C.
+ */
+#define EC_TEMP_SENSOR_OFFSET      200
+
+/*
+ * Number of ALS readings at EC_MEMMAP_ALS
+ */
+#define EC_ALS_ENTRIES             2
+
+/*
+ * The default value a temperature sensor will return when it is present but
+ * has not been read this boot.  This is a reasonable number to avoid
+ * triggering alarms on the host.
+ */
+#define EC_TEMP_SENSOR_DEFAULT     (296 - EC_TEMP_SENSOR_OFFSET)
+
+#define EC_FAN_SPEED_ENTRIES       4       /* Number of fans at EC_MEMMAP_FAN */
+#define EC_FAN_SPEED_NOT_PRESENT   0xffff  /* Entry not present */
+#define EC_FAN_SPEED_STALLED       0xfffe  /* Fan stalled */
+
+/* Battery bit flags at EC_MEMMAP_BATT_FLAG. */
+#define EC_BATT_FLAG_AC_PRESENT   0x01
+#define EC_BATT_FLAG_BATT_PRESENT 0x02
+#define EC_BATT_FLAG_DISCHARGING  0x04
+#define EC_BATT_FLAG_CHARGING     0x08
+#define EC_BATT_FLAG_LEVEL_CRITICAL 0x10
+/* Set if some of the static/dynamic data is invalid (or outdated). */
+#define EC_BATT_FLAG_INVALID_DATA 0x20
+
+/* Switch flags at EC_MEMMAP_SWITCHES */
+#define EC_SWITCH_LID_OPEN               0x01
+#define EC_SWITCH_POWER_BUTTON_PRESSED   0x02
+#define EC_SWITCH_WRITE_PROTECT_DISABLED 0x04
+/* Was recovery requested via keyboard; now unused. */
+#define EC_SWITCH_IGNORE1		 0x08
+/* Recovery requested via dedicated signal (from servo board) */
+#define EC_SWITCH_DEDICATED_RECOVERY     0x10
+/* Was fake developer mode switch; now unused.  Remove in next refactor. */
+#define EC_SWITCH_IGNORE0                0x20
+
+/* Host command interface flags */
+/* Host command interface supports LPC args (LPC interface only) */
+#define EC_HOST_CMD_FLAG_LPC_ARGS_SUPPORTED  0x01
+/* Host command interface supports version 3 protocol */
+#define EC_HOST_CMD_FLAG_VERSION_3   0x02
+
+/* Wireless switch flags */
+#define EC_WIRELESS_SWITCH_ALL       ~0x00  /* All flags */
+#define EC_WIRELESS_SWITCH_WLAN       0x01  /* WLAN radio */
+#define EC_WIRELESS_SWITCH_BLUETOOTH  0x02  /* Bluetooth radio */
+#define EC_WIRELESS_SWITCH_WWAN       0x04  /* WWAN power */
+#define EC_WIRELESS_SWITCH_WLAN_POWER 0x08  /* WLAN power */
+
+/*****************************************************************************/
+/*
+ * ACPI commands
+ *
+ * These are valid ONLY on the ACPI command/data port.
+ */
+
+/*
+ * ACPI Read Embedded Controller
+ *
+ * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ *    - Write EC_CMD_ACPI_READ to EC_LPC_ADDR_ACPI_CMD
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write address to EC_LPC_ADDR_ACPI_DATA
+ *    - Wait for EC_LPC_CMDR_DATA bit to set
+ *    - Read value from EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_READ 0x0080
+
+/*
+ * ACPI Write Embedded Controller
+ *
+ * This reads from ACPI memory space on the EC (EC_ACPI_MEM_*).
+ *
+ * Use the following sequence:
+ *
+ *    - Write EC_CMD_ACPI_WRITE to EC_LPC_ADDR_ACPI_CMD
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write address to EC_LPC_ADDR_ACPI_DATA
+ *    - Wait for EC_LPC_CMDR_PENDING bit to clear
+ *    - Write value to EC_LPC_ADDR_ACPI_DATA
+ */
+#define EC_CMD_ACPI_WRITE 0x0081
+
+/*
+ * ACPI Burst Enable Embedded Controller
+ *
+ * This enables burst mode on the EC to allow the host to issue several
+ * commands back-to-back. While in this mode, writes to mapped multi-byte
+ * data are locked out to ensure data consistency.
+ */
+#define EC_CMD_ACPI_BURST_ENABLE 0x0082
+
+/*
+ * ACPI Burst Disable Embedded Controller
+ *
+ * This disables burst mode on the EC and stops preventing EC writes to mapped
+ * multi-byte data.
+ */
+#define EC_CMD_ACPI_BURST_DISABLE 0x0083
+
+/*
+ * ACPI Query Embedded Controller
+ *
+ * This clears the lowest-order bit in the currently pending host events, and
+ * sets the result code to the 1-based index of the bit (event 0x00000001 = 1,
+ * event 0x80000000 = 32), or 0 if no event was pending.
+ */
+#define EC_CMD_ACPI_QUERY_EVENT 0x0084
+
+/* Valid addresses in ACPI memory space, for read/write commands */
+
+/* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */
+#define EC_ACPI_MEM_VERSION            0x00
+/*
+ * Test location; writing value here updates test compliment byte to (0xff -
+ * value).
+ */
+#define EC_ACPI_MEM_TEST               0x01
+/* Test compliment; writes here are ignored. */
+#define EC_ACPI_MEM_TEST_COMPLIMENT    0x02
+
+/* Keyboard backlight brightness percent (0 - 100) */
+#define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03
+/* DPTF Target Fan Duty (0-100, 0xff for auto/none) */
+#define EC_ACPI_MEM_FAN_DUTY           0x04
+
+/*
+ * DPTF temp thresholds. Any of the EC's temp sensors can have up to two
+ * independent thresholds attached to them. The current value of the ID
+ * register determines which sensor is affected by the THRESHOLD and COMMIT
+ * registers. The THRESHOLD register uses the same EC_TEMP_SENSOR_OFFSET scheme
+ * as the memory-mapped sensors. The COMMIT register applies those settings.
+ *
+ * The spec does not mandate any way to read back the threshold settings
+ * themselves, but when a threshold is crossed the AP needs a way to determine
+ * which sensor(s) are responsible. Each reading of the ID register clears and
+ * returns one sensor ID that has crossed one of its threshold (in either
+ * direction) since the last read. A value of 0xFF means "no new thresholds
+ * have tripped". Setting or enabling the thresholds for a sensor will clear
+ * the unread event count for that sensor.
+ */
+#define EC_ACPI_MEM_TEMP_ID            0x05
+#define EC_ACPI_MEM_TEMP_THRESHOLD     0x06
+#define EC_ACPI_MEM_TEMP_COMMIT        0x07
+/*
+ * Here are the bits for the COMMIT register:
+ *   bit 0 selects the threshold index for the chosen sensor (0/1)
+ *   bit 1 enables/disables the selected threshold (0 = off, 1 = on)
+ * Each write to the commit register affects one threshold.
+ */
+#define EC_ACPI_MEM_TEMP_COMMIT_SELECT_MASK BIT(0)
+#define EC_ACPI_MEM_TEMP_COMMIT_ENABLE_MASK BIT(1)
+/*
+ * Example:
+ *
+ * Set the thresholds for sensor 2 to 50 C and 60 C:
+ *   write 2 to [0x05]      --  select temp sensor 2
+ *   write 0x7b to [0x06]   --  C_TO_K(50) - EC_TEMP_SENSOR_OFFSET
+ *   write 0x2 to [0x07]    --  enable threshold 0 with this value
+ *   write 0x85 to [0x06]   --  C_TO_K(60) - EC_TEMP_SENSOR_OFFSET
+ *   write 0x3 to [0x07]    --  enable threshold 1 with this value
+ *
+ * Disable the 60 C threshold, leaving the 50 C threshold unchanged:
+ *   write 2 to [0x05]      --  select temp sensor 2
+ *   write 0x1 to [0x07]    --  disable threshold 1
+ */
+
+/* DPTF battery charging current limit */
+#define EC_ACPI_MEM_CHARGING_LIMIT     0x08
+
+/* Charging limit is specified in 64 mA steps */
+#define EC_ACPI_MEM_CHARGING_LIMIT_STEP_MA   64
+/* Value to disable DPTF battery charging limit */
+#define EC_ACPI_MEM_CHARGING_LIMIT_DISABLED  0xff
+
+/*
+ * Report device orientation
+ *  Bits       Definition
+ *  3:1        Device DPTF Profile Number (DDPN)
+ *               0   = Reserved for backward compatibility (indicates no valid
+ *                     profile number. Host should fall back to using TBMD).
+ *              1..7 = DPTF Profile number to indicate to host which table needs
+ *                     to be loaded.
+ *   0         Tablet Mode Device Indicator (TBMD)
+ */
+#define EC_ACPI_MEM_DEVICE_ORIENTATION 0x09
+#define EC_ACPI_MEM_TBMD_SHIFT         0
+#define EC_ACPI_MEM_TBMD_MASK          0x1
+#define EC_ACPI_MEM_DDPN_SHIFT         1
+#define EC_ACPI_MEM_DDPN_MASK          0x7
+
+/*
+ * Report device features. Uses the same format as the host command, except:
+ *
+ * bit 0 (EC_FEATURE_LIMITED) changes meaning from "EC code has a limited set
+ * of features", which is of limited interest when the system is already
+ * interpreting ACPI bytecode, to "EC_FEATURES[0-7] is not supported". Since
+ * these are supported, it defaults to 0.
+ * This allows detecting the presence of this field since older versions of
+ * the EC codebase would simply return 0xff to that unknown address. Check
+ * FEATURES0 != 0xff (or FEATURES0[0] == 0) to make sure that the other bits
+ * are valid.
+ */
+#define EC_ACPI_MEM_DEVICE_FEATURES0 0x0a
+#define EC_ACPI_MEM_DEVICE_FEATURES1 0x0b
+#define EC_ACPI_MEM_DEVICE_FEATURES2 0x0c
+#define EC_ACPI_MEM_DEVICE_FEATURES3 0x0d
+#define EC_ACPI_MEM_DEVICE_FEATURES4 0x0e
+#define EC_ACPI_MEM_DEVICE_FEATURES5 0x0f
+#define EC_ACPI_MEM_DEVICE_FEATURES6 0x10
+#define EC_ACPI_MEM_DEVICE_FEATURES7 0x11
+
+#define EC_ACPI_MEM_BATTERY_INDEX    0x12
+
+/*
+ * USB Port Power. Each bit indicates whether the corresponding USB ports' power
+ * is enabled (1) or disabled (0).
+ *   bit 0 USB port ID 0
+ *   ...
+ *   bit 7 USB port ID 7
+ */
+#define EC_ACPI_MEM_USB_PORT_POWER 0x13
+
+/*
+ * ACPI addresses 0x20 - 0xff map to EC_MEMMAP offset 0x00 - 0xdf.  This data
+ * is read-only from the AP.  Added in EC_ACPI_MEM_VERSION 2.
+ */
+#define EC_ACPI_MEM_MAPPED_BEGIN   0x20
+#define EC_ACPI_MEM_MAPPED_SIZE    0xe0
+
+/* Current version of ACPI memory address space */
+#define EC_ACPI_MEM_VERSION_CURRENT 2
+
+
+/*
+ * This header file is used in coreboot both in C and ACPI code.  The ACPI code
+ * is pre-processed to handle constants but the ASL compiler is unable to
+ * handle actual C code so keep it separate.
+ */
+
+
+/*
+ * Attributes for EC request and response packets.  Just defining __packed
+ * results in inefficient assembly code on ARM, if the structure is actually
+ * 32-bit aligned, as it should be for all buffers.
+ *
+ * Be very careful when adding these to existing structures.  They will round
+ * up the structure size to the specified boundary.
+ *
+ * Also be very careful to make that if a structure is included in some other
+ * parent structure that the alignment will still be true given the packing of
+ * the parent structure.  This is particularly important if the sub-structure
+ * will be passed as a pointer to another function, since that function will
+ * not know about the misaligment caused by the parent structure's packing.
+ *
+ * Also be very careful using __packed - particularly when nesting non-packed
+ * structures inside packed ones.  In fact, DO NOT use __packed directly;
+ * always use one of these attributes.
+ *
+ * Once everything is annotated properly, the following search strings should
+ * not return ANY matches in this file other than right here:
+ *
+ * "__packed" - generates inefficient code; all sub-structs must also be packed
+ *
+ * "struct [^_]" - all structs should be annotated, except for structs that are
+ * members of other structs/unions (and their original declarations should be
+ * annotated).
+ */
+
+/*
+ * Packed structures make no assumption about alignment, so they do inefficient
+ * byte-wise reads.
+ */
+#define __ec_align1 __packed
+#define __ec_align2 __packed
+#define __ec_align4 __packed
+#define __ec_align_size1 __packed
+#define __ec_align_offset1 __packed
+#define __ec_align_offset2 __packed
+#define __ec_todo_packed __packed
+#define __ec_todo_unpacked
+
+
+/* LPC command status byte masks */
+/* EC has written a byte in the data register and host hasn't read it yet */
+#define EC_LPC_STATUS_TO_HOST     0x01
+/* Host has written a command/data byte and the EC hasn't read it yet */
+#define EC_LPC_STATUS_FROM_HOST   0x02
+/* EC is processing a command */
+#define EC_LPC_STATUS_PROCESSING  0x04
+/* Last write to EC was a command, not data */
+#define EC_LPC_STATUS_LAST_CMD    0x08
+/* EC is in burst mode */
+#define EC_LPC_STATUS_BURST_MODE  0x10
+/* SCI event is pending (requesting SCI query) */
+#define EC_LPC_STATUS_SCI_PENDING 0x20
+/* SMI event is pending (requesting SMI query) */
+#define EC_LPC_STATUS_SMI_PENDING 0x40
+/* (reserved) */
+#define EC_LPC_STATUS_RESERVED    0x80
+
+/*
+ * EC is busy.  This covers both the EC processing a command, and the host has
+ * written a new command but the EC hasn't picked it up yet.
+ */
+#define EC_LPC_STATUS_BUSY_MASK \
+	(EC_LPC_STATUS_FROM_HOST | EC_LPC_STATUS_PROCESSING)
+
+/*
+ * Host command response codes (16-bit).  Note that response codes should be
+ * stored in a uint16_t rather than directly in a value of this type.
+ */
+enum ec_status {
+	EC_RES_SUCCESS = 0,
+	EC_RES_INVALID_COMMAND = 1,
+	EC_RES_ERROR = 2,
+	EC_RES_INVALID_PARAM = 3,
+	EC_RES_ACCESS_DENIED = 4,
+	EC_RES_INVALID_RESPONSE = 5,
+	EC_RES_INVALID_VERSION = 6,
+	EC_RES_INVALID_CHECKSUM = 7,
+	EC_RES_IN_PROGRESS = 8,		/* Accepted, command in progress */
+	EC_RES_UNAVAILABLE = 9,		/* No response available */
+	EC_RES_TIMEOUT = 10,		/* We got a timeout */
+	EC_RES_OVERFLOW = 11,		/* Table / data overflow */
+	EC_RES_INVALID_HEADER = 12,     /* Header contains invalid data */
+	EC_RES_REQUEST_TRUNCATED = 13,  /* Didn't get the entire request */
+	EC_RES_RESPONSE_TOO_BIG = 14,   /* Response was too big to handle */
+	EC_RES_BUS_ERROR = 15,		/* Communications bus error */
+	EC_RES_BUSY = 16,		/* Up but too busy.  Should retry */
+	EC_RES_INVALID_HEADER_VERSION = 17,  /* Header version invalid */
+	EC_RES_INVALID_HEADER_CRC = 18,      /* Header CRC invalid */
+	EC_RES_INVALID_DATA_CRC = 19,        /* Data CRC invalid */
+	EC_RES_DUP_UNAVAILABLE = 20,         /* Can't resend response */
+};
+
+/*
+ * Host event codes.  Note these are 1-based, not 0-based, because ACPI query
+ * EC command uses code 0 to mean "no event pending".  We explicitly specify
+ * each value in the enum listing so they won't change if we delete/insert an
+ * item or rearrange the list (it needs to be stable across platforms, not
+ * just within a single compiled instance).
+ */
+enum host_event_code {
+	EC_HOST_EVENT_LID_CLOSED = 1,
+	EC_HOST_EVENT_LID_OPEN = 2,
+	EC_HOST_EVENT_POWER_BUTTON = 3,
+	EC_HOST_EVENT_AC_CONNECTED = 4,
+	EC_HOST_EVENT_AC_DISCONNECTED = 5,
+	EC_HOST_EVENT_BATTERY_LOW = 6,
+	EC_HOST_EVENT_BATTERY_CRITICAL = 7,
+	EC_HOST_EVENT_BATTERY = 8,
+	EC_HOST_EVENT_THERMAL_THRESHOLD = 9,
+	/* Event generated by a device attached to the EC */
+	EC_HOST_EVENT_DEVICE = 10,
+	EC_HOST_EVENT_THERMAL = 11,
+	EC_HOST_EVENT_USB_CHARGER = 12,
+	EC_HOST_EVENT_KEY_PRESSED = 13,
+	/*
+	 * EC has finished initializing the host interface.  The host can check
+	 * for this event following sending a EC_CMD_REBOOT_EC command to
+	 * determine when the EC is ready to accept subsequent commands.
+	 */
+	EC_HOST_EVENT_INTERFACE_READY = 14,
+	/* Keyboard recovery combo has been pressed */
+	EC_HOST_EVENT_KEYBOARD_RECOVERY = 15,
+
+	/* Shutdown due to thermal overload */
+	EC_HOST_EVENT_THERMAL_SHUTDOWN = 16,
+	/* Shutdown due to battery level too low */
+	EC_HOST_EVENT_BATTERY_SHUTDOWN = 17,
+
+	/* Suggest that the AP throttle itself */
+	EC_HOST_EVENT_THROTTLE_START = 18,
+	/* Suggest that the AP resume normal speed */
+	EC_HOST_EVENT_THROTTLE_STOP = 19,
+
+	/* Hang detect logic detected a hang and host event timeout expired */
+	EC_HOST_EVENT_HANG_DETECT = 20,
+	/* Hang detect logic detected a hang and warm rebooted the AP */
+	EC_HOST_EVENT_HANG_REBOOT = 21,
+
+	/* PD MCU triggering host event */
+	EC_HOST_EVENT_PD_MCU = 22,
+
+	/* Battery Status flags have changed */
+	EC_HOST_EVENT_BATTERY_STATUS = 23,
+
+	/* EC encountered a panic, triggering a reset */
+	EC_HOST_EVENT_PANIC = 24,
+
+	/* Keyboard fastboot combo has been pressed */
+	EC_HOST_EVENT_KEYBOARD_FASTBOOT = 25,
+
+	/* EC RTC event occurred */
+	EC_HOST_EVENT_RTC = 26,
+
+	/* Emulate MKBP event */
+	EC_HOST_EVENT_MKBP = 27,
+
+	/* EC desires to change state of host-controlled USB mux */
+	EC_HOST_EVENT_USB_MUX = 28,
+
+	/* TABLET/LAPTOP mode or detachable base attach/detach event */
+	EC_HOST_EVENT_MODE_CHANGE = 29,
+
+	/* Keyboard recovery combo with hardware reinitialization */
+	EC_HOST_EVENT_KEYBOARD_RECOVERY_HW_REINIT = 30,
+
+	/*
+	 * The high bit of the event mask is not used as a host event code.  If
+	 * it reads back as set, then the entire event mask should be
+	 * considered invalid by the host.  This can happen when reading the
+	 * raw event status via EC_MEMMAP_HOST_EVENTS but the LPC interface is
+	 * not initialized on the EC, or improperly configured on the host.
+	 */
+	EC_HOST_EVENT_INVALID = 32
+};
+/* Host event mask */
+#define EC_HOST_EVENT_MASK(event_code) BIT_ULL((event_code) - 1)
+
+/**
+ * struct ec_lpc_host_args - Arguments at EC_LPC_ADDR_HOST_ARGS
+ * @flags: The host argument flags.
+ * @command_version: Command version.
+ * @data_size: The length of data.
+ * @checksum: Checksum; sum of command + flags + command_version + data_size +
+ *            all params/response data bytes.
+ */
+struct ec_lpc_host_args {
+	uint8_t flags;
+	uint8_t command_version;
+	uint8_t data_size;
+	uint8_t checksum;
+} __ec_align4;
+
+/* Flags for ec_lpc_host_args.flags */
+/*
+ * Args are from host.  Data area at EC_LPC_ADDR_HOST_PARAM contains command
+ * params.
+ *
+ * If EC gets a command and this flag is not set, this is an old-style command.
+ * Command version is 0 and params from host are at EC_LPC_ADDR_OLD_PARAM with
+ * unknown length.  EC must respond with an old-style response (that is,
+ * without setting EC_HOST_ARGS_FLAG_TO_HOST).
+ */
+#define EC_HOST_ARGS_FLAG_FROM_HOST 0x01
+/*
+ * Args are from EC.  Data area at EC_LPC_ADDR_HOST_PARAM contains response.
+ *
+ * If EC responds to a command and this flag is not set, this is an old-style
+ * response.  Command version is 0 and response data from EC is at
+ * EC_LPC_ADDR_OLD_PARAM with unknown length.
+ */
+#define EC_HOST_ARGS_FLAG_TO_HOST   0x02
+
+/*****************************************************************************/
+/*
+ * Byte codes returned by EC over SPI interface.
+ *
+ * These can be used by the AP to debug the EC interface, and to determine
+ * when the EC is not in a state where it will ever get around to responding
+ * to the AP.
+ *
+ * Example of sequence of bytes read from EC for a current good transfer:
+ *   1. -                  - AP asserts chip select (CS#)
+ *   2. EC_SPI_OLD_READY   - AP sends first byte(s) of request
+ *   3. -                  - EC starts handling CS# interrupt
+ *   4. EC_SPI_RECEIVING   - AP sends remaining byte(s) of request
+ *   5. EC_SPI_PROCESSING  - EC starts processing request; AP is clocking in
+ *                           bytes looking for EC_SPI_FRAME_START
+ *   6. -                  - EC finishes processing and sets up response
+ *   7. EC_SPI_FRAME_START - AP reads frame byte
+ *   8. (response packet)  - AP reads response packet
+ *   9. EC_SPI_PAST_END    - Any additional bytes read by AP
+ *   10 -                  - AP deasserts chip select
+ *   11 -                  - EC processes CS# interrupt and sets up DMA for
+ *                           next request
+ *
+ * If the AP is waiting for EC_SPI_FRAME_START and sees any value other than
+ * the following byte values:
+ *   EC_SPI_OLD_READY
+ *   EC_SPI_RX_READY
+ *   EC_SPI_RECEIVING
+ *   EC_SPI_PROCESSING
+ *
+ * Then the EC found an error in the request, or was not ready for the request
+ * and lost data.  The AP should give up waiting for EC_SPI_FRAME_START,
+ * because the EC is unable to tell when the AP is done sending its request.
+ */
+
+/*
+ * Framing byte which precedes a response packet from the EC.  After sending a
+ * request, the AP will clock in bytes until it sees the framing byte, then
+ * clock in the response packet.
+ */
+#define EC_SPI_FRAME_START    0xec
+
+/*
+ * Padding bytes which are clocked out after the end of a response packet.
+ */
+#define EC_SPI_PAST_END       0xed
+
+/*
+ * EC is ready to receive, and has ignored the byte sent by the AP.  EC expects
+ * that the AP will send a valid packet header (starting with
+ * EC_COMMAND_PROTOCOL_3) in the next 32 bytes.
+ */
+#define EC_SPI_RX_READY       0xf8
+
+/*
+ * EC has started receiving the request from the AP, but hasn't started
+ * processing it yet.
+ */
+#define EC_SPI_RECEIVING      0xf9
+
+/* EC has received the entire request from the AP and is processing it. */
+#define EC_SPI_PROCESSING     0xfa
+
+/*
+ * EC received bad data from the AP, such as a packet header with an invalid
+ * length.  EC will ignore all data until chip select deasserts.
+ */
+#define EC_SPI_RX_BAD_DATA    0xfb
+
+/*
+ * EC received data from the AP before it was ready.  That is, the AP asserted
+ * chip select and started clocking data before the EC was ready to receive it.
+ * EC will ignore all data until chip select deasserts.
+ */
+#define EC_SPI_NOT_READY      0xfc
+
+/*
+ * EC was ready to receive a request from the AP.  EC has treated the byte sent
+ * by the AP as part of a request packet, or (for old-style ECs) is processing
+ * a fully received packet but is not ready to respond yet.
+ */
+#define EC_SPI_OLD_READY      0xfd
+
+/*****************************************************************************/
+
+/*
+ * Protocol version 2 for I2C and SPI send a request this way:
+ *
+ *	0	EC_CMD_VERSION0 + (command version)
+ *	1	Command number
+ *	2	Length of params = N
+ *	3..N+2	Params, if any
+ *	N+3	8-bit checksum of bytes 0..N+2
+ *
+ * The corresponding response is:
+ *
+ *	0	Result code (EC_RES_*)
+ *	1	Length of params = M
+ *	2..M+1	Params, if any
+ *	M+2	8-bit checksum of bytes 0..M+1
+ */
+#define EC_PROTO2_REQUEST_HEADER_BYTES 3
+#define EC_PROTO2_REQUEST_TRAILER_BYTES 1
+#define EC_PROTO2_REQUEST_OVERHEAD (EC_PROTO2_REQUEST_HEADER_BYTES +	\
+				    EC_PROTO2_REQUEST_TRAILER_BYTES)
+
+#define EC_PROTO2_RESPONSE_HEADER_BYTES 2
+#define EC_PROTO2_RESPONSE_TRAILER_BYTES 1
+#define EC_PROTO2_RESPONSE_OVERHEAD (EC_PROTO2_RESPONSE_HEADER_BYTES +	\
+				     EC_PROTO2_RESPONSE_TRAILER_BYTES)
+
+/* Parameter length was limited by the LPC interface */
+#define EC_PROTO2_MAX_PARAM_SIZE 0xfc
+
+/* Maximum request and response packet sizes for protocol version 2 */
+#define EC_PROTO2_MAX_REQUEST_SIZE (EC_PROTO2_REQUEST_OVERHEAD +	\
+				    EC_PROTO2_MAX_PARAM_SIZE)
+#define EC_PROTO2_MAX_RESPONSE_SIZE (EC_PROTO2_RESPONSE_OVERHEAD +	\
+				     EC_PROTO2_MAX_PARAM_SIZE)
+
+/*****************************************************************************/
+
+/*
+ * Value written to legacy command port / prefix byte to indicate protocol
+ * 3+ structs are being used.  Usage is bus-dependent.
+ */
+#define EC_COMMAND_PROTOCOL_3 0xda
+
+#define EC_HOST_REQUEST_VERSION 3
+
+/**
+ * struct ec_host_request - Version 3 request from host.
+ * @struct_version: Should be 3. The EC will return EC_RES_INVALID_HEADER if it
+ *                  receives a header with a version it doesn't know how to
+ *                  parse.
+ * @checksum: Checksum of request and data; sum of all bytes including checksum
+ *            should total to 0.
+ * @command: Command to send (EC_CMD_...)
+ * @command_version: Command version.
+ * @reserved: Unused byte in current protocol version; set to 0.
+ * @data_len: Length of data which follows this header.
+ */
+struct ec_host_request {
+	uint8_t struct_version;
+	uint8_t checksum;
+	uint16_t command;
+	uint8_t command_version;
+	uint8_t reserved;
+	uint16_t data_len;
+} __ec_align4;
+
+#define EC_HOST_RESPONSE_VERSION 3
+
+/**
+ * struct ec_host_response - Version 3 response from EC.
+ * @struct_version: Struct version (=3).
+ * @checksum: Checksum of response and data; sum of all bytes including
+ *            checksum should total to 0.
+ * @result: EC's response to the command (separate from communication failure)
+ * @data_len: Length of data which follows this header.
+ * @reserved: Unused bytes in current protocol version; set to 0.
+ */
+struct ec_host_response {
+	uint8_t struct_version;
+	uint8_t checksum;
+	uint16_t result;
+	uint16_t data_len;
+	uint16_t reserved;
+} __ec_align4;
+
+/*****************************************************************************/
+
+/*
+ * Host command protocol V4.
+ *
+ * Packets always start with a request or response header.  They are followed
+ * by data_len bytes of data.  If the data_crc_present flag is set, the data
+ * bytes are followed by a CRC-8 of that data, using using x^8 + x^2 + x + 1
+ * polynomial.
+ *
+ * Host algorithm when sending a request q:
+ *
+ * 101) tries_left=(some value, e.g. 3);
+ * 102) q.seq_num++
+ * 103) q.seq_dup=0
+ * 104) Calculate q.header_crc.
+ * 105) Send request q to EC.
+ * 106) Wait for response r.  Go to 201 if received or 301 if timeout.
+ *
+ * 201) If r.struct_version != 4, go to 301.
+ * 202) If r.header_crc mismatches calculated CRC for r header, go to 301.
+ * 203) If r.data_crc_present and r.data_crc mismatches, go to 301.
+ * 204) If r.seq_num != q.seq_num, go to 301.
+ * 205) If r.seq_dup == q.seq_dup, return success.
+ * 207) If r.seq_dup == 1, go to 301.
+ * 208) Return error.
+ *
+ * 301) If --tries_left <= 0, return error.
+ * 302) If q.seq_dup == 1, go to 105.
+ * 303) q.seq_dup = 1
+ * 304) Go to 104.
+ *
+ * EC algorithm when receiving a request q.
+ * EC has response buffer r, error buffer e.
+ *
+ * 101) If q.struct_version != 4, set e.result = EC_RES_INVALID_HEADER_VERSION
+ *      and go to 301
+ * 102) If q.header_crc mismatches calculated CRC, set e.result =
+ *      EC_RES_INVALID_HEADER_CRC and go to 301
+ * 103) If q.data_crc_present, calculate data CRC.  If that mismatches the CRC
+ *      byte at the end of the packet, set e.result = EC_RES_INVALID_DATA_CRC
+ *      and go to 301.
+ * 104) If q.seq_dup == 0, go to 201.
+ * 105) If q.seq_num != r.seq_num, go to 201.
+ * 106) If q.seq_dup == r.seq_dup, go to 205, else go to 203.
+ *
+ * 201) Process request q into response r.
+ * 202) r.seq_num = q.seq_num
+ * 203) r.seq_dup = q.seq_dup
+ * 204) Calculate r.header_crc
+ * 205) If r.data_len > 0 and data is no longer available, set e.result =
+ *      EC_RES_DUP_UNAVAILABLE and go to 301.
+ * 206) Send response r.
+ *
+ * 301) e.seq_num = q.seq_num
+ * 302) e.seq_dup = q.seq_dup
+ * 303) Calculate e.header_crc.
+ * 304) Send error response e.
+ */
+
+/* Version 4 request from host */
+struct ec_host_request4 {
+	/*
+	 * bits 0-3: struct_version: Structure version (=4)
+	 * bit    4: is_response: Is response (=0)
+	 * bits 5-6: seq_num: Sequence number
+	 * bit    7: seq_dup: Sequence duplicate flag
+	 */
+	uint8_t fields0;
+
+	/*
+	 * bits 0-4: command_version: Command version
+	 * bits 5-6: Reserved (set 0, ignore on read)
+	 * bit    7: data_crc_present: Is data CRC present after data
+	 */
+	uint8_t fields1;
+
+	/* Command code (EC_CMD_*) */
+	uint16_t command;
+
+	/* Length of data which follows this header (not including data CRC) */
+	uint16_t data_len;
+
+	/* Reserved (set 0, ignore on read) */
+	uint8_t reserved;
+
+	/* CRC-8 of above fields, using x^8 + x^2 + x + 1 polynomial */
+	uint8_t header_crc;
+} __ec_align4;
+
+/* Version 4 response from EC */
+struct ec_host_response4 {
+	/*
+	 * bits 0-3: struct_version: Structure version (=4)
+	 * bit    4: is_response: Is response (=1)
+	 * bits 5-6: seq_num: Sequence number
+	 * bit    7: seq_dup: Sequence duplicate flag
+	 */
+	uint8_t fields0;
+
+	/*
+	 * bits 0-6: Reserved (set 0, ignore on read)
+	 * bit    7: data_crc_present: Is data CRC present after data
+	 */
+	uint8_t fields1;
+
+	/* Result code (EC_RES_*) */
+	uint16_t result;
+
+	/* Length of data which follows this header (not including data CRC) */
+	uint16_t data_len;
+
+	/* Reserved (set 0, ignore on read) */
+	uint8_t reserved;
+
+	/* CRC-8 of above fields, using x^8 + x^2 + x + 1 polynomial */
+	uint8_t header_crc;
+} __ec_align4;
+
+/* Fields in fields0 byte */
+#define EC_PACKET4_0_STRUCT_VERSION_MASK	0x0f
+#define EC_PACKET4_0_IS_RESPONSE_MASK		0x10
+#define EC_PACKET4_0_SEQ_NUM_SHIFT		5
+#define EC_PACKET4_0_SEQ_NUM_MASK		0x60
+#define EC_PACKET4_0_SEQ_DUP_MASK		0x80
+
+/* Fields in fields1 byte */
+#define EC_PACKET4_1_COMMAND_VERSION_MASK	0x1f  /* (request only) */
+#define EC_PACKET4_1_DATA_CRC_PRESENT_MASK	0x80
+
+/*****************************************************************************/
+/*
+ * Notes on commands:
+ *
+ * Each command is an 16-bit command value.  Commands which take params or
+ * return response data specify structures for that data.  If no structure is
+ * specified, the command does not input or output data, respectively.
+ * Parameter/response length is implicit in the structs.  Some underlying
+ * communication protocols (I2C, SPI) may add length or checksum headers, but
+ * those are implementation-dependent and not defined here.
+ *
+ * All commands MUST be #defined to be 4-digit UPPER CASE hex values
+ * (e.g., 0x00AB, not 0xab) for CONFIG_HOSTCMD_SECTION_SORTED to work.
+ */
+
+/*****************************************************************************/
+/* General / test commands */
+
+/*
+ * Get protocol version, used to deal with non-backward compatible protocol
+ * changes.
+ */
+#define EC_CMD_PROTO_VERSION 0x0000
+
+/**
+ * struct ec_response_proto_version - Response to the proto version command.
+ * @version: The protocol version.
+ */
+struct ec_response_proto_version {
+	uint32_t version;
+} __ec_align4;
+
+/*
+ * Hello.  This is a simple command to test the EC is responsive to
+ * commands.
+ */
+#define EC_CMD_HELLO 0x0001
+
+/**
+ * struct ec_params_hello - Parameters to the hello command.
+ * @in_data: Pass anything here.
+ */
+struct ec_params_hello {
+	uint32_t in_data;
+} __ec_align4;
+
+/**
+ * struct ec_response_hello - Response to the hello command.
+ * @out_data: Output will be in_data + 0x01020304.
+ */
+struct ec_response_hello {
+	uint32_t out_data;
+} __ec_align4;
+
+/* Get version number */
+#define EC_CMD_GET_VERSION 0x0002
+
+enum ec_current_image {
+	EC_IMAGE_UNKNOWN = 0,
+	EC_IMAGE_RO,
+	EC_IMAGE_RW
+};
+
+/**
+ * struct ec_response_get_version - Response to the get version command.
+ * @version_string_ro: Null-terminated RO firmware version string.
+ * @version_string_rw: Null-terminated RW firmware version string.
+ * @reserved: Unused bytes; was previously RW-B firmware version string.
+ * @current_image: One of ec_current_image.
+ */
+struct ec_response_get_version {
+	char version_string_ro[32];
+	char version_string_rw[32];
+	char reserved[32];
+	uint32_t current_image;
+} __ec_align4;
+
+/* Read test */
+#define EC_CMD_READ_TEST 0x0003
+
+/**
+ * struct ec_params_read_test - Parameters for the read test command.
+ * @offset: Starting value for read buffer.
+ * @size: Size to read in bytes.
+ */
+struct ec_params_read_test {
+	uint32_t offset;
+	uint32_t size;
+} __ec_align4;
+
+/**
+ * struct ec_response_read_test - Response to the read test command.
+ * @data: Data returned by the read test command.
+ */
+struct ec_response_read_test {
+	uint32_t data[32];
+} __ec_align4;
+
+/*
+ * Get build information
+ *
+ * Response is null-terminated string.
+ */
+#define EC_CMD_GET_BUILD_INFO 0x0004
+
+/* Get chip info */
+#define EC_CMD_GET_CHIP_INFO 0x0005
+
+/**
+ * struct ec_response_get_chip_info - Response to the get chip info command.
+ * @vendor: Null-terminated string for chip vendor.
+ * @name: Null-terminated string for chip name.
+ * @revision: Null-terminated string for chip mask version.
+ */
+struct ec_response_get_chip_info {
+	char vendor[32];
+	char name[32];
+	char revision[32];
+} __ec_align4;
+
+/* Get board HW version */
+#define EC_CMD_GET_BOARD_VERSION 0x0006
+
+/**
+ * struct ec_response_board_version - Response to the board version command.
+ * @board_version: A monotonously incrementing number.
+ */
+struct ec_response_board_version {
+	uint16_t board_version;
+} __ec_align2;
+
+/*
+ * Read memory-mapped data.
+ *
+ * This is an alternate interface to memory-mapped data for bus protocols
+ * which don't support direct-mapped memory - I2C, SPI, etc.
+ *
+ * Response is params.size bytes of data.
+ */
+#define EC_CMD_READ_MEMMAP 0x0007
+
+/**
+ * struct ec_params_read_memmap - Parameters for the read memory map command.
+ * @offset: Offset in memmap (EC_MEMMAP_*).
+ * @size: Size to read in bytes.
+ */
+struct ec_params_read_memmap {
+	uint8_t offset;
+	uint8_t size;
+} __ec_align1;
+
+/* Read versions supported for a command */
+#define EC_CMD_GET_CMD_VERSIONS 0x0008
+
+/**
+ * struct ec_params_get_cmd_versions - Parameters for the get command versions.
+ * @cmd: Command to check.
+ */
+struct ec_params_get_cmd_versions {
+	uint8_t cmd;
+} __ec_align1;
+
+/**
+ * struct ec_params_get_cmd_versions_v1 - Parameters for the get command
+ *         versions (v1)
+ * @cmd: Command to check.
+ */
+struct ec_params_get_cmd_versions_v1 {
+	uint16_t cmd;
+} __ec_align2;
+
+/**
+ * struct ec_response_get_cmd_version - Response to the get command versions.
+ * @version_mask: Mask of supported versions; use EC_VER_MASK() to compare with
+ *                a desired version.
+ */
+struct ec_response_get_cmd_versions {
+	uint32_t version_mask;
+} __ec_align4;
+
+/*
+ * Check EC communications status (busy). This is needed on i2c/spi but not
+ * on lpc since it has its own out-of-band busy indicator.
+ *
+ * lpc must read the status from the command register. Attempting this on
+ * lpc will overwrite the args/parameter space and corrupt its data.
+ */
+#define EC_CMD_GET_COMMS_STATUS		0x0009
+
+/* Avoid using ec_status which is for return values */
+enum ec_comms_status {
+	EC_COMMS_STATUS_PROCESSING	= BIT(0),	/* Processing cmd */
+};
+
+/**
+ * struct ec_response_get_comms_status - Response to the get comms status
+ *         command.
+ * @flags: Mask of enum ec_comms_status.
+ */
+struct ec_response_get_comms_status {
+	uint32_t flags;		/* Mask of enum ec_comms_status */
+} __ec_align4;
+
+/* Fake a variety of responses, purely for testing purposes. */
+#define EC_CMD_TEST_PROTOCOL		0x000A
+
+/* Tell the EC what to send back to us. */
+struct ec_params_test_protocol {
+	uint32_t ec_result;
+	uint32_t ret_len;
+	uint8_t buf[32];
+} __ec_align4;
+
+/* Here it comes... */
+struct ec_response_test_protocol {
+	uint8_t buf[32];
+} __ec_align4;
+
+/* Get protocol information */
+#define EC_CMD_GET_PROTOCOL_INFO	0x000B
+
+/* Flags for ec_response_get_protocol_info.flags */
+/* EC_RES_IN_PROGRESS may be returned if a command is slow */
+#define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED BIT(0)
+
+/**
+ * struct ec_response_get_protocol_info - Response to the get protocol info.
+ * @protocol_versions: Bitmask of protocol versions supported (1 << n means
+ *                     version n).
+ * @max_request_packet_size: Maximum request packet size in bytes.
+ * @max_response_packet_size: Maximum response packet size in bytes.
+ * @flags: see EC_PROTOCOL_INFO_*
+ */
+struct ec_response_get_protocol_info {
+	/* Fields which exist if at least protocol version 3 supported */
+	uint32_t protocol_versions;
+	uint16_t max_request_packet_size;
+	uint16_t max_response_packet_size;
+	uint32_t flags;
+} __ec_align4;
+
+
+/*****************************************************************************/
+/* Get/Set miscellaneous values */
+
+/* The upper byte of .flags tells what to do (nothing means "get") */
+#define EC_GSV_SET        0x80000000
+
+/*
+ * The lower three bytes of .flags identifies the parameter, if that has
+ * meaning for an individual command.
+ */
+#define EC_GSV_PARAM_MASK 0x00ffffff
+
+struct ec_params_get_set_value {
+	uint32_t flags;
+	uint32_t value;
+} __ec_align4;
+
+struct ec_response_get_set_value {
+	uint32_t flags;
+	uint32_t value;
+} __ec_align4;
+
+/* More than one command can use these structs to get/set parameters. */
+#define EC_CMD_GSV_PAUSE_IN_S5	0x000C
+
+/*****************************************************************************/
+/* List the features supported by the firmware */
+#define EC_CMD_GET_FEATURES  0x000D
+
+/* Supported features */
+enum ec_feature_code {
+	/*
+	 * This image contains a limited set of features. Another image
+	 * in RW partition may support more features.
+	 */
+	EC_FEATURE_LIMITED = 0,
+	/*
+	 * Commands for probing/reading/writing/erasing the flash in the
+	 * EC are present.
+	 */
+	EC_FEATURE_FLASH = 1,
+	/*
+	 * Can control the fan speed directly.
+	 */
+	EC_FEATURE_PWM_FAN = 2,
+	/*
+	 * Can control the intensity of the keyboard backlight.
+	 */
+	EC_FEATURE_PWM_KEYB = 3,
+	/*
+	 * Support Google lightbar, introduced on Pixel.
+	 */
+	EC_FEATURE_LIGHTBAR = 4,
+	/* Control of LEDs  */
+	EC_FEATURE_LED = 5,
+	/* Exposes an interface to control gyro and sensors.
+	 * The host goes through the EC to access these sensors.
+	 * In addition, the EC may provide composite sensors, like lid angle.
+	 */
+	EC_FEATURE_MOTION_SENSE = 6,
+	/* The keyboard is controlled by the EC */
+	EC_FEATURE_KEYB = 7,
+	/* The AP can use part of the EC flash as persistent storage. */
+	EC_FEATURE_PSTORE = 8,
+	/* The EC monitors BIOS port 80h, and can return POST codes. */
+	EC_FEATURE_PORT80 = 9,
+	/*
+	 * Thermal management: include TMP specific commands.
+	 * Higher level than direct fan control.
+	 */
+	EC_FEATURE_THERMAL = 10,
+	/* Can switch the screen backlight on/off */
+	EC_FEATURE_BKLIGHT_SWITCH = 11,
+	/* Can switch the wifi module on/off */
+	EC_FEATURE_WIFI_SWITCH = 12,
+	/* Monitor host events, through for example SMI or SCI */
+	EC_FEATURE_HOST_EVENTS = 13,
+	/* The EC exposes GPIO commands to control/monitor connected devices. */
+	EC_FEATURE_GPIO = 14,
+	/* The EC can send i2c messages to downstream devices. */
+	EC_FEATURE_I2C = 15,
+	/* Command to control charger are included */
+	EC_FEATURE_CHARGER = 16,
+	/* Simple battery support. */
+	EC_FEATURE_BATTERY = 17,
+	/*
+	 * Support Smart battery protocol
+	 * (Common Smart Battery System Interface Specification)
+	 */
+	EC_FEATURE_SMART_BATTERY = 18,
+	/* EC can detect when the host hangs. */
+	EC_FEATURE_HANG_DETECT = 19,
+	/* Report power information, for pit only */
+	EC_FEATURE_PMU = 20,
+	/* Another Cros EC device is present downstream of this one */
+	EC_FEATURE_SUB_MCU = 21,
+	/* Support USB Power delivery (PD) commands */
+	EC_FEATURE_USB_PD = 22,
+	/* Control USB multiplexer, for audio through USB port for instance. */
+	EC_FEATURE_USB_MUX = 23,
+	/* Motion Sensor code has an internal software FIFO */
+	EC_FEATURE_MOTION_SENSE_FIFO = 24,
+	/* Support temporary secure vstore */
+	EC_FEATURE_VSTORE = 25,
+	/* EC decides on USB-C SS mux state, muxes configured by host */
+	EC_FEATURE_USBC_SS_MUX_VIRTUAL = 26,
+	/* EC has RTC feature that can be controlled by host commands */
+	EC_FEATURE_RTC = 27,
+	/* The MCU exposes a Fingerprint sensor */
+	EC_FEATURE_FINGERPRINT = 28,
+	/* The MCU exposes a Touchpad */
+	EC_FEATURE_TOUCHPAD = 29,
+	/* The MCU has RWSIG task enabled */
+	EC_FEATURE_RWSIG = 30,
+	/* EC has device events support */
+	EC_FEATURE_DEVICE_EVENT = 31,
+	/* EC supports the unified wake masks for LPC/eSPI systems */
+	EC_FEATURE_UNIFIED_WAKE_MASKS = 32,
+	/* EC supports 64-bit host events */
+	EC_FEATURE_HOST_EVENT64 = 33,
+	/* EC runs code in RAM (not in place, a.k.a. XIP) */
+	EC_FEATURE_EXEC_IN_RAM = 34,
+	/* EC supports CEC commands */
+	EC_FEATURE_CEC = 35,
+	/* EC supports tight sensor timestamping. */
+	EC_FEATURE_MOTION_SENSE_TIGHT_TIMESTAMPS = 36,
+	/*
+	 * EC supports tablet mode detection aligned to Chrome and allows
+	 * setting of threshold by host command using
+	 * MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE.
+	 */
+	EC_FEATURE_REFINED_TABLET_MODE_HYSTERESIS = 37,
+	/* EC supports audio codec. */
+	EC_FEATURE_AUDIO_CODEC = 38,
+	/* The MCU is a System Companion Processor (SCP). */
+	EC_FEATURE_SCP = 39,
+	/* The MCU is an Integrated Sensor Hub */
+	EC_FEATURE_ISH = 40,
+};
+
+#define EC_FEATURE_MASK_0(event_code) BIT(event_code % 32)
+#define EC_FEATURE_MASK_1(event_code) BIT(event_code - 32)
+
+struct ec_response_get_features {
+	uint32_t flags[2];
+} __ec_align4;
+
+/*****************************************************************************/
+/* Get the board's SKU ID from EC */
+#define EC_CMD_GET_SKU_ID 0x000E
+
+/* Set SKU ID from AP */
+#define EC_CMD_SET_SKU_ID 0x000F
+
+struct ec_sku_id_info {
+	uint32_t sku_id;
+} __ec_align4;
+
+/*****************************************************************************/
+/* Flash commands */
+
+/* Get flash info */
+#define EC_CMD_FLASH_INFO 0x0010
+#define EC_VER_FLASH_INFO 2
+
+/**
+ * struct ec_response_flash_info - Response to the flash info command.
+ * @flash_size: Usable flash size in bytes.
+ * @write_block_size: Write block size. Write offset and size must be a
+ *                    multiple of this.
+ * @erase_block_size: Erase block size. Erase offset and size must be a
+ *                    multiple of this.
+ * @protect_block_size: Protection block size. Protection offset and size
+ *                      must be a multiple of this.
+ *
+ * Version 0 returns these fields.
+ */
+struct ec_response_flash_info {
+	uint32_t flash_size;
+	uint32_t write_block_size;
+	uint32_t erase_block_size;
+	uint32_t protect_block_size;
+} __ec_align4;
+
+/*
+ * Flags for version 1+ flash info command
+ * EC flash erases bits to 0 instead of 1.
+ */
+#define EC_FLASH_INFO_ERASE_TO_0 BIT(0)
+
+/*
+ * Flash must be selected for read/write/erase operations to succeed.  This may
+ * be necessary on a chip where write/erase can be corrupted by other board
+ * activity, or where the chip needs to enable some sort of programming voltage,
+ * or where the read/write/erase operations require cleanly suspending other
+ * chip functionality.
+ */
+#define EC_FLASH_INFO_SELECT_REQUIRED BIT(1)
+
+/**
+ * struct ec_response_flash_info_1 - Response to the flash info v1 command.
+ * @flash_size: Usable flash size in bytes.
+ * @write_block_size: Write block size. Write offset and size must be a
+ *                    multiple of this.
+ * @erase_block_size: Erase block size. Erase offset and size must be a
+ *                    multiple of this.
+ * @protect_block_size: Protection block size. Protection offset and size
+ *                      must be a multiple of this.
+ * @write_ideal_size: Ideal write size in bytes.  Writes will be fastest if
+ *                    size is exactly this and offset is a multiple of this.
+ *                    For example, an EC may have a write buffer which can do
+ *                    half-page operations if data is aligned, and a slower
+ *                    word-at-a-time write mode.
+ * @flags: Flags; see EC_FLASH_INFO_*
+ *
+ * Version 1 returns the same initial fields as version 0, with additional
+ * fields following.
+ *
+ * gcc anonymous structs don't seem to get along with the __packed directive;
+ * if they did we'd define the version 0 structure as a sub-structure of this
+ * one.
+ *
+ * Version 2 supports flash banks of different sizes:
+ * The caller specified the number of banks it has preallocated
+ * (num_banks_desc)
+ * The EC returns the number of banks describing the flash memory.
+ * It adds banks descriptions up to num_banks_desc.
+ */
+struct ec_response_flash_info_1 {
+	/* Version 0 fields; see above for description */
+	uint32_t flash_size;
+	uint32_t write_block_size;
+	uint32_t erase_block_size;
+	uint32_t protect_block_size;
+
+	/* Version 1 adds these fields: */
+	uint32_t write_ideal_size;
+	uint32_t flags;
+} __ec_align4;
+
+struct ec_params_flash_info_2 {
+	/* Number of banks to describe */
+	uint16_t num_banks_desc;
+	/* Reserved; set 0; ignore on read */
+	uint8_t reserved[2];
+} __ec_align4;
+
+struct ec_flash_bank {
+	/* Number of sector is in this bank. */
+	uint16_t count;
+	/* Size in power of 2 of each sector (8 --> 256 bytes) */
+	uint8_t size_exp;
+	/* Minimal write size for the sectors in this bank */
+	uint8_t write_size_exp;
+	/* Erase size for the sectors in this bank */
+	uint8_t erase_size_exp;
+	/* Size for write protection, usually identical to erase size. */
+	uint8_t protect_size_exp;
+	/* Reserved; set 0; ignore on read */
+	uint8_t reserved[2];
+};
+
+struct ec_response_flash_info_2 {
+	/* Total flash in the EC. */
+	uint32_t flash_size;
+	/* Flags; see EC_FLASH_INFO_* */
+	uint32_t flags;
+	/* Maximum size to use to send data to write to the EC. */
+	uint32_t write_ideal_size;
+	/* Number of banks present in the EC. */
+	uint16_t num_banks_total;
+	/* Number of banks described in banks array. */
+	uint16_t num_banks_desc;
+	struct ec_flash_bank banks[0];
+} __ec_align4;
+
+/*
+ * Read flash
+ *
+ * Response is params.size bytes of data.
+ */
+#define EC_CMD_FLASH_READ 0x0011
+
+/**
+ * struct ec_params_flash_read - Parameters for the flash read command.
+ * @offset: Byte offset to read.
+ * @size: Size to read in bytes.
+ */
+struct ec_params_flash_read {
+	uint32_t offset;
+	uint32_t size;
+} __ec_align4;
+
+/* Write flash */
+#define EC_CMD_FLASH_WRITE 0x0012
+#define EC_VER_FLASH_WRITE 1
+
+/* Version 0 of the flash command supported only 64 bytes of data */
+#define EC_FLASH_WRITE_VER0_SIZE 64
+
+/**
+ * struct ec_params_flash_write - Parameters for the flash write command.
+ * @offset: Byte offset to write.
+ * @size: Size to write in bytes.
+ */
+struct ec_params_flash_write {
+	uint32_t offset;
+	uint32_t size;
+	/* Followed by data to write */
+} __ec_align4;
+
+/* Erase flash */
+#define EC_CMD_FLASH_ERASE 0x0013
+
+/**
+ * struct ec_params_flash_erase - Parameters for the flash erase command, v0.
+ * @offset: Byte offset to erase.
+ * @size: Size to erase in bytes.
+ */
+struct ec_params_flash_erase {
+	uint32_t offset;
+	uint32_t size;
+} __ec_align4;
+
+/*
+ * v1 add async erase:
+ * subcommands can returns:
+ * EC_RES_SUCCESS : erased (see ERASE_SECTOR_ASYNC case below).
+ * EC_RES_INVALID_PARAM : offset/size are not aligned on a erase boundary.
+ * EC_RES_ERROR : other errors.
+ * EC_RES_BUSY : an existing erase operation is in progress.
+ * EC_RES_ACCESS_DENIED: Trying to erase running image.
+ *
+ * When ERASE_SECTOR_ASYNC returns EC_RES_SUCCESS, the operation is just
+ * properly queued. The user must call ERASE_GET_RESULT subcommand to get
+ * the proper result.
+ * When ERASE_GET_RESULT returns EC_RES_BUSY, the caller must wait and send
+ * ERASE_GET_RESULT again to get the result of ERASE_SECTOR_ASYNC.
+ * ERASE_GET_RESULT command may timeout on EC where flash access is not
+ * permitted while erasing. (For instance, STM32F4).
+ */
+enum ec_flash_erase_cmd {
+	FLASH_ERASE_SECTOR,     /* Erase and wait for result */
+	FLASH_ERASE_SECTOR_ASYNC,  /* Erase and return immediately. */
+	FLASH_ERASE_GET_RESULT,  /* Ask for last erase result */
+};
+
+/**
+ * struct ec_params_flash_erase_v1 - Parameters for the flash erase command, v1.
+ * @cmd: One of ec_flash_erase_cmd.
+ * @reserved: Pad byte; currently always contains 0.
+ * @flag: No flags defined yet; set to 0.
+ * @params: Same as v0 parameters.
+ */
+struct ec_params_flash_erase_v1 {
+	uint8_t  cmd;
+	uint8_t  reserved;
+	uint16_t flag;
+	struct ec_params_flash_erase params;
+} __ec_align4;
+
+/*
+ * Get/set flash protection.
+ *
+ * If mask!=0, sets/clear the requested bits of flags.  Depending on the
+ * firmware write protect GPIO, not all flags will take effect immediately;
+ * some flags require a subsequent hard reset to take effect.  Check the
+ * returned flags bits to see what actually happened.
+ *
+ * If mask=0, simply returns the current flags state.
+ */
+#define EC_CMD_FLASH_PROTECT 0x0015
+#define EC_VER_FLASH_PROTECT 1  /* Command version 1 */
+
+/* Flags for flash protection */
+/* RO flash code protected when the EC boots */
+#define EC_FLASH_PROTECT_RO_AT_BOOT         BIT(0)
+/*
+ * RO flash code protected now.  If this bit is set, at-boot status cannot
+ * be changed.
+ */
+#define EC_FLASH_PROTECT_RO_NOW             BIT(1)
+/* Entire flash code protected now, until reboot. */
+#define EC_FLASH_PROTECT_ALL_NOW            BIT(2)
+/* Flash write protect GPIO is asserted now */
+#define EC_FLASH_PROTECT_GPIO_ASSERTED      BIT(3)
+/* Error - at least one bank of flash is stuck locked, and cannot be unlocked */
+#define EC_FLASH_PROTECT_ERROR_STUCK        BIT(4)
+/*
+ * Error - flash protection is in inconsistent state.  At least one bank of
+ * flash which should be protected is not protected.  Usually fixed by
+ * re-requesting the desired flags, or by a hard reset if that fails.
+ */
+#define EC_FLASH_PROTECT_ERROR_INCONSISTENT BIT(5)
+/* Entire flash code protected when the EC boots */
+#define EC_FLASH_PROTECT_ALL_AT_BOOT        BIT(6)
+/* RW flash code protected when the EC boots */
+#define EC_FLASH_PROTECT_RW_AT_BOOT         BIT(7)
+/* RW flash code protected now. */
+#define EC_FLASH_PROTECT_RW_NOW             BIT(8)
+/* Rollback information flash region protected when the EC boots */
+#define EC_FLASH_PROTECT_ROLLBACK_AT_BOOT   BIT(9)
+/* Rollback information flash region protected now */
+#define EC_FLASH_PROTECT_ROLLBACK_NOW       BIT(10)
+
+
+/**
+ * struct ec_params_flash_protect - Parameters for the flash protect command.
+ * @mask: Bits in flags to apply.
+ * @flags: New flags to apply.
+ */
+struct ec_params_flash_protect {
+	uint32_t mask;
+	uint32_t flags;
+} __ec_align4;
+
+/**
+ * struct ec_response_flash_protect - Response to the flash protect command.
+ * @flags: Current value of flash protect flags.
+ * @valid_flags: Flags which are valid on this platform. This allows the
+ *               caller to distinguish between flags which aren't set vs. flags
+ *               which can't be set on this platform.
+ * @writable_flags: Flags which can be changed given the current protection
+ *                  state.
+ */
+struct ec_response_flash_protect {
+	uint32_t flags;
+	uint32_t valid_flags;
+	uint32_t writable_flags;
+} __ec_align4;
+
+/*
+ * Note: commands 0x14 - 0x19 version 0 were old commands to get/set flash
+ * write protect.  These commands may be reused with version > 0.
+ */
+
+/* Get the region offset/size */
+#define EC_CMD_FLASH_REGION_INFO 0x0016
+#define EC_VER_FLASH_REGION_INFO 1
+
+enum ec_flash_region {
+	/* Region which holds read-only EC image */
+	EC_FLASH_REGION_RO = 0,
+	/*
+	 * Region which holds active RW image. 'Active' is different from
+	 * 'running'. Active means 'scheduled-to-run'. Since RO image always
+	 * scheduled to run, active/non-active applies only to RW images (for
+	 * the same reason 'update' applies only to RW images. It's a state of
+	 * an image on a flash. Running image can be RO, RW_A, RW_B but active
+	 * image can only be RW_A or RW_B. In recovery mode, an active RW image
+	 * doesn't enter 'running' state but it's still active on a flash.
+	 */
+	EC_FLASH_REGION_ACTIVE,
+	/*
+	 * Region which should be write-protected in the factory (a superset of
+	 * EC_FLASH_REGION_RO)
+	 */
+	EC_FLASH_REGION_WP_RO,
+	/* Region which holds updatable (non-active) RW image */
+	EC_FLASH_REGION_UPDATE,
+	/* Number of regions */
+	EC_FLASH_REGION_COUNT,
+};
+/*
+ * 'RW' is vague if there are multiple RW images; we mean the active one,
+ * so the old constant is deprecated.
+ */
+#define EC_FLASH_REGION_RW EC_FLASH_REGION_ACTIVE
+
+/**
+ * struct ec_params_flash_region_info - Parameters for the flash region info
+ *         command.
+ * @region: Flash region; see EC_FLASH_REGION_*
+ */
+struct ec_params_flash_region_info {
+	uint32_t region;
+} __ec_align4;
+
+struct ec_response_flash_region_info {
+	uint32_t offset;
+	uint32_t size;
+} __ec_align4;
+
+/* Read/write VbNvContext */
+#define EC_CMD_VBNV_CONTEXT 0x0017
+#define EC_VER_VBNV_CONTEXT 1
+#define EC_VBNV_BLOCK_SIZE 16
+
+enum ec_vbnvcontext_op {
+	EC_VBNV_CONTEXT_OP_READ,
+	EC_VBNV_CONTEXT_OP_WRITE,
+};
+
+struct ec_params_vbnvcontext {
+	uint32_t op;
+	uint8_t block[EC_VBNV_BLOCK_SIZE];
+} __ec_align4;
+
+struct ec_response_vbnvcontext {
+	uint8_t block[EC_VBNV_BLOCK_SIZE];
+} __ec_align4;
+
+
+/* Get SPI flash information */
+#define EC_CMD_FLASH_SPI_INFO 0x0018
+
+struct ec_response_flash_spi_info {
+	/* JEDEC info from command 0x9F (manufacturer, memory type, size) */
+	uint8_t jedec[3];
+
+	/* Pad byte; currently always contains 0 */
+	uint8_t reserved0;
+
+	/* Manufacturer / device ID from command 0x90 */
+	uint8_t mfr_dev_id[2];
+
+	/* Status registers from command 0x05 and 0x35 */
+	uint8_t sr1, sr2;
+} __ec_align1;
+
+
+/* Select flash during flash operations */
+#define EC_CMD_FLASH_SELECT 0x0019
+
+/**
+ * struct ec_params_flash_select - Parameters for the flash select command.
+ * @select: 1 to select flash, 0 to deselect flash
+ */
+struct ec_params_flash_select {
+	uint8_t select;
+} __ec_align4;
+
+
+/*****************************************************************************/
+/* PWM commands */
+
+/* Get fan target RPM */
+#define EC_CMD_PWM_GET_FAN_TARGET_RPM 0x0020
+
+struct ec_response_pwm_get_fan_rpm {
+	uint32_t rpm;
+} __ec_align4;
+
+/* Set target fan RPM */
+#define EC_CMD_PWM_SET_FAN_TARGET_RPM 0x0021
+
+/* Version 0 of input params */
+struct ec_params_pwm_set_fan_target_rpm_v0 {
+	uint32_t rpm;
+} __ec_align4;
+
+/* Version 1 of input params */
+struct ec_params_pwm_set_fan_target_rpm_v1 {
+	uint32_t rpm;
+	uint8_t fan_idx;
+} __ec_align_size1;
+
+/* Get keyboard backlight */
+/* OBSOLETE - Use EC_CMD_PWM_SET_DUTY */
+#define EC_CMD_PWM_GET_KEYBOARD_BACKLIGHT 0x0022
+
+struct ec_response_pwm_get_keyboard_backlight {
+	uint8_t percent;
+	uint8_t enabled;
+} __ec_align1;
+
+/* Set keyboard backlight */
+/* OBSOLETE - Use EC_CMD_PWM_SET_DUTY */
+#define EC_CMD_PWM_SET_KEYBOARD_BACKLIGHT 0x0023
+
+struct ec_params_pwm_set_keyboard_backlight {
+	uint8_t percent;
+} __ec_align1;
+
+/* Set target fan PWM duty cycle */
+#define EC_CMD_PWM_SET_FAN_DUTY 0x0024
+
+/* Version 0 of input params */
+struct ec_params_pwm_set_fan_duty_v0 {
+	uint32_t percent;
+} __ec_align4;
+
+/* Version 1 of input params */
+struct ec_params_pwm_set_fan_duty_v1 {
+	uint32_t percent;
+	uint8_t fan_idx;
+} __ec_align_size1;
+
+#define EC_CMD_PWM_SET_DUTY 0x0025
+/* 16 bit duty cycle, 0xffff = 100% */
+#define EC_PWM_MAX_DUTY 0xffff
+
+enum ec_pwm_type {
+	/* All types, indexed by board-specific enum pwm_channel */
+	EC_PWM_TYPE_GENERIC = 0,
+	/* Keyboard backlight */
+	EC_PWM_TYPE_KB_LIGHT,
+	/* Display backlight */
+	EC_PWM_TYPE_DISPLAY_LIGHT,
+	EC_PWM_TYPE_COUNT,
+};
+
+struct ec_params_pwm_set_duty {
+	uint16_t duty;     /* Duty cycle, EC_PWM_MAX_DUTY = 100% */
+	uint8_t pwm_type;  /* ec_pwm_type */
+	uint8_t index;     /* Type-specific index, or 0 if unique */
+} __ec_align4;
+
+#define EC_CMD_PWM_GET_DUTY 0x0026
+
+struct ec_params_pwm_get_duty {
+	uint8_t pwm_type;  /* ec_pwm_type */
+	uint8_t index;     /* Type-specific index, or 0 if unique */
+} __ec_align1;
+
+struct ec_response_pwm_get_duty {
+	uint16_t duty;     /* Duty cycle, EC_PWM_MAX_DUTY = 100% */
+} __ec_align2;
+
+/*****************************************************************************/
+/*
+ * Lightbar commands. This looks worse than it is. Since we only use one HOST
+ * command to say "talk to the lightbar", we put the "and tell it to do X" part
+ * into a subcommand. We'll make separate structs for subcommands with
+ * different input args, so that we know how much to expect.
+ */
+#define EC_CMD_LIGHTBAR_CMD 0x0028
+
+struct rgb_s {
+	uint8_t r, g, b;
+} __ec_todo_unpacked;
+
+#define LB_BATTERY_LEVELS 4
+
+/*
+ * List of tweakable parameters. NOTE: It's __packed so it can be sent in a
+ * host command, but the alignment is the same regardless. Keep it that way.
+ */
+struct lightbar_params_v0 {
+	/* Timing */
+	int32_t google_ramp_up;
+	int32_t google_ramp_down;
+	int32_t s3s0_ramp_up;
+	int32_t s0_tick_delay[2];		/* AC=0/1 */
+	int32_t s0a_tick_delay[2];		/* AC=0/1 */
+	int32_t s0s3_ramp_down;
+	int32_t s3_sleep_for;
+	int32_t s3_ramp_up;
+	int32_t s3_ramp_down;
+
+	/* Oscillation */
+	uint8_t new_s0;
+	uint8_t osc_min[2];			/* AC=0/1 */
+	uint8_t osc_max[2];			/* AC=0/1 */
+	uint8_t w_ofs[2];			/* AC=0/1 */
+
+	/* Brightness limits based on the backlight and AC. */
+	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
+
+	/* Battery level thresholds */
+	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
+
+	/* Map [AC][battery_level] to color index */
+	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
+	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
+
+	/* Color palette */
+	struct rgb_s color[8];			/* 0-3 are Google colors */
+} __ec_todo_packed;
+
+struct lightbar_params_v1 {
+	/* Timing */
+	int32_t google_ramp_up;
+	int32_t google_ramp_down;
+	int32_t s3s0_ramp_up;
+	int32_t s0_tick_delay[2];		/* AC=0/1 */
+	int32_t s0a_tick_delay[2];		/* AC=0/1 */
+	int32_t s0s3_ramp_down;
+	int32_t s3_sleep_for;
+	int32_t s3_ramp_up;
+	int32_t s3_ramp_down;
+	int32_t s5_ramp_up;
+	int32_t s5_ramp_down;
+	int32_t tap_tick_delay;
+	int32_t tap_gate_delay;
+	int32_t tap_display_time;
+
+	/* Tap-for-battery params */
+	uint8_t tap_pct_red;
+	uint8_t tap_pct_green;
+	uint8_t tap_seg_min_on;
+	uint8_t tap_seg_max_on;
+	uint8_t tap_seg_osc;
+	uint8_t tap_idx[3];
+
+	/* Oscillation */
+	uint8_t osc_min[2];			/* AC=0/1 */
+	uint8_t osc_max[2];			/* AC=0/1 */
+	uint8_t w_ofs[2];			/* AC=0/1 */
+
+	/* Brightness limits based on the backlight and AC. */
+	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
+
+	/* Battery level thresholds */
+	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
+
+	/* Map [AC][battery_level] to color index */
+	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
+	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
+
+	/* s5: single color pulse on inhibited power-up */
+	uint8_t s5_idx;
+
+	/* Color palette */
+	struct rgb_s color[8];			/* 0-3 are Google colors */
+} __ec_todo_packed;
+
+/* Lightbar command params v2
+ * crbug.com/467716
+ *
+ * lightbar_parms_v1 was too big for i2c, therefore in v2, we split them up by
+ * logical groups to make it more manageable ( < 120 bytes).
+ *
+ * NOTE: Each of these groups must be less than 120 bytes.
+ */
+
+struct lightbar_params_v2_timing {
+	/* Timing */
+	int32_t google_ramp_up;
+	int32_t google_ramp_down;
+	int32_t s3s0_ramp_up;
+	int32_t s0_tick_delay[2];		/* AC=0/1 */
+	int32_t s0a_tick_delay[2];		/* AC=0/1 */
+	int32_t s0s3_ramp_down;
+	int32_t s3_sleep_for;
+	int32_t s3_ramp_up;
+	int32_t s3_ramp_down;
+	int32_t s5_ramp_up;
+	int32_t s5_ramp_down;
+	int32_t tap_tick_delay;
+	int32_t tap_gate_delay;
+	int32_t tap_display_time;
+} __ec_todo_packed;
+
+struct lightbar_params_v2_tap {
+	/* Tap-for-battery params */
+	uint8_t tap_pct_red;
+	uint8_t tap_pct_green;
+	uint8_t tap_seg_min_on;
+	uint8_t tap_seg_max_on;
+	uint8_t tap_seg_osc;
+	uint8_t tap_idx[3];
+} __ec_todo_packed;
+
+struct lightbar_params_v2_oscillation {
+	/* Oscillation */
+	uint8_t osc_min[2];			/* AC=0/1 */
+	uint8_t osc_max[2];			/* AC=0/1 */
+	uint8_t w_ofs[2];			/* AC=0/1 */
+} __ec_todo_packed;
+
+struct lightbar_params_v2_brightness {
+	/* Brightness limits based on the backlight and AC. */
+	uint8_t bright_bl_off_fixed[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_min[2];		/* AC=0/1 */
+	uint8_t bright_bl_on_max[2];		/* AC=0/1 */
+} __ec_todo_packed;
+
+struct lightbar_params_v2_thresholds {
+	/* Battery level thresholds */
+	uint8_t battery_threshold[LB_BATTERY_LEVELS - 1];
+} __ec_todo_packed;
+
+struct lightbar_params_v2_colors {
+	/* Map [AC][battery_level] to color index */
+	uint8_t s0_idx[2][LB_BATTERY_LEVELS];	/* AP is running */
+	uint8_t s3_idx[2][LB_BATTERY_LEVELS];	/* AP is sleeping */
+
+	/* s5: single color pulse on inhibited power-up */
+	uint8_t s5_idx;
+
+	/* Color palette */
+	struct rgb_s color[8];			/* 0-3 are Google colors */
+} __ec_todo_packed;
+
+/* Lightbar program. */
+#define EC_LB_PROG_LEN 192
+struct lightbar_program {
+	uint8_t size;
+	uint8_t data[EC_LB_PROG_LEN];
+} __ec_todo_unpacked;
+
+struct ec_params_lightbar {
+	uint8_t cmd;		      /* Command (see enum lightbar_command) */
+	union {
+		/*
+		 * The following commands have no args:
+		 *
+		 * dump, off, on, init, get_seq, get_params_v0, get_params_v1,
+		 * version, get_brightness, get_demo, suspend, resume,
+		 * get_params_v2_timing, get_params_v2_tap, get_params_v2_osc,
+		 * get_params_v2_bright, get_params_v2_thlds,
+		 * get_params_v2_colors
+		 *
+		 * Don't use an empty struct, because C++ hates that.
+		 */
+
+		struct __ec_todo_unpacked {
+			uint8_t num;
+		} set_brightness, seq, demo;
+
+		struct __ec_todo_unpacked {
+			uint8_t ctrl, reg, value;
+		} reg;
+
+		struct __ec_todo_unpacked {
+			uint8_t led, red, green, blue;
+		} set_rgb;
+
+		struct __ec_todo_unpacked {
+			uint8_t led;
+		} get_rgb;
+
+		struct __ec_todo_unpacked {
+			uint8_t enable;
+		} manual_suspend_ctrl;
+
+		struct lightbar_params_v0 set_params_v0;
+		struct lightbar_params_v1 set_params_v1;
+
+		struct lightbar_params_v2_timing set_v2par_timing;
+		struct lightbar_params_v2_tap set_v2par_tap;
+		struct lightbar_params_v2_oscillation set_v2par_osc;
+		struct lightbar_params_v2_brightness set_v2par_bright;
+		struct lightbar_params_v2_thresholds set_v2par_thlds;
+		struct lightbar_params_v2_colors set_v2par_colors;
+
+		struct lightbar_program set_program;
+	};
+} __ec_todo_packed;
+
+struct ec_response_lightbar {
+	union {
+		struct __ec_todo_unpacked {
+			struct __ec_todo_unpacked {
+				uint8_t reg;
+				uint8_t ic0;
+				uint8_t ic1;
+			} vals[23];
+		} dump;
+
+		struct __ec_todo_unpacked {
+			uint8_t num;
+		} get_seq, get_brightness, get_demo;
+
+		struct lightbar_params_v0 get_params_v0;
+		struct lightbar_params_v1 get_params_v1;
+
+
+		struct lightbar_params_v2_timing get_params_v2_timing;
+		struct lightbar_params_v2_tap get_params_v2_tap;
+		struct lightbar_params_v2_oscillation get_params_v2_osc;
+		struct lightbar_params_v2_brightness get_params_v2_bright;
+		struct lightbar_params_v2_thresholds get_params_v2_thlds;
+		struct lightbar_params_v2_colors get_params_v2_colors;
+
+		struct __ec_todo_unpacked {
+			uint32_t num;
+			uint32_t flags;
+		} version;
+
+		struct __ec_todo_unpacked {
+			uint8_t red, green, blue;
+		} get_rgb;
+
+		/*
+		 * The following commands have no response:
+		 *
+		 * off, on, init, set_brightness, seq, reg, set_rgb, demo,
+		 * set_params_v0, set_params_v1, set_program,
+		 * manual_suspend_ctrl, suspend, resume, set_v2par_timing,
+		 * set_v2par_tap, set_v2par_osc, set_v2par_bright,
+		 * set_v2par_thlds, set_v2par_colors
+		 */
+	};
+} __ec_todo_packed;
+
+/* Lightbar commands */
+enum lightbar_command {
+	LIGHTBAR_CMD_DUMP = 0,
+	LIGHTBAR_CMD_OFF = 1,
+	LIGHTBAR_CMD_ON = 2,
+	LIGHTBAR_CMD_INIT = 3,
+	LIGHTBAR_CMD_SET_BRIGHTNESS = 4,
+	LIGHTBAR_CMD_SEQ = 5,
+	LIGHTBAR_CMD_REG = 6,
+	LIGHTBAR_CMD_SET_RGB = 7,
+	LIGHTBAR_CMD_GET_SEQ = 8,
+	LIGHTBAR_CMD_DEMO = 9,
+	LIGHTBAR_CMD_GET_PARAMS_V0 = 10,
+	LIGHTBAR_CMD_SET_PARAMS_V0 = 11,
+	LIGHTBAR_CMD_VERSION = 12,
+	LIGHTBAR_CMD_GET_BRIGHTNESS = 13,
+	LIGHTBAR_CMD_GET_RGB = 14,
+	LIGHTBAR_CMD_GET_DEMO = 15,
+	LIGHTBAR_CMD_GET_PARAMS_V1 = 16,
+	LIGHTBAR_CMD_SET_PARAMS_V1 = 17,
+	LIGHTBAR_CMD_SET_PROGRAM = 18,
+	LIGHTBAR_CMD_MANUAL_SUSPEND_CTRL = 19,
+	LIGHTBAR_CMD_SUSPEND = 20,
+	LIGHTBAR_CMD_RESUME = 21,
+	LIGHTBAR_CMD_GET_PARAMS_V2_TIMING = 22,
+	LIGHTBAR_CMD_SET_PARAMS_V2_TIMING = 23,
+	LIGHTBAR_CMD_GET_PARAMS_V2_TAP = 24,
+	LIGHTBAR_CMD_SET_PARAMS_V2_TAP = 25,
+	LIGHTBAR_CMD_GET_PARAMS_V2_OSCILLATION = 26,
+	LIGHTBAR_CMD_SET_PARAMS_V2_OSCILLATION = 27,
+	LIGHTBAR_CMD_GET_PARAMS_V2_BRIGHTNESS = 28,
+	LIGHTBAR_CMD_SET_PARAMS_V2_BRIGHTNESS = 29,
+	LIGHTBAR_CMD_GET_PARAMS_V2_THRESHOLDS = 30,
+	LIGHTBAR_CMD_SET_PARAMS_V2_THRESHOLDS = 31,
+	LIGHTBAR_CMD_GET_PARAMS_V2_COLORS = 32,
+	LIGHTBAR_CMD_SET_PARAMS_V2_COLORS = 33,
+	LIGHTBAR_NUM_CMDS
+};
+
+/*****************************************************************************/
+/* LED control commands */
+
+#define EC_CMD_LED_CONTROL 0x0029
+
+enum ec_led_id {
+	/* LED to indicate battery state of charge */
+	EC_LED_ID_BATTERY_LED = 0,
+	/*
+	 * LED to indicate system power state (on or in suspend).
+	 * May be on power button or on C-panel.
+	 */
+	EC_LED_ID_POWER_LED,
+	/* LED on power adapter or its plug */
+	EC_LED_ID_ADAPTER_LED,
+	/* LED to indicate left side */
+	EC_LED_ID_LEFT_LED,
+	/* LED to indicate right side */
+	EC_LED_ID_RIGHT_LED,
+	/* LED to indicate recovery mode with HW_REINIT */
+	EC_LED_ID_RECOVERY_HW_REINIT_LED,
+	/* LED to indicate sysrq debug mode. */
+	EC_LED_ID_SYSRQ_DEBUG_LED,
+
+	EC_LED_ID_COUNT
+};
+
+/* LED control flags */
+#define EC_LED_FLAGS_QUERY BIT(0) /* Query LED capability only */
+#define EC_LED_FLAGS_AUTO  BIT(1) /* Switch LED back to automatic control */
+
+enum ec_led_colors {
+	EC_LED_COLOR_RED = 0,
+	EC_LED_COLOR_GREEN,
+	EC_LED_COLOR_BLUE,
+	EC_LED_COLOR_YELLOW,
+	EC_LED_COLOR_WHITE,
+	EC_LED_COLOR_AMBER,
+
+	EC_LED_COLOR_COUNT
+};
+
+struct ec_params_led_control {
+	uint8_t led_id;     /* Which LED to control */
+	uint8_t flags;      /* Control flags */
+
+	uint8_t brightness[EC_LED_COLOR_COUNT];
+} __ec_align1;
+
+struct ec_response_led_control {
+	/*
+	 * Available brightness value range.
+	 *
+	 * Range 0 means color channel not present.
+	 * Range 1 means on/off control.
+	 * Other values means the LED is control by PWM.
+	 */
+	uint8_t brightness_range[EC_LED_COLOR_COUNT];
+} __ec_align1;
+
+/*****************************************************************************/
+/* Verified boot commands */
+
+/*
+ * Note: command code 0x29 version 0 was VBOOT_CMD in Link EVT; it may be
+ * reused for other purposes with version > 0.
+ */
+
+/* Verified boot hash command */
+#define EC_CMD_VBOOT_HASH 0x002A
+
+struct ec_params_vboot_hash {
+	uint8_t cmd;             /* enum ec_vboot_hash_cmd */
+	uint8_t hash_type;       /* enum ec_vboot_hash_type */
+	uint8_t nonce_size;      /* Nonce size; may be 0 */
+	uint8_t reserved0;       /* Reserved; set 0 */
+	uint32_t offset;         /* Offset in flash to hash */
+	uint32_t size;           /* Number of bytes to hash */
+	uint8_t nonce_data[64];  /* Nonce data; ignored if nonce_size=0 */
+} __ec_align4;
+
+struct ec_response_vboot_hash {
+	uint8_t status;          /* enum ec_vboot_hash_status */
+	uint8_t hash_type;       /* enum ec_vboot_hash_type */
+	uint8_t digest_size;     /* Size of hash digest in bytes */
+	uint8_t reserved0;       /* Ignore; will be 0 */
+	uint32_t offset;         /* Offset in flash which was hashed */
+	uint32_t size;           /* Number of bytes hashed */
+	uint8_t hash_digest[64]; /* Hash digest data */
+} __ec_align4;
+
+enum ec_vboot_hash_cmd {
+	EC_VBOOT_HASH_GET = 0,       /* Get current hash status */
+	EC_VBOOT_HASH_ABORT = 1,     /* Abort calculating current hash */
+	EC_VBOOT_HASH_START = 2,     /* Start computing a new hash */
+	EC_VBOOT_HASH_RECALC = 3,    /* Synchronously compute a new hash */
+};
+
+enum ec_vboot_hash_type {
+	EC_VBOOT_HASH_TYPE_SHA256 = 0, /* SHA-256 */
+};
+
+enum ec_vboot_hash_status {
+	EC_VBOOT_HASH_STATUS_NONE = 0, /* No hash (not started, or aborted) */
+	EC_VBOOT_HASH_STATUS_DONE = 1, /* Finished computing a hash */
+	EC_VBOOT_HASH_STATUS_BUSY = 2, /* Busy computing a hash */
+};
+
+/*
+ * Special values for offset for EC_VBOOT_HASH_START and EC_VBOOT_HASH_RECALC.
+ * If one of these is specified, the EC will automatically update offset and
+ * size to the correct values for the specified image (RO or RW).
+ */
+#define EC_VBOOT_HASH_OFFSET_RO		0xfffffffe
+#define EC_VBOOT_HASH_OFFSET_ACTIVE	0xfffffffd
+#define EC_VBOOT_HASH_OFFSET_UPDATE	0xfffffffc
+
+/*
+ * 'RW' is vague if there are multiple RW images; we mean the active one,
+ * so the old constant is deprecated.
+ */
+#define EC_VBOOT_HASH_OFFSET_RW EC_VBOOT_HASH_OFFSET_ACTIVE
+
+/*****************************************************************************/
+/*
+ * Motion sense commands. We'll make separate structs for sub-commands with
+ * different input args, so that we know how much to expect.
+ */
+#define EC_CMD_MOTION_SENSE_CMD 0x002B
+
+/* Motion sense commands */
+enum motionsense_command {
+	/*
+	 * Dump command returns all motion sensor data including motion sense
+	 * module flags and individual sensor flags.
+	 */
+	MOTIONSENSE_CMD_DUMP = 0,
+
+	/*
+	 * Info command returns data describing the details of a given sensor,
+	 * including enum motionsensor_type, enum motionsensor_location, and
+	 * enum motionsensor_chip.
+	 */
+	MOTIONSENSE_CMD_INFO = 1,
+
+	/*
+	 * EC Rate command is a setter/getter command for the EC sampling rate
+	 * in milliseconds.
+	 * It is per sensor, the EC run sample task  at the minimum of all
+	 * sensors EC_RATE.
+	 * For sensors without hardware FIFO, EC_RATE should be equals to 1/ODR
+	 * to collect all the sensor samples.
+	 * For sensor with hardware FIFO, EC_RATE is used as the maximal delay
+	 * to process of all motion sensors in milliseconds.
+	 */
+	MOTIONSENSE_CMD_EC_RATE = 2,
+
+	/*
+	 * Sensor ODR command is a setter/getter command for the output data
+	 * rate of a specific motion sensor in millihertz.
+	 */
+	MOTIONSENSE_CMD_SENSOR_ODR = 3,
+
+	/*
+	 * Sensor range command is a setter/getter command for the range of
+	 * a specified motion sensor in +/-G's or +/- deg/s.
+	 */
+	MOTIONSENSE_CMD_SENSOR_RANGE = 4,
+
+	/*
+	 * Setter/getter command for the keyboard wake angle. When the lid
+	 * angle is greater than this value, keyboard wake is disabled in S3,
+	 * and when the lid angle goes less than this value, keyboard wake is
+	 * enabled. Note, the lid angle measurement is an approximate,
+	 * un-calibrated value, hence the wake angle isn't exact.
+	 */
+	MOTIONSENSE_CMD_KB_WAKE_ANGLE = 5,
+
+	/*
+	 * Returns a single sensor data.
+	 */
+	MOTIONSENSE_CMD_DATA = 6,
+
+	/*
+	 * Return sensor fifo info.
+	 */
+	MOTIONSENSE_CMD_FIFO_INFO = 7,
+
+	/*
+	 * Insert a flush element in the fifo and return sensor fifo info.
+	 * The host can use that element to synchronize its operation.
+	 */
+	MOTIONSENSE_CMD_FIFO_FLUSH = 8,
+
+	/*
+	 * Return a portion of the fifo.
+	 */
+	MOTIONSENSE_CMD_FIFO_READ = 9,
+
+	/*
+	 * Perform low level calibration.
+	 * On sensors that support it, ask to do offset calibration.
+	 */
+	MOTIONSENSE_CMD_PERFORM_CALIB = 10,
+
+	/*
+	 * Sensor Offset command is a setter/getter command for the offset
+	 * used for calibration.
+	 * The offsets can be calculated by the host, or via
+	 * PERFORM_CALIB command.
+	 */
+	MOTIONSENSE_CMD_SENSOR_OFFSET = 11,
+
+	/*
+	 * List available activities for a MOTION sensor.
+	 * Indicates if they are enabled or disabled.
+	 */
+	MOTIONSENSE_CMD_LIST_ACTIVITIES = 12,
+
+	/*
+	 * Activity management
+	 * Enable/Disable activity recognition.
+	 */
+	MOTIONSENSE_CMD_SET_ACTIVITY = 13,
+
+	/*
+	 * Lid Angle
+	 */
+	MOTIONSENSE_CMD_LID_ANGLE = 14,
+
+	/*
+	 * Allow the FIFO to trigger interrupt via MKBP events.
+	 * By default the FIFO does not send interrupt to process the FIFO
+	 * until the AP is ready or it is coming from a wakeup sensor.
+	 */
+	MOTIONSENSE_CMD_FIFO_INT_ENABLE = 15,
+
+	/*
+	 * Spoof the readings of the sensors.  The spoofed readings can be set
+	 * to arbitrary values, or will lock to the last read actual values.
+	 */
+	MOTIONSENSE_CMD_SPOOF = 16,
+
+	/* Set lid angle for tablet mode detection. */
+	MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE = 17,
+
+	/*
+	 * Sensor Scale command is a setter/getter command for the calibration
+	 * scale.
+	 */
+	MOTIONSENSE_CMD_SENSOR_SCALE = 18,
+
+	/* Number of motionsense sub-commands. */
+	MOTIONSENSE_NUM_CMDS
+};
+
+/* List of motion sensor types. */
+enum motionsensor_type {
+	MOTIONSENSE_TYPE_ACCEL = 0,
+	MOTIONSENSE_TYPE_GYRO = 1,
+	MOTIONSENSE_TYPE_MAG = 2,
+	MOTIONSENSE_TYPE_PROX = 3,
+	MOTIONSENSE_TYPE_LIGHT = 4,
+	MOTIONSENSE_TYPE_ACTIVITY = 5,
+	MOTIONSENSE_TYPE_BARO = 6,
+	MOTIONSENSE_TYPE_SYNC = 7,
+	MOTIONSENSE_TYPE_MAX,
+};
+
+/* List of motion sensor locations. */
+enum motionsensor_location {
+	MOTIONSENSE_LOC_BASE = 0,
+	MOTIONSENSE_LOC_LID = 1,
+	MOTIONSENSE_LOC_CAMERA = 2,
+	MOTIONSENSE_LOC_MAX,
+};
+
+/* List of motion sensor chips. */
+enum motionsensor_chip {
+	MOTIONSENSE_CHIP_KXCJ9 = 0,
+	MOTIONSENSE_CHIP_LSM6DS0 = 1,
+	MOTIONSENSE_CHIP_BMI160 = 2,
+	MOTIONSENSE_CHIP_SI1141 = 3,
+	MOTIONSENSE_CHIP_SI1142 = 4,
+	MOTIONSENSE_CHIP_SI1143 = 5,
+	MOTIONSENSE_CHIP_KX022 = 6,
+	MOTIONSENSE_CHIP_L3GD20H = 7,
+	MOTIONSENSE_CHIP_BMA255 = 8,
+	MOTIONSENSE_CHIP_BMP280 = 9,
+	MOTIONSENSE_CHIP_OPT3001 = 10,
+	MOTIONSENSE_CHIP_BH1730 = 11,
+	MOTIONSENSE_CHIP_GPIO = 12,
+	MOTIONSENSE_CHIP_LIS2DH = 13,
+	MOTIONSENSE_CHIP_LSM6DSM = 14,
+	MOTIONSENSE_CHIP_LIS2DE = 15,
+	MOTIONSENSE_CHIP_LIS2MDL = 16,
+	MOTIONSENSE_CHIP_LSM6DS3 = 17,
+	MOTIONSENSE_CHIP_LSM6DSO = 18,
+	MOTIONSENSE_CHIP_LNG2DM = 19,
+	MOTIONSENSE_CHIP_MAX,
+};
+
+/* List of orientation positions */
+enum motionsensor_orientation {
+	MOTIONSENSE_ORIENTATION_LANDSCAPE = 0,
+	MOTIONSENSE_ORIENTATION_PORTRAIT = 1,
+	MOTIONSENSE_ORIENTATION_UPSIDE_DOWN_PORTRAIT = 2,
+	MOTIONSENSE_ORIENTATION_UPSIDE_DOWN_LANDSCAPE = 3,
+	MOTIONSENSE_ORIENTATION_UNKNOWN = 4,
+};
+
+struct ec_response_motion_sensor_data {
+	/* Flags for each sensor. */
+	uint8_t flags;
+	/* Sensor number the data comes from. */
+	uint8_t sensor_num;
+	/* Each sensor is up to 3-axis. */
+	union {
+		int16_t             data[3];
+		struct __ec_todo_packed {
+			uint16_t    reserved;
+			uint32_t    timestamp;
+		};
+		struct __ec_todo_unpacked {
+			uint8_t     activity; /* motionsensor_activity */
+			uint8_t     state;
+			int16_t     add_info[2];
+		};
+	};
+} __ec_todo_packed;
+
+/* Note: used in ec_response_get_next_data */
+struct ec_response_motion_sense_fifo_info {
+	/* Size of the fifo */
+	uint16_t size;
+	/* Amount of space used in the fifo */
+	uint16_t count;
+	/* Timestamp recorded in us.
+	 * aka accurate timestamp when host event was triggered.
+	 */
+	uint32_t timestamp;
+	/* Total amount of vector lost */
+	uint16_t total_lost;
+	/* Lost events since the last fifo_info, per sensors */
+	uint16_t lost[0];
+} __ec_todo_packed;
+
+struct ec_response_motion_sense_fifo_data {
+	uint32_t number_data;
+	struct ec_response_motion_sensor_data data[0];
+} __ec_todo_packed;
+
+/* List supported activity recognition */
+enum motionsensor_activity {
+	MOTIONSENSE_ACTIVITY_RESERVED = 0,
+	MOTIONSENSE_ACTIVITY_SIG_MOTION = 1,
+	MOTIONSENSE_ACTIVITY_DOUBLE_TAP = 2,
+	MOTIONSENSE_ACTIVITY_ORIENTATION = 3,
+};
+
+struct ec_motion_sense_activity {
+	uint8_t sensor_num;
+	uint8_t activity; /* one of enum motionsensor_activity */
+	uint8_t enable;   /* 1: enable, 0: disable */
+	uint8_t reserved;
+	uint16_t parameters[3]; /* activity dependent parameters */
+} __ec_todo_unpacked;
+
+/* Module flag masks used for the dump sub-command. */
+#define MOTIONSENSE_MODULE_FLAG_ACTIVE BIT(0)
+
+/* Sensor flag masks used for the dump sub-command. */
+#define MOTIONSENSE_SENSOR_FLAG_PRESENT BIT(0)
+
+/*
+ * Flush entry for synchronization.
+ * data contains time stamp
+ */
+#define MOTIONSENSE_SENSOR_FLAG_FLUSH BIT(0)
+#define MOTIONSENSE_SENSOR_FLAG_TIMESTAMP BIT(1)
+#define MOTIONSENSE_SENSOR_FLAG_WAKEUP BIT(2)
+#define MOTIONSENSE_SENSOR_FLAG_TABLET_MODE BIT(3)
+#define MOTIONSENSE_SENSOR_FLAG_ODR BIT(4)
+
+/*
+ * Send this value for the data element to only perform a read. If you
+ * send any other value, the EC will interpret it as data to set and will
+ * return the actual value set.
+ */
+#define EC_MOTION_SENSE_NO_VALUE -1
+
+#define EC_MOTION_SENSE_INVALID_CALIB_TEMP 0x8000
+
+/* MOTIONSENSE_CMD_SENSOR_OFFSET subcommand flag */
+/* Set Calibration information */
+#define MOTION_SENSE_SET_OFFSET BIT(0)
+
+/* Default Scale value, factor 1. */
+#define MOTION_SENSE_DEFAULT_SCALE BIT(15)
+
+#define LID_ANGLE_UNRELIABLE 500
+
+enum motionsense_spoof_mode {
+	/* Disable spoof mode. */
+	MOTIONSENSE_SPOOF_MODE_DISABLE = 0,
+
+	/* Enable spoof mode, but use provided component values. */
+	MOTIONSENSE_SPOOF_MODE_CUSTOM,
+
+	/* Enable spoof mode, but use the current sensor values. */
+	MOTIONSENSE_SPOOF_MODE_LOCK_CURRENT,
+
+	/* Query the current spoof mode status for the sensor. */
+	MOTIONSENSE_SPOOF_MODE_QUERY,
+};
+
+struct ec_params_motion_sense {
+	uint8_t cmd;
+	union {
+		/* Used for MOTIONSENSE_CMD_DUMP. */
+		struct __ec_todo_unpacked {
+			/*
+			 * Maximal number of sensor the host is expecting.
+			 * 0 means the host is only interested in the number
+			 * of sensors controlled by the EC.
+			 */
+			uint8_t max_sensor_count;
+		} dump;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_KB_WAKE_ANGLE.
+		 */
+		struct __ec_todo_unpacked {
+			/* Data to set or EC_MOTION_SENSE_NO_VALUE to read.
+			 * kb_wake_angle: angle to wakup AP.
+			 */
+			int16_t data;
+		} kb_wake_angle;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_INFO, MOTIONSENSE_CMD_DATA
+		 * and MOTIONSENSE_CMD_PERFORM_CALIB.
+		 */
+		struct __ec_todo_unpacked {
+			uint8_t sensor_num;
+		} info, info_3, data, fifo_flush, perform_calib,
+				list_activities;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR
+		 * and MOTIONSENSE_CMD_SENSOR_RANGE.
+		 */
+		struct __ec_todo_unpacked {
+			uint8_t sensor_num;
+
+			/* Rounding flag, true for round-up, false for down. */
+			uint8_t roundup;
+
+			uint16_t reserved;
+
+			/* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */
+			int32_t data;
+		} ec_rate, sensor_odr, sensor_range;
+
+		/* Used for MOTIONSENSE_CMD_SENSOR_OFFSET */
+		struct __ec_todo_packed {
+			uint8_t sensor_num;
+
+			/*
+			 * bit 0: If set (MOTION_SENSE_SET_OFFSET), set
+			 * the calibration information in the EC.
+			 * If unset, just retrieve calibration information.
+			 */
+			uint16_t flags;
+
+			/*
+			 * Temperature at calibration, in units of 0.01 C
+			 * 0x8000: invalid / unknown.
+			 * 0x0: 0C
+			 * 0x7fff: +327.67C
+			 */
+			int16_t temp;
+
+			/*
+			 * Offset for calibration.
+			 * Unit:
+			 * Accelerometer: 1/1024 g
+			 * Gyro:          1/1024 deg/s
+			 * Compass:       1/16 uT
+			 */
+			int16_t offset[3];
+		} sensor_offset;
+
+		/* Used for MOTIONSENSE_CMD_SENSOR_SCALE */
+		struct __ec_todo_packed {
+			uint8_t sensor_num;
+
+			/*
+			 * bit 0: If set (MOTION_SENSE_SET_OFFSET), set
+			 * the calibration information in the EC.
+			 * If unset, just retrieve calibration information.
+			 */
+			uint16_t flags;
+
+			/*
+			 * Temperature at calibration, in units of 0.01 C
+			 * 0x8000: invalid / unknown.
+			 * 0x0: 0C
+			 * 0x7fff: +327.67C
+			 */
+			int16_t temp;
+
+			/*
+			 * Scale for calibration:
+			 * By default scale is 1, it is encoded on 16bits:
+			 * 1 = BIT(15)
+			 * ~2 = 0xFFFF
+			 * ~0 = 0.
+			 */
+			uint16_t scale[3];
+		} sensor_scale;
+
+
+		/* Used for MOTIONSENSE_CMD_FIFO_INFO */
+		/* (no params) */
+
+		/* Used for MOTIONSENSE_CMD_FIFO_READ */
+		struct __ec_todo_unpacked {
+			/*
+			 * Number of expected vector to return.
+			 * EC may return less or 0 if none available.
+			 */
+			uint32_t max_data_vector;
+		} fifo_read;
+
+		struct ec_motion_sense_activity set_activity;
+
+		/* Used for MOTIONSENSE_CMD_LID_ANGLE */
+		/* (no params) */
+
+		/* Used for MOTIONSENSE_CMD_FIFO_INT_ENABLE */
+		struct __ec_todo_unpacked {
+			/*
+			 * 1: enable, 0 disable fifo,
+			 * EC_MOTION_SENSE_NO_VALUE return value.
+			 */
+			int8_t enable;
+		} fifo_int_enable;
+
+		/* Used for MOTIONSENSE_CMD_SPOOF */
+		struct __ec_todo_packed {
+			uint8_t sensor_id;
+
+			/* See enum motionsense_spoof_mode. */
+			uint8_t spoof_enable;
+
+			/* Ignored, used for alignment. */
+			uint8_t reserved;
+
+			/* Individual component values to spoof. */
+			int16_t components[3];
+		} spoof;
+
+		/* Used for MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */
+		struct __ec_todo_unpacked {
+			/*
+			 * Lid angle threshold for switching between tablet and
+			 * clamshell mode.
+			 */
+			int16_t lid_angle;
+
+			/*
+			 * Hysteresis degree to prevent fluctuations between
+			 * clamshell and tablet mode if lid angle keeps
+			 * changing around the threshold. Lid motion driver will
+			 * use lid_angle + hys_degree to trigger tablet mode and
+			 * lid_angle - hys_degree to trigger clamshell mode.
+			 */
+			int16_t hys_degree;
+		} tablet_mode_threshold;
+	};
+} __ec_todo_packed;
+
+struct ec_response_motion_sense {
+	union {
+		/* Used for MOTIONSENSE_CMD_DUMP */
+		struct __ec_todo_unpacked {
+			/* Flags representing the motion sensor module. */
+			uint8_t module_flags;
+
+			/* Number of sensors managed directly by the EC. */
+			uint8_t sensor_count;
+
+			/*
+			 * Sensor data is truncated if response_max is too small
+			 * for holding all the data.
+			 */
+			struct ec_response_motion_sensor_data sensor[0];
+		} dump;
+
+		/* Used for MOTIONSENSE_CMD_INFO. */
+		struct __ec_todo_unpacked {
+			/* Should be element of enum motionsensor_type. */
+			uint8_t type;
+
+			/* Should be element of enum motionsensor_location. */
+			uint8_t location;
+
+			/* Should be element of enum motionsensor_chip. */
+			uint8_t chip;
+		} info;
+
+		/* Used for MOTIONSENSE_CMD_INFO version 3 */
+		struct __ec_todo_unpacked {
+			/* Should be element of enum motionsensor_type. */
+			uint8_t type;
+
+			/* Should be element of enum motionsensor_location. */
+			uint8_t location;
+
+			/* Should be element of enum motionsensor_chip. */
+			uint8_t chip;
+
+			/* Minimum sensor sampling frequency */
+			uint32_t min_frequency;
+
+			/* Maximum sensor sampling frequency */
+			uint32_t max_frequency;
+
+			/* Max number of sensor events that could be in fifo */
+			uint32_t fifo_max_event_count;
+		} info_3;
+
+		/* Used for MOTIONSENSE_CMD_DATA */
+		struct ec_response_motion_sensor_data data;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR,
+		 * MOTIONSENSE_CMD_SENSOR_RANGE,
+		 * MOTIONSENSE_CMD_KB_WAKE_ANGLE,
+		 * MOTIONSENSE_CMD_FIFO_INT_ENABLE and
+		 * MOTIONSENSE_CMD_SPOOF.
+		 */
+		struct __ec_todo_unpacked {
+			/* Current value of the parameter queried. */
+			int32_t ret;
+		} ec_rate, sensor_odr, sensor_range, kb_wake_angle,
+		  fifo_int_enable, spoof;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_SENSOR_OFFSET,
+		 * PERFORM_CALIB.
+		 */
+		struct __ec_todo_unpacked  {
+			int16_t temp;
+			int16_t offset[3];
+		} sensor_offset, perform_calib;
+
+		/* Used for MOTIONSENSE_CMD_SENSOR_SCALE */
+		struct __ec_todo_unpacked  {
+			int16_t temp;
+			uint16_t scale[3];
+		} sensor_scale;
+
+		struct ec_response_motion_sense_fifo_info fifo_info, fifo_flush;
+
+		struct ec_response_motion_sense_fifo_data fifo_read;
+
+		struct __ec_todo_packed {
+			uint16_t reserved;
+			uint32_t enabled;
+			uint32_t disabled;
+		} list_activities;
+
+		/* No params for set activity */
+
+		/* Used for MOTIONSENSE_CMD_LID_ANGLE */
+		struct __ec_todo_unpacked {
+			/*
+			 * Angle between 0 and 360 degree if available,
+			 * LID_ANGLE_UNRELIABLE otherwise.
+			 */
+			uint16_t value;
+		} lid_angle;
+
+		/* Used for MOTIONSENSE_CMD_TABLET_MODE_LID_ANGLE. */
+		struct __ec_todo_unpacked {
+			/*
+			 * Lid angle threshold for switching between tablet and
+			 * clamshell mode.
+			 */
+			uint16_t lid_angle;
+
+			/* Hysteresis degree. */
+			uint16_t hys_degree;
+		} tablet_mode_threshold;
+
+	};
+} __ec_todo_packed;
+
+/*****************************************************************************/
+/* Force lid open command */
+
+/* Make lid event always open */
+#define EC_CMD_FORCE_LID_OPEN 0x002C
+
+struct ec_params_force_lid_open {
+	uint8_t enabled;
+} __ec_align1;
+
+/*****************************************************************************/
+/* Configure the behavior of the power button */
+#define EC_CMD_CONFIG_POWER_BUTTON 0x002D
+
+enum ec_config_power_button_flags {
+	/* Enable/Disable power button pulses for x86 devices */
+	EC_POWER_BUTTON_ENABLE_PULSE = BIT(0),
+};
+
+struct ec_params_config_power_button {
+	/* See enum ec_config_power_button_flags */
+	uint8_t flags;
+} __ec_align1;
+
+/*****************************************************************************/
+/* USB charging control commands */
+
+/* Set USB port charging mode */
+#define EC_CMD_USB_CHARGE_SET_MODE 0x0030
+
+struct ec_params_usb_charge_set_mode {
+	uint8_t usb_port_id;
+	uint8_t mode:7;
+	uint8_t inhibit_charge:1;
+} __ec_align1;
+
+/*****************************************************************************/
+/* Persistent storage for host */
+
+/* Maximum bytes that can be read/written in a single command */
+#define EC_PSTORE_SIZE_MAX 64
+
+/* Get persistent storage info */
+#define EC_CMD_PSTORE_INFO 0x0040
+
+struct ec_response_pstore_info {
+	/* Persistent storage size, in bytes */
+	uint32_t pstore_size;
+	/* Access size; read/write offset and size must be a multiple of this */
+	uint32_t access_size;
+} __ec_align4;
+
+/*
+ * Read persistent storage
+ *
+ * Response is params.size bytes of data.
+ */
+#define EC_CMD_PSTORE_READ 0x0041
+
+struct ec_params_pstore_read {
+	uint32_t offset;   /* Byte offset to read */
+	uint32_t size;     /* Size to read in bytes */
+} __ec_align4;
+
+/* Write persistent storage */
+#define EC_CMD_PSTORE_WRITE 0x0042
+
+struct ec_params_pstore_write {
+	uint32_t offset;   /* Byte offset to write */
+	uint32_t size;     /* Size to write in bytes */
+	uint8_t data[EC_PSTORE_SIZE_MAX];
+} __ec_align4;
+
+/*****************************************************************************/
+/* Real-time clock */
+
+/* RTC params and response structures */
+struct ec_params_rtc {
+	uint32_t time;
+} __ec_align4;
+
+struct ec_response_rtc {
+	uint32_t time;
+} __ec_align4;
+
+/* These use ec_response_rtc */
+#define EC_CMD_RTC_GET_VALUE 0x0044
+#define EC_CMD_RTC_GET_ALARM 0x0045
+
+/* These all use ec_params_rtc */
+#define EC_CMD_RTC_SET_VALUE 0x0046
+#define EC_CMD_RTC_SET_ALARM 0x0047
+
+/* Pass as time param to SET_ALARM to clear the current alarm */
+#define EC_RTC_ALARM_CLEAR 0
+
+/*****************************************************************************/
+/* Port80 log access */
+
+/* Maximum entries that can be read/written in a single command */
+#define EC_PORT80_SIZE_MAX 32
+
+/* Get last port80 code from previous boot */
+#define EC_CMD_PORT80_LAST_BOOT 0x0048
+#define EC_CMD_PORT80_READ 0x0048
+
+enum ec_port80_subcmd {
+	EC_PORT80_GET_INFO = 0,
+	EC_PORT80_READ_BUFFER,
+};
+
+struct ec_params_port80_read {
+	uint16_t subcmd;
+	union {
+		struct __ec_todo_unpacked {
+			uint32_t offset;
+			uint32_t num_entries;
+		} read_buffer;
+	};
+} __ec_todo_packed;
+
+struct ec_response_port80_read {
+	union {
+		struct __ec_todo_unpacked {
+			uint32_t writes;
+			uint32_t history_size;
+			uint32_t last_boot;
+		} get_info;
+		struct __ec_todo_unpacked {
+			uint16_t codes[EC_PORT80_SIZE_MAX];
+		} data;
+	};
+} __ec_todo_packed;
+
+struct ec_response_port80_last_boot {
+	uint16_t code;
+} __ec_align2;
+
+/*****************************************************************************/
+/* Temporary secure storage for host verified boot use */
+
+/* Number of bytes in a vstore slot */
+#define EC_VSTORE_SLOT_SIZE 64
+
+/* Maximum number of vstore slots */
+#define EC_VSTORE_SLOT_MAX 32
+
+/* Get persistent storage info */
+#define EC_CMD_VSTORE_INFO 0x0049
+struct ec_response_vstore_info {
+	/* Indicates which slots are locked */
+	uint32_t slot_locked;
+	/* Total number of slots available */
+	uint8_t slot_count;
+} __ec_align_size1;
+
+/*
+ * Read temporary secure storage
+ *
+ * Response is EC_VSTORE_SLOT_SIZE bytes of data.
+ */
+#define EC_CMD_VSTORE_READ 0x004A
+
+struct ec_params_vstore_read {
+	uint8_t slot; /* Slot to read from */
+} __ec_align1;
+
+struct ec_response_vstore_read {
+	uint8_t data[EC_VSTORE_SLOT_SIZE];
+} __ec_align1;
+
+/*
+ * Write temporary secure storage and lock it.
+ */
+#define EC_CMD_VSTORE_WRITE 0x004B
+
+struct ec_params_vstore_write {
+	uint8_t slot; /* Slot to write to */
+	uint8_t data[EC_VSTORE_SLOT_SIZE];
+} __ec_align1;
+
+/*****************************************************************************/
+/* Thermal engine commands. Note that there are two implementations. We'll
+ * reuse the command number, but the data and behavior is incompatible.
+ * Version 0 is what originally shipped on Link.
+ * Version 1 separates the CPU thermal limits from the fan control.
+ */
+
+#define EC_CMD_THERMAL_SET_THRESHOLD 0x0050
+#define EC_CMD_THERMAL_GET_THRESHOLD 0x0051
+
+/* The version 0 structs are opaque. You have to know what they are for
+ * the get/set commands to make any sense.
+ */
+
+/* Version 0 - set */
+struct ec_params_thermal_set_threshold {
+	uint8_t sensor_type;
+	uint8_t threshold_id;
+	uint16_t value;
+} __ec_align2;
+
+/* Version 0 - get */
+struct ec_params_thermal_get_threshold {
+	uint8_t sensor_type;
+	uint8_t threshold_id;
+} __ec_align1;
+
+struct ec_response_thermal_get_threshold {
+	uint16_t value;
+} __ec_align2;
+
+
+/* The version 1 structs are visible. */
+enum ec_temp_thresholds {
+	EC_TEMP_THRESH_WARN = 0,
+	EC_TEMP_THRESH_HIGH,
+	EC_TEMP_THRESH_HALT,
+
+	EC_TEMP_THRESH_COUNT
+};
+
+/*
+ * Thermal configuration for one temperature sensor. Temps are in degrees K.
+ * Zero values will be silently ignored by the thermal task.
+ *
+ * Set 'temp_host' value allows thermal task to trigger some event with 1 degree
+ * hysteresis.
+ * For example,
+ *	temp_host[EC_TEMP_THRESH_HIGH] = 300 K
+ *	temp_host_release[EC_TEMP_THRESH_HIGH] = 0 K
+ * EC will throttle ap when temperature >= 301 K, and release throttling when
+ * temperature <= 299 K.
+ *
+ * Set 'temp_host_release' value allows thermal task has a custom hysteresis.
+ * For example,
+ *	temp_host[EC_TEMP_THRESH_HIGH] = 300 K
+ *	temp_host_release[EC_TEMP_THRESH_HIGH] = 295 K
+ * EC will throttle ap when temperature >= 301 K, and release throttling when
+ * temperature <= 294 K.
+ *
+ * Note that this structure is a sub-structure of
+ * ec_params_thermal_set_threshold_v1, but maintains its alignment there.
+ */
+struct ec_thermal_config {
+	uint32_t temp_host[EC_TEMP_THRESH_COUNT]; /* levels of hotness */
+	uint32_t temp_host_release[EC_TEMP_THRESH_COUNT]; /* release levels */
+	uint32_t temp_fan_off;		/* no active cooling needed */
+	uint32_t temp_fan_max;		/* max active cooling needed */
+} __ec_align4;
+
+/* Version 1 - get config for one sensor. */
+struct ec_params_thermal_get_threshold_v1 {
+	uint32_t sensor_num;
+} __ec_align4;
+/* This returns a struct ec_thermal_config */
+
+/*
+ * Version 1 - set config for one sensor.
+ * Use read-modify-write for best results!
+ */
+struct ec_params_thermal_set_threshold_v1 {
+	uint32_t sensor_num;
+	struct ec_thermal_config cfg;
+} __ec_align4;
+/* This returns no data */
+
+/****************************************************************************/
+
+/* Toggle automatic fan control */
+#define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x0052
+
+/* Version 1 of input params */
+struct ec_params_auto_fan_ctrl_v1 {
+	uint8_t fan_idx;
+} __ec_align1;
+
+/* Get/Set TMP006 calibration data */
+#define EC_CMD_TMP006_GET_CALIBRATION 0x0053
+#define EC_CMD_TMP006_SET_CALIBRATION 0x0054
+
+/*
+ * The original TMP006 calibration only needed four params, but now we need
+ * more. Since the algorithm is nothing but magic numbers anyway, we'll leave
+ * the params opaque. The v1 "get" response will include the algorithm number
+ * and how many params it requires. That way we can change the EC code without
+ * needing to update this file. We can also use a different algorithm on each
+ * sensor.
+ */
+
+/* This is the same struct for both v0 and v1. */
+struct ec_params_tmp006_get_calibration {
+	uint8_t index;
+} __ec_align1;
+
+/* Version 0 */
+struct ec_response_tmp006_get_calibration_v0 {
+	float s0;
+	float b0;
+	float b1;
+	float b2;
+} __ec_align4;
+
+struct ec_params_tmp006_set_calibration_v0 {
+	uint8_t index;
+	uint8_t reserved[3];
+	float s0;
+	float b0;
+	float b1;
+	float b2;
+} __ec_align4;
+
+/* Version 1 */
+struct ec_response_tmp006_get_calibration_v1 {
+	uint8_t algorithm;
+	uint8_t num_params;
+	uint8_t reserved[2];
+	float val[0];
+} __ec_align4;
+
+struct ec_params_tmp006_set_calibration_v1 {
+	uint8_t index;
+	uint8_t algorithm;
+	uint8_t num_params;
+	uint8_t reserved;
+	float val[0];
+} __ec_align4;
+
+
+/* Read raw TMP006 data */
+#define EC_CMD_TMP006_GET_RAW 0x0055
+
+struct ec_params_tmp006_get_raw {
+	uint8_t index;
+} __ec_align1;
+
+struct ec_response_tmp006_get_raw {
+	int32_t t;  /* In 1/100 K */
+	int32_t v;  /* In nV */
+} __ec_align4;
+
+/*****************************************************************************/
+/* MKBP - Matrix KeyBoard Protocol */
+
+/*
+ * Read key state
+ *
+ * Returns raw data for keyboard cols; see ec_response_mkbp_info.cols for
+ * expected response size.
+ *
+ * NOTE: This has been superseded by EC_CMD_MKBP_GET_NEXT_EVENT.  If you wish
+ * to obtain the instantaneous state, use EC_CMD_MKBP_INFO with the type
+ * EC_MKBP_INFO_CURRENT and event EC_MKBP_EVENT_KEY_MATRIX.
+ */
+#define EC_CMD_MKBP_STATE 0x0060
+
+/*
+ * Provide information about various MKBP things.  See enum ec_mkbp_info_type.
+ */
+#define EC_CMD_MKBP_INFO 0x0061
+
+struct ec_response_mkbp_info {
+	uint32_t rows;
+	uint32_t cols;
+	/* Formerly "switches", which was 0. */
+	uint8_t reserved;
+} __ec_align_size1;
+
+struct ec_params_mkbp_info {
+	uint8_t info_type;
+	uint8_t event_type;
+} __ec_align1;
+
+enum ec_mkbp_info_type {
+	/*
+	 * Info about the keyboard matrix: number of rows and columns.
+	 *
+	 * Returns struct ec_response_mkbp_info.
+	 */
+	EC_MKBP_INFO_KBD = 0,
+
+	/*
+	 * For buttons and switches, info about which specifically are
+	 * supported.  event_type must be set to one of the values in enum
+	 * ec_mkbp_event.
+	 *
+	 * For EC_MKBP_EVENT_BUTTON and EC_MKBP_EVENT_SWITCH, returns a 4 byte
+	 * bitmask indicating which buttons or switches are present.  See the
+	 * bit inidices below.
+	 */
+	EC_MKBP_INFO_SUPPORTED = 1,
+
+	/*
+	 * Instantaneous state of buttons and switches.
+	 *
+	 * event_type must be set to one of the values in enum ec_mkbp_event.
+	 *
+	 * For EC_MKBP_EVENT_KEY_MATRIX, returns uint8_t key_matrix[13]
+	 * indicating the current state of the keyboard matrix.
+	 *
+	 * For EC_MKBP_EVENT_HOST_EVENT, return uint32_t host_event, the raw
+	 * event state.
+	 *
+	 * For EC_MKBP_EVENT_BUTTON, returns uint32_t buttons, indicating the
+	 * state of supported buttons.
+	 *
+	 * For EC_MKBP_EVENT_SWITCH, returns uint32_t switches, indicating the
+	 * state of supported switches.
+	 */
+	EC_MKBP_INFO_CURRENT = 2,
+};
+
+/* Simulate key press */
+#define EC_CMD_MKBP_SIMULATE_KEY 0x0062
+
+struct ec_params_mkbp_simulate_key {
+	uint8_t col;
+	uint8_t row;
+	uint8_t pressed;
+} __ec_align1;
+
+#define EC_CMD_GET_KEYBOARD_ID 0x0063
+
+struct ec_response_keyboard_id {
+	uint32_t keyboard_id;
+} __ec_align4;
+
+enum keyboard_id {
+	KEYBOARD_ID_UNSUPPORTED = 0,
+	KEYBOARD_ID_UNREADABLE = 0xffffffff,
+};
+
+/* Configure keyboard scanning */
+#define EC_CMD_MKBP_SET_CONFIG 0x0064
+#define EC_CMD_MKBP_GET_CONFIG 0x0065
+
+/* flags */
+enum mkbp_config_flags {
+	EC_MKBP_FLAGS_ENABLE = 1,	/* Enable keyboard scanning */
+};
+
+enum mkbp_config_valid {
+	EC_MKBP_VALID_SCAN_PERIOD		= BIT(0),
+	EC_MKBP_VALID_POLL_TIMEOUT		= BIT(1),
+	EC_MKBP_VALID_MIN_POST_SCAN_DELAY	= BIT(3),
+	EC_MKBP_VALID_OUTPUT_SETTLE		= BIT(4),
+	EC_MKBP_VALID_DEBOUNCE_DOWN		= BIT(5),
+	EC_MKBP_VALID_DEBOUNCE_UP		= BIT(6),
+	EC_MKBP_VALID_FIFO_MAX_DEPTH		= BIT(7),
+};
+
+/*
+ * Configuration for our key scanning algorithm.
+ *
+ * Note that this is used as a sub-structure of
+ * ec_{params/response}_mkbp_get_config.
+ */
+struct ec_mkbp_config {
+	uint32_t valid_mask;		/* valid fields */
+	uint8_t flags;		/* some flags (enum mkbp_config_flags) */
+	uint8_t valid_flags;		/* which flags are valid */
+	uint16_t scan_period_us;	/* period between start of scans */
+	/* revert to interrupt mode after no activity for this long */
+	uint32_t poll_timeout_us;
+	/*
+	 * minimum post-scan relax time. Once we finish a scan we check
+	 * the time until we are due to start the next one. If this time is
+	 * shorter this field, we use this instead.
+	 */
+	uint16_t min_post_scan_delay_us;
+	/* delay between setting up output and waiting for it to settle */
+	uint16_t output_settle_us;
+	uint16_t debounce_down_us;	/* time for debounce on key down */
+	uint16_t debounce_up_us;	/* time for debounce on key up */
+	/* maximum depth to allow for fifo (0 = no keyscan output) */
+	uint8_t fifo_max_depth;
+} __ec_align_size1;
+
+struct ec_params_mkbp_set_config {
+	struct ec_mkbp_config config;
+} __ec_align_size1;
+
+struct ec_response_mkbp_get_config {
+	struct ec_mkbp_config config;
+} __ec_align_size1;
+
+/* Run the key scan emulation */
+#define EC_CMD_KEYSCAN_SEQ_CTRL 0x0066
+
+enum ec_keyscan_seq_cmd {
+	EC_KEYSCAN_SEQ_STATUS = 0,	/* Get status information */
+	EC_KEYSCAN_SEQ_CLEAR = 1,	/* Clear sequence */
+	EC_KEYSCAN_SEQ_ADD = 2,		/* Add item to sequence */
+	EC_KEYSCAN_SEQ_START = 3,	/* Start running sequence */
+	EC_KEYSCAN_SEQ_COLLECT = 4,	/* Collect sequence summary data */
+};
+
+enum ec_collect_flags {
+	/*
+	 * Indicates this scan was processed by the EC. Due to timing, some
+	 * scans may be skipped.
+	 */
+	EC_KEYSCAN_SEQ_FLAG_DONE	= BIT(0),
+};
+
+struct ec_collect_item {
+	uint8_t flags;		/* some flags (enum ec_collect_flags) */
+} __ec_align1;
+
+struct ec_params_keyscan_seq_ctrl {
+	uint8_t cmd;	/* Command to send (enum ec_keyscan_seq_cmd) */
+	union {
+		struct __ec_align1 {
+			uint8_t active;		/* still active */
+			uint8_t num_items;	/* number of items */
+			/* Current item being presented */
+			uint8_t cur_item;
+		} status;
+		struct __ec_todo_unpacked {
+			/*
+			 * Absolute time for this scan, measured from the
+			 * start of the sequence.
+			 */
+			uint32_t time_us;
+			uint8_t scan[0];	/* keyscan data */
+		} add;
+		struct __ec_align1 {
+			uint8_t start_item;	/* First item to return */
+			uint8_t num_items;	/* Number of items to return */
+		} collect;
+	};
+} __ec_todo_packed;
+
+struct ec_result_keyscan_seq_ctrl {
+	union {
+		struct __ec_todo_unpacked {
+			uint8_t num_items;	/* Number of items */
+			/* Data for each item */
+			struct ec_collect_item item[0];
+		} collect;
+	};
+} __ec_todo_packed;
+
+/*
+ * Get the next pending MKBP event.
+ *
+ * Returns EC_RES_UNAVAILABLE if there is no event pending.
+ */
+#define EC_CMD_GET_NEXT_EVENT 0x0067
+
+#define EC_MKBP_HAS_MORE_EVENTS_SHIFT 7
+
+/*
+ * We use the most significant bit of the event type to indicate to the host
+ * that the EC has more MKBP events available to provide.
+ */
+#define EC_MKBP_HAS_MORE_EVENTS BIT(EC_MKBP_HAS_MORE_EVENTS_SHIFT)
+
+/* The mask to apply to get the raw event type */
+#define EC_MKBP_EVENT_TYPE_MASK (BIT(EC_MKBP_HAS_MORE_EVENTS_SHIFT) - 1)
+
+enum ec_mkbp_event {
+	/* Keyboard matrix changed. The event data is the new matrix state. */
+	EC_MKBP_EVENT_KEY_MATRIX = 0,
+
+	/* New host event. The event data is 4 bytes of host event flags. */
+	EC_MKBP_EVENT_HOST_EVENT = 1,
+
+	/* New Sensor FIFO data. The event data is fifo_info structure. */
+	EC_MKBP_EVENT_SENSOR_FIFO = 2,
+
+	/* The state of the non-matrixed buttons have changed. */
+	EC_MKBP_EVENT_BUTTON = 3,
+
+	/* The state of the switches have changed. */
+	EC_MKBP_EVENT_SWITCH = 4,
+
+	/* New Fingerprint sensor event, the event data is fp_events bitmap. */
+	EC_MKBP_EVENT_FINGERPRINT = 5,
+
+	/*
+	 * Sysrq event: send emulated sysrq. The event data is sysrq,
+	 * corresponding to the key to be pressed.
+	 */
+	EC_MKBP_EVENT_SYSRQ = 6,
+
+	/*
+	 * New 64-bit host event.
+	 * The event data is 8 bytes of host event flags.
+	 */
+	EC_MKBP_EVENT_HOST_EVENT64 = 7,
+
+	/* Notify the AP that something happened on CEC */
+	EC_MKBP_EVENT_CEC_EVENT = 8,
+
+	/* Send an incoming CEC message to the AP */
+	EC_MKBP_EVENT_CEC_MESSAGE = 9,
+
+	/* Number of MKBP events */
+	EC_MKBP_EVENT_COUNT,
+};
+BUILD_ASSERT(EC_MKBP_EVENT_COUNT <= EC_MKBP_EVENT_TYPE_MASK);
+
+union __ec_align_offset1 ec_response_get_next_data {
+	uint8_t key_matrix[13];
+
+	/* Unaligned */
+	uint32_t host_event;
+	uint64_t host_event64;
+
+	struct __ec_todo_unpacked {
+		/* For aligning the fifo_info */
+		uint8_t reserved[3];
+		struct ec_response_motion_sense_fifo_info info;
+	} sensor_fifo;
+
+	uint32_t buttons;
+
+	uint32_t switches;
+
+	uint32_t fp_events;
+
+	uint32_t sysrq;
+
+	/* CEC events from enum mkbp_cec_event */
+	uint32_t cec_events;
+};
+
+union __ec_align_offset1 ec_response_get_next_data_v1 {
+	uint8_t key_matrix[16];
+
+	/* Unaligned */
+	uint32_t host_event;
+	uint64_t host_event64;
+
+	struct __ec_todo_unpacked {
+		/* For aligning the fifo_info */
+		uint8_t reserved[3];
+		struct ec_response_motion_sense_fifo_info info;
+	} sensor_fifo;
+
+	uint32_t buttons;
+
+	uint32_t switches;
+
+	uint32_t fp_events;
+
+	uint32_t sysrq;
+
+	/* CEC events from enum mkbp_cec_event */
+	uint32_t cec_events;
+
+	uint8_t cec_message[16];
+};
+BUILD_ASSERT(sizeof(union ec_response_get_next_data_v1) == 16);
+
+struct ec_response_get_next_event {
+	uint8_t event_type;
+	/* Followed by event data if any */
+	union ec_response_get_next_data data;
+} __ec_align1;
+
+struct ec_response_get_next_event_v1 {
+	uint8_t event_type;
+	/* Followed by event data if any */
+	union ec_response_get_next_data_v1 data;
+} __ec_align1;
+
+/* Bit indices for buttons and switches.*/
+/* Buttons */
+#define EC_MKBP_POWER_BUTTON	0
+#define EC_MKBP_VOL_UP		1
+#define EC_MKBP_VOL_DOWN	2
+#define EC_MKBP_RECOVERY	3
+
+/* Switches */
+#define EC_MKBP_LID_OPEN	0
+#define EC_MKBP_TABLET_MODE	1
+#define EC_MKBP_BASE_ATTACHED	2
+
+/* Run keyboard factory test scanning */
+#define EC_CMD_KEYBOARD_FACTORY_TEST 0x0068
+
+struct ec_response_keyboard_factory_test {
+	uint16_t shorted;	/* Keyboard pins are shorted */
+} __ec_align2;
+
+/* Fingerprint events in 'fp_events' for EC_MKBP_EVENT_FINGERPRINT */
+#define EC_MKBP_FP_RAW_EVENT(fp_events) ((fp_events) & 0x00FFFFFF)
+#define EC_MKBP_FP_ERRCODE(fp_events)   ((fp_events) & 0x0000000F)
+#define EC_MKBP_FP_ENROLL_PROGRESS_OFFSET 4
+#define EC_MKBP_FP_ENROLL_PROGRESS(fpe) (((fpe) & 0x00000FF0) \
+					 >> EC_MKBP_FP_ENROLL_PROGRESS_OFFSET)
+#define EC_MKBP_FP_MATCH_IDX_OFFSET 12
+#define EC_MKBP_FP_MATCH_IDX_MASK 0x0000F000
+#define EC_MKBP_FP_MATCH_IDX(fpe) (((fpe) & EC_MKBP_FP_MATCH_IDX_MASK) \
+					 >> EC_MKBP_FP_MATCH_IDX_OFFSET)
+#define EC_MKBP_FP_ENROLL               BIT(27)
+#define EC_MKBP_FP_MATCH                BIT(28)
+#define EC_MKBP_FP_FINGER_DOWN          BIT(29)
+#define EC_MKBP_FP_FINGER_UP            BIT(30)
+#define EC_MKBP_FP_IMAGE_READY          BIT(31)
+/* code given by EC_MKBP_FP_ERRCODE() when EC_MKBP_FP_ENROLL is set */
+#define EC_MKBP_FP_ERR_ENROLL_OK               0
+#define EC_MKBP_FP_ERR_ENROLL_LOW_QUALITY      1
+#define EC_MKBP_FP_ERR_ENROLL_IMMOBILE         2
+#define EC_MKBP_FP_ERR_ENROLL_LOW_COVERAGE     3
+#define EC_MKBP_FP_ERR_ENROLL_INTERNAL         5
+/* Can be used to detect if image was usable for enrollment or not. */
+#define EC_MKBP_FP_ERR_ENROLL_PROBLEM_MASK     1
+/* code given by EC_MKBP_FP_ERRCODE() when EC_MKBP_FP_MATCH is set */
+#define EC_MKBP_FP_ERR_MATCH_NO                0
+#define EC_MKBP_FP_ERR_MATCH_NO_INTERNAL       6
+#define EC_MKBP_FP_ERR_MATCH_NO_TEMPLATES      7
+#define EC_MKBP_FP_ERR_MATCH_NO_LOW_QUALITY    2
+#define EC_MKBP_FP_ERR_MATCH_NO_LOW_COVERAGE   4
+#define EC_MKBP_FP_ERR_MATCH_YES               1
+#define EC_MKBP_FP_ERR_MATCH_YES_UPDATED       3
+#define EC_MKBP_FP_ERR_MATCH_YES_UPDATE_FAILED 5
+
+
+/*****************************************************************************/
+/* Temperature sensor commands */
+
+/* Read temperature sensor info */
+#define EC_CMD_TEMP_SENSOR_GET_INFO 0x0070
+
+struct ec_params_temp_sensor_get_info {
+	uint8_t id;
+} __ec_align1;
+
+struct ec_response_temp_sensor_get_info {
+	char sensor_name[32];
+	uint8_t sensor_type;
+} __ec_align1;
+
+/*****************************************************************************/
+
+/*
+ * Note: host commands 0x80 - 0x87 are reserved to avoid conflict with ACPI
+ * commands accidentally sent to the wrong interface.  See the ACPI section
+ * below.
+ */
+
+/*****************************************************************************/
+/* Host event commands */
+
+
+/* Obsolete. New implementation should use EC_CMD_HOST_EVENT instead */
+/*
+ * Host event mask params and response structures, shared by all of the host
+ * event commands below.
+ */
+struct ec_params_host_event_mask {
+	uint32_t mask;
+} __ec_align4;
+
+struct ec_response_host_event_mask {
+	uint32_t mask;
+} __ec_align4;
+
+/* These all use ec_response_host_event_mask */
+#define EC_CMD_HOST_EVENT_GET_B         0x0087
+#define EC_CMD_HOST_EVENT_GET_SMI_MASK  0x0088
+#define EC_CMD_HOST_EVENT_GET_SCI_MASK  0x0089
+#define EC_CMD_HOST_EVENT_GET_WAKE_MASK 0x008D
+
+/* These all use ec_params_host_event_mask */
+#define EC_CMD_HOST_EVENT_SET_SMI_MASK  0x008A
+#define EC_CMD_HOST_EVENT_SET_SCI_MASK  0x008B
+#define EC_CMD_HOST_EVENT_CLEAR         0x008C
+#define EC_CMD_HOST_EVENT_SET_WAKE_MASK 0x008E
+#define EC_CMD_HOST_EVENT_CLEAR_B       0x008F
+
+/*
+ * Unified host event programming interface - Should be used by newer versions
+ * of BIOS/OS to program host events and masks
+ */
+
+struct ec_params_host_event {
+
+	/* Action requested by host - one of enum ec_host_event_action. */
+	uint8_t action;
+
+	/*
+	 * Mask type that the host requested the action on - one of
+	 * enum ec_host_event_mask_type.
+	 */
+	uint8_t mask_type;
+
+	/* Set to 0, ignore on read */
+	uint16_t reserved;
+
+	/* Value to be used in case of set operations. */
+	uint64_t value;
+} __ec_align4;
+
+/*
+ * Response structure returned by EC_CMD_HOST_EVENT.
+ * Update the value on a GET request. Set to 0 on GET/CLEAR
+ */
+
+struct ec_response_host_event {
+
+	/* Mask value in case of get operation */
+	uint64_t value;
+} __ec_align4;
+
+enum ec_host_event_action {
+	/*
+	 * params.value is ignored. Value of mask_type populated
+	 * in response.value
+	 */
+	EC_HOST_EVENT_GET,
+
+	/* Bits in params.value are set */
+	EC_HOST_EVENT_SET,
+
+	/* Bits in params.value are cleared */
+	EC_HOST_EVENT_CLEAR,
+};
+
+enum ec_host_event_mask_type {
+
+	/* Main host event copy */
+	EC_HOST_EVENT_MAIN,
+
+	/* Copy B of host events */
+	EC_HOST_EVENT_B,
+
+	/* SCI Mask */
+	EC_HOST_EVENT_SCI_MASK,
+
+	/* SMI Mask */
+	EC_HOST_EVENT_SMI_MASK,
+
+	/* Mask of events that should be always reported in hostevents */
+	EC_HOST_EVENT_ALWAYS_REPORT_MASK,
+
+	/* Active wake mask */
+	EC_HOST_EVENT_ACTIVE_WAKE_MASK,
+
+	/* Lazy wake mask for S0ix */
+	EC_HOST_EVENT_LAZY_WAKE_MASK_S0IX,
+
+	/* Lazy wake mask for S3 */
+	EC_HOST_EVENT_LAZY_WAKE_MASK_S3,
+
+	/* Lazy wake mask for S5 */
+	EC_HOST_EVENT_LAZY_WAKE_MASK_S5,
+};
+
+#define EC_CMD_HOST_EVENT       0x00A4
+
+/*****************************************************************************/
+/* Switch commands */
+
+/* Enable/disable LCD backlight */
+#define EC_CMD_SWITCH_ENABLE_BKLIGHT 0x0090
+
+struct ec_params_switch_enable_backlight {
+	uint8_t enabled;
+} __ec_align1;
+
+/* Enable/disable WLAN/Bluetooth */
+#define EC_CMD_SWITCH_ENABLE_WIRELESS 0x0091
+#define EC_VER_SWITCH_ENABLE_WIRELESS 1
+
+/* Version 0 params; no response */
+struct ec_params_switch_enable_wireless_v0 {
+	uint8_t enabled;
+} __ec_align1;
+
+/* Version 1 params */
+struct ec_params_switch_enable_wireless_v1 {
+	/* Flags to enable now */
+	uint8_t now_flags;
+
+	/* Which flags to copy from now_flags */
+	uint8_t now_mask;
+
+	/*
+	 * Flags to leave enabled in S3, if they're on at the S0->S3
+	 * transition.  (Other flags will be disabled by the S0->S3
+	 * transition.)
+	 */
+	uint8_t suspend_flags;
+
+	/* Which flags to copy from suspend_flags */
+	uint8_t suspend_mask;
+} __ec_align1;
+
+/* Version 1 response */
+struct ec_response_switch_enable_wireless_v1 {
+	/* Flags to enable now */
+	uint8_t now_flags;
+
+	/* Flags to leave enabled in S3 */
+	uint8_t suspend_flags;
+} __ec_align1;
+
+/*****************************************************************************/
+/* GPIO commands. Only available on EC if write protect has been disabled. */
+
+/* Set GPIO output value */
+#define EC_CMD_GPIO_SET 0x0092
+
+struct ec_params_gpio_set {
+	char name[32];
+	uint8_t val;
+} __ec_align1;
+
+/* Get GPIO value */
+#define EC_CMD_GPIO_GET 0x0093
+
+/* Version 0 of input params and response */
+struct ec_params_gpio_get {
+	char name[32];
+} __ec_align1;
+
+struct ec_response_gpio_get {
+	uint8_t val;
+} __ec_align1;
+
+/* Version 1 of input params and response */
+struct ec_params_gpio_get_v1 {
+	uint8_t subcmd;
+	union {
+		struct __ec_align1 {
+			char name[32];
+		} get_value_by_name;
+		struct __ec_align1 {
+			uint8_t index;
+		} get_info;
+	};
+} __ec_align1;
+
+struct ec_response_gpio_get_v1 {
+	union {
+		struct __ec_align1 {
+			uint8_t val;
+		} get_value_by_name, get_count;
+		struct __ec_todo_unpacked {
+			uint8_t val;
+			char name[32];
+			uint32_t flags;
+		} get_info;
+	};
+} __ec_todo_packed;
+
+enum gpio_get_subcmd {
+	EC_GPIO_GET_BY_NAME = 0,
+	EC_GPIO_GET_COUNT = 1,
+	EC_GPIO_GET_INFO = 2,
+};
+
+/*****************************************************************************/
+/* I2C commands. Only available when flash write protect is unlocked. */
+
+/*
+ * CAUTION: These commands are deprecated, and are not supported anymore in EC
+ * builds >= 8398.0.0 (see crosbug.com/p/23570).
+ *
+ * Use EC_CMD_I2C_PASSTHRU instead.
+ */
+
+/* Read I2C bus */
+#define EC_CMD_I2C_READ 0x0094
+
+struct ec_params_i2c_read {
+	uint16_t addr; /* 8-bit address (7-bit shifted << 1) */
+	uint8_t read_size; /* Either 8 or 16. */
+	uint8_t port;
+	uint8_t offset;
+} __ec_align_size1;
+
+struct ec_response_i2c_read {
+	uint16_t data;
+} __ec_align2;
+
+/* Write I2C bus */
+#define EC_CMD_I2C_WRITE 0x0095
+
+struct ec_params_i2c_write {
+	uint16_t data;
+	uint16_t addr; /* 8-bit address (7-bit shifted << 1) */
+	uint8_t write_size; /* Either 8 or 16. */
+	uint8_t port;
+	uint8_t offset;
+} __ec_align_size1;
+
+/*****************************************************************************/
+/* Charge state commands. Only available when flash write protect unlocked. */
+
+/* Force charge state machine to stop charging the battery or force it to
+ * discharge the battery.
+ */
+#define EC_CMD_CHARGE_CONTROL 0x0096
+#define EC_VER_CHARGE_CONTROL 1
+
+enum ec_charge_control_mode {
+	CHARGE_CONTROL_NORMAL = 0,
+	CHARGE_CONTROL_IDLE,
+	CHARGE_CONTROL_DISCHARGE,
+};
+
+struct ec_params_charge_control {
+	uint32_t mode;  /* enum charge_control_mode */
+} __ec_align4;
+
+/*****************************************************************************/
+
+/* Snapshot console output buffer for use by EC_CMD_CONSOLE_READ. */
+#define EC_CMD_CONSOLE_SNAPSHOT 0x0097
+
+/*
+ * Read data from the saved snapshot. If the subcmd parameter is
+ * CONSOLE_READ_NEXT, this will return data starting from the beginning of
+ * the latest snapshot. If it is CONSOLE_READ_RECENT, it will start from the
+ * end of the previous snapshot.
+ *
+ * The params are only looked at in version >= 1 of this command. Prior
+ * versions will just default to CONSOLE_READ_NEXT behavior.
+ *
+ * Response is null-terminated string.  Empty string, if there is no more
+ * remaining output.
+ */
+#define EC_CMD_CONSOLE_READ 0x0098
+
+enum ec_console_read_subcmd {
+	CONSOLE_READ_NEXT = 0,
+	CONSOLE_READ_RECENT
+};
+
+struct ec_params_console_read_v1 {
+	uint8_t subcmd; /* enum ec_console_read_subcmd */
+} __ec_align1;
+
+/*****************************************************************************/
+
+/*
+ * Cut off battery power immediately or after the host has shut down.
+ *
+ * return EC_RES_INVALID_COMMAND if unsupported by a board/battery.
+ *	  EC_RES_SUCCESS if the command was successful.
+ *	  EC_RES_ERROR if the cut off command failed.
+ */
+#define EC_CMD_BATTERY_CUT_OFF 0x0099
+
+#define EC_BATTERY_CUTOFF_FLAG_AT_SHUTDOWN	BIT(0)
+
+struct ec_params_battery_cutoff {
+	uint8_t flags;
+} __ec_align1;
+
+/*****************************************************************************/
+/* USB port mux control. */
+
+/*
+ * Switch USB mux or return to automatic switching.
+ */
+#define EC_CMD_USB_MUX 0x009A
+
+struct ec_params_usb_mux {
+	uint8_t mux;
+} __ec_align1;
+
+/*****************************************************************************/
+/* LDOs / FETs control. */
+
+enum ec_ldo_state {
+	EC_LDO_STATE_OFF = 0,	/* the LDO / FET is shut down */
+	EC_LDO_STATE_ON = 1,	/* the LDO / FET is ON / providing power */
+};
+
+/*
+ * Switch on/off a LDO.
+ */
+#define EC_CMD_LDO_SET 0x009B
+
+struct ec_params_ldo_set {
+	uint8_t index;
+	uint8_t state;
+} __ec_align1;
+
+/*
+ * Get LDO state.
+ */
+#define EC_CMD_LDO_GET 0x009C
+
+struct ec_params_ldo_get {
+	uint8_t index;
+} __ec_align1;
+
+struct ec_response_ldo_get {
+	uint8_t state;
+} __ec_align1;
+
+/*****************************************************************************/
+/* Power info. */
+
+/*
+ * Get power info.
+ */
+#define EC_CMD_POWER_INFO 0x009D
+
+struct ec_response_power_info {
+	uint32_t usb_dev_type;
+	uint16_t voltage_ac;
+	uint16_t voltage_system;
+	uint16_t current_system;
+	uint16_t usb_current_limit;
+} __ec_align4;
+
+/*****************************************************************************/
+/* I2C passthru command */
+
+#define EC_CMD_I2C_PASSTHRU 0x009E
+
+/* Read data; if not present, message is a write */
+#define EC_I2C_FLAG_READ	BIT(15)
+
+/* Mask for address */
+#define EC_I2C_ADDR_MASK	0x3ff
+
+#define EC_I2C_STATUS_NAK	BIT(0) /* Transfer was not acknowledged */
+#define EC_I2C_STATUS_TIMEOUT	BIT(1) /* Timeout during transfer */
+
+/* Any error */
+#define EC_I2C_STATUS_ERROR	(EC_I2C_STATUS_NAK | EC_I2C_STATUS_TIMEOUT)
+
+struct ec_params_i2c_passthru_msg {
+	uint16_t addr_flags;	/* I2C slave address (7 or 10 bits) and flags */
+	uint16_t len;		/* Number of bytes to read or write */
+} __ec_align2;
+
+struct ec_params_i2c_passthru {
+	uint8_t port;		/* I2C port number */
+	uint8_t num_msgs;	/* Number of messages */
+	struct ec_params_i2c_passthru_msg msg[];
+	/* Data to write for all messages is concatenated here */
+} __ec_align2;
+
+struct ec_response_i2c_passthru {
+	uint8_t i2c_status;	/* Status flags (EC_I2C_STATUS_...) */
+	uint8_t num_msgs;	/* Number of messages processed */
+	uint8_t data[];		/* Data read by messages concatenated here */
+} __ec_align1;
+
+/*****************************************************************************/
+/* Power button hang detect */
+
+#define EC_CMD_HANG_DETECT 0x009F
+
+/* Reasons to start hang detection timer */
+/* Power button pressed */
+#define EC_HANG_START_ON_POWER_PRESS  BIT(0)
+
+/* Lid closed */
+#define EC_HANG_START_ON_LID_CLOSE    BIT(1)
+
+ /* Lid opened */
+#define EC_HANG_START_ON_LID_OPEN     BIT(2)
+
+/* Start of AP S3->S0 transition (booting or resuming from suspend) */
+#define EC_HANG_START_ON_RESUME       BIT(3)
+
+/* Reasons to cancel hang detection */
+
+/* Power button released */
+#define EC_HANG_STOP_ON_POWER_RELEASE BIT(8)
+
+/* Any host command from AP received */
+#define EC_HANG_STOP_ON_HOST_COMMAND  BIT(9)
+
+/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
+#define EC_HANG_STOP_ON_SUSPEND       BIT(10)
+
+/*
+ * If this flag is set, all the other fields are ignored, and the hang detect
+ * timer is started.  This provides the AP a way to start the hang timer
+ * without reconfiguring any of the other hang detect settings.  Note that
+ * you must previously have configured the timeouts.
+ */
+#define EC_HANG_START_NOW             BIT(30)
+
+/*
+ * If this flag is set, all the other fields are ignored (including
+ * EC_HANG_START_NOW).  This provides the AP a way to stop the hang timer
+ * without reconfiguring any of the other hang detect settings.
+ */
+#define EC_HANG_STOP_NOW              BIT(31)
+
+struct ec_params_hang_detect {
+	/* Flags; see EC_HANG_* */
+	uint32_t flags;
+
+	/* Timeout in msec before generating host event, if enabled */
+	uint16_t host_event_timeout_msec;
+
+	/* Timeout in msec before generating warm reboot, if enabled */
+	uint16_t warm_reboot_timeout_msec;
+} __ec_align4;
+
+/*****************************************************************************/
+/* Commands for battery charging */
+
+/*
+ * This is the single catch-all host command to exchange data regarding the
+ * charge state machine (v2 and up).
+ */
+#define EC_CMD_CHARGE_STATE 0x00A0
+
+/* Subcommands for this host command */
+enum charge_state_command {
+	CHARGE_STATE_CMD_GET_STATE,
+	CHARGE_STATE_CMD_GET_PARAM,
+	CHARGE_STATE_CMD_SET_PARAM,
+	CHARGE_STATE_NUM_CMDS
+};
+
+/*
+ * Known param numbers are defined here. Ranges are reserved for board-specific
+ * params, which are handled by the particular implementations.
+ */
+enum charge_state_params {
+	CS_PARAM_CHG_VOLTAGE,	      /* charger voltage limit */
+	CS_PARAM_CHG_CURRENT,	      /* charger current limit */
+	CS_PARAM_CHG_INPUT_CURRENT,   /* charger input current limit */
+	CS_PARAM_CHG_STATUS,	      /* charger-specific status */
+	CS_PARAM_CHG_OPTION,	      /* charger-specific options */
+	CS_PARAM_LIMIT_POWER,	      /*
+				       * Check if power is limited due to
+				       * low battery and / or a weak external
+				       * charger. READ ONLY.
+				       */
+	/* How many so far? */
+	CS_NUM_BASE_PARAMS,
+
+	/* Range for CONFIG_CHARGER_PROFILE_OVERRIDE params */
+	CS_PARAM_CUSTOM_PROFILE_MIN = 0x10000,
+	CS_PARAM_CUSTOM_PROFILE_MAX = 0x1ffff,
+
+	/* Range for CONFIG_CHARGE_STATE_DEBUG params */
+	CS_PARAM_DEBUG_MIN = 0x20000,
+	CS_PARAM_DEBUG_CTL_MODE = 0x20000,
+	CS_PARAM_DEBUG_MANUAL_MODE,
+	CS_PARAM_DEBUG_SEEMS_DEAD,
+	CS_PARAM_DEBUG_SEEMS_DISCONNECTED,
+	CS_PARAM_DEBUG_BATT_REMOVED,
+	CS_PARAM_DEBUG_MANUAL_CURRENT,
+	CS_PARAM_DEBUG_MANUAL_VOLTAGE,
+	CS_PARAM_DEBUG_MAX = 0x2ffff,
+
+	/* Other custom param ranges go here... */
+};
+
+struct ec_params_charge_state {
+	uint8_t cmd;				/* enum charge_state_command */
+	union {
+		/* get_state has no args */
+
+		struct __ec_todo_unpacked {
+			uint32_t param;		/* enum charge_state_param */
+		} get_param;
+
+		struct __ec_todo_unpacked {
+			uint32_t param;		/* param to set */
+			uint32_t value;		/* value to set */
+		} set_param;
+	};
+} __ec_todo_packed;
+
+struct ec_response_charge_state {
+	union {
+		struct __ec_align4 {
+			int ac;
+			int chg_voltage;
+			int chg_current;
+			int chg_input_current;
+			int batt_state_of_charge;
+		} get_state;
+
+		struct __ec_align4 {
+			uint32_t value;
+		} get_param;
+
+		/* set_param returns no args */
+	};
+} __ec_align4;
+
+
+/*
+ * Set maximum battery charging current.
+ */
+#define EC_CMD_CHARGE_CURRENT_LIMIT 0x00A1
+
+struct ec_params_current_limit {
+	uint32_t limit; /* in mA */
+} __ec_align4;
+
+/*
+ * Set maximum external voltage / current.
+ */
+#define EC_CMD_EXTERNAL_POWER_LIMIT 0x00A2
+
+/* Command v0 is used only on Spring and is obsolete + unsupported */
+struct ec_params_external_power_limit_v1 {
+	uint16_t current_lim; /* in mA, or EC_POWER_LIMIT_NONE to clear limit */
+	uint16_t voltage_lim; /* in mV, or EC_POWER_LIMIT_NONE to clear limit */
+} __ec_align2;
+
+#define EC_POWER_LIMIT_NONE 0xffff
+
+/*
+ * Set maximum voltage & current of a dedicated charge port
+ */
+#define EC_CMD_OVERRIDE_DEDICATED_CHARGER_LIMIT 0x00A3
+
+struct ec_params_dedicated_charger_limit {
+	uint16_t current_lim; /* in mA */
+	uint16_t voltage_lim; /* in mV */
+} __ec_align2;
+
+/*****************************************************************************/
+/* Hibernate/Deep Sleep Commands */
+
+/* Set the delay before going into hibernation. */
+#define EC_CMD_HIBERNATION_DELAY 0x00A8
+
+struct ec_params_hibernation_delay {
+	/*
+	 * Seconds to wait in G3 before hibernate.  Pass in 0 to read the
+	 * current settings without changing them.
+	 */
+	uint32_t seconds;
+} __ec_align4;
+
+struct ec_response_hibernation_delay {
+	/*
+	 * The current time in seconds in which the system has been in the G3
+	 * state.  This value is reset if the EC transitions out of G3.
+	 */
+	uint32_t time_g3;
+
+	/*
+	 * The current time remaining in seconds until the EC should hibernate.
+	 * This value is also reset if the EC transitions out of G3.
+	 */
+	uint32_t time_remaining;
+
+	/*
+	 * The current time in seconds that the EC should wait in G3 before
+	 * hibernating.
+	 */
+	uint32_t hibernate_delay;
+} __ec_align4;
+
+/* Inform the EC when entering a sleep state */
+#define EC_CMD_HOST_SLEEP_EVENT 0x00A9
+
+enum host_sleep_event {
+	HOST_SLEEP_EVENT_S3_SUSPEND   = 1,
+	HOST_SLEEP_EVENT_S3_RESUME    = 2,
+	HOST_SLEEP_EVENT_S0IX_SUSPEND = 3,
+	HOST_SLEEP_EVENT_S0IX_RESUME  = 4,
+	/* S3 suspend with additional enabled wake sources */
+	HOST_SLEEP_EVENT_S3_WAKEABLE_SUSPEND = 5,
+};
+
+struct ec_params_host_sleep_event {
+	uint8_t sleep_event;
+} __ec_align1;
+
+/*
+ * Use a default timeout value (CONFIG_SLEEP_TIMEOUT_MS) for detecting sleep
+ * transition failures
+ */
+#define EC_HOST_SLEEP_TIMEOUT_DEFAULT 0
+
+/* Disable timeout detection for this sleep transition */
+#define EC_HOST_SLEEP_TIMEOUT_INFINITE 0xFFFF
+
+struct ec_params_host_sleep_event_v1 {
+	/* The type of sleep being entered or exited. */
+	uint8_t sleep_event;
+
+	/* Padding */
+	uint8_t reserved;
+	union {
+		/* Parameters that apply for suspend messages. */
+		struct {
+			/*
+			 * The timeout in milliseconds between when this message
+			 * is received and when the EC will declare sleep
+			 * transition failure if the sleep signal is not
+			 * asserted.
+			 */
+			uint16_t sleep_timeout_ms;
+		} suspend_params;
+
+		/* No parameters for non-suspend messages. */
+	};
+} __ec_align2;
+
+/* A timeout occurred when this bit is set */
+#define EC_HOST_RESUME_SLEEP_TIMEOUT 0x80000000
+
+/*
+ * The mask defining which bits correspond to the number of sleep transitions,
+ * as well as the maximum number of suspend line transitions that will be
+ * reported back to the host.
+ */
+#define EC_HOST_RESUME_SLEEP_TRANSITIONS_MASK 0x7FFFFFFF
+
+struct ec_response_host_sleep_event_v1 {
+	union {
+		/* Response fields that apply for resume messages. */
+		struct {
+			/*
+			 * The number of sleep power signal transitions that
+			 * occurred since the suspend message. The high bit
+			 * indicates a timeout occurred.
+			 */
+			uint32_t sleep_transitions;
+		} resume_response;
+
+		/* No response fields for non-resume messages. */
+	};
+} __ec_align4;
+
+/*****************************************************************************/
+/* Device events */
+#define EC_CMD_DEVICE_EVENT 0x00AA
+
+enum ec_device_event {
+	EC_DEVICE_EVENT_TRACKPAD,
+	EC_DEVICE_EVENT_DSP,
+	EC_DEVICE_EVENT_WIFI,
+};
+
+enum ec_device_event_param {
+	/* Get and clear pending device events */
+	EC_DEVICE_EVENT_PARAM_GET_CURRENT_EVENTS,
+	/* Get device event mask */
+	EC_DEVICE_EVENT_PARAM_GET_ENABLED_EVENTS,
+	/* Set device event mask */
+	EC_DEVICE_EVENT_PARAM_SET_ENABLED_EVENTS,
+};
+
+#define EC_DEVICE_EVENT_MASK(event_code) BIT(event_code % 32)
+
+struct ec_params_device_event {
+	uint32_t event_mask;
+	uint8_t param;
+} __ec_align_size1;
+
+struct ec_response_device_event {
+	uint32_t event_mask;
+} __ec_align4;
+
+/*****************************************************************************/
+/* Smart battery pass-through */
+
+/* Get / Set 16-bit smart battery registers */
+#define EC_CMD_SB_READ_WORD   0x00B0
+#define EC_CMD_SB_WRITE_WORD  0x00B1
+
+/* Get / Set string smart battery parameters
+ * formatted as SMBUS "block".
+ */
+#define EC_CMD_SB_READ_BLOCK  0x00B2
+#define EC_CMD_SB_WRITE_BLOCK 0x00B3
+
+struct ec_params_sb_rd {
+	uint8_t reg;
+} __ec_align1;
+
+struct ec_response_sb_rd_word {
+	uint16_t value;
+} __ec_align2;
+
+struct ec_params_sb_wr_word {
+	uint8_t reg;
+	uint16_t value;
+} __ec_align1;
+
+struct ec_response_sb_rd_block {
+	uint8_t data[32];
+} __ec_align1;
+
+struct ec_params_sb_wr_block {
+	uint8_t reg;
+	uint16_t data[32];
+} __ec_align1;
+
+/*****************************************************************************/
+/* Battery vendor parameters
+ *
+ * Get or set vendor-specific parameters in the battery. Implementations may
+ * differ between boards or batteries. On a set operation, the response
+ * contains the actual value set, which may be rounded or clipped from the
+ * requested value.
+ */
+
+#define EC_CMD_BATTERY_VENDOR_PARAM 0x00B4
+
+enum ec_battery_vendor_param_mode {
+	BATTERY_VENDOR_PARAM_MODE_GET = 0,
+	BATTERY_VENDOR_PARAM_MODE_SET,
+};
+
+struct ec_params_battery_vendor_param {
+	uint32_t param;
+	uint32_t value;
+	uint8_t mode;
+} __ec_align_size1;
+
+struct ec_response_battery_vendor_param {
+	uint32_t value;
+} __ec_align4;
+
+/*****************************************************************************/
+/*
+ * Smart Battery Firmware Update Commands
+ */
+#define EC_CMD_SB_FW_UPDATE 0x00B5
+
+enum ec_sb_fw_update_subcmd {
+	EC_SB_FW_UPDATE_PREPARE  = 0x0,
+	EC_SB_FW_UPDATE_INFO     = 0x1, /*query sb info */
+	EC_SB_FW_UPDATE_BEGIN    = 0x2, /*check if protected */
+	EC_SB_FW_UPDATE_WRITE    = 0x3, /*check if protected */
+	EC_SB_FW_UPDATE_END      = 0x4,
+	EC_SB_FW_UPDATE_STATUS   = 0x5,
+	EC_SB_FW_UPDATE_PROTECT  = 0x6,
+	EC_SB_FW_UPDATE_MAX      = 0x7,
+};
+
+#define SB_FW_UPDATE_CMD_WRITE_BLOCK_SIZE 32
+#define SB_FW_UPDATE_CMD_STATUS_SIZE 2
+#define SB_FW_UPDATE_CMD_INFO_SIZE 8
+
+struct ec_sb_fw_update_header {
+	uint16_t subcmd;  /* enum ec_sb_fw_update_subcmd */
+	uint16_t fw_id;   /* firmware id */
+} __ec_align4;
+
+struct ec_params_sb_fw_update {
+	struct ec_sb_fw_update_header hdr;
+	union {
+		/* EC_SB_FW_UPDATE_PREPARE  = 0x0 */
+		/* EC_SB_FW_UPDATE_INFO     = 0x1 */
+		/* EC_SB_FW_UPDATE_BEGIN    = 0x2 */
+		/* EC_SB_FW_UPDATE_END      = 0x4 */
+		/* EC_SB_FW_UPDATE_STATUS   = 0x5 */
+		/* EC_SB_FW_UPDATE_PROTECT  = 0x6 */
+		/* Those have no args */
+
+		/* EC_SB_FW_UPDATE_WRITE    = 0x3 */
+		struct __ec_align4 {
+			uint8_t  data[SB_FW_UPDATE_CMD_WRITE_BLOCK_SIZE];
+		} write;
+	};
+} __ec_align4;
+
+struct ec_response_sb_fw_update {
+	union {
+		/* EC_SB_FW_UPDATE_INFO     = 0x1 */
+		struct __ec_align1 {
+			uint8_t data[SB_FW_UPDATE_CMD_INFO_SIZE];
+		} info;
+
+		/* EC_SB_FW_UPDATE_STATUS   = 0x5 */
+		struct __ec_align1 {
+			uint8_t data[SB_FW_UPDATE_CMD_STATUS_SIZE];
+		} status;
+	};
+} __ec_align1;
+
+/*
+ * Entering Verified Boot Mode Command
+ * Default mode is VBOOT_MODE_NORMAL if EC did not receive this command.
+ * Valid Modes are: normal, developer, and recovery.
+ */
+#define EC_CMD_ENTERING_MODE 0x00B6
+
+struct ec_params_entering_mode {
+	int vboot_mode;
+} __ec_align4;
+
+#define VBOOT_MODE_NORMAL    0
+#define VBOOT_MODE_DEVELOPER 1
+#define VBOOT_MODE_RECOVERY  2
+
+/*****************************************************************************/
+/*
+ * I2C passthru protection command: Protects I2C tunnels against access on
+ * certain addresses (board-specific).
+ */
+#define EC_CMD_I2C_PASSTHRU_PROTECT 0x00B7
+
+enum ec_i2c_passthru_protect_subcmd {
+	EC_CMD_I2C_PASSTHRU_PROTECT_STATUS = 0x0,
+	EC_CMD_I2C_PASSTHRU_PROTECT_ENABLE = 0x1,
+};
+
+struct ec_params_i2c_passthru_protect {
+	uint8_t subcmd;
+	uint8_t port;		/* I2C port number */
+} __ec_align1;
+
+struct ec_response_i2c_passthru_protect {
+	uint8_t status;		/* Status flags (0: unlocked, 1: locked) */
+} __ec_align1;
+
+
+/*****************************************************************************/
+/*
+ * HDMI CEC commands
+ *
+ * These commands are for sending and receiving message via HDMI CEC
+ */
+
+#define MAX_CEC_MSG_LEN 16
+
+/* CEC message from the AP to be written on the CEC bus */
+#define EC_CMD_CEC_WRITE_MSG 0x00B8
+
+/**
+ * struct ec_params_cec_write - Message to write to the CEC bus
+ * @msg: message content to write to the CEC bus
+ */
+struct ec_params_cec_write {
+	uint8_t msg[MAX_CEC_MSG_LEN];
+} __ec_align1;
+
+/* Set various CEC parameters */
+#define EC_CMD_CEC_SET 0x00BA
+
+/**
+ * struct ec_params_cec_set - CEC parameters set
+ * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ * @val: in case cmd is CEC_CMD_ENABLE, this field can be 0 to disable CEC
+ *	or 1 to enable CEC functionality, in case cmd is
+ *	CEC_CMD_LOGICAL_ADDRESS, this field encodes the requested logical
+ *	address between 0 and 15 or 0xff to unregister
+ */
+struct ec_params_cec_set {
+	uint8_t cmd; /* enum cec_command */
+	uint8_t val;
+} __ec_align1;
+
+/* Read various CEC parameters */
+#define EC_CMD_CEC_GET 0x00BB
+
+/**
+ * struct ec_params_cec_get - CEC parameters get
+ * @cmd: parameter type, can be CEC_CMD_ENABLE or CEC_CMD_LOGICAL_ADDRESS
+ */
+struct ec_params_cec_get {
+	uint8_t cmd; /* enum cec_command */
+} __ec_align1;
+
+/**
+ * struct ec_response_cec_get - CEC parameters get response
+ * @val: in case cmd was CEC_CMD_ENABLE, this field will 0 if CEC is
+ *	disabled or 1 if CEC functionality is enabled,
+ *	in case cmd was CEC_CMD_LOGICAL_ADDRESS, this will encode the
+ *	configured logical address between 0 and 15 or 0xff if unregistered
+ */
+struct ec_response_cec_get {
+	uint8_t val;
+} __ec_align1;
+
+/* CEC parameters command */
+enum cec_command {
+	/* CEC reading, writing and events enable */
+	CEC_CMD_ENABLE,
+	/* CEC logical address  */
+	CEC_CMD_LOGICAL_ADDRESS,
+};
+
+/* Events from CEC to AP */
+enum mkbp_cec_event {
+	/* Outgoing message was acknowledged by a follower */
+	EC_MKBP_CEC_SEND_OK			= BIT(0),
+	/* Outgoing message was not acknowledged */
+	EC_MKBP_CEC_SEND_FAILED			= BIT(1),
+};
+
+/*****************************************************************************/
+
+/* Commands for I2S recording on audio codec. */
+
+#define EC_CMD_CODEC_I2S 0x00BC
+#define EC_WOV_I2S_SAMPLE_RATE 48000
+
+enum ec_codec_i2s_subcmd {
+	EC_CODEC_SET_SAMPLE_DEPTH = 0x0,
+	EC_CODEC_SET_GAIN = 0x1,
+	EC_CODEC_GET_GAIN = 0x2,
+	EC_CODEC_I2S_ENABLE = 0x3,
+	EC_CODEC_I2S_SET_CONFIG = 0x4,
+	EC_CODEC_I2S_SET_TDM_CONFIG = 0x5,
+	EC_CODEC_I2S_SET_BCLK = 0x6,
+	EC_CODEC_I2S_SUBCMD_COUNT = 0x7,
+};
+
+enum ec_sample_depth_value {
+	EC_CODEC_SAMPLE_DEPTH_16 = 0,
+	EC_CODEC_SAMPLE_DEPTH_24 = 1,
+};
+
+enum ec_i2s_config {
+	EC_DAI_FMT_I2S = 0,
+	EC_DAI_FMT_RIGHT_J = 1,
+	EC_DAI_FMT_LEFT_J = 2,
+	EC_DAI_FMT_PCM_A = 3,
+	EC_DAI_FMT_PCM_B = 4,
+	EC_DAI_FMT_PCM_TDM = 5,
+};
+
+/*
+ * For subcommand EC_CODEC_GET_GAIN.
+ */
+struct __ec_align1 ec_codec_i2s_gain {
+	uint8_t left;
+	uint8_t right;
+};
+
+struct __ec_todo_unpacked ec_param_codec_i2s_tdm {
+	int16_t ch0_delay; /* 0 to 496 */
+	int16_t ch1_delay; /* -1 to 496 */
+	uint8_t adjacent_to_ch0;
+	uint8_t adjacent_to_ch1;
+};
+
+struct __ec_todo_packed ec_param_codec_i2s {
+	/* enum ec_codec_i2s_subcmd */
+	uint8_t cmd;
+	union {
+		/*
+		 * EC_CODEC_SET_SAMPLE_DEPTH
+		 * Value should be one of ec_sample_depth_value.
+		 */
+		uint8_t depth;
+
+		/*
+		 * EC_CODEC_SET_GAIN
+		 * Value should be 0~43 for both channels.
+		 */
+		struct ec_codec_i2s_gain gain;
+
+		/*
+		 * EC_CODEC_I2S_ENABLE
+		 * 1 to enable, 0 to disable.
+		 */
+		uint8_t i2s_enable;
+
+		/*
+		 * EC_CODEC_I2S_SET_CONFIG
+		 * Value should be one of ec_i2s_config.
+		 */
+		uint8_t i2s_config;
+
+		/*
+		 * EC_CODEC_I2S_SET_TDM_CONFIG
+		 * Value should be one of ec_i2s_config.
+		 */
+		struct ec_param_codec_i2s_tdm tdm_param;
+
+		/*
+		 * EC_CODEC_I2S_SET_BCLK
+		 */
+		uint32_t bclk;
+	};
+};
+
+
+/*****************************************************************************/
+/* System commands */
+
+/*
+ * TODO(crosbug.com/p/23747): This is a confusing name, since it doesn't
+ * necessarily reboot the EC.  Rename to "image" or something similar?
+ */
+#define EC_CMD_REBOOT_EC 0x00D2
+
+/* Command */
+enum ec_reboot_cmd {
+	EC_REBOOT_CANCEL = 0,        /* Cancel a pending reboot */
+	EC_REBOOT_JUMP_RO = 1,       /* Jump to RO without rebooting */
+	EC_REBOOT_JUMP_RW = 2,       /* Jump to active RW without rebooting */
+	/* (command 3 was jump to RW-B) */
+	EC_REBOOT_COLD = 4,          /* Cold-reboot */
+	EC_REBOOT_DISABLE_JUMP = 5,  /* Disable jump until next reboot */
+	EC_REBOOT_HIBERNATE = 6,     /* Hibernate EC */
+	EC_REBOOT_HIBERNATE_CLEAR_AP_OFF = 7, /* and clears AP_OFF flag */
+};
+
+/* Flags for ec_params_reboot_ec.reboot_flags */
+#define EC_REBOOT_FLAG_RESERVED0      BIT(0)  /* Was recovery request */
+#define EC_REBOOT_FLAG_ON_AP_SHUTDOWN BIT(1)  /* Reboot after AP shutdown */
+#define EC_REBOOT_FLAG_SWITCH_RW_SLOT BIT(2)  /* Switch RW slot */
+
+struct ec_params_reboot_ec {
+	uint8_t cmd;           /* enum ec_reboot_cmd */
+	uint8_t flags;         /* See EC_REBOOT_FLAG_* */
+} __ec_align1;
+
+/*
+ * Get information on last EC panic.
+ *
+ * Returns variable-length platform-dependent panic information.  See panic.h
+ * for details.
+ */
+#define EC_CMD_GET_PANIC_INFO 0x00D3
+
+/*****************************************************************************/
+/*
+ * Special commands
+ *
+ * These do not follow the normal rules for commands.  See each command for
+ * details.
+ */
+
+/*
+ * Reboot NOW
+ *
+ * This command will work even when the EC LPC interface is busy, because the
+ * reboot command is processed at interrupt level.  Note that when the EC
+ * reboots, the host will reboot too, so there is no response to this command.
+ *
+ * Use EC_CMD_REBOOT_EC to reboot the EC more politely.
+ */
+#define EC_CMD_REBOOT 0x00D1  /* Think "die" */
+
+/*
+ * Resend last response (not supported on LPC).
+ *
+ * Returns EC_RES_UNAVAILABLE if there is no response available - for example,
+ * there was no previous command, or the previous command's response was too
+ * big to save.
+ */
+#define EC_CMD_RESEND_RESPONSE 0x00DB
+
+/*
+ * This header byte on a command indicate version 0. Any header byte less
+ * than this means that we are talking to an old EC which doesn't support
+ * versioning. In that case, we assume version 0.
+ *
+ * Header bytes greater than this indicate a later version. For example,
+ * EC_CMD_VERSION0 + 1 means we are using version 1.
+ *
+ * The old EC interface must not use commands 0xdc or higher.
+ */
+#define EC_CMD_VERSION0 0x00DC
+
+/*****************************************************************************/
+/*
+ * PD commands
+ *
+ * These commands are for PD MCU communication.
+ */
+
+/* EC to PD MCU exchange status command */
+#define EC_CMD_PD_EXCHANGE_STATUS 0x0100
+#define EC_VER_PD_EXCHANGE_STATUS 2
+
+enum pd_charge_state {
+	PD_CHARGE_NO_CHANGE = 0, /* Don't change charge state */
+	PD_CHARGE_NONE,          /* No charging allowed */
+	PD_CHARGE_5V,            /* 5V charging only */
+	PD_CHARGE_MAX            /* Charge at max voltage */
+};
+
+/* Status of EC being sent to PD */
+#define EC_STATUS_HIBERNATING	BIT(0)
+
+struct ec_params_pd_status {
+	uint8_t status;       /* EC status */
+	int8_t batt_soc;      /* battery state of charge */
+	uint8_t charge_state; /* charging state (from enum pd_charge_state) */
+} __ec_align1;
+
+/* Status of PD being sent back to EC */
+#define PD_STATUS_HOST_EVENT      BIT(0) /* Forward host event to AP */
+#define PD_STATUS_IN_RW           BIT(1) /* Running RW image */
+#define PD_STATUS_JUMPED_TO_IMAGE BIT(2) /* Current image was jumped to */
+#define PD_STATUS_TCPC_ALERT_0    BIT(3) /* Alert active in port 0 TCPC */
+#define PD_STATUS_TCPC_ALERT_1    BIT(4) /* Alert active in port 1 TCPC */
+#define PD_STATUS_TCPC_ALERT_2    BIT(5) /* Alert active in port 2 TCPC */
+#define PD_STATUS_TCPC_ALERT_3    BIT(6) /* Alert active in port 3 TCPC */
+#define PD_STATUS_EC_INT_ACTIVE  (PD_STATUS_TCPC_ALERT_0 | \
+				      PD_STATUS_TCPC_ALERT_1 | \
+				      PD_STATUS_HOST_EVENT)
+struct ec_response_pd_status {
+	uint32_t curr_lim_ma;       /* input current limit */
+	uint16_t status;            /* PD MCU status */
+	int8_t active_charge_port;  /* active charging port */
+} __ec_align_size1;
+
+/* AP to PD MCU host event status command, cleared on read */
+#define EC_CMD_PD_HOST_EVENT_STATUS 0x0104
+
+/* PD MCU host event status bits */
+#define PD_EVENT_UPDATE_DEVICE     BIT(0)
+#define PD_EVENT_POWER_CHANGE      BIT(1)
+#define PD_EVENT_IDENTITY_RECEIVED BIT(2)
+#define PD_EVENT_DATA_SWAP         BIT(3)
+struct ec_response_host_event_status {
+	uint32_t status;      /* PD MCU host event status */
+} __ec_align4;
+
+/* Set USB type-C port role and muxes */
+#define EC_CMD_USB_PD_CONTROL 0x0101
+
+enum usb_pd_control_role {
+	USB_PD_CTRL_ROLE_NO_CHANGE = 0,
+	USB_PD_CTRL_ROLE_TOGGLE_ON = 1, /* == AUTO */
+	USB_PD_CTRL_ROLE_TOGGLE_OFF = 2,
+	USB_PD_CTRL_ROLE_FORCE_SINK = 3,
+	USB_PD_CTRL_ROLE_FORCE_SOURCE = 4,
+	USB_PD_CTRL_ROLE_FREEZE = 5,
+	USB_PD_CTRL_ROLE_COUNT
+};
+
+enum usb_pd_control_mux {
+	USB_PD_CTRL_MUX_NO_CHANGE = 0,
+	USB_PD_CTRL_MUX_NONE = 1,
+	USB_PD_CTRL_MUX_USB = 2,
+	USB_PD_CTRL_MUX_DP = 3,
+	USB_PD_CTRL_MUX_DOCK = 4,
+	USB_PD_CTRL_MUX_AUTO = 5,
+	USB_PD_CTRL_MUX_COUNT
+};
+
+enum usb_pd_control_swap {
+	USB_PD_CTRL_SWAP_NONE = 0,
+	USB_PD_CTRL_SWAP_DATA = 1,
+	USB_PD_CTRL_SWAP_POWER = 2,
+	USB_PD_CTRL_SWAP_VCONN = 3,
+	USB_PD_CTRL_SWAP_COUNT
+};
+
+struct ec_params_usb_pd_control {
+	uint8_t port;
+	uint8_t role;
+	uint8_t mux;
+	uint8_t swap;
+} __ec_align1;
+
+#define PD_CTRL_RESP_ENABLED_COMMS      BIT(0) /* Communication enabled */
+#define PD_CTRL_RESP_ENABLED_CONNECTED  BIT(1) /* Device connected */
+#define PD_CTRL_RESP_ENABLED_PD_CAPABLE BIT(2) /* Partner is PD capable */
+
+#define PD_CTRL_RESP_ROLE_POWER         BIT(0) /* 0=SNK/1=SRC */
+#define PD_CTRL_RESP_ROLE_DATA          BIT(1) /* 0=UFP/1=DFP */
+#define PD_CTRL_RESP_ROLE_VCONN         BIT(2) /* Vconn status */
+#define PD_CTRL_RESP_ROLE_DR_POWER      BIT(3) /* Partner is dualrole power */
+#define PD_CTRL_RESP_ROLE_DR_DATA       BIT(4) /* Partner is dualrole data */
+#define PD_CTRL_RESP_ROLE_USB_COMM      BIT(5) /* Partner USB comm capable */
+#define PD_CTRL_RESP_ROLE_EXT_POWERED   BIT(6) /* Partner externally powerd */
+
+struct ec_response_usb_pd_control {
+	uint8_t enabled;
+	uint8_t role;
+	uint8_t polarity;
+	uint8_t state;
+} __ec_align1;
+
+struct ec_response_usb_pd_control_v1 {
+	uint8_t enabled;
+	uint8_t role;
+	uint8_t polarity;
+	char state[32];
+} __ec_align1;
+
+/* Values representing usbc PD CC state */
+#define USBC_PD_CC_NONE		0 /* No accessory connected */
+#define USBC_PD_CC_NO_UFP	1 /* No UFP accessory connected */
+#define USBC_PD_CC_AUDIO_ACC	2 /* Audio accessory connected */
+#define USBC_PD_CC_DEBUG_ACC	3 /* Debug accessory connected */
+#define USBC_PD_CC_UFP_ATTACHED	4 /* UFP attached to usbc */
+#define USBC_PD_CC_DFP_ATTACHED	5 /* DPF attached to usbc */
+
+struct ec_response_usb_pd_control_v2 {
+	uint8_t enabled;
+	uint8_t role;
+	uint8_t polarity;
+	char state[32];
+	uint8_t cc_state; /* USBC_PD_CC_*Encoded cc state */
+	uint8_t dp_mode;  /* Current DP pin mode (MODE_DP_PIN_[A-E]) */
+	/* CL:1500994 Current cable type */
+	uint8_t reserved_cable_type;
+} __ec_align1;
+
+#define EC_CMD_USB_PD_PORTS 0x0102
+
+/* Maximum number of PD ports on a device, num_ports will be <= this */
+#define EC_USB_PD_MAX_PORTS 8
+
+struct ec_response_usb_pd_ports {
+	uint8_t num_ports;
+} __ec_align1;
+
+#define EC_CMD_USB_PD_POWER_INFO 0x0103
+
+#define PD_POWER_CHARGING_PORT 0xff
+struct ec_params_usb_pd_power_info {
+	uint8_t port;
+} __ec_align1;
+
+enum usb_chg_type {
+	USB_CHG_TYPE_NONE,
+	USB_CHG_TYPE_PD,
+	USB_CHG_TYPE_C,
+	USB_CHG_TYPE_PROPRIETARY,
+	USB_CHG_TYPE_BC12_DCP,
+	USB_CHG_TYPE_BC12_CDP,
+	USB_CHG_TYPE_BC12_SDP,
+	USB_CHG_TYPE_OTHER,
+	USB_CHG_TYPE_VBUS,
+	USB_CHG_TYPE_UNKNOWN,
+	USB_CHG_TYPE_DEDICATED,
+};
+enum usb_power_roles {
+	USB_PD_PORT_POWER_DISCONNECTED,
+	USB_PD_PORT_POWER_SOURCE,
+	USB_PD_PORT_POWER_SINK,
+	USB_PD_PORT_POWER_SINK_NOT_CHARGING,
+};
+
+struct usb_chg_measures {
+	uint16_t voltage_max;
+	uint16_t voltage_now;
+	uint16_t current_max;
+	uint16_t current_lim;
+} __ec_align2;
+
+struct ec_response_usb_pd_power_info {
+	uint8_t role;
+	uint8_t type;
+	uint8_t dualrole;
+	uint8_t reserved1;
+	struct usb_chg_measures meas;
+	uint32_t max_power;
+} __ec_align4;
+
+
+/*
+ * This command will return the number of USB PD charge port + the number
+ * of dedicated port present.
+ * EC_CMD_USB_PD_PORTS does NOT include the dedicated ports
+ */
+#define EC_CMD_CHARGE_PORT_COUNT 0x0105
+struct ec_response_charge_port_count {
+	uint8_t port_count;
+} __ec_align1;
+
+/* Write USB-PD device FW */
+#define EC_CMD_USB_PD_FW_UPDATE 0x0110
+
+enum usb_pd_fw_update_cmds {
+	USB_PD_FW_REBOOT,
+	USB_PD_FW_FLASH_ERASE,
+	USB_PD_FW_FLASH_WRITE,
+	USB_PD_FW_ERASE_SIG,
+};
+
+struct ec_params_usb_pd_fw_update {
+	uint16_t dev_id;
+	uint8_t cmd;
+	uint8_t port;
+	uint32_t size;     /* Size to write in bytes */
+	/* Followed by data to write */
+} __ec_align4;
+
+/* Write USB-PD Accessory RW_HASH table entry */
+#define EC_CMD_USB_PD_RW_HASH_ENTRY 0x0111
+/* RW hash is first 20 bytes of SHA-256 of RW section */
+#define PD_RW_HASH_SIZE 20
+struct ec_params_usb_pd_rw_hash_entry {
+	uint16_t dev_id;
+	uint8_t dev_rw_hash[PD_RW_HASH_SIZE];
+	uint8_t reserved;        /*
+				  * For alignment of current_image
+				  * TODO(rspangler) but it's not aligned!
+				  * Should have been reserved[2].
+				  */
+	uint32_t current_image;  /* One of ec_current_image */
+} __ec_align1;
+
+/* Read USB-PD Accessory info */
+#define EC_CMD_USB_PD_DEV_INFO 0x0112
+
+struct ec_params_usb_pd_info_request {
+	uint8_t port;
+} __ec_align1;
+
+/* Read USB-PD Device discovery info */
+#define EC_CMD_USB_PD_DISCOVERY 0x0113
+struct ec_params_usb_pd_discovery_entry {
+	uint16_t vid;  /* USB-IF VID */
+	uint16_t pid;  /* USB-IF PID */
+	uint8_t ptype; /* product type (hub,periph,cable,ama) */
+} __ec_align_size1;
+
+/* Override default charge behavior */
+#define EC_CMD_PD_CHARGE_PORT_OVERRIDE 0x0114
+
+/* Negative port parameters have special meaning */
+enum usb_pd_override_ports {
+	OVERRIDE_DONT_CHARGE = -2,
+	OVERRIDE_OFF = -1,
+	/* [0, CONFIG_USB_PD_PORT_COUNT): Port# */
+};
+
+struct ec_params_charge_port_override {
+	int16_t override_port; /* Override port# */
+} __ec_align2;
+
+/*
+ * Read (and delete) one entry of PD event log.
+ * TODO(crbug.com/751742): Make this host command more generic to accommodate
+ * future non-PD logs that use the same internal EC event_log.
+ */
+#define EC_CMD_PD_GET_LOG_ENTRY 0x0115
+
+struct ec_response_pd_log {
+	uint32_t timestamp; /* relative timestamp in milliseconds */
+	uint8_t type;       /* event type : see PD_EVENT_xx below */
+	uint8_t size_port;  /* [7:5] port number [4:0] payload size in bytes */
+	uint16_t data;      /* type-defined data payload */
+	uint8_t payload[0]; /* optional additional data payload: 0..16 bytes */
+} __ec_align4;
+
+/* The timestamp is the microsecond counter shifted to get about a ms. */
+#define PD_LOG_TIMESTAMP_SHIFT 10 /* 1 LSB = 1024us */
+
+#define PD_LOG_SIZE_MASK  0x1f
+#define PD_LOG_PORT_MASK  0xe0
+#define PD_LOG_PORT_SHIFT    5
+#define PD_LOG_PORT_SIZE(port, size) (((port) << PD_LOG_PORT_SHIFT) | \
+				      ((size) & PD_LOG_SIZE_MASK))
+#define PD_LOG_PORT(size_port) ((size_port) >> PD_LOG_PORT_SHIFT)
+#define PD_LOG_SIZE(size_port) ((size_port) & PD_LOG_SIZE_MASK)
+
+/* PD event log : entry types */
+/* PD MCU events */
+#define PD_EVENT_MCU_BASE       0x00
+#define PD_EVENT_MCU_CHARGE             (PD_EVENT_MCU_BASE+0)
+#define PD_EVENT_MCU_CONNECT            (PD_EVENT_MCU_BASE+1)
+/* Reserved for custom board event */
+#define PD_EVENT_MCU_BOARD_CUSTOM       (PD_EVENT_MCU_BASE+2)
+/* PD generic accessory events */
+#define PD_EVENT_ACC_BASE       0x20
+#define PD_EVENT_ACC_RW_FAIL   (PD_EVENT_ACC_BASE+0)
+#define PD_EVENT_ACC_RW_ERASE  (PD_EVENT_ACC_BASE+1)
+/* PD power supply events */
+#define PD_EVENT_PS_BASE        0x40
+#define PD_EVENT_PS_FAULT      (PD_EVENT_PS_BASE+0)
+/* PD video dongles events */
+#define PD_EVENT_VIDEO_BASE     0x60
+#define PD_EVENT_VIDEO_DP_MODE (PD_EVENT_VIDEO_BASE+0)
+#define PD_EVENT_VIDEO_CODEC   (PD_EVENT_VIDEO_BASE+1)
+/* Returned in the "type" field, when there is no entry available */
+#define PD_EVENT_NO_ENTRY       0xff
+
+/*
+ * PD_EVENT_MCU_CHARGE event definition :
+ * the payload is "struct usb_chg_measures"
+ * the data field contains the port state flags as defined below :
+ */
+/* Port partner is a dual role device */
+#define CHARGE_FLAGS_DUAL_ROLE         BIT(15)
+/* Port is the pending override port */
+#define CHARGE_FLAGS_DELAYED_OVERRIDE  BIT(14)
+/* Port is the override port */
+#define CHARGE_FLAGS_OVERRIDE          BIT(13)
+/* Charger type */
+#define CHARGE_FLAGS_TYPE_SHIFT               3
+#define CHARGE_FLAGS_TYPE_MASK       (0xf << CHARGE_FLAGS_TYPE_SHIFT)
+/* Power delivery role */
+#define CHARGE_FLAGS_ROLE_MASK         (7 <<  0)
+
+/*
+ * PD_EVENT_PS_FAULT data field flags definition :
+ */
+#define PS_FAULT_OCP                          1
+#define PS_FAULT_FAST_OCP                     2
+#define PS_FAULT_OVP                          3
+#define PS_FAULT_DISCH                        4
+
+/*
+ * PD_EVENT_VIDEO_CODEC payload is "struct mcdp_info".
+ */
+struct mcdp_version {
+	uint8_t major;
+	uint8_t minor;
+	uint16_t build;
+} __ec_align4;
+
+struct mcdp_info {
+	uint8_t family[2];
+	uint8_t chipid[2];
+	struct mcdp_version irom;
+	struct mcdp_version fw;
+} __ec_align4;
+
+/* struct mcdp_info field decoding */
+#define MCDP_CHIPID(chipid) ((chipid[0] << 8) | chipid[1])
+#define MCDP_FAMILY(family) ((family[0] << 8) | family[1])
+
+/* Get/Set USB-PD Alternate mode info */
+#define EC_CMD_USB_PD_GET_AMODE 0x0116
+struct ec_params_usb_pd_get_mode_request {
+	uint16_t svid_idx; /* SVID index to get */
+	uint8_t port;      /* port */
+} __ec_align_size1;
+
+struct ec_params_usb_pd_get_mode_response {
+	uint16_t svid;   /* SVID */
+	uint16_t opos;    /* Object Position */
+	uint32_t vdo[6]; /* Mode VDOs */
+} __ec_align4;
+
+#define EC_CMD_USB_PD_SET_AMODE 0x0117
+
+enum pd_mode_cmd {
+	PD_EXIT_MODE = 0,
+	PD_ENTER_MODE = 1,
+	/* Not a command.  Do NOT remove. */
+	PD_MODE_CMD_COUNT,
+};
+
+struct ec_params_usb_pd_set_mode_request {
+	uint32_t cmd;  /* enum pd_mode_cmd */
+	uint16_t svid; /* SVID to set */
+	uint8_t opos;  /* Object Position */
+	uint8_t port;  /* port */
+} __ec_align4;
+
+/* Ask the PD MCU to record a log of a requested type */
+#define EC_CMD_PD_WRITE_LOG_ENTRY 0x0118
+
+struct ec_params_pd_write_log_entry {
+	uint8_t type; /* event type : see PD_EVENT_xx above */
+	uint8_t port; /* port#, or 0 for events unrelated to a given port */
+} __ec_align1;
+
+
+/* Control USB-PD chip */
+#define EC_CMD_PD_CONTROL 0x0119
+
+enum ec_pd_control_cmd {
+	PD_SUSPEND = 0,      /* Suspend the PD chip (EC: stop talking to PD) */
+	PD_RESUME,           /* Resume the PD chip (EC: start talking to PD) */
+	PD_RESET,            /* Force reset the PD chip */
+	PD_CONTROL_DISABLE,  /* Disable further calls to this command */
+	PD_CHIP_ON,          /* Power on the PD chip */
+};
+
+struct ec_params_pd_control {
+	uint8_t chip;         /* chip id */
+	uint8_t subcmd;
+} __ec_align1;
+
+/* Get info about USB-C SS muxes */
+#define EC_CMD_USB_PD_MUX_INFO 0x011A
+
+struct ec_params_usb_pd_mux_info {
+	uint8_t port; /* USB-C port number */
+} __ec_align1;
+
+/* Flags representing mux state */
+#define USB_PD_MUX_USB_ENABLED       BIT(0) /* USB connected */
+#define USB_PD_MUX_DP_ENABLED        BIT(1) /* DP connected */
+#define USB_PD_MUX_POLARITY_INVERTED BIT(2) /* CC line Polarity inverted */
+#define USB_PD_MUX_HPD_IRQ           BIT(3) /* HPD IRQ is asserted */
+#define USB_PD_MUX_HPD_LVL           BIT(4) /* HPD level is asserted */
+
+struct ec_response_usb_pd_mux_info {
+	uint8_t flags; /* USB_PD_MUX_*-encoded USB mux state */
+} __ec_align1;
+
+#define EC_CMD_PD_CHIP_INFO		0x011B
+
+struct ec_params_pd_chip_info {
+	uint8_t port;	/* USB-C port number */
+	uint8_t renew;	/* Force renewal */
+} __ec_align1;
+
+struct ec_response_pd_chip_info {
+	uint16_t vendor_id;
+	uint16_t product_id;
+	uint16_t device_id;
+	union {
+		uint8_t fw_version_string[8];
+		uint64_t fw_version_number;
+	};
+} __ec_align2;
+
+struct ec_response_pd_chip_info_v1 {
+	uint16_t vendor_id;
+	uint16_t product_id;
+	uint16_t device_id;
+	union {
+		uint8_t fw_version_string[8];
+		uint64_t fw_version_number;
+	};
+	union {
+		uint8_t min_req_fw_version_string[8];
+		uint64_t min_req_fw_version_number;
+	};
+} __ec_align2;
+
+/* Run RW signature verification and get status */
+#define EC_CMD_RWSIG_CHECK_STATUS	0x011C
+
+struct ec_response_rwsig_check_status {
+	uint32_t status;
+} __ec_align4;
+
+/* For controlling RWSIG task */
+#define EC_CMD_RWSIG_ACTION	0x011D
+
+enum rwsig_action {
+	RWSIG_ACTION_ABORT = 0,		/* Abort RWSIG and prevent jumping */
+	RWSIG_ACTION_CONTINUE = 1,	/* Jump to RW immediately */
+};
+
+struct ec_params_rwsig_action {
+	uint32_t action;
+} __ec_align4;
+
+/* Run verification on a slot */
+#define EC_CMD_EFS_VERIFY	0x011E
+
+struct ec_params_efs_verify {
+	uint8_t region;		/* enum ec_flash_region */
+} __ec_align1;
+
+/*
+ * Retrieve info from Cros Board Info store. Response is based on the data
+ * type. Integers return a uint32. Strings return a string, using the response
+ * size to determine how big it is.
+ */
+#define EC_CMD_GET_CROS_BOARD_INFO	0x011F
+/*
+ * Write info into Cros Board Info on EEPROM. Write fails if the board has
+ * hardware write-protect enabled.
+ */
+#define EC_CMD_SET_CROS_BOARD_INFO	0x0120
+
+enum cbi_data_tag {
+	CBI_TAG_BOARD_VERSION = 0, /* uint32_t or smaller */
+	CBI_TAG_OEM_ID = 1,        /* uint32_t or smaller */
+	CBI_TAG_SKU_ID = 2,        /* uint32_t or smaller */
+	CBI_TAG_DRAM_PART_NUM = 3, /* variable length ascii, nul terminated. */
+	CBI_TAG_OEM_NAME = 4,      /* variable length ascii, nul terminated. */
+	CBI_TAG_MODEL_ID = 5,      /* uint32_t or smaller */
+	CBI_TAG_COUNT,
+};
+
+/*
+ * Flags to control read operation
+ *
+ * RELOAD:  Invalidate cache and read data from EEPROM. Useful to verify
+ *          write was successful without reboot.
+ */
+#define CBI_GET_RELOAD		BIT(0)
+
+struct ec_params_get_cbi {
+	uint32_t tag;		/* enum cbi_data_tag */
+	uint32_t flag;		/* CBI_GET_* */
+} __ec_align4;
+
+/*
+ * Flags to control write behavior.
+ *
+ * NO_SYNC: Makes EC update data in RAM but skip writing to EEPROM. It's
+ *          useful when writing multiple fields in a row.
+ * INIT:    Need to be set when creating a new CBI from scratch. All fields
+ *          will be initialized to zero first.
+ */
+#define CBI_SET_NO_SYNC		BIT(0)
+#define CBI_SET_INIT		BIT(1)
+
+struct ec_params_set_cbi {
+	uint32_t tag;		/* enum cbi_data_tag */
+	uint32_t flag;		/* CBI_SET_* */
+	uint32_t size;		/* Data size */
+	uint8_t data[];		/* For string and raw data */
+} __ec_align1;
+
+/*
+ * Information about resets of the AP by the EC and the EC's own uptime.
+ */
+#define EC_CMD_GET_UPTIME_INFO 0x0121
+
+struct ec_response_uptime_info {
+	/*
+	 * Number of milliseconds since the last EC boot. Sysjump resets
+	 * typically do not restart the EC's time_since_boot epoch.
+	 *
+	 * WARNING: The EC's sense of time is much less accurate than the AP's
+	 * sense of time, in both phase and frequency.  This timebase is similar
+	 * to CLOCK_MONOTONIC_RAW, but with 1% or more frequency error.
+	 */
+	uint32_t time_since_ec_boot_ms;
+
+	/*
+	 * Number of times the AP was reset by the EC since the last EC boot.
+	 * Note that the AP may be held in reset by the EC during the initial
+	 * boot sequence, such that the very first AP boot may count as more
+	 * than one here.
+	 */
+	uint32_t ap_resets_since_ec_boot;
+
+	/*
+	 * The set of flags which describe the EC's most recent reset.  See
+	 * include/system.h RESET_FLAG_* for details.
+	 */
+	uint32_t ec_reset_flags;
+
+	/* Empty log entries have both the cause and timestamp set to zero. */
+	struct ap_reset_log_entry {
+		/*
+		 * See include/chipset.h: enum chipset_{reset,shutdown}_reason
+		 * for details.
+		 */
+		uint16_t reset_cause;
+
+		/* Reserved for protocol growth. */
+		uint16_t reserved;
+
+		/*
+		 * The time of the reset's assertion, in milliseconds since the
+		 * last EC boot, in the same epoch as time_since_ec_boot_ms.
+		 * Set to zero if the log entry is empty.
+		 */
+		uint32_t reset_time_ms;
+	} recent_ap_reset[4];
+} __ec_align4;
+
+/*
+ * Add entropy to the device secret (stored in the rollback region).
+ *
+ * Depending on the chip, the operation may take a long time (e.g. to erase
+ * flash), so the commands are asynchronous.
+ */
+#define EC_CMD_ADD_ENTROPY	0x0122
+
+enum add_entropy_action {
+	/* Add entropy to the current secret. */
+	ADD_ENTROPY_ASYNC = 0,
+	/*
+	 * Add entropy, and also make sure that the previous secret is erased.
+	 * (this can be implemented by adding entropy multiple times until
+	 * all rolback blocks have been overwritten).
+	 */
+	ADD_ENTROPY_RESET_ASYNC = 1,
+	/* Read back result from the previous operation. */
+	ADD_ENTROPY_GET_RESULT = 2,
+};
+
+struct ec_params_rollback_add_entropy {
+	uint8_t action;
+} __ec_align1;
+
+/*
+ * Perform a single read of a given ADC channel.
+ */
+#define EC_CMD_ADC_READ		0x0123
+
+struct ec_params_adc_read {
+	uint8_t adc_channel;
+} __ec_align1;
+
+struct ec_response_adc_read {
+	int32_t adc_value;
+} __ec_align4;
+
+/*
+ * Read back rollback info
+ */
+#define EC_CMD_ROLLBACK_INFO		0x0124
+
+struct ec_response_rollback_info {
+	int32_t id; /* Incrementing number to indicate which region to use. */
+	int32_t rollback_min_version;
+	int32_t rw_rollback_version;
+} __ec_align4;
+
+
+/* Issue AP reset */
+#define EC_CMD_AP_RESET 0x0125
+
+/*****************************************************************************/
+/* The command range 0x200-0x2FF is reserved for Rotor. */
+
+/*****************************************************************************/
+/*
+ * Reserve a range of host commands for the CR51 firmware.
+ */
+#define EC_CMD_CR51_BASE 0x0300
+#define EC_CMD_CR51_LAST 0x03FF
+
+/*****************************************************************************/
+/* Fingerprint MCU commands: range 0x0400-0x040x */
+
+/* Fingerprint SPI sensor passthru command: prototyping ONLY */
+#define EC_CMD_FP_PASSTHRU 0x0400
+
+#define EC_FP_FLAG_NOT_COMPLETE 0x1
+
+struct ec_params_fp_passthru {
+	uint16_t len;		/* Number of bytes to write then read */
+	uint16_t flags;		/* EC_FP_FLAG_xxx */
+	uint8_t data[];		/* Data to send */
+} __ec_align2;
+
+/* Configure the Fingerprint MCU behavior */
+#define EC_CMD_FP_MODE 0x0402
+
+/* Put the sensor in its lowest power mode */
+#define FP_MODE_DEEPSLEEP      BIT(0)
+/* Wait to see a finger on the sensor */
+#define FP_MODE_FINGER_DOWN    BIT(1)
+/* Poll until the finger has left the sensor */
+#define FP_MODE_FINGER_UP      BIT(2)
+/* Capture the current finger image */
+#define FP_MODE_CAPTURE        BIT(3)
+/* Finger enrollment session on-going */
+#define FP_MODE_ENROLL_SESSION BIT(4)
+/* Enroll the current finger image */
+#define FP_MODE_ENROLL_IMAGE   BIT(5)
+/* Try to match the current finger image */
+#define FP_MODE_MATCH          BIT(6)
+/* Reset and re-initialize the sensor. */
+#define FP_MODE_RESET_SENSOR   BIT(7)
+/* special value: don't change anything just read back current mode */
+#define FP_MODE_DONT_CHANGE    BIT(31)
+
+#define FP_VALID_MODES (FP_MODE_DEEPSLEEP      | \
+			FP_MODE_FINGER_DOWN    | \
+			FP_MODE_FINGER_UP      | \
+			FP_MODE_CAPTURE        | \
+			FP_MODE_ENROLL_SESSION | \
+			FP_MODE_ENROLL_IMAGE   | \
+			FP_MODE_MATCH          | \
+			FP_MODE_RESET_SENSOR   | \
+			FP_MODE_DONT_CHANGE)
+
+/* Capture types defined in bits [30..28] */
+#define FP_MODE_CAPTURE_TYPE_SHIFT 28
+#define FP_MODE_CAPTURE_TYPE_MASK  (0x7 << FP_MODE_CAPTURE_TYPE_SHIFT)
+/*
+ * This enum must remain ordered, if you add new values you must ensure that
+ * FP_CAPTURE_TYPE_MAX is still the last one.
+ */
+enum fp_capture_type {
+	/* Full blown vendor-defined capture (produces 'frame_size' bytes) */
+	FP_CAPTURE_VENDOR_FORMAT = 0,
+	/* Simple raw image capture (produces width x height x bpp bits) */
+	FP_CAPTURE_SIMPLE_IMAGE = 1,
+	/* Self test pattern (e.g. checkerboard) */
+	FP_CAPTURE_PATTERN0 = 2,
+	/* Self test pattern (e.g. inverted checkerboard) */
+	FP_CAPTURE_PATTERN1 = 3,
+	/* Capture for Quality test with fixed contrast */
+	FP_CAPTURE_QUALITY_TEST = 4,
+	/* Capture for pixel reset value test */
+	FP_CAPTURE_RESET_TEST = 5,
+	FP_CAPTURE_TYPE_MAX,
+};
+/* Extracts the capture type from the sensor 'mode' word */
+#define FP_CAPTURE_TYPE(mode) (((mode) & FP_MODE_CAPTURE_TYPE_MASK) \
+				       >> FP_MODE_CAPTURE_TYPE_SHIFT)
+
+struct ec_params_fp_mode {
+	uint32_t mode; /* as defined by FP_MODE_ constants */
+} __ec_align4;
+
+struct ec_response_fp_mode {
+	uint32_t mode; /* as defined by FP_MODE_ constants */
+} __ec_align4;
+
+/* Retrieve Fingerprint sensor information */
+#define EC_CMD_FP_INFO 0x0403
+
+/* Number of dead pixels detected on the last maintenance */
+#define FP_ERROR_DEAD_PIXELS(errors) ((errors) & 0x3FF)
+/* Unknown number of dead pixels detected on the last maintenance */
+#define FP_ERROR_DEAD_PIXELS_UNKNOWN (0x3FF)
+/* No interrupt from the sensor */
+#define FP_ERROR_NO_IRQ    BIT(12)
+/* SPI communication error */
+#define FP_ERROR_SPI_COMM  BIT(13)
+/* Invalid sensor Hardware ID */
+#define FP_ERROR_BAD_HWID  BIT(14)
+/* Sensor initialization failed */
+#define FP_ERROR_INIT_FAIL BIT(15)
+
+struct ec_response_fp_info_v0 {
+	/* Sensor identification */
+	uint32_t vendor_id;
+	uint32_t product_id;
+	uint32_t model_id;
+	uint32_t version;
+	/* Image frame characteristics */
+	uint32_t frame_size;
+	uint32_t pixel_format; /* using V4L2_PIX_FMT_ */
+	uint16_t width;
+	uint16_t height;
+	uint16_t bpp;
+	uint16_t errors; /* see FP_ERROR_ flags above */
+} __ec_align4;
+
+struct ec_response_fp_info {
+	/* Sensor identification */
+	uint32_t vendor_id;
+	uint32_t product_id;
+	uint32_t model_id;
+	uint32_t version;
+	/* Image frame characteristics */
+	uint32_t frame_size;
+	uint32_t pixel_format; /* using V4L2_PIX_FMT_ */
+	uint16_t width;
+	uint16_t height;
+	uint16_t bpp;
+	uint16_t errors; /* see FP_ERROR_ flags above */
+	/* Template/finger current information */
+	uint32_t template_size;  /* max template size in bytes */
+	uint16_t template_max;   /* maximum number of fingers/templates */
+	uint16_t template_valid; /* number of valid fingers/templates */
+	uint32_t template_dirty; /* bitmap of templates with MCU side changes */
+	uint32_t template_version; /* version of the template format */
+} __ec_align4;
+
+/* Get the last captured finger frame or a template content */
+#define EC_CMD_FP_FRAME 0x0404
+
+/* constants defining the 'offset' field which also contains the frame index */
+#define FP_FRAME_INDEX_SHIFT       28
+/* Frame buffer where the captured image is stored */
+#define FP_FRAME_INDEX_RAW_IMAGE    0
+/* First frame buffer holding a template */
+#define FP_FRAME_INDEX_TEMPLATE     1
+#define FP_FRAME_GET_BUFFER_INDEX(offset) ((offset) >> FP_FRAME_INDEX_SHIFT)
+#define FP_FRAME_OFFSET_MASK       0x0FFFFFFF
+
+/* Version of the format of the encrypted templates. */
+#define FP_TEMPLATE_FORMAT_VERSION 3
+
+/* Constants for encryption parameters */
+#define FP_CONTEXT_NONCE_BYTES 12
+#define FP_CONTEXT_USERID_WORDS (32 / sizeof(uint32_t))
+#define FP_CONTEXT_TAG_BYTES 16
+#define FP_CONTEXT_SALT_BYTES 16
+#define FP_CONTEXT_TPM_BYTES 32
+
+struct ec_fp_template_encryption_metadata {
+	/*
+	 * Version of the structure format (N=3).
+	 */
+	uint16_t struct_version;
+	/* Reserved bytes, set to 0. */
+	uint16_t reserved;
+	/*
+	 * The salt is *only* ever used for key derivation. The nonce is unique,
+	 * a different one is used for every message.
+	 */
+	uint8_t nonce[FP_CONTEXT_NONCE_BYTES];
+	uint8_t salt[FP_CONTEXT_SALT_BYTES];
+	uint8_t tag[FP_CONTEXT_TAG_BYTES];
+};
+
+struct ec_params_fp_frame {
+	/*
+	 * The offset contains the template index or FP_FRAME_INDEX_RAW_IMAGE
+	 * in the high nibble, and the real offset within the frame in
+	 * FP_FRAME_OFFSET_MASK.
+	 */
+	uint32_t offset;
+	uint32_t size;
+} __ec_align4;
+
+/* Load a template into the MCU */
+#define EC_CMD_FP_TEMPLATE 0x0405
+
+/* Flag in the 'size' field indicating that the full template has been sent */
+#define FP_TEMPLATE_COMMIT 0x80000000
+
+struct ec_params_fp_template {
+	uint32_t offset;
+	uint32_t size;
+	uint8_t data[];
+} __ec_align4;
+
+/* Clear the current fingerprint user context and set a new one */
+#define EC_CMD_FP_CONTEXT 0x0406
+
+struct ec_params_fp_context {
+	uint32_t userid[FP_CONTEXT_USERID_WORDS];
+} __ec_align4;
+
+#define EC_CMD_FP_STATS 0x0407
+
+#define FPSTATS_CAPTURE_INV  BIT(0)
+#define FPSTATS_MATCHING_INV BIT(1)
+
+struct ec_response_fp_stats {
+	uint32_t capture_time_us;
+	uint32_t matching_time_us;
+	uint32_t overall_time_us;
+	struct {
+		uint32_t lo;
+		uint32_t hi;
+	} overall_t0;
+	uint8_t timestamps_invalid;
+	int8_t template_matched;
+} __ec_align2;
+
+#define EC_CMD_FP_SEED 0x0408
+struct ec_params_fp_seed {
+	/*
+	 * Version of the structure format (N=3).
+	 */
+	uint16_t struct_version;
+	/* Reserved bytes, set to 0. */
+	uint16_t reserved;
+	/* Seed from the TPM. */
+	uint8_t seed[FP_CONTEXT_TPM_BYTES];
+} __ec_align4;
+
+/*****************************************************************************/
+/* Touchpad MCU commands: range 0x0500-0x05FF */
+
+/* Perform touchpad self test */
+#define EC_CMD_TP_SELF_TEST 0x0500
+
+/* Get number of frame types, and the size of each type */
+#define EC_CMD_TP_FRAME_INFO 0x0501
+
+struct ec_response_tp_frame_info {
+	uint32_t n_frames;
+	uint32_t frame_sizes[0];
+} __ec_align4;
+
+/* Create a snapshot of current frame readings */
+#define EC_CMD_TP_FRAME_SNAPSHOT 0x0502
+
+/* Read the frame */
+#define EC_CMD_TP_FRAME_GET 0x0503
+
+struct ec_params_tp_frame_get {
+	uint32_t frame_index;
+	uint32_t offset;
+	uint32_t size;
+} __ec_align4;
+
+/*****************************************************************************/
+/* EC-EC communication commands: range 0x0600-0x06FF */
+
+#define EC_COMM_TEXT_MAX 8
+
+/*
+ * Get battery static information, i.e. information that never changes, or
+ * very infrequently.
+ */
+#define EC_CMD_BATTERY_GET_STATIC 0x0600
+
+/**
+ * struct ec_params_battery_static_info - Battery static info parameters
+ * @index: Battery index.
+ */
+struct ec_params_battery_static_info {
+	uint8_t index;
+} __ec_align_size1;
+
+/**
+ * struct ec_response_battery_static_info - Battery static info response
+ * @design_capacity: Battery Design Capacity (mAh)
+ * @design_voltage: Battery Design Voltage (mV)
+ * @manufacturer: Battery Manufacturer String
+ * @model: Battery Model Number String
+ * @serial: Battery Serial Number String
+ * @type: Battery Type String
+ * @cycle_count: Battery Cycle Count
+ */
+struct ec_response_battery_static_info {
+	uint16_t design_capacity;
+	uint16_t design_voltage;
+	char manufacturer[EC_COMM_TEXT_MAX];
+	char model[EC_COMM_TEXT_MAX];
+	char serial[EC_COMM_TEXT_MAX];
+	char type[EC_COMM_TEXT_MAX];
+	/* TODO(crbug.com/795991): Consider moving to dynamic structure. */
+	uint32_t cycle_count;
+} __ec_align4;
+
+/*
+ * Get battery dynamic information, i.e. information that is likely to change
+ * every time it is read.
+ */
+#define EC_CMD_BATTERY_GET_DYNAMIC 0x0601
+
+/**
+ * struct ec_params_battery_dynamic_info - Battery dynamic info parameters
+ * @index: Battery index.
+ */
+struct ec_params_battery_dynamic_info {
+	uint8_t index;
+} __ec_align_size1;
+
+/**
+ * struct ec_response_battery_dynamic_info - Battery dynamic info response
+ * @actual_voltage: Battery voltage (mV)
+ * @actual_current: Battery current (mA); negative=discharging
+ * @remaining_capacity: Remaining capacity (mAh)
+ * @full_capacity: Capacity (mAh, might change occasionally)
+ * @flags: Flags, see EC_BATT_FLAG_*
+ * @desired_voltage: Charging voltage desired by battery (mV)
+ * @desired_current: Charging current desired by battery (mA)
+ */
+struct ec_response_battery_dynamic_info {
+	int16_t actual_voltage;
+	int16_t actual_current;
+	int16_t remaining_capacity;
+	int16_t full_capacity;
+	int16_t flags;
+	int16_t desired_voltage;
+	int16_t desired_current;
+} __ec_align2;
+
+/*
+ * Control charger chip. Used to control charger chip on the slave.
+ */
+#define EC_CMD_CHARGER_CONTROL 0x0602
+
+/**
+ * struct ec_params_charger_control - Charger control parameters
+ * @max_current: Charger current (mA). Positive to allow base to draw up to
+ *     max_current and (possibly) charge battery, negative to request current
+ *     from base (OTG).
+ * @otg_voltage: Voltage (mV) to use in OTG mode, ignored if max_current is
+ *     >= 0.
+ * @allow_charging: Allow base battery charging (only makes sense if
+ *     max_current > 0).
+ */
+struct ec_params_charger_control {
+	int16_t max_current;
+	uint16_t otg_voltage;
+	uint8_t allow_charging;
+} __ec_align_size1;
+
+/*****************************************************************************/
+/*
+ * Reserve a range of host commands for board-specific, experimental, or
+ * special purpose features. These can be (re)used without updating this file.
+ *
+ * CAUTION: Don't go nuts with this. Shipping products should document ALL
+ * their EC commands for easier development, testing, debugging, and support.
+ *
+ * All commands MUST be #defined to be 4-digit UPPER CASE hex values
+ * (e.g., 0x00AB, not 0xab) for CONFIG_HOSTCMD_SECTION_SORTED to work.
+ *
+ * In your experimental code, you may want to do something like this:
+ *
+ *   #define EC_CMD_MAGIC_FOO 0x0000
+ *   #define EC_CMD_MAGIC_BAR 0x0001
+ *   #define EC_CMD_MAGIC_HEY 0x0002
+ *
+ *   DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_FOO, magic_foo_handler,
+ *      EC_VER_MASK(0);
+ *
+ *   DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_BAR, magic_bar_handler,
+ *      EC_VER_MASK(0);
+ *
+ *   DECLARE_PRIVATE_HOST_COMMAND(EC_CMD_MAGIC_HEY, magic_hey_handler,
+ *      EC_VER_MASK(0);
+ */
+#define EC_CMD_BOARD_SPECIFIC_BASE 0x3E00
+#define EC_CMD_BOARD_SPECIFIC_LAST 0x3FFF
+
+/*
+ * Given the private host command offset, calculate the true private host
+ * command value.
+ */
+#define EC_PRIVATE_HOST_COMMAND_VALUE(command) \
+	(EC_CMD_BOARD_SPECIFIC_BASE + (command))
+
+/*****************************************************************************/
+/*
+ * Passthru commands
+ *
+ * Some platforms have sub-processors chained to each other.  For example.
+ *
+ *     AP <--> EC <--> PD MCU
+ *
+ * The top 2 bits of the command number are used to indicate which device the
+ * command is intended for.  Device 0 is always the device receiving the
+ * command; other device mapping is board-specific.
+ *
+ * When a device receives a command to be passed to a sub-processor, it passes
+ * it on with the device number set back to 0.  This allows the sub-processor
+ * to remain blissfully unaware of whether the command originated on the next
+ * device up the chain, or was passed through from the AP.
+ *
+ * In the above example, if the AP wants to send command 0x0002 to the PD MCU,
+ *     AP sends command 0x4002 to the EC
+ *     EC sends command 0x0002 to the PD MCU
+ *     EC forwards PD MCU response back to the AP
+ */
+
+/* Offset and max command number for sub-device n */
+#define EC_CMD_PASSTHRU_OFFSET(n) (0x4000 * (n))
+#define EC_CMD_PASSTHRU_MAX(n) (EC_CMD_PASSTHRU_OFFSET(n) + 0x3fff)
+
+/*****************************************************************************/
+/*
+ * Deprecated constants. These constants have been renamed for clarity. The
+ * meaning and size has not changed. Programs that use the old names should
+ * switch to the new names soon, as the old names may not be carried forward
+ * forever.
+ */
+#define EC_HOST_PARAM_SIZE      EC_PROTO2_MAX_PARAM_SIZE
+#define EC_LPC_ADDR_OLD_PARAM   EC_HOST_CMD_REGION1
+#define EC_OLD_PARAM_SIZE       EC_HOST_CMD_REGION_SIZE
+
+
+
+#endif  /* __CROS_EC_COMMANDS_H */
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
new file mode 100644
index 000000000000..eab7036cda09
--- /dev/null
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -0,0 +1,319 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ChromeOS Embedded Controller protocol interface.
+ *
+ * Copyright (C) 2012 Google, Inc
+ */
+
+#ifndef __LINUX_CROS_EC_PROTO_H
+#define __LINUX_CROS_EC_PROTO_H
+
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+
+#include <linux/platform_data/cros_ec_commands.h>
+
+#define CROS_EC_DEV_NAME	"cros_ec"
+#define CROS_EC_DEV_FP_NAME	"cros_fp"
+#define CROS_EC_DEV_ISH_NAME	"cros_ish"
+#define CROS_EC_DEV_PD_NAME	"cros_pd"
+#define CROS_EC_DEV_SCP_NAME	"cros_scp"
+#define CROS_EC_DEV_TP_NAME	"cros_tp"
+
+/*
+ * The EC is unresponsive for a time after a reboot command.  Add a
+ * simple delay to make sure that the bus stays locked.
+ */
+#define EC_REBOOT_DELAY_MS		50
+
+/*
+ * Max bus-specific overhead incurred by request/responses.
+ * I2C requires 1 additional byte for requests.
+ * I2C requires 2 additional bytes for responses.
+ * SPI requires up to 32 additional bytes for responses.
+ */
+#define EC_PROTO_VERSION_UNKNOWN	0
+#define EC_MAX_REQUEST_OVERHEAD		1
+#define EC_MAX_RESPONSE_OVERHEAD	32
+
+/*
+ * Command interface between EC and AP, for LPC, I2C and SPI interfaces.
+ */
+enum {
+	EC_MSG_TX_HEADER_BYTES	= 3,
+	EC_MSG_TX_TRAILER_BYTES	= 1,
+	EC_MSG_TX_PROTO_BYTES	= EC_MSG_TX_HEADER_BYTES +
+				  EC_MSG_TX_TRAILER_BYTES,
+	EC_MSG_RX_PROTO_BYTES	= 3,
+
+	/* Max length of messages for proto 2*/
+	EC_PROTO2_MSG_BYTES	= EC_PROTO2_MAX_PARAM_SIZE +
+				  EC_MSG_TX_PROTO_BYTES,
+
+	EC_MAX_MSG_BYTES	= 64 * 1024,
+};
+
+/**
+ * struct cros_ec_command - Information about a ChromeOS EC command.
+ * @version: Command version number (often 0).
+ * @command: Command to send (EC_CMD_...).
+ * @outsize: Outgoing length in bytes.
+ * @insize: Max number of bytes to accept from the EC.
+ * @result: EC's response to the command (separate from communication failure).
+ * @data: Where to put the incoming data from EC and outgoing data to EC.
+ */
+struct cros_ec_command {
+	uint32_t version;
+	uint32_t command;
+	uint32_t outsize;
+	uint32_t insize;
+	uint32_t result;
+	uint8_t data[0];
+};
+
+/**
+ * struct cros_ec_device - Information about a ChromeOS EC device.
+ * @phys_name: Name of physical comms layer (e.g. 'i2c-4').
+ * @dev: Device pointer for physical comms device
+ * @was_wake_device: True if this device was set to wake the system from
+ *                   sleep at the last suspend.
+ * @cros_class: The class structure for this device.
+ * @cmd_readmem: Direct read of the EC memory-mapped region, if supported.
+ *     @offset: Is within EC_LPC_ADDR_MEMMAP region.
+ *     @bytes: Number of bytes to read. zero means "read a string" (including
+ *             the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be
+ *             read. Caller must ensure that the buffer is large enough for the
+ *             result when reading a string.
+ * @max_request: Max size of message requested.
+ * @max_response: Max size of message response.
+ * @max_passthru: Max sice of passthru message.
+ * @proto_version: The protocol version used for this device.
+ * @priv: Private data.
+ * @irq: Interrupt to use.
+ * @id: Device id.
+ * @din: Input buffer (for data from EC). This buffer will always be
+ *       dword-aligned and include enough space for up to 7 word-alignment
+ *       bytes also, so we can ensure that the body of the message is always
+ *       dword-aligned (64-bit). We use this alignment to keep ARM and x86
+ *       happy. Probably word alignment would be OK, there might be a small
+ *       performance advantage to using dword.
+ * @dout: Output buffer (for data to EC). This buffer will always be
+ *        dword-aligned and include enough space for up to 7 word-alignment
+ *        bytes also, so we can ensure that the body of the message is always
+ *        dword-aligned (64-bit). We use this alignment to keep ARM and x86
+ *        happy. Probably word alignment would be OK, there might be a small
+ *        performance advantage to using dword.
+ * @din_size: Size of din buffer to allocate (zero to use static din).
+ * @dout_size: Size of dout buffer to allocate (zero to use static dout).
+ * @wake_enabled: True if this device can wake the system from sleep.
+ * @suspended: True if this device had been suspended.
+ * @cmd_xfer: Send command to EC and get response.
+ *            Returns the number of bytes received if the communication
+ *            succeeded, but that doesn't mean the EC was happy with the
+ *            command. The caller should check msg.result for the EC's result
+ *            code.
+ * @pkt_xfer: Send packet to EC and get response.
+ * @lock: One transaction at a time.
+ * @mkbp_event_supported: True if this EC supports the MKBP event protocol.
+ * @host_sleep_v1: True if this EC supports the sleep v1 command.
+ * @event_notifier: Interrupt event notifier for transport devices.
+ * @event_data: Raw payload transferred with the MKBP event.
+ * @event_size: Size in bytes of the event data.
+ * @host_event_wake_mask: Mask of host events that cause wake from suspend.
+ * @ec: The platform_device used by the mfd driver to interface with the
+ *      main EC.
+ * @pd: The platform_device used by the mfd driver to interface with the
+ *      PD behind an EC.
+ */
+struct cros_ec_device {
+	/* These are used by other drivers that want to talk to the EC */
+	const char *phys_name;
+	struct device *dev;
+	bool was_wake_device;
+	struct class *cros_class;
+	int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset,
+			   unsigned int bytes, void *dest);
+
+	/* These are used to implement the platform-specific interface */
+	u16 max_request;
+	u16 max_response;
+	u16 max_passthru;
+	u16 proto_version;
+	void *priv;
+	int irq;
+	u8 *din;
+	u8 *dout;
+	int din_size;
+	int dout_size;
+	bool wake_enabled;
+	bool suspended;
+	int (*cmd_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_command *msg);
+	int (*pkt_xfer)(struct cros_ec_device *ec,
+			struct cros_ec_command *msg);
+	struct mutex lock;
+	bool mkbp_event_supported;
+	bool host_sleep_v1;
+	struct blocking_notifier_head event_notifier;
+
+	struct ec_response_get_next_event_v1 event_data;
+	int event_size;
+	u32 host_event_wake_mask;
+	u32 last_resume_result;
+
+	/* The platform devices used by the mfd driver */
+	struct platform_device *ec;
+	struct platform_device *pd;
+};
+
+/**
+ * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information.
+ * @sensor_num: Id of the sensor, as reported by the EC.
+ */
+struct cros_ec_sensor_platform {
+	u8 sensor_num;
+};
+
+/**
+ * struct cros_ec_platform - ChromeOS EC platform information.
+ * @ec_name: Name of EC device (e.g. 'cros-ec', 'cros-pd', ...)
+ *           used in /dev/ and sysfs.
+ * @cmd_offset: Offset to apply for each command. Set when
+ *              registering a device behind another one.
+ */
+struct cros_ec_platform {
+	const char *ec_name;
+	u16 cmd_offset;
+};
+
+/**
+ * cros_ec_suspend() - Handle a suspend operation for the ChromeOS EC device.
+ * @ec_dev: Device to suspend.
+ *
+ * This can be called by drivers to handle a suspend event.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_suspend(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_resume() - Handle a resume operation for the ChromeOS EC device.
+ * @ec_dev: Device to resume.
+ *
+ * This can be called by drivers to handle a resume event.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_resume(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_prepare_tx() - Prepare an outgoing message in the output buffer.
+ * @ec_dev: Device to register.
+ * @msg: Message to write.
+ *
+ * This is intended to be used by all ChromeOS EC drivers, but at present
+ * only SPI uses it. Once LPC uses the same protocol it can start using it.
+ * I2C could use it now, with a refactor of the existing code.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
+		       struct cros_ec_command *msg);
+
+/**
+ * cros_ec_check_result() - Check ec_msg->result.
+ * @ec_dev: EC device.
+ * @msg: Message to check.
+ *
+ * This is used by ChromeOS EC drivers to check the ec_msg->result for
+ * errors and to warn about them.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_check_result(struct cros_ec_device *ec_dev,
+			 struct cros_ec_command *msg);
+
+/**
+ * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC.
+ * @ec_dev: EC device.
+ * @msg: Message to write.
+ *
+ * Call this to send a command to the ChromeOS EC.  This should be used
+ * instead of calling the EC's cmd_xfer() callback directly.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
+		     struct cros_ec_command *msg);
+
+/**
+ * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC.
+ * @ec_dev: EC device.
+ * @msg: Message to write.
+ *
+ * This function is identical to cros_ec_cmd_xfer, except it returns success
+ * status only if both the command was transmitted successfully and the EC
+ * replied with success status. It's not necessary to check msg->result when
+ * using this function.
+ *
+ * Return: The number of bytes transferred on success or negative error code.
+ */
+int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
+			    struct cros_ec_command *msg);
+
+/**
+ * cros_ec_register() - Register a new ChromeOS EC, using the provided info.
+ * @ec_dev: Device to register.
+ *
+ * Before calling this, allocate a pointer to a new device and then fill
+ * in all the fields up to the --private-- marker.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_register(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_unregister() - Remove a ChromeOS EC.
+ * @ec_dev: Device to unregister.
+ *
+ * Call this to deregister a ChromeOS EC, then clean up any private data.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_unregister(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_query_all() -  Query the protocol version supported by the
+ *         ChromeOS EC.
+ * @ec_dev: Device to register.
+ *
+ * Return: 0 on success or negative error code.
+ */
+int cros_ec_query_all(struct cros_ec_device *ec_dev);
+
+/**
+ * cros_ec_get_next_event() - Fetch next event from the ChromeOS EC.
+ * @ec_dev: Device to fetch event from.
+ * @wake_event: Pointer to a bool set to true upon return if the event might be
+ *              treated as a wake event. Ignored if null.
+ *
+ * Return: negative error code on errors; 0 for no data; or else number of
+ * bytes received (i.e., an event was retrieved successfully). Event types are
+ * written out to @ec_dev->event_data.event_type on success.
+ */
+int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
+
+/**
+ * cros_ec_get_host_event() - Return a mask of event set by the ChromeOS EC.
+ * @ec_dev: Device to fetch event from.
+ *
+ * When MKBP is supported, when the EC raises an interrupt, we collect the
+ * events raised and call the functions in the ec notifier. This function
+ * is a helper to know which events are raised.
+ *
+ * Return: 0 on error or non-zero bitmask of one or more EC_HOST_EVENT_*.
+ */
+u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
+
+#endif /* __LINUX_CROS_EC_PROTO_H */
diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c
index 85beef265cc8..3c1bd24a1057 100644
--- a/sound/soc/codecs/cros_ec_codec.c
+++ b/sound/soc/codecs/cros_ec_codec.c
@@ -9,9 +9,9 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
-#include <linux/mfd/cros_ec.h>
-#include <linux/mfd/cros_ec_commands.h>
 #include <linux/module.h>
+#include <linux/platform_data/cros_ec_commands.h>
+#include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
-- 
cgit v1.2.3


From 8b6a666a97544bf307190a05947742b8357aa962 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 20 May 2019 08:48:46 +0100
Subject: afs: Provide an RCU-capable key lookup

Provide an RCU-capable key lookup function.  We don't want to call
afs_request_key() in RCU-mode pathwalk as request_key() might sleep, even if
we don't ask it to construct anything as it might find a key that is currently
undergoing construction.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/internal.h   |  1 +
 fs/afs/security.c   | 33 +++++++++++++++++++++++++++++++--
 include/linux/key.h | 14 +++++++++++++-
 3 files changed, 45 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index f66a3be12fd6..9cdfabaeaa0b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1217,6 +1217,7 @@ extern void afs_cache_permit(struct afs_vnode *, struct key *, unsigned int,
 			     struct afs_status_cb *);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
+extern struct key *afs_request_key_rcu(struct afs_cell *);
 extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
 extern int afs_permission(struct inode *, int);
 extern void __exit afs_clean_up_permit_cache(void);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 71e71c07568f..ef2fd34ba282 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -27,8 +27,37 @@ struct key *afs_request_key(struct afs_cell *cell)
 	_enter("{%x}", key_serial(cell->anonymous_key));
 
 	_debug("key %s", cell->anonymous_key->description);
-	key = request_key(&key_type_rxrpc, cell->anonymous_key->description,
-			  NULL);
+	key = request_key_net(&key_type_rxrpc, cell->anonymous_key->description,
+			      cell->net->net, NULL);
+	if (IS_ERR(key)) {
+		if (PTR_ERR(key) != -ENOKEY) {
+			_leave(" = %ld", PTR_ERR(key));
+			return key;
+		}
+
+		/* act as anonymous user */
+		_leave(" = {%x} [anon]", key_serial(cell->anonymous_key));
+		return key_get(cell->anonymous_key);
+	} else {
+		/* act as authorised user */
+		_leave(" = {%x} [auth]", key_serial(key));
+		return key;
+	}
+}
+
+/*
+ * Get a key when pathwalk is in rcuwalk mode.
+ */
+struct key *afs_request_key_rcu(struct afs_cell *cell)
+{
+	struct key *key;
+
+	_enter("{%x}", key_serial(cell->anonymous_key));
+
+	_debug("key %s", cell->anonymous_key->description);
+	key = request_key_net_rcu(&key_type_rxrpc,
+				  cell->anonymous_key->description,
+				  cell->net->net);
 	if (IS_ERR(key)) {
 		if (PTR_ERR(key) != -ENOKEY) {
 			_leave(" = %ld", PTR_ERR(key));
diff --git a/include/linux/key.h b/include/linux/key.h
index 50028338a4cc..6cf8e71cf8b7 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -324,7 +324,7 @@ static inline struct key *request_key(struct key_type *type,
 }
 
 #ifdef CONFIG_NET
-/*
+/**
  * request_key_net - Request a key for a net namespace and wait for construction
  * @type: Type of key.
  * @description: The searchable description of the key.
@@ -341,6 +341,18 @@ static inline struct key *request_key(struct key_type *type,
  */
 #define request_key_net(type, description, net, callout_info) \
 	request_key_tag(type, description, net->key_domain, callout_info);
+
+/**
+ * request_key_net_rcu - Request a key for a net namespace under RCU conditions
+ * @type: Type of key.
+ * @description: The searchable description of the key.
+ * @net: The network namespace that is the key's domain of operation.
+ *
+ * As for request_key_rcu() except that only keys that operate the specified
+ * network namespace are used.
+ */
+#define request_key_net_rcu(type, description, net) \
+	request_key_rcu(type, description, net->key_domain);
 #endif /* CONFIG_NET */
 
 extern int wait_for_key_construction(struct key *key, bool intr);
-- 
cgit v1.2.3


From d0087e72710ca7d1b309bf427286da58418ea89e Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 30 Aug 2019 09:17:37 +0200
Subject: regulator: provide regulator_bulk_set_supply_names()

There are many regulator consumers who - before using the regulator
bulk functions - set the supply names in regulator_bulk_data using
a for loop.

Let's provide a simple helper in the consumer API that allows users
to do the same with a single function call.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Link: https://lore.kernel.org/r/20190830071740.4267-2-brgl@bgdev.pl
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/helpers.c        | 21 +++++++++++++++++++++
 include/linux/regulator/consumer.h | 12 ++++++++++++
 2 files changed, 33 insertions(+)

(limited to 'include')

diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index 4986cc5064a1..ca3dc3f3bb29 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -860,3 +860,24 @@ int regulator_get_current_limit_regmap(struct regulator_dev *rdev)
 	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(regulator_get_current_limit_regmap);
+
+/**
+ * regulator_bulk_set_supply_names - initialize the 'supply' fields in an array
+ *                                   of regulator_bulk_data structs
+ *
+ * @consumers: array of regulator_bulk_data entries to initialize
+ * @supply_names: array of supply name strings
+ * @num_supplies: number of supply names to initialize
+ *
+ * Note: the 'consumers' array must be the size of 'num_supplies'.
+ */
+void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
+				     const char *const *supply_names,
+				     unsigned int num_supplies)
+{
+	unsigned int i;
+
+	for (i = 0; i < num_supplies; i++)
+		consumers[i].supply = supply_names[i];
+}
+EXPORT_SYMBOL_GPL(regulator_bulk_set_supply_names);
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 815983419375..6d2181a76987 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -281,6 +281,12 @@ void devm_regulator_unregister_notifier(struct regulator *regulator,
 void *regulator_get_drvdata(struct regulator *regulator);
 void regulator_set_drvdata(struct regulator *regulator, void *data);
 
+/* misc helpers */
+
+void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
+				     const char *const *supply_names,
+				     unsigned int num_supplies);
+
 #else
 
 /*
@@ -580,6 +586,12 @@ static inline int regulator_list_voltage(struct regulator *regulator, unsigned s
 	return -EINVAL;
 }
 
+void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
+				     const char *const *supply_names,
+				     unsigned int num_supplies)
+{
+}
+
 #endif
 
 static inline int regulator_set_voltage_triplet(struct regulator *regulator,
-- 
cgit v1.2.3


From 95c267dd20431f0eb54ca204bd73a7d85c532a37 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Fri, 23 Aug 2019 09:58:52 +0900
Subject: ASoC: soc-core: add snd_soc_dapm_init()

It is easy to read code if it is cleanly using paired function/naming,
like start <-> stop, register <-> unregister, etc, etc.
But, current ALSA SoC code is very random, unbalance, not paired, etc.
It is easy to create bug at the such code, and it will be difficult to
debug.

soc-dapm has snd_soc_dapm_free() which cleanups debugfs, widgets, list.
But, there is no paired initialize function.
This patch adds snd_soc_dapm_init() and initilaizing dapm

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87pnkw7lbj.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc-dapm.h |  3 +++
 sound/soc/soc-core.c     | 14 ++------------
 sound/soc/soc-dapm.c     | 21 +++++++++++++++++++++
 3 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h
index 2aa73d6dd7be..dd993dd29229 100644
--- a/include/sound/soc-dapm.h
+++ b/include/sound/soc-dapm.h
@@ -416,6 +416,9 @@ int snd_soc_dapm_update_dai(struct snd_pcm_substream *substream,
 /* dapm path setup */
 int snd_soc_dapm_new_widgets(struct snd_soc_card *card);
 void snd_soc_dapm_free(struct snd_soc_dapm_context *dapm);
+void snd_soc_dapm_init(struct snd_soc_dapm_context *dapm,
+		       struct snd_soc_card *card,
+		       struct snd_soc_component *component);
 int snd_soc_dapm_add_routes(struct snd_soc_dapm_context *dapm,
 			    const struct snd_soc_dapm_route *route, int num);
 int snd_soc_dapm_del_routes(struct snd_soc_dapm_context *dapm,
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 21c005a044e8..8e831ae59eb8 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1023,14 +1023,7 @@ static int soc_probe_component(struct snd_soc_card *card,
 
 	soc_init_component_debugfs(component);
 
-	INIT_LIST_HEAD(&dapm->list);
-	dapm->card		= card;
-	dapm->dev		= component->dev;
-	dapm->component		= component;
-	dapm->bias_level	= SND_SOC_BIAS_OFF;
-	dapm->idle_bias_off	= !component->driver->idle_bias_on;
-	dapm->suspend_bias_off	= component->driver->suspend_bias_off;
-	list_add(&dapm->list, &card->dapm_list);
+	snd_soc_dapm_init(dapm, card, component);
 
 	ret = snd_soc_dapm_new_controls(dapm,
 					component->driver->dapm_widgets,
@@ -1937,10 +1930,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
 	}
 	mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_INIT);
 
-	card->dapm.bias_level = SND_SOC_BIAS_OFF;
-	card->dapm.dev = card->dev;
-	card->dapm.card = card;
-	list_add(&card->dapm.list, &card->dapm_list);
+	snd_soc_dapm_init(&card->dapm, card, NULL);
 
 	/* check whether any platform is ignore machine FE and using topology */
 	soc_check_tplg_fes(card);
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 10819b3e0b98..b6378f025836 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -4717,6 +4717,27 @@ void snd_soc_dapm_free(struct snd_soc_dapm_context *dapm)
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_free);
 
+void snd_soc_dapm_init(struct snd_soc_dapm_context *dapm,
+		       struct snd_soc_card *card,
+		       struct snd_soc_component *component)
+{
+	dapm->card		= card;
+	dapm->component		= component;
+	dapm->bias_level	= SND_SOC_BIAS_OFF;
+
+	if (component) {
+		dapm->dev		= component->dev;
+		dapm->idle_bias_off	= !component->driver->idle_bias_on,
+		dapm->suspend_bias_off	= component->driver->suspend_bias_off;
+	} else {
+		dapm->dev		= card->dev;
+	}
+
+	INIT_LIST_HEAD(&dapm->list);
+	list_add(&dapm->list, &card->dapm_list);
+}
+EXPORT_SYMBOL_GPL(snd_soc_dapm_init);
+
 static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm)
 {
 	struct snd_soc_card *card = dapm->card;
-- 
cgit v1.2.3


From d55c028f8b25bdaaba9ae08026052b5b44d031b0 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Tue, 9 Jul 2019 12:00:05 -0700
Subject: backlight: Expose brightness curve type through sysfs

Backlight brightness curves can have different shapes. The two main
types are linear and non-linear curves. The human eye doesn't
perceive linearly increasing/decreasing brightness as linear (see
also 88ba95bedb79 "backlight: pwm_bl: Compute brightness of LED
linearly to human eye"), hence many backlights use non-linear (often
logarithmic) brightness curves. The type of curve currently is opaque
to userspace, so userspace often uses more or less reliable heuristics
(like the number of brightness levels) to decide whether to treat a
backlight device as linear or non-linear.

Export the type of the brightness curve via the new sysfs attribute
'scale'. The value of the attribute can be 'linear', 'non-linear' or
'unknown'. For devices that don't provide information about the scale
of their brightness curve the value of the 'scale' attribute is 'unknown'.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/ABI/testing/sysfs-class-backlight | 26 +++++++++++++++++++++++++
 MAINTAINERS                                     |  1 +
 drivers/video/backlight/backlight.c             | 19 ++++++++++++++++++
 include/linux/backlight.h                       |  8 ++++++++
 4 files changed, 54 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-backlight

(limited to 'include')

diff --git a/Documentation/ABI/testing/sysfs-class-backlight b/Documentation/ABI/testing/sysfs-class-backlight
new file mode 100644
index 000000000000..3ab175a3f5cb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight
@@ -0,0 +1,26 @@
+What:		/sys/class/backlight/<backlight>/scale
+Date:		July 2019
+KernelVersion:	5.4
+Contact:	Daniel Thompson <daniel.thompson@linaro.org>
+Description:
+		Description of the scale of the brightness curve.
+
+		The human eye senses brightness approximately logarithmically,
+		hence linear changes in brightness are perceived as being
+		non-linear. To achieve a linear perception of brightness changes
+		controls like sliders need to apply a logarithmic mapping for
+		backlights with a linear brightness curve.
+
+		Possible values of the attribute are:
+
+		unknown
+		  The scale of the brightness curve is unknown.
+
+		linear
+		  The brightness changes linearly with each step. Brightness
+		  controls should apply a logarithmic mapping for a linear
+		  perception.
+
+		non-linear
+		  The brightness changes non-linearly with each step. Brightness
+		  controls should use a linear mapping for a linear perception.
diff --git a/MAINTAINERS b/MAINTAINERS
index 5d7bb3024e13..fe75bb96a317 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2907,6 +2907,7 @@ F:	include/linux/backlight.h
 F:	include/linux/pwm_backlight.h
 F:	Documentation/devicetree/bindings/leds/backlight
 F:	Documentation/ABI/stable/sysfs-class-backlight
+F:	Documentation/ABI/testing/sysfs-class-backlight
 
 BATMAN ADVANCED
 M:	Marek Lindner <mareklindner@neomailbox.ch>
diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index 5dc07106a59e..cac3e35d7630 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -32,6 +32,12 @@ static const char *const backlight_types[] = {
 	[BACKLIGHT_FIRMWARE] = "firmware",
 };
 
+static const char *const backlight_scale_types[] = {
+	[BACKLIGHT_SCALE_UNKNOWN]	= "unknown",
+	[BACKLIGHT_SCALE_LINEAR]	= "linear",
+	[BACKLIGHT_SCALE_NON_LINEAR]	= "non-linear",
+};
+
 #if defined(CONFIG_FB) || (defined(CONFIG_FB_MODULE) && \
 			   defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE))
 /* This callback gets called when something important happens inside a
@@ -246,6 +252,18 @@ static ssize_t actual_brightness_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(actual_brightness);
 
+static ssize_t scale_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct backlight_device *bd = to_backlight_device(dev);
+
+	if (WARN_ON(bd->props.scale > BACKLIGHT_SCALE_NON_LINEAR))
+		return sprintf(buf, "unknown\n");
+
+	return sprintf(buf, "%s\n", backlight_scale_types[bd->props.scale]);
+}
+static DEVICE_ATTR_RO(scale);
+
 static struct class *backlight_class;
 
 #ifdef CONFIG_PM_SLEEP
@@ -292,6 +310,7 @@ static struct attribute *bl_device_attrs[] = {
 	&dev_attr_brightness.attr,
 	&dev_attr_actual_brightness.attr,
 	&dev_attr_max_brightness.attr,
+	&dev_attr_scale.attr,
 	&dev_attr_type.attr,
 	NULL,
 };
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 0b5897446dca..c7d6b2e8c3b5 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -46,6 +46,12 @@ enum backlight_notification {
 	BACKLIGHT_UNREGISTERED,
 };
 
+enum backlight_scale {
+	BACKLIGHT_SCALE_UNKNOWN = 0,
+	BACKLIGHT_SCALE_LINEAR,
+	BACKLIGHT_SCALE_NON_LINEAR,
+};
+
 struct backlight_device;
 struct fb_info;
 
@@ -80,6 +86,8 @@ struct backlight_properties {
 	enum backlight_type type;
 	/* Flags used to signal drivers of state changes */
 	unsigned int state;
+	/* Type of the brightness scale (linear, non-linear, ...) */
+	enum backlight_scale scale;
 
 #define BL_CORE_SUSPENDED	(1 << 0)	/* backlight is suspended */
 #define BL_CORE_FBBLANK		(1 << 1)	/* backlight is under an fb blank event */
-- 
cgit v1.2.3


From a1b70a44b80af641a441937803cb8251e8e6d8e3 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 8 Aug 2019 11:03:59 -0700
Subject: Input: bu21013_ts - convert to use GPIO descriptors

This driver can use GPIO descriptors rather than GPIO numbers
without any problems, convert it. Name the field variables after
the actual pins on the chip rather than the "reset" and "touch"
names from the devicetree bindings that are vaguely inaccurate.

No in-tree users pass GPIO numbers in platform data so drop
this. Descriptor tables can be used to get these GPIOs from a board
file if need be.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 .../bindings/input/touchscreen/bu21013.txt         |  5 +-
 drivers/input/touchscreen/bu21013_ts.c             | 86 ++++++++++------------
 include/linux/input/bu21013.h                      |  4 -
 3 files changed, 41 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
index 56d835242af2..43899fc36ecf 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
@@ -2,10 +2,11 @@
 
 Required properties:
  - compatible              : "rohm,bu21013_tp"
- - reg                     :  I2C device address
+ - reg                     : I2C device address
+ - reset-gpios             : GPIO pin enabling (selecting) chip (CS)
 
 Optional properties:
- - touch-gpio              : GPIO pin registering a touch event
+ - touch-gpios             : GPIO pin registering a touch event
  - <supply_name>-supply    : Phandle to a regulator supply
  - rohm,touch-max-x        : Maximum outward permitted limit in the X axis
  - rohm,touch-max-y        : Maximum outward permitted limit in the Y axis
diff --git a/drivers/input/touchscreen/bu21013_ts.c b/drivers/input/touchscreen/bu21013_ts.c
index 1d703e230ac3..c20f86f98ffc 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -14,11 +14,9 @@
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
 #include <linux/module.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
-#define PEN_DOWN_INTR	0
 #define MAX_FINGERS	2
 #define RESET_DELAY	30
 #define PENUP_TIMEOUT	(10)
@@ -143,8 +141,9 @@
  * @touch_stopped: touch stop flag
  * @chip: pointer to the touch panel controller
  * @in_dev: pointer to the input device structure
- * @intr_pin: interrupt pin value
  * @regulator: pointer to the Regulator used for touch screen
+ * @cs_gpiod: chip select GPIO line
+ * @int_gpiod: touch interrupt GPIO line
  *
  * Touch panel device data structure
  */
@@ -154,8 +153,9 @@ struct bu21013_ts_data {
 	const struct bu21013_platform_device *chip;
 	struct input_dev *in_dev;
 	struct regulator *regulator;
+	struct gpio_desc *cs_gpiod;
+	struct gpio_desc *int_gpiod;
 	unsigned int irq;
-	unsigned int intr_pin;
 	bool touch_stopped;
 };
 
@@ -257,20 +257,21 @@ static irqreturn_t bu21013_gpio_irq(int irq, void *device_data)
 {
 	struct bu21013_ts_data *data = device_data;
 	struct i2c_client *i2c = data->client;
+	int keep_polling;
 	int retval;
 
 	do {
 		retval = bu21013_do_touch_report(data);
 		if (retval < 0) {
 			dev_err(&i2c->dev, "bu21013_do_touch_report failed\n");
-			return IRQ_NONE;
+			break;
 		}
 
-		data->intr_pin = gpio_get_value(data->chip->touch_pin);
-		if (data->intr_pin == PEN_DOWN_INTR)
+		keep_polling = gpiod_get_value(data->int_gpiod);
+		if (keep_polling)
 			wait_event_timeout(data->wait, data->touch_stopped,
 					   msecs_to_jiffies(2));
-	} while (!data->intr_pin && !data->touch_stopped);
+	} while (keep_polling && !data->touch_stopped);
 
 	return IRQ_HANDLED;
 }
@@ -425,28 +426,6 @@ static void bu21013_free_irq(struct bu21013_ts_data *bu21013_data)
 	free_irq(bu21013_data->irq, bu21013_data);
 }
 
-/**
- * bu21013_cs_disable() - deconfigures the touch panel controller
- * @bu21013_data: device structure pointer
- *
- * This function is used to deconfigure the chip selection
- * for touch panel controller.
- */
-static void bu21013_cs_disable(struct bu21013_ts_data *bu21013_data)
-{
-	int error;
-
-	error = gpio_direction_output(bu21013_data->chip->cs_pin, 0);
-	if (error < 0)
-		dev_warn(&bu21013_data->client->dev,
-			 "%s: gpio direction failed, error: %d\n",
-			 __func__, error);
-	else
-		gpio_set_value(bu21013_data->chip->cs_pin, 0);
-
-	gpio_free(bu21013_data->chip->cs_pin);
-}
-
 #ifdef CONFIG_OF
 static const struct bu21013_platform_device *
 bu21013_parse_dt(struct device *dev)
@@ -471,9 +450,6 @@ bu21013_parse_dt(struct device *dev)
 	of_property_read_u32(np, "rohm,touch-max-x", &pdata->touch_x_max);
 	of_property_read_u32(np, "rohm,touch-max-y", &pdata->touch_y_max);
 
-	pdata->touch_pin = of_get_named_gpio(np, "touch-gpio", 0);
-	pdata->cs_pin = of_get_named_gpio(np, "reset-gpio", 0);
-
 	pdata->ext_clk = false;
 
 	return pdata;
@@ -516,11 +492,6 @@ static int bu21013_probe(struct i2c_client *client,
 			return PTR_ERR(pdata);
 	}
 
-	if (!gpio_is_valid(pdata->touch_pin)) {
-		dev_err(&client->dev, "invalid touch_pin supplied\n");
-		return -EINVAL;
-	}
-
 	bu21013_data = kzalloc(sizeof(struct bu21013_ts_data), GFP_KERNEL);
 	in_dev = input_allocate_device();
 	if (!bu21013_data || !in_dev) {
@@ -529,16 +500,26 @@ static int bu21013_probe(struct i2c_client *client,
 		goto err_free_mem;
 	}
 
+	/* Named "INT" on the chip, DT binding is "touch" */
+	bu21013_data->int_gpiod = gpiod_get(&client->dev, "touch", GPIOD_IN);
+	error = PTR_ERR_OR_ZERO(bu21013_data->int_gpiod);
+	if (error) {
+		if (error != -EPROBE_DEFER)
+			dev_err(&client->dev, "failed to get INT GPIO\n");
+		goto err_free_mem;
+	}
+	gpiod_set_consumer_name(bu21013_data->int_gpiod, "BU21013 INT");
+
 	bu21013_data->in_dev = in_dev;
 	bu21013_data->chip = pdata;
 	bu21013_data->client = client;
-	bu21013_data->irq = gpio_to_irq(pdata->touch_pin);
+	bu21013_data->irq = gpiod_to_irq(bu21013_data->int_gpiod);
 
 	bu21013_data->regulator = regulator_get(&client->dev, "avdd");
 	if (IS_ERR(bu21013_data->regulator)) {
 		dev_err(&client->dev, "regulator_get failed\n");
 		error = PTR_ERR(bu21013_data->regulator);
-		goto err_free_mem;
+		goto err_put_int_gpio;
 	}
 
 	error = regulator_enable(bu21013_data->regulator);
@@ -550,13 +531,16 @@ static int bu21013_probe(struct i2c_client *client,
 	bu21013_data->touch_stopped = false;
 	init_waitqueue_head(&bu21013_data->wait);
 
-	/* configure the gpio pins */
-	error = gpio_request_one(pdata->cs_pin, GPIOF_OUT_INIT_HIGH,
-				 "touchp_reset");
-	if (error < 0) {
-		dev_err(&client->dev, "Unable to request gpio reset_pin\n");
+	/* Named "CS" on the chip, DT binding is "reset" */
+	bu21013_data->cs_gpiod = gpiod_get(&client->dev, "reset",
+					   GPIOD_OUT_HIGH);
+	error = PTR_ERR_OR_ZERO(bu21013_data->cs_gpiod);
+	if (error) {
+		if (error != -EPROBE_DEFER)
+			dev_err(&client->dev, "failed to get CS GPIO\n");
 		goto err_disable_regulator;
 	}
+	gpiod_set_consumer_name(bu21013_data->cs_gpiod, "BU21013 CS");
 
 	/* configure the touch panel controller */
 	error = bu21013_init_chip(bu21013_data);
@@ -604,11 +588,14 @@ static int bu21013_probe(struct i2c_client *client,
 err_free_irq:
 	bu21013_free_irq(bu21013_data);
 err_cs_disable:
-	bu21013_cs_disable(bu21013_data);
+	gpiod_set_value(bu21013_data->cs_gpiod, 0);
+	gpiod_put(bu21013_data->cs_gpiod);
 err_disable_regulator:
 	regulator_disable(bu21013_data->regulator);
 err_put_regulator:
 	regulator_put(bu21013_data->regulator);
+err_put_int_gpio:
+	gpiod_put(bu21013_data->int_gpiod);
 err_free_mem:
 	input_free_device(in_dev);
 	kfree(bu21013_data);
@@ -628,13 +615,16 @@ static int bu21013_remove(struct i2c_client *client)
 
 	bu21013_free_irq(bu21013_data);
 
-	bu21013_cs_disable(bu21013_data);
+	gpiod_set_value(bu21013_data->cs_gpiod, 0);
+	gpiod_put(bu21013_data->cs_gpiod);
 
 	input_unregister_device(bu21013_data->in_dev);
 
 	regulator_disable(bu21013_data->regulator);
 	regulator_put(bu21013_data->regulator);
 
+	gpiod_put(bu21013_data->int_gpiod);
+
 	kfree(bu21013_data);
 
 	return 0;
diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
index 7e5b7e978e8a..58b1a9d44443 100644
--- a/include/linux/input/bu21013.h
+++ b/include/linux/input/bu21013.h
@@ -11,8 +11,6 @@
  * struct bu21013_platform_device - Handle the platform data
  * @touch_x_max: touch x max
  * @touch_y_max: touch y max
- * @cs_pin: chip select pin
- * @touch_pin: touch gpio pin
  * @ext_clk: external clock flag
  * @x_flip: x flip flag
  * @y_flip: y flip flag
@@ -23,8 +21,6 @@
 struct bu21013_platform_device {
 	int touch_x_max;
 	int touch_y_max;
-	unsigned int cs_pin;
-	unsigned int touch_pin;
 	bool ext_clk;
 	bool x_flip;
 	bool y_flip;
-- 
cgit v1.2.3


From 1eb7b4cacc01771ae42fcbc5ae9a4bc1d13c1dbc Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 8 Aug 2019 12:09:43 -0700
Subject: Input: bu21013_ts - remove support for platform data

There are no current users of the platform data in the tree, and
any new users should either use device tree, or static device
properties to describe the device.

This change drop the platform data definition and handling and moves the
driver over to generic device properties API. We also drop support for the
external clock. If it is needed we will have to extend the bindings to
supply the clock reference and handle it properly in the driver.

Also, wakeup setting should be coming from I2C client.

Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/bu21013_ts.c | 109 +++++++++++----------------------
 include/linux/input/bu21013.h          |  30 ---------
 2 files changed, 37 insertions(+), 102 deletions(-)
 delete mode 100644 include/linux/input/bu21013.h

(limited to 'include')

diff --git a/drivers/input/touchscreen/bu21013_ts.c b/drivers/input/touchscreen/bu21013_ts.c
index 2b8538a63945..f3b3e4c72c84 100644
--- a/drivers/input/touchscreen/bu21013_ts.c
+++ b/drivers/input/touchscreen/bu21013_ts.c
@@ -4,18 +4,18 @@
  * Author: Naveen Kumar G <naveen.gaddipati@stericsson.com> for ST-Ericsson
  */
 
-#include <linux/kernel.h>
+#include <linux/bitops.h>
 #include <linux/delay.h>
-#include <linux/interrupt.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
-#include <linux/workqueue.h>
 #include <linux/input.h>
-#include <linux/input/bu21013.h>
-#include <linux/slab.h>
-#include <linux/regulator/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/gpio/consumer.h>
-#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/types.h>
 
 #define MAX_FINGERS	2
 #define RESET_DELAY	30
@@ -137,23 +137,32 @@
 /**
  * struct bu21013_ts - touch panel data structure
  * @client: pointer to the i2c client
- * @touch_stopped: touch stop flag
- * @chip: pointer to the touch panel controller
  * @in_dev: pointer to the input device structure
  * @regulator: pointer to the Regulator used for touch screen
  * @cs_gpiod: chip select GPIO line
  * @int_gpiod: touch interrupt GPIO line
+ * @irq: interrupt number the device is using
+ * @touch_x_max: maximum X coordinate reported by the device
+ * @touch_y_max: maximum Y coordinate reported by the device
+ * @x_flip: indicates that the driver should invert X coordinate before
+ *	reporting
+ * @y_flip: indicates that the driver should invert Y coordinate before
+ *	reporting
+ * @touch_stopped: touch stop flag
  *
  * Touch panel device data structure
  */
 struct bu21013_ts {
 	struct i2c_client *client;
-	const struct bu21013_platform_device *chip;
 	struct input_dev *in_dev;
 	struct regulator *regulator;
 	struct gpio_desc *cs_gpiod;
 	struct gpio_desc *int_gpiod;
 	unsigned int irq;
+	u32 touch_x_max;
+	u32 touch_y_max;
+	bool x_flip;
+	bool y_flip;
 	bool touch_stopped;
 };
 
@@ -208,10 +217,10 @@ static int bu21013_do_touch_report(struct bu21013_ts *ts)
 		}
 
 		for (i = 0; i < finger_down_count; i++) {
-			if (ts->chip->x_flip)
-				pos_x[i] = ts->chip->touch_x_max - pos_x[i];
-			if (ts->chip->y_flip)
-				pos_y[i] = ts->chip->touch_y_max - pos_y[i];
+			if (ts->x_flip)
+				pos_x[i] = ts->touch_x_max - pos_x[i];
+			if (ts->y_flip)
+				pos_y[i] = ts->touch_y_max - pos_y[i];
 
 			input_report_abs(ts->in_dev,
 					 ABS_MT_POSITION_X, pos_x[i]);
@@ -304,14 +313,9 @@ static int bu21013_init_chip(struct bu21013_ts *ts)
 		return error;
 	}
 
-	if (ts->chip->ext_clk)
-		error = i2c_smbus_write_byte_data(client, BU21013_CLK_MODE_REG,
-						  BU21013_CLK_MODE_EXT |
-							BU21013_CLK_MODE_CALIB);
-	else
-		error = i2c_smbus_write_byte_data(client, BU21013_CLK_MODE_REG,
-						  BU21013_CLK_MODE_DIV |
-							BU21013_CLK_MODE_CALIB);
+	error = i2c_smbus_write_byte_data(client, BU21013_CLK_MODE_REG,
+					  BU21013_CLK_MODE_DIV |
+						BU21013_CLK_MODE_CALIB);
 	if (error) {
 		dev_err(&client->dev, "BU21013_CLK_MODE reg write failed\n");
 		return error;
@@ -388,43 +392,6 @@ static int bu21013_init_chip(struct bu21013_ts *ts)
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct bu21013_platform_device *
-bu21013_parse_dt(struct device *dev)
-{
-	struct device_node *np = dev->of_node;
-	struct bu21013_platform_device *pdata;
-
-	if (!np) {
-		dev_err(dev, "no device tree or platform data\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
-	if (!pdata)
-		return ERR_PTR(-ENOMEM);
-
-	pdata->y_flip = pdata->x_flip = false;
-
-	pdata->x_flip = of_property_read_bool(np, "rohm,flip-x");
-	pdata->y_flip = of_property_read_bool(np, "rohm,flip-y");
-
-	of_property_read_u32(np, "rohm,touch-max-x", &pdata->touch_x_max);
-	of_property_read_u32(np, "rohm,touch-max-y", &pdata->touch_y_max);
-
-	pdata->ext_clk = false;
-
-	return pdata;
-}
-#else
-static inline const struct bu21013_platform_device *
-bu21013_parse_dt(struct device *dev)
-{
-	dev_err(dev, "no platform data available\n");
-	return ERR_PTR(-EINVAL);
-}
-#endif
-
 static void bu21013_power_off(void *_ts)
 {
 	struct bu21013_ts *ts = _ts;
@@ -442,8 +409,6 @@ static void bu21013_disable_chip(void *_ts)
 static int bu21013_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
-	const struct bu21013_platform_device *pdata =
-					dev_get_platdata(&client->dev);
 	struct bu21013_ts *ts;
 	struct input_dev *in_dev;
 	int error;
@@ -454,19 +419,20 @@ static int bu21013_probe(struct i2c_client *client,
 		return -EIO;
 	}
 
-	if (!pdata) {
-		pdata = bu21013_parse_dt(&client->dev);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
-	}
-
 	ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
 	if (!ts)
 		return -ENOMEM;
 
-	ts->chip = pdata;
 	ts->client = client;
 
+	ts->x_flip = device_property_read_bool(&client->dev, "rohm,flip-x");
+	ts->y_flip = device_property_read_bool(&client->dev, "rohm,flip-y");
+
+	device_property_read_u32(&client->dev, "rohm,touch-max-x",
+				 &ts->touch_x_max);
+	device_property_read_u32(&client->dev, "rohm,touch-max-y",
+				 &ts->touch_y_max);
+
 	in_dev = devm_input_allocate_device(&client->dev);
 	if (!in_dev) {
 		dev_err(&client->dev, "device memory alloc failed\n");
@@ -483,9 +449,9 @@ static int bu21013_probe(struct i2c_client *client,
 	__set_bit(EV_ABS, in_dev->evbit);
 
 	input_set_abs_params(in_dev, ABS_MT_POSITION_X,
-			     0, pdata->touch_x_max, 0, 0);
+			     0, ts->touch_x_max, 0, 0);
 	input_set_abs_params(in_dev, ABS_MT_POSITION_Y,
-			     0, pdata->touch_y_max, 0, 0);
+			     0, ts->touch_y_max, 0, 0);
 	input_set_drvdata(in_dev, ts);
 
 	ts->regulator = devm_regulator_get(&client->dev, "avdd");
@@ -560,7 +526,6 @@ static int bu21013_probe(struct i2c_client *client,
 		return error;
 	}
 
-	device_init_wakeup(&client->dev, pdata->wakeup);
 	i2c_set_clientdata(client, ts);
 
 	return 0;
diff --git a/include/linux/input/bu21013.h b/include/linux/input/bu21013.h
deleted file mode 100644
index 58b1a9d44443..000000000000
--- a/include/linux/input/bu21013.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Naveen Kumar G <naveen.gaddipati@stericsson.com> for ST-Ericsson
- */
-
-#ifndef _BU21013_H
-#define _BU21013_H
-
-/**
- * struct bu21013_platform_device - Handle the platform data
- * @touch_x_max: touch x max
- * @touch_y_max: touch y max
- * @ext_clk: external clock flag
- * @x_flip: x flip flag
- * @y_flip: y flip flag
- * @wakeup: wakeup flag
- *
- * This is used to handle the platform data
- */
-struct bu21013_platform_device {
-	int touch_x_max;
-	int touch_y_max;
-	bool ext_clk;
-	bool x_flip;
-	bool y_flip;
-	bool wakeup;
-};
-
-#endif
-- 
cgit v1.2.3


From df0eea4488081e0698b0b58ccd1e8c8823e22841 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 23 Jul 2019 11:44:09 +0530
Subject: cpufreq: Remove CPUFREQ_ADJUST and CPUFREQ_NOTIFY policy notifier
 events

No driver makes reference to these events now, remove them and the code
related to them.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 36 +++++++-----------------------------
 include/linux/cpufreq.h   |  6 ++----
 2 files changed, 9 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c13dcb59b30c..e0ee23895497 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2360,15 +2360,13 @@ EXPORT_SYMBOL(cpufreq_get_policy);
  * @policy: Policy object to modify.
  * @new_policy: New policy data.
  *
- * Pass @new_policy to the cpufreq driver's ->verify() callback, run the
- * installed policy notifiers for it with the CPUFREQ_ADJUST value, pass it to
- * the driver's ->verify() callback again and run the notifiers for it again
- * with the CPUFREQ_NOTIFY value.  Next, copy the min and max parameters
- * of @new_policy to @policy and either invoke the driver's ->setpolicy()
- * callback (if present) or carry out a governor update for @policy.  That is,
- * run the current governor's ->limits() callback (if the governor field in
- * @new_policy points to the same object as the one in @policy) or replace the
- * governor for @policy with the new one stored in @new_policy.
+ * Pass @new_policy to the cpufreq driver's ->verify() callback. Next, copy the
+ * min and max parameters of @new_policy to @policy and either invoke the
+ * driver's ->setpolicy() callback (if present) or carry out a governor update
+ * for @policy.  That is, run the current governor's ->limits() callback (if the
+ * governor field in @new_policy points to the same object as the one in
+ * @policy) or replace the governor for @policy with the new one stored in
+ * @new_policy.
  *
  * The cpuinfo part of @policy is not updated by this function.
  */
@@ -2396,26 +2394,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
 	if (ret)
 		return ret;
 
-	/*
-	 * The notifier-chain shall be removed once all the users of
-	 * CPUFREQ_ADJUST are moved to use the QoS framework.
-	 */
-	/* adjust if necessary - all reasons */
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_ADJUST, new_policy);
-
-	/*
-	 * verify the cpu speed can be set within this limit, which might be
-	 * different to the first one
-	 */
-	ret = cpufreq_driver->verify(new_policy);
-	if (ret)
-		return ret;
-
-	/* notification of the new policy */
-	blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-			CPUFREQ_NOTIFY, new_policy);
-
 	policy->min = new_policy->min;
 	policy->max = new_policy->max;
 	trace_cpu_frequency_limits(policy);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index afc10384a681..c57e88e85c41 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -456,10 +456,8 @@ static inline void cpufreq_resume(void) {}
 #define CPUFREQ_POSTCHANGE		(1)
 
 /* Policy Notifiers  */
-#define CPUFREQ_ADJUST			(0)
-#define CPUFREQ_NOTIFY			(1)
-#define CPUFREQ_CREATE_POLICY		(2)
-#define CPUFREQ_REMOVE_POLICY		(3)
+#define CPUFREQ_CREATE_POLICY		(0)
+#define CPUFREQ_REMOVE_POLICY		(1)
 
 #ifdef CONFIG_CPU_FREQ
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);
-- 
cgit v1.2.3


From 45147fb522bb459e79bdcb7504ee7ec8cfd4c12c Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 28 Aug 2019 21:35:42 +0900
Subject: block: add a helper function to merge the segments

This patch adds a helper function whether a queue can merge
the segments by the DMA MAP layer (e.g. via IOMMU).

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-settings.c   | 23 +++++++++++++++++++++++
 include/linux/blkdev.h |  2 ++
 2 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 2c1831207a8f..c3632fc6d540 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -12,6 +12,7 @@
 #include <linux/lcm.h>
 #include <linux/jiffies.h>
 #include <linux/gfp.h>
+#include <linux/dma-mapping.h>
 
 #include "blk.h"
 #include "blk-wbt.h"
@@ -832,6 +833,28 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
+/**
+ * blk_queue_can_use_dma_map_merging - configure queue for merging segments.
+ * @q:		the request queue for the device
+ * @dev:	the device pointer for dma
+ *
+ * Tell the block layer about merging the segments by dma map of @q.
+ */
+bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
+				       struct device *dev)
+{
+	unsigned long boundary = dma_get_merge_boundary(dev);
+
+	if (!boundary)
+		return false;
+
+	/* No need to update max_segment_size. see blk_queue_virt_boundary() */
+	blk_queue_virt_boundary(q, boundary);
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1ef375dafb1c..f6d55e2490dc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1085,6 +1085,8 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
+extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
+					      struct device *dev);
 
 /*
  * Number of physical segments as sent to the device.
-- 
cgit v1.2.3


From 38c38cb73223218f6eedf485280917af1f8a0af2 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 28 Aug 2019 21:35:43 +0900
Subject: mmc: queue: use bigger segments if DMA MAP layer can merge the
 segments

When the max_segs of a mmc host is smaller than 512, the mmc
subsystem tries to use 512 segments if DMA MAP layer can merge
the segments, and then the mmc subsystem exposes such information
to the block layer by using blk_queue_can_use_dma_map_merging().

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/mmc/core/queue.c | 35 ++++++++++++++++++++++++++++++++---
 include/linux/mmc/host.h |  1 +
 2 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 7102e2ebc614..1e29b305767e 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -21,6 +21,8 @@
 #include "card.h"
 #include "host.h"
 
+#define MMC_DMA_MAP_MERGE_SEGMENTS	512
+
 static inline bool mmc_cqe_dcmd_busy(struct mmc_queue *mq)
 {
 	/* Allow only 1 DCMD at a time */
@@ -193,6 +195,12 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 		blk_queue_flag_set(QUEUE_FLAG_SECERASE, q);
 }
 
+static unsigned int mmc_get_max_segments(struct mmc_host *host)
+{
+	return host->can_dma_map_merge ? MMC_DMA_MAP_MERGE_SEGMENTS :
+					 host->max_segs;
+}
+
 /**
  * mmc_init_request() - initialize the MMC-specific per-request data
  * @q: the request queue
@@ -206,7 +214,7 @@ static int __mmc_init_request(struct mmc_queue *mq, struct request *req,
 	struct mmc_card *card = mq->card;
 	struct mmc_host *host = card->host;
 
-	mq_rq->sg = mmc_alloc_sg(host->max_segs, gfp);
+	mq_rq->sg = mmc_alloc_sg(mmc_get_max_segments(host), gfp);
 	if (!mq_rq->sg)
 		return -ENOMEM;
 
@@ -362,13 +370,23 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
 		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
 	blk_queue_max_hw_sectors(mq->queue,
 		min(host->max_blk_count, host->max_req_size / 512));
-	blk_queue_max_segments(mq->queue, host->max_segs);
+	if (host->can_dma_map_merge)
+		WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
+							mmc_dev(host)),
+		     "merging was advertised but not possible");
+	blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
 
 	if (mmc_card_mmc(card))
 		block_size = card->ext_csd.data_sector_size;
 
 	blk_queue_logical_block_size(mq->queue, block_size);
-	blk_queue_max_segment_size(mq->queue,
+	/*
+	 * After blk_queue_can_use_dma_map_merging() was called with succeed,
+	 * since it calls blk_queue_virt_boundary(), the mmc should not call
+	 * both blk_queue_max_segment_size().
+	 */
+	if (!host->can_dma_map_merge)
+		blk_queue_max_segment_size(mq->queue,
 			round_down(host->max_seg_size, block_size));
 
 	dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
@@ -418,6 +436,17 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 	mq->tag_set.cmd_size = sizeof(struct mmc_queue_req);
 	mq->tag_set.driver_data = mq;
 
+	/*
+	 * Since blk_mq_alloc_tag_set() calls .init_request() of mmc_mq_ops,
+	 * the host->can_dma_map_merge should be set before to get max_segs
+	 * from mmc_get_max_segments().
+	 */
+	if (host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
+	    dma_get_merge_boundary(mmc_dev(host)))
+		host->can_dma_map_merge = 1;
+	else
+		host->can_dma_map_merge = 0;
+
 	ret = blk_mq_alloc_tag_set(&mq->tag_set);
 	if (ret)
 		return ret;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4a351cb7f20f..c5662b3e64db 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -396,6 +396,7 @@ struct mmc_host {
 	unsigned int		retune_paused:1; /* re-tuning is temporarily disabled */
 	unsigned int		use_blk_mq:1;	/* use blk-mq */
 	unsigned int		retune_crc_disable:1; /* don't trigger retune upon crc */
+	unsigned int		can_dma_map_merge:1; /* merging can be used */
 
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
-- 
cgit v1.2.3


From 6ba99411b858bd70bae966633561e698cd6de38c Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 28 Aug 2019 21:35:40 +0900
Subject: dma-mapping: introduce dma_get_merge_boundary()

This patch adds a new DMA API "dma_get_merge_boundary". This function
returns the DMA merge boundary if the DMA layer can merge the segments.
This patch also adds the implementation for a new dma_map_ops pointer.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/DMA-API.txt   |  8 ++++++++
 include/linux/dma-mapping.h |  6 ++++++
 kernel/dma/mapping.c        | 11 +++++++++++
 3 files changed, 25 insertions(+)

(limited to 'include')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index e47c63bd4887..9c4dd3d779ed 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -204,6 +204,14 @@ Returns the maximum size of a mapping for the device. The size parameter
 of the mapping functions like dma_map_single(), dma_map_page() and
 others should not be larger than the returned value.
 
+::
+
+	unsigned long
+	dma_get_merge_boundary(struct device *dev);
+
+Returns the DMA merge boundary. If the device cannot merge any the DMA address
+segments, the function returns 0.
+
 Part Id - Streaming DMA mappings
 --------------------------------
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 48ebe8295987..de41a4f0b9f6 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -131,6 +131,7 @@ struct dma_map_ops {
 	int (*dma_supported)(struct device *dev, u64 mask);
 	u64 (*get_required_mask)(struct device *dev);
 	size_t (*max_mapping_size)(struct device *dev);
+	unsigned long (*get_merge_boundary)(struct device *dev);
 };
 
 #define DMA_MAPPING_ERROR		(~(dma_addr_t)0)
@@ -467,6 +468,7 @@ int dma_set_mask(struct device *dev, u64 mask);
 int dma_set_coherent_mask(struct device *dev, u64 mask);
 u64 dma_get_required_mask(struct device *dev);
 size_t dma_max_mapping_size(struct device *dev);
+unsigned long dma_get_merge_boundary(struct device *dev);
 #else /* CONFIG_HAS_DMA */
 static inline dma_addr_t dma_map_page_attrs(struct device *dev,
 		struct page *page, size_t offset, size_t size,
@@ -572,6 +574,10 @@ static inline size_t dma_max_mapping_size(struct device *dev)
 {
 	return 0;
 }
+static inline unsigned long dma_get_merge_boundary(struct device *dev)
+{
+	return 0;
+}
 #endif /* CONFIG_HAS_DMA */
 
 static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 1b96616c9f20..72c825c1788e 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -407,3 +407,14 @@ size_t dma_max_mapping_size(struct device *dev)
 	return size;
 }
 EXPORT_SYMBOL_GPL(dma_max_mapping_size);
+
+unsigned long dma_get_merge_boundary(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	if (!ops || !ops->get_merge_boundary)
+		return 0;	/* can't merge */
+
+	return ops->get_merge_boundary(dev);
+}
+EXPORT_SYMBOL_GPL(dma_get_merge_boundary);
-- 
cgit v1.2.3


From a55c7454a8c887b226a01d7eed088ccb5374d81e Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt@codeblueprint.co.uk>
Date: Thu, 8 Aug 2019 20:53:01 +0100
Subject: sched/topology: Improve load balancing on AMD EPYC systems

SD_BALANCE_{FORK,EXEC} and SD_WAKE_AFFINE are stripped in sd_init()
for any sched domains with a NUMA distance greater than 2 hops
(RECLAIM_DISTANCE). The idea being that it's expensive to balance
across domains that far apart.

However, as is rather unfortunately explained in:

  commit 32e45ff43eaf ("mm: increase RECLAIM_DISTANCE to 30")

the value for RECLAIM_DISTANCE is based on node distance tables from
2011-era hardware.

Current AMD EPYC machines have the following NUMA node distances:

 node distances:
 node   0   1   2   3   4   5   6   7
   0:  10  16  16  16  32  32  32  32
   1:  16  10  16  16  32  32  32  32
   2:  16  16  10  16  32  32  32  32
   3:  16  16  16  10  32  32  32  32
   4:  32  32  32  32  10  16  16  16
   5:  32  32  32  32  16  10  16  16
   6:  32  32  32  32  16  16  10  16
   7:  32  32  32  32  16  16  16  10

where 2 hops is 32.

The result is that the scheduler fails to load balance properly across
NUMA nodes on different sockets -- 2 hops apart.

For example, pinning 16 busy threads to NUMA nodes 0 (CPUs 0-7) and 4
(CPUs 32-39) like so,

  $ numactl -C 0-7,32-39 ./spinner 16

causes all threads to fork and remain on node 0 until the active
balancer kicks in after a few seconds and forcibly moves some threads
to node 4.

Override node_reclaim_distance for AMD Zen.

Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Suravee.Suthikulpanit@amd.com
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas.Lendacky@amd.com
Cc: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20190808195301.13222-3-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/amd.c |  5 +++++
 include/linux/topology.h  | 14 ++++++++++++++
 kernel/sched/topology.c   |  3 ++-
 mm/khugepaged.c           |  2 +-
 mm/page_alloc.c           |  2 +-
 5 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 8d4e50428b68..ceeb8afc7cf3 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/random.h>
+#include <linux/topology.h>
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cacheinfo.h>
@@ -824,6 +825,10 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
 {
 	set_cpu_cap(c, X86_FEATURE_ZEN);
 
+#ifdef CONFIG_NUMA
+	node_reclaim_distance = 32;
+#endif
+
 	/*
 	 * Fix erratum 1076: CPB feature bit not being set in CPUID.
 	 * Always set it, except when running under a hypervisor.
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 47a3e3c08036..579522ec446c 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -59,6 +59,20 @@ int arch_update_cpu_topology(void);
  */
 #define RECLAIM_DISTANCE 30
 #endif
+
+/*
+ * The following tunable allows platforms to override the default node
+ * reclaim distance (RECLAIM_DISTANCE) if remote memory accesses are
+ * sufficiently fast that the default value actually hurts
+ * performance.
+ *
+ * AMD EPYC machines use this because even though the 2-hop distance
+ * is 32 (3.2x slower than a local memory access) performance actually
+ * *improves* if allowed to reclaim memory and load balance tasks
+ * between NUMA nodes 2-hops apart.
+ */
+extern int __read_mostly node_reclaim_distance;
+
 #ifndef PENALTY_FOR_NODE_WITH_CPUS
 #define PENALTY_FOR_NODE_WITH_CPUS	(1)
 #endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8f83e8e3ea9a..b5667a273bf6 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1284,6 +1284,7 @@ static int			sched_domains_curr_level;
 int				sched_max_numa_distance;
 static int			*sched_domains_numa_distance;
 static struct cpumask		***sched_domains_numa_masks;
+int __read_mostly		node_reclaim_distance = RECLAIM_DISTANCE;
 #endif
 
 /*
@@ -1402,7 +1403,7 @@ sd_init(struct sched_domain_topology_level *tl,
 
 		sd->flags &= ~SD_PREFER_SIBLING;
 		sd->flags |= SD_SERIALIZE;
-		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+		if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) {
 			sd->flags &= ~(SD_BALANCE_EXEC |
 				       SD_BALANCE_FORK |
 				       SD_WAKE_AFFINE);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index eaaa21b23215..ccede2425c3f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -710,7 +710,7 @@ static bool khugepaged_scan_abort(int nid)
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		if (!khugepaged_node_load[i])
 			continue;
-		if (node_distance(nid, i) > RECLAIM_DISTANCE)
+		if (node_distance(nid, i) > node_reclaim_distance)
 			return true;
 	}
 	return false;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 272c6de1bf4e..0d54cd2c43a4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3522,7 +3522,7 @@ bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
 	return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <=
-				RECLAIM_DISTANCE;
+				node_reclaim_distance;
 }
 #else	/* CONFIG_NUMA */
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
-- 
cgit v1.2.3


From 97d3eb9da84cae0548359b0aecb8619faad003b7 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Mon, 2 Sep 2019 11:40:31 +0100
Subject: cpuidle-haltpoll: vcpu hotplug support

When cpus != maxcpus cpuidle-haltpoll will fail to register all vcpus
past the online ones and thus fail to register the idle driver.
This is because cpuidle_add_sysfs() will return with -ENODEV as a
consequence from get_cpu_device() return no device for a non-existing
CPU.

Instead switch to cpuidle_register_driver() and manually register each
of the present cpus through cpuhp_setup_state() callbacks and future
ones that get onlined or offlined. This mimmics similar logic that
intel_idle does.

Fixes: fa86ee90eb11 ("add cpuidle-haltpoll driver")
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/cpuidle_haltpoll.h |  4 +-
 arch/x86/kernel/kvm.c                   | 18 +++------
 drivers/cpuidle/cpuidle-haltpoll.c      | 68 ++++++++++++++++++++++++++++++---
 include/linux/cpuidle_haltpoll.h        |  4 +-
 4 files changed, 73 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h
index ff8607d81526..c8b39c6716ff 100644
--- a/arch/x86/include/asm/cpuidle_haltpoll.h
+++ b/arch/x86/include/asm/cpuidle_haltpoll.h
@@ -2,7 +2,7 @@
 #ifndef _ARCH_HALTPOLL_H
 #define _ARCH_HALTPOLL_H
 
-void arch_haltpoll_enable(void);
-void arch_haltpoll_disable(void);
+void arch_haltpoll_enable(unsigned int cpu);
+void arch_haltpoll_disable(unsigned int cpu);
 
 #endif
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f48401be8ce0..60bab4a3b36b 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -888,32 +888,26 @@ static void kvm_enable_host_haltpoll(void *i)
 	wrmsrl(MSR_KVM_POLL_CONTROL, 1);
 }
 
-void arch_haltpoll_enable(void)
+void arch_haltpoll_enable(unsigned int cpu)
 {
 	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
-		printk(KERN_ERR "kvm: host does not support poll control\n");
-		printk(KERN_ERR "kvm: host upgrade recommended\n");
+		pr_err_once("kvm: host does not support poll control\n");
+		pr_err_once("kvm: host upgrade recommended\n");
 		return;
 	}
 
-	preempt_disable();
 	/* Enable guest halt poll disables host halt poll */
-	kvm_disable_host_haltpoll(NULL);
-	smp_call_function(kvm_disable_host_haltpoll, NULL, 1);
-	preempt_enable();
+	smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
 
-void arch_haltpoll_disable(void)
+void arch_haltpoll_disable(unsigned int cpu)
 {
 	if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
 		return;
 
-	preempt_disable();
 	/* Enable guest halt poll disables host halt poll */
-	kvm_enable_host_haltpoll(NULL);
-	smp_call_function(kvm_enable_host_haltpoll, NULL, 1);
-	preempt_enable();
+	smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
 #endif
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 9ac093dcbb01..56d8ab814466 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -11,12 +11,16 @@
  */
 
 #include <linux/init.h>
+#include <linux/cpu.h>
 #include <linux/cpuidle.h>
 #include <linux/module.h>
 #include <linux/sched/idle.h>
 #include <linux/kvm_para.h>
 #include <linux/cpuidle_haltpoll.h>
 
+static struct cpuidle_device __percpu *haltpoll_cpuidle_devices;
+static enum cpuhp_state haltpoll_hp_state;
+
 static int default_enter_idle(struct cpuidle_device *dev,
 			      struct cpuidle_driver *drv, int index)
 {
@@ -46,6 +50,46 @@ static struct cpuidle_driver haltpoll_driver = {
 	.state_count = 2,
 };
 
+static int haltpoll_cpu_online(unsigned int cpu)
+{
+	struct cpuidle_device *dev;
+
+	dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu);
+	if (!dev->registered) {
+		dev->cpu = cpu;
+		if (cpuidle_register_device(dev)) {
+			pr_notice("cpuidle_register_device %d failed!\n", cpu);
+			return -EIO;
+		}
+		arch_haltpoll_enable(cpu);
+	}
+
+	return 0;
+}
+
+static int haltpoll_cpu_offline(unsigned int cpu)
+{
+	struct cpuidle_device *dev;
+
+	dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu);
+	if (dev->registered) {
+		arch_haltpoll_disable(cpu);
+		cpuidle_unregister_device(dev);
+	}
+
+	return 0;
+}
+
+static void haltpoll_uninit(void)
+{
+	if (haltpoll_hp_state)
+		cpuhp_remove_state(haltpoll_hp_state);
+	cpuidle_unregister_driver(&haltpoll_driver);
+
+	free_percpu(haltpoll_cpuidle_devices);
+	haltpoll_cpuidle_devices = NULL;
+}
+
 static int __init haltpoll_init(void)
 {
 	int ret;
@@ -56,17 +100,31 @@ static int __init haltpoll_init(void)
 	if (!kvm_para_available())
 		return 0;
 
-	ret = cpuidle_register(&haltpoll_driver, NULL);
-	if (ret == 0)
-		arch_haltpoll_enable();
+	ret = cpuidle_register_driver(drv);
+	if (ret < 0)
+		return ret;
+
+	haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device);
+	if (haltpoll_cpuidle_devices == NULL) {
+		cpuidle_unregister_driver(drv);
+		return -ENOMEM;
+	}
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online",
+				haltpoll_cpu_online, haltpoll_cpu_offline);
+	if (ret < 0) {
+		haltpoll_uninit();
+	} else {
+		haltpoll_hp_state = ret;
+		ret = 0;
+	}
 
 	return ret;
 }
 
 static void __exit haltpoll_exit(void)
 {
-	arch_haltpoll_disable();
-	cpuidle_unregister(&haltpoll_driver);
+	haltpoll_uninit();
 }
 
 module_init(haltpoll_init);
diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h
index fe5954c2409e..d50c1e0411a2 100644
--- a/include/linux/cpuidle_haltpoll.h
+++ b/include/linux/cpuidle_haltpoll.h
@@ -5,11 +5,11 @@
 #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
 #include <asm/cpuidle_haltpoll.h>
 #else
-static inline void arch_haltpoll_enable(void)
+static inline void arch_haltpoll_enable(unsigned int cpu)
 {
 }
 
-static inline void arch_haltpoll_disable(void)
+static inline void arch_haltpoll_disable(unsigned int cpu)
 {
 }
 #endif
-- 
cgit v1.2.3


From d7bda73070201514d0d254f451c11bcc9b5890f1 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 27 Aug 2019 13:53:18 +0200
Subject: can: dev: avoid long lines

This patch fixes long lines in the generic CAN device infrastructure.

Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c          | 43 ++++++++++++++++++++++++++----------------
 include/linux/can/dev.h        |  3 ++-
 include/linux/can/rx-offload.h | 13 +++++++++----
 3 files changed, 38 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index b6fe39a26a9b..9f58c4f0344d 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -73,10 +73,11 @@ EXPORT_SYMBOL_GPL(can_len2dlc);
  * registers of the CAN controller. You can find more information
  * in the header file linux/can/netlink.h.
  */
-static int can_update_sample_point(const struct can_bittiming_const *btc,
-			  unsigned int sample_point_nominal, unsigned int tseg,
-			  unsigned int *tseg1_ptr, unsigned int *tseg2_ptr,
-			  unsigned int *sample_point_error_ptr)
+static int
+can_update_sample_point(const struct can_bittiming_const *btc,
+			unsigned int sample_point_nominal, unsigned int tseg,
+			unsigned int *tseg1_ptr, unsigned int *tseg2_ptr,
+			unsigned int *sample_point_error_ptr)
 {
 	unsigned int sample_point_error, best_sample_point_error = UINT_MAX;
 	unsigned int sample_point, best_sample_point = 0;
@@ -84,7 +85,9 @@ static int can_update_sample_point(const struct can_bittiming_const *btc,
 	int i;
 
 	for (i = 0; i <= 1; i++) {
-		tseg2 = tseg + CAN_CALC_SYNC_SEG - (sample_point_nominal * (tseg + CAN_CALC_SYNC_SEG)) / 1000 - i;
+		tseg2 = tseg + CAN_CALC_SYNC_SEG -
+			(sample_point_nominal * (tseg + CAN_CALC_SYNC_SEG)) /
+			1000 - i;
 		tseg2 = clamp(tseg2, btc->tseg2_min, btc->tseg2_max);
 		tseg1 = tseg - tseg2;
 		if (tseg1 > btc->tseg1_max) {
@@ -92,10 +95,12 @@ static int can_update_sample_point(const struct can_bittiming_const *btc,
 			tseg2 = tseg - tseg1;
 		}
 
-		sample_point = 1000 * (tseg + CAN_CALC_SYNC_SEG - tseg2) / (tseg + CAN_CALC_SYNC_SEG);
+		sample_point = 1000 * (tseg + CAN_CALC_SYNC_SEG - tseg2) /
+			(tseg + CAN_CALC_SYNC_SEG);
 		sample_point_error = abs(sample_point_nominal - sample_point);
 
-		if ((sample_point <= sample_point_nominal) && (sample_point_error < best_sample_point_error)) {
+		if ((sample_point <= sample_point_nominal) &&
+		    (sample_point_error < best_sample_point_error)) {
 			best_sample_point = sample_point;
 			best_sample_point_error = sample_point_error;
 			*tseg1_ptr = tseg1;
@@ -160,7 +165,8 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 		if (bitrate_error < best_bitrate_error)
 			best_sample_point_error = UINT_MAX;
 
-		can_update_sample_point(btc, sample_point_nominal, tseg / 2, &tseg1, &tseg2, &sample_point_error);
+		can_update_sample_point(btc, sample_point_nominal, tseg / 2,
+					&tseg1, &tseg2, &sample_point_error);
 		if (sample_point_error > best_sample_point_error)
 			continue;
 
@@ -189,8 +195,9 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 	}
 
 	/* real sample point */
-	bt->sample_point = can_update_sample_point(btc, sample_point_nominal, best_tseg,
-					  &tseg1, &tseg2, NULL);
+	bt->sample_point = can_update_sample_point(btc, sample_point_nominal,
+						   best_tseg, &tseg1, &tseg2,
+						   NULL);
 
 	v64 = (u64)best_brp * 1000 * 1000 * 1000;
 	do_div(v64, priv->clock.freq);
@@ -214,7 +221,8 @@ static int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 	bt->brp = best_brp;
 
 	/* real bitrate */
-	bt->bitrate = priv->clock.freq / (bt->brp * (CAN_CALC_SYNC_SEG + tseg1 + tseg2));
+	bt->bitrate = priv->clock.freq /
+		(bt->brp * (CAN_CALC_SYNC_SEG + tseg1 + tseg2));
 
 	return 0;
 }
@@ -267,9 +275,10 @@ static int can_fixup_bittiming(struct net_device *dev, struct can_bittiming *bt,
 }
 
 /* Checks the validity of predefined bitrate settings */
-static int can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt,
-				const u32 *bitrate_const,
-				const unsigned int bitrate_const_cnt)
+static int
+can_validate_bitrate(struct net_device *dev, struct can_bittiming *bt,
+		     const u32 *bitrate_const,
+		     const unsigned int bitrate_const_cnt)
 {
 	struct can_priv *priv = netdev_priv(dev);
 	unsigned int i;
@@ -460,7 +469,8 @@ void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 }
 EXPORT_SYMBOL_GPL(can_put_echo_skb);
 
-struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
+struct sk_buff *
+__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr)
 {
 	struct can_priv *priv = netdev_priv(dev);
 
@@ -569,7 +579,8 @@ restart:
 static void can_restart_work(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
-	struct can_priv *priv = container_of(dwork, struct can_priv, restart_work);
+	struct can_priv *priv = container_of(dwork, struct can_priv,
+					     restart_work);
 
 	can_restart(priv->dev);
 }
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index f01623aef2f7..9b3c720a31b1 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -169,7 +169,8 @@ void can_change_state(struct net_device *dev, struct can_frame *cf,
 
 void can_put_echo_skb(struct sk_buff *skb, struct net_device *dev,
 		      unsigned int idx);
-struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx, u8 *len_ptr);
+struct sk_buff *__can_get_echo_skb(struct net_device *dev, unsigned int idx,
+				   u8 *len_ptr);
 unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx);
 void can_free_echo_skb(struct net_device *dev, unsigned int idx);
 
diff --git a/include/linux/can/rx-offload.h b/include/linux/can/rx-offload.h
index 9daa1119ea42..01219f2902bf 100644
--- a/include/linux/can/rx-offload.h
+++ b/include/linux/can/rx-offload.h
@@ -15,7 +15,8 @@
 struct can_rx_offload {
 	struct net_device *dev;
 
-	unsigned int (*mailbox_read)(struct can_rx_offload *offload, struct can_frame *cf,
+	unsigned int (*mailbox_read)(struct can_rx_offload *offload,
+				     struct can_frame *cf,
 				     u32 *timestamp, unsigned int mb);
 
 	struct sk_buff_head skb_queue;
@@ -29,9 +30,13 @@ struct can_rx_offload {
 	bool inc;
 };
 
-int can_rx_offload_add_timestamp(struct net_device *dev, struct can_rx_offload *offload);
-int can_rx_offload_add_fifo(struct net_device *dev, struct can_rx_offload *offload, unsigned int weight);
-int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 reg);
+int can_rx_offload_add_timestamp(struct net_device *dev,
+				 struct can_rx_offload *offload);
+int can_rx_offload_add_fifo(struct net_device *dev,
+			    struct can_rx_offload *offload,
+			    unsigned int weight);
+int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload,
+					 u64 reg);
 int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload);
 int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
 				struct sk_buff *skb, u32 timestamp);
-- 
cgit v1.2.3


From 82e430a6df7f0b5972c7fe717faffea823c6b84a Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 2 Aug 2019 19:34:23 +0200
Subject: cpuidle: play_idle: Increase the resolution to usec

The play_idle resolution is 1ms. The intel_powerclamp bases the idle
duration on jiffies. The idle injection API is also using msec based
duration but has no user yet.

Unfortunately, msec based time does not fit well when we want to
inject idle cycle precisely with shallow idle state.

In order to set the scene for the incoming idle injection user, move
the precision up to usec when calling play_idle.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/idle_inject.c           | 2 +-
 drivers/thermal/intel/intel_powerclamp.c | 2 +-
 include/linux/cpu.h                      | 2 +-
 kernel/sched/idle.c                      | 7 ++++---
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index 24ff2a068978..10601f4bdf72 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -138,7 +138,7 @@ static void idle_inject_fn(unsigned int cpu)
 	 */
 	iit->should_run = 0;
 
-	play_idle(READ_ONCE(ii_dev->idle_duration_ms));
+	play_idle(READ_ONCE(ii_dev->idle_duration_ms) * USEC_PER_MSEC);
 }
 
 /**
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
index 5149a817456b..53216dcbe173 100644
--- a/drivers/thermal/intel/intel_powerclamp.c
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -430,7 +430,7 @@ static void clamp_idle_injection_func(struct kthread_work *work)
 	if (should_skip)
 		goto balance;
 
-	play_idle(jiffies_to_msecs(w_data->duration_jiffies));
+	play_idle(jiffies_to_usecs(w_data->duration_jiffies));
 
 balance:
 	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index fcb1386bb0d4..88dc0c653925 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -179,7 +179,7 @@ void arch_cpu_idle_dead(void);
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
-void play_idle(unsigned long duration_ms);
+void play_idle(unsigned long duration_us);
 
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 80940939b733..b98283fc6914 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -311,7 +311,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void play_idle(unsigned long duration_ms)
+void play_idle(unsigned long duration_us)
 {
 	struct idle_timer it;
 
@@ -323,7 +323,7 @@ void play_idle(unsigned long duration_ms)
 	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
 	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
 	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
-	WARN_ON_ONCE(!duration_ms);
+	WARN_ON_ONCE(!duration_us);
 
 	rcu_sleep_check();
 	preempt_disable();
@@ -333,7 +333,8 @@ void play_idle(unsigned long duration_ms)
 	it.done = 0;
 	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	it.timer.function = idle_inject_timer_fn;
-	hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
+	hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC),
+		      HRTIMER_MODE_REL_PINNED);
 
 	while (!READ_ONCE(it.done))
 		do_idle();
-- 
cgit v1.2.3


From cd4c0763064f02f42824eed61be38eafdc702281 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 2 Aug 2019 19:34:24 +0200
Subject: powercap: idle_inject: Use higher resolution for idle injection

The resolution of the idle injection is limited to 1ms. If there is
a need for an injection of 1.2 ms, it is not possible.

The idle injection API is not yet used, so it is safe to convert the
existing API to the new time unit instead of adding more functions.

Convert to microsecond in order to use a finer grain time unit when
injecting idle cycles.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/idle_inject.c | 53 +++++++++++++++++++++---------------------
 include/linux/idle_inject.h    |  8 +++----
 2 files changed, 31 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c
index 10601f4bdf72..cd1270614cc6 100644
--- a/drivers/powercap/idle_inject.c
+++ b/drivers/powercap/idle_inject.c
@@ -59,14 +59,14 @@ struct idle_inject_thread {
 /**
  * struct idle_inject_device - idle injection data
  * @timer: idle injection period timer
- * @idle_duration_ms: duration of CPU idle time to inject
- * @run_duration_ms: duration of CPU run time to allow
+ * @idle_duration_us: duration of CPU idle time to inject
+ * @run_duration_us: duration of CPU run time to allow
  * @cpumask: mask of CPUs affected by idle injection
  */
 struct idle_inject_device {
 	struct hrtimer timer;
-	unsigned int idle_duration_ms;
-	unsigned int run_duration_ms;
+	unsigned int idle_duration_us;
+	unsigned int run_duration_us;
 	unsigned long int cpumask[0];
 };
 
@@ -104,16 +104,16 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
  */
 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
 {
-	unsigned int duration_ms;
+	unsigned int duration_us;
 	struct idle_inject_device *ii_dev =
 		container_of(timer, struct idle_inject_device, timer);
 
-	duration_ms = READ_ONCE(ii_dev->run_duration_ms);
-	duration_ms += READ_ONCE(ii_dev->idle_duration_ms);
+	duration_us = READ_ONCE(ii_dev->run_duration_us);
+	duration_us += READ_ONCE(ii_dev->idle_duration_us);
 
 	idle_inject_wakeup(ii_dev);
 
-	hrtimer_forward_now(timer, ms_to_ktime(duration_ms));
+	hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC));
 
 	return HRTIMER_RESTART;
 }
@@ -138,35 +138,35 @@ static void idle_inject_fn(unsigned int cpu)
 	 */
 	iit->should_run = 0;
 
-	play_idle(READ_ONCE(ii_dev->idle_duration_ms) * USEC_PER_MSEC);
+	play_idle(READ_ONCE(ii_dev->idle_duration_us));
 }
 
 /**
  * idle_inject_set_duration - idle and run duration update helper
- * @run_duration_ms: CPU run time to allow in milliseconds
- * @idle_duration_ms: CPU idle time to inject in milliseconds
+ * @run_duration_us: CPU run time to allow in microseconds
+ * @idle_duration_us: CPU idle time to inject in microseconds
  */
 void idle_inject_set_duration(struct idle_inject_device *ii_dev,
-			      unsigned int run_duration_ms,
-			      unsigned int idle_duration_ms)
+			      unsigned int run_duration_us,
+			      unsigned int idle_duration_us)
 {
-	if (run_duration_ms && idle_duration_ms) {
-		WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
-		WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+	if (run_duration_us && idle_duration_us) {
+		WRITE_ONCE(ii_dev->run_duration_us, run_duration_us);
+		WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us);
 	}
 }
 
 /**
  * idle_inject_get_duration - idle and run duration retrieval helper
- * @run_duration_ms: memory location to store the current CPU run time
- * @idle_duration_ms: memory location to store the current CPU idle time
+ * @run_duration_us: memory location to store the current CPU run time
+ * @idle_duration_us: memory location to store the current CPU idle time
  */
 void idle_inject_get_duration(struct idle_inject_device *ii_dev,
-			      unsigned int *run_duration_ms,
-			      unsigned int *idle_duration_ms)
+			      unsigned int *run_duration_us,
+			      unsigned int *idle_duration_us)
 {
-	*run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
-	*idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+	*run_duration_us = READ_ONCE(ii_dev->run_duration_us);
+	*idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
 }
 
 /**
@@ -181,10 +181,10 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev,
  */
 int idle_inject_start(struct idle_inject_device *ii_dev)
 {
-	unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
-	unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+	unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
+	unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us);
 
-	if (!idle_duration_ms || !run_duration_ms)
+	if (!idle_duration_us || !run_duration_us)
 		return -EINVAL;
 
 	pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
@@ -193,7 +193,8 @@ int idle_inject_start(struct idle_inject_device *ii_dev)
 	idle_inject_wakeup(ii_dev);
 
 	hrtimer_start(&ii_dev->timer,
-		      ms_to_ktime(idle_duration_ms + run_duration_ms),
+		      ns_to_ktime((idle_duration_us + run_duration_us) *
+				  NSEC_PER_USEC),
 		      HRTIMER_MODE_REL);
 
 	return 0;
diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h
index bdc0293fb6cb..a445cd1a36c5 100644
--- a/include/linux/idle_inject.h
+++ b/include/linux/idle_inject.h
@@ -20,10 +20,10 @@ int idle_inject_start(struct idle_inject_device *ii_dev);
 void idle_inject_stop(struct idle_inject_device *ii_dev);
 
 void idle_inject_set_duration(struct idle_inject_device *ii_dev,
-				 unsigned int run_duration_ms,
-				 unsigned int idle_duration_ms);
+				 unsigned int run_duration_us,
+				 unsigned int idle_duration_us);
 
 void idle_inject_get_duration(struct idle_inject_device *ii_dev,
-				 unsigned int *run_duration_ms,
-				 unsigned int *idle_duration_ms);
+				 unsigned int *run_duration_us,
+				 unsigned int *idle_duration_us);
 #endif /* __IDLE_INJECT_H__ */
-- 
cgit v1.2.3


From d072cb263f9e0ce3f8f6089c17e3466885971fa8 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Mon, 2 Sep 2019 17:13:32 +0200
Subject: regulator: add missing 'static inline' to a helper's stub

The build fails when CONFIG_REGULATOR is not selected because the stub
for regulator_bulk_set_supply_names() is missing the 'static inline'
attribute.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Link: https://lore.kernel.org/r/20190902151332.28058-1-brgl@bgdev.pl
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/consumer.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index 6d2181a76987..337a46391527 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -586,9 +586,10 @@ static inline int regulator_list_voltage(struct regulator *regulator, unsigned s
 	return -EINVAL;
 }
 
-void regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
-				     const char *const *supply_names,
-				     unsigned int num_supplies)
+static inline void
+regulator_bulk_set_supply_names(struct regulator_bulk_data *consumers,
+				const char *const *supply_names,
+				unsigned int num_supplies)
 {
 }
 
-- 
cgit v1.2.3


From 8744daf4b0699b724ee0a56b313a6c0c4ea289e3 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Mon, 26 Aug 2019 08:53:29 -0700
Subject: iommu/vt-d: Remove global page flush support

Global pages support is removed from VT-d spec 3.0. Since global pages G
flag only affects first-level paging structures and because DMA request
with PASID are only supported by VT-d spec. 3.0 and onward, we can
safely remove global pages support.

For kernel shared virtual address IOTLB invalidation, PASID
granularity and page selective within PASID will be used. There is
no global granularity supported. Without this fix, IOTLB invalidation
will cause invalid descriptor error in the queued invalidation (QI)
interface.

Fixes: 1c4f88b7f1f9 ("iommu/vt-d: Shared virtual address in scalable mode")
Reported-by: Sanjay K Kumar <sanjay.k.kumar@intel.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-svm.c   | 36 +++++++++++++++---------------------
 include/linux/intel-iommu.h |  3 ---
 2 files changed, 15 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 780de0caafe8..9b159132405d 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -100,24 +100,19 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
 }
 
 static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
-				       unsigned long address, unsigned long pages, int ih, int gl)
+				unsigned long address, unsigned long pages, int ih)
 {
 	struct qi_desc desc;
 
-	if (pages == -1) {
-		/* For global kernel pages we have to flush them in *all* PASIDs
-		 * because that's the only option the hardware gives us. Despite
-		 * the fact that they are actually only accessible through one. */
-		if (gl)
-			desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
-					QI_EIOTLB_DID(sdev->did) |
-					QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) |
-					QI_EIOTLB_TYPE;
-		else
-			desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
-					QI_EIOTLB_DID(sdev->did) |
-					QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
-					QI_EIOTLB_TYPE;
+	/*
+	 * Do PASID granu IOTLB invalidation if page selective capability is
+	 * not available.
+	 */
+	if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap)) {
+		desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
+			QI_EIOTLB_DID(sdev->did) |
+			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
+			QI_EIOTLB_TYPE;
 		desc.qw1 = 0;
 	} else {
 		int mask = ilog2(__roundup_pow_of_two(pages));
@@ -127,7 +122,6 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
 				QI_EIOTLB_TYPE;
 		desc.qw1 = QI_EIOTLB_ADDR(address) |
-				QI_EIOTLB_GL(gl) |
 				QI_EIOTLB_IH(ih) |
 				QI_EIOTLB_AM(mask);
 	}
@@ -162,13 +156,13 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
 }
 
 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
-				  unsigned long pages, int ih, int gl)
+				unsigned long pages, int ih)
 {
 	struct intel_svm_dev *sdev;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdev, &svm->devs, list)
-		intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
+		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
 	rcu_read_unlock();
 }
 
@@ -180,7 +174,7 @@ static void intel_invalidate_range(struct mmu_notifier *mn,
 	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
 
 	intel_flush_svm_range(svm, start,
-			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
+			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
 }
 
 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -203,7 +197,7 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 	rcu_read_lock();
 	list_for_each_entry_rcu(sdev, &svm->devs, list) {
 		intel_pasid_tear_down_entry(svm->iommu, sdev->dev, svm->pasid);
-		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+		intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
 	}
 	rcu_read_unlock();
 
@@ -425,7 +419,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
 				 * large and has to be physically contiguous. So it's
 				 * hard to be as defensive as we might like. */
 				intel_pasid_tear_down_entry(iommu, dev, svm->pasid);
-				intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
+				intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
 				kfree_rcu(sdev, rcu);
 
 				if (list_empty(&svm->devs)) {
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index f2ae8a006ff8..4fc6454f7ebb 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -346,7 +346,6 @@ enum {
 #define QI_PC_PASID_SEL		(QI_PC_TYPE | QI_PC_GRAN(1))
 
 #define QI_EIOTLB_ADDR(addr)	((u64)(addr) & VTD_PAGE_MASK)
-#define QI_EIOTLB_GL(gl)	(((u64)gl) << 7)
 #define QI_EIOTLB_IH(ih)	(((u64)ih) << 6)
 #define QI_EIOTLB_AM(am)	(((u64)am))
 #define QI_EIOTLB_PASID(pasid) 	(((u64)pasid) << 32)
@@ -378,8 +377,6 @@ enum {
 #define QI_RESP_INVALID		0x1
 #define QI_RESP_FAILURE		0xf
 
-#define QI_GRAN_ALL_ALL			0
-#define QI_GRAN_NONG_ALL		1
 #define QI_GRAN_NONG_PASID		2
 #define QI_GRAN_PSI_PASID		3
 
-- 
cgit v1.2.3


From 84a2bd39405ffd5fa6d6d77e408c5b9210da98de Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 16 Jul 2019 21:20:17 -0400
Subject: fs/namei.c: keep track of nd->root refcount status

The rules for nd->root are messy:
	* if we have LOOKUP_ROOT, it doesn't contribute to refcounts
	* if we have LOOKUP_RCU, it doesn't contribute to refcounts
	* if nd->root.mnt is NULL, it doesn't contribute to refcounts
	* otherwise it does contribute

terminate_walk() needs to drop the references if they are contributing.
So everything else should be careful not to confuse it, leading to
rather convoluted code.

It's easier to keep track of whether we'd grabbed the reference(s)
explicitly.  Use a new flag for that.  Don't bother with zeroing
nd->root.mnt on unlazy failures and in terminate_walk - it's not
needed anymore (terminate_walk() won't care and the next path_init()
will zero nd->root in !LOOKUP_ROOT case anyway).

Resulting rules for nd->root refcounts are much simpler: they are
contributing iff LOOKUP_ROOT_GRABBED is set in nd->flags.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c            | 41 +++++++++++++++--------------------------
 include/linux/namei.h |  1 +
 2 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/fs/namei.c b/fs/namei.c
index 2af485ddc507..671c3c1a3425 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -596,14 +596,12 @@ static void terminate_walk(struct nameidata *nd)
 		path_put(&nd->path);
 		for (i = 0; i < nd->depth; i++)
 			path_put(&nd->stack[i].link);
-		if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
+		if (nd->flags & LOOKUP_ROOT_GRABBED) {
 			path_put(&nd->root);
-			nd->root.mnt = NULL;
+			nd->flags &= ~LOOKUP_ROOT_GRABBED;
 		}
 	} else {
 		nd->flags &= ~LOOKUP_RCU;
-		if (!(nd->flags & LOOKUP_ROOT))
-			nd->root.mnt = NULL;
 		rcu_read_unlock();
 	}
 	nd->depth = 0;
@@ -645,6 +643,7 @@ static bool legitimize_root(struct nameidata *nd)
 {
 	if (!nd->root.mnt || (nd->flags & LOOKUP_ROOT))
 		return true;
+	nd->flags |= LOOKUP_ROOT_GRABBED;
 	return legitimize_path(nd, &nd->root, nd->root_seq);
 }
 
@@ -678,21 +677,18 @@ static int unlazy_walk(struct nameidata *nd)
 
 	nd->flags &= ~LOOKUP_RCU;
 	if (unlikely(!legitimize_links(nd)))
-		goto out2;
-	if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
 		goto out1;
+	if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
+		goto out;
 	if (unlikely(!legitimize_root(nd)))
 		goto out;
 	rcu_read_unlock();
 	BUG_ON(nd->inode != parent->d_inode);
 	return 0;
 
-out2:
+out1:
 	nd->path.mnt = NULL;
 	nd->path.dentry = NULL;
-out1:
-	if (!(nd->flags & LOOKUP_ROOT))
-		nd->root.mnt = NULL;
 out:
 	rcu_read_unlock();
 	return -ECHILD;
@@ -732,21 +728,14 @@ static int unlazy_child(struct nameidata *nd, struct dentry *dentry, unsigned se
 	 */
 	if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
 		goto out;
-	if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) {
-		rcu_read_unlock();
-		dput(dentry);
-		goto drop_root_mnt;
-	}
+	if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
+		goto out_dput;
 	/*
 	 * Sequence counts matched. Now make sure that the root is
 	 * still valid and get it if required.
 	 */
-	if (unlikely(!legitimize_root(nd))) {
-		rcu_read_unlock();
-		dput(dentry);
-		return -ECHILD;
-	}
-
+	if (unlikely(!legitimize_root(nd)))
+		goto out_dput;
 	rcu_read_unlock();
 	return 0;
 
@@ -756,9 +745,10 @@ out1:
 	nd->path.dentry = NULL;
 out:
 	rcu_read_unlock();
-drop_root_mnt:
-	if (!(nd->flags & LOOKUP_ROOT))
-		nd->root.mnt = NULL;
+	return -ECHILD;
+out_dput:
+	rcu_read_unlock();
+	dput(dentry);
 	return -ECHILD;
 }
 
@@ -822,6 +812,7 @@ static void set_root(struct nameidata *nd)
 		} while (read_seqcount_retry(&fs->seq, seq));
 	} else {
 		get_fs_root(fs, &nd->root);
+		nd->flags |= LOOKUP_ROOT_GRABBED;
 	}
 }
 
@@ -1738,8 +1729,6 @@ static int pick_link(struct nameidata *nd, struct path *link,
 				nd->flags &= ~LOOKUP_RCU;
 				nd->path.mnt = NULL;
 				nd->path.dentry = NULL;
-				if (!(nd->flags & LOOKUP_ROOT))
-					nd->root.mnt = NULL;
 				rcu_read_unlock();
 			} else if (likely(unlazy_walk(nd)) == 0)
 				error = nd_alloc_stack(nd);
diff --git a/include/linux/namei.h b/include/linux/namei.h
index b7993980516e..397a08ade6a2 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -37,6 +37,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
 #define LOOKUP_NO_REVAL		0x0080
 #define LOOKUP_JUMPED		0x1000
 #define LOOKUP_ROOT		0x2000
+#define LOOKUP_ROOT_GRABBED	0x0008
 
 extern int path_pts(struct path *path);
 
-- 
cgit v1.2.3


From 977607653cdd06a2c4d1d0f179ff46cc0cbe83a5 Mon Sep 17 00:00:00 2001
From: Yu Chen <chenyu56@huawei.com>
Date: Thu, 29 Aug 2019 17:22:32 +0800
Subject: usb: roles: Introduce stubs for the exiting functions in role.h

This patch adds stubs for the exiting functions while
CONFIG_USB_ROLE_SWITCH does not enabled.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Yu Chen <chenyu56@huawei.com>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lore.kernel.org/r/1567070558-29417-6-git-send-email-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb/role.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include')

diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h
index c05ffa6abda9..da2b9641b877 100644
--- a/include/linux/usb/role.h
+++ b/include/linux/usb/role.h
@@ -42,6 +42,8 @@ struct usb_role_switch_desc {
 	bool allow_userspace_control;
 };
 
+
+#if IS_ENABLED(CONFIG_USB_ROLE_SWITCH)
 int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role);
 enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw);
 struct usb_role_switch *usb_role_switch_get(struct device *dev);
@@ -51,5 +53,33 @@ struct usb_role_switch *
 usb_role_switch_register(struct device *parent,
 			 const struct usb_role_switch_desc *desc);
 void usb_role_switch_unregister(struct usb_role_switch *sw);
+#else
+static inline int usb_role_switch_set_role(struct usb_role_switch *sw,
+		enum usb_role role)
+{
+	return 0;
+}
+
+static inline enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw)
+{
+	return USB_ROLE_NONE;
+}
+
+static inline struct usb_role_switch *usb_role_switch_get(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void usb_role_switch_put(struct usb_role_switch *sw) { }
+
+static inline struct usb_role_switch *
+usb_role_switch_register(struct device *parent,
+			 const struct usb_role_switch_desc *desc)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void usb_role_switch_unregister(struct usb_role_switch *sw) { }
+#endif
 
 #endif /* __LINUX_USB_ROLE_H */
-- 
cgit v1.2.3


From 44493062abc38e5895e0cf8d22698f2ca39a1e4d Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 29 Aug 2019 17:22:33 +0800
Subject: device connection: Add fwnode_connection_find_match()

The fwnode_connection_find_match() function is exactly the
same as device_connection_find_match(), except it takes
struct fwnode_handle as parameter instead of struct device.
That allows locating device connections before the device
entries have been created.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lore.kernel.org/r/1567070558-29417-7-git-send-email-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devcon.c  | 43 +++++++++++++++++++++++++++++++------------
 include/linux/device.h | 10 +++++++---
 2 files changed, 38 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/base/devcon.c b/drivers/base/devcon.c
index 09f28479b243..986ad89de2f9 100644
--- a/drivers/base/devcon.c
+++ b/drivers/base/devcon.c
@@ -12,9 +12,6 @@
 static DEFINE_MUTEX(devcon_lock);
 static LIST_HEAD(devcon_list);
 
-typedef void *(*devcon_match_fn_t)(struct device_connection *con, int ep,
-				   void *data);
-
 static void *
 fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id,
 			  void *data, devcon_match_fn_t match)
@@ -60,6 +57,34 @@ fwnode_devcon_match(struct fwnode_handle *fwnode, const char *con_id,
 	return NULL;
 }
 
+/**
+ * fwnode_connection_find_match - Find connection from a device node
+ * @fwnode: Device node with the connection
+ * @con_id: Identifier for the connection
+ * @data: Data for the match function
+ * @match: Function to check and convert the connection description
+ *
+ * Find a connection with unique identifier @con_id between @fwnode and another
+ * device node. @match will be used to convert the connection description to
+ * data the caller is expecting to be returned.
+ */
+void *fwnode_connection_find_match(struct fwnode_handle *fwnode,
+				   const char *con_id, void *data,
+				   devcon_match_fn_t match)
+{
+	void *ret;
+
+	if (!fwnode || !match)
+		return NULL;
+
+	ret = fwnode_graph_devcon_match(fwnode, con_id, data, match);
+	if (ret)
+		return ret;
+
+	return fwnode_devcon_match(fwnode, con_id, data, match);
+}
+EXPORT_SYMBOL_GPL(fwnode_connection_find_match);
+
 /**
  * device_connection_find_match - Find physical connection to a device
  * @dev: Device with the connection
@@ -83,15 +108,9 @@ void *device_connection_find_match(struct device *dev, const char *con_id,
 	if (!match)
 		return NULL;
 
-	if (fwnode) {
-		ret = fwnode_graph_devcon_match(fwnode, con_id, data, match);
-		if (ret)
-			return ret;
-
-		ret = fwnode_devcon_match(fwnode, con_id, data, match);
-		if (ret)
-			return ret;
-	}
+	ret = fwnode_connection_find_match(fwnode, con_id, data, match);
+	if (ret)
+		return ret;
 
 	mutex_lock(&devcon_lock);
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 47ccb2029bc3..b3aa7295b4cb 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -781,10 +781,14 @@ struct device_connection {
 	struct list_head	list;
 };
 
+typedef void *(*devcon_match_fn_t)(struct device_connection *con, int ep,
+				   void *data);
+
+void *fwnode_connection_find_match(struct fwnode_handle *fwnode,
+				   const char *con_id, void *data,
+				   devcon_match_fn_t match);
 void *device_connection_find_match(struct device *dev, const char *con_id,
-				void *data,
-				void *(*match)(struct device_connection *con,
-					       int ep, void *data));
+				   void *data, devcon_match_fn_t match);
 
 struct device *device_connection_find(struct device *dev, const char *con_id);
 
-- 
cgit v1.2.3


From a31f01777bc54a0a9d57628956d05f8d454d3418 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 29 Aug 2019 17:22:34 +0800
Subject: usb: roles: Add fwnode_usb_role_switch_get() function

The fwnode_usb_role_switch_get() function is exactly the
same as usb_role_switch_get(), except that it takes struct
fwnode_handle as parameter instead of struct device.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Tested-by: Biju Das <biju.das@bp.renesas.com>
Link: https://lore.kernel.org/r/1567070558-29417-8-git-send-email-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/roles/class.c | 20 ++++++++++++++++++++
 include/linux/usb/role.h  |  7 +++++++
 2 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index 86defca6623e..5b637aaf311f 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -135,6 +135,26 @@ struct usb_role_switch *usb_role_switch_get(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(usb_role_switch_get);
 
+/**
+ * fwnode_usb_role_switch_get - Find USB role switch linked with the caller
+ * @fwnode: The caller device node
+ *
+ * This is similar to the usb_role_switch_get() function above, but it searches
+ * the switch using fwnode instead of device entry.
+ */
+struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *fwnode)
+{
+	struct usb_role_switch *sw;
+
+	sw = fwnode_connection_find_match(fwnode, "usb-role-switch", NULL,
+					  usb_role_switch_match);
+	if (!IS_ERR_OR_NULL(sw))
+		WARN_ON(!try_module_get(sw->dev.parent->driver->owner));
+
+	return sw;
+}
+EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get);
+
 /**
  * usb_role_switch_put - Release handle to a switch
  * @sw: USB Role Switch
diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h
index da2b9641b877..2d77f97df72d 100644
--- a/include/linux/usb/role.h
+++ b/include/linux/usb/role.h
@@ -47,6 +47,7 @@ struct usb_role_switch_desc {
 int usb_role_switch_set_role(struct usb_role_switch *sw, enum usb_role role);
 enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw);
 struct usb_role_switch *usb_role_switch_get(struct device *dev);
+struct usb_role_switch *fwnode_usb_role_switch_get(struct fwnode_handle *node);
 void usb_role_switch_put(struct usb_role_switch *sw);
 
 struct usb_role_switch *
@@ -70,6 +71,12 @@ static inline struct usb_role_switch *usb_role_switch_get(struct device *dev)
 	return ERR_PTR(-ENODEV);
 }
 
+static inline struct usb_role_switch *
+fwnode_usb_role_switch_get(struct fwnode_handle *node)
+{
+	return ERR_PTR(-ENODEV);
+}
+
 static inline void usb_role_switch_put(struct usb_role_switch *sw) { }
 
 static inline struct usb_role_switch *
-- 
cgit v1.2.3


From d62d0ba97b5803183e70cfded7f7b9da76893bf5 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Mon, 26 Aug 2019 13:40:52 +0200
Subject: netfilter: nf_tables: Introduce stateful object update operation

This patch adds the infrastructure needed for the stateful object update
support.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  9 +++++
 net/netfilter/nf_tables_api.c     | 78 +++++++++++++++++++++++++++++++++++----
 2 files changed, 80 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 498665158ee0..3d9e66aa0139 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1127,6 +1127,7 @@ struct nft_object_type {
  *	@init: initialize object from netlink attributes
  *	@destroy: release existing stateful object
  *	@dump: netlink dump stateful object
+ *	@update: update stateful object
  */
 struct nft_object_ops {
 	void				(*eval)(struct nft_object *obj,
@@ -1141,6 +1142,8 @@ struct nft_object_ops {
 	int				(*dump)(struct sk_buff *skb,
 						struct nft_object *obj,
 						bool reset);
+	void				(*update)(struct nft_object *obj,
+						  struct nft_object *newobj);
 	const struct nft_object_type	*type;
 };
 
@@ -1429,10 +1432,16 @@ struct nft_trans_elem {
 
 struct nft_trans_obj {
 	struct nft_object		*obj;
+	struct nft_object		*newobj;
+	bool				update;
 };
 
 #define nft_trans_obj(trans)	\
 	(((struct nft_trans_obj *)trans->data)->obj)
+#define nft_trans_obj_newobj(trans) \
+	(((struct nft_trans_obj *)trans->data)->newobj)
+#define nft_trans_obj_update(trans)	\
+	(((struct nft_trans_obj *)trans->data)->update)
 
 struct nft_trans_flowtable {
 	struct nft_flowtable		*flowtable;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6d00bef023c4..cf767bc58e18 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5131,6 +5131,38 @@ nft_obj_type_get(struct net *net, u32 objtype)
 	return ERR_PTR(-ENOENT);
 }
 
+static int nf_tables_updobj(const struct nft_ctx *ctx,
+			    const struct nft_object_type *type,
+			    const struct nlattr *attr,
+			    struct nft_object *obj)
+{
+	struct nft_object *newobj;
+	struct nft_trans *trans;
+	int err;
+
+	trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
+				sizeof(struct nft_trans_obj));
+	if (!trans)
+		return -ENOMEM;
+
+	newobj = nft_obj_init(ctx, type, attr);
+	if (IS_ERR(newobj)) {
+		err = PTR_ERR(newobj);
+		goto err1;
+	}
+
+	nft_trans_obj(trans) = obj;
+	nft_trans_obj_update(trans) = true;
+	nft_trans_obj_newobj(trans) = newobj;
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+err1:
+	kfree(trans);
+	kfree(newobj);
+	return err;
+}
+
 static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 			    struct sk_buff *skb, const struct nlmsghdr *nlh,
 			    const struct nlattr * const nla[],
@@ -5170,7 +5202,13 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
 			NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
 			return -EEXIST;
 		}
-		return 0;
+		if (nlh->nlmsg_flags & NLM_F_REPLACE)
+			return -EOPNOTSUPP;
+
+		type = nft_obj_type_get(net, objtype);
+		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+		return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
 	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
@@ -6431,6 +6469,19 @@ static void nft_chain_commit_update(struct nft_trans *trans)
 	}
 }
 
+static void nft_obj_commit_update(struct nft_trans *trans)
+{
+	struct nft_object *newobj;
+	struct nft_object *obj;
+
+	obj = nft_trans_obj(trans);
+	newobj = nft_trans_obj_newobj(trans);
+
+	obj->ops->update(obj, newobj);
+
+	kfree(newobj);
+}
+
 static void nft_commit_release(struct nft_trans *trans)
 {
 	switch (trans->msg_type) {
@@ -6795,10 +6846,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			te->set->ndeact--;
 			break;
 		case NFT_MSG_NEWOBJ:
-			nft_clear(net, nft_trans_obj(trans));
-			nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
-					     NFT_MSG_NEWOBJ);
-			nft_trans_destroy(trans);
+			if (nft_trans_obj_update(trans)) {
+				nft_obj_commit_update(trans);
+				nf_tables_obj_notify(&trans->ctx,
+						     nft_trans_obj(trans),
+						     NFT_MSG_NEWOBJ);
+			} else {
+				nft_clear(net, nft_trans_obj(trans));
+				nf_tables_obj_notify(&trans->ctx,
+						     nft_trans_obj(trans),
+						     NFT_MSG_NEWOBJ);
+				nft_trans_destroy(trans);
+			}
 			break;
 		case NFT_MSG_DELOBJ:
 			nft_obj_del(nft_trans_obj(trans));
@@ -6945,8 +7004,13 @@ static int __nf_tables_abort(struct net *net)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWOBJ:
-			trans->ctx.table->use--;
-			nft_obj_del(nft_trans_obj(trans));
+			if (nft_trans_obj_update(trans)) {
+				kfree(nft_trans_obj_newobj(trans));
+				nft_trans_destroy(trans);
+			} else {
+				trans->ctx.table->use--;
+				nft_obj_del(nft_trans_obj(trans));
+			}
 			break;
 		case NFT_MSG_DELOBJ:
 			trans->ctx.table->use++;
-- 
cgit v1.2.3


From d55c79ac86f78fce3c224bda2b383edf96bb6438 Mon Sep 17 00:00:00 2001
From: Robert Richter <rrichter@marvell.com>
Date: Mon, 2 Sep 2019 12:33:41 +0000
Subject: EDAC: Prefer 'unsigned int' to bare use of 'unsigned'

Use of 'unsigned int' instead of bare use of 'unsigned'. Fix this for
edac_mc*, ghes and the i5100 driver as reported by checkpatch.pl.

While at it, struct member dev_ch_attribute->channel is always used as
unsigned int. Change type to unsigned int to avoid type casts.

 [ bp: Massage. ]

Signed-off-by: Robert Richter <rrichter@marvell.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: "linux-edac@vger.kernel.org" <linux-edac@vger.kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Tony Luck <tony.luck@intel.com>
Link: https://lkml.kernel.org/r/20190902123216.9809-2-rrichter@marvell.com
---
 drivers/edac/edac_mc.c       | 20 ++++++++++----------
 drivers/edac/edac_mc.h       |  6 +++---
 drivers/edac/edac_mc_sysfs.c |  8 ++++----
 drivers/edac/ghes_edac.c     |  2 +-
 drivers/edac/i5100_edac.c    | 16 +++++++++-------
 include/linux/edac.h         | 10 +++++-----
 6 files changed, 32 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index c68f62ab54b0..e6fd079783bd 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -114,8 +114,8 @@ static const struct kernel_param_ops edac_report_ops = {
 
 module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644);
 
-unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
-			         unsigned len)
+unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf,
+				     unsigned int len)
 {
 	struct mem_ctl_info *mci = dimm->mci;
 	int i, n, count = 0;
@@ -236,9 +236,9 @@ EXPORT_SYMBOL_GPL(edac_mem_types);
  * At return, the pointer 'p' will be incremented to be used on a next call
  * to this function.
  */
-void *edac_align_ptr(void **p, unsigned size, int n_elems)
+void *edac_align_ptr(void **p, unsigned int size, int n_elems)
 {
-	unsigned align, r;
+	unsigned int align, r;
 	void *ptr = *p;
 
 	*p += size * n_elems;
@@ -302,10 +302,10 @@ static void _edac_mc_free(struct mem_ctl_info *mci)
 	kfree(mci);
 }
 
-struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
-				   unsigned n_layers,
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
+				   unsigned int n_layers,
 				   struct edac_mc_layer *layers,
-				   unsigned sz_pvt)
+				   unsigned int sz_pvt)
 {
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer *layer;
@@ -313,9 +313,9 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 	struct rank_info *chan;
 	struct dimm_info *dimm;
 	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
-	unsigned pos[EDAC_MAX_LAYERS];
-	unsigned size, tot_dimms = 1, count = 1;
-	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
+	unsigned int pos[EDAC_MAX_LAYERS];
+	unsigned int size, tot_dimms = 1, count = 1;
+	unsigned int tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
 	void *pvt, *p, *ptr = NULL;
 	int i, j, row, chn, n, len, off;
 	bool per_rank = false;
diff --git a/drivers/edac/edac_mc.h b/drivers/edac/edac_mc.h
index 4165e15995ad..02aac5c61d00 100644
--- a/drivers/edac/edac_mc.h
+++ b/drivers/edac/edac_mc.h
@@ -122,10 +122,10 @@ do {									\
  *	On success, return a pointer to struct mem_ctl_info pointer;
  *	%NULL otherwise
  */
-struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
-				   unsigned n_layers,
+struct mem_ctl_info *edac_mc_alloc(unsigned int mc_num,
+				   unsigned int n_layers,
 				   struct edac_mc_layer *layers,
-				   unsigned sz_pvt);
+				   unsigned int sz_pvt);
 
 /**
  * edac_get_owner - Return the owner's mod_name of EDAC MC
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 4386ea4b9b5a..4eb8c5ceb973 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -131,7 +131,7 @@ static const char * const edac_caps[] = {
 
 struct dev_ch_attribute {
 	struct device_attribute attr;
-	int channel;
+	unsigned int channel;
 };
 
 #define DEVICE_CHANNEL(_name, _mode, _show, _store, _var) \
@@ -200,7 +200,7 @@ static ssize_t channel_dimm_label_show(struct device *dev,
 				       char *data)
 {
 	struct csrow_info *csrow = to_csrow(dev);
-	unsigned chan = to_channel(mattr);
+	unsigned int chan = to_channel(mattr);
 	struct rank_info *rank = csrow->channels[chan];
 
 	/* if field has not been initialized, there is nothing to send */
@@ -216,7 +216,7 @@ static ssize_t channel_dimm_label_store(struct device *dev,
 					const char *data, size_t count)
 {
 	struct csrow_info *csrow = to_csrow(dev);
-	unsigned chan = to_channel(mattr);
+	unsigned int chan = to_channel(mattr);
 	struct rank_info *rank = csrow->channels[chan];
 	size_t copy_count = count;
 
@@ -240,7 +240,7 @@ static ssize_t channel_ce_count_show(struct device *dev,
 				     struct device_attribute *mattr, char *data)
 {
 	struct csrow_info *csrow = to_csrow(dev);
-	unsigned chan = to_channel(mattr);
+	unsigned int chan = to_channel(mattr);
 	struct rank_info *rank = csrow->channels[chan];
 
 	return sprintf(data, "%u\n", rank->ce_count);
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 7f19f1c672c3..d413a0bdc9ad 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -68,7 +68,7 @@ struct memdev_dmi_entry {
 
 struct ghes_edac_dimm_fill {
 	struct mem_ctl_info *mci;
-	unsigned count;
+	unsigned int count;
 };
 
 static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index b506eef6b146..251f2b692785 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -417,7 +417,8 @@ static const char *i5100_err_msg(unsigned err)
 }
 
 /* convert csrow index into a rank (per channel -- 0..5) */
-static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
+static unsigned int i5100_csrow_to_rank(const struct mem_ctl_info *mci,
+					unsigned int csrow)
 {
 	const struct i5100_priv *priv = mci->pvt_info;
 
@@ -425,7 +426,8 @@ static int i5100_csrow_to_rank(const struct mem_ctl_info *mci, int csrow)
 }
 
 /* convert csrow index into a channel (0..1) */
-static int i5100_csrow_to_chan(const struct mem_ctl_info *mci, int csrow)
+static unsigned int i5100_csrow_to_chan(const struct mem_ctl_info *mci,
+					unsigned int csrow)
 {
 	const struct i5100_priv *priv = mci->pvt_info;
 
@@ -653,11 +655,11 @@ static struct pci_dev *pci_get_device_func(unsigned vendor,
 	return ret;
 }
 
-static unsigned long i5100_npages(struct mem_ctl_info *mci, int csrow)
+static unsigned long i5100_npages(struct mem_ctl_info *mci, unsigned int csrow)
 {
 	struct i5100_priv *priv = mci->pvt_info;
-	const unsigned chan_rank = i5100_csrow_to_rank(mci, csrow);
-	const unsigned chan = i5100_csrow_to_chan(mci, csrow);
+	const unsigned int chan_rank = i5100_csrow_to_rank(mci, csrow);
+	const unsigned int chan = i5100_csrow_to_chan(mci, csrow);
 	unsigned addr_lines;
 
 	/* dimm present? */
@@ -852,8 +854,8 @@ static void i5100_init_csrows(struct mem_ctl_info *mci)
 	for (i = 0; i < mci->tot_dimms; i++) {
 		struct dimm_info *dimm;
 		const unsigned long npages = i5100_npages(mci, i);
-		const unsigned chan = i5100_csrow_to_chan(mci, i);
-		const unsigned rank = i5100_csrow_to_rank(mci, i);
+		const unsigned int chan = i5100_csrow_to_chan(mci, i);
+		const unsigned int rank = i5100_csrow_to_rank(mci, i);
 
 		if (!npages)
 			continue;
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 342dabda9c7e..c19483b90079 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -440,7 +440,7 @@ struct dimm_info {
 	char label[EDAC_MC_LABEL_LEN + 1];	/* DIMM label on motherboard */
 
 	/* Memory location data */
-	unsigned location[EDAC_MAX_LAYERS];
+	unsigned int location[EDAC_MAX_LAYERS];
 
 	struct mem_ctl_info *mci;	/* the parent */
 
@@ -451,7 +451,7 @@ struct dimm_info {
 
 	u32 nr_pages;			/* number of pages on this dimm */
 
-	unsigned csrow, cschannel;	/* Points to the old API data */
+	unsigned int csrow, cschannel;	/* Points to the old API data */
 
 	u16 smbios_handle;              /* Handle for SMBIOS type 17 */
 };
@@ -597,7 +597,7 @@ struct mem_ctl_info {
 					   unsigned long page);
 	int mc_idx;
 	struct csrow_info **csrows;
-	unsigned nr_csrows, num_cschannel;
+	unsigned int nr_csrows, num_cschannel;
 
 	/*
 	 * Memory Controller hierarchy
@@ -608,14 +608,14 @@ struct mem_ctl_info {
 	 * of the recent drivers enumerate memories per DIMM, instead.
 	 * When the memory controller is per rank, csbased is true.
 	 */
-	unsigned n_layers;
+	unsigned int n_layers;
 	struct edac_mc_layer *layers;
 	bool csbased;
 
 	/*
 	 * DIMM info. Will eventually remove the entire csrows_info some day
 	 */
-	unsigned tot_dimms;
+	unsigned int tot_dimms;
 	struct dimm_info **dimms;
 
 	/*
-- 
cgit v1.2.3


From af4e1c5eca95bed1192d8dc45c8ed63aea2209e8 Mon Sep 17 00:00:00 2001
From: Marcel Bocu <marcel.p.bocu@gmail.com>
Date: Mon, 22 Jul 2019 20:45:10 +0300
Subject: x86/amd_nb: Add PCI device IDs for family 17h, model 70h

The AMD Ryzen gen 3 processors came with a different PCI IDs for the
function 3 & 4 which are used to access the SMN interface. The root
PCI address however remained at the same address as the model 30h.

Adding the F3/F4 PCI IDs respectively to the misc and link ids appear
to be sufficient for k10temp, so let's add them and follow up on the
patch if other functions need more tweaking.

Vicki Pfau sent an identical patch after I checked that no-one had
written this patch. I would have been happy about dropping my patch but
unlike for his patch series, I had already Cc:ed the x86 people and
they already reviewed the changes. Since Vicki has not answered to
any email after his initial series, let's assume she is on vacation
and let's avoid duplication of reviews from the maintainers and merge
my series. To acknowledge Vicki's anteriority, I added her S-o-b to
the patch.

v2, suggested by Guenter Roeck and Brian Woods:
 - rename from 71h to 70h

Signed-off-by: Vicki Pfau <vi@endrift.com>
Signed-off-by: Marcel Bocu <marcel.p.bocu@gmail.com>
Tested-by: Marcel Bocu <marcel.p.bocu@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Brian Woods <brian.woods@amd.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# pci_ids.h

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org
Cc: "Woods, Brian" <Brian.Woods@amd.com>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: linux-hwmon@vger.kernel.org
Link: https://lore.kernel.org/r/20190722174510.2179-1-marcel.p.bocu@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 arch/x86/kernel/amd_nb.c | 3 +++
 include/linux/pci_ids.h  | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include')

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index d63e63b7d1d9..251c795b4eb3 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -21,6 +21,7 @@
 #define PCI_DEVICE_ID_AMD_17H_DF_F4	0x1464
 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
 
 /* Protect the PCI config register pairs used for SMN and DF indirect access. */
 static DEFINE_MUTEX(smn_mutex);
@@ -50,6 +51,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -63,6 +65,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c842735a4f45..4b97f427cc92 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -548,6 +548,7 @@
 #define PCI_DEVICE_ID_AMD_17H_DF_F3	0x1463
 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493
+#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
 #define PCI_DEVICE_ID_AMD_CNB17H_F3	0x1703
 #define PCI_DEVICE_ID_AMD_LANCE		0x2000
 #define PCI_DEVICE_ID_AMD_LANCE_HOME	0x2001
-- 
cgit v1.2.3


From 2b688ea5efdee2868ed23eddfdbe27dbd232edac Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 15 Aug 2019 13:54:17 +0300
Subject: net/mlx5: Add flow steering actions to fs_cmd shim layer

Add flow steering actions: modify header and packet reformat
to the fs_cmd shim layer. This allows each namespace to define
possibly different functionality for alloc/dealloc action commands.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/flow.c                  |  21 +++--
 drivers/infiniband/hw/mlx5/main.c                  |   7 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |   5 +-
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c    |  27 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |  46 +++++----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   6 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  26 ++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  92 +++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |  18 ++++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 105 ++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  11 +++
 include/linux/mlx5/fs.h                            |  33 ++++---
 13 files changed, 289 insertions(+), 110 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index b8841355fcd5..1c8f04abee0c 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -322,11 +322,11 @@ void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
 	switch (maction->flow_action_raw.sub_type) {
 	case MLX5_IB_FLOW_ACTION_MODIFY_HEADER:
 		mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev,
-					   maction->flow_action_raw.action_id);
+					   maction->flow_action_raw.modify_hdr);
 		break;
 	case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT:
 		mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev,
-			maction->flow_action_raw.action_id);
+					     maction->flow_action_raw.pkt_reformat);
 		break;
 	case MLX5_IB_FLOW_ACTION_DECAP:
 		break;
@@ -352,10 +352,11 @@ mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev,
 	if (!maction)
 		return ERR_PTR(-ENOMEM);
 
-	ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in,
-				       &maction->flow_action_raw.action_id);
+	maction->flow_action_raw.modify_hdr =
+		mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in);
 
-	if (ret) {
+	if (IS_ERR(maction->flow_action_raw.modify_hdr)) {
+		ret = PTR_ERR(maction->flow_action_raw.modify_hdr);
 		kfree(maction);
 		return ERR_PTR(ret);
 	}
@@ -479,11 +480,13 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
 	if (ret)
 		return ret;
 
-	ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
-					 in, namespace,
-					 &maction->flow_action_raw.action_id);
-	if (ret)
+	maction->flow_action_raw.pkt_reformat =
+		mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len,
+					   in, namespace);
+	if (IS_ERR(maction->flow_action_raw.pkt_reformat)) {
+		ret = PTR_ERR(maction->flow_action_raw.pkt_reformat);
 		return ret;
+	}
 
 	maction->flow_action_raw.sub_type =
 		MLX5_IB_FLOW_ACTION_PACKET_REFORMAT;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 016373d1d27e..4e9f1507ffd9 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2658,7 +2658,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 				return -EINVAL;
 			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-			action->modify_id = maction->flow_action_raw.action_id;
+			action->modify_hdr =
+				maction->flow_action_raw.modify_hdr;
 			return 0;
 		}
 		if (maction->flow_action_raw.sub_type ==
@@ -2675,8 +2676,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
 				return -EINVAL;
 			action->action |=
 				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-			action->reformat_id =
-				maction->flow_action_raw.action_id;
+			action->pkt_reformat =
+				maction->flow_action_raw.pkt_reformat;
 			return 0;
 		}
 		/* fall through */
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a20d2ee08a3b..125a507c10ed 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -868,7 +868,10 @@ struct mlx5_ib_flow_action {
 		struct {
 			struct mlx5_ib_dev *dev;
 			u32 sub_type;
-			u32 action_id;
+			union {
+				struct mlx5_modify_hdr *modify_hdr;
+				struct mlx5_pkt_reformat *pkt_reformat;
+			};
 		} flow_action_raw;
 	};
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 4c4620db3d31..f8ee18b4da6f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -291,14 +291,14 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
 		 */
 		goto out;
 	}
-
-	err = mlx5_packet_reformat_alloc(priv->mdev,
-					 e->reformat_type,
-					 ipv4_encap_size, encap_header,
-					 MLX5_FLOW_NAMESPACE_FDB,
-					 &e->encap_id);
-	if (err)
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     ipv4_encap_size, encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		err = PTR_ERR(e->pkt_reformat);
 		goto destroy_neigh_entry;
+	}
 
 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
 	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
@@ -407,13 +407,14 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	err = mlx5_packet_reformat_alloc(priv->mdev,
-					 e->reformat_type,
-					 ipv6_encap_size, encap_header,
-					 MLX5_FLOW_NAMESPACE_FDB,
-					 &e->encap_id);
-	if (err)
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     ipv6_encap_size, encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		err = PTR_ERR(e->pkt_reformat);
 		goto destroy_neigh_entry;
+	}
 
 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
 	mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index a0ae5069d8c3..8e512216deb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -161,7 +161,7 @@ struct mlx5e_encap_entry {
 	 */
 	struct hlist_node encap_hlist;
 	struct list_head flows;
-	u32 encap_id;
+	struct mlx5_pkt_reformat *pkt_reformat;
 	const struct ip_tunnel_info *tun_info;
 	unsigned char h_dest[ETH_ALEN];	/* destination eth addr	*/
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 67f66412a33c..30d26eba75a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,7 +61,7 @@
 struct mlx5_nic_flow_attr {
 	u32 action;
 	u32 flow_tag;
-	u32 mod_hdr_id;
+	struct mlx5_modify_hdr *modify_hdr;
 	u32 hairpin_tirn;
 	u8 match_level;
 	struct mlx5_flow_table	*hairpin_ft;
@@ -201,7 +201,7 @@ struct mlx5e_mod_hdr_entry {
 
 	struct mod_hdr_key key;
 
-	u32 mod_hdr_id;
+	struct mlx5_modify_hdr *modify_hdr;
 
 	refcount_t refcnt;
 	struct completion res_ready;
@@ -334,7 +334,7 @@ static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
 
 	WARN_ON(!list_empty(&mh->flows));
 	if (mh->compl_result > 0)
-		mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
+		mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);
 
 	kfree(mh);
 }
@@ -395,11 +395,11 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
 	hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
 	mutex_unlock(&tbl->lock);
 
-	err = mlx5_modify_header_alloc(priv->mdev, namespace,
-				       mh->key.num_actions,
-				       mh->key.actions,
-				       &mh->mod_hdr_id);
-	if (err) {
+	mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
+						  mh->key.num_actions,
+						  mh->key.actions);
+	if (IS_ERR(mh->modify_hdr)) {
+		err = PTR_ERR(mh->modify_hdr);
 		mh->compl_result = err;
 		goto alloc_header_err;
 	}
@@ -412,9 +412,9 @@ attach_flow:
 	list_add(&flow->mod_hdr, &mh->flows);
 	spin_unlock(&mh->flows_lock);
 	if (mlx5e_is_eswitch_flow(flow))
-		flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
+		flow->esw_attr->modify_hdr = mh->modify_hdr;
 	else
-		flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
+		flow->nic_attr->modify_hdr = mh->modify_hdr;
 
 	return 0;
 
@@ -906,7 +906,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	struct mlx5_flow_destination dest[2] = {};
 	struct mlx5_flow_act flow_act = {
 		.action = attr->action,
-		.reformat_id = 0,
 		.flags    = FLOW_ACT_NO_APPEND,
 	};
 	struct mlx5_fc *counter = NULL;
@@ -947,7 +946,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
 		err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
-		flow_act.modify_id = attr->mod_hdr_id;
+		flow_act.modify_hdr = attr->modify_hdr;
 		kfree(parse_attr->mod_hdr_actions);
 		if (err)
 			return err;
@@ -1304,14 +1303,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 	struct mlx5e_tc_flow *flow;
 	int err;
 
-	err = mlx5_packet_reformat_alloc(priv->mdev,
-					 e->reformat_type,
-					 e->encap_size, e->encap_header,
-					 MLX5_FLOW_NAMESPACE_FDB,
-					 &e->encap_id);
-	if (err) {
-		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
-			       err);
+	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     e->reformat_type,
+						     e->encap_size, e->encap_header,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(e->pkt_reformat)) {
+		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
+			       PTR_ERR(e->pkt_reformat));
 		return;
 	}
 	e->flags |= MLX5_ENCAP_ENTRY_VALID;
@@ -1326,7 +1324,7 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 		esw_attr = flow->esw_attr;
 		spec = &esw_attr->parse_attr->spec;
 
-		esw_attr->dests[flow->tmp_efi_index].encap_id = e->encap_id;
+		esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
 		esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
 		/* Flow can be associated with multiple encap entries.
 		 * Before offloading the flow verify that all of them have
@@ -1395,7 +1393,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 
 	/* we know that the encap is valid */
 	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
-	mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
+	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
 }
 
 static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
@@ -1561,7 +1559,7 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 
 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-			mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
+			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
 	}
 
 	kfree(e->encap_header);
@@ -3048,7 +3046,7 @@ attach_flow:
 	flow->encaps[out_index].index = out_index;
 	*encap_dev = e->out_dev;
 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
-		attr->dests[out_index].encap_id = e->encap_id;
+		attr->dests[out_index].pkt_reformat = e->pkt_reformat;
 		attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
 		*encap_valid = true;
 	} else {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index aba9e7a6ad3c..4f70202db6af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -69,7 +69,7 @@ struct vport_ingress {
 	struct mlx5_flow_group *allow_spoofchk_only_grp;
 	struct mlx5_flow_group *allow_untagged_only_grp;
 	struct mlx5_flow_group *drop_grp;
-	int modify_metadata_id;
+	struct mlx5_modify_hdr   *modify_metadata;
 	struct mlx5_flow_handle  *modify_metadata_rule;
 	struct mlx5_flow_handle  *allow_rule;
 	struct mlx5_flow_handle  *drop_rule;
@@ -385,11 +385,11 @@ struct mlx5_esw_flow_attr {
 	struct {
 		u32 flags;
 		struct mlx5_eswitch_rep *rep;
+		struct mlx5_pkt_reformat *pkt_reformat;
 		struct mlx5_core_dev *mdev;
-		u32 encap_id;
 		struct mlx5_termtbl_handle *termtbl;
 	} dests[MLX5_MAX_FLOW_FWD_VPORTS];
-	u32	mod_hdr_id;
+	struct  mlx5_modify_hdr *modify_hdr;
 	u8	inner_match_level;
 	u8	outer_match_level;
 	struct mlx5_fc *counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 7d3582ee66b7..bee67ff58137 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -190,10 +190,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 						MLX5_FLOW_DEST_VPORT_VHCA_ID;
 				if (attr->dests[j].flags & MLX5_ESW_DEST_ENCAP) {
 					flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-					flow_act.reformat_id = attr->dests[j].encap_id;
+					flow_act.pkt_reformat = attr->dests[j].pkt_reformat;
 					dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
-					dest[i].vport.reformat_id =
-						attr->dests[j].encap_id;
+					dest[i].vport.pkt_reformat =
+						attr->dests[j].pkt_reformat;
 				}
 				i++;
 			}
@@ -213,7 +213,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-		flow_act.modify_id = attr->mod_hdr_id;
+		flow_act.modify_hdr = attr->modify_hdr;
 
 	fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!split);
 	if (IS_ERR(fdb)) {
@@ -276,7 +276,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 			dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
 		if (attr->dests[i].flags & MLX5_ESW_DEST_ENCAP) {
 			dest[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
-			dest[i].vport.reformat_id = attr->dests[i].encap_id;
+			dest[i].vport.pkt_reformat = attr->dests[i].pkt_reformat;
 		}
 	}
 	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -1734,7 +1734,7 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
 
 	if (vport->ingress.modify_metadata_rule) {
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-		flow_act.modify_id = vport->ingress.modify_metadata_id;
+		flow_act.modify_hdr = vport->ingress.modify_metadata;
 	}
 
 	vport->ingress.allow_rule =
@@ -1770,9 +1770,11 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
 	MLX5_SET(set_action_in, action, data,
 		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
 
-	err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-				       1, action, &vport->ingress.modify_metadata_id);
-	if (err) {
+	vport->ingress.modify_metadata =
+		mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+					 1, action);
+	if (IS_ERR(vport->ingress.modify_metadata)) {
+		err = PTR_ERR(vport->ingress.modify_metadata);
 		esw_warn(esw->dev,
 			 "failed to alloc modify header for vport %d ingress acl (%d)\n",
 			 vport->vport, err);
@@ -1780,7 +1782,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
 	}
 
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-	flow_act.modify_id = vport->ingress.modify_metadata_id;
+	flow_act.modify_hdr = vport->ingress.modify_metadata;
 	vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
 								  &spec, &flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.modify_metadata_rule)) {
@@ -1794,7 +1796,7 @@ static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
 
 out:
 	if (err)
-		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
 	return err;
 }
 
@@ -1803,7 +1805,7 @@ void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
 {
 	if (vport->ingress.modify_metadata_rule) {
 		mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
-		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata);
 
 		vport->ingress.modify_metadata_rule = NULL;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1e3381604b3d..488f50dfb404 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -107,6 +107,34 @@ static int mlx5_cmd_stub_delete_fte(struct mlx5_flow_root_namespace *ns,
 	return 0;
 }
 
+static int mlx5_cmd_stub_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+					       int reformat_type,
+					       size_t size,
+					       void *reformat_data,
+					       enum mlx5_flow_namespace_type namespace,
+					       struct mlx5_pkt_reformat *pkt_reformat)
+{
+	return 0;
+}
+
+static void mlx5_cmd_stub_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+						  struct mlx5_pkt_reformat *pkt_reformat)
+{
+}
+
+static int mlx5_cmd_stub_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+					     u8 namespace, u8 num_actions,
+					     void *modify_actions,
+					     struct mlx5_modify_hdr *modify_hdr)
+{
+	return 0;
+}
+
+static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+						struct mlx5_modify_hdr *modify_hdr)
+{
+}
+
 static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 				   struct mlx5_flow_table *ft, u32 underlay_qpn,
 				   bool disconnect)
@@ -412,11 +440,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 	} else {
 		MLX5_SET(flow_context, in_flow_context, action,
 			 fte->action.action);
-		MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
-			 fte->action.reformat_id);
+		if (fte->action.pkt_reformat)
+			MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+				 fte->action.pkt_reformat->id);
 	}
-	MLX5_SET(flow_context, in_flow_context, modify_header_id,
-		 fte->action.modify_id);
+	if (fte->action.modify_hdr)
+		MLX5_SET(flow_context, in_flow_context, modify_header_id,
+			 fte->action.modify_hdr->id);
 
 	vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
 
@@ -468,7 +498,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 						    MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
 					MLX5_SET(extended_dest_format, in_dests,
 						 packet_reformat_id,
-						 dst->dest_attr.vport.reformat_id);
+						 dst->dest_attr.vport.pkt_reformat->id);
 				}
 				break;
 			default:
@@ -643,14 +673,15 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
-int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
-			       int reformat_type,
-			       size_t size,
-			       void *reformat_data,
-			       enum mlx5_flow_namespace_type namespace,
-			       u32 *packet_reformat_id)
+static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+					  int reformat_type,
+					  size_t size,
+					  void *reformat_data,
+					  enum mlx5_flow_namespace_type namespace,
+					  struct mlx5_pkt_reformat *pkt_reformat)
 {
 	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+	struct mlx5_core_dev *dev = ns->dev;
 	void *packet_reformat_context_in;
 	int max_encap_size;
 	void *reformat;
@@ -693,35 +724,36 @@ int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
-	*packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out,
-				       out, packet_reformat_id);
+	pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
+				    out, packet_reformat_id);
 	kfree(in);
 	return err;
 }
-EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
 
-void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
-				  u32 packet_reformat_id)
+static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+					     struct mlx5_pkt_reformat *pkt_reformat)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
 	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
+	struct mlx5_core_dev *dev = ns->dev;
 
 	memset(in, 0, sizeof(in));
 	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
 	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
-		 packet_reformat_id);
+		 pkt_reformat->id);
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
-EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
 
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
-			     u8 namespace, u8 num_actions,
-			     void *modify_actions, u32 *modify_header_id)
+static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+					u8 namespace, u8 num_actions,
+					void *modify_actions,
+					struct mlx5_modify_hdr *modify_hdr)
 {
 	u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
 	int max_actions, actions_size, inlen, err;
+	struct mlx5_core_dev *dev = ns->dev;
 	void *actions_in;
 	u8 table_type;
 	u32 *in;
@@ -772,26 +804,26 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
-	*modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
+	modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
 	kfree(in);
 	return err;
 }
-EXPORT_SYMBOL(mlx5_modify_header_alloc);
 
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
+static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+					   struct mlx5_modify_hdr *modify_hdr)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
 	u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+	struct mlx5_core_dev *dev = ns->dev;
 
 	memset(in, 0, sizeof(in));
 	MLX5_SET(dealloc_modify_header_context_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
-		 modify_header_id);
+		 modify_hdr->id);
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
-EXPORT_SYMBOL(mlx5_modify_header_dealloc);
 
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
 	.create_flow_table = mlx5_cmd_create_flow_table,
@@ -803,6 +835,10 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
 	.update_fte = mlx5_cmd_update_fte,
 	.delete_fte = mlx5_cmd_delete_fte,
 	.update_root_ft = mlx5_cmd_update_root_ft,
+	.packet_reformat_alloc = mlx5_cmd_packet_reformat_alloc,
+	.packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
+	.modify_header_alloc = mlx5_cmd_modify_header_alloc,
+	.modify_header_dealloc = mlx5_cmd_modify_header_dealloc
 };
 
 static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
@@ -815,6 +851,10 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
 	.update_fte = mlx5_cmd_stub_update_fte,
 	.delete_fte = mlx5_cmd_stub_delete_fte,
 	.update_root_ft = mlx5_cmd_stub_update_root_ft,
+	.packet_reformat_alloc = mlx5_cmd_stub_packet_reformat_alloc,
+	.packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
+	.modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
+	.modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc
 };
 
 static const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index bc4606306009..3268654d6748 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -75,6 +75,24 @@ struct mlx5_flow_cmds {
 			      struct mlx5_flow_table *ft,
 			      u32 underlay_qpn,
 			      bool disconnect);
+
+	int (*packet_reformat_alloc)(struct mlx5_flow_root_namespace *ns,
+				     int reformat_type,
+				     size_t size,
+				     void *reformat_data,
+				     enum mlx5_flow_namespace_type namespace,
+				     struct mlx5_pkt_reformat *pkt_reformat);
+
+	void (*packet_reformat_dealloc)(struct mlx5_flow_root_namespace *ns,
+					struct mlx5_pkt_reformat *pkt_reformat);
+
+	int (*modify_header_alloc)(struct mlx5_flow_root_namespace *ns,
+				   u8 namespace, u8 num_actions,
+				   void *modify_actions,
+				   struct mlx5_modify_hdr *modify_hdr);
+
+	void (*modify_header_dealloc)(struct mlx5_flow_root_namespace *ns,
+				      struct mlx5_modify_hdr *modify_hdr);
 };
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 7bdec442f0ac..1d2333fd3080 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1415,7 +1415,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
 		      (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
 		     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
-		      (d1->vport.reformat_id == d2->vport.reformat_id) : true)) ||
+		      (d1->vport.pkt_reformat->id ==
+		       d2->vport.pkt_reformat->id) : true)) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
 		     d1->ft == d2->ft) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -2888,3 +2889,105 @@ out:
 	return err;
 }
 EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
+
+static struct mlx5_flow_root_namespace
+*get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type ns_type)
+{
+	struct mlx5_flow_namespace *ns;
+
+	if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
+	    ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
+		ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
+	else
+		ns = mlx5_get_flow_namespace(dev, ns_type);
+	if (!ns)
+		return NULL;
+
+	return find_root(&ns->node);
+}
+
+struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+						 u8 ns_type, u8 num_actions,
+						 void *modify_actions)
+{
+	struct mlx5_flow_root_namespace *root;
+	struct mlx5_modify_hdr *modify_hdr;
+	int err;
+
+	root = get_root_namespace(dev, ns_type);
+	if (!root)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	modify_hdr = kzalloc(sizeof(*modify_hdr), GFP_KERNEL);
+	if (!modify_hdr)
+		return ERR_PTR(-ENOMEM);
+
+	modify_hdr->ns_type = ns_type;
+	err = root->cmds->modify_header_alloc(root, ns_type, num_actions,
+					      modify_actions, modify_hdr);
+	if (err) {
+		kfree(modify_hdr);
+		return ERR_PTR(err);
+	}
+
+	return modify_hdr;
+}
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
+
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+				struct mlx5_modify_hdr *modify_hdr)
+{
+	struct mlx5_flow_root_namespace *root;
+
+	root = get_root_namespace(dev, modify_hdr->ns_type);
+	if (WARN_ON(!root))
+		return;
+	root->cmds->modify_header_dealloc(root, modify_hdr);
+	kfree(modify_hdr);
+}
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
+
+struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+						     int reformat_type,
+						     size_t size,
+						     void *reformat_data,
+						     enum mlx5_flow_namespace_type ns_type)
+{
+	struct mlx5_pkt_reformat *pkt_reformat;
+	struct mlx5_flow_root_namespace *root;
+	int err;
+
+	root = get_root_namespace(dev, ns_type);
+	if (!root)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	pkt_reformat = kzalloc(sizeof(*pkt_reformat), GFP_KERNEL);
+	if (!pkt_reformat)
+		return ERR_PTR(-ENOMEM);
+
+	pkt_reformat->ns_type = ns_type;
+	pkt_reformat->reformat_type = reformat_type;
+	err = root->cmds->packet_reformat_alloc(root, reformat_type, size,
+						reformat_data, ns_type,
+						pkt_reformat);
+	if (err) {
+		kfree(pkt_reformat);
+		return ERR_PTR(err);
+	}
+
+	return pkt_reformat;
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
+
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+				  struct mlx5_pkt_reformat *pkt_reformat)
+{
+	struct mlx5_flow_root_namespace *root;
+
+	root = get_root_namespace(dev, pkt_reformat->ns_type);
+	if (WARN_ON(!root))
+		return;
+	root->cmds->packet_reformat_dealloc(root, pkt_reformat);
+	kfree(pkt_reformat);
+}
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 0d16b4b5ab83..ea0f221685ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -38,6 +38,17 @@
 #include <linux/rhashtable.h>
 #include <linux/llist.h>
 
+struct mlx5_modify_hdr {
+	enum mlx5_flow_namespace_type ns_type;
+	u32 id;
+};
+
+struct mlx5_pkt_reformat {
+	enum mlx5_flow_namespace_type ns_type;
+	int reformat_type; /* from mlx5_ifc */
+	u32 id;
+};
+
 /* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only,
  * and those are in parallel to one another when going over them to connect
  * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 97ec6be62ac4..724d276ea133 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -84,6 +84,8 @@ enum {
 	FDB_SLOW_PATH,
 };
 
+struct mlx5_pkt_reformat;
+struct mlx5_modify_hdr;
 struct mlx5_flow_table;
 struct mlx5_flow_group;
 struct mlx5_flow_namespace;
@@ -121,7 +123,7 @@ struct mlx5_flow_destination {
 		struct {
 			u16		num;
 			u16		vhca_id;
-			u32		reformat_id;
+			struct mlx5_pkt_reformat *pkt_reformat;
 			u8		flags;
 		} vport;
 	};
@@ -195,8 +197,8 @@ enum {
 
 struct mlx5_flow_act {
 	u32 action;
-	u32 reformat_id;
-	u32 modify_id;
+	struct mlx5_modify_hdr  *modify_hdr;
+	struct mlx5_pkt_reformat *pkt_reformat;
 	uintptr_t esp_id;
 	u32 flags;
 	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
@@ -205,8 +207,6 @@ struct mlx5_flow_act {
 
 #define MLX5_DECLARE_FLOW_ACT(name) \
 	struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
-				      .reformat_id = 0, \
-				      .modify_id = 0, \
 				      .flags =  0, }
 
 /* Single destination per rule.
@@ -236,19 +236,18 @@ u32 mlx5_fc_id(struct mlx5_fc *counter);
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
-			     u8 namespace, u8 num_actions,
-			     void *modify_actions, u32 *modify_header_id);
+struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+						 u8 ns_type, u8 num_actions,
+						 void *modify_actions);
 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
-				u32 modify_header_id);
-
-int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
-			       int reformat_type,
-			       size_t size,
-			       void *reformat_data,
-			       enum mlx5_flow_namespace_type namespace,
-			       u32 *packet_reformat_id);
+				struct mlx5_modify_hdr *modify_hdr);
+
+struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+						     int reformat_type,
+						     size_t size,
+						     void *reformat_data,
+						     enum mlx5_flow_namespace_type ns_type);
 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
-				  u32 packet_reformat_id);
+				  struct mlx5_pkt_reformat *reformat);
 
 #endif
-- 
cgit v1.2.3


From e29ccc188f3dae1cb66f59e10e01e0f150642a54 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 3 Aug 2019 13:31:25 +0300
Subject: dma-mapping: add a dma_can_mmap helper

Add a helper to check if DMA allocations for a specific device can be
mapped to userspace using dma_mmap_*.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-mapping.h |  5 +++++
 kernel/dma/mapping.c        | 23 +++++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index de41a4f0b9f6..10dc9ac3cccb 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -463,6 +463,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
 int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		unsigned long attrs);
+bool dma_can_mmap(struct device *dev);
 int dma_supported(struct device *dev, u64 mask);
 int dma_set_mask(struct device *dev, u64 mask);
 int dma_set_coherent_mask(struct device *dev, u64 mask);
@@ -554,6 +555,10 @@ static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 {
 	return -ENXIO;
 }
+static inline bool dma_can_mmap(struct device *dev)
+{
+	return false;
+}
 static inline int dma_supported(struct device *dev, u64 mask)
 {
 	return 0;
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index c8b4e46407ba..18ba1ac93fc1 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -222,6 +222,29 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 #endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
 }
 
+/**
+ * dma_can_mmap - check if a given device supports dma_mmap_*
+ * @dev: device to check
+ *
+ * Returns %true if @dev supports dma_mmap_coherent() and dma_mmap_attrs() to
+ * map DMA allocations to userspace.
+ */
+bool dma_can_mmap(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	if (IS_ENABLED(CONFIG_ARCH_NO_COHERENT_DMA_MMAP))
+		return false;
+
+	if (dma_is_direct(ops)) {
+		return dev_is_dma_coherent(dev) ||
+			IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN);
+	}
+
+	return ops->mmap != NULL;
+}
+EXPORT_SYMBOL_GPL(dma_can_mmap);
+
 /**
  * dma_mmap_attrs - map a coherent DMA allocation into user space
  * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
-- 
cgit v1.2.3


From 212836a9929f0c91214a8a1879e6e41be0e26a6f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Jul 2019 08:58:36 +0200
Subject: dma-mapping: remove dma_{alloc,free,mmap}_writecombine

We can already use DMA_ATTR_WRITE_COMBINE or the _wc prefixed version,
so remove the third way of doing things.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/omapdrm/dss/dispc.c | 11 +++++------
 include/linux/dma-mapping.h         |  9 ---------
 2 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index 785c5546067a..ed0ccbeed70f 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -4609,11 +4609,10 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc)
 	i734_buf.size = i734.ovli.width * i734.ovli.height *
 		color_mode_to_bpp(i734.ovli.fourcc) / 8;
 
-	i734_buf.vaddr = dma_alloc_writecombine(&dispc->pdev->dev,
-						i734_buf.size, &i734_buf.paddr,
-						GFP_KERNEL);
+	i734_buf.vaddr = dma_alloc_wc(&dispc->pdev->dev, i734_buf.size,
+				      &i734_buf.paddr, GFP_KERNEL);
 	if (!i734_buf.vaddr) {
-		dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n",
+		dev_err(&dispc->pdev->dev, "%s: dma_alloc_wc failed\n",
 			__func__);
 		return -ENOMEM;
 	}
@@ -4626,8 +4625,8 @@ static void dispc_errata_i734_wa_fini(struct dispc_device *dispc)
 	if (!dispc->feat->has_gamma_i734_bug)
 		return;
 
-	dma_free_writecombine(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr,
-			      i734_buf.paddr);
+	dma_free_wc(&dispc->pdev->dev, i734_buf.size, i734_buf.vaddr,
+		    i734_buf.paddr);
 }
 
 static void dispc_errata_i734_wa(struct dispc_device *dispc)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 10dc9ac3cccb..41d4e91b0982 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -796,9 +796,6 @@ static inline void *dma_alloc_wc(struct device *dev, size_t size,
 
 	return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs);
 }
-#ifndef dma_alloc_writecombine
-#define dma_alloc_writecombine dma_alloc_wc
-#endif
 
 static inline void dma_free_wc(struct device *dev, size_t size,
 			       void *cpu_addr, dma_addr_t dma_addr)
@@ -806,9 +803,6 @@ static inline void dma_free_wc(struct device *dev, size_t size,
 	return dma_free_attrs(dev, size, cpu_addr, dma_addr,
 			      DMA_ATTR_WRITE_COMBINE);
 }
-#ifndef dma_free_writecombine
-#define dma_free_writecombine dma_free_wc
-#endif
 
 static inline int dma_mmap_wc(struct device *dev,
 			      struct vm_area_struct *vma,
@@ -818,9 +812,6 @@ static inline int dma_mmap_wc(struct device *dev,
 	return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size,
 			      DMA_ATTR_WRITE_COMBINE);
 }
-#ifndef dma_mmap_writecombine
-#define dma_mmap_writecombine dma_mmap_wc
-#endif
 
 #ifdef CONFIG_NEED_DMA_MAP_STATE
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
-- 
cgit v1.2.3


From 1fa0682448acd5198f79c1d28ee1292a27ae406d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 30 Aug 2019 08:46:57 +0200
Subject: dma-mapping: remove dma_release_declared_memory

This function is entirely unused given that declared memory is
generally provided by platform setup code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/DMA-API.txt   | 11 -----------
 include/linux/dma-mapping.h |  6 ------
 kernel/dma/coherent.c       | 11 -----------
 3 files changed, 28 deletions(-)

(limited to 'include')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 9c4dd3d779ed..2d8d2fed7317 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -603,17 +603,6 @@ For reasons of efficiency, most platforms choose to track the declared
 region only at the granularity of a page.  For smaller allocations,
 you should use the dma_pool() API.
 
-::
-
-	void
-	dma_release_declared_memory(struct device *dev)
-
-Remove the memory region previously declared from the system.  This
-API performs *no* in-use checking for this region and will return
-unconditionally having removed all the required structures.  It is the
-driver's job to ensure that no parts of this memory region are
-currently in use.
-
 Part III - Debug drivers use of the DMA-API
 -------------------------------------------
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 41d4e91b0982..80063b0fdea8 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -764,7 +764,6 @@ static inline int dma_get_cache_alignment(void)
 #ifdef CONFIG_DMA_DECLARE_COHERENT
 int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				dma_addr_t device_addr, size_t size);
-void dma_release_declared_memory(struct device *dev);
 #else
 static inline int
 dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
@@ -772,11 +771,6 @@ dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 {
 	return -ENOSYS;
 }
-
-static inline void
-dma_release_declared_memory(struct device *dev)
-{
-}
 #endif /* CONFIG_DMA_DECLARE_COHERENT */
 
 static inline void *dmam_alloc_coherent(struct device *dev, size_t size,
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 29fd6590dc1e..7271cda86a37 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -124,17 +124,6 @@ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 }
 EXPORT_SYMBOL(dma_declare_coherent_memory);
 
-void dma_release_declared_memory(struct device *dev)
-{
-	struct dma_coherent_mem *mem = dev->dma_mem;
-
-	if (!mem)
-		return;
-	dma_release_coherent_memory(mem);
-	dev->dma_mem = NULL;
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 		ssize_t size, dma_addr_t *dma_handle)
 {
-- 
cgit v1.2.3


From fe9041c245196c6c61091ccc2c74b73ab9a5fc50 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 3 Jun 2019 08:55:13 +0200
Subject: vmalloc: lift the arm flag for coherent mappings to common code

The arm architecture had a VM_ARM_DMA_CONSISTENT flag to mark DMA
coherent remapping for a while.  Lift this flag to common code so
that we can use it generically.  We also check it in the only place
VM_USERMAP is directly check so that we can entirely replace that
flag as well (although I'm not even sure why we'd want to allow
remapping DMA appings, but I'd rather not change behavior).

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/mm/dma-mapping.c | 22 +++++++---------------
 arch/arm/mm/mm.h          |  3 ---
 include/linux/vmalloc.h   |  2 ++
 mm/vmalloc.c              |  5 ++++-
 4 files changed, 13 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index eb903beef2ff..aec31f9b918b 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -343,19 +343,13 @@ static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 	const void *caller)
 {
-	/*
-	 * DMA allocation can be mapped to user space, so lets
-	 * set VM_USERMAP flags too.
-	 */
-	return dma_common_contiguous_remap(page, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+	return dma_common_contiguous_remap(page, size, VM_DMA_COHERENT,
 			prot, caller);
 }
 
 static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-	dma_common_free_remap(cpu_addr, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
+	dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
 }
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
@@ -1371,8 +1365,8 @@ static void *
 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
 		    const void *caller)
 {
-	return dma_common_pages_remap(pages, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
+	return dma_common_pages_remap(pages, size, VM_DMA_COHERENT, prot,
+			caller);
 }
 
 /*
@@ -1456,7 +1450,7 @@ static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
 		return cpu_addr;
 
 	area = find_vm_area(cpu_addr);
-	if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+	if (area && (area->flags & VM_DMA_COHERENT))
 		return area->pages;
 	return NULL;
 }
@@ -1614,10 +1608,8 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 		return;
 	}
 
-	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
-		dma_common_free_remap(cpu_addr, size,
-			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
-	}
+	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
+		dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
 
 	__iommu_remove_mapping(dev, handle, size);
 	__iommu_free_buffer(dev, pages, size, attrs);
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
index 941356d95a67..88c121ac14b3 100644
--- a/arch/arm/mm/mm.h
+++ b/arch/arm/mm/mm.h
@@ -70,9 +70,6 @@ extern void __flush_dcache_page(struct address_space *mapping, struct page *page
 #define VM_ARM_MTYPE(mt)		((mt) << 20)
 #define VM_ARM_MTYPE_MASK	(0x1f << 20)
 
-/* consistent regions used by dma_alloc_attrs() */
-#define VM_ARM_DMA_CONSISTENT	0x20000000
-
 
 struct static_vm {
 	struct vm_struct vm;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 9b21d0047710..dfa718ffdd4f 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -18,6 +18,7 @@ struct notifier_block;		/* in notifier.h */
 #define VM_ALLOC		0x00000002	/* vmalloc() */
 #define VM_MAP			0x00000004	/* vmap()ed pages */
 #define VM_USERMAP		0x00000008	/* suitable for remap_vmalloc_range */
+#define VM_DMA_COHERENT		0x00000010	/* dma_alloc_coherent */
 #define VM_UNINITIALIZED	0x00000020	/* vm_struct is not fully initialized */
 #define VM_NO_GUARD		0x00000040      /* don't add guard page */
 #define VM_KASAN		0x00000080      /* has allocated kasan shadow memory */
@@ -26,6 +27,7 @@ struct notifier_block;		/* in notifier.h */
  * vfree_atomic().
  */
 #define VM_FLUSH_RESET_PERMS	0x00000100      /* Reset direct map and flush TLB on unmap */
+
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7ba11e12a11f..c1246d77cf75 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2993,7 +2993,7 @@ int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
 	if (!area)
 		return -EINVAL;
 
-	if (!(area->flags & VM_USERMAP))
+	if (!(area->flags & (VM_USERMAP | VM_DMA_COHERENT)))
 		return -EINVAL;
 
 	if (kaddr + size > area->addr + get_vm_area_size(area))
@@ -3496,6 +3496,9 @@ static int s_show(struct seq_file *m, void *p)
 	if (v->flags & VM_USERMAP)
 		seq_puts(m, " user");
 
+	if (v->flags & VM_DMA_COHERENT)
+		seq_puts(m, " dma-coherent");
+
 	if (is_vmalloc_addr(v->pages))
 		seq_puts(m, " vpages");
 
-- 
cgit v1.2.3


From 512317401f6a337e617ec284d20dec5fa3a951ec Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 30 Aug 2019 08:51:01 +0200
Subject: dma-mapping: always use VM_DMA_COHERENT for generic DMA remap

Currently the generic dma remap allocator gets a vm_flags passed by
the caller that is a little confusing.  We just introduced a generic
vmalloc-level flag to identify the dma coherent allocations, so use
that everywhere and remove the now pointless argument.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/mm/dma-mapping.c    | 10 ++++------
 arch/xtensa/kernel/pci-dma.c |  4 ++--
 drivers/iommu/dma-iommu.c    |  6 +++---
 include/linux/dma-mapping.h  |  6 ++----
 kernel/dma/remap.c           | 23 ++++++++++-------------
 5 files changed, 21 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index aec31f9b918b..fd02c982e36a 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -343,13 +343,12 @@ static void *
 __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
 	const void *caller)
 {
-	return dma_common_contiguous_remap(page, size, VM_DMA_COHERENT,
-			prot, caller);
+	return dma_common_contiguous_remap(page, size, prot, caller);
 }
 
 static void __dma_free_remap(void *cpu_addr, size_t size)
 {
-	dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
+	dma_common_free_remap(cpu_addr, size);
 }
 
 #define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
@@ -1365,8 +1364,7 @@ static void *
 __iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
 		    const void *caller)
 {
-	return dma_common_pages_remap(pages, size, VM_DMA_COHERENT, prot,
-			caller);
+	return dma_common_pages_remap(pages, size, prot, caller);
 }
 
 /*
@@ -1609,7 +1607,7 @@ void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
 	}
 
 	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0)
-		dma_common_free_remap(cpu_addr, size, VM_DMA_COHERENT);
+		dma_common_free_remap(cpu_addr, size);
 
 	__iommu_remove_mapping(dev, handle, size);
 	__iommu_free_buffer(dev, pages, size, attrs);
diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c
index 65f05776d827..154979d62b73 100644
--- a/arch/xtensa/kernel/pci-dma.c
+++ b/arch/xtensa/kernel/pci-dma.c
@@ -167,7 +167,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	if (PageHighMem(page)) {
 		void *p;
 
-		p = dma_common_contiguous_remap(page, size, VM_MAP,
+		p = dma_common_contiguous_remap(page, size,
 						pgprot_noncached(PAGE_KERNEL),
 						__builtin_return_address(0));
 		if (!p) {
@@ -192,7 +192,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		page = virt_to_page(platform_vaddr_to_cached(vaddr));
 	} else {
 #ifdef CONFIG_MMU
-		dma_common_free_remap(vaddr, size, VM_MAP);
+		dma_common_free_remap(vaddr, size);
 #endif
 		page = pfn_to_page(PHYS_PFN(dma_to_phys(dev, dma_handle)));
 	}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index ef407e4eccde..949e341bf2f3 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -617,7 +617,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 			< size)
 		goto out_free_sg;
 
-	vaddr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
+	vaddr = dma_common_pages_remap(pages, size, prot,
 			__builtin_return_address(0));
 	if (!vaddr)
 		goto out_unmap;
@@ -941,7 +941,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
 		pages = __iommu_dma_get_pages(cpu_addr);
 		if (!pages)
 			page = vmalloc_to_page(cpu_addr);
-		dma_common_free_remap(cpu_addr, alloc_size, VM_USERMAP);
+		dma_common_free_remap(cpu_addr, alloc_size);
 	} else {
 		/* Lowmem means a coherent atomic or CMA allocation */
 		page = virt_to_page(cpu_addr);
@@ -979,7 +979,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
 		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
 
 		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
-				VM_USERMAP, prot, __builtin_return_address(0));
+				prot, __builtin_return_address(0));
 		if (!cpu_addr)
 			goto out_free_pages;
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 80063b0fdea8..86223bc24d82 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -627,13 +627,11 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 		unsigned long attrs);
 
 void *dma_common_contiguous_remap(struct page *page, size_t size,
-			unsigned long vm_flags,
 			pgprot_t prot, const void *caller);
 
 void *dma_common_pages_remap(struct page **pages, size_t size,
-			unsigned long vm_flags, pgprot_t prot,
-			const void *caller);
-void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
+			pgprot_t prot, const void *caller);
+void dma_common_free_remap(void *cpu_addr, size_t size);
 
 bool dma_in_atomic_pool(void *start, size_t size);
 void *dma_alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags);
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 838123f79639..f6b90521a7e4 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -12,12 +12,11 @@
 #include <linux/vmalloc.h>
 
 static struct vm_struct *__dma_common_pages_remap(struct page **pages,
-			size_t size, unsigned long vm_flags, pgprot_t prot,
-			const void *caller)
+			size_t size, pgprot_t prot, const void *caller)
 {
 	struct vm_struct *area;
 
-	area = get_vm_area_caller(size, vm_flags, caller);
+	area = get_vm_area_caller(size, VM_DMA_COHERENT, caller);
 	if (!area)
 		return NULL;
 
@@ -34,12 +33,11 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages,
  * Cannot be used in non-sleeping contexts
  */
 void *dma_common_pages_remap(struct page **pages, size_t size,
-			unsigned long vm_flags, pgprot_t prot,
-			const void *caller)
+			 pgprot_t prot, const void *caller)
 {
 	struct vm_struct *area;
 
-	area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+	area = __dma_common_pages_remap(pages, size, prot, caller);
 	if (!area)
 		return NULL;
 
@@ -53,7 +51,6 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
  * Cannot be used in non-sleeping contexts
  */
 void *dma_common_contiguous_remap(struct page *page, size_t size,
-			unsigned long vm_flags,
 			pgprot_t prot, const void *caller)
 {
 	int i;
@@ -67,7 +64,7 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
 	for (i = 0; i < (size >> PAGE_SHIFT); i++)
 		pages[i] = nth_page(page, i);
 
-	area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+	area = __dma_common_pages_remap(pages, size, prot, caller);
 
 	kfree(pages);
 
@@ -79,11 +76,11 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
 /*
  * Unmaps a range previously mapped by dma_common_*_remap
  */
-void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+void dma_common_free_remap(void *cpu_addr, size_t size)
 {
 	struct vm_struct *area = find_vm_area(cpu_addr);
 
-	if (!area || (area->flags & vm_flags) != vm_flags) {
+	if (!area || area->flags != VM_DMA_COHERENT) {
 		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
 		return;
 	}
@@ -136,7 +133,7 @@ static int __init dma_atomic_pool_init(void)
 	if (!atomic_pool)
 		goto free_page;
 
-	addr = dma_common_contiguous_remap(page, atomic_pool_size, VM_USERMAP,
+	addr = dma_common_contiguous_remap(page, atomic_pool_size,
 					   pgprot_dmacoherent(PAGE_KERNEL),
 					   __builtin_return_address(0));
 	if (!addr)
@@ -153,7 +150,7 @@ static int __init dma_atomic_pool_init(void)
 	return 0;
 
 remove_mapping:
-	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
+	dma_common_free_remap(addr, atomic_pool_size);
 destroy_genpool:
 	gen_pool_destroy(atomic_pool);
 	atomic_pool = NULL;
@@ -228,7 +225,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	arch_dma_prep_coherent(page, size);
 
 	/* create a coherent mapping */
-	ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
+	ret = dma_common_contiguous_remap(page, size,
 			dma_pgprot(dev, PAGE_KERNEL, attrs),
 			__builtin_return_address(0));
 	if (!ret) {
-- 
cgit v1.2.3


From 5cf4537975bbd5691b9ddd015d540bb92f61e322 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 3 Jun 2019 09:14:31 +0200
Subject: dma-mapping: introduce a dma_common_find_pages helper

A helper to find the backing page array based on a virtual address.
This also ensures we do the same vm_flags check everywhere instead
of slightly different or missing ones in a few places.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm/mm/dma-mapping.c   |  7 +------
 drivers/iommu/dma-iommu.c   | 15 +++------------
 include/linux/dma-mapping.h |  1 +
 kernel/dma/remap.c          | 13 +++++++++++--
 4 files changed, 16 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index fd02c982e36a..97bf57df87c1 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1439,18 +1439,13 @@ static struct page **__atomic_get_pages(void *addr)
 
 static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
 {
-	struct vm_struct *area;
-
 	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
 		return __atomic_get_pages(cpu_addr);
 
 	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
 		return cpu_addr;
 
-	area = find_vm_area(cpu_addr);
-	if (area && (area->flags & VM_DMA_COHERENT))
-		return area->pages;
-	return NULL;
+	return dma_common_find_pages(cpu_addr);
 }
 
 static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 949e341bf2f3..e8482c57c24e 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -541,15 +541,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
 	return pages;
 }
 
-static struct page **__iommu_dma_get_pages(void *cpu_addr)
-{
-	struct vm_struct *area = find_vm_area(cpu_addr);
-
-	if (!area || !area->pages)
-		return NULL;
-	return area->pages;
-}
-
 /**
  * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
  * @dev: Device to allocate memory for. Must be a real device
@@ -938,7 +929,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr)
 		 * If it the address is remapped, then it's either non-coherent
 		 * or highmem CMA, or an iommu_dma_alloc_remap() construction.
 		 */
-		pages = __iommu_dma_get_pages(cpu_addr);
+		pages = dma_common_find_pages(cpu_addr);
 		if (!pages)
 			page = vmalloc_to_page(cpu_addr);
 		dma_common_free_remap(cpu_addr, alloc_size);
@@ -1045,7 +1036,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		return -ENXIO;
 
 	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
-		struct page **pages = __iommu_dma_get_pages(cpu_addr);
+		struct page **pages = dma_common_find_pages(cpu_addr);
 
 		if (pages)
 			return __iommu_dma_mmap(pages, size, vma);
@@ -1067,7 +1058,7 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 	int ret;
 
 	if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) {
-		struct page **pages = __iommu_dma_get_pages(cpu_addr);
+		struct page **pages = dma_common_find_pages(cpu_addr);
 
 		if (pages) {
 			return sg_alloc_table_from_pages(sgt, pages,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 86223bc24d82..746fa5d6850c 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -626,6 +626,7 @@ extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		unsigned long attrs);
 
+struct page **dma_common_find_pages(void *cpu_addr);
 void *dma_common_contiguous_remap(struct page *page, size_t size,
 			pgprot_t prot, const void *caller);
 
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index f6b90521a7e4..ca4e5d44b571 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -11,6 +11,15 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+struct page **dma_common_find_pages(void *cpu_addr)
+{
+	struct vm_struct *area = find_vm_area(cpu_addr);
+
+	if (!area || area->flags != VM_DMA_COHERENT)
+		return NULL;
+	return area->pages;
+}
+
 static struct vm_struct *__dma_common_pages_remap(struct page **pages,
 			size_t size, pgprot_t prot, const void *caller)
 {
@@ -78,9 +87,9 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
  */
 void dma_common_free_remap(void *cpu_addr, size_t size)
 {
-	struct vm_struct *area = find_vm_area(cpu_addr);
+	struct page **pages = dma_common_find_pages(cpu_addr);
 
-	if (!area || area->flags != VM_DMA_COHERENT) {
+	if (!pages) {
 		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
 		return;
 	}
-- 
cgit v1.2.3


From c7578c1d6285252d862b324f6fc26dd5adc8517c Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Tue, 13 Aug 2019 09:35:46 +0100
Subject: soundwire: Add compute_params callback

This callback allows masters to compute the bus parameters required.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20190813083550.5877-2-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/stream.c    | 10 ++++++++++
 include/linux/soundwire/sdw.h |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index 8d6c13528b68..0bc1b4dbd14b 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -1485,6 +1485,16 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 		bus->params.bandwidth += m_rt->stream->params.rate *
 			m_rt->ch_count * m_rt->stream->params.bps;
 
+		/* Compute params */
+		if (bus->compute_params) {
+			ret = bus->compute_params(bus);
+			if (ret < 0) {
+				dev_err(bus->dev, "Compute params failed: %d",
+					ret);
+				return ret;
+			}
+		}
+
 		/* Program params */
 		ret = sdw_program_params(bus);
 		if (ret < 0) {
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index be9fe08d4e9c..9932eabcb581 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -728,6 +728,7 @@ struct sdw_master_ops {
  * Bit set implies used number, bit clear implies unused number.
  * @bus_lock: bus lock
  * @msg_lock: message lock
+ * @compute_params: points to Bus resource management implementation
  * @ops: Master callback ops
  * @port_ops: Master port callback ops
  * @params: Current bus parameters
@@ -750,6 +751,7 @@ struct sdw_bus {
 	DECLARE_BITMAP(assigned, SDW_MAX_DEVICES);
 	struct mutex bus_lock;
 	struct mutex msg_lock;
+	int (*compute_params)(struct sdw_bus *bus);
 	const struct sdw_master_ops *ops;
 	const struct sdw_master_port_ops *port_ops;
 	struct sdw_bus_params params;
-- 
cgit v1.2.3


From dfcff3f8a5f18a0cfa233522b5647c2e6035fcb5 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Tue, 13 Aug 2019 09:35:47 +0100
Subject: soundwire: stream: make stream name a const pointer

Make stream name const pointer

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20190813083550.5877-3-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/stream.c    | 2 +-
 include/linux/soundwire/sdw.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index 0bc1b4dbd14b..e69f94a8c3a8 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -865,7 +865,7 @@ EXPORT_SYMBOL(sdw_release_stream);
  * sdw_alloc_stream should be called only once per stream. Typically
  * invoked from ALSA/ASoC machine/platform driver.
  */
-struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
+struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name)
 {
 	struct sdw_stream_runtime *stream;
 
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 9932eabcb581..ea787201c3ac 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -844,7 +844,7 @@ struct sdw_stream_params {
  * @m_rt_count: Count of Master runtime(s) in this stream
  */
 struct sdw_stream_runtime {
-	char *name;
+	const char *name;
 	struct sdw_stream_params params;
 	enum sdw_stream_state state;
 	enum sdw_stream_type type;
@@ -852,7 +852,7 @@ struct sdw_stream_runtime {
 	int m_rt_count;
 };
 
-struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name);
+struct sdw_stream_runtime *sdw_alloc_stream(const char *stream_name);
 void sdw_release_stream(struct sdw_stream_runtime *stream);
 int sdw_stream_add_master(struct sdw_bus *bus,
 		struct sdw_stream_config *stream_config,
-- 
cgit v1.2.3


From bd0d9d159988d1ebb97ea244ae4dfa8365ed7d85 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 22:27:30 +0200
Subject: serial: remove ks8695 driver

The platform is getting removed, so there are no more users
of this driver.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20190809202749.742267-3-arnd@arndb.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig         |  17 -
 drivers/tty/serial/Makefile        |   1 -
 drivers/tty/serial/serial_ks8695.c | 698 -------------------------------------
 include/uapi/linux/serial_core.h   |   3 -
 4 files changed, 719 deletions(-)
 delete mode 100644 drivers/tty/serial/serial_ks8695.c

(limited to 'include')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 3083dbae35f7..7041107ea78d 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -197,23 +197,6 @@ config SERIAL_KGDB_NMI
 
 	  If unsure, say N.
 
-config SERIAL_KS8695
-	bool "Micrel KS8695 (Centaur) serial port support"
-	depends on ARCH_KS8695
-	select SERIAL_CORE
-	help
-	  This selects the Micrel Centaur KS8695 UART.  Say Y here.
-
-config SERIAL_KS8695_CONSOLE
-	bool "Support for console on KS8695 (Centaur) serial port"
-	depends on SERIAL_KS8695=y
-	select SERIAL_CORE_CONSOLE
-	help
-	  Say Y here if you wish to use a KS8695 (Centaur) UART as the
-	  system console (the system console is the device which
-	  receives all kernel messages and warnings and which allows
-	  logins in single user mode).
-
 config SERIAL_MESON
 	tristate "Meson serial port support"
 	depends on ARCH_MESON
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 15a0fccadf7e..7f744136489e 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -59,7 +59,6 @@ obj-$(CONFIG_SERIAL_ATMEL) += atmel_serial.o
 obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
 obj-$(CONFIG_SERIAL_MSM) += msm_serial.o
 obj-$(CONFIG_SERIAL_QCOM_GENI) += qcom_geni_serial.o
-obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o
 obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o
 obj-$(CONFIG_SERIAL_ST_ASC) += st-asc.o
diff --git a/drivers/tty/serial/serial_ks8695.c b/drivers/tty/serial/serial_ks8695.c
deleted file mode 100644
index b461d791188c..000000000000
--- a/drivers/tty/serial/serial_ks8695.c
+++ /dev/null
@@ -1,698 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *  Driver for KS8695 serial ports
- *
- *  Based on drivers/serial/serial_amba.c, by Kam Lee.
- *
- *  Copyright 2002-2005 Micrel Inc.
- */
-#include <linux/module.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/serial.h>
-#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/device.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/mach/irq.h>
-
-#include <mach/regs-uart.h>
-#include <mach/regs-irq.h>
-
-#if defined(CONFIG_SERIAL_KS8695_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
-#define SUPPORT_SYSRQ
-#endif
-
-#include <linux/serial_core.h>
-
-
-#define SERIAL_KS8695_MAJOR	204
-#define SERIAL_KS8695_MINOR	16
-#define SERIAL_KS8695_DEVNAME	"ttyAM"
-
-#define SERIAL_KS8695_NR	1
-
-/*
- * Access macros for the KS8695 UART
- */
-#define UART_GET_CHAR(p)	(__raw_readl((p)->membase + KS8695_URRB) & 0xFF)
-#define UART_PUT_CHAR(p, c)	__raw_writel((c), (p)->membase + KS8695_URTH)
-#define UART_GET_FCR(p)		__raw_readl((p)->membase + KS8695_URFC)
-#define UART_PUT_FCR(p, c)	__raw_writel((c), (p)->membase + KS8695_URFC)
-#define UART_GET_MSR(p)		__raw_readl((p)->membase + KS8695_URMS)
-#define UART_GET_LSR(p)		__raw_readl((p)->membase + KS8695_URLS)
-#define UART_GET_LCR(p)		__raw_readl((p)->membase + KS8695_URLC)
-#define UART_PUT_LCR(p, c)	__raw_writel((c), (p)->membase + KS8695_URLC)
-#define UART_GET_MCR(p)		__raw_readl((p)->membase + KS8695_URMC)
-#define UART_PUT_MCR(p, c)	__raw_writel((c), (p)->membase + KS8695_URMC)
-#define UART_GET_BRDR(p)	__raw_readl((p)->membase + KS8695_URBD)
-#define UART_PUT_BRDR(p, c)	__raw_writel((c), (p)->membase + KS8695_URBD)
-
-#define KS8695_CLR_TX_INT()	__raw_writel(1 << KS8695_IRQ_UART_TX, KS8695_IRQ_VA + KS8695_INTST)
-
-#define UART_DUMMY_LSR_RX	0x100
-#define UART_PORT_SIZE		(KS8695_USR - KS8695_URRB + 4)
-
-static inline int tx_enabled(struct uart_port *port)
-{
-	return port->unused[0] & 1;
-}
-
-static inline int rx_enabled(struct uart_port *port)
-{
-	return port->unused[0] & 2;
-}
-
-static inline int ms_enabled(struct uart_port *port)
-{
-	return port->unused[0] & 4;
-}
-
-static inline void ms_enable(struct uart_port *port, int enabled)
-{
-	if(enabled)
-		port->unused[0] |= 4;
-	else
-		port->unused[0] &= ~4;
-}
-
-static inline void rx_enable(struct uart_port *port, int enabled)
-{
-	if(enabled)
-		port->unused[0] |= 2;
-	else
-		port->unused[0] &= ~2;
-}
-
-static inline void tx_enable(struct uart_port *port, int enabled)
-{
-	if(enabled)
-		port->unused[0] |= 1;
-	else
-		port->unused[0] &= ~1;
-}
-
-
-#ifdef SUPPORT_SYSRQ
-static struct console ks8695_console;
-#endif
-
-static void ks8695uart_stop_tx(struct uart_port *port)
-{
-	if (tx_enabled(port)) {
-		/* use disable_irq_nosync() and not disable_irq() to avoid self
-		 * imposed deadlock by not waiting for irq handler to end,
-		 * since this ks8695uart_stop_tx() is called from interrupt context.
-		 */
-		disable_irq_nosync(KS8695_IRQ_UART_TX);
-		tx_enable(port, 0);
-	}
-}
-
-static void ks8695uart_start_tx(struct uart_port *port)
-{
-	if (!tx_enabled(port)) {
-		enable_irq(KS8695_IRQ_UART_TX);
-		tx_enable(port, 1);
-	}
-}
-
-static void ks8695uart_stop_rx(struct uart_port *port)
-{
-	if (rx_enabled(port)) {
-		disable_irq(KS8695_IRQ_UART_RX);
-		rx_enable(port, 0);
-	}
-}
-
-static void ks8695uart_enable_ms(struct uart_port *port)
-{
-	if (!ms_enabled(port)) {
-		enable_irq(KS8695_IRQ_UART_MODEM_STATUS);
-		ms_enable(port,1);
-	}
-}
-
-static void ks8695uart_disable_ms(struct uart_port *port)
-{
-	if (ms_enabled(port)) {
-		disable_irq(KS8695_IRQ_UART_MODEM_STATUS);
-		ms_enable(port,0);
-	}
-}
-
-static irqreturn_t ks8695uart_rx_chars(int irq, void *dev_id)
-{
-	struct uart_port *port = dev_id;
-	unsigned int status, ch, lsr, flg, max_count = 256;
-
-	status = UART_GET_LSR(port);		/* clears pending LSR interrupts */
-	while ((status & URLS_URDR) && max_count--) {
-		ch = UART_GET_CHAR(port);
-		flg = TTY_NORMAL;
-
-		port->icount.rx++;
-
-		/*
-		 * Note that the error handling code is
-		 * out of the main execution path
-		 */
-		lsr = UART_GET_LSR(port) | UART_DUMMY_LSR_RX;
-		if (unlikely(lsr & (URLS_URBI | URLS_URPE | URLS_URFE | URLS_URROE))) {
-			if (lsr & URLS_URBI) {
-				lsr &= ~(URLS_URFE | URLS_URPE);
-				port->icount.brk++;
-				if (uart_handle_break(port))
-					goto ignore_char;
-			}
-			if (lsr & URLS_URPE)
-				port->icount.parity++;
-			if (lsr & URLS_URFE)
-				port->icount.frame++;
-			if (lsr & URLS_URROE)
-				port->icount.overrun++;
-
-			lsr &= port->read_status_mask;
-
-			if (lsr & URLS_URBI)
-				flg = TTY_BREAK;
-			else if (lsr & URLS_URPE)
-				flg = TTY_PARITY;
-			else if (lsr & URLS_URFE)
-				flg = TTY_FRAME;
-		}
-
-		if (uart_handle_sysrq_char(port, ch))
-			goto ignore_char;
-
-		uart_insert_char(port, lsr, URLS_URROE, ch, flg);
-
-ignore_char:
-		status = UART_GET_LSR(port);
-	}
-	tty_flip_buffer_push(&port->state->port);
-
-	return IRQ_HANDLED;
-}
-
-
-static irqreturn_t ks8695uart_tx_chars(int irq, void *dev_id)
-{
-	struct uart_port *port = dev_id;
-	struct circ_buf *xmit = &port->state->xmit;
-	unsigned int count;
-
-	if (port->x_char) {
-		KS8695_CLR_TX_INT();
-		UART_PUT_CHAR(port, port->x_char);
-		port->icount.tx++;
-		port->x_char = 0;
-		return IRQ_HANDLED;
-	}
-
-	if (uart_tx_stopped(port) || uart_circ_empty(xmit)) {
-		ks8695uart_stop_tx(port);
-		return IRQ_HANDLED;
-	}
-
-	count = 16;	/* fifo size */
-	while (!uart_circ_empty(xmit) && (count-- > 0)) {
-		KS8695_CLR_TX_INT();
-		UART_PUT_CHAR(port, xmit->buf[xmit->tail]);
-
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	if (uart_circ_empty(xmit))
-		ks8695uart_stop_tx(port);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ks8695uart_modem_status(int irq, void *dev_id)
-{
-	struct uart_port *port = dev_id;
-	unsigned int status;
-
-	/*
-	 * clear modem interrupt by reading MSR
-	 */
-	status = UART_GET_MSR(port);
-
-	if (status & URMS_URDDCD)
-		uart_handle_dcd_change(port, status & URMS_URDDCD);
-
-	if (status & URMS_URDDST)
-		port->icount.dsr++;
-
-	if (status & URMS_URDCTS)
-		uart_handle_cts_change(port, status & URMS_URDCTS);
-
-	if (status & URMS_URTERI)
-		port->icount.rng++;
-
-	wake_up_interruptible(&port->state->port.delta_msr_wait);
-
-	return IRQ_HANDLED;
-}
-
-static unsigned int ks8695uart_tx_empty(struct uart_port *port)
-{
-	return (UART_GET_LSR(port) & URLS_URTE) ? TIOCSER_TEMT : 0;
-}
-
-static unsigned int ks8695uart_get_mctrl(struct uart_port *port)
-{
-	unsigned int result = 0;
-	unsigned int status;
-
-	status = UART_GET_MSR(port);
-	if (status & URMS_URDCD)
-		result |= TIOCM_CAR;
-	if (status & URMS_URDSR)
-		result |= TIOCM_DSR;
-	if (status & URMS_URCTS)
-		result |= TIOCM_CTS;
-	if (status & URMS_URRI)
-		result |= TIOCM_RI;
-
-	return result;
-}
-
-static void ks8695uart_set_mctrl(struct uart_port *port, u_int mctrl)
-{
-	unsigned int mcr;
-
-	mcr = UART_GET_MCR(port);
-	if (mctrl & TIOCM_RTS)
-		mcr |= URMC_URRTS;
-	else
-		mcr &= ~URMC_URRTS;
-
-	if (mctrl & TIOCM_DTR)
-		mcr |= URMC_URDTR;
-	else
-		mcr &= ~URMC_URDTR;
-
-	UART_PUT_MCR(port, mcr);
-}
-
-static void ks8695uart_break_ctl(struct uart_port *port, int break_state)
-{
-	unsigned int lcr;
-
-	lcr = UART_GET_LCR(port);
-
-	if (break_state == -1)
-		lcr |= URLC_URSBC;
-	else
-		lcr &= ~URLC_URSBC;
-
-	UART_PUT_LCR(port, lcr);
-}
-
-static int ks8695uart_startup(struct uart_port *port)
-{
-	int retval;
-
-	irq_modify_status(KS8695_IRQ_UART_TX, IRQ_NOREQUEST, IRQ_NOAUTOEN);
-	tx_enable(port, 0);
-	rx_enable(port, 1);
-	ms_enable(port, 1);
-
-	/*
-	 * Allocate the IRQ
-	 */
-	retval = request_irq(KS8695_IRQ_UART_TX, ks8695uart_tx_chars, 0, "UART TX", port);
-	if (retval)
-		goto err_tx;
-
-	retval = request_irq(KS8695_IRQ_UART_RX, ks8695uart_rx_chars, 0, "UART RX", port);
-	if (retval)
-		goto err_rx;
-
-	retval = request_irq(KS8695_IRQ_UART_LINE_STATUS, ks8695uart_rx_chars, 0, "UART LineStatus", port);
-	if (retval)
-		goto err_ls;
-
-	retval = request_irq(KS8695_IRQ_UART_MODEM_STATUS, ks8695uart_modem_status, 0, "UART ModemStatus", port);
-	if (retval)
-		goto err_ms;
-
-	return 0;
-
-err_ms:
-	free_irq(KS8695_IRQ_UART_LINE_STATUS, port);
-err_ls:
-	free_irq(KS8695_IRQ_UART_RX, port);
-err_rx:
-	free_irq(KS8695_IRQ_UART_TX, port);
-err_tx:
-	return retval;
-}
-
-static void ks8695uart_shutdown(struct uart_port *port)
-{
-	/*
-	 * Free the interrupt
-	 */
-	free_irq(KS8695_IRQ_UART_RX, port);
-	free_irq(KS8695_IRQ_UART_TX, port);
-	free_irq(KS8695_IRQ_UART_MODEM_STATUS, port);
-	free_irq(KS8695_IRQ_UART_LINE_STATUS, port);
-
-	/* disable break condition and fifos */
-	UART_PUT_LCR(port, UART_GET_LCR(port) & ~URLC_URSBC);
-	UART_PUT_FCR(port, UART_GET_FCR(port) & ~URFC_URFE);
-}
-
-static void ks8695uart_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old)
-{
-	unsigned int lcr, fcr = 0;
-	unsigned long flags;
-	unsigned int baud, quot;
-
-	/*
-	 * Ask the core to calculate the divisor for us.
-	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
-	quot = uart_get_divisor(port, baud);
-
-	switch (termios->c_cflag & CSIZE) {
-	case CS5:
-		lcr = URCL_5;
-		break;
-	case CS6:
-		lcr = URCL_6;
-		break;
-	case CS7:
-		lcr = URCL_7;
-		break;
-	default:
-		lcr = URCL_8;
-		break;
-	}
-
-	/* stop bits */
-	if (termios->c_cflag & CSTOPB)
-		lcr |= URLC_URSB;
-
-	/* parity */
-	if (termios->c_cflag & PARENB) {
-		if (termios->c_cflag & CMSPAR) {	/* Mark or Space parity */
-			if (termios->c_cflag & PARODD)
-				lcr |= URPE_MARK;
-			else
-				lcr |= URPE_SPACE;
-		}
-		else if (termios->c_cflag & PARODD)
-			lcr |= URPE_ODD;
-		else
-			lcr |= URPE_EVEN;
-	}
-
-	if (port->fifosize > 1)
-		fcr = URFC_URFRT_8 | URFC_URTFR | URFC_URRFR | URFC_URFE;
-
-	spin_lock_irqsave(&port->lock, flags);
-
-	/*
-	 * Update the per-port timeout.
-	 */
-	uart_update_timeout(port, termios->c_cflag, baud);
-
-	port->read_status_mask = URLS_URROE;
-	if (termios->c_iflag & INPCK)
-		port->read_status_mask |= (URLS_URFE | URLS_URPE);
-	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
-		port->read_status_mask |= URLS_URBI;
-
-	/*
-	 * Characters to ignore
-	 */
-	port->ignore_status_mask = 0;
-	if (termios->c_iflag & IGNPAR)
-		port->ignore_status_mask |= (URLS_URFE | URLS_URPE);
-	if (termios->c_iflag & IGNBRK) {
-		port->ignore_status_mask |= URLS_URBI;
-		/*
-		 * If we're ignoring parity and break indicators,
-		 * ignore overruns too (for real raw support).
-		 */
-		if (termios->c_iflag & IGNPAR)
-			port->ignore_status_mask |= URLS_URROE;
-	}
-
-	/*
-	 * Ignore all characters if CREAD is not set.
-	 */
-	if ((termios->c_cflag & CREAD) == 0)
-		port->ignore_status_mask |= UART_DUMMY_LSR_RX;
-
-	/* first, disable everything */
-	if (UART_ENABLE_MS(port, termios->c_cflag))
-		ks8695uart_enable_ms(port);
-	else
-		ks8695uart_disable_ms(port);
-
-	/* Set baud rate */
-	UART_PUT_BRDR(port, quot);
-
-	UART_PUT_LCR(port, lcr);
-	UART_PUT_FCR(port, fcr);
-
-	spin_unlock_irqrestore(&port->lock, flags);
-}
-
-static const char *ks8695uart_type(struct uart_port *port)
-{
-	return port->type == PORT_KS8695 ? "KS8695" : NULL;
-}
-
-/*
- * Release the memory region(s) being used by 'port'
- */
-static void ks8695uart_release_port(struct uart_port *port)
-{
-	release_mem_region(port->mapbase, UART_PORT_SIZE);
-}
-
-/*
- * Request the memory region(s) being used by 'port'
- */
-static int ks8695uart_request_port(struct uart_port *port)
-{
-	return request_mem_region(port->mapbase, UART_PORT_SIZE,
-			"serial_ks8695") != NULL ? 0 : -EBUSY;
-}
-
-/*
- * Configure/autoconfigure the port.
- */
-static void ks8695uart_config_port(struct uart_port *port, int flags)
-{
-	if (flags & UART_CONFIG_TYPE) {
-		port->type = PORT_KS8695;
-		ks8695uart_request_port(port);
-	}
-}
-
-/*
- * verify the new serial_struct (for TIOCSSERIAL).
- */
-static int ks8695uart_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
-	int ret = 0;
-
-	if (ser->type != PORT_UNKNOWN && ser->type != PORT_KS8695)
-		ret = -EINVAL;
-	if (ser->irq != port->irq)
-		ret = -EINVAL;
-	if (ser->baud_base < 9600)
-		ret = -EINVAL;
-	return ret;
-}
-
-static struct uart_ops ks8695uart_pops = {
-	.tx_empty	= ks8695uart_tx_empty,
-	.set_mctrl	= ks8695uart_set_mctrl,
-	.get_mctrl	= ks8695uart_get_mctrl,
-	.stop_tx	= ks8695uart_stop_tx,
-	.start_tx	= ks8695uart_start_tx,
-	.stop_rx	= ks8695uart_stop_rx,
-	.enable_ms	= ks8695uart_enable_ms,
-	.break_ctl	= ks8695uart_break_ctl,
-	.startup	= ks8695uart_startup,
-	.shutdown	= ks8695uart_shutdown,
-	.set_termios	= ks8695uart_set_termios,
-	.type		= ks8695uart_type,
-	.release_port	= ks8695uart_release_port,
-	.request_port	= ks8695uart_request_port,
-	.config_port	= ks8695uart_config_port,
-	.verify_port	= ks8695uart_verify_port,
-};
-
-static struct uart_port ks8695uart_ports[SERIAL_KS8695_NR] = {
-	{
-		.membase	= KS8695_UART_VA,
-		.mapbase	= KS8695_UART_PA,
-		.iotype		= SERIAL_IO_MEM,
-		.irq		= KS8695_IRQ_UART_TX,
-		.uartclk	= KS8695_CLOCK_RATE * 16,
-		.fifosize	= 16,
-		.ops		= &ks8695uart_pops,
-		.flags		= UPF_BOOT_AUTOCONF,
-		.line		= 0,
-	}
-};
-
-#ifdef CONFIG_SERIAL_KS8695_CONSOLE
-static void ks8695_console_putchar(struct uart_port *port, int ch)
-{
-	while (!(UART_GET_LSR(port) & URLS_URTHRE))
-		barrier();
-
-	UART_PUT_CHAR(port, ch);
-}
-
-static void ks8695_console_write(struct console *co, const char *s, u_int count)
-{
-	struct uart_port *port = ks8695uart_ports + co->index;
-
-	uart_console_write(port, s, count, ks8695_console_putchar);
-}
-
-static void __init ks8695_console_get_options(struct uart_port *port, int *baud, int *parity, int *bits)
-{
-	unsigned int lcr;
-
-	lcr = UART_GET_LCR(port);
-
-	switch (lcr & URLC_PARITY) {
-		case URPE_ODD:
-			*parity = 'o';
-			break;
-		case URPE_EVEN:
-			*parity = 'e';
-			break;
-		default:
-			*parity = 'n';
-	}
-
-	switch (lcr & URLC_URCL) {
-		case URCL_5:
-			*bits = 5;
-			break;
-		case URCL_6:
-			*bits = 6;
-			break;
-		case URCL_7:
-			*bits = 7;
-			break;
-		default:
-			*bits = 8;
-	}
-
-	*baud = port->uartclk / (UART_GET_BRDR(port) & 0x0FFF);
-	*baud /= 16;
-	*baud &= 0xFFFFFFF0;
-}
-
-static int __init ks8695_console_setup(struct console *co, char *options)
-{
-	struct uart_port *port;
-	int baud = 115200;
-	int bits = 8;
-	int parity = 'n';
-	int flow = 'n';
-
-	/*
-	 * Check whether an invalid uart number has been specified, and
-	 * if so, search for the first available port that does have
-	 * console support.
-	 */
-	port = uart_get_console(ks8695uart_ports, SERIAL_KS8695_NR, co);
-
-	if (options)
-		uart_parse_options(options, &baud, &parity, &bits, &flow);
-	else
-		ks8695_console_get_options(port, &baud, &parity, &bits);
-
-	return uart_set_options(port, co, baud, parity, bits, flow);
-}
-
-static struct uart_driver ks8695_reg;
-
-static struct console ks8695_console = {
-	.name		= SERIAL_KS8695_DEVNAME,
-	.write		= ks8695_console_write,
-	.device		= uart_console_device,
-	.setup		= ks8695_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-	.data		= &ks8695_reg,
-};
-
-static int __init ks8695_console_init(void)
-{
-	add_preferred_console(SERIAL_KS8695_DEVNAME, 0, NULL);
-	register_console(&ks8695_console);
-	return 0;
-}
-
-console_initcall(ks8695_console_init);
-
-#define KS8695_CONSOLE	&ks8695_console
-#else
-#define KS8695_CONSOLE	NULL
-#endif
-
-static struct uart_driver ks8695_reg = {
-	.owner			= THIS_MODULE,
-	.driver_name		= "serial_ks8695",
-	.dev_name		= SERIAL_KS8695_DEVNAME,
-	.major			= SERIAL_KS8695_MAJOR,
-	.minor			= SERIAL_KS8695_MINOR,
-	.nr			= SERIAL_KS8695_NR,
-	.cons			= KS8695_CONSOLE,
-};
-
-static int __init ks8695uart_init(void)
-{
-	int i, ret;
-
-	printk(KERN_INFO "Serial: Micrel KS8695 UART driver\n");
-
-	ret = uart_register_driver(&ks8695_reg);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < SERIAL_KS8695_NR; i++)
-		uart_add_one_port(&ks8695_reg, &ks8695uart_ports[0]);
-
-	return 0;
-}
-
-static void __exit ks8695uart_exit(void)
-{
-	int i;
-
-	for (i = 0; i < SERIAL_KS8695_NR; i++)
-		uart_remove_one_port(&ks8695_reg, &ks8695uart_ports[0]);
-	uart_unregister_driver(&ks8695_reg);
-}
-
-module_init(ks8695uart_init);
-module_exit(ks8695uart_exit);
-
-MODULE_DESCRIPTION("KS8695 serial port driver");
-MODULE_AUTHOR("Micrel Inc.");
-MODULE_LICENSE("GPL");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 3cc3af1c2ee1..e8dc1787c3c6 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -161,9 +161,6 @@
 /* Blackfin bf5xx */
 #define PORT_BFIN	75
 
-/* Micrel KS8695 */
-#define PORT_KS8695	76
-
 /* Broadcom SB1250, etc. SOC */
 #define PORT_SB1250_DUART	77
 
-- 
cgit v1.2.3


From 8515dbc1f51b4f728908cdb993c849c83743aba7 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Sat, 10 Aug 2019 03:01:29 +0800
Subject: serial: 8250_pci: Add support for Sunix serial boards

Add support to Sunix serial boards with up to 16 ports.

Sunix board need its own setup callback instead of using Timedia's, to
properly support more than 4 ports.

Cc: Morris Ku <morris_ku@sunix.com>
Cc: Debbie Liu <debbie_liu@sunix.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Link: https://lore.kernel.org/r/20190809190130.30773-1-kai.heng.feng@canonical.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c  | 93 ++++++++++++++++++++++++++++++-------
 drivers/tty/serial/8250/8250_port.c |  8 ++++
 include/uapi/linux/serial_core.h    |  3 ++
 3 files changed, 87 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index d0be326a17e5..c0d10a35bf70 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -1803,6 +1803,30 @@ pci_wch_ch38x_setup(struct serial_private *priv,
 	return pci_default_setup(priv, board, port, idx);
 }
 
+static int
+pci_sunix_setup(struct serial_private *priv,
+		const struct pciserial_board *board,
+		struct uart_8250_port *port, int idx)
+{
+	int bar;
+	int offset;
+
+	port->port.flags |= UPF_FIXED_TYPE;
+	port->port.type = PORT_SUNIX;
+
+	if (idx < 4) {
+		bar = 0;
+		offset = idx * board->uart_offset;
+	} else {
+		bar = 1;
+		idx -= 4;
+		idx = div_s64_rem(idx, 4, &offset);
+		offset = idx * 64 + offset * board->uart_offset;
+	}
+
+	return setup_port(priv, port, bar, offset, 0);
+}
+
 #define PCI_VENDOR_ID_SBSMODULARIO	0x124B
 #define PCI_SUBVENDOR_ID_SBSMODULARIO	0x124B
 #define PCI_DEVICE_ID_OCTPRO		0x0001
@@ -2490,21 +2514,14 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
 		.setup		= pci_timedia_setup,
 	},
 	/*
-	 * SUNIX (Timedia) cards
-	 * Do not "probe" for these cards as there is at least one combination
-	 * card that should be handled by parport_pc that doesn't match the
-	 * rule in pci_timedia_probe.
-	 * It is part number is MIO5079A but its subdevice ID is 0x0102.
-	 * There are some boards with part number SER5037AL that report
-	 * subdevice ID 0x0002.
+	 * Sunix PCI serial boards
 	 */
 	{
 		.vendor		= PCI_VENDOR_ID_SUNIX,
 		.device		= PCI_DEVICE_ID_SUNIX_1999,
 		.subvendor	= PCI_VENDOR_ID_SUNIX,
 		.subdevice	= PCI_ANY_ID,
-		.init		= pci_timedia_init,
-		.setup		= pci_timedia_setup,
+		.setup		= pci_sunix_setup,
 	},
 	/*
 	 * Xircom cards
@@ -2965,6 +2982,11 @@ enum pci_board_num_t {
 	pbn_pericom_PI7C9X7952,
 	pbn_pericom_PI7C9X7954,
 	pbn_pericom_PI7C9X7958,
+	pbn_sunix_pci_1s,
+	pbn_sunix_pci_2s,
+	pbn_sunix_pci_4s,
+	pbn_sunix_pci_8s,
+	pbn_sunix_pci_16s,
 };
 
 /*
@@ -3751,6 +3773,31 @@ static struct pciserial_board pci_boards[] = {
 		.base_baud      = 921600,
 		.uart_offset	= 0x8,
 	},
+	[pbn_sunix_pci_1s] = {
+		.num_ports	= 1,
+		.base_baud      = 921600,
+		.uart_offset	= 0x8,
+	},
+	[pbn_sunix_pci_2s] = {
+		.num_ports	= 2,
+		.base_baud      = 921600,
+		.uart_offset	= 0x8,
+	},
+	[pbn_sunix_pci_4s] = {
+		.num_ports	= 4,
+		.base_baud      = 921600,
+		.uart_offset	= 0x8,
+	},
+	[pbn_sunix_pci_8s] = {
+		.num_ports	= 8,
+		.base_baud      = 921600,
+		.uart_offset	= 0x8,
+	},
+	[pbn_sunix_pci_16s] = {
+		.num_ports	= 16,
+		.base_baud      = 921600,
+		.uart_offset	= 0x8,
+	},
 };
 
 static const struct pci_device_id blacklist[] = {
@@ -4788,17 +4835,29 @@ static const struct pci_device_id serial_pci_tbl[] = {
 		pbn_b0_bt_1_921600 },
 
 	/*
-	 * SUNIX (TIMEDIA)
+	 * Sunix PCI serial boards
 	 */
 	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
-		PCI_VENDOR_ID_SUNIX, PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xffff00,
-		pbn_b0_bt_1_921600 },
-
+		PCI_VENDOR_ID_SUNIX, 0x0001, 0, 0,
+		pbn_sunix_pci_1s },
 	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
-		PCI_VENDOR_ID_SUNIX, PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00,
-		pbn_b0_bt_1_921600 },
+		PCI_VENDOR_ID_SUNIX, 0x0002, 0, 0,
+		pbn_sunix_pci_2s },
+	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
+		PCI_VENDOR_ID_SUNIX, 0x0004, 0, 0,
+		pbn_sunix_pci_4s },
+	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
+		PCI_VENDOR_ID_SUNIX, 0x0084, 0, 0,
+		pbn_sunix_pci_4s },
+	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
+		PCI_VENDOR_ID_SUNIX, 0x0008, 0, 0,
+		pbn_sunix_pci_8s },
+	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
+		PCI_VENDOR_ID_SUNIX, 0x0088, 0, 0,
+		pbn_sunix_pci_8s },
+	{	PCI_VENDOR_ID_SUNIX, PCI_DEVICE_ID_SUNIX_1999,
+		PCI_VENDOR_ID_SUNIX, 0x0010, 0, 0,
+		pbn_sunix_pci_16s },
 
 	/*
 	 * AFAVLAB serial card, from Harald Welte <laforge@gnumonks.org>
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 706645f89132..8407166610ce 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -301,6 +301,14 @@ static const struct serial8250_config uart_config[] = {
 		.rxtrig_bytes	= {1, 4, 8, 14},
 		.flags		= UART_CAP_FIFO,
 	},
+	[PORT_SUNIX] = {
+		.name		= "Sunix",
+		.fifo_size	= 128,
+		.tx_loadsz	= 128,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.rxtrig_bytes	= {1, 32, 64, 112},
+		.flags		= UART_CAP_FIFO | UART_CAP_SLEEP,
+	},
 };
 
 /* Uart divisor latch read */
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index e8dc1787c3c6..bd0d9d176a66 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -287,4 +287,7 @@
 /* SiFive UART */
 #define PORT_SIFIVE_V0	120
 
+/* Sunix UART */
+#define PORT_SUNIX	121
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3


From 09864c1cdf5c537bd01bff45181406e422ea988c Mon Sep 17 00:00:00 2001
From: Stefan-gabriel Mirea <stefan-gabriel.mirea@nxp.com>
Date: Fri, 9 Aug 2019 11:29:16 +0000
Subject: tty: serial: Add linflexuart driver for S32V234

Introduce support for LINFlex driver, based on:
- the version of Freescale LPUART driver after commit b3e3bf2ef2c7 ("Merge
  4.0-rc7 into tty-next");
- commit abf1e0a98083 ("tty: serial: fsl_lpuart: lock port on console
  write").
In this basic version, the driver can be tested using initramfs and relies
on the clocks and pin muxing set up by U-Boot.

Remarks concerning the earlycon support:

- LinFlexD does not allow character transmissions in the INIT mode (see
  section 47.4.2.1 in the reference manual[1]). Therefore, a mutual
  exclusion between the first linflex_setup_watermark/linflex_set_termios
  executions and linflex_earlycon_putchar was employed and the characters
  normally sent to earlycon during initialization are kept in a buffer and
  sent afterwards.

- Empirically, character transmission is also forbidden within the last 1-2
  ms before entering the INIT mode, so we use an explicit timeout
  (PREINIT_DELAY) between linflex_earlycon_putchar and the first call to
  linflex_setup_watermark.

- U-Boot currently uses the UART FIFO mode, while this driver makes the
  transition to the buffer mode. Therefore, the earlycon putchar function
  matches the U-Boot behavior before initializations and the Linux behavior
  after.

[1] https://www.nxp.com/webapp/Download?colCode=S32V234RM

Signed-off-by: Stoica Cosmin-Stefan <cosmin.stoica@nxp.com>
Signed-off-by: Adrian.Nitu <adrian.nitu@freescale.com>
Signed-off-by: Larisa Grigore <Larisa.Grigore@nxp.com>
Signed-off-by: Ana Nedelcu <B56683@freescale.com>
Signed-off-by: Mihaela Martinas <Mihaela.Martinas@freescale.com>
Signed-off-by: Matthew Nunez <matthew.nunez@nxp.com>
[stefan-gabriel.mirea@nxp.com: Reduced for upstreaming and implemented
                               earlycon support]
Signed-off-by: Stefan-Gabriel Mirea <stefan-gabriel.mirea@nxp.com>
Link: https://lore.kernel.org/r/20190809112853.15846-6-stefan-gabriel.mirea@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |   6 +
 drivers/tty/serial/Kconfig                      |  15 +
 drivers/tty/serial/Makefile                     |   1 +
 drivers/tty/serial/fsl_linflexuart.c            | 942 ++++++++++++++++++++++++
 include/uapi/linux/serial_core.h                |   3 +
 5 files changed, 967 insertions(+)
 create mode 100644 drivers/tty/serial/fsl_linflexuart.c

(limited to 'include')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 47d981a86e2f..9ee66d61986b 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1090,6 +1090,12 @@
 			the framebuffer, pass the 'ram' option so that it is
 			mapped with the correct attributes.
 
+		linflex,<addr>
+			Use early console provided by Freescale LinFlex UART
+			serial driver for NXP S32V234 SoCs. A valid base
+			address must be provided, and the serial port must
+			already be setup and configured.
+
 	earlyprintk=	[X86,SH,ARM,M68k,S390]
 			earlyprintk=vga
 			earlyprintk=sclp
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 7041107ea78d..db524e2c6db3 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1416,6 +1416,21 @@ config SERIAL_FSL_LPUART_CONSOLE
 	  If you have enabled the lpuart serial port on the Freescale SoCs,
 	  you can make it the console by answering Y to this option.
 
+config SERIAL_FSL_LINFLEXUART
+	tristate "Freescale linflexuart serial port support"
+	select SERIAL_CORE
+	help
+	  Support for the on-chip linflexuart on some Freescale SOCs.
+
+config SERIAL_FSL_LINFLEXUART_CONSOLE
+	bool "Console on Freescale linflexuart serial port"
+	depends on SERIAL_FSL_LINFLEXUART=y
+	select SERIAL_CORE_CONSOLE
+	select SERIAL_EARLYCON
+	help
+	  If you have enabled the linflexuart serial port on the Freescale
+	  SoCs, you can make it the console by answering Y to this option.
+
 config SERIAL_CONEXANT_DIGICOLOR
 	tristate "Conexant Digicolor CX92xxx USART serial port support"
 	depends on OF
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 7f744136489e..490d74533fbd 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o
 obj-$(CONFIG_SERIAL_ARC)	+= arc_uart.o
 obj-$(CONFIG_SERIAL_RP2)	+= rp2.o
 obj-$(CONFIG_SERIAL_FSL_LPUART)	+= fsl_lpuart.o
+obj-$(CONFIG_SERIAL_FSL_LINFLEXUART)	+= fsl_linflexuart.o
 obj-$(CONFIG_SERIAL_CONEXANT_DIGICOLOR)	+= digicolor-usart.o
 obj-$(CONFIG_SERIAL_MEN_Z135)	+= men_z135_uart.o
 obj-$(CONFIG_SERIAL_SPRD) += sprd_serial.o
diff --git a/drivers/tty/serial/fsl_linflexuart.c b/drivers/tty/serial/fsl_linflexuart.c
new file mode 100644
index 000000000000..26b9601a0952
--- /dev/null
+++ b/drivers/tty/serial/fsl_linflexuart.c
@@ -0,0 +1,942 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale linflexuart serial port driver
+ *
+ * Copyright 2012-2016 Freescale Semiconductor, Inc.
+ * Copyright 2017-2018 NXP
+ */
+
+#if defined(CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE) && \
+	defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/console.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/serial_core.h>
+#include <linux/slab.h>
+#include <linux/tty_flip.h>
+#include <linux/delay.h>
+
+/* All registers are 32-bit width */
+
+#define LINCR1	0x0000	/* LIN control register				*/
+#define LINIER	0x0004	/* LIN interrupt enable register		*/
+#define LINSR	0x0008	/* LIN status register				*/
+#define LINESR	0x000C	/* LIN error status register			*/
+#define UARTCR	0x0010	/* UART mode control register			*/
+#define UARTSR	0x0014	/* UART mode status register			*/
+#define LINTCSR	0x0018	/* LIN timeout control status register		*/
+#define LINOCR	0x001C	/* LIN output compare register			*/
+#define LINTOCR	0x0020	/* LIN timeout control register			*/
+#define LINFBRR	0x0024	/* LIN fractional baud rate register		*/
+#define LINIBRR	0x0028	/* LIN integer baud rate register		*/
+#define LINCFR	0x002C	/* LIN checksum field register			*/
+#define LINCR2	0x0030	/* LIN control register 2			*/
+#define BIDR	0x0034	/* Buffer identifier register			*/
+#define BDRL	0x0038	/* Buffer data register least significant	*/
+#define BDRM	0x003C	/* Buffer data register most significant	*/
+#define IFER	0x0040	/* Identifier filter enable register		*/
+#define IFMI	0x0044	/* Identifier filter match index		*/
+#define IFMR	0x0048	/* Identifier filter mode register		*/
+#define GCR	0x004C	/* Global control register			*/
+#define UARTPTO	0x0050	/* UART preset timeout register			*/
+#define UARTCTO	0x0054	/* UART current timeout register		*/
+
+/*
+ * Register field definitions
+ */
+
+#define LINFLEXD_LINCR1_INIT		BIT(0)
+#define LINFLEXD_LINCR1_MME		BIT(4)
+#define LINFLEXD_LINCR1_BF		BIT(7)
+
+#define LINFLEXD_LINSR_LINS_INITMODE	BIT(12)
+#define LINFLEXD_LINSR_LINS_MASK	(0xF << 12)
+
+#define LINFLEXD_LINIER_SZIE		BIT(15)
+#define LINFLEXD_LINIER_OCIE		BIT(14)
+#define LINFLEXD_LINIER_BEIE		BIT(13)
+#define LINFLEXD_LINIER_CEIE		BIT(12)
+#define LINFLEXD_LINIER_HEIE		BIT(11)
+#define LINFLEXD_LINIER_FEIE		BIT(8)
+#define LINFLEXD_LINIER_BOIE		BIT(7)
+#define LINFLEXD_LINIER_LSIE		BIT(6)
+#define LINFLEXD_LINIER_WUIE		BIT(5)
+#define LINFLEXD_LINIER_DBFIE		BIT(4)
+#define LINFLEXD_LINIER_DBEIETOIE	BIT(3)
+#define LINFLEXD_LINIER_DRIE		BIT(2)
+#define LINFLEXD_LINIER_DTIE		BIT(1)
+#define LINFLEXD_LINIER_HRIE		BIT(0)
+
+#define LINFLEXD_UARTCR_OSR_MASK	(0xF << 24)
+#define LINFLEXD_UARTCR_OSR(uartcr)	(((uartcr) \
+					& LINFLEXD_UARTCR_OSR_MASK) >> 24)
+
+#define LINFLEXD_UARTCR_ROSE		BIT(23)
+
+#define LINFLEXD_UARTCR_RFBM		BIT(9)
+#define LINFLEXD_UARTCR_TFBM		BIT(8)
+#define LINFLEXD_UARTCR_WL1		BIT(7)
+#define LINFLEXD_UARTCR_PC1		BIT(6)
+
+#define LINFLEXD_UARTCR_RXEN		BIT(5)
+#define LINFLEXD_UARTCR_TXEN		BIT(4)
+#define LINFLEXD_UARTCR_PC0		BIT(3)
+
+#define LINFLEXD_UARTCR_PCE		BIT(2)
+#define LINFLEXD_UARTCR_WL0		BIT(1)
+#define LINFLEXD_UARTCR_UART		BIT(0)
+
+#define LINFLEXD_UARTSR_SZF		BIT(15)
+#define LINFLEXD_UARTSR_OCF		BIT(14)
+#define LINFLEXD_UARTSR_PE3		BIT(13)
+#define LINFLEXD_UARTSR_PE2		BIT(12)
+#define LINFLEXD_UARTSR_PE1		BIT(11)
+#define LINFLEXD_UARTSR_PE0		BIT(10)
+#define LINFLEXD_UARTSR_RMB		BIT(9)
+#define LINFLEXD_UARTSR_FEF		BIT(8)
+#define LINFLEXD_UARTSR_BOF		BIT(7)
+#define LINFLEXD_UARTSR_RPS		BIT(6)
+#define LINFLEXD_UARTSR_WUF		BIT(5)
+#define LINFLEXD_UARTSR_4		BIT(4)
+
+#define LINFLEXD_UARTSR_TO		BIT(3)
+
+#define LINFLEXD_UARTSR_DRFRFE		BIT(2)
+#define LINFLEXD_UARTSR_DTFTFF		BIT(1)
+#define LINFLEXD_UARTSR_NF		BIT(0)
+#define LINFLEXD_UARTSR_PE		(LINFLEXD_UARTSR_PE0 |\
+					 LINFLEXD_UARTSR_PE1 |\
+					 LINFLEXD_UARTSR_PE2 |\
+					 LINFLEXD_UARTSR_PE3)
+
+#define LINFLEX_LDIV_MULTIPLIER		(16)
+
+#define DRIVER_NAME	"fsl-linflexuart"
+#define DEV_NAME	"ttyLF"
+#define UART_NR		4
+
+#define EARLYCON_BUFFER_INITIAL_CAP	8
+
+#define PREINIT_DELAY			2000 /* us */
+
+static const struct of_device_id linflex_dt_ids[] = {
+	{
+		.compatible = "fsl,s32-linflexuart",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, linflex_dt_ids);
+
+#ifdef CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE
+static struct uart_port *earlycon_port;
+static bool linflex_earlycon_same_instance;
+static spinlock_t init_lock;
+static bool during_init;
+
+static struct {
+	char *content;
+	unsigned int len, cap;
+} earlycon_buf;
+#endif
+
+static void linflex_stop_tx(struct uart_port *port)
+{
+	unsigned long ier;
+
+	ier = readl(port->membase + LINIER);
+	ier &= ~(LINFLEXD_LINIER_DTIE);
+	writel(ier, port->membase + LINIER);
+}
+
+static void linflex_stop_rx(struct uart_port *port)
+{
+	unsigned long ier;
+
+	ier = readl(port->membase + LINIER);
+	writel(ier & ~LINFLEXD_LINIER_DRIE, port->membase + LINIER);
+}
+
+static inline void linflex_transmit_buffer(struct uart_port *sport)
+{
+	struct circ_buf *xmit = &sport->state->xmit;
+	unsigned char c;
+	unsigned long status;
+
+	while (!uart_circ_empty(xmit)) {
+		c = xmit->buf[xmit->tail];
+		writeb(c, sport->membase + BDRL);
+
+		/* Waiting for data transmission completed. */
+		while (((status = readl(sport->membase + UARTSR)) &
+					LINFLEXD_UARTSR_DTFTFF) !=
+					LINFLEXD_UARTSR_DTFTFF)
+			;
+
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		sport->icount.tx++;
+
+		writel(status | LINFLEXD_UARTSR_DTFTFF,
+		       sport->membase + UARTSR);
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(sport);
+
+	if (uart_circ_empty(xmit))
+		linflex_stop_tx(sport);
+}
+
+static void linflex_start_tx(struct uart_port *port)
+{
+	unsigned long ier;
+
+	linflex_transmit_buffer(port);
+	ier = readl(port->membase + LINIER);
+	writel(ier | LINFLEXD_LINIER_DTIE, port->membase + LINIER);
+}
+
+static irqreturn_t linflex_txint(int irq, void *dev_id)
+{
+	struct uart_port *sport = dev_id;
+	struct circ_buf *xmit = &sport->state->xmit;
+	unsigned long flags;
+	unsigned long status;
+
+	spin_lock_irqsave(&sport->lock, flags);
+
+	if (sport->x_char) {
+		writeb(sport->x_char, sport->membase + BDRL);
+
+		/* waiting for data transmission completed */
+		while (((status = readl(sport->membase + UARTSR)) &
+			LINFLEXD_UARTSR_DTFTFF) != LINFLEXD_UARTSR_DTFTFF)
+			;
+
+		writel(status | LINFLEXD_UARTSR_DTFTFF,
+		       sport->membase + UARTSR);
+
+		goto out;
+	}
+
+	if (uart_circ_empty(xmit) || uart_tx_stopped(sport)) {
+		linflex_stop_tx(sport);
+		goto out;
+	}
+
+	linflex_transmit_buffer(sport);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(sport);
+
+out:
+	spin_unlock_irqrestore(&sport->lock, flags);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t linflex_rxint(int irq, void *dev_id)
+{
+	struct uart_port *sport = dev_id;
+	unsigned int flg;
+	struct tty_port *port = &sport->state->port;
+	unsigned long flags, status;
+	unsigned char rx;
+
+	spin_lock_irqsave(&sport->lock, flags);
+
+	status = readl(sport->membase + UARTSR);
+	while (status & LINFLEXD_UARTSR_RMB) {
+		rx = readb(sport->membase + BDRM);
+		flg = TTY_NORMAL;
+		sport->icount.rx++;
+
+		if (status & (LINFLEXD_UARTSR_BOF | LINFLEXD_UARTSR_SZF |
+			      LINFLEXD_UARTSR_FEF | LINFLEXD_UARTSR_PE)) {
+			if (status & LINFLEXD_UARTSR_SZF)
+				status |= LINFLEXD_UARTSR_SZF;
+			if (status & LINFLEXD_UARTSR_BOF)
+				status |= LINFLEXD_UARTSR_BOF;
+			if (status & LINFLEXD_UARTSR_FEF)
+				status |= LINFLEXD_UARTSR_FEF;
+			if (status & LINFLEXD_UARTSR_PE)
+				status |=  LINFLEXD_UARTSR_PE;
+		}
+
+		writel(status | LINFLEXD_UARTSR_RMB | LINFLEXD_UARTSR_DRFRFE,
+		       sport->membase + UARTSR);
+		status = readl(sport->membase + UARTSR);
+
+		if (uart_handle_sysrq_char(sport, (unsigned char)rx))
+			continue;
+
+#ifdef SUPPORT_SYSRQ
+			sport->sysrq = 0;
+#endif
+		tty_insert_flip_char(port, rx, flg);
+	}
+
+	spin_unlock_irqrestore(&sport->lock, flags);
+
+	tty_flip_buffer_push(port);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t linflex_int(int irq, void *dev_id)
+{
+	struct uart_port *sport = dev_id;
+	unsigned long status;
+
+	status = readl(sport->membase + UARTSR);
+
+	if (status & LINFLEXD_UARTSR_DRFRFE)
+		linflex_rxint(irq, dev_id);
+	if (status & LINFLEXD_UARTSR_DTFTFF)
+		linflex_txint(irq, dev_id);
+
+	return IRQ_HANDLED;
+}
+
+/* return TIOCSER_TEMT when transmitter is not busy */
+static unsigned int linflex_tx_empty(struct uart_port *port)
+{
+	unsigned long status;
+
+	status = readl(port->membase + UARTSR) & LINFLEXD_UARTSR_DTFTFF;
+
+	return status ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int linflex_get_mctrl(struct uart_port *port)
+{
+	return 0;
+}
+
+static void linflex_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+}
+
+static void linflex_break_ctl(struct uart_port *port, int break_state)
+{
+}
+
+static void linflex_setup_watermark(struct uart_port *sport)
+{
+	unsigned long cr, ier, cr1;
+
+	/* Disable transmission/reception */
+	ier = readl(sport->membase + LINIER);
+	ier &= ~(LINFLEXD_LINIER_DRIE | LINFLEXD_LINIER_DTIE);
+	writel(ier, sport->membase + LINIER);
+
+	cr = readl(sport->membase + UARTCR);
+	cr &= ~(LINFLEXD_UARTCR_RXEN | LINFLEXD_UARTCR_TXEN);
+	writel(cr, sport->membase + UARTCR);
+
+	/* Enter initialization mode by setting INIT bit */
+
+	/* set the Linflex in master mode and activate by-pass filter */
+	cr1 = LINFLEXD_LINCR1_BF | LINFLEXD_LINCR1_MME
+	      | LINFLEXD_LINCR1_INIT;
+	writel(cr1, sport->membase + LINCR1);
+
+	/* wait for init mode entry */
+	while ((readl(sport->membase + LINSR)
+		& LINFLEXD_LINSR_LINS_MASK)
+		!= LINFLEXD_LINSR_LINS_INITMODE)
+		;
+
+	/*
+	 *	UART = 0x1;		- Linflex working in UART mode
+	 *	TXEN = 0x1;		- Enable transmission of data now
+	 *	RXEn = 0x1;		- Receiver enabled
+	 *	WL0 = 0x1;		- 8 bit data
+	 *	PCE = 0x0;		- No parity
+	 */
+
+	/* set UART bit to allow writing other bits */
+	writel(LINFLEXD_UARTCR_UART, sport->membase + UARTCR);
+
+	cr = (LINFLEXD_UARTCR_RXEN | LINFLEXD_UARTCR_TXEN |
+	      LINFLEXD_UARTCR_WL0 | LINFLEXD_UARTCR_UART);
+
+	writel(cr, sport->membase + UARTCR);
+
+	cr1 &= ~(LINFLEXD_LINCR1_INIT);
+
+	writel(cr1, sport->membase + LINCR1);
+
+	ier = readl(sport->membase + LINIER);
+	ier |= LINFLEXD_LINIER_DRIE;
+	ier |= LINFLEXD_LINIER_DTIE;
+
+	writel(ier, sport->membase + LINIER);
+}
+
+static int linflex_startup(struct uart_port *port)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	linflex_setup_watermark(port);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	ret = devm_request_irq(port->dev, port->irq, linflex_int, 0,
+			       DRIVER_NAME, port);
+
+	return ret;
+}
+
+static void linflex_shutdown(struct uart_port *port)
+{
+	unsigned long ier;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* disable interrupts */
+	ier = readl(port->membase + LINIER);
+	ier &= ~(LINFLEXD_LINIER_DRIE | LINFLEXD_LINIER_DTIE);
+	writel(ier, port->membase + LINIER);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	devm_free_irq(port->dev, port->irq, port);
+}
+
+static void
+linflex_set_termios(struct uart_port *port, struct ktermios *termios,
+		    struct ktermios *old)
+{
+	unsigned long flags;
+	unsigned long cr, old_cr, cr1;
+	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
+
+	cr = readl(port->membase + UARTCR);
+	old_cr = cr;
+
+	/* Enter initialization mode by setting INIT bit */
+	cr1 = readl(port->membase + LINCR1);
+	cr1 |= LINFLEXD_LINCR1_INIT;
+	writel(cr1, port->membase + LINCR1);
+
+	/* wait for init mode entry */
+	while ((readl(port->membase + LINSR)
+		& LINFLEXD_LINSR_LINS_MASK)
+		!= LINFLEXD_LINSR_LINS_INITMODE)
+		;
+
+	/*
+	 * only support CS8 and CS7, and for CS7 must enable PE.
+	 * supported mode:
+	 *	- (7,e/o,1)
+	 *	- (8,n,1)
+	 *	- (8,e/o,1)
+	 */
+	/* enter the UART into configuration mode */
+
+	while ((termios->c_cflag & CSIZE) != CS8 &&
+	       (termios->c_cflag & CSIZE) != CS7) {
+		termios->c_cflag &= ~CSIZE;
+		termios->c_cflag |= old_csize;
+		old_csize = CS8;
+	}
+
+	if ((termios->c_cflag & CSIZE) == CS7) {
+		/* Word length: WL1WL0:00 */
+		cr = old_cr & ~LINFLEXD_UARTCR_WL1 & ~LINFLEXD_UARTCR_WL0;
+	}
+
+	if ((termios->c_cflag & CSIZE) == CS8) {
+		/* Word length: WL1WL0:01 */
+		cr = (old_cr | LINFLEXD_UARTCR_WL0) & ~LINFLEXD_UARTCR_WL1;
+	}
+
+	if (termios->c_cflag & CMSPAR) {
+		if ((termios->c_cflag & CSIZE) != CS8) {
+			termios->c_cflag &= ~CSIZE;
+			termios->c_cflag |= CS8;
+		}
+		/* has a space/sticky bit */
+		cr |= LINFLEXD_UARTCR_WL0;
+	}
+
+	if (termios->c_cflag & CSTOPB)
+		termios->c_cflag &= ~CSTOPB;
+
+	/* parity must be enabled when CS7 to match 8-bits format */
+	if ((termios->c_cflag & CSIZE) == CS7)
+		termios->c_cflag |= PARENB;
+
+	if ((termios->c_cflag & PARENB)) {
+		cr |= LINFLEXD_UARTCR_PCE;
+		if (termios->c_cflag & PARODD)
+			cr = (cr | LINFLEXD_UARTCR_PC0) &
+			     (~LINFLEXD_UARTCR_PC1);
+		else
+			cr = cr & (~LINFLEXD_UARTCR_PC1 &
+				   ~LINFLEXD_UARTCR_PC0);
+	} else {
+		cr &= ~LINFLEXD_UARTCR_PCE;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	port->read_status_mask = 0;
+
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |=	(LINFLEXD_UARTSR_FEF |
+						 LINFLEXD_UARTSR_PE0 |
+						 LINFLEXD_UARTSR_PE1 |
+						 LINFLEXD_UARTSR_PE2 |
+						 LINFLEXD_UARTSR_PE3);
+	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
+		port->read_status_mask |= LINFLEXD_UARTSR_FEF;
+
+	/* characters to ignore */
+	port->ignore_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->ignore_status_mask |= LINFLEXD_UARTSR_PE;
+	if (termios->c_iflag & IGNBRK) {
+		port->ignore_status_mask |= LINFLEXD_UARTSR_PE;
+		/*
+		 * if we're ignoring parity and break indicators,
+		 * ignore overruns too (for real raw support).
+		 */
+		if (termios->c_iflag & IGNPAR)
+			port->ignore_status_mask |= LINFLEXD_UARTSR_BOF;
+	}
+
+	writel(cr, port->membase + UARTCR);
+
+	cr1 &= ~(LINFLEXD_LINCR1_INIT);
+
+	writel(cr1, port->membase + LINCR1);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *linflex_type(struct uart_port *port)
+{
+	return "FSL_LINFLEX";
+}
+
+static void linflex_release_port(struct uart_port *port)
+{
+	/* nothing to do */
+}
+
+static int linflex_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+/* configure/auto-configure the port */
+static void linflex_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_LINFLEXUART;
+}
+
+static const struct uart_ops linflex_pops = {
+	.tx_empty	= linflex_tx_empty,
+	.set_mctrl	= linflex_set_mctrl,
+	.get_mctrl	= linflex_get_mctrl,
+	.stop_tx	= linflex_stop_tx,
+	.start_tx	= linflex_start_tx,
+	.stop_rx	= linflex_stop_rx,
+	.break_ctl	= linflex_break_ctl,
+	.startup	= linflex_startup,
+	.shutdown	= linflex_shutdown,
+	.set_termios	= linflex_set_termios,
+	.type		= linflex_type,
+	.request_port	= linflex_request_port,
+	.release_port	= linflex_release_port,
+	.config_port	= linflex_config_port,
+};
+
+static struct uart_port *linflex_ports[UART_NR];
+
+#ifdef CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE
+static void linflex_console_putchar(struct uart_port *port, int ch)
+{
+	unsigned long cr;
+
+	cr = readl(port->membase + UARTCR);
+
+	writeb(ch, port->membase + BDRL);
+
+	if (!(cr & LINFLEXD_UARTCR_TFBM))
+		while ((readl(port->membase + UARTSR) &
+					LINFLEXD_UARTSR_DTFTFF)
+				!= LINFLEXD_UARTSR_DTFTFF)
+			;
+	else
+		while (readl(port->membase + UARTSR) &
+					LINFLEXD_UARTSR_DTFTFF)
+			;
+
+	if (!(cr & LINFLEXD_UARTCR_TFBM)) {
+		writel((readl(port->membase + UARTSR) |
+					LINFLEXD_UARTSR_DTFTFF),
+					port->membase + UARTSR);
+	}
+}
+
+static void linflex_earlycon_putchar(struct uart_port *port, int ch)
+{
+	unsigned long flags;
+	char *ret;
+
+	if (!linflex_earlycon_same_instance) {
+		linflex_console_putchar(port, ch);
+		return;
+	}
+
+	spin_lock_irqsave(&init_lock, flags);
+	if (!during_init)
+		goto outside_init;
+
+	if (earlycon_buf.len >= 1 << CONFIG_LOG_BUF_SHIFT)
+		goto init_release;
+
+	if (!earlycon_buf.cap) {
+		earlycon_buf.content = kmalloc(EARLYCON_BUFFER_INITIAL_CAP,
+					       GFP_ATOMIC);
+		earlycon_buf.cap = earlycon_buf.content ?
+				   EARLYCON_BUFFER_INITIAL_CAP : 0;
+	} else if (earlycon_buf.len == earlycon_buf.cap) {
+		ret = krealloc(earlycon_buf.content, earlycon_buf.cap << 1,
+			       GFP_ATOMIC);
+		if (ret) {
+			earlycon_buf.content = ret;
+			earlycon_buf.cap <<= 1;
+		}
+	}
+
+	if (earlycon_buf.len < earlycon_buf.cap)
+		earlycon_buf.content[earlycon_buf.len++] = ch;
+
+	goto init_release;
+
+outside_init:
+	linflex_console_putchar(port, ch);
+init_release:
+	spin_unlock_irqrestore(&init_lock, flags);
+}
+
+static void linflex_string_write(struct uart_port *sport, const char *s,
+				 unsigned int count)
+{
+	unsigned long cr, ier = 0;
+
+	ier = readl(sport->membase + LINIER);
+	linflex_stop_tx(sport);
+
+	cr = readl(sport->membase + UARTCR);
+	cr |= (LINFLEXD_UARTCR_TXEN);
+	writel(cr, sport->membase + UARTCR);
+
+	uart_console_write(sport, s, count, linflex_console_putchar);
+
+	writel(ier, sport->membase + LINIER);
+}
+
+static void
+linflex_console_write(struct console *co, const char *s, unsigned int count)
+{
+	struct uart_port *sport = linflex_ports[co->index];
+	unsigned long flags;
+	int locked = 1;
+
+	if (sport->sysrq)
+		locked = 0;
+	else if (oops_in_progress)
+		locked = spin_trylock_irqsave(&sport->lock, flags);
+	else
+		spin_lock_irqsave(&sport->lock, flags);
+
+	linflex_string_write(sport, s, count);
+
+	if (locked)
+		spin_unlock_irqrestore(&sport->lock, flags);
+}
+
+/*
+ * if the port was already initialised (eg, by a boot loader),
+ * try to determine the current setup.
+ */
+static void __init
+linflex_console_get_options(struct uart_port *sport, int *parity, int *bits)
+{
+	unsigned long cr;
+
+	cr = readl(sport->membase + UARTCR);
+	cr &= LINFLEXD_UARTCR_RXEN | LINFLEXD_UARTCR_TXEN;
+
+	if (!cr)
+		return;
+
+	/* ok, the port was enabled */
+
+	*parity = 'n';
+	if (cr & LINFLEXD_UARTCR_PCE) {
+		if (cr & LINFLEXD_UARTCR_PC0)
+			*parity = 'o';
+		else
+			*parity = 'e';
+	}
+
+	if ((cr & LINFLEXD_UARTCR_WL0) && ((cr & LINFLEXD_UARTCR_WL1) == 0)) {
+		if (cr & LINFLEXD_UARTCR_PCE)
+			*bits = 9;
+		else
+			*bits = 8;
+	}
+}
+
+static int __init linflex_console_setup(struct console *co, char *options)
+{
+	struct uart_port *sport;
+	int baud = 115200;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+	int ret;
+	int i;
+	unsigned long flags;
+	/*
+	 * check whether an invalid uart number has been specified, and
+	 * if so, search for the first available port that does have
+	 * console support.
+	 */
+	if (co->index == -1 || co->index >= ARRAY_SIZE(linflex_ports))
+		co->index = 0;
+
+	sport = linflex_ports[co->index];
+	if (!sport)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+	else
+		linflex_console_get_options(sport, &parity, &bits);
+
+	if (earlycon_port && sport->mapbase == earlycon_port->mapbase) {
+		linflex_earlycon_same_instance = true;
+
+		spin_lock_irqsave(&init_lock, flags);
+		during_init = true;
+		spin_unlock_irqrestore(&init_lock, flags);
+
+		/* Workaround for character loss or output of many invalid
+		 * characters, when INIT mode is entered shortly after a
+		 * character has just been printed.
+		 */
+		udelay(PREINIT_DELAY);
+	}
+
+	linflex_setup_watermark(sport);
+
+	ret = uart_set_options(sport, co, baud, parity, bits, flow);
+
+	if (!linflex_earlycon_same_instance)
+		goto done;
+
+	spin_lock_irqsave(&init_lock, flags);
+
+	/* Emptying buffer */
+	if (earlycon_buf.len) {
+		for (i = 0; i < earlycon_buf.len; i++)
+			linflex_console_putchar(earlycon_port,
+				earlycon_buf.content[i]);
+
+		kfree(earlycon_buf.content);
+		earlycon_buf.len = 0;
+	}
+
+	during_init = false;
+	spin_unlock_irqrestore(&init_lock, flags);
+
+done:
+	return ret;
+}
+
+static struct uart_driver linflex_reg;
+static struct console linflex_console = {
+	.name		= DEV_NAME,
+	.write		= linflex_console_write,
+	.device		= uart_console_device,
+	.setup		= linflex_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+	.data		= &linflex_reg,
+};
+
+static void linflex_earlycon_write(struct console *con, const char *s,
+				   unsigned int n)
+{
+	struct earlycon_device *dev = con->data;
+
+	uart_console_write(&dev->port, s, n, linflex_earlycon_putchar);
+}
+
+static int __init linflex_early_console_setup(struct earlycon_device *device,
+					      const char *options)
+{
+	if (!device->port.membase)
+		return -ENODEV;
+
+	device->con->write = linflex_earlycon_write;
+	earlycon_port = &device->port;
+
+	return 0;
+}
+
+OF_EARLYCON_DECLARE(linflex, "fsl,s32-linflexuart",
+		    linflex_early_console_setup);
+
+#define LINFLEX_CONSOLE	(&linflex_console)
+#else
+#define LINFLEX_CONSOLE	NULL
+#endif
+
+static struct uart_driver linflex_reg = {
+	.owner		= THIS_MODULE,
+	.driver_name	= DRIVER_NAME,
+	.dev_name	= DEV_NAME,
+	.nr		= ARRAY_SIZE(linflex_ports),
+	.cons		= LINFLEX_CONSOLE,
+};
+
+static int linflex_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct uart_port *sport;
+	struct resource *res;
+	int ret;
+
+	sport = devm_kzalloc(&pdev->dev, sizeof(*sport), GFP_KERNEL);
+	if (!sport)
+		return -ENOMEM;
+
+	ret = of_alias_get_id(np, "serial");
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to get alias id, errno %d\n", ret);
+		return ret;
+	}
+	if (ret >= UART_NR) {
+		dev_err(&pdev->dev, "driver limited to %d serial ports\n",
+			UART_NR);
+		return -ENOMEM;
+	}
+
+	sport->line = ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	sport->mapbase = res->start;
+	sport->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sport->membase))
+		return PTR_ERR(sport->membase);
+
+	sport->dev = &pdev->dev;
+	sport->type = PORT_LINFLEXUART;
+	sport->iotype = UPIO_MEM;
+	sport->irq = platform_get_irq(pdev, 0);
+	sport->ops = &linflex_pops;
+	sport->flags = UPF_BOOT_AUTOCONF;
+
+	linflex_ports[sport->line] = sport;
+
+	platform_set_drvdata(pdev, sport);
+
+	ret = uart_add_one_port(&linflex_reg, sport);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int linflex_remove(struct platform_device *pdev)
+{
+	struct uart_port *sport = platform_get_drvdata(pdev);
+
+	uart_remove_one_port(&linflex_reg, sport);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int linflex_suspend(struct device *dev)
+{
+	struct uart_port *sport = dev_get_drvdata(dev);
+
+	uart_suspend_port(&linflex_reg, sport);
+
+	return 0;
+}
+
+static int linflex_resume(struct device *dev)
+{
+	struct uart_port *sport = dev_get_drvdata(dev);
+
+	uart_resume_port(&linflex_reg, sport);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(linflex_pm_ops, linflex_suspend, linflex_resume);
+
+static struct platform_driver linflex_driver = {
+	.probe		= linflex_probe,
+	.remove		= linflex_remove,
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+		.of_match_table	= linflex_dt_ids,
+		.pm	= &linflex_pm_ops,
+	},
+};
+
+static int __init linflex_serial_init(void)
+{
+	int ret;
+
+	ret = uart_register_driver(&linflex_reg);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&linflex_driver);
+	if (ret)
+		uart_unregister_driver(&linflex_reg);
+
+#ifdef CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE
+	spin_lock_init(&init_lock);
+#endif
+
+	return ret;
+}
+
+static void __exit linflex_serial_exit(void)
+{
+	platform_driver_unregister(&linflex_driver);
+	uart_unregister_driver(&linflex_reg);
+}
+
+module_init(linflex_serial_init);
+module_exit(linflex_serial_exit);
+
+MODULE_DESCRIPTION("Freescale linflex serial port driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index bd0d9d176a66..0f4f87a6fd54 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -290,4 +290,7 @@
 /* Sunix UART */
 #define PORT_SUNIX	121
 
+/* Freescale Linflex UART */
+#define PORT_LINFLEXUART	121
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit v1.2.3


From a7b121b4b8b0bcc14fc1c2a81d34096109a65dd6 Mon Sep 17 00:00:00 2001
From: Martin Hundebøll <martin@geanix.com>
Date: Mon, 12 Aug 2019 23:12:43 +0200
Subject: tty: n_gsm: add ioctl to map serial device to mux'ed tty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Guessing the first tty for a gsm0710 multiplexed serial device is not
currently possible, which makes it racy to use with multiple modems.

Add a way to map the physical serial tty to its related mux devices
using an ioctl.

Signed-off-by: Martin Hundebøll <martin@geanix.com>
Link: https://lore.kernel.org/r/20190812211243.98686-1-martin@geanix.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/serial/n_gsm.rst | 11 +++++++++--
 drivers/tty/n_gsm.c                       |  4 ++++
 include/uapi/linux/gsmmux.h               |  2 ++
 3 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/driver-api/serial/n_gsm.rst b/Documentation/driver-api/serial/n_gsm.rst
index 0ba731ab00b2..286e7ff4d2d9 100644
--- a/Documentation/driver-api/serial/n_gsm.rst
+++ b/Documentation/driver-api/serial/n_gsm.rst
@@ -18,10 +18,13 @@ How to use it
 2. switch the serial line to using the n_gsm line discipline by using
    TIOCSETD ioctl,
 3. configure the mux using GSMIOC_GETCONF / GSMIOC_SETCONF ioctl,
+4. obtain base gsmtty number for the used serial port,
 
 Major parts of the initialization program :
 (a good starting point is util-linux-ng/sys-utils/ldattach.c)::
 
+  #include <stdio.h>
+  #include <stdint.h>
   #include <linux/gsmmux.h>
   #include <linux/tty.h>
   #define DEFAULT_SPEED	B115200
@@ -30,6 +33,7 @@ Major parts of the initialization program :
 	int ldisc = N_GSM0710;
 	struct gsm_config c;
 	struct termios configuration;
+	uint32_t first;
 
 	/* open the serial port connected to the modem */
 	fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY);
@@ -58,19 +62,22 @@ Major parts of the initialization program :
 	c.mtu = 127;
 	/* set the new configuration */
 	ioctl(fd, GSMIOC_SETCONF, &c);
+	/* get first gsmtty device node */
+	ioctl(fd, GSMIOC_GETFIRST, &first);
+	printf("first muxed line: /dev/gsmtty%i\n", first);
 
 	/* and wait for ever to keep the line discipline enabled */
 	daemon(0,0);
 	pause();
 
-4. use these devices as plain serial ports.
+5. use these devices as plain serial ports.
 
    for example, it's possible:
 
    - and to use gnokii to send / receive SMS on ttygsm1
    - to use ppp to establish a datalink on ttygsm2
 
-5. first close all virtual ports before closing the physical port.
+6. first close all virtual ports before closing the physical port.
 
    Note that after closing the physical port the modem is still in multiplexing
    mode. This may prevent a successful re-opening of the port later. To avoid
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index a60be591f1fc..d30525946892 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2613,6 +2613,7 @@ static int gsmld_ioctl(struct tty_struct *tty, struct file *file,
 {
 	struct gsm_config c;
 	struct gsm_mux *gsm = tty->disc_data;
+	unsigned int base;
 
 	switch (cmd) {
 	case GSMIOC_GETCONF:
@@ -2624,6 +2625,9 @@ static int gsmld_ioctl(struct tty_struct *tty, struct file *file,
 		if (copy_from_user(&c, (void *)arg, sizeof(c)))
 			return -EFAULT;
 		return gsm_config(gsm, &c);
+	case GSMIOC_GETFIRST:
+		base = mux_num_to_base(gsm);
+		return put_user(base + 1, (__u32 __user *)arg);
 	default:
 		return n_tty_ioctl_helper(tty, file, cmd, arg);
 	}
diff --git a/include/uapi/linux/gsmmux.h b/include/uapi/linux/gsmmux.h
index 101d3c469acb..cb8693b39cb7 100644
--- a/include/uapi/linux/gsmmux.h
+++ b/include/uapi/linux/gsmmux.h
@@ -37,5 +37,7 @@ struct gsm_netconfig {
 #define GSMIOC_ENABLE_NET      _IOW('G', 2, struct gsm_netconfig)
 #define GSMIOC_DISABLE_NET     _IO('G', 3)
 
+/* get the base tty number for a configured gsmmux tty */
+#define GSMIOC_GETFIRST		_IOR('G', 4, __u32)
 
 #endif
-- 
cgit v1.2.3


From 6c43bb3a413c0e5a73b48f35525f8a76396cda2f Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 8 Oct 2018 09:02:26 +0200
Subject: can: netns: give structs holding the CAN statistics a sensible name

This patch renames both "struct s_stats" and "struct s_pstats", to
"struct can_pkg_stats" and "struct can_rcv_lists_stats" to better
reflect their meaning and improve code readability.

The conversion is done with:

	sed -i \
		-e "s/struct s_stats/struct can_pkg_stats/g" \
		-e "s/struct s_pstats/struct can_rcv_lists_stats/g" \
		net/can/*.[ch] \
		include/net/netns/can.h

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/net/netns/can.h |  8 ++++----
 net/can/af_can.c        |  8 ++++----
 net/can/af_can.h        |  4 ++--
 net/can/proc.c          | 16 ++++++++--------
 4 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index ca9bd9fba5b5..f2e5646e36f2 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -9,8 +9,8 @@
 #include <linux/spinlock.h>
 
 struct can_dev_rcv_lists;
-struct s_stats;
-struct s_pstats;
+struct can_pkg_stats;
+struct can_rcv_lists_stats;
 
 struct netns_can {
 #if IS_ENABLED(CONFIG_PROC_FS)
@@ -31,8 +31,8 @@ struct netns_can {
 	struct can_dev_rcv_lists *can_rx_alldev_list;
 	spinlock_t can_rcvlists_lock;
 	struct timer_list can_stattimer;/* timer for statistics update */
-	struct s_stats *can_stats;	/* packet statistics */
-	struct s_pstats *can_pstats;	/* receive list statistics */
+	struct can_pkg_stats *can_stats;	/* packet statistics */
+	struct can_rcv_lists_stats *can_pstats;	/* receive list statistics */
 
 	/* CAN GW per-net gateway jobs */
 	struct hlist_head cgw_list;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 9a9a51847c7c..0da9e6a573c5 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -198,7 +198,7 @@ int can_send(struct sk_buff *skb, int loop)
 {
 	struct sk_buff *newskb = NULL;
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
-	struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats;
+	struct can_pkg_stats *can_stats = dev_net(skb->dev)->can.can_stats;
 	int err = -EINVAL;
 
 	if (skb->len == CAN_MTU) {
@@ -441,7 +441,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 	struct receiver *r;
 	struct hlist_head *rl;
 	struct can_dev_rcv_lists *d;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
 	int err = 0;
 
 	/* insert new receiver  (dev,canid,mask) -> (func,data) */
@@ -515,7 +515,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 {
 	struct receiver *r = NULL;
 	struct hlist_head *rl;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
 	struct can_dev_rcv_lists *d;
 
 	if (dev && dev->type != ARPHRD_CAN)
@@ -655,7 +655,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 {
 	struct can_dev_rcv_lists *d;
 	struct net *net = dev_net(dev);
-	struct s_stats *can_stats = net->can.can_stats;
+	struct can_pkg_stats *can_stats = net->can.can_stats;
 	int matches;
 
 	/* update statistics */
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 9cdb79083623..25d22e534506 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -78,7 +78,7 @@ struct can_dev_rcv_lists {
 /* statistic structures */
 
 /* can be reset e.g. by can_init_stats() */
-struct s_stats {
+struct can_pkg_stats {
 	unsigned long jiffies_init;
 
 	unsigned long rx_frames;
@@ -103,7 +103,7 @@ struct s_stats {
 };
 
 /* persistent statistics */
-struct s_pstats {
+struct can_rcv_lists_stats {
 	unsigned long stats_reset;
 	unsigned long user_reset;
 	unsigned long rcv_entries;
diff --git a/net/can/proc.c b/net/can/proc.c
index edb822c31902..d05e8c8b420d 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -78,14 +78,14 @@ static const char rx_list_name[][8] = {
 
 static void can_init_stats(struct net *net)
 {
-	struct s_stats *can_stats = net->can.can_stats;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *can_stats = net->can.can_stats;
+	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
 	/*
 	 * This memset function is called from a timer context (when
 	 * can_stattimer is active which is the default) OR in a process
 	 * context (reading the proc_fs when can_stattimer is disabled).
 	 */
-	memset(can_stats, 0, sizeof(struct s_stats));
+	memset(can_stats, 0, sizeof(struct can_pkg_stats));
 	can_stats->jiffies_init = jiffies;
 
 	can_pstats->stats_reset++;
@@ -119,7 +119,7 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 void can_stat_update(struct timer_list *t)
 {
 	struct net *net = from_timer(net, t, can.can_stattimer);
-	struct s_stats *can_stats = net->can.can_stats;
+	struct can_pkg_stats *can_stats = net->can.can_stats;
 	unsigned long j = jiffies; /* snapshot */
 
 	/* restart counting in timer context on user request */
@@ -212,8 +212,8 @@ static void can_print_recv_banner(struct seq_file *m)
 static int can_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
-	struct s_stats *can_stats = net->can.can_stats;
-	struct s_pstats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *can_stats = net->can.can_stats;
+	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
 
 	seq_putc(m, '\n');
 	seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
@@ -274,8 +274,8 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
 static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
-	struct s_pstats *can_pstats = net->can.can_pstats;
-	struct s_stats *can_stats = net->can.can_stats;
+	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *can_stats = net->can.can_stats;
 
 	user_reset = 1;
 
-- 
cgit v1.2.3


From 2341086df44802550a6c4efcc45f6131a74a923d Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 8 Oct 2018 09:02:27 +0200
Subject: can: netns: give members of struct netns_can holding the statistics a
 sensible name

This patch gives the members of the struct netns_can that are holding
the statistics a sensible name, by renaming struct netns_can::can_stats
into struct netns_can::pkg_stats and struct netns_can::can_pstats into
struct netns_can::rcv_lists_stats.

The conversion is done with:

	sed -i \
		-e "s:\(struct[^*]*\*\)can_stats;.*:\1pkg_stats;:" \
		-e "s:\(struct[^*]*\*\)can_pstats;.*:\1rcv_lists_stats;:" \
		-e "s/can\.can_stats/can.pkg_stats/g" \
		-e "s/can\.can_pstats/can.rcv_lists_stats/g" \
		net/can/*.[ch] \
		include/net/netns/can.h

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/net/netns/can.h |  4 ++--
 net/can/af_can.c        | 32 ++++++++++++++++----------------
 net/can/proc.c          | 14 +++++++-------
 3 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index f2e5646e36f2..f684dea0a83e 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -31,8 +31,8 @@ struct netns_can {
 	struct can_dev_rcv_lists *can_rx_alldev_list;
 	spinlock_t can_rcvlists_lock;
 	struct timer_list can_stattimer;/* timer for statistics update */
-	struct can_pkg_stats *can_stats;	/* packet statistics */
-	struct can_rcv_lists_stats *can_pstats;	/* receive list statistics */
+	struct can_pkg_stats *pkg_stats;
+	struct can_rcv_lists_stats *rcv_lists_stats;
 
 	/* CAN GW per-net gateway jobs */
 	struct hlist_head cgw_list;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 0da9e6a573c5..079b00b5e365 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -198,7 +198,7 @@ int can_send(struct sk_buff *skb, int loop)
 {
 	struct sk_buff *newskb = NULL;
 	struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
-	struct can_pkg_stats *can_stats = dev_net(skb->dev)->can.can_stats;
+	struct can_pkg_stats *can_stats = dev_net(skb->dev)->can.pkg_stats;
 	int err = -EINVAL;
 
 	if (skb->len == CAN_MTU) {
@@ -441,7 +441,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 	struct receiver *r;
 	struct hlist_head *rl;
 	struct can_dev_rcv_lists *d;
-	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
+	struct can_rcv_lists_stats *can_pstats = net->can.rcv_lists_stats;
 	int err = 0;
 
 	/* insert new receiver  (dev,canid,mask) -> (func,data) */
@@ -515,7 +515,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 {
 	struct receiver *r = NULL;
 	struct hlist_head *rl;
-	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
+	struct can_rcv_lists_stats *can_pstats = net->can.rcv_lists_stats;
 	struct can_dev_rcv_lists *d;
 
 	if (dev && dev->type != ARPHRD_CAN)
@@ -655,7 +655,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 {
 	struct can_dev_rcv_lists *d;
 	struct net *net = dev_net(dev);
-	struct can_pkg_stats *can_stats = net->can.can_stats;
+	struct can_pkg_stats *can_stats = net->can.pkg_stats;
 	int matches;
 
 	/* update statistics */
@@ -837,12 +837,12 @@ static int can_pernet_init(struct net *net)
 		kzalloc(sizeof(*net->can.can_rx_alldev_list), GFP_KERNEL);
 	if (!net->can.can_rx_alldev_list)
 		goto out;
-	net->can.can_stats = kzalloc(sizeof(*net->can.can_stats), GFP_KERNEL);
-	if (!net->can.can_stats)
-		goto out_free_alldev_list;
-	net->can.can_pstats = kzalloc(sizeof(*net->can.can_pstats), GFP_KERNEL);
-	if (!net->can.can_pstats)
-		goto out_free_can_stats;
+	net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL);
+	if (!net->can.pkg_stats)
+		goto out_free_rx_alldev_list;
+	net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL);
+	if (!net->can.rcv_lists_stats)
+		goto out_free_pkg_stats;
 
 	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		/* the statistics are updated every second (timer triggered) */
@@ -852,15 +852,15 @@ static int can_pernet_init(struct net *net)
 			mod_timer(&net->can.can_stattimer,
 				  round_jiffies(jiffies + HZ));
 		}
-		net->can.can_stats->jiffies_init = jiffies;
+		net->can.pkg_stats->jiffies_init = jiffies;
 		can_init_proc(net);
 	}
 
 	return 0;
 
- out_free_can_stats:
-	kfree(net->can.can_stats);
- out_free_alldev_list:
+ out_free_pkg_stats:
+	kfree(net->can.pkg_stats);
+ out_free_rx_alldev_list:
 	kfree(net->can.can_rx_alldev_list);
  out:
 	return -ENOMEM;
@@ -890,8 +890,8 @@ static void can_pernet_exit(struct net *net)
 	rcu_read_unlock();
 
 	kfree(net->can.can_rx_alldev_list);
-	kfree(net->can.can_stats);
-	kfree(net->can.can_pstats);
+	kfree(net->can.pkg_stats);
+	kfree(net->can.rcv_lists_stats);
 }
 
 /* af_can module init/exit functions */
diff --git a/net/can/proc.c b/net/can/proc.c
index d05e8c8b420d..30d8da4cef5d 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -78,8 +78,8 @@ static const char rx_list_name[][8] = {
 
 static void can_init_stats(struct net *net)
 {
-	struct can_pkg_stats *can_stats = net->can.can_stats;
-	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *can_stats = net->can.pkg_stats;
+	struct can_rcv_lists_stats *can_pstats = net->can.rcv_lists_stats;
 	/*
 	 * This memset function is called from a timer context (when
 	 * can_stattimer is active which is the default) OR in a process
@@ -119,7 +119,7 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 void can_stat_update(struct timer_list *t)
 {
 	struct net *net = from_timer(net, t, can.can_stattimer);
-	struct can_pkg_stats *can_stats = net->can.can_stats;
+	struct can_pkg_stats *can_stats = net->can.pkg_stats;
 	unsigned long j = jiffies; /* snapshot */
 
 	/* restart counting in timer context on user request */
@@ -212,8 +212,8 @@ static void can_print_recv_banner(struct seq_file *m)
 static int can_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
-	struct can_pkg_stats *can_stats = net->can.can_stats;
-	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
+	struct can_pkg_stats *can_stats = net->can.pkg_stats;
+	struct can_rcv_lists_stats *can_pstats = net->can.rcv_lists_stats;
 
 	seq_putc(m, '\n');
 	seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames);
@@ -274,8 +274,8 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
 static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 {
 	struct net *net = m->private;
-	struct can_rcv_lists_stats *can_pstats = net->can.can_pstats;
-	struct can_pkg_stats *can_stats = net->can.can_stats;
+	struct can_rcv_lists_stats *can_pstats = net->can.rcv_lists_stats;
+	struct can_pkg_stats *can_stats = net->can.pkg_stats;
 
 	user_reset = 1;
 
-- 
cgit v1.2.3


From 564577dfee4e55e33ae64e64a866a212e584d58f Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 8 Oct 2018 09:02:30 +0200
Subject: can: netns: remove "can_" prefix from members struct netns_can

This patch improves the code reability by removing the redundant "can_"
prefix from the members of struct netns_can (as the struct netns_can itself
is the member "can" of the struct net.)

The conversion is done with:

	sed -i \
		-e "s/struct can_dev_rcv_lists \*can_rx_alldev_list;/struct can_dev_rcv_lists *rx_alldev_list;/" \
		-e "s/spinlock_t can_rcvlists_lock;/spinlock_t rcvlists_lock;/" \
		-e "s/struct timer_list can_stattimer;/struct timer_list stattimer; /" \
		-e "s/can\.can_rx_alldev_list/can.rx_alldev_list/g" \
		-e "s/can\.can_rcvlists_lock/can.rcvlists_lock/g" \
		-e "s/can\.can_stattimer/can.stattimer/g" \
		include/net/netns/can.h \
		net/can/*.[ch]

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/net/netns/can.h |  6 +++---
 net/can/af_can.c        | 34 +++++++++++++++++-----------------
 net/can/proc.c          | 14 +++++++-------
 3 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/include/net/netns/can.h b/include/net/netns/can.h
index f684dea0a83e..b6ab7d1530d7 100644
--- a/include/net/netns/can.h
+++ b/include/net/netns/can.h
@@ -28,9 +28,9 @@ struct netns_can {
 #endif
 
 	/* receive filters subscribed for 'all' CAN devices */
-	struct can_dev_rcv_lists *can_rx_alldev_list;
-	spinlock_t can_rcvlists_lock;
-	struct timer_list can_stattimer;/* timer for statistics update */
+	struct can_dev_rcv_lists *rx_alldev_list;
+	spinlock_t rcvlists_lock;
+	struct timer_list stattimer; /* timer for statistics update */
 	struct can_pkg_stats *pkg_stats;
 	struct can_rcv_lists_stats *rcv_lists_stats;
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 0eadded4a5aa..62b3f2d68287 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -302,7 +302,7 @@ static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net,
 						    struct net_device *dev)
 {
 	if (!dev)
-		return net->can.can_rx_alldev_list;
+		return net->can.rx_alldev_list;
 	else
 		return (struct can_dev_rcv_lists *)dev->ml_priv;
 }
@@ -456,7 +456,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 	if (!r)
 		return -ENOMEM;
 
-	spin_lock(&net->can.can_rcvlists_lock);
+	spin_lock(&net->can.rcvlists_lock);
 
 	d = find_dev_rcv_lists(net, dev);
 	if (d) {
@@ -481,7 +481,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id,
 		err = -ENODEV;
 	}
 
-	spin_unlock(&net->can.can_rcvlists_lock);
+	spin_unlock(&net->can.rcvlists_lock);
 
 	return err;
 }
@@ -524,7 +524,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 	if (dev && !net_eq(net, dev_net(dev)))
 		return;
 
-	spin_lock(&net->can.can_rcvlists_lock);
+	spin_lock(&net->can.rcvlists_lock);
 
 	d = find_dev_rcv_lists(net, dev);
 	if (!d) {
@@ -569,7 +569,7 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 	}
 
  out:
-	spin_unlock(&net->can.can_rcvlists_lock);
+	spin_unlock(&net->can.rcvlists_lock);
 
 	/* schedule the receiver item for deletion */
 	if (r) {
@@ -669,7 +669,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_lock();
 
 	/* deliver the packet to sockets listening on all devices */
-	matches = can_rcv_filter(net->can.can_rx_alldev_list, skb);
+	matches = can_rcv_filter(net->can.rx_alldev_list, skb);
 
 	/* find receive list for this device */
 	d = find_dev_rcv_lists(net, dev);
@@ -807,7 +807,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 		break;
 
 	case NETDEV_UNREGISTER:
-		spin_lock(&dev_net(dev)->can.can_rcvlists_lock);
+		spin_lock(&dev_net(dev)->can.rcvlists_lock);
 
 		d = dev->ml_priv;
 		if (d) {
@@ -822,7 +822,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 			       dev->name);
 		}
 
-		spin_unlock(&dev_net(dev)->can.can_rcvlists_lock);
+		spin_unlock(&dev_net(dev)->can.rcvlists_lock);
 
 		break;
 	}
@@ -832,10 +832,10 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 
 static int can_pernet_init(struct net *net)
 {
-	spin_lock_init(&net->can.can_rcvlists_lock);
-	net->can.can_rx_alldev_list =
-		kzalloc(sizeof(*net->can.can_rx_alldev_list), GFP_KERNEL);
-	if (!net->can.can_rx_alldev_list)
+	spin_lock_init(&net->can.rcvlists_lock);
+	net->can.rx_alldev_list =
+		kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL);
+	if (!net->can.rx_alldev_list)
 		goto out;
 	net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL);
 	if (!net->can.pkg_stats)
@@ -847,9 +847,9 @@ static int can_pernet_init(struct net *net)
 	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		/* the statistics are updated every second (timer triggered) */
 		if (stats_timer) {
-			timer_setup(&net->can.can_stattimer, can_stat_update,
+			timer_setup(&net->can.stattimer, can_stat_update,
 				    0);
-			mod_timer(&net->can.can_stattimer,
+			mod_timer(&net->can.stattimer,
 				  round_jiffies(jiffies + HZ));
 		}
 		net->can.pkg_stats->jiffies_init = jiffies;
@@ -861,7 +861,7 @@ static int can_pernet_init(struct net *net)
  out_free_pkg_stats:
 	kfree(net->can.pkg_stats);
  out_free_rx_alldev_list:
-	kfree(net->can.can_rx_alldev_list);
+	kfree(net->can.rx_alldev_list);
  out:
 	return -ENOMEM;
 }
@@ -873,7 +873,7 @@ static void can_pernet_exit(struct net *net)
 	if (IS_ENABLED(CONFIG_PROC_FS)) {
 		can_remove_proc(net);
 		if (stats_timer)
-			del_timer_sync(&net->can.can_stattimer);
+			del_timer_sync(&net->can.stattimer);
 	}
 
 	/* remove created dev_rcv_lists from still registered CAN devices */
@@ -889,7 +889,7 @@ static void can_pernet_exit(struct net *net)
 	}
 	rcu_read_unlock();
 
-	kfree(net->can.can_rx_alldev_list);
+	kfree(net->can.rx_alldev_list);
 	kfree(net->can.pkg_stats);
 	kfree(net->can.rcv_lists_stats);
 }
diff --git a/net/can/proc.c b/net/can/proc.c
index 8390243f34c6..6561d74a1012 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -118,7 +118,7 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
 
 void can_stat_update(struct timer_list *t)
 {
-	struct net *net = from_timer(net, t, can.can_stattimer);
+	struct net *net = from_timer(net, t, can.stattimer);
 	struct can_pkg_stats *pkg_stats = net->can.pkg_stats;
 	unsigned long j = jiffies; /* snapshot */
 
@@ -177,7 +177,7 @@ void can_stat_update(struct timer_list *t)
 	pkg_stats->matches_delta   = 0;
 
 	/* restart timer (one second) */
-	mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ));
+	mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ));
 }
 
 /*
@@ -222,7 +222,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
 
 	seq_putc(m, '\n');
 
-	if (net->can.can_stattimer.function == can_stat_update) {
+	if (net->can.stattimer.function == can_stat_update) {
 		seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
 				pkg_stats->total_rx_match_ratio);
 
@@ -279,7 +279,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
 
 	user_reset = 1;
 
-	if (net->can.can_stattimer.function == can_stat_update) {
+	if (net->can.stattimer.function == can_stat_update) {
 		seq_printf(m, "Scheduled statistic reset #%ld.\n",
 				rcv_lists_stats->stats_reset + 1);
 	} else {
@@ -323,7 +323,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
 	rcu_read_lock();
 
 	/* receive list for 'all' CAN devices (dev == NULL) */
-	d = net->can.can_rx_alldev_list;
+	d = net->can.rx_alldev_list;
 	can_rcvlist_proc_show_one(m, idx, NULL, d);
 
 	/* receive list for registered CAN devices */
@@ -375,7 +375,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
 	rcu_read_lock();
 
 	/* sff receive list for 'all' CAN devices (dev == NULL) */
-	d = net->can.can_rx_alldev_list;
+	d = net->can.rx_alldev_list;
 	can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff));
 
 	/* sff receive list for registered CAN devices */
@@ -405,7 +405,7 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
 	rcu_read_lock();
 
 	/* eff receive list for 'all' CAN devices (dev == NULL) */
-	d = net->can.can_rx_alldev_list;
+	d = net->can.rx_alldev_list;
 	can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff));
 
 	/* eff receive list for registered CAN devices */
-- 
cgit v1.2.3


From ffd956eef69b212a724b1cc4cdc61828f3ad9104 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 8 Oct 2018 09:02:38 +0200
Subject: can: introduce CAN midlayer private and allocate it automatically

This patch introduces the CAN midlayer private structure ("struct
can_ml_priv") which should be used to hold protocol specific per device
data structures. For now it's only member is "struct can_dev_rcv_lists".

The CAN midlayer private is allocated via alloc_netdev()'s private and
assigned to "struct net_device::ml_priv" during device creation. This is
done transparently for CAN drivers using alloc_candev(). The slcan, vcan
and vxcan drivers which are not using alloc_candev() have been adopted
manually. The memory layout of the netdev_priv allocated via
alloc_candev() will looke like this:

  +-------------------------+
  | driver's priv           |
  +-------------------------+
  | struct can_ml_priv      |
  +-------------------------+
  | array of struct sk_buff |
  +-------------------------+

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c      | 22 +++++++++++++---
 drivers/net/can/slcan.c    |  5 +++-
 drivers/net/can/vcan.c     |  6 +++--
 drivers/net/can/vxcan.c    |  3 ++-
 include/linux/can/can-ml.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 net/can/af_can.c           |  1 +
 net/can/af_can.h           | 15 -----------
 net/can/proc.c             |  1 +
 8 files changed, 96 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/can/can-ml.h

(limited to 'include')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 483d270664cc..9e688dc29521 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -12,6 +12,7 @@
 #include <linux/if_arp.h>
 #include <linux/workqueue.h>
 #include <linux/can.h>
+#include <linux/can/can-ml.h>
 #include <linux/can/dev.h>
 #include <linux/can/skb.h>
 #include <linux/can/netlink.h>
@@ -718,11 +719,24 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
 	struct can_priv *priv;
 	int size;
 
+	/* We put the driver's priv, the CAN mid layer priv and the
+	 * echo skb into the netdevice's priv. The memory layout for
+	 * the netdev_priv is like this:
+	 *
+	 * +-------------------------+
+	 * | driver's priv           |
+	 * +-------------------------+
+	 * | struct can_ml_priv      |
+	 * +-------------------------+
+	 * | array of struct sk_buff |
+	 * +-------------------------+
+	 */
+
+	size = ALIGN(sizeof_priv, NETDEV_ALIGN) + sizeof(struct can_ml_priv);
+
 	if (echo_skb_max)
-		size = ALIGN(sizeof_priv, sizeof(struct sk_buff *)) +
+		size = ALIGN(size, sizeof(struct sk_buff *)) +
 			echo_skb_max * sizeof(struct sk_buff *);
-	else
-		size = sizeof_priv;
 
 	dev = alloc_netdev_mqs(size, "can%d", NET_NAME_UNKNOWN, can_setup,
 			       txqs, rxqs);
@@ -735,7 +749,7 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
 	if (echo_skb_max) {
 		priv->echo_skb_max = echo_skb_max;
 		priv->echo_skb = (void *)priv +
-			ALIGN(sizeof_priv, sizeof(struct sk_buff *));
+			(size - echo_skb_max * sizeof(struct sk_buff *));
 	}
 
 	priv->state = CAN_STATE_STOPPED;
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index aa97dbc797b6..5b2e95425e69 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -55,6 +55,7 @@
 #include <linux/workqueue.h>
 #include <linux/can.h>
 #include <linux/can/skb.h>
+#include <linux/can/can-ml.h>
 
 MODULE_ALIAS_LDISC(N_SLCAN);
 MODULE_DESCRIPTION("serial line CAN interface");
@@ -514,6 +515,7 @@ static struct slcan *slc_alloc(void)
 	char name[IFNAMSIZ];
 	struct net_device *dev = NULL;
 	struct slcan       *sl;
+	int size;
 
 	for (i = 0; i < maxdev; i++) {
 		dev = slcan_devs[i];
@@ -527,7 +529,8 @@ static struct slcan *slc_alloc(void)
 		return NULL;
 
 	sprintf(name, "slcan%d", i);
-	dev = alloc_netdev(sizeof(*sl), name, NET_NAME_UNKNOWN, slc_setup);
+	size = ALIGN(sizeof(*sl), NETDEV_ALIGN) + sizeof(struct can_ml_priv);
+	dev = alloc_netdev(size, name, NET_NAME_UNKNOWN, slc_setup);
 	if (!dev)
 		return NULL;
 
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index daf27133887b..6973ae09a37a 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -46,6 +46,7 @@
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/can.h>
+#include <linux/can/can-ml.h>
 #include <linux/can/dev.h>
 #include <linux/can/skb.h>
 #include <linux/slab.h>
@@ -162,8 +163,9 @@ static void vcan_setup(struct net_device *dev)
 }
 
 static struct rtnl_link_ops vcan_link_ops __read_mostly = {
-	.kind	= DRV_NAME,
-	.setup	= vcan_setup,
+	.kind = DRV_NAME,
+	.priv_size = sizeof(struct can_ml_priv),
+	.setup = vcan_setup,
 };
 
 static __init int vcan_init_module(void)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index b2106292230e..4c3eed796432 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -18,6 +18,7 @@
 #include <linux/can/dev.h>
 #include <linux/can/skb.h>
 #include <linux/can/vxcan.h>
+#include <linux/can/can-ml.h>
 #include <linux/slab.h>
 #include <net/rtnetlink.h>
 
@@ -281,7 +282,7 @@ static struct net *vxcan_get_link_net(const struct net_device *dev)
 
 static struct rtnl_link_ops vxcan_link_ops = {
 	.kind		= DRV_NAME,
-	.priv_size	= sizeof(struct vxcan_priv),
+	.priv_size	= ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN) + sizeof(struct can_ml_priv),
 	.setup		= vxcan_setup,
 	.newlink	= vxcan_newlink,
 	.dellink	= vxcan_dellink,
diff --git a/include/linux/can/can-ml.h b/include/linux/can/can-ml.h
new file mode 100644
index 000000000000..0a9d778de8af
--- /dev/null
+++ b/include/linux/can/can-ml.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research
+ * Copyright (c) 2017 Pengutronix, Marc Kleine-Budde <kernel@pengutronix.de>
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Volkswagen nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * Alternatively, provided that this notice is retained in full, this
+ * software may be distributed under the terms of the GNU General
+ * Public License ("GPL") version 2, in which case the provisions of the
+ * GPL apply INSTEAD OF those given above.
+ *
+ * The provided data structures and external interfaces from this code
+ * are not restricted to be used by modules with a GPL compatible license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#ifndef CAN_ML_H
+#define CAN_ML_H
+
+#include <linux/can.h>
+#include <linux/list.h>
+
+#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
+#define CAN_EFF_RCV_HASH_BITS 10
+#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
+
+enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
+
+struct can_dev_rcv_lists {
+	struct hlist_head rx[RX_MAX];
+	struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
+	struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
+	int remove_on_zero_entries;
+	int entries;
+};
+
+struct can_ml_priv {
+	struct can_dev_rcv_lists dev_rcv_lists;
+};
+
+#endif /* CAN_ML_H */
diff --git a/net/can/af_can.c b/net/can/af_can.c
index d65b19003a24..723299daa04e 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -58,6 +58,7 @@
 #include <linux/can.h>
 #include <linux/can/core.h>
 #include <linux/can/skb.h>
+#include <linux/can/can-ml.h>
 #include <linux/ratelimit.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
diff --git a/net/can/af_can.h b/net/can/af_can.h
index 25d22e534506..7c2d9161e224 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -60,21 +60,6 @@ struct receiver {
 	struct rcu_head rcu;
 };
 
-#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
-#define CAN_EFF_RCV_HASH_BITS 10
-#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS)
-
-enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX };
-
-/* per device receive filters linked at dev->ml_priv */
-struct can_dev_rcv_lists {
-	struct hlist_head rx[RX_MAX];
-	struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
-	struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
-	int remove_on_zero_entries;
-	int entries;
-};
-
 /* statistic structures */
 
 /* can be reset e.g. by can_init_stats() */
diff --git a/net/can/proc.c b/net/can/proc.c
index 560fa3c132bf..e6881bfc3ed1 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -45,6 +45,7 @@
 #include <linux/list.h>
 #include <linux/rcupdate.h>
 #include <linux/if_arp.h>
+#include <linux/can/can-ml.h>
 #include <linux/can/core.h>
 
 #include "af_can.h"
-- 
cgit v1.2.3


From 8df9ffb888c021fa68f9075d545f2ec5eca37200 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 8 Oct 2018 09:02:39 +0200
Subject: can: make use of preallocated can_ml_priv for per device struct
 can_dev_rcv_lists

This patch removes the old method of allocating the per device protocol
specific memory via a netdevice_notifier. This had the drawback, that
the allocation can fail, leading to a lot of null pointer checks in the
code. This also makes the live cycle management of this memory quite
complicated.

This patch switches from the allocating the struct can_dev_rcv_lists in
a NETDEV_REGISTER call to using the dev->ml_priv, which is allocated by
the driver since the previous patch.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev.c      |  2 ++
 drivers/net/can/slcan.c    |  1 +
 drivers/net/can/vcan.c     |  1 +
 drivers/net/can/vxcan.c    |  1 +
 include/linux/can/can-ml.h |  1 -
 net/can/af_can.c           | 45 +++++++--------------------------------------
 6 files changed, 12 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 9e688dc29521..b429550c4cc2 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -746,6 +746,8 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
 	priv = netdev_priv(dev);
 	priv->dev = dev;
 
+	dev->ml_priv = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN);
+
 	if (echo_skb_max) {
 		priv->echo_skb_max = echo_skb_max;
 		priv->echo_skb = (void *)priv +
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index 5b2e95425e69..bb6032211043 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -536,6 +536,7 @@ static struct slcan *slc_alloc(void)
 
 	dev->base_addr  = i;
 	sl = netdev_priv(dev);
+	dev->ml_priv = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN);
 
 	/* Initialize channel control data */
 	sl->magic = SLCAN_MAGIC;
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index 6973ae09a37a..39ca14b0585d 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -153,6 +153,7 @@ static void vcan_setup(struct net_device *dev)
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 0;
 	dev->flags		= IFF_NOARP;
+	dev->ml_priv		= netdev_priv(dev);
 
 	/* set flags according to driver capabilities */
 	if (echo)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 4c3eed796432..d6ba9426be4d 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -147,6 +147,7 @@ static void vxcan_setup(struct net_device *dev)
 	dev->flags		= (IFF_NOARP|IFF_ECHO);
 	dev->netdev_ops		= &vxcan_netdev_ops;
 	dev->needs_free_netdev	= true;
+	dev->ml_priv		= netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN);
 }
 
 /* forward declaration for rtnl_create_link() */
diff --git a/include/linux/can/can-ml.h b/include/linux/can/can-ml.h
index 0a9d778de8af..79ccf6bfa232 100644
--- a/include/linux/can/can-ml.h
+++ b/include/linux/can/can-ml.h
@@ -55,7 +55,6 @@ struct can_dev_rcv_lists {
 	struct hlist_head rx[RX_MAX];
 	struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ];
 	struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ];
-	int remove_on_zero_entries;
 	int entries;
 };
 
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 723299daa04e..6ed85e2f72f0 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -302,10 +302,12 @@ EXPORT_SYMBOL(can_send);
 static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
 							struct net_device *dev)
 {
-	if (!dev)
+	if (dev) {
+		struct can_ml_priv *ml_priv = dev->ml_priv;
+		return &ml_priv->dev_rcv_lists;
+	} else {
 		return net->can.rx_alldev_list;
-	else
-		return (struct can_dev_rcv_lists *)dev->ml_priv;
+	}
 }
 
 /**
@@ -561,12 +563,6 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id,
 	if (rcv_lists_stats->rcv_entries > 0)
 		rcv_lists_stats->rcv_entries--;
 
-	/* remove device structure requested by NETDEV_UNREGISTER */
-	if (dev_rcv_lists->remove_on_zero_entries && !dev_rcv_lists->entries) {
-		kfree(dev_rcv_lists);
-		dev->ml_priv = NULL;
-	}
-
  out:
 	spin_unlock(&net->can.rcvlists_lock);
 
@@ -788,41 +784,14 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
 			void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct can_dev_rcv_lists *dev_rcv_lists;
 
 	if (dev->type != ARPHRD_CAN)
 		return NOTIFY_DONE;
 
 	switch (msg) {
 	case NETDEV_REGISTER:
-
-		/* create new dev_rcv_lists for this device */
-		dev_rcv_lists = kzalloc(sizeof(*dev_rcv_lists), GFP_KERNEL);
-		if (!dev_rcv_lists)
-			return NOTIFY_DONE;
-		BUG_ON(dev->ml_priv);
-		dev->ml_priv = dev_rcv_lists;
-
-		break;
-
-	case NETDEV_UNREGISTER:
-		spin_lock(&dev_net(dev)->can.rcvlists_lock);
-
-		dev_rcv_lists = dev->ml_priv;
-		if (dev_rcv_lists) {
-			if (dev_rcv_lists->entries)
-				dev_rcv_lists->remove_on_zero_entries = 1;
-			else {
-				kfree(dev_rcv_lists);
-				dev->ml_priv = NULL;
-			}
-		} else {
-			pr_err("can: notifier: receive list not found for dev %s\n",
-			       dev->name);
-		}
-
-		spin_unlock(&dev_net(dev)->can.rcvlists_lock);
-
+		WARN(!dev->ml_priv,
+		     "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
 		break;
 	}
 
-- 
cgit v1.2.3


From 9868b5d44f3df9dd75247acd23dddff0a42f79be Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Date: Mon, 8 Oct 2018 11:48:33 +0200
Subject: can: introduce CAN_REQUIRED_SIZE macro

The size of this structure will be increased with J1939 support. To stay
binary compatible, the CAN_REQUIRED_SIZE macro is introduced for
existing CAN protocols.

Signed-off-by: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h | 8 ++++++++
 net/can/bcm.c            | 4 ++--
 net/can/raw.c            | 4 ++--
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 708c10d3417a..8339071ab08b 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -41,6 +41,14 @@ struct can_proto {
 	struct proto *prot;
 };
 
+/* required_size
+ * macro to find the minimum size of a struct
+ * that includes a requested member
+ */
+#define CAN_REQUIRED_SIZE(struct_type, member) \
+	(offsetof(typeof(struct_type), member) + \
+	 sizeof(((typeof(struct_type) *)(NULL))->member))
+
 /* function prototypes for the CAN networklayer core (af_can.c) */
 
 extern int  can_proto_register(const struct can_proto *cp);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 28fd1a1c8487..c96fa0f33db3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1294,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 		/* no bound device as default => check msg_name */
 		DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
 
-		if (msg->msg_namelen < sizeof(*addr))
+		if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 			return -EINVAL;
 
 		if (addr->can_family != AF_CAN)
@@ -1536,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len,
 	struct net *net = sock_net(sk);
 	int ret = 0;
 
-	if (len < sizeof(*addr))
+	if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 		return -EINVAL;
 
 	lock_sock(sk);
diff --git a/net/can/raw.c b/net/can/raw.c
index fdbc36140e9b..59c039d73c6d 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -396,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	int err = 0;
 	int notify_enetdown = 0;
 
-	if (len < sizeof(*addr))
+	if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 		return -EINVAL;
 	if (addr->can_family != AF_CAN)
 		return -EINVAL;
@@ -733,7 +733,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 	if (msg->msg_name) {
 		DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name);
 
-		if (msg->msg_namelen < sizeof(*addr))
+		if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex))
 			return -EINVAL;
 
 		if (addr->can_family != AF_CAN)
-- 
cgit v1.2.3


From 2a0c9aaa6247c817e45bfc1aaa5eaeafe7a331d6 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Date: Mon, 8 Oct 2018 11:48:34 +0200
Subject: can: add socket type for CAN_J1939

This patch is a preparation for SAE J1939 and adds CAN_J1939
socket type.

Signed-off-by: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 0afb7d8e867f..06d92d6be6e6 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -157,7 +157,8 @@ struct canfd_frame {
 #define CAN_TP20	4 /* VAG Transport Protocol v2.0 */
 #define CAN_MCNET	5 /* Bosch MCNet */
 #define CAN_ISOTP	6 /* ISO 15765-2 Transport Protocol */
-#define CAN_NPROTO	7
+#define CAN_J1939	7 /* SAE J1939 */
+#define CAN_NPROTO	8
 
 #define SOL_CAN_BASE 100
 
-- 
cgit v1.2.3


From f5223e9eee651e005c0f6d6d078909087601b7e9 Mon Sep 17 00:00:00 2001
From: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Date: Mon, 8 Oct 2018 11:48:35 +0200
Subject: can: extend sockaddr_can to include j1939 members

This patch prepares struct sockaddr_can for SAE J1939.

Signed-off-by: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 06d92d6be6e6..1e988fdeba34 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -175,6 +175,23 @@ struct sockaddr_can {
 		/* transport protocol class address information (e.g. ISOTP) */
 		struct { canid_t rx_id, tx_id; } tp;
 
+		/* J1939 address information */
+		struct {
+			/* 8 byte name when using dynamic addressing */
+			__u64 name;
+
+			/* pgn:
+			 * 8 bit: PS in PDU2 case, else 0
+			 * 8 bit: PF
+			 * 1 bit: DP
+			 * 1 bit: reserved
+			 */
+			__u32 pgn;
+
+			/* 1 byte address */
+			__u8 addr;
+		} j1939;
+
 		/* reserved for future CAN protocols address information */
 	} can_addr;
 };
-- 
cgit v1.2.3


From b5dc75c915cdaebab9b9875022e45638d6b14a7e Mon Sep 17 00:00:00 2001
From: Richard Gong <richard.gong@intel.com>
Date: Tue, 3 Sep 2019 08:18:18 -0500
Subject: firmware: stratix10-svc: extend svc to support new RSU features

Extend Intel Stratix10 service layer driver to support new RSU notify and
MAX_RETRY with watchdog event.

RSU is used to provide our customers with protection against loading bad
bitstream onto their devices when those devices are booting from flash

RSU notifies provides users with an API to notify the firmware of the
state of hard processor system.

To deal with watchdog event, RSU provides a way for user to retry the
current running image several times before giving up and starting normal
RSU failover flow.

Signed-off-by: Richard Gong <richard.gong@intel.com>
Reviewed-by: Alan Tull <atull@kernel.org>
Link: https://lore.kernel.org/r/1567516701-26026-2-git-send-email-richard.gong@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/stratix10-svc.c                   | 76 +++++++++++++++++++++-
 include/linux/firmware/intel/stratix10-smc.h       | 51 +++++++++++++--
 .../linux/firmware/intel/stratix10-svc-client.h    | 11 +++-
 3 files changed, 128 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c
index 6e6514825ad0..b485321189e1 100644
--- a/drivers/firmware/stratix10-svc.c
+++ b/drivers/firmware/stratix10-svc.c
@@ -38,12 +38,23 @@
 #define FPGA_CONFIG_DATA_CLAIM_TIMEOUT_MS	200
 #define FPGA_CONFIG_STATUS_TIMEOUT_SEC		30
 
+/* stratix10 service layer clients */
+#define STRATIX10_RSU				"stratix10-rsu"
+
 typedef void (svc_invoke_fn)(unsigned long, unsigned long, unsigned long,
 			     unsigned long, unsigned long, unsigned long,
 			     unsigned long, unsigned long,
 			     struct arm_smccc_res *);
 struct stratix10_svc_chan;
 
+/**
+ * struct stratix10_svc - svc private data
+ * @stratix10_svc_rsu: pointer to stratix10 RSU device
+ */
+struct stratix10_svc {
+	struct platform_device *stratix10_svc_rsu;
+};
+
 /**
  * struct stratix10_svc_sh_memory - service shared memory structure
  * @sync_complete: state for a completion
@@ -296,7 +307,12 @@ static void svc_thread_recv_status_ok(struct stratix10_svc_data *p_data,
 		cb_data->status = BIT(SVC_STATUS_RECONFIG_COMPLETED);
 		break;
 	case COMMAND_RSU_UPDATE:
+	case COMMAND_RSU_NOTIFY:
+		cb_data->status = BIT(SVC_STATUS_RSU_OK);
+		break;
+	case COMMAND_RSU_RETRY:
 		cb_data->status = BIT(SVC_STATUS_RSU_OK);
+		cb_data->kaddr1 = &res.a1;
 		break;
 	default:
 		pr_warn("it shouldn't happen\n");
@@ -386,6 +402,16 @@ static int svc_normal_to_secure_thread(void *data)
 			a1 = pdata->arg[0];
 			a2 = 0;
 			break;
+		case COMMAND_RSU_NOTIFY:
+			a0 = INTEL_SIP_SMC_RSU_NOTIFY;
+			a1 = pdata->arg[0];
+			a2 = 0;
+			break;
+		case COMMAND_RSU_RETRY:
+			a0 = INTEL_SIP_SMC_RSU_RETRY_COUNTER;
+			a1 = 0;
+			a2 = 0;
+			break;
 		default:
 			pr_warn("it shouldn't happen\n");
 			break;
@@ -438,7 +464,28 @@ static int svc_normal_to_secure_thread(void *data)
 			pr_debug("%s: STATUS_REJECTED\n", __func__);
 			break;
 		case INTEL_SIP_SMC_FPGA_CONFIG_STATUS_ERROR:
+		case INTEL_SIP_SMC_RSU_ERROR:
 			pr_err("%s: STATUS_ERROR\n", __func__);
+			switch (pdata->command) {
+			/* for FPGA mgr */
+			case COMMAND_RECONFIG_DATA_CLAIM:
+			case COMMAND_RECONFIG:
+			case COMMAND_RECONFIG_DATA_SUBMIT:
+			case COMMAND_RECONFIG_STATUS:
+				cbdata->status =
+					BIT(SVC_STATUS_RECONFIG_ERROR);
+				break;
+
+			/* for RSU */
+			case COMMAND_RSU_STATUS:
+			case COMMAND_RSU_UPDATE:
+			case COMMAND_RSU_NOTIFY:
+			case COMMAND_RSU_RETRY:
+				cbdata->status =
+					BIT(SVC_STATUS_RSU_ERROR);
+				break;
+			}
+
 			cbdata->status = BIT(SVC_STATUS_RECONFIG_ERROR);
 			cbdata->kaddr1 = NULL;
 			cbdata->kaddr2 = NULL;
@@ -530,7 +577,7 @@ static int svc_get_sh_memory(struct platform_device *pdev,
 
 	if (!sh_memory->addr || !sh_memory->size) {
 		dev_err(dev,
-			"fails to get shared memory info from secure world\n");
+			"failed to get shared memory info from secure world\n");
 		return -ENOMEM;
 	}
 
@@ -768,7 +815,7 @@ int stratix10_svc_send(struct stratix10_svc_chan *chan, void *msg)
 					      "svc_smc_hvc_thread");
 			if (IS_ERR(chan->ctrl->task)) {
 				dev_err(chan->ctrl->dev,
-					"fails to create svc_smc_hvc_thread\n");
+					"failed to create svc_smc_hvc_thread\n");
 				kfree(p_data);
 				return -EINVAL;
 			}
@@ -913,6 +960,8 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev)
 	struct stratix10_svc_chan *chans;
 	struct gen_pool *genpool;
 	struct stratix10_svc_sh_memory *sh_memory;
+	struct stratix10_svc *svc;
+
 	svc_invoke_fn *invoke_fn;
 	size_t fifo_size;
 	int ret;
@@ -957,7 +1006,7 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev)
 	fifo_size = sizeof(struct stratix10_svc_data) * SVC_NUM_DATA_IN_FIFO;
 	ret = kfifo_alloc(&controller->svc_fifo, fifo_size, GFP_KERNEL);
 	if (ret) {
-		dev_err(dev, "fails to allocate FIFO\n");
+		dev_err(dev, "failed to allocate FIFO\n");
 		return ret;
 	}
 	spin_lock_init(&controller->svc_fifo_lock);
@@ -975,6 +1024,24 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev)
 	list_add_tail(&controller->node, &svc_ctrl);
 	platform_set_drvdata(pdev, controller);
 
+	/* add svc client device(s) */
+	svc = devm_kzalloc(dev, sizeof(*svc), GFP_KERNEL);
+	if (!svc)
+		return -ENOMEM;
+
+	svc->stratix10_svc_rsu = platform_device_alloc(STRATIX10_RSU, 0);
+	if (!svc->stratix10_svc_rsu) {
+		dev_err(dev, "failed to allocate %s device\n", STRATIX10_RSU);
+		return -ENOMEM;
+	}
+
+	ret = platform_device_add(svc->stratix10_svc_rsu);
+	if (ret) {
+		platform_device_put(svc->stratix10_svc_rsu);
+		return ret;
+	}
+	dev_set_drvdata(dev, svc);
+
 	pr_info("Intel Service Layer Driver Initialized\n");
 
 	return ret;
@@ -982,8 +1049,11 @@ static int stratix10_svc_drv_probe(struct platform_device *pdev)
 
 static int stratix10_svc_drv_remove(struct platform_device *pdev)
 {
+	struct stratix10_svc *svc = dev_get_drvdata(&pdev->dev);
 	struct stratix10_svc_controller *ctrl = platform_get_drvdata(pdev);
 
+	platform_device_unregister(svc->stratix10_svc_rsu);
+
 	kfifo_free(&ctrl->svc_fifo);
 	if (ctrl->task) {
 		kthread_stop(ctrl->task);
diff --git a/include/linux/firmware/intel/stratix10-smc.h b/include/linux/firmware/intel/stratix10-smc.h
index 01684d935580..013ae4819deb 100644
--- a/include/linux/firmware/intel/stratix10-smc.h
+++ b/include/linux/firmware/intel/stratix10-smc.h
@@ -210,7 +210,7 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 #define INTEL_SIP_SMC_FPGA_CONFIG_LOOPBACK \
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_LOOPBACK)
 
-/*
+/**
  * Request INTEL_SIP_SMC_REG_READ
  *
  * Read a protected register at EL3
@@ -229,7 +229,7 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 #define INTEL_SIP_SMC_REG_READ \
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_READ)
 
-/*
+/**
  * Request INTEL_SIP_SMC_REG_WRITE
  *
  * Write a protected register at EL3
@@ -248,7 +248,7 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 #define INTEL_SIP_SMC_REG_WRITE \
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE)
 
-/*
+/**
  * Request INTEL_SIP_SMC_FUNCID_REG_UPDATE
  *
  * Update one or more bits in a protected register at EL3 using a
@@ -269,7 +269,7 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 #define INTEL_SIP_SMC_REG_UPDATE \
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_UPDATE)
 
-/*
+/**
  * Request INTEL_SIP_SMC_RSU_STATUS
  *
  * Request remote status update boot log, call is synchronous.
@@ -292,7 +292,7 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 #define INTEL_SIP_SMC_RSU_STATUS \
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_STATUS)
 
-/*
+/**
  * Request INTEL_SIP_SMC_RSU_UPDATE
  *
  * Request to set the offset of the bitstream to boot after reboot, call
@@ -310,7 +310,7 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 #define INTEL_SIP_SMC_RSU_UPDATE \
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_UPDATE)
 
-/*
+/**
  * Request INTEL_SIP_SMC_ECC_DBE
  *
  * Sync call used by service driver at EL1 to alert EL3 that a Double
@@ -329,3 +329,42 @@ INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_FPGA_CONFIG_COMPLETED_WRITE)
 	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE)
 
 #endif
+
+/**
+ * Request INTEL_SIP_SMC_RSU_NOTIFY
+ *
+ * Sync call used by service driver at EL1 to report hard processor
+ * system execution stage to firmware
+ *
+ * Call register usage:
+ * a0 INTEL_SIP_SMC_RSU_NOTIFY
+ * a1 32bit value representing hard processor system execution stage
+ * a2-7 not used
+ *
+ * Return status
+ * a0 INTEL_SIP_SMC_STATUS_OK
+ */
+#define INTEL_SIP_SMC_FUNCID_RSU_NOTIFY 14
+#define INTEL_SIP_SMC_RSU_NOTIFY \
+	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_NOTIFY)
+
+/**
+ * Request INTEL_SIP_SMC_RSU_RETRY_COUNTER
+ *
+ * Sync call used by service driver at EL1 to query RSU retry counter
+ *
+ * Call register usage:
+ * a0 INTEL_SIP_SMC_RSU_RETRY_COUNTER
+ * a1-7 not used
+ *
+ * Return status
+ * a0 INTEL_SIP_SMC_STATUS_OK
+ * a1 the retry counter
+ *
+ * Or
+ *
+ * a0 INTEL_SIP_SMC_RSU_ERROR
+ */
+#define INTEL_SIP_SMC_FUNCID_RSU_RETRY_COUNTER 15
+#define INTEL_SIP_SMC_RSU_RETRY_COUNTER \
+	INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_RSU_RETRY_COUNTER)
diff --git a/include/linux/firmware/intel/stratix10-svc-client.h b/include/linux/firmware/intel/stratix10-svc-client.h
index e521f172a47a..b6c4302a39e0 100644
--- a/include/linux/firmware/intel/stratix10-svc-client.h
+++ b/include/linux/firmware/intel/stratix10-svc-client.h
@@ -95,6 +95,13 @@ struct stratix10_svc_chan;
  *
  * @COMMAND_RSU_UPDATE: set the offset of the bitstream to boot after reboot,
  * return status is SVC_STATUS_RSU_OK or SVC_STATUS_RSU_ERROR
+ *
+ * @COMMAND_RSU_NOTIFY: report the status of hard processor system
+ * software to firmware, return status is SVC_STATUS_RSU_OK or
+ * SVC_STATUS_RSU_ERROR
+ *
+ * @COMMAND_RSU_RETRY: query firmware for the current image's retry counter,
+ * return status is SVC_STATUS_RSU_OK or SVC_STATUS_RSU_ERROR
  */
 enum stratix10_svc_command_code {
 	COMMAND_NOOP = 0,
@@ -103,7 +110,9 @@ enum stratix10_svc_command_code {
 	COMMAND_RECONFIG_DATA_CLAIM,
 	COMMAND_RECONFIG_STATUS,
 	COMMAND_RSU_STATUS,
-	COMMAND_RSU_UPDATE
+	COMMAND_RSU_UPDATE,
+	COMMAND_RSU_NOTIFY,
+	COMMAND_RSU_RETRY,
 };
 
 /**
-- 
cgit v1.2.3


From 7f905761e15a870761a7f9ba14bbb9e132ab8481 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 26 Aug 2019 17:01:53 +0200
Subject: sysfs: add BIN_ATTR_WO() macro

This variant was missing from sysfs.h, I guess no one noticed it before.

Turns out the powerpc secure variable code can use it, so add it to the
tree for it, and potentially others to take advantage of, instead of
open-coding it.

Reported-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20190826150153.GD18418@kroah.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 965236795750..5420817ed317 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -196,6 +196,12 @@ struct bin_attribute {
 	.size	= _size,						\
 }
 
+#define __BIN_ATTR_WO(_name) {						\
+	.attr	= { .name = __stringify(_name), .mode = 0200 },		\
+	.store	= _name##_store,					\
+	.size	= _size,						\
+}
+
 #define __BIN_ATTR_RW(_name, _size)					\
 	__BIN_ATTR(_name, 0644, _name##_read, _name##_write, _size)
 
@@ -208,6 +214,9 @@ struct bin_attribute bin_attr_##_name = __BIN_ATTR(_name, _mode, _read,	\
 #define BIN_ATTR_RO(_name, _size)					\
 struct bin_attribute bin_attr_##_name = __BIN_ATTR_RO(_name, _size)
 
+#define BIN_ATTR_WO(_name, _size)					\
+struct bin_attribute bin_attr_##_name = __BIN_ATTR_WO(_name, _size)
+
 #define BIN_ATTR_RW(_name, _size)					\
 struct bin_attribute bin_attr_##_name = __BIN_ATTR_RW(_name, _size)
 
-- 
cgit v1.2.3


From 1d5a013f9c0f5b4ef3cf64ac61e9c1f386c7b750 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 19 Aug 2019 16:16:06 +0900
Subject: driver-core: add include guard to linux/container.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20190819071606.10965-1-yamada.masahiro@socionext.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/container.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/container.h b/include/linux/container.h
index 0cc2ee91905c..2566a1baa736 100644
--- a/include/linux/container.h
+++ b/include/linux/container.h
@@ -6,6 +6,9 @@
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  */
 
+#ifndef _LINUX_CONTAINER_H
+#define _LINUX_CONTAINER_H
+
 #include <linux/device.h>
 
 /* drivers/base/power/container.c */
@@ -20,3 +23,5 @@ static inline struct container_dev *to_container_dev(struct device *dev)
 {
 	return container_of(dev, struct container_dev, dev);
 }
+
+#endif /* _LINUX_CONTAINER_H */
-- 
cgit v1.2.3


From 9d71dd0c70099914fcd063135da3c580865e924c Mon Sep 17 00:00:00 2001
From: The j1939 authors <linux-can@vger.kernel.org>
Date: Mon, 8 Oct 2018 11:48:36 +0200
Subject: can: add support of SAE J1939 protocol

SAE J1939 is the vehicle bus recommended practice used for communication
and diagnostics among vehicle components. Originating in the car and
heavy-duty truck industry in the United States, it is now widely used in
other parts of the world.

J1939, ISO 11783 and NMEA 2000 all share the same high level protocol.
SAE J1939 can be considered the replacement for the older SAE J1708 and
SAE J1587 specifications.

Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Bastian Stender <bst@pengutronix.de>
Signed-off-by: Elenita Hinds <ecathinds@gmail.com>
Signed-off-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Kurt Van Dijck <dev.kurt@vandijck-laurijssen.be>
Signed-off-by: Maxime Jayat <maxime.jayat@mobile-devices.fr>
Signed-off-by: Robin van der Gracht <robin@protonic.nl>
Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/networking/index.rst |    1 +
 Documentation/networking/j1939.rst |  422 ++++++++
 MAINTAINERS                        |   10 +
 include/linux/can/can-ml.h         |    3 +
 include/uapi/linux/can/j1939.h     |   99 ++
 net/can/Kconfig                    |    2 +
 net/can/Makefile                   |    2 +
 net/can/j1939/Kconfig              |   15 +
 net/can/j1939/Makefile             |   10 +
 net/can/j1939/address-claim.c      |  230 ++++
 net/can/j1939/bus.c                |  333 ++++++
 net/can/j1939/j1939-priv.h         |  338 ++++++
 net/can/j1939/main.c               |  403 +++++++
 net/can/j1939/socket.c             | 1160 +++++++++++++++++++++
 net/can/j1939/transport.c          | 2027 ++++++++++++++++++++++++++++++++++++
 15 files changed, 5055 insertions(+)
 create mode 100644 Documentation/networking/j1939.rst
 create mode 100644 include/uapi/linux/can/j1939.h
 create mode 100644 net/can/j1939/Kconfig
 create mode 100644 net/can/j1939/Makefile
 create mode 100644 net/can/j1939/address-claim.c
 create mode 100644 net/can/j1939/bus.c
 create mode 100644 net/can/j1939/j1939-priv.h
 create mode 100644 net/can/j1939/main.c
 create mode 100644 net/can/j1939/socket.c
 create mode 100644 net/can/j1939/transport.c

(limited to 'include')

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 37eabc17894c..0481d0ffebed 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -17,6 +17,7 @@ Contents:
    devlink-trap
    devlink-trap-netdevsim
    ieee802154
+   j1939
    kapi
    z8530book
    msg_zerocopy
diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst
new file mode 100644
index 000000000000..ce7e7a044e08
--- /dev/null
+++ b/Documentation/networking/j1939.rst
@@ -0,0 +1,422 @@
+.. SPDX-License-Identifier: (GPL-2.0 OR MIT)
+
+===================
+J1939 Documentation
+===================
+
+Overview / What Is J1939
+========================
+
+SAE J1939 defines a higher layer protocol on CAN. It implements a more
+sophisticated addressing scheme and extends the maximum packet size above 8
+bytes. Several derived specifications exist, which differ from the original
+J1939 on the application level, like MilCAN A, NMEA2000 and especially
+ISO-11783 (ISOBUS). This last one specifies the so-called ETP (Extended
+Transport Protocol) which is has been included in this implementation. This
+results in a maximum packet size of ((2 ^ 24) - 1) * 7 bytes == 111 MiB.
+
+Specifications used
+-------------------
+
+* SAE J1939-21 : data link layer
+* SAE J1939-81 : network management
+* ISO 11783-6  : Virtual Terminal (Extended Transport Protocol)
+
+.. _j1939-motivation:
+
+Motivation
+==========
+
+Given the fact there's something like SocketCAN with an API similar to BSD
+sockets, we found some reasons to justify a kernel implementation for the
+addressing and transport methods used by J1939.
+
+* **Addressing:** when a process on an ECU communicates via J1939, it should
+  not necessarily know its source address. Although at least one process per
+  ECU should know the source address. Other processes should be able to reuse
+  that address. This way, address parameters for different processes
+  cooperating for the same ECU, are not duplicated. This way of working is
+  closely related to the UNIX concept where programs do just one thing, and do
+  it well.
+
+* **Dynamic addressing:** Address Claiming in J1939 is time critical.
+  Furthermore data transport should be handled properly during the address
+  negotiation. Putting this functionality in the kernel eliminates it as a
+  requirement for _every_ user space process that communicates via J1939. This
+  results in a consistent J1939 bus with proper addressing.
+
+* **Transport:** both TP & ETP reuse some PGNs to relay big packets over them.
+  Different processes may thus use the same TP & ETP PGNs without actually
+  knowing it. The individual TP & ETP sessions _must_ be serialized
+  (synchronized) between different processes. The kernel solves this problem
+  properly and eliminates the serialization (synchronization) as a requirement
+  for _every_ user space process that communicates via J1939.
+
+J1939 defines some other features (relaying, gateway, fast packet transport,
+...). In-kernel code for these would not contribute to protocol stability.
+Therefore, these parts are left to user space.
+
+The J1939 sockets operate on CAN network devices (see SocketCAN). Any J1939
+user space library operating on CAN raw sockets will still operate properly.
+Since such library does not communicate with the in-kernel implementation, care
+must be taken that these two do not interfere. In practice, this means they
+cannot share ECU addresses. A single ECU (or virtual ECU) address is used by
+the library exclusively, or by the in-kernel system exclusively.
+
+J1939 concepts
+==============
+
+PGN
+---
+
+The PGN (Parameter Group Number) is a number to identify a packet. The PGN
+is composed as follows:
+1 bit  : Reserved Bit
+1 bit  : Data Page
+8 bits : PF (PDU Format)
+8 bits : PS (PDU Specific)
+
+In J1939-21 distinction is made between PDU1 format (where PF < 240) and PDU2
+format (where PF >= 240). Furthermore, when using PDU2 format, the PS-field
+contains a so-called Group Extension, which is part of the PGN. When using PDU2
+format, the Group Extension is set in the PS-field.
+
+On the other hand, when using PDU1 format, the PS-field contains a so-called
+Destination Address, which is _not_ part of the PGN. When communicating a PGN
+from user space to kernel (or visa versa) and PDU2 format is used, the PS-field
+of the PGN shall be set to zero. The Destination Address shall be set
+elsewhere.
+
+Regarding PGN mapping to 29-bit CAN identifier, the Destination Address shall
+be get/set from/to the appropriate bits of the identifier by the kernel.
+
+
+Addressing
+----------
+
+Both static and dynamic addressing methods can be used.
+
+For static addresses, no extra checks are made by the kernel, and provided
+addresses are considered right. This responsibility is for the OEM or system
+integrator.
+
+For dynamic addressing, so-called Address Claiming, extra support is foreseen
+in the kernel. In J1939 any ECU is known by it's 64-bit NAME. At the moment of
+a successful address claim, the kernel keeps track of both NAME and source
+address being claimed. This serves as a base for filter schemes. By default,
+packets with a destination that is not locally, will be rejected.
+
+Mixed mode packets (from a static to a dynamic address or vice versa) are
+allowed. The BSD sockets define separate API calls for getting/setting the
+local & remote address and are applicable for J1939 sockets.
+
+Filtering
+---------
+
+J1939 defines white list filters per socket that a user can set in order to
+receive a subset of the J1939 traffic. Filtering can be based on:
+
+* SA
+* SOURCE_NAME
+* PGN
+
+When multiple filters are in place for a single socket, and a packet comes in
+that matches several of those filters, the packet is only received once for
+that socket.
+
+How to Use J1939
+================
+
+API Calls
+---------
+
+On CAN, you first need to open a socket for communicating over a CAN network.
+To use J1939, #include <linux/can/j1939.h>. From there, <linux/can.h> will be
+included too. To open a socket, use:
+
+.. code-block:: C
+
+    s = socket(PF_CAN, SOCK_DGRAM, CAN_J1939);
+
+J1939 does use SOCK_DGRAM sockets. In the J1939 specification, connections are
+mentioned in the context of transport protocol sessions. These still deliver
+packets to the other end (using several CAN packets). SOCK_STREAM is not
+supported.
+
+After the successful creation of the socket, you would normally use the bind(2)
+and/or connect(2) system call to bind the socket to a CAN interface.  After
+binding and/or connecting the socket, you can read(2) and write(2) from/to the
+socket or use send(2), sendto(2), sendmsg(2) and the recv*() counterpart
+operations on the socket as usual. There are also J1939 specific socket options
+described below.
+
+In order to send data, a bind(2) must have been successful. bind(2) assigns a
+local address to a socket.
+
+Different from CAN is that the payload data is just the data that get send,
+without it's header info. The header info is derived from the sockaddr supplied
+to bind(2), connect(2), sendto(2) and recvfrom(2). A write(2) with size 4 will
+result in a packet with 4 bytes.
+
+The sockaddr structure has extensions for use with J1939 as specified below:
+
+.. code-block:: C
+
+      struct sockaddr_can {
+         sa_family_t can_family;
+         int         can_ifindex;
+         union {
+            struct {
+               __u64 name;
+                        /* pgn:
+                         * 8 bit: PS in PDU2 case, else 0
+                         * 8 bit: PF
+                         * 1 bit: DP
+                         * 1 bit: reserved
+                         */
+               __u32 pgn;
+               __u8  addr;
+            } j1939;
+         } can_addr;
+      }
+
+can_family & can_ifindex serve the same purpose as for other SocketCAN sockets.
+
+can_addr.j1939.pgn specifies the PGN (max 0x3ffff). Individual bits are
+specified above.
+
+can_addr.j1939.name contains the 64-bit J1939 NAME.
+
+can_addr.j1939.addr contains the address.
+
+The bind(2) system call assigns the local address, i.e. the source address when
+sending packages. If a PGN during bind(2) is set, it's used as a RX filter.
+I.e.  only packets with a matching PGN are received. If an ADDR or NAME is set
+it is used as a receive filter, too. It will match the destination NAME or ADDR
+of the incoming packet. The NAME filter will work only if appropriate Address
+Claiming for this name was done on the CAN bus and registered/cached by the
+kernel.
+
+On the other hand connect(2) assigns the remote address, i.e. the destination
+address. The PGN from connect(2) is used as the default PGN when sending
+packets. If ADDR or NAME is set it will be used as the default destination ADDR
+or NAME. Further a set ADDR or NAME during connect(2) is used as a receive
+filter. It will match the source NAME or ADDR of the incoming packet.
+
+Both write(2) and send(2) will send a packet with local address from bind(2) and
+the remote address from connect(2). Use sendto(2) to overwrite the destination
+address.
+
+If can_addr.j1939.name is set (!= 0) the NAME is looked up by the kernel and
+the corresponding ADDR is used. If can_addr.j1939.name is not set (== 0),
+can_addr.j1939.addr is used.
+
+When creating a socket, reasonable defaults are set. Some options can be
+modified with setsockopt(2) & getsockopt(2).
+
+RX path related options:
+
+- SO_J1939_FILTER - configure array of filters
+- SO_J1939_PROMISC - disable filters set by bind(2) and connect(2)
+
+By default no broadcast packets can be send or received. To enable sending or
+receiving broadcast packets use the socket option SO_BROADCAST:
+
+.. code-block:: C
+
+     int value = 1;
+     setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value));
+
+The following diagram illustrates the RX path:
+
+.. code::
+
+                    +--------------------+
+                    |  incoming packet   |
+                    +--------------------+
+                              |
+                              V
+                    +--------------------+
+                    | SO_J1939_PROMISC?  |
+                    +--------------------+
+                             |  |
+                         no  |  | yes
+                             |  |
+                   .---------'  `---------.
+                   |                      |
+     +---------------------------+        |
+     | bind() + connect() +      |        |
+     | SOCK_BROADCAST filter     |        |
+     +---------------------------+        |
+                   |                      |
+                   |<---------------------'
+                   V
+     +---------------------------+
+     |      SO_J1939_FILTER      |
+     +---------------------------+
+                   |
+                   V
+     +---------------------------+
+     |        socket recv()      |
+     +---------------------------+
+
+TX path related options:
+SO_J1939_SEND_PRIO - change default send priority for the socket
+
+Message Flags during send() and Related System Calls
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+send(2), sendto(2) and sendmsg(2) take a 'flags' argument. Currently
+supported flags are:
+
+* MSG_DONTWAIT, i.e. non-blocking operation.
+
+recvmsg(2)
+^^^^^^^^^
+
+In most cases recvmsg(2) is needed if you want to extract more information than
+recvfrom(2) can provide. For example package priority and timestamp. The
+Destination Address, name and packet priority (if applicable) are attached to
+the msghdr in the recvmsg(2) call. They can be extracted using cmsg(3) macros,
+with cmsg_level == SOL_J1939 && cmsg_type == SCM_J1939_DEST_ADDR,
+SCM_J1939_DEST_NAME or SCM_J1939_PRIO. The returned data is a uint8_t for
+priority and dst_addr, and uint64_t for dst_name.
+
+.. code-block:: C
+
+	uint8_t priority, dst_addr;
+	uint64_t dst_name;
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		switch (cmsg->cmsg_level) {
+		case SOL_CAN_J1939:
+			if (cmsg->cmsg_type == SCM_J1939_DEST_ADDR)
+				dst_addr = *CMSG_DATA(cmsg);
+			else if (cmsg->cmsg_type == SCM_J1939_DEST_NAME)
+				memcpy(&dst_name, CMSG_DATA(cmsg), cmsg->cmsg_len - CMSG_LEN(0));
+			else if (cmsg->cmsg_type == SCM_J1939_PRIO)
+				priority = *CMSG_DATA(cmsg);
+			break;
+		}
+	}
+
+Dynamic Addressing
+------------------
+
+Distinction has to be made between using the claimed address and doing an
+address claim. To use an already claimed address, one has to fill in the
+j1939.name member and provide it to bind(2). If the name had claimed an address
+earlier, all further messages being sent will use that address. And the
+j1939.addr member will be ignored.
+
+An exception on this is PGN 0x0ee00. This is the "Address Claim/Cannot Claim
+Address" message and the kernel will use the j1939.addr member for that PGN if
+necessary.
+
+To claim an address following code example can be used:
+
+.. code-block:: C
+
+	struct sockaddr_can baddr = {
+		.can_family = AF_CAN,
+		.can_addr.j1939 = {
+			.name = name,
+			.addr = J1939_IDLE_ADDR,
+			.pgn = J1939_NO_PGN,	/* to disable bind() rx filter for PGN */
+		},
+		.can_ifindex = if_nametoindex("can0"),
+	};
+
+	bind(sock, (struct sockaddr *)&baddr, sizeof(baddr));
+
+	/* for Address Claiming broadcast must be allowed */
+	int value = 1;
+	setsockopt(sock, SOL_SOCKET, SO_BROADCAST, &value, sizeof(value));
+
+	/* configured advanced RX filter with PGN needed for Address Claiming */
+	const struct j1939_filter filt[] = {
+		{
+			.pgn = J1939_PGN_ADDRESS_CLAIMED,
+			.pgn_mask = J1939_PGN_PDU1_MAX,
+		}, {
+			.pgn = J1939_PGN_ADDRESS_REQUEST,
+			.pgn_mask = J1939_PGN_PDU1_MAX,
+		}, {
+			.pgn = J1939_PGN_ADDRESS_COMMANDED,
+			.pgn_mask = J1939_PGN_MAX,
+		},
+	};
+
+	setsockopt(sock, SOL_CAN_J1939, SO_J1939_FILTER, &filt, sizeof(filt));
+
+	uint64_t dat = htole64(name);
+	const struct sockaddr_can saddr = {
+		.can_family = AF_CAN,
+		.can_addr.j1939 = {
+			.pgn = J1939_PGN_ADDRESS_CLAIMED,
+			.addr = J1939_NO_ADDR,
+		},
+	};
+
+	/* Afterwards do a sendto(2) with data set to the NAME (Little Endian). If the
+	 * NAME provided, does not match the j1939.name provided to bind(2), EPROTO
+	 * will be returned.
+	 */
+	sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
+
+If no-one else contests the address claim within 250ms after transmission, the
+kernel marks the NAME-SA assignment as valid. The valid assignment will be kept
+among other valid NAME-SA assignments. From that point, any socket bound to the
+NAME can send packets.
+
+If another ECU claims the address, the kernel will mark the NAME-SA expired.
+No socket bound to the NAME can send packets (other than address claims). To
+claim another address, some socket bound to NAME, must bind(2) again, but with
+only j1939.addr changed to the new SA, and must then send a valid address claim
+packet. This restarts the state machine in the kernel (and any other
+participant on the bus) for this NAME.
+
+can-utils also include the jacd tool, so it can be used as code example or as
+default Address Claiming daemon.
+
+Send Examples
+-------------
+
+Static Addressing
+^^^^^^^^^^^^^^^^^
+
+This example will send a PGN (0x12300) from SA 0x20 to DA 0x30.
+
+Bind:
+
+.. code-block:: C
+
+	struct sockaddr_can baddr = {
+		.can_family = AF_CAN,
+		.can_addr.j1939 = {
+			.name = J1939_NO_NAME,
+			.addr = 0x20,
+			.pgn = J1939_NO_PGN,
+		},
+		.can_ifindex = if_nametoindex("can0"),
+	};
+
+	bind(sock, (struct sockaddr *)&baddr, sizeof(baddr));
+
+Now, the socket 'sock' is bound to the SA 0x20. Since no connect(2) was called,
+at this point we can use only sendto(2) or sendmsg(2).
+
+Send:
+
+.. code-block:: C
+
+	const struct sockaddr_can saddr = {
+		.can_family = AF_CAN,
+		.can_addr.j1939 = {
+			.name = J1939_NO_NAME;
+			.pgn = 0x30,
+			.addr = 0x12300,
+		},
+	};
+
+	sendto(sock, dat, sizeof(dat), 0, (const struct sockaddr *)&saddr, sizeof(saddr));
diff --git a/MAINTAINERS b/MAINTAINERS
index a081c477d1d1..844f416437c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3669,6 +3669,16 @@ F:	include/uapi/linux/can/bcm.h
 F:	include/uapi/linux/can/raw.h
 F:	include/uapi/linux/can/gw.h
 
+CAN-J1939 NETWORK LAYER
+M:	Robin van der Gracht <robin@protonic.nl>
+M:	Oleksij Rempel <o.rempel@pengutronix.de>
+R:	Pengutronix Kernel Team <kernel@pengutronix.de>
+L:	linux-can@vger.kernel.org
+S:	Maintained
+F:	Documentation/networking/j1939.txt
+F:	net/can/j1939/
+F:	include/uapi/linux/can/j1939.h
+
 CAPABILITIES
 M:	Serge Hallyn <serge@hallyn.com>
 L:	linux-security-module@vger.kernel.org
diff --git a/include/linux/can/can-ml.h b/include/linux/can/can-ml.h
index 79ccf6bfa232..2f5d731ae251 100644
--- a/include/linux/can/can-ml.h
+++ b/include/linux/can/can-ml.h
@@ -60,6 +60,9 @@ struct can_dev_rcv_lists {
 
 struct can_ml_priv {
 	struct can_dev_rcv_lists dev_rcv_lists;
+#ifdef CAN_J1939
+	struct j1939_priv *j1939_priv;
+#endif
 };
 
 #endif /* CAN_ML_H */
diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h
new file mode 100644
index 000000000000..c32325342d30
--- /dev/null
+++ b/include/uapi/linux/can/j1939.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * j1939.h
+ *
+ * Copyright (c) 2010-2011 EIA Electronics
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _UAPI_CAN_J1939_H_
+#define _UAPI_CAN_J1939_H_
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/can.h>
+
+#define J1939_MAX_UNICAST_ADDR 0xfd
+#define J1939_IDLE_ADDR 0xfe
+#define J1939_NO_ADDR 0xff		/* == broadcast or no addr */
+#define J1939_NO_NAME 0
+#define J1939_PGN_REQUEST 0x0ea00		/* Request PG */
+#define J1939_PGN_ADDRESS_CLAIMED 0x0ee00	/* Address Claimed */
+#define J1939_PGN_ADDRESS_COMMANDED 0x0fed8	/* Commanded Address */
+#define J1939_PGN_PDU1_MAX 0x3ff00
+#define J1939_PGN_MAX 0x3ffff
+#define J1939_NO_PGN 0x40000
+
+/* J1939 Parameter Group Number
+ *
+ * bit 0-7	: PDU Specific (PS)
+ * bit 8-15	: PDU Format (PF)
+ * bit 16	: Data Page (DP)
+ * bit 17	: Reserved (R)
+ * bit 19-31	: set to zero
+ */
+typedef __u32 pgn_t;
+
+/* J1939 Priority
+ *
+ * bit 0-2	: Priority (P)
+ * bit 3-7	: set to zero
+ */
+typedef __u8 priority_t;
+
+/* J1939 NAME
+ *
+ * bit 0-20	: Identity Number
+ * bit 21-31	: Manufacturer Code
+ * bit 32-34	: ECU Instance
+ * bit 35-39	: Function Instance
+ * bit 40-47	: Function
+ * bit 48	: Reserved
+ * bit 49-55	: Vehicle System
+ * bit 56-59	: Vehicle System Instance
+ * bit 60-62	: Industry Group
+ * bit 63	: Arbitrary Address Capable
+ */
+typedef __u64 name_t;
+
+/* J1939 socket options */
+#define SOL_CAN_J1939 (SOL_CAN_BASE + CAN_J1939)
+enum {
+	SO_J1939_FILTER = 1,	/* set filters */
+	SO_J1939_PROMISC = 2,	/* set/clr promiscuous mode */
+	SO_J1939_SEND_PRIO = 3,
+	SO_J1939_ERRQUEUE = 4,
+};
+
+enum {
+	SCM_J1939_DEST_ADDR = 1,
+	SCM_J1939_DEST_NAME = 2,
+	SCM_J1939_PRIO = 3,
+	SCM_J1939_ERRQUEUE = 4,
+};
+
+enum {
+	J1939_NLA_PAD,
+	J1939_NLA_BYTES_ACKED,
+};
+
+enum {
+	J1939_EE_INFO_NONE,
+	J1939_EE_INFO_TX_ABORT,
+};
+
+struct j1939_filter {
+	name_t name;
+	name_t name_mask;
+	pgn_t pgn;
+	pgn_t pgn_mask;
+	__u8 addr;
+	__u8 addr_mask;
+};
+
+#define J1939_FILTER_MAX 512 /* maximum number of j1939_filter set via setsockopt() */
+
+#endif /* !_UAPI_CAN_J1939_H_ */
diff --git a/net/can/Kconfig b/net/can/Kconfig
index d4319aa3e1b1..d77042752457 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -53,6 +53,8 @@ config CAN_GW
 	  They can be modified with AND/OR/XOR/SET operations as configured
 	  by the netlink configuration interface known e.g. from iptables.
 
+source "net/can/j1939/Kconfig"
+
 source "drivers/net/can/Kconfig"
 
 endif
diff --git a/net/can/Makefile b/net/can/Makefile
index 1242bbbfe57f..08bd217fc051 100644
--- a/net/can/Makefile
+++ b/net/can/Makefile
@@ -15,3 +15,5 @@ can-bcm-y		:= bcm.o
 
 obj-$(CONFIG_CAN_GW)	+= can-gw.o
 can-gw-y		:= gw.o
+
+obj-$(CONFIG_CAN_J1939)	+= j1939/
diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig
new file mode 100644
index 000000000000..2998298b71ec
--- /dev/null
+++ b/net/can/j1939/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# SAE J1939 network layer core configuration
+#
+
+config CAN_J1939
+	tristate "SAE J1939"
+	depends on CAN
+	help
+	  SAE J1939
+	  Say Y to have in-kernel support for j1939 socket type. This
+	  allows communication according to SAE j1939.
+	  The relevant parts in kernel are
+	  SAE j1939-21 (datalink & transport protocol)
+	  & SAE j1939-81 (network management).
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile
new file mode 100644
index 000000000000..19181bdae173
--- /dev/null
+++ b/net/can/j1939/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_CAN_J1939) += can-j1939.o
+
+can-j1939-objs := \
+	address-claim.o \
+	bus.o \
+	main.o \
+	socket.o \
+	transport.o
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
new file mode 100644
index 000000000000..f33c47327927
--- /dev/null
+++ b/net/can/j1939/address-claim.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+/* J1939 Address Claiming.
+ * Address Claiming in the kernel
+ * - keeps track of the AC states of ECU's,
+ * - resolves NAME<=>SA taking into account the AC states of ECU's.
+ *
+ * All Address Claim msgs (including host-originated msg) are processed
+ * at the receive path (a sent msg is always received again via CAN echo).
+ * As such, the processing of AC msgs is done in the order on which msgs
+ * are sent on the bus.
+ *
+ * This module doesn't send msgs itself (e.g. replies on Address Claims),
+ * this is the responsibility of a user space application or daemon.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "j1939-priv.h"
+
+static inline name_t j1939_skb_to_name(const struct sk_buff *skb)
+{
+	return le64_to_cpup((__le64 *)skb->data);
+}
+
+static inline bool j1939_ac_msg_is_request(struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	int req_pgn;
+
+	if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST)
+		return false;
+
+	req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16);
+
+	return req_pgn == J1939_PGN_ADDRESS_CLAIMED;
+}
+
+static int j1939_ac_verify_outgoing(struct j1939_priv *priv,
+				    struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+	if (skb->len != 8) {
+		netdev_notice(priv->ndev, "tx address claim with dlc %i\n",
+			      skb->len);
+		return -EPROTO;
+	}
+
+	if (skcb->addr.src_name != j1939_skb_to_name(skb)) {
+		netdev_notice(priv->ndev, "tx address claim with different name\n");
+		return -EPROTO;
+	}
+
+	if (skcb->addr.sa == J1939_NO_ADDR) {
+		netdev_notice(priv->ndev, "tx address claim with broadcast sa\n");
+		return -EPROTO;
+	}
+
+	/* ac must always be a broadcast */
+	if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) {
+		netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n");
+		return -EPROTO;
+	}
+	return 0;
+}
+
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	int ret;
+	u8 addr;
+
+	/* network mgmt: address claiming msgs */
+	if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+		struct j1939_ecu *ecu;
+
+		ret = j1939_ac_verify_outgoing(priv, skb);
+		/* return both when failure & when successful */
+		if (ret < 0)
+			return ret;
+		ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name);
+		if (!ecu)
+			return -ENODEV;
+
+		if (ecu->addr != skcb->addr.sa)
+			/* hold further traffic for ecu, remove from parent */
+			j1939_ecu_unmap(ecu);
+		j1939_ecu_put(ecu);
+	} else if (skcb->addr.src_name) {
+		/* assign source address */
+		addr = j1939_name_to_addr(priv, skcb->addr.src_name);
+		if (!j1939_address_is_unicast(addr) &&
+		    !j1939_ac_msg_is_request(skb)) {
+			netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n",
+				      skcb->addr.src_name);
+			return -EADDRNOTAVAIL;
+		}
+		skcb->addr.sa = addr;
+	}
+
+	/* assign destination address */
+	if (skcb->addr.dst_name) {
+		addr = j1939_name_to_addr(priv, skcb->addr.dst_name);
+		if (!j1939_address_is_unicast(addr)) {
+			netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n",
+				      skcb->addr.dst_name);
+			return -EADDRNOTAVAIL;
+		}
+		skcb->addr.da = addr;
+	}
+	return 0;
+}
+
+static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_ecu *ecu, *prev;
+	name_t name;
+
+	if (skb->len != 8) {
+		netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n",
+			      skb->len);
+		return;
+	}
+
+	name = j1939_skb_to_name(skb);
+	skcb->addr.src_name = name;
+	if (!name) {
+		netdev_notice(priv->ndev, "rx address claim without name\n");
+		return;
+	}
+
+	if (!j1939_address_is_valid(skcb->addr.sa)) {
+		netdev_notice(priv->ndev, "rx address claim with broadcast sa\n");
+		return;
+	}
+
+	write_lock_bh(&priv->lock);
+
+	/* Few words on the ECU ref counting:
+	 *
+	 * First we get an ECU handle, either with
+	 * j1939_ecu_get_by_name_locked() (increments the ref counter)
+	 * or j1939_ecu_create_locked() (initializes an ECU object
+	 * with a ref counter of 1).
+	 *
+	 * j1939_ecu_unmap_locked() will decrement the ref counter,
+	 * but only if the ECU was mapped before. So "ecu" still
+	 * belongs to us.
+	 *
+	 * j1939_ecu_timer_start() will increment the ref counter
+	 * before it starts the timer, so we can put the ecu when
+	 * leaving this function.
+	 */
+	ecu = j1939_ecu_get_by_name_locked(priv, name);
+	if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
+		ecu = j1939_ecu_create_locked(priv, name);
+
+	if (IS_ERR_OR_NULL(ecu))
+		goto out_unlock_bh;
+
+	/* cancel pending (previous) address claim */
+	j1939_ecu_timer_cancel(ecu);
+
+	if (j1939_address_is_idle(skcb->addr.sa)) {
+		j1939_ecu_unmap_locked(ecu);
+		goto out_ecu_put;
+	}
+
+	/* save new addr */
+	if (ecu->addr != skcb->addr.sa)
+		j1939_ecu_unmap_locked(ecu);
+	ecu->addr = skcb->addr.sa;
+
+	prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa);
+	if (prev) {
+		if (ecu->name > prev->name) {
+			j1939_ecu_unmap_locked(ecu);
+			j1939_ecu_put(prev);
+			goto out_ecu_put;
+		} else {
+			/* kick prev if less or equal */
+			j1939_ecu_unmap_locked(prev);
+			j1939_ecu_put(prev);
+		}
+	}
+
+	j1939_ecu_timer_start(ecu);
+ out_ecu_put:
+	j1939_ecu_put(ecu);
+ out_unlock_bh:
+	write_unlock_bh(&priv->lock);
+}
+
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_ecu *ecu;
+
+	/* network mgmt */
+	if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) {
+		j1939_ac_process(priv, skb);
+	} else if (j1939_address_is_unicast(skcb->addr.sa)) {
+		/* assign source name */
+		ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa);
+		if (ecu) {
+			skcb->addr.src_name = ecu->name;
+			j1939_ecu_put(ecu);
+		}
+	}
+
+	/* assign destination name */
+	ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da);
+	if (ecu) {
+		skcb->addr.dst_name = ecu->name;
+		j1939_ecu_put(ecu);
+	}
+}
diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c
new file mode 100644
index 000000000000..486687901602
--- /dev/null
+++ b/net/can/j1939/bus.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+/* bus for j1939 remote devices
+ * Since rtnetlink, no real bus is used.
+ */
+
+#include <net/sock.h>
+
+#include "j1939-priv.h"
+
+static void __j1939_ecu_release(struct kref *kref)
+{
+	struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref);
+	struct j1939_priv *priv = ecu->priv;
+
+	list_del(&ecu->list);
+	kfree(ecu);
+	j1939_priv_put(priv);
+}
+
+void j1939_ecu_put(struct j1939_ecu *ecu)
+{
+	kref_put(&ecu->kref, __j1939_ecu_release);
+}
+
+static void j1939_ecu_get(struct j1939_ecu *ecu)
+{
+	kref_get(&ecu->kref);
+}
+
+static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu)
+{
+	struct j1939_priv *priv = ecu->priv;
+
+	lockdep_assert_held(&priv->lock);
+
+	return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu;
+}
+
+/* ECU device interface */
+/* map ECU to a bus address space */
+static void j1939_ecu_map_locked(struct j1939_ecu *ecu)
+{
+	struct j1939_priv *priv = ecu->priv;
+	struct j1939_addr_ent *ent;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!j1939_address_is_unicast(ecu->addr))
+		return;
+
+	ent = &priv->ents[ecu->addr];
+
+	if (ent->ecu) {
+		netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. Skip it.\n",
+			    ecu->addr, ecu->name);
+		return;
+	}
+
+	j1939_ecu_get(ecu);
+	ent->ecu = ecu;
+	ent->nusers += ecu->nusers;
+}
+
+/* unmap ECU from a bus address space */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu)
+{
+	struct j1939_priv *priv = ecu->priv;
+	struct j1939_addr_ent *ent;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!j1939_address_is_unicast(ecu->addr))
+		return;
+
+	if (!j1939_ecu_is_mapped_locked(ecu))
+		return;
+
+	ent = &priv->ents[ecu->addr];
+	ent->ecu = NULL;
+	ent->nusers -= ecu->nusers;
+	j1939_ecu_put(ecu);
+}
+
+void j1939_ecu_unmap(struct j1939_ecu *ecu)
+{
+	write_lock_bh(&ecu->priv->lock);
+	j1939_ecu_unmap_locked(ecu);
+	write_unlock_bh(&ecu->priv->lock);
+}
+
+void j1939_ecu_unmap_all(struct j1939_priv *priv)
+{
+	int i;
+
+	write_lock_bh(&priv->lock);
+	for (i = 0; i < ARRAY_SIZE(priv->ents); i++)
+		if (priv->ents[i].ecu)
+			j1939_ecu_unmap_locked(priv->ents[i].ecu);
+	write_unlock_bh(&priv->lock);
+}
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu)
+{
+	/* The ECU is held here and released in the
+	 * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel().
+	 */
+	j1939_ecu_get(ecu);
+
+	/* Schedule timer in 250 msec to commit address change. */
+	hrtimer_start(&ecu->ac_timer, ms_to_ktime(250),
+		      HRTIMER_MODE_REL_SOFT);
+}
+
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu)
+{
+	if (hrtimer_cancel(&ecu->ac_timer))
+		j1939_ecu_put(ecu);
+}
+
+static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer)
+{
+	struct j1939_ecu *ecu =
+		container_of(hrtimer, struct j1939_ecu, ac_timer);
+	struct j1939_priv *priv = ecu->priv;
+
+	write_lock_bh(&priv->lock);
+	/* TODO: can we test if ecu->addr is unicast before starting
+	 * the timer?
+	 */
+	j1939_ecu_map_locked(ecu);
+
+	/* The corresponding j1939_ecu_get() is in
+	 * j1939_ecu_timer_start().
+	 */
+	j1939_ecu_put(ecu);
+	write_unlock_bh(&priv->lock);
+
+	return HRTIMER_NORESTART;
+}
+
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	ecu = kzalloc(sizeof(*ecu), gfp_any());
+	if (!ecu)
+		return ERR_PTR(-ENOMEM);
+	kref_init(&ecu->kref);
+	ecu->addr = J1939_IDLE_ADDR;
+	ecu->name = name;
+
+	hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+	ecu->ac_timer.function = j1939_ecu_timer_handler;
+	INIT_LIST_HEAD(&ecu->list);
+
+	j1939_priv_get(priv);
+	ecu->priv = priv;
+	list_add_tail(&ecu->list, &priv->ecus);
+
+	return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+						u8 addr)
+{
+	lockdep_assert_held(&priv->lock);
+
+	return priv->ents[addr].ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!j1939_address_is_unicast(addr))
+		return NULL;
+
+	ecu = j1939_ecu_find_by_addr_locked(priv, addr);
+	if (ecu)
+		j1939_ecu_get(ecu);
+
+	return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr)
+{
+	struct j1939_ecu *ecu;
+
+	read_lock_bh(&priv->lock);
+	ecu = j1939_ecu_get_by_addr_locked(priv, addr);
+	read_unlock_bh(&priv->lock);
+
+	return ecu;
+}
+
+/* get pointer to ecu without increasing ref counter */
+static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv,
+						       name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	list_for_each_entry(ecu, &priv->ecus, list) {
+		if (ecu->name == name)
+			return ecu;
+	}
+
+	return NULL;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+					       name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	lockdep_assert_held(&priv->lock);
+
+	if (!name)
+		return NULL;
+
+	ecu = j1939_ecu_find_by_name_locked(priv, name);
+	if (ecu)
+		j1939_ecu_get(ecu);
+
+	return ecu;
+}
+
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name)
+{
+	struct j1939_ecu *ecu;
+
+	read_lock_bh(&priv->lock);
+	ecu = j1939_ecu_get_by_name_locked(priv, name);
+	read_unlock_bh(&priv->lock);
+
+	return ecu;
+}
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name)
+{
+	struct j1939_ecu *ecu;
+	int addr = J1939_IDLE_ADDR;
+
+	if (!name)
+		return J1939_NO_ADDR;
+
+	read_lock_bh(&priv->lock);
+	ecu = j1939_ecu_find_by_name_locked(priv, name);
+	if (ecu && j1939_ecu_is_mapped_locked(ecu))
+		/* ecu's SA is registered */
+		addr = ecu->addr;
+
+	read_unlock_bh(&priv->lock);
+
+	return addr;
+}
+
+/* TX addr/name accounting
+ * Transport protocol needs to know if a SA is local or not
+ * These functions originate from userspace manipulating sockets,
+ * so locking is straigforward
+ */
+
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa)
+{
+	struct j1939_ecu *ecu;
+	int err = 0;
+
+	write_lock_bh(&priv->lock);
+
+	if (j1939_address_is_unicast(sa))
+		priv->ents[sa].nusers++;
+
+	if (!name)
+		goto done;
+
+	ecu = j1939_ecu_get_by_name_locked(priv, name);
+	if (!ecu)
+		ecu = j1939_ecu_create_locked(priv, name);
+	err = PTR_ERR_OR_ZERO(ecu);
+	if (err)
+		goto done;
+
+	ecu->nusers++;
+	/* TODO: do we care if ecu->addr != sa? */
+	if (j1939_ecu_is_mapped_locked(ecu))
+		/* ecu's sa is active already */
+		priv->ents[ecu->addr].nusers++;
+
+ done:
+	write_unlock_bh(&priv->lock);
+
+	return err;
+}
+
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa)
+{
+	struct j1939_ecu *ecu;
+
+	write_lock_bh(&priv->lock);
+
+	if (j1939_address_is_unicast(sa))
+		priv->ents[sa].nusers--;
+
+	if (!name)
+		goto done;
+
+	ecu = j1939_ecu_find_by_name_locked(priv, name);
+	if (WARN_ON_ONCE(!ecu))
+		goto done;
+
+	ecu->nusers--;
+	/* TODO: do we care if ecu->addr != sa? */
+	if (j1939_ecu_is_mapped_locked(ecu))
+		/* ecu's sa is active already */
+		priv->ents[ecu->addr].nusers--;
+	j1939_ecu_put(ecu);
+
+ done:
+	write_unlock_bh(&priv->lock);
+}
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
new file mode 100644
index 000000000000..12369b604ce9
--- /dev/null
+++ b/net/can/j1939/j1939-priv.h
@@ -0,0 +1,338 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+#ifndef _J1939_PRIV_H_
+#define _J1939_PRIV_H_
+
+#include <linux/can/j1939.h>
+#include <net/sock.h>
+
+/* Timeout to receive the abort signal over loop back. In case CAN
+ * bus is open, the timeout should be triggered.
+ */
+#define J1939_XTP_ABORT_TIMEOUT_MS 500
+#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000)
+
+struct j1939_session;
+enum j1939_sk_errqueue_type {
+	J1939_ERRQUEUE_ACK,
+	J1939_ERRQUEUE_SCHED,
+	J1939_ERRQUEUE_ABORT,
+};
+
+/* j1939 devices */
+struct j1939_ecu {
+	struct list_head list;
+	name_t name;
+	u8 addr;
+
+	/* indicates that this ecu successfully claimed @sa as its address */
+	struct hrtimer ac_timer;
+	struct kref kref;
+	struct j1939_priv *priv;
+
+	/* count users, to help transport protocol decide for interaction */
+	int nusers;
+};
+
+struct j1939_priv {
+	struct list_head ecus;
+	/* local list entry in priv
+	 * These allow irq (& softirq) context lookups on j1939 devices
+	 * This approach (separate lists) is done as the other 2 alternatives
+	 * are not easier or even wrong
+	 * 1) using the pure kobject methods involves mutexes, which are not
+	 *    allowed in irq context.
+	 * 2) duplicating data structures would require a lot of synchronization
+	 *    code
+	 * usage:
+	 */
+
+	/* segments need a lock to protect the above list */
+	rwlock_t lock;
+
+	struct net_device *ndev;
+
+	/* list of 256 ecu ptrs, that cache the claimed addresses.
+	 * also protected by the above lock
+	 */
+	struct j1939_addr_ent {
+		struct j1939_ecu *ecu;
+		/* count users, to help transport protocol */
+		int nusers;
+	} ents[256];
+
+	struct kref kref;
+
+	/* List of active sessions to prevent start of conflicting
+	 * one.
+	 *
+	 * Do not start two sessions of same type, addresses and
+	 * direction.
+	 */
+	struct list_head active_session_list;
+
+	/* protects active_session_list */
+	spinlock_t active_session_list_lock;
+
+	unsigned int tp_max_packet_size;
+
+	/* lock for j1939_socks list */
+	spinlock_t j1939_socks_lock;
+	struct list_head j1939_socks;
+
+	struct kref rx_kref;
+};
+
+void j1939_ecu_put(struct j1939_ecu *ecu);
+
+/* keep the cache of what is local */
+int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa);
+void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa);
+
+static inline bool j1939_address_is_unicast(u8 addr)
+{
+	return addr <= J1939_MAX_UNICAST_ADDR;
+}
+
+static inline bool j1939_address_is_idle(u8 addr)
+{
+	return addr == J1939_IDLE_ADDR;
+}
+
+static inline bool j1939_address_is_valid(u8 addr)
+{
+	return addr != J1939_NO_ADDR;
+}
+
+static inline bool j1939_pgn_is_pdu1(pgn_t pgn)
+{
+	/* ignore dp & res bits for this */
+	return (pgn & 0xff00) < 0xf000;
+}
+
+/* utility to correctly unmap an ECU */
+void j1939_ecu_unmap_locked(struct j1939_ecu *ecu);
+void j1939_ecu_unmap(struct j1939_ecu *ecu);
+
+u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv,
+						u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv,
+					       u8 addr);
+struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name);
+struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv,
+					       name_t name);
+
+enum j1939_transfer_type {
+	J1939_TP,
+	J1939_ETP,
+	J1939_SIMPLE,
+};
+
+struct j1939_addr {
+	name_t src_name;
+	name_t dst_name;
+	pgn_t pgn;
+
+	u8 sa;
+	u8 da;
+
+	u8 type;
+};
+
+/* control buffer of the sk_buff */
+struct j1939_sk_buff_cb {
+	/* Offset in bytes within one ETP session */
+	u32 offset;
+
+	/* for tx, MSG_SYN will be used to sync on sockets */
+	u32 msg_flags;
+	u32 tskey;
+
+	struct j1939_addr addr;
+
+	/* Flags for quick lookups during skb processing.
+	 * These are set in the receive path only.
+	 */
+#define J1939_ECU_LOCAL_SRC BIT(0)
+#define J1939_ECU_LOCAL_DST BIT(1)
+	u8 flags;
+
+	priority_t priority;
+};
+
+static inline
+struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb));
+
+	return (struct j1939_sk_buff_cb *)skb->cb;
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb);
+bool j1939_sk_recv_match(struct j1939_priv *priv,
+			 struct j1939_sk_buff_cb *skcb);
+void j1939_sk_send_loop_abort(struct sock *sk, int err);
+void j1939_sk_errqueue(struct j1939_session *session,
+		       enum j1939_sk_errqueue_type type);
+void j1939_sk_queue_activate_next(struct j1939_session *session);
+
+/* stack entries */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+				    struct sk_buff *skb, size_t size);
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb);
+int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb);
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb);
+
+/* network management */
+struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name);
+
+void j1939_ecu_timer_start(struct j1939_ecu *ecu);
+void j1939_ecu_timer_cancel(struct j1939_ecu *ecu);
+void j1939_ecu_unmap_all(struct j1939_priv *priv);
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev);
+void j1939_netdev_stop(struct j1939_priv *priv);
+
+void j1939_priv_put(struct j1939_priv *priv);
+void j1939_priv_get(struct j1939_priv *priv);
+
+/* notify/alert all j1939 sockets bound to ifindex */
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv);
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk);
+void j1939_tp_init(struct j1939_priv *priv);
+
+/* decrement pending skb for a j1939 socket */
+void j1939_sock_pending_del(struct sock *sk);
+
+enum j1939_session_state {
+	J1939_SESSION_NEW,
+	J1939_SESSION_ACTIVE,
+	/* waiting for abort signal on the bus */
+	J1939_SESSION_WAITING_ABORT,
+	J1939_SESSION_ACTIVE_MAX,
+	J1939_SESSION_DONE,
+};
+
+struct j1939_session {
+	struct j1939_priv *priv;
+	struct list_head active_session_list_entry;
+	struct list_head sk_session_queue_entry;
+	struct kref kref;
+	struct sock *sk;
+
+	/* ifindex, src, dst, pgn define the session block
+	 * the are _never_ modified after insertion in the list
+	 * this decreases locking problems a _lot_
+	 */
+	struct j1939_sk_buff_cb skcb;
+	struct sk_buff_head skb_queue;
+
+	/* all tx related stuff (last_txcmd, pkt.tx)
+	 * is protected (modified only) with the txtimer hrtimer
+	 * 'total' & 'block' are never changed,
+	 * last_cmd, last & block are protected by ->lock
+	 * this means that the tx may run after cts is received that should
+	 * have stopped tx, but this time discrepancy is never avoided anyhow
+	 */
+	u8 last_cmd, last_txcmd;
+	bool transmission;
+	bool extd;
+	/* Total message size, number of bytes */
+	unsigned int total_message_size;
+	/* Total number of bytes queue from socket to the session */
+	unsigned int total_queued_size;
+	unsigned int tx_retry;
+
+	int err;
+	u32 tskey;
+	enum j1939_session_state state;
+
+	/* Packets counters for a (extended) transfer session. The packet is
+	 * maximal of 7 bytes.
+	 */
+	struct {
+		/* total - total number of packets for this session */
+		unsigned int total;
+		/* last - last packet of a transfer block after which
+		 * responder should send ETP.CM_CTS and originator
+		 * ETP.CM_DPO
+		 */
+		unsigned int last;
+		/* tx - number of packets send by originator node.
+		 * this counter can be set back if responder node
+		 * didn't received all packets send by originator.
+		 */
+		unsigned int tx;
+		unsigned int tx_acked;
+		/* rx - number of packets received */
+		unsigned int rx;
+		/* block - amount of packets expected in one block */
+		unsigned int block;
+		/* dpo - ETP.CM_DPO, Data Packet Offset */
+		unsigned int dpo;
+	} pkt;
+	struct hrtimer txtimer, rxtimer;
+};
+
+struct j1939_sock {
+	struct sock sk; /* must be first to skip with memset */
+	struct j1939_priv *priv;
+	struct list_head list;
+
+#define J1939_SOCK_BOUND BIT(0)
+#define J1939_SOCK_CONNECTED BIT(1)
+#define J1939_SOCK_PROMISC BIT(2)
+#define J1939_SOCK_ERRQUEUE BIT(3)
+	int state;
+
+	int ifindex;
+	struct j1939_addr addr;
+	struct j1939_filter *filters;
+	int nfilters;
+	pgn_t pgn_rx_filter;
+
+	/* j1939 may emit equal PGN (!= equal CAN-id's) out of order
+	 * when transport protocol comes in.
+	 * To allow emitting in order, keep a 'pending' nr. of packets
+	 */
+	atomic_t skb_pending;
+	wait_queue_head_t waitq;
+
+	/* lock for the sk_session_queue list */
+	spinlock_t sk_session_queue_lock;
+	struct list_head sk_session_queue;
+};
+
+static inline struct j1939_sock *j1939_sk(const struct sock *sk)
+{
+	return container_of(sk, struct j1939_sock, sk);
+}
+
+void j1939_session_get(struct j1939_session *session);
+void j1939_session_put(struct j1939_session *session);
+void j1939_session_skb_queue(struct j1939_session *session,
+			     struct sk_buff *skb);
+int j1939_session_activate(struct j1939_session *session);
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
+void j1939_session_timers_cancel(struct j1939_session *session);
+
+#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
+#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
+
+#define J1939_REGULAR 0
+#define J1939_EXTENDED 1
+
+/* CAN protocol */
+extern const struct can_proto j1939_can_proto;
+
+#endif /* _J1939_PRIV_H_ */
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
new file mode 100644
index 000000000000..def2f813ffce
--- /dev/null
+++ b/net/can/j1939/main.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+//                         Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+/* Core of can-j1939 that links j1939 to CAN. */
+
+#include <linux/can/can-ml.h>
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/if_arp.h>
+#include <linux/module.h>
+
+#include "j1939-priv.h"
+
+MODULE_DESCRIPTION("PF_CAN SAE J1939");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)");
+MODULE_ALIAS("can-proto-" __stringify(CAN_J1939));
+
+/* LOWLEVEL CAN interface */
+
+/* CAN_HDR: #bytes before can_frame data part */
+#define J1939_CAN_HDR (offsetof(struct can_frame, data))
+
+/* CAN_FTR: #bytes beyond data part */
+#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \
+		 sizeof(((struct can_frame *)0)->data))
+
+/* lowest layer */
+static void j1939_can_recv(struct sk_buff *iskb, void *data)
+{
+	struct j1939_priv *priv = data;
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb, *iskcb;
+	struct can_frame *cf;
+
+	/* create a copy of the skb
+	 * j1939 only delivers the real data bytes,
+	 * the header goes into sockaddr.
+	 * j1939 may not touch the incoming skb in such way
+	 */
+	skb = skb_clone(iskb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	can_skb_set_owner(skb, iskb->sk);
+
+	/* get a pointer to the header of the skb
+	 * the skb payload (pointer) is moved, so that the next skb_data
+	 * returns the actual payload
+	 */
+	cf = (void *)skb->data;
+	skb_pull(skb, J1939_CAN_HDR);
+
+	/* fix length, set to dlc, with 8 maximum */
+	skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8));
+
+	/* set addr */
+	skcb = j1939_skb_to_cb(skb);
+	memset(skcb, 0, sizeof(*skcb));
+
+	iskcb = j1939_skb_to_cb(iskb);
+	skcb->tskey = iskcb->tskey;
+	skcb->priority = (cf->can_id >> 26) & 0x7;
+	skcb->addr.sa = cf->can_id;
+	skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX;
+	/* set default message type */
+	skcb->addr.type = J1939_TP;
+	if (j1939_pgn_is_pdu1(skcb->addr.pgn)) {
+		/* Type 1: with destination address */
+		skcb->addr.da = skcb->addr.pgn;
+		/* normalize pgn: strip dst address */
+		skcb->addr.pgn &= 0x3ff00;
+	} else {
+		/* set broadcast address */
+		skcb->addr.da = J1939_NO_ADDR;
+	}
+
+	/* update localflags */
+	read_lock_bh(&priv->lock);
+	if (j1939_address_is_unicast(skcb->addr.sa) &&
+	    priv->ents[skcb->addr.sa].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_SRC;
+	if (j1939_address_is_unicast(skcb->addr.da) &&
+	    priv->ents[skcb->addr.da].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_DST;
+	read_unlock_bh(&priv->lock);
+
+	/* deliver into the j1939 stack ... */
+	j1939_ac_recv(priv, skb);
+
+	if (j1939_tp_recv(priv, skb))
+		/* this means the transport layer processed the message */
+		goto done;
+
+	j1939_simple_recv(priv, skb);
+	j1939_sk_recv(priv, skb);
+ done:
+	kfree_skb(skb);
+}
+
+/* NETDEV MANAGEMENT */
+
+/* values for can_rx_(un)register */
+#define J1939_CAN_ID CAN_EFF_FLAG
+#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG)
+
+static DEFINE_SPINLOCK(j1939_netdev_lock);
+
+static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
+{
+	struct j1939_priv *priv;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return NULL;
+
+	rwlock_init(&priv->lock);
+	INIT_LIST_HEAD(&priv->ecus);
+	priv->ndev = ndev;
+	kref_init(&priv->kref);
+	kref_init(&priv->rx_kref);
+	dev_hold(ndev);
+
+	netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
+
+	return priv;
+}
+
+static inline void j1939_priv_set(struct net_device *ndev,
+				  struct j1939_priv *priv)
+{
+	struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+	can_ml_priv->j1939_priv = priv;
+}
+
+static void __j1939_priv_release(struct kref *kref)
+{
+	struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref);
+	struct net_device *ndev = priv->ndev;
+
+	netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
+
+	dev_put(ndev);
+	kfree(priv);
+}
+
+void j1939_priv_put(struct j1939_priv *priv)
+{
+	kref_put(&priv->kref, __j1939_priv_release);
+}
+
+void j1939_priv_get(struct j1939_priv *priv)
+{
+	kref_get(&priv->kref);
+}
+
+static int j1939_can_rx_register(struct j1939_priv *priv)
+{
+	struct net_device *ndev = priv->ndev;
+	int ret;
+
+	j1939_priv_get(priv);
+	ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+			      j1939_can_recv, priv, "j1939", NULL);
+	if (ret < 0) {
+		j1939_priv_put(priv);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void j1939_can_rx_unregister(struct j1939_priv *priv)
+{
+	struct net_device *ndev = priv->ndev;
+
+	can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK,
+			  j1939_can_recv, priv);
+
+	j1939_priv_put(priv);
+}
+
+static void __j1939_rx_release(struct kref *kref)
+	__releases(&j1939_netdev_lock)
+{
+	struct j1939_priv *priv = container_of(kref, struct j1939_priv,
+					       rx_kref);
+
+	j1939_can_rx_unregister(priv);
+	j1939_ecu_unmap_all(priv);
+	j1939_priv_set(priv->ndev, NULL);
+	spin_unlock(&j1939_netdev_lock);
+}
+
+/* get pointer to priv without increasing ref counter */
+static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
+{
+	struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+
+	return can_ml_priv->j1939_priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
+{
+	struct j1939_priv *priv;
+
+	lockdep_assert_held(&j1939_netdev_lock);
+
+	if (ndev->type != ARPHRD_CAN)
+		return NULL;
+
+	priv = j1939_ndev_to_priv(ndev);
+	if (priv)
+		j1939_priv_get(priv);
+
+	return priv;
+}
+
+static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev)
+{
+	struct j1939_priv *priv;
+
+	spin_lock(&j1939_netdev_lock);
+	priv = j1939_priv_get_by_ndev_locked(ndev);
+	spin_unlock(&j1939_netdev_lock);
+
+	return priv;
+}
+
+struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
+{
+	struct j1939_priv *priv, *priv_new;
+	int ret;
+
+	priv = j1939_priv_get_by_ndev(ndev);
+	if (priv) {
+		kref_get(&priv->rx_kref);
+		return priv;
+	}
+
+	priv = j1939_priv_create(ndev);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+
+	j1939_tp_init(priv);
+	spin_lock_init(&priv->j1939_socks_lock);
+	INIT_LIST_HEAD(&priv->j1939_socks);
+
+	spin_lock(&j1939_netdev_lock);
+	priv_new = j1939_priv_get_by_ndev_locked(ndev);
+	if (priv_new) {
+		/* Someone was faster than us, use their priv and roll
+		 * back our's.
+		 */
+		spin_unlock(&j1939_netdev_lock);
+		dev_put(ndev);
+		kfree(priv);
+		kref_get(&priv_new->rx_kref);
+		return priv_new;
+	}
+	j1939_priv_set(ndev, priv);
+	spin_unlock(&j1939_netdev_lock);
+
+	ret = j1939_can_rx_register(priv);
+	if (ret < 0)
+		goto out_priv_put;
+
+	return priv;
+
+ out_priv_put:
+	j1939_priv_set(ndev, NULL);
+	dev_put(ndev);
+	kfree(priv);
+
+	return ERR_PTR(ret);
+}
+
+void j1939_netdev_stop(struct j1939_priv *priv)
+{
+	kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock);
+	j1939_priv_put(priv);
+}
+
+int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	int ret, dlc;
+	canid_t canid;
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct can_frame *cf;
+
+	/* apply sanity checks */
+	if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+		skcb->addr.pgn &= J1939_PGN_PDU1_MAX;
+	else
+		skcb->addr.pgn &= J1939_PGN_MAX;
+
+	if (skcb->priority > 7)
+		skcb->priority = 6;
+
+	ret = j1939_ac_fixup(priv, skb);
+	if (unlikely(ret))
+		goto failed;
+	dlc = skb->len;
+
+	/* re-claim the CAN_HDR from the SKB */
+	cf = skb_push(skb, J1939_CAN_HDR);
+
+	/* make it a full can frame again */
+	skb_put(skb, J1939_CAN_FTR + (8 - dlc));
+
+	canid = CAN_EFF_FLAG |
+		(skcb->priority << 26) |
+		(skcb->addr.pgn << 8) |
+		skcb->addr.sa;
+	if (j1939_pgn_is_pdu1(skcb->addr.pgn))
+		canid |= skcb->addr.da << 8;
+
+	cf->can_id = canid;
+	cf->can_dlc = dlc;
+
+	return can_send(skb, 1);
+
+ failed:
+	kfree_skb(skb);
+	return ret;
+}
+
+static int j1939_netdev_notify(struct notifier_block *nb,
+			       unsigned long msg, void *data)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(data);
+	struct j1939_priv *priv;
+
+	priv = j1939_priv_get_by_ndev(ndev);
+	if (!priv)
+		goto notify_done;
+
+	if (ndev->type != ARPHRD_CAN)
+		goto notify_put;
+
+	switch (msg) {
+	case NETDEV_DOWN:
+		j1939_cancel_active_session(priv, NULL);
+		j1939_sk_netdev_event_netdown(priv);
+		j1939_ecu_unmap_all(priv);
+		break;
+	}
+
+notify_put:
+	j1939_priv_put(priv);
+
+notify_done:
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block j1939_netdev_notifier = {
+	.notifier_call = j1939_netdev_notify,
+};
+
+/* MODULE interface */
+static __init int j1939_module_init(void)
+{
+	int ret;
+
+	pr_info("can: SAE J1939\n");
+
+	ret = register_netdevice_notifier(&j1939_netdev_notifier);
+	if (ret)
+		goto fail_notifier;
+
+	ret = can_proto_register(&j1939_can_proto);
+	if (ret < 0) {
+		pr_err("can: registration of j1939 protocol failed\n");
+		goto fail_sk;
+	}
+
+	return 0;
+
+ fail_sk:
+	unregister_netdevice_notifier(&j1939_netdev_notifier);
+ fail_notifier:
+	return ret;
+}
+
+static __exit void j1939_module_exit(void)
+{
+	can_proto_unregister(&j1939_can_proto);
+
+	unregister_netdevice_notifier(&j1939_netdev_notifier);
+}
+
+module_init(j1939_module_init);
+module_exit(j1939_module_exit);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
new file mode 100644
index 000000000000..37c1040bcb9c
--- /dev/null
+++ b/net/can/j1939/socket.c
@@ -0,0 +1,1160 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Pieter Beyens <pieter.beyens@eia.be>
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+//                         Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/can/core.h>
+#include <linux/can/skb.h>
+#include <linux/errqueue.h>
+#include <linux/if_arp.h>
+
+#include "j1939-priv.h"
+
+#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939)
+
+/* conversion function between struct sock::sk_priority from linux and
+ * j1939 priority field
+ */
+static inline priority_t j1939_prio(u32 sk_priority)
+{
+	sk_priority = min(sk_priority, 7U);
+
+	return 7 - sk_priority;
+}
+
+static inline u32 j1939_to_sk_priority(priority_t prio)
+{
+	return 7 - prio;
+}
+
+/* function to see if pgn is to be evaluated */
+static inline bool j1939_pgn_is_valid(pgn_t pgn)
+{
+	return pgn <= J1939_PGN_MAX;
+}
+
+/* test function to avoid non-zero DA placeholder for pdu1 pgn's */
+static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn)
+{
+	if (j1939_pgn_is_pdu1(pgn))
+		return !(pgn & 0xff);
+	else
+		return true;
+}
+
+static inline void j1939_sock_pending_add(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	atomic_inc(&jsk->skb_pending);
+}
+
+static int j1939_sock_pending_get(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	return atomic_read(&jsk->skb_pending);
+}
+
+void j1939_sock_pending_del(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	/* atomic_dec_return returns the new value */
+	if (!atomic_dec_return(&jsk->skb_pending))
+		wake_up(&jsk->waitq);	/* no pending SKB's */
+}
+
+static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+	jsk->state |= J1939_SOCK_BOUND;
+	j1939_priv_get(priv);
+	jsk->priv = priv;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_add_tail(&jsk->list, &priv->j1939_socks);
+	spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
+{
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_del_init(&jsk->list);
+	spin_unlock_bh(&priv->j1939_socks_lock);
+
+	jsk->priv = NULL;
+	j1939_priv_put(priv);
+	jsk->state &= ~J1939_SOCK_BOUND;
+}
+
+static bool j1939_sk_queue_session(struct j1939_session *session)
+{
+	struct j1939_sock *jsk = j1939_sk(session->sk);
+	bool empty;
+
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	empty = list_empty(&jsk->sk_session_queue);
+	j1939_session_get(session);
+	list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue);
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+	j1939_sock_pending_add(&jsk->sk);
+
+	return empty;
+}
+
+static struct
+j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk)
+{
+	struct j1939_session *session = NULL;
+
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	if (!list_empty(&jsk->sk_session_queue)) {
+		session = list_last_entry(&jsk->sk_session_queue,
+					  struct j1939_session,
+					  sk_session_queue_entry);
+		if (session->total_queued_size == session->total_message_size)
+			session = NULL;
+		else
+			j1939_session_get(session);
+	}
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+
+	return session;
+}
+
+static void j1939_sk_queue_drop_all(struct j1939_priv *priv,
+				    struct j1939_sock *jsk, int err)
+{
+	struct j1939_session *session, *tmp;
+
+	netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err);
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue,
+				 sk_session_queue_entry) {
+		list_del_init(&session->sk_session_queue_entry);
+		session->err = err;
+		j1939_session_put(session);
+	}
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static void j1939_sk_queue_activate_next_locked(struct j1939_session *session)
+{
+	struct j1939_sock *jsk;
+	struct j1939_session *first;
+	int err;
+
+	/* RX-Session don't have a socket (yet) */
+	if (!session->sk)
+		return;
+
+	jsk = j1939_sk(session->sk);
+	lockdep_assert_held(&jsk->sk_session_queue_lock);
+
+	err = session->err;
+
+	first = list_first_entry_or_null(&jsk->sk_session_queue,
+					 struct j1939_session,
+					 sk_session_queue_entry);
+
+	/* Some else has already activated the next session */
+	if (first != session)
+		return;
+
+activate_next:
+	list_del_init(&first->sk_session_queue_entry);
+	j1939_session_put(first);
+	first = list_first_entry_or_null(&jsk->sk_session_queue,
+					 struct j1939_session,
+					 sk_session_queue_entry);
+	if (!first)
+		return;
+
+	if (WARN_ON_ONCE(j1939_session_activate(first))) {
+		first->err = -EBUSY;
+		goto activate_next;
+	} else {
+		/* Give receiver some time (arbitrary chosen) to recover */
+		int time_ms = 0;
+
+		if (err)
+			time_ms = 10 + prandom_u32_max(16);
+
+		j1939_tp_schedule_txtimer(first, time_ms);
+	}
+}
+
+void j1939_sk_queue_activate_next(struct j1939_session *session)
+{
+	struct j1939_sock *jsk;
+
+	if (!session->sk)
+		return;
+
+	jsk = j1939_sk(session->sk);
+
+	spin_lock_bh(&jsk->sk_session_queue_lock);
+	j1939_sk_queue_activate_next_locked(session);
+	spin_unlock_bh(&jsk->sk_session_queue_lock);
+}
+
+static bool j1939_sk_match_dst(struct j1939_sock *jsk,
+			       const struct j1939_sk_buff_cb *skcb)
+{
+	if ((jsk->state & J1939_SOCK_PROMISC))
+		return true;
+
+	/* Destination address filter */
+	if (jsk->addr.src_name && skcb->addr.dst_name) {
+		if (jsk->addr.src_name != skcb->addr.dst_name)
+			return false;
+	} else {
+		/* receive (all sockets) if
+		 * - all packages that match our bind() address
+		 * - all broadcast on a socket if SO_BROADCAST
+		 *   is set
+		 */
+		if (j1939_address_is_unicast(skcb->addr.da)) {
+			if (jsk->addr.sa != skcb->addr.da)
+				return false;
+		} else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+			/* receiving broadcast without SO_BROADCAST
+			 * flag is not allowed
+			 */
+			return false;
+		}
+	}
+
+	/* Source address filter */
+	if (jsk->state & J1939_SOCK_CONNECTED) {
+		/* receive (all sockets) if
+		 * - all packages that match our connect() name or address
+		 */
+		if (jsk->addr.dst_name && skcb->addr.src_name) {
+			if (jsk->addr.dst_name != skcb->addr.src_name)
+				return false;
+		} else {
+			if (jsk->addr.da != skcb->addr.sa)
+				return false;
+		}
+	}
+
+	/* PGN filter */
+	if (j1939_pgn_is_valid(jsk->pgn_rx_filter) &&
+	    jsk->pgn_rx_filter != skcb->addr.pgn)
+		return false;
+
+	return true;
+}
+
+/* matches skb control buffer (addr) with a j1939 filter */
+static bool j1939_sk_match_filter(struct j1939_sock *jsk,
+				  const struct j1939_sk_buff_cb *skcb)
+{
+	const struct j1939_filter *f = jsk->filters;
+	int nfilter = jsk->nfilters;
+
+	if (!nfilter)
+		/* receive all when no filters are assigned */
+		return true;
+
+	for (; nfilter; ++f, --nfilter) {
+		if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
+			continue;
+		if ((skcb->addr.sa & f->addr_mask) != f->addr)
+			continue;
+		if ((skcb->addr.src_name & f->name_mask) != f->name)
+			continue;
+		return true;
+	}
+	return false;
+}
+
+static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
+				    const struct j1939_sk_buff_cb *skcb)
+{
+	if (!(jsk->state & J1939_SOCK_BOUND))
+		return false;
+
+	if (!j1939_sk_match_dst(jsk, skcb))
+		return false;
+
+	if (!j1939_sk_match_filter(jsk, skcb))
+		return false;
+
+	return true;
+}
+
+static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb)
+{
+	const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb);
+	struct j1939_sk_buff_cb *skcb;
+	struct sk_buff *skb;
+
+	if (oskb->sk == &jsk->sk)
+		return;
+
+	if (!j1939_sk_recv_match_one(jsk, oskcb))
+		return;
+
+	skb = skb_clone(oskb, GFP_ATOMIC);
+	if (!skb) {
+		pr_warn("skb clone failed\n");
+		return;
+	}
+	can_skb_set_owner(skb, oskb->sk);
+
+	skcb = j1939_skb_to_cb(skb);
+	skcb->msg_flags &= ~(MSG_DONTROUTE);
+	if (skb->sk)
+		skcb->msg_flags |= MSG_DONTROUTE;
+
+	if (sock_queue_rcv_skb(&jsk->sk, skb) < 0)
+		kfree_skb(skb);
+}
+
+bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
+{
+	struct j1939_sock *jsk;
+	bool match = false;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_for_each_entry(jsk, &priv->j1939_socks, list) {
+		match = j1939_sk_recv_match_one(jsk, skcb);
+		if (match)
+			break;
+	}
+	spin_unlock_bh(&priv->j1939_socks_lock);
+
+	return match;
+}
+
+void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sock *jsk;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_for_each_entry(jsk, &priv->j1939_socks, list) {
+		j1939_sk_recv_one(jsk, skb);
+	}
+	spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_init(struct sock *sk)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+
+	/* Ensure that "sk" is first member in "struct j1939_sock", so that we
+	 * can skip it during memset().
+	 */
+	BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0);
+	memset((void *)jsk + sizeof(jsk->sk), 0x0,
+	       sizeof(*jsk) - sizeof(jsk->sk));
+
+	INIT_LIST_HEAD(&jsk->list);
+	init_waitqueue_head(&jsk->waitq);
+	jsk->sk.sk_priority = j1939_to_sk_priority(6);
+	jsk->sk.sk_reuse = 1; /* per default */
+	jsk->addr.sa = J1939_NO_ADDR;
+	jsk->addr.da = J1939_NO_ADDR;
+	jsk->addr.pgn = J1939_NO_PGN;
+	jsk->pgn_rx_filter = J1939_NO_PGN;
+	atomic_set(&jsk->skb_pending, 0);
+	spin_lock_init(&jsk->sk_session_queue_lock);
+	INIT_LIST_HEAD(&jsk->sk_session_queue);
+
+	return 0;
+}
+
+static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len)
+{
+	if (!addr)
+		return -EDESTADDRREQ;
+	if (len < J1939_MIN_NAMELEN)
+		return -EINVAL;
+	if (addr->can_family != AF_CAN)
+		return -EINVAL;
+	if (!addr->can_ifindex)
+		return -ENODEV;
+	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+	    !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct j1939_sock *jsk = j1939_sk(sock->sk);
+	struct j1939_priv *priv = jsk->priv;
+	struct sock *sk = sock->sk;
+	struct net *net = sock_net(sk);
+	int ret = 0;
+
+	ret = j1939_sk_sanity_check(addr, len);
+	if (ret)
+		return ret;
+
+	lock_sock(sock->sk);
+
+	/* Already bound to an interface? */
+	if (jsk->state & J1939_SOCK_BOUND) {
+		/* A re-bind() to a different interface is not
+		 * supported.
+		 */
+		if (jsk->ifindex != addr->can_ifindex) {
+			ret = -EINVAL;
+			goto out_release_sock;
+		}
+
+		/* drop old references */
+		j1939_jsk_del(priv, jsk);
+		j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
+	} else {
+		struct net_device *ndev;
+
+		ndev = dev_get_by_index(net, addr->can_ifindex);
+		if (!ndev) {
+			ret = -ENODEV;
+			goto out_release_sock;
+		}
+
+		if (ndev->type != ARPHRD_CAN) {
+			dev_put(ndev);
+			ret = -ENODEV;
+			goto out_release_sock;
+		}
+
+		priv = j1939_netdev_start(ndev);
+		dev_put(ndev);
+		if (IS_ERR(priv)) {
+			ret = PTR_ERR(priv);
+			goto out_release_sock;
+		}
+
+		jsk->ifindex = addr->can_ifindex;
+	}
+
+	/* set default transmit pgn */
+	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+		jsk->pgn_rx_filter = addr->can_addr.j1939.pgn;
+	jsk->addr.src_name = addr->can_addr.j1939.name;
+	jsk->addr.sa = addr->can_addr.j1939.addr;
+
+	/* get new references */
+	ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa);
+	if (ret) {
+		j1939_netdev_stop(priv);
+		goto out_release_sock;
+	}
+
+	j1939_jsk_add(priv, jsk);
+
+ out_release_sock: /* fall through */
+	release_sock(sock->sk);
+
+	return ret;
+}
+
+static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr,
+			    int len, int flags)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct j1939_sock *jsk = j1939_sk(sock->sk);
+	int ret = 0;
+
+	ret = j1939_sk_sanity_check(addr, len);
+	if (ret)
+		return ret;
+
+	lock_sock(sock->sk);
+
+	/* bind() before connect() is mandatory */
+	if (!(jsk->state & J1939_SOCK_BOUND)) {
+		ret = -EINVAL;
+		goto out_release_sock;
+	}
+
+	/* A connect() to a different interface is not supported. */
+	if (jsk->ifindex != addr->can_ifindex) {
+		ret = -EINVAL;
+		goto out_release_sock;
+	}
+
+	if (!addr->can_addr.j1939.name &&
+	    addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+	    !sock_flag(&jsk->sk, SOCK_BROADCAST)) {
+		/* broadcast, but SO_BROADCAST not set */
+		ret = -EACCES;
+		goto out_release_sock;
+	}
+
+	jsk->addr.dst_name = addr->can_addr.j1939.name;
+	jsk->addr.da = addr->can_addr.j1939.addr;
+
+	if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+		jsk->addr.pgn = addr->can_addr.j1939.pgn;
+
+	jsk->state |= J1939_SOCK_CONNECTED;
+
+ out_release_sock: /* fall through */
+	release_sock(sock->sk);
+
+	return ret;
+}
+
+static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr,
+				       const struct j1939_sock *jsk, int peer)
+{
+	addr->can_family = AF_CAN;
+	addr->can_ifindex = jsk->ifindex;
+	addr->can_addr.j1939.pgn = jsk->addr.pgn;
+	if (peer) {
+		addr->can_addr.j1939.name = jsk->addr.dst_name;
+		addr->can_addr.j1939.addr = jsk->addr.da;
+	} else {
+		addr->can_addr.j1939.name = jsk->addr.src_name;
+		addr->can_addr.j1939.addr = jsk->addr.sa;
+	}
+}
+
+static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int peer)
+{
+	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	int ret = 0;
+
+	lock_sock(sk);
+
+	if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) {
+		ret = -EADDRNOTAVAIL;
+		goto failure;
+	}
+
+	j1939_sk_sock2sockaddr_can(addr, jsk, peer);
+	ret = J1939_MIN_NAMELEN;
+
+ failure:
+	release_sock(sk);
+
+	return ret;
+}
+
+static int j1939_sk_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk;
+
+	if (!sk)
+		return 0;
+
+	jsk = j1939_sk(sk);
+	lock_sock(sk);
+
+	if (jsk->state & J1939_SOCK_BOUND) {
+		struct j1939_priv *priv = jsk->priv;
+
+		if (wait_event_interruptible(jsk->waitq,
+					     !j1939_sock_pending_get(&jsk->sk))) {
+			j1939_cancel_active_session(priv, sk);
+			j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN);
+		}
+
+		j1939_jsk_del(priv, jsk);
+
+		j1939_local_ecu_put(priv, jsk->addr.src_name,
+				    jsk->addr.sa);
+
+		j1939_netdev_stop(priv);
+	}
+
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	release_sock(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval,
+				    unsigned int optlen, int flag)
+{
+	int tmp;
+
+	if (optlen != sizeof(tmp))
+		return -EINVAL;
+	if (copy_from_user(&tmp, optval, optlen))
+		return -EFAULT;
+	lock_sock(&jsk->sk);
+	if (tmp)
+		jsk->state |= flag;
+	else
+		jsk->state &= ~flag;
+	release_sock(&jsk->sk);
+	return tmp;
+}
+
+static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	int tmp, count = 0, ret = 0;
+	struct j1939_filter *filters = NULL, *ofilters;
+
+	if (level != SOL_CAN_J1939)
+		return -EINVAL;
+
+	switch (optname) {
+	case SO_J1939_FILTER:
+		if (optval) {
+			struct j1939_filter *f;
+			int c;
+
+			if (optlen % sizeof(*filters) != 0)
+				return -EINVAL;
+
+			if (optlen > J1939_FILTER_MAX *
+			    sizeof(struct j1939_filter))
+				return -EINVAL;
+
+			count = optlen / sizeof(*filters);
+			filters = memdup_user(optval, optlen);
+			if (IS_ERR(filters))
+				return PTR_ERR(filters);
+
+			for (f = filters, c = count; c; f++, c--) {
+				f->name &= f->name_mask;
+				f->pgn &= f->pgn_mask;
+				f->addr &= f->addr_mask;
+			}
+		}
+
+		lock_sock(&jsk->sk);
+		ofilters = jsk->filters;
+		jsk->filters = filters;
+		jsk->nfilters = count;
+		release_sock(&jsk->sk);
+		kfree(ofilters);
+		return 0;
+	case SO_J1939_PROMISC:
+		return j1939_sk_setsockopt_flag(jsk, optval, optlen,
+						J1939_SOCK_PROMISC);
+	case SO_J1939_ERRQUEUE:
+		ret = j1939_sk_setsockopt_flag(jsk, optval, optlen,
+					       J1939_SOCK_ERRQUEUE);
+		if (ret < 0)
+			return ret;
+
+		if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+			skb_queue_purge(&sk->sk_error_queue);
+		return ret;
+	case SO_J1939_SEND_PRIO:
+		if (optlen != sizeof(tmp))
+			return -EINVAL;
+		if (copy_from_user(&tmp, optval, optlen))
+			return -EFAULT;
+		if (tmp < 0 || tmp > 7)
+			return -EDOM;
+		if (tmp < 2 && !capable(CAP_NET_ADMIN))
+			return -EPERM;
+		lock_sock(&jsk->sk);
+		jsk->sk.sk_priority = j1939_to_sk_priority(tmp);
+		release_sock(&jsk->sk);
+		return 0;
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+static int j1939_sk_getsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	int ret, ulen;
+	/* set defaults for using 'int' properties */
+	int tmp = 0;
+	int len = sizeof(tmp);
+	void *val = &tmp;
+
+	if (level != SOL_CAN_J1939)
+		return -EINVAL;
+	if (get_user(ulen, optlen))
+		return -EFAULT;
+	if (ulen < 0)
+		return -EINVAL;
+
+	lock_sock(&jsk->sk);
+	switch (optname) {
+	case SO_J1939_PROMISC:
+		tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0;
+		break;
+	case SO_J1939_ERRQUEUE:
+		tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 1 : 0;
+		break;
+	case SO_J1939_SEND_PRIO:
+		tmp = j1939_prio(jsk->sk.sk_priority);
+		break;
+	default:
+		ret = -ENOPROTOOPT;
+		goto no_copy;
+	}
+
+	/* copy to user, based on 'len' & 'val'
+	 * but most sockopt's are 'int' properties, and have 'len' & 'val'
+	 * left unchanged, but instead modified 'tmp'
+	 */
+	if (len > ulen)
+		ret = -EFAULT;
+	else if (put_user(len, optlen))
+		ret = -EFAULT;
+	else if (copy_to_user(optval, val, len))
+		ret = -EFAULT;
+	else
+		ret = 0;
+ no_copy:
+	release_sock(&jsk->sk);
+	return ret;
+}
+
+static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg,
+			    size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb;
+	int ret = 0;
+
+	if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE))
+		return -EINVAL;
+
+	if (flags & MSG_ERRQUEUE)
+		return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939,
+					  SCM_J1939_ERRQUEUE);
+
+	skb = skb_recv_datagram(sk, flags, 0, &ret);
+	if (!skb)
+		return ret;
+
+	if (size < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+	else
+		size = skb->len;
+
+	ret = memcpy_to_msg(msg, skb->data, size);
+	if (ret < 0) {
+		skb_free_datagram(sk, skb);
+		return ret;
+	}
+
+	skcb = j1939_skb_to_cb(skb);
+	if (j1939_address_is_valid(skcb->addr.da))
+		put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR,
+			 sizeof(skcb->addr.da), &skcb->addr.da);
+
+	if (skcb->addr.dst_name)
+		put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME,
+			 sizeof(skcb->addr.dst_name), &skcb->addr.dst_name);
+
+	put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO,
+		 sizeof(skcb->priority), &skcb->priority);
+
+	if (msg->msg_name) {
+		struct sockaddr_can *paddr = msg->msg_name;
+
+		msg->msg_namelen = J1939_MIN_NAMELEN;
+		memset(msg->msg_name, 0, msg->msg_namelen);
+		paddr->can_family = AF_CAN;
+		paddr->can_ifindex = skb->skb_iif;
+		paddr->can_addr.j1939.name = skcb->addr.src_name;
+		paddr->can_addr.j1939.addr = skcb->addr.sa;
+		paddr->can_addr.j1939.pgn = skcb->addr.pgn;
+	}
+
+	sock_recv_ts_and_drops(msg, sk, skb);
+	msg->msg_flags |= skcb->msg_flags;
+	skb_free_datagram(sk, skb);
+
+	return size;
+}
+
+static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev,
+					  struct sock *sk,
+					  struct msghdr *msg, size_t size,
+					  int *errcode)
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+	struct j1939_sk_buff_cb *skcb;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = sock_alloc_send_skb(sk,
+				  size +
+				  sizeof(struct can_frame) -
+				  sizeof(((struct can_frame *)NULL)->data) +
+				  sizeof(struct can_skb_priv),
+				  msg->msg_flags & MSG_DONTWAIT, &ret);
+	if (!skb)
+		goto failure;
+
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = ndev->ifindex;
+	can_skb_prv(skb)->skbcnt = 0;
+	skb_reserve(skb, offsetof(struct can_frame, data));
+
+	ret = memcpy_from_msg(skb_put(skb, size), msg, size);
+	if (ret < 0)
+		goto free_skb;
+
+	skb->dev = ndev;
+
+	skcb = j1939_skb_to_cb(skb);
+	memset(skcb, 0, sizeof(*skcb));
+	skcb->addr = jsk->addr;
+	skcb->priority = j1939_prio(sk->sk_priority);
+
+	if (msg->msg_name) {
+		struct sockaddr_can *addr = msg->msg_name;
+
+		if (addr->can_addr.j1939.name ||
+		    addr->can_addr.j1939.addr != J1939_NO_ADDR) {
+			skcb->addr.dst_name = addr->can_addr.j1939.name;
+			skcb->addr.da = addr->can_addr.j1939.addr;
+		}
+		if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn))
+			skcb->addr.pgn = addr->can_addr.j1939.pgn;
+	}
+
+	*errcode = ret;
+	return skb;
+
+free_skb:
+	kfree_skb(skb);
+failure:
+	*errcode = ret;
+	return NULL;
+}
+
+static size_t j1939_sk_opt_stats_get_size(void)
+{
+	return
+		nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */
+		0;
+}
+
+static struct sk_buff *
+j1939_sk_get_timestamping_opt_stats(struct j1939_session *session)
+{
+	struct sk_buff *stats;
+	u32 size;
+
+	stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC);
+	if (!stats)
+		return NULL;
+
+	if (session->skcb.addr.type == J1939_SIMPLE)
+		size = session->total_message_size;
+	else
+		size = min(session->pkt.tx_acked * 7,
+			   session->total_message_size);
+
+	nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size);
+
+	return stats;
+}
+
+void j1939_sk_errqueue(struct j1939_session *session,
+		       enum j1939_sk_errqueue_type type)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sock *sk = session->sk;
+	struct j1939_sock *jsk;
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb;
+	char *state = "UNK";
+	int err;
+
+	/* currently we have no sk for the RX session */
+	if (!sk)
+		return;
+
+	jsk = j1939_sk(sk);
+
+	if (!(jsk->state & J1939_SOCK_ERRQUEUE))
+		return;
+
+	skb = j1939_sk_get_timestamping_opt_stats(session);
+	if (!skb)
+		return;
+
+	skb->tstamp = ktime_get_real();
+
+	BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	switch (type) {
+	case J1939_ERRQUEUE_ACK:
+		if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+			return;
+
+		serr->ee.ee_errno = ENOMSG;
+		serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+		serr->ee.ee_info = SCM_TSTAMP_ACK;
+		state = "ACK";
+		break;
+	case J1939_ERRQUEUE_SCHED:
+		if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+			return;
+
+		serr->ee.ee_errno = ENOMSG;
+		serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+		serr->ee.ee_info = SCM_TSTAMP_SCHED;
+		state = "SCH";
+		break;
+	case J1939_ERRQUEUE_ABORT:
+		serr->ee.ee_errno = session->err;
+		serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
+		serr->ee.ee_info = J1939_EE_INFO_TX_ABORT;
+		state = "ABT";
+		break;
+	default:
+		netdev_err(priv->ndev, "Unknown errqueue type %i\n", type);
+	}
+
+	serr->opt_stats = true;
+	if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+		serr->ee.ee_data = session->tskey;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
+		   __func__, session, session->tskey, state);
+	err = sock_queue_err_skb(sk, skb);
+
+	if (err)
+		kfree_skb(skb);
+};
+
+void j1939_sk_send_loop_abort(struct sock *sk, int err)
+{
+	sk->sk_err = err;
+
+	sk->sk_error_report(sk);
+}
+
+static int j1939_sk_send_loop(struct j1939_priv *priv,  struct sock *sk,
+			      struct msghdr *msg, size_t size)
+
+{
+	struct j1939_sock *jsk = j1939_sk(sk);
+	struct j1939_session *session = j1939_sk_get_incomplete_session(jsk);
+	struct sk_buff *skb;
+	size_t segment_size, todo_size;
+	int ret = 0;
+
+	if (session &&
+	    session->total_message_size != session->total_queued_size + size) {
+		j1939_session_put(session);
+		return -EIO;
+	}
+
+	todo_size = size;
+
+	while (todo_size) {
+		struct j1939_sk_buff_cb *skcb;
+
+		segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE,
+				     todo_size);
+
+		/* Allocate skb for one segment */
+		skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size,
+					 &ret);
+		if (ret)
+			break;
+
+		skcb = j1939_skb_to_cb(skb);
+
+		if (!session) {
+			/* at this point the size should be full size
+			 * of the session
+			 */
+			skcb->offset = 0;
+			session = j1939_tp_send(priv, skb, size);
+			if (IS_ERR(session)) {
+				ret = PTR_ERR(session);
+				goto kfree_skb;
+			}
+			if (j1939_sk_queue_session(session)) {
+				/* try to activate session if we a
+				 * fist in the queue
+				 */
+				if (!j1939_session_activate(session)) {
+					j1939_tp_schedule_txtimer(session, 0);
+				} else {
+					ret = -EBUSY;
+					session->err = ret;
+					j1939_sk_queue_drop_all(priv, jsk,
+								EBUSY);
+					break;
+				}
+			}
+		} else {
+			skcb->offset = session->total_queued_size;
+			j1939_session_skb_queue(session, skb);
+		}
+
+		todo_size -= segment_size;
+		session->total_queued_size += segment_size;
+	}
+
+	switch (ret) {
+	case 0: /* OK */
+		if (todo_size)
+			netdev_warn(priv->ndev,
+				    "no error found and not completely queued?! %zu\n",
+				    todo_size);
+		ret = size;
+		break;
+	case -ERESTARTSYS:
+		ret = -EINTR;
+		/* fall through */
+	case -EAGAIN: /* OK */
+		if (todo_size != size)
+			ret = size - todo_size;
+		break;
+	default: /* ERROR */
+		break;
+	}
+
+	if (session)
+		j1939_session_put(session);
+
+	return ret;
+
+ kfree_skb:
+	kfree_skb(skb);
+	return ret;
+}
+
+static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
+			    size_t size)
+{
+	struct sock *sk = sock->sk;
+	struct j1939_sock *jsk = j1939_sk(sk);
+	struct j1939_priv *priv = jsk->priv;
+	int ifindex;
+	int ret;
+
+	/* various socket state tests */
+	if (!(jsk->state & J1939_SOCK_BOUND))
+		return -EBADFD;
+
+	ifindex = jsk->ifindex;
+
+	if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR)
+		/* no source address assigned yet */
+		return -EBADFD;
+
+	/* deal with provided destination address info */
+	if (msg->msg_name) {
+		struct sockaddr_can *addr = msg->msg_name;
+
+		if (msg->msg_namelen < J1939_MIN_NAMELEN)
+			return -EINVAL;
+
+		if (addr->can_family != AF_CAN)
+			return -EINVAL;
+
+		if (addr->can_ifindex && addr->can_ifindex != ifindex)
+			return -EBADFD;
+
+		if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
+		    !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
+			return -EINVAL;
+
+		if (!addr->can_addr.j1939.name &&
+		    addr->can_addr.j1939.addr == J1939_NO_ADDR &&
+		    !sock_flag(sk, SOCK_BROADCAST))
+			/* broadcast, but SO_BROADCAST not set */
+			return -EACCES;
+	} else {
+		if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
+		    !sock_flag(sk, SOCK_BROADCAST))
+			/* broadcast, but SO_BROADCAST not set */
+			return -EACCES;
+	}
+
+	ret = j1939_sk_send_loop(priv, sk, msg, size);
+
+	return ret;
+}
+
+void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
+{
+	struct j1939_sock *jsk;
+	int error_code = ENETDOWN;
+
+	spin_lock_bh(&priv->j1939_socks_lock);
+	list_for_each_entry(jsk, &priv->j1939_socks, list) {
+		jsk->sk.sk_err = error_code;
+		if (!sock_flag(&jsk->sk, SOCK_DEAD))
+			jsk->sk.sk_error_report(&jsk->sk);
+
+		j1939_sk_queue_drop_all(priv, jsk, error_code);
+	}
+	spin_unlock_bh(&priv->j1939_socks_lock);
+}
+
+static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
+				unsigned long arg)
+{
+	/* no ioctls for socket layer -> hand it down to NIC layer */
+	return -ENOIOCTLCMD;
+}
+
+static const struct proto_ops j1939_ops = {
+	.family = PF_CAN,
+	.release = j1939_sk_release,
+	.bind = j1939_sk_bind,
+	.connect = j1939_sk_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = j1939_sk_getname,
+	.poll = datagram_poll,
+	.ioctl = j1939_sk_no_ioctlcmd,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = j1939_sk_setsockopt,
+	.getsockopt = j1939_sk_getsockopt,
+	.sendmsg = j1939_sk_sendmsg,
+	.recvmsg = j1939_sk_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static struct proto j1939_proto __read_mostly = {
+	.name = "CAN_J1939",
+	.owner = THIS_MODULE,
+	.obj_size = sizeof(struct j1939_sock),
+	.init = j1939_sk_init,
+};
+
+const struct can_proto j1939_can_proto = {
+	.type = SOCK_DGRAM,
+	.protocol = CAN_J1939,
+	.ops = &j1939_ops,
+	.prot = &j1939_proto,
+};
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
new file mode 100644
index 000000000000..fe000ea757ea
--- /dev/null
+++ b/net/can/j1939/transport.c
@@ -0,0 +1,2027 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2010-2011 EIA Electronics,
+//                         Kurt Van Dijck <kurt.van.dijck@eia.be>
+// Copyright (c) 2018 Protonic,
+//                         Robin van der Gracht <robin@protonic.nl>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Marc Kleine-Budde <kernel@pengutronix.de>
+// Copyright (c) 2017-2019 Pengutronix,
+//                         Oleksij Rempel <kernel@pengutronix.de>
+
+#include <linux/can/skb.h>
+
+#include "j1939-priv.h"
+
+#define J1939_XTP_TX_RETRY_LIMIT 100
+
+#define J1939_ETP_PGN_CTL 0xc800
+#define J1939_ETP_PGN_DAT 0xc700
+#define J1939_TP_PGN_CTL 0xec00
+#define J1939_TP_PGN_DAT 0xeb00
+
+#define J1939_TP_CMD_RTS 0x10
+#define J1939_TP_CMD_CTS 0x11
+#define J1939_TP_CMD_EOMA 0x13
+#define J1939_TP_CMD_BAM 0x20
+#define J1939_TP_CMD_ABORT 0xff
+
+#define J1939_ETP_CMD_RTS 0x14
+#define J1939_ETP_CMD_CTS 0x15
+#define J1939_ETP_CMD_DPO 0x16
+#define J1939_ETP_CMD_EOMA 0x17
+#define J1939_ETP_CMD_ABORT 0xff
+
+enum j1939_xtp_abort {
+	J1939_XTP_NO_ABORT = 0,
+	J1939_XTP_ABORT_BUSY = 1,
+	/* Already in one or more connection managed sessions and
+	 * cannot support another.
+	 *
+	 * EALREADY:
+	 * Operation already in progress
+	 */
+
+	J1939_XTP_ABORT_RESOURCE = 2,
+	/* System resources were needed for another task so this
+	 * connection managed session was terminated.
+	 *
+	 * EMSGSIZE:
+	 * The socket type requires that message be sent atomically,
+	 * and the size of the message to be sent made this
+	 * impossible.
+	 */
+
+	J1939_XTP_ABORT_TIMEOUT = 3,
+	/* A timeout occurred and this is the connection abort to
+	 * close the session.
+	 *
+	 * EHOSTUNREACH:
+	 * The destination host cannot be reached (probably because
+	 * the host is down or a remote router cannot reach it).
+	 */
+
+	J1939_XTP_ABORT_GENERIC = 4,
+	/* CTS messages received when data transfer is in progress
+	 *
+	 * EBADMSG:
+	 * Not a data message
+	 */
+
+	J1939_XTP_ABORT_FAULT = 5,
+	/* Maximal retransmit request limit reached
+	 *
+	 * ENOTRECOVERABLE:
+	 * State not recoverable
+	 */
+
+	J1939_XTP_ABORT_UNEXPECTED_DATA = 6,
+	/* Unexpected data transfer packet
+	 *
+	 * ENOTCONN:
+	 * Transport endpoint is not connected
+	 */
+
+	J1939_XTP_ABORT_BAD_SEQ = 7,
+	/* Bad sequence number (and software is not able to recover)
+	 *
+	 * EILSEQ:
+	 * Illegal byte sequence
+	 */
+
+	J1939_XTP_ABORT_DUP_SEQ = 8,
+	/* Duplicate sequence number (and software is not able to
+	 * recover)
+	 */
+
+	J1939_XTP_ABORT_EDPO_UNEXPECTED = 9,
+	/* Unexpected EDPO packet (ETP) or Message size > 1785 bytes
+	 * (TP)
+	 */
+
+	J1939_XTP_ABORT_BAD_EDPO_PGN = 10,
+	/* Unexpected EDPO PGN (PGN in EDPO is bad) */
+
+	J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11,
+	/* EDPO number of packets is greater than CTS */
+
+	J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12,
+	/* Bad EDPO offset */
+
+	J1939_XTP_ABORT_OTHER_DEPRECATED = 13,
+	/* Deprecated. Use 250 instead (Any other reason)  */
+
+	J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14,
+	/* Unexpected ECTS PGN (PGN in ECTS is bad) */
+
+	J1939_XTP_ABORT_ECTS_TOO_BIG = 15,
+	/* ECTS requested packets exceeds message size */
+
+	J1939_XTP_ABORT_OTHER = 250,
+	/* Any other reason (if a Connection Abort reason is
+	 * identified that is not listed in the table use code 250)
+	 */
+};
+
+static unsigned int j1939_tp_block = 255;
+static unsigned int j1939_tp_packet_delay;
+static unsigned int j1939_tp_padding = 1;
+
+/* helpers */
+static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort)
+{
+	switch (abort) {
+	case J1939_XTP_ABORT_BUSY:
+		return "Already in one or more connection managed sessions and cannot support another.";
+	case J1939_XTP_ABORT_RESOURCE:
+		return "System resources were needed for another task so this connection managed session was terminated.";
+	case J1939_XTP_ABORT_TIMEOUT:
+		return "A timeout occurred and this is the connection abort to close the session.";
+	case J1939_XTP_ABORT_GENERIC:
+		return "CTS messages received when data transfer is in progress";
+	case J1939_XTP_ABORT_FAULT:
+		return "Maximal retransmit request limit reached";
+	case J1939_XTP_ABORT_UNEXPECTED_DATA:
+		return "Unexpected data transfer packet";
+	case J1939_XTP_ABORT_BAD_SEQ:
+		return "Bad sequence number (and software is not able to recover)";
+	case J1939_XTP_ABORT_DUP_SEQ:
+		return "Duplicate sequence number (and software is not able to recover)";
+	case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+		return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)";
+	case J1939_XTP_ABORT_BAD_EDPO_PGN:
+		return "Unexpected EDPO PGN (PGN in EDPO is bad)";
+	case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+		return "EDPO number of packets is greater than CTS";
+	case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+		return "Bad EDPO offset";
+	case J1939_XTP_ABORT_OTHER_DEPRECATED:
+		return "Deprecated. Use 250 instead (Any other reason)";
+	case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+		return "Unexpected ECTS PGN (PGN in ECTS is bad)";
+	case J1939_XTP_ABORT_ECTS_TOO_BIG:
+		return "ECTS requested packets exceeds message size";
+	case J1939_XTP_ABORT_OTHER:
+		return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)";
+	default:
+		return "<unknown>";
+	}
+}
+
+static int j1939_xtp_abort_to_errno(struct j1939_priv *priv,
+				    enum j1939_xtp_abort abort)
+{
+	int err;
+
+	switch (abort) {
+	case J1939_XTP_NO_ABORT:
+		WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT);
+		err = 0;
+		break;
+	case J1939_XTP_ABORT_BUSY:
+		err = EALREADY;
+		break;
+	case J1939_XTP_ABORT_RESOURCE:
+		err = EMSGSIZE;
+		break;
+	case J1939_XTP_ABORT_TIMEOUT:
+		err = EHOSTUNREACH;
+		break;
+	case J1939_XTP_ABORT_GENERIC:
+		err = EBADMSG;
+		break;
+	case J1939_XTP_ABORT_FAULT:
+		err = ENOTRECOVERABLE;
+		break;
+	case J1939_XTP_ABORT_UNEXPECTED_DATA:
+		err = ENOTCONN;
+		break;
+	case J1939_XTP_ABORT_BAD_SEQ:
+		err = EILSEQ;
+		break;
+	case J1939_XTP_ABORT_DUP_SEQ:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_EDPO_UNEXPECTED:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_BAD_EDPO_PGN:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_EDPO_OUTOF_CTS:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_BAD_EDPO_OFFSET:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_OTHER_DEPRECATED:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_ECTS_TOO_BIG:
+		err = EPROTO;
+		break;
+	case J1939_XTP_ABORT_OTHER:
+		err = EPROTO;
+		break;
+	default:
+		netdev_warn(priv->ndev, "Unknown abort code %i", abort);
+		err = EPROTO;
+	}
+
+	return err;
+}
+
+static inline void j1939_session_list_lock(struct j1939_priv *priv)
+{
+	spin_lock_bh(&priv->active_session_list_lock);
+}
+
+static inline void j1939_session_list_unlock(struct j1939_priv *priv)
+{
+	spin_unlock_bh(&priv->active_session_list_lock);
+}
+
+void j1939_session_get(struct j1939_session *session)
+{
+	kref_get(&session->kref);
+}
+
+/* session completion functions */
+static void __j1939_session_drop(struct j1939_session *session)
+{
+	if (!session->transmission)
+		return;
+
+	j1939_sock_pending_del(session->sk);
+}
+
+static void j1939_session_destroy(struct j1939_session *session)
+{
+	if (session->err)
+		j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT);
+	else
+		j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK);
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	skb_queue_purge(&session->skb_queue);
+	__j1939_session_drop(session);
+	j1939_priv_put(session->priv);
+	kfree(session);
+}
+
+static void __j1939_session_release(struct kref *kref)
+{
+	struct j1939_session *session = container_of(kref, struct j1939_session,
+						     kref);
+
+	j1939_session_destroy(session);
+}
+
+void j1939_session_put(struct j1939_session *session)
+{
+	kref_put(&session->kref, __j1939_session_release);
+}
+
+static void j1939_session_txtimer_cancel(struct j1939_session *session)
+{
+	if (hrtimer_cancel(&session->txtimer))
+		j1939_session_put(session);
+}
+
+static void j1939_session_rxtimer_cancel(struct j1939_session *session)
+{
+	if (hrtimer_cancel(&session->rxtimer))
+		j1939_session_put(session);
+}
+
+void j1939_session_timers_cancel(struct j1939_session *session)
+{
+	j1939_session_txtimer_cancel(session);
+	j1939_session_rxtimer_cancel(session);
+}
+
+static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb)
+{
+	return (!skcb->addr.dst_name && (skcb->addr.da == 0xff));
+}
+
+static void j1939_session_skb_drop_old(struct j1939_session *session)
+{
+	struct sk_buff *do_skb;
+	struct j1939_sk_buff_cb *do_skcb;
+	unsigned int offset_start;
+	unsigned long flags;
+
+	if (skb_queue_len(&session->skb_queue) < 2)
+		return;
+
+	offset_start = session->pkt.tx_acked * 7;
+
+	spin_lock_irqsave(&session->skb_queue.lock, flags);
+	do_skb = skb_peek(&session->skb_queue);
+	do_skcb = j1939_skb_to_cb(do_skb);
+
+	if ((do_skcb->offset + do_skb->len) < offset_start) {
+		__skb_unlink(do_skb, &session->skb_queue);
+		kfree_skb(do_skb);
+	}
+	spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+}
+
+void j1939_session_skb_queue(struct j1939_session *session,
+			     struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_priv *priv = session->priv;
+
+	j1939_ac_fixup(priv, skb);
+
+	if (j1939_address_is_unicast(skcb->addr.da) &&
+	    priv->ents[skcb->addr.da].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_DST;
+
+	skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+	skb_queue_tail(&session->skb_queue, skb);
+}
+
+static struct sk_buff *j1939_session_skb_find(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sk_buff *skb = NULL;
+	struct sk_buff *do_skb;
+	struct j1939_sk_buff_cb *do_skcb;
+	unsigned int offset_start;
+	unsigned long flags;
+
+	offset_start = session->pkt.dpo * 7;
+
+	spin_lock_irqsave(&session->skb_queue.lock, flags);
+	skb_queue_walk(&session->skb_queue, do_skb) {
+		do_skcb = j1939_skb_to_cb(do_skb);
+
+		if (offset_start >= do_skcb->offset &&
+		    offset_start < (do_skcb->offset + do_skb->len)) {
+			skb = do_skb;
+		}
+	}
+	spin_unlock_irqrestore(&session->skb_queue.lock, flags);
+
+	if (!skb)
+		netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n",
+			   __func__, session, offset_start,
+			   skb_queue_len(&session->skb_queue));
+
+	return skb;
+}
+
+/* see if we are receiver
+ * returns 0 for broadcasts, although we will receive them
+ */
+static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb)
+{
+	return skcb->flags & J1939_ECU_LOCAL_DST;
+}
+
+/* see if we are sender */
+static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb)
+{
+	return skcb->flags & J1939_ECU_LOCAL_SRC;
+}
+
+/* see if we are involved as either receiver or transmitter */
+static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap)
+{
+	if (swap)
+		return j1939_tp_im_receiver(skcb);
+	else
+		return j1939_tp_im_transmitter(skcb);
+}
+
+static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb)
+{
+	return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+}
+
+/* extract pgn from flow-ctl message */
+static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat)
+{
+	pgn_t pgn;
+
+	pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0);
+	if (j1939_pgn_is_pdu1(pgn))
+		pgn &= 0xffff00;
+	return pgn;
+}
+
+static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat)
+{
+	return (dat[2] << 8) + (dat[1] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat)
+{
+	return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0);
+}
+
+static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat)
+{
+	return (dat[4] << 24) | (dat[3] << 16) |
+		(dat[2] << 8) | (dat[1] << 0);
+}
+
+/* find existing session:
+ * reverse: swap cb's src & dst
+ * there is no problem with matching broadcasts, since
+ * broadcasts (no dst, no da) would never call this
+ * with reverse == true
+ */
+static bool j1939_session_match(struct j1939_addr *se_addr,
+				struct j1939_addr *sk_addr, bool reverse)
+{
+	if (se_addr->type != sk_addr->type)
+		return false;
+
+	if (reverse) {
+		if (se_addr->src_name) {
+			if (se_addr->src_name != sk_addr->dst_name)
+				return false;
+		} else if (se_addr->sa != sk_addr->da) {
+			return false;
+		}
+
+		if (se_addr->dst_name) {
+			if (se_addr->dst_name != sk_addr->src_name)
+				return false;
+		} else if (se_addr->da != sk_addr->sa) {
+			return false;
+		}
+	} else {
+		if (se_addr->src_name) {
+			if (se_addr->src_name != sk_addr->src_name)
+				return false;
+		} else if (se_addr->sa != sk_addr->sa) {
+			return false;
+		}
+
+		if (se_addr->dst_name) {
+			if (se_addr->dst_name != sk_addr->dst_name)
+				return false;
+		} else if (se_addr->da != sk_addr->da) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv,
+						struct list_head *root,
+						struct j1939_addr *addr,
+						bool reverse, bool transmitter)
+{
+	struct j1939_session *session;
+
+	lockdep_assert_held(&priv->active_session_list_lock);
+
+	list_for_each_entry(session, root, active_session_list_entry) {
+		j1939_session_get(session);
+		if (j1939_session_match(&session->skcb.addr, addr, reverse) &&
+		    session->transmission == transmitter)
+			return session;
+		j1939_session_put(session);
+	}
+
+	return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_simple(struct j1939_priv *priv,
+					struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	lockdep_assert_held(&priv->active_session_list_lock);
+
+	list_for_each_entry(session, &priv->active_session_list,
+			    active_session_list_entry) {
+		j1939_session_get(session);
+		if (session->skcb.addr.type == J1939_SIMPLE &&
+		    session->tskey == skcb->tskey && session->sk == skb->sk)
+			return session;
+		j1939_session_put(session);
+	}
+
+	return NULL;
+}
+
+static struct
+j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv,
+					 struct j1939_addr *addr,
+					 bool reverse, bool transmitter)
+{
+	struct j1939_session *session;
+
+	j1939_session_list_lock(priv);
+	session = j1939_session_get_by_addr_locked(priv,
+						   &priv->active_session_list,
+						   addr, reverse, transmitter);
+	j1939_session_list_unlock(priv);
+
+	return session;
+}
+
+static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb)
+{
+	u8 tmp = 0;
+
+	swap(skcb->addr.dst_name, skcb->addr.src_name);
+	swap(skcb->addr.da, skcb->addr.sa);
+
+	/* swap SRC and DST flags, leave other untouched */
+	if (skcb->flags & J1939_ECU_LOCAL_SRC)
+		tmp |= J1939_ECU_LOCAL_DST;
+	if (skcb->flags & J1939_ECU_LOCAL_DST)
+		tmp |= J1939_ECU_LOCAL_SRC;
+	skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST);
+	skcb->flags |= tmp;
+}
+
+static struct
+sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
+			     const struct j1939_sk_buff_cb *re_skcb,
+			     bool ctl,
+			     bool swap_src_dst)
+{
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb;
+
+	skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv),
+			GFP_ATOMIC);
+	if (unlikely(!skb))
+		return ERR_PTR(-ENOMEM);
+
+	skb->dev = priv->ndev;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+	/* reserve CAN header */
+	skb_reserve(skb, offsetof(struct can_frame, data));
+
+	memcpy(skb->cb, re_skcb, sizeof(skb->cb));
+	skcb = j1939_skb_to_cb(skb);
+	if (swap_src_dst)
+		j1939_skbcb_swap(skcb);
+
+	if (ctl) {
+		if (skcb->addr.type == J1939_ETP)
+			skcb->addr.pgn = J1939_ETP_PGN_CTL;
+		else
+			skcb->addr.pgn = J1939_TP_PGN_CTL;
+	} else {
+		if (skcb->addr.type == J1939_ETP)
+			skcb->addr.pgn = J1939_ETP_PGN_DAT;
+		else
+			skcb->addr.pgn = J1939_TP_PGN_DAT;
+	}
+
+	return skb;
+}
+
+/* TP transmit packet functions */
+static int j1939_tp_tx_dat(struct j1939_session *session,
+			   const u8 *dat, int len)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sk_buff *skb;
+
+	skb = j1939_tp_tx_dat_new(priv, &session->skcb,
+				  false, false);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	skb_put_data(skb, dat, len);
+	if (j1939_tp_padding && len < 8)
+		memset(skb_put(skb, 8 - len), 0xff, 8 - len);
+
+	return j1939_send_one(priv, skb);
+}
+
+static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv,
+			       const struct j1939_sk_buff_cb *re_skcb,
+			       bool swap_src_dst, pgn_t pgn, const u8 *dat)
+{
+	struct sk_buff *skb;
+	u8 *skdat;
+
+	if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+		return 0;
+
+	skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	skdat = skb_put(skb, 8);
+	memcpy(skdat, dat, 5);
+	skdat[5] = (pgn >> 0);
+	skdat[6] = (pgn >> 8);
+	skdat[7] = (pgn >> 16);
+
+	return j1939_send_one(priv, skb);
+}
+
+static inline int j1939_tp_tx_ctl(struct j1939_session *session,
+				  bool swap_src_dst, const u8 *dat)
+{
+	struct j1939_priv *priv = session->priv;
+
+	return j1939_xtp_do_tx_ctl(priv, &session->skcb,
+				   swap_src_dst,
+				   session->skcb.addr.pgn, dat);
+}
+
+static int j1939_xtp_tx_abort(struct j1939_priv *priv,
+			      const struct j1939_sk_buff_cb *re_skcb,
+			      bool swap_src_dst,
+			      enum j1939_xtp_abort err,
+			      pgn_t pgn)
+{
+	u8 dat[5];
+
+	if (!j1939_tp_im_involved(re_skcb, swap_src_dst))
+		return 0;
+
+	memset(dat, 0xff, sizeof(dat));
+	dat[0] = J1939_TP_CMD_ABORT;
+	dat[1] = err;
+	return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat);
+}
+
+void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec)
+{
+	j1939_session_get(session);
+	hrtimer_start(&session->txtimer, ms_to_ktime(msec),
+		      HRTIMER_MODE_REL_SOFT);
+}
+
+static inline void j1939_tp_set_rxtimeout(struct j1939_session *session,
+					  int msec)
+{
+	j1939_session_rxtimer_cancel(session);
+	j1939_session_get(session);
+	hrtimer_start(&session->rxtimer, ms_to_ktime(msec),
+		      HRTIMER_MODE_REL_SOFT);
+}
+
+static int j1939_session_tx_rts(struct j1939_session *session)
+{
+	u8 dat[8];
+	int ret;
+
+	memset(dat, 0xff, sizeof(dat));
+
+	dat[1] = (session->total_message_size >> 0);
+	dat[2] = (session->total_message_size >> 8);
+	dat[3] = session->pkt.total;
+
+	if (session->skcb.addr.type == J1939_ETP) {
+		dat[0] = J1939_ETP_CMD_RTS;
+		dat[1] = (session->total_message_size >> 0);
+		dat[2] = (session->total_message_size >> 8);
+		dat[3] = (session->total_message_size >> 16);
+		dat[4] = (session->total_message_size >> 24);
+	} else if (j1939_cb_is_broadcast(&session->skcb)) {
+		dat[0] = J1939_TP_CMD_BAM;
+		/* fake cts for broadcast */
+		session->pkt.tx = 0;
+	} else {
+		dat[0] = J1939_TP_CMD_RTS;
+		dat[4] = dat[3];
+	}
+
+	if (dat[0] == session->last_txcmd)
+		/* done already */
+		return 0;
+
+	ret = j1939_tp_tx_ctl(session, false, dat);
+	if (ret < 0)
+		return ret;
+
+	session->last_txcmd = dat[0];
+	if (dat[0] == J1939_TP_CMD_BAM)
+		j1939_tp_schedule_txtimer(session, 50);
+
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_session_tx_dpo(struct j1939_session *session)
+{
+	unsigned int pkt;
+	u8 dat[8];
+	int ret;
+
+	memset(dat, 0xff, sizeof(dat));
+
+	dat[0] = J1939_ETP_CMD_DPO;
+	session->pkt.dpo = session->pkt.tx_acked;
+	pkt = session->pkt.dpo;
+	dat[1] = session->pkt.last - session->pkt.tx_acked;
+	dat[2] = (pkt >> 0);
+	dat[3] = (pkt >> 8);
+	dat[4] = (pkt >> 16);
+
+	ret = j1939_tp_tx_ctl(session, false, dat);
+	if (ret < 0)
+		return ret;
+
+	session->last_txcmd = dat[0];
+	j1939_tp_set_rxtimeout(session, 1250);
+	session->pkt.tx = session->pkt.tx_acked;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_session_tx_dat(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct j1939_sk_buff_cb *skcb;
+	int offset, pkt_done, pkt_end;
+	unsigned int len, pdelay;
+	struct sk_buff *se_skb;
+	const u8 *tpdat;
+	int ret = 0;
+	u8 dat[8];
+
+	se_skb = j1939_session_skb_find(session);
+	if (!se_skb)
+		return -ENOBUFS;
+
+	skcb = j1939_skb_to_cb(se_skb);
+	tpdat = se_skb->data;
+	ret = 0;
+	pkt_done = 0;
+	if (session->skcb.addr.type != J1939_ETP &&
+	    j1939_cb_is_broadcast(&session->skcb))
+		pkt_end = session->pkt.total;
+	else
+		pkt_end = session->pkt.last;
+
+	while (session->pkt.tx < pkt_end) {
+		dat[0] = session->pkt.tx - session->pkt.dpo + 1;
+		offset = (session->pkt.tx * 7) - skcb->offset;
+		len =  se_skb->len - offset;
+		if (len > 7)
+			len = 7;
+
+		memcpy(&dat[1], &tpdat[offset], len);
+		ret = j1939_tp_tx_dat(session, dat, len + 1);
+		if (ret < 0) {
+			/* ENOBUS == CAN interface TX queue is full */
+			if (ret != -ENOBUFS)
+				netdev_alert(priv->ndev,
+					     "%s: 0x%p: queue data error: %i\n",
+					     __func__, session, ret);
+			break;
+		}
+
+		session->last_txcmd = 0xff;
+		pkt_done++;
+		session->pkt.tx++;
+		pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 :
+			j1939_tp_packet_delay;
+
+		if (session->pkt.tx < session->pkt.total && pdelay) {
+			j1939_tp_schedule_txtimer(session, pdelay);
+			break;
+		}
+	}
+
+	if (pkt_done)
+		j1939_tp_set_rxtimeout(session, 250);
+
+	return ret;
+}
+
+static int j1939_xtp_txnext_transmiter(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	int ret = 0;
+
+	if (!j1939_tp_im_transmitter(&session->skcb)) {
+		netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n",
+			     __func__, session);
+		return -EINVAL;
+	}
+
+	switch (session->last_cmd) {
+	case 0:
+		ret = j1939_session_tx_rts(session);
+		break;
+
+	case J1939_ETP_CMD_CTS:
+		if (session->last_txcmd != J1939_ETP_CMD_DPO) {
+			ret = j1939_session_tx_dpo(session);
+			if (ret)
+				return ret;
+		}
+
+		/* fall through */
+	case J1939_TP_CMD_CTS:
+	case 0xff: /* did some data */
+	case J1939_ETP_CMD_DPO:
+	case J1939_TP_CMD_BAM:
+		ret = j1939_session_tx_dat(session);
+
+		break;
+	default:
+		netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+			     __func__, session, session->last_cmd);
+	}
+
+	return ret;
+}
+
+static int j1939_session_tx_cts(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	unsigned int pkt, len;
+	int ret;
+	u8 dat[8];
+
+	if (!j1939_sk_recv_match(priv, &session->skcb))
+		return -ENOENT;
+
+	len = session->pkt.total - session->pkt.rx;
+	len = min3(len, session->pkt.block, j1939_tp_block ?: 255);
+	memset(dat, 0xff, sizeof(dat));
+
+	if (session->skcb.addr.type == J1939_ETP) {
+		pkt = session->pkt.rx + 1;
+		dat[0] = J1939_ETP_CMD_CTS;
+		dat[1] = len;
+		dat[2] = (pkt >> 0);
+		dat[3] = (pkt >> 8);
+		dat[4] = (pkt >> 16);
+	} else {
+		dat[0] = J1939_TP_CMD_CTS;
+		dat[1] = len;
+		dat[2] = session->pkt.rx + 1;
+	}
+
+	if (dat[0] == session->last_txcmd)
+		/* done already */
+		return 0;
+
+	ret = j1939_tp_tx_ctl(session, true, dat);
+	if (ret < 0)
+		return ret;
+
+	if (len)
+		/* only mark cts done when len is set */
+		session->last_txcmd = dat[0];
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_session_tx_eoma(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	u8 dat[8];
+	int ret;
+
+	if (!j1939_sk_recv_match(priv, &session->skcb))
+		return -ENOENT;
+
+	memset(dat, 0xff, sizeof(dat));
+
+	if (session->skcb.addr.type == J1939_ETP) {
+		dat[0] = J1939_ETP_CMD_EOMA;
+		dat[1] = session->total_message_size >> 0;
+		dat[2] = session->total_message_size >> 8;
+		dat[3] = session->total_message_size >> 16;
+		dat[4] = session->total_message_size >> 24;
+	} else {
+		dat[0] = J1939_TP_CMD_EOMA;
+		dat[1] = session->total_message_size;
+		dat[2] = session->total_message_size >> 8;
+		dat[3] = session->pkt.total;
+	}
+
+	if (dat[0] == session->last_txcmd)
+		/* done already */
+		return 0;
+
+	ret = j1939_tp_tx_ctl(session, true, dat);
+	if (ret < 0)
+		return ret;
+
+	session->last_txcmd = dat[0];
+
+	/* wait for the EOMA packet to come in */
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	netdev_dbg(session->priv->ndev, "%p: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static int j1939_xtp_txnext_receiver(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	int ret = 0;
+
+	if (!j1939_tp_im_receiver(&session->skcb)) {
+		netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n",
+			     __func__, session);
+		return -EINVAL;
+	}
+
+	switch (session->last_cmd) {
+	case J1939_TP_CMD_RTS:
+	case J1939_ETP_CMD_RTS:
+		ret = j1939_session_tx_cts(session);
+		break;
+
+	case J1939_ETP_CMD_CTS:
+	case J1939_TP_CMD_CTS:
+	case 0xff: /* did some data */
+	case J1939_ETP_CMD_DPO:
+		if ((session->skcb.addr.type == J1939_TP &&
+		     j1939_cb_is_broadcast(&session->skcb)))
+			break;
+
+		if (session->pkt.rx >= session->pkt.total) {
+			ret = j1939_session_tx_eoma(session);
+		} else if (session->pkt.rx >= session->pkt.last) {
+			session->last_txcmd = 0;
+			ret = j1939_session_tx_cts(session);
+		}
+		break;
+	default:
+		netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n",
+			     __func__, session, session->last_cmd);
+	}
+
+	return ret;
+}
+
+static int j1939_simple_txnext(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct sk_buff *se_skb = j1939_session_skb_find(session);
+	struct sk_buff *skb;
+	int ret;
+
+	if (!se_skb)
+		return 0;
+
+	skb = skb_clone(se_skb, GFP_ATOMIC);
+	if (!skb)
+		return -ENOMEM;
+
+	can_skb_set_owner(skb, se_skb->sk);
+
+	j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS);
+
+	ret = j1939_send_one(priv, skb);
+	if (ret)
+		return ret;
+
+	j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED);
+	j1939_sk_queue_activate_next(session);
+
+	return 0;
+}
+
+static bool j1939_session_deactivate_locked(struct j1939_session *session)
+{
+	bool active = false;
+
+	lockdep_assert_held(&session->priv->active_session_list_lock);
+
+	if (session->state >= J1939_SESSION_ACTIVE &&
+	    session->state < J1939_SESSION_ACTIVE_MAX) {
+		active = true;
+
+		list_del_init(&session->active_session_list_entry);
+		session->state = J1939_SESSION_DONE;
+		j1939_session_put(session);
+	}
+
+	return active;
+}
+
+static bool j1939_session_deactivate(struct j1939_session *session)
+{
+	bool active;
+
+	j1939_session_list_lock(session->priv);
+	active = j1939_session_deactivate_locked(session);
+	j1939_session_list_unlock(session->priv);
+
+	return active;
+}
+
+static void
+j1939_session_deactivate_activate_next(struct j1939_session *session)
+{
+	if (j1939_session_deactivate(session))
+		j1939_sk_queue_activate_next(session);
+}
+
+static void j1939_session_cancel(struct j1939_session *session,
+				 enum j1939_xtp_abort err)
+{
+	struct j1939_priv *priv = session->priv;
+
+	WARN_ON_ONCE(!err);
+
+	session->err = j1939_xtp_abort_to_errno(priv, err);
+	/* do not send aborts on incoming broadcasts */
+	if (!j1939_cb_is_broadcast(&session->skcb)) {
+		session->state = J1939_SESSION_WAITING_ABORT;
+		j1939_xtp_tx_abort(priv, &session->skcb,
+				   !session->transmission,
+				   err, session->skcb.addr.pgn);
+	}
+
+	if (session->sk)
+		j1939_sk_send_loop_abort(session->sk, session->err);
+}
+
+static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
+{
+	struct j1939_session *session =
+		container_of(hrtimer, struct j1939_session, txtimer);
+	struct j1939_priv *priv = session->priv;
+	int ret = 0;
+
+	if (session->skcb.addr.type == J1939_SIMPLE) {
+		ret = j1939_simple_txnext(session);
+	} else {
+		if (session->transmission)
+			ret = j1939_xtp_txnext_transmiter(session);
+		else
+			ret = j1939_xtp_txnext_receiver(session);
+	}
+
+	switch (ret) {
+	case -ENOBUFS:
+		/* Retry limit is currently arbitrary chosen */
+		if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) {
+			session->tx_retry++;
+			j1939_tp_schedule_txtimer(session,
+						  10 + prandom_u32_max(16));
+		} else {
+			netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n",
+				     __func__, session);
+			session->err = -ENETUNREACH;
+			j1939_session_rxtimer_cancel(session);
+			j1939_session_deactivate_activate_next(session);
+		}
+		break;
+	case -ENETDOWN:
+		/* In this case we should get a netdev_event(), all active
+		 * sessions will be cleared by
+		 * j1939_cancel_all_active_sessions(). So handle this as an
+		 * error, but let j1939_cancel_all_active_sessions() do the
+		 * cleanup including propagation of the error to user space.
+		 */
+		break;
+	case 0:
+		session->tx_retry = 0;
+		break;
+	default:
+		netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
+			     __func__, session, ret);
+		if (session->skcb.addr.type != J1939_SIMPLE) {
+			j1939_tp_set_rxtimeout(session,
+					       J1939_XTP_ABORT_TIMEOUT_MS);
+			j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
+		} else {
+			session->err = ret;
+			j1939_session_rxtimer_cancel(session);
+			j1939_session_deactivate_activate_next(session);
+		}
+	}
+
+	j1939_session_put(session);
+
+	return HRTIMER_NORESTART;
+}
+
+static void j1939_session_completed(struct j1939_session *session)
+{
+	struct sk_buff *skb;
+
+	if (!session->transmission) {
+		skb = j1939_session_skb_find(session);
+		/* distribute among j1939 receivers */
+		j1939_sk_recv(session->priv, skb);
+	}
+
+	j1939_session_deactivate_activate_next(session);
+}
+
+static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
+{
+	struct j1939_session *session = container_of(hrtimer,
+						     struct j1939_session,
+						     rxtimer);
+	struct j1939_priv *priv = session->priv;
+
+	if (session->state == J1939_SESSION_WAITING_ABORT) {
+		netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n",
+			     __func__, session);
+
+		j1939_session_deactivate_activate_next(session);
+
+	} else if (session->skcb.addr.type == J1939_SIMPLE) {
+		netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n",
+			     __func__, session);
+
+		/* The message is probably stuck in the CAN controller and can
+		 * be send as soon as CAN bus is in working state again.
+		 */
+		session->err = -ETIME;
+		j1939_session_deactivate(session);
+	} else {
+		netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+			     __func__, session);
+
+		j1939_session_list_lock(session->priv);
+		if (session->state >= J1939_SESSION_ACTIVE &&
+		    session->state < J1939_SESSION_ACTIVE_MAX) {
+			j1939_session_get(session);
+			hrtimer_start(&session->rxtimer,
+				      ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
+				      HRTIMER_MODE_REL_SOFT);
+			j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+		}
+		j1939_session_list_unlock(session->priv);
+	}
+
+	j1939_session_put(session);
+
+	return HRTIMER_NORESTART;
+}
+
+static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session,
+				     const struct sk_buff *skb)
+{
+	const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data);
+	struct j1939_priv *priv = session->priv;
+	enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+	u8 cmd = skb->data[0];
+
+	if (session->skcb.addr.pgn == pgn)
+		return false;
+
+	switch (cmd) {
+	case J1939_TP_CMD_BAM:
+		abort = J1939_XTP_NO_ABORT;
+		break;
+
+	case J1939_ETP_CMD_RTS:
+	case J1939_TP_CMD_RTS: /* fall through */
+		abort = J1939_XTP_ABORT_BUSY;
+		break;
+
+	case J1939_ETP_CMD_CTS:
+	case J1939_TP_CMD_CTS: /* fall through */
+		abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN;
+		break;
+
+	case J1939_ETP_CMD_DPO:
+		abort = J1939_XTP_ABORT_BAD_EDPO_PGN;
+		break;
+
+	case J1939_ETP_CMD_EOMA:
+	case J1939_TP_CMD_EOMA: /* fall through */
+		abort = J1939_XTP_ABORT_OTHER;
+		break;
+
+	case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+		abort = J1939_XTP_NO_ABORT;
+		break;
+
+	default:
+		WARN_ON_ONCE(1);
+		break;
+	}
+
+	netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n",
+		    __func__, session, cmd, pgn, session->skcb.addr.pgn);
+	if (abort != J1939_XTP_NO_ABORT)
+		j1939_xtp_tx_abort(priv, skcb, true, abort, pgn);
+
+	return true;
+}
+
+static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb,
+				   bool reverse, bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	u8 abort = skb->data[1];
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, reverse,
+					    transmitter);
+	if (!session)
+		return;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		goto abort_put;
+
+	netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__,
+		    session, j1939_xtp_ctl_to_pgn(skb->data), abort,
+		    j1939_xtp_abort_to_str(abort));
+
+	j1939_session_timers_cancel(session);
+	session->err = j1939_xtp_abort_to_errno(priv, abort);
+	if (session->sk)
+		j1939_sk_send_loop_abort(session->sk, session->err);
+	j1939_session_deactivate_activate_next(session);
+
+abort_put:
+	j1939_session_put(session);
+}
+
+/* abort packets may come in 2 directions */
+static void
+j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb,
+		   bool transmitter)
+{
+	j1939_xtp_rx_abort_one(priv, skb, false, transmitter);
+	j1939_xtp_rx_abort_one(priv, skb, true, transmitter);
+}
+
+static void
+j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb)
+{
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		return;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	session->pkt.tx_acked = session->pkt.total;
+	j1939_session_timers_cancel(session);
+	/* transmitted without problems */
+	j1939_session_completed(session);
+}
+
+static void
+j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb,
+		  bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+					    transmitter);
+	if (!session)
+		return;
+
+	j1939_xtp_rx_eoma_one(session, skb);
+	j1939_session_put(session);
+}
+
+static void
+j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
+{
+	enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT;
+	unsigned int pkt;
+	const u8 *dat;
+
+	dat = skb->data;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		return;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	if (session->last_cmd == dat[0]) {
+		err = J1939_XTP_ABORT_DUP_SEQ;
+		goto out_session_cancel;
+	}
+
+	if (session->skcb.addr.type == J1939_ETP)
+		pkt = j1939_etp_ctl_to_packet(dat);
+	else
+		pkt = dat[2];
+
+	if (!pkt)
+		goto out_session_cancel;
+	else if (dat[1] > session->pkt.block /* 0xff for etp */)
+		goto out_session_cancel;
+
+	/* set packet counters only when not CTS(0) */
+	session->pkt.tx_acked = pkt - 1;
+	j1939_session_skb_drop_old(session);
+	session->pkt.last = session->pkt.tx_acked + dat[1];
+	if (session->pkt.last > session->pkt.total)
+		/* safety measure */
+		session->pkt.last = session->pkt.total;
+	/* TODO: do not set tx here, do it in txtimer */
+	session->pkt.tx = session->pkt.tx_acked;
+
+	session->last_cmd = dat[0];
+	if (dat[1]) {
+		j1939_tp_set_rxtimeout(session, 1250);
+		if (session->transmission) {
+			if (session->pkt.tx_acked)
+				j1939_sk_errqueue(session,
+						  J1939_ERRQUEUE_SCHED);
+			j1939_session_txtimer_cancel(session);
+			j1939_tp_schedule_txtimer(session, 0);
+		}
+	} else {
+		/* CTS(0) */
+		j1939_tp_set_rxtimeout(session, 550);
+	}
+	return;
+
+ out_session_cancel:
+	j1939_session_timers_cancel(session);
+	j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+	j1939_session_cancel(session, err);
+}
+
+static void
+j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, true,
+					    transmitter);
+	if (!session)
+		return;
+	j1939_xtp_rx_cts_one(session, skb);
+	j1939_session_put(session);
+}
+
+static struct j1939_session *j1939_session_new(struct j1939_priv *priv,
+					       struct sk_buff *skb, size_t size)
+{
+	struct j1939_session *session;
+	struct j1939_sk_buff_cb *skcb;
+
+	session = kzalloc(sizeof(*session), gfp_any());
+	if (!session)
+		return NULL;
+
+	INIT_LIST_HEAD(&session->active_session_list_entry);
+	INIT_LIST_HEAD(&session->sk_session_queue_entry);
+	kref_init(&session->kref);
+
+	j1939_priv_get(priv);
+	session->priv = priv;
+	session->total_message_size = size;
+	session->state = J1939_SESSION_NEW;
+
+	skb_queue_head_init(&session->skb_queue);
+	skb_queue_tail(&session->skb_queue, skb);
+
+	skcb = j1939_skb_to_cb(skb);
+	memcpy(&session->skcb, skcb, sizeof(session->skcb));
+
+	hrtimer_init(&session->txtimer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_SOFT);
+	session->txtimer.function = j1939_tp_txtimer;
+	hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL_SOFT);
+	session->rxtimer.function = j1939_tp_rxtimer;
+
+	netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n",
+		   __func__, session, skcb->addr.sa, skcb->addr.da);
+
+	return session;
+}
+
+static struct
+j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
+				       int size,
+				       const struct j1939_sk_buff_cb *rel_skcb)
+{
+	struct sk_buff *skb;
+	struct j1939_sk_buff_cb *skcb;
+	struct j1939_session *session;
+
+	skb = alloc_skb(size + sizeof(struct can_skb_priv), GFP_ATOMIC);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb->dev = priv->ndev;
+	can_skb_reserve(skb);
+	can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
+	skcb = j1939_skb_to_cb(skb);
+	memcpy(skcb, rel_skcb, sizeof(*skcb));
+
+	session = j1939_session_new(priv, skb, skb->len);
+	if (!session) {
+		kfree_skb(skb);
+		return NULL;
+	}
+
+	/* alloc data area */
+	skb_put(skb, size);
+	/* skb is recounted in j1939_session_new() */
+	return session;
+}
+
+int j1939_session_activate(struct j1939_session *session)
+{
+	struct j1939_priv *priv = session->priv;
+	struct j1939_session *active = NULL;
+	int ret = 0;
+
+	j1939_session_list_lock(priv);
+	if (session->skcb.addr.type != J1939_SIMPLE)
+		active = j1939_session_get_by_addr_locked(priv,
+							  &priv->active_session_list,
+							  &session->skcb.addr, false,
+							  session->transmission);
+	if (active) {
+		j1939_session_put(active);
+		ret = -EAGAIN;
+	} else {
+		WARN_ON_ONCE(session->state != J1939_SESSION_NEW);
+		list_add_tail(&session->active_session_list_entry,
+			      &priv->active_session_list);
+		j1939_session_get(session);
+		session->state = J1939_SESSION_ACTIVE;
+
+		netdev_dbg(session->priv->ndev, "%s: 0x%p\n",
+			   __func__, session);
+	}
+	j1939_session_list_unlock(priv);
+
+	return ret;
+}
+
+static struct
+j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
+					    struct sk_buff *skb)
+{
+	enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT;
+	struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	const u8 *dat;
+	pgn_t pgn;
+	int len;
+
+	netdev_dbg(priv->ndev, "%s\n", __func__);
+
+	dat = skb->data;
+	pgn = j1939_xtp_ctl_to_pgn(dat);
+	skcb.addr.pgn = pgn;
+
+	if (!j1939_sk_recv_match(priv, &skcb))
+		return NULL;
+
+	if (skcb.addr.type == J1939_ETP) {
+		len = j1939_etp_ctl_to_size(dat);
+		if (len > J1939_MAX_ETP_PACKET_SIZE)
+			abort = J1939_XTP_ABORT_FAULT;
+		else if (len > priv->tp_max_packet_size)
+			abort = J1939_XTP_ABORT_RESOURCE;
+		else if (len <= J1939_MAX_TP_PACKET_SIZE)
+			abort = J1939_XTP_ABORT_FAULT;
+	} else {
+		len = j1939_tp_ctl_to_size(dat);
+		if (len > J1939_MAX_TP_PACKET_SIZE)
+			abort = J1939_XTP_ABORT_FAULT;
+		else if (len > priv->tp_max_packet_size)
+			abort = J1939_XTP_ABORT_RESOURCE;
+	}
+
+	if (abort != J1939_XTP_NO_ABORT) {
+		j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn);
+		return NULL;
+	}
+
+	session = j1939_session_fresh_new(priv, len, &skcb);
+	if (!session) {
+		j1939_xtp_tx_abort(priv, &skcb, true,
+				   J1939_XTP_ABORT_RESOURCE, pgn);
+		return NULL;
+	}
+
+	/* initialize the control buffer: plain copy */
+	session->pkt.total = (len + 6) / 7;
+	session->pkt.block = 0xff;
+	if (skcb.addr.type != J1939_ETP) {
+		if (dat[3] != session->pkt.total)
+			netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n",
+				     __func__, session, session->pkt.total,
+				     dat[3]);
+		session->pkt.total = dat[3];
+		session->pkt.block = min(dat[3], dat[4]);
+	}
+
+	session->pkt.rx = 0;
+	session->pkt.tx = 0;
+
+	WARN_ON_ONCE(j1939_session_activate(session));
+
+	return session;
+}
+
+static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
+					   struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_priv *priv = session->priv;
+
+	if (!session->transmission) {
+		if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+			return -EBUSY;
+
+		/* RTS on active session */
+		j1939_session_timers_cancel(session);
+		j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+		j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+	}
+
+	if (session->last_cmd != 0) {
+		/* we received a second rts on the same connection */
+		netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n",
+			     __func__, session, skcb->addr.sa, skcb->addr.da,
+			     session->last_cmd);
+
+		j1939_session_timers_cancel(session);
+		j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+		j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
+
+		return -EBUSY;
+	}
+
+	if (session->skcb.addr.sa != skcb->addr.sa ||
+	    session->skcb.addr.da != skcb->addr.da)
+		netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n",
+			    __func__, session,
+			    session->skcb.addr.sa, skcb->addr.sa,
+			    session->skcb.addr.da, skcb->addr.da);
+	/* make sure 'sa' & 'da' are correct !
+	 * They may be 'not filled in yet' for sending
+	 * skb's, since they did not pass the Address Claim ever.
+	 */
+	session->skcb.addr.sa = skcb->addr.sa;
+	session->skcb.addr.da = skcb->addr.da;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	return 0;
+}
+
+static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb,
+			     bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	u8 cmd = skb->data[0];
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+					    transmitter);
+
+	if (!session) {
+		if (transmitter) {
+			/* If we're the transmitter and this function is called,
+			 * we received our own RTS. A session has already been
+			 * created.
+			 *
+			 * For some reasons however it might have been destroyed
+			 * already. So don't create a new one here (using
+			 * "j1939_xtp_rx_rts_session_new()") as this will be a
+			 * receiver session.
+			 *
+			 * The reasons the session is already destroyed might
+			 * be:
+			 * - user space closed socket was and the session was
+			 *   aborted
+			 * - session was aborted due to external abort message
+			 */
+			return;
+		}
+		session = j1939_xtp_rx_rts_session_new(priv, skb);
+		if (!session)
+			return;
+	} else {
+		if (j1939_xtp_rx_rts_session_active(session, skb)) {
+			j1939_session_put(session);
+			return;
+		}
+	}
+	session->last_cmd = cmd;
+
+	j1939_tp_set_rxtimeout(session, 1250);
+
+	if (cmd != J1939_TP_CMD_BAM && !session->transmission) {
+		j1939_session_txtimer_cancel(session);
+		j1939_tp_schedule_txtimer(session, 0);
+	}
+
+	j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dpo_one(struct j1939_session *session,
+				 struct sk_buff *skb)
+{
+	const u8 *dat = skb->data;
+
+	if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
+		return;
+
+	netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+
+	/* transmitted without problems */
+	session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data);
+	session->last_cmd = dat[0];
+	j1939_tp_set_rxtimeout(session, 750);
+}
+
+static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
+			     bool transmitter)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+
+	session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+					    transmitter);
+	if (!session) {
+		netdev_info(priv->ndev,
+			    "%s: no connection found\n", __func__);
+		return;
+	}
+
+	j1939_xtp_rx_dpo_one(session, skb);
+	j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat_one(struct j1939_session *session,
+				 struct sk_buff *skb)
+{
+	struct j1939_priv *priv = session->priv;
+	struct j1939_sk_buff_cb *skcb;
+	struct sk_buff *se_skb;
+	const u8 *dat;
+	u8 *tpdat;
+	int offset;
+	int nbytes;
+	bool final = false;
+	bool do_cts_eoma = false;
+	int packet;
+
+	skcb = j1939_skb_to_cb(skb);
+	dat = skb->data;
+	if (skb->len <= 1)
+		/* makes no sense */
+		goto out_session_cancel;
+
+	switch (session->last_cmd) {
+	case 0xff:
+		break;
+	case J1939_ETP_CMD_DPO:
+		if (skcb->addr.type == J1939_ETP)
+			break;
+		/* fall through */
+	case J1939_TP_CMD_BAM: /* fall through */
+	case J1939_TP_CMD_CTS: /* fall through */
+		if (skcb->addr.type != J1939_ETP)
+			break;
+		/* fall through */
+	default:
+		netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__,
+			    session, session->last_cmd);
+		goto out_session_cancel;
+	}
+
+	packet = (dat[0] - 1 + session->pkt.dpo);
+	if (packet > session->pkt.total ||
+	    (session->pkt.rx + 1) > session->pkt.total) {
+		netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n",
+			    __func__, session);
+		goto out_session_cancel;
+	}
+	se_skb = j1939_session_skb_find(session);
+	if (!se_skb) {
+		netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__,
+			    session);
+		goto out_session_cancel;
+	}
+
+	skcb = j1939_skb_to_cb(se_skb);
+	offset = packet * 7 - skcb->offset;
+	nbytes = se_skb->len - offset;
+	if (nbytes > 7)
+		nbytes = 7;
+	if (nbytes <= 0 || (nbytes + 1) > skb->len) {
+		netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n",
+			    __func__, session, nbytes, skb->len);
+		goto out_session_cancel;
+	}
+
+	tpdat = se_skb->data;
+	memcpy(&tpdat[offset], &dat[1], nbytes);
+	if (packet == session->pkt.rx)
+		session->pkt.rx++;
+
+	if (skcb->addr.type != J1939_ETP &&
+	    j1939_cb_is_broadcast(&session->skcb)) {
+		if (session->pkt.rx >= session->pkt.total)
+			final = true;
+	} else {
+		/* never final, an EOMA must follow */
+		if (session->pkt.rx >= session->pkt.last)
+			do_cts_eoma = true;
+	}
+
+	if (final) {
+		j1939_session_completed(session);
+	} else if (do_cts_eoma) {
+		j1939_tp_set_rxtimeout(session, 1250);
+		if (!session->transmission)
+			j1939_tp_schedule_txtimer(session, 0);
+	} else {
+		j1939_tp_set_rxtimeout(session, 250);
+	}
+	session->last_cmd = 0xff;
+	j1939_session_put(session);
+
+	return;
+
+ out_session_cancel:
+	j1939_session_timers_cancel(session);
+	j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+	j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
+	j1939_session_put(session);
+}
+
+static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb;
+	struct j1939_session *session;
+
+	skcb = j1939_skb_to_cb(skb);
+
+	if (j1939_tp_im_transmitter(skcb)) {
+		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+						    true);
+		if (!session)
+			netdev_info(priv->ndev, "%s: no tx connection found\n",
+				    __func__);
+		else
+			j1939_xtp_rx_dat_one(session, skb);
+	}
+
+	if (j1939_tp_im_receiver(skcb)) {
+		session = j1939_session_get_by_addr(priv, &skcb->addr, false,
+						    false);
+		if (!session)
+			netdev_info(priv->ndev, "%s: no rx connection found\n",
+				    __func__);
+		else
+			j1939_xtp_rx_dat_one(session, skb);
+	}
+}
+
+/* j1939 main intf */
+struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
+				    struct sk_buff *skb, size_t size)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	struct j1939_session *session;
+	int ret;
+
+	if (skcb->addr.pgn == J1939_TP_PGN_DAT ||
+	    skcb->addr.pgn == J1939_TP_PGN_CTL ||
+	    skcb->addr.pgn == J1939_ETP_PGN_DAT ||
+	    skcb->addr.pgn == J1939_ETP_PGN_CTL)
+		/* avoid conflict */
+		return ERR_PTR(-EDOM);
+
+	if (size > priv->tp_max_packet_size)
+		return ERR_PTR(-EMSGSIZE);
+
+	if (size <= 8)
+		skcb->addr.type = J1939_SIMPLE;
+	else if (size > J1939_MAX_TP_PACKET_SIZE)
+		skcb->addr.type = J1939_ETP;
+	else
+		skcb->addr.type = J1939_TP;
+
+	if (skcb->addr.type == J1939_ETP &&
+	    j1939_cb_is_broadcast(skcb))
+		return ERR_PTR(-EDESTADDRREQ);
+
+	/* fill in addresses from names */
+	ret = j1939_ac_fixup(priv, skb);
+	if (unlikely(ret))
+		return ERR_PTR(ret);
+
+	/* fix DST flags, it may be used there soon */
+	if (j1939_address_is_unicast(skcb->addr.da) &&
+	    priv->ents[skcb->addr.da].nusers)
+		skcb->flags |= J1939_ECU_LOCAL_DST;
+
+	/* src is always local, I'm sending ... */
+	skcb->flags |= J1939_ECU_LOCAL_SRC;
+
+	/* prepare new session */
+	session = j1939_session_new(priv, skb, size);
+	if (!session)
+		return ERR_PTR(-ENOMEM);
+
+	/* skb is recounted in j1939_session_new() */
+	session->sk = skb->sk;
+	session->transmission = true;
+	session->pkt.total = (size + 6) / 7;
+	session->pkt.block = skcb->addr.type == J1939_ETP ? 255 :
+		min(j1939_tp_block ?: 255, session->pkt.total);
+
+	if (j1939_cb_is_broadcast(&session->skcb))
+		/* set the end-packet for broadcast */
+		session->pkt.last = session->pkt.total;
+
+	skcb->tskey = session->sk->sk_tskey++;
+	session->tskey = skcb->tskey;
+
+	return session;
+}
+
+static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+	int extd = J1939_TP;
+	u8 cmd = skb->data[0];
+
+	switch (cmd) {
+	case J1939_ETP_CMD_RTS:
+		extd = J1939_ETP;
+		/* fall through */
+	case J1939_TP_CMD_BAM: /* fall through */
+	case J1939_TP_CMD_RTS: /* fall through */
+		if (skcb->addr.type != extd)
+			return;
+
+		if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) {
+			netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n",
+				     __func__, skcb->addr.sa);
+			return;
+		}
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_rts(priv, skb, true);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_rts(priv, skb, false);
+
+		break;
+
+	case J1939_ETP_CMD_CTS:
+		extd = J1939_ETP;
+		/* fall through */
+	case J1939_TP_CMD_CTS:
+		if (skcb->addr.type != extd)
+			return;
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_cts(priv, skb, false);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_cts(priv, skb, true);
+
+		break;
+
+	case J1939_ETP_CMD_DPO:
+		if (skcb->addr.type != J1939_ETP)
+			return;
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_dpo(priv, skb, true);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_dpo(priv, skb, false);
+
+		break;
+
+	case J1939_ETP_CMD_EOMA:
+		extd = J1939_ETP;
+		/* fall through */
+	case J1939_TP_CMD_EOMA:
+		if (skcb->addr.type != extd)
+			return;
+
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_eoma(priv, skb, false);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_eoma(priv, skb, true);
+
+		break;
+
+	case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */
+		if (j1939_tp_im_transmitter(skcb))
+			j1939_xtp_rx_abort(priv, skb, true);
+
+		if (j1939_tp_im_receiver(skcb))
+			j1939_xtp_rx_abort(priv, skb, false);
+
+		break;
+	default:
+		return;
+	}
+}
+
+int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+
+	if (!j1939_tp_im_involved_anydir(skcb))
+		return 0;
+
+	switch (skcb->addr.pgn) {
+	case J1939_ETP_PGN_DAT:
+		skcb->addr.type = J1939_ETP;
+		/* fall through */
+	case J1939_TP_PGN_DAT:
+		j1939_xtp_rx_dat(priv, skb);
+		break;
+
+	case J1939_ETP_PGN_CTL:
+		skcb->addr.type = J1939_ETP;
+		/* fall through */
+	case J1939_TP_PGN_CTL:
+		if (skb->len < 8)
+			return 0; /* Don't care. Nothing to extract here */
+
+		j1939_tp_cmd_recv(priv, skb);
+		break;
+	default:
+		return 0; /* no problem */
+	}
+	return 1; /* "I processed the message" */
+}
+
+void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb)
+{
+	struct j1939_session *session;
+
+	if (!skb->sk)
+		return;
+
+	j1939_session_list_lock(priv);
+	session = j1939_session_get_simple(priv, skb);
+	j1939_session_list_unlock(priv);
+	if (!session) {
+		netdev_warn(priv->ndev,
+			    "%s: Received already invalidated message\n",
+			    __func__);
+		return;
+	}
+
+	j1939_session_timers_cancel(session);
+	j1939_session_deactivate(session);
+	j1939_session_put(session);
+}
+
+int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
+{
+	struct j1939_session *session, *saved;
+
+	netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk);
+	j1939_session_list_lock(priv);
+	list_for_each_entry_safe(session, saved,
+				 &priv->active_session_list,
+				 active_session_list_entry) {
+		if (!sk || sk == session->sk) {
+			j1939_session_timers_cancel(session);
+			session->err = ESHUTDOWN;
+			j1939_session_deactivate_locked(session);
+		}
+	}
+	j1939_session_list_unlock(priv);
+	return NOTIFY_DONE;
+}
+
+void j1939_tp_init(struct j1939_priv *priv)
+{
+	spin_lock_init(&priv->active_session_list_lock);
+	INIT_LIST_HEAD(&priv->active_session_list);
+	priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE;
+}
-- 
cgit v1.2.3


From 74ded38a8e185b57a4362787815c353ac595013c Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Date: Sat, 31 Aug 2019 10:26:21 +0200
Subject: w1: add 1-wire master driver for IP block found in SGI ASICs

Starting with SGI Origin machines nearly every new SGI ASIC contains
an 1-Wire master. They are used for attaching One-Wire prom devices,
which contain information about part numbers, revision numbers,
serial number etc. and MAC addresses for ethernet interfaces.
This patch adds a master driver to support this IP block.
It also adds an extra field dev_id to struct w1_bus_master, which
could be in used in slave drivers for creating unique device names.

Signed-off-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
Link: https://lore.kernel.org/r/20190831082623.15627-2-tbogendoerfer@suse.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/w1/masters/Kconfig           |   9 +++
 drivers/w1/masters/Makefile          |   1 +
 drivers/w1/masters/sgi_w1.c          | 130 +++++++++++++++++++++++++++++++++++
 include/linux/platform_data/sgi-w1.h |  13 ++++
 include/linux/w1.h                   |   5 ++
 5 files changed, 158 insertions(+)
 create mode 100644 drivers/w1/masters/sgi_w1.c
 create mode 100644 include/linux/platform_data/sgi-w1.h

(limited to 'include')

diff --git a/drivers/w1/masters/Kconfig b/drivers/w1/masters/Kconfig
index 7ae260577901..24b9a8e05f64 100644
--- a/drivers/w1/masters/Kconfig
+++ b/drivers/w1/masters/Kconfig
@@ -65,5 +65,14 @@ config HDQ_MASTER_OMAP
 	  Say Y here if you want support for the 1-wire or HDQ Interface
 	  on an OMAP processor.
 
+config W1_MASTER_SGI
+	tristate "SGI ASIC driver"
+	help
+	  Say Y here if you want support for your 1-wire devices using
+	  SGI ASIC 1-Wire interface
+
+	  This support is also available as a module.  If so, the module
+	  will be called sgi_w1.
+
 endmenu
 
diff --git a/drivers/w1/masters/Makefile b/drivers/w1/masters/Makefile
index 18954cae4256..dae629b7ab49 100644
--- a/drivers/w1/masters/Makefile
+++ b/drivers/w1/masters/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_W1_MASTER_MXC)		+= mxc_w1.o
 obj-$(CONFIG_W1_MASTER_DS1WM)		+= ds1wm.o
 obj-$(CONFIG_W1_MASTER_GPIO)		+= w1-gpio.o
 obj-$(CONFIG_HDQ_MASTER_OMAP)		+= omap_hdq.o
+obj-$(CONFIG_W1_MASTER_SGI)		+= sgi_w1.o
diff --git a/drivers/w1/masters/sgi_w1.c b/drivers/w1/masters/sgi_w1.c
new file mode 100644
index 000000000000..1b2d96b945be
--- /dev/null
+++ b/drivers/w1/masters/sgi_w1.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sgi_w1.c - w1 master driver for one wire support in SGI ASICs
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/sgi-w1.h>
+
+#include <linux/w1.h>
+
+#define MCR_RD_DATA	BIT(0)
+#define MCR_DONE	BIT(1)
+
+#define MCR_PACK(pulse, sample) (((pulse) << 10) | ((sample) << 2))
+
+struct sgi_w1_device {
+	u32 __iomem *mcr;
+	struct w1_bus_master bus_master;
+	char dev_id[64];
+};
+
+static u8 sgi_w1_wait(u32 __iomem *mcr)
+{
+	u32 mcr_val;
+
+	do {
+		mcr_val = readl(mcr);
+	} while (!(mcr_val & MCR_DONE));
+
+	return (mcr_val & MCR_RD_DATA) ? 1 : 0;
+}
+
+/*
+ * this is the low level routine to
+ * reset the device on the One Wire interface
+ * on the hardware
+ */
+static u8 sgi_w1_reset_bus(void *data)
+{
+	struct sgi_w1_device *dev = data;
+	u8 ret;
+
+	writel(MCR_PACK(520, 65), dev->mcr);
+	ret = sgi_w1_wait(dev->mcr);
+	udelay(500); /* recovery time */
+	return ret;
+}
+
+/*
+ * this is the low level routine to read/write a bit on the One Wire
+ * interface on the hardware. It does write 0 if parameter bit is set
+ * to 0, otherwise a write 1/read.
+ */
+static u8 sgi_w1_touch_bit(void *data, u8 bit)
+{
+	struct sgi_w1_device *dev = data;
+	u8 ret;
+
+	if (bit)
+		writel(MCR_PACK(6, 13), dev->mcr);
+	else
+		writel(MCR_PACK(80, 30), dev->mcr);
+
+	ret = sgi_w1_wait(dev->mcr);
+	if (bit)
+		udelay(100); /* recovery */
+	return ret;
+}
+
+static int sgi_w1_probe(struct platform_device *pdev)
+{
+	struct sgi_w1_device *sdev;
+	struct sgi_w1_platform_data *pdata;
+	struct resource *res;
+
+	sdev = devm_kzalloc(&pdev->dev, sizeof(struct sgi_w1_device),
+			    GFP_KERNEL);
+	if (!sdev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sdev->mcr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sdev->mcr))
+		return PTR_ERR(sdev->mcr);
+
+	sdev->bus_master.data = sdev;
+	sdev->bus_master.reset_bus = sgi_w1_reset_bus;
+	sdev->bus_master.touch_bit = sgi_w1_touch_bit;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (pdata) {
+		strlcpy(sdev->dev_id, pdata->dev_id, sizeof(sdev->dev_id));
+		sdev->bus_master.dev_id = sdev->dev_id;
+	}
+
+	platform_set_drvdata(pdev, sdev);
+
+	return w1_add_master_device(&sdev->bus_master);
+}
+
+/*
+ * disassociate the w1 device from the driver
+ */
+static int sgi_w1_remove(struct platform_device *pdev)
+{
+	struct sgi_w1_device *sdev = platform_get_drvdata(pdev);
+
+	w1_remove_master_device(&sdev->bus_master);
+
+	return 0;
+}
+
+static struct platform_driver sgi_w1_driver = {
+	.driver = {
+		.name = "sgi_w1",
+	},
+	.probe = sgi_w1_probe,
+	.remove = sgi_w1_remove,
+};
+module_platform_driver(sgi_w1_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Thomas Bogendoerfer");
+MODULE_DESCRIPTION("Driver for One-Wire IP in SGI ASICs");
diff --git a/include/linux/platform_data/sgi-w1.h b/include/linux/platform_data/sgi-w1.h
new file mode 100644
index 000000000000..e28c8a90ff84
--- /dev/null
+++ b/include/linux/platform_data/sgi-w1.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * SGI One-Wire (W1) IP
+ */
+
+#ifndef PLATFORM_DATA_SGI_W1_H
+#define PLATFORM_DATA_SGI_W1_H
+
+struct sgi_w1_platform_data {
+	char dev_id[64];
+};
+
+#endif /* PLATFORM_DATA_SGI_W1_H */
diff --git a/include/linux/w1.h b/include/linux/w1.h
index e0b5156f78fd..7da0c7588e04 100644
--- a/include/linux/w1.h
+++ b/include/linux/w1.h
@@ -118,6 +118,9 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64);
  * w1_master* is passed to the slave found callback.
  * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH
  *
+ * @dev_id: Optional device id string, which w1 slaves could use for
+ * creating names, which then give a connection to the w1 master
+ *
  * Note: read_bit and write_bit are very low level functions and should only
  * be used with hardware that doesn't really support 1-wire operations,
  * like a parallel/serial port.
@@ -150,6 +153,8 @@ struct w1_bus_master {
 
 	void		(*search)(void *, struct w1_master *,
 		u8, w1_slave_found_callback);
+
+	char		*dev_id;
 };
 
 /**
-- 
cgit v1.2.3


From 305e503b14a879e619974b8e82236d5f32d0921e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 22:27:35 +0200
Subject: spi: remove w90x900 driver

The ARM w90x900 platform is getting removed, so this driver is obsolete.

Link: https://lore.kernel.org/r/20190809202749.742267-8-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/spi/Kconfig                      |   7 -
 drivers/spi/Makefile                     |   1 -
 drivers/spi/spi-nuc900.c                 | 429 -------------------------------
 include/Kbuild                           |   1 -
 include/linux/platform_data/spi-nuc900.h |  29 ---
 5 files changed, 467 deletions(-)
 delete mode 100644 drivers/spi/spi-nuc900.c
 delete mode 100644 include/linux/platform_data/spi-nuc900.h

(limited to 'include')

diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 3a1d8f1170de..949b18ed9d6b 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -440,13 +440,6 @@ config SPI_NPCM_PSPI
 	  This driver provides support for Nuvoton NPCM BMC
 	  Peripheral SPI controller in master mode.
 
-config SPI_NUC900
-	tristate "Nuvoton NUC900 series SPI"
-	depends on ARCH_W90X900
-	select SPI_BITBANG
-	help
-	  SPI driver for Nuvoton NUC900 series ARM SoCs
-
 config SPI_LANTIQ_SSC
 	tristate "Lantiq SSC SPI controller"
 	depends on LANTIQ || COMPILE_TEST
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 63dcab552bcb..b1131809a294 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -64,7 +64,6 @@ obj-$(CONFIG_SPI_MT7621)		+= spi-mt7621.o
 obj-$(CONFIG_SPI_MXIC)			+= spi-mxic.o
 obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
 obj-$(CONFIG_SPI_NPCM_PSPI)		+= spi-npcm-pspi.o
-obj-$(CONFIG_SPI_NUC900)		+= spi-nuc900.o
 obj-$(CONFIG_SPI_NXP_FLEXSPI)		+= spi-nxp-fspi.o
 obj-$(CONFIG_SPI_OC_TINY)		+= spi-oc-tiny.o
 spi-octeon-objs				:= spi-cavium.o spi-cavium-octeon.o
diff --git a/drivers/spi/spi-nuc900.c b/drivers/spi/spi-nuc900.c
deleted file mode 100644
index 37e2034ad4d5..000000000000
--- a/drivers/spi/spi-nuc900.c
+++ /dev/null
@@ -1,429 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2009 Nuvoton technology.
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/clk.h>
-#include <linux/device.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-
-#include <linux/spi/spi.h>
-#include <linux/spi/spi_bitbang.h>
-
-#include <linux/platform_data/spi-nuc900.h>
-
-/* usi registers offset */
-#define USI_CNT		0x00
-#define USI_DIV		0x04
-#define USI_SSR		0x08
-#define USI_RX0		0x10
-#define USI_TX0		0x10
-
-/* usi register bit */
-#define ENINT		(0x01 << 17)
-#define ENFLG		(0x01 << 16)
-#define SLEEP		(0x0f << 12)
-#define TXNUM		(0x03 << 8)
-#define TXBITLEN	(0x1f << 3)
-#define TXNEG		(0x01 << 2)
-#define RXNEG		(0x01 << 1)
-#define LSB		(0x01 << 10)
-#define SELECTLEV	(0x01 << 2)
-#define SELECTPOL	(0x01 << 31)
-#define SELECTSLAVE	0x01
-#define GOBUSY		0x01
-
-struct nuc900_spi {
-	struct spi_bitbang	 bitbang;
-	struct completion	 done;
-	void __iomem		*regs;
-	int			 irq;
-	int			 len;
-	int			 count;
-	const unsigned char	*tx;
-	unsigned char		*rx;
-	struct clk		*clk;
-	struct spi_master	*master;
-	struct nuc900_spi_info *pdata;
-	spinlock_t		lock;
-};
-
-static inline struct nuc900_spi *to_hw(struct spi_device *sdev)
-{
-	return spi_master_get_devdata(sdev->master);
-}
-
-static void nuc900_slave_select(struct spi_device *spi, unsigned int ssr)
-{
-	struct nuc900_spi *hw = to_hw(spi);
-	unsigned int val;
-	unsigned int cs = spi->mode & SPI_CS_HIGH ? 1 : 0;
-	unsigned int cpol = spi->mode & SPI_CPOL ? 1 : 0;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_SSR);
-
-	if (!cs)
-		val &= ~SELECTLEV;
-	else
-		val |= SELECTLEV;
-
-	if (!ssr)
-		val &= ~SELECTSLAVE;
-	else
-		val |= SELECTSLAVE;
-
-	__raw_writel(val, hw->regs + USI_SSR);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (!cpol)
-		val &= ~SELECTPOL;
-	else
-		val |= SELECTPOL;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_spi_chipsel(struct spi_device *spi, int value)
-{
-	switch (value) {
-	case BITBANG_CS_INACTIVE:
-		nuc900_slave_select(spi, 0);
-		break;
-
-	case BITBANG_CS_ACTIVE:
-		nuc900_slave_select(spi, 1);
-		break;
-	}
-}
-
-static void nuc900_spi_setup_txnum(struct nuc900_spi *hw, unsigned int txnum)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT) & ~TXNUM;
-
-	if (txnum)
-		val |= txnum << 0x08;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-
-}
-
-static void nuc900_spi_setup_txbitlen(struct nuc900_spi *hw,
-							unsigned int txbitlen)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT) & ~TXBITLEN;
-
-	val |= (txbitlen << 0x03);
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_spi_gobusy(struct nuc900_spi *hw)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	val |= GOBUSY;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static inline unsigned int hw_txbyte(struct nuc900_spi *hw, int count)
-{
-	return hw->tx ? hw->tx[count] : 0;
-}
-
-static int nuc900_spi_txrx(struct spi_device *spi, struct spi_transfer *t)
-{
-	struct nuc900_spi *hw = to_hw(spi);
-
-	hw->tx = t->tx_buf;
-	hw->rx = t->rx_buf;
-	hw->len = t->len;
-	hw->count = 0;
-
-	__raw_writel(hw_txbyte(hw, 0x0), hw->regs + USI_TX0);
-
-	nuc900_spi_gobusy(hw);
-
-	wait_for_completion(&hw->done);
-
-	return hw->count;
-}
-
-static irqreturn_t nuc900_spi_irq(int irq, void *dev)
-{
-	struct nuc900_spi *hw = dev;
-	unsigned int status;
-	unsigned int count = hw->count;
-
-	status = __raw_readl(hw->regs + USI_CNT);
-	__raw_writel(status, hw->regs + USI_CNT);
-
-	if (status & ENFLG) {
-		hw->count++;
-
-		if (hw->rx)
-			hw->rx[count] = __raw_readl(hw->regs + USI_RX0);
-		count++;
-
-		if (count < hw->len) {
-			__raw_writel(hw_txbyte(hw, count), hw->regs + USI_TX0);
-			nuc900_spi_gobusy(hw);
-		} else {
-			complete(&hw->done);
-		}
-
-		return IRQ_HANDLED;
-	}
-
-	complete(&hw->done);
-	return IRQ_HANDLED;
-}
-
-static void nuc900_tx_edge(struct nuc900_spi *hw, unsigned int edge)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (edge)
-		val |= TXNEG;
-	else
-		val &= ~TXNEG;
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_rx_edge(struct nuc900_spi *hw, unsigned int edge)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (edge)
-		val |= RXNEG;
-	else
-		val &= ~RXNEG;
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_send_first(struct nuc900_spi *hw, unsigned int lsb)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	if (lsb)
-		val |= LSB;
-	else
-		val &= ~LSB;
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_set_sleep(struct nuc900_spi *hw, unsigned int sleep)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT) & ~SLEEP;
-
-	if (sleep)
-		val |= (sleep << 12);
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_enable_int(struct nuc900_spi *hw)
-{
-	unsigned int val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&hw->lock, flags);
-
-	val = __raw_readl(hw->regs + USI_CNT);
-
-	val |= ENINT;
-
-	__raw_writel(val, hw->regs + USI_CNT);
-
-	spin_unlock_irqrestore(&hw->lock, flags);
-}
-
-static void nuc900_set_divider(struct nuc900_spi *hw)
-{
-	__raw_writel(hw->pdata->divider, hw->regs + USI_DIV);
-}
-
-static void nuc900_init_spi(struct nuc900_spi *hw)
-{
-	clk_enable(hw->clk);
-	spin_lock_init(&hw->lock);
-
-	nuc900_tx_edge(hw, hw->pdata->txneg);
-	nuc900_rx_edge(hw, hw->pdata->rxneg);
-	nuc900_send_first(hw, hw->pdata->lsb);
-	nuc900_set_sleep(hw, hw->pdata->sleep);
-	nuc900_spi_setup_txbitlen(hw, hw->pdata->txbitlen);
-	nuc900_spi_setup_txnum(hw, hw->pdata->txnum);
-	nuc900_set_divider(hw);
-	nuc900_enable_int(hw);
-}
-
-static int nuc900_spi_probe(struct platform_device *pdev)
-{
-	struct nuc900_spi *hw;
-	struct spi_master *master;
-	struct resource *res;
-	int err = 0;
-
-	master = spi_alloc_master(&pdev->dev, sizeof(struct nuc900_spi));
-	if (master == NULL) {
-		dev_err(&pdev->dev, "No memory for spi_master\n");
-		return -ENOMEM;
-	}
-
-	hw = spi_master_get_devdata(master);
-	hw->master = master;
-	hw->pdata  = dev_get_platdata(&pdev->dev);
-
-	if (hw->pdata == NULL) {
-		dev_err(&pdev->dev, "No platform data supplied\n");
-		err = -ENOENT;
-		goto err_pdata;
-	}
-
-	platform_set_drvdata(pdev, hw);
-	init_completion(&hw->done);
-
-	master->mode_bits          = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	if (hw->pdata->lsb)
-		master->mode_bits |= SPI_LSB_FIRST;
-	master->num_chipselect     = hw->pdata->num_cs;
-	master->bus_num            = hw->pdata->bus_num;
-	hw->bitbang.master         = hw->master;
-	hw->bitbang.chipselect     = nuc900_spi_chipsel;
-	hw->bitbang.txrx_bufs      = nuc900_spi_txrx;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hw->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(hw->regs)) {
-		err = PTR_ERR(hw->regs);
-		goto err_pdata;
-	}
-
-	hw->irq = platform_get_irq(pdev, 0);
-	if (hw->irq < 0) {
-		dev_err(&pdev->dev, "No IRQ specified\n");
-		err = -ENOENT;
-		goto err_pdata;
-	}
-
-	err = devm_request_irq(&pdev->dev, hw->irq, nuc900_spi_irq, 0,
-				pdev->name, hw);
-	if (err) {
-		dev_err(&pdev->dev, "Cannot claim IRQ\n");
-		goto err_pdata;
-	}
-
-	hw->clk = devm_clk_get(&pdev->dev, "spi");
-	if (IS_ERR(hw->clk)) {
-		dev_err(&pdev->dev, "No clock for device\n");
-		err = PTR_ERR(hw->clk);
-		goto err_pdata;
-	}
-
-	mfp_set_groupg(&pdev->dev, NULL);
-	nuc900_init_spi(hw);
-
-	err = spi_bitbang_start(&hw->bitbang);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to register SPI master\n");
-		goto err_register;
-	}
-
-	return 0;
-
-err_register:
-	clk_disable(hw->clk);
-err_pdata:
-	spi_master_put(hw->master);
-	return err;
-}
-
-static int nuc900_spi_remove(struct platform_device *dev)
-{
-	struct nuc900_spi *hw = platform_get_drvdata(dev);
-
-	spi_bitbang_stop(&hw->bitbang);
-	clk_disable(hw->clk);
-	spi_master_put(hw->master);
-	return 0;
-}
-
-static struct platform_driver nuc900_spi_driver = {
-	.probe		= nuc900_spi_probe,
-	.remove		= nuc900_spi_remove,
-	.driver		= {
-		.name	= "nuc900-spi",
-	},
-};
-module_platform_driver(nuc900_spi_driver);
-
-MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
-MODULE_DESCRIPTION("nuc900 spi driver!");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:nuc900-spi");
diff --git a/include/Kbuild b/include/Kbuild
index c38f0d46b267..5e0642d79dce 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -550,7 +550,6 @@ header-test-			+= linux/platform_data/sky81452-backlight.h
 header-test-			+= linux/platform_data/spi-davinci.h
 header-test-			+= linux/platform_data/spi-ep93xx.h
 header-test-			+= linux/platform_data/spi-mt65xx.h
-header-test-			+= linux/platform_data/spi-nuc900.h
 header-test-			+= linux/platform_data/st_sensors_pdata.h
 header-test-			+= linux/platform_data/ti-sysc.h
 header-test-			+= linux/platform_data/timer-ixp4xx.h
diff --git a/include/linux/platform_data/spi-nuc900.h b/include/linux/platform_data/spi-nuc900.h
deleted file mode 100644
index ca3510877000..000000000000
--- a/include/linux/platform_data/spi-nuc900.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2009 Nuvoton technology corporation.
- *
- * Wan ZongShun <mcuos.com@gmail.com>
- */
-
-#ifndef __SPI_NUC900_H
-#define __SPI_NUC900_H
-
-extern void mfp_set_groupg(struct device *dev, const char *subname);
-
-struct nuc900_spi_info {
-	unsigned int num_cs;
-	unsigned int lsb;
-	unsigned int txneg;
-	unsigned int rxneg;
-	unsigned int divider;
-	unsigned int sleep;
-	unsigned int txnum;
-	unsigned int txbitlen;
-	int bus_num;
-};
-
-struct nuc900_spi_chip {
-	unsigned char bits_per_word;
-};
-
-#endif /* __SPI_NUC900_H */
-- 
cgit v1.2.3


From c7b46e0c33c594623a279db4e1725d7ae477280f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 9 Aug 2019 22:27:37 +0200
Subject: fbdev: remove w90x900/nuc900 platform drivers

The ARM w90x900 platform is getting removed, so this driver is obsolete.

Link: https://lore.kernel.org/r/20190809202749.742267-10-arnd@arndb.de
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/video/fbdev/Kconfig                  |  14 -
 drivers/video/fbdev/Makefile                 |   1 -
 drivers/video/fbdev/nuc900fb.c               | 760 ---------------------------
 drivers/video/fbdev/nuc900fb.h               |  51 --
 include/Kbuild                               |   1 -
 include/linux/platform_data/video-nuc900fb.h |  79 ---
 6 files changed, 906 deletions(-)
 delete mode 100644 drivers/video/fbdev/nuc900fb.c
 delete mode 100644 drivers/video/fbdev/nuc900fb.h
 delete mode 100644 include/linux/platform_data/video-nuc900fb.h

(limited to 'include')

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 6b2de93bd302..5f83cd715387 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -1924,20 +1924,6 @@ config FB_S3C2410_DEBUG
 	  Turn on debugging messages. Note that you can set/unset at run time
 	  through sysfs
 
-config FB_NUC900
-	tristate "NUC900 LCD framebuffer support"
-	depends on FB && ARCH_W90X900
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
-	---help---
-	  Frame buffer driver for the built-in LCD controller in the Nuvoton
-	  NUC900 processor
-
-config GPM1040A0_320X240
-	bool "Giantplus Technology GPM1040A0 320x240 Color TFT LCD"
-	depends on FB_NUC900
-
 config FB_SM501
 	tristate "Silicon Motion SM501 framebuffer support"
 	depends on FB && MFD_SM501
diff --git a/drivers/video/fbdev/Makefile b/drivers/video/fbdev/Makefile
index 7dc4861a93e6..aab7155884ea 100644
--- a/drivers/video/fbdev/Makefile
+++ b/drivers/video/fbdev/Makefile
@@ -116,7 +116,6 @@ obj-y                             += omap2/
 obj-$(CONFIG_XEN_FBDEV_FRONTEND)  += xen-fbfront.o
 obj-$(CONFIG_FB_CARMINE)          += carminefb.o
 obj-$(CONFIG_FB_MB862XX)	  += mb862xx/
-obj-$(CONFIG_FB_NUC900)           += nuc900fb.o
 obj-$(CONFIG_FB_JZ4740)		  += jz4740_fb.o
 obj-$(CONFIG_FB_PUV3_UNIGFX)      += fb-puv3.o
 obj-$(CONFIG_FB_HYPERV)		  += hyperv_fb.o
diff --git a/drivers/video/fbdev/nuc900fb.c b/drivers/video/fbdev/nuc900fb.c
deleted file mode 100644
index 4fd851598584..000000000000
--- a/drivers/video/fbdev/nuc900fb.c
+++ /dev/null
@@ -1,760 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *
- * Copyright (c) 2009 Nuvoton technology corporation
- * All rights reserved.
- *
- *  Description:
- *    Nuvoton LCD Controller Driver
- *  Author:
- *    Wang Qiang (rurality.linux@gmail.com) 2009/12/11
- */
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/tty.h>
-#include <linux/slab.h>
-#include <linux/delay.h>
-#include <linux/fb.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-#include <linux/wait.h>
-#include <linux/platform_device.h>
-#include <linux/clk.h>
-#include <linux/cpufreq.h>
-#include <linux/io.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-
-#include <mach/map.h>
-#include <mach/regs-clock.h>
-#include <mach/regs-ldm.h>
-#include <linux/platform_data/video-nuc900fb.h>
-
-#include "nuc900fb.h"
-
-
-/*
- *  Initialize the nuc900 video (dual) buffer address
- */
-static void nuc900fb_set_lcdaddr(struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	void __iomem *regs = fbi->io;
-	unsigned long vbaddr1, vbaddr2;
-
-	vbaddr1  = info->fix.smem_start;
-	vbaddr2  = info->fix.smem_start;
-	vbaddr2 += info->fix.line_length * info->var.yres;
-
-	/* set frambuffer start phy addr*/
-	writel(vbaddr1, regs + REG_LCM_VA_BADDR0);
-	writel(vbaddr2, regs + REG_LCM_VA_BADDR1);
-
-	writel(fbi->regs.lcd_va_fbctrl, regs + REG_LCM_VA_FBCTRL);
-	writel(fbi->regs.lcd_va_scale, regs + REG_LCM_VA_SCALE);
-}
-
-/*
- *	calculate divider for lcd div
- */
-static unsigned int nuc900fb_calc_pixclk(struct nuc900fb_info *fbi,
-					 unsigned long pixclk)
-{
-	unsigned long clk = fbi->clk_rate;
-	unsigned long long div;
-
-	/* pixclk is in picseconds. our clock is in Hz*/
-	/* div = (clk * pixclk)/10^12 */
-	div = (unsigned long long)clk * pixclk;
-	div >>= 12;
-	do_div(div, 625 * 625UL * 625);
-
-	dev_dbg(fbi->dev, "pixclk %ld, divisor is %lld\n", pixclk, div);
-
-	return div;
-}
-
-/*
- *	Check the video params of 'var'.
- */
-static int nuc900fb_check_var(struct fb_var_screeninfo *var,
-			       struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	struct nuc900fb_mach_info *mach_info = dev_get_platdata(fbi->dev);
-	struct nuc900fb_display *display = NULL;
-	struct nuc900fb_display *default_display = mach_info->displays +
-						   mach_info->default_display;
-	int i;
-
-	dev_dbg(fbi->dev, "check_var(var=%p, info=%p)\n", var, info);
-
-	/* validate x/y resolution */
-	/* choose default mode if possible */
-	if (var->xres == default_display->xres &&
-	    var->yres == default_display->yres &&
-	    var->bits_per_pixel == default_display->bpp)
-		display = default_display;
-	else
-		for (i = 0; i < mach_info->num_displays; i++)
-			if (var->xres == mach_info->displays[i].xres &&
-			    var->yres == mach_info->displays[i].yres &&
-			    var->bits_per_pixel == mach_info->displays[i].bpp) {
-				display = mach_info->displays + i;
-				break;
-			}
-
-	if (display == NULL) {
-		printk(KERN_ERR "wrong resolution or depth %dx%d at %d bit per pixel\n",
-			var->xres, var->yres, var->bits_per_pixel);
-		return -EINVAL;
-	}
-
-	/* it should be the same size as the display */
-	var->xres_virtual	= display->xres;
-	var->yres_virtual	= display->yres;
-	var->height		= display->height;
-	var->width		= display->width;
-
-	/* copy lcd settings */
-	var->pixclock		= display->pixclock;
-	var->left_margin	= display->left_margin;
-	var->right_margin	= display->right_margin;
-	var->upper_margin	= display->upper_margin;
-	var->lower_margin	= display->lower_margin;
-	var->vsync_len		= display->vsync_len;
-	var->hsync_len		= display->hsync_len;
-
-	var->transp.offset	= 0;
-	var->transp.length	= 0;
-
-	fbi->regs.lcd_dccs = display->dccs;
-	fbi->regs.lcd_device_ctrl = display->devctl;
-	fbi->regs.lcd_va_fbctrl = display->fbctrl;
-	fbi->regs.lcd_va_scale = display->scale;
-
-	/* set R/G/B possions */
-	switch (var->bits_per_pixel) {
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-	default:
-		var->red.offset 	= 0;
-		var->red.length 	= var->bits_per_pixel;
-		var->green 		= var->red;
-		var->blue		= var->red;
-		break;
-	case 12:
-		var->red.length		= 4;
-		var->green.length	= 4;
-		var->blue.length	= 4;
-		var->red.offset		= 8;
-		var->green.offset	= 4;
-		var->blue.offset	= 0;
-		break;
-	case 16:
-		var->red.length		= 5;
-		var->green.length	= 6;
-		var->blue.length	= 5;
-		var->red.offset		= 11;
-		var->green.offset	= 5;
-		var->blue.offset	= 0;
-		break;
-	case 18:
-		var->red.length		= 6;
-		var->green.length	= 6;
-		var->blue.length	= 6;
-		var->red.offset		= 12;
-		var->green.offset	= 6;
-		var->blue.offset	= 0;
-		break;
-	case 32:
-		var->red.length		= 8;
-		var->green.length	= 8;
-		var->blue.length	= 8;
-		var->red.offset		= 16;
-		var->green.offset	= 8;
-		var->blue.offset	= 0;
-		break;
-	}
-
-	return 0;
-}
-
-/*
- *	Calculate lcd register values from var setting & save into hw
- */
-static void nuc900fb_calculate_lcd_regs(const struct fb_info *info,
-					struct nuc900fb_hw *regs)
-{
-	const struct fb_var_screeninfo *var = &info->var;
-	int vtt = var->height + var->upper_margin + var->lower_margin;
-	int htt = var->width + var->left_margin + var->right_margin;
-	int hsync = var->width + var->right_margin;
-	int vsync = var->height + var->lower_margin;
-
-	regs->lcd_crtc_size = LCM_CRTC_SIZE_VTTVAL(vtt) |
-			      LCM_CRTC_SIZE_HTTVAL(htt);
-	regs->lcd_crtc_dend = LCM_CRTC_DEND_VDENDVAL(var->height) |
-			      LCM_CRTC_DEND_HDENDVAL(var->width);
-	regs->lcd_crtc_hr = LCM_CRTC_HR_EVAL(var->width + 5) |
-			    LCM_CRTC_HR_SVAL(var->width + 1);
-	regs->lcd_crtc_hsync = LCM_CRTC_HSYNC_EVAL(hsync + var->hsync_len) |
-			       LCM_CRTC_HSYNC_SVAL(hsync);
-	regs->lcd_crtc_vr = LCM_CRTC_VR_EVAL(vsync + var->vsync_len) |
-			    LCM_CRTC_VR_SVAL(vsync);
-
-}
-
-/*
- *	Activate (set) the controller from the given framebuffer
- *	information
- */
-static void nuc900fb_activate_var(struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	void __iomem *regs = fbi->io;
-	struct fb_var_screeninfo *var = &info->var;
-	int clkdiv;
-
-	clkdiv = nuc900fb_calc_pixclk(fbi, var->pixclock) - 1;
-	if (clkdiv < 0)
-		clkdiv = 0;
-
-	nuc900fb_calculate_lcd_regs(info, &fbi->regs);
-
-	/* set the new lcd registers*/
-
-	dev_dbg(fbi->dev, "new lcd register set:\n");
-	dev_dbg(fbi->dev, "dccs       = 0x%08x\n", fbi->regs.lcd_dccs);
-	dev_dbg(fbi->dev, "dev_ctl    = 0x%08x\n", fbi->regs.lcd_device_ctrl);
-	dev_dbg(fbi->dev, "crtc_size  = 0x%08x\n", fbi->regs.lcd_crtc_size);
-	dev_dbg(fbi->dev, "crtc_dend  = 0x%08x\n", fbi->regs.lcd_crtc_dend);
-	dev_dbg(fbi->dev, "crtc_hr    = 0x%08x\n", fbi->regs.lcd_crtc_hr);
-	dev_dbg(fbi->dev, "crtc_hsync = 0x%08x\n", fbi->regs.lcd_crtc_hsync);
-	dev_dbg(fbi->dev, "crtc_vr    = 0x%08x\n", fbi->regs.lcd_crtc_vr);
-
-	writel(fbi->regs.lcd_device_ctrl, regs + REG_LCM_DEV_CTRL);
-	writel(fbi->regs.lcd_crtc_size, regs + REG_LCM_CRTC_SIZE);
-	writel(fbi->regs.lcd_crtc_dend, regs + REG_LCM_CRTC_DEND);
-	writel(fbi->regs.lcd_crtc_hr, regs + REG_LCM_CRTC_HR);
-	writel(fbi->regs.lcd_crtc_hsync, regs + REG_LCM_CRTC_HSYNC);
-	writel(fbi->regs.lcd_crtc_vr, regs + REG_LCM_CRTC_VR);
-
-	/* set lcd address pointers */
-	nuc900fb_set_lcdaddr(info);
-
-	writel(fbi->regs.lcd_dccs, regs + REG_LCM_DCCS);
-}
-
-/*
- *      Alters the hardware state.
- *
- */
-static int nuc900fb_set_par(struct fb_info *info)
-{
-	struct fb_var_screeninfo *var = &info->var;
-
-	switch (var->bits_per_pixel) {
-	case 32:
-	case 24:
-	case 18:
-	case 16:
-	case 12:
-		info->fix.visual = FB_VISUAL_TRUECOLOR;
-		break;
-	case 1:
-		info->fix.visual = FB_VISUAL_MONO01;
-		break;
-	default:
-		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
-		break;
-	}
-
-	info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8;
-
-	/* activate this new configuration */
-	nuc900fb_activate_var(info);
-	return 0;
-}
-
-static inline unsigned int chan_to_field(unsigned int chan,
-					 struct fb_bitfield *bf)
-{
-	chan &= 0xffff;
-	chan >>= 16 - bf->length;
-	return chan << bf->offset;
-}
-
-static int nuc900fb_setcolreg(unsigned regno,
-			       unsigned red, unsigned green, unsigned blue,
-			       unsigned transp, struct fb_info *info)
-{
-	unsigned int val;
-
-	switch (info->fix.visual) {
-	case FB_VISUAL_TRUECOLOR:
-		/* true-colour, use pseuo-palette */
-		if (regno < 16) {
-			u32 *pal = info->pseudo_palette;
-
-			val  = chan_to_field(red, &info->var.red);
-			val |= chan_to_field(green, &info->var.green);
-			val |= chan_to_field(blue, &info->var.blue);
-			pal[regno] = val;
-		}
-		break;
-
-	default:
-		return 1;   /* unknown type */
-	}
-	return 0;
-}
-
-/**
- *      nuc900fb_blank
- *
- */
-static int nuc900fb_blank(int blank_mode, struct fb_info *info)
-{
-
-	return 0;
-}
-
-static struct fb_ops nuc900fb_ops = {
-	.owner			= THIS_MODULE,
-	.fb_check_var		= nuc900fb_check_var,
-	.fb_set_par		= nuc900fb_set_par,
-	.fb_blank		= nuc900fb_blank,
-	.fb_setcolreg		= nuc900fb_setcolreg,
-	.fb_fillrect		= cfb_fillrect,
-	.fb_copyarea		= cfb_copyarea,
-	.fb_imageblit		= cfb_imageblit,
-};
-
-
-static inline void modify_gpio(void __iomem *reg,
-			       unsigned long set, unsigned long mask)
-{
-	unsigned long tmp;
-	tmp = readl(reg) & ~mask;
-	writel(tmp | set, reg);
-}
-
-/*
- * Initialise LCD-related registers
- */
-static int nuc900fb_init_registers(struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	struct nuc900fb_mach_info *mach_info = dev_get_platdata(fbi->dev);
-	void __iomem *regs = fbi->io;
-
-	/*reset the display engine*/
-	writel(0, regs + REG_LCM_DCCS);
-	writel(readl(regs + REG_LCM_DCCS) | LCM_DCCS_ENG_RST,
-	       regs + REG_LCM_DCCS);
-	ndelay(100);
-	writel(readl(regs + REG_LCM_DCCS) & (~LCM_DCCS_ENG_RST),
-	       regs + REG_LCM_DCCS);
-	ndelay(100);
-
-	writel(0, regs + REG_LCM_DEV_CTRL);
-
-	/* config gpio output */
-	modify_gpio(W90X900_VA_GPIO + 0x54, mach_info->gpio_dir,
-		    mach_info->gpio_dir_mask);
-	modify_gpio(W90X900_VA_GPIO + 0x58, mach_info->gpio_data,
-		    mach_info->gpio_data_mask);
-
-	return 0;
-}
-
-
-/*
- *    Alloc the SDRAM region of NUC900 for the frame buffer.
- *    The buffer should be a non-cached, non-buffered, memory region
- *    to allow palette and pixel writes without flushing the cache.
- */
-static int nuc900fb_map_video_memory(struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	dma_addr_t map_dma;
-	unsigned long map_size = PAGE_ALIGN(info->fix.smem_len);
-
-	dev_dbg(fbi->dev, "nuc900fb_map_video_memory(fbi=%p) map_size %lu\n",
-		fbi, map_size);
-
-	info->screen_base = dma_alloc_wc(fbi->dev, map_size, &map_dma,
-					 GFP_KERNEL);
-
-	if (!info->screen_base)
-		return -ENOMEM;
-
-	memset(info->screen_base, 0x00, map_size);
-	info->fix.smem_start = map_dma;
-
-	return 0;
-}
-
-static inline void nuc900fb_unmap_video_memory(struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len),
-		    info->screen_base, info->fix.smem_start);
-}
-
-static irqreturn_t nuc900fb_irqhandler(int irq, void *dev_id)
-{
-	struct nuc900fb_info *fbi = dev_id;
-	void __iomem *regs = fbi->io;
-	void __iomem *irq_base = fbi->irq_base;
-	unsigned long lcdirq = readl(regs + REG_LCM_INT_CS);
-
-	if (lcdirq & LCM_INT_CS_DISP_F_STATUS) {
-		writel(readl(irq_base) | 1<<30, irq_base);
-
-		/* wait VA_EN low */
-		if ((readl(regs + REG_LCM_DCCS) &
-		    LCM_DCCS_SINGLE) == LCM_DCCS_SINGLE)
-			while ((readl(regs + REG_LCM_DCCS) &
-			       LCM_DCCS_VA_EN) == LCM_DCCS_VA_EN)
-				;
-		/* display_out-enable */
-		writel(readl(regs + REG_LCM_DCCS) | LCM_DCCS_DISP_OUT_EN,
-			regs + REG_LCM_DCCS);
-		/* va-enable*/
-		writel(readl(regs + REG_LCM_DCCS) | LCM_DCCS_VA_EN,
-			regs + REG_LCM_DCCS);
-	} else if (lcdirq & LCM_INT_CS_UNDERRUN_INT) {
-		writel(readl(irq_base) | LCM_INT_CS_UNDERRUN_INT, irq_base);
-	} else if (lcdirq & LCM_INT_CS_BUS_ERROR_INT) {
-		writel(readl(irq_base) | LCM_INT_CS_BUS_ERROR_INT, irq_base);
-	}
-
-	return IRQ_HANDLED;
-}
-
-#ifdef CONFIG_CPU_FREQ
-
-static int nuc900fb_cpufreq_transition(struct notifier_block *nb,
-				       unsigned long val, void *data)
-{
-	struct nuc900fb_info *info;
-	struct fb_info *fbinfo;
-	long delta_f;
-	info = container_of(nb, struct nuc900fb_info, freq_transition);
-	fbinfo = dev_get_drvdata(info->dev);
-
-	delta_f = info->clk_rate - clk_get_rate(info->clk);
-
-	if ((val == CPUFREQ_POSTCHANGE && delta_f > 0) ||
-	   (val == CPUFREQ_PRECHANGE && delta_f < 0)) {
-		info->clk_rate = clk_get_rate(info->clk);
-		nuc900fb_activate_var(fbinfo);
-	}
-
-	return 0;
-}
-
-static inline int nuc900fb_cpufreq_register(struct nuc900fb_info *fbi)
-{
-	fbi->freq_transition.notifier_call = nuc900fb_cpufreq_transition;
-	return cpufreq_register_notifier(&fbi->freq_transition,
-				  CPUFREQ_TRANSITION_NOTIFIER);
-}
-
-static inline void nuc900fb_cpufreq_deregister(struct nuc900fb_info *fbi)
-{
-	cpufreq_unregister_notifier(&fbi->freq_transition,
-				    CPUFREQ_TRANSITION_NOTIFIER);
-}
-#else
-static inline int nuc900fb_cpufreq_transition(struct notifier_block *nb,
-				       unsigned long val, void *data)
-{
-	return 0;
-}
-
-static inline int nuc900fb_cpufreq_register(struct nuc900fb_info *fbi)
-{
-	return 0;
-}
-
-static inline void nuc900fb_cpufreq_deregister(struct nuc900fb_info *info)
-{
-}
-#endif
-
-static char driver_name[] = "nuc900fb";
-
-static int nuc900fb_probe(struct platform_device *pdev)
-{
-	struct nuc900fb_info *fbi;
-	struct nuc900fb_display *display;
-	struct fb_info	   *fbinfo;
-	struct nuc900fb_mach_info *mach_info;
-	struct resource *res;
-	int ret;
-	int irq;
-	int i;
-	int size;
-
-	dev_dbg(&pdev->dev, "devinit\n");
-	mach_info = dev_get_platdata(&pdev->dev);
-	if (mach_info == NULL) {
-		dev_err(&pdev->dev,
-			"no platform data for lcd, cannot attach\n");
-		return -EINVAL;
-	}
-
-	if (mach_info->default_display > mach_info->num_displays) {
-		dev_err(&pdev->dev,
-			"default display No. is %d but only %d displays \n",
-			mach_info->default_display, mach_info->num_displays);
-		return -EINVAL;
-	}
-
-
-	display = mach_info->displays + mach_info->default_display;
-
-	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "no irq for device\n");
-		return -ENOENT;
-	}
-
-	fbinfo = framebuffer_alloc(sizeof(struct nuc900fb_info), &pdev->dev);
-	if (!fbinfo)
-		return -ENOMEM;
-
-	platform_set_drvdata(pdev, fbinfo);
-
-	fbi = fbinfo->par;
-	fbi->dev = &pdev->dev;
-
-#ifdef CONFIG_CPU_NUC950
-	fbi->drv_type = LCDDRV_NUC950;
-#endif
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	size = resource_size(res);
-	fbi->mem = request_mem_region(res->start, size, pdev->name);
-	if (fbi->mem == NULL) {
-		dev_err(&pdev->dev, "failed to alloc memory region\n");
-		ret = -ENOENT;
-		goto free_fb;
-	}
-
-	fbi->io = ioremap(res->start, size);
-	if (fbi->io == NULL) {
-		dev_err(&pdev->dev, "ioremap() of lcd registers failed\n");
-		ret = -ENXIO;
-		goto release_mem_region;
-	}
-
-	fbi->irq_base = fbi->io + REG_LCM_INT_CS;
-
-
-	/* Stop the LCD */
-	writel(0, fbi->io + REG_LCM_DCCS);
-
-	/* fill the fbinfo*/
-	strcpy(fbinfo->fix.id, driver_name);
-	fbinfo->fix.type		= FB_TYPE_PACKED_PIXELS;
-	fbinfo->fix.type_aux		= 0;
-	fbinfo->fix.xpanstep		= 0;
-	fbinfo->fix.ypanstep		= 0;
-	fbinfo->fix.ywrapstep		= 0;
-	fbinfo->fix.accel		= FB_ACCEL_NONE;
-	fbinfo->var.nonstd		= 0;
-	fbinfo->var.activate		= FB_ACTIVATE_NOW;
-	fbinfo->var.accel_flags		= 0;
-	fbinfo->var.vmode		= FB_VMODE_NONINTERLACED;
-	fbinfo->fbops			= &nuc900fb_ops;
-	fbinfo->flags			= FBINFO_FLAG_DEFAULT;
-	fbinfo->pseudo_palette		= &fbi->pseudo_pal;
-
-	ret = request_irq(irq, nuc900fb_irqhandler, 0, pdev->name, fbi);
-	if (ret) {
-		dev_err(&pdev->dev, "cannot register irq handler %d -err %d\n",
-			irq, ret);
-		ret = -EBUSY;
-		goto release_regs;
-	}
-
-	fbi->clk = clk_get(&pdev->dev, NULL);
-	if (IS_ERR(fbi->clk)) {
-		printk(KERN_ERR "nuc900-lcd:failed to get lcd clock source\n");
-		ret = PTR_ERR(fbi->clk);
-		goto release_irq;
-	}
-
-	clk_enable(fbi->clk);
-	dev_dbg(&pdev->dev, "got and enabled clock\n");
-
-	fbi->clk_rate = clk_get_rate(fbi->clk);
-
-	/* calutate the video buffer size */
-	for (i = 0; i < mach_info->num_displays; i++) {
-		unsigned long smem_len = mach_info->displays[i].xres;
-		smem_len *= mach_info->displays[i].yres;
-		smem_len *= mach_info->displays[i].bpp;
-		smem_len >>= 3;
-		if (fbinfo->fix.smem_len < smem_len)
-			fbinfo->fix.smem_len = smem_len;
-	}
-
-	/* Initialize Video Memory */
-	ret = nuc900fb_map_video_memory(fbinfo);
-	if (ret) {
-		printk(KERN_ERR "Failed to allocate video RAM: %x\n", ret);
-		goto release_clock;
-	}
-
-	dev_dbg(&pdev->dev, "got video memory\n");
-
-	fbinfo->var.xres = display->xres;
-	fbinfo->var.yres = display->yres;
-	fbinfo->var.bits_per_pixel = display->bpp;
-
-	nuc900fb_init_registers(fbinfo);
-
-	nuc900fb_check_var(&fbinfo->var, fbinfo);
-
-	ret = nuc900fb_cpufreq_register(fbi);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "Failed to register cpufreq\n");
-		goto free_video_memory;
-	}
-
-	ret = register_framebuffer(fbinfo);
-	if (ret) {
-		printk(KERN_ERR "failed to register framebuffer device: %d\n",
-			ret);
-		goto free_cpufreq;
-	}
-
-	fb_info(fbinfo, "%s frame buffer device\n", fbinfo->fix.id);
-
-	return 0;
-
-free_cpufreq:
-	nuc900fb_cpufreq_deregister(fbi);
-free_video_memory:
-	nuc900fb_unmap_video_memory(fbinfo);
-release_clock:
-	clk_disable(fbi->clk);
-	clk_put(fbi->clk);
-release_irq:
-	free_irq(irq, fbi);
-release_regs:
-	iounmap(fbi->io);
-release_mem_region:
-	release_mem_region(res->start, size);
-free_fb:
-	framebuffer_release(fbinfo);
-	return ret;
-}
-
-/*
- * shutdown the lcd controller
- */
-static void nuc900fb_stop_lcd(struct fb_info *info)
-{
-	struct nuc900fb_info *fbi = info->par;
-	void __iomem *regs = fbi->io;
-
-	writel((~LCM_DCCS_DISP_INT_EN) | (~LCM_DCCS_VA_EN) | (~LCM_DCCS_OSD_EN),
-		regs + REG_LCM_DCCS);
-}
-
-/*
- *  Cleanup
- */
-static int nuc900fb_remove(struct platform_device *pdev)
-{
-	struct fb_info *fbinfo = platform_get_drvdata(pdev);
-	struct nuc900fb_info *fbi = fbinfo->par;
-	int irq;
-
-	nuc900fb_stop_lcd(fbinfo);
-	msleep(1);
-
-	unregister_framebuffer(fbinfo);
-	nuc900fb_cpufreq_deregister(fbi);
-	nuc900fb_unmap_video_memory(fbinfo);
-
-	iounmap(fbi->io);
-
-	irq = platform_get_irq(pdev, 0);
-	free_irq(irq, fbi);
-
-	release_resource(fbi->mem);
-	kfree(fbi->mem);
-
-	framebuffer_release(fbinfo);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-
-/*
- *	suspend and resume support for the lcd controller
- */
-
-static int nuc900fb_suspend(struct platform_device *dev, pm_message_t state)
-{
-	struct fb_info	   *fbinfo = platform_get_drvdata(dev);
-	struct nuc900fb_info *info = fbinfo->par;
-
-	nuc900fb_stop_lcd(fbinfo);
-	msleep(1);
-	clk_disable(info->clk);
-	return 0;
-}
-
-static int nuc900fb_resume(struct platform_device *dev)
-{
-	struct fb_info	   *fbinfo = platform_get_drvdata(dev);
-	struct nuc900fb_info *fbi = fbinfo->par;
-
-	printk(KERN_INFO "nuc900fb resume\n");
-
-	clk_enable(fbi->clk);
-	msleep(1);
-
-	nuc900fb_init_registers(fbinfo);
-	nuc900fb_activate_var(fbinfo);
-
-	return 0;
-}
-
-#else
-#define nuc900fb_suspend NULL
-#define nuc900fb_resume  NULL
-#endif
-
-static struct platform_driver nuc900fb_driver = {
-	.probe		= nuc900fb_probe,
-	.remove		= nuc900fb_remove,
-	.suspend	= nuc900fb_suspend,
-	.resume		= nuc900fb_resume,
-	.driver		= {
-		.name	= "nuc900-lcd",
-	},
-};
-
-module_platform_driver(nuc900fb_driver);
-
-MODULE_DESCRIPTION("Framebuffer driver for the NUC900");
-MODULE_LICENSE("GPL");
diff --git a/drivers/video/fbdev/nuc900fb.h b/drivers/video/fbdev/nuc900fb.h
deleted file mode 100644
index 055ae9297931..000000000000
--- a/drivers/video/fbdev/nuc900fb.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- *
- * Copyright (c) 2009 Nuvoton technology corporation
- * All rights reserved.
- *
- *   Author:
- *        Wang Qiang(rurality.linux@gmail.com)  2009/12/16
- */
-
-#ifndef __NUC900FB_H
-#define __NUC900FB_H
-
-#include <mach/map.h>
-#include <linux/platform_data/video-nuc900fb.h>
-
-enum nuc900_lcddrv_type {
-	LCDDRV_NUC910,
-	LCDDRV_NUC930,
-	LCDDRV_NUC932,
-	LCDDRV_NUC950,
-	LCDDRV_NUC960,
-};
-
-
-#define PALETTE_BUFFER_SIZE	256
-#define PALETTE_BUFF_CLEAR 	(0x80000000) /* entry is clear/invalid */
-
-struct nuc900fb_info {
-	struct device		*dev;
-	struct clk		*clk;
-
-	struct resource		*mem;
-	void __iomem		*io;
-	void __iomem		*irq_base;
-	int 			drv_type;
-	struct nuc900fb_hw	regs;
-	unsigned long		clk_rate;
-
-#ifdef CONFIG_CPU_FREQ
-	struct notifier_block	freq_transition;
-#endif
-
-	/* keep these registers in case we need to re-write palette */
-	u32			palette_buffer[PALETTE_BUFFER_SIZE];
-	u32			pseudo_pal[16];
-};
-
-int nuc900fb_init(void);
-
-#endif /* __NUC900FB_H */
diff --git a/include/Kbuild b/include/Kbuild
index 5e0642d79dce..4d5a03a81fb5 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -568,7 +568,6 @@ header-test-			+= linux/platform_data/usb3503.h
 header-test-			+= linux/platform_data/ux500_wdt.h
 header-test-			+= linux/platform_data/video-clcd-versatile.h
 header-test-			+= linux/platform_data/video-imxfb.h
-header-test-			+= linux/platform_data/video-nuc900fb.h
 header-test-			+= linux/platform_data/video-pxafb.h
 header-test-			+= linux/platform_data/video_s3c.h
 header-test-			+= linux/platform_data/voltage-omap.h
diff --git a/include/linux/platform_data/video-nuc900fb.h b/include/linux/platform_data/video-nuc900fb.h
deleted file mode 100644
index 3da504460c91..000000000000
--- a/include/linux/platform_data/video-nuc900fb.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* linux/include/asm/arch-nuc900/fb.h
- *
- * Copyright (c) 2008 Nuvoton technology corporation
- * All rights reserved.
- *
- * Changelog:
- *
- *   2008/08/26     vincen.zswan modify this file for LCD.
- */
-
-#ifndef __ASM_ARM_FB_H
-#define __ASM_ARM_FB_H
-
-
-
-/* LCD Controller Hardware Desc */
-struct nuc900fb_hw {
-	unsigned int lcd_dccs;
-	unsigned int lcd_device_ctrl;
-	unsigned int lcd_mpulcd_cmd;
-	unsigned int lcd_int_cs;
-	unsigned int lcd_crtc_size;
-	unsigned int lcd_crtc_dend;
-	unsigned int lcd_crtc_hr;
-	unsigned int lcd_crtc_hsync;
-	unsigned int lcd_crtc_vr;
-	unsigned int lcd_va_baddr0;
-	unsigned int lcd_va_baddr1;
-	unsigned int lcd_va_fbctrl;
-	unsigned int lcd_va_scale;
-	unsigned int lcd_va_test;
-	unsigned int lcd_va_win;
-	unsigned int lcd_va_stuff;
-};
-
-/* LCD Display Description */
-struct nuc900fb_display {
-	/* LCD Image type */
-	unsigned type;
-
-	/* LCD Screen Size */
-	unsigned short width;
-	unsigned short height;
-
-	/* LCD Screen Info */
-	unsigned short xres;
-	unsigned short yres;
-	unsigned short bpp;
-
-	unsigned long pixclock;
-	unsigned short left_margin;
-	unsigned short right_margin;
-	unsigned short hsync_len;
-	unsigned short upper_margin;
-	unsigned short lower_margin;
-	unsigned short vsync_len;
-
-	/* hardware special register value */
-	unsigned int dccs;
-	unsigned int devctl;
-	unsigned int fbctrl;
-	unsigned int scale;
-};
-
-struct nuc900fb_mach_info {
-	struct nuc900fb_display *displays;
-	unsigned num_displays;
-	unsigned default_display;
-	/* GPIO Setting  Info */
-	unsigned gpio_dir;
-	unsigned gpio_dir_mask;
-	unsigned gpio_data;
-	unsigned gpio_data_mask;
-};
-
-extern void __init nuc900_fb_set_platdata(struct nuc900fb_mach_info *);
-
-#endif /* __ASM_ARM_FB_H */
-- 
cgit v1.2.3


From 45b659ee75d7bcff7d0206967d150d879f1d3eb9 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Mon, 26 Aug 2019 20:00:40 -0700
Subject: firmware: ti_sci: Allow for device shared and exclusive requests

Sysfw provides an option for requesting exclusive access for a
device using the flags MSG_FLAG_DEVICE_EXCLUSIVE. If this flag is
not used, the device is meant to be shared across hosts. Once a device
is requested from a host with this flag set, any request to this
device from a different host will be nacked by sysfw. Current tisci
driver enables this flag for every device requests. But this may not
be true for all the devices. So provide a separate commands in driver
for exclusive and shared device requests.

Reviewed-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/firmware/ti_sci.c              | 45 ++++++++++++++++++++++++++++++++--
 include/linux/soc/ti/ti_sci_protocol.h |  3 +++
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/firmware/ti_sci.c b/drivers/firmware/ti_sci.c
index cdee0b45943d..4126be9e3216 100644
--- a/drivers/firmware/ti_sci.c
+++ b/drivers/firmware/ti_sci.c
@@ -635,6 +635,7 @@ fail:
 
 /**
  * ti_sci_cmd_get_device() - command to request for device managed by TISCI
+ *			     that can be shared with other hosts.
  * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
  * @id:		Device Identifier
  *
@@ -642,11 +643,29 @@ fail:
  * usage count by balancing get_device with put_device. No refcounting is
  * managed by driver for that purpose.
  *
- * NOTE: The request is for exclusive access for the processor.
- *
  * Return: 0 if all went fine, else return appropriate error.
  */
 static int ti_sci_cmd_get_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id, 0,
+				       MSG_DEVICE_SW_STATE_ON);
+}
+
+/**
+ * ti_sci_cmd_get_device_exclusive() - command to request for device managed by
+ *				       TISCI that is exclusively owned by the
+ *				       requesting host.
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_get_device_exclusive(const struct ti_sci_handle *handle,
+					   u32 id)
 {
 	return ti_sci_set_device_state(handle, id,
 				       MSG_FLAG_DEVICE_EXCLUSIVE,
@@ -665,6 +684,26 @@ static int ti_sci_cmd_get_device(const struct ti_sci_handle *handle, u32 id)
  * Return: 0 if all went fine, else return appropriate error.
  */
 static int ti_sci_cmd_idle_device(const struct ti_sci_handle *handle, u32 id)
+{
+	return ti_sci_set_device_state(handle, id, 0,
+				       MSG_DEVICE_SW_STATE_RETENTION);
+}
+
+/**
+ * ti_sci_cmd_idle_device_exclusive() - Command to idle a device managed by
+ *					TISCI that is exclusively owned by
+ *					requesting host.
+ * @handle:	Pointer to TISCI handle as retrieved by *ti_sci_get_handle
+ * @id:		Device Identifier
+ *
+ * Request for the device - NOTE: the client MUST maintain integrity of
+ * usage count by balancing get_device with put_device. No refcounting is
+ * managed by driver for that purpose.
+ *
+ * Return: 0 if all went fine, else return appropriate error.
+ */
+static int ti_sci_cmd_idle_device_exclusive(const struct ti_sci_handle *handle,
+					    u32 id)
 {
 	return ti_sci_set_device_state(handle, id,
 				       MSG_FLAG_DEVICE_EXCLUSIVE,
@@ -2894,7 +2933,9 @@ static void ti_sci_setup_ops(struct ti_sci_info *info)
 	core_ops->reboot_device = ti_sci_cmd_core_reboot;
 
 	dops->get_device = ti_sci_cmd_get_device;
+	dops->get_device_exclusive = ti_sci_cmd_get_device_exclusive;
 	dops->idle_device = ti_sci_cmd_idle_device;
+	dops->idle_device_exclusive = ti_sci_cmd_idle_device_exclusive;
 	dops->put_device = ti_sci_cmd_put_device;
 
 	dops->is_valid = ti_sci_cmd_dev_is_valid;
diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h
index 6c610e188a44..9531ec823298 100644
--- a/include/linux/soc/ti/ti_sci_protocol.h
+++ b/include/linux/soc/ti/ti_sci_protocol.h
@@ -97,7 +97,10 @@ struct ti_sci_core_ops {
  */
 struct ti_sci_dev_ops {
 	int (*get_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*get_device_exclusive)(const struct ti_sci_handle *handle, u32 id);
 	int (*idle_device)(const struct ti_sci_handle *handle, u32 id);
+	int (*idle_device_exclusive)(const struct ti_sci_handle *handle,
+				     u32 id);
 	int (*put_device)(const struct ti_sci_handle *handle, u32 id);
 	int (*is_valid)(const struct ti_sci_handle *handle, u32 id);
 	int (*get_context_loss_count)(const struct ti_sci_handle *handle,
-- 
cgit v1.2.3


From 7a800c418c684372146f98f857d632caa30125f4 Mon Sep 17 00:00:00 2001
From: Lokesh Vutla <lokeshvutla@ti.com>
Date: Mon, 26 Aug 2019 20:00:40 -0700
Subject: dt-bindings: ti_sci_pm_domains: Add support for exclusive and shared
 access

TISCI protocol supports for enabling the device either with exclusive
permissions for the requesting host or with sharing across the hosts.
There are certain devices which are exclusive to Linux context and
there are certain devices that are shared across different host contexts.
So add support for getting this information from DT by increasing
the power-domain cells to 2.

Acked-by: Tero Kristo <t-kristo@ti.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Lokesh Vutla <lokeshvutla@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt | 11 +++++++++--
 MAINTAINERS                                                |  1 +
 include/dt-bindings/soc/ti,sci_pm_domain.h                 |  9 +++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
 create mode 100644 include/dt-bindings/soc/ti,sci_pm_domain.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
index f7b00a7c0f68..f541d1f776a2 100644
--- a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
+++ b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
@@ -19,8 +19,15 @@ child of the pmmc node.
 Required Properties:
 --------------------
 - compatible: should be "ti,sci-pm-domain"
-- #power-domain-cells: Must be 1 so that an id can be provided in each
-		       device node.
+- #power-domain-cells: Can be one of the following:
+			1: Containing the device id of each node
+			2: First entry should be device id
+			   Second entry should be one of the floowing:
+			   TI_SCI_PD_EXCLUSIVE: To allow device to be
+						exclusively controlled by
+						the requesting hosts.
+			   TI_SCI_PD_SHARED: To allow device to be shared
+					     by multiple hosts.
 
 Example (K2G):
 -------------
diff --git a/MAINTAINERS b/MAINTAINERS
index ad386626c9fa..3ecca5d04e51 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15869,6 +15869,7 @@ F:	drivers/firmware/ti_sci*
 F:	include/linux/soc/ti/ti_sci_protocol.h
 F:	Documentation/devicetree/bindings/soc/ti/sci-pm-domain.txt
 F:	drivers/soc/ti/ti_sci_pm_domains.c
+F:	include/dt-bindings/soc/ti,sci_pm_domain.h
 F:	Documentation/devicetree/bindings/reset/ti,sci-reset.txt
 F:	Documentation/devicetree/bindings/clock/ti,sci-clk.txt
 F:	drivers/clk/keystone/sci-clk.c
diff --git a/include/dt-bindings/soc/ti,sci_pm_domain.h b/include/dt-bindings/soc/ti,sci_pm_domain.h
new file mode 100644
index 000000000000..8f2a7360b65e
--- /dev/null
+++ b/include/dt-bindings/soc/ti,sci_pm_domain.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __DT_BINDINGS_TI_SCI_PM_DOMAIN_H
+#define __DT_BINDINGS_TI_SCI_PM_DOMAIN_H
+
+#define TI_SCI_PD_EXCLUSIVE	1
+#define TI_SCI_PD_SHARED	0
+
+#endif /* __DT_BINDINGS_TI_SCI_PM_DOMAIN_H */
-- 
cgit v1.2.3


From 15322a0d90b6fd62ae8f22e5b87f735c3fdfeff7 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Wed, 4 Sep 2019 18:53:39 -0400
Subject: lsm: remove current_security()

There are no remaining callers and it really is unsafe in the brave
new world of LSM stacking.

Acked-by: James Morris <jamorris@linux.microsoft.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 include/linux/cred.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index efb6edf32de7..98b0a23ddd23 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -384,7 +384,6 @@ static inline void put_cred(const struct cred *_cred)
 #define current_fsgid() 	(current_cred_xxx(fsgid))
 #define current_cap()		(current_cred_xxx(cap_effective))
 #define current_user()		(current_cred_xxx(user))
-#define current_security()	(current_cred_xxx(security))
 
 extern struct user_namespace init_user_ns;
 #ifdef CONFIG_USER_NS
-- 
cgit v1.2.3


From 34d6245fbc81e764806a65fceaeb3ab3274a1e63 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Sep 2019 22:35:31 +0200
Subject: crypto: sha256 - Merge crypto/sha256.h into crypto/sha.h

The generic sha256 implementation from lib/crypto/sha256.c uses data
structs defined in crypto/sha.h, so lets move the function prototypes
there too.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/s390/purgatory/purgatory.c |  2 +-
 arch/x86/purgatory/purgatory.c  |  2 +-
 crypto/sha256_generic.c         |  1 -
 include/crypto/sha.h            | 21 +++++++++++++++++++++
 include/crypto/sha256.h         | 34 ----------------------------------
 lib/crypto/sha256.c             |  2 +-
 6 files changed, 24 insertions(+), 38 deletions(-)
 delete mode 100644 include/crypto/sha256.h

(limited to 'include')

diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index a80c78da9985..0a423bcf6746 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -9,7 +9,7 @@
 
 #include <linux/kexec.h>
 #include <linux/string.h>
-#include <crypto/sha256.h>
+#include <crypto/sha.h>
 #include <asm/purgatory.h>
 
 int verify_sha256_digest(void)
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 7cd7a2618180..c4295256cc0c 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/bug.h>
-#include <crypto/sha256.h>
+#include <crypto/sha.h>
 #include <asm/purgatory.h>
 
 #include "../boot/string.h"
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index eafd10f9bf86..f2d7095d4f2d 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <crypto/sha.h>
-#include <crypto/sha256.h>
 #include <crypto/sha256_base.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 8a46202b1857..535955c84187 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -112,4 +112,25 @@ extern int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
 
 extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
 			       unsigned int len, u8 *hash);
+
+/*
+ * Stand-alone implementation of the SHA256 algorithm. It is designed to
+ * have as little dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should generally use the
+ * hash APIs from include/crypto/hash.h. Especially when hashing large
+ * amounts of data as those APIs may be hw-accelerated.
+ *
+ * For details see lib/crypto/sha256.c
+ */
+
+extern int sha256_init(struct sha256_state *sctx);
+extern int sha256_update(struct sha256_state *sctx, const u8 *input,
+			 unsigned int length);
+extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+
+extern int sha224_init(struct sha256_state *sctx);
+extern int sha224_update(struct sha256_state *sctx, const u8 *input,
+			 unsigned int length);
+extern int sha224_final(struct sha256_state *sctx, u8 *hash);
+
 #endif
diff --git a/include/crypto/sha256.h b/include/crypto/sha256.h
deleted file mode 100644
index a75998d65a41..000000000000
--- a/include/crypto/sha256.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2014 Red Hat Inc.
- *
- *  Author: Vivek Goyal <vgoyal@redhat.com>
- */
-
-#ifndef SHA256_H
-#define SHA256_H
-
-#include <linux/types.h>
-#include <crypto/sha.h>
-
-/*
- * Stand-alone implementation of the SHA256 algorithm. It is designed to
- * have as little dependencies as possible so it can be used in the
- * kexec_file purgatory. In other cases you should generally use the
- * hash APIs from include/crypto/hash.h. Especially when hashing large
- * amounts of data as those APIs may be hw-accelerated.
- *
- * For details see lib/crypto/sha256.c
- */
-
-extern int sha256_init(struct sha256_state *sctx);
-extern int sha256_update(struct sha256_state *sctx, const u8 *input,
-			 unsigned int length);
-extern int sha256_final(struct sha256_state *sctx, u8 *hash);
-
-extern int sha224_init(struct sha256_state *sctx);
-extern int sha224_update(struct sha256_state *sctx, const u8 *input,
-			 unsigned int length);
-extern int sha224_final(struct sha256_state *sctx, u8 *hash);
-
-#endif /* SHA256_H */
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index 42d75e490a97..220b74c2bbd8 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -15,7 +15,7 @@
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/string.h>
-#include <crypto/sha256.h>
+#include <crypto/sha.h>
 #include <asm/unaligned.h>
 
 static inline u32 Ch(u32 x, u32 y, u32 z)
-- 
cgit v1.2.3


From c75c66bbaa56f130e2be095402422e56f608aa62 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 1 Sep 2019 22:35:32 +0200
Subject: crypto: sha256 - Remove sha256/224_init code duplication

lib/crypto/sha256.c and include/crypto/sha256_base.h define
99% identical functions to init a sha256_state struct for sha224 or
sha256 use.

This commit moves the functions from lib/crypto/sha256.c to
include/crypto/sha.h (making them static inline) and makes the
sha224/256_base_init static inline functions from
include/crypto/sha256_base.h wrappers around the now also
static inline include/crypto/sha.h functions.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/sha.h         | 30 ++++++++++++++++++++++++++++--
 include/crypto/sha256_base.h | 24 ++----------------------
 lib/crypto/sha256.c          | 32 --------------------------------
 3 files changed, 30 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 535955c84187..5c2132c71900 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -123,12 +123,38 @@ extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
  * For details see lib/crypto/sha256.c
  */
 
-extern int sha256_init(struct sha256_state *sctx);
+static inline int sha256_init(struct sha256_state *sctx)
+{
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
 extern int sha256_update(struct sha256_state *sctx, const u8 *input,
 			 unsigned int length);
 extern int sha256_final(struct sha256_state *sctx, u8 *hash);
 
-extern int sha224_init(struct sha256_state *sctx);
+static inline int sha224_init(struct sha256_state *sctx)
+{
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
 extern int sha224_update(struct sha256_state *sctx, const u8 *input,
 			 unsigned int length);
 extern int sha224_final(struct sha256_state *sctx, u8 *hash);
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
index b50a035a2bc7..cea60cff80bd 100644
--- a/include/crypto/sha256_base.h
+++ b/include/crypto/sha256_base.h
@@ -22,34 +22,14 @@ static inline int sha224_base_init(struct shash_desc *desc)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
-	sctx->state[0] = SHA224_H0;
-	sctx->state[1] = SHA224_H1;
-	sctx->state[2] = SHA224_H2;
-	sctx->state[3] = SHA224_H3;
-	sctx->state[4] = SHA224_H4;
-	sctx->state[5] = SHA224_H5;
-	sctx->state[6] = SHA224_H6;
-	sctx->state[7] = SHA224_H7;
-	sctx->count = 0;
-
-	return 0;
+	return sha224_init(sctx);
 }
 
 static inline int sha256_base_init(struct shash_desc *desc)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-
-	return 0;
+	return sha256_init(sctx);
 }
 
 static inline int sha256_base_do_update(struct shash_desc *desc,
diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c
index 220b74c2bbd8..66cb04b0cf4e 100644
--- a/lib/crypto/sha256.c
+++ b/lib/crypto/sha256.c
@@ -206,38 +206,6 @@ static void sha256_transform(u32 *state, const u8 *input)
 	memzero_explicit(W, 64 * sizeof(u32));
 }
 
-int sha256_init(struct sha256_state *sctx)
-{
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-
-	return 0;
-}
-EXPORT_SYMBOL(sha256_init);
-
-int sha224_init(struct sha256_state *sctx)
-{
-	sctx->state[0] = SHA224_H0;
-	sctx->state[1] = SHA224_H1;
-	sctx->state[2] = SHA224_H2;
-	sctx->state[3] = SHA224_H3;
-	sctx->state[4] = SHA224_H4;
-	sctx->state[5] = SHA224_H5;
-	sctx->state[6] = SHA224_H6;
-	sctx->state[7] = SHA224_H7;
-	sctx->count = 0;
-
-	return 0;
-}
-EXPORT_SYMBOL(sha224_init);
-
 int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
 {
 	unsigned int partial, done;
-- 
cgit v1.2.3


From 7d5aa9a524db04c1bf65a49442ac0b5186f70857 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Tue, 3 Sep 2019 15:28:03 -0700
Subject: devlink: Add new info version tags for ASIC and FW

The current tag set is still rather small and needs a couple
more tags to help with ASIC identification and to have a
more generic FW version.

Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-info-versions.rst | 16 ++++++++++++++++
 include/net/devlink.h                              |  7 +++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
index 4316342b7746..4914f581b1fd 100644
--- a/Documentation/networking/devlink-info-versions.rst
+++ b/Documentation/networking/devlink-info-versions.rst
@@ -14,11 +14,27 @@ board.rev
 
 Board design revision.
 
+asic.id
+=======
+
+ASIC design identifier.
+
+asic.rev
+========
+
+ASIC design revision.
+
 board.manufacture
 =================
 
 An identifier of the company or the facility which produced the part.
 
+fw
+==
+
+Overall firmware version, often representing the collection of
+fw.mgmt, fw.app, etc.
+
 fw.mgmt
 =======
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 13523b0a0642..460bc629d1a4 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -458,6 +458,13 @@ enum devlink_param_generic_id {
 /* Maker of the board */
 #define DEVLINK_INFO_VERSION_GENERIC_BOARD_MANUFACTURE	"board.manufacture"
 
+/* Part number, identifier of asic design */
+#define DEVLINK_INFO_VERSION_GENERIC_ASIC_ID	"asic.id"
+/* Revision of asic design */
+#define DEVLINK_INFO_VERSION_GENERIC_ASIC_REV	"asic.rev"
+
+/* Overall FW version */
+#define DEVLINK_INFO_VERSION_GENERIC_FW		"fw"
 /* Control processor FW version */
 #define DEVLINK_INFO_VERSION_GENERIC_FW_MGMT	"fw.mgmt"
 /* Data path microcode controlling high-speed packet processing */
-- 
cgit v1.2.3


From 842841ece540f7d7739bec3e9b79bdf9669d77d7 Mon Sep 17 00:00:00 2001
From: Dave Taht <dave.taht@gmail.com>
Date: Mon, 2 Sep 2019 16:29:36 -0700
Subject: Convert usage of IN_MULTICAST to ipv4_is_multicast
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IN_MULTICAST's primary intent is as a uapi macro.

Elsewhere in the kernel we use ipv4_is_multicast consistently.

This patch unifies linux's multicast checks to use that function
rather than this macro.

Signed-off-by: Dave Taht <dave.taht@gmail.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c | 2 +-
 include/net/vxlan.h  | 4 ++--
 net/rds/af_rds.c     | 4 ++--
 net/rds/bind.c       | 4 ++--
 net/rds/send.c       | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index cb2ea8facd8d..3ab24fdccd3b 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1345,7 +1345,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
 		info->key.u.ipv4.dst =
 			nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
 
-		if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
+		if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
 			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
 					    "Remote IPv4 address cannot be Multicast");
 			return -EINVAL;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index dc1583a1fb8a..335283dbe9b3 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -391,7 +391,7 @@ static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
 	if (ipa->sa.sa_family == AF_INET6)
 		return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
 	else
-		return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+		return ipv4_is_multicast(ipa->sin.sin_addr.s_addr);
 }
 
 #else /* !IS_ENABLED(CONFIG_IPV6) */
@@ -403,7 +403,7 @@ static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
 
 static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
 {
-	return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+	return ipv4_is_multicast(ipa->sin.sin_addr.s_addr);
 }
 
 #endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 2977137c28eb..1a5bf3fa4578 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -559,7 +559,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 			ret = -EDESTADDRREQ;
 			break;
 		}
-		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) ||
+		if (ipv4_is_multicast(sin->sin_addr.s_addr) ||
 		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) {
 			ret = -EINVAL;
 			break;
@@ -593,7 +593,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
 			addr4 = sin6->sin6_addr.s6_addr32[3];
 			if (addr4 == htonl(INADDR_ANY) ||
 			    addr4 == htonl(INADDR_BROADCAST) ||
-			    IN_MULTICAST(ntohl(addr4))) {
+			    ipv4_is_multicast(addr4)) {
 				ret = -EPROTOTYPE;
 				break;
 			}
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 0f4398e7f2a7..6dbb763bc1fd 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -181,7 +181,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		if (addr_len < sizeof(struct sockaddr_in) ||
 		    sin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 		    sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
-		    IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+		    ipv4_is_multicast(sin->sin_addr.s_addr))
 			return -EINVAL;
 		ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr);
 		binding_addr = &v6addr;
@@ -206,7 +206,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			addr4 = sin6->sin6_addr.s6_addr32[3];
 			if (addr4 == htonl(INADDR_ANY) ||
 			    addr4 == htonl(INADDR_BROADCAST) ||
-			    IN_MULTICAST(ntohl(addr4)))
+			    ipv4_is_multicast(addr4))
 				return -EINVAL;
 		}
 		/* The scope ID must be specified for link local address. */
diff --git a/net/rds/send.c b/net/rds/send.c
index 9ce552abf9e9..82dcd8b84fe7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1144,7 +1144,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		case AF_INET:
 			if (usin->sin_addr.s_addr == htonl(INADDR_ANY) ||
 			    usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) ||
-			    IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) {
+			    ipv4_is_multicast(usin->sin_addr.s_addr)) {
 				ret = -EINVAL;
 				goto out;
 			}
@@ -1175,7 +1175,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 				addr4 = sin6->sin6_addr.s6_addr32[3];
 				if (addr4 == htonl(INADDR_ANY) ||
 				    addr4 == htonl(INADDR_BROADCAST) ||
-				    IN_MULTICAST(ntohl(addr4))) {
+				    ipv4_is_multicast(addr4)) {
 					ret = -EINVAL;
 					goto out;
 				}
-- 
cgit v1.2.3


From be7bbea114d6ab2688b9e59cd24a306d21e51c27 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 2 Sep 2019 21:31:02 -0700
Subject: net/tls: use the full sk_proto pointer

Since we already have the pointer to the full original sk_proto
stored use that instead of storing all individual callback
pointers as well.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_main.c |  6 ++++--
 include/net/tls.h                         | 10 ----------
 net/tls/tls_main.c                        | 27 ++++++++++-----------------
 3 files changed, 14 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index 635bb4b447fb..e6df5b95ed47 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -474,7 +474,8 @@ static int chtls_getsockopt(struct sock *sk, int level, int optname,
 	struct tls_context *ctx = tls_get_ctx(sk);
 
 	if (level != SOL_TLS)
-		return ctx->getsockopt(sk, level, optname, optval, optlen);
+		return ctx->sk_proto->getsockopt(sk, level,
+						 optname, optval, optlen);
 
 	return do_chtls_getsockopt(sk, optval, optlen);
 }
@@ -541,7 +542,8 @@ static int chtls_setsockopt(struct sock *sk, int level, int optname,
 	struct tls_context *ctx = tls_get_ctx(sk);
 
 	if (level != SOL_TLS)
-		return ctx->setsockopt(sk, level, optname, optval, optlen);
+		return ctx->sk_proto->setsockopt(sk, level,
+						 optname, optval, optlen);
 
 	return do_chtls_setsockopt(sk, optname, optval, optlen);
 }
diff --git a/include/net/tls.h b/include/net/tls.h
index ec3c3ed2c6c3..6dab6683e42f 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -275,16 +275,6 @@ struct tls_context {
 	struct proto *sk_proto;
 
 	void (*sk_destruct)(struct sock *sk);
-	void (*sk_proto_close)(struct sock *sk, long timeout);
-
-	int  (*setsockopt)(struct sock *sk, int level,
-			   int optname, char __user *optval,
-			   unsigned int optlen);
-	int  (*getsockopt)(struct sock *sk, int level,
-			   int optname, char __user *optval,
-			   int __user *optlen);
-	int  (*hash)(struct sock *sk);
-	void (*unhash)(struct sock *sk);
 
 	union tls_crypto_context crypto_send;
 	union tls_crypto_context crypto_recv;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 277f7c209fed..2df1ae8b77fa 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -331,7 +331,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 		tls_sw_strparser_done(ctx);
 	if (ctx->rx_conf == TLS_SW)
 		tls_sw_free_ctx_rx(ctx);
-	ctx->sk_proto_close(sk, timeout);
+	ctx->sk_proto->close(sk, timeout);
 
 	if (free_ctx)
 		tls_ctx_free(sk, ctx);
@@ -451,7 +451,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname,
 	struct tls_context *ctx = tls_get_ctx(sk);
 
 	if (level != SOL_TLS)
-		return ctx->getsockopt(sk, level, optname, optval, optlen);
+		return ctx->sk_proto->getsockopt(sk, level,
+						 optname, optval, optlen);
 
 	return do_tls_getsockopt(sk, optname, optval, optlen);
 }
@@ -609,7 +610,8 @@ static int tls_setsockopt(struct sock *sk, int level, int optname,
 	struct tls_context *ctx = tls_get_ctx(sk);
 
 	if (level != SOL_TLS)
-		return ctx->setsockopt(sk, level, optname, optval, optlen);
+		return ctx->sk_proto->setsockopt(sk, level, optname, optval,
+						 optlen);
 
 	return do_tls_setsockopt(sk, optname, optval, optlen);
 }
@@ -624,10 +626,7 @@ static struct tls_context *create_ctx(struct sock *sk)
 		return NULL;
 
 	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
-	ctx->setsockopt = sk->sk_prot->setsockopt;
-	ctx->getsockopt = sk->sk_prot->getsockopt;
-	ctx->sk_proto_close = sk->sk_prot->close;
-	ctx->unhash = sk->sk_prot->unhash;
+	ctx->sk_proto = sk->sk_prot;
 	return ctx;
 }
 
@@ -683,9 +682,6 @@ static int tls_hw_prot(struct sock *sk)
 
 			spin_unlock_bh(&device_spinlock);
 			tls_build_proto(sk);
-			ctx->hash = sk->sk_prot->hash;
-			ctx->unhash = sk->sk_prot->unhash;
-			ctx->sk_proto_close = sk->sk_prot->close;
 			ctx->sk_destruct = sk->sk_destruct;
 			sk->sk_destruct = tls_hw_sk_destruct;
 			ctx->rx_conf = TLS_HW_RECORD;
@@ -717,7 +713,7 @@ static void tls_hw_unhash(struct sock *sk)
 		}
 	}
 	spin_unlock_bh(&device_spinlock);
-	ctx->unhash(sk);
+	ctx->sk_proto->unhash(sk);
 }
 
 static int tls_hw_hash(struct sock *sk)
@@ -726,7 +722,7 @@ static int tls_hw_hash(struct sock *sk)
 	struct tls_device *dev;
 	int err;
 
-	err = ctx->hash(sk);
+	err = ctx->sk_proto->hash(sk);
 	spin_lock_bh(&device_spinlock);
 	list_for_each_entry(dev, &device_list, dev_list) {
 		if (dev->hash) {
@@ -816,7 +812,6 @@ static int tls_init(struct sock *sk)
 
 	ctx->tx_conf = TLS_BASE;
 	ctx->rx_conf = TLS_BASE;
-	ctx->sk_proto = sk->sk_prot;
 	update_sk_prot(sk, ctx);
 out:
 	write_unlock_bh(&sk->sk_callback_lock);
@@ -828,12 +823,10 @@ static void tls_update(struct sock *sk, struct proto *p)
 	struct tls_context *ctx;
 
 	ctx = tls_get_ctx(sk);
-	if (likely(ctx)) {
-		ctx->sk_proto_close = p->close;
+	if (likely(ctx))
 		ctx->sk_proto = p;
-	} else {
+	else
 		sk->sk_prot = p;
-	}
 }
 
 static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
-- 
cgit v1.2.3


From be2fbc155fc8c0ff6e499753354d965cd9cf1bb0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Mon, 2 Sep 2019 21:31:05 -0700
Subject: net/tls: clean up the number of #ifdefs for CONFIG_TLS_DEVICE

TLS code has a number of #ifdefs which make the code a little
harder to follow. Recent fixes removed the ifdef around the
TLS_HW define, so we can switch to the often used pattern
of defining tls_device functions as empty static inlines
in the header when CONFIG_TLS_DEVICE=n.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: John Hurley <john.hurley@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h  | 38 ++++++++++++++++++++++++++++++++------
 net/tls/tls_main.c | 19 +------------------
 net/tls/tls_sw.c   |  6 ++----
 3 files changed, 35 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/net/tls.h b/include/net/tls.h
index 6dab6683e42f..c664e6dba0d1 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -366,13 +366,9 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
 			   struct pipe_inode_info *pipe,
 			   size_t len, unsigned int flags);
 
-int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
 int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_device_sendpage(struct sock *sk, struct page *page,
 			int offset, size_t size, int flags);
-void tls_device_free_resources_tx(struct sock *sk);
-void tls_device_init(void);
-void tls_device_cleanup(void);
 int tls_tx_records(struct sock *sk, int flags);
 
 struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
@@ -649,7 +645,6 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg,
 		      unsigned char *record_type);
 void tls_register_device(struct tls_device *device);
 void tls_unregister_device(struct tls_device *device);
-int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
 int decrypt_skb(struct sock *sk, struct sk_buff *skb,
 		struct scatterlist *sgout);
 struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
@@ -662,9 +657,40 @@ int tls_sw_fallback_init(struct sock *sk,
 			 struct tls_offload_context_tx *offload_ctx,
 			 struct tls_crypto_info *crypto_info);
 
+#ifdef CONFIG_TLS_DEVICE
+void tls_device_init(void);
+void tls_device_cleanup(void);
+int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
+void tls_device_free_resources_tx(struct sock *sk);
 int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
-
 void tls_device_offload_cleanup_rx(struct sock *sk);
 void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
+int tls_device_decrypted(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tls_device_init(void) {}
+static inline void tls_device_cleanup(void) {}
 
+static inline int
+tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void tls_device_free_resources_tx(struct sock *sk) {}
+
+static inline int
+tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void tls_device_offload_cleanup_rx(struct sock *sk) {}
+static inline void
+tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq) {}
+
+static inline int tls_device_decrypted(struct sock *sk, struct sk_buff *skb)
+{
+	return 0;
+}
+#endif
 #endif /* _TLS_OFFLOAD_H */
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 2df1ae8b77fa..ac88877dcade 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -286,19 +286,14 @@ static void tls_sk_proto_cleanup(struct sock *sk,
 		kfree(ctx->tx.rec_seq);
 		kfree(ctx->tx.iv);
 		tls_sw_release_resources_tx(sk);
-#ifdef CONFIG_TLS_DEVICE
 	} else if (ctx->tx_conf == TLS_HW) {
 		tls_device_free_resources_tx(sk);
-#endif
 	}
 
 	if (ctx->rx_conf == TLS_SW)
 		tls_sw_release_resources_rx(sk);
-
-#ifdef CONFIG_TLS_DEVICE
-	if (ctx->rx_conf == TLS_HW)
+	else if (ctx->rx_conf == TLS_HW)
 		tls_device_offload_cleanup_rx(sk);
-#endif
 }
 
 static void tls_sk_proto_close(struct sock *sk, long timeout)
@@ -537,26 +532,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval,
 	}
 
 	if (tx) {
-#ifdef CONFIG_TLS_DEVICE
 		rc = tls_set_device_offload(sk, ctx);
 		conf = TLS_HW;
 		if (rc) {
-#else
-		{
-#endif
 			rc = tls_set_sw_offload(sk, ctx, 1);
 			if (rc)
 				goto err_crypto_info;
 			conf = TLS_SW;
 		}
 	} else {
-#ifdef CONFIG_TLS_DEVICE
 		rc = tls_set_device_offload_rx(sk, ctx);
 		conf = TLS_HW;
 		if (rc) {
-#else
-		{
-#endif
 			rc = tls_set_sw_offload(sk, ctx, 0);
 			if (rc)
 				goto err_crypto_info;
@@ -920,9 +907,7 @@ static int __init tls_register(void)
 	tls_sw_proto_ops = inet_stream_ops;
 	tls_sw_proto_ops.splice_read = tls_sw_splice_read;
 
-#ifdef CONFIG_TLS_DEVICE
 	tls_device_init();
-#endif
 	tcp_register_ulp(&tcp_tls_ulp_ops);
 
 	return 0;
@@ -931,9 +916,7 @@ static int __init tls_register(void)
 static void __exit tls_unregister(void)
 {
 	tcp_unregister_ulp(&tcp_tls_ulp_ops);
-#ifdef CONFIG_TLS_DEVICE
 	tls_device_cleanup();
-#endif
 }
 
 module_init(tls_register);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 91d21b048a9b..c2b5e0d2ba1a 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1489,13 +1489,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
 	int pad, err = 0;
 
 	if (!ctx->decrypted) {
-#ifdef CONFIG_TLS_DEVICE
 		if (tls_ctx->rx_conf == TLS_HW) {
 			err = tls_device_decrypted(sk, skb);
 			if (err < 0)
 				return err;
 		}
-#endif
+
 		/* Still not decrypted after tls_device */
 		if (!ctx->decrypted) {
 			err = decrypt_internal(sk, skb, dest, NULL, chunk, zc,
@@ -2014,10 +2013,9 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
 		ret = -EINVAL;
 		goto read_failure;
 	}
-#ifdef CONFIG_TLS_DEVICE
+
 	tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE,
 				     TCP_SKB_CB(skb)->seq + rxm->offset);
-#endif
 	return data_len + TLS_HEADER_SIZE;
 
 read_failure:
-- 
cgit v1.2.3


From e336b4027775cb458dc713745e526fa1a1996b2a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 3 Sep 2019 20:08:21 +0900
Subject: kprobes: Prohibit probing on BUG() and WARN() address

Since BUG() and WARN() may use a trap (e.g. UD2 on x86) to
get the address where the BUG() has occurred, kprobes can not
do single-step out-of-line that instruction. So prohibit
probing on such address.

Without this fix, if someone put a kprobe on WARN(), the
kernel will crash with invalid opcode error instead of
outputing warning message, because kernel can not find
correct bug address.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: David S . Miller <davem@davemloft.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Naveen N . Rao <naveen.n.rao@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/156750890133.19112.3393666300746167111.stgit@devnote2
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/bug.h | 5 +++++
 kernel/kprobes.c    | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/bug.h b/include/linux/bug.h
index fe5916550da8..f639bd0122f3 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -47,6 +47,11 @@ void generic_bug_clear_once(void);
 
 #else	/* !CONFIG_GENERIC_BUG */
 
+static inline void *find_bug(unsigned long bugaddr)
+{
+	return NULL;
+}
+
 static inline enum bug_trap_type report_bug(unsigned long bug_addr,
 					    struct pt_regs *regs)
 {
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d9770a5393c8..ebe8315a756a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1514,7 +1514,8 @@ static int check_kprobe_address_safe(struct kprobe *p,
 	/* Ensure it is not in reserved area nor out of text */
 	if (!kernel_text_address((unsigned long) p->addr) ||
 	    within_kprobe_blacklist((unsigned long) p->addr) ||
-	    jump_label_text_reserved(p->addr, p->addr)) {
+	    jump_label_text_reserved(p->addr, p->addr) ||
+	    find_bug((unsigned long)p->addr)) {
 		ret = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3


From ae0755b56da9db4190288155ea884331993ed51b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 5 Sep 2019 11:20:25 +0200
Subject: gpio: Fix up merge collision in include file

The merge of two different patch sets cleaning around in the
main driver include file collided making the function
declarations for gpiochip_[un]lock_as_irq() be defined twice
when gpiolib was unselected. Fix it up.

Cc: YueHaibing <yuehaibing@huawei.com>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index b74a3bee85e5..c667ad0c099d 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -702,11 +702,12 @@ void gpiochip_free_own_desc(struct gpio_desc *desc);
 void devprop_gpiochip_set_names(struct gpio_chip *chip,
 				const struct fwnode_handle *fwnode);
 
+#ifdef CONFIG_GPIOLIB
+
 /* lock/unlock as IRQ */
 int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
 void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
 
-#ifdef CONFIG_GPIOLIB
 
 struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc);
 
-- 
cgit v1.2.3


From efb7740f25d6fff5bf94eb3bfa242b2fe9a6dc36 Mon Sep 17 00:00:00 2001
From: Finley Xiao <finley.xiao@rock-chips.com>
Date: Tue, 3 Sep 2019 19:59:46 +0800
Subject: clk: rockchip: Add dt-binding header for rk3308

Add the dt-bindings header for the rk3308, that gets shared between
the clock controller and the clock references in the dts.

Signed-off-by: Finley Xiao <finley.xiao@rock-chips.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
---
 include/dt-bindings/clock/rk3308-cru.h | 387 +++++++++++++++++++++++++++++++++
 1 file changed, 387 insertions(+)
 create mode 100644 include/dt-bindings/clock/rk3308-cru.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/rk3308-cru.h b/include/dt-bindings/clock/rk3308-cru.h
new file mode 100644
index 000000000000..d97840f9ee2e
--- /dev/null
+++ b/include/dt-bindings/clock/rk3308-cru.h
@@ -0,0 +1,387 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 Rockchip Electronics Co. Ltd.
+ * Author: Finley Xiao <finley.xiao@rock-chips.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_ROCKCHIP_RK3308_H
+#define _DT_BINDINGS_CLK_ROCKCHIP_RK3308_H
+
+/* core clocks */
+#define PLL_APLL		1
+#define PLL_DPLL		2
+#define PLL_VPLL0		3
+#define PLL_VPLL1		4
+#define ARMCLK			5
+
+/* sclk (special clocks) */
+#define USB480M			14
+#define SCLK_RTC32K		15
+#define SCLK_PVTM_CORE		16
+#define SCLK_UART0		17
+#define SCLK_UART1		18
+#define SCLK_UART2		19
+#define SCLK_UART3		20
+#define SCLK_UART4		21
+#define SCLK_I2C0		22
+#define SCLK_I2C1		23
+#define SCLK_I2C2		24
+#define SCLK_I2C3		25
+#define SCLK_PWM0		26
+#define SCLK_SPI0		27
+#define SCLK_SPI1		28
+#define SCLK_SPI2		29
+#define SCLK_TIMER0		30
+#define SCLK_TIMER1		31
+#define SCLK_TIMER2		32
+#define SCLK_TIMER3		33
+#define SCLK_TIMER4		34
+#define SCLK_TIMER5		35
+#define SCLK_TSADC		36
+#define SCLK_SARADC		37
+#define SCLK_OTP		38
+#define SCLK_OTP_USR		39
+#define SCLK_CPU_BOOST		40
+#define SCLK_CRYPTO		41
+#define SCLK_CRYPTO_APK		42
+#define SCLK_NANDC_DIV		43
+#define SCLK_NANDC_DIV50	44
+#define SCLK_NANDC		45
+#define SCLK_SDMMC_DIV		46
+#define SCLK_SDMMC_DIV50	47
+#define SCLK_SDMMC		48
+#define SCLK_SDMMC_DRV		49
+#define SCLK_SDMMC_SAMPLE	50
+#define SCLK_SDIO_DIV		51
+#define SCLK_SDIO_DIV50		52
+#define SCLK_SDIO		53
+#define SCLK_SDIO_DRV		54
+#define SCLK_SDIO_SAMPLE	55
+#define SCLK_EMMC_DIV		56
+#define SCLK_EMMC_DIV50		57
+#define SCLK_EMMC		58
+#define SCLK_EMMC_DRV		59
+#define SCLK_EMMC_SAMPLE	60
+#define SCLK_SFC		61
+#define SCLK_OTG_ADP		62
+#define SCLK_MAC_SRC		63
+#define SCLK_MAC		64
+#define SCLK_MAC_REF		65
+#define SCLK_MAC_RX_TX		66
+#define SCLK_MAC_RMII		67
+#define SCLK_DDR_MON_TIMER	68
+#define SCLK_DDR_MON		69
+#define SCLK_DDRCLK		70
+#define SCLK_PMU		71
+#define SCLK_USBPHY_REF		72
+#define SCLK_WIFI		73
+#define SCLK_PVTM_PMU		74
+#define SCLK_PDM		75
+#define SCLK_I2S0_8CH_TX	76
+#define SCLK_I2S0_8CH_TX_OUT	77
+#define SCLK_I2S0_8CH_RX	78
+#define SCLK_I2S0_8CH_RX_OUT	79
+#define SCLK_I2S1_8CH_TX	80
+#define SCLK_I2S1_8CH_TX_OUT	81
+#define SCLK_I2S1_8CH_RX	82
+#define SCLK_I2S1_8CH_RX_OUT	83
+#define SCLK_I2S2_8CH_TX	84
+#define SCLK_I2S2_8CH_TX_OUT	85
+#define SCLK_I2S2_8CH_RX	86
+#define SCLK_I2S2_8CH_RX_OUT	87
+#define SCLK_I2S3_8CH_TX	88
+#define SCLK_I2S3_8CH_TX_OUT	89
+#define SCLK_I2S3_8CH_RX	90
+#define SCLK_I2S3_8CH_RX_OUT	91
+#define SCLK_I2S0_2CH		92
+#define SCLK_I2S0_2CH_OUT	93
+#define SCLK_I2S1_2CH		94
+#define SCLK_I2S1_2CH_OUT	95
+#define SCLK_SPDIF_TX_DIV	96
+#define SCLK_SPDIF_TX_DIV50	97
+#define SCLK_SPDIF_TX		98
+#define SCLK_SPDIF_RX_DIV	99
+#define SCLK_SPDIF_RX_DIV50	100
+#define SCLK_SPDIF_RX		101
+#define SCLK_I2S0_8CH_TX_MUX	102
+#define SCLK_I2S0_8CH_RX_MUX	103
+#define SCLK_I2S1_8CH_TX_MUX	104
+#define SCLK_I2S1_8CH_RX_MUX	105
+#define SCLK_I2S2_8CH_TX_MUX	106
+#define SCLK_I2S2_8CH_RX_MUX	107
+#define SCLK_I2S3_8CH_TX_MUX	108
+#define SCLK_I2S3_8CH_RX_MUX	109
+#define SCLK_I2S0_8CH_TX_SRC	110
+#define SCLK_I2S0_8CH_RX_SRC	111
+#define SCLK_I2S1_8CH_TX_SRC	112
+#define SCLK_I2S1_8CH_RX_SRC	113
+#define SCLK_I2S2_8CH_TX_SRC	114
+#define SCLK_I2S2_8CH_RX_SRC	115
+#define SCLK_I2S3_8CH_TX_SRC	116
+#define SCLK_I2S3_8CH_RX_SRC	117
+#define SCLK_I2S0_2CH_SRC	118
+#define SCLK_I2S1_2CH_SRC	119
+#define SCLK_PWM1		120
+#define SCLK_PWM2		121
+#define SCLK_OWIRE		122
+
+/* dclk */
+#define DCLK_VOP		125
+
+/* aclk */
+#define ACLK_BUS_SRC		130
+#define ACLK_BUS		131
+#define ACLK_PERI_SRC		132
+#define ACLK_PERI		133
+#define ACLK_MAC		134
+#define ACLK_CRYPTO		135
+#define ACLK_VOP		136
+#define ACLK_GIC		137
+#define ACLK_DMAC0		138
+#define ACLK_DMAC1		139
+
+/* hclk */
+#define HCLK_BUS		150
+#define HCLK_PERI		151
+#define HCLK_AUDIO		152
+#define HCLK_NANDC		153
+#define HCLK_SDMMC		154
+#define HCLK_SDIO		155
+#define HCLK_EMMC		156
+#define HCLK_SFC		157
+#define HCLK_OTG		158
+#define HCLK_HOST		159
+#define HCLK_HOST_ARB		160
+#define HCLK_PDM		161
+#define HCLK_SPDIFTX		162
+#define HCLK_SPDIFRX		163
+#define HCLK_I2S0_8CH		164
+#define HCLK_I2S1_8CH		165
+#define HCLK_I2S2_8CH		166
+#define HCLK_I2S3_8CH		167
+#define HCLK_I2S0_2CH		168
+#define HCLK_I2S1_2CH		169
+#define HCLK_VAD		170
+#define HCLK_CRYPTO		171
+#define HCLK_VOP		172
+
+/* pclk */
+#define PCLK_BUS		190
+#define PCLK_DDR		191
+#define PCLK_PERI		192
+#define PCLK_PMU		193
+#define PCLK_AUDIO		194
+#define PCLK_MAC		195
+#define PCLK_ACODEC		196
+#define PCLK_UART0		197
+#define PCLK_UART1		198
+#define PCLK_UART2		199
+#define PCLK_UART3		200
+#define PCLK_UART4		201
+#define PCLK_I2C0		202
+#define PCLK_I2C1		203
+#define PCLK_I2C2		204
+#define PCLK_I2C3		205
+#define PCLK_PWM0		206
+#define PCLK_SPI0		207
+#define PCLK_SPI1		208
+#define PCLK_SPI2		209
+#define PCLK_SARADC		210
+#define PCLK_TSADC		211
+#define PCLK_TIMER		212
+#define PCLK_OTP_NS		213
+#define PCLK_WDT		214
+#define PCLK_GPIO0		215
+#define PCLK_GPIO1		216
+#define PCLK_GPIO2		217
+#define PCLK_GPIO3		218
+#define PCLK_GPIO4		219
+#define PCLK_SGRF		220
+#define PCLK_GRF		221
+#define PCLK_USBSD_DET		222
+#define PCLK_DDR_UPCTL		223
+#define PCLK_DDR_MON		224
+#define PCLK_DDRPHY		225
+#define PCLK_DDR_STDBY		226
+#define PCLK_USB_GRF		227
+#define PCLK_CRU		228
+#define PCLK_OTP_PHY		229
+#define PCLK_CPU_BOOST		230
+#define PCLK_PWM1		231
+#define PCLK_PWM2		232
+#define PCLK_CAN		233
+#define PCLK_OWIRE		234
+
+#define CLK_NR_CLKS		(PCLK_OWIRE + 1)
+
+/* soft-reset indices */
+
+/* cru_softrst_con0 */
+#define SRST_CORE0_PO		0
+#define SRST_CORE1_PO		1
+#define SRST_CORE2_PO		2
+#define SRST_CORE3_PO		3
+#define SRST_CORE0		4
+#define SRST_CORE1		5
+#define SRST_CORE2		6
+#define SRST_CORE3		7
+#define SRST_CORE0_DBG		8
+#define SRST_CORE1_DBG		9
+#define SRST_CORE2_DBG		10
+#define SRST_CORE3_DBG		11
+#define SRST_TOPDBG		12
+#define SRST_CORE_NOC		13
+#define SRST_STRC_A		14
+#define SRST_L2C		15
+
+/* cru_softrst_con1 */
+#define SRST_DAP		16
+#define SRST_CORE_PVTM		17
+#define SRST_CORE_PRF		18
+#define SRST_CORE_GRF		19
+#define SRST_DDRUPCTL		20
+#define SRST_DDRUPCTL_P		22
+#define SRST_MSCH		23
+#define SRST_DDRMON_P		25
+#define SRST_DDRSTDBY_P		26
+#define SRST_DDRSTDBY		27
+#define SRST_DDRPHY		28
+#define SRST_DDRPHY_DIV		29
+#define SRST_DDRPHY_P		30
+
+/* cru_softrst_con2 */
+#define SRST_BUS_NIU_H		32
+#define SRST_USB_NIU_P		33
+#define SRST_CRYPTO_A		34
+#define SRST_CRYPTO_H		35
+#define SRST_CRYPTO		36
+#define SRST_CRYPTO_APK		37
+#define SRST_VOP_A		38
+#define SRST_VOP_H		39
+#define SRST_VOP_D		40
+#define SRST_INTMEM_A		41
+#define SRST_ROM_H		42
+#define SRST_GIC_A		43
+#define SRST_UART0_P		44
+#define SRST_UART0		45
+#define SRST_UART1_P		46
+#define SRST_UART1		47
+
+/* cru_softrst_con3 */
+#define SRST_UART2_P		48
+#define SRST_UART2		49
+#define SRST_UART3_P		50
+#define SRST_UART3		51
+#define SRST_UART4_P		52
+#define SRST_UART4		53
+#define SRST_I2C0_P		54
+#define SRST_I2C0		55
+#define SRST_I2C1_P		56
+#define SRST_I2C1		57
+#define SRST_I2C2_P		58
+#define SRST_I2C2		59
+#define SRST_I2C3_P		60
+#define SRST_I2C3		61
+#define SRST_PWM0_P		62
+#define SRST_PWM0		63
+
+/* cru_softrst_con4 */
+#define SRST_SPI0_P		64
+#define SRST_SPI0		65
+#define SRST_SPI1_P		66
+#define SRST_SPI1		67
+#define SRST_SPI2_P		68
+#define SRST_SPI2		69
+#define SRST_SARADC_P		70
+#define SRST_TSADC_P		71
+#define SRST_TSADC		72
+#define SRST_TIMER0_P		73
+#define SRST_TIMER0		74
+#define SRST_TIMER1		75
+#define SRST_TIMER2		76
+#define SRST_TIMER3		77
+#define SRST_TIMER4		78
+#define SRST_TIMER5		79
+
+/* cru_softrst_con5 */
+#define SRST_OTP_NS_P		80
+#define SRST_OTP_NS_SBPI	81
+#define SRST_OTP_NS_USR		82
+#define SRST_OTP_PHY_P		83
+#define SRST_OTP_PHY		84
+#define SRST_GPIO0_P		86
+#define SRST_GPIO1_P		87
+#define SRST_GPIO2_P		88
+#define SRST_GPIO3_P		89
+#define SRST_GPIO4_P		90
+#define SRST_GRF_P		91
+#define SRST_USBSD_DET_P	92
+#define SRST_PMU		93
+#define SRST_PMU_PVTM		94
+#define SRST_USB_GRF_P		95
+
+/* cru_softrst_con6 */
+#define SRST_CPU_BOOST		96
+#define SRST_CPU_BOOST_P	97
+#define SRST_PWM1_P		98
+#define SRST_PWM1		99
+#define SRST_PWM2_P		100
+#define SRST_PWM2		101
+#define SRST_PERI_NIU_A		104
+#define SRST_PERI_NIU_H		105
+#define SRST_PERI_NIU_p		106
+#define SRST_USB2OTG_H		107
+#define SRST_USB2OTG		108
+#define SRST_USB2OTG_ADP	109
+#define SRST_USB2HOST_H		110
+#define SRST_USB2HOST_ARB_H	111
+
+/* cru_softrst_con7 */
+#define SRST_USB2HOST_AUX_H	112
+#define SRST_USB2HOST_EHCI	113
+#define SRST_USB2HOST		114
+#define SRST_USBPHYPOR		115
+#define SRST_UTMI0		116
+#define SRST_UTMI1		117
+#define SRST_SDIO_H		118
+#define SRST_EMMC_H		119
+#define SRST_SFC_H		120
+#define SRST_SFC		121
+#define SRST_SD_H		122
+#define SRST_NANDC_H		123
+#define SRST_NANDC_N		124
+#define SRST_MAC_A		125
+#define SRST_CAN_P		126
+#define SRST_OWIRE_P		127
+
+/* cru_softrst_con8 */
+#define SRST_AUDIO_NIU_H	128
+#define SRST_AUDIO_NIU_P	129
+#define SRST_PDM_H		130
+#define SRST_PDM_M		131
+#define SRST_SPDIFTX_H		132
+#define SRST_SPDIFTX_M		133
+#define SRST_SPDIFRX_H		134
+#define SRST_SPDIFRX_M		135
+#define SRST_I2S0_8CH_H		136
+#define SRST_I2S0_8CH_TX_M	137
+#define SRST_I2S0_8CH_RX_M	138
+#define SRST_I2S1_8CH_H		139
+#define SRST_I2S1_8CH_TX_M	140
+#define SRST_I2S1_8CH_RX_M	141
+#define SRST_I2S2_8CH_H		142
+#define SRST_I2S2_8CH_TX_M	143
+
+/* cru_softrst_con9 */
+#define SRST_I2S2_8CH_RX_M	144
+#define SRST_I2S3_8CH_H		145
+#define SRST_I2S3_8CH_TX_M	146
+#define SRST_I2S3_8CH_RX_M	147
+#define SRST_I2S0_2CH_H		148
+#define SRST_I2S0_2CH_M		149
+#define SRST_I2S1_2CH_H		150
+#define SRST_I2S1_2CH_M		151
+#define SRST_VAD_H		152
+#define SRST_ACODEC_P		153
+
+#endif
-- 
cgit v1.2.3


From ebe26aca98fcf9fbe5017b5cbe216413cee69df5 Mon Sep 17 00:00:00 2001
From: Moritz Fischer <mdf@kernel.org>
Date: Tue, 3 Sep 2019 11:46:52 -0700
Subject: net: fixed_phy: Add forward declaration for struct gpio_desc;

Add forward declaration for struct gpio_desc in order to address
the following:

./include/linux/phy_fixed.h:48:17: error: 'struct gpio_desc' declared inside parameter list [-Werror]
./include/linux/phy_fixed.h:48:17: error: its scope is only this definition or declaration, which is probably not what you want [-Werror]

Fixes: 71bd106d2567 ("net: fixed-phy: Add fixed_phy_register_with_gpiod() API")
Signed-off-by: Moritz Fischer <mdf@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_fixed.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 1e5d86ebdaeb..52bc8e487ef7 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -11,6 +11,7 @@ struct fixed_phy_status {
 };
 
 struct device_node;
+struct gpio_desc;
 
 #if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_change_carrier(struct net_device *dev, bool new_carrier);
-- 
cgit v1.2.3


From 36b1a2fcd0d263cec3d1a896386611195329af84 Mon Sep 17 00:00:00 2001
From: Harini Katakam <harini.katakam@xilinx.com>
Date: Wed, 4 Sep 2019 19:30:20 +0530
Subject: include: mdio: Add driver data helpers

Add set/get drv_data helpers for mdio device.

Signed-off-by: Harini Katakam <harini.katakam@xilinx.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index e8242ad88c81..a7604248777b 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -68,6 +68,17 @@ struct mdio_driver {
 #define to_mdio_driver(d)						\
 	container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv)
 
+/* device driver data */
+static inline void mdiodev_set_drvdata(struct mdio_device *mdio, void *data)
+{
+	dev_set_drvdata(&mdio->dev, data);
+}
+
+static inline void *mdiodev_get_drvdata(struct mdio_device *mdio)
+{
+	return dev_get_drvdata(&mdio->dev);
+}
+
 void mdio_device_free(struct mdio_device *mdiodev);
 struct mdio_device *mdio_device_create(struct mii_bus *bus, int addr);
 int mdio_device_register(struct mdio_device *mdiodev);
-- 
cgit v1.2.3


From 7bdf4de1267780aa194b3a28c85a6c4d617b0bdb Mon Sep 17 00:00:00 2001
From: Donald Sharp <sharpd@cumulusnetworks.com>
Date: Wed, 4 Sep 2019 10:11:58 -0400
Subject: net: Properly update v4 routes with v6 nexthop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When creating a v4 route that uses a v6 nexthop from a nexthop group.
Allow the kernel to properly send the nexthop as v6 via the RTA_VIA
attribute.

Broken behavior:

$ ip nexthop add via fe80::9 dev eth0
$ ip nexthop show
id 1 via fe80::9 dev eth0 scope link
$ ip route add 4.5.6.7/32 nhid 1
$ ip route show
default via 10.0.2.2 dev eth0
4.5.6.7 nhid 1 via 254.128.0.0 dev eth0
10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15
$

Fixed behavior:

$ ip nexthop add via fe80::9 dev eth0
$ ip nexthop show
id 1 via fe80::9 dev eth0 scope link
$ ip route add 4.5.6.7/32 nhid 1
$ ip route show
default via 10.0.2.2 dev eth0
4.5.6.7 nhid 1 via inet6 fe80::9 dev eth0
10.0.2.0/24 dev eth0 proto kernel scope link src 10.0.2.15
$

v2, v3: Addresses code review comments from David Ahern

Fixes: dcb1ecb50edf (“ipv4: Prepare for fib6_nh from a nexthop object”)
Signed-off-by: Donald Sharp <sharpd@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h     |  4 ++--
 include/net/nexthop.h    |  5 +++--
 net/ipv4/fib_semantics.c | 15 ++++++++-------
 net/ipv6/route.c         | 11 ++++++-----
 4 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4c81846ccce8..ab1ca9e238d2 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -513,7 +513,7 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
 			  struct netlink_callback *cb);
 
 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh,
-		     unsigned char *flags, bool skip_oif);
+		     u8 rt_family, unsigned char *flags, bool skip_oif);
 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh,
-		    int nh_weight);
+		    int nh_weight, u8 rt_family);
 #endif  /* _NET_FIB_H */
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index 95f766c31c90..331ebbc94fe7 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -161,7 +161,8 @@ struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
 }
 
 static inline
-int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh)
+int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh,
+			    u8 rt_family)
 {
 	struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 	int i;
@@ -172,7 +173,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh)
 		struct fib_nh_common *nhc = &nhi->fib_nhc;
 		int weight = nhg->nh_entries[i].weight;
 
-		if (fib_add_nexthop(skb, nhc, weight) < 0)
+		if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0)
 			return -EMSGSIZE;
 	}
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2db089e10ba0..0913a090b2bf 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1582,7 +1582,7 @@ failure:
 }
 
 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
-		     unsigned char *flags, bool skip_oif)
+		     u8 rt_family, unsigned char *flags, bool skip_oif)
 {
 	if (nhc->nhc_flags & RTNH_F_DEAD)
 		*flags |= RTNH_F_DEAD;
@@ -1613,7 +1613,7 @@ int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
 		/* if gateway family does not match nexthop family
 		 * gateway is encoded as RTA_VIA
 		 */
-		if (nhc->nhc_gw_family != nhc->nhc_family) {
+		if (rt_family != nhc->nhc_gw_family) {
 			int alen = sizeof(struct in6_addr);
 			struct nlattr *nla;
 			struct rtvia *via;
@@ -1654,7 +1654,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info);
 
 #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
-		    int nh_weight)
+		    int nh_weight, u8 rt_family)
 {
 	const struct net_device *dev = nhc->nhc_dev;
 	struct rtnexthop *rtnh;
@@ -1667,7 +1667,7 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
 	rtnh->rtnh_hops = nh_weight - 1;
 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
 
-	if (fib_nexthop_info(skb, nhc, &flags, true) < 0)
+	if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0)
 		goto nla_put_failure;
 
 	rtnh->rtnh_flags = flags;
@@ -1693,13 +1693,14 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
 		goto nla_put_failure;
 
 	if (unlikely(fi->nh)) {
-		if (nexthop_mpath_fill_node(skb, fi->nh) < 0)
+		if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0)
 			goto nla_put_failure;
 		goto mp_end;
 	}
 
 	for_nexthops(fi) {
-		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0)
+		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+				    AF_INET) < 0)
 			goto nla_put_failure;
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (nh->nh_tclassid &&
@@ -1775,7 +1776,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
 		unsigned char flags = 0;
 
-		if (fib_nexthop_info(skb, nhc, &flags, false) < 0)
+		if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0)
 			goto nla_put_failure;
 
 		rtm->rtm_flags = flags;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 977cfd09e2a3..85bcd97f7a72 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5333,7 +5333,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
 		if (!mp)
 			goto nla_put_failure;
 
-		if (nexthop_mpath_fill_node(skb, nh))
+		if (nexthop_mpath_fill_node(skb, nh, AF_INET6))
 			goto nla_put_failure;
 
 		nla_nest_end(skb, mp);
@@ -5341,7 +5341,7 @@ static int rt6_fill_node_nexthop(struct sk_buff *skb, struct nexthop *nh,
 		struct fib6_nh *fib6_nh;
 
 		fib6_nh = nexthop_fib6_nh(nh);
-		if (fib_nexthop_info(skb, &fib6_nh->nh_common,
+		if (fib_nexthop_info(skb, &fib6_nh->nh_common, AF_INET6,
 				     flags, false) < 0)
 			goto nla_put_failure;
 	}
@@ -5470,13 +5470,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 			goto nla_put_failure;
 
 		if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
-				    rt->fib6_nh->fib_nh_weight) < 0)
+				    rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
 			goto nla_put_failure;
 
 		list_for_each_entry_safe(sibling, next_sibling,
 					 &rt->fib6_siblings, fib6_siblings) {
 			if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
-					    sibling->fib6_nh->fib_nh_weight) < 0)
+					    sibling->fib6_nh->fib_nh_weight,
+					    AF_INET6) < 0)
 				goto nla_put_failure;
 		}
 
@@ -5493,7 +5494,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 
 		rtm->rtm_flags |= nh_flags;
 	} else {
-		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
+		if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common, AF_INET6,
 				     &nh_flags, false) < 0)
 			goto nla_put_failure;
 
-- 
cgit v1.2.3


From f388ec7c16ad8676ee516a735a6b7588252f971d Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Tue, 16 Jul 2019 08:55:04 +0300
Subject: habanalabs: add comments on INFO IOCTL

This patch adds some in-code documentation on the different opcodes of the
INFO IOCTL.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
---
 include/uapi/misc/habanalabs.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 3956c226ca35..a5a1d0e7ec82 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -75,7 +75,19 @@ enum hl_device_status {
 	HL_DEVICE_STATUS_MALFUNCTION
 };
 
-/* Opcode for management ioctl */
+/* Opcode for management ioctl
+ *
+ * HW_IP_INFO         - Receive information about different IP blocks in the
+ *                      device.
+ * HL_INFO_HW_EVENTS  - Receive an array describing how many times each event
+ *                      occurred since the last hard reset.
+ * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the
+ *                      specific context. This is relevant only for GOYA device.
+ * HL_INFO_HW_IDLE    - Retrieve information about the idle status of each
+ *                      internal engine.
+ * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
+ *                         require an open context.
+ */
 #define HL_INFO_HW_IP_INFO	0
 #define HL_INFO_HW_EVENTS	1
 #define HL_INFO_DRAM_USAGE	2
-- 
cgit v1.2.3


From 4fd2cb15cd0894520422b3e7d9cf68b7e6548b13 Mon Sep 17 00:00:00 2001
From: Dotan Barak <dbarak@habana.ai>
Date: Mon, 12 Aug 2019 10:23:33 +0300
Subject: habanalabs: explicitly set the queue-id enumerated numbers

When looking at kernel log messages and when debugging user applications,
we only see the queue id. This patch explicitly set the queue id in the
queue enumeration which will be helpful for finding the queue name when we
have its id.

Signed-off-by: Dotan Barak <dbarak@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/misc/habanalabs.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a5a1d0e7ec82..6cf50177cd21 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -28,20 +28,20 @@
 
 enum goya_queue_id {
 	GOYA_QUEUE_ID_DMA_0 = 0,
-	GOYA_QUEUE_ID_DMA_1,
-	GOYA_QUEUE_ID_DMA_2,
-	GOYA_QUEUE_ID_DMA_3,
-	GOYA_QUEUE_ID_DMA_4,
-	GOYA_QUEUE_ID_CPU_PQ,
-	GOYA_QUEUE_ID_MME,	/* Internal queues start here */
-	GOYA_QUEUE_ID_TPC0,
-	GOYA_QUEUE_ID_TPC1,
-	GOYA_QUEUE_ID_TPC2,
-	GOYA_QUEUE_ID_TPC3,
-	GOYA_QUEUE_ID_TPC4,
-	GOYA_QUEUE_ID_TPC5,
-	GOYA_QUEUE_ID_TPC6,
-	GOYA_QUEUE_ID_TPC7,
+	GOYA_QUEUE_ID_DMA_1 = 1,
+	GOYA_QUEUE_ID_DMA_2 = 2,
+	GOYA_QUEUE_ID_DMA_3 = 3,
+	GOYA_QUEUE_ID_DMA_4 = 4,
+	GOYA_QUEUE_ID_CPU_PQ = 5,
+	GOYA_QUEUE_ID_MME = 6,	/* Internal queues start here */
+	GOYA_QUEUE_ID_TPC0 = 7,
+	GOYA_QUEUE_ID_TPC1 = 8,
+	GOYA_QUEUE_ID_TPC2 = 9,
+	GOYA_QUEUE_ID_TPC3 = 10,
+	GOYA_QUEUE_ID_TPC4 = 11,
+	GOYA_QUEUE_ID_TPC5 = 12,
+	GOYA_QUEUE_ID_TPC6 = 13,
+	GOYA_QUEUE_ID_TPC7 = 14,
 	GOYA_QUEUE_ID_SIZE
 };
 
-- 
cgit v1.2.3


From 413cf576fd50297429e967b050d63067c997645c Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Tue, 27 Aug 2019 16:14:18 +0000
Subject: habanalabs: Make the Coresight timestamp perpetual

The Coresight timestamp is enabled for a specific debug session using
the HL_DEBUG_OP_TIMESTAMP opcode of the debug IOCTL.
In order to have a perpetual timestamp that would be comparable between
various debug sessions, this patch moves the timestamp enablement to be
part of the HW initialization.
The HL_DEBUG_OP_TIMESTAMP opcode turns to be deprecated and shouldn't be
used. Old user-space that will call it won't see any change in the
behavior of the debug session.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c           | 23 +++++++++++++++++++++++
 drivers/misc/habanalabs/goya/goya_coresight.c | 17 ++---------------
 include/uapi/misc/habanalabs.h                |  2 +-
 3 files changed, 26 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index de275cb3bb98..0dd0b4429fee 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2062,6 +2062,25 @@ static void goya_disable_msix(struct hl_device *hdev)
 	goya->hw_cap_initialized &= ~HW_CAP_MSIX;
 }
 
+static void goya_enable_timestamp(struct hl_device *hdev)
+{
+	/* Disable the timestamp counter */
+	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
+
+	/* Zero the lower/upper parts of the 64-bit counter */
+	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
+	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
+
+	/* Enable the counter */
+	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
+}
+
+static void goya_disable_timestamp(struct hl_device *hdev)
+{
+	/* Disable the timestamp counter */
+	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
+}
+
 static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
 {
 	u32 wait_timeout_ms, cpu_timeout_ms;
@@ -2102,6 +2121,8 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
 	goya_disable_external_queues(hdev);
 	goya_disable_internal_queues(hdev);
 
+	goya_disable_timestamp(hdev);
+
 	if (hard_reset) {
 		goya_disable_msix(hdev);
 		goya_mmu_remove_device_cpu_mappings(hdev);
@@ -2504,6 +2525,8 @@ static int goya_hw_init(struct hl_device *hdev)
 
 	goya_init_tpc_qmans(hdev);
 
+	goya_enable_timestamp(hdev);
+
 	/* MSI-X must be enabled before CPU queues are initialized */
 	rc = goya_enable_msix(hdev);
 	if (rc)
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index 3d77b1c20336..b4d406af1bed 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -636,24 +636,11 @@ static int goya_config_spmu(struct hl_device *hdev,
 	return 0;
 }
 
-static int goya_config_timestamp(struct hl_device *hdev,
-		struct hl_debug_params *params)
-{
-	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
-	if (params->enable) {
-		WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
-		WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
-		WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
-	}
-
-	return 0;
-}
-
 int goya_debug_coresight(struct hl_device *hdev, void *data)
 {
 	struct hl_debug_params *params = data;
 	u32 val;
-	int rc;
+	int rc = 0;
 
 	switch (params->op) {
 	case HL_DEBUG_OP_STM:
@@ -675,7 +662,7 @@ int goya_debug_coresight(struct hl_device *hdev, void *data)
 		rc = goya_config_spmu(hdev, params);
 		break;
 	case HL_DEBUG_OP_TIMESTAMP:
-		rc = goya_config_timestamp(hdev, params);
+		/* Do nothing as this opcode is deprecated */
 		break;
 
 	default:
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 6cf50177cd21..266bf85056d4 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -451,7 +451,7 @@ struct hl_debug_params_spmu {
 #define HL_DEBUG_OP_BMON	4
 /* Opcode for SPMU component */
 #define HL_DEBUG_OP_SPMU	5
-/* Opcode for timestamp */
+/* Opcode for timestamp (deprecated) */
 #define HL_DEBUG_OP_TIMESTAMP	6
 /* Opcode for setting the device into or out of debug mode. The enable
  * variable should be 1 for enabling debug mode and 0 for disabling it
-- 
cgit v1.2.3


From 75b3cb2bb080372d043e8f0c0aeae8f52461136b Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Wed, 28 Aug 2019 17:32:04 +0300
Subject: habanalabs: add uapi to retrieve device utilization

Users and sysadmins usually want to know what is the device utilization as
a level 0 indication if they are efficiently using the device.

Add a new opcode to the INFO IOCTL that will return the device utilization
over the last period of 100-1000ms. The return value is 0-100,
representing as percentage the total utilization rate.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
---
 drivers/misc/habanalabs/command_submission.c |  20 ++++-
 drivers/misc/habanalabs/device.c             | 116 ++++++++++++++++++++++++++-
 drivers/misc/habanalabs/habanalabs.h         |  23 +++++-
 drivers/misc/habanalabs/habanalabs_ioctl.c   |  27 +++++++
 drivers/misc/habanalabs/hw_queue.c           |   8 +-
 drivers/misc/habanalabs/include/goya/goya.h  |   2 +
 include/uapi/misc/habanalabs.h               |  49 +++++++----
 7 files changed, 222 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index e4dd3e83df8b..4777ec4c2b55 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -178,11 +178,23 @@ static void cs_do_release(struct kref *ref)
 
 	/* We also need to update CI for internal queues */
 	if (cs->submitted) {
-		int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
+		hdev->asic_funcs->hw_queues_lock(hdev);
 
-		WARN_ONCE((cs_cnt < 0),
-			"hl%d: error in CS active cnt %d\n",
-			hdev->id, cs_cnt);
+		hdev->cs_active_cnt--;
+		if (!hdev->cs_active_cnt) {
+			struct hl_device_idle_busy_ts *ts;
+
+			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
+			ts->busy_to_idle_ts = ktime_get();
+
+			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
+				hdev->idle_busy_ts_idx = 0;
+		} else if (hdev->cs_active_cnt < 0) {
+			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
+				hdev->cs_active_cnt);
+		}
+
+		hdev->asic_funcs->hw_queues_unlock(hdev);
 
 		hl_int_hw_queue_update_ci(cs);
 
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 41c3ddbca351..cebdceb72298 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -293,6 +293,14 @@ static int device_early_init(struct hl_device *hdev)
 		goto free_eq_wq;
 	}
 
+	hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
+					sizeof(struct hl_device_idle_busy_ts),
+					(GFP_KERNEL | __GFP_ZERO));
+	if (!hdev->idle_busy_ts_arr) {
+		rc = -ENOMEM;
+		goto free_chip_info;
+	}
+
 	hl_cb_mgr_init(&hdev->kernel_cb_mgr);
 
 	mutex_init(&hdev->send_cpu_message_lock);
@@ -303,10 +311,11 @@ static int device_early_init(struct hl_device *hdev)
 	INIT_LIST_HEAD(&hdev->fpriv_list);
 	mutex_init(&hdev->fpriv_list_lock);
 	atomic_set(&hdev->in_reset, 0);
-	atomic_set(&hdev->cs_active_cnt, 0);
 
 	return 0;
 
+free_chip_info:
+	kfree(hdev->hl_chip_info);
 free_eq_wq:
 	destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
@@ -336,6 +345,7 @@ static void device_early_fini(struct hl_device *hdev)
 
 	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
 
+	kfree(hdev->idle_busy_ts_arr);
 	kfree(hdev->hl_chip_info);
 
 	destroy_workqueue(hdev->eq_wq);
@@ -451,6 +461,102 @@ static void device_late_fini(struct hl_device *hdev)
 	hdev->late_init_done = false;
 }
 
+uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
+{
+	struct hl_device_idle_busy_ts *ts;
+	ktime_t zero_ktime, curr = ktime_get();
+	u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
+	s64 period_us, last_start_us, last_end_us, last_busy_time_us,
+		total_busy_time_us = 0, total_busy_time_ms;
+
+	zero_ktime = ktime_set(0, 0);
+	period_us = period_ms * USEC_PER_MSEC;
+	ts = &hdev->idle_busy_ts_arr[last_index];
+
+	/* check case that device is currently in idle */
+	if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
+			!ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {
+
+		last_index--;
+		/* Handle case idle_busy_ts_idx was 0 */
+		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+
+		ts = &hdev->idle_busy_ts_arr[last_index];
+	}
+
+	while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
+		/* Check if we are in last sample case. i.e. if the sample
+		 * begun before the sampling period. This could be a real
+		 * sample or 0 so need to handle both cases
+		 */
+		last_start_us = ktime_to_us(
+				ktime_sub(curr, ts->idle_to_busy_ts));
+
+		if (last_start_us > period_us) {
+
+			/* First check two cases:
+			 * 1. If the device is currently busy
+			 * 2. If the device was idle during the whole sampling
+			 *    period
+			 */
+
+			if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
+				/* Check if the device is currently busy */
+				if (ktime_compare(ts->idle_to_busy_ts,
+						zero_ktime))
+					return 100;
+
+				/* We either didn't have any activity or we
+				 * reached an entry which is 0. Either way,
+				 * exit and return what was accumulated so far
+				 */
+				break;
+			}
+
+			/* If sample has finished, check it is relevant */
+			last_end_us = ktime_to_us(
+					ktime_sub(curr, ts->busy_to_idle_ts));
+
+			if (last_end_us > period_us)
+				break;
+
+			/* It is relevant so add it but with adjustment */
+			last_busy_time_us = ktime_to_us(
+						ktime_sub(ts->busy_to_idle_ts,
+						ts->idle_to_busy_ts));
+			total_busy_time_us += last_busy_time_us -
+					(last_start_us - period_us);
+			break;
+		}
+
+		/* Check if the sample is finished or still open */
+		if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
+			last_busy_time_us = ktime_to_us(
+						ktime_sub(ts->busy_to_idle_ts,
+						ts->idle_to_busy_ts));
+		else
+			last_busy_time_us = ktime_to_us(
+					ktime_sub(curr, ts->idle_to_busy_ts));
+
+		total_busy_time_us += last_busy_time_us;
+
+		last_index--;
+		/* Handle case idle_busy_ts_idx was 0 */
+		if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
+			last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;
+
+		ts = &hdev->idle_busy_ts_arr[last_index];
+
+		overlap_cnt++;
+	}
+
+	total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
+						USEC_PER_MSEC);
+
+	return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
+}
+
 /*
  * hl_device_set_frequency - set the frequency of the device
  *
@@ -808,6 +914,14 @@ again:
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		hl_cq_reset(hdev, &hdev->completion_queue[i]);
 
+	hdev->idle_busy_ts_idx = 0;
+	hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
+	hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);
+
+	if (hdev->cs_active_cnt)
+		dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
+			hdev->cs_active_cnt);
+
 	mutex_lock(&hdev->fpriv_list_lock);
 
 	/* Make sure the context switch phase will run again */
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index a4d929f5bad8..23b86b7f9732 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -45,6 +45,8 @@
 /* MUST BE POWER OF 2 and larger than 1 */
 #define HL_MAX_PENDING_CS		64
 
+#define HL_IDLE_BUSY_TS_ARR_SIZE	4096
+
 /* Memory */
 #define MEM_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
@@ -1156,6 +1158,16 @@ struct hl_device_reset_work {
 	struct hl_device		*hdev;
 };
 
+/**
+ * struct hl_device_idle_busy_ts - used for calculating device utilization rate.
+ * @idle_to_busy_ts: timestamp where device changed from idle to busy.
+ * @busy_to_idle_ts: timestamp where device changed from busy to idle.
+ */
+struct hl_device_idle_busy_ts {
+	ktime_t				idle_to_busy_ts;
+	ktime_t				busy_to_idle_ts;
+};
+
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
@@ -1203,19 +1215,22 @@ struct hl_device_reset_work {
  *              when a user opens the device
  * @fpriv_list_lock: protects the fpriv_list
  * @compute_ctx: current compute context executing.
+ * @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
+ *                    and vice-versa
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
  *             value is saved so in case of hard-reset, KMD will restore this
  *             value and update the F/W after the re-initialization
  * @in_reset: is device in reset flow.
+ * @curr_pll_profile: current PLL profile.
  * @cs_active_cnt: number of active command submissions on this device (active
  *                 means already in H/W queues)
- * @curr_pll_profile: current PLL profile.
  * @major: habanalabs KMD major.
  * @high_pll: high PLL profile frequency.
  * @soft_reset_cnt: number of soft reset since KMD loading.
  * @hard_reset_cnt: number of hard reset since KMD loading.
+ * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
  * @id: device minor.
  * @id_control: minor of the control device
  * @disabled: is device disabled.
@@ -1285,16 +1300,19 @@ struct hl_device {
 
 	struct hl_ctx			*compute_ctx;
 
+	struct hl_device_idle_busy_ts	*idle_busy_ts_arr;
+
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
 	u64				max_power;
 	atomic_t			in_reset;
-	atomic_t			cs_active_cnt;
 	enum hl_pll_frequency		curr_pll_profile;
+	int				cs_active_cnt;
 	u32				major;
 	u32				high_pll;
 	u32				soft_reset_cnt;
 	u32				hard_reset_cnt;
+	u32				idle_busy_ts_idx;
 	u16				id;
 	u16				id_control;
 	u8				disabled;
@@ -1457,6 +1475,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
 void hl_hpriv_get(struct hl_fpriv *hpriv);
 void hl_hpriv_put(struct hl_fpriv *hpriv);
 int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
+uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);
 
 int hl_build_hwmon_channel_info(struct hl_device *hdev,
 		struct armcp_sensor *sensors_arr);
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 8fbca2c0f44b..f958568f7996 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -197,6 +197,29 @@ out:
 	return rc;
 }
 
+static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_device_utilization device_util = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	if ((args->period_ms < 100) || (args->period_ms > 1000) ||
+		(args->period_ms % 100)) {
+		dev_err(hdev->dev,
+			"period %u must be between 100 - 1000 and must be divisible by 100\n",
+			args->period_ms);
+		return -EINVAL;
+	}
+
+	device_util.utilization = hl_device_utilization(hdev, args->period_ms);
+
+	return copy_to_user(out, &device_util,
+		min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -239,6 +262,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 		rc = hw_idle(hdev, args);
 		break;
 
+	case HL_INFO_DEVICE_UTILIZATION:
+		rc = device_utilization(hdev, args);
+		break;
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 696cf7d206c6..55b383b2a116 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -364,7 +364,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 		spin_unlock(&hdev->hw_queues_mirror_lock);
 	}
 
-	atomic_inc(&hdev->cs_active_cnt);
+	if (!hdev->cs_active_cnt++) {
+		struct hl_device_idle_busy_ts *ts;
+
+		ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
+		ts->busy_to_idle_ts = ktime_set(0, 0);
+		ts->idle_to_busy_ts = ktime_get();
+	}
 
 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
 		if (job->ext_queue)
diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h
index 3f02a52ba4ce..43d241891e45 100644
--- a/drivers/misc/habanalabs/include/goya/goya.h
+++ b/drivers/misc/habanalabs/include/goya/goya.h
@@ -38,4 +38,6 @@
 
 #define TPC_MAX_NUM		8
 
+#define MME_MAX_NUM		1
+
 #endif /* GOYA_H */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 266bf85056d4..73ee212d7fa6 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -77,22 +77,29 @@ enum hl_device_status {
 
 /* Opcode for management ioctl
  *
- * HW_IP_INFO         - Receive information about different IP blocks in the
- *                      device.
- * HL_INFO_HW_EVENTS  - Receive an array describing how many times each event
- *                      occurred since the last hard reset.
- * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the
- *                      specific context. This is relevant only for GOYA device.
- * HL_INFO_HW_IDLE    - Retrieve information about the idle status of each
- *                      internal engine.
+ * HW_IP_INFO            - Receive information about different IP blocks in the
+ *                         device.
+ * HL_INFO_HW_EVENTS     - Receive an array describing how many times each event
+ *                         occurred since the last hard reset.
+ * HL_INFO_DRAM_USAGE    - Retrieve the dram usage inside the device and of the
+ *                         specific context. This is relevant only for devices
+ *                         where the dram is managed by the kernel driver
+ * HL_INFO_HW_IDLE       - Retrieve information about the idle status of each
+ *                         internal engine.
  * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
  *                         require an open context.
+ * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device
+ *                              over the last period specified by the user.
+ *                              The period can be between 100ms to 1s, in
+ *                              resolution of 100ms. The return value is a
+ *                              percentage of the utilization rate.
  */
-#define HL_INFO_HW_IP_INFO	0
-#define HL_INFO_HW_EVENTS	1
-#define HL_INFO_DRAM_USAGE	2
-#define HL_INFO_HW_IDLE		3
-#define HL_INFO_DEVICE_STATUS	4
+#define HL_INFO_HW_IP_INFO		0
+#define HL_INFO_HW_EVENTS		1
+#define HL_INFO_DRAM_USAGE		2
+#define HL_INFO_HW_IDLE			3
+#define HL_INFO_DEVICE_STATUS		4
+#define HL_INFO_DEVICE_UTILIZATION	6
 
 #define HL_INFO_VERSION_MAX_LEN	128
 
@@ -134,6 +141,11 @@ struct hl_info_device_status {
 	__u32 pad;
 };
 
+struct hl_info_device_utilization {
+	__u32 utilization;
+	__u32 pad;
+};
+
 struct hl_info_args {
 	/* Location of relevant struct in userspace */
 	__u64 return_pointer;
@@ -149,8 +161,15 @@ struct hl_info_args {
 	/* HL_INFO_* */
 	__u32 op;
 
-	/* Context ID - Currently not in use */
-	__u32 ctx_id;
+	union {
+		/* Context ID - Currently not in use */
+		__u32 ctx_id;
+		/* Period value for utilization rate (100ms - 1000ms, in 100ms
+		 * resolution.
+		 */
+		__u32 period_ms;
+	};
+
 	__u32 pad;
 };
 
-- 
cgit v1.2.3


From e9730763a21a5441d46511f124d703d76a5ef6e6 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Wed, 28 Aug 2019 21:51:52 +0300
Subject: habanalabs: add uapi to retrieve aggregate H/W events

Add a new opcode to INFO IOCTL to retrieve aggregate H/W events. i.e. the
events counters are NOT cleared upon device reset, but count from the
loading of the driver.

Add the code to support it in the device event handling function.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
---
 drivers/misc/habanalabs/goya/goya.c        |  9 +++++++--
 drivers/misc/habanalabs/goya/goyaP.h       |  3 ++-
 drivers/misc/habanalabs/habanalabs.h       |  3 ++-
 drivers/misc/habanalabs/habanalabs_ioctl.c | 11 ++++++++---
 include/uapi/misc/habanalabs.h             |  3 +++
 5 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 0dd0b4429fee..1267ec75b19f 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4469,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 	struct goya_device *goya = hdev->asic_specific;
 
 	goya->events_stat[event_type]++;
+	goya->events_stat_aggregate[event_type]++;
 
 	switch (event_type) {
 	case GOYA_ASYNC_EVENT_ID_PCIE_IF:
@@ -4550,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 	}
 }
 
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
 {
 	struct goya_device *goya = hdev->asic_specific;
 
-	*size = (u32) sizeof(goya->events_stat);
+	if (aggregate) {
+		*size = (u32) sizeof(goya->events_stat_aggregate);
+		return goya->events_stat_aggregate;
+	}
 
+	*size = (u32) sizeof(goya->events_stat);
 	return goya->events_stat;
 }
 
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index d7f48c9c41cd..f830cfd5c04d 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -162,6 +162,7 @@ struct goya_device {
 
 	u64		ddr_bar_cur_addr;
 	u32		events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
+	u32		events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
 	u32		hw_cap_initialized;
 	u8		device_cpu_mmu_mappings_done;
 };
@@ -215,7 +216,7 @@ int goya_suspend(struct hl_device *hdev);
 int goya_resume(struct hl_device *hdev);
 
 void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
-void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
+void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
 
 void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
 				u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 23b86b7f9732..aa7aaa710f12 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -558,7 +558,8 @@ struct hl_asic_funcs {
 				struct hl_eq_entry *eq_entry);
 	void (*set_pll_profile)(struct hl_device *hdev,
 			enum hl_pll_frequency freq);
-	void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
+	void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
+				u32 *size);
 	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
 	void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
 	void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index f958568f7996..66d9c710073c 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -75,7 +75,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
 
-static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
+static int hw_events_info(struct hl_device *hdev, bool aggregate,
+			struct hl_info_args *args)
 {
 	u32 size, max_size = args->return_size;
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
@@ -84,7 +85,7 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	arr = hdev->asic_funcs->get_events_stat(hdev, &size);
+	arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
 
 	return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
 }
@@ -251,7 +252,7 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 
 	switch (args->op) {
 	case HL_INFO_HW_EVENTS:
-		rc = hw_events_info(hdev, args);
+		rc = hw_events_info(hdev, false, args);
 		break;
 
 	case HL_INFO_DRAM_USAGE:
@@ -266,6 +267,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 		rc = device_utilization(hdev, args);
 		break;
 
+	case HL_INFO_HW_EVENTS_AGGREGATE:
+		rc = hw_events_info(hdev, true, args);
+		break;
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 73ee212d7fa6..19f8039db2ea 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -93,6 +93,8 @@ enum hl_device_status {
  *                              The period can be between 100ms to 1s, in
  *                              resolution of 100ms. The return value is a
  *                              percentage of the utilization rate.
+ * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
+ *                               event occurred since the driver was loaded.
  */
 #define HL_INFO_HW_IP_INFO		0
 #define HL_INFO_HW_EVENTS		1
@@ -100,6 +102,7 @@ enum hl_device_status {
 #define HL_INFO_HW_IDLE			3
 #define HL_INFO_DEVICE_STATUS		4
 #define HL_INFO_DEVICE_UTILIZATION	6
+#define HL_INFO_HW_EVENTS_AGGREGATE	7
 
 #define HL_INFO_VERSION_MAX_LEN	128
 
-- 
cgit v1.2.3


From 4c172bbfaa4e1aa26dab58781301902c7b3e4ebc Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Fri, 30 Aug 2019 16:59:33 +0300
Subject: habanalabs: stop using the acronym KMD

We want to stop using the acronym KMD. Therefore, replace all locations
(except for register names we can't modify) where KMD is written to other
terms such as "Linux kernel driver" or "Host kernel driver", etc.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Reviewed-by: Omer Shpigelman <oshpigelman@habana.ai>
---
 drivers/misc/habanalabs/asid.c               |  2 +-
 drivers/misc/habanalabs/command_buffer.c     |  3 +-
 drivers/misc/habanalabs/command_submission.c |  5 +-
 drivers/misc/habanalabs/context.c            |  2 +-
 drivers/misc/habanalabs/goya/goya.c          | 22 ++++-----
 drivers/misc/habanalabs/goya/goyaP.h         | 14 +++---
 drivers/misc/habanalabs/habanalabs.h         | 32 ++++++-------
 drivers/misc/habanalabs/include/armcp_if.h   | 70 ++++++++++++++--------------
 include/uapi/misc/habanalabs.h               | 22 ++++-----
 9 files changed, 88 insertions(+), 84 deletions(-)

(limited to 'include')

diff --git a/drivers/misc/habanalabs/asid.c b/drivers/misc/habanalabs/asid.c
index 2c01461701a3..a2fdf31cf27c 100644
--- a/drivers/misc/habanalabs/asid.c
+++ b/drivers/misc/habanalabs/asid.c
@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)
 
 	mutex_init(&hdev->asid_mutex);
 
-	/* ASID 0 is reserved for KMD and device CPU */
+	/* ASID 0 is reserved for the kernel driver and device CPU */
 	set_bit(0, hdev->asid_bitmap);
 
 	return 0;
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c
index e495f44064fa..53fddbd8e693 100644
--- a/drivers/misc/habanalabs/command_buffer.c
+++ b/drivers/misc/habanalabs/command_buffer.c
@@ -397,7 +397,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
 	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
 			HL_KERNEL_ASID_ID);
 	if (rc) {
-		dev_err(hdev->dev, "Failed to allocate CB for KMD %d\n", rc);
+		dev_err(hdev->dev,
+			"Failed to allocate CB for the kernel driver %d\n", rc);
 		return NULL;
 	}
 
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 4777ec4c2b55..a9ac045dcfde 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -409,8 +409,9 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
 		return NULL;
 	}
 
-	if (hw_queue_prop->kmd_only) {
-		dev_err(hdev->dev, "Queue index %d is restricted for KMD\n",
+	if (hw_queue_prop->driver_only) {
+		dev_err(hdev->dev,
+			"Queue index %d is restricted for the kernel driver\n",
 			chunk->queue_index);
 		return NULL;
 	} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index bc0dec57a983..17db7b3dfb4c 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -128,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 	ctx->thread_ctx_switch_wait_token = 0;
 
 	if (is_kernel_ctx) {
-		ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
+		ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
 		rc = hl_mmu_ctx_init(ctx);
 		if (rc) {
 			dev_err(hdev->dev, "Failed to init mmu ctx module\n");
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index c88c2fea97b9..6fba14b81f90 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -42,8 +42,8 @@
  * PQ, CQ and CP are not secured.
  * PQ, CB and the data are on the SRAM/DRAM.
  *
- * Since QMAN DMA is secured, KMD is parsing the DMA CB:
- *     - KMD checks DMA pointer
+ * Since QMAN DMA is secured, the driver is parsing the DMA CB:
+ *     - checks DMA pointer
  *     - WREG, MSG_PROT are not allowed.
  *     - MSG_LONG/SHORT are allowed.
  *
@@ -56,15 +56,15 @@
  * QMAN DMA: PQ, CQ and CP are secured.
  * MMU is set to bypass on the Secure props register of the QMAN.
  * The reasons we don't enable MMU for PQ, CQ and CP are:
- *     - PQ entry is in kernel address space and KMD doesn't map it.
+ *     - PQ entry is in kernel address space and the driver doesn't map it.
  *     - CP writes to MSIX register and to kernel address space (completion
  *       queue).
  *
- * DMA is not secured but because CP is secured, KMD still needs to parse the
- * CB, but doesn't need to check the DMA addresses.
+ * DMA is not secured but because CP is secured, the driver still needs to parse
+ * the CB, but doesn't need to check the DMA addresses.
  *
- * For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
- * doesn't map memory in MMU.
+ * For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
+ * the driver doesn't map memory in MMU.
  *
  * QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
  *
@@ -336,18 +336,18 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 
 	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
-		prop->hw_queues_props[i].kmd_only = 0;
+		prop->hw_queues_props[i].driver_only = 0;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
-		prop->hw_queues_props[i].kmd_only = 1;
+		prop->hw_queues_props[i].driver_only = 1;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
 			NUMBER_OF_INT_HW_QUEUES; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
-		prop->hw_queues_props[i].kmd_only = 0;
+		prop->hw_queues_props[i].driver_only = 0;
 	}
 
 	for (; i < HL_MAX_QUEUES; i++)
@@ -2853,7 +2853,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
 
 	if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
 		dev_err_ratelimited(hdev->dev,
-			"Can't send KMD job on QMAN0 because the device is not idle\n");
+			"Can't send driver job on QMAN0 because the device is not idle\n");
 		return -EBUSY;
 	}
 
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 06da71e8d7ea..89b6574f8e4f 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -70,19 +70,19 @@
 						MMU_PAGE_TABLES_SIZE)
 #define MMU_CACHE_MNG_ADDR		(MMU_DRAM_DEFAULT_PAGE_ADDR + \
 					MMU_DRAM_DEFAULT_PAGE_SIZE)
-#define DRAM_KMD_END_ADDR		(MMU_CACHE_MNG_ADDR + \
+#define DRAM_DRIVER_END_ADDR		(MMU_CACHE_MNG_ADDR + \
 						MMU_CACHE_MNG_SIZE)
 
 #define DRAM_BASE_ADDR_USER		0x20000000
 
-#if (DRAM_KMD_END_ADDR > DRAM_BASE_ADDR_USER)
-#error "KMD must reserve no more than 512MB"
+#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER)
+#error "Driver must reserve no more than 512MB"
 #endif
 
 /*
- * SRAM Memory Map for KMD
+ * SRAM Memory Map for Driver
  *
- * KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
+ * Driver occupies DRIVER_SRAM_SIZE bytes from the start of SRAM. It is used for
  * MME/TPC QMANs
  *
  */
@@ -108,10 +108,10 @@
 #define TPC7_QMAN_BASE_OFFSET	(TPC6_QMAN_BASE_OFFSET + \
 				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
 
-#define SRAM_KMD_RES_OFFSET	(TPC7_QMAN_BASE_OFFSET + \
+#define SRAM_DRIVER_RES_OFFSET	(TPC7_QMAN_BASE_OFFSET + \
 				(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
 
-#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
+#if (SRAM_DRIVER_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
 #error "MME/TPC QMANs SRAM space exceeds limit"
 #endif
 
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index aa7aaa710f12..c39e07d665c4 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -96,12 +96,12 @@ enum hl_queue_type {
 /**
  * struct hw_queue_properties - queue information.
  * @type: queue type.
- * @kmd_only: true if only KMD is allowed to send a job to this queue, false
- *            otherwise.
+ * @driver_only: true if only the driver is allowed to send a job to this queue,
+ *               false otherwise.
  */
 struct hw_queue_properties {
 	enum hl_queue_type	type;
-	u8			kmd_only;
+	u8			driver_only;
 };
 
 /**
@@ -324,7 +324,7 @@ struct hl_cs_job;
 #define HL_EQ_LENGTH			64
 #define HL_EQ_SIZE_IN_BYTES		(HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
 
-/* KMD <-> ArmCP shared memory size */
+/* Host <-> ArmCP shared memory size */
 #define HL_CPU_ACCESSIBLE_MEM_SIZE	SZ_2M
 
 /**
@@ -405,7 +405,7 @@ struct hl_cs_parser;
 
 /**
  * enum hl_pm_mng_profile - power management profile.
- * @PM_AUTO: internal clock is set by KMD.
+ * @PM_AUTO: internal clock is set by the Linux driver.
  * @PM_MANUAL: internal clock is set by the user.
  * @PM_LAST: last power management type.
  */
@@ -613,7 +613,7 @@ struct hl_va_range {
  *		descriptor (hl_vm_phys_pg_list or hl_userptr).
  * @mmu_phys_hash: holds a mapping from physical address to pgt_info structure.
  * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
- * @hpriv: pointer to the private (KMD) data of the process (fd).
+ * @hpriv: pointer to the private (Kernel Driver) data of the process (fd).
  * @hdev: pointer to the device structure.
  * @refcount: reference counter for the context. Context is released only when
  *		this hits 0l. It is incremented on CS and CS_WAIT.
@@ -1185,19 +1185,19 @@ struct hl_device_idle_busy_ts {
  * @completion_queue: array of hl_cq.
  * @cq_wq: work queue of completion queues for executing work in process context
  * @eq_wq: work queue of event queue for executing work in process context.
- * @kernel_ctx: KMD context structure.
+ * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @hw_queues_mirror_list: CS mirror list for TDR.
  * @hw_queues_mirror_lock: protects hw_queues_mirror_list.
  * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
  * @event_queue: event queue for IRQ from ArmCP.
  * @dma_pool: DMA pool for small allocations.
- * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
- * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
- * @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
+ * @cpu_accessible_dma_mem: Host <-> ArmCP shared memory CPU address.
+ * @cpu_accessible_dma_address: Host <-> ArmCP shared memory DMA address.
+ * @cpu_accessible_dma_pool: Host <-> ArmCP shared memory pool.
  * @asid_bitmap: holds used/available ASIDs.
  * @asid_mutex: protects asid_bitmap.
- * @send_cpu_message_lock: enforces only one message in KMD <-> ArmCP queue.
+ * @send_cpu_message_lock: enforces only one message in Host <-> ArmCP queue.
  * @debug_lock: protects critical section of setting debug mode for device
  * @asic_prop: ASIC specific immutable properties.
  * @asic_funcs: ASIC specific functions.
@@ -1221,16 +1221,16 @@ struct hl_device_idle_busy_ts {
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
- *             value is saved so in case of hard-reset, KMD will restore this
- *             value and update the F/W after the re-initialization
+ *             value is saved so in case of hard-reset, the driver will restore
+ *             this value and update the F/W after the re-initialization
  * @in_reset: is device in reset flow.
  * @curr_pll_profile: current PLL profile.
  * @cs_active_cnt: number of active command submissions on this device (active
  *                 means already in H/W queues)
- * @major: habanalabs KMD major.
+ * @major: habanalabs kernel driver major.
  * @high_pll: high PLL profile frequency.
- * @soft_reset_cnt: number of soft reset since KMD loading.
- * @hard_reset_cnt: number of hard reset since KMD loading.
+ * @soft_reset_cnt: number of soft reset since the driver was loaded.
+ * @hard_reset_cnt: number of hard reset since the driver was loaded.
  * @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
  * @id: device minor.
  * @id_control: minor of the control device
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index 5565bce60bc9..e4c6699a1868 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -41,33 +41,34 @@ enum pq_init_status {
 /*
  * ArmCP Primary Queue Packets
  *
- * During normal operation, KMD needs to send various messages to ArmCP,
- * usually either to SET some value into a H/W periphery or to GET the current
- * value of some H/W periphery. For example, SET the frequency of MME/TPC and
- * GET the value of the thermal sensor.
- *
- * These messages can be initiated either by the User application or by KMD
- * itself, e.g. power management code. In either case, the communication from
- * KMD to ArmCP will *always* be in synchronous mode, meaning that KMD will
- * send a single message and poll until the message was acknowledged and the
- * results are ready (if results are needed).
- *
- * This means that only a single message can be sent at a time and KMD must
- * wait for its result before sending the next message. Having said that,
- * because these are control messages which are sent in a relatively low
+ * During normal operation, the host's kernel driver needs to send various
+ * messages to ArmCP, usually either to SET some value into a H/W periphery or
+ * to GET the current value of some H/W periphery. For example, SET the
+ * frequency of MME/TPC and GET the value of the thermal sensor.
+ *
+ * These messages can be initiated either by the User application or by the
+ * host's driver itself, e.g. power management code. In either case, the
+ * communication from the host's driver to ArmCP will *always* be in
+ * synchronous mode, meaning that the host will send a single message and poll
+ * until the message was acknowledged and the results are ready (if results are
+ * needed).
+ *
+ * This means that only a single message can be sent at a time and the host's
+ * driver must wait for its result before sending the next message. Having said
+ * that, because these are control messages which are sent in a relatively low
  * frequency, this limitation seems acceptable. It's important to note that
  * in case of multiple devices, messages to different devices *can* be sent
  * at the same time.
  *
  * The message, inputs/outputs (if relevant) and fence object will be located
- * on the device DDR at an address that will be determined by KMD. During
- * device initialization phase, KMD will pass to ArmCP that address.  Most of
- * the message types will contain inputs/outputs inside the message itself.
- * The common part of each message will contain the opcode of the message (its
- * type) and a field representing a fence object.
- *
- * When KMD wishes to send a message to ArmCP, it will write the message
- * contents to the device DDR, clear the fence object and then write the
+ * on the device DDR at an address that will be determined by the host's driver.
+ * During device initialization phase, the host will pass to ArmCP that address.
+ * Most of the message types will contain inputs/outputs inside the message
+ * itself. The common part of each message will contain the opcode of the
+ * message (its type) and a field representing a fence object.
+ *
+ * When the host's driver wishes to send a message to ArmCP, it will write the
+ * message contents to the device DDR, clear the fence object and then write the
  * value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
  * the 484 interrupt-id to the ARM core.
  *
@@ -78,12 +79,13 @@ enum pq_init_status {
  * device DDR and then write to the fence object. If an error occurred, ArmCP
  * will fill the rc field with the right error code.
  *
- * In the meantime, KMD will poll on the fence object. Once KMD sees that the
- * fence object is signaled, it will read the results from the device DDR
- * (if relevant) and resume the code execution in KMD.
+ * In the meantime, the host's driver will poll on the fence object. Once the
+ * host sees that the fence object is signaled, it will read the results from
+ * the device DDR (if relevant) and resume the code execution in the host's
+ * driver.
  *
  * To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
- * so the value being put by the KMD matches the value read by ArmCP
+ * so the value being put by the host's driver matches the value read by ArmCP
  *
  * Non-QMAN packets should be limited to values 1 through (2^8 - 1)
  *
@@ -148,9 +150,9 @@ enum pq_init_status {
  *
  * ARMCP_PACKET_INFO_GET -
  *       Fetch information from the device as specified in the packet's
- *       structure. KMD passes the max size it allows the ArmCP to write to
- *       the structure, to prevent data corruption in case of mismatched
- *       KMD/FW versions.
+ *       structure. The host's driver passes the max size it allows the ArmCP to
+ *       write to the structure, to prevent data corruption in case of
+ *       mismatched driver/FW versions.
  *
  * ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
  *
@@ -183,9 +185,9 @@ enum pq_init_status {
  * ARMCP_PACKET_EEPROM_DATA_GET -
  *       Get EEPROM data from the ArmCP kernel. The buffer is specified in the
  *       addr field. The CPU will put the returned data size in the result
- *       field. In addition, KMD passes the max size it allows the ArmCP to
- *       write to the structure, to prevent data corruption in case of
- *       mismatched KMD/FW versions.
+ *       field. In addition, the host's driver passes the max size it allows the
+ *       ArmCP to write to the structure, to prevent data corruption in case of
+ *       mismatched driver/FW versions.
  *
  */
 
@@ -231,7 +233,7 @@ struct armcp_packet {
 
 	__le32 ctl;
 
-	__le32 fence;		/* Signal to KMD that message is completed */
+	__le32 fence;		/* Signal to host that message is completed */
 
 	union {
 		struct {/* For temperature/current/voltage/fan/pwm get/set */
@@ -320,7 +322,7 @@ struct armcp_sensor {
 };
 
 /**
- * struct armcp_info - host driver's necessary info from ArmCP.
+ * struct armcp_info - Info from ArmCP that is necessary to the host's driver
  * @sensors: available sensors description.
  * @kernel_version: ArmCP linux kernel version.
  * @reserved: reserved field.
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 19f8039db2ea..39c4ea51a719 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
  *
- * Copyright 2016-2018 HabanaLabs, Ltd.
+ * Copyright 2016-2019 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -329,12 +329,12 @@ struct hl_mem_in {
 		struct {
 			/*
 			 * Requested virtual address of mapped memory.
-			 * KMD will try to map the requested region to this
-			 * hint address, as long as the address is valid and
-			 * not already mapped. The user should check the
+			 * The driver will try to map the requested region to
+			 * this hint address, as long as the address is valid
+			 * and not already mapped. The user should check the
 			 * returned address of the IOCTL to make sure he got
-			 * the hint address. Passing 0 here means that KMD
-			 * will choose the address itself.
+			 * the hint address. Passing 0 here means that the
+			 * driver will choose the address itself.
 			 */
 			__u64 hint_addr;
 			/* Handle returned from HL_MEM_OP_ALLOC */
@@ -347,12 +347,12 @@ struct hl_mem_in {
 			__u64 host_virt_addr;
 			/*
 			 * Requested virtual address of mapped memory.
-			 * KMD will try to map the requested region to this
-			 * hint address, as long as the address is valid and
-			 * not already mapped. The user should check the
+			 * The driver will try to map the requested region to
+			 * this hint address, as long as the address is valid
+			 * and not already mapped. The user should check the
 			 * returned address of the IOCTL to make sure he got
-			 * the hint address. Passing 0 here means that KMD
-			 * will choose the address itself.
+			 * the hint address. Passing 0 here means that the
+			 * driver will choose the address itself.
 			 */
 			__u64 hint_addr;
 			/* Size of allocated host memory */
-- 
cgit v1.2.3


From ad4a6795e0cfd7f2954ff004e83f00e0aa097f4c Mon Sep 17 00:00:00 2001
From: Spoorthi Ravishankar Koppad <spoorthix.k@intel.com>
Date: Mon, 15 Jul 2019 17:05:22 +0530
Subject: Bluetooth: Add support for utilizing Fast Advertising Interval

Changes made to add support for fast advertising interval
as per core 4.1 specification, section 9.3.11.2.

A peripheral device entering any of the following GAP modes and
sending either non-connectable advertising events or scannable
undirected advertising events should use adv_fast_interval2
(100ms - 150ms) for adv_fast_period(30s).

         - Non-Discoverable Mode
         - Non-Connectable Mode
         - Limited Discoverable Mode
         - General Discoverable Mode

Signed-off-by: Spoorthi Ravishankar Koppad <spoorthix.k@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_request.c      | 29 ++++++++++++++++++++++-------
 2 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ffc95b382eb5..b689aceb636b 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1517,6 +1517,8 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c);
 #define DISCOV_INTERLEAVED_INQUIRY_LEN	0x04
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 #define DISCOV_LE_RESTART_DELAY		msecs_to_jiffies(200)	/* msec */
+#define DISCOV_LE_FAST_ADV_INT_MIN     100     /* msec */
+#define DISCOV_LE_FAST_ADV_INT_MAX     150     /* msec */
 
 void mgmt_fill_version_info(void *ver);
 int mgmt_new_settings(struct hci_dev *hdev);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 621f1a97d803..7f6a581b5b7e 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1054,6 +1054,7 @@ void __hci_req_enable_advertising(struct hci_request *req)
 	struct hci_cp_le_set_adv_param cp;
 	u8 own_addr_type, enable = 0x01;
 	bool connectable;
+	u16 adv_min_interval, adv_max_interval;
 	u32 flags;
 
 	flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance);
@@ -1087,16 +1088,30 @@ void __hci_req_enable_advertising(struct hci_request *req)
 		return;
 
 	memset(&cp, 0, sizeof(cp));
-	cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval);
-	cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval);
 
-	if (connectable)
+	if (connectable) {
 		cp.type = LE_ADV_IND;
-	else if (get_cur_adv_instance_scan_rsp_len(hdev))
-		cp.type = LE_ADV_SCAN_IND;
-	else
-		cp.type = LE_ADV_NONCONN_IND;
 
+		adv_min_interval = hdev->le_adv_min_interval;
+		adv_max_interval = hdev->le_adv_max_interval;
+	} else {
+		if (get_cur_adv_instance_scan_rsp_len(hdev))
+			cp.type = LE_ADV_SCAN_IND;
+		else
+			cp.type = LE_ADV_NONCONN_IND;
+
+		if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) ||
+		    hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) {
+			adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN;
+			adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX;
+		} else {
+			adv_min_interval = hdev->le_adv_min_interval;
+			adv_max_interval = hdev->le_adv_max_interval;
+		}
+	}
+
+	cp.min_interval = cpu_to_le16(adv_min_interval);
+	cp.max_interval = cpu_to_le16(adv_max_interval);
 	cp.own_address_type = own_addr_type;
 	cp.channel_map = hdev->le_adv_channel_map;
 
-- 
cgit v1.2.3


From a5876e24f13f13483fbd602b972d35801fb80b74 Mon Sep 17 00:00:00 2001
From: Gao Xiang <gaoxiang25@huawei.com>
Date: Wed, 4 Sep 2019 10:08:56 +0800
Subject: erofs: use erofs_inode naming

As Christoph suggested [1], "Why is this called vnode instead
of inode?  That seems like a rather odd naming for a Linux
file system."

[1] https://lore.kernel.org/r/20190829101545.GC20598@infradead.org/
Reported-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190904020912.63925-10-gaoxiang25@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/erofs/data.c              |  8 ++++----
 fs/erofs/dir.c               |  6 +++---
 fs/erofs/inode.c             | 12 ++++++------
 fs/erofs/internal.h          | 14 +++++++-------
 fs/erofs/namei.c             |  2 +-
 fs/erofs/super.c             | 10 +++++-----
 fs/erofs/xattr.c             | 18 +++++++++---------
 fs/erofs/xattr.h             |  4 ++--
 fs/erofs/zdata.c             |  9 +++------
 fs/erofs/zmap.c              | 28 ++++++++++++++--------------
 include/trace/events/erofs.h | 14 +++++++-------
 11 files changed, 61 insertions(+), 64 deletions(-)

(limited to 'include')

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 4d9b07991d07..be11b5ea9d2e 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -112,7 +112,7 @@ static int erofs_map_blocks_flatmode(struct inode *inode,
 	int err = 0;
 	erofs_blk_t nblocks, lastblk;
 	u64 offset = map->m_la;
-	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode *vi = EROFS_I(inode);
 	bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
 
 	trace_erofs_map_blocks_flatmode_enter(inode, map, flags);
@@ -170,7 +170,7 @@ err_out:
 int erofs_map_blocks(struct inode *inode,
 		     struct erofs_map_blocks *map, int flags)
 {
-	if (erofs_inode_is_data_compressed(EROFS_V(inode)->datalayout)) {
+	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) {
 		int err = z_erofs_map_blocks_iter(inode, map, flags);
 
 		if (map->mpage) {
@@ -365,7 +365,7 @@ static int erofs_raw_access_readpages(struct file *filp,
 			if (IS_ERR(bio)) {
 				pr_err("%s, readahead error at page %lu of nid %llu\n",
 				       __func__, page->index,
-				       EROFS_V(mapping->host)->nid);
+				       EROFS_I(mapping->host)->nid);
 
 				bio = NULL;
 			}
@@ -404,7 +404,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
 {
 	struct inode *inode = mapping->host;
 
-	if (EROFS_V(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
+	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
 		erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
 
 		if (block >> LOG_SECTORS_PER_BLOCK >= blks)
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index 6a5b43f7fb29..a032c8217071 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -47,7 +47,7 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx,
 		/* a corrupted entry is found */
 		if (nameoff + de_namelen > maxsize ||
 		    de_namelen > EROFS_NAME_LEN) {
-			errln("bogus dirent @ nid %llu", EROFS_V(dir)->nid);
+			errln("bogus dirent @ nid %llu", EROFS_I(dir)->nid);
 			DBG_BUGON(1);
 			return -EFSCORRUPTED;
 		}
@@ -85,7 +85,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
 			break;
 		} else if (IS_ERR(dentry_page)) {
 			errln("fail to readdir of logical block %u of nid %llu",
-			      i, EROFS_V(dir)->nid);
+			      i, EROFS_I(dir)->nid);
 			err = -EFSCORRUPTED;
 			break;
 		}
@@ -97,7 +97,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
 		if (nameoff < sizeof(struct erofs_dirent) ||
 		    nameoff >= PAGE_SIZE) {
 			errln("%s, invalid de[0].nameoff %u @ nid %llu",
-			      __func__, nameoff, EROFS_V(dir)->nid);
+			      __func__, nameoff, EROFS_I(dir)->nid);
 			err = -EFSCORRUPTED;
 			goto skip_this;
 		}
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 494b35e5830a..f6dfd0271261 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -11,7 +11,7 @@
 /* no locking */
 static int read_inode(struct inode *inode, void *data)
 {
-	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode *vi = EROFS_I(inode);
 	struct erofs_inode_compact *dic = data;
 	struct erofs_inode_extended *die;
 
@@ -140,7 +140,7 @@ bogusimode:
 static int fill_inline_data(struct inode *inode, void *data,
 			    unsigned int m_pofs)
 {
-	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode *vi = EROFS_I(inode);
 	struct erofs_sb_info *sbi = EROFS_I_SB(inode);
 
 	/* should be tail-packing data inline */
@@ -178,7 +178,7 @@ static int fill_inline_data(struct inode *inode, void *data,
 static int fill_inode(struct inode *inode, int isdir)
 {
 	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode *vi = EROFS_I(inode);
 	struct page *page;
 	void *data;
 	int err;
@@ -260,7 +260,7 @@ static int erofs_ilookup_test_actor(struct inode *inode, void *opaque)
 {
 	const erofs_nid_t nid = *(erofs_nid_t *)opaque;
 
-	return EROFS_V(inode)->nid == nid;
+	return EROFS_I(inode)->nid == nid;
 }
 
 static int erofs_iget_set_actor(struct inode *inode, void *opaque)
@@ -297,7 +297,7 @@ struct inode *erofs_iget(struct super_block *sb,
 
 	if (inode->i_state & I_NEW) {
 		int err;
-		struct erofs_vnode *vi = EROFS_V(inode);
+		struct erofs_inode *vi = EROFS_I(inode);
 
 		vi->nid = nid;
 
@@ -317,7 +317,7 @@ int erofs_getattr(const struct path *path, struct kstat *stat,
 {
 	struct inode *const inode = d_inode(path->dentry);
 
-	if (erofs_inode_is_data_compressed(EROFS_V(inode)->datalayout))
+	if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout))
 		stat->attributes |= STATX_ATTR_COMPRESSED;
 
 	stat->attributes |= STATX_ATTR_IMMUTABLE;
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 0f5cbf0a7570..10497ee07cae 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -272,14 +272,14 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid)
 }
 
 /* atomic flag definitions */
-#define EROFS_V_EA_INITED_BIT	0
-#define EROFS_V_Z_INITED_BIT	1
+#define EROFS_I_EA_INITED_BIT	0
+#define EROFS_I_Z_INITED_BIT	1
 
 /* bitlock definitions (arranged in reverse order) */
-#define EROFS_V_BL_XATTR_BIT	(BITS_PER_LONG - 1)
-#define EROFS_V_BL_Z_BIT	(BITS_PER_LONG - 2)
+#define EROFS_I_BL_XATTR_BIT	(BITS_PER_LONG - 1)
+#define EROFS_I_BL_Z_BIT	(BITS_PER_LONG - 2)
 
-struct erofs_vnode {
+struct erofs_inode {
 	erofs_nid_t nid;
 
 	/* atomic flags (including bitlocks) */
@@ -307,8 +307,8 @@ struct erofs_vnode {
 	struct inode vfs_inode;
 };
 
-#define EROFS_V(ptr)	\
-	container_of(ptr, struct erofs_vnode, vfs_inode)
+#define EROFS_I(ptr)	\
+	container_of(ptr, struct erofs_inode, vfs_inode)
 
 static inline unsigned long inode_datablocks(struct inode *inode)
 {
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index c1068ad0535e..a6b6a4ab1403 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -117,7 +117,7 @@ static struct page *find_target_block_classic(struct inode *dir,
 				kunmap_atomic(de);
 				put_page(page);
 				errln("corrupted dir block %d @ nid %llu",
-				      mid, EROFS_V(dir)->nid);
+				      mid, EROFS_I(dir)->nid);
 				DBG_BUGON(1);
 				page = ERR_PTR(-EFSCORRUPTED);
 				goto out;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 499dc7f5d0e6..3986be582dbb 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -18,27 +18,27 @@ static struct kmem_cache *erofs_inode_cachep __read_mostly;
 
 static void init_once(void *ptr)
 {
-	struct erofs_vnode *vi = ptr;
+	struct erofs_inode *vi = ptr;
 
 	inode_init_once(&vi->vfs_inode);
 }
 
 static struct inode *alloc_inode(struct super_block *sb)
 {
-	struct erofs_vnode *vi =
+	struct erofs_inode *vi =
 		kmem_cache_alloc(erofs_inode_cachep, GFP_KERNEL);
 
 	if (!vi)
 		return NULL;
 
 	/* zero out everything except vfs_inode */
-	memset(vi, 0, offsetof(struct erofs_vnode, vfs_inode));
+	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
 	return &vi->vfs_inode;
 }
 
 static void free_inode(struct inode *inode)
 {
-	struct erofs_vnode *vi = EROFS_V(inode);
+	struct erofs_inode *vi = EROFS_I(inode);
 
 	/* be careful RCU symlink path (see ext4_inode_info->i_data)! */
 	if (is_inode_fast_symlink(inode))
@@ -517,7 +517,7 @@ static int __init erofs_module_init(void)
 	infoln("initializing erofs " EROFS_VERSION);
 
 	erofs_inode_cachep = kmem_cache_create("erofs_inode",
-					       sizeof(struct erofs_vnode), 0,
+					       sizeof(struct erofs_inode), 0,
 					       SLAB_RECLAIM_ACCOUNT,
 					       init_once);
 	if (!erofs_inode_cachep) {
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 620cbc15f4d0..d5b7fe0bee45 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -38,7 +38,7 @@ static inline void xattr_iter_end_final(struct xattr_iter *it)
 
 static int init_inode_xattrs(struct inode *inode)
 {
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	struct xattr_iter it;
 	unsigned int i;
 	struct erofs_xattr_ibody_header *ih;
@@ -48,14 +48,14 @@ static int init_inode_xattrs(struct inode *inode)
 	int ret = 0;
 
 	/* the most case is that xattrs of this inode are initialized. */
-	if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
+	if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags))
 		return 0;
 
-	if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_XATTR_BIT, TASK_KILLABLE))
+	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_XATTR_BIT, TASK_KILLABLE))
 		return -ERESTARTSYS;
 
 	/* someone has initialized xattrs for us? */
-	if (test_bit(EROFS_V_EA_INITED_BIT, &vi->flags))
+	if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags))
 		goto out_unlock;
 
 	/*
@@ -136,10 +136,10 @@ static int init_inode_xattrs(struct inode *inode)
 	}
 	xattr_iter_end(&it, atomic_map);
 
-	set_bit(EROFS_V_EA_INITED_BIT, &vi->flags);
+	set_bit(EROFS_I_EA_INITED_BIT, &vi->flags);
 
 out_unlock:
-	clear_and_wake_up_bit(EROFS_V_BL_XATTR_BIT, &vi->flags);
+	clear_and_wake_up_bit(EROFS_I_BL_XATTR_BIT, &vi->flags);
 	return ret;
 }
 
@@ -184,7 +184,7 @@ static inline int xattr_iter_fixup(struct xattr_iter *it)
 static int inline_xattr_iter_begin(struct xattr_iter *it,
 				   struct inode *inode)
 {
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	struct erofs_sb_info *const sbi = EROFS_SB(inode->i_sb);
 	unsigned int xattr_header_sz, inline_xattr_ofs;
 
@@ -385,7 +385,7 @@ static int inline_getxattr(struct inode *inode, struct getxattr_iter *it)
 
 static int shared_getxattr(struct inode *inode, struct getxattr_iter *it)
 {
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	struct super_block *const sb = inode->i_sb;
 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
 	unsigned int i;
@@ -608,7 +608,7 @@ static int inline_listxattr(struct listxattr_iter *it)
 static int shared_listxattr(struct listxattr_iter *it)
 {
 	struct inode *const inode = d_inode(it->dentry);
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	struct super_block *const sb = inode->i_sb;
 	struct erofs_sb_info *const sbi = EROFS_SB(sb);
 	unsigned int i;
diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h
index c5ca47d814dd..3585b84d2f20 100644
--- a/fs/erofs/xattr.h
+++ b/fs/erofs/xattr.h
@@ -16,8 +16,8 @@
 
 static inline unsigned int inlinexattr_header_size(struct inode *inode)
 {
-	return sizeof(struct erofs_xattr_ibody_header)
-		+ sizeof(u32) * EROFS_V(inode)->xattr_shared_count;
+	return sizeof(struct erofs_xattr_ibody_header) +
+		sizeof(u32) * EROFS_I(inode)->xattr_shared_count;
 }
 
 static inline erofs_blk_t xattrblock_addr(struct erofs_sb_info *sbi,
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 653bde0a619a..f06a2fad7af2 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -421,7 +421,7 @@ static struct z_erofs_collection *clregister(struct z_erofs_collector *clt,
 	else
 		pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED;
 
-	pcl->clusterbits = EROFS_V(inode)->z_physical_clusterbits[0];
+	pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0];
 	pcl->clusterbits -= PAGE_SHIFT;
 
 	/* new pclusters should be claimed as type 1, primary and followed */
@@ -1404,12 +1404,9 @@ static int z_erofs_vle_normalaccess_readpages(struct file *filp,
 		head = (void *)page_private(page);
 
 		err = z_erofs_do_read_page(&f, page, &pagepool);
-		if (err) {
-			struct erofs_vnode *vi = EROFS_V(inode);
-
+		if (err)
 			errln("%s, readahead error at page %lu of nid %llu",
-			      __func__, page->index, vi->nid);
-		}
+			      __func__, page->index, EROFS_I(inode)->nid);
 		put_page(page);
 	}
 
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 6a06fb80ef3f..30a5171637d7 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -10,7 +10,7 @@
 
 int z_erofs_fill_inode(struct inode *inode)
 {
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 
 	if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) {
 		vi->z_advise = 0;
@@ -19,7 +19,7 @@ int z_erofs_fill_inode(struct inode *inode)
 		vi->z_logical_clusterbits = LOG_BLOCK_SIZE;
 		vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits;
 		vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits;
-		set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
+		set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
 	}
 
 	inode->i_mapping->a_ops = &z_erofs_vle_normalaccess_aops;
@@ -28,7 +28,7 @@ int z_erofs_fill_inode(struct inode *inode)
 
 static int fill_inode_lazy(struct inode *inode)
 {
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	struct super_block *const sb = inode->i_sb;
 	int err;
 	erofs_off_t pos;
@@ -36,14 +36,14 @@ static int fill_inode_lazy(struct inode *inode)
 	void *kaddr;
 	struct z_erofs_map_header *h;
 
-	if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
+	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
 		return 0;
 
-	if (wait_on_bit_lock(&vi->flags, EROFS_V_BL_Z_BIT, TASK_KILLABLE))
+	if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE))
 		return -ERESTARTSYS;
 
 	err = 0;
-	if (test_bit(EROFS_V_Z_INITED_BIT, &vi->flags))
+	if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags))
 		goto out_unlock;
 
 	DBG_BUGON(vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY);
@@ -83,13 +83,13 @@ static int fill_inode_lazy(struct inode *inode)
 
 	vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits +
 					((h->h_clusterbits >> 5) & 7);
-	set_bit(EROFS_V_Z_INITED_BIT, &vi->flags);
+	set_bit(EROFS_I_Z_INITED_BIT, &vi->flags);
 unmap_done:
 	kunmap_atomic(kaddr);
 	unlock_page(page);
 	put_page(page);
 out_unlock:
-	clear_and_wake_up_bit(EROFS_V_BL_Z_BIT, &vi->flags);
+	clear_and_wake_up_bit(EROFS_I_BL_Z_BIT, &vi->flags);
 	return err;
 }
 
@@ -142,7 +142,7 @@ static int vle_legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 					     unsigned long lcn)
 {
 	struct inode *const inode = m->inode;
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	const erofs_off_t ibase = iloc(EROFS_I_SB(inode), vi->nid);
 	const erofs_off_t pos =
 		Z_EROFS_VLE_LEGACY_INDEX_ALIGN(ibase + vi->inode_isize +
@@ -196,7 +196,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m,
 				  unsigned int amortizedshift,
 				  unsigned int eofs)
 {
-	struct erofs_vnode *const vi = EROFS_V(m->inode);
+	struct erofs_inode *const vi = EROFS_I(m->inode);
 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
 	const unsigned int lomask = (1 << lclusterbits) - 1;
 	unsigned int vcnt, base, lo, encodebits, nblk;
@@ -260,7 +260,7 @@ static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 					    unsigned long lcn)
 {
 	struct inode *const inode = m->inode;
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
 	const erofs_off_t ebase = ALIGN(iloc(EROFS_I_SB(inode), vi->nid) +
 					vi->inode_isize + vi->xattr_isize, 8) +
@@ -314,7 +314,7 @@ out:
 static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 				      unsigned int lcn)
 {
-	const unsigned int datamode = EROFS_V(m->inode)->datalayout;
+	const unsigned int datamode = EROFS_I(m->inode)->datalayout;
 
 	if (datamode == EROFS_INODE_FLAT_COMPRESSION_LEGACY)
 		return vle_legacy_load_cluster_from_disk(m, lcn);
@@ -328,7 +328,7 @@ static int vle_load_cluster_from_disk(struct z_erofs_maprecorder *m,
 static int vle_extent_lookback(struct z_erofs_maprecorder *m,
 			       unsigned int lookback_distance)
 {
-	struct erofs_vnode *const vi = EROFS_V(m->inode);
+	struct erofs_inode *const vi = EROFS_I(m->inode);
 	struct erofs_map_blocks *const map = m->map;
 	const unsigned int lclusterbits = vi->z_logical_clusterbits;
 	unsigned long lcn = m->lcn;
@@ -374,7 +374,7 @@ int z_erofs_map_blocks_iter(struct inode *inode,
 			    struct erofs_map_blocks *map,
 			    int flags)
 {
-	struct erofs_vnode *const vi = EROFS_V(inode);
+	struct erofs_inode *const vi = EROFS_I(inode);
 	struct z_erofs_maprecorder m = {
 		.inode = inode,
 		.map = map,
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index d239f39cbc8c..27f5caa6299a 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -41,7 +41,7 @@ TRACE_EVENT(erofs_lookup,
 
 	TP_fast_assign(
 		__entry->dev	= dir->i_sb->s_dev;
-		__entry->nid	= EROFS_V(dir)->nid;
+		__entry->nid	= EROFS_I(dir)->nid;
 		__entry->name	= dentry->d_name.name;
 		__entry->flags	= flags;
 	),
@@ -66,7 +66,7 @@ TRACE_EVENT(erofs_fill_inode,
 
 	TP_fast_assign(
 		__entry->dev		= inode->i_sb->s_dev;
-		__entry->nid		= EROFS_V(inode)->nid;
+		__entry->nid		= EROFS_I(inode)->nid;
 		__entry->blkaddr	= erofs_blknr(iloc(EROFS_I_SB(inode), __entry->nid));
 		__entry->ofs		= erofs_blkoff(iloc(EROFS_I_SB(inode), __entry->nid));
 		__entry->isdir		= isdir;
@@ -95,7 +95,7 @@ TRACE_EVENT(erofs_readpage,
 
 	TP_fast_assign(
 		__entry->dev	= page->mapping->host->i_sb->s_dev;
-		__entry->nid	= EROFS_V(page->mapping->host)->nid;
+		__entry->nid	= EROFS_I(page->mapping->host)->nid;
 		__entry->dir	= S_ISDIR(page->mapping->host->i_mode);
 		__entry->index	= page->index;
 		__entry->uptodate = PageUptodate(page);
@@ -128,7 +128,7 @@ TRACE_EVENT(erofs_readpages,
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->nid	= EROFS_I(inode)->nid;
 		__entry->start	= page->index;
 		__entry->nrpage	= nrpage;
 		__entry->raw	= raw;
@@ -157,7 +157,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_enter,
 
 	TP_fast_assign(
 		__entry->dev    = inode->i_sb->s_dev;
-		__entry->nid    = EROFS_V(inode)->nid;
+		__entry->nid    = EROFS_I(inode)->nid;
 		__entry->la	= map->m_la;
 		__entry->llen	= map->m_llen;
 		__entry->flags	= flags;
@@ -203,7 +203,7 @@ DECLARE_EVENT_CLASS(erofs__map_blocks_exit,
 
 	TP_fast_assign(
 		__entry->dev    = inode->i_sb->s_dev;
-		__entry->nid    = EROFS_V(inode)->nid;
+		__entry->nid    = EROFS_I(inode)->nid;
 		__entry->flags	= flags;
 		__entry->la	= map->m_la;
 		__entry->pa	= map->m_pa;
@@ -247,7 +247,7 @@ TRACE_EVENT(erofs_destroy_inode,
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
-		__entry->nid	= EROFS_V(inode)->nid;
+		__entry->nid	= EROFS_I(inode)->nid;
 	),
 
 	TP_printk("dev = (%d,%d), nid = %llu", show_dev_nid(__entry))
-- 
cgit v1.2.3


From 7f1c62c443a453deb6eb3515e3c05650ffe0dcf0 Mon Sep 17 00:00:00 2001
From: Krzysztof Wilczynski <kw@linux.com>
Date: Mon, 26 Aug 2019 00:46:16 +0200
Subject: PCI: Add pci_info_ratelimited() to ratelimit PCI separately

Do not use printk_ratelimit() in drivers/pci/pci.c as it shares the rate
limiting state with all other callers to the printk_ratelimit().

Add pci_info_ratelimited() (similar to pci_notice_ratelimited() added in
the commit a88a7b3eb076 ("vfio: Use dev_printk() when possible")) and use
it instead of printk_ratelimit() + pci_info().

Link: https://lore.kernel.org/r/20190825224616.8021-1-kw@linux.com
Signed-off-by: Krzysztof Wilczynski <kw@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c   | 4 ++--
 include/linux/pci.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 29ed5ec1ac27..08dfb16bd084 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -890,8 +890,8 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state)
 
 	pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr);
 	dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK);
-	if (dev->current_state != state && printk_ratelimit())
-		pci_info(dev, "Refused to change power state, currently in D%d\n",
+	if (dev->current_state != state)
+		pci_info_ratelimited(dev, "Refused to change power state, currently in D%d\n",
 			 dev->current_state);
 
 	/*
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..f5bab312995d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2433,4 +2433,7 @@ void pci_uevent_ers(struct pci_dev *pdev, enum  pci_ers_result err_type);
 #define pci_notice_ratelimited(pdev, fmt, arg...) \
 	dev_notice_ratelimited(&(pdev)->dev, fmt, ##arg)
 
+#define pci_info_ratelimited(pdev, fmt, arg...) \
+	dev_info_ratelimited(&(pdev)->dev, fmt, ##arg)
+
 #endif /* LINUX_PCI_H */
-- 
cgit v1.2.3


From 533770cc0ae84890624dc129609f3d75855c8982 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 3 Sep 2019 19:05:48 -0400
Subject: new helper: get_tree_keyed()

For vfs_get_keyed_super users.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfsd/nfsctl.c           |  3 +--
 fs/proc/root.c             |  3 +--
 fs/super.c                 | 10 ++++++++++
 include/linux/fs_context.h |  7 ++++++-
 ipc/mqueue.c               |  3 +--
 net/sunrpc/rpc_pipe.c      |  3 +--
 6 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 13c548733860..695223394985 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1386,8 +1386,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
 
 static int nfsd_fs_get_tree(struct fs_context *fc)
 {
-	fc->s_fs_info = get_net(fc->net_ns);
-	return vfs_get_super(fc, vfs_get_keyed_super, nfsd_fill_super);
+	return get_tree_keyed(fc, nfsd_fill_super, get_net(fc->net_ns));
 }
 
 static void nfsd_fs_free_fc(struct fs_context *fc)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 33f72d1b92cc..0b7c8dffc9ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -157,8 +157,7 @@ static int proc_get_tree(struct fs_context *fc)
 {
 	struct proc_fs_context *ctx = fc->fs_private;
 
-	fc->s_fs_info = ctx->pid_ns;
-	return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
+	return get_tree_keyed(fc, proc_fill_super, ctx->pid_ns);
 }
 
 static void proc_fs_context_free(struct fs_context *fc)
diff --git a/fs/super.c b/fs/super.c
index 5960578a4076..0220def9baba 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1211,6 +1211,16 @@ int get_tree_single(struct fs_context *fc,
 }
 EXPORT_SYMBOL(get_tree_single);
 
+int get_tree_keyed(struct fs_context *fc,
+		  int (*fill_super)(struct super_block *sb,
+				    struct fs_context *fc),
+		void *key)
+{
+	fc->s_fs_info = key;
+	return vfs_get_super(fc, vfs_get_keyed_super, fill_super);
+}
+EXPORT_SYMBOL(get_tree_keyed);
+
 #ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
 {
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 7c6fe3d47fa6..aad5e68d58e2 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -136,7 +136,7 @@ extern int vfs_get_tree(struct fs_context *fc);
 extern void put_fs_context(struct fs_context *fc);
 
 /*
- * sget() wrapper to be called from the ->get_tree() op.
+ * sget() wrappers to be called from the ->get_tree() op.
  */
 enum vfs_get_super_keying {
 	vfs_get_single_super,	/* Only one such superblock may exist */
@@ -147,12 +147,17 @@ extern int vfs_get_super(struct fs_context *fc,
 			 enum vfs_get_super_keying keying,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
+
 extern int get_tree_nodev(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
 extern int get_tree_single(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
+extern int get_tree_keyed(struct fs_context *fc,
+			 int (*fill_super)(struct super_block *sb,
+					   struct fs_context *fc),
+			 void *key);
 
 extern const struct file_operations fscontext_fops;
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 7a5a8edc3de3..7c15729d9d25 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -364,8 +364,7 @@ static int mqueue_get_tree(struct fs_context *fc)
 {
 	struct mqueue_fs_context *ctx = fc->fs_private;
 
-	fc->s_fs_info = ctx->ipc_ns;
-	return vfs_get_super(fc, vfs_get_keyed_super, mqueue_fill_super);
+	return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
 }
 
 static void mqueue_fs_context_free(struct fs_context *fc)
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 748bac601e47..b71a39ded930 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1416,8 +1416,7 @@ EXPORT_SYMBOL_GPL(gssd_running);
 
 static int rpc_fs_get_tree(struct fs_context *fc)
 {
-	fc->s_fs_info = get_net(fc->net_ns);
-	return vfs_get_super(fc, vfs_get_keyed_super, rpc_fill_super);
+	return get_tree_keyed(fc, rpc_fill_super, get_net(fc->net_ns));
 }
 
 static void rpc_fs_free_fc(struct fs_context *fc)
-- 
cgit v1.2.3


From fe62c3a4e17ddfe672710425ab6eba2ba7203526 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 27 Mar 2019 14:15:16 +0000
Subject: vfs: Create fs_context-aware mount_bdev() replacement

Create a function, get_tree_bdev(), that is fs_context-aware and a
->get_tree() counterpart of mount_bdev().

It caches the block device pointer in the fs_context struct so that this
information can be passed into sget_fc()'s test and set functions.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jens Axboe <axboe@kernel.dk>
cc: linux-block@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c                 | 94 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/fs_context.h |  5 +++
 2 files changed, 99 insertions(+)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 0220def9baba..da223b4cfbca 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1222,6 +1222,7 @@ int get_tree_keyed(struct fs_context *fc,
 EXPORT_SYMBOL(get_tree_keyed);
 
 #ifdef CONFIG_BLOCK
+
 static int set_bdev_super(struct super_block *s, void *data)
 {
 	s->s_bdev = data;
@@ -1231,6 +1232,99 @@ static int set_bdev_super(struct super_block *s, void *data)
 	return 0;
 }
 
+static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
+{
+	return set_bdev_super(s, fc->sget_key);
+}
+
+static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
+{
+	return s->s_bdev == fc->sget_key;
+}
+
+/**
+ * get_tree_bdev - Get a superblock based on a single block device
+ * @fc: The filesystem context holding the parameters
+ * @fill_super: Helper to initialise a new superblock
+ */
+int get_tree_bdev(struct fs_context *fc,
+		int (*fill_super)(struct super_block *,
+				  struct fs_context *))
+{
+	struct block_device *bdev;
+	struct super_block *s;
+	fmode_t mode = FMODE_READ | FMODE_EXCL;
+	int error = 0;
+
+	if (!(fc->sb_flags & SB_RDONLY))
+		mode |= FMODE_WRITE;
+
+	if (!fc->source)
+		return invalf(fc, "No source specified");
+
+	bdev = blkdev_get_by_path(fc->source, mode, fc->fs_type);
+	if (IS_ERR(bdev)) {
+		errorf(fc, "%s: Can't open blockdev", fc->source);
+		return PTR_ERR(bdev);
+	}
+
+	/* Once the superblock is inserted into the list by sget_fc(), s_umount
+	 * will protect the lockfs code from trying to start a snapshot while
+	 * we are mounting
+	 */
+	mutex_lock(&bdev->bd_fsfreeze_mutex);
+	if (bdev->bd_fsfreeze_count > 0) {
+		mutex_unlock(&bdev->bd_fsfreeze_mutex);
+		warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+		return -EBUSY;
+	}
+
+	fc->sb_flags |= SB_NOSEC;
+	fc->sget_key = bdev;
+	s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
+	mutex_unlock(&bdev->bd_fsfreeze_mutex);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	if (s->s_root) {
+		/* Don't summarily change the RO/RW state. */
+		if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
+			warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+			deactivate_locked_super(s);
+			blkdev_put(bdev, mode);
+			return -EBUSY;
+		}
+
+		/*
+		 * s_umount nests inside bd_mutex during
+		 * __invalidate_device().  blkdev_put() acquires
+		 * bd_mutex and can't be called under s_umount.  Drop
+		 * s_umount temporarily.  This is safe as we're
+		 * holding an active reference.
+		 */
+		up_write(&s->s_umount);
+		blkdev_put(bdev, mode);
+		down_write(&s->s_umount);
+	} else {
+		s->s_mode = mode;
+		snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
+		sb_set_blocksize(s, block_size(bdev));
+		error = fill_super(s, fc);
+		if (error) {
+			deactivate_locked_super(s);
+			return error;
+		}
+
+		s->s_flags |= SB_ACTIVE;
+		bdev->bd_super = s;
+	}
+
+	BUG_ON(fc->root);
+	fc->root = dget(s->s_root);
+	return 0;
+}
+EXPORT_SYMBOL(get_tree_bdev);
+
 static int test_bdev_super(struct super_block *s, void *data)
 {
 	return (void *)s->s_bdev == data;
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index aad5e68d58e2..84a5eaa09f19 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -88,6 +88,7 @@ struct fs_context {
 	struct mutex		uapi_mutex;	/* Userspace access mutex */
 	struct file_system_type	*fs_type;
 	void			*fs_private;	/* The filesystem's context */
+	void			*sget_key;
 	struct dentry		*root;		/* The root and superblock */
 	struct user_namespace	*user_ns;	/* The user namespace for this mount */
 	struct net		*net_ns;	/* The network namespace for this mount */
@@ -159,6 +160,10 @@ extern int get_tree_keyed(struct fs_context *fc,
 					   struct fs_context *fc),
 			 void *key);
 
+extern int get_tree_bdev(struct fs_context *fc,
+			       int (*fill_super)(struct super_block *sb,
+						 struct fs_context *fc));
+
 extern const struct file_operations fscontext_fops;
 
 /*
-- 
cgit v1.2.3


From 43ce4c1feadbc84c772518a2d1974f6ba1b15089 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 21 Mar 2019 09:22:36 +0000
Subject: vfs: Add a single-or-reconfig keying to vfs_get_super()

Add an additional keying mode to vfs_get_super() to indicate that only a
single superblock should exist in the system, and that, if it does, further
mounts should invoke reconfiguration upon it.

This allows mount_single() to be replaced.

[Fix by Eric Biggers folded in]

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/super.c                 | 35 ++++++++++++++++++++++++++++-------
 include/linux/fs_context.h |  4 ++++
 2 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index da223b4cfbca..beaf076d9733 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1160,9 +1160,11 @@ int vfs_get_super(struct fs_context *fc,
 {
 	int (*test)(struct super_block *, struct fs_context *);
 	struct super_block *sb;
+	int err;
 
 	switch (keying) {
 	case vfs_get_single_super:
+	case vfs_get_single_reconf_super:
 		test = test_single_super;
 		break;
 	case vfs_get_keyed_super:
@@ -1180,18 +1182,29 @@ int vfs_get_super(struct fs_context *fc,
 		return PTR_ERR(sb);
 
 	if (!sb->s_root) {
-		int err = fill_super(sb, fc);
-		if (err) {
-			deactivate_locked_super(sb);
-			return err;
-		}
+		err = fill_super(sb, fc);
+		if (err)
+			goto error;
 
 		sb->s_flags |= SB_ACTIVE;
+		fc->root = dget(sb->s_root);
+	} else {
+		fc->root = dget(sb->s_root);
+		if (keying == vfs_get_single_reconf_super) {
+			err = reconfigure_super(fc);
+			if (err < 0) {
+				dput(fc->root);
+				fc->root = NULL;
+				goto error;
+			}
+		}
 	}
 
-	BUG_ON(fc->root);
-	fc->root = dget(sb->s_root);
 	return 0;
+
+error:
+	deactivate_locked_super(sb);
+	return err;
 }
 EXPORT_SYMBOL(vfs_get_super);
 
@@ -1211,6 +1224,14 @@ int get_tree_single(struct fs_context *fc,
 }
 EXPORT_SYMBOL(get_tree_single);
 
+int get_tree_single_reconf(struct fs_context *fc,
+		  int (*fill_super)(struct super_block *sb,
+				    struct fs_context *fc))
+{
+	return vfs_get_super(fc, vfs_get_single_reconf_super, fill_super);
+}
+EXPORT_SYMBOL(get_tree_single_reconf);
+
 int get_tree_keyed(struct fs_context *fc,
 		  int (*fill_super)(struct super_block *sb,
 				    struct fs_context *fc),
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 84a5eaa09f19..0424df7f6e6b 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -141,6 +141,7 @@ extern void put_fs_context(struct fs_context *fc);
  */
 enum vfs_get_super_keying {
 	vfs_get_single_super,	/* Only one such superblock may exist */
+	vfs_get_single_reconf_super, /* As above, but reconfigure if it exists */
 	vfs_get_keyed_super,	/* Superblocks with different s_fs_info keys may exist */
 	vfs_get_independent_super, /* Multiple independent superblocks may exist */
 };
@@ -155,6 +156,9 @@ extern int get_tree_nodev(struct fs_context *fc,
 extern int get_tree_single(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc));
+extern int get_tree_single_reconf(struct fs_context *fc,
+			 int (*fill_super)(struct super_block *sb,
+					   struct fs_context *fc));
 extern int get_tree_keyed(struct fs_context *fc,
 			 int (*fill_super)(struct super_block *sb,
 					   struct fs_context *fc),
-- 
cgit v1.2.3


From 0f071004109d9c8de7023b9a64fa2ba3fa87cbed Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Mar 2019 16:38:31 +0000
Subject: mtd: Provide fs_context-aware mount_mtd() replacement

Provide a function, get_tree_mtd(), to replace mount_mtd(), using an
fs_context struct to hold the parameters.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: David Woodhouse <dwmw2@infradead.org>
cc: Brian Norris <computersforpeace@gmail.com>
cc: Boris Brezillon <bbrezillon@kernel.org>
cc: Marek Vasut <marek.vasut@gmail.com>
cc: Richard Weinberger <richard@nod.at>
cc: linux-mtd@lists.infradead.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/mtdcore.h     |   1 +
 drivers/mtd/mtdsuper.c    | 179 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/mtd/super.h |   3 +
 3 files changed, 181 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
index b31c868019ad..b5eefeabf310 100644
--- a/drivers/mtd/mtdcore.h
+++ b/drivers/mtd/mtdcore.h
@@ -5,6 +5,7 @@
  */
 
 extern struct mutex mtd_table_mutex;
+extern struct backing_dev_info *mtd_bdi;
 
 struct mtd_info *__mtd_next_device(int i);
 int __must_check add_mtd_device(struct mtd_info *mtd);
diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 4f042a3653ce..3f9a3b7b12c5 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -15,6 +15,183 @@
 #include <linux/slab.h>
 #include <linux/major.h>
 #include <linux/backing-dev.h>
+#include <linux/fs_context.h>
+#include "mtdcore.h"
+
+/*
+ * compare superblocks to see if they're equivalent
+ * - they are if the underlying MTD device is the same
+ */
+static int mtd_test_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct mtd_info *mtd = fc->sget_key;
+
+	if (sb->s_mtd == fc->sget_key) {
+		pr_debug("MTDSB: Match on device %d (\"%s\")\n",
+			 mtd->index, mtd->name);
+		return 1;
+	}
+
+	pr_debug("MTDSB: No match, device %d (\"%s\"), device %d (\"%s\")\n",
+		 sb->s_mtd->index, sb->s_mtd->name, mtd->index, mtd->name);
+	return 0;
+}
+
+/*
+ * mark the superblock by the MTD device it is using
+ * - set the device number to be the correct MTD block device for pesuperstence
+ *   of NFS exports
+ */
+static int mtd_set_super(struct super_block *sb, struct fs_context *fc)
+{
+	sb->s_mtd = fc->sget_key;
+	sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+	sb->s_bdi = bdi_get(mtd_bdi);
+	return 0;
+}
+
+/*
+ * get a superblock on an MTD-backed filesystem
+ */
+static int mtd_get_sb(struct fs_context *fc,
+		      struct mtd_info *mtd,
+		      int (*fill_super)(struct super_block *,
+					struct fs_context *))
+{
+	struct super_block *sb;
+	int ret;
+
+	fc->sget_key = mtd;
+	sb = sget_fc(fc, mtd_test_super, mtd_set_super);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	if (sb->s_root) {
+		/* new mountpoint for an already mounted superblock */
+		pr_debug("MTDSB: Device %d (\"%s\") is already mounted\n",
+			 mtd->index, mtd->name);
+		put_mtd_device(mtd);
+	} else {
+		/* fresh new superblock */
+		pr_debug("MTDSB: New superblock for device %d (\"%s\")\n",
+			 mtd->index, mtd->name);
+
+		ret = fill_super(sb, fc);
+		if (ret < 0)
+			goto error_sb;
+
+		sb->s_flags |= SB_ACTIVE;
+	}
+
+	BUG_ON(fc->root);
+	fc->root = dget(sb->s_root);
+	return 0;
+
+error_sb:
+	deactivate_locked_super(sb);
+	return ret;
+}
+
+/*
+ * get a superblock on an MTD-backed filesystem by MTD device number
+ */
+static int mtd_get_sb_by_nr(struct fs_context *fc, int mtdnr,
+			    int (*fill_super)(struct super_block *,
+					      struct fs_context *))
+{
+	struct mtd_info *mtd;
+
+	mtd = get_mtd_device(NULL, mtdnr);
+	if (IS_ERR(mtd)) {
+		errorf(fc, "MTDSB: Device #%u doesn't appear to exist\n", mtdnr);
+		return PTR_ERR(mtd);
+	}
+
+	return mtd_get_sb(fc, mtd, fill_super);
+}
+
+/**
+ * get_tree_mtd - Get a superblock based on a single MTD device
+ * @fc: The filesystem context holding the parameters
+ * @fill_super: Helper to initialise a new superblock
+ */
+int get_tree_mtd(struct fs_context *fc,
+	      int (*fill_super)(struct super_block *sb,
+				struct fs_context *fc))
+{
+#ifdef CONFIG_BLOCK
+	struct block_device *bdev;
+	int ret, major;
+#endif
+	int mtdnr;
+
+	if (!fc->source)
+		return invalf(fc, "No source specified");
+
+	pr_debug("MTDSB: dev_name \"%s\"\n", fc->source);
+
+	/* the preferred way of mounting in future; especially when
+	 * CONFIG_BLOCK=n - we specify the underlying MTD device by number or
+	 * by name, so that we don't require block device support to be present
+	 * in the kernel.
+	 */
+	if (fc->source[0] == 'm' &&
+	    fc->source[1] == 't' &&
+	    fc->source[2] == 'd') {
+		if (fc->source[3] == ':') {
+			struct mtd_info *mtd;
+
+			/* mount by MTD device name */
+			pr_debug("MTDSB: mtd:%%s, name \"%s\"\n",
+				 fc->source + 4);
+
+			mtd = get_mtd_device_nm(fc->source + 4);
+			if (!IS_ERR(mtd))
+				return mtd_get_sb(fc, mtd, fill_super);
+
+			errorf(fc, "MTD: MTD device with name \"%s\" not found",
+			       fc->source + 4);
+
+		} else if (isdigit(fc->source[3])) {
+			/* mount by MTD device number name */
+			char *endptr;
+
+			mtdnr = simple_strtoul(fc->source + 3, &endptr, 0);
+			if (!*endptr) {
+				/* It was a valid number */
+				pr_debug("MTDSB: mtd%%d, mtdnr %d\n", mtdnr);
+				return mtd_get_sb_by_nr(fc, mtdnr, fill_super);
+			}
+		}
+	}
+
+#ifdef CONFIG_BLOCK
+	/* try the old way - the hack where we allowed users to mount
+	 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev
+	 */
+	bdev = lookup_bdev(fc->source);
+	if (IS_ERR(bdev)) {
+		ret = PTR_ERR(bdev);
+		errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret);
+		return ret;
+	}
+	pr_debug("MTDSB: lookup_bdev() returned 0\n");
+
+	major = MAJOR(bdev->bd_dev);
+	mtdnr = MINOR(bdev->bd_dev);
+	bdput(bdev);
+
+	if (major == MTD_BLOCK_MAJOR)
+		return mtd_get_sb_by_nr(fc, mtdnr, fill_super);
+
+#endif /* CONFIG_BLOCK */
+
+	if (!(fc->sb_flags & SB_SILENT))
+		errorf(fc, "MTD: Attempt to mount non-MTD device \"%s\"",
+		       fc->source);
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(get_tree_mtd);
 
 /*
  * compare superblocks to see if they're equivalent
@@ -35,8 +212,6 @@ static int get_sb_mtd_compare(struct super_block *sb, void *_mtd)
 	return 0;
 }
 
-extern struct backing_dev_info *mtd_bdi;
-
 /*
  * mark the superblock by the MTD device it is using
  * - set the device number to be the correct MTD block device for pesuperstence
diff --git a/include/linux/mtd/super.h b/include/linux/mtd/super.h
index 056c9932c723..42db3f8e8136 100644
--- a/include/linux/mtd/super.h
+++ b/include/linux/mtd/super.h
@@ -14,6 +14,9 @@
 #include <linux/fs.h>
 #include <linux/mount.h>
 
+extern int get_tree_mtd(struct fs_context *fc,
+		     int (*fill_super)(struct super_block *sb,
+				       struct fs_context *fc));
 extern struct dentry *mount_mtd(struct file_system_type *fs_type, int flags,
 		      const char *dev_name, void *data,
 		      int (*fill_super)(struct super_block *, void *, int));
-- 
cgit v1.2.3


From 6d56e4184368e4fe611df0cd5aee78431f5eda3e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Mar 2019 16:38:32 +0000
Subject: mtd: Kill mount_mtd()

Kill mount_mtd() as it has now been replaced by vfs_get_mtd_super() in the
new mount API and nothing now uses it.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: David Woodhouse <dwmw2@infradead.org>
cc: Brian Norris <computersforpeace@gmail.com>
cc: Boris Brezillon <bbrezillon@kernel.org>
cc: Marek Vasut <marek.vasut@gmail.com>
cc: Richard Weinberger <richard@nod.at>
cc: linux-mtd@lists.infradead.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/mtdsuper.c    | 189 ----------------------------------------------
 include/linux/mtd/super.h |   3 -
 2 files changed, 192 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 3f9a3b7b12c5..c3e2098372f2 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -193,195 +193,6 @@ int get_tree_mtd(struct fs_context *fc,
 }
 EXPORT_SYMBOL_GPL(get_tree_mtd);
 
-/*
- * compare superblocks to see if they're equivalent
- * - they are if the underlying MTD device is the same
- */
-static int get_sb_mtd_compare(struct super_block *sb, void *_mtd)
-{
-	struct mtd_info *mtd = _mtd;
-
-	if (sb->s_mtd == mtd) {
-		pr_debug("MTDSB: Match on device %d (\"%s\")\n",
-		      mtd->index, mtd->name);
-		return 1;
-	}
-
-	pr_debug("MTDSB: No match, device %d (\"%s\"), device %d (\"%s\")\n",
-	      sb->s_mtd->index, sb->s_mtd->name, mtd->index, mtd->name);
-	return 0;
-}
-
-/*
- * mark the superblock by the MTD device it is using
- * - set the device number to be the correct MTD block device for pesuperstence
- *   of NFS exports
- */
-static int get_sb_mtd_set(struct super_block *sb, void *_mtd)
-{
-	struct mtd_info *mtd = _mtd;
-
-	sb->s_mtd = mtd;
-	sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, mtd->index);
-	sb->s_bdi = bdi_get(mtd_bdi);
-
-	return 0;
-}
-
-/*
- * get a superblock on an MTD-backed filesystem
- */
-static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags,
-			  const char *dev_name, void *data,
-			  struct mtd_info *mtd,
-			  int (*fill_super)(struct super_block *, void *, int))
-{
-	struct super_block *sb;
-	int ret;
-
-	sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, flags, mtd);
-	if (IS_ERR(sb))
-		goto out_error;
-
-	if (sb->s_root)
-		goto already_mounted;
-
-	/* fresh new superblock */
-	pr_debug("MTDSB: New superblock for device %d (\"%s\")\n",
-	      mtd->index, mtd->name);
-
-	ret = fill_super(sb, data, flags & SB_SILENT ? 1 : 0);
-	if (ret < 0) {
-		deactivate_locked_super(sb);
-		return ERR_PTR(ret);
-	}
-
-	/* go */
-	sb->s_flags |= SB_ACTIVE;
-	return dget(sb->s_root);
-
-	/* new mountpoint for an already mounted superblock */
-already_mounted:
-	pr_debug("MTDSB: Device %d (\"%s\") is already mounted\n",
-	      mtd->index, mtd->name);
-	put_mtd_device(mtd);
-	return dget(sb->s_root);
-
-out_error:
-	put_mtd_device(mtd);
-	return ERR_CAST(sb);
-}
-
-/*
- * get a superblock on an MTD-backed filesystem by MTD device number
- */
-static struct dentry *mount_mtd_nr(struct file_system_type *fs_type, int flags,
-			 const char *dev_name, void *data, int mtdnr,
-			 int (*fill_super)(struct super_block *, void *, int))
-{
-	struct mtd_info *mtd;
-
-	mtd = get_mtd_device(NULL, mtdnr);
-	if (IS_ERR(mtd)) {
-		pr_debug("MTDSB: Device #%u doesn't appear to exist\n", mtdnr);
-		return ERR_CAST(mtd);
-	}
-
-	return mount_mtd_aux(fs_type, flags, dev_name, data, mtd, fill_super);
-}
-
-/*
- * set up an MTD-based superblock
- */
-struct dentry *mount_mtd(struct file_system_type *fs_type, int flags,
-	       const char *dev_name, void *data,
-	       int (*fill_super)(struct super_block *, void *, int))
-{
-#ifdef CONFIG_BLOCK
-	struct block_device *bdev;
-	int ret, major;
-#endif
-	int mtdnr;
-
-	if (!dev_name)
-		return ERR_PTR(-EINVAL);
-
-	pr_debug("MTDSB: dev_name \"%s\"\n", dev_name);
-
-	/* the preferred way of mounting in future; especially when
-	 * CONFIG_BLOCK=n - we specify the underlying MTD device by number or
-	 * by name, so that we don't require block device support to be present
-	 * in the kernel. */
-	if (dev_name[0] == 'm' && dev_name[1] == 't' && dev_name[2] == 'd') {
-		if (dev_name[3] == ':') {
-			struct mtd_info *mtd;
-
-			/* mount by MTD device name */
-			pr_debug("MTDSB: mtd:%%s, name \"%s\"\n",
-			      dev_name + 4);
-
-			mtd = get_mtd_device_nm(dev_name + 4);
-			if (!IS_ERR(mtd))
-				return mount_mtd_aux(
-					fs_type, flags,
-					dev_name, data, mtd,
-					fill_super);
-
-			printk(KERN_NOTICE "MTD:"
-			       " MTD device with name \"%s\" not found.\n",
-			       dev_name + 4);
-
-		} else if (isdigit(dev_name[3])) {
-			/* mount by MTD device number name */
-			char *endptr;
-
-			mtdnr = simple_strtoul(dev_name + 3, &endptr, 0);
-			if (!*endptr) {
-				/* It was a valid number */
-				pr_debug("MTDSB: mtd%%d, mtdnr %d\n",
-				      mtdnr);
-				return mount_mtd_nr(fs_type, flags,
-						     dev_name, data,
-						     mtdnr, fill_super);
-			}
-		}
-	}
-
-#ifdef CONFIG_BLOCK
-	/* try the old way - the hack where we allowed users to mount
-	 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev
-	 */
-	bdev = lookup_bdev(dev_name);
-	if (IS_ERR(bdev)) {
-		ret = PTR_ERR(bdev);
-		pr_debug("MTDSB: lookup_bdev() returned %d\n", ret);
-		return ERR_PTR(ret);
-	}
-	pr_debug("MTDSB: lookup_bdev() returned 0\n");
-
-	ret = -EINVAL;
-
-	major = MAJOR(bdev->bd_dev);
-	mtdnr = MINOR(bdev->bd_dev);
-	bdput(bdev);
-
-	if (major != MTD_BLOCK_MAJOR)
-		goto not_an_MTD_device;
-
-	return mount_mtd_nr(fs_type, flags, dev_name, data, mtdnr, fill_super);
-
-not_an_MTD_device:
-#endif /* CONFIG_BLOCK */
-
-	if (!(flags & SB_SILENT))
-		printk(KERN_NOTICE
-		       "MTD: Attempt to mount non-MTD device \"%s\"\n",
-		       dev_name);
-	return ERR_PTR(-EINVAL);
-}
-
-EXPORT_SYMBOL_GPL(mount_mtd);
-
 /*
  * destroy an MTD-based superblock
  */
diff --git a/include/linux/mtd/super.h b/include/linux/mtd/super.h
index 42db3f8e8136..3608a6c36fac 100644
--- a/include/linux/mtd/super.h
+++ b/include/linux/mtd/super.h
@@ -17,9 +17,6 @@
 extern int get_tree_mtd(struct fs_context *fc,
 		     int (*fill_super)(struct super_block *sb,
 				       struct fs_context *fc));
-extern struct dentry *mount_mtd(struct file_system_type *fs_type, int flags,
-		      const char *dev_name, void *data,
-		      int (*fill_super)(struct super_block *, void *, int));
 extern void kill_mtd_super(struct super_block *sb);
 
 
-- 
cgit v1.2.3


From df02450217c98e01b8b22f805314470df71f2b9b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 1 Jun 2019 18:51:15 -0400
Subject: make ramfs_fill_super() static

all users should just call ramfs_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ramfs/inode.c      | 2 +-
 include/linux/ramfs.h | 2 --
 init/do_mounts.c      | 6 ++----
 3 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 733c6b4193dc..b85d1e77e934 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -217,7 +217,7 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
 	return 0;
 }
 
-int ramfs_fill_super(struct super_block *sb, void *data, int silent)
+static int ramfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct ramfs_fs_info *fsi;
 	struct inode *inode;
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index ee582bdb7fda..e4d7d141545e 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -20,6 +20,4 @@ extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize);
 extern const struct file_operations ramfs_file_operations;
 extern const struct vm_operations_struct generic_file_vm_ops;
 
-int ramfs_fill_super(struct super_block *sb, void *data, int silent);
-
 #endif
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 53cb37b66227..baedc2ef579e 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -630,12 +630,10 @@ static bool is_tmpfs;
 static struct dentry *rootfs_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
-	void *fill = ramfs_fill_super;
-
 	if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
-		fill = shmem_fill_super;
+		return mount_nodev(fs_type, flags, data, shmem_fill_super);
 
-	return mount_nodev(fs_type, flags, data, fill);
+	return ramfs_mount(fs_type, flags, dev_name, data);
 }
 
 struct file_system_type rootfs_fs_type = {
-- 
cgit v1.2.3


From 7e30d2a5eb0b2d5853f06cb8a2d44937d80a6bd6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 1 Jun 2019 18:56:53 -0400
Subject: make shmem_fill_super() static

... have callers use shmem_mount()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c  | 2 +-
 include/linux/shmem_fs.h | 3 ++-
 init/do_mounts.c         | 2 +-
 mm/shmem.c               | 4 ++--
 4 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index bafdf86b0497..dabf5006a254 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -71,7 +71,7 @@ static struct dentry *dev_mount(struct file_system_type *fs_type, int flags,
 		      const char *dev_name, void *data)
 {
 #ifdef CONFIG_TMPFS
-	return mount_nodev(fs_type, flags, data, shmem_fill_super);
+	return shmem_mount(fs_type, flags, dev_name, data);
 #else
 	return ramfs_mount(fs_type, flags, dev_name, data);
 #endif
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 20d815a33145..88cb98b0e49b 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -50,7 +50,8 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
  * Functions in mm/shmem.c called directly from elsewhere:
  */
 extern int shmem_init(void);
-extern int shmem_fill_super(struct super_block *sb, void *data, int silent);
+extern struct dentry *shmem_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data);
 extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
diff --git a/init/do_mounts.c b/init/do_mounts.c
index baedc2ef579e..16c29e57f224 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -631,7 +631,7 @@ static struct dentry *rootfs_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
 	if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
-		return mount_nodev(fs_type, flags, data, shmem_fill_super);
+		return shmem_mount(fs_type, flags, dev_name, data);
 
 	return ramfs_mount(fs_type, flags, dev_name, data);
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 2bed4761f279..40f574c06375 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3547,7 +3547,7 @@ static void shmem_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-int shmem_fill_super(struct super_block *sb, void *data, int silent)
+static int shmem_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct inode *inode;
 	struct shmem_sb_info *sbinfo;
@@ -3759,7 +3759,7 @@ static const struct vm_operations_struct shmem_vm_ops = {
 #endif
 };
 
-static struct dentry *shmem_mount(struct file_system_type *fs_type,
+struct dentry *shmem_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
 	return mount_nodev(fs_type, flags, data, shmem_fill_super);
-- 
cgit v1.2.3


From 00d9e47f8ec2a293db9ebed86aab0583d9a49533 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 5 Sep 2019 14:03:40 +0200
Subject: posix-cpu-timers: Always clear head pointer on dequeue

The head pointer in struct cpu_timer is checked to be NULL in
posix_cpu_timer_del() when the delete raced with the exit cleanup. The
works correctly as long as the timer is actually dequeued via
posix_cpu_timers_exit*().

But if the timer was dequeued due to expiry the head pointer is still set
and triggers the warning.

In fact keeping the head pointer around after any dequeue is pointless as
is has no meaning at all after that.

Clear the head pointer always on dequeue and remove the unused requeue
function while at it.

Fixes: 60bda037f1dd ("posix-cpu-timers: Utilize timerqueue for storage")
Reported-by: syzbot+55acd54b57bb4b3840a4@syzkaller.appspotmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lkml.kernel.org/r/20190905120539.707986830@linutronix.de
---
 include/linux/posix-timers.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index e685916551bf..3d10c84a97a9 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -74,11 +74,6 @@ struct cpu_timer {
 	int			firing;
 };
 
-static inline bool cpu_timer_requeue(struct cpu_timer *ctmr)
-{
-	return timerqueue_add(ctmr->head, &ctmr->node);
-}
-
 static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
 				     struct cpu_timer *ctmr)
 {
@@ -88,8 +83,10 @@ static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
 
 static inline void cpu_timer_dequeue(struct cpu_timer *ctmr)
 {
-	if (!RB_EMPTY_NODE(&ctmr->node.node))
+	if (ctmr->head) {
 		timerqueue_del(ctmr->head, &ctmr->node);
+		ctmr->head = NULL;
+	}
 }
 
 static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
-- 
cgit v1.2.3


From 106feb2fdced0c240ca8ecb3d5db91b2b64e9a48 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Tue, 3 Sep 2019 14:10:20 +0300
Subject: PCI: pciehp: Remove pciehp_set_attention_status()

Remove pciehp_set_attention_status() and use pciehp_set_indicators()
instead, since the code is mostly the same.

Link: https://lore.kernel.org/r/20190903111021.1559-4-efremov@linux.com
Signed-off-by: Denis Efremov <efremov@linux.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 drivers/pci/hotplug/pciehp.h      |  1 -
 drivers/pci/hotplug/pciehp_core.c |  9 +++++++--
 drivers/pci/hotplug/pciehp_hpc.c  | 25 -------------------------
 include/uapi/linux/pci_regs.h     |  1 +
 4 files changed, 8 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 2ca0f35a6a23..1f69f43a3f29 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -170,7 +170,6 @@ void pciehp_get_power_status(struct controller *ctrl, u8 *status);
 #define INDICATOR_NOOP -1	/* Leave indicator unchanged */
 void pciehp_set_indicators(struct controller *ctrl, int pwr, int attn);
 
-void pciehp_set_attention_status(struct controller *ctrl, u8 status);
 void pciehp_get_latch_status(struct controller *ctrl, u8 *status);
 int pciehp_query_power_fault(struct controller *ctrl);
 void pciehp_green_led_on(struct controller *ctrl);
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 6ad0d86762cb..b3122c151b80 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -95,15 +95,20 @@ static void cleanup_slot(struct controller *ctrl)
 }
 
 /*
- * set_attention_status - Turns the Amber LED for a slot on, off or blink
+ * set_attention_status - Turns the Attention Indicator on, off or blinking
  */
 static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
 	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl->pcie->port;
 
+	if (status)
+		status <<= PCI_EXP_SLTCTL_ATTN_IND_SHIFT;
+	else
+		status = PCI_EXP_SLTCTL_ATTN_IND_OFF;
+
 	pci_config_pm_runtime_get(pdev);
-	pciehp_set_attention_status(ctrl, status);
+	pciehp_set_indicators(ctrl, INDICATOR_NOOP, status);
 	pci_config_pm_runtime_put(pdev);
 	return 0;
 }
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index a900bfd22447..6527345ccd8e 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -418,31 +418,6 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-void pciehp_set_attention_status(struct controller *ctrl, u8 value)
-{
-	u16 slot_cmd;
-
-	if (!ATTN_LED(ctrl))
-		return;
-
-	switch (value) {
-	case 0:		/* turn off */
-		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_OFF;
-		break;
-	case 1:		/* turn on */
-		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_ON;
-		break;
-	case 2:		/* turn blink */
-		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_BLINK;
-		break;
-	default:
-		return;
-	}
-	pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC);
-	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
-		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd);
-}
-
 /**
  * pciehp_set_indicators() - set attention indicator, power indicator, or both
  * @ctrl: PCIe hotplug controller
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f28e562d7ca8..de3e58afc564 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -591,6 +591,7 @@
 #define  PCI_EXP_SLTCTL_CCIE	0x0010	/* Command Completed Interrupt Enable */
 #define  PCI_EXP_SLTCTL_HPIE	0x0020	/* Hot-Plug Interrupt Enable */
 #define  PCI_EXP_SLTCTL_AIC	0x00c0	/* Attention Indicator Control */
+#define  PCI_EXP_SLTCTL_ATTN_IND_SHIFT 6      /* Attention Indicator shift */
 #define  PCI_EXP_SLTCTL_ATTN_IND_ON    0x0040 /* Attention Indicator on */
 #define  PCI_EXP_SLTCTL_ATTN_IND_BLINK 0x0080 /* Attention Indicator blinking */
 #define  PCI_EXP_SLTCTL_ATTN_IND_OFF   0x00c0 /* Attention Indicator off */
-- 
cgit v1.2.3


From 948d3f90e9e2f385556f9ebbf792713e5b497b7a Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Sun, 1 Sep 2019 14:10:35 +0300
Subject: net/mlx5: Expose HW capability bits for port buffer per priority
 congestion counters

Map capability bit indicating that HCA supports port buffer's congestion
counters. Also map registers with the corresponding counters.

Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 29 ++++++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8dd081051a79..f3773e8536bb 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1316,6 +1316,7 @@ enum {
 	MLX5_PER_PRIORITY_COUNTERS_GROUP      = 0x10,
 	MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11,
 	MLX5_PHYSICAL_LAYER_COUNTERS_GROUP    = 0x12,
+	MLX5_PER_TRAFFIC_CLASS_CONGESTION_GROUP = 0x13,
 	MLX5_PHYSICAL_LAYER_STATISTICAL_GROUP = 0x16,
 	MLX5_INFINIBAND_PORT_COUNTERS_GROUP   = 0x20,
 };
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 7d65c0578ac9..a487b681b516 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1196,7 +1196,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         rts2rts_qp_counters_set_id[0x1];
 	u8         reserved_at_16a[0x2];
 	u8         vnic_env_int_rq_oob[0x1];
-	u8         reserved_at_16d[0x2];
+	u8         sbcam_reg[0x1];
+	u8         reserved_at_16e[0x1];
 	u8         qcam_reg[0x1];
 	u8         gid_table_size[0x10];
 
@@ -1960,12 +1961,28 @@ struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits {
 	u8         port_xmit_wait[0x20];
 };
 
-struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits {
+struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits {
 	u8         transmit_queue_high[0x20];
 
 	u8         transmit_queue_low[0x20];
 
-	u8         reserved_at_40[0x780];
+	u8         no_buffer_discard_uc_high[0x20];
+
+	u8         no_buffer_discard_uc_low[0x20];
+
+	u8         reserved_at_80[0x740];
+};
+
+struct mlx5_ifc_eth_per_tc_congest_prio_grp_data_layout_bits {
+	u8         wred_discard_high[0x20];
+
+	u8         wred_discard_low[0x20];
+
+	u8         ecn_marked_tc_high[0x20];
+
+	u8         ecn_marked_tc_low[0x20];
+
+	u8         reserved_at_80[0x740];
 };
 
 struct mlx5_ifc_eth_per_prio_grp_data_layout_bits {
@@ -3642,7 +3659,8 @@ union mlx5_ifc_eth_cntrs_grp_data_layout_auto_bits {
 	struct mlx5_ifc_eth_3635_cntrs_grp_data_layout_bits eth_3635_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
-	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
+	struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits eth_per_tc_prio_grp_data_layout;
+	struct mlx5_ifc_eth_per_tc_congest_prio_grp_data_layout_bits eth_per_tc_congest_prio_grp_data_layout;
 	struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits ib_port_cntrs_grp_data_layout;
 	struct mlx5_ifc_phys_layer_cntrs_bits phys_layer_cntrs;
 	struct mlx5_ifc_phys_layer_statistical_cntrs_bits phys_layer_statistical_cntrs;
@@ -9422,7 +9440,8 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_eth_802_3_cntrs_grp_data_layout_bits eth_802_3_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_extended_cntrs_grp_data_layout_bits eth_extended_cntrs_grp_data_layout;
 	struct mlx5_ifc_eth_per_prio_grp_data_layout_bits eth_per_prio_grp_data_layout;
-	struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits eth_per_traffic_grp_data_layout;
+	struct mlx5_ifc_eth_per_tc_prio_grp_data_layout_bits eth_per_tc_prio_grp_data_layout;
+	struct mlx5_ifc_eth_per_tc_congest_prio_grp_data_layout_bits eth_per_tc_congest_prio_grp_data_layout;
 	struct mlx5_ifc_lane_2_module_mapping_bits lane_2_module_mapping;
 	struct mlx5_ifc_pamp_reg_bits pamp_reg;
 	struct mlx5_ifc_paos_reg_bits paos_reg;
-- 
cgit v1.2.3


From 68c43f133a754c7bf5cb1018bb16dc0821cc43a1 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 5 Sep 2019 18:51:31 +0900
Subject: block: Introduce elevator features

Introduce the definition of elevator features through the
elevator_features flags in the elevator_type structure. Each flag can
represent a feature supported by an elevator. The first feature defined
by this patch is support for zoned block device sequential write
constraint with the flag ELEVATOR_F_ZBD_SEQ_WRITE, which is implemented
by the mq-deadline elevator using zone write locking.

Other possible features are IO priorities, write hints, latency targets
or single-LUN dual-actuator disks (for which the elevator could maintain
one LBA ordered list per actuator).

The required_elevator_features field is also added to the request_queue
structure to allow a device driver to specify elevator feature flags
that an elevator must support for the correct operation of the device
(e.g. device drivers for zoned block devices can have the
ELEVATOR_F_ZBD_SEQ_WRITE flag as a required feature).
The helper function blk_queue_required_elevator_features() is
defined for setting this new field.

With these two new fields in place, the elevator functions
elevator_match() and elevator_find() are modified to allow a user to set
only an elevator with a set of features that satisfies the device
required features. Elevators not matching the device requirements are
not shown in the device sysfs queue/scheduler file to prevent their use.

The "none" elevator can always be selected as before.

Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-settings.c     | 16 ++++++++++++++++
 block/elevator.c         | 49 +++++++++++++++++++++++++++++++++++++-----------
 block/mq-deadline.c      |  1 +
 include/linux/blkdev.h   |  4 ++++
 include/linux/elevator.h |  8 ++++++++
 5 files changed, 67 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/block/blk-settings.c b/block/blk-settings.c
index a058997b9cce..6bd1e3b082d8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -832,6 +832,22 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
 }
 EXPORT_SYMBOL_GPL(blk_queue_write_cache);
 
+/**
+ * blk_queue_required_elevator_features - Set a queue required elevator features
+ * @q:		the request queue for the target device
+ * @features:	Required elevator features OR'ed together
+ *
+ * Tell the block layer that for the device controlled through @q, only the
+ * only elevators that can be used are those that implement at least the set of
+ * features specified by @features.
+ */
+void blk_queue_required_elevator_features(struct request_queue *q,
+					  unsigned int features)
+{
+	q->required_elevator_features = features;
+}
+EXPORT_SYMBOL_GPL(blk_queue_required_elevator_features);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/elevator.c b/block/elevator.c
index 2944c129760c..ac7c8ad580ba 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -83,8 +83,26 @@ bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
 }
 EXPORT_SYMBOL(elv_bio_merge_ok);
 
-static bool elevator_match(const struct elevator_type *e, const char *name)
+static inline bool elv_support_features(unsigned int elv_features,
+					unsigned int required_features)
 {
+	return (required_features & elv_features) == required_features;
+}
+
+/**
+ * elevator_match - Test an elevator name and features
+ * @e: Scheduler to test
+ * @name: Elevator name to test
+ * @required_features: Features that the elevator must provide
+ *
+ * Return true is the elevator @e name matches @name and if @e provides all the
+ * the feratures spcified by @required_features.
+ */
+static bool elevator_match(const struct elevator_type *e, const char *name,
+			   unsigned int required_features)
+{
+	if (!elv_support_features(e->elevator_features, required_features))
+		return false;
 	if (!strcmp(e->elevator_name, name))
 		return true;
 	if (e->elevator_alias && !strcmp(e->elevator_alias, name))
@@ -93,15 +111,21 @@ static bool elevator_match(const struct elevator_type *e, const char *name)
 	return false;
 }
 
-/*
- * Return scheduler with name 'name'
+/**
+ * elevator_find - Find an elevator
+ * @name: Name of the elevator to find
+ * @required_features: Features that the elevator must provide
+ *
+ * Return the first registered scheduler with name @name and supporting the
+ * features @required_features and NULL otherwise.
  */
-static struct elevator_type *elevator_find(const char *name)
+static struct elevator_type *elevator_find(const char *name,
+					   unsigned int required_features)
 {
 	struct elevator_type *e;
 
 	list_for_each_entry(e, &elv_list, list) {
-		if (elevator_match(e, name))
+		if (elevator_match(e, name, required_features))
 			return e;
 	}
 
@@ -120,12 +144,12 @@ static struct elevator_type *elevator_get(struct request_queue *q,
 
 	spin_lock(&elv_list_lock);
 
-	e = elevator_find(name);
+	e = elevator_find(name, q->required_elevator_features);
 	if (!e && try_loading) {
 		spin_unlock(&elv_list_lock);
 		request_module("%s-iosched", name);
 		spin_lock(&elv_list_lock);
-		e = elevator_find(name);
+		e = elevator_find(name, q->required_elevator_features);
 	}
 
 	if (e && !try_module_get(e->elevator_owner))
@@ -525,7 +549,7 @@ int elv_register(struct elevator_type *e)
 
 	/* register, don't allow duplicate names */
 	spin_lock(&elv_list_lock);
-	if (elevator_find(e->elevator_name)) {
+	if (elevator_find(e->elevator_name, 0)) {
 		spin_unlock(&elv_list_lock);
 		kmem_cache_destroy(e->icq_cache);
 		return -EBUSY;
@@ -709,7 +733,8 @@ static int __elevator_change(struct request_queue *q, const char *name)
 	if (!e)
 		return -EINVAL;
 
-	if (q->elevator && elevator_match(q->elevator->type, elevator_name)) {
+	if (q->elevator &&
+	    elevator_match(q->elevator->type, elevator_name, 0)) {
 		elevator_put(e);
 		return 0;
 	}
@@ -749,11 +774,13 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
 
 	spin_lock(&elv_list_lock);
 	list_for_each_entry(__e, &elv_list, list) {
-		if (elv && elevator_match(elv, __e->elevator_name)) {
+		if (elv && elevator_match(elv, __e->elevator_name, 0)) {
 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
 			continue;
 		}
-		if (elv_support_iosched(q))
+		if (elv_support_iosched(q) &&
+		    elevator_match(__e, __e->elevator_name,
+				   q->required_elevator_features))
 			len += sprintf(name+len, "%s ", __e->elevator_name);
 	}
 	spin_unlock(&elv_list_lock);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 35e84bc0ec8c..b490f47fd553 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -794,6 +794,7 @@ static struct elevator_type mq_deadline = {
 	.elevator_attrs = deadline_attrs,
 	.elevator_name = "mq-deadline",
 	.elevator_alias = "deadline",
+	.elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE,
 	.elevator_owner = THIS_MODULE,
 };
 MODULE_ALIAS("mq-deadline-iosched");
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d0ad21e4771b..b196124e3240 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -496,6 +496,8 @@ struct request_queue {
 
 	struct queue_limits	limits;
 
+	unsigned int		required_elevator_features;
+
 #ifdef CONFIG_BLK_DEV_ZONED
 	/*
 	 * Zoned block device information for request dispatch control.
@@ -1097,6 +1099,8 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
+extern void blk_queue_required_elevator_features(struct request_queue *q,
+						 unsigned int features);
 
 /*
  * Number of physical segments as sent to the device.
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 1dd014c9c87b..901bda352dcb 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -76,6 +76,7 @@ struct elevator_type
 	struct elv_fs_entry *elevator_attrs;
 	const char *elevator_name;
 	const char *elevator_alias;
+	const unsigned int elevator_features;
 	struct module *elevator_owner;
 #ifdef CONFIG_BLK_DEBUG_FS
 	const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
@@ -165,5 +166,12 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
 #define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
 #define rq_fifo_clear(rq)	list_del_init(&(rq)->queuelist)
 
+/*
+ * Elevator features.
+ */
+
+/* Supports zoned block devices sequential write constraint */
+#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
+
 #endif /* CONFIG_BLOCK */
 #endif
-- 
cgit v1.2.3


From 737eb78e82d52d35df166d29af32bf61992de71d Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Thu, 5 Sep 2019 18:51:33 +0900
Subject: block: Delay default elevator initialization

When elevator_init_mq() is called from blk_mq_init_allocated_queue(),
the only information known about the device is the number of hardware
queues as the block device scan by the device driver is not completed
yet for most drivers. The device type and elevator required features
are not set yet, preventing to correctly select the default elevator
most suitable for the device.

This currently affects all multi-queue zoned block devices which default
to the "none" elevator instead of the required "mq-deadline" elevator.
These drives currently include host-managed SMR disks connected to a
smartpqi HBA and null_blk block devices with zoned mode enabled.
Upcoming NVMe Zoned Namespace devices will also be affected.

Fix this by adding the boolean elevator_init argument to
blk_mq_init_allocated_queue() to control the execution of
elevator_init_mq(). Two cases exist:
1) elevator_init = false is used for calls to
   blk_mq_init_allocated_queue() within blk_mq_init_queue(). In this
   case, a call to elevator_init_mq() is added to __device_add_disk(),
   resulting in the delayed initialization of the queue elevator
   after the device driver finished probing the device information. This
   effectively allows elevator_init_mq() access to more information
   about the device.
2) elevator_init = true preserves the current behavior of initializing
   the elevator directly from blk_mq_init_allocated_queue(). This case
   is used for the special request based DM devices where the device
   gendisk is created before the queue initialization and device
   information (e.g. queue limits) is already known when the queue
   initialization is executed.

Additionally, to make sure that the elevator initialization is never
done while requests are in-flight (there should be none when the device
driver calls device_add_disk()), freeze and quiesce the device request
queue before calling blk_mq_init_sched() in elevator_init_mq().

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 12 +++++++++---
 block/elevator.c       |  7 +++++++
 block/genhd.c          |  9 +++++++++
 drivers/md/dm-rq.c     |  2 +-
 include/linux/blk-mq.h |  3 ++-
 5 files changed, 28 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d10a7ab4207a..3647776a0f6e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2695,7 +2695,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!uninit_q)
 		return ERR_PTR(-ENOMEM);
 
-	q = blk_mq_init_allocated_queue(set, uninit_q);
+	/*
+	 * Initialize the queue without an elevator. device_add_disk() will do
+	 * the initialization.
+	 */
+	q = blk_mq_init_allocated_queue(set, uninit_q, false);
 	if (IS_ERR(q))
 		blk_cleanup_queue(uninit_q);
 
@@ -2846,7 +2850,8 @@ static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
 }
 
 struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-						  struct request_queue *q)
+						  struct request_queue *q,
+						  bool elevator_init)
 {
 	/* mark the queue as mq asap */
 	q->mq_ops = set->ops;
@@ -2908,7 +2913,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	blk_mq_add_queue_tag_set(set, q);
 	blk_mq_map_swqueue(q);
 
-	elevator_init_mq(q);
+	if (elevator_init)
+		elevator_init_mq(q);
 
 	return q;
 
diff --git a/block/elevator.c b/block/elevator.c
index 520d6b224b74..096a670d22d7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -712,7 +712,14 @@ void elevator_init_mq(struct request_queue *q)
 	if (!e)
 		return;
 
+	blk_mq_freeze_queue(q);
+	blk_mq_quiesce_queue(q);
+
 	err = blk_mq_init_sched(q, e);
+
+	blk_mq_unquiesce_queue(q);
+	blk_mq_unfreeze_queue(q);
+
 	if (err) {
 		pr_warn("\"%s\" elevator initialization failed, "
 			"falling back to \"none\"\n", e->elevator_name);
diff --git a/block/genhd.c b/block/genhd.c
index 54f1f0d381f4..26b31fcae217 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -695,6 +695,15 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	dev_t devt;
 	int retval;
 
+	/*
+	 * The disk queue should now be all set with enough information about
+	 * the device for the elevator code to pick an adequate default
+	 * elevator if one is needed, that is, for devices requesting queue
+	 * registration.
+	 */
+	if (register_queue)
+		elevator_init_mq(disk->queue);
+
 	/* minors == 0 indicates to use ext devt from part0 and should
 	 * be accompanied with EXT_DEVT flag.  Make sure all
 	 * parameters make sense.
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 21d5c1784d0c..3f8577e2c13b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -563,7 +563,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
 	if (err)
 		goto out_kfree_tag_set;
 
-	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
+	q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
 	if (IS_ERR(q)) {
 		err = PTR_ERR(q);
 		goto out_tag_set;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 62a3bb715899..0bf056de5cc3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -248,7 +248,8 @@ enum {
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
-						  struct request_queue *q);
+						  struct request_queue *q,
+						  bool elevator_init);
 struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
 						const struct blk_mq_ops *ops,
 						unsigned int queue_depth,
-- 
cgit v1.2.3


From 95a7233c452a58a4c2310c456c73997853b2ec46 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Wed, 4 Sep 2019 16:56:37 +0300
Subject: net: openvswitch: Set OvS recirc_id from tc chain index

Offloaded OvS datapath rules are translated one to one to tc rules,
for example the following simplified OvS rule:

recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)

Will be translated to the following tc rule:

$ tc filter add dev dev1 ingress \
	    prio 1 chain 0 proto ip \
		flower tcp ct_state -trk \
		action ct pipe \
		action goto chain 2

Received packets will first travel though tc, and if they aren't stolen
by it, like in the above rule, they will continue to OvS datapath.
Since we already did some actions (action ct in this case) which might
modify the packets, and updated action stats, we would like to continue
the proccessing with the correct recirc_id in OvS (here recirc_id(2))
where we left off.

To support this, introduce a new skb extension for tc, which
will be used for translating tc chain to ovs recirc_id to
handle these miss cases. Last tc chain index will be set
by tc goto chain action and read by OvS datapath.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h           | 13 +++++++++++++
 include/uapi/linux/openvswitch.h |  3 +++
 net/core/skbuff.c                |  6 ++++++
 net/openvswitch/datapath.c       | 38 +++++++++++++++++++++++++++++++++-----
 net/openvswitch/datapath.h       |  2 ++
 net/openvswitch/flow.c           | 13 +++++++++++++
 net/sched/Kconfig                | 13 +++++++++++++
 net/sched/cls_api.c              | 12 ++++++++++++
 8 files changed, 95 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 77c6dc88e95d..028e684fa974 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -279,6 +279,16 @@ struct nf_bridge_info {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+/* Chain in tc_skb_ext will be used to share the tc chain with
+ * ovs recirc_id. It will be set to the current chain by tc
+ * and read by ovs to recirc_id.
+ */
+struct tc_skb_ext {
+	__u32 chain;
+};
+#endif
+
 struct sk_buff_head {
 	/* These two members must be first. */
 	struct sk_buff	*next;
@@ -4057,6 +4067,9 @@ enum skb_ext_id {
 #endif
 #ifdef CONFIG_XFRM
 	SKB_EXT_SEC_PATH,
+#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	TC_SKB_EXT,
 #endif
 	SKB_EXT_NUM, /* must be last */
 };
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index f271f1ec50ae..1887a451c388 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -123,6 +123,9 @@ struct ovs_vport_stats {
 /* Allow datapath to associate multiple Netlink PIDs to each vport */
 #define OVS_DP_F_VPORT_PIDS	(1 << 1)
 
+/* Allow tc offload recirc sharing */
+#define OVS_DP_F_TC_RECIRC_SHARING	(1 << 2)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL      ((__u32)0)
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ea8e8d332d85..2b40b5a9425b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4087,6 +4087,9 @@ static const u8 skb_ext_type_len[] = {
 #ifdef CONFIG_XFRM
 	[SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
 #endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
+#endif
 };
 
 static __always_inline unsigned int skb_ext_total_length(void)
@@ -4097,6 +4100,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
 #endif
 #ifdef CONFIG_XFRM
 		skb_ext_type_len[SKB_EXT_SEC_PATH] +
+#endif
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		skb_ext_type_len[TC_SKB_EXT] +
 #endif
 		0;
 }
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 65122bbccd27..dde9d762edee 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1545,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
 	dp->user_features = 0;
 }
 
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
-	if (a[OVS_DP_ATTR_USER_FEATURES])
-		dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+	u32 user_features = 0;
+
+	if (a[OVS_DP_ATTR_USER_FEATURES]) {
+		user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+		if (user_features & ~(OVS_DP_F_VPORT_PIDS |
+				      OVS_DP_F_UNALIGNED |
+				      OVS_DP_F_TC_RECIRC_SHARING))
+			return -EOPNOTSUPP;
+
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+		if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
+			return -EOPNOTSUPP;
+#endif
+	}
+
+	dp->user_features = user_features;
+
+	if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+		static_branch_enable(&tc_recirc_sharing_support);
+	else
+		static_branch_disable(&tc_recirc_sharing_support);
+
+	return 0;
 }
 
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1610,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	parms.port_no = OVSP_LOCAL;
 	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
 
-	ovs_dp_change(dp, a);
+	err = ovs_dp_change(dp, a);
+	if (err)
+		goto err_destroy_meters;
 
 	/* So far only local changes have been made, now need the lock. */
 	ovs_lock();
@@ -1736,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	if (IS_ERR(dp))
 		goto err_unlock_free;
 
-	ovs_dp_change(dp, info->attrs);
+	err = ovs_dp_change(dp, info->attrs);
+	if (err)
+		goto err_unlock_free;
 
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
 				   info->snd_seq, 0, OVS_DP_CMD_SET);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 751d34accdf9..81e85dde8217 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 
+DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9d81d2c7bf82..38147e6a20f5 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
 int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 			 struct sk_buff *skb, struct sw_flow_key *key)
 {
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	struct tc_skb_ext *tc_ext;
+#endif
 	int res, err;
 
 	/* Extract metadata from packet. */
@@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 	if (res < 0)
 		return res;
 	key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+		tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+		key->recirc_id = tc_ext ? tc_ext->chain : 0;
+	} else {
+		key->recirc_id = 0;
+	}
+#else
 	key->recirc_id = 0;
+#endif
 
 	err = key_extract(skb, key);
 	if (!err)
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index afd2ba157a13..b3faafeafab9 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX
         tristate "Support to encoding decoding skb tcindex on IFE action"
         depends on NET_ACT_IFE
 
+config NET_TC_SKB_EXT
+	bool "TC recirculation support"
+	depends on NET_CLS_ACT
+	default y if NET_CLS_ACT
+	select SKB_EXTENSIONS
+
+	help
+	  Say Y here to allow tc chain misses to continue in OvS datapath in
+	  the correct recirc_id, and hardware chain misses to continue in
+	  the correct chain in tc software datapath.
+
+	  Say N here if you won't be using tc<->ovs offload or tc chains offload.
+
 endif # NET_SCHED
 
 config NET_SCH_FIFO
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 671ca905dbb5..05c4fe1c3ca2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1514,6 +1514,18 @@ reclassify:
 			goto reset;
 		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
 			first_tp = res->goto_tp;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+			{
+				struct tc_skb_ext *ext;
+
+				ext = skb_ext_add(skb, TC_SKB_EXT);
+				if (WARN_ON_ONCE(!ext))
+					return TC_ACT_SHOT;
+
+				ext->chain = err & TC_ACT_EXT_VAL_MASK;
+			}
+#endif
 			goto reset;
 		}
 #endif
-- 
cgit v1.2.3


From d1967e495a8d1dd6b8ff2df9d3e045c5d60a37a6 Mon Sep 17 00:00:00 2001
From: David Dai <zdai@linux.vnet.ibm.com>
Date: Wed, 4 Sep 2019 10:03:43 -0500
Subject: net_sched: act_police: add 2 new attributes to support police 64bit
 rate and peakrate

For high speed adapter like Mellanox CX-5 card, it can reach upto
100 Gbits per second bandwidth. Currently htb already supports 64bit rate
in tc utility. However police action rate and peakrate are still limited
to 32bit value (upto 32 Gbits per second). Add 2 new attributes
TCA_POLICE_RATE64 and TCA_POLICE_RATE64 in kernel for 64bit support
so that tc utility can use them for 64bit rate and peakrate value to
break the 32bit limit, and still keep the backward binary compatibility.

Tested-by: David Dai <zdai@linux.vnet.ibm.com>
Signed-off-by: David Dai <zdai@linux.vnet.ibm.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  2 ++
 net/sched/act_police.c       | 27 +++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index b057aeeb6338..a6aa466fac9e 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -160,6 +160,8 @@ enum {
 	TCA_POLICE_RESULT,
 	TCA_POLICE_TM,
 	TCA_POLICE_PAD,
+	TCA_POLICE_RATE64,
+	TCA_POLICE_PEAKRATE64,
 	__TCA_POLICE_MAX
 #define TCA_POLICE_RESULT TCA_POLICE_RESULT
 };
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 6315e0f8d26e..89c04c52af3d 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 	[TCA_POLICE_PEAKRATE]	= { .len = TC_RTAB_SIZE },
 	[TCA_POLICE_AVRATE]	= { .type = NLA_U32 },
 	[TCA_POLICE_RESULT]	= { .type = NLA_U32 },
+	[TCA_POLICE_RATE64]     = { .type = NLA_U64 },
+	[TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 },
 };
 
 static int tcf_police_init(struct net *net, struct nlattr *nla,
@@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	struct tcf_police_params *new;
 	bool exists = false;
 	u32 index;
+	u64 rate64, prate64;
 
 	if (nla == NULL)
 		return -EINVAL;
@@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	}
 	if (R_tab) {
 		new->rate_present = true;
-		psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0);
+		rate64 = tb[TCA_POLICE_RATE64] ?
+			 nla_get_u64(tb[TCA_POLICE_RATE64]) : 0;
+		psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64);
 		qdisc_put_rtab(R_tab);
 	} else {
 		new->rate_present = false;
 	}
 	if (P_tab) {
 		new->peak_present = true;
-		psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0);
+		prate64 = tb[TCA_POLICE_PEAKRATE64] ?
+			  nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0;
+		psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64);
 		qdisc_put_rtab(P_tab);
 	} else {
 		new->peak_present = false;
@@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a,
 				      lockdep_is_held(&police->tcf_lock));
 	opt.mtu = p->tcfp_mtu;
 	opt.burst = PSCHED_NS2TICKS(p->tcfp_burst);
-	if (p->rate_present)
+	if (p->rate_present) {
 		psched_ratecfg_getrate(&opt.rate, &p->rate);
-	if (p->peak_present)
+		if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) &&
+		    nla_put_u64_64bit(skb, TCA_POLICE_RATE64,
+				      police->params->rate.rate_bytes_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
+	if (p->peak_present) {
 		psched_ratecfg_getrate(&opt.peakrate, &p->peak);
+		if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) &&
+		    nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64,
+				      police->params->peak.rate_bytes_ps,
+				      TCA_POLICE_PAD))
+			goto nla_put_failure;
+	}
 	if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt))
 		goto nla_put_failure;
 	if (p->tcfp_result &&
-- 
cgit v1.2.3


From ac90f249e15cd2a850daa9e36e15f81ce1ff6550 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 6 Sep 2019 10:26:21 -0600
Subject: io_uring: expose single mmap capability

After commit 75b28affdd6a we can get by with just a single mmap to
map both the sq and cq ring. However, userspace doesn't know that.

Add a features variable to io_uring_params, and notify userspace
that the kernel has this ability. This can then be used in liburing
(or in applications directly) to avoid the second mmap.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 2 ++
 include/uapi/linux/io_uring.h | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 17dfe57c57f8..be24596e90d7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3391,6 +3391,8 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
 	p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries);
 	p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
 	p->cq_off.cqes = offsetof(struct io_rings, cqes);
+
+	p->features = IORING_FEAT_SINGLE_MMAP;
 	return ret;
 err:
 	io_ring_ctx_wait_and_kill(ctx);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1e1652f25cc1..96ee9d94b73e 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -128,11 +128,17 @@ struct io_uring_params {
 	__u32 flags;
 	__u32 sq_thread_cpu;
 	__u32 sq_thread_idle;
-	__u32 resv[5];
+	__u32 features;
+	__u32 resv[4];
 	struct io_sqring_offsets sq_off;
 	struct io_cqring_offsets cq_off;
 };
 
+/*
+ * io_uring_params->features flags
+ */
+#define IORING_FEAT_SINGLE_MMAP		(1U << 0)
+
 /*
  * io_uring_register(2) opcodes and arguments
  */
-- 
cgit v1.2.3


From ed56ef675ae6ef0e6f7d42b9c42dae61172f0960 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 5 Sep 2019 23:01:16 +0000
Subject: Drivers: hv: vmbus: Add a helper function is_sub_channel()

The existing method of telling if a channel is sub-channel in
vmbus_process_offer() is cumbersome. This new simple helper function
is preferred in future.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/hyperv.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6256cc34c4a6..2d39248cff96 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
 		} pipe;
 	} u;
 	/*
-	 * The sub_channel_index is defined in win8.
+	 * The sub_channel_index is defined in Win8: a value of zero means a
+	 * primary channel and a value of non-zero means a sub-channel.
+	 *
+	 * Before Win8, the field is reserved, meaning it's always zero.
 	 */
 	u16 sub_channel_index;
 	u16 reserved3;
@@ -934,6 +937,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
 		  VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
 }
 
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+	return c->offermsg.offer.sub_channel_index != 0;
+}
+
 static inline void set_channel_affinity_state(struct vmbus_channel *c,
 					      enum hv_numa_policy policy)
 {
-- 
cgit v1.2.3


From 271b2224d42f88870e6b060924ee374871c131fc Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 5 Sep 2019 23:01:17 +0000
Subject: Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for
 hibernation

The high-level VSC drivers will implement device-specific callbacks.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/vmbus_drv.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hyperv.h |  3 +++
 2 files changed, 49 insertions(+)

(limited to 'include')

diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 2ef375ce58ac..a30c70adf9a0 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -911,6 +911,43 @@ static void vmbus_shutdown(struct device *child_device)
 		drv->shutdown(dev);
 }
 
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+	struct hv_driver *drv;
+	struct hv_device *dev = device_to_hv_device(child_device);
+
+	/* The device may not be attached yet */
+	if (!child_device->driver)
+		return 0;
+
+	drv = drv_to_hv_drv(child_device->driver);
+	if (!drv->suspend)
+		return -EOPNOTSUPP;
+
+	return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+	struct hv_driver *drv;
+	struct hv_device *dev = device_to_hv_device(child_device);
+
+	/* The device may not be attached yet */
+	if (!child_device->driver)
+		return 0;
+
+	drv = drv_to_hv_drv(child_device->driver);
+	if (!drv->resume)
+		return -EOPNOTSUPP;
+
+	return drv->resume(dev);
+}
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
@@ -926,6 +963,14 @@ static void vmbus_device_release(struct device *device)
 	kfree(hv_dev);
 }
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
 /* The one and only one */
 static struct bus_type  hv_bus = {
 	.name =		"vmbus",
@@ -936,6 +981,7 @@ static struct bus_type  hv_bus = {
 	.uevent =		vmbus_uevent,
 	.dev_groups =		vmbus_dev_groups,
 	.drv_groups =		vmbus_drv_groups,
+	.pm =			&vmbus_pm,
 };
 
 struct onmessage_work_context {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2d39248cff96..8a60e7766037 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1157,6 +1157,9 @@ struct hv_driver {
 	int (*remove)(struct hv_device *);
 	void (*shutdown)(struct hv_device *);
 
+	int (*suspend)(struct hv_device *);
+	int (*resume)(struct hv_device *);
+
 };
 
 /* Base device object */
-- 
cgit v1.2.3


From d8bd2d442bb2688b428ac7164e5dc6d95d4fa65b Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 5 Sep 2019 23:01:22 +0000
Subject: Drivers: hv: vmbus: Resume after fixing up old primary channels

When the host re-offers the primary channels upon resume, the host only
guarantees the Instance GUID  doesn't change, so vmbus_bus_suspend()
should invalidate channel->offermsg.child_relid and figure out the
number of primary channels that need to be fixed up upon resume.

Upon resume, vmbus_onoffer() finds the old channel structs, and maps
the new offers to the old channels, and fixes up the old structs,
and finally the resume callbacks of the VSC drivers will re-open
the channels.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/channel_mgmt.c | 85 ++++++++++++++++++++++++++++++++++++-----------
 drivers/hv/connection.c   |  2 ++
 drivers/hv/hyperv_vmbus.h | 14 ++++++++
 drivers/hv/vmbus_drv.c    | 17 ++++++++++
 include/linux/hyperv.h    |  3 ++
 5 files changed, 101 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 5518d031f62a..8eb167540b4f 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
 		cpumask_clear_cpu(channel->target_cpu,
 				  &primary_channel->alloced_cpus_in_node);
 
-	vmbus_release_relid(channel->offermsg.child_relid);
+	/*
+	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+	 * the relid is invalidated; after hibernation, when the user-space app
+	 * destroys the channel, the relid is INVALID_RELID, and in this case
+	 * it's unnecessary and unsafe to release the old relid, since the same
+	 * relid can refer to a completely different channel now.
+	 */
+	if (channel->offermsg.child_relid != INVALID_RELID)
+		vmbus_release_relid(channel->offermsg.child_relid);
 
 	free_channel(channel);
 }
@@ -851,6 +859,36 @@ void vmbus_initiate_unload(bool crash)
 		vmbus_wait_for_unload();
 }
 
+static void check_ready_for_resume_event(void)
+{
+	/*
+	 * If all the old primary channels have been fixed up, then it's safe
+	 * to resume.
+	 */
+	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+		complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+				      struct vmbus_channel_offer_channel *offer)
+{
+	/*
+	 * Setup state for signalling the host.
+	 */
+	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+	if (vmbus_proto_version != VERSION_WS2008) {
+		channel->is_dedicated_interrupt =
+				(offer->is_dedicated_interrupt != 0);
+		channel->sig_event = offer->connection_id;
+	}
+
+	memcpy(&channel->offermsg, offer,
+	       sizeof(struct vmbus_channel_offer_channel));
+	channel->monitor_grp = (u8)offer->monitorid / 32;
+	channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
 /*
  * find_primary_channel_by_offer - Get the channel object given the new offer.
  * This is only used in the resume path of hibernation.
@@ -902,14 +940,29 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 		atomic_dec(&vmbus_connection.offer_in_progress);
 
 		/*
-		 * We're resuming from hibernation: we expect the host to send
-		 * exactly the same offers that we had before the hibernation.
+		 * We're resuming from hibernation: all the sub-channel and
+		 * hv_sock channels we had before the hibernation should have
+		 * been cleaned up, and now we must be seeing a re-offered
+		 * primary channel that we had before the hibernation.
 		 */
+
+		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+		/* Fix up the relid. */
+		oldchannel->offermsg.child_relid = offer->child_relid;
+
 		offer_sz = sizeof(*offer);
-		if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
+		if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+			check_ready_for_resume_event();
 			return;
+		}
 
-		pr_debug("Mismatched offer from the host (relid=%d)\n",
+		/*
+		 * This is not an error, since the host can also change the
+		 * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+		 * the offer->connection_id of the Mellanox VF vmbus device
+		 * can change when the host reoffers the device upon resume.
+		 */
+		pr_debug("vmbus offer changed: relid=%d\n",
 			 offer->child_relid);
 
 		print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
@@ -917,6 +970,12 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 				     false);
 		print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
 				     16, 4, offer, offer_sz, false);
+
+		/* Fix up the old channel. */
+		vmbus_setup_channel_state(oldchannel, offer);
+
+		check_ready_for_resume_event();
+
 		return;
 	}
 
@@ -929,21 +988,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 		return;
 	}
 
-	/*
-	 * Setup state for signalling the host.
-	 */
-	newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
-	if (vmbus_proto_version != VERSION_WS2008) {
-		newchannel->is_dedicated_interrupt =
-				(offer->is_dedicated_interrupt != 0);
-		newchannel->sig_event = offer->connection_id;
-	}
-
-	memcpy(&newchannel->offermsg, offer,
-	       sizeof(struct vmbus_channel_offer_channel));
-	newchannel->monitor_grp = (u8)offer->monitorid / 32;
-	newchannel->monitor_bit = (u8)offer->monitorid % 32;
+	vmbus_setup_channel_state(newchannel, offer);
 
 	vmbus_process_offer(newchannel);
 }
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 99851ea682eb..6e4c015783ff 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -29,6 +29,8 @@ struct vmbus_connection vmbus_connection = {
 
 	.ready_for_suspend_event= COMPLETION_INITIALIZER(
 				  vmbus_connection.ready_for_suspend_event),
+	.ready_for_resume_event	= COMPLETION_INITIALIZER(
+				  vmbus_connection.ready_for_resume_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 974b747ca1fc..f7a5f5615f34 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -272,6 +272,20 @@ struct vmbus_connection {
 	 * drop to zero.
 	 */
 	struct completion ready_for_suspend_event;
+
+	/*
+	 * The number of primary channels that should be "fixed up"
+	 * upon resume: these channels are re-offered upon resume, and some
+	 * fields of the channel offers (i.e. child_relid and connection_id)
+	 * can change, so the old offermsg must be fixed up, before the resume
+	 * callbacks of the VSC drivers start to further touch the channels.
+	 */
+	atomic_t nr_chan_fixup_on_resume;
+	/*
+	 * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+	 * drop to zero.
+	 */
+	struct completion ready_for_resume_event;
 };
 
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 32ec951d334f..391f0b225c9a 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2164,9 +2164,17 @@ static int vmbus_bus_suspend(struct device *dev)
 	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
 		wait_for_completion(&vmbus_connection.ready_for_suspend_event);
 
+	WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
 	mutex_lock(&vmbus_connection.channel_mutex);
 
 	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+		/*
+		 * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+		 * up the field, and the other fields (if necessary).
+		 */
+		channel->offermsg.child_relid = INVALID_RELID;
+
 		if (is_hvsock_channel(channel)) {
 			if (!channel->rescind) {
 				pr_err("hv_sock channel not rescinded!\n");
@@ -2181,6 +2189,8 @@ static int vmbus_bus_suspend(struct device *dev)
 			WARN_ON_ONCE(1);
 		}
 		spin_unlock_irqrestore(&channel->lock, flags);
+
+		atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
 	}
 
 	mutex_unlock(&vmbus_connection.channel_mutex);
@@ -2189,6 +2199,9 @@ static int vmbus_bus_suspend(struct device *dev)
 
 	vmbus_connection.conn_state = DISCONNECTED;
 
+	/* Reset the event for the next resume. */
+	reinit_completion(&vmbus_connection.ready_for_resume_event);
+
 	return 0;
 }
 
@@ -2223,8 +2236,12 @@ static int vmbus_bus_resume(struct device *dev)
 	if (ret != 0)
 		return ret;
 
+	WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
 	vmbus_request_offers();
 
+	wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
 	/* Reset the event for the next suspend. */
 	reinit_completion(&vmbus_connection.ready_for_suspend_event);
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 8a60e7766037..a3aa9e9ef6f2 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -426,6 +426,9 @@ enum vmbus_channel_message_type {
 	CHANNELMSG_COUNT
 };
 
+/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
+#define INVALID_RELID	U32_MAX
+
 struct vmbus_channel_message_header {
 	enum vmbus_channel_message_type msgtype;
 	u32 padding;
-- 
cgit v1.2.3


From c7eb6869632a5d33b41d0a00d683b8395392b7ee Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Mar 2019 16:38:31 +0000
Subject: vfs: subtype handling moved to fuse

The unused vfs code can be removed.  Don't pass empty subtype (same as if
->parse callback isn't called).

The bits that are left involve determining whether it's permitted to split the
filesystem type string passed in to mount(2).  Consequently, this means that we
cannot get rid of the FS_HAS_SUBTYPE flag unless we define that a type string
with a dot in it always indicates a subtype specification.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fs_context.c            | 14 --------------
 fs/fuse/inode.c            |  3 +--
 fs/namespace.c             |  2 --
 fs/proc_namespace.c        |  2 +-
 fs/super.c                 |  5 -----
 include/linux/fs_context.h |  1 -
 6 files changed, 2 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/fs/fs_context.c b/fs/fs_context.c
index 87c2c9687d90..138b5b4d621d 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -504,7 +504,6 @@ void put_fs_context(struct fs_context *fc)
 	put_net(fc->net_ns);
 	put_user_ns(fc->user_ns);
 	put_cred(fc->cred);
-	kfree(fc->subtype);
 	put_fc_log(fc);
 	put_filesystem(fc->fs_type);
 	kfree(fc->source);
@@ -571,17 +570,6 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
 		return 0;
 	}
 
-	if ((fc->fs_type->fs_flags & FS_HAS_SUBTYPE) &&
-	    strcmp(param->key, "subtype") == 0) {
-		if (param->type != fs_value_is_string)
-			return invalf(fc, "VFS: Legacy: Non-string subtype");
-		if (fc->subtype)
-			return invalf(fc, "VFS: Legacy: Multiple subtype");
-		fc->subtype = param->string;
-		param->string = NULL;
-		return 0;
-	}
-
 	if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
 		return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
 
@@ -738,8 +726,6 @@ void vfs_clean_context(struct fs_context *fc)
 	fc->s_fs_info = NULL;
 	fc->sb_flags = 0;
 	security_free_mnt_opts(&fc->security);
-	kfree(fc->subtype);
-	fc->subtype = NULL;
 	kfree(fc->source);
 	fc->source = NULL;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index c334f95e799a..2183967261a4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -473,8 +473,7 @@ static const struct fs_parameter_spec fuse_param_specs[] = {
 	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
 	fsparam_u32	("max_read",		OPT_MAX_READ),
 	fsparam_u32	("blksize",		OPT_BLKSIZE),
-	__fsparam(fs_param_is_string, "subtype", OPT_SUBTYPE,
-		  fs_param_v_optional),
+	fsparam_string	("subtype",		OPT_SUBTYPE),
 	{}
 };
 
diff --git a/fs/namespace.c b/fs/namespace.c
index d28d30b13043..105f995543f6 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2768,8 +2768,6 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
 				put_filesystem(type);
 				return -EINVAL;
 			}
-		} else {
-			subtype = "";
 		}
 	}
 
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index e16fb8f2049e..273ee82d8aa9 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -88,7 +88,7 @@ static inline void mangle(struct seq_file *m, const char *s)
 static void show_type(struct seq_file *m, struct super_block *sb)
 {
 	mangle(m, sb->s_type->name);
-	if (sb->s_subtype && sb->s_subtype[0]) {
+	if (sb->s_subtype) {
 		seq_putc(m, '.');
 		mangle(m, sb->s_subtype);
 	}
diff --git a/fs/super.c b/fs/super.c
index da223b4cfbca..a6ceed7bcd89 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1530,11 +1530,6 @@ int vfs_get_tree(struct fs_context *fc)
 	sb = fc->root->d_sb;
 	WARN_ON(!sb->s_bdi);
 
-	if (fc->subtype && !sb->s_subtype) {
-		sb->s_subtype = fc->subtype;
-		fc->subtype = NULL;
-	}
-
 	/*
 	 * Write barrier is for super_cache_count(). We place it before setting
 	 * SB_BORN as the data dependency between the two functions is the
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index 84a5eaa09f19..f6c86d58ea91 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -95,7 +95,6 @@ struct fs_context {
 	const struct cred	*cred;		/* The mounter's credentials */
 	struct fc_log		*log;		/* Logging buffer */
 	const char		*source;	/* The source name (eg. dev path) */
-	const char		*subtype;	/* The subtype to set on the superblock */
 	void			*security;	/* Linux S&M options */
 	void			*s_fs_info;	/* Proposed s_fs_info */
 	unsigned int		sb_flags;	/* Proposed superblock flags (SB_*) */
-- 
cgit v1.2.3


From 78e05972c5e6c8e9ca4c00ccc6985409da69f904 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 5 Sep 2019 16:20:09 +0200
Subject: ipc: fix semtimedop for generic 32-bit architectures

As Vincent noticed, the y2038 conversion of semtimedop in linux-5.1
broke when commit 00bf25d693e7 ("y2038: use time32 syscall names on
32-bit") changed all system calls on all architectures that take
a 32-bit time_t to point to the _time32 implementation, but left out
semtimedop in the asm-generic header.

This affects all 32-bit architectures using asm-generic/unistd.h:
h8300, unicore32, openrisc, nios2, hexagon, c6x, arc, nds32 and csky.

The notable exception is riscv32, which has dropped support for the
time32 system calls entirely.

Reported-by: Vincent Chen <deanbo422@gmail.com>
Cc: stable@vger.kernel.org
Cc: Vincent Chen <deanbo422@gmail.com>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Mark Salter <msalter@redhat.com>
Cc: Aurelien Jacquiot <jacquiot.aurelien@gmail.com>
Cc: Guo Ren <guoren@kernel.org>
Fixes: 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/unistd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1be0e798e362..1fc8faa6e973 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -569,7 +569,7 @@ __SYSCALL(__NR_semget, sys_semget)
 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
 #if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32
 #define __NR_semtimedop 192
-__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32)
+__SC_3264(__NR_semtimedop, sys_semtimedop_time32, sys_semtimedop)
 #endif
 #define __NR_semop 193
 __SYSCALL(__NR_semop, sys_semop)
-- 
cgit v1.2.3


From 175fca3bf91a1111b7e46f6655666640556b9059 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@stackframe.org>
Date: Fri, 23 Aug 2019 21:49:13 +0200
Subject: kexec: add KEXEC_ELF

Right now powerpc provides an implementation to read elf files
with the kexec_file_load() syscall. Make that available as a public
kexec interface so it can be re-used on other architectures.

Signed-off-by: Sven Schnelle <svens@stackframe.org>
Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/Kconfig                       |   3 +
 arch/powerpc/Kconfig               |   1 +
 arch/powerpc/kernel/kexec_elf_64.c | 545 +-----------------------------------
 include/linux/kexec.h              |  24 ++
 kernel/Makefile                    |   1 +
 kernel/kexec_elf.c                 | 549 +++++++++++++++++++++++++++++++++++++
 6 files changed, 583 insertions(+), 540 deletions(-)
 create mode 100644 kernel/kexec_elf.c

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index a7b57dd42c26..01244412f393 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -18,6 +18,9 @@ config KEXEC_CORE
 	select CRASH_CORE
 	bool
 
+config KEXEC_ELF
+	bool
+
 config HAVE_IMA_KEXEC
 	bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..3c85ef270338 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -512,6 +512,7 @@ config KEXEC_FILE
 	select KEXEC_CORE
 	select HAVE_IMA_KEXEC
 	select BUILD_BIN2C
+	select KEXEC_ELF
 	depends on PPC64
 	depends on CRYPTO=y
 	depends on CRYPTO_SHA256=y
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 83cf7b852876..3072fd6dbe94 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -23,541 +23,6 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 
-#define PURGATORY_STACK_SIZE	(16 * 1024)
-
-#define elf_addr_to_cpu	elf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_Rel		Elf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
-	/*
-	 * Where the ELF binary contents are kept.
-	 * Memory managed by the user of the struct.
-	 */
-	const char *buffer;
-
-	const struct elfhdr *ehdr;
-	const struct elf_phdr *proghdrs;
-	struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
-       return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
-	if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-		value = le64_to_cpu(value);
-	else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-		value = be64_to_cpu(value);
-
-	return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
-	if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-		value = le16_to_cpu(value);
-	else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-		value = be16_to_cpu(value);
-
-	return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
-	if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
-		value = le32_to_cpu(value);
-	else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
-		value = be32_to_cpu(value);
-
-	return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len:	size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
-	if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
-		pr_debug("Bad program header size.\n");
-		return false;
-	} else if (ehdr->e_shnum > 0 &&
-		   ehdr->e_shentsize != sizeof(struct elf_shdr)) {
-		pr_debug("Bad section header size.\n");
-		return false;
-	} else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
-		   ehdr->e_version != EV_CURRENT) {
-		pr_debug("Unknown ELF version.\n");
-		return false;
-	}
-
-	if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
-		size_t phdr_size;
-
-		/*
-		 * e_phnum is at most 65535 so calculating the size of the
-		 * program header cannot overflow.
-		 */
-		phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
-		/* Sanity check the program header table location. */
-		if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
-			pr_debug("Program headers at invalid location.\n");
-			return false;
-		} else if (ehdr->e_phoff + phdr_size > buf_len) {
-			pr_debug("Program headers truncated.\n");
-			return false;
-		}
-	}
-
-	if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
-		size_t shdr_size;
-
-		/*
-		 * e_shnum is at most 65536 so calculating
-		 * the size of the section header cannot overflow.
-		 */
-		shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
-		/* Sanity check the section header table location. */
-		if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
-			pr_debug("Section headers at invalid location.\n");
-			return false;
-		} else if (ehdr->e_shoff + shdr_size > buf_len) {
-			pr_debug("Section headers truncated.\n");
-			return false;
-		}
-	}
-
-	return true;
-}
-
-static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
-{
-	struct elfhdr *buf_ehdr;
-
-	if (len < sizeof(*buf_ehdr)) {
-		pr_debug("Buffer is too small to hold ELF header.\n");
-		return -ENOEXEC;
-	}
-
-	memset(ehdr, 0, sizeof(*ehdr));
-	memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
-	if (!elf_is_elf_file(ehdr)) {
-		pr_debug("No ELF header magic.\n");
-		return -ENOEXEC;
-	}
-
-	if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
-		pr_debug("Not a supported ELF class.\n");
-		return -ENOEXEC;
-	} else  if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
-		ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
-		pr_debug("Not a supported ELF data format.\n");
-		return -ENOEXEC;
-	}
-
-	buf_ehdr = (struct elfhdr *) buf;
-	if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
-		pr_debug("Bad ELF header size.\n");
-		return -ENOEXEC;
-	}
-
-	ehdr->e_type      = elf16_to_cpu(ehdr, buf_ehdr->e_type);
-	ehdr->e_machine   = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
-	ehdr->e_version   = elf32_to_cpu(ehdr, buf_ehdr->e_version);
-	ehdr->e_entry     = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
-	ehdr->e_phoff     = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
-	ehdr->e_shoff     = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
-	ehdr->e_flags     = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
-	ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
-	ehdr->e_phnum     = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
-	ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
-	ehdr->e_shnum     = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
-	ehdr->e_shstrndx  = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
-
-	return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_is_phdr_sane - check that it is safe to use the program header
- * @buf_len:	size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
-{
-
-	if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
-		pr_debug("ELF segment location wraps around.\n");
-		return false;
-	} else if (phdr->p_offset + phdr->p_filesz > buf_len) {
-		pr_debug("ELF segment not in file.\n");
-		return false;
-	} else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
-		pr_debug("ELF segment address wraps around.\n");
-		return false;
-	}
-
-	return true;
-}
-
-static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
-			 int idx)
-{
-	/* Override the const in proghdrs, we are the ones doing the loading. */
-	struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
-	const char *pbuf;
-	struct elf_phdr *buf_phdr;
-
-	pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
-	buf_phdr = (struct elf_phdr *) pbuf;
-
-	phdr->p_type   = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
-	phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
-	phdr->p_paddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
-	phdr->p_vaddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
-	phdr->p_flags  = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
-
-	/*
-	 * The following fields have a type equivalent to Elf_Addr
-	 * both in 32 bit and 64 bit ELF.
-	 */
-	phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
-	phdr->p_memsz  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
-	phdr->p_align  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
-
-	return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_phdrs - read the program headers from the buffer
- *
- * This function assumes that the program header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_phdrs(const char *buf, size_t len,
-			  struct elf_info *elf_info)
-{
-	size_t phdr_size, i;
-	const struct elfhdr *ehdr = elf_info->ehdr;
-
-	/*
-	 * e_phnum is at most 65535 so calculating the size of the
-	 * program header cannot overflow.
-	 */
-	phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
-	elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
-	if (!elf_info->proghdrs)
-		return -ENOMEM;
-
-	for (i = 0; i < ehdr->e_phnum; i++) {
-		int ret;
-
-		ret = elf_read_phdr(buf, len, elf_info, i);
-		if (ret) {
-			kfree(elf_info->proghdrs);
-			elf_info->proghdrs = NULL;
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len:	size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
-	bool size_ok;
-
-	/* SHT_NULL headers have undefined values, so we can't check them. */
-	if (shdr->sh_type == SHT_NULL)
-		return true;
-
-	/* Now verify sh_entsize */
-	switch (shdr->sh_type) {
-	case SHT_SYMTAB:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
-		break;
-	case SHT_RELA:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
-		break;
-	case SHT_DYNAMIC:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
-		break;
-	case SHT_REL:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
-		break;
-	case SHT_NOTE:
-	case SHT_PROGBITS:
-	case SHT_HASH:
-	case SHT_NOBITS:
-	default:
-		/*
-		 * This is a section whose entsize requirements
-		 * I don't care about.  If I don't know about
-		 * the section I can't care about it's entsize
-		 * requirements.
-		 */
-		size_ok = true;
-		break;
-	}
-
-	if (!size_ok) {
-		pr_debug("ELF section with wrong entry size.\n");
-		return false;
-	} else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
-		pr_debug("ELF section address wraps around.\n");
-		return false;
-	}
-
-	if (shdr->sh_type != SHT_NOBITS) {
-		if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
-			pr_debug("ELF section location wraps around.\n");
-			return false;
-		} else if (shdr->sh_offset + shdr->sh_size > buf_len) {
-			pr_debug("ELF section not in file.\n");
-			return false;
-		}
-	}
-
-	return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
-			 int idx)
-{
-	struct elf_shdr *shdr = &elf_info->sechdrs[idx];
-	const struct elfhdr *ehdr = elf_info->ehdr;
-	const char *sbuf;
-	struct elf_shdr *buf_shdr;
-
-	sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
-	buf_shdr = (struct elf_shdr *) sbuf;
-
-	shdr->sh_name      = elf32_to_cpu(ehdr, buf_shdr->sh_name);
-	shdr->sh_type      = elf32_to_cpu(ehdr, buf_shdr->sh_type);
-	shdr->sh_addr      = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
-	shdr->sh_offset    = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
-	shdr->sh_link      = elf32_to_cpu(ehdr, buf_shdr->sh_link);
-	shdr->sh_info      = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
-	/*
-	 * The following fields have a type equivalent to Elf_Addr
-	 * both in 32 bit and 64 bit ELF.
-	 */
-	shdr->sh_flags     = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
-	shdr->sh_size      = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
-	shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
-	shdr->sh_entsize   = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
-	return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
-			  struct elf_info *elf_info)
-{
-	size_t shdr_size, i;
-
-	/*
-	 * e_shnum is at most 65536 so calculating
-	 * the size of the section header cannot overflow.
-	 */
-	shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
-	elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
-	if (!elf_info->sechdrs)
-		return -ENOMEM;
-
-	for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
-		int ret;
-
-		ret = elf_read_shdr(buf, len, elf_info, i);
-		if (ret) {
-			kfree(elf_info->sechdrs);
-			elf_info->sechdrs = NULL;
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-/**
- * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
- * @buf:	Buffer to read ELF file from.
- * @len:	Size of @buf.
- * @ehdr:	Pointer to existing struct which will be populated.
- * @elf_info:	Pointer to existing struct which will be populated.
- *
- * This function allows reading ELF files with different byte order than
- * the kernel, byte-swapping the fields as needed.
- *
- * Return:
- * On success returns 0, and the caller should call elf_free_info(elf_info) to
- * free the memory allocated for the section and program headers.
- */
-int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
-			 struct elf_info *elf_info)
-{
-	int ret;
-
-	ret = elf_read_ehdr(buf, len, ehdr);
-	if (ret)
-		return ret;
-
-	elf_info->buffer = buf;
-	elf_info->ehdr = ehdr;
-	if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
-		ret = elf_read_phdrs(buf, len, elf_info);
-		if (ret)
-			return ret;
-	}
-	if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
-		ret = elf_read_shdrs(buf, len, elf_info);
-		if (ret) {
-			kfree(elf_info->proghdrs);
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
-/**
- * elf_free_info - free memory allocated by elf_read_from_buffer
- */
-void elf_free_info(struct elf_info *elf_info)
-{
-	kfree(elf_info->proghdrs);
-	kfree(elf_info->sechdrs);
-	memset(elf_info, 0, sizeof(*elf_info));
-}
-/**
- * build_elf_exec_info - read ELF executable and check that we can use it
- */
-static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
-			       struct elf_info *elf_info)
-{
-	int i;
-	int ret;
-
-	ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
-	if (ret)
-		return ret;
-
-	/* Big endian vmlinux has type ET_DYN. */
-	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
-		pr_err("Not an ELF executable.\n");
-		goto error;
-	} else if (!elf_info->proghdrs) {
-		pr_err("No ELF program header.\n");
-		goto error;
-	}
-
-	for (i = 0; i < ehdr->e_phnum; i++) {
-		/*
-		 * Kexec does not support loading interpreters.
-		 * In addition this check keeps us from attempting
-		 * to kexec ordinay executables.
-		 */
-		if (elf_info->proghdrs[i].p_type == PT_INTERP) {
-			pr_err("Requires an ELF interpreter.\n");
-			goto error;
-		}
-	}
-
-	return 0;
-error:
-	elf_free_info(elf_info);
-	return -ENOEXEC;
-}
-
-static int elf64_probe(const char *buf, unsigned long len)
-{
-	struct elfhdr ehdr;
-	struct elf_info elf_info;
-	int ret;
-
-	ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
-	if (ret)
-		return ret;
-
-	elf_free_info(&elf_info);
-
-	return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_exec_load - load ELF executable image
- * @lowest_load_addr:	On return, will be the address where the first PT_LOAD
- *			section will be loaded in memory.
- *
- * Return:
- * 0 on success, negative value on failure.
- */
-static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
-			 struct elf_info *elf_info,
-			 unsigned long *lowest_load_addr)
-{
-	unsigned long base = 0, lowest_addr = UINT_MAX;
-	int ret;
-	size_t i;
-	struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
-				  .top_down = false };
-
-	/* Read in the PT_LOAD segments. */
-	for (i = 0; i < ehdr->e_phnum; i++) {
-		unsigned long load_addr;
-		size_t size;
-		const struct elf_phdr *phdr;
-
-		phdr = &elf_info->proghdrs[i];
-		if (phdr->p_type != PT_LOAD)
-			continue;
-
-		size = phdr->p_filesz;
-		if (size > phdr->p_memsz)
-			size = phdr->p_memsz;
-
-		kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
-		kbuf.bufsz = size;
-		kbuf.memsz = phdr->p_memsz;
-		kbuf.buf_align = phdr->p_align;
-		kbuf.buf_min = phdr->p_paddr + base;
-		kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
-		ret = kexec_add_buffer(&kbuf);
-		if (ret)
-			goto out;
-		load_addr = kbuf.mem;
-
-		if (load_addr < lowest_addr)
-			lowest_addr = load_addr;
-	}
-
-	/* Update entry point to reflect new load address. */
-	ehdr->e_entry += base;
-
-	*lowest_load_addr = lowest_addr;
-	ret = 0;
- out:
-	return ret;
-}
-
 static void *elf64_load(struct kimage *image, char *kernel_buf,
 			unsigned long kernel_len, char *initrd,
 			unsigned long initrd_len, char *cmdline,
@@ -570,18 +35,18 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 	void *fdt;
 	const void *slave_code;
 	struct elfhdr ehdr;
-	struct elf_info elf_info;
+	struct kexec_elf_info elf_info;
 	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
 				  .buf_max = ppc64_rma_size };
 	struct kexec_buf pbuf = { .image = image, .buf_min = 0,
 				  .buf_max = ppc64_rma_size, .top_down = true,
 				  .mem = KEXEC_BUF_MEM_UNKNOWN };
 
-	ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+	ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
 	if (ret)
 		goto out;
 
-	ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
 	if (ret)
 		goto out;
 
@@ -648,13 +113,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
 		pr_err("Error setting up the purgatory.\n");
 
 out:
-	elf_free_info(&elf_info);
+	kexec_free_elf_info(&elf_info);
 
 	/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
 	return ret ? ERR_PTR(ret) : fdt;
 }
 
 const struct kexec_file_ops kexec_elf64_ops = {
-	.probe = elf64_probe,
+	.probe = kexec_elf_probe,
 	.load = elf64_load,
 };
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index b9b1bc5f9669..da2a6b1d69e7 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -216,6 +216,30 @@ extern int crash_prepare_elf64_headers(struct crash_mem *mem, int kernel_map,
 				       void **addr, unsigned long *sz);
 #endif /* CONFIG_KEXEC_FILE */
 
+#ifdef CONFIG_KEXEC_ELF
+struct kexec_elf_info {
+	/*
+	 * Where the ELF binary contents are kept.
+	 * Memory managed by the user of the struct.
+	 */
+	const char *buffer;
+
+	const struct elfhdr *ehdr;
+	const struct elf_phdr *proghdrs;
+	struct elf_shdr *sechdrs;
+};
+
+int kexec_build_elf_info(const char *buf, size_t len, struct elfhdr *ehdr,
+			       struct kexec_elf_info *elf_info);
+
+int kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+			 struct kexec_elf_info *elf_info,
+			 struct kexec_buf *kbuf,
+			 unsigned long *lowest_load_addr);
+
+void kexec_free_elf_info(struct kexec_elf_info *elf_info);
+int kexec_elf_probe(const char *buf, unsigned long len);
+#endif
 struct kimage {
 	kimage_entry_t head;
 	kimage_entry_t *entry;
diff --git a/kernel/Makefile b/kernel/Makefile
index a8d923b5481b..002220b7ca7c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -64,6 +64,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
 obj-$(CONFIG_KEXEC_CORE) += kexec_core.o
 obj-$(CONFIG_KEXEC) += kexec.o
 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
+obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
 obj-$(CONFIG_COMPAT) += compat.o
 obj-$(CONFIG_CGROUPS) += cgroup/
diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
new file mode 100644
index 000000000000..26c6310167a0
--- /dev/null
+++ b/kernel/kexec_elf.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#define pr_fmt(fmt)	"kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define PURGATORY_STACK_SIZE	(16 * 1024)
+
+#define elf_addr_to_cpu	elf64_to_cpu
+
+#ifndef Elf_Rel
+#define Elf_Rel		Elf64_Rel
+#endif /* Elf_Rel */
+
+static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
+{
+	return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
+}
+
+static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
+{
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+		value = le64_to_cpu(value);
+	else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+		value = be64_to_cpu(value);
+
+	return value;
+}
+
+static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
+{
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+		value = le16_to_cpu(value);
+	else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+		value = be16_to_cpu(value);
+
+	return value;
+}
+
+static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
+{
+	if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+		value = le32_to_cpu(value);
+	else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+		value = be32_to_cpu(value);
+
+	return value;
+}
+
+/**
+ * elf_is_ehdr_sane - check that it is safe to use the ELF header
+ * @buf_len:	size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
+{
+	if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
+		pr_debug("Bad program header size.\n");
+		return false;
+	} else if (ehdr->e_shnum > 0 &&
+		   ehdr->e_shentsize != sizeof(struct elf_shdr)) {
+		pr_debug("Bad section header size.\n");
+		return false;
+	} else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
+		   ehdr->e_version != EV_CURRENT) {
+		pr_debug("Unknown ELF version.\n");
+		return false;
+	}
+
+	if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+		size_t phdr_size;
+
+		/*
+		 * e_phnum is at most 65535 so calculating the size of the
+		 * program header cannot overflow.
+		 */
+		phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+		/* Sanity check the program header table location. */
+		if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
+			pr_debug("Program headers at invalid location.\n");
+			return false;
+		} else if (ehdr->e_phoff + phdr_size > buf_len) {
+			pr_debug("Program headers truncated.\n");
+			return false;
+		}
+	}
+
+	if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+		size_t shdr_size;
+
+		/*
+		 * e_shnum is at most 65536 so calculating
+		 * the size of the section header cannot overflow.
+		 */
+		shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
+
+		/* Sanity check the section header table location. */
+		if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
+			pr_debug("Section headers at invalid location.\n");
+			return false;
+		} else if (ehdr->e_shoff + shdr_size > buf_len) {
+			pr_debug("Section headers truncated.\n");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
+{
+	struct elfhdr *buf_ehdr;
+
+	if (len < sizeof(*buf_ehdr)) {
+		pr_debug("Buffer is too small to hold ELF header.\n");
+		return -ENOEXEC;
+	}
+
+	memset(ehdr, 0, sizeof(*ehdr));
+	memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
+	if (!elf_is_elf_file(ehdr)) {
+		pr_debug("No ELF header magic.\n");
+		return -ENOEXEC;
+	}
+
+	if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
+		pr_debug("Not a supported ELF class.\n");
+		return -ENOEXEC;
+	} else  if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
+		ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+		pr_debug("Not a supported ELF data format.\n");
+		return -ENOEXEC;
+	}
+
+	buf_ehdr = (struct elfhdr *) buf;
+	if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
+		pr_debug("Bad ELF header size.\n");
+		return -ENOEXEC;
+	}
+
+	ehdr->e_type      = elf16_to_cpu(ehdr, buf_ehdr->e_type);
+	ehdr->e_machine   = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
+	ehdr->e_version   = elf32_to_cpu(ehdr, buf_ehdr->e_version);
+	ehdr->e_entry     = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
+	ehdr->e_phoff     = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
+	ehdr->e_shoff     = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
+	ehdr->e_flags     = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
+	ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
+	ehdr->e_phnum     = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
+	ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
+	ehdr->e_shnum     = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
+	ehdr->e_shstrndx  = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
+
+	return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_is_phdr_sane - check that it is safe to use the program header
+ * @buf_len:	size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
+{
+
+	if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
+		pr_debug("ELF segment location wraps around.\n");
+		return false;
+	} else if (phdr->p_offset + phdr->p_filesz > buf_len) {
+		pr_debug("ELF segment not in file.\n");
+		return false;
+	} else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
+		pr_debug("ELF segment address wraps around.\n");
+		return false;
+	}
+
+	return true;
+}
+
+static int elf_read_phdr(const char *buf, size_t len,
+			 struct kexec_elf_info *elf_info,
+			 int idx)
+{
+	/* Override the const in proghdrs, we are the ones doing the loading. */
+	struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
+	const char *pbuf;
+	struct elf_phdr *buf_phdr;
+
+	pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
+	buf_phdr = (struct elf_phdr *) pbuf;
+
+	phdr->p_type   = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
+	phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
+	phdr->p_paddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
+	phdr->p_vaddr  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
+	phdr->p_flags  = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
+
+	/*
+	 * The following fields have a type equivalent to Elf_Addr
+	 * both in 32 bit and 64 bit ELF.
+	 */
+	phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
+	phdr->p_memsz  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
+	phdr->p_align  = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
+
+	return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_phdrs - read the program headers from the buffer
+ *
+ * This function assumes that the program header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_phdrs(const char *buf, size_t len,
+			  struct kexec_elf_info *elf_info)
+{
+	size_t phdr_size, i;
+	const struct elfhdr *ehdr = elf_info->ehdr;
+
+	/*
+	 * e_phnum is at most 65535 so calculating the size of the
+	 * program header cannot overflow.
+	 */
+	phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+	elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
+	if (!elf_info->proghdrs)
+		return -ENOMEM;
+
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		int ret;
+
+		ret = elf_read_phdr(buf, len, elf_info, i);
+		if (ret) {
+			kfree(elf_info->proghdrs);
+			elf_info->proghdrs = NULL;
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * elf_is_shdr_sane - check that it is safe to use the section header
+ * @buf_len:	size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
+{
+	bool size_ok;
+
+	/* SHT_NULL headers have undefined values, so we can't check them. */
+	if (shdr->sh_type == SHT_NULL)
+		return true;
+
+	/* Now verify sh_entsize */
+	switch (shdr->sh_type) {
+	case SHT_SYMTAB:
+		size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
+		break;
+	case SHT_RELA:
+		size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
+		break;
+	case SHT_DYNAMIC:
+		size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
+		break;
+	case SHT_REL:
+		size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
+		break;
+	case SHT_NOTE:
+	case SHT_PROGBITS:
+	case SHT_HASH:
+	case SHT_NOBITS:
+	default:
+		/*
+		 * This is a section whose entsize requirements
+		 * I don't care about.  If I don't know about
+		 * the section I can't care about it's entsize
+		 * requirements.
+		 */
+		size_ok = true;
+		break;
+	}
+
+	if (!size_ok) {
+		pr_debug("ELF section with wrong entry size.\n");
+		return false;
+	} else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
+		pr_debug("ELF section address wraps around.\n");
+		return false;
+	}
+
+	if (shdr->sh_type != SHT_NOBITS) {
+		if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
+			pr_debug("ELF section location wraps around.\n");
+			return false;
+		} else if (shdr->sh_offset + shdr->sh_size > buf_len) {
+			pr_debug("ELF section not in file.\n");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+static int elf_read_shdr(const char *buf, size_t len,
+			 struct kexec_elf_info *elf_info,
+			 int idx)
+{
+	struct elf_shdr *shdr = &elf_info->sechdrs[idx];
+	const struct elfhdr *ehdr = elf_info->ehdr;
+	const char *sbuf;
+	struct elf_shdr *buf_shdr;
+
+	sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
+	buf_shdr = (struct elf_shdr *) sbuf;
+
+	shdr->sh_name      = elf32_to_cpu(ehdr, buf_shdr->sh_name);
+	shdr->sh_type      = elf32_to_cpu(ehdr, buf_shdr->sh_type);
+	shdr->sh_addr      = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
+	shdr->sh_offset    = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
+	shdr->sh_link      = elf32_to_cpu(ehdr, buf_shdr->sh_link);
+	shdr->sh_info      = elf32_to_cpu(ehdr, buf_shdr->sh_info);
+
+	/*
+	 * The following fields have a type equivalent to Elf_Addr
+	 * both in 32 bit and 64 bit ELF.
+	 */
+	shdr->sh_flags     = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
+	shdr->sh_size      = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
+	shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
+	shdr->sh_entsize   = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
+
+	return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_shdrs - read the section headers from the buffer
+ *
+ * This function assumes that the section header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_shdrs(const char *buf, size_t len,
+			  struct kexec_elf_info *elf_info)
+{
+	size_t shdr_size, i;
+
+	/*
+	 * e_shnum is at most 65536 so calculating
+	 * the size of the section header cannot overflow.
+	 */
+	shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
+
+	elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
+	if (!elf_info->sechdrs)
+		return -ENOMEM;
+
+	for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
+		int ret;
+
+		ret = elf_read_shdr(buf, len, elf_info, i);
+		if (ret) {
+			kfree(elf_info->sechdrs);
+			elf_info->sechdrs = NULL;
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
+ * @buf:	Buffer to read ELF file from.
+ * @len:	Size of @buf.
+ * @ehdr:	Pointer to existing struct which will be populated.
+ * @elf_info:	Pointer to existing struct which will be populated.
+ *
+ * This function allows reading ELF files with different byte order than
+ * the kernel, byte-swapping the fields as needed.
+ *
+ * Return:
+ * On success returns 0, and the caller should call
+ * kexec_free_elf_info(elf_info) to free the memory allocated for the section
+ * and program headers.
+ */
+static int elf_read_from_buffer(const char *buf, size_t len,
+				struct elfhdr *ehdr,
+				struct kexec_elf_info *elf_info)
+{
+	int ret;
+
+	ret = elf_read_ehdr(buf, len, ehdr);
+	if (ret)
+		return ret;
+
+	elf_info->buffer = buf;
+	elf_info->ehdr = ehdr;
+	if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+		ret = elf_read_phdrs(buf, len, elf_info);
+		if (ret)
+			return ret;
+	}
+	if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+		ret = elf_read_shdrs(buf, len, elf_info);
+		if (ret) {
+			kfree(elf_info->proghdrs);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * kexec_free_elf_info - free memory allocated by elf_read_from_buffer
+ */
+void kexec_free_elf_info(struct kexec_elf_info *elf_info)
+{
+	kfree(elf_info->proghdrs);
+	kfree(elf_info->sechdrs);
+	memset(elf_info, 0, sizeof(*elf_info));
+}
+/**
+ * kexec_build_elf_info - read ELF executable and check that we can use it
+ */
+int kexec_build_elf_info(const char *buf, size_t len, struct elfhdr *ehdr,
+			       struct kexec_elf_info *elf_info)
+{
+	int i;
+	int ret;
+
+	ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
+	if (ret)
+		return ret;
+
+	/* Big endian vmlinux has type ET_DYN. */
+	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
+		pr_err("Not an ELF executable.\n");
+		goto error;
+	} else if (!elf_info->proghdrs) {
+		pr_err("No ELF program header.\n");
+		goto error;
+	}
+
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		/*
+		 * Kexec does not support loading interpreters.
+		 * In addition this check keeps us from attempting
+		 * to kexec ordinay executables.
+		 */
+		if (elf_info->proghdrs[i].p_type == PT_INTERP) {
+			pr_err("Requires an ELF interpreter.\n");
+			goto error;
+		}
+	}
+
+	return 0;
+error:
+	kexec_free_elf_info(elf_info);
+	return -ENOEXEC;
+}
+
+
+int kexec_elf_probe(const char *buf, unsigned long len)
+{
+	struct elfhdr ehdr;
+	struct kexec_elf_info elf_info;
+	int ret;
+
+	ret = kexec_build_elf_info(buf, len, &ehdr, &elf_info);
+	if (ret)
+		return ret;
+
+	kexec_free_elf_info(&elf_info);
+
+	return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
+}
+
+/**
+ * kexec_elf_load - load ELF executable image
+ * @lowest_load_addr:	On return, will be the address where the first PT_LOAD
+ *			section will be loaded in memory.
+ *
+ * Return:
+ * 0 on success, negative value on failure.
+ */
+int kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
+			 struct kexec_elf_info *elf_info,
+			 struct kexec_buf *kbuf,
+			 unsigned long *lowest_load_addr)
+{
+	unsigned long base = 0, lowest_addr = UINT_MAX;
+	int ret;
+	size_t i;
+
+	/* Read in the PT_LOAD segments. */
+	for (i = 0; i < ehdr->e_phnum; i++) {
+		unsigned long load_addr;
+		size_t size;
+		const struct elf_phdr *phdr;
+
+		phdr = &elf_info->proghdrs[i];
+		if (phdr->p_type != PT_LOAD)
+			continue;
+
+		size = phdr->p_filesz;
+		if (size > phdr->p_memsz)
+			size = phdr->p_memsz;
+
+		kbuf->buffer = (void *) elf_info->buffer + phdr->p_offset;
+		kbuf->bufsz = size;
+		kbuf->memsz = phdr->p_memsz;
+		kbuf->buf_align = phdr->p_align;
+		kbuf->buf_min = phdr->p_paddr + base;
+		kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+		ret = kexec_add_buffer(kbuf);
+		if (ret)
+			goto out;
+		load_addr = kbuf->mem;
+
+		if (load_addr < lowest_addr)
+			lowest_addr = load_addr;
+	}
+
+	/* Update entry point to reflect new load address. */
+	ehdr->e_entry += base;
+
+	*lowest_load_addr = lowest_addr;
+	ret = 0;
+ out:
+	return ret;
+}
-- 
cgit v1.2.3


From 5f71d977206f3b2990ba304766ddaa9e81dbe700 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@stackframe.org>
Date: Fri, 23 Aug 2019 21:49:15 +0200
Subject: kexec_elf: remove parsing of section headers

We're not using them, so we can drop the parsing.

Signed-off-by: Sven Schnelle <svens@stackframe.org>
Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/kexec.h |   1 -
 kernel/kexec_elf.c    | 137 --------------------------------------------------
 2 files changed, 138 deletions(-)

(limited to 'include')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index da2a6b1d69e7..f0b809258ed3 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -226,7 +226,6 @@ struct kexec_elf_info {
 
 	const struct elfhdr *ehdr;
 	const struct elf_phdr *proghdrs;
-	struct elf_shdr *sechdrs;
 };
 
 int kexec_build_elf_info(const char *buf, size_t len, struct elfhdr *ehdr,
diff --git a/kernel/kexec_elf.c b/kernel/kexec_elf.c
index 34376fbc55be..137037603117 100644
--- a/kernel/kexec_elf.c
+++ b/kernel/kexec_elf.c
@@ -257,134 +257,6 @@ static int elf_read_phdrs(const char *buf, size_t len,
 	return 0;
 }
 
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len:	size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
-	bool size_ok;
-
-	/* SHT_NULL headers have undefined values, so we can't check them. */
-	if (shdr->sh_type == SHT_NULL)
-		return true;
-
-	/* Now verify sh_entsize */
-	switch (shdr->sh_type) {
-	case SHT_SYMTAB:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
-		break;
-	case SHT_RELA:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
-		break;
-	case SHT_DYNAMIC:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
-		break;
-	case SHT_REL:
-		size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
-		break;
-	case SHT_NOTE:
-	case SHT_PROGBITS:
-	case SHT_HASH:
-	case SHT_NOBITS:
-	default:
-		/*
-		 * This is a section whose entsize requirements
-		 * I don't care about.  If I don't know about
-		 * the section I can't care about it's entsize
-		 * requirements.
-		 */
-		size_ok = true;
-		break;
-	}
-
-	if (!size_ok) {
-		pr_debug("ELF section with wrong entry size.\n");
-		return false;
-	} else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
-		pr_debug("ELF section address wraps around.\n");
-		return false;
-	}
-
-	if (shdr->sh_type != SHT_NOBITS) {
-		if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
-			pr_debug("ELF section location wraps around.\n");
-			return false;
-		} else if (shdr->sh_offset + shdr->sh_size > buf_len) {
-			pr_debug("ELF section not in file.\n");
-			return false;
-		}
-	}
-
-	return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len,
-			 struct kexec_elf_info *elf_info,
-			 int idx)
-{
-	struct elf_shdr *shdr = &elf_info->sechdrs[idx];
-	const struct elfhdr *ehdr = elf_info->ehdr;
-	const char *sbuf;
-	struct elf_shdr *buf_shdr;
-
-	sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
-	buf_shdr = (struct elf_shdr *) sbuf;
-
-	shdr->sh_name      = elf32_to_cpu(ehdr, buf_shdr->sh_name);
-	shdr->sh_type      = elf32_to_cpu(ehdr, buf_shdr->sh_type);
-	shdr->sh_addr      = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
-	shdr->sh_offset    = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
-	shdr->sh_link      = elf32_to_cpu(ehdr, buf_shdr->sh_link);
-	shdr->sh_info      = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
-	/*
-	 * The following fields have a type equivalent to Elf_Addr
-	 * both in 32 bit and 64 bit ELF.
-	 */
-	shdr->sh_flags     = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
-	shdr->sh_size      = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
-	shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
-	shdr->sh_entsize   = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
-	return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
-			  struct kexec_elf_info *elf_info)
-{
-	size_t shdr_size, i;
-
-	/*
-	 * e_shnum is at most 65536 so calculating
-	 * the size of the section header cannot overflow.
-	 */
-	shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
-	elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
-	if (!elf_info->sechdrs)
-		return -ENOMEM;
-
-	for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
-		int ret;
-
-		ret = elf_read_shdr(buf, len, elf_info, i);
-		if (ret) {
-			kfree(elf_info->sechdrs);
-			elf_info->sechdrs = NULL;
-			return ret;
-		}
-	}
-
-	return 0;
-}
-
 /**
  * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
  * @buf:	Buffer to read ELF file from.
@@ -417,14 +289,6 @@ static int elf_read_from_buffer(const char *buf, size_t len,
 		if (ret)
 			return ret;
 	}
-	if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
-		ret = elf_read_shdrs(buf, len, elf_info);
-		if (ret) {
-			kfree(elf_info->proghdrs);
-			return ret;
-		}
-	}
-
 	return 0;
 }
 
@@ -434,7 +298,6 @@ static int elf_read_from_buffer(const char *buf, size_t len,
 void kexec_free_elf_info(struct kexec_elf_info *elf_info)
 {
 	kfree(elf_info->proghdrs);
-	kfree(elf_info->sechdrs);
 	memset(elf_info, 0, sizeof(*elf_info));
 }
 /**
-- 
cgit v1.2.3


From d3d04f6c330a60ce7170a1076b06f31c77ba7873 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Sun, 25 Aug 2019 23:48:48 +0930
Subject: clk: Add support for AST2600 SoC

The ast2600 is a new BMC SoC from ASPEED. It contains many more clocks
than the previous iterations, so support is broken out into it's own
driver.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Link: https://lkml.kernel.org/r/20190825141848.17346-3-joel@jms.id.au
[sboyd@kernel.org: Mark arrays const]
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/Makefile                      |   1 +
 drivers/clk/clk-ast2600.c                 | 704 ++++++++++++++++++++++++++++++
 include/dt-bindings/clock/ast2600-clock.h | 113 +++++
 3 files changed, 818 insertions(+)
 create mode 100644 drivers/clk/clk-ast2600.c
 create mode 100644 include/dt-bindings/clock/ast2600-clock.h

(limited to 'include')

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 0cad76021297..0138fb14e6f8 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_ARCH_EFM32)		+= clk-efm32gg.o
 obj-$(CONFIG_COMMON_CLK_FIXED_MMIO)	+= clk-fixed-mmio.o
 obj-$(CONFIG_COMMON_CLK_GEMINI)		+= clk-gemini.o
 obj-$(CONFIG_COMMON_CLK_ASPEED)		+= clk-aspeed.o
+obj-$(CONFIG_MACH_ASPEED_G6)		+= clk-ast2600.o
 obj-$(CONFIG_ARCH_HIGHBANK)		+= clk-highbank.o
 obj-$(CONFIG_CLK_HSDK)			+= clk-hsdk-pll.o
 obj-$(CONFIG_COMMON_CLK_LOCHNAGAR)	+= clk-lochnagar.o
diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c
new file mode 100644
index 000000000000..1c1bb39bb04e
--- /dev/null
+++ b/drivers/clk/clk-ast2600.c
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright IBM Corp
+// Copyright ASPEED Technology
+
+#define pr_fmt(fmt) "clk-ast2600: " fmt
+
+#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/clock/ast2600-clock.h>
+
+#include "clk-aspeed.h"
+
+#define ASPEED_G6_NUM_CLKS		67
+
+#define ASPEED_G6_SILICON_REV		0x004
+
+#define ASPEED_G6_RESET_CTRL		0x040
+#define ASPEED_G6_RESET_CTRL2		0x050
+
+#define ASPEED_G6_CLK_STOP_CTRL		0x080
+#define ASPEED_G6_CLK_STOP_CTRL2	0x090
+
+#define ASPEED_G6_MISC_CTRL		0x0C0
+#define  UART_DIV13_EN			BIT(12)
+
+#define ASPEED_G6_CLK_SELECTION1	0x300
+#define ASPEED_G6_CLK_SELECTION2	0x304
+#define ASPEED_G6_CLK_SELECTION4	0x310
+
+#define ASPEED_HPLL_PARAM		0x200
+#define ASPEED_APLL_PARAM		0x210
+#define ASPEED_MPLL_PARAM		0x220
+#define ASPEED_EPLL_PARAM		0x240
+#define ASPEED_DPLL_PARAM		0x260
+
+#define ASPEED_G6_STRAP1		0x500
+
+/* Globally visible clocks */
+static DEFINE_SPINLOCK(aspeed_g6_clk_lock);
+
+/* Keeps track of all clocks */
+static struct clk_hw_onecell_data *aspeed_g6_clk_data;
+
+static void __iomem *scu_g6_base;
+
+/*
+ * Clocks marked with CLK_IS_CRITICAL:
+ *
+ *  ref0 and ref1 are essential for the SoC to operate
+ *  mpll is required if SDRAM is used
+ */
+static const struct aspeed_gate_data aspeed_g6_gates[] = {
+	/*				    clk rst  name		parent	 flags */
+	[ASPEED_CLK_GATE_MCLK]		= {  0, -1, "mclk-gate",	"mpll",	 CLK_IS_CRITICAL }, /* SDRAM */
+	[ASPEED_CLK_GATE_ECLK]		= {  1, -1, "eclk-gate",	"eclk",	 0 },	/* Video Engine */
+	[ASPEED_CLK_GATE_GCLK]		= {  2,  7, "gclk-gate",	NULL,	 0 },	/* 2D engine */
+	/* vclk parent - dclk/d1clk/hclk/mclk */
+	[ASPEED_CLK_GATE_VCLK]		= {  3,  6, "vclk-gate",	NULL,	 0 },	/* Video Capture */
+	[ASPEED_CLK_GATE_BCLK]		= {  4,  8, "bclk-gate",	"bclk",	 0 }, /* PCIe/PCI */
+	/* From dpll */
+	[ASPEED_CLK_GATE_DCLK]		= {  5, -1, "dclk-gate",	NULL,	 CLK_IS_CRITICAL }, /* DAC */
+	[ASPEED_CLK_GATE_REF0CLK]	= {  6, -1, "ref0clk-gate",	"clkin", CLK_IS_CRITICAL },
+	[ASPEED_CLK_GATE_USBPORT2CLK]	= {  7,  3, "usb-port2-gate",	NULL,	 0 },	/* USB2.0 Host port 2 */
+	/* Reserved 8 */
+	[ASPEED_CLK_GATE_USBUHCICLK]	= {  9, 15, "usb-uhci-gate",	NULL,	 0 },	/* USB1.1 (requires port 2 enabled) */
+	/* From dpll/epll/40mhz usb p1 phy/gpioc6/dp phy pll */
+	[ASPEED_CLK_GATE_D1CLK]		= { 10, 13, "d1clk-gate",	"d1clk", 0 },	/* GFX CRT */
+	/* Reserved 11/12 */
+	[ASPEED_CLK_GATE_YCLK]		= { 13,  4, "yclk-gate",	NULL,	 0 },	/* HAC */
+	[ASPEED_CLK_GATE_USBPORT1CLK]	= { 14, 14, "usb-port1-gate",	NULL,	 0 },	/* USB2 hub/USB2 host port 1/USB1.1 dev */
+	[ASPEED_CLK_GATE_UART5CLK]	= { 15, -1, "uart5clk-gate",	"uart",	 0 },	/* UART5 */
+	/* Reserved 16/19 */
+	[ASPEED_CLK_GATE_MAC1CLK]	= { 20, 11, "mac1clk-gate",	"mac12", 0 },	/* MAC1 */
+	[ASPEED_CLK_GATE_MAC2CLK]	= { 21, 12, "mac2clk-gate",	"mac12", 0 },	/* MAC2 */
+	/* Reserved 22/23 */
+	[ASPEED_CLK_GATE_RSACLK]	= { 24,  4, "rsaclk-gate",	NULL,	 0 },	/* HAC */
+	[ASPEED_CLK_GATE_RVASCLK]	= { 25,  9, "rvasclk-gate",	NULL,	 0 },	/* RVAS */
+	/* Reserved 26 */
+	[ASPEED_CLK_GATE_EMMCCLK]	= { 27, 16, "emmcclk-gate",	NULL,	 0 },	/* For card clk */
+	/* Reserved 28/29/30 */
+	[ASPEED_CLK_GATE_LCLK]		= { 32, 32, "lclk-gate",	NULL,	 0 }, /* LPC */
+	[ASPEED_CLK_GATE_ESPICLK]	= { 33, -1, "espiclk-gate",	NULL,	 0 }, /* eSPI */
+	[ASPEED_CLK_GATE_REF1CLK]	= { 34, -1, "ref1clk-gate",	"clkin", CLK_IS_CRITICAL },
+	/* Reserved 35 */
+	[ASPEED_CLK_GATE_SDCLK]		= { 36, 56, "sdclk-gate",	NULL,	 0 },	/* SDIO/SD */
+	[ASPEED_CLK_GATE_LHCCLK]	= { 37, -1, "lhclk-gate",	"lhclk", 0 },	/* LPC master/LPC+ */
+	/* Reserved 38 RSA: no longer used */
+	/* Reserved 39 */
+	[ASPEED_CLK_GATE_I3C0CLK]	= { 40,  40, "i3c0clk-gate",	NULL,	 0 },	/* I3C0 */
+	[ASPEED_CLK_GATE_I3C1CLK]	= { 41,  41, "i3c1clk-gate",	NULL,	 0 },	/* I3C1 */
+	[ASPEED_CLK_GATE_I3C2CLK]	= { 42,  42, "i3c2clk-gate",	NULL,	 0 },	/* I3C2 */
+	[ASPEED_CLK_GATE_I3C3CLK]	= { 43,  43, "i3c3clk-gate",	NULL,	 0 },	/* I3C3 */
+	[ASPEED_CLK_GATE_I3C4CLK]	= { 44,  44, "i3c4clk-gate",	NULL,	 0 },	/* I3C4 */
+	[ASPEED_CLK_GATE_I3C5CLK]	= { 45,  45, "i3c5clk-gate",	NULL,	 0 },	/* I3C5 */
+	[ASPEED_CLK_GATE_I3C6CLK]	= { 46,  46, "i3c6clk-gate",	NULL,	 0 },	/* I3C6 */
+	[ASPEED_CLK_GATE_I3C7CLK]	= { 47,  47, "i3c7clk-gate",	NULL,	 0 },	/* I3C7 */
+	[ASPEED_CLK_GATE_UART1CLK]	= { 48,  -1, "uart1clk-gate",	"uart",	 0 },	/* UART1 */
+	[ASPEED_CLK_GATE_UART2CLK]	= { 49,  -1, "uart2clk-gate",	"uart",	 0 },	/* UART2 */
+	[ASPEED_CLK_GATE_UART3CLK]	= { 50,  -1, "uart3clk-gate",	"uart",  0 },	/* UART3 */
+	[ASPEED_CLK_GATE_UART4CLK]	= { 51,  -1, "uart4clk-gate",	"uart",	 0 },	/* UART4 */
+	[ASPEED_CLK_GATE_MAC3CLK]	= { 52,  52, "mac3clk-gate",	"mac34", 0 },	/* MAC3 */
+	[ASPEED_CLK_GATE_MAC4CLK]	= { 53,  53, "mac4clk-gate",	"mac34", 0 },	/* MAC4 */
+	[ASPEED_CLK_GATE_UART6CLK]	= { 54,  -1, "uart6clk-gate",	"uartx", 0 },	/* UART6 */
+	[ASPEED_CLK_GATE_UART7CLK]	= { 55,  -1, "uart7clk-gate",	"uartx", 0 },	/* UART7 */
+	[ASPEED_CLK_GATE_UART8CLK]	= { 56,  -1, "uart8clk-gate",	"uartx", 0 },	/* UART8 */
+	[ASPEED_CLK_GATE_UART9CLK]	= { 57,  -1, "uart9clk-gate",	"uartx", 0 },	/* UART9 */
+	[ASPEED_CLK_GATE_UART10CLK]	= { 58,  -1, "uart10clk-gate",	"uartx", 0 },	/* UART10 */
+	[ASPEED_CLK_GATE_UART11CLK]	= { 59,  -1, "uart11clk-gate",	"uartx", 0 },	/* UART11 */
+	[ASPEED_CLK_GATE_UART12CLK]	= { 60,  -1, "uart12clk-gate",	"uartx", 0 },	/* UART12 */
+	[ASPEED_CLK_GATE_UART13CLK]	= { 61,  -1, "uart13clk-gate",	"uartx", 0 },	/* UART13 */
+	[ASPEED_CLK_GATE_FSICLK]	= { 62,  59, "fsiclk-gate",	NULL,	 0 },	/* FSI */
+};
+
+static const char * const eclk_parent_names[] = { "mpll", "hpll", "dpll" };
+
+static const struct clk_div_table ast2600_eclk_div_table[] = {
+	{ 0x0, 2 },
+	{ 0x1, 2 },
+	{ 0x2, 3 },
+	{ 0x3, 4 },
+	{ 0x4, 5 },
+	{ 0x5, 6 },
+	{ 0x6, 7 },
+	{ 0x7, 8 },
+	{ 0 }
+};
+
+static const struct clk_div_table ast2600_mac_div_table[] = {
+	{ 0x0, 4 },
+	{ 0x1, 4 },
+	{ 0x2, 6 },
+	{ 0x3, 8 },
+	{ 0x4, 10 },
+	{ 0x5, 12 },
+	{ 0x6, 14 },
+	{ 0x7, 16 },
+	{ 0 }
+};
+
+static const struct clk_div_table ast2600_div_table[] = {
+	{ 0x0, 4 },
+	{ 0x1, 8 },
+	{ 0x2, 12 },
+	{ 0x3, 16 },
+	{ 0x4, 20 },
+	{ 0x5, 24 },
+	{ 0x6, 28 },
+	{ 0x7, 32 },
+	{ 0 }
+};
+
+/* For hpll/dpll/epll/mpll */
+static struct clk_hw *ast2600_calc_pll(const char *name, u32 val)
+{
+	unsigned int mult, div;
+
+	if (val & BIT(24)) {
+		/* Pass through mode */
+		mult = div = 1;
+	} else {
+		/* F = 25Mhz * [(M + 2) / (n + 1)] / (p + 1) */
+		u32 m = val  & 0x1fff;
+		u32 n = (val >> 13) & 0x3f;
+		u32 p = (val >> 19) & 0xf;
+		mult = (m + 1) / (n + 1);
+		div = (p + 1);
+	}
+	return clk_hw_register_fixed_factor(NULL, name, "clkin", 0,
+			mult, div);
+};
+
+static struct clk_hw *ast2600_calc_apll(const char *name, u32 val)
+{
+	unsigned int mult, div;
+
+	if (val & BIT(20)) {
+		/* Pass through mode */
+		mult = div = 1;
+	} else {
+		/* F = 25Mhz * (2-od) * [(m + 2) / (n + 1)] */
+		u32 m = (val >> 5) & 0x3f;
+		u32 od = (val >> 4) & 0x1;
+		u32 n = val & 0xf;
+
+		mult = (2 - od) * (m + 2);
+		div = n + 1;
+	}
+	return clk_hw_register_fixed_factor(NULL, name, "clkin", 0,
+			mult, div);
+};
+
+static u32 get_bit(u8 idx)
+{
+	return BIT(idx % 32);
+}
+
+static u32 get_reset_reg(struct aspeed_clk_gate *gate)
+{
+	if (gate->reset_idx < 32)
+		return ASPEED_G6_RESET_CTRL;
+
+	return ASPEED_G6_RESET_CTRL2;
+}
+
+static u32 get_clock_reg(struct aspeed_clk_gate *gate)
+{
+	if (gate->clock_idx < 32)
+		return ASPEED_G6_CLK_STOP_CTRL;
+
+	return ASPEED_G6_CLK_STOP_CTRL2;
+}
+
+static int aspeed_g6_clk_is_enabled(struct clk_hw *hw)
+{
+	struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+	u32 clk = get_bit(gate->clock_idx);
+	u32 rst = get_bit(gate->reset_idx);
+	u32 reg;
+	u32 enval;
+
+	/*
+	 * If the IP is in reset, treat the clock as not enabled,
+	 * this happens with some clocks such as the USB one when
+	 * coming from cold reset. Without this, aspeed_clk_enable()
+	 * will fail to lift the reset.
+	 */
+	if (gate->reset_idx >= 0) {
+		regmap_read(gate->map, get_reset_reg(gate), &reg);
+
+		if (reg & rst)
+			return 0;
+	}
+
+	regmap_read(gate->map, get_clock_reg(gate), &reg);
+
+	enval = (gate->flags & CLK_GATE_SET_TO_DISABLE) ? 0 : clk;
+
+	return ((reg & clk) == enval) ? 1 : 0;
+}
+
+static int aspeed_g6_clk_enable(struct clk_hw *hw)
+{
+	struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+	unsigned long flags;
+	u32 clk = get_bit(gate->clock_idx);
+	u32 rst = get_bit(gate->reset_idx);
+
+	spin_lock_irqsave(gate->lock, flags);
+
+	if (aspeed_g6_clk_is_enabled(hw)) {
+		spin_unlock_irqrestore(gate->lock, flags);
+		return 0;
+	}
+
+	if (gate->reset_idx >= 0) {
+		/* Put IP in reset */
+		regmap_write(gate->map, get_reset_reg(gate), rst);
+		/* Delay 100us */
+		udelay(100);
+	}
+
+	/* Enable clock */
+	if (gate->flags & CLK_GATE_SET_TO_DISABLE) {
+		regmap_write(gate->map, get_clock_reg(gate), clk);
+	} else {
+		/* Use set to clear register */
+		regmap_write(gate->map, get_clock_reg(gate) + 0x04, clk);
+	}
+
+	if (gate->reset_idx >= 0) {
+		/* A delay of 10ms is specified by the ASPEED docs */
+		mdelay(10);
+		/* Take IP out of reset */
+		regmap_write(gate->map, get_reset_reg(gate) + 0x4, rst);
+	}
+
+	spin_unlock_irqrestore(gate->lock, flags);
+
+	return 0;
+}
+
+static void aspeed_g6_clk_disable(struct clk_hw *hw)
+{
+	struct aspeed_clk_gate *gate = to_aspeed_clk_gate(hw);
+	unsigned long flags;
+	u32 clk = get_bit(gate->clock_idx);
+
+	spin_lock_irqsave(gate->lock, flags);
+
+	if (gate->flags & CLK_GATE_SET_TO_DISABLE) {
+		regmap_write(gate->map, get_clock_reg(gate), clk);
+	} else {
+		/* Use set to clear register */
+		regmap_write(gate->map, get_clock_reg(gate) + 0x4, clk);
+	}
+
+	spin_unlock_irqrestore(gate->lock, flags);
+}
+
+static const struct clk_ops aspeed_g6_clk_gate_ops = {
+	.enable = aspeed_g6_clk_enable,
+	.disable = aspeed_g6_clk_disable,
+	.is_enabled = aspeed_g6_clk_is_enabled,
+};
+
+static int aspeed_g6_reset_deassert(struct reset_controller_dev *rcdev,
+				    unsigned long id)
+{
+	struct aspeed_reset *ar = to_aspeed_reset(rcdev);
+	u32 rst = get_bit(id);
+	u32 reg = id >= 32 ? ASPEED_G6_RESET_CTRL2 : ASPEED_G6_RESET_CTRL;
+
+	/* Use set to clear register */
+	return regmap_write(ar->map, reg + 0x04, rst);
+}
+
+static int aspeed_g6_reset_assert(struct reset_controller_dev *rcdev,
+				  unsigned long id)
+{
+	struct aspeed_reset *ar = to_aspeed_reset(rcdev);
+	u32 rst = get_bit(id);
+	u32 reg = id >= 32 ? ASPEED_G6_RESET_CTRL2 : ASPEED_G6_RESET_CTRL;
+
+	return regmap_write(ar->map, reg, rst);
+}
+
+static int aspeed_g6_reset_status(struct reset_controller_dev *rcdev,
+				  unsigned long id)
+{
+	struct aspeed_reset *ar = to_aspeed_reset(rcdev);
+	int ret;
+	u32 val;
+	u32 rst = get_bit(id);
+	u32 reg = id >= 32 ? ASPEED_G6_RESET_CTRL2 : ASPEED_G6_RESET_CTRL;
+
+	ret = regmap_read(ar->map, reg, &val);
+	if (ret)
+		return ret;
+
+	return !!(val & rst);
+}
+
+static const struct reset_control_ops aspeed_g6_reset_ops = {
+	.assert = aspeed_g6_reset_assert,
+	.deassert = aspeed_g6_reset_deassert,
+	.status = aspeed_g6_reset_status,
+};
+
+static struct clk_hw *aspeed_g6_clk_hw_register_gate(struct device *dev,
+		const char *name, const char *parent_name, unsigned long flags,
+		struct regmap *map, u8 clock_idx, u8 reset_idx,
+		u8 clk_gate_flags, spinlock_t *lock)
+{
+	struct aspeed_clk_gate *gate;
+	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
+
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &aspeed_g6_clk_gate_ops;
+	init.flags = flags;
+	init.parent_names = parent_name ? &parent_name : NULL;
+	init.num_parents = parent_name ? 1 : 0;
+
+	gate->map = map;
+	gate->clock_idx = clock_idx;
+	gate->reset_idx = reset_idx;
+	gate->flags = clk_gate_flags;
+	gate->lock = lock;
+	gate->hw.init = &init;
+
+	hw = &gate->hw;
+	ret = clk_hw_register(dev, hw);
+	if (ret) {
+		kfree(gate);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+
+static const char * const vclk_parent_names[] = {
+	"dpll",
+	"d1pll",
+	"hclk",
+	"mclk",
+};
+
+static const char * const d1clk_parent_names[] = {
+	"dpll",
+	"epll",
+	"usb-phy-40m",
+	"gpioc6_clkin",
+	"dp_phy_pll",
+};
+
+static int aspeed_g6_clk_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct aspeed_reset *ar;
+	struct regmap *map;
+	struct clk_hw *hw;
+	u32 val, rate;
+	int i, ret;
+
+	map = syscon_node_to_regmap(dev->of_node);
+	if (IS_ERR(map)) {
+		dev_err(dev, "no syscon regmap\n");
+		return PTR_ERR(map);
+	}
+
+	ar = devm_kzalloc(dev, sizeof(*ar), GFP_KERNEL);
+	if (!ar)
+		return -ENOMEM;
+
+	ar->map = map;
+
+	ar->rcdev.owner = THIS_MODULE;
+	ar->rcdev.nr_resets = 64;
+	ar->rcdev.ops = &aspeed_g6_reset_ops;
+	ar->rcdev.of_node = dev->of_node;
+
+	ret = devm_reset_controller_register(dev, &ar->rcdev);
+	if (ret) {
+		dev_err(dev, "could not register reset controller\n");
+		return ret;
+	}
+
+	/* UART clock div13 setting */
+	regmap_read(map, ASPEED_G6_MISC_CTRL, &val);
+	if (val & UART_DIV13_EN)
+		rate = 24000000 / 13;
+	else
+		rate = 24000000;
+	hw = clk_hw_register_fixed_rate(dev, "uart", NULL, 0, rate);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_UART] = hw;
+
+	/* UART6~13 clock div13 setting */
+	regmap_read(map, 0x80, &val);
+	if (val & BIT(31))
+		rate = 24000000 / 13;
+	else
+		rate = 24000000;
+	hw = clk_hw_register_fixed_rate(dev, "uartx", NULL, 0, rate);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_UARTX] = hw;
+
+	/* EMMC ext clock divider */
+	hw = clk_hw_register_gate(dev, "emmc_extclk_gate", "hpll", 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 15, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	hw = clk_hw_register_divider_table(dev, "emmc_extclk", "emmc_extclk_gate", 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 12, 3, 0,
+			ast2600_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_EMMC] = hw;
+
+	/* SD/SDIO clock divider and gate */
+	hw = clk_hw_register_gate(dev, "sd_extclk_gate", "hpll", 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION4, 31, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	hw = clk_hw_register_divider_table(dev, "sd_extclk", "sd_extclk_gate",
+			0, scu_g6_base + ASPEED_G6_CLK_SELECTION4, 28, 3, 0,
+			ast2600_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_SDIO] = hw;
+
+	/* MAC1/2 AHB bus clock divider */
+	hw = clk_hw_register_divider_table(dev, "mac12", "hpll", 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 16, 3, 0,
+			ast2600_mac_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC12] = hw;
+
+	/* MAC3/4 AHB bus clock divider */
+	hw = clk_hw_register_divider_table(dev, "mac34", "hpll", 0,
+			scu_g6_base + 0x310, 24, 3, 0,
+			ast2600_mac_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MAC34] = hw;
+
+	/* LPC Host (LHCLK) clock divider */
+	hw = clk_hw_register_divider_table(dev, "lhclk", "hpll", 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0,
+			ast2600_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_LHCLK] = hw;
+
+	/* gfx d1clk : use dp clk */
+	regmap_update_bits(map, ASPEED_G6_CLK_SELECTION1, GENMASK(10, 8), BIT(10));
+	/* SoC Display clock selection */
+	hw = clk_hw_register_mux(dev, "d1clk", d1clk_parent_names,
+			ARRAY_SIZE(d1clk_parent_names), 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 8, 3, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_D1CLK] = hw;
+
+	/* d1 clk div 0x308[17:15] x [14:12] - 8,7,6,5,4,3,2,1 */
+	regmap_write(map, 0x308, 0x12000); /* 3x3 = 9 */
+
+	/* P-Bus (BCLK) clock divider */
+	hw = clk_hw_register_divider_table(dev, "bclk", "hpll", 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 20, 3, 0,
+			ast2600_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_BCLK] = hw;
+
+	/* Video Capture clock selection */
+	hw = clk_hw_register_mux(dev, "vclk", vclk_parent_names,
+			ARRAY_SIZE(vclk_parent_names), 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION2, 12, 3, 0,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_VCLK] = hw;
+
+	/* Video Engine clock divider */
+	hw = clk_hw_register_divider_table(dev, "eclk", NULL, 0,
+			scu_g6_base + ASPEED_G6_CLK_SELECTION1, 28, 3, 0,
+			ast2600_eclk_div_table,
+			&aspeed_g6_clk_lock);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_ECLK] = hw;
+
+	for (i = 0; i < ARRAY_SIZE(aspeed_g6_gates); i++) {
+		const struct aspeed_gate_data *gd = &aspeed_g6_gates[i];
+		u32 gate_flags;
+
+		/*
+		 * Special case: the USB port 1 clock (bit 14) is always
+		 * working the opposite way from the other ones.
+		 */
+		gate_flags = (gd->clock_idx == 14) ? 0 : CLK_GATE_SET_TO_DISABLE;
+		hw = aspeed_g6_clk_hw_register_gate(dev,
+				gd->name,
+				gd->parent_name,
+				gd->flags,
+				map,
+				gd->clock_idx,
+				gd->reset_idx,
+				gate_flags,
+				&aspeed_g6_clk_lock);
+		if (IS_ERR(hw))
+			return PTR_ERR(hw);
+		aspeed_g6_clk_data->hws[i] = hw;
+	}
+
+	return 0;
+};
+
+static const struct of_device_id aspeed_g6_clk_dt_ids[] = {
+	{ .compatible = "aspeed,ast2600-scu" },
+	{ }
+};
+
+static struct platform_driver aspeed_g6_clk_driver = {
+	.probe  = aspeed_g6_clk_probe,
+	.driver = {
+		.name = "ast2600-clk",
+		.of_match_table = aspeed_g6_clk_dt_ids,
+		.suppress_bind_attrs = true,
+	},
+};
+builtin_platform_driver(aspeed_g6_clk_driver);
+
+static const u32 ast2600_a0_axi_ahb_div_table[] = {
+	2, 2, 3, 5,
+};
+
+static const u32 ast2600_a1_axi_ahb_div_table[] = {
+	4, 6, 2, 4,
+};
+
+static void __init aspeed_g6_cc(struct regmap *map)
+{
+	struct clk_hw *hw;
+	u32 val, div, chip_id, axi_div, ahb_div;
+
+	clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000);
+
+	/*
+	 * High-speed PLL clock derived from the crystal. This the CPU clock,
+	 * and we assume that it is enabled
+	 */
+	regmap_read(map, ASPEED_HPLL_PARAM, &val);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_HPLL] = ast2600_calc_pll("hpll", val);
+
+	regmap_read(map, ASPEED_MPLL_PARAM, &val);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_MPLL] = ast2600_calc_pll("mpll", val);
+
+	regmap_read(map, ASPEED_DPLL_PARAM, &val);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_DPLL] = ast2600_calc_pll("dpll", val);
+
+	regmap_read(map, ASPEED_EPLL_PARAM, &val);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_EPLL] = ast2600_calc_pll("epll", val);
+
+	regmap_read(map, ASPEED_APLL_PARAM, &val);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_APLL] = ast2600_calc_apll("apll", val);
+
+	/* Strap bits 12:11 define the AXI/AHB clock frequency ratio (aka HCLK)*/
+	regmap_read(map, ASPEED_G6_STRAP1, &val);
+	if (val & BIT(16))
+		axi_div = 1;
+	else
+		axi_div = 2;
+
+	regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id);
+	if (chip_id & BIT(16))
+		ahb_div = ast2600_a1_axi_ahb_div_table[(val >> 11) & 0x3];
+	else
+		ahb_div = ast2600_a0_axi_ahb_div_table[(val >> 11) & 0x3];
+
+	hw = clk_hw_register_fixed_factor(NULL, "ahb", "hpll", 0, 1, axi_div * ahb_div);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_AHB] = hw;
+
+	regmap_read(map, ASPEED_G6_CLK_SELECTION1, &val);
+	val = (val >> 23) & 0x7;
+	div = 4 * (val + 1);
+	hw = clk_hw_register_fixed_factor(NULL, "apb1", "hpll", 0, 1, div);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_APB1] = hw;
+
+	regmap_read(map, ASPEED_G6_CLK_SELECTION4, &val);
+	val = (val >> 9) & 0x7;
+	div = 2 * (val + 1);
+	hw = clk_hw_register_fixed_factor(NULL, "apb2", "ahb", 0, 1, div);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_APB2] = hw;
+
+	/* USB 2.0 port1 phy 40MHz clock */
+	hw = clk_hw_register_fixed_rate(NULL, "usb-phy-40m", NULL, 0, 40000000);
+	aspeed_g6_clk_data->hws[ASPEED_CLK_USBPHY_40M] = hw;
+};
+
+static void __init aspeed_g6_cc_init(struct device_node *np)
+{
+	struct regmap *map;
+	int ret;
+	int i;
+
+	scu_g6_base = of_iomap(np, 0);
+	if (!scu_g6_base)
+		return;
+
+	aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws,
+				      ASPEED_G6_NUM_CLKS), GFP_KERNEL);
+	if (!aspeed_g6_clk_data)
+		return;
+
+	/*
+	 * This way all clocks fetched before the platform device probes,
+	 * except those we assign here for early use, will be deferred.
+	 */
+	for (i = 0; i < ASPEED_G6_NUM_CLKS; i++)
+		aspeed_g6_clk_data->hws[i] = ERR_PTR(-EPROBE_DEFER);
+
+	/*
+	 * We check that the regmap works on this very first access,
+	 * but as this is an MMIO-backed regmap, subsequent regmap
+	 * access is not going to fail and we skip error checks from
+	 * this point.
+	 */
+	map = syscon_node_to_regmap(np);
+	if (IS_ERR(map)) {
+		pr_err("no syscon regmap\n");
+		return;
+	}
+
+	aspeed_g6_cc(map);
+	aspeed_g6_clk_data->num = ASPEED_G6_NUM_CLKS;
+	ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, aspeed_g6_clk_data);
+	if (ret)
+		pr_err("failed to add DT provider: %d\n", ret);
+};
+CLK_OF_DECLARE_DRIVER(aspeed_cc_g6, "aspeed,ast2600-scu", aspeed_g6_cc_init);
diff --git a/include/dt-bindings/clock/ast2600-clock.h b/include/dt-bindings/clock/ast2600-clock.h
new file mode 100644
index 000000000000..38074a5f7296
--- /dev/null
+++ b/include/dt-bindings/clock/ast2600-clock.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later OR MIT */
+#ifndef DT_BINDINGS_AST2600_CLOCK_H
+#define DT_BINDINGS_AST2600_CLOCK_H
+
+#define ASPEED_CLK_GATE_ECLK		0
+#define ASPEED_CLK_GATE_GCLK		1
+
+#define ASPEED_CLK_GATE_MCLK		2
+
+#define ASPEED_CLK_GATE_VCLK		3
+#define ASPEED_CLK_GATE_BCLK		4
+#define ASPEED_CLK_GATE_DCLK		5
+
+#define ASPEED_CLK_GATE_LCLK		6
+#define ASPEED_CLK_GATE_LHCCLK		7
+
+#define ASPEED_CLK_GATE_D1CLK		8
+#define ASPEED_CLK_GATE_YCLK		9
+
+#define ASPEED_CLK_GATE_REF0CLK		10
+#define ASPEED_CLK_GATE_REF1CLK		11
+
+#define ASPEED_CLK_GATE_ESPICLK		12
+
+#define ASPEED_CLK_GATE_USBUHCICLK	13
+#define ASPEED_CLK_GATE_USBPORT1CLK	14
+#define ASPEED_CLK_GATE_USBPORT2CLK	15
+
+#define ASPEED_CLK_GATE_RSACLK		16
+#define ASPEED_CLK_GATE_RVASCLK		17
+
+#define ASPEED_CLK_GATE_MAC1CLK		18
+#define ASPEED_CLK_GATE_MAC2CLK		19
+#define ASPEED_CLK_GATE_MAC3CLK		20
+#define ASPEED_CLK_GATE_MAC4CLK		21
+
+#define ASPEED_CLK_GATE_UART1CLK	22
+#define ASPEED_CLK_GATE_UART2CLK	23
+#define ASPEED_CLK_GATE_UART3CLK	24
+#define ASPEED_CLK_GATE_UART4CLK	25
+#define ASPEED_CLK_GATE_UART5CLK	26
+#define ASPEED_CLK_GATE_UART6CLK	27
+#define ASPEED_CLK_GATE_UART7CLK	28
+#define ASPEED_CLK_GATE_UART8CLK	29
+#define ASPEED_CLK_GATE_UART9CLK	30
+#define ASPEED_CLK_GATE_UART10CLK	31
+#define ASPEED_CLK_GATE_UART11CLK	32
+#define ASPEED_CLK_GATE_UART12CLK	33
+#define ASPEED_CLK_GATE_UART13CLK	34
+
+#define ASPEED_CLK_GATE_SDCLK		35
+#define ASPEED_CLK_GATE_EMMCCLK		36
+
+#define ASPEED_CLK_GATE_I3C0CLK		37
+#define ASPEED_CLK_GATE_I3C1CLK		38
+#define ASPEED_CLK_GATE_I3C2CLK		39
+#define ASPEED_CLK_GATE_I3C3CLK		40
+#define ASPEED_CLK_GATE_I3C4CLK		41
+#define ASPEED_CLK_GATE_I3C5CLK		42
+#define ASPEED_CLK_GATE_I3C6CLK		43
+#define ASPEED_CLK_GATE_I3C7CLK		44
+
+#define ASPEED_CLK_GATE_FSICLK		45
+
+#define ASPEED_CLK_HPLL			46
+#define ASPEED_CLK_MPLL			47
+#define ASPEED_CLK_DPLL			48
+#define ASPEED_CLK_EPLL			49
+#define ASPEED_CLK_APLL			50
+#define ASPEED_CLK_AHB			51
+#define ASPEED_CLK_APB1			52
+#define ASPEED_CLK_APB2			53
+#define ASPEED_CLK_BCLK			54
+#define ASPEED_CLK_D1CLK		55
+#define ASPEED_CLK_VCLK			56
+#define ASPEED_CLK_LHCLK		57
+#define ASPEED_CLK_UART			58
+#define ASPEED_CLK_UARTX		59
+#define ASPEED_CLK_SDIO			60
+#define ASPEED_CLK_EMMC			61
+#define ASPEED_CLK_ECLK			62
+#define ASPEED_CLK_ECLK_MUX		63
+#define ASPEED_CLK_MAC12		64
+#define ASPEED_CLK_MAC34		65
+#define ASPEED_CLK_USBPHY_40M		66
+
+/* Only list resets here that are not part of a gate */
+#define ASPEED_RESET_ADC		55
+#define ASPEED_RESET_JTAG_MASTER2	54
+#define ASPEED_RESET_I3C_DMA		39
+#define ASPEED_RESET_PWM		37
+#define ASPEED_RESET_PECI		36
+#define ASPEED_RESET_MII		35
+#define ASPEED_RESET_I2C		34
+#define ASPEED_RESET_H2X		31
+#define ASPEED_RESET_GP_MCU		30
+#define ASPEED_RESET_DP_MCU		29
+#define ASPEED_RESET_DP			28
+#define ASPEED_RESET_RC_XDMA		27
+#define ASPEED_RESET_GRAPHICS		26
+#define ASPEED_RESET_DEV_XDMA		25
+#define ASPEED_RESET_DEV_MCTP		24
+#define ASPEED_RESET_RC_MCTP		23
+#define ASPEED_RESET_JTAG_MASTER	22
+#define ASPEED_RESET_PCIE_DEV_O		21
+#define ASPEED_RESET_PCIE_DEV_OEN	20
+#define ASPEED_RESET_PCIE_RC_O		19
+#define ASPEED_RESET_PCIE_RC_OEN	18
+#define ASPEED_RESET_PCI_DP		5
+#define ASPEED_RESET_AHB		1
+#define ASPEED_RESET_SDRAM		0
+
+#endif
-- 
cgit v1.2.3


From 0c043d70d04711fe6c380df9065fdc44192c49bf Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Fri, 6 Sep 2019 12:02:32 -0700
Subject: Input: elan_i2c - remove Lenovo Legion Y7000 PnpID

Looks like the Bios of the Lenovo Legion Y7000 is using ELAN061B
when the actual device is supposed to be used with hid-multitouch.

Remove it from the list of the supported device, hoping that
no one will complain about the loss in functionality.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=203467
Fixes: 738c06d0e456 ("Input: elan_i2c - add hardware ID for multiple Lenovo laptops")
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/elan-i2c-ids.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/input/elan-i2c-ids.h b/include/linux/input/elan-i2c-ids.h
index ceabb01a6a7d..1ecb6b45812c 100644
--- a/include/linux/input/elan-i2c-ids.h
+++ b/include/linux/input/elan-i2c-ids.h
@@ -48,7 +48,7 @@ static const struct acpi_device_id elan_acpi_id[] = {
 	{ "ELAN0618", 0 },
 	{ "ELAN0619", 0 },
 	{ "ELAN061A", 0 },
-	{ "ELAN061B", 0 },
+/*	{ "ELAN061B", 0 }, not working on the Lenovo Legion Y7000 */
 	{ "ELAN061C", 0 },
 	{ "ELAN061D", 0 },
 	{ "ELAN061E", 0 },
-- 
cgit v1.2.3


From 23b68395c7c78a764e8963fc15a7cfd318bf187f Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 26 Aug 2019 22:14:21 +0200
Subject: mm/mmu_notifiers: add a lockdep map for invalidate_range_start/end

This is a similar idea to the fs_reclaim fake lockdep lock. It's fairly
easy to provoke a specific notifier to be run on a specific range: Just
prep it, and then munmap() it.

A bit harder, but still doable, is to provoke the mmu notifiers for all
the various callchains that might lead to them. But both at the same time
is really hard to reliably hit, especially when you want to exercise paths
like direct reclaim or compaction, where it's not easy to control what
exactly will be unmapped.

By introducing a lockdep map to tie them all together we allow lockdep to
see a lot more dependencies, without having to actually hit them in a
single challchain while testing.

On Jason's suggestion this is is rolled out for both
invalidate_range_start and invalidate_range_end. They both have the same
calling context, hence we can share the same lockdep map. Note that the
annotation for invalidate_ranage_start is outside of the
mm_has_notifiers(), to make sure lockdep is informed about all paths
leading to this context irrespective of whether mmu notifiers are present
for a given context. We don't do that on the invalidate_range_end side to
avoid paying the overhead twice, there the lockdep annotation is pushed
down behind the mm_has_notifiers() check.

Link: https://lore.kernel.org/r/20190826201425.17547-2-daniel.vetter@ffwll.ch
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/mmu_notifier.h | 14 ++++++++++++--
 mm/mmu_notifier.c            |  8 ++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 52929e5ef708..4dfe996dafd2 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -42,6 +42,10 @@ enum mmu_notifier_event {
 
 #ifdef CONFIG_MMU_NOTIFIER
 
+#ifdef CONFIG_LOCKDEP
+extern struct lockdep_map __mmu_notifier_invalidate_range_start_map;
+#endif
+
 /*
  * The mmu notifier_mm structure is allocated and installed in
  * mm->mmu_notifier_mm inside the mm_take_all_locks() protected
@@ -339,20 +343,26 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 static inline void
 mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 {
+	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	if (mm_has_notifiers(range->mm)) {
 		range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE;
 		__mmu_notifier_invalidate_range_start(range);
 	}
+	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 }
 
 static inline int
 mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
 {
+	int ret = 0;
+
+	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	if (mm_has_notifiers(range->mm)) {
 		range->flags &= ~MMU_NOTIFIER_RANGE_BLOCKABLE;
-		return __mmu_notifier_invalidate_range_start(range);
+		ret = __mmu_notifier_invalidate_range_start(range);
 	}
-	return 0;
+	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
+	return ret;
 }
 
 static inline void
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 9e2125ae10a5..05d98167da7b 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -21,6 +21,12 @@
 /* global SRCU for all MMs */
 DEFINE_STATIC_SRCU(srcu);
 
+#ifdef CONFIG_LOCKDEP
+struct lockdep_map __mmu_notifier_invalidate_range_start_map = {
+	.name = "mmu_notifier_invalidate_range_start"
+};
+#endif
+
 /*
  * This function can't run concurrently against mmu_notifier_register
  * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
@@ -184,6 +190,7 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
 	struct mmu_notifier *mn;
 	int id;
 
+	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	id = srcu_read_lock(&srcu);
 	hlist_for_each_entry_rcu(mn, &range->mm->mmu_notifier_mm->list, hlist) {
 		/*
@@ -207,6 +214,7 @@ void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
 			mn->ops->invalidate_range_end(mn, range);
 	}
 	srcu_read_unlock(&srcu, id);
+	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
 }
 
 void __mmu_notifier_invalidate_range(struct mm_struct *mm,
-- 
cgit v1.2.3


From 810e24e009cf71bf85a1524f272a744c54ca6591 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 26 Aug 2019 22:14:25 +0200
Subject: mm/mmu_notifiers: annotate with might_sleep()

Since mmu notifiers don't exist for many processes, but could block in
interesting places, add some annotations. This should help make sure the
core mm keeps up its end of the mmu notifier contract.

The checks here are outside of all notifier checks because of that.
They compile away without CONFIG_DEBUG_ATOMIC_SLEEP.

Link: https://lore.kernel.org/r/20190826201425.17547-6-daniel.vetter@ffwll.ch
Suggested-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/mmu_notifier.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 4dfe996dafd2..1bd8e6a09a3c 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -343,6 +343,8 @@ static inline void mmu_notifier_change_pte(struct mm_struct *mm,
 static inline void
 mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
 {
+	might_sleep();
+
 	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
 	if (mm_has_notifiers(range->mm)) {
 		range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE;
@@ -368,6 +370,9 @@ mmu_notifier_invalidate_range_start_nonblock(struct mmu_notifier_range *range)
 static inline void
 mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
 {
+	if (mmu_notifier_range_blockable(range))
+		might_sleep();
+
 	if (mm_has_notifiers(range->mm))
 		__mmu_notifier_invalidate_range_end(range, false);
 }
-- 
cgit v1.2.3


From a520110e4a15ceb385304d9cab22bb51438f6080 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Aug 2019 16:19:53 +0200
Subject: mm: split out a new pagewalk.h header from mm.h

Add a new header for the two handful of users of the walk_page_range /
walk_page_vma interface instead of polluting all users of mm.h with it.

Link: https://lore.kernel.org/r/20190828141955.22210-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 arch/openrisc/kernel/dma.c              |  1 +
 arch/powerpc/mm/book3s64/subpage_prot.c |  2 +-
 arch/s390/mm/gmap.c                     |  2 +-
 fs/proc/task_mmu.c                      |  2 +-
 include/linux/mm.h                      | 46 ----------------------------
 include/linux/pagewalk.h                | 54 +++++++++++++++++++++++++++++++++
 mm/hmm.c                                |  2 +-
 mm/madvise.c                            |  1 +
 mm/memcontrol.c                         |  2 +-
 mm/mempolicy.c                          |  2 +-
 mm/migrate.c                            |  1 +
 mm/mincore.c                            |  2 +-
 mm/mprotect.c                           |  2 +-
 mm/pagewalk.c                           |  2 +-
 14 files changed, 66 insertions(+), 55 deletions(-)
 create mode 100644 include/linux/pagewalk.h

(limited to 'include')

diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index b41a79fcdbd9..c7812e6effa2 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -16,6 +16,7 @@
  */
 
 #include <linux/dma-noncoherent.h>
+#include <linux/pagewalk.h>
 
 #include <asm/cpuinfo.h>
 #include <asm/spr_defs.h>
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 9ba07e55c489..236f0a861ecc 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/types.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/hugetlb.h>
 #include <linux/syscalls.h>
 
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 39c3a6e3d262..cf80feae970d 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
 #include <linux/spinlock.h>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 731642e0f5a0..8857da830b86 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/vmacache.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..7cf955feb823 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1430,54 +1430,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
 void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long start, unsigned long end);
 
-/**
- * mm_walk - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- *	       this handler should only handle pud_trans_huge() puds.
- *	       the pmd_entry or pte_entry callbacks will be used for
- *	       regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
- *	       this handler is required to be able to handle
- *	       pmd_trans_huge() pmds.  They may simply choose to
- *	       split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
- * @hugetlb_entry: if set, called for each hugetlb entry
- * @test_walk: caller specific callback function to determine whether
- *             we walk over the current vma or not. Returning 0
- *             value means "do page table walk over the current vma,"
- *             and a negative one means "abort current page table walk
- *             right now." 1 means "skip the current vma."
- * @mm:        mm_struct representing the target process of page table walk
- * @vma:       vma currently walked (NULL if walking outside vmas)
- * @private:   private data for callbacks' usage
- *
- * (see the comment on walk_page_range() for more details)
- */
-struct mm_walk {
-	int (*pud_entry)(pud_t *pud, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
-	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
-	int (*pte_entry)(pte_t *pte, unsigned long addr,
-			 unsigned long next, struct mm_walk *walk);
-	int (*pte_hole)(unsigned long addr, unsigned long next,
-			struct mm_walk *walk);
-	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
-			     unsigned long addr, unsigned long next,
-			     struct mm_walk *walk);
-	int (*test_walk)(unsigned long addr, unsigned long next,
-			struct mm_walk *walk);
-	struct mm_struct *mm;
-	struct vm_area_struct *vma;
-	void *private;
-};
-
 struct mmu_notifier_range;
 
-int walk_page_range(unsigned long addr, unsigned long end,
-		struct mm_walk *walk);
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
new file mode 100644
index 000000000000..df278a94086d
--- /dev/null
+++ b/include/linux/pagewalk.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PAGEWALK_H
+#define _LINUX_PAGEWALK_H
+
+#include <linux/mm.h>
+
+/**
+ * mm_walk - callbacks for walk_page_range
+ * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
+ *	       this handler should only handle pud_trans_huge() puds.
+ *	       the pmd_entry or pte_entry callbacks will be used for
+ *	       regular PUDs.
+ * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
+ *	       this handler is required to be able to handle
+ *	       pmd_trans_huge() pmds.  They may simply choose to
+ *	       split_huge_page() instead of handling it explicitly.
+ * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
+ * @pte_hole: if set, called for each hole at all levels
+ * @hugetlb_entry: if set, called for each hugetlb entry
+ * @test_walk: caller specific callback function to determine whether
+ *             we walk over the current vma or not. Returning 0
+ *             value means "do page table walk over the current vma,"
+ *             and a negative one means "abort current page table walk
+ *             right now." 1 means "skip the current vma."
+ * @mm:        mm_struct representing the target process of page table walk
+ * @vma:       vma currently walked (NULL if walking outside vmas)
+ * @private:   private data for callbacks' usage
+ *
+ * (see the comment on walk_page_range() for more details)
+ */
+struct mm_walk {
+	int (*pud_entry)(pud_t *pud, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk);
+	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk);
+	int (*pte_entry)(pte_t *pte, unsigned long addr,
+			 unsigned long next, struct mm_walk *walk);
+	int (*pte_hole)(unsigned long addr, unsigned long next,
+			struct mm_walk *walk);
+	int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
+			     unsigned long addr, unsigned long next,
+			     struct mm_walk *walk);
+	int (*test_walk)(unsigned long addr, unsigned long next,
+			struct mm_walk *walk);
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	void *private;
+};
+
+int walk_page_range(unsigned long addr, unsigned long end,
+		struct mm_walk *walk);
+int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
+
+#endif /* _LINUX_PAGEWALK_H */
diff --git a/mm/hmm.c b/mm/hmm.c
index 4882b83aeccb..26916ff6c8df 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -8,7 +8,7 @@
  * Refer to include/linux/hmm.h for information about heterogeneous memory
  * management or HMM for short.
  */
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/hmm.h>
 #include <linux/init.h>
 #include <linux/rmap.h>
diff --git a/mm/madvise.c b/mm/madvise.c
index 968df3aa069f..80a78bb16782 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -20,6 +20,7 @@
 #include <linux/file.h>
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/pagewalk.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/shmem_fs.h>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6f5c0c517c49..4c3af5d71ab1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -25,7 +25,7 @@
 #include <linux/page_counter.h>
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/sched/mm.h>
 #include <linux/shmem_fs.h>
 #include <linux/hugetlb.h>
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 65e0874fce17..3a96def1e796 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -68,7 +68,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/mempolicy.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/highmem.h>
 #include <linux/hugetlb.h>
 #include <linux/kernel.h>
diff --git a/mm/migrate.c b/mm/migrate.c
index 962cb62c621f..c9c73a35aca7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -38,6 +38,7 @@
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
 #include <linux/gfp.h>
+#include <linux/pagewalk.h>
 #include <linux/pfn_t.h>
 #include <linux/memremap.h>
 #include <linux/userfaultfd_k.h>
diff --git a/mm/mincore.c b/mm/mincore.c
index 4fe91d497436..3b051b6ab3fe 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -10,7 +10,7 @@
  */
 #include <linux/pagemap.h>
 #include <linux/gfp.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/mman.h>
 #include <linux/syscalls.h>
 #include <linux/swap.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index bf38dfbbb4b4..cc73318dbc25 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -9,7 +9,7 @@
  *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
  */
 
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/hugetlb.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index c3084ff2569d..8a92a961a2ee 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
 #include <linux/highmem.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
-- 
cgit v1.2.3


From 7b86ac3371b70c3fd8fd95501719beb1faab719f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 Aug 2019 16:19:54 +0200
Subject: pagewalk: separate function pointers from iterator data

The mm_walk structure currently mixed data and code.  Split out the
operations vectors into a new mm_walk_ops structure, and while we are
changing the API also declare the mm_walk structure inside the
walk_page_range and walk_page_vma functions.

Based on patch from Linus Torvalds.

Link: https://lore.kernel.org/r/20190828141955.22210-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 arch/openrisc/kernel/dma.c              |  22 +++---
 arch/powerpc/mm/book3s64/subpage_prot.c |  10 +--
 arch/s390/mm/gmap.c                     |  33 ++++-----
 fs/proc/task_mmu.c                      |  78 ++++++++++----------
 include/linux/pagewalk.h                |  64 ++++++++++-------
 mm/hmm.c                                |  23 +++---
 mm/madvise.c                            |  41 ++++-------
 mm/memcontrol.c                         |  23 +++---
 mm/mempolicy.c                          |  15 ++--
 mm/migrate.c                            |  23 +++---
 mm/mincore.c                            |  15 ++--
 mm/mprotect.c                           |  24 +++----
 mm/pagewalk.c                           | 124 ++++++++++++++++++--------------
 13 files changed, 251 insertions(+), 244 deletions(-)

(limited to 'include')

diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index c7812e6effa2..4d5b8bd1d795 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -44,6 +44,10 @@ page_set_nocache(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops set_nocache_walk_ops = {
+	.pte_entry		= page_set_nocache,
+};
+
 static int
 page_clear_nocache(pte_t *pte, unsigned long addr,
 		   unsigned long next, struct mm_walk *walk)
@@ -59,6 +63,10 @@ page_clear_nocache(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops clear_nocache_walk_ops = {
+	.pte_entry		= page_clear_nocache,
+};
+
 /*
  * Alloc "coherent" memory, which for OpenRISC means simply uncached.
  *
@@ -81,10 +89,6 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 {
 	unsigned long va;
 	void *page;
-	struct mm_walk walk = {
-		.pte_entry = page_set_nocache,
-		.mm = &init_mm
-	};
 
 	page = alloc_pages_exact(size, gfp | __GFP_ZERO);
 	if (!page)
@@ -99,7 +103,8 @@ arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	 * We need to iterate through the pages, clearing the dcache for
 	 * them and setting the cache-inhibit bit.
 	 */
-	if (walk_page_range(va, va + size, &walk)) {
+	if (walk_page_range(&init_mm, va, va + size, &set_nocache_walk_ops,
+			NULL)) {
 		free_pages_exact(page, size);
 		return NULL;
 	}
@@ -112,13 +117,10 @@ arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long va = (unsigned long)vaddr;
-	struct mm_walk walk = {
-		.pte_entry = page_clear_nocache,
-		.mm = &init_mm
-	};
 
 	/* walk_page_range shouldn't be able to fail here */
-	WARN_ON(walk_page_range(va, va + size, &walk));
+	WARN_ON(walk_page_range(&init_mm, va, va + size,
+			&clear_nocache_walk_ops, NULL));
 
 	free_pages_exact(vaddr, size);
 }
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 236f0a861ecc..2ef24a53f4c9 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -139,14 +139,14 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops subpage_walk_ops = {
+	.pmd_entry	= subpage_walk_pmd_entry,
+};
+
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 				    unsigned long len)
 {
 	struct vm_area_struct *vma;
-	struct mm_walk subpage_proto_walk = {
-		.mm = mm,
-		.pmd_entry = subpage_walk_pmd_entry,
-	};
 
 	/*
 	 * We don't try too hard, we just mark all the vma in that range
@@ -163,7 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
 		if (vma->vm_start >= (addr + len))
 			break;
 		vma->vm_flags |= VM_NOHUGEPAGE;
-		walk_page_vma(vma, &subpage_proto_walk);
+		walk_page_vma(vma, &subpage_walk_ops, NULL);
 		vma = vma->vm_next;
 	}
 }
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index cf80feae970d..bd78d504fdad 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2521,13 +2521,9 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 	return 0;
 }
 
-static inline void zap_zero_pages(struct mm_struct *mm)
-{
-	struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
-
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
-}
+static const struct mm_walk_ops zap_zero_walk_ops = {
+	.pmd_entry	= __zap_zero_pages,
+};
 
 /*
  * switch on pgstes for its userspace process (for kvm)
@@ -2546,7 +2542,7 @@ int s390_enable_sie(void)
 	mm->context.has_pgste = 1;
 	/* split thp mappings and disable thp for future mappings */
 	thp_split_mm(mm);
-	zap_zero_pages(mm);
+	walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
 	up_write(&mm->mmap_sem);
 	return 0;
 }
@@ -2589,12 +2585,13 @@ static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops enable_skey_walk_ops = {
+	.hugetlb_entry		= __s390_enable_skey_hugetlb,
+	.pte_entry		= __s390_enable_skey_pte,
+};
+
 int s390_enable_skey(void)
 {
-	struct mm_walk walk = {
-		.hugetlb_entry = __s390_enable_skey_hugetlb,
-		.pte_entry = __s390_enable_skey_pte,
-	};
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	int rc = 0;
@@ -2614,8 +2611,7 @@ int s390_enable_skey(void)
 	}
 	mm->def_flags &= ~VM_MERGEABLE;
 
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
+	walk_page_range(mm, 0, TASK_SIZE, &enable_skey_walk_ops, NULL);
 
 out_up:
 	up_write(&mm->mmap_sem);
@@ -2633,13 +2629,14 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 	return 0;
 }
 
+static const struct mm_walk_ops reset_cmma_walk_ops = {
+	.pte_entry		= __s390_reset_cmma,
+};
+
 void s390_reset_cmma(struct mm_struct *mm)
 {
-	struct mm_walk walk = { .pte_entry = __s390_reset_cmma };
-
 	down_write(&mm->mmap_sem);
-	walk.mm = mm;
-	walk_page_range(0, TASK_SIZE, &walk);
+	walk_page_range(mm, 0, TASK_SIZE, &reset_cmma_walk_ops, NULL);
 	up_write(&mm->mmap_sem);
 }
 EXPORT_SYMBOL_GPL(s390_reset_cmma);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8857da830b86..bf43d1d60059 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -513,7 +513,9 @@ static int smaps_pte_hole(unsigned long addr, unsigned long end,
 
 	return 0;
 }
-#endif
+#else
+#define smaps_pte_hole		NULL
+#endif /* CONFIG_SHMEM */
 
 static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 		struct mm_walk *walk)
@@ -729,21 +731,24 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 	}
 	return 0;
 }
+#else
+#define smaps_hugetlb_range	NULL
 #endif /* HUGETLB_PAGE */
 
+static const struct mm_walk_ops smaps_walk_ops = {
+	.pmd_entry		= smaps_pte_range,
+	.hugetlb_entry		= smaps_hugetlb_range,
+};
+
+static const struct mm_walk_ops smaps_shmem_walk_ops = {
+	.pmd_entry		= smaps_pte_range,
+	.hugetlb_entry		= smaps_hugetlb_range,
+	.pte_hole		= smaps_pte_hole,
+};
+
 static void smap_gather_stats(struct vm_area_struct *vma,
 			     struct mem_size_stats *mss)
 {
-	struct mm_walk smaps_walk = {
-		.pmd_entry = smaps_pte_range,
-#ifdef CONFIG_HUGETLB_PAGE
-		.hugetlb_entry = smaps_hugetlb_range,
-#endif
-		.mm = vma->vm_mm,
-	};
-
-	smaps_walk.private = mss;
-
 #ifdef CONFIG_SHMEM
 	/* In case of smaps_rollup, reset the value from previous vma */
 	mss->check_shmem_swap = false;
@@ -765,12 +770,13 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 			mss->swap += shmem_swapped;
 		} else {
 			mss->check_shmem_swap = true;
-			smaps_walk.pte_hole = smaps_pte_hole;
+			walk_page_vma(vma, &smaps_shmem_walk_ops, mss);
+			return;
 		}
 	}
 #endif
 	/* mmap_sem is held in m_start */
-	walk_page_vma(vma, &smaps_walk);
+	walk_page_vma(vma, &smaps_walk_ops, mss);
 }
 
 #define SEQ_PUT_DEC(str, val) \
@@ -1118,6 +1124,11 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 	return 0;
 }
 
+static const struct mm_walk_ops clear_refs_walk_ops = {
+	.pmd_entry		= clear_refs_pte_range,
+	.test_walk		= clear_refs_test_walk,
+};
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -1151,12 +1162,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		struct clear_refs_private cp = {
 			.type = type,
 		};
-		struct mm_walk clear_refs_walk = {
-			.pmd_entry = clear_refs_pte_range,
-			.test_walk = clear_refs_test_walk,
-			.mm = mm,
-			.private = &cp,
-		};
 
 		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
 			if (down_write_killable(&mm->mmap_sem)) {
@@ -1217,7 +1222,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 						0, NULL, mm, 0, -1UL);
 			mmu_notifier_invalidate_range_start(&range);
 		}
-		walk_page_range(0, mm->highest_vm_end, &clear_refs_walk);
+		walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops,
+				&cp);
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(&range);
 		tlb_finish_mmu(&tlb, 0, -1);
@@ -1489,8 +1495,16 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
 
 	return err;
 }
+#else
+#define pagemap_hugetlb_range	NULL
 #endif /* HUGETLB_PAGE */
 
+static const struct mm_walk_ops pagemap_ops = {
+	.pmd_entry	= pagemap_pmd_range,
+	.pte_hole	= pagemap_pte_hole,
+	.hugetlb_entry	= pagemap_hugetlb_range,
+};
+
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -1522,7 +1536,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 {
 	struct mm_struct *mm = file->private_data;
 	struct pagemapread pm;
-	struct mm_walk pagemap_walk = {};
 	unsigned long src;
 	unsigned long svpfn;
 	unsigned long start_vaddr;
@@ -1550,14 +1563,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!pm.buffer)
 		goto out_mm;
 
-	pagemap_walk.pmd_entry = pagemap_pmd_range;
-	pagemap_walk.pte_hole = pagemap_pte_hole;
-#ifdef CONFIG_HUGETLB_PAGE
-	pagemap_walk.hugetlb_entry = pagemap_hugetlb_range;
-#endif
-	pagemap_walk.mm = mm;
-	pagemap_walk.private = &pm;
-
 	src = *ppos;
 	svpfn = src / PM_ENTRY_BYTES;
 	start_vaddr = svpfn << PAGE_SHIFT;
@@ -1586,7 +1591,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		ret = down_read_killable(&mm->mmap_sem);
 		if (ret)
 			goto out_free;
-		ret = walk_page_range(start_vaddr, end, &pagemap_walk);
+		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
 		up_read(&mm->mmap_sem);
 		start_vaddr = end;
 
@@ -1798,6 +1803,11 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 }
 #endif
 
+static const struct mm_walk_ops show_numa_ops = {
+	.hugetlb_entry = gather_hugetlb_stats,
+	.pmd_entry = gather_pte_stats,
+};
+
 /*
  * Display pages allocated per node and memory policy via /proc.
  */
@@ -1809,12 +1819,6 @@ static int show_numa_map(struct seq_file *m, void *v)
 	struct numa_maps *md = &numa_priv->md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
-	struct mm_walk walk = {
-		.hugetlb_entry = gather_hugetlb_stats,
-		.pmd_entry = gather_pte_stats,
-		.private = md,
-		.mm = mm,
-	};
 	struct mempolicy *pol;
 	char buffer[64];
 	int nid;
@@ -1848,7 +1852,7 @@ static int show_numa_map(struct seq_file *m, void *v)
 		seq_puts(m, " huge");
 
 	/* mmap_sem is held by m_start */
-	walk_page_vma(vma, &walk);
+	walk_page_vma(vma, &show_numa_ops, md);
 
 	if (!md->pages)
 		goto out;
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index df278a94086d..bddd9759bab9 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -4,31 +4,28 @@
 
 #include <linux/mm.h>
 
+struct mm_walk;
+
 /**
- * mm_walk - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- *	       this handler should only handle pud_trans_huge() puds.
- *	       the pmd_entry or pte_entry callbacks will be used for
- *	       regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
- *	       this handler is required to be able to handle
- *	       pmd_trans_huge() pmds.  They may simply choose to
- *	       split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
- * @hugetlb_entry: if set, called for each hugetlb entry
- * @test_walk: caller specific callback function to determine whether
- *             we walk over the current vma or not. Returning 0
- *             value means "do page table walk over the current vma,"
- *             and a negative one means "abort current page table walk
- *             right now." 1 means "skip the current vma."
- * @mm:        mm_struct representing the target process of page table walk
- * @vma:       vma currently walked (NULL if walking outside vmas)
- * @private:   private data for callbacks' usage
- *
- * (see the comment on walk_page_range() for more details)
+ * mm_walk_ops - callbacks for walk_page_range
+ * @pud_entry:		if set, called for each non-empty PUD (2nd-level) entry
+ *			this handler should only handle pud_trans_huge() puds.
+ *			the pmd_entry or pte_entry callbacks will be used for
+ *			regular PUDs.
+ * @pmd_entry:		if set, called for each non-empty PMD (3rd-level) entry
+ *			this handler is required to be able to handle
+ *			pmd_trans_huge() pmds.  They may simply choose to
+ *			split_huge_page() instead of handling it explicitly.
+ * @pte_entry:		if set, called for each non-empty PTE (4th-level) entry
+ * @pte_hole:		if set, called for each hole at all levels
+ * @hugetlb_entry:	if set, called for each hugetlb entry
+ * @test_walk:		caller specific callback function to determine whether
+ *			we walk over the current vma or not. Returning 0 means
+ *			"do page table walk over the current vma", returning
+ *			a negative value means "abort current page table walk
+ *			right now" and returning 1 means "skip the current vma"
  */
-struct mm_walk {
+struct mm_walk_ops {
 	int (*pud_entry)(pud_t *pud, unsigned long addr,
 			 unsigned long next, struct mm_walk *walk);
 	int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
@@ -42,13 +39,28 @@ struct mm_walk {
 			     struct mm_walk *walk);
 	int (*test_walk)(unsigned long addr, unsigned long next,
 			struct mm_walk *walk);
+};
+
+/**
+ * mm_walk - walk_page_range data
+ * @ops:	operation to call during the walk
+ * @mm:		mm_struct representing the target process of page table walk
+ * @vma:	vma currently walked (NULL if walking outside vmas)
+ * @private:	private data for callbacks' usage
+ *
+ * (see the comment on walk_page_range() for more details)
+ */
+struct mm_walk {
+	const struct mm_walk_ops *ops;
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
 	void *private;
 };
 
-int walk_page_range(unsigned long addr, unsigned long end,
-		struct mm_walk *walk);
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+		unsigned long end, const struct mm_walk_ops *ops,
+		void *private);
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+		void *private);
 
 #endif /* _LINUX_PAGEWALK_H */
diff --git a/mm/hmm.c b/mm/hmm.c
index 26916ff6c8df..902f5fa6bf93 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -852,6 +852,13 @@ void hmm_range_unregister(struct hmm_range *range)
 }
 EXPORT_SYMBOL(hmm_range_unregister);
 
+static const struct mm_walk_ops hmm_walk_ops = {
+	.pud_entry	= hmm_vma_walk_pud,
+	.pmd_entry	= hmm_vma_walk_pmd,
+	.pte_hole	= hmm_vma_walk_hole,
+	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
+};
+
 /**
  * hmm_range_fault - try to fault some address in a virtual address range
  * @range:	range being faulted
@@ -887,7 +894,6 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 	struct hmm_vma_walk hmm_vma_walk;
 	struct hmm *hmm = range->hmm;
 	struct vm_area_struct *vma;
-	struct mm_walk mm_walk;
 	int ret;
 
 	lockdep_assert_held(&hmm->mmu_notifier.mm->mmap_sem);
@@ -916,21 +922,14 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		hmm_vma_walk.last = start;
 		hmm_vma_walk.flags = flags;
 		hmm_vma_walk.range = range;
-		mm_walk.private = &hmm_vma_walk;
 		end = min(range->end, vma->vm_end);
 
-		mm_walk.vma = vma;
-		mm_walk.mm = vma->vm_mm;
-		mm_walk.pte_entry = NULL;
-		mm_walk.test_walk = NULL;
-		mm_walk.hugetlb_entry = NULL;
-		mm_walk.pud_entry = hmm_vma_walk_pud;
-		mm_walk.pmd_entry = hmm_vma_walk_pmd;
-		mm_walk.pte_hole = hmm_vma_walk_hole;
-		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
+		walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
+				&hmm_vma_walk);
 
 		do {
-			ret = walk_page_range(start, end, &mm_walk);
+			ret = walk_page_range(vma->vm_mm, start, end,
+					&hmm_walk_ops, &hmm_vma_walk);
 			start = hmm_vma_walk.last;
 
 			/* Keep trying while the range is valid. */
diff --git a/mm/madvise.c b/mm/madvise.c
index 80a78bb16782..afe2b015ea58 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -226,19 +226,9 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 	return 0;
 }
 
-static void force_swapin_readahead(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
-{
-	struct mm_walk walk = {
-		.mm = vma->vm_mm,
-		.pmd_entry = swapin_walk_pmd_entry,
-		.private = vma,
-	};
-
-	walk_page_range(start, end, &walk);
-
-	lru_add_drain();	/* Push any new pages onto the LRU now */
-}
+static const struct mm_walk_ops swapin_walk_ops = {
+	.pmd_entry		= swapin_walk_pmd_entry,
+};
 
 static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end,
@@ -280,7 +270,8 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	*prev = vma;
 #ifdef CONFIG_SWAP
 	if (!file) {
-		force_swapin_readahead(vma, start, end);
+		walk_page_range(vma->vm_mm, start, end, &swapin_walk_ops, vma);
+		lru_add_drain(); /* Push any new pages onto the LRU now */
 		return 0;
 	}
 
@@ -441,20 +432,9 @@ next:
 	return 0;
 }
 
-static void madvise_free_page_range(struct mmu_gather *tlb,
-			     struct vm_area_struct *vma,
-			     unsigned long addr, unsigned long end)
-{
-	struct mm_walk free_walk = {
-		.pmd_entry = madvise_free_pte_range,
-		.mm = vma->vm_mm,
-		.private = tlb,
-	};
-
-	tlb_start_vma(tlb, vma);
-	walk_page_range(addr, end, &free_walk);
-	tlb_end_vma(tlb, vma);
-}
+static const struct mm_walk_ops madvise_free_walk_ops = {
+	.pmd_entry		= madvise_free_pte_range,
+};
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
 			unsigned long start_addr, unsigned long end_addr)
@@ -481,7 +461,10 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 	update_hiwater_rss(mm);
 
 	mmu_notifier_invalidate_range_start(&range);
-	madvise_free_page_range(&tlb, vma, range.start, range.end);
+	tlb_start_vma(&tlb, vma);
+	walk_page_range(vma->vm_mm, range.start, range.end,
+			&madvise_free_walk_ops, &tlb);
+	tlb_end_vma(&tlb, vma);
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb, range.start, range.end);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4c3af5d71ab1..9b2516a76be2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5283,17 +5283,16 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 	return 0;
 }
 
+static const struct mm_walk_ops precharge_walk_ops = {
+	.pmd_entry	= mem_cgroup_count_precharge_pte_range,
+};
+
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 {
 	unsigned long precharge;
 
-	struct mm_walk mem_cgroup_count_precharge_walk = {
-		.pmd_entry = mem_cgroup_count_precharge_pte_range,
-		.mm = mm,
-	};
 	down_read(&mm->mmap_sem);
-	walk_page_range(0, mm->highest_vm_end,
-			&mem_cgroup_count_precharge_walk);
+	walk_page_range(mm, 0, mm->highest_vm_end, &precharge_walk_ops, NULL);
 	up_read(&mm->mmap_sem);
 
 	precharge = mc.precharge;
@@ -5562,13 +5561,12 @@ put:			/* get_mctgt_type() gets the page */
 	return ret;
 }
 
+static const struct mm_walk_ops charge_walk_ops = {
+	.pmd_entry	= mem_cgroup_move_charge_pte_range,
+};
+
 static void mem_cgroup_move_charge(void)
 {
-	struct mm_walk mem_cgroup_move_charge_walk = {
-		.pmd_entry = mem_cgroup_move_charge_pte_range,
-		.mm = mc.mm,
-	};
-
 	lru_add_drain_all();
 	/*
 	 * Signal lock_page_memcg() to take the memcg's move_lock
@@ -5594,7 +5592,8 @@ retry:
 	 * When we have consumed all precharges and failed in doing
 	 * additional charge, the page walk just aborts.
 	 */
-	walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+	walk_page_range(mc.mm, 0, mc.mm->highest_vm_end, &charge_walk_ops,
+			NULL);
 
 	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 3a96def1e796..f000771558d8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -655,6 +655,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
 	return 1;
 }
 
+static const struct mm_walk_ops queue_pages_walk_ops = {
+	.hugetlb_entry		= queue_pages_hugetlb,
+	.pmd_entry		= queue_pages_pte_range,
+	.test_walk		= queue_pages_test_walk,
+};
+
 /*
  * Walk through page tables and collect pages to be migrated.
  *
@@ -679,15 +685,8 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
 		.nmask = nodes,
 		.prev = NULL,
 	};
-	struct mm_walk queue_pages_walk = {
-		.hugetlb_entry = queue_pages_hugetlb,
-		.pmd_entry = queue_pages_pte_range,
-		.test_walk = queue_pages_test_walk,
-		.mm = mm,
-		.private = &qp,
-	};
 
-	return walk_page_range(start, end, &queue_pages_walk);
+	return walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
 }
 
 /*
diff --git a/mm/migrate.c b/mm/migrate.c
index c9c73a35aca7..9f4ed4e985c1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2320,6 +2320,11 @@ next:
 	return 0;
 }
 
+static const struct mm_walk_ops migrate_vma_walk_ops = {
+	.pmd_entry		= migrate_vma_collect_pmd,
+	.pte_hole		= migrate_vma_collect_hole,
+};
+
 /*
  * migrate_vma_collect() - collect pages over a range of virtual addresses
  * @migrate: migrate struct containing all migration information
@@ -2331,21 +2336,15 @@ next:
 static void migrate_vma_collect(struct migrate_vma *migrate)
 {
 	struct mmu_notifier_range range;
-	struct mm_walk mm_walk = {
-		.pmd_entry = migrate_vma_collect_pmd,
-		.pte_hole = migrate_vma_collect_hole,
-		.vma = migrate->vma,
-		.mm = migrate->vma->vm_mm,
-		.private = migrate,
-	};
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm_walk.mm,
-				migrate->start,
-				migrate->end);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL,
+			migrate->vma->vm_mm, migrate->start, migrate->end);
 	mmu_notifier_invalidate_range_start(&range);
-	walk_page_range(migrate->start, migrate->end, &mm_walk);
-	mmu_notifier_invalidate_range_end(&range);
 
+	walk_page_range(migrate->vma->vm_mm, migrate->start, migrate->end,
+			&migrate_vma_walk_ops, migrate);
+
+	mmu_notifier_invalidate_range_end(&range);
 	migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
 }
 
diff --git a/mm/mincore.c b/mm/mincore.c
index 3b051b6ab3fe..f9a9dbe8cd33 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -193,6 +193,12 @@ static inline bool can_do_mincore(struct vm_area_struct *vma)
 		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
 }
 
+static const struct mm_walk_ops mincore_walk_ops = {
+	.pmd_entry		= mincore_pte_range,
+	.pte_hole		= mincore_unmapped_range,
+	.hugetlb_entry		= mincore_hugetlb,
+};
+
 /*
  * Do a chunk of "sys_mincore()". We've already checked
  * all the arguments, we hold the mmap semaphore: we should
@@ -203,12 +209,6 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 	struct vm_area_struct *vma;
 	unsigned long end;
 	int err;
-	struct mm_walk mincore_walk = {
-		.pmd_entry = mincore_pte_range,
-		.pte_hole = mincore_unmapped_range,
-		.hugetlb_entry = mincore_hugetlb,
-		.private = vec,
-	};
 
 	vma = find_vma(current->mm, addr);
 	if (!vma || addr < vma->vm_start)
@@ -219,8 +219,7 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 		memset(vec, 1, pages);
 		return pages;
 	}
-	mincore_walk.mm = vma->vm_mm;
-	err = walk_page_range(addr, end, &mincore_walk);
+	err = walk_page_range(vma->vm_mm, addr, end, &mincore_walk_ops, vec);
 	if (err < 0)
 		return err;
 	return (end - addr) >> PAGE_SHIFT;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index cc73318dbc25..675e5d34a507 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -329,20 +329,11 @@ static int prot_none_test(unsigned long addr, unsigned long next,
 	return 0;
 }
 
-static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
-			   unsigned long end, unsigned long newflags)
-{
-	pgprot_t new_pgprot = vm_get_page_prot(newflags);
-	struct mm_walk prot_none_walk = {
-		.pte_entry = prot_none_pte_entry,
-		.hugetlb_entry = prot_none_hugetlb_entry,
-		.test_walk = prot_none_test,
-		.mm = current->mm,
-		.private = &new_pgprot,
-	};
-
-	return walk_page_range(start, end, &prot_none_walk);
-}
+static const struct mm_walk_ops prot_none_walk_ops = {
+	.pte_entry		= prot_none_pte_entry,
+	.hugetlb_entry		= prot_none_hugetlb_entry,
+	.test_walk		= prot_none_test,
+};
 
 int
 mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
@@ -369,7 +360,10 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	if (arch_has_pfn_modify_check() &&
 	    (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
 	    (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
-		error = prot_none_walk(vma, start, end, newflags);
+		pgprot_t new_pgprot = vm_get_page_prot(newflags);
+
+		error = walk_page_range(current->mm, start, end,
+				&prot_none_walk_ops, &new_pgprot);
 		if (error)
 			return error;
 	}
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8a92a961a2ee..b8762b673a3d 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -9,10 +9,11 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 {
 	pte_t *pte;
 	int err = 0;
+	const struct mm_walk_ops *ops = walk->ops;
 
 	pte = pte_offset_map(pmd, addr);
 	for (;;) {
-		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
+		err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
 		if (err)
 		       break;
 		addr += PAGE_SIZE;
@@ -30,6 +31,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 {
 	pmd_t *pmd;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	pmd = pmd_offset(pud, addr);
@@ -37,8 +39,8 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
 again:
 		next = pmd_addr_end(addr, end);
 		if (pmd_none(*pmd) || !walk->vma) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
@@ -47,8 +49,8 @@ again:
 		 * This implies that each ->pmd_entry() handler
 		 * needs to know about pmd_trans_huge() pmds
 		 */
-		if (walk->pmd_entry)
-			err = walk->pmd_entry(pmd, addr, next, walk);
+		if (ops->pmd_entry)
+			err = ops->pmd_entry(pmd, addr, next, walk);
 		if (err)
 			break;
 
@@ -56,7 +58,7 @@ again:
 		 * Check this here so we only break down trans_huge
 		 * pages when we _need_ to
 		 */
-		if (!walk->pte_entry)
+		if (!ops->pte_entry)
 			continue;
 
 		split_huge_pmd(walk->vma, pmd, addr);
@@ -75,6 +77,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 {
 	pud_t *pud;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	pud = pud_offset(p4d, addr);
@@ -82,18 +85,18 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
  again:
 		next = pud_addr_end(addr, end);
 		if (pud_none(*pud) || !walk->vma) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
 
-		if (walk->pud_entry) {
+		if (ops->pud_entry) {
 			spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma);
 
 			if (ptl) {
-				err = walk->pud_entry(pud, addr, next, walk);
+				err = ops->pud_entry(pud, addr, next, walk);
 				spin_unlock(ptl);
 				if (err)
 					break;
@@ -105,7 +108,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
 		if (pud_none(*pud))
 			goto again;
 
-		if (walk->pmd_entry || walk->pte_entry)
+		if (ops->pmd_entry || ops->pte_entry)
 			err = walk_pmd_range(pud, addr, next, walk);
 		if (err)
 			break;
@@ -119,19 +122,20 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 {
 	p4d_t *p4d;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);
 		if (p4d_none_or_clear_bad(p4d)) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
-		if (walk->pmd_entry || walk->pte_entry)
+		if (ops->pmd_entry || ops->pte_entry)
 			err = walk_pud_range(p4d, addr, next, walk);
 		if (err)
 			break;
@@ -145,19 +149,20 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 {
 	pgd_t *pgd;
 	unsigned long next;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	pgd = pgd_offset(walk->mm, addr);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
-			if (walk->pte_hole)
-				err = walk->pte_hole(addr, next, walk);
+			if (ops->pte_hole)
+				err = ops->pte_hole(addr, next, walk);
 			if (err)
 				break;
 			continue;
 		}
-		if (walk->pmd_entry || walk->pte_entry)
+		if (ops->pmd_entry || ops->pte_entry)
 			err = walk_p4d_range(pgd, addr, next, walk);
 		if (err)
 			break;
@@ -183,6 +188,7 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 	unsigned long hmask = huge_page_mask(h);
 	unsigned long sz = huge_page_size(h);
 	pte_t *pte;
+	const struct mm_walk_ops *ops = walk->ops;
 	int err = 0;
 
 	do {
@@ -190,9 +196,9 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
 		pte = huge_pte_offset(walk->mm, addr & hmask, sz);
 
 		if (pte)
-			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
-		else if (walk->pte_hole)
-			err = walk->pte_hole(addr, next, walk);
+			err = ops->hugetlb_entry(pte, hmask, addr, next, walk);
+		else if (ops->pte_hole)
+			err = ops->pte_hole(addr, next, walk);
 
 		if (err)
 			break;
@@ -220,9 +226,10 @@ static int walk_page_test(unsigned long start, unsigned long end,
 			struct mm_walk *walk)
 {
 	struct vm_area_struct *vma = walk->vma;
+	const struct mm_walk_ops *ops = walk->ops;
 
-	if (walk->test_walk)
-		return walk->test_walk(start, end, walk);
+	if (ops->test_walk)
+		return ops->test_walk(start, end, walk);
 
 	/*
 	 * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP
@@ -234,8 +241,8 @@ static int walk_page_test(unsigned long start, unsigned long end,
 	 */
 	if (vma->vm_flags & VM_PFNMAP) {
 		int err = 1;
-		if (walk->pte_hole)
-			err = walk->pte_hole(start, end, walk);
+		if (ops->pte_hole)
+			err = ops->pte_hole(start, end, walk);
 		return err ? err : 1;
 	}
 	return 0;
@@ -248,7 +255,7 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 	struct vm_area_struct *vma = walk->vma;
 
 	if (vma && is_vm_hugetlb_page(vma)) {
-		if (walk->hugetlb_entry)
+		if (walk->ops->hugetlb_entry)
 			err = walk_hugetlb_range(start, end, walk);
 	} else
 		err = walk_pgd_range(start, end, walk);
@@ -258,11 +265,13 @@ static int __walk_page_range(unsigned long start, unsigned long end,
 
 /**
  * walk_page_range - walk page table with caller specific callbacks
- * @start: start address of the virtual address range
- * @end: end address of the virtual address range
- * @walk: mm_walk structure defining the callbacks and the target address space
+ * @mm:		mm_struct representing the target process of page table walk
+ * @start:	start address of the virtual address range
+ * @end:	end address of the virtual address range
+ * @ops:	operation to call during the walk
+ * @private:	private data for callbacks' usage
  *
- * Recursively walk the page table tree of the process represented by @walk->mm
+ * Recursively walk the page table tree of the process represented by @mm
  * within the virtual address range [@start, @end). During walking, we can do
  * some caller-specific works for each entry, by setting up pmd_entry(),
  * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these
@@ -278,47 +287,52 @@ static int __walk_page_range(unsigned long start, unsigned long end,
  *
  * Before starting to walk page table, some callers want to check whether
  * they really want to walk over the current vma, typically by checking
- * its vm_flags. walk_page_test() and @walk->test_walk() are used for this
+ * its vm_flags. walk_page_test() and @ops->test_walk() are used for this
  * purpose.
  *
  * struct mm_walk keeps current values of some common data like vma and pmd,
  * which are useful for the access from callbacks. If you want to pass some
- * caller-specific data to callbacks, @walk->private should be helpful.
+ * caller-specific data to callbacks, @private should be helpful.
  *
  * Locking:
- *   Callers of walk_page_range() and walk_page_vma() should hold
- *   @walk->mm->mmap_sem, because these function traverse vma list and/or
- *   access to vma's data.
+ *   Callers of walk_page_range() and walk_page_vma() should hold @mm->mmap_sem,
+ *   because these function traverse vma list and/or access to vma's data.
  */
-int walk_page_range(unsigned long start, unsigned long end,
-		    struct mm_walk *walk)
+int walk_page_range(struct mm_struct *mm, unsigned long start,
+		unsigned long end, const struct mm_walk_ops *ops,
+		void *private)
 {
 	int err = 0;
 	unsigned long next;
 	struct vm_area_struct *vma;
+	struct mm_walk walk = {
+		.ops		= ops,
+		.mm		= mm,
+		.private	= private,
+	};
 
 	if (start >= end)
 		return -EINVAL;
 
-	if (!walk->mm)
+	if (!walk.mm)
 		return -EINVAL;
 
-	VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm);
+	VM_BUG_ON_MM(!rwsem_is_locked(&walk.mm->mmap_sem), walk.mm);
 
-	vma = find_vma(walk->mm, start);
+	vma = find_vma(walk.mm, start);
 	do {
 		if (!vma) { /* after the last vma */
-			walk->vma = NULL;
+			walk.vma = NULL;
 			next = end;
 		} else if (start < vma->vm_start) { /* outside vma */
-			walk->vma = NULL;
+			walk.vma = NULL;
 			next = min(end, vma->vm_start);
 		} else { /* inside vma */
-			walk->vma = vma;
+			walk.vma = vma;
 			next = min(end, vma->vm_end);
 			vma = vma->vm_next;
 
-			err = walk_page_test(start, next, walk);
+			err = walk_page_test(start, next, &walk);
 			if (err > 0) {
 				/*
 				 * positive return values are purely for
@@ -331,28 +345,34 @@ int walk_page_range(unsigned long start, unsigned long end,
 			if (err < 0)
 				break;
 		}
-		if (walk->vma || walk->pte_hole)
-			err = __walk_page_range(start, next, walk);
+		if (walk.vma || walk.ops->pte_hole)
+			err = __walk_page_range(start, next, &walk);
 		if (err)
 			break;
 	} while (start = next, start < end);
 	return err;
 }
 
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk)
+int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
+		void *private)
 {
+	struct mm_walk walk = {
+		.ops		= ops,
+		.mm		= vma->vm_mm,
+		.vma		= vma,
+		.private	= private,
+	};
 	int err;
 
-	if (!walk->mm)
+	if (!walk.mm)
 		return -EINVAL;
 
-	VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem));
-	VM_BUG_ON(!vma);
-	walk->vma = vma;
-	err = walk_page_test(vma->vm_start, vma->vm_end, walk);
+	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+
+	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
 	if (err > 0)
 		return 0;
 	if (err < 0)
 		return err;
-	return __walk_page_range(vma->vm_start, vma->vm_end, walk);
+	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
-- 
cgit v1.2.3


From 312364f3534cc974b79a96d062bde2386315201f Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Mon, 26 Aug 2019 22:14:23 +0200
Subject: kernel.h: Add non_block_start/end()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In some special cases we must not block, but there's not a spinlock,
preempt-off, irqs-off or similar critical section already that arms the
might_sleep() debug checks. Add a non_block_start/end() pair to annotate
these.

This will be used in the oom paths of mmu-notifiers, where blocking is not
allowed to make sure there's forward progress. Quoting Michal:

"The notifier is called from quite a restricted context - oom_reaper -
which shouldn't depend on any locks or sleepable conditionals. The code
should be swift as well but we mostly do care about it to make a forward
progress. Checking for sleepable context is the best thing we could come
up with that would describe these demands at least partially."

Peter also asked whether we want to catch spinlocks on top, but Michal
said those are less of a problem because spinlocks can't have an indirect
dependency upon the page allocator and hence close the loop with the oom
reaper.

Suggested by Michal Hocko.

Link: https://lore.kernel.org/r/20190826201425.17547-4-daniel.vetter@ffwll.ch
Acked-by: Christian König <christian.koenig@amd.com> (v1)
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/kernel.h | 23 ++++++++++++++++++++++-
 include/linux/sched.h  |  4 ++++
 kernel/sched/core.c    | 19 ++++++++++++++-----
 3 files changed, 40 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4fa360a13c1e..d83d403dac2e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -217,7 +217,9 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
  * might_sleep - annotation for functions that can sleep
  *
  * this macro will print a stack trace if it is executed in an atomic
- * context (spinlock, irq-handler, ...).
+ * context (spinlock, irq-handler, ...). Additional sections where blocking is
+ * not allowed can be annotated with non_block_start() and non_block_end()
+ * pairs.
  *
  * This is a useful debugging help to be able to catch problems early and not
  * be bitten later when the calling function happens to sleep when it is not
@@ -233,6 +235,23 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define cant_sleep() \
 	do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
 # define sched_annotate_sleep()	(current->task_state_change = 0)
+/**
+ * non_block_start - annotate the start of section where sleeping is prohibited
+ *
+ * This is on behalf of the oom reaper, specifically when it is calling the mmu
+ * notifiers. The problem is that if the notifier were to block on, for example,
+ * mutex_lock() and if the process which holds that mutex were to perform a
+ * sleeping memory allocation, the oom reaper is now blocked on completion of
+ * that memory allocation. Other blocking calls like wait_event() pose similar
+ * issues.
+ */
+# define non_block_start() (current->non_block_count++)
+/**
+ * non_block_end - annotate the end of section where sleeping is prohibited
+ *
+ * Closes a section opened by non_block_start().
+ */
+# define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
   static inline void ___might_sleep(const char *file, int line,
 				   int preempt_offset) { }
@@ -241,6 +260,8 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
 # define sched_annotate_sleep() do { } while (0)
+# define non_block_start() do { } while (0)
+# define non_block_end() do { } while (0)
 #endif
 
 #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f51932bd543..c5630f3dca1f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -974,6 +974,10 @@ struct task_struct {
 	struct mutex_waiter		*blocked_on;
 #endif
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	int				non_block_count;
+#endif
+
 #ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned int			irq_events;
 	unsigned long			hardirq_enable_ip;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2b037f195473..57245770d6cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3700,13 +3700,22 @@ static noinline void __schedule_bug(struct task_struct *prev)
 /*
  * Various schedule()-time debugging checks and statistics:
  */
-static inline void schedule_debug(struct task_struct *prev)
+static inline void schedule_debug(struct task_struct *prev, bool preempt)
 {
 #ifdef CONFIG_SCHED_STACK_END_CHECK
 	if (task_stack_end_corrupted(prev))
 		panic("corrupted stack end detected inside scheduler\n");
 #endif
 
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+	if (!preempt && prev->state && prev->non_block_count) {
+		printk(KERN_ERR "BUG: scheduling in a non-blocking section: %s/%d/%i\n",
+			prev->comm, prev->pid, prev->non_block_count);
+		dump_stack();
+		add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+	}
+#endif
+
 	if (unlikely(in_atomic_preempt_off())) {
 		__schedule_bug(prev);
 		preempt_count_set(PREEMPT_DISABLED);
@@ -3813,7 +3822,7 @@ static void __sched notrace __schedule(bool preempt)
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	schedule_debug(prev);
+	schedule_debug(prev, preempt);
 
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
@@ -6570,7 +6579,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	rcu_sleep_check();
 
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
-	     !is_idle_task(current)) ||
+	     !is_idle_task(current) && !current->non_block_count) ||
 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
 	    oops_in_progress)
 		return;
@@ -6586,8 +6595,8 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 		"BUG: sleeping function called from invalid context at %s:%d\n",
 			file, line);
 	printk(KERN_ERR
-		"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
-			in_atomic(), irqs_disabled(),
+		"in_atomic(): %d, irqs_disabled(): %d, non_block: %d, pid: %d, name: %s\n",
+			in_atomic(), irqs_disabled(), current->non_block_count,
 			current->pid, current->comm);
 
 	if (task_stack_end_corrupted(current))
-- 
cgit v1.2.3


From ca78410403dd64ac0ee0e3cc8646b38335271bfd Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 22 Aug 2019 11:55:53 +0300
Subject: PCI: Get rid of dev->has_secondary_link flag

In some systems, the Device/Port Type in the PCI Express Capabilities
register incorrectly identifies upstream ports as downstream ports.

d0751b98dfa3 ("PCI: Add dev->has_secondary_link to track downstream PCIe
links") addressed this by adding pci_dev.has_secondary_link, which is set
for downstream ports.  But this is confusing because pci_pcie_type()
sometimes gives the wrong answer, and it's not obvious that we should use
pci_dev.has_secondary_link instead.

Reduce the confusion by correcting the type of the port itself so that
pci_pcie_type() returns the actual type regardless of what the Device/Port
Type register claims it is.  Update the users to call pci_pcie_type() and
pcie_downstream_port() accordingly, and remove pci_dev.has_secondary_link
completely.

Link: https://lore.kernel.org/linux-pci/20190703133953.GK128603@google.com/
Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20190822085553.62697-2-mika.westerberg@linux.intel.com
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/pci/pci.c       |  2 +-
 drivers/pci/pcie/aspm.c |  8 ++++----
 drivers/pci/pcie/err.c  |  2 +-
 drivers/pci/probe.c     | 48 ++++++++++++++++++++++++++++--------------------
 drivers/pci/vc.c        |  4 +++-
 include/linux/pci.h     |  1 -
 6 files changed, 37 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 29ed5ec1ac27..97e7f6e0821e 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3576,7 +3576,7 @@ int pci_enable_atomic_ops_to_root(struct pci_dev *dev, u32 cap_mask)
 		}
 
 		/* Ensure upstream ports don't block AtomicOps on egress */
-		if (!bridge->has_secondary_link) {
+		if (pci_pcie_type(bridge) == PCI_EXP_TYPE_UPSTREAM) {
 			pcie_capability_read_dword(bridge, PCI_EXP_DEVCTL2,
 						   &ctl2);
 			if (ctl2 & PCI_EXP_DEVCTL2_ATOMIC_EGRESS_BLOCK)
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index e44af7f4d37f..dcda96818eb6 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -913,10 +913,10 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev)
 
 	/*
 	 * We allocate pcie_link_state for the component on the upstream
-	 * end of a Link, so there's nothing to do unless this device has a
-	 * Link on its secondary side.
+	 * end of a Link, so there's nothing to do unless this device is
+	 * downstream port.
 	 */
-	if (!pdev->has_secondary_link)
+	if (!pcie_downstream_port(pdev))
 		return;
 
 	/* VIA has a strange chipset, root port is under a bridge */
@@ -1070,7 +1070,7 @@ static int __pci_disable_link_state(struct pci_dev *pdev, int state, bool sem)
 	if (!pci_is_pcie(pdev))
 		return 0;
 
-	if (pdev->has_secondary_link)
+	if (pcie_downstream_port(pdev))
 		parent = pdev;
 	if (!parent || !parent->link_state)
 		return -EINVAL;
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index 773197a12568..b0e6048a9208 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -166,7 +166,7 @@ static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service)
 	driver = pcie_port_find_service(dev, service);
 	if (driver && driver->reset_link) {
 		status = driver->reset_link(dev);
-	} else if (dev->has_secondary_link) {
+	} else if (pcie_downstream_port(dev)) {
 		status = default_reset_link(dev);
 	} else {
 		pci_printk(KERN_DEBUG, dev, "no link-reset support at upstream device %s\n",
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index cdf34a3c531a..3bfa57d58402 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1431,26 +1431,38 @@ void set_pcie_port_type(struct pci_dev *pdev)
 	pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, &reg16);
 	pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD;
 
+	parent = pci_upstream_bridge(pdev);
+	if (!parent)
+		return;
+
 	/*
-	 * A Root Port or a PCI-to-PCIe bridge is always the upstream end
-	 * of a Link.  No PCIe component has two Links.  Two Links are
-	 * connected by a Switch that has a Port on each Link and internal
-	 * logic to connect the two Ports.
+	 * Some systems do not identify their upstream/downstream ports
+	 * correctly so detect impossible configurations here and correct
+	 * the port type accordingly.
 	 */
 	type = pci_pcie_type(pdev);
-	if (type == PCI_EXP_TYPE_ROOT_PORT ||
-	    type == PCI_EXP_TYPE_PCIE_BRIDGE)
-		pdev->has_secondary_link = 1;
-	else if (type == PCI_EXP_TYPE_UPSTREAM ||
-		 type == PCI_EXP_TYPE_DOWNSTREAM) {
-		parent = pci_upstream_bridge(pdev);
-
+	if (type == PCI_EXP_TYPE_DOWNSTREAM) {
 		/*
-		 * Usually there's an upstream device (Root Port or Switch
-		 * Downstream Port), but we can't assume one exists.
+		 * If pdev claims to be downstream port but the parent
+		 * device is also downstream port assume pdev is actually
+		 * upstream port.
 		 */
-		if (parent && !parent->has_secondary_link)
-			pdev->has_secondary_link = 1;
+		if (pcie_downstream_port(parent)) {
+			pci_info(pdev, "claims to be downstream port but is acting as upstream port, correcting type\n");
+			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+			pdev->pcie_flags_reg |= PCI_EXP_TYPE_UPSTREAM;
+		}
+	} else if (type == PCI_EXP_TYPE_UPSTREAM) {
+		/*
+		 * If pdev claims to be upstream port but the parent
+		 * device is also upstream port assume pdev is actually
+		 * downstream port.
+		 */
+		if (pci_pcie_type(parent) == PCI_EXP_TYPE_UPSTREAM) {
+			pci_info(pdev, "claims to be upstream port but is acting as downstream port, correcting type\n");
+			pdev->pcie_flags_reg &= ~PCI_EXP_FLAGS_TYPE;
+			pdev->pcie_flags_reg |= PCI_EXP_TYPE_DOWNSTREAM;
+		}
 	}
 }
 
@@ -2488,12 +2500,8 @@ static int only_one_child(struct pci_bus *bus)
 	 * A PCIe Downstream Port normally leads to a Link with only Device
 	 * 0 on it (PCIe spec r3.1, sec 7.3.1).  As an optimization, scan
 	 * only for Device 0 in that situation.
-	 *
-	 * Checking has_secondary_link is a hack to identify Downstream
-	 * Ports because sometimes Switches are configured such that the
-	 * PCIe Port Type labels are backwards.
 	 */
-	if (bridge && pci_is_pcie(bridge) && bridge->has_secondary_link)
+	if (bridge && pci_is_pcie(bridge) && pcie_downstream_port(bridge))
 		return 1;
 
 	return 0;
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index 5acd9c02683a..9ae9fb9339e8 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -13,6 +13,8 @@
 #include <linux/pci_regs.h>
 #include <linux/types.h>
 
+#include "pci.h"
+
 /**
  * pci_vc_save_restore_dwords - Save or restore a series of dwords
  * @dev: device
@@ -105,7 +107,7 @@ static void pci_vc_enable(struct pci_dev *dev, int pos, int res)
 	struct pci_dev *link = NULL;
 
 	/* Enable VCs from the downstream device */
-	if (!dev->has_secondary_link)
+	if (!pci_is_pcie(dev) || !pcie_downstream_port(dev))
 		return;
 
 	ctrl_pos = pos + PCI_VC_RES_CTRL + (res * PCI_CAP_VC_PER_VC_SIZEOF);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..0153ede227e5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -418,7 +418,6 @@ struct pci_dev {
 	unsigned int	broken_intx_masking:1;	/* INTx masking can't be used */
 	unsigned int	io_window_1k:1;		/* Intel bridge 1K I/O windows */
 	unsigned int	irq_managed:1;
-	unsigned int	has_secondary_link:1;
 	unsigned int	non_compliant_bars:1;	/* Broken BARs; ignore them */
 	unsigned int	is_probed:1;		/* Device probing in progress */
 	unsigned int	link_active_reporting:1;/* Device capable of reporting link active */
-- 
cgit v1.2.3


From 3dd97a08271f031bd54c61e65b6b0ebfb0c404b7 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 5 Sep 2019 20:06:56 +0200
Subject: net: fib_notifier: move fib_notifier_ops from struct net into per-net
 struct

No need for fib_notifier_ops to be in struct net. It is used only by
fib_notifier as a private data. Use net_generic to introduce per-net
fib_notifier struct and move fib_notifier_ops there.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  3 ---
 net/core/fib_notifier.c     | 29 +++++++++++++++++++++++------
 2 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index ab40d7afdc54..64bcb589a610 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -103,9 +103,6 @@ struct net {
 	/* core fib_rules */
 	struct list_head	rules_ops;
 
-	struct list_head	fib_notifier_ops;  /* Populated by
-						    * register_pernet_subsys()
-						    */
 	struct net_device       *loopback_dev;          /* The loopback */
 	struct netns_core	core;
 	struct netns_mib	mib;
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 13a40b831d6d..470a606d5e8d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -5,8 +5,15 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/fib_notifier.h>
 
+static unsigned int fib_notifier_net_id;
+
+struct fib_notifier_net {
+	struct list_head fib_notifier_ops;
+};
+
 static ATOMIC_NOTIFIER_HEAD(fib_chain);
 
 int call_fib_notifier(struct notifier_block *nb, struct net *net,
@@ -34,6 +41,7 @@ EXPORT_SYMBOL(call_fib_notifiers);
 
 static unsigned int fib_seq_sum(void)
 {
+	struct fib_notifier_net *fn_net;
 	struct fib_notifier_ops *ops;
 	unsigned int fib_seq = 0;
 	struct net *net;
@@ -41,8 +49,9 @@ static unsigned int fib_seq_sum(void)
 	rtnl_lock();
 	down_read(&net_rwsem);
 	for_each_net(net) {
+		fn_net = net_generic(net, fib_notifier_net_id);
 		rcu_read_lock();
-		list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+		list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
 			if (!try_module_get(ops->owner))
 				continue;
 			fib_seq += ops->fib_seq_read(net);
@@ -58,9 +67,10 @@ static unsigned int fib_seq_sum(void)
 
 static int fib_net_dump(struct net *net, struct notifier_block *nb)
 {
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
 	struct fib_notifier_ops *ops;
 
-	list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
+	list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) {
 		int err;
 
 		if (!try_module_get(ops->owner))
@@ -127,12 +137,13 @@ EXPORT_SYMBOL(unregister_fib_notifier);
 static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
 				       struct net *net)
 {
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
 	struct fib_notifier_ops *o;
 
-	list_for_each_entry(o, &net->fib_notifier_ops, list)
+	list_for_each_entry(o, &fn_net->fib_notifier_ops, list)
 		if (ops->family == o->family)
 			return -EEXIST;
-	list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
+	list_add_tail_rcu(&ops->list, &fn_net->fib_notifier_ops);
 	return 0;
 }
 
@@ -167,18 +178,24 @@ EXPORT_SYMBOL(fib_notifier_ops_unregister);
 
 static int __net_init fib_notifier_net_init(struct net *net)
 {
-	INIT_LIST_HEAD(&net->fib_notifier_ops);
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+	INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
 	return 0;
 }
 
 static void __net_exit fib_notifier_net_exit(struct net *net)
 {
-	WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
+	struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);
+
+	WARN_ON_ONCE(!list_empty(&fn_net->fib_notifier_ops));
 }
 
 static struct pernet_operations fib_notifier_net_ops = {
 	.init = fib_notifier_net_init,
 	.exit = fib_notifier_net_exit,
+	.id = &fib_notifier_net_id,
+	.size = sizeof(struct fib_notifier_net),
 };
 
 static int __init fib_notifier_init(void)
-- 
cgit v1.2.3


From fe163e534e5eecdfd7b5920b0dfd24c458ee85d6 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 5 Sep 2019 19:36:37 -0700
Subject: isdn/capi: check message length in capi_write()

syzbot reported:

    BUG: KMSAN: uninit-value in capi_write+0x791/0xa90 drivers/isdn/capi/capi.c:700
    CPU: 0 PID: 10025 Comm: syz-executor379 Not tainted 4.20.0-rc7+ #2
    Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
    Call Trace:
      __dump_stack lib/dump_stack.c:77 [inline]
      dump_stack+0x173/0x1d0 lib/dump_stack.c:113
      kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613
      __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313
      capi_write+0x791/0xa90 drivers/isdn/capi/capi.c:700
      do_loop_readv_writev fs/read_write.c:703 [inline]
      do_iter_write+0x83e/0xd80 fs/read_write.c:961
      vfs_writev fs/read_write.c:1004 [inline]
      do_writev+0x397/0x840 fs/read_write.c:1039
      __do_sys_writev fs/read_write.c:1112 [inline]
      __se_sys_writev+0x9b/0xb0 fs/read_write.c:1109
      __x64_sys_writev+0x4a/0x70 fs/read_write.c:1109
      do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291
      entry_SYSCALL_64_after_hwframe+0x63/0xe7
    [...]

The problem is that capi_write() is reading past the end of the message.
Fix it by checking the message's length in the needed places.

Reported-and-tested-by: syzbot+0849c524d9c634f5ae66@syzkaller.appspotmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/capi.c          | 10 +++++++++-
 include/uapi/linux/isdn/capicmd.h |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index 3c3ad42f22bf..c92b405b7646 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -688,6 +688,9 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos
 	if (!cdev->ap.applid)
 		return -ENODEV;
 
+	if (count < CAPIMSG_BASELEN)
+		return -EINVAL;
+
 	skb = alloc_skb(count, GFP_USER);
 	if (!skb)
 		return -ENOMEM;
@@ -698,7 +701,8 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos
 	}
 	mlen = CAPIMSG_LEN(skb->data);
 	if (CAPIMSG_CMD(skb->data) == CAPI_DATA_B3_REQ) {
-		if ((size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) {
+		if (count < CAPI_DATA_B3_REQ_LEN ||
+		    (size_t)(mlen + CAPIMSG_DATALEN(skb->data)) != count) {
 			kfree_skb(skb);
 			return -EINVAL;
 		}
@@ -711,6 +715,10 @@ capi_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos
 	CAPIMSG_SETAPPID(skb->data, cdev->ap.applid);
 
 	if (CAPIMSG_CMD(skb->data) == CAPI_DISCONNECT_B3_RESP) {
+		if (count < CAPI_DISCONNECT_B3_RESP_LEN) {
+			kfree_skb(skb);
+			return -EINVAL;
+		}
 		mutex_lock(&cdev->lock);
 		capincci_free(cdev, CAPIMSG_NCCI(skb->data));
 		mutex_unlock(&cdev->lock);
diff --git a/include/uapi/linux/isdn/capicmd.h b/include/uapi/linux/isdn/capicmd.h
index 4941628a4fb9..5ec88e7548a9 100644
--- a/include/uapi/linux/isdn/capicmd.h
+++ b/include/uapi/linux/isdn/capicmd.h
@@ -16,6 +16,7 @@
 #define CAPI_MSG_BASELEN		8
 #define CAPI_DATA_B3_REQ_LEN		(CAPI_MSG_BASELEN+4+4+2+2+2)
 #define CAPI_DATA_B3_RESP_LEN		(CAPI_MSG_BASELEN+4+2)
+#define CAPI_DISCONNECT_B3_RESP_LEN	(CAPI_MSG_BASELEN+4)
 
 /*----- CAPI commands -----*/
 #define CAPI_ALERT		    0x01
-- 
cgit v1.2.3


From fb377eb80c80339b580831a3c0fcce34a4c9d1ad Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 5 Sep 2019 16:48:38 +0200
Subject: ipc: fix sparc64 ipc() wrapper

Matt bisected a sparc64 specific issue with semctl, shmctl and msgctl
to a commit from my y2038 series in linux-5.1, as I missed the custom
sys_ipc() wrapper that sparc64 uses in place of the generic version that
I patched.

The problem is that the sys_{sem,shm,msg}ctl() functions in the kernel
now do not allow being called with the IPC_64 flag any more, resulting
in a -EINVAL error when they don't recognize the command.

Instead, the correct way to do this now is to call the internal
ksys_old_{sem,shm,msg}ctl() functions to select the API version.

As we generally move towards these functions anyway, change all of
sparc_ipc() to consistently use those in place of the sys_*() versions,
and move the required ksys_*() declarations into linux/syscalls.h

The IS_ENABLED(CONFIG_SYSVIPC) check is required to avoid link
errors when ipc is disabled.

Reported-by: Matt Turner <mattst88@gmail.com>
Fixes: 275f22148e87 ("ipc: rename old-style shmctl/semctl/msgctl syscalls")
Cc: stable@vger.kernel.org
Tested-by: Matt Turner <mattst88@gmail.com>
Tested-by: Anatoly Pugachev <matorola@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/sparc/kernel/sys_sparc_64.c | 33 ++++++++++++++++++---------------
 include/linux/syscalls.h         | 19 +++++++++++++++++++
 ipc/util.h                       | 25 ++-----------------------
 3 files changed, 39 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index ccc88926bc00..9f41a6f5a032 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -336,25 +336,28 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second
 {
 	long err;
 
+	if (!IS_ENABLED(CONFIG_SYSVIPC))
+		return -ENOSYS;
+
 	/* No need for backward compatibility. We can start fresh... */
 	if (call <= SEMTIMEDOP) {
 		switch (call) {
 		case SEMOP:
-			err = sys_semtimedop(first, ptr,
-					     (unsigned int)second, NULL);
+			err = ksys_semtimedop(first, ptr,
+					      (unsigned int)second, NULL);
 			goto out;
 		case SEMTIMEDOP:
-			err = sys_semtimedop(first, ptr, (unsigned int)second,
+			err = ksys_semtimedop(first, ptr, (unsigned int)second,
 				(const struct __kernel_timespec __user *)
-					     (unsigned long) fifth);
+					      (unsigned long) fifth);
 			goto out;
 		case SEMGET:
-			err = sys_semget(first, (int)second, (int)third);
+			err = ksys_semget(first, (int)second, (int)third);
 			goto out;
 		case SEMCTL: {
-			err = sys_semctl(first, second,
-					 (int)third | IPC_64,
-					 (unsigned long) ptr);
+			err = ksys_old_semctl(first, second,
+					      (int)third | IPC_64,
+					      (unsigned long) ptr);
 			goto out;
 		}
 		default:
@@ -365,18 +368,18 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second
 	if (call <= MSGCTL) {
 		switch (call) {
 		case MSGSND:
-			err = sys_msgsnd(first, ptr, (size_t)second,
+			err = ksys_msgsnd(first, ptr, (size_t)second,
 					 (int)third);
 			goto out;
 		case MSGRCV:
-			err = sys_msgrcv(first, ptr, (size_t)second, fifth,
+			err = ksys_msgrcv(first, ptr, (size_t)second, fifth,
 					 (int)third);
 			goto out;
 		case MSGGET:
-			err = sys_msgget((key_t)first, (int)second);
+			err = ksys_msgget((key_t)first, (int)second);
 			goto out;
 		case MSGCTL:
-			err = sys_msgctl(first, (int)second | IPC_64, ptr);
+			err = ksys_old_msgctl(first, (int)second | IPC_64, ptr);
 			goto out;
 		default:
 			err = -ENOSYS;
@@ -396,13 +399,13 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second
 			goto out;
 		}
 		case SHMDT:
-			err = sys_shmdt(ptr);
+			err = ksys_shmdt(ptr);
 			goto out;
 		case SHMGET:
-			err = sys_shmget(first, (size_t)second, (int)third);
+			err = ksys_shmget(first, (size_t)second, (int)third);
 			goto out;
 		case SHMCTL:
-			err = sys_shmctl(first, (int)second | IPC_64, ptr);
+			err = ksys_old_shmctl(first, (int)second | IPC_64, ptr);
 			goto out;
 		default:
 			err = -ENOSYS;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 88145da7d140..f7c561c4dcdd 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1402,4 +1402,23 @@ static inline unsigned int ksys_personality(unsigned int personality)
 	return old;
 }
 
+/* for __ARCH_WANT_SYS_IPC */
+long ksys_semtimedop(int semid, struct sembuf __user *tsops,
+		     unsigned int nsops,
+		     const struct __kernel_timespec __user *timeout);
+long ksys_semget(key_t key, int nsems, int semflg);
+long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg);
+long ksys_msgget(key_t key, int msgflg);
+long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
+long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
+		 long msgtyp, int msgflg);
+long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz,
+		 int msgflg);
+long ksys_shmget(key_t key, size_t size, int shmflg);
+long ksys_shmdt(char __user *shmaddr);
+long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
+long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
+			    unsigned int nsops,
+			    const struct old_timespec32 __user *timeout);
+
 #endif
diff --git a/ipc/util.h b/ipc/util.h
index 0fcf8e719b76..5766c61aed0e 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -276,29 +276,7 @@ static inline int compat_ipc_parse_version(int *cmd)
 	*cmd &= ~IPC_64;
 	return version;
 }
-#endif
 
-/* for __ARCH_WANT_SYS_IPC */
-long ksys_semtimedop(int semid, struct sembuf __user *tsops,
-		     unsigned int nsops,
-		     const struct __kernel_timespec __user *timeout);
-long ksys_semget(key_t key, int nsems, int semflg);
-long ksys_old_semctl(int semid, int semnum, int cmd, unsigned long arg);
-long ksys_msgget(key_t key, int msgflg);
-long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf);
-long ksys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
-		 long msgtyp, int msgflg);
-long ksys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz,
-		 int msgflg);
-long ksys_shmget(key_t key, size_t size, int shmflg);
-long ksys_shmdt(char __user *shmaddr);
-long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
-
-/* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */
-long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
-			    unsigned int nsops,
-			    const struct old_timespec32 __user *timeout);
-#ifdef CONFIG_COMPAT
 long compat_ksys_old_semctl(int semid, int semnum, int cmd, int arg);
 long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr);
 long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz,
@@ -306,6 +284,7 @@ long compat_ksys_msgrcv(int msqid, compat_uptr_t msgp, compat_ssize_t msgsz,
 long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp,
 		       compat_ssize_t msgsz, int msgflg);
 long compat_ksys_old_shmctl(int shmid, int cmd, void __user *uptr);
-#endif /* CONFIG_COMPAT */
+
+#endif
 
 #endif
-- 
cgit v1.2.3


From 789492f0c86505e63369907bcb1afdf52dec9366 Mon Sep 17 00:00:00 2001
From: Tim Blechmann <tim@klingt.org>
Date: Fri, 6 Sep 2019 16:21:19 +0800
Subject: ALSA: lx6464es - add support for LX6464ESe pci express variant

The pci express variant of the digigram lx6464es card has a different
device ID, but works without changes to the driver.
Thanks to Nikolas Slottke for reporting and testing.

Signed-off-by: Tim Blechmann <tim@klingt.org>
Link: https://lore.kernel.org/r/20190906082119.40971-1-tim@klingt.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/pci_ids.h       | 2 ++
 sound/pci/lx6464es/lx6464es.c | 8 ++++++++
 2 files changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c842735a4f45..901c5b2f76de 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1951,6 +1951,8 @@
 #define PCI_VENDOR_ID_DIGIGRAM		0x1369
 #define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_SERIAL_SUBSYSTEM	0xc001
 #define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM	0xc002
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ESE_SERIAL_SUBSYSTEM		0xc021
+#define PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ESE_CAE_SERIAL_SUBSYSTEM	0xc022
 
 #define PCI_VENDOR_ID_KAWASAKI		0x136b
 #define PCI_DEVICE_ID_MCHIP_KL5A72002	0xff01
diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c
index 583ca7384d83..fe10714380f2 100644
--- a/sound/pci/lx6464es/lx6464es.c
+++ b/sound/pci/lx6464es/lx6464es.c
@@ -49,6 +49,14 @@ static const struct pci_device_id snd_lx6464es_ids[] = {
 			 PCI_VENDOR_ID_DIGIGRAM,
 			 PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ES_CAE_SERIAL_SUBSYSTEM),
 	},			/* LX6464ES-CAE */
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES,
+			 PCI_VENDOR_ID_DIGIGRAM,
+			 PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ESE_SERIAL_SUBSYSTEM),
+	},			/* LX6464ESe */
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_LX6464ES,
+			 PCI_VENDOR_ID_DIGIGRAM,
+			 PCI_SUBDEVICE_ID_DIGIGRAM_LX6464ESE_CAE_SERIAL_SUBSYSTEM),
+	},			/* LX6464ESe-CAE */
 	{ 0, },
 };
 
-- 
cgit v1.2.3


From bfafddd8de426d894fcf3e062370b1efaa195ebc Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Wed, 28 Aug 2019 15:55:23 -0700
Subject: include/linux/compiler.h: fix Oops for Clang-compiled kernels

GCC unescapes escaped string section names while Clang does not. Because
__section uses the `#` stringification operator for the section name, it
doesn't need to be escaped.

This fixes an Oops observed in distro's that use systemd and not
net.core.bpf_jit_enable=1, when their kernels are compiled with Clang.

Link: https://github.com/ClangBuiltLinux/linux/issues/619
Link: https://bugs.llvm.org/show_bug.cgi?id=42950
Link: https://marc.info/?l=linux-netdev&m=156412960619946&w=2
Link: https://lore.kernel.org/lkml/20190904181740.GA19688@gmail.com/
Acked-by: Will Deacon <will@kernel.org>
Reported-by: Sedat Dilek <sedat.dilek@gmail.com>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
[Cherry-picked from the __section cleanup series for 5.3]
[Adjusted commit message]
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f0fd5636fddb..5e88e7e33abe 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 			long ______r;					\
 			static struct ftrace_likely_data		\
 				__aligned(4)				\
-				__section("_ftrace_annotated_branch")	\
+				__section(_ftrace_annotated_branch)	\
 				______f = {				\
 				.data.func = __func__,			\
 				.data.file = __FILE__,			\
@@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #define __trace_if_value(cond) ({			\
 	static struct ftrace_branch_data		\
 		__aligned(4)				\
-		__section("_ftrace_branch")		\
+		__section(_ftrace_branch)		\
 		__if_trace = {				\
 			.func = __func__,		\
 			.file = __FILE__,		\
@@ -118,7 +118,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	".popsection\n\t"
 
 /* Annotate a C jump table to allow objtool to follow the code flow */
-#define __annotate_jump_table __section(".rodata..c_jump_table")
+#define __annotate_jump_table __section(.rodata..c_jump_table)
 
 #else
 #define annotate_reachable()
@@ -298,7 +298,7 @@ unsigned long read_word_at_a_time(const void *addr)
  * visible to the compiler.
  */
 #define __ADDRESSABLE(sym) \
-	static void * __section(".discard.addressable") __used \
+	static void * __section(.discard.addressable) __used \
 		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
 
 /**
-- 
cgit v1.2.3


From fc697dc0c26a5908d467454e49440862d7fe96d0 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@stackframe.org>
Date: Sun, 8 Sep 2019 11:33:04 +0200
Subject: parisc: add kexec syscall support

Signed-off-by: Sven Schnelle <svens@stackframe.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/Kconfig                  |  13 +++
 arch/parisc/include/asm/fixmap.h     |   1 +
 arch/parisc/include/asm/kexec.h      |  37 +++++++++
 arch/parisc/kernel/Makefile          |   1 +
 arch/parisc/kernel/kexec.c           | 105 ++++++++++++++++++++++++
 arch/parisc/kernel/relocate_kernel.S | 149 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/kexec.h           |   1 +
 7 files changed, 307 insertions(+)
 create mode 100644 arch/parisc/include/asm/kexec.h
 create mode 100644 arch/parisc/kernel/kexec.c
 create mode 100644 arch/parisc/kernel/relocate_kernel.S

(limited to 'include')

diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index ee59171edffe..548c767f4358 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -346,6 +346,19 @@ config NR_CPUS
 	depends on SMP
 	default "4"
 
+config KEXEC
+	bool "Kexec system call"
+	select KEXEC_CORE
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  shutdown, so do not be surprised if this code does not
+	  initially work for you.
+
 endmenu
 
 
diff --git a/arch/parisc/include/asm/fixmap.h b/arch/parisc/include/asm/fixmap.h
index 288da73d4cc0..e480b2c05407 100644
--- a/arch/parisc/include/asm/fixmap.h
+++ b/arch/parisc/include/asm/fixmap.h
@@ -30,6 +30,7 @@
 enum fixed_addresses {
 	/* Support writing RO kernel text via kprobes, jump labels, etc. */
 	FIX_TEXT_POKE0,
+	FIX_TEXT_KEXEC,
 	FIX_BITMAP_COUNT
 };
 
diff --git a/arch/parisc/include/asm/kexec.h b/arch/parisc/include/asm/kexec.h
new file mode 100644
index 000000000000..a99ea747d7ed
--- /dev/null
+++ b/arch/parisc/include/asm/kexec.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PARISC_KEXEC_H
+#define _ASM_PARISC_KEXEC_H
+
+#ifdef CONFIG_KEXEC
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_PARISC
+#define ARCH_HAS_KIMAGE_ARCH
+
+#ifndef __ASSEMBLY__
+
+struct kimage_arch {
+	unsigned long initrd_start;
+	unsigned long initrd_end;
+	unsigned long cmdline;
+};
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+	/* Dummy implementation for now */
+}
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* _ASM_PARISC_KEXEC_H */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index c232266b517c..487cf88866a8 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -37,3 +37,4 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
 obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 obj-$(CONFIG_KGDB)			+= kgdb.o
 obj-$(CONFIG_KPROBES)			+= kprobes.o
+obj-$(CONFIG_KEXEC)			+= kexec.o relocate_kernel.o
diff --git a/arch/parisc/kernel/kexec.c b/arch/parisc/kernel/kexec.c
new file mode 100644
index 000000000000..a92d265a2261
--- /dev/null
+++ b/arch/parisc/kernel/kexec.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/kexec.h>
+#include <linux/delay.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
+
+extern void relocate_new_kernel(unsigned long head,
+				unsigned long start,
+				unsigned long phys);
+
+extern const unsigned int relocate_new_kernel_size;
+extern unsigned int kexec_initrd_start_offset;
+extern unsigned int kexec_initrd_end_offset;
+extern unsigned int kexec_cmdline_offset;
+extern unsigned int kexec_free_mem_offset;
+
+static void kexec_show_segment_info(const struct kimage *kimage,
+				    unsigned long n)
+{
+	pr_debug("    segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
+			n,
+			kimage->segment[n].mem,
+			kimage->segment[n].mem + kimage->segment[n].memsz,
+			(unsigned long)kimage->segment[n].memsz,
+			(unsigned long)kimage->segment[n].memsz /  PAGE_SIZE);
+}
+
+static void kexec_image_info(const struct kimage *kimage)
+{
+	unsigned long i;
+
+	pr_debug("kexec kimage info:\n");
+	pr_debug("  type:        %d\n", kimage->type);
+	pr_debug("  start:       %lx\n", kimage->start);
+	pr_debug("  head:        %lx\n", kimage->head);
+	pr_debug("  nr_segments: %lu\n", kimage->nr_segments);
+
+	for (i = 0; i < kimage->nr_segments; i++)
+		kexec_show_segment_info(kimage, i);
+}
+
+void machine_kexec_cleanup(struct kimage *kimage)
+{
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+
+void machine_shutdown(void)
+{
+	smp_send_stop();
+	while (num_online_cpus() > 1) {
+		cpu_relax();
+		mdelay(1);
+	}
+}
+
+void machine_kexec(struct kimage *image)
+{
+#ifdef CONFIG_64BIT
+	Elf64_Fdesc desc;
+#endif
+	void (*reloc)(unsigned long head,
+		      unsigned long start,
+		      unsigned long phys);
+
+	unsigned long phys = page_to_phys(image->control_code_page);
+	void *virt = (void *)__fix_to_virt(FIX_TEXT_KEXEC);
+	struct kimage_arch *arch = &image->arch;
+
+	set_fixmap(FIX_TEXT_KEXEC, phys);
+
+	flush_cache_all();
+
+#ifdef CONFIG_64BIT
+	reloc = (void *)&desc;
+	desc.addr = (long long)virt;
+#else
+	reloc = (void *)virt;
+#endif
+
+	memcpy(virt, dereference_function_descriptor(relocate_new_kernel),
+		relocate_new_kernel_size);
+
+	*(unsigned long *)(virt + kexec_cmdline_offset) = arch->cmdline;
+	*(unsigned long *)(virt + kexec_initrd_start_offset) = arch->initrd_start;
+	*(unsigned long *)(virt + kexec_initrd_end_offset) = arch->initrd_end;
+	*(unsigned long *)(virt + kexec_free_mem_offset) = PAGE0->mem_free;
+
+	flush_cache_all();
+	flush_tlb_all();
+	local_irq_disable();
+
+	reloc(image->head & PAGE_MASK, image->start, phys);
+}
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	kexec_image_info(image);
+	return 0;
+}
diff --git a/arch/parisc/kernel/relocate_kernel.S b/arch/parisc/kernel/relocate_kernel.S
new file mode 100644
index 000000000000..2561e52b8d9b
--- /dev/null
+++ b/arch/parisc/kernel/relocate_kernel.S
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <linux/kexec.h>
+
+#include <asm/assembly.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <asm/psw.h>
+
+.level PA_ASM_LEVEL
+
+.macro	kexec_param name
+.align 8
+ENTRY(kexec\()_\name)
+#ifdef CONFIG_64BIT
+	.dword 0
+#else
+	.word 0
+#endif
+
+ENTRY(kexec\()_\name\()_offset)
+	.word kexec\()_\name - relocate_new_kernel
+.endm
+
+.text
+
+/* args:
+ * r26 - kimage->head
+ * r25 - start address of kernel
+ * r24 - physical address of relocate code
+ */
+
+ENTRY_CFI(relocate_new_kernel)
+0:	copy	%arg1, %rp
+	/* disable I and Q bit, so we are allowed to execute RFI */
+	rsm PSW_SM_I, %r0
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	rsm PSW_SM_Q, %r0
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+
+	/*
+	 * After return-from-interrupt, we want to run without Code/Data
+	 * translation enabled just like on a normal boot.
+	 */
+
+	/* calculate new physical execution address */
+	ldo	1f-0b(%arg2), %r1
+	mtctl	%r0, %cr17 /* IIASQ */
+	mtctl	%r0, %cr17 /* IIASQ */
+	mtctl	%r1, %cr18 /* IIAOQ */
+	ldo	4(%r1),%r1
+	mtctl	%r1, %cr18 /* IIAOQ */
+#ifdef CONFIG_64BIT
+	depdi,z	1, PSW_W_BIT, 1, %r1
+	mtctl	%r1, %cr22 /* IPSW */
+#else
+	mtctl	%r0, %cr22 /* IPSW */
+#endif
+	/* lets go... */
+	rfi
+1:	nop
+	nop
+
+.Lloop:
+	LDREG,ma	REG_SZ(%arg0), %r3
+	/* If crash kernel, no copy needed */
+	cmpib,COND(=),n 0,%r3,boot
+
+	bb,<,n		%r3, 31 - IND_DONE_BIT, boot
+	bb,>=,n		%r3, 31 - IND_INDIRECTION_BIT, .Lnotind
+	/* indirection, load and restart */
+	movb		%r3, %arg0, .Lloop
+	depi		0, 31, PAGE_SHIFT, %arg0
+
+.Lnotind:
+	bb,>=,n		%r3, 31 - IND_DESTINATION_BIT, .Lnotdest
+	b		.Lloop
+	copy		%r3, %r20
+
+.Lnotdest:
+	bb,>=		%r3, 31 - IND_SOURCE_BIT, .Lloop
+	depi		0, 31, PAGE_SHIFT, %r3
+	copy		%r3, %r21
+
+	/* copy page */
+	copy		%r0, %r18
+	zdepi		1, 31 - PAGE_SHIFT, 1, %r18
+	add		%r20, %r18, %r17
+
+	depi		0, 31, PAGE_SHIFT, %r20
+.Lcopy:
+	copy		%r20, %r12
+	LDREG,ma	REG_SZ(%r21), %r8
+	LDREG,ma	REG_SZ(%r21), %r9
+	LDREG,ma	REG_SZ(%r21), %r10
+	LDREG,ma	REG_SZ(%r21), %r11
+	STREG,ma	%r8, REG_SZ(%r20)
+	STREG,ma	%r9, REG_SZ(%r20)
+	STREG,ma	%r10, REG_SZ(%r20)
+	STREG,ma	%r11, REG_SZ(%r20)
+
+#ifndef CONFIG_64BIT
+	LDREG,ma	REG_SZ(%r21), %r8
+	LDREG,ma	REG_SZ(%r21), %r9
+	LDREG,ma	REG_SZ(%r21), %r10
+	LDREG,ma	REG_SZ(%r21), %r11
+	STREG,ma	%r8, REG_SZ(%r20)
+	STREG,ma	%r9, REG_SZ(%r20)
+	STREG,ma	%r10, REG_SZ(%r20)
+	STREG,ma	%r11, REG_SZ(%r20)
+#endif
+
+	fdc		%r0(%r12)
+	cmpb,COND(<<)	%r20,%r17,.Lcopy
+	fic		(%sr4, %r12)
+	b,n		.Lloop
+
+boot:
+	mtctl	%r0, %cr15
+
+	LDREG	kexec_free_mem-0b(%arg2), %arg0
+	LDREG	kexec_cmdline-0b(%arg2), %arg1
+	LDREG	kexec_initrd_end-0b(%arg2), %arg3
+	LDREG	kexec_initrd_start-0b(%arg2), %arg2
+	bv,n %r0(%rp)
+
+ENDPROC_CFI(relocate_new_kernel);
+
+ENTRY(relocate_new_kernel_size)
+       .word relocate_new_kernel_size - relocate_new_kernel
+
+kexec_param cmdline
+kexec_param initrd_start
+kexec_param initrd_end
+kexec_param free_mem
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 6d112868272d..05669c87a0af 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -31,6 +31,7 @@
 #define KEXEC_ARCH_DEFAULT ( 0 << 16)
 #define KEXEC_ARCH_386     ( 3 << 16)
 #define KEXEC_ARCH_68K     ( 4 << 16)
+#define KEXEC_ARCH_PARISC  (15 << 16)
 #define KEXEC_ARCH_X86_64  (62 << 16)
 #define KEXEC_ARCH_PPC     (20 << 16)
 #define KEXEC_ARCH_PPC64   (21 << 16)
-- 
cgit v1.2.3


From 3474a2c62ff9694ee627bdb51cf9a60021d9e814 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 2 Sep 2019 23:11:31 +0200
Subject: netfilter: nf_tables_offload: move indirect flow_block callback logic
 to core

Add nft_offload_init() and nft_offload_exit() function to deal with the
init and the exit path of the offload infrastructure.

Rename nft_indr_block_get_and_ing_cmd() to nft_indr_block_cb().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_offload.h |  7 +++----
 net/netfilter/nf_tables_api.c             | 10 +++-------
 net/netfilter/nf_tables_offload.c         | 22 ++++++++++++++++++----
 3 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index db104665a9e4..6de896ebcf30 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -64,10 +64,6 @@ struct nft_rule;
 struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
 int nft_flow_rule_offload_commit(struct net *net);
-void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
-				    flow_indr_block_bind_cb_t *cb,
-				    void *cb_priv,
-				    enum flow_block_command command);
 
 #define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg)		\
 	(__reg)->base_offset	=					\
@@ -80,4 +76,7 @@ void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
 
 int nft_chain_offload_priority(struct nft_base_chain *basechain);
 
+void nft_offload_init(void);
+void nft_offload_exit(void);
+
 #endif
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7def31ae3022..efd0c97cc2a3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7669,11 +7669,6 @@ static struct pernet_operations nf_tables_net_ops = {
 	.exit	= nf_tables_exit_net,
 };
 
-static struct flow_indr_block_ing_entry block_ing_entry = {
-	.cb = nft_indr_block_get_and_ing_cmd,
-	.list = LIST_HEAD_INIT(block_ing_entry.list),
-};
-
 static int __init nf_tables_module_init(void)
 {
 	int err;
@@ -7705,7 +7700,8 @@ static int __init nf_tables_module_init(void)
 		goto err5;
 
 	nft_chain_route_init();
-	flow_indr_add_block_ing_cb(&block_ing_entry);
+	nft_offload_init();
+
 	return err;
 err5:
 	rhltable_destroy(&nft_objname_ht);
@@ -7722,7 +7718,7 @@ err1:
 
 static void __exit nf_tables_module_exit(void)
 {
-	flow_indr_del_block_ing_cb(&block_ing_entry);
+	nft_offload_exit();
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 	nft_chain_filter_fini();
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index fabe2997188b..8abf193f8012 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -354,10 +354,9 @@ int nft_flow_rule_offload_commit(struct net *net)
 	return err;
 }
 
-void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
-				    flow_indr_block_bind_cb_t *cb,
-				    void *cb_priv,
-				    enum flow_block_command command)
+static void nft_indr_block_cb(struct net_device *dev,
+			      flow_indr_block_bind_cb_t *cb, void *cb_priv,
+			      enum flow_block_command command)
 {
 	struct net *net = dev_net(dev);
 	const struct nft_table *table;
@@ -383,3 +382,18 @@ void nft_indr_block_get_and_ing_cmd(struct net_device *dev,
 		}
 	}
 }
+
+static struct flow_indr_block_ing_entry block_ing_entry = {
+	.cb	= nft_indr_block_cb,
+	.list	= LIST_HEAD_INIT(block_ing_entry.list),
+};
+
+void nft_offload_init(void)
+{
+	flow_indr_add_block_ing_cb(&block_ing_entry);
+}
+
+void nft_offload_exit(void)
+{
+	flow_indr_del_block_ing_cb(&block_ing_entry);
+}
-- 
cgit v1.2.3


From 6017826b494008895632e092cc49c0bdb3019eab Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 3 Sep 2019 09:43:30 -0700
Subject: crypto: skcipher - add the ability to abort a skcipher walk

After starting a skcipher walk, the only way to ensure that all
resources it has tied up are released is to complete it. In some
cases, it will be useful to be able to abort a walk cleanly after
it has started, so add this ability to the skcipher walk API.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/skcipher.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index d68faa5759ad..734b6f7081b8 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -148,6 +148,11 @@ int skcipher_walk_aead_decrypt(struct skcipher_walk *walk,
 			       struct aead_request *req, bool atomic);
 void skcipher_walk_complete(struct skcipher_walk *walk, int err);
 
+static inline void skcipher_walk_abort(struct skcipher_walk *walk)
+{
+	skcipher_walk_done(walk, -ECANCELED);
+}
+
 static inline void ablkcipher_request_complete(struct ablkcipher_request *req,
 					       int err)
 {
-- 
cgit v1.2.3


From 92f35b751c71d14250a401246f2c792e3aa5b386 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 18 Aug 2019 14:09:47 +0100
Subject: KVM: arm/arm64: vgic: Allow more than 256 vcpus for KVM_IRQ_LINE

While parts of the VGIC support a large number of vcpus (we
bravely allow up to 512), other parts are more limited.

One of these limits is visible in the KVM_IRQ_LINE ioctl, which
only allows 256 vcpus to be signalled when using the CPU or PPI
types. Unfortunately, we've cornered ourselves badly by allocating
all the bits in the irq field.

Since the irq_type subfield (8 bit wide) is currently only taking
the values 0, 1 and 2 (and we have been careful not to allow anything
else), let's reduce this field to only 4 bits, and allocate the
remaining 4 bits to a vcpu2_index, which acts as a multiplier:

  vcpu_id = 256 * vcpu2_index + vcpu_index

With that, and a new capability (KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)
allowing this to be discovered, it becomes possible to inject
PPIs to up to 4096 vcpus. But please just don't.

Whilst we're there, add a clarification about the use of KVM_IRQ_LINE
on arm, which is not completely conditionned by KVM_CAP_IRQCHIP.

Reported-by: Zenghui Yu <yuzenghui@huawei.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 Documentation/virt/kvm/api.txt    | 12 ++++++++++--
 arch/arm/include/uapi/asm/kvm.h   |  4 +++-
 arch/arm64/include/uapi/asm/kvm.h |  4 +++-
 include/uapi/linux/kvm.h          |  1 +
 virt/kvm/arm/arm.c                |  2 ++
 5 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 2d067767b617..25931ca1cb38 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -753,8 +753,8 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
 use PPIs designated for specific cpus.  The irq field is interpreted
 like this:
 
-  bits:  | 31 ... 24 | 23  ... 16 | 15    ...    0 |
-  field: | irq_type  | vcpu_index |     irq_id     |
+  bits:  |  31 ... 28  | 27 ... 24 | 23  ... 16 | 15 ... 0 |
+  field: | vcpu2_index | irq_type  | vcpu_index |  irq_id  |
 
 The irq_type field has the following values:
 - irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
@@ -766,6 +766,14 @@ The irq_type field has the following values:
 
 In both cases, level is used to assert/deassert the line.
 
+When KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 is supported, the target vcpu is
+identified as (256 * vcpu2_index + vcpu_index). Otherwise, vcpu2_index
+must be zero.
+
+Note that on arm/arm64, the KVM_CAP_IRQCHIP capability only conditions
+injection of interrupts for the in-kernel irqchip. KVM_IRQ_LINE can always
+be used for a userspace interrupt controller.
+
 struct kvm_irq_level {
 	union {
 		__u32 irq;     /* GSI */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index a4217c1a5d01..2769360f195c 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -266,8 +266,10 @@ struct kvm_vcpu_events {
 #define   KVM_DEV_ARM_ITS_CTRL_RESET		4
 
 /* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT		28
+#define KVM_ARM_IRQ_VCPU2_MASK		0xf
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
-#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_TYPE_MASK		0xf
 #define KVM_ARM_IRQ_VCPU_SHIFT		16
 #define KVM_ARM_IRQ_VCPU_MASK		0xff
 #define KVM_ARM_IRQ_NUM_SHIFT		0
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 9a507716ae2f..67c21f9bdbad 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -325,8 +325,10 @@ struct kvm_vcpu_events {
 #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER		1
 
 /* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT		28
+#define KVM_ARM_IRQ_VCPU2_MASK		0xf
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
-#define KVM_ARM_IRQ_TYPE_MASK		0xff
+#define KVM_ARM_IRQ_TYPE_MASK		0xf
 #define KVM_ARM_IRQ_VCPU_SHIFT		16
 #define KVM_ARM_IRQ_VCPU_MASK		0xff
 #define KVM_ARM_IRQ_NUM_SHIFT		0
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5e3f12d5359e..5414b6588fbb 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -996,6 +996,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
+#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 35a069815baf..86c6aa1cb58e 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -196,6 +196,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_IMMEDIATE_EXIT:
 	case KVM_CAP_VCPU_EVENTS:
+	case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
 		r = 1;
 		break;
 	case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -888,6 +889,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 
 	irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK;
 	vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK;
+	vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1);
 	irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK;
 
 	trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level);
-- 
cgit v1.2.3


From 5044ed4f394cb371fb85db3e3ec0296487b7d324 Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Thu, 25 Jul 2019 11:27:28 +0300
Subject: btrfs: Remove unused locking functions

Those were split out of btrfs_clear_lock_blocking_rw by
aa12c02778a9 ("btrfs: split btrfs_clear_lock_blocking_rw to read and write helpers")
however at that time this function was unused due to commit
523983401644 ("Btrfs: kill btrfs_clear_path_blocking"). Put the final
nail in the coffin of those 2 functions.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/locking.c           | 36 ------------------------------------
 fs/btrfs/locking.h           |  2 --
 include/trace/events/btrfs.h |  2 --
 3 files changed, 40 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 393eceda57c8..e4309bcf0b5f 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -119,42 +119,6 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
 	}
 }
 
-void btrfs_clear_lock_blocking_read(struct extent_buffer *eb)
-{
-	trace_btrfs_clear_lock_blocking_read(eb);
-	/*
-	 * No lock is required.  The lock owner may change if we have a read
-	 * lock, but it won't change to or away from us.  If we have the write
-	 * lock, we are the owner and it'll never change.
-	 */
-	if (eb->lock_nested && current->pid == eb->lock_owner)
-		return;
-	BUG_ON(atomic_read(&eb->blocking_readers) == 0);
-	read_lock(&eb->lock);
-	btrfs_assert_spinning_readers_get(eb);
-	/* atomic_dec_and_test implies a barrier */
-	if (atomic_dec_and_test(&eb->blocking_readers))
-		cond_wake_up_nomb(&eb->read_lock_wq);
-}
-
-void btrfs_clear_lock_blocking_write(struct extent_buffer *eb)
-{
-	trace_btrfs_clear_lock_blocking_write(eb);
-	/*
-	 * no lock is required.  The lock owner may change if
-	 * we have a read lock, but it won't change to or away
-	 * from us.  If we have the write lock, we are the owner
-	 * and it'll never change.
-	 */
-	if (eb->lock_nested && current->pid == eb->lock_owner)
-		return;
-	write_lock(&eb->lock);
-	BUG_ON(eb->blocking_writers != 1);
-	btrfs_assert_spinning_writers_get(eb);
-	if (--eb->blocking_writers == 0)
-		cond_wake_up(&eb->write_lock_wq);
-}
-
 /*
  * take a spinning read lock.  This will wait for any blocking
  * writers
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 595014f64830..b775a4207ed9 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,8 +19,6 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb);
 void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
 void btrfs_set_lock_blocking_read(struct extent_buffer *eb);
 void btrfs_set_lock_blocking_write(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking_read(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking_write(struct extent_buffer *eb);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
 int btrfs_try_tree_read_lock(struct extent_buffer *eb);
 int btrfs_try_tree_write_lock(struct extent_buffer *eb);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 2f6a669408bb..5cb95646b94e 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -2086,8 +2086,6 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_read);
-DEFINE_BTRFS_LOCK_EVENT(btrfs_clear_lock_blocking_write);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
 DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
-- 
cgit v1.2.3


From 40cf931fa81bedea08823dda9e6e73630db41b70 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 17 Jul 2019 12:39:20 -0500
Subject: btrfs: use common vfs LABEL ioctl definitions

I lifted the btrfs label get/set ioctls to the vfs some time ago, but
never followed up to use those common definitions directly in btrfs.

This patch does that.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c           | 8 ++++----
 include/uapi/linux/btrfs.h | 6 ++----
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0a0c54e99b22..9eaf78d7b8eb 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5453,6 +5453,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_setflags(file, argp);
 	case FS_IOC_GETVERSION:
 		return btrfs_ioctl_getversion(file, argp);
+	case FS_IOC_GETFSLABEL:
+		return btrfs_ioctl_get_fslabel(file, argp);
+	case FS_IOC_SETFSLABEL:
+		return btrfs_ioctl_set_fslabel(file, argp);
 	case FITRIM:
 		return btrfs_ioctl_fitrim(file, argp);
 	case BTRFS_IOC_SNAP_CREATE:
@@ -5564,10 +5568,6 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_quota_rescan_wait(file, argp);
 	case BTRFS_IOC_DEV_REPLACE:
 		return btrfs_ioctl_dev_replace(fs_info, argp);
-	case BTRFS_IOC_GET_FSLABEL:
-		return btrfs_ioctl_get_fslabel(file, argp);
-	case BTRFS_IOC_SET_FSLABEL:
-		return btrfs_ioctl_set_fslabel(file, argp);
 	case BTRFS_IOC_GET_SUPPORTED_FEATURES:
 		return btrfs_ioctl_get_supported_features(argp);
 	case BTRFS_IOC_GET_FEATURES:
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index c195896d478f..7885d79f7515 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -917,10 +917,8 @@ enum btrfs_err_code {
 #define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
 			       struct btrfs_ioctl_quota_rescan_args)
 #define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46)
-#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
-				   char[BTRFS_LABEL_SIZE])
-#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
-				   char[BTRFS_LABEL_SIZE])
+#define BTRFS_IOC_GET_FSLABEL 	FS_IOC_GETFSLABEL
+#define BTRFS_IOC_SET_FSLABEL	FS_IOC_SETFSLABEL
 #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
 				      struct btrfs_ioctl_get_dev_stats)
 #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
-- 
cgit v1.2.3


From 73a3ca20934dd02c0912bcf32463fffec6139399 Mon Sep 17 00:00:00 2001
From: Hans van Kranenburg <hans.van.kranenburg@mendix.com>
Date: Sat, 3 Aug 2019 23:36:34 +0200
Subject: btrfs: clarify btrfs_ioctl_get_dev_stats padding

In commit c11d2c236cc26 ("Btrfs: add ioctl to get and reset the device
stats") the get_dev_stats ioctl was added.

Shortly thereafter, in commit b27f7c0c150f7 ("btrfs: join DEV_STATS
ioctls to one") , the flags field was added.  However, the calculation
for unused padding space was not updated, which also invalidated the
comment.

Clarify what happened to reduce confusion and wasted time for anyone
implementing this.

Signed-off-by: Hans van Kranenburg <hans.van.kranenburg@mendix.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 7885d79f7515..3ee0678c0a83 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -665,7 +665,12 @@ struct btrfs_ioctl_get_dev_stats {
 	/* out values: */
 	__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
 
-	__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
+	/*
+	 * This pads the struct to 1032 bytes. It was originally meant to pad to
+	 * 1024 bytes, but when adding the flags field, the padding calculation
+	 * was not adjusted.
+	 */
+	__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX];
 };
 
 #define BTRFS_QUOTA_CTL_ENABLE	1
-- 
cgit v1.2.3


From 844245b4548499efad26e33e408a459b1fe3a346 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 1 Aug 2019 18:19:33 -0400
Subject: btrfs: add a flush step for delayed iputs

Delayed iputs could very well free up enough space without needing to
commit the transaction, so make this step it's own step.  This will
allow us to skip the step for evictions in a later patch.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h             | 3 ++-
 fs/btrfs/space-info.c        | 5 +++--
 include/trace/events/btrfs.h | 1 +
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 85b808e3ea42..4ad4715a7941 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2548,7 +2548,8 @@ enum btrfs_flush_state {
 	FLUSH_DELALLOC_WAIT	=	6,
 	ALLOC_CHUNK		=	7,
 	ALLOC_CHUNK_FORCE	=	8,
-	COMMIT_TRANS		=	9,
+	RUN_DELAYED_IPUTS	=	9,
+	COMMIT_TRANS		=	10,
 };
 
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index e9406b2133d1..1f4e97070f33 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -664,7 +664,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		if (ret > 0 || ret == -ENOSPC)
 			ret = 0;
 		break;
-	case COMMIT_TRANS:
+	case RUN_DELAYED_IPUTS:
 		/*
 		 * If we have pending delayed iputs then we could free up a
 		 * bunch of pinned space, so make sure we run the iputs before
@@ -672,7 +672,8 @@ static void flush_space(struct btrfs_fs_info *fs_info,
 		 */
 		btrfs_run_delayed_iputs(fs_info);
 		btrfs_wait_on_delayed_iputs(fs_info);
-
+		break;
+	case COMMIT_TRANS:
 		ret = may_commit_transaction(fs_info, space_info);
 		break;
 	default:
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 5cb95646b94e..5df604de4f11 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -1088,6 +1088,7 @@ TRACE_EVENT(btrfs_trigger_flush,
 		{ FLUSH_DELAYED_REFS,		"FLUSH_ELAYED_REFS"},		\
 		{ ALLOC_CHUNK,			"ALLOC_CHUNK"},			\
 		{ ALLOC_CHUNK_FORCE,		"ALLOC_CHUNK_FORCE"},		\
+		{ RUN_DELAYED_IPUTS,		"RUN_DELAYED_IPUTS"},		\
 		{ COMMIT_TRANS,			"COMMIT_TRANS"})
 
 TRACE_EVENT(btrfs_flush_space,
-- 
cgit v1.2.3


From 27e022a9c6fe97dd80e31c038328d4f79b2080c2 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Thu, 8 Aug 2019 12:32:44 +0800
Subject: btrfs: replace: BTRFS_DEV_REPLACE_ITEM_STATE_x defines should go

The BTRFS_DEV_REPLACE_ITEM_STATE_x defines, as shown in [1], are
unused in both kernel and btrfs-progs (except for one instance of
BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED in kernel).

[1]
btrfs.h:#define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED        2
btrfs.h:#define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED        3
btrfs.h:#define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED       4

Further these define-values are different form its counterpart
BTRFS_IOCTL_DEV_REPLACE_STATE_x series as shown in [2].

[2]
btrfs_tree.h:#define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED   2
btrfs_tree.h:#define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED    3
btrfs_tree.h:#define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED    4

So this patch deletes the BTRFS_DEV_REPLACE_ITEM_STATE_x altogether, and
one instance of BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED is replaced
with BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED in the kernel.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c          | 2 +-
 include/uapi/linux/btrfs_tree.h | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include')

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 6b2e9aa83ffa..00ea828beb00 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -56,7 +56,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
 no_valid_dev_replace_entry_found:
 		ret = 0;
 		dev_replace->replace_state =
-			BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED;
+			BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
 		dev_replace->cont_reading_from_srcdev_mode =
 		    BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS;
 		dev_replace->time_started = 0;
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 34d5b34286fa..71246c1941aa 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -806,11 +806,6 @@ struct btrfs_dev_stats_item {
 
 #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
 #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
-#define BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED	0
-#define BTRFS_DEV_REPLACE_ITEM_STATE_STARTED		1
-#define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED		2
-#define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED		3
-#define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED		4
 
 struct btrfs_dev_replace_item {
 	/*
-- 
cgit v1.2.3


From e35b79a1070d681b4842dad27b1edaf9811da7e9 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 30 Aug 2019 13:36:08 +0200
Subject: btrfs: turn checksum type define into an enum

Turn the checksum type definition into a enum. This eases later addition
of new checksums.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/uapi/linux/btrfs_tree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index 71246c1941aa..b65c7ee75bc7 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -300,7 +300,9 @@
 #define BTRFS_CSUM_SIZE 32
 
 /* csum types */
-#define BTRFS_CSUM_TYPE_CRC32	0
+enum btrfs_csum_type {
+	BTRFS_CSUM_TYPE_CRC32	= 0,
+};
 
 /*
  * flags definitions for directory entry item type
-- 
cgit v1.2.3


From 6fa5963c37a2e3335eba0b7455e35a01318ebc15 Mon Sep 17 00:00:00 2001
From: Cheng-Yi Chiang <cychiang@chromium.org>
Date: Wed, 17 Jul 2019 16:33:23 +0800
Subject: ASoC: hdmi-codec: Add an op to set callback function for plug event

Add an op in hdmi_codec_ops so codec driver can register callback
function to handle plug event.

Driver in DRM can use this callback function to report connector status.

Signed-off-by: Cheng-Yi Chiang <cychiang@chromium.org>
Link: https://lore.kernel.org/r/20190717083327.47646-2-cychiang@chromium.org
Reviewed-by: Tzung-Bi Shih <tzungbi@google.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/hdmi-codec.h    | 17 ++++++++++++++++
 sound/soc/codecs/hdmi-codec.c | 46 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

(limited to 'include')

diff --git a/include/sound/hdmi-codec.h b/include/sound/hdmi-codec.h
index 7fea496f1f34..83b17682e01c 100644
--- a/include/sound/hdmi-codec.h
+++ b/include/sound/hdmi-codec.h
@@ -47,6 +47,9 @@ struct hdmi_codec_params {
 	int channels;
 };
 
+typedef void (*hdmi_codec_plugged_cb)(struct device *dev,
+				      bool plugged);
+
 struct hdmi_codec_pdata;
 struct hdmi_codec_ops {
 	/*
@@ -88,6 +91,14 @@ struct hdmi_codec_ops {
 	 */
 	int (*get_dai_id)(struct snd_soc_component *comment,
 			  struct device_node *endpoint);
+
+	/*
+	 * Hook callback function to handle connector plug event.
+	 * Optional
+	 */
+	int (*hook_plugged_cb)(struct device *dev, void *data,
+			       hdmi_codec_plugged_cb fn,
+			       struct device *codec_dev);
 };
 
 /* HDMI codec initalization data */
@@ -99,6 +110,12 @@ struct hdmi_codec_pdata {
 	void *data;
 };
 
+struct snd_soc_component;
+struct snd_soc_jack;
+
+int hdmi_codec_set_jack_detect(struct snd_soc_component *component,
+			       struct snd_soc_jack *jack);
+
 #define HDMI_CODEC_DRV_NAME "hdmi-audio-codec"
 
 #endif /* __HDMI_CODEC_H__ */
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 0bf1c8cad108..b5fd8f08726e 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <sound/core.h>
+#include <sound/jack.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
 #include <sound/soc.h>
@@ -274,6 +275,8 @@ struct hdmi_codec_priv {
 	struct snd_pcm_chmap *chmap_info;
 	unsigned int chmap_idx;
 	struct mutex lock;
+	struct snd_soc_jack *jack;
+	unsigned int jack_status;
 };
 
 static const struct snd_soc_dapm_widget hdmi_widgets[] = {
@@ -663,6 +666,49 @@ static int hdmi_dai_probe(struct snd_soc_dai *dai)
 	return 0;
 }
 
+static void hdmi_codec_jack_report(struct hdmi_codec_priv *hcp,
+				   unsigned int jack_status)
+{
+	if (hcp->jack && jack_status != hcp->jack_status) {
+		snd_soc_jack_report(hcp->jack, jack_status, SND_JACK_LINEOUT);
+		hcp->jack_status = jack_status;
+	}
+}
+
+static void plugged_cb(struct device *dev, bool plugged)
+{
+	struct hdmi_codec_priv *hcp = dev_get_drvdata(dev);
+
+	if (plugged)
+		hdmi_codec_jack_report(hcp, SND_JACK_LINEOUT);
+	else
+		hdmi_codec_jack_report(hcp, 0);
+}
+
+/**
+ * hdmi_codec_set_jack_detect - register HDMI plugged callback
+ * @component: the hdmi-codec instance
+ * @jack: ASoC jack to report (dis)connection events on
+ */
+int hdmi_codec_set_jack_detect(struct snd_soc_component *component,
+			       struct snd_soc_jack *jack)
+{
+	struct hdmi_codec_priv *hcp = snd_soc_component_get_drvdata(component);
+	int ret = -EOPNOTSUPP;
+
+	if (hcp->hcd.ops->hook_plugged_cb) {
+		hcp->jack = jack;
+		ret = hcp->hcd.ops->hook_plugged_cb(component->dev->parent,
+						    hcp->hcd.data,
+						    plugged_cb,
+						    component->dev);
+		if (ret)
+			hcp->jack = NULL;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hdmi_codec_set_jack_detect);
+
 static int hdmi_dai_spdif_probe(struct snd_soc_dai *dai)
 {
 	struct hdmi_codec_daifmt *cf = dai->playback_dma_data;
-- 
cgit v1.2.3


From 6863f5643dd717376c2fdc85a47a00f9d738a834 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sat, 7 Sep 2019 11:52:36 +0900
Subject: kbuild: allow Clang to find unused static inline functions for W=1
 build

GCC and Clang have different policy for -Wunused-function; GCC does not
warn unused static inline functions at all whereas Clang does if they
are defined in source files instead of included headers although it has
been suppressed since commit abb2ea7dfd82 ("compiler, clang: suppress
warning for unused static inline functions").

We often miss to delete unused functions where 'static inline' is used
in *.c files since there is no tool to detect them. Unused code remains
until somebody notices. For example, commit 075ddd75680f ("regulator:
core: remove unused rdev_get_supply()").

Let's remove __maybe_unused from the inline macro to allow Clang to
start finding unused static inline functions. For now, we do this only
for W=1 build since it is not a good idea to sprinkle warnings for the
normal build (e.g. 35 warnings for arch/x86/configs/x86_64_defconfig).

My initial attempt was to add -Wno-unused-function for no W= build
(https://lore.kernel.org/patchwork/patch/1120594/)

Nathan Chancellor pointed out that would weaken Clang's checks since
we would no longer get -Wunused-function without W=1. It is true GCC
would catch unused static non-inline functions, but it would weaken
Clang as a standalone compiler, at least.

Hence, here is a counter implementation. The current problem is, W=...
only controls compiler flags, which are globally effective. There is
no way to address only 'static inline' functions.

This commit defines KBUILD_EXTRA_WARN[123] corresponding to W=[123].
When KBUILD_EXTRA_WARN1 is defined, __maybe_unused is omitted from
the 'inline' macro.

The new macro __inline_maybe_unused makes the code a bit uglier, so I
hope we can remove it entirely after fixing most of the warnings.

If you contribute to code clean-up, please run "make CC=clang W=1"
and check -Wunused-function warnings. You will find lots of unused
functions.

Some of them are false-positives because the call-sites are disabled
by #ifdef. I do not like to abuse the inline keyword for suppressing
unused-function warnings because it is intended to be a hint for the
compiler optimization. I prefer #ifdef around the definition, or
__maybe_unused if #ifdef would make the code too ugly.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
---
 include/linux/compiler_types.h | 20 ++++++++++++++------
 scripts/Makefile.extrawarn     |  6 ++++++
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 599c27b56c29..b056a40116da 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -130,10 +130,6 @@ struct ftrace_likely_data {
 
 /*
  * Force always-inline if the user requests it so via the .config.
- * GCC does not warn about unused static inline functions for
- * -Wunused-function.  This turns out to avoid the need for complex #ifdef
- * directives.  Suppress the warning in clang as well by using "unused"
- * function attribute, which is redundant but not harmful for gcc.
  * Prefer gnu_inline, so that extern inline functions do not emit an
  * externally visible function. This makes extern inline behave as per gnu89
  * semantics rather than c99. This prevents multiple symbol definition errors
@@ -144,15 +140,27 @@ struct ftrace_likely_data {
  */
 #if !defined(CONFIG_OPTIMIZE_INLINING)
 #define inline inline __attribute__((__always_inline__)) __gnu_inline \
-	__maybe_unused notrace
+	__inline_maybe_unused notrace
 #else
 #define inline inline                                    __gnu_inline \
-	__maybe_unused notrace
+	__inline_maybe_unused notrace
 #endif
 
 #define __inline__ inline
 #define __inline   inline
 
+/*
+ * GCC does not warn about unused static inline functions for -Wunused-function.
+ * Suppress the warning in clang as well by using __maybe_unused, but enable it
+ * for W=1 build. This will allow clang to find unused functions. Remove the
+ * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings.
+ */
+#ifdef KBUILD_EXTRA_WARN1
+#define __inline_maybe_unused
+#else
+#define __inline_maybe_unused __maybe_unused
+#endif
+
 /*
  * Rather then using noinline to prevent stack consumption, use
  * noinline_for_stack instead.  For documentation reasons.
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 53eb7e0c6a5a..ecddf83ac142 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -36,6 +36,8 @@ KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
 KBUILD_CFLAGS += -Wno-missing-field-initializers
 KBUILD_CFLAGS += -Wno-sign-compare
 
+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
+
 else
 
 # Some diagnostics enabled by default are noisy.
@@ -65,6 +67,8 @@ KBUILD_CFLAGS += -Wsign-compare
 KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
 KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
 
+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
+
 endif
 
 #
@@ -82,4 +86,6 @@ KBUILD_CFLAGS += -Wredundant-decls
 KBUILD_CFLAGS += -Wswitch-default
 KBUILD_CFLAGS += $(call cc-option, -Wpacked-bitfield-compat)
 
+KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3
+
 endif
-- 
cgit v1.2.3


From 85b18fe7049466cb15e7d33b51aa51854e0e05e7 Mon Sep 17 00:00:00 2001
From: mtk01761 <wendell.lin@mediatek.com>
Date: Mon, 19 Aug 2019 17:21:40 +0800
Subject: clk: mediatek: Add dt-bindings for MT6779 clocks

Add MT6779 clock dt-bindings, include topckgen, apmixedsys,
infracfg, and subsystem clocks.

Signed-off-by: mtk01761 <wendell.lin@mediatek.com>
Link: https://lkml.kernel.org/r/1566206502-4347-10-git-send-email-mars.cheng@mediatek.com
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/dt-bindings/clock/mt6779-clk.h | 436 +++++++++++++++++++++++++++++++++
 1 file changed, 436 insertions(+)
 create mode 100644 include/dt-bindings/clock/mt6779-clk.h

(limited to 'include')

diff --git a/include/dt-bindings/clock/mt6779-clk.h b/include/dt-bindings/clock/mt6779-clk.h
new file mode 100644
index 000000000000..b083139afbd2
--- /dev/null
+++ b/include/dt-bindings/clock/mt6779-clk.h
@@ -0,0 +1,436 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Wendell Lin <wendell.lin@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT6779_H
+#define _DT_BINDINGS_CLK_MT6779_H
+
+/* TOPCKGEN */
+#define CLK_TOP_AXI			1
+#define CLK_TOP_MM			2
+#define CLK_TOP_CAM			3
+#define CLK_TOP_MFG			4
+#define CLK_TOP_CAMTG			5
+#define CLK_TOP_UART			6
+#define CLK_TOP_SPI			7
+#define CLK_TOP_MSDC50_0_HCLK		8
+#define CLK_TOP_MSDC50_0		9
+#define CLK_TOP_MSDC30_1		10
+#define CLK_TOP_MSDC30_2		11
+#define CLK_TOP_AUD			12
+#define CLK_TOP_AUD_INTBUS		13
+#define CLK_TOP_FPWRAP_ULPOSC		14
+#define CLK_TOP_SCP			15
+#define CLK_TOP_ATB			16
+#define CLK_TOP_SSPM			17
+#define CLK_TOP_DPI0			18
+#define CLK_TOP_SCAM			19
+#define CLK_TOP_AUD_1			20
+#define CLK_TOP_AUD_2			21
+#define CLK_TOP_DISP_PWM		22
+#define CLK_TOP_SSUSB_TOP_XHCI		23
+#define CLK_TOP_USB_TOP			24
+#define CLK_TOP_SPM			25
+#define CLK_TOP_I2C			26
+#define CLK_TOP_F52M_MFG		27
+#define CLK_TOP_SENINF			28
+#define CLK_TOP_DXCC			29
+#define CLK_TOP_CAMTG2			30
+#define CLK_TOP_AUD_ENG1		31
+#define CLK_TOP_AUD_ENG2		32
+#define CLK_TOP_FAES_UFSFDE		33
+#define CLK_TOP_FUFS			34
+#define CLK_TOP_IMG			35
+#define CLK_TOP_DSP			36
+#define CLK_TOP_DSP1			37
+#define CLK_TOP_DSP2			38
+#define CLK_TOP_IPU_IF			39
+#define CLK_TOP_CAMTG3			40
+#define CLK_TOP_CAMTG4			41
+#define CLK_TOP_PMICSPI			42
+#define CLK_TOP_MAINPLL_CK		43
+#define CLK_TOP_MAINPLL_D2		44
+#define CLK_TOP_MAINPLL_D3		45
+#define CLK_TOP_MAINPLL_D5		46
+#define CLK_TOP_MAINPLL_D7		47
+#define CLK_TOP_MAINPLL_D2_D2		48
+#define CLK_TOP_MAINPLL_D2_D4		49
+#define CLK_TOP_MAINPLL_D2_D8		50
+#define CLK_TOP_MAINPLL_D2_D16		51
+#define CLK_TOP_MAINPLL_D3_D2		52
+#define CLK_TOP_MAINPLL_D3_D4		53
+#define CLK_TOP_MAINPLL_D3_D8		54
+#define CLK_TOP_MAINPLL_D5_D2		55
+#define CLK_TOP_MAINPLL_D5_D4		56
+#define CLK_TOP_MAINPLL_D7_D2		57
+#define CLK_TOP_MAINPLL_D7_D4		58
+#define CLK_TOP_UNIVPLL_CK		59
+#define CLK_TOP_UNIVPLL_D2		60
+#define CLK_TOP_UNIVPLL_D3		61
+#define CLK_TOP_UNIVPLL_D5		62
+#define CLK_TOP_UNIVPLL_D7		63
+#define CLK_TOP_UNIVPLL_D2_D2		64
+#define CLK_TOP_UNIVPLL_D2_D4		65
+#define CLK_TOP_UNIVPLL_D2_D8		66
+#define CLK_TOP_UNIVPLL_D3_D2		67
+#define CLK_TOP_UNIVPLL_D3_D4		68
+#define CLK_TOP_UNIVPLL_D3_D8		69
+#define CLK_TOP_UNIVPLL_D5_D2		70
+#define CLK_TOP_UNIVPLL_D5_D4		71
+#define CLK_TOP_UNIVPLL_D5_D8		72
+#define CLK_TOP_APLL1_CK		73
+#define CLK_TOP_APLL1_D2		74
+#define CLK_TOP_APLL1_D4		75
+#define CLK_TOP_APLL1_D8		76
+#define CLK_TOP_APLL2_CK		77
+#define CLK_TOP_APLL2_D2		78
+#define CLK_TOP_APLL2_D4		79
+#define CLK_TOP_APLL2_D8		80
+#define CLK_TOP_TVDPLL_CK		81
+#define CLK_TOP_TVDPLL_D2		82
+#define CLK_TOP_TVDPLL_D4		83
+#define CLK_TOP_TVDPLL_D8		84
+#define CLK_TOP_TVDPLL_D16		85
+#define CLK_TOP_MSDCPLL_CK		86
+#define CLK_TOP_MSDCPLL_D2		87
+#define CLK_TOP_MSDCPLL_D4		88
+#define CLK_TOP_MSDCPLL_D8		89
+#define CLK_TOP_MSDCPLL_D16		90
+#define CLK_TOP_AD_OSC_CK		91
+#define CLK_TOP_OSC_D2			92
+#define CLK_TOP_OSC_D4			93
+#define CLK_TOP_OSC_D8			94
+#define CLK_TOP_OSC_D16			95
+#define CLK_TOP_F26M_CK_D2		96
+#define CLK_TOP_MFGPLL_CK		97
+#define CLK_TOP_UNIVP_192M_CK		98
+#define CLK_TOP_UNIVP_192M_D2		99
+#define CLK_TOP_UNIVP_192M_D4		100
+#define CLK_TOP_UNIVP_192M_D8		101
+#define CLK_TOP_UNIVP_192M_D16		102
+#define CLK_TOP_UNIVP_192M_D32		103
+#define CLK_TOP_MMPLL_CK		104
+#define CLK_TOP_MMPLL_D4		105
+#define CLK_TOP_MMPLL_D4_D2		106
+#define CLK_TOP_MMPLL_D4_D4		107
+#define CLK_TOP_MMPLL_D5		108
+#define CLK_TOP_MMPLL_D5_D2		109
+#define CLK_TOP_MMPLL_D5_D4		110
+#define CLK_TOP_MMPLL_D6		111
+#define CLK_TOP_MMPLL_D7		112
+#define CLK_TOP_CLK26M			113
+#define CLK_TOP_CLK13M			114
+#define CLK_TOP_ADSP			115
+#define CLK_TOP_DPMAIF			116
+#define CLK_TOP_VENC			117
+#define CLK_TOP_VDEC			118
+#define CLK_TOP_CAMTM			119
+#define CLK_TOP_PWM			120
+#define CLK_TOP_ADSPPLL_CK		121
+#define CLK_TOP_I2S0_M_SEL		122
+#define CLK_TOP_I2S1_M_SEL		123
+#define CLK_TOP_I2S2_M_SEL		124
+#define CLK_TOP_I2S3_M_SEL		125
+#define CLK_TOP_I2S4_M_SEL		126
+#define CLK_TOP_I2S5_M_SEL		127
+#define CLK_TOP_APLL12_DIV0		128
+#define CLK_TOP_APLL12_DIV1		129
+#define CLK_TOP_APLL12_DIV2		130
+#define CLK_TOP_APLL12_DIV3		131
+#define CLK_TOP_APLL12_DIV4		132
+#define CLK_TOP_APLL12_DIVB		133
+#define CLK_TOP_APLL12_DIV5		134
+#define CLK_TOP_IPE			135
+#define CLK_TOP_DPE			136
+#define CLK_TOP_CCU			137
+#define CLK_TOP_DSP3			138
+#define CLK_TOP_SENINF1			139
+#define CLK_TOP_SENINF2			140
+#define CLK_TOP_AUD_H			141
+#define CLK_TOP_CAMTG5			142
+#define CLK_TOP_TVDPLL_MAINPLL_D2_CK	143
+#define CLK_TOP_AD_OSC2_CK		144
+#define CLK_TOP_OSC2_D2			145
+#define CLK_TOP_OSC2_D3			146
+#define CLK_TOP_FMEM_466M_CK		147
+#define CLK_TOP_ADSPPLL_D4		148
+#define CLK_TOP_ADSPPLL_D5		149
+#define CLK_TOP_ADSPPLL_D6		150
+#define CLK_TOP_OSC_D10			151
+#define CLK_TOP_UNIVPLL_D3_D16		152
+#define CLK_TOP_NR_CLK			153
+
+/* APMIXED */
+#define CLK_APMIXED_ARMPLL_LL		1
+#define CLK_APMIXED_ARMPLL_BL		2
+#define CLK_APMIXED_ARMPLL_BB		3
+#define CLK_APMIXED_CCIPLL		4
+#define CLK_APMIXED_MAINPLL		5
+#define CLK_APMIXED_UNIV2PLL		6
+#define CLK_APMIXED_MSDCPLL		7
+#define CLK_APMIXED_ADSPPLL		8
+#define CLK_APMIXED_MMPLL		9
+#define CLK_APMIXED_MFGPLL		10
+#define CLK_APMIXED_TVDPLL		11
+#define CLK_APMIXED_APLL1		12
+#define CLK_APMIXED_APLL2		13
+#define CLK_APMIXED_SSUSB26M		14
+#define CLK_APMIXED_APPLL26M		15
+#define CLK_APMIXED_MIPIC0_26M		16
+#define CLK_APMIXED_MDPLLGP26M		17
+#define CLK_APMIXED_MM_F26M		18
+#define CLK_APMIXED_UFS26M		19
+#define CLK_APMIXED_MIPIC1_26M		20
+#define CLK_APMIXED_MEMPLL26M		21
+#define CLK_APMIXED_CLKSQ_LVPLL_26M	22
+#define CLK_APMIXED_MIPID0_26M		23
+#define CLK_APMIXED_MIPID1_26M		24
+#define CLK_APMIXED_NR_CLK		25
+
+/* CAMSYS */
+#define CLK_CAM_LARB10			1
+#define CLK_CAM_DFP_VAD			2
+#define CLK_CAM_LARB11			3
+#define CLK_CAM_LARB9			4
+#define CLK_CAM_CAM			5
+#define CLK_CAM_CAMTG			6
+#define CLK_CAM_SENINF			7
+#define CLK_CAM_CAMSV0			8
+#define CLK_CAM_CAMSV1			9
+#define CLK_CAM_CAMSV2			10
+#define CLK_CAM_CAMSV3			11
+#define CLK_CAM_CCU			12
+#define CLK_CAM_FAKE_ENG		13
+#define CLK_CAM_NR_CLK			14
+
+/* INFRA */
+#define CLK_INFRA_PMIC_TMR		1
+#define CLK_INFRA_PMIC_AP		2
+#define CLK_INFRA_PMIC_MD		3
+#define CLK_INFRA_PMIC_CONN		4
+#define CLK_INFRA_SCPSYS		5
+#define CLK_INFRA_SEJ			6
+#define CLK_INFRA_APXGPT		7
+#define CLK_INFRA_ICUSB			8
+#define CLK_INFRA_GCE			9
+#define CLK_INFRA_THERM			10
+#define CLK_INFRA_I2C0			11
+#define CLK_INFRA_I2C1			12
+#define CLK_INFRA_I2C2			13
+#define CLK_INFRA_I2C3			14
+#define CLK_INFRA_PWM_HCLK		15
+#define CLK_INFRA_PWM1			16
+#define CLK_INFRA_PWM2			17
+#define CLK_INFRA_PWM3			18
+#define CLK_INFRA_PWM4			19
+#define CLK_INFRA_PWM			20
+#define CLK_INFRA_UART0			21
+#define CLK_INFRA_UART1			22
+#define CLK_INFRA_UART2			23
+#define CLK_INFRA_UART3			24
+#define CLK_INFRA_GCE_26M		25
+#define CLK_INFRA_CQ_DMA_FPC		26
+#define CLK_INFRA_BTIF			27
+#define CLK_INFRA_SPI0			28
+#define CLK_INFRA_MSDC0			29
+#define CLK_INFRA_MSDC1			30
+#define CLK_INFRA_MSDC2			31
+#define CLK_INFRA_MSDC0_SCK		32
+#define CLK_INFRA_DVFSRC		33
+#define CLK_INFRA_GCPU			34
+#define CLK_INFRA_TRNG			35
+#define CLK_INFRA_AUXADC		36
+#define CLK_INFRA_CPUM			37
+#define CLK_INFRA_CCIF1_AP		38
+#define CLK_INFRA_CCIF1_MD		39
+#define CLK_INFRA_AUXADC_MD		40
+#define CLK_INFRA_MSDC1_SCK		41
+#define CLK_INFRA_MSDC2_SCK		42
+#define CLK_INFRA_AP_DMA		43
+#define CLK_INFRA_XIU			44
+#define CLK_INFRA_DEVICE_APC		45
+#define CLK_INFRA_CCIF_AP		46
+#define CLK_INFRA_DEBUGSYS		47
+#define CLK_INFRA_AUD			48
+#define CLK_INFRA_CCIF_MD		49
+#define CLK_INFRA_DXCC_SEC_CORE		50
+#define CLK_INFRA_DXCC_AO		51
+#define CLK_INFRA_DRAMC_F26M		52
+#define CLK_INFRA_IRTX			53
+#define CLK_INFRA_DISP_PWM		54
+#define CLK_INFRA_DPMAIF_CK		55
+#define CLK_INFRA_AUD_26M_BCLK		56
+#define CLK_INFRA_SPI1			57
+#define CLK_INFRA_I2C4			58
+#define CLK_INFRA_MODEM_TEMP_SHARE	59
+#define CLK_INFRA_SPI2			60
+#define CLK_INFRA_SPI3			61
+#define CLK_INFRA_UNIPRO_SCK		62
+#define CLK_INFRA_UNIPRO_TICK		63
+#define CLK_INFRA_UFS_MP_SAP_BCLK	64
+#define CLK_INFRA_MD32_BCLK		65
+#define CLK_INFRA_SSPM			66
+#define CLK_INFRA_UNIPRO_MBIST		67
+#define CLK_INFRA_SSPM_BUS_HCLK		68
+#define CLK_INFRA_I2C5			69
+#define CLK_INFRA_I2C5_ARBITER		70
+#define CLK_INFRA_I2C5_IMM		71
+#define CLK_INFRA_I2C1_ARBITER		72
+#define CLK_INFRA_I2C1_IMM		73
+#define CLK_INFRA_I2C2_ARBITER		74
+#define CLK_INFRA_I2C2_IMM		75
+#define CLK_INFRA_SPI4			76
+#define CLK_INFRA_SPI5			77
+#define CLK_INFRA_CQ_DMA		78
+#define CLK_INFRA_UFS			79
+#define CLK_INFRA_AES_UFSFDE		80
+#define CLK_INFRA_UFS_TICK		81
+#define CLK_INFRA_MSDC0_SELF		82
+#define CLK_INFRA_MSDC1_SELF		83
+#define CLK_INFRA_MSDC2_SELF		84
+#define CLK_INFRA_SSPM_26M_SELF		85
+#define CLK_INFRA_SSPM_32K_SELF		86
+#define CLK_INFRA_UFS_AXI		87
+#define CLK_INFRA_I2C6			88
+#define CLK_INFRA_AP_MSDC0		89
+#define CLK_INFRA_MD_MSDC0		90
+#define CLK_INFRA_USB			91
+#define CLK_INFRA_DEVMPU_BCLK		92
+#define CLK_INFRA_CCIF2_AP		93
+#define CLK_INFRA_CCIF2_MD		94
+#define CLK_INFRA_CCIF3_AP		95
+#define CLK_INFRA_CCIF3_MD		96
+#define CLK_INFRA_SEJ_F13M		97
+#define CLK_INFRA_AES_BCLK		98
+#define CLK_INFRA_I2C7			99
+#define CLK_INFRA_I2C8			100
+#define CLK_INFRA_FBIST2FPC		101
+#define CLK_INFRA_CCIF4_AP		102
+#define CLK_INFRA_CCIF4_MD		103
+#define CLK_INFRA_FADSP			104
+#define CLK_INFRA_SSUSB_XHCI		105
+#define CLK_INFRA_SPI6			106
+#define CLK_INFRA_SPI7			107
+#define CLK_INFRA_NR_CLK		108
+
+/* MFGCFG */
+#define CLK_MFGCFG_BG3D			1
+#define CLK_MFGCFG_NR_CLK		2
+
+/* IMG */
+#define CLK_IMG_WPE_A			1
+#define CLK_IMG_MFB			2
+#define CLK_IMG_DIP			3
+#define CLK_IMG_LARB6			4
+#define CLK_IMG_LARB5			5
+#define CLK_IMG_NR_CLK			6
+
+/* IPE */
+#define CLK_IPE_LARB7			1
+#define CLK_IPE_LARB8			2
+#define CLK_IPE_SMI_SUBCOM		3
+#define CLK_IPE_FD			4
+#define CLK_IPE_FE			5
+#define CLK_IPE_RSC			6
+#define CLK_IPE_DPE			7
+#define CLK_IPE_NR_CLK			8
+
+/* MM_CONFIG */
+#define CLK_MM_SMI_COMMON		1
+#define CLK_MM_SMI_LARB0		2
+#define CLK_MM_SMI_LARB1		3
+#define CLK_MM_GALS_COMM0		4
+#define CLK_MM_GALS_COMM1		5
+#define CLK_MM_GALS_CCU2MM		6
+#define CLK_MM_GALS_IPU12MM		7
+#define CLK_MM_GALS_IMG2MM		8
+#define CLK_MM_GALS_CAM2MM		9
+#define CLK_MM_GALS_IPU2MM		10
+#define CLK_MM_MDP_DL_TXCK		11
+#define CLK_MM_IPU_DL_TXCK		12
+#define CLK_MM_MDP_RDMA0		13
+#define CLK_MM_MDP_RDMA1		14
+#define CLK_MM_MDP_RSZ0			15
+#define CLK_MM_MDP_RSZ1			16
+#define CLK_MM_MDP_TDSHP		17
+#define CLK_MM_MDP_WROT0		18
+#define CLK_MM_FAKE_ENG			19
+#define CLK_MM_DISP_OVL0		20
+#define CLK_MM_DISP_OVL0_2L		21
+#define CLK_MM_DISP_OVL1_2L		22
+#define CLK_MM_DISP_RDMA0		23
+#define CLK_MM_DISP_RDMA1		24
+#define CLK_MM_DISP_WDMA0		25
+#define CLK_MM_DISP_COLOR0		26
+#define CLK_MM_DISP_CCORR0		27
+#define CLK_MM_DISP_AAL0		28
+#define CLK_MM_DISP_GAMMA0		29
+#define CLK_MM_DISP_DITHER0		30
+#define CLK_MM_DISP_SPLIT		31
+#define CLK_MM_DSI0_MM_CK		32
+#define CLK_MM_DSI0_IF_CK		33
+#define CLK_MM_DPI_MM_CK		34
+#define CLK_MM_DPI_IF_CK		35
+#define CLK_MM_FAKE_ENG2		36
+#define CLK_MM_MDP_DL_RX_CK		37
+#define CLK_MM_IPU_DL_RX_CK		38
+#define CLK_MM_26M			39
+#define CLK_MM_MM_R2Y			40
+#define CLK_MM_DISP_RSZ			41
+#define CLK_MM_MDP_WDMA0		42
+#define CLK_MM_MDP_AAL			43
+#define CLK_MM_MDP_HDR			44
+#define CLK_MM_DBI_MM_CK		45
+#define CLK_MM_DBI_IF_CK		46
+#define CLK_MM_MDP_WROT1		47
+#define CLK_MM_DISP_POSTMASK0		48
+#define CLK_MM_DISP_HRT_BW		49
+#define CLK_MM_DISP_OVL_FBDC		50
+#define CLK_MM_NR_CLK			51
+
+/* VDEC_GCON */
+#define CLK_VDEC_VDEC			1
+#define CLK_VDEC_LARB1			2
+#define CLK_VDEC_GCON_NR_CLK		3
+
+/* VENC_GCON */
+#define CLK_VENC_GCON_LARB		1
+#define CLK_VENC_GCON_VENC		2
+#define CLK_VENC_GCON_JPGENC		3
+#define CLK_VENC_GCON_GALS		4
+#define CLK_VENC_GCON_NR_CLK		5
+
+/* AUD */
+#define CLK_AUD_AFE			1
+#define CLK_AUD_22M			2
+#define CLK_AUD_24M			3
+#define CLK_AUD_APLL2_TUNER		4
+#define CLK_AUD_APLL_TUNER		5
+#define CLK_AUD_TDM			6
+#define CLK_AUD_ADC			7
+#define CLK_AUD_DAC			8
+#define CLK_AUD_DAC_PREDIS		9
+#define CLK_AUD_TML			10
+#define CLK_AUD_NLE			11
+#define CLK_AUD_I2S1_BCLK_SW		12
+#define CLK_AUD_I2S2_BCLK_SW		13
+#define CLK_AUD_I2S3_BCLK_SW		14
+#define CLK_AUD_I2S4_BCLK_SW		15
+#define CLK_AUD_I2S5_BCLK_SW		16
+#define CLK_AUD_CONN_I2S_ASRC		17
+#define CLK_AUD_GENERAL1_ASRC		18
+#define CLK_AUD_GENERAL2_ASRC		19
+#define CLK_AUD_DAC_HIRES		20
+#define CLK_AUD_PDN_ADDA6_ADC		21
+#define CLK_AUD_ADC_HIRES		22
+#define CLK_AUD_ADC_HIRES_TML		23
+#define CLK_AUD_ADDA6_ADC_HIRES		24
+#define CLK_AUD_3RD_DAC			25
+#define CLK_AUD_3RD_DAC_PREDIS		26
+#define CLK_AUD_3RD_DAC_TML		27
+#define CLK_AUD_3RD_DAC_HIRES		28
+#define CLK_AUD_NR_CLK			29
+
+#endif /* _DT_BINDINGS_CLK_MT6779_H */
-- 
cgit v1.2.3


From 7c28503db19cfa28e394a394aca61c79fbf3f969 Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Mon, 9 Sep 2019 19:31:26 +0200
Subject: platform/x86: asus-wmi: Reorder ASUS_WMI_CHARGE_THRESHOLD

At the same time add a comment explaining what it is used for.

Signed-off-by: Kristian Klausen <kristian@klausen.dk>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/platform_data/x86/asus-wmi.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 53934ef38d98..21f0426c8272 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -61,7 +61,6 @@
 
 /* Misc */
 #define ASUS_WMI_DEVID_CAMERA		0x00060013
-#define ASUS_WMI_CHARGE_THRESHOLD	0x00120057
 
 /* Storage */
 #define ASUS_WMI_DEVID_CARDREADER	0x00080013
@@ -82,6 +81,9 @@
 /* Deep S3 / Resume on LID open */
 #define ASUS_WMI_DEVID_LID_RESUME	0x00120031
 
+/* Maximum charging percentage */
+#define ASUS_WMI_CHARGE_THRESHOLD	0x00120057
+
 /* DSTS masks */
 #define ASUS_WMI_DSTS_STATUS_BIT	0x00000001
 #define ASUS_WMI_DSTS_UNKNOWN_BIT	0x00000002
-- 
cgit v1.2.3


From 0c37f44845557b5e5b91ab320f256a4fd5059648 Mon Sep 17 00:00:00 2001
From: Kristian Klausen <kristian@klausen.dk>
Date: Mon, 9 Sep 2019 19:31:27 +0200
Subject: platform/x86: asus-wmi: Rename CHARGE_THRESHOLD to RSOC

The device is officially called "Relative state of charge" (RSOC).
At the same time add the missing DEVID from the name.

Signed-off-by: Kristian Klausen <kristian@klausen.dk>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c            | 6 +++---
 include/linux/platform_data/x86/asus-wmi.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 848b23764fc3..92c149dc2e6e 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -2067,7 +2067,7 @@ static ssize_t charge_threshold_store(struct device *dev,
 	if (value < 0 || value > 100)
 		return -EINVAL;
 
-	ret = asus_wmi_set_devstate(ASUS_WMI_CHARGE_THRESHOLD, value, &rv);
+	ret = asus_wmi_set_devstate(ASUS_WMI_DEVID_RSOC, value, &rv);
 	if (ret)
 		return ret;
 
@@ -2124,7 +2124,7 @@ static umode_t asus_sysfs_is_visible(struct kobject *kobj,
 	else if (attr == &dev_attr_fan_boost_mode.attr)
 		ok = asus->fan_boost_mode_available;
 	else if (attr == &dev_attr_charge_threshold.attr)
-		devid = ASUS_WMI_CHARGE_THRESHOLD;
+		devid = ASUS_WMI_DEVID_RSOC;
 
 	if (devid != -1)
 		ok = !(asus_wmi_get_devstate_simple(asus, devid) < 0);
@@ -2455,7 +2455,7 @@ static int asus_wmi_add(struct platform_device *pdev)
 	 * and we can't get the current threshold so let set it to 100% on
 	 * module load.
 	 */
-	asus_wmi_set_devstate(ASUS_WMI_CHARGE_THRESHOLD, 100, NULL);
+	asus_wmi_set_devstate(ASUS_WMI_DEVID_RSOC, 100, NULL);
 	asus->charge_threshold = 100;
 
 	return 0;
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
index 21f0426c8272..60249e22e844 100644
--- a/include/linux/platform_data/x86/asus-wmi.h
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -82,7 +82,7 @@
 #define ASUS_WMI_DEVID_LID_RESUME	0x00120031
 
 /* Maximum charging percentage */
-#define ASUS_WMI_CHARGE_THRESHOLD	0x00120057
+#define ASUS_WMI_DEVID_RSOC		0x00120057
 
 /* DSTS masks */
 #define ASUS_WMI_DSTS_STATUS_BIT	0x00000001
-- 
cgit v1.2.3


From ed13fc33f763035a7c290f11c7111877c3a5daab Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Fri, 6 Sep 2019 11:32:26 +0100
Subject: export: explicitly align struct kernel_symbol

This change allows growing struct kernel_symbol without wasting bytes to
alignment. It also concretized the alignment of ksymtab entries if
relative references are used for ksymtab entries.

struct kernel_symbol was already implicitly being aligned to the word
size, except on x86_64 and m68k, where it is aligned to 16 and 2 bytes,
respectively.

As far as I can tell there is no requirement for aligning struct
kernel_symbol to 16 bytes on x86_64, but gcc aligns structs to their
size, and the linker aligns the custom __ksymtab sections to the largest
data type contained within, so setting KSYM_ALIGN to 16 was necessary to
stay consistent with the code generated for non-ASM EXPORT_SYMBOL(). Now
that non-ASM EXPORT_SYMBOL() explicitly aligns to word size (8),
KSYM_ALIGN is no longer necessary.

In case of relative references, the alignment has been changed
accordingly to not waste space when adding new struct members.

As for m68k, struct kernel_symbol is aligned to 2 bytes even though the
structure itself is 8 bytes; using a 4-byte alignment shouldn't hurt.

I manually verified the output of the __ksymtab sections didn't change
on x86, x86_64, arm, arm64 and m68k. As expected, the section contents
didn't change, and the ELF section alignment only changed on x86_64 and
m68k. Feedback from other archs more than welcome.

Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 arch/m68k/include/asm/export.h | 1 -
 include/asm-generic/export.h   | 8 +++-----
 include/linux/export.h         | 3 ++-
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/include/asm/export.h b/arch/m68k/include/asm/export.h
index 0af20f48bd07..b53008b67ce1 100644
--- a/arch/m68k/include/asm/export.h
+++ b/arch/m68k/include/asm/export.h
@@ -1,3 +1,2 @@
-#define KSYM_ALIGN 2
 #define KCRC_ALIGN 2
 #include <asm-generic/export.h>
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 294d6ae785d4..63f54907317b 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -4,15 +4,13 @@
 #ifndef KSYM_FUNC
 #define KSYM_FUNC(x) x
 #endif
-#ifdef CONFIG_64BIT
-#ifndef KSYM_ALIGN
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define KSYM_ALIGN 4
+#elif defined(CONFIG_64BIT)
 #define KSYM_ALIGN 8
-#endif
 #else
-#ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
-#endif
 #ifndef KCRC_ALIGN
 #define KCRC_ALIGN 4
 #endif
diff --git a/include/linux/export.h b/include/linux/export.h
index fd8711ed9ac4..28a4d2150689 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -52,7 +52,7 @@ extern struct module __this_module;
 #define __KSYMTAB_ENTRY(sym, sec)					\
 	__ADDRESSABLE(sym)						\
 	asm("	.section \"___ksymtab" sec "+" #sym "\", \"a\"	\n"	\
-	    "	.balign	8					\n"	\
+	    "	.balign 4					\n"	\
 	    "__ksymtab_" #sym ":				\n"	\
 	    "	.long	" #sym "- .				\n"	\
 	    "	.long	__kstrtab_" #sym "- .			\n"	\
@@ -66,6 +66,7 @@ struct kernel_symbol {
 #define __KSYMTAB_ENTRY(sym, sec)					\
 	static const struct kernel_symbol __ksymtab_##sym		\
 	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
+	__aligned(sizeof(void *))					\
 	= { (unsigned long)&sym, __kstrtab_##sym }
 
 struct kernel_symbol {
-- 
cgit v1.2.3


From 8651ec01daedad26290f76beeb4736f9d2da4b87 Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Fri, 6 Sep 2019 11:32:27 +0100
Subject: module: add support for symbol namespaces.

The EXPORT_SYMBOL_NS() and EXPORT_SYMBOL_NS_GPL() macros can be used to
export a symbol to a specific namespace.  There are no _GPL_FUTURE and
_UNUSED variants because these are currently unused, and I'm not sure
they are necessary.

I didn't add EXPORT_SYMBOL_NS() for ASM exports; this patch sets the
namespace of ASM exports to NULL by default. In case of relative
references, it will be relocatable to NULL. If there's a need, this
should be pretty easy to add.

A module that wants to use a symbol exported to a namespace must add a
MODULE_IMPORT_NS() statement to their module code; otherwise, modpost
will complain when building the module, and the kernel module loader
will emit an error and fail when loading the module.

MODULE_IMPORT_NS() adds a modinfo tag 'import_ns' to the module. That
tag can be observed by the modinfo command, modpost and kernel/module.c
at the time of loading the module.

The ELF symbols are renamed to include the namespace with an asm label;
for example, symbol 'usb_stor_suspend' in namespace USB_STORAGE becomes
'usb_stor_suspend.USB_STORAGE'.  This allows modpost to do namespace
checking, without having to go through all the effort of parsing ELF and
relocation records just to get to the struct kernel_symbols.

On x86_64 I saw no difference in binary size (compression), but at
runtime this will require a word of memory per export to hold the
namespace. An alternative could be to store namespaced symbols in their
own section and use a separate 'struct namespaced_kernel_symbol' for
that section, at the cost of making the module loader more complex.

Co-developed-by: Martijn Coenen <maco@android.com>
Signed-off-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 include/asm-generic/export.h |  6 +--
 include/linux/export.h       | 91 ++++++++++++++++++++++++++++++++++++--------
 include/linux/module.h       |  2 +
 kernel/module.c              | 43 +++++++++++++++++++++
 4 files changed, 123 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 63f54907317b..e2b5d0f569d3 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -17,11 +17,11 @@
 
 .macro __put, val, name
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-	.long	\val - ., \name - .
+	.long	\val - ., \name - ., 0 - .
 #elif defined(CONFIG_64BIT)
-	.quad	\val, \name
+	.quad	\val, \name, 0
 #else
-	.long	\val, \name
+	.long	\val, \name, 0
 #endif
 .endm
 
diff --git a/include/linux/export.h b/include/linux/export.h
index 28a4d2150689..d59461e71478 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -20,6 +20,8 @@ extern struct module __this_module;
 
 #ifdef CONFIG_MODULES
 
+#define NS_SEPARATOR "."
+
 #if defined(__KERNEL__) && !defined(__GENKSYMS__)
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
@@ -29,13 +31,13 @@ extern struct module __this_module;
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
 	    "	.weak	__crc_" #sym "				\n"	\
 	    "	.long	__crc_" #sym " - .			\n"	\
-	    "	.previous					\n");
+	    "	.previous					\n")
 #else
 #define __CRC_SYMBOL(sym, sec)						\
 	asm("	.section \"___kcrctab" sec "+" #sym "\", \"a\"	\n"	\
 	    "	.weak	__crc_" #sym "				\n"	\
 	    "	.long	__crc_" #sym "				\n"	\
-	    "	.previous					\n");
+	    "	.previous					\n")
 #endif
 #else
 #define __CRC_SYMBOL(sym, sec)
@@ -49,6 +51,16 @@ extern struct module __this_module;
  * absolute relocations that require runtime processing on relocatable
  * kernels.
  */
+#define __KSYMTAB_ENTRY_NS(sym, sec, ns)				\
+	__ADDRESSABLE(sym)						\
+	asm("	.section \"___ksymtab" sec "+" #sym "\", \"a\"	\n"	\
+	    "	.balign	4					\n"	\
+	    "__ksymtab_" #sym NS_SEPARATOR #ns ":		\n"	\
+	    "	.long	" #sym "- .				\n"	\
+	    "	.long	__kstrtab_" #sym "- .			\n"	\
+	    "	.long	__kstrtab_ns_" #sym "- .		\n"	\
+	    "	.previous					\n")
+
 #define __KSYMTAB_ENTRY(sym, sec)					\
 	__ADDRESSABLE(sym)						\
 	asm("	.section \"___ksymtab" sec "+" #sym "\", \"a\"	\n"	\
@@ -56,32 +68,53 @@ extern struct module __this_module;
 	    "__ksymtab_" #sym ":				\n"	\
 	    "	.long	" #sym "- .				\n"	\
 	    "	.long	__kstrtab_" #sym "- .			\n"	\
+	    "	.long	0 - .					\n"	\
 	    "	.previous					\n")
 
 struct kernel_symbol {
 	int value_offset;
 	int name_offset;
+	int namespace_offset;
 };
 #else
+#define __KSYMTAB_ENTRY_NS(sym, sec, ns)				\
+	static const struct kernel_symbol __ksymtab_##sym##__##ns	\
+	asm("__ksymtab_" #sym NS_SEPARATOR #ns)				\
+	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
+	__aligned(sizeof(void *))					\
+	= { (unsigned long)&sym, __kstrtab_##sym, __kstrtab_ns_##sym }
+
 #define __KSYMTAB_ENTRY(sym, sec)					\
 	static const struct kernel_symbol __ksymtab_##sym		\
+	asm("__ksymtab_" #sym)						\
 	__attribute__((section("___ksymtab" sec "+" #sym), used))	\
 	__aligned(sizeof(void *))					\
-	= { (unsigned long)&sym, __kstrtab_##sym }
+	= { (unsigned long)&sym, __kstrtab_##sym, NULL }
 
 struct kernel_symbol {
 	unsigned long value;
 	const char *name;
+	const char *namespace;
 };
 #endif
 
-/* For every exported symbol, place a struct in the __ksymtab section */
-#define ___EXPORT_SYMBOL(sym, sec)					\
+#define ___export_symbol_common(sym, sec)				\
 	extern typeof(sym) sym;						\
-	__CRC_SYMBOL(sym, sec)						\
+	__CRC_SYMBOL(sym, sec);						\
 	static const char __kstrtab_##sym[]				\
 	__attribute__((section("__ksymtab_strings"), used, aligned(1)))	\
-	= #sym;								\
+	= #sym								\
+
+/* For every exported symbol, place a struct in the __ksymtab section */
+#define ___EXPORT_SYMBOL_NS(sym, sec, ns)				\
+	___export_symbol_common(sym, sec);				\
+	static const char __kstrtab_ns_##sym[]				\
+	__attribute__((section("__ksymtab_strings"), used, aligned(1)))	\
+	= #ns;								\
+	__KSYMTAB_ENTRY_NS(sym, sec, ns)
+
+#define ___EXPORT_SYMBOL(sym, sec)					\
+	___export_symbol_common(sym, sec);				\
 	__KSYMTAB_ENTRY(sym, sec)
 
 #if defined(__DISABLE_EXPORTS)
@@ -91,6 +124,7 @@ struct kernel_symbol {
  * be reused in other execution contexts such as the UEFI stub or the
  * decompressor.
  */
+#define __EXPORT_SYMBOL_NS(sym, sec, ns)
 #define __EXPORT_SYMBOL(sym, sec)
 
 #elif defined(CONFIG_TRIM_UNUSED_KSYMS)
@@ -117,18 +151,26 @@ struct kernel_symbol {
 #define __cond_export_sym_1(sym, sec) ___EXPORT_SYMBOL(sym, sec)
 #define __cond_export_sym_0(sym, sec) /* nothing */
 
+#define __EXPORT_SYMBOL_NS(sym, sec, ns)				\
+	__ksym_marker(sym);						\
+	__cond_export_ns_sym(sym, sec, ns, __is_defined(__KSYM_##sym))
+#define __cond_export_ns_sym(sym, sec, ns, conf)			\
+	___cond_export_ns_sym(sym, sec, ns, conf)
+#define ___cond_export_ns_sym(sym, sec, ns, enabled)			\
+	__cond_export_ns_sym_##enabled(sym, sec, ns)
+#define __cond_export_ns_sym_1(sym, sec, ns) ___EXPORT_SYMBOL_NS(sym, sec, ns)
+#define __cond_export_ns_sym_0(sym, sec, ns) /* nothing */
+
 #else
+#define __EXPORT_SYMBOL_NS ___EXPORT_SYMBOL_NS
 #define __EXPORT_SYMBOL ___EXPORT_SYMBOL
 #endif
 
-#define EXPORT_SYMBOL(sym)					\
-	__EXPORT_SYMBOL(sym, "")
-
-#define EXPORT_SYMBOL_GPL(sym)					\
-	__EXPORT_SYMBOL(sym, "_gpl")
-
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)				\
-	__EXPORT_SYMBOL(sym, "_gpl_future")
+#define EXPORT_SYMBOL(sym) __EXPORT_SYMBOL(sym, "")
+#define EXPORT_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_gpl")
+#define EXPORT_SYMBOL_GPL_FUTURE(sym) __EXPORT_SYMBOL(sym, "_gpl_future")
+#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL_NS(sym, "", ns)
+#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL_NS(sym, "_gpl", ns)
 
 #ifdef CONFIG_UNUSED_SYMBOLS
 #define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused")
@@ -138,11 +180,28 @@ struct kernel_symbol {
 #define EXPORT_UNUSED_SYMBOL_GPL(sym)
 #endif
 
-#endif	/* __GENKSYMS__ */
+#endif	/* __KERNEL__ && !__GENKSYMS__ */
+
+#if defined(__GENKSYMS__)
+/*
+ * When we're running genksyms, ignore the namespace and make the _NS
+ * variants look like the normal ones. There are two reasons for this:
+ * 1) In the normal definition of EXPORT_SYMBOL_NS, the 'ns' macro
+ *    argument is itself not expanded because it's always tokenized or
+ *    concatenated; but when running genksyms, a blank definition of the
+ *    macro does allow the argument to be expanded; if a namespace
+ *    happens to collide with a #define, this can cause issues.
+ * 2) There's no need to modify genksyms to deal with the _NS variants
+ */
+#define EXPORT_SYMBOL_NS(sym, ns) EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_NS_GPL(sym, ns) EXPORT_SYMBOL_GPL(sym)
+#endif
 
 #else /* !CONFIG_MODULES... */
 
 #define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_NS(sym, ns)
+#define EXPORT_SYMBOL_NS_GPL(sym, ns)
 #define EXPORT_SYMBOL_GPL(sym)
 #define EXPORT_SYMBOL_GPL_FUTURE(sym)
 #define EXPORT_UNUSED_SYMBOL(sym)
diff --git a/include/linux/module.h b/include/linux/module.h
index 1455812dd325..b3611e749f72 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -280,6 +280,8 @@ struct notifier_block;
 
 #ifdef CONFIG_MODULES
 
+#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
+
 extern int modules_disabled; /* for sysctl */
 /* Get/put a kernel symbol (calls must be symmetric) */
 void *__symbol_get(const char *symbol);
diff --git a/kernel/module.c b/kernel/module.c
index 3ee507c0a92f..6bb9b938f9c7 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -544,6 +544,15 @@ static const char *kernel_symbol_name(const struct kernel_symbol *sym)
 #endif
 }
 
+static const char *kernel_symbol_namespace(const struct kernel_symbol *sym)
+{
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	return offset_to_ptr(&sym->namespace_offset);
+#else
+	return sym->namespace;
+#endif
+}
+
 static int cmp_name(const void *va, const void *vb)
 {
 	const char *a;
@@ -1379,6 +1388,34 @@ static inline int same_magic(const char *amagic, const char *bmagic,
 }
 #endif /* CONFIG_MODVERSIONS */
 
+static char *get_modinfo(const struct load_info *info, const char *tag);
+static char *get_next_modinfo(const struct load_info *info, const char *tag,
+			      char *prev);
+
+static int verify_namespace_is_imported(const struct load_info *info,
+					const struct kernel_symbol *sym,
+					struct module *mod)
+{
+	const char *namespace;
+	char *imported_namespace;
+
+	namespace = kernel_symbol_namespace(sym);
+	if (namespace) {
+		imported_namespace = get_modinfo(info, "import_ns");
+		while (imported_namespace) {
+			if (strcmp(namespace, imported_namespace) == 0)
+				return 0;
+			imported_namespace = get_next_modinfo(
+				info, "import_ns", imported_namespace);
+		}
+		pr_err("%s: module uses symbol (%s) from namespace %s, but does not import it.\n",
+		       mod->name, kernel_symbol_name(sym), namespace);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+
 /* Resolve a symbol for this module.  I.e. if we find one, record usage. */
 static const struct kernel_symbol *resolve_symbol(struct module *mod,
 						  const struct load_info *info,
@@ -1407,6 +1444,12 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
 		goto getname;
 	}
 
+	err = verify_namespace_is_imported(info, sym, mod);
+	if (err) {
+		sym = ERR_PTR(err);
+		goto getname;
+	}
+
 	err = ref_module(mod, owner);
 	if (err) {
 		sym = ERR_PTR(err);
-- 
cgit v1.2.3


From 8e2adc6a00cd96c07fcfa8adfbedbb4be681b8a7 Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Fri, 6 Sep 2019 11:32:30 +0100
Subject: export: allow definition default namespaces in Makefiles or sources

To avoid excessive usage of EXPORT_SYMBOL_NS(sym, MY_NAMESPACE), where
MY_NAMESPACE will always be the namespace we are exporting to, allow
exporting all definitions of EXPORT_SYMBOL() and friends by defining
DEFAULT_SYMBOL_NAMESPACE.

For example, to export all symbols defined in usb-common into the
namespace USB_COMMON, add a line like this to drivers/usb/common/Makefile:

  ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=USB_COMMON

That is equivalent to changing all EXPORT_SYMBOL(sym) definitions to
EXPORT_SYMBOL_NS(sym, USB_COMMON). Subsequently all symbol namespaces
functionality will apply.

Another way of making use of this feature is to define the namespace
within source or header files similar to how TRACE_SYSTEM defines are
used:
  #undef DEFAULT_SYMBOL_NAMESPACE
  #define DEFAULT_SYMBOL_NAMESPACE USB_COMMON

Please note that, as opposed to TRACE_SYSTEM, DEFAULT_SYMBOL_NAMESPACE
has to be defined before including include/linux/export.h.

If DEFAULT_SYMBOL_NAMESPACE is defined, a symbol can still be exported
to another namespace by using EXPORT_SYMBOL_NS() and friends with
explicitly specifying the namespace.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Martijn Coenen <maco@android.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Matthias Maennich <maennich@google.com>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 include/linux/export.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/export.h b/include/linux/export.h
index d59461e71478..2c5468d8ea9a 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -166,6 +166,12 @@ struct kernel_symbol {
 #define __EXPORT_SYMBOL ___EXPORT_SYMBOL
 #endif
 
+#ifdef DEFAULT_SYMBOL_NAMESPACE
+#undef __EXPORT_SYMBOL
+#define __EXPORT_SYMBOL(sym, sec)				\
+	__EXPORT_SYMBOL_NS(sym, sec, DEFAULT_SYMBOL_NAMESPACE)
+#endif
+
 #define EXPORT_SYMBOL(sym) __EXPORT_SYMBOL(sym, "")
 #define EXPORT_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_gpl")
 #define EXPORT_SYMBOL_GPL_FUTURE(sym) __EXPORT_SYMBOL(sym, "_gpl_future")
-- 
cgit v1.2.3


From 11a60d159259dbadf9188534b508e5003769af61 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Mon, 9 Sep 2019 16:10:30 -0400
Subject: nfsd: add a "GetVersion" upcall for nfsdcld

Add a "GetVersion" upcall to allow nfsd to determine the maximum upcall
version that the nfsdcld userspace daemon supports.  If the daemon
responds with -EOPNOTSUPP, then we know it only supports v1.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c         | 167 +++++++++++++++++++++++++++++-------------
 include/uapi/linux/nfsd/cld.h |  11 ++-
 2 files changed, 127 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 87679557d0d6..58a61339d40c 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -59,8 +59,12 @@ struct nfsd4_client_tracking_ops {
 	void (*remove)(struct nfs4_client *);
 	int (*check)(struct nfs4_client *);
 	void (*grace_done)(struct nfsd_net *);
+	uint8_t version;
+	size_t msglen;
 };
 
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
 
@@ -718,6 +722,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
 	.remove		= nfsd4_remove_clid_dir,
 	.check		= nfsd4_check_legacy_client,
 	.grace_done	= nfsd4_recdir_purge_old,
+	.version	= 1,
+	.msglen		= 0,
 };
 
 /* Globals */
@@ -737,19 +743,24 @@ struct cld_upcall {
 	struct list_head	 cu_list;
 	struct cld_net		*cu_net;
 	struct completion	 cu_done;
-	struct cld_msg		 cu_msg;
+	union {
+		struct cld_msg_hdr	 cu_hdr;
+		struct cld_msg		 cu_msg;
+	} cu_u;
 };
 
 static int
-__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+__cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
 	int ret;
 	struct rpc_pipe_msg msg;
-	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
+	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_u);
+	struct nfsd_net *nn = net_generic(pipe->dentry->d_sb->s_fs_info,
+					  nfsd_net_id);
 
 	memset(&msg, 0, sizeof(msg));
 	msg.data = cmsg;
-	msg.len = sizeof(*cmsg);
+	msg.len = nn->client_tracking_ops->msglen;
 
 	ret = rpc_queue_upcall(pipe, &msg);
 	if (ret < 0) {
@@ -765,7 +776,7 @@ out:
 }
 
 static int
-cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
+cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 {
 	int ret;
 
@@ -809,7 +820,7 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
 			kfree(name.data);
 			return -EFAULT;
 		}
-		return sizeof(*cmsg);
+		return nn->client_tracking_ops->msglen;
 	}
 	return -EFAULT;
 }
@@ -818,6 +829,7 @@ static ssize_t
 cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
 	struct cld_upcall *tmp, *cup;
+	struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
 	struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
 	uint32_t xid;
 	struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
@@ -825,14 +837,14 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	struct cld_net *cn = nn->cld_net;
 	int16_t status;
 
-	if (mlen != sizeof(*cmsg)) {
+	if (mlen != nn->client_tracking_ops->msglen) {
 		dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
-			sizeof(*cmsg));
+			nn->client_tracking_ops->msglen);
 		return -EINVAL;
 	}
 
 	/* copy just the xid so we can try to find that */
-	if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
+	if (copy_from_user(&xid, &hdr->cm_xid, sizeof(xid)) != 0) {
 		dprintk("%s: error when copying xid from userspace", __func__);
 		return -EFAULT;
 	}
@@ -842,7 +854,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	 * list (for -EINPROGRESS, we just want to make sure the xid is
 	 * valid, not remove the upcall from the list)
 	 */
-	if (get_user(status, &cmsg->cm_status)) {
+	if (get_user(status, &hdr->cm_status)) {
 		dprintk("%s: error when copying status from userspace", __func__);
 		return -EFAULT;
 	}
@@ -851,7 +863,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	cup = NULL;
 	spin_lock(&cn->cn_lock);
 	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-		if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
+		if (get_unaligned(&tmp->cu_u.cu_hdr.cm_xid) == xid) {
 			cup = tmp;
 			if (status != -EINPROGRESS)
 				list_del_init(&cup->cu_list);
@@ -869,7 +881,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	if (status == -EINPROGRESS)
 		return __cld_pipe_inprogress_downcall(cmsg, nn);
 
-	if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
+	if (copy_from_user(&cup->cu_u.cu_msg, src, mlen) != 0)
 		return -EFAULT;
 
 	complete(&cup->cu_done);
@@ -881,7 +893,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 {
 	struct cld_msg *cmsg = msg->data;
 	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
-						 cu_msg);
+						 cu_u.cu_msg);
 
 	/* errno >= 0 means we got a downcall */
 	if (msg->errno >= 0)
@@ -1012,9 +1024,10 @@ nfsd4_remove_cld_pipe(struct net *net)
 }
 
 static struct cld_upcall *
-alloc_cld_upcall(struct cld_net *cn)
+alloc_cld_upcall(struct nfsd_net *nn)
 {
 	struct cld_upcall *new, *tmp;
+	struct cld_net *cn = nn->cld_net;
 
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
@@ -1024,20 +1037,20 @@ alloc_cld_upcall(struct cld_net *cn)
 restart_search:
 	spin_lock(&cn->cn_lock);
 	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
-		if (tmp->cu_msg.cm_xid == cn->cn_xid) {
+		if (tmp->cu_u.cu_msg.cm_xid == cn->cn_xid) {
 			cn->cn_xid++;
 			spin_unlock(&cn->cn_lock);
 			goto restart_search;
 		}
 	}
 	init_completion(&new->cu_done);
-	new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
-	put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
+	new->cu_u.cu_msg.cm_vers = nn->client_tracking_ops->version;
+	put_unaligned(cn->cn_xid++, &new->cu_u.cu_msg.cm_xid);
 	new->cu_net = cn;
 	list_add(&new->cu_list, &cn->cn_list);
 	spin_unlock(&cn->cn_lock);
 
-	dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
+	dprintk("%s: allocated xid %u\n", __func__, new->cu_u.cu_msg.cm_xid);
 
 	return new;
 }
@@ -1066,20 +1079,20 @@ nfsd4_cld_create(struct nfs4_client *clp)
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
 
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
 
-	cup->cu_msg.cm_cmd = Cld_Create;
-	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+	cup->cu_u.cu_msg.cm_cmd = Cld_Create;
+	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
 			clp->cl_name.len);
 
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret) {
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 	}
 
@@ -1103,20 +1116,20 @@ nfsd4_cld_remove(struct nfs4_client *clp)
 	if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return;
 
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
 
-	cup->cu_msg.cm_cmd = Cld_Remove;
-	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+	cup->cu_u.cu_msg.cm_cmd = Cld_Remove;
+	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
 			clp->cl_name.len);
 
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret) {
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 		clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 	}
 
@@ -1145,21 +1158,21 @@ nfsd4_cld_check_v0(struct nfs4_client *clp)
 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
 		return 0;
 
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		printk(KERN_ERR "NFSD: Unable to check client record on "
 				"stable storage: %d\n", -ENOMEM);
 		return -ENOMEM;
 	}
 
-	cup->cu_msg.cm_cmd = Cld_Check;
-	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
-	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
+	cup->cu_u.cu_msg.cm_cmd = Cld_Check;
+	cup->cu_u.cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
+	memcpy(cup->cu_u.cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
 			clp->cl_name.len);
 
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret) {
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
 	}
 
@@ -1223,16 +1236,16 @@ nfsd4_cld_grace_start(struct nfsd_net *nn)
 	struct cld_upcall *cup;
 	struct cld_net *cn = nn->cld_net;
 
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
 
-	cup->cu_msg.cm_cmd = Cld_GraceStart;
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	cup->cu_u.cu_msg.cm_cmd = Cld_GraceStart;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 
 	free_cld_upcall(cup);
 out_err:
@@ -1250,17 +1263,17 @@ nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
 	struct cld_upcall *cup;
 	struct cld_net *cn = nn->cld_net;
 
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
 
-	cup->cu_msg.cm_cmd = Cld_GraceDone;
-	cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+	cup->cu_u.cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 
 	free_cld_upcall(cup);
 out_err:
@@ -1279,16 +1292,16 @@ nfsd4_cld_grace_done(struct nfsd_net *nn)
 	struct cld_upcall *cup;
 	struct cld_net *cn = nn->cld_net;
 
-	cup = alloc_cld_upcall(cn);
+	cup = alloc_cld_upcall(nn);
 	if (!cup) {
 		ret = -ENOMEM;
 		goto out_err;
 	}
 
-	cup->cu_msg.cm_cmd = Cld_GraceDone;
-	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
+	cup->cu_u.cu_msg.cm_cmd = Cld_GraceDone;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
 	if (!ret)
-		ret = cup->cu_msg.cm_status;
+		ret = cup->cu_u.cu_msg.cm_status;
 
 	free_cld_upcall(cup);
 out_err:
@@ -1336,6 +1349,50 @@ cld_running(struct nfsd_net *nn)
 	return pipe->nreaders || pipe->nwriters;
 }
 
+static int
+nfsd4_cld_get_version(struct nfsd_net *nn)
+{
+	int ret = 0;
+	struct cld_upcall *cup;
+	struct cld_net *cn = nn->cld_net;
+	uint8_t version;
+
+	cup = alloc_cld_upcall(nn);
+	if (!cup) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+	cup->cu_u.cu_msg.cm_cmd = Cld_GetVersion;
+	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_u.cu_msg);
+	if (!ret) {
+		ret = cup->cu_u.cu_msg.cm_status;
+		if (ret)
+			goto out_free;
+		version = cup->cu_u.cu_msg.cm_u.cm_version;
+		dprintk("%s: userspace returned version %u\n",
+				__func__, version);
+		if (version < 1)
+			version = 1;
+		else if (version > CLD_UPCALL_VERSION)
+			version = CLD_UPCALL_VERSION;
+
+		switch (version) {
+		case 1:
+			nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
+			break;
+		default:
+			break;
+		}
+	}
+out_free:
+	free_cld_upcall(cup);
+out_err:
+	if (ret)
+		dprintk("%s: Unable to get version from userspace: %d\n",
+			__func__, ret);
+	return ret;
+}
+
 static int
 nfsd4_cld_tracking_init(struct net *net)
 {
@@ -1368,10 +1425,14 @@ nfsd4_cld_tracking_init(struct net *net)
 		goto err_remove;
 	}
 
+	status = nfsd4_cld_get_version(nn);
+	if (status == -EOPNOTSUPP)
+		pr_warn("NFSD: nfsdcld GetVersion upcall failed. Please upgrade nfsdcld.\n");
+
 	status = nfsd4_cld_grace_start(nn);
 	if (status) {
 		if (status == -EOPNOTSUPP)
-			printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
+			pr_warn("NFSD: nfsdcld GraceStart upcall failed. Please upgrade nfsdcld.\n");
 		nfs4_release_reclaim(nn);
 		goto err_remove;
 	} else
@@ -1403,6 +1464,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
 	.remove		= nfsd4_cld_remove,
 	.check		= nfsd4_cld_check_v0,
 	.grace_done	= nfsd4_cld_grace_done_v0,
+	.version	= 1,
+	.msglen		= sizeof(struct cld_msg),
 };
 
 /* For newer nfsdcld's */
@@ -1413,6 +1476,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
 	.remove		= nfsd4_cld_remove,
 	.check		= nfsd4_cld_check,
 	.grace_done	= nfsd4_cld_grace_done,
+	.version	= 1,
+	.msglen		= sizeof(struct cld_msg),
 };
 
 /* upcall via usermodehelper */
@@ -1760,6 +1825,8 @@ static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
 	.remove		= nfsd4_umh_cltrack_remove,
 	.check		= nfsd4_umh_cltrack_check,
 	.grace_done	= nfsd4_umh_cltrack_grace_done,
+	.version	= 1,
+	.msglen		= 0,
 };
 
 int
diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h
index b1e9de4f07d5..c5aad16d10c0 100644
--- a/include/uapi/linux/nfsd/cld.h
+++ b/include/uapi/linux/nfsd/cld.h
@@ -36,7 +36,8 @@ enum cld_command {
 	Cld_Remove,		/* remove record of this cm_id */
 	Cld_Check,		/* is this cm_id allowed? */
 	Cld_GraceDone,		/* grace period is complete */
-	Cld_GraceStart,
+	Cld_GraceStart,		/* grace start (upload client records) */
+	Cld_GetVersion,		/* query max supported upcall version */
 };
 
 /* representation of long-form NFSv4 client ID */
@@ -54,7 +55,15 @@ struct cld_msg {
 	union {
 		__s64		cm_gracetime;	/* grace period start time */
 		struct cld_name	cm_name;
+		__u8		cm_version;	/* for getting max version */
 	} __attribute__((packed)) cm_u;
 } __attribute__((packed));
 
+struct cld_msg_hdr {
+	__u8		cm_vers;		/* upcall version */
+	__u8		cm_cmd;			/* upcall command */
+	__s16		cm_status;		/* return code */
+	__u32		cm_xid;			/* transaction id */
+} __attribute__((packed));
+
 #endif /* !_NFSD_CLD_H */
-- 
cgit v1.2.3


From 6ee95d1c899186c0798cafd25998d436bcdb9618 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Mon, 9 Sep 2019 16:10:31 -0400
Subject: nfsd: add support for upcall version 2

Version 2 upcalls will allow the nfsd to include a hash of the kerberos
principal string in the Cld_Create upcall.  If a principal is present in
the svc_cred, then the hash will be included in the Cld_Create upcall.
We attempt to use the svc_cred.cr_raw_principal (which is returned by
gssproxy) first, and then fall back to using the svc_cred.cr_principal
(which is returned by both gssproxy and rpc.svcgssd).  Upon a subsequent
restart, the hash will be returned in the Cld_Gracestart downcall and
stored in the reclaim_str_hashtbl so it can be used when handling
reclaim opens.

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4recover.c         | 223 +++++++++++++++++++++++++++++++++++++++---
 fs/nfsd/nfs4state.c           |   6 +-
 fs/nfsd/state.h               |   3 +-
 include/uapi/linux/nfsd/cld.h |  30 +++++-
 4 files changed, 245 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 58a61339d40c..cdc75ad4438b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -64,6 +64,7 @@ struct nfsd4_client_tracking_ops {
 };
 
 static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops;
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2;
 
 /* Globals */
 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
@@ -177,6 +178,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
 		const char *dname, int len, struct nfsd_net *nn)
 {
 	struct xdr_netobj name;
+	struct xdr_netobj princhash = { .len = 0, .data = NULL };
 	struct nfs4_client_reclaim *crp;
 
 	name.data = kmemdup(dname, len, GFP_KERNEL);
@@ -186,7 +188,7 @@ __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
 		return;
 	}
 	name.len = len;
-	crp = nfs4_client_to_reclaim(name, nn);
+	crp = nfs4_client_to_reclaim(name, princhash, nn);
 	if (!crp) {
 		kfree(name.data);
 		return;
@@ -486,6 +488,7 @@ static int
 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 {
 	struct xdr_netobj name;
+	struct xdr_netobj princhash = { .len = 0, .data = NULL };
 
 	if (child->d_name.len != HEXDIR_LEN - 1) {
 		printk("%s: illegal name %pd in recovery directory\n",
@@ -500,7 +503,7 @@ load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
 		goto out;
 	}
 	name.len = HEXDIR_LEN;
-	if (!nfs4_client_to_reclaim(name, nn))
+	if (!nfs4_client_to_reclaim(name, princhash, nn))
 		kfree(name.data);
 out:
 	return 0;
@@ -737,6 +740,7 @@ struct cld_net {
 	struct list_head	 cn_list;
 	unsigned int		 cn_xid;
 	bool			 cn_has_legacy;
+	struct crypto_shash	*cn_tfm;
 };
 
 struct cld_upcall {
@@ -746,6 +750,7 @@ struct cld_upcall {
 	union {
 		struct cld_msg_hdr	 cu_hdr;
 		struct cld_msg		 cu_msg;
+		struct cld_msg_v2	 cu_msg_v2;
 	} cu_u;
 };
 
@@ -792,11 +797,11 @@ cld_pipe_upcall(struct rpc_pipe *pipe, void *cmsg)
 }
 
 static ssize_t
-__cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
+__cld_pipe_inprogress_downcall(const struct cld_msg_v2 __user *cmsg,
 		struct nfsd_net *nn)
 {
-	uint8_t cmd;
-	struct xdr_netobj name;
+	uint8_t cmd, princhashlen;
+	struct xdr_netobj name, princhash = { .len = 0, .data = NULL };
 	uint16_t namelen;
 	struct cld_net *cn = nn->cld_net;
 
@@ -805,19 +810,45 @@ __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
 		return -EFAULT;
 	}
 	if (cmd == Cld_GraceStart) {
-		if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
-			return -EFAULT;
-		name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
-		if (IS_ERR_OR_NULL(name.data))
-			return -EFAULT;
-		name.len = namelen;
+		if (nn->client_tracking_ops->version >= 2) {
+			const struct cld_clntinfo __user *ci;
+
+			ci = &cmsg->cm_u.cm_clntinfo;
+			if (get_user(namelen, &ci->cc_name.cn_len))
+				return -EFAULT;
+			name.data = memdup_user(&ci->cc_name.cn_id, namelen);
+			if (IS_ERR_OR_NULL(name.data))
+				return -EFAULT;
+			name.len = namelen;
+			get_user(princhashlen, &ci->cc_princhash.cp_len);
+			if (princhashlen > 0) {
+				princhash.data = memdup_user(
+						&ci->cc_princhash.cp_data,
+						princhashlen);
+				if (IS_ERR_OR_NULL(princhash.data))
+					return -EFAULT;
+				princhash.len = princhashlen;
+			} else
+				princhash.len = 0;
+		} else {
+			const struct cld_name __user *cnm;
+
+			cnm = &cmsg->cm_u.cm_name;
+			if (get_user(namelen, &cnm->cn_len))
+				return -EFAULT;
+			name.data = memdup_user(&cnm->cn_id, namelen);
+			if (IS_ERR_OR_NULL(name.data))
+				return -EFAULT;
+			name.len = namelen;
+		}
 		if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
 			name.len = name.len - 5;
 			memmove(name.data, name.data + 5, name.len);
 			cn->cn_has_legacy = true;
 		}
-		if (!nfs4_client_to_reclaim(name, nn)) {
+		if (!nfs4_client_to_reclaim(name, princhash, nn)) {
 			kfree(name.data);
+			kfree(princhash.data);
 			return -EFAULT;
 		}
 		return nn->client_tracking_ops->msglen;
@@ -830,7 +861,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
 	struct cld_upcall *tmp, *cup;
 	struct cld_msg_hdr __user *hdr = (struct cld_msg_hdr __user *)src;
-	struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
+	struct cld_msg_v2 __user *cmsg = (struct cld_msg_v2 __user *)src;
 	uint32_t xid;
 	struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
 						nfsd_net_id);
@@ -881,7 +912,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	if (status == -EINPROGRESS)
 		return __cld_pipe_inprogress_downcall(cmsg, nn);
 
-	if (copy_from_user(&cup->cu_u.cu_msg, src, mlen) != 0)
+	if (copy_from_user(&cup->cu_u.cu_msg_v2, src, mlen) != 0)
 		return -EFAULT;
 
 	complete(&cup->cu_done);
@@ -1019,6 +1050,8 @@ nfsd4_remove_cld_pipe(struct net *net)
 
 	nfsd4_cld_unregister_net(net, cn->cn_pipe);
 	rpc_destroy_pipe_data(cn->cn_pipe);
+	if (cn->cn_tfm)
+		crypto_free_shash(cn->cn_tfm);
 	kfree(nn->cld_net);
 	nn->cld_net = NULL;
 }
@@ -1103,6 +1136,75 @@ out_err:
 				"record on stable storage: %d\n", ret);
 }
 
+/* Ask daemon to create a new record */
+static void
+nfsd4_cld_create_v2(struct nfs4_client *clp)
+{
+	int ret;
+	struct cld_upcall *cup;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+	struct cld_net *cn = nn->cld_net;
+	struct cld_msg_v2 *cmsg;
+	struct crypto_shash *tfm = cn->cn_tfm;
+	struct xdr_netobj cksum;
+	char *principal = NULL;
+	SHASH_DESC_ON_STACK(desc, tfm);
+
+	/* Don't upcall if it's already stored */
+	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+		return;
+
+	cup = alloc_cld_upcall(nn);
+	if (!cup) {
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	cmsg = &cup->cu_u.cu_msg_v2;
+	cmsg->cm_cmd = Cld_Create;
+	cmsg->cm_u.cm_clntinfo.cc_name.cn_len = clp->cl_name.len;
+	memcpy(cmsg->cm_u.cm_clntinfo.cc_name.cn_id, clp->cl_name.data,
+			clp->cl_name.len);
+	if (clp->cl_cred.cr_raw_principal)
+		principal = clp->cl_cred.cr_raw_principal;
+	else if (clp->cl_cred.cr_principal)
+		principal = clp->cl_cred.cr_principal;
+	if (principal) {
+		desc->tfm = tfm;
+		cksum.len = crypto_shash_digestsize(tfm);
+		cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+		if (cksum.data == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ret = crypto_shash_digest(desc, principal, strlen(principal),
+					  cksum.data);
+		shash_desc_zero(desc);
+		if (ret) {
+			kfree(cksum.data);
+			goto out;
+		}
+		cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len;
+		memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data,
+		       cksum.data, cksum.len);
+		kfree(cksum.data);
+	} else
+		cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0;
+
+	ret = cld_pipe_upcall(cn->cn_pipe, cmsg);
+	if (!ret) {
+		ret = cmsg->cm_status;
+		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
+	}
+
+out:
+	free_cld_upcall(cup);
+out_err:
+	if (ret)
+		pr_err("NFSD: Unable to create client record on stable storage: %d\n",
+				ret);
+}
+
 /* Ask daemon to create a new record */
 static void
 nfsd4_cld_remove(struct nfs4_client *clp)
@@ -1229,6 +1331,79 @@ found:
 	return 0;
 }
 
+static int
+nfsd4_cld_check_v2(struct nfs4_client *clp)
+{
+	struct nfs4_client_reclaim *crp;
+	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+	struct cld_net *cn = nn->cld_net;
+	int status;
+	char dname[HEXDIR_LEN];
+	struct xdr_netobj name;
+	struct crypto_shash *tfm = cn->cn_tfm;
+	struct xdr_netobj cksum;
+	char *principal = NULL;
+	SHASH_DESC_ON_STACK(desc, tfm);
+
+	/* did we already find that this client is stable? */
+	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
+		return 0;
+
+	/* look for it in the reclaim hashtable otherwise */
+	crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
+	if (crp)
+		goto found;
+
+	if (cn->cn_has_legacy) {
+		status = nfs4_make_rec_clidname(dname, &clp->cl_name);
+		if (status)
+			return -ENOENT;
+
+		name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
+		if (!name.data) {
+			dprintk("%s: failed to allocate memory for name.data\n",
+					__func__);
+			return -ENOENT;
+		}
+		name.len = HEXDIR_LEN;
+		crp = nfsd4_find_reclaim_client(name, nn);
+		kfree(name.data);
+		if (crp)
+			goto found;
+
+	}
+	return -ENOENT;
+found:
+	if (crp->cr_princhash.len) {
+		if (clp->cl_cred.cr_raw_principal)
+			principal = clp->cl_cred.cr_raw_principal;
+		else if (clp->cl_cred.cr_principal)
+			principal = clp->cl_cred.cr_principal;
+		if (principal == NULL)
+			return -ENOENT;
+		desc->tfm = tfm;
+		cksum.len = crypto_shash_digestsize(tfm);
+		cksum.data = kmalloc(cksum.len, GFP_KERNEL);
+		if (cksum.data == NULL)
+			return -ENOENT;
+		status = crypto_shash_digest(desc, principal, strlen(principal),
+					     cksum.data);
+		shash_desc_zero(desc);
+		if (status) {
+			kfree(cksum.data);
+			return -ENOENT;
+		}
+		if (memcmp(crp->cr_princhash.data, cksum.data,
+				crp->cr_princhash.len)) {
+			kfree(cksum.data);
+			return -ENOENT;
+		}
+		kfree(cksum.data);
+	}
+	crp->cr_clp = clp;
+	return 0;
+}
+
 static int
 nfsd4_cld_grace_start(struct nfsd_net *nn)
 {
@@ -1380,6 +1555,9 @@ nfsd4_cld_get_version(struct nfsd_net *nn)
 		case 1:
 			nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
 			break;
+		case 2:
+			nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v2;
+			break;
 		default:
 			break;
 		}
@@ -1408,6 +1586,11 @@ nfsd4_cld_tracking_init(struct net *net)
 	status = __nfsd4_init_cld_pipe(net);
 	if (status)
 		goto err_shutdown;
+	nn->cld_net->cn_tfm = crypto_alloc_shash("sha256", 0, 0);
+	if (IS_ERR(nn->cld_net->cn_tfm)) {
+		status = PTR_ERR(nn->cld_net->cn_tfm);
+		goto err_remove;
+	}
 
 	/*
 	 * rpc pipe upcalls take 30 seconds to time out, so we don't want to
@@ -1480,6 +1663,18 @@ static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
 	.msglen		= sizeof(struct cld_msg),
 };
 
+/* v2 create/check ops include the principal, if available */
+static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v2 = {
+	.init		= nfsd4_cld_tracking_init,
+	.exit		= nfsd4_cld_tracking_exit,
+	.create		= nfsd4_cld_create_v2,
+	.remove		= nfsd4_cld_remove,
+	.check		= nfsd4_cld_check_v2,
+	.grace_done	= nfsd4_cld_grace_done,
+	.version	= 2,
+	.msglen		= sizeof(struct cld_msg_v2),
+};
+
 /* upcall via usermodehelper */
 static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
 module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 906e214dcce8..da1093dd5016 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6887,7 +6887,8 @@ nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn)
  * will be freed in nfs4_remove_reclaim_record in the normal case).
  */
 struct nfs4_client_reclaim *
-nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
+nfs4_client_to_reclaim(struct xdr_netobj name, struct xdr_netobj princhash,
+		struct nfsd_net *nn)
 {
 	unsigned int strhashval;
 	struct nfs4_client_reclaim *crp;
@@ -6900,6 +6901,8 @@ nfs4_client_to_reclaim(struct xdr_netobj name, struct nfsd_net *nn)
 		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
 		crp->cr_name.data = name.data;
 		crp->cr_name.len = name.len;
+		crp->cr_princhash.data = princhash.data;
+		crp->cr_princhash.len = princhash.len;
 		crp->cr_clp = NULL;
 		nn->reclaim_str_hashtbl_size++;
 	}
@@ -6911,6 +6914,7 @@ nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
 {
 	list_del(&crp->cr_strhash);
 	kfree(crp->cr_name.data);
+	kfree(crp->cr_princhash.data);
 	kfree(crp);
 	nn->reclaim_str_hashtbl_size--;
 }
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d00c86d05beb..46f56afb6cb8 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -378,6 +378,7 @@ struct nfs4_client_reclaim {
 	struct list_head	cr_strhash;	/* hash by cr_name */
 	struct nfs4_client	*cr_clp;	/* pointer to associated clp */
 	struct xdr_netobj	cr_name;	/* recovery dir name */
+	struct xdr_netobj	cr_princhash;
 };
 
 /* A reasonable value for REPLAY_ISIZE was estimated as follows:  
@@ -645,7 +646,7 @@ extern void nfsd4_shutdown_callback(struct nfs4_client *);
 extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
 extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
-							struct nfsd_net *nn);
+				struct xdr_netobj princhash, struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
 
 struct nfs4_file *find_file(struct knfsd_fh *fh);
diff --git a/include/uapi/linux/nfsd/cld.h b/include/uapi/linux/nfsd/cld.h
index c5aad16d10c0..a519313af953 100644
--- a/include/uapi/linux/nfsd/cld.h
+++ b/include/uapi/linux/nfsd/cld.h
@@ -26,11 +26,15 @@
 #include <linux/types.h>
 
 /* latest upcall version available */
-#define CLD_UPCALL_VERSION 1
+#define CLD_UPCALL_VERSION 2
 
 /* defined by RFC3530 */
 #define NFS4_OPAQUE_LIMIT 1024
 
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE      32
+#endif
+
 enum cld_command {
 	Cld_Create,		/* create a record for this cm_id */
 	Cld_Remove,		/* remove record of this cm_id */
@@ -46,6 +50,17 @@ struct cld_name {
 	unsigned char	cn_id[NFS4_OPAQUE_LIMIT];	/* client-provided */
 } __attribute__((packed));
 
+/* sha256 hash of the kerberos principal */
+struct cld_princhash {
+	__u8		cp_len;				/* length of cp_data */
+	unsigned char	cp_data[SHA256_DIGEST_SIZE];	/* hash of principal */
+} __attribute__((packed));
+
+struct cld_clntinfo {
+	struct cld_name		cc_name;
+	struct cld_princhash	cc_princhash;
+} __attribute__((packed));
+
 /* message struct for communication with userspace */
 struct cld_msg {
 	__u8		cm_vers;		/* upcall version */
@@ -59,6 +74,19 @@ struct cld_msg {
 	} __attribute__((packed)) cm_u;
 } __attribute__((packed));
 
+/* version 2 message can include hash of kerberos principal */
+struct cld_msg_v2 {
+	__u8		cm_vers;		/* upcall version */
+	__u8		cm_cmd;			/* upcall command */
+	__s16		cm_status;		/* return code */
+	__u32		cm_xid;			/* transaction id */
+	union {
+		struct cld_name	cm_name;
+		__u8		cm_version;	/* for getting max version */
+		struct cld_clntinfo cm_clntinfo; /* name & princ hash */
+	} __attribute__((packed)) cm_u;
+} __attribute__((packed));
+
 struct cld_msg_hdr {
 	__u8		cm_vers;		/* upcall version */
 	__u8		cm_cmd;			/* upcall command */
-- 
cgit v1.2.3


From a0469f989fe1d051820cda7ead496f1a7371f3d8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 9 Sep 2019 19:53:16 +0900
Subject: export.h: remove defined(__KERNEL__), which is no longer needed

The conditional, define(__KERNEL__), was added by commit f235541699bc
("export.h: allow for per-symbol configurable EXPORT_SYMBOL()").

It was needed at that time to avoid the build error of modpost
with CONFIG_TRIM_UNUSED_KSYMS=y.

Since commit b2c5cdcfd4bc ("modpost: remove symbol prefix support"),
modpost no longer includes linux/export.h, thus the define(__KERNEL__)
is unneeded.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
---
 include/linux/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/export.h b/include/linux/export.h
index fd8711ed9ac4..cdd98a0d918c 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -20,7 +20,7 @@ extern struct module __this_module;
 
 #ifdef CONFIG_MODULES
 
-#if defined(__KERNEL__) && !defined(__GENKSYMS__)
+#if !defined(__GENKSYMS__)
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
-- 
cgit v1.2.3


From e019a3b17f0d79894917e6d83b17e87e8f809deb Mon Sep 17 00:00:00 2001
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Date: Mon, 9 Sep 2019 00:54:17 +0100
Subject: devlink: extend 'fw_load_policy' values

Add the 'disk' value to the generic 'fw_load_policy' devlink parameter.
This value indicates that firmware should always be loaded from disk
only.

Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-params.txt | 2 ++
 include/uapi/linux/devlink.h                | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt
index 2d26434ddcf8..fadb5436188d 100644
--- a/Documentation/networking/devlink-params.txt
+++ b/Documentation/networking/devlink-params.txt
@@ -48,4 +48,6 @@ fw_load_policy		[DEVICE, GENERIC]
 			  Load firmware version preferred by the driver.
 			* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH (1)
 			  Load firmware currently stored in flash.
+			* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK (2)
+			  Load firmware currently available on host's disk.
 			Type: u8
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 546e75dd74ac..c25cc29a6647 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -202,6 +202,7 @@ enum devlink_param_cmode {
 enum devlink_param_fw_load_policy_value {
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
 };
 
 enum {
-- 
cgit v1.2.3


From 5bbd21df5a075a59ab53030d25f9848ccca93d73 Mon Sep 17 00:00:00 2001
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Date: Mon, 9 Sep 2019 00:54:18 +0100
Subject: devlink: add 'reset_dev_on_drv_probe' param

Add the 'reset_dev_on_drv_probe' devlink parameter, controlling the
device reset policy on driver probe.

This parameter is useful in conjunction with the existing
'fw_load_policy' parameter.

Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink-params.txt | 14 ++++++++++++++
 include/net/devlink.h                       |  5 +++++
 include/uapi/linux/devlink.h                |  7 +++++++
 net/core/devlink.c                          |  5 +++++
 4 files changed, 31 insertions(+)

(limited to 'include')

diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt
index fadb5436188d..ddba3e9b55b1 100644
--- a/Documentation/networking/devlink-params.txt
+++ b/Documentation/networking/devlink-params.txt
@@ -51,3 +51,17 @@ fw_load_policy		[DEVICE, GENERIC]
 			* DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK (2)
 			  Load firmware currently available on host's disk.
 			Type: u8
+
+reset_dev_on_drv_probe	[DEVICE, GENERIC]
+			Controls the device's reset policy on driver probe.
+			Valid values:
+			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN (0)
+			  Unknown or invalid value.
+			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS (1)
+			  Always reset device on driver probe.
+			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER (2)
+			  Never reset device on driver probe.
+			* DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK (3)
+			  Reset only if device firmware can be found in the
+			  filesystem.
+			Type: u8
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 460bc629d1a4..03e4d9244ff3 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -398,6 +398,7 @@ enum devlink_param_generic_id {
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
 	DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
 	DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY,
+	DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
 
 	/* add new param generic ids above here*/
 	__DEVLINK_PARAM_GENERIC_ID_MAX,
@@ -428,6 +429,10 @@ enum devlink_param_generic_id {
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME "fw_load_policy"
 #define DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE DEVLINK_PARAM_TYPE_U8
 
+#define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME \
+	"reset_dev_on_drv_probe"
+#define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE DEVLINK_PARAM_TYPE_U8
+
 #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate)	\
 {									\
 	.id = DEVLINK_PARAM_GENERIC_ID_##_id,				\
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index c25cc29a6647..1da3e83f1fd4 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -205,6 +205,13 @@ enum devlink_param_fw_load_policy_value {
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
 };
 
+enum devlink_param_reset_dev_on_drv_probe_value {
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER,
+	DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK,
+};
+
 enum {
 	DEVLINK_ATTR_STATS_RX_PACKETS,		/* u64 */
 	DEVLINK_ATTR_STATS_RX_BYTES,		/* u64 */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6e52d639dac6..4a2fb94c44cf 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2852,6 +2852,11 @@ static const struct devlink_param devlink_param_generic[] = {
 		.name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME,
 		.type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE,
 	},
+	{
+		.id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE,
+		.name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME,
+		.type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE,
+	},
 };
 
 static int devlink_param_generic_verify(const struct devlink_param *param)
-- 
cgit v1.2.3


From ee394f96ad7517fbc0de9106dcc7ce9efb14f264 Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Sat, 7 Sep 2019 18:04:26 +0200
Subject: netfilter: nft_synproxy: add synproxy stateful object support

Register a new synproxy stateful object type into the stateful object
infrastructure.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |   3 +-
 net/netfilter/nft_synproxy.c             | 143 ++++++++++++++++++++++++++-----
 2 files changed, 124 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 0ff932dadc8e..ed8881ad18ed 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1481,7 +1481,8 @@ enum nft_ct_expectation_attributes {
 #define NFT_OBJECT_CT_TIMEOUT	7
 #define NFT_OBJECT_SECMARK	8
 #define NFT_OBJECT_CT_EXPECT	9
-#define __NFT_OBJECT_MAX	10
+#define NFT_OBJECT_SYNPROXY	10
+#define __NFT_OBJECT_MAX	11
 #define NFT_OBJECT_MAX		(__NFT_OBJECT_MAX - 1)
 
 /**
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index db4c23f5dfcb..e2c1fc608841 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -24,7 +24,7 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
 				     const struct tcphdr *tcp,
 				     struct synproxy_net *snet,
 				     struct nf_synproxy_info *info,
-				     struct nft_synproxy *priv)
+				     const struct nft_synproxy *priv)
 {
 	this_cpu_inc(snet->stats->syn_received);
 	if (tcp->ece && tcp->cwr)
@@ -41,14 +41,13 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts,
 				   NF_SYNPROXY_OPT_ECN);
 }
 
-static void nft_synproxy_eval_v4(const struct nft_expr *expr,
+static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt,
 				 const struct tcphdr *tcp,
 				 struct tcphdr *_tcph,
 				 struct synproxy_options *opts)
 {
-	struct nft_synproxy *priv = nft_expr_priv(expr);
 	struct nf_synproxy_info info = priv->info;
 	struct net *net = nft_net(pkt);
 	struct synproxy_net *snet = synproxy_pernet(net);
@@ -73,14 +72,13 @@ static void nft_synproxy_eval_v4(const struct nft_expr *expr,
 }
 
 #if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-static void nft_synproxy_eval_v6(const struct nft_expr *expr,
+static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
 				 struct nft_regs *regs,
 				 const struct nft_pktinfo *pkt,
 				 const struct tcphdr *tcp,
 				 struct tcphdr *_tcph,
 				 struct synproxy_options *opts)
 {
-	struct nft_synproxy *priv = nft_expr_priv(expr);
 	struct nf_synproxy_info info = priv->info;
 	struct net *net = nft_net(pkt);
 	struct synproxy_net *snet = synproxy_pernet(net);
@@ -105,9 +103,9 @@ static void nft_synproxy_eval_v6(const struct nft_expr *expr,
 }
 #endif /* CONFIG_NF_TABLES_IPV6*/
 
-static void nft_synproxy_eval(const struct nft_expr *expr,
-			      struct nft_regs *regs,
-			      const struct nft_pktinfo *pkt)
+static void nft_synproxy_do_eval(const struct nft_synproxy *priv,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
 {
 	struct synproxy_options opts = {};
 	struct sk_buff *skb = pkt->skb;
@@ -140,23 +138,22 @@ static void nft_synproxy_eval(const struct nft_expr *expr,
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		nft_synproxy_eval_v4(expr, regs, pkt, tcp, &_tcph, &opts);
+		nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts);
 		return;
 #if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
 	case htons(ETH_P_IPV6):
-		nft_synproxy_eval_v6(expr, regs, pkt, tcp, &_tcph, &opts);
+		nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts);
 		return;
 #endif
 	}
 	regs->verdict.code = NFT_BREAK;
 }
 
-static int nft_synproxy_init(const struct nft_ctx *ctx,
-			     const struct nft_expr *expr,
-			     const struct nlattr * const tb[])
+static int nft_synproxy_do_init(const struct nft_ctx *ctx,
+				const struct nlattr * const tb[],
+				struct nft_synproxy *priv)
 {
 	struct synproxy_net *snet = synproxy_pernet(ctx->net);
-	struct nft_synproxy *priv = nft_expr_priv(expr);
 	u32 flags;
 	int err;
 
@@ -206,8 +203,7 @@ nf_ct_failure:
 	return err;
 }
 
-static void nft_synproxy_destroy(const struct nft_ctx *ctx,
-				 const struct nft_expr *expr)
+static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
 {
 	struct synproxy_net *snet = synproxy_pernet(ctx->net);
 
@@ -229,10 +225,8 @@ static void nft_synproxy_destroy(const struct nft_ctx *ctx,
 	nf_ct_netns_put(ctx->net, ctx->family);
 }
 
-static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv)
 {
-	const struct nft_synproxy *priv = nft_expr_priv(expr);
-
 	if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) ||
 	    nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) ||
 	    nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options)))
@@ -244,6 +238,15 @@ nla_put_failure:
 	return -1;
 }
 
+static void nft_synproxy_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	const struct nft_synproxy *priv = nft_expr_priv(expr);
+
+	nft_synproxy_do_eval(priv, regs, pkt);
+}
+
 static int nft_synproxy_validate(const struct nft_ctx *ctx,
 				 const struct nft_expr *expr,
 				 const struct nft_data **data)
@@ -252,6 +255,28 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
 						    (1 << NF_INET_FORWARD));
 }
 
+static int nft_synproxy_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+
+	return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_destroy(const struct nft_ctx *ctx,
+				 const struct nft_expr *expr)
+{
+	nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_synproxy *priv = nft_expr_priv(expr);
+
+	return nft_synproxy_do_dump(skb, priv);
+}
+
 static struct nft_expr_type nft_synproxy_type;
 static const struct nft_expr_ops nft_synproxy_ops = {
 	.eval		= nft_synproxy_eval,
@@ -271,14 +296,89 @@ static struct nft_expr_type nft_synproxy_type __read_mostly = {
 	.maxattr	= NFTA_SYNPROXY_MAX,
 };
 
+static int nft_synproxy_obj_init(const struct nft_ctx *ctx,
+				 const struct nlattr * const tb[],
+				 struct nft_object *obj)
+{
+	struct nft_synproxy *priv = nft_obj_data(obj);
+
+	return nft_synproxy_do_init(ctx, tb, priv);
+}
+
+static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx,
+				     struct nft_object *obj)
+{
+	nft_synproxy_do_destroy(ctx);
+}
+
+static int nft_synproxy_obj_dump(struct sk_buff *skb,
+				 struct nft_object *obj, bool reset)
+{
+	struct nft_synproxy *priv = nft_obj_data(obj);
+
+	return nft_synproxy_do_dump(skb, priv);
+}
+
+static void nft_synproxy_obj_eval(struct nft_object *obj,
+				  struct nft_regs *regs,
+				  const struct nft_pktinfo *pkt)
+{
+	const struct nft_synproxy *priv = nft_obj_data(obj);
+
+	nft_synproxy_do_eval(priv, regs, pkt);
+}
+
+static void nft_synproxy_obj_update(struct nft_object *obj,
+				    struct nft_object *newobj)
+{
+	struct nft_synproxy *newpriv = nft_obj_data(newobj);
+	struct nft_synproxy *priv = nft_obj_data(obj);
+
+	priv->info = newpriv->info;
+}
+
+static struct nft_object_type nft_synproxy_obj_type;
+static const struct nft_object_ops nft_synproxy_obj_ops = {
+	.type		= &nft_synproxy_obj_type,
+	.size		= sizeof(struct nft_synproxy),
+	.init		= nft_synproxy_obj_init,
+	.destroy	= nft_synproxy_obj_destroy,
+	.dump		= nft_synproxy_obj_dump,
+	.eval		= nft_synproxy_obj_eval,
+	.update		= nft_synproxy_obj_update,
+};
+
+static struct nft_object_type nft_synproxy_obj_type __read_mostly = {
+	.type		= NFT_OBJECT_SYNPROXY,
+	.ops		= &nft_synproxy_obj_ops,
+	.maxattr	= NFTA_SYNPROXY_MAX,
+	.policy		= nft_synproxy_policy,
+	.owner		= THIS_MODULE,
+};
+
 static int __init nft_synproxy_module_init(void)
 {
-	return nft_register_expr(&nft_synproxy_type);
+	int err;
+
+	err = nft_register_obj(&nft_synproxy_obj_type);
+	if (err < 0)
+		return err;
+
+	err = nft_register_expr(&nft_synproxy_type);
+	if (err < 0)
+		goto err;
+
+	return 0;
+
+err:
+	nft_unregister_obj(&nft_synproxy_obj_type);
+	return err;
 }
 
 static void __exit nft_synproxy_module_exit(void)
 {
-	return nft_unregister_expr(&nft_synproxy_type);
+	nft_unregister_expr(&nft_synproxy_type);
+	nft_unregister_obj(&nft_synproxy_obj_type);
 }
 
 module_init(nft_synproxy_module_init);
@@ -287,3 +387,4 @@ module_exit(nft_synproxy_module_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>");
 MODULE_ALIAS_NFT_EXPR("synproxy");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY);
-- 
cgit v1.2.3


From be2861dc36d77ff3778979b9c3c79ada4affa131 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 8 Sep 2019 19:32:05 +0200
Subject: netfilter: nft_{fwd,dup}_netdev: add offload support

This patch adds support for packet mirroring and redirection. The
nft_fwd_dup_netdev_offload() function configures the flow_action object
for the fwd and the dup actions.

Extend nft_flow_rule_destroy() to release the net_device object when the
flow_rule object is released, since nft_fwd_dup_netdev_offload() bumps
the net_device reference counter.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: wenxu <wenxu@ucloud.cn>
---
 include/net/netfilter/nf_dup_netdev.h     |  6 ++++++
 include/net/netfilter/nf_tables_offload.h |  3 ++-
 net/netfilter/nf_dup_netdev.c             | 21 +++++++++++++++++++++
 net/netfilter/nf_tables_api.c             |  2 +-
 net/netfilter/nf_tables_offload.c         | 17 ++++++++++++++++-
 net/netfilter/nft_dup_netdev.c            | 12 ++++++++++++
 net/netfilter/nft_fwd_netdev.c            | 12 ++++++++++++
 7 files changed, 70 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_dup_netdev.h b/include/net/netfilter/nf_dup_netdev.h
index 181672672160..b175d271aec9 100644
--- a/include/net/netfilter/nf_dup_netdev.h
+++ b/include/net/netfilter/nf_dup_netdev.h
@@ -7,4 +7,10 @@
 void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif);
 void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif);
 
+struct nft_offload_ctx;
+struct nft_flow_rule;
+
+int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx,
+			       struct nft_flow_rule *flow,
+			       enum flow_action_id id, int oif);
 #endif
diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index 6de896ebcf30..ddd048be4330 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -26,6 +26,7 @@ struct nft_offload_ctx {
 		u8				protonum;
 	} dep;
 	unsigned int				num_actions;
+	struct net				*net;
 	struct nft_offload_reg			regs[NFT_REG32_15 + 1];
 };
 
@@ -61,7 +62,7 @@ struct nft_flow_rule {
 #define NFT_OFFLOAD_F_ACTION	(1 << 0)
 
 struct nft_rule;
-struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule);
+struct nft_flow_rule *nft_flow_rule_create(struct net *net, const struct nft_rule *rule);
 void nft_flow_rule_destroy(struct nft_flow_rule *flow);
 int nft_flow_rule_offload_commit(struct net *net);
 
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index 5a35ef08c3cb..f108a76925dd 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -10,6 +10,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_dup_netdev.h>
 
 static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
@@ -50,5 +51,25 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, int oif)
 }
 EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
 
+int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx,
+			       struct nft_flow_rule *flow,
+			       enum flow_action_id id, int oif)
+{
+	struct flow_action_entry *entry;
+	struct net_device *dev;
+
+	/* nft_flow_rule_destroy() releases the reference on this device. */
+	dev = dev_get_by_index(ctx->net, oif);
+	if (!dev)
+		return -EOPNOTSUPP;
+
+	entry = &flow->rule->action.entries[ctx->num_actions++];
+	entry->id = id;
+	entry->dev = dev;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index efd0c97cc2a3..c6f59ef96017 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2853,7 +2853,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 		return nft_table_validate(net, table);
 
 	if (chain->flags & NFT_CHAIN_HW_OFFLOAD) {
-		flow = nft_flow_rule_create(rule);
+		flow = nft_flow_rule_create(net, rule);
 		if (IS_ERR(flow))
 			return PTR_ERR(flow);
 
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 8abf193f8012..239cb781ad13 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -28,7 +28,8 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions)
 	return flow;
 }
 
-struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
+struct nft_flow_rule *nft_flow_rule_create(struct net *net,
+					   const struct nft_rule *rule)
 {
 	struct nft_offload_ctx *ctx;
 	struct nft_flow_rule *flow;
@@ -54,6 +55,7 @@ struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule)
 		err = -ENOMEM;
 		goto err_out;
 	}
+	ctx->net = net;
 	ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC;
 
 	while (expr->ops && expr != nft_expr_last(rule)) {
@@ -80,6 +82,19 @@ err_out:
 
 void nft_flow_rule_destroy(struct nft_flow_rule *flow)
 {
+	struct flow_action_entry *entry;
+	int i;
+
+	flow_action_for_each(i, entry, &flow->rule->action) {
+		switch (entry->id) {
+		case FLOW_ACTION_REDIRECT:
+		case FLOW_ACTION_MIRRED:
+			dev_put(entry->dev);
+			break;
+		default:
+			break;
+		}
+	}
 	kfree(flow->rule);
 	kfree(flow);
 }
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index c6052fdd2c40..c2e78c160fd7 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -10,6 +10,7 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_dup_netdev.h>
 
 struct nft_dup_netdev {
@@ -56,6 +57,16 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_expr *expr)
+{
+	const struct nft_dup_netdev *priv = nft_expr_priv(expr);
+	int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+	return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif);
+}
+
 static struct nft_expr_type nft_dup_netdev_type;
 static const struct nft_expr_ops nft_dup_netdev_ops = {
 	.type		= &nft_dup_netdev_type,
@@ -63,6 +74,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = {
 	.eval		= nft_dup_netdev_eval,
 	.init		= nft_dup_netdev_init,
 	.dump		= nft_dup_netdev_dump,
+	.offload	= nft_dup_netdev_offload,
 };
 
 static struct nft_expr_type nft_dup_netdev_type __read_mostly = {
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 61b7f93ac681..aba11c2333f3 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -12,6 +12,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_offload.h>
 #include <net/netfilter/nf_dup_netdev.h>
 #include <net/neighbour.h>
 #include <net/ip.h>
@@ -63,6 +64,16 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx,
+				  struct nft_flow_rule *flow,
+				  const struct nft_expr *expr)
+{
+	const struct nft_fwd_netdev *priv = nft_expr_priv(expr);
+	int oif = ctx->regs[priv->sreg_dev].data.data[0];
+
+	return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif);
+}
+
 struct nft_fwd_neigh {
 	enum nft_registers	sreg_dev:8;
 	enum nft_registers	sreg_addr:8;
@@ -194,6 +205,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
 	.eval		= nft_fwd_netdev_eval,
 	.init		= nft_fwd_netdev_init,
 	.dump		= nft_fwd_netdev_dump,
+	.offload	= nft_fwd_netdev_offload,
 };
 
 static const struct nft_expr_ops *
-- 
cgit v1.2.3


From 5fbe5b5883f847363ff1b7280e8b1d2980526b8e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 4 Sep 2019 16:01:04 +0200
Subject: gpio: Initialize the irqchip valid_mask with a callback

After changing the valid_mask for the struct gpio_chip
to detect the need and presence of a valid mask with the
presence of a .init_valid_mask() callback to fill it in,
we augment the gpio_irq_chip to use the same logic.

Switch all driver using the gpio_irq_chio valid_mask
over to this new method.

This makes sure the valid_mask for the gpio_irq_chip gets
filled in when we add the gpio_chip, which makes it a
little easier to switch over drivers using the old
way of setting up gpio_irq_chip over to the new method
of passing the gpio_irq_chip along with the gpio_chip.
(See drivers/gpio/TODO for details.)

Cc: Joel Stanley <joel@jms.id.au>
Cc: Thierry Reding <treding@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Patrice Chotard <patrice.chotard@st.com>
Link: https://lore.kernel.org/r/20190904140104.32426-1-linus.walleij@linaro.org
---
 drivers/gpio/gpio-aspeed.c                 | 13 +++++----
 drivers/gpio/gpio-stmpe.c                  | 36 ++++++++++++++++++-------
 drivers/gpio/gpio-tqmx86.c                 | 21 ++++++++-------
 drivers/gpio/gpiolib.c                     | 12 ++++++---
 drivers/pinctrl/intel/pinctrl-baytrail.c   | 16 +++++++++++-
 drivers/pinctrl/intel/pinctrl-cherryview.c | 42 ++++++++++++++++++------------
 drivers/platform/x86/intel_int0002_vgpio.c | 11 +++++---
 include/linux/gpio/driver.h                | 14 +++++++---
 8 files changed, 111 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 9defe25d4721..7bcd83dbc3e3 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -689,8 +689,11 @@ static struct irq_chip aspeed_gpio_irqchip = {
 	.irq_set_type	= aspeed_gpio_set_type,
 };
 
-static void set_irq_valid_mask(struct aspeed_gpio *gpio)
+static void aspeed_init_irq_valid_mask(struct gpio_chip *gc,
+				       unsigned long *valid_mask,
+				       unsigned int ngpios)
 {
+	struct aspeed_gpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_bank_props *props = gpio->config->props;
 
 	while (!is_bank_props_sentinel(props)) {
@@ -704,7 +707,7 @@ static void set_irq_valid_mask(struct aspeed_gpio *gpio)
 			if (i >= gpio->config->nr_gpios)
 				break;
 
-			clear_bit(i, gpio->chip.irq.valid_mask);
+			clear_bit(i, valid_mask);
 		}
 
 		props++;
@@ -1203,7 +1206,7 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
 		girq->parents[0] = gpio->irq;
 		girq->default_type = IRQ_TYPE_NONE;
 		girq->handler = handle_bad_irq;
-		girq->need_valid_mask = true;
+		girq->init_valid_mask = aspeed_init_irq_valid_mask;
 	}
 
 	gpio->offset_timer =
@@ -1215,10 +1218,6 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev)
 	if (rc < 0)
 		return rc;
 
-	/* Now the valid mask is allocated */
-	if (gpio->irq)
-		set_irq_valid_mask(gpio);
-
 	return 0;
 }
 
diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index dbf9cbe36b2b..994d542daf53 100644
--- a/drivers/gpio/gpio-stmpe.c
+++ b/drivers/gpio/gpio-stmpe.c
@@ -429,6 +429,23 @@ static irqreturn_t stmpe_gpio_irq(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
+static void stmpe_init_irq_valid_mask(struct gpio_chip *gc,
+				      unsigned long *valid_mask,
+				      unsigned int ngpios)
+{
+	struct stmpe_gpio *stmpe_gpio = gpiochip_get_data(gc);
+	int i;
+
+	if (!stmpe_gpio->norequest_mask)
+		return;
+
+	/* Forbid unused lines to be mapped as IRQs */
+	for (i = 0; i < sizeof(u32); i++) {
+		if (stmpe_gpio->norequest_mask & BIT(i))
+			clear_bit(i, valid_mask);
+	}
+}
+
 static int stmpe_gpio_probe(struct platform_device *pdev)
 {
 	struct stmpe *stmpe = dev_get_drvdata(pdev->dev.parent);
@@ -454,14 +471,21 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
 	stmpe_gpio->chip.parent = &pdev->dev;
 	stmpe_gpio->chip.of_node = np;
 	stmpe_gpio->chip.base = -1;
+	/*
+	 * REVISIT: this makes sure the valid mask gets allocated and
+	 * filled in when adding the gpio_chip, but the rest of the
+	 * gpio_irqchip is still filled in using the old method
+	 * in gpiochip_irqchip_add_nested() so clean this up once we
+	 * get the gpio_irqchip to initialize while adding the
+	 * gpio_chip also for threaded irqchips.
+	 */
+	stmpe_gpio->chip.irq.init_valid_mask = stmpe_init_irq_valid_mask;
 
 	if (IS_ENABLED(CONFIG_DEBUG_FS))
                 stmpe_gpio->chip.dbg_show = stmpe_dbg_show;
 
 	of_property_read_u32(np, "st,norequest-mask",
 			&stmpe_gpio->norequest_mask);
-	if (stmpe_gpio->norequest_mask)
-		stmpe_gpio->chip.irq.need_valid_mask = true;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -487,14 +511,6 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
 			dev_err(&pdev->dev, "unable to get irq: %d\n", ret);
 			goto out_disable;
 		}
-		if (stmpe_gpio->norequest_mask) {
-			int i;
-
-			/* Forbid unused lines to be mapped as IRQs */
-			for (i = 0; i < sizeof(u32); i++)
-				if (stmpe_gpio->norequest_mask & BIT(i))
-					clear_bit(i, stmpe_gpio->chip.irq.valid_mask);
-		}
 		ret =  gpiochip_irqchip_add_nested(&stmpe_gpio->chip,
 						   &stmpe_gpio_irq_chip,
 						   0,
diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c
index 07050cdbadb9..a3109bcaa0ac 100644
--- a/drivers/gpio/gpio-tqmx86.c
+++ b/drivers/gpio/gpio-tqmx86.c
@@ -214,6 +214,17 @@ static const struct dev_pm_ops tqmx86_gpio_dev_pm_ops = {
 			   tqmx86_gpio_runtime_resume, NULL)
 };
 
+static void tqmx86_init_irq_valid_mask(struct gpio_chip *chip,
+				       unsigned long *valid_mask,
+				       unsigned int ngpios)
+{
+	/* Only GPIOs 4-7 are valid for interrupts. Clear the others */
+	clear_bit(0, valid_mask);
+	clear_bit(1, valid_mask);
+	clear_bit(2, valid_mask);
+	clear_bit(3, valid_mask);
+}
+
 static int tqmx86_gpio_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -260,7 +271,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 	chip->get = tqmx86_gpio_get;
 	chip->set = tqmx86_gpio_set;
 	chip->ngpio = TQMX86_NGPIO;
-	chip->irq.need_valid_mask = true;
 	chip->parent = pdev->dev.parent;
 
 	pm_runtime_enable(&pdev->dev);
@@ -296,6 +306,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 		girq->parents[0] = irq;
 		girq->default_type = IRQ_TYPE_NONE;
 		girq->handler = handle_simple_irq;
+		girq->init_valid_mask = tqmx86_init_irq_valid_mask;
 	}
 
 	ret = devm_gpiochip_add_data(dev, chip, gpio);
@@ -304,14 +315,6 @@ static int tqmx86_gpio_probe(struct platform_device *pdev)
 		goto out_pm_dis;
 	}
 
-	/* Only GPIOs 4-7 are valid for interrupts. Clear the others */
-	if (irq) {
-		clear_bit(0, girq->valid_mask);
-		clear_bit(1, girq->valid_mask);
-		clear_bit(2, girq->valid_mask);
-		clear_bit(3, girq->valid_mask);
-	}
-
 	dev_info(dev, "GPIO functionality initialized with %d pins\n",
 		 chip->ngpio);
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 56d0898d94aa..f5b2649e2893 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1615,15 +1615,19 @@ static struct gpio_chip *find_chip_by_name(const char *name)
  * The following is irqchip helper code for gpiochips.
  */
 
-static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip)
+static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc)
 {
-	if (!gpiochip->irq.need_valid_mask)
+	struct gpio_irq_chip *girq = &gc->irq;
+
+	if (!girq->init_valid_mask)
 		return 0;
 
-	gpiochip->irq.valid_mask = gpiochip_allocate_mask(gpiochip);
-	if (!gpiochip->irq.valid_mask)
+	girq->valid_mask = gpiochip_allocate_mask(gc);
+	if (!girq->valid_mask)
 		return -ENOMEM;
 
+	girq->init_valid_mask(gc, girq->valid_mask, gc->ngpio);
+
 	return 0;
 }
 
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index e5a112a8e067..297b7b5fcb28 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -1455,6 +1455,20 @@ static void byt_gpio_irq_handler(struct irq_desc *desc)
 	chip->irq_eoi(data);
 }
 
+static void byt_init_irq_valid_mask(struct gpio_chip *chip,
+				    unsigned long *valid_mask,
+				    unsigned int ngpios)
+{
+	/*
+	 * FIXME: currently the valid_mask is filled in as part of
+	 * initializing the irq_chip below in byt_gpio_irq_init_hw().
+	 * when converting this driver to the new way of passing the
+	 * gpio_irq_chip along when adding the gpio_chip, move the
+	 * mask initialization into this callback instead. Right now
+	 * this callback is here to make sure the mask gets allocated.
+	 */
+}
+
 static void byt_gpio_irq_init_hw(struct byt_gpio *vg)
 {
 	struct gpio_chip *gc = &vg->chip;
@@ -1525,7 +1539,7 @@ static int byt_gpio_probe(struct byt_gpio *vg)
 	gc->can_sleep	= false;
 	gc->parent	= &vg->pdev->dev;
 	gc->ngpio	= vg->soc_data->npins;
-	gc->irq.need_valid_mask	= true;
+	gc->irq.init_valid_mask	= byt_init_irq_valid_mask;
 
 #ifdef CONFIG_PM_SLEEP
 	vg->saved_context = devm_kcalloc(&vg->pdev->dev, gc->ngpio,
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 03ec7a5d9d0b..ab681d1a3a74 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1543,6 +1543,30 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
 	{}
 };
 
+static void chv_init_irq_valid_mask(struct gpio_chip *chip,
+				    unsigned long *valid_mask,
+				    unsigned int ngpios)
+{
+	struct chv_pinctrl *pctrl = gpiochip_get_data(chip);
+	const struct chv_community *community = pctrl->community;
+	int i;
+
+	/* Do not add GPIOs that can only generate GPEs to the IRQ domain */
+	for (i = 0; i < community->npins; i++) {
+		const struct pinctrl_pin_desc *desc;
+		u32 intsel;
+
+		desc = &community->pins[i];
+
+		intsel = readl(chv_padreg(pctrl, desc->number, CHV_PADCTRL0));
+		intsel &= CHV_PADCTRL0_INTSEL_MASK;
+		intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
+
+		if (intsel >= community->nirqs)
+			clear_bit(i, valid_mask);
+	}
+}
+
 static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 {
 	const struct chv_gpio_pinrange *range;
@@ -1557,7 +1581,8 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 	chip->label = dev_name(pctrl->dev);
 	chip->parent = pctrl->dev;
 	chip->base = -1;
-	chip->irq.need_valid_mask = need_valid_mask;
+	if (need_valid_mask)
+		chip->irq.init_valid_mask = chv_init_irq_valid_mask;
 
 	ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl);
 	if (ret) {
@@ -1576,21 +1601,6 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
 		}
 	}
 
-	/* Do not add GPIOs that can only generate GPEs to the IRQ domain */
-	for (i = 0; i < community->npins; i++) {
-		const struct pinctrl_pin_desc *desc;
-		u32 intsel;
-
-		desc = &community->pins[i];
-
-		intsel = readl(chv_padreg(pctrl, desc->number, CHV_PADCTRL0));
-		intsel &= CHV_PADCTRL0_INTSEL_MASK;
-		intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
-
-		if (need_valid_mask && intsel >= community->nirqs)
-			clear_bit(i, chip->irq.valid_mask);
-	}
-
 	/*
 	 * The same set of machines in chv_no_valid_mask[] have incorrectly
 	 * configured GPIOs that generate spurious interrupts so we use
diff --git a/drivers/platform/x86/intel_int0002_vgpio.c b/drivers/platform/x86/intel_int0002_vgpio.c
index d9542c661ddc..f57bbbeb2495 100644
--- a/drivers/platform/x86/intel_int0002_vgpio.c
+++ b/drivers/platform/x86/intel_int0002_vgpio.c
@@ -152,6 +152,13 @@ static const struct x86_cpu_id int0002_cpu_ids[] = {
 	{}
 };
 
+static void int0002_init_irq_valid_mask(struct gpio_chip *chip,
+					unsigned long *valid_mask,
+					unsigned int ngpios)
+{
+	bitmap_clear(valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
+}
+
 static int int0002_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -184,7 +191,7 @@ static int int0002_probe(struct platform_device *pdev)
 	chip->direction_output = int0002_gpio_direction_output;
 	chip->base = -1;
 	chip->ngpio = GPE0A_PME_B0_VIRT_GPIO_PIN + 1;
-	chip->irq.need_valid_mask = true;
+	chip->irq.init_valid_mask = int0002_init_irq_valid_mask;
 
 	ret = devm_gpiochip_add_data(&pdev->dev, chip, NULL);
 	if (ret) {
@@ -192,8 +199,6 @@ static int int0002_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	bitmap_clear(chip->irq.valid_mask, 0, GPE0A_PME_B0_VIRT_GPIO_PIN);
-
 	/*
 	 * We manually request the irq here instead of passing a flow-handler
 	 * to gpiochip_set_chained_irqchip, because the irq is shared.
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index c667ad0c099d..f8245d67f070 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -202,11 +202,17 @@ struct gpio_irq_chip {
 	bool threaded;
 
 	/**
-	 * @need_valid_mask:
-	 *
-	 * If set core allocates @valid_mask with all bits set to one.
+	 * @init_valid_mask: optional routine to initialize @valid_mask, to be
+	 * used if not all GPIO lines are valid interrupts. Sometimes some
+	 * lines just cannot fire interrupts, and this routine, when defined,
+	 * is passed a bitmap in "valid_mask" and it will have ngpios
+	 * bits from 0..(ngpios-1) set to "1" as in valid. The callback can
+	 * then directly set some bits to "0" if they cannot be used for
+	 * interrupts.
 	 */
-	bool need_valid_mask;
+	void (*init_valid_mask)(struct gpio_chip *chip,
+				unsigned long *valid_mask,
+				unsigned int ngpios);
 
 	/**
 	 * @valid_mask:
-- 
cgit v1.2.3


From 06354665f92fa8be36124a8ba7113cdfa40d9df5 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Fri, 6 Sep 2019 10:48:57 +0800
Subject: mac80211: allow drivers to set max MTU

Make it possibly for drivers to adjust the default max_mtu
by storing it in the hardware struct and using that value
for all interfaces.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Link: https://lore.kernel.org/r/1567738137-31748-1-git-send-email-wgong@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 3 +++
 net/mac80211/iface.c   | 2 +-
 net/mac80211/main.c    | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 6781d4637557..523c6a09e1c8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -2463,6 +2463,8 @@ enum ieee80211_hw_flags {
  *
  * @weight_multiplier: Driver specific airtime weight multiplier used while
  *	refilling deficit of each TXQ.
+ *
+ * @max_mtu: the max mtu could be set.
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -2500,6 +2502,7 @@ struct ieee80211_hw {
 	u8 max_nan_de_entries;
 	u8 tx_sk_pacing_shift;
 	u8 weight_multiplier;
+	u32 max_mtu;
 };
 
 static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 8dc6580e1787..af8b09214786 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1876,7 +1876,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 
 		/* MTU range: 256 - 2304 */
 		ndev->min_mtu = 256;
-		ndev->max_mtu = IEEE80211_MAX_DATA_LEN;
+		ndev->max_mtu = local->hw.max_mtu;
 
 		ret = register_netdevice(ndev);
 		if (ret) {
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 29b9d57df1a3..aba094b4ccfc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -639,6 +639,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 					 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH;
 	local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES;
 	local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
+	local->hw.max_mtu = IEEE80211_MAX_DATA_LEN;
 	local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;
 	wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask;
 	wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;
-- 
cgit v1.2.3


From 3fc1ca00653db6371585e3c21c4b873b2f20e60a Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 6 Sep 2019 14:14:48 +0800
Subject: swiotlb: Split size parameter to map/unmap APIs

This splits the size parameter to swiotlb_tbl_map_single() and
swiotlb_tbl_unmap_single() into an alloc_size and a mapping_size
parameter, where the latter one is rounded up to the iommu page
size.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/xen/swiotlb-xen.c |  8 ++++----
 include/linux/swiotlb.h   |  8 ++++++--
 kernel/dma/direct.c       |  2 +-
 kernel/dma/swiotlb.c      | 34 +++++++++++++++++++++-------------
 4 files changed, 32 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index ae1df496bf38..adcabd9473eb 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -386,8 +386,8 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 */
 	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys, size, dir,
-				     attrs);
+	map = swiotlb_tbl_map_single(dev, start_dma_addr, phys,
+				     size, size, dir, attrs);
 	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
 		return DMA_MAPPING_ERROR;
 
@@ -397,7 +397,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 * Ensure that the address returned is DMA'ble
 	 */
 	if (unlikely(!dma_capable(dev, dev_addr, size))) {
-		swiotlb_tbl_unmap_single(dev, map, size, dir,
+		swiotlb_tbl_unmap_single(dev, map, size, size, dir,
 				attrs | DMA_ATTR_SKIP_CPU_SYNC);
 		return DMA_MAPPING_ERROR;
 	}
@@ -433,7 +433,7 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr))
-		swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
+		swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
 }
 
 static void xen_swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 361f62bb4a8e..cde3dc18e21a 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -46,13 +46,17 @@ enum dma_sync_target {
 
 extern phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 					  dma_addr_t tbl_dma_addr,
-					  phys_addr_t phys, size_t size,
+					  phys_addr_t phys,
+					  size_t mapping_size,
+					  size_t alloc_size,
 					  enum dma_data_direction dir,
 					  unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 				     phys_addr_t tlb_addr,
-				     size_t size, enum dma_data_direction dir,
+				     size_t mapping_size,
+				     size_t alloc_size,
+				     enum dma_data_direction dir,
 				     unsigned long attrs);
 
 extern void swiotlb_tbl_sync_single(struct device *hwdev,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 795c9b095d75..a7f2a0163426 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -297,7 +297,7 @@ void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
 		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
 	if (unlikely(is_swiotlb_buffer(phys)))
-		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
+		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
 }
 EXPORT_SYMBOL(dma_direct_unmap_page);
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9de232229063..796a44f8ef5a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -444,7 +444,9 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 				   dma_addr_t tbl_dma_addr,
-				   phys_addr_t orig_addr, size_t size,
+				   phys_addr_t orig_addr,
+				   size_t mapping_size,
+				   size_t alloc_size,
 				   enum dma_data_direction dir,
 				   unsigned long attrs)
 {
@@ -464,6 +466,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 		pr_warn_once("%s is active and system is using DMA bounce buffers\n",
 			     sme_active() ? "SME" : "SEV");
 
+	if (mapping_size > alloc_size) {
+		dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+			      mapping_size, alloc_size);
+		return (phys_addr_t)DMA_MAPPING_ERROR;
+	}
+
 	mask = dma_get_seg_boundary(hwdev);
 
 	tbl_dma_addr &= mask;
@@ -471,8 +479,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 	offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 
 	/*
- 	 * Carefully handle integer overflow which can occur when mask == ~0UL.
- 	 */
+	 * Carefully handle integer overflow which can occur when mask == ~0UL.
+	 */
 	max_slots = mask + 1
 		    ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
 		    : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
@@ -481,8 +489,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
 	 * For mappings greater than or equal to a page, we limit the stride
 	 * (and hence alignment) to a page size.
 	 */
-	nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-	if (size >= PAGE_SIZE)
+	nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	if (alloc_size >= PAGE_SIZE)
 		stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
 	else
 		stride = 1;
@@ -547,7 +555,7 @@ not_found:
 	spin_unlock_irqrestore(&io_tlb_lock, flags);
 	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
 		dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-			 size, io_tlb_nslabs, tmp_io_tlb_used);
+			 alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
 	return (phys_addr_t)DMA_MAPPING_ERROR;
 found:
 	io_tlb_used += nslots;
@@ -562,7 +570,7 @@ found:
 		io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
 	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
 
 	return tlb_addr;
 }
@@ -571,11 +579,11 @@ found:
  * tlb_addr is the physical address of the bounce buffer to unmap.
  */
 void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-			      size_t size, enum dma_data_direction dir,
-			      unsigned long attrs)
+			      size_t mapping_size, size_t alloc_size,
+			      enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long flags;
-	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+	int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
 	int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
 	phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
@@ -585,7 +593,7 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
 	if (orig_addr != INVALID_PHYS_ADDR &&
 	    !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
 	    ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-		swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
+		swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
 
 	/*
 	 * Return the buffer to the free list by setting the corresponding
@@ -665,14 +673,14 @@ bool swiotlb_map(struct device *dev, phys_addr_t *phys, dma_addr_t *dma_addr,
 
 	/* Oh well, have to allocate and map a bounce buffer. */
 	*phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
-			*phys, size, dir, attrs);
+			*phys, size, size, dir, attrs);
 	if (*phys == (phys_addr_t)DMA_MAPPING_ERROR)
 		return false;
 
 	/* Ensure that the address returned is DMA'ble */
 	*dma_addr = __phys_to_dma(dev, *phys);
 	if (unlikely(!dma_capable(dev, *dma_addr, size))) {
-		swiotlb_tbl_unmap_single(dev, *phys, size, dir,
+		swiotlb_tbl_unmap_single(dev, *phys, size, size, dir,
 			attrs | DMA_ATTR_SKIP_CPU_SYNC);
 		return false;
 	}
-- 
cgit v1.2.3


From 3b53034c268d550d9e8522e613a14ab53b8840d8 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Fri, 6 Sep 2019 14:14:51 +0800
Subject: iommu/vt-d: Add trace events for device dma map/unmap

This adds trace support for the Intel IOMMU driver. It
also declares some events which could be used to trace
the events when an IOVA is being mapped or unmapped in
a domain.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Makefile             |   1 +
 drivers/iommu/intel-iommu.c        |  13 +++--
 drivers/iommu/intel-trace.c        |  14 +++++
 include/trace/events/intel_iommu.h | 106 +++++++++++++++++++++++++++++++++++++
 4 files changed, 131 insertions(+), 3 deletions(-)
 create mode 100644 drivers/iommu/intel-trace.c
 create mode 100644 include/trace/events/intel_iommu.h

(limited to 'include')

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index f13f36ae1af6..bfe27b2755bd 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU) += intel-trace.o
 obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0af7b4669264..12831beead02 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3512,6 +3512,9 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
 
 	start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT;
 	start_paddr += paddr & ~PAGE_MASK;
+
+	trace_map_single(dev, start_paddr, paddr, size << VTD_PAGE_SHIFT);
+
 	return start_paddr;
 
 error:
@@ -3567,10 +3570,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 	if (dev_is_pci(dev))
 		pdev = to_pci_dev(dev);
 
-	dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn);
-
 	freelist = domain_unmap(domain, start_pfn, last_pfn);
-
 	if (intel_iommu_strict || (pdev && pdev->untrusted) ||
 			!has_iova_flush_queue(&domain->iovad)) {
 		iommu_flush_iotlb_psi(iommu, domain, start_pfn,
@@ -3586,6 +3586,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
 		 * cpu used up by the iotlb flush operation...
 		 */
 	}
+
+	trace_unmap_single(dev, dev_addr, size);
 }
 
 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
@@ -3676,6 +3678,8 @@ static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
 	}
 
 	intel_unmap(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
+
+	trace_unmap_sg(dev, startaddr, nrpages << VTD_PAGE_SHIFT);
 }
 
 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
@@ -3732,6 +3736,9 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
 		return 0;
 	}
 
+	trace_map_sg(dev, iova_pfn << PAGE_SHIFT,
+		     sg_phys(sglist), size << VTD_PAGE_SHIFT);
+
 	return nelems;
 }
 
diff --git a/drivers/iommu/intel-trace.c b/drivers/iommu/intel-trace.c
new file mode 100644
index 000000000000..bfb6a6e37a88
--- /dev/null
+++ b/drivers/iommu/intel-trace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel IOMMU trace support
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/intel_iommu.h>
diff --git a/include/trace/events/intel_iommu.h b/include/trace/events/intel_iommu.h
new file mode 100644
index 000000000000..54e61d456cdf
--- /dev/null
+++ b/include/trace/events/intel_iommu.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Intel IOMMU trace support
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author: Lu Baolu <baolu.lu@linux.intel.com>
+ */
+#ifdef CONFIG_INTEL_IOMMU
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM intel_iommu
+
+#if !defined(_TRACE_INTEL_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_INTEL_IOMMU_H
+
+#include <linux/tracepoint.h>
+#include <linux/intel-iommu.h>
+
+DECLARE_EVENT_CLASS(dma_map,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr,
+		 size_t size),
+
+	TP_ARGS(dev, dev_addr, phys_addr, size),
+
+	TP_STRUCT__entry(
+		__string(dev_name, dev_name(dev))
+		__field(dma_addr_t, dev_addr)
+		__field(phys_addr_t, phys_addr)
+		__field(size_t,	size)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev_name, dev_name(dev));
+		__entry->dev_addr = dev_addr;
+		__entry->phys_addr = phys_addr;
+		__entry->size = size;
+	),
+
+	TP_printk("dev=%s dev_addr=0x%llx phys_addr=0x%llx size=%zu",
+		  __get_str(dev_name),
+		  (unsigned long long)__entry->dev_addr,
+		  (unsigned long long)__entry->phys_addr,
+		  __entry->size)
+);
+
+DEFINE_EVENT(dma_map, map_single,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr,
+		 size_t size),
+	TP_ARGS(dev, dev_addr, phys_addr, size)
+);
+
+DEFINE_EVENT(dma_map, map_sg,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr,
+		 size_t size),
+	TP_ARGS(dev, dev_addr, phys_addr, size)
+);
+
+DEFINE_EVENT(dma_map, bounce_map_single,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, phys_addr_t phys_addr,
+		 size_t size),
+	TP_ARGS(dev, dev_addr, phys_addr, size)
+);
+
+DECLARE_EVENT_CLASS(dma_unmap,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size),
+
+	TP_ARGS(dev, dev_addr, size),
+
+	TP_STRUCT__entry(
+		__string(dev_name, dev_name(dev))
+		__field(dma_addr_t, dev_addr)
+		__field(size_t,	size)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev_name, dev_name(dev));
+		__entry->dev_addr = dev_addr;
+		__entry->size = size;
+	),
+
+	TP_printk("dev=%s dev_addr=0x%llx size=%zu",
+		  __get_str(dev_name),
+		  (unsigned long long)__entry->dev_addr,
+		  __entry->size)
+);
+
+DEFINE_EVENT(dma_unmap, unmap_single,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size),
+	TP_ARGS(dev, dev_addr, size)
+);
+
+DEFINE_EVENT(dma_unmap, unmap_sg,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size),
+	TP_ARGS(dev, dev_addr, size)
+);
+
+DEFINE_EVENT(dma_unmap, bounce_unmap_single,
+	TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size),
+	TP_ARGS(dev, dev_addr, size)
+);
+
+#endif /* _TRACE_INTEL_IOMMU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+#endif /* CONFIG_INTEL_IOMMU */
-- 
cgit v1.2.3


From fd730007a06e9b11664e3816fcebd3faa91761ea Mon Sep 17 00:00:00 2001
From: Kyung Min Park <kyung.min.park@intel.com>
Date: Fri, 6 Sep 2019 11:14:02 -0700
Subject: iommu/vt-d: Add Scalable Mode fault information

Intel VT-d specification revision 3 added support for Scalable Mode
Translation for DMA remapping. Add the Scalable Mode fault reasons to
show detailed fault reasons when the translation fault happens.

Link: https://software.intel.com/sites/default/files/managed/c5/15/vt-directed-io-spec.pdf

Reviewed-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dmar.c        | 77 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/intel-iommu.h |  2 ++
 2 files changed, 75 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 5d0754ed5fa0..eecd6a421667 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1519,6 +1519,64 @@ static const char *dma_remap_fault_reasons[] =
 	"PCE for translation request specifies blocking",
 };
 
+static const char * const dma_remap_sm_fault_reasons[] = {
+	"SM: Invalid Root Table Address",
+	"SM: TTM 0 for request with PASID",
+	"SM: TTM 0 for page group request",
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
+	"SM: Error attempting to access Root Entry",
+	"SM: Present bit in Root Entry is clear",
+	"SM: Non-zero reserved field set in Root Entry",
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
+	"SM: Error attempting to access Context Entry",
+	"SM: Present bit in Context Entry is clear",
+	"SM: Non-zero reserved field set in the Context Entry",
+	"SM: Invalid Context Entry",
+	"SM: DTE field in Context Entry is clear",
+	"SM: PASID Enable field in Context Entry is clear",
+	"SM: PASID is larger than the max in Context Entry",
+	"SM: PRE field in Context-Entry is clear",
+	"SM: RID_PASID field error in Context-Entry",
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
+	"SM: Error attempting to access the PASID Directory Entry",
+	"SM: Present bit in Directory Entry is clear",
+	"SM: Non-zero reserved field set in PASID Directory Entry",
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
+	"SM: Error attempting to access PASID Table Entry",
+	"SM: Present bit in PASID Table Entry is clear",
+	"SM: Non-zero reserved field set in PASID Table Entry",
+	"SM: Invalid Scalable-Mode PASID Table Entry",
+	"SM: ERE field is clear in PASID Table Entry",
+	"SM: SRE field is clear in PASID Table Entry",
+	"Unknown", "Unknown",/* 0x5E-0x5F */
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
+	"SM: Error attempting to access first-level paging entry",
+	"SM: Present bit in first-level paging entry is clear",
+	"SM: Non-zero reserved field set in first-level paging entry",
+	"SM: Error attempting to access FL-PML4 entry",
+	"SM: First-level entry address beyond MGAW in Nested translation",
+	"SM: Read permission error in FL-PML4 entry in Nested translation",
+	"SM: Read permission error in first-level paging entry in Nested translation",
+	"SM: Write permission error in first-level paging entry in Nested translation",
+	"SM: Error attempting to access second-level paging entry",
+	"SM: Read/Write permission error in second-level paging entry",
+	"SM: Non-zero reserved field set in second-level paging entry",
+	"SM: Invalid second-level page table pointer",
+	"SM: A/D bit update needed in second-level entry when set up in no snoop",
+	"Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
+	"SM: Address in first-level translation is not canonical",
+	"SM: U/S set 0 for first-level translation with user privilege",
+	"SM: No execute permission for request with PASID and ER=1",
+	"SM: Address beyond the DMA hardware max",
+	"SM: Second-level entry address beyond the max",
+	"SM: No write permission for Write/AtomicOp request",
+	"SM: No read permission for Read/AtomicOp request",
+	"SM: Invalid address-interrupt address",
+	"Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
+	"SM: A/D bit update needed in first-level entry when set up in no snoop",
+};
+
 static const char *irq_remap_fault_reasons[] =
 {
 	"Detected reserved fields in the decoded interrupt-remapped request",
@@ -1536,6 +1594,10 @@ static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
 					ARRAY_SIZE(irq_remap_fault_reasons))) {
 		*fault_type = INTR_REMAP;
 		return irq_remap_fault_reasons[fault_reason - 0x20];
+	} else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
+			ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
+		*fault_type = DMA_REMAP;
+		return dma_remap_sm_fault_reasons[fault_reason - 0x30];
 	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
 		*fault_type = DMA_REMAP;
 		return dma_remap_fault_reasons[fault_reason];
@@ -1611,7 +1673,8 @@ void dmar_msi_read(int irq, struct msi_msg *msg)
 }
 
 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
-		u8 fault_reason, u16 source_id, unsigned long long addr)
+		u8 fault_reason, int pasid, u16 source_id,
+		unsigned long long addr)
 {
 	const char *reason;
 	int fault_type;
@@ -1624,10 +1687,11 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 			PCI_FUNC(source_id & 0xFF), addr >> 48,
 			fault_reason, reason);
 	else
-		pr_err("[%s] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02d] %s\n",
+		pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
 		       type ? "DMA Read" : "DMA Write",
 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
-		       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
+		       PCI_FUNC(source_id & 0xFF), pasid, addr,
+		       fault_reason, reason);
 	return 0;
 }
 
@@ -1659,8 +1723,9 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 		u8 fault_reason;
 		u16 source_id;
 		u64 guest_addr;
-		int type;
+		int type, pasid;
 		u32 data;
+		bool pasid_present;
 
 		/* highest 32 bits */
 		data = readl(iommu->reg + reg +
@@ -1672,10 +1737,12 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 			fault_reason = dma_frcd_fault_reason(data);
 			type = dma_frcd_type(data);
 
+			pasid = dma_frcd_pasid_value(data);
 			data = readl(iommu->reg + reg +
 				     fault_index * PRIMARY_FAULT_REG_LEN + 8);
 			source_id = dma_frcd_source_id(data);
 
+			pasid_present = dma_frcd_pasid_present(data);
 			guest_addr = dmar_readq(iommu->reg + reg +
 					fault_index * PRIMARY_FAULT_REG_LEN);
 			guest_addr = dma_frcd_page_addr(guest_addr);
@@ -1688,7 +1755,9 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
 		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
 
 		if (!ratelimited)
+			/* Using pasid -1 if pasid is not present */
 			dmar_fault_do_one(iommu, type, fault_reason,
+					  pasid_present ? pasid : -1,
 					  source_id, guest_addr);
 
 		fault_index++;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index f2ae8a006ff8..10e79a49af9d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -272,6 +272,8 @@
 #define dma_frcd_type(d) ((d >> 30) & 1)
 #define dma_frcd_fault_reason(c) (c & 0xff)
 #define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_pasid_value(c) (((c) >> 8) & 0xfffff)
+#define dma_frcd_pasid_present(c) (((c) >> 31) & 1)
 /* low 64 bit */
 #define dma_frcd_page_addr(d) (d & (((u64)-1) << PAGE_SHIFT))
 
-- 
cgit v1.2.3


From bef4d2037d2143a4df6430bbe1e970fc7e616f6c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 24 Jul 2019 14:02:10 +0200
Subject: xen/arm: consolidate page-coherent.h

Shared the duplicate arm/arm64 code in include/xen/arm/page-coherent.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 arch/arm/include/asm/xen/page-coherent.h   | 75 ----------------------------
 arch/arm64/include/asm/xen/page-coherent.h | 75 ----------------------------
 include/xen/arm/page-coherent.h            | 80 ++++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 150 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 602ac02f154c..27e984977402 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -1,77 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ARM_XEN_PAGE_COHERENT_H
-#define _ASM_ARM_XEN_PAGE_COHERENT_H
-
-#include <linux/dma-mapping.h>
-#include <asm/page.h>
 #include <xen/arm/page-coherent.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-	return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-	dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_device(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long page_pfn = page_to_xen_pfn(page);
-	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-	unsigned long compound_pages =
-		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
-	bool local = (page_pfn <= dev_pfn) &&
-		(dev_pfn - page_pfn < compound_pages);
-
-	if (local)
-		dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
-	else
-		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
-	 * foreign mfn will always return false. If the page is local we can
-	 * safely call the native dma_ops function, otherwise we call the xen
-	 * specific function.
-	 */
-	if (pfn_valid(pfn))
-		dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
-	else
-		__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
-#endif /* _ASM_ARM_XEN_PAGE_COHERENT_H */
diff --git a/arch/arm64/include/asm/xen/page-coherent.h b/arch/arm64/include/asm/xen/page-coherent.h
index d88e56b90b93..27e984977402 100644
--- a/arch/arm64/include/asm/xen/page-coherent.h
+++ b/arch/arm64/include/asm/xen/page-coherent.h
@@ -1,77 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_ARM64_XEN_PAGE_COHERENT_H
-#define _ASM_ARM64_XEN_PAGE_COHERENT_H
-
-#include <linux/dma-mapping.h>
-#include <asm/page.h>
 #include <xen/arm/page-coherent.h>
-
-static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
-{
-	return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
-}
-
-static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
-		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
-{
-	dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_device(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long page_pfn = page_to_xen_pfn(page);
-	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-	unsigned long compound_pages =
-		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
-	bool local = (page_pfn <= dev_pfn) &&
-		(dev_pfn - page_pfn < compound_pages);
-
-	if (local)
-		dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
-	else
-		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
-	 * foreign mfn will always return false. If the page is local we can
-	 * safely call the native dma_ops function, otherwise we call the xen
-	 * specific function.
-	 */
-	if (pfn_valid(pfn))
-		dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
-	else
-		__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
-#endif /* _ASM_ARM64_XEN_PAGE_COHERENT_H */
diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
index 2ca9164a79bf..a840d6949a87 100644
--- a/include/xen/arm/page-coherent.h
+++ b/include/xen/arm/page-coherent.h
@@ -2,6 +2,9 @@
 #ifndef _XEN_ARM_PAGE_COHERENT_H
 #define _XEN_ARM_PAGE_COHERENT_H
 
+#include <linux/dma-mapping.h>
+#include <asm/page.h>
+
 void __xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
 	     enum dma_data_direction dir, unsigned long attrs);
@@ -13,4 +16,81 @@ void __xen_dma_sync_single_for_cpu(struct device *hwdev,
 void __xen_dma_sync_single_for_device(struct device *hwdev,
 		dma_addr_t handle, size_t size, enum dma_data_direction dir);
 
+static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
+		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
+{
+	return dma_direct_alloc(hwdev, size, dma_handle, flags, attrs);
+}
+
+static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
+		void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs)
+{
+	dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
+}
+
+static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	unsigned long pfn = PFN_DOWN(handle);
+
+	if (pfn_valid(pfn))
+		dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
+	else
+		__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
+}
+
+static inline void xen_dma_sync_single_for_device(struct device *hwdev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	unsigned long pfn = PFN_DOWN(handle);
+	if (pfn_valid(pfn))
+		dma_direct_sync_single_for_device(hwdev, handle, size, dir);
+	else
+		__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
+}
+
+static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
+	     dma_addr_t dev_addr, unsigned long offset, size_t size,
+	     enum dma_data_direction dir, unsigned long attrs)
+{
+	unsigned long page_pfn = page_to_xen_pfn(page);
+	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
+	unsigned long compound_pages =
+		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
+	bool local = (page_pfn <= dev_pfn) &&
+		(dev_pfn - page_pfn < compound_pages);
+
+	/*
+	 * Dom0 is mapped 1:1, while the Linux page can span across
+	 * multiple Xen pages, it's not possible for it to contain a
+	 * mix of local and foreign Xen pages. So if the first xen_pfn
+	 * == mfn the page is local otherwise it's a foreign page
+	 * grant-mapped in dom0. If the page is local we can safely
+	 * call the native dma_ops function, otherwise we call the xen
+	 * specific function.
+	 */
+	if (local)
+		dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
+	else
+		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
+}
+
+static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	unsigned long pfn = PFN_DOWN(handle);
+	/*
+	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+	 * multiple Xen page, it's not possible to have a mix of local and
+	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
+	 * foreign mfn will always return false. If the page is local we can
+	 * safely call the native dma_ops function, otherwise we call the xen
+	 * specific function.
+	 */
+	if (pfn_valid(pfn))
+		dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
+	else
+		__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
+}
+
 #endif /* _XEN_ARM_PAGE_COHERENT_H */
-- 
cgit v1.2.3


From 0e0d26e779d30086a7e86fcfae2d897bbc5579da Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 24 Jul 2019 14:09:39 +0200
Subject: xen/arm: remove xen_dma_ops

arm and arm64 can just use xen_swiotlb_dma_ops directly like x86, no
need for a pointer indirection.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 arch/arm/mm/dma-mapping.c    | 3 ++-
 arch/arm/xen/mm.c            | 4 ----
 arch/arm64/mm/dma-mapping.c  | 3 ++-
 include/xen/arm/hypervisor.h | 2 --
 4 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 143d7c79b4cb..7d042d5c43e3 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -36,6 +36,7 @@
 #include <asm/mach/map.h>
 #include <asm/system_info.h>
 #include <asm/dma-contiguous.h>
+#include <xen/swiotlb-xen.h>
 
 #include "dma.h"
 #include "mm.h"
@@ -2315,7 +2316,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 #ifdef CONFIG_XEN
 	if (xen_initial_domain())
-		dev->dma_ops = xen_dma_ops;
+		dev->dma_ops = &xen_swiotlb_dma_ops;
 #endif
 	dev->archdata.dma_ops_setup = true;
 }
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 2fde161733b0..11d5ad26fcfe 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -162,16 +162,12 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
 }
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
-const struct dma_map_ops *xen_dma_ops;
-EXPORT_SYMBOL(xen_dma_ops);
-
 int __init xen_mm_init(void)
 {
 	struct gnttab_cache_flush cflush;
 	if (!xen_initial_domain())
 		return 0;
 	xen_swiotlb_init(1, false);
-	xen_dma_ops = &xen_swiotlb_dma_ops;
 
 	cflush.op = 0;
 	cflush.a.dev_bus_addr = 0;
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index a1d05f669f67..c44eb72d500f 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -8,6 +8,7 @@
 #include <linux/cache.h>
 #include <linux/dma-noncoherent.h>
 #include <linux/dma-iommu.h>
+#include <xen/swiotlb-xen.h>
 
 #include <asm/cacheflush.h>
 
@@ -52,6 +53,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 
 #ifdef CONFIG_XEN
 	if (xen_initial_domain())
-		dev->dma_ops = xen_dma_ops;
+		dev->dma_ops = &xen_swiotlb_dma_ops;
 #endif
 }
diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h
index 2982571f7cc1..43ef24dd030e 100644
--- a/include/xen/arm/hypervisor.h
+++ b/include/xen/arm/hypervisor.h
@@ -19,8 +19,6 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 	return PARAVIRT_LAZY_NONE;
 }
 
-extern const struct dma_map_ops *xen_dma_ops;
-
 #ifdef CONFIG_XEN
 void __init xen_early_init(void);
 #else
-- 
cgit v1.2.3


From c9b6180dbf9ab3fef15ec5482997c99b0c762cd1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 26 Aug 2019 11:23:44 +0200
Subject: swiotlb-xen: use the same foreign page check everywhere

xen_dma_map_page uses a different and more complicated check for foreign
pages than the other three cache maintainance helpers.  Switch it to the
simpler pfn_valid method a well, and document the scheme with a single
improved comment in xen_dma_map_page.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 include/xen/arm/page-coherent.h | 31 +++++++++----------------------
 1 file changed, 9 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
index a840d6949a87..a8d9c0678c27 100644
--- a/include/xen/arm/page-coherent.h
+++ b/include/xen/arm/page-coherent.h
@@ -53,23 +53,17 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
 	     dma_addr_t dev_addr, unsigned long offset, size_t size,
 	     enum dma_data_direction dir, unsigned long attrs)
 {
-	unsigned long page_pfn = page_to_xen_pfn(page);
-	unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
-	unsigned long compound_pages =
-		(1<<compound_order(page)) * XEN_PFN_PER_PAGE;
-	bool local = (page_pfn <= dev_pfn) &&
-		(dev_pfn - page_pfn < compound_pages);
+	unsigned long pfn = PFN_DOWN(dev_addr);
 
 	/*
-	 * Dom0 is mapped 1:1, while the Linux page can span across
-	 * multiple Xen pages, it's not possible for it to contain a
-	 * mix of local and foreign Xen pages. So if the first xen_pfn
-	 * == mfn the page is local otherwise it's a foreign page
-	 * grant-mapped in dom0. If the page is local we can safely
-	 * call the native dma_ops function, otherwise we call the xen
-	 * specific function.
+	 * Dom0 is mapped 1:1, and while the Linux page can span across multiple
+	 * Xen pages, it is not possible for it to contain a mix of local and
+	 * foreign Xen pages.  Calling pfn_valid on a foreign mfn will always
+	 * return false, so if pfn_valid returns true the pages is local and we
+	 * can use the native dma-direct functions, otherwise we call the Xen
+	 * specific version.
 	 */
-	if (local)
+	if (pfn_valid(pfn))
 		dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
 	else
 		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
@@ -79,14 +73,7 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
 	unsigned long pfn = PFN_DOWN(handle);
-	/*
-	 * Dom0 is mapped 1:1, while the Linux page can be spanned accross
-	 * multiple Xen page, it's not possible to have a mix of local and
-	 * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
-	 * foreign mfn will always return false. If the page is local we can
-	 * safely call the native dma_ops function, otherwise we call the xen
-	 * specific function.
-	 */
+
 	if (pfn_valid(pfn))
 		dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
 	else
-- 
cgit v1.2.3


From b4dca1512941aa8fec33c28939abc2bba4a2c78c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 5 Sep 2019 10:04:30 +0200
Subject: swiotlb-xen: simplify cache maintainance

Now that we know we always have the dma-noncoherent.h helpers available
if we are on an architecture with support for non-coherent devices,
we can just call them directly, and remove the calls to the dma-direct
routines, including the fact that we call the dma_direct_map_page
routines but ignore the value returned from it.  Instead we now have
Xen wrappers for the arch_sync_dma_for_{device,cpu} helpers that call
the special Xen versions of those routines for foreign pages.

Note that the new helpers get the physical address passed in addition
to the dma address to avoid another translation for the local cache
maintainance.  The pfn_valid checks remain on the dma address as in
the old code, even if that looks a little funny.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/arm/xen/mm.c                        | 64 +++++++++-----------------------
 arch/x86/include/asm/xen/page-coherent.h | 14 -------
 drivers/xen/swiotlb-xen.c                | 20 +++++-----
 include/xen/arm/page-coherent.h          | 63 -------------------------------
 include/xen/swiotlb-xen.h                |  5 +++
 5 files changed, 32 insertions(+), 134 deletions(-)

(limited to 'include')

diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 9d73fa4a5991..2b2c208408bb 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -60,63 +60,33 @@ static void dma_cache_maint(dma_addr_t handle, size_t size, u32 op)
 	} while (size);
 }
 
-static void __xen_dma_page_dev_to_cpu(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
+/*
+ * Dom0 is mapped 1:1, and while the Linux page can span across multiple Xen
+ * pages, it is not possible for it to contain a mix of local and foreign Xen
+ * pages.  Calling pfn_valid on a foreign mfn will always return false, so if
+ * pfn_valid returns true the pages is local and we can use the native
+ * dma-direct functions, otherwise we call the Xen specific version.
+ */
+void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
 {
-	if (dir != DMA_TO_DEVICE)
+	if (pfn_valid(PFN_DOWN(handle)))
+		arch_sync_dma_for_cpu(dev, paddr, size, dir);
+	else if (dir != DMA_TO_DEVICE)
 		dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
 }
 
-static void __xen_dma_page_cpu_to_dev(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir)
+void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir)
 {
-	if (dir == DMA_FROM_DEVICE)
+	if (pfn_valid(PFN_DOWN(handle)))
+		arch_sync_dma_for_device(dev, paddr, size, dir);
+	else if (dir == DMA_FROM_DEVICE)
 		dma_cache_maint(handle, size, GNTTAB_CACHE_INVAL);
 	else
 		dma_cache_maint(handle, size, GNTTAB_CACHE_CLEAN);
 }
 
-void __xen_dma_map_page(struct device *hwdev, struct page *page,
-	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, unsigned long attrs)
-{
-	if (dev_is_dma_coherent(hwdev))
-		return;
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	__xen_dma_page_cpu_to_dev(hwdev, dev_addr, size, dir);
-}
-
-void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-
-{
-	if (dev_is_dma_coherent(hwdev))
-		return;
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void __xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	if (dev_is_dma_coherent(hwdev))
-		return;
-	__xen_dma_page_dev_to_cpu(hwdev, handle, size, dir);
-}
-
-void __xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	if (dev_is_dma_coherent(hwdev))
-		return;
-	__xen_dma_page_cpu_to_dev(hwdev, handle, size, dir);
-}
-
 bool xen_arch_need_swiotlb(struct device *dev,
 			   phys_addr_t phys,
 			   dma_addr_t dev_addr)
diff --git a/arch/x86/include/asm/xen/page-coherent.h b/arch/x86/include/asm/xen/page-coherent.h
index 116777e7f387..63cd41b2e17a 100644
--- a/arch/x86/include/asm/xen/page-coherent.h
+++ b/arch/x86/include/asm/xen/page-coherent.h
@@ -21,18 +21,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
 	free_pages((unsigned long) cpu_addr, get_order(size));
 }
 
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, unsigned long attrs) { }
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		unsigned long attrs) { }
-
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir) { }
-
 #endif /* _ASM_X86_XEN_PAGE_COHERENT_H */
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b8808677ae1d..f81031f0c1c7 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -28,6 +28,7 @@
 
 #include <linux/memblock.h>
 #include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/export.h>
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
@@ -391,6 +392,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
 		return DMA_MAPPING_ERROR;
 
+	phys = map;
 	dev_addr = xen_phys_to_bus(map);
 
 	/*
@@ -402,14 +404,9 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 		return DMA_MAPPING_ERROR;
 	}
 
-	page = pfn_to_page(map >> PAGE_SHIFT);
-	offset = map & ~PAGE_MASK;
 done:
-	/*
-	 * we are not interested in the dma_addr returned by xen_dma_map_page,
-	 * only in the potential cache flushes executed by the function.
-	 */
-	xen_dma_map_page(dev, page, dev_addr, offset, size, dir, attrs);
+	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		xen_dma_sync_for_device(dev, dev_addr, phys, size, dir);
 	return dev_addr;
 }
 
@@ -429,7 +426,8 @@ static void xen_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 
 	BUG_ON(dir == DMA_NONE);
 
-	xen_dma_unmap_page(hwdev, dev_addr, size, dir, attrs);
+	if (!dev_is_dma_coherent(hwdev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		xen_dma_sync_for_cpu(hwdev, dev_addr, paddr, size, dir);
 
 	/* NOTE: We use dev_addr here, not paddr! */
 	if (is_xen_swiotlb_buffer(dev_addr))
@@ -449,7 +447,8 @@ xen_swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dma_addr,
 {
 	phys_addr_t paddr = xen_bus_to_phys(dma_addr);
 
-	xen_dma_sync_single_for_cpu(dev, dma_addr, size, dir);
+	if (!dev_is_dma_coherent(dev))
+		xen_dma_sync_for_cpu(dev, dma_addr, paddr, size, dir);
 
 	if (is_xen_swiotlb_buffer(dma_addr))
 		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
@@ -464,7 +463,8 @@ xen_swiotlb_sync_single_for_device(struct device *dev, dma_addr_t dma_addr,
 	if (is_xen_swiotlb_buffer(dma_addr))
 		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
 
-	xen_dma_sync_single_for_device(dev, dma_addr, size, dir);
+	if (!dev_is_dma_coherent(dev))
+		xen_dma_sync_for_device(dev, dma_addr, paddr, size, dir);
 }
 
 /*
diff --git a/include/xen/arm/page-coherent.h b/include/xen/arm/page-coherent.h
index a8d9c0678c27..b9cc11e887ed 100644
--- a/include/xen/arm/page-coherent.h
+++ b/include/xen/arm/page-coherent.h
@@ -5,17 +5,6 @@
 #include <linux/dma-mapping.h>
 #include <asm/page.h>
 
-void __xen_dma_map_page(struct device *hwdev, struct page *page,
-	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, unsigned long attrs);
-void __xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir,
-		unsigned long attrs);
-void __xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir);
-void __xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir);
-
 static inline void *xen_alloc_coherent_pages(struct device *hwdev, size_t size,
 		dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs)
 {
@@ -28,56 +17,4 @@ static inline void xen_free_coherent_pages(struct device *hwdev, size_t size,
 	dma_direct_free(hwdev, size, cpu_addr, dma_handle, attrs);
 }
 
-static inline void xen_dma_sync_single_for_cpu(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_cpu(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_cpu(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_sync_single_for_device(struct device *hwdev,
-		dma_addr_t handle, size_t size, enum dma_data_direction dir)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-	if (pfn_valid(pfn))
-		dma_direct_sync_single_for_device(hwdev, handle, size, dir);
-	else
-		__xen_dma_sync_single_for_device(hwdev, handle, size, dir);
-}
-
-static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
-	     dma_addr_t dev_addr, unsigned long offset, size_t size,
-	     enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long pfn = PFN_DOWN(dev_addr);
-
-	/*
-	 * Dom0 is mapped 1:1, and while the Linux page can span across multiple
-	 * Xen pages, it is not possible for it to contain a mix of local and
-	 * foreign Xen pages.  Calling pfn_valid on a foreign mfn will always
-	 * return false, so if pfn_valid returns true the pages is local and we
-	 * can use the native dma-direct functions, otherwise we call the Xen
-	 * specific version.
-	 */
-	if (pfn_valid(pfn))
-		dma_direct_map_page(hwdev, page, offset, size, dir, attrs);
-	else
-		__xen_dma_map_page(hwdev, page, dev_addr, offset, size, dir, attrs);
-}
-
-static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	unsigned long pfn = PFN_DOWN(handle);
-
-	if (pfn_valid(pfn))
-		dma_direct_unmap_page(hwdev, handle, size, dir, attrs);
-	else
-		__xen_dma_unmap_page(hwdev, handle, size, dir, attrs);
-}
-
 #endif /* _XEN_ARM_PAGE_COHERENT_H */
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index 5e4b83f83dbc..d71380f6ed0b 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -4,6 +4,11 @@
 
 #include <linux/swiotlb.h>
 
+void xen_dma_sync_for_cpu(struct device *dev, dma_addr_t handle,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir);
+void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
+		phys_addr_t paddr, size_t size, enum dma_data_direction dir);
+
 extern int xen_swiotlb_init(int verbose, bool early);
 extern const struct dma_map_ops xen_swiotlb_dma_ops;
 
-- 
cgit v1.2.3


From 7396d337cfadc7c0b32dfd46581e9daff6666e84 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Mon, 26 Aug 2019 13:16:43 +0300
Subject: KVM: x86: Return to userspace with internal error on unexpected exit
 reason

Receiving an unexpected exit reason from hardware should be considered
as a severe bug in KVM. Therefore, instead of just injecting #UD to
guest and ignore it, exit to userspace on internal error so that
it could handle it properly (probably by terminating guest).

In addition, prefer to use vcpu_unimpl() instead of WARN_ONCE()
as handling unexpected exit reason should be a rare unexpected
event (that was expected to never happen) and we prefer to print
a message on it every time it occurs to guest.

Furthermore, dump VMCS/VMCB to dmesg to assist diagnosing such cases.

Reviewed-by: Mihai Carabas <mihai.carabas@oracle.com>
Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm.c       | 11 ++++++++---
 arch/x86/kvm/vmx/vmx.c   |  9 +++++++--
 include/uapi/linux/kvm.h |  2 ++
 3 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 890b1bffcf7c..fdeaf8f44949 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4981,9 +4981,14 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
 	    || !svm_exit_handlers[exit_code]) {
-		WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
+		vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
+		dump_vmcb(vcpu);
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror =
+			KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+		vcpu->run->internal.ndata = 1;
+		vcpu->run->internal.data[0] = exit_code;
+		return 0;
 	}
 
 	return svm_exit_handlers[exit_code](svm);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 024cb9bf5ad3..d95c2f3c027c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5870,8 +5870,13 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	else {
 		vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
 				exit_reason);
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
+		dump_vmcs();
+		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+		vcpu->run->internal.suberror =
+			KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+		vcpu->run->internal.ndata = 1;
+		vcpu->run->internal.data[0] = exit_reason;
+		return 0;
 	}
 }
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5414b6588fbb..233efbb1c81c 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -243,6 +243,8 @@ struct kvm_hyperv_exit {
 #define KVM_INTERNAL_ERROR_SIMUL_EX	2
 /* Encounter unexpected vm-exit due to delivery event. */
 #define KVM_INTERNAL_ERROR_DELIVERY_EV	3
+/* Encounter unexpected vm-exit reason */
+#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON	4
 
 /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
 struct kvm_run {
-- 
cgit v1.2.3


From b0c7e73b51dcdc8980a245f89278815ce1837d06 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 6 Sep 2019 10:45:37 +0200
Subject: gpio: of: Make of_gpio_simple_xlate() private

Since commit 9a95e8d25a140ba9 ("gpio: remove etraxfs driver"), there are
no more users of of_gpio_simple_xlate() outside gpiolib-of.c.
All GPIO drivers that need it now rely on of_gpiochip_add() setting it
up as the default translate function.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20190906084539.21838-3-geert+renesas@glider.be
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c |  6 +++---
 include/linux/of_gpio.h   | 11 -----------
 2 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 142033574f70..add7e6f8819e 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -637,8 +637,9 @@ static int of_gpiochip_scan_gpios(struct gpio_chip *chip)
  * GPIO chips. This function performs only one sanity check: whether GPIO
  * is less than ngpios (that is specified in the gpio_chip).
  */
-int of_gpio_simple_xlate(struct gpio_chip *gc,
-			 const struct of_phandle_args *gpiospec, u32 *flags)
+static int of_gpio_simple_xlate(struct gpio_chip *gc,
+				const struct of_phandle_args *gpiospec,
+				u32 *flags)
 {
 	/*
 	 * We're discouraging gpio_cells < 2, since that way you'll have to
@@ -662,7 +663,6 @@ int of_gpio_simple_xlate(struct gpio_chip *gc,
 
 	return gpiospec->args[0];
 }
-EXPORT_SYMBOL(of_gpio_simple_xlate);
 
 /**
  * of_mm_gpiochip_add_data - Add memory mapped GPIO chip (bank)
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index f9737dea9d1f..16967390a3fe 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -61,10 +61,6 @@ static inline int of_mm_gpiochip_add(struct device_node *np,
 }
 extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc);
 
-extern int of_gpio_simple_xlate(struct gpio_chip *gc,
-				const struct of_phandle_args *gpiospec,
-				u32 *flags);
-
 #else /* CONFIG_OF_GPIO */
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
@@ -77,13 +73,6 @@ static inline int of_get_named_gpio_flags(struct device_node *np,
 	return -ENOSYS;
 }
 
-static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
-				       const struct of_phandle_args *gpiospec,
-				       u32 *flags)
-{
-	return -ENOSYS;
-}
-
 #endif /* CONFIG_OF_GPIO */
 
 /**
-- 
cgit v1.2.3


From 4460d68f0b2f9092273531fbc65613e1855c2e07 Mon Sep 17 00:00:00 2001
From: Ben Chuang <ben.chuang@genesyslogic.com.tw>
Date: Tue, 27 Aug 2019 08:33:09 +0800
Subject: PCI: Add Genesys Logic, Inc. Vendor ID

Add the Genesys Logic, Inc. vendor ID to pci_ids.h.

Signed-off-by: Ben Chuang <ben.chuang@genesyslogic.com.tw>
Co-developed-by: Michael K Johnson <johnsonm@danlj.org>
Signed-off-by: Michael K Johnson <johnsonm@danlj.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c842735a4f45..87686a3309e1 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2405,6 +2405,8 @@
 #define PCI_DEVICE_ID_RDC_R6061		0x6061
 #define PCI_DEVICE_ID_RDC_D1010		0x1010
 
+#define PCI_VENDOR_ID_GLI		0x17a0
+
 #define PCI_VENDOR_ID_LENOVO		0x17aa
 
 #define PCI_VENDOR_ID_QCOM		0x17cb
-- 
cgit v1.2.3


From 64f658ded48eccd23d429d636d49a03b3f53d741 Mon Sep 17 00:00:00 2001
From: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Date: Wed, 11 Sep 2019 12:08:32 +0100
Subject: devlink: add unknown 'fw_load_policy' value

Similar to the 'reset_dev_on_drv_probe' devlink parameter, it is useful
to have an unknown value which can be used by drivers to report that the
hardware value isn't recognized or is otherwise invalid instead of
failing the operation.

This is especially useful for u8/enum parameters.

Suggested-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/devlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 1da3e83f1fd4..8da5365850cd 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -203,6 +203,7 @@ enum devlink_param_fw_load_policy_value {
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER,
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH,
 	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK,
+	DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_UNKNOWN,
 };
 
 enum devlink_param_reset_dev_on_drv_probe_value {
-- 
cgit v1.2.3


From bd880b00697befb73eff7220ee20bdae4fdd487b Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Sun, 8 Sep 2019 12:12:26 +0200
Subject: mmc: core: Add helper function to indicate if SDIO IRQs is enabled

To avoid each host driver supporting SDIO IRQs, from keeping track
internally about if SDIO IRQs has been claimed, let's introduce a common
helper function, sdio_irq_claimed().

The function returns true if SDIO IRQs are claimed, via using the
information about the number of claimed irqs. This is safe, even without
any locks, as long as the helper function is called only from
runtime/system suspend callbacks of the host driver.

Tested-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 4a351cb7f20f..cf87c673cbb8 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -493,6 +493,15 @@ void mmc_command_done(struct mmc_host *host, struct mmc_request *mrq);
 
 void mmc_cqe_request_done(struct mmc_host *host, struct mmc_request *mrq);
 
+/*
+ * May be called from host driver's system/runtime suspend/resume callbacks,
+ * to know if SDIO IRQs has been claimed.
+ */
+static inline bool sdio_irq_claimed(struct mmc_host *host)
+{
+	return host->sdio_irqs > 0;
+}
+
 static inline void mmc_signal_sdio_irq(struct mmc_host *host)
 {
 	host->ops->enable_sdio_irq(host, 0);
-- 
cgit v1.2.3


From 2c32dbbb5fc09a26e3a1c39bbf57537c54ed3e91 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Sun, 8 Sep 2019 12:12:31 +0200
Subject: mmc: core: Clarify that the ->ack_sdio_irq() callback is mandatory

For the MMC_CAP2_SDIO_IRQ_NOTHREAD case and when using sdio_signal_irq(),
the ->ack_sdio_irq() is already mandatory, which was not the case for those
host drivers that called sdio_run_irqs() directly.

As there are no longer any drivers calling sdio_run_irqs(), let's clarify
the code by dropping the unnecessary check and explicitly state that the
callback is mandatory in the header file.

Tested-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/sdio_irq.c | 3 +--
 include/linux/mmc/host.h    | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index 0962a4357d54..d7965b53a6d2 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -115,8 +115,7 @@ static void sdio_run_irqs(struct mmc_host *host)
 	mmc_claim_host(host);
 	if (host->sdio_irqs) {
 		process_sdio_pending_irqs(host);
-		if (host->ops->ack_sdio_irq)
-			host->ops->ack_sdio_irq(host);
+		host->ops->ack_sdio_irq(host);
 	}
 	mmc_release_host(host);
 }
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cf87c673cbb8..4704b77259ee 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -128,6 +128,7 @@ struct mmc_host_ops {
 	int	(*get_cd)(struct mmc_host *host);
 
 	void	(*enable_sdio_irq)(struct mmc_host *host, int enable);
+	/* Mandatory callback when using MMC_CAP2_SDIO_IRQ_NOTHREAD. */
 	void	(*ack_sdio_irq)(struct mmc_host *host);
 
 	/* optional callback for HC quirks */
-- 
cgit v1.2.3


From 9e54ba7c3752f27456ca691acc3f154e2597478c Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Wed, 11 Sep 2019 04:42:50 -0700
Subject: qed*: Fix size of config attribute dump.

Driver currently returns max-buf-size as size of the config attribute.
This patch incorporates changes to read this value from MFW (if available)
and provide it to the user. Also did a trivial clean up in this path.

Fixes: d44a3ced7023 ("qede: Add support for reading the config id attributes.")
Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c      | 26 +++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 14 +++++++------
 include/linux/qed/qed_if.h                      |  8 ++++++++
 3 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index ac1511a834d8..38c0ec3841e0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2300,6 +2300,31 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
 	return rc;
 }
 
+#define QED_MAX_NVM_BUF_LEN	32
+static int qed_nvm_flash_cfg_len(struct qed_dev *cdev, u32 cmd)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 buf[QED_MAX_NVM_BUF_LEN];
+	struct qed_ptt *ptt;
+	u32 len;
+	int rc;
+
+	ptt = qed_ptt_acquire(hwfn);
+	if (!ptt)
+		return QED_MAX_NVM_BUF_LEN;
+
+	rc = qed_mcp_nvm_get_cfg(hwfn, ptt, cmd, 0, QED_NVM_CFG_GET_FLAGS, buf,
+				 &len);
+	if (rc || !len) {
+		DP_ERR(cdev, "Error %d reading %d\n", rc, cmd);
+		len = QED_MAX_NVM_BUF_LEN;
+	}
+
+	qed_ptt_release(hwfn, ptt);
+
+	return len;
+}
+
 static int qed_nvm_flash_cfg_read(struct qed_dev *cdev, u8 **data,
 				  u32 cmd, u32 entity_id)
 {
@@ -2657,6 +2682,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.read_module_eeprom = &qed_read_module_eeprom,
 	.get_affin_hwfn_idx = &qed_get_affin_hwfn_idx,
 	.read_nvm_cfg = &qed_nvm_flash_cfg_read,
+	.read_nvm_cfg_len = &qed_nvm_flash_cfg_len,
 	.set_grc_config = &qed_set_grc_config,
 };
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index ec27a43230d7..8a426afb6a55 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -49,7 +49,6 @@
 
 #define QEDE_SELFTEST_POLL_COUNT 100
 #define QEDE_DUMP_VERSION	0x1
-#define QEDE_DUMP_NVM_BUF_LEN	32
 #define QEDE_DUMP_NVM_ARG_COUNT	2
 
 static const struct {
@@ -2026,7 +2025,8 @@ static int qede_get_dump_flag(struct net_device *dev,
 	switch (edev->dump_info.cmd) {
 	case QEDE_DUMP_CMD_NVM_CFG:
 		dump->flag = QEDE_DUMP_CMD_NVM_CFG;
-		dump->len = QEDE_DUMP_NVM_BUF_LEN;
+		dump->len = edev->ops->common->read_nvm_cfg_len(edev->cdev,
+						edev->dump_info.args[0]);
 		break;
 	case QEDE_DUMP_CMD_GRCDUMP:
 		dump->flag = QEDE_DUMP_CMD_GRCDUMP;
@@ -2051,9 +2051,8 @@ static int qede_get_dump_data(struct net_device *dev,
 
 	if (!edev->ops || !edev->ops->common) {
 		DP_ERR(edev, "Edev ops not populated\n");
-		edev->dump_info.cmd = QEDE_DUMP_CMD_NONE;
-		edev->dump_info.num_args = 0;
-		return -EINVAL;
+		rc = -EINVAL;
+		goto err;
 	}
 
 	switch (edev->dump_info.cmd) {
@@ -2062,7 +2061,8 @@ static int qede_get_dump_data(struct net_device *dev,
 			DP_ERR(edev, "Arg count = %d required = %d\n",
 			       edev->dump_info.num_args,
 			       QEDE_DUMP_NVM_ARG_COUNT);
-			return -EINVAL;
+			rc = -EINVAL;
+			goto err;
 		}
 		rc =  edev->ops->common->read_nvm_cfg(edev->cdev, (u8 **)&buf,
 						      edev->dump_info.args[0],
@@ -2078,8 +2078,10 @@ static int qede_get_dump_data(struct net_device *dev,
 		break;
 	}
 
+err:
 	edev->dump_info.cmd = QEDE_DUMP_CMD_NONE;
 	edev->dump_info.num_args = 0;
+	memset(edev->dump_info.args, 0, sizeof(edev->dump_info.args));
 
 	return rc;
 }
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index e35463860c84..b5db1ee96d78 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1143,6 +1143,14 @@ struct qed_common_ops {
  */
 	int (*read_nvm_cfg)(struct qed_dev *cdev, u8 **buf, u32 cmd,
 			    u32 entity_id);
+/**
+ * @brief read_nvm_cfg - Read NVM config attribute value.
+ * @param cdev
+ * @param cmd - NVM CFG command id
+ *
+ * @return config id length, 0 on error.
+ */
+	int (*read_nvm_cfg_len)(struct qed_dev *cdev, u32 cmd);
 
 /**
  * @brief set_grc_config - Configure value for grc config id.
-- 
cgit v1.2.3


From 0060c8783330ab60deb96f9d6bb7abfe4664765d Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Fri, 6 Sep 2019 16:02:55 +0300
Subject: net: stmmac: implement support for passive mode converters via dt

In-between the MAC & PHY there can be a mode converter, which converts one
mode to another (e.g. GMII-to-RGMII).

The converter, can be passive (i.e. no driver or OS/SW information
required), so the MAC & PHY need to be configured differently.

For the `stmmac` driver, this is implemented via a `mac-mode` property in
the device-tree, which configures the MAC into a certain mode, and for the
PHY a `phy_interface` field will hold the mode of the PHY. The mode of the
PHY will be passed to the PHY and from there-on it work in a different
mode. If unspecified, the default `phy-mode` will be used for both.

Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  2 +-
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  | 34 +++++++++++++++++++++-
 include/linux/stmmac.h                             |  1 +
 3 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 6e44013b20cc..c61d702fe83a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1036,7 +1036,7 @@ static int stmmac_init_phy(struct net_device *dev)
 static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
 	struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node);
-	int mode = priv->plat->interface;
+	int mode = priv->plat->phy_interface;
 	struct phylink *phylink;
 
 	priv->phylink_config.dev = &priv->dev->dev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 5de754a9fae9..170c3a052b14 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -358,6 +358,32 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 	return 0;
 }
 
+/**
+ * stmmac_of_get_mac_mode - retrieves the interface of the MAC
+ * @np - device-tree node
+ * Description:
+ * Similar to `of_get_phy_mode()`, this function will retrieve (from
+ * the device-tree) the interface mode on the MAC side. This assumes
+ * that there is mode converter in-between the MAC & PHY
+ * (e.g. GMII-to-RGMII).
+ */
+static int stmmac_of_get_mac_mode(struct device_node *np)
+{
+	const char *pm;
+	int err, i;
+
+	err = of_property_read_string(np, "mac-mode", &pm);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) {
+		if (!strcasecmp(pm, phy_modes(i)))
+			return i;
+	}
+
+	return -ENODEV;
+}
+
 /**
  * stmmac_probe_config_dt - parse device-tree driver parameters
  * @pdev: platform_device structure
@@ -386,7 +412,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 		*mac = NULL;
 	}
 
-	plat->interface = of_get_phy_mode(np);
+	plat->phy_interface = of_get_phy_mode(np);
+	if (plat->phy_interface < 0)
+		return ERR_PTR(plat->phy_interface);
+
+	plat->interface = stmmac_of_get_mac_mode(np);
+	if (plat->interface < 0)
+		plat->interface = plat->phy_interface;
 
 	/* Some wrapper drivers still rely on phy_node. Let's save it while
 	 * they are not converted to phylink. */
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 7ad7ae35cf88..dc60d03c4b60 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -131,6 +131,7 @@ struct plat_stmmacenet_data {
 	int bus_id;
 	int phy_addr;
 	int interface;
+	int phy_interface;
 	struct stmmac_mdio_bus_data *mdio_bus_data;
 	struct device_node *phy_node;
 	struct device_node *phylink_node;
-- 
cgit v1.2.3


From cb5d8c45ab6c3daf8269e550cfb2d5018a876fe3 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Sun, 8 Sep 2019 00:45:21 +0100
Subject: cpuidle: allow governor switch on cpuidle_register_driver()

The recently introduced haltpoll driver is largely only useful with
haltpoll governor. To allow drivers to associate with a particular idle
behaviour, add a @governor property to 'struct cpuidle_driver' and thus
allow a cpuidle driver to switch to a *preferred* governor on idle driver
registration. We save the previous governor, and when an idle driver is
unregistered we switch back to that.

The @governor can be overridden by cpuidle.governor= boot param or
alternatively be ignored if the governor doesn't exist.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.h  |  2 ++
 drivers/cpuidle/driver.c   | 25 +++++++++++++++++++++++++
 drivers/cpuidle/governor.c |  7 ++++---
 include/linux/cpuidle.h    |  3 +++
 4 files changed, 34 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
index d6613101af92..9f336af17fa6 100644
--- a/drivers/cpuidle/cpuidle.h
+++ b/drivers/cpuidle/cpuidle.h
@@ -9,6 +9,7 @@
 /* For internal use only */
 extern char param_governor[];
 extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct cpuidle_governor *cpuidle_prev_governor;
 extern struct list_head cpuidle_governors;
 extern struct list_head cpuidle_detected_devices;
 extern struct mutex cpuidle_lock;
@@ -22,6 +23,7 @@ extern void cpuidle_install_idle_handler(void);
 extern void cpuidle_uninstall_idle_handler(void);
 
 /* governors */
+extern struct cpuidle_governor *cpuidle_find_governor(const char *str);
 extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
 
 /* sysfs */
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index dc32f34e68d9..80c1a830d991 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -254,12 +254,25 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
  */
 int cpuidle_register_driver(struct cpuidle_driver *drv)
 {
+	struct cpuidle_governor *gov;
 	int ret;
 
 	spin_lock(&cpuidle_driver_lock);
 	ret = __cpuidle_register_driver(drv);
 	spin_unlock(&cpuidle_driver_lock);
 
+	if (!ret && !strlen(param_governor) && drv->governor &&
+	    (cpuidle_get_driver() == drv)) {
+		mutex_lock(&cpuidle_lock);
+		gov = cpuidle_find_governor(drv->governor);
+		if (gov) {
+			cpuidle_prev_governor = cpuidle_curr_governor;
+			if (cpuidle_switch_governor(gov) < 0)
+				cpuidle_prev_governor = NULL;
+		}
+		mutex_unlock(&cpuidle_lock);
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cpuidle_register_driver);
@@ -274,9 +287,21 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver);
  */
 void cpuidle_unregister_driver(struct cpuidle_driver *drv)
 {
+	bool enabled = (cpuidle_get_driver() == drv);
+
 	spin_lock(&cpuidle_driver_lock);
 	__cpuidle_unregister_driver(drv);
 	spin_unlock(&cpuidle_driver_lock);
+
+	if (!enabled)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (cpuidle_prev_governor) {
+		if (!cpuidle_switch_governor(cpuidle_prev_governor))
+			cpuidle_prev_governor = NULL;
+	}
+	mutex_unlock(&cpuidle_lock);
 }
 EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
 
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
index 2e3e14192bee..e9801f26c732 100644
--- a/drivers/cpuidle/governor.c
+++ b/drivers/cpuidle/governor.c
@@ -20,14 +20,15 @@ char param_governor[CPUIDLE_NAME_LEN];
 
 LIST_HEAD(cpuidle_governors);
 struct cpuidle_governor *cpuidle_curr_governor;
+struct cpuidle_governor *cpuidle_prev_governor;
 
 /**
- * __cpuidle_find_governor - finds a governor of the specified name
+ * cpuidle_find_governor - finds a governor of the specified name
  * @str: the name
  *
  * Must be called with cpuidle_lock acquired.
  */
-static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+struct cpuidle_governor *cpuidle_find_governor(const char *str)
 {
 	struct cpuidle_governor *gov;
 
@@ -87,7 +88,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 		return -ENODEV;
 
 	mutex_lock(&cpuidle_lock);
-	if (__cpuidle_find_governor(gov->name) == NULL) {
+	if (cpuidle_find_governor(gov->name) == NULL) {
 		ret = 0;
 		list_add_tail(&gov->governor_list, &cpuidle_governors);
 		if (!cpuidle_curr_governor ||
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 1a9f54eb3aa1..2dc4c6b19c25 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -121,6 +121,9 @@ struct cpuidle_driver {
 
 	/* the driver handles the cpus in cpumask */
 	struct cpumask		*cpumask;
+
+	/* preferred governor to switch at register time */
+	const char		*governor;
 };
 
 #ifdef CONFIG_CPU_IDLE
-- 
cgit v1.2.3


From 069e1c07c18ac2ccecdfb5ac287a37d6fb2d7a00 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Wed, 11 Sep 2019 13:26:46 +0100
Subject: module: Fix link failure due to invalid relocation on namespace
 offset

Commit 8651ec01daed ("module: add support for symbol namespaces.")
broke linking for arm64 defconfig:

  | lib/crypto/arc4.o: In function `__ksymtab_arc4_setkey':
  | arc4.c:(___ksymtab+arc4_setkey+0x8): undefined reference to `no symbol'
  | lib/crypto/arc4.o: In function `__ksymtab_arc4_crypt':
  | arc4.c:(___ksymtab+arc4_crypt+0x8): undefined reference to `no symbol'

This is because the dummy initialisation of the 'namespace_offset' field
in 'struct kernel_symbol' when using EXPORT_SYMBOL on architectures with
support for PREL32 locations uses an offset from an absolute address (0)
in an effort to trick 'offset_to_pointer' into behaving as a NOP,
allowing non-namespaced symbols to be treated in the same way as those
belonging to a namespace.

Unfortunately, place-relative relocations require a symbol reference
rather than an absolute value and, although x86 appears to get away with
this due to placing the kernel text at the top of the address space, it
almost certainly results in a runtime failure if the kernel is relocated
dynamically as a result of KASLR.

Rework 'namespace_offset' so that a value of 0, which cannot occur for a
valid namespaced symbol, indicates that the corresponding symbol does
not belong to a namespace.

Cc: Matthias Maennich <maennich@google.com>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Fixes: 8651ec01daed ("module: add support for symbol namespaces.")
Reported-by: kbuild test robot <lkp@intel.com>
Tested-by: Matthias Maennich <maennich@google.com>
Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Matthias Maennich <maennich@google.com>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 include/asm-generic/export.h | 2 +-
 include/linux/export.h       | 2 +-
 kernel/module.c              | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index e2b5d0f569d3..d0912c7ac2fc 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -17,7 +17,7 @@
 
 .macro __put, val, name
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-	.long	\val - ., \name - ., 0 - .
+	.long	\val - ., \name - ., 0
 #elif defined(CONFIG_64BIT)
 	.quad	\val, \name, 0
 #else
diff --git a/include/linux/export.h b/include/linux/export.h
index 2c5468d8ea9a..ef5d015d754a 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -68,7 +68,7 @@ extern struct module __this_module;
 	    "__ksymtab_" #sym ":				\n"	\
 	    "	.long	" #sym "- .				\n"	\
 	    "	.long	__kstrtab_" #sym "- .			\n"	\
-	    "	.long	0 - .					\n"	\
+	    "	.long	0					\n"	\
 	    "	.previous					\n")
 
 struct kernel_symbol {
diff --git a/kernel/module.c b/kernel/module.c
index f76efcf2043e..7ab244c4e1ba 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -547,6 +547,8 @@ static const char *kernel_symbol_name(const struct kernel_symbol *sym)
 static const char *kernel_symbol_namespace(const struct kernel_symbol *sym)
 {
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+	if (!sym->namespace_offset)
+		return NULL;
 	return offset_to_ptr(&sym->namespace_offset);
 #else
 	return sym->namespace;
-- 
cgit v1.2.3


From c34a024e4ee087b6d9e534160990082c3d9e7103 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 10 Sep 2019 16:15:29 +0200
Subject: gpio: htc-egpio: Remove unused exported htc_egpio_get_wakeup_irq()

This function was never used upstream, and is a relic of the original
handhelds.org code the htc-egpio driver was based on.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20190910141529.21030-1-geert+renesas@glider.be
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-htc-egpio.c                | 14 --------------
 include/linux/platform_data/gpio-htc-egpio.h |  3 ---
 2 files changed, 17 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpio-htc-egpio.c b/drivers/gpio/gpio-htc-egpio.c
index 9d3ac51a765c..6eb56f7ab9c9 100644
--- a/drivers/gpio/gpio-htc-egpio.c
+++ b/drivers/gpio/gpio-htc-egpio.c
@@ -118,20 +118,6 @@ static void egpio_handler(struct irq_desc *desc)
 	}
 }
 
-int htc_egpio_get_wakeup_irq(struct device *dev)
-{
-	struct egpio_info *ei = dev_get_drvdata(dev);
-
-	/* Read current pins. */
-	u16 readval = egpio_readw(ei, ei->ack_register);
-	/* Ack/unmask interrupts. */
-	ack_irqs(ei);
-	/* Return first set pin. */
-	readval &= ei->irqs_enabled;
-	return ei->irq_start + ffs(readval) - 1;
-}
-EXPORT_SYMBOL(htc_egpio_get_wakeup_irq);
-
 static inline int egpio_pos(struct egpio_info *ei, int bit)
 {
 	return bit >> ei->reg_shift;
diff --git a/include/linux/platform_data/gpio-htc-egpio.h b/include/linux/platform_data/gpio-htc-egpio.h
index 9a3e78082883..eaefba0b6465 100644
--- a/include/linux/platform_data/gpio-htc-egpio.h
+++ b/include/linux/platform_data/gpio-htc-egpio.h
@@ -50,7 +50,4 @@ struct htc_egpio_platform_data {
 	int                   num_chips;
 };
 
-/* Determine the wakeup irq, to be called during early resume */
-extern int htc_egpio_get_wakeup_irq(struct device *dev);
-
 #endif
-- 
cgit v1.2.3


From 6565c182094f69e4ffdece337d395eb7ec760efc Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 11 Sep 2019 17:36:50 +0800
Subject: quota: fix wrong condition in is_quota_modification()

Quoted from
commit 3da40c7b0898 ("ext4: only call ext4_truncate when size <= isize")

" At LSF we decided that if we truncate up from isize we shouldn't trim
  fallocated blocks that were fallocated with KEEP_SIZE and are past the
 new i_size.  This patch fixes ext4 to do this. "

And generic/092 of fstest have covered this case for long time, however
is_quota_modification() didn't adjust based on that rule, so that in
below condition, we will lose to quota block change:
- fallocate blocks beyond EOF
- remount
- truncate(file_path, file_size)

Fix it.

Link: https://lore.kernel.org/r/20190911093650.35329-1-yuchao0@huawei.com
Fixes: 3da40c7b0898 ("ext4: only call ext4_truncate when size <= isize")
CC: stable@vger.kernel.org
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/quotaops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index dc905a4ff8d7..185d94829701 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -22,7 +22,7 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 /* i_mutex must being held */
 static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
 {
-	return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
+	return (ia->ia_valid & ATTR_SIZE) ||
 		(ia->ia_valid & ATTR_UID && !uid_eq(ia->ia_uid, inode->i_uid)) ||
 		(ia->ia_valid & ATTR_GID && !gid_eq(ia->ia_gid, inode->i_gid));
 }
-- 
cgit v1.2.3


From 427b00342c5a3ebcf31fac2ce3b21fb993952816 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Thu, 12 Sep 2019 13:13:55 +0900
Subject: mmc: queue: Fix bigger segments usage

The commit 38c38cb73223 ("mmc: queue: use bigger segments if DMA MAP
layer can merge the segments") always enables the bugger segments
if DMA MAP layer can merge the segments, but some controllers (SDHCI)
have strictly limitation about the segments size, and then the commit
breaks on the controllers.

To fix the issue, this patch adds a new flag MMC_CAP2_MERGE_CAPABLE
into the struct mmc_host and the bigger segments usage is disabled
as default.

Reported-by: Thierry Reding <treding@nvidia.com>
Fixes: 38c38cb73223 ("mmc: queue: use bigger segments if DMA MAP layer can merge the segments")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/mmc/core/queue.c | 8 +++++++-
 include/linux/mmc/host.h | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 1e29b305767e..9edc08685e86 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -399,6 +399,11 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
 	init_waitqueue_head(&mq->wait);
 }
 
+static inline bool mmc_merge_capable(struct mmc_host *host)
+{
+	return host->caps2 & MMC_CAP2_MERGE_CAPABLE;
+}
+
 /* Set queue depth to get a reasonable value for q->nr_requests */
 #define MMC_QUEUE_DEPTH 64
 
@@ -441,7 +446,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 	 * the host->can_dma_map_merge should be set before to get max_segs
 	 * from mmc_get_max_segments().
 	 */
-	if (host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
+	if (mmc_merge_capable(host) &&
+	    host->max_segs < MMC_DMA_MAP_MERGE_SEGMENTS &&
 	    dma_get_merge_boundary(mmc_dev(host)))
 		host->can_dma_map_merge = 1;
 	else
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index c5662b3e64db..3becb28210f3 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -367,6 +367,7 @@ struct mmc_host {
 #define MMC_CAP2_CQE		(1 << 23)	/* Has eMMC command queue engine */
 #define MMC_CAP2_CQE_DCMD	(1 << 24)	/* CQE can issue a direct command */
 #define MMC_CAP2_AVOID_3_3V	(1 << 25)	/* Host must negotiate down from 3.3V */
+#define MMC_CAP2_MERGE_CAPABLE	(1 << 26)	/* Host can merge a segment over the segment size */
 
 	int			fixed_drv_type;	/* fixed driver type for non-removable media */
 
-- 
cgit v1.2.3


From c4bb667eaf520f21b3a3db0489682becc9c49bcc Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Fri, 2 Aug 2019 18:15:19 +0100
Subject: fuse: reserve values for mapping protocol

SETUPMAPPING is a command for use with 'virtiofsd', a fuse-over-virtio
implementation; it may find use in other fuse impelementations as well in
which the kernel does not have access to the address space of the daemon
directly.

A SETUPMAPPING operation causes a section of a file to be mapped into a
memory window visible to the kernel.  The offsets in the file and the
window are defined by the kernel performing the operation.

The daemon may reject the request, for reasons including permissions and
limited resources.

When a request perfectly overlaps a previous mapping, the previous mapping
is replaced.  When a mapping partially overlaps a previous mapping, the
previous mapping is split into one or two smaller mappings.

REMOVEMAPPING is the complement to SETUPMAPPING; it unmaps a range of
mapped files from the window visible to the kernel.

The map_alignment field communicates the alignment constraint for
FUSE_SETUPMAPPING/FUSE_REMOVEMAPPING and allows the daemon to constrain the
addresses and file offsets chosen by the kernel.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index df2e12fb3381..802b0377a49e 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -133,6 +133,8 @@
  *
  *  7.31
  *  - add FUSE_WRITE_KILL_PRIV flag
+ *  - add FUSE_SETUPMAPPING and FUSE_REMOVEMAPPING
+ *  - add map_alignment to fuse_init_out, add FUSE_MAP_ALIGNMENT flag
  */
 
 #ifndef _LINUX_FUSE_H
@@ -274,6 +276,7 @@ struct fuse_file_lock {
  * FUSE_CACHE_SYMLINKS: cache READLINK responses
  * FUSE_NO_OPENDIR_SUPPORT: kernel supports zero-message opendir
  * FUSE_EXPLICIT_INVAL_DATA: only invalidate cached pages on explicit request
+ * FUSE_MAP_ALIGNMENT: map_alignment field is valid
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -301,6 +304,7 @@ struct fuse_file_lock {
 #define FUSE_CACHE_SYMLINKS	(1 << 23)
 #define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
 #define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
+#define FUSE_MAP_ALIGNMENT	(1 << 26)
 
 /**
  * CUSE INIT request/reply flags
@@ -422,6 +426,8 @@ enum fuse_opcode {
 	FUSE_RENAME2		= 45,
 	FUSE_LSEEK		= 46,
 	FUSE_COPY_FILE_RANGE	= 47,
+	FUSE_SETUPMAPPING	= 48,
+	FUSE_REMOVEMAPPING	= 49,
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
@@ -656,7 +662,7 @@ struct fuse_init_out {
 	uint32_t	max_write;
 	uint32_t	time_gran;
 	uint16_t	max_pages;
-	uint16_t	padding;
+	uint16_t	map_alignment;
 	uint32_t	unused[8];
 };
 
-- 
cgit v1.2.3


From 501ae8ecae2ba5122774dee4445003505a7fd01b Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 4 Sep 2019 08:36:33 -0400
Subject: fuse: reserve byteswapped init opcodes

virtio fs tunnels fuse over a virtio channel.  One issue is two sides might
be speaking different endian-ness. To detects this, host side looks at the
opcode value in the FUSE_INIT command.  Works fine at the moment but might
fail if a future version of fuse will use such an opcode for
initialization.  Let's reserve this opcode so we remember and don't do
this.

Same for CUSE_INIT.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 include/uapi/linux/fuse.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 2971d29a42e4..df2e12fb3381 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -425,6 +425,10 @@ enum fuse_opcode {
 
 	/* CUSE specific operations */
 	CUSE_INIT		= 4096,
+
+	/* Reserved opcodes: helpful to detect structure endian-ness */
+	CUSE_INIT_BSWAP_RESERVED	= 1048576,	/* CUSE_INIT << 8 */
+	FUSE_INIT_BSWAP_RESERVED	= 436207616,	/* FUSE_INIT << 24 */
 };
 
 enum fuse_notify_code {
-- 
cgit v1.2.3


From 2e6fcfeb9df6048a63fe0d5f7dfa39274bacbb71 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 12 Sep 2019 00:00:18 +0100
Subject: module: Remove leftover '#undef' from export header

Commit 7290d5809571 ("module: use relative references for __ksymtab
entries") converted the '__put' #define into an assembly macro in
asm-generic/export.h but forgot to remove the corresponding '#undef'.

Remove the leftover '#undef'.

Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
---
 include/asm-generic/export.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index d0912c7ac2fc..fa577978fbbd 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -55,7 +55,6 @@ __kcrctab_\name:
 #endif
 #endif
 .endm
-#undef __put
 
 #if defined(CONFIG_TRIM_UNUSED_KSYMS)
 
-- 
cgit v1.2.3


From 96a0a80738461d6d2421ae64ee9990b702efd2a6 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Mon, 2 Sep 2019 18:08:48 +0200
Subject: platform/chrome: cros_ec_chardev: Add a poll handler to receive MKBP
 events

Allow to poll on the cros_ec device to receive the MKBP events.

The /dev/cros_[ec|fp|..] file operations now implements the poll
operation. The userspace can now receive specific MKBP events by doing
the following:

- Open the /dev/cros_XX file.
- Call the CROS_EC_DEV_IOCEVENTMASK ioctl with the bitmap of the MKBP
  events it wishes to receive as argument.
- Poll on the file descriptor.
- When it gets POLLIN, do a read on the file descriptor, the first
  queued event will be returned (using the struct
  ec_response_get_next_event format: one byte of event type, then
  the payload).

The read() operation returns at most one event even if there are several
queued, and it might be truncated if the buffer is smaller than the
event (but the caller should know the maximum size of the events it is
reading).

read() used to return the EC version string, it still does it when no
event mask or an empty event is set for backward compatibility (despite
nobody really using this feature).

This will be used, for example, by the userspace daemon to receive and
treat the EC_MKBP_EVENT_FINGERPRINT sent by the FP MCU.

Signed-off-by: Vincent Palatin <vpalatin@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Gwendal Grignou <gwendal@chromium.org>
---
 drivers/platform/chrome/cros_ec_chardev.c     | 177 +++++++++++++++++++++++++-
 include/linux/platform_data/cros_ec_chardev.h |   1 +
 2 files changed, 173 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/chrome/cros_ec_chardev.c b/drivers/platform/chrome/cros_ec_chardev.c
index 08abd7e5c7bf..74ded441bb50 100644
--- a/drivers/platform/chrome/cros_ec_chardev.c
+++ b/drivers/platform/chrome/cros_ec_chardev.c
@@ -16,21 +16,42 @@
 #include <linux/mfd/cros_ec.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/notifier.h>
 #include <linux/platform_data/cros_ec_chardev.h>
 #include <linux/platform_data/cros_ec_commands.h>
 #include <linux/platform_data/cros_ec_proto.h>
 #include <linux/platform_device.h>
+#include <linux/poll.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/uaccess.h>
 
 #define DRV_NAME		"cros-ec-chardev"
 
+/* Arbitrary bounded size for the event queue */
+#define CROS_MAX_EVENT_LEN	PAGE_SIZE
+
 struct chardev_data {
 	struct cros_ec_dev *ec_dev;
 	struct miscdevice misc;
 };
 
+struct chardev_priv {
+	struct cros_ec_dev *ec_dev;
+	struct notifier_block notifier;
+	wait_queue_head_t wait_event;
+	unsigned long event_mask;
+	struct list_head events;
+	size_t event_len;
+};
+
+struct ec_event {
+	struct list_head node;
+	size_t size;
+	u8 event_type;
+	u8 data[0];
+};
+
 static int ec_get_version(struct cros_ec_dev *ec, char *str, int maxlen)
 {
 	static const char * const current_image_name[] = {
@@ -69,6 +90,71 @@ exit:
 	return ret;
 }
 
+static int cros_ec_chardev_mkbp_event(struct notifier_block *nb,
+				      unsigned long queued_during_suspend,
+				      void *_notify)
+{
+	struct chardev_priv *priv = container_of(nb, struct chardev_priv,
+						 notifier);
+	struct cros_ec_device *ec_dev = priv->ec_dev->ec_dev;
+	struct ec_event *event;
+	unsigned long event_bit = 1 << ec_dev->event_data.event_type;
+	int total_size = sizeof(*event) + ec_dev->event_size;
+
+	if (!(event_bit & priv->event_mask) ||
+	    (priv->event_len + total_size) > CROS_MAX_EVENT_LEN)
+		return NOTIFY_DONE;
+
+	event = kzalloc(total_size, GFP_KERNEL);
+	if (!event)
+		return NOTIFY_DONE;
+
+	event->size = ec_dev->event_size;
+	event->event_type = ec_dev->event_data.event_type;
+	memcpy(event->data, &ec_dev->event_data.data, ec_dev->event_size);
+
+	spin_lock(&priv->wait_event.lock);
+	list_add_tail(&event->node, &priv->events);
+	priv->event_len += total_size;
+	wake_up_locked(&priv->wait_event);
+	spin_unlock(&priv->wait_event.lock);
+
+	return NOTIFY_OK;
+}
+
+static struct ec_event *cros_ec_chardev_fetch_event(struct chardev_priv *priv,
+						    bool fetch, bool block)
+{
+	struct ec_event *event;
+	int err;
+
+	spin_lock(&priv->wait_event.lock);
+	if (!block && list_empty(&priv->events)) {
+		event = ERR_PTR(-EWOULDBLOCK);
+		goto out;
+	}
+
+	if (!fetch) {
+		event = NULL;
+		goto out;
+	}
+
+	err = wait_event_interruptible_locked(priv->wait_event,
+					      !list_empty(&priv->events));
+	if (err) {
+		event = ERR_PTR(err);
+		goto out;
+	}
+
+	event = list_first_entry(&priv->events, struct ec_event, node);
+	list_del(&event->node);
+	priv->event_len -= sizeof(*event) + event->size;
+
+out:
+	spin_unlock(&priv->wait_event.lock);
+	return event;
+}
+
 /*
  * Device file ops
  */
@@ -76,11 +162,40 @@ static int cros_ec_chardev_open(struct inode *inode, struct file *filp)
 {
 	struct miscdevice *mdev = filp->private_data;
 	struct cros_ec_dev *ec_dev = dev_get_drvdata(mdev->parent);
+	struct chardev_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
 
-	filp->private_data = ec_dev;
+	priv->ec_dev = ec_dev;
+	filp->private_data = priv;
+	INIT_LIST_HEAD(&priv->events);
+	init_waitqueue_head(&priv->wait_event);
 	nonseekable_open(inode, filp);
 
-	return 0;
+	priv->notifier.notifier_call = cros_ec_chardev_mkbp_event;
+	ret = blocking_notifier_chain_register(&ec_dev->ec_dev->event_notifier,
+					       &priv->notifier);
+	if (ret) {
+		dev_err(ec_dev->dev, "failed to register event notifier\n");
+		kfree(priv);
+	}
+
+	return ret;
+}
+
+static __poll_t cros_ec_chardev_poll(struct file *filp, poll_table *wait)
+{
+	struct chardev_priv *priv = filp->private_data;
+
+	poll_wait(filp, &priv->wait_event, wait);
+
+	if (list_empty(&priv->events))
+		return 0;
+
+	return EPOLLIN | EPOLLRDNORM;
 }
 
 static ssize_t cros_ec_chardev_read(struct file *filp, char __user *buffer,
@@ -88,14 +203,42 @@ static ssize_t cros_ec_chardev_read(struct file *filp, char __user *buffer,
 {
 	char msg[sizeof(struct ec_response_get_version) +
 		 sizeof(CROS_EC_DEV_VERSION)];
-	struct cros_ec_dev *ec = filp->private_data;
+	struct chardev_priv *priv = filp->private_data;
+	struct cros_ec_dev *ec_dev = priv->ec_dev;
 	size_t count;
 	int ret;
 
+	if (priv->event_mask) { /* queued MKBP event */
+		struct ec_event *event;
+
+		event = cros_ec_chardev_fetch_event(priv, length != 0,
+						!(filp->f_flags & O_NONBLOCK));
+		if (IS_ERR(event))
+			return PTR_ERR(event);
+		/*
+		 * length == 0 is special - no IO is done but we check
+		 * for error conditions.
+		 */
+		if (length == 0)
+			return 0;
+
+		/* The event is 1 byte of type plus the payload */
+		count = min(length, event->size + 1);
+		ret = copy_to_user(buffer, &event->event_type, count);
+		kfree(event);
+		if (ret) /* the copy failed */
+			return -EFAULT;
+		*offset = count;
+		return count;
+	}
+
+	/*
+	 * Legacy behavior if no event mask is defined
+	 */
 	if (*offset != 0)
 		return 0;
 
-	ret = ec_get_version(ec, msg, sizeof(msg));
+	ret = ec_get_version(ec_dev, msg, sizeof(msg));
 	if (ret)
 		return ret;
 
@@ -108,6 +251,24 @@ static ssize_t cros_ec_chardev_read(struct file *filp, char __user *buffer,
 	return count;
 }
 
+static int cros_ec_chardev_release(struct inode *inode, struct file *filp)
+{
+	struct chardev_priv *priv = filp->private_data;
+	struct cros_ec_dev *ec_dev = priv->ec_dev;
+	struct ec_event *event, *e;
+
+	blocking_notifier_chain_unregister(&ec_dev->ec_dev->event_notifier,
+					   &priv->notifier);
+
+	list_for_each_entry_safe(event, e, &priv->events, node) {
+		list_del(&event->node);
+		kfree(event);
+	}
+	kfree(priv);
+
+	return 0;
+}
+
 /*
  * Ioctls
  */
@@ -181,7 +342,8 @@ static long cros_ec_chardev_ioctl_readmem(struct cros_ec_dev *ec,
 static long cros_ec_chardev_ioctl(struct file *filp, unsigned int cmd,
 				   unsigned long arg)
 {
-	struct cros_ec_dev *ec = filp->private_data;
+	struct chardev_priv *priv = filp->private_data;
+	struct cros_ec_dev *ec = priv->ec_dev;
 
 	if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC)
 		return -ENOTTY;
@@ -191,6 +353,9 @@ static long cros_ec_chardev_ioctl(struct file *filp, unsigned int cmd,
 		return cros_ec_chardev_ioctl_xcmd(ec, (void __user *)arg);
 	case CROS_EC_DEV_IOCRDMEM:
 		return cros_ec_chardev_ioctl_readmem(ec, (void __user *)arg);
+	case CROS_EC_DEV_IOCEVENTMASK:
+		priv->event_mask = arg;
+		return 0;
 	}
 
 	return -ENOTTY;
@@ -198,7 +363,9 @@ static long cros_ec_chardev_ioctl(struct file *filp, unsigned int cmd,
 
 static const struct file_operations chardev_fops = {
 	.open		= cros_ec_chardev_open,
+	.poll		= cros_ec_chardev_poll,
 	.read		= cros_ec_chardev_read,
+	.release	= cros_ec_chardev_release,
 	.unlocked_ioctl	= cros_ec_chardev_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= cros_ec_chardev_ioctl,
diff --git a/include/linux/platform_data/cros_ec_chardev.h b/include/linux/platform_data/cros_ec_chardev.h
index 973b2615aa02..7de8faaf77df 100644
--- a/include/linux/platform_data/cros_ec_chardev.h
+++ b/include/linux/platform_data/cros_ec_chardev.h
@@ -33,5 +33,6 @@ struct cros_ec_readmem {
 #define CROS_EC_DEV_IOC       0xEC
 #define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
 #define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
+#define CROS_EC_DEV_IOCEVENTMASK _IO(CROS_EC_DEV_IOC, 2)
 
 #endif /* _CROS_EC_DEV_H_ */
-- 
cgit v1.2.3


From f32356261d44d580649a7abce1156d15d49cf20f Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 25 Mar 2019 16:38:31 +0000
Subject: vfs: Convert ramfs, shmem, tmpfs, devtmpfs, rootfs to use the new
 mount API

Convert the ramfs, shmem, tmpfs, devtmpfs and rootfs filesystems to the new
internal mount API as the old one will be obsoleted and removed.  This
allows greater flexibility in communication of mount parameters between
userspace, the VFS and the filesystem.

See Documentation/filesystems/mount_api.txt for more information.

Note that tmpfs is slightly tricky as it can contain embedded commas, so it
can't be trivially split up using strsep() to break on commas in
generic_parse_monolithic().  Instead, tmpfs has to supply its own generic
parser.

However, if tmpfs changes, then devtmpfs and rootfs, which are wrappers
around tmpfs or ramfs, must change too - and thus so must ramfs, so these
had to be converted also.

[AV: rewritten]

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Hugh Dickins <hughd@google.com>
cc: linux-mm@kvack.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/base/devtmpfs.c  |  16 ++--
 fs/ramfs/inode.c         |  99 ++++++++++++++-----------
 include/linux/ramfs.h    |   4 +-
 include/linux/shmem_fs.h |   4 +-
 init/do_mounts.c         |   9 +--
 mm/shmem.c               | 187 +++++++++++++++++++++++++++--------------------
 6 files changed, 180 insertions(+), 139 deletions(-)

(limited to 'include')

diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index dabf5006a254..30d0523014e0 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -67,19 +67,15 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla
 	return dget(s->s_root);
 }
 
-static struct dentry *dev_mount(struct file_system_type *fs_type, int flags,
-		      const char *dev_name, void *data)
-{
+static struct file_system_type internal_fs_type = {
+	.name = "devtmpfs",
 #ifdef CONFIG_TMPFS
-	return shmem_mount(fs_type, flags, dev_name, data);
+	.init_fs_context = shmem_init_fs_context,
+	.parameters	= &shmem_fs_parameters,
 #else
-	return ramfs_mount(fs_type, flags, dev_name, data);
+	.init_fs_context = ramfs_init_fs_context,
+	.parameters	= &ramfs_fs_parameters,
 #endif
-}
-
-static struct file_system_type internal_fs_type = {
-	.name = "devtmpfs",
-	.mount = dev_mount,
 	.kill_sb = kill_litter_super,
 };
 
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b85d1e77e934..d82636e8eb65 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -36,6 +36,8 @@
 #include <linux/magic.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
 #include "internal.h"
 
 struct ramfs_mount_opts {
@@ -175,62 +177,52 @@ static const struct super_operations ramfs_ops = {
 	.show_options	= ramfs_show_options,
 };
 
-enum {
+enum ramfs_param {
 	Opt_mode,
-	Opt_err
 };
 
-static const match_table_t tokens = {
-	{Opt_mode, "mode=%o"},
-	{Opt_err, NULL}
+static const struct fs_parameter_spec ramfs_param_specs[] = {
+	fsparam_u32oct("mode",	Opt_mode),
+	{}
 };
 
-static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
+const struct fs_parameter_description ramfs_fs_parameters = {
+	.name		= "ramfs",
+	.specs		= ramfs_param_specs,
+};
+
+static int ramfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
-	substring_t args[MAX_OPT_ARGS];
-	int option;
-	int token;
-	char *p;
-
-	opts->mode = RAMFS_DEFAULT_MODE;
-
-	while ((p = strsep(&data, ",")) != NULL) {
-		if (!*p)
-			continue;
-
-		token = match_token(p, tokens, args);
-		switch (token) {
-		case Opt_mode:
-			if (match_octal(&args[0], &option))
-				return -EINVAL;
-			opts->mode = option & S_IALLUGO;
-			break;
+	struct fs_parse_result result;
+	struct ramfs_fs_info *fsi = fc->s_fs_info;
+	int opt;
+
+	opt = fs_parse(fc, &ramfs_fs_parameters, param, &result);
+	if (opt < 0) {
 		/*
 		 * We might like to report bad mount options here;
 		 * but traditionally ramfs has ignored all mount options,
 		 * and as it is used as a !CONFIG_SHMEM simple substitute
 		 * for tmpfs, better continue to ignore other mount options.
 		 */
-		}
+		if (opt == -ENOPARAM)
+			opt = 0;
+		return opt;
+	}
+
+	switch (opt) {
+	case Opt_mode:
+		fsi->mount_opts.mode = result.uint_32 & S_IALLUGO;
+		break;
 	}
 
 	return 0;
 }
 
-static int ramfs_fill_super(struct super_block *sb, void *data, int silent)
+static int ramfs_fill_super(struct super_block *sb, struct fs_context *fc)
 {
-	struct ramfs_fs_info *fsi;
+	struct ramfs_fs_info *fsi = sb->s_fs_info;
 	struct inode *inode;
-	int err;
-
-	fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
-	sb->s_fs_info = fsi;
-	if (!fsi)
-		return -ENOMEM;
-
-	err = ramfs_parse_options(data, &fsi->mount_opts);
-	if (err)
-		return err;
 
 	sb->s_maxbytes		= MAX_LFS_FILESIZE;
 	sb->s_blocksize		= PAGE_SIZE;
@@ -247,10 +239,34 @@ static int ramfs_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-struct dentry *ramfs_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int ramfs_get_tree(struct fs_context *fc)
 {
-	return mount_nodev(fs_type, flags, data, ramfs_fill_super);
+	return get_tree_nodev(fc, ramfs_fill_super);
+}
+
+static void ramfs_free_fc(struct fs_context *fc)
+{
+	kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations ramfs_context_ops = {
+	.free		= ramfs_free_fc,
+	.parse_param	= ramfs_parse_param,
+	.get_tree	= ramfs_get_tree,
+};
+
+int ramfs_init_fs_context(struct fs_context *fc)
+{
+	struct ramfs_fs_info *fsi;
+
+	fsi = kzalloc(sizeof(*fsi), GFP_KERNEL);
+	if (!fsi)
+		return -ENOMEM;
+
+	fsi->mount_opts.mode = RAMFS_DEFAULT_MODE;
+	fc->s_fs_info = fsi;
+	fc->ops = &ramfs_context_ops;
+	return 0;
 }
 
 static void ramfs_kill_sb(struct super_block *sb)
@@ -261,7 +277,8 @@ static void ramfs_kill_sb(struct super_block *sb)
 
 static struct file_system_type ramfs_fs_type = {
 	.name		= "ramfs",
-	.mount		= ramfs_mount,
+	.init_fs_context = ramfs_init_fs_context,
+	.parameters	= &ramfs_fs_parameters,
 	.kill_sb	= ramfs_kill_sb,
 	.fs_flags	= FS_USERNS_MOUNT,
 };
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index e4d7d141545e..b806a0ff6554 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -4,8 +4,7 @@
 
 struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir,
 	 umode_t mode, dev_t dev);
-extern struct dentry *ramfs_mount(struct file_system_type *fs_type,
-	 int flags, const char *dev_name, void *data);
+extern int ramfs_init_fs_context(struct fs_context *fc);
 
 #ifdef CONFIG_MMU
 static inline int
@@ -17,6 +16,7 @@ ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
 extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize);
 #endif
 
+extern const struct fs_parameter_description ramfs_fs_parameters;
 extern const struct file_operations ramfs_file_operations;
 extern const struct vm_operations_struct generic_file_vm_ops;
 
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 88cb98b0e49b..de8e4b71e3ba 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -49,9 +49,9 @@ static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
 /*
  * Functions in mm/shmem.c called directly from elsewhere:
  */
+extern const struct fs_parameter_description shmem_fs_parameters;
 extern int shmem_init(void);
-extern struct dentry *shmem_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data);
+extern int shmem_init_fs_context(struct fs_context *fc);
 extern struct file *shmem_file_setup(const char *name,
 					loff_t size, unsigned long flags);
 extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 16c29e57f224..9634ecf3743d 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -627,18 +627,17 @@ out:
 }
 
 static bool is_tmpfs;
-static struct dentry *rootfs_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+static int rootfs_init_fs_context(struct fs_context *fc)
 {
 	if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
-		return shmem_mount(fs_type, flags, dev_name, data);
+		return shmem_init_fs_context(fc);
 
-	return ramfs_mount(fs_type, flags, dev_name, data);
+	return ramfs_init_fs_context(fc);
 }
 
 struct file_system_type rootfs_fs_type = {
 	.name		= "rootfs",
-	.mount		= rootfs_mount,
+	.init_fs_context = rootfs_init_fs_context,
 	.kill_sb	= kill_litter_super,
 };
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 6a41595dd1b3..0f7fd4a85db6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3401,22 +3401,17 @@ const struct fs_parameter_description shmem_fs_parameters = {
 	.enums		= shmem_param_enums,
 };
 
-static int shmem_parse_one(struct shmem_options *ctx,
-			   struct fs_parameter *param)
+static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 {
-	struct fs_context *fc = NULL;
+	struct shmem_options *ctx = fc->fs_private;
 	struct fs_parse_result result;
 	unsigned long long size;
 	char *rest;
 	int opt;
 
 	opt = fs_parse(fc, &shmem_fs_parameters, param, &result);
-	if (opt < 0) {
-		if (opt == -ENOPARAM)
-			return invalf(fc, "tmpfs: Unknown parameter '%s'",
-				      param->key);
+	if (opt < 0)
 		return opt;
-	}
 
 	switch (opt) {
 	case Opt_size:
@@ -3483,8 +3478,10 @@ bad_value:
 	return invalf(fc, "tmpfs: Bad value for '%s'", param->key);
 }
 
-static int shmem_parse_options(char *options, struct shmem_options *ctx)
+static int shmem_parse_options(struct fs_context *fc, void *data)
 {
+	char *options = data;
+
 	while (options != NULL) {
 		char *this_char = options;
 		for (;;) {
@@ -3504,85 +3501,81 @@ static int shmem_parse_options(char *options, struct shmem_options *ctx)
 		}
 		if (*this_char) {
 			char *value = strchr(this_char,'=');
-			struct fs_parameter param = {
-				.key	= this_char,
-				.type	= fs_value_is_string,
-			};
+			size_t len = 0;
 			int err;
 
 			if (value) {
 				*value++ = '\0';
-				param.size = strlen(value);
-				param.string = kstrdup(value, GFP_KERNEL);
-				if (!param.string)
-					goto error;
+				len = strlen(value);
 			}
-			err = shmem_parse_one(ctx, &param);
-			kfree(param.string);
-			if (err)
-				goto error;
+			err = vfs_parse_fs_string(fc, this_char, value, len);
+			if (err < 0)
+				return err;
 		}
 	}
 	return 0;
-
-error:
-	mpol_put(ctx->mpol);
-	ctx->mpol = NULL;
-	return 1;
-
 }
 
-static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
+/*
+ * Reconfigure a shmem filesystem.
+ *
+ * Note that we disallow change from limited->unlimited blocks/inodes while any
+ * are in use; but we must separately disallow unlimited->limited, because in
+ * that case we have no record of how much is already in use.
+ */
+static int shmem_reconfigure(struct fs_context *fc)
 {
-	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
-	struct shmem_options ctx = {.seen = 0};
+	struct shmem_options *ctx = fc->fs_private;
+	struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
 	unsigned long inodes;
-	int error = -EINVAL;
-
-	if (shmem_parse_options(data, &ctx))
-		return error;
+	const char *err;
 
 	spin_lock(&sbinfo->stat_lock);
 	inodes = sbinfo->max_inodes - sbinfo->free_inodes;
-	/*
-	 * Those tests disallow limited->unlimited while any are in use;
-	 * but we must separately disallow unlimited->limited, because
-	 * in that case we have no record of how much is already in use.
-	 */
-	if ((ctx.seen & SHMEM_SEEN_BLOCKS) && ctx.blocks) {
-		if (!sbinfo->max_blocks)
+	if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
+		if (!sbinfo->max_blocks) {
+			err = "Cannot retroactively limit size";
 			goto out;
+		}
 		if (percpu_counter_compare(&sbinfo->used_blocks,
-					   ctx.blocks) > 0)
+					   ctx->blocks) > 0) {
+			err = "Too small a size for current use";
 			goto out;
+		}
 	}
-	if ((ctx.seen & SHMEM_SEEN_INODES) && ctx.inodes) {
-		if (!sbinfo->max_inodes)
+	if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) {
+		if (!sbinfo->max_inodes) {
+			err = "Cannot retroactively limit inodes";
 			goto out;
-		if (ctx.inodes < inodes)
+		}
+		if (ctx->inodes < inodes) {
+			err = "Too few inodes for current use";
 			goto out;
+		}
 	}
 
-	error = 0;
-	if (ctx.seen & SHMEM_SEEN_HUGE)
-		sbinfo->huge = ctx.huge;
-	if (ctx.seen & SHMEM_SEEN_BLOCKS)
-		sbinfo->max_blocks  = ctx.blocks;
-	if (ctx.seen & SHMEM_SEEN_INODES) {
-		sbinfo->max_inodes  = ctx.inodes;
-		sbinfo->free_inodes = ctx.inodes - inodes;
+	if (ctx->seen & SHMEM_SEEN_HUGE)
+		sbinfo->huge = ctx->huge;
+	if (ctx->seen & SHMEM_SEEN_BLOCKS)
+		sbinfo->max_blocks  = ctx->blocks;
+	if (ctx->seen & SHMEM_SEEN_INODES) {
+		sbinfo->max_inodes  = ctx->inodes;
+		sbinfo->free_inodes = ctx->inodes - inodes;
 	}
 
 	/*
 	 * Preserve previous mempolicy unless mpol remount option was specified.
 	 */
-	if (ctx.mpol) {
+	if (ctx->mpol) {
 		mpol_put(sbinfo->mpol);
-		sbinfo->mpol = ctx.mpol;	/* transfers initial ref */
+		sbinfo->mpol = ctx->mpol;	/* transfers initial ref */
+		ctx->mpol = NULL;
 	}
+	spin_unlock(&sbinfo->stat_lock);
+	return 0;
 out:
 	spin_unlock(&sbinfo->stat_lock);
-	return error;
+	return invalf(fc, "tmpfs: %s", err);
 }
 
 static int shmem_show_options(struct seq_file *seq, struct dentry *root)
@@ -3623,13 +3616,11 @@ static void shmem_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
-static int shmem_fill_super(struct super_block *sb, void *data, int silent)
+static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 {
+	struct shmem_options *ctx = fc->fs_private;
 	struct inode *inode;
 	struct shmem_sb_info *sbinfo;
-	struct shmem_options ctx = {.mode = 0777 | S_ISVTX,
-				    .uid = current_fsuid(),
-				    .gid = current_fsgid()};
 	int err = -ENOMEM;
 
 	/* Round up to L1_CACHE_BYTES to resist false sharing */
@@ -3647,12 +3638,10 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	 * but the internal instance is left unlimited.
 	 */
 	if (!(sb->s_flags & SB_KERNMOUNT)) {
-		ctx.blocks = shmem_default_max_blocks();
-		ctx.inodes = shmem_default_max_inodes();
-		if (shmem_parse_options(data, &ctx)) {
-			err = -EINVAL;
-			goto failed;
-		}
+		if (!(ctx->seen & SHMEM_SEEN_BLOCKS))
+			ctx->blocks = shmem_default_max_blocks();
+		if (!(ctx->seen & SHMEM_SEEN_INODES))
+			ctx->inodes = shmem_default_max_inodes();
 	} else {
 		sb->s_flags |= SB_NOUSER;
 	}
@@ -3661,13 +3650,14 @@ static int shmem_fill_super(struct super_block *sb, void *data, int silent)
 #else
 	sb->s_flags |= SB_NOUSER;
 #endif
-	sbinfo->max_blocks = ctx.blocks;
-	sbinfo->free_inodes = sbinfo->max_inodes = ctx.inodes;
-	sbinfo->uid = ctx.uid;
-	sbinfo->gid = ctx.gid;
-	sbinfo->mode = ctx.mode;
-	sbinfo->huge = ctx.huge;
-	sbinfo->mpol = ctx.mpol;
+	sbinfo->max_blocks = ctx->blocks;
+	sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+	sbinfo->uid = ctx->uid;
+	sbinfo->gid = ctx->gid;
+	sbinfo->mode = ctx->mode;
+	sbinfo->huge = ctx->huge;
+	sbinfo->mpol = ctx->mpol;
+	ctx->mpol = NULL;
 
 	spin_lock_init(&sbinfo->stat_lock);
 	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
@@ -3704,6 +3694,31 @@ failed:
 	return err;
 }
 
+static int shmem_get_tree(struct fs_context *fc)
+{
+	return get_tree_nodev(fc, shmem_fill_super);
+}
+
+static void shmem_free_fc(struct fs_context *fc)
+{
+	struct shmem_options *ctx = fc->fs_private;
+
+	if (ctx) {
+		mpol_put(ctx->mpol);
+		kfree(ctx);
+	}
+}
+
+static const struct fs_context_operations shmem_fs_context_ops = {
+	.free			= shmem_free_fc,
+	.get_tree		= shmem_get_tree,
+#ifdef CONFIG_TMPFS
+	.parse_monolithic	= shmem_parse_options,
+	.parse_param		= shmem_parse_one,
+	.reconfigure		= shmem_reconfigure,
+#endif
+};
+
 static struct kmem_cache *shmem_inode_cachep;
 
 static struct inode *shmem_alloc_inode(struct super_block *sb)
@@ -3820,7 +3835,6 @@ static const struct super_operations shmem_ops = {
 	.destroy_inode	= shmem_destroy_inode,
 #ifdef CONFIG_TMPFS
 	.statfs		= shmem_statfs,
-	.remount_fs	= shmem_remount_fs,
 	.show_options	= shmem_show_options,
 #endif
 	.evict_inode	= shmem_evict_inode,
@@ -3841,16 +3855,30 @@ static const struct vm_operations_struct shmem_vm_ops = {
 #endif
 };
 
-struct dentry *shmem_mount(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data)
+int shmem_init_fs_context(struct fs_context *fc)
 {
-	return mount_nodev(fs_type, flags, data, shmem_fill_super);
+	struct shmem_options *ctx;
+
+	ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->mode = 0777 | S_ISVTX;
+	ctx->uid = current_fsuid();
+	ctx->gid = current_fsgid();
+
+	fc->fs_private = ctx;
+	fc->ops = &shmem_fs_context_ops;
+	return 0;
 }
 
 static struct file_system_type shmem_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "tmpfs",
-	.mount		= shmem_mount,
+	.init_fs_context = shmem_init_fs_context,
+#ifdef CONFIG_TMPFS
+	.parameters	= &shmem_fs_parameters,
+#endif
 	.kill_sb	= kill_litter_super,
 	.fs_flags	= FS_USERNS_MOUNT,
 };
@@ -3994,7 +4022,8 @@ bool shmem_huge_enabled(struct vm_area_struct *vma)
 
 static struct file_system_type shmem_fs_type = {
 	.name		= "tmpfs",
-	.mount		= ramfs_mount,
+	.init_fs_context = ramfs_init_fs_context,
+	.parameters	= &ramfs_fs_parameters,
 	.kill_sb	= kill_litter_super,
 	.fs_flags	= FS_USERNS_MOUNT,
 };
-- 
cgit v1.2.3


From 06d392cbe3db52c2ce01a2f486afd03eda75743b Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Wed, 11 Sep 2019 12:53:24 +0800
Subject: netfilter: nf_tables_offload: remove rules when the device
 unregisters

If the net_device unregisters, clean up the offload rules before the
chain is destroy.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables_offload.h |  2 +-
 net/netfilter/nf_tables_api.c             | 11 +++++---
 net/netfilter/nf_tables_offload.c         | 43 ++++++++++++++++++++++++++++++-
 3 files changed, 51 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h
index ddd048be4330..03cf5856d76f 100644
--- a/include/net/netfilter/nf_tables_offload.h
+++ b/include/net/netfilter/nf_tables_offload.h
@@ -77,7 +77,7 @@ int nft_flow_rule_offload_commit(struct net *net);
 
 int nft_chain_offload_priority(struct nft_base_chain *basechain);
 
-void nft_offload_init(void);
+int nft_offload_init(void);
 void nft_offload_exit(void);
 
 #endif
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c6f59ef96017..e4a68dc42694 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -7694,15 +7694,20 @@ static int __init nf_tables_module_init(void)
 	if (err < 0)
 		goto err4;
 
+	err = nft_offload_init();
+	if (err < 0)
+		goto err5;
+
 	/* must be last */
 	err = nfnetlink_subsys_register(&nf_tables_subsys);
 	if (err < 0)
-		goto err5;
+		goto err6;
 
 	nft_chain_route_init();
-	nft_offload_init();
 
 	return err;
+err6:
+	nft_offload_exit();
 err5:
 	rhltable_destroy(&nft_objname_ht);
 err4:
@@ -7718,8 +7723,8 @@ err1:
 
 static void __exit nf_tables_module_exit(void)
 {
-	nft_offload_exit();
 	nfnetlink_subsys_unregister(&nf_tables_subsys);
+	nft_offload_exit();
 	unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
 	nft_chain_filter_fini();
 	nft_chain_route_fini();
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 739a79cdb741..21bb772cb4b7 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -426,17 +426,58 @@ static void nft_indr_block_cb(struct net_device *dev,
 	mutex_unlock(&net->nft.commit_mutex);
 }
 
+static void nft_offload_chain_clean(struct nft_chain *chain)
+{
+	struct nft_rule *rule;
+
+	list_for_each_entry(rule, &chain->rules, list) {
+		nft_flow_offload_rule(chain, rule,
+				      NULL, FLOW_CLS_DESTROY);
+	}
+
+	nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
+}
+
+static int nft_offload_netdev_event(struct notifier_block *this,
+				    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct net *net = dev_net(dev);
+	struct nft_chain *chain;
+
+	mutex_lock(&net->nft.commit_mutex);
+	chain = __nft_offload_get_chain(dev);
+	if (chain)
+		nft_offload_chain_clean(chain);
+	mutex_unlock(&net->nft.commit_mutex);
+
+	return NOTIFY_DONE;
+}
+
 static struct flow_indr_block_ing_entry block_ing_entry = {
 	.cb	= nft_indr_block_cb,
 	.list	= LIST_HEAD_INIT(block_ing_entry.list),
 };
 
-void nft_offload_init(void)
+static struct notifier_block nft_offload_netdev_notifier = {
+	.notifier_call	= nft_offload_netdev_event,
+};
+
+int nft_offload_init(void)
 {
+	int err;
+
+	err = register_netdevice_notifier(&nft_offload_netdev_notifier);
+	if (err < 0)
+		return err;
+
 	flow_indr_add_block_ing_cb(&block_ing_entry);
+
+	return 0;
 }
 
 void nft_offload_exit(void)
 {
 	flow_indr_del_block_ing_cb(&block_ing_entry);
+	unregister_netdevice_notifier(&nft_offload_netdev_notifier);
 }
-- 
cgit v1.2.3


From 0286fbc624e2842ececb853e74645b479b55f0a3 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:01 +0100
Subject: netfilter: fix include guards.

nf_conntrack_labels.h has no include guard.  Add it.

The comment following the #endif in the nf_flow_table.h include guard
referred to the wrong macro.  Fix it.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_labels.h | 11 ++++++++---
 include/net/netfilter/nf_flow_table.h       |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index 4eacce6f3bcc..ba916411c4e1 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -1,11 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/types.h>
-#include <net/net_namespace.h>
+
+#ifndef _NF_CONNTRACK_LABELS_H
+#define _NF_CONNTRACK_LABELS_H
+
 #include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
+#include <linux/types.h>
+#include <net/net_namespace.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_extend.h>
-
 #include <uapi/linux/netfilter/xt_connlabel.h>
 
 #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
@@ -51,3 +54,5 @@ static inline void nf_conntrack_labels_fini(void) {}
 static inline int nf_connlabels_get(struct net *net, unsigned int bit) { return 0; }
 static inline void nf_connlabels_put(struct net *net) {}
 #endif
+
+#endif /* _NF_CONNTRACK_LABELS_H */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 609df33b1209..d875be62cdf0 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -127,4 +127,4 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
 	MODULE_ALIAS("nf-flowtable-" __stringify(family))
 
-#endif /* _FLOW_OFFLOAD_H */
+#endif /* _NF_FLOW_TABLE_H */
-- 
cgit v1.2.3


From b0edba2af7154c82c28a4828f483c102ab201326 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:02 +0100
Subject: netfilter: fix coding-style errors.

Several header-files, Kconfig files and Makefiles have trailing
white-space.  Remove it.

In netfilter/Kconfig, indent the type of CONFIG_NETFILTER_NETLINK_ACCT
correctly.

There are semicolons at the end of two function definitions in
include/net/netfilter/nf_conntrack_acct.h and
include/net/netfilter/nf_conntrack_ecache.h. Remove them.

Fix indentation in nf_conntrack_l4proto.h.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h           |  2 +-
 include/linux/netfilter_ipv6.h               |  2 +-
 include/net/netfilter/nf_conntrack_acct.h    |  2 +-
 include/net/netfilter/nf_conntrack_ecache.h  |  2 +-
 include/net/netfilter/nf_conntrack_expect.h  |  2 +-
 include/net/netfilter/nf_conntrack_l4proto.h | 14 +++++++-------
 include/net/netfilter/nf_conntrack_tuple.h   |  2 +-
 net/ipv4/netfilter/Kconfig                   |  8 ++++----
 net/ipv4/netfilter/Makefile                  |  2 +-
 net/netfilter/Kconfig                        |  8 ++++----
 net/netfilter/Makefile                       |  2 +-
 11 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index ae62bf1c6824..b9bc25f57c8e 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -340,7 +340,7 @@ void xt_free_table_info(struct xt_table_info *info);
 
 /**
  * xt_recseq - recursive seqcount for netfilter use
- * 
+ *
  * Packet processing changes the seqcount only if no recursion happened
  * get_counters() can use read_seqcount_begin()/read_seqcount_retry(),
  * because we use the normal seqcount convention :
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 7beb681e1ce5..a889e376d197 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -1,7 +1,7 @@
 /* IPv6-specific defines for netfilter. 
  * (C)1998 Rusty Russell -- This code is GPL.
  * (C)1999 David Jeffery
- *   this header was blatantly ripped from netfilter_ipv4.h 
+ *   this header was blatantly ripped from netfilter_ipv4.h
  *   it's amazing what adding a bunch of 6s can do =8^)
  */
 #ifndef __LINUX_IP6_NETFILTER_H
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index ad9f2172dee1..5b5287bb49db 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -45,7 +45,7 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
 #else
 	return NULL;
 #endif
-};
+}
 
 /* Check if connection tracking accounting is enabled */
 static inline bool nf_ct_acct_enabled(struct net *net)
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 52b44192b43f..0815bfadfefe 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -61,7 +61,7 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 #else
 	return NULL;
 #endif
-};
+}
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 /* This structure is passed to event handler */
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 573429be4d59..0855b60fba17 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -126,7 +126,7 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t,
 		       const union nf_inet_addr *,
 		       u_int8_t, const __be16 *, const __be16 *);
 void nf_ct_expect_put(struct nf_conntrack_expect *exp);
-int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
+int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 				u32 portid, int report, unsigned int flags);
 static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect,
 				       unsigned int flags)
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index c200b95d27ae..97240f1a3f5f 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -181,41 +181,41 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 static inline struct nf_generic_net *nf_generic_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.generic;
+	return &net->ct.nf_ct_proto.generic;
 }
 
 static inline struct nf_tcp_net *nf_tcp_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.tcp;
+	return &net->ct.nf_ct_proto.tcp;
 }
 
 static inline struct nf_udp_net *nf_udp_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.udp;
+	return &net->ct.nf_ct_proto.udp;
 }
 
 static inline struct nf_icmp_net *nf_icmp_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.icmp;
+	return &net->ct.nf_ct_proto.icmp;
 }
 
 static inline struct nf_icmp_net *nf_icmpv6_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.icmpv6;
+	return &net->ct.nf_ct_proto.icmpv6;
 }
 #endif
 
 #ifdef CONFIG_NF_CT_PROTO_DCCP
 static inline struct nf_dccp_net *nf_dccp_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.dccp;
+	return &net->ct.nf_ct_proto.dccp;
 }
 #endif
 
 #ifdef CONFIG_NF_CT_PROTO_SCTP
 static inline struct nf_sctp_net *nf_sctp_pernet(struct net *net)
 {
-       return &net->ct.nf_ct_proto.sctp;
+	return &net->ct.nf_ct_proto.sctp;
 }
 #endif
 
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 480c87b44a96..68ea9b932736 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -124,7 +124,7 @@ struct nf_conntrack_tuple_hash {
 #if IS_ENABLED(CONFIG_NETFILTER)
 static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1,
 					   const struct nf_conntrack_tuple *t2)
-{ 
+{
 	return (nf_inet_addr_cmp(&t1->src.u3, &t2->src.u3) &&
 		t1->src.u.all == t2->src.u.all &&
 		t1->src.l3num == t2->src.l3num);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 69e76d677f9e..f17b402111ce 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -272,7 +272,7 @@ config IP_NF_TARGET_CLUSTERIP
 	  The CLUSTERIP target allows you to build load-balancing clusters of
 	  network servers without having a dedicated load-balancing
 	  router/server/switch.
-	
+
 	  To compile it as a module, choose M here.  If unsure, say N.
 
 config IP_NF_TARGET_ECN
@@ -281,7 +281,7 @@ config IP_NF_TARGET_ECN
 	depends on NETFILTER_ADVANCED
 	---help---
 	  This option adds a `ECN' target, which can be used in the iptables mangle
-	  table.  
+	  table.
 
 	  You can use this target to remove the ECN bits from the IPv4 header of
 	  an IP packet.  This is particularly useful, if you need to work around
@@ -306,7 +306,7 @@ config IP_NF_RAW
 	  This option adds a `raw' table to iptables. This table is the very
 	  first in the netfilter framework and hooks in at the PREROUTING
 	  and OUTPUT chains.
-	
+
 	  If you want to compile it as a module, say M here and read
 	  <file:Documentation/kbuild/modules.rst>.  If unsure, say `N'.
 
@@ -318,7 +318,7 @@ config IP_NF_SECURITY
 	help
 	  This option adds a `security' table to iptables, for use
 	  with Mandatory Access Control (MAC) policy.
-	 
+
 	  If unsure, say N.
 
 endif # IP_NF_IPTABLES
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c50e0ec095d2..7c497c78105f 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
 # flow table support
 obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o
 
-# generic IP tables 
+# generic IP tables
 obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
 
 # the three instances of ip_tables
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 0d65f4d39494..34ec7afec116 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -20,7 +20,7 @@ config NETFILTER_FAMILY_ARP
 	bool
 
 config NETFILTER_NETLINK_ACCT
-tristate "Netfilter NFACCT over NFNETLINK interface"
+	tristate "Netfilter NFACCT over NFNETLINK interface"
 	depends on NETFILTER_ADVANCED
 	select NETFILTER_NETLINK
 	help
@@ -34,7 +34,7 @@ config NETFILTER_NETLINK_QUEUE
 	help
 	  If this option is enabled, the kernel will include support
 	  for queueing packets via NFNETLINK.
-	  
+
 config NETFILTER_NETLINK_LOG
 	tristate "Netfilter LOG over NFNETLINK interface"
 	default m if NETFILTER_ADVANCED=n
@@ -1502,7 +1502,7 @@ config NETFILTER_XT_MATCH_REALM
 	  This option adds a `realm' match, which allows you to use the realm
 	  key from the routing subsystem inside iptables.
 
-	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option 
+	  This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option
 	  in tc world.
 
 	  If you want to compile it as a module, say M here and read
@@ -1523,7 +1523,7 @@ config NETFILTER_XT_MATCH_SCTP
 	depends on NETFILTER_ADVANCED
 	default IP_SCTP
 	help
-	  With this option enabled, you will be able to use the 
+	  With this option enabled, you will be able to use the
 	  `sctp' match in order to match on SCTP source/destination ports
 	  and SCTP chunk types.
 
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 9270a7fae484..4fc075b612fe 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -124,7 +124,7 @@ nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
 
 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
 
-# generic X tables 
+# generic X tables
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
 # combos
-- 
cgit v1.2.3


From f5d65c197531e9abb7ede82b052459c1a3cf13c3 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:03 +0100
Subject: netfilter: ip_tables: remove unused function declarations.

Two headers include declarations of functions which are never defined.
Remove them.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4/ip_tables.h  | 2 --
 include/linux/netfilter_ipv6/ip6_tables.h | 3 +--
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index f40a65481df4..0b0d43ad9ed9 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -23,8 +23,6 @@
 #include <linux/init.h>
 #include <uapi/linux/netfilter_ipv4/ip_tables.h>
 
-extern void ipt_init(void) __init;
-
 #if IS_ENABLED(CONFIG_NETFILTER)
 int ipt_register_table(struct net *net, const struct xt_table *table,
 		       const struct ipt_replace *repl,
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 53b7309613bf..666450c117bf 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -23,9 +23,8 @@
 #include <linux/init.h>
 #include <uapi/linux/netfilter_ipv6/ip6_tables.h>
 
-extern void ip6t_init(void) __init;
-
 extern void *ip6t_alloc_initial_table(const struct xt_table *);
+
 #if IS_ENABLED(CONFIG_NETFILTER)
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
-- 
cgit v1.2.3


From 85cfbc25e5c5ee83307aba05eec4b04517890038 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:04 +0100
Subject: netfilter: inline xt_hashlimit, ebt_802_3 and xt_physdev headers

Three netfilter headers are only included once.  Inline their contents
at those sites and remove them.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_hashlimit.h     | 11 -----------
 include/linux/netfilter/xt_physdev.h       |  8 --------
 include/linux/netfilter_bridge/ebt_802_3.h | 12 ------------
 net/bridge/netfilter/ebt_802_3.c           |  8 +++++++-
 net/netfilter/xt_hashlimit.c               |  7 ++++++-
 net/netfilter/xt_physdev.c                 |  5 +++--
 6 files changed, 16 insertions(+), 35 deletions(-)
 delete mode 100644 include/linux/netfilter/xt_hashlimit.h
 delete mode 100644 include/linux/netfilter/xt_physdev.h
 delete mode 100644 include/linux/netfilter_bridge/ebt_802_3.h

(limited to 'include')

diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
deleted file mode 100644
index 169d03983589..000000000000
--- a/include/linux/netfilter/xt_hashlimit.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _XT_HASHLIMIT_H
-#define _XT_HASHLIMIT_H
-
-#include <uapi/linux/netfilter/xt_hashlimit.h>
-
-#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
-			  XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
-			  XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
-			  XT_HASHLIMIT_RATE_MATCH)
-#endif /*_XT_HASHLIMIT_H*/
diff --git a/include/linux/netfilter/xt_physdev.h b/include/linux/netfilter/xt_physdev.h
deleted file mode 100644
index 4ca0593949cd..000000000000
--- a/include/linux/netfilter/xt_physdev.h
+++ /dev/null
@@ -1,8 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _XT_PHYSDEV_H
-#define _XT_PHYSDEV_H
-
-#include <linux/if.h>
-#include <uapi/linux/netfilter/xt_physdev.h>
-
-#endif /*_XT_PHYSDEV_H*/
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
deleted file mode 100644
index c6147f9c0d80..000000000000
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __LINUX_BRIDGE_EBT_802_3_H
-#define __LINUX_BRIDGE_EBT_802_3_H
-
-#include <linux/skbuff.h>
-#include <uapi/linux/netfilter_bridge/ebt_802_3.h>
-
-static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
-{
-	return (struct ebt_802_3_hdr *)skb_mac_header(skb);
-}
-#endif
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 2c8fe24400e5..68c2519bdc52 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -11,7 +11,13 @@
 #include <linux/module.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
-#include <linux/netfilter_bridge/ebt_802_3.h>
+#include <linux/skbuff.h>
+#include <uapi/linux/netfilter_bridge/ebt_802_3.h>
+
+static struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
+{
+	return (struct ebt_802_3_hdr *)skb_mac_header(skb);
+}
 
 static bool
 ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 2d2691dd51e0..ced3fc8fad7c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -34,9 +34,14 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
-#include <linux/netfilter/xt_hashlimit.h>
 #include <linux/mutex.h>
 #include <linux/kernel.h>
+#include <uapi/linux/netfilter/xt_hashlimit.h>
+
+#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \
+			  XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \
+			  XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\
+			  XT_HASHLIMIT_RATE_MATCH)
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index b92b22ce8abd..ec6ed6fda96c 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -5,12 +5,13 @@
 /* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be>
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter_bridge.h>
-#include <linux/netfilter/xt_physdev.h>
 #include <linux/netfilter/x_tables.h>
-#include <net/netfilter/br_netfilter.h>
+#include <uapi/linux/netfilter/xt_physdev.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
-- 
cgit v1.2.3


From 40d102cde0a2aabb5e542ab1ab1aa4aaa1fd4372 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:05 +0100
Subject: netfilter: update include directives.

Include some headers in files which require them, and remove others
which are not required.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_core.h  |  3 ++-
 include/net/netfilter/nf_conntrack_zones.h |  3 ++-
 include/net/netfilter/nf_nat.h             | 13 ++++++-------
 include/net/netfilter/nf_nat_masquerade.h  |  1 +
 net/bridge/netfilter/nf_conntrack_bridge.c |  1 -
 net/ipv6/netfilter/nf_socket_ipv6.c        |  1 -
 net/netfilter/nf_conntrack_ecache.c        |  1 +
 net/netfilter/nf_conntrack_expect.c        |  2 ++
 net/netfilter/nf_conntrack_helper.c        |  5 +++--
 net/netfilter/nf_conntrack_timeout.c       |  1 +
 net/netfilter/nf_flow_table_core.c         |  1 +
 net/netfilter/nf_nat_core.c                |  6 +++---
 net/netfilter/nft_flow_offload.c           |  3 ++-
 net/netfilter/xt_connlimit.c               |  2 ++
 net/sched/act_ct.c                         |  2 +-
 15 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 71a2d9cb64ea..d340886e012d 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -14,8 +14,9 @@
 #define _NF_CONNTRACK_CORE_H
 
 #include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 
 /* This header is used to share core functionality between the
    standalone connection tracking module, and the compatibility layer's use
diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 52950baa3ab5..33b91d19cb7d 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -5,7 +5,8 @@
 #include <linux/netfilter/nf_conntrack_zones_common.h>
 
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-#include <net/netfilter/nf_conntrack_extend.h>
+
+#include <net/netfilter/nf_conntrack.h>
 
 static inline const struct nf_conntrack_zone *
 nf_ct_zone(const struct nf_conn *ct)
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index eec208fb9c23..eeb336809679 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,9 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
+
+#include <linux/list.h>
 #include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/nf_nat.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
+#include <uapi/linux/netfilter/nf_nat.h>
 
 enum nf_nat_manip_type {
 	NF_NAT_MANIP_SRC,
@@ -14,10 +19,6 @@ enum nf_nat_manip_type {
 #define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \
 			     (hooknum) != NF_INET_LOCAL_IN)
 
-#include <linux/list.h>
-#include <linux/netfilter/nf_conntrack_pptp.h>
-#include <net/netfilter/nf_conntrack_extend.h>
-
 /* per conntrack: nat application helper private data */
 union nf_conntrack_nat_help {
 	/* insert nat helper private data here */
@@ -26,8 +27,6 @@ union nf_conntrack_nat_help {
 #endif
 };
 
-struct nf_conn;
-
 /* The structure embedded in the conntrack structure. */
 struct nf_conn_nat {
 	union nf_conntrack_nat_help help;
diff --git a/include/net/netfilter/nf_nat_masquerade.h b/include/net/netfilter/nf_nat_masquerade.h
index 54a14d643c34..be7abc9d5f22 100644
--- a/include/net/netfilter/nf_nat_masquerade.h
+++ b/include/net/netfilter/nf_nat_masquerade.h
@@ -2,6 +2,7 @@
 #ifndef _NF_NAT_MASQUERADE_H_
 #define _NF_NAT_MASQUERADE_H_
 
+#include <linux/skbuff.h>
 #include <net/netfilter/nf_nat.h>
 
 unsigned int
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 4f5444d2a526..c9ce321fcac1 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -17,7 +17,6 @@
 #include <net/netfilter/nf_conntrack_bridge.h>
 
 #include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netfilter/nf_tables.h>
 
 #include "../br_private.h"
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 437d95545c31..b9df879c48d3 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -12,7 +12,6 @@
 #include <net/sock.h>
 #include <net/inet_sock.h>
 #include <net/inet6_hashtables.h>
-#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <net/netfilter/nf_socket.h>
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <net/netfilter/nf_conntrack.h>
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 5e2812ee2149..6fba74b5aaf7 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -24,6 +24,7 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
 static DEFINE_MUTEX(nf_ct_ecache_mutex);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 65364de915d1..42557d2b6a90 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -25,8 +25,10 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_expect.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8d729e7c36ff..118f415928ae 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -21,10 +21,11 @@
 #include <linux/rtnetlink.h>
 
 #include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_log.h>
 
 static DEFINE_MUTEX(nf_ct_helper_mutex);
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 13d0f4a92647..14387e0b8008 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -19,6 +19,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_extend.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 
 struct nf_ct_timeout *
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 80a8f9ae4c93..09310a1bd91f 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -11,6 +11,7 @@
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
 struct flow_offload_entry {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 3f6023ed4966..bfc555fcbc72 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -18,12 +18,12 @@
 
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_helper.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_zones.h>
-#include <linux/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <uapi/linux/netfilter/nf_nat.h>
 
 #include "nf_internals.h"
 
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 01705ad74a9a..22cf236eb5d5 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -6,12 +6,13 @@
 #include <linux/netfilter.h>
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/ip.h> /* for ipv4 options. */
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables_core.h>
 #include <net/netfilter/nf_conntrack_core.h>
-#include <linux/netfilter/nf_conntrack_common.h>
+#include <net/netfilter/nf_conntrack_extend.h>
 #include <net/netfilter/nf_flow_table.h>
 
 struct nft_flow_offload {
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index bc6c8ab0fa62..46fcac75f726 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -13,6 +13,8 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/ip.h>
+#include <linux/ipv6.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index cdd6f3818097..fcc46025e790 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -24,12 +24,12 @@
 #include <uapi/linux/tc_act/tc_ct.h>
 #include <net/tc_act/tc_ct.h>
 
-#include <linux/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <uapi/linux/netfilter/nf_nat.h>
 
 static struct tc_action_ops act_ct_ops;
 static unsigned int ct_net_id;
-- 
cgit v1.2.3


From 8bf3cbe32b180836720f735e6de5dee700052317 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:06 +0100
Subject: netfilter: remove nf_conntrack_icmpv6.h header.

nf_conntrack_icmpv6.h contains two object macros which duplicate macros
in linux/icmpv6.h.  The latter definitions are also visible wherever it
is included, so remove it.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/ipv6/nf_conntrack_icmpv6.h | 21 ---------------------
 include/net/netfilter/nf_conntrack.h             |  1 -
 net/netfilter/nf_conntrack_proto_icmpv6.c        |  1 -
 3 files changed, 23 deletions(-)
 delete mode 100644 include/net/netfilter/ipv6/nf_conntrack_icmpv6.h

(limited to 'include')

diff --git a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h b/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
deleted file mode 100644
index c86895bc5eb6..000000000000
--- a/include/net/netfilter/ipv6/nf_conntrack_icmpv6.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * ICMPv6 tracking.
- *
- * 21 Apl 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- *	- separated from nf_conntrack_icmp.h
- *
- * Derived from include/linux/netfiter_ipv4/ip_conntrack_icmp.h
- */
-
-#ifndef _NF_CONNTRACK_ICMPV6_H
-#define _NF_CONNTRACK_ICMPV6_H
-
-#ifndef ICMPV6_NI_QUERY
-#define ICMPV6_NI_QUERY 139
-#endif
-#ifndef ICMPV6_NI_REPLY
-#define ICMPV6_NI_REPLY 140
-#endif
-
-#endif /* _NF_CONNTRACK_ICMPV6_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2cc304efe7f9..22275f42f0bb 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -23,7 +23,6 @@
 #include <linux/netfilter/nf_conntrack_dccp.h>
 #include <linux/netfilter/nf_conntrack_sctp.h>
 #include <linux/netfilter/nf_conntrack_proto_gre.h>
-#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 
 #include <net/netfilter/nf_conntrack_tuple.h>
 
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 7e317e6698ba..6f9144e1f1c1 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -22,7 +22,6 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_zones.h>
-#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
 #include <net/netfilter/nf_log.h>
 
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
-- 
cgit v1.2.3


From 44dde23698a7a8a807d974a5124cf64b7ab2c9d5 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:07 +0100
Subject: netfilter: move inline nf_ip6_ext_hdr() function to a more
 appropriate header.

There is an inline function in ip6_tables.h which is not specific to
ip6tables and is used elswhere in netfilter.  Move it into
netfilter_ipv6.h and update the callers.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6.h            | 12 ++++++++++++
 include/linux/netfilter_ipv6/ip6_tables.h | 12 ------------
 net/ipv6/netfilter/ip6t_ipv6header.c      |  4 ++--
 net/ipv6/netfilter/nf_log_ipv6.c          |  4 ++--
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index a889e376d197..c1500209cfaf 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -10,6 +10,18 @@
 #include <uapi/linux/netfilter_ipv6.h>
 #include <net/tcp.h>
 
+/* Check for an extension */
+static inline int
+nf_ip6_ext_hdr(u8 nexthdr)
+{	return (nexthdr == IPPROTO_HOPOPTS) ||
+	       (nexthdr == IPPROTO_ROUTING) ||
+	       (nexthdr == IPPROTO_FRAGMENT) ||
+	       (nexthdr == IPPROTO_ESP) ||
+	       (nexthdr == IPPROTO_AH) ||
+	       (nexthdr == IPPROTO_NONE) ||
+	       (nexthdr == IPPROTO_DSTOPTS);
+}
+
 /* Extra routing may needed on local out, as the QUEUE target never returns
  * control to the table.
  */
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 666450c117bf..3a0a2bd054cc 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -36,18 +36,6 @@ extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  struct xt_table *table);
 #endif
 
-/* Check for an extension */
-static inline int
-ip6t_ext_hdr(u8 nexthdr)
-{	return (nexthdr == IPPROTO_HOPOPTS) ||
-	       (nexthdr == IPPROTO_ROUTING) ||
-	       (nexthdr == IPPROTO_FRAGMENT) ||
-	       (nexthdr == IPPROTO_ESP) ||
-	       (nexthdr == IPPROTO_AH) ||
-	       (nexthdr == IPPROTO_NONE) ||
-	       (nexthdr == IPPROTO_DSTOPTS);
-}
-
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 0fc6326ef499..c52ff929c93b 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -16,7 +16,7 @@
 #include <net/ipv6.h>
 
 #include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_ipv6/ip6t_ipv6header.h>
 
 MODULE_LICENSE("GPL");
@@ -42,7 +42,7 @@ ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	len = skb->len - ptr;
 	temp = 0;
 
-	while (ip6t_ext_hdr(nexthdr)) {
+	while (nf_ip6_ext_hdr(nexthdr)) {
 		const struct ipv6_opt_hdr *hp;
 		struct ipv6_opt_hdr _hdr;
 		int hdrlen;
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index f53bd8f01219..22b80db6d882 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -18,7 +18,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter.h>
-#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6.h>
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
@@ -70,7 +70,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
 	fragment = 0;
 	ptr = ip6hoff + sizeof(struct ipv6hdr);
 	currenthdr = ih->nexthdr;
-	while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) {
+	while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) {
 		struct ipv6_opt_hdr _hdr;
 		const struct ipv6_opt_hdr *hp;
 
-- 
cgit v1.2.3


From e2f1cbb16508886b2d4de924530c153ff70b3f03 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:08 +0100
Subject: netfilter: synproxy: move code between headers.

There is some non-conntrack code in the nf_conntrack_synproxy.h header.
Move it to the nf_synproxy.h header.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_synproxy.h | 39 ---------------------------
 include/net/netfilter/nf_synproxy.h           | 38 ++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 2f0171d24997..c22f0c11cc82 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -43,43 +43,4 @@ static inline bool nf_ct_add_synproxy(struct nf_conn *ct,
 	return true;
 }
 
-struct synproxy_stats {
-	unsigned int			syn_received;
-	unsigned int			cookie_invalid;
-	unsigned int			cookie_valid;
-	unsigned int			cookie_retrans;
-	unsigned int			conn_reopened;
-};
-
-struct synproxy_net {
-	struct nf_conn			*tmpl;
-	struct synproxy_stats __percpu	*stats;
-	unsigned int			hook_ref4;
-	unsigned int			hook_ref6;
-};
-
-extern unsigned int synproxy_net_id;
-static inline struct synproxy_net *synproxy_pernet(struct net *net)
-{
-	return net_generic(net, synproxy_net_id);
-}
-
-struct synproxy_options {
-	u8				options;
-	u8				wscale;
-	u16				mss_option;
-	u16				mss_encode;
-	u32				tsval;
-	u32				tsecr;
-};
-
-struct tcphdr;
-struct nf_synproxy_info;
-bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
-			    const struct tcphdr *th,
-			    struct synproxy_options *opts);
-
-void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
-				    struct synproxy_options *opts);
-
 #endif /* _NF_CONNTRACK_SYNPROXY_H */
diff --git a/include/net/netfilter/nf_synproxy.h b/include/net/netfilter/nf_synproxy.h
index dc420b47e3aa..19d1af7a0348 100644
--- a/include/net/netfilter/nf_synproxy.h
+++ b/include/net/netfilter/nf_synproxy.h
@@ -11,6 +11,44 @@
 #include <net/netfilter/nf_conntrack_seqadj.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
 
+struct synproxy_stats {
+	unsigned int			syn_received;
+	unsigned int			cookie_invalid;
+	unsigned int			cookie_valid;
+	unsigned int			cookie_retrans;
+	unsigned int			conn_reopened;
+};
+
+struct synproxy_net {
+	struct nf_conn			*tmpl;
+	struct synproxy_stats __percpu	*stats;
+	unsigned int			hook_ref4;
+	unsigned int			hook_ref6;
+};
+
+extern unsigned int synproxy_net_id;
+static inline struct synproxy_net *synproxy_pernet(struct net *net)
+{
+	return net_generic(net, synproxy_net_id);
+}
+
+struct synproxy_options {
+	u8				options;
+	u8				wscale;
+	u16				mss_option;
+	u16				mss_encode;
+	u32				tsval;
+	u32				tsecr;
+};
+
+struct nf_synproxy_info;
+bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
+			    const struct tcphdr *th,
+			    struct synproxy_options *opts);
+
+void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info,
+				    struct synproxy_options *opts);
+
 void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb,
 				 const struct tcphdr *th,
 				 const struct synproxy_options *opts);
-- 
cgit v1.2.3


From 46705b070c279b352bbbe8118d78aa31b0768245 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:09 +0100
Subject: netfilter: move nf_bridge_frag_data struct definition to a more
 appropriate header.

There is a struct definition function in nf_conntrack_bridge.h which is
not specific to conntrack and is used elswhere in netfilter.  Move it
into netfilter_bridge.h.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h            |  7 +++++++
 include/linux/netfilter_ipv6.h              | 14 +++++++-------
 include/net/netfilter/nf_conntrack_bridge.h |  7 -------
 net/bridge/netfilter/nf_conntrack_bridge.c  | 14 +++++++-------
 net/ipv6/netfilter.c                        |  4 ++--
 5 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 5f2614d02e03..f980edfdd278 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -5,6 +5,13 @@
 #include <uapi/linux/netfilter_bridge.h>
 #include <linux/skbuff.h>
 
+struct nf_bridge_frag_data {
+	char    mac[ETH_HLEN];
+	bool    vlan_present;
+	u16     vlan_tci;
+	__be16  vlan_proto;
+};
+
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index c1500209cfaf..aac42c28fe62 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -32,7 +32,7 @@ struct ip6_rt_info {
 };
 
 struct nf_queue_entry;
-struct nf_ct_bridge_frag_data;
+struct nf_bridge_frag_data;
 
 /*
  * Hook functions for ipv6 to allow xt_* modules to be built-in even
@@ -61,9 +61,9 @@ struct nf_ipv6_ops {
 	int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user);
 	int (*br_fragment)(struct net *net, struct sock *sk,
 			   struct sk_buff *skb,
-			   struct nf_ct_bridge_frag_data *data,
+			   struct nf_bridge_frag_data *data,
 			   int (*output)(struct net *, struct sock *sk,
-					 const struct nf_ct_bridge_frag_data *data,
+					 const struct nf_bridge_frag_data *data,
 					 struct sk_buff *));
 #endif
 };
@@ -135,16 +135,16 @@ static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb,
 }
 
 int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
-		    struct nf_ct_bridge_frag_data *data,
+		    struct nf_bridge_frag_data *data,
 		    int (*output)(struct net *, struct sock *sk,
-				  const struct nf_ct_bridge_frag_data *data,
+				  const struct nf_bridge_frag_data *data,
 				  struct sk_buff *));
 
 static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
 				     struct sk_buff *skb,
-				     struct nf_ct_bridge_frag_data *data,
+				     struct nf_bridge_frag_data *data,
 				     int (*output)(struct net *, struct sock *sk,
-						   const struct nf_ct_bridge_frag_data *data,
+						   const struct nf_bridge_frag_data *data,
 						   struct sk_buff *))
 {
 #if IS_MODULE(CONFIG_IPV6)
diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h
index 34c28f248b18..01b62fd5efa2 100644
--- a/include/net/netfilter/nf_conntrack_bridge.h
+++ b/include/net/netfilter/nf_conntrack_bridge.h
@@ -16,11 +16,4 @@ struct nf_ct_bridge_info {
 void nf_ct_bridge_register(struct nf_ct_bridge_info *info);
 void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info);
 
-struct nf_ct_bridge_frag_data {
-	char	mac[ETH_HLEN];
-	bool	vlan_present;
-	u16	vlan_tci;
-	__be16	vlan_proto;
-};
-
 #endif
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index c9ce321fcac1..8842798c29e6 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -26,9 +26,9 @@
  */
 static int nf_br_ip_fragment(struct net *net, struct sock *sk,
 			     struct sk_buff *skb,
-			     struct nf_ct_bridge_frag_data *data,
+			     struct nf_bridge_frag_data *data,
 			     int (*output)(struct net *, struct sock *sk,
-					   const struct nf_ct_bridge_frag_data *data,
+					   const struct nf_bridge_frag_data *data,
 					   struct sk_buff *))
 {
 	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
@@ -278,7 +278,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
 }
 
 static void nf_ct_bridge_frag_save(struct sk_buff *skb,
-				   struct nf_ct_bridge_frag_data *data)
+				   struct nf_bridge_frag_data *data)
 {
 	if (skb_vlan_tag_present(skb)) {
 		data->vlan_present = true;
@@ -293,10 +293,10 @@ static void nf_ct_bridge_frag_save(struct sk_buff *skb,
 static unsigned int
 nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
 		    int (*output)(struct net *, struct sock *sk,
-				  const struct nf_ct_bridge_frag_data *data,
+				  const struct nf_bridge_frag_data *data,
 				  struct sk_buff *))
 {
-	struct nf_ct_bridge_frag_data data;
+	struct nf_bridge_frag_data data;
 
 	if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
 		return NF_ACCEPT;
@@ -319,7 +319,7 @@ nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
 
 /* Actually only slow path refragmentation needs this. */
 static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
-				     const struct nf_ct_bridge_frag_data *data)
+				     const struct nf_bridge_frag_data *data)
 {
 	int err;
 
@@ -340,7 +340,7 @@ static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
 }
 
 static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
-				    const struct nf_ct_bridge_frag_data *data,
+				    const struct nf_bridge_frag_data *data,
 				    struct sk_buff *skb)
 {
 	int err;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 61819ed858b1..a9bff556d3b2 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -113,9 +113,9 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
 EXPORT_SYMBOL_GPL(__nf_ip6_route);
 
 int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
-		    struct nf_ct_bridge_frag_data *data,
+		    struct nf_bridge_frag_data *data,
 		    int (*output)(struct net *, struct sock *sk,
-				  const struct nf_ct_bridge_frag_data *data,
+				  const struct nf_bridge_frag_data *data,
 				  struct sk_buff *))
 {
 	int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
-- 
cgit v1.2.3


From 16b26cde6f12a759e59f267763c11bb1818d067e Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:10 +0100
Subject: netfilter: conntrack: use consistent style when defining inline
 functions

The header contains some inline functions defined as:

  static inline f (...)
  {
  #ifdef CONFIG_NF_CONNTRACK_EVENTS
    ...
  #else
    ...
  #endif
  }

and a few others as:

  #ifdef CONFIG_NF_CONNTRACK_EVENTS
  static inline f (...)
  {
    ...
  }
  #else
  static inline f (...)
  {
    ...
  }
  #endif

Prefer the former style, which is more numerous.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_ecache.h | 82 ++++++++++++++++++-----------
 1 file changed, 50 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 0815bfadfefe..eb81f9195e28 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -64,6 +64,7 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp)
 }
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
+
 /* This structure is passed to event handler */
 struct nf_ct_event {
 	struct nf_conn *ct;
@@ -84,9 +85,26 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct);
 int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
 				  u32 portid, int report);
 
+#else
+
+static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct)
+{
+}
+
+static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
+						struct nf_conn *ct,
+						u32 portid,
+						int report)
+{
+	return 0;
+}
+
+#endif
+
 static inline void
 nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
 	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_ecache *e;
 
@@ -98,31 +116,42 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
 		return;
 
 	set_bit(event, &e->cache);
+#endif
 }
 
 static inline int
 nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
 			  u32 portid, int report)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
 	const struct net *net = nf_ct_net(ct);
 
 	if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
 		return 0;
 
 	return nf_conntrack_eventmask_report(1 << event, ct, portid, report);
+#else
+	return 0;
+#endif
 }
 
 static inline int
 nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
 	const struct net *net = nf_ct_net(ct);
 
 	if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb))
 		return 0;
 
 	return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
+#else
+	return 0;
+#endif
 }
 
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+
 struct nf_exp_event {
 	struct nf_conntrack_expect *exp;
 	u32 portid;
@@ -148,41 +177,18 @@ void nf_conntrack_ecache_pernet_fini(struct net *net);
 int nf_conntrack_ecache_init(void);
 void nf_conntrack_ecache_fini(void);
 
-static inline void nf_conntrack_ecache_delayed_work(struct net *net)
+#else /* CONFIG_NF_CONNTRACK_EVENTS */
+
+static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
+					     struct nf_conntrack_expect *exp,
+					     u32 portid,
+					     int report)
 {
-	if (!delayed_work_pending(&net->ct.ecache_dwork)) {
-		schedule_delayed_work(&net->ct.ecache_dwork, HZ);
-		net->ct.ecache_dwork_pending = true;
-	}
 }
 
-static inline void nf_conntrack_ecache_work(struct net *net)
+static inline void nf_conntrack_ecache_pernet_init(struct net *net)
 {
-	if (net->ct.ecache_dwork_pending) {
-		net->ct.ecache_dwork_pending = false;
-		mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
-	}
 }
-#else /* CONFIG_NF_CONNTRACK_EVENTS */
-static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
-					    struct nf_conn *ct) {}
-static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
-						struct nf_conn *ct,
-						u32 portid,
-						int report) { return 0; }
-static inline int nf_conntrack_event(enum ip_conntrack_events event,
-				     struct nf_conn *ct) { return 0; }
-static inline int nf_conntrack_event_report(enum ip_conntrack_events event,
-					    struct nf_conn *ct,
-					    u32 portid,
-					    int report) { return 0; }
-static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
-static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
-					     struct nf_conntrack_expect *exp,
- 					     u32 portid,
- 					     int report) {}
-
-static inline void nf_conntrack_ecache_pernet_init(struct net *net) {}
 
 static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
 {
@@ -197,14 +203,26 @@ static inline void nf_conntrack_ecache_fini(void)
 {
 }
 
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
 static inline void nf_conntrack_ecache_delayed_work(struct net *net)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	if (!delayed_work_pending(&net->ct.ecache_dwork)) {
+		schedule_delayed_work(&net->ct.ecache_dwork, HZ);
+		net->ct.ecache_dwork_pending = true;
+	}
+#endif
 }
 
 static inline void nf_conntrack_ecache_work(struct net *net)
 {
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	if (net->ct.ecache_dwork_pending) {
+		net->ct.ecache_dwork_pending = false;
+		mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
+	}
+#endif
 }
-#endif /* CONFIG_NF_CONNTRACK_EVENTS */
 
 #endif /*_NF_CONNTRACK_ECACHE_H*/
-
-- 
cgit v1.2.3


From 25d7cbcd2bb5d919b9ba6fcdfe788e72c2df7e6e Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:11 +0100
Subject: netfilter: replace defined(CONFIG...) || defined(CONFIG...MODULE)
 with IS_ENABLED(CONFIG...).

A few headers contain instances of:

  #if defined(CONFIG_XXX) or defined(CONFIG_XXX_MODULE)

Replace them with:

  #if IS_ENABLED(CONFIG_XXX)

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h                      | 2 +-
 include/linux/netfilter/ipset/ip_set_getport.h | 2 +-
 include/net/netfilter/nf_conntrack_extend.h    | 2 +-
 include/net/netfilter/nf_nat.h                 | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 049aeb40fa35..754995d028e2 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -422,7 +422,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 }
 #endif /*CONFIG_NETFILTER*/
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <linux/netfilter/nf_conntrack_zones_common.h>
 
 extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu;
diff --git a/include/linux/netfilter/ipset/ip_set_getport.h b/include/linux/netfilter/ipset/ip_set_getport.h
index a906df06948b..d74cd112b88a 100644
--- a/include/linux/netfilter/ipset/ip_set_getport.h
+++ b/include/linux/netfilter/ipset/ip_set_getport.h
@@ -9,7 +9,7 @@
 extern bool ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
 				__be16 *port, u8 *proto);
 
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 extern bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
 				__be16 *port, u8 *proto);
 #else
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index 21f887c5058c..112a6f40dfaf 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -8,7 +8,7 @@
 
 enum nf_ct_ext_id {
 	NF_CT_EXT_HELPER,
-#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT)
 	NF_CT_EXT_NAT,
 #endif
 	NF_CT_EXT_SEQADJ,
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index eeb336809679..362ff94fa6b0 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -22,7 +22,7 @@ enum nf_nat_manip_type {
 /* per conntrack: nat application helper private data */
 union nf_conntrack_nat_help {
 	/* insert nat helper private data here */
-#if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT_PPTP)
 	struct nf_nat_pptp nat_pptp_info;
 #endif
 };
@@ -47,7 +47,7 @@ struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct);
 
 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 {
-#if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE)
+#if IS_ENABLED(CONFIG_NF_NAT)
 	return nf_ct_ext_find(ct, NF_CT_EXT_NAT);
 #else
 	return NULL;
-- 
cgit v1.2.3


From 22e81d7416d04355a7dfa248f187feba641c199e Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:12 +0100
Subject: netfilter: conntrack: wrap two inline functions in config checks.

nf_conntrack_synproxy.h contains three inline functions.  The contents
of two of them are wrapped in CONFIG_NETFILTER_SYNPROXY checks and just
return NULL if it is not enabled.  The third does nothing if they return
NULL, so wrap its contents as well.

nf_ct_timeout_data is only called if CONFIG_NETFILTER_TIMEOUT is
enabled.  Wrap its contents in a CONFIG_NETFILTER_TIMEOUT check like the
other inline functions in nf_conntrack_timeout.h.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_synproxy.h | 2 ++
 include/net/netfilter/nf_conntrack_timeout.h  | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index c22f0c11cc82..6a3ab081e4bf 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -32,6 +32,7 @@ static inline struct nf_conn_synproxy *nfct_synproxy_ext_add(struct nf_conn *ct)
 static inline bool nf_ct_add_synproxy(struct nf_conn *ct,
 				      const struct nf_conn *tmpl)
 {
+#if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
 	if (tmpl && nfct_synproxy(tmpl)) {
 		if (!nfct_seqadj_ext_add(ct))
 			return false;
@@ -39,6 +40,7 @@ static inline bool nf_ct_add_synproxy(struct nf_conn *ct,
 		if (!nfct_synproxy_ext_add(ct))
 			return false;
 	}
+#endif
 
 	return true;
 }
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index 00a8fbb2d735..6dd72396f534 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -32,6 +32,7 @@ struct nf_conn_timeout {
 static inline unsigned int *
 nf_ct_timeout_data(const struct nf_conn_timeout *t)
 {
+#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
 	struct nf_ct_timeout *timeout;
 
 	timeout = rcu_dereference(t->timeout);
@@ -39,6 +40,9 @@ nf_ct_timeout_data(const struct nf_conn_timeout *t)
 		return NULL;
 
 	return (unsigned int *)timeout->data;
+#else
+	return NULL;
+#endif
 }
 
 static inline
-- 
cgit v1.2.3


From f1815650b547db745bef35f74395e113fcf62cac Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:13 +0100
Subject: netfilter: br_netfilter: update stub br_nf_pre_routing_ipv6 parameter
 to `void *priv`.

The real br_nf_pre_routing_ipv6 function, defined when CONFIG_IPV6 is
enabled, expects `void *priv`, not `const struct nf_hook_ops *ops`.
Update the stub br_nf_pre_routing_ipv6, defined when CONFIG_IPV6 is
disabled, to match.

Fixes: 06198b34a3e0 ("netfilter: Pass priv instead of nf_hook_ops to netfilter hooks")
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/br_netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index 2a613c84d49f..c53909fd22cd 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -68,7 +68,7 @@ static inline int br_validate_ipv6(struct net *net, struct sk_buff *skb)
 }
 
 static inline unsigned int
-br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb,
 		       const struct nf_hook_state *state)
 {
 	return NF_ACCEPT;
-- 
cgit v1.2.3


From 261db6c2fbd64a2e649fdfa5f75cf161c384d110 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:14 +0100
Subject: netfilter: conntrack: move code to linux/nf_conntrack_common.h.

Move some `struct nf_conntrack` code from linux/skbuff.h to
linux/nf_conntrack_common.h.  Together with a couple of helpers for
getting and setting skb->_nfct, it allows us to remove
CONFIG_NF_CONNTRACK checks from net/netfilter/nf_conntrack.h.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h | 20 +++++++++++++++++
 include/linux/skbuff.h                        | 32 +++++++++++++--------------
 include/net/netfilter/nf_conntrack.h          | 24 +++++---------------
 net/netfilter/nf_conntrack_standalone.c       |  1 -
 4 files changed, 40 insertions(+), 37 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index e142b2b5f1ea..1db83c931d9c 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -2,6 +2,7 @@
 #ifndef _NF_CONNTRACK_COMMON_H
 #define _NF_CONNTRACK_COMMON_H
 
+#include <linux/atomic.h>
 #include <uapi/linux/netfilter/nf_conntrack_common.h>
 
 struct ip_conntrack_stat {
@@ -19,4 +20,23 @@ struct ip_conntrack_stat {
 	unsigned int search_restart;
 };
 
+#define NFCT_INFOMASK	7UL
+#define NFCT_PTRMASK	~(NFCT_INFOMASK)
+
+struct nf_conntrack {
+	atomic_t use;
+};
+
+void nf_conntrack_destroy(struct nf_conntrack *nfct);
+static inline void nf_conntrack_put(struct nf_conntrack *nfct)
+{
+	if (nfct && atomic_dec_and_test(&nfct->use))
+		nf_conntrack_destroy(nfct);
+}
+static inline void nf_conntrack_get(struct nf_conntrack *nfct)
+{
+	if (nfct)
+		atomic_inc(&nfct->use);
+}
+
 #endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 028e684fa974..907209c0794e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -37,6 +37,9 @@
 #include <linux/in6.h>
 #include <linux/if_packet.h>
 #include <net/flow.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <linux/netfilter/nf_conntrack_common.h>
+#endif
 
 /* The interface for checksum offload between the stack and networking drivers
  * is as follows...
@@ -244,12 +247,6 @@ struct bpf_prog;
 union bpf_attr;
 struct skb_ext;
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-struct nf_conntrack {
-	atomic_t use;
-};
-#endif
-
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 struct nf_bridge_info {
 	enum {
@@ -914,7 +911,6 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb)
 #define SKB_DST_NOREF	1UL
 #define SKB_DST_PTRMASK	~(SKB_DST_NOREF)
 
-#define SKB_NFCT_PTRMASK	~(7UL)
 /**
  * skb_dst - returns skb dst_entry
  * @skb: buffer
@@ -4040,25 +4036,27 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
 static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-	return (void *)(skb->_nfct & SKB_NFCT_PTRMASK);
+	return (void *)(skb->_nfct & NFCT_PTRMASK);
 #else
 	return NULL;
 #endif
 }
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
-void nf_conntrack_destroy(struct nf_conntrack *nfct);
-static inline void nf_conntrack_put(struct nf_conntrack *nfct)
+static inline unsigned long skb_get_nfct(const struct sk_buff *skb)
 {
-	if (nfct && atomic_dec_and_test(&nfct->use))
-		nf_conntrack_destroy(nfct);
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	return skb->_nfct;
+#else
+	return 0UL;
+#endif
 }
-static inline void nf_conntrack_get(struct nf_conntrack *nfct)
+
+static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct)
 {
-	if (nfct)
-		atomic_inc(&nfct->use);
-}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	skb->_nfct = nfct;
 #endif
+}
 
 #ifdef CONFIG_SKB_EXTENSIONS
 enum skb_ext_id {
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 22275f42f0bb..9f551f3b69c6 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -13,12 +13,10 @@
 #ifndef _NF_CONNTRACK_H
 #define _NF_CONNTRACK_H
 
-#include <linux/netfilter/nf_conntrack_common.h>
-
 #include <linux/bitops.h>
 #include <linux/compiler.h>
-#include <linux/atomic.h>
 
+#include <linux/netfilter/nf_conntrack_common.h>
 #include <linux/netfilter/nf_conntrack_tcp.h>
 #include <linux/netfilter/nf_conntrack_dccp.h>
 #include <linux/netfilter/nf_conntrack_sctp.h>
@@ -58,7 +56,6 @@ struct nf_conntrack_net {
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
 struct nf_conn {
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	/* Usage count in here is 1 for hash table, 1 per skb,
 	 * plus 1 for any connection(s) we are `master' for
 	 *
@@ -68,7 +65,6 @@ struct nf_conn {
 	 * beware nf_ct_get() is different and don't inc refcnt.
 	 */
 	struct nf_conntrack ct_general;
-#endif
 
 	spinlock_t	lock;
 	/* jiffies32 when this ct is considered dead */
@@ -149,18 +145,14 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
 int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 			     const struct nf_conn *ignored_conntrack);
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-
-#define NFCT_INFOMASK	7UL
-#define NFCT_PTRMASK	~(NFCT_INFOMASK)
-
 /* Return conntrack_info and tuple hash for given skb. */
 static inline struct nf_conn *
 nf_ct_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
 {
-	*ctinfo = skb->_nfct & NFCT_INFOMASK;
+	unsigned long nfct = skb_get_nfct(skb);
 
-	return (struct nf_conn *)(skb->_nfct & NFCT_PTRMASK);
+	*ctinfo = nfct & NFCT_INFOMASK;
+	return (struct nf_conn *)(nfct & NFCT_PTRMASK);
 }
 
 /* decrement reference count on a conntrack */
@@ -170,8 +162,6 @@ static inline void nf_ct_put(struct nf_conn *ct)
 	nf_conntrack_put(&ct->ct_general);
 }
 
-#endif
-
 /* Protocol module loading */
 int nf_ct_l3proto_try_module_get(unsigned short l3proto);
 void nf_ct_l3proto_module_put(unsigned short l3proto);
@@ -323,16 +313,12 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl);
 
 u32 nf_ct_get_id(const struct nf_conn *ct);
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-
 static inline void
 nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
 {
-	skb->_nfct = (unsigned long)ct | info;
+	skb_set_nfct(skb, (unsigned long)ct | info);
 }
 
-#endif
-
 #define NF_CT_STAT_INC(net, count)	  __this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
 #define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 88d4127df863..410809c669e1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -1167,7 +1167,6 @@ static int __init nf_conntrack_standalone_init(void)
 	if (ret < 0)
 		goto out_start;
 
-	BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK);
 	BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER);
 
 #ifdef CONFIG_SYSCTL
-- 
cgit v1.2.3


From 51a21be42ad8c2a343eb0d44813e38918b6a4df7 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:15 +0100
Subject: netfilter: conntrack: remove CONFIG_NF_CONNTRACK check from
 nf_conntrack_acct.h.

There is a superfluous `#if IS_ENABLED(CONFIG_NF_CONNTRACK)` check
wrapping some function declarations.  Remove it.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_acct.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 5b5287bb49db..f7a060c6eb28 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -65,11 +65,9 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
 #endif
 }
 
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 void nf_conntrack_acct_pernet_init(struct net *net);
 
 int nf_conntrack_acct_init(void);
 void nf_conntrack_acct_fini(void);
-#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
 
 #endif /* _NF_CONNTRACK_ACCT_H */
-- 
cgit v1.2.3


From f19438bdd4bfbfdaac441034c1aaecf02c116e68 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:16 +0100
Subject: netfilter: remove CONFIG_NETFILTER checks from headers.

`struct nf_hook_ops`, `struct nf_hook_state` and the `nf_hookfn`
function typedef appear in function and struct declarations and
definitions in a number of netfilter headers.  The structs and typedef
themselves are defined by linux/netfilter.h but only when
CONFIG_NETFILTER is enabled.  Define them unconditionally and add
forward declarations in order to remove CONFIG_NETFILTER conditionals
from the other headers.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h                    | 2 +-
 include/linux/netfilter/x_tables.h           | 6 ------
 include/linux/netfilter_arp/arp_tables.h     | 2 --
 include/linux/netfilter_bridge/ebtables.h    | 3 +--
 include/linux/netfilter_ipv4/ip_tables.h     | 7 +------
 include/linux/netfilter_ipv6/ip6_tables.h    | 5 +----
 include/net/netfilter/br_netfilter.h         | 2 --
 include/net/netfilter/nf_conntrack_bridge.h  | 4 ++--
 include/net/netfilter/nf_conntrack_core.h    | 5 ++---
 include/net/netfilter/nf_conntrack_l4proto.h | 2 --
 include/net/netfilter/nf_conntrack_tuple.h   | 2 --
 include/net/netfilter/nf_flow_table.h        | 4 ----
 include/net/netfilter/nf_nat.h               | 4 ----
 include/net/netfilter/nf_queue.h             | 4 ----
 include/net/netfilter/nf_synproxy.h          | 6 ++----
 include/net/netfilter/nf_tables.h            | 8 --------
 16 files changed, 10 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 754995d028e2..77ebb61faf48 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -15,7 +15,6 @@
 #include <linux/netdevice.h>
 #include <net/net_namespace.h>
 
-#ifdef CONFIG_NETFILTER
 static inline int NF_DROP_GETERR(int verdict)
 {
 	return -(verdict >> NF_VERDICT_QBITS);
@@ -118,6 +117,7 @@ struct nf_hook_entries {
 	 */
 };
 
+#ifdef CONFIG_NETFILTER
 static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e)
 {
 	unsigned int n = e->num_hook_entries;
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index b9bc25f57c8e..1b261c51b3a3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -35,15 +35,12 @@ struct xt_action_param {
 	union {
 		const void *matchinfo, *targinfo;
 	};
-#if IS_ENABLED(CONFIG_NETFILTER)
 	const struct nf_hook_state *state;
-#endif
 	int fragoff;
 	unsigned int thoff;
 	bool hotdrop;
 };
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 static inline struct net *xt_net(const struct xt_action_param *par)
 {
 	return par->state->net;
@@ -78,7 +75,6 @@ static inline u_int8_t xt_family(const struct xt_action_param *par)
 {
 	return par->state->pf;
 }
-#endif
 
 /**
  * struct xt_mtchk_param - parameters for match extensions'
@@ -450,9 +446,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 	return cnt;
 }
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
-#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 1b7b35bb9c27..e98028f00e47 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -49,7 +49,6 @@ struct arpt_error {
 }
 
 extern void *arpt_alloc_initial_table(const struct xt_table *);
-#if IS_ENABLED(CONFIG_NETFILTER)
 int arpt_register_table(struct net *net, const struct xt_table *table,
 			const struct arpt_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
@@ -58,7 +57,6 @@ void arpt_unregister_table(struct net *net, struct xt_table *table,
 extern unsigned int arpt_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
-#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index b5b2d371f0ef..162f59d0d17a 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -105,7 +105,7 @@ struct ebt_table {
 
 #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \
 		     ~(__alignof__(struct _xt_align)-1))
-#if IS_ENABLED(CONFIG_NETFILTER)
+
 extern int ebt_register_table(struct net *net,
 			      const struct ebt_table *table,
 			      const struct nf_hook_ops *ops,
@@ -115,7 +115,6 @@ extern void ebt_unregister_table(struct net *net, struct ebt_table *table,
 extern unsigned int ebt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct ebt_table *table);
-#endif
 
 /* True if the hook mask denotes that the rule is in a base chain,
  * used in the check() functions */
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 0b0d43ad9ed9..e9e1ed74cdf1 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -17,19 +17,16 @@
 
 #include <linux/if.h>
 #include <linux/in.h>
+#include <linux/init.h>
 #include <linux/ip.h>
 #include <linux/skbuff.h>
-
-#include <linux/init.h>
 #include <uapi/linux/netfilter_ipv4/ip_tables.h>
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 int ipt_register_table(struct net *net, const struct xt_table *table,
 		       const struct ipt_replace *repl,
 		       const struct nf_hook_ops *ops, struct xt_table **res);
 void ipt_unregister_table(struct net *net, struct xt_table *table,
 			  const struct nf_hook_ops *ops);
-#endif
 
 /* Standard entry. */
 struct ipt_standard {
@@ -65,11 +62,9 @@ struct ipt_error {
 }
 
 extern void *ipt_alloc_initial_table(const struct xt_table *);
-#if IS_ENABLED(CONFIG_NETFILTER)
 extern unsigned int ipt_do_table(struct sk_buff *skb,
 				 const struct nf_hook_state *state,
 				 struct xt_table *table);
-#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 3a0a2bd054cc..78ab959c4575 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -17,15 +17,13 @@
 
 #include <linux/if.h>
 #include <linux/in6.h>
+#include <linux/init.h>
 #include <linux/ipv6.h>
 #include <linux/skbuff.h>
-
-#include <linux/init.h>
 #include <uapi/linux/netfilter_ipv6/ip6_tables.h>
 
 extern void *ip6t_alloc_initial_table(const struct xt_table *);
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct ip6t_replace *repl,
 			const struct nf_hook_ops *ops, struct xt_table **res);
@@ -34,7 +32,6 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table,
 extern unsigned int ip6t_do_table(struct sk_buff *skb,
 				  const struct nf_hook_state *state,
 				  struct xt_table *table);
-#endif
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index c53909fd22cd..371696ec11b2 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -55,7 +55,6 @@ static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 struct net_device *setup_pre_routing(struct sk_buff *skb,
 				     const struct net *net);
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 #if IS_ENABLED(CONFIG_IPV6)
 int br_validate_ipv6(struct net *net, struct sk_buff *skb);
 unsigned int br_nf_pre_routing_ipv6(void *priv,
@@ -74,6 +73,5 @@ br_nf_pre_routing_ipv6(void *priv, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 #endif
-#endif
 
 #endif /* _BR_NETFILTER_H_ */
diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h
index 01b62fd5efa2..c564281ede5e 100644
--- a/include/net/netfilter/nf_conntrack_bridge.h
+++ b/include/net/netfilter/nf_conntrack_bridge.h
@@ -5,10 +5,10 @@
 #include <linux/types.h>
 #include <uapi/linux/if_ether.h>
 
+struct nf_hook_ops;
+
 struct nf_ct_bridge_info {
-#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_ops	*ops;
-#endif
 	unsigned int		ops_size;
 	struct module		*me;
 };
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index d340886e012d..09f2efea0b97 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -22,9 +22,8 @@
    standalone connection tracking module, and the compatibility layer's use
    of connection tracking. */
 
-#if IS_ENABLED(CONFIG_NETFILTER)
-unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state);
-#endif
+unsigned int nf_conntrack_in(struct sk_buff *skb,
+			     const struct nf_hook_state *state);
 
 int nf_conntrack_init_net(struct net *net);
 void nf_conntrack_cleanup_net(struct net *net);
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 97240f1a3f5f..4cad1f0a327a 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -75,7 +75,6 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
 bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
 				      const struct nf_conntrack_tuple *orig);
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
 			    unsigned int dataoff,
 			    const struct nf_hook_state *state,
@@ -132,7 +131,6 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
 			    unsigned int dataoff,
 			    enum ip_conntrack_info ctinfo,
 			    const struct nf_hook_state *state);
-#endif
 
 void nf_conntrack_generic_init_net(struct net *net);
 void nf_conntrack_tcp_init_net(struct net *net);
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 68ea9b932736..9334371c94e2 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -121,7 +121,6 @@ struct nf_conntrack_tuple_hash {
 	struct nf_conntrack_tuple tuple;
 };
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1,
 					   const struct nf_conntrack_tuple *t2)
 {
@@ -184,6 +183,5 @@ nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t,
 	return nf_ct_tuple_src_mask_cmp(t, tuple, mask) &&
 	       __nf_ct_tuple_dst_equal(t, tuple);
 }
-#endif
 
 #endif /* _NF_CONNTRACK_TUPLE_H */
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index d875be62cdf0..b37a7d608134 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -17,9 +17,7 @@ struct nf_flowtable_type {
 	int				family;
 	int				(*init)(struct nf_flowtable *ft);
 	void				(*free)(struct nf_flowtable *ft);
-#if IS_ENABLED(CONFIG_NETFILTER)
 	nf_hookfn			*hook;
-#endif
 	struct module			*owner;
 };
 
@@ -117,12 +115,10 @@ struct flow_ports {
 	__be16 source, dest;
 };
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
 				     const struct nf_hook_state *state);
 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
 				       const struct nf_hook_state *state);
-#endif
 
 #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
 	MODULE_ALIAS("nf-flowtable-" __stringify(family))
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 362ff94fa6b0..0d412dd63707 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -68,12 +68,10 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum,
 #endif
 }
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
 		       const struct nf_hook_ops *nat_ops, unsigned int ops_count);
 void nf_nat_unregister_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
 			  unsigned int ops_count);
-#endif
 
 unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 			   unsigned int hooknum, struct sk_buff *skb);
@@ -93,7 +91,6 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct,
 				    enum ip_conntrack_info ctinfo,
 				    unsigned int hooknum, unsigned int hdrlen);
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops);
 void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
 
@@ -106,7 +103,6 @@ void nf_nat_inet_unregister_fn(struct net *net, const struct nf_hook_ops *ops);
 unsigned int
 nf_nat_inet_fn(void *priv, struct sk_buff *skb,
 	       const struct nf_hook_state *state);
-#endif
 
 int nf_xfrm_me_harder(struct net *n, struct sk_buff *s, unsigned int family);
 
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 80edb46a1bbc..47088083667b 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -15,9 +15,7 @@ struct nf_queue_entry {
 	unsigned int		id;
 	unsigned int		hook_index;	/* index in hook_entries->hook[] */
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_state	state;
-#endif
 	u16			size; /* sizeof(entry) + saved route keys */
 
 	/* extra space to store route keys */
@@ -123,9 +121,7 @@ nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
 	return queue;
 }
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
 	     unsigned int index, unsigned int verdict);
-#endif
 
 #endif /* _NF_QUEUE_H */
diff --git a/include/net/netfilter/nf_synproxy.h b/include/net/netfilter/nf_synproxy.h
index 19d1af7a0348..a336f9434e73 100644
--- a/include/net/netfilter/nf_synproxy.h
+++ b/include/net/netfilter/nf_synproxy.h
@@ -58,10 +58,10 @@ bool synproxy_recv_client_ack(struct net *net,
 			      const struct tcphdr *th,
 			      struct synproxy_options *opts, u32 recv_seq);
 
-#if IS_ENABLED(CONFIG_NETFILTER)
+struct nf_hook_state;
+
 unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb,
 				const struct nf_hook_state *nhs);
-#endif
 int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net);
 void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net);
 
@@ -75,10 +75,8 @@ bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb,
 				   const struct tcphdr *th,
 				   struct synproxy_options *opts, u32 recv_seq);
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb,
 				const struct nf_hook_state *nhs);
-#endif
 int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net);
 void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net);
 #else
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3d9e66aa0139..2655e03dbe1b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -26,7 +26,6 @@ struct nft_pktinfo {
 	struct xt_action_param		xt;
 };
 
-#if IS_ENABLED(CONFIG_NETFILTER)
 static inline struct net *nft_net(const struct nft_pktinfo *pkt)
 {
 	return pkt->xt.state->net;
@@ -59,7 +58,6 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt,
 	pkt->skb = skb;
 	pkt->xt.state = state;
 }
-#endif
 
 static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
 					  struct sk_buff *skb)
@@ -947,11 +945,9 @@ struct nft_chain_type {
 	int				family;
 	struct module			*owner;
 	unsigned int			hook_mask;
-#if IS_ENABLED(CONFIG_NETFILTER)
 	nf_hookfn			*hooks[NF_MAX_HOOKS];
 	int				(*ops_register)(struct net *net, const struct nf_hook_ops *ops);
 	void				(*ops_unregister)(struct net *net, const struct nf_hook_ops *ops);
-#endif
 };
 
 int nft_chain_validate_dependency(const struct nft_chain *chain,
@@ -977,9 +973,7 @@ struct nft_stats {
  *	@flow_block: flow block (for hardware offload)
  */
 struct nft_base_chain {
-#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_ops		ops;
-#endif
 	const struct nft_chain_type	*type;
 	u8				policy;
 	u8				flags;
@@ -1179,9 +1173,7 @@ struct nft_flowtable {
 					use:30;
 	u64				handle;
 	/* runtime data below here */
-#if IS_ENABLED(CONFIG_NETFILTER)
 	struct nf_hook_ops		*ops ____cacheline_aligned;
-#endif
 	struct nf_flowtable		data;
 };
 
-- 
cgit v1.2.3


From 1f1475f38b6830f40daa5d2e5290b926bcb63981 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:17 +0100
Subject: netfilter: conntrack: remove CONFIG_NF_CONNTRACK checks from
 nf_conntrack_zones.h.

nf_conntrack_zones.h was wrapped in a CONFIG_NF_CONNTRACK check in order
to fix compilation failures:

  37ee3d5b3e97 ("netfilter: nf_defrag_ipv4: fix compilation error with NF_CONNTRACK=n")

Subsequent changes mean that these failures will no longer occur and the
check is unnecessary.  Remove it.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_zones.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_zones.h b/include/net/netfilter/nf_conntrack_zones.h
index 33b91d19cb7d..48dbadb96fb3 100644
--- a/include/net/netfilter/nf_conntrack_zones.h
+++ b/include/net/netfilter/nf_conntrack_zones.h
@@ -3,9 +3,6 @@
 #define _NF_CONNTRACK_ZONES_H
 
 #include <linux/netfilter/nf_conntrack_zones_common.h>
-
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-
 #include <net/netfilter/nf_conntrack.h>
 
 static inline const struct nf_conntrack_zone *
@@ -88,5 +85,5 @@ static inline bool nf_ct_zone_equal_any(const struct nf_conn *a,
 	return true;
 #endif
 }
-#endif /* IS_ENABLED(CONFIG_NF_CONNTRACK) */
+
 #endif /* _NF_CONNTRACK_ZONES_H */
-- 
cgit v1.2.3


From 0d32e7048d927418300b9f5415ca546e44621ef1 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Fri, 13 Sep 2019 09:13:18 +0100
Subject: netfilter: conntrack: remove two unused functions from
 nf_conntrack_timestamp.h.

Two inline functions defined in nf_conntrack_timestamp.h,
`nf_ct_tstamp_enabled` and `nf_ct_set_tstamp`, are not called anywhere.
Remove them.

Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_timestamp.h | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
index 2b8aeba649aa..820ea34b6029 100644
--- a/include/net/netfilter/nf_conntrack_timestamp.h
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -38,22 +38,6 @@ struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp)
 #endif
 };
 
-static inline bool nf_ct_tstamp_enabled(struct net *net)
-{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-	return net->ct.sysctl_tstamp != 0;
-#else
-	return false;
-#endif
-}
-
-static inline void nf_ct_set_tstamp(struct net *net, bool enable)
-{
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-	net->ct.sysctl_tstamp = enable;
-#endif
-}
-
 #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
 void nf_conntrack_tstamp_pernet_init(struct net *net);
 
-- 
cgit v1.2.3


From b128a30409356df65f1a51cff3eb986cac8cfedc Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Thu, 5 Sep 2019 21:40:21 -0400
Subject: padata: allocate workqueue internally

Move workqueue allocation inside of padata to prepare for further
changes to how padata uses workqueues.

Guarantees the workqueue is created with max_active=1, which padata
relies on to work correctly.  No functional change.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/padata.txt | 12 ++++++------
 crypto/pcrypt.c          | 13 ++-----------
 include/linux/padata.h   |  3 +--
 kernel/padata.c          | 24 +++++++++++++++---------
 4 files changed, 24 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/Documentation/padata.txt b/Documentation/padata.txt
index b103d0c82000..b37ba1eaace3 100644
--- a/Documentation/padata.txt
+++ b/Documentation/padata.txt
@@ -16,10 +16,12 @@ overall control of how tasks are to be run::
 
     #include <linux/padata.h>
 
-    struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+    struct padata_instance *padata_alloc(const char *name,
 					 const struct cpumask *pcpumask,
 					 const struct cpumask *cbcpumask);
 
+'name' simply identifies the instance.
+
 The pcpumask describes which processors will be used to execute work
 submitted to this instance in parallel. The cbcpumask defines which
 processors are allowed to be used as the serialization callback processor.
@@ -128,8 +130,7 @@ in that CPU mask or about a not running instance.
 
 Each task submitted to padata_do_parallel() will, in turn, be passed to
 exactly one call to the above-mentioned parallel() function, on one CPU, so
-true parallelism is achieved by submitting multiple tasks.  Despite the
-fact that the workqueue is used to make these calls, parallel() is run with
+true parallelism is achieved by submitting multiple tasks.  parallel() runs with
 software interrupts disabled and thus cannot sleep.  The parallel()
 function gets the padata_priv structure pointer as its lone parameter;
 information about the actual work to be done is probably obtained by using
@@ -148,7 +149,7 @@ fact with a call to::
 At some point in the future, padata_do_serial() will trigger a call to the
 serial() function in the padata_priv structure.  That call will happen on
 the CPU requested in the initial call to padata_do_parallel(); it, too, is
-done through the workqueue, but with local software interrupts disabled.
+run with local software interrupts disabled.
 Note that this call may be deferred for a while since the padata code takes
 pains to ensure that tasks are completed in the order in which they were
 submitted.
@@ -159,5 +160,4 @@ when a padata instance is no longer needed::
     void padata_free(struct padata_instance *pinst);
 
 This function will busy-wait while any remaining tasks are completed, so it
-might be best not to call it while there is work outstanding.  Shutting
-down the workqueue, if necessary, should be done separately.
+might be best not to call it while there is work outstanding.
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 0edf5b54fc77..d67293063c7f 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -20,7 +20,6 @@
 
 struct padata_pcrypt {
 	struct padata_instance *pinst;
-	struct workqueue_struct *wq;
 
 	/*
 	 * Cpumask for callback CPUs. It should be
@@ -397,14 +396,9 @@ static int pcrypt_init_padata(struct padata_pcrypt *pcrypt,
 
 	get_online_cpus();
 
-	pcrypt->wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE,
-				     1, name);
-	if (!pcrypt->wq)
-		goto err;
-
-	pcrypt->pinst = padata_alloc_possible(pcrypt->wq);
+	pcrypt->pinst = padata_alloc_possible(name);
 	if (!pcrypt->pinst)
-		goto err_destroy_workqueue;
+		goto err;
 
 	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
 	if (!mask)
@@ -437,8 +431,6 @@ err_free_cpumask:
 	kfree(mask);
 err_free_padata:
 	padata_free(pcrypt->pinst);
-err_destroy_workqueue:
-	destroy_workqueue(pcrypt->wq);
 err:
 	put_online_cpus();
 
@@ -452,7 +444,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
 
 	padata_stop(pcrypt->pinst);
 	padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
-	destroy_workqueue(pcrypt->wq);
 	padata_free(pcrypt->pinst);
 }
 
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 8da673861d99..839d9319920a 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -151,8 +151,7 @@ struct padata_instance {
 #define	PADATA_INVALID	4
 };
 
-extern struct padata_instance *padata_alloc_possible(
-					struct workqueue_struct *wq);
+extern struct padata_instance *padata_alloc_possible(const char *name);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
 			      struct padata_priv *padata, int cb_cpu);
diff --git a/kernel/padata.c b/kernel/padata.c
index b60cc3dcee58..58728cd7f40c 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -805,6 +805,7 @@ static void __padata_free(struct padata_instance *pinst)
 	padata_free_pd(pinst->pd);
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
+	destroy_workqueue(pinst->wq);
 	kfree(pinst);
 }
 
@@ -938,13 +939,13 @@ static struct kobj_type padata_attr_type = {
  * padata_alloc - allocate and initialize a padata instance and specify
  *                cpumasks for serial and parallel workers.
  *
- * @wq: workqueue to use for the allocated padata instance
+ * @name: used to identify the instance
  * @pcpumask: cpumask that will be used for padata parallelization
  * @cbcpumask: cpumask that will be used for padata serialization
  *
  * Must be called from a cpus_read_lock() protected region
  */
-static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+static struct padata_instance *padata_alloc(const char *name,
 					    const struct cpumask *pcpumask,
 					    const struct cpumask *cbcpumask)
 {
@@ -955,11 +956,16 @@ static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
 	if (!pinst)
 		goto err;
 
-	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+	pinst->wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE,
+				    1, name);
+	if (!pinst->wq)
 		goto err_free_inst;
+
+	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+		goto err_free_wq;
 	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
 		free_cpumask_var(pinst->cpumask.pcpu);
-		goto err_free_inst;
+		goto err_free_wq;
 	}
 	if (!padata_validate_cpumask(pinst, pcpumask) ||
 	    !padata_validate_cpumask(pinst, cbcpumask))
@@ -971,8 +977,6 @@ static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
 
 	rcu_assign_pointer(pinst->pd, pd);
 
-	pinst->wq = wq;
-
 	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
@@ -990,6 +994,8 @@ static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
 err_free_masks:
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
+err_free_wq:
+	destroy_workqueue(pinst->wq);
 err_free_inst:
 	kfree(pinst);
 err:
@@ -1001,14 +1007,14 @@ err:
  *                         Use the cpu_possible_mask for serial and
  *                         parallel workers.
  *
- * @wq: workqueue to use for the allocated padata instance
+ * @name: used to identify the instance
  *
  * Must be called from a cpus_read_lock() protected region
  */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+struct padata_instance *padata_alloc_possible(const char *name)
 {
 	lockdep_assert_cpus_held();
-	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+	return padata_alloc(name, cpu_possible_mask, cpu_possible_mask);
 }
 EXPORT_SYMBOL(padata_alloc_possible);
 
-- 
cgit v1.2.3


From 513c98d08682957cc9eba20e7e4bb349970711f3 Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Thu, 5 Sep 2019 21:40:22 -0400
Subject: workqueue: unconfine alloc/apply/free_workqueue_attrs()

padata will use these these interfaces in a later patch, so unconfine them.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/workqueue.h | 4 ++++
 kernel/workqueue.c        | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b7c585b5ec1c..4261d1c6e87b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -435,6 +435,10 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
+struct workqueue_attrs *alloc_workqueue_attrs(void);
+void free_workqueue_attrs(struct workqueue_attrs *attrs);
+int apply_workqueue_attrs(struct workqueue_struct *wq,
+			  const struct workqueue_attrs *attrs);
 int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
 
 extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 601d61150b65..f53705ff3ff1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3329,7 +3329,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context);
  *
  * Undo alloc_workqueue_attrs().
  */
-static void free_workqueue_attrs(struct workqueue_attrs *attrs)
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
 	if (attrs) {
 		free_cpumask_var(attrs->cpumask);
@@ -3345,7 +3345,7 @@ static void free_workqueue_attrs(struct workqueue_attrs *attrs)
  *
  * Return: The allocated new workqueue_attr on success. %NULL on failure.
  */
-static struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs(void)
 {
 	struct workqueue_attrs *attrs;
 
@@ -4032,7 +4032,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
  *
  * Return: 0 on success and -errno on failure.
  */
-static int apply_workqueue_attrs(struct workqueue_struct *wq,
+int apply_workqueue_attrs(struct workqueue_struct *wq,
 			  const struct workqueue_attrs *attrs)
 {
 	int ret;
-- 
cgit v1.2.3


From e6ce0e0807e90d38a2cefa524ac253d7a85c3f2f Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Thu, 5 Sep 2019 21:40:24 -0400
Subject: padata: make padata_do_parallel find alternate callback CPU

padata_do_parallel currently returns -EINVAL if the callback CPU isn't
in the callback cpumask.

pcrypt tries to prevent this situation by keeping its own callback
cpumask in sync with padata's and checks that the callback CPU it passes
to padata is valid.  Make padata handle this instead.

padata_do_parallel now takes a pointer to the callback CPU and updates
it for the caller if an alternate CPU is used.  Overall behavior in
terms of which callback CPUs are chosen stays the same.

Prepares for removal of the padata cpumask notifier in pcrypt, which
will fix a lockdep complaint about nested acquisition of the CPU hotplug
lock later in the series.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/pcrypt.c        | 33 ++-------------------------------
 include/linux/padata.h |  2 +-
 kernel/padata.c        | 27 ++++++++++++++++++++-------
 3 files changed, 23 insertions(+), 39 deletions(-)

(limited to 'include')

diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index d67293063c7f..efca962ab12a 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -57,35 +57,6 @@ struct pcrypt_aead_ctx {
 	unsigned int cb_cpu;
 };
 
-static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
-			      struct padata_pcrypt *pcrypt)
-{
-	unsigned int cpu_index, cpu, i;
-	struct pcrypt_cpumask *cpumask;
-
-	cpu = *cb_cpu;
-
-	rcu_read_lock_bh();
-	cpumask = rcu_dereference_bh(pcrypt->cb_cpumask);
-	if (cpumask_test_cpu(cpu, cpumask->mask))
-			goto out;
-
-	if (!cpumask_weight(cpumask->mask))
-			goto out;
-
-	cpu_index = cpu % cpumask_weight(cpumask->mask);
-
-	cpu = cpumask_first(cpumask->mask);
-	for (i = 0; i < cpu_index; i++)
-		cpu = cpumask_next(cpu, cpumask->mask);
-
-	*cb_cpu = cpu;
-
-out:
-	rcu_read_unlock_bh();
-	return padata_do_parallel(pcrypt->pinst, padata, cpu);
-}
-
 static int pcrypt_aead_setkey(struct crypto_aead *parent,
 			      const u8 *key, unsigned int keylen)
 {
@@ -157,7 +128,7 @@ static int pcrypt_aead_encrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_ad(creq, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
+	err = padata_do_parallel(pencrypt.pinst, padata, &ctx->cb_cpu);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -199,7 +170,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req)
 			       req->cryptlen, req->iv);
 	aead_request_set_ad(creq, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pdecrypt);
+	err = padata_do_parallel(pdecrypt.pinst, padata, &ctx->cb_cpu);
 	if (!err)
 		return -EINPROGRESS;
 
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 839d9319920a..f7851f8e2190 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -154,7 +154,7 @@ struct padata_instance {
 extern struct padata_instance *padata_alloc_possible(const char *name);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
-			      struct padata_priv *padata, int cb_cpu);
+			      struct padata_priv *padata, int *cb_cpu);
 extern void padata_do_serial(struct padata_priv *padata);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
diff --git a/kernel/padata.c b/kernel/padata.c
index 58728cd7f40c..9a17922ec436 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -94,17 +94,19 @@ static void padata_parallel_worker(struct work_struct *parallel_work)
  *
  * @pinst: padata instance
  * @padata: object to be parallelized
- * @cb_cpu: cpu the serialization callback function will run on,
- *          must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
+ * @cb_cpu: pointer to the CPU that the serialization callback function should
+ *          run on.  If it's not in the serial cpumask of @pinst
+ *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
+ *          none found, returns -EINVAL.
  *
  * The parallelization callback function will run with BHs off.
  * Note: Every object which is parallelized by padata_do_parallel
  * must be seen by padata_do_serial.
  */
 int padata_do_parallel(struct padata_instance *pinst,
-		       struct padata_priv *padata, int cb_cpu)
+		       struct padata_priv *padata, int *cb_cpu)
 {
-	int target_cpu, err;
+	int i, cpu, cpu_index, target_cpu, err;
 	struct padata_parallel_queue *queue;
 	struct parallel_data *pd;
 
@@ -116,8 +118,19 @@ int padata_do_parallel(struct padata_instance *pinst,
 	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
 		goto out;
 
-	if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
-		goto out;
+	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
+		if (!cpumask_weight(pd->cpumask.cbcpu))
+			goto out;
+
+		/* Select an alternate fallback CPU and notify the caller. */
+		cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);
+
+		cpu = cpumask_first(pd->cpumask.cbcpu);
+		for (i = 0; i < cpu_index; i++)
+			cpu = cpumask_next(cpu, pd->cpumask.cbcpu);
+
+		*cb_cpu = cpu;
+	}
 
 	err =  -EBUSY;
 	if ((pinst->flags & PADATA_RESET))
@@ -129,7 +142,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	err = 0;
 	atomic_inc(&pd->refcnt);
 	padata->pd = pd;
-	padata->cb_cpu = cb_cpu;
+	padata->cb_cpu = *cb_cpu;
 
 	target_cpu = padata_cpu_hash(pd);
 	padata->cpu = target_cpu;
-- 
cgit v1.2.3


From 45d153c08bc73c8ced640dc20d8f2b749a6cb0d0 Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Thu, 5 Sep 2019 21:40:27 -0400
Subject: padata: use separate workqueues for parallel and serial work

padata currently uses one per-CPU workqueue per instance for all work.

Prepare for running parallel jobs on an unbound workqueue by introducing
dedicated workqueues for parallel and serial work.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |  6 ++++--
 kernel/padata.c        | 28 ++++++++++++++++++----------
 2 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index f7851f8e2190..e7978f8942ca 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -127,7 +127,8 @@ struct parallel_data {
  * struct padata_instance - The overall control structure.
  *
  * @cpu_notifier: cpu hotplug notifier.
- * @wq: The workqueue in use.
+ * @parallel_wq: The workqueue used for parallel work.
+ * @serial_wq: The workqueue used for serial work.
  * @pd: The internal control structure.
  * @cpumask: User supplied cpumasks for parallel and serial works.
  * @cpumask_change_notifier: Notifiers chain for user-defined notify
@@ -139,7 +140,8 @@ struct parallel_data {
  */
 struct padata_instance {
 	struct hlist_node		 node;
-	struct workqueue_struct		*wq;
+	struct workqueue_struct		*parallel_wq;
+	struct workqueue_struct		*serial_wq;
 	struct parallel_data		*pd;
 	struct padata_cpumask		cpumask;
 	struct blocking_notifier_head	 cpumask_change_notifier;
diff --git a/kernel/padata.c b/kernel/padata.c
index 8a362923c488..669f5d53d357 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -152,7 +152,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->wq, &queue->work);
+	queue_work_on(target_cpu, pinst->parallel_wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
@@ -261,7 +261,7 @@ static void padata_reorder(struct parallel_data *pd)
 		list_add_tail(&padata->list, &squeue->serial.list);
 		spin_unlock(&squeue->serial.lock);
 
-		queue_work_on(cb_cpu, pinst->wq, &squeue->work);
+		queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
 	}
 
 	spin_unlock_bh(&pd->lock);
@@ -278,7 +278,7 @@ static void padata_reorder(struct parallel_data *pd)
 
 	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
 	if (!list_empty(&next_queue->reorder.list))
-		queue_work(pinst->wq, &pd->reorder_work);
+		queue_work(pinst->serial_wq, &pd->reorder_work);
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
@@ -818,7 +818,8 @@ static void __padata_free(struct padata_instance *pinst)
 	padata_free_pd(pinst->pd);
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
-	destroy_workqueue(pinst->wq);
+	destroy_workqueue(pinst->serial_wq);
+	destroy_workqueue(pinst->parallel_wq);
 	kfree(pinst);
 }
 
@@ -967,18 +968,23 @@ static struct padata_instance *padata_alloc(const char *name,
 	if (!pinst)
 		goto err;
 
-	pinst->wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE,
-				    1, name);
-	if (!pinst->wq)
+	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_MEM_RECLAIM |
+					     WQ_CPU_INTENSIVE, 1, name);
+	if (!pinst->parallel_wq)
 		goto err_free_inst;
 
 	get_online_cpus();
 
-	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+	pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
+					   WQ_CPU_INTENSIVE, 1, name);
+	if (!pinst->serial_wq)
 		goto err_put_cpus;
+
+	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+		goto err_free_serial_wq;
 	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
 		free_cpumask_var(pinst->cpumask.pcpu);
-		goto err_put_cpus;
+		goto err_free_serial_wq;
 	}
 	if (!padata_validate_cpumask(pinst, pcpumask) ||
 	    !padata_validate_cpumask(pinst, cbcpumask))
@@ -1010,9 +1016,11 @@ static struct padata_instance *padata_alloc(const char *name,
 err_free_masks:
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
+err_free_serial_wq:
+	destroy_workqueue(pinst->serial_wq);
 err_put_cpus:
 	put_online_cpus();
-	destroy_workqueue(pinst->wq);
+	destroy_workqueue(pinst->parallel_wq);
 err_free_inst:
 	kfree(pinst);
 err:
-- 
cgit v1.2.3


From bfde23ce200e6d33291d29b9b8b60cc2f30f0805 Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Thu, 5 Sep 2019 21:40:28 -0400
Subject: padata: unbind parallel jobs from specific CPUs

Padata binds the parallel part of a job to a single CPU and round-robins
over all CPUs in the system for each successive job.  Though the serial
parts rely on per-CPU queues for correct ordering, they're not necessary
for parallel work, and it improves performance to run the job locally on
NUMA machines and let the scheduler pick the CPU within a node on a busy
system.

So, make the parallel workqueue unbound.

Update the parallel workqueue's cpumask when the instance's parallel
cpumask changes.

Now that parallel jobs no longer run on max_active=1 workqueues, two or
more parallel works that hash to the same CPU may run simultaneously,
finish out of order, and so be serialized out of order.  Prevent this by
keeping the works sorted on the reorder list by sequence number and
checking that in the reordering logic.

padata_get_next becomes padata_find_next so it can be reused for the end
of padata_reorder, where it's used to avoid uselessly queueing work when
the next job by sequence number isn't finished yet but a later job that
hashed to the same CPU has.

The ENODATA case in padata_find_next no longer makes sense because
parallel jobs aren't bound to specific CPUs.  The EINPROGRESS case takes
care of the scenario where a parallel job is potentially running on the
same CPU as padata_find_next, and with only one error code left, just
use NULL instead.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |   3 ++
 kernel/padata.c        | 118 +++++++++++++++++++++++++++----------------------
 2 files changed, 68 insertions(+), 53 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index e7978f8942ca..43d3fd9d17fc 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -35,6 +35,7 @@ struct padata_priv {
 	struct parallel_data	*pd;
 	int			cb_cpu;
 	int			cpu;
+	unsigned int		seq_nr;
 	int			info;
 	void                    (*parallel)(struct padata_priv *padata);
 	void                    (*serial)(struct padata_priv *padata);
@@ -105,6 +106,7 @@ struct padata_cpumask {
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
+ * @processed: Number of already processed objects.
  * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
  * @reorder_work: work struct for reordering.
@@ -117,6 +119,7 @@ struct parallel_data {
 	atomic_t			reorder_objects;
 	atomic_t			refcnt;
 	atomic_t			seq_nr;
+	unsigned int			processed;
 	int				cpu;
 	struct padata_cpumask		cpumask;
 	struct work_struct		reorder_work;
diff --git a/kernel/padata.c b/kernel/padata.c
index 669f5d53d357..832224dcf2e1 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,18 +46,13 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 	return target_cpu;
 }
 
-static int padata_cpu_hash(struct parallel_data *pd)
+static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
 {
-	unsigned int seq_nr;
-	int cpu_index;
-
 	/*
 	 * Hash the sequence numbers to the cpus by taking
 	 * seq_nr mod. number of cpus in use.
 	 */
-
-	seq_nr = atomic_inc_return(&pd->seq_nr);
-	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
+	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
@@ -144,7 +139,8 @@ int padata_do_parallel(struct padata_instance *pinst,
 	padata->pd = pd;
 	padata->cb_cpu = *cb_cpu;
 
-	target_cpu = padata_cpu_hash(pd);
+	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+	target_cpu = padata_cpu_hash(pd, padata->seq_nr);
 	padata->cpu = target_cpu;
 	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
@@ -152,7 +148,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->parallel_wq, &queue->work);
+	queue_work(pinst->parallel_wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
@@ -162,21 +158,19 @@ out:
 EXPORT_SYMBOL(padata_do_parallel);
 
 /*
- * padata_get_next - Get the next object that needs serialization.
+ * padata_find_next - Find the next object that needs serialization.
  *
  * Return values are:
  *
  * A pointer to the control struct of the next object that needs
  * serialization, if present in one of the percpu reorder queues.
  *
- * -EINPROGRESS, if the next object that needs serialization will
+ * NULL, if the next object that needs serialization will
  *  be parallel processed by another cpu and is not yet present in
  *  the cpu's reorder queue.
- *
- * -ENODATA, if this cpu has to do the parallel processing for
- *  the next object.
  */
-static struct padata_priv *padata_get_next(struct parallel_data *pd)
+static struct padata_priv *padata_find_next(struct parallel_data *pd,
+					    bool remove_object)
 {
 	struct padata_parallel_queue *next_queue;
 	struct padata_priv *padata;
@@ -187,28 +181,30 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 	reorder = &next_queue->reorder;
 
 	spin_lock(&reorder->lock);
-	if (!list_empty(&reorder->list)) {
-		padata = list_entry(reorder->list.next,
-				    struct padata_priv, list);
-
-		list_del_init(&padata->list);
-		atomic_dec(&pd->reorder_objects);
+	if (list_empty(&reorder->list)) {
+		spin_unlock(&reorder->lock);
+		return NULL;
+	}
 
-		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
-					    false);
+	padata = list_entry(reorder->list.next, struct padata_priv, list);
 
+	/*
+	 * Checks the rare case where two or more parallel jobs have hashed to
+	 * the same CPU and one of the later ones finishes first.
+	 */
+	if (padata->seq_nr != pd->processed) {
 		spin_unlock(&reorder->lock);
-		goto out;
+		return NULL;
 	}
-	spin_unlock(&reorder->lock);
 
-	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
-		padata = ERR_PTR(-ENODATA);
-		goto out;
+	if (remove_object) {
+		list_del_init(&padata->list);
+		atomic_dec(&pd->reorder_objects);
+		++pd->processed;
+		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
 	}
 
-	padata = ERR_PTR(-EINPROGRESS);
-out:
+	spin_unlock(&reorder->lock);
 	return padata;
 }
 
@@ -234,26 +230,16 @@ static void padata_reorder(struct parallel_data *pd)
 		return;
 
 	while (1) {
-		padata = padata_get_next(pd);
+		padata = padata_find_next(pd, true);
 
 		/*
 		 * If the next object that needs serialization is parallel
 		 * processed by another cpu and is still on it's way to the
 		 * cpu's reorder queue, nothing to do for now.
 		 */
-		if (PTR_ERR(padata) == -EINPROGRESS)
+		if (!padata)
 			break;
 
-		/*
-		 * This cpu has to do the parallel processing of the next
-		 * object. It's waiting in the cpu's parallelization queue,
-		 * so exit immediately.
-		 */
-		if (PTR_ERR(padata) == -ENODATA) {
-			spin_unlock_bh(&pd->lock);
-			return;
-		}
-
 		cb_cpu = padata->cb_cpu;
 		squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 
@@ -277,7 +263,8 @@ static void padata_reorder(struct parallel_data *pd)
 	smp_mb();
 
 	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
-	if (!list_empty(&next_queue->reorder.list))
+	if (!list_empty(&next_queue->reorder.list) &&
+	    padata_find_next(pd, false))
 		queue_work(pinst->serial_wq, &pd->reorder_work);
 }
 
@@ -332,9 +319,14 @@ void padata_do_serial(struct padata_priv *padata)
 	struct parallel_data *pd = padata->pd;
 	struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
 							   padata->cpu);
+	struct padata_priv *cur;
 
 	spin_lock(&pqueue->reorder.lock);
-	list_add_tail(&padata->list, &pqueue->reorder.list);
+	/* Sort in ascending order of sequence number. */
+	list_for_each_entry_reverse(cur, &pqueue->reorder.list, list)
+		if (cur->seq_nr < padata->seq_nr)
+			break;
+	list_add(&padata->list, &cur->list);
 	atomic_inc(&pd->reorder_objects);
 	spin_unlock(&pqueue->reorder.lock);
 
@@ -353,17 +345,36 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
 				 const struct cpumask *pcpumask,
 				 const struct cpumask *cbcpumask)
 {
-	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
-		return -ENOMEM;
+	struct workqueue_attrs *attrs;
+	int err = -ENOMEM;
 
+	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+		goto out;
 	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
-	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pd->cpumask.pcpu);
-		return -ENOMEM;
-	}
 
+	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
+		goto free_pcpu_mask;
 	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
+
+	attrs = alloc_workqueue_attrs();
+	if (!attrs)
+		goto free_cbcpu_mask;
+
+	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
+	cpumask_copy(attrs->cpumask, pd->cpumask.pcpu);
+	err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs);
+	free_workqueue_attrs(attrs);
+	if (err < 0)
+		goto free_cbcpu_mask;
+
 	return 0;
+
+free_cbcpu_mask:
+	free_cpumask_var(pd->cpumask.cbcpu);
+free_pcpu_mask:
+	free_cpumask_var(pd->cpumask.pcpu);
+out:
+	return err;
 }
 
 static void __padata_list_init(struct padata_list *pd_list)
@@ -429,6 +440,8 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	pd->squeue = alloc_percpu(struct padata_serial_queue);
 	if (!pd->squeue)
 		goto err_free_pqueue;
+
+	pd->pinst = pinst;
 	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 		goto err_free_squeue;
 
@@ -437,7 +450,6 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
-	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
 	pd->cpu = cpumask_first(pd->cpumask.pcpu);
 	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -968,8 +980,8 @@ static struct padata_instance *padata_alloc(const char *name,
 	if (!pinst)
 		goto err;
 
-	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_MEM_RECLAIM |
-					     WQ_CPU_INTENSIVE, 1, name);
+	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
+					     name);
 	if (!pinst->parallel_wq)
 		goto err_free_inst;
 
-- 
cgit v1.2.3


From c51636a3065491af521187724d14a822548bcfd7 Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Thu, 5 Sep 2019 21:40:29 -0400
Subject: padata: remove cpu_index from the parallel_queue

With the removal of the ENODATA case from padata_get_next, the cpu_index
field is no longer useful, so it can go away.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/padata.h |  2 --
 kernel/padata.c        | 13 ++-----------
 2 files changed, 2 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/padata.h b/include/linux/padata.h
index 43d3fd9d17fc..23717eeaad23 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -75,14 +75,12 @@ struct padata_serial_queue {
  * @swork: work struct for serialization.
  * @work: work struct for parallelization.
  * @num_obj: Number of objects that are processed by this cpu.
- * @cpu_index: Index of the cpu.
  */
 struct padata_parallel_queue {
        struct padata_list    parallel;
        struct padata_list    reorder;
        struct work_struct    work;
        atomic_t              num_obj;
-       int                   cpu_index;
 };
 
 /**
diff --git a/kernel/padata.c b/kernel/padata.c
index 832224dcf2e1..c3fec1413295 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -400,21 +400,12 @@ static void padata_init_squeues(struct parallel_data *pd)
 /* Initialize all percpu queues used by parallel workers */
 static void padata_init_pqueues(struct parallel_data *pd)
 {
-	int cpu_index, cpu;
+	int cpu;
 	struct padata_parallel_queue *pqueue;
 
-	cpu_index = 0;
-	for_each_possible_cpu(cpu) {
+	for_each_cpu(cpu, pd->cpumask.pcpu) {
 		pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
-		if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
-			pqueue->cpu_index = -1;
-			continue;
-		}
-
-		pqueue->cpu_index = cpu_index;
-		cpu_index++;
-
 		__padata_list_init(&pqueue->reorder);
 		__padata_list_init(&pqueue->parallel);
 		INIT_WORK(&pqueue->work, padata_parallel_worker);
-- 
cgit v1.2.3


From 415606588c61230b7b4f0118fc2d64a0c1c4d102 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Wed, 11 Sep 2019 09:16:21 +0300
Subject: PTP: introduce new versions of IOCTLs

The current version of the IOCTL have a small problem which prevents us
from extending the API by making use of reserved fields. In these new
IOCTLs, we are now making sure that flags and rsv fields are zero which
will allow us to extend the API in the future.

Reviewed-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_chardev.c      | 63 ++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/ptp_clock.h | 24 +++++++++++++++-
 2 files changed, 86 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 18ffe449efdf..9c18476d8d10 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -126,7 +126,9 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 
 	case PTP_CLOCK_GETCAPS:
+	case PTP_CLOCK_GETCAPS2:
 		memset(&caps, 0, sizeof(caps));
+
 		caps.max_adj = ptp->info->max_adj;
 		caps.n_alarm = ptp->info->n_alarm;
 		caps.n_ext_ts = ptp->info->n_ext_ts;
@@ -139,11 +141,24 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_EXTTS_REQUEST:
+	case PTP_EXTTS_REQUEST2:
+		memset(&req, 0, sizeof(req));
+
 		if (copy_from_user(&req.extts, (void __user *)arg,
 				   sizeof(req.extts))) {
 			err = -EFAULT;
 			break;
 		}
+		if (((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
+			req.extts.rsv[0] || req.extts.rsv[1]) &&
+			cmd == PTP_EXTTS_REQUEST2) {
+			err = -EINVAL;
+			break;
+		} else if (cmd == PTP_EXTTS_REQUEST) {
+			req.extts.flags &= ~PTP_EXTTS_VALID_FLAGS;
+			req.extts.rsv[0] = 0;
+			req.extts.rsv[1] = 0;
+		}
 		if (req.extts.index >= ops->n_ext_ts) {
 			err = -EINVAL;
 			break;
@@ -154,11 +169,27 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_PEROUT_REQUEST:
+	case PTP_PEROUT_REQUEST2:
+		memset(&req, 0, sizeof(req));
+
 		if (copy_from_user(&req.perout, (void __user *)arg,
 				   sizeof(req.perout))) {
 			err = -EFAULT;
 			break;
 		}
+		if (((req.perout.flags & ~PTP_PEROUT_VALID_FLAGS) ||
+			req.perout.rsv[0] || req.perout.rsv[1] ||
+			req.perout.rsv[2] || req.perout.rsv[3]) &&
+			cmd == PTP_PEROUT_REQUEST2) {
+			err = -EINVAL;
+			break;
+		} else if (cmd == PTP_PEROUT_REQUEST) {
+			req.perout.flags &= ~PTP_PEROUT_VALID_FLAGS;
+			req.perout.rsv[0] = 0;
+			req.perout.rsv[1] = 0;
+			req.perout.rsv[2] = 0;
+			req.perout.rsv[3] = 0;
+		}
 		if (req.perout.index >= ops->n_per_out) {
 			err = -EINVAL;
 			break;
@@ -169,6 +200,9 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_ENABLE_PPS:
+	case PTP_ENABLE_PPS2:
+		memset(&req, 0, sizeof(req));
+
 		if (!capable(CAP_SYS_TIME))
 			return -EPERM;
 		req.type = PTP_CLK_REQ_PPS;
@@ -177,6 +211,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_SYS_OFFSET_PRECISE:
+	case PTP_SYS_OFFSET_PRECISE2:
 		if (!ptp->info->getcrosststamp) {
 			err = -EOPNOTSUPP;
 			break;
@@ -201,6 +236,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_SYS_OFFSET_EXTENDED:
+	case PTP_SYS_OFFSET_EXTENDED2:
 		if (!ptp->info->gettimex64) {
 			err = -EOPNOTSUPP;
 			break;
@@ -232,6 +268,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_SYS_OFFSET:
+	case PTP_SYS_OFFSET2:
 		sysoff = memdup_user((void __user *)arg, sizeof(*sysoff));
 		if (IS_ERR(sysoff)) {
 			err = PTR_ERR(sysoff);
@@ -266,10 +303,23 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_PIN_GETFUNC:
+	case PTP_PIN_GETFUNC2:
 		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
 			err = -EFAULT;
 			break;
 		}
+		if ((pd.rsv[0] || pd.rsv[1] || pd.rsv[2]
+				|| pd.rsv[3] || pd.rsv[4])
+			&& cmd == PTP_PIN_GETFUNC2) {
+			err = -EINVAL;
+			break;
+		} else if (cmd == PTP_PIN_GETFUNC) {
+			pd.rsv[0] = 0;
+			pd.rsv[1] = 0;
+			pd.rsv[2] = 0;
+			pd.rsv[3] = 0;
+			pd.rsv[4] = 0;
+		}
 		pin_index = pd.index;
 		if (pin_index >= ops->n_pins) {
 			err = -EINVAL;
@@ -285,10 +335,23 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		break;
 
 	case PTP_PIN_SETFUNC:
+	case PTP_PIN_SETFUNC2:
 		if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) {
 			err = -EFAULT;
 			break;
 		}
+		if ((pd.rsv[0] || pd.rsv[1] || pd.rsv[2]
+				|| pd.rsv[3] || pd.rsv[4])
+			&& cmd == PTP_PIN_SETFUNC2) {
+			err = -EINVAL;
+			break;
+		} else if (cmd == PTP_PIN_SETFUNC) {
+			pd.rsv[0] = 0;
+			pd.rsv[1] = 0;
+			pd.rsv[2] = 0;
+			pd.rsv[3] = 0;
+			pd.rsv[4] = 0;
+		}
 		pin_index = pd.index;
 		if (pin_index >= ops->n_pins) {
 			err = -EINVAL;
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index 1bc794ad957a..9a0af3511b68 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -25,10 +25,20 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-/* PTP_xxx bits, for the flags field within the request structures. */
+/*
+ * Bits of the ptp_extts_request.flags field:
+ */
 #define PTP_ENABLE_FEATURE (1<<0)
 #define PTP_RISING_EDGE    (1<<1)
 #define PTP_FALLING_EDGE   (1<<2)
+#define PTP_EXTTS_VALID_FLAGS	(PTP_ENABLE_FEATURE |	\
+				 PTP_RISING_EDGE |	\
+				 PTP_FALLING_EDGE)
+
+/*
+ * Bits of the ptp_perout_request.flags field:
+ */
+#define PTP_PEROUT_VALID_FLAGS (0)
 
 /*
  * struct ptp_clock_time - represents a time value
@@ -149,6 +159,18 @@ struct ptp_pin_desc {
 #define PTP_SYS_OFFSET_EXTENDED \
 	_IOWR(PTP_CLK_MAGIC, 9, struct ptp_sys_offset_extended)
 
+#define PTP_CLOCK_GETCAPS2  _IOR(PTP_CLK_MAGIC, 10, struct ptp_clock_caps)
+#define PTP_EXTTS_REQUEST2  _IOW(PTP_CLK_MAGIC, 11, struct ptp_extts_request)
+#define PTP_PEROUT_REQUEST2 _IOW(PTP_CLK_MAGIC, 12, struct ptp_perout_request)
+#define PTP_ENABLE_PPS2     _IOW(PTP_CLK_MAGIC, 13, int)
+#define PTP_SYS_OFFSET2     _IOW(PTP_CLK_MAGIC, 14, struct ptp_sys_offset)
+#define PTP_PIN_GETFUNC2    _IOWR(PTP_CLK_MAGIC, 15, struct ptp_pin_desc)
+#define PTP_PIN_SETFUNC2    _IOW(PTP_CLK_MAGIC, 16, struct ptp_pin_desc)
+#define PTP_SYS_OFFSET_PRECISE2 \
+	_IOWR(PTP_CLK_MAGIC, 17, struct ptp_sys_offset_precise)
+#define PTP_SYS_OFFSET_EXTENDED2 \
+	_IOWR(PTP_CLK_MAGIC, 18, struct ptp_sys_offset_extended)
+
 struct ptp_extts_event {
 	struct ptp_clock_time t; /* Time event occured. */
 	unsigned int index;      /* Which channel produced the event. */
-- 
cgit v1.2.3


From 823eb2a3c4c7f1b3e749f0dddb70bf8b09a76a10 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Wed, 11 Sep 2019 09:16:22 +0300
Subject: PTP: add support for one-shot output

Some controllers allow for a one-shot output pulse, in contrast to
periodic output. Now that we have extensible versions of our IOCTLs, we
can finally make use of the 'flags' field to pass a bit telling driver
that if we want one-shot pulse output.

Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ptp_clock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index 9a0af3511b68..f16301015949 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -38,8 +38,8 @@
 /*
  * Bits of the ptp_perout_request.flags field:
  */
-#define PTP_PEROUT_VALID_FLAGS (0)
-
+#define PTP_PEROUT_ONE_SHOT (1<<0)
+#define PTP_PEROUT_VALID_FLAGS	(PTP_PEROUT_ONE_SHOT)
 /*
  * struct ptp_clock_time - represents a time value
  *
@@ -77,7 +77,7 @@ struct ptp_perout_request {
 	struct ptp_clock_time start;  /* Absolute start time. */
 	struct ptp_clock_time period; /* Desired period, zero means disable. */
 	unsigned int index;           /* Which channel to configure. */
-	unsigned int flags;           /* Reserved for future use. */
+	unsigned int flags;
 	unsigned int rsv[4];          /* Reserved for future use. */
 };
 
-- 
cgit v1.2.3


From c6af0c227a22bb6bb8ff72f043e0fb6d99fd6515 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Wed, 11 Sep 2019 15:50:51 -0400
Subject: ip: support SO_MARK cmsg

Enable setting skb->mark for UDP and RAW sockets using cmsg.

This is analogous to existing support for TOS, TTL, txtime, etc.

Packet sockets already support this as of commit c7d39e32632e
("packet: support per-packet fwmark for af_packet sendmsg").

Similar to other fields, implement by
1. initialize the sockcm_cookie.mark from socket option sk_mark
2. optionally overwrite this in ip_cmsg_send/ip6_datagram_send_ctl
3. initialize inet_cork.mark from sockcm_cookie.mark
4. initialize each (usually just one) skb->mark from inet_cork.mark

Step 1 is handled in one location for most protocols by ipcm_init_sk
as of commit 351782067b6b ("ipv4: ipcm_cookie initializers").

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 1 +
 include/net/ip.h        | 1 +
 net/ipv4/ip_output.c    | 3 ++-
 net/ipv4/ping.c         | 2 +-
 net/ipv4/raw.c          | 4 ++--
 net/ipv4/udp.c          | 2 +-
 net/ipv6/ip6_output.c   | 3 ++-
 net/ipv6/raw.c          | 4 +++-
 net/ipv6/udp.c          | 3 ++-
 9 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 7769c9b36d75..34c4436fd18f 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -160,6 +160,7 @@ struct inet_cork {
 	char			priority;
 	__u16			gso_size;
 	u64			transmit_time;
+	u32			mark;
 };
 
 struct inet_cork_full {
diff --git a/include/net/ip.h b/include/net/ip.h
index 29d89de39822..95bb77f95bcc 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -88,6 +88,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 {
 	ipcm_init(ipcm);
 
+	ipcm->sockc.mark = inet->sk.sk_mark;
 	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
 	ipcm->oif = inet->sk.sk_bound_dev_if;
 	ipcm->addr = inet->inet_saddr;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index cc7ef0d05bbd..5eb73775c3f7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1266,6 +1266,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
 	cork->length = 0;
 	cork->ttl = ipc->ttl;
 	cork->tos = ipc->tos;
+	cork->mark = ipc->sockc.mark;
 	cork->priority = ipc->priority;
 	cork->transmit_time = ipc->sockc.transmit_time;
 	cork->tx_flags = 0;
@@ -1529,7 +1530,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	}
 
 	skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = cork->mark;
 	skb->tstamp = cork->transmit_time;
 	/*
 	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 9d24ef5c5d8f..535427292194 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -781,7 +781,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	} else if (!ipc.oif)
 		ipc.oif = inet->uc_index;
 
-	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
 			   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 			   inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
 			   sk->sk_uid);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 40a6abbc9cf6..80da5a66d5d7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 	skb_reserve(skb, hlen);
 
 	skb->priority = sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = sockc->mark;
 	skb->tstamp = sockc->transmit_time;
 	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
@@ -623,7 +623,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		}
 	}
 
-	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
+	flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
 			   RT_SCOPE_UNIVERSE,
 			   hdrincl ? IPPROTO_RAW : sk->sk_protocol,
 			   inet_sk_flowi_flags(sk) |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d88821c794fb..fbcd9be3a470 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1130,7 +1130,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 		fl4 = &fl4_stack;
 
-		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
+		flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
 				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 				   flow_flags,
 				   faddr, saddr, dport, inet->inet_sport,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e49fd62eea9..89a4c7c2e25d 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1294,6 +1294,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
 	cork->base.fragsize = mtu;
 	cork->base.gso_size = ipc6->gso_size;
 	cork->base.tx_flags = 0;
+	cork->base.mark = ipc6->sockc.mark;
 	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);
 
 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
@@ -1764,7 +1765,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 	hdr->daddr = *final_dst;
 
 	skb->priority = sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = cork->base.mark;
 
 	skb->tstamp = cork->base.transmit_time;
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8a6131991e38..6e1888ee4036 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -646,7 +646,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->priority = sk->sk_priority;
-	skb->mark = sk->sk_mark;
+	skb->mark = sockc->mark;
 	skb->tstamp = sockc->transmit_time;
 
 	skb_put(skb, length);
@@ -810,6 +810,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 	ipcm6_init(&ipc6);
 	ipc6.sockc.tsflags = sk->sk_tsflags;
+	ipc6.sockc.mark = sk->sk_mark;
 
 	if (sin6) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -891,6 +892,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	opt = ipv6_fixup_options(&opt_space, opt);
 
 	fl6.flowi6_proto = proto;
+	fl6.flowi6_mark = ipc6.sockc.mark;
 
 	if (!hdrincl) {
 		rfv.msg = msg;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 827fe7385078..2c8beb3896d1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1230,6 +1230,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	ipcm6_init(&ipc6);
 	ipc6.gso_size = up->gso_size;
 	ipc6.sockc.tsflags = sk->sk_tsflags;
+	ipc6.sockc.mark = sk->sk_mark;
 
 	/* destination address check */
 	if (sin6) {
@@ -1352,7 +1353,7 @@ do_udp_sendmsg:
 	if (!fl6.flowi6_oif)
 		fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
 
-	fl6.flowi6_mark = sk->sk_mark;
+	fl6.flowi6_mark = ipc6.sockc.mark;
 	fl6.flowi6_uid = sk->sk_uid;
 
 	if (msg->msg_controllen) {
-- 
cgit v1.2.3


From 7199435414868bd656284349edc1a1f528fe3662 Mon Sep 17 00:00:00 2001
From: Kaike Wan <kaike.wan@intel.com>
Date: Wed, 11 Sep 2019 07:30:47 -0400
Subject: IB/{rdmavt, hfi1, qib}: Add a counter for credit waits

This patch adds a counter for credit waits to assist field debugging.

Link: https://lore.kernel.org/r/20190911113047.126040.10857.stgit@awfm-01.aw.intel.com
Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Kaike Wan <kaike.wan@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/hfi1/chip.c     |  2 ++
 drivers/infiniband/hw/hfi1/chip.h     |  1 +
 drivers/infiniband/hw/hfi1/rc.c       | 10 ++--------
 drivers/infiniband/hw/qib/qib_rc.c    | 10 ++--------
 drivers/infiniband/hw/qib/qib_sysfs.c |  2 ++
 include/rdma/rdma_vt.h                |  1 +
 include/rdma/rdmavt_qp.h              | 35 +++++++++++++++++++++++++++++++++++
 7 files changed, 45 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 67052dc3100c..9b1fb84a3d45 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -4101,6 +4101,7 @@ def_access_ibp_counter(rc_dupreq);
 def_access_ibp_counter(rdma_seq);
 def_access_ibp_counter(unaligned);
 def_access_ibp_counter(seq_naks);
+def_access_ibp_counter(rc_crwaits);
 
 static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = {
 [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH),
@@ -5119,6 +5120,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = {
 [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq),
 [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned),
 [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks),
+[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits),
 [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL,
 			       access_sw_cpu_rc_acks),
 [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b76cf81f927f..4ca5ac8d7e9e 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1245,6 +1245,7 @@ enum {
 	C_SW_IBP_RDMA_SEQ,
 	C_SW_IBP_UNALIGNED,
 	C_SW_IBP_SEQ_NAK,
+	C_SW_IBP_RC_CRWAITS,
 	C_SW_CPU_RC_ACKS,
 	C_SW_CPU_RC_QACKS,
 	C_SW_CPU_RC_DELAYED_COMP,
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index eeca08d3b470..513a8aac9ccd 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -595,11 +595,8 @@ check_s_state:
 		case IB_WR_SEND_WITH_IMM:
 		case IB_WR_SEND_WITH_INV:
 			/* If no credit, return. */
-			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+			if (!rvt_rc_credit_avail(qp, wqe))
 				goto bail;
-			}
 			if (len > pmtu) {
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
@@ -632,11 +629,8 @@ check_s_state:
 			goto no_flow_control;
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+			if (!rvt_rc_credit_avail(qp, wqe))
 				goto bail;
-			}
 no_flow_control:
 			put_ib_reth_vaddr(
 				wqe->rdma_wr.remote_addr,
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 1d5e2d4ee257..aaf7438258fa 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -313,11 +313,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+			if (!rvt_rc_credit_avail(qp, wqe))
 				goto bail;
-			}
 			if (len > pmtu) {
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
@@ -344,11 +341,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
 			goto no_flow_control;
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
-			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
-			    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+			if (!rvt_rc_credit_avail(qp, wqe))
 				goto bail;
-			}
 no_flow_control:
 			ohdr->u.rc.reth.vaddr =
 				cpu_to_be64(wqe->rdma_wr.remote_addr);
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 905206a0c2d5..3926be78036e 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -436,6 +436,7 @@ QIB_DIAGC_ATTR(dmawait);
 QIB_DIAGC_ATTR(unaligned);
 QIB_DIAGC_ATTR(rc_dupreq);
 QIB_DIAGC_ATTR(rc_seqnak);
+QIB_DIAGC_ATTR(rc_crwaits);
 
 static struct attribute *diagc_default_attributes[] = {
 	&qib_diagc_attr_rc_resends.attr,
@@ -453,6 +454,7 @@ static struct attribute *diagc_default_attributes[] = {
 	&qib_diagc_attr_unaligned.attr,
 	&qib_diagc_attr_rc_dupreq.attr,
 	&qib_diagc_attr_rc_seqnak.attr,
+	&qib_diagc_attr_rc_crwaits.attr,
 	NULL
 };
 
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 525848e227dc..ac5a9430abb6 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -116,6 +116,7 @@ struct rvt_ibport {
 	u64 n_unaligned;
 	u64 n_rc_dupreq;
 	u64 n_rc_seqnak;
+	u64 n_rc_crwaits;
 	u16 pkey_violations;
 	u16 qkey_violations;
 	u16 mkey_violations;
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index e06c77d76463..b550ae89bf85 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -973,6 +973,41 @@ static inline void rvt_free_rq(struct rvt_rq *rq)
 	rq->wq = NULL;
 }
 
+/**
+ * rvt_to_iport - Get the ibport pointer
+ * @qp: the qp pointer
+ *
+ * This function returns the ibport pointer from the qp pointer.
+ */
+static inline struct rvt_ibport *rvt_to_iport(struct rvt_qp *qp)
+{
+	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+	return rdi->ports[qp->port_num - 1];
+}
+
+/**
+ * rvt_rc_credit_avail - Check if there are enough RC credits for the request
+ * @qp: the qp
+ * @wqe: the request
+ *
+ * This function returns false when there are not enough credits for the given
+ * request and true otherwise.
+ */
+static inline bool rvt_rc_credit_avail(struct rvt_qp *qp, struct rvt_swqe *wqe)
+{
+	lockdep_assert_held(&qp->s_lock);
+	if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
+	    rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+		struct rvt_ibport *rvp = rvt_to_iport(qp);
+
+		qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
+		rvp->n_rc_crwaits++;
+		return false;
+	}
+	return true;
+}
+
 struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
 				     u64 v,
 				     void (*cb)(struct rvt_qp *qp, u64 v));
-- 
cgit v1.2.3


From 33f2c35a54dfd75ad0e7e86918dcbe4de799a56c Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 9 Sep 2019 16:52:29 +1000
Subject: md: add feature flag MD_FEATURE_RAID0_LAYOUT

Due to a bug introduced in Linux 3.14 we cannot determine the
correctly layout for a multi-zone RAID0 array - there are two
possibilities.

It is possible to tell the kernel which to chose using a module
parameter, but this can be clumsy to use.  It would be best if
the choice were recorded in the metadata.
So add a feature flag for this purpose.
If it is set, then the 'layout' field of the superblock is used
to determine which layout to use.

If this flag is not set, then mddev->layout gets set to -1,
which causes the module parameter to be required.

Acked-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 drivers/md/md.c                | 13 +++++++++++++
 drivers/md/raid0.c             |  3 +++
 include/uapi/linux/raid/md_p.h |  2 ++
 3 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 73d5a1b04022..1be7abeb24fd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1237,6 +1237,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->new_layout = mddev->layout;
 			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
+		if (mddev->level == 0)
+			mddev->layout = -1;
 
 		if (sb->state & (1<<MD_SB_CLEAN))
 			mddev->recovery_cp = MaxSector;
@@ -1652,6 +1654,10 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 		rdev->ppl.sector = rdev->sb_start + rdev->ppl.offset;
 	}
 
+	if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT) &&
+	    sb->level != 0)
+		return -EINVAL;
+
 	if (!refdev) {
 		ret = 1;
 	} else {
@@ -1762,6 +1768,10 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
 			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
 
+		if (mddev->level == 0 &&
+		    !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RAID0_LAYOUT))
+			mddev->layout = -1;
+
 		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
 			set_bit(MD_HAS_JOURNAL, &mddev->flags);
 
@@ -6898,6 +6908,9 @@ static int set_array_info(struct mddev *mddev, mdu_array_info_t *info)
 	mddev->external	     = 0;
 
 	mddev->layout        = info->layout;
+	if (mddev->level == 0)
+		/* Cannot trust RAID0 layout info here */
+		mddev->layout = -1;
 	mddev->chunk_sectors = info->chunk_size >> 9;
 
 	if (mddev->persistent) {
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ec611abda835..f61693e59684 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -145,6 +145,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 
 	if (conf->nr_strip_zones == 1) {
 		conf->layout = RAID0_ORIG_LAYOUT;
+	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
+		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
+		conf->layout = mddev->layout;
 	} else if (default_layout == RAID0_ORIG_LAYOUT ||
 		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
 		conf->layout = default_layout;
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index b0d15c73f6d7..1f2d8c81f0e0 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -329,6 +329,7 @@ struct mdp_superblock_1 {
 #define	MD_FEATURE_JOURNAL		512 /* support write cache */
 #define	MD_FEATURE_PPL			1024 /* support PPL */
 #define	MD_FEATURE_MULTIPLE_PPLS	2048 /* support for multiple PPLs */
+#define	MD_FEATURE_RAID0_LAYOUT		4096 /* layout is meaningful for RAID0 */
 #define	MD_FEATURE_ALL			(MD_FEATURE_BITMAP_OFFSET	\
 					|MD_FEATURE_RECOVERY_OFFSET	\
 					|MD_FEATURE_RESHAPE_ACTIVE	\
@@ -341,6 +342,7 @@ struct mdp_superblock_1 {
 					|MD_FEATURE_JOURNAL		\
 					|MD_FEATURE_PPL			\
 					|MD_FEATURE_MULTIPLE_PPLS	\
+					|MD_FEATURE_RAID0_LAYOUT	\
 					)
 
 struct r5l_payload_header {
-- 
cgit v1.2.3


From 97691069dc5a4135e413d3d92200d70b46df9fe5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 12 Sep 2019 10:49:45 +0200
Subject: net: devlink: split reload op into two

In order to properly implement failure indication during reload,
split the reload op into two ops, one for down phase and one for
up phase.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c  | 19 +++++++++++++++----
 drivers/net/ethernet/mellanox/mlxsw/core.c | 19 +++++++++++++++----
 drivers/net/netdevsim/dev.c                | 13 ++++++++++---
 include/net/devlink.h                      |  5 ++++-
 net/core/devlink.c                         | 16 ++++++++++++----
 5 files changed, 56 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a39c647c12dc..ef3f3d06ff1e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -3935,17 +3935,27 @@ static void mlx4_restart_one_down(struct pci_dev *pdev);
 static int mlx4_restart_one_up(struct pci_dev *pdev, bool reload,
 			       struct devlink *devlink);
 
-static int mlx4_devlink_reload(struct devlink *devlink,
-			       struct netlink_ext_ack *extack)
+static int mlx4_devlink_reload_down(struct devlink *devlink,
+				    struct netlink_ext_ack *extack)
 {
 	struct mlx4_priv *priv = devlink_priv(devlink);
 	struct mlx4_dev *dev = &priv->dev;
 	struct mlx4_dev_persistent *persist = dev->persist;
-	int err;
 
 	if (persist->num_vfs)
 		mlx4_warn(persist->dev, "Reload performed on PF, will cause reset on operating Virtual Functions\n");
 	mlx4_restart_one_down(persist->pdev);
+	return 0;
+}
+
+static int mlx4_devlink_reload_up(struct devlink *devlink,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlx4_priv *priv = devlink_priv(devlink);
+	struct mlx4_dev *dev = &priv->dev;
+	struct mlx4_dev_persistent *persist = dev->persist;
+	int err;
+
 	err = mlx4_restart_one_up(persist->pdev, true, devlink);
 	if (err)
 		mlx4_err(persist->dev, "mlx4_restart_one_up failed, ret=%d\n",
@@ -3956,7 +3966,8 @@ static int mlx4_devlink_reload(struct devlink *devlink,
 
 static const struct devlink_ops mlx4_devlink_ops = {
 	.port_type_set	= mlx4_devlink_port_type_set,
-	.reload		= mlx4_devlink_reload,
+	.reload_down	= mlx4_devlink_reload_down,
+	.reload_up	= mlx4_devlink_reload_up,
 };
 
 static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 963a2b4b61b1..c71a1d9ea17b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -984,16 +984,26 @@ mlxsw_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req,
 	return 0;
 }
 
-static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink,
-						struct netlink_ext_ack *extack)
+static int
+mlxsw_devlink_core_bus_device_reload_down(struct devlink *devlink,
+					  struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-	int err;
 
 	if (!(mlxsw_core->bus->features & MLXSW_BUS_F_RESET))
 		return -EOPNOTSUPP;
 
 	mlxsw_core_bus_device_unregister(mlxsw_core, true);
+	return 0;
+}
+
+static int
+mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink,
+					struct netlink_ext_ack *extack)
+{
+	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
+	int err;
+
 	err = mlxsw_core_bus_device_register(mlxsw_core->bus_info,
 					     mlxsw_core->bus,
 					     mlxsw_core->bus_priv, true,
@@ -1066,7 +1076,8 @@ mlxsw_devlink_trap_group_init(struct devlink *devlink,
 }
 
 static const struct devlink_ops mlxsw_devlink_ops = {
-	.reload				= mlxsw_devlink_core_bus_device_reload,
+	.reload_down		= mlxsw_devlink_core_bus_device_reload_down,
+	.reload_up		= mlxsw_devlink_core_bus_device_reload_up,
 	.port_type_set			= mlxsw_devlink_port_type_set,
 	.port_split			= mlxsw_devlink_port_split,
 	.port_unsplit			= mlxsw_devlink_port_unsplit,
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 39cdb6c18ec0..7fba7b271a57 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -521,8 +521,14 @@ static void nsim_dev_traps_exit(struct devlink *devlink)
 	kfree(nsim_dev->trap_data);
 }
 
-static int nsim_dev_reload(struct devlink *devlink,
-			   struct netlink_ext_ack *extack)
+static int nsim_dev_reload_down(struct devlink *devlink,
+				struct netlink_ext_ack *extack)
+{
+	return 0;
+}
+
+static int nsim_dev_reload_up(struct devlink *devlink,
+			      struct netlink_ext_ack *extack)
 {
 	enum nsim_resource_id res_ids[] = {
 		NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES,
@@ -638,7 +644,8 @@ nsim_dev_devlink_trap_action_set(struct devlink *devlink,
 }
 
 static const struct devlink_ops nsim_dev_devlink_ops = {
-	.reload = nsim_dev_reload,
+	.reload_down = nsim_dev_reload_down,
+	.reload_up = nsim_dev_reload_up,
 	.flash_update = nsim_dev_flash_update,
 	.trap_init = nsim_dev_devlink_trap_init,
 	.trap_action_set = nsim_dev_devlink_trap_action_set,
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 03e4d9244ff3..ab8d56d12ffd 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -642,7 +642,10 @@ enum devlink_trap_group_generic_id {
 	}
 
 struct devlink_ops {
-	int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack);
+	int (*reload_down)(struct devlink *devlink,
+			   struct netlink_ext_ack *extack);
+	int (*reload_up)(struct devlink *devlink,
+			 struct netlink_ext_ack *extack);
 	int (*port_type_set)(struct devlink_port *devlink_port,
 			     enum devlink_port_type port_type);
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4a2fb94c44cf..9e522639693d 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2672,12 +2672,17 @@ devlink_resources_validate(struct devlink *devlink,
 	return err;
 }
 
+static bool devlink_reload_supported(struct devlink *devlink)
+{
+	return devlink->ops->reload_down && devlink->ops->reload_up;
+}
+
 static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
 	int err;
 
-	if (!devlink->ops->reload)
+	if (!devlink_reload_supported(devlink))
 		return -EOPNOTSUPP;
 
 	err = devlink_resources_validate(devlink, NULL, info);
@@ -2685,7 +2690,10 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 		NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
 		return err;
 	}
-	return devlink->ops->reload(devlink, info->extack);
+	err = devlink->ops->reload_down(devlink, info->extack);
+	if (err)
+		return err;
+	return devlink->ops->reload_up(devlink, info->extack);
 }
 
 static int devlink_nl_flash_update_fill(struct sk_buff *msg,
@@ -7150,7 +7158,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink,
 int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
 				       union devlink_param_value *init_val)
 {
-	if (!devlink->ops->reload)
+	if (!devlink_reload_supported(devlink))
 		return -EOPNOTSUPP;
 
 	return __devlink_param_driverinit_value_get(&devlink->param_list,
@@ -7197,7 +7205,7 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port,
 {
 	struct devlink *devlink = devlink_port->devlink;
 
-	if (!devlink->ops->reload)
+	if (!devlink_reload_supported(devlink))
 		return -EOPNOTSUPP;
 
 	return __devlink_param_driverinit_value_get(&devlink_port->param_list,
-- 
cgit v1.2.3


From 2670ac2625f98557fd7e083f8aa22c297e49039e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Thu, 12 Sep 2019 10:49:46 +0200
Subject: net: devlink: move reload fail indication to devlink core and expose
 to user

Currently the fact that devlink reload failed is stored in drivers.
Move this flag into devlink core. Also, expose it to the user.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/core.c | 15 +++++----------
 include/net/devlink.h                      |  3 +++
 include/uapi/linux/devlink.h               |  2 ++
 net/core/devlink.c                         | 21 ++++++++++++++++++++-
 4 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index c71a1d9ea17b..3fa96076e8a5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -80,7 +80,6 @@ struct mlxsw_core {
 	struct mlxsw_thermal *thermal;
 	struct mlxsw_core_port *ports;
 	unsigned int max_ports;
-	bool reload_fail;
 	bool fw_flash_in_progress;
 	unsigned long driver_priv[0];
 	/* driver_priv has to be always the last item */
@@ -1002,15 +1001,11 @@ mlxsw_devlink_core_bus_device_reload_up(struct devlink *devlink,
 					struct netlink_ext_ack *extack)
 {
 	struct mlxsw_core *mlxsw_core = devlink_priv(devlink);
-	int err;
-
-	err = mlxsw_core_bus_device_register(mlxsw_core->bus_info,
-					     mlxsw_core->bus,
-					     mlxsw_core->bus_priv, true,
-					     devlink);
-	mlxsw_core->reload_fail = !!err;
 
-	return err;
+	return mlxsw_core_bus_device_register(mlxsw_core->bus_info,
+					      mlxsw_core->bus,
+					      mlxsw_core->bus_priv, true,
+					      devlink);
 }
 
 static int mlxsw_devlink_flash_update(struct devlink *devlink,
@@ -1254,7 +1249,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
-	if (mlxsw_core->reload_fail) {
+	if (devlink_is_reload_failed(devlink)) {
 		if (!reload)
 			/* Only the parts that were not de-initialized in the
 			 * failed reload attempt need to be de-initialized.
diff --git a/include/net/devlink.h b/include/net/devlink.h
index ab8d56d12ffd..23e4b65ec9df 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -38,6 +38,7 @@ struct devlink {
 	struct device *dev;
 	possible_net_t _net;
 	struct mutex lock;
+	bool reload_failed;
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
@@ -945,6 +946,8 @@ void
 devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
 				     enum devlink_health_reporter_state state);
 
+bool devlink_is_reload_failed(const struct devlink *devlink);
+
 void devlink_flash_update_begin_notify(struct devlink *devlink);
 void devlink_flash_update_end_notify(struct devlink *devlink);
 void devlink_flash_update_status_notify(struct devlink *devlink,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 8da5365850cd..580b7a2e40e1 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -419,6 +419,8 @@ enum devlink_attr {
 	DEVLINK_ATTR_TRAP_METADATA,			/* nested */
 	DEVLINK_ATTR_TRAP_GROUP_NAME,			/* string */
 
+	DEVLINK_ATTR_RELOAD_FAILED,			/* u8 0 or 1 */
+
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 9e522639693d..e48680efe54a 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -471,6 +471,8 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
 
 	if (devlink_nl_put_handle(msg, devlink))
 		goto nla_put_failure;
+	if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+		goto nla_put_failure;
 
 	genlmsg_end(msg, hdr);
 	return 0;
@@ -2677,6 +2679,21 @@ static bool devlink_reload_supported(struct devlink *devlink)
 	return devlink->ops->reload_down && devlink->ops->reload_up;
 }
 
+static void devlink_reload_failed_set(struct devlink *devlink,
+				      bool reload_failed)
+{
+	if (devlink->reload_failed == reload_failed)
+		return;
+	devlink->reload_failed = reload_failed;
+	devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+	return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
 static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
@@ -2693,7 +2710,9 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
 	err = devlink->ops->reload_down(devlink, info->extack);
 	if (err)
 		return err;
-	return devlink->ops->reload_up(devlink, info->extack);
+	err = devlink->ops->reload_up(devlink, info->extack);
+	devlink_reload_failed_set(devlink, !!err);
+	return err;
 }
 
 static int devlink_nl_flash_update_fill(struct sk_buff *msg,
-- 
cgit v1.2.3


From 69a94abb82eed2789d52b58665ddf4b454d9adb9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 9 Sep 2019 19:53:17 +0900
Subject: export.h, genksyms: do not make genksyms calculate CRC of trimmed
 symbols

Arnd Bergmann reported false-positive modpost warnings detected by his
randconfig testing of linux-next.

Actually, this happens under the combination of CONFIG_MODVERSIONS
and CONFIG_TRIM_UNUSED_KSYMS since commit 15bfc2348d54 ("modpost:
check for static EXPORT_SYMBOL* functions").

For example, arch/arm/config/multi_v7_defconfig + CONFIG_MODVERSIONS
+ CONFIG_TRIM_UNUSED_KSYMS produces the following false-positives:

WARNING: "__lshrdi3" [vmlinux] is a static (unknown)
WARNING: "__ashrdi3" [vmlinux] is a static (unknown)
WARNING: "__aeabi_lasr" [vmlinux] is a static (unknown)
WARNING: "__aeabi_llsr" [vmlinux] is a static (unknown)
WARNING: "ftrace_set_clr_event" [vmlinux] is a static (unknown)
WARNING: "__muldi3" [vmlinux] is a static (unknown)
WARNING: "__aeabi_ulcmp" [vmlinux] is a static (unknown)
WARNING: "__ucmpdi2" [vmlinux] is a static (unknown)
WARNING: "__aeabi_lmul" [vmlinux] is a static (unknown)
WARNING: "__bswapsi2" [vmlinux] is a static (unknown)
WARNING: "__bswapdi2" [vmlinux] is a static (unknown)
WARNING: "__ashldi3" [vmlinux] is a static (unknown)
WARNING: "__aeabi_llsl" [vmlinux] is a static (unknown)

The root cause of the problem is not in the modpost, but in the
implementation of CONFIG_TRIM_UNUSED_KSYMS.

If there is at least one untrimmed symbol in the file, genksyms is
invoked to calculate CRC of *all* the exported symbols in that file
even if some of them have been trimmed due to no caller existing.

As a result, .tmp_*.ver files contain CRC of trimmed symbols, thus
unneeded, orphan __crc* symbols are added to objects. It had been
harmless until recently.

With commit 15bfc2348d54 ("modpost: check for static EXPORT_SYMBOL*
functions"), it is now harmful because the bogus __crc* symbols make
modpost call sym_update_crc() to add the symbols to the hash table,
but there is no one that clears the ->is_static member.

I gave Fixes to the first commit that uncovered the issue, but the
potential problem has long existed since commit f235541699bc
("export.h: allow for per-symbol configurable EXPORT_SYMBOL()").

Fixes: 15bfc2348d54 ("modpost: check for static EXPORT_SYMBOL* functions")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/export.h      | 42 ++++++++++++++++--------------------------
 scripts/genksyms/keywords.c |  6 +-----
 2 files changed, 17 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/linux/export.h b/include/linux/export.h
index cdd98a0d918c..7d8c112a8b61 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,9 +18,6 @@ extern struct module __this_module;
 #define THIS_MODULE ((struct module *)0)
 #endif
 
-#ifdef CONFIG_MODULES
-
-#if !defined(__GENKSYMS__)
 #ifdef CONFIG_MODVERSIONS
 /* Mark the CRC weak since genksyms apparently decides not to
  * generate a checksums for some symbols */
@@ -74,6 +71,12 @@ struct kernel_symbol {
 };
 #endif
 
+#ifdef __GENKSYMS__
+
+#define ___EXPORT_SYMBOL(sym, sec)	__GENKSYMS_EXPORT_SYMBOL(sym)
+
+#else
+
 /* For every exported symbol, place a struct in the __ksymtab section */
 #define ___EXPORT_SYMBOL(sym, sec)					\
 	extern typeof(sym) sym;						\
@@ -83,7 +86,9 @@ struct kernel_symbol {
 	= #sym;								\
 	__KSYMTAB_ENTRY(sym, sec)
 
-#if defined(__DISABLE_EXPORTS)
+#endif
+
+#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS)
 
 /*
  * Allow symbol exports to be disabled completely so that C code may
@@ -117,37 +122,22 @@ struct kernel_symbol {
 #define __cond_export_sym_0(sym, sec) /* nothing */
 
 #else
-#define __EXPORT_SYMBOL ___EXPORT_SYMBOL
-#endif
 
-#define EXPORT_SYMBOL(sym)					\
-	__EXPORT_SYMBOL(sym, "")
+#define __EXPORT_SYMBOL(sym, sec)	___EXPORT_SYMBOL(sym, sec)
 
-#define EXPORT_SYMBOL_GPL(sym)					\
-	__EXPORT_SYMBOL(sym, "_gpl")
-
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)				\
-	__EXPORT_SYMBOL(sym, "_gpl_future")
+#endif /* CONFIG_MODULES */
 
+#define EXPORT_SYMBOL(sym)		__EXPORT_SYMBOL(sym, "")
+#define EXPORT_SYMBOL_GPL(sym)		__EXPORT_SYMBOL(sym, "_gpl")
+#define EXPORT_SYMBOL_GPL_FUTURE(sym)	__EXPORT_SYMBOL(sym, "_gpl_future")
 #ifdef CONFIG_UNUSED_SYMBOLS
-#define EXPORT_UNUSED_SYMBOL(sym) __EXPORT_SYMBOL(sym, "_unused")
-#define EXPORT_UNUSED_SYMBOL_GPL(sym) __EXPORT_SYMBOL(sym, "_unused_gpl")
+#define EXPORT_UNUSED_SYMBOL(sym)	__EXPORT_SYMBOL(sym, "_unused")
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)	__EXPORT_SYMBOL(sym, "_unused_gpl")
 #else
 #define EXPORT_UNUSED_SYMBOL(sym)
 #define EXPORT_UNUSED_SYMBOL_GPL(sym)
 #endif
 
-#endif	/* __GENKSYMS__ */
-
-#else /* !CONFIG_MODULES... */
-
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
-
-#endif /* CONFIG_MODULES */
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _LINUX_EXPORT_H */
diff --git a/scripts/genksyms/keywords.c b/scripts/genksyms/keywords.c
index c586d32dd2c3..7a85c4e21175 100644
--- a/scripts/genksyms/keywords.c
+++ b/scripts/genksyms/keywords.c
@@ -3,11 +3,7 @@ static struct resword {
 	const char *name;
 	int token;
 } keywords[] = {
-	{ "EXPORT_SYMBOL", EXPORT_SYMBOL_KEYW },
-	{ "EXPORT_SYMBOL_GPL", EXPORT_SYMBOL_KEYW },
-	{ "EXPORT_SYMBOL_GPL_FUTURE", EXPORT_SYMBOL_KEYW },
-	{ "EXPORT_UNUSED_SYMBOL", EXPORT_SYMBOL_KEYW },
-	{ "EXPORT_UNUSED_SYMBOL_GPL", EXPORT_SYMBOL_KEYW },
+	{ "__GENKSYMS_EXPORT_SYMBOL", EXPORT_SYMBOL_KEYW },
 	{ "__asm", ASM_KEYW },
 	{ "__asm__", ASM_KEYW },
 	{ "__attribute", ATTRIBUTE_KEYW },
-- 
cgit v1.2.3


From c30724e9a061135f8c7b925c0fcdf742510a3bc5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 13 Sep 2019 00:19:24 +0200
Subject: compiler_types.h: don't #define __inline

The spellings __inline and __inline__ should be reserved for uses
where one really wants to refer to the inline keyword, regardless of
whether or not the spelling "inline" has been #defined to something
else. Due to use of __inline__ in uapi headers, we can't easily get
rid of the definition of __inline__. However, almost all users of
__inline have been converted to inline, so we can get rid of that
#define.

The exception is include/acpi/platform/acintel.h. However, that header
is only included when using the intel compiler (does anybody actually
build the kernel with that?), and the ACPI_INLINE macro is only used
in the definition of utterly trivial stub functions, where I doubt a
small change of semantics (lack of __gnu_inline) changes anything.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
[Fix trivial typo in message]
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 599c27b56c29..ee49be6d6088 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -150,8 +150,17 @@ struct ftrace_likely_data {
 	__maybe_unused notrace
 #endif
 
+/*
+ * gcc provides both __inline__ and __inline as alternate spellings of
+ * the inline keyword, though the latter is undocumented. New kernel
+ * code should only use the inline spelling, but some existing code
+ * uses __inline__. Since we #define inline above, to ensure
+ * __inline__ has the same semantics, we need this #define.
+ *
+ * However, the spelling __inline is strictly reserved for referring
+ * to the bare keyword.
+ */
 #define __inline__ inline
-#define __inline   inline
 
 /*
  * Rather then using noinline to prevent stack consumption, use
-- 
cgit v1.2.3


From eb111869301e15b737315a46c913ae82bd19eb9d Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 13 Sep 2019 00:19:25 +0200
Subject: compiler-types.h: add asm_inline definition

This adds an asm_inline macro which expands to "asm inline" [1] when
the compiler supports it. This is currently gcc 9.1+, gcc 8.3
and (once released) gcc 7.5 [2]. It expands to just "asm" for other
compilers.

Using asm inline("foo") instead of asm("foo") overrules gcc's
heuristic estimate of the size of the code represented by the asm()
statement, and makes gcc use the minimum possible size instead. That
can in turn affect gcc's inlining decisions.

I wasn't sure whether to make this a function-like macro or not - this
way, it can be combined with volatile as

  asm_inline volatile()

but perhaps we'd prefer to spell that

  asm_inline_volatile()

anyway.

The Kconfig logic is taken from an RFC patch by Masahiro Yamada [3].

[1] Technically, asm __inline, since both inline and __inline__
are macros that attach various attributes, making gcc barf if one
literally does "asm inline()". However, the third spelling __inline is
available for referring to the bare keyword.

[2] https://lore.kernel.org/lkml/20190907001411.GG9749@gate.crashing.org/

[3] https://lore.kernel.org/lkml/1544695154-15250-1-git-send-email-yamada.masahiro@socionext.com/

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 6 ++++++
 init/Kconfig                   | 3 +++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index ee49be6d6088..2bf316fe0a20 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -198,6 +198,12 @@ struct ftrace_likely_data {
 #define asm_volatile_goto(x...) asm goto(x)
 #endif
 
+#ifdef CONFIG_CC_HAS_ASM_INLINE
+#define asm_inline asm __inline
+#else
+#define asm_inline asm
+#endif
+
 #ifndef __no_fgcse
 # define __no_fgcse
 #endif
diff --git a/init/Kconfig b/init/Kconfig
index bd7d650d4a99..7fee5978dd73 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -30,6 +30,9 @@ config CC_CAN_LINK
 config CC_HAS_ASM_GOTO
 	def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC))
 
+config CC_HAS_ASM_INLINE
+	def_bool $(success,echo 'void foo(void) { asm inline (""); }' | $(CC) -x c - -c -o /dev/null)
+
 config CC_HAS_WARN_MAYBE_UNINITIALIZED
 	def_bool $(cc-option,-Wmaybe-uninitialized)
 	help
-- 
cgit v1.2.3


From d518d2ed8640c1cbbbb6f63939e3e65471817367 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 12 Sep 2019 12:02:42 +0200
Subject: net/sched: fix race between deactivation and dequeue for NOLOCK qdisc

The test implemented by some_qdisc_is_busy() is somewhat loosy for
NOLOCK qdisc, as we may hit the following scenario:

CPU1						CPU2
// in net_tx_action()
clear_bit(__QDISC_STATE_SCHED...);
						// in some_qdisc_is_busy()
						val = (qdisc_is_running(q) ||
						       test_bit(__QDISC_STATE_SCHED,
								&q->state));
						// here val is 0 but...
qdisc_run(q)
// ... CPU1 is going to run the qdisc next

As a conseguence qdisc_run() in net_tx_action() can race with qdisc_reset()
in dev_qdisc_reset(). Such race is not possible for !NOLOCK qdisc as
both the above bit operations are under the root qdisc lock().

After commit 021a17ed796b ("pfifo_fast: drop unneeded additional lock on dequeue")
the race can cause use after free and/or null ptr dereference, but the root
cause is likely older.

This patch addresses the issue explicitly checking for deactivation under
the seqlock for NOLOCK qdisc, so that the qdisc_run() in the critical
scenario becomes a no-op.

Note that the enqueue() op can still execute concurrently with dev_qdisc_reset(),
but that is safe due to the skb_array() locking, and we can't avoid that
for NOLOCK qdiscs.

Fixes: 021a17ed796b ("pfifo_fast: drop unneeded additional lock on dequeue")
Reported-by: Li Shuang <shuali@redhat.com>
Reported-and-tested-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h |  7 ++++++-
 net/core/dev.c          | 16 ++++++++++------
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index a16fbe9a2a67..aa99c73c3fbd 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -118,7 +118,12 @@ void __qdisc_run(struct Qdisc *q);
 static inline void qdisc_run(struct Qdisc *q)
 {
 	if (qdisc_run_begin(q)) {
-		__qdisc_run(q);
+		/* NOLOCK qdisc must check 'state' under the qdisc seqlock
+		 * to avoid racing with dev_qdisc_reset()
+		 */
+		if (!(q->flags & TCQ_F_NOLOCK) ||
+		    likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+			__qdisc_run(q);
 		qdisc_run_end(q);
 	}
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 5156c0edebe8..4ed9df74eb8a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	qdisc_calculate_pkt_len(skb, q);
 
 	if (q->flags & TCQ_F_NOLOCK) {
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			__qdisc_drop(skb, &to_free);
-			rc = NET_XMIT_DROP;
-		} else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
-			   qdisc_run_begin(q)) {
+		if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty &&
+		    qdisc_run_begin(q)) {
+			if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
+					      &q->state))) {
+				__qdisc_drop(skb, &to_free);
+				rc = NET_XMIT_DROP;
+				goto end_run;
+			}
 			qdisc_bstats_cpu_update(q, skb);
 
+			rc = NET_XMIT_SUCCESS;
 			if (sch_direct_xmit(skb, q, dev, txq, NULL, true))
 				__qdisc_run(q);
 
+end_run:
 			qdisc_run_end(q);
-			rc = NET_XMIT_SUCCESS;
 		} else {
 			rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
 			qdisc_run(q);
-- 
cgit v1.2.3


From 3d24430694077313c75c6b89f618db09943621e4 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Tue, 21 May 2019 15:59:03 +0800
Subject: block: make rq sector size accessible for block stats

Currently rq->data_len will be decreased by partial completion or
zeroed by completion, so when blk_stat_add() is invoked, data_len
will be zero and there will never be samples in poll_cb because
blk_mq_poll_stats_bkt() will return -1 if data_len is zero.

We could move blk_stat_add() back to __blk_mq_complete_request(),
but that would make the effort of trying to call ktime_get_ns()
once in vain. Instead we can reuse throtl_size field, and use
it for both block stats and block throttle, and adjust the
logic in blk_mq_poll_stats_bkt() accordingly.

Fixes: 4bc6339a583c ("block: move blk_stat_add() to __blk_mq_end_request()")
Tested-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 11 +++++------
 block/blk-throttle.c   |  3 ++-
 include/linux/blkdev.h | 15 ++++++++++++---
 3 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3647776a0f6e..d30fabb583fd 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -44,12 +44,12 @@ static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
 static int blk_mq_poll_stats_bkt(const struct request *rq)
 {
-	int ddir, bytes, bucket;
+	int ddir, sectors, bucket;
 
 	ddir = rq_data_dir(rq);
-	bytes = blk_rq_bytes(rq);
+	sectors = blk_rq_stats_sectors(rq);
 
-	bucket = ddir + 2*(ilog2(bytes) - 9);
+	bucket = ddir + 2 * ilog2(sectors);
 
 	if (bucket < 0)
 		return -1;
@@ -333,6 +333,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	else
 		rq->start_time_ns = 0;
 	rq->io_start_time_ns = 0;
+	rq->stats_sectors = 0;
 	rq->nr_phys_segments = 0;
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
@@ -681,9 +682,7 @@ void blk_mq_start_request(struct request *rq)
 
 	if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
 		rq->io_start_time_ns = ktime_get_ns();
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-		rq->throtl_size = blk_rq_sectors(rq);
-#endif
+		rq->stats_sectors = blk_rq_sectors(rq);
 		rq->rq_flags |= RQF_STATS;
 		rq_qos_issue(q, rq);
 	}
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0445c998c377..18f773e52dfb 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2248,7 +2248,8 @@ void blk_throtl_stat_add(struct request *rq, u64 time_ns)
 	struct request_queue *q = rq->q;
 	struct throtl_data *td = q->td;
 
-	throtl_track_latency(td, rq->throtl_size, req_op(rq), time_ns >> 10);
+	throtl_track_latency(td, blk_rq_stats_sectors(rq), req_op(rq),
+			     time_ns >> 10);
 }
 
 void blk_throtl_bio_endio(struct bio *bio)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b196124e3240..3094f2d513b2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -206,9 +206,12 @@ struct request {
 #ifdef CONFIG_BLK_WBT
 	unsigned short wbt_flags;
 #endif
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	unsigned short throtl_size;
-#endif
+	/*
+	 * rq sectors used for blk stats. It has the same value
+	 * with blk_rq_sectors(rq), except that it never be zeroed
+	 * by completion.
+	 */
+	unsigned short stats_sectors;
 
 	/*
 	 * Number of scatter-gather DMA addr+len pairs after
@@ -917,6 +920,7 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
  * blk_rq_err_bytes()		: bytes left till the next error boundary
  * blk_rq_sectors()		: sectors left in the entire request
  * blk_rq_cur_sectors()		: sectors left in the current segment
+ * blk_rq_stats_sectors()	: sectors of the entire request used for stats
  */
 static inline sector_t blk_rq_pos(const struct request *rq)
 {
@@ -945,6 +949,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
 	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
 }
 
+static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
+{
+	return rq->stats_sectors;
+}
+
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline unsigned int blk_rq_zone_no(struct request *rq)
 {
-- 
cgit v1.2.3


From acdcecc61285faed359f1a3568c32089cc3a8329 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 12 Sep 2019 21:16:39 -0400
Subject: udp: correct reuseport selection with connected sockets

UDP reuseport groups can hold a mix unconnected and connected sockets.
Ensure that connections only receive all traffic to their 4-tuple.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock_reuseport.h | 20 +++++++++++++++++++-
 net/core/sock_reuseport.c    | 15 +++++++++++++--
 net/ipv4/datagram.c          |  2 ++
 net/ipv4/udp.c               |  5 +++--
 net/ipv6/datagram.c          |  2 ++
 net/ipv6/udp.c               |  5 +++--
 6 files changed, 42 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index d9112de85261..43f4a818d88f 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -21,7 +21,8 @@ struct sock_reuseport {
 	unsigned int		synq_overflow_ts;
 	/* ID stays the same even after the size of socks[] grows. */
 	unsigned int		reuseport_id;
-	bool			bind_inany;
+	unsigned int		bind_inany:1;
+	unsigned int		has_conns:1;
 	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
 	struct sock		*socks[0];	/* array of sock pointers */
 };
@@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+	struct sock_reuseport *reuse;
+	bool ret = false;
+
+	rcu_read_lock();
+	reuse = rcu_dereference(sk->sk_reuseport_cb);
+	if (reuse) {
+		if (set)
+			reuse->has_conns = 1;
+		ret = reuse->has_conns;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9408f9264d05..f3ceec93f392 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
 
 select_by_hash:
 		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		if (!sk2) {
+			int i, j;
+
+			i = j = reciprocal_scale(hash, socks);
+			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+				i++;
+				if (i >= reuse->num_socks)
+					i = 0;
+				if (i == j)
+					goto out;
+			}
+			sk2 = reuse->socks[i];
+		}
 	}
 
 out:
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 7bd29e694603..9a0fe0c2fa02 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	}
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 	inet->inet_id = jiffies;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d88821c794fb..16486c8b708b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp_ehashfn(net, daddr, hnum,
 						   saddr, sport);
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			badness = score;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9ab897ded4df..96f939248d2f 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -27,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -254,6 +255,7 @@ ipv4_connected:
 		goto out;
 	}
 
+	reuseport_has_conns(sk, true);
 	sk->sk_state = TCP_ESTABLISHED;
 	sk_set_txhash(sk);
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 827fe7385078..5995fdc99d3f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
+			if (sk->sk_reuseport &&
+			    sk->sk_state != TCP_ESTABLISHED) {
 				hash = udp6_ehashfn(net, daddr, hnum,
 						    saddr, sport);
 
 				result = reuseport_select_sock(sk, hash, skb,
 							sizeof(struct udphdr));
-				if (result)
+				if (result && !reuseport_has_conns(sk, false))
 					return result;
 			}
 			result = sk;
-- 
cgit v1.2.3


From 1158958a218bb55d1c358200d7f82808d11bf929 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 13 Sep 2019 18:28:39 +0300
Subject: net: sched: extend flow_action_entry with destructor

Generalize flow_action_entry cleanup by extending the structure with
pointer to destructor function. Set the destructor in
tc_setup_flow_action(). Refactor tc_cleanup_flow_action() to call
entry->destructor() instead of using switch that dispatches by entry->id
and manually executes cleanup.

This refactoring is necessary for following patches in this series that
require destructor to use tc_action->ops callbacks that can't be easily
obtained in tc_cleanup_flow_action().

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h |  6 +++-
 net/sched/cls_api.c        | 77 +++++++++++++++++++++++++++-------------------
 2 files changed, 50 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index fc881875f856..86c567f531f3 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -154,8 +154,12 @@ enum flow_action_mangle_base {
 	FLOW_ACT_MANGLE_HDR_TYPE_UDP,
 };
 
+typedef void (*action_destr)(void *priv);
+
 struct flow_action_entry {
 	enum flow_action_id		id;
+	action_destr			destructor;
+	void				*destructor_priv;
 	union {
 		u32			chain_index;	/* FLOW_ACTION_GOTO */
 		struct net_device	*dev;		/* FLOW_ACTION_REDIRECT */
@@ -170,7 +174,7 @@ struct flow_action_entry {
 			u32		mask;
 			u32		val;
 		} mangle;
-		const struct ip_tunnel_info *tunnel;	/* FLOW_ACTION_TUNNEL_ENCAP */
+		struct ip_tunnel_info	*tunnel;	/* FLOW_ACTION_TUNNEL_ENCAP */
 		u32			csum_flags;	/* FLOW_ACTION_CSUM */
 		u32			mark;		/* FLOW_ACTION_MARK */
 		u16                     ptype;          /* FLOW_ACTION_PTYPE */
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 05c4fe1c3ca2..c668195379bd 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3282,25 +3282,48 @@ void tc_cleanup_flow_action(struct flow_action *flow_action)
 	struct flow_action_entry *entry;
 	int i;
 
-	flow_action_for_each(i, entry, flow_action) {
-		switch (entry->id) {
-		case FLOW_ACTION_REDIRECT:
-		case FLOW_ACTION_MIRRED:
-		case FLOW_ACTION_REDIRECT_INGRESS:
-		case FLOW_ACTION_MIRRED_INGRESS:
-			if (entry->dev)
-				dev_put(entry->dev);
-			break;
-		case FLOW_ACTION_TUNNEL_ENCAP:
-			kfree(entry->tunnel);
-			break;
-		default:
-			break;
-		}
-	}
+	flow_action_for_each(i, entry, flow_action)
+		if (entry->destructor)
+			entry->destructor(entry->destructor_priv);
 }
 EXPORT_SYMBOL(tc_cleanup_flow_action);
 
+static void tcf_mirred_put_dev(void *priv)
+{
+	struct net_device *dev = priv;
+
+	dev_put(dev);
+}
+
+static void tcf_mirred_get_dev(struct flow_action_entry *entry,
+			       const struct tc_action *act)
+{
+	entry->dev = tcf_mirred_dev(act);
+	if (!entry->dev)
+		return;
+	dev_hold(entry->dev);
+	entry->destructor = tcf_mirred_put_dev;
+	entry->destructor_priv = entry->dev;
+}
+
+static void tcf_tunnel_encap_put_tunnel(void *priv)
+{
+	struct ip_tunnel_info *tunnel = priv;
+
+	kfree(tunnel);
+}
+
+static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
+				       const struct tc_action *act)
+{
+	entry->tunnel = tcf_tunnel_info_copy(act);
+	if (!entry->tunnel)
+		return -ENOMEM;
+	entry->destructor = tcf_tunnel_encap_put_tunnel;
+	entry->destructor_priv = entry->tunnel;
+	return 0;
+}
+
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts, bool rtnl_held)
 {
@@ -3329,24 +3352,16 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->chain_index = tcf_gact_goto_chain_index(act);
 		} else if (is_tcf_mirred_egress_redirect(act)) {
 			entry->id = FLOW_ACTION_REDIRECT;
-			entry->dev = tcf_mirred_dev(act);
-			if (entry->dev)
-				dev_hold(entry->dev);
+			tcf_mirred_get_dev(entry, act);
 		} else if (is_tcf_mirred_egress_mirror(act)) {
 			entry->id = FLOW_ACTION_MIRRED;
-			entry->dev = tcf_mirred_dev(act);
-			if (entry->dev)
-				dev_hold(entry->dev);
+			tcf_mirred_get_dev(entry, act);
 		} else if (is_tcf_mirred_ingress_redirect(act)) {
 			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
-			entry->dev = tcf_mirred_dev(act);
-			if (entry->dev)
-				dev_hold(entry->dev);
+			tcf_mirred_get_dev(entry, act);
 		} else if (is_tcf_mirred_ingress_mirror(act)) {
 			entry->id = FLOW_ACTION_MIRRED_INGRESS;
-			entry->dev = tcf_mirred_dev(act);
-			if (entry->dev)
-				dev_hold(entry->dev);
+			tcf_mirred_get_dev(entry, act);
 		} else if (is_tcf_vlan(act)) {
 			switch (tcf_vlan_action(act)) {
 			case TCA_VLAN_ACT_PUSH:
@@ -3370,11 +3385,9 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			}
 		} else if (is_tcf_tunnel_set(act)) {
 			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
-			entry->tunnel = tcf_tunnel_info_copy(act);
-			if (!entry->tunnel) {
-				err = -ENOMEM;
+			err = tcf_tunnel_encap_get_tunnel(entry, act);
+			if (err)
 				goto err_out;
-			}
 		} else if (is_tcf_tunnel_release(act)) {
 			entry->id = FLOW_ACTION_TUNNEL_DECAP;
 		} else if (is_tcf_pedit(act)) {
-- 
cgit v1.2.3


From 4a5da47d5cb6aba3c26a5cc0dddfb2d577e851e9 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 13 Sep 2019 18:28:40 +0300
Subject: net: sched: take reference to psample group in flow_action infra

With recent patch set that removed rtnl lock dependency from cls hardware
offload API rtnl lock is only taken when reading action data and can be
released after action-specific data is parsed into intermediate
representation. However, sample action psample group is passed by pointer
without obtaining reference to it first, which makes it possible to
concurrently overwrite the action and deallocate object pointed by
psample_group pointer after rtnl lock is released but before driver
finished using the pointer.

To prevent such race condition, obtain reference to psample group while it
is used by flow_action infra. Extend psample API with function
psample_group_take() that increments psample group reference counter.
Extend struct tc_action_ops with new get_psample_group() API. Implement the
API for action sample using psample_group_take() and already existing
psample_group_put() as a destructor. Use it in tc_setup_flow_action() to
take reference to psample group pointed to by entry->sample.psample_group
and release it in tc_cleanup_flow_action().

Disable bh when taking psample_groups_lock. The lock is now taken while
holding action tcf_lock that is used by data path and requires bh to be
disabled, so doing the same for psample_groups_lock is necessary to
preserve SOFTIRQ-irq-safety.

Fixes: 918190f50eb6 ("net: sched: flower: don't take rtnl lock for cls hw offloads API")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h          |  5 +++++
 include/net/psample.h          |  1 +
 include/net/tc_act/tc_sample.h |  6 ------
 net/psample/psample.c          | 20 ++++++++++++++------
 net/sched/act_sample.c         | 27 +++++++++++++++++++++++++++
 net/sched/cls_api.c            | 13 +++++++++++--
 6 files changed, 58 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3a1a72990fce..4be8b0daedf0 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -78,6 +78,8 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
 #define ACT_P_CREATED 1
 #define ACT_P_DELETED 1
 
+typedef void (*tc_action_priv_destructor)(void *priv);
+
 struct tc_action_ops {
 	struct list_head head;
 	char    kind[IFNAMSIZ];
@@ -101,6 +103,9 @@ struct tc_action_ops {
 	size_t  (*get_fill_size)(const struct tc_action *act);
 	struct net_device *(*get_dev)(const struct tc_action *a);
 	void	(*put_dev)(struct net_device *dev);
+	struct psample_group *
+	(*get_psample_group)(const struct tc_action *a,
+			     tc_action_priv_destructor *destructor);
 };
 
 struct tc_action_net {
diff --git a/include/net/psample.h b/include/net/psample.h
index 6b578ce69cd8..68ae16bb0a4a 100644
--- a/include/net/psample.h
+++ b/include/net/psample.h
@@ -15,6 +15,7 @@ struct psample_group {
 };
 
 struct psample_group *psample_group_get(struct net *net, u32 group_num);
+void psample_group_take(struct psample_group *group);
 void psample_group_put(struct psample_group *group);
 
 #if IS_ENABLED(CONFIG_PSAMPLE)
diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index b4fce0fae645..b5d76305e854 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -41,10 +41,4 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a)
 	return to_sample(a)->trunc_size;
 }
 
-static inline struct psample_group *
-tcf_sample_psample_group(const struct tc_action *a)
-{
-	return rcu_dereference_rtnl(to_sample(a)->psample_group);
-}
-
 #endif /* __NET_TC_SAMPLE_H */
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 66e4b61a350d..a6ceb0533b5b 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -73,7 +73,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
 	int idx = 0;
 	int err;
 
-	spin_lock(&psample_groups_lock);
+	spin_lock_bh(&psample_groups_lock);
 	list_for_each_entry(group, &psample_groups_list, list) {
 		if (!net_eq(group->net, sock_net(msg->sk)))
 			continue;
@@ -89,7 +89,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg,
 		idx++;
 	}
 
-	spin_unlock(&psample_groups_lock);
+	spin_unlock_bh(&psample_groups_lock);
 	cb->args[0] = idx;
 	return msg->len;
 }
@@ -172,7 +172,7 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num)
 {
 	struct psample_group *group;
 
-	spin_lock(&psample_groups_lock);
+	spin_lock_bh(&psample_groups_lock);
 
 	group = psample_group_lookup(net, group_num);
 	if (!group) {
@@ -183,19 +183,27 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num)
 	group->refcount++;
 
 out:
-	spin_unlock(&psample_groups_lock);
+	spin_unlock_bh(&psample_groups_lock);
 	return group;
 }
 EXPORT_SYMBOL_GPL(psample_group_get);
 
+void psample_group_take(struct psample_group *group)
+{
+	spin_lock_bh(&psample_groups_lock);
+	group->refcount++;
+	spin_unlock_bh(&psample_groups_lock);
+}
+EXPORT_SYMBOL_GPL(psample_group_take);
+
 void psample_group_put(struct psample_group *group)
 {
-	spin_lock(&psample_groups_lock);
+	spin_lock_bh(&psample_groups_lock);
 
 	if (--group->refcount == 0)
 		psample_group_destroy(group);
 
-	spin_unlock(&psample_groups_lock);
+	spin_unlock_bh(&psample_groups_lock);
 }
 EXPORT_SYMBOL_GPL(psample_group_put);
 
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 10229124a992..692c4c9040fd 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -252,6 +252,32 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
 	return tcf_idr_search(tn, a, index);
 }
 
+static void tcf_psample_group_put(void *priv)
+{
+	struct psample_group *group = priv;
+
+	psample_group_put(group);
+}
+
+static struct psample_group *
+tcf_sample_get_group(const struct tc_action *a,
+		     tc_action_priv_destructor *destructor)
+{
+	struct tcf_sample *s = to_sample(a);
+	struct psample_group *group;
+
+	spin_lock_bh(&s->tcf_lock);
+	group = rcu_dereference_protected(s->psample_group,
+					  lockdep_is_held(&s->tcf_lock));
+	if (group) {
+		psample_group_take(group);
+		*destructor = tcf_psample_group_put;
+	}
+	spin_unlock_bh(&s->tcf_lock);
+
+	return group;
+}
+
 static struct tc_action_ops act_sample_ops = {
 	.kind	  = "sample",
 	.id	  = TCA_ID_SAMPLE,
@@ -262,6 +288,7 @@ static struct tc_action_ops act_sample_ops = {
 	.cleanup  = tcf_sample_cleanup,
 	.walk	  = tcf_sample_walker,
 	.lookup	  = tcf_sample_search,
+	.get_psample_group = tcf_sample_get_group,
 	.size	  = sizeof(struct tcf_sample),
 };
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c668195379bd..60d44b14750a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3324,6 +3324,16 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
 	return 0;
 }
 
+static void tcf_sample_get_group(struct flow_action_entry *entry,
+				 const struct tc_action *act)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	entry->sample.psample_group =
+		act->ops->get_psample_group(act, &entry->destructor);
+	entry->destructor_priv = entry->sample.psample_group;
+#endif
+}
+
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts, bool rtnl_held)
 {
@@ -3417,11 +3427,10 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 			entry->mark = tcf_skbedit_mark(act);
 		} else if (is_tcf_sample(act)) {
 			entry->id = FLOW_ACTION_SAMPLE;
-			entry->sample.psample_group =
-				tcf_sample_psample_group(act);
 			entry->sample.trunc_size = tcf_sample_trunc_size(act);
 			entry->sample.truncate = tcf_sample_truncate(act);
 			entry->sample.rate = tcf_sample_rate(act);
+			tcf_sample_get_group(entry, act);
 		} else if (is_tcf_police(act)) {
 			entry->id = FLOW_ACTION_POLICE;
 			entry->police.burst = tcf_police_tcfp_burst(act);
-- 
cgit v1.2.3


From 470d5060e6b3b8fae47d944601855e9ece7a2470 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 13 Sep 2019 18:28:41 +0300
Subject: net: sched: use get_dev() action API in flow_action infra

When filling in hardware intermediate representation tc_setup_flow_action()
directly obtains, checks and takes reference to dev used by mirred action,
instead of using act->ops->get_dev() API created specifically for this
purpose. In order to remove code duplication, refactor flow_action infra to
use action API when obtaining mirred action target dev. Extend get_dev()
with additional argument that is used to provide dev destructor to the
user.

Fixes: 5a6ff4b13d59 ("net: sched: take reference to action dev before calling offloads")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h  |  4 ++--
 net/sched/act_mirred.c | 21 +++++++++++++--------
 net/sched/cls_api.c    | 13 +++----------
 3 files changed, 18 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 4be8b0daedf0..b18c699681ca 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,8 +101,8 @@ struct tc_action_ops {
 			struct netlink_ext_ack *);
 	void	(*stats_update)(struct tc_action *, u64, u32, u64, bool);
 	size_t  (*get_fill_size)(const struct tc_action *act);
-	struct net_device *(*get_dev)(const struct tc_action *a);
-	void	(*put_dev)(struct net_device *dev);
+	struct net_device *(*get_dev)(const struct tc_action *a,
+				      tc_action_priv_destructor *destructor);
 	struct psample_group *
 	(*get_psample_group)(const struct tc_action *a,
 			     tc_action_priv_destructor *destructor);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 9d1bf508075a..9ce073a05414 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -408,25 +408,31 @@ static struct notifier_block mirred_device_notifier = {
 	.notifier_call = mirred_device_event,
 };
 
-static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
+static void tcf_mirred_dev_put(void *priv)
+{
+	struct net_device *dev = priv;
+
+	dev_put(dev);
+}
+
+static struct net_device *
+tcf_mirred_get_dev(const struct tc_action *a,
+		   tc_action_priv_destructor *destructor)
 {
 	struct tcf_mirred *m = to_mirred(a);
 	struct net_device *dev;
 
 	rcu_read_lock();
 	dev = rcu_dereference(m->tcfm_dev);
-	if (dev)
+	if (dev) {
 		dev_hold(dev);
+		*destructor = tcf_mirred_dev_put;
+	}
 	rcu_read_unlock();
 
 	return dev;
 }
 
-static void tcf_mirred_put_dev(struct net_device *dev)
-{
-	dev_put(dev);
-}
-
 static size_t tcf_mirred_get_fill_size(const struct tc_action *act)
 {
 	return nla_total_size(sizeof(struct tc_mirred));
@@ -446,7 +452,6 @@ static struct tc_action_ops act_mirred_ops = {
 	.get_fill_size	=	tcf_mirred_get_fill_size,
 	.size		=	sizeof(struct tcf_mirred),
 	.get_dev	=	tcf_mirred_get_dev,
-	.put_dev	=	tcf_mirred_put_dev,
 };
 
 static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 60d44b14750a..32577c248968 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3288,22 +3288,15 @@ void tc_cleanup_flow_action(struct flow_action *flow_action)
 }
 EXPORT_SYMBOL(tc_cleanup_flow_action);
 
-static void tcf_mirred_put_dev(void *priv)
-{
-	struct net_device *dev = priv;
-
-	dev_put(dev);
-}
-
 static void tcf_mirred_get_dev(struct flow_action_entry *entry,
 			       const struct tc_action *act)
 {
-	entry->dev = tcf_mirred_dev(act);
+#ifdef CONFIG_NET_CLS_ACT
+	entry->dev = act->ops->get_dev(act, &entry->destructor);
 	if (!entry->dev)
 		return;
-	dev_hold(entry->dev);
-	entry->destructor = tcf_mirred_put_dev;
 	entry->destructor_priv = entry->dev;
+#endif
 }
 
 static void tcf_tunnel_encap_put_tunnel(void *priv)
-- 
cgit v1.2.3


From d895a0f16fadb26d22ab531c49768f7642ae5c3e Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Fri, 16 Aug 2019 12:53:00 +0200
Subject: bpf: fix accessing bpf_sysctl.file_pos on s390

"ctx:file_pos sysctl:read write ok" fails on s390 with "Read value  !=
nux". This is because verifier rewrites a complete 32-bit
bpf_sysctl.file_pos update to a partial update of the first 32 bits of
64-bit *bpf_sysctl_kern.ppos, which is not correct on big-endian
systems.

Fix by using an offset on big-endian systems.

Ditto for bpf_sysctl.file_pos reads. Currently the test does not detect
a problem there, since it expects to see 0, which it gets with high
probability in error cases, so change it to seek to offset 3 and expect
3 in bpf_sysctl.file_pos.

Fixes: e1550bfe0de4 ("bpf: Add file_pos field to bpf_sysctl ctx")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20190816105300.49035-1-iii@linux.ibm.com/
---
 include/linux/filter.h                    |  8 ++++----
 kernel/bpf/cgroup.c                       | 10 ++++++++--
 kernel/bpf/verifier.c                     |  4 ++--
 tools/testing/selftests/bpf/test_sysctl.c |  9 ++++++++-
 4 files changed, 22 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 92c6e31fb008..2ce57645f3cd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -749,14 +749,14 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 }
 
 static inline u8
-bpf_ctx_narrow_load_shift(u32 off, u32 size, u32 size_default)
+bpf_ctx_narrow_access_offset(u32 off, u32 size, u32 size_default)
 {
-	u8 load_off = off & (size_default - 1);
+	u8 access_off = off & (size_default - 1);
 
 #ifdef __LITTLE_ENDIAN
-	return load_off * 8;
+	return access_off;
 #else
-	return (size_default - (load_off + size)) * 8;
+	return size_default - (access_off + size);
 #endif
 }
 
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6a6a154cfa7b..ddd8addcdb5c 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1334,6 +1334,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
 				     struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
+	u32 read_size;
 
 	switch (si->off) {
 	case offsetof(struct bpf_sysctl, write):
@@ -1365,7 +1366,9 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
 				treg, si->dst_reg,
 				offsetof(struct bpf_sysctl_kern, ppos));
 			*insn++ = BPF_STX_MEM(
-				BPF_SIZEOF(u32), treg, si->src_reg, 0);
+				BPF_SIZEOF(u32), treg, si->src_reg,
+				bpf_ctx_narrow_access_offset(
+					0, sizeof(u32), sizeof(loff_t)));
 			*insn++ = BPF_LDX_MEM(
 				BPF_DW, treg, si->dst_reg,
 				offsetof(struct bpf_sysctl_kern, tmp_reg));
@@ -1374,8 +1377,11 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
 				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
 				si->dst_reg, si->src_reg,
 				offsetof(struct bpf_sysctl_kern, ppos));
+			read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
 			*insn++ = BPF_LDX_MEM(
-				BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0);
+				BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
+				bpf_ctx_narrow_access_offset(
+					0, read_size, sizeof(loff_t)));
 		}
 		*target_size = sizeof(u32);
 		break;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3fb50757e812..92a4332b041d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8619,8 +8619,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 
 		if (is_narrower_load && size < target_size) {
-			u8 shift = bpf_ctx_narrow_load_shift(off, size,
-							     size_default);
+			u8 shift = bpf_ctx_narrow_access_offset(
+				off, size, size_default) * 8;
 			if (ctx_field_size <= 4) {
 				if (shift)
 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index fc33ae36b760..4f8ec1f10a80 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -32,6 +32,7 @@ struct sysctl_test {
 	enum bpf_attach_type attach_type;
 	const char *sysctl;
 	int open_flags;
+	int seek;
 	const char *newval;
 	const char *oldval;
 	enum {
@@ -140,7 +141,7 @@ static struct sysctl_test tests[] = {
 			/* If (file_pos == X) */
 			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
 				    offsetof(struct bpf_sysctl, file_pos)),
-			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2),
 
 			/* return ALLOW; */
 			BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -153,6 +154,7 @@ static struct sysctl_test tests[] = {
 		.attach_type = BPF_CGROUP_SYSCTL,
 		.sysctl = "kernel/ostype",
 		.open_flags = O_RDONLY,
+		.seek = 3,
 		.result = SUCCESS,
 	},
 	{
@@ -1481,6 +1483,11 @@ static int access_sysctl(const char *sysctl_path,
 	if (fd < 0)
 		return fd;
 
+	if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) {
+		log_err("lseek(%d) failed", test->seek);
+		goto err;
+	}
+
 	if (test->open_flags == O_RDONLY) {
 		char buf[128];
 
-- 
cgit v1.2.3


From 120a75ea9f4ba02f852171e75d44f29139b9c83e Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Thu, 25 Jul 2019 20:16:39 +0800
Subject: libceph: add function that reset client's entity addr

This function also re-open connections to OSD/MON, and re-send in-flight
OSD requests after re-opening connections to OSD.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h    |  1 +
 include/linux/ceph/messenger.h  |  1 +
 include/linux/ceph/mon_client.h |  1 +
 include/linux/ceph/osd_client.h |  1 +
 net/ceph/ceph_common.c          |  8 ++++++++
 net/ceph/messenger.c            |  6 ++++++
 net/ceph/mon_client.c           |  7 +++++++
 net/ceph/osd_client.c           | 18 ++++++++++++++++++
 8 files changed, 43 insertions(+)

(limited to 'include')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 82156da3c650..b9dbda1c26aa 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -293,6 +293,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private);
 struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client);
 u64 ceph_client_gid(struct ceph_client *client);
 extern void ceph_destroy_client(struct ceph_client *client);
+extern void ceph_reset_client_addr(struct ceph_client *client);
 extern int __ceph_open_session(struct ceph_client *client,
 			       unsigned long started);
 extern int ceph_open_session(struct ceph_client *client);
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 23895d178149..c4458dc6a757 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -337,6 +337,7 @@ extern void ceph_msgr_flush(void);
 extern void ceph_messenger_init(struct ceph_messenger *msgr,
 				struct ceph_entity_addr *myaddr);
 extern void ceph_messenger_fini(struct ceph_messenger *msgr);
+extern void ceph_messenger_reset_nonce(struct ceph_messenger *msgr);
 
 extern void ceph_con_init(struct ceph_connection *con, void *private,
 			const struct ceph_connection_operations *ops,
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index b4d134d3312a..dbb8a6959a73 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -109,6 +109,7 @@ extern int ceph_monmap_contains(struct ceph_monmap *m,
 
 extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
 extern void ceph_monc_stop(struct ceph_mon_client *monc);
+extern void ceph_monc_reopen_session(struct ceph_mon_client *monc);
 
 enum {
 	CEPH_SUB_MONMAP = 0,
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index ad7fe5d10dcd..d1c3e829f30d 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -381,6 +381,7 @@ extern void ceph_osdc_cleanup(void);
 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
 			  struct ceph_client *client);
 extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
+extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
 
 extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 				   struct ceph_msg *msg);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 4eeea4d5c3ef..b412a3ccc4fc 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -694,6 +694,14 @@ void ceph_destroy_client(struct ceph_client *client)
 }
 EXPORT_SYMBOL(ceph_destroy_client);
 
+void ceph_reset_client_addr(struct ceph_client *client)
+{
+	ceph_messenger_reset_nonce(&client->msgr);
+	ceph_monc_reopen_session(&client->monc);
+	ceph_osdc_reopen_osds(&client->osdc);
+}
+EXPORT_SYMBOL(ceph_reset_client_addr);
+
 /*
  * true if we have the mon map (and have thus joined the cluster)
  */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 962f521c863e..e4cb3db2ee77 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -3031,6 +3031,12 @@ static void con_fault(struct ceph_connection *con)
 }
 
 
+void ceph_messenger_reset_nonce(struct ceph_messenger *msgr)
+{
+	u32 nonce = le32_to_cpu(msgr->inst.addr.nonce) + 1000000;
+	msgr->inst.addr.nonce = cpu_to_le32(nonce);
+	encode_my_addr(msgr);
+}
 
 /*
  * initialize a new messenger instance
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 0520bf9825aa..7256c402ebaa 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -213,6 +213,13 @@ static void reopen_session(struct ceph_mon_client *monc)
 	__open_session(monc);
 }
 
+void ceph_monc_reopen_session(struct ceph_mon_client *monc)
+{
+	mutex_lock(&monc->mutex);
+	reopen_session(monc);
+	mutex_unlock(&monc->mutex);
+}
+
 static void un_backoff(struct ceph_mon_client *monc)
 {
 	monc->hunt_mult /= 2; /* reduce by 50% */
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 78ae6e8c953d..a252ba80dd82 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5086,6 +5086,24 @@ out_put_req:
 }
 EXPORT_SYMBOL(ceph_osdc_call);
 
+/*
+ * reset all osd connections
+ */
+void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc)
+{
+	struct rb_node *n;
+
+	down_write(&osdc->lock);
+	for (n = rb_first(&osdc->osds); n; ) {
+		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node);
+
+		n = rb_next(n);
+		if (!reopen_osd(osd))
+			kick_osd_requests(osd);
+	}
+	up_write(&osdc->lock);
+}
+
 /*
  * init, shutdown
  */
-- 
cgit v1.2.3


From 2cef0ba8032ce0df3f29621921fe7d6372f68d3e Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zyan@redhat.com>
Date: Thu, 25 Jul 2019 20:16:40 +0800
Subject: libceph: add function that clears osd client's abort_err

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 1 +
 net/ceph/osd_client.c           | 8 ++++++++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index d1c3e829f30d..eaffbdddf89a 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -389,6 +389,7 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
 void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
 void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
 
 #define osd_req_op_data(oreq, whch, typ, fld)				\
 ({									\
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a252ba80dd82..6f49c59e5e58 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2476,6 +2476,14 @@ void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err)
 }
 EXPORT_SYMBOL(ceph_osdc_abort_requests);
 
+void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc)
+{
+	down_write(&osdc->lock);
+	osdc->abort_err = 0;
+	up_write(&osdc->lock);
+}
+EXPORT_SYMBOL(ceph_osdc_clear_abort_err);
+
 static void update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
 {
 	if (likely(eb > osdc->epoch_barrier)) {
-- 
cgit v1.2.3


From 4a36a60c34f42f75e8b4f8cd24fcfade26111334 Mon Sep 17 00:00:00 2001
From: Jonathan Chocron <jonnyc@amazon.com>
Date: Thu, 12 Sep 2019 16:00:39 +0300
Subject: PCI: Add Amazon's Annapurna Labs vendor ID

Add Amazon's Annapurna Labs vendor ID to pci_ids.h.

Signed-off-by: Jonathan Chocron <jonnyc@amazon.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Andrew Murray <andrew.murray@arm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index c842735a4f45..5ce83544f38b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2570,6 +2570,8 @@
 
 #define PCI_VENDOR_ID_ASMEDIA		0x1b21
 
+#define PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS	0x1c36
+
 #define PCI_VENDOR_ID_CIRCUITCO		0x1cc8
 #define PCI_SUBSYSTEM_ID_CIRCUITCO_MINNOWBOARD	0x0001
 
-- 
cgit v1.2.3


From afa179eb603847494aa5061d4f501224a30dd187 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Mon, 16 Sep 2019 05:55:42 -0400
Subject: dm: introduce DM_GET_TARGET_VERSION

This commit introduces a new ioctl DM_GET_TARGET_VERSION. It will load a
target that is specified in the "name" entry in the parameter structure
and return its version.

This functionality is intended to be used by cryptsetup, so that it can
query kernel capabilities before activating the device.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-ioctl.c         | 32 +++++++++++++++++++++++++++++---
 include/uapi/linux/dm-ioctl.h |  6 ++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index fb6f8fb1f13d..ac83f5002ce5 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -601,17 +601,27 @@ static void list_version_get_info(struct target_type *tt, void *param)
     info->vers = align_ptr(((void *) ++info->vers) + strlen(tt->name) + 1);
 }
 
-static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
+static int __list_versions(struct dm_ioctl *param, size_t param_size, const char *name)
 {
 	size_t len, needed = 0;
 	struct dm_target_versions *vers;
 	struct vers_iter iter_info;
+	struct target_type *tt = NULL;
+
+	if (name) {
+		tt = dm_get_target_type(name);
+		if (!tt)
+			return -EINVAL;
+	}
 
 	/*
 	 * Loop through all the devices working out how much
 	 * space we need.
 	 */
-	dm_target_iterate(list_version_get_needed, &needed);
+	if (!tt)
+		dm_target_iterate(list_version_get_needed, &needed);
+	else
+		list_version_get_needed(tt, &needed);
 
 	/*
 	 * Grab our output buffer.
@@ -632,13 +642,28 @@ static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param
 	/*
 	 * Now loop through filling out the names & versions.
 	 */
-	dm_target_iterate(list_version_get_info, &iter_info);
+	if (!tt)
+		dm_target_iterate(list_version_get_info, &iter_info);
+	else
+		list_version_get_info(tt, &iter_info);
 	param->flags |= iter_info.flags;
 
  out:
+	if (tt)
+		dm_put_target_type(tt);
 	return 0;
 }
 
+static int list_versions(struct file *filp, struct dm_ioctl *param, size_t param_size)
+{
+	return __list_versions(param, param_size, NULL);
+}
+
+static int get_target_version(struct file *filp, struct dm_ioctl *param, size_t param_size)
+{
+	return __list_versions(param, param_size, param->name);
+}
+
 static int check_name(const char *name)
 {
 	if (strchr(name, '/')) {
@@ -1664,6 +1689,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
 		{DM_TARGET_MSG_CMD, 0, target_message},
 		{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
 		{DM_DEV_ARM_POLL, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
+		{DM_GET_TARGET_VERSION, 0, get_target_version},
 	};
 
 	if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index f396a82dfd3e..2df8ceca1f9b 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -243,6 +243,7 @@ enum {
 	DM_TARGET_MSG_CMD,
 	DM_DEV_SET_GEOMETRY_CMD,
 	DM_DEV_ARM_POLL_CMD,
+	DM_GET_TARGET_VERSION_CMD,
 };
 
 #define DM_IOCTL 0xfd
@@ -265,14 +266,15 @@ enum {
 #define DM_TABLE_STATUS  _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
 
 #define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+#define DM_GET_TARGET_VERSION _IOWR(DM_IOCTL, DM_GET_TARGET_VERSION_CMD, struct dm_ioctl)
 
 #define DM_TARGET_MSG	 _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	40
+#define DM_VERSION_MINOR	41
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2019-01-18)"
+#define DM_VERSION_EXTRA	"-ioctl (2019-09-16)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit v1.2.3


From f9af2dbbfe01def62765a58af7fbc488351893c3 Mon Sep 17 00:00:00 2001
From: Thomas Higdon <tph@fb.com>
Date: Fri, 13 Sep 2019 23:23:34 +0000
Subject: tcp: Add TCP_INFO counter for packets received out-of-order

For receive-heavy cases on the server-side, we want to track the
connection quality for individual client IPs. This counter, similar to
the existing system-wide TCPOFOQueue counter in /proc/net/netstat,
tracks out-of-order packet reception. By providing this counter in
TCP_INFO, it will allow understanding to what degree receive-heavy
sockets are experiencing out-of-order delivery and packet drops
indicating congestion.

Please note that this is similar to the counter in NetBSD TCP_INFO, and
has the same name.

Also note that we avoid increasing the size of the tcp_sock struct by
taking advantage of a hole.

Signed-off-by: Thomas Higdon <tph@fb.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 2 ++
 include/uapi/linux/tcp.h | 2 ++
 net/ipv4/tcp.c           | 2 ++
 net/ipv4/tcp_input.c     | 1 +
 4 files changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f3a85a7fb4b1..99617e528ea2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -354,6 +354,8 @@ struct tcp_sock {
 #define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
 #endif
 
+	u32 rcv_ooopack; /* Received out-of-order packets, for tcpinfo */
+
 /* Receiver side RTT estimation */
 	u32 rcv_rtt_last_tsecr;
 	struct {
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index b3564f85a762..20237987ccc8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -270,6 +270,8 @@ struct tcp_info {
 	__u64	tcpi_bytes_retrans;  /* RFC4898 tcpEStatsPerfOctetsRetrans */
 	__u32	tcpi_dsack_dups;     /* RFC4898 tcpEStatsStackDSACKDups */
 	__u32	tcpi_reord_seen;     /* reordering events seen */
+
+	__u32	tcpi_rcv_ooopack;    /* Out-of-order packets received */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 94df48bcecc2..4cf58208270e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,6 +2653,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 	tp->rx_opt.saw_tstamp = 0;
 	tp->rx_opt.dsack = 0;
 	tp->rx_opt.num_sacks = 0;
+	tp->rcv_ooopack = 0;
 
 
 	/* Clean up fastopen related fields */
@@ -3295,6 +3296,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_bytes_retrans = tp->bytes_retrans;
 	info->tcpi_dsack_dups = tp->dsack_dups;
 	info->tcpi_reord_seen = tp->reord_seen;
+	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7e94223fdb2b..3578357abe30 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4555,6 +4555,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	tp->pred_flags = 0;
 	inet_csk_schedule_ack(sk);
 
+	tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
 	seq = TCP_SKB_CB(skb)->seq;
 	end_seq = TCP_SKB_CB(skb)->end_seq;
-- 
cgit v1.2.3


From 8f7baad7f03543451af27f5380fc816b008aa1f2 Mon Sep 17 00:00:00 2001
From: Thomas Higdon <tph@fb.com>
Date: Fri, 13 Sep 2019 23:23:35 +0000
Subject: tcp: Add snd_wnd to TCP_INFO

Neal Cardwell mentioned that snd_wnd would be useful for diagnosing TCP
performance problems --
> (1) Usually when we're diagnosing TCP performance problems, we do so
> from the sender, since the sender makes most of the
> performance-critical decisions (cwnd, pacing, TSO size, TSQ, etc).
> From the sender-side the thing that would be most useful is to see
> tp->snd_wnd, the receive window that the receiver has advertised to
> the sender.

This serves the purpose of adding an additional __u32 to avoid the
would-be hole caused by the addition of the tcpi_rcvi_ooopack field.

Signed-off-by: Thomas Higdon <tph@fb.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h | 4 ++++
 net/ipv4/tcp.c           | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 20237987ccc8..81e697978e8b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -272,6 +272,10 @@ struct tcp_info {
 	__u32	tcpi_reord_seen;     /* reordering events seen */
 
 	__u32	tcpi_rcv_ooopack;    /* Out-of-order packets received */
+
+	__u32	tcpi_snd_wnd;	     /* peer's advertised receive window after
+				      * scaling (bytes)
+				      */
 };
 
 /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4cf58208270e..79c325a07ba5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3297,6 +3297,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_dsack_dups = tp->dsack_dups;
 	info->tcpi_reord_seen = tp->reord_seen;
 	info->tcpi_rcv_ooopack = tp->rcv_ooopack;
+	info->tcpi_snd_wnd = tp->snd_wnd;
 	unlock_sock_fast(sk, slow);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
-- 
cgit v1.2.3


From 8eecd1c2e5bc73d33f3a544751305679dbf88eb4 Mon Sep 17 00:00:00 2001
From: "Paulo Alcantara (SUSE)" <paulo@paulo.ac>
Date: Tue, 16 Jul 2019 19:04:50 -0300
Subject: cifs: Add support for root file systems

Introduce a new CONFIG_CIFS_ROOT option to handle root file systems
over a SMB share.

In order to mount the root file system during the init process, make
cifs.ko perform non-blocking socket operations while mounting and
accessing it.

Cc: Steve French <smfrench@gmail.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Paulo Alcantara (SUSE) <paulo@paulo.ac>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 Documentation/filesystems/cifs/cifsroot.txt | 97 +++++++++++++++++++++++++++++
 fs/cifs/Kconfig                             |  8 +++
 fs/cifs/Makefile                            |  2 +
 fs/cifs/cifsglob.h                          |  2 +
 fs/cifs/cifsroot.c                          | 83 ++++++++++++++++++++++++
 fs/cifs/connect.c                           | 17 ++++-
 include/linux/root_dev.h                    |  1 +
 7 files changed, 207 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/filesystems/cifs/cifsroot.txt
 create mode 100644 fs/cifs/cifsroot.c

(limited to 'include')

diff --git a/Documentation/filesystems/cifs/cifsroot.txt b/Documentation/filesystems/cifs/cifsroot.txt
new file mode 100644
index 000000000000..0fa1a2c36a40
--- /dev/null
+++ b/Documentation/filesystems/cifs/cifsroot.txt
@@ -0,0 +1,97 @@
+Mounting root file system via SMB (cifs.ko)
+===========================================
+
+Written 2019 by Paulo Alcantara <palcantara@suse.de>
+Written 2019 by Aurelien Aptel <aaptel@suse.com>
+
+The CONFIG_CIFS_ROOT option enables experimental root file system
+support over the SMB protocol via cifs.ko.
+
+It introduces a new kernel command-line option called 'cifsroot='
+which will tell the kernel to mount the root file system over the
+network by utilizing SMB or CIFS protocol.
+
+In order to mount, the network stack will also need to be set up by
+using 'ip=' config option. For more details, see
+Documentation/filesystems/nfs/nfsroot.txt.
+
+A CIFS root mount currently requires the use of SMB1+UNIX Extensions
+which is only supported by the Samba server. SMB1 is the older
+deprecated version of the protocol but it has been extended to support
+POSIX features (See [1]). The equivalent extensions for the newer
+recommended version of the protocol (SMB3) have not been fully
+implemented yet which means SMB3 doesn't support some required POSIX
+file system objects (e.g. block devices, pipes, sockets).
+
+As a result, a CIFS root will default to SMB1 for now but the version
+to use can nonetheless be changed via the 'vers=' mount option.  This
+default will change once the SMB3 POSIX extensions are fully
+implemented.
+
+Server configuration
+====================
+
+To enable SMB1+UNIX extensions you will need to set these global
+settings in Samba smb.conf:
+
+    [global]
+    server min protocol = NT1
+    unix extension = yes        # default
+
+Kernel command line
+===================
+
+root=/dev/cifs
+
+This is just a virtual device that basically tells the kernel to mount
+the root file system via SMB protocol.
+
+cifsroot=//<server-ip>/<share>[,options]
+
+Enables the kernel to mount the root file system via SMB that are
+located in the <server-ip> and <share> specified in this option.
+
+The default mount options are set in fs/cifs/cifsroot.c.
+
+server-ip
+	IPv4 address of the server.
+
+share
+	Path to SMB share (rootfs).
+
+options
+	Optional mount options. For more information, see mount.cifs(8).
+
+Examples
+========
+
+Export root file system as a Samba share in smb.conf file.
+
+...
+[linux]
+	path = /path/to/rootfs
+	read only = no
+	guest ok = yes
+	force user = root
+	force group = root
+	browseable = yes
+	writeable = yes
+	admin users = root
+	public = yes
+	create mask = 0777
+	directory mask = 0777
+...
+
+Restart smb service.
+
+# systemctl restart smb
+
+Test it under QEMU on a kernel built with CONFIG_CIFS_ROOT and
+CONFIG_IP_PNP options enabled.
+
+# qemu-system-x86_64 -enable-kvm -cpu host -m 1024 \
+  -kernel /path/to/linux/arch/x86/boot/bzImage -nographic \
+  -append "root=/dev/cifs rw ip=dhcp cifsroot=//10.0.2.2/linux,username=foo,password=bar console=ttyS0 3"
+
+
+1: https://wiki.samba.org/index.php/UNIX_Extensions
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index b16219e5dac9..99a22e57660e 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -211,3 +211,11 @@ config CIFS_FSCACHE
 	  Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data
 	  to be cached locally on disk through the general filesystem cache
 	  manager. If unsure, say N.
+
+config CIFS_ROOT
+	bool "SMB root file system (Experimental)"
+	depends on CIFS=y && IP_PNP
+	help
+	  Enables root file system support over SMB protocol.
+
+	  Most people say N here.
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index 41332f20055b..51bae9340842 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -21,3 +21,5 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
 
 cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
+
+cifs-$(CONFIG_CIFS_ROOT) += cifsroot.o
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 09b60ec5de3e..54e204589cb9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -607,6 +607,7 @@ struct smb_vol {
 	__u32 handle_timeout; /* persistent and durable handle timeout in ms */
 	unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
 	__u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
+	bool rootfs:1; /* if it's a SMB root file system */
 };
 
 /**
@@ -764,6 +765,7 @@ struct TCP_Server_Info {
 	 * reconnect.
 	 */
 	int nr_targets;
+	bool noblockcnt; /* use non-blocking connect() */
 };
 
 struct cifs_credits {
diff --git a/fs/cifs/cifsroot.c b/fs/cifs/cifsroot.c
new file mode 100644
index 000000000000..8760d9cbf25e
--- /dev/null
+++ b/fs/cifs/cifsroot.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * SMB root file system support
+ *
+ * Copyright (c) 2019 Paulo Alcantara <palcantara@suse.de>
+ */
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/root_dev.h>
+#include <linux/kernel.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <net/ipconfig.h>
+
+#define DEFAULT_MNT_OPTS \
+	"vers=1.0,cifsacl,mfsymlinks,rsize=1048576,wsize=65536,uid=0,gid=0," \
+	"hard,rootfs"
+
+static char root_dev[2048] __initdata = "";
+static char root_opts[1024] __initdata = DEFAULT_MNT_OPTS;
+
+static __be32 __init parse_srvaddr(char *start, char *end)
+{
+	char addr[sizeof("aaa.bbb.ccc.ddd")];
+	int i = 0;
+
+	while (start < end && i < sizeof(addr) - 1) {
+		if (isdigit(*start) || *start == '.')
+			addr[i++] = *start;
+		start++;
+	}
+	addr[i] = '\0';
+	return in_aton(addr);
+}
+
+/* cifsroot=//<server-ip>/<share>[,options] */
+static int __init cifs_root_setup(char *line)
+{
+	char *s;
+	int len;
+	__be32 srvaddr = htonl(INADDR_NONE);
+
+	ROOT_DEV = Root_CIFS;
+
+	if (strlen(line) > 3 && line[0] == '/' && line[1] == '/') {
+		s = strchr(&line[2], '/');
+		if (!s || s[1] == '\0')
+			return 1;
+
+		s = strchrnul(s, ',');
+		len = s - line + 1;
+		if (len <= sizeof(root_dev)) {
+			strlcpy(root_dev, line, len);
+			srvaddr = parse_srvaddr(&line[2], s);
+			if (*s) {
+				snprintf(root_opts, sizeof(root_opts), "%s,%s",
+					 DEFAULT_MNT_OPTS, s + 1);
+			}
+		}
+	}
+
+	root_server_addr = srvaddr;
+
+	return 1;
+}
+
+__setup("cifsroot=", cifs_root_setup);
+
+int __init cifs_root_data(char **dev, char **opts)
+{
+	if (!root_dev[0] || root_server_addr == htonl(INADDR_NONE)) {
+		printk(KERN_ERR "Root-CIFS: no SMB server address\n");
+		return -1;
+	}
+
+	*dev = root_dev;
+	*opts = root_opts;
+
+	return 0;
+}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e16b6cc1e31b..2850c3ce4391 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -96,7 +96,7 @@ enum {
 	Opt_multiuser, Opt_sloppy, Opt_nosharesock,
 	Opt_persistent, Opt_nopersistent,
 	Opt_resilient, Opt_noresilient,
-	Opt_domainauto, Opt_rdma, Opt_modesid,
+	Opt_domainauto, Opt_rdma, Opt_modesid, Opt_rootfs,
 	Opt_compress,
 
 	/* Mount options which take numeric value */
@@ -266,6 +266,7 @@ static const match_table_t cifs_mount_option_tokens = {
 	{ Opt_ignore, "nomand" },
 	{ Opt_ignore, "relatime" },
 	{ Opt_ignore, "_netdev" },
+	{ Opt_rootfs, "rootfs" },
 
 	{ Opt_err, NULL }
 };
@@ -1777,6 +1778,11 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
 		case Opt_nodfs:
 			vol->nodfs = 1;
 			break;
+		case Opt_rootfs:
+#ifdef CONFIG_CIFS_ROOT
+			vol->rootfs = true;
+#endif
+			break;
 		case Opt_posixpaths:
 			vol->posix_paths = 1;
 			break;
@@ -2727,7 +2733,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		goto out_err_crypto_release;
 	}
 
-	tcp_ses->noblocksnd = volume_info->noblocksnd;
+	tcp_ses->noblockcnt = volume_info->rootfs;
+	tcp_ses->noblocksnd = volume_info->noblocksnd || volume_info->rootfs;
 	tcp_ses->noautotune = volume_info->noautotune;
 	tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
 	tcp_ses->rdma = volume_info->rdma;
@@ -3873,7 +3880,11 @@ generic_ip_connect(struct TCP_Server_Info *server)
 		 socket->sk->sk_sndbuf,
 		 socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo);
 
-	rc = socket->ops->connect(socket, saddr, slen, 0);
+	rc = socket->ops->connect(socket, saddr, slen,
+				  server->noblockcnt ? O_NONBLOCK : 0);
+
+	if (rc == -EINPROGRESS)
+		rc = 0;
 	if (rc < 0) {
 		cifs_dbg(FYI, "Error %d connecting to server\n", rc);
 		sock_release(socket);
diff --git a/include/linux/root_dev.h b/include/linux/root_dev.h
index bab671b0782f..4e78651371ba 100644
--- a/include/linux/root_dev.h
+++ b/include/linux/root_dev.h
@@ -8,6 +8,7 @@
 
 enum {
 	Root_NFS = MKDEV(UNNAMED_MAJOR, 255),
+	Root_CIFS = MKDEV(UNNAMED_MAJOR, 254),
 	Root_RAM0 = MKDEV(RAMDISK_MAJOR, 0),
 	Root_RAM1 = MKDEV(RAMDISK_MAJOR, 1),
 	Root_FD0 = MKDEV(FLOPPY_MAJOR, 0),
-- 
cgit v1.2.3


From 9c66d15646760eb8982242b4531c4d4fd36118fd Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Sun, 15 Sep 2019 04:59:58 +0300
Subject: taprio: Add support for hardware offloading

This allows taprio to offload the schedule enforcement to capable
network cards, resulting in more precise windows and less CPU usage.

The gate mask acts on traffic classes (groups of queues of same
priority), as specified in IEEE 802.1Q-2018, and following the existing
taprio and mqprio semantics.
It is up to the driver to perform conversion between tc and individual
netdev queues if for some reason it needs to make that distinction.

Full offload is requested from the network interface by specifying
"flags 2" in the tc qdisc creation command, which in turn corresponds to
the TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD bit.

The important detail here is the clockid which is implicitly /dev/ptpN
for full offload, and hence not configurable.

A reference counting API is added to support the use case where Ethernet
drivers need to keep the taprio offload structure locally (i.e. they are
a multi-port switch driver, and configuring a port depends on the
settings of other ports as well). The refcount_t variable is kept in a
private structure (__tc_taprio_qopt_offload) and not exposed to drivers.

In the future, the private structure might also be expanded with a
backpointer to taprio_sched *q, to implement the notification system
described in the patch (of when admin became oper, or an error occurred,
etc, so the offload can be monitored with 'tc qdisc show').

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |   1 +
 include/net/pkt_sched.h        |  23 +++
 include/uapi/linux/pkt_sched.h |   3 +-
 net/sched/sch_taprio.c         | 409 ++++++++++++++++++++++++++++++++++++-----
 4 files changed, 392 insertions(+), 44 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d7d5626002e9..9eda1c31d1f7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -847,6 +847,7 @@ enum tc_setup_type {
 	TC_SETUP_QDISC_ETF,
 	TC_SETUP_ROOT_QDISC,
 	TC_SETUP_QDISC_GRED,
+	TC_SETUP_QDISC_TAPRIO,
 };
 
 /* These structures hold the attributes of bpf state that are being passed
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index a16fbe9a2a67..d1632979622e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -161,4 +161,27 @@ struct tc_etf_qopt_offload {
 	s32 queue;
 };
 
+struct tc_taprio_sched_entry {
+	u8 command; /* TC_TAPRIO_CMD_* */
+
+	/* The gate_mask in the offloading side refers to traffic classes */
+	u32 gate_mask;
+	u32 interval;
+};
+
+struct tc_taprio_qopt_offload {
+	u8 enable;
+	ktime_t base_time;
+	u64 cycle_time;
+	u64 cycle_time_extension;
+
+	size_t num_entries;
+	struct tc_taprio_sched_entry entries[0];
+};
+
+/* Reference counting */
+struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
+						  *offload);
+void taprio_offload_free(struct tc_taprio_qopt_offload *offload);
+
 #endif
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 18f185299f47..5011259b8f67 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1160,7 +1160,8 @@ enum {
  *       [TCA_TAPRIO_ATTR_SCHED_ENTRY_INTERVAL]
  */
 
-#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST 0x1
+#define TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST	BIT(0)
+#define TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD	BIT(1)
 
 enum {
 	TCA_TAPRIO_ATTR_UNSPEC,
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 84b863e2bdbd..2f7b34205c82 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -29,8 +29,8 @@ static DEFINE_SPINLOCK(taprio_list_lock);
 
 #define TAPRIO_ALL_GATES_OPEN -1
 
-#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST))
 #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
+#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)
 
 struct sched_entry {
 	struct list_head list;
@@ -75,9 +75,16 @@ struct taprio_sched {
 	struct sched_gate_list __rcu *admin_sched;
 	struct hrtimer advance_timer;
 	struct list_head taprio_list;
+	struct sk_buff *(*dequeue)(struct Qdisc *sch);
+	struct sk_buff *(*peek)(struct Qdisc *sch);
 	u32 txtime_delay;
 };
 
+struct __tc_taprio_qopt_offload {
+	refcount_t users;
+	struct tc_taprio_qopt_offload offload;
+};
+
 static ktime_t sched_base_time(const struct sched_gate_list *sched)
 {
 	if (!sched)
@@ -268,6 +275,19 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
 	return entry;
 }
 
+static bool taprio_flags_valid(u32 flags)
+{
+	/* Make sure no other flag bits are set. */
+	if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
+		      TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
+		return false;
+	/* txtime-assist and full offload are mutually exclusive */
+	if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
+	    (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
+		return false;
+	return true;
+}
+
 /* This returns the tstamp value set by TCP in terms of the set clock. */
 static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
 {
@@ -417,7 +437,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	return qdisc_enqueue(skb, child, to_free);
 }
 
-static struct sk_buff *taprio_peek(struct Qdisc *sch)
+static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
@@ -461,6 +481,36 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch)
 	return NULL;
 }
 
+static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct Qdisc *child = q->qdiscs[i];
+
+		if (unlikely(!child))
+			continue;
+
+		skb = child->ops->peek(child);
+		if (!skb)
+			continue;
+
+		return skb;
+	}
+
+	return NULL;
+}
+
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+
+	return q->peek(sch);
+}
+
 static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
 {
 	atomic_set(&entry->budget,
@@ -468,7 +518,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
 			     atomic64_read(&q->picos_per_byte)));
 }
 
-static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
 {
 	struct taprio_sched *q = qdisc_priv(sch);
 	struct net_device *dev = qdisc_dev(sch);
@@ -550,6 +600,40 @@ done:
 	return skb;
 }
 
+static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		struct Qdisc *child = q->qdiscs[i];
+
+		if (unlikely(!child))
+			continue;
+
+		skb = child->ops->dequeue(child);
+		if (unlikely(!skb))
+			continue;
+
+		qdisc_bstats_update(sch, skb);
+		qdisc_qstats_backlog_dec(sch, skb);
+		sch->q.qlen--;
+
+		return skb;
+	}
+
+	return NULL;
+}
+
+static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+
+	return q->dequeue(sch);
+}
+
 static bool should_restart_cycle(const struct sched_gate_list *oper,
 				 const struct sched_entry *entry)
 {
@@ -932,6 +1016,9 @@ static void taprio_start_sched(struct Qdisc *sch,
 	struct taprio_sched *q = qdisc_priv(sch);
 	ktime_t expires;
 
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
+		return;
+
 	expires = hrtimer_get_expires(&q->advance_timer);
 	if (expires == 0)
 		expires = KTIME_MAX;
@@ -1011,6 +1098,254 @@ static void setup_txtime(struct taprio_sched *q,
 	}
 }
 
+static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
+{
+	size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries +
+		      sizeof(struct __tc_taprio_qopt_offload);
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = kzalloc(size, GFP_KERNEL);
+	if (!__offload)
+		return NULL;
+
+	refcount_set(&__offload->users, 1);
+
+	return &__offload->offload;
+}
+
+struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
+						  *offload)
+{
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
+				 offload);
+
+	refcount_inc(&__offload->users);
+
+	return offload;
+}
+EXPORT_SYMBOL_GPL(taprio_offload_get);
+
+void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
+{
+	struct __tc_taprio_qopt_offload *__offload;
+
+	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
+				 offload);
+
+	if (!refcount_dec_and_test(&__offload->users))
+		return;
+
+	kfree(__offload);
+}
+EXPORT_SYMBOL_GPL(taprio_offload_free);
+
+/* The function will only serve to keep the pointers to the "oper" and "admin"
+ * schedules valid in relation to their base times, so when calling dump() the
+ * users looks at the right schedules.
+ * When using full offload, the admin configuration is promoted to oper at the
+ * base_time in the PHC time domain.  But because the system time is not
+ * necessarily in sync with that, we can't just trigger a hrtimer to call
+ * switch_schedules at the right hardware time.
+ * At the moment we call this by hand right away from taprio, but in the future
+ * it will be useful to create a mechanism for drivers to notify taprio of the
+ * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
+ * This is left as TODO.
+ */
+void taprio_offload_config_changed(struct taprio_sched *q)
+{
+	struct sched_gate_list *oper, *admin;
+
+	spin_lock(&q->current_entry_lock);
+
+	oper = rcu_dereference_protected(q->oper_sched,
+					 lockdep_is_held(&q->current_entry_lock));
+	admin = rcu_dereference_protected(q->admin_sched,
+					  lockdep_is_held(&q->current_entry_lock));
+
+	switch_schedules(q, &admin, &oper);
+
+	spin_unlock(&q->current_entry_lock);
+}
+
+static void taprio_sched_to_offload(struct taprio_sched *q,
+				    struct sched_gate_list *sched,
+				    const struct tc_mqprio_qopt *mqprio,
+				    struct tc_taprio_qopt_offload *offload)
+{
+	struct sched_entry *entry;
+	int i = 0;
+
+	offload->base_time = sched->base_time;
+	offload->cycle_time = sched->cycle_time;
+	offload->cycle_time_extension = sched->cycle_time_extension;
+
+	list_for_each_entry(entry, &sched->entries, list) {
+		struct tc_taprio_sched_entry *e = &offload->entries[i];
+
+		e->command = entry->command;
+		e->interval = entry->interval;
+		e->gate_mask = entry->gate_mask;
+		i++;
+	}
+
+	offload->num_entries = i;
+}
+
+static int taprio_enable_offload(struct net_device *dev,
+				 struct tc_mqprio_qopt *mqprio,
+				 struct taprio_sched *q,
+				 struct sched_gate_list *sched,
+				 struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_taprio_qopt_offload *offload;
+	int err = 0;
+
+	if (!ops->ndo_setup_tc) {
+		NL_SET_ERR_MSG(extack,
+			       "Device does not support taprio offload");
+		return -EOPNOTSUPP;
+	}
+
+	offload = taprio_offload_alloc(sched->num_entries);
+	if (!offload) {
+		NL_SET_ERR_MSG(extack,
+			       "Not enough memory for enabling offload mode");
+		return -ENOMEM;
+	}
+	offload->enable = 1;
+	taprio_sched_to_offload(q, sched, mqprio, offload);
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Device failed to setup taprio offload");
+		goto done;
+	}
+
+	taprio_offload_config_changed(q);
+
+done:
+	taprio_offload_free(offload);
+
+	return err;
+}
+
+static int taprio_disable_offload(struct net_device *dev,
+				  struct taprio_sched *q,
+				  struct netlink_ext_ack *extack)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct tc_taprio_qopt_offload *offload;
+	int err;
+
+	if (!FULL_OFFLOAD_IS_ENABLED(q->flags))
+		return 0;
+
+	if (!ops->ndo_setup_tc)
+		return -EOPNOTSUPP;
+
+	offload = taprio_offload_alloc(0);
+	if (!offload) {
+		NL_SET_ERR_MSG(extack,
+			       "Not enough memory to disable offload mode");
+		return -ENOMEM;
+	}
+	offload->enable = 0;
+
+	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Device failed to disable offload");
+		goto out;
+	}
+
+out:
+	taprio_offload_free(offload);
+
+	return err;
+}
+
+/* If full offload is enabled, the only possible clockid is the net device's
+ * PHC. For that reason, specifying a clockid through netlink is incorrect.
+ * For txtime-assist, it is implicitly assumed that the device's PHC is kept
+ * in sync with the specified clockid via a user space daemon such as phc2sys.
+ * For both software taprio and txtime-assist, the clockid is used for the
+ * hrtimer that advances the schedule and hence mandatory.
+ */
+static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
+				struct netlink_ext_ack *extack)
+{
+	struct taprio_sched *q = qdisc_priv(sch);
+	struct net_device *dev = qdisc_dev(sch);
+	int err = -EINVAL;
+
+	if (FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+		const struct ethtool_ops *ops = dev->ethtool_ops;
+		struct ethtool_ts_info info = {
+			.cmd = ETHTOOL_GET_TS_INFO,
+			.phc_index = -1,
+		};
+
+		if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+			NL_SET_ERR_MSG(extack,
+				       "The 'clockid' cannot be specified for full offload");
+			goto out;
+		}
+
+		if (ops && ops->get_ts_info)
+			err = ops->get_ts_info(dev, &info);
+
+		if (err || info.phc_index < 0) {
+			NL_SET_ERR_MSG(extack,
+				       "Device does not have a PTP clock");
+			err = -ENOTSUPP;
+			goto out;
+		}
+	} else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
+		int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
+
+		/* We only support static clockids and we don't allow
+		 * for it to be modified after the first init.
+		 */
+		if (clockid < 0 ||
+		    (q->clockid != -1 && q->clockid != clockid)) {
+			NL_SET_ERR_MSG(extack,
+				       "Changing the 'clockid' of a running schedule is not supported");
+			err = -ENOTSUPP;
+			goto out;
+		}
+
+		switch (clockid) {
+		case CLOCK_REALTIME:
+			q->tk_offset = TK_OFFS_REAL;
+			break;
+		case CLOCK_MONOTONIC:
+			q->tk_offset = TK_OFFS_MAX;
+			break;
+		case CLOCK_BOOTTIME:
+			q->tk_offset = TK_OFFS_BOOT;
+			break;
+		case CLOCK_TAI:
+			q->tk_offset = TK_OFFS_TAI;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+			err = -EINVAL;
+			goto out;
+		}
+
+		q->clockid = clockid;
+	} else {
+		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
+		goto out;
+	}
+out:
+	return err;
+}
+
 static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 			 struct netlink_ext_ack *extack)
 {
@@ -1020,9 +1355,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	struct net_device *dev = qdisc_dev(sch);
 	struct tc_mqprio_qopt *mqprio = NULL;
 	u32 taprio_flags = 0;
-	int i, err, clockid;
 	unsigned long flags;
 	ktime_t start;
+	int i, err;
 
 	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt,
 					  taprio_policy, extack);
@@ -1038,7 +1373,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		if (q->flags != 0 && q->flags != taprio_flags) {
 			NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported");
 			return -EOPNOTSUPP;
-		} else if (!FLAGS_VALID(taprio_flags)) {
+		} else if (!taprio_flags_valid(taprio_flags)) {
 			NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid");
 			return -EINVAL;
 		}
@@ -1078,30 +1413,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 		goto free_sched;
 	}
 
-	if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
-		clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]);
-
-		/* We only support static clockids and we don't allow
-		 * for it to be modified after the first init.
-		 */
-		if (clockid < 0 ||
-		    (q->clockid != -1 && q->clockid != clockid)) {
-			NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported");
-			err = -ENOTSUPP;
-			goto free_sched;
-		}
-
-		q->clockid = clockid;
-	}
-
-	if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) {
-		NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
-		err = -EINVAL;
+	err = taprio_parse_clockid(sch, tb, extack);
+	if (err < 0)
 		goto free_sched;
-	}
 
 	taprio_set_picos_per_byte(dev, q);
 
+	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+		err = taprio_enable_offload(dev, mqprio, q, new_admin, extack);
+	else
+		err = taprio_disable_offload(dev, q, extack);
+	if (err)
+		goto free_sched;
+
 	/* Protects against enqueue()/dequeue() */
 	spin_lock_bh(qdisc_lock(sch));
 
@@ -1116,6 +1440,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
+	    !FULL_OFFLOAD_IS_ENABLED(taprio_flags) &&
 	    !hrtimer_active(&q->advance_timer)) {
 		hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS);
 		q->advance_timer.function = advance_sched;
@@ -1134,23 +1459,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
 					       mqprio->prio_tc_map[i]);
 	}
 
-	switch (q->clockid) {
-	case CLOCK_REALTIME:
-		q->tk_offset = TK_OFFS_REAL;
-		break;
-	case CLOCK_MONOTONIC:
-		q->tk_offset = TK_OFFS_MAX;
-		break;
-	case CLOCK_BOOTTIME:
-		q->tk_offset = TK_OFFS_BOOT;
-		break;
-	case CLOCK_TAI:
-		q->tk_offset = TK_OFFS_TAI;
-		break;
-	default:
-		NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
-		err = -EINVAL;
-		goto unlock;
+	if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) {
+		q->dequeue = taprio_dequeue_offload;
+		q->peek = taprio_peek_offload;
+	} else {
+		/* Be sure to always keep the function pointers
+		 * in a consistent state.
+		 */
+		q->dequeue = taprio_dequeue_soft;
+		q->peek = taprio_peek_soft;
 	}
 
 	err = taprio_get_start_time(sch, new_admin, &start);
@@ -1212,6 +1529,8 @@ static void taprio_destroy(struct Qdisc *sch)
 
 	hrtimer_cancel(&q->advance_timer);
 
+	taprio_disable_offload(dev, q, NULL);
+
 	if (q->qdiscs) {
 		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
 			qdisc_put(q->qdiscs[i]);
@@ -1241,6 +1560,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
 	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
 	q->advance_timer.function = advance_sched;
 
+	q->dequeue = taprio_dequeue_soft;
+	q->peek = taprio_peek_soft;
+
 	q->root = sch;
 
 	/* We only support static clockids. Use an invalid value as default
@@ -1423,7 +1745,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
 		goto options_error;
 
-	if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
+	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
+	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
 		goto options_error;
 
 	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
-- 
cgit v1.2.3


From 47d23af29220bf38c312cf434487500d93789c7c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Sun, 15 Sep 2019 04:59:59 +0300
Subject: net: dsa: Pass ndo_setup_tc slave callback to drivers

DSA currently handles shared block filters (for the classifier-action
qdisc) in the core due to what I believe are simply pragmatic reasons -
hiding the complexity from drivers and offerring a simple API for port
mirroring.

Extend the dsa_slave_setup_tc function by passing all other qdisc
offloads to the driver layer, where the driver may choose what it
implements and how. DSA is simply a pass-through in this case.

Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Acked-by: Kurt Kanzenbach <kurt@linutronix.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h |  2 ++
 net/dsa/slave.c   | 12 ++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 96acb14ec1a8..541fb514e31d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -515,6 +515,8 @@ struct dsa_switch_ops {
 				   bool ingress);
 	void	(*port_mirror_del)(struct dsa_switch *ds, int port,
 				   struct dsa_mall_mirror_tc_entry *mirror);
+	int	(*port_setup_tc)(struct dsa_switch *ds, int port,
+				 enum tc_setup_type type, void *type_data);
 
 	/*
 	 * Cross-chip operations
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9a88035517a6..75d58229a4bd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1035,12 +1035,16 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			      void *type_data)
 {
-	switch (type) {
-	case TC_SETUP_BLOCK:
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_switch *ds = dp->ds;
+
+	if (type == TC_SETUP_BLOCK)
 		return dsa_slave_setup_tc_block(dev, type_data);
-	default:
+
+	if (!ds->ops->port_setup_tc)
 		return -EOPNOTSUPP;
-	}
+
+	return ds->ops->port_setup_tc(ds, dp->index, type, type_data);
 }
 
 static void dsa_slave_get_stats64(struct net_device *dev,
-- 
cgit v1.2.3


From 9f2f13f4ffb13ee914105dfc9f860d68cae98108 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Mon, 16 Sep 2019 10:35:25 +0300
Subject: ethtool: implement Energy Detect Powerdown support via phy-tunable

The `phy_tunable_id` has been named `ETHTOOL_PHY_EDPD` since it looks like
this feature is common across other PHYs (like EEE), and defining
`ETHTOOL_PHY_ENERGY_DETECT_POWER_DOWN` seems too long.

The way EDPD works, is that the RX block is put to a lower power mode,
except for link-pulse detection circuits. The TX block is also put to low
power mode, but the PHY wakes-up periodically to send link pulses, to avoid
lock-ups in case the other side is also in EDPD mode.

Currently, there are 2 PHY drivers that look like they could use this new
PHY tunable feature: the `adin` && `micrel` PHYs.

The ADIN's datasheet mentions that TX pulses are at intervals of 1 second
default each, and they can be disabled. For the Micrel KSZ9031 PHY, the
datasheet does not mention whether they can be disabled, but mentions that
they can modified.

The way this change is structured, is similar to the PHY tunable downshift
control:
* a `ETHTOOL_PHY_EDPD_DFLT_TX_MSECS` value is exposed to cover a default
  TX interval; some PHYs could specify a certain value that makes sense
* `ETHTOOL_PHY_EDPD_NO_TX` would disable TX when EDPD is enabled
* `ETHTOOL_PHY_EDPD_DISABLE` will disable EDPD

As noted by the `ETHTOOL_PHY_EDPD_DFLT_TX_MSECS` the interval unit is 1
millisecond, which should cover a reasonable range of intervals:
 - from 1 millisecond, which does not sound like much of a power-saver
 - to ~65 seconds which is quite a lot to wait for a link to come up when
   plugging a cable

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 22 ++++++++++++++++++++++
 net/core/ethtool.c           |  6 ++++++
 2 files changed, 28 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index dd06302aa93e..8938b76c4ee3 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -259,10 +259,32 @@ struct ethtool_tunable {
 #define ETHTOOL_PHY_FAST_LINK_DOWN_ON	0
 #define ETHTOOL_PHY_FAST_LINK_DOWN_OFF	0xff
 
+/* Energy Detect Power Down (EDPD) is a feature supported by some PHYs, where
+ * the PHY's RX & TX blocks are put into a low-power mode when there is no
+ * link detected (typically cable is un-plugged). For RX, only a minimal
+ * link-detection is available, and for TX the PHY wakes up to send link pulses
+ * to avoid any lock-ups in case the peer PHY may also be running in EDPD mode.
+ *
+ * Some PHYs may support configuration of the wake-up interval for TX pulses,
+ * and some PHYs may support only disabling TX pulses entirely. For the latter
+ * a special value is required (ETHTOOL_PHY_EDPD_NO_TX) so that this can be
+ * configured from userspace (should the user want it).
+ *
+ * The interval units for TX wake-up are in milliseconds, since this should
+ * cover a reasonable range of intervals:
+ *  - from 1 millisecond, which does not sound like much of a power-saver
+ *  - to ~65 seconds which is quite a lot to wait for a link to come up when
+ *    plugging a cable
+ */
+#define ETHTOOL_PHY_EDPD_DFLT_TX_MSECS		0xffff
+#define ETHTOOL_PHY_EDPD_NO_TX			0xfffe
+#define ETHTOOL_PHY_EDPD_DISABLE		0
+
 enum phy_tunable_id {
 	ETHTOOL_PHY_ID_UNSPEC,
 	ETHTOOL_PHY_DOWNSHIFT,
 	ETHTOOL_PHY_FAST_LINK_DOWN,
+	ETHTOOL_PHY_EDPD,
 	/*
 	 * Add your fresh new phy tunable attribute above and remember to update
 	 * phy_tunable_strings[] in net/core/ethtool.c
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6288e69e94fc..c763106c73fc 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -133,6 +133,7 @@ phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
 	[ETHTOOL_ID_UNSPEC]     = "Unspec",
 	[ETHTOOL_PHY_DOWNSHIFT]	= "phy-downshift",
 	[ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
+	[ETHTOOL_PHY_EDPD]	= "phy-energy-detect-power-down",
 };
 
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
@@ -2451,6 +2452,11 @@ static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna)
 		    tuna->type_id != ETHTOOL_TUNABLE_U8)
 			return -EINVAL;
 		break;
+	case ETHTOOL_PHY_EDPD:
+		if (tuna->len != sizeof(u16) ||
+		    tuna->type_id != ETHTOOL_TUNABLE_U16)
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 8fedf805fa42842c0ef0aece9dbee957d805a8ea Mon Sep 17 00:00:00 2001
From: Bibby Hsieh <bibby.hsieh@mediatek.com>
Date: Thu, 29 Aug 2019 09:48:08 +0800
Subject: dt-binding: gce: add gce header file for mt8183

Add documentation for the mt8183 gce.

Add gce header file defined the gce hardware event,
subsys number and constant for mt8183.

Signed-off-by: Bibby Hsieh <bibby.hsieh@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 .../devicetree/bindings/mailbox/mtk-gce.txt        |   6 +-
 include/dt-bindings/gce/mt8183-gce.h               | 175 +++++++++++++++++++++
 2 files changed, 178 insertions(+), 3 deletions(-)
 create mode 100644 include/dt-bindings/gce/mt8183-gce.h

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
index cfe40b01d164..1f7f8f2a3f49 100644
--- a/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
+++ b/Documentation/devicetree/bindings/mailbox/mtk-gce.txt
@@ -9,7 +9,7 @@ CMDQ driver uses mailbox framework for communication. Please refer to
 mailbox.txt for generic information about mailbox device-tree bindings.
 
 Required properties:
-- compatible: Must be "mediatek,mt8173-gce"
+- compatible: can be "mediatek,mt8173-gce" or "mediatek,mt8183-gce"
 - reg: Address range of the GCE unit
 - interrupts: The interrupt signal from the GCE block
 - clock: Clocks according to the common clock binding
@@ -28,8 +28,8 @@ Required properties for a client device:
 - mediatek,gce-subsys: u32, specify the sub-system id which is corresponding
   to the register address.
 
-Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h'. Such as
-sub-system ids, thread priority, event ids.
+Some vaules of properties are defined in 'dt-bindings/gce/mt8173-gce.h'
+or 'dt-binding/gce/mt8183-gce.h'. Such as sub-system ids, thread priority, event ids.
 
 Example:
 
diff --git a/include/dt-bindings/gce/mt8183-gce.h b/include/dt-bindings/gce/mt8183-gce.h
new file mode 100644
index 000000000000..29c967476f73
--- /dev/null
+++ b/include/dt-bindings/gce/mt8183-gce.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019 MediaTek Inc.
+ * Author: Bibby Hsieh <bibby.hsieh@mediatek.com>
+ *
+ */
+
+#ifndef _DT_BINDINGS_GCE_MT8183_H
+#define _DT_BINDINGS_GCE_MT8183_H
+
+#define CMDQ_NO_TIMEOUT		0xffffffff
+
+/* GCE HW thread priority */
+#define CMDQ_THR_PRIO_LOWEST	0
+#define CMDQ_THR_PRIO_HIGHEST	1
+
+/* GCE SUBSYS */
+#define SUBSYS_1300XXXX		0
+#define SUBSYS_1400XXXX		1
+#define SUBSYS_1401XXXX		2
+#define SUBSYS_1402XXXX		3
+#define SUBSYS_1502XXXX		4
+#define SUBSYS_1880XXXX		5
+#define SUBSYS_1881XXXX		6
+#define SUBSYS_1882XXXX		7
+#define SUBSYS_1883XXXX		8
+#define SUBSYS_1884XXXX		9
+#define SUBSYS_1000XXXX		10
+#define SUBSYS_1001XXXX		11
+#define SUBSYS_1002XXXX		12
+#define SUBSYS_1003XXXX		13
+#define SUBSYS_1004XXXX		14
+#define SUBSYS_1005XXXX		15
+#define SUBSYS_1020XXXX		16
+#define SUBSYS_1028XXXX		17
+#define SUBSYS_1700XXXX		18
+#define SUBSYS_1701XXXX		19
+#define SUBSYS_1702XXXX		20
+#define SUBSYS_1703XXXX		21
+#define SUBSYS_1800XXXX		22
+#define SUBSYS_1801XXXX		23
+#define SUBSYS_1802XXXX		24
+#define SUBSYS_1804XXXX		25
+#define SUBSYS_1805XXXX		26
+#define SUBSYS_1808XXXX		27
+#define SUBSYS_180aXXXX		28
+#define SUBSYS_180bXXXX		29
+
+#define CMDQ_EVENT_DISP_RDMA0_SOF					0
+#define CMDQ_EVENT_DISP_RDMA1_SOF					1
+#define CMDQ_EVENT_MDP_RDMA0_SOF					2
+#define CMDQ_EVENT_MDP_RSZ0_SOF						4
+#define CMDQ_EVENT_MDP_RSZ1_SOF						5
+#define CMDQ_EVENT_MDP_TDSHP_SOF					6
+#define CMDQ_EVENT_MDP_WROT0_SOF					7
+#define CMDQ_EVENT_MDP_WDMA0_SOF					8
+#define CMDQ_EVENT_DISP_OVL0_SOF					9
+#define CMDQ_EVENT_DISP_OVL0_2L_SOF					10
+#define CMDQ_EVENT_DISP_OVL1_2L_SOF					11
+#define CMDQ_EVENT_DISP_WDMA0_SOF					12
+#define CMDQ_EVENT_DISP_COLOR0_SOF					13
+#define CMDQ_EVENT_DISP_CCORR0_SOF					14
+#define CMDQ_EVENT_DISP_AAL0_SOF					15
+#define CMDQ_EVENT_DISP_GAMMA0_SOF					16
+#define CMDQ_EVENT_DISP_DITHER0_SOF					17
+#define CMDQ_EVENT_DISP_PWM0_SOF					18
+#define CMDQ_EVENT_DISP_DSI0_SOF					19
+#define CMDQ_EVENT_DISP_DPI0_SOF					20
+#define CMDQ_EVENT_DISP_RSZ_SOF						22
+#define CMDQ_EVENT_MDP_AAL_SOF						23
+#define CMDQ_EVENT_MDP_CCORR_SOF					24
+#define CMDQ_EVENT_DISP_DBI_SOF						25
+#define CMDQ_EVENT_DISP_RDMA0_EOF					26
+#define CMDQ_EVENT_DISP_RDMA1_EOF					27
+#define CMDQ_EVENT_MDP_RDMA0_EOF					28
+#define CMDQ_EVENT_MDP_RSZ0_EOF						30
+#define CMDQ_EVENT_MDP_RSZ1_EOF						31
+#define CMDQ_EVENT_MDP_TDSHP_EOF					32
+#define CMDQ_EVENT_MDP_WROT0_EOF					33
+#define CMDQ_EVENT_MDP_WDMA0_EOF					34
+#define CMDQ_EVENT_DISP_OVL0_EOF					35
+#define CMDQ_EVENT_DISP_OVL0_2L_EOF					36
+#define CMDQ_EVENT_DISP_OVL1_2L_EOF					37
+#define CMDQ_EVENT_DISP_WDMA0_EOF					38
+#define CMDQ_EVENT_DISP_COLOR0_EOF					39
+#define CMDQ_EVENT_DISP_CCORR0_EOF					40
+#define CMDQ_EVENT_DISP_AAL0_EOF					41
+#define CMDQ_EVENT_DISP_GAMMA0_EOF					42
+#define CMDQ_EVENT_DISP_DITHER0_EOF					43
+#define CMDQ_EVENT_DSI0_EOF						44
+#define CMDQ_EVENT_DPI0_EOF						45
+#define CMDQ_EVENT_DISP_RSZ_EOF						47
+#define CMDQ_EVENT_MDP_AAL_EOF						48
+#define CMDQ_EVENT_MDP_CCORR_EOF					49
+#define CMDQ_EVENT_DBI_EOF						50
+#define CMDQ_EVENT_MUTEX_STREAM_DONE0					130
+#define CMDQ_EVENT_MUTEX_STREAM_DONE1					131
+#define CMDQ_EVENT_MUTEX_STREAM_DONE2					132
+#define CMDQ_EVENT_MUTEX_STREAM_DONE3					133
+#define CMDQ_EVENT_MUTEX_STREAM_DONE4					134
+#define CMDQ_EVENT_MUTEX_STREAM_DONE5					135
+#define CMDQ_EVENT_MUTEX_STREAM_DONE6					136
+#define CMDQ_EVENT_MUTEX_STREAM_DONE7					137
+#define CMDQ_EVENT_MUTEX_STREAM_DONE8					138
+#define CMDQ_EVENT_MUTEX_STREAM_DONE9					139
+#define CMDQ_EVENT_MUTEX_STREAM_DONE10					140
+#define CMDQ_EVENT_MUTEX_STREAM_DONE11					141
+#define CMDQ_EVENT_DISP_RDMA0_BUF_UNDERRUN_EVEN				142
+#define CMDQ_EVENT_DISP_RDMA1_BUF_UNDERRUN_EVEN				143
+#define CMDQ_EVENT_DSI0_TE_EVENT					144
+#define CMDQ_EVENT_DSI0_IRQ_EVENT					145
+#define CMDQ_EVENT_DSI0_DONE_EVENT					146
+#define CMDQ_EVENT_DISP_WDMA0_SW_RST_DONE				150
+#define CMDQ_EVENT_MDP_WDMA_SW_RST_DONE					151
+#define CMDQ_EVENT_MDP_WROT0_SW_RST_DONE				152
+#define CMDQ_EVENT_MDP_RDMA0_SW_RST_DONE				154
+#define CMDQ_EVENT_DISP_OVL0_FRAME_RST_DONE_PULE			155
+#define CMDQ_EVENT_DISP_OVL0_2L_FRAME_RST_DONE_ULSE			156
+#define CMDQ_EVENT_DISP_OVL1_2L_FRAME_RST_DONE_ULSE			157
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_0					257
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_1					258
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_2					259
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_3					260
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_4					261
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_5					262
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_6					263
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_7					264
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_8					265
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_9					266
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_10					267
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_11					268
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_12					269
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_13					270
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_14					271
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_15					272
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_16					273
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_17					274
+#define CMDQ_EVENT_ISP_FRAME_DONE_P2_18					275
+#define CMDQ_EVENT_AMD_FRAME_DONE					276
+#define CMDQ_EVENT_DVE_DONE						277
+#define CMDQ_EVENT_WMFE_DONE						278
+#define CMDQ_EVENT_RSC_DONE						279
+#define CMDQ_EVENT_MFB_DONE						280
+#define CMDQ_EVENT_WPE_A_DONE						281
+#define CMDQ_EVENT_SPE_B_DONE						282
+#define CMDQ_EVENT_OCC_DONE						283
+#define CMDQ_EVENT_VENC_CMDQ_FRAME_DONE					289
+#define CMDQ_EVENT_JPG_ENC_CMDQ_DONE					290
+#define CMDQ_EVENT_JPG_DEC_CMDQ_DONE					291
+#define CMDQ_EVENT_VENC_CMDQ_MB_DONE					292
+#define CMDQ_EVENT_VENC_CMDQ_128BYTE_DONE				293
+#define CMDQ_EVENT_ISP_FRAME_DONE_A					321
+#define CMDQ_EVENT_ISP_FRAME_DONE_B					322
+#define CMDQ_EVENT_CAMSV0_PASS1_DONE					323
+#define CMDQ_EVENT_CAMSV1_PASS1_DONE					324
+#define CMDQ_EVENT_CAMSV2_PASS1_DONE					325
+#define CMDQ_EVENT_TSF_DONE						326
+#define CMDQ_EVENT_SENINF_CAM0_FIFO_FULL				327
+#define CMDQ_EVENT_SENINF_CAM1_FIFO_FULL				328
+#define CMDQ_EVENT_SENINF_CAM2_FIFO_FULL				329
+#define CMDQ_EVENT_SENINF_CAM3_FIFO_FULL				330
+#define CMDQ_EVENT_SENINF_CAM4_FIFO_FULL				331
+#define CMDQ_EVENT_SENINF_CAM5_FIFO_FULL				332
+#define CMDQ_EVENT_SENINF_CAM6_FIFO_FULL				333
+#define CMDQ_EVENT_SENINF_CAM7_FIFO_FULL				334
+#define CMDQ_EVENT_IPU_CORE0_DONE0					353
+#define CMDQ_EVENT_IPU_CORE0_DONE1					354
+#define CMDQ_EVENT_IPU_CORE0_DONE2					355
+#define CMDQ_EVENT_IPU_CORE0_DONE3					356
+#define CMDQ_EVENT_IPU_CORE1_DONE0					385
+#define CMDQ_EVENT_IPU_CORE1_DONE1					386
+#define CMDQ_EVENT_IPU_CORE1_DONE2					387
+#define CMDQ_EVENT_IPU_CORE1_DONE3					388
+
+#endif
-- 
cgit v1.2.3


From 6058f11870b8e6d4f5cc7b591097c00bf69a000d Mon Sep 17 00:00:00 2001
From: Bibby Hsieh <bibby.hsieh@mediatek.com>
Date: Thu, 29 Aug 2019 09:48:12 +0800
Subject: mailbox: mediatek: cmdq: clear the event in cmdq initial flow

GCE hardware stored event information in own internal sysram,
if the initial value in those sysram is not zero value
it will cause a situation that gce can wait the event immediately
after client ask gce to wait event but not really trigger the
corresponding hardware.

In order to make sure that the wait event function is
exactly correct, we need to clear the sysram value in
cmdq initial flow.

Fixes: 623a6143a845 ("mailbox: mediatek: Add Mediatek CMDQ driver")

Signed-off-by: Bibby Hsieh <bibby.hsieh@mediatek.com>
Reviewed-by: CK Hu <ck.hu@mediatek.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/mtk-cmdq-mailbox.c       | 5 +++++
 include/linux/mailbox/mtk-cmdq-mailbox.h | 3 +++
 include/linux/soc/mediatek/mtk-cmdq.h    | 3 ---
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/mailbox/mtk-cmdq-mailbox.c b/drivers/mailbox/mtk-cmdq-mailbox.c
index 69daaadc3a5f..9a6ce9f5a7db 100644
--- a/drivers/mailbox/mtk-cmdq-mailbox.c
+++ b/drivers/mailbox/mtk-cmdq-mailbox.c
@@ -21,6 +21,7 @@
 #define CMDQ_NUM_CMD(t)			(t->cmd_buf_size / CMDQ_INST_SIZE)
 
 #define CMDQ_CURR_IRQ_STATUS		0x10
+#define CMDQ_SYNC_TOKEN_UPDATE		0x68
 #define CMDQ_THR_SLOT_CYCLES		0x30
 #define CMDQ_THR_BASE			0x100
 #define CMDQ_THR_SIZE			0x80
@@ -104,8 +105,12 @@ static void cmdq_thread_resume(struct cmdq_thread *thread)
 
 static void cmdq_init(struct cmdq *cmdq)
 {
+	int i;
+
 	WARN_ON(clk_enable(cmdq->clock) < 0);
 	writel(CMDQ_THR_ACTIVE_SLOT_CYCLES, cmdq->base + CMDQ_THR_SLOT_CYCLES);
+	for (i = 0; i <= CMDQ_MAX_EVENT; i++)
+		writel(i, cmdq->base + CMDQ_SYNC_TOKEN_UPDATE);
 	clk_disable(cmdq->clock);
 }
 
diff --git a/include/linux/mailbox/mtk-cmdq-mailbox.h b/include/linux/mailbox/mtk-cmdq-mailbox.h
index ccb73422c2fa..e6f54ef6698b 100644
--- a/include/linux/mailbox/mtk-cmdq-mailbox.h
+++ b/include/linux/mailbox/mtk-cmdq-mailbox.h
@@ -20,6 +20,9 @@
 #define CMDQ_WFE_WAIT			BIT(15)
 #define CMDQ_WFE_WAIT_VALUE		0x1
 
+/** cmdq event maximum */
+#define CMDQ_MAX_EVENT			0x3ff
+
 /*
  * CMDQ_CODE_MASK:
  *   set write mask
diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h
index 54ade13a9b15..4e8899972db4 100644
--- a/include/linux/soc/mediatek/mtk-cmdq.h
+++ b/include/linux/soc/mediatek/mtk-cmdq.h
@@ -13,9 +13,6 @@
 
 #define CMDQ_NO_TIMEOUT		0xffffffffu
 
-/** cmdq event maximum */
-#define CMDQ_MAX_EVENT				0x3ff
-
 struct cmdq_pkt;
 
 struct cmdq_client {
-- 
cgit v1.2.3


From 80766f87263c527b6c66a297e8d9c1d961d37eba Mon Sep 17 00:00:00 2001
From: Stefan Wahren <wahrenst@gmx.net>
Date: Sun, 18 Aug 2019 18:23:41 +0200
Subject: dt-bindings: bcm2835-cprman: Add bcm2711 support

The new BCM2711 supports an additional clock for the emmc2 block.
So we need an additional compatible.

Signed-off-by: Stefan Wahren <wahrenst@gmx.net>
Acked-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
---
 Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt | 4 +++-
 include/dt-bindings/clock/bcm2835.h                             | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt b/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
index dd906db34b32..9e0b03a6519b 100644
--- a/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm2835-cprman.txt
@@ -12,7 +12,9 @@ clock generators, but a few (like the ARM or HDMI) will source from
 the PLL dividers directly.
 
 Required properties:
-- compatible:	Should be "brcm,bcm2835-cprman"
+- compatible:	should be one of the following,
+	"brcm,bcm2711-cprman"
+	"brcm,bcm2835-cprman"
 - #clock-cells:	Should be <1>. The permitted clock-specifier values can be
 		  found in include/dt-bindings/clock/bcm2835.h
 - reg:		Specifies base physical address and size of the registers
diff --git a/include/dt-bindings/clock/bcm2835.h b/include/dt-bindings/clock/bcm2835.h
index 2cec01f96897..b60c03430cf1 100644
--- a/include/dt-bindings/clock/bcm2835.h
+++ b/include/dt-bindings/clock/bcm2835.h
@@ -58,3 +58,5 @@
 #define BCM2835_CLOCK_DSI1E		48
 #define BCM2835_CLOCK_DSI0P		49
 #define BCM2835_CLOCK_DSI1P		50
+
+#define BCM2711_CLOCK_EMMC2		51
-- 
cgit v1.2.3


From f9e55ac22ce9246c085e1c97ddda93608ce17eaf Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Wed, 28 Aug 2019 16:22:13 +0800
Subject: clk: mediatek: add pericfg clocks for MT8183

Add pericfg clocks for MT8183, it's used when support USB
remote wakeup

Cc: Weiyi Lu <weiyi.lu@mediatek.com>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lkml.kernel.org/r/1566980533-28282-2-git-send-email-chunfeng.yun@mediatek.com
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/mediatek/clk-mt8183.c      | 30 ++++++++++++++++++++++++++++++
 include/dt-bindings/clock/mt8183-clk.h |  4 ++++
 2 files changed, 34 insertions(+)

(limited to 'include')

diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c
index 94bbadc0d259..7e7452e56694 100644
--- a/drivers/clk/mediatek/clk-mt8183.c
+++ b/drivers/clk/mediatek/clk-mt8183.c
@@ -1002,6 +1002,20 @@ static const struct mtk_gate infra_clks[] = {
 		"msdc50_0_sel", 24),
 };
 
+static const struct mtk_gate_regs peri_cg_regs = {
+	.set_ofs = 0x20c,
+	.clr_ofs = 0x20c,
+	.sta_ofs = 0x20c,
+};
+
+#define GATE_PERI(_id, _name, _parent, _shift)			\
+	GATE_MTK(_id, _name, _parent, &peri_cg_regs, _shift,	\
+		&mtk_clk_gate_ops_no_setclr_inv)
+
+static const struct mtk_gate peri_clks[] = {
+	GATE_PERI(CLK_PERI_AXI, "peri_axi", "axi_sel", 31),
+};
+
 static const struct mtk_gate_regs apmixed_cg_regs = {
 	.set_ofs = 0x20,
 	.clr_ofs = 0x20,
@@ -1208,6 +1222,19 @@ static int clk_mt8183_infra_probe(struct platform_device *pdev)
 	return r;
 }
 
+static int clk_mt8183_peri_probe(struct platform_device *pdev)
+{
+	struct clk_onecell_data *clk_data;
+	struct device_node *node = pdev->dev.of_node;
+
+	clk_data = mtk_alloc_clk_data(CLK_PERI_NR_CLK);
+
+	mtk_clk_register_gates(node, peri_clks, ARRAY_SIZE(peri_clks),
+			       clk_data);
+
+	return of_clk_add_provider(node, of_clk_src_onecell_get, clk_data);
+}
+
 static int clk_mt8183_mcu_probe(struct platform_device *pdev)
 {
 	struct clk_onecell_data *clk_data;
@@ -1237,6 +1264,9 @@ static const struct of_device_id of_match_clk_mt8183[] = {
 	}, {
 		.compatible = "mediatek,mt8183-infracfg",
 		.data = clk_mt8183_infra_probe,
+	}, {
+		.compatible = "mediatek,mt8183-pericfg",
+		.data = clk_mt8183_peri_probe,
 	}, {
 		.compatible = "mediatek,mt8183-mcucfg",
 		.data = clk_mt8183_mcu_probe,
diff --git a/include/dt-bindings/clock/mt8183-clk.h b/include/dt-bindings/clock/mt8183-clk.h
index 0046506eb24c..a7b470b0ec8a 100644
--- a/include/dt-bindings/clock/mt8183-clk.h
+++ b/include/dt-bindings/clock/mt8183-clk.h
@@ -284,6 +284,10 @@
 #define CLK_INFRA_FBIST2FPC		100
 #define CLK_INFRA_NR_CLK		101
 
+/* PERICFG */
+#define CLK_PERI_AXI			0
+#define CLK_PERI_NR_CLK			1
+
 /* MFGCFG */
 #define CLK_MFG_BG3D			0
 #define CLK_MFG_NR_CLK			1
-- 
cgit v1.2.3


From d7aef6ef96e9ffa93e6248e21d864ce74e3b8b7d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 20 Aug 2019 12:05:36 +0900
Subject: clk: add include guard to clk-conf.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Link: https://lkml.kernel.org/r/20190820030536.1181-1-yamada.masahiro@socionext.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk/clk-conf.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/linux/clk/clk-conf.h b/include/linux/clk/clk-conf.h
index 85f8cf9d1226..eae9652c70cd 100644
--- a/include/linux/clk/clk-conf.h
+++ b/include/linux/clk/clk-conf.h
@@ -4,6 +4,9 @@
  * Sylwester Nawrocki <s.nawrocki@samsung.com>
  */
 
+#ifndef __CLK_CONF_H
+#define __CLK_CONF_H
+
 #include <linux/types.h>
 
 struct device_node;
@@ -17,3 +20,5 @@ static inline int of_clk_set_defaults(struct device_node *node,
 	return 0;
 }
 #endif
+
+#endif /* __CLK_CONF_H */
-- 
cgit v1.2.3


From cc204d01262a69218b2d0db5cdea371de85871d9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@gmail.com>
Date: Tue, 10 Sep 2019 13:01:35 -0400
Subject: SUNRPC: Dequeue the request from the receive queue while we're
 re-encoding

Ensure that we dequeue the request from the transport receive queue
while we're re-encoding to prevent issues like use-after-free when
we release the bvec.

Fixes: 7536908982047 ("SUNRPC: Ensure the bvecs are reset when we re-encode...")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: stable@vger.kernel.org # v4.20+
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/clnt.c           |  6 ++---
 net/sunrpc/xprt.c           | 54 ++++++++++++++++++++++++++-------------------
 3 files changed, 35 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 13e108bcc9eb..d783e15ba898 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -352,6 +352,7 @@ bool			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_receive(struct rpc_task *task);
 void			xprt_request_wait_receive(struct rpc_task *task);
+void			xprt_request_dequeue_xprt(struct rpc_task *task);
 bool			xprt_request_need_retransmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d8679b6027e9..0359466947e2 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1862,6 +1862,7 @@ rpc_xdr_encode(struct rpc_task *task)
 		     req->rq_rbuffer,
 		     req->rq_rcvsize);
 
+	req->rq_reply_bytes_recvd = 0;
 	req->rq_snd_buf.head[0].iov_len = 0;
 	xdr_init_encode(&xdr, &req->rq_snd_buf,
 			req->rq_snd_buf.head[0].iov_base, req);
@@ -1881,6 +1882,8 @@ call_encode(struct rpc_task *task)
 	if (!rpc_task_need_encode(task))
 		goto out;
 	dprint_status(task);
+	/* Dequeue task from the receive queue while we're encoding */
+	xprt_request_dequeue_xprt(task);
 	/* Encode here so that rpcsec_gss can use correct sequence number. */
 	rpc_xdr_encode(task);
 	/* Did the encode result in an error condition? */
@@ -2501,9 +2504,6 @@ call_decode(struct rpc_task *task)
 		return;
 	case -EAGAIN:
 		task->tk_status = 0;
-		xdr_free_bvec(&req->rq_rcv_buf);
-		req->rq_reply_bytes_recvd = 0;
-		req->rq_rcv_buf.len = 0;
 		if (task->tk_client->cl_discrtry)
 			xprt_conditional_disconnect(req->rq_xprt,
 						    req->rq_connect_cookie);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 783748dc5e6f..02d5b2125c07 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1323,6 +1323,36 @@ xprt_request_dequeue_transmit(struct rpc_task *task)
 	spin_unlock(&xprt->queue_lock);
 }
 
+/**
+ * xprt_request_dequeue_xprt - remove a task from the transmit+receive queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmit and receive queues, and ensure that
+ * it is not pinned by the receive work item.
+ */
+void
+xprt_request_dequeue_xprt(struct rpc_task *task)
+{
+	struct rpc_rqst	*req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+	    test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+	    xprt_is_pinned_rqst(req)) {
+		spin_lock(&xprt->queue_lock);
+		xprt_request_dequeue_transmit_locked(task);
+		xprt_request_dequeue_receive_locked(task);
+		while (xprt_is_pinned_rqst(req)) {
+			set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+			spin_unlock(&xprt->queue_lock);
+			xprt_wait_on_pinned_rqst(req);
+			spin_lock(&xprt->queue_lock);
+			clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+		}
+		spin_unlock(&xprt->queue_lock);
+	}
+}
+
 /**
  * xprt_request_prepare - prepare an encoded request for transport
  * @req: pointer to rpc_rqst
@@ -1754,28 +1784,6 @@ void xprt_retry_reserve(struct rpc_task *task)
 	xprt_do_reserve(xprt, task);
 }
 
-static void
-xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
-{
-	struct rpc_xprt *xprt = req->rq_xprt;
-
-	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
-	    test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
-	    xprt_is_pinned_rqst(req)) {
-		spin_lock(&xprt->queue_lock);
-		xprt_request_dequeue_transmit_locked(task);
-		xprt_request_dequeue_receive_locked(task);
-		while (xprt_is_pinned_rqst(req)) {
-			set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
-			spin_unlock(&xprt->queue_lock);
-			xprt_wait_on_pinned_rqst(req);
-			spin_lock(&xprt->queue_lock);
-			clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
-		}
-		spin_unlock(&xprt->queue_lock);
-	}
-}
-
 /**
  * xprt_release - release an RPC request slot
  * @task: task which is finished with the slot
@@ -1795,7 +1803,7 @@ void xprt_release(struct rpc_task *task)
 	}
 
 	xprt = req->rq_xprt;
-	xprt_request_dequeue_all(task, req);
+	xprt_request_dequeue_xprt(task);
 	spin_lock(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
 	if (xprt->ops->release_request)
-- 
cgit v1.2.3


From 54d4e6ab91eb24b47a58403d8561206e916f0242 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 16 Sep 2019 18:44:29 +0300
Subject: block: centralize PI remapping logic to the block layer

Currently t10_pi_prepare/t10_pi_complete functions are called during the
NVMe and SCSi layers command preparetion/completion, but their actual
place should be the block layer since T10-PI is a general data integrity
feature that is used by block storage protocols. Introduce .prepare_fn
and .complete_fn callbacks within the integrity profile that each type
can implement according to its needs.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Suggested-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>

Fixed to not call queue integrity functions if BLK_DEV_INTEGRITY
isn't defined in the config.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          |   7 +++
 block/blk-integrity.c     |  11 ++++
 block/blk-mq.c            |   6 ++
 block/t10-pi.c            | 144 ++++++++++++++++++++++++----------------------
 drivers/md/dm-integrity.c |  10 ++++
 drivers/nvme/host/core.c  |   9 ---
 drivers/scsi/sd.c         |   8 ---
 include/linux/blkdev.h    |   4 ++
 include/linux/t10-pi.h    |  14 -----
 9 files changed, 114 insertions(+), 99 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 875e8d105067..d5e668ec751b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
 #include <linux/blk-cgroup.h>
+#include <linux/t10-pi.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
 #include <linux/psi.h>
@@ -1436,6 +1437,12 @@ bool blk_update_request(struct request *req, blk_status_t error,
 	if (!req->bio)
 		return false;
 
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
+	    error == BLK_STS_OK)
+		req->q->integrity.profile->complete_fn(req, nr_bytes);
+#endif
+
 	if (unlikely(error && !blk_rq_is_passthrough(req) &&
 		     !(req->rq_flags & RQF_QUIET)))
 		print_req_error(req, error, __func__);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index ca39b4624cf8..ff1070edbb40 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -368,10 +368,21 @@ static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter)
 	return BLK_STS_OK;
 }
 
+static void blk_integrity_nop_prepare(struct request *rq)
+{
+}
+
+static void blk_integrity_nop_complete(struct request *rq,
+		unsigned int nr_bytes)
+{
+}
+
 static const struct blk_integrity_profile nop_profile = {
 	.name = "nop",
 	.generate_fn = blk_integrity_nop_fn,
 	.verify_fn = blk_integrity_nop_fn,
+	.prepare_fn = blk_integrity_nop_prepare,
+	.complete_fn = blk_integrity_nop_complete,
 };
 
 /**
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 20a49be536b5..29275f5a996f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -30,6 +30,7 @@
 #include <trace/events/block.h>
 
 #include <linux/blk-mq.h>
+#include <linux/t10-pi.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -700,6 +701,11 @@ void blk_mq_start_request(struct request *rq)
 		 */
 		rq->nr_phys_segments++;
 	}
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
+		q->integrity.profile->prepare_fn(rq);
+#endif
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 7fed58705d1a..0c0120a672f9 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -120,76 +120,22 @@ static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter)
 	return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION);
 }
 
-static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
-{
-	return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
-{
-	return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
-{
-	return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
-{
-	return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
-}
-
-const struct blk_integrity_profile t10_pi_type1_crc = {
-	.name			= "T10-DIF-TYPE1-CRC",
-	.generate_fn		= t10_pi_type1_generate_crc,
-	.verify_fn		= t10_pi_type1_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type1_crc);
-
-const struct blk_integrity_profile t10_pi_type1_ip = {
-	.name			= "T10-DIF-TYPE1-IP",
-	.generate_fn		= t10_pi_type1_generate_ip,
-	.verify_fn		= t10_pi_type1_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type1_ip);
-
-const struct blk_integrity_profile t10_pi_type3_crc = {
-	.name			= "T10-DIF-TYPE3-CRC",
-	.generate_fn		= t10_pi_type3_generate_crc,
-	.verify_fn		= t10_pi_type3_verify_crc,
-};
-EXPORT_SYMBOL(t10_pi_type3_crc);
-
-const struct blk_integrity_profile t10_pi_type3_ip = {
-	.name			= "T10-DIF-TYPE3-IP",
-	.generate_fn		= t10_pi_type3_generate_ip,
-	.verify_fn		= t10_pi_type3_verify_ip,
-};
-EXPORT_SYMBOL(t10_pi_type3_ip);
-
 /**
- * t10_pi_prepare - prepare PI prior submitting request to device
+ * t10_pi_type1_prepare - prepare PI prior submitting request to device
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
  * partitioning, MD/DM cloning, etc. the actual physical start sector is
  * likely to be different. Remap protection information to match the
  * physical LBA.
- *
- * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_prepare(struct request *rq, u8 protection_type)
+static void t10_pi_type1_prepare(struct request *rq)
 {
 	const int tuple_sz = rq->q->integrity.tuple_size;
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
 
-	if (protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
 	__rq_for_each_bio(bio, rq) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
 		u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -222,13 +168,11 @@ void t10_pi_prepare(struct request *rq, u8 protection_type)
 		bip->bip_flags |= BIP_MAPPED_INTEGRITY;
 	}
 }
-EXPORT_SYMBOL(t10_pi_prepare);
 
 /**
- * t10_pi_complete - prepare PI prior returning request to the block layer
+ * t10_pi_type1_complete - prepare PI prior returning request to the blk layer
  * @rq:              request with PI that should be prepared
- * @protection_type: PI type (Type 1/Type 2/Type 3)
- * @intervals:       total elements to prepare
+ * @nr_bytes:        total bytes to prepare
  *
  * For Type 1/Type 2, the virtual start sector is the one that was
  * originally submitted by the block layer for the ref_tag usage. Due to
@@ -236,19 +180,14 @@ EXPORT_SYMBOL(t10_pi_prepare);
  * likely to be different. Since the physical start sector was submitted
  * to the device, we should remap it back to virtual values expected by the
  * block layer.
- *
- * Type 3 does not have a reference tag so no remapping is required.
  */
-void t10_pi_complete(struct request *rq, u8 protection_type,
-		     unsigned int intervals)
+static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
 {
+	unsigned intervals = nr_bytes >> rq->q->integrity.interval_exp;
 	const int tuple_sz = rq->q->integrity.tuple_size;
 	u32 ref_tag = t10_pi_ref_tag(rq);
 	struct bio *bio;
 
-	if (protection_type == T10_PI_TYPE3_PROTECTION)
-		return;
-
 	__rq_for_each_bio(bio, rq) {
 		struct bio_integrity_payload *bip = bio_integrity(bio);
 		u32 virt = bip_get_seed(bip) & 0xffffffff;
@@ -276,4 +215,73 @@ void t10_pi_complete(struct request *rq, u8 protection_type,
 		}
 	}
 }
-EXPORT_SYMBOL(t10_pi_complete);
+
+static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter)
+{
+	return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter)
+{
+	return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter)
+{
+	return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
+{
+	return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
+}
+
+/**
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_prepare(struct request *rq)
+{
+}
+
+/**
+ * Type 3 does not have a reference tag so no remapping is required.
+ */
+static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
+const struct blk_integrity_profile t10_pi_type1_crc = {
+	.name			= "T10-DIF-TYPE1-CRC",
+	.generate_fn		= t10_pi_type1_generate_crc,
+	.verify_fn		= t10_pi_type1_verify_crc,
+	.prepare_fn		= t10_pi_type1_prepare,
+	.complete_fn		= t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_crc);
+
+const struct blk_integrity_profile t10_pi_type1_ip = {
+	.name			= "T10-DIF-TYPE1-IP",
+	.generate_fn		= t10_pi_type1_generate_ip,
+	.verify_fn		= t10_pi_type1_verify_ip,
+	.prepare_fn		= t10_pi_type1_prepare,
+	.complete_fn		= t10_pi_type1_complete,
+};
+EXPORT_SYMBOL(t10_pi_type1_ip);
+
+const struct blk_integrity_profile t10_pi_type3_crc = {
+	.name			= "T10-DIF-TYPE3-CRC",
+	.generate_fn		= t10_pi_type3_generate_crc,
+	.verify_fn		= t10_pi_type3_verify_crc,
+	.prepare_fn		= t10_pi_type3_prepare,
+	.complete_fn		= t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_crc);
+
+const struct blk_integrity_profile t10_pi_type3_ip = {
+	.name			= "T10-DIF-TYPE3-IP",
+	.generate_fn		= t10_pi_type3_generate_ip,
+	.verify_fn		= t10_pi_type3_verify_ip,
+	.prepare_fn		= t10_pi_type3_prepare,
+	.complete_fn		= t10_pi_type3_complete,
+};
+EXPORT_SYMBOL(t10_pi_type3_ip);
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 9118ab85cb3a..dab4446fe7d8 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -345,6 +345,14 @@ static void __DEBUG_bytes(__u8 *bytes, size_t len, const char *msg, ...)
 #define DEBUG_bytes(bytes, len, msg, ...)	do { } while (0)
 #endif
 
+static void dm_integrity_prepare(struct request *rq)
+{
+}
+
+static void dm_integrity_complete(struct request *rq, unsigned int nr_bytes)
+{
+}
+
 /*
  * DM Integrity profile, protection is performed layer above (dm-crypt)
  */
@@ -352,6 +360,8 @@ static const struct blk_integrity_profile dm_integrity_profile = {
 	.name			= "DM-DIF-EXT-TAG",
 	.generate_fn		= NULL,
 	.verify_fn		= NULL,
+	.prepare_fn		= dm_integrity_prepare,
+	.complete_fn		= dm_integrity_complete,
 };
 
 static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1ede1763a5ee..108f60b46804 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -666,8 +666,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
-		} else if (req_op(req) == REQ_OP_WRITE) {
-			t10_pi_prepare(req, ns->pi_type);
 		}
 
 		switch (ns->pi_type) {
@@ -690,13 +688,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 
 void nvme_cleanup_cmd(struct request *req)
 {
-	if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ &&
-	    nvme_req(req)->status == 0) {
-		struct nvme_ns *ns = req->rq_disk->private_data;
-
-		t10_pi_complete(req, ns->pi_type,
-				blk_rq_bytes(req) >> ns->lba_shift);
-	}
 	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
 		struct nvme_ns *ns = req->rq_disk->private_data;
 		struct page *page = req->special_vec.bv_page;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 4b925552458f..0e8941caaa0d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1211,9 +1211,6 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
 	dix = scsi_prot_sg_count(cmd);
 	dif = scsi_host_dif_capable(cmd->device->host, sdkp->protection_type);
 
-	if (write && dix)
-		t10_pi_prepare(cmd->request, sdkp->protection_type);
-
 	if (dif || dix)
 		protect = sd_setup_protect_cmnd(cmd, dix, dif);
 	else
@@ -2054,11 +2051,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 					   "sd_done: completed %d of %d bytes\n",
 					   good_bytes, scsi_bufflen(SCpnt)));
 
-	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt) &&
-	    good_bytes)
-		t10_pi_complete(SCpnt->request, sdkp->protection_type,
-				good_bytes / scsi_prot_interval(SCpnt));
-
 	return good_bytes;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3094f2d513b2..6032bb740cf4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1522,10 +1522,14 @@ struct blk_integrity_iter {
 };
 
 typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef void (integrity_prepare_fn) (struct request *);
+typedef void (integrity_complete_fn) (struct request *, unsigned int);
 
 struct blk_integrity_profile {
 	integrity_processing_fn		*generate_fn;
 	integrity_processing_fn		*verify_fn;
+	integrity_prepare_fn		*prepare_fn;
+	integrity_complete_fn		*complete_fn;
 	const char			*name;
 };
 
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 3e2a80cc7b56..96305a64a5a7 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -53,18 +53,4 @@ extern const struct blk_integrity_profile t10_pi_type1_ip;
 extern const struct blk_integrity_profile t10_pi_type3_crc;
 extern const struct blk_integrity_profile t10_pi_type3_ip;
 
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-extern void t10_pi_prepare(struct request *rq, u8 protection_type);
-extern void t10_pi_complete(struct request *rq, u8 protection_type,
-			    unsigned int intervals);
-#else
-static inline void t10_pi_complete(struct request *rq, u8 protection_type,
-				   unsigned int intervals)
-{
-}
-static inline void t10_pi_prepare(struct request *rq, u8 protection_type)
-{
-}
-#endif
-
 #endif
-- 
cgit v1.2.3


From 42fd8baab31f53bed2952485fcf0e92f244c5e55 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Tue, 17 Sep 2019 10:34:54 -0400
Subject: sched/core: Convert vcpu_is_preempted() from macro to an inline
 function

Clang reports this warning:

  kernel/locking/osq_lock.c:25:19: warning: unused function 'node_cpu' [-Wunused-function]

due to osq_lock() calling vcpu_is_preempted(node_cpu(node->prev))), but
vcpu_is_preempted() is compiled away. Fix it by converting the dummy
vcpu_is_preempted() from a macro to a proper static inline function.

Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: bsegall@google.com
Cc: dietmar.eggemann@arm.com
Cc: juri.lelli@redhat.com
Cc: rostedt@goodmis.org
Cc: vincent.guittot@linaro.org
Link: https://lkml.kernel.org/r/1568730894-10483-1-git-send-email-cai@lca.pw
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f0edee94834a..e2e91960d79f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1856,7 +1856,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
  * running or not.
  */
 #ifndef vcpu_is_preempted
-# define vcpu_is_preempted(cpu)	false
+static inline bool vcpu_is_preempted(int cpu)
+{
+	return false;
+}
 #endif
 
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
-- 
cgit v1.2.3


From 337c22ab1d4f316f37e7a7c78bdea3d768270542 Mon Sep 17 00:00:00 2001
From: Joshua Clayton <stillcompiling@gmail.com>
Date: Mon, 12 Aug 2019 09:20:20 -0600
Subject: HID: core: reformat and reduce hid_printk macros

Reformat hid_printk macros to use standard __VA_ARGS__ syntax.
Per Joe Perches hid_printk(), hid_emerg(), hid_crit(), and hid_alert() are
unlikely ever to be used. Remove them.

Signed-off-by: Joshua Clayton <stillcompiling@gmail.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 include/linux/hid.h | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index d770ab1a0479..e6c7efdb0458 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1154,29 +1154,21 @@ int hid_pidff_init(struct hid_device *hid);
 #define hid_pidff_init NULL
 #endif
 
-#define dbg_hid(format, arg...)						\
+#define dbg_hid(fmt, ...)						\
 do {									\
 	if (hid_debug)							\
-		printk(KERN_DEBUG "%s: " format, __FILE__, ##arg);	\
+		printk(KERN_DEBUG "%s: " fmt, __FILE__, ##__VA_ARGS__);	\
 } while (0)
 
-#define hid_printk(level, hid, fmt, arg...)		\
-	dev_printk(level, &(hid)->dev, fmt, ##arg)
-#define hid_emerg(hid, fmt, arg...)			\
-	dev_emerg(&(hid)->dev, fmt, ##arg)
-#define hid_crit(hid, fmt, arg...)			\
-	dev_crit(&(hid)->dev, fmt, ##arg)
-#define hid_alert(hid, fmt, arg...)			\
-	dev_alert(&(hid)->dev, fmt, ##arg)
-#define hid_err(hid, fmt, arg...)			\
-	dev_err(&(hid)->dev, fmt, ##arg)
-#define hid_notice(hid, fmt, arg...)			\
-	dev_notice(&(hid)->dev, fmt, ##arg)
-#define hid_warn(hid, fmt, arg...)			\
-	dev_warn(&(hid)->dev, fmt, ##arg)
-#define hid_info(hid, fmt, arg...)			\
-	dev_info(&(hid)->dev, fmt, ##arg)
-#define hid_dbg(hid, fmt, arg...)			\
-	dev_dbg(&(hid)->dev, fmt, ##arg)
+#define hid_err(hid, fmt, ...)				\
+	dev_err(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_notice(hid, fmt, ...)			\
+	dev_notice(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_warn(hid, fmt, ...)				\
+	dev_warn(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_info(hid, fmt, ...)				\
+	dev_info(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_dbg(hid, fmt, ...)				\
+	dev_dbg(&(hid)->dev, fmt, ##__VA_ARGS__)
 
 #endif
-- 
cgit v1.2.3


From aaeabb121a6271775cef35ec3197e9ef124fdb2a Mon Sep 17 00:00:00 2001
From: Joshua Clayton <stillcompiling@gmail.com>
Date: Mon, 12 Aug 2019 09:20:21 -0600
Subject: HID: core: Add printk_once variants to hid_warn() etc

hid_warn_once() is needed. Add the others as part of the block.

Signed-off-by: Joshua Clayton <stillcompiling@gmail.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
---
 include/linux/hid.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index e6c7efdb0458..cd41f209043f 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1171,4 +1171,15 @@ do {									\
 #define hid_dbg(hid, fmt, ...)				\
 	dev_dbg(&(hid)->dev, fmt, ##__VA_ARGS__)
 
+#define hid_err_once(hid, fmt, ...)			\
+	dev_err_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_notice_once(hid, fmt, ...)			\
+	dev_notice_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_warn_once(hid, fmt, ...)			\
+	dev_warn_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_info_once(hid, fmt, ...)			\
+	dev_info_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+#define hid_dbg_once(hid, fmt, ...)			\
+	dev_dbg_once(&(hid)->dev, fmt, ##__VA_ARGS__)
+
 #endif
-- 
cgit v1.2.3


From 5262f567987d3c30052b22e78c35c2313d07b230 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 17 Sep 2019 12:26:57 -0600
Subject: io_uring: IORING_OP_TIMEOUT support

There's been a few requests for functionality similar to io_getevents()
and epoll_wait(), where the user can specify a timeout for waiting on
events. I deliberately did not add support for this through the system
call initially to avoid overloading the args, but I can see that the use
cases for this are valid.

This adds support for IORING_OP_TIMEOUT. If a user wants to get woken
when waiting for events, simply submit one of these timeout commands
with your wait call (or before). This ensures that the application
sleeping on the CQ ring waiting for events will get woken. The timeout
command is passed in as a pointer to a struct timespec. Timeouts are
relative. The timeout command also includes a way to auto-cancel after
N events has passed.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c                 | 149 ++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/io_uring.h |   2 +
 2 files changed, 146 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 05a299e80159..9d8e703bc851 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -200,6 +200,7 @@ struct io_ring_ctx {
 		struct io_uring_sqe	*sq_sqes;
 
 		struct list_head	defer_list;
+		struct list_head	timeout_list;
 	} ____cacheline_aligned_in_smp;
 
 	/* IO offload */
@@ -216,6 +217,7 @@ struct io_ring_ctx {
 		struct wait_queue_head	cq_wait;
 		struct fasync_struct	*cq_fasync;
 		struct eventfd_ctx	*cq_ev_fd;
+		atomic_t		cq_timeouts;
 	} ____cacheline_aligned_in_smp;
 
 	struct io_rings	*rings;
@@ -283,6 +285,11 @@ struct io_poll_iocb {
 	struct wait_queue_entry		wait;
 };
 
+struct io_timeout {
+	struct file			*file;
+	struct hrtimer			timer;
+};
+
 /*
  * NOTE! Each of the iocb union members has the file pointer
  * as the first entry in their struct definition. So you can
@@ -294,6 +301,7 @@ struct io_kiocb {
 		struct file		*file;
 		struct kiocb		rw;
 		struct io_poll_iocb	poll;
+		struct io_timeout	timeout;
 	};
 
 	struct sqe_submit	submit;
@@ -313,6 +321,7 @@ struct io_kiocb {
 #define REQ_F_LINK_DONE		128	/* linked sqes done */
 #define REQ_F_FAIL_LINK		256	/* fail rest of links */
 #define REQ_F_SHADOW_DRAIN	512	/* link-drain shadow req */
+#define REQ_F_TIMEOUT		1024	/* timeout request */
 	u64			user_data;
 	u32			result;
 	u32			sequence;
@@ -344,6 +353,8 @@ struct io_submit_state {
 };
 
 static void io_sq_wq_submit_work(struct work_struct *work);
+static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
+				 long res);
 static void __io_free_req(struct io_kiocb *req);
 
 static struct kmem_cache *req_cachep;
@@ -400,26 +411,30 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->poll_list);
 	INIT_LIST_HEAD(&ctx->cancel_list);
 	INIT_LIST_HEAD(&ctx->defer_list);
+	INIT_LIST_HEAD(&ctx->timeout_list);
 	return ctx;
 }
 
 static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
 				     struct io_kiocb *req)
 {
-	if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
+	/* timeout requests always honor sequence */
+	if (!(req->flags & REQ_F_TIMEOUT) &&
+	    (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
 		return false;
 
 	return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
 }
 
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
+					      struct list_head *list)
 {
 	struct io_kiocb *req;
 
-	if (list_empty(&ctx->defer_list))
+	if (list_empty(list))
 		return NULL;
 
-	req = list_first_entry(&ctx->defer_list, struct io_kiocb, list);
+	req = list_first_entry(list, struct io_kiocb, list);
 	if (!io_sequence_defer(ctx, req)) {
 		list_del_init(&req->list);
 		return req;
@@ -428,6 +443,16 @@ static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+	return __io_get_deferred_req(ctx, &ctx->defer_list);
+}
+
+static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
+{
+	return __io_get_deferred_req(ctx, &ctx->timeout_list);
+}
+
 static void __io_commit_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
@@ -460,10 +485,36 @@ static inline void io_queue_async_work(struct io_ring_ctx *ctx,
 	queue_work(ctx->sqo_wq[rw], &req->work);
 }
 
+static void io_kill_timeout(struct io_kiocb *req)
+{
+	int ret;
+
+	ret = hrtimer_try_to_cancel(&req->timeout.timer);
+	if (ret != -1) {
+		atomic_inc(&req->ctx->cq_timeouts);
+		list_del(&req->list);
+		io_cqring_fill_event(req->ctx, req->user_data, 0);
+		__io_free_req(req);
+	}
+}
+
+static void io_kill_timeouts(struct io_ring_ctx *ctx)
+{
+	struct io_kiocb *req, *tmp;
+
+	spin_lock_irq(&ctx->completion_lock);
+	list_for_each_entry_safe(req, tmp, &ctx->timeout_list, list)
+		io_kill_timeout(req);
+	spin_unlock_irq(&ctx->completion_lock);
+}
+
 static void io_commit_cqring(struct io_ring_ctx *ctx)
 {
 	struct io_kiocb *req;
 
+	while ((req = io_get_timeout_req(ctx)) != NULL)
+		io_kill_timeout(req);
+
 	__io_commit_cqring(ctx);
 
 	while ((req = io_get_deferred_req(ctx)) != NULL) {
@@ -1765,6 +1816,81 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	return ipt.error;
 }
 
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
+{
+	struct io_ring_ctx *ctx;
+	struct io_kiocb *req;
+	unsigned long flags;
+
+	req = container_of(timer, struct io_kiocb, timeout.timer);
+	ctx = req->ctx;
+	atomic_inc(&ctx->cq_timeouts);
+
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+	list_del(&req->list);
+
+	io_cqring_fill_event(ctx, req->user_data, -ETIME);
+	io_commit_cqring(ctx);
+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+	io_cqring_ev_posted(ctx);
+
+	io_put_req(req);
+	return HRTIMER_NORESTART;
+}
+
+static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	unsigned count, req_dist, tail_index;
+	struct io_ring_ctx *ctx = req->ctx;
+	struct list_head *entry;
+	struct timespec ts;
+
+	if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
+	    sqe->len != 1)
+		return -EINVAL;
+	if (copy_from_user(&ts, (void __user *) (unsigned long) sqe->addr,
+	    sizeof(ts)))
+		return -EFAULT;
+
+	/*
+	 * sqe->off holds how many events that need to occur for this
+	 * timeout event to be satisfied.
+	 */
+	count = READ_ONCE(sqe->off);
+	if (!count)
+		count = 1;
+
+	req->sequence = ctx->cached_sq_head + count - 1;
+	req->flags |= REQ_F_TIMEOUT;
+
+	/*
+	 * Insertion sort, ensuring the first entry in the list is always
+	 * the one we need first.
+	 */
+	tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
+	req_dist = req->sequence - tail_index;
+	spin_lock_irq(&ctx->completion_lock);
+	list_for_each_prev(entry, &ctx->timeout_list) {
+		struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
+		unsigned dist;
+
+		dist = nxt->sequence - tail_index;
+		if (req_dist >= dist)
+			break;
+	}
+	list_add(&req->list, entry);
+	spin_unlock_irq(&ctx->completion_lock);
+
+	hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	req->timeout.timer.function = io_timeout_fn;
+	hrtimer_start(&req->timeout.timer, timespec_to_ktime(ts),
+			HRTIMER_MODE_REL);
+	return 0;
+}
+
 static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			const struct io_uring_sqe *sqe)
 {
@@ -1842,6 +1968,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	case IORING_OP_RECVMSG:
 		ret = io_recvmsg(req, s->sqe, force_nonblock);
 		break;
+	case IORING_OP_TIMEOUT:
+		ret = io_timeout(req, s->sqe);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -2599,6 +2728,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			  const sigset_t __user *sig, size_t sigsz)
 {
 	struct io_rings *rings = ctx->rings;
+	unsigned nr_timeouts;
 	int ret;
 
 	if (io_cqring_events(rings) >= min_events)
@@ -2617,7 +2747,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
 			return ret;
 	}
 
-	ret = wait_event_interruptible(ctx->wait, io_cqring_events(rings) >= min_events);
+	nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	/*
+	 * Return if we have enough events, or if a timeout occured since
+	 * we started waiting. For timeouts, we always want to return to
+	 * userspace.
+	 */
+	ret = wait_event_interruptible(ctx->wait,
+				io_cqring_events(rings) >= min_events ||
+				atomic_read(&ctx->cq_timeouts) != nr_timeouts);
 	restore_saved_sigmask_unless(ret == -ERESTARTSYS);
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
@@ -3288,6 +3426,7 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	percpu_ref_kill(&ctx->refs);
 	mutex_unlock(&ctx->uring_lock);
 
+	io_kill_timeouts(ctx);
 	io_poll_remove_all(ctx);
 	io_iopoll_reap_events(ctx);
 	wait_for_completion(&ctx->ctx_done);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 96ee9d94b73e..ea57526a5b89 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -28,6 +28,7 @@ struct io_uring_sqe {
 		__u16		poll_events;
 		__u32		sync_range_flags;
 		__u32		msg_flags;
+		__u32		timeout_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
 	union {
@@ -61,6 +62,7 @@ struct io_uring_sqe {
 #define IORING_OP_SYNC_FILE_RANGE	8
 #define IORING_OP_SENDMSG	9
 #define IORING_OP_RECVMSG	10
+#define IORING_OP_TIMEOUT	11
 
 /*
  * sqe->fsync_flags
-- 
cgit v1.2.3


From 4d85f45c73a22bc0ee900c7505b7210a87a7966d Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Tue, 3 Sep 2019 21:06:42 +0200
Subject: drm/atomic: Rename crtc_state->pageflip_flags to async_flip
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's the only flag anyone actually cares about. Plus if we're unlucky,
the atomic ioctl might need a different flag for async flips. So
better to abstract this away from the uapi a bit.

Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Michel Dänzer <michel@daenzer.net>
Cc: Alex Deucher <alexdeucher@gmail.com>
Cc: Adam Jackson <ajax@redhat.com>
Cc: Sean Paul <sean@poorly.run>
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Cc: Maxime Ripard <maxime.ripard@bootlin.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
Cc: Leo Li <sunpeng.li@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: David Francis <David.Francis@amd.com>
Cc: Mario Kleiner <mario.kleiner.de@gmail.com>
Cc: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20190903190642.32588-3-daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 5 ++---
 drivers/gpu/drm/drm_atomic_helper.c               | 2 +-
 drivers/gpu/drm/drm_atomic_state_helper.c         | 2 +-
 drivers/gpu/drm/nouveau/dispnv50/wndw.c           | 4 ++--
 include/drm/drm_crtc.h                            | 8 ++++----
 5 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 0a71ed1e7762..2f0ef0820f00 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5756,8 +5756,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		 * change FB pitch, DCC state, rotation or mirroing.
 		 */
 		bundle->flip_addrs[planes_count].flip_immediate =
-			(crtc->state->pageflip_flags &
-			 DRM_MODE_PAGE_FLIP_ASYNC) != 0 &&
+			crtc->state->async_flip &&
 			acrtc_state->update_type == UPDATE_TYPE_FAST;
 
 		timestamp_ns = ktime_get_ns();
@@ -6334,7 +6333,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 	amdgpu_dm_enable_crtc_interrupts(dev, state, true);
 
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, j)
-		if (new_crtc_state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC)
+		if (new_crtc_state->async_flip)
 			wait_for_vblank = false;
 
 	/* update planes when needed per crtc*/
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index aa16ea17ff9b..3a67f63c3146 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -3275,7 +3275,7 @@ static int page_flip_common(struct drm_atomic_state *state,
 		return PTR_ERR(crtc_state);
 
 	crtc_state->event = event;
-	crtc_state->pageflip_flags = flags;
+	crtc_state->async_flip = flags & DRM_MODE_PAGE_FLIP_ASYNC;
 
 	plane_state = drm_atomic_get_plane_state(state, plane);
 	if (IS_ERR(plane_state))
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 46dc264a248b..d0a937fb0c56 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -128,7 +128,7 @@ void __drm_atomic_helper_crtc_duplicate_state(struct drm_crtc *crtc,
 	state->zpos_changed = false;
 	state->commit = NULL;
 	state->event = NULL;
-	state->pageflip_flags = 0;
+	state->async_flip = false;
 
 	/* Self refresh should be canceled when a new update is available */
 	state->active = drm_atomic_crtc_effectively_active(state);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 2db029371c91..5193b6257061 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -267,7 +267,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw, bool modeset,
 			asyw->image.pitch[0] = fb->base.pitches[0];
 		}
 
-		if (!(asyh->state.pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC))
+		if (!asyh->state.async_flip)
 			asyw->image.interval = 1;
 		else
 			asyw->image.interval = 0;
@@ -383,7 +383,7 @@ nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw,
 	}
 
 	/* Can't do an immediate flip while changing the LUT. */
-	asyh->state.pageflip_flags &= ~DRM_MODE_PAGE_FLIP_ASYNC;
+	asyh->state.async_flip = false;
 }
 
 static int
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 7d14c11bdc0a..c4528eb5d168 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -285,12 +285,12 @@ struct drm_crtc_state {
 	u32 target_vblank;
 
 	/**
-	 * @pageflip_flags:
+	 * @async_flip:
 	 *
-	 * DRM_MODE_PAGE_FLIP_* flags, as passed to the page flip ioctl.
-	 * Zero in any other case.
+	 * This is set when DRM_MODE_PAGE_FLIP_ASYNC is set in the legacy
+	 * PAGE_FLIP IOCTL. It's not wired up for the atomic IOCTL itself yet.
 	 */
-	u32 pageflip_flags;
+	bool async_flip;
 
 	/**
 	 * @vrr_enabled:
-- 
cgit v1.2.3


From a62a8ef9d97da23762a588592c8b8eb50a8deb6a Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Tue, 12 Jun 2018 09:41:17 +0100
Subject: virtio-fs: add virtiofs filesystem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a basic file system module for virtio-fs.  This does not yet contain
shared data support between host and guest or metadata coherency speedups.
However it is already significantly faster than virtio-9p.

Design Overview
===============

With the goal of designing something with better performance and local file
system semantics, a bunch of ideas were proposed.

 - Use fuse protocol (instead of 9p) for communication between guest and
   host.  Guest kernel will be fuse client and a fuse server will run on
   host to serve the requests.

 - For data access inside guest, mmap portion of file in QEMU address space
   and guest accesses this memory using dax.  That way guest page cache is
   bypassed and there is only one copy of data (on host).  This will also
   enable mmap(MAP_SHARED) between guests.

 - For metadata coherency, there is a shared memory region which contains
   version number associated with metadata and any guest changing metadata
   updates version number and other guests refresh metadata on next access.
   This is yet to be implemented.

How virtio-fs differs from existing approaches
==============================================

The unique idea behind virtio-fs is to take advantage of the co-location of
the virtual machine and hypervisor to avoid communication (vmexits).

DAX allows file contents to be accessed without communication with the
hypervisor.  The shared memory region for metadata avoids communication in
the common case where metadata is unchanged.

By replacing expensive communication with cheaper shared memory accesses,
we expect to achieve better performance than approaches based on network
file system protocols.  In addition, this also makes it easier to achieve
local file system semantics (coherency).

These techniques are not applicable to network file system protocols since
the communications channel is bypassed by taking advantage of shared memory
on a local machine.  This is why we decided to build virtio-fs rather than
focus on 9P or NFS.

Caching Modes
=============

Like virtio-9p, different caching modes are supported which determine the
coherency level as well.  The “cache=FOO” and “writeback” options control
the level of coherence between the guest and host filesystems.

 - cache=none
   metadata, data and pathname lookup are not cached in guest.  They are
   always fetched from host and any changes are immediately pushed to host.

 - cache=always
   metadata, data and pathname lookup are cached in guest and never expire.

 - cache=auto
   metadata and pathname lookup cache expires after a configured amount of
   time (default is 1 second).  Data is cached while the file is open
   (close to open consistency).

 - writeback/no_writeback
   These options control the writeback strategy.  If writeback is disabled,
   then normal writes will immediately be synchronized with the host fs.
   If writeback is enabled, then writes may be cached in the guest until
   the file is closed or an fsync(2) performed.  This option has no effect
   on mmap-ed writes or writes going through the DAX mechanism.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/fuse/Kconfig                 |   11 +
 fs/fuse/Makefile                |    1 +
 fs/fuse/fuse_i.h                |    9 +
 fs/fuse/inode.c                 |    4 +
 fs/fuse/virtio_fs.c             | 1195 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/virtio_fs.h  |   19 +
 include/uapi/linux/virtio_ids.h |    1 +
 7 files changed, 1240 insertions(+)
 create mode 100644 fs/fuse/virtio_fs.c
 create mode 100644 include/uapi/linux/virtio_fs.h

(limited to 'include')

diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig
index 24fc5a5c1b97..0635cba19971 100644
--- a/fs/fuse/Kconfig
+++ b/fs/fuse/Kconfig
@@ -27,3 +27,14 @@ config CUSE
 
 	  If you want to develop or use a userspace character device
 	  based on CUSE, answer Y or M.
+
+config VIRTIO_FS
+	tristate "Virtio Filesystem"
+	depends on FUSE_FS
+	select VIRTIO
+	help
+	  The Virtio Filesystem allows guests to mount file systems from the
+          host.
+
+	  If you want to share files between guests or with the host, answer Y
+          or M.
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 9485019c2a14..6419a2b3510d 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,5 +5,6 @@
 
 obj-$(CONFIG_FUSE_FS) += fuse.o
 obj-$(CONFIG_CUSE) += cuse.o
+obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o
 
 fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fc89cb40e874..956aeaf961ae 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -353,6 +353,10 @@ struct fuse_req {
 	/** Used to wake up the task waiting for completion of request*/
 	wait_queue_head_t waitq;
 
+#if IS_ENABLED(CONFIG_VIRTIO_FS)
+	/** virtio-fs's physically contiguous buffer for in and out args */
+	void *argbuf;
+#endif
 };
 
 struct fuse_iqueue;
@@ -383,6 +387,11 @@ struct fuse_iqueue_ops {
 	 */
 	void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq)
 		__releases(fiq->lock);
+
+	/**
+	 * Clean up when fuse_iqueue is destroyed
+	 */
+	void (*release)(struct fuse_iqueue *fiq);
 };
 
 /** /dev/fuse input queue operations */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 10d193b24fb8..3d598a5bb5b5 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -630,6 +630,10 @@ EXPORT_SYMBOL_GPL(fuse_conn_init);
 void fuse_conn_put(struct fuse_conn *fc)
 {
 	if (refcount_dec_and_test(&fc->count)) {
+		struct fuse_iqueue *fiq = &fc->iq;
+
+		if (fiq->ops->release)
+			fiq->ops->release(fiq);
 		put_pid_ns(fc->pid_ns);
 		put_user_ns(fc->user_ns);
 		fc->release(fc);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
new file mode 100644
index 000000000000..6af3f131e468
--- /dev/null
+++ b/fs/fuse/virtio_fs.c
@@ -0,0 +1,1195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * virtio-fs: Virtio Filesystem
+ * Copyright (C) 2018 Red Hat, Inc.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_fs.h>
+#include <linux/delay.h>
+#include <linux/fs_context.h>
+#include <linux/highmem.h>
+#include "fuse_i.h"
+
+/* List of virtio-fs device instances and a lock for the list. Also provides
+ * mutual exclusion in device removal and mounting path
+ */
+static DEFINE_MUTEX(virtio_fs_mutex);
+static LIST_HEAD(virtio_fs_instances);
+
+enum {
+	VQ_HIPRIO,
+	VQ_REQUEST
+};
+
+/* Per-virtqueue state */
+struct virtio_fs_vq {
+	spinlock_t lock;
+	struct virtqueue *vq;     /* protected by ->lock */
+	struct work_struct done_work;
+	struct list_head queued_reqs;
+	struct delayed_work dispatch_work;
+	struct fuse_dev *fud;
+	bool connected;
+	long in_flight;
+	char name[24];
+} ____cacheline_aligned_in_smp;
+
+/* A virtio-fs device instance */
+struct virtio_fs {
+	struct kref refcount;
+	struct list_head list;    /* on virtio_fs_instances */
+	char *tag;
+	struct virtio_fs_vq *vqs;
+	unsigned int nvqs;               /* number of virtqueues */
+	unsigned int num_request_queues; /* number of request queues */
+};
+
+struct virtio_fs_forget {
+	struct fuse_in_header ih;
+	struct fuse_forget_in arg;
+	/* This request can be temporarily queued on virt queue */
+	struct list_head list;
+};
+
+static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
+{
+	struct virtio_fs *fs = vq->vdev->priv;
+
+	return &fs->vqs[vq->index];
+}
+
+static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
+{
+	return &vq_to_fsvq(vq)->fud->pq;
+}
+
+static void release_virtio_fs_obj(struct kref *ref)
+{
+	struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
+
+	kfree(vfs->vqs);
+	kfree(vfs);
+}
+
+/* Make sure virtiofs_mutex is held */
+static void virtio_fs_put(struct virtio_fs *fs)
+{
+	kref_put(&fs->refcount, release_virtio_fs_obj);
+}
+
+static void virtio_fs_fiq_release(struct fuse_iqueue *fiq)
+{
+	struct virtio_fs *vfs = fiq->priv;
+
+	mutex_lock(&virtio_fs_mutex);
+	virtio_fs_put(vfs);
+	mutex_unlock(&virtio_fs_mutex);
+}
+
+static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
+{
+	WARN_ON(fsvq->in_flight < 0);
+
+	/* Wait for in flight requests to finish.*/
+	while (1) {
+		spin_lock(&fsvq->lock);
+		if (!fsvq->in_flight) {
+			spin_unlock(&fsvq->lock);
+			break;
+		}
+		spin_unlock(&fsvq->lock);
+		/* TODO use completion instead of timeout */
+		usleep_range(1000, 2000);
+	}
+
+	flush_work(&fsvq->done_work);
+	flush_delayed_work(&fsvq->dispatch_work);
+}
+
+static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
+{
+	struct virtio_fs_forget *forget;
+
+	spin_lock(&fsvq->lock);
+	while (1) {
+		forget = list_first_entry_or_null(&fsvq->queued_reqs,
+						struct virtio_fs_forget, list);
+		if (!forget)
+			break;
+		list_del(&forget->list);
+		kfree(forget);
+	}
+	spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
+{
+	struct virtio_fs_vq *fsvq;
+	int i;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		fsvq = &fs->vqs[i];
+		if (i == VQ_HIPRIO)
+			drain_hiprio_queued_reqs(fsvq);
+
+		virtio_fs_drain_queue(fsvq);
+	}
+}
+
+static void virtio_fs_start_all_queues(struct virtio_fs *fs)
+{
+	struct virtio_fs_vq *fsvq;
+	int i;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		fsvq = &fs->vqs[i];
+		spin_lock(&fsvq->lock);
+		fsvq->connected = true;
+		spin_unlock(&fsvq->lock);
+	}
+}
+
+/* Add a new instance to the list or return -EEXIST if tag name exists*/
+static int virtio_fs_add_instance(struct virtio_fs *fs)
+{
+	struct virtio_fs *fs2;
+	bool duplicate = false;
+
+	mutex_lock(&virtio_fs_mutex);
+
+	list_for_each_entry(fs2, &virtio_fs_instances, list) {
+		if (strcmp(fs->tag, fs2->tag) == 0)
+			duplicate = true;
+	}
+
+	if (!duplicate)
+		list_add_tail(&fs->list, &virtio_fs_instances);
+
+	mutex_unlock(&virtio_fs_mutex);
+
+	if (duplicate)
+		return -EEXIST;
+	return 0;
+}
+
+/* Return the virtio_fs with a given tag, or NULL */
+static struct virtio_fs *virtio_fs_find_instance(const char *tag)
+{
+	struct virtio_fs *fs;
+
+	mutex_lock(&virtio_fs_mutex);
+
+	list_for_each_entry(fs, &virtio_fs_instances, list) {
+		if (strcmp(fs->tag, tag) == 0) {
+			kref_get(&fs->refcount);
+			goto found;
+		}
+	}
+
+	fs = NULL; /* not found */
+
+found:
+	mutex_unlock(&virtio_fs_mutex);
+
+	return fs;
+}
+
+static void virtio_fs_free_devs(struct virtio_fs *fs)
+{
+	unsigned int i;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+		if (!fsvq->fud)
+			continue;
+
+		fuse_dev_free(fsvq->fud);
+		fsvq->fud = NULL;
+	}
+}
+
+/* Read filesystem name from virtio config into fs->tag (must kfree()). */
+static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs)
+{
+	char tag_buf[sizeof_field(struct virtio_fs_config, tag)];
+	char *end;
+	size_t len;
+
+	virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag),
+			   &tag_buf, sizeof(tag_buf));
+	end = memchr(tag_buf, '\0', sizeof(tag_buf));
+	if (end == tag_buf)
+		return -EINVAL; /* empty tag */
+	if (!end)
+		end = &tag_buf[sizeof(tag_buf)];
+
+	len = end - tag_buf;
+	fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL);
+	if (!fs->tag)
+		return -ENOMEM;
+	memcpy(fs->tag, tag_buf, len);
+	fs->tag[len] = '\0';
+	return 0;
+}
+
+/* Work function for hiprio completion */
+static void virtio_fs_hiprio_done_work(struct work_struct *work)
+{
+	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+						 done_work);
+	struct virtqueue *vq = fsvq->vq;
+
+	/* Free completed FUSE_FORGET requests */
+	spin_lock(&fsvq->lock);
+	do {
+		unsigned int len;
+		void *req;
+
+		virtqueue_disable_cb(vq);
+
+		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+			kfree(req);
+			fsvq->in_flight--;
+		}
+	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+	spin_unlock(&fsvq->lock);
+}
+
+static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
+{
+}
+
+static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
+{
+	struct virtio_fs_forget *forget;
+	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+						 dispatch_work.work);
+	struct virtqueue *vq = fsvq->vq;
+	struct scatterlist sg;
+	struct scatterlist *sgs[] = {&sg};
+	bool notify;
+	int ret;
+
+	pr_debug("virtio-fs: worker %s called.\n", __func__);
+	while (1) {
+		spin_lock(&fsvq->lock);
+		forget = list_first_entry_or_null(&fsvq->queued_reqs,
+					struct virtio_fs_forget, list);
+		if (!forget) {
+			spin_unlock(&fsvq->lock);
+			return;
+		}
+
+		list_del(&forget->list);
+		if (!fsvq->connected) {
+			spin_unlock(&fsvq->lock);
+			kfree(forget);
+			continue;
+		}
+
+		sg_init_one(&sg, forget, sizeof(*forget));
+
+		/* Enqueue the request */
+		dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+		ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+		if (ret < 0) {
+			if (ret == -ENOMEM || ret == -ENOSPC) {
+				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n",
+					 ret);
+				list_add_tail(&forget->list,
+						&fsvq->queued_reqs);
+				schedule_delayed_work(&fsvq->dispatch_work,
+						msecs_to_jiffies(1));
+			} else {
+				pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+					 ret);
+				kfree(forget);
+			}
+			spin_unlock(&fsvq->lock);
+			return;
+		}
+
+		fsvq->in_flight++;
+		notify = virtqueue_kick_prepare(vq);
+		spin_unlock(&fsvq->lock);
+
+		if (notify)
+			virtqueue_notify(vq);
+		pr_debug("virtio-fs: worker %s dispatched one forget request.\n",
+			 __func__);
+	}
+}
+
+/* Allocate and copy args into req->argbuf */
+static int copy_args_to_argbuf(struct fuse_req *req)
+{
+	struct fuse_args *args = req->args;
+	unsigned int offset = 0;
+	unsigned int num_in;
+	unsigned int num_out;
+	unsigned int len;
+	unsigned int i;
+
+	num_in = args->in_numargs - args->in_pages;
+	num_out = args->out_numargs - args->out_pages;
+	len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
+	      fuse_len_args(num_out, args->out_args);
+
+	req->argbuf = kmalloc(len, GFP_ATOMIC);
+	if (!req->argbuf)
+		return -ENOMEM;
+
+	for (i = 0; i < num_in; i++) {
+		memcpy(req->argbuf + offset,
+		       args->in_args[i].value,
+		       args->in_args[i].size);
+		offset += args->in_args[i].size;
+	}
+
+	return 0;
+}
+
+/* Copy args out of and free req->argbuf */
+static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req)
+{
+	unsigned int remaining;
+	unsigned int offset;
+	unsigned int num_in;
+	unsigned int num_out;
+	unsigned int i;
+
+	remaining = req->out.h.len - sizeof(req->out.h);
+	num_in = args->in_numargs - args->in_pages;
+	num_out = args->out_numargs - args->out_pages;
+	offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args);
+
+	for (i = 0; i < num_out; i++) {
+		unsigned int argsize = args->out_args[i].size;
+
+		if (args->out_argvar &&
+		    i == args->out_numargs - 1 &&
+		    argsize > remaining) {
+			argsize = remaining;
+		}
+
+		memcpy(args->out_args[i].value, req->argbuf + offset, argsize);
+		offset += argsize;
+
+		if (i != args->out_numargs - 1)
+			remaining -= argsize;
+	}
+
+	/* Store the actual size of the variable-length arg */
+	if (args->out_argvar)
+		args->out_args[args->out_numargs - 1].size = remaining;
+
+	kfree(req->argbuf);
+	req->argbuf = NULL;
+}
+
+/* Work function for request completion */
+static void virtio_fs_requests_done_work(struct work_struct *work)
+{
+	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+						 done_work);
+	struct fuse_pqueue *fpq = &fsvq->fud->pq;
+	struct fuse_conn *fc = fsvq->fud->fc;
+	struct virtqueue *vq = fsvq->vq;
+	struct fuse_req *req;
+	struct fuse_args_pages *ap;
+	struct fuse_req *next;
+	struct fuse_args *args;
+	unsigned int len, i, thislen;
+	struct page *page;
+	LIST_HEAD(reqs);
+
+	/* Collect completed requests off the virtqueue */
+	spin_lock(&fsvq->lock);
+	do {
+		virtqueue_disable_cb(vq);
+
+		while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
+			spin_lock(&fpq->lock);
+			list_move_tail(&req->list, &reqs);
+			spin_unlock(&fpq->lock);
+		}
+	} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
+	spin_unlock(&fsvq->lock);
+
+	/* End requests */
+	list_for_each_entry_safe(req, next, &reqs, list) {
+		/*
+		 * TODO verify that server properly follows FUSE protocol
+		 * (oh.uniq, oh.len)
+		 */
+		args = req->args;
+		copy_args_from_argbuf(args, req);
+
+		if (args->out_pages && args->page_zeroing) {
+			len = args->out_args[args->out_numargs - 1].size;
+			ap = container_of(args, typeof(*ap), args);
+			for (i = 0; i < ap->num_pages; i++) {
+				thislen = ap->descs[i].length;
+				if (len < thislen) {
+					WARN_ON(ap->descs[i].offset);
+					page = ap->pages[i];
+					zero_user_segment(page, len, thislen);
+					len = 0;
+				} else {
+					len -= thislen;
+				}
+			}
+		}
+
+		spin_lock(&fpq->lock);
+		clear_bit(FR_SENT, &req->flags);
+		list_del_init(&req->list);
+		spin_unlock(&fpq->lock);
+
+		fuse_request_end(fc, req);
+		spin_lock(&fsvq->lock);
+		fsvq->in_flight--;
+		spin_unlock(&fsvq->lock);
+	}
+}
+
+/* Virtqueue interrupt handler */
+static void virtio_fs_vq_done(struct virtqueue *vq)
+{
+	struct virtio_fs_vq *fsvq = vq_to_fsvq(vq);
+
+	dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name);
+
+	schedule_work(&fsvq->done_work);
+}
+
+/* Initialize virtqueues */
+static int virtio_fs_setup_vqs(struct virtio_device *vdev,
+			       struct virtio_fs *fs)
+{
+	struct virtqueue **vqs;
+	vq_callback_t **callbacks;
+	const char **names;
+	unsigned int i;
+	int ret = 0;
+
+	virtio_cread(vdev, struct virtio_fs_config, num_request_queues,
+		     &fs->num_request_queues);
+	if (fs->num_request_queues == 0)
+		return -EINVAL;
+
+	fs->nvqs = 1 + fs->num_request_queues;
+	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
+	if (!fs->vqs)
+		return -ENOMEM;
+
+	vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL);
+	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
+					GFP_KERNEL);
+	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
+	if (!vqs || !callbacks || !names) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	callbacks[VQ_HIPRIO] = virtio_fs_vq_done;
+	snprintf(fs->vqs[VQ_HIPRIO].name, sizeof(fs->vqs[VQ_HIPRIO].name),
+			"hiprio");
+	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
+	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
+	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
+			virtio_fs_hiprio_dispatch_work);
+	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
+
+	/* Initialize the requests virtqueues */
+	for (i = VQ_REQUEST; i < fs->nvqs; i++) {
+		spin_lock_init(&fs->vqs[i].lock);
+		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
+		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
+					virtio_fs_dummy_dispatch_work);
+		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
+			 "requests.%u", i - VQ_REQUEST);
+		callbacks[i] = virtio_fs_vq_done;
+		names[i] = fs->vqs[i].name;
+	}
+
+	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < fs->nvqs; i++)
+		fs->vqs[i].vq = vqs[i];
+
+	virtio_fs_start_all_queues(fs);
+out:
+	kfree(names);
+	kfree(callbacks);
+	kfree(vqs);
+	if (ret)
+		kfree(fs->vqs);
+	return ret;
+}
+
+/* Free virtqueues (device must already be reset) */
+static void virtio_fs_cleanup_vqs(struct virtio_device *vdev,
+				  struct virtio_fs *fs)
+{
+	vdev->config->del_vqs(vdev);
+}
+
+static int virtio_fs_probe(struct virtio_device *vdev)
+{
+	struct virtio_fs *fs;
+	int ret;
+
+	fs = kzalloc(sizeof(*fs), GFP_KERNEL);
+	if (!fs)
+		return -ENOMEM;
+	kref_init(&fs->refcount);
+	vdev->priv = fs;
+
+	ret = virtio_fs_read_tag(vdev, fs);
+	if (ret < 0)
+		goto out;
+
+	ret = virtio_fs_setup_vqs(vdev, fs);
+	if (ret < 0)
+		goto out;
+
+	/* TODO vq affinity */
+
+	/* Bring the device online in case the filesystem is mounted and
+	 * requests need to be sent before we return.
+	 */
+	virtio_device_ready(vdev);
+
+	ret = virtio_fs_add_instance(fs);
+	if (ret < 0)
+		goto out_vqs;
+
+	return 0;
+
+out_vqs:
+	vdev->config->reset(vdev);
+	virtio_fs_cleanup_vqs(vdev, fs);
+
+out:
+	vdev->priv = NULL;
+	kfree(fs);
+	return ret;
+}
+
+static void virtio_fs_stop_all_queues(struct virtio_fs *fs)
+{
+	struct virtio_fs_vq *fsvq;
+	int i;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		fsvq = &fs->vqs[i];
+		spin_lock(&fsvq->lock);
+		fsvq->connected = false;
+		spin_unlock(&fsvq->lock);
+	}
+}
+
+static void virtio_fs_remove(struct virtio_device *vdev)
+{
+	struct virtio_fs *fs = vdev->priv;
+
+	mutex_lock(&virtio_fs_mutex);
+	/* This device is going away. No one should get new reference */
+	list_del_init(&fs->list);
+	virtio_fs_stop_all_queues(fs);
+	virtio_fs_drain_all_queues(fs);
+	vdev->config->reset(vdev);
+	virtio_fs_cleanup_vqs(vdev, fs);
+
+	vdev->priv = NULL;
+	/* Put device reference on virtio_fs object */
+	virtio_fs_put(fs);
+	mutex_unlock(&virtio_fs_mutex);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtio_fs_freeze(struct virtio_device *vdev)
+{
+	/* TODO need to save state here */
+	pr_warn("virtio-fs: suspend/resume not yet supported\n");
+	return -EOPNOTSUPP;
+}
+
+static int virtio_fs_restore(struct virtio_device *vdev)
+{
+	 /* TODO need to restore state here */
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+const static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID },
+	{},
+};
+
+const static unsigned int feature_table[] = {};
+
+static struct virtio_driver virtio_fs_driver = {
+	.driver.name		= KBUILD_MODNAME,
+	.driver.owner		= THIS_MODULE,
+	.id_table		= id_table,
+	.feature_table		= feature_table,
+	.feature_table_size	= ARRAY_SIZE(feature_table),
+	.probe			= virtio_fs_probe,
+	.remove			= virtio_fs_remove,
+#ifdef CONFIG_PM_SLEEP
+	.freeze			= virtio_fs_freeze,
+	.restore		= virtio_fs_restore,
+#endif
+};
+
+static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+	struct fuse_forget_link *link;
+	struct virtio_fs_forget *forget;
+	struct scatterlist sg;
+	struct scatterlist *sgs[] = {&sg};
+	struct virtio_fs *fs;
+	struct virtqueue *vq;
+	struct virtio_fs_vq *fsvq;
+	bool notify;
+	u64 unique;
+	int ret;
+
+	link = fuse_dequeue_forget(fiq, 1, NULL);
+	unique = fuse_get_unique(fiq);
+
+	fs = fiq->priv;
+	fsvq = &fs->vqs[VQ_HIPRIO];
+	spin_unlock(&fiq->lock);
+
+	/* Allocate a buffer for the request */
+	forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL);
+
+	forget->ih = (struct fuse_in_header){
+		.opcode = FUSE_FORGET,
+		.nodeid = link->forget_one.nodeid,
+		.unique = unique,
+		.len = sizeof(*forget),
+	};
+	forget->arg = (struct fuse_forget_in){
+		.nlookup = link->forget_one.nlookup,
+	};
+
+	sg_init_one(&sg, forget, sizeof(*forget));
+
+	/* Enqueue the request */
+	spin_lock(&fsvq->lock);
+
+	if (!fsvq->connected) {
+		kfree(forget);
+		spin_unlock(&fsvq->lock);
+		goto out;
+	}
+
+	vq = fsvq->vq;
+	dev_dbg(&vq->vdev->dev, "%s\n", __func__);
+
+	ret = virtqueue_add_sgs(vq, sgs, 1, 0, forget, GFP_ATOMIC);
+	if (ret < 0) {
+		if (ret == -ENOMEM || ret == -ENOSPC) {
+			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later.\n",
+				 ret);
+			list_add_tail(&forget->list, &fsvq->queued_reqs);
+			schedule_delayed_work(&fsvq->dispatch_work,
+					msecs_to_jiffies(1));
+		} else {
+			pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
+				 ret);
+			kfree(forget);
+		}
+		spin_unlock(&fsvq->lock);
+		goto out;
+	}
+
+	fsvq->in_flight++;
+	notify = virtqueue_kick_prepare(vq);
+
+	spin_unlock(&fsvq->lock);
+
+	if (notify)
+		virtqueue_notify(vq);
+out:
+	kfree(link);
+}
+
+static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+	/*
+	 * TODO interrupts.
+	 *
+	 * Normal fs operations on a local filesystems aren't interruptible.
+	 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW)
+	 * with shared lock between host and guest.
+	 */
+	spin_unlock(&fiq->lock);
+}
+
+/* Return the number of scatter-gather list elements required */
+static unsigned int sg_count_fuse_req(struct fuse_req *req)
+{
+	struct fuse_args *args = req->args;
+	struct fuse_args_pages *ap = container_of(args, typeof(*ap), args);
+	unsigned int total_sgs = 1 /* fuse_in_header */;
+
+	if (args->in_numargs - args->in_pages)
+		total_sgs += 1;
+
+	if (args->in_pages)
+		total_sgs += ap->num_pages;
+
+	if (!test_bit(FR_ISREPLY, &req->flags))
+		return total_sgs;
+
+	total_sgs += 1 /* fuse_out_header */;
+
+	if (args->out_numargs - args->out_pages)
+		total_sgs += 1;
+
+	if (args->out_pages)
+		total_sgs += ap->num_pages;
+
+	return total_sgs;
+}
+
+/* Add pages to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
+				       struct page **pages,
+				       struct fuse_page_desc *page_descs,
+				       unsigned int num_pages,
+				       unsigned int total_len)
+{
+	unsigned int i;
+	unsigned int this_len;
+
+	for (i = 0; i < num_pages && total_len; i++) {
+		sg_init_table(&sg[i], 1);
+		this_len =  min(page_descs[i].length, total_len);
+		sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
+		total_len -= this_len;
+	}
+
+	return i;
+}
+
+/* Add args to scatter-gather list and return number of elements used */
+static unsigned int sg_init_fuse_args(struct scatterlist *sg,
+				      struct fuse_req *req,
+				      struct fuse_arg *args,
+				      unsigned int numargs,
+				      bool argpages,
+				      void *argbuf,
+				      unsigned int *len_used)
+{
+	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
+	unsigned int total_sgs = 0;
+	unsigned int len;
+
+	len = fuse_len_args(numargs - argpages, args);
+	if (len)
+		sg_init_one(&sg[total_sgs++], argbuf, len);
+
+	if (argpages)
+		total_sgs += sg_init_fuse_pages(&sg[total_sgs],
+						ap->pages, ap->descs,
+						ap->num_pages,
+						args[numargs - 1].size);
+
+	if (len_used)
+		*len_used = len;
+
+	return total_sgs;
+}
+
+/* Add a request to a virtqueue and kick the device */
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+				 struct fuse_req *req)
+{
+	/* requests need at least 4 elements */
+	struct scatterlist *stack_sgs[6];
+	struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)];
+	struct scatterlist **sgs = stack_sgs;
+	struct scatterlist *sg = stack_sg;
+	struct virtqueue *vq;
+	struct fuse_args *args = req->args;
+	unsigned int argbuf_used = 0;
+	unsigned int out_sgs = 0;
+	unsigned int in_sgs = 0;
+	unsigned int total_sgs;
+	unsigned int i;
+	int ret;
+	bool notify;
+
+	/* Does the sglist fit on the stack? */
+	total_sgs = sg_count_fuse_req(req);
+	if (total_sgs > ARRAY_SIZE(stack_sgs)) {
+		sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
+		sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
+		if (!sgs || !sg) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Use a bounce buffer since stack args cannot be mapped */
+	ret = copy_args_to_argbuf(req);
+	if (ret < 0)
+		goto out;
+
+	/* Request elements */
+	sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h));
+	out_sgs += sg_init_fuse_args(&sg[out_sgs], req,
+				     (struct fuse_arg *)args->in_args,
+				     args->in_numargs, args->in_pages,
+				     req->argbuf, &argbuf_used);
+
+	/* Reply elements */
+	if (test_bit(FR_ISREPLY, &req->flags)) {
+		sg_init_one(&sg[out_sgs + in_sgs++],
+			    &req->out.h, sizeof(req->out.h));
+		in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req,
+					    args->out_args, args->out_numargs,
+					    args->out_pages,
+					    req->argbuf + argbuf_used, NULL);
+	}
+
+	WARN_ON(out_sgs + in_sgs != total_sgs);
+
+	for (i = 0; i < total_sgs; i++)
+		sgs[i] = &sg[i];
+
+	spin_lock(&fsvq->lock);
+
+	if (!fsvq->connected) {
+		spin_unlock(&fsvq->lock);
+		ret = -ENOTCONN;
+		goto out;
+	}
+
+	vq = fsvq->vq;
+	ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC);
+	if (ret < 0) {
+		spin_unlock(&fsvq->lock);
+		goto out;
+	}
+
+	fsvq->in_flight++;
+	notify = virtqueue_kick_prepare(vq);
+
+	spin_unlock(&fsvq->lock);
+
+	if (notify)
+		virtqueue_notify(vq);
+
+out:
+	if (ret < 0 && req->argbuf) {
+		kfree(req->argbuf);
+		req->argbuf = NULL;
+	}
+	if (sgs != stack_sgs) {
+		kfree(sgs);
+		kfree(sg);
+	}
+
+	return ret;
+}
+
+static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
+__releases(fiq->lock)
+{
+	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
+	struct virtio_fs *fs;
+	struct fuse_conn *fc;
+	struct fuse_req *req;
+	struct fuse_pqueue *fpq;
+	int ret;
+
+	WARN_ON(list_empty(&fiq->pending));
+	req = list_last_entry(&fiq->pending, struct fuse_req, list);
+	clear_bit(FR_PENDING, &req->flags);
+	list_del_init(&req->list);
+	WARN_ON(!list_empty(&fiq->pending));
+	spin_unlock(&fiq->lock);
+
+	fs = fiq->priv;
+	fc = fs->vqs[queue_id].fud->fc;
+
+	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
+		  __func__, req->in.h.opcode, req->in.h.unique,
+		 req->in.h.nodeid, req->in.h.len,
+		 fuse_len_args(req->args->out_numargs, req->args->out_args));
+
+	fpq = &fs->vqs[queue_id].fud->pq;
+	spin_lock(&fpq->lock);
+	if (!fpq->connected) {
+		spin_unlock(&fpq->lock);
+		req->out.h.error = -ENODEV;
+		pr_err("virtio-fs: %s disconnected\n", __func__);
+		fuse_request_end(fc, req);
+		return;
+	}
+	list_add_tail(&req->list, fpq->processing);
+	spin_unlock(&fpq->lock);
+	set_bit(FR_SENT, &req->flags);
+	/* matches barrier in request_wait_answer() */
+	smp_mb__after_atomic();
+
+retry:
+	ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
+	if (ret < 0) {
+		if (ret == -ENOMEM || ret == -ENOSPC) {
+			/* Virtqueue full. Retry submission */
+			/* TODO use completion instead of timeout */
+			usleep_range(20, 30);
+			goto retry;
+		}
+		req->out.h.error = ret;
+		pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
+		spin_lock(&fpq->lock);
+		clear_bit(FR_SENT, &req->flags);
+		list_del_init(&req->list);
+		spin_unlock(&fpq->lock);
+		fuse_request_end(fc, req);
+		return;
+	}
+}
+
+const static struct fuse_iqueue_ops virtio_fs_fiq_ops = {
+	.wake_forget_and_unlock		= virtio_fs_wake_forget_and_unlock,
+	.wake_interrupt_and_unlock	= virtio_fs_wake_interrupt_and_unlock,
+	.wake_pending_and_unlock	= virtio_fs_wake_pending_and_unlock,
+	.release			= virtio_fs_fiq_release,
+};
+
+static int virtio_fs_fill_super(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	struct virtio_fs *fs = fc->iq.priv;
+	unsigned int i;
+	int err;
+	struct fuse_fs_context ctx = {
+		.rootmode = S_IFDIR,
+		.default_permissions = 1,
+		.allow_other = 1,
+		.max_read = UINT_MAX,
+		.blksize = 512,
+		.destroy = true,
+		.no_control = true,
+		.no_force_umount = true,
+	};
+
+	mutex_lock(&virtio_fs_mutex);
+
+	/* After holding mutex, make sure virtiofs device is still there.
+	 * Though we are holding a reference to it, drive ->remove might
+	 * still have cleaned up virtual queues. In that case bail out.
+	 */
+	err = -EINVAL;
+	if (list_empty(&fs->list)) {
+		pr_info("virtio-fs: tag <%s> not found\n", fs->tag);
+		goto err;
+	}
+
+	err = -ENOMEM;
+	/* Allocate fuse_dev for hiprio and notification queues */
+	for (i = 0; i < VQ_REQUEST; i++) {
+		struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+		fsvq->fud = fuse_dev_alloc();
+		if (!fsvq->fud)
+			goto err_free_fuse_devs;
+	}
+
+	ctx.fudptr = (void **)&fs->vqs[VQ_REQUEST].fud;
+	err = fuse_fill_super_common(sb, &ctx);
+	if (err < 0)
+		goto err_free_fuse_devs;
+
+	fc = fs->vqs[VQ_REQUEST].fud->fc;
+
+	for (i = 0; i < fs->nvqs; i++) {
+		struct virtio_fs_vq *fsvq = &fs->vqs[i];
+
+		if (i == VQ_REQUEST)
+			continue; /* already initialized */
+		fuse_dev_install(fsvq->fud, fc);
+	}
+
+	/* Previous unmount will stop all queues. Start these again */
+	virtio_fs_start_all_queues(fs);
+	fuse_send_init(fc);
+	mutex_unlock(&virtio_fs_mutex);
+	return 0;
+
+err_free_fuse_devs:
+	virtio_fs_free_devs(fs);
+err:
+	mutex_unlock(&virtio_fs_mutex);
+	return err;
+}
+
+static void virtio_kill_sb(struct super_block *sb)
+{
+	struct fuse_conn *fc = get_fuse_conn_super(sb);
+	struct virtio_fs *vfs;
+	struct virtio_fs_vq *fsvq;
+
+	/* If mount failed, we can still be called without any fc */
+	if (!fc)
+		return fuse_kill_sb_anon(sb);
+
+	vfs = fc->iq.priv;
+	fsvq = &vfs->vqs[VQ_HIPRIO];
+
+	/* Stop forget queue. Soon destroy will be sent */
+	spin_lock(&fsvq->lock);
+	fsvq->connected = false;
+	spin_unlock(&fsvq->lock);
+	virtio_fs_drain_all_queues(vfs);
+
+	fuse_kill_sb_anon(sb);
+
+	/* fuse_kill_sb_anon() must have sent destroy. Stop all queues
+	 * and drain one more time and free fuse devices. Freeing fuse
+	 * devices will drop their reference on fuse_conn and that in
+	 * turn will drop its reference on virtio_fs object.
+	 */
+	virtio_fs_stop_all_queues(vfs);
+	virtio_fs_drain_all_queues(vfs);
+	virtio_fs_free_devs(vfs);
+}
+
+static int virtio_fs_test_super(struct super_block *sb,
+				struct fs_context *fsc)
+{
+	struct fuse_conn *fc = fsc->s_fs_info;
+
+	return fc->iq.priv == get_fuse_conn_super(sb)->iq.priv;
+}
+
+static int virtio_fs_set_super(struct super_block *sb,
+			       struct fs_context *fsc)
+{
+	int err;
+
+	err = get_anon_bdev(&sb->s_dev);
+	if (!err)
+		fuse_conn_get(fsc->s_fs_info);
+
+	return err;
+}
+
+static int virtio_fs_get_tree(struct fs_context *fsc)
+{
+	struct virtio_fs *fs;
+	struct super_block *sb;
+	struct fuse_conn *fc;
+	int err;
+
+	/* This gets a reference on virtio_fs object. This ptr gets installed
+	 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
+	 * to drop the reference to this object.
+	 */
+	fs = virtio_fs_find_instance(fsc->source);
+	if (!fs) {
+		pr_info("virtio-fs: tag <%s> not found\n", fsc->source);
+		return -EINVAL;
+	}
+
+	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
+	if (!fc) {
+		mutex_lock(&virtio_fs_mutex);
+		virtio_fs_put(fs);
+		mutex_unlock(&virtio_fs_mutex);
+		return -ENOMEM;
+	}
+
+	fuse_conn_init(fc, get_user_ns(current_user_ns()), &virtio_fs_fiq_ops,
+		       fs);
+	fc->release = fuse_free_conn;
+	fc->delete_stale = true;
+
+	fsc->s_fs_info = fc;
+	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
+	fuse_conn_put(fc);
+	if (IS_ERR(sb))
+		return PTR_ERR(sb);
+
+	if (!sb->s_root) {
+		err = virtio_fs_fill_super(sb);
+		if (err) {
+			deactivate_locked_super(sb);
+			return err;
+		}
+
+		sb->s_flags |= SB_ACTIVE;
+	}
+
+	WARN_ON(fsc->root);
+	fsc->root = dget(sb->s_root);
+	return 0;
+}
+
+static const struct fs_context_operations virtio_fs_context_ops = {
+	.get_tree	= virtio_fs_get_tree,
+};
+
+static int virtio_fs_init_fs_context(struct fs_context *fsc)
+{
+	fsc->ops = &virtio_fs_context_ops;
+	return 0;
+}
+
+static struct file_system_type virtio_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "virtiofs",
+	.init_fs_context = virtio_fs_init_fs_context,
+	.kill_sb	= virtio_kill_sb,
+};
+
+static int __init virtio_fs_init(void)
+{
+	int ret;
+
+	ret = register_virtio_driver(&virtio_fs_driver);
+	if (ret < 0)
+		return ret;
+
+	ret = register_filesystem(&virtio_fs_type);
+	if (ret < 0) {
+		unregister_virtio_driver(&virtio_fs_driver);
+		return ret;
+	}
+
+	return 0;
+}
+module_init(virtio_fs_init);
+
+static void __exit virtio_fs_exit(void)
+{
+	unregister_filesystem(&virtio_fs_type);
+	unregister_virtio_driver(&virtio_fs_driver);
+}
+module_exit(virtio_fs_exit);
+
+MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>");
+MODULE_DESCRIPTION("Virtio Filesystem");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_FS(KBUILD_MODNAME);
+MODULE_DEVICE_TABLE(virtio, id_table);
diff --git a/include/uapi/linux/virtio_fs.h b/include/uapi/linux/virtio_fs.h
new file mode 100644
index 000000000000..b02eb2ac3d99
--- /dev/null
+++ b/include/uapi/linux/virtio_fs.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+
+#ifndef _UAPI_LINUX_VIRTIO_FS_H
+#define _UAPI_LINUX_VIRTIO_FS_H
+
+#include <linux/types.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_types.h>
+
+struct virtio_fs_config {
+	/* Filesystem name (UTF-8, not NUL-terminated, padded with NULs) */
+	__u8 tag[36];
+
+	/* Number of request queues */
+	__u32 num_request_queues;
+} __attribute__((packed));
+
+#endif /* _UAPI_LINUX_VIRTIO_FS_H */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 348fd0176f75..585e07b27333 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -44,6 +44,7 @@
 #define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
 #define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
 #define VIRTIO_ID_IOMMU        23 /* virtio IOMMU */
+#define VIRTIO_ID_FS           26 /* virtio filesystem */
 #define VIRTIO_ID_PMEM         27 /* virtio pmem */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
cgit v1.2.3


From a0791f0df7d212c245761538b17a9ea93607b667 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 17 Sep 2019 10:45:38 -0700
Subject: bpf: fix BTF limits

vmlinux BTF has more than 64k types.
Its string section is also at the offset larger than 64k.
Adjust both limits to make in-kernel BTF verifier successfully parse in-kernel BTF.

Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/uapi/linux/btf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 63ae4a39e58b..c02dec97e1ce 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -22,9 +22,9 @@ struct btf_header {
 };
 
 /* Max # of type identifier */
-#define BTF_MAX_TYPE	0x0000ffff
+#define BTF_MAX_TYPE	0x000fffff
 /* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET	0x0000ffff
+#define BTF_MAX_NAME_OFFSET	0x00ffffff
 /* Max # of struct/union/enum members or func args */
 #define BTF_MAX_VLEN	0xffff
 
-- 
cgit v1.2.3


From 2d2e0b90a08f1e3a47b3ee852b27c219295423ef Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Wed, 18 Sep 2019 16:07:28 -0400
Subject: drm: Fix kerneldoc and remove unused struct member in self_refresh
 helper

Artifacts of previous revisions.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link to v1: https://patchwork.freedesktop.org/patch/msgid/20190917200443.64481-1-sean@poorly.run
Link: https://patchwork.freedesktop.org/patch/msgid/20190918200734.149876-1-sean@poorly.run

Changes in v2:
- None
---
 drivers/gpu/drm/drm_self_refresh_helper.c | 1 -
 include/drm/drm_crtc.h                    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c
index 4b9424a8f1f1..9095cebf2147 100644
--- a/drivers/gpu/drm/drm_self_refresh_helper.c
+++ b/drivers/gpu/drm/drm_self_refresh_helper.c
@@ -53,7 +53,6 @@
 struct drm_self_refresh_data {
 	struct drm_crtc *crtc;
 	struct delayed_work entry_work;
-	struct drm_atomic_state *save_state;
 	unsigned int entry_delay_ms;
 };
 
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index c4528eb5d168..408b6f4e63c0 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -1108,7 +1108,7 @@ struct drm_crtc {
 	/**
 	 * @self_refresh_data: Holds the state for the self refresh helpers
 	 *
-	 * Initialized via drm_self_refresh_helper_register().
+	 * Initialized via drm_self_refresh_helper_init().
 	 */
 	struct drm_self_refresh_data *self_refresh_data;
 };
-- 
cgit v1.2.3


From d4da4e33341c5e6159543acc03559cb24f520bc2 Mon Sep 17 00:00:00 2001
From: Sean Paul <seanpaul@chromium.org>
Date: Wed, 18 Sep 2019 16:07:29 -0400
Subject: drm: Measure Self Refresh Entry/Exit times to avoid thrashing

Currently the self refresh idle timer is a const set by the crtc. This
is fine if the self refresh entry/exit times are well-known for all
panels used on that crtc. However panels and workloads can vary quite a
bit, and a timeout which works well for one doesn't work well for
another.

In the extreme, if the timeout is too short we could get in a situation
where the self refresh exits are taking so long we queue up a self refresh
entry before the exit commit is even finished.

This patch changes the idle timeout to a moving average of the entry
times + a moving average of exit times + the crtc constant.

This patch was tested on rockchip, with a kevin CrOS panel the idle
delay averages out to about ~235ms (35 entry + 100 exit + 100 const). On
the same board, the bob panel idle delay lands around ~340ms (90 entry
+ 150 exit + 100 const).

WRT the dedicated mutex in self_refresh_data, it would be nice if we
could rely on drm_crtc.mutex to protect the average times, but there are
a few reasons why a separate lock is a better choice:
- We can't rely on drm_crtc.mutex being held if we're doing a nonblocking
  commit
- We can't grab drm_crtc.mutex since drm_modeset_lock() doesn't tell us
  whether the lock was already held in the acquire context (it eats
  -EALREADY), so we can't tell if we should drop it or not
- We don't need such a heavy-handed lock for what we're trying to do,
  commit ordering doesn't matter, so a point-of-use lock will be less
  contentious

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Link to v1: https://patchwork.freedesktop.org/patch/msgid/20190917200443.64481-2-sean@poorly.run
Link: https://patchwork.freedesktop.org/patch/msgid/20190918200734.149876-2-sean@poorly.run

Changes in v2:
- Migrate locking explanation from comment to commit msg (Daniel)
- Turf constant entry delay and multiply the avg times by 2 (Daniel)
---
 drivers/gpu/drm/drm_atomic_helper.c         | 20 ++++++++
 drivers/gpu/drm/drm_self_refresh_helper.c   | 72 ++++++++++++++++++++++++++---
 drivers/gpu/drm/rockchip/rockchip_drm_vop.c |  5 +-
 include/drm/drm_self_refresh_helper.h       |  6 +--
 4 files changed, 90 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 3a67f63c3146..3ef2ac52ce94 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/dma-fence.h>
+#include <linux/ktime.h>
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
@@ -1580,9 +1581,23 @@ static void commit_tail(struct drm_atomic_state *old_state)
 {
 	struct drm_device *dev = old_state->dev;
 	const struct drm_mode_config_helper_funcs *funcs;
+	ktime_t start;
+	s64 commit_time_ms;
 
 	funcs = dev->mode_config.helper_private;
 
+	/*
+	 * We're measuring the _entire_ commit, so the time will vary depending
+	 * on how many fences and objects are involved. For the purposes of self
+	 * refresh, this is desirable since it'll give us an idea of how
+	 * congested things are. This will inform our decision on how often we
+	 * should enter self refresh after idle.
+	 *
+	 * These times will be averaged out in the self refresh helpers to avoid
+	 * overreacting over one outlier frame
+	 */
+	start = ktime_get();
+
 	drm_atomic_helper_wait_for_fences(dev, old_state, false);
 
 	drm_atomic_helper_wait_for_dependencies(old_state);
@@ -1592,6 +1607,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
 	else
 		drm_atomic_helper_commit_tail(old_state);
 
+	commit_time_ms = ktime_ms_delta(ktime_get(), start);
+	if (commit_time_ms > 0)
+		drm_self_refresh_helper_update_avg_times(old_state,
+						 (unsigned long)commit_time_ms);
+
 	drm_atomic_helper_commit_cleanup_done(old_state);
 
 	drm_atomic_state_put(old_state);
diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c
index 9095cebf2147..68f4765a5896 100644
--- a/drivers/gpu/drm/drm_self_refresh_helper.c
+++ b/drivers/gpu/drm/drm_self_refresh_helper.c
@@ -5,6 +5,7 @@
  * Authors:
  * Sean Paul <seanpaul@chromium.org>
  */
+#include <linux/average.h>
 #include <linux/bitops.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
@@ -50,10 +51,17 @@
  * atomic_check when &drm_crtc_state.self_refresh_active is true.
  */
 
+#define SELF_REFRESH_AVG_SEED_MS 200
+
+DECLARE_EWMA(psr_time, 4, 4)
+
 struct drm_self_refresh_data {
 	struct drm_crtc *crtc;
 	struct delayed_work entry_work;
-	unsigned int entry_delay_ms;
+
+	struct mutex avg_mutex;
+	struct ewma_psr_time entry_avg_ms;
+	struct ewma_psr_time exit_avg_ms;
 };
 
 static void drm_self_refresh_helper_entry_work(struct work_struct *work)
@@ -121,6 +129,44 @@ out_drop_locks:
 	drm_modeset_acquire_fini(&ctx);
 }
 
+/**
+ * drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
+ * @state: the state which has just been applied to hardware
+ * @commit_time_ms: the amount of time in ms that this commit took to complete
+ *
+ * Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
+ * update the average entry/exit self refresh times on self refresh transitions.
+ * These averages will be used when calculating how long to delay before
+ * entering self refresh mode after activity.
+ */
+void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+					      unsigned int commit_time_ms)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+	int i;
+
+	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+				      new_crtc_state, i) {
+		struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
+		struct ewma_psr_time *time;
+
+		if (old_crtc_state->self_refresh_active ==
+		    new_crtc_state->self_refresh_active)
+			continue;
+
+		if (new_crtc_state->self_refresh_active)
+			time = &sr_data->entry_avg_ms;
+		else
+			time = &sr_data->exit_avg_ms;
+
+		mutex_lock(&sr_data->avg_mutex);
+		ewma_psr_time_add(time, commit_time_ms);
+		mutex_unlock(&sr_data->avg_mutex);
+	}
+}
+EXPORT_SYMBOL(drm_self_refresh_helper_update_avg_times);
+
 /**
  * drm_self_refresh_helper_alter_state - Alters the atomic state for SR exit
  * @state: the state currently being checked
@@ -152,6 +198,7 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
 
 	for_each_new_crtc_in_state(state, crtc, crtc_state, i) {
 		struct drm_self_refresh_data *sr_data;
+		unsigned int delay;
 
 		/* Don't trigger the entry timer when we're already in SR */
 		if (crtc_state->self_refresh_active)
@@ -161,8 +208,13 @@ void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state)
 		if (!sr_data)
 			continue;
 
+		mutex_lock(&sr_data->avg_mutex);
+		delay = (ewma_psr_time_read(&sr_data->entry_avg_ms) +
+			 ewma_psr_time_read(&sr_data->exit_avg_ms)) * 2;
+		mutex_unlock(&sr_data->avg_mutex);
+
 		mod_delayed_work(system_wq, &sr_data->entry_work,
-				 msecs_to_jiffies(sr_data->entry_delay_ms));
+				 msecs_to_jiffies(delay));
 	}
 }
 EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
@@ -170,12 +222,10 @@ EXPORT_SYMBOL(drm_self_refresh_helper_alter_state);
 /**
  * drm_self_refresh_helper_init - Initializes self refresh helpers for a crtc
  * @crtc: the crtc which supports self refresh supported displays
- * @entry_delay_ms: amount of inactivity to wait before entering self refresh
  *
  * Returns zero if successful or -errno on failure
  */
-int drm_self_refresh_helper_init(struct drm_crtc *crtc,
-				 unsigned int entry_delay_ms)
+int drm_self_refresh_helper_init(struct drm_crtc *crtc)
 {
 	struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
 
@@ -189,8 +239,18 @@ int drm_self_refresh_helper_init(struct drm_crtc *crtc,
 
 	INIT_DELAYED_WORK(&sr_data->entry_work,
 			  drm_self_refresh_helper_entry_work);
-	sr_data->entry_delay_ms = entry_delay_ms;
 	sr_data->crtc = crtc;
+	mutex_init(&sr_data->avg_mutex);
+	ewma_psr_time_init(&sr_data->entry_avg_ms);
+	ewma_psr_time_init(&sr_data->exit_avg_ms);
+
+	/*
+	 * Seed the averages so they're non-zero (and sufficiently large
+	 * for even poorly performing panels). As time goes on, this will be
+	 * averaged out and the values will trend to their true value.
+	 */
+	ewma_psr_time_add(&sr_data->entry_avg_ms, SELF_REFRESH_AVG_SEED_MS);
+	ewma_psr_time_add(&sr_data->exit_avg_ms, SELF_REFRESH_AVG_SEED_MS);
 
 	crtc->self_refresh_data = sr_data;
 	return 0;
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 2f821c58007c..613404f86668 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -39,8 +39,6 @@
 #include "rockchip_drm_vop.h"
 #include "rockchip_rgb.h"
 
-#define VOP_SELF_REFRESH_ENTRY_DELAY_MS 100
-
 #define VOP_WIN_SET(vop, win, name, v) \
 		vop_reg_set(vop, &win->phy->name, win->base, ~0, v, #name)
 #define VOP_SCL_SET(vop, win, name, v) \
@@ -1563,8 +1561,7 @@ static int vop_create_crtc(struct vop *vop)
 	init_completion(&vop->line_flag_completion);
 	crtc->port = port;
 
-	ret = drm_self_refresh_helper_init(crtc,
-					   VOP_SELF_REFRESH_ENTRY_DELAY_MS);
+	ret = drm_self_refresh_helper_init(crtc);
 	if (ret)
 		DRM_DEV_DEBUG_KMS(vop->dev,
 			"Failed to init %s with SR helpers %d, ignoring\n",
diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h
index 397a583ccca7..5b79d253fb46 100644
--- a/include/drm/drm_self_refresh_helper.h
+++ b/include/drm/drm_self_refresh_helper.h
@@ -12,9 +12,9 @@ struct drm_atomic_state;
 struct drm_crtc;
 
 void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state);
+void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+					      unsigned int commit_time_ms);
 
-int drm_self_refresh_helper_init(struct drm_crtc *crtc,
-				 unsigned int entry_delay_ms);
-
+int drm_self_refresh_helper_init(struct drm_crtc *crtc);
 void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc);
 #endif
-- 
cgit v1.2.3


From 6fe7b9901400152238e1b76198747f6716c78aad Mon Sep 17 00:00:00 2001
From: Matthew Bobrowski <mbobrowski@mbobrowski.org>
Date: Thu, 19 Sep 2019 15:32:44 -0700
Subject: iomap: split size and error for iomap_dio_rw ->end_io

Modify the calling convention for the iomap_dio_rw ->end_io() callback.
Rather than passing either dio->error or dio->size as the 'size' argument,
instead pass both the dio->error and the dio->size value separately.

In the instance that an error occurred during a write, we currently cannot
determine whether any blocks have been allocated beyond the current EOF and
data has subsequently been written to these blocks within the ->end_io()
callback. As a result, we cannot judge whether we should take the truncate
failed write path. Having both dio->error and dio->size will allow us to
perform such checks within this callback.

Signed-off-by: Matthew Bobrowski <mbobrowski@mbobrowski.org>
[hch: minor cleanups]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/direct-io.c  | 9 +++------
 fs/xfs/xfs_file.c     | 8 +++++---
 include/linux/iomap.h | 4 ++--
 3 files changed, 10 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 10517cea9682..2ccf1c6460d4 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -77,13 +77,10 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (dio->end_io) {
-		ret = dio->end_io(iocb,
-				dio->error ? dio->error : dio->size,
-				dio->flags);
-	} else {
+	if (dio->end_io)
+		ret = dio->end_io(iocb, dio->size, dio->error, dio->flags);
+	else
 		ret = dio->error;
-	}
 
 	if (likely(!ret)) {
 		ret = dio->size;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 28101bbc0b78..74411296f6b5 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -369,21 +369,23 @@ static int
 xfs_dio_write_end_io(
 	struct kiocb		*iocb,
 	ssize_t			size,
+	int			error,
 	unsigned		flags)
 {
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
 	loff_t			offset = iocb->ki_pos;
 	unsigned int		nofs_flag;
-	int			error = 0;
 
 	trace_xfs_end_io_direct_write(ip, offset, size);
 
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	if (size <= 0)
-		return size;
+	if (error)
+		return error;
+	if (!size)
+		return 0;
 
 	/*
 	 * Capture amount written on completion as we can't reliably account
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index bc499ceae392..50bb746d2216 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -188,8 +188,8 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
  */
 #define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
 #define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
-typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
-		unsigned flags);
+typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size, int error,
+				 unsigned int flags);
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
-- 
cgit v1.2.3


From 838c4f3d7515efe9d0e32c846fb5d102b6d8a29d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 19 Sep 2019 15:32:45 -0700
Subject: iomap: move the iomap_dio_rw ->end_io callback into a structure

Add a new iomap_dio_ops structure that for now just contains the end_io
handler.  This avoid storing the function pointer in a mutable structure,
which is a possible exploit vector for kernel code execution, and prepares
for adding a submit_io handler that btrfs needs.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/iomap/direct-io.c  | 21 ++++++++++-----------
 fs/xfs/xfs_file.c     |  6 +++++-
 include/linux/iomap.h | 10 +++++++---
 3 files changed, 22 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 2ccf1c6460d4..1fc28c2da279 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -24,7 +24,7 @@
 
 struct iomap_dio {
 	struct kiocb		*iocb;
-	iomap_dio_end_io_t	*end_io;
+	const struct iomap_dio_ops *dops;
 	loff_t			i_size;
 	loff_t			size;
 	atomic_t		ref;
@@ -72,15 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
+	const struct iomap_dio_ops *dops = dio->dops;
 	struct kiocb *iocb = dio->iocb;
 	struct inode *inode = file_inode(iocb->ki_filp);
 	loff_t offset = iocb->ki_pos;
-	ssize_t ret;
+	ssize_t ret = dio->error;
 
-	if (dio->end_io)
-		ret = dio->end_io(iocb, dio->size, dio->error, dio->flags);
-	else
-		ret = dio->error;
+	if (dops && dops->end_io)
+		ret = dops->end_io(iocb, dio->size, ret, dio->flags);
 
 	if (likely(!ret)) {
 		ret = dio->size;
@@ -98,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
 	 * this invalidation fails, tough, the write still worked...
 	 *
-	 * And this page cache invalidation has to be after dio->end_io(), as
-	 * some filesystems convert unwritten extents to real allocations in
-	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * And this page cache invalidation has to be after ->end_io(), as some
+	 * filesystems convert unwritten extents to real allocations in
+	 * ->end_io() when necessary, otherwise a racing buffer read would cache
 	 * zeros from unwritten extents.
 	 */
 	if (!dio->error &&
@@ -393,7 +392,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  */
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		const struct iomap_ops *ops, iomap_dio_end_io_t end_io)
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -418,7 +417,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	atomic_set(&dio->ref, 1);
 	dio->size = 0;
 	dio->i_size = i_size_read(inode);
-	dio->end_io = end_io;
+	dio->dops = dops;
 	dio->error = 0;
 	dio->flags = 0;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 74411296f6b5..21bd3d575aaa 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -442,6 +442,10 @@ out:
 	return error;
 }
 
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+	.end_io		= xfs_dio_write_end_io,
+};
+
 /*
  * xfs_file_dio_aio_write - handle direct IO writes
  *
@@ -542,7 +546,7 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
-	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
+	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, &xfs_dio_write_ops);
 
 	/*
 	 * If unaligned, this is the only IO in-flight. If it has not yet
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 50bb746d2216..7aa5d6117936 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -188,10 +188,14 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
  */
 #define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
 #define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
-typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size, int error,
-				 unsigned int flags);
+
+struct iomap_dio_ops {
+	int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
+		      unsigned flags);
+};
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
-		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP
-- 
cgit v1.2.3


From ad652f3811d8644d547506154ec9a9c22c8771cd Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 16 Sep 2019 18:33:08 +0200
Subject: netfilter: nf_tables: add NFT_CHAIN_POLICY_UNSET and use it

Default policy is defined as a unsigned 8-bit field, do not use a
negative value to leave it unset, use this new NFT_CHAIN_POLICY_UNSET
instead.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 2 ++
 net/netfilter/nf_tables_api.c     | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 2655e03dbe1b..a26d64056fc8 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -889,6 +889,8 @@ enum nft_chain_flags {
 	NFT_CHAIN_HW_OFFLOAD		= 0x2,
 };
 
+#define NFT_CHAIN_POLICY_UNSET		U8_MAX
+
 /**
  *	struct nft_chain - nf_tables chain
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e4a68dc42694..4a5d6ef2b706 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1715,7 +1715,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
 		goto err2;
 	}
 
-	nft_trans_chain_policy(trans) = -1;
+	nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
 	if (nft_is_base_chain(chain))
 		nft_trans_chain_policy(trans) = policy;
 
-- 
cgit v1.2.3


From dee04eee9182dae91801d0db5bb2acfd5365a749 Mon Sep 17 00:00:00 2001
From: Anup Patel <Anup.Patel@wdc.com>
Date: Wed, 4 Sep 2019 16:13:26 +0000
Subject: KVM: RISC-V: Add KVM_REG_RISCV for ONE_REG interface

We will be using ONE_REG interface accessing VCPU registers from
user-space hence we add KVM_REG_RISCV for RISC-V VCPU registers.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 include/uapi/linux/kvm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 233efbb1c81c..18a2b43097f8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1145,6 +1145,7 @@ struct kvm_dirty_tlb {
 #define KVM_REG_S390		0x5000000000000000ULL
 #define KVM_REG_ARM64		0x6000000000000000ULL
 #define KVM_REG_MIPS		0x7000000000000000ULL
+#define KVM_REG_RISCV		0x8000000000000000ULL
 
 #define KVM_REG_SIZE_SHIFT	52
 #define KVM_REG_SIZE_MASK	0x00f0000000000000ULL
-- 
cgit v1.2.3


From f925ab926d1a9c2112d34ecb59fbb050bb58646c Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Mon, 16 Sep 2019 07:59:38 -0400
Subject: SUNRPC: Rename xdr_buf_read_netobj to xdr_buf_read_mic

Let the name reflect the single use.  The function now assumes the GSS MIC
is the last object in the buffer.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xdr.h     |  2 +-
 net/sunrpc/auth_gss/auth_gss.c |  2 +-
 net/sunrpc/xdr.c               | 52 ++++++++++++++++++++++++------------------
 3 files changed, 32 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 8a87d8bcb197..f33e5013bdfb 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -186,7 +186,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
 extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
+extern int xdr_buf_read_mic(struct xdr_buf *, struct xdr_netobj *, unsigned int);
 extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4ce42c62458e..d75fddca44c9 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1960,7 +1960,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
 
 	if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
 		goto unwrap_failed;
-	if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
+	if (xdr_buf_read_mic(rcv_buf, &mic, mic_offset))
 		goto unwrap_failed;
 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 24ec2ab5bfa5..14ba9e72a204 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1236,52 +1236,60 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
 }
 EXPORT_SYMBOL_GPL(xdr_encode_word);
 
-/* If the netobj starting offset bytes from the start of xdr_buf is contained
- * entirely in the head, pages, or tail, set object to point to it; otherwise
- * shift the buffer until it is contained entirely within the pages or tail.
+/**
+ * xdr_buf_read_mic() - obtain the address of the GSS mic from xdr buf
+ * @buf: pointer to buffer containing a mic
+ * @mic: on success, returns the address of the mic
+ * @offset: the offset in buf where mic may be found
+ *
+ * This function may modify the xdr buf if the mic is found to be straddling
+ * a boundary between head, pages, and tail.  On success the mic can be read
+ * from the address returned.  There is no need to free the mic.
+ *
+ * Return: Success returns 0, otherwise an integer error.
  */
-int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
+int xdr_buf_read_mic(struct xdr_buf *buf, struct xdr_netobj *mic, unsigned int offset)
 {
 	struct xdr_buf subbuf;
 	unsigned int boundary;
 
-	if (xdr_decode_word(buf, offset, &obj->len))
+	if (xdr_decode_word(buf, offset, &mic->len))
 		return -EFAULT;
 	offset += 4;
 
-	/* Is the obj partially in the head? */
+	/* Is the mic partially in the head? */
 	boundary = buf->head[0].iov_len;
-	if (offset < boundary && (offset + obj->len) > boundary)
+	if (offset < boundary && (offset + mic->len) > boundary)
 		xdr_shift_buf(buf, boundary - offset);
 
-	/* Is the obj partially in the pages? */
+	/* Is the mic partially in the pages? */
 	boundary += buf->page_len;
-	if (offset < boundary && (offset + obj->len) > boundary)
+	if (offset < boundary && (offset + mic->len) > boundary)
 		xdr_shrink_pagelen(buf, boundary - offset);
 
-	if (xdr_buf_subsegment(buf, &subbuf, offset, obj->len))
+	if (xdr_buf_subsegment(buf, &subbuf, offset, mic->len))
 		return -EFAULT;
 
-	/* Is the obj contained entirely in the head? */
-	obj->data = subbuf.head[0].iov_base;
-	if (subbuf.head[0].iov_len == obj->len)
+	/* Is the mic contained entirely in the head? */
+	mic->data = subbuf.head[0].iov_base;
+	if (subbuf.head[0].iov_len == mic->len)
 		return 0;
-	/* ..or is the obj contained entirely in the tail? */
-	obj->data = subbuf.tail[0].iov_base;
-	if (subbuf.tail[0].iov_len == obj->len)
+	/* ..or is the mic contained entirely in the tail? */
+	mic->data = subbuf.tail[0].iov_base;
+	if (subbuf.tail[0].iov_len == mic->len)
 		return 0;
 
-	/* Find a contiguous area in @buf to hold all of @obj */
-	if (obj->len > buf->buflen - buf->len)
+	/* Find a contiguous area in @buf to hold all of @mic */
+	if (mic->len > buf->buflen - buf->len)
 		return -ENOMEM;
 	if (buf->tail[0].iov_len != 0)
-		obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+		mic->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
 	else
-		obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
-	__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
+		mic->data = buf->head[0].iov_base + buf->head[0].iov_len;
+	__read_bytes_from_xdr_buf(&subbuf, mic->data, mic->len);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xdr_buf_read_netobj);
+EXPORT_SYMBOL_GPL(xdr_buf_read_mic);
 
 /* Returns 0 on success, or else a negative error code. */
 static int
-- 
cgit v1.2.3


From 406cd91533dcc5e82ef2373c39e6a531d944131e Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 13 Sep 2019 08:29:02 -0400
Subject: NFS: Refactor nfs_instantiate() for dentry referencing callers

Since commit b0c6108ecf64 ("nfs_instantiate(): prevent multiple aliases for
directory inode"), nfs_instantiate() may succeed without actually
instantiating the dentry that was passed in.  That can be problematic for
some callers in NFSv3, so this patch breaks things up so we can get the
actual dentry obtained.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c           | 41 +++++++++++++++++++++++++++--------------
 include/linux/nfs_fs.h |  3 +++
 2 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8d501093660f..d1bbf2fb6ac7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1669,24 +1669,23 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
 #endif /* CONFIG_NFSV4 */
 
-/*
- * Code common to create, mkdir, and mknod.
- */
-int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+struct dentry *
+nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 				struct nfs_fattr *fattr,
 				struct nfs4_label *label)
 {
 	struct dentry *parent = dget_parent(dentry);
 	struct inode *dir = d_inode(parent);
 	struct inode *inode;
-	struct dentry *d;
-	int error = -EACCES;
+	struct dentry *d = NULL;
+	int error;
 
 	d_drop(dentry);
 
 	/* We may have been initialized further down */
 	if (d_really_is_positive(dentry))
 		goto out;
+
 	if (fhandle->size == 0) {
 		error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
 		if (error)
@@ -1702,18 +1701,32 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 	}
 	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
 	d = d_splice_alias(inode, dentry);
-	if (IS_ERR(d)) {
-		error = PTR_ERR(d);
-		goto out_error;
-	}
-	dput(d);
 out:
 	dput(parent);
-	return 0;
+	return d;
 out_error:
 	nfs_mark_for_revalidate(dir);
-	dput(parent);
-	return error;
+	d = ERR_PTR(error);
+	goto out;
+}
+EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
+
+/*
+ * Code common to create, mkdir, and mknod.
+ */
+int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
+				struct nfs_fattr *fattr,
+				struct nfs4_label *label)
+{
+	struct dentry *d;
+
+	d = nfs_add_or_obtain(dentry, fhandle, fattr, label);
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+
+	/* Callers don't care */
+	dput(d);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_instantiate);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0a11712a80e3..570a60c2f4f4 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -490,6 +490,9 @@ extern const struct file_operations nfs_dir_operations;
 extern const struct dentry_operations nfs_dentry_operations;
 
 extern void nfs_force_lookup_revalidate(struct inode *dir);
+extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
+			struct nfs_fh *fh, struct nfs_fattr *fattr,
+			struct nfs4_label *label);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
 			struct nfs_fattr *fattr, struct nfs4_label *label);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
-- 
cgit v1.2.3


From 77d5bc7e6a6cf8bbeca31aab7f0c5449a5eee762 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 17 Sep 2019 10:39:49 -0700
Subject: ipv4: Revert removal of rt_uses_gateway

Julian noted that rt_uses_gateway has a more subtle use than 'is gateway
set':
    https://lore.kernel.org/netdev/alpine.LFD.2.21.1909151104060.2546@ja.home.ssi.bg/

Revert that part of the commit referenced in the Fixes tag.

Currently, there are no u8 holes in 'struct rtable'. There is a 4-byte hole
in the second cacheline which contains the gateway declaration. So move
rt_gw_family down to the gateway declarations since they are always used
together, and then re-use that u8 for rt_uses_gateway. End result is that
rtable size is unchanged.

Fixes: 1550c171935d ("ipv4: Prepare rtable for IPv6 gateway")
Reported-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David Ahern <dsahern@gmail.com>
Reviewed-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/infiniband/core/addr.c  |  2 +-
 include/net/route.h             |  3 ++-
 net/ipv4/inet_connection_sock.c |  4 ++--
 net/ipv4/ip_forward.c           |  2 +-
 net/ipv4/ip_output.c            |  2 +-
 net/ipv4/route.c                | 36 +++++++++++++++++++++---------------
 net/ipv4/xfrm4_policy.c         |  1 +
 7 files changed, 29 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 9b76a8fcdd24..bf539c34ccd3 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -352,7 +352,7 @@ static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
 
 	if (family == AF_INET) {
 		rt = container_of(dst, struct rtable, dst);
-		return rt->rt_gw_family == AF_INET;
+		return rt->rt_uses_gateway;
 	}
 
 	rt6 = container_of(dst, struct rt6_info, dst);
diff --git a/include/net/route.h b/include/net/route.h
index dfce19c9fa96..6c516840380d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -53,10 +53,11 @@ struct rtable {
 	unsigned int		rt_flags;
 	__u16			rt_type;
 	__u8			rt_is_input;
-	u8			rt_gw_family;
+	__u8			rt_uses_gateway;
 
 	int			rt_iif;
 
+	u8			rt_gw_family;
 	/* Info on neighbour */
 	union {
 		__be32		rt_gw4;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f5c163d4771b..a9183543ca30 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -560,7 +560,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
 		goto no_route;
-	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto route_err;
 	rcu_read_unlock();
 	return &rt->dst;
@@ -598,7 +598,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
 	rt = ip_route_output_flow(net, fl4, sk);
 	if (IS_ERR(rt))
 		goto no_route;
-	if (opt && opt->opt.is_strictroute && rt->rt_gw_family)
+	if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto route_err;
 	return &rt->dst;
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 06f6f280b9ff..00ec819f949b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -123,7 +123,7 @@ int ip_forward(struct sk_buff *skb)
 
 	rt = skb_rtable(skb);
 
-	if (opt->is_strictroute && rt->rt_gw_family)
+	if (opt->is_strictroute && rt->rt_uses_gateway)
 		goto sr_failed;
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5eb73775c3f7..a77c3a4c24de 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -499,7 +499,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb_dst_set_noref(skb, &rt->dst);
 
 packet_routed:
-	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
+	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
 		goto no_route;
 
 	/* OK, we know where to send it, allocate and build IP header. */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b6a6f18c3dd1..7dcce724c78b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -635,6 +635,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 
 	if (fnhe->fnhe_gw) {
 		rt->rt_flags |= RTCF_REDIRECTED;
+		rt->rt_uses_gateway = 1;
 		rt->rt_gw_family = AF_INET;
 		rt->rt_gw4 = fnhe->fnhe_gw;
 	}
@@ -1313,7 +1314,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 	mtu = READ_ONCE(dst->dev->mtu);
 
 	if (unlikely(ip_mtu_locked(dst))) {
-		if (rt->rt_gw_family && mtu > 576)
+		if (rt->rt_uses_gateway && mtu > 576)
 			mtu = 576;
 	}
 
@@ -1569,6 +1570,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 		struct fib_nh_common *nhc = FIB_RES_NHC(*res);
 
 		if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) {
+			rt->rt_uses_gateway = 1;
 			rt->rt_gw_family = nhc->nhc_gw_family;
 			/* only INET and INET6 are supported */
 			if (likely(nhc->nhc_gw_family == AF_INET))
@@ -1634,6 +1636,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 		rt->rt_iif = 0;
 		rt->rt_pmtu = 0;
 		rt->rt_mtu_locked = 0;
+		rt->rt_uses_gateway = 0;
 		rt->rt_gw_family = 0;
 		rt->rt_gw4 = 0;
 		INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2694,6 +2697,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
 		rt->rt_genid = rt_genid_ipv4(net);
 		rt->rt_flags = ort->rt_flags;
 		rt->rt_type = ort->rt_type;
+		rt->rt_uses_gateway = ort->rt_uses_gateway;
 		rt->rt_gw_family = ort->rt_gw_family;
 		if (rt->rt_gw_family == AF_INET)
 			rt->rt_gw4 = ort->rt_gw4;
@@ -2778,21 +2782,23 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 		if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
 			goto nla_put_failure;
 	}
-	if (rt->rt_gw_family == AF_INET &&
-	    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
-		goto nla_put_failure;
-	} else if (rt->rt_gw_family == AF_INET6) {
-		int alen = sizeof(struct in6_addr);
-		struct nlattr *nla;
-		struct rtvia *via;
-
-		nla = nla_reserve(skb, RTA_VIA, alen + 2);
-		if (!nla)
+	if (rt->rt_uses_gateway) {
+		if (rt->rt_gw_family == AF_INET &&
+		    nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) {
 			goto nla_put_failure;
-
-		via = nla_data(nla);
-		via->rtvia_family = AF_INET6;
-		memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+		} else if (rt->rt_gw_family == AF_INET6) {
+			int alen = sizeof(struct in6_addr);
+			struct nlattr *nla;
+			struct rtvia *via;
+
+			nla = nla_reserve(skb, RTA_VIA, alen + 2);
+			if (!nla)
+				goto nla_put_failure;
+
+			via = nla_data(nla);
+			via->rtvia_family = AF_INET6;
+			memcpy(via->rtvia_addr, &rt->rt_gw6, alen);
+		}
 	}
 
 	expires = rt->dst.expires;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index cdef8f9a3b01..35b84b52b702 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -85,6 +85,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
 					      RTCF_LOCAL);
 	xdst->u.rt.rt_type = rt->rt_type;
+	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
 	xdst->u.rt.rt_gw_family = rt->rt_gw_family;
 	if (rt->rt_gw_family == AF_INET)
 		xdst->u.rt.rt_gw4 = rt->rt_gw4;
-- 
cgit v1.2.3


From 71523d1812aca61e32e742e87ec064e3d8c615e1 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Sat, 24 Aug 2019 17:37:07 +0200
Subject: pwm: Ensure pwm_apply_state() doesn't modify the state argument
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is surprising for a PWM consumer when the variable holding the
requested state is modified by pwm_apply_state(). Consider for example a
driver doing:

        #define PERIOD 5000000
        #define DUTY_LITTLE 10
        ...
        struct pwm_state state = {
                .period = PERIOD,
                .duty_cycle = DUTY_LITTLE,
                .polarity = PWM_POLARITY_NORMAL,
                .enabled = true,
        };

        pwm_apply_state(mypwm, &state);
        ...
        state.duty_cycle = PERIOD / 2;
        pwm_apply_state(mypwm, &state);

For sure the second call to pwm_apply_state() should still have
state.period = PERIOD and not something the hardware driver chose for a
reason that doesn't necessarily apply to the second call.

So declare the state argument as a pointer to a const type and adapt all
drivers' .apply callbacks.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/gpio/gpio-mvebu.c     | 2 +-
 drivers/pwm/core.c            | 6 ++----
 drivers/pwm/pwm-atmel-hlcdc.c | 2 +-
 drivers/pwm/pwm-atmel.c       | 2 +-
 drivers/pwm/pwm-bcm-iproc.c   | 2 +-
 drivers/pwm/pwm-cros-ec.c     | 2 +-
 drivers/pwm/pwm-fsl-ftm.c     | 4 ++--
 drivers/pwm/pwm-hibvt.c       | 2 +-
 drivers/pwm/pwm-imx-tpm.c     | 4 ++--
 drivers/pwm/pwm-imx27.c       | 2 +-
 drivers/pwm/pwm-jz4740.c      | 2 +-
 drivers/pwm/pwm-lpss.c        | 2 +-
 drivers/pwm/pwm-meson.c       | 4 ++--
 drivers/pwm/pwm-rcar.c        | 2 +-
 drivers/pwm/pwm-rockchip.c    | 4 ++--
 drivers/pwm/pwm-sifive.c      | 2 +-
 drivers/pwm/pwm-sprd.c        | 2 +-
 drivers/pwm/pwm-stm32-lp.c    | 2 +-
 drivers/pwm/pwm-stm32.c       | 4 ++--
 drivers/pwm/pwm-sun4i.c       | 4 ++--
 drivers/pwm/pwm-zx.c          | 2 +-
 include/linux/pwm.h           | 4 ++--
 22 files changed, 30 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 869d47f89599..6c0687694341 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -694,7 +694,7 @@ static void mvebu_pwm_get_state(struct pwm_chip *chip,
 }
 
 static int mvebu_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			   struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	struct mvebu_pwm *mvpwm = to_mvebu_pwm(chip);
 	struct mvebu_gpio_chip *mvchip = mvpwm->mvchip;
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 449ba161877d..6ad51aa60c03 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -448,11 +448,9 @@ EXPORT_SYMBOL_GPL(pwm_free);
 /**
  * pwm_apply_state() - atomically apply a new state to a PWM device
  * @pwm: PWM device
- * @state: new state to apply. This can be adjusted by the PWM driver
- *	   if the requested config is not achievable, for example,
- *	   ->duty_cycle and ->period might be approximated.
+ * @state: new state to apply
  */
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
+int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
 {
 	struct pwm_chip *chip;
 	int err;
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index d13a83f430ac..dcbc0489dfd4 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -39,7 +39,7 @@ static inline struct atmel_hlcdc_pwm *to_atmel_hlcdc_pwm(struct pwm_chip *chip)
 }
 
 static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
-				 struct pwm_state *state)
+				 const struct pwm_state *state)
 {
 	struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
 	struct atmel_hlcdc *hlcdc = chip->hlcdc;
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index e5e1eaf372fa..53bc7b9b3581 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -209,7 +209,7 @@ static void atmel_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int atmel_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			   struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
 	struct pwm_state cstate;
diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c
index d961a8207b1c..56c38cfae92c 100644
--- a/drivers/pwm/pwm-bcm-iproc.c
+++ b/drivers/pwm/pwm-bcm-iproc.c
@@ -115,7 +115,7 @@ static void iproc_pwmc_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			    struct pwm_state *state)
+			    const struct pwm_state *state)
 {
 	unsigned long prescale = IPROC_PWM_PRESCALE_MIN;
 	struct iproc_pwmc *ip = to_iproc_pwmc(chip);
diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index 98f6ac6cf6ab..db5faa79c33f 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -93,7 +93,7 @@ static int cros_ec_pwm_get_duty(struct cros_ec_device *ec, u8 index)
 }
 
 static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			     struct pwm_state *state)
+			     const struct pwm_state *state)
 {
 	struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
 	int duty_cycle;
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 3c9738617ceb..59272a920479 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -227,7 +227,7 @@ static bool fsl_pwm_is_other_pwm_enabled(struct fsl_pwm_chip *fpc,
 
 static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
 				struct pwm_device *pwm,
-				struct pwm_state *newstate)
+				const struct pwm_state *newstate)
 {
 	unsigned int duty;
 	u32 reg_polarity;
@@ -298,7 +298,7 @@ static int fsl_pwm_apply_config(struct fsl_pwm_chip *fpc,
 }
 
 static int fsl_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			 struct pwm_state *newstate)
+			 const struct pwm_state *newstate)
 {
 	struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
 	struct pwm_state *oldstate = &pwm->state;
diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index 753bd58111e4..ad205fdad372 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c
@@ -149,7 +149,7 @@ static void hibvt_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int hibvt_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-				struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	struct hibvt_pwm_chip *hi_pwm_chip = to_hibvt_pwm_chip(chip);
 
diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c
index e8385c1cf342..9145f6160649 100644
--- a/drivers/pwm/pwm-imx-tpm.c
+++ b/drivers/pwm/pwm-imx-tpm.c
@@ -89,7 +89,7 @@ to_imx_tpm_pwm_chip(struct pwm_chip *chip)
 static int pwm_imx_tpm_round_state(struct pwm_chip *chip,
 				   struct imx_tpm_pwm_param *p,
 				   struct pwm_state *real_state,
-				   struct pwm_state *state)
+				   const struct pwm_state *state)
 {
 	struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip);
 	u32 rate, prescale, period_count, clock_unit;
@@ -289,7 +289,7 @@ static int pwm_imx_tpm_apply_hw(struct pwm_chip *chip,
 
 static int pwm_imx_tpm_apply(struct pwm_chip *chip,
 			     struct pwm_device *pwm,
-			     struct pwm_state *state)
+			     const struct pwm_state *state)
 {
 	struct imx_tpm_pwm_chip *tpm = to_imx_tpm_pwm_chip(chip);
 	struct imx_tpm_pwm_param param;
diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c
index 91c23cbbc167..ae11d8577f18 100644
--- a/drivers/pwm/pwm-imx27.c
+++ b/drivers/pwm/pwm-imx27.c
@@ -209,7 +209,7 @@ static void pwm_imx27_wait_fifo_slot(struct pwm_chip *chip,
 }
 
 static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			   struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	unsigned long period_cycles, duty_cycles, prescale;
 	struct pwm_imx27_chip *imx = to_pwm_imx27_chip(chip);
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c
index 9d444d012f92..9d78cc21cb12 100644
--- a/drivers/pwm/pwm-jz4740.c
+++ b/drivers/pwm/pwm-jz4740.c
@@ -88,7 +88,7 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			    struct pwm_state *state)
+			    const struct pwm_state *state)
 {
 	struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip);
 	unsigned long long tmp;
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 4098a4601691..75bbfe5f3bc2 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -122,7 +122,7 @@ static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond)
 }
 
 static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			  struct pwm_state *state)
+			  const struct pwm_state *state)
 {
 	struct pwm_lpss_chip *lpwm = to_lpwm(chip);
 	int ret;
diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 3cbff5cbb789..6245bbdb6e6c 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -159,7 +159,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
 }
 
 static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm,
-			  struct pwm_state *state)
+			  const struct pwm_state *state)
 {
 	struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
 	unsigned int duty, period, pre_div, cnt, duty_cnt;
@@ -265,7 +265,7 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm)
 }
 
 static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			   struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	struct meson_pwm_channel *channel = pwm_get_chip_data(pwm);
 	struct meson_pwm *meson = to_meson_pwm(chip);
diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index c8cd43f91efc..852eb2347954 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -158,7 +158,7 @@ static void rcar_pwm_disable(struct rcar_pwm_chip *rp)
 }
 
 static int rcar_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			  struct pwm_state *state)
+			  const struct pwm_state *state)
 {
 	struct rcar_pwm_chip *rp = to_rcar_pwm_chip(chip);
 	struct pwm_state cur_state;
diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c
index 83c7627868d8..73352e6fbccb 100644
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -99,7 +99,7 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip,
 }
 
 static void rockchip_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-			       struct pwm_state *state)
+			       const struct pwm_state *state)
 {
 	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
 	unsigned long period, duty;
@@ -183,7 +183,7 @@ static int rockchip_pwm_enable(struct pwm_chip *chip,
 }
 
 static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			      struct pwm_state *state)
+			      const struct pwm_state *state)
 {
 	struct rockchip_pwm_chip *pc = to_rockchip_pwm_chip(chip);
 	struct pwm_state curstate;
diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c
index bb4f02ce4f94..cc63f9baa481 100644
--- a/drivers/pwm/pwm-sifive.c
+++ b/drivers/pwm/pwm-sifive.c
@@ -147,7 +147,7 @@ static int pwm_sifive_enable(struct pwm_chip *chip, bool enable)
 }
 
 static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			    struct pwm_state *state)
+			    const struct pwm_state *state)
 {
 	struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip);
 	struct pwm_state cur_state;
diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
index 68c2d9f0411b..be2394227423 100644
--- a/drivers/pwm/pwm-sprd.c
+++ b/drivers/pwm/pwm-sprd.c
@@ -156,7 +156,7 @@ static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm,
 }
 
 static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			  struct pwm_state *state)
+			  const struct pwm_state *state)
 {
 	struct sprd_pwm_chip *spc =
 		container_of(chip, struct sprd_pwm_chip, chip);
diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
index 2211a642066d..21cb260dc2c0 100644
--- a/drivers/pwm/pwm-stm32-lp.c
+++ b/drivers/pwm/pwm-stm32-lp.c
@@ -32,7 +32,7 @@ static inline struct stm32_pwm_lp *to_stm32_pwm_lp(struct pwm_chip *chip)
 #define STM32_LPTIM_MAX_PRESCALER	128
 
 static int stm32_pwm_lp_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			      struct pwm_state *state)
+			      const struct pwm_state *state)
 {
 	struct stm32_pwm_lp *priv = to_stm32_pwm_lp(chip);
 	unsigned long long prd, div, dty;
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index 740e2dec8313..359b08596d9e 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -440,7 +440,7 @@ static void stm32_pwm_disable(struct stm32_pwm *priv, int ch)
 }
 
 static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			   struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	bool enabled;
 	struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
@@ -468,7 +468,7 @@ static int stm32_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
-				  struct pwm_state *state)
+				  const struct pwm_state *state)
 {
 	struct stm32_pwm *priv = to_stm32_pwm_dev(chip);
 	int ret;
diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index 39007a7c0d83..6f5840a1a82d 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -145,7 +145,7 @@ static void sun4i_pwm_get_state(struct pwm_chip *chip,
 }
 
 static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
-			       struct pwm_state *state,
+			       const struct pwm_state *state,
 			       u32 *dty, u32 *prd, unsigned int *prsclr)
 {
 	u64 clk_rate, div = 0;
@@ -196,7 +196,7 @@ static int sun4i_pwm_calculate(struct sun4i_pwm_chip *sun4i_pwm,
 }
 
 static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			   struct pwm_state *state)
+			   const struct pwm_state *state)
 {
 	struct sun4i_pwm_chip *sun4i_pwm = to_sun4i_pwm_chip(chip);
 	struct pwm_state cstate;
diff --git a/drivers/pwm/pwm-zx.c b/drivers/pwm/pwm-zx.c
index e24f4be35316..e2c21cc34a96 100644
--- a/drivers/pwm/pwm-zx.c
+++ b/drivers/pwm/pwm-zx.c
@@ -148,7 +148,7 @@ static int zx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 }
 
 static int zx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-			struct pwm_state *state)
+			const struct pwm_state *state)
 {
 	struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip);
 	struct pwm_state cstate;
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 24632a7a7d11..b2c9c460947d 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -262,7 +262,7 @@ struct pwm_ops {
 	int (*capture)(struct pwm_chip *chip, struct pwm_device *pwm,
 		       struct pwm_capture *result, unsigned long timeout);
 	int (*apply)(struct pwm_chip *chip, struct pwm_device *pwm,
-		     struct pwm_state *state);
+		     const struct pwm_state *state);
 	void (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
 			  struct pwm_state *state);
 	struct module *owner;
@@ -316,7 +316,7 @@ struct pwm_capture {
 /* PWM user APIs */
 struct pwm_device *pwm_request(int pwm_id, const char *label);
 void pwm_free(struct pwm_device *pwm);
-int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state);
+int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state);
 int pwm_adjust_config(struct pwm_device *pwm);
 
 /**
-- 
cgit v1.2.3


From 0d8006ddbe89cbaedef06a8789ddefa1164a3a77 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 4 Sep 2019 22:26:00 +1000
Subject: PCI: Add pci_irq_vector() and other stubs when !CONFIG_PCI

Add stub functions pci_alloc_irq_vectors_affinity() and pci_irq_vector()
when CONFIG_PCI is off so drivers can use them without resorting to ifdefs.

Also move the PCI_IRQ_* macros outside of the ifdefs so they are always
available.

Fixes: 625f269a5a7a ("crypto: inside-secure - add support for PCI based FPGA development board")
Link: https://lore.kernel.org/r/20190904122600.GA28660@gondor.apana.org.au
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index f5bab312995d..23e4a6bcd08d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -925,6 +925,11 @@ enum {
 	PCI_SCAN_ALL_PCIE_DEVS	= 0x00000040,	/* Scan all, not just dev 0 */
 };
 
+#define PCI_IRQ_LEGACY		(1 << 0) /* Allow legacy interrupts */
+#define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
+#define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
+#define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
+
 /* These external functions are only available when PCI support is enabled */
 #ifdef CONFIG_PCI
 
@@ -1408,11 +1413,6 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, u32 flags);
 
-#define PCI_IRQ_LEGACY		(1 << 0) /* Allow legacy interrupts */
-#define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
-#define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
-#define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
-
 /*
  * Virtual interrupts allow for more interrupts to be allocated
  * than the device has interrupts for. These are not programmed
@@ -1517,14 +1517,6 @@ static inline int pci_irq_get_node(struct pci_dev *pdev, int vec)
 }
 #endif
 
-static inline int
-pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
-		      unsigned int max_vecs, unsigned int flags)
-{
-	return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
-					      NULL);
-}
-
 /**
  * pci_irqd_intx_xlate() - Translate PCI INTx value to an IRQ domain hwirq
  * @d: the INTx IRQ domain
@@ -1780,8 +1772,29 @@ static inline const struct pci_device_id *pci_match_id(const struct pci_device_i
 							 struct pci_dev *dev)
 { return NULL; }
 static inline bool pci_ats_disabled(void) { return true; }
+
+static inline int pci_irq_vector(struct pci_dev *dev, unsigned int nr)
+{
+	return -EINVAL;
+}
+
+static inline int
+pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
+			       unsigned int max_vecs, unsigned int flags,
+			       struct irq_affinity *aff_desc)
+{
+	return -ENOSPC;
+}
 #endif /* CONFIG_PCI */
 
+static inline int
+pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
+		      unsigned int max_vecs, unsigned int flags)
+{
+	return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags,
+					      NULL);
+}
+
 #ifdef CONFIG_PCI_ATS
 /* Address Translation Service */
 void pci_ats_init(struct pci_dev *dev);
-- 
cgit v1.2.3


From b0e1ee435aba68c4080e3cb67adf6573aa5bcc6d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Sep 2019 22:21:24 +0200
Subject: net: remove netx ethernet driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ARM netx platform got removed in 5.3, so this driver
is now useless.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/Kconfig           |  11 -
 drivers/net/ethernet/Makefile          |   1 -
 drivers/net/ethernet/netx-eth.c        | 497 ---------------------------------
 include/linux/platform_data/eth-netx.h |  13 -
 4 files changed, 522 deletions(-)
 delete mode 100644 drivers/net/ethernet/netx-eth.c
 delete mode 100644 include/linux/platform_data/eth-netx.h

(limited to 'include')

diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 1e2de9d062bf..e8e9c166185d 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -140,17 +140,6 @@ source "drivers/net/ethernet/neterion/Kconfig"
 source "drivers/net/ethernet/netronome/Kconfig"
 source "drivers/net/ethernet/ni/Kconfig"
 source "drivers/net/ethernet/8390/Kconfig"
-
-config NET_NETX
-	tristate "NetX Ethernet support"
-	select MII
-	depends on ARCH_NETX
-	---help---
-	  This is support for the Hilscher netX builtin Ethernet ports
-
-	  To compile this driver as a module, choose M here. The module
-	  will be called netx-eth.
-
 source "drivers/net/ethernet/nvidia/Kconfig"
 source "drivers/net/ethernet/nxp/Kconfig"
 source "drivers/net/ethernet/oki-semi/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 77f9838a76c9..05abebc17804 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -64,7 +64,6 @@ obj-$(CONFIG_NET_VENDOR_NATSEMI) += natsemi/
 obj-$(CONFIG_NET_VENDOR_NETERION) += neterion/
 obj-$(CONFIG_NET_VENDOR_NETRONOME) += netronome/
 obj-$(CONFIG_NET_VENDOR_NI) += ni/
-obj-$(CONFIG_NET_NETX) += netx-eth.o
 obj-$(CONFIG_NET_VENDOR_NVIDIA) += nvidia/
 obj-$(CONFIG_LPC_ENET) += nxp/
 obj-$(CONFIG_NET_VENDOR_OKI) += oki-semi/
diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c
deleted file mode 100644
index cf6e7eb1b1e1..000000000000
--- a/drivers/net/ethernet/netx-eth.c
+++ /dev/null
@@ -1,497 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * drivers/net/ethernet/netx-eth.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-
-#include <linux/netdevice.h>
-#include <linux/platform_device.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/mii.h>
-
-#include <asm/io.h>
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-#include <mach/pfifo.h>
-#include <mach/xc.h>
-#include <linux/platform_data/eth-netx.h>
-
-/* XC Fifo Offsets */
-#define EMPTY_PTR_FIFO(xcno)    (0 + ((xcno) << 3))	/* Index of the empty pointer FIFO */
-#define IND_FIFO_PORT_HI(xcno)  (1 + ((xcno) << 3))	/* Index of the FIFO where received */
-							/* Data packages are indicated by XC */
-#define IND_FIFO_PORT_LO(xcno)  (2 + ((xcno) << 3))	/* Index of the FIFO where received */
-							/* Data packages are indicated by XC */
-#define REQ_FIFO_PORT_HI(xcno)  (3 + ((xcno) << 3))	/* Index of the FIFO where Data packages */
-							/* have to be indicated by ARM which */
-							/* shall be sent */
-#define REQ_FIFO_PORT_LO(xcno)  (4 + ((xcno) << 3))	/* Index of the FIFO where Data packages */
-							/* have to be indicated by ARM which shall */
-							/* be sent */
-#define CON_FIFO_PORT_HI(xcno)  (5 + ((xcno) << 3))	/* Index of the FIFO where sent Data packages */
-							/* are confirmed */
-#define CON_FIFO_PORT_LO(xcno)  (6 + ((xcno) << 3))	/* Index of the FIFO where sent Data */
-							/* packages are confirmed */
-#define PFIFO_MASK(xcno)        (0x7f << (xcno*8))
-
-#define FIFO_PTR_FRAMELEN_SHIFT 0
-#define FIFO_PTR_FRAMELEN_MASK  (0x7ff << 0)
-#define FIFO_PTR_FRAMELEN(len)  (((len) << 0) & FIFO_PTR_FRAMELEN_MASK)
-#define FIFO_PTR_TIMETRIG       (1<<11)
-#define FIFO_PTR_MULTI_REQ
-#define FIFO_PTR_ORIGIN         (1<<14)
-#define FIFO_PTR_VLAN           (1<<15)
-#define FIFO_PTR_FRAMENO_SHIFT  16
-#define FIFO_PTR_FRAMENO_MASK   (0x3f << 16)
-#define FIFO_PTR_FRAMENO(no)    (((no) << 16) & FIFO_PTR_FRAMENO_MASK)
-#define FIFO_PTR_SEGMENT_SHIFT  22
-#define FIFO_PTR_SEGMENT_MASK   (0xf << 22)
-#define FIFO_PTR_SEGMENT(seg)   (((seg) & 0xf) << 22)
-#define FIFO_PTR_ERROR_SHIFT    28
-#define FIFO_PTR_ERROR_MASK     (0xf << 28)
-
-#define ISR_LINK_STATUS_CHANGE (1<<4)
-#define ISR_IND_LO             (1<<3)
-#define ISR_CON_LO             (1<<2)
-#define ISR_IND_HI             (1<<1)
-#define ISR_CON_HI             (1<<0)
-
-#define ETH_MAC_LOCAL_CONFIG 0x1560
-#define ETH_MAC_4321         0x1564
-#define ETH_MAC_65           0x1568
-
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT 16
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK (0xf<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT)
-#define MAC_TRAFFIC_CLASS_ARRANGEMENT(x) (((x)<<MAC_TRAFFIC_CLASS_ARRANGEMENT_SHIFT) & MAC_TRAFFIC_CLASS_ARRANGEMENT_MASK)
-#define LOCAL_CONFIG_LINK_STATUS_IRQ_EN (1<<24)
-#define LOCAL_CONFIG_CON_LO_IRQ_EN (1<<23)
-#define LOCAL_CONFIG_CON_HI_IRQ_EN (1<<22)
-#define LOCAL_CONFIG_IND_LO_IRQ_EN (1<<21)
-#define LOCAL_CONFIG_IND_HI_IRQ_EN (1<<20)
-
-#define CARDNAME "netx-eth"
-
-/* LSB must be zero */
-#define INTERNAL_PHY_ADR 0x1c
-
-struct netx_eth_priv {
-	void                    __iomem *sram_base, *xpec_base, *xmac_base;
-	int                     id;
-	struct mii_if_info      mii;
-	u32                     msg_enable;
-	struct xc               *xc;
-	spinlock_t              lock;
-};
-
-static void netx_eth_set_multicast_list(struct net_device *ndev)
-{
-	/* implement me */
-}
-
-static int
-netx_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	unsigned char *buf = skb->data;
-	unsigned int len = skb->len;
-
-	spin_lock_irq(&priv->lock);
-	memcpy_toio(priv->sram_base + 1560, (void *)buf, len);
-	if (len < 60) {
-		memset_io(priv->sram_base + 1560 + len, 0, 60 - len);
-		len = 60;
-	}
-
-	pfifo_push(REQ_FIFO_PORT_LO(priv->id),
-	           FIFO_PTR_SEGMENT(priv->id) |
-	           FIFO_PTR_FRAMENO(1) |
-	           FIFO_PTR_FRAMELEN(len));
-
-	ndev->stats.tx_packets++;
-	ndev->stats.tx_bytes += skb->len;
-
-	netif_stop_queue(ndev);
-	spin_unlock_irq(&priv->lock);
-	dev_kfree_skb(skb);
-
-	return NETDEV_TX_OK;
-}
-
-static void netx_eth_receive(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	unsigned int val, frameno, seg, len;
-	unsigned char *data;
-	struct sk_buff *skb;
-
-	val = pfifo_pop(IND_FIFO_PORT_LO(priv->id));
-
-	frameno = (val & FIFO_PTR_FRAMENO_MASK) >> FIFO_PTR_FRAMENO_SHIFT;
-	seg = (val & FIFO_PTR_SEGMENT_MASK) >> FIFO_PTR_SEGMENT_SHIFT;
-	len = (val & FIFO_PTR_FRAMELEN_MASK) >> FIFO_PTR_FRAMELEN_SHIFT;
-
-	skb = netdev_alloc_skb(ndev, len);
-	if (unlikely(skb == NULL)) {
-		ndev->stats.rx_dropped++;
-		return;
-	}
-
-	data = skb_put(skb, len);
-
-	memcpy_fromio(data, priv->sram_base + frameno * 1560, len);
-
-	pfifo_push(EMPTY_PTR_FIFO(priv->id),
-		FIFO_PTR_SEGMENT(seg) | FIFO_PTR_FRAMENO(frameno));
-
-	skb->protocol = eth_type_trans(skb, ndev);
-	netif_rx(skb);
-	ndev->stats.rx_packets++;
-	ndev->stats.rx_bytes += len;
-}
-
-static irqreturn_t
-netx_eth_interrupt(int irq, void *dev_id)
-{
-	struct net_device *ndev = dev_id;
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	int status;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->lock, flags);
-
-	status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
-	while (status) {
-		int fill_level;
-		writel(status, NETX_PFIFO_XPEC_ISR(priv->id));
-
-		if ((status & ISR_CON_HI) || (status & ISR_IND_HI))
-			printk("%s: unexpected status: 0x%08x\n",
-			    __func__, status);
-
-		fill_level =
-		    readl(NETX_PFIFO_FILL_LEVEL(IND_FIFO_PORT_LO(priv->id)));
-		while (fill_level--)
-			netx_eth_receive(ndev);
-
-		if (status & ISR_CON_LO)
-			netif_wake_queue(ndev);
-
-		if (status & ISR_LINK_STATUS_CHANGE)
-			mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-
-		status = readl(NETX_PFIFO_XPEC_ISR(priv->id));
-	}
-	spin_unlock_irqrestore(&priv->lock, flags);
-	return IRQ_HANDLED;
-}
-
-static int netx_eth_open(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-
-	if (request_irq
-	    (ndev->irq, netx_eth_interrupt, IRQF_SHARED, ndev->name, ndev))
-		return -EAGAIN;
-
-	writel(ndev->dev_addr[0] |
-	       ndev->dev_addr[1]<<8 |
-	       ndev->dev_addr[2]<<16 |
-	       ndev->dev_addr[3]<<24,
-	       priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
-	writel(ndev->dev_addr[4] |
-	       ndev->dev_addr[5]<<8,
-	       priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
-	writel(LOCAL_CONFIG_LINK_STATUS_IRQ_EN |
-		LOCAL_CONFIG_CON_LO_IRQ_EN |
-		LOCAL_CONFIG_CON_HI_IRQ_EN |
-		LOCAL_CONFIG_IND_LO_IRQ_EN |
-		LOCAL_CONFIG_IND_HI_IRQ_EN,
-		priv->xpec_base + NETX_XPEC_RAM_START_OFS +
-		ETH_MAC_LOCAL_CONFIG);
-
-	mii_check_media(&priv->mii, netif_msg_link(priv), 1);
-	netif_start_queue(ndev);
-
-	return 0;
-}
-
-static int netx_eth_close(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-
-	netif_stop_queue(ndev);
-
-	writel(0,
-	    priv->xpec_base + NETX_XPEC_RAM_START_OFS + ETH_MAC_LOCAL_CONFIG);
-
-	free_irq(ndev->irq, ndev);
-
-	return 0;
-}
-
-static void netx_eth_timeout(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	int i;
-
-	printk(KERN_ERR "%s: transmit timed out, resetting\n", ndev->name);
-
-	spin_lock_irq(&priv->lock);
-
-	xc_reset(priv->xc);
-	xc_start(priv->xc);
-
-	for (i=2; i<=18; i++)
-		pfifo_push(EMPTY_PTR_FIFO(priv->id),
-			FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
-	spin_unlock_irq(&priv->lock);
-
-	netif_wake_queue(ndev);
-}
-
-static int
-netx_eth_phy_read(struct net_device *ndev, int phy_id, int reg)
-{
-	unsigned int val;
-
-	val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
-	      MIIMU_REGADDR(reg) | MIIMU_PHY_NRES;
-
-	writel(val, NETX_MIIMU);
-	while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-
-	return readl(NETX_MIIMU) >> 16;
-
-}
-
-static void
-netx_eth_phy_write(struct net_device *ndev, int phy_id, int reg, int value)
-{
-	unsigned int val;
-
-	val = MIIMU_SNRDY | MIIMU_PREAMBLE | MIIMU_PHYADDR(phy_id) |
-	      MIIMU_REGADDR(reg) | MIIMU_PHY_NRES | MIIMU_OPMODE_WRITE |
-	      MIIMU_DATA(value);
-
-	writel(val, NETX_MIIMU);
-	while (readl(NETX_MIIMU) & MIIMU_SNRDY);
-}
-
-static const struct net_device_ops netx_eth_netdev_ops = {
-	.ndo_open		= netx_eth_open,
-	.ndo_stop		= netx_eth_close,
-	.ndo_start_xmit		= netx_eth_hard_start_xmit,
-	.ndo_tx_timeout		= netx_eth_timeout,
-	.ndo_set_rx_mode	= netx_eth_set_multicast_list,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= eth_mac_addr,
-};
-
-static int netx_eth_enable(struct net_device *ndev)
-{
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-	unsigned int mac4321, mac65;
-	int running, i, ret;
-	bool inv_mac_addr = false;
-
-	ndev->netdev_ops = &netx_eth_netdev_ops;
-	ndev->watchdog_timeo = msecs_to_jiffies(5000);
-
-	priv->msg_enable       = NETIF_MSG_LINK;
-	priv->mii.phy_id_mask  = 0x1f;
-	priv->mii.reg_num_mask = 0x1f;
-	priv->mii.force_media  = 0;
-	priv->mii.full_duplex  = 0;
-	priv->mii.dev	     = ndev;
-	priv->mii.mdio_read    = netx_eth_phy_read;
-	priv->mii.mdio_write   = netx_eth_phy_write;
-	priv->mii.phy_id = INTERNAL_PHY_ADR + priv->id;
-
-	running = xc_running(priv->xc);
-	xc_stop(priv->xc);
-
-	/* if the xc engine is already running, assume the bootloader has
-	 * loaded the firmware for us
-	 */
-	if (running) {
-		/* get Node Address from hardware */
-		mac4321 = readl(priv->xpec_base +
-			NETX_XPEC_RAM_START_OFS + ETH_MAC_4321);
-		mac65 = readl(priv->xpec_base +
-			NETX_XPEC_RAM_START_OFS + ETH_MAC_65);
-
-		ndev->dev_addr[0] = mac4321 & 0xff;
-		ndev->dev_addr[1] = (mac4321 >> 8) & 0xff;
-		ndev->dev_addr[2] = (mac4321 >> 16) & 0xff;
-		ndev->dev_addr[3] = (mac4321 >> 24) & 0xff;
-		ndev->dev_addr[4] = mac65 & 0xff;
-		ndev->dev_addr[5] = (mac65 >> 8) & 0xff;
-	} else {
-		if (xc_request_firmware(priv->xc)) {
-			printk(CARDNAME ": requesting firmware failed\n");
-			return -ENODEV;
-		}
-	}
-
-	xc_reset(priv->xc);
-	xc_start(priv->xc);
-
-	if (!is_valid_ether_addr(ndev->dev_addr))
-		inv_mac_addr = true;
-
-	for (i=2; i<=18; i++)
-		pfifo_push(EMPTY_PTR_FIFO(priv->id),
-			FIFO_PTR_FRAMENO(i) | FIFO_PTR_SEGMENT(priv->id));
-
-	ret = register_netdev(ndev);
-	if (inv_mac_addr)
-		printk("%s: Invalid ethernet MAC address. Please set using ip\n",
-		       ndev->name);
-
-	return ret;
-}
-
-static int netx_eth_drv_probe(struct platform_device *pdev)
-{
-	struct netx_eth_priv *priv;
-	struct net_device *ndev;
-	struct netxeth_platform_data *pdata;
-	int ret;
-
-	ndev = alloc_etherdev(sizeof (struct netx_eth_priv));
-	if (!ndev) {
-		ret = -ENOMEM;
-		goto exit;
-	}
-	SET_NETDEV_DEV(ndev, &pdev->dev);
-
-	platform_set_drvdata(pdev, ndev);
-
-	priv = netdev_priv(ndev);
-
-	pdata = dev_get_platdata(&pdev->dev);
-	priv->xc = request_xc(pdata->xcno, &pdev->dev);
-	if (!priv->xc) {
-		dev_err(&pdev->dev, "unable to request xc engine\n");
-		ret = -ENODEV;
-		goto exit_free_netdev;
-	}
-
-	ndev->irq = priv->xc->irq;
-	priv->id = pdev->id;
-	priv->xpec_base = priv->xc->xpec_base;
-	priv->xmac_base = priv->xc->xmac_base;
-	priv->sram_base = priv->xc->sram_base;
-
-	spin_lock_init(&priv->lock);
-
-	ret = pfifo_request(PFIFO_MASK(priv->id));
-	if (ret) {
-		printk("unable to request PFIFO\n");
-		goto exit_free_xc;
-	}
-
-	ret = netx_eth_enable(ndev);
-	if (ret)
-		goto exit_free_pfifo;
-
-	return 0;
-exit_free_pfifo:
-	pfifo_free(PFIFO_MASK(priv->id));
-exit_free_xc:
-	free_xc(priv->xc);
-exit_free_netdev:
-	free_netdev(ndev);
-exit:
-	return ret;
-}
-
-static int netx_eth_drv_remove(struct platform_device *pdev)
-{
-	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct netx_eth_priv *priv = netdev_priv(ndev);
-
-	unregister_netdev(ndev);
-	xc_stop(priv->xc);
-	free_xc(priv->xc);
-	free_netdev(ndev);
-	pfifo_free(PFIFO_MASK(priv->id));
-
-	return 0;
-}
-
-static int netx_eth_drv_suspend(struct platform_device *pdev, pm_message_t state)
-{
-	dev_err(&pdev->dev, "suspend not implemented\n");
-	return 0;
-}
-
-static int netx_eth_drv_resume(struct platform_device *pdev)
-{
-	dev_err(&pdev->dev, "resume not implemented\n");
-	return 0;
-}
-
-static struct platform_driver netx_eth_driver = {
-	.probe		= netx_eth_drv_probe,
-	.remove		= netx_eth_drv_remove,
-	.suspend	= netx_eth_drv_suspend,
-	.resume		= netx_eth_drv_resume,
-	.driver		= {
-		.name	= CARDNAME,
-	},
-};
-
-static int __init netx_eth_init(void)
-{
-	unsigned int phy_control, val;
-
-	printk("NetX Ethernet driver\n");
-
-	phy_control = PHY_CONTROL_PHY_ADDRESS(INTERNAL_PHY_ADR>>1) |
-		      PHY_CONTROL_PHY1_MODE(PHY_MODE_ALL) |
-		      PHY_CONTROL_PHY1_AUTOMDIX |
-		      PHY_CONTROL_PHY1_EN |
-		      PHY_CONTROL_PHY0_MODE(PHY_MODE_ALL) |
-		      PHY_CONTROL_PHY0_AUTOMDIX |
-		      PHY_CONTROL_PHY0_EN |
-		      PHY_CONTROL_CLK_XLATIN;
-
-	val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
-	writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
-	writel(phy_control | PHY_CONTROL_RESET, NETX_SYSTEM_PHY_CONTROL);
-	udelay(100);
-
-	val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
-	writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
-	writel(phy_control, NETX_SYSTEM_PHY_CONTROL);
-
-	return platform_driver_register(&netx_eth_driver);
-}
-
-static void __exit netx_eth_cleanup(void)
-{
-	platform_driver_unregister(&netx_eth_driver);
-}
-
-module_init(netx_eth_init);
-module_exit(netx_eth_cleanup);
-
-MODULE_AUTHOR("Sascha Hauer, Pengutronix");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" CARDNAME);
-MODULE_FIRMWARE("xc0.bin");
-MODULE_FIRMWARE("xc1.bin");
-MODULE_FIRMWARE("xc2.bin");
diff --git a/include/linux/platform_data/eth-netx.h b/include/linux/platform_data/eth-netx.h
deleted file mode 100644
index a3a6322668d8..000000000000
--- a/include/linux/platform_data/eth-netx.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#ifndef __ETH_NETX_H
-#define __ETH_NETX_H
-
-struct netxeth_platform_data {
-	unsigned int xcno;	/* number of xmac/xpec engine this eth uses */
-};
-
-#endif
-- 
cgit v1.2.3


From 3e4d890a26d5411d0b64e5e8ecfdcdb435c1d3f8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 22 Sep 2019 11:15:14 -0700
Subject: modules: make MODULE_IMPORT_NS() work even when modular builds are
 disabled

It's an unusual configuration, and was apparently never tested, and not
caught in linux-next because of a combination of travels and it making
it into the tree too late.

The fix is to simply move the #define to outside the CONFIG_MODULE
section, since MODULE_INFO() will do the right thing.

Cc: Martijn Coenen <maco@android.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Matthias Maennich <maennich@google.com>
Cc: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/module.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/module.h b/include/linux/module.h
index b3611e749f72..b1a67352d2dc 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -276,12 +276,12 @@ extern typeof(name) __mod_##type##__##name##_device_table		\
  * files require multiple MODULE_FIRMWARE() specifiers */
 #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware)
 
+#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
+
 struct notifier_block;
 
 #ifdef CONFIG_MODULES
 
-#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns)
-
 extern int modules_disabled; /* for sysctl */
 /* Get/put a kernel symbol (calls must be symmetric) */
 void *__symbol_get(const char *symbol);
-- 
cgit v1.2.3


From d32d7c52e08a25d8d4b9f1a7b56400f35b8f72fa Mon Sep 17 00:00:00 2001
From: Yevgeny Kliteynik <kliteyn@mellanox.com>
Date: Sun, 1 Sep 2019 16:28:28 +0300
Subject: net/mlx5: DR, Fix SW steering HW bits and definitions

Fix wrong reserved bits offsets.

Fixes: 97b5484ed608 ("net/mlx5: Add HW bits and definitions required for SW steering")
Signed-off-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a487b681b516..138c50d5a353 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -282,7 +282,6 @@ enum {
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
 	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
-	MLX5_CMD_OP_SYNC_STEERING                 = 0xb00,
 	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
 	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
@@ -296,6 +295,7 @@ enum {
 	MLX5_CMD_OP_DESTROY_UCTX                  = 0xa06,
 	MLX5_CMD_OP_CREATE_UMEM                   = 0xa08,
 	MLX5_CMD_OP_DESTROY_UMEM                  = 0xa0a,
+	MLX5_CMD_OP_SYNC_STEERING                 = 0xb00,
 	MLX5_CMD_OP_MAX
 };
 
@@ -487,7 +487,7 @@ union mlx5_ifc_gre_key_bits {
 
 struct mlx5_ifc_fte_match_set_misc_bits {
 	u8         gre_c_present[0x1];
-	u8         reserved_auto1[0x1];
+	u8         reserved_at_1[0x1];
 	u8         gre_k_present[0x1];
 	u8         gre_s_present[0x1];
 	u8         source_vhca_port[0x4];
@@ -5054,50 +5054,50 @@ struct mlx5_ifc_query_hca_cap_in_bits {
 
 struct mlx5_ifc_other_hca_cap_bits {
 	u8         roce[0x1];
-	u8         reserved_0[0x27f];
+	u8         reserved_at_1[0x27f];
 };
 
 struct mlx5_ifc_query_other_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 
 	struct     mlx5_ifc_other_hca_cap_bits other_capability;
 };
 
 struct mlx5_ifc_query_other_hca_cap_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 
-	u8         reserved_3[0x20];
+	u8         reserved_at_60[0x20];
 };
 
 struct mlx5_ifc_modify_other_hca_cap_out_bits {
 	u8         status[0x8];
-	u8         reserved_0[0x18];
+	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         reserved_1[0x40];
+	u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_modify_other_hca_cap_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_0[0x10];
+	u8         reserved_at_10[0x10];
 
-	u8         reserved_1[0x10];
+	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_2[0x10];
+	u8         reserved_at_40[0x10];
 	u8         function_id[0x10];
 	u8         field_select[0x20];
 
-- 
cgit v1.2.3


From 344c6c804703841d2bff4d68d7390ba726053874 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Thu, 22 Aug 2019 22:30:20 +0800
Subject: KVM/Hyper-V: Add new KVM capability KVM_CAP_HYPERV_DIRECT_TLBFLUSH

Hyper-V direct tlb flush function should be enabled for
guest that only uses Hyper-V hypercall. User space
hypervisor(e.g, Qemu) can disable KVM identification in
CPUID and just exposes Hyper-V identification to make
sure the precondition. Add new KVM capability KVM_CAP_
HYPERV_DIRECT_TLBFLUSH for user space to enable Hyper-V
direct tlb function and this function is default to be
disabled in KVM.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.txt  | 13 +++++++++++++
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              |  8 ++++++++
 include/uapi/linux/kvm.h        |  1 +
 4 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 136f1eef3712..4833904d32a5 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -5309,3 +5309,16 @@ Architectures: x86
 This capability indicates that KVM supports paravirtualized Hyper-V IPI send
 hypercalls:
 HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
+8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
+
+Architecture: x86
+
+This capability indicates that KVM running on top of Hyper-V hypervisor
+enables Direct TLB flush for its guests meaning that TLB flush
+hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
+Due to the different ABI for hypercall parameters between Hyper-V and
+KVM, enabling this capability effectively disables all hypercall
+handling by KVM (as some KVM hypercall may be mistakenly treated as TLB
+flush hypercalls by Hyper-V) so userspace should disable KVM identification
+in CPUID and only exposes Hyper-V identification. In this case, guest
+thinks it's running on Hyper-V and only use Hyper-V hypercalls.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a3a3ec73fa2f..4765ae0fc506 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1213,6 +1213,7 @@ struct kvm_x86_ops {
 	bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
 
 	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
+	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_arch_async_pf {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bb93771e4170..5becc6753280 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3282,6 +3282,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = kvm_x86_ops->get_nested_state ?
 			kvm_x86_ops->get_nested_state(NULL, NULL, 0) : 0;
 		break;
+	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+		r = kvm_x86_ops->enable_direct_tlbflush ? 1 : 0;
+		break;
 	default:
 		break;
 	}
@@ -4055,6 +4058,11 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				r = -EFAULT;
 		}
 		return r;
+	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
+		if (!kvm_x86_ops->enable_direct_tlbflush)
+			return -ENOTTY;
+
+		return kvm_x86_ops->enable_direct_tlbflush(vcpu);
 
 	default:
 		return -EINVAL;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 233efbb1c81c..c73aead0c0a8 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -999,6 +999,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
 #define KVM_CAP_PMU_EVENT_FILTER 173
 #define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit v1.2.3


From e1572f1d08be57a5412a464cff0712a23cd0b73e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Mon, 16 Sep 2019 18:22:56 +0200
Subject: cpu/SMT: create and export cpu_smt_possible()

KVM needs to know if SMT is theoretically possible, this means it is
supported and not forcefully disabled ('nosmt=force'). Create and
export cpu_smt_possible() answering this question.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/cpu.h |  2 ++
 kernel/cpu.c        | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 88dc0c653925..d0633ebdaa9c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -201,12 +201,14 @@ enum cpuhp_smt_control {
 extern enum cpuhp_smt_control cpu_smt_control;
 extern void cpu_smt_disable(bool force);
 extern void cpu_smt_check_topology(void);
+extern bool cpu_smt_possible(void);
 extern int cpuhp_smt_enable(void);
 extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
 #else
 # define cpu_smt_control		(CPU_SMT_NOT_IMPLEMENTED)
 static inline void cpu_smt_disable(bool force) { }
 static inline void cpu_smt_check_topology(void) { }
+static inline bool cpu_smt_possible(void) { return false; }
 static inline int cpuhp_smt_enable(void) { return 0; }
 static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index e1967e9eddc2..fc28e17940e0 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -392,8 +392,7 @@ enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 
 void __init cpu_smt_disable(bool force)
 {
-	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
-		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
+	if (!cpu_smt_possible())
 		return;
 
 	if (force) {
@@ -438,6 +437,14 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
 	 */
 	return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
+
+/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
+bool cpu_smt_possible(void)
+{
+	return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
+		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
+}
+EXPORT_SYMBOL_GPL(cpu_smt_possible);
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
 #endif
-- 
cgit v1.2.3


From f537669978a7abae29c1d3b489f300e4d8f47005 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Thu, 5 Sep 2019 21:16:03 +0530
Subject: libnvdimm/dax: Pick the right alignment default when creating dax
 devices

Allow arch to provide the supported alignments and use hugepage alignment only
if we support hugepage. Right now we depend on compile time configs whereas this
patch switch this to runtime discovery.

Architectures like ppc64 can have THP enabled in code, but then can have
hugepage size disabled by the hypervisor. This allows us to create dax devices
with PAGE_SIZE alignment in this case.

Existing dax namespace with alignment larger than PAGE_SIZE will fail to
initialize in this specific case. We still allow fsdax namespace initialization.

With respect to identifying whether to enable hugepage fault for a dax device,
if THP is enabled during compile, we default to taking hugepage fault and in dax
fault handler if we find the fault size > alignment we retry with PAGE_SIZE
fault size.

This also addresses the below failure scenario on ppc64

ndctl create-namespace --mode=devdax  | grep align
 "align":16777216,
 "align":16777216

cat /sys/devices/ndbus0/region0/dax0.0/supported_alignments
 65536 16777216

daxio.static-debug  -z -o /dev/dax0.0
  Bus error (core dumped)

  $ dmesg | tail
   lpar: Failed hash pte insert with error -4
   hash-mmu: mm: Hashing failure ! EA=0x7fff17000000 access=0x8000000000000006 current=daxio
   hash-mmu:     trap=0x300 vsid=0x22cb7a3 ssize=1 base psize=2 psize 10 pte=0xc000000501002b86
   daxio[3860]: bus error (7) at 7fff17000000 nip 7fff973c007c lr 7fff973bff34 code 2 in libpmem.so.1.0.0[7fff973b0000+20000]
   daxio[3860]: code: 792945e4 7d494b78 e95f0098 7d494b78 f93f00a0 4800012c e93f0088 f93f0120
   daxio[3860]: code: e93f00a0 f93f0128 e93f0120 e95f0128 <f9490000> e93f0088 39290008 f93f0110

The failure was due to guest kernel using wrong page size.

The namespaces created with 16M alignment will appear as below on a config with
16M page size disabled.

$ ndctl list -Ni
[
  {
    "dev":"namespace0.1",
    "mode":"fsdax",
    "map":"dev",
    "size":5351931904,
    "uuid":"fc6e9667-461a-4718-82b4-69b24570bddb",
    "align":16777216,
    "blockdev":"pmem0.1",
    "supported_alignments":[
      65536
    ]
  },
  {
    "dev":"namespace0.0",
    "mode":"fsdax",    <==== devdax 16M alignment marked disabled.
    "map":"mem",
    "size":5368709120,
    "uuid":"a4bdf81a-f2ee-4bc6-91db-7b87eddd0484",
    "state":"disabled"
  }
]

Cc: linux-mm@kvack.org
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Link: https://lore.kernel.org/r/20190905154603.10349-8-aneesh.kumar@linux.ibm.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/nd.h       |  6 +---
 drivers/nvdimm/pfn_devs.c | 75 ++++++++++++++++++++++++++++++++++-------------
 include/linux/huge_mm.h   |  7 ++++-
 3 files changed, 61 insertions(+), 27 deletions(-)

(limited to 'include')

diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e89af4b2d8e9..ee5c04070ef9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -289,11 +289,7 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
 struct nd_pfn *to_nd_pfn(struct device *dev);
 #if IS_ENABLED(CONFIG_NVDIMM_PFN)
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PFN_DEFAULT_ALIGNMENT HPAGE_PMD_SIZE
-#else
-#define PFN_DEFAULT_ALIGNMENT PAGE_SIZE
-#endif
+#define MAX_NVDIMM_ALIGN	4
 
 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns);
 bool is_nd_pfn(struct device *dev);
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index ce9ef18282dd..934cdcaaae97 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -103,39 +103,42 @@ static ssize_t align_show(struct device *dev,
 	return sprintf(buf, "%ld\n", nd_pfn->align);
 }
 
-static const unsigned long *nd_pfn_supported_alignments(void)
+static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments)
 {
-	/*
-	 * This needs to be a non-static variable because the *_SIZE
-	 * macros aren't always constants.
-	 */
-	const unsigned long supported_alignments[] = {
-		PAGE_SIZE,
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		HPAGE_PMD_SIZE,
-#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-		HPAGE_PUD_SIZE,
-#endif
-#endif
-		0,
-	};
-	static unsigned long data[ARRAY_SIZE(supported_alignments)];
 
-	memcpy(data, supported_alignments, sizeof(data));
+	alignments[0] = PAGE_SIZE;
+
+	if (has_transparent_hugepage()) {
+		alignments[1] = HPAGE_PMD_SIZE;
+		if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
+			alignments[2] = HPAGE_PUD_SIZE;
+	}
+
+	return alignments;
+}
+
+/*
+ * Use pmd mapping if supported as default alignment
+ */
+static unsigned long nd_pfn_default_alignment(void)
+{
 
-	return data;
+	if (has_transparent_hugepage())
+		return HPAGE_PMD_SIZE;
+	return PAGE_SIZE;
 }
 
 static ssize_t align_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
+	unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
 	ssize_t rc;
 
 	nd_device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
-			nd_pfn_supported_alignments());
+			nd_pfn_supported_alignments(aligns));
 	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
 			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
@@ -259,7 +262,10 @@ static DEVICE_ATTR_RO(size);
 static ssize_t supported_alignments_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
-	return nd_size_select_show(0, nd_pfn_supported_alignments(), buf);
+	unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
+
+	return nd_size_select_show(0,
+			nd_pfn_supported_alignments(aligns), buf);
 }
 static DEVICE_ATTR_RO(supported_alignments);
 
@@ -302,7 +308,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
 		return NULL;
 
 	nd_pfn->mode = PFN_MODE_NONE;
-	nd_pfn->align = PFN_DEFAULT_ALIGNMENT;
+	nd_pfn->align = nd_pfn_default_alignment();
 	dev = &nd_pfn->dev;
 	device_initialize(&nd_pfn->dev);
 	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
@@ -412,6 +418,21 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
 	return 0;
 }
 
+static bool nd_supported_alignment(unsigned long align)
+{
+	int i;
+	unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, };
+
+	if (align == 0)
+		return false;
+
+	nd_pfn_supported_alignments(supported);
+	for (i = 0; supported[i]; i++)
+		if (align == supported[i])
+			return true;
+	return false;
+}
+
 /**
  * nd_pfn_validate - read and validate info-block
  * @nd_pfn: fsdax namespace runtime state / properties
@@ -496,6 +517,18 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 		return -EOPNOTSUPP;
 	}
 
+	/*
+	 * Check whether the we support the alignment. For Dax if the
+	 * superblock alignment is not matching, we won't initialize
+	 * the device.
+	 */
+	if (!nd_supported_alignment(align) &&
+			!memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) {
+		dev_err(&nd_pfn->dev, "init failed, alignment mismatch: "
+				"%ld:%ld\n", nd_pfn->align, align);
+		return -EOPNOTSUPP;
+	}
+
 	if (!nd_pfn->uuid) {
 		/*
 		 * When probing a namepace via nd_pfn_probe() the uuid
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 45ede62aa85b..376a81ff2c96 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -108,7 +108,12 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 
 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
 		return true;
-
+	/*
+	 * For dax vmas, try to always use hugepage mappings. If the kernel does
+	 * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
+	 * mappings, and device-dax namespaces, that try to guarantee a given
+	 * mapping size, will fail to enable
+	 */
 	if (vma_is_dax(vma))
 		return true;
 
-- 
cgit v1.2.3


From cf387d9644d8c78721cf9b77af9f67bb5b04da16 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 10 Sep 2019 11:58:25 +0530
Subject: libnvdimm/altmap: Track namespace boundaries in altmap

With PFN_MODE_PMEM namespace, the memmap area is allocated from the device
area. Some architectures map the memmap area with large page size. On
architectures like ppc64, 16MB page for memap mapping can map 262144 pfns.
This maps a namespace size of 16G.

When populating memmap region with 16MB page from the device area,
make sure the allocated space is not used to map resources outside this
namespace. Such usage of device area will prevent a namespace destroy.

Add resource end pnf in altmap and use that to check if the memmap area
allocation can map pfn outside the namespace. On ppc64 in such case we fallback
to allocation from memory.

This fix kernel crash reported below:

[  132.034989] WARNING: CPU: 13 PID: 13719 at mm/memremap.c:133 devm_memremap_pages_release+0x2d8/0x2e0
[  133.464754] BUG: Unable to handle kernel data access at 0xc00c00010b204000
[  133.464760] Faulting instruction address: 0xc00000000007580c
[  133.464766] Oops: Kernel access of bad area, sig: 11 [#1]
[  133.464771] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
.....
[  133.464901] NIP [c00000000007580c] vmemmap_free+0x2ac/0x3d0
[  133.464906] LR [c0000000000757f8] vmemmap_free+0x298/0x3d0
[  133.464910] Call Trace:
[  133.464914] [c000007cbfd0f7b0] [c0000000000757f8] vmemmap_free+0x298/0x3d0 (unreliable)
[  133.464921] [c000007cbfd0f8d0] [c000000000370a44] section_deactivate+0x1a4/0x240
[  133.464928] [c000007cbfd0f980] [c000000000386270] __remove_pages+0x3a0/0x590
[  133.464935] [c000007cbfd0fa50] [c000000000074158] arch_remove_memory+0x88/0x160
[  133.464942] [c000007cbfd0fae0] [c0000000003be8c0] devm_memremap_pages_release+0x150/0x2e0
[  133.464949] [c000007cbfd0fb70] [c000000000738ea0] devm_action_release+0x30/0x50
[  133.464955] [c000007cbfd0fb90] [c00000000073a5a4] release_nodes+0x344/0x400
[  133.464961] [c000007cbfd0fc40] [c00000000073378c] device_release_driver_internal+0x15c/0x250
[  133.464968] [c000007cbfd0fc80] [c00000000072fd14] unbind_store+0x104/0x110
[  133.464973] [c000007cbfd0fcd0] [c00000000072ee24] drv_attr_store+0x44/0x70
[  133.464981] [c000007cbfd0fcf0] [c0000000004a32bc] sysfs_kf_write+0x6c/0xa0
[  133.464987] [c000007cbfd0fd10] [c0000000004a1dfc] kernfs_fop_write+0x17c/0x250
[  133.464993] [c000007cbfd0fd60] [c0000000003c348c] __vfs_write+0x3c/0x70
[  133.464999] [c000007cbfd0fd80] [c0000000003c75d0] vfs_write+0xd0/0x250

djbw: Aneesh notes that this crash can likely be triggered in any kernel that
supports 'papr_scm', so flagging that commit for -stable consideration.

Fixes: b5beae5e224f ("powerpc/pseries: Add driver for PAPR SCM regions")
Cc: <stable@vger.kernel.org>
Reported-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: Pankaj Gupta <pagupta@redhat.com>
Tested-by: Santosh Sivaraj <santosh@fossix.org>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Link: https://lore.kernel.org/r/20190910062826.10041-1-aneesh.kumar@linux.ibm.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/mm/init_64.c | 17 ++++++++++++++++-
 drivers/nvdimm/pfn_devs.c |  2 ++
 include/linux/memremap.h  |  1 +
 3 files changed, 19 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a44f6281ca3a..4e08246acd79 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -172,6 +172,21 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
 	vmemmap_list = vmem_back;
 }
 
+static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+				unsigned long page_size)
+{
+	unsigned long nr_pfn = page_size / sizeof(struct page);
+	unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+	if ((start_pfn + nr_pfn) > altmap->end_pfn)
+		return true;
+
+	if (start_pfn < altmap->base_pfn)
+		return true;
+
+	return false;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
@@ -194,7 +209,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		 * fail due to alignment issues when using 16MB hugepages, so
 		 * fall back to system memory if the altmap allocation fail.
 		 */
-		if (altmap) {
+		if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
 			p = altmap_alloc_block_buf(page_size, altmap);
 			if (!p)
 				pr_debug("altmap block allocation failed, falling back to system memory");
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 934cdcaaae97..80c7992bc538 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -672,9 +672,11 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
 	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
 	resource_size_t base = nsio->res.start + start_pad;
+	resource_size_t end = nsio->res.end - end_trunc;
 	struct vmem_altmap __altmap = {
 		.base_pfn = init_altmap_base(base),
 		.reserve = init_altmap_reserve(base),
+		.end_pfn = PHYS_PFN(end),
 	};
 
 	memcpy(res, &nsio->res, sizeof(*res));
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f8a5b2a19945..c70996fe48c8 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -17,6 +17,7 @@ struct device;
  */
 struct vmem_altmap {
 	const unsigned long base_pfn;
+	const unsigned long end_pfn;
 	const unsigned long reserve;
 	unsigned long free;
 	unsigned long align;
-- 
cgit v1.2.3


From 710ec38b0f633ab3e2581f07a73442d809e28ab0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 23 Sep 2019 15:32:59 -0700
Subject: mm: add dummy can_do_mlock() helper

On kernels without CONFIG_MMU, we get a link error for the siw driver:

drivers/infiniband/sw/siw/siw_mem.o: In function `siw_umem_get':
siw_mem.c:(.text+0x4c8): undefined reference to `can_do_mlock'

This is probably not the only driver that needs the function and could
otherwise build correctly without CONFIG_MMU, so add a dummy variant that
always returns false.

Link: http://lkml.kernel.org/r/20190909204201.931830-1-arnd@arndb.de
Fixes: 2251334dcac9 ("rdma/siw: application buffer management")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7cf955feb823..6e79b3df1582 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1405,7 +1405,11 @@ extern void pagefault_out_of_memory(void);
 
 extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
 
+#ifdef CONFIG_MMU
 extern bool can_do_mlock(void);
+#else
+static inline bool can_do_mlock(void) { return false; }
+#endif
 extern int user_shm_lock(size_t, struct user_struct *);
 extern void user_shm_unlock(size_t, struct user_struct *);
 
-- 
cgit v1.2.3


From 963abb9aebcdacf5c709a36da9ac0727a33671eb Mon Sep 17 00:00:00 2001
From: Joseph Qi <joseph.qi@linux.alibaba.com>
Date: Mon, 23 Sep 2019 15:33:11 -0700
Subject: jbd2: remove jbd2_journal_inode_add_[write|wait]

Since ext4/ocfs2 are using jbd2_inode dirty range scoping APIs now,
jbd2_journal_inode_add_[write|wait] are not used any more, remove them.

Link: http://lkml.kernel.org/r/1562977611-8412-2-git-send-email-joseph.qi@linux.alibaba.com
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Ross Zwisler <zwisler@google.com>
Acked-by: Changwei Ge <chge@linux.alibaba.com>
Cc: Gang He <ghe@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/jbd2/journal.c     |  2 --
 fs/jbd2/transaction.c | 12 ------------
 include/linux/jbd2.h  |  2 --
 3 files changed, 16 deletions(-)

(limited to 'include')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 953990eb70a9..1c58859aa592 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -89,8 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
 EXPORT_SYMBOL(jbd2_journal_invalidatepage);
 EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
 EXPORT_SYMBOL(jbd2_journal_force_commit);
-EXPORT_SYMBOL(jbd2_journal_inode_add_write);
-EXPORT_SYMBOL(jbd2_journal_inode_add_wait);
 EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
 EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
 EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index afc06daee5bb..bee8498d7792 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2622,18 +2622,6 @@ done:
 	return 0;
 }
 
-int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode)
-{
-	return jbd2_journal_file_inode(handle, jinode,
-			JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX);
-}
-
-int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode)
-{
-	return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0,
-			LLONG_MAX);
-}
-
 int jbd2_journal_inode_ranged_write(handle_t *handle,
 		struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
 {
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index df03825ad1a1..603fbc4e2f70 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1410,8 +1410,6 @@ extern int	   jbd2_journal_clear_err  (journal_t *);
 extern int	   jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
 extern int	   jbd2_journal_force_commit(journal_t *);
 extern int	   jbd2_journal_force_commit_nested(journal_t *);
-extern int	   jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode);
-extern int	   jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode);
 extern int	   jbd2_journal_inode_ranged_write(handle_t *handle,
 			struct jbd2_inode *inode, loff_t start_byte,
 			loff_t length);
-- 
cgit v1.2.3


From 9adeaa226988b97bc15928e12f40a9863134467c Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Mon, 23 Sep 2019 15:33:49 -0700
Subject: mm, slab: move memcg_cache_params structure to mm/slab.h

The memcg_cache_params structure is only embedded into the kmem_cache of
slab and slub allocators as defined in slab_def.h and slub_def.h and used
internally by mm code.  There is no needed to expose it in a public
header.  So move it from include/linux/slab.h to mm/slab.h.  It is just a
refactoring patch with no code change.

In fact both the slub_def.h and slab_def.h should be moved into the mm
directory as well, but that will probably cause many merge conflicts.

Link: http://lkml.kernel.org/r/20190718180827.18758-1-longman@redhat.com
Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 62 ---------------------------------------------------
 mm/slab.h            | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 62 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 56c9c7eed34e..ab2b98ad76e1 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -595,68 +595,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 	return __kmalloc_node(size, flags, node);
 }
 
-struct memcg_cache_array {
-	struct rcu_head rcu;
-	struct kmem_cache *entries[0];
-};
-
-/*
- * This is the main placeholder for memcg-related information in kmem caches.
- * Both the root cache and the child caches will have it. For the root cache,
- * this will hold a dynamically allocated array large enough to hold
- * information about the currently limited memcgs in the system. To allow the
- * array to be accessed without taking any locks, on relocation we free the old
- * version only after a grace period.
- *
- * Root and child caches hold different metadata.
- *
- * @root_cache:	Common to root and child caches.  NULL for root, pointer to
- *		the root cache for children.
- *
- * The following fields are specific to root caches.
- *
- * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
- *		used to index child cachces during allocation and cleared
- *		early during shutdown.
- *
- * @root_caches_node: List node for slab_root_caches list.
- *
- * @children:	List of all child caches.  While the child caches are also
- *		reachable through @memcg_caches, a child cache remains on
- *		this list until it is actually destroyed.
- *
- * The following fields are specific to child caches.
- *
- * @memcg:	Pointer to the memcg this cache belongs to.
- *
- * @children_node: List node for @root_cache->children list.
- *
- * @kmem_caches_node: List node for @memcg->kmem_caches list.
- */
-struct memcg_cache_params {
-	struct kmem_cache *root_cache;
-	union {
-		struct {
-			struct memcg_cache_array __rcu *memcg_caches;
-			struct list_head __root_caches_node;
-			struct list_head children;
-			bool dying;
-		};
-		struct {
-			struct mem_cgroup *memcg;
-			struct list_head children_node;
-			struct list_head kmem_caches_node;
-			struct percpu_ref refcnt;
-
-			void (*work_fn)(struct kmem_cache *);
-			union {
-				struct rcu_head rcu_head;
-				struct work_struct work;
-			};
-		};
-	};
-};
-
 int memcg_update_all_caches(int num_memcgs);
 
 /**
diff --git a/mm/slab.h b/mm/slab.h
index 5bf615cb3f99..68e455f2b698 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -30,6 +30,69 @@ struct kmem_cache {
 	struct list_head list;	/* List of all slab caches on the system */
 };
 
+#else /* !CONFIG_SLOB */
+
+struct memcg_cache_array {
+	struct rcu_head rcu;
+	struct kmem_cache *entries[0];
+};
+
+/*
+ * This is the main placeholder for memcg-related information in kmem caches.
+ * Both the root cache and the child caches will have it. For the root cache,
+ * this will hold a dynamically allocated array large enough to hold
+ * information about the currently limited memcgs in the system. To allow the
+ * array to be accessed without taking any locks, on relocation we free the old
+ * version only after a grace period.
+ *
+ * Root and child caches hold different metadata.
+ *
+ * @root_cache:	Common to root and child caches.  NULL for root, pointer to
+ *		the root cache for children.
+ *
+ * The following fields are specific to root caches.
+ *
+ * @memcg_caches: kmemcg ID indexed table of child caches.  This table is
+ *		used to index child cachces during allocation and cleared
+ *		early during shutdown.
+ *
+ * @root_caches_node: List node for slab_root_caches list.
+ *
+ * @children:	List of all child caches.  While the child caches are also
+ *		reachable through @memcg_caches, a child cache remains on
+ *		this list until it is actually destroyed.
+ *
+ * The following fields are specific to child caches.
+ *
+ * @memcg:	Pointer to the memcg this cache belongs to.
+ *
+ * @children_node: List node for @root_cache->children list.
+ *
+ * @kmem_caches_node: List node for @memcg->kmem_caches list.
+ */
+struct memcg_cache_params {
+	struct kmem_cache *root_cache;
+	union {
+		struct {
+			struct memcg_cache_array __rcu *memcg_caches;
+			struct list_head __root_caches_node;
+			struct list_head children;
+			bool dying;
+		};
+		struct {
+			struct mem_cgroup *memcg;
+			struct list_head children_node;
+			struct list_head kmem_caches_node;
+			struct percpu_ref refcnt;
+
+			void (*work_fn)(struct kmem_cache *);
+			union {
+				struct rcu_head rcu_head;
+				struct work_struct work;
+			};
+		};
+	};
+};
 #endif /* CONFIG_SLOB */
 
 #ifdef CONFIG_SLAB
-- 
cgit v1.2.3


From a50b854e073cd3335bbbada8dcff83a857297dd7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 23 Sep 2019 15:34:25 -0700
Subject: mm: introduce page_size()

Patch series "Make working with compound pages easier", v2.

These three patches add three helpers and convert the appropriate
places to use them.

This patch (of 3):

It's unnecessarily hard to find out the size of a potentially huge page.
Replace 'PAGE_SIZE << compound_order(page)' with page_size(page).

Link: http://lkml.kernel.org/r/20190721104612.19120-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/flush.c                           |  3 +--
 arch/arm64/mm/flush.c                         |  3 +--
 arch/ia64/mm/init.c                           |  2 +-
 drivers/crypto/chelsio/chtls/chtls_io.c       |  5 ++---
 drivers/staging/android/ion/ion_system_heap.c |  4 ++--
 drivers/target/tcm_fc/tfc_io.c                |  3 +--
 fs/io_uring.c                                 |  2 +-
 include/linux/hugetlb.h                       |  2 +-
 include/linux/mm.h                            |  6 ++++++
 lib/iov_iter.c                                |  2 +-
 mm/kasan/common.c                             |  8 +++-----
 mm/nommu.c                                    |  2 +-
 mm/page_vma_mapped.c                          |  3 +--
 mm/rmap.c                                     |  6 ++----
 mm/slob.c                                     |  2 +-
 mm/slub.c                                     | 18 +++++++++---------
 net/xdp/xsk.c                                 |  2 +-
 17 files changed, 35 insertions(+), 38 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 6ecbda87ee46..4c7ebe094a83 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -204,8 +204,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 	 * coherent with the kernels mapping.
 	 */
 	if (!PageHighMem(page)) {
-		size_t page_size = PAGE_SIZE << compound_order(page);
-		__cpuc_flush_dcache_area(page_address(page), page_size);
+		__cpuc_flush_dcache_area(page_address(page), page_size(page));
 	} else {
 		unsigned long i;
 		if (cache_is_vipt_nonaliasing()) {
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index dc19300309d2..ac485163a4a7 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -56,8 +56,7 @@ void __sync_icache_dcache(pte_t pte)
 	struct page *page = pte_page(pte);
 
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
-		sync_icache_aliases(page_address(page),
-				    PAGE_SIZE << compound_order(page));
+		sync_icache_aliases(page_address(page), page_size(page));
 }
 EXPORT_SYMBOL_GPL(__sync_icache_dcache);
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 678b98a09c85..bf9df2625bc8 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -64,7 +64,7 @@ __ia64_sync_icache_dcache (pte_t pte)
 	if (test_bit(PG_arch_1, &page->flags))
 		return;				/* i-cache is already coherent with d-cache */
 
-	flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
+	flush_icache_range(addr, addr + page_size(page));
 	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
 }
 
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index c70cb5f272cf..0891ab829b1b 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1078,7 +1078,7 @@ new_buf:
 			bool merge;
 
 			if (page)
-				pg_size <<= compound_order(page);
+				pg_size = page_size(page);
 			if (off < pg_size &&
 			    skb_can_coalesce(skb, i, page, off)) {
 				merge = 1;
@@ -1105,8 +1105,7 @@ new_buf:
 							   __GFP_NORETRY,
 							   order);
 					if (page)
-						pg_size <<=
-							compound_order(page);
+						pg_size <<= order;
 				}
 				if (!page) {
 					page = alloc_page(gfp);
diff --git a/drivers/staging/android/ion/ion_system_heap.c b/drivers/staging/android/ion/ion_system_heap.c
index aa8d8425be25..b83a1d16bd89 100644
--- a/drivers/staging/android/ion/ion_system_heap.c
+++ b/drivers/staging/android/ion/ion_system_heap.c
@@ -120,7 +120,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
 		if (!page)
 			goto free_pages;
 		list_add_tail(&page->lru, &pages);
-		size_remaining -= PAGE_SIZE << compound_order(page);
+		size_remaining -= page_size(page);
 		max_order = compound_order(page);
 		i++;
 	}
@@ -133,7 +133,7 @@ static int ion_system_heap_allocate(struct ion_heap *heap,
 
 	sg = table->sgl;
 	list_for_each_entry_safe(page, tmp_page, &pages, lru) {
-		sg_set_page(sg, page, PAGE_SIZE << compound_order(page), 0);
+		sg_set_page(sg, page, page_size(page), 0);
 		sg = sg_next(sg);
 		list_del(&page->lru);
 	}
diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c
index a254792d882c..1354a157e9af 100644
--- a/drivers/target/tcm_fc/tfc_io.c
+++ b/drivers/target/tcm_fc/tfc_io.c
@@ -136,8 +136,7 @@ int ft_queue_data_in(struct se_cmd *se_cmd)
 					   page, off_in_page, tlen);
 			fr_len(fp) += tlen;
 			fp_skb(fp)->data_len += tlen;
-			fp_skb(fp)->truesize +=
-					PAGE_SIZE << compound_order(page);
+			fp_skb(fp)->truesize += page_size(page);
 		} else {
 			BUG_ON(!page);
 			from = kmap_atomic(page + (mem_off >> PAGE_SHIFT));
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0dadbdbead0f..f83de4c6a826 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3319,7 +3319,7 @@ static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	page = virt_to_head_page(ptr);
-	if (sz > (PAGE_SIZE << compound_order(page)))
+	if (sz > page_size(page))
 		return -EINVAL;
 
 	pfn = virt_to_phys(ptr) >> PAGE_SHIFT;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index edfca4278319..53fc34f930d0 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -454,7 +454,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 static inline struct hstate *page_hstate(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageHuge(page), page);
-	return size_to_hstate(PAGE_SIZE << compound_order(page));
+	return size_to_hstate(page_size(page));
 }
 
 static inline unsigned hstate_index_to_shift(unsigned index)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e79b3df1582..d46d5585e2a2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -805,6 +805,12 @@ static inline void set_compound_order(struct page *page, unsigned int order)
 	page[1].compound_order = order;
 }
 
+/* Returns the number of bytes in this potentially compound page. */
+static inline unsigned long page_size(struct page *page)
+{
+	return PAGE_SIZE << compound_order(page);
+}
+
 void free_compound_page(struct page *page);
 
 #ifdef CONFIG_MMU
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f1e0569b4539..639d5e7014c1 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -878,7 +878,7 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 	head = compound_head(page);
 	v += (page - head) << PAGE_SHIFT;
 
-	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
+	if (likely(n <= v && v <= (page_size(head))))
 		return true;
 	WARN_ON(1);
 	return false;
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 6b6f1198c72b..307631d9c62b 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -338,8 +338,7 @@ void kasan_poison_slab(struct page *page)
 
 	for (i = 0; i < (1 << compound_order(page)); i++)
 		page_kasan_tag_reset(page + i);
-	kasan_poison_shadow(page_address(page),
-			PAGE_SIZE << compound_order(page),
+	kasan_poison_shadow(page_address(page), page_size(page),
 			KASAN_KMALLOC_REDZONE);
 }
 
@@ -542,7 +541,7 @@ void * __must_check kasan_kmalloc_large(const void *ptr, size_t size,
 	page = virt_to_page(ptr);
 	redzone_start = round_up((unsigned long)(ptr + size),
 				KASAN_SHADOW_SCALE_SIZE);
-	redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page));
+	redzone_end = (unsigned long)ptr + page_size(page);
 
 	kasan_unpoison_shadow(ptr, size);
 	kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
@@ -578,8 +577,7 @@ void kasan_poison_kfree(void *ptr, unsigned long ip)
 			kasan_report_invalid_free(ptr, ip);
 			return;
 		}
-		kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
-				KASAN_FREE_PAGE);
+		kasan_poison_shadow(ptr, page_size(page), KASAN_FREE_PAGE);
 	} else {
 		__kasan_slab_free(page->slab_cache, ptr, ip, false);
 	}
diff --git a/mm/nommu.c b/mm/nommu.c
index fed1b6e9c89b..99b7ec318824 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -108,7 +108,7 @@ unsigned int kobjsize(const void *objp)
 	 * The ksize() function is only guaranteed to work for pointers
 	 * returned by kmalloc(). So handle arbitrary pointers here.
 	 */
-	return PAGE_SIZE << compound_order(page);
+	return page_size(page);
 }
 
 /**
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 11df03e71288..eff4b4520c8d 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -153,8 +153,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 
 	if (unlikely(PageHuge(pvmw->page))) {
 		/* when pud is not present, pte will be NULL */
-		pvmw->pte = huge_pte_offset(mm, pvmw->address,
-					    PAGE_SIZE << compound_order(page));
+		pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page));
 		if (!pvmw->pte)
 			return false;
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 31352bba197d..f401732b20e8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -898,8 +898,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 	 */
 	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,
 				0, vma, vma->vm_mm, address,
-				min(vma->vm_end, address +
-				    (PAGE_SIZE << compound_order(page))));
+				min(vma->vm_end, address + page_size(page)));
 	mmu_notifier_invalidate_range_start(&range);
 
 	while (page_vma_mapped_walk(&pvmw)) {
@@ -1372,8 +1371,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	 */
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
 				address,
-				min(vma->vm_end, address +
-				    (PAGE_SIZE << compound_order(page))));
+				min(vma->vm_end, address + page_size(page)));
 	if (PageHuge(page)) {
 		/*
 		 * If sharing is possible, start and end will be adjusted
diff --git a/mm/slob.c b/mm/slob.c
index 7f421d0ca9ab..cf377beab962 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -539,7 +539,7 @@ size_t __ksize(const void *block)
 
 	sp = virt_to_page(block);
 	if (unlikely(!PageSlab(sp)))
-		return PAGE_SIZE << compound_order(sp);
+		return page_size(sp);
 
 	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
 	m = (unsigned int *)(block - align);
diff --git a/mm/slub.c b/mm/slub.c
index 17fe1cac11fb..42c1b3af3c98 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -829,7 +829,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 		return 1;
 
 	start = page_address(page);
-	length = PAGE_SIZE << compound_order(page);
+	length = page_size(page);
 	end = start + length;
 	remainder = length % s->size;
 	if (!remainder)
@@ -1074,13 +1074,14 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
 	init_tracking(s, object);
 }
 
-static void setup_page_debug(struct kmem_cache *s, void *addr, int order)
+static
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr)
 {
 	if (!(s->flags & SLAB_POISON))
 		return;
 
 	metadata_access_enable();
-	memset(addr, POISON_INUSE, PAGE_SIZE << order);
+	memset(addr, POISON_INUSE, page_size(page));
 	metadata_access_disable();
 }
 
@@ -1340,8 +1341,8 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,
 			struct page *page, void *object) {}
-static inline void setup_page_debug(struct kmem_cache *s,
-			void *addr, int order) {}
+static inline
+void setup_page_debug(struct kmem_cache *s, struct page *page, void *addr) {}
 
 static inline int alloc_debug_processing(struct kmem_cache *s,
 	struct page *page, void *object, unsigned long addr) { return 0; }
@@ -1639,7 +1640,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	struct kmem_cache_order_objects oo = s->oo;
 	gfp_t alloc_gfp;
 	void *start, *p, *next;
-	int idx, order;
+	int idx;
 	bool shuffle;
 
 	flags &= gfp_allowed_mask;
@@ -1673,7 +1674,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	page->objects = oo_objects(oo);
 
-	order = compound_order(page);
 	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page_is_pfmemalloc(page))
@@ -1683,7 +1683,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	start = page_address(page);
 
-	setup_page_debug(s, start, order);
+	setup_page_debug(s, page, start);
 
 	shuffle = shuffle_freelist(s, page);
 
@@ -3932,7 +3932,7 @@ size_t __ksize(const void *object)
 
 	if (unlikely(!PageSlab(page))) {
 		WARN_ON(!PageCompound(page));
-		return PAGE_SIZE << compound_order(page);
+		return page_size(page);
 	}
 
 	return slab_ksize(page->slab_cache);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c2f1af3b6a7c..fa8fbb8fa3c8 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -977,7 +977,7 @@ static int xsk_mmap(struct file *file, struct socket *sock,
 	/* Matches the smp_wmb() in xsk_init_queue */
 	smp_rmb();
 	qpg = virt_to_head_page(q->ring);
-	if (size > (PAGE_SIZE << compound_order(qpg)))
+	if (size > page_size(qpg))
 		return -EINVAL;
 
 	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
-- 
cgit v1.2.3


From 94ad9338109fe9d0b8a4a16828719dd6dcaee4c2 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 23 Sep 2019 15:34:28 -0700
Subject: mm: introduce page_shift()

Replace PAGE_SHIFT + compound_order(page) with the new page_shift()
function.  Minor improvements in readability.

[akpm@linux-foundation.org: fix build in tce_page_is_contained()]
  Link: http://lkml.kernel.org/r/201907241853.yNQTrJWd%25lkp@intel.com
Link: http://lkml.kernel.org/r/20190721104612.19120-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/book3s64/iommu_api.c | 7 ++-----
 drivers/vfio/vfio_iommu_spapr_tce.c  | 8 ++++----
 include/linux/mm.h                   | 6 ++++++
 3 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
index b056cae3388b..56cc84520577 100644
--- a/arch/powerpc/mm/book3s64/iommu_api.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -129,11 +129,8 @@ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
 		 * Allow to use larger than 64k IOMMU pages. Only do that
 		 * if we are backed by hugetlb.
 		 */
-		if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page)) {
-			struct page *head = compound_head(page);
-
-			pageshift = compound_order(head) + PAGE_SHIFT;
-		}
+		if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+			pageshift = page_shift(compound_head(page));
 		mem->pageshift = min(mem->pageshift, pageshift);
 		/*
 		 * We don't need struct page reference any more, switch
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 3b18fa4d090a..26cef65b41e7 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -176,13 +176,13 @@ put_exit:
 }
 
 static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
-		unsigned int page_shift)
+		unsigned int it_page_shift)
 {
 	struct page *page;
 	unsigned long size = 0;
 
-	if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
-		return size == (1UL << page_shift);
+	if (mm_iommu_is_devmem(mm, hpa, it_page_shift, &size))
+		return size == (1UL << it_page_shift);
 
 	page = pfn_to_page(hpa >> PAGE_SHIFT);
 	/*
@@ -190,7 +190,7 @@ static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
 	 * a page we just found. Otherwise the hardware can get access to
 	 * a bigger memory chunk that it should.
 	 */
-	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
+	return page_shift(compound_head(page)) >= it_page_shift;
 }
 
 static inline bool tce_groups_attached(struct tce_container *container)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d46d5585e2a2..9238548bdec5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -811,6 +811,12 @@ static inline unsigned long page_size(struct page *page)
 	return PAGE_SIZE << compound_order(page);
 }
 
+/* Returns the number of bits needed for the number of bytes in a page */
+static inline unsigned int page_shift(struct page *page)
+{
+	return PAGE_SHIFT + compound_order(page);
+}
+
 void free_compound_page(struct page *page);
 
 #ifdef CONFIG_MMU
-- 
cgit v1.2.3


From d8c6546b1aea843fbeb4d54a1202f1adda6504be Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 23 Sep 2019 15:34:30 -0700
Subject: mm: introduce compound_nr()

Replace 1 << compound_order(page) with compound_nr(page).  Minor
improvements in readability.

Link: http://lkml.kernel.org/r/20190721104612.19120-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/flush.c           | 4 ++--
 arch/powerpc/mm/hugetlbpage.c | 2 +-
 fs/proc/task_mmu.c            | 2 +-
 include/linux/mm.h            | 6 ++++++
 mm/compaction.c               | 2 +-
 mm/filemap.c                  | 2 +-
 mm/gup.c                      | 2 +-
 mm/hugetlb_cgroup.c           | 2 +-
 mm/kasan/common.c             | 2 +-
 mm/memcontrol.c               | 4 ++--
 mm/memory_hotplug.c           | 4 ++--
 mm/migrate.c                  | 2 +-
 mm/page_alloc.c               | 2 +-
 mm/rmap.c                     | 3 +--
 mm/shmem.c                    | 8 ++++----
 mm/swap_state.c               | 2 +-
 mm/util.c                     | 2 +-
 mm/vmscan.c                   | 4 ++--
 18 files changed, 30 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 4c7ebe094a83..6d89db7895d1 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -208,13 +208,13 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page)
 	} else {
 		unsigned long i;
 		if (cache_is_vipt_nonaliasing()) {
-			for (i = 0; i < (1 << compound_order(page)); i++) {
+			for (i = 0; i < compound_nr(page); i++) {
 				void *addr = kmap_atomic(page + i);
 				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
 				kunmap_atomic(addr);
 			}
 		} else {
-			for (i = 0; i < (1 << compound_order(page)); i++) {
+			for (i = 0; i < compound_nr(page); i++) {
 				void *addr = kmap_high_get(page + i);
 				if (addr) {
 					__cpuc_flush_dcache_area(addr, PAGE_SIZE);
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a8953f108808..73d4873fc7f8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -667,7 +667,7 @@ void flush_dcache_icache_hugepage(struct page *page)
 
 	BUG_ON(!PageCompound(page));
 
-	for (i = 0; i < (1UL << compound_order(page)); i++) {
+	for (i = 0; i < compound_nr(page); i++) {
 		if (!PageHighMem(page)) {
 			__flush_dcache_icache(page_address(page+i));
 		} else {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index bf43d1d60059..ea1630465474 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -461,7 +461,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
 static void smaps_account(struct mem_size_stats *mss, struct page *page,
 		bool compound, bool young, bool dirty, bool locked)
 {
-	int i, nr = compound ? 1 << compound_order(page) : 1;
+	int i, nr = compound ? compound_nr(page) : 1;
 	unsigned long size = nr * PAGE_SIZE;
 
 	/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9238548bdec5..69b7314c8d24 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -805,6 +805,12 @@ static inline void set_compound_order(struct page *page, unsigned int order)
 	page[1].compound_order = order;
 }
 
+/* Returns the number of pages in this potentially compound page. */
+static inline unsigned long compound_nr(struct page *page)
+{
+	return 1UL << compound_order(page);
+}
+
 /* Returns the number of bytes in this potentially compound page. */
 static inline unsigned long page_size(struct page *page)
 {
diff --git a/mm/compaction.c b/mm/compaction.c
index 952dc2fb24e5..777c088e9113 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -969,7 +969,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			 * is safe to read and it's 0 for tail pages.
 			 */
 			if (unlikely(PageCompound(page))) {
-				low_pfn += (1UL << compound_order(page)) - 1;
+				low_pfn += compound_nr(page) - 1;
 				goto isolate_fail;
 			}
 		}
diff --git a/mm/filemap.c b/mm/filemap.c
index 40667c2f3383..5f30aedd7363 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -126,7 +126,7 @@ static void page_cache_delete(struct address_space *mapping,
 	/* hugetlb pages are represented by a single entry in the xarray */
 	if (!PageHuge(page)) {
 		xas_set_order(&xas, page->index, compound_order(page));
-		nr = 1U << compound_order(page);
+		nr = compound_nr(page);
 	}
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
diff --git a/mm/gup.c b/mm/gup.c
index 98f13ab37bac..84a36d80dd2e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1460,7 +1460,7 @@ check_again:
 		 * gup may start from a tail page. Advance step by the left
 		 * part.
 		 */
-		step = (1 << compound_order(head)) - (pages[i] - head);
+		step = compound_nr(head) - (pages[i] - head);
 		/*
 		 * If we get a page from the CMA zone, since we are going to
 		 * be pinning these entries, we might as well move them out
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 68c2f2f3c05b..f1930fa0b445 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -139,7 +139,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 	if (!page_hcg || page_hcg != h_cg)
 		goto out;
 
-	nr_pages = 1 << compound_order(page);
+	nr_pages = compound_nr(page);
 	if (!parent) {
 		parent = root_h_cgroup;
 		/* root has no limit */
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 307631d9c62b..6814d6d6a023 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -336,7 +336,7 @@ void kasan_poison_slab(struct page *page)
 {
 	unsigned long i;
 
-	for (i = 0; i < (1 << compound_order(page)); i++)
+	for (i = 0; i < compound_nr(page); i++)
 		page_kasan_tag_reset(page + i);
 	kasan_poison_shadow(page_address(page), page_size(page),
 			KASAN_KMALLOC_REDZONE);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f3c15bb07cce..6c6032c03d1d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6511,7 +6511,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 		unsigned int nr_pages = 1;
 
 		if (PageTransHuge(page)) {
-			nr_pages <<= compound_order(page);
+			nr_pages = compound_nr(page);
 			ug->nr_huge += nr_pages;
 		}
 		if (PageAnon(page))
@@ -6523,7 +6523,7 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 		}
 		ug->pgpgout++;
 	} else {
-		ug->nr_kmem += 1 << compound_order(page);
+		ug->nr_kmem += compound_nr(page);
 		__ClearPageKmemcg(page);
 	}
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c73f09913165..5f2c83ce9fde 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1309,7 +1309,7 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 		head = compound_head(page);
 		if (page_huge_active(head))
 			return pfn;
-		skip = (1 << compound_order(head)) - (page - head);
+		skip = compound_nr(head) - (page - head);
 		pfn += skip - 1;
 	}
 	return 0;
@@ -1347,7 +1347,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 
 		if (PageHuge(page)) {
 			struct page *head = compound_head(page);
-			pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+			pfn = page_to_pfn(head) + compound_nr(head) - 1;
 			isolate_huge_page(head, &source);
 			continue;
 		} else if (PageTransHuge(page))
diff --git a/mm/migrate.c b/mm/migrate.c
index 9f4ed4e985c1..aa72b49e0209 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1892,7 +1892,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 	VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page);
 
 	/* Avoid migrating to a node that is nearly full */
-	if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page)))
+	if (!migrate_balanced_pgdat(pgdat, compound_nr(page)))
 		return 0;
 
 	if (isolate_lru_page(page))
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ff5484fdbdf9..df566c0f6729 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8196,7 +8196,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 			if (!hugepage_migration_supported(page_hstate(head)))
 				goto unmovable;
 
-			skip_pages = (1 << compound_order(head)) - (page - head);
+			skip_pages = compound_nr(head) - (page - head);
 			iter += skip_pages - 1;
 			continue;
 		}
diff --git a/mm/rmap.c b/mm/rmap.c
index f401732b20e8..26006445c8b5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1520,8 +1520,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 			if (PageHuge(page)) {
-				int nr = 1 << compound_order(page);
-				hugetlb_count_sub(nr, mm);
+				hugetlb_count_sub(compound_nr(page), mm);
 				set_huge_swap_pte_at(mm, address,
 						     pvmw.pte, pteval,
 						     vma_mmu_pagesize(vma));
diff --git a/mm/shmem.c b/mm/shmem.c
index 0f7fd4a85db6..15d26c86e5ef 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -609,7 +609,7 @@ static int shmem_add_to_page_cache(struct page *page,
 {
 	XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
 	unsigned long i = 0;
-	unsigned long nr = 1UL << compound_order(page);
+	unsigned long nr = compound_nr(page);
 
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(index != round_down(index, nr), page);
@@ -1884,7 +1884,7 @@ alloc_nohuge:
 	lru_cache_add_anon(page);
 
 	spin_lock_irq(&info->lock);
-	info->alloced += 1 << compound_order(page);
+	info->alloced += compound_nr(page);
 	inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
 	shmem_recalc_inode(inode);
 	spin_unlock_irq(&info->lock);
@@ -1925,7 +1925,7 @@ clear:
 		struct page *head = compound_head(page);
 		int i;
 
-		for (i = 0; i < (1 << compound_order(head)); i++) {
+		for (i = 0; i < compound_nr(head); i++) {
 			clear_highpage(head + i);
 			flush_dcache_page(head + i);
 		}
@@ -1952,7 +1952,7 @@ clear:
 	 * Error recovery.
 	 */
 unacct:
-	shmem_inode_unacct_blocks(inode, 1 << compound_order(page));
+	shmem_inode_unacct_blocks(inode, compound_nr(page));
 
 	if (PageTransHuge(page)) {
 		unlock_page(page);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 8368621a0fc7..f844af5f09ba 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -116,7 +116,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
 	struct address_space *address_space = swap_address_space(entry);
 	pgoff_t idx = swp_offset(entry);
 	XA_STATE_ORDER(xas, &address_space->i_pages, idx, compound_order(page));
-	unsigned long i, nr = 1UL << compound_order(page);
+	unsigned long i, nr = compound_nr(page);
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapCache(page), page);
diff --git a/mm/util.c b/mm/util.c
index e6351a80f248..bab284d69c8c 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -521,7 +521,7 @@ bool page_mapped(struct page *page)
 		return true;
 	if (PageHuge(page))
 		return false;
-	for (i = 0; i < (1 << compound_order(page)); i++) {
+	for (i = 0; i < compound_nr(page); i++) {
 		if (atomic_read(&page[i]._mapcount) >= 0)
 			return true;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a6c5d0b28321..8e03427cb64f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1149,7 +1149,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 		VM_BUG_ON_PAGE(PageActive(page), page);
 
-		nr_pages = 1 << compound_order(page);
+		nr_pages = compound_nr(page);
 
 		/* Account the number of base pages even though THP */
 		sc->nr_scanned += nr_pages;
@@ -1705,7 +1705,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 		VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-		nr_pages = 1 << compound_order(page);
+		nr_pages = compound_nr(page);
 		total_scan += nr_pages;
 
 		if (page_zonenum(page) > sc->reclaim_idx) {
-- 
cgit v1.2.3


From 37389167a281f3ccb6bc958c32b2e088c7269fe0 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 23 Sep 2019 15:34:39 -0700
Subject: mm, page_owner: keep owner info when freeing the page

For debugging purposes it might be useful to keep the owner info even
after page has been freed, and include it in e.g.  dump_page() when
detecting a bad page state.  For that, change the PAGE_EXT_OWNER flag
meaning to "page owner info has been set at least once" and add new
PAGE_EXT_OWNER_ACTIVE for tracking whether page is supposed to be
currently tracked allocated or free.  Adjust dump_page() accordingly,
distinguishing free and allocated pages.  In the page_owner debugfs file,
keep printing only allocated pages so that existing scripts are not
confused, and also because free pages are irrelevant for the memory
statistics or leak detection that's the typical use case of the file,
anyway.

Link: http://lkml.kernel.org/r/20190820131828.22684-4-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_ext.h |  1 +
 mm/page_owner.c          | 34 ++++++++++++++++++++++++----------
 2 files changed, 25 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index 09592951725c..682fd465df06 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -18,6 +18,7 @@ struct page_ext_operations {
 
 enum page_ext_flags {
 	PAGE_EXT_OWNER,
+	PAGE_EXT_OWNER_ACTIVE,
 #if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
 	PAGE_EXT_YOUNG,
 	PAGE_EXT_IDLE,
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 813fcb70547b..4a48e018dbdf 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -111,7 +111,7 @@ void __reset_page_owner(struct page *page, unsigned int order)
 		page_ext = lookup_page_ext(page + i);
 		if (unlikely(!page_ext))
 			continue;
-		__clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
+		__clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
 	}
 }
 
@@ -168,6 +168,7 @@ static inline void __set_page_owner_handle(struct page *page,
 		page_owner->gfp_mask = gfp_mask;
 		page_owner->last_migrate_reason = -1;
 		__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+		__set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
 
 		page_ext = lookup_page_ext(page + i);
 	}
@@ -243,6 +244,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 	 * the new page, which will be freed.
 	 */
 	__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
+	__set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
 }
 
 void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -302,7 +304,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 			if (unlikely(!page_ext))
 				continue;
 
-			if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
+			if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
 				continue;
 
 			page_owner = get_page_owner(page_ext);
@@ -413,21 +415,26 @@ void __dump_page_owner(struct page *page)
 	mt = gfpflags_to_migratetype(gfp_mask);
 
 	if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
-		pr_alert("page_owner info is not active (free page?)\n");
+		pr_alert("page_owner info is not present (never set?)\n");
 		return;
 	}
 
+	if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+		pr_alert("page_owner tracks the page as allocated\n");
+	else
+		pr_alert("page_owner tracks the page as freed\n");
+
+	pr_alert("page last allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
+		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
+
 	handle = READ_ONCE(page_owner->handle);
 	if (!handle) {
-		pr_alert("page_owner info is not active (free page?)\n");
-		return;
+		pr_alert("page_owner allocation stack trace missing\n");
+	} else {
+		nr_entries = stack_depot_fetch(handle, &entries);
+		stack_trace_print(entries, nr_entries, 0);
 	}
 
-	nr_entries = stack_depot_fetch(handle, &entries);
-	pr_alert("page allocated via order %u, migratetype %s, gfp_mask %#x(%pGg)\n",
-		 page_owner->order, migratetype_names[mt], gfp_mask, &gfp_mask);
-	stack_trace_print(entries, nr_entries, 0);
-
 	if (page_owner->last_migrate_reason != -1)
 		pr_alert("page has been migrated, last migrate reason: %s\n",
 			migrate_reason_names[page_owner->last_migrate_reason]);
@@ -489,6 +496,13 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 		if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
 			continue;
 
+		/*
+		 * Although we do have the info about past allocation of free
+		 * pages, it's not relevant for current memory usage.
+		 */
+		if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+			continue;
+
 		page_owner = get_page_owner(page_ext);
 
 		/*
-- 
cgit v1.2.3


From 4101196b19d7f905dca5dcf46cd35eb758cf06c0 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 23 Sep 2019 15:34:52 -0700
Subject: mm: page cache: store only head pages in i_pages

Transparent Huge Pages are currently stored in i_pages as pointers to
consecutive subpages.  This patch changes that to storing consecutive
pointers to the head page in preparation for storing huge pages more
efficiently in i_pages.

Large parts of this are "inspired" by Kirill's patch
https://lore.kernel.org/lkml/20170126115819.58875-2-kirill.shutemov@linux.intel.com/

Kirill and Huang Ying contributed several fixes.

[willy@infradead.org: use compound_nr, squish uninit-var warning]
Link: http://lkml.kernel.org/r/20190731210400.7419-1-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Kirill Shutemov <kirill@shutemov.name>
Reviewed-by: Song Liu <songliubraving@fb.com>
Tested-by: Song Liu <songliubraving@fb.com>
Tested-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Tested-by: Qian Cai <cai@lca.pw>
Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Song Liu <songliubraving@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h |  10 ++++
 mm/filemap.c            | 145 +++++++++++++++++++-----------------------------
 mm/huge_memory.c        |  22 +++++++-
 mm/khugepaged.c         |   4 +-
 mm/memfd.c              |   2 +
 mm/migrate.c            |   2 +-
 mm/shmem.c              |   2 +-
 mm/swap_state.c         |   4 +-
 8 files changed, 95 insertions(+), 96 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c7552459a15f..37a4d9e32cd3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -333,6 +333,16 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 			mapping_gfp_mask(mapping));
 }
 
+static inline struct page *find_subpage(struct page *page, pgoff_t offset)
+{
+	if (PageHuge(page))
+		return page;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	return page + (offset & (compound_nr(page) - 1));
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
diff --git a/mm/filemap.c b/mm/filemap.c
index d5462d706f76..533f271d6839 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -281,11 +281,11 @@ EXPORT_SYMBOL(delete_from_page_cache);
  * @pvec: pagevec with pages to delete
  *
  * The function walks over mapping->i_pages and removes pages passed in @pvec
- * from the mapping. The function expects @pvec to be sorted by page index.
+ * from the mapping. The function expects @pvec to be sorted by page index
+ * and is optimised for it to be dense.
  * It tolerates holes in @pvec (mapping entries at those indices are not
  * modified). The function expects only THP head pages to be present in the
- * @pvec and takes care to delete all corresponding tail pages from the
- * mapping as well.
+ * @pvec.
  *
  * The function expects the i_pages lock to be held.
  */
@@ -294,40 +294,43 @@ static void page_cache_delete_batch(struct address_space *mapping,
 {
 	XA_STATE(xas, &mapping->i_pages, pvec->pages[0]->index);
 	int total_pages = 0;
-	int i = 0, tail_pages = 0;
+	int i = 0;
 	struct page *page;
 
 	mapping_set_update(&xas, mapping);
 	xas_for_each(&xas, page, ULONG_MAX) {
-		if (i >= pagevec_count(pvec) && !tail_pages)
+		if (i >= pagevec_count(pvec))
 			break;
+
+		/* A swap/dax/shadow entry got inserted? Skip it. */
 		if (xa_is_value(page))
 			continue;
-		if (!tail_pages) {
-			/*
-			 * Some page got inserted in our range? Skip it. We
-			 * have our pages locked so they are protected from
-			 * being removed.
-			 */
-			if (page != pvec->pages[i]) {
-				VM_BUG_ON_PAGE(page->index >
-						pvec->pages[i]->index, page);
-				continue;
-			}
-			WARN_ON_ONCE(!PageLocked(page));
-			if (PageTransHuge(page) && !PageHuge(page))
-				tail_pages = HPAGE_PMD_NR - 1;
+		/*
+		 * A page got inserted in our range? Skip it. We have our
+		 * pages locked so they are protected from being removed.
+		 * If we see a page whose index is higher than ours, it
+		 * means our page has been removed, which shouldn't be
+		 * possible because we're holding the PageLock.
+		 */
+		if (page != pvec->pages[i]) {
+			VM_BUG_ON_PAGE(page->index > pvec->pages[i]->index,
+					page);
+			continue;
+		}
+
+		WARN_ON_ONCE(!PageLocked(page));
+
+		if (page->index == xas.xa_index)
 			page->mapping = NULL;
-			/*
-			 * Leave page->index set: truncation lookup relies
-			 * upon it
-			 */
+		/* Leave page->index set: truncation lookup relies on it */
+
+		/*
+		 * Move to the next page in the vector if this is a regular
+		 * page or the index is of the last sub-page of this compound
+		 * page.
+		 */
+		if (page->index + compound_nr(page) - 1 == xas.xa_index)
 			i++;
-		} else {
-			VM_BUG_ON_PAGE(page->index + HPAGE_PMD_NR - tail_pages
-					!= pvec->pages[i]->index, page);
-			tail_pages--;
-		}
 		xas_store(&xas, NULL);
 		total_pages++;
 	}
@@ -1520,7 +1523,7 @@ EXPORT_SYMBOL(page_cache_prev_miss);
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset)
 {
 	XA_STATE(xas, &mapping->i_pages, offset);
-	struct page *head, *page;
+	struct page *page;
 
 	rcu_read_lock();
 repeat:
@@ -1535,25 +1538,19 @@ repeat:
 	if (!page || xa_is_value(page))
 		goto out;
 
-	head = compound_head(page);
-	if (!page_cache_get_speculative(head))
-		goto repeat;
-
-	/* The page was split under us? */
-	if (compound_head(page) != head) {
-		put_page(head);
+	if (!page_cache_get_speculative(page))
 		goto repeat;
-	}
 
 	/*
-	 * Has the page moved?
+	 * Has the page moved or been split?
 	 * This is part of the lockless pagecache protocol. See
 	 * include/linux/pagemap.h for details.
 	 */
 	if (unlikely(page != xas_reload(&xas))) {
-		put_page(head);
+		put_page(page);
 		goto repeat;
 	}
+	page = find_subpage(page, offset);
 out:
 	rcu_read_unlock();
 
@@ -1735,7 +1732,6 @@ unsigned find_get_entries(struct address_space *mapping,
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, ULONG_MAX) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/*
@@ -1746,17 +1742,13 @@ unsigned find_get_entries(struct address_space *mapping,
 		if (xa_is_value(page))
 			goto export;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
+		page = find_subpage(page, xas.xa_index);
 
 export:
 		indices[ret] = xas.xa_index;
@@ -1765,7 +1757,7 @@ export:
 			break;
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -1807,33 +1799,27 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, end) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/* Skip over shadow, swap and DAX entries */
 		if (xa_is_value(page))
 			continue;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret] = page;
+		pages[ret] = find_subpage(page, xas.xa_index);
 		if (++ret == nr_pages) {
 			*start = xas.xa_index + 1;
 			goto out;
 		}
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -1878,7 +1864,6 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 
 	rcu_read_lock();
 	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/*
@@ -1888,24 +1873,19 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 		if (xa_is_value(page))
 			break;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret] = page;
+		pages[ret] = find_subpage(page, xas.xa_index);
 		if (++ret == nr_pages)
 			break;
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -1941,7 +1921,6 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 
 	rcu_read_lock();
 	xas_for_each_marked(&xas, page, end, tag) {
-		struct page *head;
 		if (xas_retry(&xas, page))
 			continue;
 		/*
@@ -1952,26 +1931,21 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
 		if (xa_is_value(page))
 			continue;
 
-		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto retry;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto put_page;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto put_page;
 
-		pages[ret] = page;
+		pages[ret] = find_subpage(page, xas.xa_index);
 		if (++ret == nr_pages) {
 			*index = xas.xa_index + 1;
 			goto out;
 		}
 		continue;
 put_page:
-		put_page(head);
+		put_page(page);
 retry:
 		xas_reset(&xas);
 	}
@@ -2652,7 +2626,7 @@ void filemap_map_pages(struct vm_fault *vmf,
 	pgoff_t last_pgoff = start_pgoff;
 	unsigned long max_idx;
 	XA_STATE(xas, &mapping->i_pages, start_pgoff);
-	struct page *head, *page;
+	struct page *page;
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, end_pgoff) {
@@ -2661,24 +2635,19 @@ void filemap_map_pages(struct vm_fault *vmf,
 		if (xa_is_value(page))
 			goto next;
 
-		head = compound_head(page);
-
 		/*
 		 * Check for a locked page first, as a speculative
 		 * reference may adversely influence page migration.
 		 */
-		if (PageLocked(head))
+		if (PageLocked(page))
 			goto next;
-		if (!page_cache_get_speculative(head))
+		if (!page_cache_get_speculative(page))
 			goto next;
 
-		/* The page was split under us? */
-		if (compound_head(page) != head)
-			goto skip;
-
-		/* Has the page moved? */
+		/* Has the page moved or been split? */
 		if (unlikely(page != xas_reload(&xas)))
 			goto skip;
+		page = find_subpage(page, xas.xa_index);
 
 		if (!PageUptodate(page) ||
 				PageReadahead(page) ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de1f15969e27..483b07b2d6ae 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2497,6 +2497,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	struct page *head = compound_head(page);
 	pg_data_t *pgdat = page_pgdat(head);
 	struct lruvec *lruvec;
+	struct address_space *swap_cache = NULL;
+	unsigned long offset = 0;
 	int i;
 
 	lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2504,6 +2506,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* complete memcg works before add pages to LRU */
 	mem_cgroup_split_huge_fixup(head);
 
+	if (PageAnon(head) && PageSwapCache(head)) {
+		swp_entry_t entry = { .val = page_private(head) };
+
+		offset = swp_offset(entry);
+		swap_cache = swap_address_space(entry);
+		xa_lock(&swap_cache->i_pages);
+	}
+
 	for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond i_size: drop them from page cache */
@@ -2513,6 +2523,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 			if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
 				shmem_uncharge(head->mapping->host, 1);
 			put_page(head + i);
+		} else if (!PageAnon(page)) {
+			__xa_store(&head->mapping->i_pages, head[i].index,
+					head + i, 0);
+		} else if (swap_cache) {
+			__xa_store(&swap_cache->i_pages, offset + i,
+					head + i, 0);
 		}
 	}
 
@@ -2523,10 +2539,12 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* See comment in __split_huge_page_tail() */
 	if (PageAnon(head)) {
 		/* Additional pin to swap cache */
-		if (PageSwapCache(head))
+		if (PageSwapCache(head)) {
 			page_ref_add(head, 2);
-		else
+			xa_unlock(&swap_cache->i_pages);
+		} else {
 			page_ref_inc(head);
+		}
 	} else {
 		/* Additional pin to page cache */
 		page_ref_add(head, 2);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ccede2425c3f..04a54ff5a8ac 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1378,7 +1378,7 @@ static void collapse_shmem(struct mm_struct *mm,
 				result = SCAN_FAIL;
 				goto xa_locked;
 			}
-			xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+			xas_store(&xas, new_page);
 			nr_none++;
 			continue;
 		}
@@ -1454,7 +1454,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		list_add_tail(&page->lru, &pagelist);
 
 		/* Finally, replace with the new page. */
-		xas_store(&xas, new_page + (index % HPAGE_PMD_NR));
+		xas_store(&xas, new_page);
 		continue;
 out_unlock:
 		unlock_page(page);
diff --git a/mm/memfd.c b/mm/memfd.c
index 650e65a46b9c..2647c898990c 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -39,6 +39,7 @@ static void memfd_tag_pins(struct xa_state *xas)
 	xas_for_each(xas, page, ULONG_MAX) {
 		if (xa_is_value(page))
 			continue;
+		page = find_subpage(page, xas->xa_index);
 		if (page_count(page) - page_mapcount(page) > 1)
 			xas_set_mark(xas, MEMFD_TAG_PINNED);
 
@@ -88,6 +89,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
 			bool clear = true;
 			if (xa_is_value(page))
 				continue;
+			page = find_subpage(page, xas.xa_index);
 			if (page_count(page) - page_mapcount(page) != 1) {
 				/*
 				 * On the last scan, we clean up all those tags
diff --git a/mm/migrate.c b/mm/migrate.c
index aa72b49e0209..374ef2fcb722 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -460,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
 		for (i = 1; i < HPAGE_PMD_NR; i++) {
 			xas_next(&xas);
-			xas_store(&xas, newpage + i);
+			xas_store(&xas, newpage);
 		}
 	}
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 15d26c86e5ef..57a6aedf6649 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -631,7 +631,7 @@ static int shmem_add_to_page_cache(struct page *page,
 		if (xas_error(&xas))
 			goto unlock;
 next:
-		xas_store(&xas, page + i);
+		xas_store(&xas, page);
 		if (++i < nr) {
 			xas_next(&xas);
 			goto next;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index f844af5f09ba..8e7ce9a9bc5e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -133,7 +133,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp)
 		for (i = 0; i < nr; i++) {
 			VM_BUG_ON_PAGE(xas.xa_index != idx + i, page);
 			set_page_private(page + i, entry.val + i);
-			xas_store(&xas, page + i);
+			xas_store(&xas, page);
 			xas_next(&xas);
 		}
 		address_space->nrpages += nr;
@@ -168,7 +168,7 @@ void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
 
 	for (i = 0; i < nr; i++) {
 		void *entry = xas_store(&xas, NULL);
-		VM_BUG_ON_PAGE(entry != page + i, entry);
+		VM_BUG_ON_PAGE(entry != page, entry);
 		set_page_private(page + i, 0);
 		xas_next(&xas);
 	}
-- 
cgit v1.2.3


From 2d15eb31b50a1b93927bdb7cc5d9960b90f7c1e4 Mon Sep 17 00:00:00 2001
From: "akpm@linux-foundation.org" <akpm@linux-foundation.org>
Date: Mon, 23 Sep 2019 15:35:04 -0700
Subject: mm/gup: add make_dirty arg to put_user_pages_dirty_lock()

[11~From: John Hubbard <jhubbard@nvidia.com>
Subject: mm/gup: add make_dirty arg to put_user_pages_dirty_lock()

Patch series "mm/gup: add make_dirty arg to put_user_pages_dirty_lock()",
v3.

There are about 50+ patches in my tree [2], and I'll be sending out the
remaining ones in a few more groups:

* The block/bio related changes (Jerome mostly wrote those, but I've had
  to move stuff around extensively, and add a little code)

* mm/ changes

* other subsystem patches

* an RFC that shows the current state of the tracking patch set.  That
  can only be applied after all call sites are converted, but it's good to
  get an early look at it.

This is part a tree-wide conversion, as described in fc1d8e7cca2d ("mm:
introduce put_user_page*(), placeholder versions").

This patch (of 3):

Provide more capable variation of put_user_pages_dirty_lock(), and delete
put_user_pages_dirty().  This is based on the following:

1.  Lots of call sites become simpler if a bool is passed into
   put_user_page*(), instead of making the call site choose which
   put_user_page*() variant to call.

2.  Christoph Hellwig's observation that set_page_dirty_lock() is
   usually correct, and set_page_dirty() is usually a bug, or at least
   questionable, within a put_user_page*() calling chain.

This leads to the following API choices:

    * put_user_pages_dirty_lock(page, npages, make_dirty)

    * There is no put_user_pages_dirty(). You have to
      hand code that, in the rare case that it's
      required.

[jhubbard@nvidia.com: remove unused variable in siw_free_plist()]
  Link: http://lkml.kernel.org/r/20190729074306.10368-1-jhubbard@nvidia.com
Link: http://lkml.kernel.org/r/20190724044537.10458-2-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/infiniband/core/umem.c             |   5 +-
 drivers/infiniband/hw/hfi1/user_pages.c    |   5 +-
 drivers/infiniband/hw/qib/qib_user_pages.c |   5 +-
 drivers/infiniband/hw/usnic/usnic_uiom.c   |   5 +-
 drivers/infiniband/sw/siw/siw_mem.c        |  10 +--
 include/linux/mm.h                         |   5 +-
 mm/gup.c                                   | 115 +++++++++++++----------------
 7 files changed, 58 insertions(+), 92 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 41f9e268e3fb..24244a2f68cc 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -54,10 +54,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 
 	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
 		page = sg_page_iter_page(&sg_iter);
-		if (umem->writable && dirty)
-			put_user_pages_dirty_lock(&page, 1);
-		else
-			put_user_page(page);
+		put_user_pages_dirty_lock(&page, 1, umem->writable && dirty);
 	}
 
 	sg_free_table(&umem->sg_head);
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index b89a9b9aef7a..469acb961fbd 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np
 void hfi1_release_user_pages(struct mm_struct *mm, struct page **p,
 			     size_t npages, bool dirty)
 {
-	if (dirty)
-		put_user_pages_dirty_lock(p, npages);
-	else
-		put_user_pages(p, npages);
+	put_user_pages_dirty_lock(p, npages, dirty);
 
 	if (mm) { /* during close after signal, mm can be NULL */
 		atomic64_sub(npages, &mm->pinned_vm);
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index bfbfbb7e0ff4..6bf764e41891 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -40,10 +40,7 @@
 static void __qib_release_user_pages(struct page **p, size_t num_pages,
 				     int dirty)
 {
-	if (dirty)
-		put_user_pages_dirty_lock(p, num_pages);
-	else
-		put_user_pages(p, num_pages);
+	put_user_pages_dirty_lock(p, num_pages, dirty);
 }
 
 /**
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 0b0237d41613..62e6ffa9ad78 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
 		for_each_sg(chunk->page_list, sg, chunk->nents, i) {
 			page = sg_page(sg);
 			pa = sg_phys(sg);
-			if (dirty)
-				put_user_pages_dirty_lock(&page, 1);
-			else
-				put_user_page(page);
+			put_user_pages_dirty_lock(&page, 1, dirty);
 			usnic_dbg("pa: %pa\n", &pa);
 		}
 		kfree(chunk);
diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index 87a56039f0ef..e99983f07663 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -63,15 +63,7 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
 static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
 			   bool dirty)
 {
-	struct page **p = chunk->plist;
-
-	while (num_pages--) {
-		if (!PageDirty(*p) && dirty)
-			put_user_pages_dirty_lock(p, 1);
-		else
-			put_user_page(*p);
-		p++;
-	}
+	put_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
 }
 
 void siw_umem_release(struct siw_umem *umem, bool dirty)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 69b7314c8d24..57a9fa34f159 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1075,8 +1075,9 @@ static inline void put_user_page(struct page *page)
 	put_page(page);
 }
 
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty);
+
 void put_user_pages(struct page **pages, unsigned long npages);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
diff --git a/mm/gup.c b/mm/gup.c
index 84a36d80dd2e..012060efddf1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,85 +29,70 @@ struct follow_page_context {
 	unsigned int page_mask;
 };
 
-typedef int (*set_dirty_func_t)(struct page *page);
-
-static void __put_user_pages_dirty(struct page **pages,
-				   unsigned long npages,
-				   set_dirty_func_t sdf)
-{
-	unsigned long index;
-
-	for (index = 0; index < npages; index++) {
-		struct page *page = compound_head(pages[index]);
-
-		/*
-		 * Checking PageDirty at this point may race with
-		 * clear_page_dirty_for_io(), but that's OK. Two key cases:
-		 *
-		 * 1) This code sees the page as already dirty, so it skips
-		 * the call to sdf(). That could happen because
-		 * clear_page_dirty_for_io() called page_mkclean(),
-		 * followed by set_page_dirty(). However, now the page is
-		 * going to get written back, which meets the original
-		 * intention of setting it dirty, so all is well:
-		 * clear_page_dirty_for_io() goes on to call
-		 * TestClearPageDirty(), and write the page back.
-		 *
-		 * 2) This code sees the page as clean, so it calls sdf().
-		 * The page stays dirty, despite being written back, so it
-		 * gets written back again in the next writeback cycle.
-		 * This is harmless.
-		 */
-		if (!PageDirty(page))
-			sdf(page);
-
-		put_user_page(page);
-	}
-}
-
 /**
- * put_user_pages_dirty() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
+ * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
+ * @pages:  array of pages to be maybe marked dirty, and definitely released.
  * @npages: number of pages in the @pages array.
+ * @make_dirty: whether to mark the pages dirty
  *
  * "gup-pinned page" refers to a page that has had one of the get_user_pages()
  * variants called on that page.
  *
  * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
+ * compound page) dirty, if @make_dirty is true, and if the page was previously
+ * listed as clean. In any case, releases all pages using put_user_page(),
+ * possibly via put_user_pages(), for the non-dirty case.
  *
  * Please see the put_user_page() documentation for details.
  *
- * set_page_dirty(), which does not lock the page, is used here.
- * Therefore, it is the caller's responsibility to ensure that this is
- * safe. If not, then put_user_pages_dirty_lock() should be called instead.
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), put_user_page().
  *
  */
-void put_user_pages_dirty(struct page **pages, unsigned long npages)
+void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+			       bool make_dirty)
 {
-	__put_user_pages_dirty(pages, npages, set_page_dirty);
-}
-EXPORT_SYMBOL(put_user_pages_dirty);
+	unsigned long index;
 
-/**
- * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages
- * @pages:  array of pages to be marked dirty and released.
- * @npages: number of pages in the @pages array.
- *
- * For each page in the @pages array, make that page (or its head page, if a
- * compound page) dirty, if it was previously listed as clean. Then, release
- * the page using put_user_page().
- *
- * Please see the put_user_page() documentation for details.
- *
- * This is just like put_user_pages_dirty(), except that it invokes
- * set_page_dirty_lock(), instead of set_page_dirty().
- *
- */
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages)
-{
-	__put_user_pages_dirty(pages, npages, set_page_dirty_lock);
+	/*
+	 * TODO: this can be optimized for huge pages: if a series of pages is
+	 * physically contiguous and part of the same compound page, then a
+	 * single operation to the head page should suffice.
+	 */
+
+	if (!make_dirty) {
+		put_user_pages(pages, npages);
+		return;
+	}
+
+	for (index = 0; index < npages; index++) {
+		struct page *page = compound_head(pages[index]);
+		/*
+		 * Checking PageDirty at this point may race with
+		 * clear_page_dirty_for_io(), but that's OK. Two key
+		 * cases:
+		 *
+		 * 1) This code sees the page as already dirty, so it
+		 * skips the call to set_page_dirty(). That could happen
+		 * because clear_page_dirty_for_io() called
+		 * page_mkclean(), followed by set_page_dirty().
+		 * However, now the page is going to get written back,
+		 * which meets the original intention of setting it
+		 * dirty, so all is well: clear_page_dirty_for_io() goes
+		 * on to call TestClearPageDirty(), and write the page
+		 * back.
+		 *
+		 * 2) This code sees the page as clean, so it calls
+		 * set_page_dirty(). The page stays dirty, despite being
+		 * written back, so it gets written back again in the
+		 * next writeback cycle. This is harmless.
+		 */
+		if (!PageDirty(page))
+			set_page_dirty_lock(page);
+		put_user_page(page);
+	}
 }
 EXPORT_SYMBOL(put_user_pages_dirty_lock);
 
-- 
cgit v1.2.3


From 13224794cb0832caa403ad583d8605202cabc6bc Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 23 Sep 2019 15:35:19 -0700
Subject: mm: remove quicklist page table caches

Patch series "mm: remove quicklist page table caches".

A while ago Nicholas proposed to remove quicklist page table caches [1].

I've rebased his patch on the curren upstream and switched ia64 and sh to
use generic versions of PTE allocation.

[1] https://lore.kernel.org/linux-mm/20190711030339.20892-1-npiggin@gmail.com

This patch (of 3):

Remove page table allocator "quicklists".  These have been around for a
long time, but have not got much traction in the last decade and are only
used on ia64 and sh architectures.

The numbers in the initial commit look interesting but probably don't
apply anymore.  If anybody wants to resurrect this it's in the git
history, but it's unhelpful to have this code and divergent allocator
behaviour for minor archs.

Also it might be better to instead make more general improvements to page
allocator if this is still so slow.

Link: http://lkml.kernel.org/r/1565250728-21721-2-git-send-email-rppt@linux.ibm.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/pgalloc.h      |   2 -
 arch/arc/include/asm/pgalloc.h        |   1 -
 arch/arm/include/asm/pgalloc.h        |   2 -
 arch/arm64/include/asm/pgalloc.h      |   2 -
 arch/csky/include/asm/pgalloc.h       |   2 -
 arch/hexagon/include/asm/pgalloc.h    |   2 -
 arch/ia64/Kconfig                     |   4 --
 arch/ia64/include/asm/pgalloc.h       |  32 ++++-------
 arch/m68k/include/asm/pgtable_mm.h    |   2 -
 arch/m68k/include/asm/pgtable_no.h    |   2 -
 arch/microblaze/include/asm/pgalloc.h |  89 +++--------------------------
 arch/microblaze/mm/pgtable.c          |   4 --
 arch/mips/include/asm/pgalloc.h       |   2 -
 arch/nds32/include/asm/pgalloc.h      |   2 -
 arch/nios2/include/asm/pgalloc.h      |   2 -
 arch/openrisc/include/asm/pgalloc.h   |   2 -
 arch/parisc/include/asm/pgalloc.h     |   2 -
 arch/powerpc/include/asm/pgalloc.h    |   2 -
 arch/riscv/include/asm/pgalloc.h      |   4 --
 arch/s390/include/asm/pgtable.h       |   1 -
 arch/sh/include/asm/pgalloc.h         |  22 ++------
 arch/sh/mm/Kconfig                    |   3 -
 arch/sparc/include/asm/pgalloc_32.h   |   2 -
 arch/sparc/include/asm/pgalloc_64.h   |   2 -
 arch/sparc/mm/init_32.c               |   1 -
 arch/um/include/asm/pgalloc.h         |   2 -
 arch/unicore32/include/asm/pgalloc.h  |   2 -
 arch/x86/include/asm/pgtable_32.h     |   1 -
 arch/x86/include/asm/pgtable_64.h     |   1 -
 arch/xtensa/include/asm/tlbflush.h    |   3 -
 fs/proc/meminfo.c                     |   4 --
 include/asm-generic/pgalloc.h         |   5 --
 include/linux/quicklist.h             |  94 -------------------------------
 kernel/sched/idle.c                   |   1 -
 lib/show_mem.c                        |   5 --
 mm/Kconfig                            |   5 --
 mm/Makefile                           |   1 -
 mm/mmu_gather.c                       |   2 -
 mm/quicklist.c                        | 103 ----------------------------------
 39 files changed, 25 insertions(+), 395 deletions(-)
 delete mode 100644 include/linux/quicklist.h
 delete mode 100644 mm/quicklist.c

(limited to 'include')

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 71ded3b7d82d..eb91f1e85629 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -53,6 +53,4 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	free_page((unsigned long)pmd);
 }
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 9bdb8ed5b0db..4751f2251cd9 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -129,7 +129,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
 
 #define __pte_free_tlb(tlb, pte, addr)  pte_free((tlb)->mm, pte)
 
-#define check_pgt_cache()   do { } while (0)
 #define pmd_pgtable(pmd)	((pgtable_t) pmd_page_vaddr(pmd))
 
 #endif /* _ASM_ARC_PGALLOC_H */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index a2a68b751971..069da393110c 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -15,8 +15,6 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-#define check_pgt_cache()		do { } while (0)
-
 #ifdef CONFIG_MMU
 
 #define _PAGE_USER_TABLE	(PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_USER))
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 14d0bc44d451..172d76fa0245 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -15,8 +15,6 @@
 
 #include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
 
-#define check_pgt_cache()		do { } while (0)
-
 #define PGD_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
 
 #if CONFIG_PGTABLE_LEVELS > 2
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index 98c5716708d6..d089113fe41f 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -75,8 +75,6 @@ do {							\
 	tlb_remove_page(tlb, pte);			\
 } while (0)
 
-#define check_pgt_cache()	do {} while (0)
-
 extern void pagetable_init(void);
 extern void pre_mmu_init(void);
 extern void pre_trap_init(void);
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index d6544dc71258..5a6e79e7926d 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -13,8 +13,6 @@
 
 #include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
 
-#define check_pgt_cache() do {} while (0)
-
 extern unsigned long long kmap_generation;
 
 /*
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 685a3df126ca..16714477eef4 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -72,10 +72,6 @@ config 64BIT
 config ZONE_DMA32
 	def_bool y
 
-config QUICKLIST
-	bool
-	default y
-
 config MMU
 	bool
 	default y
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index c9e481023c25..b03d993f5d7e 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -19,18 +19,17 @@
 #include <linux/mm.h>
 #include <linux/page-flags.h>
 #include <linux/threads.h>
-#include <linux/quicklist.h>
 
 #include <asm/mmu_context.h>
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
-	return quicklist_alloc(0, GFP_KERNEL, NULL);
+	return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-	quicklist_free(0, NULL, pgd);
+	free_page((unsigned long)pgd);
 }
 
 #if CONFIG_PGTABLE_LEVELS == 4
@@ -42,12 +41,12 @@ pgd_populate(struct mm_struct *mm, pgd_t * pgd_entry, pud_t * pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return quicklist_alloc(0, GFP_KERNEL, NULL);
+	return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	quicklist_free(0, NULL, pud);
+	free_page((unsigned long)pud);
 }
 #define __pud_free_tlb(tlb, pud, address)	pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
@@ -60,12 +59,12 @@ pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd)
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return quicklist_alloc(0, GFP_KERNEL, NULL);
+	return (pmd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-	quicklist_free(0, NULL, pmd);
+	free_page((unsigned long)pmd);
 }
 
 #define __pmd_free_tlb(tlb, pmd, address)	pmd_free((tlb)->mm, pmd)
@@ -86,14 +85,12 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte)
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
-	void *pg;
 
-	pg = quicklist_alloc(0, GFP_KERNEL, NULL);
-	if (!pg)
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
 		return NULL;
-	page = virt_to_page(pg);
 	if (!pgtable_page_ctor(page)) {
-		quicklist_free(0, NULL, pg);
+		__free_page(page);
 		return NULL;
 	}
 	return page;
@@ -101,23 +98,18 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return quicklist_alloc(0, GFP_KERNEL, NULL);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
 	pgtable_page_dtor(pte);
-	quicklist_free_page(0, NULL, pte);
+	__free_page(pte);
 }
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	quicklist_free(0, NULL, pte);
-}
-
-static inline void check_pgt_cache(void)
-{
-	quicklist_trim(0, NULL, 25, 16);
+	free_page((unsigned long)pte);
 }
 
 #define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, pte)
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index fde4534b974f..cc476c1d72e5 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -181,6 +181,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot);
  */
 #define pgtable_cache_init()	do { } while (0)
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif /* _M68K_PGTABLE_H */
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index fc3a96c77bd8..69e271101223 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -60,6 +60,4 @@ extern void paging_init(void);
 
 #include <asm-generic/pgtable.h>
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif /* _M68KNOMMU_PGTABLE_H */
diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h
index f4cc9ffc449e..ac0731881751 100644
--- a/arch/microblaze/include/asm/pgalloc.h
+++ b/arch/microblaze/include/asm/pgalloc.h
@@ -21,83 +21,20 @@
 #include <asm/cache.h>
 #include <asm/pgtable.h>
 
-#define PGDIR_ORDER	0
-
-/*
- * This is handled very differently on MicroBlaze since out page tables
- * are all 0's and I want to be able to use these zero'd pages elsewhere
- * as well - it gives us quite a speedup.
- * -- Cort
- */
-extern struct pgtable_cache_struct {
-	unsigned long *pgd_cache;
-	unsigned long *pte_cache;
-	unsigned long pgtable_cache_sz;
-} quicklists;
-
-#define pgd_quicklist		(quicklists.pgd_cache)
-#define pmd_quicklist		((unsigned long *)0)
-#define pte_quicklist		(quicklists.pte_cache)
-#define pgtable_cache_size	(quicklists.pgtable_cache_sz)
-
-extern unsigned long *zero_cache; /* head linked list of pre-zero'd pages */
-extern atomic_t zero_sz; /* # currently pre-zero'd pages */
-extern atomic_t zeropage_hits; /* # zero'd pages request that we've done */
-extern atomic_t zeropage_calls; /* # zero'd pages request that've been made */
-extern atomic_t zerototal; /* # pages zero'd over time */
-
-#define zero_quicklist		(zero_cache)
-#define zero_cache_sz	 	(zero_sz)
-#define zero_cache_calls	(zeropage_calls)
-#define zero_cache_hits		(zeropage_hits)
-#define zero_cache_total	(zerototal)
-
-/*
- * return a pre-zero'd page from the list,
- * return NULL if none available -- Cort
- */
-extern unsigned long get_zero_page_fast(void);
-
 extern void __bad_pte(pmd_t *pmd);
 
-static inline pgd_t *get_pgd_slow(void)
+static inline pgd_t *get_pgd(void)
 {
-	pgd_t *ret;
-
-	ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGDIR_ORDER);
-	if (ret != NULL)
-		clear_page(ret);
-	return ret;
+	return (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, 0);
 }
 
-static inline pgd_t *get_pgd_fast(void)
-{
-	unsigned long *ret;
-
-	ret = pgd_quicklist;
-	if (ret != NULL) {
-		pgd_quicklist = (unsigned long *)(*ret);
-		ret[0] = 0;
-		pgtable_cache_size--;
-	} else
-		ret = (unsigned long *)get_pgd_slow();
-	return (pgd_t *)ret;
-}
-
-static inline void free_pgd_fast(pgd_t *pgd)
-{
-	*(unsigned long **)pgd = pgd_quicklist;
-	pgd_quicklist = (unsigned long *) pgd;
-	pgtable_cache_size++;
-}
-
-static inline void free_pgd_slow(pgd_t *pgd)
+static inline void free_pgd(pgd_t *pgd)
 {
 	free_page((unsigned long)pgd);
 }
 
-#define pgd_free(mm, pgd)        free_pgd_fast(pgd)
-#define pgd_alloc(mm)		get_pgd_fast()
+#define pgd_free(mm, pgd)	free_pgd(pgd)
+#define pgd_alloc(mm)		get_pgd()
 
 #define pmd_pgtable(pmd)	pmd_page(pmd)
 
@@ -115,15 +52,14 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 	struct page *ptepage;
 
 #ifdef CONFIG_HIGHPTE
-	int flags = GFP_KERNEL | __GFP_HIGHMEM;
+	int flags = GFP_KERNEL | __GFP_ZERO | __GFP_HIGHMEM;
 #else
-	int flags = GFP_KERNEL;
+	int flags = GFP_KERNEL | __GFP_ZERO;
 #endif
 
 	ptepage = alloc_pages(flags, 0);
 	if (!ptepage)
 		return NULL;
-	clear_highpage(ptepage);
 	if (!pgtable_page_ctor(ptepage)) {
 		__free_page(ptepage);
 		return NULL;
@@ -131,13 +67,6 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 	return ptepage;
 }
 
-static inline void pte_free_fast(pte_t *pte)
-{
-	*(unsigned long **)pte = pte_quicklist;
-	pte_quicklist = (unsigned long *) pte;
-	pgtable_cache_size++;
-}
-
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
 	free_page((unsigned long)pte);
@@ -171,10 +100,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *ptepage)
 #define __pmd_free_tlb(tlb, x, addr)	pmd_free((tlb)->mm, x)
 #define pgd_populate(mm, pmd, pte)	BUG()
 
-extern int do_check_pgt_cache(int, int);
-
 #endif /* CONFIG_MMU */
 
-#define check_pgt_cache()		do { } while (0)
-
 #endif /* _ASM_MICROBLAZE_PGALLOC_H */
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index 8fe54fda31dc..010bb9cee2e4 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -44,10 +44,6 @@ unsigned long ioremap_base;
 unsigned long ioremap_bot;
 EXPORT_SYMBOL(ioremap_bot);
 
-#ifndef CONFIG_SMP
-struct pgtable_cache_struct quicklists;
-#endif
-
 static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
 		unsigned long flags)
 {
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index aa16b85ddffc..aa73cb187a07 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -105,8 +105,6 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
-#define check_pgt_cache()	do { } while (0)
-
 extern void pagetable_init(void);
 
 #endif /* _ASM_PGALLOC_H */
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index e78b43d8389f..37125e6884d7 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -23,8 +23,6 @@
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
-#define check_pgt_cache()		do { } while (0)
-
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 4bc8cf72067e..750d18d5980b 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -45,6 +45,4 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 		tlb_remove_page((tlb), (pte));			\
 	} while (0)
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif /* _ASM_NIOS2_PGALLOC_H */
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 3d4b397c2d06..787c1b9d2f6d 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -101,6 +101,4 @@ do {					\
 
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#define check_pgt_cache()          do { } while (0)
-
 #endif
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index 4f2059a50fae..d98647c29b74 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -124,6 +124,4 @@ pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index 2b2c60a1a66d..6dd78a2dc03a 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -64,8 +64,6 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
 extern struct kmem_cache *pgtable_cache[];
 #define PGT_CACHE(shift) pgtable_cache[shift]
 
-static inline void check_pgt_cache(void) { }
-
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgalloc.h>
 #else
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index 56a67d66f72f..f66a00d8cb19 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -82,8 +82,4 @@ do {                                    \
 	tlb_remove_page((tlb), pte);    \
 } while (0)
 
-static inline void check_pgt_cache(void)
-{
-}
-
 #endif /* _ASM_RISCV_PGALLOC_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0c4600725fc2..8f59454ac407 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1686,7 +1686,6 @@ extern void s390_reset_cmma(struct mm_struct *mm);
  * No page table caches to initialise
  */
 static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
 
 #include <asm-generic/pgtable.h>
 
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index b56f908b1395..9e15054858b4 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -2,11 +2,8 @@
 #ifndef __ASM_SH_PGALLOC_H
 #define __ASM_SH_PGALLOC_H
 
-#include <linux/quicklist.h>
 #include <asm/page.h>
 
-#define QUICK_PT 0	/* Other page table pages that are zero on free */
-
 extern pgd_t *pgd_alloc(struct mm_struct *);
 extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
@@ -34,20 +31,18 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
  */
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
-	return quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
+	return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	struct page *page;
-	void *pg;
 
-	pg = quicklist_alloc(QUICK_PT, GFP_KERNEL, NULL);
-	if (!pg)
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
 		return NULL;
-	page = virt_to_page(pg);
 	if (!pgtable_page_ctor(page)) {
-		quicklist_free(QUICK_PT, NULL, pg);
+		__free_page(page);
 		return NULL;
 	}
 	return page;
@@ -55,13 +50,13 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	quicklist_free(QUICK_PT, NULL, pte);
+	free_page((unsigned long)pte);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
 	pgtable_page_dtor(pte);
-	quicklist_free_page(QUICK_PT, NULL, pte);
+	__free_page(pte);
 }
 
 #define __pte_free_tlb(tlb,pte,addr)			\
@@ -79,9 +74,4 @@ do {							\
 } while (0);
 #endif
 
-static inline void check_pgt_cache(void)
-{
-	quicklist_trim(QUICK_PT, NULL, 25, 16);
-}
-
 #endif /* __ASM_SH_PGALLOC_H */
diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig
index 02ed2df25a54..5c8a2ebfc720 100644
--- a/arch/sh/mm/Kconfig
+++ b/arch/sh/mm/Kconfig
@@ -1,9 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 menu "Memory management options"
 
-config QUICKLIST
-	def_bool y
-
 config MMU
         bool "Support for memory management hardware"
 	depends on !CPU_SH2
diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h
index 282be50a4adf..10538a4d1a1e 100644
--- a/arch/sparc/include/asm/pgalloc_32.h
+++ b/arch/sparc/include/asm/pgalloc_32.h
@@ -17,8 +17,6 @@ void srmmu_free_nocache(void *addr, int size);
 
 extern struct resource sparc_iomap;
 
-#define check_pgt_cache()	do { } while (0)
-
 pgd_t *get_pgd_fast(void);
 static inline void free_pgd_fast(pgd_t *pgd)
 {
diff --git a/arch/sparc/include/asm/pgalloc_64.h b/arch/sparc/include/asm/pgalloc_64.h
index 48abccba4991..9d3e5cc95bbb 100644
--- a/arch/sparc/include/asm/pgalloc_64.h
+++ b/arch/sparc/include/asm/pgalloc_64.h
@@ -69,8 +69,6 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage);
 #define pmd_populate(MM, PMD, PTE)		pmd_set(MM, PMD, PTE)
 #define pmd_pgtable(PMD)			((pte_t *)__pmd_page(PMD))
 
-#define check_pgt_cache()	do { } while (0)
-
 void pgtable_free(void *table, bool is_page);
 
 #ifdef CONFIG_SMP
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 046ab116cc8c..906eda1158b4 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -31,7 +31,6 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/vaddrs.h>
-#include <asm/pgalloc.h>	/* bug in asm-generic/tlb.h: check_pgt_cache */
 #include <asm/setup.h>
 #include <asm/tlb.h>
 #include <asm/prom.h>
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 023599c3fa51..446e0c0f4018 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -43,7 +43,5 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 #define __pmd_free_tlb(tlb,x, address)   tlb_remove_page((tlb),virt_to_page(x))
 #endif
 
-#define check_pgt_cache()	do { } while (0)
-
 #endif
 
diff --git a/arch/unicore32/include/asm/pgalloc.h b/arch/unicore32/include/asm/pgalloc.h
index 3f0903bd98e9..ba1c9a79993b 100644
--- a/arch/unicore32/include/asm/pgalloc.h
+++ b/arch/unicore32/include/asm/pgalloc.h
@@ -18,8 +18,6 @@
 #define __HAVE_ARCH_PTE_ALLOC_ONE
 #include <asm-generic/pgalloc.h>
 
-#define check_pgt_cache()		do { } while (0)
-
 #define _PAGE_USER_TABLE	(PMD_TYPE_TABLE | PMD_PRESENT)
 #define _PAGE_KERNEL_TABLE	(PMD_TYPE_TABLE | PMD_PRESENT)
 
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index c78da8eda8f2..b9b9f8aa963e 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -30,7 +30,6 @@ extern pgd_t initial_page_table[1024];
 extern pmd_t initial_pg_pmd[];
 
 static inline void pgtable_cache_init(void) { }
-static inline void check_pgt_cache(void) { }
 void paging_init(void);
 void sync_initial_page_table(void);
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 4990d26dfc73..a26d2d58b9c9 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -242,7 +242,6 @@ extern void cleanup_highmap(void);
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
 #define pgtable_cache_init()   do { } while (0)
-#define check_pgt_cache()      do { } while (0)
 
 #define PAGE_AGP    PAGE_KERNEL_NOCACHE
 #define HAVE_PAGE_AGP 1
diff --git a/arch/xtensa/include/asm/tlbflush.h b/arch/xtensa/include/asm/tlbflush.h
index 06875feb27c2..856e2da2e397 100644
--- a/arch/xtensa/include/asm/tlbflush.h
+++ b/arch/xtensa/include/asm/tlbflush.h
@@ -160,9 +160,6 @@ static inline void invalidate_dtlb_mapping (unsigned address)
 		invalidate_dtlb_entry(tlb_entry);
 }
 
-#define check_pgt_cache()	do { } while (0)
-
-
 /*
  * DO NOT USE THESE FUNCTIONS.  These instructions aren't part of the Xtensa
  * ISA and exist only for test purposes..
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 465ea0153b2a..4bd80e68eb03 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -8,7 +8,6 @@
 #include <linux/mmzone.h>
 #include <linux/proc_fs.h>
 #include <linux/percpu.h>
-#include <linux/quicklist.h>
 #include <linux/seq_file.h>
 #include <linux/swap.h>
 #include <linux/vmstat.h>
@@ -106,9 +105,6 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		   global_zone_page_state(NR_KERNEL_STACK_KB));
 	show_val_kb(m, "PageTables:     ",
 		    global_zone_page_state(NR_PAGETABLE));
-#ifdef CONFIG_QUICKLIST
-	show_val_kb(m, "Quicklists:     ", quicklist_total_size());
-#endif
 
 	show_val_kb(m, "NFS_Unstable:   ",
 		    global_node_page_state(NR_UNSTABLE_NFS));
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 8476175c07e7..6f8cc06ee44e 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -102,11 +102,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
 	__free_page(pte_page);
 }
 
-#else /* CONFIG_MMU */
-
-/* This is enough for a nommu architecture */
-#define check_pgt_cache()          do { } while (0)
-
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
diff --git a/include/linux/quicklist.h b/include/linux/quicklist.h
deleted file mode 100644
index 034982c98c8b..000000000000
--- a/include/linux/quicklist.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_QUICKLIST_H
-#define LINUX_QUICKLIST_H
-/*
- * Fast allocations and disposal of pages. Pages must be in the condition
- * as needed after allocation when they are freed. Per cpu lists of pages
- * are kept that only contain node local pages.
- *
- * (C) 2007, SGI. Christoph Lameter <cl@linux.com>
- */
-#include <linux/kernel.h>
-#include <linux/gfp.h>
-#include <linux/percpu.h>
-
-#ifdef CONFIG_QUICKLIST
-
-struct quicklist {
-	void *page;
-	int nr_pages;
-};
-
-DECLARE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK];
-
-/*
- * The two key functions quicklist_alloc and quicklist_free are inline so
- * that they may be custom compiled for the platform.
- * Specifying a NULL ctor can remove constructor support. Specifying
- * a constant quicklist allows the determination of the exact address
- * in the per cpu area.
- *
- * The fast patch in quicklist_alloc touched only a per cpu cacheline and
- * the first cacheline of the page itself. There is minmal overhead involved.
- */
-static inline void *quicklist_alloc(int nr, gfp_t flags, void (*ctor)(void *))
-{
-	struct quicklist *q;
-	void **p = NULL;
-
-	q =&get_cpu_var(quicklist)[nr];
-	p = q->page;
-	if (likely(p)) {
-		q->page = p[0];
-		p[0] = NULL;
-		q->nr_pages--;
-	}
-	put_cpu_var(quicklist);
-	if (likely(p))
-		return p;
-
-	p = (void *)__get_free_page(flags | __GFP_ZERO);
-	if (ctor && p)
-		ctor(p);
-	return p;
-}
-
-static inline void __quicklist_free(int nr, void (*dtor)(void *), void *p,
-	struct page *page)
-{
-	struct quicklist *q;
-
-	q = &get_cpu_var(quicklist)[nr];
-	*(void **)p = q->page;
-	q->page = p;
-	q->nr_pages++;
-	put_cpu_var(quicklist);
-}
-
-static inline void quicklist_free(int nr, void (*dtor)(void *), void *pp)
-{
-	__quicklist_free(nr, dtor, pp, virt_to_page(pp));
-}
-
-static inline void quicklist_free_page(int nr, void (*dtor)(void *),
-							struct page *page)
-{
-	__quicklist_free(nr, dtor, page_address(page), page);
-}
-
-void quicklist_trim(int nr, void (*dtor)(void *),
-	unsigned long min_pages, unsigned long max_free);
-
-unsigned long quicklist_total_size(void);
-
-#else
-
-static inline unsigned long quicklist_total_size(void)
-{
-	return 0;
-}
-
-#endif
-
-#endif /* LINUX_QUICKLIST_H */
-
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c892c6280c9f..8dad5aa600ea 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -238,7 +238,6 @@ static void do_idle(void)
 	tick_nohz_idle_enter();
 
 	while (!need_resched()) {
-		check_pgt_cache();
 		rmb();
 
 		local_irq_disable();
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 5c86ef4c899f..1c26c14ffbb9 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/mm.h>
-#include <linux/quicklist.h>
 #include <linux/cma.h>
 
 void show_mem(unsigned int filter, nodemask_t *nodemask)
@@ -39,10 +38,6 @@ void show_mem(unsigned int filter, nodemask_t *nodemask)
 #ifdef CONFIG_CMA
 	printk("%lu pages cma reserved\n", totalcma_pages);
 #endif
-#ifdef CONFIG_QUICKLIST
-	printk("%lu pages in pagetable cache\n",
-		quicklist_total_size());
-#endif
 #ifdef CONFIG_MEMORY_FAILURE
 	printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages));
 #endif
diff --git a/mm/Kconfig b/mm/Kconfig
index 2fe4902ad755..f88be1cbcfb2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -273,11 +273,6 @@ config BOUNCE
 	  by default when ZONE_DMA or HIGHMEM is selected, but you
 	  may say n to override this.
 
-config NR_QUICK
-	int
-	depends on QUICKLIST
-	default "1"
-
 config VIRT_TO_BUS
 	bool
 	help
diff --git a/mm/Makefile b/mm/Makefile
index d0b295c3b764..d11de59e9c3c 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -72,7 +72,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_MEMTEST)		+= memtest.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
 obj-$(CONFIG_PAGE_COUNTER) += page_counter.o
 obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 8c943a6e1696..7d70e5c78f97 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -271,8 +271,6 @@ void tlb_finish_mmu(struct mmu_gather *tlb,
 
 	tlb_flush_mmu(tlb);
 
-	/* keep the page table cache within bounds */
-	check_pgt_cache();
 #ifndef CONFIG_HAVE_MMU_GATHER_NO_GATHER
 	tlb_batch_list_free(tlb);
 #endif
diff --git a/mm/quicklist.c b/mm/quicklist.c
deleted file mode 100644
index 5e98ac78e410..000000000000
--- a/mm/quicklist.c
+++ /dev/null
@@ -1,103 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Quicklist support.
- *
- * Quicklists are light weight lists of pages that have a defined state
- * on alloc and free. Pages must be in the quicklist specific defined state
- * (zero by default) when the page is freed. It seems that the initial idea
- * for such lists first came from Dave Miller and then various other people
- * improved on it.
- *
- * Copyright (C) 2007 SGI,
- * 	Christoph Lameter <cl@linux.com>
- * 		Generalized, added support for multiple lists and
- * 		constructors / destructors.
- */
-#include <linux/kernel.h>
-
-#include <linux/gfp.h>
-#include <linux/mm.h>
-#include <linux/mmzone.h>
-#include <linux/quicklist.h>
-
-DEFINE_PER_CPU(struct quicklist [CONFIG_NR_QUICK], quicklist);
-
-#define FRACTION_OF_NODE_MEM	16
-
-static unsigned long max_pages(unsigned long min_pages)
-{
-	unsigned long node_free_pages, max;
-	int node = numa_node_id();
-	struct zone *zones = NODE_DATA(node)->node_zones;
-	int num_cpus_on_node;
-
-	node_free_pages =
-#ifdef CONFIG_ZONE_DMA
-		zone_page_state(&zones[ZONE_DMA], NR_FREE_PAGES) +
-#endif
-#ifdef CONFIG_ZONE_DMA32
-		zone_page_state(&zones[ZONE_DMA32], NR_FREE_PAGES) +
-#endif
-		zone_page_state(&zones[ZONE_NORMAL], NR_FREE_PAGES);
-
-	max = node_free_pages / FRACTION_OF_NODE_MEM;
-
-	num_cpus_on_node = cpumask_weight(cpumask_of_node(node));
-	max /= num_cpus_on_node;
-
-	return max(max, min_pages);
-}
-
-static long min_pages_to_free(struct quicklist *q,
-	unsigned long min_pages, long max_free)
-{
-	long pages_to_free;
-
-	pages_to_free = q->nr_pages - max_pages(min_pages);
-
-	return min(pages_to_free, max_free);
-}
-
-/*
- * Trim down the number of pages in the quicklist
- */
-void quicklist_trim(int nr, void (*dtor)(void *),
-	unsigned long min_pages, unsigned long max_free)
-{
-	long pages_to_free;
-	struct quicklist *q;
-
-	q = &get_cpu_var(quicklist)[nr];
-	if (q->nr_pages > min_pages) {
-		pages_to_free = min_pages_to_free(q, min_pages, max_free);
-
-		while (pages_to_free > 0) {
-			/*
-			 * We pass a gfp_t of 0 to quicklist_alloc here
-			 * because we will never call into the page allocator.
-			 */
-			void *p = quicklist_alloc(nr, 0, NULL);
-
-			if (dtor)
-				dtor(p);
-			free_page((unsigned long)p);
-			pages_to_free--;
-		}
-	}
-	put_cpu_var(quicklist);
-}
-
-unsigned long quicklist_total_size(void)
-{
-	unsigned long count = 0;
-	int cpu;
-	struct quicklist *ql, *q;
-
-	for_each_online_cpu(cpu) {
-		ql = per_cpu(quicklist, cpu);
-		for (q = ql; q < ql + CONFIG_NR_QUICK; q++)
-			count += q->nr_pages;
-	}
-	return count;
-}
-
-- 
cgit v1.2.3


From 782de70c42930baae55234f3df0dc90774924447 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Mon, 23 Sep 2019 15:35:31 -0700
Subject: mm: consolidate pgtable_cache_init() and pgd_cache_init()

Both pgtable_cache_init() and pgd_cache_init() are used to initialize kmem
cache for page table allocations on several architectures that do not use
PAGE_SIZE tables for one or more levels of the page table hierarchy.

Most architectures do not implement these functions and use __weak default
NOP implementation of pgd_cache_init().  Since there is no such default
for pgtable_cache_init(), its empty stub is duplicated among most
architectures.

Rename the definitions of pgd_cache_init() to pgtable_cache_init() and
drop empty stubs of pgtable_cache_init().

Link: http://lkml.kernel.org/r/1566457046-22637-1-git-send-email-rppt@linux.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Will Deacon <will@kernel.org>		[arm64]
Acked-by: Thomas Gleixner <tglx@linutronix.de>	[x86]
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/asm/pgtable.h      |  5 -----
 arch/arc/include/asm/pgtable.h        |  5 -----
 arch/arm/include/asm/pgtable-nommu.h  |  5 -----
 arch/arm/include/asm/pgtable.h        |  2 --
 arch/arm64/include/asm/pgtable.h      |  2 --
 arch/arm64/mm/pgd.c                   |  2 +-
 arch/c6x/include/asm/pgtable.h        |  5 -----
 arch/csky/include/asm/pgtable.h       |  5 -----
 arch/h8300/include/asm/pgtable.h      |  6 ------
 arch/hexagon/include/asm/pgtable.h    |  3 ---
 arch/hexagon/mm/Makefile              |  2 +-
 arch/hexagon/mm/pgalloc.c             | 10 ----------
 arch/ia64/include/asm/pgtable.h       |  5 -----
 arch/m68k/include/asm/pgtable_mm.h    |  5 -----
 arch/m68k/include/asm/pgtable_no.h    |  5 -----
 arch/microblaze/include/asm/pgtable.h |  7 -------
 arch/mips/include/asm/pgtable.h       |  5 -----
 arch/nds32/include/asm/pgtable.h      |  2 --
 arch/nios2/include/asm/pgtable.h      |  2 --
 arch/openrisc/include/asm/pgtable.h   |  5 -----
 arch/parisc/include/asm/pgtable.h     |  2 --
 arch/powerpc/include/asm/pgtable.h    |  1 -
 arch/riscv/include/asm/pgtable.h      |  5 -----
 arch/s390/include/asm/pgtable.h       |  5 -----
 arch/sh/include/asm/pgtable.h         |  5 -----
 arch/sh/mm/nommu.c                    |  4 ----
 arch/sparc/include/asm/pgtable_32.h   |  5 -----
 arch/sparc/include/asm/pgtable_64.h   |  1 -
 arch/um/include/asm/pgtable.h         |  2 --
 arch/unicore32/include/asm/pgtable.h  |  2 --
 arch/x86/include/asm/pgtable_32.h     |  1 -
 arch/x86/include/asm/pgtable_64.h     |  2 --
 arch/x86/mm/pgtable.c                 |  6 +-----
 arch/xtensa/include/asm/pgtable.h     |  1 -
 include/asm-generic/pgtable.h         |  2 +-
 init/main.c                           |  3 +--
 36 files changed, 5 insertions(+), 130 deletions(-)
 delete mode 100644 arch/hexagon/mm/pgalloc.c

(limited to 'include')

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 89c2032f9960..065b57f408c3 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -359,11 +359,6 @@ extern void paging_init(void);
 
 #include <asm-generic/pgtable.h>
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 /* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT.  */
 #define HAVE_ARCH_UNMAPPED_AREA
 
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 1d87c18a2976..7addd0301c51 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -395,11 +395,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 /* to cope with aliasing VIPT cache */
 #define HAVE_ARCH_UNMAPPED_AREA
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index d0de24f06724..010fa1a35a68 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -70,11 +70,6 @@ typedef pte_t *pte_addr_t;
  */
 extern unsigned int kobjsize(const void *objp);
 
-/*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init()	do { } while (0)
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index f2e990dc27e7..3ae120cd1715 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -368,8 +368,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define pgtable_cache_init() do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 57427d17580e..7576df00eb50 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -861,8 +861,6 @@ extern int kern_addr_valid(unsigned long addr);
 
 #include <asm-generic/pgtable.h>
 
-static inline void pgtable_cache_init(void) { }
-
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 7548f9ca1f11..4a64089e5771 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -35,7 +35,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 		kmem_cache_free(pgd_cache, pgd);
 }
 
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
 {
 	if (PGD_SIZE == PAGE_SIZE)
 		return;
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 0bd805964ea6..0b6919c00413 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -59,11 +59,6 @@ extern unsigned long empty_zero_page;
 
 #define swapper_pg_dir ((pgd_t *) 0)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /*
  * c6x is !MMU, so define the simpliest implementation
  */
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index c429a6f347de..0040b3a05b61 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -296,11 +296,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define kern_addr_valid(addr)	(1)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do {} while (0)
-
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
 
diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h
index a99caa49d265..4d00152fab58 100644
--- a/arch/h8300/include/asm/pgtable.h
+++ b/arch/h8300/include/asm/pgtable.h
@@ -4,7 +4,6 @@
 #define __ARCH_USE_5LEVEL_HACK
 #include <asm-generic/pgtable-nopud.h>
 #include <asm-generic/pgtable.h>
-#define pgtable_cache_init()   do { } while (0)
 extern void paging_init(void);
 #define PAGE_NONE		__pgprot(0)    /* these mean nothing to NO_MM */
 #define PAGE_SHARED		__pgprot(0)    /* these mean nothing to NO_MM */
@@ -34,11 +33,6 @@ static inline int pte_file(pte_t pte) { return 0; }
 extern unsigned int kobjsize(const void *objp);
 extern int is_in_rom(unsigned long);
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()   do { } while (0)
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h
index a3ff6d24c09e..2fec20ad939e 100644
--- a/arch/hexagon/include/asm/pgtable.h
+++ b/arch/hexagon/include/asm/pgtable.h
@@ -431,9 +431,6 @@ static inline int pte_exec(pte_t pte)
 
 #define __pte_offset(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
-/*  I think this is in case we have page table caches; needed by init/main.c  */
-#define pgtable_cache_init()    do { } while (0)
-
 /*
  * Swap/file PTE definitions.  If _PAGE_PRESENT is zero, the rest of the PTE is
  * interpreted as swap information.  The remaining free bits are interpreted as
diff --git a/arch/hexagon/mm/Makefile b/arch/hexagon/mm/Makefile
index 1894263ae5bc..893838499591 100644
--- a/arch/hexagon/mm/Makefile
+++ b/arch/hexagon/mm/Makefile
@@ -3,5 +3,5 @@
 # Makefile for Hexagon memory management subsystem
 #
 
-obj-y := init.o pgalloc.o ioremap.o uaccess.o vm_fault.o cache.o
+obj-y := init.o ioremap.o uaccess.o vm_fault.o cache.o
 obj-y += copy_to_user.o copy_from_user.o strnlen_user.o vm_tlb.o
diff --git a/arch/hexagon/mm/pgalloc.c b/arch/hexagon/mm/pgalloc.c
deleted file mode 100644
index 4d4316140237..000000000000
--- a/arch/hexagon/mm/pgalloc.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- */
-
-#include <linux/init.h>
-
-void __init pgtable_cache_init(void)
-{
-}
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index b1e7468eb65a..d602e7c622db 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -566,11 +566,6 @@ extern struct page *zero_page_memmap_ptr;
 #define KERNEL_TR_PAGE_SHIFT	_PAGE_SIZE_64M
 #define KERNEL_TR_PAGE_SIZE	(1 << KERNEL_TR_PAGE_SHIFT)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 /* These tell get_user_pages() that the first gate page is accessible from user-level.  */
 #define FIXADDR_USER_START	GATE_ADDR
 #ifdef HAVE_BUGGY_SEGREL
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index cc476c1d72e5..646c174fff99 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -176,9 +176,4 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot);
 #include <asm-generic/pgtable.h>
 #endif /* !__ASSEMBLY__ */
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 #endif /* _M68K_PGTABLE_H */
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index 69e271101223..c18165b0d904 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -44,11 +44,6 @@ extern void paging_init(void);
  */
 #define ZERO_PAGE(vaddr)	(virt_to_page(0))
 
-/*
- * No page table caches to initialise.
- */
-#define pgtable_cache_init()	do { } while (0)
-
 /*
  * All 32bit addresses are effectively valid for vmalloc...
  * Sort of meaningless for non-VM targets.
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 142d3f004848..954b69af451f 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -46,8 +46,6 @@ extern int mem_init_done;
 
 #define swapper_pg_dir ((pgd_t *) NULL)
 
-#define pgtable_cache_init()	do {} while (0)
-
 #define arch_enter_lazy_cpu_mode()	do {} while (0)
 
 #define pgprot_noncached_wc(prot)	prot
@@ -526,11 +524,6 @@ extern unsigned long iopa(unsigned long addr);
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 #define kern_addr_valid(addr)	(1)
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 void do_page_fault(struct pt_regs *regs, unsigned long address,
 		   unsigned long error_code);
 
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 4dca733d5076..f85bd5b15f51 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -661,9 +661,4 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 #endif /* _ASM_PGTABLE_H */
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index c70cc56bec09..0588ec99725c 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -403,8 +403,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  * into virtual address `from'
  */
 
-#define pgtable_cache_init()       do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASMNDS32_PGTABLE_H */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index 95237b7f6fc1..99985d8b7166 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -291,8 +291,6 @@ static inline void pte_clear(struct mm_struct *mm,
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init()		do { } while (0)
-
 extern void __init paging_init(void);
 extern void __init mmu_init(void);
 
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 2fe9ff5b5d6f..248d22d8faa7 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -443,11 +443,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #include <asm-generic/pgtable.h>
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()		do { } while (0)
-
 typedef pte_t *pte_addr_t;
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 6d58c1739b42..4ac374b3a99f 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -132,8 +132,6 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define PTRS_PER_PTE    (1UL << BITS_PER_PTE)
 
 /* Definitions for 2nd level */
-#define pgtable_cache_init()	do { } while (0)
-
 #define PMD_SHIFT       (PLD_SHIFT + BITS_PER_PTE)
 #define PMD_SIZE	(1UL << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE-1))
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 8b7865a2d576..4053b2ab427c 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -87,7 +87,6 @@ extern unsigned long ioremap_bot;
 unsigned long vmalloc_to_phys(void *vmalloc_addr);
 
 void pgtable_cache_add(unsigned int shift);
-void pgtable_cache_init(void);
 
 #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
 void mark_initmem_nx(void);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 80905b27ee98..c60123f018f5 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -424,11 +424,6 @@ extern void *dtb_early_va;
 extern void setup_bootmem(void);
 extern void paging_init(void);
 
-static inline void pgtable_cache_init(void)
-{
-	/* No page table caches to initialize */
-}
-
 #define VMALLOC_SIZE     (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 8f59454ac407..36c578c0ff96 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1682,11 +1682,6 @@ extern void s390_reset_cmma(struct mm_struct *mm);
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-/*
- * No page table caches to initialise
- */
-static inline void pgtable_cache_init(void) { }
-
 #include <asm-generic/pgtable.h>
 
 #endif /* _S390_PAGE_H */
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 9085d1142fa3..cbd0f3c55a0c 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -123,11 +123,6 @@ typedef pte_t *pte_addr_t;
 
 #define pte_pfn(x)		((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
 
-/*
- * Initialise the page table caches
- */
-extern void pgtable_cache_init(void);
-
 struct vm_area_struct;
 struct mm_struct;
 
diff --git a/arch/sh/mm/nommu.c b/arch/sh/mm/nommu.c
index cc779a90d917..dca946f426c6 100644
--- a/arch/sh/mm/nommu.c
+++ b/arch/sh/mm/nommu.c
@@ -97,7 +97,3 @@ void __init page_table_range_init(unsigned long start, unsigned long end,
 void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 }
-
-void pgtable_cache_init(void)
-{
-}
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 4eebed6c6781..31da44826645 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -445,9 +445,4 @@ static inline int io_remap_pfn_range(struct vm_area_struct *vma,
 /* We provide our own get_unmapped_area to cope with VA holes for userland */
 #define HAVE_ARCH_UNMAPPED_AREA
 
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-
 #endif /* !(_SPARC_PGTABLE_H) */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 1599de730532..b57f9c631eca 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -1135,7 +1135,6 @@ unsigned long get_fb_unmapped_area(struct file *filp, unsigned long,
 				   unsigned long);
 #define HAVE_ARCH_FB_UNMAPPED_AREA
 
-void pgtable_cache_init(void);
 void sun4v_register_fault_status(void);
 void sun4v_ktsb_register(void);
 void __init cheetah_ecache_flush_init(void);
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index e4d3ed980d82..36a44d58f373 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -32,8 +32,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 /* zero page used for uninitialized stuff */
 extern unsigned long *empty_zero_page;
 
-#define pgtable_cache_init() do ; while (0)
-
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
  * current 8MB value just means that there will be a 8MB "hole" after the
  * physical memory until the kernel virtual memory starts.  That means that
diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h
index 126e961a8cb0..c8f7ba12f309 100644
--- a/arch/unicore32/include/asm/pgtable.h
+++ b/arch/unicore32/include/asm/pgtable.h
@@ -285,8 +285,6 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 #include <asm-generic/pgtable.h>
 
-#define pgtable_cache_init() do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __UNICORE_PGTABLE_H__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index b9b9f8aa963e..0dca7f7aeff2 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -29,7 +29,6 @@ extern pgd_t swapper_pg_dir[1024];
 extern pgd_t initial_page_table[1024];
 extern pmd_t initial_pg_pmd[];
 
-static inline void pgtable_cache_init(void) { }
 void paging_init(void);
 void sync_initial_page_table(void);
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index a26d2d58b9c9..0b6c4042942a 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -241,8 +241,6 @@ extern void cleanup_highmap(void);
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 
-#define pgtable_cache_init()   do { } while (0)
-
 #define PAGE_AGP    PAGE_KERNEL_NOCACHE
 #define HAVE_PAGE_AGP 1
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 44816ff6411f..463940faf52f 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -357,7 +357,7 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
 
 static struct kmem_cache *pgd_cache;
 
-void __init pgd_cache_init(void)
+void __init pgtable_cache_init(void)
 {
 	/*
 	 * When PAE kernel is running as a Xen domain, it does not use
@@ -402,10 +402,6 @@ static inline void _pgd_free(pgd_t *pgd)
 }
 #else
 
-void __init pgd_cache_init(void)
-{
-}
-
 static inline pgd_t *_pgd_alloc(void)
 {
 	return (pgd_t *)__get_free_pages(GFP_PGTABLE_USER,
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index ce3ff5e591b9..3f7fe5a8c286 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -238,7 +238,6 @@ extern void paging_init(void);
 # define swapper_pg_dir NULL
 static inline void paging_init(void) { }
 #endif
-static inline void pgtable_cache_init(void) { }
 
 /*
  * The pmd contains the kernel virtual address of the pte page.
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 75d9d68a6de7..fae6abb3d586 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1126,7 +1126,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 static inline void init_espfix_bsp(void) { }
 #endif
 
-extern void __init pgd_cache_init(void);
+extern void __init pgtable_cache_init(void);
 
 #ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
 static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
diff --git a/init/main.c b/init/main.c
index 3ca67e8b92fd..99a5f55e0d02 100644
--- a/init/main.c
+++ b/init/main.c
@@ -507,7 +507,7 @@ void __init __weak mem_encrypt_init(void) { }
 
 void __init __weak poking_init(void) { }
 
-void __init __weak pgd_cache_init(void) { }
+void __init __weak pgtable_cache_init(void) { }
 
 bool initcall_debug;
 core_param(initcall_debug, initcall_debug, bool, 0644);
@@ -565,7 +565,6 @@ static void __init mm_init(void)
 	init_espfix_bsp();
 	/* Should be run after espfix64 is set up. */
 	pti_init();
-	pgd_cache_init();
 }
 
 void __init __weak arch_call_rest_init(void)
-- 
cgit v1.2.3


From 902ce63b337381092ff865f542e854ff3d0ebe2b Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 23 Sep 2019 15:35:46 -0700
Subject: driver/base/memory.c: validate memory block size early

Let's validate the memory block size early, when initializing the memory
device infrastructure.  Fail hard in case the value is not suitable.

As nobody checks the return value of memory_dev_init(), turn it into a
void function and fail with a panic in all scenarios instead.  Otherwise,
we'll crash later during boot when core/drivers expect that the memory
device infrastructure (including memory_block_size_bytes()) works as
expected.

I think long term, we should move the whole memory block size
configuration (set_memory_block_size_order() and
memory_block_size_bytes()) into drivers/base/memory.c.

Link: http://lkml.kernel.org/r/20190806090142.22709-1-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c  | 31 +++++++++----------------------
 include/linux/memory.h |  6 +++---
 2 files changed, 12 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 63fc5aa51c21..763154c82eb3 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -100,21 +100,6 @@ unsigned long __weak memory_block_size_bytes(void)
 }
 EXPORT_SYMBOL_GPL(memory_block_size_bytes);
 
-static unsigned long get_memory_block_size(void)
-{
-	unsigned long block_sz;
-
-	block_sz = memory_block_size_bytes();
-
-	/* Validate blk_sz is a power of 2 and not less than section size */
-	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
-		WARN_ON(1);
-		block_sz = MIN_MEMORY_BLOCK_SIZE;
-	}
-
-	return block_sz;
-}
-
 /*
  * Show the first physical section index (number) of this memory block.
  */
@@ -461,7 +446,7 @@ static DEVICE_ATTR_RO(removable);
 static ssize_t block_size_bytes_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
-	return sprintf(buf, "%lx\n", get_memory_block_size());
+	return sprintf(buf, "%lx\n", memory_block_size_bytes());
 }
 
 static DEVICE_ATTR_RO(block_size_bytes);
@@ -812,19 +797,22 @@ static const struct attribute_group *memory_root_attr_groups[] = {
 /*
  * Initialize the sysfs support for memory devices...
  */
-int __init memory_dev_init(void)
+void __init memory_dev_init(void)
 {
 	int ret;
 	int err;
 	unsigned long block_sz, nr;
 
+	/* Validate the configured memory block size */
+	block_sz = memory_block_size_bytes();
+	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
+		panic("Memory block size not suitable: 0x%lx\n", block_sz);
+	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
+
 	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
 	if (ret)
 		goto out;
 
-	block_sz = get_memory_block_size();
-	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-
 	/*
 	 * Create entries for memory sections that were found
 	 * during boot and have been initialized
@@ -840,8 +828,7 @@ int __init memory_dev_init(void)
 
 out:
 	if (ret)
-		printk(KERN_ERR "%s() failed: %d\n", __func__, ret);
-	return ret;
+		panic("%s() failed: %d\n", __func__, ret);
 }
 
 /**
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 02e633f3ede0..b3d9df060626 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -80,9 +80,9 @@ struct mem_section;
 #define IPC_CALLBACK_PRI        10
 
 #ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
-static inline int memory_dev_init(void)
+static inline void memory_dev_init(void)
 {
-	return 0;
+	return;
 }
 static inline int register_memory_notifier(struct notifier_block *nb)
 {
@@ -113,7 +113,7 @@ extern int register_memory_isolate_notifier(struct notifier_block *nb);
 extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size);
 void remove_memory_block_devices(unsigned long start, unsigned long size);
-extern int memory_dev_init(void);
+extern void memory_dev_init(void);
 extern int memory_notify(unsigned long val, void *v);
 extern int memory_isolate_notify(unsigned long val, void *v);
 extern struct memory_block *find_memory_block(struct mem_section *);
-- 
cgit v1.2.3


From b6c88d3b9d38f9448e0fcf44847a075ea81d5ca2 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 23 Sep 2019 15:35:49 -0700
Subject: drivers/base/memory.c: don't store end_section_nr in memory blocks

Each memory block spans the same amount of sections/pages/bytes.  The size
is determined before the first memory block is created.  No need to store
what we can easily calculate - and the calculations even look simpler now.

Michal brought up the idea of variable-sized memory blocks.  However, if
we ever implement something like this, we will need an API compatibility
switch and reworks at various places (most code assumes a fixed memory
block size).  So let's cleanup what we have right now.

While at it, fix the variable naming in register_mem_sect_under_node() -
we no longer talk about a single section.

Link: http://lkml.kernel.org/r/20190809110200.2746-1-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c  |  1 -
 drivers/base/node.c    | 10 +++++-----
 include/linux/memory.h |  1 -
 mm/memory_hotplug.c    |  2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 763154c82eb3..6bea4f3f8040 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -656,7 +656,6 @@ static int init_memory_block(struct memory_block **memory,
 		return -ENOMEM;
 
 	mem->start_section_nr = block_id * sections_per_block;
-	mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
 	mem->state = state;
 	start_pfn = section_nr_to_pfn(mem->start_section_nr);
 	mem->phys_device = arch_get_memory_phys_device(start_pfn);
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 840c95baa1d8..257449cf061f 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -756,13 +756,13 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 static int register_mem_sect_under_node(struct memory_block *mem_blk,
 					 void *arg)
 {
+	unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
+	unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
+	unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
 	int ret, nid = *(int *)arg;
-	unsigned long pfn, sect_start_pfn, sect_end_pfn;
+	unsigned long pfn;
 
-	sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
-	sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr);
-	sect_end_pfn += PAGES_PER_SECTION - 1;
-	for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
+	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
 		int page_nid;
 
 		/*
diff --git a/include/linux/memory.h b/include/linux/memory.h
index b3d9df060626..0ebb105eb261 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -25,7 +25,6 @@
 
 struct memory_block {
 	unsigned long start_section_nr;
-	unsigned long end_section_nr;
 	unsigned long state;		/* serialized by the dev->lock */
 	int section_count;		/* serialized by mem_sysfs_mutex */
 	int online_type;		/* for passing data to online routine */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 5b8811945bbb..3706a137d880 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1654,7 +1654,7 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
 		phys_addr_t beginpa, endpa;
 
 		beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));
-		endpa = PFN_PHYS(section_nr_to_pfn(mem->end_section_nr + 1))-1;
+		endpa = beginpa + memory_block_size_bytes() - 1;
 		pr_warn("removing memory fails, because memory [%pa-%pa] is onlined\n",
 			&beginpa, &endpa);
 
-- 
cgit v1.2.3


From 688fcbfc06e4fdfbb7e1d5a942a1460fe6379d2d Mon Sep 17 00:00:00 2001
From: Pengfei Li <lpf.vector@gmail.com>
Date: Mon, 23 Sep 2019 15:36:39 -0700
Subject: mm/vmalloc: modify struct vmap_area to reduce its size

Objective
---------

The current implementation of struct vmap_area wasted space.

After applying this commit, sizeof(struct vmap_area) has been
reduced from 11 words to 8 words.

Description
-----------

1) Pack "subtree_max_size", "vm" and "purge_list".  This is no problem
   because

A) "subtree_max_size" is only used when vmap_area is in "free" tree

B) "vm" is only used when vmap_area is in "busy" tree

C) "purge_list" is only used when vmap_area is in vmap_purge_list

2) Eliminate "flags".

;Since only one flag VM_VM_AREA is being used, and the same thing can be
done by judging whether "vm" is NULL, then the "flags" can be eliminated.

Link: http://lkml.kernel.org/r/20190716152656.12255-3-lpf.vector@gmail.com
Signed-off-by: Pengfei Li <lpf.vector@gmail.com>
Suggested-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sonymobile.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 20 +++++++++++++-------
 mm/vmalloc.c            | 24 ++++++++++--------------
 2 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index dfa718ffdd4f..4e7809408073 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -53,15 +53,21 @@ struct vmap_area {
 	unsigned long va_start;
 	unsigned long va_end;
 
-	/*
-	 * Largest available free size in subtree.
-	 */
-	unsigned long subtree_max_size;
-	unsigned long flags;
 	struct rb_node rb_node;         /* address sorted rbtree */
 	struct list_head list;          /* address sorted list */
-	struct llist_node purge_list;    /* "lazy purge" list */
-	struct vm_struct *vm;
+
+	/*
+	 * The following three variables can be packed, because
+	 * a vmap_area object is always one of the three states:
+	 *    1) in "free" tree (root is vmap_area_root)
+	 *    2) in "busy" tree (root is free_vmap_area_root)
+	 *    3) in purge list  (head is vmap_purge_list)
+	 */
+	union {
+		unsigned long subtree_max_size; /* in "free" tree */
+		struct vm_struct *vm;           /* in "busy" tree */
+		struct llist_node purge_list;   /* in purge list */
+	};
 };
 
 /*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d535ef125bda..f095843fc243 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -329,7 +329,6 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define DEBUG_AUGMENT_PROPAGATE_CHECK 0
 #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 0
 
-#define VM_VM_AREA	0x04
 
 static DEFINE_SPINLOCK(vmap_area_lock);
 /* Export for kexec only */
@@ -1115,7 +1114,7 @@ retry:
 
 	va->va_start = addr;
 	va->va_end = addr + size;
-	va->flags = 0;
+	va->vm = NULL;
 	insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
 
 	spin_unlock(&vmap_area_lock);
@@ -1928,7 +1927,6 @@ void __init vmalloc_init(void)
 		if (WARN_ON_ONCE(!va))
 			continue;
 
-		va->flags = VM_VM_AREA;
 		va->va_start = (unsigned long)tmp->addr;
 		va->va_end = va->va_start + tmp->size;
 		va->vm = tmp;
@@ -2026,7 +2024,6 @@ static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
 	vm->size = va->va_end - va->va_start;
 	vm->caller = caller;
 	va->vm = vm;
-	va->flags |= VM_VM_AREA;
 	spin_unlock(&vmap_area_lock);
 }
 
@@ -2131,10 +2128,10 @@ struct vm_struct *find_vm_area(const void *addr)
 	struct vmap_area *va;
 
 	va = find_vmap_area((unsigned long)addr);
-	if (va && va->flags & VM_VM_AREA)
-		return va->vm;
+	if (!va)
+		return NULL;
 
-	return NULL;
+	return va->vm;
 }
 
 /**
@@ -2155,11 +2152,10 @@ struct vm_struct *remove_vm_area(const void *addr)
 
 	spin_lock(&vmap_area_lock);
 	va = __find_vmap_area((unsigned long)addr);
-	if (va && va->flags & VM_VM_AREA) {
+	if (va && va->vm) {
 		struct vm_struct *vm = va->vm;
 
 		va->vm = NULL;
-		va->flags &= ~VM_VM_AREA;
 		spin_unlock(&vmap_area_lock);
 
 		kasan_free_shadow(vm);
@@ -2862,7 +2858,7 @@ long vread(char *buf, char *addr, unsigned long count)
 		if (!count)
 			break;
 
-		if (!(va->flags & VM_VM_AREA))
+		if (!va->vm)
 			continue;
 
 		vm = va->vm;
@@ -2942,7 +2938,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
 		if (!count)
 			break;
 
-		if (!(va->flags & VM_VM_AREA))
+		if (!va->vm)
 			continue;
 
 		vm = va->vm;
@@ -3485,10 +3481,10 @@ static int s_show(struct seq_file *m, void *p)
 	va = list_entry(p, struct vmap_area, list);
 
 	/*
-	 * s_show can encounter race with remove_vm_area, !VM_VM_AREA on
-	 * behalf of vmap area is being tear down or vm_map_ram allocation.
+	 * s_show can encounter race with remove_vm_area, !vm on behalf
+	 * of vmap area is being tear down or vm_map_ram allocation.
 	 */
-	if (!(va->flags & VM_VM_AREA)) {
+	if (!va->vm) {
 		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
 			(void *)va->va_start, (void *)va->va_end,
 			va->va_end - va->va_start);
-- 
cgit v1.2.3


From 494330855641269c8a49f1580f0d4e2ead693245 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 23 Sep 2019 15:37:32 -0700
Subject: mm, compaction: raise compaction priority after it withdrawns

Mike Kravetz reports that "hugetlb allocations could stall for minutes or
hours when should_compact_retry() would return true more often then it
should.  Specifically, this was in the case where compact_result was
COMPACT_DEFERRED and COMPACT_PARTIAL_SKIPPED and no progress was being
made."

The problem is that the compaction_withdrawn() test in
should_compact_retry() includes compaction outcomes that are only possible
on low compaction priority, and results in a retry without increasing the
priority.  This may result in furter reclaim, and more incomplete
compaction attempts.

With this patch, compaction priority is raised when possible, or
should_compact_retry() returns false.

The COMPACT_SKIPPED result doesn't really fit together with the other
outcomes in compaction_withdrawn(), as that's a result caused by
insufficient order-0 pages, not due to low compaction priority.  With this
patch, it is moved to a new compaction_needs_reclaim() function, and for
that outcome we keep the current logic of retrying if it looks like
reclaim will be able to help.

Link: http://lkml.kernel.org/r/20190806014744.15446-4-mike.kravetz@oracle.com
Reported-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Tested-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h | 22 +++++++++++++++++-----
 mm/page_alloc.c            | 16 ++++++++++++----
 2 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 9569e7c786d3..4b898cdbdf05 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -129,11 +129,8 @@ static inline bool compaction_failed(enum compact_result result)
 	return false;
 }
 
-/*
- * Compaction  has backed off for some reason. It might be throttling or
- * lock contention. Retrying is still worthwhile.
- */
-static inline bool compaction_withdrawn(enum compact_result result)
+/* Compaction needs reclaim to be performed first, so it can continue. */
+static inline bool compaction_needs_reclaim(enum compact_result result)
 {
 	/*
 	 * Compaction backed off due to watermark checks for order-0
@@ -142,6 +139,16 @@ static inline bool compaction_withdrawn(enum compact_result result)
 	if (result == COMPACT_SKIPPED)
 		return true;
 
+	return false;
+}
+
+/*
+ * Compaction has backed off for some reason after doing some work or none
+ * at all. It might be throttling or lock contention. Retrying might be still
+ * worthwhile, but with a higher priority if allowed.
+ */
+static inline bool compaction_withdrawn(enum compact_result result)
+{
 	/*
 	 * If compaction is deferred for high-order allocations, it is
 	 * because sync compaction recently failed. If this is the case
@@ -207,6 +214,11 @@ static inline bool compaction_failed(enum compact_result result)
 	return false;
 }
 
+static inline bool compaction_needs_reclaim(enum compact_result result)
+{
+	return false;
+}
+
 static inline bool compaction_withdrawn(enum compact_result result)
 {
 	return true;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index df566c0f6729..591e026534c2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3954,15 +3954,23 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 	if (compaction_failed(compact_result))
 		goto check_priority;
 
+	/*
+	 * compaction was skipped because there are not enough order-0 pages
+	 * to work with, so we retry only if it looks like reclaim can help.
+	 */
+	if (compaction_needs_reclaim(compact_result)) {
+		ret = compaction_zonelist_suitable(ac, order, alloc_flags);
+		goto out;
+	}
+
 	/*
 	 * make sure the compaction wasn't deferred or didn't bail out early
 	 * due to locks contention before we declare that we should give up.
-	 * But do not retry if the given zonelist is not suitable for
-	 * compaction.
+	 * But the next retry should use a higher priority if allowed, so
+	 * we don't just keep bailing out endlessly.
 	 */
 	if (compaction_withdrawn(compact_result)) {
-		ret = compaction_zonelist_suitable(ac, order, alloc_flags);
-		goto out;
+		goto check_priority;
 	}
 
 	/*
-- 
cgit v1.2.3


From 9ef258bad7af2f6abee2bdfaaa6c41890d4f4db6 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 23 Sep 2019 15:37:41 -0700
Subject: thp: update split_huge_page_pmd() comment

According to 78ddc5347341 ("thp: rename split_huge_page_pmd() to
split_huge_pmd()"), update related comment.

Link: http://lkml.kernel.org/r/20190731033406.185285-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/mm/book3s64/hash_utils.c | 2 +-
 include/asm-generic/pgtable.h         | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 3410ea9f4de1..6c123760164e 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1748,7 +1748,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
 	/*
 	 * IF we try to do a HUGE PTE update after a withdraw is done.
 	 * we will find the below NULL. This happens when we do
-	 * split_huge_page_pmd
+	 * split_huge_pmd
 	 */
 	if (!hpte_slot_array)
 		return;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index fae6abb3d586..818691846c90 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1002,9 +1002,8 @@ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
  * need this). If THP is not enabled, the pmd can't go away under the
  * code even if MADV_DONTNEED runs, but if THP is enabled we need to
  * run a pmd_trans_unstable before walking the ptes after
- * split_huge_page_pmd returns (because it may have run when the pmd
- * become null, but then a page fault can map in a THP and not a
- * regular page).
+ * split_huge_pmd returns (because it may have run when the pmd become
+ * null, but then a page fault can map in a THP and not a regular page).
  */
 static inline int pmd_trans_unstable(pmd_t *pmd)
 {
-- 
cgit v1.2.3


From 60fbf0ab5da1c360e02b7f7d882bf1c0d8f7e32a Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Sep 2019 15:37:54 -0700
Subject: mm,thp: stats for file backed THP

In preparation for non-shmem THP, this patch adds a few stats and exposes
them in /proc/meminfo, /sys/bus/node/devices/<node>/meminfo, and
/proc/<pid>/task/<tid>/smaps.

This patch is mostly a rewrite of Kirill A.  Shutemov's earlier version:
https://lkml.kernel.org/r/20170126115819.58875-5-kirill.shutemov@linux.intel.com/

Link: http://lkml.kernel.org/r/20190801184244.3169074-5-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Acked-by: Rik van Riel <riel@surriel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c    | 6 ++++++
 fs/proc/meminfo.c      | 4 ++++
 fs/proc/task_mmu.c     | 4 +++-
 include/linux/mmzone.h | 2 ++
 mm/vmstat.c            | 2 ++
 5 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 257449cf061f..296546ffed6c 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -427,6 +427,8 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       "Node %d AnonHugePages:  %8lu kB\n"
 		       "Node %d ShmemHugePages: %8lu kB\n"
 		       "Node %d ShmemPmdMapped: %8lu kB\n"
+		       "Node %d FileHugePages: %8lu kB\n"
+		       "Node %d FilePmdMapped: %8lu kB\n"
 #endif
 			,
 		       nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
@@ -452,6 +454,10 @@ static ssize_t node_read_meminfo(struct device *dev,
 		       nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
 				       HPAGE_PMD_NR),
 		       nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
+				       HPAGE_PMD_NR),
+		       nid, K(node_page_state(pgdat, NR_FILE_THPS) *
+				       HPAGE_PMD_NR),
+		       nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
 				       HPAGE_PMD_NR)
 #endif
 		       );
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 4bd80e68eb03..ac9247371871 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -132,6 +132,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 		    global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
 	show_val_kb(m, "ShmemPmdMapped: ",
 		    global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+	show_val_kb(m, "FileHugePages: ",
+		    global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+	show_val_kb(m, "FilePmdMapped: ",
+		    global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
 #endif
 
 #ifdef CONFIG_CMA
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index ea1630465474..9442631fd4af 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -417,6 +417,7 @@ struct mem_size_stats {
 	unsigned long lazyfree;
 	unsigned long anonymous_thp;
 	unsigned long shmem_thp;
+	unsigned long file_thp;
 	unsigned long swap;
 	unsigned long shared_hugetlb;
 	unsigned long private_hugetlb;
@@ -588,7 +589,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 	else if (is_zone_device_page(page))
 		/* pass */;
 	else
-		VM_BUG_ON_PAGE(1, page);
+		mss->file_thp += HPAGE_PMD_SIZE;
 	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked);
 }
 #else
@@ -809,6 +810,7 @@ static void __show_smap(struct seq_file *m, const struct mem_size_stats *mss,
 	SEQ_PUT_DEC(" kB\nLazyFree:       ", mss->lazyfree);
 	SEQ_PUT_DEC(" kB\nAnonHugePages:  ", mss->anonymous_thp);
 	SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
+	SEQ_PUT_DEC(" kB\nFilePmdMapped: ", mss->file_thp);
 	SEQ_PUT_DEC(" kB\nShared_Hugetlb: ", mss->shared_hugetlb);
 	seq_put_decimal_ull_width(m, " kB\nPrivate_Hugetlb: ",
 				  mss->private_hugetlb >> 10, 7);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3f38c30d2f13..aafb7c38c627 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -235,6 +235,8 @@ enum node_stat_item {
 	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 	NR_SHMEM_THPS,
 	NR_SHMEM_PMDMAPPED,
+	NR_FILE_THPS,
+	NR_FILE_PMDMAPPED,
 	NR_ANON_THPS,
 	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_VMSCAN_WRITE,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index fd7e16ca6996..6afc892a148a 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1158,6 +1158,8 @@ const char * const vmstat_text[] = {
 	"nr_shmem",
 	"nr_shmem_hugepages",
 	"nr_shmem_pmdmapped",
+	"nr_file_hugepages",
+	"nr_file_pmdmapped",
 	"nr_anon_transparent_hugepages",
 	"nr_unstable",
 	"nr_vmscan_write",
-- 
cgit v1.2.3


From 09d91cda0e8207c1f14ee0d572f61a53dbcdaf85 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Sep 2019 15:38:03 -0700
Subject: mm,thp: avoid writes to file with THP in pagecache

In previous patch, an application could put part of its text section in
THP via madvise().  These THPs will be protected from writes when the
application is still running (TXTBSY).  However, after the application
exits, the file is available for writes.

This patch avoids writes to file THP by dropping page cache for the file
when the file is open for write.  A new counter nr_thps is added to struct
address_space.  In do_dentry_open(), if the file is open for write and
nr_thps is non-zero, we drop page cache for the whole file.

Link: http://lkml.kernel.org/r/20190801184244.3169074-8-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Reported-by: kbuild test robot <lkp@intel.com>
Acked-by: Rik van Riel <riel@surriel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c         |  3 +++
 fs/open.c          |  8 ++++++++
 include/linux/fs.h | 32 ++++++++++++++++++++++++++++++++
 mm/filemap.c       |  1 +
 mm/khugepaged.c    |  4 +++-
 5 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/fs/inode.c b/fs/inode.c
index 64bf28cf05cd..fef457a42882 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -181,6 +181,9 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->flags = 0;
 	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	atomic_set(&mapping->nr_thps, 0);
+#endif
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
 	mapping->private_data = NULL;
 	mapping->writeback_index = 0;
diff --git a/fs/open.c b/fs/open.c
index a59abe3c669a..c60cd22cc052 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -818,6 +818,14 @@ static int do_dentry_open(struct file *f,
 		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO)
 			return -EINVAL;
 	}
+
+	/*
+	 * XXX: Huge page cache doesn't support writing yet. Drop all page
+	 * cache for this file before processing writes.
+	 */
+	if ((f->f_mode & FMODE_WRITE) && filemap_nr_thps(inode->i_mapping))
+		truncate_pagecache(inode, 0);
+
 	return 0;
 
 cleanup_all:
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 866268c2c6e3..b0c6b0d34d02 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -429,6 +429,7 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
  * @i_pages: Cached pages.
  * @gfp_mask: Memory allocation flags to use for allocating pages.
  * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @nr_thps: Number of THPs in the pagecache (non-shmem only).
  * @i_mmap: Tree of private and shared mappings.
  * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
  * @nrpages: Number of page entries, protected by the i_pages lock.
@@ -446,6 +447,10 @@ struct address_space {
 	struct xarray		i_pages;
 	gfp_t			gfp_mask;
 	atomic_t		i_mmap_writable;
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	/* number of thp, only for non-shmem files */
+	atomic_t		nr_thps;
+#endif
 	struct rb_root_cached	i_mmap;
 	struct rw_semaphore	i_mmap_rwsem;
 	unsigned long		nrpages;
@@ -2798,6 +2803,33 @@ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
 	return errseq_sample(&mapping->wb_err);
 }
 
+static inline int filemap_nr_thps(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	return atomic_read(&mapping->nr_thps);
+#else
+	return 0;
+#endif
+}
+
+static inline void filemap_nr_thps_inc(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	atomic_inc(&mapping->nr_thps);
+#else
+	WARN_ON_ONCE(1);
+#endif
+}
+
+static inline void filemap_nr_thps_dec(struct address_space *mapping)
+{
+#ifdef CONFIG_READ_ONLY_THP_FOR_FS
+	atomic_dec(&mapping->nr_thps);
+#else
+	WARN_ON_ONCE(1);
+#endif
+}
+
 extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
 			   int datasync);
 extern int vfs_fsync(struct file *file, int datasync);
diff --git a/mm/filemap.c b/mm/filemap.c
index 91fe3a08ca4a..1146fcfa3215 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -205,6 +205,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 	} else if (PageTransHuge(page)) {
 		__dec_node_page_state(page, NR_FILE_THPS);
+		filemap_nr_thps_dec(mapping);
 	}
 
 	/*
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8607c77431b3..e89430ec5267 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1514,8 +1514,10 @@ out_unlock:
 
 	if (is_shmem)
 		__inc_node_page_state(new_page, NR_SHMEM_THPS);
-	else
+	else {
 		__inc_node_page_state(new_page, NR_FILE_THPS);
+		filemap_nr_thps_inc(mapping);
+	}
 
 	if (nr_none) {
 		struct zone *zone = page_zone(new_page);
-- 
cgit v1.2.3


From 364c1eebe453f06f0c1e837eb155a5725c9cd272 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Mon, 23 Sep 2019 15:38:06 -0700
Subject: mm: thp: extract split_queue_* into a struct

Patch series "Make deferred split shrinker memcg aware", v6.

Currently THP deferred split shrinker is not memcg aware, this may cause
premature OOM with some configuration.  For example the below test would
run into premature OOM easily:

$ cgcreate -g memory:thp
$ echo 4G > /sys/fs/cgroup/memory/thp/memory/limit_in_bytes
$ cgexec -g memory:thp transhuge-stress 4000

transhuge-stress comes from kernel selftest.

It is easy to hit OOM, but there are still a lot THP on the deferred split
queue, memcg direct reclaim can't touch them since the deferred split
shrinker is not memcg aware.

Convert deferred split shrinker memcg aware by introducing per memcg
deferred split queue.  The THP should be on either per node or per memcg
deferred split queue if it belongs to a memcg.  When the page is
immigrated to the other memcg, it will be immigrated to the target memcg's
deferred split queue too.

Reuse the second tail page's deferred_list for per memcg list since the
same THP can't be on multiple deferred split queues.

Make deferred split shrinker not depend on memcg kmem since it is not
slab.  It doesn't make sense to not shrink THP even though memcg kmem is
disabled.

With the above change the test demonstrated above doesn't trigger OOM even
though with cgroup.memory=nokmem.

This patch (of 4):

Put split_queue, split_queue_lock and split_queue_len into a struct in
order to reduce code duplication when we convert deferred_split to memcg
aware in the later patches.

Link: http://lkml.kernel.org/r/1565144277-36240-2-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Suggested-by: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Qian Cai <cai@lca.pw>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 12 +++++++++---
 mm/huge_memory.c       | 45 +++++++++++++++++++++++++--------------------
 mm/page_alloc.c        |  8 +++++---
 3 files changed, 39 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index aafb7c38c627..bda20282746b 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -679,6 +679,14 @@ struct zonelist {
 extern struct page *mem_map;
 #endif
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+struct deferred_split {
+	spinlock_t split_queue_lock;
+	struct list_head split_queue;
+	unsigned long split_queue_len;
+};
+#endif
+
 /*
  * On NUMA machines, each NUMA node would have a pg_data_t to describe
  * it's memory layout. On UMA machines there is a single pglist_data which
@@ -758,9 +766,7 @@ typedef struct pglist_data {
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	spinlock_t split_queue_lock;
-	struct list_head split_queue;
-	unsigned long split_queue_len;
+	struct deferred_split deferred_split_queue;
 #endif
 
 	/* Fields commonly accessed by the page reclaim scanner */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 483b07b2d6ae..c642c0394690 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2691,6 +2691,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
 	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
+	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 	int count, mapcount, extra_pins, ret;
@@ -2777,17 +2778,17 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	}
 
 	/* Prevent deferred_split_scan() touching ->_refcount */
-	spin_lock(&pgdata->split_queue_lock);
+	spin_lock(&ds_queue->split_queue_lock);
 	count = page_count(head);
 	mapcount = total_mapcount(head);
 	if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
 		if (!list_empty(page_deferred_list(head))) {
-			pgdata->split_queue_len--;
+			ds_queue->split_queue_len--;
 			list_del(page_deferred_list(head));
 		}
 		if (mapping)
 			__dec_node_page_state(page, NR_SHMEM_THPS);
-		spin_unlock(&pgdata->split_queue_lock);
+		spin_unlock(&ds_queue->split_queue_lock);
 		__split_huge_page(page, list, end, flags);
 		if (PageSwapCache(head)) {
 			swp_entry_t entry = { .val = page_private(head) };
@@ -2804,7 +2805,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 			dump_page(page, "total_mapcount(head) > 0");
 			BUG();
 		}
-		spin_unlock(&pgdata->split_queue_lock);
+		spin_unlock(&ds_queue->split_queue_lock);
 fail:		if (mapping)
 			xa_unlock(&mapping->i_pages);
 		spin_unlock_irqrestore(&pgdata->lru_lock, flags);
@@ -2827,52 +2828,56 @@ out:
 void free_transhuge_page(struct page *page)
 {
 	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	if (!list_empty(page_deferred_list(page))) {
-		pgdata->split_queue_len--;
+		ds_queue->split_queue_len--;
 		list_del(page_deferred_list(page));
 	}
-	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 	free_compound_page(page);
 }
 
 void deferred_split_huge_page(struct page *page)
 {
 	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
+	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
-	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	if (list_empty(page_deferred_list(page))) {
 		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
-		list_add_tail(page_deferred_list(page), &pgdata->split_queue);
-		pgdata->split_queue_len++;
+		list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
+		ds_queue->split_queue_len++;
 	}
-	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 }
 
 static unsigned long deferred_split_count(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
 	struct pglist_data *pgdata = NODE_DATA(sc->nid);
-	return READ_ONCE(pgdata->split_queue_len);
+	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+	return READ_ONCE(ds_queue->split_queue_len);
 }
 
 static unsigned long deferred_split_scan(struct shrinker *shrink,
 		struct shrink_control *sc)
 {
 	struct pglist_data *pgdata = NODE_DATA(sc->nid);
+	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
 	unsigned long flags;
 	LIST_HEAD(list), *pos, *next;
 	struct page *page;
 	int split = 0;
 
-	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	/* Take pin on all head pages to avoid freeing them under us */
-	list_for_each_safe(pos, next, &pgdata->split_queue) {
+	list_for_each_safe(pos, next, &ds_queue->split_queue) {
 		page = list_entry((void *)pos, struct page, mapping);
 		page = compound_head(page);
 		if (get_page_unless_zero(page)) {
@@ -2880,12 +2885,12 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 		} else {
 			/* We lost race with put_compound_page() */
 			list_del_init(page_deferred_list(page));
-			pgdata->split_queue_len--;
+			ds_queue->split_queue_len--;
 		}
 		if (!--sc->nr_to_scan)
 			break;
 	}
-	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
 	list_for_each_safe(pos, next, &list) {
 		page = list_entry((void *)pos, struct page, mapping);
@@ -2899,15 +2904,15 @@ next:
 		put_page(page);
 	}
 
-	spin_lock_irqsave(&pgdata->split_queue_lock, flags);
-	list_splice_tail(&list, &pgdata->split_queue);
-	spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
+	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
+	list_splice_tail(&list, &ds_queue->split_queue);
+	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 
 	/*
 	 * Stop shrinker if we didn't split any page, but the queue is empty.
 	 * This can happen if pages were freed under us.
 	 */
-	if (!split && list_empty(&pgdata->split_queue))
+	if (!split && list_empty(&ds_queue->split_queue))
 		return SHRINK_STOP;
 	return split;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 591e026534c2..a41436cca563 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6646,9 +6646,11 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void pgdat_init_split_queue(struct pglist_data *pgdat)
 {
-	spin_lock_init(&pgdat->split_queue_lock);
-	INIT_LIST_HEAD(&pgdat->split_queue);
-	pgdat->split_queue_len = 0;
+	struct deferred_split *ds_queue = &pgdat->deferred_split_queue;
+
+	spin_lock_init(&ds_queue->split_queue_lock);
+	INIT_LIST_HEAD(&ds_queue->split_queue);
+	ds_queue->split_queue_len = 0;
 }
 #else
 static void pgdat_init_split_queue(struct pglist_data *pgdat) {}
-- 
cgit v1.2.3


From 0a432dcbeb32edcd211a5d8f7847d0da7642a8b4 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Mon, 23 Sep 2019 15:38:12 -0700
Subject: mm: shrinker: make shrinker not depend on memcg kmem

Currently shrinker is just allocated and can work when memcg kmem is
enabled.  But, THP deferred split shrinker is not slab shrinker, it
doesn't make too much sense to have such shrinker depend on memcg kmem.
It should be able to reclaim THP even though memcg kmem is disabled.

Introduce a new shrinker flag, SHRINKER_NONSLAB, for non-slab shrinker.
When memcg kmem is disabled, just such shrinkers can be called in
shrinking memcg slab.

[yang.shi@linux.alibaba.com: add comment]
  Link: http://lkml.kernel.org/r/1566496227-84952-4-git-send-email-yang.shi@linux.alibaba.com
Link: http://lkml.kernel.org/r/1565144277-36240-4-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Qian Cai <cai@lca.pw>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 19 ++++++++-------
 include/linux/shrinker.h   |  7 +++++-
 mm/memcontrol.c            |  9 +------
 mm/vmscan.c                | 60 ++++++++++++++++++++++++----------------------
 4 files changed, 49 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ad8f1a397ae4..a3c0a639c824 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -128,9 +128,8 @@ struct mem_cgroup_per_node {
 
 	struct mem_cgroup_reclaim_iter	iter[DEF_PRIORITY + 1];
 
-#ifdef CONFIG_MEMCG_KMEM
 	struct memcg_shrinker_map __rcu	*shrinker_map;
-#endif
+
 	struct rb_node		tree_node;	/* RB tree node */
 	unsigned long		usage_in_excess;/* Set to the value by which */
 						/* the soft limit is exceeded*/
@@ -1311,6 +1310,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 	} while ((memcg = parent_mem_cgroup(memcg)));
 	return false;
 }
+
+extern int memcg_expand_shrinker_maps(int new_id);
+
+extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+				   int nid, int shrinker_id);
 #else
 #define mem_cgroup_sockets_enabled 0
 static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
@@ -1319,6 +1323,11 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 {
 	return false;
 }
+
+static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
+					  int nid, int shrinker_id)
+{
+}
 #endif
 
 struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
@@ -1390,10 +1399,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-extern int memcg_expand_shrinker_maps(int new_id);
-
-extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
-				   int nid, int shrinker_id);
 #else
 
 static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
@@ -1435,8 +1440,6 @@ static inline void memcg_put_cache_ids(void)
 {
 }
 
-static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
-					  int nid, int shrinker_id) { }
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 9443cafd1969..0f80123650e2 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -69,7 +69,7 @@ struct shrinker {
 
 	/* These are for internal use */
 	struct list_head list;
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 	/* ID in shrinker_idr */
 	int id;
 #endif
@@ -81,6 +81,11 @@ struct shrinker {
 /* Flags */
 #define SHRINKER_NUMA_AWARE	(1 << 0)
 #define SHRINKER_MEMCG_AWARE	(1 << 1)
+/*
+ * It just makes sense when the shrinker is also MEMCG_AWARE for now,
+ * non-MEMCG_AWARE shrinker should not have this flag set.
+ */
+#define SHRINKER_NONSLAB	(1 << 2)
 
 extern int prealloc_shrinker(struct shrinker *shrinker);
 extern void register_shrinker_prepared(struct shrinker *shrinker);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7bb2971dc75e..a385a7cb7d9f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -318,6 +318,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_kmem_enabled_key);
 EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 struct workqueue_struct *memcg_kmem_cache_wq;
+#endif
 
 static int memcg_shrinker_map_size;
 static DEFINE_MUTEX(memcg_shrinker_map_mutex);
@@ -441,14 +442,6 @@ void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 	}
 }
 
-#else /* CONFIG_MEMCG_KMEM */
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
-	return 0;
-}
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg) { }
-#endif /* CONFIG_MEMCG_KMEM */
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c4ef8681637b..4911754c93b7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -171,11 +171,22 @@ int vm_swappiness = 60;
  */
 unsigned long vm_total_pages;
 
+static void set_task_reclaim_state(struct task_struct *task,
+				   struct reclaim_state *rs)
+{
+	/* Check for an overwrite */
+	WARN_ON_ONCE(rs && task->reclaim_state);
+
+	/* Check for the nulling of an already-nulled member */
+	WARN_ON_ONCE(!rs && !task->reclaim_state);
+
+	task->reclaim_state = rs;
+}
+
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
-#ifdef CONFIG_MEMCG_KMEM
-
+#ifdef CONFIG_MEMCG
 /*
  * We allow subsystems to populate their shrinker-related
  * LRU lists before register_shrinker_prepared() is called
@@ -227,30 +238,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
 	idr_remove(&shrinker_idr, id);
 	up_write(&shrinker_rwsem);
 }
-#else /* CONFIG_MEMCG_KMEM */
-static int prealloc_memcg_shrinker(struct shrinker *shrinker)
-{
-	return 0;
-}
 
-static void unregister_memcg_shrinker(struct shrinker *shrinker)
-{
-}
-#endif /* CONFIG_MEMCG_KMEM */
-
-static void set_task_reclaim_state(struct task_struct *task,
-				   struct reclaim_state *rs)
-{
-	/* Check for an overwrite */
-	WARN_ON_ONCE(rs && task->reclaim_state);
-
-	/* Check for the nulling of an already-nulled member */
-	WARN_ON_ONCE(!rs && !task->reclaim_state);
-
-	task->reclaim_state = rs;
-}
-
-#ifdef CONFIG_MEMCG
 static bool global_reclaim(struct scan_control *sc)
 {
 	return !sc->target_mem_cgroup;
@@ -305,6 +293,15 @@ static bool memcg_congested(pg_data_t *pgdat,
 
 }
 #else
+static int prealloc_memcg_shrinker(struct shrinker *shrinker)
+{
+	return 0;
+}
+
+static void unregister_memcg_shrinker(struct shrinker *shrinker)
+{
+}
+
 static bool global_reclaim(struct scan_control *sc)
 {
 	return true;
@@ -591,7 +588,7 @@ static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
 	return freed;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			struct mem_cgroup *memcg, int priority)
 {
@@ -599,7 +596,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 	unsigned long ret, freed = 0;
 	int i;
 
-	if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
+	if (!mem_cgroup_online(memcg))
 		return 0;
 
 	if (!down_read_trylock(&shrinker_rwsem))
@@ -625,6 +622,11 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			continue;
 		}
 
+		/* Call non-slab shrinkers even though kmem is disabled */
+		if (!memcg_kmem_enabled() &&
+		    !(shrinker->flags & SHRINKER_NONSLAB))
+			continue;
+
 		ret = do_shrink_slab(&sc, shrinker, priority);
 		if (ret == SHRINK_EMPTY) {
 			clear_bit(i, map->map);
@@ -661,13 +663,13 @@ unlock:
 	up_read(&shrinker_rwsem);
 	return freed;
 }
-#else /* CONFIG_MEMCG_KMEM */
+#else /* CONFIG_MEMCG */
 static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			struct mem_cgroup *memcg, int priority)
 {
 	return 0;
 }
-#endif /* CONFIG_MEMCG_KMEM */
+#endif /* CONFIG_MEMCG */
 
 /**
  * shrink_slab - shrink slab caches
-- 
cgit v1.2.3


From 87eaceb3faa59b9b4d940ec9554ce251325d83fe Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Mon, 23 Sep 2019 15:38:15 -0700
Subject: mm: thp: make deferred split shrinker memcg aware

Currently THP deferred split shrinker is not memcg aware, this may cause
premature OOM with some configuration.  For example the below test would
run into premature OOM easily:

$ cgcreate -g memory:thp
$ echo 4G > /sys/fs/cgroup/memory/thp/memory/limit_in_bytes
$ cgexec -g memory:thp transhuge-stress 4000

transhuge-stress comes from kernel selftest.

It is easy to hit OOM, but there are still a lot THP on the deferred split
queue, memcg direct reclaim can't touch them since the deferred split
shrinker is not memcg aware.

Convert deferred split shrinker memcg aware by introducing per memcg
deferred split queue.  The THP should be on either per node or per memcg
deferred split queue if it belongs to a memcg.  When the page is
immigrated to the other memcg, it will be immigrated to the target memcg's
deferred split queue too.

Reuse the second tail page's deferred_list for per memcg list since the
same THP can't be on multiple deferred split queues.

[yang.shi@linux.alibaba.com: simplify deferred split queue dereference per Kirill Tkhai]
  Link: http://lkml.kernel.org/r/1566496227-84952-5-git-send-email-yang.shi@linux.alibaba.com
Link: http://lkml.kernel.org/r/1565144277-36240-5-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Qian Cai <cai@lca.pw>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h    |  9 +++++++
 include/linux/memcontrol.h |  4 +++
 include/linux/mm_types.h   |  1 +
 mm/huge_memory.c           | 62 +++++++++++++++++++++++++++++++++++++++-------
 mm/memcontrol.c            | 24 ++++++++++++++++++
 5 files changed, 91 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 45ede62aa85b..61c9ffd89b05 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -267,6 +267,15 @@ static inline bool thp_migration_supported(void)
 	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+static inline struct list_head *page_deferred_list(struct page *page)
+{
+	/*
+	 * Global or memcg deferred list in the second tail pages is
+	 * occupied by compound_head.
+	 */
+	return &page[2].deferred_list;
+}
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a3c0a639c824..9b60863429cc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -330,6 +330,10 @@ struct mem_cgroup {
 	struct list_head event_list;
 	spinlock_t event_list_lock;
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	struct deferred_split deferred_split_queue;
+#endif
+
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
 };
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0b739f360cec..5183e0d77dfa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -138,6 +138,7 @@ struct page {
 		struct {	/* Second tail page of compound page */
 			unsigned long _compound_pad_1;	/* compound_head */
 			unsigned long _compound_pad_2;
+			/* For both global and memcg */
 			struct list_head deferred_list;
 		};
 		struct {	/* Page table pages */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c642c0394690..73fc517c08d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -496,11 +496,25 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 	return pmd;
 }
 
-static inline struct list_head *page_deferred_list(struct page *page)
+#ifdef CONFIG_MEMCG
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
 {
-	/* ->lru in the tail pages is occupied by compound_head. */
-	return &page[2].deferred_list;
+	struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+	if (memcg)
+		return &memcg->deferred_split_queue;
+	else
+		return &pgdat->deferred_split_queue;
 }
+#else
+static inline struct deferred_split *get_deferred_split_queue(struct page *page)
+{
+	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+
+	return &pgdat->deferred_split_queue;
+}
+#endif
 
 void prep_transhuge_page(struct page *page)
 {
@@ -2691,7 +2705,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 {
 	struct page *head = compound_head(page);
 	struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
-	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+	struct deferred_split *ds_queue = get_deferred_split_queue(page);
 	struct anon_vma *anon_vma = NULL;
 	struct address_space *mapping = NULL;
 	int count, mapcount, extra_pins, ret;
@@ -2827,8 +2841,7 @@ out:
 
 void free_transhuge_page(struct page *page)
 {
-	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
-	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+	struct deferred_split *ds_queue = get_deferred_split_queue(page);
 	unsigned long flags;
 
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
@@ -2842,17 +2855,37 @@ void free_transhuge_page(struct page *page)
 
 void deferred_split_huge_page(struct page *page)
 {
-	struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
-	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+	struct deferred_split *ds_queue = get_deferred_split_queue(page);
+#ifdef CONFIG_MEMCG
+	struct mem_cgroup *memcg = compound_head(page)->mem_cgroup;
+#endif
 	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 
+	/*
+	 * The try_to_unmap() in page reclaim path might reach here too,
+	 * this may cause a race condition to corrupt deferred split queue.
+	 * And, if page reclaim is already handling the same page, it is
+	 * unnecessary to handle it again in shrinker.
+	 *
+	 * Check PageSwapCache to determine if the page is being
+	 * handled by page reclaim since THP swap would add the page into
+	 * swap cache before calling try_to_unmap().
+	 */
+	if (PageSwapCache(page))
+		return;
+
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	if (list_empty(page_deferred_list(page))) {
 		count_vm_event(THP_DEFERRED_SPLIT_PAGE);
 		list_add_tail(page_deferred_list(page), &ds_queue->split_queue);
 		ds_queue->split_queue_len++;
+#ifdef CONFIG_MEMCG
+		if (memcg)
+			memcg_set_shrinker_bit(memcg, page_to_nid(page),
+					       deferred_split_shrinker.id);
+#endif
 	}
 	spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
 }
@@ -2862,6 +2895,11 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
 {
 	struct pglist_data *pgdata = NODE_DATA(sc->nid);
 	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
+
+#ifdef CONFIG_MEMCG
+	if (sc->memcg)
+		ds_queue = &sc->memcg->deferred_split_queue;
+#endif
 	return READ_ONCE(ds_queue->split_queue_len);
 }
 
@@ -2875,6 +2913,11 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 	struct page *page;
 	int split = 0;
 
+#ifdef CONFIG_MEMCG
+	if (sc->memcg)
+		ds_queue = &sc->memcg->deferred_split_queue;
+#endif
+
 	spin_lock_irqsave(&ds_queue->split_queue_lock, flags);
 	/* Take pin on all head pages to avoid freeing them under us */
 	list_for_each_safe(pos, next, &ds_queue->split_queue) {
@@ -2921,7 +2964,8 @@ static struct shrinker deferred_split_shrinker = {
 	.count_objects = deferred_split_count,
 	.scan_objects = deferred_split_scan,
 	.seeks = DEFAULT_SEEKS,
-	.flags = SHRINKER_NUMA_AWARE,
+	.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE |
+		 SHRINKER_NONSLAB,
 };
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a385a7cb7d9f..2156ef775d04 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5070,6 +5070,11 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
 		memcg->cgwb_frn[i].done =
 			__WB_COMPLETION_INIT(&memcg_cgwb_frn_waitq);
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	spin_lock_init(&memcg->deferred_split_queue.split_queue_lock);
+	INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
+	memcg->deferred_split_queue.split_queue_len = 0;
 #endif
 	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
 	return memcg;
@@ -5449,6 +5454,14 @@ static int mem_cgroup_move_account(struct page *page,
 		__mod_memcg_state(to, NR_WRITEBACK, nr_pages);
 	}
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (compound && !list_empty(page_deferred_list(page))) {
+		spin_lock(&from->deferred_split_queue.split_queue_lock);
+		list_del_init(page_deferred_list(page));
+		from->deferred_split_queue.split_queue_len--;
+		spin_unlock(&from->deferred_split_queue.split_queue_lock);
+	}
+#endif
 	/*
 	 * It is safe to change page->mem_cgroup here because the page
 	 * is referenced, charged, and isolated - we can't race with
@@ -5457,6 +5470,17 @@ static int mem_cgroup_move_account(struct page *page,
 
 	/* caller should have done css_get */
 	page->mem_cgroup = to;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (compound && list_empty(page_deferred_list(page))) {
+		spin_lock(&to->deferred_split_queue.split_queue_lock);
+		list_add_tail(page_deferred_list(page),
+			      &to->deferred_split_queue.split_queue);
+		to->deferred_split_queue.split_queue_len++;
+		spin_unlock(&to->deferred_split_queue.split_queue_lock);
+	}
+#endif
+
 	spin_unlock_irqrestore(&from->move_lock, flags);
 
 	ret = 0;
-- 
cgit v1.2.3


From 010c164a5fa7e169deab0a4d8211611f1930c1cd Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Sep 2019 15:38:19 -0700
Subject: mm: move memcmp_pages() and pages_identical()

Patch series "THP aware uprobe", v13.

This patchset makes uprobe aware of THPs.

Currently, when uprobe is attached to text on THP, the page is split by
FOLL_SPLIT.  As a result, uprobe eliminates the performance benefit of
THP.

This set makes uprobe THP-aware.  Instead of FOLL_SPLIT, we introduces
FOLL_SPLIT_PMD, which only split PMD for uprobe.

After all uprobes within the THP are removed, the PTE-mapped pages are
regrouped as huge PMD.

This set (plus a few THP patches) is also available at

   https://github.com/liu-song-6/linux/tree/uprobe-thp

This patch (of 6):

Move memcmp_pages() to mm/util.c and pages_identical() to mm.h, so that we
can use them in other files.

Link: http://lkml.kernel.org/r/20190815164525.1848545-2-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <matthew.wilcox@oracle.com>
Cc: William Kucharski <william.kucharski@oracle.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  7 +++++++
 mm/ksm.c           | 18 ------------------
 mm/util.c          | 13 +++++++++++++
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 57a9fa34f159..0ac87d9ee9d8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2868,5 +2868,12 @@ void __init setup_nr_node_ids(void);
 static inline void setup_nr_node_ids(void) {}
 #endif
 
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+	return !memcmp_pages(page1, page2);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/ksm.c b/mm/ksm.c
index 3dc4346411e4..dbee2eb4dd05 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1029,24 +1029,6 @@ static u32 calc_checksum(struct page *page)
 	return checksum;
 }
 
-static int memcmp_pages(struct page *page1, struct page *page2)
-{
-	char *addr1, *addr2;
-	int ret;
-
-	addr1 = kmap_atomic(page1);
-	addr2 = kmap_atomic(page2);
-	ret = memcmp(addr1, addr2, PAGE_SIZE);
-	kunmap_atomic(addr2);
-	kunmap_atomic(addr1);
-	return ret;
-}
-
-static inline int pages_identical(struct page *page1, struct page *page2)
-{
-	return !memcmp_pages(page1, page2);
-}
-
 static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 			      pte_t *orig_pte)
 {
diff --git a/mm/util.c b/mm/util.c
index bab284d69c8c..37f7b6711514 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -783,3 +783,16 @@ out_mm:
 out:
 	return res;
 }
+
+int memcmp_pages(struct page *page1, struct page *page2)
+{
+	char *addr1, *addr2;
+	int ret;
+
+	addr1 = kmap_atomic(page1);
+	addr2 = kmap_atomic(page2);
+	ret = memcmp(addr1, addr2, PAGE_SIZE);
+	kunmap_atomic(addr2);
+	kunmap_atomic(addr1);
+	return ret;
+}
-- 
cgit v1.2.3


From bfe7b00de6d1e25fee08484c4fbf1c1ed175be78 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Sep 2019 15:38:25 -0700
Subject: mm, thp: introduce FOLL_SPLIT_PMD

Introduce a new foll_flag: FOLL_SPLIT_PMD.  As the name says
FOLL_SPLIT_PMD splits huge pmd for given mm_struct, the underlining huge
page stays as-is.

FOLL_SPLIT_PMD is useful for cases where we need to use regular pages, but
would switch back to huge page and huge pmd on.  One of such example is
uprobe.  The following patches use FOLL_SPLIT_PMD in uprobe.

Link: http://lkml.kernel.org/r/20190815164525.1848545-4-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 mm/gup.c           | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ac87d9ee9d8..233ad11938db 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2591,6 +2591,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_COW	0x4000	/* internal GUP flag */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
 #define FOLL_LONGTERM	0x10000	/* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD	0x20000	/* split huge pmd before returning */
 
 /*
  * NOTE on FOLL_LONGTERM:
diff --git a/mm/gup.c b/mm/gup.c
index 012060efddf1..60c3915c8ee6 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -384,7 +384,7 @@ retry_locked:
 		spin_unlock(ptl);
 		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}
-	if (flags & FOLL_SPLIT) {
+	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
 		int ret;
 		page = pmd_page(*pmd);
 		if (is_huge_zero_page(page)) {
@@ -393,7 +393,7 @@ retry_locked:
 			split_huge_pmd(vma, pmd, address);
 			if (pmd_trans_unstable(pmd))
 				ret = -EBUSY;
-		} else {
+		} else if (flags & FOLL_SPLIT) {
 			if (unlikely(!try_get_page(page))) {
 				spin_unlock(ptl);
 				return ERR_PTR(-ENOMEM);
@@ -405,6 +405,10 @@ retry_locked:
 			put_page(page);
 			if (pmd_none(*pmd))
 				return no_page_table(vma, flags);
+		} else {  /* flags & FOLL_SPLIT_PMD */
+			spin_unlock(ptl);
+			split_huge_pmd(vma, pmd, address);
+			ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
 		}
 
 		return ret ? ERR_PTR(ret) :
-- 
cgit v1.2.3


From 27e1f8273113adec0e98bf513e4091636b27cc2a Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 23 Sep 2019 15:38:30 -0700
Subject: khugepaged: enable collapse pmd for pte-mapped THP

khugepaged needs exclusive mmap_sem to access page table.  When it fails
to lock mmap_sem, the page will fault in as pte-mapped THP.  As the page
is already a THP, khugepaged will not handle this pmd again.

This patch enables the khugepaged to retry collapse the page table.

struct mm_slot (in khugepaged.c) is extended with an array, containing
addresses of pte-mapped THPs.  We use array here for simplicity.  We can
easily replace it with more advanced data structures when needed.

In khugepaged_scan_mm_slot(), if the mm contains pte-mapped THP, we try to
collapse the page table.

Since collapse may happen at an later time, some pages may already fault
in.  collapse_pte_mapped_thp() is added to properly handle these pages.
collapse_pte_mapped_thp() also double checks whether all ptes in this pmd
are mapping to the same THP.  This is necessary because some subpage of
the THP may be replaced, for example by uprobe.  In such cases, it is not
possible to collapse the pmd.

[kirill.shutemov@linux.intel.com: add comments for retract_page_tables()]
  Link: http://lkml.kernel.org/r/20190816145443.6ard3iilytc6jlgv@box
Link: http://lkml.kernel.org/r/20190815164525.1848545-6-songliubraving@fb.com
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/khugepaged.h |  12 +++
 mm/khugepaged.c            | 192 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 200 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 082d1d2a5216..bc45ea1efbf7 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,6 +15,14 @@ extern int __khugepaged_enter(struct mm_struct *mm);
 extern void __khugepaged_exit(struct mm_struct *mm);
 extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 				      unsigned long vm_flags);
+#ifdef CONFIG_SHMEM
+extern void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr);
+#else
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+					   unsigned long addr)
+{
+}
+#endif
 
 #define khugepaged_enabled()					       \
 	(transparent_hugepage_flags &				       \
@@ -73,6 +81,10 @@ static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 {
 	return 0;
 }
+static inline void collapse_pte_mapped_thp(struct mm_struct *mm,
+					   unsigned long addr)
+{
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_KHUGEPAGED_H */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e89430ec5267..0a1b4b484ac5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -77,6 +77,8 @@ static __read_mostly DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS);
 
 static struct kmem_cache *mm_slot_cache __read_mostly;
 
+#define MAX_PTE_MAPPED_THP 8
+
 /**
  * struct mm_slot - hash lookup from mm to mm_slot
  * @hash: hash collision list
@@ -87,6 +89,10 @@ struct mm_slot {
 	struct hlist_node hash;
 	struct list_head mm_node;
 	struct mm_struct *mm;
+
+	/* pte-mapped THP in this mm */
+	int nr_pte_mapped_thp;
+	unsigned long pte_mapped_thp[MAX_PTE_MAPPED_THP];
 };
 
 /**
@@ -1254,6 +1260,159 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
 }
 
 #if defined(CONFIG_SHMEM) && defined(CONFIG_TRANSPARENT_HUGE_PAGECACHE)
+/*
+ * Notify khugepaged that given addr of the mm is pte-mapped THP. Then
+ * khugepaged should try to collapse the page table.
+ */
+static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm,
+					 unsigned long addr)
+{
+	struct mm_slot *mm_slot;
+
+	VM_BUG_ON(addr & ~HPAGE_PMD_MASK);
+
+	spin_lock(&khugepaged_mm_lock);
+	mm_slot = get_mm_slot(mm);
+	if (likely(mm_slot && mm_slot->nr_pte_mapped_thp < MAX_PTE_MAPPED_THP))
+		mm_slot->pte_mapped_thp[mm_slot->nr_pte_mapped_thp++] = addr;
+	spin_unlock(&khugepaged_mm_lock);
+	return 0;
+}
+
+/**
+ * Try to collapse a pte-mapped THP for mm at address haddr.
+ *
+ * This function checks whether all the PTEs in the PMD are pointing to the
+ * right THP. If so, retract the page table so the THP can refault in with
+ * as pmd-mapped.
+ */
+void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
+{
+	unsigned long haddr = addr & HPAGE_PMD_MASK;
+	struct vm_area_struct *vma = find_vma(mm, haddr);
+	struct page *hpage = NULL;
+	pte_t *start_pte, *pte;
+	pmd_t *pmd, _pmd;
+	spinlock_t *ptl;
+	int count = 0;
+	int i;
+
+	if (!vma || !vma->vm_file ||
+	    vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+		return;
+
+	/*
+	 * This vm_flags may not have VM_HUGEPAGE if the page was not
+	 * collapsed by this mm. But we can still collapse if the page is
+	 * the valid THP. Add extra VM_HUGEPAGE so hugepage_vma_check()
+	 * will not fail the vma for missing VM_HUGEPAGE
+	 */
+	if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
+		return;
+
+	pmd = mm_find_pmd(mm, haddr);
+	if (!pmd)
+		return;
+
+	start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
+
+	/* step 1: check all mapped PTEs are to the right huge page */
+	for (i = 0, addr = haddr, pte = start_pte;
+	     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+		struct page *page;
+
+		/* empty pte, skip */
+		if (pte_none(*pte))
+			continue;
+
+		/* page swapped out, abort */
+		if (!pte_present(*pte))
+			goto abort;
+
+		page = vm_normal_page(vma, addr, *pte);
+
+		if (!page || !PageCompound(page))
+			goto abort;
+
+		if (!hpage) {
+			hpage = compound_head(page);
+			/*
+			 * The mapping of the THP should not change.
+			 *
+			 * Note that uprobe, debugger, or MAP_PRIVATE may
+			 * change the page table, but the new page will
+			 * not pass PageCompound() check.
+			 */
+			if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
+				goto abort;
+		}
+
+		/*
+		 * Confirm the page maps to the correct subpage.
+		 *
+		 * Note that uprobe, debugger, or MAP_PRIVATE may change
+		 * the page table, but the new page will not pass
+		 * PageCompound() check.
+		 */
+		if (WARN_ON(hpage + i != page))
+			goto abort;
+		count++;
+	}
+
+	/* step 2: adjust rmap */
+	for (i = 0, addr = haddr, pte = start_pte;
+	     i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE, pte++) {
+		struct page *page;
+
+		if (pte_none(*pte))
+			continue;
+		page = vm_normal_page(vma, addr, *pte);
+		page_remove_rmap(page, false);
+	}
+
+	pte_unmap_unlock(start_pte, ptl);
+
+	/* step 3: set proper refcount and mm_counters. */
+	if (hpage) {
+		page_ref_sub(hpage, count);
+		add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
+	}
+
+	/* step 4: collapse pmd */
+	ptl = pmd_lock(vma->vm_mm, pmd);
+	_pmd = pmdp_collapse_flush(vma, addr, pmd);
+	spin_unlock(ptl);
+	mm_dec_nr_ptes(mm);
+	pte_free(mm, pmd_pgtable(_pmd));
+	return;
+
+abort:
+	pte_unmap_unlock(start_pte, ptl);
+}
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+	struct mm_struct *mm = mm_slot->mm;
+	int i;
+
+	if (likely(mm_slot->nr_pte_mapped_thp == 0))
+		return 0;
+
+	if (!down_write_trylock(&mm->mmap_sem))
+		return -EBUSY;
+
+	if (unlikely(khugepaged_test_exit(mm)))
+		goto out;
+
+	for (i = 0; i < mm_slot->nr_pte_mapped_thp; i++)
+		collapse_pte_mapped_thp(mm, mm_slot->pte_mapped_thp[i]);
+
+out:
+	mm_slot->nr_pte_mapped_thp = 0;
+	up_write(&mm->mmap_sem);
+	return 0;
+}
+
 static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
@@ -1262,7 +1421,22 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 
 	i_mmap_lock_write(mapping);
 	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-		/* probably overkill */
+		/*
+		 * Check vma->anon_vma to exclude MAP_PRIVATE mappings that
+		 * got written to. These VMAs are likely not worth investing
+		 * down_write(mmap_sem) as PMD-mapping is likely to be split
+		 * later.
+		 *
+		 * Not that vma->anon_vma check is racy: it can be set up after
+		 * the check but before we took mmap_sem by the fault path.
+		 * But page lock would prevent establishing any new ptes of the
+		 * page, so we are safe.
+		 *
+		 * An alternative would be drop the check, but check that page
+		 * table is clear before calling pmdp_collapse_flush() under
+		 * ptl. It has higher chance to recover THP for the VMA, but
+		 * has higher cost too.
+		 */
 		if (vma->anon_vma)
 			continue;
 		addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
@@ -1275,9 +1449,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			continue;
 		/*
 		 * We need exclusive mmap_sem to retract page table.
-		 * If trylock fails we would end up with pte-mapped THP after
-		 * re-fault. Not ideal, but it's more important to not disturb
-		 * the system too much.
+		 *
+		 * We use trylock due to lock inversion: we need to acquire
+		 * mmap_sem while holding page lock. Fault path does it in
+		 * reverse order. Trylock is a way to avoid deadlock.
 		 */
 		if (down_write_trylock(&vma->vm_mm->mmap_sem)) {
 			spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
@@ -1287,6 +1462,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
 			up_write(&vma->vm_mm->mmap_sem);
 			mm_dec_nr_ptes(vma->vm_mm);
 			pte_free(vma->vm_mm, pmd_pgtable(_pmd));
+		} else {
+			/* Try again later */
+			khugepaged_add_pte_mapped_thp(vma->vm_mm, addr);
 		}
 	}
 	i_mmap_unlock_write(mapping);
@@ -1709,6 +1887,11 @@ static void khugepaged_scan_file(struct mm_struct *mm,
 {
 	BUILD_BUG();
 }
+
+static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+{
+	return 0;
+}
 #endif
 
 static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
@@ -1733,6 +1916,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 		khugepaged_scan.mm_slot = mm_slot;
 	}
 	spin_unlock(&khugepaged_mm_lock);
+	khugepaged_collapse_pte_mapped_thps(mm_slot);
 
 	mm = mm_slot->mm;
 	/*
-- 
cgit v1.2.3


From 649775be63c8b2e0b56ecc5bbc96d38205ec5259 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alex@ghiti.fr>
Date: Mon, 23 Sep 2019 15:38:37 -0700
Subject: mm, fs: move randomize_stack_top from fs to mm

Patch series "Provide generic top-down mmap layout functions", v6.

This series introduces generic functions to make top-down mmap layout
easily accessible to architectures, in particular riscv which was the
initial goal of this series.  The generic implementation was taken from
arm64 and used successively by arm, mips and finally riscv.

Note that in addition the series fixes 2 issues:

- stack randomization was taken into account even if not necessary.

- [1] fixed an issue with mmap base which did not take into account
  randomization but did not report it to arm and mips, so by moving arm64
  into a generic library, this problem is now fixed for both
  architectures.

This work is an effort to factorize architecture functions to avoid code
duplication and oversights as in [1].

[1]: https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1429066.html

This patch (of 14):

This preparatory commit moves this function so that further introduction
of generic topdown mmap layout is contained only in mm/util.c.

Link: http://lkml.kernel.org/r/20190730055113.23635-2-alex@ghiti.fr
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paul Burton <paul.burton@mips.com>
Cc: James Hogan <jhogan@kernel.org>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/binfmt_elf.c    | 20 --------------------
 include/linux/mm.h |  2 ++
 mm/util.c          | 22 ++++++++++++++++++++++
 3 files changed, 24 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d4e11b2e04f6..cec3b4146440 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -670,26 +670,6 @@ out:
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-#ifndef STACK_RND_MASK
-#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
-#endif
-
-static unsigned long randomize_stack_top(unsigned long stack_top)
-{
-	unsigned long random_variable = 0;
-
-	if (current->flags & PF_RANDOMIZE) {
-		random_variable = get_random_long();
-		random_variable &= STACK_RND_MASK;
-		random_variable <<= PAGE_SHIFT;
-	}
-#ifdef CONFIG_STACK_GROWSUP
-	return PAGE_ALIGN(stack_top) + random_variable;
-#else
-	return PAGE_ALIGN(stack_top) - random_variable;
-#endif
-}
-
 static int load_elf_binary(struct linux_binprm *bprm)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 233ad11938db..294a67b94147 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2328,6 +2328,8 @@ extern int install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
 
+unsigned long randomize_stack_top(unsigned long stack_top);
+
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
diff --git a/mm/util.c b/mm/util.c
index 37f7b6711514..bf8af5e07c4a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -16,6 +16,8 @@
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/elf.h>
+#include <linux/random.h>
 
 #include <linux/uaccess.h>
 
@@ -293,6 +295,26 @@ int vma_is_stack_for_current(struct vm_area_struct *vma)
 	return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
 }
 
+#ifndef STACK_RND_MASK
+#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
+#endif
+
+unsigned long randomize_stack_top(unsigned long stack_top)
+{
+	unsigned long random_variable = 0;
+
+	if (current->flags & PF_RANDOMIZE) {
+		random_variable = get_random_long();
+		random_variable &= STACK_RND_MASK;
+		random_variable <<= PAGE_SHIFT;
+	}
+#ifdef CONFIG_STACK_GROWSUP
+	return PAGE_ALIGN(stack_top) + random_variable;
+#else
+	return PAGE_ALIGN(stack_top) - random_variable;
+#endif
+}
+
 #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
 void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 {
-- 
cgit v1.2.3


From c165f25d23ecb2f9f121ced20435415b931219e2 Mon Sep 17 00:00:00 2001
From: Hui Zhu <teawaterz@linux.alibaba.com>
Date: Mon, 23 Sep 2019 15:39:37 -0700
Subject: zpool: add malloc_support_movable to zpool_driver

As a zpool_driver, zsmalloc can allocate movable memory because it support
migate pages.  But zbud and z3fold cannot allocate movable memory.

Add malloc_support_movable to zpool_driver.  If a zpool_driver support
allocate movable memory, set it to true.  And add
zpool_malloc_support_movable check malloc_support_movable to make sure if
a zpool support allocate movable memory.

Link: http://lkml.kernel.org/r/20190605100630.13293-1-teawaterz@linux.alibaba.com
Signed-off-by: Hui Zhu <teawaterz@linux.alibaba.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zpool.h |  3 +++
 mm/zpool.c            | 16 ++++++++++++++++
 mm/zsmalloc.c         | 19 ++++++++++---------
 3 files changed, 29 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 7238865e75b0..51bf43076165 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -46,6 +46,8 @@ const char *zpool_get_type(struct zpool *pool);
 
 void zpool_destroy_pool(struct zpool *pool);
 
+bool zpool_malloc_support_movable(struct zpool *pool);
+
 int zpool_malloc(struct zpool *pool, size_t size, gfp_t gfp,
 			unsigned long *handle);
 
@@ -90,6 +92,7 @@ struct zpool_driver {
 			struct zpool *zpool);
 	void (*destroy)(void *pool);
 
+	bool malloc_support_movable;
 	int (*malloc)(void *pool, size_t size, gfp_t gfp,
 				unsigned long *handle);
 	void (*free)(void *pool, unsigned long handle);
diff --git a/mm/zpool.c b/mm/zpool.c
index a2dd9107857d..863669212070 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -238,6 +238,22 @@ const char *zpool_get_type(struct zpool *zpool)
 	return zpool->driver->type;
 }
 
+/**
+ * zpool_malloc_support_movable() - Check if the zpool support
+ * allocate movable memory
+ * @zpool:	The zpool to check
+ *
+ * This returns if the zpool support allocate movable memory.
+ *
+ * Implementations must guarantee this to be thread-safe.
+ *
+ * Returns: true if if the zpool support allocate movable memory, false if not
+ */
+bool zpool_malloc_support_movable(struct zpool *zpool)
+{
+	return zpool->driver->malloc_support_movable;
+}
+
 /**
  * zpool_malloc() - Allocate memory
  * @zpool:	The zpool to allocate from.
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index e98bb6ab4f7e..646858efd468 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -443,15 +443,16 @@ static u64 zs_zpool_total_size(void *pool)
 }
 
 static struct zpool_driver zs_zpool_driver = {
-	.type =		"zsmalloc",
-	.owner =	THIS_MODULE,
-	.create =	zs_zpool_create,
-	.destroy =	zs_zpool_destroy,
-	.malloc =	zs_zpool_malloc,
-	.free =		zs_zpool_free,
-	.map =		zs_zpool_map,
-	.unmap =	zs_zpool_unmap,
-	.total_size =	zs_zpool_total_size,
+	.type =			  "zsmalloc",
+	.owner =		  THIS_MODULE,
+	.create =		  zs_zpool_create,
+	.destroy =		  zs_zpool_destroy,
+	.malloc_support_movable = true,
+	.malloc =		  zs_zpool_malloc,
+	.free =			  zs_zpool_free,
+	.map =			  zs_zpool_map,
+	.unmap =		  zs_zpool_unmap,
+	.total_size =		  zs_zpool_total_size,
 };
 
 MODULE_ALIAS("zpool-zsmalloc");
-- 
cgit v1.2.3


From cb063a83ca321fbf0cb2b4044186f241d89f3dc1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 28 Aug 2019 15:03:18 +0200
Subject: thermal: db8500: Finalize device tree conversion

At some point there was an attempt to convert the DB8500
thermal sensor to device tree: a probe path was added
and the device tree was augmented for the Snowball board.
The switchover was never completed: instead the thermal
devices came from from the PRCMU MFD device and the probe
on the Snowball was confused as another set of configuration
appeared from the device tree.

Move over to a device-tree only approach, as we fixed up
the device trees.

Cc: Vincent Guittot <vincent.guittot@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Eduardo Valentin <edubezval@gmail.com>
---
 drivers/mfd/db8500-prcmu.c                   | 53 +---------------------------
 drivers/thermal/Kconfig                      |  2 +-
 drivers/thermal/db8500_thermal.c             | 30 ++++++++--------
 include/linux/platform_data/db8500_thermal.h | 29 ---------------
 4 files changed, 17 insertions(+), 97 deletions(-)
 delete mode 100644 include/linux/platform_data/db8500_thermal.h

(limited to 'include')

diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index 90e0f21bc49c..518439c79826 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -36,7 +36,6 @@
 #include <linux/regulator/db8500-prcmu.h>
 #include <linux/regulator/machine.h>
 #include <linux/platform_data/ux500_wdt.h>
-#include <linux/platform_data/db8500_thermal.h>
 #include "dbx500-prcmu-regs.h"
 
 /* Index of different voltages to be used when accessing AVSData */
@@ -2984,53 +2983,6 @@ static struct ux500_wdt_data db8500_wdt_pdata = {
 	.timeout = 600, /* 10 minutes */
 	.has_28_bits_resolution = true,
 };
-/*
- * Thermal Sensor
- */
-
-static struct resource db8500_thsens_resources[] = {
-	{
-		.name = "IRQ_HOTMON_LOW",
-		.start  = IRQ_PRCMU_HOTMON_LOW,
-		.end    = IRQ_PRCMU_HOTMON_LOW,
-		.flags  = IORESOURCE_IRQ,
-	},
-	{
-		.name = "IRQ_HOTMON_HIGH",
-		.start  = IRQ_PRCMU_HOTMON_HIGH,
-		.end    = IRQ_PRCMU_HOTMON_HIGH,
-		.flags  = IORESOURCE_IRQ,
-	},
-};
-
-static struct db8500_thsens_platform_data db8500_thsens_data = {
-	.trip_points[0] = {
-		.temp = 70000,
-		.type = THERMAL_TRIP_ACTIVE,
-		.cdev_name = {
-			[0] = "thermal-cpufreq-0",
-		},
-	},
-	.trip_points[1] = {
-		.temp = 75000,
-		.type = THERMAL_TRIP_ACTIVE,
-		.cdev_name = {
-			[0] = "thermal-cpufreq-0",
-		},
-	},
-	.trip_points[2] = {
-		.temp = 80000,
-		.type = THERMAL_TRIP_ACTIVE,
-		.cdev_name = {
-			[0] = "thermal-cpufreq-0",
-		},
-	},
-	.trip_points[3] = {
-		.temp = 85000,
-		.type = THERMAL_TRIP_CRITICAL,
-	},
-	.num_trips = 4,
-};
 
 static const struct mfd_cell common_prcmu_devs[] = {
 	{
@@ -3054,10 +3006,7 @@ static const struct mfd_cell db8500_prcmu_devs[] = {
 	},
 	{
 		.name = "db8500-thermal",
-		.num_resources = ARRAY_SIZE(db8500_thsens_resources),
-		.resources = db8500_thsens_resources,
-		.platform_data = &db8500_thsens_data,
-		.pdata_size = sizeof(db8500_thsens_data),
+		.of_compatible = "stericsson,db8500-thermal",
 	},
 };
 
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 9966364a6deb..001a21abcc28 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -310,7 +310,7 @@ config DOVE_THERMAL
 
 config DB8500_THERMAL
 	tristate "DB8500 thermal management"
-	depends on MFD_DB8500_PRCMU
+	depends on MFD_DB8500_PRCMU && OF
 	default y
 	help
 	  Adds DB8500 thermal management implementation according to the thermal
diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c
index b71a999d17d6..d650ae5fdf2a 100644
--- a/drivers/thermal/db8500_thermal.c
+++ b/drivers/thermal/db8500_thermal.c
@@ -13,13 +13,24 @@
 #include <linux/mfd/dbx500-prcmu.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/platform_data/db8500_thermal.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
 
 #define PRCMU_DEFAULT_MEASURE_TIME	0xFFF
 #define PRCMU_DEFAULT_LOW_TEMP		0
+#define COOLING_DEV_MAX 8
+
+struct db8500_trip_point {
+	unsigned long temp;
+	enum thermal_trip_type type;
+	char cdev_name[COOLING_DEV_MAX][THERMAL_NAME_LENGTH];
+};
+
+struct db8500_thsens_platform_data {
+	struct db8500_trip_point trip_points[THERMAL_MAX_TRIPS];
+	int num_trips;
+};
 
 struct db8500_thermal_zone {
 	struct thermal_zone_device *therm_dev;
@@ -301,7 +312,6 @@ static void db8500_thermal_work(struct work_struct *work)
 	dev_dbg(&pzone->therm_dev->device, "thermal work finished.\n");
 }
 
-#ifdef CONFIG_OF
 static struct db8500_thsens_platform_data*
 		db8500_thermal_parse_dt(struct platform_device *pdev)
 {
@@ -370,13 +380,6 @@ err_parse_dt:
 	dev_err(&pdev->dev, "Parsing device tree data error.\n");
 	return NULL;
 }
-#else
-static inline struct db8500_thsens_platform_data*
-		db8500_thermal_parse_dt(struct platform_device *pdev)
-{
-	return NULL;
-}
-#endif
 
 static int db8500_thermal_probe(struct platform_device *pdev)
 {
@@ -386,11 +389,10 @@ static int db8500_thermal_probe(struct platform_device *pdev)
 	int low_irq, high_irq, ret = 0;
 	unsigned long dft_low, dft_high;
 
-	if (np)
-		ptrips = db8500_thermal_parse_dt(pdev);
-	else
-		ptrips = dev_get_platdata(&pdev->dev);
+	if (!np)
+		return -EINVAL;
 
+	ptrips = db8500_thermal_parse_dt(pdev);
 	if (!ptrips)
 		return -EINVAL;
 
@@ -498,13 +500,11 @@ static int db8500_thermal_resume(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id db8500_thermal_match[] = {
 	{ .compatible = "stericsson,db8500-thermal" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, db8500_thermal_match);
-#endif
 
 static struct platform_driver db8500_thermal_driver = {
 	.driver = {
diff --git a/include/linux/platform_data/db8500_thermal.h b/include/linux/platform_data/db8500_thermal.h
deleted file mode 100644
index 55e55750a165..000000000000
--- a/include/linux/platform_data/db8500_thermal.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * db8500_thermal.h - DB8500 Thermal Management Implementation
- *
- * Copyright (C) 2012 ST-Ericsson
- * Copyright (C) 2012 Linaro Ltd.
- *
- * Author: Hongbo Zhang <hongbo.zhang@linaro.com>
- */
-
-#ifndef _DB8500_THERMAL_H_
-#define _DB8500_THERMAL_H_
-
-#include <linux/thermal.h>
-
-#define COOLING_DEV_MAX 8
-
-struct db8500_trip_point {
-	unsigned long temp;
-	enum thermal_trip_type type;
-	char cdev_name[COOLING_DEV_MAX][THERMAL_NAME_LENGTH];
-};
-
-struct db8500_thsens_platform_data {
-	struct db8500_trip_point trip_points[THERMAL_MAX_TRIPS];
-	int num_trips;
-};
-
-#endif /* _DB8500_THERMAL_H_ */
-- 
cgit v1.2.3


From 20ff1cb506727f81acba59acab8a0f37e1a13e43 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 24 Sep 2019 07:40:06 +0900
Subject: netfilter: ebtables: use __u8 instead of uint8_t in uapi header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When CONFIG_UAPI_HEADER_TEST=y, exported headers are compile-tested to
make sure they can be included from user-space.

Currently, linux/netfilter_bridge/ebtables.h is excluded from the test
coverage. To make it join the compile-test, we need to fix the build
errors attached below.

For a case like this, we decided to use __u{8,16,32,64} variable types
in this discussion:

  https://lkml.org/lkml/2019/6/5/18

Build log:

  CC      usr/include/linux/netfilter_bridge/ebtables.h.s
In file included from <command-line>:32:0:
./usr/include/linux/netfilter_bridge/ebtables.h:126:4: error: unknown type name ‘uint8_t’
    uint8_t revision;
    ^~~~~~~
./usr/include/linux/netfilter_bridge/ebtables.h:139:4: error: unknown type name ‘uint8_t’
    uint8_t revision;
    ^~~~~~~
./usr/include/linux/netfilter_bridge/ebtables.h:152:4: error: unknown type name ‘uint8_t’
    uint8_t revision;
    ^~~~~~~

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_bridge/ebtables.h | 6 +++---
 usr/include/Makefile                           | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index 3b86c14ea49d..8076c940ffeb 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -123,7 +123,7 @@ struct ebt_entry_match {
 	union {
 		struct {
 			char name[EBT_EXTENSION_MAXNAMELEN];
-			uint8_t revision;
+			__u8 revision;
 		};
 		struct xt_match *match;
 	} u;
@@ -136,7 +136,7 @@ struct ebt_entry_watcher {
 	union {
 		struct {
 			char name[EBT_EXTENSION_MAXNAMELEN];
-			uint8_t revision;
+			__u8 revision;
 		};
 		struct xt_target *watcher;
 	} u;
@@ -149,7 +149,7 @@ struct ebt_entry_target {
 	union {
 		struct {
 			char name[EBT_EXTENSION_MAXNAMELEN];
-			uint8_t revision;
+			__u8 revision;
 		};
 		struct xt_target *target;
 	} u;
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 1fb6abe29b2f..379cc5abc162 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -38,7 +38,6 @@ header-test- += linux/ivtv.h
 header-test- += linux/jffs2.h
 header-test- += linux/kexec.h
 header-test- += linux/matroxfb.h
-header-test- += linux/netfilter_bridge/ebtables.h
 header-test- += linux/netfilter_ipv4/ipt_LOG.h
 header-test- += linux/netfilter_ipv6/ip6t_LOG.h
 header-test- += linux/nfc.h
-- 
cgit v1.2.3


From 9b05b6e11d5e93a3a517cadc12b9836e0470c255 Mon Sep 17 00:00:00 2001
From: Laura Garcia Liebana <nevola@gmail.com>
Date: Tue, 24 Sep 2019 14:42:44 +0200
Subject: netfilter: nf_tables: bogus EBUSY when deleting flowtable after flush

The deletion of a flowtable after a flush in the same transaction
results in EBUSY. This patch adds an activation and deactivation of
flowtables in order to update the _use_ counter.

Signed-off-by: Laura Garcia Liebana <nevola@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ++++
 net/netfilter/nf_tables_api.c     | 16 ++++++++++++++++
 net/netfilter/nft_flow_offload.c  | 19 +++++++++++++++++++
 3 files changed, 39 insertions(+)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index a26d64056fc8..001d294edf57 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1183,6 +1183,10 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
 					   const struct nlattr *nla,
 					   u8 genmask);
 
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+				    struct nft_flowtable *flowtable,
+				    enum nft_trans_phase phase);
+
 void nft_register_flowtable_type(struct nf_flowtable_type *type);
 void nft_unregister_flowtable_type(struct nf_flowtable_type *type);
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6dc46f9b5f7b..d481f9baca2f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5598,6 +5598,22 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
 }
 EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
 
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+				    struct nft_flowtable *flowtable,
+				    enum nft_trans_phase phase)
+{
+	switch (phase) {
+	case NFT_TRANS_PREPARE:
+	case NFT_TRANS_ABORT:
+	case NFT_TRANS_RELEASE:
+		flowtable->use--;
+		/* fall through */
+	default:
+		return;
+	}
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable);
+
 static struct nft_flowtable *
 nft_flowtable_lookup_byhandle(const struct nft_table *table,
 			      const struct nlattr *nla, u8 genmask)
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 22cf236eb5d5..f29bbc74c4bf 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -177,6 +177,23 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
 	return nf_ct_netns_get(ctx->net, ctx->family);
 }
 
+static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
+					const struct nft_expr *expr,
+					enum nft_trans_phase phase)
+{
+	struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+	nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
+}
+
+static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+				      const struct nft_expr *expr)
+{
+	struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+	priv->flowtable->use++;
+}
+
 static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
 				     const struct nft_expr *expr)
 {
@@ -205,6 +222,8 @@ static const struct nft_expr_ops nft_flow_offload_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
 	.eval		= nft_flow_offload_eval,
 	.init		= nft_flow_offload_init,
+	.activate	= nft_flow_offload_activate,
+	.deactivate	= nft_flow_offload_deactivate,
 	.destroy	= nft_flow_offload_destroy,
 	.validate	= nft_flow_offload_validate,
 	.dump		= nft_flow_offload_dump,
-- 
cgit v1.2.3


From 3fbd7ee285b2bbc6eebd15a3c8786d9776a402a8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 14 Sep 2019 07:33:34 -0500
Subject: tasks: Add a count of task RCU users

Add a count of the number of RCU users (currently 1) of the task
struct so that we can later add the scheduler case and get rid of the
very subtle task_rcu_dereference(), and just use rcu_dereference().

As suggested by Oleg have the count overlap rcu_head so that no
additional space in task_struct is required.

Inspired-by: Linus Torvalds <torvalds@linux-foundation.org>
Inspired-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87woebdplt.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h      | 5 ++++-
 include/linux/sched/task.h | 1 +
 kernel/exit.c              | 7 ++++++-
 kernel/fork.c              | 7 +++----
 4 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e2e91960d79f..8e43e54a02c7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1147,7 +1147,10 @@ struct task_struct {
 
 	struct tlbflush_unmap_batch	tlb_ubc;
 
-	struct rcu_head			rcu;
+	union {
+		refcount_t		rcu_users;
+		struct rcu_head		rcu;
+	};
 
 	/* Cache last used pipe for splice(): */
 	struct pipe_inode_info		*splice_pipe;
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 3d90ed8f75f0..153a683646ac 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -120,6 +120,7 @@ static inline void put_task_struct(struct task_struct *t)
 }
 
 struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
 extern int arch_task_struct_size __read_mostly;
diff --git a/kernel/exit.c b/kernel/exit.c
index 22ab6a4bdc51..3bcaec2ea3ba 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
 	put_task_struct(tsk);
 }
 
+void put_task_struct_rcu_user(struct task_struct *task)
+{
+	if (refcount_dec_and_test(&task->rcu_users))
+		call_rcu(&task->rcu, delayed_put_task_struct);
+}
 
 void release_task(struct task_struct *p)
 {
@@ -222,7 +227,7 @@ repeat:
 
 	write_unlock_irq(&tasklist_lock);
 	release_thread(p);
-	call_rcu(&p->rcu, delayed_put_task_struct);
+	put_task_struct_rcu_user(p);
 
 	p = leader;
 	if (unlikely(zap_leader))
diff --git a/kernel/fork.c b/kernel/fork.c
index 1d1cd06edbc1..7eefe338d7a2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -902,10 +902,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (orig->cpus_ptr == &orig->cpus_mask)
 		tsk->cpus_ptr = &tsk->cpus_mask;
 
-	/*
-	 * One for us, one for whoever does the "release_task()" (usually
-	 * parent)
-	 */
+	/* One for the user space visible state that goes away when reaped. */
+	refcount_set(&tsk->rcu_users, 1);
+	/* One for the rcu users, and one for the scheduler */
 	refcount_set(&tsk->usage, 2);
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	tsk->btrace_seq = 0;
-- 
cgit v1.2.3


From 154abafc68bfb7c2ef2ad5308a3b2de8968c3f61 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 14 Sep 2019 07:34:30 -0500
Subject: tasks, sched/core: With a grace period after finish_task_switch(),
 remove unnecessary code

Remove work arounds that were written before there was a grace period
after tasks left the runqueue in finish_task_switch().

In particular now that there tasks exiting the runqueue exprience
a RCU grace period none of the work performed by task_rcu_dereference()
excpet the rcu_dereference() is necessary so replace task_rcu_dereference()
with rcu_dereference().

Remove the code in rcuwait_wait_event() that checks to ensure the current
task has not exited.  It is no longer necessary as it is guaranteed
that any running task will experience a RCU grace period after it
leaves the run queueue.

Remove the comment in rcuwait_wake_up() as it is no longer relevant.

Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87lfurdpk9.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rcuwait.h    | 20 +++-----------
 include/linux/sched/task.h |  1 -
 kernel/exit.c              | 67 ----------------------------------------------
 kernel/sched/fair.c        |  2 +-
 kernel/sched/membarrier.c  |  4 +--
 5 files changed, 7 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 563290fc194f..75c97e4bbc57 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
 	struct task_struct __rcu *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)				\
 ({									\
-	/*								\
-	 * Complain if we are called after do_exit()/exit_notify(),     \
-	 * as we cannot rely on the rcu critical region for the		\
-	 * wakeup side.							\
-	 */                                                             \
-	WARN_ON(current->exit_state);                                   \
-									\
 	rcu_assign_pointer((w)->task, current);				\
 	for (;;) {							\
 		/*							\
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 153a683646ac..4b1c3b664f51 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -119,7 +119,6 @@ static inline void put_task_struct(struct task_struct *t)
 		__put_task_struct(t);
 }
 
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 3bcaec2ea3ba..a46a50d67002 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -234,69 +234,6 @@ repeat:
 		goto repeat;
 }
 
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-	struct sighand_struct *sighand;
-	struct task_struct *task;
-
-	/*
-	 * We need to verify that release_task() was not called and thus
-	 * delayed_put_task_struct() can't run and drop the last reference
-	 * before rcu_read_unlock(). We check task->sighand != NULL,
-	 * but we can read the already freed and reused memory.
-	 */
-retry:
-	task = rcu_dereference(*ptask);
-	if (!task)
-		return NULL;
-
-	probe_kernel_address(&task->sighand, sighand);
-
-	/*
-	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-	 * was already freed we can not miss the preceding update of this
-	 * pointer.
-	 */
-	smp_rmb();
-	if (unlikely(task != READ_ONCE(*ptask)))
-		goto retry;
-
-	/*
-	 * We've re-checked that "task == *ptask", now we have two different
-	 * cases:
-	 *
-	 * 1. This is actually the same task/task_struct. In this case
-	 *    sighand != NULL tells us it is still alive.
-	 *
-	 * 2. This is another task which got the same memory for task_struct.
-	 *    We can't know this of course, and we can not trust
-	 *    sighand != NULL.
-	 *
-	 *    In this case we actually return a random value, but this is
-	 *    correct.
-	 *
-	 *    If we return NULL - we can pretend that we actually noticed that
-	 *    *ptask was updated when the previous task has exited. Or pretend
-	 *    that probe_slab_address(&sighand) reads NULL.
-	 *
-	 *    If we return the new task (because sighand is not NULL for any
-	 *    reason) - this is fine too. This (new) task can't go away before
-	 *    another gp pass.
-	 *
-	 *    And note: We could even eliminate the false positive if re-read
-	 *    task->sighand once again to avoid the falsely NULL. But this case
-	 *    is very unlikely so we don't care.
-	 */
-	if (!sighand)
-		return NULL;
-
-	return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
 	struct task_struct *task;
@@ -316,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	 */
 	smp_mb(); /* (B) */
 
-	/*
-	 * Avoid using task_rcu_dereference() magic as long as we are careful,
-	 * see comment in rcuwait_wait_event() regarding ->exit_state.
-	 */
 	task = rcu_dereference(w->task);
 	if (task)
 		wake_up_process(task);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3101c662426d..5bc23996ffae 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1602,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
 		return;
 
 	rcu_read_lock();
-	cur = task_rcu_dereference(&dst_rq->curr);
+	cur = rcu_dereference(dst_rq->curr);
 	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
 		cur = NULL;
 
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index aa8d75804108..b14250a11608 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -71,7 +71,7 @@ static int membarrier_global_expedited(void)
 			continue;
 
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
 				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
 			if (!fallback)
@@ -150,7 +150,7 @@ static int membarrier_private_expedited(int flags)
 		if (cpu == raw_smp_processor_id())
 			continue;
 		rcu_read_lock();
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm == current->mm) {
 			if (!fallback)
 				__cpumask_set_cpu(cpu, tmpmask);
-- 
cgit v1.2.3


From 2840cf02fae627860156737e83326df354ee4ec6 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 19 Sep 2019 13:37:01 -0400
Subject: sched/membarrier: Call sync_core only before usermode for same mm

When the prev and next task's mm change, switch_mm() provides the core
serializing guarantees before returning to usermode. The only case
where an explicit core serialization is needed is when the scheduler
keeps the same mm for prev and next.

Suggested-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190919173705.2181-4-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/mm.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 4a7944078cc3..8557ec664213 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -362,6 +362,8 @@ enum {
 
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 {
+	if (current->mm != mm)
+		return;
 	if (likely(!(atomic_read(&mm->membarrier_state) &
 		     MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
 		return;
-- 
cgit v1.2.3


From 227a4aadc75ba22fcb6c4e1c078817b8cbaae4ce Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Thu, 19 Sep 2019 13:37:02 -0400
Subject: sched/membarrier: Fix p->mm->membarrier_state racy load

The membarrier_state field is located within the mm_struct, which
is not guaranteed to exist when used from runqueue-lock-free iteration
on runqueues by the membarrier system call.

Copy the membarrier_state from the mm_struct into the scheduler runqueue
when the scheduler switches between mm.

When registering membarrier for mm, after setting the registration bit
in the mm membarrier state, issue a synchronize_rcu() to ensure the
scheduler observes the change. In order to take care of the case
where a runqueue keeps executing the target mm without swapping to
other mm, iterate over each runqueue and issue an IPI to copy the
membarrier_state from the mm_struct into each runqueue which have the
same mm which state has just been modified.

Move the mm membarrier_state field closer to pgd in mm_struct to use
a cache line already touched by the scheduler switch_mm.

The membarrier_execve() (now membarrier_exec_mmap) hook now needs to
clear the runqueue's membarrier state in addition to clear the mm
membarrier state, so move its implementation into the scheduler
membarrier code so it can access the runqueue structure.

Add memory barrier in membarrier_exec_mmap() prior to clearing
the membarrier state, ensuring memory accesses executed prior to exec
are not reordered with the stores clearing the membarrier state.

As suggested by Linus, move all membarrier.c RCU read-side locks outside
of the for each cpu loops.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190919173705.2181-5-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                 |   2 +-
 include/linux/mm_types.h  |  14 +++-
 include/linux/sched/mm.h  |   8 +--
 kernel/sched/core.c       |   4 +-
 kernel/sched/membarrier.c | 175 ++++++++++++++++++++++++++++++++++------------
 kernel/sched/sched.h      |  34 +++++++++
 6 files changed, 183 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/fs/exec.c b/fs/exec.c
index f7f6a140856a..555e93c7dec8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1033,6 +1033,7 @@ static int exec_mmap(struct mm_struct *mm)
 	}
 	task_lock(tsk);
 	active_mm = tsk->active_mm;
+	membarrier_exec_mmap(mm);
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
@@ -1825,7 +1826,6 @@ static int __do_execve_file(int fd, struct filename *filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
-	membarrier_execve(current);
 	rseq_execve(current);
 	acct_update_integrals(current);
 	task_numa_free(current, false);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6a7a1083b6fb..ec9bd3a6c827 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -383,6 +383,16 @@ struct mm_struct {
 		unsigned long highest_vm_end;	/* highest vma end address */
 		pgd_t * pgd;
 
+#ifdef CONFIG_MEMBARRIER
+		/**
+		 * @membarrier_state: Flags controlling membarrier behavior.
+		 *
+		 * This field is close to @pgd to hopefully fit in the same
+		 * cache-line, which needs to be touched by switch_mm().
+		 */
+		atomic_t membarrier_state;
+#endif
+
 		/**
 		 * @mm_users: The number of users including userspace.
 		 *
@@ -452,9 +462,7 @@ struct mm_struct {
 		unsigned long flags; /* Must use atomic bitops to access */
 
 		struct core_state *core_state; /* coredumping support */
-#ifdef CONFIG_MEMBARRIER
-		atomic_t membarrier_state;
-#endif
+
 #ifdef CONFIG_AIO
 		spinlock_t			ioctx_lock;
 		struct kioctx_table __rcu	*ioctx_table;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 8557ec664213..e6770012db18 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -370,10 +370,8 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
 	sync_core_before_usermode();
 }
 
-static inline void membarrier_execve(struct task_struct *t)
-{
-	atomic_set(&t->mm->membarrier_state, 0);
-}
+extern void membarrier_exec_mmap(struct mm_struct *mm);
+
 #else
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
 static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -382,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 {
 }
 #endif
-static inline void membarrier_execve(struct task_struct *t)
+static inline void membarrier_exec_mmap(struct mm_struct *mm)
 {
 }
 static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84c71160beb1..2d9a3947bef4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3358,15 +3358,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
 		else
 			prev->active_mm = NULL;
 	} else {                                        // to user
+		membarrier_switch_mm(rq, prev->active_mm, next->mm);
 		/*
 		 * sys_membarrier() requires an smp_mb() between setting
-		 * rq->curr and returning to userspace.
+		 * rq->curr / membarrier_switch_mm() and returning to userspace.
 		 *
 		 * The below provides this either through switch_mm(), or in
 		 * case 'prev->active_mm == next->mm' through
 		 * finish_task_switch()'s mmdrop().
 		 */
-
 		switch_mm_irqs_off(prev->active_mm, next->mm, next);
 
 		if (!prev->mm) {                        // from kernel
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 7ccbd0e19626..070cf433bb9a 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -30,6 +30,39 @@ static void ipi_mb(void *info)
 	smp_mb();	/* IPIs should be serializing but paranoid. */
 }
 
+static void ipi_sync_rq_state(void *info)
+{
+	struct mm_struct *mm = (struct mm_struct *) info;
+
+	if (current->mm != mm)
+		return;
+	this_cpu_write(runqueues.membarrier_state,
+		       atomic_read(&mm->membarrier_state));
+	/*
+	 * Issue a memory barrier after setting
+	 * MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
+	 * guarantee that no memory access following registration is reordered
+	 * before registration.
+	 */
+	smp_mb();
+}
+
+void membarrier_exec_mmap(struct mm_struct *mm)
+{
+	/*
+	 * Issue a memory barrier before clearing membarrier_state to
+	 * guarantee that no memory access prior to exec is reordered after
+	 * clearing this state.
+	 */
+	smp_mb();
+	atomic_set(&mm->membarrier_state, 0);
+	/*
+	 * Keep the runqueue membarrier_state in sync with this mm
+	 * membarrier_state.
+	 */
+	this_cpu_write(runqueues.membarrier_state, 0);
+}
+
 static int membarrier_global_expedited(void)
 {
 	int cpu;
@@ -56,6 +89,7 @@ static int membarrier_global_expedited(void)
 	}
 
 	cpus_read_lock();
+	rcu_read_lock();
 	for_each_online_cpu(cpu) {
 		struct task_struct *p;
 
@@ -70,17 +104,25 @@ static int membarrier_global_expedited(void)
 		if (cpu == raw_smp_processor_id())
 			continue;
 
-		rcu_read_lock();
+		if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
+		    MEMBARRIER_STATE_GLOBAL_EXPEDITED))
+			continue;
+
+		/*
+		 * Skip the CPU if it runs a kernel thread. The scheduler
+		 * leaves the prior task mm in place as an optimization when
+		 * scheduling a kthread.
+		 */
 		p = rcu_dereference(cpu_rq(cpu)->curr);
-		if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
-				   MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
-			if (!fallback)
-				__cpumask_set_cpu(cpu, tmpmask);
-			else
-				smp_call_function_single(cpu, ipi_mb, NULL, 1);
-		}
-		rcu_read_unlock();
+		if (p->flags & PF_KTHREAD)
+			continue;
+
+		if (!fallback)
+			__cpumask_set_cpu(cpu, tmpmask);
+		else
+			smp_call_function_single(cpu, ipi_mb, NULL, 1);
 	}
+	rcu_read_unlock();
 	if (!fallback) {
 		preempt_disable();
 		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
@@ -136,6 +178,7 @@ static int membarrier_private_expedited(int flags)
 	}
 
 	cpus_read_lock();
+	rcu_read_lock();
 	for_each_online_cpu(cpu) {
 		struct task_struct *p;
 
@@ -157,8 +200,8 @@ static int membarrier_private_expedited(int flags)
 			else
 				smp_call_function_single(cpu, ipi_mb, NULL, 1);
 		}
-		rcu_read_unlock();
 	}
+	rcu_read_unlock();
 	if (!fallback) {
 		preempt_disable();
 		smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
@@ -177,32 +220,78 @@ static int membarrier_private_expedited(int flags)
 	return 0;
 }
 
+static int sync_runqueues_membarrier_state(struct mm_struct *mm)
+{
+	int membarrier_state = atomic_read(&mm->membarrier_state);
+	cpumask_var_t tmpmask;
+	int cpu;
+
+	if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
+		this_cpu_write(runqueues.membarrier_state, membarrier_state);
+
+		/*
+		 * For single mm user, we can simply issue a memory barrier
+		 * after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
+		 * mm and in the current runqueue to guarantee that no memory
+		 * access following registration is reordered before
+		 * registration.
+		 */
+		smp_mb();
+		return 0;
+	}
+
+	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
+		return -ENOMEM;
+
+	/*
+	 * For mm with multiple users, we need to ensure all future
+	 * scheduler executions will observe @mm's new membarrier
+	 * state.
+	 */
+	synchronize_rcu();
+
+	/*
+	 * For each cpu runqueue, if the task's mm match @mm, ensure that all
+	 * @mm's membarrier state set bits are also set in in the runqueue's
+	 * membarrier state. This ensures that a runqueue scheduling
+	 * between threads which are users of @mm has its membarrier state
+	 * updated.
+	 */
+	cpus_read_lock();
+	rcu_read_lock();
+	for_each_online_cpu(cpu) {
+		struct rq *rq = cpu_rq(cpu);
+		struct task_struct *p;
+
+		p = rcu_dereference(&rq->curr);
+		if (p && p->mm == mm)
+			__cpumask_set_cpu(cpu, tmpmask);
+	}
+	rcu_read_unlock();
+
+	preempt_disable();
+	smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
+	preempt_enable();
+
+	free_cpumask_var(tmpmask);
+	cpus_read_unlock();
+
+	return 0;
+}
+
 static int membarrier_register_global_expedited(void)
 {
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	int ret;
 
 	if (atomic_read(&mm->membarrier_state) &
 	    MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
 		return 0;
 	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
-	if (atomic_read(&mm->mm_users) == 1) {
-		/*
-		 * For single mm user, single threaded process, we can
-		 * simply issue a memory barrier after setting
-		 * MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
-		 * no memory access following registration is reordered
-		 * before registration.
-		 */
-		smp_mb();
-	} else {
-		/*
-		 * For multi-mm user threads, we need to ensure all
-		 * future scheduler executions will observe the new
-		 * thread flag state for this mm.
-		 */
-		synchronize_rcu();
-	}
+	ret = sync_runqueues_membarrier_state(mm);
+	if (ret)
+		return ret;
 	atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
 		  &mm->membarrier_state);
 
@@ -213,12 +302,15 @@ static int membarrier_register_private_expedited(int flags)
 {
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
-	int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
+	int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
+	    set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
+	    ret;
 
 	if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
 		if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
 			return -EINVAL;
-		state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
+		ready_state =
+			MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
 	}
 
 	/*
@@ -226,20 +318,15 @@ static int membarrier_register_private_expedited(int flags)
 	 * groups, which use the same mm. (CLONE_VM but not
 	 * CLONE_THREAD).
 	 */
-	if ((atomic_read(&mm->membarrier_state) & state) == state)
+	if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
 		return 0;
-	atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
 	if (flags & MEMBARRIER_FLAG_SYNC_CORE)
-		atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
-			  &mm->membarrier_state);
-	if (atomic_read(&mm->mm_users) != 1) {
-		/*
-		 * Ensure all future scheduler executions will observe the
-		 * new thread flag state for this process.
-		 */
-		synchronize_rcu();
-	}
-	atomic_or(state, &mm->membarrier_state);
+		set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
+	atomic_or(set_state, &mm->membarrier_state);
+	ret = sync_runqueues_membarrier_state(mm);
+	if (ret)
+		return ret;
+	atomic_or(ready_state, &mm->membarrier_state);
 
 	return 0;
 }
@@ -253,8 +340,10 @@ static int membarrier_register_private_expedited(int flags)
  * command specified does not exist, not available on the running
  * kernel, or if the command argument is invalid, this system call
  * returns -EINVAL. For a given command, with flags argument set to 0,
- * this system call is guaranteed to always return the same value until
- * reboot.
+ * if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
+ * always return the same value until reboot. In addition, it can return
+ * -ENOMEM if there is not enough memory available to perform the system
+ * call.
  *
  * All memory accesses performed in program order from each targeted thread
  * is guaranteed to be ordered with respect to sys_membarrier(). If we use
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b3cb895d14a2..0db2c1b3361e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -911,6 +911,10 @@ struct rq {
 
 	atomic_t		nr_iowait;
 
+#ifdef CONFIG_MEMBARRIER
+	int membarrier_state;
+#endif
+
 #ifdef CONFIG_SMP
 	struct root_domain		*rd;
 	struct sched_domain __rcu	*sd;
@@ -2438,3 +2442,33 @@ static inline bool sched_energy_enabled(void)
 static inline bool sched_energy_enabled(void) { return false; }
 
 #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
+
+#ifdef CONFIG_MEMBARRIER
+/*
+ * The scheduler provides memory barriers required by membarrier between:
+ * - prior user-space memory accesses and store to rq->membarrier_state,
+ * - store to rq->membarrier_state and following user-space memory accesses.
+ * In the same way it provides those guarantees around store to rq->curr.
+ */
+static inline void membarrier_switch_mm(struct rq *rq,
+					struct mm_struct *prev_mm,
+					struct mm_struct *next_mm)
+{
+	int membarrier_state;
+
+	if (prev_mm == next_mm)
+		return;
+
+	membarrier_state = atomic_read(&next_mm->membarrier_state);
+	if (READ_ONCE(rq->membarrier_state) == membarrier_state)
+		return;
+
+	WRITE_ONCE(rq->membarrier_state, membarrier_state);
+}
+#else
+static inline void membarrier_switch_mm(struct rq *rq,
+					struct mm_struct *prev_mm,
+					struct mm_struct *next_mm)
+{
+}
+#endif
-- 
cgit v1.2.3


From 541be05095437a7e5e08e7d13a13e03ec0994ae7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 25 Sep 2019 16:45:56 -0700
Subject: linux/coff.h: add include guard

Add a header include guard just in case.

My motivation is to allow Kbuild to detect missing include guard:

https://patchwork.kernel.org/patch/11063011/

Before I enable this checker I want to fix as many headers as possible.

Link: http://lkml.kernel.org/r/20190728154728.11126-1-yamada.masahiro@socionext.com
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/coff.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/coff.h b/include/uapi/linux/coff.h
index e4a79f80b9a0..ab5c7e847eed 100644
--- a/include/uapi/linux/coff.h
+++ b/include/uapi/linux/coff.h
@@ -11,6 +11,9 @@
    more information about COFF, then O'Reilly has a very excellent book.
 */
 
+#ifndef _UAPI_LINUX_COFF_H
+#define _UAPI_LINUX_COFF_H
+
 #define  E_SYMNMLEN  8   /* Number of characters in a symbol name         */
 #define  E_FILNMLEN 14   /* Number of characters in a file name           */
 #define  E_DIMNUM    4   /* Number of array dimensions in auxiliary entry */
@@ -350,3 +353,5 @@ struct COFF_reloc {
 
 /* For new sections we haven't heard of before */
 #define COFF_DEF_SECTION_ALIGNMENT       4
+
+#endif /* _UAPI_LINUX_COFF_H */
-- 
cgit v1.2.3


From 444b8a83f1e01584ff2d53f5951d8e836c0070b5 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Wed, 25 Sep 2019 16:46:04 -0700
Subject: augmented rbtree: add comments for RB_DECLARE_CALLBACKS macro

Patch series "make RB_DECLARE_CALLBACKS more generic", v3.

These changes are intended to make the RB_DECLARE_CALLBACKS macro more
generic (allowing the aubmented subtree information to be a struct instead
of a scalar).

I have verified the compiled lib/interval_tree.o and mm/mmap.o files to
check that they didn't change.  This held as expected for interval_tree.o;
mmap.o did have some changes which could be reverted by marking
__vma_link_rb as noinline.  I did not add such a change to the patchset; I
felt it was reasonable enough to leave the inlining decision up to the
compiler.

This patch (of 3):

Add a short comment summarizing the arguments to RB_DECLARE_CALLBACKS.
The arguments are also now capitalized.  This copies the style of the
INTERVAL_TREE_DEFINE macro.

No functional changes in this commit, only comments and capitalization.

Link: http://lkml.kernel.org/r/20190703040156.56953-2-walken@google.com
Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Uladzislau Rezki <urezki@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h       | 54 +++++++++++++++++++++-------------
 tools/include/linux/rbtree_augmented.h | 54 +++++++++++++++++++++-------------
 2 files changed, 66 insertions(+), 42 deletions(-)

(limited to 'include')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 179faab29f52..979941600082 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -60,39 +60,51 @@ rb_insert_augmented_cached(struct rb_node *node,
 	rb_insert_augmented(node, &root->rb_root, augment);
 }
 
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	\
-			     rbtype, rbaugmented, rbcompute)		\
+/*
+ * Template for declaring augmented rbtree callbacks
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
+ */
+
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	\
+			     RBTYPE, RBAUGMENTED, RBCOMPUTE)		\
 static inline void							\
-rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
+RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
 {									\
 	while (rb != stop) {						\
-		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	\
-		rbtype augmented = rbcompute(node);			\
-		if (node->rbaugmented == augmented)			\
+		RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);	\
+		RBTYPE augmented = RBCOMPUTE(node);			\
+		if (node->RBAUGMENTED == augmented)			\
 			break;						\
-		node->rbaugmented = augmented;				\
-		rb = rb_parent(&node->rbfield);				\
+		node->RBAUGMENTED = augmented;				\
+		rb = rb_parent(&node->RBFIELD);				\
 	}								\
 }									\
 static inline void							\
-rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
+RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
 {									\
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
-	new->rbaugmented = old->rbaugmented;				\
+	RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);		\
+	RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);		\
+	new->RBAUGMENTED = old->RBAUGMENTED;				\
 }									\
 static void								\
-rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
+RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
 {									\
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
-	new->rbaugmented = old->rbaugmented;				\
-	old->rbaugmented = rbcompute(old);				\
+	RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);		\
+	RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);		\
+	new->RBAUGMENTED = old->RBAUGMENTED;				\
+	old->RBAUGMENTED = RBCOMPUTE(old);				\
 }									\
-rbstatic const struct rb_augment_callbacks rbname = {			\
-	.propagate = rbname ## _propagate,				\
-	.copy = rbname ## _copy,					\
-	.rotate = rbname ## _rotate					\
+RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
+	.propagate = RBNAME ## _propagate,				\
+	.copy = RBNAME ## _copy,					\
+	.rotate = RBNAME ## _rotate					\
 };
 
 
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 467a3eefe1d2..de3a480204ba 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -62,39 +62,51 @@ rb_insert_augmented_cached(struct rb_node *node,
 	rb_insert_augmented(node, &root->rb_root, augment);
 }
 
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	\
-			     rbtype, rbaugmented, rbcompute)		\
+/*
+ * Template for declaring augmented rbtree callbacks
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
+ */
+
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	\
+			     RBTYPE, RBAUGMENTED, RBCOMPUTE)		\
 static inline void							\
-rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
+RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
 {									\
 	while (rb != stop) {						\
-		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	\
-		rbtype augmented = rbcompute(node);			\
-		if (node->rbaugmented == augmented)			\
+		RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);	\
+		RBTYPE augmented = RBCOMPUTE(node);			\
+		if (node->RBAUGMENTED == augmented)			\
 			break;						\
-		node->rbaugmented = augmented;				\
-		rb = rb_parent(&node->rbfield);				\
+		node->RBAUGMENTED = augmented;				\
+		rb = rb_parent(&node->RBFIELD);				\
 	}								\
 }									\
 static inline void							\
-rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
+RBNAME ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
 {									\
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
-	new->rbaugmented = old->rbaugmented;				\
+	RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);		\
+	RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);		\
+	new->RBAUGMENTED = old->RBAUGMENTED;				\
 }									\
 static void								\
-rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
+RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
 {									\
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
-	new->rbaugmented = old->rbaugmented;				\
-	old->rbaugmented = rbcompute(old);				\
+	RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);		\
+	RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);		\
+	new->RBAUGMENTED = old->RBAUGMENTED;				\
+	old->RBAUGMENTED = RBCOMPUTE(old);				\
 }									\
-rbstatic const struct rb_augment_callbacks rbname = {			\
-	.propagate = rbname ## _propagate,				\
-	.copy = rbname ## _copy,					\
-	.rotate = rbname ## _rotate					\
+RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
+	.propagate = RBNAME ## _propagate,				\
+	.copy = RBNAME ## _copy,					\
+	.rotate = RBNAME ## _rotate					\
 };
 
 
-- 
cgit v1.2.3


From 315cc066b8ae8349a27887ad7a34e1916e9797fe Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Wed, 25 Sep 2019 16:46:07 -0700
Subject: augmented rbtree: add new RB_DECLARE_CALLBACKS_MAX macro

Add RB_DECLARE_CALLBACKS_MAX, which generates augmented rbtree callbacks
for the case where the augmented value is a scalar whose definition
follows a max(f(node)) pattern.  This actually covers all present uses of
RB_DECLARE_CALLBACKS, and saves some (source) code duplication in the
various RBCOMPUTE function definitions.

[walken@google.com: fix mm/vmalloc.c]
  Link: http://lkml.kernel.org/r/CANN689FXgK13wDYNh1zKxdipeTuALG4eKvKpsdZqKFJ-rvtGiQ@mail.gmail.com
[walken@google.com: re-add check to check_augmented()]
  Link: http://lkml.kernel.org/r/20190727022027.GA86863@google.com
Link: http://lkml.kernel.org/r/20190703040156.56953-3-walken@google.com
Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Uladzislau Rezki <urezki@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat_rbtree.c               | 19 +++--------------
 drivers/block/drbd/drbd_interval.c     | 29 +++-----------------------
 include/linux/interval_tree_generic.h  | 22 ++------------------
 include/linux/rbtree_augmented.h       | 36 ++++++++++++++++++++++++++++++++-
 lib/rbtree_test.c                      | 37 ++++++++++++++++------------------
 mm/mmap.c                              | 29 ++++++++++++++++----------
 mm/vmalloc.c                           |  5 ++---
 tools/include/linux/rbtree_augmented.h | 36 ++++++++++++++++++++++++++++++++-
 8 files changed, 115 insertions(+), 98 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index fa16036fa592..65ebe4b88f7c 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -54,23 +54,10 @@ static u64 get_subtree_max_end(struct rb_node *node)
 	return ret;
 }
 
-static u64 compute_subtree_max_end(struct memtype *data)
-{
-	u64 max_end = data->end, child_max_end;
-
-	child_max_end = get_subtree_max_end(data->rb.rb_right);
-	if (child_max_end > max_end)
-		max_end = child_max_end;
-
-	child_max_end = get_subtree_max_end(data->rb.rb_left);
-	if (child_max_end > max_end)
-		max_end = child_max_end;
-
-	return max_end;
-}
+#define NODE_END(node) ((node)->end)
 
-RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
-		     u64, subtree_max_end, compute_subtree_max_end)
+RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
+			 struct memtype, rb, u64, subtree_max_end, NODE_END)
 
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
diff --git a/drivers/block/drbd/drbd_interval.c b/drivers/block/drbd/drbd_interval.c
index c58986556161..651bd0236a99 100644
--- a/drivers/block/drbd/drbd_interval.c
+++ b/drivers/block/drbd/drbd_interval.c
@@ -13,33 +13,10 @@ sector_t interval_end(struct rb_node *node)
 	return this->end;
 }
 
-/**
- * compute_subtree_last  -  compute end of @node
- *
- * The end of an interval is the highest (start + (size >> 9)) value of this
- * node and of its children.  Called for @node and its parents whenever the end
- * may have changed.
- */
-static inline sector_t
-compute_subtree_last(struct drbd_interval *node)
-{
-	sector_t max = node->sector + (node->size >> 9);
-
-	if (node->rb.rb_left) {
-		sector_t left = interval_end(node->rb.rb_left);
-		if (left > max)
-			max = left;
-	}
-	if (node->rb.rb_right) {
-		sector_t right = interval_end(node->rb.rb_right);
-		if (right > max)
-			max = right;
-	}
-	return max;
-}
+#define NODE_END(node) ((node)->sector + ((node)->size >> 9))
 
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct drbd_interval, rb,
-		     sector_t, end, compute_subtree_last);
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+			 struct drbd_interval, rb, sector_t, end, NODE_END);
 
 /**
  * drbd_insert_interval  -  insert a new interval into a tree
diff --git a/include/linux/interval_tree_generic.h b/include/linux/interval_tree_generic.h
index 855476145fe1..aaa8a0767aa3 100644
--- a/include/linux/interval_tree_generic.h
+++ b/include/linux/interval_tree_generic.h
@@ -30,26 +30,8 @@
 									      \
 /* Callbacks for augmented rbtree insert and remove */			      \
 									      \
-static inline ITTYPE ITPREFIX ## _compute_subtree_last(ITSTRUCT *node)	      \
-{									      \
-	ITTYPE max = ITLAST(node), subtree_last;			      \
-	if (node->ITRB.rb_left) {					      \
-		subtree_last = rb_entry(node->ITRB.rb_left,		      \
-					ITSTRUCT, ITRB)->ITSUBTREE;	      \
-		if (max < subtree_last)					      \
-			max = subtree_last;				      \
-	}								      \
-	if (node->ITRB.rb_right) {					      \
-		subtree_last = rb_entry(node->ITRB.rb_right,		      \
-					ITSTRUCT, ITRB)->ITSUBTREE;	      \
-		if (max < subtree_last)					      \
-			max = subtree_last;				      \
-	}								      \
-	return max;							      \
-}									      \
-									      \
-RB_DECLARE_CALLBACKS(static, ITPREFIX ## _augment, ITSTRUCT, ITRB,	      \
-		     ITTYPE, ITSUBTREE, ITPREFIX ## _compute_subtree_last)    \
+RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment,			      \
+			 ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST)	      \
 									      \
 /* Insert / remove interval nodes from the tree */			      \
 									      \
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 979941600082..e5937e387e02 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -61,7 +61,7 @@ rb_insert_augmented_cached(struct rb_node *node,
 }
 
 /*
- * Template for declaring augmented rbtree callbacks
+ * Template for declaring augmented rbtree callbacks (generic case)
  *
  * RBSTATIC:    'static' or empty
  * RBNAME:      name of the rb_augment_callbacks structure
@@ -107,6 +107,40 @@ RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
 	.rotate = RBNAME ## _rotate					\
 };
 
+/*
+ * Template for declaring augmented rbtree callbacks,
+ * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes.
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that returns the per-node RBTYPE scalar
+ */
+
+#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	      \
+				 RBTYPE, RBAUGMENTED, RBCOMPUTE)	      \
+static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node)		      \
+{									      \
+	RBSTRUCT *child;						      \
+	RBTYPE max = RBCOMPUTE(node);					      \
+	if (node->RBFIELD.rb_left) {					      \
+		child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD);   \
+		if (child->RBAUGMENTED > max)				      \
+			max = child->RBAUGMENTED;			      \
+	}								      \
+	if (node->RBFIELD.rb_right) {					      \
+		child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD);  \
+		if (child->RBAUGMENTED > max)				      \
+			max = child->RBAUGMENTED;			      \
+	}								      \
+	return max;							      \
+}									      \
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,		      \
+		     RBTYPE, RBAUGMENTED, RBNAME ## _compute_max)
+
 
 #define	RB_RED		0
 #define	RB_BLACK	1
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 62b8ee92643d..41ae3c7570d3 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -77,26 +77,10 @@ static inline void erase_cached(struct test_node *node, struct rb_root_cached *r
 }
 
 
-static inline u32 augment_recompute(struct test_node *node)
-{
-	u32 max = node->val, child_augmented;
-	if (node->rb.rb_left) {
-		child_augmented = rb_entry(node->rb.rb_left, struct test_node,
-					   rb)->augmented;
-		if (max < child_augmented)
-			max = child_augmented;
-	}
-	if (node->rb.rb_right) {
-		child_augmented = rb_entry(node->rb.rb_right, struct test_node,
-					   rb)->augmented;
-		if (max < child_augmented)
-			max = child_augmented;
-	}
-	return max;
-}
+#define NODE_VAL(node) ((node)->val)
 
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb,
-		     u32, augmented, augment_recompute)
+RB_DECLARE_CALLBACKS_MAX(static, augment_callbacks,
+			 struct test_node, rb, u32, augmented, NODE_VAL)
 
 static void insert_augmented(struct test_node *node,
 			     struct rb_root_cached *root)
@@ -238,7 +222,20 @@ static void check_augmented(int nr_nodes)
 	check(nr_nodes);
 	for (rb = rb_first(&root.rb_root); rb; rb = rb_next(rb)) {
 		struct test_node *node = rb_entry(rb, struct test_node, rb);
-		WARN_ON_ONCE(node->augmented != augment_recompute(node));
+		u32 subtree, max = node->val;
+		if (node->rb.rb_left) {
+			subtree = rb_entry(node->rb.rb_left, struct test_node,
+					   rb)->augmented;
+			if (max < subtree)
+				max = subtree;
+		}
+		if (node->rb.rb_right) {
+			subtree = rb_entry(node->rb.rb_right, struct test_node,
+					   rb)->augmented;
+			if (max < subtree)
+				max = subtree;
+		}
+		WARN_ON_ONCE(node->augmented != max);
 	}
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index f1e8c7f93e04..14b7da317ec0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -289,9 +289,9 @@ out:
 	return retval;
 }
 
-static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
 {
-	unsigned long max, prev_end, subtree_gap;
+	unsigned long gap, prev_end;
 
 	/*
 	 * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
@@ -299,14 +299,21 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 	 * an unmapped area; whereas when expanding we only require one.
 	 * That's a little inconsistent, but keeps the code here simpler.
 	 */
-	max = vm_start_gap(vma);
+	gap = vm_start_gap(vma);
 	if (vma->vm_prev) {
 		prev_end = vm_end_gap(vma->vm_prev);
-		if (max > prev_end)
-			max -= prev_end;
+		if (gap > prev_end)
+			gap -= prev_end;
 		else
-			max = 0;
+			gap = 0;
 	}
+	return gap;
+}
+
+#ifdef CONFIG_DEBUG_VM_RB
+static unsigned long vma_compute_subtree_gap(struct vm_area_struct *vma)
+{
+	unsigned long max = vma_compute_gap(vma), subtree_gap;
 	if (vma->vm_rb.rb_left) {
 		subtree_gap = rb_entry(vma->vm_rb.rb_left,
 				struct vm_area_struct, vm_rb)->rb_subtree_gap;
@@ -322,7 +329,6 @@ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
 	return max;
 }
 
-#ifdef CONFIG_DEBUG_VM_RB
 static int browse_rb(struct mm_struct *mm)
 {
 	struct rb_root *root = &mm->mm_rb;
@@ -428,8 +434,9 @@ static void validate_mm(struct mm_struct *mm)
 #define validate_mm(mm) do { } while (0)
 #endif
 
-RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
-		     unsigned long, rb_subtree_gap, vma_compute_subtree_gap)
+RB_DECLARE_CALLBACKS_MAX(static, vma_gap_callbacks,
+			 struct vm_area_struct, vm_rb,
+			 unsigned long, rb_subtree_gap, vma_compute_gap)
 
 /*
  * Update augmented rbtree rb_subtree_gap values after vma->vm_start or
@@ -439,8 +446,8 @@ RB_DECLARE_CALLBACKS(static, vma_gap_callbacks, struct vm_area_struct, vm_rb,
 static void vma_gap_update(struct vm_area_struct *vma)
 {
 	/*
-	 * As it turns out, RB_DECLARE_CALLBACKS() already created a callback
-	 * function that does exactly what we want.
+	 * As it turns out, RB_DECLARE_CALLBACKS_MAX() already created
+	 * a callback function that does exactly what we want.
 	 */
 	vma_gap_callbacks_propagate(&vma->vm_rb, NULL);
 }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index fcadd3e25c0c..a3c70e275f4e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -396,9 +396,8 @@ compute_subtree_max_size(struct vmap_area *va)
 		get_subtree_max_size(va->rb_node.rb_right));
 }
 
-RB_DECLARE_CALLBACKS(static, free_vmap_area_rb_augment_cb,
-	struct vmap_area, rb_node, unsigned long, subtree_max_size,
-	compute_subtree_max_size)
+RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
+	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
 
 static void purge_vmap_area_lazy(void);
 static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index de3a480204ba..4e8c4c76e9a2 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -63,7 +63,7 @@ rb_insert_augmented_cached(struct rb_node *node,
 }
 
 /*
- * Template for declaring augmented rbtree callbacks
+ * Template for declaring augmented rbtree callbacks (generic case)
  *
  * RBSTATIC:    'static' or empty
  * RBNAME:      name of the rb_augment_callbacks structure
@@ -109,6 +109,40 @@ RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
 	.rotate = RBNAME ## _rotate					\
 };
 
+/*
+ * Template for declaring augmented rbtree callbacks,
+ * computing RBAUGMENTED scalar as max(RBCOMPUTE(node)) for all subtree nodes.
+ *
+ * RBSTATIC:    'static' or empty
+ * RBNAME:      name of the rb_augment_callbacks structure
+ * RBSTRUCT:    struct type of the tree nodes
+ * RBFIELD:     name of struct rb_node field within RBSTRUCT
+ * RBTYPE:      type of the RBAUGMENTED field
+ * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBCOMPUTE:   name of function that returns the per-node RBTYPE scalar
+ */
+
+#define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	      \
+				 RBTYPE, RBAUGMENTED, RBCOMPUTE)	      \
+static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node)		      \
+{									      \
+	RBSTRUCT *child;						      \
+	RBTYPE max = RBCOMPUTE(node);					      \
+	if (node->RBFIELD.rb_left) {					      \
+		child = rb_entry(node->RBFIELD.rb_left, RBSTRUCT, RBFIELD);   \
+		if (child->RBAUGMENTED > max)				      \
+			max = child->RBAUGMENTED;			      \
+	}								      \
+	if (node->RBFIELD.rb_right) {					      \
+		child = rb_entry(node->RBFIELD.rb_right, RBSTRUCT, RBFIELD);  \
+		if (child->RBAUGMENTED > max)				      \
+			max = child->RBAUGMENTED;			      \
+	}								      \
+	return max;							      \
+}									      \
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,		      \
+		     RBTYPE, RBAUGMENTED, RBNAME ## _compute_max)
+
 
 #define	RB_RED		0
 #define	RB_BLACK	1
-- 
cgit v1.2.3


From 6d2052d188d962ffb7ad3d413e6ffd5f276aec94 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Wed, 25 Sep 2019 16:46:10 -0700
Subject: augmented rbtree: rework the RB_DECLARE_CALLBACKS macro definition

Change the definition of the RBCOMPUTE function.  The propagate callback
repeatedly calls RBCOMPUTE as it moves from leaf to root.  it wants to
stop recomputing once the augmented subtree information doesn't change.
This was previously checked using the == operator, but that only works
when the augmented subtree information is a scalar field.  This commit
modifies the RBCOMPUTE function so that it now sets the augmented subtree
information instead of returning it, and returns a boolean value
indicating if the propagate callback should stop.

The motivation for this change is that I want to introduce augmented
rbtree uses where the augmented data for the subtree is a struct instead
of a scalar.

Link: http://lkml.kernel.org/r/20190703040156.56953-4-walken@google.com
Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Uladzislau Rezki <urezki@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h       | 24 ++++++++++++------------
 tools/include/linux/rbtree_augmented.h | 24 ++++++++++++------------
 2 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index e5937e387e02..fdd421b8d9ae 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -67,22 +67,19 @@ rb_insert_augmented_cached(struct rb_node *node,
  * RBNAME:      name of the rb_augment_callbacks structure
  * RBSTRUCT:    struct type of the tree nodes
  * RBFIELD:     name of struct rb_node field within RBSTRUCT
- * RBTYPE:      type of the RBAUGMENTED field
- * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
  * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
  */
 
-#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	\
-			     RBTYPE, RBAUGMENTED, RBCOMPUTE)		\
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,				\
+			     RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE)	\
 static inline void							\
 RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
 {									\
 	while (rb != stop) {						\
 		RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);	\
-		RBTYPE augmented = RBCOMPUTE(node);			\
-		if (node->RBAUGMENTED == augmented)			\
+		if (RBCOMPUTE(node, true))				\
 			break;						\
-		node->RBAUGMENTED = augmented;				\
 		rb = rb_parent(&node->RBFIELD);				\
 	}								\
 }									\
@@ -99,7 +96,7 @@ RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
 	RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);		\
 	RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);		\
 	new->RBAUGMENTED = old->RBAUGMENTED;				\
-	old->RBAUGMENTED = RBCOMPUTE(old);				\
+	RBCOMPUTE(old, false);						\
 }									\
 RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
 	.propagate = RBNAME ## _propagate,				\
@@ -122,7 +119,7 @@ RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
 
 #define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	      \
 				 RBTYPE, RBAUGMENTED, RBCOMPUTE)	      \
-static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node)		      \
+static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit)	      \
 {									      \
 	RBSTRUCT *child;						      \
 	RBTYPE max = RBCOMPUTE(node);					      \
@@ -136,10 +133,13 @@ static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node)		      \
 		if (child->RBAUGMENTED > max)				      \
 			max = child->RBAUGMENTED;			      \
 	}								      \
-	return max;							      \
+	if (exit && node->RBAUGMENTED == max)				      \
+		return true;						      \
+	node->RBAUGMENTED = max;					      \
+	return false;							      \
 }									      \
-RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,		      \
-		     RBTYPE, RBAUGMENTED, RBNAME ## _compute_max)
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,					      \
+		     RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
 
 
 #define	RB_RED		0
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 4e8c4c76e9a2..381aa948610d 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -69,22 +69,19 @@ rb_insert_augmented_cached(struct rb_node *node,
  * RBNAME:      name of the rb_augment_callbacks structure
  * RBSTRUCT:    struct type of the tree nodes
  * RBFIELD:     name of struct rb_node field within RBSTRUCT
- * RBTYPE:      type of the RBAUGMENTED field
- * RBAUGMENTED: name of RBTYPE field within RBSTRUCT holding data for subtree
+ * RBAUGMENTED: name of field within RBSTRUCT holding data for subtree
  * RBCOMPUTE:   name of function that recomputes the RBAUGMENTED data
  */
 
-#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	\
-			     RBTYPE, RBAUGMENTED, RBCOMPUTE)		\
+#define RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,				\
+			     RBSTRUCT, RBFIELD, RBAUGMENTED, RBCOMPUTE)	\
 static inline void							\
 RBNAME ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
 {									\
 	while (rb != stop) {						\
 		RBSTRUCT *node = rb_entry(rb, RBSTRUCT, RBFIELD);	\
-		RBTYPE augmented = RBCOMPUTE(node);			\
-		if (node->RBAUGMENTED == augmented)			\
+		if (RBCOMPUTE(node, true))				\
 			break;						\
-		node->RBAUGMENTED = augmented;				\
 		rb = rb_parent(&node->RBFIELD);				\
 	}								\
 }									\
@@ -101,7 +98,7 @@ RBNAME ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
 	RBSTRUCT *old = rb_entry(rb_old, RBSTRUCT, RBFIELD);		\
 	RBSTRUCT *new = rb_entry(rb_new, RBSTRUCT, RBFIELD);		\
 	new->RBAUGMENTED = old->RBAUGMENTED;				\
-	old->RBAUGMENTED = RBCOMPUTE(old);				\
+	RBCOMPUTE(old, false);						\
 }									\
 RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
 	.propagate = RBNAME ## _propagate,				\
@@ -124,7 +121,7 @@ RBSTATIC const struct rb_augment_callbacks RBNAME = {			\
 
 #define RB_DECLARE_CALLBACKS_MAX(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,	      \
 				 RBTYPE, RBAUGMENTED, RBCOMPUTE)	      \
-static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node)		      \
+static inline bool RBNAME ## _compute_max(RBSTRUCT *node, bool exit)	      \
 {									      \
 	RBSTRUCT *child;						      \
 	RBTYPE max = RBCOMPUTE(node);					      \
@@ -138,10 +135,13 @@ static inline RBTYPE RBNAME ## _compute_max(RBSTRUCT *node)		      \
 		if (child->RBAUGMENTED > max)				      \
 			max = child->RBAUGMENTED;			      \
 	}								      \
-	return max;							      \
+	if (exit && node->RBAUGMENTED == max)				      \
+		return true;						      \
+	node->RBAUGMENTED = max;					      \
+	return false;							      \
 }									      \
-RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, RBSTRUCT, RBFIELD,		      \
-		     RBTYPE, RBAUGMENTED, RBNAME ## _compute_max)
+RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME,					      \
+		     RBSTRUCT, RBFIELD, RBAUGMENTED, RBNAME ## _compute_max)
 
 
 #define	RB_RED		0
-- 
cgit v1.2.3


From 917cda2790c4bd624c5191b8d9edd12121749e86 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 25 Sep 2019 16:46:13 -0700
Subject: kernel-doc: core-api: include string.h into core-api

core-api should show all the various string functions including the newly
added stracpy and stracpy_pad.

Miscellanea:

o Update the Returns: value for strscpy
o fix a defect with %NUL)

[joe@perches.com: correct return of -E2BIG descriptions]
  Link: http://lkml.kernel.org/r/29f998b4c1a9d69fbeae70500ba0daa4b340c546.1563889130.git.joe@perches.com
Link: http://lkml.kernel.org/r/224a6ebf39955f4107c0c376d66155d970e46733.1563841972.git.joe@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Stephen Kitt <steve@sk2.org>
Cc: Nitin Gote <nitin.r.gote@intel.com>
Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/kernel-api.rst |  3 +++
 include/linux/string.h                |  5 +++--
 lib/string.c                          | 10 ++++++----
 3 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 08af5caf036d..f77de49b1d51 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -42,6 +42,9 @@ String Manipulation
 .. kernel-doc:: lib/string.c
    :export:
 
+.. kernel-doc:: include/linux/string.h
+   :internal:
+
 .. kernel-doc:: mm/util.c
    :functions: kstrdup kstrdup_const kstrndup kmemdup kmemdup_nul memdup_user
                vmemdup_user strndup_user memdup_user_nul
diff --git a/include/linux/string.h b/include/linux/string.h
index 4deb11f7976b..b2f9df7f0761 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -474,8 +474,9 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len,
  * But this can lead to bugs due to typos, or if prefix is a pointer
  * and not a constant. Instead use str_has_prefix().
  *
- * Returns: 0 if @str does not start with @prefix
-         strlen(@prefix) if @str does start with @prefix
+ * Returns:
+ * * strlen(@prefix) if @str starts with @prefix
+ * * 0 if @str does not start with @prefix
  */
 static __always_inline size_t str_has_prefix(const char *str, const char *prefix)
 {
diff --git a/lib/string.c b/lib/string.c
index 461fb620f85f..f7bc10da4259 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -173,8 +173,9 @@ EXPORT_SYMBOL(strlcpy);
  * doesn't unnecessarily force the tail of the destination buffer to be
  * zeroed.  If zeroing is desired please use strscpy_pad().
  *
- * Return: The number of characters copied (not including the trailing
- *         %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if count is 0 or @src was truncated.
  */
 ssize_t strscpy(char *dest, const char *src, size_t count)
 {
@@ -253,8 +254,9 @@ EXPORT_SYMBOL(strscpy);
  * For full explanation of why you may want to consider using the
  * 'strscpy' functions please see the function docstring for strscpy().
  *
- * Return: The number of characters copied (not including the trailing
- *         %NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * Returns:
+ * * The number of characters copied (not including the trailing %NUL)
+ * * -E2BIG if count is 0 or @src was truncated.
  */
 ssize_t strscpy_pad(char *dest, const char *src, size_t count)
 {
-- 
cgit v1.2.3


From d1a445d3b86c9341ce7a0954c23be0edb5c9bec5 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Wed, 25 Sep 2019 16:46:16 -0700
Subject: include/trace/events/writeback.h: fix -Wstringop-truncation warnings

There are many of those warnings.

In file included from ./arch/powerpc/include/asm/paca.h:15,
                 from ./arch/powerpc/include/asm/current.h:13,
                 from ./include/linux/thread_info.h:21,
                 from ./include/asm-generic/preempt.h:5,
                 from ./arch/powerpc/include/generated/asm/preempt.h:1,
                 from ./include/linux/preempt.h:78,
                 from ./include/linux/spinlock.h:51,
                 from fs/fs-writeback.c:19:
In function 'strncpy',
    inlined from 'perf_trace_writeback_page_template' at
./include/trace/events/writeback.h:56:1:
./include/linux/string.h:260:9: warning: '__builtin_strncpy' specified
bound 32 equals destination size [-Wstringop-truncation]
  return __builtin_strncpy(p, q, size);
         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Fix it by using the new strscpy_pad() which was introduced in "lib/string:
Add strscpy_pad() function" and will always be NUL-terminated instead of
strncpy().  Also, change strlcpy() to use strscpy_pad() in this file for
consistency.

Link: http://lkml.kernel.org/r/1564075099-27750-1-git-send-email-cai@lca.pw
Fixes: 455b2864686d ("writeback: Initial tracing support")
Fixes: 028c2dd184c0 ("writeback: Add tracing to balance_dirty_pages")
Fixes: e84d0a4f8e39 ("writeback: trace event writeback_queue_io")
Fixes: b48c104d2211 ("writeback: trace event bdi_dirty_ratelimit")
Fixes: cc1676d917f3 ("writeback: Move requeueing when I_SYNC set to writeback_sb_inodes()")
Fixes: 9fb0a7da0c52 ("writeback: add more tracepoints")
Signed-off-by: Qian Cai <cai@lca.pw>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Tobin C. Harding <tobin@kernel.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Joe Perches <joe@perches.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Nitin Gote <nitin.r.gote@intel.com>
Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Stephen Kitt <steve@sk2.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/trace/events/writeback.h | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 3a27335fce2c..c2ce6480b4b1 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -66,8 +66,9 @@ DECLARE_EVENT_CLASS(writeback_page_template,
 	),
 
 	TP_fast_assign(
-		strncpy(__entry->name,
-			mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)", 32);
+		strscpy_pad(__entry->name,
+			    mapping ? dev_name(inode_to_bdi(mapping->host)->dev) : "(unknown)",
+			    32);
 		__entry->ino = mapping ? mapping->host->i_ino : 0;
 		__entry->index = page->index;
 	),
@@ -110,8 +111,8 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 
 		/* may be called for files on pseudo FSes w/ unregistered bdi */
-		strncpy(__entry->name,
-			bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+		strscpy_pad(__entry->name,
+			    bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
 		__entry->ino		= inode->i_ino;
 		__entry->state		= inode->i_state;
 		__entry->flags		= flags;
@@ -316,8 +317,8 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
 	),
 
 	TP_fast_assign(
-		strncpy(__entry->name,
-			dev_name(inode_to_bdi(inode)->dev), 32);
+		strscpy_pad(__entry->name,
+			    dev_name(inode_to_bdi(inode)->dev), 32);
 		__entry->ino		= inode->i_ino;
 		__entry->sync_mode	= wbc->sync_mode;
 		__entry->cgroup_ino	= __trace_wbc_assign_cgroup(wbc);
@@ -360,8 +361,9 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__field(unsigned int, cgroup_ino)
 	),
 	TP_fast_assign(
-		strncpy(__entry->name,
-			wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
+		strscpy_pad(__entry->name,
+			    wb->bdi->dev ? dev_name(wb->bdi->dev) :
+			    "(unknown)", 32);
 		__entry->nr_pages = work->nr_pages;
 		__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
 		__entry->sync_mode = work->sync_mode;
@@ -414,7 +416,7 @@ DECLARE_EVENT_CLASS(writeback_class,
 		__field(unsigned int, cgroup_ino)
 	),
 	TP_fast_assign(
-		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+		strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
 		__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
 	),
 	TP_printk("bdi %s: cgroup_ino=%u",
@@ -436,7 +438,7 @@ TRACE_EVENT(writeback_bdi_register,
 		__array(char, name, 32)
 	),
 	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
 	),
 	TP_printk("bdi %s",
 		__entry->name
@@ -461,7 +463,7 @@ DECLARE_EVENT_CLASS(wbc_class,
 	),
 
 	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strscpy_pad(__entry->name, dev_name(bdi->dev), 32);
 		__entry->nr_to_write	= wbc->nr_to_write;
 		__entry->pages_skipped	= wbc->pages_skipped;
 		__entry->sync_mode	= wbc->sync_mode;
@@ -512,7 +514,7 @@ TRACE_EVENT(writeback_queue_io,
 	),
 	TP_fast_assign(
 		unsigned long *older_than_this = work->older_than_this;
-		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+		strscpy_pad(__entry->name, dev_name(wb->bdi->dev), 32);
 		__entry->older	= older_than_this ?  *older_than_this : 0;
 		__entry->age	= older_than_this ?
 				  (jiffies - *older_than_this) * 1000 / HZ : -1;
@@ -598,7 +600,7 @@ TRACE_EVENT(bdi_dirty_ratelimit,
 	),
 
 	TP_fast_assign(
-		strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+		strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
 		__entry->write_bw	= KBps(wb->write_bandwidth);
 		__entry->avg_write_bw	= KBps(wb->avg_write_bandwidth);
 		__entry->dirty_rate	= KBps(dirty_rate);
@@ -663,7 +665,7 @@ TRACE_EVENT(balance_dirty_pages,
 
 	TP_fast_assign(
 		unsigned long freerun = (thresh + bg_thresh) / 2;
-		strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+		strscpy_pad(__entry->bdi, dev_name(wb->bdi->dev), 32);
 
 		__entry->limit		= global_wb_domain.dirty_limit;
 		__entry->setpoint	= (global_wb_domain.dirty_limit +
@@ -723,8 +725,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
 	),
 
 	TP_fast_assign(
-		strncpy(__entry->name,
-		        dev_name(inode_to_bdi(inode)->dev), 32);
+		strscpy_pad(__entry->name,
+			    dev_name(inode_to_bdi(inode)->dev), 32);
 		__entry->ino		= inode->i_ino;
 		__entry->state		= inode->i_state;
 		__entry->dirtied_when	= inode->dirtied_when;
@@ -797,8 +799,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
 	),
 
 	TP_fast_assign(
-		strncpy(__entry->name,
-			dev_name(inode_to_bdi(inode)->dev), 32);
+		strscpy_pad(__entry->name,
+			    dev_name(inode_to_bdi(inode)->dev), 32);
 		__entry->ino		= inode->i_ino;
 		__entry->state		= inode->i_state;
 		__entry->dirtied_when	= inode->dirtied_when;
-- 
cgit v1.2.3


From 091cb0994edd20d67521094ac9c6ec9804058d9a Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Wed, 25 Sep 2019 16:46:29 -0700
Subject: lib/hexdump: make print_hex_dump_bytes() a nop on !DEBUG builds

I'm seeing a bunch of debug prints from a user of print_hex_dump_bytes()
in my kernel logs, but I don't have CONFIG_DYNAMIC_DEBUG enabled nor do I
have DEBUG defined in my build.  The problem is that
print_hex_dump_bytes() calls a wrapper function in lib/hexdump.c that
calls print_hex_dump() with KERN_DEBUG level.  There are three cases to
consider here

  1. CONFIG_DYNAMIC_DEBUG=y  --> call dynamic_hex_dum()
  2. CONFIG_DYNAMIC_DEBUG=n && DEBUG --> call print_hex_dump()
  3. CONFIG_DYNAMIC_DEBUG=n && !DEBUG --> stub it out

Right now, that last case isn't detected and we still call
print_hex_dump() from the stub wrapper.

Let's make print_hex_dump_bytes() only call print_hex_dump_debug() so that
it works properly in all cases.

Case #1, print_hex_dump_debug() calls dynamic_hex_dump() and we get same
behavior.  Case #2, print_hex_dump_debug() calls print_hex_dump() with
KERN_DEBUG and we get the same behavior.  Case #3, print_hex_dump_debug()
is a nop, changing behavior to what we want, i.e.  print nothing.

Link: http://lkml.kernel.org/r/20190816235624.115280-1-swboyd@chromium.org
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h | 22 +++++++++++++++-------
 lib/hexdump.c          | 21 ---------------------
 2 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index cefd374c47b1..c09d67edda3a 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -488,13 +488,6 @@ extern int hex_dump_to_buffer(const void *buf, size_t len, int rowsize,
 extern void print_hex_dump(const char *level, const char *prefix_str,
 			   int prefix_type, int rowsize, int groupsize,
 			   const void *buf, size_t len, bool ascii);
-#if defined(CONFIG_DYNAMIC_DEBUG)
-#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)	\
-	dynamic_hex_dump(prefix_str, prefix_type, 16, 1, buf, len, true)
-#else
-extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-				 const void *buf, size_t len);
-#endif /* defined(CONFIG_DYNAMIC_DEBUG) */
 #else
 static inline void print_hex_dump(const char *level, const char *prefix_str,
 				  int prefix_type, int rowsize, int groupsize,
@@ -526,4 +519,19 @@ static inline void print_hex_dump_debug(const char *prefix_str, int prefix_type,
 }
 #endif
 
+/**
+ * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
+ * @prefix_str: string to prefix each line with;
+ *  caller supplies trailing spaces for alignment if desired
+ * @prefix_type: controls whether prefix of an offset, address, or none
+ *  is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
+ * @buf: data blob to dump
+ * @len: number of bytes in the @buf
+ *
+ * Calls print_hex_dump(), with log level of KERN_DEBUG,
+ * rowsize of 16, groupsize of 1, and ASCII output included.
+ */
+#define print_hex_dump_bytes(prefix_str, prefix_type, buf, len)	\
+	print_hex_dump_debug(prefix_str, prefix_type, 16, 1, buf, len, true)
+
 #endif
diff --git a/lib/hexdump.c b/lib/hexdump.c
index b1d55b669ae2..147133f8eb2f 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -270,25 +270,4 @@ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 }
 EXPORT_SYMBOL(print_hex_dump);
 
-#if !defined(CONFIG_DYNAMIC_DEBUG)
-/**
- * print_hex_dump_bytes - shorthand form of print_hex_dump() with default params
- * @prefix_str: string to prefix each line with;
- *  caller supplies trailing spaces for alignment if desired
- * @prefix_type: controls whether prefix of an offset, address, or none
- *  is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE)
- * @buf: data blob to dump
- * @len: number of bytes in the @buf
- *
- * Calls print_hex_dump(), with log level of KERN_DEBUG,
- * rowsize of 16, groupsize of 1, and ASCII output included.
- */
-void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
-			  const void *buf, size_t len)
-{
-	print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
-		       buf, len, true);
-}
-EXPORT_SYMBOL(print_hex_dump_bytes);
-#endif /* !defined(CONFIG_DYNAMIC_DEBUG) */
 #endif /* defined(CONFIG_PRINTK) */
-- 
cgit v1.2.3


From 8495f7e6732ed248b648d36439795b42ec650b9e Mon Sep 17 00:00:00 2001
From: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Date: Wed, 25 Sep 2019 16:47:27 -0700
Subject: fork: improve error message for corrupted page tables

When a user process exits, the kernel cleans up the mm_struct of the user
process and during cleanup, check_mm() checks the page tables of the user
process for corruption (E.g: unexpected page flags set/cleared).  For
corrupted page tables, the error message printed by check_mm() isn't very
clear as it prints the loop index instead of page table type (E.g:
Resident file mapping pages vs Resident shared memory pages).  The loop
index in check_mm() is used to index rss_stat[] which represents
individual memory type stats.  Hence, instead of printing index, print
memory type, thereby improving error message.

Without patch:
--------------
[  204.836425] mm/pgtable-generic.c:29: bad p4d 0000000089eb4e92(800000025f941467)
[  204.836544] BUG: Bad rss-counter state mm:00000000f75895ea idx:0 val:2
[  204.836615] BUG: Bad rss-counter state mm:00000000f75895ea idx:1 val:5
[  204.836685] BUG: non-zero pgtables_bytes on freeing mm: 20480

With patch:
-----------
[   69.815453] mm/pgtable-generic.c:29: bad p4d 0000000084653642(800000025ca37467)
[   69.815872] BUG: Bad rss-counter state mm:00000000014a6c03 type:MM_FILEPAGES val:2
[   69.815962] BUG: Bad rss-counter state mm:00000000014a6c03 type:MM_ANONPAGES val:5
[   69.816050] BUG: non-zero pgtables_bytes on freeing mm: 20480

Also, change print function (from printk(KERN_ALERT, ..) to pr_alert()) so
that it matches the other print statement.

Link: http://lkml.kernel.org/r/da75b5153f617f4c5739c08ee6ebeb3d19db0fbc.1565123758.git.sai.praneeth.prakhya@intel.com
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types_task.h |  4 ++++
 kernel/fork.c                 | 16 ++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index d7016dcb245e..c1bc6731125c 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -36,6 +36,10 @@ struct vmacache {
 	struct vm_area_struct *vmas[VMACACHE_SIZE];
 };
 
+/*
+ * When updating this, please also update struct resident_page_types[] in
+ * kernel/fork.c
+ */
 enum {
 	MM_FILEPAGES,	/* Resident file mapping pages */
 	MM_ANONPAGES,	/* Resident anonymous pages */
diff --git a/kernel/fork.c b/kernel/fork.c
index 5a0fd518e04e..60763c043aa3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -125,6 +125,15 @@ int nr_threads;			/* The idle threads do not count.. */
 
 static int max_threads;		/* tunable limit on nr_threads */
 
+#define NAMED_ARRAY_INDEX(x)	[x] = __stringify(x)
+
+static const char * const resident_page_types[] = {
+	NAMED_ARRAY_INDEX(MM_FILEPAGES),
+	NAMED_ARRAY_INDEX(MM_ANONPAGES),
+	NAMED_ARRAY_INDEX(MM_SWAPENTS),
+	NAMED_ARRAY_INDEX(MM_SHMEMPAGES),
+};
+
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
@@ -645,12 +654,15 @@ static void check_mm(struct mm_struct *mm)
 {
 	int i;
 
+	BUILD_BUG_ON_MSG(ARRAY_SIZE(resident_page_types) != NR_MM_COUNTERS,
+			 "Please make sure 'struct resident_page_types[]' is updated as well");
+
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
 		long x = atomic_long_read(&mm->rss_stat.count[i]);
 
 		if (unlikely(x))
-			printk(KERN_ALERT "BUG: Bad rss-counter state "
-					  "mm:%p idx:%d val:%ld\n", mm, i, x);
+			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
+				 mm, resident_page_types[i], x);
 	}
 
 	if (mm_pgtables_bytes(mm))
-- 
cgit v1.2.3


From 2a4a4082cd4438333b5ecffdd15d1a484e5a83c7 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Wed, 25 Sep 2019 16:47:30 -0700
Subject: cpumask: nicer for_each_cpumask_and() signature

Mask arguments can be swapped without changing anything.  Make arguments
names reflect that:

	#define for_each_cpu_and(cpu, mask1, mask2)

Link: http://lkml.kernel.org/r/20190724183350.GA15041@avx2
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpumask.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index b5a5a1ed9efd..78a73eba64dd 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -200,8 +200,8 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_wrap(cpu, mask, start)	\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start))
-#define for_each_cpu_and(cpu, mask, and)	\
-	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and)
+#define for_each_cpu_and(cpu, mask1, mask2)	\
+	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2)
 #else
 /**
  * cpumask_first - get the first cpu in a cpumask
@@ -290,20 +290,20 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
 /**
  * for_each_cpu_and - iterate over every cpu in both masks
  * @cpu: the (optionally unsigned) integer iterator
- * @mask: the first cpumask pointer
- * @and: the second cpumask pointer
+ * @mask1: the first cpumask pointer
+ * @mask2: the second cpumask pointer
  *
  * This saves a temporary CPU mask in many places.  It is equivalent to:
  *	struct cpumask tmp;
- *	cpumask_and(&tmp, &mask, &and);
+ *	cpumask_and(&tmp, &mask1, &mask2);
  *	for_each_cpu(cpu, &tmp)
  *		...
  *
  * After the loop, cpu is >= nr_cpu_ids.
  */
-#define for_each_cpu_and(cpu, mask, and)				\
+#define for_each_cpu_and(cpu, mask1, mask2)				\
 	for ((cpu) = -1;						\
-		(cpu) = cpumask_next_and((cpu), (mask), (and)),		\
+		(cpu) = cpumask_next_and((cpu), (mask1), (mask2)),	\
 		(cpu) < nr_cpu_ids;)
 #endif /* SMP */
 
-- 
cgit v1.2.3


From d5372c39132958679c480d0295dd328c741c7a41 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Wed, 25 Sep 2019 16:47:36 -0700
Subject: kexec: restore arch_kexec_kernel_image_probe declaration

arch_kexec_kernel_image_probe function declaration has been removed by
commit 9ec4ecef0af7 ("kexec_file,x86,powerpc: factor out kexec_file_ops
functions").  Still this function is overridden by couple of architectures
and proper prototype declaration is therefore important, so bring it back.
This fixes the following sparse warning on s390:
arch/s390/kernel/machine_kexec_file.c:333:5: warning: symbol
'arch_kexec_kernel_image_probe' was not declared.  Should it be static?

Link: http://lkml.kernel.org/r/patch.git-ff1c9045ebdc.your-ad-here.call-01564402297-ext-5690@work.hours
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Acked-by: Dave Young <dyoung@redhat.com>
Reviewed-by: Bhupesh Sharma <bhsharma@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kexec.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index f0b809258ed3..cc162f3e6461 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -183,6 +183,8 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
 				   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+					 unsigned long buf_len);
 void * __weak arch_kexec_kernel_image_load(struct kimage *image);
 int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
 					    Elf_Shdr *section,
-- 
cgit v1.2.3


From 9dd819a15162f8f82a6001b090caa38c18297b39 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:47:39 -0700
Subject: uaccess: add missing __must_check attributes

The usercopy implementation comments describe that callers of the
copy_*_user() family of functions must always have their return values
checked.  This can be enforced at compile time with __must_check, so add
it where needed.

Link: http://lkml.kernel.org/r/201908251609.ADAD5CAAC1@keescook
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/thread_info.h |  2 +-
 include/linux/uaccess.h     | 21 +++++++++++----------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 8d8821b3689a..659a4400517b 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -134,7 +134,7 @@ static inline void copy_overflow(int size, unsigned long count)
 	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
 }
 
-static __always_inline bool
+static __always_inline __must_check bool
 check_copy_size(const void *addr, size_t bytes, bool is_source)
 {
 	int sz = __compiletime_object_size(addr);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 34a038563d97..70bbdc38dc37 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -55,7 +55,7 @@
  * as usual) and both source and destination can trigger faults.
  */
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
 	kasan_check_write(to, n);
@@ -63,7 +63,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 	return raw_copy_from_user(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_fault();
@@ -85,7 +85,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
  * The caller should also make sure he pins the user space address
  * so that we don't result in page fault and sleep.
  */
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
 	kasan_check_read(from, n);
@@ -93,7 +93,7 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 	return raw_copy_to_user(to, from, n);
 }
 
-static __always_inline unsigned long
+static __always_inline __must_check unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
@@ -103,7 +103,7 @@ __copy_to_user(void __user *to, const void *from, unsigned long n)
 }
 
 #ifdef INLINE_COPY_FROM_USER
-static inline unsigned long
+static inline __must_check unsigned long
 _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long res = n;
@@ -117,12 +117,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
 	return res;
 }
 #else
-extern unsigned long
+extern __must_check unsigned long
 _copy_from_user(void *, const void __user *, unsigned long);
 #endif
 
 #ifdef INLINE_COPY_TO_USER
-static inline unsigned long
+static inline __must_check unsigned long
 _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
@@ -133,7 +133,7 @@ _copy_to_user(void __user *to, const void *from, unsigned long n)
 	return n;
 }
 #else
-extern unsigned long
+extern __must_check unsigned long
 _copy_to_user(void __user *, const void *, unsigned long);
 #endif
 
@@ -222,8 +222,9 @@ static inline bool pagefault_disabled(void)
 
 #ifndef ARCH_HAS_NOCACHE_UACCESS
 
-static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
-				const void __user *from, unsigned long n)
+static inline __must_check unsigned long
+__copy_from_user_inatomic_nocache(void *to, const void __user *from,
+				  unsigned long n)
 {
 	return __copy_from_user_inatomic(to, from, n);
 }
-- 
cgit v1.2.3


From 7d92bda271ddcbb2d1be2f82733dcb9bf8378010 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Wed, 25 Sep 2019 16:47:45 -0700
Subject: kgdb: don't use a notifier to enter kgdb at panic; call directly

Right now kgdb/kdb hooks up to debug panics by registering for the panic
notifier.  This works OK except that it means that kgdb/kdb gets called
_after_ the CPUs in the system are taken offline.  That means that if
anything important was happening on those CPUs (like something that might
have contributed to the panic) you can't debug them.

Specifically I ran into a case where I got a panic because a task was
"blocked for more than 120 seconds" which was detected on CPU 2.  I nicely
got shown stack traces in the kernel log for all CPUs including CPU 0,
which was running 'PID: 111 Comm: kworker/0:1H' and was in the middle of
__mmc_switch().

I then ended up at the kdb prompt where switched over to kgdb to try to
look at local variables of the process on CPU 0.  I found that I couldn't.
Digging more, I found that I had no info on any tasks running on CPUs
other than CPU 2 and that asking kdb for help showed me "Error: no saved
data for this cpu".  This was because all the CPUs were offline.

Let's move the entry of kdb/kgdb to a direct call from panic() and stop
using the generic notifier.  Putting a direct call in allows us to order
things more properly and it also doesn't seem like we're breaking any
abstractions by calling into the debugger from the panic function.

Daniel said:

: This patch changes the way kdump and kgdb interact with each other.
: However it would seem rather odd to have both tools simultaneously armed
: and, even if they were, the user still has the option to use panic_timeout
: to force a kdump to happen.  Thus I think the change of order is
: acceptable.

Link: http://lkml.kernel.org/r/20190703170354.217312-1-dianders@chromium.org
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Feng Tang <feng.tang@intel.com>
Cc: YueHaibing <yuehaibing@huawei.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kgdb.h      |  2 ++
 kernel/debug/debug_core.c | 31 +++++++++++--------------------
 kernel/panic.c            |  8 ++++++++
 3 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index fbf144aaa749..b072aeb1fd78 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -326,8 +326,10 @@ extern atomic_t			kgdb_active;
 	(raw_smp_processor_id() == atomic_read(&kgdb_active))
 extern bool dbg_is_early;
 extern void __init dbg_late_init(void);
+extern void kgdb_panic(const char *msg);
 #else /* ! CONFIG_KGDB */
 #define in_dbg_master() (0)
 #define dbg_late_init()
+static inline void kgdb_panic(const char *msg) {}
 #endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 10f1187b3907..f76d6f77dd5e 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -893,30 +893,25 @@ static struct sysrq_key_op sysrq_dbg_op = {
 };
 #endif
 
-static int kgdb_panic_event(struct notifier_block *self,
-			    unsigned long val,
-			    void *data)
+void kgdb_panic(const char *msg)
 {
+	if (!kgdb_io_module_registered)
+		return;
+
 	/*
-	 * Avoid entering the debugger if we were triggered due to a panic
-	 * We don't want to get stuck waiting for input from user in such case.
-	 * panic_timeout indicates the system should automatically
+	 * We don't want to get stuck waiting for input from user if
+	 * "panic_timeout" indicates the system should automatically
 	 * reboot on panic.
 	 */
 	if (panic_timeout)
-		return NOTIFY_DONE;
+		return;
 
 	if (dbg_kdb_mode)
-		kdb_printf("PANIC: %s\n", (char *)data);
+		kdb_printf("PANIC: %s\n", msg);
+
 	kgdb_breakpoint();
-	return NOTIFY_DONE;
 }
 
-static struct notifier_block kgdb_panic_event_nb = {
-       .notifier_call	= kgdb_panic_event,
-       .priority	= INT_MAX,
-};
-
 void __weak kgdb_arch_late(void)
 {
 }
@@ -965,8 +960,6 @@ static void kgdb_register_callbacks(void)
 			kgdb_arch_late();
 		register_module_notifier(&dbg_module_load_nb);
 		register_reboot_notifier(&dbg_reboot_notifier);
-		atomic_notifier_chain_register(&panic_notifier_list,
-					       &kgdb_panic_event_nb);
 #ifdef CONFIG_MAGIC_SYSRQ
 		register_sysrq_key('g', &sysrq_dbg_op);
 #endif
@@ -980,16 +973,14 @@ static void kgdb_register_callbacks(void)
 static void kgdb_unregister_callbacks(void)
 {
 	/*
-	 * When this routine is called KGDB should unregister from the
-	 * panic handler and clean up, making sure it is not handling any
+	 * When this routine is called KGDB should unregister from
+	 * handlers and clean up, making sure it is not handling any
 	 * break exceptions at the time.
 	 */
 	if (kgdb_io_module_registered) {
 		kgdb_io_module_registered = 0;
 		unregister_reboot_notifier(&dbg_reboot_notifier);
 		unregister_module_notifier(&dbg_module_load_nb);
-		atomic_notifier_chain_unregister(&panic_notifier_list,
-					       &kgdb_panic_event_nb);
 		kgdb_arch_exit();
 #ifdef CONFIG_MAGIC_SYSRQ
 		unregister_sysrq_key('g', &sysrq_dbg_op);
diff --git a/kernel/panic.c b/kernel/panic.c
index 057540b6eee9..d1ece4c363b9 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -12,6 +12,7 @@
 #include <linux/debug_locks.h>
 #include <linux/sched/debug.h>
 #include <linux/interrupt.h>
+#include <linux/kgdb.h>
 #include <linux/kmsg_dump.h>
 #include <linux/kallsyms.h>
 #include <linux/notifier.h>
@@ -219,6 +220,13 @@ void panic(const char *fmt, ...)
 		dump_stack();
 #endif
 
+	/*
+	 * If kgdb is enabled, give it a chance to run before we stop all
+	 * the other CPUs or else we won't be able to debug processes left
+	 * running on them.
+	 */
+	kgdb_panic(buf);
+
 	/*
 	 * If we have crashed and we have a crash kernel loaded let it handle
 	 * everything else.
-- 
cgit v1.2.3


From ee8711336c51708382627ebcaee5f2122b77dfef Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:47:52 -0700
Subject: bug: refactor away warn_slowpath_fmt_taint()

Patch series "Clean up WARN() "cut here" handling", v2.

Christophe Leroy noticed that the fix for missing "cut here" in the WARN()
case was adding explicit printk() calls instead of teaching the exception
handler to add it.  This refactors the bug/warn infrastructure to pass
this information as a new BUGFLAG.

Longer details repeated from the last patch in the series:

bug: move WARN_ON() "cut here" into exception handler

The original cleanup of "cut here" missed the WARN_ON() case (that does
not have a printk message), which was fixed recently by adding an explicit
printk of "cut here".  This had the downside of adding a printk() to every
WARN_ON() caller, which reduces the utility of using an instruction
exception to streamline the resulting code.  By making this a new BUGFLAG,
all of these can be removed and "cut here" can be handled by the exception
handler.

This was very pronounced on PowerPC, but the effect can be seen on x86 as
well.  The resulting text size of a defconfig build shows some small
savings from this patch:

   text    data     bss     dec     hex filename
19691167        5134320 1646664 26472151        193eed7 vmlinux.before
19676362        5134260 1663048 26473670        193f4c6 vmlinux.after

This change also opens the door for creating something like BUG_MSG(),
where a custom printk() before issuing BUG(), without confusing the "cut
here" line.

This patch (of 7):

There's no reason to have specialized helpers for passing the warn taint
down to __warn().  Consolidate and refactor helper macros, removing
__WARN_printf() and warn_slowpath_fmt_taint().

Link: http://lkml.kernel.org/r/20190819234111.9019-2-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Drew Davenport <ddavenport@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 13 ++++---------
 kernel/panic.c            | 18 +++---------------
 2 files changed, 7 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 7357a3c942a0..c3a9c16a2b69 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -90,24 +90,19 @@ struct bug_entry {
  * Use the versions with printk format strings to provide better diagnostics.
  */
 #ifndef __WARN_TAINT
-extern __printf(3, 4)
-void warn_slowpath_fmt(const char *file, const int line,
-		       const char *fmt, ...);
 extern __printf(4, 5)
-void warn_slowpath_fmt_taint(const char *file, const int line, unsigned taint,
-			     const char *fmt, ...);
+void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
+		       const char *fmt, ...);
 extern void warn_slowpath_null(const char *file, const int line);
 #define WANT_WARN_ON_SLOWPATH
 #define __WARN()		warn_slowpath_null(__FILE__, __LINE__)
-#define __WARN_printf(arg...)	warn_slowpath_fmt(__FILE__, __LINE__, arg)
 #define __WARN_printf_taint(taint, arg...)				\
-	warn_slowpath_fmt_taint(__FILE__, __LINE__, taint, arg)
+	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 #define __WARN() do { \
 	printk(KERN_WARNING CUT_HERE); __WARN_TAINT(TAINT_WARN); \
 } while (0)
-#define __WARN_printf(arg...)	__WARN_printf_taint(TAINT_WARN, arg)
 #define __WARN_printf_taint(taint, arg...)				\
 	do { __warn_printk(arg); __WARN_TAINT(taint); } while (0)
 #endif
@@ -132,7 +127,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN(condition, format...) ({					\
 	int __ret_warn_on = !!(condition);				\
 	if (unlikely(__ret_warn_on))					\
-		__WARN_printf(format);					\
+		__WARN_printf_taint(TAINT_WARN, format);		\
 	unlikely(__ret_warn_on);					\
 })
 #endif
diff --git a/kernel/panic.c b/kernel/panic.c
index d1ece4c363b9..1d89f5423426 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -600,20 +600,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 }
 
 #ifdef WANT_WARN_ON_SLOWPATH
-void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
-{
-	struct warn_args args;
-
-	args.fmt = fmt;
-	va_start(args.args, fmt);
-	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL,
-	       &args);
-	va_end(args.args);
-}
-EXPORT_SYMBOL(warn_slowpath_fmt);
-
-void warn_slowpath_fmt_taint(const char *file, int line,
-			     unsigned taint, const char *fmt, ...)
+void warn_slowpath_fmt(const char *file, int line, unsigned taint,
+		       const char *fmt, ...)
 {
 	struct warn_args args;
 
@@ -622,7 +610,7 @@ void warn_slowpath_fmt_taint(const char *file, int line,
 	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
 	va_end(args.args);
 }
-EXPORT_SYMBOL(warn_slowpath_fmt_taint);
+EXPORT_SYMBOL(warn_slowpath_fmt);
 
 void warn_slowpath_null(const char *file, int line)
 {
-- 
cgit v1.2.3


From 89348fc31441f0270b040fbeb68b6d7d13504f36 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:47:55 -0700
Subject: bug: rename __WARN_printf_taint() to __WARN_printf()

This just renames the helper to improve readability.

Link: http://lkml.kernel.org/r/20190819234111.9019-3-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Drew Davenport <ddavenport@chromium.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index c3a9c16a2b69..2d35bbf687d0 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -96,14 +96,14 @@ void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 extern void warn_slowpath_null(const char *file, const int line);
 #define WANT_WARN_ON_SLOWPATH
 #define __WARN()		warn_slowpath_null(__FILE__, __LINE__)
-#define __WARN_printf_taint(taint, arg...)				\
+#define __WARN_printf(taint, arg...)					\
 	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 #define __WARN() do { \
 	printk(KERN_WARNING CUT_HERE); __WARN_TAINT(TAINT_WARN); \
 } while (0)
-#define __WARN_printf_taint(taint, arg...)				\
+#define __WARN_printf(taint, arg...)					\
 	do { __warn_printk(arg); __WARN_TAINT(taint); } while (0)
 #endif
 
@@ -127,7 +127,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN(condition, format...) ({					\
 	int __ret_warn_on = !!(condition);				\
 	if (unlikely(__ret_warn_on))					\
-		__WARN_printf_taint(TAINT_WARN, format);		\
+		__WARN_printf(TAINT_WARN, format);			\
 	unlikely(__ret_warn_on);					\
 })
 #endif
@@ -135,7 +135,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 #define WARN_TAINT(condition, taint, format...) ({			\
 	int __ret_warn_on = !!(condition);				\
 	if (unlikely(__ret_warn_on))					\
-		__WARN_printf_taint(taint, format);			\
+		__WARN_printf(taint, format);				\
 	unlikely(__ret_warn_on);					\
 })
 
-- 
cgit v1.2.3


From f2f84b05e02b7710a201f0017b3272ad7ef703d1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:47:58 -0700
Subject: bug: consolidate warn_slowpath_fmt() usage

Instead of having a separate helper for no printk output, just consolidate
the logic into warn_slowpath_fmt().

Link: http://lkml.kernel.org/r/20190819234111.9019-4-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Drew Davenport <ddavenport@chromium.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h |  3 +--
 kernel/panic.c            | 14 +++++++-------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 2d35bbf687d0..598d7072602f 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -93,9 +93,8 @@ struct bug_entry {
 extern __printf(4, 5)
 void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 		       const char *fmt, ...);
-extern void warn_slowpath_null(const char *file, const int line);
 #define WANT_WARN_ON_SLOWPATH
-#define __WARN()		warn_slowpath_null(__FILE__, __LINE__)
+#define __WARN()		__WARN_printf(TAINT_WARN, NULL)
 #define __WARN_printf(taint, arg...)					\
 	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
diff --git a/kernel/panic.c b/kernel/panic.c
index 1d89f5423426..79c153951b59 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -605,19 +605,19 @@ void warn_slowpath_fmt(const char *file, int line, unsigned taint,
 {
 	struct warn_args args;
 
+	if (!fmt) {
+		pr_warn(CUT_HERE);
+		__warn(file, line, __builtin_return_address(0), taint,
+		       NULL, NULL);
+		return;
+	}
+
 	args.fmt = fmt;
 	va_start(args.args, fmt);
 	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
 	va_end(args.args);
 }
 EXPORT_SYMBOL(warn_slowpath_fmt);
-
-void warn_slowpath_null(const char *file, int line)
-{
-	pr_warn(CUT_HERE);
-	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
-}
-EXPORT_SYMBOL(warn_slowpath_null);
 #else
 void __warn_printk(const char *fmt, ...)
 {
-- 
cgit v1.2.3


From d4bce140b4e739bceb4e239d4842cf8f346c1e0f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:48:04 -0700
Subject: bug: clean up helper macros to remove __WARN_TAINT()

In preparation for cleaning up "cut here" even more, this removes the
__WARN_*TAINT() helpers, as they limit the ability to add new BUGFLAG_*
flags to call sites.  They are removed by expanding them into full
__WARN_FLAGS() calls.

Link: http://lkml.kernel.org/r/20190819234111.9019-6-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Drew Davenport <ddavenport@chromium.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 598d7072602f..4b18e09094cf 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -62,13 +62,11 @@ struct bug_entry {
 #endif
 
 #ifdef __WARN_FLAGS
-#define __WARN_TAINT(taint)		__WARN_FLAGS(BUGFLAG_TAINT(taint))
-#define __WARN_ONCE_TAINT(taint)	__WARN_FLAGS(BUGFLAG_ONCE|BUGFLAG_TAINT(taint))
-
 #define WARN_ON_ONCE(condition) ({				\
 	int __ret_warn_on = !!(condition);			\
 	if (unlikely(__ret_warn_on))				\
-		__WARN_ONCE_TAINT(TAINT_WARN);			\
+		__WARN_FLAGS(BUGFLAG_ONCE |			\
+			     BUGFLAG_TAINT(TAINT_WARN));	\
 	unlikely(__ret_warn_on);				\
 })
 #endif
@@ -89,7 +87,7 @@ struct bug_entry {
  *
  * Use the versions with printk format strings to provide better diagnostics.
  */
-#ifndef __WARN_TAINT
+#ifndef __WARN_FLAGS
 extern __printf(4, 5)
 void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 		       const char *fmt, ...);
@@ -99,11 +97,14 @@ void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
-#define __WARN() do { \
-	printk(KERN_WARNING CUT_HERE); __WARN_TAINT(TAINT_WARN); \
-} while (0)
-#define __WARN_printf(taint, arg...)					\
-	do { __warn_printk(arg); __WARN_TAINT(taint); } while (0)
+#define __WARN() do {							\
+		printk(KERN_WARNING CUT_HERE);				\
+		__WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN));		\
+	} while (0)
+#define __WARN_printf(taint, arg...) do {				\
+		__warn_printk(arg);					\
+		__WARN_FLAGS(BUGFLAG_TAINT(taint));			\
+	} while (0)
 #endif
 
 /* used internally by panic.c */
-- 
cgit v1.2.3


From 2da1ead4d5f7fa5f61e5805655de1e245d03a763 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:48:08 -0700
Subject: bug: consolidate __WARN_FLAGS usage

Instead of having separate tests for __WARN_FLAGS, merge the two #ifdef
blocks and replace the synonym WANT_WARN_ON_SLOWPATH macro.

Link: http://lkml.kernel.org/r/20190819234111.9019-7-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Drew Davenport <ddavenport@chromium.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h | 18 +++++++-----------
 kernel/panic.c            |  2 +-
 2 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 4b18e09094cf..b4a2639130a0 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -61,16 +61,6 @@ struct bug_entry {
 #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
 #endif
 
-#ifdef __WARN_FLAGS
-#define WARN_ON_ONCE(condition) ({				\
-	int __ret_warn_on = !!(condition);			\
-	if (unlikely(__ret_warn_on))				\
-		__WARN_FLAGS(BUGFLAG_ONCE |			\
-			     BUGFLAG_TAINT(TAINT_WARN));	\
-	unlikely(__ret_warn_on);				\
-})
-#endif
-
 /*
  * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report
  * significant kernel issues that need prompt attention if they should ever
@@ -91,7 +81,6 @@ struct bug_entry {
 extern __printf(4, 5)
 void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 		       const char *fmt, ...);
-#define WANT_WARN_ON_SLOWPATH
 #define __WARN()		__WARN_printf(TAINT_WARN, NULL)
 #define __WARN_printf(taint, arg...)					\
 	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
@@ -105,6 +94,13 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 		__warn_printk(arg);					\
 		__WARN_FLAGS(BUGFLAG_TAINT(taint));			\
 	} while (0)
+#define WARN_ON_ONCE(condition) ({				\
+	int __ret_warn_on = !!(condition);			\
+	if (unlikely(__ret_warn_on))				\
+		__WARN_FLAGS(BUGFLAG_ONCE |			\
+			     BUGFLAG_TAINT(TAINT_WARN));	\
+	unlikely(__ret_warn_on);				\
+})
 #endif
 
 /* used internally by panic.c */
diff --git a/kernel/panic.c b/kernel/panic.c
index a643e5464296..47e8ebccc22b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -596,7 +596,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 	add_taint(taint, LOCKDEP_STILL_OK);
 }
 
-#ifdef WANT_WARN_ON_SLOWPATH
+#ifndef __WARN_FLAGS
 void warn_slowpath_fmt(const char *file, int line, unsigned taint,
 		       const char *fmt, ...)
 {
-- 
cgit v1.2.3


From a44f71a9ab99b509fec9d5a9f5c222debd89934f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 25 Sep 2019 16:48:11 -0700
Subject: bug: move WARN_ON() "cut here" into exception handler

The original clean up of "cut here" missed the WARN_ON() case (that does
not have a printk message), which was fixed recently by adding an explicit
printk of "cut here".  This had the downside of adding a printk() to every
WARN_ON() caller, which reduces the utility of using an instruction
exception to streamline the resulting code.  By making this a new BUGFLAG,
all of these can be removed and "cut here" can be handled by the exception
handler.

This was very pronounced on PowerPC, but the effect can be seen on x86 as
well.  The resulting text size of a defconfig build shows some small
savings from this patch:

   text    data     bss     dec     hex filename
19691167        5134320 1646664 26472151        193eed7 vmlinux.before
19676362        5134260 1663048 26473670        193f4c6 vmlinux.after

This change also opens the door for creating something like BUG_MSG(),
where a custom printk() before issuing BUG(), without confusing the "cut
here" line.

Link: http://lkml.kernel.org/r/201908200943.601DD59DCE@keescook
Fixes: 6b15f678fb7d ("include/asm-generic/bug.h: fix "cut here" for WARN_ON for __WARN_TAINT architectures")
Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christophe Leroy <christophe.leroy@c-s.fr>
Cc: Drew Davenport <ddavenport@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/bug.h |  8 +++-----
 lib/bug.c                 | 11 +++++++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index b4a2639130a0..384b5c835ced 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -10,6 +10,7 @@
 #define BUGFLAG_WARNING		(1 << 0)
 #define BUGFLAG_ONCE		(1 << 1)
 #define BUGFLAG_DONE		(1 << 2)
+#define BUGFLAG_NO_CUT_HERE	(1 << 3)	/* CUT_HERE already sent */
 #define BUGFLAG_TAINT(taint)	((taint) << 8)
 #define BUG_GET_TAINT(bug)	((bug)->flags >> 8)
 #endif
@@ -86,13 +87,10 @@ void warn_slowpath_fmt(const char *file, const int line, unsigned taint,
 	warn_slowpath_fmt(__FILE__, __LINE__, taint, arg)
 #else
 extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
-#define __WARN() do {							\
-		printk(KERN_WARNING CUT_HERE);				\
-		__WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN));		\
-	} while (0)
+#define __WARN()		__WARN_FLAGS(BUGFLAG_TAINT(TAINT_WARN))
 #define __WARN_printf(taint, arg...) do {				\
 		__warn_printk(arg);					\
-		__WARN_FLAGS(BUGFLAG_TAINT(taint));			\
+		__WARN_FLAGS(BUGFLAG_NO_CUT_HERE | BUGFLAG_TAINT(taint));\
 	} while (0)
 #define WARN_ON_ONCE(condition) ({				\
 	int __ret_warn_on = !!(condition);			\
diff --git a/lib/bug.c b/lib/bug.c
index 1077366f496b..8c98af0bf585 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -181,6 +181,15 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 		}
 	}
 
+	/*
+	 * BUG() and WARN_ON() families don't print a custom debug message
+	 * before triggering the exception handler, so we must add the
+	 * "cut here" line now. WARN() issues its own "cut here" before the
+	 * extra debugging message it writes before triggering the handler.
+	 */
+	if ((bug->flags & BUGFLAG_NO_CUT_HERE) == 0)
+		printk(KERN_DEFAULT CUT_HERE);
+
 	if (warning) {
 		/* this is a WARN_ON rather than BUG/BUG_ON */
 		__warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
@@ -188,8 +197,6 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 		return BUG_TRAP_TYPE_WARN;
 	}
 
-	printk(KERN_DEFAULT CUT_HERE);
-
 	if (file)
 		pr_crit("kernel BUG at %s:%u!\n", file, line);
 	else
-- 
cgit v1.2.3


From 9c276cc65a58faf98be8e56962745ec99ab87636 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Wed, 25 Sep 2019 16:49:08 -0700
Subject: mm: introduce MADV_COLD
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "Introduce MADV_COLD and MADV_PAGEOUT", v7.

- Background

The Android terminology used for forking a new process and starting an app
from scratch is a cold start, while resuming an existing app is a hot
start.  While we continually try to improve the performance of cold
starts, hot starts will always be significantly less power hungry as well
as faster so we are trying to make hot start more likely than cold start.

To increase hot start, Android userspace manages the order that apps
should be killed in a process called ActivityManagerService.
ActivityManagerService tracks every Android app or service that the user
could be interacting with at any time and translates that into a ranked
list for lmkd(low memory killer daemon).  They are likely to be killed by
lmkd if the system has to reclaim memory.  In that sense they are similar
to entries in any other cache.  Those apps are kept alive for
opportunistic performance improvements but those performance improvements
will vary based on the memory requirements of individual workloads.

- Problem

Naturally, cached apps were dominant consumers of memory on the system.
However, they were not significant consumers of swap even though they are
good candidate for swap.  Under investigation, swapping out only begins
once the low zone watermark is hit and kswapd wakes up, but the overall
allocation rate in the system might trip lmkd thresholds and cause a
cached process to be killed(we measured performance swapping out vs.
zapping the memory by killing a process.  Unsurprisingly, zapping is 10x
times faster even though we use zram which is much faster than real
storage) so kill from lmkd will often satisfy the high zone watermark,
resulting in very few pages actually being moved to swap.

- Approach

The approach we chose was to use a new interface to allow userspace to
proactively reclaim entire processes by leveraging platform information.
This allowed us to bypass the inaccuracy of the kernel’s LRUs for pages
that are known to be cold from userspace and to avoid races with lmkd by
reclaiming apps as soon as they entered the cached state.  Additionally,
it could provide many chances for platform to use much information to
optimize memory efficiency.

To achieve the goal, the patchset introduce two new options for madvise.
One is MADV_COLD which will deactivate activated pages and the other is
MADV_PAGEOUT which will reclaim private pages instantly.  These new
options complement MADV_DONTNEED and MADV_FREE by adding non-destructive
ways to gain some free memory space.  MADV_PAGEOUT is similar to
MADV_DONTNEED in a way that it hints the kernel that memory region is not
currently needed and should be reclaimed immediately; MADV_COLD is similar
to MADV_FREE in a way that it hints the kernel that memory region is not
currently needed and should be reclaimed when memory pressure rises.

This patch (of 5):

When a process expects no accesses to a certain memory range, it could
give a hint to kernel that the pages can be reclaimed when memory pressure
happens but data should be preserved for future use.  This could reduce
workingset eviction so it ends up increasing performance.

This patch introduces the new MADV_COLD hint to madvise(2) syscall.
MADV_COLD can be used by a process to mark a memory range as not expected
to be used in the near future.  The hint can help kernel in deciding which
pages to evict early during memory pressure.

It works for every LRU pages like MADV_[DONTNEED|FREE]. IOW, It moves

	active file page -> inactive file LRU
	active anon page -> inacdtive anon LRU

Unlike MADV_FREE, it doesn't move active anonymous pages to inactive file
LRU's head because MADV_COLD is a little bit different symantic.
MADV_FREE means it's okay to discard when the memory pressure because the
content of the page is *garbage* so freeing such pages is almost zero
overhead since we don't need to swap out and access afterward causes just
minor fault.  Thus, it would make sense to put those freeable pages in
inactive file LRU to compete other used-once pages.  It makes sense for
implmentaion point of view, too because it's not swapbacked memory any
longer until it would be re-dirtied.  Even, it could give a bonus to make
them be reclaimed on swapless system.  However, MADV_COLD doesn't mean
garbage so reclaiming them requires swap-out/in in the end so it's bigger
cost.  Since we have designed VM LRU aging based on cost-model, anonymous
cold pages would be better to position inactive anon's LRU list, not file
LRU.  Furthermore, it would help to avoid unnecessary scanning if system
doesn't have a swap device.  Let's start simpler way without adding
complexity at this moment.  However, keep in mind, too that it's a caveat
that workloads with a lot of pages cache are likely to ignore MADV_COLD on
anonymous memory because we rarely age anonymous LRU lists.

* man-page material

MADV_COLD (since Linux x.x)

Pages in the specified regions will be treated as less-recently-accessed
compared to pages in the system with similar access frequencies.  In
contrast to MADV_FREE, the contents of the region are preserved regardless
of subsequent writes to pages.

MADV_COLD cannot be applied to locked pages, Huge TLB pages, or VM_PFNMAP
pages.

[akpm@linux-foundation.org: resolve conflicts with hmm.git]
Link: http://lkml.kernel.org/r/20190726023435.214162-2-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: kbuild test robot <lkp@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Daniel Colascione <dancol@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Oleksandr Natalenko <oleksandr@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tim Murray <timmurray@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/uapi/asm/mman.h     |   2 +
 arch/mips/include/uapi/asm/mman.h      |   2 +
 arch/parisc/include/uapi/asm/mman.h    |   2 +
 arch/xtensa/include/uapi/asm/mman.h    |   2 +
 include/linux/swap.h                   |   1 +
 include/uapi/asm-generic/mman-common.h |   2 +
 mm/internal.h                          |   2 +-
 mm/madvise.c                           | 179 ++++++++++++++++++++++++++++++++-
 mm/oom_kill.c                          |   2 +-
 mm/swap.c                              |  42 ++++++++
 10 files changed, 232 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index ac23379b7a87..f3258fbf03d0 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -68,6 +68,8 @@
 #define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD	20		/* deactivate these pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index c2b40969eb1f..00ad09fc5eb1 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -95,6 +95,8 @@
 #define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD	20		/* deactivate these pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index c98162f494db..eb14e3a7b8f3 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -48,6 +48,8 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_COLD	20		/* deactivate these pages */
+
 #define MADV_MERGEABLE   65		/* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 66		/* KSM may not merge identical pages */
 
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index ebbb48842190..f926b00ff11f 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -103,6 +103,8 @@
 #define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD	20		/* deactivate these pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index de2c67a33b7e..0ce997edb8bb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
+extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
 extern void swap_setup(void);
 
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 63b1f506ea67..23431faf0eb6 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -67,6 +67,8 @@
 #define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
+#define MADV_COLD	20		/* deactivate these pages */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
diff --git a/mm/internal.h b/mm/internal.h
index e32390802fd3..0d5f720c75ab 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -39,7 +39,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf);
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
-static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
 {
 	return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
 }
diff --git a/mm/madvise.c b/mm/madvise.c
index 1f8a6fdc6878..e1aee62967c3 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,6 +11,7 @@
 #include <linux/syscalls.h>
 #include <linux/mempolicy.h>
 #include <linux/page-isolation.h>
+#include <linux/page_idle.h>
 #include <linux/userfaultfd_k.h>
 #include <linux/hugetlb.h>
 #include <linux/falloc.h>
@@ -42,6 +43,7 @@ static int madvise_need_mmap_write(int behavior)
 	case MADV_REMOVE:
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
+	case MADV_COLD:
 	case MADV_FREE:
 		return 0;
 	default:
@@ -289,6 +291,176 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	return 0;
 }
 
+static int madvise_cold_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+{
+	struct mmu_gather *tlb = walk->private;
+	struct mm_struct *mm = tlb->mm;
+	struct vm_area_struct *vma = walk->vma;
+	pte_t *orig_pte, *pte, ptent;
+	spinlock_t *ptl;
+	struct page *page;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_trans_huge(*pmd)) {
+		pmd_t orig_pmd;
+		unsigned long next = pmd_addr_end(addr, end);
+
+		tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
+		ptl = pmd_trans_huge_lock(pmd, vma);
+		if (!ptl)
+			return 0;
+
+		orig_pmd = *pmd;
+		if (is_huge_zero_pmd(orig_pmd))
+			goto huge_unlock;
+
+		if (unlikely(!pmd_present(orig_pmd))) {
+			VM_BUG_ON(thp_migration_supported() &&
+					!is_pmd_migration_entry(orig_pmd));
+			goto huge_unlock;
+		}
+
+		page = pmd_page(orig_pmd);
+		if (next - addr != HPAGE_PMD_SIZE) {
+			int err;
+
+			if (page_mapcount(page) != 1)
+				goto huge_unlock;
+
+			get_page(page);
+			spin_unlock(ptl);
+			lock_page(page);
+			err = split_huge_page(page);
+			unlock_page(page);
+			put_page(page);
+			if (!err)
+				goto regular_page;
+			return 0;
+		}
+
+		if (pmd_young(orig_pmd)) {
+			pmdp_invalidate(vma, addr, pmd);
+			orig_pmd = pmd_mkold(orig_pmd);
+
+			set_pmd_at(mm, addr, pmd, orig_pmd);
+			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+		}
+
+		test_and_clear_page_young(page);
+		deactivate_page(page);
+huge_unlock:
+		spin_unlock(ptl);
+		return 0;
+	}
+
+	if (pmd_trans_unstable(pmd))
+		return 0;
+regular_page:
+#endif
+	tlb_change_page_size(tlb, PAGE_SIZE);
+	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	flush_tlb_batched_pending(mm);
+	arch_enter_lazy_mmu_mode();
+	for (; addr < end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+
+		if (pte_none(ptent))
+			continue;
+
+		if (!pte_present(ptent))
+			continue;
+
+		page = vm_normal_page(vma, addr, ptent);
+		if (!page)
+			continue;
+
+		/*
+		 * Creating a THP page is expensive so split it only if we
+		 * are sure it's worth. Split it if we are only owner.
+		 */
+		if (PageTransCompound(page)) {
+			if (page_mapcount(page) != 1)
+				break;
+			get_page(page);
+			if (!trylock_page(page)) {
+				put_page(page);
+				break;
+			}
+			pte_unmap_unlock(orig_pte, ptl);
+			if (split_huge_page(page)) {
+				unlock_page(page);
+				put_page(page);
+				pte_offset_map_lock(mm, pmd, addr, &ptl);
+				break;
+			}
+			unlock_page(page);
+			put_page(page);
+			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			pte--;
+			addr -= PAGE_SIZE;
+			continue;
+		}
+
+		VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+		if (pte_young(ptent)) {
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
+							tlb->fullmm);
+			ptent = pte_mkold(ptent);
+			set_pte_at(mm, addr, pte, ptent);
+			tlb_remove_tlb_entry(tlb, pte, addr);
+		}
+
+		/*
+		 * We are deactivating a page for accelerating reclaiming.
+		 * VM couldn't reclaim the page unless we clear PG_young.
+		 * As a side effect, it makes confuse idle-page tracking
+		 * because they will miss recent referenced history.
+		 */
+		test_and_clear_page_young(page);
+		deactivate_page(page);
+	}
+
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(orig_pte, ptl);
+	cond_resched();
+
+	return 0;
+}
+
+static const struct mm_walk_ops cold_walk_ops = {
+	.pmd_entry = madvise_cold_pte_range,
+};
+
+static void madvise_cold_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end)
+{
+	tlb_start_vma(tlb, vma);
+	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, NULL);
+	tlb_end_vma(tlb, vma);
+}
+
+static long madvise_cold(struct vm_area_struct *vma,
+			struct vm_area_struct **prev,
+			unsigned long start_addr, unsigned long end_addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
+
+	*prev = vma;
+	if (!can_madv_lru_vma(vma))
+		return -EINVAL;
+
+	lru_add_drain();
+	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+	madvise_cold_page_range(&tlb, vma, start_addr, end_addr);
+	tlb_finish_mmu(&tlb, start_addr, end_addr);
+
+	return 0;
+}
+
 static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 
@@ -493,7 +665,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 				  int behavior)
 {
 	*prev = vma;
-	if (!can_madv_dontneed_vma(vma))
+	if (!can_madv_lru_vma(vma))
 		return -EINVAL;
 
 	if (!userfaultfd_remove(vma, start, end)) {
@@ -515,7 +687,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 			 */
 			return -ENOMEM;
 		}
-		if (!can_madv_dontneed_vma(vma))
+		if (!can_madv_lru_vma(vma))
 			return -EINVAL;
 		if (end > vma->vm_end) {
 			/*
@@ -669,6 +841,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		return madvise_remove(vma, prev, start, end);
 	case MADV_WILLNEED:
 		return madvise_willneed(vma, prev, start, end);
+	case MADV_COLD:
+		return madvise_cold(vma, prev, start, end);
 	case MADV_FREE:
 	case MADV_DONTNEED:
 		return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -690,6 +864,7 @@ madvise_behavior_valid(int behavior)
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
 	case MADV_FREE:
+	case MADV_COLD:
 #ifdef CONFIG_KSM
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c1d9496b4c43..71e3acea7817 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -523,7 +523,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
 	set_bit(MMF_UNSTABLE, &mm->flags);
 
 	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-		if (!can_madv_dontneed_vma(vma))
+		if (!can_madv_lru_vma(vma))
 			continue;
 
 		/*
diff --git a/mm/swap.c b/mm/swap.c
index 784dc1620620..38c3fa4308e2 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -47,6 +47,7 @@ int page_cluster;
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
@@ -538,6 +539,22 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
 	update_page_reclaim_stat(lruvec, file, 0);
 }
 
+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
+			    void *arg)
+{
+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+		int file = page_is_file_cache(page);
+		int lru = page_lru_base_type(page);
+
+		del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
+		ClearPageActive(page);
+		ClearPageReferenced(page);
+		add_page_to_lru_list(page, lruvec, lru);
+
+		__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
+		update_page_reclaim_stat(lruvec, file, 0);
+	}
+}
 
 static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
 			    void *arg)
@@ -590,6 +607,10 @@ void lru_add_drain_cpu(int cpu)
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 
+	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+	if (pagevec_count(pvec))
+		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
 	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
 	if (pagevec_count(pvec))
 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
@@ -623,6 +644,26 @@ void deactivate_file_page(struct page *page)
 	}
 }
 
+/*
+ * deactivate_page - deactivate a page
+ * @page: page to deactivate
+ *
+ * deactivate_page() moves @page to the inactive list if @page was on the active
+ * list and was not an unevictable page.  This is done to accelerate the reclaim
+ * of @page.
+ */
+void deactivate_page(struct page *page)
+{
+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+		get_page(page);
+		if (!pagevec_add(pvec, page) || PageCompound(page))
+			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+		put_cpu_var(lru_deactivate_pvecs);
+	}
+}
+
 /**
  * mark_page_lazyfree - make an anon page lazyfree
  * @page: page to deactivate
@@ -687,6 +728,7 @@ void lru_add_drain_all(void)
 		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
 		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
 		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
+		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
 		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
 		    need_activate_page_drain(cpu)) {
 			INIT_WORK(work, lru_add_drain_per_cpu);
-- 
cgit v1.2.3


From 1a4e58cce84ee88129d5d49c064bd2852b481357 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Wed, 25 Sep 2019 16:49:15 -0700
Subject: mm: introduce MADV_PAGEOUT

When a process expects no accesses to a certain memory range for a long
time, it could hint kernel that the pages can be reclaimed instantly but
data should be preserved for future use.  This could reduce workingset
eviction so it ends up increasing performance.

This patch introduces the new MADV_PAGEOUT hint to madvise(2) syscall.
MADV_PAGEOUT can be used by a process to mark a memory range as not
expected to be used for a long time so that kernel reclaims *any LRU*
pages instantly.  The hint can help kernel in deciding which pages to
evict proactively.

A note: It doesn't apply SWAP_CLUSTER_MAX LRU page isolation limit
intentionally because it's automatically bounded by PMD size.  If PMD
size(e.g., 256) makes some trouble, we could fix it later by limit it to
SWAP_CLUSTER_MAX[1].

- man-page material

MADV_PAGEOUT (since Linux x.x)

Do not expect access in the near future so pages in the specified
regions could be reclaimed instantly regardless of memory pressure.
Thus, access in the range after successful operation could cause
major page fault but never lose the up-to-date contents unlike
MADV_DONTNEED. Pages belonging to a shared mapping are only processed
if a write access is allowed for the calling process.

MADV_PAGEOUT cannot be applied to locked pages, Huge TLB pages, or
VM_PFNMAP pages.

[1] https://lore.kernel.org/lkml/20190710194719.GS29695@dhcp22.suse.cz/

[minchan@kernel.org: clear PG_active on MADV_PAGEOUT]
  Link: http://lkml.kernel.org/r/20190802200643.GA181880@google.com
[akpm@linux-foundation.org: resolve conflicts with hmm.git]
Link: http://lkml.kernel.org/r/20190726023435.214162-5-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: kbuild test robot <lkp@intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Chris Zankel <chris@zankel.net>
Cc: Daniel Colascione <dancol@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Oleksandr Natalenko <oleksandr@redhat.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Sonny Rao <sonnyrao@google.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Tim Murray <timmurray@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/include/uapi/asm/mman.h     |   1 +
 arch/mips/include/uapi/asm/mman.h      |   1 +
 arch/parisc/include/uapi/asm/mman.h    |   1 +
 arch/xtensa/include/uapi/asm/mman.h    |   1 +
 include/linux/swap.h                   |   1 +
 include/uapi/asm-generic/mman-common.h |   1 +
 mm/madvise.c                           | 189 +++++++++++++++++++++++++++++++++
 mm/vmscan.c                            |  56 ++++++++++
 8 files changed, 251 insertions(+)

(limited to 'include')

diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index f3258fbf03d0..a18ec7f63888 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -69,6 +69,7 @@
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
 #define MADV_COLD	20		/* deactivate these pages */
+#define MADV_PAGEOUT	21		/* reclaim these pages */
 
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index 00ad09fc5eb1..57dc2ac4f8bd 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -96,6 +96,7 @@
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
 #define MADV_COLD	20		/* deactivate these pages */
+#define MADV_PAGEOUT	21		/* reclaim these pages */
 
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index eb14e3a7b8f3..6fd8871e4081 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -49,6 +49,7 @@
 #define MADV_DOFORK	11		/* do inherit across fork */
 
 #define MADV_COLD	20		/* deactivate these pages */
+#define MADV_PAGEOUT	21		/* reclaim these pages */
 
 #define MADV_MERGEABLE   65		/* KSM may merge identical pages */
 #define MADV_UNMERGEABLE 66		/* KSM may not merge identical pages */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index f926b00ff11f..e5e643752947 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -104,6 +104,7 @@
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
 #define MADV_COLD	20		/* deactivate these pages */
+#define MADV_PAGEOUT	21		/* reclaim these pages */
 
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0ce997edb8bb..063c0c1e112b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -365,6 +365,7 @@ extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
 
+extern unsigned long reclaim_pages(struct list_head *page_list);
 #ifdef CONFIG_NUMA
 extern int node_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 23431faf0eb6..c160a5354eb6 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -68,6 +68,7 @@
 #define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
 
 #define MADV_COLD	20		/* deactivate these pages */
+#define MADV_PAGEOUT	21		/* reclaim these pages */
 
 /* compatibility flags */
 #define MAP_FILE	0
diff --git a/mm/madvise.c b/mm/madvise.c
index e1aee62967c3..54c5639774b6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -44,6 +44,7 @@ static int madvise_need_mmap_write(int behavior)
 	case MADV_WILLNEED:
 	case MADV_DONTNEED:
 	case MADV_COLD:
+	case MADV_PAGEOUT:
 	case MADV_FREE:
 		return 0;
 	default:
@@ -461,6 +462,191 @@ static long madvise_cold(struct vm_area_struct *vma,
 	return 0;
 }
 
+static int madvise_pageout_pte_range(pmd_t *pmd, unsigned long addr,
+				unsigned long end, struct mm_walk *walk)
+{
+	struct mmu_gather *tlb = walk->private;
+	struct mm_struct *mm = tlb->mm;
+	struct vm_area_struct *vma = walk->vma;
+	pte_t *orig_pte, *pte, ptent;
+	spinlock_t *ptl;
+	LIST_HEAD(page_list);
+	struct page *page;
+
+	if (fatal_signal_pending(current))
+		return -EINTR;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (pmd_trans_huge(*pmd)) {
+		pmd_t orig_pmd;
+		unsigned long next = pmd_addr_end(addr, end);
+
+		tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
+		ptl = pmd_trans_huge_lock(pmd, vma);
+		if (!ptl)
+			return 0;
+
+		orig_pmd = *pmd;
+		if (is_huge_zero_pmd(orig_pmd))
+			goto huge_unlock;
+
+		if (unlikely(!pmd_present(orig_pmd))) {
+			VM_BUG_ON(thp_migration_supported() &&
+					!is_pmd_migration_entry(orig_pmd));
+			goto huge_unlock;
+		}
+
+		page = pmd_page(orig_pmd);
+		if (next - addr != HPAGE_PMD_SIZE) {
+			int err;
+
+			if (page_mapcount(page) != 1)
+				goto huge_unlock;
+			get_page(page);
+			spin_unlock(ptl);
+			lock_page(page);
+			err = split_huge_page(page);
+			unlock_page(page);
+			put_page(page);
+			if (!err)
+				goto regular_page;
+			return 0;
+		}
+
+		if (pmd_young(orig_pmd)) {
+			pmdp_invalidate(vma, addr, pmd);
+			orig_pmd = pmd_mkold(orig_pmd);
+
+			set_pmd_at(mm, addr, pmd, orig_pmd);
+			tlb_remove_tlb_entry(tlb, pmd, addr);
+		}
+
+		ClearPageReferenced(page);
+		test_and_clear_page_young(page);
+
+		if (!isolate_lru_page(page))
+			list_add(&page->lru, &page_list);
+huge_unlock:
+		spin_unlock(ptl);
+		reclaim_pages(&page_list);
+		return 0;
+	}
+
+	if (pmd_trans_unstable(pmd))
+		return 0;
+regular_page:
+#endif
+	tlb_change_page_size(tlb, PAGE_SIZE);
+	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	flush_tlb_batched_pending(mm);
+	arch_enter_lazy_mmu_mode();
+	for (; addr < end; pte++, addr += PAGE_SIZE) {
+		ptent = *pte;
+		if (!pte_present(ptent))
+			continue;
+
+		page = vm_normal_page(vma, addr, ptent);
+		if (!page)
+			continue;
+
+		/*
+		 * creating a THP page is expensive so split it only if we
+		 * are sure it's worth. Split it if we are only owner.
+		 */
+		if (PageTransCompound(page)) {
+			if (page_mapcount(page) != 1)
+				break;
+			get_page(page);
+			if (!trylock_page(page)) {
+				put_page(page);
+				break;
+			}
+			pte_unmap_unlock(orig_pte, ptl);
+			if (split_huge_page(page)) {
+				unlock_page(page);
+				put_page(page);
+				pte_offset_map_lock(mm, pmd, addr, &ptl);
+				break;
+			}
+			unlock_page(page);
+			put_page(page);
+			pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+			pte--;
+			addr -= PAGE_SIZE;
+			continue;
+		}
+
+		VM_BUG_ON_PAGE(PageTransCompound(page), page);
+
+		if (pte_young(ptent)) {
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
+							tlb->fullmm);
+			ptent = pte_mkold(ptent);
+			set_pte_at(mm, addr, pte, ptent);
+			tlb_remove_tlb_entry(tlb, pte, addr);
+		}
+		ClearPageReferenced(page);
+		test_and_clear_page_young(page);
+
+		if (!isolate_lru_page(page))
+			list_add(&page->lru, &page_list);
+	}
+
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(orig_pte, ptl);
+	reclaim_pages(&page_list);
+	cond_resched();
+
+	return 0;
+}
+
+static void madvise_pageout_page_range(struct mmu_gather *tlb,
+			     struct vm_area_struct *vma,
+			     unsigned long addr, unsigned long end)
+{
+	tlb_start_vma(tlb, vma);
+	walk_page_range(vma->vm_mm, addr, end, &cold_walk_ops, NULL);
+	tlb_end_vma(tlb, vma);
+}
+
+static inline bool can_do_pageout(struct vm_area_struct *vma)
+{
+	if (vma_is_anonymous(vma))
+		return true;
+	if (!vma->vm_file)
+		return false;
+	/*
+	 * paging out pagecache only for non-anonymous mappings that correspond
+	 * to the files the calling process could (if tried) open for writing;
+	 * otherwise we'd be including shared non-exclusive mappings, which
+	 * opens a side channel.
+	 */
+	return inode_owner_or_capable(file_inode(vma->vm_file)) ||
+		inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+}
+
+static long madvise_pageout(struct vm_area_struct *vma,
+			struct vm_area_struct **prev,
+			unsigned long start_addr, unsigned long end_addr)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_gather tlb;
+
+	*prev = vma;
+	if (!can_madv_lru_vma(vma))
+		return -EINVAL;
+
+	if (!can_do_pageout(vma))
+		return 0;
+
+	lru_add_drain();
+	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+	madvise_pageout_page_range(&tlb, vma, start_addr, end_addr);
+	tlb_finish_mmu(&tlb, start_addr, end_addr);
+
+	return 0;
+}
+
 static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 
@@ -843,6 +1029,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		return madvise_willneed(vma, prev, start, end);
 	case MADV_COLD:
 		return madvise_cold(vma, prev, start, end);
+	case MADV_PAGEOUT:
+		return madvise_pageout(vma, prev, start, end);
 	case MADV_FREE:
 	case MADV_DONTNEED:
 		return madvise_dontneed_free(vma, prev, start, end, behavior);
@@ -865,6 +1053,7 @@ madvise_behavior_valid(int behavior)
 	case MADV_DONTNEED:
 	case MADV_FREE:
 	case MADV_COLD:
+	case MADV_PAGEOUT:
 #ifdef CONFIG_KSM
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d8bbaf068c35..e5d52d6a24af 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2145,6 +2145,62 @@ static void shrink_active_list(unsigned long nr_to_scan,
 			nr_deactivate, nr_rotated, sc->priority, file);
 }
 
+unsigned long reclaim_pages(struct list_head *page_list)
+{
+	int nid = -1;
+	unsigned long nr_reclaimed = 0;
+	LIST_HEAD(node_page_list);
+	struct reclaim_stat dummy_stat;
+	struct page *page;
+	struct scan_control sc = {
+		.gfp_mask = GFP_KERNEL,
+		.priority = DEF_PRIORITY,
+		.may_writepage = 1,
+		.may_unmap = 1,
+		.may_swap = 1,
+	};
+
+	while (!list_empty(page_list)) {
+		page = lru_to_page(page_list);
+		if (nid == -1) {
+			nid = page_to_nid(page);
+			INIT_LIST_HEAD(&node_page_list);
+		}
+
+		if (nid == page_to_nid(page)) {
+			ClearPageActive(page);
+			list_move(&page->lru, &node_page_list);
+			continue;
+		}
+
+		nr_reclaimed += shrink_page_list(&node_page_list,
+						NODE_DATA(nid),
+						&sc, 0,
+						&dummy_stat, false);
+		while (!list_empty(&node_page_list)) {
+			page = lru_to_page(&node_page_list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+
+		nid = -1;
+	}
+
+	if (!list_empty(&node_page_list)) {
+		nr_reclaimed += shrink_page_list(&node_page_list,
+						NODE_DATA(nid),
+						&sc, 0,
+						&dummy_stat, false);
+		while (!list_empty(&node_page_list)) {
+			page = lru_to_page(&node_page_list);
+			list_del(&page->lru);
+			putback_lru_page(page);
+		}
+	}
+
+	return nr_reclaimed;
+}
+
 /*
  * The inactive anon list should be small enough that the VM never has
  * to do too much work.
-- 
cgit v1.2.3


From b4ed71f557e458257e0f71b11969954acb389240 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 25 Sep 2019 16:49:46 -0700
Subject: mm: treewide: clarify pgtable_page_{ctor,dtor}() naming

The naming of pgtable_page_{ctor,dtor}() seems to have confused a few
people, and until recently arm64 used these erroneously/pointlessly for
other levels of page table.

To make it incredibly clear that these only apply to the PTE level, and to
align with the naming of pgtable_pmd_page_{ctor,dtor}(), let's rename them
to pgtable_pte_page_{ctor,dtor}().

These changes were generated with the following shell script:

----
git grep -lw 'pgtable_page_.tor' | while read FILE; do
    sed -i '{s/pgtable_page_ctor/pgtable_pte_page_ctor/}' $FILE;
    sed -i '{s/pgtable_page_dtor/pgtable_pte_page_dtor/}' $FILE;
done
----

... with the documentation re-flowed to remain under 80 columns, and
whitespace fixed up in macros to keep backslashes aligned.

There should be no functional change as a result of this patch.

Link: http://lkml.kernel.org/r/20190722141133.3116-1-mark.rutland@arm.com
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>	[m68k]
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/split_page_table_lock.rst | 10 +++++-----
 arch/arc/include/asm/pgalloc.h             |  4 ++--
 arch/arm/include/asm/tlb.h                 |  2 +-
 arch/arm/mm/mmu.c                          |  2 +-
 arch/arm64/include/asm/tlb.h               |  2 +-
 arch/arm64/mm/mmu.c                        |  2 +-
 arch/csky/include/asm/pgalloc.h            |  2 +-
 arch/hexagon/include/asm/pgalloc.h         |  2 +-
 arch/m68k/include/asm/mcf_pgalloc.h        |  6 +++---
 arch/m68k/include/asm/motorola_pgalloc.h   |  6 +++---
 arch/m68k/include/asm/sun3_pgalloc.h       |  2 +-
 arch/mips/include/asm/pgalloc.h            |  2 +-
 arch/nios2/include/asm/pgalloc.h           |  2 +-
 arch/openrisc/include/asm/pgalloc.h        |  6 +++---
 arch/powerpc/mm/pgtable-frag.c             |  6 +++---
 arch/riscv/include/asm/pgalloc.h           |  2 +-
 arch/s390/mm/pgalloc.c                     |  6 +++---
 arch/sh/include/asm/pgalloc.h              |  2 +-
 arch/sparc/mm/init_64.c                    |  4 ++--
 arch/sparc/mm/srmmu.c                      |  4 ++--
 arch/um/include/asm/pgalloc.h              |  2 +-
 arch/unicore32/include/asm/tlb.h           |  2 +-
 arch/x86/mm/pgtable.c                      |  2 +-
 arch/xtensa/include/asm/pgalloc.h          |  4 ++--
 include/asm-generic/pgalloc.h              |  8 ++++----
 include/linux/mm.h                         |  4 ++--
 26 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include')

diff --git a/Documentation/vm/split_page_table_lock.rst b/Documentation/vm/split_page_table_lock.rst
index 889b00be469f..ff51f4a5494d 100644
--- a/Documentation/vm/split_page_table_lock.rst
+++ b/Documentation/vm/split_page_table_lock.rst
@@ -54,9 +54,9 @@ Hugetlb-specific helpers:
 Support of split page table lock by an architecture
 ===================================================
 
-There's no need in special enabling of PTE split page table lock:
-everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
-which must be called on PTE table allocation / freeing.
+There's no need in special enabling of PTE split page table lock: everything
+required is done by pgtable_pte_page_ctor() and pgtable_pte_page_dtor(), which
+must be called on PTE table allocation / freeing.
 
 Make sure the architecture doesn't use slab allocator for page table
 allocation: slab uses page->slab_cache for its pages.
@@ -74,7 +74,7 @@ paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
 
 With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
 
-NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
+NOTE: pgtable_pte_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
 be handled properly.
 
 page->ptl
@@ -94,7 +94,7 @@ trick:
    split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs
    one more cache line for indirect access;
 
-The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
+The spinlock_t allocated in pgtable_pte_page_ctor() for PTE table and in
 pgtable_pmd_page_ctor() for PMD table.
 
 Please, never access page->ptl directly -- use appropriate helper.
diff --git a/arch/arc/include/asm/pgalloc.h b/arch/arc/include/asm/pgalloc.h
index 4751f2251cd9..b747f2ec2928 100644
--- a/arch/arc/include/asm/pgalloc.h
+++ b/arch/arc/include/asm/pgalloc.h
@@ -108,7 +108,7 @@ pte_alloc_one(struct mm_struct *mm)
 		return 0;
 	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
 	page = virt_to_page(pte_pg);
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return 0;
 	}
@@ -123,7 +123,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t ptep)
 {
-	pgtable_page_dtor(virt_to_page(ptep));
+	pgtable_pte_page_dtor(virt_to_page(ptep));
 	free_pages((unsigned long)ptep, __get_order_pte());
 }
 
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index b75ea15b85c0..669474add486 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -44,7 +44,7 @@ static inline void __tlb_remove_table(void *_table)
 static inline void
 __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
 {
-	pgtable_page_dtor(pte);
+	pgtable_pte_page_dtor(pte);
 
 #ifndef CONFIG_ARM_LPAE
 	/*
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 25da9b2d9610..48c2888297dd 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -731,7 +731,7 @@ static void *__init late_alloc(unsigned long sz)
 {
 	void *ptr = (void *)__get_free_pages(GFP_PGTABLE_KERNEL, get_order(sz));
 
-	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+	if (!ptr || !pgtable_pte_page_ctor(virt_to_page(ptr)))
 		BUG();
 	return ptr;
 }
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index a95d1fcb7e21..b76df828e6b7 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -44,7 +44,7 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 				  unsigned long addr)
 {
-	pgtable_page_dtor(pte);
+	pgtable_pte_page_dtor(pte);
 	tlb_remove_table(tlb, pte);
 }
 
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 53dc6f24cfb7..60c929f3683b 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -384,7 +384,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift)
 	 * folded, and if so pgtable_pmd_page_ctor() becomes nop.
 	 */
 	if (shift == PAGE_SHIFT)
-		BUG_ON(!pgtable_page_ctor(phys_to_page(pa)));
+		BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa)));
 	else if (shift == PMD_SHIFT)
 		BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));
 
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index d089113fe41f..c7c1ed27e348 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -71,7 +71,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 #define __pte_free_tlb(tlb, pte, address)		\
 do {							\
-	pgtable_page_dtor(pte);				\
+	pgtable_pte_page_dtor(pte);			\
 	tlb_remove_page(tlb, pte);			\
 } while (0)
 
diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h
index 5a6e79e7926d..cc9be514a676 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -94,7 +94,7 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
 
 #define __pte_free_tlb(tlb, pte, addr)		\
 do {						\
-	pgtable_page_dtor((pte));		\
+	pgtable_pte_page_dtor((pte));		\
 	tlb_remove_page((tlb), (pte));		\
 } while (0)
 
diff --git a/arch/m68k/include/asm/mcf_pgalloc.h b/arch/m68k/include/asm/mcf_pgalloc.h
index 4399d712f6db..b34d44d666a4 100644
--- a/arch/m68k/include/asm/mcf_pgalloc.h
+++ b/arch/m68k/include/asm/mcf_pgalloc.h
@@ -41,7 +41,7 @@ extern inline pmd_t *pmd_alloc_kernel(pgd_t *pgd, unsigned long address)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 				  unsigned long address)
 {
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	__free_page(page);
 }
 
@@ -54,7 +54,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 
 	if (!page)
 		return NULL;
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -73,7 +73,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free(struct mm_struct *mm, struct page *page)
 {
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	__free_page(page);
 }
 
diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h
index d04d9ba9b976..acab315c851f 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -36,7 +36,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 	page = alloc_pages(GFP_KERNEL|__GFP_ZERO, 0);
 	if(!page)
 		return NULL;
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -51,7 +51,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 {
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	cache_page(kmap(page));
 	kunmap(page);
 	__free_page(page);
@@ -60,7 +60,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t page)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page,
 				  unsigned long address)
 {
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	cache_page(kmap(page));
 	kunmap(page);
 	__free_page(page);
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h
index 1a8ddbd0d23c..856121122b91 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -21,7 +21,7 @@ extern const char bad_pmd_string[];
 
 #define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
-	pgtable_page_dtor(pte);				\
+	pgtable_pte_page_dtor(pte);			\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index aa73cb187a07..166842337eb2 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -54,7 +54,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #define __pte_free_tlb(tlb,pte,address)			\
 do {							\
-	pgtable_page_dtor(pte);				\
+	pgtable_pte_page_dtor(pte);			\
 	tlb_remove_page((tlb), pte);			\
 } while (0)
 
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 750d18d5980b..0b146d773c85 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -41,7 +41,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #define __pte_free_tlb(tlb, pte, addr)				\
 	do {							\
-		pgtable_page_dtor(pte);				\
+		pgtable_pte_page_dtor(pte);			\
 		tlb_remove_page((tlb), (pte));			\
 	} while (0)
 
diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h
index 787c1b9d2f6d..da12a4c38c4b 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -75,7 +75,7 @@ static inline struct page *pte_alloc_one(struct mm_struct *mm)
 	if (!pte)
 		return NULL;
 	clear_page(page_address(pte));
-	if (!pgtable_page_ctor(pte)) {
+	if (!pgtable_pte_page_ctor(pte)) {
 		__free_page(pte);
 		return NULL;
 	}
@@ -89,13 +89,13 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, struct page *pte)
 {
-	pgtable_page_dtor(pte);
+	pgtable_pte_page_dtor(pte);
 	__free_page(pte);
 }
 
 #define __pte_free_tlb(tlb, pte, addr)	\
 do {					\
-	pgtable_page_dtor(pte);		\
+	pgtable_pte_page_dtor(pte);	\
 	tlb_remove_page((tlb), (pte));	\
 } while (0)
 
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index a7b05214760c..ee4bd6d38602 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -25,7 +25,7 @@ void pte_frag_destroy(void *pte_frag)
 	count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
 	/* We allow PTE_FRAG_NR fragments from a PTE page */
 	if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
-		pgtable_page_dtor(page);
+		pgtable_pte_page_dtor(page);
 		__free_page(page);
 	}
 }
@@ -61,7 +61,7 @@ static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
 		page = alloc_page(PGALLOC_GFP | __GFP_ACCOUNT);
 		if (!page)
 			return NULL;
-		if (!pgtable_page_ctor(page)) {
+		if (!pgtable_pte_page_ctor(page)) {
 			__free_page(page);
 			return NULL;
 		}
@@ -113,7 +113,7 @@ void pte_fragment_free(unsigned long *table, int kernel)
 	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
 	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		if (!kernel)
-			pgtable_page_dtor(page);
+			pgtable_pte_page_dtor(page);
 		__free_page(page);
 	}
 }
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index f66a00d8cb19..d59ea92285ec 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -78,7 +78,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 #define __pte_free_tlb(tlb, pte, buf)   \
 do {                                    \
-	pgtable_page_dtor(pte);         \
+	pgtable_pte_page_dtor(pte);     \
 	tlb_remove_page((tlb), pte);    \
 } while (0)
 
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 54fcdf66ae96..3dd253f81a77 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -210,7 +210,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	page = alloc_page(GFP_KERNEL);
 	if (!page)
 		return NULL;
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -256,7 +256,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 		atomic_xor_bits(&page->_refcount, 3U << 24);
 	}
 
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	__free_page(page);
 }
 
@@ -308,7 +308,7 @@ void __tlb_remove_table(void *_table)
 	case 3:		/* 4K page table with pgstes */
 		if (mask & 3)
 			atomic_xor_bits(&page->_refcount, 3 << 24);
-		pgtable_page_dtor(page);
+		pgtable_pte_page_dtor(page);
 		__free_page(page);
 		break;
 	}
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 8c6341a4d807..22d968bfe9bb 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -29,7 +29,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 
 #define __pte_free_tlb(tlb,pte,addr)			\
 do {							\
-	pgtable_page_dtor(pte);				\
+	pgtable_pte_page_dtor(pte);			\
 	tlb_remove_page((tlb), (pte));			\
 } while (0)
 
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 4b099dd7a767..e6d91819da92 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2903,7 +2903,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
 	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page)
 		return NULL;
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		free_unref_page(page);
 		return NULL;
 	}
@@ -2919,7 +2919,7 @@ static void __pte_free(pgtable_t pte)
 {
 	struct page *page = virt_to_page(pte);
 
-	pgtable_page_dtor(page);
+	pgtable_pte_page_dtor(page);
 	__free_page(page);
 }
 
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index aaebbc00d262..cc3ad64479ac 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -378,7 +378,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
 	if ((pte = (unsigned long)pte_alloc_one_kernel(mm)) == 0)
 		return NULL;
 	page = pfn_to_page(__nocache_pa(pte) >> PAGE_SHIFT);
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -389,7 +389,7 @@ void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
 	unsigned long p;
 
-	pgtable_page_dtor(pte);
+	pgtable_pte_page_dtor(pte);
 	p = (unsigned long)page_address(pte);	/* Cached address (for test) */
 	if (p == 0)
 		BUG();
diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h
index 446e0c0f4018..881e76da1938 100644
--- a/arch/um/include/asm/pgalloc.h
+++ b/arch/um/include/asm/pgalloc.h
@@ -29,7 +29,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 
 #define __pte_free_tlb(tlb,pte, address)		\
 do {							\
-	pgtable_page_dtor(pte);				\
+	pgtable_pte_page_dtor(pte);			\
 	tlb_remove_page((tlb),(pte));			\
 } while (0)
 
diff --git a/arch/unicore32/include/asm/tlb.h b/arch/unicore32/include/asm/tlb.h
index 10d2356bfddd..4663d8cc80ef 100644
--- a/arch/unicore32/include/asm/tlb.h
+++ b/arch/unicore32/include/asm/tlb.h
@@ -15,7 +15,7 @@
 
 #define __pte_free_tlb(tlb, pte, addr)				\
 	do {							\
-		pgtable_page_dtor(pte);				\
+		pgtable_pte_page_dtor(pte);			\
 		tlb_remove_page((tlb), (pte));			\
 	} while (0)
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 463940faf52f..3e4b9035bb9a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -45,7 +45,7 @@ early_param("userpte", setup_userpte);
 
 void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
-	pgtable_page_dtor(pte);
+	pgtable_pte_page_dtor(pte);
 	paravirt_release_pte(page_to_pfn(pte));
 	paravirt_tlb_remove_table(tlb, pte);
 }
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index dd744aa450fa..1d38f0e755ba 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -55,7 +55,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 	if (!pte)
 		return NULL;
 	page = virt_to_page(pte);
-	if (!pgtable_page_ctor(page)) {
+	if (!pgtable_pte_page_ctor(page)) {
 		__free_page(page);
 		return NULL;
 	}
@@ -69,7 +69,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
 {
-	pgtable_page_dtor(pte);
+	pgtable_pte_page_dtor(pte);
 	__free_page(pte);
 }
 #define pmd_pgtable(pmd) pmd_page(pmd)
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 6f8cc06ee44e..73f7421413cb 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -49,7 +49,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
  * @mm: the mm_struct of the current context
  * @gfp: GFP flags to use for the allocation
  *
- * Allocates a page and runs the pgtable_page_ctor().
+ * Allocates a page and runs the pgtable_pte_page_ctor().
  *
  * This function is intended for architectures that need
  * anything beyond simple page allocation or must have custom GFP flags.
@@ -63,7 +63,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
 	pte = alloc_page(gfp);
 	if (!pte)
 		return NULL;
-	if (!pgtable_page_ctor(pte)) {
+	if (!pgtable_pte_page_ctor(pte)) {
 		__free_page(pte);
 		return NULL;
 	}
@@ -76,7 +76,7 @@ static inline pgtable_t __pte_alloc_one(struct mm_struct *mm, gfp_t gfp)
  * pte_alloc_one - allocate a page for PTE-level user page table
  * @mm: the mm_struct of the current context
  *
- * Allocates a page and runs the pgtable_page_ctor().
+ * Allocates a page and runs the pgtable_pte_page_ctor().
  *
  * Return: `struct page` initialized as page table or %NULL on error
  */
@@ -98,7 +98,7 @@ static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
  */
 static inline void pte_free(struct mm_struct *mm, struct page *pte_page)
 {
-	pgtable_page_dtor(pte_page);
+	pgtable_pte_page_dtor(pte_page);
 	__free_page(pte_page);
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 294a67b94147..cc292273e6ba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1949,7 +1949,7 @@ static inline void pgtable_init(void)
 	pgtable_cache_init();
 }
 
-static inline bool pgtable_page_ctor(struct page *page)
+static inline bool pgtable_pte_page_ctor(struct page *page)
 {
 	if (!ptlock_init(page))
 		return false;
@@ -1958,7 +1958,7 @@ static inline bool pgtable_page_ctor(struct page *page)
 	return true;
 }
 
-static inline void pgtable_page_dtor(struct page *page)
+static inline void pgtable_pte_page_dtor(struct page *page)
 {
 	ptlock_free(page);
 	__ClearPageTable(page);
-- 
cgit v1.2.3


From 159d2c7d8106177bd9a986fd005a311fe0d11285 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Sep 2019 13:11:26 -0700
Subject: sch_netem: fix rcu splat in netem_enqueue()

qdisc_root() use from netem_enqueue() triggers a lockdep warning.

__dev_queue_xmit() uses rcu_read_lock_bh() which is
not equivalent to rcu_read_lock() + local_bh_disable_bh as far
as lockdep is concerned.

WARNING: suspicious RCU usage
5.3.0-rc7+ #0 Not tainted
-----------------------------
include/net/sch_generic.h:492 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
3 locks held by syz-executor427/8855:
 #0: 00000000b5525c01 (rcu_read_lock_bh){....}, at: lwtunnel_xmit_redirect include/net/lwtunnel.h:92 [inline]
 #0: 00000000b5525c01 (rcu_read_lock_bh){....}, at: ip_finish_output2+0x2dc/0x2570 net/ipv4/ip_output.c:214
 #1: 00000000b5525c01 (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x20a/0x3650 net/core/dev.c:3804
 #2: 00000000364bae92 (&(&sch->q.lock)->rlock){+.-.}, at: spin_lock include/linux/spinlock.h:338 [inline]
 #2: 00000000364bae92 (&(&sch->q.lock)->rlock){+.-.}, at: __dev_xmit_skb net/core/dev.c:3502 [inline]
 #2: 00000000364bae92 (&(&sch->q.lock)->rlock){+.-.}, at: __dev_queue_xmit+0x14b8/0x3650 net/core/dev.c:3838

stack backtrace:
CPU: 0 PID: 8855 Comm: syz-executor427 Not tainted 5.3.0-rc7+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 lockdep_rcu_suspicious+0x153/0x15d kernel/locking/lockdep.c:5357
 qdisc_root include/net/sch_generic.h:492 [inline]
 netem_enqueue+0x1cfb/0x2d80 net/sched/sch_netem.c:479
 __dev_xmit_skb net/core/dev.c:3527 [inline]
 __dev_queue_xmit+0x15d2/0x3650 net/core/dev.c:3838
 dev_queue_xmit+0x18/0x20 net/core/dev.c:3902
 neigh_hh_output include/net/neighbour.h:500 [inline]
 neigh_output include/net/neighbour.h:509 [inline]
 ip_finish_output2+0x1726/0x2570 net/ipv4/ip_output.c:228
 __ip_finish_output net/ipv4/ip_output.c:308 [inline]
 __ip_finish_output+0x5fc/0xb90 net/ipv4/ip_output.c:290
 ip_finish_output+0x38/0x1f0 net/ipv4/ip_output.c:318
 NF_HOOK_COND include/linux/netfilter.h:294 [inline]
 ip_mc_output+0x292/0xf40 net/ipv4/ip_output.c:417
 dst_output include/net/dst.h:436 [inline]
 ip_local_out+0xbb/0x190 net/ipv4/ip_output.c:125
 ip_send_skb+0x42/0xf0 net/ipv4/ip_output.c:1555
 udp_send_skb.isra.0+0x6b2/0x1160 net/ipv4/udp.c:887
 udp_sendmsg+0x1e96/0x2820 net/ipv4/udp.c:1174
 inet_sendmsg+0x9e/0xe0 net/ipv4/af_inet.c:807
 sock_sendmsg_nosec net/socket.c:637 [inline]
 sock_sendmsg+0xd7/0x130 net/socket.c:657
 ___sys_sendmsg+0x3e2/0x920 net/socket.c:2311
 __sys_sendmmsg+0x1bf/0x4d0 net/socket.c:2413
 __do_sys_sendmmsg net/socket.c:2442 [inline]
 __se_sys_sendmmsg net/socket.c:2439 [inline]
 __x64_sys_sendmmsg+0x9d/0x100 net/socket.c:2439
 do_syscall_64+0xfd/0x6a0 arch/x86/entry/common.c:296
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 5 +++++
 net/sched/sch_netem.c     | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 43f5b7ed02bd..637548d54b3e 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -494,6 +494,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
 	return q;
 }
 
+static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
+{
+	return rcu_dereference_bh(qdisc->dev_queue->qdisc);
+}
+
 static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->qdisc_sleeping;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index f5cb35e550f8..0e44039e729c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -476,7 +476,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	 * skb will be queued.
 	 */
 	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
-		struct Qdisc *rootq = qdisc_root(sch);
+		struct Qdisc *rootq = qdisc_root_bh(sch);
 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
 
 		q->duplicate = 0;
-- 
cgit v1.2.3


From 4f6570d7206bb052f42718d55fbe72977f0318ea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Sep 2019 08:01:14 -0700
Subject: ipv6: add priority parameter to ip6_xmit()

Currently, ip6_xmit() sets skb->priority based on sk->sk_priority

This is not desirable for TCP since TCP shares the same ctl socket
for a given netns. We want to be able to send RST or ACK packets
with a non zero skb->priority.

This patch has no functional change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h               | 2 +-
 net/dccp/ipv6.c                  | 5 +++--
 net/ipv6/inet6_connection_sock.c | 2 +-
 net/ipv6/ip6_output.c            | 4 ++--
 net/ipv6/tcp_ipv6.c              | 6 ++++--
 net/sctp/ipv6.c                  | 2 +-
 6 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 8dfc65639aa4..009605c56f20 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -981,7 +981,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
  *	upper-layer output functions
  */
 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-	     __u32 mark, struct ipv6_txoptions *opt, int tclass);
+	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority);
 
 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
 
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1b7381ff787b..25aab672fc99 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -230,7 +230,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
 		opt = ireq->ipv6_opt;
 		if (!opt)
 			opt = rcu_dereference(np->opt);
-		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
+		err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
+			       sk->sk_priority);
 		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
@@ -284,7 +285,7 @@ static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(skb, dst);
-		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0);
+		ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 		DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
 		return;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 4da24aa6c696..0a0945a5b30d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -133,7 +133,7 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
 	fl6.daddr = sk->sk_v6_daddr;
 
 	res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       np->tclass);
+		       np->tclass,  sk->sk_priority);
 	rcu_read_unlock();
 	return res;
 }
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 89a4c7c2e25d..edadee4a7e76 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -193,7 +193,7 @@ bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
  * which are using proper atomic operations or spinlocks.
  */
 int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
-	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
+	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
 {
 	struct net *net = sock_net(sk);
 	const struct ipv6_pinfo *np = inet6_sk(sk);
@@ -258,7 +258,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	hdr->daddr = *first_hop;
 
 	skb->protocol = htons(ETH_P_IPV6);
-	skb->priority = sk->sk_priority;
+	skb->priority = priority;
 	skb->mark = mark;
 
 	mtu = dst_mtu(dst);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 87f44d3250ee..806064c28867 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -512,7 +512,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 		opt = ireq->ipv6_opt;
 		if (!opt)
 			opt = rcu_dereference(np->opt);
-		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
+		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
+			       sk->sk_priority);
 		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
@@ -907,7 +908,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
 	if (!IS_ERR(dst)) {
 		skb_dst_set(buff, dst);
-		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
+		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
+			 0);
 		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
 		if (rst)
 			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e5f2fc726a98..dd860fea0148 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -215,7 +215,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	rcu_read_lock();
 	res = ip6_xmit(sk, skb, fl6, sk->sk_mark, rcu_dereference(np->opt),
-		       tclass);
+		       tclass, sk->sk_priority);
 	rcu_read_unlock();
 	return res;
 }
-- 
cgit v1.2.3


From f6c0f5d209fa80eb808e08aa4206f6e264041ef6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 24 Sep 2019 08:01:16 -0700
Subject: tcp: honor SO_PRIORITY in TIME_WAIT state

ctl packets sent on behalf of TIME_WAIT sockets currently
have a zero skb->priority, which can cause various problems.

In this patch we :

- add a tw_priority field in struct inet_timewait_sock.

- populate it from sk->sk_priority when a TIME_WAIT is created.

- For IPv4, change ip_send_unicast_reply() and its two
  callers to propagate tw_priority correctly.
  ip_send_unicast_reply() no longer changes sk->sk_priority.

- For IPv6, make sure TIME_WAIT sockets pass their tw_priority
  field to tcp_v6_send_response() and tcp_v6_send_ack().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_timewait_sock.h | 1 +
 net/ipv4/ip_output.c             | 1 -
 net/ipv4/tcp_ipv4.c              | 4 ++++
 net/ipv4/tcp_minisocks.c         | 1 +
 net/ipv6/tcp_ipv6.c              | 6 ++++--
 5 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index aef38c140014..dfd919b3119e 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -71,6 +71,7 @@ struct inet_timewait_sock {
 				tw_pad		: 2,	/* 2 bits hole */
 				tw_tos		: 8;
 	u32			tw_txhash;
+	u32			tw_priority;
 	struct timer_list	tw_timer;
 	struct inet_bind_bucket	*tw_tb;
 };
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a77c3a4c24de..28fca408812c 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1694,7 +1694,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 
 	inet_sk(sk)->tos = arg->tos;
 
-	sk->sk_priority = skb->priority;
 	sk->sk_protocol = ip_hdr(skb)->protocol;
 	sk->sk_bound_dev_if = arg->bound_dev_if;
 	sk->sk_sndbuf = sysctl_wmem_default;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd394ad179a0..2ee45e3755e9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -771,6 +771,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	if (sk) {
 		ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 				   inet_twsk(sk)->tw_mark : sk->sk_mark;
+		ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+				   inet_twsk(sk)->tw_priority : sk->sk_priority;
 		transmit_time = tcp_transmit_time(sk);
 	}
 	ip_send_unicast_reply(ctl_sk,
@@ -866,6 +868,8 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	ctl_sk = this_cpu_read(*net->ipv4.tcp_sk);
 	ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
 			   inet_twsk(sk)->tw_mark : sk->sk_mark;
+	ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+			   inet_twsk(sk)->tw_priority : sk->sk_priority;
 	transmit_time = tcp_transmit_time(sk);
 	ip_send_unicast_reply(ctl_sk,
 			      skb, &TCP_SKB_CB(skb)->header.h4.opt,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 8bcaf2586b68..bb140a5db8c0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -266,6 +266,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
 		tw->tw_transparent	= inet->transparent;
 		tw->tw_mark		= sk->sk_mark;
+		tw->tw_priority		= sk->sk_priority;
 		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
 		tcptw->tw_rcv_nxt	= tp->rcv_nxt;
 		tcptw->tw_snd_nxt	= tp->snd_nxt;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f557bf27da2..e3d9f4559c99 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -995,8 +995,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
 				label = ip6_flowlabel(ipv6h);
 			priority = sk->sk_priority;
 		}
-		if (sk->sk_state == TCP_TIME_WAIT)
+		if (sk->sk_state == TCP_TIME_WAIT) {
 			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
+			priority = inet_twsk(sk)->tw_priority;
+		}
 	} else {
 		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
 			label = ip6_flowlabel(ipv6h);
@@ -1029,7 +1031,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
-			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), 0);
+			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
 
 	inet_twsk_put(tw);
 }
-- 
cgit v1.2.3


From 2df4de1681767df900e15e34195bbf7dc1b23e06 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 25 Sep 2019 19:28:19 -0700
Subject: ptp: correctly disable flags on old ioctls

Commit 415606588c61 ("PTP: introduce new versions of IOCTLs",
2019-09-13) introduced new versions of the PTP ioctls which actually
validate that the flags are acceptable values.

As part of this, it cleared the flags value using a bitwise
and+negation, in an attempt to prevent the old ioctl from accidentally
enabling new features.

This is incorrect for a couple of reasons. First, it results in
accidentally preventing previously working flags on the request ioctl.
By clearing the "valid" flags, we now no longer allow setting the
enable, rising edge, or falling edge flags.

Second, if we add new additional flags in the future, they must not be
set by the old ioctl. (Since the flag wasn't checked before, we could
potentially break userspace programs which sent garbage flag data.

The correct way to resolve this is to check for and clear all but the
originally valid flags.

Create defines indicating which flags are correctly checked and
interpreted by the original ioctls. Use these to clear any bits which
will not be correctly interpreted by the original ioctls.

In the future, new flags must be added to the VALID_FLAGS macros, but
*not* to the V1_VALID_FLAGS macros. In this way, new features may be
exposed over the v2 ioctls, but without breaking previous userspace
which happened to not clear the flags value properly. The old ioctl will
continue to behave the same way, while the new ioctl gains the benefit
of using the flags fields.

Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Felipe Balbi <felipe.balbi@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Christopher Hall <christopher.s.hall@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_chardev.c      |  4 ++--
 include/uapi/linux/ptp_clock.h | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 9c18476d8d10..67d0199840fd 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -155,7 +155,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			err = -EINVAL;
 			break;
 		} else if (cmd == PTP_EXTTS_REQUEST) {
-			req.extts.flags &= ~PTP_EXTTS_VALID_FLAGS;
+			req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
 			req.extts.rsv[0] = 0;
 			req.extts.rsv[1] = 0;
 		}
@@ -184,7 +184,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 			err = -EINVAL;
 			break;
 		} else if (cmd == PTP_PEROUT_REQUEST) {
-			req.perout.flags &= ~PTP_PEROUT_VALID_FLAGS;
+			req.perout.flags &= PTP_PEROUT_V1_VALID_FLAGS;
 			req.perout.rsv[0] = 0;
 			req.perout.rsv[1] = 0;
 			req.perout.rsv[2] = 0;
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index f16301015949..59e89a1bc3bb 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -31,15 +31,37 @@
 #define PTP_ENABLE_FEATURE (1<<0)
 #define PTP_RISING_EDGE    (1<<1)
 #define PTP_FALLING_EDGE   (1<<2)
+
+/*
+ * flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl.
+ */
 #define PTP_EXTTS_VALID_FLAGS	(PTP_ENABLE_FEATURE |	\
 				 PTP_RISING_EDGE |	\
 				 PTP_FALLING_EDGE)
 
+/*
+ * flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
+ * DO NOT ADD NEW FLAGS HERE.
+ */
+#define PTP_EXTTS_V1_VALID_FLAGS	(PTP_ENABLE_FEATURE |	\
+					 PTP_RISING_EDGE |	\
+					 PTP_FALLING_EDGE)
+
 /*
  * Bits of the ptp_perout_request.flags field:
  */
 #define PTP_PEROUT_ONE_SHOT (1<<0)
+
+/*
+ * flag fields valid for the new PTP_PEROUT_REQUEST2 ioctl.
+ */
 #define PTP_PEROUT_VALID_FLAGS	(PTP_PEROUT_ONE_SHOT)
+
+/*
+ * No flags are valid for the original PTP_PEROUT_REQUEST ioctl
+ */
+#define PTP_PEROUT_V1_VALID_FLAGS	(0)
+
 /*
  * struct ptp_clock_time - represents a time value
  *
-- 
cgit v1.2.3


From 174e23810cd3183dc2ca3f5166ef965a55eaaf54 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 26 Sep 2019 20:37:05 +0200
Subject: sk_buff: drop all skb extensions on free and skb scrubbing

Now that we have a 3rd extension, add a new helper that drops the
extension space and use it when we need to scrub an sk_buff.

At this time, scrubbing clears secpath and bridge netfilter data, but
retains the tc skb extension, after this patch all three get cleared.

NAPI reuse/free assumes we can only have a secpath attached to skb, but
it seems better to clear all extensions there as well.

v2: add unlikely hint (Eric Dumazet)

Fixes: 95a7233c452a ("net: openvswitch: Set OvS recirc_id from tc chain index")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 9 +++++++++
 net/core/dev.c         | 4 ++--
 net/core/skbuff.c      | 2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 907209c0794e..e7d3b1a513ef 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4144,8 +4144,17 @@ static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
 
 	return NULL;
 }
+
+static inline void skb_ext_reset(struct sk_buff *skb)
+{
+	if (unlikely(skb->active_extensions)) {
+		__skb_ext_put(skb->extensions);
+		skb->active_extensions = 0;
+	}
+}
 #else
 static inline void skb_ext_put(struct sk_buff *skb) {}
+static inline void skb_ext_reset(struct sk_buff *skb) {}
 static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
 static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
 static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
diff --git a/net/core/dev.c b/net/core/dev.c
index 71b18e80389f..bf3ed413abaf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5666,7 +5666,7 @@ EXPORT_SYMBOL(gro_find_complete_by_type);
 static void napi_skb_free_stolen_head(struct sk_buff *skb)
 {
 	skb_dst_drop(skb);
-	secpath_reset(skb);
+	skb_ext_put(skb);
 	kmem_cache_free(skbuff_head_cache, skb);
 }
 
@@ -5733,7 +5733,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 	skb->encapsulation = 0;
 	skb_shinfo(skb)->gso_type = 0;
 	skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
-	secpath_reset(skb);
+	skb_ext_reset(skb);
 
 	napi->skb = skb;
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f12e8a050edb..01d65206f4fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5119,7 +5119,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->skb_iif = 0;
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
-	secpath_reset(skb);
+	skb_ext_reset(skb);
 	nf_reset(skb);
 	nf_reset_trace(skb);
 
-- 
cgit v1.2.3


From ac79f78dab892fcdc11fda8af5cc5e80d09dca8a Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 4 Sep 2019 12:54:18 -0700
Subject: Revert "Revert "mm, thp: restore node-local hugepage allocations""

This reverts commit a8282608c88e08b1782141026eab61204c1e533f.

The commit references the original intended semantic for MADV_HUGEPAGE
which has subsequently taken on three unique purposes:

 - enables or disables thp for a range of memory depending on the system's
   config (is thp "enabled" set to "always" or "madvise"),

 - determines the synchronous compaction behavior for thp allocations at
   fault (is thp "defrag" set to "always", "defer+madvise", or "madvise"),
   and

 - reverts a previous MADV_NOHUGEPAGE (there is no madvise mode to only
   clear previous hugepage advice).

These are the three purposes that currently exist in 5.2 and over the
past several years that userspace has been written around.  Adding a
NUMA locality preference adds a fourth dimension to an already conflated
advice mode.

Based on the semantic that MADV_HUGEPAGE has provided over the past
several years, there exist workloads that use the tunable based on these
principles: specifically that the allocation should attempt to
defragment a local node before falling back.  It is agreed that remote
hugepages typically (but not always) have a better access latency than
remote native pages, although on Naples this is at parity for
intersocket.

The revert commit that this patch reverts allows hugepage allocation to
immediately allocate remotely when local memory is fragmented.  This is
contrary to the semantic of MADV_HUGEPAGE over the past several years:
that is, memory compaction should be attempted locally before falling
back.

The performance degradation of remote hugepages over local hugepages on
Rome, for example, is 53.5% increased access latency.  For this reason,
the goal is to revert back to the 5.2 and previous behavior that would
attempt local defragmentation before falling back.  With the patch that
is reverted by this patch, we see performance degradations at the tail
because the allocator happily allocates the remote hugepage rather than
even attempting to make a local hugepage available.

zone_reclaim_mode is not a solution to this problem since it does not
only impact hugepage allocations but rather changes the memory
allocation strategy for *all* page allocations.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 --
 mm/huge_memory.c          | 42 ++++++++++++++++--------------------------
 mm/mempolicy.c            |  2 +-
 3 files changed, 17 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index bac395f1d00a..5228c62af416 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,8 +139,6 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
-						unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de1f15969e27..62f0d8e9d76b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -648,37 +648,27 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	gfp_t this_node = 0;
-
-#ifdef CONFIG_NUMA
-	struct mempolicy *pol;
-	/*
-	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
-	 * specified, to express a general desire to stay on the current
-	 * node for optimistic allocation attempts. If the defrag mode
-	 * and/or madvise hint requires the direct reclaim then we prefer
-	 * to fallback to other node rather than node reclaim because that
-	 * can lead to excessive reclaim even though there is free memory
-	 * on other nodes. We expect that NUMA preferences are specified
-	 * by memory policies.
-	 */
-	pol = get_vma_policy(vma, addr);
-	if (pol->mode != MPOL_BIND)
-		this_node = __GFP_THISNODE;
-	mpol_cond_put(pol);
-#endif
+	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
+	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | __GFP_THISNODE |
+		       (vma_madvised ? 0 : __GFP_NORETRY);
+
+	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
+		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+
+	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     __GFP_KSWAPD_RECLAIM | this_node);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+						  __GFP_KSWAPD_RECLAIM);
+
+	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     this_node);
-	return GFP_TRANSHUGE_LIGHT | this_node;
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+
+	return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 65e0874fce17..9c9877a43d58 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1734,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
-- 
cgit v1.2.3


From 19deb7695e072deaff025e03de40c61b525bd57e Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 4 Sep 2019 12:54:20 -0700
Subject: Revert "Revert "Revert "mm, thp: consolidate THP gfp handling into
 alloc_hugepage_direct_gfpmask""

This reverts commit 92717d429b38e4f9f934eed7e605cc42858f1839.

Since commit a8282608c88e ("Revert "mm, thp: restore node-local hugepage
allocations"") is reverted in this series, it is better to restore the
previous 5.2 behavior between the thp allocation and the page allocator
rather than to attempt any consolidation or cleanup for a policy that is
now reverted.  It's less risky during an rc cycle and subsequent patches
in this series further modify the same policy that the pre-5.3 behavior
implements.

Consolidation and cleanup can be done subsequent to a sane default page
allocation strategy, so this patch reverts a cleanup done on a strategy
that is now reverted and thus is the least risky option.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 12 ++++++++----
 mm/huge_memory.c    | 27 +++++++++++++--------------
 mm/mempolicy.c      | 32 +++++++++++++++++++++++++++++---
 mm/shmem.c          |  2 +-
 4 files changed, 51 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index f33881688f42..fb07b503dc45 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,18 +510,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
+	alloc_pages(gfp_mask, order)
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 62f0d8e9d76b..aec462cc5d46 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -645,30 +645,30 @@ release:
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
+		return GFP_TRANSHUGE_LIGHT |
+			(vma_madvised ? __GFP_DIRECT_RECLAIM :
+					__GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return GFP_TRANSHUGE_LIGHT |
+		       (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return gfp_mask;
+	return GFP_TRANSHUGE_LIGHT;
 }
 
 /* Caller must hold page table lock. */
@@ -740,8 +740,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+	gfp = alloc_hugepage_direct_gfpmask(vma);
+	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1348,9 +1348,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (__transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
-		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
-				haddr, numa_node_id());
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
+		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9c9877a43d58..547cd403ed02 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
-				address, numa_node_id());
+		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+					 HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2083,6 +2083,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
+ *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2093,7 +2094,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2111,6 +2112,31 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		int hpage_node = node;
+
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node (or other explicitly preferred
+		 * node) we only try to allocate from the current/preferred
+		 * node and don't fall back to other nodes, as the cost of
+		 * remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
+			hpage_node = pol->v.preferred_node;
+
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(hpage_node, *nmask)) {
+			mpol_cond_put(pol);
+			page = __alloc_pages_node(hpage_node,
+						gfp | __GFP_THISNODE, order);
+			goto out;
+		}
+	}
+
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2bed4761f279..626d8c74b973 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
-- 
cgit v1.2.3


From f7d6316fb43735ae8af969c2e582fbee85709483 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Sat, 14 Sep 2019 18:32:15 +0800
Subject: mm, tracing: Print symbol name for call_site in trace events

To improve the readability of raw slab trace points, print the call_site ip
using '%pS'. Then we can grep events with function names.

[002] ....   808.188897: kmem_cache_free: call_site=putname+0x47/0x50 ptr=00000000cef40c80
[002] ....   808.188898: kfree: call_site=security_cred_free+0x42/0x50 ptr=0000000062400820
[002] ....   808.188904: kmem_cache_free: call_site=put_cred_rcu+0x88/0xa0 ptr=0000000058d74ef8
[002] ....   808.188913: kmem_cache_alloc: call_site=prepare_creds+0x26/0x100 ptr=0000000058d74ef8 bytes_req=168 bytes_alloc=576 gfp_flags=GFP_KERNEL
[002] ....   808.188917: kmalloc: call_site=security_prepare_creds+0x77/0xa0 ptr=0000000062400820 bytes_req=8 bytes_alloc=336 gfp_flags=GFP_KERNEL|__GFP_ZERO
[002] ....   808.188920: kmem_cache_alloc: call_site=getname_flags+0x4f/0x1e0 ptr=00000000cef40c80 bytes_req=4096 bytes_alloc=4480 gfp_flags=GFP_KERNEL
[002] ....   808.188925: kmem_cache_free: call_site=putname+0x47/0x50 ptr=00000000cef40c80
[002] ....   808.188926: kfree: call_site=security_cred_free+0x42/0x50 ptr=0000000062400820
[002] ....   808.188931: kmem_cache_free: call_site=put_cred_rcu+0x88/0xa0 ptr=0000000058d74ef8

Link: http://lkml.kernel.org/r/20190914103215.23301-1-changbin.du@gmail.com

Signed-off-by: Changbin Du <changbin.du@gmail.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/trace/events/kmem.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index eb57e3037deb..69e8bb8963db 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -35,8 +35,8 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		__entry->gfp_flags	= gfp_flags;
 	),
 
-	TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
-		__entry->call_site,
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+		(void *)__entry->call_site,
 		__entry->ptr,
 		__entry->bytes_req,
 		__entry->bytes_alloc,
@@ -131,7 +131,8 @@ DECLARE_EVENT_CLASS(kmem_free,
 		__entry->ptr		= ptr;
 	),
 
-	TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+	TP_printk("call_site=%pS ptr=%p",
+		  (void *)__entry->call_site, __entry->ptr)
 );
 
 DEFINE_EVENT(kmem_free, kfree,
-- 
cgit v1.2.3